diff --git a/.github/actions/rust-toolchain-setup/action.yml b/.github/actions/rust-toolchain-setup/action.yml
new file mode 100644
index 0000000000000..bf73fede16c7f
--- /dev/null
+++ b/.github/actions/rust-toolchain-setup/action.yml
@@ -0,0 +1,44 @@
+# yaml-language-server: $schema=https://json.schemastore.org/github-action.json
+
+name: 'Rust toolchain setup'
+description: 'Common setup steps for GitHub workflows for Rust projects'
+
+runs:
+  using: composite
+  steps:
+    - uses: dtolnay/rust-toolchain@1.71.0
+      with:
+        components: clippy, rustfmt
+    - uses: extractions/setup-just@v1
+      with:
+        just-version: '1.15.0' # optional semver specification, otherwise latest
+
+    ###
+    ### Linux setup
+    ###
+    - name: rustup
+      # We need to use the nightly rust tool change to enable registry-auth / to connect to ADO feeds.
+      if: ${{ (runner.os == 'Linux') }}
+      run: |
+        rustup set profile minimal
+        rustup install
+      shell: bash
+    # - name: Cargo login
+    #   if: ${{ (runner.os == 'Linux') }}
+    #   run: just cargo-login-ci
+    #   shell: bash
+
+      ###
+      ### Windows setup
+      ###
+    - name: rustup
+      # We need to use the nightly rust tool change to enable registry-auth / to connect to ADO feeds.
+      if: ${{ (runner.os == 'Windows') }}
+      run: |
+        rustup set profile minimal
+        rustup install
+      shell: pwsh
+    # - name: Cargo login
+    #   if: ${{ (runner.os == 'Windows') }}
+    #   run: just cargo-login-ci-windows
+    #   shell: pwsh
diff --git a/.github/stale.yml b/.github/stale.yml
deleted file mode 100644
index d89f0cdd91e52..0000000000000
--- a/.github/stale.yml
+++ /dev/null
@@ -1,22 +0,0 @@
-# Number of days of inactivity before an issue becomes stale
-daysUntilStale: 30
-
-# Number of days of inactivity before a stale issue is closed
-daysUntilClose: 7
-
-# Issues with these labels will never be considered stale
-exemptLabels:
-  - contributions welcome
-  - feature request
-  - regression
-  
-# Label to use when marking an issue as stale
-staleLabel: stale
-
-# Comment to post when marking an issue as stale. Set to `false` to disable
-markComment: >
-  This issue has been automatically marked as stale due to inactivity and will be closed in 7 days if no further activity occurs. If further support is needed, please provide an update and/or more details.
-
-# Comment to post when closing a stale issue. Set to `false` to disable
-closeComment: >
-  This issue has been automatically closed due to inactivity. Please reactivate if further support is needed.
diff --git a/.github/workflows/rust-ci.yml b/.github/workflows/rust-ci.yml
new file mode 100644
index 0000000000000..6c3f2eb0fbbe1
--- /dev/null
+++ b/.github/workflows/rust-ci.yml
@@ -0,0 +1,132 @@
+name: Rust
+
+on: [pull_request]
+
+env:
+  CARGO_TERM_COLOR: always
+  RUST_LOG: onnxruntime=debug,onnxruntime-sys=debug
+  RUST_BACKTRACE: 1
+  MANIFEST_PATH: ${{ github.workspace }}/rust/Cargo.toml
+
+jobs:
+  fmt:
+    name: Rustfmt
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/rust-toolchain-setup
+      - name: vendor onnxruntime source
+        run: just vendor
+      - name: fmt
+        run: cargo fmt --all -- --check
+
+  download:
+    name: Download prebuilt ONNX Runtime archive from build.rs
+    runs-on: ubuntu-latest
+    env:
+      ORT_RUST_STRATEGY=download
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/rust-toolchain-setup
+      - run: rustup target install x86_64-unknown-linux-gnu
+      - run: rustup target install x86_64-apple-darwin
+      - run: rustup target install i686-pc-windows-msvc
+      - run: rustup target install x86_64-pc-windows-msvc
+      # ******************************************************************
+      - name: Download prebuilt archive (CPU, x86_64-unknown-linux-gnu)
+        run: cargo build --target x86_64-unknown-linux-gnu  --manifest-path ${{ env.MANIFEST_PATH }}
+      - name: Verify prebuilt archive downloaded (CPU, x86_64-unknown-linux-gnu)
+        run: ls -lh target/x86_64-unknown-linux-gnu/debug/build/onnxruntime-sys-*/out/onnxruntime-linux-x64-1.*.tgz
+      # ******************************************************************
+      - name: Download prebuilt archive (CPU, x86_64-apple-darwin)
+        run: cargo build --target x86_64-apple-darwin  --manifest-path ${{ env.MANIFEST_PATH }}
+      - name: Verify prebuilt archive downloaded (CPU, x86_64-apple-darwin)
+        run: ls -lh target/x86_64-apple-darwin/debug/build/onnxruntime-sys-*/out/onnxruntime-osx-x64-1.*.tgz
+      # ******************************************************************
+      - name: Download prebuilt archive (CPU, i686-pc-windows-msvc)
+        run: cargo build --target i686-pc-windows-msvc  --manifest-path ${{ env.MANIFEST_PATH }}
+      - name: Verify prebuilt archive downloaded (CPU, i686-pc-windows-msvc)
+        run: ls -lh target/i686-pc-windows-msvc/debug/build/onnxruntime-sys-*/out/onnxruntime-win-x86-1.*.zip
+      # ******************************************************************
+      - name: Download prebuilt archive (CPU, x86_64-pc-windows-msvc)
+        run: cargo build --target x86_64-pc-windows-msvc  --manifest-path ${{ env.MANIFEST_PATH }}
+      - name: Verify prebuilt archive downloaded (CPU, x86_64-pc-windows-msvc)
+        run: ls -lh target/x86_64-pc-windows-msvc/debug/build/onnxruntime-sys-*/out/onnxruntime-win-x64-1.*.zip
+      # ******************************************************************
+      - name: Download prebuilt archive (GPU, x86_64-unknown-linux-gnu)
+        env:
+          ORT_USE_CUDA: "yes"
+        run: cargo build --target x86_64-unknown-linux-gnu  --manifest-path ${{ env.MANIFEST_PATH }}
+      - name: Verify prebuilt archive downloaded (GPU, x86_64-unknown-linux-gnu)
+        run: ls -lh target/x86_64-unknown-linux-gnu/debug/build/onnxruntime-sys-*/out/onnxruntime-linux-x64-gpu-1.*.tgz
+      # ******************************************************************
+      - name: Download prebuilt archive (GPU, x86_64-pc-windows-msvc)
+        env:
+          ORT_USE_CUDA: "yes"
+        run: cargo build --target x86_64-pc-windows-msvc  --manifest-path ${{ env.MANIFEST_PATH }}
+      - name: Verify prebuilt archive downloaded (GPU, x86_64-pc-windows-msvc)
+        run: ls -lh target/x86_64-pc-windows-msvc/debug/build/onnxruntime-sys-*/out/onnxruntime-win-gpu-x64-1.*.zip
+
+  test:
+    name: Test Suite
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        target:
+          [
+            x86_64-unknown-linux-gnu,
+            x86_64-apple-darwin,
+            x86_64-pc-windows-msvc,
+            i686-pc-windows-msvc,
+          ]
+        include:
+          - target: x86_64-unknown-linux-gnu
+            os: ubuntu-latest
+          - target: x86_64-apple-darwin
+            os: macos-latest
+          - target: x86_64-pc-windows-msvc
+            os: windows-latest
+          - target: i686-pc-windows-msvc
+            os: windows-latest
+    env:
+      CARGO_BUILD_TARGET: ${{ matrix.target }}
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/rust-toolchain-setup
+      - name: vendor onnxruntime source
+        run: just vendor
+      - run: rustup target install ${{ matrix.target }}
+      - name: Install additional packages (macOS)
+        if: contains(matrix.target, 'x86_64-apple-darwin')
+        run: brew install libomp
+      - name: Build (cargo build)
+        run: cargo build --all --manifest-path ${{ env.MANIFEST_PATH }}
+      - name: Build tests (cargo test)
+        run: cargo test --no-run --manifest-path ${{ env.MANIFEST_PATH }}
+      - name: Build onnxruntime with 'model-fetching' feature
+        run: cargo build --manifest-path ${{ env.MANIFEST_PATH }} --features model-fetching
+      - name: Test onnxruntime-sys
+        run: cargo build --package onnxruntime-sys -- --test-threads=1 --nocapture
+      - name: Test onnxruntime
+        run: cargo test --manifest-path ${{ env.MANIFEST_PATH }} --features model-fetching -- --test-threads=1 --nocapture
+
+  clippy:
+    name: Clippy
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/rust-toolchain-setup
+      - name: vendor onnxruntime source
+        run: just vendor
+      - run: clippy --all-features --manifest-path ${{ env.MANIFEST_PATH }} -- -D warnings
+
+  package-sys:
+    name: Package onnxruntime-sys
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/rust-toolchain-setup
+      - name: vendor onnxruntime source
+        run: just vendor
+      - run: cargo package --allow-dirty --package onnxruntime-sys
diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml
new file mode 100644
index 0000000000000..95607f297c6bd
--- /dev/null
+++ b/.github/workflows/stale.yml
@@ -0,0 +1,39 @@
+name: Close stale issues
+on:
+  # Allows you to dictate when you want this workflow to run using cron syntax (times in UTC)
+  schedule:
+    - cron: "0 15 * * *"
+  # Allows you to run this workflow manually from the Actions tab
+  # workflow_dispatch:
+
+jobs:
+  close-stale-issues:
+    runs-on: ubuntu-latest
+    permissions:
+      issues: write
+      pull-requests: write
+    steps:
+      - uses: actions/stale@v8.0.0
+        with:
+          # Comma separated list of labels that can be assigned to issues to exclude them from being marked as stale
+          exempt-issue-labels: contributions welcome, feature request, regression
+          # Override exempt-all-assignees but only to exempt the issues with an assignee to be marked as stale automatically
+          exempt-all-issue-assignees: true
+          # Used to ignore the issues and pull requests created before the start date
+          # Start date should be April 19, 2022 - corresponds to the day previous stale bot stopped working
+          start-date: '2022-04-19T00:00:00Z'
+          # Number of days without activity before the actions/stale action labels an issue
+          days-before-issue-stale: 30
+          # Number of days without activity before the actions/stale action closes an issue
+          days-before-issue-close: 30
+          # Label you want to apply to issues that have been inactive for the amount of time specified by days-before-issue-stale
+          stale-issue-label: "stale"
+          # Comment that you want to add to issues that are labeled by the actions/stale action
+          stale-issue-message: "This issue has been automatically marked as stale due to inactivity and will be closed in 7 days if no further activity occurs. If further support is needed, please provide an update and/or more details."
+          # Comment that you want to add to issues that are closed by the actions/stale action
+          close-issue-message: "This issue has been automatically closed due to inactivity. Please reactivate if further support is needed."
+          # If you never want this action to label PRs, set this value to -1
+          days-before-pr-stale: -1
+          # If you never want this action to close PRs, set this value to -1
+          days-before-pr-close: -1
+          repo-token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.lintrunner.toml b/.lintrunner.toml
index 86be8d0d0bd38..4e5d077b08ff4 100644
--- a/.lintrunner.toml
+++ b/.lintrunner.toml
@@ -45,6 +45,7 @@ exclude_patterns = [
     'cmake/external/**',
     # ignore generated flatbuffers code
     'onnxruntime/core/flatbuffers/ort_flatbuffers_py/**',
+    'orttraining/orttraining/python/training/optim/_ds_code_store.py',
 ]
 command = [
     'python',
@@ -76,6 +77,7 @@ exclude_patterns = [
     'cmake/**',
     'orttraining/*',
     'onnxruntime/core/flatbuffers/**',
+    'orttraining/orttraining/python/training/optim/_ds_code_store.py',
 ]
 command = [
     'python',
@@ -97,33 +99,6 @@ init_command = [
 ]
 is_formatter = true
 
-[[linter]]
-code = 'PYLINT'
-include_patterns = [
-    # TODO: Opt in to pylint by adding paths here
-]
-exclude_patterns = [
-]
-command = [
-    'python',
-    '-m',
-    'lintrunner_adapters',
-    'run',
-    'pylint_linter',
-    '--rcfile=pyproject.toml',
-    '--',
-    '@{{PATHSFILE}}'
-]
-init_command = [
-    'python',
-    '-m',
-    'lintrunner_adapters',
-    'run',
-    'pip_init',
-    '--dry-run={{DRYRUN}}',
-    '--requirement=requirements-lintrunner.txt',
-]
-
 [[linter]]
 code = 'RUSTFMT'
 include_patterns = ['**/*.rs']
diff --git a/.vscode/settings.json b/.vscode/settings.json
index b7a1292efb2c6..2f2adc78f6de9 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -13,30 +13,13 @@
         "editor.codeActionsOnSave": {
             "source.organizeImports": true
         },
+        "editor.defaultFormatter": "ms-python.black-formatter"
     },
     // Enable Python linting and Pylance type checking
     "python.analysis.typeCheckingMode": "basic",
-    "python.formatting.provider": "black",
-    "python.formatting.blackArgs": [
-        "--line-length",
-        "120"
-    ],
-    "python.sortImports.args": [
-        "--profile",
-        "black",
-        "--line-length",
-        "120"
-    ],
-    "python.linting.enabled": true,
-    "python.linting.flake8Enabled": true,
-    "python.linting.pylintEnabled": true,
-    "python.linting.pydocstyleEnabled": true,
-    "python.linting.pydocstyleArgs": [
-        "--convention=google"
-    ],
-    "python.linting.banditEnabled": true,
     "cpplint.lineLength": 120,
     "cpplint.filters": [
         "-build/include_subdir",
         "-runtime/references"
     ]
+}
diff --git a/Package.swift b/Package.swift
deleted file mode 100644
index f8bf33001ea24..0000000000000
--- a/Package.swift
+++ /dev/null
@@ -1,109 +0,0 @@
-// swift-tools-version: 5.6
-//   The swift-tools-version declares the minimum version of Swift required to build this package and MUST be the first
-//   line of this file. 5.6 is required to support zip files for the pod archive binaryTarget.
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-//
-// A user of the Swift Package Manager (SPM) package will consume this file directly from the ORT github repository.
-// For context, the end user's config will look something like:
-//
-//     dependencies: [
-//       .package(url: "https://github.com/microsoft/onnxruntime", branch: "rel-1.15.0"),
-//       ...
-//     ],
-//
-// NOTE: The direct consumption creates a somewhat complicated setup to 'release' a new version of the ORT SPM package.
-//       TBD: how to manage the release process
-
-import PackageDescription
-import class Foundation.ProcessInfo
-
-let package = Package(
-    name: "onnxruntime",
-    platforms: [.iOS(.v12)],
-    products: [
-        .library(name: "onnxruntime",
-                 type: .static,
-                 targets: ["OnnxRuntimeBindings"]),
-    ],
-    dependencies: [],
-    targets: [
-        .target(name: "OnnxRuntimeBindings",
-                dependencies: ["onnxruntime"],
-                path: "objectivec",
-                exclude: ["test", "docs", "ReadMe.md", "format_objc.sh",
-                            "ort_checkpoint.mm",
-                            "ort_checkpoint_internal.h",
-                            "ort_training_session_internal.h",
-                            "ort_training_session.mm",
-                            "include/ort_checkpoint.h",
-                            "include/ort_training_session.h",
-                            "include/onnxruntime_training.h"],
-                cxxSettings: [
-                    .define("SPM_BUILD"),
-                    .unsafeFlags(["-std=c++17",
-                                  "-fobjc-arc-exceptions"
-                                 ]),
-                ], linkerSettings: [
-                    .unsafeFlags(["-ObjC"]),
-                ]),
-        .testTarget(name: "OnnxRuntimeBindingsTests",
-                    dependencies: ["OnnxRuntimeBindings"],
-                    path: "swift/OnnxRuntimeBindingsTests",
-                    resources: [
-                        .copy("Resources/single_add.basic.ort")
-                    ]),
-    ]
-)
-
-// Add the ORT iOS Pod archive as a binary target.
-//
-// There are 2 scenarios:
-//
-// Release branch of ORT github repo:
-//    Target will be set to the released pod archive and its checksum.
-//
-// Any other branch/tag of ORT github repo:
-//    Invalid by default. We do not have a pod archive that is guaranteed to work
-//    as the objective-c bindings may have changed since the pod archive was released.
-
-// CI or local testing where you have built/obtained the iOS Pod archive matching the current source code.
-// Requires the ORT_IOS_POD_LOCAL_PATH environment variable to be set to specify the location of the pod.
-if let pod_archive_path = ProcessInfo.processInfo.environment["ORT_IOS_POD_LOCAL_PATH"] {
-    // ORT_IOS_POD_LOCAL_PATH MUST be a path that is relative to Package.swift.
-    //
-    // To build locally, tools/ci_build/github/apple/build_and_assemble_ios_pods.py can be used
-    // See https://onnxruntime.ai/docs/build/custom.html#ios
-    //  Example command:
-    //    python3 tools/ci_build/github/apple/build_and_assemble_ios_pods.py \
-    //      --variant Full \
-    //      --build-settings-file tools/ci_build/github/apple/default_full_ios_framework_build_settings.json
-    //
-    // This should produce the pod archive in build/ios_pod_staging, and ORT_IOS_POD_LOCAL_PATH can be set to
-    // "build/ios_pod_staging/pod-archive-onnxruntime-c-???.zip" where '???' is replaced by the version info in the
-    // actual filename.
-    package.targets.append(Target.binaryTarget(name: "onnxruntime", path: pod_archive_path))
-
-} else {
-    // When creating the release version:
-    //   - remove the fatalError
-    //   - uncomment the package.targets.append call
-    //   - update the major/minor/patch version info in the url
-    //   - insert the checksum info from the onnxruntime-ios-packaging-pipeline CI's 'Print ORT iOS Pod checksum'
-    //     stage output (or download the pod archive artifact from the CI and run `shasum -a 256 <path to pod zip>`
-    //     to manually calculate it).
-    //     The checksum length and chars should look something like
-    //       "c89cd106ff02eb3892243acd7c4f2bd8e68c2c94f2751b5e35f98722e10c042b"
-    //
-    // package.targets.append(
-    //    Target.binaryTarget(name: "onnxruntime",
-    //                        url: "https://onnxruntimepackages.z14.web.core.windows.net/pod-archive-onnxruntime-c-<major.minor.patch>.zip",
-    //                        checksum: "Insert checksum here")
-    // )
-
-    fatalError("It is not valid to use a non-release branch from https://github.com/microsoft/onnxruntime.\n" +
-               "Please use a release branch (e.g. rel-1.15.0), or build the ONNX Runtime iOS pod archive locally " +
-               "and set the ORT_IOS_POD_LOCAL_PATH environment variable.\n" +
-               "See Package.swift for more information on using a local pod archive.")
-}
diff --git a/cgmanifests/cgmanifest.json b/cgmanifests/cgmanifest.json
index 2a3de3bb0ee51..e8dbc9cf9eff6 100644
--- a/cgmanifests/cgmanifest.json
+++ b/cgmanifests/cgmanifest.json
@@ -568,7 +568,7 @@
          "component": {
             "type": "git",
             "git": {
-               "commitHash": "d10b27fe37736d2944630ecd7557cefa95cf87c9",
+               "commitHash": "e7248b26a1ed53fa030c5c459f7ea095dfd276ac",
                "repositoryUrl": "https://gitlab.com/libeigen/eigen.git"
             }            
          }
diff --git a/cgmanifests/generate_cgmanifest.py b/cgmanifests/generate_cgmanifest.py
index a9eaacc6f2938..81181d3ccfb20 100644
--- a/cgmanifests/generate_cgmanifest.py
+++ b/cgmanifests/generate_cgmanifest.py
@@ -90,55 +90,6 @@ def add_github_dep(name, parsed_url):
             git_deps[dep] = name
 
 
-with open(
-    os.path.join(REPO_DIR, "tools", "ci_build", "github", "linux", "docker", "Dockerfile.manylinux2_28_cuda11"),
-) as f:
-    for line in f:
-        if not line.strip():
-            package_name = None
-            package_filename = None
-            package_url = None
-        if package_filename is None:
-            m = re.match(r"RUN\s+export\s+(.+?)_ROOT=(\S+).*", line)
-            if m is not None:
-                package_name = m.group(1)
-                package_filename = m.group(2)
-            else:
-                m = re.match(r"RUN\s+export\s+(.+?)_VERSION=(\S+).*", line)
-                if m is not None:
-                    package_name = m.group(1)
-                    package_filename = m.group(2)
-        elif package_url is None:
-            m = re.match(r"(.+?)_DOWNLOAD_URL=(\S+)", line)
-            if m is not None:
-                package_url = m.group(2)
-                if package_name == "LIBXCRYPT":
-                    package_url = m.group(2) + "/v" + package_filename + ".tar.gz"
-                elif package_name == "CMAKE":
-                    package_url = m.group(2) + "/v" + package_filename + "/cmake-" + package_filename + ".tar.gz"
-                else:
-                    package_url = m.group(2) + "/" + package_filename + ".tar.gz"
-                parsed_url = urlparse(package_url)
-                if parsed_url.hostname == "github.com":
-                    add_github_dep("manylinux dependency " + package_name, parsed_url)
-                else:
-                    registration = {
-                        "Component": {
-                            "Type": "other",
-                            "other": {
-                                "Name": package_name.lower(),
-                                "Version": package_filename.split("-")[-1],
-                                "DownloadUrl": package_url,
-                            },
-                            "comments": "manylinux dependency",
-                        }
-                    }
-                    registrations.append(registration)
-                package_name = None
-                package_filename = None
-                package_url = None
-
-
 def normalize_path_separators(path):
     return path.replace(os.path.sep, "/")
 
diff --git a/cgmanifests/generated/cgmanifest.json b/cgmanifests/generated/cgmanifest.json
index 6f1ca84e1a304..12fbb291c3a70 100644
--- a/cgmanifests/generated/cgmanifest.json
+++ b/cgmanifests/generated/cgmanifest.json
@@ -2,82 +2,6 @@
   "$schema": "https://json.schemastore.org/component-detection-manifest.json",
   "Version": 1,
   "Registrations": [
-    {
-      "Component": {
-        "Type": "other",
-        "other": {
-          "Name": "autoconf",
-          "Version": "2.71",
-          "DownloadUrl": "http://ftp.gnu.org/gnu/autoconf/autoconf-2.71.tar.gz"
-        },
-        "comments": "manylinux dependency"
-      }
-    },
-    {
-      "Component": {
-        "Type": "other",
-        "other": {
-          "Name": "automake",
-          "Version": "1.16.5",
-          "DownloadUrl": "http://ftp.gnu.org/gnu/automake/automake-1.16.5.tar.gz"
-        },
-        "comments": "manylinux dependency"
-      }
-    },
-    {
-      "Component": {
-        "Type": "other",
-        "other": {
-          "Name": "libtool",
-          "Version": "2.4.7",
-          "DownloadUrl": "http://ftp.gnu.org/gnu/libtool/libtool-2.4.7.tar.gz"
-        },
-        "comments": "manylinux dependency"
-      }
-    },
-    {
-      "Component": {
-        "Type": "other",
-        "other": {
-          "Name": "git",
-          "Version": "2.36.2",
-          "DownloadUrl": "https://www.kernel.org/pub/software/scm/git/git-2.36.2.tar.gz"
-        },
-        "comments": "manylinux dependency"
-      }
-    },
-    {
-      "Component": {
-        "Type": "other",
-        "other": {
-          "Name": "sqlite_autoconf",
-          "Version": "3390200",
-          "DownloadUrl": "https://www.sqlite.org/2022/sqlite-autoconf-3390200.tar.gz"
-        },
-        "comments": "manylinux dependency"
-      }
-    },
-    {
-      "Component": {
-        "Type": "other",
-        "other": {
-          "Name": "openssl",
-          "Version": "1.1.1q",
-          "DownloadUrl": "https://www.openssl.org/source/openssl-1.1.1q.tar.gz"
-        },
-        "comments": "manylinux dependency"
-      }
-    },
-    {
-      "component": {
-        "type": "git",
-        "git": {
-          "commitHash": "50cf2b6dd4fdf04309445f2eec8de7051d953abf",
-          "repositoryUrl": "https://github.com/besser82/libxcrypt.git"
-        },
-        "comments": "manylinux dependency LIBXCRYPT"
-      }
-    },
     {
       "component": {
         "type": "git",
@@ -102,7 +26,7 @@
       "component": {
         "type": "git",
         "git": {
-          "commitHash": "e2525550194ce3d8a2c4a3af451c9d9b3ae6650e",
+          "commitHash": "b86cc54efce19530fb953e4b21f57e6b3888534c",
           "repositoryUrl": "https://github.com/onnx/onnx.git"
         },
         "comments": "git submodule at cmake/external/onnx"
@@ -212,7 +136,7 @@
       "component": {
         "type": "git",
         "git": {
-          "commitHash": "003c580e696a774afdc984996ee909b7c8d8128c",
+          "commitHash": "0da379fc4808f9601faef392352018c741c0f297",
           "repositoryUrl": "https://github.com/google/XNNPACK.git"
         },
         "comments": "googlexnnpack"
@@ -272,7 +196,7 @@
       "component": {
         "type": "git",
         "git": {
-          "commitHash": "0462dc31ae78f48744b6141ae376df1f96d3f459",
+          "commitHash": "a43ce67187bab219520fd80f21af8bbd4354bc8c",
           "repositoryUrl": "https://github.com/onnx/onnx-tensorrt.git"
         },
         "comments": "onnx_tensorrt"
@@ -302,7 +226,7 @@
       "component": {
         "type": "git",
         "git": {
-          "commitHash": "1787867f6183f056420e532eec640cba25efafea",
+          "commitHash": "4fe0e1e183925bf8cfa6aae24237e724a96479b8",
           "repositoryUrl": "https://github.com/Maratyszcza/pthreadpool.git"
         },
         "comments": "pthreadpool"
@@ -362,7 +286,7 @@
       "component": {
         "type": "git",
         "git": {
-          "commitHash": "c4f6b8c6bc94ff69048492fb34df0dfaf1983933",
+          "commitHash": "6f47420213f757831fae65c686aa471749fa8d60",
           "repositoryUrl": "https://github.com/NVIDIA/cutlass.git"
         },
         "comments": "cutlass"
@@ -392,7 +316,7 @@
       "component": {
         "type": "git",
         "git": {
-          "commitHash": "d52ec01652b7d620386251db92455968d8d90bdc",
+          "commitHash": "a4f72a314a85732ed67d5aa8d1088d207a7e0e61",
           "repositoryUrl": "https://github.com/ROCmSoftwarePlatform/composable_kernel.git"
         },
         "comments": "composable_kernel"
diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
index cf7565869e446..d2c56e455be4c 100644
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@@ -39,7 +39,12 @@ include(CMakeDependentOption)
 include(FetchContent)
 include(CheckFunctionExists)
 
+# TODO: update this once all system adapt c++20
+if(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
+set(CMAKE_CXX_STANDARD 20)
+else()
 set(CMAKE_CXX_STANDARD 17)
+endif()
 
 set_property(GLOBAL PROPERTY USE_FOLDERS ON)
 # NOTE: POSITION INDEPENDENT CODE hurts performance, and it only make sense on POSIX systems
@@ -68,6 +73,12 @@ option(onnxruntime_ENABLE_PYTHON "Enable python buildings" OFF)
 # Enable it may cause LNK1169 error
 option(onnxruntime_ENABLE_MEMLEAK_CHECKER "Experimental: Enable memory leak checker in Windows debug build" OFF)
 option(onnxruntime_USE_CUDA "Build with CUDA support" OFF)
+# Enable ONNX Runtime CUDA EP's internal unit tests that directly access the EP's internal functions instead of through
+# OpKernels. When the option is ON, we will have two copies of GTest library in the same process. It is not a typical
+# use. If you hit any problem with that, please do not report it to GTest. Turn OFF the following build option instead.
+cmake_dependent_option(onnxruntime_ENABLE_CUDA_EP_INTERNAL_TESTS "Build with CUDA unit tests" OFF "onnxruntime_USE_CUDA;onnxruntime_BUILD_UNIT_TESTS;LINUX" OFF)
+
+option(onnxruntime_USE_CUDA_NHWC_OPS "Build CUDA with NHWC op support" OFF)
 option(onnxruntime_ENABLE_CUDA_LINE_NUMBER_INFO "When building with CUDA support, generate device code line number information." OFF)
 option(onnxruntime_USE_OPENVINO "Build with OpenVINO support" OFF)
 option(onnxruntime_USE_COREML "Build with CoreML support" OFF)
@@ -103,9 +114,7 @@ option(onnxruntime_ENABLE_LTO "Enable link time optimization" OFF)
 option(onnxruntime_CROSS_COMPILING "Cross compiling onnx runtime" OFF)
 option(onnxruntime_GCOV_COVERAGE "Compile with options necessary to run code coverage" OFF)
 option(onnxruntime_DONT_VECTORIZE "Do not vectorize operations in Eigen" OFF)
-
-#It's preferred to turn it OFF when onnxruntime is dynamically linked to PROTOBUF. But Tensort always required the full version of protobuf.
-cmake_dependent_option(onnxruntime_USE_FULL_PROTOBUF "Link to libprotobuf instead of libprotobuf-lite when this option is ON" OFF "NOT onnxruntime_USE_TENSORRT" ON)
+option(onnxruntime_USE_FULL_PROTOBUF "Link to libprotobuf instead of libprotobuf-lite when this option is ON" OFF)
 option(tensorflow_C_PACKAGE_PATH "Path to tensorflow C package installation dir")
 option(onnxruntime_ENABLE_LANGUAGE_INTEROP_OPS "Enable operator implemented in language other than cpp" OFF)
 option(onnxruntime_DEBUG_NODE_INPUTS_OUTPUTS "Dump debug information about node inputs and outputs when executing the model." OFF)
@@ -142,10 +151,11 @@ option(onnxruntime_DISABLE_SPARSE_TENSORS "Disable sparse tensors data types" OF
 option(onnxruntime_DISABLE_OPTIONAL_TYPE "Disable optional type" OFF)
 option(onnxruntime_DISABLE_FLOAT8_TYPES "Disable float 8 types" OFF)
 option(onnxruntime_MINIMAL_BUILD "Exclude as much as possible from the build. Support ORT format models. No support for ONNX format models." OFF)
-cmake_dependent_option(onnxruntime_DISABLE_RTTI "Disable RTTI" ON "NOT onnxruntime_ENABLE_PYTHON" OFF)
+cmake_dependent_option(onnxruntime_DISABLE_RTTI "Disable RTTI" ON "NOT onnxruntime_ENABLE_PYTHON;NOT onnxruntime_USE_CUDA" OFF)
 # For now onnxruntime_DISABLE_EXCEPTIONS will only work with onnxruntime_MINIMAL_BUILD, more changes (ONNX, non-CPU EP, ...) are required to run this standalone
 cmake_dependent_option(onnxruntime_DISABLE_EXCEPTIONS "Disable exception handling. Requires onnxruntime_MINIMAL_BUILD currently." ON "onnxruntime_MINIMAL_BUILD;NOT onnxruntime_ENABLE_PYTHON" OFF)
-option(onnxruntime_DISABLE_ABSEIL "Do not link to Abseil. Redefine Inlined containers to STD containers." OFF)
+# Even when onnxruntime_DISABLE_ABSEIL is ON, ONNX Runtime still needs to link to abseil.
+option(onnxruntime_DISABLE_ABSEIL "Do not use Abseil data structures in ONNX Runtime source code. Redefine Inlined containers to STD containers." OFF)
 
 option(onnxruntime_EXTENDED_MINIMAL_BUILD "onnxruntime_MINIMAL_BUILD with support for execution providers that compile kernels." OFF)
 option(onnxruntime_MINIMAL_BUILD_CUSTOM_OPS "Add custom operator kernels support to a minimal build." OFF)
@@ -265,10 +275,6 @@ if (onnxruntime_ENABLE_TRAINING_APIS)
   endif()
 endif()
 
-if (onnxruntime_USE_CUDA)
-  set(onnxruntime_DISABLE_RTTI OFF)
-endif()
-
 if (onnxruntime_USE_ROCM)
   if (WIN32)
     message(FATAL_ERROR "ROCM does not support build in Windows!")
@@ -519,7 +525,21 @@ if(NOT WIN32 AND NOT CMAKE_SYSTEM_NAME STREQUAL "Android")
   find_package(Iconv REQUIRED)
   set(ICONV_LIB Iconv::Iconv)
 endif()
+
 find_package(Patch)
+if (WIN32 AND NOT Patch_FOUND)
+    # work around CI machines missing patch from the git install by falling back to the binary in this repo.
+    # replicate what happens in https://github.com/Kitware/CMake/blob/master/Modules/FindPatch.cmake but without
+    # the hardcoded suffixes in the path to the patch binary.
+    find_program(Patch_EXECUTABLE NAMES patch PATHS ${PROJECT_SOURCE_DIR}/external/git.Win32.2.41.03.patch)
+    if(Patch_EXECUTABLE)
+      set(Patch_FOUND 1)
+      if (NOT TARGET Patch::patch)
+        add_executable(Patch::patch IMPORTED)
+        set_property(TARGET Patch::patch PROPERTY IMPORTED_LOCATION ${Patch_EXECUTABLE})
+      endif()
+    endif()
+endif()
 if(Patch_FOUND)
   message("Patch found: ${Patch_EXECUTABLE}")
 endif()
@@ -665,6 +685,9 @@ set(ORT_PROVIDER_FLAGS)
 set(ORT_PROVIDER_CMAKE_FLAGS)
 
 if (onnxruntime_USE_CUDA)
+  if (onnxruntime_USE_CUDA_NHWC_OPS)
+    add_compile_definitions(ENABLE_CUDA_NHWC_OPS)
+  endif()
   enable_language(CUDA)
   message( STATUS "CMAKE_CUDA_COMPILER_VERSION: ${CMAKE_CUDA_COMPILER_VERSION}")
 
@@ -1276,14 +1299,6 @@ if (onnxruntime_USE_OPENVINO)
     add_definitions(-DOPENVINO_CONFIG_CPU_FP16=1)
   endif()
 
-  if (onnxruntime_USE_OPENVINO_VPUX_FP16)
-    add_definitions(-DOPENVINO_CONFIG_VPUX_FP16=1)
-  endif()
-
-  if (onnxruntime_USE_OPENVINO_VPUX_U8)
-    add_definitions(-DOPENVINO_CONFIG_VPUX_U8=1)
-  endif()
-
   if (onnxruntime_USE_OPENVINO_GPU_FP32_NP)
     add_definitions(-DOPENVINO_CONFIG_GPU_FP32=1)
     add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
@@ -1304,16 +1319,6 @@ if (onnxruntime_USE_OPENVINO)
     add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
   endif()
 
-  if (onnxruntime_USE_OPENVINO_VPUX_FP32_NP)
-    add_definitions(-DOPENVINO_CONFIG_VPUX_FP32=1)
-    add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
-  endif()
-
-  if (onnxruntime_USE_OPENVINO_VPUX_FP16_NP)
-    add_definitions(-DOPENVINO_CONFIG_VPUX_FP16=1)
-    add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
-  endif()
-
   if (onnxruntime_USE_OPENVINO_HETERO)
     add_definitions(-DOPENVINO_CONFIG_HETERO=1)
     add_definitions(-DDEVICE_NAME="${onnxruntime_USE_OPENVINO_DEVICE}")
@@ -1474,29 +1479,32 @@ if (onnxruntime_ENABLE_TRAINING)
   list(APPEND onnxruntime_EXTERNAL_LIBRARIES tensorboard)
 endif()
 
-if (UNIX AND onnxruntime_USE_MPI)
-  if (EXISTS "${onnxruntime_MPI_HOME}")
-    set(MPI_HOME "${onnxruntime_MPI_HOME}")
-  elseif (EXISTS "/bert_ort/openmpi")
-    set(MPI_HOME "/bert_ort/openmpi")
-  endif()
-
-  find_package(MPI)
+if (UNIX AND onnxruntime_USE_NCCL)
+  # MPI is INDEPENDENT of NCCL for now. You can build NCLL without MPI and launch multi-GPU with your own launcher.
+  if (onnxruntime_USE_MPI)
+    if (EXISTS "${onnxruntime_MPI_HOME}")
+      set(MPI_HOME "${onnxruntime_MPI_HOME}")
+    elseif (EXISTS "/bert_ort/openmpi")
+      set(MPI_HOME "/bert_ort/openmpi")
+    endif()
+    find_package(MPI)
 
-  if (MPI_CXX_FOUND)
-    message( STATUS "MPI Version: ${MPI_CXX_VERSION}")
-    message( STATUS "MPI (include: ${MPI_CXX_INCLUDE_DIRS}, library: ${MPI_CXX_LIBRARIES})" )
-    mark_as_advanced(MPI_CXX_INCLUDE_DIRS MPI_CXX_LIBRARIES)
-    list(APPEND onnxruntime_EXTERNAL_LIBRARIES ${MPI_CXX_LIBRARIES} ${MPI_CXX_LINK_FLAGS})
-  else ()
-    message(
-          FATAL_ERROR
-          "MPI is not found. Please define onnxruntime_MPI_HOME to specify the path of MPI. Otherwise, NCCL will be disabled."
-        )
+    if (MPI_CXX_FOUND)
+      message( STATUS "MPI Version: ${MPI_CXX_VERSION}")
+      message( STATUS "MPI (include: ${MPI_CXX_INCLUDE_DIRS}, library: ${MPI_CXX_LIBRARIES})" )
+      mark_as_advanced(MPI_CXX_INCLUDE_DIRS MPI_CXX_LIBRARIES)
+      list(APPEND onnxruntime_EXTERNAL_LIBRARIES ${MPI_CXX_LIBRARIES} ${MPI_CXX_LINK_FLAGS})
+    else ()
+      message(
+            FATAL_ERROR
+            "MPI is not found. Please define onnxruntime_MPI_HOME to specify the path of MPI. Otherwise, NCCL will be disabled."
+            "or you can remove --use_mpi from build args to disable MPI."
+          )
+    endif()
   endif()
 
-  # Find NCCL and MPI
-  if (onnxruntime_USE_NCCL AND MPI_CXX_FOUND)
+  # Find NCCL
+  if (onnxruntime_USE_NCCL)
     if (onnxruntime_USE_CUDA)
       set(NCCL_LIBNAME "nccl")
     elseif (onnxruntime_USE_ROCM)
diff --git a/cmake/deps.txt b/cmake/deps.txt
index 279b5ca649dba..e065cacdfc423 100644
--- a/cmake/deps.txt
+++ b/cmake/deps.txt
@@ -3,30 +3,40 @@
 #The columns are separated by ";" because a list in cmake is just a ";" separated group of strings.
 #Names should be in lower case. They will be used as variable names in cmake.
 #URLs can be either https URLs or local file paths in cmake-style(directory separator is a forward slash character).
-#SHA1 hashes can be generated by running sha1sum command.
+#SHA1 hashes can be generated by running sha1sum command on linux. PowerShell can also be used:
+# (Get-FileHash -Algorithm SHA1 <filename>).Hash.ToLower()
 #If you need to change abseil's version to a different one, you may also want to update external\abseil-cpp.natvis
 #since the file contains a version string: "lts_20230802". However, the file is for debugging purposes only and would
 #not affect built binaries.
+#
+# NOTE: You must run deps_update_and_upload.py and generate_cgmanifest.py when ready to test your changes in a CI.
+# See https://microsoft.sharepoint.com/teams/ONNX2/_layouts/OneNote.aspx?id=%2Fteams%2FONNX2%2FShared%20Documents%2FNotebooks%2FONNX%20Ecosystem%20Team%20Notebook&wd=target%28Development.one%7C63D3AB47-51D1-4A62-9965-66882234BD44%2FAdd%20or%20update%20a%20dependency%20in%20deps.txt%7C0E9ED71D-89D5-40FA-B05F-C0123289C591%2F%29
+#
 abseil_cpp;https://github.com/abseil/abseil-cpp/archive/refs/tags/20230802.0.zip;04271dfbfac59269b6939e1e9d5faf0d18a7ba91
 cxxopts;https://github.com/jarro2783/cxxopts/archive/3c73d91c0b04e2b59462f0a741be8c07024c1bc0.zip;6c6ca7f8480b26c8d00476e0e24b7184717fe4f0
 date;https://github.com/HowardHinnant/date/archive/refs/tags/v3.0.1.zip;2dac0c81dc54ebdd8f8d073a75c053b04b56e159
 dlpack;https://github.com/dmlc/dlpack/archive/refs/tags/v0.6.zip;4d565dd2e5b31321e5549591d78aa7f377173445
-eigen;https://gitlab.com/libeigen/eigen/-/archive/3.4/eigen-3.4.zip;ee201b07085203ea7bd8eb97cbcb31b07cfa3efb
+# This Eigen commit id matches the eigen archive being consumed from https://gitlab.com/libeigen/eigen/-/archive/3.4/eigen-3.4.zip
+# prior to the 3.4.1 RC changing the bits and invalidating the hash.
+# it contains changes on top of 3.4.0 which are required to fix build issues.
+# Until the 3.4.1 release this is the best option we have.
+# Issue link: https://gitlab.com/libeigen/eigen/-/issues/2744
+eigen;https://gitlab.com/libeigen/eigen/-/archive/e7248b26a1ed53fa030c5c459f7ea095dfd276ac/eigen-e7248b26a1ed53fa030c5c459f7ea095dfd276ac.zip;be8be39fdbc6e60e94fa7870b280707069b5b81a
 flatbuffers;https://github.com/google/flatbuffers/archive/refs/tags/v1.12.0.zip;ba0a75fd12dbef8f6557a74e611b7a3d0c5fe7bf
 fp16;https://github.com/Maratyszcza/FP16/archive/0a92994d729ff76a58f692d3028ca1b64b145d91.zip;b985f6985a05a1c03ff1bb71190f66d8f98a1494
 fxdiv;https://github.com/Maratyszcza/FXdiv/archive/63058eff77e11aa15bf531df5dd34395ec3017c8.zip;a5658f4036402dbca7cebee32be57fb8149811e1
 google_benchmark;https://github.com/google/benchmark/archive/refs/tags/v1.7.0.zip;e97c368b176e8614e3f1bf13dd9abcf6a7ad9908
 google_nsync;https://github.com/google/nsync/archive/refs/tags/1.26.0.zip;5e7c00ef6bf5b787386fc040067903ec774e2752
 googletest;https://github.com/google/googletest/archive/refs/tags/v1.14.0.zip;0ac421f2ec11af38b0fff0f1992184032731a8bc
-googlexnnpack;https://github.com/google/XNNPACK/archive/003c580e696a774afdc984996ee909b7c8d8128c.zip;9f192e3f15e1e37ae9c78d53eeea47e45c5eb31c
+googlexnnpack;https://github.com/google/XNNPACK/archive/0da379fc4808f9601faef392352018c741c0f297.zip;663883491e380b628e0a5b162b5f2658032fae73
 json;https://github.com/nlohmann/json/archive/refs/tags/v3.10.5.zip;f257f8dc27c5b8c085dc887b40cddd18ae1f725c
 microsoft_gsl;https://github.com/microsoft/GSL/archive/refs/tags/v4.0.0.zip;cf368104cd22a87b4dd0c80228919bb2df3e2a14
 microsoft_wil;https://github.com/microsoft/wil/archive/refs/tags/v1.0.230629.1.zip;e4a542a323c070376f7c2d1973d0f7ddbc1d2fa5
 mimalloc;https://github.com/microsoft/mimalloc/archive/refs/tags/v2.1.1.zip;d5ee7d34223d0567892db5179849939c8769dc41
 mp11;https://github.com/boostorg/mp11/archive/refs/tags/boost-1.82.0.zip;9bc9e01dffb64d9e0773b2e44d2f22c51aace063
-onnx;https://github.com/onnx/onnx/archive/e2525550194ce3d8a2c4a3af451c9d9b3ae6650e.zip;782f23d788185887f520a90535513e244218e928
+onnx;https://github.com/onnx/onnx/archive/refs/tags/v1.15.0.zip;54c3f960a0541c5d8d3e60c2933e11f5d3688a11
 #use the commit of supporting all the plugins and TRT 8.6-GA (https://github.com/onnx/onnx-tensorrt/commit/0462dc31ae78f48744b6141ae376df1f96d3f459)
-onnx_tensorrt;https://github.com/onnx/onnx-tensorrt/archive/0462dc31ae78f48744b6141ae376df1f96d3f459.zip;5ff086361956cceb81ed17453a1fd8db2aa4328d
+onnx_tensorrt;https://github.com/onnx/onnx-tensorrt/archive/a43ce67187bab219520fd80f21af8bbd4354bc8c.zip;572535aefef477050f86744dfab1fef840198035
 protobuf;https://github.com/protocolbuffers/protobuf/archive/refs/tags/v21.12.zip;7cf2733949036c7d52fda017badcab093fe73bfa
 protoc_win64;https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-win64.zip;b4521f7ada5b260380f94c4bd7f1b7684c76969a
 protoc_win32;https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-win32.zip;3688010318192c46ce73213cdfb6b3e5656da874
@@ -35,13 +45,13 @@ protoc_linux_x86;https://github.com/protocolbuffers/protobuf/releases/download/v
 protoc_linux_aarch64;https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-linux-aarch_64.zip;df9d45470b0b8cf939dd2f0ec6b88e9cafc4d617
 protoc_mac_universal;https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-osx-universal_binary.zip;23710c3d1c2036d8d65a6a22234372fa2d7af9ef
 psimd;https://github.com/Maratyszcza/psimd/archive/072586a71b55b7f8c584153d223e95687148a900.zip;1f5454b01f06f9656b77e4a5e2e31d7422487013
-pthreadpool;https://github.com/Maratyszcza/pthreadpool/archive/1787867f6183f056420e532eec640cba25efafea.zip;e43e80781560c5ab404a4da20f34d846f5f5d101
+pthreadpool;https://github.com/Maratyszcza/pthreadpool/archive/4fe0e1e183925bf8cfa6aae24237e724a96479b8.zip;07a0aa91dd9bf86f31b95497e00f31d8a261a4bd
 pybind11;https://github.com/pybind/pybind11/archive/refs/tags/v2.10.1.zip;769b6aa67a77f17a770960f604b727645b6f6a13
 pytorch_cpuinfo;https://github.com/pytorch/cpuinfo/archive/959002f82d7962a473d8bf301845f2af720e0aa4.zip;85da3caa60eb2b148613b443fbc2bfdc30689965
 re2;https://github.com/google/re2/archive/refs/tags/2022-06-01.zip;aa77313b76e91b531ee7f3e45f004c6a502a5374
 safeint;https://github.com/dcleblanc/SafeInt/archive/refs/tags/3.0.28.zip;23f252040ff6cb9f1fd18575b32fa8fb5928daac
 tensorboard;https://github.com/tensorflow/tensorboard/archive/373eb09e4c5d2b3cc2493f0949dc4be6b6a45e81.zip;67b833913605a4f3f499894ab11528a702c2b381
-cutlass;https://github.com/NVIDIA/cutlass/archive/refs/tags/v3.0.0.zip;0f95b3c1fc1bd1175c4a90b2c9e39074d1bccefd
+cutlass;https://github.com/NVIDIA/cutlass/archive/refs/tags/v3.1.0.zip;757f90a795034a89d4f48a79d1f009f7a04c8dee
 utf8_range;https://github.com/protocolbuffers/utf8_range/archive/72c943dea2b9240cd09efde15191e144bc7c7d38.zip;9925739c9debc0efa2adcb194d371a35b6a03156
 extensions;https://github.com/microsoft/onnxruntime-extensions/archive/94142d8391c9791ec71c38336436319a2d4ac7a0.zip;4365ac5140338b4cb75a39944a4be276e3829b3c
-composable_kernel;https://github.com/ROCmSoftwarePlatform/composable_kernel/archive/d52ec01652b7d620386251db92455968d8d90bdc.zip;6b5ce8edf3625f8817086c194fbf94b664e1b0e0
\ No newline at end of file
+composable_kernel;https://github.com/ROCmSoftwarePlatform/composable_kernel/archive/5356c4a943a35e74d7cdc69486afcb8703b9a59a.zip;522382c2af437e09124287e5879ab64af5b2e299
diff --git a/cmake/deps_update_and_upload.py b/cmake/deps_update_and_upload.py
new file mode 100644
index 0000000000000..d357284d91225
--- /dev/null
+++ b/cmake/deps_update_and_upload.py
@@ -0,0 +1,56 @@
+# in case deps.txt is updated, run this file to update and upload the dependencies so that CI can use them.
+# Before running the script, increase the version number found at:
+# https://aiinfra.visualstudio.com/Lotus/_artifacts/feed/Lotus/UPack/onnxruntime_build_dependencies/versions
+# Run without --do-upload once to verify downloading. Use --do-upload when you are ready to publish.
+# python cmake/deps_update_and_upload.py --root-path C:/temp/onnxruntime_deps --version 1.0.82 --do-upload
+# update version number in tools\ci_build\github\azure-pipelines\templates\download-deps.yml
+import re
+import subprocess
+import os
+import argparse
+import tempfile
+
+parser = argparse.ArgumentParser(description="Update dependencies and publish to Azure Artifacts")
+parser.add_argument(
+    "--root-path", type=str, default=tempfile.gettempdir(), help="Target root path for downloaded files"
+)
+parser.add_argument("--version", type=str, default="1.0.82", help="Package version to publish")
+parser.add_argument("--do-upload", action="store_true", help="Upload the package to Azure Artifacts")
+args = parser.parse_args()
+
+with open("cmake/deps.txt") as file:
+    text = file.read()
+
+lines = [line for line in text.split("\n") if not line.startswith("#") and ";" in line]
+
+root_path = args.root_path
+
+for line in lines:
+    url = re.sub("^[^;]+?;https://([^;]+?);.*", r"https://\1", line)
+    filename = re.sub("^[^;]+?;https://([^;]+?);.*", r"\1", line)
+    full_path = os.path.join(root_path, filename)
+    subprocess.run(["curl", "-sSL", "--create-dirs", "-o", full_path, url])  # noqa: PLW1510
+
+package_name = "onnxruntime_build_dependencies"
+version = args.version
+
+# Check if the user is logged in to Azure
+result = subprocess.run("az account show", shell=True, capture_output=True, text=True)  # noqa: PLW1510
+if "No subscriptions found" in result.stderr:
+    # Prompt the user to log in to Azure
+    print("You are not logged in to Azure. Please log in to continue.")
+    subprocess.run("az login", shell=True)  # noqa: PLW1510
+
+# Publish the package to Azure Artifacts if --no-upload is not specified
+
+cmd = f'az artifacts universal publish --organization https://dev.azure.com/onnxruntime --feed onnxruntime --name {package_name} --version {version} --description "onnxruntime build time dependencies" --path {root_path}'
+if args.do_upload:
+    subprocess.run(cmd, shell=True)  # noqa: PLW1510
+else:
+    print("would have run: " + cmd)
+
+cmd = f'az artifacts universal publish --organization https://dev.azure.com/aiinfra --feed Lotus --name {package_name} --version {version} --description "onnxruntime build time dependencies" --path {root_path}'
+if args.do_upload:
+    subprocess.run(cmd, shell=True)  # noqa: PLW1510
+else:
+    print("would have run: " + cmd)
diff --git a/cmake/external/abseil-cpp.natvis b/cmake/external/abseil-cpp.natvis
index e923d5862ec2e..1e5a36fb9efb9 100644
--- a/cmake/external/abseil-cpp.natvis
+++ b/cmake/external/abseil-cpp.natvis
@@ -25,30 +25,55 @@
   </Type>
   <!-- Should handle both flat hash_set and hash_map -->
   <Type Name="absl::lts_20230802::container_internal::raw_hash_set&lt;*&gt;">
-    <DisplayString Condition="size_ == 0">empty</DisplayString>
-    <DisplayString>{{ size={size_} }}</DisplayString>
+    <Intrinsic Name="_commonfields" Expression="settings_.value"/>
+    <Intrinsic Name="_size" Expression="settings_.value.compressed_tuple_.value"/>
+    <Intrinsic Name="_capacity" Expression="_commonfields().capacity_"/>
+    <Intrinsic Name="_control" Expression="_commonfields().control_"/>
+    <Intrinsic Name="_slots" Expression="(slot_type*)(_commonfields().slots_)"/>
+    <DisplayString IncludeView="noparens">size={ _size() }</DisplayString>
+    <DisplayString ExcludeView="noparens">size=({_size()})</DisplayString>
     <Expand>
-      <Item Name="[size]" ExcludeView="simple">size_</Item>
-      <Item Name="[capacity]" ExcludeView="simple">capacity_</Item>
-      <CustomListItems MaxItemsPerView="5000">
+      <Item Name="[Size]">_size()</Item>
+      <Item Name="[Capacity]" ExcludeView="noparens">_capacity()</Item>
+      <CustomListItems MaxItemsPerView="100">
         <Variable Name="nslot" InitialValue="0" />
-        <Size>size_</Size>
+        <Size>_size()</Size>
         <Loop>
           <!-- bool IsFull(ctrl_t c) const { return c >= 0; } -->
-          <If Condition="ctrl_[nslot] &gt;= 0">
-            <Item>slots_[nslot]</Item>
+          <If Condition="_control()[nslot] &gt;= 0">
+            <Item>_slots()[nslot]</Item>
           </If>
           <Exec>nslot++</Exec>
-          <Break Condition="nslot == capacity_" />
+          <Break Condition="nslot == _capacity()" />
         </Loop>
       </CustomListItems>
     </Expand>
   </Type>
+
+  <!-- Primitive types stored as a value -->
+  <Type Name="absl::lts_20230802::container_internal::Storage&lt;*,*,0&gt;">
+    <DisplayString IncludeView="noparens">*($T1 *){value}</DisplayString>
+    <DisplayString ExcludeView="noparens">(*($T1 *){value})</DisplayString>
+    <Expand>
+      <ExpandedItem>*($T1 *){value}</ExpandedItem>
+    </Expand>
+  </Type>
+
+  <!-- For storage inherited from the type -->
+  <Type Name="absl::lts_20230802::container_internal::Storage&lt;*,*,1&gt;">
+    <DisplayString IncludeView="noparens">*($T1 *)this</DisplayString>
+    <DisplayString ExcludeView="noparens">(*($T1 *)this)</DisplayString>
+    <Expand>
+      <ExpandedItem>*($T1 *)this</ExpandedItem>
+    </Expand>
+  </Type>
+
   <Type Name="absl::lts_20230802::container_internal::map_slot_type&lt;*&gt;">
-    <DisplayString>{{ {value.first}:{value.second} }}</DisplayString>
+    <DisplayString IncludeView="noparens">{value.first}, {value.second}</DisplayString>
+    <DisplayString ExcludeView="noparens">({value.first}, {value.second})</DisplayString>
     <Expand>
-      <Item Name="[key]" ExcludeView="simple">value.first</Item>
-      <Item Name="[value]" ExcludeView="simple">value.second</Item>
+      <Item Name="first" ExcludeView="simple">value.first</Item>
+      <Item Name="second" ExcludeView="simple">value.second</Item>
     </Expand>
   </Type>
 </AutoVisualizer>
diff --git a/cmake/external/composable_kernel.cmake b/cmake/external/composable_kernel.cmake
index 7168cd1a22c53..b4e6c834c83ab 100644
--- a/cmake/external/composable_kernel.cmake
+++ b/cmake/external/composable_kernel.cmake
@@ -12,13 +12,14 @@ if(NOT composable_kernel_POPULATED)
   FetchContent_Populate(composable_kernel)
   set(BUILD_DEV OFF CACHE BOOL "Disable -Weverything, otherwise, error: 'constexpr' specifier is incompatible with C++98 [-Werror,-Wc++98-compat]" FORCE)
   # Exclude i8 device gemm instances due to excessive long compilation time and not being used
-  set(DTYPES fp32 fp16 bf16)
+  set(DTYPES fp32 fp16 bf16 fp8)
   set(INSTANCES_ONLY ON)
   add_subdirectory(${composable_kernel_SOURCE_DIR} ${composable_kernel_BINARY_DIR} EXCLUDE_FROM_ALL)
 
   add_library(onnxruntime_composable_kernel_includes INTERFACE)
   target_include_directories(onnxruntime_composable_kernel_includes INTERFACE
     ${composable_kernel_SOURCE_DIR}/include
+    ${composable_kernel_BINARY_DIR}/include
     ${composable_kernel_SOURCE_DIR}/library/include)
   target_compile_definitions(onnxruntime_composable_kernel_includes INTERFACE __fp32__ __fp16__ __bf16__)
 endif()
diff --git a/cmake/external/cutlass.cmake b/cmake/external/cutlass.cmake
index 8c5d81d638ced..983eecdd88235 100644
--- a/cmake/external/cutlass.cmake
+++ b/cmake/external/cutlass.cmake
@@ -4,7 +4,6 @@ if (onnxruntime_USE_FLASH_ATTENTION OR onnxruntime_USE_MEMORY_EFFICIENT_ATTENTIO
     cutlass
     URL ${DEP_URL_cutlass}
     URL_HASH SHA1=${DEP_SHA1_cutlass}
-    PATCH_COMMAND ${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/cutlass/cutlass.patch
   )
 
   FetchContent_GetProperties(cutlass)
diff --git a/cmake/external/eigen.cmake b/cmake/external/eigen.cmake
index 5dd6c8e8dfe84..b123adb624fa4 100644
--- a/cmake/external/eigen.cmake
+++ b/cmake/external/eigen.cmake
@@ -1,23 +1,14 @@
-
 if (onnxruntime_USE_PREINSTALLED_EIGEN)
     add_library(eigen INTERFACE)
     file(TO_CMAKE_PATH ${eigen_SOURCE_PATH} eigen_INCLUDE_DIRS)
     target_include_directories(eigen INTERFACE ${eigen_INCLUDE_DIRS})
 else ()
-    if (onnxruntime_USE_ACL AND (NOT onnxruntime_USE_ACL_2308))
-        FetchContent_Declare(
-            eigen
-            URL ${DEP_URL_eigen}
-            URL_HASH SHA1=${DEP_SHA1_eigen}
-            PATCH_COMMAND ${Patch_EXECUTABLE} --ignore-space-change --ignore-whitespace < ${PROJECT_SOURCE_DIR}/patches/eigen/Fix_Eigen_Build_Break.patch
-        )
-    else()
-        FetchContent_Declare(
-            eigen
-            URL ${DEP_URL_eigen}
-            URL_HASH SHA1=${DEP_SHA1_eigen}
-        )
-    endif()
+    FetchContent_Declare(
+        eigen
+        URL ${DEP_URL_eigen}
+        URL_HASH SHA1=${DEP_SHA1_eigen}
+    )
+
     FetchContent_Populate(eigen)
     set(eigen_INCLUDE_DIRS  "${eigen_SOURCE_DIR}")
 endif()
diff --git a/cmake/external/git.Win32.2.41.03.patch/msys-2.0.dll b/cmake/external/git.Win32.2.41.03.patch/msys-2.0.dll
new file mode 100644
index 0000000000000..686afedb50bf3
Binary files /dev/null and b/cmake/external/git.Win32.2.41.03.patch/msys-2.0.dll differ
diff --git a/cmake/external/git.Win32.2.41.03.patch/msys-gcc_s-1.dll b/cmake/external/git.Win32.2.41.03.patch/msys-gcc_s-1.dll
new file mode 100644
index 0000000000000..1750b9ce92d72
Binary files /dev/null and b/cmake/external/git.Win32.2.41.03.patch/msys-gcc_s-1.dll differ
diff --git a/cmake/external/git.Win32.2.41.03.patch/patch.exe b/cmake/external/git.Win32.2.41.03.patch/patch.exe
new file mode 100644
index 0000000000000..8d784cb5d7e40
Binary files /dev/null and b/cmake/external/git.Win32.2.41.03.patch/patch.exe differ
diff --git a/cmake/external/onnx b/cmake/external/onnx
index e2525550194ce..b86cc54efce19 160000
--- a/cmake/external/onnx
+++ b/cmake/external/onnx
@@ -1 +1 @@
-Subproject commit e2525550194ce3d8a2c4a3af451c9d9b3ae6650e
+Subproject commit b86cc54efce19530fb953e4b21f57e6b3888534c
diff --git a/cmake/external/onnxruntime_external_deps.cmake b/cmake/external/onnxruntime_external_deps.cmake
index e1671bcf43ed9..0fa5163dc06bf 100644
--- a/cmake/external/onnxruntime_external_deps.cmake
+++ b/cmake/external/onnxruntime_external_deps.cmake
@@ -37,8 +37,12 @@ if (onnxruntime_BUILD_UNIT_TESTS)
     set(gtest_disable_pthreads ON)
   endif()
   set(INSTALL_GTEST OFF CACHE BOOL "" FORCE)
-  # Set it to ON will cause crashes in onnxruntime_test_all when onnxruntime_USE_CUDA is ON
-  set(GTEST_HAS_ABSL OFF CACHE BOOL "" FORCE)
+  if (CMAKE_SYSTEM_NAME STREQUAL "iOS")
+    # Needs to update onnxruntime/test/xctest/xcgtest.mm
+    set(GTEST_HAS_ABSL OFF CACHE BOOL "" FORCE)
+  else()
+    set(GTEST_HAS_ABSL ON CACHE BOOL "" FORCE)
+  endif()
   # gtest and gmock
   FetchContent_Declare(
     googletest
@@ -331,6 +335,7 @@ if(onnxruntime_USE_CUDA)
     URL ${DEP_URL_microsoft_gsl}
     URL_HASH SHA1=${DEP_SHA1_microsoft_gsl}
     PATCH_COMMAND ${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/gsl/1064.patch
+    FIND_PACKAGE_ARGS 4.0 NAMES Microsoft.GSL
   )
 else()
   FetchContent_Declare(
diff --git a/cmake/external/xnnpack.cmake b/cmake/external/xnnpack.cmake
index 7455584f1a625..e661aa51bfc17 100644
--- a/cmake/external/xnnpack.cmake
+++ b/cmake/external/xnnpack.cmake
@@ -25,17 +25,23 @@ set(FXDIV_SOURCE_DIR ${fxdiv_SOURCE_DIR})
 
 FetchContent_Declare(pthreadpool URL ${DEP_URL_pthreadpool} URL_HASH SHA1=${DEP_SHA1_pthreadpool})
 onnxruntime_fetchcontent_makeavailable(pthreadpool)
-FetchContent_Declare(googlexnnpack URL ${DEP_URL_googlexnnpack}  URL_HASH SHA1=${DEP_SHA1_googlexnnpack}
-PATCH_COMMAND ${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/xnnpack/AddEmscriptenAndIosSupport.patch)
 
+FetchContent_Declare(googlexnnpack URL ${DEP_URL_googlexnnpack} URL_HASH SHA1=${DEP_SHA1_googlexnnpack}
+                     PATCH_COMMAND ${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/xnnpack/AddEmscriptenAndIosSupport.patch
+                    )
 onnxruntime_fetchcontent_makeavailable(googlexnnpack)
 set(XNNPACK_DIR ${googlexnnpack_SOURCE_DIR})
 set(XNNPACK_INCLUDE_DIR ${XNNPACK_DIR}/include)
 
 set(onnxruntime_EXTERNAL_LIBRARIES_XNNPACK XNNPACK pthreadpool)
 
+
 # the XNNPACK CMake setup doesn't include the WASM kernels so we have to manually set those up
 if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+  # See source lists in _deps/googlexnnpack-src/BUILD.bazel for wasm_prod_microkernels
+  message("Adding WebAssembly Source Files to XNNPACK")
+  set(wasm_srcs "")
+
   file(READ "${XNNPACK_DIR}/BUILD.bazel" xnnpack_bazel_config)
 
   # Replace newlines with semicolon so that it is treated as a list by CMake
@@ -70,25 +76,23 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
     set(${target_srcs} ${bazel_srcs} PARENT_SCOPE)
   endfunction()
 
-  GetSrcListFromBazel("PROD_SCALAR_WASM_MICROKERNEL_SRCS" prod_scalar_wasm_srcs)
-  GetSrcListFromBazel("ALL_WASM_MICROKERNEL_SRCS" all_wasm_srcs)
-  GetSrcListFromBazel("WASM32_ASM_MICROKERNEL_SRCS" wasm32_asm_srcs)
+  GetSrcListFromBazel("OPERATOR_SRCS" operator_srcs)
+  GetSrcListFromBazel("TABLE_SRCS" table_srcs)
+  list(APPEND wasm_srcs ${operator_srcs} ${table_srcs})
 
-  message(DEBUG "prod_scalar_wasm_srcs: ${prod_scalar_wasm_srcs}\n")
-  message(DEBUG "all_wasm_srcs: ${all_wasm_srcs}\n")
-  message(DEBUG "wasm32_asm_srcs: ${wasm32_asm_srcs}\n")
+  # kernels
+  list(APPEND wasm_srcs ${XNNPACK_DIR}/src/amalgam/gen/scalar.c)
+  list(APPEND wasm_srcs ${XNNPACK_DIR}/src/amalgam/gen/wasm.c)
 
-  message("Adding WebAssembly Source Files to XNNPACK")
-  set(wasm_srcs "")
-  list(APPEND wasm_srcs ${prod_scalar_wasm_srcs})
-  list(APPEND wasm_srcs ${all_wasm_srcs})
-  list(APPEND wasm_srcs ${wasm32_asm_srcs})
+  if(onnxruntime_ENABLE_WEBASSEMBLY_SIMD)
+    list(APPEND wasm_srcs ${XNNPACK_DIR}/src/amalgam/gen/wasmsimd.c)
+    target_compile_options(XNNPACK PRIVATE "-msimd128")
+  endif()
 
+  message(DEBUG "wasm_srcs: ${wasm_srcs}\n")
   target_sources(XNNPACK PRIVATE ${wasm_srcs})
 
-  if(onnxruntime_ENABLE_WEBASSEMBLY_SIMD)
-    GetSrcListFromBazel("ALL_WASMSIMD_MICROKERNEL_SRCS" all_wasmsimd_srcs)
-    message(DEBUG "all_wasmsimd_srcs: ${all_wasmsimd_srcs}")
-    target_sources(XNNPACK PRIVATE ${all_wasmsimd_srcs})
-  endif()
+  # add flags from BAZEL.build
+  target_compile_options(XNNPACK PRIVATE "-fno-fast-math")
+  target_compile_options(XNNPACK PRIVATE "-fno-math-errno")
 endif()
diff --git a/cmake/linux_arm32_crosscompile_toolchain.cmake b/cmake/linux_arm32_crosscompile_toolchain.cmake
new file mode 100644
index 0000000000000..0183262a8875e
--- /dev/null
+++ b/cmake/linux_arm32_crosscompile_toolchain.cmake
@@ -0,0 +1,9 @@
+ #This file is just a sample. You may need to modify it before using.
+ SET(CMAKE_SYSTEM_NAME Linux)
+ SET(CMAKE_SYSTEM_VERSION 1)
+ SET(CMAKE_C_COMPILER arm-none-linux-gnueabihf-gcc)
+ SET(CMAKE_CXX_COMPILER arm-none-linux-gnueabihf-g++)
+ SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+ SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+ SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+ SET(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
\ No newline at end of file
diff --git a/cmake/linux_arm64_crosscompile_toolchain.cmake b/cmake/linux_arm64_crosscompile_toolchain.cmake
new file mode 100644
index 0000000000000..1a492bbc269e7
--- /dev/null
+++ b/cmake/linux_arm64_crosscompile_toolchain.cmake
@@ -0,0 +1,9 @@
+ #This file is just a sample. You may need to modify it before using.
+ SET(CMAKE_SYSTEM_NAME Linux)
+ SET(CMAKE_SYSTEM_VERSION 1)
+ SET(CMAKE_C_COMPILER aarch64-none-linux-gnu-gcc)
+ SET(CMAKE_CXX_COMPILER aarch64-none-linux-gnu-g++)
+ SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+ SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+ SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+ SET(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
\ No newline at end of file
diff --git a/cmake/onnxruntime.cmake b/cmake/onnxruntime.cmake
index 59ebf8eca4306..c900f4d4b09a5 100644
--- a/cmake/onnxruntime.cmake
+++ b/cmake/onnxruntime.cmake
@@ -18,35 +18,21 @@ if (${CMAKE_SYSTEM_NAME} STREQUAL "iOS")
   set(OUTPUT_STYLE xcode)
 endif()
 
-set(ONNXRUNTIME_PUBLIC_HEADERS
-  "${REPO_ROOT}/include/onnxruntime/core/session/onnxruntime_c_api.h"
-  "${REPO_ROOT}/include/onnxruntime/core/session/onnxruntime_cxx_api.h"
-  "${REPO_ROOT}/include/onnxruntime/core/session/onnxruntime_float16.h"
-  "${REPO_ROOT}/include/onnxruntime/core/session/onnxruntime_cxx_inline.h"
-  "${REPO_ROOT}/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h"
-  "${REPO_ROOT}/include/onnxruntime/core/session/onnxruntime_run_options_config_keys.h"
-)
-
-if (onnxruntime_ENABLE_TRAINING_APIS)
-  list(APPEND ${_HEADERS} "${REPO_ROOT}/orttraining/orttraining/training_api/include/onnxruntime_training_c_api.h")
-  list(APPEND ${_HEADERS} "${REPO_ROOT}/orttraining/orttraining/training_api/include/onnxruntime_training_cxx_api.h")
-  list(APPEND ${_HEADERS} "${REPO_ROOT}/orttraining/orttraining/training_api/include/onnxruntime_training_cxx_inline.h")
-endif()
-
-# This macro is to get the path of header files for mobile packaging, for iOS and Android
-macro(get_mobile_api_headers _HEADERS)
-  # include both c and cxx api
-  set(${_HEADERS}
+# Gets the public C/C++ API header files
+function(get_c_cxx_api_headers HEADERS_VAR)
+  set(_headers
     "${REPO_ROOT}/include/onnxruntime/core/session/onnxruntime_c_api.h"
     "${REPO_ROOT}/include/onnxruntime/core/session/onnxruntime_cxx_api.h"
-    "${REPO_ROOT}/include/onnxruntime/core/session/onnxruntime_float16.h"
     "${REPO_ROOT}/include/onnxruntime/core/session/onnxruntime_cxx_inline.h"
+    "${REPO_ROOT}/include/onnxruntime/core/session/onnxruntime_float16.h"
+    "${REPO_ROOT}/include/onnxruntime/core/session/onnxruntime_run_options_config_keys.h"
+    "${REPO_ROOT}/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h"
   )
 
   if (onnxruntime_ENABLE_TRAINING_APIS)
-    list(APPEND ${_HEADERS} "${REPO_ROOT}/orttraining/orttraining/training_api/include/onnxruntime_training_c_api.h")
-    list(APPEND ${_HEADERS} "${REPO_ROOT}/orttraining/orttraining/training_api/include/onnxruntime_training_cxx_api.h")
-    list(APPEND ${_HEADERS} "${REPO_ROOT}/orttraining/orttraining/training_api/include/onnxruntime_training_cxx_inline.h")
+    list(APPEND _headers "${REPO_ROOT}/orttraining/orttraining/training_api/include/onnxruntime_training_c_api.h")
+    list(APPEND _headers "${REPO_ROOT}/orttraining/orttraining/training_api/include/onnxruntime_training_cxx_api.h")
+    list(APPEND _headers "${REPO_ROOT}/orttraining/orttraining/training_api/include/onnxruntime_training_cxx_inline.h")
   endif()
 
   # need to add header files for enabled EPs
@@ -54,10 +40,13 @@ macro(get_mobile_api_headers _HEADERS)
     file(GLOB _provider_headers CONFIGURE_DEPENDS
       "${REPO_ROOT}/include/onnxruntime/core/providers/${f}/*.h"
     )
-    list(APPEND ${_HEADERS} "${_provider_headers}")
-    unset(_provider_headers)
+    list(APPEND _headers ${_provider_headers})
   endforeach()
-endmacro()
+
+  set(${HEADERS_VAR} ${_headers} PARENT_SCOPE)
+endfunction()
+
+get_c_cxx_api_headers(ONNXRUNTIME_PUBLIC_HEADERS)
 
 #If you want to verify if there is any extra line in symbols.txt, run
 # nm -C -g --defined libonnxruntime.so |grep -v '\sA\s' | cut -f 3 -d ' ' | sort
@@ -84,11 +73,9 @@ if(WIN32)
     "${ONNXRUNTIME_ROOT}/core/dll/onnxruntime.rc"
   )
 elseif(onnxruntime_BUILD_APPLE_FRAMEWORK)
-  get_mobile_api_headers(APPLE_FRAMEWORK_HEADERS)
-
   # apple framework requires the header file be part of the library
   onnxruntime_add_shared_library(onnxruntime
-    ${APPLE_FRAMEWORK_HEADERS}
+    ${ONNXRUNTIME_PUBLIC_HEADERS}
     "${CMAKE_CURRENT_BINARY_DIR}/generated_source.c"
   )
 
@@ -107,10 +94,9 @@ elseif(onnxruntime_BUILD_APPLE_FRAMEWORK)
   set_target_properties(onnxruntime PROPERTIES
     FRAMEWORK TRUE
     FRAMEWORK_VERSION A
-    PUBLIC_HEADER "${APPLE_FRAMEWORK_HEADERS}"
-    MACOSX_FRAMEWORK_INFO_PLIST ${CMAKE_CURRENT_BINARY_DIR}/Info.plist
-    VERSION ${ORT_VERSION}
-    SOVERSION  ${ORT_VERSION}
+    MACOSX_FRAMEWORK_INFO_PLIST ${INFO_PLIST_PATH}
+    SOVERSION ${ORT_VERSION}
+    # Note: The PUBLIC_HEADER and VERSION properties for the 'onnxruntime' target will be set later in this file.
   )
 else()
   onnxruntime_add_shared_library(onnxruntime ${CMAKE_CURRENT_BINARY_DIR}/generated_source.c)
@@ -180,11 +166,10 @@ endif()
 
 # we need to copy C/C++ API headers to be packed into Android AAR package
 if(CMAKE_SYSTEM_NAME STREQUAL "Android" AND onnxruntime_BUILD_JAVA)
-  get_mobile_api_headers(ANDROID_AAR_HEADERS)
   set(ANDROID_HEADERS_DIR ${CMAKE_CURRENT_BINARY_DIR}/android/headers)
   file(MAKE_DIRECTORY ${ANDROID_HEADERS_DIR})
   # copy the header files one by one
-  foreach(h_ ${ANDROID_AAR_HEADERS})
+  foreach(h_ ${ONNXRUNTIME_PUBLIC_HEADERS})
     get_filename_component(HEADER_NAME_ ${h_} NAME)
     add_custom_command(TARGET onnxruntime POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different ${h_} ${ANDROID_HEADERS_DIR}/${HEADER_NAME_})
   endforeach()
@@ -232,6 +217,7 @@ if (onnxruntime_USE_EXTENSIONS)
   list(APPEND onnxruntime_INTERNAL_LIBRARIES
     onnxruntime_extensions
     ocos_operators
+    noexcep_operators
   )
 endif()
 
@@ -255,7 +241,7 @@ install(TARGETS onnxruntime
         PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime
         ARCHIVE   DESTINATION ${CMAKE_INSTALL_LIBDIR}
         LIBRARY   DESTINATION ${CMAKE_INSTALL_LIBDIR}
-        RUNTIME   DESTINATION ${CMAKE_INSTALL_LIBDIR}
+        RUNTIME   DESTINATION ${CMAKE_INSTALL_BINDIR}
         FRAMEWORK DESTINATION ${CMAKE_INSTALL_BINDIR})
 
 
@@ -296,44 +282,73 @@ endif()
 
 # Assemble the Apple static framework (iOS and macOS)
 if(onnxruntime_BUILD_APPLE_FRAMEWORK)
+  set(STATIC_FRAMEWORK_OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}-${CMAKE_OSX_SYSROOT})
+
+  # Setup the various directories required. Remove any existing ones so we start with a clean directory.
   set(STATIC_LIB_DIR ${CMAKE_CURRENT_BINARY_DIR}/static_libraries)
-  file(MAKE_DIRECTORY ${STATIC_LIB_DIR})
+  set(STATIC_LIB_TEMP_DIR ${STATIC_LIB_DIR}/temp)
+  add_custom_command(TARGET onnxruntime PRE_BUILD COMMAND ${CMAKE_COMMAND} -E rm -rf ${STATIC_LIB_DIR})
+  add_custom_command(TARGET onnxruntime PRE_BUILD COMMAND ${CMAKE_COMMAND} -E make_directory ${STATIC_LIB_DIR})
+  add_custom_command(TARGET onnxruntime PRE_BUILD COMMAND ${CMAKE_COMMAND} -E make_directory ${STATIC_LIB_TEMP_DIR})
+
+  set(STATIC_FRAMEWORK_DIR ${STATIC_FRAMEWORK_OUTPUT_DIR}/static_framework/onnxruntime.framework)
+  add_custom_command(TARGET onnxruntime PRE_BUILD COMMAND ${CMAKE_COMMAND} -E rm -rf ${STATIC_FRAMEWORK_DIR})
+  add_custom_command(TARGET onnxruntime PRE_BUILD COMMAND ${CMAKE_COMMAND} -E make_directory ${STATIC_FRAMEWORK_DIR})
 
-  # Remove the existing files in the STATIC_LIB_DIR folder
-  file(GLOB _OLD_STATIC_LIBS ${STATIC_LIB_DIR}/*.a)
-  file(REMOVE "${_OLD_STATIC_LIBS}")
+  # replicate XCode's Single Object Pre-Link
+  # link the internal onnxruntime .o files with the external .a files into a single relocatable object
+  # to enforce symbol visibility. doing it this way limits the symbols included from the .a files to symbols used
+  # by the ORT .o files.
 
-  # Go through all the static libraries, and create symbolic links
-  foreach(_LIB ${onnxruntime_INTERNAL_LIBRARIES} ${onnxruntime_EXTERNAL_LIBRARIES})
+  # If it's an onnxruntime library, extract .o files to a separate directory for each library to avoid any clashes
+  # with filenames (e.g. utils.o)
+  foreach(_LIB ${onnxruntime_INTERNAL_LIBRARIES} )
     GET_TARGET_PROPERTY(_LIB_TYPE ${_LIB} TYPE)
     if(_LIB_TYPE STREQUAL "STATIC_LIBRARY")
-      add_custom_command(TARGET onnxruntime POST_BUILD COMMAND ${CMAKE_COMMAND} -E create_symlink $<TARGET_FILE:${_LIB}> ${STATIC_LIB_DIR}/$<TARGET_LINKER_FILE_NAME:${_LIB}>)
+      set(CUR_STATIC_LIB_OBJ_DIR ${STATIC_LIB_TEMP_DIR}/$<TARGET_LINKER_FILE_BASE_NAME:${_LIB}>)
+      add_custom_command(TARGET onnxruntime POST_BUILD
+                         COMMAND ${CMAKE_COMMAND} -E make_directory ${CUR_STATIC_LIB_OBJ_DIR})
+
+      add_custom_command(TARGET onnxruntime POST_BUILD
+                         COMMAND ar ARGS -x $<TARGET_FILE:${_LIB}>
+                         WORKING_DIRECTORY ${CUR_STATIC_LIB_OBJ_DIR})
     endif()
   endforeach()
 
-  if(${CMAKE_SYSTEM_NAME} STREQUAL "iOS")
-    set(STATIC_FRAMEWORK_OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}-${CMAKE_OSX_SYSROOT})
-  else() # macOS
-    set(STATIC_FRAMEWORK_OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR})
-  endif()
+  # for external libraries we create a symlink to the .a file
+  foreach(_LIB ${onnxruntime_EXTERNAL_LIBRARIES})
+    GET_TARGET_PROPERTY(_LIB_TYPE ${_LIB} TYPE)
+    if(_LIB_TYPE STREQUAL "STATIC_LIBRARY")
+      add_custom_command(TARGET onnxruntime POST_BUILD
+                         COMMAND ${CMAKE_COMMAND} -E create_symlink
+                           $<TARGET_FILE:${_LIB}> ${STATIC_LIB_DIR}/$<TARGET_LINKER_FILE_NAME:${_LIB}>)
+    endif()
+  endforeach()
 
-  # Assemble the static framework
-  set(STATIC_FRAMEWORK_DIR ${STATIC_FRAMEWORK_OUTPUT_DIR}/static_framework/onnxruntime.framework)
-  set(STATIC_FRAMEWORK_HEADER_DIR ${STATIC_FRAMEWORK_DIR}/Headers)
-  file(MAKE_DIRECTORY ${STATIC_FRAMEWORK_DIR})
-  # Remove all files under STATIC_FRAMEWORK_DIR (if any)
-  file(GLOB_RECURSE _OLD_STATIC_FRAMEWORK ${STATIC_FRAMEWORK_DIR}/*.*)
-  file(REMOVE "${_OLD_STATIC_FRAMEWORK}")
+  # do the pre-link with `ld -r` to create a single relocatable object with correct symbol visibility
+  add_custom_command(TARGET onnxruntime POST_BUILD
+                     COMMAND ld ARGS -r -o ${STATIC_LIB_DIR}/prelinked_objects.o */*.o ../*.a
+                     WORKING_DIRECTORY ${STATIC_LIB_TEMP_DIR})
+
+  # create the static library
+  add_custom_command(TARGET onnxruntime POST_BUILD
+                     COMMAND libtool -static -o ${STATIC_FRAMEWORK_DIR}/onnxruntime prelinked_objects.o
+                     WORKING_DIRECTORY ${STATIC_LIB_DIR})
 
+  # Assemble the other pieces of the static framework
+  add_custom_command(TARGET onnxruntime POST_BUILD
+                     COMMAND ${CMAKE_COMMAND} -E
+                       copy_if_different ${INFO_PLIST_PATH} ${STATIC_FRAMEWORK_DIR}/Info.plist)
+
+  # add the framework header files
+  set(STATIC_FRAMEWORK_HEADER_DIR ${STATIC_FRAMEWORK_DIR}/Headers)
   file(MAKE_DIRECTORY ${STATIC_FRAMEWORK_HEADER_DIR})
 
-  # copy the header files one by one, and the Info.plist
-  foreach(h_ ${APPLE_FRAMEWORK_HEADERS})
+  foreach(h_ ${ONNXRUNTIME_PUBLIC_HEADERS})
     get_filename_component(HEADER_NAME_ ${h_} NAME)
-    add_custom_command(TARGET onnxruntime POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different ${h_} ${STATIC_FRAMEWORK_HEADER_DIR}/${HEADER_NAME_})
+    add_custom_command(TARGET onnxruntime POST_BUILD
+                       COMMAND ${CMAKE_COMMAND} -E
+                         copy_if_different ${h_} ${STATIC_FRAMEWORK_HEADER_DIR}/${HEADER_NAME_})
   endforeach()
-  add_custom_command(TARGET onnxruntime POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different ${INFO_PLIST_PATH} ${STATIC_FRAMEWORK_DIR}/Info.plist)
 
-  # link the static library
-  add_custom_command(TARGET onnxruntime POST_BUILD COMMAND libtool -static -o ${STATIC_FRAMEWORK_DIR}/onnxruntime *.a WORKING_DIRECTORY ${STATIC_LIB_DIR})
 endif()
diff --git a/cmake/onnxruntime_config.h.in b/cmake/onnxruntime_config.h.in
index 2aef9dcf209e0..e3ea767401ddc 100644
--- a/cmake/onnxruntime_config.h.in
+++ b/cmake/onnxruntime_config.h.in
@@ -22,5 +22,5 @@
 #cmakedefine HAS_UNUSED_BUT_SET_VARIABLE
 #cmakedefine HAS_UNUSED_VARIABLE
 #cmakedefine HAS_USELESS_CAST
-#cmakedefine ORT_BUILD_INFO u8"@ORT_BUILD_INFO@"
-#cmakedefine ORT_VERSION u8"@ORT_VERSION@"
+#cmakedefine ORT_BUILD_INFO "@ORT_BUILD_INFO@"
+#cmakedefine ORT_VERSION "@ORT_VERSION@"
diff --git a/cmake/onnxruntime_graph.cmake b/cmake/onnxruntime_graph.cmake
index 735c86956ec4f..3f532ec2c3261 100644
--- a/cmake/onnxruntime_graph.cmake
+++ b/cmake/onnxruntime_graph.cmake
@@ -20,6 +20,8 @@ if (onnxruntime_MINIMAL_BUILD)
     "${ONNXRUNTIME_ROOT}/core/graph/contrib_ops/onnx_deprecated_operators.cc"
     "${ONNXRUNTIME_ROOT}/core/graph/contrib_ops/onnx_function_util.h"
     "${ONNXRUNTIME_ROOT}/core/graph/contrib_ops/onnx_function_util.cc"
+    "${ONNXRUNTIME_ROOT}/core/graph/contrib_ops/shape_inference_functions.h"
+    "${ONNXRUNTIME_ROOT}/core/graph/contrib_ops/shape_inference_functions.cc"
     "${ONNXRUNTIME_ROOT}/core/graph/function_template.h"
     "${ONNXRUNTIME_ROOT}/core/graph/function_utils.h"
     "${ONNXRUNTIME_ROOT}/core/graph/function_utils.cc"
diff --git a/cmake/onnxruntime_mlas.cmake b/cmake/onnxruntime_mlas.cmake
index e0ccc504d7b27..04efa5c2b4f6d 100644
--- a/cmake/onnxruntime_mlas.cmake
+++ b/cmake/onnxruntime_mlas.cmake
@@ -33,6 +33,7 @@ onnxruntime_add_static_library(onnxruntime_mlas
   ${MLAS_SRC_DIR}/qpostprocessor.cpp
   ${MLAS_SRC_DIR}/qlgavgpool.cpp
   ${MLAS_SRC_DIR}/qdwconv_kernelsize.cpp
+  ${MLAS_SRC_DIR}/sqnbitgemm.cpp
 )
 
 if (NOT onnxruntime_ORT_MINIMAL_BUILD)
@@ -68,6 +69,7 @@ function(setup_mlas_source_for_windows)
         ${MLAS_SRC_DIR}/qgemm_kernel_neon.cpp
         ${MLAS_SRC_DIR}/qgemm_kernel_udot.cpp
         ${MLAS_SRC_DIR}/qgemm_kernel_sdot.cpp
+        ${MLAS_SRC_DIR}/sqnbitgemm_kernel_neon.cpp
       )
 
       set(mlas_platform_preprocess_srcs
@@ -334,17 +336,24 @@ else()
           ${MLAS_SRC_DIR}/qgemm_kernel_neon.cpp
           ${MLAS_SRC_DIR}/qgemm_kernel_udot.cpp
           ${MLAS_SRC_DIR}/qgemm_kernel_sdot.cpp
+          ${MLAS_SRC_DIR}/sqnbitgemm_kernel_neon.cpp
         )
         if (NOT APPLE)
           set(mlas_platform_srcs
             ${mlas_platform_srcs}
             ${MLAS_SRC_DIR}/aarch64/HalfGemmKernelNeon.S
+            ${MLAS_SRC_DIR}/aarch64/QgemmS8S8KernelSmmla.S
+            ${MLAS_SRC_DIR}/aarch64/QgemmU8X8KernelUmmla.S
             ${MLAS_SRC_DIR}/activate_fp16.cpp
             ${MLAS_SRC_DIR}/dwconv.cpp
             ${MLAS_SRC_DIR}/halfgemm_kernel_neon.cpp
             ${MLAS_SRC_DIR}/pooling_fp16.cpp
+            ${MLAS_SRC_DIR}/qgemm_kernel_smmla.cpp
+            ${MLAS_SRC_DIR}/qgemm_kernel_ummla.cpp
           )
           set_source_files_properties(${MLAS_SRC_DIR}/aarch64/HalfGemmKernelNeon.S PROPERTIES COMPILE_FLAGS " -march=armv8.2-a+fp16 ")
+          set_source_files_properties(${MLAS_SRC_DIR}/aarch64/QgemmS8S8KernelSmmla.S PROPERTIES COMPILE_FLAGS " -march=armv8.2-a+i8mm ")
+          set_source_files_properties(${MLAS_SRC_DIR}/aarch64/QgemmU8X8KernelUmmla.S PROPERTIES COMPILE_FLAGS " -march=armv8.2-a+i8mm ")
           set_source_files_properties(${MLAS_SRC_DIR}/activate_fp16.cpp PROPERTIES COMPILE_FLAGS " -march=armv8.2-a+fp16 ")
           set_source_files_properties(${MLAS_SRC_DIR}/dwconv.cpp PROPERTIES COMPILE_FLAGS " -march=armv8.2-a+fp16 ")
           set_source_files_properties(${MLAS_SRC_DIR}/pooling_fp16.cpp PROPERTIES COMPILE_FLAGS " -march=armv8.2-a+fp16 ")
@@ -581,7 +590,7 @@ set_target_properties(onnxruntime_mlas PROPERTIES FOLDER "ONNXRuntime")
 if (WIN32)
   target_compile_options(onnxruntime_mlas PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:/wd6385>" "$<$<COMPILE_LANGUAGE:CXX>:/wd4127>")
   if (onnxruntime_ENABLE_STATIC_ANALYSIS)
-    target_compile_options(onnxruntime_mlas PRIVATE  "$<$<COMPILE_LANGUAGE:CXX>:/analyze:stacksize" 131072>)
+    target_compile_options(onnxruntime_mlas PRIVATE  "$<$<COMPILE_LANGUAGE:CXX>:/analyze:stacksize 131072>")
   endif()
 endif()
 
diff --git a/cmake/onnxruntime_optimizer.cmake b/cmake/onnxruntime_optimizer.cmake
index 3da4198573d54..6f09583199ffd 100644
--- a/cmake/onnxruntime_optimizer.cmake
+++ b/cmake/onnxruntime_optimizer.cmake
@@ -86,6 +86,8 @@ if (onnxruntime_ENABLE_TRAINING)
     "${ORTTRAINING_SOURCE_DIR}/core/optimizer/*.cc"
     "${ORTTRAINING_SOURCE_DIR}/core/optimizer/compute_optimizer/*.h"
     "${ORTTRAINING_SOURCE_DIR}/core/optimizer/compute_optimizer/*.cc"
+    "${ORTTRAINING_SOURCE_DIR}/core/optimizer/memory_optimizer/*.h"
+    "${ORTTRAINING_SOURCE_DIR}/core/optimizer/memory_optimizer/*.cc"
   )
 endif()
 
@@ -109,6 +111,9 @@ onnxruntime_add_include_to_target(onnxruntime_optimizer onnxruntime_common onnxr
 target_include_directories(onnxruntime_optimizer PRIVATE ${ONNXRUNTIME_ROOT})
 if (onnxruntime_ENABLE_TRAINING)
   target_include_directories(onnxruntime_optimizer PRIVATE ${ORTTRAINING_ROOT})
+  if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
+    onnxruntime_add_include_to_target(onnxruntime_optimizer Python::Module)
+  endif()
 endif()
 if (onnxruntime_ENABLE_TRITON)
   target_link_libraries(onnxruntime_optimizer PRIVATE nlohmann_json::nlohmann_json)
diff --git a/cmake/onnxruntime_providers.cmake b/cmake/onnxruntime_providers.cmake
index b9e7873132089..8d3ea403fb74b 100644
--- a/cmake/onnxruntime_providers.cmake
+++ b/cmake/onnxruntime_providers.cmake
@@ -59,50 +59,6 @@ function(add_op_reduction_include_dirs target)
 endfunction()
 
 
-file(GLOB_RECURSE onnxruntime_providers_srcs CONFIGURE_DEPENDS
-  "${ONNXRUNTIME_ROOT}/core/providers/cpu/*.h"
-  "${ONNXRUNTIME_ROOT}/core/providers/cpu/*.cc"
-)
-
-if(onnxruntime_DISABLE_ML_OPS)
-  list(FILTER onnxruntime_providers_srcs EXCLUDE REGEX ".*/ml/.*")
-endif()
-
-file(GLOB_RECURSE onnxruntime_cpu_contrib_ops_srcs CONFIGURE_DEPENDS
-  "${ONNXRUNTIME_ROOT}/contrib_ops/cpu/*.h"
-  "${ONNXRUNTIME_ROOT}/contrib_ops/cpu/*.cc"
-)
-
-file(GLOB_RECURSE onnxruntime_cuda_contrib_ops_cc_srcs CONFIGURE_DEPENDS
-  "${ONNXRUNTIME_ROOT}/contrib_ops/cuda/*.h"
-  "${ONNXRUNTIME_ROOT}/contrib_ops/cuda/*.cc"
-)
-
-file(GLOB_RECURSE onnxruntime_cuda_contrib_ops_cu_srcs CONFIGURE_DEPENDS
-  "${ONNXRUNTIME_ROOT}/contrib_ops/cuda/*.cu"
-  "${ONNXRUNTIME_ROOT}/contrib_ops/cuda/*.cuh"
-)
-
-file(GLOB_RECURSE onnxruntime_rocm_contrib_ops_cc_srcs CONFIGURE_DEPENDS
-  "${ONNXRUNTIME_ROOT}/contrib_ops/rocm/*.h"
-  "${ONNXRUNTIME_ROOT}/contrib_ops/rocm/*.cc"
-)
-
-file(GLOB_RECURSE onnxruntime_rocm_contrib_ops_cu_srcs CONFIGURE_DEPENDS
-  "${ONNXRUNTIME_ROOT}/contrib_ops/rocm/*.cu"
-  "${ONNXRUNTIME_ROOT}/contrib_ops/rocm/*.cuh"
-)
-
-file(GLOB_RECURSE onnxruntime_js_contrib_ops_cc_srcs CONFIGURE_DEPENDS
-  "${ONNXRUNTIME_ROOT}/contrib_ops/js/*.h"
-  "${ONNXRUNTIME_ROOT}/contrib_ops/js/*.cc"
-)
-
-file(GLOB onnxruntime_providers_common_srcs CONFIGURE_DEPENDS
-  "${ONNXRUNTIME_ROOT}/core/providers/*.h"
-  "${ONNXRUNTIME_ROOT}/core/providers/*.cc"
-  "${ONNXRUNTIME_ROOT}/core/providers/op_kernel_type_control_overrides.inc"
-)
 if(onnxruntime_USE_VITISAI)
   set(PROVIDERS_VITISAI onnxruntime_providers_vitisai)
 endif()
@@ -155,9 +111,6 @@ endif()
 if(onnxruntime_USE_WEBNN)
   set(PROVIDERS_WEBNN onnxruntime_providers_webnn)
 endif()
-if(onnxruntime_USE_SNPE)
-    include(onnxruntime_snpe_provider.cmake)
-endif()
 if (onnxruntime_USE_CANN)
   set(PROVIDERS_CANN onnxruntime_providers_cann)
 endif()
@@ -165,1725 +118,88 @@ if (onnxruntime_USE_AZURE)
   set(PROVIDERS_AZURE onnxruntime_providers_azure)
 endif()
 
-source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_common_srcs} ${onnxruntime_providers_srcs})
-
-set(onnxruntime_providers_src ${onnxruntime_providers_common_srcs} ${onnxruntime_providers_srcs})
-
-# disable contrib ops conditionally
-if(NOT onnxruntime_DISABLE_CONTRIB_OPS)
-  if (NOT onnxruntime_ENABLE_ATEN)
-    list(REMOVE_ITEM onnxruntime_cpu_contrib_ops_srcs
-      "${ONNXRUNTIME_ROOT}/contrib_ops/cpu/aten_ops/aten_op.h"
-      "${ONNXRUNTIME_ROOT}/contrib_ops/cpu/aten_ops/aten_op.cc"
-      "${ONNXRUNTIME_ROOT}/contrib_ops/cpu/aten_ops/aten_op_executor.cc"
-    )
-  endif()
-  # add using ONNXRUNTIME_ROOT so they show up under the 'contrib_ops' folder in Visual Studio
-  source_group(TREE ${ONNXRUNTIME_ROOT} FILES ${onnxruntime_cpu_contrib_ops_srcs})
-  list(APPEND onnxruntime_providers_src ${onnxruntime_cpu_contrib_ops_srcs})
-endif()
-
-if (onnxruntime_ENABLE_TRAINING_OPS AND NOT onnxruntime_ENABLE_TRAINING)
-  file(GLOB_RECURSE onnxruntime_cpu_training_ops_srcs CONFIGURE_DEPENDS
-    "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/*.h"
-    "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/*.cc"
-  )
-
-  source_group(TREE ${ORTTRAINING_ROOT}/ FILES ${onnxruntime_cpu_training_ops_srcs})
-  list(APPEND onnxruntime_providers_src ${onnxruntime_cpu_training_ops_srcs})
-
-  file(GLOB_RECURSE onnxruntime_cpu_full_training_only_srcs
-    "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/collective/*.cc"
-    "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/collective/*.h"
-    "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/communication/*.cc"
-    "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/communication/*.h"
-    "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/controlflow/record.cc"
-    "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/controlflow/record.h"
-    "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/controlflow/wait.cc"
-    "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/controlflow/wait.h"
-    "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/controlflow/yield.cc"
-    "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/controlflow/yield.h"
-    "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/gist/*.cc"
-    "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/gist/*.h"
-    "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/tensorboard/*.cc"
-    "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/tensorboard/*.h"
-    "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/torch/*.cc"
-    "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/torch/*.h"
-    "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/triton/triton_op.cc"
-    "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/triton/triton_op.h"
-  )
-
-  list(REMOVE_ITEM onnxruntime_providers_src ${onnxruntime_cpu_full_training_only_srcs})
-endif()
-
-if (onnxruntime_ENABLE_ATEN)
-  file(GLOB_RECURSE onnxruntime_providers_dlpack_srcs CONFIGURE_DEPENDS
-    "${ONNXRUNTIME_ROOT}/core/dlpack/dlpack_converter.cc"
-    "${ONNXRUNTIME_ROOT}/core/dlpack/dlpack_converter.h"
-  )
-  set(onnxruntime_providers_dlpack_srcs ${onnxruntime_providers_dlpack_srcs})
-  source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_dlpack_srcs})
-  list(APPEND onnxruntime_providers_src ${onnxruntime_providers_dlpack_srcs})
-endif()
-
-if (onnxruntime_ENABLE_TRAINING)
-  file(GLOB_RECURSE onnxruntime_cpu_training_ops_srcs CONFIGURE_DEPENDS
-    "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/*.h"
-    "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/*.cc"
-    "${ORTTRAINING_SOURCE_DIR}/core/framework/*.h"
-    "${ORTTRAINING_SOURCE_DIR}/core/framework/*.cc"
-    "${ORTTRAINING_SOURCE_DIR}/core/framework/adasum/*"
-    "${ORTTRAINING_SOURCE_DIR}/core/framework/communication/*"
-  )
-
-  # This is already built in framework.cmake
-  file(GLOB_RECURSE onnxruntime_training_framework_excude_srcs CONFIGURE_DEPENDS
-      "${ORTTRAINING_SOURCE_DIR}/core/framework/torch/*.h"
-      "${ORTTRAINING_SOURCE_DIR}/core/framework/torch/*.cc"
-      "${ORTTRAINING_SOURCE_DIR}/core/framework/triton/*.h"
-      "${ORTTRAINING_SOURCE_DIR}/core/framework/triton/*.cc"
-  )
-
-  list(REMOVE_ITEM onnxruntime_cpu_training_ops_srcs ${onnxruntime_training_framework_excude_srcs})
-
-  source_group(TREE ${ORTTRAINING_ROOT}/ FILES ${onnxruntime_cpu_training_ops_srcs})
-  list(APPEND onnxruntime_providers_src ${onnxruntime_cpu_training_ops_srcs})
-endif()
-
-if (onnxruntime_REDUCED_OPS_BUILD)
-  substitute_op_reduction_srcs(onnxruntime_providers_src)
-endif()
-onnxruntime_add_static_library(onnxruntime_providers ${onnxruntime_providers_src})
-if (onnxruntime_REDUCED_OPS_BUILD)
-  add_op_reduction_include_dirs(onnxruntime_providers)
-endif()
-
-if (HAS_BITWISE_INSTEAD_OF_LOGICAL)
-  target_compile_options(onnxruntime_providers PRIVATE "-Wno-bitwise-instead-of-logical")
-endif()
-
-if (MSVC)
-   target_compile_options(onnxruntime_providers PRIVATE "/bigobj")
-#   if(NOT CMAKE_SIZEOF_VOID_P EQUAL 8)
-#      target_compile_options(onnxruntime_providers PRIVATE "/wd4244")
-#   endif()
-endif()
-onnxruntime_add_include_to_target(onnxruntime_providers onnxruntime_common onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB} flatbuffers::flatbuffers Boost::mp11 safeint_interface)
-
-if (onnxruntime_BUILD_MS_EXPERIMENTAL_OPS)
-  target_compile_definitions(onnxruntime_providers PRIVATE BUILD_MS_EXPERIMENTAL_OPS=1)
-endif()
-
-if(HAS_DEPRECATED_COPY)
-  #temporarily ignore this warning
-  #see: https://en.wikipedia.org/wiki/Rule_of_three_(C%2B%2B_programming)
-  set_source_files_properties("${ONNXRUNTIME_ROOT}/core/providers/cpu/math/matmul_integer.cc" PROPERTIES COMPILE_FLAGS -Wno-deprecated-copy)
-  set_source_files_properties("${ONNXRUNTIME_ROOT}/core/providers/cpu/math/quantize_linear_matmul.cc" PROPERTIES COMPILE_FLAGS -Wno-deprecated-copy)
-  set_source_files_properties("${ONNXRUNTIME_ROOT}/core/providers/cpu/nn/qlinearconv.cc" PROPERTIES COMPILE_FLAGS -Wno-deprecated-copy)
-  set_source_files_properties("${ONNXRUNTIME_ROOT}/core/providers/cpu/nn/conv_integer.cc" PROPERTIES COMPILE_FLAGS -Wno-deprecated-copy)
-  set_source_files_properties("${ONNXRUNTIME_ROOT}/core/providers/cpu/generator/random.cc" PROPERTIES COMPILE_FLAGS -Wno-deprecated-copy)
-  set_source_files_properties("${ONNXRUNTIME_ROOT}/core/providers/cpu/tensor/onehot.cc" PROPERTIES COMPILE_FLAGS -Wno-deprecated-copy)
-  set_source_files_properties("${ONNXRUNTIME_ROOT}/core/providers/cpu/tensor/where_op.cc" PROPERTIES COMPILE_FLAGS -Wno-deprecated-copy)
-endif()
-
-# This is enabled only for Adasum files in training mode.
-# The flags won't be applied globally since some high-precision training and inferencing ops will incur precision loss.
-if (onnxruntime_ENABLE_CPU_FP16_OPS)
-  set_source_files_properties(${ORTTRAINING_SOURCE_DIR}/core/framework/adasum/adasum_mpi.cc PROPERTIES COMPILE_FLAGS " -fassociative-math -ffast-math -ftree-vectorize -funsafe-math-optimizations -mf16c -mavx -mfma ")
-  set_source_files_properties(${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/collective/adasum_kernels.cc PROPERTIES COMPILE_FLAGS " -fassociative-math -ffast-math -ftree-vectorize -funsafe-math-optimizations -mf16c -mavx -mfma ")
-  set_source_files_properties(${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/collective/adasum_kernels.cc PROPERTIES COMPILE_FLAGS " -fassociative-math -ffast-math -ftree-vectorize -funsafe-math-optimizations -mf16c -mavx -mfma ")
-endif()
-
-target_include_directories(onnxruntime_providers PRIVATE ${ONNXRUNTIME_ROOT} ${eigen_INCLUDE_DIRS})
-onnxruntime_add_include_to_target(onnxruntime_providers re2::re2)
-add_dependencies(onnxruntime_providers onnx ${onnxruntime_EXTERNAL_DEPENDENCIES})
-
-if (onnxruntime_ENABLE_TRAINING_OPS)
-  target_include_directories(onnxruntime_providers PRIVATE ${ORTTRAINING_ROOT})
-endif()
-
-if (onnxruntime_ENABLE_ATEN)
-  target_compile_definitions(onnxruntime_providers PRIVATE ENABLE_ATEN)
-  # DLPack is a header-only dependency
-  set(DLPACK_INCLUDE_DIR ${dlpack_SOURCE_DIR}/include)
-  target_include_directories(onnxruntime_providers PRIVATE ${DLPACK_INCLUDE_DIR})
-endif()
-
-if (onnxruntime_ENABLE_TRAINING)
-  add_dependencies(onnxruntime_providers tensorboard)
-  onnxruntime_add_include_to_target(onnxruntime_providers tensorboard)
-  if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP OR onnxruntime_ENABLE_TRITON)
-    onnxruntime_add_include_to_target(onnxruntime_providers Python::Module)
-  endif()
 
-  if (onnxruntime_USE_NCCL OR onnxruntime_USE_MPI)
-    target_include_directories(onnxruntime_providers PUBLIC ${MPI_CXX_INCLUDE_DIRS})
-  endif()
-endif()
-
-install(FILES ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/cpu/cpu_provider_factory.h  DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/)
-set_target_properties(onnxruntime_providers PROPERTIES LINKER_LANGUAGE CXX)
-set_target_properties(onnxruntime_providers PROPERTIES FOLDER "ONNXRuntime")
-
-if (NOT onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_EXTENDED_MINIMAL_BUILD
-                                  AND NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin|iOS"
-                                  AND NOT CMAKE_SYSTEM_NAME STREQUAL "Android"
-                                  AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
-  file(GLOB onnxruntime_providers_shared_cc_srcs CONFIGURE_DEPENDS
-  "${ONNXRUNTIME_ROOT}/core/providers/shared/*.h"
-  "${ONNXRUNTIME_ROOT}/core/providers/shared/*.cc"
-  )
-
-  source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_shared_cc_srcs})
-  onnxruntime_add_shared_library(onnxruntime_providers_shared ${onnxruntime_providers_shared_cc_srcs} "${ONNXRUNTIME_ROOT}/core/dll/onnxruntime.rc")
-  set_target_properties(onnxruntime_providers_shared PROPERTIES FOLDER "ONNXRuntime")
-  set_target_properties(onnxruntime_providers_shared PROPERTIES LINKER_LANGUAGE CXX)
-
-  target_compile_definitions(onnxruntime_providers_shared PRIVATE VER_MAJOR=${VERSION_MAJOR_PART})
-  target_compile_definitions(onnxruntime_providers_shared PRIVATE VER_MINOR=${VERSION_MINOR_PART})
-  target_compile_definitions(onnxruntime_providers_shared PRIVATE VER_BUILD=${VERSION_BUILD_PART})
-  target_compile_definitions(onnxruntime_providers_shared PRIVATE VER_PRIVATE=${VERSION_PRIVATE_PART})
-  target_compile_definitions(onnxruntime_providers_shared PRIVATE VER_STRING=\"${VERSION_STRING}\")
-  target_compile_definitions(onnxruntime_providers_shared PRIVATE FILE_NAME=\"onnxruntime_providers_shared.dll\")
-
-
-  # On Apple/Unix we don't directly link with this library as we load it with RTLD_GLOBAL, so this is only set to the actual library on WIN32
-  # But, in exchange we need to manually add Boost::mp11 to include dirs for every EP.
-  # It is because "provider_api.h" includes core/framework/op_kernel.h which includes op_kernel.h which includes "boost/mp11.hpp"
-  set(ONNXRUNTIME_PROVIDERS_SHARED)
-
-  if(APPLE)
-  set_property(TARGET onnxruntime_providers_shared APPEND_STRING PROPERTY LINK_FLAGS "-Xlinker -exported_symbols_list ${ONNXRUNTIME_ROOT}/core/providers/shared/exported_symbols.lst")
-  elseif(UNIX)
-  set_property(TARGET onnxruntime_providers_shared APPEND_STRING PROPERTY LINK_FLAGS "-Xlinker --version-script=${ONNXRUNTIME_ROOT}/core/providers/shared/version_script.lds -Xlinker --gc-sections")
-  elseif(WIN32)
-  set_property(TARGET onnxruntime_providers_shared APPEND_STRING PROPERTY LINK_FLAGS "-DEF:${ONNXRUNTIME_ROOT}/core/providers/shared/symbols.def")
-  set(ONNXRUNTIME_PROVIDERS_SHARED onnxruntime_providers_shared)
-  else()
-  message(FATAL_ERROR "onnxruntime_providers_shared unknown platform, need to specify shared library exports for it")
-  endif()
-
-  install(TARGETS onnxruntime_providers_shared
-          ARCHIVE  DESTINATION ${CMAKE_INSTALL_LIBDIR}
-          LIBRARY  DESTINATION ${CMAKE_INSTALL_LIBDIR}
-          RUNTIME  DESTINATION ${CMAKE_INSTALL_LIBDIR}
-  )
+if(onnxruntime_USE_SNPE)
+  include(onnxruntime_snpe_provider.cmake)
 endif()
 
+include(onnxruntime_providers_cpu.cmake)
 if (onnxruntime_USE_CUDA)
-  file(GLOB_RECURSE onnxruntime_providers_cuda_cc_srcs CONFIGURE_DEPENDS
-    "${ONNXRUNTIME_ROOT}/core/providers/cuda/*.h"
-    "${ONNXRUNTIME_ROOT}/core/providers/cuda/*.cc"
-  )
-  # Remove pch files
-  list(REMOVE_ITEM onnxruntime_providers_cuda_cc_srcs
-    "${ONNXRUNTIME_ROOT}/core/providers/cuda/cuda_pch.h"
-    "${ONNXRUNTIME_ROOT}/core/providers/cuda/cuda_pch.cc"
-  )
-
-  # The shared_library files are in a separate list since they use precompiled headers, and the above files have them disabled.
-  file(GLOB_RECURSE onnxruntime_providers_cuda_shared_srcs CONFIGURE_DEPENDS
-    "${ONNXRUNTIME_ROOT}/core/providers/shared_library/*.h"
-    "${ONNXRUNTIME_ROOT}/core/providers/shared_library/*.cc"
-  )
-  file(GLOB_RECURSE onnxruntime_providers_cuda_cu_srcs CONFIGURE_DEPENDS
-    "${ONNXRUNTIME_ROOT}/core/providers/cuda/*.cu"
-    "${ONNXRUNTIME_ROOT}/core/providers/cuda/*.cuh"
-  )
-
-  source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_cuda_cc_srcs} ${onnxruntime_providers_cuda_shared_srcs} ${onnxruntime_providers_cuda_cu_srcs})
-  set(onnxruntime_providers_cuda_src ${onnxruntime_providers_cuda_cc_srcs} ${onnxruntime_providers_cuda_shared_srcs} ${onnxruntime_providers_cuda_cu_srcs})
-
-  # disable contrib ops conditionally
-  if(NOT onnxruntime_DISABLE_CONTRIB_OPS)
-    if (NOT onnxruntime_ENABLE_ATEN)
-      list(REMOVE_ITEM onnxruntime_cuda_contrib_ops_cc_srcs
-        "${ONNXRUNTIME_ROOT}/contrib_ops/cuda/aten_ops/aten_op.cc"
-      )
-    endif()
-    if (NOT onnxruntime_USE_NCCL)
-      list(REMOVE_ITEM onnxruntime_cuda_contrib_ops_cc_srcs
-        "${ONNXRUNTIME_ROOT}/contrib_ops/cuda/collective/nccl_kernels.cc"
-      )
-    endif()
-    # add using ONNXRUNTIME_ROOT so they show up under the 'contrib_ops' folder in Visual Studio
-    source_group(TREE ${ONNXRUNTIME_ROOT} FILES ${onnxruntime_cuda_contrib_ops_cc_srcs} ${onnxruntime_cuda_contrib_ops_cu_srcs})
-    list(APPEND onnxruntime_providers_cuda_src ${onnxruntime_cuda_contrib_ops_cc_srcs} ${onnxruntime_cuda_contrib_ops_cu_srcs})
-  endif()
-
-  if (onnxruntime_ENABLE_TRAINING_OPS)
-    file(GLOB_RECURSE onnxruntime_cuda_training_ops_cc_srcs CONFIGURE_DEPENDS
-      "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/*.h"
-      "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/*.cc"
-    )
-
-    file(GLOB_RECURSE onnxruntime_cuda_training_ops_cu_srcs CONFIGURE_DEPENDS
-      "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/*.cu"
-      "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/*.cuh"
-    )
-
-    source_group(TREE ${ORTTRAINING_ROOT} FILES ${onnxruntime_cuda_training_ops_cc_srcs} ${onnxruntime_cuda_training_ops_cu_srcs})
-    list(APPEND onnxruntime_providers_cuda_src ${onnxruntime_cuda_training_ops_cc_srcs} ${onnxruntime_cuda_training_ops_cu_srcs})
-
-    if(NOT onnxruntime_ENABLE_TRAINING)
-      file(GLOB_RECURSE onnxruntime_cuda_full_training_only_srcs
-        "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/collective/*.cc"
-        "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/collective/*.h"
-        "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/communication/*.cc"
-        "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/communication/*.h"
-        "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/controlflow/record.cc"
-        "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/controlflow/record.h"
-        "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/controlflow/wait.cc"
-        "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/controlflow/wait.h"
-        "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/controlflow/yield.cc"
-        "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/gist/*.cc"
-        "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/gist/*.h"
-        "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/gist/*.cu"
-        "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/torch/*.cc"
-        "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/torch/*.h"
-        "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/triton/triton_op.cc"
-      )
-
-      list(REMOVE_ITEM onnxruntime_providers_cuda_src ${onnxruntime_cuda_full_training_only_srcs})
-    elseif(WIN32 OR NOT onnxruntime_USE_NCCL)
-      # NCCL is not support in Windows build
-      file(GLOB_RECURSE onnxruntime_cuda_nccl_op_srcs
-        "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/collective/nccl_common.cc"
-        "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/collective/nccl_kernels.cc"
-        "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/collective/megatron.cc"
-      )
-
-      list(REMOVE_ITEM onnxruntime_providers_cuda_src ${onnxruntime_cuda_nccl_op_srcs})
-    endif()
-  endif()
-
-  if (onnxruntime_REDUCED_OPS_BUILD)
-    substitute_op_reduction_srcs(onnxruntime_providers_cuda_src)
-  endif()
-  # cuda_provider_interface.cc is removed from the object target: onnxruntime_providers_cuda_obj and
-  # add to the lib onnxruntime_providers_cuda separatedly.
-  # onnxruntime_providers_cuda_ut can share all the object files with onnxruntime_providers_cuda except cuda_provider_interface.cc.
-  set(cuda_provider_interface_src ${ONNXRUNTIME_ROOT}/core/providers/cuda/cuda_provider_interface.cc)
-  list(REMOVE_ITEM onnxruntime_providers_cuda_src ${cuda_provider_interface_src})
-  onnxruntime_add_object_library(onnxruntime_providers_cuda_obj ${onnxruntime_providers_cuda_src})
-  onnxruntime_add_shared_library_module(onnxruntime_providers_cuda ${cuda_provider_interface_src} $<TARGET_OBJECTS:onnxruntime_providers_cuda_obj>)
-  # config_cuda_provider_shared_module can be used to config onnxruntime_providers_cuda_obj, onnxruntime_providers_cuda & onnxruntime_providers_cuda_ut.
-  # This function guarantees that all 3 targets have the same configurations.
-  function(config_cuda_provider_shared_module target)
-    if (onnxruntime_REDUCED_OPS_BUILD)
-      add_op_reduction_include_dirs(${target})
-    endif()
-
-    if (HAS_GUARD_CF)
-      target_compile_options(${target} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:-Xcompiler /guard:cf>")
-    endif()
-    if (HAS_QSPECTRE)
-      target_compile_options(${target} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:-Xcompiler /Qspectre>")
-    endif()
-    foreach(ORT_FLAG ${ORT_WARNING_FLAGS})
-        target_compile_options(${target} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:-Xcompiler \"${ORT_FLAG}\">")
-    endforeach()
-    # CUDA 11.3+ supports parallel compilation
-    # https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#options-for-guiding-compiler-driver-threads
-    if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.3)
-      target_compile_options(${target} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--threads \"${onnxruntime_NVCC_THREADS}\">")
-    endif()
-    if (UNIX)
-      target_compile_options(${target} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:-Xcompiler -Wno-reorder>"
-                  "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:-Wno-reorder>")
-      target_compile_options(${target} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:-Xcompiler -Wno-error=sign-compare>"
-                  "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:-Wno-error=sign-compare>")
-    else()
-      #mutex.cuh(91): warning C4834: discarding return value of function with 'nodiscard' attribute
-      target_compile_options(${target} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:-Xcompiler /wd4834>")
-      target_compile_options(${target} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:-Xcompiler /wd4127>")
-    endif()
-
-    onnxruntime_add_include_to_target(${target} onnxruntime_common onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB} flatbuffers::flatbuffers)
-    if (onnxruntime_ENABLE_TRAINING_OPS)
-      onnxruntime_add_include_to_target(${target} onnxruntime_training)
-      if (onnxruntime_ENABLE_TRAINING)
-        target_link_libraries(${target} PRIVATE onnxruntime_training)
-      endif()
-      if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP OR onnxruntime_ENABLE_TRITON)
-        onnxruntime_add_include_to_target(${target} Python::Module)
-      endif()
-    endif()
-
-    add_dependencies(${target} onnxruntime_providers_shared ${onnxruntime_EXTERNAL_DEPENDENCIES})
-    target_link_libraries(${target} PRIVATE cublasLt cublas cudnn curand cufft ${ABSEIL_LIBS} ${ONNXRUNTIME_PROVIDERS_SHARED} Boost::mp11 safeint_interface)
-    if(onnxruntime_CUDNN_HOME)
-      target_include_directories(${target} PRIVATE ${onnxruntime_CUDNN_HOME}/include)
-      target_link_directories(${target} PRIVATE ${onnxruntime_CUDNN_HOME}/lib)
-    endif()
-
-    if (onnxruntime_USE_TRITON_KERNEL)
-      # compile triton kernel, generate .a and .h files
-      include(onnxruntime_compile_triton_kernel.cmake)
-      compile_triton_kernel(triton_kernel_obj_file triton_kernel_header_dir)
-      add_dependencies(${target} onnxruntime_triton_kernel)
-      target_compile_definitions(${target} PRIVATE USE_TRITON_KERNEL)
-      target_include_directories(${target} PRIVATE ${triton_kernel_header_dir})
-      target_link_libraries(${target} PUBLIC -Wl,--whole-archive ${triton_kernel_obj_file} -Wl,--no-whole-archive)
-      # lib cuda needed by cuLaunchKernel
-      target_link_libraries(${target} PRIVATE cuda)
-    endif()
-
-    if (onnxruntime_USE_FLASH_ATTENTION OR onnxruntime_USE_MEMORY_EFFICIENT_ATTENTION)
-      include(cutlass)
-      target_include_directories(${target} PRIVATE ${cutlass_SOURCE_DIR}/include ${cutlass_SOURCE_DIR}/examples)
-    endif()
-
-    target_include_directories(${target} PRIVATE ${ONNXRUNTIME_ROOT} ${CMAKE_CURRENT_BINARY_DIR}  ${eigen_INCLUDE_DIRS} ${TVM_INCLUDES} PUBLIC ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
-    # ${CMAKE_CURRENT_BINARY_DIR} is so that #include "onnxruntime_config.h" inside tensor_shape.h is found
-    set_target_properties(${target} PROPERTIES LINKER_LANGUAGE CUDA)
-    set_target_properties(${target} PROPERTIES FOLDER "ONNXRuntime")
-
-    if (onnxruntime_ENABLE_CUDA_PROFILING) # configure cupti for cuda profiling
-      target_include_directories(${target} PRIVATE ${onnxruntime_CUDA_HOME}/extras/CUPTI/include)
-      target_link_directories(${target} PRIVATE ${onnxruntime_CUDA_HOME}/extras/CUPTI/lib64)
-      target_link_libraries(${target} PRIVATE cupti)
-    endif()
-
-    if (onnxruntime_ENABLE_NVTX_PROFILE AND NOT WIN32)
-      target_link_libraries(${target} PRIVATE nvToolsExt)
-    endif()
-
-    if (onnxruntime_ENABLE_TRAINING_OPS)
-      target_include_directories(${target} PRIVATE ${ORTTRAINING_ROOT} ${MPI_CXX_INCLUDE_DIRS})
-    endif()
-
-    if(onnxruntime_USE_MPI)
-      target_link_libraries(${target} PRIVATE ${MPI_LIBRARIES} ${MPI_CXX_LINK_FLAGS})
-    endif()
-
-    if (onnxruntime_USE_NCCL)
-      target_include_directories(${target} PRIVATE ${NCCL_INCLUDE_DIRS})
-      target_link_libraries(${target} PRIVATE ${NCCL_LIBRARIES})
-    endif()
-
-    if (WIN32)
-      # *.cu cannot use PCH
-      if (NOT onnxruntime_BUILD_CACHE)
-        target_precompile_headers(${target} PUBLIC
-          "${ONNXRUNTIME_ROOT}/core/providers/cuda/cuda_pch.h"
-          "${ONNXRUNTIME_ROOT}/core/providers/cuda/cuda_pch.cc"
-        )
-      endif()
-
-      # minimize the Windows includes.
-      # this avoids an issue with CUDA 11.6 where 'small' is defined in the windows and cuda headers.
-      target_compile_definitions(${target} PRIVATE "WIN32_LEAN_AND_MEAN")
-
-      # disable a warning from the CUDA headers about unreferenced local functions
-      #target_compile_options(${target} PRIVATE /wd4505)
-      set(onnxruntime_providers_cuda_static_library_flags
-          -IGNORE:4221 # LNK4221: This object file does not define any previously undefined public symbols, so it will not be used by any link operation that consumes this library
-      )
-      set_target_properties(${target} PROPERTIES
-          STATIC_LIBRARY_FLAGS "${onnxruntime_providers_cuda_static_library_flags}")
-    endif()
-
-    if(APPLE)
-      set_property(TARGET ${target} APPEND_STRING PROPERTY LINK_FLAGS "-Xlinker -exported_symbols_list ${ONNXRUNTIME_ROOT}/core/providers/cuda/exported_symbols.lst")
-      target_link_libraries(${target} PRIVATE nsync::nsync_cpp)
-    elseif(UNIX)
-      set_property(TARGET ${target} APPEND_STRING PROPERTY LINK_FLAGS "-Xlinker --version-script=${ONNXRUNTIME_ROOT}/core/providers/cuda/version_script.lds -Xlinker --gc-sections")
-      target_link_libraries(${target} PRIVATE nsync::nsync_cpp)
-    elseif(WIN32)
-      set_property(TARGET ${target} APPEND_STRING PROPERTY LINK_FLAGS "-DEF:${ONNXRUNTIME_ROOT}/core/providers/cuda/symbols.def")
-    else()
-      message(FATAL_ERROR "${target} unknown platform, need to specify shared library exports for it")
-    endif()
-
-    if (onnxruntime_ENABLE_ATEN)
-      target_compile_definitions(${target} PRIVATE ENABLE_ATEN)
-    endif()
-  endfunction()
-  config_cuda_provider_shared_module(onnxruntime_providers_cuda_obj)
-  config_cuda_provider_shared_module(onnxruntime_providers_cuda)
-
-  install(TARGETS onnxruntime_providers_cuda
-          ARCHIVE  DESTINATION ${CMAKE_INSTALL_LIBDIR}
-          LIBRARY  DESTINATION ${CMAKE_INSTALL_LIBDIR}
-          RUNTIME  DESTINATION ${CMAKE_INSTALL_BINDIR})
-
+  include(onnxruntime_providers_cuda.cmake)
 endif()
 
 if (onnxruntime_USE_DNNL)
-  file(GLOB_RECURSE onnxruntime_providers_dnnl_cc_srcs CONFIGURE_DEPENDS
-    "${ONNXRUNTIME_ROOT}/core/providers/dnnl/*.h"
-    "${ONNXRUNTIME_ROOT}/core/providers/dnnl/*.cc"
-    "${ONNXRUNTIME_ROOT}/core/providers/shared_library/*.h"
-    "${ONNXRUNTIME_ROOT}/core/providers/shared_library/*.cc"
-  )
-
-  source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_dnnl_cc_srcs})
-  onnxruntime_add_shared_library_module(onnxruntime_providers_dnnl ${onnxruntime_providers_dnnl_cc_srcs})
-  target_link_directories(onnxruntime_providers_dnnl PRIVATE ${DNNL_LIB_DIR})
-  if (MSVC AND onnxruntime_ENABLE_STATIC_ANALYSIS)
-    # dnnl_convgrad.cc(47,0): Warning C6262: Function uses '38816' bytes of stack:  exceeds /analyze:stacksize '16384'.  Consider moving some data to heap.
-    target_compile_options(onnxruntime_providers_dnnl PRIVATE  "/analyze:stacksize 131072")
-  endif()
-
-  add_dependencies(onnxruntime_providers_dnnl onnxruntime_providers_shared project_dnnl ${onnxruntime_EXTERNAL_DEPENDENCIES})
-  target_include_directories(onnxruntime_providers_dnnl PRIVATE ${ONNXRUNTIME_ROOT} ${eigen_INCLUDE_DIRS} ${DNNL_INCLUDE_DIR} ${DNNL_OCL_INCLUDE_DIR})
-  # ${CMAKE_CURRENT_BINARY_DIR} is so that #include "onnxruntime_config.h" inside tensor_shape.h is found
-  target_link_libraries(onnxruntime_providers_dnnl PRIVATE dnnl ${ONNXRUNTIME_PROVIDERS_SHARED} Boost::mp11 ${ABSEIL_LIBS} ${GSL_TARGET} safeint_interface)
-  install(FILES ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/dnnl/dnnl_provider_options.h
-    DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/)
-  set_target_properties(onnxruntime_providers_dnnl PROPERTIES FOLDER "ONNXRuntime")
-  set_target_properties(onnxruntime_providers_dnnl PROPERTIES LINKER_LANGUAGE CXX)
-
-  # Needed for the provider interface, as it includes training headers when training is enabled
-  if (onnxruntime_ENABLE_TRAINING_OPS)
-    target_include_directories(onnxruntime_providers_dnnl PRIVATE ${ORTTRAINING_ROOT})
-  endif()
-
-  # Needed for threadpool handling
-  if(onnxruntime_BUILD_JAVA)
-    add_compile_definitions(DNNL_JAVA)
-  endif()
-
-  if(APPLE)
-    set_property(TARGET onnxruntime_providers_dnnl APPEND_STRING PROPERTY LINK_FLAGS "-Xlinker -exported_symbols_list ${ONNXRUNTIME_ROOT}/core/providers/dnnl/exported_symbols.lst")
-    set_target_properties(onnxruntime_providers_dnnl PROPERTIES
-      INSTALL_RPATH "@loader_path"
-      BUILD_WITH_INSTALL_RPATH TRUE
-      INSTALL_RPATH_USE_LINK_PATH FALSE)
-    target_link_libraries(onnxruntime_providers_dnnl PRIVATE nsync::nsync_cpp)
-  elseif(UNIX)
-    set_property(TARGET onnxruntime_providers_dnnl APPEND_STRING PROPERTY LINK_FLAGS "-Xlinker --version-script=${ONNXRUNTIME_ROOT}/core/providers/dnnl/version_script.lds -Xlinker --gc-sections -Xlinker -rpath=\$ORIGIN")
-    target_link_libraries(onnxruntime_providers_dnnl PRIVATE nsync::nsync_cpp)
-  elseif(WIN32)
-    set_property(TARGET onnxruntime_providers_dnnl APPEND_STRING PROPERTY LINK_FLAGS "-DEF:${ONNXRUNTIME_ROOT}/core/providers/dnnl/symbols.def")
-  else()
-    message(FATAL_ERROR "onnxruntime_providers_dnnl unknown platform, need to specify shared library exports for it")
-  endif()
-
-  install(TARGETS onnxruntime_providers_dnnl
-          ARCHIVE  DESTINATION ${CMAKE_INSTALL_LIBDIR}
-          LIBRARY  DESTINATION ${CMAKE_INSTALL_LIBDIR}
-          RUNTIME  DESTINATION ${CMAKE_INSTALL_BINDIR})
+  include(onnxruntime_providers_dnnl.cmake)
 endif()
 
 if (onnxruntime_USE_TENSORRT)
-  add_definitions(-DUSE_TENSORRT=1)
-  if (onnxruntime_TENSORRT_PLACEHOLDER_BUILDER)
-    add_definitions(-DORT_TENSORRT_PLACEHOLDER_BUILDER)
-  endif()
-  set(BUILD_LIBRARY_ONLY 1)
-  add_definitions("-DONNX_ML=1")
-  add_definitions("-DONNX_NAMESPACE=onnx")
-  set(CUDA_INCLUDE_DIRS ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
-  set(TENSORRT_ROOT ${onnxruntime_TENSORRT_HOME})
-  set(OLD_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
-  set(PROTOBUF_LIBRARY ${PROTOBUF_LIB})
-  if (WIN32)
-    add_definitions(-D_SILENCE_EXPERIMENTAL_FILESYSTEM_DEPRECATION_WARNING=1)
-    set(OLD_CMAKE_CUDA_FLAGS ${CMAKE_CUDA_FLAGS})
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4099 /wd4551 /wd4505 /wd4515 /wd4706 /wd4456 /wd4324 /wd4701 /wd4804 /wd4702 /wd4458 /wd4703")
-    if (CMAKE_BUILD_TYPE STREQUAL "Debug")
-      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4805")
-    endif()
-    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -include algorithm")
-    set(DISABLED_WARNINGS_FOR_TRT /wd4456)
-  endif()
-  if ( CMAKE_COMPILER_IS_GNUCC )
-    set(CMAKE_CXX_FLAGS  "${CMAKE_CXX_FLAGS} -Wno-unused-parameter -Wno-missing-field-initializers")
-  endif()
-  set(CXX_VERSION_DEFINED TRUE)
-
-  # There is an issue when running "Debug build" TRT EP with "Release build" TRT builtin parser on Windows.
-  # We enforce following workaround for now until the real fix.
-  if (WIN32 AND CMAKE_BUILD_TYPE STREQUAL "Debug")
-    set(onnxruntime_USE_TENSORRT_BUILTIN_PARSER OFF)
-    MESSAGE(STATUS "[Note] There is an issue when running \"Debug build\" TRT EP with \"Release build\" TRT built-in parser on Windows. This build will use tensorrt oss parser instead.")
-  endif()
-
-  if (onnxruntime_USE_TENSORRT_BUILTIN_PARSER)
-    # Add TensorRT library
-    find_path(TENSORRT_INCLUDE_DIR NvInfer.h
-      HINTS ${TENSORRT_ROOT}
-      PATH_SUFFIXES include)
-    MESSAGE(STATUS "Found TensorRT headers at ${TENSORRT_INCLUDE_DIR}")
-    find_library(TENSORRT_LIBRARY_INFER nvinfer
-      HINTS ${TENSORRT_ROOT}
-      PATH_SUFFIXES lib lib64 lib/x64)
-    find_library(TENSORRT_LIBRARY_INFER_PLUGIN nvinfer_plugin
-      HINTS  ${TENSORRT_ROOT}
-      PATH_SUFFIXES lib lib64 lib/x64)
-    find_library(TENSORRT_LIBRARY_NVONNXPARSER nvonnxparser
-      HINTS  ${TENSORRT_ROOT}
-      PATH_SUFFIXES lib lib64 lib/x64)
-    set(TENSORRT_LIBRARY ${TENSORRT_LIBRARY_INFER} ${TENSORRT_LIBRARY_INFER_PLUGIN} ${TENSORRT_LIBRARY_NVONNXPARSER})
-    MESSAGE(STATUS "Find TensorRT libs at ${TENSORRT_LIBRARY}")
-  else()
-    FetchContent_Declare(
-      onnx_tensorrt
-      URL ${DEP_URL_onnx_tensorrt}
-      URL_HASH SHA1=${DEP_SHA1_onnx_tensorrt}
-    )
-    # The onnx_tensorrt repo contains a test program, getSupportedAPITest, which doesn't support Windows. It uses
-    # unistd.h. So we must exclude it from our build. onnxruntime_fetchcontent_makeavailable is for the purpose.
-    onnxruntime_fetchcontent_makeavailable(onnx_tensorrt)
-    include_directories(${onnx_tensorrt_SOURCE_DIR})
-    set(CMAKE_CXX_FLAGS ${OLD_CMAKE_CXX_FLAGS})
-    set(CUDA_INCLUDE_DIR ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) # onnx-tensorrt repo needs this variable to build
-    if ( CMAKE_COMPILER_IS_GNUCC )
-      set(CMAKE_CXX_FLAGS  "${CMAKE_CXX_FLAGS} -Wno-unused-parameter")
-    endif()
-    if (WIN32)
-      set(CMAKE_CUDA_FLAGS ${OLD_CMAKE_CUDA_FLAGS})
-      unset(PROTOBUF_LIBRARY)
-      unset(OLD_CMAKE_CXX_FLAGS)
-      unset(OLD_CMAKE_CUDA_FLAGS)
-      set_target_properties(nvonnxparser PROPERTIES LINK_FLAGS "/ignore:4199")
-      target_compile_options(nvonnxparser_static PRIVATE /FIio.h /wd4100)
-      target_compile_options(nvonnxparser PRIVATE /FIio.h /wd4100)
-    endif()
-    set(onnxparser_link_libs nvonnxparser_static)
-  endif()
-
-  include_directories(${TENSORRT_INCLUDE_DIR})
-  # ${TENSORRT_LIBRARY} is empty if we link nvonnxparser_static.
-  # nvonnxparser_static is linked against tensorrt libraries in onnx-tensorrt
-  # See https://github.com/onnx/onnx-tensorrt/blob/8af13d1b106f58df1e98945a5e7c851ddb5f0791/CMakeLists.txt#L121
-  set(trt_link_libs cudnn cublas ${CMAKE_DL_LIBS} ${TENSORRT_LIBRARY})
-
-  file(GLOB_RECURSE onnxruntime_providers_tensorrt_cc_srcs CONFIGURE_DEPENDS
-    "${ONNXRUNTIME_ROOT}/core/providers/tensorrt/*.h"
-    "${ONNXRUNTIME_ROOT}/core/providers/tensorrt/*.cc"
-    "${ONNXRUNTIME_ROOT}/core/providers/shared_library/*.h"
-    "${ONNXRUNTIME_ROOT}/core/providers/shared_library/*.cc"
-    "${ONNXRUNTIME_ROOT}/core/providers/cuda/cuda_stream_handle.h"
-    "${ONNXRUNTIME_ROOT}/core/providers/cuda/cuda_stream_handle.cc"
-    "${ONNXRUNTIME_ROOT}/core/providers/cuda/cuda_graph.h"
-    "${ONNXRUNTIME_ROOT}/core/providers/cuda/cuda_graph.cc"
-  )
-
-  source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_tensorrt_cc_srcs})
-  onnxruntime_add_shared_library_module(onnxruntime_providers_tensorrt ${onnxruntime_providers_tensorrt_cc_srcs})
-  onnxruntime_add_include_to_target(onnxruntime_providers_tensorrt onnxruntime_common onnx flatbuffers::flatbuffers Boost::mp11 safeint_interface)
-  add_dependencies(onnxruntime_providers_tensorrt onnxruntime_providers_shared ${onnxruntime_EXTERNAL_DEPENDENCIES})
-  if (onnxruntime_USE_TENSORRT_BUILTIN_PARSER)
-    target_link_libraries(onnxruntime_providers_tensorrt PRIVATE ${trt_link_libs} cudart ${ONNXRUNTIME_PROVIDERS_SHARED} ${PROTOBUF_LIB} flatbuffers::flatbuffers Boost::mp11 safeint_interface ${ABSEIL_LIBS})
-  else()
-    target_link_libraries(onnxruntime_providers_tensorrt PRIVATE ${onnxparser_link_libs} ${trt_link_libs} cudart ${ONNXRUNTIME_PROVIDERS_SHARED} ${PROTOBUF_LIB} flatbuffers::flatbuffers ${ABSEIL_LIBS})
-  endif()
-  target_include_directories(onnxruntime_providers_tensorrt PRIVATE ${ONNXRUNTIME_ROOT} ${CMAKE_CURRENT_BINARY_DIR} ${eigen_INCLUDE_DIRS} PUBLIC ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
-  if(onnxruntime_CUDNN_HOME)
-    target_include_directories(onnxruntime_providers_tensorrt PRIVATE ${onnxruntime_CUDNN_HOME}/include)
-  endif()
-
-  # ${CMAKE_CURRENT_BINARY_DIR} is so that #include "onnxruntime_config.h" inside tensor_shape.h is found
-  set_target_properties(onnxruntime_providers_tensorrt PROPERTIES PUBLIC_HEADER ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h)
-  set_target_properties(onnxruntime_providers_tensorrt PROPERTIES LINKER_LANGUAGE CUDA)
-  set_target_properties(onnxruntime_providers_tensorrt PROPERTIES FOLDER "ONNXRuntime")
-  target_compile_definitions(onnxruntime_providers_tensorrt PRIVATE ONNXIFI_BUILD_LIBRARY=1)
-  target_compile_options(onnxruntime_providers_tensorrt PRIVATE ${DISABLED_WARNINGS_FOR_TRT})
-  if (WIN32)
-    target_compile_options(onnxruntime_providers_tensorrt INTERFACE /wd4456)
-  endif()
-
-  # Needed for the provider interface, as it includes training headers when training is enabled
-  if (onnxruntime_ENABLE_TRAINING_OPS)
-    target_include_directories(onnxruntime_providers_tensorrt PRIVATE ${ORTTRAINING_ROOT})
-    if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
-      onnxruntime_add_include_to_target(onnxruntime_providers_tensorrt Python::Module)
-    endif()
-  endif()
-
-  if(APPLE)
-    set_property(TARGET onnxruntime_providers_tensorrt APPEND_STRING PROPERTY LINK_FLAGS "-Xlinker -exported_symbols_list ${ONNXRUNTIME_ROOT}/core/providers/tensorrt/exported_symbols.lst")
-    target_link_libraries(onnxruntime_providers_tensorrt PRIVATE nsync::nsync_cpp)
-  elseif(UNIX)
-    set_property(TARGET onnxruntime_providers_tensorrt APPEND_STRING PROPERTY COMPILE_FLAGS "-Wno-deprecated-declarations")
-    set_property(TARGET onnxruntime_providers_tensorrt APPEND_STRING PROPERTY LINK_FLAGS "-Xlinker --version-script=${ONNXRUNTIME_ROOT}/core/providers/tensorrt/version_script.lds -Xlinker --gc-sections")
-    target_link_libraries(onnxruntime_providers_tensorrt PRIVATE nsync::nsync_cpp stdc++fs)
-  elseif(WIN32)
-    set_property(TARGET onnxruntime_providers_tensorrt APPEND_STRING PROPERTY LINK_FLAGS "-DEF:${ONNXRUNTIME_ROOT}/core/providers/tensorrt/symbols.def")
-  else()
-    message(FATAL_ERROR "onnxruntime_providers_tensorrt unknown platform, need to specify shared library exports for it")
-  endif()
-
-  install(TARGETS onnxruntime_providers_tensorrt
-          PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime
-          ARCHIVE  DESTINATION ${CMAKE_INSTALL_LIBDIR}
-          LIBRARY  DESTINATION ${CMAKE_INSTALL_LIBDIR}
-          RUNTIME  DESTINATION ${CMAKE_INSTALL_LIBDIR})
+  include(onnxruntime_providers_tensorrt.cmake)
 endif()
 
 if (onnxruntime_USE_VITISAI)
-  if ("${GIT_COMMIT_ID}" STREQUAL "")
-  execute_process(
-    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
-    COMMAND git rev-parse HEAD
-    OUTPUT_VARIABLE GIT_COMMIT_ID
-    OUTPUT_STRIP_TRAILING_WHITESPACE)
-  endif()
-  configure_file(${ONNXRUNTIME_ROOT}/core/providers/vitisai/imp/version_info.hpp.in ${CMAKE_CURRENT_BINARY_DIR}/VitisAI/version_info.h)
-  file(GLOB onnxruntime_providers_vitisai_cc_srcs CONFIGURE_DEPENDS
-    "${ONNXRUNTIME_ROOT}/core/providers/vitisai/*.cc"
-    "${ONNXRUNTIME_ROOT}/core/providers/vitisai/*.h"
-    "${ONNXRUNTIME_ROOT}/core/providers/vitisai/imp/*.cc"
-    "${ONNXRUNTIME_ROOT}/core/providers/vitisai/imp/*.h"
-  )
-  list(REMOVE_ITEM onnxruntime_providers_vitisai_cc_srcs "${ONNXRUNTIME_ROOT}/core/providers/vitisai/onnxruntime_vitisai_ep_stub.cc")
-  source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_vitisai_cc_srcs})
-  onnxruntime_add_static_library(onnxruntime_providers_vitisai ${onnxruntime_providers_vitisai_cc_srcs})
-  onnxruntime_add_include_to_target(onnxruntime_providers_vitisai onnxruntime_common onnxruntime_framework onnx onnx_proto)
-  onnxruntime_add_shared_library(onnxruntime_vitisai_ep ${ONNXRUNTIME_ROOT}/core/providers/vitisai/onnxruntime_vitisai_ep_stub.cc)
-  onnxruntime_add_include_to_target(onnxruntime_vitisai_ep onnxruntime_common)
-  target_include_directories(onnxruntime_vitisai_ep PRIVATE "${ONNXRUNTIME_ROOT}" "${ONNXRUNTIME_ROOT}/core/providers/vitisai/include")
-  target_link_libraries(onnxruntime_providers_vitisai PUBLIC onnxruntime_vitisai_ep PRIVATE onnx protobuf::libprotobuf nlohmann_json::nlohmann_json )
-  target_compile_definitions(onnxruntime_vitisai_ep
-                           PRIVATE "-DONNXRUNTIME_VITISAI_EP_STUB=1" "-DONNXRUNTIME_VITISAI_EP_EXPORT_DLL=1")
-  if(NOT MSVC)
-    target_compile_options(onnxruntime_providers_vitisai PUBLIC $<$<CONFIG:DEBUG>:-U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0>)
-  endif(NOT MSVC)
-
-  target_include_directories(onnxruntime_providers_vitisai PRIVATE "${ONNXRUNTIME_ROOT}/core/providers/vitisai/include" ${XRT_INCLUDE_DIRS} ${CMAKE_CURRENT_BINARY_DIR}/VitisAI)
-  if(MSVC)
-    target_compile_options(onnxruntime_providers_vitisai PRIVATE "/Zc:__cplusplus")
-    # for dll interface warning.
-    target_compile_options(onnxruntime_providers_vitisai PRIVATE "/wd4251")
-    # for unused formal parameter
-    target_compile_options(onnxruntime_providers_vitisai PRIVATE "/wd4100")
-  else(MSVC)
-    target_compile_options(onnxruntime_providers_vitisai PRIVATE -Wno-unused-parameter)
-  endif(MSVC)
-
-  set_target_properties(onnxruntime_providers_vitisai PROPERTIES FOLDER "ONNXRuntime")
-  set_target_properties(onnxruntime_providers_vitisai PROPERTIES LINKER_LANGUAGE CXX)
-
-  if (NOT onnxruntime_BUILD_SHARED_LIB)
-    install(TARGETS onnxruntime_providers_vitisai
-            ARCHIVE   DESTINATION ${CMAKE_INSTALL_LIBDIR}
-            LIBRARY   DESTINATION ${CMAKE_INSTALL_LIBDIR}
-            RUNTIME   DESTINATION ${CMAKE_INSTALL_BINDIR}
-            FRAMEWORK DESTINATION ${CMAKE_INSTALL_BINDIR})
-  endif()
+  include(onnxruntime_providers_vitisai.cmake)
 endif()
 
 if (onnxruntime_USE_OPENVINO)
-
-#  include_directories("${CMAKE_CURRENT_BINARY_DIR}/onnx")
-  file(GLOB_RECURSE onnxruntime_providers_openvino_cc_srcs CONFIGURE_DEPENDS
-    "${ONNXRUNTIME_ROOT}/core/providers/openvino/*.h"
-    "${ONNXRUNTIME_ROOT}/core/providers/openvino/*.cc"
-    "${ONNXRUNTIME_ROOT}/core/providers/openvino/*.hpp"
-    "${ONNXRUNTIME_ROOT}/core/providers/openvino/*.cpp"
-    "${ONNXRUNTIME_ROOT}/core/providers/shared_library/*.h"
-    "${ONNXRUNTIME_ROOT}/core/providers/shared_library/*.cc"
-  )
-
-  if (WIN32)
-      set(CMAKE_MAP_IMPORTED_CONFIG_RELWITHDEBINFO Release)
-  endif()
-
-  # Header paths
-  find_package(InferenceEngine REQUIRED)
-  find_package(ngraph REQUIRED)
-
-  if (OPENVINO_2022_1 OR OPENVINO_2022_2)
-  find_package(OpenVINO REQUIRED COMPONENTS Runtime ONNX)
-  list (OV_20_LIBS openvino::frontend::onnx openvino::runtime)
-  endif()
-
-  if (WIN32)
-    unset(CMAKE_MAP_IMPORTED_CONFIG_RELWITHDEBINFO)
-  endif()
-
-  if ((DEFINED ENV{OPENCL_LIBS}) AND (DEFINED ENV{OPENCL_INCS}))
-    add_definitions(-DIO_BUFFER_ENABLED=1)
-    list(APPEND OPENVINO_LIB_LIST $ENV{OPENCL_LIBS} ${OV_20_LIBS} ${InferenceEngine_LIBRARIES} ${NGRAPH_LIBRARIES} ngraph::onnx_importer ${PYTHON_LIBRARIES})
-  else()
-    list(APPEND OPENVINO_LIB_LIST ${OV_20_LIBS} ${InferenceEngine_LIBRARIES} ${NGRAPH_LIBRARIES} ngraph::onnx_importer ${PYTHON_LIBRARIES})
-  endif()
-
-  source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_openvino_cc_srcs})
-  onnxruntime_add_shared_library_module(onnxruntime_providers_openvino ${onnxruntime_providers_openvino_cc_srcs} "${ONNXRUNTIME_ROOT}/core/dll/onnxruntime.rc")
-  onnxruntime_add_include_to_target(onnxruntime_providers_openvino onnxruntime_common onnx)
-  install(FILES ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/openvino/openvino_provider_factory.h
-    DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/)
-  set_target_properties(onnxruntime_providers_openvino PROPERTIES LINKER_LANGUAGE CXX)
-  set_target_properties(onnxruntime_providers_openvino PROPERTIES FOLDER "ONNXRuntime")
-  if(NOT MSVC)
-    target_compile_options(onnxruntime_providers_openvino PRIVATE "-Wno-parentheses")
-  endif()
-  add_dependencies(onnxruntime_providers_openvino onnxruntime_providers_shared ${onnxruntime_EXTERNAL_DEPENDENCIES})
-  target_include_directories(onnxruntime_providers_openvino SYSTEM PUBLIC ${ONNXRUNTIME_ROOT} ${CMAKE_CURRENT_BINARY_DIR} ${eigen_INCLUDE_DIRS} ${OpenVINO_INCLUDE_DIR} ${OPENVINO_INCLUDE_DIR_LIST} ${PYTHON_INCLUDE_DIRS} $ENV{OPENCL_INCS} $ENV{OPENCL_INCS}/../../cl_headers/)
-  target_link_libraries(onnxruntime_providers_openvino ${ONNXRUNTIME_PROVIDERS_SHARED} Boost::mp11 ${OPENVINO_LIB_LIST} ${ABSEIL_LIBS})
-
-  target_compile_definitions(onnxruntime_providers_openvino PRIVATE VER_MAJOR=${VERSION_MAJOR_PART})
-  target_compile_definitions(onnxruntime_providers_openvino PRIVATE VER_MINOR=${VERSION_MINOR_PART})
-  target_compile_definitions(onnxruntime_providers_openvino PRIVATE VER_BUILD=${VERSION_BUILD_PART})
-  target_compile_definitions(onnxruntime_providers_openvino PRIVATE VER_PRIVATE=${VERSION_PRIVATE_PART})
-  target_compile_definitions(onnxruntime_providers_openvino PRIVATE VER_STRING=\"${VERSION_STRING}\")
-  target_compile_definitions(onnxruntime_providers_openvino PRIVATE FILE_NAME=\"onnxruntime_providers_openvino.dll\")
-
-  if(MSVC)
-    target_compile_options(onnxruntime_providers_openvino PUBLIC /wd4099 /wd4275 /wd4100 /wd4005 /wd4244 /wd4267)
-  endif()
-
-  # Needed for the provider interface, as it includes training headers when training is enabled
-  if (onnxruntime_ENABLE_TRAINING_OPS)
-    target_include_directories(onnxruntime_providers_openvino PRIVATE ${ORTTRAINING_ROOT})
-  endif()
-
-  if(APPLE)
-    set_property(TARGET onnxruntime_providers_openvino APPEND_STRING PROPERTY LINK_FLAGS "-Xlinker -exported_symbols_list ${ONNXRUNTIME_ROOT}/core/providers/openvino/exported_symbols.lst")
-  elseif(UNIX)
-    set_property(TARGET onnxruntime_providers_openvino APPEND_STRING PROPERTY LINK_FLAGS "-Xlinker --version-script=${ONNXRUNTIME_ROOT}/core/providers/openvino/version_script.lds -Xlinker --gc-sections")
-  elseif(WIN32)
-    set_property(TARGET onnxruntime_providers_openvino APPEND_STRING PROPERTY LINK_FLAGS "-DEF:${ONNXRUNTIME_ROOT}/core/providers/openvino/symbols.def")
-  else()
-    message(FATAL_ERROR "onnxruntime_providers_openvino unknown platform, need to specify shared library exports for it")
-  endif()
-
-  install(TARGETS onnxruntime_providers_openvino
-          ARCHIVE  DESTINATION ${CMAKE_INSTALL_LIBDIR}
-          LIBRARY  DESTINATION ${CMAKE_INSTALL_LIBDIR}
-          RUNTIME  DESTINATION ${CMAKE_INSTALL_BINDIR})
+  include(onnxruntime_providers_openvino.cmake)
 endif()
 
 if (onnxruntime_USE_COREML)
-  if (onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_EXTENDED_MINIMAL_BUILD)
-    message(FATAL_ERROR "CoreML EP can not be used in a basic minimal build. Please build with '--minimal_build extended'")
-  endif()
-
-  add_compile_definitions(USE_COREML=1)
-
-  # Compile CoreML proto definition to ${CMAKE_CURRENT_BINARY_DIR}/coreml
-  if (CMAKE_SYSTEM_NAME STREQUAL "Darwin" OR CMAKE_SYSTEM_NAME STREQUAL "iOS")
-    set(COREML_PROTO_ROOT ${PROJECT_SOURCE_DIR}/../onnxruntime/core/providers/coreml/mlmodel_format)
-    file(GLOB coreml_proto_srcs
-      "${COREML_PROTO_ROOT}/*.proto"
-    )
-    onnxruntime_add_static_library(onnxruntime_coreml_proto ${coreml_proto_srcs})
-    target_include_directories(onnxruntime_coreml_proto PUBLIC $<TARGET_PROPERTY:${PROTOBUF_LIB},INTERFACE_INCLUDE_DIRECTORIES> "${CMAKE_CURRENT_BINARY_DIR}")
-    target_compile_definitions(onnxruntime_coreml_proto PUBLIC $<TARGET_PROPERTY:${PROTOBUF_LIB},INTERFACE_COMPILE_DEFINITIONS>)
-    set_target_properties(onnxruntime_coreml_proto PROPERTIES COMPILE_FLAGS "-fvisibility=hidden")
-    set_target_properties(onnxruntime_coreml_proto PROPERTIES COMPILE_FLAGS "-fvisibility-inlines-hidden")
-    set(_src_sub_dir "coreml/")
-    onnxruntime_protobuf_generate(
-      APPEND_PATH
-      GEN_SRC_SUB_DIR ${_src_sub_dir}
-      IMPORT_DIRS ${COREML_PROTO_ROOT}
-      TARGET onnxruntime_coreml_proto
-    )
-
-    if (NOT onnxruntime_BUILD_SHARED_LIB)
-      install(TARGETS onnxruntime_coreml_proto
-              ARCHIVE   DESTINATION ${CMAKE_INSTALL_LIBDIR}
-              LIBRARY   DESTINATION ${CMAKE_INSTALL_LIBDIR}
-              RUNTIME   DESTINATION ${CMAKE_INSTALL_BINDIR}
-              FRAMEWORK DESTINATION ${CMAKE_INSTALL_BINDIR}
-      )
-    endif()
-  endif()
-
-  # These are shared utils,
-  # TODO, move this to a separated lib when used by EPs other than NNAPI and CoreML
-  file(GLOB_RECURSE onnxruntime_providers_shared_utils_cc_srcs CONFIGURE_DEPENDS
-    "${ONNXRUNTIME_ROOT}/core/providers/shared/utils/utils.h"
-    "${ONNXRUNTIME_ROOT}/core/providers/shared/utils/utils.cc"
-  )
-
-  file(GLOB
-    onnxruntime_providers_coreml_cc_srcs_top CONFIGURE_DEPENDS
-    "${ONNXRUNTIME_ROOT}/core/providers/coreml/*.h"
-    "${ONNXRUNTIME_ROOT}/core/providers/coreml/*.cc"
-  )
-
-  # Add builder source code
-  file(GLOB_RECURSE
-    onnxruntime_providers_coreml_cc_srcs_nested CONFIGURE_DEPENDS
-    "${ONNXRUNTIME_ROOT}/core/providers/coreml/builders/*.h"
-    "${ONNXRUNTIME_ROOT}/core/providers/coreml/builders/*.cc"
-  )
-  if (NOT CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND NOT CMAKE_SYSTEM_NAME STREQUAL "iOS")
-    list(REMOVE_ITEM onnxruntime_providers_coreml_cc_srcs_nested
-    "${ONNXRUNTIME_ROOT}/core/providers/coreml/builders/model_builder.h"
-    "${ONNXRUNTIME_ROOT}/core/providers/coreml/builders/model_builder.cc"
-    )
-  endif()
-
-  # Add CoreML objective c++ source code
-  if (CMAKE_SYSTEM_NAME STREQUAL "Darwin" OR CMAKE_SYSTEM_NAME STREQUAL "iOS")
-    file(GLOB
-      onnxruntime_providers_coreml_objcc_srcs CONFIGURE_DEPENDS
-      "${ONNXRUNTIME_ROOT}/core/providers/coreml/model/model.h"
-      "${ONNXRUNTIME_ROOT}/core/providers/coreml/model/model.mm"
-      "${ONNXRUNTIME_ROOT}/core/providers/coreml/model/host_utils.h"
-      "${ONNXRUNTIME_ROOT}/core/providers/coreml/model/host_utils.mm"
-    )
-  endif()
-
-  set(onnxruntime_providers_coreml_cc_srcs
-    ${onnxruntime_providers_coreml_cc_srcs_top}
-    ${onnxruntime_providers_coreml_cc_srcs_nested}
-    ${onnxruntime_providers_shared_utils_cc_srcs}
-  )
-
-  source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_coreml_cc_srcs})
-  onnxruntime_add_static_library(onnxruntime_providers_coreml
-    ${onnxruntime_providers_coreml_cc_srcs} ${onnxruntime_providers_coreml_objcc_srcs}
-  )
-  onnxruntime_add_include_to_target(onnxruntime_providers_coreml
-    onnxruntime_common onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB}  flatbuffers::flatbuffers Boost::mp11 safeint_interface
-  )
-  if (CMAKE_SYSTEM_NAME STREQUAL "Darwin" OR CMAKE_SYSTEM_NAME STREQUAL "iOS")
-    onnxruntime_add_include_to_target(onnxruntime_providers_coreml onnxruntime_coreml_proto)
-    target_link_libraries(onnxruntime_providers_coreml PRIVATE onnxruntime_coreml_proto "-framework Foundation" "-framework CoreML")
-    add_dependencies(onnxruntime_providers_coreml onnxruntime_coreml_proto)
-  endif()
-  add_dependencies(onnxruntime_providers_coreml ${onnxruntime_EXTERNAL_DEPENDENCIES})
-
-  set_target_properties(onnxruntime_providers_coreml PROPERTIES CXX_STANDARD_REQUIRED ON)
-  set_target_properties(onnxruntime_providers_coreml PROPERTIES FOLDER "ONNXRuntime")
-  target_include_directories(onnxruntime_providers_coreml PRIVATE ${ONNXRUNTIME_ROOT} ${coreml_INCLUDE_DIRS})
-  set_target_properties(onnxruntime_providers_coreml PROPERTIES LINKER_LANGUAGE CXX)
-
-  if (NOT onnxruntime_BUILD_SHARED_LIB)
-    install(TARGETS onnxruntime_providers_coreml
-            ARCHIVE   DESTINATION ${CMAKE_INSTALL_LIBDIR}
-            LIBRARY   DESTINATION ${CMAKE_INSTALL_LIBDIR}
-            RUNTIME   DESTINATION ${CMAKE_INSTALL_BINDIR}
-            FRAMEWORK DESTINATION ${CMAKE_INSTALL_BINDIR})
-  endif()
+  include(onnxruntime_providers_coreml.cmake)
 endif()
 
 if (onnxruntime_USE_WEBNN)
-  if (onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_EXTENDED_MINIMAL_BUILD)
-    message(FATAL_ERROR "WebNN EP can not be used in a basic minimal build. Please build with '--minimal_build extended'")
-  endif()
-
-  add_compile_definitions(USE_WEBNN=1)
-  if (onnxruntime_ENABLE_WEBASSEMBLY_THREADS)
-    add_definitions(-DENABLE_WEBASSEMBLY_THREADS=1)
-  endif()
-  file(GLOB_RECURSE onnxruntime_providers_webnn_cc_srcs CONFIGURE_DEPENDS
-    "${ONNXRUNTIME_ROOT}/core/providers/webnn/*.h"
-    "${ONNXRUNTIME_ROOT}/core/providers/webnn/*.cc"
-    "${ONNXRUNTIME_ROOT}/core/providers/shared/utils/utils.h"
-    "${ONNXRUNTIME_ROOT}/core/providers/shared/utils/utils.cc"
-  )
-
-  source_group(TREE ${REPO_ROOT} FILES ${onnxruntime_providers_webnn_cc_srcs})
-  onnxruntime_add_static_library(onnxruntime_providers_webnn ${onnxruntime_providers_webnn_cc_srcs})
-  onnxruntime_add_include_to_target(onnxruntime_providers_webnn onnxruntime_common onnx onnx_proto flatbuffers::flatbuffers Boost::mp11 safeint_interface)
-
-  add_dependencies(onnxruntime_providers_webnn onnx ${onnxruntime_EXTERNAL_DEPENDENCIES})
-  set_target_properties(onnxruntime_providers_webnn PROPERTIES FOLDER "ONNXRuntime")
-  set_target_properties(onnxruntime_providers_webnn PROPERTIES LINKER_LANGUAGE CXX)
+  include(onnxruntime_providers_webnn.cmake)
 endif()
 
 if (onnxruntime_USE_NNAPI_BUILTIN)
-  if (onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_EXTENDED_MINIMAL_BUILD)
-    message(FATAL_ERROR "NNAPI can not be used in a basic minimal build. Please build with '--minimal_build extended'")
-  endif()
-
-  add_compile_definitions(USE_NNAPI=1)
-
-  # This is the minimum Android API Level required by ORT NNAPI EP to run
-  # ORT running on any host system with Android API level less than this will fall back to CPU EP
-  if(onnxruntime_NNAPI_MIN_API)
-    add_compile_definitions(ORT_NNAPI_MIN_API_LEVEL=${onnxruntime_NNAPI_MIN_API})
-  endif()
-
-  # This is the maximum Android API level supported in the ort model conversion for NNAPI EP
-  # Note: This is only for running NNAPI for ort format model conversion on non-Android system since we cannot
-  #       get the actually Android system version.
-  if(onnxruntime_NNAPI_HOST_API)
-    if(CMAKE_SYSTEM_NAME STREQUAL "Android")
-      message(FATAL_ERROR "onnxruntime_NNAPI_HOST_API should only be set for non-Android target")
-    endif()
-    add_compile_definitions(ORT_NNAPI_MAX_SUPPORTED_API_LEVEL=${onnxruntime_NNAPI_HOST_API})
-  endif()
-
-  set(onnxruntime_provider_nnapi_cc_src_patterns
-      "${ONNXRUNTIME_ROOT}/core/providers/nnapi/*.h"
-      "${ONNXRUNTIME_ROOT}/core/providers/nnapi/*.cc"
-      "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/*.h"
-      "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/*.cc"
-      "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/builders/*.h"
-      "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/builders/*.cc"
-      "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/builders/impl/*.h"
-      "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/builders/impl/*.cc"
-      "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/nnapi_lib/NeuralNetworksTypes.h"
-      "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/nnapi_lib/NeuralNetworksWrapper.h"
-      "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/nnapi_lib/NeuralNetworksWrapper.cc"
-      "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/nnapi_lib/nnapi_implementation.h"
-  )
-
-  # On Android, use the actual NNAPI implementation.
-  # Otherwise, use a stub implementation to support some unit testing.
-  if(CMAKE_SYSTEM_NAME STREQUAL "Android")
-    list(APPEND onnxruntime_provider_nnapi_cc_src_patterns
-         "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/nnapi_lib/nnapi_implementation.cc")
-  else()
-    list(APPEND onnxruntime_provider_nnapi_cc_src_patterns
-         "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/nnapi_lib/nnapi_implementation_stub.cc")
-  endif()
-
-  # These are shared utils,
-  # TODO, move this to a separated lib when used by EPs other than NNAPI and CoreML
-  list(APPEND onnxruntime_provider_nnapi_cc_src_patterns
-    "${ONNXRUNTIME_ROOT}/core/providers/shared/utils/utils.h"
-    "${ONNXRUNTIME_ROOT}/core/providers/shared/utils/utils.cc"
-    "${ONNXRUNTIME_ROOT}/core/providers/shared/node_unit/node_unit.h"
-    "${ONNXRUNTIME_ROOT}/core/providers/shared/node_unit/node_unit.cc"
-  )
-
-  file(GLOB onnxruntime_providers_nnapi_cc_srcs CONFIGURE_DEPENDS ${onnxruntime_provider_nnapi_cc_src_patterns})
-
-  source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_nnapi_cc_srcs})
-  onnxruntime_add_static_library(onnxruntime_providers_nnapi ${onnxruntime_providers_nnapi_cc_srcs})
-  onnxruntime_add_include_to_target(onnxruntime_providers_nnapi
-    onnxruntime_common onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB} flatbuffers::flatbuffers Boost::mp11 safeint_interface
-  )
-  target_link_libraries(onnxruntime_providers_nnapi)
-  add_dependencies(onnxruntime_providers_nnapi onnx ${onnxruntime_EXTERNAL_DEPENDENCIES})
-  set_target_properties(onnxruntime_providers_nnapi PROPERTIES CXX_STANDARD_REQUIRED ON)
-  set_target_properties(onnxruntime_providers_nnapi PROPERTIES FOLDER "ONNXRuntime")
-  target_include_directories(onnxruntime_providers_nnapi PRIVATE ${ONNXRUNTIME_ROOT} ${nnapi_INCLUDE_DIRS})
-  set_target_properties(onnxruntime_providers_nnapi PROPERTIES LINKER_LANGUAGE CXX)
-  # ignore the warning unknown-pragmas on "pragma region"
-  if(NOT MSVC)
-    target_compile_options(onnxruntime_providers_nnapi PRIVATE "-Wno-unknown-pragmas")
-  endif()
-
-  if (NOT onnxruntime_BUILD_SHARED_LIB)
-    install(TARGETS onnxruntime_providers_nnapi
-            ARCHIVE   DESTINATION ${CMAKE_INSTALL_LIBDIR}
-            LIBRARY   DESTINATION ${CMAKE_INSTALL_LIBDIR}
-            RUNTIME   DESTINATION ${CMAKE_INSTALL_BINDIR}
-            FRAMEWORK DESTINATION ${CMAKE_INSTALL_BINDIR})
-  endif()
+  include(onnxruntime_providers_nnapi.cmake)
 endif()
 
 if (onnxruntime_USE_JSEP)
-  add_compile_definitions(USE_JSEP=1)
-
-  file(GLOB_RECURSE onnxruntime_providers_js_cc_srcs
-    "${ONNXRUNTIME_ROOT}/core/providers/js/*.h"
-    "${ONNXRUNTIME_ROOT}/core/providers/js/*.cc"
-  )
-  if(NOT onnxruntime_DISABLE_CONTRIB_OPS)
-    source_group(TREE ${ONNXRUNTIME_ROOT} FILES ${onnxruntime_js_contrib_ops_cc_srcs})
-    list(APPEND onnxruntime_providers_js_cc_srcs ${onnxruntime_js_contrib_ops_cc_srcs})
-  endif()
-
-  source_group(TREE ${ONNXRUNTIME_ROOT} FILES ${onnxruntime_providers_js_cc_srcs})
-  onnxruntime_add_static_library(onnxruntime_providers_js ${onnxruntime_providers_js_cc_srcs})
-  onnxruntime_add_include_to_target(onnxruntime_providers_js
-    onnxruntime_common onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB} flatbuffers Boost::mp11
-  )
-  target_include_directories(onnxruntime_providers_js PRIVATE  ${eigen_INCLUDE_DIRS})
-  add_dependencies(onnxruntime_providers_js ${onnxruntime_EXTERNAL_DEPENDENCIES})
-
+  include(onnxruntime_providers_js.cmake)
 endif()
 
 if (onnxruntime_USE_QNN)
-  add_compile_definitions(USE_QNN=1)
-
-  # These are shared utils,
-  # TODO, move this to a separated lib when used by EPs other than QNN, NNAPI and CoreML
-  file(GLOB_RECURSE onnxruntime_providers_shared_utils_cc_srcs CONFIGURE_DEPENDS
-    "${ONNXRUNTIME_ROOT}/core/providers/shared/utils/utils.h"
-    "${ONNXRUNTIME_ROOT}/core/providers/shared/utils/utils.cc"
-    "${ONNXRUNTIME_ROOT}/core/providers/shared/node_unit/node_unit.h"
-    "${ONNXRUNTIME_ROOT}/core/providers/shared/node_unit/node_unit.cc"
-  )
-
-  file(GLOB_RECURSE
-    onnxruntime_providers_qnn_ep_cc_srcs CONFIGURE_DEPENDS
-    "${ONNXRUNTIME_ROOT}/core/providers/qnn/*.h"
-    "${ONNXRUNTIME_ROOT}/core/providers/qnn/*.cc"
-  )
-
-  file(GLOB_RECURSE
-    onnxruntime_providers_qnn_builder_cc_srcs CONFIGURE_DEPENDS
-    "${ONNXRUNTIME_ROOT}/core/providers/qnn/builder/*.h"
-    "${ONNXRUNTIME_ROOT}/core/providers/qnn/builder/*.cc"
-  )
-
-  set(onnxruntime_providers_qnn_cc_srcs
-    ${onnxruntime_providers_shared_utils_cc_srcs}
-    ${onnxruntime_providers_qnn_ep_cc_srcs}
-    ${onnxruntime_providers_qnn_builder_cc_srcs}
-  )
-
-  source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_qnn_cc_srcs})
-  onnxruntime_add_static_library(onnxruntime_providers_qnn ${onnxruntime_providers_qnn_cc_srcs})
-  onnxruntime_add_include_to_target(onnxruntime_providers_qnn onnxruntime_common onnxruntime_framework onnx onnx_proto protobuf::libprotobuf-lite flatbuffers::flatbuffers Boost::mp11)
-  target_link_libraries(onnxruntime_providers_qnn)
-  add_dependencies(onnxruntime_providers_qnn onnx ${onnxruntime_EXTERNAL_DEPENDENCIES})
-  set_target_properties(onnxruntime_providers_qnn PROPERTIES CXX_STANDARD_REQUIRED ON)
-  set_target_properties(onnxruntime_providers_qnn PROPERTIES FOLDER "ONNXRuntime")
-  target_include_directories(onnxruntime_providers_qnn PRIVATE ${ONNXRUNTIME_ROOT} ${onnxruntime_QNN_HOME}/include/QNN ${onnxruntime_QNN_HOME}/include)
-  set_target_properties(onnxruntime_providers_qnn PROPERTIES LINKER_LANGUAGE CXX)
-  # ignore the warning unknown-pragmas on "pragma region"
-  if(NOT MSVC)
-    target_compile_options(onnxruntime_providers_qnn PRIVATE "-Wno-unknown-pragmas")
-  endif()
+  include(onnxruntime_providers_qnn.cmake)
 endif()
 
 if (onnxruntime_USE_RKNPU)
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-variable -Wno-unused-parameter")
-  add_definitions(-DUSE_RKNPU=1)
-  option(DNN_READ_ONNX "" ON)
-  set(DNN_CUSTOM_PROTOC_EXECUTABLE ${ONNX_CUSTOM_PROTOC_EXECUTABLE})
-  option(DNN_CMAKE_INSTALL "" OFF)
-  option(DNN_BUILD_BIN "" OFF)
-  if (NOT RKNPU_DDK_PATH)
-    message(FATAL_ERROR "RKNPU_DDK_PATH required for onnxruntime_USE_RKNPU")
-  endif()
-  set(RKNPU_DDK_INCLUDE_DIR ${RKNPU_DDK_PATH}/include)
-  if (CMAKE_SIZEOF_VOID_P EQUAL 8)
-    set(RKNPU_DDK_LIB_DIR ${RKNPU_DDK_PATH}/lib64)
-  else()
-    set(RKNPU_DDK_LIB_DIR ${RKNPU_DDK_PATH}/lib)
-  endif()
-  file(GLOB_RECURSE
-    onnxruntime_providers_rknpu_cc_srcs CONFIGURE_DEPENDS
-    "${ONNXRUNTIME_ROOT}/core/providers/rknpu/*.h"
-    "${ONNXRUNTIME_ROOT}/core/providers/rknpu/*.cc"
-  )
-  source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_rknpu_cc_srcs})
-  onnxruntime_add_static_library(onnxruntime_providers_rknpu ${onnxruntime_providers_rknpu_cc_srcs})
-  onnxruntime_add_include_to_target(onnxruntime_providers_rknpu
-    onnxruntime_common onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB} flatbuffers::flatbuffers Boost::mp11 safeint_interface
-  )
-  target_link_libraries(onnxruntime_providers_rknpu PRIVATE -lrknpu_ddk)
-  add_dependencies(onnxruntime_providers_rknpu onnx ${onnxruntime_EXTERNAL_DEPENDENCIES})
-  set_target_properties(onnxruntime_providers_rknpu PROPERTIES FOLDER "ONNXRuntime")
-  target_include_directories(onnxruntime_providers_rknpu PRIVATE
-    ${ONNXRUNTIME_ROOT} ${rknpu_INCLUDE_DIRS} ${RKNPU_DDK_INCLUDE_DIR}
-  )
-  link_directories(onnxruntime_providers_rknpu ${RKNPU_DDK_LIB_DIR})
-  set_target_properties(onnxruntime_providers_rknpu PROPERTIES LINKER_LANGUAGE CXX)
-
-  if (NOT onnxruntime_BUILD_SHARED_LIB)
-    install(TARGETS onnxruntime_providers_rknpu
-            ARCHIVE   DESTINATION ${CMAKE_INSTALL_LIBDIR}
-            LIBRARY   DESTINATION ${CMAKE_INSTALL_LIBDIR}
-            RUNTIME   DESTINATION ${CMAKE_INSTALL_BINDIR}
-            FRAMEWORK DESTINATION ${CMAKE_INSTALL_BINDIR})
-  endif()
+  include(onnxruntime_providers_rknpu.cmake)
 endif()
 
 if (onnxruntime_USE_DML)
-  file(GLOB_RECURSE onnxruntime_providers_dml_cc_srcs CONFIGURE_DEPENDS
-    "${ONNXRUNTIME_ROOT}/core/providers/dml/*.h"
-    "${ONNXRUNTIME_ROOT}/core/providers/dml/*.cpp"
-    "${ONNXRUNTIME_ROOT}/core/providers/dml/*.cc"
-  )
-  source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_dml_cc_srcs})
-  onnxruntime_add_static_library(onnxruntime_providers_dml ${onnxruntime_providers_dml_cc_srcs})
-  onnxruntime_add_include_to_target(onnxruntime_providers_dml
-    onnxruntime_common onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB} flatbuffers::flatbuffers Boost::mp11 safeint_interface ${WIL_TARGET}
-  )
-  add_dependencies(onnxruntime_providers_dml ${onnxruntime_EXTERNAL_DEPENDENCIES})
-  target_include_directories(onnxruntime_providers_dml PRIVATE
-    ${ONNXRUNTIME_ROOT}
-  )
-
-  target_compile_definitions(onnxruntime_providers_dml PRIVATE DML_TARGET_VERSION_USE_LATEST=1)
-  if(WIN32)
-    target_compile_options(onnxruntime_providers_dml PRIVATE "/wd4100" "/wd4238" "/wd4189" "/wd4702")
-  endif()
-
-  if (NOT onnxruntime_USE_CUSTOM_DIRECTML)
-    foreach(file "DirectML.dll" "DirectML.pdb" "DirectML.Debug.dll" "DirectML.Debug.pdb")
-      add_custom_command(TARGET onnxruntime_providers_dml
-        POST_BUILD
-        COMMAND ${CMAKE_COMMAND} -E copy_if_different
-          "${DML_PACKAGE_DIR}/bin/${onnxruntime_target_platform}-win/${file}" $<TARGET_FILE_DIR:onnxruntime_providers_dml>)
-    endforeach()
-  endif()
-
-  function(target_add_dml target)
-    if (onnxruntime_USE_CUSTOM_DIRECTML)
-      if (dml_EXTERNAL_PROJECT)
-        # Internal build of DirectML: link against the "DirectML" target.
-        target_link_libraries(${target} PRIVATE DirectML)
-      else()
-        if (dml_LIB_DIR)
-          target_link_libraries(${target} PRIVATE ${dml_LIB_DIR}/DirectML.lib)
-        else()
-          target_link_libraries(${target} PRIVATE DirectML)
-        endif()
-      endif()
-    else()
-      add_dependencies(${target} RESTORE_PACKAGES)
-      target_link_libraries(${target} PRIVATE "${DML_PACKAGE_DIR}/bin/${onnxruntime_target_platform}-win/DirectML.lib")
-        target_compile_definitions(${target} PRIVATE DML_TARGET_VERSION_USE_LATEST)
-    endif()
-  endfunction()
-
-  target_add_dml(onnxruntime_providers_dml)
-  target_link_libraries(onnxruntime_providers_dml PRIVATE onnxruntime_common)
-  target_link_libraries(onnxruntime_providers_dml PRIVATE onnxruntime_framework)
-  onnxruntime_add_include_to_target(onnxruntime_providers_dml onnxruntime_common)
-  if (GDK_PLATFORM STREQUAL Scarlett)
-    target_link_libraries(onnxruntime_providers_dml PRIVATE ${gdk_dx_libs})
-  else()
-    target_link_libraries(onnxruntime_providers_dml PRIVATE dxguid.lib d3d12.lib dxgi.lib)
-  endif()
-
-  target_link_libraries(onnxruntime_providers_dml PRIVATE delayimp.lib)
-
-  if (NOT GDK_PLATFORM)
-    set(onnxruntime_DELAYLOAD_FLAGS "${onnxruntime_DELAYLOAD_FLAGS} /DELAYLOAD:DirectML.dll /DELAYLOAD:d3d12.dll /DELAYLOAD:dxgi.dll /DELAYLOAD:api-ms-win-core-com-l1-1-0.dll /DELAYLOAD:shlwapi.dll /DELAYLOAD:oleaut32.dll /ignore:4199")
-  endif()
-
-  target_compile_definitions(onnxruntime_providers_dml
-    PRIVATE
-    ONNX_NAMESPACE=onnx ONNX_ML LOTUS_LOG_THRESHOLD=2 LOTUS_ENABLE_STDERR_LOGGING PLATFORM_WINDOWS
-  )
-  target_compile_definitions(onnxruntime_providers_dml PRIVATE UNICODE _UNICODE NOMINMAX)
-  if (MSVC)
-    target_compile_definitions(onnxruntime_providers_dml PRIVATE _SILENCE_CXX17_ITERATOR_BASE_CLASS_DEPRECATION_WARNING)
-    target_compile_options(onnxruntime_providers_dml PRIVATE "/W3")
-  endif()
-
-  install(FILES ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/dml/dml_provider_factory.h
-    DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/
-  )
-
-  set_target_properties(onnxruntime_providers_dml PROPERTIES LINKER_LANGUAGE CXX)
-  set_target_properties(onnxruntime_providers_dml PROPERTIES FOLDER "ONNXRuntime")
-
-  if (NOT onnxruntime_BUILD_SHARED_LIB)
-    install(TARGETS onnxruntime_providers_dml
-            ARCHIVE   DESTINATION ${CMAKE_INSTALL_LIBDIR}
-            LIBRARY   DESTINATION ${CMAKE_INSTALL_LIBDIR}
-            RUNTIME   DESTINATION ${CMAKE_INSTALL_BINDIR}
-            FRAMEWORK DESTINATION ${CMAKE_INSTALL_BINDIR})
-  endif()
+  include(onnxruntime_providers_dml.cmake)
 endif()
 
 if (onnxruntime_USE_MIGRAPHX)
-  add_definitions(-DUSE_MIGRAPHX=1)
-  set(BUILD_LIBRARY_ONLY 1)
-  add_definitions("-DONNX_ML=1")
-  add_definitions("-DONNX_NAMESPACE=onnx")
-  include_directories(${protobuf_SOURCE_DIR} ${eigen_SOURCE_DIR})
-  set(MIGRAPHX_ROOT ${onnxruntime_MIGRAPHX_HOME})
-  include_directories(${onnx_SOURCE_DIR})
-  set(OLD_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
-  if ( CMAKE_COMPILER_IS_GNUCC )
-    set(CMAKE_CXX_FLAGS  "${CMAKE_CXX_FLAGS} -Wno-unused-parameter -Wno-missing-field-initializers")
-  endif()
-  set(CXX_VERSION_DEFINED TRUE)
-  set(CMAKE_CXX_FLAGS ${OLD_CMAKE_CXX_FLAGS})
-  if ( CMAKE_COMPILER_IS_GNUCC )
-    set(CMAKE_CXX_FLAGS  "${CMAKE_CXX_FLAGS} -Wno-unused-parameter")
-  endif()
-
-  # Add search paths for default rocm installation
-  list(APPEND CMAKE_PREFIX_PATH /opt/rocm/hcc /opt/rocm/hip /opt/rocm)
-
-  find_package(hip)
-  find_package(migraphx PATHS ${AMD_MIGRAPHX_HOME})
-
-  find_package(miopen)
-  find_package(rocblas)
-
-  set(migraphx_libs migraphx::c hip::host MIOpen roc::rocblas)
-
-  file(GLOB_RECURSE onnxruntime_providers_migraphx_cc_srcs CONFIGURE_DEPENDS
-    "${ONNXRUNTIME_ROOT}/core/providers/migraphx/*.h"
-    "${ONNXRUNTIME_ROOT}/core/providers/migraphx/*.cc"
-    "${ONNXRUNTIME_ROOT}/core/providers/shared_library/*.h"
-    "${ONNXRUNTIME_ROOT}/core/providers/shared_library/*.cc"
-    "${ONNXRUNTIME_ROOT}/core/providers/rocm/rocm_stream_handle.h"
-    "${ONNXRUNTIME_ROOT}/core/providers/rocm/rocm_stream_handle.cc"
-  )
-  source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_migraphx_cc_srcs})
-  onnxruntime_add_shared_library_module(onnxruntime_providers_migraphx ${onnxruntime_providers_migraphx_cc_srcs})
-  onnxruntime_add_include_to_target(onnxruntime_providers_migraphx onnxruntime_common onnx flatbuffers::flatbuffers Boost::mp11 safeint_interface)
-  add_dependencies(onnxruntime_providers_migraphx onnxruntime_providers_shared ${onnxruntime_EXTERNAL_DEPENDENCIES})
-  target_link_libraries(onnxruntime_providers_migraphx PRIVATE ${migraphx_libs} ${ONNXRUNTIME_PROVIDERS_SHARED} onnx flatbuffers::flatbuffers Boost::mp11 safeint_interface)
-  target_include_directories(onnxruntime_providers_migraphx PRIVATE ${ONNXRUNTIME_ROOT} ${CMAKE_CURRENT_BINARY_DIR})
-  set_target_properties(onnxruntime_providers_migraphx PROPERTIES LINKER_LANGUAGE CXX)
-  set_target_properties(onnxruntime_providers_migraphx PROPERTIES FOLDER "ONNXRuntime")
-  target_compile_definitions(onnxruntime_providers_migraphx PRIVATE ONNXIFI_BUILD_LIBRARY=1)
-  target_compile_options(onnxruntime_providers_migraphx PRIVATE -Wno-error=sign-compare)
-  set_property(TARGET onnxruntime_providers_migraphx APPEND_STRING PROPERTY COMPILE_FLAGS "-Wno-deprecated-declarations")
-  set_property(TARGET onnxruntime_providers_migraphx APPEND_STRING PROPERTY LINK_FLAGS "-Xlinker --version-script=${ONNXRUNTIME_ROOT}/core/providers/migraphx/version_script.lds -Xlinker --gc-sections")
-  target_link_libraries(onnxruntime_providers_migraphx PRIVATE nsync::nsync_cpp stdc++fs)
-
-  include(CheckLibraryExists)
-  check_library_exists(migraphx::c "migraphx_program_run_async" "/opt/rocm/migraphx/lib" HAS_STREAM_SYNC)
-  if(HAS_STREAM_SYNC)
-      target_compile_definitions(onnxruntime_providers_migraphx PRIVATE -DMIGRAPHX_STREAM_SYNC)
-      message(STATUS "MIGRAPHX GPU STREAM SYNC is ENABLED")
-  else()
-      message(STATUS "MIGRAPHX GPU STREAM SYNC is DISABLED")
-  endif()
-
-  install(TARGETS onnxruntime_providers_migraphx
-          ARCHIVE  DESTINATION ${CMAKE_INSTALL_LIBDIR}
-          LIBRARY  DESTINATION ${CMAKE_INSTALL_LIBDIR}
-          RUNTIME  DESTINATION ${CMAKE_INSTALL_BINDIR}
-  )
+  include(onnxruntime_providers_migraphx.cmake)
 endif()
 
 if (onnxruntime_USE_ACL)
-  add_definitions(-DUSE_ACL=1)
-  file(GLOB_RECURSE onnxruntime_providers_acl_cc_srcs
-    "${ONNXRUNTIME_ROOT}/core/providers/acl/*.h"
-    "${ONNXRUNTIME_ROOT}/core/providers/acl/*.cc"
-  )
-
-  source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_acl_cc_srcs})
-  onnxruntime_add_static_library(onnxruntime_providers_acl ${onnxruntime_providers_acl_cc_srcs})
-  onnxruntime_add_include_to_target(onnxruntime_providers_acl
-    onnxruntime_common onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB} flatbuffers::flatbuffers Boost::mp11 safeint_interface
-  )
-
-  target_link_libraries(onnxruntime_providers_acl -L$ENV{LD_LIBRARY_PATH})
-  add_dependencies(onnxruntime_providers_acl ${onnxruntime_EXTERNAL_DEPENDENCIES})
-  set_target_properties(onnxruntime_providers_acl PROPERTIES FOLDER "ONNXRuntime")
-  target_include_directories(onnxruntime_providers_acl
-    PRIVATE
-    ${ONNXRUNTIME_ROOT} ${eigen_INCLUDE_DIRS} ${onnxruntime_ACL_HOME} ${onnxruntime_ACL_HOME}/include
-  )
-  install(FILES ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/acl/acl_provider_factory.h
-    DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/
-  )
-  set_target_properties(onnxruntime_providers_acl PROPERTIES LINKER_LANGUAGE CXX)
-
-  if (NOT onnxruntime_BUILD_SHARED_LIB)
-    install(TARGETS onnxruntime_providers_acl
-            ARCHIVE   DESTINATION ${CMAKE_INSTALL_LIBDIR}
-            LIBRARY   DESTINATION ${CMAKE_INSTALL_LIBDIR}
-            RUNTIME   DESTINATION ${CMAKE_INSTALL_BINDIR}
-            FRAMEWORK DESTINATION ${CMAKE_INSTALL_BINDIR})
-  endif()
+  include(onnxruntime_providers_acl.cmake)
 endif()
 
 if (onnxruntime_USE_ARMNN)
-  add_definitions(-DUSE_ARMNN=1)
-  file(GLOB_RECURSE onnxruntime_providers_armnn_cc_srcs
-    "${ONNXRUNTIME_ROOT}/core/providers/armnn/*.h"
-    "${ONNXRUNTIME_ROOT}/core/providers/armnn/*.cc"
-  )
-
-  source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_armnn_cc_srcs})
-  onnxruntime_add_static_library(onnxruntime_providers_armnn ${onnxruntime_providers_armnn_cc_srcs})
-  onnxruntime_add_include_to_target(onnxruntime_providers_armnn
-    onnxruntime_common onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB} flatbuffers::flatbuffers Boost::mp11 safeint_interface
-  )
-
-  add_dependencies(onnxruntime_providers_armnn ${onnxruntime_EXTERNAL_DEPENDENCIES})
-  set_target_properties(onnxruntime_providers_armnn PROPERTIES FOLDER "ONNXRuntime")
-  target_include_directories(onnxruntime_providers_armnn PRIVATE
-    ${ONNXRUNTIME_ROOT} ${eigen_INCLUDE_DIRS} ${onnxruntime_ARMNN_HOME} ${onnxruntime_ARMNN_HOME}/include
-    ${onnxruntime_ACL_HOME} ${onnxruntime_ACL_HOME}/include
-  )
-  install(FILES ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/armnn/armnn_provider_factory.h
-    DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/)
-
-  set_target_properties(onnxruntime_providers_armnn PROPERTIES LINKER_LANGUAGE CXX)
-
-  if (NOT onnxruntime_BUILD_SHARED_LIB)
-    install(TARGETS onnxruntime_providers_armnn
-            ARCHIVE   DESTINATION ${CMAKE_INSTALL_LIBDIR}
-            LIBRARY   DESTINATION ${CMAKE_INSTALL_LIBDIR}
-            RUNTIME   DESTINATION ${CMAKE_INSTALL_BINDIR}
-            FRAMEWORK DESTINATION ${CMAKE_INSTALL_BINDIR})
-  endif()
+  include(onnxruntime_providers_armnn.cmake)
 endif()
 
 if (onnxruntime_USE_ROCM)
-  add_definitions(-DUSE_ROCM=1)
-  include(onnxruntime_rocm_hipify.cmake)
-
-  list(APPEND CMAKE_PREFIX_PATH ${onnxruntime_ROCM_HOME})
-
-  find_package(HIP)
-  find_package(hiprand REQUIRED)
-  find_package(rocblas REQUIRED)
-  find_package(MIOpen REQUIRED)
-
-  # MIOpen version
-  if(NOT DEFINED ENV{MIOPEN_PATH})
-    set(MIOPEN_PATH ${onnxruntime_ROCM_HOME})
-  else()
-    set(MIOPEN_PATH $ENV{MIOPEN_PATH})
-  endif()
-  find_path(MIOPEN_VERSION_H_PATH
-    NAMES version.h
-    HINTS
-    ${MIOPEN_PATH}/include/miopen
-    ${MIOPEN_PATH}/miopen/include)
-  if (MIOPEN_VERSION_H_PATH-NOTFOUND)
-    MESSAGE(FATAL_ERROR "miopen version.h not found")
-  endif()
-  MESSAGE(STATUS "Found miopen version.h at ${MIOPEN_VERSION_H_PATH}")
-
-  file(READ ${MIOPEN_VERSION_H_PATH}/version.h MIOPEN_HEADER_CONTENTS)
-        string(REGEX MATCH "define MIOPEN_VERSION_MAJOR * +([0-9]+)"
-                                 MIOPEN_VERSION_MAJOR "${MIOPEN_HEADER_CONTENTS}")
-        string(REGEX REPLACE "define MIOPEN_VERSION_MAJOR * +([0-9]+)" "\\1"
-                                 MIOPEN_VERSION_MAJOR "${MIOPEN_VERSION_MAJOR}")
-        string(REGEX MATCH "define MIOPEN_VERSION_MINOR * +([0-9]+)"
-                                 MIOPEN_VERSION_MINOR "${MIOPEN_HEADER_CONTENTS}")
-        string(REGEX REPLACE "define MIOPEN_VERSION_MINOR * +([0-9]+)" "\\1"
-                                 MIOPEN_VERSION_MINOR "${MIOPEN_VERSION_MINOR}")
-        string(REGEX MATCH "define MIOPEN_VERSION_PATCH * +([0-9]+)"
-                                 MIOPEN_VERSION_PATCH "${MIOPEN_HEADER_CONTENTS}")
-        string(REGEX REPLACE "define MIOPEN_VERSION_PATCH * +([0-9]+)" "\\1"
-                                 MIOPEN_VERSION_PATCH "${MIOPEN_VERSION_PATCH}")
-  set(MIOPEN_VERSION_DEV "${MIOPEN_VERSION_MAJOR}.${MIOPEN_VERSION_MINOR}.${MIOPEN_VERSION_PATCH}")
-  math(EXPR MIOPEN_VERSION_DEV_INT "(${MIOPEN_VERSION_MAJOR}*10000) + (${MIOPEN_VERSION_MINOR}*100) + ${MIOPEN_VERSION_PATCH}")
-  message("MIOPEN_VERSION_DEV: ${MIOPEN_VERSION_DEV}")
-  message("MIOPEN_VERSION_DEV_INT:   ${MIOPEN_VERSION_DEV_INT}")
-  add_definitions(-DMIOPEN_VERSION=${MIOPEN_VERSION_DEV_INT})
-
-  find_library(RCCL_LIB rccl REQUIRED)
-  find_library(ROCTRACER_LIB roctracer64 REQUIRED)
-  set(ONNXRUNTIME_ROCM_LIBS roc::rocblas MIOpen ${RCCL_LIB} ${ROCTRACER_LIB})
-
-  file(GLOB_RECURSE onnxruntime_providers_rocm_cc_srcs CONFIGURE_DEPENDS
-    "${ONNXRUNTIME_ROOT}/core/providers/rocm/*.h"
-    "${ONNXRUNTIME_ROOT}/core/providers/rocm/*.cc"
-  )
-
-  # The shared_library files are in a separate list since they use precompiled headers, and the above files have them disabled.
-  file(GLOB_RECURSE onnxruntime_providers_rocm_shared_srcs CONFIGURE_DEPENDS
-    "${ONNXRUNTIME_ROOT}/core/providers/shared_library/*.h"
-    "${ONNXRUNTIME_ROOT}/core/providers/shared_library/*.cc"
-  )
-
-  file(GLOB_RECURSE onnxruntime_providers_rocm_cu_srcs CONFIGURE_DEPENDS
-    "${ONNXRUNTIME_ROOT}/core/providers/rocm/*.cu"
-    "${ONNXRUNTIME_ROOT}/core/providers/rocm/*.cuh"
-  )
-
-  hipify("onnxruntime/core/providers" provider_excluded_files onnxruntime_providers_rocm_generated_cc_srcs onnxruntime_providers_rocm_generated_cu_srcs)
-
-  source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_rocm_cc_srcs} ${onnxruntime_providers_rocm_shared_srcs} ${onnxruntime_providers_rocm_cu_srcs})
-  set(onnxruntime_providers_rocm_src ${onnxruntime_providers_rocm_cc_srcs} ${onnxruntime_providers_rocm_shared_srcs} ${onnxruntime_providers_rocm_cu_srcs})
-  list(APPEND onnxruntime_providers_rocm_src ${onnxruntime_providers_rocm_generated_cc_srcs} ${onnxruntime_providers_rocm_generated_cu_srcs})
-
-  # disable contrib ops conditionally
-  if(NOT onnxruntime_DISABLE_CONTRIB_OPS)
-    hipify("onnxruntime/contrib_ops" contrib_ops_excluded_files onnxruntime_rocm_generated_contrib_ops_cc_srcs onnxruntime_rocm_generated_contrib_ops_cu_srcs)
-
-    # add using ONNXRUNTIME_ROOT so they show up under the 'contrib_ops' folder in Visual Studio
-    source_group(TREE ${ONNXRUNTIME_ROOT} FILES ${onnxruntime_rocm_contrib_ops_cc_srcs} ${onnxruntime_rocm_contrib_ops_cu_srcs})
-    list(APPEND onnxruntime_providers_rocm_src ${onnxruntime_rocm_contrib_ops_cc_srcs} ${onnxruntime_rocm_contrib_ops_cu_srcs})
-    list(APPEND onnxruntime_providers_rocm_src ${onnxruntime_rocm_generated_contrib_ops_cc_srcs} ${onnxruntime_rocm_generated_contrib_ops_cu_srcs})
-  endif()
-
-  if (onnxruntime_ENABLE_TRAINING_OPS)
-    file(GLOB_RECURSE onnxruntime_rocm_training_ops_cc_srcs CONFIGURE_DEPENDS
-      "${ORTTRAINING_SOURCE_DIR}/training_ops/rocm/*.h"
-      "${ORTTRAINING_SOURCE_DIR}/training_ops/rocm/*.cc"
-    )
-
-    file(GLOB_RECURSE onnxruntime_rocm_training_ops_cu_srcs CONFIGURE_DEPENDS
-      "${ORTTRAINING_SOURCE_DIR}/training_ops/rocm/*.cu"
-      "${ORTTRAINING_SOURCE_DIR}/training_ops/rocm/*.cuh"
-    )
-
-    hipify("orttraining/orttraining/training_ops" training_ops_excluded_files onnxruntime_rocm_generated_training_ops_cc_srcs onnxruntime_rocm_generated_training_ops_cu_srcs)
-
-    # NCCL is not support in Windows build
-    if (WIN32 OR NOT onnxruntime_USE_NCCL)
-      list(REMOVE_ITEM onnxruntime_rocm_generated_training_ops_cc_srcs
-      "${CMAKE_CURRENT_BINARY_DIR}/amdgpu/orttraining/orttraining/training_ops/rocm/collective/nccl_common.cc"
-      "${CMAKE_CURRENT_BINARY_DIR}/amdgpu/orttraining/orttraining/training_ops/rocm/collective/nccl_kernels.cc"
-      "${CMAKE_CURRENT_BINARY_DIR}/amdgpu/orttraining/orttraining/training_ops/rocm/collective/megatron.cc"
-      )
-    endif()
-
-    source_group(TREE ${ORTTRAINING_ROOT} FILES ${onnxruntime_rocm_training_ops_cc_srcs} ${onnxruntime_rocm_training_ops_cu_srcs})
-    list(APPEND onnxruntime_providers_rocm_src ${onnxruntime_rocm_training_ops_cc_srcs} ${onnxruntime_rocm_training_ops_cu_srcs})
-    list(APPEND onnxruntime_providers_rocm_src ${onnxruntime_rocm_generated_training_ops_cc_srcs} ${onnxruntime_rocm_generated_training_ops_cu_srcs})
-  endif()
-
-  auto_set_source_files_hip_language(${onnxruntime_providers_rocm_src})
-  onnxruntime_add_shared_library_module(onnxruntime_providers_rocm ${onnxruntime_providers_rocm_src})
-  target_compile_options(onnxruntime_providers_rocm PRIVATE -D__HIP_PLATFORM_AMD__=1 -D__HIP_PLATFORM_HCC__=1)
-
-  if(NOT MSVC)
-    target_compile_options(onnxruntime_providers_rocm PRIVATE -Wno-sign-compare)
-    target_compile_options(onnxruntime_providers_rocm PRIVATE -Wno-unused-parameter)
-    target_compile_options(onnxruntime_providers_rocm PRIVATE -Wno-undefined-var-template)
-  endif()
-
-  onnxruntime_add_include_to_target(onnxruntime_providers_rocm onnxruntime_common onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB} flatbuffers::flatbuffers Boost::mp11 safeint_interface)
-  if (onnxruntime_ENABLE_TRAINING_OPS)
-    onnxruntime_add_include_to_target(onnxruntime_providers_rocm onnxruntime_training)
-    target_link_libraries(onnxruntime_providers_rocm PRIVATE onnxruntime_training)
-    if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
-      onnxruntime_add_include_to_target(onnxruntime_providers_rocm Python::Module)
-    endif()
-  endif()
-
-  add_custom_target(generate_hipified_files DEPENDS
-    ${onnxruntime_providers_rocm_generated_cc_srcs}
-    ${onnxruntime_providers_rocm_generated_cu_srcs}
-    ${onnxruntime_rocm_generated_contrib_ops_cc_srcs}
-    ${onnxruntime_rocm_generated_contrib_ops_cu_srcs}
-    ${onnxruntime_rocm_generated_training_ops_cc_srcs}
-    ${onnxruntime_rocm_generated_training_ops_cu_srcs})
-
-  add_dependencies(onnxruntime_providers_rocm generate_hipified_files onnxruntime_providers_shared ${onnxruntime_EXTERNAL_DEPENDENCIES})
-  target_link_libraries(onnxruntime_providers_rocm PRIVATE ${ONNXRUNTIME_ROCM_LIBS} ${ONNXRUNTIME_PROVIDERS_SHARED} ${ABSEIL_LIBS})
-  target_include_directories(onnxruntime_providers_rocm SYSTEM
-    PRIVATE
-      ${ONNXRUNTIME_ROOT}
-      ${CMAKE_CURRENT_BINARY_DIR}
-      ${CMAKE_CURRENT_BINARY_DIR}/amdgpu/onnxruntime
-      ${eigen_INCLUDE_DIRS}
-    PUBLIC
-      ${onnxruntime_ROCM_HOME}/include
-      ${onnxruntime_ROCM_HOME}/include/roctracer)
-
-  set_target_properties(onnxruntime_providers_rocm PROPERTIES LINKER_LANGUAGE CXX)
-  set_target_properties(onnxruntime_providers_rocm PROPERTIES FOLDER "ONNXRuntime")
-
-  if (onnxruntime_ENABLE_TRAINING)
-    target_include_directories(onnxruntime_providers_rocm PRIVATE ${ORTTRAINING_ROOT} ${CMAKE_CURRENT_BINARY_DIR}/amdgpu/orttraining ${MPI_CXX_INCLUDE_DIRS})
-    if(onnxruntime_USE_MPI)
-      target_link_libraries(onnxruntime_providers_rocm PRIVATE ${MPI_LIBRARIES} ${MPI_CXX_LINK_FLAGS})
-    endif()
-
-    # RCCL is enabled by default for ROCM builds
-    #if (onnxruntime_USE_NCCL)
-    #  target_include_directories(onnxruntime_providers_rocm PRIVATE ${NCCL_INCLUDE_DIRS})
-    #  target_link_libraries(onnxruntime_providers_rocm PRIVATE ${NCCL_LIBRARIES})
-    #endif()
-  endif()
-
-  if (onnxruntime_USE_ROCBLAS_EXTENSION_API)
-    target_compile_definitions(onnxruntime_providers_rocm PRIVATE USE_ROCBLAS_EXTENSION_API)
-    target_compile_definitions(onnxruntime_providers_rocm PRIVATE ROCBLAS_NO_DEPRECATED_WARNINGS)
-    target_compile_definitions(onnxruntime_providers_rocm PRIVATE ROCBLAS_BETA_FEATURES_API)
-  endif()
-
-  if (onnxruntime_USE_HIPBLASLT)
-    find_package(hipblaslt REQUIRED)
-    target_link_libraries(onnxruntime_providers_rocm PRIVATE roc::hipblaslt)
-    target_compile_definitions(onnxruntime_providers_rocm PRIVATE USE_HIPBLASLT)
-  endif()
-
-  if (onnxruntime_USE_TRITON_KERNEL)
-    # compile triton kernel, generate .a and .h files
-    include(onnxruntime_compile_triton_kernel.cmake)
-    compile_triton_kernel(triton_kernel_obj_file triton_kernel_header_dir)
-    add_dependencies(onnxruntime_providers_rocm onnxruntime_triton_kernel)
-    target_compile_definitions(onnxruntime_providers_rocm PRIVATE USE_TRITON_KERNEL)
-    target_include_directories(onnxruntime_providers_rocm PRIVATE ${triton_kernel_header_dir})
-    target_link_libraries(onnxruntime_providers_rocm PUBLIC -Wl,--whole-archive ${triton_kernel_obj_file} -Wl,--no-whole-archive)
-  endif()
-
-  if (onnxruntime_USE_COMPOSABLE_KERNEL)
-    include(composable_kernel)
-    target_link_libraries(onnxruntime_providers_rocm PRIVATE
-      onnxruntime_composable_kernel_includes
-      # Currently we shall not use composablekernels::device_operations, the target includes all conv dependencies, which
-      # are extremely slow to compile. Instead, we only link all gemm related objects. See the following directory on
-      # updating.
-      # https://github.com/ROCmSoftwarePlatform/composable_kernel/tree/develop/library/src/tensor_operation_instance/gpu
-      device_gemm_instance
-      device_gemm_add_fastgelu_instance
-      device_gemm_fastgelu_instance
-      device_gemm_splitk_instance
-      device_gemm_streamk_instance
-      device_batched_gemm_instance
-      device_softmax_instance
-    )
-    target_compile_definitions(onnxruntime_providers_rocm PRIVATE USE_COMPOSABLE_KERNEL)
-  endif()
-
-  if(UNIX)
-    set_property(TARGET onnxruntime_providers_rocm APPEND_STRING PROPERTY LINK_FLAGS "-Xlinker --version-script=${ONNXRUNTIME_ROOT}/core/providers/rocm/version_script.lds -Xlinker --gc-sections")
-    target_link_libraries(onnxruntime_providers_rocm PRIVATE nsync::nsync_cpp)
-  else()
-    message(FATAL_ERROR "onnxruntime_providers_rocm unknown platform, need to specify shared library exports for it")
-  endif()
-
-  if (onnxruntime_ENABLE_ATEN)
-    target_compile_definitions(onnxruntime_providers_rocm PRIVATE ENABLE_ATEN)
-  endif()
-
-  install(TARGETS onnxruntime_providers_rocm
-          ARCHIVE  DESTINATION ${CMAKE_INSTALL_LIBDIR}
-          LIBRARY  DESTINATION ${CMAKE_INSTALL_LIBDIR}
-          RUNTIME  DESTINATION ${CMAKE_INSTALL_BINDIR})
-
+  include(onnxruntime_providers_rocm.cmake)
 endif()
 
 if (onnxruntime_USE_TVM)
-  add_definitions(-DUSE_TVM=1)
-  if (onnxruntime_TVM_USE_HASH)
-    add_definitions(-DUSE_TVM_HASH=1)
-  endif()
-
-  if (onnxruntime_TVM_USE_HASH)
-    file (GLOB_RECURSE onnxruntime_providers_tvm_cc_srcs CONFIGURE_DEPENDS
-      "${ONNXRUNTIME_ROOT}/core/providers/tvm/*.h"
-      "${ONNXRUNTIME_ROOT}/core/providers/tvm/*.cc"
-    )
-  else()
-    file (GLOB onnxruntime_providers_tvm_cc_srcs CONFIGURE_DEPENDS
-      "${ONNXRUNTIME_ROOT}/core/providers/tvm/*.h"
-      "${ONNXRUNTIME_ROOT}/core/providers/tvm/*.cc"
-    )
-  endif()
-
-  source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_tvm_cc_srcs})
-  onnxruntime_add_static_library(onnxruntime_providers_tvm ${onnxruntime_providers_tvm_cc_srcs})
-
-  if ( CMAKE_COMPILER_IS_GNUCC )
-    target_compile_options(onnxruntime_providers_tvm PRIVATE -Wno-unused-parameter -Wno-missing-field-initializers)
-  endif()
-
-  target_include_directories(onnxruntime_providers_tvm PRIVATE
-          ${TVM_INCLUDES}
-          ${PYTHON_INLCUDE_DIRS})
-  onnxruntime_add_include_to_target(onnxruntime_providers_tvm onnxruntime_common onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB} flatbuffers::flatbuffers Boost::mp11 safeint_interface)
-
-  add_dependencies(onnxruntime_providers_tvm ${onnxruntime_EXTERNAL_DEPENDENCIES})
-
-  if (onnxruntime_TVM_USE_HASH)
-    add_dependencies(onnxruntime_providers_tvm ippcp_s)
-    target_include_directories(onnxruntime_providers_tvm PRIVATE ${IPP_CRYPTO_INCLUDE_DIR})
-    target_link_libraries(onnxruntime_providers_tvm PRIVATE ippcp_s)
-  endif()
-
-  set_target_properties(onnxruntime_providers_tvm PROPERTIES FOLDER "ONNXRuntime")
-  set_target_properties(onnxruntime_providers_tvm PROPERTIES LINKER_LANGUAGE CXX)
-
-  if (WIN32 AND MSVC)
-    # wd4100: identifier' : unreferenced formal parameter
-    # wd4127: conditional expression is constant
-    # wd4244: conversion from 'int' to 'char', possible loss of data
-    # TODO: 4244 should not be disabled
-    target_compile_options(onnxruntime_providers_tvm PRIVATE "/wd4100" "/wd4127" "/wd4244")
-  else()
-    target_compile_options(onnxruntime_providers_tvm PRIVATE "-Wno-error=type-limits")
-  endif()
-  target_compile_definitions(onnxruntime_providers_tvm PUBLIC DMLC_USE_LOGGING_LIBRARY=<tvm/runtime/logging.h>)
-
-  install(FILES ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/tvm/tvm_provider_factory.h
-    DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/)
-
-  if (NOT onnxruntime_BUILD_SHARED_LIB)
-    install(TARGETS onnxruntime_providers_tvm
-            ARCHIVE   DESTINATION ${CMAKE_INSTALL_LIBDIR}
-            LIBRARY   DESTINATION ${CMAKE_INSTALL_LIBDIR}
-            RUNTIME   DESTINATION ${CMAKE_INSTALL_BINDIR}
-            FRAMEWORK DESTINATION ${CMAKE_INSTALL_BINDIR})
-  endif()
+  include(onnxruntime_providers_tvm.cmake)
 endif()
 
 if (onnxruntime_USE_XNNPACK)
-  add_compile_definitions(USE_XNNPACK=1)
-
-  file(GLOB_RECURSE onnxruntime_providers_xnnpack_cc_srcs CONFIGURE_DEPENDS
-    "${ONNXRUNTIME_INCLUDE_DIR}/core/providers/xnnpack/*.h"
-    "${ONNXRUNTIME_ROOT}/core/providers/xnnpack/*.h"
-    "${ONNXRUNTIME_ROOT}/core/providers/xnnpack/*.cc"
-    # utils for handling QDQ models
-    "${ONNXRUNTIME_ROOT}/core/providers/shared/node_unit/node_unit.h"
-    "${ONNXRUNTIME_ROOT}/core/providers/shared/node_unit/node_unit.cc"
-  )
-
-  source_group(TREE ${REPO_ROOT} FILES ${onnxruntime_providers_xnnpack_cc_srcs})
-  onnxruntime_add_static_library(onnxruntime_providers_xnnpack ${onnxruntime_providers_xnnpack_cc_srcs})
-  onnxruntime_add_include_to_target(onnxruntime_providers_xnnpack
-    onnxruntime_common onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB} XNNPACK pthreadpool flatbuffers::flatbuffers Boost::mp11 safeint_interface
-  )
-
-  add_dependencies(onnxruntime_providers_xnnpack onnx ${onnxruntime_EXTERNAL_DEPENDENCIES})
-  set_target_properties(onnxruntime_providers_xnnpack PROPERTIES FOLDER "ONNXRuntime")
-
-  set_target_properties(onnxruntime_providers_xnnpack PROPERTIES LINKER_LANGUAGE CXX)
-
-  if (NOT onnxruntime_BUILD_SHARED_LIB)
-    install(TARGETS onnxruntime_providers_xnnpack
-            ARCHIVE   DESTINATION ${CMAKE_INSTALL_LIBDIR}
-            LIBRARY   DESTINATION ${CMAKE_INSTALL_LIBDIR}
-            RUNTIME   DESTINATION ${CMAKE_INSTALL_BINDIR}
-            FRAMEWORK DESTINATION ${CMAKE_INSTALL_BINDIR})
-  endif()
-
-  # TODO fix shorten-64-to-32 warnings
-  # there are some in builds where sizeof(size_t) != sizeof(int64_t), e.g., in 'ONNX Runtime Web CI Pipeline'
-  if (HAS_SHORTEN_64_TO_32 AND NOT CMAKE_SIZEOF_VOID_P EQUAL 8)
-    target_compile_options(onnxruntime_providers_xnnpack PRIVATE -Wno-error=shorten-64-to-32)
-  endif()
+  include(onnxruntime_providers_xnnpack.cmake)
 endif()
 
 if (onnxruntime_USE_CANN)
-  add_definitions(-DUSE_CANN=1)
-
-  file(GLOB_RECURSE onnxruntime_providers_cann_cc_srcs CONFIGURE_DEPENDS
-    "${ONNXRUNTIME_ROOT}/core/providers/cann/*.h"
-    "${ONNXRUNTIME_ROOT}/core/providers/cann/*.cc"
-  )
-
-  # The shared_library files are in a separate list since they use precompiled headers, and the above files have them disabled.
-  file(GLOB_RECURSE onnxruntime_providers_cann_shared_srcs CONFIGURE_DEPENDS
-    "${ONNXRUNTIME_ROOT}/core/providers/shared_library/*.h"
-    "${ONNXRUNTIME_ROOT}/core/providers/shared_library/*.cc"
-  )
-
-  source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_cann_cc_srcs} ${onnxruntime_providers_cann_shared_srcs})
-  set(onnxruntime_providers_cann_src ${onnxruntime_providers_cann_cc_srcs} ${onnxruntime_providers_cann_shared_srcs})
-
-  onnxruntime_add_shared_library_module(onnxruntime_providers_cann ${onnxruntime_providers_cann_src})
-  onnxruntime_add_include_to_target(onnxruntime_providers_cann onnxruntime_common onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB} flatbuffers::flatbuffers Boost::mp11 safeint_interface)
-
-  add_dependencies(onnxruntime_providers_cann onnxruntime_providers_shared ${onnxruntime_EXTERNAL_DEPENDENCIES})
-  target_link_libraries(onnxruntime_providers_cann PRIVATE ascendcl acl_op_compiler fmk_onnx_parser nsync::nsync_cpp ${ABSEIL_LIBS} ${ONNXRUNTIME_PROVIDERS_SHARED})
-  target_link_directories(onnxruntime_providers_cann PRIVATE ${onnxruntime_CANN_HOME}/lib64)
-  target_include_directories(onnxruntime_providers_cann PRIVATE ${ONNXRUNTIME_ROOT} ${CMAKE_CURRENT_BINARY_DIR} ${eigen_INCLUDE_DIRS} ${onnxruntime_CANN_HOME} ${onnxruntime_CANN_HOME}/include)
-
-  set_target_properties(onnxruntime_providers_cann PROPERTIES LINKER_LANGUAGE CXX)
-  set_target_properties(onnxruntime_providers_cann PROPERTIES FOLDER "ONNXRuntime")
-
-  install(TARGETS onnxruntime_providers_cann
-          ARCHIVE  DESTINATION ${CMAKE_INSTALL_LIBDIR}
-          LIBRARY  DESTINATION ${CMAKE_INSTALL_LIBDIR}
-          RUNTIME  DESTINATION ${CMAKE_INSTALL_BINDIR})
+  include(onnxruntime_providers_cann.cmake)
 endif()
 
 if (onnxruntime_USE_AZURE)
-
-  file(GLOB_RECURSE onnxruntime_providers_azure_src CONFIGURE_DEPENDS
-    "${ONNXRUNTIME_ROOT}/core/providers/azure/*.h"
-    "${ONNXRUNTIME_ROOT}/core/providers/azure/*.cc"
-  )
-  source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_azure_src})
-  onnxruntime_add_static_library(onnxruntime_providers_azure ${onnxruntime_providers_azure_src})
-  add_dependencies(onnxruntime_providers_azure ${onnxruntime_EXTERNAL_DEPENDENCIES})
-  onnxruntime_add_include_to_target(onnxruntime_providers_azure onnxruntime_common onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB} flatbuffers::flatbuffers Boost::mp11)
-  target_link_libraries(onnxruntime_providers_azure PRIVATE onnx onnxruntime_common onnxruntime_framework)
-  set_target_properties(onnxruntime_providers_azure PROPERTIES FOLDER "ONNXRuntime")
-  set_target_properties(onnxruntime_providers_azure PROPERTIES LINKER_LANGUAGE CXX)
-
-  install(TARGETS onnxruntime_providers_azure
-          ARCHIVE   DESTINATION ${CMAKE_INSTALL_LIBDIR}
-          LIBRARY   DESTINATION ${CMAKE_INSTALL_LIBDIR}
-          RUNTIME   DESTINATION ${CMAKE_INSTALL_BINDIR}
-          FRAMEWORK DESTINATION ${CMAKE_INSTALL_BINDIR})
-endif()
-
-if (NOT onnxruntime_BUILD_SHARED_LIB)
-  install(TARGETS onnxruntime_providers
-          ARCHIVE   DESTINATION ${CMAKE_INSTALL_LIBDIR}
-          LIBRARY   DESTINATION ${CMAKE_INSTALL_LIBDIR}
-          RUNTIME   DESTINATION ${CMAKE_INSTALL_BINDIR}
-          FRAMEWORK DESTINATION ${CMAKE_INSTALL_BINDIR})
+  include(onnxruntime_providers_azure.cmake)
 endif()
diff --git a/cmake/onnxruntime_providers_acl.cmake b/cmake/onnxruntime_providers_acl.cmake
new file mode 100644
index 0000000000000..e23d2892713fc
--- /dev/null
+++ b/cmake/onnxruntime_providers_acl.cmake
@@ -0,0 +1,34 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+  add_definitions(-DUSE_ACL=1)
+  file(GLOB_RECURSE onnxruntime_providers_acl_cc_srcs
+    "${ONNXRUNTIME_ROOT}/core/providers/acl/*.h"
+    "${ONNXRUNTIME_ROOT}/core/providers/acl/*.cc"
+  )
+
+  source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_acl_cc_srcs})
+  onnxruntime_add_static_library(onnxruntime_providers_acl ${onnxruntime_providers_acl_cc_srcs})
+  onnxruntime_add_include_to_target(onnxruntime_providers_acl
+    onnxruntime_common onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB} flatbuffers::flatbuffers Boost::mp11 safeint_interface
+  )
+
+  target_link_libraries(onnxruntime_providers_acl -L$ENV{LD_LIBRARY_PATH})
+  add_dependencies(onnxruntime_providers_acl ${onnxruntime_EXTERNAL_DEPENDENCIES})
+  set_target_properties(onnxruntime_providers_acl PROPERTIES FOLDER "ONNXRuntime")
+  target_include_directories(onnxruntime_providers_acl
+    PRIVATE
+    ${ONNXRUNTIME_ROOT} ${eigen_INCLUDE_DIRS} ${onnxruntime_ACL_HOME} ${onnxruntime_ACL_HOME}/include
+  )
+  install(FILES ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/acl/acl_provider_factory.h
+    DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/
+  )
+  set_target_properties(onnxruntime_providers_acl PROPERTIES LINKER_LANGUAGE CXX)
+
+  if (NOT onnxruntime_BUILD_SHARED_LIB)
+    install(TARGETS onnxruntime_providers_acl
+            ARCHIVE   DESTINATION ${CMAKE_INSTALL_LIBDIR}
+            LIBRARY   DESTINATION ${CMAKE_INSTALL_LIBDIR}
+            RUNTIME   DESTINATION ${CMAKE_INSTALL_BINDIR}
+            FRAMEWORK DESTINATION ${CMAKE_INSTALL_BINDIR})
+  endif()
\ No newline at end of file
diff --git a/cmake/onnxruntime_providers_armnn.cmake b/cmake/onnxruntime_providers_armnn.cmake
new file mode 100644
index 0000000000000..33fadb7c64c2e
--- /dev/null
+++ b/cmake/onnxruntime_providers_armnn.cmake
@@ -0,0 +1,33 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+  add_definitions(-DUSE_ARMNN=1)
+  file(GLOB_RECURSE onnxruntime_providers_armnn_cc_srcs
+    "${ONNXRUNTIME_ROOT}/core/providers/armnn/*.h"
+    "${ONNXRUNTIME_ROOT}/core/providers/armnn/*.cc"
+  )
+
+  source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_armnn_cc_srcs})
+  onnxruntime_add_static_library(onnxruntime_providers_armnn ${onnxruntime_providers_armnn_cc_srcs})
+  onnxruntime_add_include_to_target(onnxruntime_providers_armnn
+    onnxruntime_common onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB} flatbuffers::flatbuffers Boost::mp11 safeint_interface
+  )
+
+  add_dependencies(onnxruntime_providers_armnn ${onnxruntime_EXTERNAL_DEPENDENCIES})
+  set_target_properties(onnxruntime_providers_armnn PROPERTIES FOLDER "ONNXRuntime")
+  target_include_directories(onnxruntime_providers_armnn PRIVATE
+    ${ONNXRUNTIME_ROOT} ${eigen_INCLUDE_DIRS} ${onnxruntime_ARMNN_HOME} ${onnxruntime_ARMNN_HOME}/include
+    ${onnxruntime_ACL_HOME} ${onnxruntime_ACL_HOME}/include
+  )
+  install(FILES ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/armnn/armnn_provider_factory.h
+    DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/)
+
+  set_target_properties(onnxruntime_providers_armnn PROPERTIES LINKER_LANGUAGE CXX)
+
+  if (NOT onnxruntime_BUILD_SHARED_LIB)
+    install(TARGETS onnxruntime_providers_armnn
+            ARCHIVE   DESTINATION ${CMAKE_INSTALL_LIBDIR}
+            LIBRARY   DESTINATION ${CMAKE_INSTALL_LIBDIR}
+            RUNTIME   DESTINATION ${CMAKE_INSTALL_BINDIR}
+            FRAMEWORK DESTINATION ${CMAKE_INSTALL_BINDIR})
+  endif()
\ No newline at end of file
diff --git a/cmake/onnxruntime_providers_azure.cmake b/cmake/onnxruntime_providers_azure.cmake
new file mode 100644
index 0000000000000..1b03563848556
--- /dev/null
+++ b/cmake/onnxruntime_providers_azure.cmake
@@ -0,0 +1,20 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+  file(GLOB_RECURSE onnxruntime_providers_azure_src CONFIGURE_DEPENDS
+    "${ONNXRUNTIME_ROOT}/core/providers/azure/*.h"
+    "${ONNXRUNTIME_ROOT}/core/providers/azure/*.cc"
+  )
+  source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_azure_src})
+  onnxruntime_add_static_library(onnxruntime_providers_azure ${onnxruntime_providers_azure_src})
+  add_dependencies(onnxruntime_providers_azure ${onnxruntime_EXTERNAL_DEPENDENCIES})
+  onnxruntime_add_include_to_target(onnxruntime_providers_azure onnxruntime_common onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB} flatbuffers::flatbuffers Boost::mp11)
+  target_link_libraries(onnxruntime_providers_azure PRIVATE onnx onnxruntime_common onnxruntime_framework)
+  set_target_properties(onnxruntime_providers_azure PROPERTIES FOLDER "ONNXRuntime")
+  set_target_properties(onnxruntime_providers_azure PROPERTIES LINKER_LANGUAGE CXX)
+
+  install(TARGETS onnxruntime_providers_azure
+          ARCHIVE   DESTINATION ${CMAKE_INSTALL_LIBDIR}
+          LIBRARY   DESTINATION ${CMAKE_INSTALL_LIBDIR}
+          RUNTIME   DESTINATION ${CMAKE_INSTALL_BINDIR}
+          FRAMEWORK DESTINATION ${CMAKE_INSTALL_BINDIR})
\ No newline at end of file
diff --git a/cmake/onnxruntime_providers_cann.cmake b/cmake/onnxruntime_providers_cann.cmake
new file mode 100644
index 0000000000000..0e26f7ee3a57b
--- /dev/null
+++ b/cmake/onnxruntime_providers_cann.cmake
@@ -0,0 +1,34 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+  add_definitions(-DUSE_CANN=1)
+
+  file(GLOB_RECURSE onnxruntime_providers_cann_cc_srcs CONFIGURE_DEPENDS
+    "${ONNXRUNTIME_ROOT}/core/providers/cann/*.h"
+    "${ONNXRUNTIME_ROOT}/core/providers/cann/*.cc"
+  )
+
+  # The shared_library files are in a separate list since they use precompiled headers, and the above files have them disabled.
+  file(GLOB_RECURSE onnxruntime_providers_cann_shared_srcs CONFIGURE_DEPENDS
+    "${ONNXRUNTIME_ROOT}/core/providers/shared_library/*.h"
+    "${ONNXRUNTIME_ROOT}/core/providers/shared_library/*.cc"
+  )
+
+  source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_cann_cc_srcs} ${onnxruntime_providers_cann_shared_srcs})
+  set(onnxruntime_providers_cann_src ${onnxruntime_providers_cann_cc_srcs} ${onnxruntime_providers_cann_shared_srcs})
+
+  onnxruntime_add_shared_library_module(onnxruntime_providers_cann ${onnxruntime_providers_cann_src})
+  onnxruntime_add_include_to_target(onnxruntime_providers_cann onnxruntime_common onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB} flatbuffers::flatbuffers Boost::mp11 safeint_interface)
+
+  add_dependencies(onnxruntime_providers_cann onnxruntime_providers_shared ${onnxruntime_EXTERNAL_DEPENDENCIES})
+  target_link_libraries(onnxruntime_providers_cann PRIVATE ascendcl acl_op_compiler fmk_onnx_parser nsync::nsync_cpp ${ABSEIL_LIBS} ${ONNXRUNTIME_PROVIDERS_SHARED})
+  target_link_directories(onnxruntime_providers_cann PRIVATE ${onnxruntime_CANN_HOME}/lib64)
+  target_include_directories(onnxruntime_providers_cann PRIVATE ${ONNXRUNTIME_ROOT} ${CMAKE_CURRENT_BINARY_DIR} ${eigen_INCLUDE_DIRS} ${onnxruntime_CANN_HOME} ${onnxruntime_CANN_HOME}/include)
+
+  set_target_properties(onnxruntime_providers_cann PROPERTIES LINKER_LANGUAGE CXX)
+  set_target_properties(onnxruntime_providers_cann PROPERTIES FOLDER "ONNXRuntime")
+
+  install(TARGETS onnxruntime_providers_cann
+          ARCHIVE  DESTINATION ${CMAKE_INSTALL_LIBDIR}
+          LIBRARY  DESTINATION ${CMAKE_INSTALL_LIBDIR}
+          RUNTIME  DESTINATION ${CMAKE_INSTALL_BINDIR})
\ No newline at end of file
diff --git a/cmake/onnxruntime_providers_coreml.cmake b/cmake/onnxruntime_providers_coreml.cmake
new file mode 100644
index 0000000000000..aa8c35526b274
--- /dev/null
+++ b/cmake/onnxruntime_providers_coreml.cmake
@@ -0,0 +1,107 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+  if (onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_EXTENDED_MINIMAL_BUILD)
+    message(FATAL_ERROR "CoreML EP can not be used in a basic minimal build. Please build with '--minimal_build extended'")
+  endif()
+
+  add_compile_definitions(USE_COREML=1)
+
+  # Compile CoreML proto definition to ${CMAKE_CURRENT_BINARY_DIR}/coreml
+  if (CMAKE_SYSTEM_NAME STREQUAL "Darwin" OR CMAKE_SYSTEM_NAME STREQUAL "iOS")
+    set(COREML_PROTO_ROOT ${PROJECT_SOURCE_DIR}/../onnxruntime/core/providers/coreml/mlmodel_format)
+    file(GLOB coreml_proto_srcs
+      "${COREML_PROTO_ROOT}/*.proto"
+    )
+    onnxruntime_add_static_library(onnxruntime_coreml_proto ${coreml_proto_srcs})
+    target_include_directories(onnxruntime_coreml_proto PUBLIC $<TARGET_PROPERTY:${PROTOBUF_LIB},INTERFACE_INCLUDE_DIRECTORIES> "${CMAKE_CURRENT_BINARY_DIR}")
+    target_compile_definitions(onnxruntime_coreml_proto PUBLIC $<TARGET_PROPERTY:${PROTOBUF_LIB},INTERFACE_COMPILE_DEFINITIONS>)
+    set_target_properties(onnxruntime_coreml_proto PROPERTIES COMPILE_FLAGS "-fvisibility=hidden")
+    set_target_properties(onnxruntime_coreml_proto PROPERTIES COMPILE_FLAGS "-fvisibility-inlines-hidden")
+    set(_src_sub_dir "coreml/")
+    onnxruntime_protobuf_generate(
+      APPEND_PATH
+      GEN_SRC_SUB_DIR ${_src_sub_dir}
+      IMPORT_DIRS ${COREML_PROTO_ROOT}
+      TARGET onnxruntime_coreml_proto
+    )
+
+    if (NOT onnxruntime_BUILD_SHARED_LIB)
+      install(TARGETS onnxruntime_coreml_proto
+              ARCHIVE   DESTINATION ${CMAKE_INSTALL_LIBDIR}
+              LIBRARY   DESTINATION ${CMAKE_INSTALL_LIBDIR}
+              RUNTIME   DESTINATION ${CMAKE_INSTALL_BINDIR}
+              FRAMEWORK DESTINATION ${CMAKE_INSTALL_BINDIR}
+      )
+    endif()
+  endif()
+
+  # These are shared utils,
+  # TODO, move this to a separated lib when used by EPs other than NNAPI and CoreML
+  file(GLOB_RECURSE onnxruntime_providers_shared_utils_cc_srcs CONFIGURE_DEPENDS
+    "${ONNXRUNTIME_ROOT}/core/providers/shared/utils/utils.h"
+    "${ONNXRUNTIME_ROOT}/core/providers/shared/utils/utils.cc"
+  )
+
+  file(GLOB
+    onnxruntime_providers_coreml_cc_srcs_top CONFIGURE_DEPENDS
+    "${ONNXRUNTIME_ROOT}/core/providers/coreml/*.h"
+    "${ONNXRUNTIME_ROOT}/core/providers/coreml/*.cc"
+  )
+
+  # Add builder source code
+  file(GLOB_RECURSE
+    onnxruntime_providers_coreml_cc_srcs_nested CONFIGURE_DEPENDS
+    "${ONNXRUNTIME_ROOT}/core/providers/coreml/builders/*.h"
+    "${ONNXRUNTIME_ROOT}/core/providers/coreml/builders/*.cc"
+  )
+  if (NOT CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND NOT CMAKE_SYSTEM_NAME STREQUAL "iOS")
+    list(REMOVE_ITEM onnxruntime_providers_coreml_cc_srcs_nested
+    "${ONNXRUNTIME_ROOT}/core/providers/coreml/builders/model_builder.h"
+    "${ONNXRUNTIME_ROOT}/core/providers/coreml/builders/model_builder.cc"
+    )
+  endif()
+
+  # Add CoreML objective c++ source code
+  if (CMAKE_SYSTEM_NAME STREQUAL "Darwin" OR CMAKE_SYSTEM_NAME STREQUAL "iOS")
+    file(GLOB
+      onnxruntime_providers_coreml_objcc_srcs CONFIGURE_DEPENDS
+      "${ONNXRUNTIME_ROOT}/core/providers/coreml/model/model.h"
+      "${ONNXRUNTIME_ROOT}/core/providers/coreml/model/model.mm"
+      "${ONNXRUNTIME_ROOT}/core/providers/coreml/model/host_utils.h"
+      "${ONNXRUNTIME_ROOT}/core/providers/coreml/model/host_utils.mm"
+    )
+  endif()
+
+  set(onnxruntime_providers_coreml_cc_srcs
+    ${onnxruntime_providers_coreml_cc_srcs_top}
+    ${onnxruntime_providers_coreml_cc_srcs_nested}
+    ${onnxruntime_providers_shared_utils_cc_srcs}
+  )
+
+  source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_coreml_cc_srcs})
+  onnxruntime_add_static_library(onnxruntime_providers_coreml
+    ${onnxruntime_providers_coreml_cc_srcs} ${onnxruntime_providers_coreml_objcc_srcs}
+  )
+  onnxruntime_add_include_to_target(onnxruntime_providers_coreml
+    onnxruntime_common onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB}  flatbuffers::flatbuffers Boost::mp11 safeint_interface
+  )
+  if (CMAKE_SYSTEM_NAME STREQUAL "Darwin" OR CMAKE_SYSTEM_NAME STREQUAL "iOS")
+    onnxruntime_add_include_to_target(onnxruntime_providers_coreml onnxruntime_coreml_proto)
+    target_link_libraries(onnxruntime_providers_coreml PRIVATE onnxruntime_coreml_proto "-framework Foundation" "-framework CoreML")
+    add_dependencies(onnxruntime_providers_coreml onnxruntime_coreml_proto)
+  endif()
+  add_dependencies(onnxruntime_providers_coreml ${onnxruntime_EXTERNAL_DEPENDENCIES})
+
+  set_target_properties(onnxruntime_providers_coreml PROPERTIES CXX_STANDARD_REQUIRED ON)
+  set_target_properties(onnxruntime_providers_coreml PROPERTIES FOLDER "ONNXRuntime")
+  target_include_directories(onnxruntime_providers_coreml PRIVATE ${ONNXRUNTIME_ROOT} ${coreml_INCLUDE_DIRS})
+  set_target_properties(onnxruntime_providers_coreml PROPERTIES LINKER_LANGUAGE CXX)
+
+  if (NOT onnxruntime_BUILD_SHARED_LIB)
+    install(TARGETS onnxruntime_providers_coreml
+            ARCHIVE   DESTINATION ${CMAKE_INSTALL_LIBDIR}
+            LIBRARY   DESTINATION ${CMAKE_INSTALL_LIBDIR}
+            RUNTIME   DESTINATION ${CMAKE_INSTALL_BINDIR}
+            FRAMEWORK DESTINATION ${CMAKE_INSTALL_BINDIR})
+  endif()
\ No newline at end of file
diff --git a/cmake/onnxruntime_providers_cpu.cmake b/cmake/onnxruntime_providers_cpu.cmake
new file mode 100644
index 0000000000000..f60faa4d39116
--- /dev/null
+++ b/cmake/onnxruntime_providers_cpu.cmake
@@ -0,0 +1,261 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+file(GLOB_RECURSE onnxruntime_providers_srcs CONFIGURE_DEPENDS
+  "${ONNXRUNTIME_ROOT}/core/providers/cpu/*.h"
+  "${ONNXRUNTIME_ROOT}/core/providers/cpu/*.cc"
+)
+
+if(onnxruntime_DISABLE_ML_OPS)
+  list(FILTER onnxruntime_providers_srcs EXCLUDE REGEX ".*/ml/.*")
+endif()
+
+file(GLOB_RECURSE onnxruntime_cpu_contrib_ops_srcs CONFIGURE_DEPENDS
+  "${ONNXRUNTIME_ROOT}/contrib_ops/cpu/*.h"
+  "${ONNXRUNTIME_ROOT}/contrib_ops/cpu/*.cc"
+)
+
+file(GLOB_RECURSE onnxruntime_cuda_contrib_ops_cc_srcs CONFIGURE_DEPENDS
+  "${ONNXRUNTIME_ROOT}/contrib_ops/cuda/*.h"
+  "${ONNXRUNTIME_ROOT}/contrib_ops/cuda/*.cc"
+)
+
+file(GLOB_RECURSE onnxruntime_cuda_contrib_ops_cu_srcs CONFIGURE_DEPENDS
+  "${ONNXRUNTIME_ROOT}/contrib_ops/cuda/*.cu"
+  "${ONNXRUNTIME_ROOT}/contrib_ops/cuda/*.cuh"
+)
+
+file(GLOB_RECURSE onnxruntime_rocm_contrib_ops_cc_srcs CONFIGURE_DEPENDS
+  "${ONNXRUNTIME_ROOT}/contrib_ops/rocm/*.h"
+  "${ONNXRUNTIME_ROOT}/contrib_ops/rocm/*.cc"
+)
+
+file(GLOB_RECURSE onnxruntime_rocm_contrib_ops_cu_srcs CONFIGURE_DEPENDS
+  "${ONNXRUNTIME_ROOT}/contrib_ops/rocm/*.cu"
+  "${ONNXRUNTIME_ROOT}/contrib_ops/rocm/*.cuh"
+)
+
+file(GLOB_RECURSE onnxruntime_js_contrib_ops_cc_srcs CONFIGURE_DEPENDS
+  "${ONNXRUNTIME_ROOT}/contrib_ops/js/*.h"
+  "${ONNXRUNTIME_ROOT}/contrib_ops/js/*.cc"
+)
+
+file(GLOB onnxruntime_providers_common_srcs CONFIGURE_DEPENDS
+  "${ONNXRUNTIME_ROOT}/core/providers/*.h"
+  "${ONNXRUNTIME_ROOT}/core/providers/*.cc"
+  "${ONNXRUNTIME_ROOT}/core/providers/op_kernel_type_control_overrides.inc"
+)
+
+
+source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_common_srcs} ${onnxruntime_providers_srcs})
+
+set(onnxruntime_providers_src ${onnxruntime_providers_common_srcs} ${onnxruntime_providers_srcs})
+
+# disable contrib ops conditionally
+if(NOT onnxruntime_DISABLE_CONTRIB_OPS)
+  if (NOT onnxruntime_ENABLE_ATEN)
+    list(REMOVE_ITEM onnxruntime_cpu_contrib_ops_srcs
+      "${ONNXRUNTIME_ROOT}/contrib_ops/cpu/aten_ops/aten_op.h"
+      "${ONNXRUNTIME_ROOT}/contrib_ops/cpu/aten_ops/aten_op.cc"
+      "${ONNXRUNTIME_ROOT}/contrib_ops/cpu/aten_ops/aten_op_executor.cc"
+    )
+  endif()
+  # add using ONNXRUNTIME_ROOT so they show up under the 'contrib_ops' folder in Visual Studio
+  source_group(TREE ${ONNXRUNTIME_ROOT} FILES ${onnxruntime_cpu_contrib_ops_srcs})
+  list(APPEND onnxruntime_providers_src ${onnxruntime_cpu_contrib_ops_srcs})
+endif()
+
+if (onnxruntime_ENABLE_TRAINING_OPS AND NOT onnxruntime_ENABLE_TRAINING)
+  file(GLOB_RECURSE onnxruntime_cpu_training_ops_srcs CONFIGURE_DEPENDS
+    "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/*.h"
+    "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/*.cc"
+  )
+
+  source_group(TREE ${ORTTRAINING_ROOT}/ FILES ${onnxruntime_cpu_training_ops_srcs})
+  list(APPEND onnxruntime_providers_src ${onnxruntime_cpu_training_ops_srcs})
+
+  file(GLOB_RECURSE onnxruntime_cpu_full_training_only_srcs
+    "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/collective/*.cc"
+    "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/collective/*.h"
+    "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/communication/*.cc"
+    "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/communication/*.h"
+    "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/controlflow/record.cc"
+    "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/controlflow/record.h"
+    "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/controlflow/wait.cc"
+    "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/controlflow/wait.h"
+    "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/controlflow/yield.cc"
+    "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/controlflow/yield.h"
+    "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/gist/*.cc"
+    "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/gist/*.h"
+    "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/tensorboard/*.cc"
+    "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/tensorboard/*.h"
+    "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/torch/*.cc"
+    "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/torch/*.h"
+    "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/triton/triton_op.cc"
+    "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/triton/triton_op.h"
+  )
+
+  list(REMOVE_ITEM onnxruntime_providers_src ${onnxruntime_cpu_full_training_only_srcs})
+endif()
+
+if (onnxruntime_ENABLE_ATEN)
+  file(GLOB_RECURSE onnxruntime_providers_dlpack_srcs CONFIGURE_DEPENDS
+    "${ONNXRUNTIME_ROOT}/core/dlpack/dlpack_converter.cc"
+    "${ONNXRUNTIME_ROOT}/core/dlpack/dlpack_converter.h"
+  )
+  set(onnxruntime_providers_dlpack_srcs ${onnxruntime_providers_dlpack_srcs})
+  source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_dlpack_srcs})
+  list(APPEND onnxruntime_providers_src ${onnxruntime_providers_dlpack_srcs})
+endif()
+
+if (onnxruntime_ENABLE_TRAINING)
+  file(GLOB_RECURSE onnxruntime_cpu_training_ops_srcs CONFIGURE_DEPENDS
+    "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/*.h"
+    "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/*.cc"
+    "${ORTTRAINING_SOURCE_DIR}/core/framework/*.h"
+    "${ORTTRAINING_SOURCE_DIR}/core/framework/*.cc"
+    "${ORTTRAINING_SOURCE_DIR}/core/framework/adasum/*"
+    "${ORTTRAINING_SOURCE_DIR}/core/framework/communication/*"
+  )
+
+  # This is already built in framework.cmake
+  file(GLOB_RECURSE onnxruntime_training_framework_excude_srcs CONFIGURE_DEPENDS
+      "${ORTTRAINING_SOURCE_DIR}/core/framework/torch/*.h"
+      "${ORTTRAINING_SOURCE_DIR}/core/framework/torch/*.cc"
+      "${ORTTRAINING_SOURCE_DIR}/core/framework/triton/*.h"
+      "${ORTTRAINING_SOURCE_DIR}/core/framework/triton/*.cc"
+  )
+
+  list(REMOVE_ITEM onnxruntime_cpu_training_ops_srcs ${onnxruntime_training_framework_excude_srcs})
+
+  source_group(TREE ${ORTTRAINING_ROOT}/ FILES ${onnxruntime_cpu_training_ops_srcs})
+  list(APPEND onnxruntime_providers_src ${onnxruntime_cpu_training_ops_srcs})
+endif()
+
+if (onnxruntime_REDUCED_OPS_BUILD)
+  substitute_op_reduction_srcs(onnxruntime_providers_src)
+endif()
+onnxruntime_add_static_library(onnxruntime_providers ${onnxruntime_providers_src})
+if (onnxruntime_REDUCED_OPS_BUILD)
+  add_op_reduction_include_dirs(onnxruntime_providers)
+endif()
+
+if (HAS_BITWISE_INSTEAD_OF_LOGICAL)
+  target_compile_options(onnxruntime_providers PRIVATE "-Wno-bitwise-instead-of-logical")
+endif()
+
+if (MSVC)
+   target_compile_options(onnxruntime_providers PRIVATE "/bigobj")
+#   if(NOT CMAKE_SIZEOF_VOID_P EQUAL 8)
+#      target_compile_options(onnxruntime_providers PRIVATE "/wd4244")
+#   endif()
+endif()
+onnxruntime_add_include_to_target(onnxruntime_providers onnxruntime_common onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB} flatbuffers::flatbuffers Boost::mp11 safeint_interface)
+
+if (onnxruntime_BUILD_MS_EXPERIMENTAL_OPS)
+  target_compile_definitions(onnxruntime_providers PRIVATE BUILD_MS_EXPERIMENTAL_OPS=1)
+endif()
+
+if(HAS_DEPRECATED_COPY)
+  #temporarily ignore this warning
+  #see: https://en.wikipedia.org/wiki/Rule_of_three_(C%2B%2B_programming)
+  set_source_files_properties("${ONNXRUNTIME_ROOT}/core/providers/cpu/math/matmul_integer.cc" PROPERTIES COMPILE_FLAGS -Wno-deprecated-copy)
+  set_source_files_properties("${ONNXRUNTIME_ROOT}/core/providers/cpu/math/quantize_linear_matmul.cc" PROPERTIES COMPILE_FLAGS -Wno-deprecated-copy)
+  set_source_files_properties("${ONNXRUNTIME_ROOT}/core/providers/cpu/nn/qlinearconv.cc" PROPERTIES COMPILE_FLAGS -Wno-deprecated-copy)
+  set_source_files_properties("${ONNXRUNTIME_ROOT}/core/providers/cpu/nn/conv_integer.cc" PROPERTIES COMPILE_FLAGS -Wno-deprecated-copy)
+  set_source_files_properties("${ONNXRUNTIME_ROOT}/core/providers/cpu/generator/random.cc" PROPERTIES COMPILE_FLAGS -Wno-deprecated-copy)
+  set_source_files_properties("${ONNXRUNTIME_ROOT}/core/providers/cpu/tensor/onehot.cc" PROPERTIES COMPILE_FLAGS -Wno-deprecated-copy)
+  set_source_files_properties("${ONNXRUNTIME_ROOT}/core/providers/cpu/tensor/where_op.cc" PROPERTIES COMPILE_FLAGS -Wno-deprecated-copy)
+endif()
+
+# This is enabled only for Adasum files in training mode.
+# The flags won't be applied globally since some high-precision training and inferencing ops will incur precision loss.
+if (onnxruntime_ENABLE_CPU_FP16_OPS)
+  set_source_files_properties(${ORTTRAINING_SOURCE_DIR}/core/framework/adasum/adasum_mpi.cc PROPERTIES COMPILE_FLAGS " -fassociative-math -ffast-math -ftree-vectorize -funsafe-math-optimizations -mf16c -mavx -mfma ")
+  set_source_files_properties(${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/collective/adasum_kernels.cc PROPERTIES COMPILE_FLAGS " -fassociative-math -ffast-math -ftree-vectorize -funsafe-math-optimizations -mf16c -mavx -mfma ")
+  set_source_files_properties(${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/collective/adasum_kernels.cc PROPERTIES COMPILE_FLAGS " -fassociative-math -ffast-math -ftree-vectorize -funsafe-math-optimizations -mf16c -mavx -mfma ")
+endif()
+
+target_include_directories(onnxruntime_providers PRIVATE ${ONNXRUNTIME_ROOT} ${eigen_INCLUDE_DIRS})
+onnxruntime_add_include_to_target(onnxruntime_providers re2::re2)
+add_dependencies(onnxruntime_providers onnx ${onnxruntime_EXTERNAL_DEPENDENCIES})
+
+if (onnxruntime_ENABLE_TRAINING_OPS)
+  target_include_directories(onnxruntime_providers PRIVATE ${ORTTRAINING_ROOT})
+endif()
+
+if (onnxruntime_ENABLE_ATEN)
+  target_compile_definitions(onnxruntime_providers PRIVATE ENABLE_ATEN)
+  # DLPack is a header-only dependency
+  set(DLPACK_INCLUDE_DIR ${dlpack_SOURCE_DIR}/include)
+  target_include_directories(onnxruntime_providers PRIVATE ${DLPACK_INCLUDE_DIR})
+endif()
+
+if (onnxruntime_ENABLE_TRAINING)
+  add_dependencies(onnxruntime_providers tensorboard)
+  onnxruntime_add_include_to_target(onnxruntime_providers tensorboard)
+  if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP OR onnxruntime_ENABLE_TRITON)
+    onnxruntime_add_include_to_target(onnxruntime_providers Python::Module)
+  endif()
+
+  if (onnxruntime_USE_NCCL OR onnxruntime_USE_MPI)
+    target_include_directories(onnxruntime_providers PUBLIC ${MPI_CXX_INCLUDE_DIRS})
+  endif()
+endif()
+
+install(FILES ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/cpu/cpu_provider_factory.h  DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/)
+set_target_properties(onnxruntime_providers PROPERTIES LINKER_LANGUAGE CXX)
+set_target_properties(onnxruntime_providers PROPERTIES FOLDER "ONNXRuntime")
+
+if (NOT onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_EXTENDED_MINIMAL_BUILD
+                                  AND NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin|iOS"
+                                  AND NOT CMAKE_SYSTEM_NAME STREQUAL "Android"
+                                  AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+  file(GLOB onnxruntime_providers_shared_cc_srcs CONFIGURE_DEPENDS
+  "${ONNXRUNTIME_ROOT}/core/providers/shared/*.h"
+  "${ONNXRUNTIME_ROOT}/core/providers/shared/*.cc"
+  )
+
+  source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_shared_cc_srcs})
+  onnxruntime_add_shared_library(onnxruntime_providers_shared ${onnxruntime_providers_shared_cc_srcs} "${ONNXRUNTIME_ROOT}/core/dll/onnxruntime.rc")
+  set_target_properties(onnxruntime_providers_shared PROPERTIES FOLDER "ONNXRuntime")
+  set_target_properties(onnxruntime_providers_shared PROPERTIES LINKER_LANGUAGE CXX)
+
+  target_compile_definitions(onnxruntime_providers_shared PRIVATE VER_MAJOR=${VERSION_MAJOR_PART})
+  target_compile_definitions(onnxruntime_providers_shared PRIVATE VER_MINOR=${VERSION_MINOR_PART})
+  target_compile_definitions(onnxruntime_providers_shared PRIVATE VER_BUILD=${VERSION_BUILD_PART})
+  target_compile_definitions(onnxruntime_providers_shared PRIVATE VER_PRIVATE=${VERSION_PRIVATE_PART})
+  target_compile_definitions(onnxruntime_providers_shared PRIVATE VER_STRING=\"${VERSION_STRING}\")
+  target_compile_definitions(onnxruntime_providers_shared PRIVATE FILE_NAME=\"onnxruntime_providers_shared.dll\")
+
+
+  # On Apple/Unix we don't directly link with this library as we load it with RTLD_GLOBAL, so this is only set to the actual library on WIN32
+  # But, in exchange we need to manually add Boost::mp11 to include dirs for every EP.
+  # It is because "provider_api.h" includes core/framework/op_kernel.h which includes op_kernel.h which includes "boost/mp11.hpp"
+  set(ONNXRUNTIME_PROVIDERS_SHARED)
+
+  if(APPLE)
+  set_property(TARGET onnxruntime_providers_shared APPEND_STRING PROPERTY LINK_FLAGS "-Xlinker -exported_symbols_list ${ONNXRUNTIME_ROOT}/core/providers/shared/exported_symbols.lst")
+  elseif(UNIX)
+  set_property(TARGET onnxruntime_providers_shared APPEND_STRING PROPERTY LINK_FLAGS "-Xlinker --version-script=${ONNXRUNTIME_ROOT}/core/providers/shared/version_script.lds -Xlinker --gc-sections")
+  elseif(WIN32)
+  set_property(TARGET onnxruntime_providers_shared APPEND_STRING PROPERTY LINK_FLAGS "-DEF:${ONNXRUNTIME_ROOT}/core/providers/shared/symbols.def")
+  set(ONNXRUNTIME_PROVIDERS_SHARED onnxruntime_providers_shared)
+  else()
+  message(FATAL_ERROR "onnxruntime_providers_shared unknown platform, need to specify shared library exports for it")
+  endif()
+
+  install(TARGETS onnxruntime_providers_shared
+          ARCHIVE  DESTINATION ${CMAKE_INSTALL_LIBDIR}
+          LIBRARY  DESTINATION ${CMAKE_INSTALL_LIBDIR}
+          RUNTIME  DESTINATION ${CMAKE_INSTALL_BINDIR}
+  )
+endif()
+
+if (NOT onnxruntime_BUILD_SHARED_LIB)
+  install(TARGETS onnxruntime_providers
+          ARCHIVE   DESTINATION ${CMAKE_INSTALL_LIBDIR}
+          LIBRARY   DESTINATION ${CMAKE_INSTALL_LIBDIR}
+          RUNTIME   DESTINATION ${CMAKE_INSTALL_BINDIR}
+          FRAMEWORK DESTINATION ${CMAKE_INSTALL_BINDIR})
+endif()
\ No newline at end of file
diff --git a/cmake/onnxruntime_providers_cuda.cmake b/cmake/onnxruntime_providers_cuda.cmake
new file mode 100644
index 0000000000000..cf298aee9fa85
--- /dev/null
+++ b/cmake/onnxruntime_providers_cuda.cmake
@@ -0,0 +1,252 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+  file(GLOB_RECURSE onnxruntime_providers_cuda_cc_srcs CONFIGURE_DEPENDS
+    "${ONNXRUNTIME_ROOT}/core/providers/cuda/*.h"
+    "${ONNXRUNTIME_ROOT}/core/providers/cuda/*.cc"
+  )
+  # Remove pch files
+  list(REMOVE_ITEM onnxruntime_providers_cuda_cc_srcs
+    "${ONNXRUNTIME_ROOT}/core/providers/cuda/cuda_pch.h"
+    "${ONNXRUNTIME_ROOT}/core/providers/cuda/cuda_pch.cc"
+  )
+
+  # The shared_library files are in a separate list since they use precompiled headers, and the above files have them disabled.
+  file(GLOB_RECURSE onnxruntime_providers_cuda_shared_srcs CONFIGURE_DEPENDS
+    "${ONNXRUNTIME_ROOT}/core/providers/shared_library/*.h"
+    "${ONNXRUNTIME_ROOT}/core/providers/shared_library/*.cc"
+  )
+  file(GLOB_RECURSE onnxruntime_providers_cuda_cu_srcs CONFIGURE_DEPENDS
+    "${ONNXRUNTIME_ROOT}/core/providers/cuda/*.cu"
+    "${ONNXRUNTIME_ROOT}/core/providers/cuda/*.cuh"
+  )
+
+  source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_cuda_cc_srcs} ${onnxruntime_providers_cuda_shared_srcs} ${onnxruntime_providers_cuda_cu_srcs})
+  set(onnxruntime_providers_cuda_src ${onnxruntime_providers_cuda_cc_srcs} ${onnxruntime_providers_cuda_shared_srcs} ${onnxruntime_providers_cuda_cu_srcs})
+
+  # disable contrib ops conditionally
+  if(NOT onnxruntime_DISABLE_CONTRIB_OPS)
+    if (NOT onnxruntime_ENABLE_ATEN)
+      list(REMOVE_ITEM onnxruntime_cuda_contrib_ops_cc_srcs
+        "${ONNXRUNTIME_ROOT}/contrib_ops/cuda/aten_ops/aten_op.cc"
+      )
+    endif()
+    if (NOT onnxruntime_USE_NCCL)
+      list(REMOVE_ITEM onnxruntime_cuda_contrib_ops_cc_srcs
+        "${ONNXRUNTIME_ROOT}/contrib_ops/cuda/collective/nccl_kernels.cc"
+        "${ONNXRUNTIME_ROOT}/contrib_ops/cuda/collective/sharding_spec.cc"
+        "${ONNXRUNTIME_ROOT}/contrib_ops/cuda/collective/sharding.cc"
+        "${ONNXRUNTIME_ROOT}/contrib_ops/cuda/collective/distributed_matmul.cc"
+        "${ONNXRUNTIME_ROOT}/contrib_ops/cuda/collective/distributed_slice.cc"
+        "${ONNXRUNTIME_ROOT}/contrib_ops/cuda/collective/distributed_reshape.cc"
+        "${ONNXRUNTIME_ROOT}/contrib_ops/cuda/collective/distributed_expand.cc"
+        "${ONNXRUNTIME_ROOT}/contrib_ops/cuda/collective/distributed_reduce.cc"
+        "${ONNXRUNTIME_ROOT}/contrib_ops/cuda/collective/distributed_unsqueeze.cc"
+        "${ONNXRUNTIME_ROOT}/contrib_ops/cuda/collective/distributed_squeeze.cc"
+      )
+    endif()
+    # add using ONNXRUNTIME_ROOT so they show up under the 'contrib_ops' folder in Visual Studio
+    source_group(TREE ${ONNXRUNTIME_ROOT} FILES ${onnxruntime_cuda_contrib_ops_cc_srcs} ${onnxruntime_cuda_contrib_ops_cu_srcs})
+    list(APPEND onnxruntime_providers_cuda_src ${onnxruntime_cuda_contrib_ops_cc_srcs} ${onnxruntime_cuda_contrib_ops_cu_srcs})
+  endif()
+
+  if (onnxruntime_ENABLE_TRAINING_OPS)
+    file(GLOB_RECURSE onnxruntime_cuda_training_ops_cc_srcs CONFIGURE_DEPENDS
+      "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/*.h"
+      "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/*.cc"
+    )
+
+    file(GLOB_RECURSE onnxruntime_cuda_training_ops_cu_srcs CONFIGURE_DEPENDS
+      "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/*.cu"
+      "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/*.cuh"
+    )
+
+    source_group(TREE ${ORTTRAINING_ROOT} FILES ${onnxruntime_cuda_training_ops_cc_srcs} ${onnxruntime_cuda_training_ops_cu_srcs})
+    list(APPEND onnxruntime_providers_cuda_src ${onnxruntime_cuda_training_ops_cc_srcs} ${onnxruntime_cuda_training_ops_cu_srcs})
+
+    if(NOT onnxruntime_ENABLE_TRAINING)
+      file(GLOB_RECURSE onnxruntime_cuda_full_training_only_srcs
+        "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/collective/*.cc"
+        "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/collective/*.h"
+        "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/communication/*.cc"
+        "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/communication/*.h"
+        "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/controlflow/record.cc"
+        "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/controlflow/record.h"
+        "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/controlflow/wait.cc"
+        "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/controlflow/wait.h"
+        "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/controlflow/yield.cc"
+        "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/gist/*.cc"
+        "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/gist/*.h"
+        "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/gist/*.cu"
+        "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/torch/*.cc"
+        "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/torch/*.h"
+        "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/triton/triton_op.cc"
+      )
+
+      list(REMOVE_ITEM onnxruntime_providers_cuda_src ${onnxruntime_cuda_full_training_only_srcs})
+    elseif(WIN32 OR NOT onnxruntime_USE_NCCL)
+      # NCCL is not support in Windows build
+      file(GLOB_RECURSE onnxruntime_cuda_nccl_op_srcs
+        "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/collective/nccl_common.cc"
+        "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/collective/nccl_kernels.cc"
+        "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/collective/megatron.cc"
+      )
+      list(REMOVE_ITEM onnxruntime_providers_cuda_src ${onnxruntime_cuda_nccl_op_srcs})
+    endif()
+  endif()
+
+  if (onnxruntime_REDUCED_OPS_BUILD)
+    substitute_op_reduction_srcs(onnxruntime_providers_cuda_src)
+  endif()
+  if(onnxruntime_ENABLE_CUDA_EP_INTERNAL_TESTS)
+    # cuda_provider_interface.cc is removed from the object target: onnxruntime_providers_cuda_obj and
+    # add to the lib onnxruntime_providers_cuda separatedly.
+    # onnxruntime_providers_cuda_ut can share all the object files with onnxruntime_providers_cuda except cuda_provider_interface.cc.
+    set(cuda_provider_interface_src ${ONNXRUNTIME_ROOT}/core/providers/cuda/cuda_provider_interface.cc)
+    list(REMOVE_ITEM onnxruntime_providers_cuda_src ${cuda_provider_interface_src})
+    onnxruntime_add_object_library(onnxruntime_providers_cuda_obj ${onnxruntime_providers_cuda_src})
+    onnxruntime_add_shared_library_module(onnxruntime_providers_cuda ${cuda_provider_interface_src} $<TARGET_OBJECTS:onnxruntime_providers_cuda_obj>)
+  else()
+    onnxruntime_add_shared_library_module(onnxruntime_providers_cuda ${onnxruntime_providers_cuda_src})
+  endif()
+  # config_cuda_provider_shared_module can be used to config onnxruntime_providers_cuda_obj, onnxruntime_providers_cuda & onnxruntime_providers_cuda_ut.
+  # This function guarantees that all 3 targets have the same configurations.
+  function(config_cuda_provider_shared_module target)
+    if (onnxruntime_REDUCED_OPS_BUILD)
+      add_op_reduction_include_dirs(${target})
+    endif()
+
+    if (HAS_GUARD_CF)
+      target_compile_options(${target} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:-Xcompiler /guard:cf>")
+    endif()
+    if (HAS_QSPECTRE)
+      target_compile_options(${target} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:-Xcompiler /Qspectre>")
+    endif()
+    foreach(ORT_FLAG ${ORT_WARNING_FLAGS})
+        target_compile_options(${target} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:-Xcompiler \"${ORT_FLAG}\">")
+    endforeach()
+    # CUDA 11.3+ supports parallel compilation
+    # https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#options-for-guiding-compiler-driver-threads
+    if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.3)
+      option(onnxruntime_NVCC_THREADS "Number of threads that NVCC can use for compilation." 1)
+      target_compile_options(${target} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--threads \"${onnxruntime_NVCC_THREADS}\">")
+    endif()
+    if (UNIX)
+      target_compile_options(${target} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:-Xcompiler -Wno-reorder>"
+                  "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:-Wno-reorder>")
+      target_compile_options(${target} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:-Xcompiler -Wno-error=sign-compare>"
+                  "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:-Wno-error=sign-compare>")
+    else()
+      #mutex.cuh(91): warning C4834: discarding return value of function with 'nodiscard' attribute
+      target_compile_options(${target} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:-Xcompiler /wd4834>")
+      target_compile_options(${target} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:-Xcompiler /wd4127>")
+    endif()
+
+    onnxruntime_add_include_to_target(${target} onnxruntime_common onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB} flatbuffers::flatbuffers)
+    if (onnxruntime_ENABLE_TRAINING_OPS)
+      onnxruntime_add_include_to_target(${target} onnxruntime_training)
+      if (onnxruntime_ENABLE_TRAINING)
+        target_link_libraries(${target} PRIVATE onnxruntime_training)
+      endif()
+      if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP OR onnxruntime_ENABLE_TRITON)
+        onnxruntime_add_include_to_target(${target} Python::Module)
+      endif()
+    endif()
+
+    add_dependencies(${target} onnxruntime_providers_shared ${onnxruntime_EXTERNAL_DEPENDENCIES})
+    target_link_libraries(${target} PRIVATE cublasLt cublas cudnn curand cufft ${ABSEIL_LIBS} ${ONNXRUNTIME_PROVIDERS_SHARED} Boost::mp11 safeint_interface)
+    if(onnxruntime_CUDNN_HOME)
+      target_include_directories(${target} PRIVATE ${onnxruntime_CUDNN_HOME}/include)
+      target_link_directories(${target} PRIVATE ${onnxruntime_CUDNN_HOME}/lib)
+    endif()
+
+    if (onnxruntime_USE_TRITON_KERNEL)
+      # compile triton kernel, generate .a and .h files
+      include(onnxruntime_compile_triton_kernel.cmake)
+      compile_triton_kernel(triton_kernel_obj_file triton_kernel_header_dir)
+      add_dependencies(${target} onnxruntime_triton_kernel)
+      target_compile_definitions(${target} PRIVATE USE_TRITON_KERNEL)
+      target_include_directories(${target} PRIVATE ${triton_kernel_header_dir})
+      target_link_libraries(${target} PUBLIC -Wl,--whole-archive ${triton_kernel_obj_file} -Wl,--no-whole-archive)
+      # lib cuda needed by cuLaunchKernel
+      target_link_libraries(${target} PRIVATE cuda)
+    endif()
+
+    include(cutlass)
+    target_include_directories(${target} PRIVATE ${cutlass_SOURCE_DIR}/include ${cutlass_SOURCE_DIR}/examples)
+
+    target_include_directories(${target} PRIVATE ${ONNXRUNTIME_ROOT} ${CMAKE_CURRENT_BINARY_DIR}  ${eigen_INCLUDE_DIRS} ${TVM_INCLUDES} PUBLIC ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
+    # ${CMAKE_CURRENT_BINARY_DIR} is so that #include "onnxruntime_config.h" inside tensor_shape.h is found
+    set_target_properties(${target} PROPERTIES LINKER_LANGUAGE CUDA)
+    set_target_properties(${target} PROPERTIES FOLDER "ONNXRuntime")
+
+    if (onnxruntime_ENABLE_CUDA_PROFILING) # configure cupti for cuda profiling
+      target_include_directories(${target} PRIVATE ${onnxruntime_CUDA_HOME}/extras/CUPTI/include)
+      target_link_directories(${target} PRIVATE ${onnxruntime_CUDA_HOME}/extras/CUPTI/lib64)
+      target_link_libraries(${target} PRIVATE cupti)
+    endif()
+
+    if (onnxruntime_ENABLE_NVTX_PROFILE AND NOT WIN32)
+      target_link_libraries(${target} PRIVATE nvToolsExt)
+    endif()
+
+    if (onnxruntime_ENABLE_TRAINING_OPS)
+      target_include_directories(${target} PRIVATE ${ORTTRAINING_ROOT} ${MPI_CXX_INCLUDE_DIRS})
+    endif()
+
+    if(onnxruntime_USE_MPI)
+      target_link_libraries(${target} PRIVATE ${MPI_LIBRARIES} ${MPI_CXX_LINK_FLAGS})
+    endif()
+
+    if (onnxruntime_USE_NCCL)
+      target_include_directories(${target} PRIVATE ${NCCL_INCLUDE_DIRS})
+      target_link_libraries(${target} PRIVATE ${NCCL_LIBRARIES})
+    endif()
+
+    if (WIN32)
+      # *.cu cannot use PCH
+      if (NOT onnxruntime_BUILD_CACHE)
+        target_precompile_headers(${target} PUBLIC
+          "${ONNXRUNTIME_ROOT}/core/providers/cuda/cuda_pch.h"
+          "${ONNXRUNTIME_ROOT}/core/providers/cuda/cuda_pch.cc"
+        )
+      endif()
+
+      # minimize the Windows includes.
+      # this avoids an issue with CUDA 11.6 where 'small' is defined in the windows and cuda headers.
+      target_compile_definitions(${target} PRIVATE "WIN32_LEAN_AND_MEAN")
+
+      # disable a warning from the CUDA headers about unreferenced local functions
+      #target_compile_options(${target} PRIVATE /wd4505)
+      set(onnxruntime_providers_cuda_static_library_flags
+          -IGNORE:4221 # LNK4221: This object file does not define any previously undefined public symbols, so it will not be used by any link operation that consumes this library
+      )
+      set_target_properties(${target} PROPERTIES
+          STATIC_LIBRARY_FLAGS "${onnxruntime_providers_cuda_static_library_flags}")
+    endif()
+
+    if(APPLE)
+      set_property(TARGET ${target} APPEND_STRING PROPERTY LINK_FLAGS "-Xlinker -exported_symbols_list ${ONNXRUNTIME_ROOT}/core/providers/cuda/exported_symbols.lst")
+      target_link_libraries(${target} PRIVATE nsync::nsync_cpp)
+    elseif(UNIX)
+      set_property(TARGET ${target} APPEND_STRING PROPERTY LINK_FLAGS "-Xlinker --version-script=${ONNXRUNTIME_ROOT}/core/providers/cuda/version_script.lds -Xlinker --gc-sections")
+      target_link_libraries(${target} PRIVATE nsync::nsync_cpp)
+    elseif(WIN32)
+      set_property(TARGET ${target} APPEND_STRING PROPERTY LINK_FLAGS "-DEF:${ONNXRUNTIME_ROOT}/core/providers/cuda/symbols.def")
+    else()
+      message(FATAL_ERROR "${target} unknown platform, need to specify shared library exports for it")
+    endif()
+
+    if (onnxruntime_ENABLE_ATEN)
+      target_compile_definitions(${target} PRIVATE ENABLE_ATEN)
+    endif()
+  endfunction()
+  if(onnxruntime_ENABLE_CUDA_EP_INTERNAL_TESTS)
+    config_cuda_provider_shared_module(onnxruntime_providers_cuda_obj)
+  endif()
+  config_cuda_provider_shared_module(onnxruntime_providers_cuda)
+
+  install(TARGETS onnxruntime_providers_cuda
+          ARCHIVE  DESTINATION ${CMAKE_INSTALL_LIBDIR}
+          LIBRARY  DESTINATION ${CMAKE_INSTALL_LIBDIR}
+          RUNTIME  DESTINATION ${CMAKE_INSTALL_BINDIR})
diff --git a/cmake/onnxruntime_providers_dml.cmake b/cmake/onnxruntime_providers_dml.cmake
new file mode 100644
index 0000000000000..01b0bda9fea6b
--- /dev/null
+++ b/cmake/onnxruntime_providers_dml.cmake
@@ -0,0 +1,91 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+  file(GLOB_RECURSE onnxruntime_providers_dml_cc_srcs CONFIGURE_DEPENDS
+    "${ONNXRUNTIME_ROOT}/core/providers/dml/*.h"
+    "${ONNXRUNTIME_ROOT}/core/providers/dml/*.cpp"
+    "${ONNXRUNTIME_ROOT}/core/providers/dml/*.cc"
+  )
+  source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_dml_cc_srcs})
+  onnxruntime_add_static_library(onnxruntime_providers_dml ${onnxruntime_providers_dml_cc_srcs})
+  onnxruntime_add_include_to_target(onnxruntime_providers_dml
+    onnxruntime_common onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB} flatbuffers::flatbuffers Boost::mp11 safeint_interface ${WIL_TARGET}
+  )
+  add_dependencies(onnxruntime_providers_dml ${onnxruntime_EXTERNAL_DEPENDENCIES})
+  target_include_directories(onnxruntime_providers_dml PRIVATE
+    ${ONNXRUNTIME_ROOT}
+  )
+
+  target_compile_definitions(onnxruntime_providers_dml PRIVATE DML_TARGET_VERSION_USE_LATEST=1)
+  if(WIN32)
+    target_compile_options(onnxruntime_providers_dml PRIVATE "/wd4100" "/wd4238" "/wd4189" "/wd4702")
+  endif()
+
+  if (NOT onnxruntime_USE_CUSTOM_DIRECTML)
+    foreach(file "DirectML.dll" "DirectML.pdb" "DirectML.Debug.dll" "DirectML.Debug.pdb")
+      add_custom_command(TARGET onnxruntime_providers_dml
+        POST_BUILD
+        COMMAND ${CMAKE_COMMAND} -E copy_if_different
+          "${DML_PACKAGE_DIR}/bin/${onnxruntime_target_platform}-win/${file}" $<TARGET_FILE_DIR:onnxruntime_providers_dml>)
+    endforeach()
+  endif()
+
+  function(target_add_dml target)
+    if (onnxruntime_USE_CUSTOM_DIRECTML)
+      if (dml_EXTERNAL_PROJECT)
+        # Internal build of DirectML: link against the "DirectML" target.
+        target_link_libraries(${target} PRIVATE DirectML)
+      else()
+        if (dml_LIB_DIR)
+          target_link_libraries(${target} PRIVATE ${dml_LIB_DIR}/DirectML.lib)
+        else()
+          target_link_libraries(${target} PRIVATE DirectML)
+        endif()
+      endif()
+    else()
+      add_dependencies(${target} RESTORE_PACKAGES)
+      target_link_libraries(${target} PRIVATE "${DML_PACKAGE_DIR}/bin/${onnxruntime_target_platform}-win/DirectML.lib")
+        target_compile_definitions(${target} PRIVATE DML_TARGET_VERSION_USE_LATEST)
+    endif()
+  endfunction()
+
+  target_add_dml(onnxruntime_providers_dml)
+  target_link_libraries(onnxruntime_providers_dml PRIVATE onnxruntime_common)
+  target_link_libraries(onnxruntime_providers_dml PRIVATE onnxruntime_framework)
+  onnxruntime_add_include_to_target(onnxruntime_providers_dml onnxruntime_common)
+  if (GDK_PLATFORM STREQUAL Scarlett)
+    target_link_libraries(onnxruntime_providers_dml PRIVATE ${gdk_dx_libs})
+  else()
+    target_link_libraries(onnxruntime_providers_dml PRIVATE dxguid.lib d3d12.lib dxgi.lib dxcore.lib)
+  endif()
+
+  target_link_libraries(onnxruntime_providers_dml PRIVATE delayimp.lib)
+
+  if (NOT GDK_PLATFORM)
+    set(onnxruntime_DELAYLOAD_FLAGS "${onnxruntime_DELAYLOAD_FLAGS} /DELAYLOAD:DirectML.dll /DELAYLOAD:d3d12.dll /DELAYLOAD:dxgi.dll /DELAYLOAD:api-ms-win-core-com-l1-1-0.dll /DELAYLOAD:shlwapi.dll /DELAYLOAD:oleaut32.dll /DELAYLOAD:ext-ms-win-dxcore-l1-*.dll /ignore:4199")
+  endif()
+
+  target_compile_definitions(onnxruntime_providers_dml
+    PRIVATE
+    ONNX_NAMESPACE=onnx ONNX_ML LOTUS_LOG_THRESHOLD=2 LOTUS_ENABLE_STDERR_LOGGING PLATFORM_WINDOWS
+  )
+  target_compile_definitions(onnxruntime_providers_dml PRIVATE UNICODE _UNICODE NOMINMAX)
+  if (MSVC)
+    target_compile_definitions(onnxruntime_providers_dml PRIVATE _SILENCE_CXX17_ITERATOR_BASE_CLASS_DEPRECATION_WARNING)
+    target_compile_options(onnxruntime_providers_dml PRIVATE "/W3")
+  endif()
+
+  install(FILES ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/dml/dml_provider_factory.h
+    DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/
+  )
+
+  set_target_properties(onnxruntime_providers_dml PROPERTIES LINKER_LANGUAGE CXX)
+  set_target_properties(onnxruntime_providers_dml PROPERTIES FOLDER "ONNXRuntime")
+
+  if (NOT onnxruntime_BUILD_SHARED_LIB)
+    install(TARGETS onnxruntime_providers_dml
+            ARCHIVE   DESTINATION ${CMAKE_INSTALL_LIBDIR}
+            LIBRARY   DESTINATION ${CMAKE_INSTALL_LIBDIR}
+            RUNTIME   DESTINATION ${CMAKE_INSTALL_BINDIR}
+            FRAMEWORK DESTINATION ${CMAKE_INSTALL_BINDIR})
+  endif()
diff --git a/cmake/onnxruntime_providers_dnnl.cmake b/cmake/onnxruntime_providers_dnnl.cmake
new file mode 100644
index 0000000000000..f2965728524b7
--- /dev/null
+++ b/cmake/onnxruntime_providers_dnnl.cmake
@@ -0,0 +1,57 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+  file(GLOB_RECURSE onnxruntime_providers_dnnl_cc_srcs CONFIGURE_DEPENDS
+    "${ONNXRUNTIME_ROOT}/core/providers/dnnl/*.h"
+    "${ONNXRUNTIME_ROOT}/core/providers/dnnl/*.cc"
+    "${ONNXRUNTIME_ROOT}/core/providers/shared_library/*.h"
+    "${ONNXRUNTIME_ROOT}/core/providers/shared_library/*.cc"
+  )
+
+  source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_dnnl_cc_srcs})
+  onnxruntime_add_shared_library_module(onnxruntime_providers_dnnl ${onnxruntime_providers_dnnl_cc_srcs})
+  target_link_directories(onnxruntime_providers_dnnl PRIVATE ${DNNL_LIB_DIR})
+  if (MSVC AND onnxruntime_ENABLE_STATIC_ANALYSIS)
+    # dnnl_convgrad.cc(47,0): Warning C6262: Function uses '38816' bytes of stack:  exceeds /analyze:stacksize '16384'.  Consider moving some data to heap.
+    target_compile_options(onnxruntime_providers_dnnl PRIVATE  "/analyze:stacksize 131072")
+  endif()
+
+  add_dependencies(onnxruntime_providers_dnnl onnxruntime_providers_shared project_dnnl ${onnxruntime_EXTERNAL_DEPENDENCIES})
+  target_include_directories(onnxruntime_providers_dnnl PRIVATE ${ONNXRUNTIME_ROOT} ${eigen_INCLUDE_DIRS} ${DNNL_INCLUDE_DIR} ${DNNL_OCL_INCLUDE_DIR})
+  # ${CMAKE_CURRENT_BINARY_DIR} is so that #include "onnxruntime_config.h" inside tensor_shape.h is found
+  target_link_libraries(onnxruntime_providers_dnnl PRIVATE dnnl ${ONNXRUNTIME_PROVIDERS_SHARED} Boost::mp11 ${ABSEIL_LIBS} ${GSL_TARGET} safeint_interface)
+  install(FILES ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/dnnl/dnnl_provider_options.h
+    DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/)
+  set_target_properties(onnxruntime_providers_dnnl PROPERTIES FOLDER "ONNXRuntime")
+  set_target_properties(onnxruntime_providers_dnnl PROPERTIES LINKER_LANGUAGE CXX)
+
+  # Needed for the provider interface, as it includes training headers when training is enabled
+  if (onnxruntime_ENABLE_TRAINING_OPS)
+    target_include_directories(onnxruntime_providers_dnnl PRIVATE ${ORTTRAINING_ROOT})
+  endif()
+
+  # Needed for threadpool handling
+  if(onnxruntime_BUILD_JAVA)
+    add_compile_definitions(DNNL_JAVA)
+  endif()
+
+  if(APPLE)
+    set_property(TARGET onnxruntime_providers_dnnl APPEND_STRING PROPERTY LINK_FLAGS "-Xlinker -exported_symbols_list ${ONNXRUNTIME_ROOT}/core/providers/dnnl/exported_symbols.lst")
+    set_target_properties(onnxruntime_providers_dnnl PROPERTIES
+      INSTALL_RPATH "@loader_path"
+      BUILD_WITH_INSTALL_RPATH TRUE
+      INSTALL_RPATH_USE_LINK_PATH FALSE)
+    target_link_libraries(onnxruntime_providers_dnnl PRIVATE nsync::nsync_cpp)
+  elseif(UNIX)
+    set_property(TARGET onnxruntime_providers_dnnl APPEND_STRING PROPERTY LINK_FLAGS "-Xlinker --version-script=${ONNXRUNTIME_ROOT}/core/providers/dnnl/version_script.lds -Xlinker --gc-sections -Xlinker -rpath=\$ORIGIN")
+    target_link_libraries(onnxruntime_providers_dnnl PRIVATE nsync::nsync_cpp)
+  elseif(WIN32)
+    set_property(TARGET onnxruntime_providers_dnnl APPEND_STRING PROPERTY LINK_FLAGS "-DEF:${ONNXRUNTIME_ROOT}/core/providers/dnnl/symbols.def")
+  else()
+    message(FATAL_ERROR "onnxruntime_providers_dnnl unknown platform, need to specify shared library exports for it")
+  endif()
+
+  install(TARGETS onnxruntime_providers_dnnl
+          ARCHIVE  DESTINATION ${CMAKE_INSTALL_LIBDIR}
+          LIBRARY  DESTINATION ${CMAKE_INSTALL_LIBDIR}
+          RUNTIME  DESTINATION ${CMAKE_INSTALL_BINDIR})
\ No newline at end of file
diff --git a/cmake/onnxruntime_providers_js.cmake b/cmake/onnxruntime_providers_js.cmake
new file mode 100644
index 0000000000000..306f5c74cb4c6
--- /dev/null
+++ b/cmake/onnxruntime_providers_js.cmake
@@ -0,0 +1,21 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+  add_compile_definitions(USE_JSEP=1)
+
+  file(GLOB_RECURSE onnxruntime_providers_js_cc_srcs
+    "${ONNXRUNTIME_ROOT}/core/providers/js/*.h"
+    "${ONNXRUNTIME_ROOT}/core/providers/js/*.cc"
+  )
+  if(NOT onnxruntime_DISABLE_CONTRIB_OPS)
+    source_group(TREE ${ONNXRUNTIME_ROOT} FILES ${onnxruntime_js_contrib_ops_cc_srcs})
+    list(APPEND onnxruntime_providers_js_cc_srcs ${onnxruntime_js_contrib_ops_cc_srcs})
+  endif()
+
+  source_group(TREE ${ONNXRUNTIME_ROOT} FILES ${onnxruntime_providers_js_cc_srcs})
+  onnxruntime_add_static_library(onnxruntime_providers_js ${onnxruntime_providers_js_cc_srcs})
+  onnxruntime_add_include_to_target(onnxruntime_providers_js
+    onnxruntime_common onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB} flatbuffers Boost::mp11
+  )
+  target_include_directories(onnxruntime_providers_js PRIVATE  ${eigen_INCLUDE_DIRS})
+  add_dependencies(onnxruntime_providers_js ${onnxruntime_EXTERNAL_DEPENDENCIES})
\ No newline at end of file
diff --git a/cmake/onnxruntime_providers_migraphx.cmake b/cmake/onnxruntime_providers_migraphx.cmake
new file mode 100644
index 0000000000000..91ac66a40721d
--- /dev/null
+++ b/cmake/onnxruntime_providers_migraphx.cmake
@@ -0,0 +1,75 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+  add_definitions(-DUSE_MIGRAPHX=1)
+  set(BUILD_LIBRARY_ONLY 1)
+  add_definitions("-DONNX_ML=1")
+  add_definitions("-DONNX_NAMESPACE=onnx")
+  include_directories(${protobuf_SOURCE_DIR} ${eigen_SOURCE_DIR})
+  set(MIGRAPHX_ROOT ${onnxruntime_MIGRAPHX_HOME})
+  include_directories(${onnx_SOURCE_DIR})
+  set(OLD_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
+  if ( CMAKE_COMPILER_IS_GNUCC )
+    set(CMAKE_CXX_FLAGS  "${CMAKE_CXX_FLAGS} -Wno-unused-parameter -Wno-missing-field-initializers")
+  endif()
+  set(CXX_VERSION_DEFINED TRUE)
+  set(CMAKE_CXX_FLAGS ${OLD_CMAKE_CXX_FLAGS})
+  if ( CMAKE_COMPILER_IS_GNUCC )
+    set(CMAKE_CXX_FLAGS  "${CMAKE_CXX_FLAGS} -Wno-unused-parameter")
+  endif()
+
+  # Add search paths for default rocm installation
+  list(APPEND CMAKE_PREFIX_PATH /opt/rocm/hcc /opt/rocm/hip /opt/rocm)
+
+  find_package(hip)
+  find_package(migraphx PATHS ${AMD_MIGRAPHX_HOME})
+
+  find_package(miopen)
+  find_package(rocblas)
+
+  set(migraphx_libs migraphx::c hip::host MIOpen roc::rocblas)
+
+  file(GLOB_RECURSE onnxruntime_providers_migraphx_cc_srcs CONFIGURE_DEPENDS
+    "${ONNXRUNTIME_ROOT}/core/providers/migraphx/*.h"
+    "${ONNXRUNTIME_ROOT}/core/providers/migraphx/*.cc"
+    "${ONNXRUNTIME_ROOT}/core/providers/shared_library/*.h"
+    "${ONNXRUNTIME_ROOT}/core/providers/shared_library/*.cc"
+    "${ONNXRUNTIME_ROOT}/core/providers/rocm/rocm_stream_handle.h"
+    "${ONNXRUNTIME_ROOT}/core/providers/rocm/rocm_stream_handle.cc"
+  )
+  source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_migraphx_cc_srcs})
+  onnxruntime_add_shared_library_module(onnxruntime_providers_migraphx ${onnxruntime_providers_migraphx_cc_srcs})
+  onnxruntime_add_include_to_target(onnxruntime_providers_migraphx onnxruntime_common onnx flatbuffers::flatbuffers Boost::mp11 safeint_interface)
+  add_dependencies(onnxruntime_providers_migraphx onnxruntime_providers_shared ${onnxruntime_EXTERNAL_DEPENDENCIES})
+  target_link_libraries(onnxruntime_providers_migraphx PRIVATE ${migraphx_libs} ${ONNXRUNTIME_PROVIDERS_SHARED} onnx flatbuffers::flatbuffers Boost::mp11 safeint_interface)
+  target_include_directories(onnxruntime_providers_migraphx PRIVATE ${ONNXRUNTIME_ROOT} ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}/amdgpu/onnxruntime)
+  set_target_properties(onnxruntime_providers_migraphx PROPERTIES LINKER_LANGUAGE CXX)
+  set_target_properties(onnxruntime_providers_migraphx PROPERTIES FOLDER "ONNXRuntime")
+  target_compile_definitions(onnxruntime_providers_migraphx PRIVATE ONNXIFI_BUILD_LIBRARY=1)
+  target_compile_options(onnxruntime_providers_migraphx PRIVATE -Wno-error=sign-compare)
+  set_property(TARGET onnxruntime_providers_migraphx APPEND_STRING PROPERTY COMPILE_FLAGS "-Wno-deprecated-declarations")
+  set_property(TARGET onnxruntime_providers_migraphx APPEND_STRING PROPERTY LINK_FLAGS "-Xlinker --version-script=${ONNXRUNTIME_ROOT}/core/providers/migraphx/version_script.lds -Xlinker --gc-sections")
+  target_link_libraries(onnxruntime_providers_migraphx PRIVATE nsync::nsync_cpp stdc++fs)
+
+  include(CheckLibraryExists)
+  check_library_exists(migraphx::c "migraphx_program_run_async" "/opt/rocm/migraphx/lib" HAS_STREAM_SYNC)
+  if(HAS_STREAM_SYNC)
+      target_compile_definitions(onnxruntime_providers_migraphx PRIVATE -DMIGRAPHX_STREAM_SYNC)
+      message(STATUS "MIGRAPHX GPU STREAM SYNC is ENABLED")
+  else()
+      message(STATUS "MIGRAPHX GPU STREAM SYNC is DISABLED")
+  endif()
+
+  if (onnxruntime_ENABLE_TRAINING_OPS)
+    onnxruntime_add_include_to_target(onnxruntime_providers_migraphx onnxruntime_training)
+    target_link_libraries(onnxruntime_providers_migraphx PRIVATE onnxruntime_training)
+    if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
+      onnxruntime_add_include_to_target(onnxruntime_providers_migraphx Python::Module)
+    endif()
+  endif()
+
+  install(TARGETS onnxruntime_providers_migraphx
+          ARCHIVE  DESTINATION ${CMAKE_INSTALL_LIBDIR}
+          LIBRARY  DESTINATION ${CMAKE_INSTALL_LIBDIR}
+          RUNTIME  DESTINATION ${CMAKE_INSTALL_BINDIR}
+  )
diff --git a/cmake/onnxruntime_providers_nnapi.cmake b/cmake/onnxruntime_providers_nnapi.cmake
new file mode 100644
index 0000000000000..5ac25a3b76efb
--- /dev/null
+++ b/cmake/onnxruntime_providers_nnapi.cmake
@@ -0,0 +1,84 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+  if (onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_EXTENDED_MINIMAL_BUILD)
+    message(FATAL_ERROR "NNAPI can not be used in a basic minimal build. Please build with '--minimal_build extended'")
+  endif()
+
+  add_compile_definitions(USE_NNAPI=1)
+
+  # This is the minimum Android API Level required by ORT NNAPI EP to run
+  # ORT running on any host system with Android API level less than this will fall back to CPU EP
+  if(onnxruntime_NNAPI_MIN_API)
+    add_compile_definitions(ORT_NNAPI_MIN_API_LEVEL=${onnxruntime_NNAPI_MIN_API})
+  endif()
+
+  # This is the maximum Android API level supported in the ort model conversion for NNAPI EP
+  # Note: This is only for running NNAPI for ort format model conversion on non-Android system since we cannot
+  #       get the actually Android system version.
+  if(onnxruntime_NNAPI_HOST_API)
+    if(CMAKE_SYSTEM_NAME STREQUAL "Android")
+      message(FATAL_ERROR "onnxruntime_NNAPI_HOST_API should only be set for non-Android target")
+    endif()
+    add_compile_definitions(ORT_NNAPI_MAX_SUPPORTED_API_LEVEL=${onnxruntime_NNAPI_HOST_API})
+  endif()
+
+  set(onnxruntime_provider_nnapi_cc_src_patterns
+      "${ONNXRUNTIME_ROOT}/core/providers/nnapi/*.h"
+      "${ONNXRUNTIME_ROOT}/core/providers/nnapi/*.cc"
+      "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/*.h"
+      "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/*.cc"
+      "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/builders/*.h"
+      "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/builders/*.cc"
+      "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/builders/impl/*.h"
+      "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/builders/impl/*.cc"
+      "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/nnapi_lib/NeuralNetworksTypes.h"
+      "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/nnapi_lib/NeuralNetworksWrapper.h"
+      "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/nnapi_lib/NeuralNetworksWrapper.cc"
+      "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/nnapi_lib/nnapi_implementation.h"
+  )
+
+  # On Android, use the actual NNAPI implementation.
+  # Otherwise, use a stub implementation to support some unit testing.
+  if(CMAKE_SYSTEM_NAME STREQUAL "Android")
+    list(APPEND onnxruntime_provider_nnapi_cc_src_patterns
+         "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/nnapi_lib/nnapi_implementation.cc")
+  else()
+    list(APPEND onnxruntime_provider_nnapi_cc_src_patterns
+         "${ONNXRUNTIME_ROOT}/core/providers/nnapi/nnapi_builtin/nnapi_lib/nnapi_implementation_stub.cc")
+  endif()
+
+  # These are shared utils,
+  # TODO, move this to a separated lib when used by EPs other than NNAPI and CoreML
+  list(APPEND onnxruntime_provider_nnapi_cc_src_patterns
+    "${ONNXRUNTIME_ROOT}/core/providers/shared/utils/utils.h"
+    "${ONNXRUNTIME_ROOT}/core/providers/shared/utils/utils.cc"
+    "${ONNXRUNTIME_ROOT}/core/providers/shared/node_unit/node_unit.h"
+    "${ONNXRUNTIME_ROOT}/core/providers/shared/node_unit/node_unit.cc"
+  )
+
+  file(GLOB onnxruntime_providers_nnapi_cc_srcs CONFIGURE_DEPENDS ${onnxruntime_provider_nnapi_cc_src_patterns})
+
+  source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_nnapi_cc_srcs})
+  onnxruntime_add_static_library(onnxruntime_providers_nnapi ${onnxruntime_providers_nnapi_cc_srcs})
+  onnxruntime_add_include_to_target(onnxruntime_providers_nnapi
+    onnxruntime_common onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB} flatbuffers::flatbuffers Boost::mp11 safeint_interface
+  )
+  target_link_libraries(onnxruntime_providers_nnapi)
+  add_dependencies(onnxruntime_providers_nnapi onnx ${onnxruntime_EXTERNAL_DEPENDENCIES})
+  set_target_properties(onnxruntime_providers_nnapi PROPERTIES CXX_STANDARD_REQUIRED ON)
+  set_target_properties(onnxruntime_providers_nnapi PROPERTIES FOLDER "ONNXRuntime")
+  target_include_directories(onnxruntime_providers_nnapi PRIVATE ${ONNXRUNTIME_ROOT} ${nnapi_INCLUDE_DIRS})
+  set_target_properties(onnxruntime_providers_nnapi PROPERTIES LINKER_LANGUAGE CXX)
+  # ignore the warning unknown-pragmas on "pragma region"
+  if(NOT MSVC)
+    target_compile_options(onnxruntime_providers_nnapi PRIVATE "-Wno-unknown-pragmas")
+  endif()
+
+  if (NOT onnxruntime_BUILD_SHARED_LIB)
+    install(TARGETS onnxruntime_providers_nnapi
+            ARCHIVE   DESTINATION ${CMAKE_INSTALL_LIBDIR}
+            LIBRARY   DESTINATION ${CMAKE_INSTALL_LIBDIR}
+            RUNTIME   DESTINATION ${CMAKE_INSTALL_BINDIR}
+            FRAMEWORK DESTINATION ${CMAKE_INSTALL_BINDIR})
+  endif()
\ No newline at end of file
diff --git a/cmake/onnxruntime_providers_openvino.cmake b/cmake/onnxruntime_providers_openvino.cmake
new file mode 100644
index 0000000000000..e26f0bfc0b751
--- /dev/null
+++ b/cmake/onnxruntime_providers_openvino.cmake
@@ -0,0 +1,81 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+#  include_directories("${CMAKE_CURRENT_BINARY_DIR}/onnx")
+  file(GLOB_RECURSE onnxruntime_providers_openvino_cc_srcs CONFIGURE_DEPENDS
+    "${ONNXRUNTIME_ROOT}/core/providers/openvino/*.h"
+    "${ONNXRUNTIME_ROOT}/core/providers/openvino/*.cc"
+    "${ONNXRUNTIME_ROOT}/core/providers/openvino/*.hpp"
+    "${ONNXRUNTIME_ROOT}/core/providers/openvino/*.cpp"
+    "${ONNXRUNTIME_ROOT}/core/providers/shared_library/*.h"
+    "${ONNXRUNTIME_ROOT}/core/providers/shared_library/*.cc"
+  )
+
+  if (WIN32)
+      set(CMAKE_MAP_IMPORTED_CONFIG_RELWITHDEBINFO Release)
+  endif()
+
+  # Header paths
+  find_package(InferenceEngine REQUIRED)
+  find_package(ngraph REQUIRED)
+
+  if (OPENVINO_2022_1 OR OPENVINO_2022_2)
+  find_package(OpenVINO REQUIRED COMPONENTS Runtime ONNX)
+  list (OV_20_LIBS openvino::frontend::onnx openvino::runtime)
+  endif()
+
+  if (WIN32)
+    unset(CMAKE_MAP_IMPORTED_CONFIG_RELWITHDEBINFO)
+  endif()
+
+  if ((DEFINED ENV{OPENCL_LIBS}) AND (DEFINED ENV{OPENCL_INCS}))
+    add_definitions(-DIO_BUFFER_ENABLED=1)
+    list(APPEND OPENVINO_LIB_LIST $ENV{OPENCL_LIBS} ${OV_20_LIBS} ${InferenceEngine_LIBRARIES} ${NGRAPH_LIBRARIES} ngraph::onnx_importer ${PYTHON_LIBRARIES})
+  else()
+    list(APPEND OPENVINO_LIB_LIST ${OV_20_LIBS} ${InferenceEngine_LIBRARIES} ${NGRAPH_LIBRARIES} ngraph::onnx_importer ${PYTHON_LIBRARIES})
+  endif()
+
+  source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_openvino_cc_srcs})
+  onnxruntime_add_shared_library_module(onnxruntime_providers_openvino ${onnxruntime_providers_openvino_cc_srcs} "${ONNXRUNTIME_ROOT}/core/dll/onnxruntime.rc")
+  onnxruntime_add_include_to_target(onnxruntime_providers_openvino onnxruntime_common onnx)
+  install(FILES ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/openvino/openvino_provider_factory.h
+    DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/)
+  set_target_properties(onnxruntime_providers_openvino PROPERTIES LINKER_LANGUAGE CXX)
+  set_target_properties(onnxruntime_providers_openvino PROPERTIES FOLDER "ONNXRuntime")
+  if(NOT MSVC)
+    target_compile_options(onnxruntime_providers_openvino PRIVATE "-Wno-parentheses")
+  endif()
+  add_dependencies(onnxruntime_providers_openvino onnxruntime_providers_shared ${onnxruntime_EXTERNAL_DEPENDENCIES})
+  target_include_directories(onnxruntime_providers_openvino SYSTEM PUBLIC ${ONNXRUNTIME_ROOT} ${CMAKE_CURRENT_BINARY_DIR} ${eigen_INCLUDE_DIRS} ${OpenVINO_INCLUDE_DIR} ${OPENVINO_INCLUDE_DIR_LIST} ${PYTHON_INCLUDE_DIRS} $ENV{OPENCL_INCS} $ENV{OPENCL_INCS}/../../cl_headers/)
+  target_link_libraries(onnxruntime_providers_openvino ${ONNXRUNTIME_PROVIDERS_SHARED} Boost::mp11 ${OPENVINO_LIB_LIST} ${ABSEIL_LIBS})
+
+  target_compile_definitions(onnxruntime_providers_openvino PRIVATE VER_MAJOR=${VERSION_MAJOR_PART})
+  target_compile_definitions(onnxruntime_providers_openvino PRIVATE VER_MINOR=${VERSION_MINOR_PART})
+  target_compile_definitions(onnxruntime_providers_openvino PRIVATE VER_BUILD=${VERSION_BUILD_PART})
+  target_compile_definitions(onnxruntime_providers_openvino PRIVATE VER_PRIVATE=${VERSION_PRIVATE_PART})
+  target_compile_definitions(onnxruntime_providers_openvino PRIVATE VER_STRING=\"${VERSION_STRING}\")
+  target_compile_definitions(onnxruntime_providers_openvino PRIVATE FILE_NAME=\"onnxruntime_providers_openvino.dll\")
+
+  if(MSVC)
+    target_compile_options(onnxruntime_providers_openvino PUBLIC /wd4099 /wd4275 /wd4100 /wd4005 /wd4244 /wd4267)
+  endif()
+
+  # Needed for the provider interface, as it includes training headers when training is enabled
+  if (onnxruntime_ENABLE_TRAINING_OPS)
+    target_include_directories(onnxruntime_providers_openvino PRIVATE ${ORTTRAINING_ROOT})
+  endif()
+
+  if(APPLE)
+    set_property(TARGET onnxruntime_providers_openvino APPEND_STRING PROPERTY LINK_FLAGS "-Xlinker -exported_symbols_list ${ONNXRUNTIME_ROOT}/core/providers/openvino/exported_symbols.lst")
+  elseif(UNIX)
+    set_property(TARGET onnxruntime_providers_openvino APPEND_STRING PROPERTY LINK_FLAGS "-Xlinker --version-script=${ONNXRUNTIME_ROOT}/core/providers/openvino/version_script.lds -Xlinker --gc-sections")
+  elseif(WIN32)
+    set_property(TARGET onnxruntime_providers_openvino APPEND_STRING PROPERTY LINK_FLAGS "-DEF:${ONNXRUNTIME_ROOT}/core/providers/openvino/symbols.def")
+  else()
+    message(FATAL_ERROR "onnxruntime_providers_openvino unknown platform, need to specify shared library exports for it")
+  endif()
+
+  install(TARGETS onnxruntime_providers_openvino
+          ARCHIVE  DESTINATION ${CMAKE_INSTALL_LIBDIR}
+          LIBRARY  DESTINATION ${CMAKE_INSTALL_LIBDIR}
+          RUNTIME  DESTINATION ${CMAKE_INSTALL_BINDIR})
\ No newline at end of file
diff --git a/cmake/onnxruntime_providers_qnn.cmake b/cmake/onnxruntime_providers_qnn.cmake
new file mode 100644
index 0000000000000..a93a06e960c81
--- /dev/null
+++ b/cmake/onnxruntime_providers_qnn.cmake
@@ -0,0 +1,45 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+  add_compile_definitions(USE_QNN=1)
+
+  # These are shared utils,
+  # TODO, move this to a separated lib when used by EPs other than QNN, NNAPI and CoreML
+  file(GLOB_RECURSE onnxruntime_providers_shared_utils_cc_srcs CONFIGURE_DEPENDS
+    "${ONNXRUNTIME_ROOT}/core/providers/shared/utils/utils.h"
+    "${ONNXRUNTIME_ROOT}/core/providers/shared/utils/utils.cc"
+    "${ONNXRUNTIME_ROOT}/core/providers/shared/node_unit/node_unit.h"
+    "${ONNXRUNTIME_ROOT}/core/providers/shared/node_unit/node_unit.cc"
+  )
+
+  file(GLOB_RECURSE
+    onnxruntime_providers_qnn_ep_cc_srcs CONFIGURE_DEPENDS
+    "${ONNXRUNTIME_ROOT}/core/providers/qnn/*.h"
+    "${ONNXRUNTIME_ROOT}/core/providers/qnn/*.cc"
+  )
+
+  file(GLOB_RECURSE
+    onnxruntime_providers_qnn_builder_cc_srcs CONFIGURE_DEPENDS
+    "${ONNXRUNTIME_ROOT}/core/providers/qnn/builder/*.h"
+    "${ONNXRUNTIME_ROOT}/core/providers/qnn/builder/*.cc"
+  )
+
+  set(onnxruntime_providers_qnn_cc_srcs
+    ${onnxruntime_providers_shared_utils_cc_srcs}
+    ${onnxruntime_providers_qnn_ep_cc_srcs}
+    ${onnxruntime_providers_qnn_builder_cc_srcs}
+  )
+
+  source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_qnn_cc_srcs})
+  onnxruntime_add_static_library(onnxruntime_providers_qnn ${onnxruntime_providers_qnn_cc_srcs})
+  onnxruntime_add_include_to_target(onnxruntime_providers_qnn onnxruntime_common onnxruntime_framework onnx onnx_proto protobuf::libprotobuf-lite flatbuffers::flatbuffers Boost::mp11)
+  target_link_libraries(onnxruntime_providers_qnn)
+  add_dependencies(onnxruntime_providers_qnn onnx ${onnxruntime_EXTERNAL_DEPENDENCIES})
+  set_target_properties(onnxruntime_providers_qnn PROPERTIES CXX_STANDARD_REQUIRED ON)
+  set_target_properties(onnxruntime_providers_qnn PROPERTIES FOLDER "ONNXRuntime")
+  target_include_directories(onnxruntime_providers_qnn PRIVATE ${ONNXRUNTIME_ROOT} ${onnxruntime_QNN_HOME}/include/QNN ${onnxruntime_QNN_HOME}/include)
+  set_target_properties(onnxruntime_providers_qnn PROPERTIES LINKER_LANGUAGE CXX)
+  # ignore the warning unknown-pragmas on "pragma region"
+  if(NOT MSVC)
+    target_compile_options(onnxruntime_providers_qnn PRIVATE "-Wno-unknown-pragmas")
+  endif()
\ No newline at end of file
diff --git a/cmake/onnxruntime_providers_rknpu.cmake b/cmake/onnxruntime_providers_rknpu.cmake
new file mode 100644
index 0000000000000..408bcfde06c36
--- /dev/null
+++ b/cmake/onnxruntime_providers_rknpu.cmake
@@ -0,0 +1,44 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-variable -Wno-unused-parameter")
+  add_definitions(-DUSE_RKNPU=1)
+  option(DNN_READ_ONNX "" ON)
+  set(DNN_CUSTOM_PROTOC_EXECUTABLE ${ONNX_CUSTOM_PROTOC_EXECUTABLE})
+  option(DNN_CMAKE_INSTALL "" OFF)
+  option(DNN_BUILD_BIN "" OFF)
+  if (NOT RKNPU_DDK_PATH)
+    message(FATAL_ERROR "RKNPU_DDK_PATH required for onnxruntime_USE_RKNPU")
+  endif()
+  set(RKNPU_DDK_INCLUDE_DIR ${RKNPU_DDK_PATH}/include)
+  if (CMAKE_SIZEOF_VOID_P EQUAL 8)
+    set(RKNPU_DDK_LIB_DIR ${RKNPU_DDK_PATH}/lib64)
+  else()
+    set(RKNPU_DDK_LIB_DIR ${RKNPU_DDK_PATH}/lib)
+  endif()
+  file(GLOB_RECURSE
+    onnxruntime_providers_rknpu_cc_srcs CONFIGURE_DEPENDS
+    "${ONNXRUNTIME_ROOT}/core/providers/rknpu/*.h"
+    "${ONNXRUNTIME_ROOT}/core/providers/rknpu/*.cc"
+  )
+  source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_rknpu_cc_srcs})
+  onnxruntime_add_static_library(onnxruntime_providers_rknpu ${onnxruntime_providers_rknpu_cc_srcs})
+  onnxruntime_add_include_to_target(onnxruntime_providers_rknpu
+    onnxruntime_common onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB} flatbuffers::flatbuffers Boost::mp11 safeint_interface
+  )
+  target_link_libraries(onnxruntime_providers_rknpu PRIVATE -lrknpu_ddk)
+  add_dependencies(onnxruntime_providers_rknpu onnx ${onnxruntime_EXTERNAL_DEPENDENCIES})
+  set_target_properties(onnxruntime_providers_rknpu PROPERTIES FOLDER "ONNXRuntime")
+  target_include_directories(onnxruntime_providers_rknpu PRIVATE
+    ${ONNXRUNTIME_ROOT} ${rknpu_INCLUDE_DIRS} ${RKNPU_DDK_INCLUDE_DIR}
+  )
+  link_directories(onnxruntime_providers_rknpu ${RKNPU_DDK_LIB_DIR})
+  set_target_properties(onnxruntime_providers_rknpu PROPERTIES LINKER_LANGUAGE CXX)
+
+  if (NOT onnxruntime_BUILD_SHARED_LIB)
+    install(TARGETS onnxruntime_providers_rknpu
+            ARCHIVE   DESTINATION ${CMAKE_INSTALL_LIBDIR}
+            LIBRARY   DESTINATION ${CMAKE_INSTALL_LIBDIR}
+            RUNTIME   DESTINATION ${CMAKE_INSTALL_BINDIR}
+            FRAMEWORK DESTINATION ${CMAKE_INSTALL_BINDIR})
+  endif()
\ No newline at end of file
diff --git a/cmake/onnxruntime_providers_rocm.cmake b/cmake/onnxruntime_providers_rocm.cmake
new file mode 100644
index 0000000000000..b66268291579c
--- /dev/null
+++ b/cmake/onnxruntime_providers_rocm.cmake
@@ -0,0 +1,223 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+  add_definitions(-DUSE_ROCM=1)
+  include(onnxruntime_rocm_hipify.cmake)
+
+  list(APPEND CMAKE_PREFIX_PATH ${onnxruntime_ROCM_HOME})
+
+  find_package(HIP)
+  find_package(hiprand REQUIRED)
+  find_package(rocblas REQUIRED)
+  find_package(MIOpen REQUIRED)
+  find_package(hipfft REQUIRED)
+
+  # MIOpen version
+  if(NOT DEFINED ENV{MIOPEN_PATH})
+    set(MIOPEN_PATH ${onnxruntime_ROCM_HOME})
+  else()
+    set(MIOPEN_PATH $ENV{MIOPEN_PATH})
+  endif()
+  find_path(MIOPEN_VERSION_H_PATH
+    NAMES version.h
+    HINTS
+    ${MIOPEN_PATH}/include/miopen
+    ${MIOPEN_PATH}/miopen/include)
+  if (MIOPEN_VERSION_H_PATH-NOTFOUND)
+    MESSAGE(FATAL_ERROR "miopen version.h not found")
+  endif()
+  MESSAGE(STATUS "Found miopen version.h at ${MIOPEN_VERSION_H_PATH}")
+
+  file(READ ${MIOPEN_VERSION_H_PATH}/version.h MIOPEN_HEADER_CONTENTS)
+        string(REGEX MATCH "define MIOPEN_VERSION_MAJOR * +([0-9]+)"
+                                 MIOPEN_VERSION_MAJOR "${MIOPEN_HEADER_CONTENTS}")
+        string(REGEX REPLACE "define MIOPEN_VERSION_MAJOR * +([0-9]+)" "\\1"
+                                 MIOPEN_VERSION_MAJOR "${MIOPEN_VERSION_MAJOR}")
+        string(REGEX MATCH "define MIOPEN_VERSION_MINOR * +([0-9]+)"
+                                 MIOPEN_VERSION_MINOR "${MIOPEN_HEADER_CONTENTS}")
+        string(REGEX REPLACE "define MIOPEN_VERSION_MINOR * +([0-9]+)" "\\1"
+                                 MIOPEN_VERSION_MINOR "${MIOPEN_VERSION_MINOR}")
+        string(REGEX MATCH "define MIOPEN_VERSION_PATCH * +([0-9]+)"
+                                 MIOPEN_VERSION_PATCH "${MIOPEN_HEADER_CONTENTS}")
+        string(REGEX REPLACE "define MIOPEN_VERSION_PATCH * +([0-9]+)" "\\1"
+                                 MIOPEN_VERSION_PATCH "${MIOPEN_VERSION_PATCH}")
+  set(MIOPEN_VERSION_DEV "${MIOPEN_VERSION_MAJOR}.${MIOPEN_VERSION_MINOR}.${MIOPEN_VERSION_PATCH}")
+  math(EXPR MIOPEN_VERSION_DEV_INT "(${MIOPEN_VERSION_MAJOR}*10000) + (${MIOPEN_VERSION_MINOR}*100) + ${MIOPEN_VERSION_PATCH}")
+  message("MIOPEN_VERSION_DEV: ${MIOPEN_VERSION_DEV}")
+  message("MIOPEN_VERSION_DEV_INT:   ${MIOPEN_VERSION_DEV_INT}")
+  add_definitions(-DMIOPEN_VERSION=${MIOPEN_VERSION_DEV_INT})
+
+  find_library(RCCL_LIB rccl REQUIRED)
+  find_library(ROCTRACER_LIB roctracer64 REQUIRED)
+  set(ONNXRUNTIME_ROCM_LIBS roc::rocblas MIOpen hip::hipfft ${RCCL_LIB} ${ROCTRACER_LIB})
+
+  file(GLOB_RECURSE onnxruntime_providers_rocm_cc_srcs CONFIGURE_DEPENDS
+    "${ONNXRUNTIME_ROOT}/core/providers/rocm/*.h"
+    "${ONNXRUNTIME_ROOT}/core/providers/rocm/*.cc"
+  )
+
+  # The shared_library files are in a separate list since they use precompiled headers, and the above files have them disabled.
+  file(GLOB_RECURSE onnxruntime_providers_rocm_shared_srcs CONFIGURE_DEPENDS
+    "${ONNXRUNTIME_ROOT}/core/providers/shared_library/*.h"
+    "${ONNXRUNTIME_ROOT}/core/providers/shared_library/*.cc"
+  )
+
+  file(GLOB_RECURSE onnxruntime_providers_rocm_cu_srcs CONFIGURE_DEPENDS
+    "${ONNXRUNTIME_ROOT}/core/providers/rocm/*.cu"
+    "${ONNXRUNTIME_ROOT}/core/providers/rocm/*.cuh"
+  )
+
+  hipify("onnxruntime/core/providers" provider_excluded_files onnxruntime_providers_rocm_generated_cc_srcs onnxruntime_providers_rocm_generated_cu_srcs)
+
+  source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_rocm_cc_srcs} ${onnxruntime_providers_rocm_shared_srcs} ${onnxruntime_providers_rocm_cu_srcs})
+  set(onnxruntime_providers_rocm_src ${onnxruntime_providers_rocm_cc_srcs} ${onnxruntime_providers_rocm_shared_srcs} ${onnxruntime_providers_rocm_cu_srcs})
+  list(APPEND onnxruntime_providers_rocm_src ${onnxruntime_providers_rocm_generated_cc_srcs} ${onnxruntime_providers_rocm_generated_cu_srcs})
+
+  # disable contrib ops conditionally
+  if(NOT onnxruntime_DISABLE_CONTRIB_OPS)
+    hipify("onnxruntime/contrib_ops" contrib_ops_excluded_files onnxruntime_rocm_generated_contrib_ops_cc_srcs onnxruntime_rocm_generated_contrib_ops_cu_srcs)
+
+    # add using ONNXRUNTIME_ROOT so they show up under the 'contrib_ops' folder in Visual Studio
+    source_group(TREE ${ONNXRUNTIME_ROOT} FILES ${onnxruntime_rocm_contrib_ops_cc_srcs} ${onnxruntime_rocm_contrib_ops_cu_srcs})
+    list(APPEND onnxruntime_providers_rocm_src ${onnxruntime_rocm_contrib_ops_cc_srcs} ${onnxruntime_rocm_contrib_ops_cu_srcs})
+    list(APPEND onnxruntime_providers_rocm_src ${onnxruntime_rocm_generated_contrib_ops_cc_srcs} ${onnxruntime_rocm_generated_contrib_ops_cu_srcs})
+  endif()
+
+  if (onnxruntime_ENABLE_TRAINING_OPS)
+    file(GLOB_RECURSE onnxruntime_rocm_training_ops_cc_srcs CONFIGURE_DEPENDS
+      "${ORTTRAINING_SOURCE_DIR}/training_ops/rocm/*.h"
+      "${ORTTRAINING_SOURCE_DIR}/training_ops/rocm/*.cc"
+    )
+
+    file(GLOB_RECURSE onnxruntime_rocm_training_ops_cu_srcs CONFIGURE_DEPENDS
+      "${ORTTRAINING_SOURCE_DIR}/training_ops/rocm/*.cu"
+      "${ORTTRAINING_SOURCE_DIR}/training_ops/rocm/*.cuh"
+    )
+
+    hipify("orttraining/orttraining/training_ops" training_ops_excluded_files onnxruntime_rocm_generated_training_ops_cc_srcs onnxruntime_rocm_generated_training_ops_cu_srcs)
+
+    # NCCL is not support in Windows build
+    if (WIN32 OR NOT onnxruntime_USE_NCCL)
+      list(REMOVE_ITEM onnxruntime_rocm_generated_training_ops_cc_srcs
+      "${CMAKE_CURRENT_BINARY_DIR}/amdgpu/orttraining/orttraining/training_ops/rocm/collective/nccl_common.cc"
+      "${CMAKE_CURRENT_BINARY_DIR}/amdgpu/orttraining/orttraining/training_ops/rocm/collective/nccl_kernels.cc"
+      "${CMAKE_CURRENT_BINARY_DIR}/amdgpu/orttraining/orttraining/training_ops/rocm/collective/megatron.cc"
+      )
+    endif()
+
+    source_group(TREE ${ORTTRAINING_ROOT} FILES ${onnxruntime_rocm_training_ops_cc_srcs} ${onnxruntime_rocm_training_ops_cu_srcs})
+    list(APPEND onnxruntime_providers_rocm_src ${onnxruntime_rocm_training_ops_cc_srcs} ${onnxruntime_rocm_training_ops_cu_srcs})
+    list(APPEND onnxruntime_providers_rocm_src ${onnxruntime_rocm_generated_training_ops_cc_srcs} ${onnxruntime_rocm_generated_training_ops_cu_srcs})
+  endif()
+
+  auto_set_source_files_hip_language(${onnxruntime_providers_rocm_src})
+  onnxruntime_add_shared_library_module(onnxruntime_providers_rocm ${onnxruntime_providers_rocm_src})
+  target_compile_options(onnxruntime_providers_rocm PRIVATE -D__HIP_PLATFORM_AMD__=1 -D__HIP_PLATFORM_HCC__=1)
+
+  if(NOT MSVC)
+    target_compile_options(onnxruntime_providers_rocm PRIVATE -Wno-sign-compare)
+    target_compile_options(onnxruntime_providers_rocm PRIVATE -Wno-unused-parameter)
+    target_compile_options(onnxruntime_providers_rocm PRIVATE -Wno-undefined-var-template)
+  endif()
+
+  onnxruntime_add_include_to_target(onnxruntime_providers_rocm onnxruntime_common onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB} flatbuffers::flatbuffers Boost::mp11 safeint_interface)
+  if (onnxruntime_ENABLE_TRAINING_OPS)
+    onnxruntime_add_include_to_target(onnxruntime_providers_rocm onnxruntime_training)
+    target_link_libraries(onnxruntime_providers_rocm PRIVATE onnxruntime_training)
+    if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
+      onnxruntime_add_include_to_target(onnxruntime_providers_rocm Python::Module)
+    endif()
+  endif()
+
+  add_custom_target(generate_hipified_files DEPENDS
+    ${onnxruntime_providers_rocm_generated_cc_srcs}
+    ${onnxruntime_providers_rocm_generated_cu_srcs}
+    ${onnxruntime_rocm_generated_contrib_ops_cc_srcs}
+    ${onnxruntime_rocm_generated_contrib_ops_cu_srcs}
+    ${onnxruntime_rocm_generated_training_ops_cc_srcs}
+    ${onnxruntime_rocm_generated_training_ops_cu_srcs})
+
+  add_dependencies(onnxruntime_providers_rocm generate_hipified_files onnxruntime_providers_shared ${onnxruntime_EXTERNAL_DEPENDENCIES})
+  target_link_libraries(onnxruntime_providers_rocm PRIVATE ${ONNXRUNTIME_ROCM_LIBS} ${ONNXRUNTIME_PROVIDERS_SHARED} ${ABSEIL_LIBS})
+  target_include_directories(onnxruntime_providers_rocm SYSTEM
+    PRIVATE
+      ${ONNXRUNTIME_ROOT}
+      ${CMAKE_CURRENT_BINARY_DIR}
+      ${CMAKE_CURRENT_BINARY_DIR}/amdgpu/onnxruntime
+      ${eigen_INCLUDE_DIRS}
+    PUBLIC
+      ${onnxruntime_ROCM_HOME}/include
+      ${onnxruntime_ROCM_HOME}/include/roctracer)
+
+  set_target_properties(onnxruntime_providers_rocm PROPERTIES LINKER_LANGUAGE CXX)
+  set_target_properties(onnxruntime_providers_rocm PROPERTIES FOLDER "ONNXRuntime")
+
+  if (onnxruntime_ENABLE_TRAINING)
+    target_include_directories(onnxruntime_providers_rocm PRIVATE ${ORTTRAINING_ROOT} ${CMAKE_CURRENT_BINARY_DIR}/amdgpu/orttraining ${MPI_CXX_INCLUDE_DIRS})
+    if(onnxruntime_USE_MPI)
+      target_link_libraries(onnxruntime_providers_rocm PRIVATE ${MPI_LIBRARIES} ${MPI_CXX_LINK_FLAGS})
+    endif()
+
+    # RCCL is enabled by default for ROCM builds
+    #if (onnxruntime_USE_NCCL)
+    #  target_include_directories(onnxruntime_providers_rocm PRIVATE ${NCCL_INCLUDE_DIRS})
+    #  target_link_libraries(onnxruntime_providers_rocm PRIVATE ${NCCL_LIBRARIES})
+    #endif()
+  endif()
+
+  if (onnxruntime_USE_ROCBLAS_EXTENSION_API)
+    target_compile_definitions(onnxruntime_providers_rocm PRIVATE USE_ROCBLAS_EXTENSION_API)
+    target_compile_definitions(onnxruntime_providers_rocm PRIVATE ROCBLAS_NO_DEPRECATED_WARNINGS)
+    target_compile_definitions(onnxruntime_providers_rocm PRIVATE ROCBLAS_BETA_FEATURES_API)
+  endif()
+
+  if (onnxruntime_USE_HIPBLASLT)
+    find_package(hipblaslt REQUIRED)
+    target_link_libraries(onnxruntime_providers_rocm PRIVATE roc::hipblaslt)
+    target_compile_definitions(onnxruntime_providers_rocm PRIVATE USE_HIPBLASLT)
+  endif()
+
+  if (onnxruntime_USE_TRITON_KERNEL)
+    # compile triton kernel, generate .a and .h files
+    include(onnxruntime_compile_triton_kernel.cmake)
+    compile_triton_kernel(triton_kernel_obj_file triton_kernel_header_dir)
+    add_dependencies(onnxruntime_providers_rocm onnxruntime_triton_kernel)
+    target_compile_definitions(onnxruntime_providers_rocm PRIVATE USE_TRITON_KERNEL)
+    target_include_directories(onnxruntime_providers_rocm PRIVATE ${triton_kernel_header_dir})
+    target_link_libraries(onnxruntime_providers_rocm PUBLIC -Wl,--whole-archive ${triton_kernel_obj_file} -Wl,--no-whole-archive)
+  endif()
+
+  if (onnxruntime_USE_COMPOSABLE_KERNEL)
+    include(composable_kernel)
+    target_link_libraries(onnxruntime_providers_rocm PRIVATE
+      onnxruntime_composable_kernel_includes
+      # Currently we shall not use composablekernels::device_operations, the target includes all conv dependencies, which
+      # are extremely slow to compile. Instead, we only link all gemm related objects. See the following directory on
+      # updating.
+      # https://github.com/ROCmSoftwarePlatform/composable_kernel/tree/develop/library/src/tensor_operation_instance/gpu
+      device_gemm_instance
+      device_gemm_add_fastgelu_instance
+      device_gemm_fastgelu_instance
+      device_gemm_splitk_instance
+      device_gemm_streamk_instance
+      device_batched_gemm_instance
+      device_softmax_instance
+    )
+    target_compile_definitions(onnxruntime_providers_rocm PRIVATE USE_COMPOSABLE_KERNEL)
+  endif()
+
+  if(UNIX)
+    set_property(TARGET onnxruntime_providers_rocm APPEND_STRING PROPERTY LINK_FLAGS "-Xlinker --version-script=${ONNXRUNTIME_ROOT}/core/providers/rocm/version_script.lds -Xlinker --gc-sections")
+    target_link_libraries(onnxruntime_providers_rocm PRIVATE nsync::nsync_cpp)
+  else()
+    message(FATAL_ERROR "onnxruntime_providers_rocm unknown platform, need to specify shared library exports for it")
+  endif()
+
+  if (onnxruntime_ENABLE_ATEN)
+    target_compile_definitions(onnxruntime_providers_rocm PRIVATE ENABLE_ATEN)
+  endif()
+
+  install(TARGETS onnxruntime_providers_rocm
+          ARCHIVE  DESTINATION ${CMAKE_INSTALL_LIBDIR}
+          LIBRARY  DESTINATION ${CMAKE_INSTALL_LIBDIR}
+          RUNTIME  DESTINATION ${CMAKE_INSTALL_BINDIR})
diff --git a/cmake/onnxruntime_providers_tensorrt.cmake b/cmake/onnxruntime_providers_tensorrt.cmake
new file mode 100644
index 0000000000000..686a993de3a4a
--- /dev/null
+++ b/cmake/onnxruntime_providers_tensorrt.cmake
@@ -0,0 +1,147 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+  add_definitions(-DUSE_TENSORRT=1)
+  if (onnxruntime_TENSORRT_PLACEHOLDER_BUILDER)
+    add_definitions(-DORT_TENSORRT_PLACEHOLDER_BUILDER)
+  endif()
+  set(BUILD_LIBRARY_ONLY 1)
+  add_definitions("-DONNX_ML=1")
+  add_definitions("-DONNX_NAMESPACE=onnx")
+  set(CUDA_INCLUDE_DIRS ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
+  set(TENSORRT_ROOT ${onnxruntime_TENSORRT_HOME})
+  set(OLD_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
+  set(PROTOBUF_LIBRARY ${PROTOBUF_LIB})
+  if (WIN32)
+    add_definitions(-D_SILENCE_EXPERIMENTAL_FILESYSTEM_DEPRECATION_WARNING=1)
+    set(OLD_CMAKE_CUDA_FLAGS ${CMAKE_CUDA_FLAGS})
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4099 /wd4551 /wd4505 /wd4515 /wd4706 /wd4456 /wd4324 /wd4701 /wd4804 /wd4702 /wd4458 /wd4703")
+    if (CMAKE_BUILD_TYPE STREQUAL "Debug")
+      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4805")
+    endif()
+    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -include algorithm")
+    set(DISABLED_WARNINGS_FOR_TRT /wd4456)
+  endif()
+  if ( CMAKE_COMPILER_IS_GNUCC )
+    set(CMAKE_CXX_FLAGS  "${CMAKE_CXX_FLAGS} -Wno-unused-parameter -Wno-missing-field-initializers")
+  endif()
+  set(CXX_VERSION_DEFINED TRUE)
+
+  # There is an issue when running "Debug build" TRT EP with "Release build" TRT builtin parser on Windows.
+  # We enforce following workaround for now until the real fix.
+  if (WIN32 AND CMAKE_BUILD_TYPE STREQUAL "Debug")
+    set(onnxruntime_USE_TENSORRT_BUILTIN_PARSER OFF)
+    MESSAGE(STATUS "[Note] There is an issue when running \"Debug build\" TRT EP with \"Release build\" TRT built-in parser on Windows. This build will use tensorrt oss parser instead.")
+  endif()
+
+  if (onnxruntime_USE_TENSORRT_BUILTIN_PARSER)
+    # Add TensorRT library
+    find_path(TENSORRT_INCLUDE_DIR NvInfer.h
+      HINTS ${TENSORRT_ROOT}
+      PATH_SUFFIXES include)
+    MESSAGE(STATUS "Found TensorRT headers at ${TENSORRT_INCLUDE_DIR}")
+    find_library(TENSORRT_LIBRARY_INFER nvinfer
+      HINTS ${TENSORRT_ROOT}
+      PATH_SUFFIXES lib lib64 lib/x64)
+    find_library(TENSORRT_LIBRARY_INFER_PLUGIN nvinfer_plugin
+      HINTS  ${TENSORRT_ROOT}
+      PATH_SUFFIXES lib lib64 lib/x64)
+    find_library(TENSORRT_LIBRARY_NVONNXPARSER nvonnxparser
+      HINTS  ${TENSORRT_ROOT}
+      PATH_SUFFIXES lib lib64 lib/x64)
+    set(TENSORRT_LIBRARY ${TENSORRT_LIBRARY_INFER} ${TENSORRT_LIBRARY_INFER_PLUGIN} ${TENSORRT_LIBRARY_NVONNXPARSER})
+    MESSAGE(STATUS "Find TensorRT libs at ${TENSORRT_LIBRARY}")
+  else()
+    FetchContent_Declare(
+      onnx_tensorrt
+      URL ${DEP_URL_onnx_tensorrt}
+      URL_HASH SHA1=${DEP_SHA1_onnx_tensorrt}
+    )
+    if (NOT CUDA_INCLUDE_DIR)
+      set(CUDA_INCLUDE_DIR ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) # onnx-tensorrt repo needs this variable to build
+    endif()
+    # The onnx_tensorrt repo contains a test program, getSupportedAPITest, which doesn't support Windows. It uses
+    # unistd.h. So we must exclude it from our build. onnxruntime_fetchcontent_makeavailable is for the purpose.
+    onnxruntime_fetchcontent_makeavailable(onnx_tensorrt)
+    include_directories(${onnx_tensorrt_SOURCE_DIR})
+    set(CMAKE_CXX_FLAGS ${OLD_CMAKE_CXX_FLAGS})
+    if ( CMAKE_COMPILER_IS_GNUCC )
+      set(CMAKE_CXX_FLAGS  "${CMAKE_CXX_FLAGS} -Wno-unused-parameter")
+    endif()
+    if (WIN32)
+      set(CMAKE_CUDA_FLAGS ${OLD_CMAKE_CUDA_FLAGS})
+      unset(PROTOBUF_LIBRARY)
+      unset(OLD_CMAKE_CXX_FLAGS)
+      unset(OLD_CMAKE_CUDA_FLAGS)
+      set_target_properties(nvonnxparser PROPERTIES LINK_FLAGS "/ignore:4199")
+      target_compile_options(nvonnxparser_static PRIVATE /FIio.h /wd4100)
+      target_compile_options(nvonnxparser PRIVATE /FIio.h /wd4100)
+    endif()
+    set(onnxparser_link_libs nvonnxparser_static)
+  endif()
+
+  include_directories(${TENSORRT_INCLUDE_DIR})
+  # ${TENSORRT_LIBRARY} is empty if we link nvonnxparser_static.
+  # nvonnxparser_static is linked against tensorrt libraries in onnx-tensorrt
+  # See https://github.com/onnx/onnx-tensorrt/blob/8af13d1b106f58df1e98945a5e7c851ddb5f0791/CMakeLists.txt#L121
+  set(trt_link_libs cudnn cublas ${CMAKE_DL_LIBS} ${TENSORRT_LIBRARY})
+
+  file(GLOB_RECURSE onnxruntime_providers_tensorrt_cc_srcs CONFIGURE_DEPENDS
+    "${ONNXRUNTIME_ROOT}/core/providers/tensorrt/*.h"
+    "${ONNXRUNTIME_ROOT}/core/providers/tensorrt/*.cc"
+    "${ONNXRUNTIME_ROOT}/core/providers/shared_library/*.h"
+    "${ONNXRUNTIME_ROOT}/core/providers/shared_library/*.cc"
+    "${ONNXRUNTIME_ROOT}/core/providers/cuda/cuda_stream_handle.h"
+    "${ONNXRUNTIME_ROOT}/core/providers/cuda/cuda_stream_handle.cc"
+    "${ONNXRUNTIME_ROOT}/core/providers/cuda/cuda_graph.h"
+    "${ONNXRUNTIME_ROOT}/core/providers/cuda/cuda_graph.cc"
+  )
+
+  source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_tensorrt_cc_srcs})
+  onnxruntime_add_shared_library_module(onnxruntime_providers_tensorrt ${onnxruntime_providers_tensorrt_cc_srcs})
+  onnxruntime_add_include_to_target(onnxruntime_providers_tensorrt onnxruntime_common onnx flatbuffers::flatbuffers Boost::mp11 safeint_interface)
+  add_dependencies(onnxruntime_providers_tensorrt onnxruntime_providers_shared ${onnxruntime_EXTERNAL_DEPENDENCIES})
+  if (onnxruntime_USE_TENSORRT_BUILTIN_PARSER)
+    target_link_libraries(onnxruntime_providers_tensorrt PRIVATE ${trt_link_libs} cudart ${ONNXRUNTIME_PROVIDERS_SHARED} ${PROTOBUF_LIB} flatbuffers::flatbuffers Boost::mp11 safeint_interface ${ABSEIL_LIBS})
+  else()
+    target_link_libraries(onnxruntime_providers_tensorrt PRIVATE ${onnxparser_link_libs} ${trt_link_libs} cudart ${ONNXRUNTIME_PROVIDERS_SHARED} ${PROTOBUF_LIB} flatbuffers::flatbuffers ${ABSEIL_LIBS})
+  endif()
+  target_include_directories(onnxruntime_providers_tensorrt PRIVATE ${ONNXRUNTIME_ROOT} ${CMAKE_CURRENT_BINARY_DIR} ${eigen_INCLUDE_DIRS} PUBLIC ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
+  if(onnxruntime_CUDNN_HOME)
+    target_include_directories(onnxruntime_providers_tensorrt PRIVATE ${onnxruntime_CUDNN_HOME}/include)
+  endif()
+
+  # ${CMAKE_CURRENT_BINARY_DIR} is so that #include "onnxruntime_config.h" inside tensor_shape.h is found
+  set_target_properties(onnxruntime_providers_tensorrt PROPERTIES LINKER_LANGUAGE CUDA)
+  set_target_properties(onnxruntime_providers_tensorrt PROPERTIES FOLDER "ONNXRuntime")
+  target_compile_definitions(onnxruntime_providers_tensorrt PRIVATE ONNXIFI_BUILD_LIBRARY=1)
+  target_compile_options(onnxruntime_providers_tensorrt PRIVATE ${DISABLED_WARNINGS_FOR_TRT})
+  if (WIN32)
+    target_compile_options(onnxruntime_providers_tensorrt INTERFACE /wd4456)
+  endif()
+
+  # Needed for the provider interface, as it includes training headers when training is enabled
+  if (onnxruntime_ENABLE_TRAINING_OPS)
+    target_include_directories(onnxruntime_providers_tensorrt PRIVATE ${ORTTRAINING_ROOT})
+    if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
+      onnxruntime_add_include_to_target(onnxruntime_providers_tensorrt Python::Module)
+    endif()
+  endif()
+
+  if(APPLE)
+    set_property(TARGET onnxruntime_providers_tensorrt APPEND_STRING PROPERTY LINK_FLAGS "-Xlinker -exported_symbols_list ${ONNXRUNTIME_ROOT}/core/providers/tensorrt/exported_symbols.lst")
+    target_link_libraries(onnxruntime_providers_tensorrt PRIVATE nsync::nsync_cpp)
+  elseif(UNIX)
+    set_property(TARGET onnxruntime_providers_tensorrt APPEND_STRING PROPERTY COMPILE_FLAGS "-Wno-deprecated-declarations")
+    set_property(TARGET onnxruntime_providers_tensorrt APPEND_STRING PROPERTY LINK_FLAGS "-Xlinker --version-script=${ONNXRUNTIME_ROOT}/core/providers/tensorrt/version_script.lds -Xlinker --gc-sections")
+    target_link_libraries(onnxruntime_providers_tensorrt PRIVATE nsync::nsync_cpp stdc++fs)
+  elseif(WIN32)
+    set_property(TARGET onnxruntime_providers_tensorrt APPEND_STRING PROPERTY LINK_FLAGS "-DEF:${ONNXRUNTIME_ROOT}/core/providers/tensorrt/symbols.def")
+  else()
+    message(FATAL_ERROR "onnxruntime_providers_tensorrt unknown platform, need to specify shared library exports for it")
+  endif()
+
+  install(TARGETS onnxruntime_providers_tensorrt
+          ARCHIVE  DESTINATION ${CMAKE_INSTALL_LIBDIR}
+          LIBRARY  DESTINATION ${CMAKE_INSTALL_LIBDIR}
+          RUNTIME  DESTINATION ${CMAKE_INSTALL_BINDIR})
diff --git a/cmake/onnxruntime_providers_tvm.cmake b/cmake/onnxruntime_providers_tvm.cmake
new file mode 100644
index 0000000000000..8fd50c70dd5d7
--- /dev/null
+++ b/cmake/onnxruntime_providers_tvm.cmake
@@ -0,0 +1,64 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+  add_definitions(-DUSE_TVM=1)
+  if (onnxruntime_TVM_USE_HASH)
+    add_definitions(-DUSE_TVM_HASH=1)
+  endif()
+
+  if (onnxruntime_TVM_USE_HASH)
+    file (GLOB_RECURSE onnxruntime_providers_tvm_cc_srcs CONFIGURE_DEPENDS
+      "${ONNXRUNTIME_ROOT}/core/providers/tvm/*.h"
+      "${ONNXRUNTIME_ROOT}/core/providers/tvm/*.cc"
+    )
+  else()
+    file (GLOB onnxruntime_providers_tvm_cc_srcs CONFIGURE_DEPENDS
+      "${ONNXRUNTIME_ROOT}/core/providers/tvm/*.h"
+      "${ONNXRUNTIME_ROOT}/core/providers/tvm/*.cc"
+    )
+  endif()
+
+  source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_tvm_cc_srcs})
+  onnxruntime_add_static_library(onnxruntime_providers_tvm ${onnxruntime_providers_tvm_cc_srcs})
+
+  if ( CMAKE_COMPILER_IS_GNUCC )
+    target_compile_options(onnxruntime_providers_tvm PRIVATE -Wno-unused-parameter -Wno-missing-field-initializers)
+  endif()
+
+  target_include_directories(onnxruntime_providers_tvm PRIVATE
+          ${TVM_INCLUDES}
+          ${PYTHON_INCLUDE_DIRS})
+  onnxruntime_add_include_to_target(onnxruntime_providers_tvm onnxruntime_common onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB} flatbuffers::flatbuffers Boost::mp11 safeint_interface)
+
+  add_dependencies(onnxruntime_providers_tvm ${onnxruntime_EXTERNAL_DEPENDENCIES})
+
+  if (onnxruntime_TVM_USE_HASH)
+    add_dependencies(onnxruntime_providers_tvm ippcp_s)
+    target_include_directories(onnxruntime_providers_tvm PRIVATE ${IPP_CRYPTO_INCLUDE_DIR})
+    target_link_libraries(onnxruntime_providers_tvm PRIVATE ippcp_s)
+  endif()
+
+  set_target_properties(onnxruntime_providers_tvm PROPERTIES FOLDER "ONNXRuntime")
+  set_target_properties(onnxruntime_providers_tvm PROPERTIES LINKER_LANGUAGE CXX)
+
+  if (WIN32 AND MSVC)
+    # wd4100: identifier' : unreferenced formal parameter
+    # wd4127: conditional expression is constant
+    # wd4244: conversion from 'int' to 'char', possible loss of data
+    # TODO: 4244 should not be disabled
+    target_compile_options(onnxruntime_providers_tvm PRIVATE "/wd4100" "/wd4127" "/wd4244")
+  else()
+    target_compile_options(onnxruntime_providers_tvm PRIVATE "-Wno-error=type-limits")
+  endif()
+  target_compile_definitions(onnxruntime_providers_tvm PUBLIC DMLC_USE_LOGGING_LIBRARY=<tvm/runtime/logging.h>)
+
+  install(FILES ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/tvm/tvm_provider_factory.h
+    DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/)
+
+  if (NOT onnxruntime_BUILD_SHARED_LIB)
+    install(TARGETS onnxruntime_providers_tvm
+            ARCHIVE   DESTINATION ${CMAKE_INSTALL_LIBDIR}
+            LIBRARY   DESTINATION ${CMAKE_INSTALL_LIBDIR}
+            RUNTIME   DESTINATION ${CMAKE_INSTALL_BINDIR}
+            FRAMEWORK DESTINATION ${CMAKE_INSTALL_BINDIR})
+  endif()
\ No newline at end of file
diff --git a/cmake/onnxruntime_providers_vitisai.cmake b/cmake/onnxruntime_providers_vitisai.cmake
new file mode 100644
index 0000000000000..7ac4a82c89a76
--- /dev/null
+++ b/cmake/onnxruntime_providers_vitisai.cmake
@@ -0,0 +1,52 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+  if ("${GIT_COMMIT_ID}" STREQUAL "")
+  execute_process(
+    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+    COMMAND git rev-parse HEAD
+    OUTPUT_VARIABLE GIT_COMMIT_ID
+    OUTPUT_STRIP_TRAILING_WHITESPACE)
+  endif()
+  configure_file(${ONNXRUNTIME_ROOT}/core/providers/vitisai/imp/version_info.hpp.in ${CMAKE_CURRENT_BINARY_DIR}/VitisAI/version_info.h)
+  file(GLOB onnxruntime_providers_vitisai_cc_srcs CONFIGURE_DEPENDS
+    "${ONNXRUNTIME_ROOT}/core/providers/vitisai/*.cc"
+    "${ONNXRUNTIME_ROOT}/core/providers/vitisai/*.h"
+    "${ONNXRUNTIME_ROOT}/core/providers/vitisai/imp/*.cc"
+    "${ONNXRUNTIME_ROOT}/core/providers/vitisai/imp/*.h"
+  )
+  list(REMOVE_ITEM onnxruntime_providers_vitisai_cc_srcs "${ONNXRUNTIME_ROOT}/core/providers/vitisai/onnxruntime_vitisai_ep_stub.cc")
+  source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_vitisai_cc_srcs})
+  onnxruntime_add_static_library(onnxruntime_providers_vitisai ${onnxruntime_providers_vitisai_cc_srcs})
+  onnxruntime_add_include_to_target(onnxruntime_providers_vitisai onnxruntime_common onnxruntime_framework onnx onnx_proto)
+  onnxruntime_add_shared_library(onnxruntime_vitisai_ep ${ONNXRUNTIME_ROOT}/core/providers/vitisai/onnxruntime_vitisai_ep_stub.cc)
+  onnxruntime_add_include_to_target(onnxruntime_vitisai_ep onnxruntime_common)
+  target_include_directories(onnxruntime_vitisai_ep PRIVATE "${ONNXRUNTIME_ROOT}" "${ONNXRUNTIME_ROOT}/core/providers/vitisai/include")
+  target_link_libraries(onnxruntime_providers_vitisai PUBLIC onnxruntime_vitisai_ep PRIVATE onnx protobuf::libprotobuf nlohmann_json::nlohmann_json )
+  target_compile_definitions(onnxruntime_vitisai_ep
+                           PRIVATE "-DONNXRUNTIME_VITISAI_EP_STUB=1" "-DONNXRUNTIME_VITISAI_EP_EXPORT_DLL=1")
+  if(NOT MSVC)
+    target_compile_options(onnxruntime_providers_vitisai PUBLIC $<$<CONFIG:DEBUG>:-U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0>)
+  endif(NOT MSVC)
+
+  target_include_directories(onnxruntime_providers_vitisai PRIVATE "${ONNXRUNTIME_ROOT}/core/providers/vitisai/include" ${XRT_INCLUDE_DIRS} ${CMAKE_CURRENT_BINARY_DIR}/VitisAI)
+  if(MSVC)
+    target_compile_options(onnxruntime_providers_vitisai PRIVATE "/Zc:__cplusplus")
+    # for dll interface warning.
+    target_compile_options(onnxruntime_providers_vitisai PRIVATE "/wd4251")
+    # for unused formal parameter
+    target_compile_options(onnxruntime_providers_vitisai PRIVATE "/wd4100")
+  else(MSVC)
+    target_compile_options(onnxruntime_providers_vitisai PRIVATE -Wno-unused-parameter)
+  endif(MSVC)
+
+  set_target_properties(onnxruntime_providers_vitisai PROPERTIES FOLDER "ONNXRuntime")
+  set_target_properties(onnxruntime_providers_vitisai PROPERTIES LINKER_LANGUAGE CXX)
+
+  if (NOT onnxruntime_BUILD_SHARED_LIB)
+    install(TARGETS onnxruntime_providers_vitisai
+            ARCHIVE   DESTINATION ${CMAKE_INSTALL_LIBDIR}
+            LIBRARY   DESTINATION ${CMAKE_INSTALL_LIBDIR}
+            RUNTIME   DESTINATION ${CMAKE_INSTALL_BINDIR}
+            FRAMEWORK DESTINATION ${CMAKE_INSTALL_BINDIR})
+  endif()
\ No newline at end of file
diff --git a/cmake/onnxruntime_providers_webnn.cmake b/cmake/onnxruntime_providers_webnn.cmake
new file mode 100644
index 0000000000000..05c63c22244db
--- /dev/null
+++ b/cmake/onnxruntime_providers_webnn.cmake
@@ -0,0 +1,25 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+  if (onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_EXTENDED_MINIMAL_BUILD)
+    message(FATAL_ERROR "WebNN EP can not be used in a basic minimal build. Please build with '--minimal_build extended'")
+  endif()
+
+  add_compile_definitions(USE_WEBNN=1)
+  if (onnxruntime_ENABLE_WEBASSEMBLY_THREADS)
+    add_definitions(-DENABLE_WEBASSEMBLY_THREADS=1)
+  endif()
+  file(GLOB_RECURSE onnxruntime_providers_webnn_cc_srcs CONFIGURE_DEPENDS
+    "${ONNXRUNTIME_ROOT}/core/providers/webnn/*.h"
+    "${ONNXRUNTIME_ROOT}/core/providers/webnn/*.cc"
+    "${ONNXRUNTIME_ROOT}/core/providers/shared/utils/utils.h"
+    "${ONNXRUNTIME_ROOT}/core/providers/shared/utils/utils.cc"
+  )
+
+  source_group(TREE ${REPO_ROOT} FILES ${onnxruntime_providers_webnn_cc_srcs})
+  onnxruntime_add_static_library(onnxruntime_providers_webnn ${onnxruntime_providers_webnn_cc_srcs})
+  onnxruntime_add_include_to_target(onnxruntime_providers_webnn onnxruntime_common onnx onnx_proto flatbuffers::flatbuffers Boost::mp11 safeint_interface)
+
+  add_dependencies(onnxruntime_providers_webnn onnx ${onnxruntime_EXTERNAL_DEPENDENCIES})
+  set_target_properties(onnxruntime_providers_webnn PROPERTIES FOLDER "ONNXRuntime")
+  set_target_properties(onnxruntime_providers_webnn PROPERTIES LINKER_LANGUAGE CXX)
\ No newline at end of file
diff --git a/cmake/onnxruntime_providers_xnnpack.cmake b/cmake/onnxruntime_providers_xnnpack.cmake
new file mode 100644
index 0000000000000..9c00703ca0846
--- /dev/null
+++ b/cmake/onnxruntime_providers_xnnpack.cmake
@@ -0,0 +1,39 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+  add_compile_definitions(USE_XNNPACK=1)
+
+  file(GLOB_RECURSE onnxruntime_providers_xnnpack_cc_srcs CONFIGURE_DEPENDS
+    "${ONNXRUNTIME_INCLUDE_DIR}/core/providers/xnnpack/*.h"
+    "${ONNXRUNTIME_ROOT}/core/providers/xnnpack/*.h"
+    "${ONNXRUNTIME_ROOT}/core/providers/xnnpack/*.cc"
+    # utils for handling QDQ models
+    "${ONNXRUNTIME_ROOT}/core/providers/shared/node_unit/node_unit.h"
+    "${ONNXRUNTIME_ROOT}/core/providers/shared/node_unit/node_unit.cc"
+  )
+
+  source_group(TREE ${REPO_ROOT} FILES ${onnxruntime_providers_xnnpack_cc_srcs})
+  onnxruntime_add_static_library(onnxruntime_providers_xnnpack ${onnxruntime_providers_xnnpack_cc_srcs})
+  onnxruntime_add_include_to_target(onnxruntime_providers_xnnpack
+    onnxruntime_common onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB} XNNPACK pthreadpool
+    flatbuffers::flatbuffers Boost::mp11 safeint_interface
+  )
+
+  add_dependencies(onnxruntime_providers_xnnpack onnx ${onnxruntime_EXTERNAL_DEPENDENCIES})
+  set_target_properties(onnxruntime_providers_xnnpack PROPERTIES FOLDER "ONNXRuntime")
+
+  set_target_properties(onnxruntime_providers_xnnpack PROPERTIES LINKER_LANGUAGE CXX)
+
+  if (NOT onnxruntime_BUILD_SHARED_LIB)
+    install(TARGETS onnxruntime_providers_xnnpack
+            ARCHIVE   DESTINATION ${CMAKE_INSTALL_LIBDIR}
+            LIBRARY   DESTINATION ${CMAKE_INSTALL_LIBDIR}
+            RUNTIME   DESTINATION ${CMAKE_INSTALL_BINDIR}
+            FRAMEWORK DESTINATION ${CMAKE_INSTALL_BINDIR})
+  endif()
+
+  # TODO fix shorten-64-to-32 warnings
+  # there are some in builds where sizeof(size_t) != sizeof(int64_t), e.g., in 'ONNX Runtime Web CI Pipeline'
+  if (HAS_SHORTEN_64_TO_32 AND NOT CMAKE_SIZEOF_VOID_P EQUAL 8)
+    target_compile_options(onnxruntime_providers_xnnpack PRIVATE -Wno-error=shorten-64-to-32)
+  endif()
diff --git a/cmake/onnxruntime_python.cmake b/cmake/onnxruntime_python.cmake
index bf9adbaefabcc..345ef2b504aa4 100644
--- a/cmake/onnxruntime_python.cmake
+++ b/cmake/onnxruntime_python.cmake
@@ -339,9 +339,6 @@ configure_file(${ONNXRUNTIME_ROOT}/python/_pybind_state.py.in
                ${CMAKE_BINARY_DIR}/onnxruntime/capi/_pybind_state.py)
 
 if (onnxruntime_ENABLE_TRAINING)
-  file(GLOB onnxruntime_python_capi_training_srcs CONFIGURE_DEPENDS
-    "${ORTTRAINING_SOURCE_DIR}/python/deprecated/*.py"
-  )
   file(GLOB onnxruntime_python_root_srcs CONFIGURE_DEPENDS
     "${ORTTRAINING_SOURCE_DIR}/python/training/*.py"
   )
@@ -387,6 +384,9 @@ if (onnxruntime_ENABLE_TRAINING)
   file(GLOB onnxruntime_python_ortmodule_torch_cpp_ext_fused_ops_srcs CONFIGURE_DEPENDS
     "${ORTTRAINING_SOURCE_DIR}/python/training/ortmodule/torch_cpp_extensions/cuda/fused_ops/*"
   )
+  file(GLOB onnxruntime_python_ortmodule_graph_optimizers_srcs CONFIGURE_DEPENDS
+    "${ORTTRAINING_SOURCE_DIR}/python/training/ortmodule/graph_optimizers/*"
+  )
   file(GLOB onnxruntime_python_ort_triton_srcs CONFIGURE_DEPENDS
     "${ORTTRAINING_SOURCE_DIR}/python/training/ort_triton/*.py"
   )
@@ -416,10 +416,6 @@ if (onnxruntime_ENABLE_TRAINING)
     "${ORTTRAINING_SOURCE_DIR}/python/training/onnxblock/optim/*"
     )
   endif()
-else()
-  file(GLOB onnxruntime_python_capi_training_srcs CONFIGURE_DEPENDS
-    "${ONNXRUNTIME_ROOT}/python/training/*.py"
-  )
 endif()
 
 if (onnxruntime_BUILD_UNIT_TESTS)
@@ -440,6 +436,9 @@ if (onnxruntime_BUILD_UNIT_TESTS)
   file(GLOB onnxruntime_python_transformers_testdata_whisper CONFIGURE_DEPENDS
       "${ONNXRUNTIME_ROOT}/test/python/transformers/test_data/models/whisper/*.onnx"
   )
+  file(GLOB onnxruntime_python_transformers_testdata_conformer CONFIGURE_DEPENDS
+      "${ONNXRUNTIME_ROOT}/test/python/transformers/test_data/models/conformer/*.onnx"
+  )
 endif()
 
 file(GLOB onnxruntime_python_tools_srcs CONFIGURE_DEPENDS
@@ -553,6 +552,7 @@ add_custom_command(
   COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/transformers/test_data/models
   COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/transformers/test_data/models/whisper
   COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/eager_test
+  COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/transformers/test_data/models/conformer
   COMMAND ${CMAKE_COMMAND} -E copy
       ${ONNXRUNTIME_ROOT}/__init__.py
       $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/
@@ -574,9 +574,6 @@ add_custom_command(
   COMMAND ${CMAKE_COMMAND} -E copy_if_different
       ${CMAKE_BINARY_DIR}/onnxruntime/capi/_pybind_state.py
       $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/capi/
-  COMMAND ${CMAKE_COMMAND} -E copy
-      ${onnxruntime_python_capi_training_srcs}
-      $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/capi/training/
   COMMAND ${CMAKE_COMMAND} -E copy
       $<TARGET_FILE:onnxruntime_pybind11_state>
       $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/capi/
@@ -708,6 +705,9 @@ if (onnxruntime_BUILD_UNIT_TESTS)
     COMMAND ${CMAKE_COMMAND} -E copy
         ${onnxruntime_python_transformers_testdata_whisper}
         $<TARGET_FILE_DIR:${build_output_target}>/transformers/test_data/models/whisper/
+    COMMAND ${CMAKE_COMMAND} -E copy
+        ${onnxruntime_python_transformers_testdata_conformer}
+        $<TARGET_FILE_DIR:${build_output_target}>/transformers/test_data/models/conformer/
   )
 endif()
 
@@ -741,14 +741,12 @@ if (onnxruntime_ENABLE_TRAINING)
     COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/training/ortmodule/torch_cpp_extensions/cpu/torch_interop_utils
     COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/training/ortmodule/torch_cpp_extensions/cuda/torch_gpu_allocator
     COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/training/ortmodule/torch_cpp_extensions/cuda/fused_ops
+    COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/training/ortmodule/graph_optimizers
     COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/training/ort_triton
     COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/training/ort_triton/kernel
     COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/training/utils
     COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/training/utils/data/
     COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/training/utils/hooks/
-    COMMAND ${CMAKE_COMMAND} -E copy
-        ${onnxruntime_python_capi_training_srcs}
-        $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/capi/training/
     COMMAND ${CMAKE_COMMAND} -E copy
         ${onnxruntime_python_root_srcs}
         $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/training/
@@ -794,6 +792,9 @@ if (onnxruntime_ENABLE_TRAINING)
     COMMAND ${CMAKE_COMMAND} -E copy
         ${onnxruntime_python_ortmodule_torch_cpp_ext_fused_ops_srcs}
         $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/training/ortmodule/torch_cpp_extensions/cuda/fused_ops/
+    COMMAND ${CMAKE_COMMAND} -E copy
+        ${onnxruntime_python_ortmodule_graph_optimizers_srcs}
+        $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/training/ortmodule/graph_optimizers/
     COMMAND ${CMAKE_COMMAND} -E copy
         ${onnxruntime_python_ort_triton_srcs}
         $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/training/ort_triton/
diff --git a/cmake/onnxruntime_rocm_hipify.cmake b/cmake/onnxruntime_rocm_hipify.cmake
index cf71b6bcf7c7d..980bd59b22c3f 100644
--- a/cmake/onnxruntime_rocm_hipify.cmake
+++ b/cmake/onnxruntime_rocm_hipify.cmake
@@ -48,19 +48,24 @@ set(contrib_ops_excluded_files
   "diffusion/group_norm_impl.cu"
   "diffusion/group_norm_impl.h"
   "diffusion/nhwc_conv.cc"
-  "math/complex_mul.cc"
-  "math/complex_mul.h"
-  "math/complex_mul_impl.cu"
-  "math/complex_mul_impl.h"
-  "math/cufft_plan_cache.h"
-  "math/fft_ops.cc"
-  "math/fft_ops.h"
-  "math/fft_ops_impl.cu"
-  "math/fft_ops_impl.h"
+  "math/gemm_float8.cc"
+  "math/gemm_float8.cu"
+  "math/gemm_float8.h"
+  "moe/*"
   "quantization/attention_quantization.cc"
   "quantization/attention_quantization.h"
   "quantization/attention_quantization_impl.cu"
   "quantization/attention_quantization_impl.cuh"
+  "quantization/dequantize_blockwise.cuh"
+  "quantization/dequantize_blockwise.cu"
+  "quantization/dequantize_blockwise_bnb4.cuh"
+  "quantization/dequantize_blockwise_bnb4.cu"
+  "quantization/matmul_bnb4.cc"
+  "quantization/matmul_bnb4.cuh"
+  "quantization/matmul_bnb4.cu"
+  "quantization/matmul_nbits.cc"
+  "quantization/matmul_nbits.cuh"
+  "quantization/matmul_nbits.cu"
   "quantization/quantize_dequantize_linear.cc"
   "quantization/qordered_ops/qordered_attention_impl.cu"
   "quantization/qordered_ops/qordered_attention_impl.h"
@@ -86,38 +91,37 @@ set(contrib_ops_excluded_files
   "quantization/qordered_ops/qordered_unary_ops.cc"
   "quantization/qordered_ops/qordered_unary_ops_impl.h"
   "quantization/qordered_ops/qordered_unary_ops_impl.cu"
-  "tensor/crop.cc"
-  "tensor/crop.h"
-  "tensor/crop_impl.cu"
-  "tensor/crop_impl.h"
-  "tensor/dynamicslice.cc"
-  "tensor/image_scaler.cc"
-  "tensor/image_scaler.h"
-  "tensor/image_scaler_impl.cu"
-  "tensor/image_scaler_impl.h"
-  "transformers/greedy_search.cc"
-  "transformers/greedy_search.h"
-  "conv_transpose_with_dynamic_pads.cc"
-  "conv_transpose_with_dynamic_pads.h"
   "cuda_contrib_kernels.cc"
   "cuda_contrib_kernels.h"
   "inverse.cc"
   "fused_conv.cc"
+  "bert/group_query_attention_helper.h"
+  "bert/group_query_attention.h"
+  "bert/group_query_attention.cc"
+  "bert/group_query_attention_impl.h"
+  "bert/group_query_attention_impl.cu"
 )
 
 if (NOT onnxruntime_ENABLE_ATEN)
   list(APPEND contrib_ops_excluded_files "aten_ops/aten_op.cc")
 endif()
 if (NOT onnxruntime_USE_NCCL)
+  # Those are string patterns to exclude. Do NOT use stars such as
+  # collective/*.cc or *.h.
   list(APPEND contrib_ops_excluded_files "collective/nccl_kernels.cc")
+  list(APPEND contrib_ops_excluded_files "collective/sharding.cc")
+  list(APPEND contrib_ops_excluded_files "collective/sharding_spec.cc")
+  list(APPEND contrib_ops_excluded_files "collective/distributed_matmul.cc")
+  list(APPEND contrib_ops_excluded_files "collective/distributed_slice.cc")
+  list(APPEND contrib_ops_excluded_files "collective/distributed_reshape.cc")
+  list(APPEND contrib_ops_excluded_files "collective/distributed_expand.cc")
+  list(APPEND contrib_ops_excluded_files "collective/distributed_reduce.cc")
+  list(APPEND contrib_ops_excluded_files "collective/distributed_unsqueeze.cc")
+  list(APPEND contrib_ops_excluded_files "collective/distributed_squeeze.cc")
 endif()
 
 set(provider_excluded_files
   "atomic/common.cuh"
-  "controlflow/loop.cc"
-  "controlflow/loop.h"
-  "controlflow/scan.cc"
-  "controlflow/scan.h"
   "cu_inc/common.cuh"
   "math/einsum_utils/einsum_auxiliary_ops.cc"
   "math/einsum_utils/einsum_auxiliary_ops.h"
@@ -165,7 +169,6 @@ set(provider_excluded_files
   "cuda_memory_check.h"
   "cuda_fence.cc"
   "cuda_fence.h"
-  "cuda_fwd.h"
   "cuda_kernel.h"
   "cuda_pch.cc"
   "cuda_pch.h"
@@ -185,6 +188,8 @@ set(provider_excluded_files
   "gpu_data_transfer.h"
   "integer_gemm.cc"
   "tunable/*"
+  "cuda_nhwc_kernels.cc"
+  "cuda_nhwc_kernels.h"
 )
 
 set(training_ops_excluded_files
diff --git a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake
index ec83eb2095071..df62199dc2b42 100644
--- a/cmake/onnxruntime_unittests.cmake
+++ b/cmake/onnxruntime_unittests.cmake
@@ -35,13 +35,17 @@ function(AddTest)
 
   if (MSVC AND NOT CMAKE_SIZEOF_VOID_P EQUAL 8)
     #TODO: fix the warnings, they are dangerous
-    target_compile_options(${_UT_TARGET} PRIVATE "/wd4244")
+    target_compile_options(${_UT_TARGET} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options /wd4244>"
+                "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd4244>")
   endif()
   if (MSVC)
-    target_compile_options(${_UT_TARGET} PRIVATE "/wd6330")
-    #Abseil has a lot of C4127/C4324 warnings. 
-    target_compile_options(${_UT_TARGET} PRIVATE "/wd4127")
-    target_compile_options(${_UT_TARGET} PRIVATE "/wd4324")
+    target_compile_options(${_UT_TARGET} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options /wd6330>"
+                "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd6330>")
+    #Abseil has a lot of C4127/C4324 warnings.
+    target_compile_options(${_UT_TARGET} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options /wd4127>"
+                "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd4127>")
+    target_compile_options(${_UT_TARGET} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options /wd4324>"
+                "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd4324>")
   endif()
 
   set_target_properties(${_UT_TARGET} PROPERTIES FOLDER "ONNXRuntimeTest")
@@ -60,6 +64,11 @@ function(AddTest)
             Threads::Threads)
     target_compile_definitions(${_UT_TARGET} PRIVATE -DUSE_ONNXRUNTIME_DLL)
   else()
+    if(onnxruntime_USE_CUDA)
+      #XXX: we should not need to do this. onnxruntime_test_all.exe should not have direct dependency on CUDA DLLs,
+      # otherwise it will impact when CUDA DLLs can be unloaded.
+      target_link_libraries(${_UT_TARGET} PRIVATE cudart)
+    endif()
     target_link_libraries(${_UT_TARGET} PRIVATE ${_UT_LIBS} GTest::gtest GTest::gmock ${onnxruntime_EXTERNAL_LIBRARIES})
   endif()
 
@@ -85,29 +94,22 @@ function(AddTest)
     # include dbghelp in case tests throw an ORT exception, as that exception includes a stacktrace, which requires dbghelp.
     target_link_libraries(${_UT_TARGET} PRIVATE debug dbghelp)
 
-    if (onnxruntime_USE_CUDA)
-      # disable a warning from the CUDA headers about unreferenced local functions
-      if (MSVC)
-        target_compile_options(${_UT_TARGET} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler /wd4505>"
-                "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd4505>")
-      endif()
-    endif()
     if (MSVC)
       # warning C6326: Potential comparison of a constant with another constant.
       # Lot of such things came from gtest
-      target_compile_options(${_UT_TARGET} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler /wd6326>"
+      target_compile_options(${_UT_TARGET} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options /wd6326>"
                 "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd6326>")
       # Raw new and delete. A lot of such things came from googletest.
-      target_compile_options(${_UT_TARGET} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler /wd26409>"
+      target_compile_options(${_UT_TARGET} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options /wd26409>"
                 "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd26409>")
       # "Global initializer calls a non-constexpr function."
-      target_compile_options(${_UT_TARGET} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler /wd26426>"
+      target_compile_options(${_UT_TARGET} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options /wd26426>"
                 "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd26426>")
     endif()
     target_compile_options(${_UT_TARGET} PRIVATE ${disabled_warnings})
   else()
     target_compile_options(${_UT_TARGET} PRIVATE ${DISABLED_WARNINGS_FOR_TVM})
-    target_compile_options(${_UT_TARGET} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:-Xcompiler -Wno-error=sign-compare>"
+    target_compile_options(${_UT_TARGET} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options -Wno-error=sign-compare>"
             "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:-Wno-error=sign-compare>")
     target_compile_options(${_UT_TARGET} PRIVATE "-Wno-error=uninitialized")
   endif()
@@ -199,8 +201,18 @@ function(AddTest)
           list(APPEND TEST_NODE_FLAGS "--experimental-wasm-simd")
         endif()
 
+        # prefer Node from emsdk so the version is more deterministic
+        if (DEFINED ENV{EMSDK_NODE})
+          set(NODE_EXECUTABLE $ENV{EMSDK_NODE})
+        else()
+          # warning as we don't know what node version is being used and whether things like the TEST_NODE_FLAGS
+          # will be valid. e.g. "--experimental-wasm-simd" is not valid with node v20 or later.
+          message(WARNING "EMSDK_NODE environment variable was not set. Falling back to system `node`.")
+          set(NODE_EXECUTABLE node)
+        endif()
+
         add_test(NAME ${_UT_TARGET}
-          COMMAND node ${TEST_NODE_FLAGS} ${_UT_TARGET}.js ${TEST_ARGS}
+          COMMAND ${NODE_EXECUTABLE} ${TEST_NODE_FLAGS} ${_UT_TARGET}.js ${TEST_ARGS}
           WORKING_DIRECTORY $<TARGET_FILE_DIR:${_UT_TARGET}>
         )
       endif()
@@ -372,6 +384,13 @@ if (onnxruntime_USE_CUDA AND NOT onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_R
     "${TEST_SRC_DIR}/providers/cuda/*"
     )
   list(APPEND onnxruntime_test_providers_src ${onnxruntime_test_providers_cuda_src})
+
+  if (onnxruntime_USE_CUDA_NHWC_OPS)
+    file(GLOB onnxruntime_test_providers_cuda_nhwc_src CONFIGURE_DEPENDS
+      "${TEST_SRC_DIR}/providers/cuda/nhwc/*.cc"
+    )
+    list(APPEND onnxruntime_test_providers_src ${onnxruntime_test_providers_cuda_nhwc_src})
+  endif()
 endif()
 
 if (onnxruntime_USE_CANN)
@@ -698,7 +717,7 @@ onnxruntime_add_static_library(onnxruntime_test_utils ${onnxruntime_test_utils_s
 if(MSVC)
   target_compile_options(onnxruntime_test_utils PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options /utf-8>"
           "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/utf-8>")
-  target_compile_options(onnxruntime_test_utils PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler /wd6326>"
+  target_compile_options(onnxruntime_test_utils PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options /wd6326>"
                 "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd6326>")
 else()
   target_compile_definitions(onnxruntime_test_utils PUBLIC -DNSYNC_ATOMIC_CPP11)
@@ -755,13 +774,8 @@ set_target_properties(onnx_test_runner_common PROPERTIES FOLDER "ONNXRuntimeTest
 
 set(all_tests ${onnxruntime_test_common_src} ${onnxruntime_test_ir_src} ${onnxruntime_test_optimizer_src}
         ${onnxruntime_test_framework_src} ${onnxruntime_test_providers_src} ${onnxruntime_test_quantiztion_src})
-if(NOT TARGET onnxruntime AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
-  list(APPEND all_tests ${onnxruntime_shared_lib_test_SRC})
-endif()
 
-if (onnxruntime_USE_CUDA)
-  onnxruntime_add_static_library(onnxruntime_test_cuda_ops_lib ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/cuda_ops.cu)
-  list(APPEND onnxruntime_test_common_libs onnxruntime_test_cuda_ops_lib)
+if (onnxruntime_ENABLE_CUDA_EP_INTERNAL_TESTS)
   file(GLOB onnxruntime_test_providers_cuda_ut_src CONFIGURE_DEPENDS
     "${TEST_SRC_DIR}/providers/cuda/test_cases/*"
   )
@@ -769,7 +783,7 @@ if (onnxruntime_USE_CUDA)
   onnxruntime_add_shared_library_module(onnxruntime_providers_cuda_ut ${onnxruntime_test_providers_cuda_ut_src} $<TARGET_OBJECTS:onnxruntime_providers_cuda_obj>)
   config_cuda_provider_shared_module(onnxruntime_providers_cuda_ut)
   onnxruntime_add_include_to_target(onnxruntime_providers_cuda_ut GTest::gtest GTest::gmock)
-  target_link_libraries(onnxruntime_providers_cuda_ut PRIVATE GTest::gtest GTest::gmock)
+  target_link_libraries(onnxruntime_providers_cuda_ut PRIVATE GTest::gtest GTest::gmock ${ONNXRUNTIME_MLAS_LIBS} onnxruntime_common)
   list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_cuda_ut)
 endif()
 
@@ -822,7 +836,9 @@ if (onnxruntime_USE_TENSORRT)
   # made test name contain the "ep" and "model path" information, so we can easily filter the tests using cuda ep or other ep with *cpu_* or *xxx_*.
   list(APPEND test_all_args "--gtest_filter=-*cpu_*:*cuda_*" )
 endif ()
-
+if(NOT onnxruntime_ENABLE_CUDA_EP_INTERNAL_TESTS)
+  list(REMOVE_ITEM all_tests ${TEST_SRC_DIR}/providers/cuda/cuda_provider_test.cc)
+endif()
 AddTest(
   TARGET onnxruntime_test_all
   SOURCES ${all_tests} ${onnxruntime_unittest_main_src}
@@ -832,11 +848,15 @@ AddTest(
   DEPENDS ${all_dependencies}
   TEST_ARGS ${test_all_args}
 )
+
 if (MSVC)
   # The warning means the type of two integral values around a binary operator is narrow than their result.
   # If we promote the two input values first, it could be more tolerant to integer overflow.
   # However, this is test code. We are less concerned.
-  target_compile_options(onnxruntime_test_all PRIVATE "/wd26451" "/wd4244")
+  target_compile_options(onnxruntime_test_all PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options /wd26451>"
+                "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd26451>")
+  target_compile_options(onnxruntime_test_all PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options /wd4244>"
+                "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd4244>")
 else()
   target_compile_options(onnxruntime_test_all PRIVATE "-Wno-parentheses")
 endif()
@@ -848,7 +868,7 @@ if (HAS_SHORTEN_64_TO_32 AND NOT CMAKE_SIZEOF_VOID_P EQUAL 8)
 endif()
 
 if (UNIX AND onnxruntime_USE_TENSORRT)
-    # The test_main.cc includes NvInfer.h where it has many deprecated declarations  
+    # The test_main.cc includes NvInfer.h where it has many deprecated declarations
     # simply ignore them for TensorRT EP build
     set_property(TARGET onnxruntime_test_all APPEND_STRING PROPERTY COMPILE_FLAGS "-Wno-deprecated-declarations")
 endif()
@@ -886,7 +906,7 @@ if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
 endif()
 if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
   set_target_properties(onnxruntime_test_all PROPERTIES LINK_DEPENDS ${TEST_SRC_DIR}/wasm/onnxruntime_test_all_adapter.js)
-  set_target_properties(onnxruntime_test_all PROPERTIES LINK_FLAGS "-s STACK_SIZE=5242880 -s ALLOW_MEMORY_GROWTH=1 --pre-js \"${TEST_SRC_DIR}/wasm/onnxruntime_test_all_adapter.js\" -s \"EXPORTED_RUNTIME_METHODS=['FS']\" --preload-file ${CMAKE_CURRENT_BINARY_DIR}/testdata@/testdata -s EXIT_RUNTIME=1 -s DEMANGLE_SUPPORT=1")
+  set_target_properties(onnxruntime_test_all PROPERTIES LINK_FLAGS "-s STACK_SIZE=5242880 -s ALLOW_MEMORY_GROWTH=1 -s MAXIMUM_MEMORY=4294967296 --pre-js \"${TEST_SRC_DIR}/wasm/onnxruntime_test_all_adapter.js\" -s \"EXPORTED_RUNTIME_METHODS=['FS']\" --preload-file ${CMAKE_CURRENT_BINARY_DIR}/testdata@/testdata -s EXIT_RUNTIME=1 -s DEMANGLE_SUPPORT=1")
   if (onnxruntime_ENABLE_WEBASSEMBLY_THREADS)
     set_property(TARGET onnxruntime_test_all APPEND_STRING PROPERTY LINK_FLAGS " -s DEFAULT_PTHREAD_STACK_SIZE=131072 -s PROXY_TO_PTHREAD=1")
   endif()
@@ -972,9 +992,9 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
     endif()
 
     if (MSVC OR ${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
-        file(GLOB QNN_LIB_FILES LIST_DIRECTORIES false "${onnxruntime_QNN_HOME}/lib/${QNN_ARCH_ABI}/*.so" "${onnxruntime_QNN_HOME}/target/${QNN_ARCH_ABI}/lib/*.so" "${onnxruntime_QNN_HOME}/lib/${QNN_ARCH_ABI}/*.dll" "${onnxruntime_QNN_HOME}/target/${QNN_ARCH_ABI}/lib/*.dll")
+        file(GLOB QNN_LIB_FILES LIST_DIRECTORIES false "${onnxruntime_QNN_HOME}/lib/${QNN_ARCH_ABI}/*.so" "${onnxruntime_QNN_HOME}/lib/${QNN_ARCH_ABI}/*.dll")
         if (${QNN_ARCH_ABI} STREQUAL "aarch64-windows-msvc")
-          file(GLOB EXTRA_HTP_LIB LIST_DIRECTORIES false "${onnxruntime_QNN_HOME}/lib/hexagon-v68/unsigned/libQnnHtpV68Skel.so" "${onnxruntime_QNN_HOME}/target/hexagon-v68/lib/unsigned/libQnnHtpV68Skel.so")
+          file(GLOB EXTRA_HTP_LIB LIST_DIRECTORIES false "${onnxruntime_QNN_HOME}/lib/hexagon-v68/unsigned/libQnnHtpV68Skel.so" "${onnxruntime_QNN_HOME}/lib/hexagon-v73/unsigned/libQnnHtpV73Skel.so")
           list(APPEND QNN_LIB_FILES ${EXTRA_HTP_LIB})
         endif()
         message(STATUS "QNN lib files: " ${QNN_LIB_FILES})
@@ -1092,18 +1112,18 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
     target_include_directories(onnxruntime_benchmark PRIVATE ${ONNXRUNTIME_ROOT} ${onnxruntime_graph_header} ${ONNXRUNTIME_ROOT}/core/mlas/inc)
     target_compile_definitions(onnxruntime_benchmark PRIVATE BENCHMARK_STATIC_DEFINE)
     if(WIN32)
-      target_compile_options(onnxruntime_benchmark PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler /wd4141>"
+      target_compile_options(onnxruntime_benchmark PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options /wd4141>"
                         "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd4141>")
       # Avoid using new and delete. But this is a benchmark program, it's ok if it has a chance to leak.
-      target_compile_options(onnxruntime_benchmark PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler /wd26409>"
+      target_compile_options(onnxruntime_benchmark PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options /wd26409>"
                         "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd26409>")
-      target_compile_options(onnxruntime_benchmark PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler /wd26400>"
+      target_compile_options(onnxruntime_benchmark PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options /wd26400>"
                         "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd26400>")
-      target_compile_options(onnxruntime_benchmark PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler /wd26814>"
+      target_compile_options(onnxruntime_benchmark PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options /wd26814>"
                         "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd26814>")
-      target_compile_options(onnxruntime_benchmark PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler /wd26814>"
+      target_compile_options(onnxruntime_benchmark PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options /wd26814>"
                         "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd26497>")
-      target_compile_options(onnxruntime_benchmark PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler /wd26426>"
+      target_compile_options(onnxruntime_benchmark PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options /wd26426>"
                         "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd26426>")
       target_compile_options(onnxruntime_benchmark PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options /utf-8>"
               "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/utf-8>")
@@ -1255,7 +1275,7 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
       list(APPEND onnxruntime_shared_lib_test_LIBS cpuinfo)
     endif()
     if (onnxruntime_USE_CUDA)
-      list(APPEND onnxruntime_shared_lib_test_LIBS onnxruntime_test_cuda_ops_lib cudart)
+      list(APPEND onnxruntime_shared_lib_test_LIBS cudart)
     endif()
     if (onnxruntime_USE_TENSORRT)
       list(APPEND onnxruntime_shared_lib_test_LIBS ${TENSORRT_LIBRARY_INFER})
@@ -1270,7 +1290,10 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
             LIBS ${onnxruntime_shared_lib_test_LIBS}
             DEPENDS ${all_dependencies}
     )
-
+    if (onnxruntime_USE_CUDA)
+      target_include_directories(onnxruntime_shared_lib_test PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
+      target_sources(onnxruntime_shared_lib_test PRIVATE ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/cuda_ops.cu)
+    endif()
     if (CMAKE_SYSTEM_NAME STREQUAL "Android")
       target_sources(onnxruntime_shared_lib_test PRIVATE
         "${ONNXRUNTIME_ROOT}/core/platform/android/cxa_demangle.cc"
@@ -1288,7 +1311,7 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
     endif()
 
     if (UNIX AND onnxruntime_USE_TENSORRT)
-        # The test_main.cc includes NvInfer.h where it has many deprecated declarations  
+        # The test_main.cc includes NvInfer.h where it has many deprecated declarations
         # simply ignore them for TensorRT EP build
         set_property(TARGET onnxruntime_shared_lib_test APPEND_STRING PROPERTY COMPILE_FLAGS "-Wno-deprecated-declarations")
     endif()
@@ -1356,13 +1379,13 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
   )
   onnxruntime_add_executable(onnxruntime_mlas_test ${onnxruntime_mlas_test_src})
   if(MSVC)
-    target_compile_options(onnxruntime_mlas_test PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler /wd26409>"
+    target_compile_options(onnxruntime_mlas_test PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options /wd26409>"
                 "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd26409>")
     target_compile_options(onnxruntime_mlas_test PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options /utf-8>"
             "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/utf-8>")
-    target_compile_options(onnxruntime_mlas_test PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler /wd6326>"
+    target_compile_options(onnxruntime_mlas_test PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options /wd6326>"
                 "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd6326>")
-    target_compile_options(onnxruntime_mlas_test PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler /wd26426>"
+    target_compile_options(onnxruntime_mlas_test PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options /wd26426>"
                 "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd26426>")
   endif()
   if(${CMAKE_SYSTEM_NAME} STREQUAL "iOS")
@@ -1476,7 +1499,7 @@ if (NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
         "${TEST_SRC_DIR}/testdata/custom_op_library/cuda/cuda_ops.*")
     list(APPEND custom_op_lib_include ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} ${onnxruntime_CUDNN_HOME}/include)
     if (HAS_QSPECTRE)
-      list(APPEND custom_op_lib_option "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:-Xcompiler /Qspectre>")
+      list(APPEND custom_op_lib_option "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options /Qspectre>")
     endif()
   endif()
 
@@ -1503,7 +1526,7 @@ if (NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
   else()
     set(ONNXRUNTIME_CUSTOM_OP_LIB_LINK_FLAG "-DEF:${TEST_SRC_DIR}/testdata/custom_op_library/custom_op_library.def")
     if (NOT onnxruntime_USE_CUDA)
-      target_compile_options(custom_op_library PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler /wd26409>"
+      target_compile_options(custom_op_library PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options /wd26409>"
                     "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd26409>")
     endif()
   endif()
@@ -1577,7 +1600,7 @@ if (NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
     endif()
 
     if (UNIX AND onnxruntime_USE_TENSORRT)
-        # The test_main.cc includes NvInfer.h where it has many deprecated declarations  
+        # The test_main.cc includes NvInfer.h where it has many deprecated declarations
         # simply ignore them for TensorRT EP build
         set_property(TARGET onnxruntime_customopregistration_test APPEND_STRING PROPERTY COMPILE_FLAGS "-Wno-deprecated-declarations")
     endif()
diff --git a/cmake/onnxruntime_webassembly.cmake b/cmake/onnxruntime_webassembly.cmake
index c6510c97a617e..9014089cb6112 100644
--- a/cmake/onnxruntime_webassembly.cmake
+++ b/cmake/onnxruntime_webassembly.cmake
@@ -192,8 +192,13 @@ else()
     onnxruntime_util
     re2::re2
   )
+
+  set(EXPORTED_RUNTIME_METHODS "'stackAlloc','stackRestore','stackSave','UTF8ToString','stringToUTF8','lengthBytesUTF8'")
+
   if (onnxruntime_USE_XNNPACK)
     target_link_libraries(onnxruntime_webassembly PRIVATE XNNPACK)
+    string(APPEND EXPORTED_RUNTIME_METHODS ",'addFunction'")
+    target_link_options(onnxruntime_webassembly PRIVATE "SHELL:-s ALLOW_TABLE_GROWTH=1")
   endif()
 
   if(onnxruntime_USE_WEBNN)
@@ -204,7 +209,6 @@ else()
     target_link_libraries(onnxruntime_webassembly PRIVATE tensorboard)
   endif()
 
-  set(EXPORTED_RUNTIME_METHODS "['stackAlloc','stackRestore','stackSave','UTF8ToString','stringToUTF8','lengthBytesUTF8']")
   if (onnxruntime_USE_JSEP)
     set(EXPORTED_FUNCTIONS "_malloc,_free,_JsepOutput,_JsepGetNodeName")
   else()
@@ -212,7 +216,7 @@ else()
   endif()
 
   target_link_options(onnxruntime_webassembly PRIVATE
-    "SHELL:-s EXPORTED_RUNTIME_METHODS=${EXPORTED_RUNTIME_METHODS}"
+    "SHELL:-s EXPORTED_RUNTIME_METHODS=[${EXPORTED_RUNTIME_METHODS}]"
     "SHELL:-s EXPORTED_FUNCTIONS=${EXPORTED_FUNCTIONS}"
     "SHELL:-s MAXIMUM_MEMORY=4294967296"
     "SHELL:-s EXIT_RUNTIME=0"
diff --git a/cmake/patches/abseil/absl_gh_issue_1435_workaround.patch b/cmake/patches/abseil/absl_gh_issue_1435_workaround.patch
new file mode 100644
index 0000000000000..0a864cdc019b4
--- /dev/null
+++ b/cmake/patches/abseil/absl_gh_issue_1435_workaround.patch
@@ -0,0 +1,17 @@
+--- absl/container/internal/layout.h	2023-11-28 09:35:48
++++ absl/container/internal/layout.updated.h	2023-11-28 10:13:14
+@@ -181,9 +181,11 @@
+ #include <sanitizer/asan_interface.h>
+ #endif
+ 
+-#if defined(__GXX_RTTI)
+-#define ABSL_INTERNAL_HAS_CXA_DEMANGLE
+-#endif
++// Comment out ABSL_INTERNAL_HAS_CXA_DEMANGLE definition to work around this issue:
++// https://github.com/abseil/abseil-cpp/issues/1435
++// #if defined(__GXX_RTTI)
++// #define ABSL_INTERNAL_HAS_CXA_DEMANGLE
++// #endif
+ 
+ #ifdef ABSL_INTERNAL_HAS_CXA_DEMANGLE
+ #include <cxxabi.h>
diff --git a/cmake/patches/composable_kernel/Fix_Clang_Build.patch b/cmake/patches/composable_kernel/Fix_Clang_Build.patch
index d564ffba914fe..15844dd917744 100644
--- a/cmake/patches/composable_kernel/Fix_Clang_Build.patch
+++ b/cmake/patches/composable_kernel/Fix_Clang_Build.patch
@@ -1,17 +1,17 @@
 diff --git a/CMakeLists.txt b/CMakeLists.txt
-index 514b98fde..59c8a568a 100644
+index 04674124c..12e8b8b00 100644
 --- a/CMakeLists.txt
 +++ b/CMakeLists.txt
-@@ -1,7 +1,7 @@
- cmake_minimum_required(VERSION 3.14)
+@@ -19,7 +19,7 @@ endif()
  
+ set(version 1.1.0)
  # Check support for CUDA/HIP in Cmake
--project(composable_kernel)
-+project(composable_kernel LANGUAGES CXX HIP)
+-project(composable_kernel VERSION ${version})
++project(composable_kernel VERSION ${version} LANGUAGES CXX HIP)
  
  list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake")
  
-@@ -94,27 +94,6 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
+@@ -173,27 +173,6 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
  set(CMAKE_CXX_EXTENSIONS OFF)
  message("CMAKE_CXX_COMPILER_ID: ${CMAKE_CXX_COMPILER_ID}")
  
@@ -39,7 +39,7 @@ index 514b98fde..59c8a568a 100644
  ## HIP
  find_package(HIP REQUIRED)
  # Override HIP version in config.h, if necessary.
-@@ -136,8 +115,6 @@ if( DEFINED CK_OVERRIDE_HIP_VERSION_PATCH )
+@@ -215,8 +194,6 @@ if( DEFINED CK_OVERRIDE_HIP_VERSION_PATCH )
      message(STATUS "CK_HIP_VERSION_PATCH overriden with ${CK_OVERRIDE_HIP_VERSION_PATCH}")
  endif()
  message(STATUS "Build with HIP ${HIP_VERSION}")
@@ -48,7 +48,18 @@ index 514b98fde..59c8a568a 100644
  
  ## tidy
  include(EnableCompilerWarnings)
-@@ -391,11 +368,3 @@ rocm_install(FILES
+@@ -376,7 +353,9 @@ if(BUILD_DEV)
+     add_compile_options(-Werror -Weverything)
+ endif()
+ #add flags to reduce the size of binaries
+-add_compile_options(-Oz -flto=thin)
++# -flto requires ORT to use a linker that support LTO and -flto flag shoud be passed to linker together.
++# add_compile_options(-Oz -flto=thin)
++add_compile_options(-Oz)
+ message("CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}")
+ 
+ add_custom_target(check COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure -C ${CMAKE_CFG_INTDIR})
+@@ -482,11 +461,3 @@ rocm_install(FILES
  
  set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE")
  set(CPACK_RPM_PACKAGE_LICENSE "MIT")
@@ -61,20 +72,21 @@ index 514b98fde..59c8a568a 100644
 -    HEADER_ONLY
 -)
 diff --git a/library/src/tensor_operation_instance/gpu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/CMakeLists.txt
-index 1d54a141b..4edd7dbfb 100644
+index 9cb5d0e9a..141a46f3d 100644
 --- a/library/src/tensor_operation_instance/gpu/CMakeLists.txt
 +++ b/library/src/tensor_operation_instance/gpu/CMakeLists.txt
-@@ -1,7 +1,13 @@
- function(add_instance_library INSTANCE_NAME)
-     message("adding instance ${INSTANCE_NAME}")
-+    set_source_files_properties(${ARGN} PROPERTIES LANGUAGE HIP)
-     add_library(${INSTANCE_NAME} OBJECT ${ARGN})
-+    # Always disable debug symbol and C debug assert due to
-+    # - Linker error: ... relocation truncated to fit ..., caused by object files to be linked are too huge.
-+    # - https://github.com/ROCmSoftwarePlatform/composable_kernel/issues/622
-+    target_compile_options(${INSTANCE_NAME} PRIVATE -g0 -DNDEBUG)
-     target_compile_features(${INSTANCE_NAME} PUBLIC)
-+    target_compile_definitions(${INSTANCE_NAME} PRIVATE "__HIP_PLATFORM_AMD__=1" "__HIP_PLATFORM_HCC__=1")
-     set_target_properties(${INSTANCE_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
-     clang_tidy_check(${INSTANCE_NAME})
- endfunction(add_instance_library INSTANCE_NAME)
+@@ -44,8 +44,14 @@ function(add_instance_library INSTANCE_NAME)
+     endforeach()
+     #only continue if there are some source files left on the list
+     if(ARGN)
++        set_source_files_properties(${ARGN} PROPERTIES LANGUAGE HIP)
+         add_library(${INSTANCE_NAME} OBJECT ${ARGN})
++        # Always disable debug symbol and C debug assert due to
++        # - Linker error: ... relocation truncated to fit ..., caused by object files to be linked are too huge.
++        # - https://github.com/ROCmSoftwarePlatform/composable_kernel/issues/622
++        target_compile_options(${INSTANCE_NAME} PRIVATE -g0 -DNDEBUG)
+         target_compile_features(${INSTANCE_NAME} PUBLIC)
++        target_compile_definitions(${INSTANCE_NAME} PRIVATE "__HIP_PLATFORM_AMD__=1" "__HIP_PLATFORM_HCC__=1")
+         set_target_properties(${INSTANCE_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+         clang_tidy_check(${INSTANCE_NAME})
+         set(result 0)
diff --git a/cmake/patches/cutlass/cutlass.patch b/cmake/patches/cutlass/cutlass.patch
deleted file mode 100644
index bda1de8b46916..0000000000000
--- a/cmake/patches/cutlass/cutlass.patch
+++ /dev/null
@@ -1,92 +0,0 @@
-diff --git a/include/cute/numeric/complex.hpp b/include/cute/numeric/complex.hpp
-index 3790ebd3..cf727d09 100644
---- a/include/cute/numeric/complex.hpp
-+++ b/include/cute/numeric/complex.hpp
-@@ -41,10 +41,14 @@
- // With CUDA 11.4, builds show spurious "-Wconversion" warnings
- // on line 656 of thrust/detail/type_traits.h.
- // These pragmas suppress the warnings.
-+#ifdef __GNUC__
- #pragma GCC diagnostic push
- #pragma GCC diagnostic ignored "-Wconversion"
-+#endif
- #include <thrust/complex.h>
-+#ifdef __GNUC__
- #pragma GCC diagnostic pop
-+#endif
- 
- #include <cute/config.hpp>
- 
-diff --git a/include/cutlass/functional.h b/include/cutlass/functional.h
-index 59aec46a..8f2a913a 100644
---- a/include/cutlass/functional.h
-+++ b/include/cutlass/functional.h
-@@ -89,7 +89,7 @@ struct multiplies {
-   }
- };
- 
--#if defined(__CUDA_ARCH__)
-+#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
- /// Partial specializations needed when __CUDA_NO_HALF2_OPERATORS__ is set
- template<>
- struct plus<__half2> {
-@@ -143,12 +143,12 @@ struct multiplies<__half> {
- 
- 
- // Maximum with nan propogation
--// To propgate the NANs, the "max" of a two element that contains NaNs should also return a NaN 
-+// To propgate the NANs, the "max" of a two element that contains NaNs should also return a NaN
- template <typename T>
- struct maximum_with_nan_propogation {
-   CUTLASS_HOST_DEVICE
-   T operator()(T const &lhs, T const &rhs) const {
--    return lhs > rhs or std::isnan(lhs) ? lhs : rhs;
-+    return lhs > rhs or isnan(lhs) ? lhs : rhs;
-   }
- };
- 
-@@ -160,7 +160,7 @@ struct maximum_with_nan_propogation<float> {
- #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 800)
-     asm volatile("max.NaN.f32 %0, %1, %2;\n" : "=f"(res) : "f"(lhs), "f"(rhs));
- #else
--    res = lhs > rhs or std::isnan(lhs) ? lhs : rhs;
-+    res = lhs > rhs or isnan(lhs) ? lhs : rhs;
- #endif
-     return res;
-   }
-@@ -233,7 +233,7 @@ struct negate {
-   }
- };
- 
--/// Greater equal 
-+/// Greater equal
- template <typename T>
- struct greater_equal {
-   CUTLASS_HOST_DEVICE
-@@ -242,7 +242,7 @@ struct greater_equal {
-   }
- };
- 
--/// Greater  
-+/// Greater
- template <typename T>
- struct greater {
-   CUTLASS_HOST_DEVICE
-@@ -251,7 +251,7 @@ struct greater {
-   }
- };
- 
--/// Less equal 
-+/// Less equal
- template <typename T>
- struct less_equal {
-   CUTLASS_HOST_DEVICE
-@@ -260,7 +260,7 @@ struct less_equal {
-   }
- };
- 
--/// Less  
-+/// Less
- template <typename T>
- struct less {
-   CUTLASS_HOST_DEVICE
diff --git a/cmake/patches/eigen/Fix_Eigen_Build_Break.patch b/cmake/patches/eigen/Fix_Eigen_Build_Break.patch
deleted file mode 100644
index ca0e0fd23ddee..0000000000000
--- a/cmake/patches/eigen/Fix_Eigen_Build_Break.patch
+++ /dev/null
@@ -1,27 +0,0 @@
-diff -Naur git_org/cmake/external/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h git/cmake/external/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h
---- git_org/cmake/external/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h	2019-07-17 15:27:59.540667336 -0500
-+++ git/cmake/external/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h	2019-07-17 15:30:16.000000000 -0500
-@@ -1076,8 +1076,9 @@
-     dest = *b;
-   }
- 
--  EIGEN_STRONG_INLINE void updateRhs(const RhsScalar* b, RhsPacketx4& dest) const
--  {}
-+  EIGEN_STRONG_INLINE void updateRhs(const RhsScalar*, RhsPacketx4&) const
-+  {
-+  }
- 
-   EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, RhsPacket& dest) const
-   {
-@@ -1145,8 +1146,9 @@
-     loadRhs(b,dest);
-   }
- 
--  EIGEN_STRONG_INLINE void updateRhs(const RhsScalar* b, RhsPacketx4& dest) const
--  {}
-+  EIGEN_STRONG_INLINE void updateRhs(const RhsScalar*, RhsPacketx4&) const
-+  {
-+  }
- 
-   EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, RhsPacket& dest) const
-   {
diff --git a/cmake/patches/onnx/onnx.patch b/cmake/patches/onnx/onnx.patch
index 155d153019f85..a2d7672a3d48d 100644
--- a/cmake/patches/onnx/onnx.patch
+++ b/cmake/patches/onnx/onnx.patch
@@ -64,16 +64,3 @@ index 0aab3e26..0f859267 100644
 +#endif
 +
  #endif // ! ONNX_ONNX_PB_H
-diff --git a/onnx/checker.cc b/onnx/checker.cc
-index 8fdaf037..1beb1b88 100644
---- a/onnx/checker.cc
-+++ b/onnx/checker.cc
-@@ -190,7 +190,7 @@ void check_tensor(const TensorProto& tensor, const CheckerContext& ctx) {
-         }
-         std::string data_path = path_join(ctx.get_model_dir(), relative_path);
-         // use stat64 to check whether the file exists
--#ifdef __APPLE__
-+#if defined(__APPLE__) || defined(__wasm__)
-         struct stat buffer; // APPLE does not have stat64
-         if (stat((data_path).c_str(), &buffer) != 0) {
- #else
diff --git a/cmake/patches/xnnpack/AddEmscriptenAndIosSupport.patch b/cmake/patches/xnnpack/AddEmscriptenAndIosSupport.patch
index 37bdbf9fb53f6..736fffb1e384c 100644
--- a/cmake/patches/xnnpack/AddEmscriptenAndIosSupport.patch
+++ b/cmake/patches/xnnpack/AddEmscriptenAndIosSupport.patch
@@ -1,66 +1,39 @@
 diff --git a/CMakeLists.txt b/CMakeLists.txt
-index d53c48aa1..77c3cf983 100755
+index dba9b4687..a4345898d 100755
 --- a/CMakeLists.txt
 +++ b/CMakeLists.txt
-@@ -105,22 +105,12 @@ ENDIF()
- 
+@@ -122,7 +122,7 @@ ENDIF()
+ # ---[ Build flags
  IF(NOT CMAKE_SYSTEM_NAME)
    MESSAGE(FATAL_ERROR "CMAKE_SYSTEM_NAME not defined")
--ELSEIF(NOT CMAKE_SYSTEM_NAME MATCHES "^(Darwin|Linux|Android|Windows|CYGWIN|MSYS)$")
-+ELSEIF(NOT CMAKE_SYSTEM_NAME MATCHES "^(Darwin|Linux|Android|Windows|CYGWIN|MSYS|Emscripten|iOS)$")
-   MESSAGE(FATAL_ERROR "Unrecognized CMAKE_SYSTEM_NAME = ${CMAKE_SYSTEM_NAME}")
+-ELSEIF(NOT CMAKE_SYSTEM_NAME MATCHES "^(Android|Darwin|iOS|Linux|Windows|CYGWIN|MSYS|QURT)$")
++ELSEIF(NOT CMAKE_SYSTEM_NAME MATCHES "^(Android|Darwin|iOS|Linux|Windows|CYGWIN|MSYS|QURT|Emscripten|iOS)$")
+   MESSAGE(FATAL_ERROR "Unrecognized CMAKE_SYSTEM_NAME value \"${CMAKE_SYSTEM_NAME}\"")
  ENDIF()
- 
- # ---[ Download deps
- IF(NOT XNNPACK_USE_SYSTEM_LIBS)
--  IF(NOT DEFINED CLOG_SOURCE_DIR)
--    MESSAGE(STATUS "Downloading clog to ${CMAKE_BINARY_DIR}/clog-source (define CLOG_SOURCE_DIR to avoid it)")
--    CONFIGURE_FILE(cmake/DownloadCLog.cmake "${CMAKE_BINARY_DIR}/clog-download/CMakeLists.txt")
--    EXECUTE_PROCESS(COMMAND "${CMAKE_COMMAND}" -G "${CMAKE_GENERATOR}" .
--      WORKING_DIRECTORY "${CMAKE_BINARY_DIR}/clog-download")
--    EXECUTE_PROCESS(COMMAND "${CMAKE_COMMAND}" --build .
--      WORKING_DIRECTORY "${CMAKE_BINARY_DIR}/clog-download")
--    SET(CLOG_SOURCE_DIR "${CMAKE_BINARY_DIR}/clog-source" CACHE STRING "clog source directory")
--  ENDIF()
--
-   IF(NOT DEFINED CPUINFO_SOURCE_DIR)
-     MESSAGE(STATUS "Downloading cpuinfo to ${CMAKE_BINARY_DIR}/cpuinfo-source (define CPUINFO_SOURCE_DIR to avoid it)")
-     CONFIGURE_FILE(cmake/DownloadCpuinfo.cmake "${CMAKE_BINARY_DIR}/cpuinfo-download/CMakeLists.txt")
-@@ -7108,6 +7098,10 @@ IF(MSVC)
-   SET_PROPERTY(SOURCE ${ALL_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS "$<$<NOT:$<CONFIG:Debug>>: /O2 >")
-   SET_PROPERTY(SOURCE ${HOT_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS "$<$<NOT:$<CONFIG:Debug>>: /O2 >")
-   SET_PROPERTY(SOURCE ${COLD_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS "$<$<NOT:$<CONFIG:Debug>>: /O1 >")
-+ELSEIF(CMAKE_GENERATOR STREQUAL Xcode)
-+  TARGET_COMPILE_OPTIONS(all_microkernels PRIVATE $<$<NOT:$<CONFIG:Debug>>: -O2 >)
-+  TARGET_COMPILE_OPTIONS(XNNPACK PRIVATE $<$<NOT:$<CONFIG:Debug>>: -O2 >)
-+  TARGET_COMPILE_OPTIONS(XNNPACK PRIVATE $<$<NOT:$<CONFIG:Debug>>: -Os >)
- ELSE()
-   SET_PROPERTY(SOURCE ${ALL_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS "$<$<NOT:$<CONFIG:Debug>>: -O2 >")
-   SET_PROPERTY(SOURCE ${HOT_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS "$<$<NOT:$<CONFIG:Debug>>: -O2 >")
-@@ -7142,26 +7136,6 @@ IF(LIBM)
-   TARGET_LINK_LIBRARIES(indirection PRIVATE ${LIBM})
+ IF(CMAKE_SYSTEM_NAME MATCHES "Windows")
+@@ -534,7 +534,12 @@ IF(XNNPACK_BUILD_LIBRARY)
+   TARGET_LINK_LIBRARIES(operator-utils PRIVATE logging)
+   TARGET_LINK_LIBRARIES(post-operation PRIVATE logging)
+   TARGET_LINK_LIBRARIES(subgraph PRIVATE allocator logging memory mutex operators operator-run)
+-  TARGET_LINK_LIBRARIES(XNNPACK PRIVATE allocator cache hardware-config indirection jit logging memory microkernel-utils microparams-init mutex normalization operators operator-run operator-utils packing post-operation microkernels-prod subgraph)
++  IF(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
++    # omit microkernels-prod as the list is manually created by ORT in cmake/external/xnnpack.cmake
++    TARGET_LINK_LIBRARIES(XNNPACK PRIVATE allocator cache hardware-config indirection jit logging memory microkernel-utils microparams-init mutex normalization operators operator-run operator-utils packing post-operation subgraph)
++  ELSE()
++    TARGET_LINK_LIBRARIES(XNNPACK PRIVATE allocator cache hardware-config indirection jit logging memory microkernel-utils microparams-init mutex normalization operators operator-run operator-utils packing post-operation microkernels-prod subgraph)
++  ENDIF()
+   SET_TARGET_PROPERTIES(XNNPACK PROPERTIES C_EXTENSIONS YES)
  ENDIF()
- 
--# ---[ Configure clog
--IF(NOT TARGET clog)
--  IF(NOT XNNPACK_USE_SYSTEM_LIBS)
--    SET(CLOG_BUILD_TESTS OFF CACHE BOOL "")
--    SET(CLOG_RUNTIME_TYPE "${CPUINFO_RUNTIME_TYPE}" CACHE STRING "")
--    ADD_SUBDIRECTORY(
--      "${CLOG_SOURCE_DIR}/deps/clog"
--      "${CMAKE_BINARY_DIR}/clog")
--    # We build static version of clog but a dynamic library may indirectly depend on it
--    SET_PROPERTY(TARGET clog PROPERTY POSITION_INDEPENDENT_CODE ON)
--  ELSE()
--    ADD_LIBRARY(clog STATIC IMPORTED)
--    FIND_LIBRARY(CLOG_LIBRARY clog)
--    IF(NOT CLOG_LIBRARY)
--      MESSAGE(FATAL_ERROR "Cannot find clog")
--    ENDIF()
--    SET_PROPERTY(TARGET clog PROPERTY IMPORTED_LOCATION "${CLOG_LIBRARY}")
--  ENDIF()
--ENDIF()
--
- # ---[ Configure cpuinfo
- IF(NOT TARGET cpuinfo)
-   IF(NOT XNNPACK_USE_SYSTEM_LIBS)
+ IF(NOT MSVC)
+@@ -543,8 +548,9 @@ ENDIF()
+ IF(XNNPACK_TARGET_PROCESSOR STREQUAL "arm")
+   SET_PROPERTY(SOURCE ${ALL_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -marm ")
+   SET_PROPERTY(SOURCE ${PROD_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -marm ")
+-  SET_PROPERTY(SOURCE ${ALL_ARMSIMD32_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -march=armv6 -mfpu=vfp -munaligned-access ")
+-  SET_PROPERTY(SOURCE ${PROD_ARMSIMD32_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -march=armv6 -mfpu=vfp -munaligned-access ")
++  # set this to armv7-a to workaround build issue. we don't target armv6 so it shouldn't matter
++  SET_PROPERTY(SOURCE ${ALL_ARMSIMD32_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -march=armv7-a -mfpu=vfp -munaligned-access ")
++  SET_PROPERTY(SOURCE ${PROD_ARMSIMD32_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -march=armv7-a -mfpu=vfp -munaligned-access ")
+   SET_PROPERTY(SOURCE ${ALL_NEON_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -march=armv7-a -mfpu=neon ")
+   SET_PROPERTY(SOURCE ${PROD_NEON_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -march=armv7-a -mfpu=neon ")
+   SET_PROPERTY(SOURCE ${ALL_NEONFP16_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -march=armv7-a -mfpu=neon-fp16 ")
diff --git a/cmake/winml.cmake b/cmake/winml.cmake
index 395996f0fa4b9..268ee3960e75a 100644
--- a/cmake/winml.cmake
+++ b/cmake/winml.cmake
@@ -451,6 +451,8 @@ onnxruntime_add_static_library(winml_lib_api
   ${winml_lib_api_dir}/impl/TensorKindFrom.h
   ${winml_lib_api_dir}/impl/TensorMemoryBufferReference.h
   ${winml_lib_api_dir}/NumericData.cpp
+  ${winml_lib_api_dir}/HardwareCoreEnumerator.cpp
+  ${winml_lib_api_dir}/HardwareCoreEnumerator.h
   ${winml_lib_api_dir}/ImageFeatureDescriptor.cpp
   ${winml_lib_api_dir}/ImageFeatureDescriptor.h
   ${winml_lib_api_dir}/ImageFeatureValue.cpp
diff --git a/csharp/OnnxRuntime.CSharp.proj b/csharp/OnnxRuntime.CSharp.proj
index 0288d752d8749..5e43756ced7b1 100644
--- a/csharp/OnnxRuntime.CSharp.proj
+++ b/csharp/OnnxRuntime.CSharp.proj
@@ -17,9 +17,13 @@ CMake creates a target to this project
     <TargetArchitecture Condition=" '$(TargetArchitecture)' == '' ">x64</TargetArchitecture>
     <IsReleaseBuild Condition=" '$(IsReleaseBuild)' == '' ">false</IsReleaseBuild>
     <ReleaseVersionSuffix></ReleaseVersionSuffix>
-    <IsLinuxBuild Condition=" '$(IsLinuxBuild)' == '' ">false</IsLinuxBuild>
+    <IsWindowsBuild Condition="'$([System.Runtime.InteropServices.RuntimeInformation]::IsOSPlatform($([System.Runtime.InteropServices.OSPlatform]::Windows)))' == 'true'">true</IsWindowsBuild>
+    <IsLinuxBuild Condition="'$([System.Runtime.InteropServices.RuntimeInformation]::IsOSPlatform($([System.Runtime.InteropServices.OSPlatform]::Linux)))' == 'true'">true</IsLinuxBuild>
     <ExecutionProvider Condition=" '$(ExecutionProvider)' == '' ">None</ExecutionProvider>
 
+    <!-- include Xamarin/MAUI Android and iOS target frameworks? Command line property overrides this value. -->
+    <IncludeMobileTargets>true</IncludeMobileTargets>
+
     <!--internal build related properties-->
     <OnnxRuntimeSourceDirectory Condition="'$(OnnxRuntimeSourceDirectory)'==''">..</OnnxRuntimeSourceDirectory>
     <GenerateNuspecScript>..\tools\nuget\generate_nuspec_for_native_nuget.py</GenerateNuspecScript>
@@ -30,13 +34,15 @@ CMake creates a target to this project
     <OnnxRuntimeBuildDirectory Condition="'$(OnnxRuntimeBuildDirectory)'==''">..\build\Linux</OnnxRuntimeBuildDirectory>
     <OnnxRuntimePackagesDirectory Condition="'$(OnnxRuntimePackagesDirectory)'==''">$(OnnxRuntimeBuildDirectory)\packages</OnnxRuntimePackagesDirectory>
     <NativeBuildOutputDir>$(OnnxRuntimeBuildDirectory)\$(Configuration)</NativeBuildOutputDir>
+    <PythonExe>python3</PythonExe>
   </PropertyGroup>
 
-  <PropertyGroup Condition="'$(IsLinuxBuild)'=='false'">
+  <PropertyGroup Condition="'$(IsWindowsBuild)'=='true'">
     <!--internal build related properties for Windows -->
     <OnnxRuntimeBuildDirectory Condition="'$(OnnxRuntimeBuildDirectory)'==''">..\build\Windows</OnnxRuntimeBuildDirectory>
     <OnnxRuntimePackagesDirectory Condition="'$(OnnxRuntimePackagesDirectory)'==''">$(OnnxRuntimeBuildDirectory)\packages</OnnxRuntimePackagesDirectory>
     <NativeBuildOutputDir>$(OnnxRuntimeBuildDirectory)\$(Configuration)\$(Configuration)</NativeBuildOutputDir>
+    <PythonExe>python</PythonExe>
   </PropertyGroup>
 
   <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
@@ -86,28 +92,55 @@ CMake creates a target to this project
   </Target>
 
   <Target Name="CreatePackage">
-    <Message Importance="High" Text="Bundling managed assemblies into a NuGet package ..." />
+    <!-- Validate no missing UnmanagedFunctionPointer attributes on delegates. -->
+    <Exec Condition="'$(IsWindowsBuild)' == 'true' AND $(OrtPackageId) == 'Microsoft.ML.OnnxRuntime'"
+          Command="$(PythonExe) tools/ValidateNativeDelegateAttributes.py"
+          ContinueOnError="False">
+        <Output TaskParameter="ConsoleOutput" PropertyName="GenerateNuspecOutput" />
+    </Exec>
+
+    <!-- Create Microsoft.ML.OnnxRuntime.Managed with the C# bindings using the C# project -->
+    <Message Importance="High" Text="Creating Microsoft.ML.OnnxRuntime.Managed nuget package..." />
     <MSBuild Projects="src\Microsoft.ML.OnnxRuntime\Microsoft.ML.OnnxRuntime.csproj"
              Targets="CopyMiscFiles;Pack"
-            Properties="NoBuild=true;Platform=AnyCPU;PackageVersion=$(PackageVersion);OrtPackageId=$(OrtPackageId);SelectedTargets=All"/>
+            Properties="NoBuild=true;Platform=AnyCPU;PackageVersion=$(PackageVersion);OrtPackageId=$(OrtPackageId);IncludeMobileTargets=$(IncludeMobileTargets)"/>
 
-    <Message Importance="High" Text="Generating nuspec for the native Nuget package ..." />
-    <Exec ContinueOnError="False" Command="python $(GenerateNuspecScript) --package_version $(PackageVersion) --package_name $(OrtPackageId) --target_architecture $(TargetArchitecture) --build_config $(Configuration) --native_build_path $(NativeBuildOutputDirAbs) --packages_path $(OnnxRuntimePackagesDirectoryAbs) --ort_build_path $(OnnxRuntimeBuildDirectoryAbs) --sources_path $(OnnxRuntimeSourceDirectoryAbs) --commit_id $(GitCommitHash) --is_release_build $(IsReleaseBuild) --execution_provider $(ExecutionProvider)" ConsoleToMSBuild="true">
+    <!-- Manually create the nuspec for the native Microsoft.ML.OnnxRuntime package -->
+    <Message Importance="High" Text="Generating nuspec for the native Microsoft.ML.OnnxRuntime nuget package..." />
+    <Exec Command="$(PythonExe) $(GenerateNuspecScript) --package_version $(PackageVersion) --package_name $(OrtPackageId) --target_architecture $(TargetArchitecture) --build_config $(Configuration) --native_build_path $(NativeBuildOutputDirAbs) --packages_path $(OnnxRuntimePackagesDirectoryAbs) --ort_build_path $(OnnxRuntimeBuildDirectoryAbs) --sources_path $(OnnxRuntimeSourceDirectoryAbs) --commit_id $(GitCommitHash) --is_release_build $(IsReleaseBuild) --execution_provider $(ExecutionProvider)"
+          ContinueOnError="False"
+          ConsoleToMSBuild="true">
         <Output TaskParameter="ConsoleOutput" PropertyName="GenerateNuspecOutput" />
     </Exec>
 
-    <Message Importance="High" Text="Bundling native shared library artifacts into a NuGet package ..." />
-    <Exec ContinueOnError="False" Command="$(NugetExe) pack NativeNuget.nuspec" ConsoleToMSBuild="true" WorkingDirectory="$(NativeBuildOutputDirAbs)" Condition=" '$(OS)' == 'Windows_NT'">
+    <!-- run `nuget pack` on Windows or `dotnet pack` on Linux to create the native nupkg -->
+    <Message Importance="High" Text="Bundling native shared library artifacts into Microsoft.ML.OnnxRuntime nuget package..." />
+    <Exec Condition=" '$(IsWindowsBuild)' == 'true'"
+          Command="$(NugetExe) pack NativeNuget.nuspec"
+          WorkingDirectory="$(NativeBuildOutputDirAbs)"
+          ContinueOnError="False"
+          ConsoleToMSBuild="true">
       <Output TaskParameter="ConsoleOutput" PropertyName="OutputOfExec" />
     </Exec>
 
-    <Exec ContinueOnError="False" Command="$(NugetExe) pack NativeNuget.nuspec" ConsoleToMSBuild="true" WorkingDirectory="$(NativeBuildOutputDirAbs)" Condition=" '$(OS)' != 'Windows_NT'">
+    <!-- build.py uses dotnet to build on linux so we know it's available.
+         nuget needs to be run using mono to work correctly, but installing mono on WSL breaks interop
+         (see https://github.com/microsoft/WSL/issues/8531). in order to play nicely with both we use `dotnet pack`
+         to pack the native nuget package using a stub csproj to provide the nuspec file path.
+    -->
+    <Exec Condition="'$(IsLinuxBuild)' == 'true'"
+          Command="dotnet pack tools/linux_pack/LinuxPackNativeNuget.csproj /p:Configuration=$(Configuration) /p:OnnxRuntimeBuildDirectory=$(NativeBuildOutputDirAbs)"
+          ContinueOnError="False"
+          ConsoleToMSBuild="true" >
       <Output TaskParameter="ConsoleOutput" PropertyName="OutputOfExec" />
     </Exec>
+    <!-- copy the nupkg to the build output directory so its location is consistent on all platforms -->
+    <Copy Condition="'$(IsLinuxBuild)' == 'true'"
+          SourceFiles="tools/linux_pack/bin/$(Configuration)/$(OrtPackageId).$(PackageVersion).nupkg"
+          DestinationFolder="$(NativeBuildOutputDirAbs)" />
 
-    <Copy
-    SourceFiles="$(NativeBuildOutputDirAbs)\$(OrtPackageId).$(PackageVersion).nupkg"
-    DestinationFolder="$(NativeBuildOutputDirAbs)\nuget-local-artifacts"
+    <Copy SourceFiles="$(NativeBuildOutputDirAbs)\$(OrtPackageId).$(PackageVersion).nupkg"
+          DestinationFolder="$(NativeBuildOutputDirAbs)\nuget-local-artifacts"
     />
   </Target>
 
diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj b/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj
index 29ccf55f081d5..0c74a23204d4f 100644
--- a/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj
@@ -4,66 +4,53 @@
     <OrtPackageId Condition="'$(OrtPackageId)' == ''">Microsoft.ML.OnnxRuntime</OrtPackageId>
   </PropertyGroup>
 
-  <!--
-  Temporary setup until official Visual Studio 2022 release supports .net6, as the CIs require the official release.
-  We will be able to build all targets with VS once that happens.
-
-  Until then, we need to build the pre-.net6 targets with VS and the .net6 targets with dotnet.
-
-  The pre-.net6 Xamarin targets are optional and only included if the machine has the required workloads.
-
-  We have 3 scenarios
-    1) Build pre-net6 targets with VS - SelectedTargets=PreNet6
-    2) Build net6 targets - SelectedTargets=Net6
-    3) Run 'Pack' target to create nuget package from combination of 1 and 2 - SelectedTargets=All
-
-  Default is PreNet6 so that existing projects and CI builds will do the same thing unless explicitly updated.
-  -->
   <PropertyGroup>
-    <SelectedTargets>PreNet6</SelectedTargets>
-    <BaseTargets>netstandard2.0;netcoreapp3.1;net6.0</BaseTargets>
+    <IncludeMobileTargets>true</IncludeMobileTargets>
+    <BaseTargets>netstandard2.0</BaseTargets>
+    <MobileTargets></MobileTargets>
   </PropertyGroup>
 
-  <!-- only set the Xamarin mobile targets if we're building an ORT package,
-       and only if the mobile workloads are installed -->
-  <PropertyGroup Condition="('$(OrtPackageId)' == 'Microsoft.ML.OnnxRuntime' OR
-                             '$(OrtPackageId)' == 'Microsoft.ML.OnnxRuntime.Gpu') AND
-                             Exists('$(MSBuildExtensionsPath)\Xamarin\Android') AND
-                             Exists('$(MSBuildExtensionsPath)\Xamarin\iOS')">
-    <XamarinTargets>xamarinios10;monoandroid11.0</XamarinTargets>
+  <!-- special case the DesktopOnly solution -->
+  <PropertyGroup Condition="'$(SolutionName)' == 'OnnxRuntime.DesktopOnly.CSharp'">
+    <IncludeMobileTargets>false</IncludeMobileTargets>
   </PropertyGroup>
 
-  <PropertyGroup Condition="('$(OrtPackageId)' == 'Microsoft.ML.OnnxRuntime.Training') AND
-                             Exists('$(MSBuildExtensionsPath)\Xamarin\Android')">
-    <XamarinTargetsForTraining>monoandroid11.0</XamarinTargetsForTraining>
-  </PropertyGroup>
+  <!-- add Xamarin mobile targets if we're building an ORT package and the Xamarin workloads are installed
 
-  <!-- only set the .net6 targets if we're building an ORT package.
-       we can add .net6 support to other packages later as needed -->
+       NOTE: We include in a build of the managed package when creating Microsoft.ML.OnnxRuntime.Gpu as both
+             the CPU and GPU packaging pipelines can publish Microsoft.ML.OnnxRuntime.Managed, and we need the targets
+             to be consistent in both.
+  -->
   <PropertyGroup Condition="('$(OrtPackageId)' == 'Microsoft.ML.OnnxRuntime' OR
-                             '$(OrtPackageId)' == 'Microsoft.ML.OnnxRuntime.Azure' OR
-                             '$(OrtPackageId)' == 'Microsoft.ML.OnnxRuntime.Gpu')">
-    <Net6Targets>net6.0;net6.0-android;net6.0-ios;net6.0-macos</Net6Targets>
+                             '$(OrtPackageId)' == 'Microsoft.ML.OnnxRuntime.Gpu') AND
+                            '$(IncludeMobileTargets)' == 'true' AND
+                            Exists('$(MSBuildExtensionsPath)\Xamarin\Android') AND
+                            Exists('$(MSBuildExtensionsPath)\Xamarin\iOS')">
+    <MobileTargets>xamarinios10;monoandroid11.0</MobileTargets>
   </PropertyGroup>
 
-  <PropertyGroup Condition="('$(OrtPackageId)' == 'Microsoft.ML.OnnxRuntime.Training')">
-    <Net6TargetsForTrainingPackage>net6.0;net6.0-android</Net6TargetsForTrainingPackage>
+  <PropertyGroup Condition="'$(OrtPackageId)' == 'Microsoft.ML.OnnxRuntime.Training' AND
+                            '$(IncludeMobileTargets)' == 'true' AND
+                            Exists('$(MSBuildExtensionsPath)\Xamarin\Android')">
+    <MobileTargets>monoandroid11.0</MobileTargets>
   </PropertyGroup>
 
-  <PropertyGroup Condition="'$(SelectedTargets)'=='PreNet6'">
-    <TargetFrameworks>$(BaseTargets);$(XamarinTargets);$(XamarinTargetsForTraining)</TargetFrameworks>
+  <!-- add MAUI targets if building ORT package -->
+  <PropertyGroup Condition="('$(OrtPackageId)' == 'Microsoft.ML.OnnxRuntime' OR
+                             '$(OrtPackageId)' == 'Microsoft.ML.OnnxRuntime.Azure') AND
+                            '$(IncludeMobileTargets)' == 'true'">
+    <MobileTargets>$(MobileTargets);net6.0-android;net6.0-ios</MobileTargets>
   </PropertyGroup>
 
-  <PropertyGroup Condition="'$(SelectedTargets)'=='Net6'">
-    <TargetFrameworks>$(Net6Targets);$(Net6TargetsForTrainingPackage)</TargetFrameworks>
+  <PropertyGroup Condition="'$(OrtPackageId)' == 'Microsoft.ML.OnnxRuntime.Training' AND
+                            '$(IncludeMobileTargets)' == 'true'">
+    <MobileTargets>$(MobileTargets);net6.0-android</MobileTargets>
   </PropertyGroup>
 
-  <!-- nuget package creation -->
-  <PropertyGroup Condition="'$(SelectedTargets)'=='All'">
-    <TargetFrameworks>$(BaseTargets);$(XamarinTargets);$(XamarinTargetsForTraining);$(Net6Targets);$(Net6TargetsForTrainingPackage)</TargetFrameworks>
+  <PropertyGroup>
+    <TargetFrameworks>$(BaseTargets);$(MobileTargets)</TargetFrameworks>
   </PropertyGroup>
 
-
   <PropertyGroup>
     <Platforms>AnyCPU;x86</Platforms>
     <LangVersion>default</LangVersion>
@@ -204,8 +191,9 @@
     <DefineConstants>$(DefineConstants);$(OrtConstants)</DefineConstants>
   </PropertyGroup>
 
-  <!-- debug output - makes finding/fixing any issues with the the conditions easy.
+  <!-- debug output - makes finding/fixing any issues with the the conditions easy.  -->
   <Target Name="DumpValues" BeforeTargets="PreBuildEvent">
+    <Message Text="SolutionName='$(SolutionName)'" />
     <Message Text="TargetPlatform='$(TargetPlatform)' TargetPlatformIdentifier='$(TargetPlatformIdentifier)' " />
     <Message Text="TargetFramework='$(TargetFramework)' TargetFrameworkIdentifier='$(TargetFrameworkIdentifier)' " />
     <Message Text="[MSBuild]::GetTargetPlatformIdentifier(TargetFramework)='$([MSBuild]::GetTargetPlatformIdentifier('$(TargetFramework)'))' " />
@@ -214,7 +202,6 @@
     <Message Text="OrtConstants='$(OrtConstants)' " />
     <Message Text="TargetFrameworks='$(TargetFrameworks)' " />
   </Target>
-  -->
 
   <ItemGroup>
     <None Include="$(OnnxRuntimeCsharpRoot)\..\include\onnxruntime\core\session\onnxruntime_*.h"
diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.shared.cs b/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.shared.cs
index 2ba837be22041..4128524b30483 100644
--- a/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.shared.cs
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.shared.cs
@@ -373,7 +373,7 @@ static NativeMethods()
             OrtAddSessionConfigEntry = (DOrtAddSessionConfigEntry)Marshal.GetDelegateForFunctionPointer(api_.AddSessionConfigEntry, typeof(DOrtAddSessionConfigEntry));
             OrtAddInitializer = (DOrtAddInitializer)Marshal.GetDelegateForFunctionPointer(api_.AddInitializer, typeof(DOrtAddInitializer));
             SessionOptionsAppendExecutionProvider_TensorRT = (DSessionOptionsAppendExecutionProvider_TensorRT)Marshal.GetDelegateForFunctionPointer(
-                                                             api_.SessionOptionsAppendExecutionProvider_TensorRT, typeof(DSessionOptionsAppendExecutionProvider_TensorRT));
+                api_.SessionOptionsAppendExecutionProvider_TensorRT, typeof(DSessionOptionsAppendExecutionProvider_TensorRT));
 
             OrtCreateRunOptions = (DOrtCreateRunOptions)Marshal.GetDelegateForFunctionPointer(api_.CreateRunOptions, typeof(DOrtCreateRunOptions));
             OrtReleaseRunOptions = (DOrtReleaseRunOptions)Marshal.GetDelegateForFunctionPointer(api_.ReleaseRunOptions, typeof(DOrtReleaseRunOptions));
@@ -487,27 +487,26 @@ static NativeMethods()
             OrtReleasePrepackedWeightsContainer = (DOrtReleasePrepackedWeightsContainer)Marshal.GetDelegateForFunctionPointer(api_.ReleasePrepackedWeightsContainer, typeof(DOrtReleasePrepackedWeightsContainer));
 
             SessionOptionsAppendExecutionProvider_TensorRT_V2 = (DSessionOptionsAppendExecutionProvider_TensorRT_V2)Marshal.GetDelegateForFunctionPointer(
-                                                             api_.SessionOptionsAppendExecutionProvider_TensorRT_V2, typeof(DSessionOptionsAppendExecutionProvider_TensorRT_V2));
+                api_.SessionOptionsAppendExecutionProvider_TensorRT_V2, typeof(DSessionOptionsAppendExecutionProvider_TensorRT_V2));
             OrtCreateTensorRTProviderOptions = (DOrtCreateTensorRTProviderOptions)Marshal.GetDelegateForFunctionPointer(api_.CreateTensorRTProviderOptions, typeof(DOrtCreateTensorRTProviderOptions));
             OrtUpdateTensorRTProviderOptions = (DOrtUpdateTensorRTProviderOptions)Marshal.GetDelegateForFunctionPointer(api_.UpdateTensorRTProviderOptions, typeof(DOrtUpdateTensorRTProviderOptions));
             OrtGetTensorRTProviderOptionsAsString = (DOrtGetTensorRTProviderOptionsAsString)Marshal.GetDelegateForFunctionPointer(api_.GetTensorRTProviderOptionsAsString, typeof(DOrtGetTensorRTProviderOptionsAsString));
             OrtReleaseTensorRTProviderOptions = (DOrtReleaseTensorRTProviderOptions)Marshal.GetDelegateForFunctionPointer(api_.ReleaseTensorRTProviderOptions, typeof(DOrtReleaseTensorRTProviderOptions));
 
             SessionOptionsAppendExecutionProvider_CUDA = (DSessionOptionsAppendExecutionProvider_CUDA)Marshal.GetDelegateForFunctionPointer(
-                                                 api_.SessionOptionsAppendExecutionProvider_CUDA, typeof(DSessionOptionsAppendExecutionProvider_CUDA));
+                api_.SessionOptionsAppendExecutionProvider_CUDA, typeof(DSessionOptionsAppendExecutionProvider_CUDA));
             SessionOptionsAppendExecutionProvider_CUDA_V2 = (DSessionOptionsAppendExecutionProvider_CUDA_V2)Marshal.GetDelegateForFunctionPointer(
-                                                 api_.SessionOptionsAppendExecutionProvider_CUDA_V2, typeof(DSessionOptionsAppendExecutionProvider_CUDA_V2));
+                api_.SessionOptionsAppendExecutionProvider_CUDA_V2, typeof(DSessionOptionsAppendExecutionProvider_CUDA_V2));
             OrtCreateCUDAProviderOptions = (DOrtCreateCUDAProviderOptions)Marshal.GetDelegateForFunctionPointer(api_.CreateCUDAProviderOptions, typeof(DOrtCreateCUDAProviderOptions));
             OrtUpdateCUDAProviderOptions = (DOrtUpdateCUDAProviderOptions)Marshal.GetDelegateForFunctionPointer(api_.UpdateCUDAProviderOptions, typeof(DOrtUpdateCUDAProviderOptions));
             OrtGetCUDAProviderOptionsAsString = (DOrtGetCUDAProviderOptionsAsString)Marshal.GetDelegateForFunctionPointer(api_.GetCUDAProviderOptionsAsString, typeof(DOrtGetCUDAProviderOptionsAsString));
             OrtReleaseCUDAProviderOptions = (DOrtReleaseCUDAProviderOptions)Marshal.GetDelegateForFunctionPointer(api_.ReleaseCUDAProviderOptions, typeof(DOrtReleaseCUDAProviderOptions));
-            SessionOptionsAppendExecutionProvider
-                = (DSessionOptionsAppendExecutionProvider)Marshal.GetDelegateForFunctionPointer(
-                    api_.SessionOptionsAppendExecutionProvider,
-                    typeof(DSessionOptionsAppendExecutionProvider));
+            SessionOptionsAppendExecutionProvider = (DSessionOptionsAppendExecutionProvider)Marshal.GetDelegateForFunctionPointer(
+                api_.SessionOptionsAppendExecutionProvider,
+                typeof(DSessionOptionsAppendExecutionProvider));
             OrtUpdateEnvWithCustomLogLevel = (DOrtUpdateEnvWithCustomLogLevel)Marshal.GetDelegateForFunctionPointer(api_.UpdateEnvWithCustomLogLevel, typeof(DOrtUpdateEnvWithCustomLogLevel));
             SessionOptionsAppendExecutionProvider_ROCM = (DSessionOptionsAppendExecutionProvider_ROCM)Marshal.GetDelegateForFunctionPointer(
-                                                 api_.SessionOptionsAppendExecutionProvider_ROCM, typeof(DSessionOptionsAppendExecutionProvider_ROCM));
+                api_.SessionOptionsAppendExecutionProvider_ROCM, typeof(DSessionOptionsAppendExecutionProvider_ROCM));
             OrtCreateROCMProviderOptions = (DOrtCreateROCMProviderOptions)Marshal.GetDelegateForFunctionPointer(api_.CreateROCMProviderOptions, typeof(DOrtCreateROCMProviderOptions));
             OrtUpdateROCMProviderOptions = (DOrtUpdateROCMProviderOptions)Marshal.GetDelegateForFunctionPointer(api_.UpdateROCMProviderOptions, typeof(DOrtUpdateROCMProviderOptions));
             OrtGetROCMProviderOptionsAsString = (DOrtGetROCMProviderOptionsAsString)Marshal.GetDelegateForFunctionPointer(api_.GetROCMProviderOptionsAsString, typeof(DOrtGetROCMProviderOptionsAsString));
@@ -532,10 +531,10 @@ internal class NativeLib
         [DllImport(NativeLib.DllName, CharSet = CharSet.Ansi)]
         public static extern ref OrtApiBase OrtGetApiBase();
 
-        #region Runtime/Environment API
+#region Runtime / Environment API
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate IntPtr /* OrtStatus* */DOrtCreateEnv(
+        public delegate IntPtr /* OrtStatus* */ DOrtCreateEnv(
             OrtLoggingLevel defaultLoggingLevel,
             byte[] /*const char* */ logId,
             out IntPtr /*(OrtEnv*)*/ env);
@@ -543,7 +542,7 @@ internal class NativeLib
         public static DOrtCreateEnv OrtCreateEnv;
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate IntPtr /* OrtStatus* */DOrtCreateEnvWithCustomLogger(
+        public delegate IntPtr /* OrtStatus* */ DOrtCreateEnvWithCustomLogger(
             IntPtr /* (OrtLoggingFunction*) */ loggingFunction,
             IntPtr /* (void*) */ loggerParam,
             OrtLoggingLevel defaultLoggingLevel,
@@ -553,7 +552,7 @@ internal class NativeLib
         public static DOrtCreateEnvWithCustomLogger OrtCreateEnvWithCustomLogger;
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate IntPtr /* OrtStatus* */DOrtCreateEnvWithGlobalThreadPools(
+        public delegate IntPtr /* OrtStatus* */ DOrtCreateEnvWithGlobalThreadPools(
             OrtLoggingLevel defaultWarningLevel,
             byte[] /*const char* */ logId,
             IntPtr /*(const OrtThreadingOptions *) */ threadingOptions,
@@ -564,7 +563,7 @@ internal class NativeLib
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         public delegate IntPtr /* OrtStatus* */ DOrtCreateEnvWithCustomLoggerAndGlobalThreadPools(
             IntPtr /* OrtLoggingFunction */ loggingFunction,
-            IntPtr /* void* */loggerParam,
+            IntPtr /* void* */ loggerParam,
             OrtLoggingLevel logSeverityLevel,
             byte[] /* const char* */ logId,
             IntPtr /*(const OrtThreadingOptions *) */ threadingOptions,
@@ -578,27 +577,27 @@ internal class NativeLib
         public static DOrtReleaseEnv OrtReleaseEnv;
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate IntPtr /* OrtStatus* */DOrtEnableTelemetryEvents(IntPtr /*(OrtEnv*)*/ env);
+        public delegate IntPtr /* OrtStatus* */ DOrtEnableTelemetryEvents(IntPtr /*(OrtEnv*)*/ env);
         public static DOrtEnableTelemetryEvents OrtEnableTelemetryEvents;
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate IntPtr /* OrtStatus* */DOrtDisableTelemetryEvents(IntPtr /*(OrtEnv*)*/ env);
+        public delegate IntPtr /* OrtStatus* */ DOrtDisableTelemetryEvents(IntPtr /*(OrtEnv*)*/ env);
         public static DOrtDisableTelemetryEvents OrtDisableTelemetryEvents;
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate IntPtr /* OrtStatus* */DOrtUpdateEnvWithCustomLogLevel(IntPtr /*(OrtEnv*)*/ env, OrtLoggingLevel custom_log_level);
+        public delegate IntPtr /* OrtStatus* */ DOrtUpdateEnvWithCustomLogLevel(IntPtr /*(OrtEnv*)*/ env, OrtLoggingLevel custom_log_level);
         public static DOrtUpdateEnvWithCustomLogLevel OrtUpdateEnvWithCustomLogLevel;
 
-        #endregion Runtime/Environment API
+#endregion Runtime / Environment API
 
-        #region Provider Options API
+#region Provider Options API
 
         /// <summary>
         /// Creates native OrtTensorRTProviderOptions instance
         /// </summary>
         /// <param name="trtProviderOptionsInstance">(output) native instance of OrtTensorRTProviderOptions</param>
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate IntPtr /* OrtStatus* */DOrtCreateTensorRTProviderOptions(
+        public delegate IntPtr /* OrtStatus* */ DOrtCreateTensorRTProviderOptions(
             out IntPtr /*(OrtTensorRTProviderOptions**)*/ trtProviderOptionsInstance);
         public static DOrtCreateTensorRTProviderOptions OrtCreateTensorRTProviderOptions;
 
@@ -610,7 +609,7 @@ internal class NativeLib
         /// <param name="providerOptionsValues">configuration values of OrtTensorRTProviderOptions</param>
         /// <param name="numKeys">number of configuration keys</param>
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate IntPtr /* OrtStatus* */DOrtUpdateTensorRTProviderOptions(
+        public delegate IntPtr /* OrtStatus* */ DOrtUpdateTensorRTProviderOptions(
             IntPtr /*(OrtTensorRTProviderOptions*)*/ trtProviderOptionsInstance,
             IntPtr[] /*(const char* const *)*/ providerOptionsKeys,
             IntPtr[] /*(const char* const *)*/ providerOptionsValues,
@@ -623,10 +622,10 @@ internal class NativeLib
         /// <param name="allocator">instance of OrtAllocator</param>
         /// <param name="ptr">is a UTF-8 null terminated string allocated using 'allocator'</param>
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate IntPtr /* OrtStatus* */DOrtGetTensorRTProviderOptionsAsString(
+        public delegate IntPtr /* OrtStatus* */ DOrtGetTensorRTProviderOptionsAsString(
             IntPtr /*(OrtTensorRTProviderOptionsV2**)*/ trtProviderOptionsInstance,
             IntPtr /*(OrtAllocator*)*/ allocator,
-            out IntPtr /*(char**)*/ptr);
+            out IntPtr /*(char**)*/ ptr);
         public static DOrtGetTensorRTProviderOptionsAsString OrtGetTensorRTProviderOptionsAsString;
 
         /// <summary>
@@ -642,7 +641,7 @@ internal class NativeLib
         /// </summary>
         /// <param name="cudaProviderOptionsInstance">(output) native instance of OrtCUDAProviderOptions</param>
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate IntPtr /* OrtStatus* */DOrtCreateCUDAProviderOptions(
+        public delegate IntPtr /* OrtStatus* */ DOrtCreateCUDAProviderOptions(
             out IntPtr /*(OrtCUDAProviderOptions**)*/ cudaProviderOptionsInstance);
         public static DOrtCreateCUDAProviderOptions OrtCreateCUDAProviderOptions;
 
@@ -654,7 +653,7 @@ internal class NativeLib
         /// <param name="providerOptionsValues">configuration values of OrtCUDAProviderOptions</param>
         /// <param name="numKeys">number of configuration keys</param>
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate IntPtr /* OrtStatus* */DOrtUpdateCUDAProviderOptions(
+        public delegate IntPtr /* OrtStatus* */ DOrtUpdateCUDAProviderOptions(
             IntPtr /*(OrtCUDAProviderOptions*)*/ cudaProviderOptionsInstance,
             IntPtr[] /*(const char* const *)*/ providerOptionsKeys,
             IntPtr[] /*(const char* const *)*/ providerOptionsValues,
@@ -667,10 +666,10 @@ internal class NativeLib
         /// <param name="allocator">instance of OrtAllocator</param>
         /// <param name="ptr">is a UTF-8 null terminated string allocated using 'allocator'</param>
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate IntPtr /* OrtStatus* */DOrtGetCUDAProviderOptionsAsString(
+        public delegate IntPtr /* OrtStatus* */ DOrtGetCUDAProviderOptionsAsString(
             IntPtr /*(OrtCUDAProviderOptionsV2**)*/ cudaProviderOptionsInstance,
             IntPtr /*(OrtAllocator*)*/ allocator,
-            out IntPtr /*(char**)*/ptr);
+            out IntPtr /*(char**)*/ ptr);
         public static DOrtGetCUDAProviderOptionsAsString OrtGetCUDAProviderOptionsAsString;
 
         /// <summary>
@@ -686,7 +685,7 @@ internal class NativeLib
         /// </summary>
         /// <param name="rocmProviderOptionsInstance">(output) native instance of OrtROCMProviderOptions</param>
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate IntPtr /* OrtStatus* */DOrtCreateROCMProviderOptions(
+        public delegate IntPtr /* OrtStatus* */ DOrtCreateROCMProviderOptions(
             out IntPtr /*(OrtROCMProviderOptions**)*/ rocmProviderOptionsInstance);
         public static DOrtCreateROCMProviderOptions OrtCreateROCMProviderOptions;
 
@@ -698,7 +697,7 @@ internal class NativeLib
         /// <param name="providerOptionsValues">configuration values of OrtROCMProviderOptions</param>
         /// <param name="numKeys">number of configuration keys</param>
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate IntPtr /* OrtStatus* */DOrtUpdateROCMProviderOptions(
+        public delegate IntPtr /* OrtStatus* */ DOrtUpdateROCMProviderOptions(
             IntPtr /*(OrtROCMProviderOptions*)*/ rocmProviderOptionsInstance,
             IntPtr[] /*(const char* const *)*/ providerOptionsKeys,
             IntPtr[] /*(const char* const *)*/ providerOptionsValues,
@@ -711,10 +710,10 @@ internal class NativeLib
         /// <param name="allocator">instance of OrtAllocator</param>
         /// <param name="ptr">is a UTF-8 null terminated string allocated using 'allocator'</param>
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate IntPtr /* OrtStatus* */DOrtGetROCMProviderOptionsAsString(
+        public delegate IntPtr /* OrtStatus* */ DOrtGetROCMProviderOptionsAsString(
             IntPtr /*(OrtROCMProviderOptions**)*/ rocmProviderOptionsInstance,
             IntPtr /*(OrtAllocator*)*/ allocator,
-            out IntPtr /*(char**)*/ptr);
+            out IntPtr /*(char**)*/ ptr);
         public static DOrtGetROCMProviderOptionsAsString OrtGetROCMProviderOptionsAsString;
 
         /// <summary>
@@ -725,34 +724,34 @@ internal class NativeLib
         public delegate void DOrtReleaseROCMProviderOptions(IntPtr /*(OrtROCMProviderOptions*)*/ rocmProviderOptionsInstance);
         public static DOrtReleaseROCMProviderOptions OrtReleaseROCMProviderOptions;
 
-        #endregion
+#endregion
 
-        #region Status API
+#region Status API
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate ErrorCode DOrtGetErrorCode(IntPtr /*(OrtStatus*)*/status);
+        public delegate ErrorCode DOrtGetErrorCode(IntPtr /*(OrtStatus*)*/ status);
         public static DOrtGetErrorCode OrtGetErrorCode;
 
         // returns char*, need to convert to string by the caller.
         // does not free the underlying OrtStatus*
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate IntPtr /* char* */DOrtGetErrorMessage(IntPtr /* (OrtStatus*) */status);
+        public delegate IntPtr /* char* */ DOrtGetErrorMessage(IntPtr /* (OrtStatus*) */ status);
         public static DOrtGetErrorMessage OrtGetErrorMessage;
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         public delegate void DOrtReleaseStatus(IntPtr /*(OrtStatus*)*/ statusPtr);
         public static DOrtReleaseStatus OrtReleaseStatus;
 
-        #endregion Status API
+#endregion Status API
 
-        #region InferenceSession API
+#region InferenceSession API
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate IntPtr /* OrtStatus* */DOrtCreateSession(
-                                                IntPtr /* (OrtEnv*) */ environment,
-                                                //[MarshalAs(UnmanagedType.LPStr)]string modelPath
-                                                byte[] modelPath,
-                                                IntPtr /* (OrtSessionOptions*) */sessopnOptions,
-                                                out IntPtr /**/ session);
+        public delegate IntPtr /* OrtStatus* */ DOrtCreateSession(
+            IntPtr /* (OrtEnv*) */ environment,
+            //[MarshalAs(UnmanagedType.LPStr)]string modelPath
+            byte[] modelPath,
+            IntPtr /* (OrtSessionOptions*) */ sessopnOptions,
+            out IntPtr /**/ session);
 
         public static DOrtCreateSession OrtCreateSession;
 
@@ -765,22 +764,22 @@ internal class NativeLib
         /// <param name="prepackedWeightsContainer">Native OrtPrepackedWeightsContainer instance</param>
         /// <param name="session">(Output) Created native OrtSession instance</param>
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate IntPtr /* OrtStatus* */DOrtCreateSessionWithPrepackedWeightsContainer(
-                                        IntPtr /* (OrtEnv*) */ environment,
-                                        byte[] modelPath,
-                                        IntPtr /* (OrtSessionOptions*) */sessionOptions,
-                                        IntPtr /* (OrtPrepackedWeightsContainer*) */prepackedWeightsContainer,
-                                        out IntPtr /* (OrtSession**) */ session);
+        public delegate IntPtr /* OrtStatus* */ DOrtCreateSessionWithPrepackedWeightsContainer(
+            IntPtr /* (OrtEnv*) */ environment,
+            byte[] modelPath,
+            IntPtr /* (OrtSessionOptions*) */ sessionOptions,
+            IntPtr /* (OrtPrepackedWeightsContainer*) */ prepackedWeightsContainer,
+            out IntPtr /* (OrtSession**) */ session);
 
         public static DOrtCreateSessionWithPrepackedWeightsContainer OrtCreateSessionWithPrepackedWeightsContainer;
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate IntPtr /* OrtStatus* */DOrtCreateSessionFromArray(
-                                                IntPtr /* (OrtEnv*) */ environment,
-                                                byte[] modelData,
-                                                UIntPtr modelSize,
-                                                IntPtr /* (OrtSessionOptions*) */ sessionOptions,
-                                                out IntPtr /**/ session);
+        public delegate IntPtr /* OrtStatus* */ DOrtCreateSessionFromArray(
+            IntPtr /* (OrtEnv*) */ environment,
+            byte[] modelData,
+            UIntPtr modelSize,
+            IntPtr /* (OrtSessionOptions*) */ sessionOptions,
+            out IntPtr /**/ session);
         public static DOrtCreateSessionFromArray OrtCreateSessionFromArray;
 
         /// <summary>
@@ -793,169 +792,167 @@ internal class NativeLib
         /// <param name="prepackedWeightsContainer">Native OrtPrepackedWeightsContainer instance</param>
         /// <param name="session">(Output) Created native OrtSession instance</param>
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate IntPtr /* OrtStatus* */DOrtCreateSessionFromArrayWithPrepackedWeightsContainer(
-                                        IntPtr /* (OrtEnv*) */ environment,
-                                        byte[] /* (void*) */ modelData,
-                                        UIntPtr /* (size_t) */ modelSize,
-                                        IntPtr /* (OrtSessionOptions*) */ sessionOptions,
-                                        IntPtr /* (OrtPrepackedWeightsContainer*) */prepackedWeightsContainer,
-                                        out IntPtr /* (OrtSession**) */ session);
+        public delegate IntPtr /* OrtStatus* */ DOrtCreateSessionFromArrayWithPrepackedWeightsContainer(
+            IntPtr /* (OrtEnv*) */ environment,
+            byte[] /* (void*) */ modelData,
+            UIntPtr /* (size_t) */ modelSize,
+            IntPtr /* (OrtSessionOptions*) */ sessionOptions,
+            IntPtr /* (OrtPrepackedWeightsContainer*) */ prepackedWeightsContainer,
+            out IntPtr /* (OrtSession**) */ session);
         public static DOrtCreateSessionFromArrayWithPrepackedWeightsContainer OrtCreateSessionFromArrayWithPrepackedWeightsContainer;
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         public delegate IntPtr /*(ONNStatus*)*/ DOrtRun(
-                                                IntPtr /*(OrtSession*)*/ session,
-                                                IntPtr /*(OrtSessionRunOptions*)*/ runOptions,  // can be null to use the default options
-                                                IntPtr[] inputNames,
-                                                IntPtr[] /* (OrtValue*[])*/ inputValues,
-                                                UIntPtr inputCount,
-                                                IntPtr[] outputNames,
-                                                UIntPtr outputCount,
-                                                IntPtr[] outputValues /* An array of output value pointers. Array must be allocated by the caller */
-                                                );
+            IntPtr /*(OrtSession*)*/ session,
+            IntPtr /*(OrtSessionRunOptions*)*/ runOptions, // can be null to use the default options
+            IntPtr[] inputNames,
+            IntPtr[] /* (OrtValue*[])*/ inputValues,
+            UIntPtr inputCount,
+            IntPtr[] outputNames,
+            UIntPtr outputCount,
+            IntPtr[] outputValues /* An array of output value pointers. Array must be allocated by the caller */
+        );
 
         public static DOrtRun OrtRun;
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         public delegate IntPtr /*(ONNStatus*)*/ DOrtRunWithBinding(
-                                                IntPtr /*(OrtSession*)*/ session,
-                                                IntPtr /*(OrtSessionRunOptions*)*/ runOptions, // can not be null
-                                                IntPtr /*(const OrtIoBinding*)*/ io_binding
-                                                );
+            IntPtr /*(OrtSession*)*/ session,
+            IntPtr /*(OrtSessionRunOptions*)*/ runOptions, // can not be null
+            IntPtr /*(const OrtIoBinding*)*/ io_binding);
 
         public static DOrtRunWithBinding OrtRunWithBinding;
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         public delegate IntPtr /*(OrtStatus*)*/ DOrtSessionGetInputCount(
-                                                IntPtr /*(OrtSession*)*/ session,
-                                                out UIntPtr count);
+            IntPtr /*(OrtSession*)*/ session,
+            out UIntPtr count);
 
         public static DOrtSessionGetInputCount OrtSessionGetInputCount;
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         public delegate IntPtr /*(OrtStatus*)*/ DOrtSessionGetOutputCount(
-                                                IntPtr /*(OrtSession*)*/ session,
-                                                out UIntPtr count);
+            IntPtr /*(OrtSession*)*/ session,
+            out UIntPtr count);
 
         public static DOrtSessionGetOutputCount OrtSessionGetOutputCount;
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         public delegate IntPtr /*(OrtStatus*)*/ DOrtSessionGetOverridableInitializerCount(
-                                                IntPtr /*(OrtSession*)*/ session,
-                                                out UIntPtr count);
+            IntPtr /*(OrtSession*)*/ session,
+            out UIntPtr count);
 
         public static DOrtSessionGetOverridableInitializerCount OrtSessionGetOverridableInitializerCount;
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate IntPtr /*(OrtStatus*)*/DOrtSessionGetInputName(
-                                                IntPtr /*(OrtSession*)*/ session,
-                                                UIntPtr index,
-                                                IntPtr /*(OrtAllocator*)*/ allocator,
-                                                out IntPtr /*(char**)*/name);
+        public delegate IntPtr /*(OrtStatus*)*/ DOrtSessionGetInputName(
+            IntPtr /*(OrtSession*)*/ session,
+            UIntPtr index,
+            IntPtr /*(OrtAllocator*)*/ allocator,
+            out IntPtr /*(char**)*/ name);
 
         public static DOrtSessionGetInputName OrtSessionGetInputName;
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate IntPtr /*(OrtStatus*)*/DOrtSessionGetOutputName(
-                                                IntPtr /*(OrtSession*)*/ session,
-                                                UIntPtr index,
-                                                IntPtr /*(OrtAllocator*)*/ allocator,
-                                                out IntPtr /*(char**)*/name);
+        public delegate IntPtr /*(OrtStatus*)*/ DOrtSessionGetOutputName(
+            IntPtr /*(OrtSession*)*/ session,
+            UIntPtr index,
+            IntPtr /*(OrtAllocator*)*/ allocator,
+            out IntPtr /*(char**)*/ name);
 
         public static DOrtSessionGetOutputName OrtSessionGetOutputName;
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate IntPtr /*(OrtStatus*)*/DOrtSessionEndProfiling(
-                                                IntPtr /*(const OrtSession*)*/ session,
-                                                IntPtr /*(OrtAllocator*)*/ allocator,
-                                                out IntPtr /*(char**)*/profile_file);
+        public delegate IntPtr /*(OrtStatus*)*/ DOrtSessionEndProfiling(
+            IntPtr /*(const OrtSession*)*/ session,
+            IntPtr /*(OrtAllocator*)*/ allocator,
+            out IntPtr /*(char**)*/ profile_file);
 
         public static DOrtSessionEndProfiling OrtSessionEndProfiling;
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate IntPtr /*(OrtStatus*)*/DOrtSessionGetOverridableInitializerName(
-                                                IntPtr /*(OrtSession*)*/ session,
-                                                UIntPtr index,
-                                                IntPtr /*(OrtAllocator*)*/ allocator,
-                                                out IntPtr /*(char**)*/name);
+        public delegate IntPtr /*(OrtStatus*)*/ DOrtSessionGetOverridableInitializerName(
+            IntPtr /*(OrtSession*)*/ session,
+            UIntPtr index,
+            IntPtr /*(OrtAllocator*)*/ allocator,
+            out IntPtr /*(char**)*/ name);
 
         public static DOrtSessionGetOverridableInitializerName OrtSessionGetOverridableInitializerName;
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate IntPtr /*(OrtStatus*)*/DOrtSessionGetInputTypeInfo(
-                                                IntPtr /*(const OrtSession*)*/ session,
-                                                UIntPtr index,
-                                                out IntPtr /*(struct OrtTypeInfo**)*/ typeInfo);
+        public delegate IntPtr /*(OrtStatus*)*/ DOrtSessionGetInputTypeInfo(
+            IntPtr /*(const OrtSession*)*/ session,
+            UIntPtr index,
+            out IntPtr /*(struct OrtTypeInfo**)*/ typeInfo);
 
         public static DOrtSessionGetInputTypeInfo OrtSessionGetInputTypeInfo;
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate IntPtr /*(OrtStatus*)*/DOrtSessionGetOutputTypeInfo(
-                                                IntPtr /*(const OrtSession*)*/ session,
-                                                UIntPtr index,
-                                                out IntPtr /* (struct OrtTypeInfo**)*/ typeInfo);
+        public delegate IntPtr /*(OrtStatus*)*/ DOrtSessionGetOutputTypeInfo(
+            IntPtr /*(const OrtSession*)*/ session,
+            UIntPtr index,
+            out IntPtr /* (struct OrtTypeInfo**)*/ typeInfo);
 
         public static DOrtSessionGetOutputTypeInfo OrtSessionGetOutputTypeInfo;
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate IntPtr /*(OrtStatus*)*/DOrtSessionGetOverridableInitializerTypeInfo(
-                                                IntPtr /*(const OrtSession*)*/ session,
-                                                UIntPtr index,
-                                                out IntPtr /* (struct OrtTypeInfo**)*/ typeInfo);
+        public delegate IntPtr /*(OrtStatus*)*/ DOrtSessionGetOverridableInitializerTypeInfo(
+            IntPtr /*(const OrtSession*)*/ session,
+            UIntPtr index,
+            out IntPtr /* (struct OrtTypeInfo**)*/ typeInfo);
 
         public static DOrtSessionGetOverridableInitializerTypeInfo OrtSessionGetOverridableInitializerTypeInfo;
 
         // release the typeinfo using OrtReleaseTypeInfo
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate void DOrtReleaseTypeInfo(IntPtr /*(OrtTypeInfo*)*/session);
+        public delegate void DOrtReleaseTypeInfo(IntPtr /*(OrtTypeInfo*)*/ session);
         public static DOrtReleaseTypeInfo OrtReleaseTypeInfo;
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate void DOrtReleaseSession(IntPtr /*(OrtSession*)*/session);
+        public delegate void DOrtReleaseSession(IntPtr /*(OrtSession*)*/ session);
         public static DOrtReleaseSession OrtReleaseSession;
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         public delegate IntPtr /*(OrtStatus*)*/ DOrtSessionGetProfilingStartTimeNs(
-                                                IntPtr /*(const OrtSession*)*/ session,
-                                                out UIntPtr /*(ulong* out)*/ startTime);
+            IntPtr /*(const OrtSession*)*/ session,
+            out UIntPtr /*(ulong* out)*/ startTime);
         public static DOrtSessionGetProfilingStartTimeNs OrtSessionGetProfilingStartTimeNs;
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         public delegate IntPtr /*(ONNStatus*)*/ DCreateAndRegisterAllocatorV2(
-                                                IntPtr /* (OrtEnv*) */ environment,
-                                                IntPtr /*(char*)*/ provider_type,
-                                                IntPtr /*(OrtMemoryInfo*)*/ mem_info,
-                                                IntPtr /*(OrtArenaCfg*)*/ arena_cfg,
-                                                IntPtr /*(char**)*/ provider_options_keys,
-                                                IntPtr /*(char**)*/ provider_options_values,
-                                                UIntPtr /*(size_t)*/num_keys);
+            IntPtr /* (OrtEnv*) */ environment,
+            IntPtr /*(char*)*/ provider_type,
+            IntPtr /*(OrtMemoryInfo*)*/ mem_info,
+            IntPtr /*(OrtArenaCfg*)*/ arena_cfg,
+            IntPtr /*(char**)*/ provider_options_keys,
+            IntPtr /*(char**)*/ provider_options_values,
+            UIntPtr /*(size_t)*/ num_keys);
         public static DCreateAndRegisterAllocatorV2 OrtCreateAndRegisterAllocatorV2;
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         public delegate IntPtr /*(ONNStatus*)*/ DOrtRunAsync(
-                                IntPtr /*(OrtSession*)*/ session,
-                                IntPtr /*(OrtSessionRunOptions*)*/ runOptions,  // can be null to use the default options
-                                IntPtr[] /*(char**)*/ inputNames,
-                                IntPtr[] /*(OrtValue*[])*/ inputValues,
-                                UIntPtr /*(size_t)*/ inputCount,
-                                IntPtr[] /*(char**)*/ outputNames,
-                                UIntPtr /*(size_t)*/ outputCount,
-                                IntPtr[] /*(OrtValue*[])*/ outputValues,
-                                IntPtr /*(void (*RunAsyncCallbackFn)(void* user_data, OrtValue** outputs, size_t num_outputs, OrtStatusPtr status))*/ callback,  // callback function
-                                IntPtr /*(void*)*/ user_data
-                                );
+            IntPtr /*(OrtSession*)*/ session,
+            IntPtr /*(OrtSessionRunOptions*)*/ runOptions, // can be null to use the default options
+            IntPtr[] /*(char**)*/ inputNames,
+            IntPtr[] /*(OrtValue*[])*/ inputValues,
+            UIntPtr /*(size_t)*/ inputCount,
+            IntPtr[] /*(char**)*/ outputNames,
+            UIntPtr /*(size_t)*/ outputCount,
+            IntPtr[] /*(OrtValue*[])*/ outputValues,
+            IntPtr /*(void (*RunAsyncCallbackFn)(void* user_data, OrtValue** outputs, size_t num_outputs, OrtStatusPtr status))*/ callback, // callback function
+            IntPtr /*(void*)*/ user_data);
         public static DOrtRunAsync OrtRunAsync;
 
-        #endregion InferenceSession API
+#endregion InferenceSession API
 
-        #region SessionOptions API
+#region SessionOptions API
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         public delegate IntPtr /*(OrtStatus*)*/ DOrtCreateSessionOptions(out IntPtr /*(OrtSessionOptions**)*/ sessionOptions);
         public static DOrtCreateSessionOptions OrtCreateSessionOptions;
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate void DOrtReleaseSessionOptions(IntPtr /*(OrtSessionOptions*)*/session);
+        public delegate void DOrtReleaseSessionOptions(IntPtr /*(OrtSessionOptions*)*/ session);
         public static DOrtReleaseSessionOptions OrtReleaseSessionOptions;
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
@@ -964,7 +961,7 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         public delegate IntPtr /*(OrtStatus*)*/ DOrtSetSessionExecutionMode(IntPtr /*(OrtSessionOptions*)*/ options,
-        ExecutionMode execution_mode);
+                                                                            ExecutionMode execution_mode);
         public static DOrtSetSessionExecutionMode OrtSetSessionExecutionMode;
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
@@ -996,7 +993,7 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca
         public static DOrtDisableCpuMemArena OrtDisableCpuMemArena;
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate IntPtr /*(OrtStatus*)*/ DOrtSetSessionLogId(IntPtr /* OrtSessionOptions* */ options, byte[] /* const char* */logId);
+        public delegate IntPtr /*(OrtStatus*)*/ DOrtSetSessionLogId(IntPtr /* OrtSessionOptions* */ options, byte[] /* const char* */ logId);
         public static DOrtSetSessionLogId OrtSetSessionLogId;
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
@@ -1027,7 +1024,7 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca
         /// <param name="configValue">Config value</param>
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         public delegate IntPtr /*(OrtStatus*)*/ DOrtAddSessionConfigEntry(IntPtr /* OrtSessionOptions* */ options,
-                                                                          byte[] /* const char* */configKey,
+                                                                          byte[] /* const char* */ configKey,
                                                                           byte[] /* const char* */ configValue);
         public static DOrtAddSessionConfigEntry OrtAddSessionConfigEntry;
 
@@ -1090,9 +1087,9 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca
         /// <param name="options">Native OrtSessionOptions instance</param>
         /// <param name="trtProviderOptions">Native OrtTensorRTProviderOptions instance</param>
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate IntPtr /*(OrtStatus*)*/DSessionOptionsAppendExecutionProvider_TensorRT(
-                                               IntPtr /*(OrtSessionOptions*)*/ options,
-                                               IntPtr /*(const OrtTensorRTProviderOptions*)*/ trtProviderOptions);
+        public delegate IntPtr /*(OrtStatus*)*/ DSessionOptionsAppendExecutionProvider_TensorRT(
+            IntPtr /*(OrtSessionOptions*)*/ options,
+            IntPtr /*(const OrtTensorRTProviderOptions*)*/ trtProviderOptions);
 
         public static DSessionOptionsAppendExecutionProvider_TensorRT SessionOptionsAppendExecutionProvider_TensorRT;
 
@@ -1102,9 +1099,9 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca
         /// <param name="options">Native OrtSessionOptions instance</param>
         /// <param name="trtProviderOptions">Native OrtTensorRTProviderOptionsV2 instance</param>
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate IntPtr /*(OrtStatus*)*/DSessionOptionsAppendExecutionProvider_TensorRT_V2(
-                                               IntPtr /*(OrtSessionOptions*)*/ options,
-                                               IntPtr /*(const OrtTensorRTProviderOptionsV2*)*/ trtProviderOptions);
+        public delegate IntPtr /*(OrtStatus*)*/ DSessionOptionsAppendExecutionProvider_TensorRT_V2(
+            IntPtr /*(OrtSessionOptions*)*/ options,
+            IntPtr /*(const OrtTensorRTProviderOptionsV2*)*/ trtProviderOptions);
 
         public static DSessionOptionsAppendExecutionProvider_TensorRT_V2 SessionOptionsAppendExecutionProvider_TensorRT_V2;
 
@@ -1114,9 +1111,9 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca
         /// <param name="options">Native OrtSessionOptions instance</param>
         /// <param name="cudaProviderOptions">Native OrtCUDAProviderOptions instance</param>
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate IntPtr /*(OrtStatus*)*/DSessionOptionsAppendExecutionProvider_CUDA(
-                                               IntPtr /*(OrtSessionOptions*)*/ options,
-                                               IntPtr /*(const OrtCUDAProviderOptions*)*/ cudaProviderOptions);
+        public delegate IntPtr /*(OrtStatus*)*/ DSessionOptionsAppendExecutionProvider_CUDA(
+            IntPtr /*(OrtSessionOptions*)*/ options,
+            IntPtr /*(const OrtCUDAProviderOptions*)*/ cudaProviderOptions);
 
         public static DSessionOptionsAppendExecutionProvider_CUDA SessionOptionsAppendExecutionProvider_CUDA;
 
@@ -1126,9 +1123,9 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca
         /// <param name="options">Native OrtSessionOptions instance</param>
         /// <param name="cudaProviderOptions">Native OrtCUDAProviderOptionsV2 instance</param>
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate IntPtr /*(OrtStatus*)*/DSessionOptionsAppendExecutionProvider_CUDA_V2(
-                                               IntPtr /*(OrtSessionOptions*)*/ options,
-                                               IntPtr /*(const OrtCUDAProviderOptionsV2*)*/ cudaProviderOptions);
+        public delegate IntPtr /*(OrtStatus*)*/ DSessionOptionsAppendExecutionProvider_CUDA_V2(
+            IntPtr /*(OrtSessionOptions*)*/ options,
+            IntPtr /*(const OrtCUDAProviderOptionsV2*)*/ cudaProviderOptions);
 
         public static DSessionOptionsAppendExecutionProvider_CUDA_V2 SessionOptionsAppendExecutionProvider_CUDA_V2;
 
@@ -1138,9 +1135,9 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca
         /// <param name="options">Native OrtSessionOptions instance</param>
         /// <param name="rocmProviderOptions">Native OrtROCMProviderOptions instance</param>
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate IntPtr /*(OrtStatus*)*/DSessionOptionsAppendExecutionProvider_ROCM(
-                                               IntPtr /*(OrtSessionOptions*)*/ options,
-                                               IntPtr /*(const OrtROCMProviderOptions*)*/ rocmProviderOptions);
+        public delegate IntPtr /*(OrtStatus*)*/ DSessionOptionsAppendExecutionProvider_ROCM(
+            IntPtr /*(OrtSessionOptions*)*/ options,
+            IntPtr /*(const OrtROCMProviderOptions*)*/ rocmProviderOptions);
 
         public static DSessionOptionsAppendExecutionProvider_ROCM SessionOptionsAppendExecutionProvider_ROCM;
 
@@ -1151,9 +1148,9 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca
         /// <param name="dimDenotation">Dimension denotation</param>
         /// <param name="dimValue">Dimension value</param>
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate IntPtr /*(OrtStatus*)*/DOrtAddFreeDimensionOverride(IntPtr /*(OrtSessionOptions*)*/ options,
-                                                                            byte[] /*(const char*)*/ dimDenotation,
-                                                                            long dimValue);
+        public delegate IntPtr /*(OrtStatus*)*/ DOrtAddFreeDimensionOverride(IntPtr /*(OrtSessionOptions*)*/ options,
+                                                                             byte[] /*(const char*)*/ dimDenotation,
+                                                                             long dimValue);
 
         public static DOrtAddFreeDimensionOverride OrtAddFreeDimensionOverride;
 
@@ -1164,9 +1161,9 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca
         /// <param name="dimName">Dimension name</param>
         /// <param name="dimValue">Dimension value</param>
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate IntPtr /*(OrtStatus*)*/DOrtAddFreeDimensionOverrideByName(IntPtr /*(OrtSessionOptions*)*/ options,
-                                                                                  byte[] /*(const char*)*/ dimName,
-                                                                                  long dimValue);
+        public delegate IntPtr /*(OrtStatus*)*/ DOrtAddFreeDimensionOverrideByName(IntPtr /*(OrtSessionOptions*)*/ options,
+                                                                                   byte[] /*(const char*)*/ dimName,
+                                                                                   long dimValue);
 
         public static DOrtAddFreeDimensionOverrideByName OrtAddFreeDimensionOverrideByName;
 
@@ -1177,9 +1174,9 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca
         /// <param name="libraryPath">Library path</param>
         /// <param name="libraryHandle">(out) Native library handle</param>
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate IntPtr /*(OrtStatus*)*/DOrtRegisterCustomOpsLibrary(IntPtr /*(OrtSessionOptions*) */ options,
-                                                                            byte[] /*(const char*)*/ libraryPath,
-                                                                            out IntPtr /*(void**)*/ libraryHandle);
+        public delegate IntPtr /*(OrtStatus*)*/ DOrtRegisterCustomOpsLibrary(IntPtr /*(OrtSessionOptions*) */ options,
+                                                                             byte[] /*(const char*)*/ libraryPath,
+                                                                             out IntPtr /*(void**)*/ libraryHandle);
 
         public static DOrtRegisterCustomOpsLibrary OrtRegisterCustomOpsLibrary;
 
@@ -1189,8 +1186,8 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca
         /// <param name="options">Native SessionOptions instance</param>
         /// <param name="libraryPath">Library path</param>
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate IntPtr /*(OrtStatus*)*/DOrtRegisterCustomOpsLibrary_V2(IntPtr /*(OrtSessionOptions*) */ options,
-                                                                               byte[] /*(const ORTCHAR_T*)*/ libraryPath);
+        public delegate IntPtr /*(OrtStatus*)*/ DOrtRegisterCustomOpsLibrary_V2(IntPtr /*(OrtSessionOptions*) */ options,
+                                                                                byte[] /*(const ORTCHAR_T*)*/ libraryPath);
 
         public static DOrtRegisterCustomOpsLibrary_V2 OrtRegisterCustomOpsLibrary_V2;
 
@@ -1201,9 +1198,9 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca
         /// <param name="name">Name of the initializer</param>
         /// <param name="ortValue">Native OrtValue instnce</param>
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate IntPtr /*(OrtStatus*)*/DOrtAddInitializer(IntPtr /*(OrtSessionOptions*)*/ options,
-                                                                  byte[] /*(const char*)*/ name,
-                                                                  IntPtr /*(OrtValue*)*/ ortValue);
+        public delegate IntPtr /*(OrtStatus*)*/ DOrtAddInitializer(IntPtr /*(OrtSessionOptions*)*/ options,
+                                                                   byte[] /*(const char*)*/ name,
+                                                                   IntPtr /*(OrtValue*)*/ ortValue);
 
         public static DOrtAddInitializer OrtAddInitializer;
 
@@ -1220,25 +1217,25 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca
         /// <param name="providerOptionsValues">Configuration values to add</param>
         /// <param name="numKeys">Number of configuration keys</param>
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate IntPtr /*(OrtStatus*)*/DSessionOptionsAppendExecutionProvider(
-                                               IntPtr /*(OrtSessionOptions*)*/ options,
-                                               byte[] /*(const char*)*/ providerName,
-                                               IntPtr[] /*(const char* const *)*/ providerOptionsKeys,
-                                               IntPtr[] /*(const char* const *)*/ providerOptionsValues,
-                                               UIntPtr /*(size_t)*/ numKeys);
+        public delegate IntPtr /*(OrtStatus*)*/ DSessionOptionsAppendExecutionProvider(
+            IntPtr /*(OrtSessionOptions*)*/ options,
+            byte[] /*(const char*)*/ providerName,
+            IntPtr[] /*(const char* const *)*/ providerOptionsKeys,
+            IntPtr[] /*(const char* const *)*/ providerOptionsValues,
+            UIntPtr /*(size_t)*/ numKeys);
 
         public static DSessionOptionsAppendExecutionProvider SessionOptionsAppendExecutionProvider;
 
-        #endregion
+#endregion
 
-        #region RunOptions API
+#region RunOptions API
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         public delegate IntPtr /*(OrtStatus*)*/ DOrtCreateRunOptions(out IntPtr /* OrtRunOptions** */ runOptions);
         public static DOrtCreateRunOptions OrtCreateRunOptions;
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate void DOrtReleaseRunOptions(IntPtr /*(OrtRunOptions*)*/options);
+        public delegate void DOrtReleaseRunOptions(IntPtr /*(OrtRunOptions*)*/ options);
         public static DOrtReleaseRunOptions OrtReleaseRunOptions;
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
@@ -1259,11 +1256,11 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         public delegate IntPtr /*(OrtStatus*)*/ DOrtRunOptionsGetRunLogSeverityLevel(IntPtr /* OrtRunOptions* */ options,
-            out OrtLoggingLevel severityLevel);
+                                                                                     out OrtLoggingLevel severityLevel);
         public static DOrtRunOptionsGetRunLogSeverityLevel OrtRunOptionsGetRunLogSeverityLevel;
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate IntPtr /*(OrtStatus*)*/ DOrtRunOptionsGetRunTag(IntPtr /* const OrtRunOptions* */options, out IntPtr /* const char** */ runtag);
+        public delegate IntPtr /*(OrtStatus*)*/ DOrtRunOptionsGetRunTag(IntPtr /* const OrtRunOptions* */ options, out IntPtr /* const char** */ runtag);
         public static DOrtRunOptionsGetRunTag OrtRunOptionsGetRunTag;
 
         // Set a flag so that any running OrtRun* calls that are using this instance of OrtRunOptions
@@ -1276,7 +1273,6 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca
         public delegate IntPtr /*(OrtStatus*)*/ DOrtRunOptionsUnsetTerminate(IntPtr /* OrtRunOptions* */ options);
         public static DOrtRunOptionsUnsetTerminate OrtRunOptionsUnsetTerminate;
 
-
         /// <summary>
         /// Add run config entry
         /// </summary>
@@ -1285,13 +1281,13 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca
         /// <param name="configValue">Config value</param>
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         public delegate IntPtr /*(OrtStatus*)*/ DOrtAddRunConfigEntry(IntPtr /* OrtRunOptions* */ options,
-                                                                      byte[] /* const char* */configKey,
+                                                                      byte[] /* const char* */ configKey,
                                                                       byte[] /* const char* */ configValue);
         public static DOrtAddRunConfigEntry OrtAddRunConfigEntry;
 
-        #endregion
+#endregion
 
-        #region ThreadingOptions API
+#region ThreadingOptions API
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         public delegate IntPtr /*(OrtStatus*)*/ DOrtCreateThreadingOptions(out IntPtr /* OrtCreateThreadingOptions** */ threadingOptions);
@@ -1316,27 +1312,26 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         public delegate IntPtr /*(OrtStatus*)*/ DOrtThreadingOptionsSetGlobalSpinControl(IntPtr /* OrtThreadingOptions* */ threadingOptions, int allowSpinning);
         public static DOrtThreadingOptionsSetGlobalSpinControl OrtThreadingOptionsSetGlobalSpinControl;
-        #endregion
+#endregion
 
-        #region Allocator/MemoryInfo API
+#region Allocator / MemoryInfo API
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         public delegate IntPtr /* (OrtStatus*)*/ DOrtCreateMemoryInfo(
-                                                            byte[] /*(const char*) */name,
-                                                            OrtAllocatorType allocatorType,
-                                                            int identifier,
-                                                            OrtMemType memType,
-                                                            out IntPtr /*(OrtMemoryInfo*)*/ allocatorInfo    // memory ownership transfered to caller
-                                                       );
+            byte[] /*(const char*) */ name,
+            OrtAllocatorType allocatorType,
+            int identifier,
+            OrtMemType memType,
+            out IntPtr /*(OrtMemoryInfo*)*/ allocatorInfo // memory ownership transfered to caller
+        );
 
         public static DOrtCreateMemoryInfo OrtCreateMemoryInfo;
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         public delegate IntPtr /* (OrtStatus*)*/ DOrtCreateCpuMemoryInfo(
-                                                            OrtAllocatorType allocatorType,
-                                                            OrtMemType memoryType,
-                                                            out IntPtr /*(OrtMemoryInfo*)*/ allocatorInfo
-                                                        );
+            OrtAllocatorType allocatorType,
+            OrtMemType memoryType,
+            out IntPtr /*(OrtMemoryInfo*)*/ allocatorInfo);
 
         public static DOrtCreateCpuMemoryInfo OrtCreateCpuMemoryInfo;
 
@@ -1347,15 +1342,15 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         public delegate IntPtr /*(OrtStatus*)*/ DOrtCompareMemoryInfo(
-                                               IntPtr /*(const OrtMemoryInfo*)*/ info1,
-                                               IntPtr /*(const OrtMemoryInfo*)*/ info2,
-                                               out int /*(int* out)*/ result);
+            IntPtr /*(const OrtMemoryInfo*)*/ info1,
+            IntPtr /*(const OrtMemoryInfo*)*/ info2,
+            out int /*(int* out)*/ result);
 
         public static DOrtCompareMemoryInfo OrtCompareMemoryInfo;
 
         /**
-        * Do not free the returned value
-        */
+         * Do not free the returned value
+         */
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         public delegate IntPtr /*(OrtStatus*)*/ DOrtMemoryInfoGetName(IntPtr /*(const OrtMemoryInfo* ptr)*/ mem_info, out IntPtr /*(const char**)*/ name);
 
@@ -1368,26 +1363,25 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         public delegate IntPtr /*(OrtStatus*)*/ DOrtMemoryInfoGetMemType(
-                                                IntPtr /*(const OrtMemoryInfo* ptr)*/ mem_info,
-                                                out OrtMemType /*(OrtMemType*)*/ mem_type);
+            IntPtr /*(const OrtMemoryInfo* ptr)*/ mem_info,
+            out OrtMemType /*(OrtMemType*)*/ mem_type);
 
         public static DOrtMemoryInfoGetMemType OrtMemoryInfoGetMemType;
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         public delegate IntPtr /*(OrtStatus*)*/ DOrtMemoryInfoGetType(
-                                                IntPtr /*(const OrtMemoryInfo* ptr)*/ mem_info,
-                                                out OrtAllocatorType /*(OrtAllocatorType*)*/ alloc_type
-                                                );
+            IntPtr /*(const OrtMemoryInfo* ptr)*/ mem_info,
+            out OrtAllocatorType /*(OrtAllocatorType*)*/ alloc_type);
 
         public static DOrtMemoryInfoGetType OrtMemoryInfoGetType;
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate IntPtr /*(OrtStatus*)*/DOrtGetAllocatorWithDefaultOptions(out IntPtr /*(OrtAllocator**)*/ allocator);
+        public delegate IntPtr /*(OrtStatus*)*/ DOrtGetAllocatorWithDefaultOptions(out IntPtr /*(OrtAllocator**)*/ allocator);
 
         public static DOrtGetAllocatorWithDefaultOptions OrtGetAllocatorWithDefaultOptions;
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate IntPtr /*(OrtStatus*)*/DOrtAllocatorGetInfo(IntPtr /*(const OrtAllocator*)*/ ptr, out IntPtr /*(const struct OrtMemoryInfo**)*/info);
+        public delegate IntPtr /*(OrtStatus*)*/ DOrtAllocatorGetInfo(IntPtr /*(const OrtAllocator*)*/ ptr, out IntPtr /*(const struct OrtMemoryInfo**)*/ info);
 
         public static DOrtAllocatorGetInfo OrtAllocatorGetInfo;
 
@@ -1402,8 +1396,8 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca
         /// <returns>Pointer to a native OrtStatus instance indicating success/failure of config creation</returns>
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         public delegate IntPtr /*(OrtStatus*)*/ DOrtCreateArenaCfg(UIntPtr /*(size_t)*/ maxMemory, int /*(int)*/ arenaExtendStrategy,
-                                                                  int /*(int)*/ initialChunkSizeBytes, int /*(int)*/ maxDeadBytesPerChunk,
-                                                                  out IntPtr /*(OrtArenaCfg**)*/ arenaCfg);
+                                                                   int /*(int)*/ initialChunkSizeBytes, int /*(int)*/ maxDeadBytesPerChunk,
+                                                                   out IntPtr /*(OrtArenaCfg**)*/ arenaCfg);
 
         public static DOrtCreateArenaCfg OrtCreateArenaCfg;
 
@@ -1457,9 +1451,9 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca
 
         public static DOrtAllocatorFree OrtAllocatorFree;
 
-        #endregion Allocator/MemoryInfo API
+#endregion Allocator / MemoryInfo API
 
-        #region IoBinding API
+#region IoBinding API
 
         /// <summary>
         /// Create OrtIoBinding instance that is used to bind memory that is allocated
@@ -1634,7 +1628,7 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         public delegate IntPtr /*(OrtStatus*)*/ DOrtCreateAndRegisterAllocator(IntPtr /*(OrtEnv*)*/ env,
                                                                                IntPtr /*(const OrtMemoryInfo*)*/ memInfo,
-                                                                               IntPtr/*(const OrtArenaCfg*)*/ arenaCfg);
+                                                                               IntPtr /*(const OrtArenaCfg*)*/ arenaCfg);
 
         public static DOrtCreateAndRegisterAllocator OrtCreateAndRegisterAllocator;
 
@@ -1644,13 +1638,13 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca
         /// <param name="projection">the source projected language</param>
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         public delegate IntPtr /*(OrtStatus*)*/ DOrtSetLanguageProjection(IntPtr /* (OrtEnv*) */ environment,
-            int projection);
+                                                                          int projection);
 
         public static DOrtSetLanguageProjection OrtSetLanguageProjection;
 
-        #endregion IoBinding API
+#endregion IoBinding API
 
-        #region ModelMetadata API
+#region ModelMetadata API
 
         /// <summary>
         /// Gets the ModelMetadata associated with an InferenceSession
@@ -1670,7 +1664,7 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca
         /// <param name="value">(output) producer name from the ModelMetadata instance</param>
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         public delegate IntPtr /* (OrtStatus*) */ DOrtModelMetadataGetProducerName(IntPtr /* (const OrtModelMetadata*) */ modelMetadata,
-                                                                              IntPtr /* (OrtAllocator*) */ allocator, out IntPtr /* (char**) */ value);
+                                                                                   IntPtr /* (OrtAllocator*) */ allocator, out IntPtr /* (char**) */ value);
 
         public static DOrtModelMetadataGetProducerName OrtModelMetadataGetProducerName;
 
@@ -1682,7 +1676,7 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca
         /// <param name="value">(output) graph name from the ModelMetadata instance</param>
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         public delegate IntPtr /* (OrtStatus*) */ DOrtModelMetadataGetGraphName(IntPtr /* (const OrtModelMetadata*) */ modelMetadata,
-                                                                              IntPtr /* (OrtAllocator*) */ allocator, out IntPtr /* (char**) */ value);
+                                                                                IntPtr /* (OrtAllocator*) */ allocator, out IntPtr /* (char**) */ value);
 
         public static DOrtModelMetadataGetGraphName OrtModelMetadataGetGraphName;
 
@@ -1694,7 +1688,7 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca
         /// <param name="value">(output) domain from the ModelMetadata instance</param>
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         public delegate IntPtr /* (OrtStatus*) */ DOrtModelMetadataGetDomain(IntPtr /* (const OrtModelMetadata*) */ modelMetadata,
-                                                                              IntPtr /* (OrtAllocator*) */ allocator, out IntPtr /* (char**) */ value);
+                                                                             IntPtr /* (OrtAllocator*) */ allocator, out IntPtr /* (char**) */ value);
 
         public static DOrtModelMetadataGetDomain OrtModelMetadataGetDomain;
 
@@ -1706,7 +1700,7 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca
         /// <param name="value">(output) description from the ModelMetadata instance</param>
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         public delegate IntPtr /* (OrtStatus*) */ DOrtModelMetadataGetDescription(IntPtr /* (const OrtModelMetadata*) */ modelMetadata,
-                                                                              IntPtr /* (OrtAllocator*) */ allocator, out IntPtr /* (char**) */ value);
+                                                                                  IntPtr /* (OrtAllocator*) */ allocator, out IntPtr /* (char**) */ value);
 
         public static DOrtModelMetadataGetDescription OrtModelMetadataGetDescription;
 
@@ -1718,7 +1712,7 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca
         /// <param name="value">(output) graph description from the ModelMetadata instance</param>
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         public delegate IntPtr /* (OrtStatus*) */ DOrtModelMetadataGetGraphDescription(IntPtr /* (const OrtModelMetadata*) */ modelMetadata,
-                                                                              IntPtr /* (OrtAllocator*) */ allocator, out IntPtr /* (char**) */ value);
+                                                                                       IntPtr /* (OrtAllocator*) */ allocator, out IntPtr /* (char**) */ value);
 
         public static DOrtModelMetadataGetGraphDescription OrtModelMetadataGetGraphDescription;
 
@@ -1742,7 +1736,7 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca
         /// <param name="numKeys">(output) number of keys in the custom metadata map</param>
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         public delegate IntPtr /* (OrtStatus*) */ DOrtModelMetadataGetCustomMetadataMapKeys(IntPtr /* (const OrtModelMetadata*) */ modelMetadata,
-            IntPtr /* (OrtAllocator*) */ allocator, out IntPtr /* (char***) */ keys, out long /* (int64_t*) */ numKeys);
+                                                                                            IntPtr /* (OrtAllocator*) */ allocator, out IntPtr /* (char***) */ keys, out long /* (int64_t*) */ numKeys);
 
         public static DOrtModelMetadataGetCustomMetadataMapKeys OrtModelMetadataGetCustomMetadataMapKeys;
 
@@ -1755,7 +1749,7 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca
         /// <param name="value">(output) value for the key in the custom metadata map</param>
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         public delegate IntPtr /* (OrtStatus*) */ DOrtModelMetadataLookupCustomMetadataMap(IntPtr /* (const OrtModelMetadata*) */ modelMetadata,
-            IntPtr /* (OrtAllocator*) */ allocator, IntPtr /* (const char*) */ key, out IntPtr /* (char**) */ value);
+                                                                                           IntPtr /* (OrtAllocator*) */ allocator, IntPtr /* (const char*) */ key, out IntPtr /* (char**) */ value);
 
         public static DOrtModelMetadataLookupCustomMetadataMap OrtModelMetadataLookupCustomMetadataMap;
 
@@ -1768,9 +1762,9 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca
 
         public static DOrtReleaseModelMetadata OrtReleaseModelMetadata;
 
-        #endregion ModelMetadata API
+#endregion ModelMetadata API
 
-        #region OrtValue API
+#region OrtValue API
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         public delegate IntPtr /*(OrtStatus*)*/ DOrtHasValue(IntPtr /*(OrtValue*)*/ value, out IntPtr /*(int*)*/ hasValue);
@@ -1779,9 +1773,9 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         public delegate IntPtr /*(OrtStatus*)*/ DOrtGetValue(IntPtr /*(OrtValue*)*/ value,
-                                                                 int index,
-                                                                 IntPtr /*(OrtAllocator*)*/ allocator,
-                                                                 out IntPtr /*(OrtValue**)*/ outputValue);
+                                                             int index,
+                                                             IntPtr /*(OrtAllocator*)*/ allocator,
+                                                             out IntPtr /*(OrtValue**)*/ outputValue);
 
         public static DOrtGetValue OrtGetValue;
 
@@ -1801,8 +1795,8 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca
         public static DOrtGetValueCount OrtGetValueCount;
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate IntPtr/*(OrtStatus*)*/ DOrtCreateValue(IntPtr[] /* const OrtValue* const* in */ values,
-            UIntPtr /* size_t */ num_values, IntPtr /* (OnnxValueType */ onnxValueType, out IntPtr /* OrtValue** */ ortValue);
+        public delegate IntPtr /*(OrtStatus*)*/ DOrtCreateValue(IntPtr[] /* const OrtValue* const* in */ values,
+                                                                UIntPtr /* size_t */ num_values, IntPtr /* (OnnxValueType */ onnxValueType, out IntPtr /* OrtValue** */ ortValue);
 
         public static DOrtCreateValue OrtCreateValue;
 
@@ -1813,23 +1807,23 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         public delegate IntPtr /*(OrtStatus*)*/ DOrtCreateTensorAsOrtValue(
-                        IntPtr /*_Inout_ OrtAllocator* */ allocator,
-                        long[] /*_In_ const int64_t* */ shape,
-                        UIntPtr /*size_t*/ shape_len,
-                        Tensors.TensorElementType type,
-                        out IntPtr /* OrtValue** */ outputValue);
+            IntPtr /*_Inout_ OrtAllocator* */ allocator,
+            long[] /*_In_ const int64_t* */ shape,
+            UIntPtr /*size_t*/ shape_len,
+            Tensors.TensorElementType type,
+            out IntPtr /* OrtValue** */ outputValue);
 
         public static DOrtCreateTensorAsOrtValue OrtCreateTensorAsOrtValue;
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         public delegate IntPtr /* OrtStatus */ DOrtCreateTensorWithDataAsOrtValue(
-                                                        IntPtr /* (const OrtMemoryInfo*) */ allocatorInfo,
-                                                        IntPtr /* (void*) */dataBufferHandle,
-                                                        UIntPtr dataLength,
-                                                        long[] shape,
-                                                        UIntPtr shapeLength,
-                                                        Tensors.TensorElementType type,
-                                                        out IntPtr /* OrtValue** */ outputValue);
+            IntPtr /* (const OrtMemoryInfo*) */ allocatorInfo,
+            IntPtr /* (void*) */ dataBufferHandle,
+            UIntPtr dataLength,
+            long[] shape,
+            UIntPtr shapeLength,
+            Tensors.TensorElementType type,
+            out IntPtr /* OrtValue** */ outputValue);
 
         public static DOrtCreateTensorWithDataAsOrtValue OrtCreateTensorWithDataAsOrtValue;
 
@@ -1854,60 +1848,67 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca
         /// \param len total data length, not including the trailing '\0' chars.
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         public delegate IntPtr /*(OrtStatus*)*/ DOrtFillStringTensor(
-                                                        IntPtr /* OrtValue */ value,
-                                                        IntPtr[] /* const char* const* */s,
-                                                        UIntPtr /* size_t */ s_len);
+            IntPtr /* OrtValue */ value,
+            IntPtr[] /* const char* const* */ s,
+            UIntPtr /* size_t */ s_len);
 
         public static DOrtFillStringTensor OrtFillStringTensor;
 
+        /// \param value A tensor created from OrtCreateTensor... function.
+        /// \param index The index of the entry in the tensor to resize. <summary>
+        /// \param length_in_bytes Length to resize the string to.
+        /// \param buffer The resized buffer.
+        [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         public delegate IntPtr /*(OrtStatus*)*/ DOrtGetResizedStringTensorElementBuffer(
-                IntPtr /* OrtValue */ value,
-                UIntPtr /* size_t */ index,
-                UIntPtr /* size_t */ length_in_bytes,
-                out IntPtr /* char** */ buffer
-            );
+            IntPtr /* OrtValue */ value,
+            UIntPtr /* size_t */ index,
+            UIntPtr /* size_t */ length_in_bytes,
+            out IntPtr /* char** */ buffer);
 
         public static DOrtGetResizedStringTensorElementBuffer OrtGetResizedStringTensorElementBuffer;
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         public delegate IntPtr /*(OrtStatus*)*/ DOrtGetStringTensorContent(
-                                                        IntPtr /*(OrtValue*)*/ value,
-                                                        byte[] /*(void*)*/  dst_buffer,
-                                                        UIntPtr dst_buffer_len,
-                                                        UIntPtr[] offsets,
-                                                        UIntPtr offsets_len);
+            IntPtr /*(OrtValue*)*/ value,
+            byte[] /*(void*)*/ dst_buffer,
+            UIntPtr dst_buffer_len,
+            UIntPtr[] offsets,
+            UIntPtr offsets_len);
 
         public static DOrtGetStringTensorContent OrtGetStringTensorContent;
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         public delegate IntPtr /*(OrtStatus*)*/ DOrtGetStringTensorDataLength(IntPtr /*(OrtValue*)*/ value,
-                                                        out UIntPtr /*(size_t*)*/ len);
+                                                                              out UIntPtr /*(size_t*)*/ len);
 
         public static DOrtGetStringTensorDataLength OrtGetStringTensorDataLength;
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         public delegate IntPtr /*(OrtStatus*)*/ DOrtGetStringTensorElementLength(IntPtr /*(OrtValue*)*/ value,
-                                                        UIntPtr /*(size_t)*/ index,
-                                                        out UIntPtr /*(size_t*)*/ len);
+                                                                                 UIntPtr /*(size_t)*/ index,
+                                                                                 out UIntPtr /*(size_t*)*/ len);
 
         public static DOrtGetStringTensorElementLength OrtGetStringTensorElementLength;
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         public delegate IntPtr /*(OrtStatus*)*/ DOrtGetStringTensorElement(IntPtr /*(OrtValue*)*/ value,
-                                                UIntPtr /*(size_t)*/ bufferLength,
-                                                UIntPtr /*(size_t)*/ elementIndex,
-                                                byte[] buffer);
+                                                                           UIntPtr /*(size_t)*/ bufferLength,
+                                                                           UIntPtr /*(size_t)*/ elementIndex,
+                                                                           byte[] buffer);
 
         public static DOrtGetStringTensorElement OrtGetStringTensorElement;
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate IntPtr /*(OrtStatus*)*/
-                                DOrtCastTypeInfoToTensorInfo(IntPtr /*(struct OrtTypeInfo*)*/ typeInfo, out IntPtr /*(const struct OrtTensorTypeAndShapeInfo**)*/ typeAndShapeInfo);
+        public delegate IntPtr /*(OrtStatus*)*/ DOrtCastTypeInfoToTensorInfo(
+            IntPtr /*(struct OrtTypeInfo*)*/ typeInfo,
+            out IntPtr /*(const struct OrtTensorTypeAndShapeInfo**)*/ typeAndShapeInfo);
 
         public static DOrtCastTypeInfoToTensorInfo OrtCastTypeInfoToTensorInfo;
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate IntPtr /*(OrtStatus*)*/ DOrtGetTensorTypeAndShape(IntPtr /*(OrtValue*)*/ value, out IntPtr /*(struct OrtTensorTypeAndShapeInfo*)*/ typeAndShapeInfo);
+        public delegate IntPtr /*(OrtStatus*)*/ DOrtGetTensorTypeAndShape(
+            IntPtr /*(OrtValue*)*/ value,
+            out IntPtr /*(struct OrtTensorTypeAndShapeInfo*)*/ typeAndShapeInfo);
 
         public static DOrtGetTensorTypeAndShape OrtGetTensorTypeAndShape;
 
@@ -1917,39 +1918,43 @@ out IntPtr /* char** */ buffer
         public static DOrtReleaseTensorTypeAndShapeInfo OrtReleaseTensorTypeAndShapeInfo;
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate IntPtr /*(OrtStatus*)*/ DOrtGetTensorElementType(IntPtr /*(const struct OrtTensorTypeAndShapeInfo*)*/ typeAndShapeInfo, out IntPtr /*(TensorElementType*)*/ output);
+        public delegate IntPtr /*(OrtStatus*)*/ DOrtGetTensorElementType(
+            IntPtr /*(const struct OrtTensorTypeAndShapeInfo*)*/ typeAndShapeInfo,
+            out IntPtr /*(TensorElementType*)*/ output);
 
         public static DOrtGetTensorElementType OrtGetTensorElementType;
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate IntPtr /*(OrtStatus*)*/ DOrtGetDimensionsCount(IntPtr /*(const struct OrtTensorTypeAndShapeInfo*)*/ typeAndShapeInfo, out UIntPtr output);
+        public delegate IntPtr /*(OrtStatus*)*/ DOrtGetDimensionsCount(
+            IntPtr /*(const struct OrtTensorTypeAndShapeInfo*)*/ typeAndShapeInfo,
+            out UIntPtr output);
 
         public static DOrtGetDimensionsCount OrtGetDimensionsCount;
 
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         public delegate IntPtr /*(OrtStatus*)*/ DOrtGetDimensions(
-                            IntPtr /*(const struct OrtTensorTypeAndShapeInfo*)*/ typeAndShapeInfo,
-                            long[] dim_values,
-                            UIntPtr dim_values_length);
+            IntPtr /*(const struct OrtTensorTypeAndShapeInfo*)*/ typeAndShapeInfo,
+            long[] dim_values,
+            UIntPtr dim_values_length);
 
         public static DOrtGetDimensions OrtGetDimensions;
 
         /**
-        * Get the symbolic dimension names for dimensions with a value of -1.
-        * Order and number of entries is the same as values returned by GetDimensions.
-        * The name may be empty for an unnamed symbolic dimension.
-        * e.g.
-        * If OrtGetDimensions returns [-1, -1, 2], OrtGetSymbolicDimensions would return an array with 3 entries.
-        * If the values returned were ['batch', '', ''] it would indicate that
-        *  - the first dimension was a named symbolic dimension (-1 dim value and name in symbolic dimensions),
-        *  - the second dimension was an unnamed symbolic dimension (-1 dim value and empty string),
-        *  - the entry for the third dimension should be ignored as it is not a symbolic dimension (dim value >= 0).
-        */
+         * Get the symbolic dimension names for dimensions with a value of -1.
+         * Order and number of entries is the same as values returned by GetDimensions.
+         * The name may be empty for an unnamed symbolic dimension.
+         * e.g.
+         * If OrtGetDimensions returns [-1, -1, 2], OrtGetSymbolicDimensions would return an array with 3 entries.
+         * If the values returned were ['batch', '', ''] it would indicate that
+         *  - the first dimension was a named symbolic dimension (-1 dim value and name in symbolic dimensions),
+         *  - the second dimension was an unnamed symbolic dimension (-1 dim value and empty string),
+         *  - the entry for the third dimension should be ignored as it is not a symbolic dimension (dim value >= 0).
+         */
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         public delegate IntPtr /*(OrtStatus*)*/ DOrtGetSymbolicDimensions(
-                    IntPtr /*(const struct OrtTensorTypeAndShapeInfo*)*/ typeAndShapeInfo,
-                    IntPtr[] dim_params, /* const char* values, converted to string by caller */
-                    UIntPtr dim_params_length);
+            IntPtr /*(const struct OrtTensorTypeAndShapeInfo*)*/ typeAndShapeInfo,
+            IntPtr[] dim_params, /* const char* values, converted to string by caller */
+            UIntPtr dim_params_length);
 
         public static DOrtGetSymbolicDimensions OrtGetSymbolicDimensions;
 
@@ -1964,15 +1969,15 @@ out IntPtr /* char** */ buffer
          */
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         public delegate IntPtr /*(OrtStatus*)*/ DOrtGetTensorShapeElementCount(IntPtr /*(const struct OrtTensorTypeAndShapeInfo*)*/ typeAndShapeInfo,
-            out UIntPtr /* size_t */ output);
+                                                                               out UIntPtr /* size_t */ output);
 
         public static DOrtGetTensorShapeElementCount OrtGetTensorShapeElementCount;
 
-        [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         // The out ortMemoryInfo must not be destroyed/deallocated. The pointer points to an object owned by
         // the contained Tensor/SparseTensor.
+        [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         public delegate IntPtr /*(OrtStatus*)*/ DOrtGetTensorMemoryInfo(IntPtr /* const OrtValue* */ ortValue,
-            out IntPtr /* const OrtMemoryInfo** */ ortMemoryInfo);
+                                                                        out IntPtr /* const OrtMemoryInfo** */ ortMemoryInfo);
 
         public static DOrtGetTensorMemoryInfo OrtGetTensorMemoryInfo;
 
@@ -1982,10 +1987,12 @@ out IntPtr /* char** */ buffer
 
         public static DCastTypeInfoToMapTypeInfo OrtCastTypeInfoToMapTypeInfo;
 
+        [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         public delegate IntPtr /*(OrtStatus*)*/ DGetMapKeyType(IntPtr /*const OrtMapTypeInfo* */ mapTypeInfo, out IntPtr /*(TensorElementType*)*/ tensorElementType);
 
         public static DGetMapKeyType OrtGetMapKeyType;
 
+        [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         public delegate IntPtr /*(OrtStatus*)*/ DGetMapValueType(IntPtr /* const OrtMapTypeInfo* */ map_type_info, out IntPtr /* OrtTypeInfo** */ type_info);
 
         public static DGetMapValueType OrtGetMapValueType;
@@ -1996,13 +2003,14 @@ out IntPtr /* char** */ buffer
 
         public static DCastTypeInfoToSequenceTypeInfo OrtCastTypeInfoToSequenceTypeInfo;
 
+        [UnmanagedFunctionPointer(CallingConvention.Winapi)]
         public delegate IntPtr /*(OrtStatus*)*/ DGetSequenceElementType(IntPtr /* const OrtSequenceTypeInfo* */ sequenceTypeInfo, out IntPtr /* OrtTypeInfo** */ elementTypeInfo);
 
         public static DGetSequenceElementType OrtGetSequenceElementType;
 
         // OptionalTypeInfo
         [UnmanagedFunctionPointer(CallingConvention.Winapi)]
-        public delegate IntPtr /*(OrtStatus*)*/  DOrtCastTypeInfoToOptionalTypeInfo(IntPtr /*(struct OrtTypeInfo*)*/ typeInfo, out IntPtr /* const struct OrtOptionalTypeInfo** */  optionalTypeInfo);
+        public delegate IntPtr /*(OrtStatus*)*/ DOrtCastTypeInfoToOptionalTypeInfo(IntPtr /*(struct OrtTypeInfo*)*/ typeInfo, out IntPtr /* const struct OrtOptionalTypeInfo** */ optionalTypeInfo);
 
         public static DOrtCastTypeInfoToOptionalTypeInfo OrtCastTypeInfoToOptionalTypeInfo;
 
@@ -2016,10 +2024,9 @@ out IntPtr /* char** */ buffer
 
         public static DOrtReleaseValue OrtReleaseValue;
 
-        #endregion
-
+#endregion
 
-        #region Misc API
+#region Misc API
 
         /// <summary>
         /// Queries all the execution providers supported in the native onnxruntime shared library
@@ -2059,8 +2066,8 @@ out IntPtr /* char** */ buffer
 
         public static DOrtReleasePrepackedWeightsContainer OrtReleasePrepackedWeightsContainer;
 
-        #endregion
-    } //class NativeMethods
+#endregion
+    } // class NativeMethods
 
     // onnxruntime-extensions helpers to make usage simpler.
     // The onnxruntime-extensions nuget package containing the native library can be optionally added to the app.
@@ -2081,7 +2088,5 @@ internal static class OrtExtensionsNativeMethods
                    CallingConvention = CallingConvention.Winapi)]
         public static extern IntPtr /* OrtStatus* */ RegisterCustomOps(IntPtr /* OrtSessionOptions* */ sessionOptions,
                                                                        ref OrtApiBase /* OrtApiBase* */ ortApiBase);
-
-
     }
-} //namespace
+} // namespace
diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/Training/CheckpointState.shared.cs b/csharp/src/Microsoft.ML.OnnxRuntime/Training/CheckpointState.shared.cs
index 659c6303702ac..6889112acb385 100644
--- a/csharp/src/Microsoft.ML.OnnxRuntime/Training/CheckpointState.shared.cs
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/Training/CheckpointState.shared.cs
@@ -40,20 +40,16 @@ internal enum PropertyType : long
             String = 2
         }
 
-        private void AddPropertyImpl<T>(string propertyName, PropertyType propertyType, T propertyValue)
+        private void AddPropertyImpl<T>(string propertyName, PropertyType propertyType, T propertyValue) where T : unmanaged
         {
             var propertyNameUtf8 = NativeOnnxValueHelper.StringToZeroTerminatedUtf8(propertyName);
-            T[] value = new T[1];
-            value[0] = propertyValue;
-            Memory<T> memory = value;
-            using (var memHandle = memory.Pin())
+            T[] value = { propertyValue };
+            unsafe
             {
-                IntPtr memPtr;
-                unsafe
+                fixed (T* memPtr = value)
                 {
-                    memPtr = (IntPtr)memHandle.Pointer;
+                    NativeApiStatus.VerifySuccess(NativeTrainingMethods.OrtAddProperty(handle, propertyNameUtf8, propertyType, (IntPtr)memPtr));
                 }
-                NativeApiStatus.VerifySuccess(NativeTrainingMethods.OrtAddProperty(handle, propertyNameUtf8, propertyType, memPtr));
             }
         }
 
@@ -103,13 +99,13 @@ public static void SaveCheckpoint(CheckpointState state, string checkpointPath,
         }
 
         /// <summary>
-        /// Adds the given int property to the checkpoint state.
+        /// Adds or updates the given int property to/in the checkpoint state.
         ///
-        /// Runtime properties that are ints such as epoch, training step, and others can be added to the checkpoint
-        /// state by the user if they desire by calling this function with the appropriate property name and
-        /// value. The given property name must be unique to be able to successfully add the property.
+        /// Runtime properties such as epoch, training step, best score, and others can be added to the checkpoint
+        /// state by the user by calling this function with the corresponding property name and value.
+        /// The given property name must be unique to be able to successfully add the property.
         /// </summary>
-        /// <param name="propertyName">Unique name of the property being added.</param>
+        /// <param name="propertyName">Name of the property being added or updated.</param>
         /// <param name="propertyValue">Property value associated with the given name.</param>
         public void AddProperty(string propertyName, long propertyValue)
         {
@@ -117,13 +113,13 @@ public void AddProperty(string propertyName, long propertyValue)
         }
 
         /// <summary>
-        /// Adds the given float property to the checkpoint state.
+        /// Adds or updates the given float property to/in the checkpoint state.
         ///
-        /// Runtime properties that are floats such as loss, best score, and others can be added to the checkpoint
-        /// state by the user if they desire by calling this function with the appropriate property name and
-        /// value. The given property name must be unique to be able to successfully add the property.
+        /// Runtime properties such as epoch, training step, best score, and others can be added to the checkpoint
+        /// state by the user by calling this function with the corresponding property name and value.
+        /// The given property name must be unique to be able to successfully add the property.
         /// </summary>
-        /// <param name="propertyName">Unique name of the property being added.</param>
+        /// <param name="propertyName">Name of the property being added or updated.</param>
         /// <param name="propertyValue">Property value associated with the given name.</param>
         public void AddProperty(string propertyName, float propertyValue)
         {
@@ -131,28 +127,25 @@ public void AddProperty(string propertyName, float propertyValue)
         }
 
         /// <summary>
-        /// Adds the given string property to the checkpoint state.
+        /// Adds or updates the given string property to/in the checkpoint state.
         ///
-        /// Runtime properties that are strings such as parameter names, custom strings, and others can be added
-        /// to the checkpoint state by the user if they desire by calling this function with the appropriate property
-        /// name and value. The given property name must be unique to be able to successfully add the property.
+        /// Runtime properties such as epoch, training step, best score, and others can be added to the checkpoint
+        /// state by the user by calling this function with the corresponding property name and value.
+        /// The given property name must be unique to be able to successfully add the property.
         /// </summary>
-        /// <param name="propertyName">Unique name of the property being added.</param>
+        /// <param name="propertyName">Name of the property being added or updated.</param>
         /// <param name="propertyValue">Property value associated with the given name.</param>
         public void AddProperty(string propertyName, string propertyValue)
         {
             var propertyNameUtf8 = NativeOnnxValueHelper.StringToZeroTerminatedUtf8(propertyName);
             var propertyValueUtf8 = NativeOnnxValueHelper.StringToZeroTerminatedUtf8(propertyValue);
 
-            IntPtr unmanagedPointer = Marshal.AllocHGlobal(propertyValueUtf8.Length);
-            try
-            {
-                Marshal.Copy(propertyValueUtf8, 0, unmanagedPointer, propertyValueUtf8.Length);
-                NativeApiStatus.VerifySuccess(NativeTrainingMethods.OrtAddProperty(handle, propertyNameUtf8, PropertyType.String, unmanagedPointer));
-            }
-            finally
+            unsafe
             {
-                Marshal.FreeHGlobal(unmanagedPointer);
+                fixed (byte* p = propertyValueUtf8)
+                {
+                    NativeApiStatus.VerifySuccess(NativeTrainingMethods.OrtAddProperty(handle, propertyNameUtf8, PropertyType.String, (IntPtr)p));
+                }
             }
         }
 
@@ -162,34 +155,86 @@ public void AddProperty(string propertyName, string propertyValue)
         /// Gets the property value from an existing entry in the checkpoint state. The property must
         /// exist in the checkpoint state to be able to retrieve it successfully.
         /// </summary>
-        /// <param name="propertyName">Unique name of the property being retrieved.</param>
+        /// <param name="propertyName">Name of the property being retrieved.</param>
         /// <returns>Property value associated with the given property name.</returns>
         public object GetProperty(string propertyName)
         {
             var propertyNameUtf8 = NativeOnnxValueHelper.StringToZeroTerminatedUtf8(propertyName);
             var allocator = OrtAllocator.DefaultInstance;
             IntPtr propertyValue = IntPtr.Zero;
+
             NativeApiStatus.VerifySuccess(NativeTrainingMethods.OrtGetProperty(handle, propertyNameUtf8, allocator.Pointer, out PropertyType propertyType, out propertyValue));
 
-            if (propertyType == PropertyType.Int)
+            try
             {
-                var longPropertyValue = Marshal.ReadInt64(propertyValue);
-                allocator.FreeMemory(propertyValue);
-                return longPropertyValue;
+                if (propertyType == PropertyType.Int)
+                {
+                    Int64 value;
+                    unsafe
+                    {
+                        value = *(Int64*)propertyValue;
+                    }
+                    return value;
+                }
+                else if (propertyType == PropertyType.Float)
+                {
+                    float value;
+                    unsafe
+                    {
+                        value = *(float*)propertyValue;
+                    }
+                    return value;
+                }
+                else if (propertyType == PropertyType.String)
+                {
+                    return NativeOnnxValueHelper.StringFromNativeUtf8(propertyValue);
+                }
+
+                throw new ArgumentException("Expected the property type to be one of long, float or string. Unknown type retrieved " + propertyValue.ToString());
             }
-            else if (propertyType == PropertyType.Float)
+            finally
             {
-                float[] value = new float[1];
-                Marshal.Copy(propertyValue, value, 0, 1);
                 allocator.FreeMemory(propertyValue);
-                return value[0];
             }
-            else if (propertyType == PropertyType.String)
+        }
+
+        /// <summary>
+        /// Updates the data associated with the model parameter in the checkpoint state for the given parameter name.
+        ///
+        /// This function updates a model parameter in the checkpoint state with the given parameter data.
+        /// The training session must be already created with the checkpoint state that contains the parameter
+        /// being updated. The given parameter is copied over to the registered device for the training session.
+        /// The parameter must exist in the checkpoint state to be able to update it successfully.
+        /// </summary>
+        /// <param name="parameterName">Name of the parameter being updated.</param>
+        /// <param name="parameter">The parameter data that should replace the existing parameter data.</param>
+        public void UpdateParameter(string parameterName, OrtValue parameter)
+        {
+            if (parameter.OnnxType != OnnxValueType.ONNX_TYPE_TENSOR)
             {
-                return NativeOnnxValueHelper.StringFromNativeUtf8(propertyValue, allocator);
+                throw new ArgumentException("Incorrect buffer received. Expected a tensor parameter.");
             }
 
-            throw new ArgumentException("Expected the property type to be one of long, float or string. Unknown type retrieved " + propertyValue.ToString());
+            var parameterNameUtf8 = NativeOnnxValueHelper.StringToZeroTerminatedUtf8(parameterName);
+            NativeApiStatus.VerifySuccess(NativeTrainingMethods.OrtUpdateParameter(handle, parameterNameUtf8, parameter.Handle));
+        }
+
+        /// <summary>
+        /// Gets the data associated with the model parameter from the checkpoint state for the given parameter name.
+        ///
+        /// This function retrieves the model parameter data from the checkpoint state for the given parameter name.
+        /// The parameter is copied over to the provided OrtValue. The training session must be already created
+        /// with the checkpoint state that contains the parameter being retrieved.
+        /// The parameter must exist in the checkpoint state to be able to retrieve it successfully.
+        /// </summary>
+        /// <param name="parameterName">Name of the parameter being updated.</param>
+        /// <returns>The parameter data that is retrieved from the checkpoint state.</returns>
+        public OrtValue GetParameter(string parameterName)
+        {
+            var parameterNameUtf8 = NativeOnnxValueHelper.StringToZeroTerminatedUtf8(parameterName);
+            NativeApiStatus.VerifySuccess(NativeTrainingMethods.OrtGetParameter(handle, parameterNameUtf8, OrtAllocator.DefaultInstance.Pointer, out IntPtr parameterHandle));
+
+            return new OrtValue(parameterHandle);
         }
 
 #region SafeHandle
diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/Training/NativeTrainingMethods.shared.cs b/csharp/src/Microsoft.ML.OnnxRuntime/Training/NativeTrainingMethods.shared.cs
index 1868ff509bfc3..68a399f8b9671 100644
--- a/csharp/src/Microsoft.ML.OnnxRuntime/Training/NativeTrainingMethods.shared.cs
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/Training/NativeTrainingMethods.shared.cs
@@ -42,6 +42,9 @@ public struct OrtTrainingApi
             public IntPtr AddProperty;
             public IntPtr GetProperty;
             public IntPtr LoadCheckpointFromBuffer;
+            public IntPtr GetParameterTypeAndShape;
+            public IntPtr UpdateParameter;
+            public IntPtr GetParameter;
         }
 
         internal static class NativeTrainingMethods
@@ -97,6 +100,9 @@ static NativeTrainingMethods()
                     OrtGetEvalModelInputName = (DOrtGetEvalModelInputName)Marshal.GetDelegateForFunctionPointer(trainingApi_.TrainingSessionGetEvalModelInputName, typeof(DOrtGetEvalModelInputName));
                     OrtAddProperty = (DOrtAddProperty)Marshal.GetDelegateForFunctionPointer(trainingApi_.AddProperty, typeof(DOrtAddProperty));
                     OrtGetProperty = (DOrtGetProperty)Marshal.GetDelegateForFunctionPointer(trainingApi_.GetProperty, typeof(DOrtGetProperty));
+                    OrtGetParameterTypeAndShape = (DOrtGetParameterTypeAndShape)Marshal.GetDelegateForFunctionPointer(trainingApi_.GetParameterTypeAndShape, typeof(DOrtGetParameterTypeAndShape));
+                    OrtUpdateParameter = (DOrtUpdateParameter)Marshal.GetDelegateForFunctionPointer(trainingApi_.UpdateParameter, typeof(DOrtUpdateParameter));
+                    OrtGetParameter = (DOrtGetParameter)Marshal.GetDelegateForFunctionPointer(trainingApi_.GetParameter, typeof(DOrtGetParameter));
                 }
 
             }
@@ -359,6 +365,34 @@ out UIntPtr inputCount
 
             public static DOrtGetProperty OrtGetProperty;
 
+            [UnmanagedFunctionPointer(CallingConvention.Winapi)]
+            public delegate IntPtr /*(OrtStatus*)*/ DOrtGetParameterTypeAndShape(
+                                                    IntPtr /*(OrtCheckpointState*)*/ checkpointState,
+                                                    byte[] /*(const char*)*/ parameterName,
+                                                    out IntPtr /*(OrtTensorTypeAndShapeInfo**)*/ parameterTypeAndShape
+                                                    );
+
+            public static DOrtGetParameterTypeAndShape OrtGetParameterTypeAndShape;
+
+            [UnmanagedFunctionPointer(CallingConvention.Winapi)]
+            public delegate IntPtr /*(OrtStatus*)*/ DOrtUpdateParameter(
+                                                    IntPtr /*(OrtCheckpointState*)*/ checkpointState,
+                                                    byte[] /*(const char*)*/ parameterName,
+                                                    IntPtr /*(OrtValue*)*/ parameter
+                                                    );
+
+            public static DOrtUpdateParameter OrtUpdateParameter;
+
+            [UnmanagedFunctionPointer(CallingConvention.Winapi)]
+            public delegate IntPtr /*(OrtStatus*)*/ DOrtGetParameter(
+                                                    IntPtr /*(OrtCheckpointState*)*/ checkpointState,
+                                                    byte[] /*(const char*)*/ parameterName,
+                                                    IntPtr /*(OrtAllocator*)*/ allocator,
+                                                    out IntPtr /*(OrtValue**)*/ parameter
+                                                    );
+
+            public static DOrtGetParameter OrtGetParameter;
+
     #endregion TrainingSession API
 
             public static bool TrainingEnabled()
diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/Training/TrainingSession.shared.cs b/csharp/src/Microsoft.ML.OnnxRuntime/Training/TrainingSession.shared.cs
index 33993c2be135b..877677dcad57b 100644
--- a/csharp/src/Microsoft.ML.OnnxRuntime/Training/TrainingSession.shared.cs
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/Training/TrainingSession.shared.cs
@@ -358,13 +358,14 @@ public void EvalStep(
             IReadOnlyCollection<FixedBufferOnnxValue> inputValues,
             IReadOnlyCollection<FixedBufferOnnxValue> outputValues)
         {
-            if (!_evalOutputCount.Equals(outputValues.Count))
+            if (_evalOutputCount != (ulong)outputValues.Count())
             {
-                throw new ArgumentException($"Length of {nameof(outputValues)} ({outputValues.Count}) must match that of train model ({_trainOutputCount}).");
+                throw new ArgumentException($"Length of {nameof(outputValues)} ({outputValues.Count}) must match that of eval model ({_evalOutputCount}).");
             }
-            IntPtr[] inputValuesArray = GetOrtValuesHandles(inputValues, true);
+            const bool isInput = true;
+            IntPtr[] inputValuesArray = GetOrtValuesHandles(inputValues, isInput);
 
-            IntPtr[] outputValuesArray = GetOrtValuesHandles(outputValues, false); /* pointers to Pre-allocated OrtValue instances */
+            IntPtr[] outputValuesArray = GetOrtValuesHandles(outputValues, !isInput); /* pointers to Pre-allocated OrtValue instances */
             NativeApiStatus.VerifySuccess(NativeTrainingMethods.OrtEvalStep(_nativeHandle, options.Handle, (UIntPtr)inputValues.Count,
                 inputValuesArray, (UIntPtr)outputValues.Count, outputValuesArray));
         }
@@ -509,18 +510,17 @@ public void ExportModelForInferencing(string inferenceModelPath, IReadOnlyCollec
         /// Returns a contiguous buffer that holds a copy of all training state parameters
         /// </summary>
         /// <param name="onlyTrainable">Whether to only copy trainable parameters or to copy all parameters.</param>
-        public FixedBufferOnnxValue ToBuffer(bool onlyTrainable)
+        public OrtValue ToBuffer(bool onlyTrainable)
         {
             UIntPtr bufferSize = UIntPtr.Zero;
             NativeApiStatus.VerifySuccess(NativeTrainingMethods.OrtGetParametersSize(_nativeHandle, out bufferSize, onlyTrainable));
 
             float[] bufferMemory = new float[bufferSize.ToUInt64()];
 
-            var memInfo = OrtMemoryInfo.DefaultInstance; // CPU
-            var shape = new long[] { (long)bufferSize.ToUInt64() };
-            var buffer = FixedBufferOnnxValue.CreateFromMemory<float>(memInfo, bufferMemory, Tensors.TensorElementType.Float, shape, (long)bufferSize.ToUInt64() * sizeof(float));
+            var shape = new long[] { (long)bufferSize };
+            var buffer = OrtValue.CreateAllocatedTensorValue(OrtAllocator.DefaultInstance, Tensors.TensorElementType.Float, shape);
 
-            NativeApiStatus.VerifySuccess(NativeTrainingMethods.OrtCopyParametersToBuffer(_nativeHandle, buffer.Value.Handle, onlyTrainable));
+            NativeApiStatus.VerifySuccess(NativeTrainingMethods.OrtCopyParametersToBuffer(_nativeHandle, buffer.Handle, onlyTrainable));
 
             return buffer;
         }
@@ -528,45 +528,30 @@ public FixedBufferOnnxValue ToBuffer(bool onlyTrainable)
         /// <summary>
         /// Loads the training session model parameters from a contiguous buffer
         /// </summary>
-        /// <param name="buffer">Contiguous buffer to load the parameters from.</param>
-        public void FromBuffer(FixedBufferOnnxValue buffer)
+        /// <param name="ortValue">Contiguous buffer to load the parameters from.</param>
+        /// <param name="onlyTrainable">Whether to only load trainable parameters or to load all parameters.</param>
+        public void FromBuffer(OrtValue ortValue, bool onlyTrainable)
         {
-            if (buffer.OnnxValueType != OnnxValueType.ONNX_TYPE_TENSOR)
+            if (ortValue.OnnxType != OnnxValueType.ONNX_TYPE_TENSOR)
             {
                 throw new ArgumentException("Incorrect buffer received. Expected a tensor buffer.");
             }
 
-            IntPtr typeAndShapeInfo = IntPtr.Zero;
-            NativeApiStatus.VerifySuccess(NativeMethods.OrtGetTensorTypeAndShape(buffer.Value.Handle, out typeAndShapeInfo));
-            UIntPtr numDimensions = UIntPtr.Zero;
-            NativeApiStatus.VerifySuccess(NativeMethods.OrtGetDimensionsCount(typeAndShapeInfo, out numDimensions));
-            if (numDimensions.ToUInt64() != 1)
+            var tensorInfo = ortValue.GetTensorTypeAndShape();
+            if (tensorInfo.ElementDataType != Tensors.TensorElementType.Float)
             {
-                string errorMessage = "Incorrect buffer shape received. Expected a contiguous tensor buffer. Expected number of dimensions: 1, Actual: " + numDimensions.ToString();
-                throw new ArgumentException(errorMessage);
-            }
-
-            // Here buffer size represents the number of elements in the buffer
-            NativeApiStatus.VerifySuccess(NativeMethods.OrtGetTensorShapeElementCount(typeAndShapeInfo, out UIntPtr bufferSize));
-
-            // OrtGetParametersSize returns the total number of elements in the model's parameters.
-            UIntPtr numElementsTrainingOnly = UIntPtr.Zero;
-            NativeApiStatus.VerifySuccess(NativeTrainingMethods.OrtGetParametersSize(_nativeHandle, out numElementsTrainingOnly, true));
-            if ((ulong)bufferSize == (ulong)numElementsTrainingOnly)
-            {
-                NativeApiStatus.VerifySuccess(NativeTrainingMethods.OrtCopyBufferToParameters(_nativeHandle, buffer.Value.Handle, true));
-                return;
+                throw new ArgumentException("Incorrect buffer received. Expected a tensor buffer of type float.");
             }
 
             UIntPtr numElements = UIntPtr.Zero;
-            NativeApiStatus.VerifySuccess(NativeTrainingMethods.OrtGetParametersSize(_nativeHandle, out numElements, false));
-            if ((ulong)bufferSize != (ulong)numElements)
+            NativeApiStatus.VerifySuccess(NativeTrainingMethods.OrtGetParametersSize(_nativeHandle, out numElements, onlyTrainable));
+            if ((ulong)tensorInfo.ElementCount != (ulong)numElements)
             {
-                string errorMessage = "Incorrect buffer size received. Expected size to be one of " + numElementsTrainingOnly.ToString() + " (training only) or " + numElements.ToString() + " (all parameters). Actual size: " + bufferSize.ToString();
+                string errorMessage = "Incorrect buffer size received. Expected size to be " + numElements.ToString() + ". Actual size: " + tensorInfo.ElementCount.ToString();
                 throw new ArgumentException(errorMessage);
             }
 
-            NativeApiStatus.VerifySuccess(NativeTrainingMethods.OrtCopyBufferToParameters(_nativeHandle, buffer.Value.Handle, false));
+            NativeApiStatus.VerifySuccess(NativeTrainingMethods.OrtCopyBufferToParameters(_nativeHandle, ortValue.Handle, onlyTrainable));
         }
 
         /// <summary>
diff --git a/csharp/test/Microsoft.ML.OnnxRuntime.Tests.Common/TrainingTest.cs b/csharp/test/Microsoft.ML.OnnxRuntime.Tests.Common/TrainingTest.cs
index ea2b6d7dbc118..68b1d5bcc6147 100644
--- a/csharp/test/Microsoft.ML.OnnxRuntime.Tests.Common/TrainingTest.cs
+++ b/csharp/test/Microsoft.ML.OnnxRuntime.Tests.Common/TrainingTest.cs
@@ -484,20 +484,23 @@ public void TestEvalModelOutputNames()
         public void TestToBuffer()
         {
             string checkpointPath = Path.Combine(Directory.GetCurrentDirectory(), "checkpoint.ckpt");
-            using (var cleanUp = new DisposableListTest<IDisposable>())
+            string trainingPath = Path.Combine(Directory.GetCurrentDirectory(), "training_model.onnx");
+            string evalPath = Path.Combine(Directory.GetCurrentDirectory(), "eval_model.onnx");
+            string optimizerPath = Path.Combine(Directory.GetCurrentDirectory(), "adamw.onnx");
+
+            using (var state = CheckpointState.LoadCheckpoint(checkpointPath))
+            using (var trainingSession = new TrainingSession(state, trainingPath, evalPath, optimizerPath))
             {
-                var state = CheckpointState.LoadCheckpoint(checkpointPath);
-                cleanUp.Add(state);
                 Assert.NotNull(state);
-                string trainingPath = Path.Combine(Directory.GetCurrentDirectory(), "training_model.onnx");
-                string evalPath = Path.Combine(Directory.GetCurrentDirectory(), "eval_model.onnx");
-                string optimizerPath = Path.Combine(Directory.GetCurrentDirectory(), "adamw.onnx");
 
-                var trainingSession = new TrainingSession(state, trainingPath, evalPath, optimizerPath);
-                cleanUp.Add(trainingSession);
-
-                var buffer = trainingSession.ToBuffer(true);
-                cleanUp.Add(buffer);
+                using (var buffer = trainingSession.ToBuffer(true))
+                {
+                    Assert.NotNull(buffer);
+                    var typeShape = buffer.GetTensorTypeAndShape();
+                    Assert.Equal(1, typeShape.DimensionsCount);
+                    var fetchedShape = typeShape.Shape;
+                    Assert.Equal(397510, fetchedShape[0]);
+                }
             }
         }
 
@@ -505,22 +508,25 @@ public void TestToBuffer()
         public void TestFromBuffer()
         {
             string checkpointPath = Path.Combine(Directory.GetCurrentDirectory(), "checkpoint.ckpt");
-            using (var cleanUp = new DisposableListTest<IDisposable>())
+            string trainingPath = Path.Combine(Directory.GetCurrentDirectory(), "training_model.onnx");
+            string evalPath = Path.Combine(Directory.GetCurrentDirectory(), "eval_model.onnx");
+            string optimizerPath = Path.Combine(Directory.GetCurrentDirectory(), "adamw.onnx");
+
+            using (var state = CheckpointState.LoadCheckpoint(checkpointPath))
+            using (var trainingSession = new TrainingSession(state, trainingPath, evalPath, optimizerPath))
             {
-                var state = CheckpointState.LoadCheckpoint(checkpointPath);
-                cleanUp.Add(state);
                 Assert.NotNull(state);
-                string trainingPath = Path.Combine(Directory.GetCurrentDirectory(), "training_model.onnx");
-                string evalPath = Path.Combine(Directory.GetCurrentDirectory(), "eval_model.onnx");
-                string optimizerPath = Path.Combine(Directory.GetCurrentDirectory(), "adamw.onnx");
-
-                var trainingSession = new TrainingSession(state, trainingPath, evalPath, optimizerPath);
-                cleanUp.Add(trainingSession);
 
-                var buffer = trainingSession.ToBuffer(true);
-                cleanUp.Add(buffer);
+                using (var buffer = trainingSession.ToBuffer(true))
+                {
+                    Assert.NotNull(buffer);
+                    var typeShape = buffer.GetTensorTypeAndShape();
+                    Assert.Equal(1, typeShape.DimensionsCount);
+                    var fetchedShape = typeShape.Shape;
+                    Assert.Equal(397510, fetchedShape[0]);
 
-                trainingSession.FromBuffer(buffer);
+                    trainingSession.FromBuffer(buffer, true);
+                }
             }
         }
 
@@ -530,6 +536,82 @@ public void TestSetSeed()
             TrainingUtils.SetSeed(8888);
         }
 
+        [Fact(DisplayName = "TestGetParameter")]
+        public void TestGetParameter()
+        {
+            string checkpointPath = Path.Combine(Directory.GetCurrentDirectory(), "checkpoint.ckpt");
+            string trainingPath = Path.Combine(Directory.GetCurrentDirectory(), "training_model.onnx");
+            string evalPath = Path.Combine(Directory.GetCurrentDirectory(), "eval_model.onnx");
+            string optimizerPath = Path.Combine(Directory.GetCurrentDirectory(), "adamw.onnx");
+
+            using (var state = CheckpointState.LoadCheckpoint(checkpointPath))
+            using (var trainingSession = new TrainingSession(state, trainingPath, evalPath, optimizerPath))
+            using (var parameter = state.GetParameter("fc1.weight"))
+            {
+                Assert.NotNull(state);
+                Assert.NotNull(parameter);
+
+                var typeShape = parameter.GetTensorTypeAndShape();
+                Assert.Equal(2, typeShape.DimensionsCount);
+                var fetchedShape = typeShape.Shape;
+                Assert.Equal(500, fetchedShape[0]);
+                Assert.Equal(784, fetchedShape[1]);
+            }
+        }
+
+        [Fact(DisplayName = "TestUpdateParameter")]
+        public void TestUpdateParameter()
+        {
+            string checkpointPath = Path.Combine(Directory.GetCurrentDirectory(), "checkpoint.ckpt");
+            string trainingPath = Path.Combine(Directory.GetCurrentDirectory(), "training_model.onnx");
+            string evalPath = Path.Combine(Directory.GetCurrentDirectory(), "eval_model.onnx");
+            string optimizerPath = Path.Combine(Directory.GetCurrentDirectory(), "adamw.onnx");
+
+            using (var state = CheckpointState.LoadCheckpoint(checkpointPath))
+            using (var trainingSession = new TrainingSession(state, trainingPath, evalPath, optimizerPath))
+            {
+                Assert.NotNull(state);
+
+                using (var parameter = state.GetParameter("fc1.weight"))
+                {
+                    Assert.NotNull(parameter);
+                    var typeShape = parameter.GetTensorTypeAndShape();
+
+                    Assert.Equal(2, typeShape.DimensionsCount);
+                    var fetchedShape = typeShape.Shape;
+                    Assert.Equal(500, fetchedShape[0]);
+                    Assert.Equal(784, fetchedShape[1]);
+
+                    float maxVal = 20;
+                    Random randNum = new Random();
+                    float[] updated_parameter_buffer = Enumerable
+                        .Repeat(0, 500 * 784)
+                        .Select(i => maxVal * (float)randNum.NextDouble())
+                        .ToArray();
+
+                    using (var updated_parameter = OrtValue.CreateTensorValueFromMemory(updated_parameter_buffer, fetchedShape))
+                    {
+                        state.UpdateParameter("fc1.weight", updated_parameter);
+                        using (var current_parameter = state.GetParameter("fc1.weight"))
+                        {
+                            var current_parameter_tensor = current_parameter.GetTensorDataAsSpan<float>().ToArray();
+                            Assert.Equal(updated_parameter_buffer, current_parameter_tensor);
+                            Assert.NotEqual(parameter.GetTensorDataAsSpan<float>().ToArray(), current_parameter_tensor);
+                        }
+
+                        state.UpdateParameter("fc1.weight", parameter);
+
+                        using (var current_parameter = state.GetParameter("fc1.weight"))
+                        {
+                            var current_parameter_tensor = current_parameter.GetTensorDataAsSpan<float>().ToArray();
+                            Assert.Equal(parameter.GetTensorDataAsSpan<float>().ToArray(), current_parameter_tensor);
+                            Assert.NotEqual(updated_parameter_buffer, current_parameter_tensor);
+                        }
+                    }
+                }
+            }
+        }
+
         internal class FloatComparer : IEqualityComparer<float>
         {
             private float atol = 1e-3f;
diff --git a/csharp/tools/ValidateNativeDelegateAttributes.py b/csharp/tools/ValidateNativeDelegateAttributes.py
new file mode 100644
index 0000000000000..acd6c173bfeb0
--- /dev/null
+++ b/csharp/tools/ValidateNativeDelegateAttributes.py
@@ -0,0 +1,62 @@
+#!/usr/bin/env python3
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+import argparse
+import pathlib
+
+
+def check_all_delegates_have_unmanaged_function_pointer_attribute(file: pathlib.Path):
+    """
+    Check that all 'public delegate' declarations have a matching UnmanagedFunctionPointer attribute.
+    :param file: C# source file to check.
+    :return: Number of errors
+    """
+
+    print(f"Checking {file!s}")
+
+    errors = 0
+    line_num = 0
+    with open(str(file.resolve(strict=True))) as f:
+        prev_line = ""
+        for line in f.readlines():
+            line_num += 1
+
+            # strip so it's easier to deal with commented out lines.
+            line = line.strip()  # noqa
+            if line.startswith("public delegate ") and not prev_line.startswith("[UnmanagedFunctionPointer"):
+                errors += 1
+                print(f"Line {line_num} is missing UnmanagedFunctionPointer attribute:\n\t{prev_line}\n\t{line}")
+
+            prev_line = line
+
+    return errors
+
+
+def main():
+    arg_parser = argparse.ArgumentParser(
+        "Script to validate that the native delegates for the ONNX Runtime C# managed projects have the required "
+        "attributes for iOS AOT. Paths are inferred from the script location."
+        "Errors of this nature can only be detected at runtime, in a release build, of a Xamarin/MAUI app, "
+        "on an actual iOS device. Due to that we take extra steps to identify problems early."
+    )
+
+    # no real args. just using this to provide description as help message
+    _ = arg_parser.parse_args()
+
+    # CI needs resolve() as __file__ is a relative path when the script is run there
+    script_dir = pathlib.Path(__file__).resolve().parent
+    csharp_root = script_dir.parent
+
+    managed_dir = csharp_root / "src" / "Microsoft.ML.OnnxRuntime"
+    native_methods = managed_dir / "NativeMethods.shared.cs"
+    training_native_methods = managed_dir / "Training" / "NativeTrainingMethods.shared.cs"
+    errors = check_all_delegates_have_unmanaged_function_pointer_attribute(native_methods)
+    errors += check_all_delegates_have_unmanaged_function_pointer_attribute(training_native_methods)
+
+    if errors:
+        raise ValueError(f"{errors} errors were found. Please check output for specifics.")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/csharp/tools/linux_pack/LinuxPackNativeNuget.csproj b/csharp/tools/linux_pack/LinuxPackNativeNuget.csproj
new file mode 100644
index 0000000000000..098078d2e3683
--- /dev/null
+++ b/csharp/tools/linux_pack/LinuxPackNativeNuget.csproj
@@ -0,0 +1,15 @@
+<!-- csproj for use with `dotnet pack` on linux via build.py's `build_nuget` option.
+
+     Expected usage is to create a Microsoft.ML.OnnxRuntime native nuget package containing only the linux
+     libonnxruntime.so for local testing.
+     We only enable netstandard2.0 due to that, but additional frameworks can be added as needed.
+
+     If you need a more sophisticated package for testing, you can run the production packaging pipeline against your
+     branch and download the resulting nuget package from the build artifacts.
+ -->
+<Project Sdk="MSBuild.Sdk.Extras/3.0.22">
+  <PropertyGroup>
+    <TargetFrameworks>netstandard2.0</TargetFrameworks>
+    <NuspecFile>$(OnnxRuntimeBuildDirectory)/NativeNuget.nuspec</NuspecFile>
+  </PropertyGroup>
+</Project>
diff --git a/dockerfiles/Dockerfile.source b/dockerfiles/Dockerfile.source
index 110e484e77d21..5822a805c674e 100644
--- a/dockerfiles/Dockerfile.source
+++ b/dockerfiles/Dockerfile.source
@@ -8,13 +8,14 @@ FROM mcr.microsoft.com/cbl-mariner/base/python:3
 MAINTAINER Changming Sun "chasun@microsoft.com"
 ADD . /code
 
-RUN tdnf install -y tar ca-certificates build-essential python3-numpy cmake python3-setuptools python3-wheel python3-pip curl python3-devel
+RUN tdnf install -y tar ca-certificates build-essential cmake curl python3-devel python3-setuptools python3-wheel python3-pip python3-numpy python3-flatbuffers python3-packaging python3-protobuf
+# The latest cmake version in Mariner2 is 3.21, but we need 3.26+
 RUN /code/dockerfiles/scripts/install_cmake.sh
 
 # Prepare onnxruntime repository & build onnxruntime
-RUN cd /code && python3 -m pip install -r tools/ci_build/github/linux/docker/inference/x64/python/cpu/scripts/requirements.txt && /bin/bash ./build.sh --allow_running_as_root --skip_submodule_sync --config Release --build_wheel --update --build --parallel --cmake_extra_defines ONNXRUNTIME_VERSION=$(cat ./VERSION_NUMBER)
+RUN cd /code && /bin/bash ./build.sh --allow_running_as_root --skip_submodule_sync --config Release --build_wheel --update --build --parallel --cmake_extra_defines ONNXRUNTIME_VERSION=$(cat ./VERSION_NUMBER)
 
 FROM mcr.microsoft.com/cbl-mariner/base/python:3
 COPY --from=0 /code/build/Linux/Release/dist /root
 COPY --from=0 /code/dockerfiles/LICENSE-IMAGE.txt /code/LICENSE-IMAGE.txt
-RUN tdnf install -y  ca-certificates python3-setuptools python3-wheel python3-pip && python3 -m pip install /root/*.whl && rm -rf /root/*.whl
+RUN tdnf install -y ca-certificates python3-setuptools python3-wheel python3-pip python3-numpy python3-flatbuffers python3-packaging python3-protobuf python3-mpmath python3-sympy && python3 -m pip install coloredlogs humanfriendly && python3 -m pip install --no-index --find-links /root onnxruntime  && rm -rf /root/*.whl
diff --git a/docs/ABI_Dev_Notes.md b/docs/ABI_Dev_Notes.md
index f9b55176cf95b..f85dd9d19a336 100644
--- a/docs/ABI_Dev_Notes.md
+++ b/docs/ABI_Dev_Notes.md
@@ -4,7 +4,7 @@ Global variables may get constructed or destructed inside "DllMain". There are s
 ## Thread Local variables
 Onnxruntime must support explicit linking, where the operating system loads the DLL on demand at runtime, instead of process startup time. This is required by our language bindings like C#/Java.
 
-However, there are some special restrictions on this, If a thread local variable need non-trivial construction, for the threads already exist before onnxruntime.dll is loaded, the variable won't get initialized correctly. So it's better to only access such variables from onnxruntime internal threads, or make these variables function local (Like the magic statics).
+However, there are some special restrictions on this. If a thread local variable need non-trivial construction, for the threads already exist before onnxruntime.dll is loaded, the variable won't get initialized correctly. So it's better to only access such variables from onnxruntime internal threads, or make these variables function local (Like the magic statics).
 
 
 ## No undefined symbols
diff --git a/docs/ContribOperators.md b/docs/ContribOperators.md
index 95dc8c3cde46c..c73f978bdf404 100644
--- a/docs/ContribOperators.md
+++ b/docs/ContribOperators.md
@@ -27,6 +27,8 @@ Do not modify directly.*
   * <a href="#com.microsoft.DequantizeWithOrder">com.microsoft.DequantizeWithOrder</a>
   * <a href="#com.microsoft.DynamicQuantizeLSTM">com.microsoft.DynamicQuantizeLSTM</a>
   * <a href="#com.microsoft.DynamicQuantizeMatMul">com.microsoft.DynamicQuantizeMatMul</a>
+  * <a href="#com.microsoft.DynamicTimeWarping">com.microsoft.DynamicTimeWarping</a>
+  * <a href="#com.microsoft.EPContext">com.microsoft.EPContext</a>
   * <a href="#com.microsoft.EmbedLayerNormalization">com.microsoft.EmbedLayerNormalization</a>
   * <a href="#com.microsoft.ExpandDims">com.microsoft.ExpandDims</a>
   * <a href="#com.microsoft.FastGelu">com.microsoft.FastGelu</a>
@@ -38,16 +40,21 @@ Do not modify directly.*
   * <a href="#com.microsoft.GatherND">com.microsoft.GatherND</a>
   * <a href="#com.microsoft.Gelu">com.microsoft.Gelu</a>
   * <a href="#com.microsoft.GemmFastGelu">com.microsoft.GemmFastGelu</a>
+  * <a href="#com.microsoft.GemmFloat8">com.microsoft.GemmFloat8</a>
   * <a href="#com.microsoft.GreedySearch">com.microsoft.GreedySearch</a>
   * <a href="#com.microsoft.GridSample">com.microsoft.GridSample</a>
   * <a href="#com.microsoft.GroupNorm">com.microsoft.GroupNorm</a>
+  * <a href="#com.microsoft.GroupQueryAttention">com.microsoft.GroupQueryAttention</a>
   * <a href="#com.microsoft.Inverse">com.microsoft.Inverse</a>
   * <a href="#com.microsoft.Irfft">com.microsoft.Irfft</a>
   * <a href="#com.microsoft.LongformerAttention">com.microsoft.LongformerAttention</a>
+  * <a href="#com.microsoft.MatMulBnb4">com.microsoft.MatMulBnb4</a>
   * <a href="#com.microsoft.MatMulFpQ4">com.microsoft.MatMulFpQ4</a>
   * <a href="#com.microsoft.MatMulInteger16">com.microsoft.MatMulInteger16</a>
   * <a href="#com.microsoft.MatMulIntegerToFloat">com.microsoft.MatMulIntegerToFloat</a>
+  * <a href="#com.microsoft.MatMulNBits">com.microsoft.MatMulNBits</a>
   * <a href="#com.microsoft.MaxpoolWithMask">com.microsoft.MaxpoolWithMask</a>
+  * <a href="#com.microsoft.MoE">com.microsoft.MoE</a>
   * <a href="#com.microsoft.MulInteger">com.microsoft.MulInteger</a>
   * <a href="#com.microsoft.MultiHeadAttention">com.microsoft.MultiHeadAttention</a>
   * <a href="#com.microsoft.MurmurHash3">com.microsoft.MurmurHash3</a>
@@ -86,8 +93,10 @@ Do not modify directly.*
   * <a href="#com.microsoft.RemovePadding">com.microsoft.RemovePadding</a>
   * <a href="#com.microsoft.RestorePadding">com.microsoft.RestorePadding</a>
   * <a href="#com.microsoft.Rfft">com.microsoft.Rfft</a>
+  * <a href="#com.microsoft.RotaryEmbedding">com.microsoft.RotaryEmbedding</a>
   * <a href="#com.microsoft.SampleOp">com.microsoft.SampleOp</a>
   * <a href="#com.microsoft.Sampling">com.microsoft.Sampling</a>
+  * <a href="#com.microsoft.SkipGroupNorm">com.microsoft.SkipGroupNorm</a>
   * <a href="#com.microsoft.SkipLayerNormalization">com.microsoft.SkipLayerNormalization</a>
   * <a href="#com.microsoft.SkipSimplifiedLayerNormalization">com.microsoft.SkipSimplifiedLayerNormalization</a>
   * <a href="#com.microsoft.Snpe">com.microsoft.Snpe</a>
@@ -96,7 +105,9 @@ Do not modify directly.*
   * <a href="#com.microsoft.TorchEmbedding">com.microsoft.TorchEmbedding</a>
   * <a href="#com.microsoft.TransposeMatMul">com.microsoft.TransposeMatMul</a>
   * <a href="#com.microsoft.Trilu">com.microsoft.Trilu</a>
+  * <a href="#com.microsoft.UnfoldTensor">com.microsoft.UnfoldTensor</a>
   * <a href="#com.microsoft.Unique">com.microsoft.Unique</a>
+  * <a href="#com.microsoft.WhisperBeamSearch">com.microsoft.WhisperBeamSearch</a>
   * <a href="#com.microsoft.WordConvEmbedding">com.microsoft.WordConvEmbedding</a>
   * <sub>experimental</sub> <a href="#com.microsoft.IsAllFinite">com.microsoft.IsAllFinite</a>
   * <sub>experimental</sub> <a href="#com.microsoft.QEmbedLayerNormalization">com.microsoft.QEmbedLayerNormalization</a>
@@ -1130,6 +1141,8 @@ This version of the operator has been available since version 1 of the 'com.micr
 <dd>The value to be filled in the attention mask. Default value is -10000.0f</dd>
 <dt><tt>num_heads</tt> : int (required)</dt>
 <dd>Number of attention heads</dd>
+<dt><tt>output_qk</tt> : int</dt>
+<dd>Need output the cross attention MatMul(Q, K)</dd>
 <dt><tt>past_present_share_buffer</tt> : int</dt>
 <dd>Corresponding past and present are same tensor, its size is (batch_size, num_heads, max_sequence_length, head_size)</dd>
 <dt><tt>scale</tt> : float</dt>
@@ -1163,20 +1176,24 @@ This version of the operator has been available since version 1 of the 'com.micr
 <dd>Bias tensor with shape (hidden_size + hidden_size + v_hidden_size) from input projection</dd>
 </dl>
 
-#### Outputs (1 - 3)
+#### Outputs (1 - 4)
 
 <dl>
 <dt><tt>output</tt> : T</dt>
 <dd>3D output tensor with shape (batch_size, sequence_length, v_hidden_size)</dd>
 <dt><tt>present_key</tt> (optional) : T</dt>
-<dd>past state for key with shape (batch_size, num_heads, total_sequence_length, head_size). If past_present_share_buffer is set, its shape is (batch_size, num_heads, max_sequence_length, head_size), while effective_seq_length = (past_sequence_length + kv_sequence_length).</dd>
+<dd>present state for key with shape (batch_size, num_heads, total_sequence_length, head_size). If past_present_share_buffer is set, its shape is (batch_size, num_heads, max_sequence_length, head_size), while effective_seq_length = (past_sequence_length + kv_sequence_length).</dd>
 <dt><tt>present_value</tt> (optional) : T</dt>
-<dd>past state for value with shape (batch_size, num_heads, total_sequence_length, head_size). If past_present_share_buffer is set, its shape is (batch_size, num_heads, max_sequence_length, head_size), while effective_seq_length = (past_sequence_length + kv_sequence_length).</dd>
+<dd>present state for value with shape (batch_size, num_heads, total_sequence_length, head_size). If past_present_share_buffer is set, its shape is (batch_size, num_heads, max_sequence_length, head_size), while effective_seq_length = (past_sequence_length + kv_sequence_length).</dd>
+<dt><tt>qk</tt> (optional) : V</dt>
+<dd>normalized Q * K, of shape (batch_size, num_heads, 1, head_size). </dd>
 </dl>
 
 #### Type Constraints
 
 <dl>
+<dt><tt>V</tt> : tensor(float)</dt>
+<dd>Constrain qk output types to float32 tensors.</dd>
 <dt><tt>T</tt> : tensor(float), tensor(float16)</dt>
 <dd>Constrain input and output types to float tensors.</dd>
 <dt><tt>M</tt> : tensor(int32)</dt>
@@ -1520,6 +1537,87 @@ This version of the operator has been available since version 1 of the 'com.micr
 </dl>
 
 
+### <a name="com.microsoft.DynamicTimeWarping"></a><a name="com.microsoft.dynamictimewarping">**com.microsoft.DynamicTimeWarping**</a>
+
+  Input is cost matrix where each value in input[r][c] is the cost for pass the point (r, c). From current point(r, c),  points (r+1, c), (r+1, c+1) or (r, c+1) could be arrived in next move. Given such cost matrix, return dynamic time wrapping of shape [2, x], where the path made by all points (output[0][t], output[1][t])have the lowest cost among all paths from (0, 0) to (M-1, N-1).
+
+#### Version
+
+This version of the operator has been available since version 1 of the 'com.microsoft' operator set.
+
+#### Inputs
+
+<dl>
+<dt><tt>input</tt> : F</dt>
+<dd>Input cost tensor, it must be 2D tensor of shape M x N, or 1 x M x N</dd>
+</dl>
+
+#### Outputs
+
+<dl>
+<dt><tt>output</tt> : I</dt>
+<dd>Output tensor. shape is [2, x], where max(M, N) <= x < M + N</dd>
+</dl>
+
+#### Type Constraints
+
+<dl>
+<dt><tt>F</tt> : tensor(float)</dt>
+<dd>Constrain to float tensors.</dd>
+<dt><tt>I</tt> : tensor(int32)</dt>
+<dd>Constrain to integer types.</dd>
+</dl>
+
+
+### <a name="com.microsoft.EPContext"></a><a name="com.microsoft.epcontext">**com.microsoft.EPContext**</a>
+
+  Onnx node container for EP context.
+
+#### Version
+
+This version of the operator has been available since version 1 of the 'com.microsoft' operator set.
+
+#### Attributes
+
+<dl>
+<dt><tt>embed_mode</tt> : int</dt>
+<dd>1: indicate ep_cache_context is the context content. 0: indicate ep_cache_context is the file path to the context content.The path is relative to this Onnx file. Default is 1.</dd>
+<dt><tt>ep_cache_context</tt> : string</dt>
+<dd>payload of the execution provider context if embed_mode=1, or path to the context file if embed_mode=0.</dd>
+<dt><tt>ep_sdk_version</tt> : string</dt>
+<dd>(Optional) SDK version used to convert the model.</dd>
+<dt><tt>main_context</tt> : int</dt>
+<dd>Usually each single EPContext associate with a graph partition.But for some case like QNN, it has single EPContext contains all partitions.In that case, the node with ep_cache_context should set main_context=1. Other nodes set main_context=0 and skip ep_cache_context.The path is relative to this Onnx file. Default is 1.</dd>
+<dt><tt>notes</tt> : string</dt>
+<dd>(Optional) Some notes for the model</dd>
+<dt><tt>partition_name</tt> : string</dt>
+<dd>(Optional) partitioned graph name.</dd>
+<dt><tt>source</tt> : string</dt>
+<dd>(Optional) the source used to generate the engine/context cache file. Ort EP or native SDK tool chain</dd>
+</dl>
+
+#### Inputs (1 - &#8734;)
+
+<dl>
+<dt><tt>inputs</tt> (variadic) : T</dt>
+<dd>List of tensors for inputs</dd>
+</dl>
+
+#### Outputs (1 - &#8734;)
+
+<dl>
+<dt><tt>outputs</tt> (variadic) : T</dt>
+<dd>One or more outputs, list of tensors for outputs</dd>
+</dl>
+
+#### Type Constraints
+
+<dl>
+<dt><tt>T</tt> : tensor(int8), tensor(int16), tensor(int32), tensor(int64), tensor(uint8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(float16), tensor(float), tensor(double)</dt>
+<dd>Constrain input and output types.</dd>
+</dl>
+
+
 ### <a name="com.microsoft.EmbedLayerNormalization"></a><a name="com.microsoft.embedlayernormalization">**com.microsoft.EmbedLayerNormalization**</a>
 
   EmbedLayerNormalization is the fusion of embedding layer in BERT model, with optional mask processing.
@@ -2042,6 +2140,71 @@ This version of the operator has been available since version 1 of the 'com.micr
 </dl>
 
 
+### <a name="com.microsoft.GemmFloat8"></a><a name="com.microsoft.gemmfloat8">**com.microsoft.GemmFloat8**</a>
+
+  Generic Gemm for float and float 8.
+
+#### Version
+
+This version of the operator has been available since version 1 of the 'com.microsoft' operator set.
+
+#### Attributes
+
+<dl>
+<dt><tt>activation</tt> : string</dt>
+<dd>Activation function, RELU or GELU or NONE (default).</dd>
+<dt><tt>alpha</tt> : float</dt>
+<dd>Scalar multiplier for the product of input tensors A * B.</dd>
+<dt><tt>beta</tt> : float</dt>
+<dd>Scalar multiplier for the product of input bias C.</dd>
+<dt><tt>dtype</tt> : int</dt>
+<dd>Output Type. Same definition as attribute 'to' for operator Cast.</dd>
+<dt><tt>transA</tt> : int</dt>
+<dd>Whether A should be transposed. Float 8 only supprted transA=0.</dd>
+<dt><tt>transB</tt> : int</dt>
+<dd>Whether B should be transposed. Float 8 only supprted transB=1.</dd>
+</dl>
+
+#### Inputs (2 - 6)
+
+<dl>
+<dt><tt>A</tt> : TA</dt>
+<dd>Input tensor A. The shape of A should be (M, K) if transA is 0, or (K, M) if transA is non-zero.</dd>
+<dt><tt>B</tt> : TB</dt>
+<dd>Input tensor B. The shape of B should be (K, N) if transB is 0, or (N, K) if transB is non-zero.</dd>
+<dt><tt>C</tt> (optional) : TC</dt>
+<dd>Input tensor C.</dd>
+<dt><tt>scaleA</tt> (optional) : TS</dt>
+<dd>Scale of tensor A if A is float 8 tensor</dd>
+<dt><tt>scaleB</tt> (optional) : TS</dt>
+<dd>Scale of tensor B if B is float 8 tensor</dd>
+<dt><tt>scaleY</tt> (optional) : TS</dt>
+<dd>Scale of the output tensor if A or B is float 8.</dd>
+</dl>
+
+#### Outputs
+
+<dl>
+<dt><tt>Y</tt> : TR</dt>
+<dd>Output tensor of shape (M, N).</dd>
+</dl>
+
+#### Type Constraints
+
+<dl>
+<dt><tt>TA</tt> : tensor(float8e4m3fn), tensor(float8e5m2), tensor(float16), tensor(bfloat16), tensor(float)</dt>
+<dd>Constrain type to input A.</dd>
+<dt><tt>TB</tt> : tensor(float8e4m3fn), tensor(float8e5m2), tensor(float16), tensor(bfloat16), tensor(float)</dt>
+<dd>Constrain type to input B.</dd>
+<dt><tt>TC</tt> : tensor(float16), tensor(bfloat16), tensor(float)</dt>
+<dd>Constrain type to input C.</dd>
+<dt><tt>TR</tt> : tensor(float8e4m3fn), tensor(float8e5m2), tensor(float16), tensor(bfloat16), tensor(float)</dt>
+<dd>Constrain type to result type.</dd>
+<dt><tt>TS</tt> : tensor(float)</dt>
+<dd>Constrain type for all input scales (scaleA, scaleB, scaleY).</dd>
+</dl>
+
+
 ### <a name="com.microsoft.GreedySearch"></a><a name="com.microsoft.greedysearch">**com.microsoft.GreedySearch**</a>
 
   Greedy Search for text generation.
@@ -2181,7 +2344,7 @@ This version of the operator has been available since version 1 of the 'com.micr
 
 <dl>
 <dt><tt>activation</tt> : int (required)</dt>
-<dd>Activation after group normalization: 0 for None, 1 for Swish</dd>
+<dd>Activation after group normalization: 0 for None, 1 for SiLU</dd>
 <dt><tt>channels_last</tt> : int</dt>
 <dd>1 if the input and output are in the NHWC layout, 0 if it is in the NCHW layout. Defaults to 1.</dd>
 <dt><tt>epsilon</tt> : float</dt>
@@ -2218,6 +2381,69 @@ This version of the operator has been available since version 1 of the 'com.micr
 </dl>
 
 
+### <a name="com.microsoft.GroupQueryAttention"></a><a name="com.microsoft.groupqueryattention">**com.microsoft.GroupQueryAttention**</a>
+
+  Group Query Self/Cross Attention.
+  
+  Supports different number of heads for q and kv. Only supports causal or local attention.
+
+#### Version
+
+This version of the operator has been available since version 1 of the 'com.microsoft' operator set.
+
+#### Attributes
+
+<dl>
+<dt><tt>kv_num_heads</tt> : int (required)</dt>
+<dd>Number of attention heads for k and v</dd>
+<dt><tt>local_window_size</tt> : int</dt>
+<dd>left_window_size for local attention (like Mistral). Default value is -1 meaning unused.</dd>
+<dt><tt>num_heads</tt> : int (required)</dt>
+<dd>Number of attention heads for q</dd>
+<dt><tt>scale</tt> : float</dt>
+<dd>Custom scale will be used if specified. Default value is 1/sqrt(head_size)</dd>
+</dl>
+
+#### Inputs
+
+<dl>
+<dt><tt>query</tt> : T</dt>
+<dd>Query with shape (batch_size, sequence_length, hidden_size)</dd>
+<dt><tt>key</tt> : T</dt>
+<dd>Key with shape (batch_size, kv_sequence_length, kv_hidden_size) </dd>
+<dt><tt>value</tt> : T</dt>
+<dd>Value with shape (batch_size, kv_sequence_length, kv_hidden_size)</dd>
+<dt><tt>past_key</tt> (optional) : T</dt>
+<dd>past state key with support for format BNSH. When past_key uses same tensor as present_key(k-v cache), it is of length max_sequence_length... otherwise of length past_sequence_length.</dd>
+<dt><tt>past_value</tt> (optional) : T</dt>
+<dd>past state value with support for format BNSH. When past_value uses same tensor as present_value(k-v cache), it is of length max_sequence_length... otherwise of length past_sequence_length.</dd>
+<dt><tt>seqlens_k</tt> : M</dt>
+<dd>1d Tensor of shape (batch_size). Indicates past sequence lengths for token generation case.</dd>
+<dt><tt>total_sequence_length</tt> : M</dt>
+<dd>Scalar tensor of total sequence length (past + new).</dd>
+</dl>
+
+#### Outputs
+
+<dl>
+<dt><tt>output</tt> : T</dt>
+<dd>3D output tensor with shape (batch_size, sequence_length, hidden_size)</dd>
+<dt><tt>present_key</tt> : T</dt>
+<dd>present state key with support for format BNSH. When past_key uses same tensor as present_key(k-v buffer), it is of length max_sequence_length... otherwise of length past_sequence_length +kv_sequence_length.</dd>
+<dt><tt>present_value</tt> : T</dt>
+<dd>present state value with support for format BNSH. When past_value uses same tensor as present_value(k-v buffer), it is of length max_sequence_length... otherwise of length past_sequence_length +kv_sequence_length.</dd>
+</dl>
+
+#### Type Constraints
+
+<dl>
+<dt><tt>T</tt> : tensor(float16)</dt>
+<dd>Constrain input and output to float tensors.</dd>
+<dt><tt>M</tt> : tensor(int32)</dt>
+<dd>Constrain mask to int tensor.</dd>
+</dl>
+
+
 ### <a name="com.microsoft.Inverse"></a><a name="com.microsoft.inverse">**com.microsoft.Inverse**</a>
 
 #### Version
@@ -2347,6 +2573,89 @@ This version of the operator has been available since version 1 of the 'com.micr
 </dl>
 
 
+### <a name="com.microsoft.MatMulBnb4"></a><a name="com.microsoft.matmulbnb4">**com.microsoft.MatMulBnb4**</a>
+
+  MatMulBnb4 is a MatMul with weight quantized with 4 bits using either FP4 or NF4 data type (https://arxiv.org/pdf/2305.14314.pdf). It does Matrix Multiplication like MatMul (https://github.com/onnx/onnx/blob/main/docs/Operators.md#matmul) with differences:
+    1. Input B is a 2D constant Matrix. Its input feature count and output feature count are specified by attribute 'K' and 'N'.
+    2. Input B is quantized with 4 bits with quantization data type specified by attribute 'quant_type'. It is transposed, flattened and quantized blockwisely with block size specified by attribute 'block_size'.
+       And block_size is not an arbitrary number and must be a power of 2 and not smaller than 16, like 16, 32, 64, 128,..
+    3. Input B's quantization constants or scales are specified by input 'absmax'.
+  
+    Input B is stored as uint8_t with shape: [(N * K + 1) / 2].
+    Input absmax is stored in same type as original type of B(float32, float16) with shape like: [(N * K + block_size - 1) / block_size].
+  
+  
+    1. (Default value) transB=True (Majorly used for forward pass)
+      Shape of A: [D0, D1, ..., Dn, K]
+      Shape of Dequanted B: [N, K], this is aligned with how PyTorch defined the linear weight, .e.g [out_features, in_features].
+  
+      The computation math:
+        dequant_B = dequant(B, absmax, quant_type, block_size)
+        transposed_dequant_B = dequant_B^T
+        output = A @ transposed_dequant_B
+  
+      Shape of output: [D0, D1, ..., Dn, N]
+  
+    2. transB=False (Majorly used for backward pass)
+      Shape of A: [D0, D1, ..., Dn, N]
+      Shape of Dequanted B: [N, K], this is aligned with how PyTorch defined the linear weight, .e.g [out_features, in_features].
+  
+      The computation math:
+        dequant_B = dequant(B, absmax, quant_type, block_size)
+        output = A @ dequant_B
+  
+      Shape of output: [D0, D1, ..., Dn, K]
+  
+
+#### Version
+
+This version of the operator has been available since version 1 of the 'com.microsoft' operator set.
+
+#### Attributes
+
+<dl>
+<dt><tt>K</tt> : int (required)</dt>
+<dd>size of each input feature</dd>
+<dt><tt>N</tt> : int (required)</dt>
+<dd>size of each output feature</dd>
+<dt><tt>block_size</tt> : int (required)</dt>
+<dd>number of groupsize used for weight quantization. It needs to be a power of 2 and not smaller than 16.</dd>
+<dt><tt>quant_type</tt> : int (required)</dt>
+<dd>quantization data type. 0 for FP4, 1 for NF4.</dd>
+<dt><tt>training_mode</tt> : int</dt>
+<dd>Indicate if the ops run in training_mode, by default, False.</dd>
+<dt><tt>transB</tt> : int</dt>
+<dd>Whether B should be transposed on the last two dimensions before doing multiplication. Default to be 1.</dd>
+</dl>
+
+#### Inputs
+
+<dl>
+<dt><tt>A</tt> : T1</dt>
+<dd>The input tensor, not quantized</dd>
+<dt><tt>B</tt> : T2</dt>
+<dd>1-dimensional quantized data for weight</dd>
+<dt><tt>absmax</tt> : T1</dt>
+<dd>quantization constants</dd>
+</dl>
+
+#### Outputs
+
+<dl>
+<dt><tt>Y</tt> : T1</dt>
+<dd>tensor. The output tensor has the same rank as the input. </dd>
+</dl>
+
+#### Type Constraints
+
+<dl>
+<dt><tt>T1</tt> : tensor(float), tensor(float16), tensor(bfloat16)</dt>
+<dd>Constrain input and output types to float/half_float/brain_float tensors.</dd>
+<dt><tt>T2</tt> : tensor(uint8)</dt>
+<dd>Constrain quantized weight types to uint8.</dd>
+</dl>
+
+
 ### <a name="com.microsoft.MatMulFpQ4"></a><a name="com.microsoft.matmulfpq4">**com.microsoft.MatMulFpQ4**</a>
 
   Matrix product with right hand matrix being pre-packed and quantized int4 data blob.
@@ -2479,6 +2788,78 @@ This version of the operator has been available since version 1 of the 'com.micr
 </dl>
 
 
+### <a name="com.microsoft.MatMulNBits"></a><a name="com.microsoft.matmulnbits">**com.microsoft.MatMulNBits**</a>
+
+  MatMulNBits is a MatMul with weight quantized with N bits(e.g., 2, 3, 4, 5, 6, 7).It does Matrix Multiplication like MatMul (https://github.com/onnx/onnx/blob/main/docs/Operators.md#matmul) with differences:
+    1. Input B is a 2D constant Matrix. Its input feature count and output feature count are specified by attribute 'K' and 'N'.
+    2. Input B is quantized with x bits which is specified by attribute 'bits'. It is quantized blockwisely along dimension 0 (e.g. column) with block size specified by attribute block_size.
+       And block_size is not an arbitrary number and must be a power of 2 and not smaller than 16, like 16, 32, 64, 128,..
+    3. Input B's scale and zero point are specified by input scales and zero_points.
+  
+  Input B is stored as uint8_t with shape: [N][n_blocks_per_col][blob_size] in which:
+  - n_blocks_per_col = (K + block_size - 1) / block_size
+  - blob_size = block_size / 8 * bits
+  
+    For a block blob. It is stored in format:
+    struct Blob {
+      uint8 one_bits[(bits & 0x1) * 1 * block_size / 8];  // highest 1 bit for 3, 5, 7 bits quantization
+      uint8 two_bits[(bits & 0x2) * 2 * block_size / 8];  // high 2 bits for 2, 6, 7 bits quantization
+      uint8 four_bits[(bits & 0x4) * 4 * block_size / 8]; // low 4 bits for 4, 5, 6 bits quantization
+    }
+  
+  Input scales is stored in same type as original type of B(float32, float16) with shape like: [N * n_blocks_per_col]
+  Input zero_points is stored as uint8_t. If bits <= 4, two zero points are stored as one unit8_t. If bits > 4, one zero point is stored with one unit8_t. Thus, its shape is:
+    - [(N * n_blocks_per_col + 1) / 2] if bits <=4
+    - [N * n_blocks_per_col] if bits > 4
+  
+
+#### Version
+
+This version of the operator has been available since version 1 of the 'com.microsoft' operator set.
+
+#### Attributes
+
+<dl>
+<dt><tt>K</tt> : int (required)</dt>
+<dd>size of each input feature</dd>
+<dt><tt>N</tt> : int (required)</dt>
+<dd>size of each output feature</dd>
+<dt><tt>bits</tt> : int (required)</dt>
+<dd>number of bits used for weight quantization (default 4)</dd>
+<dt><tt>block_size</tt> : int (required)</dt>
+<dd>number of groupsize used for weight quantization,(default 128). It needs to be a power of 2 and not smaller than 16.</dd>
+</dl>
+
+#### Inputs (3 - 4)
+
+<dl>
+<dt><tt>A</tt> : T1</dt>
+<dd>The input tensor, not quantized</dd>
+<dt><tt>B</tt> : T2</dt>
+<dd>1-dimensional data blob</dd>
+<dt><tt>scales</tt> : T1</dt>
+<dd>quantization scale</dd>
+<dt><tt>zero_points</tt> (optional) : T2</dt>
+<dd>quantization zero points</dd>
+</dl>
+
+#### Outputs
+
+<dl>
+<dt><tt>Y</tt> : T1</dt>
+<dd>tensor. The output tensor has the same rank as the input. </dd>
+</dl>
+
+#### Type Constraints
+
+<dl>
+<dt><tt>T1</tt> : tensor(float), tensor(float16)</dt>
+<dd>Constrain input and output types to float/half_float tensors.</dd>
+<dt><tt>T2</tt> : tensor(uint8)</dt>
+<dd>Constrain quantized weight types to uint8.</dd>
+</dl>
+
+
 ### <a name="com.microsoft.MaxpoolWithMask"></a><a name="com.microsoft.maxpoolwithmask">**com.microsoft.MaxpoolWithMask**</a>
 
   For internal use.
@@ -2526,6 +2907,58 @@ This version of the operator has been available since version 1 of the 'com.micr
 </dl>
 
 
+### <a name="com.microsoft.MoE"></a><a name="com.microsoft.moe">**com.microsoft.MoE**</a>
+
+  Mixture of experts. Examples: Switch transformer(https://arxiv.org/pdf/2101.03961.pdf) use top 1,
+        GLaM(https://arxiv.org/abs/2112.06905) activates top 2 FFN, and Vision MOE(https://arxiv.org/pdf/2106.05974.pdf)
+        usually uses top 32 experts.
+        
+
+#### Version
+
+This version of the operator has been available since version 1 of the 'com.microsoft' operator set.
+
+#### Attributes
+
+<dl>
+<dt><tt>activation_type</tt> : string</dt>
+<dd>Activation function to use. Choose from relu, gelu, silu and identity. Default is relu</dd>
+<dt><tt>k</tt> : int</dt>
+<dd>Number of top experts to select from expert pool</dd>
+</dl>
+
+#### Inputs (4 - 6)
+
+<dl>
+<dt><tt>input</tt> : T</dt>
+<dd>2D input tensor with shape (num_rows, hidden_size) or 3D input tensor with shape (batch_size, sequence_length, hidden_size)</dd>
+<dt><tt>router_probs</tt> : T</dt>
+<dd>2D input tensor with shape (num_rows, num_experts)</dd>
+<dt><tt>fc1_experts_weights</tt> : T</dt>
+<dd>3D input tensor with shape (num_experts, hidden_size, inter_size)</dd>
+<dt><tt>fc2_experts_weights</tt> : T</dt>
+<dd>3D input tensor with shape (num_experts, inter_size, hidden_size)</dd>
+<dt><tt>fc1_experts_bias</tt> (optional) : T</dt>
+<dd>2D optional input tensor with shape (num_experts, inter_size)</dd>
+<dt><tt>fc2_experts_bias</tt> (optional) : T</dt>
+<dd>2D optional input tensor with shape (num_experts, hidden_size)</dd>
+</dl>
+
+#### Outputs
+
+<dl>
+<dt><tt>output</tt> : T</dt>
+<dd>2D input tensor with shape (num_rows, hidden_size) or 3D input tensor with shape (batch_size, sequence_length, hidden_size)</dd>
+</dl>
+
+#### Type Constraints
+
+<dl>
+<dt><tt>T</tt> : tensor(float), tensor(float16)</dt>
+<dd>Constrain input and output types to float or float16 tensors.</dd>
+</dl>
+
+
 ### <a name="com.microsoft.MulInteger"></a><a name="com.microsoft.mulinteger">**com.microsoft.MulInteger**</a>
 
   Performs element-wise binary quantized multiplication (with Numpy-style broadcasting support).
@@ -2606,7 +3039,7 @@ This version of the operator has been available since version 1 of the 'com.micr
 <dt><tt>bias</tt> (optional) : T</dt>
 <dd>Bias tensor with shape (hidden_size + hidden_size + v_hidden_size) from input projection</dd>
 <dt><tt>key_padding_mask</tt> (optional) : M</dt>
-<dd>Key padding mask with shape (batch_size) or (3 * batch_size + 2) or (batch_size, kv_sequence_length)</dd>
+<dd>Key padding mask with shape (batch_size), (3 * batch_size + 2), (batch_size, kv_sequence_length), (batch_size, total_sequence_length), or (batch_size, sequence_length, total_sequence_length)</dd>
 <dt><tt>relative_position_bias</tt> (optional) : T</dt>
 <dd>relative position bias: addition to QxK' with shape (batch_size, num_heads, sequence_length, total_sequence_length) or (1, num_heads, sequence_length, total_sequence_length)</dd>
 <dt><tt>past_key</tt> (optional) : T</dt>
@@ -4568,6 +5001,54 @@ This version of the operator has been available since version 1 of the 'com.micr
 </dl>
 
 
+### <a name="com.microsoft.RotaryEmbedding"></a><a name="com.microsoft.rotaryembedding">**com.microsoft.RotaryEmbedding**</a>
+
+  RotaryEmbedding is the implementation of rotary positional embeddings (RoPE). The positions are represented as rotation matrices
+  that are multiplied to query and key before the inner product of query and key is taken.
+
+#### Version
+
+This version of the operator has been available since version 1 of the 'com.microsoft' operator set.
+
+#### Attributes
+
+<dl>
+<dt><tt>interleaved</tt> : int</dt>
+<dd>Rotate using interleaved pattern. Default value is 0 (False).</dd>
+<dt><tt>scale</tt> : float</dt>
+<dd>Custom scale will be used if specified. Default value is 1.0</dd>
+</dl>
+
+#### Inputs
+
+<dl>
+<dt><tt>input</tt> : T</dt>
+<dd>3D tensor with shape (batch_size, sequence_length, hidden_size) or 4D with shape (batch_size, num_heads, sequence_length, head_size)</dd>
+<dt><tt>position_ids</tt> : M</dt>
+<dd>1D tensor with shape (1) or 2D tensor with shape (batch_size, sequence_length)</dd>
+<dt><tt>cos_cache</tt> : T</dt>
+<dd>2D tensor with shape (max_sequence_length, head_size / 2).</dd>
+<dt><tt>sin_cache</tt> : T</dt>
+<dd>2D tensor with shape (max_sequence_length, head_size / 2).</dd>
+</dl>
+
+#### Outputs
+
+<dl>
+<dt><tt>output</tt> : T</dt>
+<dd>tensor with same shape as input.</dd>
+</dl>
+
+#### Type Constraints
+
+<dl>
+<dt><tt>T</tt> : tensor(float), tensor(float16)</dt>
+<dd>Constrain input and output types to float tensors.</dd>
+<dt><tt>M</tt> : tensor(int64)</dt>
+<dd>Constrain input and output types to integer tensors</dd>
+</dl>
+
+
 ### <a name="com.microsoft.SampleOp"></a><a name="com.microsoft.sampleop">**com.microsoft.SampleOp**</a>
 
   Sample echo operator.
@@ -4683,6 +5164,72 @@ This version of the operator has been available since version 1 of the 'com.micr
 </dl>
 
 
+### <a name="com.microsoft.SkipGroupNorm"></a><a name="com.microsoft.skipgroupnorm">**com.microsoft.SkipGroupNorm**</a>
+
+  This operator element-wise adds x, skip and bias, then apply group normalization and optional activation.
+  
+  This operator transforms input according to
+    s = x + skip + bias
+    y = gamma * (s - mean) / sqrt(variance + epsilon) + beta
+  
+  The input channels are separated into num_groups groups, each containing num_channels / num_groups channels.
+  The num_channels must be divisible by num_groups.
+  The mean and standard-deviation of s are calculated separately over the each group.
+  The weight and bias are per-channel affine transform parameter vectors of size num_channels.
+  
+  The activation attribute can be used to enable activation after group normalization.
+
+#### Version
+
+This version of the operator has been available since version 1 of the 'com.microsoft' operator set.
+
+#### Attributes
+
+<dl>
+<dt><tt>activation</tt> : int (required)</dt>
+<dd>Activation after group normalization: 0 for None, 1 for SiLU</dd>
+<dt><tt>channels_last</tt> : int</dt>
+<dd>1 if the input and output are in the NHWC layout, 0 if it is in the NCHW layout. Defaults to 1.</dd>
+<dt><tt>epsilon</tt> : float</dt>
+<dd>The epsilon value to use to avoid division by zero</dd>
+<dt><tt>groups</tt> : int (required)</dt>
+<dd>The number of groups of channels. It should be a divisor of the number of channels C</dd>
+</dl>
+
+#### Inputs (4 - 5)
+
+<dl>
+<dt><tt>X</tt> : T</dt>
+<dd>Input data tensor. Dimensions are (N x H x W x C) when channels_last is 1  or (N x C x H x W) otherwise, where N is the batch size, C is the number of channels, and H and W are the height and width of the data</dd>
+<dt><tt>gamma</tt> : M</dt>
+<dd>1D gamma tensor for normalization with shape (C), where C is number of channels</dd>
+<dt><tt>beta</tt> : M</dt>
+<dd>1D beta tensor for normalization with shape (C), where C is number of channels</dd>
+<dt><tt>skip</tt> : T</dt>
+<dd>4D or 2D skip tensor. The shape can be (N x H x W x C) or (N x 1 x 1 x C) or (N x C)</dd>
+<dt><tt>bias</tt> (optional) : T</dt>
+<dd>1D bias tensor. Dimensions are (C), where C is number of channels</dd>
+</dl>
+
+#### Outputs (1 - 2)
+
+<dl>
+<dt><tt>Y</tt> : T</dt>
+<dd>The output tensor of the same shape as X</dd>
+<dt><tt>S</tt> (optional) : T</dt>
+<dd>The element-wise sum of input x, skip and bias tensors. It has the same shape as X</dd>
+</dl>
+
+#### Type Constraints
+
+<dl>
+<dt><tt>T</tt> : tensor(float16), tensor(float)</dt>
+<dd>Constrain input X, skip, bias and output Y, S types to float tensors.</dd>
+<dt><tt>M</tt> : tensor(float16), tensor(float)</dt>
+<dd>Constrain gamma and beta to float tensors.</dd>
+</dl>
+
+
 ### <a name="com.microsoft.SkipLayerNormalization"></a><a name="com.microsoft.skiplayernormalization">**com.microsoft.SkipLayerNormalization**</a>
 
   Skip and Layer Normalization Fusion
@@ -5078,6 +5625,47 @@ This version of the operator has been available since version 1 of the 'com.micr
 </dl>
 
 
+### <a name="com.microsoft.UnfoldTensor"></a><a name="com.microsoft.unfoldtensor">**com.microsoft.UnfoldTensor**</a>
+
+  Returns a tensor which contains all slices of size size from input tensor in the dimension dim. Step between two slices is given by step. If sizedim is the size of dimension dim for input tensor, the size of dimension dim in the returned tensor will be (sizedim - size) / step + 1. An additional dimension of size size is appended in the returned tensor.
+
+#### Version
+
+This version of the operator has been available since version 1 of the 'com.microsoft' operator set.
+
+#### Attributes
+
+<dl>
+<dt><tt>dim</tt> : int</dt>
+<dd>specify the dimension to unfold</dd>
+<dt><tt>size</tt> : int (required)</dt>
+<dd>specify the size</dd>
+<dt><tt>step</tt> : int</dt>
+<dd>specify the step.</dd>
+</dl>
+
+#### Inputs
+
+<dl>
+<dt><tt>input</tt> : T</dt>
+<dd>input tensor</dd>
+</dl>
+
+#### Outputs
+
+<dl>
+<dt><tt>output</tt> : T</dt>
+<dd>Output tensor.</dd>
+</dl>
+
+#### Type Constraints
+
+<dl>
+<dt><tt>T</tt> : tensor(uint8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(int8), tensor(int16), tensor(int32), tensor(int64), tensor(bfloat16), tensor(float16), tensor(float), tensor(double), tensor(string), tensor(bool), tensor(complex64), tensor(complex128)</dt>
+<dd>Allow inputs and outputs to be any kind of tensor.</dd>
+</dl>
+
+
 ### <a name="com.microsoft.Unique"></a><a name="com.microsoft.unique">**com.microsoft.Unique**</a>
 
   Finds all the unique values (deduped list) present in the given input tensor.
@@ -5124,6 +5712,107 @@ This version of the operator has been available since version 1 of the 'com.micr
 </dl>
 
 
+### <a name="com.microsoft.WhisperBeamSearch"></a><a name="com.microsoft.whisperbeamsearch">**com.microsoft.WhisperBeamSearch**</a>
+
+  Beam Search for whisper model, especiall with cross_qk features etc.
+
+#### Version
+
+This version of the operator has been available since version 1 of the 'com.microsoft' operator set.
+
+#### Attributes
+
+<dl>
+<dt><tt>decoder</tt> : graph (required)</dt>
+<dd>Decoder subgraph to execute in a loop.</dd>
+<dt><tt>decoder_output_cross_qk</tt> : int</dt>
+<dd>If nozero, decoder subgraph contains output Q*K from cross attentions. Default 0.</dd>
+<dt><tt>decoder_start_token_id</tt> : int</dt>
+<dd>The id of the token that indicates decoding starts.</dd>
+<dt><tt>early_stopping</tt> : int</dt>
+<dd>early stop or not</dd>
+<dt><tt>encoder</tt> : graph</dt>
+<dd>The subgraph for initialization of encoder and decoder. It will be called once before decoder subgraph.</dd>
+<dt><tt>eos_token_id</tt> : int (required)</dt>
+<dd>The id of the end-of-sequence token</dd>
+<dt><tt>init_decoder</tt> : graph</dt>
+<dd>The subgraph for the first decoding run. It will be called once before `decoder` subgraph. This is relevant only for the GPT2 model. If this attribute is missing, the `decoder` subgraph will be used for all decoding runs</dd>
+<dt><tt>model_type</tt> : int</dt>
+<dd>Must be 2 for whisper</dd>
+<dt><tt>no_repeat_ngram_size</tt> : int</dt>
+<dd>no repeat ngrams size</dd>
+<dt><tt>no_speech_token</tt> : int</dt>
+<dd>The token in whisper model that marks all sequence empty. With this model, whisper could output no_speech_prob after. Default -1.</dd>
+<dt><tt>pad_token_id</tt> : int (required)</dt>
+<dd>The id of the padding token</dd>
+<dt><tt>vocab_size</tt> : int</dt>
+<dd>Size of the vocabulary. If not provided, it will be inferred from the decoder subgraph's output shape</dd>
+</dl>
+
+#### Inputs (5 - 14)
+
+<dl>
+<dt><tt>input_ids</tt> : F</dt>
+<dd>The sequence used as a prompt for the generation in the encoder subgraph. Shape is (batch_size, sequence_length)</dd>
+<dt><tt>max_length</tt> : I</dt>
+<dd>The maximum length of the sequence to be generated. Shape is (1)</dd>
+<dt><tt>min_length</tt> (optional) : I</dt>
+<dd>The minimum length below which the score of eos_token_id is set to -Inf. Shape is (1)</dd>
+<dt><tt>num_beams</tt> : I</dt>
+<dd>Number of beams for beam search. 1 means no beam search. Shape is (1)</dd>
+<dt><tt>num_return_sequences</tt> : I</dt>
+<dd>The number of returned sequences in the batch. Shape is (1)</dd>
+<dt><tt>length_penalty</tt> (optional) : T</dt>
+<dd>Exponential penalty to the length. Default value 1.0 means no penalty.Value > 1.0 encourages longer sequences, while values < 1.0 produces shorter sequences.Shape is (1,)</dd>
+<dt><tt>repetition_penalty</tt> (optional) : T</dt>
+<dd>The parameter for repetition penalty. Default value 1.0 means no penalty. Accepts value > 0.0. Shape is (1)</dd>
+<dt><tt>vocab_mask</tt> (optional) : M</dt>
+<dd>Mask of vocabulary. Words that masked with 0 are not allowed to be generated, and 1 is allowed. Shape is (vacab_size)</dd>
+<dt><tt>prefix_vocab_mask</tt> (optional) : M</dt>
+<dd>Mask of vocabulary for first step. Words that masked with 0 are not allowed to be generated, and 1 is allowed. Shape is (batch_size, vocab_size)</dd>
+<dt><tt>attention_mask</tt> (optional) : I</dt>
+<dd>Custom attention mask. Shape is (batch_size, sequence_length)</dd>
+<dt><tt>decoder_input_ids</tt> (optional) : I</dt>
+<dd>The forced input id sequence for the decoder subgraph. Shape is (batch_size, initial_sequence_length)</dd>
+<dt><tt>logits_processor</tt> (optional) : I</dt>
+<dd>Specific logits processor for different types of beamsearch models. Default value 0 means no specific logit processor. Accepts value >= 0. Shape is (1)</dd>
+<dt><tt>cross_qk_layer_head</tt> (optional) : I</dt>
+<dd>Only keep this list of (layer, head) of QK in the final cross_qk output when use_cross_qk is set. Default collect allits shape is (number of (layer, head) to keep, 2), i.e., [[layer_id1, head_id1], [layer_id2, head_id2]......]</dd>
+<dt><tt>extra_decoding_ids</tt> (optional) : I</dt>
+<dd>Part of the decoder_input_ids that we need cross qk for it. it is of shape  (batch_size, extra_decoding_ids_len).In such case, we should remove this from the tail of the decoder_input_ids, and put it here. ids < 0 in it (for multiple batch) are treated as stop of the extra_decoding_ids for corresponding batch.</dd>
+</dl>
+
+#### Outputs (1 - 5)
+
+<dl>
+<dt><tt>sequences</tt> : I</dt>
+<dd>Word IDs of generated sequences. Shape is (batch_size, num_return_sequences, max_sequence_length)</dd>
+<dt><tt>sequences_scores</tt> (optional) : T</dt>
+<dd>Final beam score of the generated sequences. Shape is (batch_size, num_return_sequences)</dd>
+<dt><tt>scores</tt> (optional) : T</dt>
+<dd>Processed beam scores for each vocabulary token at each generation step.Beam scores consisting of log softmax scores for each vocabulary token and sum of log softmax of previously generated tokens in this beam.Shape is (max_length - sequence_length, batch_size, num_beams, vocab_size)</dd>
+<dt><tt>cross_qk</tt> (optional) : V</dt>
+<dd>Output the accumulated stacked Q*K in cross attentions. Let H = number of Head of cross attention, F = the frames or kv-seq-len of the cross attention input, T = real decoded token length, L = number of layers,B = batch size, R = num_return_sequences. It then should return tensor of shape [B, R, L*H, T, F].If cross_qk_layer_head is given, shape is [B, R, cross_qk_layer_head.shape[0], T, F]</dd>
+<dt><tt>non_speech_probs</tt> (optional) : T</dt>
+<dd>For whisper model, output the probabilities from logits after encoder and context decoding for the no_speech_token.Currently we treat the last token's logits is what we need, in future extra graph logic may be add to the encoder/context-decoder subgraph.The prob is save before logits may be updated by extra-decoding-ids. The shape of non_speech_probs is [B]</dd>
+</dl>
+
+#### Type Constraints
+
+<dl>
+<dt><tt>T</tt> : tensor(float), tensor(float16)</dt>
+<dd>Constrain to float tensors.</dd>
+<dt><tt>F</tt> : tensor(float), tensor(int32), tensor(float16)</dt>
+<dd>Constrain input type to float or int tensors.</dd>
+<dt><tt>I</tt> : tensor(int32)</dt>
+<dd>Constrain to integer types</dd>
+<dt><tt>M</tt> : tensor(int32)</dt>
+<dd>Constrain mask to integer types</dd>
+<dt><tt>V</tt> : tensor(float)</dt>
+<dd>Constrain cross_qk to float32 tensors.</dd>
+</dl>
+
+
 ### <a name="com.microsoft.WordConvEmbedding"></a><a name="com.microsoft.wordconvembedding">**com.microsoft.WordConvEmbedding**</a>
 
   The WordConvEmbedding takes in a batch of sequence words and embed each word to a vector.
diff --git a/docs/FAQ.md b/docs/FAQ.md
index e039f4e4e4160..70bedbd02e944 100644
--- a/docs/FAQ.md
+++ b/docs/FAQ.md
@@ -2,13 +2,13 @@
 Here are some commonly raised questions from users of ONNX Runtime and brought up in [Issues](https://github.com/microsoft/onnxruntime/issues).
 
 ## Do the GPU builds support quantized models?
-The default CUDA build supports 3 standard quantization operators: QuantizeLinear, DequantizeLinear, and MatMulInteger. The TensorRT EP has limited support for INT8 quantized ops. In general, support of quantized models through ORT is continuing to expand on a model-driven basis. For performance improvements, quantization is not always required, and we suggest trying alternative strategies to [performance tune](./ONNX_Runtime_Perf_Tuning.md) before determining that quantization is necessary.
+The default CUDA build supports 3 standard quantization operators: QuantizeLinear, DequantizeLinear, and MatMulInteger. The TensorRT EP has limited support for INT8 quantized ops. In general, support of quantized models through ORT is continuing to expand on a model-driven basis. For performance improvements, quantization is not always required, and we suggest trying alternative strategies to [performance tune](https://onnxruntime.ai/docs/performance/tune-performance/) before determining that quantization is necessary.
 
 ## How do I change the severity level of the default logger to something other than the default (WARNING)?
 Setting the severity level to VERBOSE is most useful when debugging errors.
 
 Refer to the API documentation:
-* Python - [RunOptions.log_severity_level](https://microsoft.github.io/onnxruntime/python/api_summary.html#onnxruntime.RunOptions.log_severity_level)
+* Python - [RunOptions.log_severity_level](https://onnxruntime.ai/docs/api/python/api_summary.html#onnxruntime.RunOptions.log_severity_level)
 ```
 import onnxruntime as ort
 ort.set_default_logger_severity(0)
diff --git a/docs/Memory_Optimizer.md b/docs/Memory_Optimizer.md
index 3ef3a575f20a1..0147a937db81d 100644
--- a/docs/Memory_Optimizer.md
+++ b/docs/Memory_Optimizer.md
@@ -22,68 +22,113 @@ Not all models and recipes need this optimizer technique. Imagine if your traini
 1. Make sure ONNX Runtime training wheel is installed and correctly configured.
 2. Integrate models using `ORTModule`, be noted log_level should be equal or lower than INFO.
 	> ort_model = ORTModule(pt_model, DebugOptions(log_level=LogLevel.INFO))
-3. Run the training as usual and redirect all outputs into log file; then stop it after training few steps.
-4. Check the logging file, search "Summary", you could possibly find something like this:
+3. Run the training as usual; then stop it after training few steps.
+4. Check the logs, you could find something like this:
 	```
-	MemoryOptimizer Summary:
-	User config:
-
-	=================================
-	########Recompute########
-	Subgraph: CumSum+Sub+Mul+Unsqueeze+Cast+Mul+Cast+Reshape+Mul+FusedMatMul+Add+Reshape+Cast+Where+Softmax+
-		OptimizationType: Disabled
-		Patterns:
-		PatternShape:input_ids_dim0 x 16 x input_ids_dim1 x input_ids_dim1 x  Frequency:23
-	--------------------------------
-	Subgraph: FastGelu+
-		OptimizationType: Disabled
-		Patterns:
-		PatternShape:input_ids_dim0 x input_ids_dim1 x 4096 x   Frequency:24
-	=================================
-	########RecomputeWithCompromise########
-	Subgraph: Cast+Where+Softmax+
-		OptimizationType: Disabled
-		Patterns:
-		PatternShape:input_ids_dim0 x 16 x input_ids_dim1 x input_ids_dim1 x  Frequency:24
-	--------------------------------
-	=================================
+	Memory Optimizer     :   OFF   :   Enable with env ORTMODULE_MEMORY_OPT_CONFIG=<config>, available configs:
+	                                   Config                                                      Freq    Max Saving(B)   Saving Symbolic(Bytes)
+	- Plan 1             :   OFF   :   Reshape+Where+BiasSoftmax+:1:-1                             5       671,088,640     640.0*inputs_input_ids_dim0*inputs_input_ids_dim1**2
+	- Plan 2             :   OFF   :   Cast+:1:-1                                                  6       402,587,648     inputs_input_ids_dim0*inputs_input_ids_dim1*(384.0*inputs_input_ids_dim1 - 64.0)
+	- Plan 3             :   OFF   :   Reshape+Where+:1:-1                                         1       134,217,728     128.0*inputs_input_ids_dim0*inputs_input_ids_dim1**2
+	- Plan 4             :   OFF   :   BiasSoftmax+:1:-1                                           1       134,086,656     128.0*inputs_input_ids_dim0*inputs_input_ids_dim1*(inputs_input_ids_dim1 - 1)
+	- Plan 5             :   OFF   :   BiasGelu+:1:-1                                              6       125,808,640     inputs_input_ids_dim0*(122880.0*inputs_input_ids_dim1 - 20480.0)
+	- Plan 6             :   OFF   :   FusedMatMul+:1:-1                                           6       125,808,640     inputs_input_ids_dim0*(122880.0*inputs_input_ids_dim1 - 20480.0)
+	- Plan 7             :   OFF   :   FusedMatMul+Add+FusedMatMul+Add+Add+Add+:1:-1               5       26,214,400      25600.0*inputs_input_ids_dim0*inputs_input_ids_dim1
+	- Plan 8             :   OFF   :   Add+:1:-1                                                   1       5,237,760       5120.0*inputs_input_ids_dim0*(inputs_input_ids_dim1 - 1)
+	- Plan 9             :   OFF   :   Reshape+Unsqueeze+Unsqueeze+Cast+Sub+Mul+Cast+:1:-1         1       4,096           4.0*inputs_input_ids_dim0*inputs_input_ids_dim1
+	- Plan 10            :   OFF   :   Cast+:2:-1                                                  1       2,048           2.0*inputs_input_ids_dim0*inputs_input_ids_dim1
+
+
+	Note 1: use comma as delimiter to enable multiple memory optimization plans at the same time:
+	export ORTMODULE_MEMORY_OPT_CONFIG=<plan1 config>,<plan2 config>,...
+	Note 2: memory saving is calculated based on the 1st batch symbolic dim values:
+	inputs_input_ids_dim0=1,  inputs_input_ids_dim1=1024,  inputs_attention_mask_dim0=1,  inputs_attention_mask_dim1=1024,  inputs_labels_dim0=1,  inputs_labels_dim1=1024,
 	```
-5. As shown above, 'Subgraph' shows 1) a string representative for a re-computable subgraph; and 2) current status of memory optimization. All are disabled for recompute in this case.
-6. Set environment variable `ORTMODULE_MEMORY_OPT_CONFIG` to enable some of the subgraph to do recompute. In below example, 12 FastGelu related subgraphs are allowed to recompute.
-`FastGelu+` is the subgraph string representative; `1` in the middle indicates 'Recompute' is enabled (0, on the contrary indicates it's disabled); `12` means the initial 12 subgraph occurrences will be recomputed, all others are left as it is, filling `-1` will make all occurrences be recomputed.
+5. As shown above, `Config` is a string representative for a re-computable subgraph. All are disabled for recompute in this case.
+6. Set environment variable `ORTMODULE_MEMORY_OPT_CONFIG` to enable some of the subgraph to do recompute. In below example, `6` `BiasGelu+` related subgraphs are allowed to recompute.
+`BiasGelu+` is the subgraph string representative; `1` in the middle indicates 'Recompute' is enabled (0, on the contrary indicates it's disabled); `6` means the initial 6 subgraph occurrences will be recomputed, all others are left as it is, filling `-1` will make all occurrences be recomputed.
 	```
-	export ORTMODULE_MEMORY_OPT_CONFIG="FastGelu+:1:12"
+	export ORTMODULE_MEMORY_OPT_CONFIG="BiasGelu+:1:6" # Use comma as separator for enabling more than one subgraphs.
 	```
-7. Then run the training again, you will see logs like this:
+7. Then run the training again, and you will see logs like this:
 	```
-	MemoryOptimizer Summary:
-	User config:
-	**FastGelu+:1:12**
-	=================================
-	########Recompute########
-	Subgraph: CumSum+Sub+Mul+Unsqueeze+Cast+Mul+Cast+Reshape+Mul+FusedMatMul+Add+Reshape+Cast+Where+Softmax+
-		OptimizationType: Disabled
-		Patterns:
-		PatternShape:input_ids_dim0 x 16 x input_ids_dim1 x input_ids_dim1 x  Frequency:23
-	--------------------------------
-	Subgraph: FastGelu+
-		OptimizationType: **Recompute (requested_count=12, actual applied_count=12)**
-		Patterns:
-		PatternShape:input_ids_dim0 x input_ids_dim1 x 4096 x   Frequency:24
-	=================================
-	########RecomputeWithCompromise########
-	Subgraph: Cast+Where+Softmax+
-		OptimizationType: Disabled
-		Patterns:
-		PatternShape:input_ids_dim0 x 16 x input_ids_dim1 x input_ids_dim1 x  Frequency:24
-	--------------------------------
-	=================================
+	Memory Optimizer     :   ON    :   User config: Reshape+Where+BiasSoftmax+:1:-1, probe level: 1, available configs:
+	                                   Config                                                      Freq    Max Saving(B)   Saving Symbolic(Bytes)
+	- Plan 1             :   OFF   :   Reshape+Where+BiasSoftmax+:1:-1                             5       671,088,640     640.0*inputs_input_ids_dim0*inputs_input_ids_dim1**2
+	- Plan 2             :   OFF   :   Cast+:1:-1                                                  6       402,587,648     inputs_input_ids_dim0*inputs_input_ids_dim1*(384.0*inputs_input_ids_dim1 - 64.0)
+	- Plan 3             :   OFF   :   Reshape+Where+:1:-1                                         1       134,217,728     128.0*inputs_input_ids_dim0*inputs_input_ids_dim1**2
+	- Plan 4             :   OFF   :   BiasSoftmax+:1:-1                                           1       134,086,656     128.0*inputs_input_ids_dim0*inputs_input_ids_dim1*(inputs_input_ids_dim1 - 1)
+	- Plan 5             :   ON    :   BiasGelu+:1:-1                                              6       125,808,640     inputs_input_ids_dim0*(122880.0*inputs_input_ids_dim1 - 20480.0)
+	- Plan 6             :   OFF   :   FusedMatMul+:1:-1                                           6       125,808,640     inputs_input_ids_dim0*(122880.0*inputs_input_ids_dim1 - 20480.0)
+	- Plan 7             :   OFF   :   FusedMatMul+Add+FusedMatMul+Add+Add+Add+:1:-1               5       26,214,400      25600.0*inputs_input_ids_dim0*inputs_input_ids_dim1
+	- Plan 8             :   OFF   :   Add+:1:-1                                                   1       5,237,760       5120.0*inputs_input_ids_dim0*(inputs_input_ids_dim1 - 1)
+	- Plan 9             :   OFF   :   Reshape+Unsqueeze+Unsqueeze+Cast+Sub+Mul+Cast+:1:-1         1       4,096           4.0*inputs_input_ids_dim0*inputs_input_ids_dim1
+	- Plan 10            :   OFF   :   Cast+:2:-1                                                  1       2,048           2.0*inputs_input_ids_dim0*inputs_input_ids_dim1
 	```
 8. You may need iterate few times on step 6 and 7 until you find a good config for this model to run a bigger batch size. Or you may fail to find if memory optimization does not apply to the model well.
 
+## Optimization Configuration
+
+The basic optimization unit is represented with a unique `cluster id`, for example `BiasGelu+` is one `cluster id`.
+Following `cluster id` is the `optimization strategy`: 0 - none, 1 - recompute, 2 - recompute with compromised memory saving.
+Following `optimization strategy` is the `request count` to apply the given optimization. Using `-1` to apply all. This would give user a bit more flexibility to avoid unnecessary memory saving.
+
 ## Compromised Recompute
 
-If you check the above logs, there is a separate section called "RecomputeWithCompromise". Recompute the subgraphs under it usually will save part of the activation (for example half of them), not all of them. Follow the same way to enable it.
+If you check the above logs, there is a config `Cast+:2:-1`, `2` indicates it's a recomputation than can save part of the stashed activation size, not all. Recompute the subgraphs under it usually will save part of the activation (for example half of them), not all of them. Follow the same way to enable it.
+
+## Memory Optimization Debug Infos
+
+Using following log level
+> ort_model = ORTModule(pt_model, DebugOptions(log_level=LogLevel.DEVINFO))
+
+Besides the logs shown in `LogLevel.INFO`, you can also see different node patterns that can apply different optimization options.
+
+The way we get the table:
+- For a specific node, it might has different optimization options, we [generates](../orttraining/orttraining/core/optimizer/memory_optimizer/common.h#L124C26-L124C26) a hash (called `Node Cluster ID`) for the node according to all available optimization options.
+- Map all nodes having same `Node Cluster ID` in buckets, each bucket is displayed as one row.
+
+```
+MemoryInsight Summary - User config: not provided
+===========================================================================================================================================
+|Freq   | Memory Optimization Opportunities (Clustered by node-level activation patterns)                                                 |
+|_ _ _ _|_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _|
+|6      |For each row options are mutually exclusive, only one of them can be enabled.                                                    |
+|       |                                                                                                                                 |
+|       |>>Option 1     : Recompute subgraph FusedMatMul+Add+Reshape+                                                                     |
+|       |  Status       : Disabled. Enable with export ORTMODULE_MEMORY_OPT_CONFIG=FusedMatMul+Add+Reshape+:1:-1                          |
+|       |  Stashed Activations:                                                                                                           |
+|       |   - ReuseFreq :  Output 0(6),                                                                                                   |
+|       |   - Output 0  : [((inputs_input_ids_dim0)*(inputs_input_ids_dim1)*(32)*(240))], byte/elem: 2, 100% saved                        |
+|_ _ _ _|_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _|
+|5      |For each row options are mutually exclusive, only one of them can be enabled.                                                    |
+|       |                                                                                                                                 |
+|       |>>Option 1     : Recompute subgraph FusedMatMul+                                                                                 |
+|       |  Status       : Disabled. Enable with export ORTMODULE_MEMORY_OPT_CONFIG=FusedMatMul+:1:-1                                      |
+|       |  Stashed Activations:                                                                                                           |
+|       |   - Output 0  : [((inputs_input_ids_dim0)*(inputs_input_ids_dim1)*(10240))], byte/elem: 2, 100% saved                           |
+|_ _ _ _|_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _|
+|5      |For each row options are mutually exclusive, only one of them can be enabled.                                                    |
+|       |                                                                                                                                 |
+|       |>>Option 1     : Recompute subgraph Cast+                                                                                        |
+|       |  Status       : Disabled. Enable with export ORTMODULE_MEMORY_OPT_CONFIG=Cast+:1:-1                                             |
+|       |  Stashed Activations:                                                                                                           |
+|       |   - Output 0  : [((inputs_input_ids_dim0)*(32)*(inputs_input_ids_dim1)*(inputs_input_ids_dim1))], byte/elem: 2, 100% saved      |
+|_ _ _ _|_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _|
+|1      |For each row options are mutually exclusive, only one of them can be enabled.                                                    |
+|       |                                                                                                                                 |
+|       |>>Option 1     : Recompute subgraph Reshape+Unsqueeze+Unsqueeze+Cast+Sub+Mul+Cast+                                               |
+|       |  Status       : Disabled. Enable with export ORTMODULE_MEMORY_OPT_CONFIG=Reshape+Unsqueeze+Unsqueeze+Cast+Sub+Mul+Cast+:1:-1    |
+|       |  Stashed Activations:                                                                                                           |
+|       |   - Output 0  : [((inputs_input_ids_dim0)*(1)*(1)*(inputs_input_ids_dim1))], byte/elem: 4, 100% saved                           |
+|       |                                                                                                                                 |
+|       |>>Option 2     : RecomputeWithCompromise subgraph Cast+                                                                          |
+|       |  Status       : Disabled. Enable with export ORTMODULE_MEMORY_OPT_CONFIG=Cast+:2:-1                                             |
+|       |  Stashed Activations:                                                                                                           |
+|       |   - Output 0  : [((inputs_input_ids_dim0)*(1)*(1)*(inputs_input_ids_dim1))], byte/elem: 4, 50% saved                            |
+|_ _ _ _|_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _|
+
+```
 
 ## Notes
 
diff --git a/docs/ORTModule_PythonOp_Notes.md b/docs/ORTModule_PythonOp_Notes.md
new file mode 100644
index 0000000000000..1bb549f8fab9d
--- /dev/null
+++ b/docs/ORTModule_PythonOp_Notes.md
@@ -0,0 +1,156 @@
+# ORTModule Custom Autograd Function Support
+
+## What is autograd Functions?
+
+`PyTorch` allows users to define customized operators (for its forward and backward implementations) [PyTorch: Defining New autograd Functions](https://github.com/pytorch/tutorials/blob/d98606855d3c8c5bd78d55b95717be5a02960363/beginner_source/examples_autograd/polynomial_custom_function.py#L25).
+
+There are many such use cases as more optimized deep learning projects keep growing, here we just name a few:
+- [NVIDIA/apex](https://github.com/NVIDIA/apex/blob/58acf96915eecd7e13adff61d2c389fba3efede2/apex/transformer/functional/fused_softmax.py#L21)
+- [NVIDIA/Megatron-LM](https://github.com/NVIDIA/Megatron-LM/blob/f7727433293427bef04858f67b2889fe9b177d88/megatron/core/tensor_parallel/mappings.py#L220C31-L220C31)
+- [Dao-AILab/flash-attention](https://github.com/Dao-AILab/flash-attention/blob/3a9fe7b0faaa9d648394026c9c20231c07bf999d/flash_attn/flash_attn_interface.py#L429),
+- [openai/triton](https://github.com/openai/triton/blob/424e67e7275f0cb2cd231e7a4d17ff8570530b77/python/tutorials/06-fused-attention.py#L457)
+- ...
+
+Those operators are used in training/evaluation scenarios a lot, where is ORTModule capability overlaps.
+To best release ORTModule's acceleration power, we need tolerant and handle those customized operators
+from the to-onnx conversion, to backward graph building, and also its execution in runtime as a full lifecycle.
+
+## How ORTModule support autograd.Function?
+
+The way we have here is through introduced `PythonOp`/`PythonOpGrad` MS domain operators in `ONNX Runtime`,
+- Map autograd Function (`prim::PythonOp` in `PyTorch`) to `PythonOp` in `ONNX Runtime` during model export by [registering customized export function](https://github.com/microsoft/onnxruntime/blob/c2bd5b70b29eb3c687c5497696e7b0a1930604d3/orttraining/orttraining/python/training/ortmodule/_custom_autograd_function.py#L69C16-L69C16)
+  ```
+    class ScalarAndTupleFunction(torch.autograd.Function):
+        @staticmethod
+        def forward(ctx, input, alpha, beta, gamma):
+            ctx.save_for_backward(input)
+            ctx.alpha = alpha
+            ctx.beta = beta
+            ctx.gamma = gamma
+            return alpha * beta[0] * beta[1] * gamma * input.clamp(min=0)
+
+        @staticmethod
+        def backward(ctx, grad_output):
+            input, = ctx.saved_tensors
+            alpha = ctx.alpha
+            beta = ctx.beta
+            gamma = ctx.gamma
+            grad_input = grad_output.clone()
+            grad_input[input < 0] = 0
+            return alpha * beta[0] * beta[1] * gamma * grad_input, None, None, None
+  ```
+  The example above shows a customized function taking 4 inputs (despite of ctx), the first input is a tensor [exporter treats it as input for `PythonOp`](https://github.com/microsoft/onnxruntime/blob/c2bd5b70b29eb3c687c5497696e7b0a1930604d3/orttraining/orttraining/python/training/ortmodule/_custom_autograd_function_exporter.py#L174),
+  the others are scalars, export function will convert all such non-tensor inputs to constant and [stores
+  in `PythonOp`'s attributes](https://github.com/microsoft/onnxruntime/blob/c2bd5b70b29eb3c687c5497696e7b0a1930604d3/orttraining/orttraining/python/training/ortmodule/_custom_autograd_function_exporter.py#L272). Things to be noted here: if the non-tensor
+  input is one of those types "bool scalar, int scalar, float scalar, bool tuple, int tuple, float tuple", they will be
+  stored in corresponding attributes; otherwise, they will be treated a `object` and the object address stored in `input_pointer_scalars` ([reference count will be increased](https://github.com/microsoft/onnxruntime/blob/c2bd5b70b29eb3c687c5497696e7b0a1930604d3/orttraining/orttraining/python/training/ortmodule/_custom_autograd_function_exporter.py#L250C27-L250C27) also to make sure it exists during model run).
+- [PythonOp kernel](https://github.com/microsoft/onnxruntime/blob/c2bd5b70b29eb3c687c5497696e7b0a1930604d3/orttraining/orttraining/training_ops/cuda/torch/torch_custom_function_kernel.cc#L38) is responsible to run the `forward` interface user defined through [forward runner](https://github.com/microsoft/onnxruntime/blob/c2bd5b70b29eb3c687c5497696e7b0a1930604d3/orttraining/orttraining/python/training/ortmodule/_custom_autograd_function_runner.py#L409).
+Similarly, [PythonOpGrad kernel](https://github.com/microsoft/onnxruntime/blob/c2bd5b70b29eb3c687c5497696e7b0a1930604d3/orttraining/orttraining/training_ops/cuda/torch/torch_custom_function_kernel.cc#L49) is responsible to run the `backward` interface user defined through [backward runner](https://github.com/microsoft/onnxruntime/blob/c2bd5b70b29eb3c687c5497696e7b0a1930604d3/orttraining/orttraining/python/training/ortmodule/_custom_autograd_function_runner.py#L554).
+
+Currently, for training python wheel, `PythonOp` support is by default enabled, users don't need to be aware of it. As long as the
+defined torch.autograd.Function is working in `PyTorch` run, it should be runnable with `ORTModule`. If you need to enable it or
+disable it explicitly, refer to the [wiki](https://github.com/microsoft/onnxruntime/blob/main/docs/ORTModule_Training_Guidelines.md#ortmodule_enable_custom_autograd).
+
+
+
+## Known Issues and Workaround
+
+PyTorch Versions
+- Minimum version 1.9 (introduced "Support registering custom export for `prim::PythonOp`` from torch.autograd.Function ([#55630](https://github.com/pytorch/pytorch/pull/55630)) ([#57600](https://github.com/pytorch/pytorch/pull/57600))")
+- If the static forward function has only one output, any version of Pytorch 1.9 is fine. Otherwise, a PyTorch version containing [this commit](https://github.com/pytorch/pytorch/commit/a55cae3d37e0f7852e391886c3904307caa4d06d) is required.
+- [Throw _Map_base::at Exception](https://github.com/pytorch/pytorch/issues/88286), export errors like this:
+  ```
+	RuntimeError: There was an error while exporting the PyTorch model to ONNX:
+
+	Traceback (most recent call last):
+	File "/opt/conda/envs/ptca/lib/python3.8/site-packages/onnxruntime/training/ortmodule/_utils.py", line 316, in get_exception_as_string
+		raise exception
+	File "/opt/conda/envs/ptca/lib/python3.8/site-packages/onnxruntime/training/ortmodule/_graph_execution_manager.py", line 425, in _get_exported_model
+		torch.onnx.export(
+	File "/opt/conda/envs/ptca/lib/python3.8/site-packages/torch/onnx/utils.py", line 506, in export
+		_export(
+	File "/opt/conda/envs/ptca/lib/python3.8/site-packages/torch/onnx/utils.py", line 1548, in _export
+		graph, params_dict, torch_out = _model_to_graph(
+	File "/opt/conda/envs/ptca/lib/python3.8/site-packages/torch/onnx/utils.py", line 1113, in _model_to_graph
+		graph, params, torch_out, module = _create_jit_graph(model, args)
+	File "/opt/conda/envs/ptca/lib/python3.8/site-packages/torch/onnx/utils.py", line 989, in _create_jit_graph
+		graph, torch_out = _trace_and_get_graph_from_model(model, args)
+	File "/opt/conda/envs/ptca/lib/python3.8/site-packages/torch/onnx/utils.py", line 893, in _trace_and_get_graph_from_model
+		trace_graph, torch_out, inputs_states = torch.jit._get_trace_graph(
+	File "/opt/conda/envs/ptca/lib/python3.8/site-packages/torch/jit/_trace.py", line 1268, in _get_trace_graph
+		outs = ONNXTracedModule(f, strict, _force_outplace, return_inputs, _return_inputs_states)(*args, **kwargs)
+	...
+	File "/opt/conda/envs/ptca/lib/python3.8/site-packages/deepspeed-0.9.5+95680ca-py3.8.egg/deepspeed/runtime/zero/parameter_offload.py", line 632, in _ort_post_forward_module_hook
+		a = ORTPostForwardwardFunction.apply(module, _post_forward_module_hook, _ort_run_before_backward_function, len(input), len(output), *input_and_output)
+	File "/opt/conda/envs/ptca/lib/python3.8/site-packages/torch/autograd/function.py", line 506, in apply
+		return super().apply(*args, **kwargs)  # type: ignore[misc]
+	RuntimeError: _Map_base::at
+  ```
+  Resolution: upgrade `PyTorch` to new versions containing [this commit](https://github.com/thiagocrepaldi/pytorch/commit/3d3da109e3afa617c513e78aa999f5a1f44ffbce), when export param `autograd_inlining` is [set to false](https://github.com/microsoft/onnxruntime/blob/0e2782438a65b97919f15af14d2a4ada361157b6/orttraining/orttraining/python/training/ortmodule/_graph_execution_manager.py#L387C26-L387C26) to skip this error.
+- "Tried to trace <__torch__.torch.classes.c10d.ProcessGroup object at 0x2969c520> but it is not part of the active trace"
+   This usually happens when torch.autograd.Function's forward function used `PyTorch` collective calls and pass the group explicitly.
+  ```
+	RuntimeError: There was an error while exporting the PyTorch model to ONNX:
+
+	Traceback (most recent call last):
+	File "/bert_ort/pengwa/py3.8/lib/python3.8/site-packages/onnxruntime/training/ortmodule/_utils.py", line 324, in get_exception_as_string
+		raise exception
+	File "/bert_ort/pengwa/py3.8/lib/python3.8/site-packages/onnxruntime/training/ortmodule/_graph_execution_manager.py", line 342, in _get_exported_model
+		torch.onnx.export(
+	File "/bert_ort/pengwa/py3.8/lib/python3.8/site-packages/torch/onnx/utils.py", line 507, in export
+		_export(
+	File "/bert_ort/pengwa/py3.8/lib/python3.8/site-packages/torch/onnx/utils.py", line 1567, in _export
+		graph, params_dict, torch_out = _model_to_graph(
+	File "/bert_ort/pengwa/py3.8/lib/python3.8/site-packages/torch/onnx/utils.py", line 1124, in _model_to_graph
+		graph, params, torch_out, module = _create_jit_graph(model, args)
+	File "/bert_ort/pengwa/py3.8/lib/python3.8/site-packages/torch/onnx/utils.py", line 1000, in _create_jit_graph
+		graph, torch_out = _trace_and_get_graph_from_model(model, args)
+	File "/bert_ort/pengwa/py3.8/lib/python3.8/site-packages/torch/onnx/utils.py", line 904, in _trace_and_get_graph_from_model
+		trace_graph, torch_out, inputs_states = torch.jit._get_trace_graph(
+	File "/bert_ort/pengwa/py3.8/lib/python3.8/site-packages/torch/jit/_trace.py", line 1269, in _get_trace_graph
+		outs = ONNXTracedModule(f, strict, _force_outplace, return_inputs, _return_inputs_states)(*args, **kwargs)
+	File "/bert_ort/pengwa/py3.8/lib/python3.8/site-packages/torch/jit/_trace.py", line 128, in forward
+		graph, out = torch._C._create_graph_by_tracing(
+    ...
+	File "/bert_ort/pengwa/deepspeed/deepspeed/runtime/zero/parameter_offload.py", line 640, in _ort_pre_forward_module_hook
+		rets = ORTPreForwardwardFunction.apply(self, module, _ort_run_after_backward_function, *inputs)
+	...
+	File "/bert_ort/pengwa/deepspeed/deepspeed/runtime/zero/parameter_offload.py", line 823, in pre_sub_module_forward_function
+		param_coordinator.fetch_sub_module(sub_module, forward=True)
+	...
+	File "/bert_ort/pengwa/py3.8/lib/python3.8/site-packages/torch/distributed/distributed_c10d.py", line 2841, in all_gather_into_tensor
+		work = group._allgather_base(output_tensor, input_tensor)
+	RuntimeError: Tried to trace <__torch__.torch.classes.c10d.ProcessGroup object at 0x56250ad114a0> but it is not part of the active trace. Modules that are called during a trace must be registered as submodules of the thing being traced.
+  ```
+  Resolution: modify the autograd.Function, to skip the run the collection operator during onnx export, here is an example.
+  ```python
+    # Pre
+	def allgather_fn(output_tensor, input_tensor, group=None, async_op=False, debug=get_caller_func()):
+		return torch.distributed.all_gather_into_tensor(output_tensor, input_tensor, group=group, async_op=async_op, debug=debug)
+
+	# Workaround
+	from typing import Any, List
+	class DummyWork(torch.distributed.distributed_c10d.Work):
+		def is_completed(self) -> bool:
+			return True
+		def is_success(self) -> bool:
+			return True
+		def exception(self) -> Any:
+			return None
+		def wait(self, timeout: timedelta = timedelta) -> bool:
+			return True
+		def source_rank(self) -> int:
+			return 0
+		def _source_rank(self) -> int:
+			return 0
+		def result(self) -> List[torch.Tensor]:
+			return []
+		def synchronize(self):
+			pass
+
+	def allgather_fn(output_tensor, input_tensor, group=None, async_op=False, debug=get_caller_func()):
+		if torch.onnx.is_in_onnx_export():
+			return DummyWork()
+
+		return torch.distributed.all_gather_into_tensor(output_tensor, input_tensor, group=group, async_op=async_op, debug=debug)
+  ```
diff --git a/docs/ORTModule_Training_Guidelines.md b/docs/ORTModule_Training_Guidelines.md
index 5350988b20964..7fa89cca381d9 100644
--- a/docs/ORTModule_Training_Guidelines.md
+++ b/docs/ORTModule_Training_Guidelines.md
@@ -49,6 +49,90 @@ More options for **developers**.
 ```
 Check [DebugOptions implementation](../orttraining/orttraining/python/training/ortmodule/options.py) for more details.
 
+#### Log Level Explanations
+
+<table>
+<tr>
+<th style="width:20%">Log Level</th>
+<th style="width:80%">Description</th>
+</tr>
+<tr>
+<td>
+
+`FATAL` | `ERROR` | `WARNING` (For Users)
+
+<sup>`WARNING` is the default and recommended level for
+<br>users.</sup>
+</td>
+<td>
+
+- ONNX Runtime backend log level - `FATAL` | `ERROR` | `WARNING`.
+- ORTModule log level - `FATAL` | `ERROR` | `WARNING`.
+- Rank-0 log filtering is `ON` (e.g. logging on rank-0-only).
+- PyTorch exporter export logs filtering is `ON`.
+- PyTorch exporter verbose logs (including tracing graph) filtering is `ON`.
+
+</td>
+</tr>
+<tr>
+<td>
+
+`INFO` (For Users | ORT Developers)
+
+<sup>`INFO` is used for collecting experimental
+<br>feature stats, or a little bit more error messages.</sup>
+</td>
+<td>
+
+- ONNX Runtime backend log level - `WARNING`.
+- ORTModule log level - `INFO`.
+- Rank-0 log filtering is `ON` (e.g. logging on rank-0-only).
+- PyTorch exporter export logs filtering is `ON`.
+- PyTorch exporter verbose logs (including tracing graph) filtering is `OFF`.
+
+</td>
+</tr>
+<tr>
+<td>
+
+`DEVINFO` (For ORT Developers)
+
+<sup>`DEVINFO` is the recommended level for
+<br>debugging purposes.</sup>
+</td>
+<td>
+
+- ONNX Runtime backend log level - `INFO`.
+- ORTModule log level - `INFO`.
+- Rank-0 log filtering is `OFF` (e.g. logging on all ranks).
+- PyTorch exporter export logs filtering is `OFF`.
+- PyTorch exporter verbose logs (including tracing graph) filtering is `OFF`.
+
+</td>
+</tr>
+
+<tr>
+<td>
+
+`VERBOSE` (For ORT Developers)
+
+<sup>`VERBOSE` is the last resort for debugging
+<br>hard problems.</sup>
+</td>
+<td>
+
+- ONNX Runtime backend log level - `VERBOSE`.
+- ORTModule log level - `VERBOSE`.
+- Rank-0 log filtering is `OFF` (e.g. logging on all ranks).
+- PyTorch exporter export logs filtering is `OFF`.
+- PyTorch exporter verbose logs (including tracing graph) filtering is `OFF`.
+
+</td>
+</tr>
+
+</table>
+
+
 ### 2.1 Environment Variables
 
 `ORTModule` provides environment variables targeting different use cases.
@@ -185,6 +269,15 @@ data sparsity based performance optimizations.
 	unset ORTMODULE_CACHE_DIR # Disable
 	```
 
+#### ORTMODULE_USE_EFFICIENT_ATTENTION
+
+- **Feature Area**: *ORTMODULE/Optimizations*
+- **Description**: By default, this is disabled. This env var can be used for enabling attention fusion and falling back to PyTorch's efficient_attention ATen kernel for execution. NOTE that it requires torch's version is 2.1.1 or above. There are some build-in patterns for attention fusion, if none of the patterns works for your model, you can add a custom one in your user script manually.
+
+    ```bash
+    export ORTMODULE_USE_EFFICIENT_ATTENTION=1
+    ```
+
 ### 2.2 Memory Optimization
 
 Q: *Want to run a bigger batch size?*
@@ -313,6 +406,15 @@ Check [FP16_Optimizer implementation](../orttraining/orttraining/python/training
     export ORTMODULE_TUNING_RESULTS_PATH=/tmp/tuning_results
     ```
 
+#### ORTMODULE_USE_FLASH_ATTENTION
+
+- **Feature Area**: *ORTMODULE/TritonOp*
+- **Description**: By default, this is disabled. This env var can be used for enabling attention fusion and using Flash Attention's Triton version as the kernel. NOTE that it requires ORTMODULE_USE_TRITON to be enabled, and CUDA device capability is 8.0 or above. There are some build-in patterns for attention fusion, if none of the patterns works for your model, you can add a custom one in your user script manually.
+
+    ```bash
+    export ORTMODULE_USE_FLASH_ATTENTION=1
+    ```
+
 #### ORTMODULE_TRITON_DEBUG
 
 - **Feature Area**: *ORTMODULE/TritonOp*
diff --git a/docs/OperatorKernels.md b/docs/OperatorKernels.md
index 33c187a28b62e..edf249a816923 100644
--- a/docs/OperatorKernels.md
+++ b/docs/OperatorKernels.md
@@ -25,6 +25,7 @@ Do not modify directly.*
 |||13|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)|
 |||[7, 12]|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)|
 |Affine|*in* X:**T**<br> *out* Y:**T**|1+|**T** = tensor(float)|
+|AffineGrid|*in* theta:**T1**<br> *in* size:**T2**<br> *out* grid:**T1**|20+|**T1** = tensor(double), tensor(float)<br/> **T2** = tensor(int64)|
 |And|*in* A:**T**<br> *in* B:**T**<br> *out* C:**T1**|7+|**T** = tensor(bool)<br/> **T1** = tensor(bool)|
 |ArgMax|*in* data:**T**<br> *out* reduced:**tensor(int64)**|13+|**T** = tensor(double), tensor(float), tensor(int32), tensor(int8), tensor(uint8)|
 |||[11, 12]|**T** = tensor(double), tensor(float), tensor(int32), tensor(int8), tensor(uint8)|
@@ -67,7 +68,8 @@ Do not modify directly.*
 |||[11, 12]|**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)|
 |||[4, 10]|**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)|
 |ConcatFromSequence|*in* input_sequence:**S**<br> *out* concat_result:**T**|11+|**S** = seq(tensor(bfloat16)), seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(string)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8))|
-|ConstantOfShape|*in* input:**T1**<br> *out* output:**T2**|9+|**T1** = tensor(int64)<br/> **T2** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)|
+|ConstantOfShape|*in* input:**T1**<br> *out* output:**T2**|20+|**T1** = tensor(int64)<br/> **T2** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(float8e4m3fn), tensor(float8e4m3fnuz), tensor(float8e5m2), tensor(float8e5m2fnuz), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)|
+|||[9, 19]|**T1** = tensor(int64)<br/> **T2** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)|
 |Conv|*in* X:**T**<br> *in* W:**T**<br> *in* B:**T**<br> *out* Y:**T**|11+|**T** = tensor(float)|
 |||[1, 10]|**T** = tensor(float)|
 |ConvInteger|*in* x:**T1**<br> *in* w:**T2**<br> *in* x_zero_point:**T1**<br> *in* w_zero_point:**T2**<br> *out* y:**T3**|10+|**T1** = tensor(uint8)<br/> **T2** = tensor(uint8)<br/> **T3** = tensor(int32)|
@@ -78,7 +80,7 @@ Do not modify directly.*
 |Crop|*in* input:**T**<br> *out* output:**T**|1+|**T** = tensor(float)|
 |CumSum|*in* x:**T**<br> *in* axis:**T2**<br> *out* y:**T**|14+|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)<br/> **T2** = tensor(int32), tensor(int64)|
 |||[11, 13]|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)<br/> **T2** = tensor(int32), tensor(int64)|
-|DFT|*in* input:**T1**<br> *in* dft_length:**T2**<br> *out* output:**T1**|17+|**T1** = tensor(double), tensor(float)<br/> **T2** = tensor(int32), tensor(int64)|
+|DFT|*in* input:**T1**<br> *in* dft_length:**T2**<br> *in* axis:**tensor(int64)**<br> *out* output:**T1**<br><br>or<br><br>*in* input:**T1**<br> *in* dft_length:**T2**<br> *out* output:**T1**|17+|**T1** = tensor(double), tensor(float)<br/> **T2** = tensor(int32), tensor(int64)|
 |DepthToSpace|*in* input:**T**<br> *out* output:**T**|13+|**T** = tensor(double), tensor(float)|
 |||[11, 12]|**T** = tensor(double), tensor(float)|
 |||[1, 10]|**T** = tensor(double), tensor(float)|
@@ -136,7 +138,8 @@ Do not modify directly.*
 |||[7, 8]|**T** = tensor(double), tensor(float)<br/> **T1** = tensor(bool)|
 |GreaterOrEqual|*in* A:**T**<br> *in* B:**T**<br> *out* C:**T1**|16+|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)<br/> **T1** = tensor(bool)|
 |||[12, 15]|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)<br/> **T1** = tensor(bool)|
-|GridSample|*in* X:**T1**<br> *in* grid:**T2**<br> *out* Y:**T1**|16+|**T1** = tensor(float)<br/> **T2** = tensor(float)|
+|GridSample|*in* X:**T1**<br> *in* grid:**T2**<br> *out* Y:**T1**|20+|**T1** = tensor(double), tensor(float)<br/> **T2** = tensor(double), tensor(float)|
+|||[16, 19]|**T1** = tensor(float)<br/> **T2** = tensor(float)|
 |HammingWindow|*in* size:**T1**<br> *out* output:**T2**|17+|**T1** = tensor(int32), tensor(int64)<br/> **T2** = tensor(double), tensor(float), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)|
 |HannWindow|*in* size:**T1**<br> *out* output:**T2**|17+|**T1** = tensor(int32), tensor(int64)<br/> **T2** = tensor(double), tensor(float), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)|
 |HardSigmoid|*in* X:**T**<br> *out* Y:**T**|6+|**T** = tensor(float)|
@@ -155,8 +158,10 @@ Do not modify directly.*
 |||[1, 10]|**B** = tensor(bool)<br/> **V** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)|
 |ImageScaler|*in* input:**T**<br> *out* output:**T**|1+|**T** = tensor(float)|
 |InstanceNormalization|*in* input:**T**<br> *in* scale:**T**<br> *in* B:**T**<br> *out* output:**T**|6+|**T** = tensor(float)|
-|IsInf|*in* X:**T1**<br> *out* Y:**T2**|10+|**T1** = tensor(double), tensor(float)<br/> **T2** = tensor(bool)|
-|IsNaN|*in* X:**T1**<br> *out* Y:**T2**|13+|**T1** = tensor(double), tensor(float), tensor(float16)<br/> **T2** = tensor(bool)|
+|IsInf|*in* X:**T1**<br> *out* Y:**T2**|20+|**T1** = tensor(double), tensor(float), tensor(float8e4m3fn), tensor(float8e4m3fnuz), tensor(float8e5m2), tensor(float8e5m2fnuz)<br/> **T2** = tensor(bool)|
+|||[10, 19]|**T1** = tensor(double), tensor(float)<br/> **T2** = tensor(bool)|
+|IsNaN|*in* X:**T1**<br> *out* Y:**T2**|20+|**T1** = tensor(double), tensor(float), tensor(float16), tensor(float8e4m3fn), tensor(float8e4m3fnuz), tensor(float8e5m2), tensor(float8e5m2fnuz)<br/> **T2** = tensor(bool)|
+|||[13, 19]|**T1** = tensor(double), tensor(float), tensor(float16)<br/> **T2** = tensor(bool)|
 |||[9, 12]|**T1** = tensor(double), tensor(float), tensor(float16)<br/> **T2** = tensor(bool)|
 |LRN|*in* X:**T**<br> *out* Y:**T**|13+|**T** = tensor(float)|
 |||[1, 12]|**T** = tensor(float)|
@@ -368,7 +373,7 @@ Do not modify directly.*
 |||[13, 17]|**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)|
 |||[11, 12]|**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)|
 |||[2, 10]|**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)|
-|SplitToSequence|*in* input:**T**<br> *in* split:**I**<br> *out* output_sequence:**S**|11+|**I** = tensor(int32), tensor(int64)<br/> **S** = seq(tensor(bfloat16)), seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(string)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8))<br/> **T** = tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(string)|
+|SplitToSequence|*in* input:**T**<br> *in* split:**I**<br> *out* output_sequence:**S**|11+|**I** = tensor(int32), tensor(int64)<br/> **S** = seq(tensor(bfloat16)), seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(string)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8))<br/> **T** = tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(string)|
 |Sqrt|*in* X:**T**<br> *out* Y:**T**|13+|**T** = tensor(double), tensor(float)|
 |||[6, 12]|**T** = tensor(double), tensor(float)|
 |Squeeze|*in* data:**T**<br> *in* axes:**tensor(int64)**<br> *out* squeezed:**T**<br><br>or<br><br>*in* data:**T**<br> *out* squeezed:**T**|13+|**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)|
@@ -453,9 +458,11 @@ Do not modify directly.*
 |GreedySearch|*in* input_ids:**I**<br> *in* max_length:**I**<br> *in* min_length:**I**<br> *in* repetition_penalty:**T**<br> *in* vocab_mask:**I**<br> *in* prefix_vocab_mask:**I**<br> *in* attention_mask:**I**<br> *out* sequences:**I**|1+|**T** = tensor(float)|
 |GridSample|*in* X:**T1**<br> *in* Grid:**T1**<br> *out* Y:**T2**|1+|**T1** = tensor(float)<br/> **T2** = tensor(float)|
 |Inverse|*in* X:**T**<br> *out* Y:**T**|1+|**T** = tensor(double), tensor(float), tensor(float16)|
+|MatMulBnb4|*in* A:**T1**<br> *in* B:**T2**<br> *in* absmax:**T1**<br> *out* Y:**T1**|1+|**T1** = tensor(float)<br/> **T2** = tensor(uint8)|
 |MatMulFpQ4|*in* A:**T1**<br> *in* B:**T2**<br> *in* B_shape:**T3**<br> *out* Y:**T1**|1+|**T1** = tensor(float)<br/> **T2** = tensor(uint8)<br/> **T3** = tensor(int64)|
 |MatMulInteger16|*in* A:**T1**<br> *in* B:**T2**<br> *out* Y:**T3**|1+|**T1** = tensor(int16)<br/> **T2** = tensor(int16)<br/> **T3** = tensor(int32)|
 |MatMulIntegerToFloat|*in* A:**T1**<br> *in* B:**T2**<br> *in* a_scale:**T3**<br> *in* b_scale:**T3**<br> *in* a_zero_point:**T1**<br> *in* b_zero_point:**T2**<br> *in* bias:**T3**<br> *out* Y:**T3**|1+|**T1** = tensor(int8), tensor(uint8)<br/> **T2** = tensor(int8), tensor(uint8)<br/> **T3** = tensor(float)|
+|MatMulNBits|*in* A:**T1**<br> *in* B:**T2**<br> *in* scales:**T1**<br> *in* zero_points:**T2**<br> *out* Y:**T1**|1+|**T1** = tensor(float)<br/> **T2** = tensor(uint8)|
 |MaxpoolWithMask|*in* X:**T**<br> *in* M:**tensor(int32)**<br> *out* Y:**T**|1+|**T** = tensor(float)|
 |MultiHeadAttention|*in* query:**T**<br> *in* key:**T**<br> *in* value:**T**<br> *in* bias:**T**<br> *in* key_padding_mask:**M**<br> *in* relative_position_bias:**T**<br> *in* past_key:**T**<br> *in* past_value:**T**<br> *out* output:**T**<br> *out* present_key:**T**<br> *out* present_value:**T**|1+|**T** = tensor(float)|
 |MurmurHash3|*in* X:**T1**<br> *out* Y:**T2**|1+|**T1** = tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(string), tensor(uint32), tensor(uint64)<br/> **T2** = tensor(int32), tensor(uint32)|
@@ -475,14 +482,17 @@ Do not modify directly.*
 |QuantizeLinear|*in* x:**T1**<br> *in* y_scale:**T1**<br> *in* y_zero_point:**T2**<br> *out* y:**T2**|1+|**T1** = tensor(float)<br/> **T2** = tensor(int16), tensor(int8), tensor(uint16), tensor(uint8)|
 |QuickGelu|*in* X:**T**<br> *out* Y:**T**|1+|**T** = tensor(float)|
 |Range|*in* start:**T**<br> *in* limit:**T**<br> *in* delta:**T**<br> *out* Y:**T**|1+|**T** = tensor(double), tensor(float), tensor(int16), tensor(int32), tensor(int64)|
+|RotaryEmbedding|*in* input:**T**<br> *in* position_ids:**M**<br> *in* cos_cache:**T**<br> *in* sin_cache:**T**<br> *out* output:**T**|1+|**M** = tensor(int64)<br/> **T** = tensor(float)|
 |SampleOp|*in* X:**T**<br> *out* Y:**T**|1+|**T** = tensor(float)|
 |Sampling|*in* input_ids:**I**<br> *in* max_length:**I**<br> *in* min_length:**I**<br> *in* repetition_penalty:**T**<br> *in* vocab_mask:**I**<br> *in* prefix_vocab_mask:**I**<br> *in* attention_mask:**I**<br> *in* presence_mask:**I**<br> *in* seed:**I**<br> *out* sequences:**I**<br> *out* filtered_logits:**T**|1+|**T** = tensor(float)|
 |SkipLayerNormalization|*in* input:**T**<br> *in* skip:**T**<br> *in* gamma:**T**<br> *in* beta:**T**<br> *in* bias:**T**<br> *out* output:**T**<br> *out* mean:**U**<br> *out* inv_std_var:**U**<br> *out* input_skip_bias_sum:**T**|1+|**T** = tensor(double), tensor(float)|
+|SkipSimplifiedLayerNormalization|*in* input:**T**<br> *in* skip:**T**<br> *in* gamma:**T**<br> *in* bias:**T**<br> *out* output:**T**<br> *out* mean:**U**<br> *out* inv_std_var:**U**<br> *out* input_skip_bias_sum:**T**|1+|**T** = tensor(double), tensor(float)|
 |SparseToDenseMatMul|*in* A:**T**<br> *in* B:**T1**<br> *out* Y:**T1**|1+|**T** = sparse_tensor(double), sparse_tensor(float), sparse_tensor(int32), sparse_tensor(int64), sparse_tensor(uint32), sparse_tensor(uint64)<br/> **T1** = tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)|
 |Tokenizer|*in* X:**T**<br> *out* Y:**T**|1+|**T** = tensor(string)|
 |TransposeMatMul|*in* A:**T**<br> *in* B:**T**<br> *out* Y:**T**|1+|**T** = tensor(float)|
 |Trilu|*in* X:**T**<br> *in* k:**tensor(int64)**<br> *out* Y:**T**|1+|**T** = tensor(double), tensor(float), tensor(int64)|
 |Unique|*in* x:**T**<br> *out* y:**T**<br> *out* idx:**tensor(int64)**<br> *out* counts:**tensor(int64)**|1+|**T** = tensor(float)|
+|WhisperBeamSearch|*in* input_ids:**F**<br> *in* max_length:**I**<br> *in* min_length:**I**<br> *in* num_beams:**I**<br> *in* num_return_sequences:**I**<br> *in* length_penalty:**T**<br> *in* repetition_penalty:**T**<br> *in* vocab_mask:**M**<br> *in* prefix_vocab_mask:**M**<br> *in* attention_mask:**I**<br> *in* decoder_input_ids:**I**<br> *in* logits_processor:**I**<br> *in* cross_qk_layer_head:**I**<br> *in* extra_decoding_ids:**I**<br> *out* sequences:**I**<br> *out* sequences_scores:**T**<br> *out* scores:**T**<br> *out* cross_qk:**V**<br> *out* non_speech_probs:**T**|1+|**T** = tensor(float)|
 |WordConvEmbedding|*in* Sequence:**T**<br> *in* W:**T1**<br> *in* B:**T1**<br> *in* C:**T1**<br> *out* Y:**T1**|1+|**T** = tensor(int32)<br/> **T1** = tensor(float)|
 | |
 | |
@@ -513,10 +523,8 @@ Do not modify directly.*
 |||[7, 12]|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)|
 |Affine|*in* X:**T**<br> *out* Y:**T**|1+|**T** = tensor(double), tensor(float), tensor(float16)|
 |And|*in* A:**T**<br> *in* B:**T**<br> *out* C:**T1**|7+|**T** = tensor(bool)<br/> **T1** = tensor(bool)|
-|ArgMax|*in* data:**T**<br> *out* reduced:**tensor(int64)**|11|**T** = tensor(double), tensor(float), tensor(float16)|
-|||[1, 10]|**T** = tensor(double), tensor(float), tensor(float16)|
-|ArgMin|*in* data:**T**<br> *out* reduced:**tensor(int64)**|11|**T** = tensor(double), tensor(float), tensor(float16)|
-|||[1, 10]|**T** = tensor(double), tensor(float), tensor(float16)|
+|ArgMax|*in* data:**T**<br> *out* reduced:**tensor(int64)**|[1, 11]|**T** = tensor(double), tensor(float), tensor(float16)|
+|ArgMin|*in* data:**T**<br> *out* reduced:**tensor(int64)**|[1, 11]|**T** = tensor(double), tensor(float), tensor(float16)|
 |AveragePool|*in* X:**T**<br> *out* Y:**T**|11+|**T** = tensor(double), tensor(float), tensor(float16)|
 |||10|**T** = tensor(double), tensor(float), tensor(float16)|
 |||[7, 9]|**T** = tensor(double), tensor(float), tensor(float16)|
@@ -657,7 +665,7 @@ Do not modify directly.*
 |Mul|*in* A:**T**<br> *in* B:**T**<br> *out* C:**T**|14+|**T** = tensor(bfloat16), tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)|
 |||13|**T** = tensor(bfloat16), tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)|
 |||[7, 12]|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)|
-|Neg|*in* X:**T**<br> *out* Y:**T**|13+|**T** = tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8)|
+|Neg|*in* X:**T**<br> *out* Y:**T**|13+|**T** = tensor(bfloat16), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8)|
 |||[6, 12]|**T** = tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8)|
 |NonZero|*in* X:**T**<br> *out* Y:**tensor(int64)**|13+|**T** = tensor(bool), tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint8)|
 |||[9, 12]|**T** = tensor(bool), tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint8)|
@@ -687,39 +695,26 @@ Do not modify directly.*
 |Range|*in* start:**T**<br> *in* limit:**T**<br> *in* delta:**T**<br> *out* output:**T**|11+|**T** = tensor(double), tensor(float), tensor(int16), tensor(int32), tensor(int64)|
 |Reciprocal|*in* X:**T**<br> *out* Y:**T**|13+|**T** = tensor(double), tensor(float), tensor(float16)|
 |||[6, 12]|**T** = tensor(double), tensor(float), tensor(float16)|
-|ReduceL1|*in* data:**T**<br> *in* axes:**tensor(int64)**<br> *out* reduced:**T**<br><br>or<br><br>*in* data:**T**<br> *out* reduced:**T**|13+|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32)|
-|||[11, 12]|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32)|
-|||[1, 10]|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32)|
-|ReduceL2|*in* data:**T**<br> *in* axes:**tensor(int64)**<br> *out* reduced:**T**<br><br>or<br><br>*in* data:**T**<br> *out* reduced:**T**|13+|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32)|
-|||[11, 12]|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32)|
-|||[1, 10]|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32)|
-|ReduceLogSum|*in* data:**T**<br> *in* axes:**tensor(int64)**<br> *out* reduced:**T**<br><br>or<br><br>*in* data:**T**<br> *out* reduced:**T**|13+|**T** = tensor(double), tensor(float), tensor(float16)|
-|||[11, 12]|**T** = tensor(double), tensor(float), tensor(float16)|
-|||[1, 10]|**T** = tensor(double), tensor(float), tensor(float16)|
-|ReduceLogSumExp|*in* data:**T**<br> *in* axes:**tensor(int64)**<br> *out* reduced:**T**<br><br>or<br><br>*in* data:**T**<br> *out* reduced:**T**|13+|**T** = tensor(double), tensor(float), tensor(float16)|
-|||[11, 12]|**T** = tensor(double), tensor(float), tensor(float16)|
-|||[1, 10]|**T** = tensor(double), tensor(float), tensor(float16)|
-|ReduceMax|*in* data:**T**<br> *in* axes:**tensor(int64)**<br> *out* reduced:**T**<br><br>or<br><br>*in* data:**T**<br> *out* reduced:**T**|13+|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(int8), tensor(uint8)|
-|||12|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(int8), tensor(uint8)|
-|||11|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64)|
-|||[1, 10]|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64)|
-|ReduceMean|*in* data:**T**<br> *in* axes:**tensor(int64)**<br> *out* reduced:**T**<br><br>or<br><br>*in* data:**T**<br> *out* reduced:**T**|13+|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32)|
-|||[11, 12]|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32)|
-|||[1, 10]|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32)|
-|ReduceMin|*in* data:**T**<br> *in* axes:**tensor(int64)**<br> *out* reduced:**T**<br><br>or<br><br>*in* data:**T**<br> *out* reduced:**T**|14+|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(int8), tensor(uint8)|
-|||13|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(int8), tensor(uint8)|
-|||12|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(int8), tensor(uint8)|
-|||11|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32)|
-|||[1, 10]|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32)|
-|ReduceProd|*in* data:**T**<br> *in* axes:**tensor(int64)**<br> *out* reduced:**T**<br><br>or<br><br>*in* data:**T**<br> *out* reduced:**T**|13+|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32)|
-|||[11, 12]|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32)|
-|||[1, 10]|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32)|
+|ReduceL1|*in* data:**T**<br> *in* axes:**tensor(int64)**<br> *out* reduced:**T**<br><br>or<br><br>*in* data:**T**<br> *out* reduced:**T**|18+|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32)|
+|||[1, 17]|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32)|
+|ReduceL2|*in* data:**T**<br> *in* axes:**tensor(int64)**<br> *out* reduced:**T**<br><br>or<br><br>*in* data:**T**<br> *out* reduced:**T**|18+|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32)|
+|||[1, 17]|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32)|
+|ReduceLogSum|*in* data:**T**<br> *in* axes:**tensor(int64)**<br> *out* reduced:**T**<br><br>or<br><br>*in* data:**T**<br> *out* reduced:**T**|18+|**T** = tensor(double), tensor(float), tensor(float16)|
+|||[1, 17]|**T** = tensor(double), tensor(float), tensor(float16)|
+|ReduceLogSumExp|*in* data:**T**<br> *in* axes:**tensor(int64)**<br> *out* reduced:**T**<br><br>or<br><br>*in* data:**T**<br> *out* reduced:**T**|18+|**T** = tensor(double), tensor(float), tensor(float16)|
+|||[1, 17]|**T** = tensor(double), tensor(float), tensor(float16)|
+|ReduceMax|*in* data:**T**<br> *in* axes:**tensor(int64)**<br> *out* reduced:**T**<br><br>or<br><br>*in* data:**T**<br> *out* reduced:**T**|18+|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64)|
+|||[1, 17]|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64)|
+|ReduceMean|*in* data:**T**<br> *in* axes:**tensor(int64)**<br> *out* reduced:**T**<br><br>or<br><br>*in* data:**T**<br> *out* reduced:**T**|18+|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32)|
+|||[1, 17]|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32)|
+|ReduceMin|*in* data:**T**<br> *in* axes:**tensor(int64)**<br> *out* reduced:**T**<br><br>or<br><br>*in* data:**T**<br> *out* reduced:**T**|18+|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(int8), tensor(uint8)|
+|||[1, 17]|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(int8), tensor(uint8)|
+|ReduceProd|*in* data:**T**<br> *in* axes:**tensor(int64)**<br> *out* reduced:**T**<br><br>or<br><br>*in* data:**T**<br> *out* reduced:**T**|18+|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32)|
+|||[1, 17]|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32)|
 |ReduceSum|*in* data:**T**<br> *in* axes:**tensor(int64)**<br> *out* reduced:**T**<br><br>or<br><br>*in* data:**T**<br> *out* reduced:**T**|13+|**T** = tensor(bfloat16), tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64)|
-|||[11, 12]|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64)|
-|||[1, 10]|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64)|
-|ReduceSumSquare|*in* data:**T**<br> *in* axes:**tensor(int64)**<br> *out* reduced:**T**<br><br>or<br><br>*in* data:**T**<br> *out* reduced:**T**|13+|**T** = tensor(double), tensor(float), tensor(float16)|
-|||[11, 12]|**T** = tensor(double), tensor(float), tensor(float16)|
-|||[1, 10]|**T** = tensor(double), tensor(float), tensor(float16)|
+|||[1, 12]|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64)|
+|ReduceSumSquare|*in* data:**T**<br> *in* axes:**tensor(int64)**<br> *out* reduced:**T**<br><br>or<br><br>*in* data:**T**<br> *out* reduced:**T**|18+|**T** = tensor(double), tensor(float), tensor(float16)|
+|||[1, 17]|**T** = tensor(double), tensor(float), tensor(float16)|
 |Relu|*in* X:**T**<br> *out* Y:**T**|14+|**T** = tensor(bfloat16), tensor(double), tensor(float), tensor(float16)|
 |||13|**T** = tensor(bfloat16), tensor(double), tensor(float), tensor(float16)|
 |||[6, 12]|**T** = tensor(double), tensor(float), tensor(float16)|
@@ -807,7 +802,7 @@ Do not modify directly.*
 |||[1, 10]|**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)|
 |Upsample|*in* X:**T**<br> *in* scales:**tensor(float)**<br> *out* Y:**T**<br><br>or<br><br>*in* X:**T**<br> *out* Y:**T**|9|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(uint8)|
 |||[7, 8]|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(uint8)|
-|Where|*in* condition:**B**<br> *in* X:**T**<br> *in* Y:**T**<br> *out* output:**T**|16+|**B** = tensor(bool)<br/> **T** = tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint8)|
+|Where|*in* condition:**B**<br> *in* X:**T**<br> *in* Y:**T**<br> *out* output:**T**|16+|**B** = tensor(bool)<br/> **T** = tensor(bfloat16), tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint8)|
 |||[9, 15]|**B** = tensor(bool)<br/> **T** = tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint8)|
 |Xor|*in* A:**T**<br> *in* B:**T**<br> *out* C:**T1**|7+|**T** = tensor(bool)<br/> **T1** = tensor(bool)|
 | |
@@ -826,22 +821,28 @@ Do not modify directly.*
 |ComplexMulConj|*in* A:**T**<br> *in* B:**T**<br> *out* C:**T**|1+|**T** = tensor(float), tensor(float16)|
 |ConvTransposeWithDynamicPads|*in* X:**T**<br> *in* W:**T**<br> *in* Pads:**tensor(int64)**<br> *in* B:**T**<br> *out* Y:**T**|1+|**T** = tensor(float)|
 |DecoderAttention|*in* query:**T**<br> *in* key:**T**<br> *in* q_weight:**T**<br> *in* kv_weight:**T**<br> *in* bias:**T**<br> *in* key_padding_mask:**B**<br> *in* key_cache:**T**<br> *in* value_cache:**T**<br> *in* static_kv:**B**<br> *in* use_past:**B**<br> *in* has_layer_state:**B**<br> *in* has_key_padding_mask:**B**<br> *out* output:**T**<br> *out* new_key_cache:**T**<br> *out* new_value_cache:**T**|1+|**T** = tensor(float), tensor(float16)|
-|DecoderMaskedMultiHeadAttention|*in* query:**T**<br> *in* key:**T**<br> *in* value:**T**<br> *in* mask_index:**M**<br> *in* relative_position_bias:**T**<br> *in* past_key:**T**<br> *in* past_value:**T**<br> *in* past_sequence_length:**M**<br> *in* beam_width:**M**<br> *in* cache_indirection:**M**<br> *in* bias:**T**<br> *out* output:**T**<br> *out* present_key:**T**<br> *out* present_value:**T**|1+|**T** = tensor(float), tensor(float16)|
+|DecoderMaskedMultiHeadAttention|*in* query:**T**<br> *in* key:**T**<br> *in* value:**T**<br> *in* mask_index:**M**<br> *in* relative_position_bias:**T**<br> *in* past_key:**T**<br> *in* past_value:**T**<br> *in* past_sequence_length:**M**<br> *in* beam_width:**M**<br> *in* cache_indirection:**M**<br> *in* bias:**T**<br> *out* output:**T**<br> *out* present_key:**T**<br> *out* present_value:**T**<br> *out* qk:**V**|1+|**T** = tensor(float), tensor(float16)|
 |DecoderMaskedSelfAttention|*in* input:**T**<br> *in* weights:**T**<br> *in* bias:**T**<br> *in* mask_index:**M**<br> *in* past:**T**<br> *in* relative_position_bias:**T**<br> *in* past_sequence_length:**M**<br> *in* beam_width:**M**<br> *in* cache_indirection:**M**<br> *out* output:**T**<br> *out* present:**T**|1+|**T** = tensor(float), tensor(float16)|
 |DequantizeLinear|*in* x:**T1**<br> *in* x_scale:**T2**<br> *in* x_zero_point:**T1**<br> *out* y:**T2**|1+|**T1** = tensor(int8), tensor(uint8)<br/> **T2** = tensor(float16)|
 |DequantizeWithOrder|*in* input:**Q**<br> *in* scale_input:**S**<br> *out* output:**F**|1+|**F** = tensor(float), tensor(float16)<br/> **Q** = tensor(int8)<br/> **S** = tensor(float)|
+|DynamicTimeWarping|*in* input:**F**<br> *out* output:**I**|1+|**F** = tensor(float)<br/> **I** = tensor(int32)|
 |EmbedLayerNormalization|*in* input_ids:**T1**<br> *in* segment_ids:**T1**<br> *in* word_embedding:**T**<br> *in* position_embedding:**T**<br> *in* segment_embedding:**T**<br> *in* gamma:**T**<br> *in* beta:**T**<br> *in* mask:**T1**<br> *in* position_ids:**T1**<br> *out* output:**T**<br> *out* mask_index:**T1**<br> *out* embedding_sum:**T**|1+|**T** = tensor(float), tensor(float16)|
 |FastGelu|*in* X:**T**<br> *in* bias:**T**<br> *out* Y:**T**|1+|**T** = tensor(bfloat16), tensor(float), tensor(float16)|
 |FusedConv|*in* X:**T**<br> *in* W:**T**<br> *in* B:**T**<br> *in* Z:**T**<br> *out* Y:**T**|1+|**T** = tensor(float)|
 |FusedMatMul|*in* A:**T**<br> *in* B:**T**<br> *out* Y:**T**|1+|**T** = tensor(bfloat16), tensor(double), tensor(float), tensor(float16)|
 |GatedRelativePositionBias|*in* query_layer:**T**<br> *in* query_bias:**T**<br> *in* rel_pos:**T**<br> *in* weight:**T**<br> *in* bias:**T**<br> *in* eco_a:**T**<br> *in* token_offset:**M**<br> *out* output:**T**|1+|**T** = tensor(float), tensor(float16)|
 |Gelu|*in* X:**T**<br> *out* Y:**T**|1+|**T** = tensor(double), tensor(float), tensor(float16)|
+|GemmFloat8|*in* A:**TA**<br> *in* B:**TB**<br> *in* C:**TC**<br> *in* scaleA:**TS**<br> *in* scaleB:**TS**<br> *in* scaleY:**TS**<br> *out* Y:**TR**|1+|**TA** = tensor(bfloat16), tensor(float), tensor(float16), tensor(float8e4m3fn), tensor(float8e5m2)<br/> **TB** = tensor(bfloat16), tensor(float), tensor(float16), tensor(float8e4m3fn), tensor(float8e5m2)<br/> **TR** = tensor(bfloat16), tensor(float), tensor(float16), tensor(float8e4m3fn), tensor(float8e5m2)<br/> **TS** = tensor(float)|
 |GreedySearch|*in* input_ids:**I**<br> *in* max_length:**I**<br> *in* min_length:**I**<br> *in* repetition_penalty:**T**<br> *in* vocab_mask:**I**<br> *in* prefix_vocab_mask:**I**<br> *in* attention_mask:**I**<br> *out* sequences:**I**|1+|**T** = tensor(float), tensor(float16)|
 |GridSample|*in* X:**T1**<br> *in* Grid:**T1**<br> *out* Y:**T2**|1+|**T1** = tensor(float)<br/> **T2** = tensor(float)|
 |GroupNorm|*in* X:**T**<br> *in* gamma:**M**<br> *in* beta:**M**<br> *out* Y:**T**|1+|**T** = tensor(float), tensor(float16)|
+|GroupQueryAttention|*in* query:**T**<br> *in* key:**T**<br> *in* value:**T**<br> *in* past_key:**T**<br> *in* past_value:**T**<br> *in* seqlens_k:**M**<br> *in* total_sequence_length:**M**<br> *out* output:**T**<br> *out* present_key:**T**<br> *out* present_value:**T**|1+|**M** = tensor(int32)<br/> **T** = tensor(float16)|
 |Inverse|*in* X:**T**<br> *out* Y:**T**|1+|**T** = tensor(double), tensor(float), tensor(float16)|
 |Irfft|*in* X:**T**<br> *out* Y:**T**|1+|**T** = tensor(double), tensor(float), tensor(float16)|
 |LongformerAttention|*in* input:**T**<br> *in* weight:**T**<br> *in* bias:**T**<br> *in* mask:**T**<br> *in* global_weight:**T**<br> *in* global_bias:**T**<br> *in* global:**G**<br> *out* output:**T**|1+|**T** = tensor(float), tensor(float16)|
+|MatMulBnb4|*in* A:**T1**<br> *in* B:**T2**<br> *in* absmax:**T1**<br> *out* Y:**T1**|1+|**T1** = tensor(bfloat16), tensor(float), tensor(float16)<br/> **T2** = tensor(uint8)|
+|MatMulNBits|*in* A:**T1**<br> *in* B:**T2**<br> *in* scales:**T1**<br> *in* zero_points:**T2**<br> *out* Y:**T1**|1+|**T1** = tensor(float), tensor(float16)<br/> **T2** = tensor(uint8)|
+|MoE|*in* input:**T**<br> *in* router_probs:**T**<br> *in* fc1_experts_weights:**T**<br> *in* fc2_experts_weights:**T**<br> *in* fc1_experts_bias:**T**<br> *in* fc2_experts_bias:**T**<br> *out* output:**T**|1+|**T** = tensor(float), tensor(float16)|
 |MultiHeadAttention|*in* query:**T**<br> *in* key:**T**<br> *in* value:**T**<br> *in* bias:**T**<br> *in* key_padding_mask:**M**<br> *in* relative_position_bias:**T**<br> *in* past_key:**T**<br> *in* past_value:**T**<br> *out* output:**T**<br> *out* present_key:**T**<br> *out* present_value:**T**|1+|**T** = tensor(float), tensor(float16)|
 |NGramRepeatBlock|*in* input_ids:**Tid**<br> *in* scores:**T**<br> *out* scores_out:**T**|1+|**T** = tensor(float)<br/> **Tid** = tensor(int64)|
 |NhwcConv|*in* X:**T**<br> *in* W:**T**<br> *in* B:**T**<br> *out* Y:**T**|1+|**T** = tensor(float), tensor(float16)|
@@ -860,11 +861,15 @@ Do not modify directly.*
 |RemovePadding|*in* input:**T**<br> *in* sequence_token_count:**M**<br> *out* output:**T**<br> *out* token_offset:**M**<br> *out* cumulated_seq_len:**M**<br> *out* max_seq_len:**M**|1+|**T** = tensor(float), tensor(float16)|
 |RestorePadding|*in* input:**T**<br> *in* token_offset:**M**<br> *out* output:**T**|1+|**T** = tensor(float), tensor(float16)|
 |Rfft|*in* X:**T**<br> *out* Y:**T**|1+|**T** = tensor(double), tensor(float), tensor(float16)|
+|RotaryEmbedding|*in* input:**T**<br> *in* position_ids:**M**<br> *in* cos_cache:**T**<br> *in* sin_cache:**T**<br> *out* output:**T**|1+|**M** = tensor(int64)<br/> **T** = tensor(float), tensor(float16)|
 |Sampling|*in* input_ids:**I**<br> *in* max_length:**I**<br> *in* min_length:**I**<br> *in* repetition_penalty:**T**<br> *in* vocab_mask:**I**<br> *in* prefix_vocab_mask:**I**<br> *in* attention_mask:**I**<br> *in* presence_mask:**I**<br> *in* seed:**I**<br> *out* sequences:**I**<br> *out* filtered_logits:**T**|1+|**T** = tensor(float), tensor(float16)|
+|SkipGroupNorm|*in* X:**T**<br> *in* gamma:**M**<br> *in* beta:**M**<br> *in* skip:**T**<br> *in* bias:**T**<br> *out* Y:**T**<br> *out* S:**T**|1+|**T** = tensor(float), tensor(float16)|
 |SkipLayerNormalization|*in* input:**T**<br> *in* skip:**T**<br> *in* gamma:**T**<br> *in* beta:**T**<br> *in* bias:**T**<br> *out* output:**T**<br> *out* mean:**U**<br> *out* inv_std_var:**U**<br> *out* input_skip_bias_sum:**T**|1+|**T** = tensor(float), tensor(float16)|
 |SkipSimplifiedLayerNormalization|*in* input:**T**<br> *in* skip:**T**<br> *in* gamma:**T**<br> *in* bias:**T**<br> *out* output:**T**<br> *out* mean:**U**<br> *out* inv_std_var:**U**<br> *out* input_skip_bias_sum:**T**|1+|**T** = tensor(float), tensor(float16)|
 |TransposeMatMul|*in* A:**T**<br> *in* B:**T**<br> *out* Y:**T**|1+|**T** = tensor(bfloat16), tensor(double), tensor(float), tensor(float16)|
 |Trilu|*in* X:**T**<br> *in* k:**tensor(int64)**<br> *out* Y:**T**|1+|**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)|
+|UnfoldTensor|*in* input:**T**<br> *out* output:**T**|1+|**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)|
+|WhisperBeamSearch|*in* input_ids:**F**<br> *in* max_length:**I**<br> *in* min_length:**I**<br> *in* num_beams:**I**<br> *in* num_return_sequences:**I**<br> *in* length_penalty:**T**<br> *in* repetition_penalty:**T**<br> *in* vocab_mask:**M**<br> *in* prefix_vocab_mask:**M**<br> *in* attention_mask:**I**<br> *in* decoder_input_ids:**I**<br> *in* logits_processor:**I**<br> *in* cross_qk_layer_head:**I**<br> *in* extra_decoding_ids:**I**<br> *out* sequences:**I**<br> *out* sequences_scores:**T**<br> *out* scores:**T**<br> *out* cross_qk:**V**<br> *out* non_speech_probs:**T**|1+|**T** = tensor(float), tensor(float16)|
 | |
 | |
 
@@ -935,7 +940,7 @@ Do not modify directly.*
 |Crop|*in* input:**T**<br> *out* output:**T**|1+|**T** = tensor(float), tensor(float16)|
 |CumSum|*in* x:**T**<br> *in* axis:**T2**<br> *out* y:**T**|14+|**T** = tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)|
 |||11+|**T** = tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)|
-|DFT|*in* input:**T1**<br> *in* dft_length:**T2**<br> *out* output:**T1**|17+|**T1** = tensor(float), tensor(float16)<br/> **T2** = tensor(int64)|
+|DFT|*in* input:**T1**<br> *in* dft_length:**T2**<br> *in* axis:**tensor(int64)**<br> *out* output:**T1**<br><br>or<br><br>*in* input:**T1**<br> *in* dft_length:**T2**<br> *out* output:**T1**|17+|**T1** = tensor(float), tensor(float16)<br/> **T2** = tensor(int64)|
 |DepthToSpace|*in* input:**T**<br> *out* output:**T**|13+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)|
 |||11+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)|
 |||1+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)|
@@ -1244,6 +1249,7 @@ Do not modify directly.*
 |QLinearSigmoid|*in* X:**T**<br> *in* X_scale:**tensor(float)**<br> *in* X_zero_point:**T**<br> *in* Y_scale:**tensor(float)**<br> *in* Y_zero_point:**T**<br> *out* Y:**T**|1+|**T** = tensor(int8), tensor(uint8)|
 |QuantizeLinear|*in* x:**T1**<br> *in* y_scale:**T1**<br> *in* y_zero_point:**T2**<br> *out* y:**T2**|1+|**T1** = tensor(float), tensor(float16), tensor(int32)<br/> **T2** = tensor(int8), tensor(uint8)|
 |QuickGelu|*in* X:**T**<br> *out* Y:**T**|1+|**T** = tensor(float), tensor(float16)|
+|RotaryEmbedding|*in* input:**T**<br> *in* position_ids:**M**<br> *in* cos_cache:**T**<br> *in* sin_cache:**T**<br> *out* output:**T**|1+|**M** = tensor(int64)<br/> **T** = tensor(float), tensor(float16)|
 |SkipLayerNormalization|*in* input:**T**<br> *in* skip:**T**<br> *in* gamma:**T**<br> *in* beta:**T**<br> *in* bias:**T**<br> *out* output:**T**<br> *out* mean:**U**<br> *out* inv_std_var:**U**<br> *out* input_skip_bias_sum:**T**|1+|**T** = tensor(float), tensor(float16)|
 | |
 | |
diff --git a/docs/python/ReadMeOV.rst b/docs/python/ReadMeOV.rst
index f12c01d278dca..6ef16e1378139 100644
--- a/docs/python/ReadMeOV.rst
+++ b/docs/python/ReadMeOV.rst
@@ -7,7 +7,6 @@ OpenVINO™ Execution Provider for ONNX Runtime accelerates inference across man
  - Intel® CPUs
  - Intel® integrated GPUs
  - Intel® discrete GPUs
- - Intel® integrated VPUs
 
 Installation
 ------------
@@ -22,7 +21,6 @@ This package supports:
  - Intel® CPUs
  - Intel® integrated GPUs
  - Intel® discrete GPUs
- - Intel® integrated VPUs
 
 ``pip3 install onnxruntime-openvino``
 
diff --git a/include/onnxruntime/core/framework/float8.h b/include/onnxruntime/core/framework/float8.h
index 0fd04f28d44b7..dd607cbbc6952 100644
--- a/include/onnxruntime/core/framework/float8.h
+++ b/include/onnxruntime/core/framework/float8.h
@@ -208,9 +208,10 @@ struct Float8E4M3FNUZ {
     val = static_cast<uint8_t>((b & 0x80000000) >> 24);  // sign
     if ((b & 0x7fffffff) == 0x7f800000) {                // infinity
       if (saturate) {
+        // the highest available value
         val |= 0x7F;
       } else {
-        // infinity
+        // NaN
         val = 0x80;
       }
     } else if ((b & 0x7F800000) == 0x7F800000) {  // NaN
@@ -362,8 +363,10 @@ struct Float8E5M2 {
     val = (b & 0x80000000) >> 24;          // sign
     if ((b & 0x7FFFFFFF) == 0x7F800000) {  // inf
       if (saturate) {
+        // the highest available value
         val |= 0x7B;
       } else {
+        // the infinity
         val |= 0x7C;
       }
     } else if ((b & 0x7F800000) == 0x7F800000) {  // NaN
diff --git a/include/onnxruntime/core/framework/op_node_proto_helper.h b/include/onnxruntime/core/framework/op_node_proto_helper.h
index 700e1edc0cb7d..e7ac01947af41 100644
--- a/include/onnxruntime/core/framework/op_node_proto_helper.h
+++ b/include/onnxruntime/core/framework/op_node_proto_helper.h
@@ -10,20 +10,6 @@
 #include "core/common/gsl.h"
 #endif
 
-#ifdef __has_attribute
-#define ORT_HAVE_ATTRIBUTE(x) __has_attribute(x)
-#else
-#define ORT_HAVE_ATTRIBUTE(x) 0
-#endif
-
-#if ORT_HAVE_ATTRIBUTE(nodiscard)
-#define MUST_USE_RESULT [[nodiscard]]
-#elif defined(__clang__) && ORT_HAVE_ATTRIBUTE(warn_unused_result)
-#define MUST_USE_RESULT __attribute__((warn_unused_result))
-#else
-#define MUST_USE_RESULT
-#endif
-
 class IMLOpKernel;
 
 namespace onnxruntime {
@@ -43,14 +29,26 @@ class OpNodeProtoHelper {
      Call this function for a required attribute or when a default value for an optional attribute is specified in the op schema
   */
   template <typename T>
-  MUST_USE_RESULT Status GetAttr(const std::string& name, T* value) const;
+  Status GetAttr(const std::string& name, T* value) const;
+
+  /**
+     Get a single attribute
+     Call this function for a required attribute or when a default value for an optional attribute is specified in the op schema
+     Throws if an attribute with the specified type doesn't exist
+  */
+  template <typename T>
+  [[nodiscard]] T GetAttr(const std::string& name) const {
+    T value;
+    ORT_THROW_IF_ERROR(GetAttr(name, &value));
+    return value;
+  }
 
   /**
      Get a single attribute
      Call this function only when a default value for an optional attribute isn't specified in the op schema
   */
   template <typename T>
-  T GetAttrOrDefault(const std::string& name, const T& default_value) const {
+  [[nodiscard]] T GetAttrOrDefault(const std::string& name, const T& default_value) const {
     T tmp;
     return GetAttr<T>(name, &tmp).IsOK() ? tmp : default_value;
   }
@@ -70,7 +68,8 @@ class OpNodeProtoHelper {
      Call this function only when a default value for an optional attribute isn't specified in the op schema
   */
   template <typename T>
-  MUST_USE_RESULT std::vector<T> GetAttrsOrDefault(const std::string& name, const std::vector<T>& default_value = std::vector<T>{}) const {
+  [[nodiscard]] std::vector<T> GetAttrsOrDefault(const std::string& name,
+                                                 const std::vector<T>& default_value = {}) const {
     std::vector<T> tmp;
     return GetAttrs<T>(name, tmp).IsOK() ? tmp : default_value;
   }
@@ -87,11 +86,12 @@ class OpNodeProtoHelper {
   /// <param name="values">Attribute data in a span, out parameter</param>
   /// <returns>Status</returns>
   template <typename T>
-  MUST_USE_RESULT Status GetAttrsAsSpan(const std::string& name, gsl::span<const T>& values) const;
+  Status GetAttrsAsSpan(const std::string& name, gsl::span<const T>& values) const;
 
-  MUST_USE_RESULT Status GetAttrs(const std::string& name, TensorShapeVector& out) const;
+  Status GetAttrs(const std::string& name, TensorShapeVector& out) const;
 
-  MUST_USE_RESULT TensorShapeVector GetAttrsOrDefault(const std::string& name, const TensorShapeVector& default_value = TensorShapeVector{}) const {
+  [[nodiscard]] TensorShapeVector GetAttrsOrDefault(const std::string& name,
+                                                    const TensorShapeVector& default_value = {}) const {
     TensorShapeVector tmp;
     return GetAttrs(name, tmp).IsOK() ? tmp : default_value;
   }
@@ -100,43 +100,43 @@ class OpNodeProtoHelper {
      Get repeated attributes
   */
   template <typename T>
-  MUST_USE_RESULT Status GetAttrs(const std::string& name, std::vector<T>& values) const;
+  Status GetAttrs(const std::string& name, std::vector<T>& values) const;
 
   template <typename T>
-  MUST_USE_RESULT Status GetAttrs(const std::string& name, gsl::span<T> values) const;
+  Status GetAttrs(const std::string& name, gsl::span<T> values) const;
 
-  MUST_USE_RESULT Status GetAttrsStringRefs(const std::string& name,
-                                            std::vector<std::reference_wrapper<const std::string>>& refs) const;
+  Status GetAttrsStringRefs(const std::string& name,
+                            std::vector<std::reference_wrapper<const std::string>>& refs) const;
 
-  uint32_t GetPrimitiveAttrElementCount(ONNX_NAMESPACE::AttributeProto_AttributeType type,
-                                        const std::string& name) const noexcept;
+  [[nodiscard]] uint32_t GetPrimitiveAttrElementCount(ONNX_NAMESPACE::AttributeProto_AttributeType type,
+                                                      const std::string& name) const noexcept;
 
-  bool HasPrimitiveAttribute(ONNX_NAMESPACE::AttributeProto_AttributeType type,
-                             const std::string& name) const noexcept;
+  [[nodiscard]] bool HasPrimitiveAttribute(ONNX_NAMESPACE::AttributeProto_AttributeType type,
+                                           const std::string& name) const noexcept;
 
-  uint32_t GetInputCount() const {
+  [[nodiscard]] uint32_t GetInputCount() const {
     return gsl::narrow_cast<uint32_t>(impl_->getNumInputs());
   }
 
-  uint32_t GetOutputCount() const {
+  [[nodiscard]] uint32_t GetOutputCount() const {
     return gsl::narrow_cast<uint32_t>(impl_->getNumOutputs());
   }
 
-  const ONNX_NAMESPACE::TypeProto* GetInputType(size_t index) const {
+  [[nodiscard]] const ONNX_NAMESPACE::TypeProto* GetInputType(size_t index) const {
     return impl_->getInputType(index);
   }
 
-  const ONNX_NAMESPACE::TypeProto* GetOutputType(size_t index) const {
+  [[nodiscard]] const ONNX_NAMESPACE::TypeProto* GetOutputType(size_t index) const {
     // Work around lack of a const method from the onnx InferenceContext interface
     return const_cast<Impl_t*>(impl_)->getOutputType(index);
   }
 
   // Try to query an attribute, returning nullptr if it doesn't exist
-  const ONNX_NAMESPACE::AttributeProto* TryGetAttribute(const std::string& name) const {
+  [[nodiscard]] const ONNX_NAMESPACE::AttributeProto* TryGetAttribute(const std::string& name) const {
     return impl_->getAttribute(name);
   }
 
-  const ONNX_NAMESPACE::AttributeProto* GetAttribute(const std::string& name) const {
+  [[nodiscard]] const ONNX_NAMESPACE::AttributeProto* GetAttribute(const std::string& name) const {
     const ONNX_NAMESPACE::AttributeProto* attr = TryGetAttribute(name);
     ORT_ENFORCE(attr != nullptr);
     return attr;
diff --git a/include/onnxruntime/core/framework/tensor.h b/include/onnxruntime/core/framework/tensor.h
index 7f3f26fa4aa02..a867ab6066485 100644
--- a/include/onnxruntime/core/framework/tensor.h
+++ b/include/onnxruntime/core/framework/tensor.h
@@ -3,8 +3,9 @@
 
 #pragma once
 
-#include <stddef.h>
+#include <cstddef>
 #include <iostream>
+#include <memory>
 #include <string>
 #include <vector>
 
@@ -37,7 +38,8 @@ namespace onnxruntime {
 
 class Tensor final {
  public:
-  // NB! Removing Create() methods returning unique_ptr<Tensor>. Still available in other EPs that are dynamically linked.
+  // NB! Removing Create() methods returning unique_ptr<Tensor>.
+  // Still available in other EPs that are dynamically linked.
   // Strive not to allocate Tensor with new/delete as it is a shallow class and using it by value is just fine.
   // Use InitOrtValue() methods to allocate for OrtValue.
 
@@ -45,105 +47,104 @@ class Tensor final {
 
   /**
    * Create tensor with given type, shape, pre-allocated memory and allocator info.
-   * This function won't check if the preallocated buffer(p_data) has enough room for the shape.
-   * \param p_type Data type of the tensor
+   * This function does not check if the preallocated buffer(p_data) has enough room for the shape.
+   * \param elt_type Data type of the tensor elements.
    * \param shape Shape of the tensor
    * \param p_data A preallocated buffer. Can be NULL if the shape is empty.
-   *              Tensor does not own the data and will not delete it
-   * \param alloc Where the buffer('p_data') was allocated from
+   *               Tensor does not own the data and will not delete it
+   * \param location Memory info for location of p_data.
    * \param offset Offset in bytes to start of Tensor within p_data.
    * \param strides Strides span. Can be empty if the tensor is contiguous.
    */
-  Tensor(MLDataType p_type, const TensorShape& shape, void* p_data, const OrtMemoryInfo& alloc,
+  Tensor(MLDataType elt_type, const TensorShape& shape, void* p_data, const OrtMemoryInfo& location,
          ptrdiff_t offset = 0, gsl::span<const int64_t> strides = {});
 
-  /// <summary>
-  /// Creates an instance of Tensor on the heap using the appropriate __ctor and
-  /// initializes OrtValue with it.
-  /// </summary>
-  /// <param name="p_type"></param>
-  /// <param name="shape"></param>
-  /// <param name="p_data"></param>
-  /// <param name="info"></param>
-  /// <param name="offset"></param>
-  /// <param name="strides"></param>
-  static void InitOrtValue(MLDataType p_type, const TensorShape& shape,
-                           void* p_data, const OrtMemoryInfo& location,
-                           OrtValue& ort_value, ptrdiff_t offset = 0,
-                           gsl::span<const int64_t> strides = {});
-
-  /// <summary>
-  /// Creates an instance of Tensor who own the pre-allocated buffer.
-  /// </summary>
-  /// <param name="p_type"></param>
-  /// <param name="shape"></param>
-  /// <param name="p_data"></param>
-  /// <param name="allocator"></param>
-  /// <param name="offset"></param>
-  /// <param name="strides"></param>
-  static void InitOrtValue(MLDataType p_type, const TensorShape& shape,
-                           void* p_data, std::shared_ptr<IAllocator> allocator,
-                           OrtValue& ort_value, ptrdiff_t offset = 0,
-                           gsl::span<const int64_t> strides = {});
-
-  static size_t CalculateTensorStorageSize(MLDataType p_type,
-                                           const TensorShape& shape,
-                                           gsl::span<const int64_t> strides = {});
-
   /**
-   * Deprecated. The original design is this Tensor class won't do any allocation / release.
-   * However, this function will allocate the buffer for the shape, and do placement new if p_type is string tensor.
-   */
-  Tensor(MLDataType p_type, const TensorShape& shape, std::shared_ptr<IAllocator> allocator,
-         gsl::span<const int64_t> strides = {});
-
-  /// <summary>
-  /// Creates an instance of Tensor on the heap using the appropriate __ctor and
-  /// initializes OrtValue with it.
-  /// </summary>
-  /// <param name="elt_type"></param>
-  /// <param name="shape"></param>
-  /// <param name="allocator"></param>
-  /// <param name="ort_value"></param>
-  /// <param name="strides"></param>
-  static void InitOrtValue(MLDataType elt_type,
-                           const TensorShape& shape,
-                           std::shared_ptr<IAllocator> allocator,
-                           OrtValue& ort_value,
-                           gsl::span<const int64_t> strides = {});
-
-  /// <summary>
-  /// Creates an instance of Tensor on the heap using the appropriate __ctor and
-  /// initializes OrtValue with it.
-  /// </summary>
-  /// <param name="tensor"></param>
-  /// <param name="ort_value"></param>
-  static void InitOrtValue(Tensor&& tensor, OrtValue& ort_value);
-
-  /**
-   * Create tensor with given type, shape, pre-allocated memory and allocator which will be used to free the pre-allocated memory.
-   * This function won't check if the preallocated buffer(p_data) has enough room for the shape.
-   * However, this function will de-allocate the buffer upon the tensor getting destructed.
-   * \param p_type Data type of the tensor
+   * Create tensor with given type, shape, pre-allocated memory and allocator which will be used to free the
+   * pre-allocated memory. The Tensor will take over ownership of p_data.
+   * This function does not check if the preallocated buffer(p_data) has enough room for the shape.
+   * \param elt_type Data type of the tensor elements.
    * \param shape Shape of the tensor
    * \param p_data A preallocated buffer. Can be NULL if the shape is empty.
-   *              Tensor will own the memory and will delete it when the tensor instance is destructed.
+   *               Tensor will own the memory and will delete it when the tensor instance is destructed.
    * \param deleter Allocator used to free the pre-allocated memory
    * \param offset Offset in bytes to start of Tensor within p_data.
    * \param strides Strides span. Can be empty if the tensor is contiguous.
    */
-  Tensor(MLDataType p_type, const TensorShape& shape, void* p_data, std::shared_ptr<IAllocator> deleter,
+  Tensor(MLDataType elt_type, const TensorShape& shape, void* p_data, std::shared_ptr<IAllocator> deleter,
          ptrdiff_t offset = 0, gsl::span<const int64_t> strides = {});
 
+  /// <summary>
+  /// Create a Tensor that allocates and owns the buffer required for the specified shape.
+  /// </summary>
+  /// <param name="elt_type">Data type of the tensor elements.</param>
+  /// <param name="shape">Tensor shape.</param>
+  /// <param name="allocator">Allocator to use to create and free buffer.</param>
+  Tensor(MLDataType elt_type, const TensorShape& shape, std::shared_ptr<IAllocator> allocator);
+
   ~Tensor();
 
   // Move is allowed
   ORT_DISALLOW_COPY_AND_ASSIGNMENT(Tensor);
 
   Tensor(Tensor&& other) noexcept;
-
   Tensor& operator=(Tensor&& other) noexcept;
 
+  /// <summary>
+  /// Creates an instance of Tensor on the heap and initializes OrtValue with it.
+  /// </summary>
+  /// <param name="elt_type">Data type of the tensor elements.</param>
+  /// <param name="shape">Tensor shape.</param>
+  /// <param name="p_data">Tensor data.</param>
+  /// <param name="location">Memory info for location of p_data.</param>
+  /// <param name="ort_value">OrtValue to populate with Tensor.</param>
+  /// <param name="offset">Optional offset if Tensor refers to a subset of p_data.</param>
+  /// <param name="strides">Optional strides if Tensor refers to a subset of p_data.</param>
+  static void InitOrtValue(MLDataType elt_type, const TensorShape& shape, void* p_data, const OrtMemoryInfo& location,
+                           OrtValue& ort_value,
+                           ptrdiff_t offset = 0, gsl::span<const int64_t> strides = {});
+
+  /// <summary>
+  /// Creates an instance of Tensor on the heap which will take over ownership of the pre-allocated buffer.
+  /// </summary>
+  /// <param name="elt_type">Data type of the tensor elements.</param>
+  /// <param name="shape"Tensor shape.</param>
+  /// <param name="p_data">Tensor data.</param>
+  /// <param name="allocator">Allocator that was used to create p_data and will be used to free it.</param>
+  /// <param name="ort_value">OrtValue to populate with Tensor.</param>
+  /// <param name="offset">Optional offset if Tensor refers to a subset of p_data.</param>
+  /// <param name="strides">Optional strides if Tensor refers to a subset of p_data.</param>
+  static void InitOrtValue(MLDataType elt_type, const TensorShape& shape, void* p_data,
+                           std::shared_ptr<IAllocator> allocator,
+                           OrtValue& ort_value,
+                           ptrdiff_t offset = 0, gsl::span<const int64_t> strides = {});
+
+  /// <summary>
+  /// Creates an instance of Tensor on the heap and initializes OrtValue with it.
+  /// The Tensor instance will allocate and own the data required for `shape`.
+  /// </summary>
+  /// <param name="elt_type">Data type of the tensor elements.</param>
+  /// <param name="shape">Tensor shape.</param>
+  /// <param name="allocator">Allocator that was used to create p_data and will be used to free it.</param>
+  /// <param name="ort_value">OrtValue to populate with Tensor.</param>
+  static void InitOrtValue(MLDataType elt_type, const TensorShape& shape, std::shared_ptr<IAllocator> allocator,
+                           OrtValue& ort_value);
+
+  /// <summary>
+  /// Initializes OrtValue with an existing Tensor.
+  /// </summary>
+  /// <param name="tensor">Tensor.</param>
+  /// <param name="ort_value">OrtValue to populate with Tensor.</param>
+  static void InitOrtValue(Tensor&& tensor, OrtValue& ort_value);
+
+  /// <summary>
+  /// Calculate the required storage for the tensor.
+  /// </summary>
+  /// <param name="elt_type">Data type of the tensor elements.</param>
+  /// <param name="shape">Tensor shape.</param>
+  /// <returns>Bytes required.</returns>
+  static size_t CalculateTensorStorageSize(MLDataType elt_type, const TensorShape& shape);
+
   /**
      Returns the data type.
   */
@@ -294,7 +295,7 @@ class Tensor final {
 
   // More API methods.
  private:
-  void Init(MLDataType p_type,
+  void Init(MLDataType elt_type,
             const TensorShape& shape,
             void* p_raw_data,
             AllocatorPtr deleter,
diff --git a/include/onnxruntime/core/framework/tensor_shape.h b/include/onnxruntime/core/framework/tensor_shape.h
index b3783696b8d78..82a1c1de83523 100644
--- a/include/onnxruntime/core/framework/tensor_shape.h
+++ b/include/onnxruntime/core/framework/tensor_shape.h
@@ -2,34 +2,17 @@
 // Licensed under the MIT License.
 
 #pragma once
-#include <iosfwd>
-#include <vector>
+
 #include <algorithm>
-#include <string>
 #include <cstring>
-#include "core/common/gsl.h"
-#include "onnxruntime_config.h"
-
-#ifndef DISABLE_ABSEIL
-// Need to include abseil inlined_vector.h header directly here
-// as hash tables cause CUDA 10.2 compilers to fail. inlined_vector.h is fine.
-#ifdef _MSC_VER
-#pragma warning(push)
-// C4127: conditional expression is constant
-#pragma warning(disable : 4127)
-// C4324: structure was padded due to alignment specifier
-// Usage of alignas causes some internal padding in places.
-#pragma warning(disable : 4324)
-#endif
-
-#include <absl/container/inlined_vector.h>
-
-#ifdef _MSC_VER
-#pragma warning(pop)
-#endif
-#endif  // DISABLE_ABSEIL
+#include <iosfwd>
+#include <string>
+#include <vector>
 
+#include "core/common/gsl.h"
+#include "core/common/inlined_containers_fwd.h"
 #include "core/common/span_utils.h"
+#include "onnxruntime_config.h"
 
 namespace onnxruntime {
 #ifdef __GNUC__
@@ -41,18 +24,10 @@ namespace onnxruntime {
 
 constexpr size_t kTensorShapeSmallBufferElementsSize = 5;
 
-#ifndef DISABLE_ABSEIL
 // Use this type to build a shape and then create TensorShape.
-using TensorShapeVector = absl::InlinedVector<int64_t, kTensorShapeSmallBufferElementsSize>;
-#else
-class TensorShapeVector : public std::vector<int64_t> {
-  using Base = std::vector<int64_t>;
-
- public:
-  using Base::Base;
-};
-
-#endif  // DISABLE_ABSEIL
+// We opt to re-use a common instantiation instead of a typedef with kTensorShapeSmallBufferElementsSize
+// To reduce on binary size.
+using TensorShapeVector = InlinedVector<int64_t>;
 
 inline TensorShapeVector ToShapeVector(const gsl::span<const int64_t>& span) {
   TensorShapeVector out;
@@ -194,9 +169,7 @@ class TensorShape {
 
   friend struct ProviderHostImpl;  // So that the shared provider interface can access Allocate
 };
-#ifdef __GNUC__
-#pragma GCC diagnostic pop
-#endif
+
 // operator<< to nicely output to a stream
 std::ostream& operator<<(std::ostream& out, const TensorShape& shape);
 
diff --git a/include/onnxruntime/core/graph/graph.h b/include/onnxruntime/core/graph/graph.h
index f153e88909b8d..22827d43b200f 100644
--- a/include/onnxruntime/core/graph/graph.h
+++ b/include/onnxruntime/core/graph/graph.h
@@ -3,6 +3,7 @@
 
 #pragma once
 
+#include <functional>
 #include <limits>
 #include <memory>
 #include <string>
@@ -83,10 +84,10 @@ class Node {
        gsl::span<NodeArg* const> output_args,
        const NodeAttributes* attributes,
        std::string_view domain) {
-    Init(std::string{name}, std::string{op_type}, std::string{description},
-         std::vector<NodeArg*>{input_args.begin(), input_args.end()},
-         std::vector<NodeArg*>{output_args.begin(), output_args.end()},
-         attributes, std::string{domain});
+    Init(name, op_type, description,
+         input_args,
+         output_args,
+         attributes, domain);
   }
 #endif
 
@@ -396,6 +397,10 @@ class Node {
 #if !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD)
   /** Remove the specified attribute from this Node */
   bool ClearAttribute(const std::string& attr_name);
+
+  /** Gets the Node's mutable attributes. */
+  NodeAttributes& GetMutableAttributes() noexcept { return attributes_; }
+
 #endif  // !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD)
 
   /**
@@ -405,8 +410,6 @@ class Node {
   int PruneRemovableAttributes(gsl::span<const std::string> removable_attributes);
 
 #if !defined(ORT_MINIMAL_BUILD)
-  /** Gets the Node's mutable attributes. */
-  NodeAttributes& GetMutableAttributes() noexcept { return attributes_; }
 
   /** Gets the Graph instance that is instantiated from a GraphProto attribute during Graph::Resolve.
   @param attr_name Attribute name for the GraphProto attribute.
@@ -440,6 +443,13 @@ class Node {
     return attr_to_subgraph_map_;
   }
 
+  /** Gets a map of attribute name to the mutable Graph instances for all subgraphs of the Node.
+   * @returns a mutable map of mutable subgraphs.
+   */
+  std::unordered_map<std::string, gsl::not_null<Graph*>>& GetMutableMapOfAttributeNameToSubgraph() {
+    return attr_to_subgraph_map_;
+  }
+
   /** Gets a map of attribute name to the const Graph instances for all subgraphs of the Node.
   @returns Map of the attribute name that defines the subgraph to the subgraph's Graph instance.
            nullptr if the Node has no subgraphs.
@@ -563,13 +573,13 @@ class Node {
   ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(Node);
 
 #if !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD) || defined(ORT_MINIMAL_BUILD_CUSTOM_OPS)
-  void Init(const std::string& name,
-            const std::string& op_type,
-            const std::string& description,
-            const std::vector<NodeArg*>& input_args,
-            const std::vector<NodeArg*>& output_args,
+  void Init(std::string_view name,
+            std::string_view op_type,
+            std::string_view description,
+            gsl::span<NodeArg* const> input_args,
+            gsl::span<NodeArg* const> output_args,
             const NodeAttributes* attributes,
-            const std::string& domain);
+            std::string_view domain);
 #endif
 
 #if !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD)
@@ -585,7 +595,7 @@ class Node {
   // create a Graph instance for an attribute that contains a GraphProto
   void CreateSubgraph(const std::string& attr_name);
 
-  const std::vector<std::unique_ptr<Graph>>& MutableSubgraphs() noexcept { return subgraphs_; }
+  std::vector<std::unique_ptr<Graph>>& MutableSubgraphs() noexcept { return subgraphs_; }
 
   // validate and update the input arg count
   common::Status UpdateInputArgCount();
@@ -658,7 +668,7 @@ class Node {
 The Graph representation containing the graph inputs and outputs, the Node instances,
 and the edges connecting the nodes.
 */
-class Graph {
+class Graph {  // NOLINT(clang-analyzer-optin.performance.Padding): preserve existing data member order for readability
  public:
   /** Gets the Graph name. */
   const std::string& Name() const noexcept;
@@ -1133,6 +1143,26 @@ class Graph {
   */
   Node& FuseSubGraph(const IndexedSubGraph& sub_graph, const std::string& fused_node_name);
 
+  /**
+    Directly insert one of the If node branches into this Graph.
+    `If` node condition must be a constant. The function would
+    rename the nodes of the corresponding subgraph to make sure there is no conflict.
+
+    Explicit and implicit inputs references stay the same.
+
+    All of the outputs of the subgraph being inlined should be renamed
+    to the outputs of the If node.
+
+    The function will process any subgraphs in each of the nodes being inlined,
+    and will rename any references to the new names introduced.
+
+    @param condition_value If condition value
+    @param if_node - the node that contains the graph_to_inline. This node is going
+    to be deleted and replaced by the corresponding graph (either then or else)
+    @param logger
+  */
+  Status InlineIfSubgraph(bool condition_value, Node& if_node, const logging::Logger& logger);
+
   /**
   Directly insert the nodes in the function Node provided into this Graph.
   The Graph needs to be Resolve()d after this call.
@@ -1141,8 +1171,22 @@ class Graph {
   */
   Status InlineFunction(Node& node);
 
+  /**
+  Directly insert the nodes in the function proto provided into the graph.
+  The function converts Constant nodes into the initializers in the graph.
+  It then creates a node in the graph for each of the function nodes.
+  All of the names are expected to be specialized, and, therefore unique.
+  See function_utils::Specialize().
+
+  The Graph needs to be Resolve()d after this call.
+  @param func_to_inline
+  @returns Status indicating success or providing an error message.
+  */
+
+  Status InlineFunctionProto(const ONNX_NAMESPACE::FunctionProto& func_to_inline);
+
   /** Mark a NodeArg name as coming from the outer scope when programmatically constructing a Graph that will
-  be used as a GraphProto attribute in another Node..
+  be used as a GraphProto attribute in another Node.
   e.g. when creating a Graph instance that will be used as a subgraph in a control flow operator, it is necessary to
   define placeholder NodeArgs for outer scope values. This prevents these values from becoming explicit graph inputs
   when the Graph is resolved.
@@ -1391,6 +1435,13 @@ class Graph {
   Node& AddNode(const ONNX_NAMESPACE::NodeProto& node_proto,
                 const ArgNameToTypeMap& name_to_type);
 
+  /** Helper that converts and adds constant node proto to an initializer in the graph.
+   @param constant_node_proto Constant node to convert
+   @param new_name use the new name for the initializer.
+  */
+  Status AddConstantProtoAsInitializer(const ONNX_NAMESPACE::NodeProto& constant_node_proto,
+                                       std::optional<std::string_view> new_name);
+
 #endif
 
   Version IrVersion() const noexcept {
diff --git a/include/onnxruntime/core/platform/EigenNonBlockingThreadPool.h b/include/onnxruntime/core/platform/EigenNonBlockingThreadPool.h
index a57385f6e23f1..f9b694efb936f 100644
--- a/include/onnxruntime/core/platform/EigenNonBlockingThreadPool.h
+++ b/include/onnxruntime/core/platform/EigenNonBlockingThreadPool.h
@@ -278,7 +278,7 @@ class ThreadPoolProfiler {
   int num_threads_;
 #ifdef _MSC_VER
 #pragma warning(push)
-// C4324: structure was padded due to alignment specifier
+  // C4324: structure was padded due to alignment specifier
 #pragma warning(disable : 4324)
 #endif  // _MSC_VER
   struct ORT_ALIGN_TO_AVOID_FALSE_SHARING ChildThreadStat {
diff --git a/include/onnxruntime/core/providers/cuda/cuda_context.h b/include/onnxruntime/core/providers/cuda/cuda_context.h
index 13c176dad3cc5..d73d551920d47 100644
--- a/include/onnxruntime/core/providers/cuda/cuda_context.h
+++ b/include/onnxruntime/core/providers/cuda/cuda_context.h
@@ -1,5 +1,13 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
+
+// This header is to expose a context for cuda custom ops.
+// By the context, a custom cuda operator could fetch existing resources,
+// such as cuda stream and cudnn handle, for reusing.
+
+// For concrete usage, pls find page here:
+// https://onnxruntime.ai/docs/reference/operators/add-custom-op.html#custom-ops-for-cuda-and-rocm
+
 #pragma once
 
 #define ORT_CUDA_CTX
@@ -19,8 +27,9 @@ struct CudaContext : public CustomOpContext {
   cudaStream_t cuda_stream = {};
   cudnnHandle_t cudnn_handle = {};
   cublasHandle_t cublas_handle = {};
+  OrtAllocator* deferred_cpu_allocator = {};
 
-  void Init(const OrtKernelContext& kernel_ctx) override {
+  void Init(const OrtKernelContext& kernel_ctx) {
     const auto& ort_api = Ort::GetApi();
     void* resource = {};
     OrtStatus* status = nullptr;
@@ -44,6 +53,36 @@ struct CudaContext : public CustomOpContext {
       ORT_CXX_API_THROW("failed to fetch cublas handle", OrtErrorCode::ORT_RUNTIME_EXCEPTION);
     }
     cublas_handle = reinterpret_cast<cublasHandle_t>(resource);
+
+    resource = {};
+    status = ort_api.KernelContext_GetResource(&kernel_ctx, ORT_CUDA_RESOUCE_VERSION, CudaResource::deferred_cpu_allocator_t, &resource);
+    if (status) {
+      ORT_CXX_API_THROW("failed to fetch deferred cpu allocator", OrtErrorCode::ORT_RUNTIME_EXCEPTION);
+    }
+    deferred_cpu_allocator = reinterpret_cast<OrtAllocator*>(resource);
+  }
+
+  void* AllocDeferredCpuMem(size_t size) const {
+    if (0 == size) {
+      return {};
+    }
+    const auto& ort_api = Ort::GetApi();
+    void* mem = {};
+    auto status = ort_api.AllocatorAlloc(deferred_cpu_allocator, size, &mem);
+    if (status) {
+      ORT_CXX_API_THROW("failed to allocate deferred cpu memory", OrtErrorCode::ORT_RUNTIME_EXCEPTION);
+    }
+    return mem;
+  }
+
+  void FreeDeferredCpuMem(void* mem) const {
+    if (mem) {
+      const auto& ort_api = Ort::GetApi();
+      auto status = ort_api.AllocatorFree(deferred_cpu_allocator, mem);
+      if (status) {
+        ORT_CXX_API_THROW("failed to free deferred cpu memory", OrtErrorCode::ORT_RUNTIME_EXCEPTION);
+      }
+    }
   }
 };
 
diff --git a/include/onnxruntime/core/providers/cuda/cuda_provider_options.h b/include/onnxruntime/core/providers/cuda/cuda_provider_options.h
index eaf0e5337b8b6..82bb8ba83be4a 100644
--- a/include/onnxruntime/core/providers/cuda/cuda_provider_options.h
+++ b/include/onnxruntime/core/providers/cuda/cuda_provider_options.h
@@ -1,8 +1,11 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
+// Copyright (c) 2023 NVIDIA Corporation.
 // Licensed under the MIT License.
 
 #pragma once
 
+#include <limits>
+
 #include "onnxruntime_c_api.h"
 #include "core/framework/arena_extend_strategy.h"
 
@@ -32,4 +35,6 @@ struct OrtCUDAProviderOptionsV2 {
   int tunable_op_max_tuning_duration_ms = 0;                                                                   // Max tuning duration time limit for TunableOp.
   int enable_skip_layer_norm_strict_mode = 0;                                                                  // flag specifying if SkipLayerNorm is in strict mode. If true, use LayerNormalization kernel.
                                                                                                                // The strict mode has better accuracy but lower performance.
+  int prefer_nhwc = 0;                                                                                         // make the CUDA EP NHWC preferred
+  int use_ep_level_unified_stream = 0;                                                                         // flag specifying if ep level stream is used or not
 };
diff --git a/include/onnxruntime/core/providers/cuda/cuda_resource.h b/include/onnxruntime/core/providers/cuda/cuda_resource.h
index e46fc5b4219dd..8c3ed46ade6a1 100644
--- a/include/onnxruntime/core/providers/cuda/cuda_resource.h
+++ b/include/onnxruntime/core/providers/cuda/cuda_resource.h
@@ -3,10 +3,11 @@
 
 #include "core/providers/resource.h"
 
-#define ORT_CUDA_RESOUCE_VERSION 1
+#define ORT_CUDA_RESOUCE_VERSION 2
 
 enum CudaResource : int {
   cuda_stream_t = cuda_resource_offset,
   cudnn_handle_t,
-  cublas_handle_t
+  cublas_handle_t,
+  deferred_cpu_allocator_t,
 };
\ No newline at end of file
diff --git a/include/onnxruntime/core/providers/custom_op_context.h b/include/onnxruntime/core/providers/custom_op_context.h
index 547f9a90aff85..8f3d2476d4fdb 100644
--- a/include/onnxruntime/core/providers/custom_op_context.h
+++ b/include/onnxruntime/core/providers/custom_op_context.h
@@ -3,11 +3,8 @@
 
 #pragma once
 
-#include <core/session/onnxruntime_cxx_api.h>
-
 // CustomOpContext defines an interface allowing a custom op to access ep-specific resources.
 struct CustomOpContext {
   CustomOpContext() = default;
   virtual ~CustomOpContext(){};
-  virtual void Init(const OrtKernelContext&){};
 };
\ No newline at end of file
diff --git a/include/onnxruntime/core/providers/dml/dml_provider_factory.h b/include/onnxruntime/core/providers/dml/dml_provider_factory.h
index 0782d2d9ed760..7d7f05193f486 100644
--- a/include/onnxruntime/core/providers/dml/dml_provider_factory.h
+++ b/include/onnxruntime/core/providers/dml/dml_provider_factory.h
@@ -30,6 +30,35 @@ typedef struct IDMLDevice IDMLDevice;
 extern "C" {
 #endif
 
+enum OrtDmlPerformancePreference {
+  Default = 0,
+  HighPerformance = 1,
+  MinimumPower = 2
+};
+
+enum OrtDmlDeviceFilter : uint32_t {
+#ifdef ENABLE_NPU_ADAPTER_ENUMERATION
+  Any = 0xffffffff,
+  Gpu = 1 << 0,
+  Npu = 1 << 1,
+#else
+  Gpu = 1 << 0,
+#endif
+};
+
+inline OrtDmlDeviceFilter operator~(OrtDmlDeviceFilter a) { return (OrtDmlDeviceFilter) ~(int)a; }
+inline OrtDmlDeviceFilter operator|(OrtDmlDeviceFilter a, OrtDmlDeviceFilter b) { return (OrtDmlDeviceFilter)((int)a | (int)b); }
+inline OrtDmlDeviceFilter operator&(OrtDmlDeviceFilter a, OrtDmlDeviceFilter b) { return (OrtDmlDeviceFilter)((int)a & (int)b); }
+inline OrtDmlDeviceFilter operator^(OrtDmlDeviceFilter a, OrtDmlDeviceFilter b) { return (OrtDmlDeviceFilter)((int)a ^ (int)b); }
+inline OrtDmlDeviceFilter& operator|=(OrtDmlDeviceFilter& a, OrtDmlDeviceFilter b) { return (OrtDmlDeviceFilter&)((int&)a |= (int)b); }
+inline OrtDmlDeviceFilter& operator&=(OrtDmlDeviceFilter& a, OrtDmlDeviceFilter b) { return (OrtDmlDeviceFilter&)((int&)a &= (int)b); }
+inline OrtDmlDeviceFilter& operator^=(OrtDmlDeviceFilter& a, OrtDmlDeviceFilter b) { return (OrtDmlDeviceFilter&)((int&)a ^= (int)b); }
+
+struct OrtDmlDeviceOptions {
+  OrtDmlPerformancePreference Preference;
+  OrtDmlDeviceFilter Filter;
+};
+
 /**
  * [[deprecated]]
  * This export is deprecated.
@@ -99,6 +128,13 @@ struct OrtDmlApi {
    * This API gets the D3D12 resource when an OrtValue has been allocated by the DML EP.
    */
   ORT_API2_STATUS(GetD3D12ResourceFromAllocation, _In_ OrtAllocator* provider, _In_ void* dml_resource, _Out_ ID3D12Resource** d3d_resource);
+
+  /**
+   * SessionOptionsAppendExecutionProvider_DML2
+   * Creates a DirectML Execution Provider given the supplied device options that contain a performance preference
+   * (high power, low power, or default) and a device filter (None, GPU, or NPU).
+   */
+  ORT_API2_STATUS(SessionOptionsAppendExecutionProvider_DML2, _In_ OrtSessionOptions* options, OrtDmlDeviceOptions* device_opts);
 };
 
 #ifdef __cplusplus
diff --git a/include/onnxruntime/core/providers/rocm/rocm_context.h b/include/onnxruntime/core/providers/rocm/rocm_context.h
index ff62094f3f439..5f04289a8c6e0 100644
--- a/include/onnxruntime/core/providers/rocm/rocm_context.h
+++ b/include/onnxruntime/core/providers/rocm/rocm_context.h
@@ -18,7 +18,7 @@ struct RocmContext : public CustomOpContext {
   miopenHandle_t miopen_handle = {};
   rocblas_handle rblas_handle = {};
 
-  void Init(const OrtKernelContext& kernel_ctx) override {
+  void Init(const OrtKernelContext& kernel_ctx) {
     const auto& ort_api = Ort::GetApi();
     void* resource = {};
     OrtStatus* status = nullptr;
diff --git a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h
deleted file mode 100644
index 44debc901cb77..0000000000000
--- a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h
+++ /dev/null
@@ -1,14 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-#include "onnxruntime_c_api.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-ORT_API_STATUS(OrtSessionOptionsAppendExecutionProvider_Tensorrt, _In_ OrtSessionOptions* options, int device_id);
-
-#ifdef __cplusplus
-}
-#endif
diff --git a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h
index e7d0f9f03ade9..680ce1cc5b9a2 100644
--- a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h
+++ b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h
@@ -11,38 +11,39 @@
 /// User can only get the instance of OrtTensorRTProviderOptionsV2 via CreateTensorRTProviderOptions.
 /// </summary>
 struct OrtTensorRTProviderOptionsV2 {
-  int device_id;                                // cuda device id.
-  int has_user_compute_stream;                  // indicator of user specified CUDA compute stream.
-  void* user_compute_stream;                    // user specified CUDA compute stream.
-  int trt_max_partition_iterations;             // maximum iterations for TensorRT parser to get capability
-  int trt_min_subgraph_size;                    // minimum size of TensorRT subgraphs
-  size_t trt_max_workspace_size;                // maximum workspace size for TensorRT.
-  int trt_fp16_enable;                          // enable TensorRT FP16 precision. Default 0 = false, nonzero = true
-  int trt_int8_enable;                          // enable TensorRT INT8 precision. Default 0 = false, nonzero = true
-  const char* trt_int8_calibration_table_name;  // TensorRT INT8 calibration table name.
-  int trt_int8_use_native_calibration_table;    // use native TensorRT generated calibration table. Default 0 = false, nonzero = true
-  int trt_dla_enable;                           // enable DLA. Default 0 = false, nonzero = true
-  int trt_dla_core;                             // DLA core number. Default 0
-  int trt_dump_subgraphs;                       // dump TRT subgraph. Default 0 = false, nonzero = true
-  int trt_engine_cache_enable;                  // enable engine caching. Default 0 = false, nonzero = true
-  const char* trt_engine_cache_path;            // specify engine cache path
-  int trt_engine_decryption_enable;             // enable engine decryption. Default 0 = false, nonzero = true
-  const char* trt_engine_decryption_lib_path;   // specify engine decryption library path
-  int trt_force_sequential_engine_build;        // force building TensorRT engine sequentially. Default 0 = false, nonzero = true
-  int trt_context_memory_sharing_enable;        // enable context memory sharing between subgraphs. Default 0 = false, nonzero = true
-  int trt_layer_norm_fp32_fallback;             // force Pow + Reduce ops in layer norm to FP32. Default 0 = false, nonzero = true
-  int trt_timing_cache_enable;                  // enable TensorRT timing cache. Default 0 = false, nonzero = true
-  int trt_force_timing_cache;                   // force the TensorRT cache to be used even if device profile does not match. Default 0 = false, nonzero = true
-  int trt_detailed_build_log;                   // Enable detailed build step logging on TensorRT EP with timing for each engine build. Default 0 = false, nonzero = true
-  int trt_build_heuristics_enable;              // Build engine using heuristics to reduce build time. Default 0 = false, nonzero = true
-  int trt_sparsity_enable;                      // Control if sparsity can be used by TRT. Default 0 = false, 1 = true
-  int trt_builder_optimization_level;           // Set the builder optimization level. WARNING: levels below 3 do not guarantee good engine performance, but greatly improve build time.  Default 3, valid range [0-5]
-  int trt_auxiliary_streams;                    // Set maximum number of auxiliary streams per inference stream. Setting this value to 0 will lead to optimal memory usage. Default -1 = heuristics
-  const char* trt_tactic_sources;               // pecify the tactics to be used by adding (+) or removing (-) tactics from the default
-                                                // tactic sources (default = all available tactics) e.g. "-CUDNN,+CUBLAS" available keys: "CUBLAS"|"CUBLAS_LT"|"CUDNN"|"EDGE_MASK_CONVOLUTIONS"
-  const char* trt_extra_plugin_lib_paths;       // specify extra TensorRT plugin library paths
-  const char* trt_profile_min_shapes;           // Specify the range of the input shapes to build the engine with
-  const char* trt_profile_max_shapes;           // Specify the range of the input shapes to build the engine with
-  const char* trt_profile_opt_shapes;           // Specify the range of the input shapes to build the engine with
-  int trt_cuda_graph_enable;                    // Enable CUDA graph in ORT TRT
+  int device_id{0};                                      // cuda device id.
+  int has_user_compute_stream{0};                        // indicator of user specified CUDA compute stream.
+  void* user_compute_stream{nullptr};                    // user specified CUDA compute stream.
+  int trt_max_partition_iterations{1000};                // maximum iterations for TensorRT parser to get capability
+  int trt_min_subgraph_size{1};                          // minimum size of TensorRT subgraphs
+  size_t trt_max_workspace_size{1 << 30};                // maximum workspace size for TensorRT.
+  int trt_fp16_enable{0};                                // enable TensorRT FP16 precision. Default 0 = false, nonzero = true
+  int trt_int8_enable{0};                                // enable TensorRT INT8 precision. Default 0 = false, nonzero = true
+  const char* trt_int8_calibration_table_name{nullptr};  // TensorRT INT8 calibration table name.
+  int trt_int8_use_native_calibration_table{0};          // use native TensorRT generated calibration table. Default 0 = false, nonzero = true
+  int trt_dla_enable{0};                                 // enable DLA. Default 0 = false, nonzero = true
+  int trt_dla_core{0};                                   // DLA core number. Default 0
+  int trt_dump_subgraphs{0};                             // dump TRT subgraph. Default 0 = false, nonzero = true
+  int trt_engine_cache_enable{0};                        // enable engine caching. Default 0 = false, nonzero = true
+  const char* trt_engine_cache_path{nullptr};            // specify engine cache path, defaults to the working directory
+  int trt_engine_decryption_enable{0};                   // enable engine decryption. Default 0 = false, nonzero = true
+  const char* trt_engine_decryption_lib_path{nullptr};   // specify engine decryption library path
+  int trt_force_sequential_engine_build{0};              // force building TensorRT engine sequentially. Default 0 = false, nonzero = true
+  int trt_context_memory_sharing_enable{0};              // enable context memory sharing between subgraphs. Default 0 = false, nonzero = true
+  int trt_layer_norm_fp32_fallback{0};                   // force Pow + Reduce ops in layer norm to FP32. Default 0 = false, nonzero = true
+  int trt_timing_cache_enable{0};                        // enable TensorRT timing cache. Default 0 = false, nonzero = true
+  const char* trt_timing_cache_path{nullptr};            // specify timing cache path, if none is provided the trt_engine_cache_path is used
+  int trt_force_timing_cache{0};                         // force the TensorRT cache to be used even if device profile does not match. Default 0 = false, nonzero = true
+  int trt_detailed_build_log{0};                         // Enable detailed build step logging on TensorRT EP with timing for each engine build. Default 0 = false, nonzero = true
+  int trt_build_heuristics_enable{0};                    // Build engine using heuristics to reduce build time. Default 0 = false, nonzero = true
+  int trt_sparsity_enable{0};                            // Control if sparsity can be used by TRT. Default 0 = false, 1 = true
+  int trt_builder_optimization_level{3};                 // Set the builder optimization level. WARNING: levels below 3 do not guarantee good engine performance, but greatly improve build time.  Default 3, valid range [0-5]
+  int trt_auxiliary_streams{-1};                         // Set maximum number of auxiliary streams per inference stream. Setting this value to 0 will lead to optimal memory usage. Default -1 = heuristics
+  const char* trt_tactic_sources{nullptr};               // pecify the tactics to be used by adding (+) or removing (-) tactics from the default
+                                                         // tactic sources (default = all available tactics) e.g. "-CUDNN,+CUBLAS" available keys: "CUBLAS"|"CUBLAS_LT"|"CUDNN"|"EDGE_MASK_CONVOLUTIONS"
+  const char* trt_extra_plugin_lib_paths{nullptr};       // specify extra TensorRT plugin library paths
+  const char* trt_profile_min_shapes{nullptr};           // Specify the range of the input shapes to build the engine with
+  const char* trt_profile_max_shapes{nullptr};           // Specify the range of the input shapes to build the engine with
+  const char* trt_profile_opt_shapes{nullptr};           // Specify the range of the input shapes to build the engine with
+  int trt_cuda_graph_enable{0};                          // Enable CUDA graph in ORT TRT
 };
diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h
index e483c67a0cfe6..cddad732104ed 100644
--- a/include/onnxruntime/core/session/onnxruntime_c_api.h
+++ b/include/onnxruntime/core/session/onnxruntime_c_api.h
@@ -299,6 +299,7 @@ ORT_RUNTIME_CLASS(DnnlProviderOptions);
 ORT_RUNTIME_CLASS(Op);
 ORT_RUNTIME_CLASS(OpAttr);
 ORT_RUNTIME_CLASS(Logger);
+ORT_RUNTIME_CLASS(ShapeInferContext);
 
 #ifdef _WIN32
 typedef _Return_type_success_(return == 0) OrtStatus* OrtStatusPtr;
@@ -598,9 +599,11 @@ typedef struct OrtTensorRTProviderOptions {
  * \see OrtApi::SessionOptionsAppendExecutionProvider_MIGraphX
  */
 typedef struct OrtMIGraphXProviderOptions {
-  int device_id;             // hip device id.
-  int migraphx_fp16_enable;  // enable MIGraphX FP16 precision. Default 0 = false, nonzero = true
-  int migraphx_int8_enable;  // enable MIGraphX INT8 precision. Default 0 = false, nonzero = true
+  int device_id;                                     // hip device id.
+  int migraphx_fp16_enable;                          // MIGraphX FP16 precision. Default 0 = false, nonzero = true
+  int migraphx_int8_enable;                          // MIGraphX INT8 precision. Default 0 = false, nonzero = true
+  int migraphx_use_native_calibration_table;         // MIGraphx INT8 cal table. Default 0 = false, noznero = true
+  const char* migraphx_int8_calibration_table_name;  // MIGraphx INT8 calibration table name
 } OrtMIGraphXProviderOptions;
 
 /** \brief OpenVINO Provider Options
@@ -610,7 +613,7 @@ typedef struct OrtMIGraphXProviderOptions {
 typedef struct OrtOpenVINOProviderOptions {
 #ifdef __cplusplus
   OrtOpenVINOProviderOptions() : device_type{},
-                                 enable_vpu_fast_compile{},
+                                 enable_npu_fast_compile{},
                                  device_id{},
                                  num_of_threads{},
                                  cache_dir{},
@@ -623,7 +626,7 @@ typedef struct OrtOpenVINOProviderOptions {
    * Valid settings are one of: "CPU_FP32", "CPU_FP16", "GPU_FP32", "GPU_FP16"
    */
   const char* device_type;
-  unsigned char enable_vpu_fast_compile;  ///< 0 = disabled, nonzero = enabled
+  unsigned char enable_npu_fast_compile;  ///< 0 = disabled, nonzero = enabled
   const char* device_id;
   size_t num_of_threads;  ///< 0 = Use default number of threads
   const char* cache_dir;  // path is set to empty by default
@@ -745,6 +748,8 @@ struct OrtApi {
 
   /** \brief Create an OrtEnv
    *
+   * \note Invoking this function will return the same instance of the environment as that returned by a previous call
+   * to another env creation function; all arguments to this function will be ignored.
    * \param[in] log_severity_level The log severity level.
    * \param[in] logid The log identifier.
    * \param[out] out Returned newly created OrtEnv. Must be freed with OrtApi::ReleaseEnv
@@ -755,17 +760,20 @@ struct OrtApi {
 
   /** \brief Create an OrtEnv
    *
+   * \note Invoking this function will return the same instance of the environment as that returned by a previous call
+   * to another env creation function; all arguments to this function will be ignored. If you want to provide your
+   * own logging function, consider setting it using the SetUserLoggingFunction API instead.
    * \param[in] logging_function A pointer to a logging function.
    * \param[in] logger_param A pointer to arbitrary data passed as the ::OrtLoggingFunction `param` parameter to
-   *                         `logging_function`.
+   *                         `logging_function`. This parameter is optional.
    * \param[in] log_severity_level The log severity level.
    * \param[in] logid The log identifier.
    * \param[out] out Returned newly created OrtEnv. Must be freed with OrtApi::ReleaseEnv
    *
    * \snippet{doc} snippets.dox OrtStatus Return Value
    */
-  ORT_API2_STATUS(CreateEnvWithCustomLogger, OrtLoggingFunction logging_function, _In_opt_ void* logger_param,
-                  OrtLoggingLevel log_severity_level, _In_ const char* logid, _Outptr_ OrtEnv** out);
+  ORT_API2_STATUS(CreateEnvWithCustomLogger, _In_ OrtLoggingFunction logging_function, _In_opt_ void* logger_param,
+                  _In_ OrtLoggingLevel log_severity_level, _In_ const char* logid, _Outptr_ OrtEnv** out);
 
   /** \brief Enable Telemetry
    *
@@ -3592,6 +3600,18 @@ struct OrtApi {
    *   "rpc_control_latency": QNN RPC control latency.
    *   "htp_performance_mode": QNN performance mode, options: "burst", "balanced", "default", "high_performance",
    *   "high_power_saver", "low_balanced", "low_power_saver", "power_saver", "sustained_high_performance". Default to "default".
+   *   "qnn_context_embed_mode", 1 means dump the QNN context binary into node attribute EPContext->ep_cache_context in the ONNX skeleton model.
+   *   0 means dump the QNN context binary into separate bin file and set the path to EPContext->ep_cache_context.
+   *   The path is relative path to the ONNX skeleton model file.
+   *   "qnn_saver_path": File path to the QNN Saver backend library. If specified, QNN Saver will be enabled and will
+   *   dump QNN API calls to disk for replay/debugging. QNN Saver produces incorrect model inference results and
+   *   may alter model/EP partitioning. Use only for debugging.
+   *   "qnn_context_priority": QNN context priority, options: "low", "normal", "normal_high", "high". Default to "normal".
+   *   "htp_graph_finalization_optimization_mode": Set the optimization mode for graph finalization on the HTP backend. Available options:
+   *     - "0": Default.
+   *     - "1": Faster preparation time, less optimal graph.
+   *     - "2": Longer preparation time, more optimal graph.
+   *     - "3": Longest preparation time, most likely even more optimal graph. See QNN SDK documentation for specific details.
    *
    * SNPE supported keys:
    *   "runtime": SNPE runtime engine, options: "CPU", "CPU_FLOAT32", "GPU", "GPU_FLOAT32_16_HYBRID", "GPU_FLOAT16",
@@ -4413,6 +4433,93 @@ struct OrtApi {
    * \since Version 1.16.
    */
   ORT_API2_STATUS(KernelContext_GetResource, _In_ const OrtKernelContext* context, _In_ int resouce_version, _In_ int resource_id, _Outptr_ void** resource);
+
+  /** \brief Set user logging function
+   *
+   *  By default the logger created by the CreateEnv* functions is used to create the session logger as well.
+   *  This function allows a user to override this default session logger with a logger of their own choosing. This way
+   *  the user doesn't have to create a separate environment with a custom logger. This addresses the problem when
+   *  the user already created an env but now wants to use a different logger for a specific session (for debugging or
+   *  other reasons).
+   *
+   * \param[in] options
+   * \param[in] user_logging_function A pointer to a logging function.
+   * \param[in] user_logging_param A pointer to arbitrary data passed as the ::OrtLoggingFunction `param` parameter to
+   *                         `user_logging_function`. This parameter is optional.
+   *
+   * \snippet{doc} snippets.dox OrtStatus Return Value
+   *
+   * \since Version 1.17.
+   */
+  ORT_API2_STATUS(SetUserLoggingFunction, _Inout_ OrtSessionOptions* options,
+                  _In_ OrtLoggingFunction user_logging_function, _In_opt_ void* user_logging_param);
+
+  /**
+   * Get number of input from OrtShapeInferContext
+   *
+   * \param[in] context
+   * \param[out] out The number of inputs
+   *
+   * \since Version 1.17.
+   */
+  ORT_API2_STATUS(ShapeInferContext_GetInputCount, _In_ const OrtShapeInferContext* context, _Out_ size_t* out);
+
+  /**
+   * Get type and shape info of an input
+   *
+   * \param[in] context
+   * \param[in] index The index of the input
+   * \param[out] info Type shape info of the input
+   *
+   * \since Version 1.17.
+   */
+  ORT_API2_STATUS(ShapeInferContext_GetInputTypeShape, _In_ const OrtShapeInferContext* context, _In_ size_t index, _Outptr_ OrtTensorTypeAndShapeInfo** info);
+
+  /**
+   * Get attribute from OrtShapeInferContext. Note that OrtShapeInferContext is a per-node context, one could only read attribute from current node.
+   *
+   * \param[in] context
+   * \param[in] attr_name Name of the attribute
+   * \param[out] attr Handle of the attribute fetched
+   *
+   * \since Version 1.17.
+   */
+  ORT_API2_STATUS(ShapeInferContext_GetAttribute, _In_ const OrtShapeInferContext* context, _In_ const char* attr_name, _Outptr_ const OrtOpAttr** attr);
+
+  /**
+   * Set type and shape info of an ouput
+   *
+   * \param[in] context
+   * \param[in] index The index of the ouput
+   * \param[out] info Type shape info of the output
+   *
+   * \since Version 1.17.
+   */
+  ORT_API2_STATUS(ShapeInferContext_SetOutputTypeShape, _In_ const OrtShapeInferContext* context, _In_ size_t index, _In_ const OrtTensorTypeAndShapeInfo* info);
+
+  /**
+   * Set symbolic shape to type shape info
+   *
+   * \param[in] info Type shape info
+   * \param[in] dim_params Symbolic strings
+   * \param[in] dim_params_length Number of strings
+   *
+   * \since Version 1.17.
+   */
+  ORT_API2_STATUS(SetSymbolicDimensions, _In_ OrtTensorTypeAndShapeInfo* info, _In_ const char* dim_params[], _In_ size_t dim_params_length);
+
+  /**
+   * Read contents of an attribute to data
+   *
+   * \param[in] op_attr
+   * \param[in] type Attribute type
+   * \param[out] data Memory address to save raw content of the attribute
+   * \param[in] len Number of bytes allowed to store in data
+   * \param[out] out Number of bytes required to save the data when the call failed, or the real number of bytes saved to data on success
+   *
+   * \since Version 1.17.
+   */
+  ORT_API2_STATUS(ReadOpAttr, _In_ const OrtOpAttr* op_attr, _In_ OrtOpAttrType type, _Inout_ void* data, _In_ size_t len, _Out_ size_t* out);
 };
 
 /*
@@ -4504,6 +4611,12 @@ struct OrtCustomOp {
 
   // Perform the computation step.
   OrtStatusPtr(ORT_API_CALL* KernelComputeV2)(_In_ void* op_kernel, _In_ OrtKernelContext* context);
+
+  OrtStatusPtr(ORT_API_CALL* InferOutputShapeFn)(_In_ const struct OrtCustomOp* op, _In_ OrtShapeInferContext*);
+
+  // Get start range
+  int(ORT_API_CALL* GetStartVersion)(_In_ const struct OrtCustomOp* op);
+  int(ORT_API_CALL* GetEndVersion)(_In_ const struct OrtCustomOp* op);
 };
 
 /*
@@ -4544,6 +4657,14 @@ ORT_API_STATUS(OrtSessionOptionsAppendExecutionProvider_MIGraphX, _In_ OrtSessio
  */
 ORT_API_STATUS(OrtSessionOptionsAppendExecutionProvider_Dnnl, _In_ OrtSessionOptions* options, int use_arena);
 
+/*
+ * This is the old way to add the TensorRT provider to the session, please use SessionOptionsAppendExecutionProvider_TensorRT_V2 above to access the latest functionality
+ * This function always exists, but will only succeed if Onnxruntime was built with TensorRT support and the TensorRT provider shared library exists
+ *
+ * \param device_id CUDA device id, starts from zero.
+ */
+ORT_API_STATUS(OrtSessionOptionsAppendExecutionProvider_Tensorrt, _In_ OrtSessionOptions* options, int device_id);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_api.h b/include/onnxruntime/core/session/onnxruntime_cxx_api.h
index 47356c3fe3608..92c25d8688b66 100644
--- a/include/onnxruntime/core/session/onnxruntime_cxx_api.h
+++ b/include/onnxruntime/core/session/onnxruntime_cxx_api.h
@@ -2055,6 +2055,7 @@ struct KernelContext {
   void* GetGPUComputeStream() const;
   Logger GetLogger() const;
   OrtAllocator* GetAllocator(const OrtMemoryInfo& memory_info) const;
+  OrtKernelContext* GetOrtKernelContext() const { return ctx_; }
 
  private:
   OrtKernelContext* ctx_;
@@ -2155,6 +2156,80 @@ struct Op : detail::Base<OrtOp> {
               size_t output_count);
 };
 
+/// <summary>
+/// Provide access to per-node attributes and input shapes, so one could compute and set output shapes.
+/// </summary>
+struct ShapeInferContext {
+  struct SymbolicInteger {
+    SymbolicInteger(int64_t i) : i_(i), is_int_(true){};
+    SymbolicInteger(const char* s) : s_(s), is_int_(false){};
+    SymbolicInteger(const SymbolicInteger&) = default;
+    SymbolicInteger(SymbolicInteger&&) = default;
+
+    SymbolicInteger& operator=(const SymbolicInteger&) = default;
+    SymbolicInteger& operator=(SymbolicInteger&&) = default;
+
+    bool operator==(const SymbolicInteger& dim) const {
+      if (is_int_ == dim.is_int_) {
+        if (is_int_) {
+          return i_ == dim.i_;
+        } else {
+          return std::string{s_} == std::string{dim.s_};
+        }
+      }
+      return false;
+    }
+
+    bool IsInt() const { return is_int_; }
+    int64_t AsInt() const { return i_; }
+    const char* AsSym() const { return s_; }
+
+    static constexpr int INVALID_INT_DIM = -2;
+
+   private:
+    union {
+      int64_t i_;
+      const char* s_;
+    };
+    bool is_int_;
+  };
+
+  using Shape = std::vector<SymbolicInteger>;
+
+  ShapeInferContext(const OrtApi* ort_api, OrtShapeInferContext* ctx);
+
+  const Shape& GetInputShape(size_t indice) const { return input_shapes_.at(indice); }
+
+  size_t GetInputCount() const { return input_shapes_.size(); }
+
+  Status SetOutputShape(size_t indice, const Shape& shape);
+
+  int64_t GetAttrInt(const char* attr_name);
+
+  using Ints = std::vector<int64_t>;
+  Ints GetAttrInts(const char* attr_name);
+
+  float GetAttrFloat(const char* attr_name);
+
+  using Floats = std::vector<float>;
+  Floats GetAttrFloats(const char* attr_name);
+
+  std::string GetAttrString(const char* attr_name);
+
+  using Strings = std::vector<std::string>;
+  Strings GetAttrStrings(const char* attr_name);
+
+ private:
+  const OrtOpAttr* GetAttrHdl(const char* attr_name) const;
+  const OrtApi* ort_api_;
+  OrtShapeInferContext* ctx_;
+  std::vector<Shape> input_shapes_;
+};
+
+using ShapeInferFn = Ort::Status (*)(Ort::ShapeInferContext&);
+
+#define MAX_CUSTOM_OP_END_VER (1UL << 31) - 1
+
 template <typename TOp, typename TKernel, bool WithStatus = false>
 struct CustomOpBase : OrtCustomOp {
   CustomOpBase() {
@@ -2205,6 +2280,16 @@ struct CustomOpBase : OrtCustomOp {
         static_cast<TKernel*>(op_kernel)->Compute(context);
       };
     }
+
+    SetShapeInferFn<TOp>(0);
+
+    OrtCustomOp::GetStartVersion = [](const OrtCustomOp* this_) {
+      return static_cast<const TOp*>(this_)->start_ver_;
+    };
+
+    OrtCustomOp::GetEndVersion = [](const OrtCustomOp* this_) {
+      return static_cast<const TOp*>(this_)->end_ver_;
+    };
   }
 
   // Default implementation of GetExecutionProviderType that returns nullptr to default to the CPU provider
@@ -2256,9 +2341,26 @@ struct CustomOpBase : OrtCustomOp {
     return std::vector<std::string>{};
   }
 
+  template <typename C>
+  decltype(&C::InferOutputShape) SetShapeInferFn(decltype(&C::InferOutputShape)) {
+    OrtCustomOp::InferOutputShapeFn = [](const OrtCustomOp*, OrtShapeInferContext* ort_ctx) -> OrtStatusPtr {
+      ShapeInferContext ctx(&GetApi(), ort_ctx);
+      return C::InferOutputShape(ctx);
+    };
+    return {};
+  }
+
+  template <typename C>
+  void SetShapeInferFn(...) {
+    OrtCustomOp::InferOutputShapeFn = {};
+  }
+
  protected:
   // Helper function that returns a map of session config entries specified by CustomOpBase::GetSessionConfigKeys.
   void GetSessionConfigs(std::unordered_map<std::string, std::string>& out, ConstSessionOptions options) const;
+
+  int start_ver_ = 1;
+  int end_ver_ = MAX_CUSTOM_OP_END_VER;
 };
 
 }  // namespace Ort
diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h
index 22172832cde8e..860a27fc73f79 100644
--- a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h
+++ b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h
@@ -8,6 +8,15 @@
 // the main C++ file with implementation details.
 
 #include <cstring>
+#include <functional>
+
+#define RETURN_ON_API_FAIL(expression) \
+  {                                    \
+    auto err = (expression);           \
+    if (err) {                         \
+      return Status(err);              \
+    }                                  \
+  }
 
 namespace Ort {
 
@@ -1883,4 +1892,154 @@ void CustomOpBase<TOp, TKernel, WithStatus>::GetSessionConfigs(std::unordered_ma
   }
 }
 
+inline ShapeInferContext::ShapeInferContext(const OrtApi* ort_api,
+                                            OrtShapeInferContext* ctx) : ort_api_(ort_api), ctx_(ctx) {
+  size_t input_count = 0;
+  Ort::ThrowOnError(ort_api_->ShapeInferContext_GetInputCount(ctx_, &input_count));
+  for (size_t ith_input = 0; ith_input < input_count; ++ith_input) {
+    OrtTensorTypeAndShapeInfo* info{};
+    Ort::ThrowOnError(ort_api_->ShapeInferContext_GetInputTypeShape(ctx, ith_input, &info));
+    TensorTypeAndShapeInfo type_shape_info(info);
+    auto integer_shape = type_shape_info.GetShape();
+    std::vector<const char*> symbolic_shape(integer_shape.size(), {});
+    type_shape_info.GetSymbolicDimensions(&symbolic_shape[0], integer_shape.size());
+    Shape shape;
+    for (size_t ith = 0; ith < integer_shape.size(); ++ith) {
+      if (symbolic_shape[ith] && std::string{symbolic_shape[ith]}.size() > 0) {
+        shape.emplace_back(symbolic_shape[ith]);
+      } else {
+        shape.emplace_back(integer_shape[ith]);
+      }
+    }
+    input_shapes_.push_back(std::move(shape));
+    type_shape_info.release();
+  }
+}
+
+inline Status ShapeInferContext::SetOutputShape(size_t indice, const Shape& shape) {
+  OrtTensorTypeAndShapeInfo* info = {};
+  RETURN_ON_API_FAIL(ort_api_->CreateTensorTypeAndShapeInfo(&info));
+
+  using InfoPtr = std::unique_ptr<OrtTensorTypeAndShapeInfo, std::function<void(OrtTensorTypeAndShapeInfo*)>>;
+
+  InfoPtr info_ptr(info, [this](OrtTensorTypeAndShapeInfo* obj) {
+    ort_api_->ReleaseTensorTypeAndShapeInfo(obj);
+  });
+
+  std::vector<int64_t> integer_dims;
+  std::vector<const char*> symbolic_dims;
+
+  for (const auto dim : shape) {
+    if (dim.IsInt()) {
+      integer_dims.push_back(dim.IsInt());
+      symbolic_dims.push_back("");
+    } else {
+      if (!dim.AsSym() || std::string{dim.AsSym()}.empty()) {
+        ORT_CXX_API_THROW("Symbolic dim must not be an empty string", ORT_INVALID_ARGUMENT);
+      }
+      integer_dims.push_back(SymbolicInteger::INVALID_INT_DIM);
+      symbolic_dims.push_back(dim.AsSym());
+    }
+  }
+
+  RETURN_ON_API_FAIL(ort_api_->SetDimensions(info, integer_dims.data(), integer_dims.size()));
+  RETURN_ON_API_FAIL(ort_api_->SetSymbolicDimensions(info, symbolic_dims.data(), symbolic_dims.size()));
+  RETURN_ON_API_FAIL(ort_api_->ShapeInferContext_SetOutputTypeShape(ctx_, indice, info));
+  return Status{nullptr};
+}
+
+inline int64_t ShapeInferContext::GetAttrInt(const char* attr_name) {
+  const auto* attr = GetAttrHdl(attr_name);
+  int64_t i = {};
+  size_t out = {};
+  Ort::ThrowOnError(ort_api_->ReadOpAttr(attr, ORT_OP_ATTR_INT, &i, sizeof(i), &out));
+  return i;
+}
+
+inline ShapeInferContext::Ints ShapeInferContext::GetAttrInts(const char* attr_name) {
+  const auto* attr = GetAttrHdl(attr_name);
+  int64_t i = {};
+  size_t out = {};
+  // first call to get the bytes needed
+  auto status = ort_api_->ReadOpAttr(attr, ORT_OP_ATTR_INTS, &i, sizeof(i), &out);
+  if (status) {
+    size_t num_i = out / sizeof(int64_t);
+    ShapeInferContext::Ints ints(num_i, 0);
+    Ort::ThrowOnError(ort_api_->ReadOpAttr(attr, ORT_OP_ATTR_INTS, ints.data(), out, &out));
+    return ints;
+  } else {
+    return {i};
+  }
+}
+
+inline float ShapeInferContext::GetAttrFloat(const char* attr_name) {
+  const auto* attr = GetAttrHdl(attr_name);
+  float f = {};
+  size_t out = {};
+  Ort::ThrowOnError(ort_api_->ReadOpAttr(attr, ORT_OP_ATTR_FLOAT, &f, sizeof(f), &out));
+  return f;
+}
+
+inline ShapeInferContext::Floats ShapeInferContext::GetAttrFloats(const char* attr_name) {
+  const auto* attr = GetAttrHdl(attr_name);
+  float f = {};
+  size_t out = {};
+  // first call to get the bytes needed
+  auto status = ort_api_->ReadOpAttr(attr, ORT_OP_ATTR_FLOATS, &f, sizeof(f), &out);
+  if (status) {
+    size_t num_f = out / sizeof(float);
+    ShapeInferContext::Floats floats(num_f, 0);
+    Ort::ThrowOnError(ort_api_->ReadOpAttr(attr, ORT_OP_ATTR_FLOATS, floats.data(), out, &out));
+    return floats;
+  } else {
+    return {f};
+  }
+}
+
+inline std::string ShapeInferContext::GetAttrString(const char* attr_name) {
+  const auto* attr = GetAttrHdl(attr_name);
+  char c = {};
+  size_t out = {};
+  // first call to get the bytes needed
+  auto status = ort_api_->ReadOpAttr(attr, ORT_OP_ATTR_STRING, &c, sizeof(char), &out);
+  if (status) {
+    std::vector<char> chars(out, '\0');
+    Ort::ThrowOnError(ort_api_->ReadOpAttr(attr, ORT_OP_ATTR_STRING, chars.data(), out, &out));
+    return {chars.data()};
+  } else {
+    return {c};
+  }
+}
+
+inline ShapeInferContext::Strings ShapeInferContext::GetAttrStrings(const char* attr_name) {
+  const auto* attr = GetAttrHdl(attr_name);
+  char c = {};
+  size_t out = {};
+  // first call to get the bytes needed
+  auto status = ort_api_->ReadOpAttr(attr, ORT_OP_ATTR_STRINGS, &c, sizeof(char), &out);
+  if (status) {
+    std::vector<char> chars(out, '\0');
+    Ort::ThrowOnError(ort_api_->ReadOpAttr(attr, ORT_OP_ATTR_STRINGS, chars.data(), out, &out));
+    ShapeInferContext::Strings strings;
+    char* char_st = chars.data();
+    char* char_ed = char_st + out;
+    while (char_st < char_ed) {
+      strings.emplace_back(char_st);
+      while (*char_st != '\0') {
+        char_st++;
+      }
+      char_st++;
+    }
+    return strings;
+  } else {
+    return {std::string{c}};
+  }
+}
+
+inline const OrtOpAttr* ShapeInferContext::GetAttrHdl(const char* attr_name) const {
+  const OrtOpAttr* attr_hdl = {};
+  Ort::ThrowOnError(ort_api_->ShapeInferContext_GetAttribute(ctx_, attr_name, &attr_hdl));
+  return attr_hdl;
+}
+
 }  // namespace Ort
diff --git a/include/onnxruntime/core/session/onnxruntime_lite_custom_op.h b/include/onnxruntime/core/session/onnxruntime_lite_custom_op.h
index fd42824e81a56..0c0af16d4e20c 100644
--- a/include/onnxruntime/core/session/onnxruntime_lite_custom_op.h
+++ b/include/onnxruntime/core/session/onnxruntime_lite_custom_op.h
@@ -18,22 +18,81 @@
 #include "onnxruntime_cxx_api.h"
 #include <optional>
 #include <numeric>
+#include <functional>
 #include <unordered_set>
 
 namespace Ort {
 namespace Custom {
 
-class TensorBase {
+class ArgBase {
  public:
-  TensorBase(OrtKernelContext* ctx) : ctx_(ctx) {}
-  virtual ~TensorBase() {}
+  ArgBase(OrtKernelContext* ctx,
+          size_t indice,
+          bool is_input) : ctx_(ctx), indice_(indice), is_input_(is_input) {}
+  virtual ~ArgBase(){};
+
+ protected:
+  struct KernelContext ctx_;
+  size_t indice_;
+  bool is_input_;
+};
+
+using ArgPtr = std::unique_ptr<Custom::ArgBase>;
+using ArgPtrs = std::vector<ArgPtr>;
+
+class TensorBase : public ArgBase {
+ public:
+  TensorBase(OrtKernelContext* ctx,
+             size_t indice,
+             bool is_input) : ArgBase(ctx, indice, is_input) {}
+
   operator bool() const {
     return shape_.has_value();
   }
 
+  const std::vector<int64_t>& Shape() const {
+    if (!shape_.has_value()) {
+      ORT_CXX_API_THROW("tensor shape is not yet initialized", OrtErrorCode::ORT_RUNTIME_EXCEPTION);
+    }
+    return shape_.value();
+  }
+
+  ONNXTensorElementDataType Type() const {
+    return type_;
+  }
+
+  int64_t NumberOfElement() const {
+    if (shape_.has_value()) {
+      return std::accumulate(shape_->begin(), shape_->end(), 1LL, std::multiplies<int64_t>());
+    } else {
+      return 0;
+    }
+  }
+
+  std::string Shape2Str() const {
+    if (shape_.has_value()) {
+      std::string shape_str;
+      for (const auto& dim : *shape_) {
+        shape_str.append(std::to_string(dim));
+        shape_str.append(", ");
+      }
+      return shape_str;
+    } else {
+      return "empty";
+    }
+  }
+
+  bool IsCpuTensor() const {
+    return strcmp("Cpu", mem_type_) == 0;
+  }
+
+  virtual const void* DataRaw() const = 0;
+  virtual size_t SizeInBytes() const = 0;
+
  protected:
-  struct KernelContext ctx_;
   std::optional<std::vector<int64_t>> shape_;
+  ONNXTensorElementDataType type_ = ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED;
+  const char* mem_type_ = "Cpu";
 };
 
 template <typename T>
@@ -48,13 +107,14 @@ struct Span {
   T operator[](size_t indice) const {
     return data_[indice];
   }
+  const T* data() const { return data_; }
 };
 
 template <typename T>
 class Tensor : public TensorBase {
  public:
   using TT = typename std::remove_reference<T>::type;
-  Tensor(OrtKernelContext* ctx, size_t indice, bool is_input) : TensorBase(ctx), indice_(indice), is_input_(is_input) {
+  Tensor(OrtKernelContext* ctx, size_t indice, bool is_input) : TensorBase(ctx, indice, is_input) {
     if (is_input_) {
       if (indice >= ctx_.GetInputCount()) {
         ORT_CXX_API_THROW("invalid indice for Ort::Custom::Tensor", OrtErrorCode::ORT_INVALID_ARGUMENT);
@@ -64,19 +124,6 @@ class Tensor : public TensorBase {
       shape_ = type_shape_info.GetShape();
     }
   }
-  const std::vector<int64_t>& Shape() const {
-    if (!shape_.has_value()) {
-      ORT_CXX_API_THROW("tensor shape is not yet initialized", OrtErrorCode::ORT_RUNTIME_EXCEPTION);
-    }
-    return shape_.value();
-  }
-  int64_t NumberOfElement() const {
-    if (shape_.has_value()) {
-      return std::accumulate(shape_->begin(), shape_->end(), 1LL, std::multiplies<int64_t>());
-    } else {
-      return 0;
-    }
-  }
   const TT* Data() const {
     return reinterpret_cast<const TT*>(const_value_.GetTensorRawData());
   }
@@ -104,10 +151,15 @@ class Tensor : public TensorBase {
     }
     return *Data();
   }
+  const void* DataRaw() const override {
+    return reinterpret_cast<const void*>(Data());
+  }
+
+  size_t SizeInBytes() const override {
+    return sizeof(TT) * static_cast<size_t>(NumberOfElement());
+  }
 
  private:
-  size_t indice_;
-  bool is_input_;
   ConstValue const_value_;  // for input
   TT* data_{};              // for output
   Span<T> span_;
@@ -118,7 +170,7 @@ class Tensor<std::string> : public TensorBase {
  public:
   using strings = std::vector<std::string>;
 
-  Tensor(OrtKernelContext* ctx, size_t indice, bool is_input) : TensorBase(ctx), indice_(indice), is_input_(is_input) {
+  Tensor(OrtKernelContext* ctx, size_t indice, bool is_input) : TensorBase(ctx, indice, is_input) {
     if (is_input_) {
       if (indice >= ctx_.GetInputCount()) {
         ORT_CXX_API_THROW("invalid indice for Ort::Custom::Tensor", OrtErrorCode::ORT_INVALID_ARGUMENT);
@@ -147,16 +199,21 @@ class Tensor<std::string> : public TensorBase {
       }
     }
   }
-  int64_t NumberOfElement() const {
-    if (shape_.has_value()) {
-      return std::accumulate(shape_->begin(), shape_->end(), 1ULL, std::multiplies<int64_t>());
-    } else {
-      return 0;
-    }
-  }
   const strings& Data() const {
     return input_strings_;
   }
+  const void* DataRaw() const override {
+    if (input_strings_.size() != 1) {
+      ORT_CXX_API_THROW("DataRaw() only applies to string scalar", ORT_RUNTIME_EXCEPTION);
+    }
+    return reinterpret_cast<const void*>(input_strings_[0].c_str());
+  }
+  size_t SizeInBytes() const override {
+    if (input_strings_.size() != 1) {
+      ORT_CXX_API_THROW("SizeInBytes() only applies to string scalar", ORT_RUNTIME_EXCEPTION);
+    }
+    return input_strings_[0].size();
+  }
   void SetStringOutput(const strings& ss, const std::vector<int64_t>& dims) {
     shape_ = dims;
     std::vector<const char*> raw;
@@ -179,8 +236,6 @@ class Tensor<std::string> : public TensorBase {
   }
 
  private:
-  size_t indice_;
-  bool is_input_;
   std::vector<std::string> input_strings_;  // for input
 };
 
@@ -190,7 +245,7 @@ class Tensor<std::string_view> : public TensorBase {
   using strings = std::vector<std::string>;
   using string_views = std::vector<std::string_view>;
 
-  Tensor(OrtKernelContext* ctx, size_t indice, bool is_input) : TensorBase(ctx), indice_(indice), is_input_(is_input) {
+  Tensor(OrtKernelContext* ctx, size_t indice, bool is_input) : TensorBase(ctx, indice, is_input) {
     if (is_input_) {
       if (indice >= ctx_.GetInputCount()) {
         ORT_CXX_API_THROW("invalid indice for Ort::Custom::Tensor", OrtErrorCode::ORT_INVALID_ARGUMENT);
@@ -211,16 +266,21 @@ class Tensor<std::string_view> : public TensorBase {
       }
     }
   }
-  int64_t NumberOfElement() const {
-    if (shape_.has_value()) {
-      return std::accumulate(shape_->begin(), shape_->end(), 1ULL, std::multiplies<int64_t>());
-    } else {
-      return 0;
-    }
-  }
   const string_views& Data() const {
     return input_string_views_;
   }
+  const void* DataRaw() const override {
+    if (input_string_views_.size() != 1) {
+      ORT_CXX_API_THROW("DataRaw() only applies to string scalar", ORT_RUNTIME_EXCEPTION);
+    }
+    return reinterpret_cast<const void*>(input_string_views_[0].data());
+  }
+  size_t SizeInBytes() const override {
+    if (input_string_views_.size() != 1) {
+      ORT_CXX_API_THROW("SizeInBytes() only applies to string scalar", ORT_RUNTIME_EXCEPTION);
+    }
+    return input_string_views_[0].size();
+  }
   void SetStringOutput(const strings& ss, const std::vector<int64_t>& dims) {
     shape_ = dims;
     std::vector<const char*> raw;
@@ -243,16 +303,111 @@ class Tensor<std::string_view> : public TensorBase {
   }
 
  private:
-  size_t indice_;
-  bool is_input_;
   std::vector<char> chars_;                           // for input
   std::vector<std::string_view> input_string_views_;  // for input
 };
 
 using TensorPtr = std::unique_ptr<Custom::TensorBase>;
+using TensorPtrs = std::vector<TensorPtr>;
 
-//////////////////////////// OrtLiteCustomOp ////////////////////////////////
+struct TensorArray : public ArgBase {
+  TensorArray(OrtKernelContext* ctx,
+              size_t start_indice,
+              bool is_input) : ArgBase(ctx,
+                                       start_indice,
+                                       is_input) {
+    if (is_input) {
+      auto input_count = ctx_.GetInputCount();
+      for (size_t ith_input = start_indice; ith_input < input_count; ++ith_input) {
+        auto const_value = ctx_.GetInput(start_indice);
+        auto type_shape_info = const_value.GetTensorTypeAndShapeInfo();
+        auto type = type_shape_info.GetElementType();
+        TensorPtr tensor;
+        switch (type) {
+          case ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL:
+            tensor = std::make_unique<Custom::Tensor<bool>>(ctx, ith_input, true);
+            break;
+          case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT:
+            tensor = std::make_unique<Custom::Tensor<float>>(ctx, ith_input, true);
+            break;
+          case ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE:
+            tensor = std::make_unique<Custom::Tensor<double>>(ctx, ith_input, true);
+            break;
+          case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8:
+            tensor = std::make_unique<Custom::Tensor<uint8_t>>(ctx, ith_input, true);
+            break;
+          case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8:
+            tensor = std::make_unique<Custom::Tensor<int8_t>>(ctx, ith_input, true);
+            break;
+          case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16:
+            tensor = std::make_unique<Custom::Tensor<uint16_t>>(ctx, ith_input, true);
+            break;
+          case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT16:
+            tensor = std::make_unique<Custom::Tensor<int16_t>>(ctx, ith_input, true);
+            break;
+          case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT32:
+            tensor = std::make_unique<Custom::Tensor<uint32_t>>(ctx, ith_input, true);
+            break;
+          case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32:
+            tensor = std::make_unique<Custom::Tensor<int32_t>>(ctx, ith_input, true);
+            break;
+          case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT64:
+            tensor = std::make_unique<Custom::Tensor<uint64_t>>(ctx, ith_input, true);
+            break;
+          case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64:
+            tensor = std::make_unique<Custom::Tensor<int64_t>>(ctx, ith_input, true);
+            break;
+          case ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING:
+            tensor = std::make_unique<Custom::Tensor<std::string>>(ctx, ith_input, true);
+            break;
+          default:
+            ORT_CXX_API_THROW("unknow input type", ORT_RUNTIME_EXCEPTION);
+            break;
+        }
+        tensors_.emplace_back(tensor.release());
+      }  // for
+    }
+  }
+  template <typename T>
+  T* AllocateOutput(size_t ith_output, const std::vector<int64_t>& shape) {
+    // ith_output is the indice of output relative to the tensor array
+    // indice_ + ith_output is the indice relative to context
+    auto tensor = std::make_unique<Tensor<T>>(ctx_.GetOrtKernelContext(), indice_ + ith_output, false);
+    auto raw_output = tensor.get()->Allocate(shape);
+    tensors_.emplace_back(tensor.release());
+    return raw_output;
+  }
+  Tensor<std::string>& AllocateStringTensor(size_t ith_output) {
+    // ith_output is the indice of output relative to the tensor array
+    // indice_ + ith_output is the indice relative to context
+    auto tensor = std::make_unique<Tensor<std::string>>(ctx_.GetOrtKernelContext(), indice_ + ith_output, false);
+    Tensor<std::string>& output = *tensor;
+    tensors_.emplace_back(tensor.release());
+    return output;
+  }
+  size_t Size() const {
+    return tensors_.size();
+  }
+  const TensorPtr& operator[](size_t ith_input) const {
+    // ith_input is the indice of output relative to the tensor array
+    return tensors_.at(ith_input);
+  }
 
+ private:
+  TensorPtrs tensors_;
+};
+
+using Variadic = TensorArray;
+
+/*
+Note:
+OrtLiteCustomOp inherits from OrtCustomOp to bridge tween a custom func/struct and ort core.
+The lifetime of an OrtLiteCustomOp instance is managed by customer code, not ort, so:
+1. DO NOT cast OrtLiteCustomOp to OrtCustomOp and release since there is no virtual destructor in the hierachy.
+2. OrtLiteCustomFunc and OrtLiteCustomStruct, as two sub-structs, can be released in form of OrtLiteCustomOp since all members are kept in the OrtLiteCustomOp,
+   hence memory could still be recycled properly.
+Further, OrtCustomOp is a c struct bearing no v-table, so offspring structs are by design to be of zero virtual functions to maintain cast safety.
+*/
 struct OrtLiteCustomOp : public OrtCustomOp {
   using ConstOptionalFloatTensor = std::optional<const Custom::Tensor<float>&>;
   using OptionalFloatTensor = std::optional<Custom::Tensor<float>>;
@@ -260,34 +415,34 @@ struct OrtLiteCustomOp : public OrtCustomOp {
   // CreateTuple
   template <size_t ith_input, size_t ith_output, typename... Ts>
   static typename std::enable_if<sizeof...(Ts) == 0, std::tuple<>>::type
-  CreateTuple(OrtKernelContext*, std::vector<TensorPtr>&, size_t, size_t, const std::string&) {
+  CreateTuple(OrtKernelContext*, ArgPtrs&, size_t, size_t, const std::string&) {
     return std::make_tuple();
   }
 
   template <size_t ith_input, size_t ith_output, typename T, typename... Ts>
   static typename std::enable_if<std::is_same<T, OrtKernelContext*>::value, std::tuple<T, Ts...>>::type
-  CreateTuple(OrtKernelContext* context, std::vector<TensorPtr>& tensors, size_t num_input, size_t num_output, const std::string& ep) {
+  CreateTuple(OrtKernelContext* context, ArgPtrs& args, size_t num_input, size_t num_output, const std::string& ep) {
     std::tuple<T> current = std::tuple<OrtKernelContext*>{context};
-    auto next = CreateTuple<ith_input, ith_output, Ts...>(context, tensors, num_input, num_output, ep);
+    auto next = CreateTuple<ith_input, ith_output, Ts...>(context, args, num_input, num_output, ep);
     return std::tuple_cat(current, next);
   }
 
   template <size_t ith_input, size_t ith_output, typename T, typename... Ts>
   static typename std::enable_if<std::is_same<T, OrtKernelContext&>::value, std::tuple<T, Ts...>>::type
-  CreateTuple(OrtKernelContext* context, std::vector<TensorPtr>& tensors, size_t num_input, size_t num_output, const std::string& ep) {
+  CreateTuple(OrtKernelContext* context, ArgPtrs& args, size_t num_input, size_t num_output, const std::string& ep) {
     std::tuple<T> current = std::tuple<OrtKernelContext&>{*context};
-    auto next = CreateTuple<ith_input, ith_output, Ts...>(context, tensors, num_input, num_output, ep);
+    auto next = CreateTuple<ith_input, ith_output, Ts...>(context, args, num_input, num_output, ep);
     return std::tuple_cat(current, next);
   }
 
 #ifdef ORT_CUDA_CTX
   template <size_t ith_input, size_t ith_output, typename T, typename... Ts>
   static typename std::enable_if<std::is_same<T, const CudaContext&>::value, std::tuple<T, Ts...>>::type
-  CreateTuple(OrtKernelContext* context, std::vector<TensorPtr>& tensors, size_t num_input, size_t num_output, const std::string& ep) {
+  CreateTuple(OrtKernelContext* context, ArgPtrs& args, size_t num_input, size_t num_output, const std::string& ep) {
     thread_local CudaContext cuda_context;
     cuda_context.Init(*context);
     std::tuple<T> current = std::tuple<const CudaContext&>{cuda_context};
-    auto next = CreateTuple<ith_input, ith_output, Ts...>(context, tensors, num_input, num_output, ep);
+    auto next = CreateTuple<ith_input, ith_output, Ts...>(context, args, num_input, num_output, ep);
     return std::tuple_cat(current, next);
   }
 #endif
@@ -295,143 +450,179 @@ struct OrtLiteCustomOp : public OrtCustomOp {
 #ifdef ORT_ROCM_CTX
   template <size_t ith_input, size_t ith_output, typename T, typename... Ts>
   static typename std::enable_if<std::is_same<T, const RocmContext&>::value, std::tuple<T, Ts...>>::type
-  CreateTuple(OrtKernelContext* context, std::vector<TensorPtr>& tensors, size_t num_input, size_t num_output, const std::string& ep) {
+  CreateTuple(OrtKernelContext* context, ArgPtrs& args, size_t num_input, size_t num_output, const std::string& ep) {
     thread_local RocmContext rocm_context;
     rocm_context.Init(*context);
     std::tuple<T> current = std::tuple<const RocmContext&>{rocm_context};
-    auto next = CreateTuple<ith_input, ith_output, Ts...>(context, tensors, num_input, num_output, ep);
+    auto next = CreateTuple<ith_input, ith_output, Ts...>(context, args, num_input, num_output, ep);
     return std::tuple_cat(current, next);
   }
 #endif
 
-#define CREATE_TUPLE_INPUT(data_type)                                                                                                   \
-  template <size_t ith_input, size_t ith_output, typename T, typename... Ts>                                                            \
-  static typename std::enable_if<std::is_same<T, const Custom::Tensor<data_type>*>::value, std::tuple<T, Ts...>>::type                  \
-  CreateTuple(OrtKernelContext* context, std::vector<TensorPtr>& tensors, size_t num_input, size_t num_output, const std::string& ep) { \
-    tensors.push_back(std::make_unique<Custom::Tensor<data_type>>(context, ith_input, true));                                           \
-    std::tuple<T> current = std::tuple<T>{reinterpret_cast<T>(tensors.back().get())};                                                   \
-    auto next = CreateTuple<ith_input + 1, ith_output, Ts...>(context, tensors, num_input, num_output, ep);                             \
-    return std::tuple_cat(current, next);                                                                                               \
-  }                                                                                                                                     \
-  template <size_t ith_input, size_t ith_output, typename T, typename... Ts>                                                            \
-  static typename std::enable_if<std::is_same<T, const Custom::Tensor<data_type>&>::value, std::tuple<T, Ts...>>::type                  \
-  CreateTuple(OrtKernelContext* context, std::vector<TensorPtr>& tensors, size_t num_input, size_t num_output, const std::string& ep) { \
-    tensors.push_back(std::make_unique<Custom::Tensor<data_type>>(context, ith_input, true));                                           \
-    std::tuple<T> current = std::tuple<T>{reinterpret_cast<T>(*tensors.back().get())};                                                  \
-    auto next = CreateTuple<ith_input + 1, ith_output, Ts...>(context, tensors, num_input, num_output, ep);                             \
-    return std::tuple_cat(current, next);                                                                                               \
-  }                                                                                                                                     \
-  template <size_t ith_input, size_t ith_output, typename T, typename... Ts>                                                            \
-  static typename std::enable_if<std::is_same<T, std::optional<const Custom::Tensor<data_type>*>>::value, std::tuple<T, Ts...>>::type   \
-  CreateTuple(OrtKernelContext* context, std::vector<TensorPtr>& tensors, size_t num_input, size_t num_output, const std::string& ep) { \
-    if (ith_input < num_input) {                                                                                                        \
-      tensors.push_back(std::make_unique<Custom::Tensor<data_type>>(context, ith_input, true));                                         \
-      std::tuple<T> current = std::tuple<T>{reinterpret_cast<Custom::Tensor<data_type>*>(tensors.back().get())};                        \
-      auto next = CreateTuple<ith_input + 1, ith_output, Ts...>(context, tensors, num_input, num_output, ep);                           \
-      return std::tuple_cat(current, next);                                                                                             \
-    } else {                                                                                                                            \
-      std::tuple<T> current = std::tuple<T>{};                                                                                          \
-      auto next = CreateTuple<ith_input + 1, ith_output, Ts...>(context, tensors, num_input, num_output, ep);                           \
-      return std::tuple_cat(current, next);                                                                                             \
-    }                                                                                                                                   \
-  }                                                                                                                                     \
-  template <size_t ith_input, size_t ith_output, typename T, typename... Ts>                                                            \
-  static typename std::enable_if<std::is_same<T, const Custom::Span<data_type>*>::value, std::tuple<T, Ts...>>::type                    \
-  CreateTuple(OrtKernelContext* context, std::vector<TensorPtr>& tensors, size_t num_input, size_t num_output, const std::string& ep) { \
-    if ("CPUExecutionProvider" != ep) {                                                                                                 \
-      ORT_CXX_API_THROW("span input could only be applied to CPU EP", OrtErrorCode::ORT_RUNTIME_EXCEPTION);                             \
-    }                                                                                                                                   \
-    tensors.push_back(std::make_unique<Custom::Tensor<data_type>>(context, ith_input, true));                                           \
-    std::tuple<T> current = std::tuple<T>{&reinterpret_cast<Custom::Tensor<data_type>*>(tensors.back().get())->AsSpan()};               \
-    auto next = CreateTuple<ith_input + 1, ith_output, Ts...>(context, tensors, num_input, num_output, ep);                             \
-    return std::tuple_cat(current, next);                                                                                               \
-  }                                                                                                                                     \
-  template <size_t ith_input, size_t ith_output, typename T, typename... Ts>                                                            \
-  static typename std::enable_if<std::is_same<T, const Custom::Span<data_type>&>::value, std::tuple<T, Ts...>>::type                    \
-  CreateTuple(OrtKernelContext* context, std::vector<TensorPtr>& tensors, size_t num_input, size_t num_output, const std::string& ep) { \
-    if ("CPUExecutionProvider" != ep) {                                                                                                 \
-      ORT_CXX_API_THROW("span input could only be applied to CPU EP", OrtErrorCode::ORT_RUNTIME_EXCEPTION);                             \
-    }                                                                                                                                   \
-    tensors.push_back(std::make_unique<Custom::Tensor<data_type>>(context, ith_input, true));                                           \
-    std::tuple<T> current = std::tuple<T>{reinterpret_cast<Custom::Tensor<data_type>*>(tensors.back().get())->AsSpan()};                \
-    auto next = CreateTuple<ith_input + 1, ith_output, Ts...>(context, tensors, num_input, num_output, ep);                             \
-    return std::tuple_cat(current, next);                                                                                               \
-  }                                                                                                                                     \
-  template <size_t ith_input, size_t ith_output, typename T, typename... Ts>                                                            \
-  static typename std::enable_if<std::is_same<T, std::optional<const Custom::Span<data_type>*>>::value, std::tuple<T, Ts...>>::type     \
-  CreateTuple(OrtKernelContext* context, std::vector<TensorPtr>& tensors, size_t num_input, size_t num_output, const std::string& ep) { \
-    if (ith_input < num_input) {                                                                                                        \
-      if ("CPUExecutionProvider" != ep) {                                                                                               \
-        ORT_CXX_API_THROW("span input could only be applied to CPU EP", OrtErrorCode::ORT_RUNTIME_EXCEPTION);                           \
-      }                                                                                                                                 \
-      tensors.push_back(std::make_unique<Custom::Tensor<data_type>>(context, ith_input, true));                                         \
-      std::tuple<T> current = std::tuple<T>{&reinterpret_cast<Custom::Tensor<data_type>*>(tensors.back().get())->AsSpan()};             \
-      auto next = CreateTuple<ith_input + 1, ith_output, Ts...>(context, tensors, num_input, num_output, ep);                           \
-      return std::tuple_cat(current, next);                                                                                             \
-    } else {                                                                                                                            \
-      std::tuple<T> current = std::tuple<T>{};                                                                                          \
-      auto next = CreateTuple<ith_input + 1, ith_output, Ts...>(context, tensors, num_input, num_output, ep);                           \
-      return std::tuple_cat(current, next);                                                                                             \
-    }                                                                                                                                   \
-  }                                                                                                                                     \
-  template <size_t ith_input, size_t ith_output, typename T, typename... Ts>                                                            \
-  static typename std::enable_if<std::is_same<T, data_type>::value, std::tuple<T, Ts...>>::type                                         \
-  CreateTuple(OrtKernelContext* context, std::vector<TensorPtr>& tensors, size_t num_input, size_t num_output, const std::string& ep) { \
-    if ("CPUExecutionProvider" != ep) {                                                                                                 \
-      ORT_CXX_API_THROW("scalar input could only be applied to CPU EP", OrtErrorCode::ORT_RUNTIME_EXCEPTION);                           \
-    }                                                                                                                                   \
-    tensors.push_back(std::make_unique<Custom::Tensor<data_type>>(context, ith_input, true));                                           \
-    std::tuple<T> current = std::tuple<T>{reinterpret_cast<Custom::Tensor<data_type>*>(tensors.back().get())->AsScalar()};              \
-    auto next = CreateTuple<ith_input + 1, ith_output, Ts...>(context, tensors, num_input, num_output, ep);                             \
-    return std::tuple_cat(current, next);                                                                                               \
-  }                                                                                                                                     \
-  template <size_t ith_input, size_t ith_output, typename T, typename... Ts>                                                            \
-  static typename std::enable_if<std::is_same<T, std::optional<data_type>>::value, std::tuple<T, Ts...>>::type                          \
-  CreateTuple(OrtKernelContext* context, std::vector<TensorPtr>& tensors, size_t num_input, size_t num_output, const std::string& ep) { \
-    if (ith_input < num_input) {                                                                                                        \
-      if ("CPUExecutionProvider" != ep) {                                                                                               \
-        ORT_CXX_API_THROW("scalar input could only be applied to CPU EP", OrtErrorCode::ORT_RUNTIME_EXCEPTION);                         \
-      }                                                                                                                                 \
-      tensors.push_back(std::make_unique<Custom::Tensor<data_type>>(context, ith_input, true));                                         \
-      std::tuple<T> current = std::tuple<T>{reinterpret_cast<Custom::Tensor<data_type>*>(tensors.back().get())->AsScalar()};            \
-      auto next = CreateTuple<ith_input + 1, ith_output, Ts...>(context, tensors, num_input, num_output, ep);                           \
-      return std::tuple_cat(current, next);                                                                                             \
-    } else {                                                                                                                            \
-      std::tuple<T> current = std::tuple<T>{};                                                                                          \
-      auto next = CreateTuple<ith_input + 1, ith_output, Ts...>(context, tensors, num_input, num_output, ep);                           \
-      return std::tuple_cat(current, next);                                                                                             \
-    }                                                                                                                                   \
-  }
-#define CREATE_TUPLE_OUTPUT(data_type)                                                                                                  \
-  template <size_t ith_input, size_t ith_output, typename T, typename... Ts>                                                            \
-  static typename std::enable_if<std::is_same<T, Custom::Tensor<data_type>*>::value, std::tuple<T, Ts...>>::type                        \
-  CreateTuple(OrtKernelContext* context, std::vector<TensorPtr>& tensors, size_t num_input, size_t num_output, const std::string& ep) { \
-    tensors.push_back(std::make_unique<Custom::Tensor<data_type>>(context, ith_output, false));                                         \
-    std::tuple<T> current = std::tuple<T>{reinterpret_cast<T>(tensors.back().get())};                                                   \
-    auto next = CreateTuple<ith_input, ith_output + 1, Ts...>(context, tensors, num_input, num_output, ep);                             \
-    return std::tuple_cat(current, next);                                                                                               \
-  }                                                                                                                                     \
-  template <size_t ith_input, size_t ith_output, typename T, typename... Ts>                                                            \
-  static typename std::enable_if<std::is_same<T, Custom::Tensor<data_type>&>::value, std::tuple<T, Ts...>>::type                        \
-  CreateTuple(OrtKernelContext* context, std::vector<TensorPtr>& tensors, size_t num_input, size_t num_output, const std::string& ep) { \
-    tensors.push_back(std::make_unique<Custom::Tensor<data_type>>(context, ith_output, false));                                         \
-    std::tuple<T> current = std::tuple<T>{reinterpret_cast<T>(*tensors.back().get())};                                                  \
-    auto next = CreateTuple<ith_input, ith_output + 1, Ts...>(context, tensors, num_input, num_output, ep);                             \
-    return std::tuple_cat(current, next);                                                                                               \
-  }                                                                                                                                     \
-  template <size_t ith_input, size_t ith_output, typename T, typename... Ts>                                                            \
-  static typename std::enable_if<std::is_same<T, std::optional<Custom::Tensor<data_type>*>>::value, std::tuple<T, Ts...>>::type         \
-  CreateTuple(OrtKernelContext* context, std::vector<TensorPtr>& tensors, size_t num_input, size_t num_output, const std::string& ep) { \
-    if (ith_output < num_output) {                                                                                                      \
-      tensors.push_back(std::make_unique<Custom::Tensor<data_type>>(context, ith_output, false));                                       \
-      std::tuple<T> current = std::tuple<T>{reinterpret_cast<Custom::Tensor<data_type>*>(tensors.back().get())};                        \
-      auto next = CreateTuple<ith_input, ith_output + 1, Ts...>(context, tensors, num_input, num_output, ep);                           \
-      return std::tuple_cat(current, next);                                                                                             \
-    } else {                                                                                                                            \
-      std::tuple<T> current = std::tuple<T>{};                                                                                          \
-      auto next = CreateTuple<ith_input, ith_output + 1, Ts...>(context, tensors, num_input, num_output, ep);                           \
-      return std::tuple_cat(current, next);                                                                                             \
-    }                                                                                                                                   \
+  template <size_t ith_input, size_t ith_output, typename T, typename... Ts>
+  static typename std::enable_if<std::is_same<T, const TensorArray*>::value, std::tuple<T, Ts...>>::type
+  CreateTuple(OrtKernelContext* context, ArgPtrs& args, size_t num_input, size_t num_output, const std::string& ep) {
+    args.push_back(std::make_unique<TensorArray>(context, ith_input, true));
+    std::tuple<T> current = std::tuple<T>{reinterpret_cast<T>(args.back().get())};
+    auto next = CreateTuple<ith_input + 1, ith_output, Ts...>(context, args, num_input, num_output, ep);
+    return std::tuple_cat(current, next);
+  }
+
+  template <size_t ith_input, size_t ith_output, typename T, typename... Ts>
+  static typename std::enable_if<std::is_same<T, const TensorArray&>::value, std::tuple<T, Ts...>>::type
+  CreateTuple(OrtKernelContext* context, ArgPtrs& args, size_t num_input, size_t num_output, const std::string& ep) {
+    args.push_back(std::make_unique<TensorArray>(context, ith_input, true));
+    std::tuple<T> current = std::tuple<T>{reinterpret_cast<T>(*args.back().get())};
+    auto next = CreateTuple<ith_input + 1, ith_output, Ts...>(context, args, num_input, num_output, ep);
+    return std::tuple_cat(current, next);
+  }
+
+  template <size_t ith_input, size_t ith_output, typename T, typename... Ts>
+  static typename std::enable_if<std::is_same<T, TensorArray*>::value, std::tuple<T, Ts...>>::type
+  CreateTuple(OrtKernelContext* context, ArgPtrs& args, size_t num_input, size_t num_output, const std::string& ep) {
+    args.push_back(std::make_unique<TensorArray>(context, ith_output, false));
+    std::tuple<T> current = std::tuple<T>{reinterpret_cast<T>(args.back().get())};
+    auto next = CreateTuple<ith_input, ith_output + 1, Ts...>(context, args, num_input, num_output, ep);
+    return std::tuple_cat(current, next);
+  }
+
+  template <size_t ith_input, size_t ith_output, typename T, typename... Ts>
+  static typename std::enable_if<std::is_same<T, TensorArray&>::value, std::tuple<T, Ts...>>::type
+  CreateTuple(OrtKernelContext* context, ArgPtrs& args, size_t num_input, size_t num_output, const std::string& ep) {
+    args.push_back(std::make_unique<TensorArray>(context, ith_output, false));
+    std::tuple<T> current = std::tuple<T>{reinterpret_cast<T>(*args.back().get())};
+    auto next = CreateTuple<ith_input, ith_output + 1, Ts...>(context, args, num_input, num_output, ep);
+    return std::tuple_cat(current, next);
+  }
+
+#define CREATE_TUPLE_INPUT(data_type)                                                                                                 \
+  template <size_t ith_input, size_t ith_output, typename T, typename... Ts>                                                          \
+  static typename std::enable_if<std::is_same<T, const Custom::Tensor<data_type>*>::value, std::tuple<T, Ts...>>::type                \
+  CreateTuple(OrtKernelContext* context, ArgPtrs& args, size_t num_input, size_t num_output, const std::string& ep) {                 \
+    args.push_back(std::make_unique<Custom::Tensor<data_type>>(context, ith_input, true));                                            \
+    std::tuple<T> current = std::tuple<T>{reinterpret_cast<T>(args.back().get())};                                                    \
+    auto next = CreateTuple<ith_input + 1, ith_output, Ts...>(context, args, num_input, num_output, ep);                              \
+    return std::tuple_cat(current, next);                                                                                             \
+  }                                                                                                                                   \
+  template <size_t ith_input, size_t ith_output, typename T, typename... Ts>                                                          \
+  static typename std::enable_if<std::is_same<T, const Custom::Tensor<data_type>&>::value, std::tuple<T, Ts...>>::type                \
+  CreateTuple(OrtKernelContext* context, ArgPtrs& args, size_t num_input, size_t num_output, const std::string& ep) {                 \
+    args.push_back(std::make_unique<Custom::Tensor<data_type>>(context, ith_input, true));                                            \
+    std::tuple<T> current = std::tuple<T>{reinterpret_cast<T>(*args.back().get())};                                                   \
+    auto next = CreateTuple<ith_input + 1, ith_output, Ts...>(context, args, num_input, num_output, ep);                              \
+    return std::tuple_cat(current, next);                                                                                             \
+  }                                                                                                                                   \
+  template <size_t ith_input, size_t ith_output, typename T, typename... Ts>                                                          \
+  static typename std::enable_if<std::is_same<T, std::optional<const Custom::Tensor<data_type>*>>::value, std::tuple<T, Ts...>>::type \
+  CreateTuple(OrtKernelContext* context, ArgPtrs& args, size_t num_input, size_t num_output, const std::string& ep) {                 \
+    if (ith_input < num_input) {                                                                                                      \
+      args.push_back(std::make_unique<Custom::Tensor<data_type>>(context, ith_input, true));                                          \
+      std::tuple<T> current = std::tuple<T>{reinterpret_cast<Custom::Tensor<data_type>*>(args.back().get())};                         \
+      auto next = CreateTuple<ith_input + 1, ith_output, Ts...>(context, args, num_input, num_output, ep);                            \
+      return std::tuple_cat(current, next);                                                                                           \
+    } else {                                                                                                                          \
+      std::tuple<T> current = std::tuple<T>{};                                                                                        \
+      auto next = CreateTuple<ith_input + 1, ith_output, Ts...>(context, args, num_input, num_output, ep);                            \
+      return std::tuple_cat(current, next);                                                                                           \
+    }                                                                                                                                 \
+  }                                                                                                                                   \
+  template <size_t ith_input, size_t ith_output, typename T, typename... Ts>                                                          \
+  static typename std::enable_if<std::is_same<T, const Custom::Span<data_type>*>::value, std::tuple<T, Ts...>>::type                  \
+  CreateTuple(OrtKernelContext* context, ArgPtrs& args, size_t num_input, size_t num_output, const std::string& ep) {                 \
+    if ("CPUExecutionProvider" != ep) {                                                                                               \
+      ORT_CXX_API_THROW("span input could only be applied to CPU EP", OrtErrorCode::ORT_RUNTIME_EXCEPTION);                           \
+    }                                                                                                                                 \
+    args.push_back(std::make_unique<Custom::Tensor<data_type>>(context, ith_input, true));                                            \
+    std::tuple<T> current = std::tuple<T>{&reinterpret_cast<Custom::Tensor<data_type>*>(args.back().get())->AsSpan()};                \
+    auto next = CreateTuple<ith_input + 1, ith_output, Ts...>(context, args, num_input, num_output, ep);                              \
+    return std::tuple_cat(current, next);                                                                                             \
+  }                                                                                                                                   \
+  template <size_t ith_input, size_t ith_output, typename T, typename... Ts>                                                          \
+  static typename std::enable_if<std::is_same<T, const Custom::Span<data_type>&>::value, std::tuple<T, Ts...>>::type                  \
+  CreateTuple(OrtKernelContext* context, ArgPtrs& args, size_t num_input, size_t num_output, const std::string& ep) {                 \
+    if ("CPUExecutionProvider" != ep) {                                                                                               \
+      ORT_CXX_API_THROW("span input could only be applied to CPU EP", OrtErrorCode::ORT_RUNTIME_EXCEPTION);                           \
+    }                                                                                                                                 \
+    args.push_back(std::make_unique<Custom::Tensor<data_type>>(context, ith_input, true));                                            \
+    std::tuple<T> current = std::tuple<T>{reinterpret_cast<Custom::Tensor<data_type>*>(args.back().get())->AsSpan()};                 \
+    auto next = CreateTuple<ith_input + 1, ith_output, Ts...>(context, args, num_input, num_output, ep);                              \
+    return std::tuple_cat(current, next);                                                                                             \
+  }                                                                                                                                   \
+  template <size_t ith_input, size_t ith_output, typename T, typename... Ts>                                                          \
+  static typename std::enable_if<std::is_same<T, std::optional<const Custom::Span<data_type>*>>::value, std::tuple<T, Ts...>>::type   \
+  CreateTuple(OrtKernelContext* context, ArgPtrs& args, size_t num_input, size_t num_output, const std::string& ep) {                 \
+    if (ith_input < num_input) {                                                                                                      \
+      if ("CPUExecutionProvider" != ep) {                                                                                             \
+        ORT_CXX_API_THROW("span input could only be applied to CPU EP", OrtErrorCode::ORT_RUNTIME_EXCEPTION);                         \
+      }                                                                                                                               \
+      args.push_back(std::make_unique<Custom::Tensor<data_type>>(context, ith_input, true));                                          \
+      std::tuple<T> current = std::tuple<T>{&reinterpret_cast<Custom::Tensor<data_type>*>(args.back().get())->AsSpan()};              \
+      auto next = CreateTuple<ith_input + 1, ith_output, Ts...>(context, args, num_input, num_output, ep);                            \
+      return std::tuple_cat(current, next);                                                                                           \
+    } else {                                                                                                                          \
+      std::tuple<T> current = std::tuple<T>{};                                                                                        \
+      auto next = CreateTuple<ith_input + 1, ith_output, Ts...>(context, args, num_input, num_output, ep);                            \
+      return std::tuple_cat(current, next);                                                                                           \
+    }                                                                                                                                 \
+  }                                                                                                                                   \
+  template <size_t ith_input, size_t ith_output, typename T, typename... Ts>                                                          \
+  static typename std::enable_if<std::is_same<T, data_type>::value, std::tuple<T, Ts...>>::type                                       \
+  CreateTuple(OrtKernelContext* context, ArgPtrs& args, size_t num_input, size_t num_output, const std::string& ep) {                 \
+    if ("CPUExecutionProvider" != ep) {                                                                                               \
+      ORT_CXX_API_THROW("scalar input could only be applied to CPU EP", OrtErrorCode::ORT_RUNTIME_EXCEPTION);                         \
+    }                                                                                                                                 \
+    args.push_back(std::make_unique<Custom::Tensor<data_type>>(context, ith_input, true));                                            \
+    std::tuple<T> current = std::tuple<T>{reinterpret_cast<Custom::Tensor<data_type>*>(args.back().get())->AsScalar()};               \
+    auto next = CreateTuple<ith_input + 1, ith_output, Ts...>(context, args, num_input, num_output, ep);                              \
+    return std::tuple_cat(current, next);                                                                                             \
+  }                                                                                                                                   \
+  template <size_t ith_input, size_t ith_output, typename T, typename... Ts>                                                          \
+  static typename std::enable_if<std::is_same<T, std::optional<data_type>>::value, std::tuple<T, Ts...>>::type                        \
+  CreateTuple(OrtKernelContext* context, ArgPtrs& args, size_t num_input, size_t num_output, const std::string& ep) {                 \
+    if (ith_input < num_input) {                                                                                                      \
+      if ("CPUExecutionProvider" != ep) {                                                                                             \
+        ORT_CXX_API_THROW("scalar input could only be applied to CPU EP", OrtErrorCode::ORT_RUNTIME_EXCEPTION);                       \
+      }                                                                                                                               \
+      args.push_back(std::make_unique<Custom::Tensor<data_type>>(context, ith_input, true));                                          \
+      std::tuple<T> current = std::tuple<T>{reinterpret_cast<Custom::Tensor<data_type>*>(args.back().get())->AsScalar()};             \
+      auto next = CreateTuple<ith_input + 1, ith_output, Ts...>(context, args, num_input, num_output, ep);                            \
+      return std::tuple_cat(current, next);                                                                                           \
+    } else {                                                                                                                          \
+      std::tuple<T> current = std::tuple<T>{};                                                                                        \
+      auto next = CreateTuple<ith_input + 1, ith_output, Ts...>(context, args, num_input, num_output, ep);                            \
+      return std::tuple_cat(current, next);                                                                                           \
+    }                                                                                                                                 \
+  }
+#define CREATE_TUPLE_OUTPUT(data_type)                                                                                          \
+  template <size_t ith_input, size_t ith_output, typename T, typename... Ts>                                                    \
+  static typename std::enable_if<std::is_same<T, Custom::Tensor<data_type>*>::value, std::tuple<T, Ts...>>::type                \
+  CreateTuple(OrtKernelContext* context, ArgPtrs& args, size_t num_input, size_t num_output, const std::string& ep) {           \
+    args.push_back(std::make_unique<Custom::Tensor<data_type>>(context, ith_output, false));                                    \
+    std::tuple<T> current = std::tuple<T>{reinterpret_cast<T>(args.back().get())};                                              \
+    auto next = CreateTuple<ith_input, ith_output + 1, Ts...>(context, args, num_input, num_output, ep);                        \
+    return std::tuple_cat(current, next);                                                                                       \
+  }                                                                                                                             \
+  template <size_t ith_input, size_t ith_output, typename T, typename... Ts>                                                    \
+  static typename std::enable_if<std::is_same<T, Custom::Tensor<data_type>&>::value, std::tuple<T, Ts...>>::type                \
+  CreateTuple(OrtKernelContext* context, ArgPtrs& args, size_t num_input, size_t num_output, const std::string& ep) {           \
+    args.push_back(std::make_unique<Custom::Tensor<data_type>>(context, ith_output, false));                                    \
+    std::tuple<T> current = std::tuple<T>{reinterpret_cast<T>(*args.back().get())};                                             \
+    auto next = CreateTuple<ith_input, ith_output + 1, Ts...>(context, args, num_input, num_output, ep);                        \
+    return std::tuple_cat(current, next);                                                                                       \
+  }                                                                                                                             \
+  template <size_t ith_input, size_t ith_output, typename T, typename... Ts>                                                    \
+  static typename std::enable_if<std::is_same<T, std::optional<Custom::Tensor<data_type>*>>::value, std::tuple<T, Ts...>>::type \
+  CreateTuple(OrtKernelContext* context, ArgPtrs& args, size_t num_input, size_t num_output, const std::string& ep) {           \
+    if (ith_output < num_output) {                                                                                              \
+      args.push_back(std::make_unique<Custom::Tensor<data_type>>(context, ith_output, false));                                  \
+      std::tuple<T> current = std::tuple<T>{reinterpret_cast<Custom::Tensor<data_type>*>(args.back().get())};                   \
+      auto next = CreateTuple<ith_input, ith_output + 1, Ts...>(context, args, num_input, num_output, ep);                      \
+      return std::tuple_cat(current, next);                                                                                     \
+    } else {                                                                                                                    \
+      std::tuple<T> current = std::tuple<T>{};                                                                                  \
+      auto next = CreateTuple<ith_input, ith_output + 1, Ts...>(context, args, num_input, num_output, ep);                      \
+      return std::tuple_cat(current, next);                                                                                     \
+    }                                                                                                                           \
   }
 #define CREATE_TUPLE(data_type) \
   CREATE_TUPLE_INPUT(data_type) \
@@ -491,6 +682,34 @@ struct OrtLiteCustomOp : public OrtCustomOp {
   }
 #endif
 
+  template <typename T, typename... Ts>
+  static typename std::enable_if<0 <= sizeof...(Ts) && std::is_same<T, const TensorArray&>::value>::type
+  ParseArgs(std::vector<ONNXTensorElementDataType>& input_types, std::vector<ONNXTensorElementDataType>& output_types) {
+    input_types.push_back(ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED);
+    ParseArgs<Ts...>(input_types, output_types);
+  }
+
+  template <typename T, typename... Ts>
+  static typename std::enable_if<0 <= sizeof...(Ts) && std::is_same<T, const TensorArray*>::value>::type
+  ParseArgs(std::vector<ONNXTensorElementDataType>& input_types, std::vector<ONNXTensorElementDataType>& output_types) {
+    input_types.push_back(ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED);
+    ParseArgs<Ts...>(input_types, output_types);
+  }
+
+  template <typename T, typename... Ts>
+  static typename std::enable_if<0 <= sizeof...(Ts) && std::is_same<T, TensorArray&>::value>::type
+  ParseArgs(std::vector<ONNXTensorElementDataType>& input_types, std::vector<ONNXTensorElementDataType>& output_types) {
+    output_types.push_back(ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED);
+    ParseArgs<Ts...>(input_types, output_types);
+  }
+
+  template <typename T, typename... Ts>
+  static typename std::enable_if<0 <= sizeof...(Ts) && std::is_same<T, TensorArray*>::value>::type
+  ParseArgs(std::vector<ONNXTensorElementDataType>& input_types, std::vector<ONNXTensorElementDataType>& output_types) {
+    output_types.push_back(ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED);
+    ParseArgs<Ts...>(input_types, output_types);
+  }
+
 #define PARSE_INPUT_BASE(pack_type, onnx_type)                                                                           \
   template <typename T, typename... Ts>                                                                                  \
   static typename std::enable_if<0 <= sizeof...(Ts) && std::is_same<T, pack_type>::value>::type                          \
@@ -499,6 +718,12 @@ struct OrtLiteCustomOp : public OrtCustomOp {
     ParseArgs<Ts...>(input_types, output_types);                                                                         \
   }                                                                                                                      \
   template <typename T, typename... Ts>                                                                                  \
+  static typename std::enable_if<0 <= sizeof...(Ts) && std::is_same<T, const std::optional<pack_type>>::value>::type     \
+  ParseArgs(std::vector<ONNXTensorElementDataType>& input_types, std::vector<ONNXTensorElementDataType>& output_types) { \
+    input_types.push_back(onnx_type);                                                                                    \
+    ParseArgs<Ts...>(input_types, output_types);                                                                         \
+  }                                                                                                                      \
+  template <typename T, typename... Ts>                                                                                  \
   static typename std::enable_if<0 <= sizeof...(Ts) && std::is_same<T, std::optional<pack_type>>::value>::type           \
   ParseArgs(std::vector<ONNXTensorElementDataType>& input_types, std::vector<ONNXTensorElementDataType>& output_types) { \
     input_types.push_back(onnx_type);                                                                                    \
@@ -557,8 +782,14 @@ struct OrtLiteCustomOp : public OrtCustomOp {
   PARSE_ARGS(Ort::Float8E5M2FNUZ_t, ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT8E5M2FNUZ)
 
   OrtLiteCustomOp(const char* op_name,
-                  const char* execution_provider) : op_name_(op_name),
-                                                    execution_provider_(execution_provider) {
+                  const char* execution_provider,
+                  ShapeInferFn shape_infer_fn,
+                  int start_ver = 1,
+                  int end_ver = MAX_CUSTOM_OP_END_VER) : op_name_(op_name),
+                                                         execution_provider_(execution_provider),
+                                                         shape_infer_fn_(shape_infer_fn),
+                                                         start_ver_(start_ver),
+                                                         end_ver_(end_ver) {
     OrtCustomOp::version = ORT_API_VERSION;
 
     OrtCustomOp::GetName = [](const OrtCustomOp* op) { return static_cast<const OrtLiteCustomOp*>(op)->op_name_.c_str(); };
@@ -585,18 +816,52 @@ struct OrtLiteCustomOp : public OrtCustomOp {
       return self->output_types_[indice];
     };
 
-    OrtCustomOp::GetInputCharacteristic = [](const OrtCustomOp*, size_t) {
-      return INPUT_OUTPUT_OPTIONAL;
+    OrtCustomOp::GetInputCharacteristic = [](const OrtCustomOp* op, size_t indice) {
+      auto self = reinterpret_cast<const OrtLiteCustomOp*>(op);
+      return self->input_types_[indice] == ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED ? INPUT_OUTPUT_VARIADIC : INPUT_OUTPUT_OPTIONAL;
+    };
+
+    OrtCustomOp::GetOutputCharacteristic = [](const OrtCustomOp* op, size_t indice) {
+      auto self = reinterpret_cast<const OrtLiteCustomOp*>(op);
+      return self->output_types_[indice] == ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED ? INPUT_OUTPUT_VARIADIC : INPUT_OUTPUT_OPTIONAL;
     };
 
-    OrtCustomOp::GetOutputCharacteristic = [](const OrtCustomOp*, size_t) {
-      return INPUT_OUTPUT_OPTIONAL;
+    OrtCustomOp::GetVariadicInputMinArity = [](const OrtCustomOp*) {
+      return 1;
+    };
+
+    OrtCustomOp::GetVariadicInputHomogeneity = [](const OrtCustomOp*) {
+      return 0;
+    };
+
+    OrtCustomOp::GetVariadicOutputMinArity = [](const OrtCustomOp*) {
+      return 1;
+    };
+
+    OrtCustomOp::GetVariadicOutputHomogeneity = [](const OrtCustomOp*) {
+      return 0;
     };
 
     OrtCustomOp::GetVariadicInputMinArity = [](const OrtCustomOp*) { return 0; };
     OrtCustomOp::GetVariadicInputHomogeneity = [](const OrtCustomOp*) { return 0; };
     OrtCustomOp::GetVariadicOutputMinArity = [](const OrtCustomOp*) { return 0; };
     OrtCustomOp::GetVariadicOutputHomogeneity = [](const OrtCustomOp*) { return 0; };
+
+    OrtCustomOp::CreateKernelV2 = {};
+    OrtCustomOp::KernelComputeV2 = {};
+    OrtCustomOp::KernelCompute = {};
+
+    OrtCustomOp::InferOutputShapeFn = {};
+
+    OrtCustomOp::GetStartVersion = [](const OrtCustomOp* op) {
+      auto self = reinterpret_cast<const OrtLiteCustomOp*>(op);
+      return self->start_ver_;
+    };
+
+    OrtCustomOp::GetEndVersion = [](const OrtCustomOp* op) {
+      auto self = reinterpret_cast<const OrtLiteCustomOp*>(op);
+      return self->end_ver_;
+    };
   }
 
   const std::string op_name_;
@@ -604,6 +869,14 @@ struct OrtLiteCustomOp : public OrtCustomOp {
 
   std::vector<ONNXTensorElementDataType> input_types_;
   std::vector<ONNXTensorElementDataType> output_types_;
+
+  ShapeInferFn shape_infer_fn_ = {};
+
+  int start_ver_ = 1;
+  int end_ver_ = MAX_CUSTOM_OP_END_VER;
+
+  void* compute_fn_ = {};
+  void* compute_fn_return_status_ = {};
 };
 
 //////////////////////////// OrtLiteCustomFunc ////////////////////////////////
@@ -619,31 +892,37 @@ struct OrtLiteCustomOp : public OrtCustomOp {
 template <typename... Args>
 struct OrtLiteCustomFunc : public OrtLiteCustomOp {
   using ComputeFn = void (*)(Args...);
+  using ComputeFnReturnStatus = Status (*)(Args...);
   using MyType = OrtLiteCustomFunc<Args...>;
 
   struct Kernel {
     size_t num_input_{};
     size_t num_output_{};
     ComputeFn compute_fn_{};
+    ComputeFnReturnStatus compute_fn_return_status_{};
     std::string ep_{};
   };
 
   OrtLiteCustomFunc(const char* op_name,
                     const char* execution_provider,
-                    ComputeFn compute_fn) : OrtLiteCustomOp(op_name, execution_provider),
-                                            compute_fn_(compute_fn) {
+                    ComputeFn compute_fn,
+                    ShapeInferFn shape_infer_fn = {},
+                    int start_ver = 1,
+                    int end_ver = MAX_CUSTOM_OP_END_VER) : OrtLiteCustomOp(op_name, execution_provider, shape_infer_fn, start_ver, end_ver) {
+    compute_fn_ = reinterpret_cast<void*>(compute_fn);
     ParseArgs<Args...>(input_types_, output_types_);
 
     OrtCustomOp::KernelCompute = [](void* op_kernel, OrtKernelContext* context) {
       auto kernel = reinterpret_cast<Kernel*>(op_kernel);
-      std::vector<TensorPtr> tensors;
-      auto t = CreateTuple<0, 0, Args...>(context, tensors, kernel->num_input_, kernel->num_output_, kernel->ep_);
+      std::vector<ArgPtr> args;
+      auto t = CreateTuple<0, 0, Args...>(context, args, kernel->num_input_, kernel->num_output_, kernel->ep_);
       std::apply([kernel](Args const&... t_args) { kernel->compute_fn_(t_args...); }, t);
     };
 
     OrtCustomOp::CreateKernel = [](const OrtCustomOp* this_, const OrtApi* ort_api, const OrtKernelInfo* info) {
       auto kernel = std::make_unique<Kernel>();
-      kernel->compute_fn_ = static_cast<const MyType*>(this_)->compute_fn_;
+      auto me = static_cast<const MyType*>(this_);
+      kernel->compute_fn_ = reinterpret_cast<ComputeFn>(me->compute_fn_);
       Ort::ThrowOnError(ort_api->KernelInfo_GetInputCount(info, &kernel->num_input_));
       Ort::ThrowOnError(ort_api->KernelInfo_GetOutputCount(info, &kernel->num_output_));
       auto self = static_cast<const OrtLiteCustomFunc*>(this_);
@@ -654,9 +933,55 @@ struct OrtLiteCustomFunc : public OrtLiteCustomOp {
     OrtCustomOp::KernelDestroy = [](void* op_kernel) {
       delete reinterpret_cast<Kernel*>(op_kernel);
     };
+
+    if (shape_infer_fn_) {
+      OrtCustomOp::InferOutputShapeFn = [](const OrtCustomOp* op, OrtShapeInferContext* ort_ctx) -> OrtStatusPtr {
+        auto shape_info_fn = static_cast<const MyType*>(op)->shape_infer_fn_;
+        ShapeInferContext ctx(&GetApi(), ort_ctx);
+        return shape_info_fn(ctx);
+      };
+    }
   }
 
-  ComputeFn compute_fn_;
+  OrtLiteCustomFunc(const char* op_name,
+                    const char* execution_provider,
+                    ComputeFnReturnStatus compute_fn_return_status,
+                    ShapeInferFn shape_infer_fn = {},
+                    int start_ver = 1,
+                    int end_ver = MAX_CUSTOM_OP_END_VER) : OrtLiteCustomOp(op_name, execution_provider, shape_infer_fn, start_ver, end_ver) {
+    compute_fn_return_status_ = reinterpret_cast<void*>(compute_fn_return_status);
+    ParseArgs<Args...>(input_types_, output_types_);
+
+    OrtCustomOp::KernelComputeV2 = [](void* op_kernel, OrtKernelContext* context) -> OrtStatusPtr {
+      auto kernel = reinterpret_cast<Kernel*>(op_kernel);
+      std::vector<ArgPtr> args;
+      auto t = CreateTuple<0, 0, Args...>(context, args, kernel->num_input_, kernel->num_output_, kernel->ep_);
+      return std::apply([kernel](Args const&... t_args) { Status status = kernel->compute_fn_return_status_(t_args...); return status.release(); }, t);
+    };
+
+    OrtCustomOp::CreateKernel = [](const OrtCustomOp* this_, const OrtApi* ort_api, const OrtKernelInfo* info) {
+      auto kernel = std::make_unique<Kernel>();
+      auto me = static_cast<const MyType*>(this_);
+      kernel->compute_fn_return_status_ = reinterpret_cast<ComputeFnReturnStatus>(me->compute_fn_return_status_);
+      Ort::ThrowOnError(ort_api->KernelInfo_GetInputCount(info, &kernel->num_input_));
+      Ort::ThrowOnError(ort_api->KernelInfo_GetOutputCount(info, &kernel->num_output_));
+      auto self = static_cast<const OrtLiteCustomFunc*>(this_);
+      kernel->ep_ = self->execution_provider_;
+      return reinterpret_cast<void*>(kernel.release());
+    };
+
+    OrtCustomOp::KernelDestroy = [](void* op_kernel) {
+      delete reinterpret_cast<Kernel*>(op_kernel);
+    };
+
+    if (shape_infer_fn_) {
+      OrtCustomOp::InferOutputShapeFn = [](const OrtCustomOp* op, OrtShapeInferContext* ort_ctx) -> OrtStatusPtr {
+        auto shape_info_fn = static_cast<const MyType*>(op)->shape_infer_fn_;
+        ShapeInferContext ctx(&GetApi(), ort_ctx);
+        return shape_info_fn(ctx);
+      };
+    }
+  }
 };  // struct OrtLiteCustomFunc
 
 /////////////////////////// OrtLiteCustomStruct ///////////////////////////
@@ -679,6 +1004,10 @@ template <typename CustomOp>
 struct OrtLiteCustomStruct : public OrtLiteCustomOp {
   template <typename... Args>
   using CustomComputeFn = void (CustomOp::*)(Args...);
+
+  template <typename... Args>
+  using CustomComputeFnReturnStatus = Status (CustomOp::*)(Args...);
+
   using MyType = OrtLiteCustomStruct<CustomOp>;
 
   struct Kernel {
@@ -689,21 +1018,10 @@ struct OrtLiteCustomStruct : public OrtLiteCustomOp {
   };
 
   OrtLiteCustomStruct(const char* op_name,
-                      const char* execution_provider) : OrtLiteCustomOp(op_name,
-                                                                        execution_provider) {
-    init(&CustomOp::Compute);
-  }
-
-  template <typename... Args>
-  void init(CustomComputeFn<Args...>) {
-    ParseArgs<Args...>(input_types_, output_types_);
-
-    OrtCustomOp::KernelCompute = [](void* op_kernel, OrtKernelContext* context) {
-      auto kernel = reinterpret_cast<Kernel*>(op_kernel);
-      std::vector<TensorPtr> tensors;
-      auto t = CreateTuple<0, 0, Args...>(context, tensors, kernel->num_input_, kernel->num_output_, kernel->ep_);
-      std::apply([kernel](Args const&... t_args) { kernel->custom_op_->Compute(t_args...); }, t);
-    };
+                      const char* execution_provider,
+                      int start_ver = 1,
+                      int end_ver = MAX_CUSTOM_OP_END_VER) : OrtLiteCustomOp(op_name, execution_provider, {}, start_ver, end_ver) {
+    SetCompute(&CustomOp::Compute);
 
     OrtCustomOp::CreateKernel = [](const OrtCustomOp* this_, const OrtApi* ort_api, const OrtKernelInfo* info) {
       auto kernel = std::make_unique<Kernel>();
@@ -718,6 +1036,44 @@ struct OrtLiteCustomStruct : public OrtLiteCustomOp {
     OrtCustomOp::KernelDestroy = [](void* op_kernel) {
       delete reinterpret_cast<Kernel*>(op_kernel);
     };
+
+    SetShapeInfer<CustomOp>(0);
+  }
+
+  template <typename... Args>
+  void SetCompute(CustomComputeFn<Args...>) {
+    ParseArgs<Args...>(input_types_, output_types_);
+    OrtCustomOp::KernelCompute = [](void* op_kernel, OrtKernelContext* context) {
+      auto kernel = reinterpret_cast<Kernel*>(op_kernel);
+      ArgPtrs args;
+      auto t = CreateTuple<0, 0, Args...>(context, args, kernel->num_input_, kernel->num_output_, kernel->ep_);
+      std::apply([kernel](Args const&... t_args) { kernel->custom_op_->Compute(t_args...); }, t);
+    };
+  }
+
+  template <typename... Args>
+  void SetCompute(CustomComputeFnReturnStatus<Args...>) {
+    ParseArgs<Args...>(input_types_, output_types_);
+    OrtCustomOp::KernelComputeV2 = [](void* op_kernel, OrtKernelContext* context) -> OrtStatusPtr {
+      auto kernel = reinterpret_cast<Kernel*>(op_kernel);
+      ArgPtrs args;
+      auto t = CreateTuple<0, 0, Args...>(context, args, kernel->num_input_, kernel->num_output_, kernel->ep_);
+      return std::apply([kernel](Args const&... t_args) { Status status = kernel->custom_op_->Compute(t_args...); return status.release(); }, t);
+    };
+  }
+
+  template <typename C>
+  decltype(&C::InferOutputShape) SetShapeInfer(decltype(&C::InferOutputShape)) {
+    OrtCustomOp::InferOutputShapeFn = [](const OrtCustomOp*, OrtShapeInferContext* ort_ctx) -> OrtStatusPtr {
+      ShapeInferContext ctx(&GetApi(), ort_ctx);
+      return C::InferOutputShape(ctx);
+    };
+    return {};
+  }
+
+  template <typename C>
+  void SetShapeInfer(...) {
+    OrtCustomOp::InferOutputShapeFn = {};
   }
 };  // struct OrtLiteCustomStruct
 
@@ -726,17 +1082,33 @@ struct OrtLiteCustomStruct : public OrtLiteCustomOp {
 template <typename... Args>
 OrtLiteCustomOp* CreateLiteCustomOp(const char* op_name,
                                     const char* execution_provider,
-                                    void (*custom_compute_fn)(Args...)) {
+                                    void (*custom_compute_fn)(Args...),
+                                    Status (*shape_infer_fn)(ShapeInferContext&) = {},
+                                    int start_ver = 1,
+                                    int end_ver = MAX_CUSTOM_OP_END_VER) {
   using LiteOp = OrtLiteCustomFunc<Args...>;
-  return std::make_unique<LiteOp>(op_name, execution_provider, custom_compute_fn).release();
+  return std::make_unique<LiteOp>(op_name, execution_provider, custom_compute_fn, shape_infer_fn, start_ver, end_ver).release();
+}
+
+template <typename... Args>
+OrtLiteCustomOp* CreateLiteCustomOp(const char* op_name,
+                                    const char* execution_provider,
+                                    Status (*custom_compute_fn_v2)(Args...),
+                                    Status (*shape_infer_fn)(ShapeInferContext&) = {},
+                                    int start_ver = 1,
+                                    int end_ver = MAX_CUSTOM_OP_END_VER) {
+  using LiteOp = OrtLiteCustomFunc<Args...>;
+  return std::make_unique<LiteOp>(op_name, execution_provider, custom_compute_fn_v2, shape_infer_fn, start_ver, end_ver).release();
 }
 
 template <typename CustomOp>
 OrtLiteCustomOp* CreateLiteCustomOp(const char* op_name,
-                                    const char* execution_provider) {
+                                    const char* execution_provider,
+                                    int start_ver = 1,
+                                    int end_ver = MAX_CUSTOM_OP_END_VER) {
   using LiteOp = OrtLiteCustomStruct<CustomOp>;
-  return std::make_unique<LiteOp>(op_name, execution_provider).release();
+  return std::make_unique<LiteOp>(op_name, execution_provider, start_ver, end_ver).release();
 }
 
 }  // namespace Custom
-}  // namespace Ort
+}  // namespace Ort
\ No newline at end of file
diff --git a/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h b/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h
index 37545f41b43dd..4628afbb5a702 100644
--- a/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h
+++ b/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h
@@ -67,16 +67,26 @@ static const char* const kOrtSessionOptionsEnableQuantQDQCleanup = "session.enab
 // GeluApproximation has side effects which may change the inference results. It is disabled by default due to this.
 static const char* const kOrtSessionOptionsEnableGeluApproximation = "optimization.enable_gelu_approximation";
 
+// This setting controls whether to enable AheadOfTime function inlining.
+// AOT function inlining examines the graph and attempts to inline as many locally defined functions in the model
+// as possible with the help of enabled execution providers.
+// This can reduce the number of function calls and improve performance because it is done before
+// Level1 optimizers and constant folding. However, under some circumstances, when the EPs are not available,
+// one can disable the AOT inlining, produce an optimized model and postpone AOT until run time.
+// "0": enable; "1": disable.
+// Its default value is "0".
+static const char* const kOrtSessionOptionsDisableAheadOfTimeFunctionInlining = "session.disable_aot_function_inlining";
+
 #ifdef ENABLE_TRAINING
 // Specifies a list of op types for memory footprint reduction.
 // The value should be a ","-delimited list of pair of
-// <subgraph string : optimization strategy : number of subgraph to apply>.
+// <subgraph string: optimization strategy: number of subgraph to apply>.
 // For example, "Gelu+Cast+:1:0,Dropout+:1:1".
 //   A valid "subgraph string" should be one subgraph representation output by ORT graph transformations.
 //   "optimization strategy" currently has valid values: 0 - disabled, 1 - recompute.
 //   "number of subgraph to apply" is used to control how many subgraphs to apply optimization, to avoid "oversaving"
 //   the memory.
-static const char* const kOrtSessionOptionsMemoryOptimizerEnabler = "optimization.enable_memory_optimizer";
+static const char* const kOrtSessionOptionsMemoryOptimizerEnabler = "optimization.memory_optimizer_config";
 
 // Specifies the level for detecting subgraphs for memory footprint reduction.
 // The value should be an integer. The default value is 0.
diff --git a/java/src/main/java/ai/onnxruntime/OnnxTensor.java b/java/src/main/java/ai/onnxruntime/OnnxTensor.java
index 09d2cefbb8224..0078adb6402f8 100644
--- a/java/src/main/java/ai/onnxruntime/OnnxTensor.java
+++ b/java/src/main/java/ai/onnxruntime/OnnxTensor.java
@@ -13,6 +13,7 @@
 import java.nio.IntBuffer;
 import java.nio.LongBuffer;
 import java.nio.ShortBuffer;
+import java.util.Optional;
 
 /**
  * A Java object wrapping an OnnxTensor. Tensors are the main input to the library, and can also be
@@ -21,18 +22,60 @@
 public class OnnxTensor extends OnnxTensorLike {
 
   /**
-   * This reference is held for OnnxTensors backed by a Java nio buffer to ensure the buffer does
+   * This reference is held for OnnxTensors backed by a java.nio.Buffer to ensure the buffer does
    * not go out of scope while the OnnxTensor exists.
    */
   private final Buffer buffer;
 
+  /**
+   * Denotes if the OnnxTensor made a copy of the buffer on construction (i.e. it may have the only
+   * reference).
+   */
+  private final boolean ownsBuffer;
+
   OnnxTensor(long nativeHandle, long allocatorHandle, TensorInfo info) {
-    this(nativeHandle, allocatorHandle, info, null);
+    this(nativeHandle, allocatorHandle, info, null, false);
   }
 
-  OnnxTensor(long nativeHandle, long allocatorHandle, TensorInfo info, Buffer buffer) {
+  OnnxTensor(
+      long nativeHandle, long allocatorHandle, TensorInfo info, Buffer buffer, boolean ownsBuffer) {
     super(nativeHandle, allocatorHandle, info);
     this.buffer = buffer;
+    this.ownsBuffer = ownsBuffer;
+  }
+
+  /**
+   * Returns true if the buffer in this OnnxTensor was created on construction of this tensor, i.e.,
+   * it is a copy of a user supplied buffer or array and may hold the only reference to that buffer.
+   *
+   * <p>When this is true the backing buffer was copied from the user input, so users cannot mutate
+   * the state of this buffer without first getting the reference via {@link #getBufferRef()}.
+   *
+   * @return True if the buffer in this OnnxTensor was allocated by it on construction (i.e., it is
+   *     a copy of a user buffer.)
+   */
+  public boolean ownsBuffer() {
+    return this.ownsBuffer;
+  }
+
+  /**
+   * Returns a reference to the buffer which backs this {@code OnnxTensor}. If the tensor is not
+   * backed by a buffer (i.e., it was created from a Java array, or is backed by memory allocated by
+   * ORT) this method returns an empty {@link Optional}.
+   *
+   * <p>Changes to the buffer elements will be reflected in the native {@code OrtValue}, this can be
+   * used to repeatedly update a single tensor for multiple different inferences without allocating
+   * new tensors, though the inputs <b>must</b> remain the same size and shape.
+   *
+   * <p>Note: the tensor could refer to a contiguous range of elements in this buffer, not the whole
+   * buffer. It is up to the user to manage this information by respecting the position and limit.
+   * As a consequence, accessing this reference should be considered problematic when multiple
+   * threads hold references to the buffer.
+   *
+   * @return A reference to the buffer.
+   */
+  public Optional<Buffer> getBufferRef() {
+    return Optional.ofNullable(buffer);
   }
 
   @Override
@@ -45,7 +88,8 @@ public OnnxValueType getType() {
    * primitives if it has multiple dimensions.
    *
    * <p>Java multidimensional arrays are quite slow for more than 2 dimensions, in that case it is
-   * recommended you use the java.nio.Buffer extractors below (e.g. {@link #getFloatBuffer}).
+   * recommended you use the {@link java.nio.Buffer} extractors below (e.g., {@link
+   * #getFloatBuffer}).
    *
    * @return A Java value.
    * @throws OrtException If the value could not be extracted as the Tensor is invalid, or if the
@@ -283,6 +327,12 @@ private native void getArray(long apiHandle, long nativeHandle, Object carrier)
    * multidimensional array. The shape is inferred from the object using reflection. The default
    * allocator is used.
    *
+   * <p>Note: Java multidimensional arrays are not dense and this method requires traversing a large
+   * number of pointers for high dimensional arrays. For types other than Strings it is recommended
+   * to use one of the {@code createTensor} methods which accepts a {@link java.nio.Buffer}, e.g.
+   * {@link #createTensor(OrtEnvironment, FloatBuffer, long[])} as those methods are zero copy to
+   * transfer data into ORT when using direct buffers.
+   *
    * @param env The current OrtEnvironment.
    * @param data The data to store in a tensor.
    * @return An OnnxTensor storing the data.
@@ -700,7 +750,8 @@ private static OnnxTensor createTensor(
             info.onnxType.value),
         allocator.handle,
         info,
-        tuple.data);
+        tuple.data,
+        tuple.isCopy);
   }
 
   private static native long createTensor(
diff --git a/java/src/main/java/ai/onnxruntime/OrtSession.java b/java/src/main/java/ai/onnxruntime/OrtSession.java
index 435f86daa5fe2..fbea13d155507 100644
--- a/java/src/main/java/ai/onnxruntime/OrtSession.java
+++ b/java/src/main/java/ai/onnxruntime/OrtSession.java
@@ -239,7 +239,7 @@ public Result run(Map<String, ? extends OnnxTensorLike> inputs, RunOptions runOp
    */
   public Result run(Map<String, ? extends OnnxTensorLike> inputs, Set<String> requestedOutputs)
       throws OrtException {
-    return run(inputs, requestedOutputs, null);
+    return run(inputs, requestedOutputs, Collections.emptyMap(), null);
   }
 
   /**
@@ -259,17 +259,90 @@ public Result run(
       Set<String> requestedOutputs,
       RunOptions runOptions)
       throws OrtException {
+    return run(inputs, requestedOutputs, Collections.emptyMap(), runOptions);
+  }
+
+  /**
+   * Scores an input feed dict, returning the map of pinned outputs.
+   *
+   * <p>The outputs are sorted based on the supplied map traversal order.
+   *
+   * <p>Note: pinned outputs are not owned by the {@link Result} object, and are <b>not</b> closed
+   * when the result object is closed.
+   *
+   * @param inputs The inputs to score.
+   * @param pinnedOutputs The requested outputs which the user has allocated.
+   * @return The inferred outputs.
+   * @throws OrtException If there was an error in native code, the input or output names are
+   *     invalid, or if there are zero or too many inputs or outputs.
+   */
+  public Result run(
+      Map<String, ? extends OnnxTensorLike> inputs, Map<String, ? extends OnnxValue> pinnedOutputs)
+      throws OrtException {
+    return run(inputs, Collections.emptySet(), pinnedOutputs, null);
+  }
+
+  /**
+   * Scores an input feed dict, returning the map of requested and pinned outputs.
+   *
+   * <p>The outputs are sorted based on the supplied set traversal order with pinned outputs first,
+   * then requested outputs. An {@link IllegalArgumentException} is thrown if the same output name
+   * appears in both the requested outputs and the pinned outputs.
+   *
+   * <p>Note: pinned outputs are not owned by the {@link Result} object, and are <b>not</b> closed
+   * when the result object is closed.
+   *
+   * @param inputs The inputs to score.
+   * @param requestedOutputs The requested outputs which ORT will allocate.
+   * @param pinnedOutputs The requested outputs which the user has allocated.
+   * @return The inferred outputs.
+   * @throws OrtException If there was an error in native code, the input or output names are
+   *     invalid, or if there are zero or too many inputs or outputs.
+   */
+  public Result run(
+      Map<String, ? extends OnnxTensorLike> inputs,
+      Set<String> requestedOutputs,
+      Map<String, ? extends OnnxValue> pinnedOutputs)
+      throws OrtException {
+    return run(inputs, requestedOutputs, pinnedOutputs, null);
+  }
+
+  /**
+   * Scores an input feed dict, returning the map of requested and pinned outputs.
+   *
+   * <p>The outputs are sorted based on the supplied set traversal order with pinned outputs first,
+   * then requested outputs. An {@link IllegalArgumentException} is thrown if the same output name
+   * appears in both the requested outputs and the pinned outputs.
+   *
+   * <p>Note: pinned outputs are not owned by the {@link Result} object, and are <b>not</b> closed
+   * when the result object is closed.
+   *
+   * @param inputs The inputs to score.
+   * @param requestedOutputs The requested outputs which ORT will allocate.
+   * @param pinnedOutputs The requested outputs which the user has allocated.
+   * @param runOptions The RunOptions to control this run.
+   * @return The inferred outputs.
+   * @throws OrtException If there was an error in native code, the input or output names are
+   *     invalid, or if there are zero or too many inputs or outputs.
+   */
+  public Result run(
+      Map<String, ? extends OnnxTensorLike> inputs,
+      Set<String> requestedOutputs,
+      Map<String, ? extends OnnxValue> pinnedOutputs,
+      RunOptions runOptions)
+      throws OrtException {
     if (!closed) {
       if ((inputs.isEmpty() && (numInputs != 0)) || (inputs.size() > numInputs)) {
         throw new OrtException(
             "Unexpected number of inputs, expected [1," + numInputs + ") found " + inputs.size());
       }
-      if (requestedOutputs.isEmpty() || (requestedOutputs.size() > numOutputs)) {
+      int totalOutputs = requestedOutputs.size() + pinnedOutputs.size();
+      if ((totalOutputs == 0) || (totalOutputs > numOutputs)) {
         throw new OrtException(
-            "Unexpected number of requestedOutputs, expected [1,"
+            "Unexpected number of requestedOutputs & pinnedOutputs, expected [1,"
                 + numOutputs
                 + ") found "
-                + requestedOutputs.size());
+                + totalOutputs);
       }
       String[] inputNamesArray = new String[inputs.size()];
       long[] inputHandles = new long[inputs.size()];
@@ -284,20 +357,41 @@ public Result run(
               "Unknown input name " + t.getKey() + ", expected one of " + inputNames.toString());
         }
       }
-      String[] outputNamesArray = new String[requestedOutputs.size()];
+      String[] outputNamesArray = new String[requestedOutputs.size() + pinnedOutputs.size()];
+      OnnxValue[] outputValues = new OnnxValue[outputNamesArray.length];
+      long[] outputHandles = new long[outputNamesArray.length];
       i = 0;
+      for (Map.Entry<String, ? extends OnnxValue> e : pinnedOutputs.entrySet()) {
+        if (outputNames.contains(e.getKey())) {
+          outputNamesArray[i] = e.getKey();
+          outputValues[i] = e.getValue();
+          outputHandles[i] = getHandle(e.getValue());
+          i++;
+        } else {
+          throw new OrtException(
+              "Unknown output name " + e.getKey() + ", expected one of " + outputNames.toString());
+        }
+      }
       for (String s : requestedOutputs) {
         if (outputNames.contains(s)) {
-          outputNamesArray[i] = s;
-          i++;
+          if (!pinnedOutputs.containsKey(s)) {
+            outputNamesArray[i] = s;
+            // outputValues and outputHandles can be null/0 for these outputs as ORT will allocate
+            // them.
+            i++;
+          } else {
+            throw new OrtException(
+                "Output '"
+                    + s
+                    + "' was found in both the requested outputs and the pinned outputs");
+          }
         } else {
           throw new OrtException(
               "Unknown output name " + s + ", expected one of " + outputNames.toString());
         }
       }
       long runOptionsHandle = runOptions == null ? 0 : runOptions.getNativeHandle();
-
-      OnnxValue[] outputValues =
+      boolean[] ownedByResult =
           run(
               OnnxRuntime.ortApiHandle,
               nativeHandle,
@@ -307,13 +401,40 @@ public Result run(
               inputNamesArray.length,
               outputNamesArray,
               outputNamesArray.length,
+              outputValues,
+              outputHandles,
               runOptionsHandle);
-      return new Result(outputNamesArray, outputValues);
+      return new Result(outputNamesArray, outputValues, ownedByResult);
     } else {
       throw new IllegalStateException("Trying to score a closed OrtSession.");
     }
   }
 
+  /**
+   * Pulls out the native handle by casting it to the appropriate type.
+   *
+   * @param v The OnnxValue.
+   * @return The native handle.
+   */
+  static long getHandle(OnnxValue v) {
+    /*
+     * Note this method exists as interface methods are all public, but we do not want users to be
+     * able to access the native pointer via a public API so can't add a method to OnnxValue which
+     * exposes it.
+     */
+    if (v instanceof OnnxTensorLike) {
+      return ((OnnxTensorLike) v).nativeHandle;
+    } else if (v instanceof OnnxSequence) {
+      return ((OnnxSequence) v).nativeHandle;
+    } else if (v instanceof OnnxMap) {
+      return ((OnnxMap) v).nativeHandle;
+    } else {
+      throw new IllegalArgumentException(
+          "Unexpected OnnxValue subclass, should be {OnnxTensorLike, OnnxSequence, OnnxMap}, found "
+              + v.getClass());
+    }
+  }
+
   /**
    * Gets the metadata for the currently loaded model.
    *
@@ -409,8 +530,9 @@ private native NodeInfo[] getOutputInfo(long apiHandle, long nativeHandle, long
       throws OrtException;
 
   /**
-   * The native run call. runOptionsHandle can be zero (i.e. the null pointer), but all other
-   * handles must be valid pointers.
+   * The native run call. runOptionsHandle can be zero (i.e. the null pointer), outputValues can
+   * contain null entries, and outputHandles can contain zero values (i.e. the null pointer), but
+   * all other handles must be valid pointers.
    *
    * @param apiHandle The pointer to the api.
    * @param nativeHandle The pointer to the session.
@@ -419,12 +541,14 @@ private native NodeInfo[] getOutputInfo(long apiHandle, long nativeHandle, long
    * @param inputs The input tensors.
    * @param numInputs The number of inputs.
    * @param outputNamesArray The requested output names.
+   * @param outputValues The OnnxValue output array.
+   * @param outputHandles The OrtValue output pointer array.
    * @param numOutputs The number of requested outputs.
    * @param runOptionsHandle The (possibly null) pointer to the run options.
-   * @return The OnnxValues produced by this run.
+   * @return A boolean array representing if the OnnxValues were allocated by this run call.
    * @throws OrtException If the native call failed in some way.
    */
-  private native OnnxValue[] run(
+  private native boolean[] run(
       long apiHandle,
       long nativeHandle,
       long allocatorHandle,
@@ -433,6 +557,8 @@ private native OnnxValue[] run(
       long numInputs,
       String[] outputNamesArray,
       long numOutputs,
+      OnnxValue[] outputValues,
+      long[] outputHandles,
       long runOptionsHandle)
       throws OrtException;
 
@@ -1417,9 +1543,13 @@ private native void addRunConfigEntry(
   /**
    * An {@link AutoCloseable} wrapper around a {@link Map} containing {@link OnnxValue}s.
    *
-   * <p>When this is closed it closes all the {@link OnnxValue}s inside it. If you maintain a
-   * reference to a value after this object has been closed it will throw an {@link
+   * <p>When this is closed it closes all the {@link OnnxValue}s owned by the result object. If you
+   * maintain a reference to a value after this object has been closed it will throw an {@link
    * IllegalStateException} upon access.
+   *
+   * <p>{@link OnnxValue}s which are supplied as pinned outputs to a {@code run} call are not closed
+   * by the {@link Result#close()} method. Ownership of each output can be checked with {@link
+   * Result#isResultOwner(int)}.
    */
   public static class Result implements AutoCloseable, Iterable<Map.Entry<String, OnnxValue>> {
 
@@ -1429,6 +1559,8 @@ public static class Result implements AutoCloseable, Iterable<Map.Entry<String,
 
     private final List<OnnxValue> list;
 
+    private final boolean[] ownedByResult;
+
     private boolean closed;
 
     /**
@@ -1437,21 +1569,23 @@ public static class Result implements AutoCloseable, Iterable<Map.Entry<String,
      * @param names The output names.
      * @param values The output values.
      */
-    Result(String[] names, OnnxValue[] values) {
-      if (names.length != values.length) {
+    Result(String[] names, OnnxValue[] values, boolean[] ownedByResult) {
+      if ((names.length != values.length) || (names.length != ownedByResult.length)) {
         throw new IllegalArgumentException(
-            "Expected same number of names and values, found names.length = "
+            "Expected same number of names, values and ownedByResult, found names.length = "
                 + names.length
                 + ", values.length = "
-                + values.length);
+                + values.length
+                + ", ownedByResult.length = "
+                + ownedByResult.length);
       }
 
       map = new LinkedHashMap<>(OrtUtil.capacityFromSize(names.length));
-      list = new ArrayList<>(names.length);
+      list = new ArrayList<>(Arrays.asList(values));
+      this.ownedByResult = ownedByResult;
 
       for (int i = 0; i < names.length; i++) {
         map.put(names[i], values[i]);
-        list.add(values[i]);
       }
       this.closed = false;
     }
@@ -1460,8 +1594,11 @@ public static class Result implements AutoCloseable, Iterable<Map.Entry<String,
     public void close() {
       if (!closed) {
         closed = true;
-        for (OnnxValue t : map.values()) {
-          t.close();
+        for (int i = 0; i < list.size(); i++) {
+          if (ownedByResult[i]) {
+            OnnxValue value = list.get(i);
+            value.close();
+          }
         }
       } else {
         logger.warning("Closing an already closed Result");
@@ -1494,6 +1631,23 @@ public OnnxValue get(int index) {
       }
     }
 
+    /**
+     * Gets the value from the container at the specified index.
+     *
+     * <p>Throws {@link IllegalStateException} if the container has been closed, and {@link
+     * ArrayIndexOutOfBoundsException} if the index is invalid.
+     *
+     * @param index The index to lookup.
+     * @return Is that value owned by this result object?
+     */
+    public boolean isResultOwner(int index) {
+      if (!closed) {
+        return ownedByResult[index];
+      } else {
+        throw new IllegalStateException("Result is closed");
+      }
+    }
+
     /**
      * Returns the number of outputs in this Result.
      *
diff --git a/java/src/main/java/ai/onnxruntime/OrtTrainingSession.java b/java/src/main/java/ai/onnxruntime/OrtTrainingSession.java
index 8c03c5b80433c..49ddf29c22335 100644
--- a/java/src/main/java/ai/onnxruntime/OrtTrainingSession.java
+++ b/java/src/main/java/ai/onnxruntime/OrtTrainingSession.java
@@ -418,7 +418,7 @@ private static native void setSeed(long apiHandle, long trainingHandle, long see
    */
   public OrtSession.Result trainStep(Map<String, ? extends OnnxTensorLike> inputs)
       throws OrtException {
-    return trainStep(inputs, trainOutputNames, null);
+    return trainStep(inputs, trainOutputNames, Collections.emptyMap(), null);
   }
 
   /**
@@ -432,7 +432,7 @@ public OrtSession.Result trainStep(Map<String, ? extends OnnxTensorLike> inputs)
   public OrtSession.Result trainStep(
       Map<String, ? extends OnnxTensorLike> inputs, OrtSession.RunOptions runOptions)
       throws OrtException {
-    return trainStep(inputs, trainOutputNames, runOptions);
+    return trainStep(inputs, trainOutputNames, Collections.emptyMap(), runOptions);
   }
 
   /**
@@ -446,14 +446,41 @@ public OrtSession.Result trainStep(
   public OrtSession.Result trainStep(
       Map<String, ? extends OnnxTensorLike> inputs, Set<String> requestedOutputs)
       throws OrtException {
-    return trainStep(inputs, requestedOutputs, null);
+    return trainStep(inputs, requestedOutputs, Collections.emptyMap(), null);
   }
 
   /**
    * Performs a single step of training, accumulating the gradients.
    *
+   * <p>The outputs are sorted based on the supplied map traversal order.
+   *
+   * <p>Note: pinned outputs are not owned by the {@link OrtSession.Result} object, and are
+   * <b>not</b> closed when the result object is closed.
+   *
    * @param inputs The inputs (must include both the features and the target).
-   * @param requestedOutputs The requested outputs.
+   * @param pinnedOutputs The requested outputs which the user has allocated.
+   * @return Requested outputs produced by the training step.
+   * @throws OrtException If the native call failed.
+   */
+  public OrtSession.Result trainStep(
+      Map<String, ? extends OnnxTensorLike> inputs, Map<String, ? extends OnnxValue> pinnedOutputs)
+      throws OrtException {
+    return trainStep(inputs, Collections.emptySet(), pinnedOutputs, null);
+  }
+
+  /**
+   * Performs a single step of training, accumulating the gradients.
+   *
+   * <p>The outputs are sorted based on the supplied set traversal order with pinned outputs first,
+   * then requested outputs. An {@link IllegalArgumentException} is thrown if the same output name
+   * appears in both the requested outputs and the pinned outputs.
+   *
+   * <p>Note: pinned outputs are not owned by the {@link OrtSession.Result} object, and are
+   * <b>not</b> closed when the result object is closed.
+   *
+   * @param inputs The inputs (must include both the features and the target).
+   * @param requestedOutputs The requested outputs which ORT will allocate.
+   * @param pinnedOutputs The requested outputs which the user has allocated.
    * @param runOptions Run options for controlling this specific call.
    * @return Requested outputs produced by the training step.
    * @throws OrtException If the native call failed.
@@ -461,6 +488,7 @@ public OrtSession.Result trainStep(
   public OrtSession.Result trainStep(
       Map<String, ? extends OnnxTensorLike> inputs,
       Set<String> requestedOutputs,
+      Map<String, ? extends OnnxValue> pinnedOutputs,
       OrtSession.RunOptions runOptions)
       throws OrtException {
     checkClosed();
@@ -472,12 +500,14 @@ public OrtSession.Result trainStep(
               + ") found "
               + inputs.size());
     }
-    if (requestedOutputs.isEmpty() || (requestedOutputs.size() > trainOutputNames.size())) {
+    int numTrainOutputs = trainOutputNames.size();
+    int totalOutputs = requestedOutputs.size() + pinnedOutputs.size();
+    if ((totalOutputs == 0) || (totalOutputs > numTrainOutputs)) {
       throw new OrtException(
-          "Unexpected number of requestedOutputs, expected [1,"
-              + trainOutputNames.size()
+          "Unexpected number of requestedOutputs & pinnedOutputs, expected [1,"
+              + numTrainOutputs
               + ") found "
-              + requestedOutputs.size());
+              + totalOutputs);
     }
     String[] inputNamesArray = new String[inputs.size()];
     long[] inputHandles = new long[inputs.size()];
@@ -492,12 +522,35 @@ public OrtSession.Result trainStep(
             "Unknown input name " + t.getKey() + ", expected one of " + trainInputNames);
       }
     }
-    String[] outputNamesArray = new String[requestedOutputs.size()];
+    String[] outputNamesArray = new String[requestedOutputs.size() + pinnedOutputs.size()];
+    OnnxValue[] outputValues = new OnnxValue[outputNamesArray.length];
+    long[] outputHandles = new long[outputNamesArray.length];
     i = 0;
+    for (Map.Entry<String, ? extends OnnxValue> e : pinnedOutputs.entrySet()) {
+      if (trainOutputNames.contains(e.getKey())) {
+        outputNamesArray[i] = e.getKey();
+        outputValues[i] = e.getValue();
+        outputHandles[i] = OrtSession.getHandle(e.getValue());
+        i++;
+      } else {
+        throw new OrtException(
+            "Unknown output name "
+                + e.getKey()
+                + ", expected one of "
+                + trainOutputNames.toString());
+      }
+    }
     for (String s : requestedOutputs) {
       if (trainOutputNames.contains(s)) {
-        outputNamesArray[i] = s;
-        i++;
+        if (!pinnedOutputs.containsKey(s)) {
+          outputNamesArray[i] = s;
+          // outputValues and outputHandles can be null/0 for these outputs as ORT will allocate
+          // them.
+          i++;
+        } else {
+          throw new OrtException(
+              "Output '" + s + "' was found in both the requested outputs and the pinned outputs");
+        }
       } else {
         throw new OrtException(
             "Unknown output name " + s + ", expected one of " + trainOutputNames.toString());
@@ -505,7 +558,7 @@ public OrtSession.Result trainStep(
     }
     long runOptionsHandle = runOptions == null ? 0 : runOptions.getNativeHandle();
 
-    OnnxValue[] outputValues =
+    boolean[] ownedByResult =
         trainStep(
             OnnxRuntime.ortApiHandle,
             OnnxRuntime.ortTrainingApiHandle,
@@ -516,8 +569,10 @@ public OrtSession.Result trainStep(
             inputNamesArray.length,
             outputNamesArray,
             outputNamesArray.length,
+            outputValues,
+            outputHandles,
             runOptionsHandle);
-    return new OrtSession.Result(outputNamesArray, outputValues);
+    return new OrtSession.Result(outputNamesArray, outputValues, ownedByResult);
   }
 
   /*
@@ -540,7 +595,7 @@ public OrtSession.Result trainStep(
    * run_options, size_t inputs_len, _In_reads_(inputs_len) const OrtValue* const* inputs, size_t
    * outputs_len, _Inout_updates_all_(outputs_len) OrtValue** outputs);
    */
-  private native OnnxValue[] trainStep(
+  private native boolean[] trainStep(
       long apiHandle,
       long trainingApiHandle,
       long nativeHandle,
@@ -550,6 +605,8 @@ private native OnnxValue[] trainStep(
       long numInputs,
       String[] outputNamesArray,
       long numOutputs,
+      OnnxValue[] outputValues,
+      long[] outputHandles,
       long runOptionsHandle);
 
   /**
@@ -561,7 +618,7 @@ private native OnnxValue[] trainStep(
    */
   public OrtSession.Result evalStep(Map<String, ? extends OnnxTensorLike> inputs)
       throws OrtException {
-    return evalStep(inputs, evalOutputNames, null);
+    return evalStep(inputs, evalOutputNames, Collections.emptyMap(), null);
   }
 
   /**
@@ -575,7 +632,7 @@ public OrtSession.Result evalStep(Map<String, ? extends OnnxTensorLike> inputs)
   public OrtSession.Result evalStep(
       Map<String, ? extends OnnxTensorLike> inputs, OrtSession.RunOptions runOptions)
       throws OrtException {
-    return evalStep(inputs, evalOutputNames, runOptions);
+    return evalStep(inputs, evalOutputNames, Collections.emptyMap(), runOptions);
   }
 
   /**
@@ -589,14 +646,41 @@ public OrtSession.Result evalStep(
   public OrtSession.Result evalStep(
       Map<String, ? extends OnnxTensorLike> inputs, Set<String> requestedOutputs)
       throws OrtException {
-    return evalStep(inputs, requestedOutputs, null);
+    return evalStep(inputs, requestedOutputs, Collections.emptyMap(), null);
   }
 
   /**
    * Performs a single evaluation step using the supplied inputs.
    *
-   * @param inputs The model inputs.
-   * @param requestedOutputs The requested output names.
+   * <p>The outputs are sorted based on the supplied map traversal order.
+   *
+   * <p>Note: pinned outputs are not owned by the {@link OrtSession.Result} object, and are
+   * <b>not</b> closed when the result object is closed.
+   *
+   * @param inputs The inputs to score.
+   * @param pinnedOutputs The requested outputs which the user has allocated.
+   * @return The requested outputs.
+   * @throws OrtException If the native call failed.
+   */
+  public OrtSession.Result evalStep(
+      Map<String, ? extends OnnxTensorLike> inputs, Map<String, ? extends OnnxValue> pinnedOutputs)
+      throws OrtException {
+    return evalStep(inputs, Collections.emptySet(), pinnedOutputs, null);
+  }
+
+  /**
+   * Performs a single evaluation step using the supplied inputs.
+   *
+   * <p>The outputs are sorted based on the supplied set traversal order with pinned outputs first,
+   * then requested outputs. An {@link IllegalArgumentException} is thrown if the same output name
+   * appears in both the requested outputs and the pinned outputs.
+   *
+   * <p>Note: pinned outputs are not owned by the {@link OrtSession.Result} object, and are
+   * <b>not</b> closed when the result object is closed.
+   *
+   * @param inputs The inputs to score.
+   * @param requestedOutputs The requested outputs which ORT will allocate.
+   * @param pinnedOutputs The requested outputs which the user has allocated.
    * @param runOptions Run options for controlling this specific call.
    * @return The requested outputs.
    * @throws OrtException If the native call failed.
@@ -604,6 +688,7 @@ public OrtSession.Result evalStep(
   public OrtSession.Result evalStep(
       Map<String, ? extends OnnxTensorLike> inputs,
       Set<String> requestedOutputs,
+      Map<String, ? extends OnnxValue> pinnedOutputs,
       OrtSession.RunOptions runOptions)
       throws OrtException {
     checkClosed();
@@ -615,12 +700,14 @@ public OrtSession.Result evalStep(
               + ") found "
               + inputs.size());
     }
-    if (requestedOutputs.isEmpty() || (requestedOutputs.size() > evalOutputNames.size())) {
+    int numEvalOutputs = evalOutputNames.size();
+    int totalOutputs = requestedOutputs.size() + pinnedOutputs.size();
+    if ((totalOutputs == 0) || (totalOutputs > numEvalOutputs)) {
       throw new OrtException(
-          "Unexpected number of requestedOutputs, expected [1,"
-              + evalOutputNames.size()
+          "Unexpected number of requestedOutputs & pinnedOutputs, expected [1,"
+              + numEvalOutputs
               + ") found "
-              + requestedOutputs.size());
+              + totalOutputs);
     }
     String[] inputNamesArray = new String[inputs.size()];
     long[] inputHandles = new long[inputs.size()];
@@ -635,12 +722,35 @@ public OrtSession.Result evalStep(
             "Unknown input name " + t.getKey() + ", expected one of " + evalInputNames.toString());
       }
     }
-    String[] outputNamesArray = new String[requestedOutputs.size()];
+    String[] outputNamesArray = new String[requestedOutputs.size() + pinnedOutputs.size()];
+    OnnxValue[] outputValues = new OnnxValue[outputNamesArray.length];
+    long[] outputHandles = new long[outputNamesArray.length];
     i = 0;
+    for (Map.Entry<String, ? extends OnnxValue> e : pinnedOutputs.entrySet()) {
+      if (evalOutputNames.contains(e.getKey())) {
+        outputNamesArray[i] = e.getKey();
+        outputValues[i] = e.getValue();
+        outputHandles[i] = OrtSession.getHandle(e.getValue());
+        i++;
+      } else {
+        throw new OrtException(
+            "Unknown output name "
+                + e.getKey()
+                + ", expected one of "
+                + evalOutputNames.toString());
+      }
+    }
     for (String s : requestedOutputs) {
       if (evalOutputNames.contains(s)) {
-        outputNamesArray[i] = s;
-        i++;
+        if (!pinnedOutputs.containsKey(s)) {
+          outputNamesArray[i] = s;
+          // outputValues and outputHandles can be null/0 for these outputs as ORT will allocate
+          // them.
+          i++;
+        } else {
+          throw new OrtException(
+              "Output '" + s + "' was found in both the requested outputs and the pinned outputs");
+        }
       } else {
         throw new OrtException(
             "Unknown output name " + s + ", expected one of " + evalOutputNames.toString());
@@ -648,7 +758,7 @@ public OrtSession.Result evalStep(
     }
     long runOptionsHandle = runOptions == null ? 0 : runOptions.getNativeHandle();
 
-    OnnxValue[] outputValues =
+    boolean[] ownedByResult =
         evalStep(
             OnnxRuntime.ortApiHandle,
             OnnxRuntime.ortTrainingApiHandle,
@@ -659,8 +769,10 @@ public OrtSession.Result evalStep(
             inputNamesArray.length,
             outputNamesArray,
             outputNamesArray.length,
+            outputValues,
+            outputHandles,
             runOptionsHandle);
-    return new OrtSession.Result(outputNamesArray, outputValues);
+    return new OrtSession.Result(outputNamesArray, outputValues, ownedByResult);
   }
 
   /*
@@ -682,7 +794,7 @@ public OrtSession.Result evalStep(
    * run_options, size_t inputs_len, _In_reads_(inputs_len) const OrtValue* const* inputs, size_t
    * outputs_len, _Inout_updates_all_(outputs_len) OrtValue** outputs);
    */
-  private native OnnxValue[] evalStep(
+  private native boolean[] evalStep(
       long apiHandle,
       long trainingApiHandle,
       long nativeHandle,
@@ -692,6 +804,8 @@ private native OnnxValue[] evalStep(
       long numInputs,
       String[] outputNamesArray,
       long numOutputs,
+      OnnxValue[] outputValues,
+      long[] outputHandles,
       long runOptionsHandle)
       throws OrtException;
 
diff --git a/java/src/main/java/ai/onnxruntime/TensorInfo.java b/java/src/main/java/ai/onnxruntime/TensorInfo.java
index 34d635b5c40f8..69ccb954e8afe 100644
--- a/java/src/main/java/ai/onnxruntime/TensorInfo.java
+++ b/java/src/main/java/ai/onnxruntime/TensorInfo.java
@@ -58,14 +58,37 @@ public enum OnnxTensorType {
      */
     ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16(
         16), // Non-IEEE floating-point format based on IEEE754 single-precision
-    ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT8E4M3FN(
-        17), // Non-IEEE floating-point format based on IEEE754 single-precision
-    ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT8E4M3FNUZ(
-        18), // Non-IEEE floating-point format based on IEEE754 single-precision
-    ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT8E5M2(
-        19), // Non-IEEE floating-point format based on IEEE754 single-precision
-    ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT8E5M2FNUZ(
-        20); // Non-IEEE floating-point format based on IEEE754 single-precision
+    /**
+     * A non-IEEE 8-bit floating point format with 4 exponent bits and 3 mantissa bits, with NaN and
+     * no infinite values (FN).
+     *
+     * <p>See <a href="https://onnx.ai/onnx/technical/float8.html">the float 8 ONNX standard</a> for
+     * details.
+     */
+    ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT8E4M3FN(17),
+    /**
+     * A non-IEEE 8-bit floating point format with 4 exponent bits and 3 mantissa bits, with NaN, no
+     * infinite values (FN) and no negative zero (UZ).
+     *
+     * <p>See <a href="https://onnx.ai/onnx/technical/float8.html">the float 8 ONNX standard</a> for
+     * details.
+     */
+    ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT8E4M3FNUZ(18),
+    /**
+     * A non-IEEE 8-bit floating point format with 5 exponent bits and 2 mantissa bits.
+     *
+     * <p>See <a href="https://onnx.ai/onnx/technical/float8.html">the float 8 ONNX standard</a> for
+     * details.
+     */
+    ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT8E5M2(19),
+    /**
+     * A non-IEEE 8-bit floating point format with 5 exponent bits and 2 mantissa bits, with NaN, no
+     * infinite values (FN) and no negative zero (UZ).
+     *
+     * <p>See <a href="https://onnx.ai/onnx/technical/float8.html">the float 8 ONNX standard</a> for
+     * details.
+     */
+    ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT8E5M2FNUZ(20);
 
     /** The int id on the native side. */
     public final int value;
diff --git a/java/src/main/native/ai_onnxruntime_OrtSession.c b/java/src/main/native/ai_onnxruntime_OrtSession.c
index 6f4e34648cf81..f4d5ab080cd31 100644
--- a/java/src/main/native/ai_onnxruntime_OrtSession.c
+++ b/java/src/main/native/ai_onnxruntime_OrtSession.c
@@ -316,14 +316,19 @@ JNIEXPORT jobjectArray JNICALL Java_ai_onnxruntime_OrtSession_getOutputInfo(JNIE
 /*
  * Class:     ai_onnxruntime_OrtSession
  * Method:    run
- * Signature: (JJJ[Ljava/lang/String;[JJ[Ljava/lang/String;JJ)[Lai/onnxruntime/OnnxValue;
- * private native OnnxValue[] run(long apiHandle, long nativeHandle, long allocatorHandle, String[] inputNamesArray, long[] inputs, long numInputs, String[] outputNamesArray, long numOutputs)
+ * Signature: (JJJ[Ljava/lang/String;[JJ[Ljava/lang/String;J[Lai/onnxruntime/OnnxValue;[JJ)[Z
+ * private native boolean[] run(long apiHandle, long nativeHandle, long allocatorHandle,
+ *                              String[] inputNamesArray, long[] inputs, long numInputs,
+ *                              String[] outputNamesArray, long numOutputs,
+ *                              OnnxValue[] outputValues, long[] outputHandles,
+ *                              long runOptionsHandle) throws OrtException;
  */
-JNIEXPORT jobjectArray JNICALL Java_ai_onnxruntime_OrtSession_run(JNIEnv* jniEnv, jobject jobj, jlong apiHandle,
+JNIEXPORT jbooleanArray JNICALL Java_ai_onnxruntime_OrtSession_run(JNIEnv* jniEnv, jobject jobj, jlong apiHandle,
                                                                   jlong sessionHandle, jlong allocatorHandle,
                                                                   jobjectArray inputNamesArr, jlongArray tensorArr,
                                                                   jlong numInputs, jobjectArray outputNamesArr,
-                                                                  jlong numOutputs, jlong runOptionsHandle) {
+                                                                  jlong numOutputs, jobjectArray outputValuesArr,
+                                                                  jlongArray outputHandlesArr, jlong runOptionsHandle) {
 
   (void)jobj;  // Required JNI parameter not needed by functions which don't need to access their host object.
   const OrtApi* api = (const OrtApi*)apiHandle;
@@ -331,7 +336,7 @@ JNIEXPORT jobjectArray JNICALL Java_ai_onnxruntime_OrtSession_run(JNIEnv* jniEnv
   OrtSession* session = (OrtSession*)sessionHandle;
   OrtRunOptions* runOptions = (OrtRunOptions*)runOptionsHandle;
 
-  jobjectArray outputArray = NULL;
+  jbooleanArray outputArray = NULL;
 
   // Create the buffers for the Java input & output strings, and the input pointers
   const char** inputNames = allocarray(numInputs, sizeof(char*));
@@ -376,13 +381,19 @@ JNIEXPORT jobjectArray JNICALL Java_ai_onnxruntime_OrtSession_run(JNIEnv* jniEnv
   // Release the java array copy of pointers to the tensors.
   (*jniEnv)->ReleaseLongArrayElements(jniEnv, tensorArr, inputValueLongs, JNI_ABORT);
 
+  // Extract a C array of longs which are pointers to the output tensors.
+  jlong* outputHandleLongs = (*jniEnv)->GetLongArrayElements(jniEnv, outputHandlesArr, NULL);
+
   // Extract the names of the output values.
   for (int i = 0; i < numOutputs; i++) {
     javaOutputStrings[i] = (*jniEnv)->GetObjectArrayElement(jniEnv, outputNamesArr, i);
     outputNames[i] = (*jniEnv)->GetStringUTFChars(jniEnv, javaOutputStrings[i], NULL);
-    outputValues[i] = NULL;
+    outputValues[i] = (OrtValue*)outputHandleLongs[i];
   }
 
+  // Release the java array copy of pointers to the outputs.
+  (*jniEnv)->ReleaseLongArrayElements(jniEnv, outputHandlesArr, outputHandleLongs, JNI_ABORT);
+
   // Actually score the inputs.
   // ORT_API_STATUS(OrtRun, _Inout_ OrtSession* sess, _In_ OrtRunOptions* run_options,
   // _In_ const char* const* input_names, _In_ const OrtValue* const* input, size_t input_len,
@@ -394,21 +405,26 @@ JNIEXPORT jobjectArray JNICALL Java_ai_onnxruntime_OrtSession_run(JNIEnv* jniEnv
     goto cleanup_output_values;
   }
 
-  // Construct the output array of ONNXValues
-  jclass onnxValueClass = (*jniEnv)->FindClass(jniEnv, ORTJNI_OnnxValueClassName);
-  outputArray = (*jniEnv)->NewObjectArray(jniEnv, safecast_int64_to_jsize(numOutputs), onnxValueClass, NULL);
+  // Create the output boolean array denoting if ORT owns the memory for each output.
+  // Java boolean arrays are initialized to false.
+  outputArray = (*jniEnv)->NewBooleanArray(jniEnv, safecast_int64_to_jsize(numOutputs));
+  jboolean* boolArr = (*jniEnv)->GetBooleanArrayElements(jniEnv, outputArray, NULL);
 
   // Convert the output tensors into ONNXValues
   for (int i = 0; i < numOutputs; i++) {
-    if (outputValues[i] != NULL) {
+    if (outputValues[i] != NULL && (*jniEnv)->GetObjectArrayElement(jniEnv, outputValuesArr, i) == NULL) {
       jobject onnxValue = convertOrtValueToONNXValue(jniEnv, api, allocator, outputValues[i]);
       if (onnxValue == NULL) {
         break;  // go to cleanup, exception thrown
       }
-      (*jniEnv)->SetObjectArrayElement(jniEnv, outputArray, i, onnxValue);
+      boolArr[i] = 1;
+      (*jniEnv)->SetObjectArrayElement(jniEnv, outputValuesArr, i, onnxValue);
     }
   }
 
+  // Write the output array back to Java.
+  (*jniEnv)->ReleaseBooleanArrayElements(jniEnv, outputArray, boolArr, 0);
+
   // Note these gotos are in a specific order so they mirror the allocation pattern above.
   // They must be changed if the allocation code is rearranged.
 cleanup_output_values:
diff --git a/java/src/main/native/ai_onnxruntime_OrtSession_SessionOptions.c b/java/src/main/native/ai_onnxruntime_OrtSession_SessionOptions.c
index d3239c7442c80..3a1c0d1bb8fa1 100644
--- a/java/src/main/native/ai_onnxruntime_OrtSession_SessionOptions.c
+++ b/java/src/main/native/ai_onnxruntime_OrtSession_SessionOptions.c
@@ -19,7 +19,6 @@
 #include "onnxruntime/core/providers/nnapi/nnapi_provider_factory.h"
 #include "onnxruntime/core/providers/tvm/tvm_provider_factory.h"
 #include "onnxruntime/core/providers/openvino/openvino_provider_factory.h"
-#include "onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h"
 #include "onnxruntime/core/providers/acl/acl_provider_factory.h"
 #include "onnxruntime/core/providers/armnn/armnn_provider_factory.h"
 #include "onnxruntime/core/providers/coreml/coreml_provider_factory.h"
diff --git a/java/src/main/native/ai_onnxruntime_OrtTrainingSession.c b/java/src/main/native/ai_onnxruntime_OrtTrainingSession.c
index b3b530a8b15aa..9f7b8d3a3dcfc 100644
--- a/java/src/main/native/ai_onnxruntime_OrtTrainingSession.c
+++ b/java/src/main/native/ai_onnxruntime_OrtTrainingSession.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2022, 2023, Oracle and/or its affiliates. All rights reserved.
  * Licensed under the MIT License.
  */
 #include <jni.h>
@@ -330,12 +330,12 @@ JNIEXPORT void JNICALL Java_ai_onnxruntime_OrtTrainingSession_lazyResetGrad
 /*
  * Class:     ai_onnxruntime_OrtTrainingSession
  * Method:    trainStep
- * Signature: (JJJJ[Ljava/lang/String;[JJ[Ljava/lang/String;JJ)[Lai/onnxruntime/OnnxValue;
+ * Signature: (JJJJ[Ljava/lang/String;[JJ[Ljava/lang/String;J[Lai/onnxruntime/OnnxValue;[JJ)[Z
  */
-JNIEXPORT jobjectArray JNICALL Java_ai_onnxruntime_OrtTrainingSession_trainStep
+JNIEXPORT jbooleanArray JNICALL Java_ai_onnxruntime_OrtTrainingSession_trainStep
   (JNIEnv * jniEnv, jobject jobj, jlong apiHandle, jlong trainApiHandle,
      jlong nativeHandle, jlong allocatorHandle, jobjectArray inputNamesArr, jlongArray inputHandles, jlong numInputs,
-     jobjectArray outputNamesArr, jlong numOutputs, jlong runOptionsHandle) {
+     jobjectArray outputNamesArr, jlong numOutputs, jobjectArray outputValuesArr, jlongArray outputHandlesArr, jlong runOptionsHandle) {
   (void)jobj;  // Required JNI parameter not needed by functions which don't need to access their host object.
   const OrtApi* api = (const OrtApi*)apiHandle;
   const OrtTrainingApi* trainApi = (const OrtTrainingApi*)trainApiHandle;
@@ -343,31 +343,31 @@ JNIEXPORT jobjectArray JNICALL Java_ai_onnxruntime_OrtTrainingSession_trainStep
   OrtTrainingSession* trainSession = (OrtTrainingSession*)nativeHandle;
   OrtRunOptions* runOptions = (OrtRunOptions*)runOptionsHandle;
 
-  jobjectArray outputArray = NULL;
+  jbooleanArray outputArray = NULL;
 
   // Create the buffers for the Java input & output strings, and the input pointers
-  const char** inputNames = malloc(sizeof(char*) * numInputs);
+  const char** inputNames = allocarray(numInputs, sizeof(char*));
   if (inputNames == NULL) {
     // Nothing to cleanup, return and throw exception
     return outputArray;
   }
-  const char** outputNames = malloc(sizeof(char*) * numOutputs);
+  const char** outputNames = allocarray(numOutputs, sizeof(char*));
   if (outputNames == NULL) {
     goto cleanup_input_names;
   }
-  jobject* javaInputStrings = malloc(sizeof(jobject) * numInputs);
+  jobject* javaInputStrings = allocarray(numInputs, sizeof(jobject));
   if (javaInputStrings == NULL) {
     goto cleanup_output_names;
   }
-  jobject* javaOutputStrings = malloc(sizeof(jobject) * numOutputs);
+  jobject* javaOutputStrings = allocarray(numOutputs, sizeof(jobject));
   if (javaOutputStrings == NULL) {
     goto cleanup_java_input_strings;
   }
-  const OrtValue** inputValuePtrs = malloc(sizeof(OrtValue*) * numInputs);
+  const OrtValue** inputValuePtrs = allocarray(numInputs, sizeof(OrtValue*));
   if (inputValuePtrs == NULL) {
     goto cleanup_java_output_strings;
   }
-  OrtValue** outputValues = malloc(sizeof(OrtValue*) * numOutputs);
+  OrtValue** outputValues = allocarray(numOutputs, sizeof(OrtValue*));
   if (outputValues == NULL) {
     goto cleanup_input_values;
   }
@@ -388,13 +388,19 @@ JNIEXPORT jobjectArray JNICALL Java_ai_onnxruntime_OrtTrainingSession_trainStep
   // Release the java array copy of pointers to the tensors.
   (*jniEnv)->ReleaseLongArrayElements(jniEnv, inputHandles, inputValueLongs, JNI_ABORT);
 
+  // Extract a C array of longs which are pointers to the output tensors.
+  jlong* outputHandleLongs = (*jniEnv)->GetLongArrayElements(jniEnv, outputHandlesArr, NULL);
+
   // Extract the names of the output values.
   for (int i = 0; i < numOutputs; i++) {
     javaOutputStrings[i] = (*jniEnv)->GetObjectArrayElement(jniEnv, outputNamesArr, i);
     outputNames[i] = (*jniEnv)->GetStringUTFChars(jniEnv, javaOutputStrings[i], NULL);
-    outputValues[i] = NULL;
+    outputValues[i] = (OrtValue*)outputHandleLongs[i];
   }
 
+  // Release the java array copy of pointers to the outputs.
+  (*jniEnv)->ReleaseLongArrayElements(jniEnv, outputHandlesArr, outputHandleLongs, JNI_ABORT);
+
   // Actually score the inputs.
   //ORT_API2_STATUS(TrainStep, _Inout_ OrtTrainingSession* sess, _In_opt_ const OrtRunOptions* run_options,
   //                size_t inputs_len, _In_reads_(inputs_len) const OrtValue* const* inputs,
@@ -406,24 +412,29 @@ JNIEXPORT jobjectArray JNICALL Java_ai_onnxruntime_OrtTrainingSession_trainStep
     goto cleanup_output_values;
   }
 
-  // Construct the output array of ONNXValues
-  jclass onnxValueClass = (*jniEnv)->FindClass(jniEnv, "ai/onnxruntime/OnnxValue");
-  outputArray = (*jniEnv)->NewObjectArray(jniEnv, safecast_int64_to_jsize(numOutputs), onnxValueClass, NULL);
+  // Create the output boolean array denoting if ORT owns the memory for each output.
+  // Java boolean arrays are initialized to false.
+  outputArray = (*jniEnv)->NewBooleanArray(jniEnv, safecast_int64_to_jsize(numOutputs));
+  jboolean* boolArr = (*jniEnv)->GetBooleanArrayElements(jniEnv, outputArray, NULL);
 
   // Convert the output tensors into ONNXValues
   for (int i = 0; i < numOutputs; i++) {
-    if (outputValues[i] != NULL) {
+    if (outputValues[i] != NULL && (*jniEnv)->GetObjectArrayElement(jniEnv, outputValuesArr, i) == NULL) {
       jobject onnxValue = convertOrtValueToONNXValue(jniEnv, api, allocator, outputValues[i]);
       if (onnxValue == NULL) {
         break;  // go to cleanup, exception thrown
       }
-      (*jniEnv)->SetObjectArrayElement(jniEnv, outputArray, i, onnxValue);
+      boolArr[i] = 1;
+      (*jniEnv)->SetObjectArrayElement(jniEnv, outputValuesArr, i, onnxValue);
     }
   }
 
+  // Write the output array back to Java.
+  (*jniEnv)->ReleaseBooleanArrayElements(jniEnv, outputArray, boolArr, 0);
+
   // Note these gotos are in a specific order so they mirror the allocation pattern above.
   // They must be changed if the allocation code is rearranged.
-  cleanup_output_values:
+cleanup_output_values:
   free(outputValues);
 
   // Release the Java output strings
@@ -437,15 +448,15 @@ JNIEXPORT jobjectArray JNICALL Java_ai_onnxruntime_OrtTrainingSession_trainStep
   }
 
   // Release the buffers
-  cleanup_input_values:
+cleanup_input_values:
   free((void*)inputValuePtrs);
-  cleanup_java_output_strings:
+cleanup_java_output_strings:
   free(javaOutputStrings);
-  cleanup_java_input_strings:
+cleanup_java_input_strings:
   free(javaInputStrings);
-  cleanup_output_names:
+cleanup_output_names:
   free((void*)outputNames);
-  cleanup_input_names:
+cleanup_input_names:
   free((void*)inputNames);
 
   return outputArray;
@@ -454,12 +465,12 @@ JNIEXPORT jobjectArray JNICALL Java_ai_onnxruntime_OrtTrainingSession_trainStep
 /*
  * Class:     ai_onnxruntime_OrtTrainingSession
  * Method:    evalStep
- * Signature: (JJJJ[Ljava/lang/String;[JJ[Ljava/lang/String;JJ)[Lai/onnxruntime/OnnxValue;
+ * Signature: (JJJJ[Ljava/lang/String;[JJ[Ljava/lang/String;J[Lai/onnxruntime/OnnxValue;[JJ)[Z
  */
-JNIEXPORT jobjectArray JNICALL Java_ai_onnxruntime_OrtTrainingSession_evalStep
+JNIEXPORT jbooleanArray JNICALL Java_ai_onnxruntime_OrtTrainingSession_evalStep
     (JNIEnv * jniEnv, jobject jobj, jlong apiHandle, jlong trainApiHandle,
      jlong nativeHandle, jlong allocatorHandle, jobjectArray inputNamesArr, jlongArray inputHandles, jlong numInputs,
-     jobjectArray outputNamesArr, jlong numOutputs, jlong runOptionsHandle) {
+     jobjectArray outputNamesArr, jlong numOutputs, jobjectArray outputValuesArr, jlongArray outputHandlesArr, jlong runOptionsHandle) {
   (void)jobj;  // Required JNI parameter not needed by functions which don't need to access their host object.
   const OrtApi* api = (const OrtApi*)apiHandle;
   const OrtTrainingApi* trainApi = (const OrtTrainingApi*)trainApiHandle;
@@ -467,31 +478,31 @@ JNIEXPORT jobjectArray JNICALL Java_ai_onnxruntime_OrtTrainingSession_evalStep
   OrtTrainingSession* trainSession = (OrtTrainingSession*)nativeHandle;
   OrtRunOptions* runOptions = (OrtRunOptions*)runOptionsHandle;
 
-  jobjectArray outputArray = NULL;
+  jbooleanArray outputArray = NULL;
 
   // Create the buffers for the Java input & output strings, and the input pointers
-  const char** inputNames = malloc(sizeof(char*) * numInputs);
+  const char** inputNames = allocarray(numInputs, sizeof(char*));
   if (inputNames == NULL) {
     // Nothing to cleanup, return and throw exception
     return outputArray;
   }
-  const char** outputNames = malloc(sizeof(char*) * numOutputs);
+  const char** outputNames = allocarray(numOutputs, sizeof(char*));
   if (outputNames == NULL) {
     goto cleanup_input_names;
   }
-  jobject* javaInputStrings = malloc(sizeof(jobject) * numInputs);
+  jobject* javaInputStrings = allocarray(numInputs, sizeof(jobject));
   if (javaInputStrings == NULL) {
     goto cleanup_output_names;
   }
-  jobject* javaOutputStrings = malloc(sizeof(jobject) * numOutputs);
+  jobject* javaOutputStrings = allocarray(numOutputs, sizeof(jobject));
   if (javaOutputStrings == NULL) {
     goto cleanup_java_input_strings;
   }
-  const OrtValue** inputValuePtrs = malloc(sizeof(OrtValue*) * numInputs);
+  const OrtValue** inputValuePtrs = allocarray(numInputs, sizeof(OrtValue*));
   if (inputValuePtrs == NULL) {
     goto cleanup_java_output_strings;
   }
-  OrtValue** outputValues = malloc(sizeof(OrtValue*) * numOutputs);
+  OrtValue** outputValues = allocarray(numOutputs, sizeof(OrtValue*));
   if (outputValues == NULL) {
     goto cleanup_input_values;
   }
@@ -512,11 +523,14 @@ JNIEXPORT jobjectArray JNICALL Java_ai_onnxruntime_OrtTrainingSession_evalStep
   // Release the java array copy of pointers to the tensors.
   (*jniEnv)->ReleaseLongArrayElements(jniEnv, inputHandles, inputValueLongs, JNI_ABORT);
 
+  // Extract a C array of longs which are pointers to the output tensors.
+  jlong* outputHandleLongs = (*jniEnv)->GetLongArrayElements(jniEnv, outputHandlesArr, NULL);
+
   // Extract the names of the output values.
   for (int i = 0; i < numOutputs; i++) {
     javaOutputStrings[i] = (*jniEnv)->GetObjectArrayElement(jniEnv, outputNamesArr, i);
     outputNames[i] = (*jniEnv)->GetStringUTFChars(jniEnv, javaOutputStrings[i], NULL);
-    outputValues[i] = NULL;
+    outputValues[i] = (OrtValue*)outputHandleLongs[i];
   }
 
   // Actually score the inputs.
@@ -530,24 +544,29 @@ JNIEXPORT jobjectArray JNICALL Java_ai_onnxruntime_OrtTrainingSession_evalStep
     goto cleanup_output_values;
   }
 
-  // Construct the output array of ONNXValues
-  jclass onnxValueClass = (*jniEnv)->FindClass(jniEnv, "ai/onnxruntime/OnnxValue");
-  outputArray = (*jniEnv)->NewObjectArray(jniEnv, safecast_int64_to_jsize(numOutputs), onnxValueClass, NULL);
+  // Create the output boolean array denoting if ORT owns the memory for each output.
+  // Java boolean arrays are initialized to false.
+  outputArray = (*jniEnv)->NewBooleanArray(jniEnv, safecast_int64_to_jsize(numOutputs));
+  jboolean* boolArr = (*jniEnv)->GetBooleanArrayElements(jniEnv, outputArray, NULL);
 
   // Convert the output tensors into ONNXValues
   for (int i = 0; i < numOutputs; i++) {
-    if (outputValues[i] != NULL) {
+    if (outputValues[i] != NULL && (*jniEnv)->GetObjectArrayElement(jniEnv, outputValuesArr, i) == NULL) {
       jobject onnxValue = convertOrtValueToONNXValue(jniEnv, api, allocator, outputValues[i]);
       if (onnxValue == NULL) {
         break;  // go to cleanup, exception thrown
       }
-      (*jniEnv)->SetObjectArrayElement(jniEnv, outputArray, i, onnxValue);
+      boolArr[i] = 1;
+      (*jniEnv)->SetObjectArrayElement(jniEnv, outputValuesArr, i, onnxValue);
     }
   }
 
+  // Write the output array back to Java.
+  (*jniEnv)->ReleaseBooleanArrayElements(jniEnv, outputArray, boolArr, 0);
+
   // Note these gotos are in a specific order so they mirror the allocation pattern above.
   // They must be changed if the allocation code is rearranged.
-  cleanup_output_values:
+cleanup_output_values:
   free(outputValues);
 
   // Release the Java output strings
@@ -561,15 +580,15 @@ JNIEXPORT jobjectArray JNICALL Java_ai_onnxruntime_OrtTrainingSession_evalStep
   }
 
   // Release the buffers
-  cleanup_input_values:
+cleanup_input_values:
   free((void*)inputValuePtrs);
-  cleanup_java_output_strings:
+cleanup_java_output_strings:
   free(javaOutputStrings);
-  cleanup_java_input_strings:
+cleanup_java_input_strings:
   free(javaInputStrings);
-  cleanup_output_names:
+cleanup_output_names:
   free((void*)outputNames);
-  cleanup_input_names:
+cleanup_input_names:
   free((void*)inputNames);
 
   return outputArray;
diff --git a/java/src/test/java/ai/onnxruntime/InferenceTest.java b/java/src/test/java/ai/onnxruntime/InferenceTest.java
index 08d2a5698d579..e975117fb75bd 100644
--- a/java/src/test/java/ai/onnxruntime/InferenceTest.java
+++ b/java/src/test/java/ai/onnxruntime/InferenceTest.java
@@ -6,11 +6,14 @@
 
 import static org.junit.jupiter.api.Assertions.assertArrayEquals;
 import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
 import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertNotSame;
 import static org.junit.jupiter.api.Assertions.assertSame;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 import static org.junit.jupiter.api.Assertions.fail;
 
+import ai.onnxruntime.OrtException.OrtErrorCode;
 import ai.onnxruntime.OrtSession.Result;
 import ai.onnxruntime.OrtSession.SessionOptions;
 import ai.onnxruntime.OrtSession.SessionOptions.ExecutionMode;
@@ -31,6 +34,8 @@
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
@@ -71,7 +76,7 @@ public void environmentTest() {
   @Test
   public void testVersion() {
     String version = env.getVersion();
-    Assertions.assertFalse(version.isEmpty());
+    assertFalse(version.isEmpty());
   }
 
   @Test
@@ -749,6 +754,151 @@ public void testOverridingInitializer() throws OrtException {
     }
   }
 
+  @Test
+  public void testPinnedOutputs() throws OrtException {
+    String modelPath = TestHelpers.getResourcePath("/java-three-output-matmul.onnx").toString();
+    FloatBuffer outputABuf =
+        ByteBuffer.allocateDirect(4 * 4).order(ByteOrder.nativeOrder()).asFloatBuffer();
+    FloatBuffer outputBBuf =
+        ByteBuffer.allocateDirect(4 * 4).order(ByteOrder.nativeOrder()).asFloatBuffer();
+    FloatBuffer outputCBuf =
+        ByteBuffer.allocateDirect(4 * 4).order(ByteOrder.nativeOrder()).asFloatBuffer();
+    FloatBuffer tooSmallBuf =
+        ByteBuffer.allocateDirect(4 * 2).order(ByteOrder.nativeOrder()).asFloatBuffer();
+    FloatBuffer tooBigBuf =
+        ByteBuffer.allocateDirect(4 * 6).order(ByteOrder.nativeOrder()).asFloatBuffer();
+    FloatBuffer wrongShapeBuf =
+        ByteBuffer.allocateDirect(4 * 4).order(ByteOrder.nativeOrder()).asFloatBuffer();
+    LongBuffer wrongTypeBuf =
+        ByteBuffer.allocateDirect(8 * 4).order(ByteOrder.nativeOrder()).asLongBuffer();
+
+    try (SessionOptions options = new SessionOptions()) {
+      try (OrtSession session = env.createSession(modelPath, options);
+          OnnxTensor t = OnnxTensor.createTensor(env, new float[][] {{1, 2, 3, 4}});
+          OnnxTensor outputA = OnnxTensor.createTensor(env, outputABuf, new long[] {1, 4});
+          OnnxTensor outputB = OnnxTensor.createTensor(env, outputBBuf, new long[] {1, 4});
+          OnnxTensor outputC = OnnxTensor.createTensor(env, outputCBuf, new long[] {1, 4});
+          OnnxTensor tooSmall = OnnxTensor.createTensor(env, tooSmallBuf, new long[] {1, 2});
+          OnnxTensor tooBig = OnnxTensor.createTensor(env, tooBigBuf, new long[] {1, 6});
+          OnnxTensor wrongShape = OnnxTensor.createTensor(env, wrongShapeBuf, new long[] {2, 2});
+          OnnxTensor wrongType = OnnxTensor.createTensor(env, wrongTypeBuf, new long[] {1, 4})) {
+        Map<String, OnnxTensor> inputMap = Collections.singletonMap("input", t);
+        Set<String> requestedOutputs = new LinkedHashSet<>();
+        Map<String, OnnxTensor> pinnedOutputs = new LinkedHashMap<>();
+
+        // Test that all outputs can be pinned
+        pinnedOutputs.put("output-0", outputA);
+        pinnedOutputs.put("output-1", outputB);
+        pinnedOutputs.put("output-2", outputC);
+        try (OrtSession.Result r = session.run(inputMap, requestedOutputs, pinnedOutputs)) {
+          assertEquals(3, r.size());
+          assertSame(outputA, r.get(0));
+          assertSame(outputB, r.get(1));
+          assertSame(outputC, r.get(2));
+          assertFalse(r.isResultOwner(0));
+          assertFalse(r.isResultOwner(1));
+          assertFalse(r.isResultOwner(2));
+          // More tests
+        }
+        TestHelpers.zeroBuffer(outputABuf);
+        TestHelpers.zeroBuffer(outputBBuf);
+        TestHelpers.zeroBuffer(outputCBuf);
+        requestedOutputs.clear();
+        pinnedOutputs.clear();
+
+        // Test a single pinned output
+        pinnedOutputs.put("output-1", outputB);
+        try (OrtSession.Result r = session.run(inputMap, requestedOutputs, pinnedOutputs)) {
+          assertEquals(1, r.size());
+          assertSame(outputB, r.get(0));
+          assertSame(outputB, r.get("output-1").get());
+          assertFalse(r.isResultOwner(0));
+          // More tests
+        }
+        TestHelpers.zeroBuffer(outputABuf);
+        TestHelpers.zeroBuffer(outputBBuf);
+        TestHelpers.zeroBuffer(outputCBuf);
+        requestedOutputs.clear();
+        pinnedOutputs.clear();
+
+        // Test a mixture of pinned and generated outputs
+        requestedOutputs.add("output-0");
+        requestedOutputs.add("output-2");
+        pinnedOutputs.put("output-1", outputB);
+        try (OrtSession.Result r = session.run(inputMap, requestedOutputs, pinnedOutputs)) {
+          assertEquals(3, r.size());
+          // pinned outputs are first
+          assertSame(outputB, r.get(0));
+          assertSame(outputB, r.get("output-1").get());
+          // requested outputs are different
+          assertNotSame(outputA, r.get("output-0").get());
+          assertNotSame(outputC, r.get("output-2").get());
+          // check ownership.
+          assertFalse(r.isResultOwner(0));
+          assertTrue(r.isResultOwner(1));
+          assertTrue(r.isResultOwner(2));
+          // More tests
+        }
+        TestHelpers.zeroBuffer(outputABuf);
+        TestHelpers.zeroBuffer(outputBBuf);
+        TestHelpers.zeroBuffer(outputCBuf);
+        requestedOutputs.clear();
+        pinnedOutputs.clear();
+
+        // Test that overlapping names causes an error
+        requestedOutputs.add("output-1");
+        pinnedOutputs.put("output-1", outputB);
+        try (OrtSession.Result r = session.run(inputMap, requestedOutputs, pinnedOutputs)) {
+          fail("Should have thrown OrtException");
+        } catch (OrtException e) {
+          assertEquals(OrtErrorCode.ORT_JAVA_UNKNOWN, e.getCode());
+        }
+        requestedOutputs.clear();
+        pinnedOutputs.clear();
+
+        // Test that a tensor of the wrong type causes an error
+        pinnedOutputs.put("output-0", wrongType);
+        try (OrtSession.Result r = session.run(inputMap, requestedOutputs, pinnedOutputs)) {
+          fail("Should have thrown OrtException");
+        } catch (OrtException e) {
+          assertEquals(OrtErrorCode.ORT_INVALID_ARGUMENT, e.getCode());
+        }
+        requestedOutputs.clear();
+        pinnedOutputs.clear();
+
+        // Test that a tensor of the wrong shape (but right capacity) causes an error.
+        pinnedOutputs.put("output-1", wrongShape);
+        try (OrtSession.Result r = session.run(inputMap, requestedOutputs, pinnedOutputs)) {
+          fail("Should have thrown OrtException");
+        } catch (OrtException e) {
+          assertEquals(OrtErrorCode.ORT_INVALID_ARGUMENT, e.getCode());
+        }
+        requestedOutputs.clear();
+        pinnedOutputs.clear();
+
+        // Test that a tensor which is too small causes an error
+        pinnedOutputs.put("output-1", tooSmall);
+        try (OrtSession.Result r = session.run(inputMap, requestedOutputs, pinnedOutputs)) {
+          fail("Should have thrown OrtException");
+        } catch (OrtException e) {
+          assertEquals(OrtErrorCode.ORT_INVALID_ARGUMENT, e.getCode());
+        }
+        requestedOutputs.clear();
+        pinnedOutputs.clear();
+
+        // Test that a tensor which is too large causes an error
+        pinnedOutputs.put("output-1", tooBig);
+        try (OrtSession.Result r = session.run(inputMap, requestedOutputs, pinnedOutputs)) {
+          fail("Should have thrown OrtException");
+        } catch (OrtException e) {
+          assertEquals(OrtErrorCode.ORT_INVALID_ARGUMENT, e.getCode());
+        }
+        requestedOutputs.clear();
+        pinnedOutputs.clear();
+      }
+    }
+  }
+
   private static File getTestModelsDir() throws IOException {
     // get build directory, append downloaded models location
     String cwd = System.getProperty("user.dir");
diff --git a/java/src/test/java/ai/onnxruntime/ModelGenerators.java b/java/src/test/java/ai/onnxruntime/ModelGenerators.java
index 90fda4c5cf610..7bf7cef43208a 100644
--- a/java/src/test/java/ai/onnxruntime/ModelGenerators.java
+++ b/java/src/test/java/ai/onnxruntime/ModelGenerators.java
@@ -182,6 +182,102 @@ public void generateMatMul() throws IOException {
     }
   }
 
+  public void generateThreeOutputMatmul() throws IOException {
+    OnnxMl.GraphProto.Builder graph = OnnxMl.GraphProto.newBuilder();
+    graph.setName("ort-test-three-matmul");
+
+    // Add placeholders
+    OnnxMl.ValueInfoProto.Builder input = OnnxMl.ValueInfoProto.newBuilder();
+    input.setName("input");
+    OnnxMl.TypeProto inputType =
+        buildTensorTypeNode(
+            new long[] {-1, 4},
+            new String[] {"batch_size", null},
+            OnnxMl.TensorProto.DataType.FLOAT);
+    input.setType(inputType);
+    graph.addInput(input);
+    OnnxMl.ValueInfoProto.Builder outputA = OnnxMl.ValueInfoProto.newBuilder();
+    outputA.setName("output-0");
+    OnnxMl.TypeProto outputType =
+        buildTensorTypeNode(
+            new long[] {-1, 4},
+            new String[] {"batch_size", null},
+            OnnxMl.TensorProto.DataType.FLOAT);
+    outputA.setType(outputType);
+    graph.addOutput(outputA);
+    OnnxMl.ValueInfoProto.Builder outputB = OnnxMl.ValueInfoProto.newBuilder();
+    outputB.setName("output-1");
+    outputB.setType(outputType);
+    graph.addOutput(outputB);
+    OnnxMl.ValueInfoProto.Builder outputC = OnnxMl.ValueInfoProto.newBuilder();
+    outputC.setName("output-2");
+    outputC.setType(outputType);
+    graph.addOutput(outputC);
+
+    // Add initializers
+    OnnxMl.TensorProto.Builder tensor = OnnxMl.TensorProto.newBuilder();
+    tensor.addDims(4);
+    tensor.addDims(4);
+    Float[] floats =
+        new Float[] {1f, 2f, 3f, 4f, 5f, 6f, 7f, 8f, 9f, 10f, 11f, 12f, 13f, 14f, 15f, 16f};
+    tensor.addAllFloatData(Arrays.asList(floats));
+    tensor.setDataType(OnnxMl.TensorProto.DataType.FLOAT.getNumber());
+    tensor.setName("tensor");
+    graph.addInitializer(tensor);
+    OnnxMl.TensorProto.Builder addInit = OnnxMl.TensorProto.newBuilder();
+    addInit.addDims(4);
+    Float[] addFloats = new Float[] {1f, 2f, 3f, 4f};
+    addInit.addAllFloatData(Arrays.asList(addFloats));
+    addInit.setDataType(OnnxMl.TensorProto.DataType.FLOAT.getNumber());
+    addInit.setName("add-init");
+    graph.addInitializer(addInit);
+
+    // Add operations
+    OnnxMl.NodeProto.Builder matmul = OnnxMl.NodeProto.newBuilder();
+    matmul.setName("matmul-0");
+    matmul.setOpType("MatMul");
+    matmul.addInput("input");
+    matmul.addInput("tensor");
+    matmul.addOutput("matmul-output");
+    graph.addNode(matmul);
+
+    OnnxMl.NodeProto.Builder id = OnnxMl.NodeProto.newBuilder();
+    id.setName("id-1");
+    id.setOpType("Identity");
+    id.addInput("matmul-output");
+    id.addOutput("output-0");
+    graph.addNode(id);
+
+    OnnxMl.NodeProto.Builder add = OnnxMl.NodeProto.newBuilder();
+    add.setName("add-2");
+    add.setOpType("Add");
+    add.addInput("matmul-output");
+    add.addInput("add-init");
+    add.addOutput("output-1");
+    graph.addNode(add);
+
+    OnnxMl.NodeProto.Builder log = OnnxMl.NodeProto.newBuilder();
+    log.setName("log-3");
+    log.setOpType("Log");
+    log.addInput("matmul-output");
+    log.addOutput("output-2");
+    graph.addNode(log);
+
+    // Build model
+    OnnxMl.ModelProto.Builder model = OnnxMl.ModelProto.newBuilder();
+    model.setGraph(graph);
+    model.setDocString("ORT three output matmul test");
+    model.setModelVersion(0);
+    model.setIrVersion(8);
+    model.setDomain("ai.onnxruntime.test");
+    model.addOpsetImport(OnnxMl.OperatorSetIdProto.newBuilder().setVersion(18).build());
+    try (OutputStream os =
+        Files.newOutputStream(
+            Paths.get("src", "test", "resources", "java-three-output-matmul.onnx"))) {
+      model.build().writeTo(os);
+    }
+  }
+
   private static void genCast(
       String name,
       OnnxMl.TensorProto.DataType inputDataType,
diff --git a/java/src/test/java/ai/onnxruntime/OnnxTensorTest.java b/java/src/test/java/ai/onnxruntime/OnnxTensorTest.java
index fcb4590717fea..a5f285ba86a14 100644
--- a/java/src/test/java/ai/onnxruntime/OnnxTensorTest.java
+++ b/java/src/test/java/ai/onnxruntime/OnnxTensorTest.java
@@ -88,6 +88,91 @@ public void testScalarCreation() throws OrtException {
     }
   }
 
+  @Test
+  public void testBufferCreation() throws OrtException {
+    OrtEnvironment env = OrtEnvironment.getEnvironment();
+
+    // Test creating a value from an array
+    // Arrays result in tensors allocated by ORT, so they do not have a backing java.nio.Buffer
+    float[] arrValues = new float[] {0, 1, 2, 3, 4};
+    try (OnnxTensor t = OnnxTensor.createTensor(env, arrValues)) {
+      // array creation isn't backed by buffers
+      Assertions.assertFalse(t.ownsBuffer());
+      Assertions.assertFalse(t.getBufferRef().isPresent());
+      FloatBuffer buf = t.getFloatBuffer();
+      float[] output = new float[arrValues.length];
+      buf.get(output);
+      Assertions.assertArrayEquals(arrValues, output);
+
+      // Can't modify the tensor through this buffer.
+      buf.put(0, 25);
+      Assertions.assertArrayEquals(arrValues, output);
+    }
+
+    // Test creating a value from a non-direct byte buffer
+    // Non-direct byte buffers are allocated on the Java heap and must be copied into off-heap
+    // direct byte buffers
+    // which can be directly passed to ORT
+    FloatBuffer nonDirectBuffer = FloatBuffer.allocate(5);
+    nonDirectBuffer.put(arrValues);
+    nonDirectBuffer.rewind();
+    try (OnnxTensor t = OnnxTensor.createTensor(env, nonDirectBuffer, new long[] {1, 5})) {
+      // non-direct buffers trigger a copy
+      Assertions.assertTrue(t.ownsBuffer());
+      // tensors backed by buffers can get the buffer ref back out
+      Assertions.assertTrue(t.getBufferRef().isPresent());
+      FloatBuffer buf = t.getFloatBuffer();
+      float[] output = new float[arrValues.length];
+      buf.get(output);
+      Assertions.assertArrayEquals(arrValues, output);
+
+      // Can't modify the tensor through getFloatBuffer.
+      buf.put(0, 25);
+      Assertions.assertArrayEquals(arrValues, output);
+
+      // Can modify the tensor through getBufferRef.
+      FloatBuffer ref = (FloatBuffer) t.getBufferRef().get();
+      ref.put(0, 25);
+      buf = t.getFloatBuffer();
+      buf.get(output);
+      Assertions.assertEquals(25, output[0]);
+    }
+
+    // Test creating a value from a direct byte buffer
+    // Direct byte buffers can be passed into ORT without additional copies or processing
+    FloatBuffer directBuffer =
+        ByteBuffer.allocateDirect(5 * 4).order(ByteOrder.nativeOrder()).asFloatBuffer();
+    directBuffer.put(arrValues);
+    directBuffer.rewind();
+    try (OnnxTensor t = OnnxTensor.createTensor(env, directBuffer, new long[] {1, 5})) {
+      // direct buffers don't trigger a copy
+      Assertions.assertFalse(t.ownsBuffer());
+      // tensors backed by buffers can get the buffer ref back out
+      Assertions.assertTrue(t.getBufferRef().isPresent());
+      FloatBuffer buf = t.getFloatBuffer();
+      float[] output = new float[arrValues.length];
+      buf.get(output);
+      Assertions.assertArrayEquals(arrValues, output);
+
+      // Can't modify the tensor through getFloatBuffer.
+      buf.put(0, 25);
+      Assertions.assertArrayEquals(arrValues, output);
+
+      // Can modify the tensor through getBufferRef.
+      FloatBuffer ref = (FloatBuffer) t.getBufferRef().get();
+      ref.put(0, 25);
+      buf = t.getFloatBuffer();
+      buf.get(output);
+      Assertions.assertEquals(25, output[0]);
+
+      // Can modify the tensor through our original ref to the direct byte buffer
+      directBuffer.put(1, 15);
+      buf = t.getFloatBuffer();
+      buf.get(output);
+      Assertions.assertEquals(15, output[1]);
+    }
+  }
+
   @Test
   public void testStringCreation() throws OrtException {
     OrtEnvironment env = OrtEnvironment.getEnvironment();
diff --git a/java/src/test/java/ai/onnxruntime/TestHelpers.java b/java/src/test/java/ai/onnxruntime/TestHelpers.java
index 7d41918b1c6c7..55d8169434d48 100644
--- a/java/src/test/java/ai/onnxruntime/TestHelpers.java
+++ b/java/src/test/java/ai/onnxruntime/TestHelpers.java
@@ -262,6 +262,12 @@ public static Path getResourcePath(String path) {
     return new File(TestHelpers.class.getResource(path).getFile()).toPath();
   }
 
+  public static void zeroBuffer(FloatBuffer buf) {
+    for (int i = 0; i < buf.capacity(); i++) {
+      buf.put(i, 0.0f);
+    }
+  }
+
   public static float[] loadTensorFromFile(Path filename) {
     return loadTensorFromFile(filename, true);
   }
diff --git a/java/src/test/java/ai/onnxruntime/TrainingTest.java b/java/src/test/java/ai/onnxruntime/TrainingTest.java
index a02f5a88b2ac5..eaa7da1fc6a16 100644
--- a/java/src/test/java/ai/onnxruntime/TrainingTest.java
+++ b/java/src/test/java/ai/onnxruntime/TrainingTest.java
@@ -16,7 +16,6 @@
 import java.util.Map;
 import java.util.Set;
 import org.junit.jupiter.api.Assertions;
-import org.junit.jupiter.api.Disabled;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.condition.EnabledIfSystemProperty;
 
@@ -69,8 +68,6 @@ public void testCreateTrainingSessionWithEval() throws OrtException {
     }
   }
 
-  // this test is not enabled as ORT Java doesn't support supplying an output buffer
-  @Disabled
   @Test
   public void testTrainingSessionTrainStep() throws OrtException {
     String checkpointPath = TestHelpers.getResourcePath("/checkpoint.ckpt").toString();
@@ -99,14 +96,11 @@ public void testTrainingSessionTrainStep() throws OrtException {
             ByteBuffer.allocateDirect(4 * expectedOutput.length)
                 .order(ByteOrder.nativeOrder())
                 .asFloatBuffer();
-        OnnxTensor outputTensor =
-            OnnxTensor.createTensor(env, output, new long[expectedOutput.length]);
+        OnnxTensor outputTensor = OnnxTensor.createTensor(env, output, new long[0]);
         outputMap.put("onnx::loss::21273", outputTensor);
-        /* Disabled as we haven't implemented this yet
-        try (trainingSession.trainStep(pinnedInputs, outputMap)) {
-          Assertions.assertArrayEquals(expectedOutput, (float[]) outputTensor.getValue(), 1e-3f);
+        try (OrtSession.Result r = trainingSession.trainStep(pinnedInputs, outputMap)) {
+          Assertions.assertEquals(expectedOutput[0], (float) outputTensor.getValue(), 1e-3f);
         }
-        */
       } finally {
         OnnxValue.close(outputMap);
         OnnxValue.close(pinnedInputs);
diff --git a/java/src/test/resources/java-three-output-matmul.onnx b/java/src/test/resources/java-three-output-matmul.onnx
new file mode 100644
index 0000000000000..fed0bbca460cf
Binary files /dev/null and b/java/src/test/resources/java-three-output-matmul.onnx differ
diff --git a/js/.eslintrc.js b/js/.eslintrc.js
index e13cabae9ed45..0bf47c5264f61 100644
--- a/js/.eslintrc.js
+++ b/js/.eslintrc.js
@@ -5,10 +5,18 @@
 
 module.exports = {
   root: true,
-  ignorePatterns: ['**/*.js', 'ort-schema/', 'common/test/type-tests/', 'node_modules/', 'types/', 'dist/'],
+  ignorePatterns: [
+    '**/*.js',
+    'node_modules/',
+    'ort-schema/',
+    'common/test/type-tests/',
+    'web/types.d.ts',
+    'test/data/',
+    'dist/',
+  ],
   env: { 'es6': true },
   parser: '@typescript-eslint/parser',
-  parserOptions: { 'project': 'tsconfig.json', 'sourceType': 'module' },
+  parserOptions: { 'project': true, 'sourceType': 'module' },
   plugins: ['@typescript-eslint', 'prefer-arrow', 'header', 'import', 'unicorn', 'jsdoc'],
   rules: {
     'unicorn/filename-case': 'error',
@@ -119,7 +127,6 @@ module.exports = {
     rules: {
       'jsdoc/check-alignment': 'error',
       'jsdoc/check-indentation': 'error',
-      'jsdoc/newline-after-description': 'error',
     }
   }, {
     files: ['common/test/**/*.ts'],
@@ -146,7 +153,54 @@ module.exports = {
     }
   }, {
     files: ['web/lib/**/*.ts'], rules: {
-      'no-underscore-dangle': 'off',
+      'no-underscore-dangle': ['error', {
+        'allow': [
+          '_free',
+          '_malloc',
+          '_JsepGetNodeName',
+          '_JsepOutput',
+          '_OrtAddFreeDimensionOverride',
+          '_OrtAddRunConfigEntry',
+          '_OrtAddSessionConfigEntry',
+          '_OrtAppendExecutionProvider',
+          '_OrtBindInput',
+          '_OrtBindOutput',
+          '_OrtClearBoundOutputs',
+          '_OrtCreateBinding',
+          '_OrtCreateRunOptions',
+          '_OrtCreateSession',
+          '_OrtCreateSessionOptions',
+          '_OrtCreateTensor',
+          '_OrtEndProfiling',
+          '_OrtFree',
+          '_OrtGetInputName',
+          '_OrtGetInputOutputCount',
+          '_OrtGetLastError',
+          '_OrtGetOutputName',
+          '_OrtGetTensorData',
+          '_OrtInit',
+          '_OrtReleaseBinding',
+          '_OrtReleaseRunOptions',
+          '_OrtReleaseSession',
+          '_OrtReleaseSessionOptions',
+          '_OrtReleaseTensor',
+          '_OrtRun',
+          '_OrtRunWithBinding',
+          '_OrtTrainingCopyParametersFromBuffer',
+          '_OrtTrainingCopyParametersToBuffer',
+          '_OrtTrainingCreateSession',
+          '_OrtTrainingEvalStep',
+          '_OrtTrainingGetModelInputOutputCount',
+          '_OrtTrainingGetModelInputOutputName',
+          '_OrtTrainingGetParametersSize',
+          '_OrtTrainingLazyResetGrad',
+          '_OrtTrainingLoadCheckpoint',
+          '_OrtTrainingOptimizerStep',
+          '_OrtTrainingReleaseCheckpoint',
+          '_OrtTrainingReleaseSession',
+          '_OrtTrainingRunTrainStep'
+        ]
+      }]
     }
   }, {
     files: ['web/lib/onnxjs/**/*.ts'], rules: {
@@ -159,6 +213,7 @@ module.exports = {
       'import/no-internal-modules': 'off',
       'prefer-arrow/prefer-arrow-functions': 'off',
       'no-param-reassign': 'off',
+      'no-underscore-dangle': 'off',
       'guard-for-in': 'off'
     }
   }, {
diff --git a/js/.gitignore b/js/.gitignore
index 076984956d0b8..028fde4c17af9 100644
--- a/js/.gitignore
+++ b/js/.gitignore
@@ -9,3 +9,5 @@ tsconfig.tsbuildinfo
 *.d.ts
 
 *.tgz
+
+*.esbuild.metafile.json
diff --git a/js/.vscode/launch.json b/js/.vscode/launch.json
index 3f4ec74b7de58..26a08d37488ba 100644
--- a/js/.vscode/launch.json
+++ b/js/.vscode/launch.json
@@ -5,7 +5,7 @@
   "version": "0.2.0",
   "configurations": [
     {
-      "name": "[common] Launch Unit Tests",
+      "name": "[common] Launch Unit Tests in Node.js",
       "args": ["-u", "bdd", "--timeout", "999999", "--colors", "${workspaceFolder}/common/test/**/*.js"],
       "internalConsoleOptions": "openOnSessionStart",
       "program": "${workspaceFolder}/node_modules/mocha/bin/_mocha",
@@ -17,7 +17,7 @@
       "preLaunchTask": "tsc: build - common/test/tsconfig.json"
     },
     {
-      "name": "[web] Launch Test Runner",
+      "name": "[web] Launch Test Runner CLI in Node.js",
       "program": "${workspaceFolder}/web/script/test-runner-cli.js",
       "request": "launch",
       "skipFiles": ["<node_internals>/**"],
@@ -26,17 +26,35 @@
       "args": ["suite1"]
     },
     {
-      "name": "[web] Attach to Chrome",
+      "name": "[web] Launch NPM tests in Node.js",
+      "args": [
+        "--timeout",
+        "999999",
+        "--colors",
+        "-r",
+        "${workspaceFolder}/web/dist/ort.node.min.js",
+        "${workspaceFolder}/web/test/test-main"
+      ],
+      "internalConsoleOptions": "openOnSessionStart",
+      "program": "${workspaceFolder}/node_modules/mocha/bin/_mocha",
+      "request": "launch",
+      "skipFiles": ["<node_internals>/**"],
+      "type": "node",
+      "cwd": "${workspaceFolder}/web"
+    },
+    {
+      "name": "[web] Attach to Chrome for NPM tests",
       "type": "chrome",
       "request": "attach",
       "port": 9333,
       "webRoot": "${workspaceFolder}",
       "sourceMapPathOverrides": {
-        "webpack://ort/*": "${webRoot}/common/*",
-        "webpack:///*": "${webRoot}/web/*"
+        "../../common/*": "${webRoot}/common/*",
+        "../lib/*": "${webRoot}/web/lib/*"
       },
       "sourceMaps": true,
-      "smartStep": true
+      "smartStep": true,
+      "skipFiles": ["**/node_modules/**"]
     },
     {
       "name": "[web] Remote Browser via Webkit Adaptor",
diff --git a/js/.vscode/settings.json b/js/.vscode/settings.json
index 4948899ec671b..9c2fe646d728d 100644
--- a/js/.vscode/settings.json
+++ b/js/.vscode/settings.json
@@ -36,7 +36,8 @@
     "node/lib/**/*.js.map": true,
     "node/lib/**/*.js": true,
     "web/lib/**/*.js.map": true,
-    "web/lib/**/*.js": true
+    "web/lib/**/*.js": true,
+    "web/lib/**/*.d.ts": true
   },
   "files.insertFinalNewline": true,
   "files.trimTrailingWhitespace": true,
diff --git a/js/README.md b/js/README.md
index 7e6681e6bd897..1662de6d4ac78 100644
--- a/js/README.md
+++ b/js/README.md
@@ -344,13 +344,13 @@ From ORT v1.13 onwards the 'full' ONNX Runtime package is used. It supports both
       Full build:
 
       ```sh
-      python tools/ci_build/github/apple/build_ios_framework.py tools/ci_build/github/apple/default_full_ios_framework_build_settings.json --config Release
+      python tools/ci_build/github/apple/build_apple_framework.py tools/ci_build/github/apple/default_full_apple_framework_build_settings.json --config Release
       ```
 
       Reduced size build:
 
       ```sh
-      python tools/ci_build/github/apple/build_ios_framework.py tools/ci_build/github/apple/default_mobile_ios_framework_build_settings.json --config MinSizeRel --include_ops_by_config <required_ops_and_types_for_your_models.config> --enable_reduced_operator_type_support
+      python tools/ci_build/github/apple/build_apple_framework.py tools/ci_build/github/apple/default_mobile_ios_framework_build_settings.json --config MinSizeRel --include_ops_by_config <required_ops_and_types_for_your_models.config> --enable_reduced_operator_type_support
       ```
 
       The build creates `Headers`, `LICENSE`, and `onnxruntime.xcframework` in `build/iOS_framework/framework_out` directory. From `framework_out` directory, create an archive file named `onnxruntime-c.zip` for a full build or `onnxruntime-mobile-c.zip` for a reduced size build and copy to `<ORT_ROOT>/js/react_native/local_pods` directory.
diff --git a/js/build_jsep.bat b/js/build_jsep.bat
index 02f1170ecb067..acd40ff920774 100644
--- a/js/build_jsep.bat
+++ b/js/build_jsep.bat
@@ -22,7 +22,7 @@ if ["%~1"]==["d"] (
 )
 if ["%~1"]==["r"] (
     set CONFIG=Release
-    set CONFIG_EXTRA_FLAG=
+    set CONFIG_EXTRA_FLAG=--enable_wasm_api_exception_catching --disable_rtti
     goto :arg2
 )
 echo Invalid configuration "%~1", must be "d"(Debug) or "r"(Release)
diff --git a/js/common/build.js b/js/common/build.js
index cf459f5efa812..b0956c608b350 100644
--- a/js/common/build.js
+++ b/js/common/build.js
@@ -21,6 +21,3 @@ execSync('npm run build:esm', {shell: true, stdio: 'inherit', cwd: __dirname});
 // see also: https://evertpot.com/universal-commonjs-esm-typescript-packages/
 writeFileSync(resolve(__dirname, './dist/cjs', 'package.json'), '{"type": "commonjs"}');
 writeFileSync(resolve(__dirname, './dist/esm', 'package.json'), '{"type": "module"}');
-
-// launch webpack to generate bundles
-execSync('npm run build:bundles', {shell: true, stdio: 'inherit', cwd: __dirname});
diff --git a/js/common/lib/backend-impl.ts b/js/common/lib/backend-impl.ts
index 75feba1d0ae08..e129c6971a85c 100644
--- a/js/common/lib/backend-impl.ts
+++ b/js/common/lib/backend-impl.ts
@@ -26,7 +26,7 @@ const backendsSortedByPriority: string[] = [];
  * @ignore
  */
 export const registerBackend = (name: string, backend: Backend, priority: number): void => {
-  if (backend && typeof backend.init === 'function' && typeof backend.createSessionHandler === 'function') {
+  if (backend && typeof backend.init === 'function' && typeof backend.createInferenceSessionHandler === 'function') {
     const currentBackend = backends.get(name);
     if (currentBackend === undefined) {
       backends.set(name, {backend, priority});
diff --git a/js/common/lib/backend.ts b/js/common/lib/backend.ts
index 804f33f00d103..67d283b694955 100644
--- a/js/common/lib/backend.ts
+++ b/js/common/lib/backend.ts
@@ -3,6 +3,7 @@
 
 import {InferenceSession} from './inference-session.js';
 import {OnnxValue} from './onnx-value.js';
+import {TrainingSession} from './training-session.js';
 
 /**
  * @ignore
@@ -14,16 +15,23 @@ export declare namespace SessionHandler {
 }
 
 /**
- * Represent a handler instance of an inference session.
+ * Represents shared SessionHandler functionality
  *
  * @ignore
  */
-export interface SessionHandler {
+interface SessionHandler {
   dispose(): Promise<void>;
 
   readonly inputNames: readonly string[];
   readonly outputNames: readonly string[];
+}
 
+/**
+ * Represent a handler instance of an inference session.
+ *
+ * @ignore
+ */
+export interface InferenceSessionHandler extends SessionHandler {
   startProfiling(): void;
   endProfiling(): void;
 
@@ -31,6 +39,21 @@ export interface SessionHandler {
       options: InferenceSession.RunOptions): Promise<SessionHandler.ReturnType>;
 }
 
+/**
+ * Represent a handler instance of a training inference session.
+ *
+ * @ignore
+ */
+export interface TrainingSessionHandler extends SessionHandler {
+  runTrainStep(
+      feeds: SessionHandler.FeedsType, fetches: SessionHandler.FetchesType,
+      options: InferenceSession.RunOptions): Promise<SessionHandler.ReturnType>;
+
+  getParametersSize(trainableOnly: boolean): Promise<number>;
+  loadParametersBuffer(array: Uint8Array, trainableOnly: boolean): Promise<void>;
+  getContiguousParameters(trainableOnly: boolean): Promise<OnnxValue>;
+}
+
 /**
  * Represent a backend that provides implementation of model inferencing.
  *
@@ -42,8 +65,13 @@ export interface Backend {
    */
   init(): Promise<void>;
 
-  createSessionHandler(uriOrBuffer: string|Uint8Array, options?: InferenceSession.SessionOptions):
-      Promise<SessionHandler>;
+  createInferenceSessionHandler(uriOrBuffer: string|Uint8Array, options?: InferenceSession.SessionOptions):
+      Promise<InferenceSessionHandler>;
+
+  createTrainingSessionHandler?
+      (checkpointStateUriOrBuffer: TrainingSession.URIorBuffer, trainModelUriOrBuffer: TrainingSession.URIorBuffer,
+       evalModelUriOrBuffer: TrainingSession.URIorBuffer, optimizerModelUriOrBuffer: TrainingSession.URIorBuffer,
+       options: InferenceSession.SessionOptions): Promise<TrainingSessionHandler>;
 }
 
 export {registerBackend} from './backend-impl.js';
diff --git a/js/common/lib/env.ts b/js/common/lib/env.ts
index 525272294c587..76575ef7b9368 100644
--- a/js/common/lib/env.ts
+++ b/js/common/lib/env.ts
@@ -9,6 +9,7 @@ export declare namespace Env {
     'ort-wasm.wasm'?: string;
     'ort-wasm-threaded.wasm'?: string;
     'ort-wasm-simd.wasm'?: string;
+    'ort-training-wasm-simd.wasm'?: string;
     'ort-wasm-simd-threaded.wasm'?: string;
     /* eslint-enable @typescript-eslint/naming-convention */
   };
@@ -105,6 +106,12 @@ export declare namespace Env {
      * see comments on {@link GpuBufferType} for more details about why not use types defined in "@webgpu/types".
      */
     readonly device: unknown;
+    /**
+     * Set or get whether validate input content.
+     *
+     * @defaultValue `false`
+     */
+    validateInputContent?: boolean;
   }
 }
 
diff --git a/js/common/lib/index.ts b/js/common/lib/index.ts
index 85df1747f8576..9cbfcc4e8bcdc 100644
--- a/js/common/lib/index.ts
+++ b/js/common/lib/index.ts
@@ -22,3 +22,4 @@ export * from './env.js';
 export * from './inference-session.js';
 export * from './tensor.js';
 export * from './onnx-value.js';
+export * from './training-session.js';
diff --git a/js/common/lib/inference-session-impl.ts b/js/common/lib/inference-session-impl.ts
index 06949b4a26c0d..9bc2088f2088a 100644
--- a/js/common/lib/inference-session-impl.ts
+++ b/js/common/lib/inference-session-impl.ts
@@ -2,7 +2,7 @@
 // Licensed under the MIT License.
 
 import {resolveBackend} from './backend-impl.js';
-import {SessionHandler} from './backend.js';
+import {InferenceSessionHandler} from './backend.js';
 import {InferenceSession as InferenceSessionInterface} from './inference-session.js';
 import {OnnxValue} from './onnx-value.js';
 import {Tensor} from './tensor.js';
@@ -14,7 +14,7 @@ type FetchesType = InferenceSessionInterface.FetchesType;
 type ReturnType = InferenceSessionInterface.ReturnType;
 
 export class InferenceSession implements InferenceSessionInterface {
-  private constructor(handler: SessionHandler) {
+  private constructor(handler: InferenceSessionHandler) {
     this.handler = handler;
   }
   run(feeds: FeedsType, options?: RunOptions): Promise<ReturnType>;
@@ -195,7 +195,7 @@ export class InferenceSession implements InferenceSessionInterface {
     const eps = options.executionProviders || [];
     const backendHints = eps.map(i => typeof i === 'string' ? i : i.name);
     const backend = await resolveBackend(backendHints);
-    const handler = await backend.createSessionHandler(filePathOrUint8Array, options);
+    const handler = await backend.createInferenceSessionHandler(filePathOrUint8Array, options);
     return new InferenceSession(handler);
   }
 
@@ -213,5 +213,5 @@ export class InferenceSession implements InferenceSessionInterface {
     return this.handler.outputNames;
   }
 
-  private handler: SessionHandler;
+  private handler: InferenceSessionHandler;
 }
diff --git a/js/common/lib/inference-session.ts b/js/common/lib/inference-session.ts
index 71a5912df2464..c7760692eed00 100644
--- a/js/common/lib/inference-session.ts
+++ b/js/common/lib/inference-session.ts
@@ -192,6 +192,7 @@ export declare namespace InferenceSession {
     wasm: WebAssemblyExecutionProviderOption;
     webgl: WebGLExecutionProviderOption;
     xnnpack: XnnpackExecutionProviderOption;
+    webgpu: WebGpuExecutionProviderOption;
     webnn: WebNNExecutionProviderOption;
     nnapi: NnapiExecutionProviderOption;
   }
@@ -233,9 +234,14 @@ export declare namespace InferenceSession {
   export interface XnnpackExecutionProviderOption extends ExecutionProviderOption {
     readonly name: 'xnnpack';
   }
+  export interface WebGpuExecutionProviderOption extends ExecutionProviderOption {
+    readonly name: 'webgpu';
+    preferredLayout?: 'NCHW'|'NHWC';
+  }
   export interface WebNNExecutionProviderOption extends ExecutionProviderOption {
     readonly name: 'webnn';
     deviceType?: 'cpu'|'gpu';
+    numThreads?: number;
     powerPreference?: 'default'|'low-power'|'high-performance';
   }
   export interface CoreMLExecutionProviderOption extends ExecutionProviderOption {
diff --git a/js/common/lib/training-session-impl.ts b/js/common/lib/training-session-impl.ts
new file mode 100644
index 0000000000000..03694738387f2
--- /dev/null
+++ b/js/common/lib/training-session-impl.ts
@@ -0,0 +1,202 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+import {resolveBackend} from './backend-impl.js';
+import {SessionHandler, TrainingSessionHandler} from './backend.js';
+import {InferenceSession as InferenceSession} from './inference-session.js';
+import {OnnxValue} from './onnx-value.js';
+import {Tensor} from './tensor.js';
+import {TrainingSession as TrainingSessionInterface, TrainingSessionCreateOptions} from './training-session.js';
+
+type SessionOptions = InferenceSession.SessionOptions;
+type FeedsType = InferenceSession.FeedsType;
+type FetchesType = InferenceSession.FetchesType;
+type ReturnType = InferenceSession.ReturnType;
+type RunOptions = InferenceSession.RunOptions;
+
+const noBackendErrMsg: string = 'Training backend could not be resolved. ' +
+    'Make sure you\'re using the correct configuration & WebAssembly files.';
+
+export class TrainingSession implements TrainingSessionInterface {
+  private constructor(handler: TrainingSessionHandler) {
+    this.handler = handler;
+  }
+  private handler: TrainingSessionHandler;
+
+  get inputNames(): readonly string[] {
+    return this.handler.inputNames;
+  }
+  get outputNames(): readonly string[] {
+    return this.handler.outputNames;
+  }
+
+  static async create(trainingOptions: TrainingSessionCreateOptions, sessionOptions?: SessionOptions):
+      Promise<TrainingSession> {
+    const evalModel: string|Uint8Array = trainingOptions.evalModel || '';
+    const optimizerModel: string|Uint8Array = trainingOptions.optimizerModel || '';
+    const options: SessionOptions = sessionOptions || {};
+
+    // get backend hints
+    const eps = options.executionProviders || [];
+    const backendHints = eps.map(i => typeof i === 'string' ? i : i.name);
+    const backend = await resolveBackend(backendHints);
+    if (backend.createTrainingSessionHandler) {
+      const handler = await backend.createTrainingSessionHandler(
+          trainingOptions.checkpointState, trainingOptions.trainModel, evalModel, optimizerModel, options);
+      return new TrainingSession(handler);
+    } else {
+      throw new Error(noBackendErrMsg);
+    }
+  }
+
+  /**
+   * Helper function for runTrainStep and future runStep methods that handles the type-narrowing conversion from
+   * the given parameters to SessionHandler.FetchesType and RunOptions.
+   *
+   * @param feeds the required input
+   * @param arg1 narrowed & converted into the SessionHandler.FetchesType or RunOptions object
+   * @param arg2 optional RunOptions object.
+   * @returns
+   */
+  typeNarrowingForRunStep(feeds: FeedsType, arg1?: FetchesType|RunOptions, arg2?: RunOptions):
+      [SessionHandler.FetchesType, RunOptions] {
+    const fetches: {[name: string]: OnnxValue|null} = {};
+    let options: RunOptions = {};
+    // check inputs
+    if (typeof feeds !== 'object' || feeds === null || feeds instanceof Tensor || Array.isArray(feeds)) {
+      throw new TypeError(
+          '\'feeds\' must be an object that use input names as keys and OnnxValue as corresponding values.');
+    }
+
+    let isFetchesEmpty = true;
+    // determine which override is being used
+    if (typeof arg1 === 'object') {
+      if (arg1 === null) {
+        throw new TypeError('Unexpected argument[1]: cannot be null.');
+      }
+      if (arg1 instanceof Tensor) {
+        throw new TypeError('\'fetches\' cannot be a Tensor');
+      }
+
+      if (Array.isArray(arg1)) {
+        if (arg1.length === 0) {
+          throw new TypeError('\'fetches\' cannot be an empty array.');
+        }
+        isFetchesEmpty = false;
+        // output names
+        for (const name of arg1) {
+          if (typeof name !== 'string') {
+            throw new TypeError('\'fetches\' must be a string array or an object.');
+          }
+          if (this.outputNames.indexOf(name) === -1) {
+            throw new RangeError(`'fetches' contains invalid output name: ${name}.`);
+          }
+          fetches[name] = null;
+        }
+
+        if (typeof arg2 === 'object' && arg2 !== null) {
+          options = arg2;
+        } else if (typeof arg2 !== 'undefined') {
+          throw new TypeError('\'options\' must be an object.');
+        }
+      } else {
+        // decide whether arg1 is fetches or options
+        // if any output name is present and its value is valid OnnxValue, we consider it fetches
+        let isFetches = false;
+        const arg1Keys = Object.getOwnPropertyNames(arg1);
+        for (const name of this.outputNames) {
+          if (arg1Keys.indexOf(name) !== -1) {
+            const v = (arg1 as InferenceSession.NullableOnnxValueMapType)[name];
+            if (v === null || v instanceof Tensor) {
+              isFetches = true;
+              isFetchesEmpty = false;
+              fetches[name] = v;
+            }
+          }
+        }
+
+        if (isFetches) {
+          if (typeof arg2 === 'object' && arg2 !== null) {
+            options = arg2;
+          } else if (typeof arg2 !== 'undefined') {
+            throw new TypeError('\'options\' must be an object.');
+          }
+        } else {
+          options = arg1 as RunOptions;
+        }
+      }
+    } else if (typeof arg1 !== 'undefined') {
+      throw new TypeError('Unexpected argument[1]: must be \'fetches\' or \'options\'.');
+    }
+
+    // check if all inputs are in feed
+    for (const name of this.inputNames) {
+      if (typeof feeds[name] === 'undefined') {
+        throw new Error(`input '${name}' is missing in 'feeds'.`);
+      }
+    }
+
+    // if no fetches is specified, we use the full output names list
+    if (isFetchesEmpty) {
+      for (const name of this.outputNames) {
+        fetches[name] = null;
+      }
+    }
+
+    return [fetches, options];
+  }
+
+  /**
+   * Helper method for runTrainStep and any other runStep methods. Takes the ReturnType result from the SessionHandler
+   * and changes it into a map of Tensors.
+   *
+   * @param results
+   * @returns
+   */
+  convertHandlerReturnTypeToMapOfTensors(results: SessionHandler.ReturnType): ReturnType {
+    const returnValue: {[name: string]: OnnxValue} = {};
+    for (const key in results) {
+      if (Object.hasOwnProperty.call(results, key)) {
+        const result = results[key];
+        if (result instanceof Tensor) {
+          returnValue[key] = result;
+        } else {
+          returnValue[key] = new Tensor(result.type, result.data, result.dims);
+        }
+      }
+    }
+    return returnValue;
+  }
+
+  runTrainStep(feeds: FeedsType, options?: RunOptions): Promise<ReturnType>;
+  runTrainStep(feeds: FeedsType, fetches: FetchesType, options?: RunOptions): Promise<ReturnType>;
+  async runTrainStep(feeds: FeedsType, arg1?: FetchesType|RunOptions, arg2?: RunOptions): Promise<ReturnType> {
+    const [fetches, options] = this.typeNarrowingForRunStep(feeds, arg1, arg2);
+    const results = await this.handler.runTrainStep(feeds, fetches, options);
+    return this.convertHandlerReturnTypeToMapOfTensors(results);
+  }
+
+  async getParametersSize(trainableOnly = true): Promise<number> {
+    return this.handler.getParametersSize(trainableOnly);
+  }
+
+  async loadParametersBuffer(array: Uint8Array, trainableOnly = true): Promise<void> {
+    const paramsSize = await this.getParametersSize(trainableOnly);
+    // checking that the size of the Uint8Array is equivalent to the byte length of a Float32Array of the number
+    // of parameters
+    if (array.length !== 4 * paramsSize) {
+      throw new Error(
+          'Size of the buffer passed into loadParametersBuffer must match the number of parameters in ' +
+          'the model. Please use getParametersSize method to check.');
+    }
+    return this.handler.loadParametersBuffer(array, trainableOnly);
+  }
+
+  async getContiguousParameters(trainableOnly = true): Promise<OnnxValue> {
+    return this.handler.getContiguousParameters(trainableOnly);
+  }
+
+  async release(): Promise<void> {
+    return this.handler.dispose();
+  }
+}
diff --git a/js/common/lib/training-session.ts b/js/common/lib/training-session.ts
new file mode 100644
index 0000000000000..810ec2a8583b3
--- /dev/null
+++ b/js/common/lib/training-session.ts
@@ -0,0 +1,147 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+import {InferenceSession} from './inference-session.js';
+import {OnnxValue} from './onnx-value.js';
+import {TrainingSession as TrainingSessionImpl} from './training-session-impl.js';
+
+/* eslint-disable @typescript-eslint/no-redeclare */
+
+export declare namespace TrainingSession {
+  /**
+   * Either URI file path (string) or Uint8Array containing model or checkpoint information.
+   */
+  type URIorBuffer = string|Uint8Array;
+}
+
+/**
+ * Represent a runtime instance of an ONNX training session,
+ * which contains a model that can be trained, and, optionally,
+ * an eval and optimizer model.
+ */
+export interface TrainingSession {
+  // #region run()
+
+  /**
+   * Run TrainStep asynchronously with the given feeds and options.
+   *
+   * @param feeds - Representation of the model input. See type description of `InferenceSession.InputType` for
+   detail.
+   * @param options - Optional. A set of options that controls the behavior of model training.
+   * @returns A promise that resolves to a map, which uses output names as keys and OnnxValue as corresponding values.
+   */
+  runTrainStep(feeds: InferenceSession.FeedsType, options?: InferenceSession.RunOptions):
+      Promise<InferenceSession.ReturnType>;
+
+  /**
+   * Run a single train step with the given inputs and options.
+   *
+   * @param feeds - Representation of the model input.
+   * @param fetches - Representation of the model output.
+   * detail.
+   * @param options - Optional. A set of options that controls the behavior of model inference.
+   * @returns A promise that resolves to a map, which uses output names as keys and OnnxValue as corresponding
+   values.
+   */
+  runTrainStep(
+      feeds: InferenceSession.FeedsType, fetches: InferenceSession.FetchesType,
+      options?: InferenceSession.RunOptions): Promise<InferenceSession.ReturnType>;
+
+  // #endregion
+
+  // #region copy parameters
+
+  /**
+   * Retrieves the size of all parameters for the training state. Calculates the total number of primitive (datatype of
+   * the parameters) elements of all the parameters in the training state.
+   *
+   * @param trainableOnly - When set to true, the size is calculated for trainable params only. Default value is true.
+   */
+  getParametersSize(trainableOnly: boolean): Promise<number>;
+
+  /**
+   * Copies parameter values from the given array to the training state. Currently, only supporting models with
+   * parameters of type Float32.
+   *
+   * @param buffer - Float32 buffer containing parameters converted to a Uint8Array.
+   * @param trainableOnly - True if trainable parameters only to be modified, false otherwise. Default value is true.
+   */
+  loadParametersBuffer(array: Uint8Array, trainableOnly: boolean): Promise<void>;
+
+  /**
+   * Copies the model parameters to a contiguous buffer. Usually used in the context of Federated Learning.
+   * Currently, only supporting models with parameters of type Float32.
+   *
+   * @param trainableOnly - When set to true, only trainable parameters are copied. Trainable parameters are parameters
+   * for which requires_grad is set to true. Default value is true.
+   * @returns A promise that resolves to a Float32 OnnxValue of the requested parameters.
+   */
+  getContiguousParameters(trainableOnly: boolean): Promise<OnnxValue>;
+  // #endregion
+
+  // #region release()
+
+  /**
+   * Release the inference session and the underlying resources.
+   */
+  release(): Promise<void>;
+  // #endregion
+
+  // #region metadata
+
+  /**
+   * Get input names of the loaded model.
+   */
+  readonly inputNames: readonly string[];
+
+  /**
+   * Get output names of the loaded model.
+   */
+  readonly outputNames: readonly string[];
+  // #endregion
+}
+
+/**
+ * Represents the optional parameters that can be passed into the TrainingSessionFactory.
+ */
+export interface TrainingSessionCreateOptions {
+  /**
+   * URI or buffer for a .ckpt file that contains the checkpoint for the training model.
+   */
+  checkpointState: TrainingSession.URIorBuffer;
+  /**
+   * URI or buffer for the .onnx training file.
+   */
+  trainModel: TrainingSession.URIorBuffer;
+  /**
+   * Optional. URI or buffer for the .onnx optimizer model file.
+   */
+  optimizerModel?: TrainingSession.URIorBuffer;
+  /**
+   * Optional. URI or buffer for the .onnx eval model file.
+   */
+  evalModel?: TrainingSession.URIorBuffer;
+}
+
+/**
+ * Defines method overload possibilities for creating a TrainingSession.
+ */
+export interface TrainingSessionFactory {
+  // #region create()
+
+  /**
+   * Creates a new TrainingSession and asynchronously loads any models passed in through trainingOptions
+   *
+   * @param trainingOptions specify models and checkpoints to load into the Training Session
+   * @param sessionOptions specify configuration for training session behavior
+   *
+   * @returns Promise that resolves to a TrainingSession object
+   */
+  create(trainingOptions: TrainingSessionCreateOptions, sessionOptions?: InferenceSession.SessionOptions):
+      Promise<TrainingSession>;
+
+  // #endregion
+}
+
+// eslint-disable-next-line @typescript-eslint/naming-convention
+export const TrainingSession: TrainingSessionFactory = TrainingSessionImpl;
diff --git a/js/common/tsconfig.json b/js/common/tsconfig.json
index 4751cd7564f5d..d7bb3a593f66f 100644
--- a/js/common/tsconfig.json
+++ b/js/common/tsconfig.json
@@ -4,8 +4,7 @@
     "outDir": "./dist/esm",
     "declaration": true,
     "declarationMap": true,
-    "esModuleInterop": false,
-    "noUnusedParameters": true
+    "esModuleInterop": false
   },
   "include": ["lib"]
 }
diff --git a/js/node/lib/backend.ts b/js/node/lib/backend.ts
index d3680f9d44236..5f5ad49a2dea8 100644
--- a/js/node/lib/backend.ts
+++ b/js/node/lib/backend.ts
@@ -1,11 +1,11 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
-import {Backend, InferenceSession, SessionHandler} from 'onnxruntime-common';
+import {Backend, InferenceSession, InferenceSessionHandler, SessionHandler} from 'onnxruntime-common';
 
 import {Binding, binding} from './binding';
 
-class OnnxruntimeSessionHandler implements SessionHandler {
+class OnnxruntimeSessionHandler implements InferenceSessionHandler {
   #inferenceSession: Binding.InferenceSession;
 
   constructor(pathOrBuffer: string|Uint8Array, options: InferenceSession.SessionOptions) {
@@ -53,8 +53,8 @@ class OnnxruntimeBackend implements Backend {
     return Promise.resolve();
   }
 
-  async createSessionHandler(pathOrBuffer: string|Uint8Array, options?: InferenceSession.SessionOptions):
-      Promise<SessionHandler> {
+  async createInferenceSessionHandler(pathOrBuffer: string|Uint8Array, options?: InferenceSession.SessionOptions):
+      Promise<InferenceSessionHandler> {
     return new Promise((resolve, reject) => {
       process.nextTick(() => {
         try {
diff --git a/js/node/package-lock.json b/js/node/package-lock.json
index ce390aa88c0aa..c1cf8af4bb80e 100644
--- a/js/node/package-lock.json
+++ b/js/node/package-lock.json
@@ -22,7 +22,7 @@
         "jsonc": "^2.0.0",
         "minimist": "^1.2.8",
         "node-addon-api": "^6.0.0",
-        "onnx-proto": "^8.0.1"
+        "protobufjs": "^7.2.4"
       }
     },
     "../common": {
@@ -97,12 +97,6 @@
       "integrity": "sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw==",
       "dev": true
     },
-    "node_modules/@types/long": {
-      "version": "4.0.2",
-      "resolved": "https://registry.npmjs.org/@types/long/-/long-4.0.2.tgz",
-      "integrity": "sha512-MqTGEo5bj5t157U6fA/BiDynNkn0YknVdh48CMPkTSpFTVmvao5UQmm7uEF6xBEo7qIMAlY/JSleYaE6VOdpaA==",
-      "dev": true
-    },
     "node_modules/@types/minimist": {
       "version": "1.2.2",
       "resolved": "https://registry.npmjs.org/@types/minimist/-/minimist-1.2.2.tgz",
@@ -165,9 +159,9 @@
       "dev": true
     },
     "node_modules/axios": {
-      "version": "1.3.4",
-      "resolved": "https://registry.npmjs.org/axios/-/axios-1.3.4.tgz",
-      "integrity": "sha512-toYm+Bsyl6VC5wSkfkbbNB6ROv7KY93PEBBL6xyDczaIHasAiv4wPqQ/c4RjoQzipxRD2W5g21cOqQulZ7rHwQ==",
+      "version": "1.6.1",
+      "resolved": "https://registry.npmjs.org/axios/-/axios-1.6.1.tgz",
+      "integrity": "sha512-vfBmhDpKafglh0EldBEbVuoe7DyAavGSLWhuSm5ZSEKQnHhBf0xAAwybbNH1IkrJNGnS/VG4I5yxig1pCEXE4g==",
       "dev": true,
       "dependencies": {
         "follow-redirects": "^1.15.0",
@@ -528,9 +522,9 @@
       "dev": true
     },
     "node_modules/long": {
-      "version": "4.0.0",
-      "resolved": "https://registry.npmjs.org/long/-/long-4.0.0.tgz",
-      "integrity": "sha512-XsP+KhQif4bjX1kbuSiySJFNAehNxgLb6hPRGJ9QsUr8ajHkuXGdrHmFUTUUXhDwVX2R5bY4JNZEwbUiMhV+MA==",
+      "version": "5.2.3",
+      "resolved": "https://registry.npmjs.org/long/-/long-5.2.3.tgz",
+      "integrity": "sha512-lcHwpNoggQTObv5apGNCTdJrO69eHOZMi4BNC+rTLER8iHAqGrUVeLh/irVIM7zTw2bOXA8T6uNPeujwOLg/2Q==",
       "dev": true
     },
     "node_modules/lru-cache": {
@@ -663,15 +657,6 @@
         "node": "^12.13.0 || ^14.15.0 || >=16.0.0"
       }
     },
-    "node_modules/onnx-proto": {
-      "version": "8.0.1",
-      "resolved": "https://registry.npmjs.org/onnx-proto/-/onnx-proto-8.0.1.tgz",
-      "integrity": "sha512-ZpPTqp5dneh2bvavk/QpDsf20JJRArjqTkiMfshGmxR8ocjmfTk80fkW00FwLO7qRtybo9NPugcWQrumHYctLQ==",
-      "dev": true,
-      "dependencies": {
-        "protobufjs": "^6.11.2"
-      }
-    },
     "node_modules/onnxruntime-common": {
       "resolved": "../common",
       "link": true
@@ -690,9 +675,9 @@
       }
     },
     "node_modules/protobufjs": {
-      "version": "6.11.4",
-      "resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-6.11.4.tgz",
-      "integrity": "sha512-5kQWPaJHi1WoCpjTGszzQ32PG2F4+wRY6BmAT4Vfw56Q2FZ4YZzK20xUYQH4YkfehY1e6QSICrJquM6xXZNcrw==",
+      "version": "7.2.5",
+      "resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-7.2.5.tgz",
+      "integrity": "sha512-gGXRSXvxQ7UiPgfw8gevrfRWcTlSbOFg+p/N+JVJEK5VhueL2miT6qTymqAmjr1Q5WbOCyJbyrk6JfWKwlFn6A==",
       "dev": true,
       "hasInstallScript": true,
       "dependencies": {
@@ -706,13 +691,11 @@
         "@protobufjs/path": "^1.1.2",
         "@protobufjs/pool": "^1.1.0",
         "@protobufjs/utf8": "^1.1.0",
-        "@types/long": "^4.0.1",
         "@types/node": ">=13.7.0",
-        "long": "^4.0.0"
+        "long": "^5.0.0"
       },
-      "bin": {
-        "pbjs": "bin/pbjs",
-        "pbts": "bin/pbts"
+      "engines": {
+        "node": ">=12.0.0"
       }
     },
     "node_modules/proxy-from-env": {
@@ -789,9 +772,9 @@
       ]
     },
     "node_modules/semver": {
-      "version": "7.3.8",
-      "resolved": "https://registry.npmjs.org/semver/-/semver-7.3.8.tgz",
-      "integrity": "sha512-NB1ctGL5rlHrPJtFDVIVzTyQylMLu9N9VICA6HSFJo8MCGVTMW6gfpicwKmmK/dAjTOrqu5l63JJOpDSrAis3A==",
+      "version": "7.5.4",
+      "resolved": "https://registry.npmjs.org/semver/-/semver-7.5.4.tgz",
+      "integrity": "sha512-1bCSESV6Pv+i21Hvpxp3Dx+pSD8lIPt8uVjRrxAUt/nbswYc+tK6Y2btiULjd4+fnq15PX+nqQDC7Oft7WkwcA==",
       "dev": true,
       "dependencies": {
         "lru-cache": "^6.0.0"
@@ -1070,12 +1053,6 @@
       "integrity": "sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw==",
       "dev": true
     },
-    "@types/long": {
-      "version": "4.0.2",
-      "resolved": "https://registry.npmjs.org/@types/long/-/long-4.0.2.tgz",
-      "integrity": "sha512-MqTGEo5bj5t157U6fA/BiDynNkn0YknVdh48CMPkTSpFTVmvao5UQmm7uEF6xBEo7qIMAlY/JSleYaE6VOdpaA==",
-      "dev": true
-    },
     "@types/minimist": {
       "version": "1.2.2",
       "resolved": "https://registry.npmjs.org/@types/minimist/-/minimist-1.2.2.tgz",
@@ -1126,9 +1103,9 @@
       "dev": true
     },
     "axios": {
-      "version": "1.3.4",
-      "resolved": "https://registry.npmjs.org/axios/-/axios-1.3.4.tgz",
-      "integrity": "sha512-toYm+Bsyl6VC5wSkfkbbNB6ROv7KY93PEBBL6xyDczaIHasAiv4wPqQ/c4RjoQzipxRD2W5g21cOqQulZ7rHwQ==",
+      "version": "1.6.1",
+      "resolved": "https://registry.npmjs.org/axios/-/axios-1.6.1.tgz",
+      "integrity": "sha512-vfBmhDpKafglh0EldBEbVuoe7DyAavGSLWhuSm5ZSEKQnHhBf0xAAwybbNH1IkrJNGnS/VG4I5yxig1pCEXE4g==",
       "dev": true,
       "requires": {
         "follow-redirects": "^1.15.0",
@@ -1413,9 +1390,9 @@
       "dev": true
     },
     "long": {
-      "version": "4.0.0",
-      "resolved": "https://registry.npmjs.org/long/-/long-4.0.0.tgz",
-      "integrity": "sha512-XsP+KhQif4bjX1kbuSiySJFNAehNxgLb6hPRGJ9QsUr8ajHkuXGdrHmFUTUUXhDwVX2R5bY4JNZEwbUiMhV+MA==",
+      "version": "5.2.3",
+      "resolved": "https://registry.npmjs.org/long/-/long-5.2.3.tgz",
+      "integrity": "sha512-lcHwpNoggQTObv5apGNCTdJrO69eHOZMi4BNC+rTLER8iHAqGrUVeLh/irVIM7zTw2bOXA8T6uNPeujwOLg/2Q==",
       "dev": true
     },
     "lru-cache": {
@@ -1523,15 +1500,6 @@
         "set-blocking": "^2.0.0"
       }
     },
-    "onnx-proto": {
-      "version": "8.0.1",
-      "resolved": "https://registry.npmjs.org/onnx-proto/-/onnx-proto-8.0.1.tgz",
-      "integrity": "sha512-ZpPTqp5dneh2bvavk/QpDsf20JJRArjqTkiMfshGmxR8ocjmfTk80fkW00FwLO7qRtybo9NPugcWQrumHYctLQ==",
-      "dev": true,
-      "requires": {
-        "protobufjs": "^6.11.2"
-      }
-    },
     "onnxruntime-common": {
       "version": "file:../common",
       "requires": {
@@ -1549,9 +1517,9 @@
       }
     },
     "protobufjs": {
-      "version": "6.11.4",
-      "resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-6.11.4.tgz",
-      "integrity": "sha512-5kQWPaJHi1WoCpjTGszzQ32PG2F4+wRY6BmAT4Vfw56Q2FZ4YZzK20xUYQH4YkfehY1e6QSICrJquM6xXZNcrw==",
+      "version": "7.2.5",
+      "resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-7.2.5.tgz",
+      "integrity": "sha512-gGXRSXvxQ7UiPgfw8gevrfRWcTlSbOFg+p/N+JVJEK5VhueL2miT6qTymqAmjr1Q5WbOCyJbyrk6JfWKwlFn6A==",
       "dev": true,
       "requires": {
         "@protobufjs/aspromise": "^1.1.2",
@@ -1564,9 +1532,8 @@
         "@protobufjs/path": "^1.1.2",
         "@protobufjs/pool": "^1.1.0",
         "@protobufjs/utf8": "^1.1.0",
-        "@types/long": "^4.0.1",
         "@types/node": ">=13.7.0",
-        "long": "^4.0.0"
+        "long": "^5.0.0"
       }
     },
     "proxy-from-env": {
@@ -1619,9 +1586,9 @@
       "dev": true
     },
     "semver": {
-      "version": "7.3.8",
-      "resolved": "https://registry.npmjs.org/semver/-/semver-7.3.8.tgz",
-      "integrity": "sha512-NB1ctGL5rlHrPJtFDVIVzTyQylMLu9N9VICA6HSFJo8MCGVTMW6gfpicwKmmK/dAjTOrqu5l63JJOpDSrAis3A==",
+      "version": "7.5.4",
+      "resolved": "https://registry.npmjs.org/semver/-/semver-7.5.4.tgz",
+      "integrity": "sha512-1bCSESV6Pv+i21Hvpxp3Dx+pSD8lIPt8uVjRrxAUt/nbswYc+tK6Y2btiULjd4+fnq15PX+nqQDC7Oft7WkwcA==",
       "dev": true,
       "requires": {
         "lru-cache": "^6.0.0"
diff --git a/js/node/package.json b/js/node/package.json
index 0f8f0e9d2260c..8e591d8f46b9d 100644
--- a/js/node/package.json
+++ b/js/node/package.json
@@ -19,6 +19,7 @@
   },
   "scripts": {
     "buildr": "tsc && node ./script/build --config=RelWithDebInfo",
+    "preprepare": "node -e \"require('node:fs').copyFileSync('./node_modules/long/index.d.ts', './node_modules/long/umd/index.d.ts')\"",
     "prepare": "tsc --build script test .",
     "rebuild": "tsc && node ./script/build --rebuild",
     "rebuildd": "tsc && node ./script/build --rebuild --config=Debug",
@@ -39,7 +40,7 @@
     "jsonc": "^2.0.0",
     "minimist": "^1.2.8",
     "node-addon-api": "^6.0.0",
-    "onnx-proto": "^8.0.1"
+    "protobufjs": "^7.2.4"
   },
   "main": "dist/index.js",
   "os": [
diff --git a/js/node/src/session_options_helper.cc b/js/node/src/session_options_helper.cc
index 70e63da7cefa7..a0de832d87fe5 100644
--- a/js/node/src/session_options_helper.cc
+++ b/js/node/src/session_options_helper.cc
@@ -16,7 +16,6 @@
 #include "core/providers/dml/dml_provider_factory.h"
 #endif
 #ifdef USE_TENSORRT
-#include "core/providers/tensorrt/tensorrt_provider_factory.h"
 #include "core/providers/tensorrt/tensorrt_provider_options.h"
 #endif
 #ifdef USE_COREML
diff --git a/js/node/test/ort-schema/protobuf/.gitignore b/js/node/test/ort-schema/protobuf/.gitignore
new file mode 100644
index 0000000000000..092bb6c1c9fb4
--- /dev/null
+++ b/js/node/test/ort-schema/protobuf/.gitignore
@@ -0,0 +1,2 @@
+!onnx.js
+!onnx.d.ts
diff --git a/js/node/test/ort-schema/protobuf/README.md b/js/node/test/ort-schema/protobuf/README.md
new file mode 100644
index 0000000000000..f5f52c602f1ad
--- /dev/null
+++ b/js/node/test/ort-schema/protobuf/README.md
@@ -0,0 +1,21 @@
+# ONNX protobuf
+
+This directory contains generated protobuf definition for onnx:
+
+- onnx.js
+- onnx.d.ts
+
+These files are generated from [a fork of onnx-proto](https://github.com/fs-eire/onnx-proto/tree/update-v9).
+
+The ONNX protobuf uses protobufjs@7.2.4, which depends on long@5.2.3, the version contains 2 bugs:
+
+- type export does not work with commonjs. described in https://github.com/dcodeIO/long.js/pull/124. added a "postinstall" script to fix.
+- in the generated typescript declaration file 'onnx.d.ts', the following line:
+  ```ts
+  import Long = require("long");
+  ```
+  need to be replaced to fix type import error:
+  ```ts
+  import Long from "long";
+  ```
+  this replacement is done and code format is also applied to file 'onnx.d.ts'.
diff --git a/js/node/test/ort-schema/protobuf/onnx.d.ts b/js/node/test/ort-schema/protobuf/onnx.d.ts
new file mode 100644
index 0000000000000..c60264dca2a8d
--- /dev/null
+++ b/js/node/test/ort-schema/protobuf/onnx.d.ts
@@ -0,0 +1,2627 @@
+import Long from 'long';
+import * as $protobuf from 'protobufjs';
+
+/** Namespace onnx. */
+export namespace onnx {
+
+  /** Version enum. */
+  enum Version {
+    _START_VERSION = 0,
+    IR_VERSION_2017_10_10 = 1,
+    IR_VERSION_2017_10_30 = 2,
+    IR_VERSION_2017_11_3 = 3,
+    IR_VERSION_2019_1_22 = 4,
+    IR_VERSION_2019_3_18 = 5,
+    IR_VERSION_2019_9_19 = 6,
+    IR_VERSION_2020_5_8 = 7,
+    IR_VERSION_2021_7_30 = 8,
+    IR_VERSION = 9
+  }
+
+  /** Properties of an AttributeProto. */
+  interface IAttributeProto {
+    /** AttributeProto name */
+    name?: (string|null);
+
+    /** AttributeProto refAttrName */
+    refAttrName?: (string|null);
+
+    /** AttributeProto docString */
+    docString?: (string|null);
+
+    /** AttributeProto type */
+    type?: (onnx.AttributeProto.AttributeType|null);
+
+    /** AttributeProto f */
+    f?: (number|null);
+
+    /** AttributeProto i */
+    i?: (number|Long|null);
+
+    /** AttributeProto s */
+    s?: (Uint8Array|null);
+
+    /** AttributeProto t */
+    t?: (onnx.ITensorProto|null);
+
+    /** AttributeProto g */
+    g?: (onnx.IGraphProto|null);
+
+    /** AttributeProto sparseTensor */
+    sparseTensor?: (onnx.ISparseTensorProto|null);
+
+    /** AttributeProto tp */
+    tp?: (onnx.ITypeProto|null);
+
+    /** AttributeProto floats */
+    floats?: (number[]|null);
+
+    /** AttributeProto ints */
+    ints?: ((number | Long)[]|null);
+
+    /** AttributeProto strings */
+    strings?: (Uint8Array[]|null);
+
+    /** AttributeProto tensors */
+    tensors?: (onnx.ITensorProto[]|null);
+
+    /** AttributeProto graphs */
+    graphs?: (onnx.IGraphProto[]|null);
+
+    /** AttributeProto sparseTensors */
+    sparseTensors?: (onnx.ISparseTensorProto[]|null);
+
+    /** AttributeProto typeProtos */
+    typeProtos?: (onnx.ITypeProto[]|null);
+  }
+
+  /** Represents an AttributeProto. */
+  class AttributeProto implements IAttributeProto {
+    /**
+     * Constructs a new AttributeProto.
+     * @param [properties] Properties to set
+     */
+    constructor(properties?: onnx.IAttributeProto);
+
+    /** AttributeProto name. */
+    public name: string;
+
+    /** AttributeProto refAttrName. */
+    public refAttrName: string;
+
+    /** AttributeProto docString. */
+    public docString: string;
+
+    /** AttributeProto type. */
+    public type: onnx.AttributeProto.AttributeType;
+
+    /** AttributeProto f. */
+    public f: number;
+
+    /** AttributeProto i. */
+    public i: (number|Long);
+
+    /** AttributeProto s. */
+    public s: Uint8Array;
+
+    /** AttributeProto t. */
+    public t?: (onnx.ITensorProto|null);
+
+    /** AttributeProto g. */
+    public g?: (onnx.IGraphProto|null);
+
+    /** AttributeProto sparseTensor. */
+    public sparseTensor?: (onnx.ISparseTensorProto|null);
+
+    /** AttributeProto tp. */
+    public tp?: (onnx.ITypeProto|null);
+
+    /** AttributeProto floats. */
+    public floats: number[];
+
+    /** AttributeProto ints. */
+    public ints: (number|Long)[];
+
+    /** AttributeProto strings. */
+    public strings: Uint8Array[];
+
+    /** AttributeProto tensors. */
+    public tensors: onnx.ITensorProto[];
+
+    /** AttributeProto graphs. */
+    public graphs: onnx.IGraphProto[];
+
+    /** AttributeProto sparseTensors. */
+    public sparseTensors: onnx.ISparseTensorProto[];
+
+    /** AttributeProto typeProtos. */
+    public typeProtos: onnx.ITypeProto[];
+
+    /**
+     * Creates a new AttributeProto instance using the specified properties.
+     * @param [properties] Properties to set
+     * @returns AttributeProto instance
+     */
+    public static create(properties?: onnx.IAttributeProto): onnx.AttributeProto;
+
+    /**
+     * Encodes the specified AttributeProto message. Does not implicitly {@link onnx.AttributeProto.verify|verify}
+     * messages.
+     * @param message AttributeProto message or plain object to encode
+     * @param [writer] Writer to encode to
+     * @returns Writer
+     */
+    public static encode(message: onnx.IAttributeProto, writer?: $protobuf.Writer): $protobuf.Writer;
+
+    /**
+     * Encodes the specified AttributeProto message, length delimited. Does not implicitly {@link
+     * onnx.AttributeProto.verify|verify} messages.
+     * @param message AttributeProto message or plain object to encode
+     * @param [writer] Writer to encode to
+     * @returns Writer
+     */
+    public static encodeDelimited(message: onnx.IAttributeProto, writer?: $protobuf.Writer): $protobuf.Writer;
+
+    /**
+     * Decodes an AttributeProto message from the specified reader or buffer.
+     * @param reader Reader or buffer to decode from
+     * @param [length] Message length if known beforehand
+     * @returns AttributeProto
+     * @throws {Error} If the payload is not a reader or valid buffer
+     * @throws {$protobuf.util.ProtocolError} If required fields are missing
+     */
+    public static decode(reader: ($protobuf.Reader|Uint8Array), length?: number): onnx.AttributeProto;
+
+    /**
+     * Decodes an AttributeProto message from the specified reader or buffer, length delimited.
+     * @param reader Reader or buffer to decode from
+     * @returns AttributeProto
+     * @throws {Error} If the payload is not a reader or valid buffer
+     * @throws {$protobuf.util.ProtocolError} If required fields are missing
+     */
+    public static decodeDelimited(reader: ($protobuf.Reader|Uint8Array)): onnx.AttributeProto;
+
+    /**
+     * Verifies an AttributeProto message.
+     * @param message Plain object to verify
+     * @returns `null` if valid, otherwise the reason why it is not
+     */
+    public static verify(message: {[k: string]: any}): (string|null);
+
+    /**
+     * Creates an AttributeProto message from a plain object. Also converts values to their respective internal types.
+     * @param object Plain object
+     * @returns AttributeProto
+     */
+    public static fromObject(object: {[k: string]: any}): onnx.AttributeProto;
+
+    /**
+     * Creates a plain object from an AttributeProto message. Also converts values to other types if specified.
+     * @param message AttributeProto
+     * @param [options] Conversion options
+     * @returns Plain object
+     */
+    public static toObject(message: onnx.AttributeProto, options?: $protobuf.IConversionOptions): {[k: string]: any};
+
+    /**
+     * Converts this AttributeProto to JSON.
+     * @returns JSON object
+     */
+    public toJSON(): {[k: string]: any};
+
+    /**
+     * Gets the default type url for AttributeProto
+     * @param [typeUrlPrefix] your custom typeUrlPrefix(default "type.googleapis.com")
+     * @returns The default type url
+     */
+    public static getTypeUrl(typeUrlPrefix?: string): string;
+  }
+
+  namespace AttributeProto {
+
+    /** AttributeType enum. */
+    enum AttributeType {
+      UNDEFINED = 0,
+      FLOAT = 1,
+      INT = 2,
+      STRING = 3,
+      TENSOR = 4,
+      GRAPH = 5,
+      SPARSE_TENSOR = 11,
+      TYPE_PROTO = 13,
+      FLOATS = 6,
+      INTS = 7,
+      STRINGS = 8,
+      TENSORS = 9,
+      GRAPHS = 10,
+      SPARSE_TENSORS = 12,
+      TYPE_PROTOS = 14
+    }
+  }
+
+  /** Properties of a ValueInfoProto. */
+  interface IValueInfoProto {
+    /** ValueInfoProto name */
+    name?: (string|null);
+
+    /** ValueInfoProto type */
+    type?: (onnx.ITypeProto|null);
+
+    /** ValueInfoProto docString */
+    docString?: (string|null);
+  }
+
+  /** Represents a ValueInfoProto. */
+  class ValueInfoProto implements IValueInfoProto {
+    /**
+     * Constructs a new ValueInfoProto.
+     * @param [properties] Properties to set
+     */
+    constructor(properties?: onnx.IValueInfoProto);
+
+    /** ValueInfoProto name. */
+    public name: string;
+
+    /** ValueInfoProto type. */
+    public type?: (onnx.ITypeProto|null);
+
+    /** ValueInfoProto docString. */
+    public docString: string;
+
+    /**
+     * Creates a new ValueInfoProto instance using the specified properties.
+     * @param [properties] Properties to set
+     * @returns ValueInfoProto instance
+     */
+    public static create(properties?: onnx.IValueInfoProto): onnx.ValueInfoProto;
+
+    /**
+     * Encodes the specified ValueInfoProto message. Does not implicitly {@link onnx.ValueInfoProto.verify|verify}
+     * messages.
+     * @param message ValueInfoProto message or plain object to encode
+     * @param [writer] Writer to encode to
+     * @returns Writer
+     */
+    public static encode(message: onnx.IValueInfoProto, writer?: $protobuf.Writer): $protobuf.Writer;
+
+    /**
+     * Encodes the specified ValueInfoProto message, length delimited. Does not implicitly {@link
+     * onnx.ValueInfoProto.verify|verify} messages.
+     * @param message ValueInfoProto message or plain object to encode
+     * @param [writer] Writer to encode to
+     * @returns Writer
+     */
+    public static encodeDelimited(message: onnx.IValueInfoProto, writer?: $protobuf.Writer): $protobuf.Writer;
+
+    /**
+     * Decodes a ValueInfoProto message from the specified reader or buffer.
+     * @param reader Reader or buffer to decode from
+     * @param [length] Message length if known beforehand
+     * @returns ValueInfoProto
+     * @throws {Error} If the payload is not a reader or valid buffer
+     * @throws {$protobuf.util.ProtocolError} If required fields are missing
+     */
+    public static decode(reader: ($protobuf.Reader|Uint8Array), length?: number): onnx.ValueInfoProto;
+
+    /**
+     * Decodes a ValueInfoProto message from the specified reader or buffer, length delimited.
+     * @param reader Reader or buffer to decode from
+     * @returns ValueInfoProto
+     * @throws {Error} If the payload is not a reader or valid buffer
+     * @throws {$protobuf.util.ProtocolError} If required fields are missing
+     */
+    public static decodeDelimited(reader: ($protobuf.Reader|Uint8Array)): onnx.ValueInfoProto;
+
+    /**
+     * Verifies a ValueInfoProto message.
+     * @param message Plain object to verify
+     * @returns `null` if valid, otherwise the reason why it is not
+     */
+    public static verify(message: {[k: string]: any}): (string|null);
+
+    /**
+     * Creates a ValueInfoProto message from a plain object. Also converts values to their respective internal types.
+     * @param object Plain object
+     * @returns ValueInfoProto
+     */
+    public static fromObject(object: {[k: string]: any}): onnx.ValueInfoProto;
+
+    /**
+     * Creates a plain object from a ValueInfoProto message. Also converts values to other types if specified.
+     * @param message ValueInfoProto
+     * @param [options] Conversion options
+     * @returns Plain object
+     */
+    public static toObject(message: onnx.ValueInfoProto, options?: $protobuf.IConversionOptions): {[k: string]: any};
+
+    /**
+     * Converts this ValueInfoProto to JSON.
+     * @returns JSON object
+     */
+    public toJSON(): {[k: string]: any};
+
+    /**
+     * Gets the default type url for ValueInfoProto
+     * @param [typeUrlPrefix] your custom typeUrlPrefix(default "type.googleapis.com")
+     * @returns The default type url
+     */
+    public static getTypeUrl(typeUrlPrefix?: string): string;
+  }
+
+  /** Properties of a NodeProto. */
+  interface INodeProto {
+    /** NodeProto input */
+    input?: (string[]|null);
+
+    /** NodeProto output */
+    output?: (string[]|null);
+
+    /** NodeProto name */
+    name?: (string|null);
+
+    /** NodeProto opType */
+    opType?: (string|null);
+
+    /** NodeProto domain */
+    domain?: (string|null);
+
+    /** NodeProto attribute */
+    attribute?: (onnx.IAttributeProto[]|null);
+
+    /** NodeProto docString */
+    docString?: (string|null);
+  }
+
+  /** Represents a NodeProto. */
+  class NodeProto implements INodeProto {
+    /**
+     * Constructs a new NodeProto.
+     * @param [properties] Properties to set
+     */
+    constructor(properties?: onnx.INodeProto);
+
+    /** NodeProto input. */
+    public input: string[];
+
+    /** NodeProto output. */
+    public output: string[];
+
+    /** NodeProto name. */
+    public name: string;
+
+    /** NodeProto opType. */
+    public opType: string;
+
+    /** NodeProto domain. */
+    public domain: string;
+
+    /** NodeProto attribute. */
+    public attribute: onnx.IAttributeProto[];
+
+    /** NodeProto docString. */
+    public docString: string;
+
+    /**
+     * Creates a new NodeProto instance using the specified properties.
+     * @param [properties] Properties to set
+     * @returns NodeProto instance
+     */
+    public static create(properties?: onnx.INodeProto): onnx.NodeProto;
+
+    /**
+     * Encodes the specified NodeProto message. Does not implicitly {@link onnx.NodeProto.verify|verify} messages.
+     * @param message NodeProto message or plain object to encode
+     * @param [writer] Writer to encode to
+     * @returns Writer
+     */
+    public static encode(message: onnx.INodeProto, writer?: $protobuf.Writer): $protobuf.Writer;
+
+    /**
+     * Encodes the specified NodeProto message, length delimited. Does not implicitly {@link
+     * onnx.NodeProto.verify|verify} messages.
+     * @param message NodeProto message or plain object to encode
+     * @param [writer] Writer to encode to
+     * @returns Writer
+     */
+    public static encodeDelimited(message: onnx.INodeProto, writer?: $protobuf.Writer): $protobuf.Writer;
+
+    /**
+     * Decodes a NodeProto message from the specified reader or buffer.
+     * @param reader Reader or buffer to decode from
+     * @param [length] Message length if known beforehand
+     * @returns NodeProto
+     * @throws {Error} If the payload is not a reader or valid buffer
+     * @throws {$protobuf.util.ProtocolError} If required fields are missing
+     */
+    public static decode(reader: ($protobuf.Reader|Uint8Array), length?: number): onnx.NodeProto;
+
+    /**
+     * Decodes a NodeProto message from the specified reader or buffer, length delimited.
+     * @param reader Reader or buffer to decode from
+     * @returns NodeProto
+     * @throws {Error} If the payload is not a reader or valid buffer
+     * @throws {$protobuf.util.ProtocolError} If required fields are missing
+     */
+    public static decodeDelimited(reader: ($protobuf.Reader|Uint8Array)): onnx.NodeProto;
+
+    /**
+     * Verifies a NodeProto message.
+     * @param message Plain object to verify
+     * @returns `null` if valid, otherwise the reason why it is not
+     */
+    public static verify(message: {[k: string]: any}): (string|null);
+
+    /**
+     * Creates a NodeProto message from a plain object. Also converts values to their respective internal types.
+     * @param object Plain object
+     * @returns NodeProto
+     */
+    public static fromObject(object: {[k: string]: any}): onnx.NodeProto;
+
+    /**
+     * Creates a plain object from a NodeProto message. Also converts values to other types if specified.
+     * @param message NodeProto
+     * @param [options] Conversion options
+     * @returns Plain object
+     */
+    public static toObject(message: onnx.NodeProto, options?: $protobuf.IConversionOptions): {[k: string]: any};
+
+    /**
+     * Converts this NodeProto to JSON.
+     * @returns JSON object
+     */
+    public toJSON(): {[k: string]: any};
+
+    /**
+     * Gets the default type url for NodeProto
+     * @param [typeUrlPrefix] your custom typeUrlPrefix(default "type.googleapis.com")
+     * @returns The default type url
+     */
+    public static getTypeUrl(typeUrlPrefix?: string): string;
+  }
+
+  /** Properties of a TrainingInfoProto. */
+  interface ITrainingInfoProto {
+    /** TrainingInfoProto initialization */
+    initialization?: (onnx.IGraphProto|null);
+
+    /** TrainingInfoProto algorithm */
+    algorithm?: (onnx.IGraphProto|null);
+
+    /** TrainingInfoProto initializationBinding */
+    initializationBinding?: (onnx.IStringStringEntryProto[]|null);
+
+    /** TrainingInfoProto updateBinding */
+    updateBinding?: (onnx.IStringStringEntryProto[]|null);
+  }
+
+  /** Represents a TrainingInfoProto. */
+  class TrainingInfoProto implements ITrainingInfoProto {
+    /**
+     * Constructs a new TrainingInfoProto.
+     * @param [properties] Properties to set
+     */
+    constructor(properties?: onnx.ITrainingInfoProto);
+
+    /** TrainingInfoProto initialization. */
+    public initialization?: (onnx.IGraphProto|null);
+
+    /** TrainingInfoProto algorithm. */
+    public algorithm?: (onnx.IGraphProto|null);
+
+    /** TrainingInfoProto initializationBinding. */
+    public initializationBinding: onnx.IStringStringEntryProto[];
+
+    /** TrainingInfoProto updateBinding. */
+    public updateBinding: onnx.IStringStringEntryProto[];
+
+    /**
+     * Creates a new TrainingInfoProto instance using the specified properties.
+     * @param [properties] Properties to set
+     * @returns TrainingInfoProto instance
+     */
+    public static create(properties?: onnx.ITrainingInfoProto): onnx.TrainingInfoProto;
+
+    /**
+     * Encodes the specified TrainingInfoProto message. Does not implicitly {@link onnx.TrainingInfoProto.verify|verify}
+     * messages.
+     * @param message TrainingInfoProto message or plain object to encode
+     * @param [writer] Writer to encode to
+     * @returns Writer
+     */
+    public static encode(message: onnx.ITrainingInfoProto, writer?: $protobuf.Writer): $protobuf.Writer;
+
+    /**
+     * Encodes the specified TrainingInfoProto message, length delimited. Does not implicitly {@link
+     * onnx.TrainingInfoProto.verify|verify} messages.
+     * @param message TrainingInfoProto message or plain object to encode
+     * @param [writer] Writer to encode to
+     * @returns Writer
+     */
+    public static encodeDelimited(message: onnx.ITrainingInfoProto, writer?: $protobuf.Writer): $protobuf.Writer;
+
+    /**
+     * Decodes a TrainingInfoProto message from the specified reader or buffer.
+     * @param reader Reader or buffer to decode from
+     * @param [length] Message length if known beforehand
+     * @returns TrainingInfoProto
+     * @throws {Error} If the payload is not a reader or valid buffer
+     * @throws {$protobuf.util.ProtocolError} If required fields are missing
+     */
+    public static decode(reader: ($protobuf.Reader|Uint8Array), length?: number): onnx.TrainingInfoProto;
+
+    /**
+     * Decodes a TrainingInfoProto message from the specified reader or buffer, length delimited.
+     * @param reader Reader or buffer to decode from
+     * @returns TrainingInfoProto
+     * @throws {Error} If the payload is not a reader or valid buffer
+     * @throws {$protobuf.util.ProtocolError} If required fields are missing
+     */
+    public static decodeDelimited(reader: ($protobuf.Reader|Uint8Array)): onnx.TrainingInfoProto;
+
+    /**
+     * Verifies a TrainingInfoProto message.
+     * @param message Plain object to verify
+     * @returns `null` if valid, otherwise the reason why it is not
+     */
+    public static verify(message: {[k: string]: any}): (string|null);
+
+    /**
+     * Creates a TrainingInfoProto message from a plain object. Also converts values to their respective internal types.
+     * @param object Plain object
+     * @returns TrainingInfoProto
+     */
+    public static fromObject(object: {[k: string]: any}): onnx.TrainingInfoProto;
+
+    /**
+     * Creates a plain object from a TrainingInfoProto message. Also converts values to other types if specified.
+     * @param message TrainingInfoProto
+     * @param [options] Conversion options
+     * @returns Plain object
+     */
+    public static toObject(message: onnx.TrainingInfoProto, options?: $protobuf.IConversionOptions): {[k: string]: any};
+
+    /**
+     * Converts this TrainingInfoProto to JSON.
+     * @returns JSON object
+     */
+    public toJSON(): {[k: string]: any};
+
+    /**
+     * Gets the default type url for TrainingInfoProto
+     * @param [typeUrlPrefix] your custom typeUrlPrefix(default "type.googleapis.com")
+     * @returns The default type url
+     */
+    public static getTypeUrl(typeUrlPrefix?: string): string;
+  }
+
+  /** Properties of a ModelProto. */
+  interface IModelProto {
+    /** ModelProto irVersion */
+    irVersion?: (number|Long|null);
+
+    /** ModelProto opsetImport */
+    opsetImport?: (onnx.IOperatorSetIdProto[]|null);
+
+    /** ModelProto producerName */
+    producerName?: (string|null);
+
+    /** ModelProto producerVersion */
+    producerVersion?: (string|null);
+
+    /** ModelProto domain */
+    domain?: (string|null);
+
+    /** ModelProto modelVersion */
+    modelVersion?: (number|Long|null);
+
+    /** ModelProto docString */
+    docString?: (string|null);
+
+    /** ModelProto graph */
+    graph?: (onnx.IGraphProto|null);
+
+    /** ModelProto metadataProps */
+    metadataProps?: (onnx.IStringStringEntryProto[]|null);
+
+    /** ModelProto trainingInfo */
+    trainingInfo?: (onnx.ITrainingInfoProto[]|null);
+
+    /** ModelProto functions */
+    functions?: (onnx.IFunctionProto[]|null);
+  }
+
+  /** Represents a ModelProto. */
+  class ModelProto implements IModelProto {
+    /**
+     * Constructs a new ModelProto.
+     * @param [properties] Properties to set
+     */
+    constructor(properties?: onnx.IModelProto);
+
+    /** ModelProto irVersion. */
+    public irVersion: (number|Long);
+
+    /** ModelProto opsetImport. */
+    public opsetImport: onnx.IOperatorSetIdProto[];
+
+    /** ModelProto producerName. */
+    public producerName: string;
+
+    /** ModelProto producerVersion. */
+    public producerVersion: string;
+
+    /** ModelProto domain. */
+    public domain: string;
+
+    /** ModelProto modelVersion. */
+    public modelVersion: (number|Long);
+
+    /** ModelProto docString. */
+    public docString: string;
+
+    /** ModelProto graph. */
+    public graph?: (onnx.IGraphProto|null);
+
+    /** ModelProto metadataProps. */
+    public metadataProps: onnx.IStringStringEntryProto[];
+
+    /** ModelProto trainingInfo. */
+    public trainingInfo: onnx.ITrainingInfoProto[];
+
+    /** ModelProto functions. */
+    public functions: onnx.IFunctionProto[];
+
+    /**
+     * Creates a new ModelProto instance using the specified properties.
+     * @param [properties] Properties to set
+     * @returns ModelProto instance
+     */
+    public static create(properties?: onnx.IModelProto): onnx.ModelProto;
+
+    /**
+     * Encodes the specified ModelProto message. Does not implicitly {@link onnx.ModelProto.verify|verify} messages.
+     * @param message ModelProto message or plain object to encode
+     * @param [writer] Writer to encode to
+     * @returns Writer
+     */
+    public static encode(message: onnx.IModelProto, writer?: $protobuf.Writer): $protobuf.Writer;
+
+    /**
+     * Encodes the specified ModelProto message, length delimited. Does not implicitly {@link
+     * onnx.ModelProto.verify|verify} messages.
+     * @param message ModelProto message or plain object to encode
+     * @param [writer] Writer to encode to
+     * @returns Writer
+     */
+    public static encodeDelimited(message: onnx.IModelProto, writer?: $protobuf.Writer): $protobuf.Writer;
+
+    /**
+     * Decodes a ModelProto message from the specified reader or buffer.
+     * @param reader Reader or buffer to decode from
+     * @param [length] Message length if known beforehand
+     * @returns ModelProto
+     * @throws {Error} If the payload is not a reader or valid buffer
+     * @throws {$protobuf.util.ProtocolError} If required fields are missing
+     */
+    public static decode(reader: ($protobuf.Reader|Uint8Array), length?: number): onnx.ModelProto;
+
+    /**
+     * Decodes a ModelProto message from the specified reader or buffer, length delimited.
+     * @param reader Reader or buffer to decode from
+     * @returns ModelProto
+     * @throws {Error} If the payload is not a reader or valid buffer
+     * @throws {$protobuf.util.ProtocolError} If required fields are missing
+     */
+    public static decodeDelimited(reader: ($protobuf.Reader|Uint8Array)): onnx.ModelProto;
+
+    /**
+     * Verifies a ModelProto message.
+     * @param message Plain object to verify
+     * @returns `null` if valid, otherwise the reason why it is not
+     */
+    public static verify(message: {[k: string]: any}): (string|null);
+
+    /**
+     * Creates a ModelProto message from a plain object. Also converts values to their respective internal types.
+     * @param object Plain object
+     * @returns ModelProto
+     */
+    public static fromObject(object: {[k: string]: any}): onnx.ModelProto;
+
+    /**
+     * Creates a plain object from a ModelProto message. Also converts values to other types if specified.
+     * @param message ModelProto
+     * @param [options] Conversion options
+     * @returns Plain object
+     */
+    public static toObject(message: onnx.ModelProto, options?: $protobuf.IConversionOptions): {[k: string]: any};
+
+    /**
+     * Converts this ModelProto to JSON.
+     * @returns JSON object
+     */
+    public toJSON(): {[k: string]: any};
+
+    /**
+     * Gets the default type url for ModelProto
+     * @param [typeUrlPrefix] your custom typeUrlPrefix(default "type.googleapis.com")
+     * @returns The default type url
+     */
+    public static getTypeUrl(typeUrlPrefix?: string): string;
+  }
+
+  /** Properties of a StringStringEntryProto. */
+  interface IStringStringEntryProto {
+    /** StringStringEntryProto key */
+    key?: (string|null);
+
+    /** StringStringEntryProto value */
+    value?: (string|null);
+  }
+
+  /** Represents a StringStringEntryProto. */
+  class StringStringEntryProto implements IStringStringEntryProto {
+    /**
+     * Constructs a new StringStringEntryProto.
+     * @param [properties] Properties to set
+     */
+    constructor(properties?: onnx.IStringStringEntryProto);
+
+    /** StringStringEntryProto key. */
+    public key: string;
+
+    /** StringStringEntryProto value. */
+    public value: string;
+
+    /**
+     * Creates a new StringStringEntryProto instance using the specified properties.
+     * @param [properties] Properties to set
+     * @returns StringStringEntryProto instance
+     */
+    public static create(properties?: onnx.IStringStringEntryProto): onnx.StringStringEntryProto;
+
+    /**
+     * Encodes the specified StringStringEntryProto message. Does not implicitly {@link
+     * onnx.StringStringEntryProto.verify|verify} messages.
+     * @param message StringStringEntryProto message or plain object to encode
+     * @param [writer] Writer to encode to
+     * @returns Writer
+     */
+    public static encode(message: onnx.IStringStringEntryProto, writer?: $protobuf.Writer): $protobuf.Writer;
+
+    /**
+     * Encodes the specified StringStringEntryProto message, length delimited. Does not implicitly {@link
+     * onnx.StringStringEntryProto.verify|verify} messages.
+     * @param message StringStringEntryProto message or plain object to encode
+     * @param [writer] Writer to encode to
+     * @returns Writer
+     */
+    public static encodeDelimited(message: onnx.IStringStringEntryProto, writer?: $protobuf.Writer): $protobuf.Writer;
+
+    /**
+     * Decodes a StringStringEntryProto message from the specified reader or buffer.
+     * @param reader Reader or buffer to decode from
+     * @param [length] Message length if known beforehand
+     * @returns StringStringEntryProto
+     * @throws {Error} If the payload is not a reader or valid buffer
+     * @throws {$protobuf.util.ProtocolError} If required fields are missing
+     */
+    public static decode(reader: ($protobuf.Reader|Uint8Array), length?: number): onnx.StringStringEntryProto;
+
+    /**
+     * Decodes a StringStringEntryProto message from the specified reader or buffer, length delimited.
+     * @param reader Reader or buffer to decode from
+     * @returns StringStringEntryProto
+     * @throws {Error} If the payload is not a reader or valid buffer
+     * @throws {$protobuf.util.ProtocolError} If required fields are missing
+     */
+    public static decodeDelimited(reader: ($protobuf.Reader|Uint8Array)): onnx.StringStringEntryProto;
+
+    /**
+     * Verifies a StringStringEntryProto message.
+     * @param message Plain object to verify
+     * @returns `null` if valid, otherwise the reason why it is not
+     */
+    public static verify(message: {[k: string]: any}): (string|null);
+
+    /**
+     * Creates a StringStringEntryProto message from a plain object. Also converts values to their respective internal
+     * types.
+     * @param object Plain object
+     * @returns StringStringEntryProto
+     */
+    public static fromObject(object: {[k: string]: any}): onnx.StringStringEntryProto;
+
+    /**
+     * Creates a plain object from a StringStringEntryProto message. Also converts values to other types if specified.
+     * @param message StringStringEntryProto
+     * @param [options] Conversion options
+     * @returns Plain object
+     */
+    public static toObject(message: onnx.StringStringEntryProto, options?: $protobuf.IConversionOptions):
+        {[k: string]: any};
+
+    /**
+     * Converts this StringStringEntryProto to JSON.
+     * @returns JSON object
+     */
+    public toJSON(): {[k: string]: any};
+
+    /**
+     * Gets the default type url for StringStringEntryProto
+     * @param [typeUrlPrefix] your custom typeUrlPrefix(default "type.googleapis.com")
+     * @returns The default type url
+     */
+    public static getTypeUrl(typeUrlPrefix?: string): string;
+  }
+
+  /** Properties of a TensorAnnotation. */
+  interface ITensorAnnotation {
+    /** TensorAnnotation tensorName */
+    tensorName?: (string|null);
+
+    /** TensorAnnotation quantParameterTensorNames */
+    quantParameterTensorNames?: (onnx.IStringStringEntryProto[]|null);
+  }
+
+  /** Represents a TensorAnnotation. */
+  class TensorAnnotation implements ITensorAnnotation {
+    /**
+     * Constructs a new TensorAnnotation.
+     * @param [properties] Properties to set
+     */
+    constructor(properties?: onnx.ITensorAnnotation);
+
+    /** TensorAnnotation tensorName. */
+    public tensorName: string;
+
+    /** TensorAnnotation quantParameterTensorNames. */
+    public quantParameterTensorNames: onnx.IStringStringEntryProto[];
+
+    /**
+     * Creates a new TensorAnnotation instance using the specified properties.
+     * @param [properties] Properties to set
+     * @returns TensorAnnotation instance
+     */
+    public static create(properties?: onnx.ITensorAnnotation): onnx.TensorAnnotation;
+
+    /**
+     * Encodes the specified TensorAnnotation message. Does not implicitly {@link onnx.TensorAnnotation.verify|verify}
+     * messages.
+     * @param message TensorAnnotation message or plain object to encode
+     * @param [writer] Writer to encode to
+     * @returns Writer
+     */
+    public static encode(message: onnx.ITensorAnnotation, writer?: $protobuf.Writer): $protobuf.Writer;
+
+    /**
+     * Encodes the specified TensorAnnotation message, length delimited. Does not implicitly {@link
+     * onnx.TensorAnnotation.verify|verify} messages.
+     * @param message TensorAnnotation message or plain object to encode
+     * @param [writer] Writer to encode to
+     * @returns Writer
+     */
+    public static encodeDelimited(message: onnx.ITensorAnnotation, writer?: $protobuf.Writer): $protobuf.Writer;
+
+    /**
+     * Decodes a TensorAnnotation message from the specified reader or buffer.
+     * @param reader Reader or buffer to decode from
+     * @param [length] Message length if known beforehand
+     * @returns TensorAnnotation
+     * @throws {Error} If the payload is not a reader or valid buffer
+     * @throws {$protobuf.util.ProtocolError} If required fields are missing
+     */
+    public static decode(reader: ($protobuf.Reader|Uint8Array), length?: number): onnx.TensorAnnotation;
+
+    /**
+     * Decodes a TensorAnnotation message from the specified reader or buffer, length delimited.
+     * @param reader Reader or buffer to decode from
+     * @returns TensorAnnotation
+     * @throws {Error} If the payload is not a reader or valid buffer
+     * @throws {$protobuf.util.ProtocolError} If required fields are missing
+     */
+    public static decodeDelimited(reader: ($protobuf.Reader|Uint8Array)): onnx.TensorAnnotation;
+
+    /**
+     * Verifies a TensorAnnotation message.
+     * @param message Plain object to verify
+     * @returns `null` if valid, otherwise the reason why it is not
+     */
+    public static verify(message: {[k: string]: any}): (string|null);
+
+    /**
+     * Creates a TensorAnnotation message from a plain object. Also converts values to their respective internal types.
+     * @param object Plain object
+     * @returns TensorAnnotation
+     */
+    public static fromObject(object: {[k: string]: any}): onnx.TensorAnnotation;
+
+    /**
+     * Creates a plain object from a TensorAnnotation message. Also converts values to other types if specified.
+     * @param message TensorAnnotation
+     * @param [options] Conversion options
+     * @returns Plain object
+     */
+    public static toObject(message: onnx.TensorAnnotation, options?: $protobuf.IConversionOptions): {[k: string]: any};
+
+    /**
+     * Converts this TensorAnnotation to JSON.
+     * @returns JSON object
+     */
+    public toJSON(): {[k: string]: any};
+
+    /**
+     * Gets the default type url for TensorAnnotation
+     * @param [typeUrlPrefix] your custom typeUrlPrefix(default "type.googleapis.com")
+     * @returns The default type url
+     */
+    public static getTypeUrl(typeUrlPrefix?: string): string;
+  }
+
+  /** Properties of a GraphProto. */
+  interface IGraphProto {
+    /** GraphProto node */
+    node?: (onnx.INodeProto[]|null);
+
+    /** GraphProto name */
+    name?: (string|null);
+
+    /** GraphProto initializer */
+    initializer?: (onnx.ITensorProto[]|null);
+
+    /** GraphProto sparseInitializer */
+    sparseInitializer?: (onnx.ISparseTensorProto[]|null);
+
+    /** GraphProto docString */
+    docString?: (string|null);
+
+    /** GraphProto input */
+    input?: (onnx.IValueInfoProto[]|null);
+
+    /** GraphProto output */
+    output?: (onnx.IValueInfoProto[]|null);
+
+    /** GraphProto valueInfo */
+    valueInfo?: (onnx.IValueInfoProto[]|null);
+
+    /** GraphProto quantizationAnnotation */
+    quantizationAnnotation?: (onnx.ITensorAnnotation[]|null);
+  }
+
+  /** Represents a GraphProto. */
+  class GraphProto implements IGraphProto {
+    /**
+     * Constructs a new GraphProto.
+     * @param [properties] Properties to set
+     */
+    constructor(properties?: onnx.IGraphProto);
+
+    /** GraphProto node. */
+    public node: onnx.INodeProto[];
+
+    /** GraphProto name. */
+    public name: string;
+
+    /** GraphProto initializer. */
+    public initializer: onnx.ITensorProto[];
+
+    /** GraphProto sparseInitializer. */
+    public sparseInitializer: onnx.ISparseTensorProto[];
+
+    /** GraphProto docString. */
+    public docString: string;
+
+    /** GraphProto input. */
+    public input: onnx.IValueInfoProto[];
+
+    /** GraphProto output. */
+    public output: onnx.IValueInfoProto[];
+
+    /** GraphProto valueInfo. */
+    public valueInfo: onnx.IValueInfoProto[];
+
+    /** GraphProto quantizationAnnotation. */
+    public quantizationAnnotation: onnx.ITensorAnnotation[];
+
+    /**
+     * Creates a new GraphProto instance using the specified properties.
+     * @param [properties] Properties to set
+     * @returns GraphProto instance
+     */
+    public static create(properties?: onnx.IGraphProto): onnx.GraphProto;
+
+    /**
+     * Encodes the specified GraphProto message. Does not implicitly {@link onnx.GraphProto.verify|verify} messages.
+     * @param message GraphProto message or plain object to encode
+     * @param [writer] Writer to encode to
+     * @returns Writer
+     */
+    public static encode(message: onnx.IGraphProto, writer?: $protobuf.Writer): $protobuf.Writer;
+
+    /**
+     * Encodes the specified GraphProto message, length delimited. Does not implicitly {@link
+     * onnx.GraphProto.verify|verify} messages.
+     * @param message GraphProto message or plain object to encode
+     * @param [writer] Writer to encode to
+     * @returns Writer
+     */
+    public static encodeDelimited(message: onnx.IGraphProto, writer?: $protobuf.Writer): $protobuf.Writer;
+
+    /**
+     * Decodes a GraphProto message from the specified reader or buffer.
+     * @param reader Reader or buffer to decode from
+     * @param [length] Message length if known beforehand
+     * @returns GraphProto
+     * @throws {Error} If the payload is not a reader or valid buffer
+     * @throws {$protobuf.util.ProtocolError} If required fields are missing
+     */
+    public static decode(reader: ($protobuf.Reader|Uint8Array), length?: number): onnx.GraphProto;
+
+    /**
+     * Decodes a GraphProto message from the specified reader or buffer, length delimited.
+     * @param reader Reader or buffer to decode from
+     * @returns GraphProto
+     * @throws {Error} If the payload is not a reader or valid buffer
+     * @throws {$protobuf.util.ProtocolError} If required fields are missing
+     */
+    public static decodeDelimited(reader: ($protobuf.Reader|Uint8Array)): onnx.GraphProto;
+
+    /**
+     * Verifies a GraphProto message.
+     * @param message Plain object to verify
+     * @returns `null` if valid, otherwise the reason why it is not
+     */
+    public static verify(message: {[k: string]: any}): (string|null);
+
+    /**
+     * Creates a GraphProto message from a plain object. Also converts values to their respective internal types.
+     * @param object Plain object
+     * @returns GraphProto
+     */
+    public static fromObject(object: {[k: string]: any}): onnx.GraphProto;
+
+    /**
+     * Creates a plain object from a GraphProto message. Also converts values to other types if specified.
+     * @param message GraphProto
+     * @param [options] Conversion options
+     * @returns Plain object
+     */
+    public static toObject(message: onnx.GraphProto, options?: $protobuf.IConversionOptions): {[k: string]: any};
+
+    /**
+     * Converts this GraphProto to JSON.
+     * @returns JSON object
+     */
+    public toJSON(): {[k: string]: any};
+
+    /**
+     * Gets the default type url for GraphProto
+     * @param [typeUrlPrefix] your custom typeUrlPrefix(default "type.googleapis.com")
+     * @returns The default type url
+     */
+    public static getTypeUrl(typeUrlPrefix?: string): string;
+  }
+
+  /** Properties of a TensorProto. */
+  interface ITensorProto {
+    /** TensorProto dims */
+    dims?: ((number | Long)[]|null);
+
+    /** TensorProto dataType */
+    dataType?: (number|null);
+
+    /** TensorProto segment */
+    segment?: (onnx.TensorProto.ISegment|null);
+
+    /** TensorProto floatData */
+    floatData?: (number[]|null);
+
+    /** TensorProto int32Data */
+    int32Data?: (number[]|null);
+
+    /** TensorProto stringData */
+    stringData?: (Uint8Array[]|null);
+
+    /** TensorProto int64Data */
+    int64Data?: ((number | Long)[]|null);
+
+    /** TensorProto name */
+    name?: (string|null);
+
+    /** TensorProto docString */
+    docString?: (string|null);
+
+    /** TensorProto rawData */
+    rawData?: (Uint8Array|null);
+
+    /** TensorProto externalData */
+    externalData?: (onnx.IStringStringEntryProto[]|null);
+
+    /** TensorProto dataLocation */
+    dataLocation?: (onnx.TensorProto.DataLocation|null);
+
+    /** TensorProto doubleData */
+    doubleData?: (number[]|null);
+
+    /** TensorProto uint64Data */
+    uint64Data?: ((number | Long)[]|null);
+  }
+
+  /** Represents a TensorProto. */
+  class TensorProto implements ITensorProto {
+    /**
+     * Constructs a new TensorProto.
+     * @param [properties] Properties to set
+     */
+    constructor(properties?: onnx.ITensorProto);
+
+    /** TensorProto dims. */
+    public dims: (number|Long)[];
+
+    /** TensorProto dataType. */
+    public dataType: number;
+
+    /** TensorProto segment. */
+    public segment?: (onnx.TensorProto.ISegment|null);
+
+    /** TensorProto floatData. */
+    public floatData: number[];
+
+    /** TensorProto int32Data. */
+    public int32Data: number[];
+
+    /** TensorProto stringData. */
+    public stringData: Uint8Array[];
+
+    /** TensorProto int64Data. */
+    public int64Data: (number|Long)[];
+
+    /** TensorProto name. */
+    public name: string;
+
+    /** TensorProto docString. */
+    public docString: string;
+
+    /** TensorProto rawData. */
+    public rawData: Uint8Array;
+
+    /** TensorProto externalData. */
+    public externalData: onnx.IStringStringEntryProto[];
+
+    /** TensorProto dataLocation. */
+    public dataLocation: onnx.TensorProto.DataLocation;
+
+    /** TensorProto doubleData. */
+    public doubleData: number[];
+
+    /** TensorProto uint64Data. */
+    public uint64Data: (number|Long)[];
+
+    /**
+     * Creates a new TensorProto instance using the specified properties.
+     * @param [properties] Properties to set
+     * @returns TensorProto instance
+     */
+    public static create(properties?: onnx.ITensorProto): onnx.TensorProto;
+
+    /**
+     * Encodes the specified TensorProto message. Does not implicitly {@link onnx.TensorProto.verify|verify} messages.
+     * @param message TensorProto message or plain object to encode
+     * @param [writer] Writer to encode to
+     * @returns Writer
+     */
+    public static encode(message: onnx.ITensorProto, writer?: $protobuf.Writer): $protobuf.Writer;
+
+    /**
+     * Encodes the specified TensorProto message, length delimited. Does not implicitly {@link
+     * onnx.TensorProto.verify|verify} messages.
+     * @param message TensorProto message or plain object to encode
+     * @param [writer] Writer to encode to
+     * @returns Writer
+     */
+    public static encodeDelimited(message: onnx.ITensorProto, writer?: $protobuf.Writer): $protobuf.Writer;
+
+    /**
+     * Decodes a TensorProto message from the specified reader or buffer.
+     * @param reader Reader or buffer to decode from
+     * @param [length] Message length if known beforehand
+     * @returns TensorProto
+     * @throws {Error} If the payload is not a reader or valid buffer
+     * @throws {$protobuf.util.ProtocolError} If required fields are missing
+     */
+    public static decode(reader: ($protobuf.Reader|Uint8Array), length?: number): onnx.TensorProto;
+
+    /**
+     * Decodes a TensorProto message from the specified reader or buffer, length delimited.
+     * @param reader Reader or buffer to decode from
+     * @returns TensorProto
+     * @throws {Error} If the payload is not a reader or valid buffer
+     * @throws {$protobuf.util.ProtocolError} If required fields are missing
+     */
+    public static decodeDelimited(reader: ($protobuf.Reader|Uint8Array)): onnx.TensorProto;
+
+    /**
+     * Verifies a TensorProto message.
+     * @param message Plain object to verify
+     * @returns `null` if valid, otherwise the reason why it is not
+     */
+    public static verify(message: {[k: string]: any}): (string|null);
+
+    /**
+     * Creates a TensorProto message from a plain object. Also converts values to their respective internal types.
+     * @param object Plain object
+     * @returns TensorProto
+     */
+    public static fromObject(object: {[k: string]: any}): onnx.TensorProto;
+
+    /**
+     * Creates a plain object from a TensorProto message. Also converts values to other types if specified.
+     * @param message TensorProto
+     * @param [options] Conversion options
+     * @returns Plain object
+     */
+    public static toObject(message: onnx.TensorProto, options?: $protobuf.IConversionOptions): {[k: string]: any};
+
+    /**
+     * Converts this TensorProto to JSON.
+     * @returns JSON object
+     */
+    public toJSON(): {[k: string]: any};
+
+    /**
+     * Gets the default type url for TensorProto
+     * @param [typeUrlPrefix] your custom typeUrlPrefix(default "type.googleapis.com")
+     * @returns The default type url
+     */
+    public static getTypeUrl(typeUrlPrefix?: string): string;
+  }
+
+  namespace TensorProto {
+
+    /** DataType enum. */
+    enum DataType {
+      UNDEFINED = 0,
+      FLOAT = 1,
+      UINT8 = 2,
+      INT8 = 3,
+      UINT16 = 4,
+      INT16 = 5,
+      INT32 = 6,
+      INT64 = 7,
+      STRING = 8,
+      BOOL = 9,
+      FLOAT16 = 10,
+      DOUBLE = 11,
+      UINT32 = 12,
+      UINT64 = 13,
+      COMPLEX64 = 14,
+      COMPLEX128 = 15,
+      BFLOAT16 = 16,
+      FLOAT8E4M3FN = 17,
+      FLOAT8E4M3FNUZ = 18,
+      FLOAT8E5M2 = 19,
+      FLOAT8E5M2FNUZ = 20
+    }
+
+    /** Properties of a Segment. */
+    interface ISegment {
+      /** Segment begin */
+      begin?: (number|Long|null);
+
+      /** Segment end */
+      end?: (number|Long|null);
+    }
+
+    /** Represents a Segment. */
+    class Segment implements ISegment {
+      /**
+       * Constructs a new Segment.
+       * @param [properties] Properties to set
+       */
+      constructor(properties?: onnx.TensorProto.ISegment);
+
+      /** Segment begin. */
+      public begin: (number|Long);
+
+      /** Segment end. */
+      public end: (number|Long);
+
+      /**
+       * Creates a new Segment instance using the specified properties.
+       * @param [properties] Properties to set
+       * @returns Segment instance
+       */
+      public static create(properties?: onnx.TensorProto.ISegment): onnx.TensorProto.Segment;
+
+      /**
+       * Encodes the specified Segment message. Does not implicitly {@link onnx.TensorProto.Segment.verify|verify}
+       * messages.
+       * @param message Segment message or plain object to encode
+       * @param [writer] Writer to encode to
+       * @returns Writer
+       */
+      public static encode(message: onnx.TensorProto.ISegment, writer?: $protobuf.Writer): $protobuf.Writer;
+
+      /**
+       * Encodes the specified Segment message, length delimited. Does not implicitly {@link
+       * onnx.TensorProto.Segment.verify|verify} messages.
+       * @param message Segment message or plain object to encode
+       * @param [writer] Writer to encode to
+       * @returns Writer
+       */
+      public static encodeDelimited(message: onnx.TensorProto.ISegment, writer?: $protobuf.Writer): $protobuf.Writer;
+
+      /**
+       * Decodes a Segment message from the specified reader or buffer.
+       * @param reader Reader or buffer to decode from
+       * @param [length] Message length if known beforehand
+       * @returns Segment
+       * @throws {Error} If the payload is not a reader or valid buffer
+       * @throws {$protobuf.util.ProtocolError} If required fields are missing
+       */
+      public static decode(reader: ($protobuf.Reader|Uint8Array), length?: number): onnx.TensorProto.Segment;
+
+      /**
+       * Decodes a Segment message from the specified reader or buffer, length delimited.
+       * @param reader Reader or buffer to decode from
+       * @returns Segment
+       * @throws {Error} If the payload is not a reader or valid buffer
+       * @throws {$protobuf.util.ProtocolError} If required fields are missing
+       */
+      public static decodeDelimited(reader: ($protobuf.Reader|Uint8Array)): onnx.TensorProto.Segment;
+
+      /**
+       * Verifies a Segment message.
+       * @param message Plain object to verify
+       * @returns `null` if valid, otherwise the reason why it is not
+       */
+      public static verify(message: {[k: string]: any}): (string|null);
+
+      /**
+       * Creates a Segment message from a plain object. Also converts values to their respective internal types.
+       * @param object Plain object
+       * @returns Segment
+       */
+      public static fromObject(object: {[k: string]: any}): onnx.TensorProto.Segment;
+
+      /**
+       * Creates a plain object from a Segment message. Also converts values to other types if specified.
+       * @param message Segment
+       * @param [options] Conversion options
+       * @returns Plain object
+       */
+      public static toObject(message: onnx.TensorProto.Segment, options?: $protobuf.IConversionOptions):
+          {[k: string]: any};
+
+      /**
+       * Converts this Segment to JSON.
+       * @returns JSON object
+       */
+      public toJSON(): {[k: string]: any};
+
+      /**
+       * Gets the default type url for Segment
+       * @param [typeUrlPrefix] your custom typeUrlPrefix(default "type.googleapis.com")
+       * @returns The default type url
+       */
+      public static getTypeUrl(typeUrlPrefix?: string): string;
+    }
+
+    /** DataLocation enum. */
+    enum DataLocation { DEFAULT = 0, EXTERNAL = 1 }
+  }
+
+  /** Properties of a SparseTensorProto. */
+  interface ISparseTensorProto {
+    /** SparseTensorProto values */
+    values?: (onnx.ITensorProto|null);
+
+    /** SparseTensorProto indices */
+    indices?: (onnx.ITensorProto|null);
+
+    /** SparseTensorProto dims */
+    dims?: ((number | Long)[]|null);
+  }
+
+  /** Represents a SparseTensorProto. */
+  class SparseTensorProto implements ISparseTensorProto {
+    /**
+     * Constructs a new SparseTensorProto.
+     * @param [properties] Properties to set
+     */
+    constructor(properties?: onnx.ISparseTensorProto);
+
+    /** SparseTensorProto values. */
+    public values?: (onnx.ITensorProto|null);
+
+    /** SparseTensorProto indices. */
+    public indices?: (onnx.ITensorProto|null);
+
+    /** SparseTensorProto dims. */
+    public dims: (number|Long)[];
+
+    /**
+     * Creates a new SparseTensorProto instance using the specified properties.
+     * @param [properties] Properties to set
+     * @returns SparseTensorProto instance
+     */
+    public static create(properties?: onnx.ISparseTensorProto): onnx.SparseTensorProto;
+
+    /**
+     * Encodes the specified SparseTensorProto message. Does not implicitly {@link onnx.SparseTensorProto.verify|verify}
+     * messages.
+     * @param message SparseTensorProto message or plain object to encode
+     * @param [writer] Writer to encode to
+     * @returns Writer
+     */
+    public static encode(message: onnx.ISparseTensorProto, writer?: $protobuf.Writer): $protobuf.Writer;
+
+    /**
+     * Encodes the specified SparseTensorProto message, length delimited. Does not implicitly {@link
+     * onnx.SparseTensorProto.verify|verify} messages.
+     * @param message SparseTensorProto message or plain object to encode
+     * @param [writer] Writer to encode to
+     * @returns Writer
+     */
+    public static encodeDelimited(message: onnx.ISparseTensorProto, writer?: $protobuf.Writer): $protobuf.Writer;
+
+    /**
+     * Decodes a SparseTensorProto message from the specified reader or buffer.
+     * @param reader Reader or buffer to decode from
+     * @param [length] Message length if known beforehand
+     * @returns SparseTensorProto
+     * @throws {Error} If the payload is not a reader or valid buffer
+     * @throws {$protobuf.util.ProtocolError} If required fields are missing
+     */
+    public static decode(reader: ($protobuf.Reader|Uint8Array), length?: number): onnx.SparseTensorProto;
+
+    /**
+     * Decodes a SparseTensorProto message from the specified reader or buffer, length delimited.
+     * @param reader Reader or buffer to decode from
+     * @returns SparseTensorProto
+     * @throws {Error} If the payload is not a reader or valid buffer
+     * @throws {$protobuf.util.ProtocolError} If required fields are missing
+     */
+    public static decodeDelimited(reader: ($protobuf.Reader|Uint8Array)): onnx.SparseTensorProto;
+
+    /**
+     * Verifies a SparseTensorProto message.
+     * @param message Plain object to verify
+     * @returns `null` if valid, otherwise the reason why it is not
+     */
+    public static verify(message: {[k: string]: any}): (string|null);
+
+    /**
+     * Creates a SparseTensorProto message from a plain object. Also converts values to their respective internal types.
+     * @param object Plain object
+     * @returns SparseTensorProto
+     */
+    public static fromObject(object: {[k: string]: any}): onnx.SparseTensorProto;
+
+    /**
+     * Creates a plain object from a SparseTensorProto message. Also converts values to other types if specified.
+     * @param message SparseTensorProto
+     * @param [options] Conversion options
+     * @returns Plain object
+     */
+    public static toObject(message: onnx.SparseTensorProto, options?: $protobuf.IConversionOptions): {[k: string]: any};
+
+    /**
+     * Converts this SparseTensorProto to JSON.
+     * @returns JSON object
+     */
+    public toJSON(): {[k: string]: any};
+
+    /**
+     * Gets the default type url for SparseTensorProto
+     * @param [typeUrlPrefix] your custom typeUrlPrefix(default "type.googleapis.com")
+     * @returns The default type url
+     */
+    public static getTypeUrl(typeUrlPrefix?: string): string;
+  }
+
+  /** Properties of a TensorShapeProto. */
+  interface ITensorShapeProto {
+    /** TensorShapeProto dim */
+    dim?: (onnx.TensorShapeProto.IDimension[]|null);
+  }
+
+  /** Represents a TensorShapeProto. */
+  class TensorShapeProto implements ITensorShapeProto {
+    /**
+     * Constructs a new TensorShapeProto.
+     * @param [properties] Properties to set
+     */
+    constructor(properties?: onnx.ITensorShapeProto);
+
+    /** TensorShapeProto dim. */
+    public dim: onnx.TensorShapeProto.IDimension[];
+
+    /**
+     * Creates a new TensorShapeProto instance using the specified properties.
+     * @param [properties] Properties to set
+     * @returns TensorShapeProto instance
+     */
+    public static create(properties?: onnx.ITensorShapeProto): onnx.TensorShapeProto;
+
+    /**
+     * Encodes the specified TensorShapeProto message. Does not implicitly {@link onnx.TensorShapeProto.verify|verify}
+     * messages.
+     * @param message TensorShapeProto message or plain object to encode
+     * @param [writer] Writer to encode to
+     * @returns Writer
+     */
+    public static encode(message: onnx.ITensorShapeProto, writer?: $protobuf.Writer): $protobuf.Writer;
+
+    /**
+     * Encodes the specified TensorShapeProto message, length delimited. Does not implicitly {@link
+     * onnx.TensorShapeProto.verify|verify} messages.
+     * @param message TensorShapeProto message or plain object to encode
+     * @param [writer] Writer to encode to
+     * @returns Writer
+     */
+    public static encodeDelimited(message: onnx.ITensorShapeProto, writer?: $protobuf.Writer): $protobuf.Writer;
+
+    /**
+     * Decodes a TensorShapeProto message from the specified reader or buffer.
+     * @param reader Reader or buffer to decode from
+     * @param [length] Message length if known beforehand
+     * @returns TensorShapeProto
+     * @throws {Error} If the payload is not a reader or valid buffer
+     * @throws {$protobuf.util.ProtocolError} If required fields are missing
+     */
+    public static decode(reader: ($protobuf.Reader|Uint8Array), length?: number): onnx.TensorShapeProto;
+
+    /**
+     * Decodes a TensorShapeProto message from the specified reader or buffer, length delimited.
+     * @param reader Reader or buffer to decode from
+     * @returns TensorShapeProto
+     * @throws {Error} If the payload is not a reader or valid buffer
+     * @throws {$protobuf.util.ProtocolError} If required fields are missing
+     */
+    public static decodeDelimited(reader: ($protobuf.Reader|Uint8Array)): onnx.TensorShapeProto;
+
+    /**
+     * Verifies a TensorShapeProto message.
+     * @param message Plain object to verify
+     * @returns `null` if valid, otherwise the reason why it is not
+     */
+    public static verify(message: {[k: string]: any}): (string|null);
+
+    /**
+     * Creates a TensorShapeProto message from a plain object. Also converts values to their respective internal types.
+     * @param object Plain object
+     * @returns TensorShapeProto
+     */
+    public static fromObject(object: {[k: string]: any}): onnx.TensorShapeProto;
+
+    /**
+     * Creates a plain object from a TensorShapeProto message. Also converts values to other types if specified.
+     * @param message TensorShapeProto
+     * @param [options] Conversion options
+     * @returns Plain object
+     */
+    public static toObject(message: onnx.TensorShapeProto, options?: $protobuf.IConversionOptions): {[k: string]: any};
+
+    /**
+     * Converts this TensorShapeProto to JSON.
+     * @returns JSON object
+     */
+    public toJSON(): {[k: string]: any};
+
+    /**
+     * Gets the default type url for TensorShapeProto
+     * @param [typeUrlPrefix] your custom typeUrlPrefix(default "type.googleapis.com")
+     * @returns The default type url
+     */
+    public static getTypeUrl(typeUrlPrefix?: string): string;
+  }
+
+  namespace TensorShapeProto {
+
+    /** Properties of a Dimension. */
+    interface IDimension {
+      /** Dimension dimValue */
+      dimValue?: (number|Long|null);
+
+      /** Dimension dimParam */
+      dimParam?: (string|null);
+
+      /** Dimension denotation */
+      denotation?: (string|null);
+    }
+
+    /** Represents a Dimension. */
+    class Dimension implements IDimension {
+      /**
+       * Constructs a new Dimension.
+       * @param [properties] Properties to set
+       */
+      constructor(properties?: onnx.TensorShapeProto.IDimension);
+
+      /** Dimension dimValue. */
+      public dimValue?: (number|Long|null);
+
+      /** Dimension dimParam. */
+      public dimParam?: (string|null);
+
+      /** Dimension denotation. */
+      public denotation: string;
+
+      /** Dimension value. */
+      public value?: ('dimValue'|'dimParam');
+
+      /**
+       * Creates a new Dimension instance using the specified properties.
+       * @param [properties] Properties to set
+       * @returns Dimension instance
+       */
+      public static create(properties?: onnx.TensorShapeProto.IDimension): onnx.TensorShapeProto.Dimension;
+
+      /**
+       * Encodes the specified Dimension message. Does not implicitly {@link
+       * onnx.TensorShapeProto.Dimension.verify|verify} messages.
+       * @param message Dimension message or plain object to encode
+       * @param [writer] Writer to encode to
+       * @returns Writer
+       */
+      public static encode(message: onnx.TensorShapeProto.IDimension, writer?: $protobuf.Writer): $protobuf.Writer;
+
+      /**
+       * Encodes the specified Dimension message, length delimited. Does not implicitly {@link
+       * onnx.TensorShapeProto.Dimension.verify|verify} messages.
+       * @param message Dimension message or plain object to encode
+       * @param [writer] Writer to encode to
+       * @returns Writer
+       */
+      public static encodeDelimited(message: onnx.TensorShapeProto.IDimension, writer?: $protobuf.Writer):
+          $protobuf.Writer;
+
+      /**
+       * Decodes a Dimension message from the specified reader or buffer.
+       * @param reader Reader or buffer to decode from
+       * @param [length] Message length if known beforehand
+       * @returns Dimension
+       * @throws {Error} If the payload is not a reader or valid buffer
+       * @throws {$protobuf.util.ProtocolError} If required fields are missing
+       */
+      public static decode(reader: ($protobuf.Reader|Uint8Array), length?: number): onnx.TensorShapeProto.Dimension;
+
+      /**
+       * Decodes a Dimension message from the specified reader or buffer, length delimited.
+       * @param reader Reader or buffer to decode from
+       * @returns Dimension
+       * @throws {Error} If the payload is not a reader or valid buffer
+       * @throws {$protobuf.util.ProtocolError} If required fields are missing
+       */
+      public static decodeDelimited(reader: ($protobuf.Reader|Uint8Array)): onnx.TensorShapeProto.Dimension;
+
+      /**
+       * Verifies a Dimension message.
+       * @param message Plain object to verify
+       * @returns `null` if valid, otherwise the reason why it is not
+       */
+      public static verify(message: {[k: string]: any}): (string|null);
+
+      /**
+       * Creates a Dimension message from a plain object. Also converts values to their respective internal types.
+       * @param object Plain object
+       * @returns Dimension
+       */
+      public static fromObject(object: {[k: string]: any}): onnx.TensorShapeProto.Dimension;
+
+      /**
+       * Creates a plain object from a Dimension message. Also converts values to other types if specified.
+       * @param message Dimension
+       * @param [options] Conversion options
+       * @returns Plain object
+       */
+      public static toObject(message: onnx.TensorShapeProto.Dimension, options?: $protobuf.IConversionOptions):
+          {[k: string]: any};
+
+      /**
+       * Converts this Dimension to JSON.
+       * @returns JSON object
+       */
+      public toJSON(): {[k: string]: any};
+
+      /**
+       * Gets the default type url for Dimension
+       * @param [typeUrlPrefix] your custom typeUrlPrefix(default "type.googleapis.com")
+       * @returns The default type url
+       */
+      public static getTypeUrl(typeUrlPrefix?: string): string;
+    }
+  }
+
+  /** Properties of a TypeProto. */
+  interface ITypeProto {
+    /** TypeProto tensorType */
+    tensorType?: (onnx.TypeProto.ITensor|null);
+
+    /** TypeProto sequenceType */
+    sequenceType?: (onnx.TypeProto.ISequence|null);
+
+    /** TypeProto mapType */
+    mapType?: (onnx.TypeProto.IMap|null);
+
+    /** TypeProto optionalType */
+    optionalType?: (onnx.TypeProto.IOptional|null);
+
+    /** TypeProto sparseTensorType */
+    sparseTensorType?: (onnx.TypeProto.ISparseTensor|null);
+
+    /** TypeProto denotation */
+    denotation?: (string|null);
+  }
+
+  /** Represents a TypeProto. */
+  class TypeProto implements ITypeProto {
+    /**
+     * Constructs a new TypeProto.
+     * @param [properties] Properties to set
+     */
+    constructor(properties?: onnx.ITypeProto);
+
+    /** TypeProto tensorType. */
+    public tensorType?: (onnx.TypeProto.ITensor|null);
+
+    /** TypeProto sequenceType. */
+    public sequenceType?: (onnx.TypeProto.ISequence|null);
+
+    /** TypeProto mapType. */
+    public mapType?: (onnx.TypeProto.IMap|null);
+
+    /** TypeProto optionalType. */
+    public optionalType?: (onnx.TypeProto.IOptional|null);
+
+    /** TypeProto sparseTensorType. */
+    public sparseTensorType?: (onnx.TypeProto.ISparseTensor|null);
+
+    /** TypeProto denotation. */
+    public denotation: string;
+
+    /** TypeProto value. */
+    public value?: ('tensorType'|'sequenceType'|'mapType'|'optionalType'|'sparseTensorType');
+
+    /**
+     * Creates a new TypeProto instance using the specified properties.
+     * @param [properties] Properties to set
+     * @returns TypeProto instance
+     */
+    public static create(properties?: onnx.ITypeProto): onnx.TypeProto;
+
+    /**
+     * Encodes the specified TypeProto message. Does not implicitly {@link onnx.TypeProto.verify|verify} messages.
+     * @param message TypeProto message or plain object to encode
+     * @param [writer] Writer to encode to
+     * @returns Writer
+     */
+    public static encode(message: onnx.ITypeProto, writer?: $protobuf.Writer): $protobuf.Writer;
+
+    /**
+     * Encodes the specified TypeProto message, length delimited. Does not implicitly {@link
+     * onnx.TypeProto.verify|verify} messages.
+     * @param message TypeProto message or plain object to encode
+     * @param [writer] Writer to encode to
+     * @returns Writer
+     */
+    public static encodeDelimited(message: onnx.ITypeProto, writer?: $protobuf.Writer): $protobuf.Writer;
+
+    /**
+     * Decodes a TypeProto message from the specified reader or buffer.
+     * @param reader Reader or buffer to decode from
+     * @param [length] Message length if known beforehand
+     * @returns TypeProto
+     * @throws {Error} If the payload is not a reader or valid buffer
+     * @throws {$protobuf.util.ProtocolError} If required fields are missing
+     */
+    public static decode(reader: ($protobuf.Reader|Uint8Array), length?: number): onnx.TypeProto;
+
+    /**
+     * Decodes a TypeProto message from the specified reader or buffer, length delimited.
+     * @param reader Reader or buffer to decode from
+     * @returns TypeProto
+     * @throws {Error} If the payload is not a reader or valid buffer
+     * @throws {$protobuf.util.ProtocolError} If required fields are missing
+     */
+    public static decodeDelimited(reader: ($protobuf.Reader|Uint8Array)): onnx.TypeProto;
+
+    /**
+     * Verifies a TypeProto message.
+     * @param message Plain object to verify
+     * @returns `null` if valid, otherwise the reason why it is not
+     */
+    public static verify(message: {[k: string]: any}): (string|null);
+
+    /**
+     * Creates a TypeProto message from a plain object. Also converts values to their respective internal types.
+     * @param object Plain object
+     * @returns TypeProto
+     */
+    public static fromObject(object: {[k: string]: any}): onnx.TypeProto;
+
+    /**
+     * Creates a plain object from a TypeProto message. Also converts values to other types if specified.
+     * @param message TypeProto
+     * @param [options] Conversion options
+     * @returns Plain object
+     */
+    public static toObject(message: onnx.TypeProto, options?: $protobuf.IConversionOptions): {[k: string]: any};
+
+    /**
+     * Converts this TypeProto to JSON.
+     * @returns JSON object
+     */
+    public toJSON(): {[k: string]: any};
+
+    /**
+     * Gets the default type url for TypeProto
+     * @param [typeUrlPrefix] your custom typeUrlPrefix(default "type.googleapis.com")
+     * @returns The default type url
+     */
+    public static getTypeUrl(typeUrlPrefix?: string): string;
+  }
+
+  namespace TypeProto {
+
+    /** Properties of a Tensor. */
+    interface ITensor {
+      /** Tensor elemType */
+      elemType?: (number|null);
+
+      /** Tensor shape */
+      shape?: (onnx.ITensorShapeProto|null);
+    }
+
+    /** Represents a Tensor. */
+    class Tensor implements ITensor {
+      /**
+       * Constructs a new Tensor.
+       * @param [properties] Properties to set
+       */
+      constructor(properties?: onnx.TypeProto.ITensor);
+
+      /** Tensor elemType. */
+      public elemType: number;
+
+      /** Tensor shape. */
+      public shape?: (onnx.ITensorShapeProto|null);
+
+      /**
+       * Creates a new Tensor instance using the specified properties.
+       * @param [properties] Properties to set
+       * @returns Tensor instance
+       */
+      public static create(properties?: onnx.TypeProto.ITensor): onnx.TypeProto.Tensor;
+
+      /**
+       * Encodes the specified Tensor message. Does not implicitly {@link onnx.TypeProto.Tensor.verify|verify} messages.
+       * @param message Tensor message or plain object to encode
+       * @param [writer] Writer to encode to
+       * @returns Writer
+       */
+      public static encode(message: onnx.TypeProto.ITensor, writer?: $protobuf.Writer): $protobuf.Writer;
+
+      /**
+       * Encodes the specified Tensor message, length delimited. Does not implicitly {@link
+       * onnx.TypeProto.Tensor.verify|verify} messages.
+       * @param message Tensor message or plain object to encode
+       * @param [writer] Writer to encode to
+       * @returns Writer
+       */
+      public static encodeDelimited(message: onnx.TypeProto.ITensor, writer?: $protobuf.Writer): $protobuf.Writer;
+
+      /**
+       * Decodes a Tensor message from the specified reader or buffer.
+       * @param reader Reader or buffer to decode from
+       * @param [length] Message length if known beforehand
+       * @returns Tensor
+       * @throws {Error} If the payload is not a reader or valid buffer
+       * @throws {$protobuf.util.ProtocolError} If required fields are missing
+       */
+      public static decode(reader: ($protobuf.Reader|Uint8Array), length?: number): onnx.TypeProto.Tensor;
+
+      /**
+       * Decodes a Tensor message from the specified reader or buffer, length delimited.
+       * @param reader Reader or buffer to decode from
+       * @returns Tensor
+       * @throws {Error} If the payload is not a reader or valid buffer
+       * @throws {$protobuf.util.ProtocolError} If required fields are missing
+       */
+      public static decodeDelimited(reader: ($protobuf.Reader|Uint8Array)): onnx.TypeProto.Tensor;
+
+      /**
+       * Verifies a Tensor message.
+       * @param message Plain object to verify
+       * @returns `null` if valid, otherwise the reason why it is not
+       */
+      public static verify(message: {[k: string]: any}): (string|null);
+
+      /**
+       * Creates a Tensor message from a plain object. Also converts values to their respective internal types.
+       * @param object Plain object
+       * @returns Tensor
+       */
+      public static fromObject(object: {[k: string]: any}): onnx.TypeProto.Tensor;
+
+      /**
+       * Creates a plain object from a Tensor message. Also converts values to other types if specified.
+       * @param message Tensor
+       * @param [options] Conversion options
+       * @returns Plain object
+       */
+      public static toObject(message: onnx.TypeProto.Tensor, options?: $protobuf.IConversionOptions):
+          {[k: string]: any};
+
+      /**
+       * Converts this Tensor to JSON.
+       * @returns JSON object
+       */
+      public toJSON(): {[k: string]: any};
+
+      /**
+       * Gets the default type url for Tensor
+       * @param [typeUrlPrefix] your custom typeUrlPrefix(default "type.googleapis.com")
+       * @returns The default type url
+       */
+      public static getTypeUrl(typeUrlPrefix?: string): string;
+    }
+
+    /** Properties of a Sequence. */
+    interface ISequence {
+      /** Sequence elemType */
+      elemType?: (onnx.ITypeProto|null);
+    }
+
+    /** Represents a Sequence. */
+    class Sequence implements ISequence {
+      /**
+       * Constructs a new Sequence.
+       * @param [properties] Properties to set
+       */
+      constructor(properties?: onnx.TypeProto.ISequence);
+
+      /** Sequence elemType. */
+      public elemType?: (onnx.ITypeProto|null);
+
+      /**
+       * Creates a new Sequence instance using the specified properties.
+       * @param [properties] Properties to set
+       * @returns Sequence instance
+       */
+      public static create(properties?: onnx.TypeProto.ISequence): onnx.TypeProto.Sequence;
+
+      /**
+       * Encodes the specified Sequence message. Does not implicitly {@link onnx.TypeProto.Sequence.verify|verify}
+       * messages.
+       * @param message Sequence message or plain object to encode
+       * @param [writer] Writer to encode to
+       * @returns Writer
+       */
+      public static encode(message: onnx.TypeProto.ISequence, writer?: $protobuf.Writer): $protobuf.Writer;
+
+      /**
+       * Encodes the specified Sequence message, length delimited. Does not implicitly {@link
+       * onnx.TypeProto.Sequence.verify|verify} messages.
+       * @param message Sequence message or plain object to encode
+       * @param [writer] Writer to encode to
+       * @returns Writer
+       */
+      public static encodeDelimited(message: onnx.TypeProto.ISequence, writer?: $protobuf.Writer): $protobuf.Writer;
+
+      /**
+       * Decodes a Sequence message from the specified reader or buffer.
+       * @param reader Reader or buffer to decode from
+       * @param [length] Message length if known beforehand
+       * @returns Sequence
+       * @throws {Error} If the payload is not a reader or valid buffer
+       * @throws {$protobuf.util.ProtocolError} If required fields are missing
+       */
+      public static decode(reader: ($protobuf.Reader|Uint8Array), length?: number): onnx.TypeProto.Sequence;
+
+      /**
+       * Decodes a Sequence message from the specified reader or buffer, length delimited.
+       * @param reader Reader or buffer to decode from
+       * @returns Sequence
+       * @throws {Error} If the payload is not a reader or valid buffer
+       * @throws {$protobuf.util.ProtocolError} If required fields are missing
+       */
+      public static decodeDelimited(reader: ($protobuf.Reader|Uint8Array)): onnx.TypeProto.Sequence;
+
+      /**
+       * Verifies a Sequence message.
+       * @param message Plain object to verify
+       * @returns `null` if valid, otherwise the reason why it is not
+       */
+      public static verify(message: {[k: string]: any}): (string|null);
+
+      /**
+       * Creates a Sequence message from a plain object. Also converts values to their respective internal types.
+       * @param object Plain object
+       * @returns Sequence
+       */
+      public static fromObject(object: {[k: string]: any}): onnx.TypeProto.Sequence;
+
+      /**
+       * Creates a plain object from a Sequence message. Also converts values to other types if specified.
+       * @param message Sequence
+       * @param [options] Conversion options
+       * @returns Plain object
+       */
+      public static toObject(message: onnx.TypeProto.Sequence, options?: $protobuf.IConversionOptions):
+          {[k: string]: any};
+
+      /**
+       * Converts this Sequence to JSON.
+       * @returns JSON object
+       */
+      public toJSON(): {[k: string]: any};
+
+      /**
+       * Gets the default type url for Sequence
+       * @param [typeUrlPrefix] your custom typeUrlPrefix(default "type.googleapis.com")
+       * @returns The default type url
+       */
+      public static getTypeUrl(typeUrlPrefix?: string): string;
+    }
+
+    /** Properties of a Map. */
+    interface IMap {
+      /** Map keyType */
+      keyType?: (number|null);
+
+      /** Map valueType */
+      valueType?: (onnx.ITypeProto|null);
+    }
+
+    /** Represents a Map. */
+    class Map implements IMap {
+      /**
+       * Constructs a new Map.
+       * @param [properties] Properties to set
+       */
+      constructor(properties?: onnx.TypeProto.IMap);
+
+      /** Map keyType. */
+      public keyType: number;
+
+      /** Map valueType. */
+      public valueType?: (onnx.ITypeProto|null);
+
+      /**
+       * Creates a new Map instance using the specified properties.
+       * @param [properties] Properties to set
+       * @returns Map instance
+       */
+      public static create(properties?: onnx.TypeProto.IMap): onnx.TypeProto.Map;
+
+      /**
+       * Encodes the specified Map message. Does not implicitly {@link onnx.TypeProto.Map.verify|verify} messages.
+       * @param message Map message or plain object to encode
+       * @param [writer] Writer to encode to
+       * @returns Writer
+       */
+      public static encode(message: onnx.TypeProto.IMap, writer?: $protobuf.Writer): $protobuf.Writer;
+
+      /**
+       * Encodes the specified Map message, length delimited. Does not implicitly {@link
+       * onnx.TypeProto.Map.verify|verify} messages.
+       * @param message Map message or plain object to encode
+       * @param [writer] Writer to encode to
+       * @returns Writer
+       */
+      public static encodeDelimited(message: onnx.TypeProto.IMap, writer?: $protobuf.Writer): $protobuf.Writer;
+
+      /**
+       * Decodes a Map message from the specified reader or buffer.
+       * @param reader Reader or buffer to decode from
+       * @param [length] Message length if known beforehand
+       * @returns Map
+       * @throws {Error} If the payload is not a reader or valid buffer
+       * @throws {$protobuf.util.ProtocolError} If required fields are missing
+       */
+      public static decode(reader: ($protobuf.Reader|Uint8Array), length?: number): onnx.TypeProto.Map;
+
+      /**
+       * Decodes a Map message from the specified reader or buffer, length delimited.
+       * @param reader Reader or buffer to decode from
+       * @returns Map
+       * @throws {Error} If the payload is not a reader or valid buffer
+       * @throws {$protobuf.util.ProtocolError} If required fields are missing
+       */
+      public static decodeDelimited(reader: ($protobuf.Reader|Uint8Array)): onnx.TypeProto.Map;
+
+      /**
+       * Verifies a Map message.
+       * @param message Plain object to verify
+       * @returns `null` if valid, otherwise the reason why it is not
+       */
+      public static verify(message: {[k: string]: any}): (string|null);
+
+      /**
+       * Creates a Map message from a plain object. Also converts values to their respective internal types.
+       * @param object Plain object
+       * @returns Map
+       */
+      public static fromObject(object: {[k: string]: any}): onnx.TypeProto.Map;
+
+      /**
+       * Creates a plain object from a Map message. Also converts values to other types if specified.
+       * @param message Map
+       * @param [options] Conversion options
+       * @returns Plain object
+       */
+      public static toObject(message: onnx.TypeProto.Map, options?: $protobuf.IConversionOptions): {[k: string]: any};
+
+      /**
+       * Converts this Map to JSON.
+       * @returns JSON object
+       */
+      public toJSON(): {[k: string]: any};
+
+      /**
+       * Gets the default type url for Map
+       * @param [typeUrlPrefix] your custom typeUrlPrefix(default "type.googleapis.com")
+       * @returns The default type url
+       */
+      public static getTypeUrl(typeUrlPrefix?: string): string;
+    }
+
+    /** Properties of an Optional. */
+    interface IOptional {
+      /** Optional elemType */
+      elemType?: (onnx.ITypeProto|null);
+    }
+
+    /** Represents an Optional. */
+    class Optional implements IOptional {
+      /**
+       * Constructs a new Optional.
+       * @param [properties] Properties to set
+       */
+      constructor(properties?: onnx.TypeProto.IOptional);
+
+      /** Optional elemType. */
+      public elemType?: (onnx.ITypeProto|null);
+
+      /**
+       * Creates a new Optional instance using the specified properties.
+       * @param [properties] Properties to set
+       * @returns Optional instance
+       */
+      public static create(properties?: onnx.TypeProto.IOptional): onnx.TypeProto.Optional;
+
+      /**
+       * Encodes the specified Optional message. Does not implicitly {@link onnx.TypeProto.Optional.verify|verify}
+       * messages.
+       * @param message Optional message or plain object to encode
+       * @param [writer] Writer to encode to
+       * @returns Writer
+       */
+      public static encode(message: onnx.TypeProto.IOptional, writer?: $protobuf.Writer): $protobuf.Writer;
+
+      /**
+       * Encodes the specified Optional message, length delimited. Does not implicitly {@link
+       * onnx.TypeProto.Optional.verify|verify} messages.
+       * @param message Optional message or plain object to encode
+       * @param [writer] Writer to encode to
+       * @returns Writer
+       */
+      public static encodeDelimited(message: onnx.TypeProto.IOptional, writer?: $protobuf.Writer): $protobuf.Writer;
+
+      /**
+       * Decodes an Optional message from the specified reader or buffer.
+       * @param reader Reader or buffer to decode from
+       * @param [length] Message length if known beforehand
+       * @returns Optional
+       * @throws {Error} If the payload is not a reader or valid buffer
+       * @throws {$protobuf.util.ProtocolError} If required fields are missing
+       */
+      public static decode(reader: ($protobuf.Reader|Uint8Array), length?: number): onnx.TypeProto.Optional;
+
+      /**
+       * Decodes an Optional message from the specified reader or buffer, length delimited.
+       * @param reader Reader or buffer to decode from
+       * @returns Optional
+       * @throws {Error} If the payload is not a reader or valid buffer
+       * @throws {$protobuf.util.ProtocolError} If required fields are missing
+       */
+      public static decodeDelimited(reader: ($protobuf.Reader|Uint8Array)): onnx.TypeProto.Optional;
+
+      /**
+       * Verifies an Optional message.
+       * @param message Plain object to verify
+       * @returns `null` if valid, otherwise the reason why it is not
+       */
+      public static verify(message: {[k: string]: any}): (string|null);
+
+      /**
+       * Creates an Optional message from a plain object. Also converts values to their respective internal types.
+       * @param object Plain object
+       * @returns Optional
+       */
+      public static fromObject(object: {[k: string]: any}): onnx.TypeProto.Optional;
+
+      /**
+       * Creates a plain object from an Optional message. Also converts values to other types if specified.
+       * @param message Optional
+       * @param [options] Conversion options
+       * @returns Plain object
+       */
+      public static toObject(message: onnx.TypeProto.Optional, options?: $protobuf.IConversionOptions):
+          {[k: string]: any};
+
+      /**
+       * Converts this Optional to JSON.
+       * @returns JSON object
+       */
+      public toJSON(): {[k: string]: any};
+
+      /**
+       * Gets the default type url for Optional
+       * @param [typeUrlPrefix] your custom typeUrlPrefix(default "type.googleapis.com")
+       * @returns The default type url
+       */
+      public static getTypeUrl(typeUrlPrefix?: string): string;
+    }
+
+    /** Properties of a SparseTensor. */
+    interface ISparseTensor {
+      /** SparseTensor elemType */
+      elemType?: (number|null);
+
+      /** SparseTensor shape */
+      shape?: (onnx.ITensorShapeProto|null);
+    }
+
+    /** Represents a SparseTensor. */
+    class SparseTensor implements ISparseTensor {
+      /**
+       * Constructs a new SparseTensor.
+       * @param [properties] Properties to set
+       */
+      constructor(properties?: onnx.TypeProto.ISparseTensor);
+
+      /** SparseTensor elemType. */
+      public elemType: number;
+
+      /** SparseTensor shape. */
+      public shape?: (onnx.ITensorShapeProto|null);
+
+      /**
+       * Creates a new SparseTensor instance using the specified properties.
+       * @param [properties] Properties to set
+       * @returns SparseTensor instance
+       */
+      public static create(properties?: onnx.TypeProto.ISparseTensor): onnx.TypeProto.SparseTensor;
+
+      /**
+       * Encodes the specified SparseTensor message. Does not implicitly {@link
+       * onnx.TypeProto.SparseTensor.verify|verify} messages.
+       * @param message SparseTensor message or plain object to encode
+       * @param [writer] Writer to encode to
+       * @returns Writer
+       */
+      public static encode(message: onnx.TypeProto.ISparseTensor, writer?: $protobuf.Writer): $protobuf.Writer;
+
+      /**
+       * Encodes the specified SparseTensor message, length delimited. Does not implicitly {@link
+       * onnx.TypeProto.SparseTensor.verify|verify} messages.
+       * @param message SparseTensor message or plain object to encode
+       * @param [writer] Writer to encode to
+       * @returns Writer
+       */
+      public static encodeDelimited(message: onnx.TypeProto.ISparseTensor, writer?: $protobuf.Writer): $protobuf.Writer;
+
+      /**
+       * Decodes a SparseTensor message from the specified reader or buffer.
+       * @param reader Reader or buffer to decode from
+       * @param [length] Message length if known beforehand
+       * @returns SparseTensor
+       * @throws {Error} If the payload is not a reader or valid buffer
+       * @throws {$protobuf.util.ProtocolError} If required fields are missing
+       */
+      public static decode(reader: ($protobuf.Reader|Uint8Array), length?: number): onnx.TypeProto.SparseTensor;
+
+      /**
+       * Decodes a SparseTensor message from the specified reader or buffer, length delimited.
+       * @param reader Reader or buffer to decode from
+       * @returns SparseTensor
+       * @throws {Error} If the payload is not a reader or valid buffer
+       * @throws {$protobuf.util.ProtocolError} If required fields are missing
+       */
+      public static decodeDelimited(reader: ($protobuf.Reader|Uint8Array)): onnx.TypeProto.SparseTensor;
+
+      /**
+       * Verifies a SparseTensor message.
+       * @param message Plain object to verify
+       * @returns `null` if valid, otherwise the reason why it is not
+       */
+      public static verify(message: {[k: string]: any}): (string|null);
+
+      /**
+       * Creates a SparseTensor message from a plain object. Also converts values to their respective internal types.
+       * @param object Plain object
+       * @returns SparseTensor
+       */
+      public static fromObject(object: {[k: string]: any}): onnx.TypeProto.SparseTensor;
+
+      /**
+       * Creates a plain object from a SparseTensor message. Also converts values to other types if specified.
+       * @param message SparseTensor
+       * @param [options] Conversion options
+       * @returns Plain object
+       */
+      public static toObject(message: onnx.TypeProto.SparseTensor, options?: $protobuf.IConversionOptions):
+          {[k: string]: any};
+
+      /**
+       * Converts this SparseTensor to JSON.
+       * @returns JSON object
+       */
+      public toJSON(): {[k: string]: any};
+
+      /**
+       * Gets the default type url for SparseTensor
+       * @param [typeUrlPrefix] your custom typeUrlPrefix(default "type.googleapis.com")
+       * @returns The default type url
+       */
+      public static getTypeUrl(typeUrlPrefix?: string): string;
+    }
+  }
+
+  /** Properties of an OperatorSetIdProto. */
+  interface IOperatorSetIdProto {
+    /** OperatorSetIdProto domain */
+    domain?: (string|null);
+
+    /** OperatorSetIdProto version */
+    version?: (number|Long|null);
+  }
+
+  /** Represents an OperatorSetIdProto. */
+  class OperatorSetIdProto implements IOperatorSetIdProto {
+    /**
+     * Constructs a new OperatorSetIdProto.
+     * @param [properties] Properties to set
+     */
+    constructor(properties?: onnx.IOperatorSetIdProto);
+
+    /** OperatorSetIdProto domain. */
+    public domain: string;
+
+    /** OperatorSetIdProto version. */
+    public version: (number|Long);
+
+    /**
+     * Creates a new OperatorSetIdProto instance using the specified properties.
+     * @param [properties] Properties to set
+     * @returns OperatorSetIdProto instance
+     */
+    public static create(properties?: onnx.IOperatorSetIdProto): onnx.OperatorSetIdProto;
+
+    /**
+     * Encodes the specified OperatorSetIdProto message. Does not implicitly {@link
+     * onnx.OperatorSetIdProto.verify|verify} messages.
+     * @param message OperatorSetIdProto message or plain object to encode
+     * @param [writer] Writer to encode to
+     * @returns Writer
+     */
+    public static encode(message: onnx.IOperatorSetIdProto, writer?: $protobuf.Writer): $protobuf.Writer;
+
+    /**
+     * Encodes the specified OperatorSetIdProto message, length delimited. Does not implicitly {@link
+     * onnx.OperatorSetIdProto.verify|verify} messages.
+     * @param message OperatorSetIdProto message or plain object to encode
+     * @param [writer] Writer to encode to
+     * @returns Writer
+     */
+    public static encodeDelimited(message: onnx.IOperatorSetIdProto, writer?: $protobuf.Writer): $protobuf.Writer;
+
+    /**
+     * Decodes an OperatorSetIdProto message from the specified reader or buffer.
+     * @param reader Reader or buffer to decode from
+     * @param [length] Message length if known beforehand
+     * @returns OperatorSetIdProto
+     * @throws {Error} If the payload is not a reader or valid buffer
+     * @throws {$protobuf.util.ProtocolError} If required fields are missing
+     */
+    public static decode(reader: ($protobuf.Reader|Uint8Array), length?: number): onnx.OperatorSetIdProto;
+
+    /**
+     * Decodes an OperatorSetIdProto message from the specified reader or buffer, length delimited.
+     * @param reader Reader or buffer to decode from
+     * @returns OperatorSetIdProto
+     * @throws {Error} If the payload is not a reader or valid buffer
+     * @throws {$protobuf.util.ProtocolError} If required fields are missing
+     */
+    public static decodeDelimited(reader: ($protobuf.Reader|Uint8Array)): onnx.OperatorSetIdProto;
+
+    /**
+     * Verifies an OperatorSetIdProto message.
+     * @param message Plain object to verify
+     * @returns `null` if valid, otherwise the reason why it is not
+     */
+    public static verify(message: {[k: string]: any}): (string|null);
+
+    /**
+     * Creates an OperatorSetIdProto message from a plain object. Also converts values to their respective internal
+     * types.
+     * @param object Plain object
+     * @returns OperatorSetIdProto
+     */
+    public static fromObject(object: {[k: string]: any}): onnx.OperatorSetIdProto;
+
+    /**
+     * Creates a plain object from an OperatorSetIdProto message. Also converts values to other types if specified.
+     * @param message OperatorSetIdProto
+     * @param [options] Conversion options
+     * @returns Plain object
+     */
+    public static toObject(message: onnx.OperatorSetIdProto, options?: $protobuf.IConversionOptions):
+        {[k: string]: any};
+
+    /**
+     * Converts this OperatorSetIdProto to JSON.
+     * @returns JSON object
+     */
+    public toJSON(): {[k: string]: any};
+
+    /**
+     * Gets the default type url for OperatorSetIdProto
+     * @param [typeUrlPrefix] your custom typeUrlPrefix(default "type.googleapis.com")
+     * @returns The default type url
+     */
+    public static getTypeUrl(typeUrlPrefix?: string): string;
+  }
+
+  /** OperatorStatus enum. */
+  enum OperatorStatus { EXPERIMENTAL = 0, STABLE = 1 }
+
+  /** Properties of a FunctionProto. */
+  interface IFunctionProto {
+    /** FunctionProto name */
+    name?: (string|null);
+
+    /** FunctionProto input */
+    input?: (string[]|null);
+
+    /** FunctionProto output */
+    output?: (string[]|null);
+
+    /** FunctionProto attribute */
+    attribute?: (string[]|null);
+
+    /** FunctionProto attributeProto */
+    attributeProto?: (onnx.IAttributeProto[]|null);
+
+    /** FunctionProto node */
+    node?: (onnx.INodeProto[]|null);
+
+    /** FunctionProto docString */
+    docString?: (string|null);
+
+    /** FunctionProto opsetImport */
+    opsetImport?: (onnx.IOperatorSetIdProto[]|null);
+
+    /** FunctionProto domain */
+    domain?: (string|null);
+  }
+
+  /** Represents a FunctionProto. */
+  class FunctionProto implements IFunctionProto {
+    /**
+     * Constructs a new FunctionProto.
+     * @param [properties] Properties to set
+     */
+    constructor(properties?: onnx.IFunctionProto);
+
+    /** FunctionProto name. */
+    public name: string;
+
+    /** FunctionProto input. */
+    public input: string[];
+
+    /** FunctionProto output. */
+    public output: string[];
+
+    /** FunctionProto attribute. */
+    public attribute: string[];
+
+    /** FunctionProto attributeProto. */
+    public attributeProto: onnx.IAttributeProto[];
+
+    /** FunctionProto node. */
+    public node: onnx.INodeProto[];
+
+    /** FunctionProto docString. */
+    public docString: string;
+
+    /** FunctionProto opsetImport. */
+    public opsetImport: onnx.IOperatorSetIdProto[];
+
+    /** FunctionProto domain. */
+    public domain: string;
+
+    /**
+     * Creates a new FunctionProto instance using the specified properties.
+     * @param [properties] Properties to set
+     * @returns FunctionProto instance
+     */
+    public static create(properties?: onnx.IFunctionProto): onnx.FunctionProto;
+
+    /**
+     * Encodes the specified FunctionProto message. Does not implicitly {@link onnx.FunctionProto.verify|verify}
+     * messages.
+     * @param message FunctionProto message or plain object to encode
+     * @param [writer] Writer to encode to
+     * @returns Writer
+     */
+    public static encode(message: onnx.IFunctionProto, writer?: $protobuf.Writer): $protobuf.Writer;
+
+    /**
+     * Encodes the specified FunctionProto message, length delimited. Does not implicitly {@link
+     * onnx.FunctionProto.verify|verify} messages.
+     * @param message FunctionProto message or plain object to encode
+     * @param [writer] Writer to encode to
+     * @returns Writer
+     */
+    public static encodeDelimited(message: onnx.IFunctionProto, writer?: $protobuf.Writer): $protobuf.Writer;
+
+    /**
+     * Decodes a FunctionProto message from the specified reader or buffer.
+     * @param reader Reader or buffer to decode from
+     * @param [length] Message length if known beforehand
+     * @returns FunctionProto
+     * @throws {Error} If the payload is not a reader or valid buffer
+     * @throws {$protobuf.util.ProtocolError} If required fields are missing
+     */
+    public static decode(reader: ($protobuf.Reader|Uint8Array), length?: number): onnx.FunctionProto;
+
+    /**
+     * Decodes a FunctionProto message from the specified reader or buffer, length delimited.
+     * @param reader Reader or buffer to decode from
+     * @returns FunctionProto
+     * @throws {Error} If the payload is not a reader or valid buffer
+     * @throws {$protobuf.util.ProtocolError} If required fields are missing
+     */
+    public static decodeDelimited(reader: ($protobuf.Reader|Uint8Array)): onnx.FunctionProto;
+
+    /**
+     * Verifies a FunctionProto message.
+     * @param message Plain object to verify
+     * @returns `null` if valid, otherwise the reason why it is not
+     */
+    public static verify(message: {[k: string]: any}): (string|null);
+
+    /**
+     * Creates a FunctionProto message from a plain object. Also converts values to their respective internal types.
+     * @param object Plain object
+     * @returns FunctionProto
+     */
+    public static fromObject(object: {[k: string]: any}): onnx.FunctionProto;
+
+    /**
+     * Creates a plain object from a FunctionProto message. Also converts values to other types if specified.
+     * @param message FunctionProto
+     * @param [options] Conversion options
+     * @returns Plain object
+     */
+    public static toObject(message: onnx.FunctionProto, options?: $protobuf.IConversionOptions): {[k: string]: any};
+
+    /**
+     * Converts this FunctionProto to JSON.
+     * @returns JSON object
+     */
+    public toJSON(): {[k: string]: any};
+
+    /**
+     * Gets the default type url for FunctionProto
+     * @param [typeUrlPrefix] your custom typeUrlPrefix(default "type.googleapis.com")
+     * @returns The default type url
+     */
+    public static getTypeUrl(typeUrlPrefix?: string): string;
+  }
+}
diff --git a/js/node/test/ort-schema/protobuf/onnx.js b/js/node/test/ort-schema/protobuf/onnx.js
new file mode 100644
index 0000000000000..681855132d4e8
--- /dev/null
+++ b/js/node/test/ort-schema/protobuf/onnx.js
@@ -0,0 +1,7658 @@
+/*eslint-disable block-scoped-var, id-length, no-control-regex, no-magic-numbers, no-prototype-builtins, no-redeclare, no-shadow, no-var, sort-vars*/
+"use strict";
+
+var $protobuf = require("protobufjs/minimal");
+
+// Common aliases
+var $Reader = $protobuf.Reader, $Writer = $protobuf.Writer, $util = $protobuf.util;
+
+// Exported root namespace
+var $root = $protobuf.roots["default"] || ($protobuf.roots["default"] = {});
+
+$root.onnx = (function() {
+
+    /**
+     * Namespace onnx.
+     * @exports onnx
+     * @namespace
+     */
+    var onnx = {};
+
+    /**
+     * Version enum.
+     * @name onnx.Version
+     * @enum {number}
+     * @property {number} _START_VERSION=0 _START_VERSION value
+     * @property {number} IR_VERSION_2017_10_10=1 IR_VERSION_2017_10_10 value
+     * @property {number} IR_VERSION_2017_10_30=2 IR_VERSION_2017_10_30 value
+     * @property {number} IR_VERSION_2017_11_3=3 IR_VERSION_2017_11_3 value
+     * @property {number} IR_VERSION_2019_1_22=4 IR_VERSION_2019_1_22 value
+     * @property {number} IR_VERSION_2019_3_18=5 IR_VERSION_2019_3_18 value
+     * @property {number} IR_VERSION_2019_9_19=6 IR_VERSION_2019_9_19 value
+     * @property {number} IR_VERSION_2020_5_8=7 IR_VERSION_2020_5_8 value
+     * @property {number} IR_VERSION_2021_7_30=8 IR_VERSION_2021_7_30 value
+     * @property {number} IR_VERSION=9 IR_VERSION value
+     */
+    onnx.Version = (function() {
+        var valuesById = {}, values = Object.create(valuesById);
+        values[valuesById[0] = "_START_VERSION"] = 0;
+        values[valuesById[1] = "IR_VERSION_2017_10_10"] = 1;
+        values[valuesById[2] = "IR_VERSION_2017_10_30"] = 2;
+        values[valuesById[3] = "IR_VERSION_2017_11_3"] = 3;
+        values[valuesById[4] = "IR_VERSION_2019_1_22"] = 4;
+        values[valuesById[5] = "IR_VERSION_2019_3_18"] = 5;
+        values[valuesById[6] = "IR_VERSION_2019_9_19"] = 6;
+        values[valuesById[7] = "IR_VERSION_2020_5_8"] = 7;
+        values[valuesById[8] = "IR_VERSION_2021_7_30"] = 8;
+        values[valuesById[9] = "IR_VERSION"] = 9;
+        return values;
+    })();
+
+    onnx.AttributeProto = (function() {
+
+        /**
+         * Properties of an AttributeProto.
+         * @memberof onnx
+         * @interface IAttributeProto
+         * @property {string|null} [name] AttributeProto name
+         * @property {string|null} [refAttrName] AttributeProto refAttrName
+         * @property {string|null} [docString] AttributeProto docString
+         * @property {onnx.AttributeProto.AttributeType|null} [type] AttributeProto type
+         * @property {number|null} [f] AttributeProto f
+         * @property {number|Long|null} [i] AttributeProto i
+         * @property {Uint8Array|null} [s] AttributeProto s
+         * @property {onnx.ITensorProto|null} [t] AttributeProto t
+         * @property {onnx.IGraphProto|null} [g] AttributeProto g
+         * @property {onnx.ISparseTensorProto|null} [sparseTensor] AttributeProto sparseTensor
+         * @property {onnx.ITypeProto|null} [tp] AttributeProto tp
+         * @property {Array.<number>|null} [floats] AttributeProto floats
+         * @property {Array.<number|Long>|null} [ints] AttributeProto ints
+         * @property {Array.<Uint8Array>|null} [strings] AttributeProto strings
+         * @property {Array.<onnx.ITensorProto>|null} [tensors] AttributeProto tensors
+         * @property {Array.<onnx.IGraphProto>|null} [graphs] AttributeProto graphs
+         * @property {Array.<onnx.ISparseTensorProto>|null} [sparseTensors] AttributeProto sparseTensors
+         * @property {Array.<onnx.ITypeProto>|null} [typeProtos] AttributeProto typeProtos
+         */
+
+        /**
+         * Constructs a new AttributeProto.
+         * @memberof onnx
+         * @classdesc Represents an AttributeProto.
+         * @implements IAttributeProto
+         * @constructor
+         * @param {onnx.IAttributeProto=} [properties] Properties to set
+         */
+        function AttributeProto(properties) {
+            this.floats = [];
+            this.ints = [];
+            this.strings = [];
+            this.tensors = [];
+            this.graphs = [];
+            this.sparseTensors = [];
+            this.typeProtos = [];
+            if (properties)
+                for (var keys = Object.keys(properties), i = 0; i < keys.length; ++i)
+                    if (properties[keys[i]] != null)
+                        this[keys[i]] = properties[keys[i]];
+        }
+
+        /**
+         * AttributeProto name.
+         * @member {string} name
+         * @memberof onnx.AttributeProto
+         * @instance
+         */
+        AttributeProto.prototype.name = "";
+
+        /**
+         * AttributeProto refAttrName.
+         * @member {string} refAttrName
+         * @memberof onnx.AttributeProto
+         * @instance
+         */
+        AttributeProto.prototype.refAttrName = "";
+
+        /**
+         * AttributeProto docString.
+         * @member {string} docString
+         * @memberof onnx.AttributeProto
+         * @instance
+         */
+        AttributeProto.prototype.docString = "";
+
+        /**
+         * AttributeProto type.
+         * @member {onnx.AttributeProto.AttributeType} type
+         * @memberof onnx.AttributeProto
+         * @instance
+         */
+        AttributeProto.prototype.type = 0;
+
+        /**
+         * AttributeProto f.
+         * @member {number} f
+         * @memberof onnx.AttributeProto
+         * @instance
+         */
+        AttributeProto.prototype.f = 0;
+
+        /**
+         * AttributeProto i.
+         * @member {number|Long} i
+         * @memberof onnx.AttributeProto
+         * @instance
+         */
+        AttributeProto.prototype.i = $util.Long ? $util.Long.fromBits(0,0,false) : 0;
+
+        /**
+         * AttributeProto s.
+         * @member {Uint8Array} s
+         * @memberof onnx.AttributeProto
+         * @instance
+         */
+        AttributeProto.prototype.s = $util.newBuffer([]);
+
+        /**
+         * AttributeProto t.
+         * @member {onnx.ITensorProto|null|undefined} t
+         * @memberof onnx.AttributeProto
+         * @instance
+         */
+        AttributeProto.prototype.t = null;
+
+        /**
+         * AttributeProto g.
+         * @member {onnx.IGraphProto|null|undefined} g
+         * @memberof onnx.AttributeProto
+         * @instance
+         */
+        AttributeProto.prototype.g = null;
+
+        /**
+         * AttributeProto sparseTensor.
+         * @member {onnx.ISparseTensorProto|null|undefined} sparseTensor
+         * @memberof onnx.AttributeProto
+         * @instance
+         */
+        AttributeProto.prototype.sparseTensor = null;
+
+        /**
+         * AttributeProto tp.
+         * @member {onnx.ITypeProto|null|undefined} tp
+         * @memberof onnx.AttributeProto
+         * @instance
+         */
+        AttributeProto.prototype.tp = null;
+
+        /**
+         * AttributeProto floats.
+         * @member {Array.<number>} floats
+         * @memberof onnx.AttributeProto
+         * @instance
+         */
+        AttributeProto.prototype.floats = $util.emptyArray;
+
+        /**
+         * AttributeProto ints.
+         * @member {Array.<number|Long>} ints
+         * @memberof onnx.AttributeProto
+         * @instance
+         */
+        AttributeProto.prototype.ints = $util.emptyArray;
+
+        /**
+         * AttributeProto strings.
+         * @member {Array.<Uint8Array>} strings
+         * @memberof onnx.AttributeProto
+         * @instance
+         */
+        AttributeProto.prototype.strings = $util.emptyArray;
+
+        /**
+         * AttributeProto tensors.
+         * @member {Array.<onnx.ITensorProto>} tensors
+         * @memberof onnx.AttributeProto
+         * @instance
+         */
+        AttributeProto.prototype.tensors = $util.emptyArray;
+
+        /**
+         * AttributeProto graphs.
+         * @member {Array.<onnx.IGraphProto>} graphs
+         * @memberof onnx.AttributeProto
+         * @instance
+         */
+        AttributeProto.prototype.graphs = $util.emptyArray;
+
+        /**
+         * AttributeProto sparseTensors.
+         * @member {Array.<onnx.ISparseTensorProto>} sparseTensors
+         * @memberof onnx.AttributeProto
+         * @instance
+         */
+        AttributeProto.prototype.sparseTensors = $util.emptyArray;
+
+        /**
+         * AttributeProto typeProtos.
+         * @member {Array.<onnx.ITypeProto>} typeProtos
+         * @memberof onnx.AttributeProto
+         * @instance
+         */
+        AttributeProto.prototype.typeProtos = $util.emptyArray;
+
+        /**
+         * Creates a new AttributeProto instance using the specified properties.
+         * @function create
+         * @memberof onnx.AttributeProto
+         * @static
+         * @param {onnx.IAttributeProto=} [properties] Properties to set
+         * @returns {onnx.AttributeProto} AttributeProto instance
+         */
+        AttributeProto.create = function create(properties) {
+            return new AttributeProto(properties);
+        };
+
+        /**
+         * Encodes the specified AttributeProto message. Does not implicitly {@link onnx.AttributeProto.verify|verify} messages.
+         * @function encode
+         * @memberof onnx.AttributeProto
+         * @static
+         * @param {onnx.IAttributeProto} message AttributeProto message or plain object to encode
+         * @param {$protobuf.Writer} [writer] Writer to encode to
+         * @returns {$protobuf.Writer} Writer
+         */
+        AttributeProto.encode = function encode(message, writer) {
+            if (!writer)
+                writer = $Writer.create();
+            if (message.name != null && Object.hasOwnProperty.call(message, "name"))
+                writer.uint32(/* id 1, wireType 2 =*/10).string(message.name);
+            if (message.f != null && Object.hasOwnProperty.call(message, "f"))
+                writer.uint32(/* id 2, wireType 5 =*/21).float(message.f);
+            if (message.i != null && Object.hasOwnProperty.call(message, "i"))
+                writer.uint32(/* id 3, wireType 0 =*/24).int64(message.i);
+            if (message.s != null && Object.hasOwnProperty.call(message, "s"))
+                writer.uint32(/* id 4, wireType 2 =*/34).bytes(message.s);
+            if (message.t != null && Object.hasOwnProperty.call(message, "t"))
+                $root.onnx.TensorProto.encode(message.t, writer.uint32(/* id 5, wireType 2 =*/42).fork()).ldelim();
+            if (message.g != null && Object.hasOwnProperty.call(message, "g"))
+                $root.onnx.GraphProto.encode(message.g, writer.uint32(/* id 6, wireType 2 =*/50).fork()).ldelim();
+            if (message.floats != null && message.floats.length) {
+                writer.uint32(/* id 7, wireType 2 =*/58).fork();
+                for (var i = 0; i < message.floats.length; ++i)
+                    writer.float(message.floats[i]);
+                writer.ldelim();
+            }
+            if (message.ints != null && message.ints.length) {
+                writer.uint32(/* id 8, wireType 2 =*/66).fork();
+                for (var i = 0; i < message.ints.length; ++i)
+                    writer.int64(message.ints[i]);
+                writer.ldelim();
+            }
+            if (message.strings != null && message.strings.length)
+                for (var i = 0; i < message.strings.length; ++i)
+                    writer.uint32(/* id 9, wireType 2 =*/74).bytes(message.strings[i]);
+            if (message.tensors != null && message.tensors.length)
+                for (var i = 0; i < message.tensors.length; ++i)
+                    $root.onnx.TensorProto.encode(message.tensors[i], writer.uint32(/* id 10, wireType 2 =*/82).fork()).ldelim();
+            if (message.graphs != null && message.graphs.length)
+                for (var i = 0; i < message.graphs.length; ++i)
+                    $root.onnx.GraphProto.encode(message.graphs[i], writer.uint32(/* id 11, wireType 2 =*/90).fork()).ldelim();
+            if (message.docString != null && Object.hasOwnProperty.call(message, "docString"))
+                writer.uint32(/* id 13, wireType 2 =*/106).string(message.docString);
+            if (message.tp != null && Object.hasOwnProperty.call(message, "tp"))
+                $root.onnx.TypeProto.encode(message.tp, writer.uint32(/* id 14, wireType 2 =*/114).fork()).ldelim();
+            if (message.typeProtos != null && message.typeProtos.length)
+                for (var i = 0; i < message.typeProtos.length; ++i)
+                    $root.onnx.TypeProto.encode(message.typeProtos[i], writer.uint32(/* id 15, wireType 2 =*/122).fork()).ldelim();
+            if (message.type != null && Object.hasOwnProperty.call(message, "type"))
+                writer.uint32(/* id 20, wireType 0 =*/160).int32(message.type);
+            if (message.refAttrName != null && Object.hasOwnProperty.call(message, "refAttrName"))
+                writer.uint32(/* id 21, wireType 2 =*/170).string(message.refAttrName);
+            if (message.sparseTensor != null && Object.hasOwnProperty.call(message, "sparseTensor"))
+                $root.onnx.SparseTensorProto.encode(message.sparseTensor, writer.uint32(/* id 22, wireType 2 =*/178).fork()).ldelim();
+            if (message.sparseTensors != null && message.sparseTensors.length)
+                for (var i = 0; i < message.sparseTensors.length; ++i)
+                    $root.onnx.SparseTensorProto.encode(message.sparseTensors[i], writer.uint32(/* id 23, wireType 2 =*/186).fork()).ldelim();
+            return writer;
+        };
+
+        /**
+         * Encodes the specified AttributeProto message, length delimited. Does not implicitly {@link onnx.AttributeProto.verify|verify} messages.
+         * @function encodeDelimited
+         * @memberof onnx.AttributeProto
+         * @static
+         * @param {onnx.IAttributeProto} message AttributeProto message or plain object to encode
+         * @param {$protobuf.Writer} [writer] Writer to encode to
+         * @returns {$protobuf.Writer} Writer
+         */
+        AttributeProto.encodeDelimited = function encodeDelimited(message, writer) {
+            return this.encode(message, writer).ldelim();
+        };
+
+        /**
+         * Decodes an AttributeProto message from the specified reader or buffer.
+         * @function decode
+         * @memberof onnx.AttributeProto
+         * @static
+         * @param {$protobuf.Reader|Uint8Array} reader Reader or buffer to decode from
+         * @param {number} [length] Message length if known beforehand
+         * @returns {onnx.AttributeProto} AttributeProto
+         * @throws {Error} If the payload is not a reader or valid buffer
+         * @throws {$protobuf.util.ProtocolError} If required fields are missing
+         */
+        AttributeProto.decode = function decode(reader, length) {
+            if (!(reader instanceof $Reader))
+                reader = $Reader.create(reader);
+            var end = length === undefined ? reader.len : reader.pos + length, message = new $root.onnx.AttributeProto();
+            while (reader.pos < end) {
+                var tag = reader.uint32();
+                switch (tag >>> 3) {
+                case 1: {
+                        message.name = reader.string();
+                        break;
+                    }
+                case 21: {
+                        message.refAttrName = reader.string();
+                        break;
+                    }
+                case 13: {
+                        message.docString = reader.string();
+                        break;
+                    }
+                case 20: {
+                        message.type = reader.int32();
+                        break;
+                    }
+                case 2: {
+                        message.f = reader.float();
+                        break;
+                    }
+                case 3: {
+                        message.i = reader.int64();
+                        break;
+                    }
+                case 4: {
+                        message.s = reader.bytes();
+                        break;
+                    }
+                case 5: {
+                        message.t = $root.onnx.TensorProto.decode(reader, reader.uint32());
+                        break;
+                    }
+                case 6: {
+                        message.g = $root.onnx.GraphProto.decode(reader, reader.uint32());
+                        break;
+                    }
+                case 22: {
+                        message.sparseTensor = $root.onnx.SparseTensorProto.decode(reader, reader.uint32());
+                        break;
+                    }
+                case 14: {
+                        message.tp = $root.onnx.TypeProto.decode(reader, reader.uint32());
+                        break;
+                    }
+                case 7: {
+                        if (!(message.floats && message.floats.length))
+                            message.floats = [];
+                        if ((tag & 7) === 2) {
+                            var end2 = reader.uint32() + reader.pos;
+                            while (reader.pos < end2)
+                                message.floats.push(reader.float());
+                        } else
+                            message.floats.push(reader.float());
+                        break;
+                    }
+                case 8: {
+                        if (!(message.ints && message.ints.length))
+                            message.ints = [];
+                        if ((tag & 7) === 2) {
+                            var end2 = reader.uint32() + reader.pos;
+                            while (reader.pos < end2)
+                                message.ints.push(reader.int64());
+                        } else
+                            message.ints.push(reader.int64());
+                        break;
+                    }
+                case 9: {
+                        if (!(message.strings && message.strings.length))
+                            message.strings = [];
+                        message.strings.push(reader.bytes());
+                        break;
+                    }
+                case 10: {
+                        if (!(message.tensors && message.tensors.length))
+                            message.tensors = [];
+                        message.tensors.push($root.onnx.TensorProto.decode(reader, reader.uint32()));
+                        break;
+                    }
+                case 11: {
+                        if (!(message.graphs && message.graphs.length))
+                            message.graphs = [];
+                        message.graphs.push($root.onnx.GraphProto.decode(reader, reader.uint32()));
+                        break;
+                    }
+                case 23: {
+                        if (!(message.sparseTensors && message.sparseTensors.length))
+                            message.sparseTensors = [];
+                        message.sparseTensors.push($root.onnx.SparseTensorProto.decode(reader, reader.uint32()));
+                        break;
+                    }
+                case 15: {
+                        if (!(message.typeProtos && message.typeProtos.length))
+                            message.typeProtos = [];
+                        message.typeProtos.push($root.onnx.TypeProto.decode(reader, reader.uint32()));
+                        break;
+                    }
+                default:
+                    reader.skipType(tag & 7);
+                    break;
+                }
+            }
+            return message;
+        };
+
+        /**
+         * Decodes an AttributeProto message from the specified reader or buffer, length delimited.
+         * @function decodeDelimited
+         * @memberof onnx.AttributeProto
+         * @static
+         * @param {$protobuf.Reader|Uint8Array} reader Reader or buffer to decode from
+         * @returns {onnx.AttributeProto} AttributeProto
+         * @throws {Error} If the payload is not a reader or valid buffer
+         * @throws {$protobuf.util.ProtocolError} If required fields are missing
+         */
+        AttributeProto.decodeDelimited = function decodeDelimited(reader) {
+            if (!(reader instanceof $Reader))
+                reader = new $Reader(reader);
+            return this.decode(reader, reader.uint32());
+        };
+
+        /**
+         * Verifies an AttributeProto message.
+         * @function verify
+         * @memberof onnx.AttributeProto
+         * @static
+         * @param {Object.<string,*>} message Plain object to verify
+         * @returns {string|null} `null` if valid, otherwise the reason why it is not
+         */
+        AttributeProto.verify = function verify(message) {
+            if (typeof message !== "object" || message === null)
+                return "object expected";
+            if (message.name != null && message.hasOwnProperty("name"))
+                if (!$util.isString(message.name))
+                    return "name: string expected";
+            if (message.refAttrName != null && message.hasOwnProperty("refAttrName"))
+                if (!$util.isString(message.refAttrName))
+                    return "refAttrName: string expected";
+            if (message.docString != null && message.hasOwnProperty("docString"))
+                if (!$util.isString(message.docString))
+                    return "docString: string expected";
+            if (message.type != null && message.hasOwnProperty("type"))
+                switch (message.type) {
+                default:
+                    return "type: enum value expected";
+                case 0:
+                case 1:
+                case 2:
+                case 3:
+                case 4:
+                case 5:
+                case 11:
+                case 13:
+                case 6:
+                case 7:
+                case 8:
+                case 9:
+                case 10:
+                case 12:
+                case 14:
+                    break;
+                }
+            if (message.f != null && message.hasOwnProperty("f"))
+                if (typeof message.f !== "number")
+                    return "f: number expected";
+            if (message.i != null && message.hasOwnProperty("i"))
+                if (!$util.isInteger(message.i) && !(message.i && $util.isInteger(message.i.low) && $util.isInteger(message.i.high)))
+                    return "i: integer|Long expected";
+            if (message.s != null && message.hasOwnProperty("s"))
+                if (!(message.s && typeof message.s.length === "number" || $util.isString(message.s)))
+                    return "s: buffer expected";
+            if (message.t != null && message.hasOwnProperty("t")) {
+                var error = $root.onnx.TensorProto.verify(message.t);
+                if (error)
+                    return "t." + error;
+            }
+            if (message.g != null && message.hasOwnProperty("g")) {
+                var error = $root.onnx.GraphProto.verify(message.g);
+                if (error)
+                    return "g." + error;
+            }
+            if (message.sparseTensor != null && message.hasOwnProperty("sparseTensor")) {
+                var error = $root.onnx.SparseTensorProto.verify(message.sparseTensor);
+                if (error)
+                    return "sparseTensor." + error;
+            }
+            if (message.tp != null && message.hasOwnProperty("tp")) {
+                var error = $root.onnx.TypeProto.verify(message.tp);
+                if (error)
+                    return "tp." + error;
+            }
+            if (message.floats != null && message.hasOwnProperty("floats")) {
+                if (!Array.isArray(message.floats))
+                    return "floats: array expected";
+                for (var i = 0; i < message.floats.length; ++i)
+                    if (typeof message.floats[i] !== "number")
+                        return "floats: number[] expected";
+            }
+            if (message.ints != null && message.hasOwnProperty("ints")) {
+                if (!Array.isArray(message.ints))
+                    return "ints: array expected";
+                for (var i = 0; i < message.ints.length; ++i)
+                    if (!$util.isInteger(message.ints[i]) && !(message.ints[i] && $util.isInteger(message.ints[i].low) && $util.isInteger(message.ints[i].high)))
+                        return "ints: integer|Long[] expected";
+            }
+            if (message.strings != null && message.hasOwnProperty("strings")) {
+                if (!Array.isArray(message.strings))
+                    return "strings: array expected";
+                for (var i = 0; i < message.strings.length; ++i)
+                    if (!(message.strings[i] && typeof message.strings[i].length === "number" || $util.isString(message.strings[i])))
+                        return "strings: buffer[] expected";
+            }
+            if (message.tensors != null && message.hasOwnProperty("tensors")) {
+                if (!Array.isArray(message.tensors))
+                    return "tensors: array expected";
+                for (var i = 0; i < message.tensors.length; ++i) {
+                    var error = $root.onnx.TensorProto.verify(message.tensors[i]);
+                    if (error)
+                        return "tensors." + error;
+                }
+            }
+            if (message.graphs != null && message.hasOwnProperty("graphs")) {
+                if (!Array.isArray(message.graphs))
+                    return "graphs: array expected";
+                for (var i = 0; i < message.graphs.length; ++i) {
+                    var error = $root.onnx.GraphProto.verify(message.graphs[i]);
+                    if (error)
+                        return "graphs." + error;
+                }
+            }
+            if (message.sparseTensors != null && message.hasOwnProperty("sparseTensors")) {
+                if (!Array.isArray(message.sparseTensors))
+                    return "sparseTensors: array expected";
+                for (var i = 0; i < message.sparseTensors.length; ++i) {
+                    var error = $root.onnx.SparseTensorProto.verify(message.sparseTensors[i]);
+                    if (error)
+                        return "sparseTensors." + error;
+                }
+            }
+            if (message.typeProtos != null && message.hasOwnProperty("typeProtos")) {
+                if (!Array.isArray(message.typeProtos))
+                    return "typeProtos: array expected";
+                for (var i = 0; i < message.typeProtos.length; ++i) {
+                    var error = $root.onnx.TypeProto.verify(message.typeProtos[i]);
+                    if (error)
+                        return "typeProtos." + error;
+                }
+            }
+            return null;
+        };
+
+        /**
+         * Creates an AttributeProto message from a plain object. Also converts values to their respective internal types.
+         * @function fromObject
+         * @memberof onnx.AttributeProto
+         * @static
+         * @param {Object.<string,*>} object Plain object
+         * @returns {onnx.AttributeProto} AttributeProto
+         */
+        AttributeProto.fromObject = function fromObject(object) {
+            if (object instanceof $root.onnx.AttributeProto)
+                return object;
+            var message = new $root.onnx.AttributeProto();
+            if (object.name != null)
+                message.name = String(object.name);
+            if (object.refAttrName != null)
+                message.refAttrName = String(object.refAttrName);
+            if (object.docString != null)
+                message.docString = String(object.docString);
+            switch (object.type) {
+            default:
+                if (typeof object.type === "number") {
+                    message.type = object.type;
+                    break;
+                }
+                break;
+            case "UNDEFINED":
+            case 0:
+                message.type = 0;
+                break;
+            case "FLOAT":
+            case 1:
+                message.type = 1;
+                break;
+            case "INT":
+            case 2:
+                message.type = 2;
+                break;
+            case "STRING":
+            case 3:
+                message.type = 3;
+                break;
+            case "TENSOR":
+            case 4:
+                message.type = 4;
+                break;
+            case "GRAPH":
+            case 5:
+                message.type = 5;
+                break;
+            case "SPARSE_TENSOR":
+            case 11:
+                message.type = 11;
+                break;
+            case "TYPE_PROTO":
+            case 13:
+                message.type = 13;
+                break;
+            case "FLOATS":
+            case 6:
+                message.type = 6;
+                break;
+            case "INTS":
+            case 7:
+                message.type = 7;
+                break;
+            case "STRINGS":
+            case 8:
+                message.type = 8;
+                break;
+            case "TENSORS":
+            case 9:
+                message.type = 9;
+                break;
+            case "GRAPHS":
+            case 10:
+                message.type = 10;
+                break;
+            case "SPARSE_TENSORS":
+            case 12:
+                message.type = 12;
+                break;
+            case "TYPE_PROTOS":
+            case 14:
+                message.type = 14;
+                break;
+            }
+            if (object.f != null)
+                message.f = Number(object.f);
+            if (object.i != null)
+                if ($util.Long)
+                    (message.i = $util.Long.fromValue(object.i)).unsigned = false;
+                else if (typeof object.i === "string")
+                    message.i = parseInt(object.i, 10);
+                else if (typeof object.i === "number")
+                    message.i = object.i;
+                else if (typeof object.i === "object")
+                    message.i = new $util.LongBits(object.i.low >>> 0, object.i.high >>> 0).toNumber();
+            if (object.s != null)
+                if (typeof object.s === "string")
+                    $util.base64.decode(object.s, message.s = $util.newBuffer($util.base64.length(object.s)), 0);
+                else if (object.s.length >= 0)
+                    message.s = object.s;
+            if (object.t != null) {
+                if (typeof object.t !== "object")
+                    throw TypeError(".onnx.AttributeProto.t: object expected");
+                message.t = $root.onnx.TensorProto.fromObject(object.t);
+            }
+            if (object.g != null) {
+                if (typeof object.g !== "object")
+                    throw TypeError(".onnx.AttributeProto.g: object expected");
+                message.g = $root.onnx.GraphProto.fromObject(object.g);
+            }
+            if (object.sparseTensor != null) {
+                if (typeof object.sparseTensor !== "object")
+                    throw TypeError(".onnx.AttributeProto.sparseTensor: object expected");
+                message.sparseTensor = $root.onnx.SparseTensorProto.fromObject(object.sparseTensor);
+            }
+            if (object.tp != null) {
+                if (typeof object.tp !== "object")
+                    throw TypeError(".onnx.AttributeProto.tp: object expected");
+                message.tp = $root.onnx.TypeProto.fromObject(object.tp);
+            }
+            if (object.floats) {
+                if (!Array.isArray(object.floats))
+                    throw TypeError(".onnx.AttributeProto.floats: array expected");
+                message.floats = [];
+                for (var i = 0; i < object.floats.length; ++i)
+                    message.floats[i] = Number(object.floats[i]);
+            }
+            if (object.ints) {
+                if (!Array.isArray(object.ints))
+                    throw TypeError(".onnx.AttributeProto.ints: array expected");
+                message.ints = [];
+                for (var i = 0; i < object.ints.length; ++i)
+                    if ($util.Long)
+                        (message.ints[i] = $util.Long.fromValue(object.ints[i])).unsigned = false;
+                    else if (typeof object.ints[i] === "string")
+                        message.ints[i] = parseInt(object.ints[i], 10);
+                    else if (typeof object.ints[i] === "number")
+                        message.ints[i] = object.ints[i];
+                    else if (typeof object.ints[i] === "object")
+                        message.ints[i] = new $util.LongBits(object.ints[i].low >>> 0, object.ints[i].high >>> 0).toNumber();
+            }
+            if (object.strings) {
+                if (!Array.isArray(object.strings))
+                    throw TypeError(".onnx.AttributeProto.strings: array expected");
+                message.strings = [];
+                for (var i = 0; i < object.strings.length; ++i)
+                    if (typeof object.strings[i] === "string")
+                        $util.base64.decode(object.strings[i], message.strings[i] = $util.newBuffer($util.base64.length(object.strings[i])), 0);
+                    else if (object.strings[i].length >= 0)
+                        message.strings[i] = object.strings[i];
+            }
+            if (object.tensors) {
+                if (!Array.isArray(object.tensors))
+                    throw TypeError(".onnx.AttributeProto.tensors: array expected");
+                message.tensors = [];
+                for (var i = 0; i < object.tensors.length; ++i) {
+                    if (typeof object.tensors[i] !== "object")
+                        throw TypeError(".onnx.AttributeProto.tensors: object expected");
+                    message.tensors[i] = $root.onnx.TensorProto.fromObject(object.tensors[i]);
+                }
+            }
+            if (object.graphs) {
+                if (!Array.isArray(object.graphs))
+                    throw TypeError(".onnx.AttributeProto.graphs: array expected");
+                message.graphs = [];
+                for (var i = 0; i < object.graphs.length; ++i) {
+                    if (typeof object.graphs[i] !== "object")
+                        throw TypeError(".onnx.AttributeProto.graphs: object expected");
+                    message.graphs[i] = $root.onnx.GraphProto.fromObject(object.graphs[i]);
+                }
+            }
+            if (object.sparseTensors) {
+                if (!Array.isArray(object.sparseTensors))
+                    throw TypeError(".onnx.AttributeProto.sparseTensors: array expected");
+                message.sparseTensors = [];
+                for (var i = 0; i < object.sparseTensors.length; ++i) {
+                    if (typeof object.sparseTensors[i] !== "object")
+                        throw TypeError(".onnx.AttributeProto.sparseTensors: object expected");
+                    message.sparseTensors[i] = $root.onnx.SparseTensorProto.fromObject(object.sparseTensors[i]);
+                }
+            }
+            if (object.typeProtos) {
+                if (!Array.isArray(object.typeProtos))
+                    throw TypeError(".onnx.AttributeProto.typeProtos: array expected");
+                message.typeProtos = [];
+                for (var i = 0; i < object.typeProtos.length; ++i) {
+                    if (typeof object.typeProtos[i] !== "object")
+                        throw TypeError(".onnx.AttributeProto.typeProtos: object expected");
+                    message.typeProtos[i] = $root.onnx.TypeProto.fromObject(object.typeProtos[i]);
+                }
+            }
+            return message;
+        };
+
+        /**
+         * Creates a plain object from an AttributeProto message. Also converts values to other types if specified.
+         * @function toObject
+         * @memberof onnx.AttributeProto
+         * @static
+         * @param {onnx.AttributeProto} message AttributeProto
+         * @param {$protobuf.IConversionOptions} [options] Conversion options
+         * @returns {Object.<string,*>} Plain object
+         */
+        AttributeProto.toObject = function toObject(message, options) {
+            if (!options)
+                options = {};
+            var object = {};
+            if (options.arrays || options.defaults) {
+                object.floats = [];
+                object.ints = [];
+                object.strings = [];
+                object.tensors = [];
+                object.graphs = [];
+                object.typeProtos = [];
+                object.sparseTensors = [];
+            }
+            if (options.defaults) {
+                object.name = "";
+                object.f = 0;
+                if ($util.Long) {
+                    var long = new $util.Long(0, 0, false);
+                    object.i = options.longs === String ? long.toString() : options.longs === Number ? long.toNumber() : long;
+                } else
+                    object.i = options.longs === String ? "0" : 0;
+                if (options.bytes === String)
+                    object.s = "";
+                else {
+                    object.s = [];
+                    if (options.bytes !== Array)
+                        object.s = $util.newBuffer(object.s);
+                }
+                object.t = null;
+                object.g = null;
+                object.docString = "";
+                object.tp = null;
+                object.type = options.enums === String ? "UNDEFINED" : 0;
+                object.refAttrName = "";
+                object.sparseTensor = null;
+            }
+            if (message.name != null && message.hasOwnProperty("name"))
+                object.name = message.name;
+            if (message.f != null && message.hasOwnProperty("f"))
+                object.f = options.json && !isFinite(message.f) ? String(message.f) : message.f;
+            if (message.i != null && message.hasOwnProperty("i"))
+                if (typeof message.i === "number")
+                    object.i = options.longs === String ? String(message.i) : message.i;
+                else
+                    object.i = options.longs === String ? $util.Long.prototype.toString.call(message.i) : options.longs === Number ? new $util.LongBits(message.i.low >>> 0, message.i.high >>> 0).toNumber() : message.i;
+            if (message.s != null && message.hasOwnProperty("s"))
+                object.s = options.bytes === String ? $util.base64.encode(message.s, 0, message.s.length) : options.bytes === Array ? Array.prototype.slice.call(message.s) : message.s;
+            if (message.t != null && message.hasOwnProperty("t"))
+                object.t = $root.onnx.TensorProto.toObject(message.t, options);
+            if (message.g != null && message.hasOwnProperty("g"))
+                object.g = $root.onnx.GraphProto.toObject(message.g, options);
+            if (message.floats && message.floats.length) {
+                object.floats = [];
+                for (var j = 0; j < message.floats.length; ++j)
+                    object.floats[j] = options.json && !isFinite(message.floats[j]) ? String(message.floats[j]) : message.floats[j];
+            }
+            if (message.ints && message.ints.length) {
+                object.ints = [];
+                for (var j = 0; j < message.ints.length; ++j)
+                    if (typeof message.ints[j] === "number")
+                        object.ints[j] = options.longs === String ? String(message.ints[j]) : message.ints[j];
+                    else
+                        object.ints[j] = options.longs === String ? $util.Long.prototype.toString.call(message.ints[j]) : options.longs === Number ? new $util.LongBits(message.ints[j].low >>> 0, message.ints[j].high >>> 0).toNumber() : message.ints[j];
+            }
+            if (message.strings && message.strings.length) {
+                object.strings = [];
+                for (var j = 0; j < message.strings.length; ++j)
+                    object.strings[j] = options.bytes === String ? $util.base64.encode(message.strings[j], 0, message.strings[j].length) : options.bytes === Array ? Array.prototype.slice.call(message.strings[j]) : message.strings[j];
+            }
+            if (message.tensors && message.tensors.length) {
+                object.tensors = [];
+                for (var j = 0; j < message.tensors.length; ++j)
+                    object.tensors[j] = $root.onnx.TensorProto.toObject(message.tensors[j], options);
+            }
+            if (message.graphs && message.graphs.length) {
+                object.graphs = [];
+                for (var j = 0; j < message.graphs.length; ++j)
+                    object.graphs[j] = $root.onnx.GraphProto.toObject(message.graphs[j], options);
+            }
+            if (message.docString != null && message.hasOwnProperty("docString"))
+                object.docString = message.docString;
+            if (message.tp != null && message.hasOwnProperty("tp"))
+                object.tp = $root.onnx.TypeProto.toObject(message.tp, options);
+            if (message.typeProtos && message.typeProtos.length) {
+                object.typeProtos = [];
+                for (var j = 0; j < message.typeProtos.length; ++j)
+                    object.typeProtos[j] = $root.onnx.TypeProto.toObject(message.typeProtos[j], options);
+            }
+            if (message.type != null && message.hasOwnProperty("type"))
+                object.type = options.enums === String ? $root.onnx.AttributeProto.AttributeType[message.type] === undefined ? message.type : $root.onnx.AttributeProto.AttributeType[message.type] : message.type;
+            if (message.refAttrName != null && message.hasOwnProperty("refAttrName"))
+                object.refAttrName = message.refAttrName;
+            if (message.sparseTensor != null && message.hasOwnProperty("sparseTensor"))
+                object.sparseTensor = $root.onnx.SparseTensorProto.toObject(message.sparseTensor, options);
+            if (message.sparseTensors && message.sparseTensors.length) {
+                object.sparseTensors = [];
+                for (var j = 0; j < message.sparseTensors.length; ++j)
+                    object.sparseTensors[j] = $root.onnx.SparseTensorProto.toObject(message.sparseTensors[j], options);
+            }
+            return object;
+        };
+
+        /**
+         * Converts this AttributeProto to JSON.
+         * @function toJSON
+         * @memberof onnx.AttributeProto
+         * @instance
+         * @returns {Object.<string,*>} JSON object
+         */
+        AttributeProto.prototype.toJSON = function toJSON() {
+            return this.constructor.toObject(this, $protobuf.util.toJSONOptions);
+        };
+
+        /**
+         * Gets the default type url for AttributeProto
+         * @function getTypeUrl
+         * @memberof onnx.AttributeProto
+         * @static
+         * @param {string} [typeUrlPrefix] your custom typeUrlPrefix(default "type.googleapis.com")
+         * @returns {string} The default type url
+         */
+        AttributeProto.getTypeUrl = function getTypeUrl(typeUrlPrefix) {
+            if (typeUrlPrefix === undefined) {
+                typeUrlPrefix = "type.googleapis.com";
+            }
+            return typeUrlPrefix + "/onnx.AttributeProto";
+        };
+
+        /**
+         * AttributeType enum.
+         * @name onnx.AttributeProto.AttributeType
+         * @enum {number}
+         * @property {number} UNDEFINED=0 UNDEFINED value
+         * @property {number} FLOAT=1 FLOAT value
+         * @property {number} INT=2 INT value
+         * @property {number} STRING=3 STRING value
+         * @property {number} TENSOR=4 TENSOR value
+         * @property {number} GRAPH=5 GRAPH value
+         * @property {number} SPARSE_TENSOR=11 SPARSE_TENSOR value
+         * @property {number} TYPE_PROTO=13 TYPE_PROTO value
+         * @property {number} FLOATS=6 FLOATS value
+         * @property {number} INTS=7 INTS value
+         * @property {number} STRINGS=8 STRINGS value
+         * @property {number} TENSORS=9 TENSORS value
+         * @property {number} GRAPHS=10 GRAPHS value
+         * @property {number} SPARSE_TENSORS=12 SPARSE_TENSORS value
+         * @property {number} TYPE_PROTOS=14 TYPE_PROTOS value
+         */
+        AttributeProto.AttributeType = (function() {
+            var valuesById = {}, values = Object.create(valuesById);
+            values[valuesById[0] = "UNDEFINED"] = 0;
+            values[valuesById[1] = "FLOAT"] = 1;
+            values[valuesById[2] = "INT"] = 2;
+            values[valuesById[3] = "STRING"] = 3;
+            values[valuesById[4] = "TENSOR"] = 4;
+            values[valuesById[5] = "GRAPH"] = 5;
+            values[valuesById[11] = "SPARSE_TENSOR"] = 11;
+            values[valuesById[13] = "TYPE_PROTO"] = 13;
+            values[valuesById[6] = "FLOATS"] = 6;
+            values[valuesById[7] = "INTS"] = 7;
+            values[valuesById[8] = "STRINGS"] = 8;
+            values[valuesById[9] = "TENSORS"] = 9;
+            values[valuesById[10] = "GRAPHS"] = 10;
+            values[valuesById[12] = "SPARSE_TENSORS"] = 12;
+            values[valuesById[14] = "TYPE_PROTOS"] = 14;
+            return values;
+        })();
+
+        return AttributeProto;
+    })();
+
+    onnx.ValueInfoProto = (function() {
+
+        /**
+         * Properties of a ValueInfoProto.
+         * @memberof onnx
+         * @interface IValueInfoProto
+         * @property {string|null} [name] ValueInfoProto name
+         * @property {onnx.ITypeProto|null} [type] ValueInfoProto type
+         * @property {string|null} [docString] ValueInfoProto docString
+         */
+
+        /**
+         * Constructs a new ValueInfoProto.
+         * @memberof onnx
+         * @classdesc Represents a ValueInfoProto.
+         * @implements IValueInfoProto
+         * @constructor
+         * @param {onnx.IValueInfoProto=} [properties] Properties to set
+         */
+        function ValueInfoProto(properties) {
+            if (properties)
+                for (var keys = Object.keys(properties), i = 0; i < keys.length; ++i)
+                    if (properties[keys[i]] != null)
+                        this[keys[i]] = properties[keys[i]];
+        }
+
+        /**
+         * ValueInfoProto name.
+         * @member {string} name
+         * @memberof onnx.ValueInfoProto
+         * @instance
+         */
+        ValueInfoProto.prototype.name = "";
+
+        /**
+         * ValueInfoProto type.
+         * @member {onnx.ITypeProto|null|undefined} type
+         * @memberof onnx.ValueInfoProto
+         * @instance
+         */
+        ValueInfoProto.prototype.type = null;
+
+        /**
+         * ValueInfoProto docString.
+         * @member {string} docString
+         * @memberof onnx.ValueInfoProto
+         * @instance
+         */
+        ValueInfoProto.prototype.docString = "";
+
+        /**
+         * Creates a new ValueInfoProto instance using the specified properties.
+         * @function create
+         * @memberof onnx.ValueInfoProto
+         * @static
+         * @param {onnx.IValueInfoProto=} [properties] Properties to set
+         * @returns {onnx.ValueInfoProto} ValueInfoProto instance
+         */
+        ValueInfoProto.create = function create(properties) {
+            return new ValueInfoProto(properties);
+        };
+
+        /**
+         * Encodes the specified ValueInfoProto message. Does not implicitly {@link onnx.ValueInfoProto.verify|verify} messages.
+         * @function encode
+         * @memberof onnx.ValueInfoProto
+         * @static
+         * @param {onnx.IValueInfoProto} message ValueInfoProto message or plain object to encode
+         * @param {$protobuf.Writer} [writer] Writer to encode to
+         * @returns {$protobuf.Writer} Writer
+         */
+        ValueInfoProto.encode = function encode(message, writer) {
+            if (!writer)
+                writer = $Writer.create();
+            if (message.name != null && Object.hasOwnProperty.call(message, "name"))
+                writer.uint32(/* id 1, wireType 2 =*/10).string(message.name);
+            if (message.type != null && Object.hasOwnProperty.call(message, "type"))
+                $root.onnx.TypeProto.encode(message.type, writer.uint32(/* id 2, wireType 2 =*/18).fork()).ldelim();
+            if (message.docString != null && Object.hasOwnProperty.call(message, "docString"))
+                writer.uint32(/* id 3, wireType 2 =*/26).string(message.docString);
+            return writer;
+        };
+
+        /**
+         * Encodes the specified ValueInfoProto message, length delimited. Does not implicitly {@link onnx.ValueInfoProto.verify|verify} messages.
+         * @function encodeDelimited
+         * @memberof onnx.ValueInfoProto
+         * @static
+         * @param {onnx.IValueInfoProto} message ValueInfoProto message or plain object to encode
+         * @param {$protobuf.Writer} [writer] Writer to encode to
+         * @returns {$protobuf.Writer} Writer
+         */
+        ValueInfoProto.encodeDelimited = function encodeDelimited(message, writer) {
+            return this.encode(message, writer).ldelim();
+        };
+
+        /**
+         * Decodes a ValueInfoProto message from the specified reader or buffer.
+         * @function decode
+         * @memberof onnx.ValueInfoProto
+         * @static
+         * @param {$protobuf.Reader|Uint8Array} reader Reader or buffer to decode from
+         * @param {number} [length] Message length if known beforehand
+         * @returns {onnx.ValueInfoProto} ValueInfoProto
+         * @throws {Error} If the payload is not a reader or valid buffer
+         * @throws {$protobuf.util.ProtocolError} If required fields are missing
+         */
+        ValueInfoProto.decode = function decode(reader, length) {
+            if (!(reader instanceof $Reader))
+                reader = $Reader.create(reader);
+            var end = length === undefined ? reader.len : reader.pos + length, message = new $root.onnx.ValueInfoProto();
+            while (reader.pos < end) {
+                var tag = reader.uint32();
+                switch (tag >>> 3) {
+                case 1: {
+                        message.name = reader.string();
+                        break;
+                    }
+                case 2: {
+                        message.type = $root.onnx.TypeProto.decode(reader, reader.uint32());
+                        break;
+                    }
+                case 3: {
+                        message.docString = reader.string();
+                        break;
+                    }
+                default:
+                    reader.skipType(tag & 7);
+                    break;
+                }
+            }
+            return message;
+        };
+
+        /**
+         * Decodes a ValueInfoProto message from the specified reader or buffer, length delimited.
+         * @function decodeDelimited
+         * @memberof onnx.ValueInfoProto
+         * @static
+         * @param {$protobuf.Reader|Uint8Array} reader Reader or buffer to decode from
+         * @returns {onnx.ValueInfoProto} ValueInfoProto
+         * @throws {Error} If the payload is not a reader or valid buffer
+         * @throws {$protobuf.util.ProtocolError} If required fields are missing
+         */
+        ValueInfoProto.decodeDelimited = function decodeDelimited(reader) {
+            if (!(reader instanceof $Reader))
+                reader = new $Reader(reader);
+            return this.decode(reader, reader.uint32());
+        };
+
+        /**
+         * Verifies a ValueInfoProto message.
+         * @function verify
+         * @memberof onnx.ValueInfoProto
+         * @static
+         * @param {Object.<string,*>} message Plain object to verify
+         * @returns {string|null} `null` if valid, otherwise the reason why it is not
+         */
+        ValueInfoProto.verify = function verify(message) {
+            if (typeof message !== "object" || message === null)
+                return "object expected";
+            if (message.name != null && message.hasOwnProperty("name"))
+                if (!$util.isString(message.name))
+                    return "name: string expected";
+            if (message.type != null && message.hasOwnProperty("type")) {
+                var error = $root.onnx.TypeProto.verify(message.type);
+                if (error)
+                    return "type." + error;
+            }
+            if (message.docString != null && message.hasOwnProperty("docString"))
+                if (!$util.isString(message.docString))
+                    return "docString: string expected";
+            return null;
+        };
+
+        /**
+         * Creates a ValueInfoProto message from a plain object. Also converts values to their respective internal types.
+         * @function fromObject
+         * @memberof onnx.ValueInfoProto
+         * @static
+         * @param {Object.<string,*>} object Plain object
+         * @returns {onnx.ValueInfoProto} ValueInfoProto
+         */
+        ValueInfoProto.fromObject = function fromObject(object) {
+            if (object instanceof $root.onnx.ValueInfoProto)
+                return object;
+            var message = new $root.onnx.ValueInfoProto();
+            if (object.name != null)
+                message.name = String(object.name);
+            if (object.type != null) {
+                if (typeof object.type !== "object")
+                    throw TypeError(".onnx.ValueInfoProto.type: object expected");
+                message.type = $root.onnx.TypeProto.fromObject(object.type);
+            }
+            if (object.docString != null)
+                message.docString = String(object.docString);
+            return message;
+        };
+
+        /**
+         * Creates a plain object from a ValueInfoProto message. Also converts values to other types if specified.
+         * @function toObject
+         * @memberof onnx.ValueInfoProto
+         * @static
+         * @param {onnx.ValueInfoProto} message ValueInfoProto
+         * @param {$protobuf.IConversionOptions} [options] Conversion options
+         * @returns {Object.<string,*>} Plain object
+         */
+        ValueInfoProto.toObject = function toObject(message, options) {
+            if (!options)
+                options = {};
+            var object = {};
+            if (options.defaults) {
+                object.name = "";
+                object.type = null;
+                object.docString = "";
+            }
+            if (message.name != null && message.hasOwnProperty("name"))
+                object.name = message.name;
+            if (message.type != null && message.hasOwnProperty("type"))
+                object.type = $root.onnx.TypeProto.toObject(message.type, options);
+            if (message.docString != null && message.hasOwnProperty("docString"))
+                object.docString = message.docString;
+            return object;
+        };
+
+        /**
+         * Converts this ValueInfoProto to JSON.
+         * @function toJSON
+         * @memberof onnx.ValueInfoProto
+         * @instance
+         * @returns {Object.<string,*>} JSON object
+         */
+        ValueInfoProto.prototype.toJSON = function toJSON() {
+            return this.constructor.toObject(this, $protobuf.util.toJSONOptions);
+        };
+
+        /**
+         * Gets the default type url for ValueInfoProto
+         * @function getTypeUrl
+         * @memberof onnx.ValueInfoProto
+         * @static
+         * @param {string} [typeUrlPrefix] your custom typeUrlPrefix(default "type.googleapis.com")
+         * @returns {string} The default type url
+         */
+        ValueInfoProto.getTypeUrl = function getTypeUrl(typeUrlPrefix) {
+            if (typeUrlPrefix === undefined) {
+                typeUrlPrefix = "type.googleapis.com";
+            }
+            return typeUrlPrefix + "/onnx.ValueInfoProto";
+        };
+
+        return ValueInfoProto;
+    })();
+
+    onnx.NodeProto = (function() {
+
+        /**
+         * Properties of a NodeProto.
+         * @memberof onnx
+         * @interface INodeProto
+         * @property {Array.<string>|null} [input] NodeProto input
+         * @property {Array.<string>|null} [output] NodeProto output
+         * @property {string|null} [name] NodeProto name
+         * @property {string|null} [opType] NodeProto opType
+         * @property {string|null} [domain] NodeProto domain
+         * @property {Array.<onnx.IAttributeProto>|null} [attribute] NodeProto attribute
+         * @property {string|null} [docString] NodeProto docString
+         */
+
+        /**
+         * Constructs a new NodeProto.
+         * @memberof onnx
+         * @classdesc Represents a NodeProto.
+         * @implements INodeProto
+         * @constructor
+         * @param {onnx.INodeProto=} [properties] Properties to set
+         */
+        function NodeProto(properties) {
+            this.input = [];
+            this.output = [];
+            this.attribute = [];
+            if (properties)
+                for (var keys = Object.keys(properties), i = 0; i < keys.length; ++i)
+                    if (properties[keys[i]] != null)
+                        this[keys[i]] = properties[keys[i]];
+        }
+
+        /**
+         * NodeProto input.
+         * @member {Array.<string>} input
+         * @memberof onnx.NodeProto
+         * @instance
+         */
+        NodeProto.prototype.input = $util.emptyArray;
+
+        /**
+         * NodeProto output.
+         * @member {Array.<string>} output
+         * @memberof onnx.NodeProto
+         * @instance
+         */
+        NodeProto.prototype.output = $util.emptyArray;
+
+        /**
+         * NodeProto name.
+         * @member {string} name
+         * @memberof onnx.NodeProto
+         * @instance
+         */
+        NodeProto.prototype.name = "";
+
+        /**
+         * NodeProto opType.
+         * @member {string} opType
+         * @memberof onnx.NodeProto
+         * @instance
+         */
+        NodeProto.prototype.opType = "";
+
+        /**
+         * NodeProto domain.
+         * @member {string} domain
+         * @memberof onnx.NodeProto
+         * @instance
+         */
+        NodeProto.prototype.domain = "";
+
+        /**
+         * NodeProto attribute.
+         * @member {Array.<onnx.IAttributeProto>} attribute
+         * @memberof onnx.NodeProto
+         * @instance
+         */
+        NodeProto.prototype.attribute = $util.emptyArray;
+
+        /**
+         * NodeProto docString.
+         * @member {string} docString
+         * @memberof onnx.NodeProto
+         * @instance
+         */
+        NodeProto.prototype.docString = "";
+
+        /**
+         * Creates a new NodeProto instance using the specified properties.
+         * @function create
+         * @memberof onnx.NodeProto
+         * @static
+         * @param {onnx.INodeProto=} [properties] Properties to set
+         * @returns {onnx.NodeProto} NodeProto instance
+         */
+        NodeProto.create = function create(properties) {
+            return new NodeProto(properties);
+        };
+
+        /**
+         * Encodes the specified NodeProto message. Does not implicitly {@link onnx.NodeProto.verify|verify} messages.
+         * @function encode
+         * @memberof onnx.NodeProto
+         * @static
+         * @param {onnx.INodeProto} message NodeProto message or plain object to encode
+         * @param {$protobuf.Writer} [writer] Writer to encode to
+         * @returns {$protobuf.Writer} Writer
+         */
+        NodeProto.encode = function encode(message, writer) {
+            if (!writer)
+                writer = $Writer.create();
+            if (message.input != null && message.input.length)
+                for (var i = 0; i < message.input.length; ++i)
+                    writer.uint32(/* id 1, wireType 2 =*/10).string(message.input[i]);
+            if (message.output != null && message.output.length)
+                for (var i = 0; i < message.output.length; ++i)
+                    writer.uint32(/* id 2, wireType 2 =*/18).string(message.output[i]);
+            if (message.name != null && Object.hasOwnProperty.call(message, "name"))
+                writer.uint32(/* id 3, wireType 2 =*/26).string(message.name);
+            if (message.opType != null && Object.hasOwnProperty.call(message, "opType"))
+                writer.uint32(/* id 4, wireType 2 =*/34).string(message.opType);
+            if (message.attribute != null && message.attribute.length)
+                for (var i = 0; i < message.attribute.length; ++i)
+                    $root.onnx.AttributeProto.encode(message.attribute[i], writer.uint32(/* id 5, wireType 2 =*/42).fork()).ldelim();
+            if (message.docString != null && Object.hasOwnProperty.call(message, "docString"))
+                writer.uint32(/* id 6, wireType 2 =*/50).string(message.docString);
+            if (message.domain != null && Object.hasOwnProperty.call(message, "domain"))
+                writer.uint32(/* id 7, wireType 2 =*/58).string(message.domain);
+            return writer;
+        };
+
+        /**
+         * Encodes the specified NodeProto message, length delimited. Does not implicitly {@link onnx.NodeProto.verify|verify} messages.
+         * @function encodeDelimited
+         * @memberof onnx.NodeProto
+         * @static
+         * @param {onnx.INodeProto} message NodeProto message or plain object to encode
+         * @param {$protobuf.Writer} [writer] Writer to encode to
+         * @returns {$protobuf.Writer} Writer
+         */
+        NodeProto.encodeDelimited = function encodeDelimited(message, writer) {
+            return this.encode(message, writer).ldelim();
+        };
+
+        /**
+         * Decodes a NodeProto message from the specified reader or buffer.
+         * @function decode
+         * @memberof onnx.NodeProto
+         * @static
+         * @param {$protobuf.Reader|Uint8Array} reader Reader or buffer to decode from
+         * @param {number} [length] Message length if known beforehand
+         * @returns {onnx.NodeProto} NodeProto
+         * @throws {Error} If the payload is not a reader or valid buffer
+         * @throws {$protobuf.util.ProtocolError} If required fields are missing
+         */
+        NodeProto.decode = function decode(reader, length) {
+            if (!(reader instanceof $Reader))
+                reader = $Reader.create(reader);
+            var end = length === undefined ? reader.len : reader.pos + length, message = new $root.onnx.NodeProto();
+            while (reader.pos < end) {
+                var tag = reader.uint32();
+                switch (tag >>> 3) {
+                case 1: {
+                        if (!(message.input && message.input.length))
+                            message.input = [];
+                        message.input.push(reader.string());
+                        break;
+                    }
+                case 2: {
+                        if (!(message.output && message.output.length))
+                            message.output = [];
+                        message.output.push(reader.string());
+                        break;
+                    }
+                case 3: {
+                        message.name = reader.string();
+                        break;
+                    }
+                case 4: {
+                        message.opType = reader.string();
+                        break;
+                    }
+                case 7: {
+                        message.domain = reader.string();
+                        break;
+                    }
+                case 5: {
+                        if (!(message.attribute && message.attribute.length))
+                            message.attribute = [];
+                        message.attribute.push($root.onnx.AttributeProto.decode(reader, reader.uint32()));
+                        break;
+                    }
+                case 6: {
+                        message.docString = reader.string();
+                        break;
+                    }
+                default:
+                    reader.skipType(tag & 7);
+                    break;
+                }
+            }
+            return message;
+        };
+
+        /**
+         * Decodes a NodeProto message from the specified reader or buffer, length delimited.
+         * @function decodeDelimited
+         * @memberof onnx.NodeProto
+         * @static
+         * @param {$protobuf.Reader|Uint8Array} reader Reader or buffer to decode from
+         * @returns {onnx.NodeProto} NodeProto
+         * @throws {Error} If the payload is not a reader or valid buffer
+         * @throws {$protobuf.util.ProtocolError} If required fields are missing
+         */
+        NodeProto.decodeDelimited = function decodeDelimited(reader) {
+            if (!(reader instanceof $Reader))
+                reader = new $Reader(reader);
+            return this.decode(reader, reader.uint32());
+        };
+
+        /**
+         * Verifies a NodeProto message.
+         * @function verify
+         * @memberof onnx.NodeProto
+         * @static
+         * @param {Object.<string,*>} message Plain object to verify
+         * @returns {string|null} `null` if valid, otherwise the reason why it is not
+         */
+        NodeProto.verify = function verify(message) {
+            if (typeof message !== "object" || message === null)
+                return "object expected";
+            if (message.input != null && message.hasOwnProperty("input")) {
+                if (!Array.isArray(message.input))
+                    return "input: array expected";
+                for (var i = 0; i < message.input.length; ++i)
+                    if (!$util.isString(message.input[i]))
+                        return "input: string[] expected";
+            }
+            if (message.output != null && message.hasOwnProperty("output")) {
+                if (!Array.isArray(message.output))
+                    return "output: array expected";
+                for (var i = 0; i < message.output.length; ++i)
+                    if (!$util.isString(message.output[i]))
+                        return "output: string[] expected";
+            }
+            if (message.name != null && message.hasOwnProperty("name"))
+                if (!$util.isString(message.name))
+                    return "name: string expected";
+            if (message.opType != null && message.hasOwnProperty("opType"))
+                if (!$util.isString(message.opType))
+                    return "opType: string expected";
+            if (message.domain != null && message.hasOwnProperty("domain"))
+                if (!$util.isString(message.domain))
+                    return "domain: string expected";
+            if (message.attribute != null && message.hasOwnProperty("attribute")) {
+                if (!Array.isArray(message.attribute))
+                    return "attribute: array expected";
+                for (var i = 0; i < message.attribute.length; ++i) {
+                    var error = $root.onnx.AttributeProto.verify(message.attribute[i]);
+                    if (error)
+                        return "attribute." + error;
+                }
+            }
+            if (message.docString != null && message.hasOwnProperty("docString"))
+                if (!$util.isString(message.docString))
+                    return "docString: string expected";
+            return null;
+        };
+
+        /**
+         * Creates a NodeProto message from a plain object. Also converts values to their respective internal types.
+         * @function fromObject
+         * @memberof onnx.NodeProto
+         * @static
+         * @param {Object.<string,*>} object Plain object
+         * @returns {onnx.NodeProto} NodeProto
+         */
+        NodeProto.fromObject = function fromObject(object) {
+            if (object instanceof $root.onnx.NodeProto)
+                return object;
+            var message = new $root.onnx.NodeProto();
+            if (object.input) {
+                if (!Array.isArray(object.input))
+                    throw TypeError(".onnx.NodeProto.input: array expected");
+                message.input = [];
+                for (var i = 0; i < object.input.length; ++i)
+                    message.input[i] = String(object.input[i]);
+            }
+            if (object.output) {
+                if (!Array.isArray(object.output))
+                    throw TypeError(".onnx.NodeProto.output: array expected");
+                message.output = [];
+                for (var i = 0; i < object.output.length; ++i)
+                    message.output[i] = String(object.output[i]);
+            }
+            if (object.name != null)
+                message.name = String(object.name);
+            if (object.opType != null)
+                message.opType = String(object.opType);
+            if (object.domain != null)
+                message.domain = String(object.domain);
+            if (object.attribute) {
+                if (!Array.isArray(object.attribute))
+                    throw TypeError(".onnx.NodeProto.attribute: array expected");
+                message.attribute = [];
+                for (var i = 0; i < object.attribute.length; ++i) {
+                    if (typeof object.attribute[i] !== "object")
+                        throw TypeError(".onnx.NodeProto.attribute: object expected");
+                    message.attribute[i] = $root.onnx.AttributeProto.fromObject(object.attribute[i]);
+                }
+            }
+            if (object.docString != null)
+                message.docString = String(object.docString);
+            return message;
+        };
+
+        /**
+         * Creates a plain object from a NodeProto message. Also converts values to other types if specified.
+         * @function toObject
+         * @memberof onnx.NodeProto
+         * @static
+         * @param {onnx.NodeProto} message NodeProto
+         * @param {$protobuf.IConversionOptions} [options] Conversion options
+         * @returns {Object.<string,*>} Plain object
+         */
+        NodeProto.toObject = function toObject(message, options) {
+            if (!options)
+                options = {};
+            var object = {};
+            if (options.arrays || options.defaults) {
+                object.input = [];
+                object.output = [];
+                object.attribute = [];
+            }
+            if (options.defaults) {
+                object.name = "";
+                object.opType = "";
+                object.docString = "";
+                object.domain = "";
+            }
+            if (message.input && message.input.length) {
+                object.input = [];
+                for (var j = 0; j < message.input.length; ++j)
+                    object.input[j] = message.input[j];
+            }
+            if (message.output && message.output.length) {
+                object.output = [];
+                for (var j = 0; j < message.output.length; ++j)
+                    object.output[j] = message.output[j];
+            }
+            if (message.name != null && message.hasOwnProperty("name"))
+                object.name = message.name;
+            if (message.opType != null && message.hasOwnProperty("opType"))
+                object.opType = message.opType;
+            if (message.attribute && message.attribute.length) {
+                object.attribute = [];
+                for (var j = 0; j < message.attribute.length; ++j)
+                    object.attribute[j] = $root.onnx.AttributeProto.toObject(message.attribute[j], options);
+            }
+            if (message.docString != null && message.hasOwnProperty("docString"))
+                object.docString = message.docString;
+            if (message.domain != null && message.hasOwnProperty("domain"))
+                object.domain = message.domain;
+            return object;
+        };
+
+        /**
+         * Converts this NodeProto to JSON.
+         * @function toJSON
+         * @memberof onnx.NodeProto
+         * @instance
+         * @returns {Object.<string,*>} JSON object
+         */
+        NodeProto.prototype.toJSON = function toJSON() {
+            return this.constructor.toObject(this, $protobuf.util.toJSONOptions);
+        };
+
+        /**
+         * Gets the default type url for NodeProto
+         * @function getTypeUrl
+         * @memberof onnx.NodeProto
+         * @static
+         * @param {string} [typeUrlPrefix] your custom typeUrlPrefix(default "type.googleapis.com")
+         * @returns {string} The default type url
+         */
+        NodeProto.getTypeUrl = function getTypeUrl(typeUrlPrefix) {
+            if (typeUrlPrefix === undefined) {
+                typeUrlPrefix = "type.googleapis.com";
+            }
+            return typeUrlPrefix + "/onnx.NodeProto";
+        };
+
+        return NodeProto;
+    })();
+
+    onnx.TrainingInfoProto = (function() {
+
+        /**
+         * Properties of a TrainingInfoProto.
+         * @memberof onnx
+         * @interface ITrainingInfoProto
+         * @property {onnx.IGraphProto|null} [initialization] TrainingInfoProto initialization
+         * @property {onnx.IGraphProto|null} [algorithm] TrainingInfoProto algorithm
+         * @property {Array.<onnx.IStringStringEntryProto>|null} [initializationBinding] TrainingInfoProto initializationBinding
+         * @property {Array.<onnx.IStringStringEntryProto>|null} [updateBinding] TrainingInfoProto updateBinding
+         */
+
+        /**
+         * Constructs a new TrainingInfoProto.
+         * @memberof onnx
+         * @classdesc Represents a TrainingInfoProto.
+         * @implements ITrainingInfoProto
+         * @constructor
+         * @param {onnx.ITrainingInfoProto=} [properties] Properties to set
+         */
+        function TrainingInfoProto(properties) {
+            this.initializationBinding = [];
+            this.updateBinding = [];
+            if (properties)
+                for (var keys = Object.keys(properties), i = 0; i < keys.length; ++i)
+                    if (properties[keys[i]] != null)
+                        this[keys[i]] = properties[keys[i]];
+        }
+
+        /**
+         * TrainingInfoProto initialization.
+         * @member {onnx.IGraphProto|null|undefined} initialization
+         * @memberof onnx.TrainingInfoProto
+         * @instance
+         */
+        TrainingInfoProto.prototype.initialization = null;
+
+        /**
+         * TrainingInfoProto algorithm.
+         * @member {onnx.IGraphProto|null|undefined} algorithm
+         * @memberof onnx.TrainingInfoProto
+         * @instance
+         */
+        TrainingInfoProto.prototype.algorithm = null;
+
+        /**
+         * TrainingInfoProto initializationBinding.
+         * @member {Array.<onnx.IStringStringEntryProto>} initializationBinding
+         * @memberof onnx.TrainingInfoProto
+         * @instance
+         */
+        TrainingInfoProto.prototype.initializationBinding = $util.emptyArray;
+
+        /**
+         * TrainingInfoProto updateBinding.
+         * @member {Array.<onnx.IStringStringEntryProto>} updateBinding
+         * @memberof onnx.TrainingInfoProto
+         * @instance
+         */
+        TrainingInfoProto.prototype.updateBinding = $util.emptyArray;
+
+        /**
+         * Creates a new TrainingInfoProto instance using the specified properties.
+         * @function create
+         * @memberof onnx.TrainingInfoProto
+         * @static
+         * @param {onnx.ITrainingInfoProto=} [properties] Properties to set
+         * @returns {onnx.TrainingInfoProto} TrainingInfoProto instance
+         */
+        TrainingInfoProto.create = function create(properties) {
+            return new TrainingInfoProto(properties);
+        };
+
+        /**
+         * Encodes the specified TrainingInfoProto message. Does not implicitly {@link onnx.TrainingInfoProto.verify|verify} messages.
+         * @function encode
+         * @memberof onnx.TrainingInfoProto
+         * @static
+         * @param {onnx.ITrainingInfoProto} message TrainingInfoProto message or plain object to encode
+         * @param {$protobuf.Writer} [writer] Writer to encode to
+         * @returns {$protobuf.Writer} Writer
+         */
+        TrainingInfoProto.encode = function encode(message, writer) {
+            if (!writer)
+                writer = $Writer.create();
+            if (message.initialization != null && Object.hasOwnProperty.call(message, "initialization"))
+                $root.onnx.GraphProto.encode(message.initialization, writer.uint32(/* id 1, wireType 2 =*/10).fork()).ldelim();
+            if (message.algorithm != null && Object.hasOwnProperty.call(message, "algorithm"))
+                $root.onnx.GraphProto.encode(message.algorithm, writer.uint32(/* id 2, wireType 2 =*/18).fork()).ldelim();
+            if (message.initializationBinding != null && message.initializationBinding.length)
+                for (var i = 0; i < message.initializationBinding.length; ++i)
+                    $root.onnx.StringStringEntryProto.encode(message.initializationBinding[i], writer.uint32(/* id 3, wireType 2 =*/26).fork()).ldelim();
+            if (message.updateBinding != null && message.updateBinding.length)
+                for (var i = 0; i < message.updateBinding.length; ++i)
+                    $root.onnx.StringStringEntryProto.encode(message.updateBinding[i], writer.uint32(/* id 4, wireType 2 =*/34).fork()).ldelim();
+            return writer;
+        };
+
+        /**
+         * Encodes the specified TrainingInfoProto message, length delimited. Does not implicitly {@link onnx.TrainingInfoProto.verify|verify} messages.
+         * @function encodeDelimited
+         * @memberof onnx.TrainingInfoProto
+         * @static
+         * @param {onnx.ITrainingInfoProto} message TrainingInfoProto message or plain object to encode
+         * @param {$protobuf.Writer} [writer] Writer to encode to
+         * @returns {$protobuf.Writer} Writer
+         */
+        TrainingInfoProto.encodeDelimited = function encodeDelimited(message, writer) {
+            return this.encode(message, writer).ldelim();
+        };
+
+        /**
+         * Decodes a TrainingInfoProto message from the specified reader or buffer.
+         * @function decode
+         * @memberof onnx.TrainingInfoProto
+         * @static
+         * @param {$protobuf.Reader|Uint8Array} reader Reader or buffer to decode from
+         * @param {number} [length] Message length if known beforehand
+         * @returns {onnx.TrainingInfoProto} TrainingInfoProto
+         * @throws {Error} If the payload is not a reader or valid buffer
+         * @throws {$protobuf.util.ProtocolError} If required fields are missing
+         */
+        TrainingInfoProto.decode = function decode(reader, length) {
+            if (!(reader instanceof $Reader))
+                reader = $Reader.create(reader);
+            var end = length === undefined ? reader.len : reader.pos + length, message = new $root.onnx.TrainingInfoProto();
+            while (reader.pos < end) {
+                var tag = reader.uint32();
+                switch (tag >>> 3) {
+                case 1: {
+                        message.initialization = $root.onnx.GraphProto.decode(reader, reader.uint32());
+                        break;
+                    }
+                case 2: {
+                        message.algorithm = $root.onnx.GraphProto.decode(reader, reader.uint32());
+                        break;
+                    }
+                case 3: {
+                        if (!(message.initializationBinding && message.initializationBinding.length))
+                            message.initializationBinding = [];
+                        message.initializationBinding.push($root.onnx.StringStringEntryProto.decode(reader, reader.uint32()));
+                        break;
+                    }
+                case 4: {
+                        if (!(message.updateBinding && message.updateBinding.length))
+                            message.updateBinding = [];
+                        message.updateBinding.push($root.onnx.StringStringEntryProto.decode(reader, reader.uint32()));
+                        break;
+                    }
+                default:
+                    reader.skipType(tag & 7);
+                    break;
+                }
+            }
+            return message;
+        };
+
+        /**
+         * Decodes a TrainingInfoProto message from the specified reader or buffer, length delimited.
+         * @function decodeDelimited
+         * @memberof onnx.TrainingInfoProto
+         * @static
+         * @param {$protobuf.Reader|Uint8Array} reader Reader or buffer to decode from
+         * @returns {onnx.TrainingInfoProto} TrainingInfoProto
+         * @throws {Error} If the payload is not a reader or valid buffer
+         * @throws {$protobuf.util.ProtocolError} If required fields are missing
+         */
+        TrainingInfoProto.decodeDelimited = function decodeDelimited(reader) {
+            if (!(reader instanceof $Reader))
+                reader = new $Reader(reader);
+            return this.decode(reader, reader.uint32());
+        };
+
+        /**
+         * Verifies a TrainingInfoProto message.
+         * @function verify
+         * @memberof onnx.TrainingInfoProto
+         * @static
+         * @param {Object.<string,*>} message Plain object to verify
+         * @returns {string|null} `null` if valid, otherwise the reason why it is not
+         */
+        TrainingInfoProto.verify = function verify(message) {
+            if (typeof message !== "object" || message === null)
+                return "object expected";
+            if (message.initialization != null && message.hasOwnProperty("initialization")) {
+                var error = $root.onnx.GraphProto.verify(message.initialization);
+                if (error)
+                    return "initialization." + error;
+            }
+            if (message.algorithm != null && message.hasOwnProperty("algorithm")) {
+                var error = $root.onnx.GraphProto.verify(message.algorithm);
+                if (error)
+                    return "algorithm." + error;
+            }
+            if (message.initializationBinding != null && message.hasOwnProperty("initializationBinding")) {
+                if (!Array.isArray(message.initializationBinding))
+                    return "initializationBinding: array expected";
+                for (var i = 0; i < message.initializationBinding.length; ++i) {
+                    var error = $root.onnx.StringStringEntryProto.verify(message.initializationBinding[i]);
+                    if (error)
+                        return "initializationBinding." + error;
+                }
+            }
+            if (message.updateBinding != null && message.hasOwnProperty("updateBinding")) {
+                if (!Array.isArray(message.updateBinding))
+                    return "updateBinding: array expected";
+                for (var i = 0; i < message.updateBinding.length; ++i) {
+                    var error = $root.onnx.StringStringEntryProto.verify(message.updateBinding[i]);
+                    if (error)
+                        return "updateBinding." + error;
+                }
+            }
+            return null;
+        };
+
+        /**
+         * Creates a TrainingInfoProto message from a plain object. Also converts values to their respective internal types.
+         * @function fromObject
+         * @memberof onnx.TrainingInfoProto
+         * @static
+         * @param {Object.<string,*>} object Plain object
+         * @returns {onnx.TrainingInfoProto} TrainingInfoProto
+         */
+        TrainingInfoProto.fromObject = function fromObject(object) {
+            if (object instanceof $root.onnx.TrainingInfoProto)
+                return object;
+            var message = new $root.onnx.TrainingInfoProto();
+            if (object.initialization != null) {
+                if (typeof object.initialization !== "object")
+                    throw TypeError(".onnx.TrainingInfoProto.initialization: object expected");
+                message.initialization = $root.onnx.GraphProto.fromObject(object.initialization);
+            }
+            if (object.algorithm != null) {
+                if (typeof object.algorithm !== "object")
+                    throw TypeError(".onnx.TrainingInfoProto.algorithm: object expected");
+                message.algorithm = $root.onnx.GraphProto.fromObject(object.algorithm);
+            }
+            if (object.initializationBinding) {
+                if (!Array.isArray(object.initializationBinding))
+                    throw TypeError(".onnx.TrainingInfoProto.initializationBinding: array expected");
+                message.initializationBinding = [];
+                for (var i = 0; i < object.initializationBinding.length; ++i) {
+                    if (typeof object.initializationBinding[i] !== "object")
+                        throw TypeError(".onnx.TrainingInfoProto.initializationBinding: object expected");
+                    message.initializationBinding[i] = $root.onnx.StringStringEntryProto.fromObject(object.initializationBinding[i]);
+                }
+            }
+            if (object.updateBinding) {
+                if (!Array.isArray(object.updateBinding))
+                    throw TypeError(".onnx.TrainingInfoProto.updateBinding: array expected");
+                message.updateBinding = [];
+                for (var i = 0; i < object.updateBinding.length; ++i) {
+                    if (typeof object.updateBinding[i] !== "object")
+                        throw TypeError(".onnx.TrainingInfoProto.updateBinding: object expected");
+                    message.updateBinding[i] = $root.onnx.StringStringEntryProto.fromObject(object.updateBinding[i]);
+                }
+            }
+            return message;
+        };
+
+        /**
+         * Creates a plain object from a TrainingInfoProto message. Also converts values to other types if specified.
+         * @function toObject
+         * @memberof onnx.TrainingInfoProto
+         * @static
+         * @param {onnx.TrainingInfoProto} message TrainingInfoProto
+         * @param {$protobuf.IConversionOptions} [options] Conversion options
+         * @returns {Object.<string,*>} Plain object
+         */
+        TrainingInfoProto.toObject = function toObject(message, options) {
+            if (!options)
+                options = {};
+            var object = {};
+            if (options.arrays || options.defaults) {
+                object.initializationBinding = [];
+                object.updateBinding = [];
+            }
+            if (options.defaults) {
+                object.initialization = null;
+                object.algorithm = null;
+            }
+            if (message.initialization != null && message.hasOwnProperty("initialization"))
+                object.initialization = $root.onnx.GraphProto.toObject(message.initialization, options);
+            if (message.algorithm != null && message.hasOwnProperty("algorithm"))
+                object.algorithm = $root.onnx.GraphProto.toObject(message.algorithm, options);
+            if (message.initializationBinding && message.initializationBinding.length) {
+                object.initializationBinding = [];
+                for (var j = 0; j < message.initializationBinding.length; ++j)
+                    object.initializationBinding[j] = $root.onnx.StringStringEntryProto.toObject(message.initializationBinding[j], options);
+            }
+            if (message.updateBinding && message.updateBinding.length) {
+                object.updateBinding = [];
+                for (var j = 0; j < message.updateBinding.length; ++j)
+                    object.updateBinding[j] = $root.onnx.StringStringEntryProto.toObject(message.updateBinding[j], options);
+            }
+            return object;
+        };
+
+        /**
+         * Converts this TrainingInfoProto to JSON.
+         * @function toJSON
+         * @memberof onnx.TrainingInfoProto
+         * @instance
+         * @returns {Object.<string,*>} JSON object
+         */
+        TrainingInfoProto.prototype.toJSON = function toJSON() {
+            return this.constructor.toObject(this, $protobuf.util.toJSONOptions);
+        };
+
+        /**
+         * Gets the default type url for TrainingInfoProto
+         * @function getTypeUrl
+         * @memberof onnx.TrainingInfoProto
+         * @static
+         * @param {string} [typeUrlPrefix] your custom typeUrlPrefix(default "type.googleapis.com")
+         * @returns {string} The default type url
+         */
+        TrainingInfoProto.getTypeUrl = function getTypeUrl(typeUrlPrefix) {
+            if (typeUrlPrefix === undefined) {
+                typeUrlPrefix = "type.googleapis.com";
+            }
+            return typeUrlPrefix + "/onnx.TrainingInfoProto";
+        };
+
+        return TrainingInfoProto;
+    })();
+
+    onnx.ModelProto = (function() {
+
+        /**
+         * Properties of a ModelProto.
+         * @memberof onnx
+         * @interface IModelProto
+         * @property {number|Long|null} [irVersion] ModelProto irVersion
+         * @property {Array.<onnx.IOperatorSetIdProto>|null} [opsetImport] ModelProto opsetImport
+         * @property {string|null} [producerName] ModelProto producerName
+         * @property {string|null} [producerVersion] ModelProto producerVersion
+         * @property {string|null} [domain] ModelProto domain
+         * @property {number|Long|null} [modelVersion] ModelProto modelVersion
+         * @property {string|null} [docString] ModelProto docString
+         * @property {onnx.IGraphProto|null} [graph] ModelProto graph
+         * @property {Array.<onnx.IStringStringEntryProto>|null} [metadataProps] ModelProto metadataProps
+         * @property {Array.<onnx.ITrainingInfoProto>|null} [trainingInfo] ModelProto trainingInfo
+         * @property {Array.<onnx.IFunctionProto>|null} [functions] ModelProto functions
+         */
+
+        /**
+         * Constructs a new ModelProto.
+         * @memberof onnx
+         * @classdesc Represents a ModelProto.
+         * @implements IModelProto
+         * @constructor
+         * @param {onnx.IModelProto=} [properties] Properties to set
+         */
+        function ModelProto(properties) {
+            this.opsetImport = [];
+            this.metadataProps = [];
+            this.trainingInfo = [];
+            this.functions = [];
+            if (properties)
+                for (var keys = Object.keys(properties), i = 0; i < keys.length; ++i)
+                    if (properties[keys[i]] != null)
+                        this[keys[i]] = properties[keys[i]];
+        }
+
+        /**
+         * ModelProto irVersion.
+         * @member {number|Long} irVersion
+         * @memberof onnx.ModelProto
+         * @instance
+         */
+        ModelProto.prototype.irVersion = $util.Long ? $util.Long.fromBits(0,0,false) : 0;
+
+        /**
+         * ModelProto opsetImport.
+         * @member {Array.<onnx.IOperatorSetIdProto>} opsetImport
+         * @memberof onnx.ModelProto
+         * @instance
+         */
+        ModelProto.prototype.opsetImport = $util.emptyArray;
+
+        /**
+         * ModelProto producerName.
+         * @member {string} producerName
+         * @memberof onnx.ModelProto
+         * @instance
+         */
+        ModelProto.prototype.producerName = "";
+
+        /**
+         * ModelProto producerVersion.
+         * @member {string} producerVersion
+         * @memberof onnx.ModelProto
+         * @instance
+         */
+        ModelProto.prototype.producerVersion = "";
+
+        /**
+         * ModelProto domain.
+         * @member {string} domain
+         * @memberof onnx.ModelProto
+         * @instance
+         */
+        ModelProto.prototype.domain = "";
+
+        /**
+         * ModelProto modelVersion.
+         * @member {number|Long} modelVersion
+         * @memberof onnx.ModelProto
+         * @instance
+         */
+        ModelProto.prototype.modelVersion = $util.Long ? $util.Long.fromBits(0,0,false) : 0;
+
+        /**
+         * ModelProto docString.
+         * @member {string} docString
+         * @memberof onnx.ModelProto
+         * @instance
+         */
+        ModelProto.prototype.docString = "";
+
+        /**
+         * ModelProto graph.
+         * @member {onnx.IGraphProto|null|undefined} graph
+         * @memberof onnx.ModelProto
+         * @instance
+         */
+        ModelProto.prototype.graph = null;
+
+        /**
+         * ModelProto metadataProps.
+         * @member {Array.<onnx.IStringStringEntryProto>} metadataProps
+         * @memberof onnx.ModelProto
+         * @instance
+         */
+        ModelProto.prototype.metadataProps = $util.emptyArray;
+
+        /**
+         * ModelProto trainingInfo.
+         * @member {Array.<onnx.ITrainingInfoProto>} trainingInfo
+         * @memberof onnx.ModelProto
+         * @instance
+         */
+        ModelProto.prototype.trainingInfo = $util.emptyArray;
+
+        /**
+         * ModelProto functions.
+         * @member {Array.<onnx.IFunctionProto>} functions
+         * @memberof onnx.ModelProto
+         * @instance
+         */
+        ModelProto.prototype.functions = $util.emptyArray;
+
+        /**
+         * Creates a new ModelProto instance using the specified properties.
+         * @function create
+         * @memberof onnx.ModelProto
+         * @static
+         * @param {onnx.IModelProto=} [properties] Properties to set
+         * @returns {onnx.ModelProto} ModelProto instance
+         */
+        ModelProto.create = function create(properties) {
+            return new ModelProto(properties);
+        };
+
+        /**
+         * Encodes the specified ModelProto message. Does not implicitly {@link onnx.ModelProto.verify|verify} messages.
+         * @function encode
+         * @memberof onnx.ModelProto
+         * @static
+         * @param {onnx.IModelProto} message ModelProto message or plain object to encode
+         * @param {$protobuf.Writer} [writer] Writer to encode to
+         * @returns {$protobuf.Writer} Writer
+         */
+        ModelProto.encode = function encode(message, writer) {
+            if (!writer)
+                writer = $Writer.create();
+            if (message.irVersion != null && Object.hasOwnProperty.call(message, "irVersion"))
+                writer.uint32(/* id 1, wireType 0 =*/8).int64(message.irVersion);
+            if (message.producerName != null && Object.hasOwnProperty.call(message, "producerName"))
+                writer.uint32(/* id 2, wireType 2 =*/18).string(message.producerName);
+            if (message.producerVersion != null && Object.hasOwnProperty.call(message, "producerVersion"))
+                writer.uint32(/* id 3, wireType 2 =*/26).string(message.producerVersion);
+            if (message.domain != null && Object.hasOwnProperty.call(message, "domain"))
+                writer.uint32(/* id 4, wireType 2 =*/34).string(message.domain);
+            if (message.modelVersion != null && Object.hasOwnProperty.call(message, "modelVersion"))
+                writer.uint32(/* id 5, wireType 0 =*/40).int64(message.modelVersion);
+            if (message.docString != null && Object.hasOwnProperty.call(message, "docString"))
+                writer.uint32(/* id 6, wireType 2 =*/50).string(message.docString);
+            if (message.graph != null && Object.hasOwnProperty.call(message, "graph"))
+                $root.onnx.GraphProto.encode(message.graph, writer.uint32(/* id 7, wireType 2 =*/58).fork()).ldelim();
+            if (message.opsetImport != null && message.opsetImport.length)
+                for (var i = 0; i < message.opsetImport.length; ++i)
+                    $root.onnx.OperatorSetIdProto.encode(message.opsetImport[i], writer.uint32(/* id 8, wireType 2 =*/66).fork()).ldelim();
+            if (message.metadataProps != null && message.metadataProps.length)
+                for (var i = 0; i < message.metadataProps.length; ++i)
+                    $root.onnx.StringStringEntryProto.encode(message.metadataProps[i], writer.uint32(/* id 14, wireType 2 =*/114).fork()).ldelim();
+            if (message.trainingInfo != null && message.trainingInfo.length)
+                for (var i = 0; i < message.trainingInfo.length; ++i)
+                    $root.onnx.TrainingInfoProto.encode(message.trainingInfo[i], writer.uint32(/* id 20, wireType 2 =*/162).fork()).ldelim();
+            if (message.functions != null && message.functions.length)
+                for (var i = 0; i < message.functions.length; ++i)
+                    $root.onnx.FunctionProto.encode(message.functions[i], writer.uint32(/* id 25, wireType 2 =*/202).fork()).ldelim();
+            return writer;
+        };
+
+        /**
+         * Encodes the specified ModelProto message, length delimited. Does not implicitly {@link onnx.ModelProto.verify|verify} messages.
+         * @function encodeDelimited
+         * @memberof onnx.ModelProto
+         * @static
+         * @param {onnx.IModelProto} message ModelProto message or plain object to encode
+         * @param {$protobuf.Writer} [writer] Writer to encode to
+         * @returns {$protobuf.Writer} Writer
+         */
+        ModelProto.encodeDelimited = function encodeDelimited(message, writer) {
+            return this.encode(message, writer).ldelim();
+        };
+
+        /**
+         * Decodes a ModelProto message from the specified reader or buffer.
+         * @function decode
+         * @memberof onnx.ModelProto
+         * @static
+         * @param {$protobuf.Reader|Uint8Array} reader Reader or buffer to decode from
+         * @param {number} [length] Message length if known beforehand
+         * @returns {onnx.ModelProto} ModelProto
+         * @throws {Error} If the payload is not a reader or valid buffer
+         * @throws {$protobuf.util.ProtocolError} If required fields are missing
+         */
+        ModelProto.decode = function decode(reader, length) {
+            if (!(reader instanceof $Reader))
+                reader = $Reader.create(reader);
+            var end = length === undefined ? reader.len : reader.pos + length, message = new $root.onnx.ModelProto();
+            while (reader.pos < end) {
+                var tag = reader.uint32();
+                switch (tag >>> 3) {
+                case 1: {
+                        message.irVersion = reader.int64();
+                        break;
+                    }
+                case 8: {
+                        if (!(message.opsetImport && message.opsetImport.length))
+                            message.opsetImport = [];
+                        message.opsetImport.push($root.onnx.OperatorSetIdProto.decode(reader, reader.uint32()));
+                        break;
+                    }
+                case 2: {
+                        message.producerName = reader.string();
+                        break;
+                    }
+                case 3: {
+                        message.producerVersion = reader.string();
+                        break;
+                    }
+                case 4: {
+                        message.domain = reader.string();
+                        break;
+                    }
+                case 5: {
+                        message.modelVersion = reader.int64();
+                        break;
+                    }
+                case 6: {
+                        message.docString = reader.string();
+                        break;
+                    }
+                case 7: {
+                        message.graph = $root.onnx.GraphProto.decode(reader, reader.uint32());
+                        break;
+                    }
+                case 14: {
+                        if (!(message.metadataProps && message.metadataProps.length))
+                            message.metadataProps = [];
+                        message.metadataProps.push($root.onnx.StringStringEntryProto.decode(reader, reader.uint32()));
+                        break;
+                    }
+                case 20: {
+                        if (!(message.trainingInfo && message.trainingInfo.length))
+                            message.trainingInfo = [];
+                        message.trainingInfo.push($root.onnx.TrainingInfoProto.decode(reader, reader.uint32()));
+                        break;
+                    }
+                case 25: {
+                        if (!(message.functions && message.functions.length))
+                            message.functions = [];
+                        message.functions.push($root.onnx.FunctionProto.decode(reader, reader.uint32()));
+                        break;
+                    }
+                default:
+                    reader.skipType(tag & 7);
+                    break;
+                }
+            }
+            return message;
+        };
+
+        /**
+         * Decodes a ModelProto message from the specified reader or buffer, length delimited.
+         * @function decodeDelimited
+         * @memberof onnx.ModelProto
+         * @static
+         * @param {$protobuf.Reader|Uint8Array} reader Reader or buffer to decode from
+         * @returns {onnx.ModelProto} ModelProto
+         * @throws {Error} If the payload is not a reader or valid buffer
+         * @throws {$protobuf.util.ProtocolError} If required fields are missing
+         */
+        ModelProto.decodeDelimited = function decodeDelimited(reader) {
+            if (!(reader instanceof $Reader))
+                reader = new $Reader(reader);
+            return this.decode(reader, reader.uint32());
+        };
+
+        /**
+         * Verifies a ModelProto message.
+         * @function verify
+         * @memberof onnx.ModelProto
+         * @static
+         * @param {Object.<string,*>} message Plain object to verify
+         * @returns {string|null} `null` if valid, otherwise the reason why it is not
+         */
+        ModelProto.verify = function verify(message) {
+            if (typeof message !== "object" || message === null)
+                return "object expected";
+            if (message.irVersion != null && message.hasOwnProperty("irVersion"))
+                if (!$util.isInteger(message.irVersion) && !(message.irVersion && $util.isInteger(message.irVersion.low) && $util.isInteger(message.irVersion.high)))
+                    return "irVersion: integer|Long expected";
+            if (message.opsetImport != null && message.hasOwnProperty("opsetImport")) {
+                if (!Array.isArray(message.opsetImport))
+                    return "opsetImport: array expected";
+                for (var i = 0; i < message.opsetImport.length; ++i) {
+                    var error = $root.onnx.OperatorSetIdProto.verify(message.opsetImport[i]);
+                    if (error)
+                        return "opsetImport." + error;
+                }
+            }
+            if (message.producerName != null && message.hasOwnProperty("producerName"))
+                if (!$util.isString(message.producerName))
+                    return "producerName: string expected";
+            if (message.producerVersion != null && message.hasOwnProperty("producerVersion"))
+                if (!$util.isString(message.producerVersion))
+                    return "producerVersion: string expected";
+            if (message.domain != null && message.hasOwnProperty("domain"))
+                if (!$util.isString(message.domain))
+                    return "domain: string expected";
+            if (message.modelVersion != null && message.hasOwnProperty("modelVersion"))
+                if (!$util.isInteger(message.modelVersion) && !(message.modelVersion && $util.isInteger(message.modelVersion.low) && $util.isInteger(message.modelVersion.high)))
+                    return "modelVersion: integer|Long expected";
+            if (message.docString != null && message.hasOwnProperty("docString"))
+                if (!$util.isString(message.docString))
+                    return "docString: string expected";
+            if (message.graph != null && message.hasOwnProperty("graph")) {
+                var error = $root.onnx.GraphProto.verify(message.graph);
+                if (error)
+                    return "graph." + error;
+            }
+            if (message.metadataProps != null && message.hasOwnProperty("metadataProps")) {
+                if (!Array.isArray(message.metadataProps))
+                    return "metadataProps: array expected";
+                for (var i = 0; i < message.metadataProps.length; ++i) {
+                    var error = $root.onnx.StringStringEntryProto.verify(message.metadataProps[i]);
+                    if (error)
+                        return "metadataProps." + error;
+                }
+            }
+            if (message.trainingInfo != null && message.hasOwnProperty("trainingInfo")) {
+                if (!Array.isArray(message.trainingInfo))
+                    return "trainingInfo: array expected";
+                for (var i = 0; i < message.trainingInfo.length; ++i) {
+                    var error = $root.onnx.TrainingInfoProto.verify(message.trainingInfo[i]);
+                    if (error)
+                        return "trainingInfo." + error;
+                }
+            }
+            if (message.functions != null && message.hasOwnProperty("functions")) {
+                if (!Array.isArray(message.functions))
+                    return "functions: array expected";
+                for (var i = 0; i < message.functions.length; ++i) {
+                    var error = $root.onnx.FunctionProto.verify(message.functions[i]);
+                    if (error)
+                        return "functions." + error;
+                }
+            }
+            return null;
+        };
+
+        /**
+         * Creates a ModelProto message from a plain object. Also converts values to their respective internal types.
+         * @function fromObject
+         * @memberof onnx.ModelProto
+         * @static
+         * @param {Object.<string,*>} object Plain object
+         * @returns {onnx.ModelProto} ModelProto
+         */
+        ModelProto.fromObject = function fromObject(object) {
+            if (object instanceof $root.onnx.ModelProto)
+                return object;
+            var message = new $root.onnx.ModelProto();
+            if (object.irVersion != null)
+                if ($util.Long)
+                    (message.irVersion = $util.Long.fromValue(object.irVersion)).unsigned = false;
+                else if (typeof object.irVersion === "string")
+                    message.irVersion = parseInt(object.irVersion, 10);
+                else if (typeof object.irVersion === "number")
+                    message.irVersion = object.irVersion;
+                else if (typeof object.irVersion === "object")
+                    message.irVersion = new $util.LongBits(object.irVersion.low >>> 0, object.irVersion.high >>> 0).toNumber();
+            if (object.opsetImport) {
+                if (!Array.isArray(object.opsetImport))
+                    throw TypeError(".onnx.ModelProto.opsetImport: array expected");
+                message.opsetImport = [];
+                for (var i = 0; i < object.opsetImport.length; ++i) {
+                    if (typeof object.opsetImport[i] !== "object")
+                        throw TypeError(".onnx.ModelProto.opsetImport: object expected");
+                    message.opsetImport[i] = $root.onnx.OperatorSetIdProto.fromObject(object.opsetImport[i]);
+                }
+            }
+            if (object.producerName != null)
+                message.producerName = String(object.producerName);
+            if (object.producerVersion != null)
+                message.producerVersion = String(object.producerVersion);
+            if (object.domain != null)
+                message.domain = String(object.domain);
+            if (object.modelVersion != null)
+                if ($util.Long)
+                    (message.modelVersion = $util.Long.fromValue(object.modelVersion)).unsigned = false;
+                else if (typeof object.modelVersion === "string")
+                    message.modelVersion = parseInt(object.modelVersion, 10);
+                else if (typeof object.modelVersion === "number")
+                    message.modelVersion = object.modelVersion;
+                else if (typeof object.modelVersion === "object")
+                    message.modelVersion = new $util.LongBits(object.modelVersion.low >>> 0, object.modelVersion.high >>> 0).toNumber();
+            if (object.docString != null)
+                message.docString = String(object.docString);
+            if (object.graph != null) {
+                if (typeof object.graph !== "object")
+                    throw TypeError(".onnx.ModelProto.graph: object expected");
+                message.graph = $root.onnx.GraphProto.fromObject(object.graph);
+            }
+            if (object.metadataProps) {
+                if (!Array.isArray(object.metadataProps))
+                    throw TypeError(".onnx.ModelProto.metadataProps: array expected");
+                message.metadataProps = [];
+                for (var i = 0; i < object.metadataProps.length; ++i) {
+                    if (typeof object.metadataProps[i] !== "object")
+                        throw TypeError(".onnx.ModelProto.metadataProps: object expected");
+                    message.metadataProps[i] = $root.onnx.StringStringEntryProto.fromObject(object.metadataProps[i]);
+                }
+            }
+            if (object.trainingInfo) {
+                if (!Array.isArray(object.trainingInfo))
+                    throw TypeError(".onnx.ModelProto.trainingInfo: array expected");
+                message.trainingInfo = [];
+                for (var i = 0; i < object.trainingInfo.length; ++i) {
+                    if (typeof object.trainingInfo[i] !== "object")
+                        throw TypeError(".onnx.ModelProto.trainingInfo: object expected");
+                    message.trainingInfo[i] = $root.onnx.TrainingInfoProto.fromObject(object.trainingInfo[i]);
+                }
+            }
+            if (object.functions) {
+                if (!Array.isArray(object.functions))
+                    throw TypeError(".onnx.ModelProto.functions: array expected");
+                message.functions = [];
+                for (var i = 0; i < object.functions.length; ++i) {
+                    if (typeof object.functions[i] !== "object")
+                        throw TypeError(".onnx.ModelProto.functions: object expected");
+                    message.functions[i] = $root.onnx.FunctionProto.fromObject(object.functions[i]);
+                }
+            }
+            return message;
+        };
+
+        /**
+         * Creates a plain object from a ModelProto message. Also converts values to other types if specified.
+         * @function toObject
+         * @memberof onnx.ModelProto
+         * @static
+         * @param {onnx.ModelProto} message ModelProto
+         * @param {$protobuf.IConversionOptions} [options] Conversion options
+         * @returns {Object.<string,*>} Plain object
+         */
+        ModelProto.toObject = function toObject(message, options) {
+            if (!options)
+                options = {};
+            var object = {};
+            if (options.arrays || options.defaults) {
+                object.opsetImport = [];
+                object.metadataProps = [];
+                object.trainingInfo = [];
+                object.functions = [];
+            }
+            if (options.defaults) {
+                if ($util.Long) {
+                    var long = new $util.Long(0, 0, false);
+                    object.irVersion = options.longs === String ? long.toString() : options.longs === Number ? long.toNumber() : long;
+                } else
+                    object.irVersion = options.longs === String ? "0" : 0;
+                object.producerName = "";
+                object.producerVersion = "";
+                object.domain = "";
+                if ($util.Long) {
+                    var long = new $util.Long(0, 0, false);
+                    object.modelVersion = options.longs === String ? long.toString() : options.longs === Number ? long.toNumber() : long;
+                } else
+                    object.modelVersion = options.longs === String ? "0" : 0;
+                object.docString = "";
+                object.graph = null;
+            }
+            if (message.irVersion != null && message.hasOwnProperty("irVersion"))
+                if (typeof message.irVersion === "number")
+                    object.irVersion = options.longs === String ? String(message.irVersion) : message.irVersion;
+                else
+                    object.irVersion = options.longs === String ? $util.Long.prototype.toString.call(message.irVersion) : options.longs === Number ? new $util.LongBits(message.irVersion.low >>> 0, message.irVersion.high >>> 0).toNumber() : message.irVersion;
+            if (message.producerName != null && message.hasOwnProperty("producerName"))
+                object.producerName = message.producerName;
+            if (message.producerVersion != null && message.hasOwnProperty("producerVersion"))
+                object.producerVersion = message.producerVersion;
+            if (message.domain != null && message.hasOwnProperty("domain"))
+                object.domain = message.domain;
+            if (message.modelVersion != null && message.hasOwnProperty("modelVersion"))
+                if (typeof message.modelVersion === "number")
+                    object.modelVersion = options.longs === String ? String(message.modelVersion) : message.modelVersion;
+                else
+                    object.modelVersion = options.longs === String ? $util.Long.prototype.toString.call(message.modelVersion) : options.longs === Number ? new $util.LongBits(message.modelVersion.low >>> 0, message.modelVersion.high >>> 0).toNumber() : message.modelVersion;
+            if (message.docString != null && message.hasOwnProperty("docString"))
+                object.docString = message.docString;
+            if (message.graph != null && message.hasOwnProperty("graph"))
+                object.graph = $root.onnx.GraphProto.toObject(message.graph, options);
+            if (message.opsetImport && message.opsetImport.length) {
+                object.opsetImport = [];
+                for (var j = 0; j < message.opsetImport.length; ++j)
+                    object.opsetImport[j] = $root.onnx.OperatorSetIdProto.toObject(message.opsetImport[j], options);
+            }
+            if (message.metadataProps && message.metadataProps.length) {
+                object.metadataProps = [];
+                for (var j = 0; j < message.metadataProps.length; ++j)
+                    object.metadataProps[j] = $root.onnx.StringStringEntryProto.toObject(message.metadataProps[j], options);
+            }
+            if (message.trainingInfo && message.trainingInfo.length) {
+                object.trainingInfo = [];
+                for (var j = 0; j < message.trainingInfo.length; ++j)
+                    object.trainingInfo[j] = $root.onnx.TrainingInfoProto.toObject(message.trainingInfo[j], options);
+            }
+            if (message.functions && message.functions.length) {
+                object.functions = [];
+                for (var j = 0; j < message.functions.length; ++j)
+                    object.functions[j] = $root.onnx.FunctionProto.toObject(message.functions[j], options);
+            }
+            return object;
+        };
+
+        /**
+         * Converts this ModelProto to JSON.
+         * @function toJSON
+         * @memberof onnx.ModelProto
+         * @instance
+         * @returns {Object.<string,*>} JSON object
+         */
+        ModelProto.prototype.toJSON = function toJSON() {
+            return this.constructor.toObject(this, $protobuf.util.toJSONOptions);
+        };
+
+        /**
+         * Gets the default type url for ModelProto
+         * @function getTypeUrl
+         * @memberof onnx.ModelProto
+         * @static
+         * @param {string} [typeUrlPrefix] your custom typeUrlPrefix(default "type.googleapis.com")
+         * @returns {string} The default type url
+         */
+        ModelProto.getTypeUrl = function getTypeUrl(typeUrlPrefix) {
+            if (typeUrlPrefix === undefined) {
+                typeUrlPrefix = "type.googleapis.com";
+            }
+            return typeUrlPrefix + "/onnx.ModelProto";
+        };
+
+        return ModelProto;
+    })();
+
+    onnx.StringStringEntryProto = (function() {
+
+        /**
+         * Properties of a StringStringEntryProto.
+         * @memberof onnx
+         * @interface IStringStringEntryProto
+         * @property {string|null} [key] StringStringEntryProto key
+         * @property {string|null} [value] StringStringEntryProto value
+         */
+
+        /**
+         * Constructs a new StringStringEntryProto.
+         * @memberof onnx
+         * @classdesc Represents a StringStringEntryProto.
+         * @implements IStringStringEntryProto
+         * @constructor
+         * @param {onnx.IStringStringEntryProto=} [properties] Properties to set
+         */
+        function StringStringEntryProto(properties) {
+            if (properties)
+                for (var keys = Object.keys(properties), i = 0; i < keys.length; ++i)
+                    if (properties[keys[i]] != null)
+                        this[keys[i]] = properties[keys[i]];
+        }
+
+        /**
+         * StringStringEntryProto key.
+         * @member {string} key
+         * @memberof onnx.StringStringEntryProto
+         * @instance
+         */
+        StringStringEntryProto.prototype.key = "";
+
+        /**
+         * StringStringEntryProto value.
+         * @member {string} value
+         * @memberof onnx.StringStringEntryProto
+         * @instance
+         */
+        StringStringEntryProto.prototype.value = "";
+
+        /**
+         * Creates a new StringStringEntryProto instance using the specified properties.
+         * @function create
+         * @memberof onnx.StringStringEntryProto
+         * @static
+         * @param {onnx.IStringStringEntryProto=} [properties] Properties to set
+         * @returns {onnx.StringStringEntryProto} StringStringEntryProto instance
+         */
+        StringStringEntryProto.create = function create(properties) {
+            return new StringStringEntryProto(properties);
+        };
+
+        /**
+         * Encodes the specified StringStringEntryProto message. Does not implicitly {@link onnx.StringStringEntryProto.verify|verify} messages.
+         * @function encode
+         * @memberof onnx.StringStringEntryProto
+         * @static
+         * @param {onnx.IStringStringEntryProto} message StringStringEntryProto message or plain object to encode
+         * @param {$protobuf.Writer} [writer] Writer to encode to
+         * @returns {$protobuf.Writer} Writer
+         */
+        StringStringEntryProto.encode = function encode(message, writer) {
+            if (!writer)
+                writer = $Writer.create();
+            if (message.key != null && Object.hasOwnProperty.call(message, "key"))
+                writer.uint32(/* id 1, wireType 2 =*/10).string(message.key);
+            if (message.value != null && Object.hasOwnProperty.call(message, "value"))
+                writer.uint32(/* id 2, wireType 2 =*/18).string(message.value);
+            return writer;
+        };
+
+        /**
+         * Encodes the specified StringStringEntryProto message, length delimited. Does not implicitly {@link onnx.StringStringEntryProto.verify|verify} messages.
+         * @function encodeDelimited
+         * @memberof onnx.StringStringEntryProto
+         * @static
+         * @param {onnx.IStringStringEntryProto} message StringStringEntryProto message or plain object to encode
+         * @param {$protobuf.Writer} [writer] Writer to encode to
+         * @returns {$protobuf.Writer} Writer
+         */
+        StringStringEntryProto.encodeDelimited = function encodeDelimited(message, writer) {
+            return this.encode(message, writer).ldelim();
+        };
+
+        /**
+         * Decodes a StringStringEntryProto message from the specified reader or buffer.
+         * @function decode
+         * @memberof onnx.StringStringEntryProto
+         * @static
+         * @param {$protobuf.Reader|Uint8Array} reader Reader or buffer to decode from
+         * @param {number} [length] Message length if known beforehand
+         * @returns {onnx.StringStringEntryProto} StringStringEntryProto
+         * @throws {Error} If the payload is not a reader or valid buffer
+         * @throws {$protobuf.util.ProtocolError} If required fields are missing
+         */
+        StringStringEntryProto.decode = function decode(reader, length) {
+            if (!(reader instanceof $Reader))
+                reader = $Reader.create(reader);
+            var end = length === undefined ? reader.len : reader.pos + length, message = new $root.onnx.StringStringEntryProto();
+            while (reader.pos < end) {
+                var tag = reader.uint32();
+                switch (tag >>> 3) {
+                case 1: {
+                        message.key = reader.string();
+                        break;
+                    }
+                case 2: {
+                        message.value = reader.string();
+                        break;
+                    }
+                default:
+                    reader.skipType(tag & 7);
+                    break;
+                }
+            }
+            return message;
+        };
+
+        /**
+         * Decodes a StringStringEntryProto message from the specified reader or buffer, length delimited.
+         * @function decodeDelimited
+         * @memberof onnx.StringStringEntryProto
+         * @static
+         * @param {$protobuf.Reader|Uint8Array} reader Reader or buffer to decode from
+         * @returns {onnx.StringStringEntryProto} StringStringEntryProto
+         * @throws {Error} If the payload is not a reader or valid buffer
+         * @throws {$protobuf.util.ProtocolError} If required fields are missing
+         */
+        StringStringEntryProto.decodeDelimited = function decodeDelimited(reader) {
+            if (!(reader instanceof $Reader))
+                reader = new $Reader(reader);
+            return this.decode(reader, reader.uint32());
+        };
+
+        /**
+         * Verifies a StringStringEntryProto message.
+         * @function verify
+         * @memberof onnx.StringStringEntryProto
+         * @static
+         * @param {Object.<string,*>} message Plain object to verify
+         * @returns {string|null} `null` if valid, otherwise the reason why it is not
+         */
+        StringStringEntryProto.verify = function verify(message) {
+            if (typeof message !== "object" || message === null)
+                return "object expected";
+            if (message.key != null && message.hasOwnProperty("key"))
+                if (!$util.isString(message.key))
+                    return "key: string expected";
+            if (message.value != null && message.hasOwnProperty("value"))
+                if (!$util.isString(message.value))
+                    return "value: string expected";
+            return null;
+        };
+
+        /**
+         * Creates a StringStringEntryProto message from a plain object. Also converts values to their respective internal types.
+         * @function fromObject
+         * @memberof onnx.StringStringEntryProto
+         * @static
+         * @param {Object.<string,*>} object Plain object
+         * @returns {onnx.StringStringEntryProto} StringStringEntryProto
+         */
+        StringStringEntryProto.fromObject = function fromObject(object) {
+            if (object instanceof $root.onnx.StringStringEntryProto)
+                return object;
+            var message = new $root.onnx.StringStringEntryProto();
+            if (object.key != null)
+                message.key = String(object.key);
+            if (object.value != null)
+                message.value = String(object.value);
+            return message;
+        };
+
+        /**
+         * Creates a plain object from a StringStringEntryProto message. Also converts values to other types if specified.
+         * @function toObject
+         * @memberof onnx.StringStringEntryProto
+         * @static
+         * @param {onnx.StringStringEntryProto} message StringStringEntryProto
+         * @param {$protobuf.IConversionOptions} [options] Conversion options
+         * @returns {Object.<string,*>} Plain object
+         */
+        StringStringEntryProto.toObject = function toObject(message, options) {
+            if (!options)
+                options = {};
+            var object = {};
+            if (options.defaults) {
+                object.key = "";
+                object.value = "";
+            }
+            if (message.key != null && message.hasOwnProperty("key"))
+                object.key = message.key;
+            if (message.value != null && message.hasOwnProperty("value"))
+                object.value = message.value;
+            return object;
+        };
+
+        /**
+         * Converts this StringStringEntryProto to JSON.
+         * @function toJSON
+         * @memberof onnx.StringStringEntryProto
+         * @instance
+         * @returns {Object.<string,*>} JSON object
+         */
+        StringStringEntryProto.prototype.toJSON = function toJSON() {
+            return this.constructor.toObject(this, $protobuf.util.toJSONOptions);
+        };
+
+        /**
+         * Gets the default type url for StringStringEntryProto
+         * @function getTypeUrl
+         * @memberof onnx.StringStringEntryProto
+         * @static
+         * @param {string} [typeUrlPrefix] your custom typeUrlPrefix(default "type.googleapis.com")
+         * @returns {string} The default type url
+         */
+        StringStringEntryProto.getTypeUrl = function getTypeUrl(typeUrlPrefix) {
+            if (typeUrlPrefix === undefined) {
+                typeUrlPrefix = "type.googleapis.com";
+            }
+            return typeUrlPrefix + "/onnx.StringStringEntryProto";
+        };
+
+        return StringStringEntryProto;
+    })();
+
+    onnx.TensorAnnotation = (function() {
+
+        /**
+         * Properties of a TensorAnnotation.
+         * @memberof onnx
+         * @interface ITensorAnnotation
+         * @property {string|null} [tensorName] TensorAnnotation tensorName
+         * @property {Array.<onnx.IStringStringEntryProto>|null} [quantParameterTensorNames] TensorAnnotation quantParameterTensorNames
+         */
+
+        /**
+         * Constructs a new TensorAnnotation.
+         * @memberof onnx
+         * @classdesc Represents a TensorAnnotation.
+         * @implements ITensorAnnotation
+         * @constructor
+         * @param {onnx.ITensorAnnotation=} [properties] Properties to set
+         */
+        function TensorAnnotation(properties) {
+            this.quantParameterTensorNames = [];
+            if (properties)
+                for (var keys = Object.keys(properties), i = 0; i < keys.length; ++i)
+                    if (properties[keys[i]] != null)
+                        this[keys[i]] = properties[keys[i]];
+        }
+
+        /**
+         * TensorAnnotation tensorName.
+         * @member {string} tensorName
+         * @memberof onnx.TensorAnnotation
+         * @instance
+         */
+        TensorAnnotation.prototype.tensorName = "";
+
+        /**
+         * TensorAnnotation quantParameterTensorNames.
+         * @member {Array.<onnx.IStringStringEntryProto>} quantParameterTensorNames
+         * @memberof onnx.TensorAnnotation
+         * @instance
+         */
+        TensorAnnotation.prototype.quantParameterTensorNames = $util.emptyArray;
+
+        /**
+         * Creates a new TensorAnnotation instance using the specified properties.
+         * @function create
+         * @memberof onnx.TensorAnnotation
+         * @static
+         * @param {onnx.ITensorAnnotation=} [properties] Properties to set
+         * @returns {onnx.TensorAnnotation} TensorAnnotation instance
+         */
+        TensorAnnotation.create = function create(properties) {
+            return new TensorAnnotation(properties);
+        };
+
+        /**
+         * Encodes the specified TensorAnnotation message. Does not implicitly {@link onnx.TensorAnnotation.verify|verify} messages.
+         * @function encode
+         * @memberof onnx.TensorAnnotation
+         * @static
+         * @param {onnx.ITensorAnnotation} message TensorAnnotation message or plain object to encode
+         * @param {$protobuf.Writer} [writer] Writer to encode to
+         * @returns {$protobuf.Writer} Writer
+         */
+        TensorAnnotation.encode = function encode(message, writer) {
+            if (!writer)
+                writer = $Writer.create();
+            if (message.tensorName != null && Object.hasOwnProperty.call(message, "tensorName"))
+                writer.uint32(/* id 1, wireType 2 =*/10).string(message.tensorName);
+            if (message.quantParameterTensorNames != null && message.quantParameterTensorNames.length)
+                for (var i = 0; i < message.quantParameterTensorNames.length; ++i)
+                    $root.onnx.StringStringEntryProto.encode(message.quantParameterTensorNames[i], writer.uint32(/* id 2, wireType 2 =*/18).fork()).ldelim();
+            return writer;
+        };
+
+        /**
+         * Encodes the specified TensorAnnotation message, length delimited. Does not implicitly {@link onnx.TensorAnnotation.verify|verify} messages.
+         * @function encodeDelimited
+         * @memberof onnx.TensorAnnotation
+         * @static
+         * @param {onnx.ITensorAnnotation} message TensorAnnotation message or plain object to encode
+         * @param {$protobuf.Writer} [writer] Writer to encode to
+         * @returns {$protobuf.Writer} Writer
+         */
+        TensorAnnotation.encodeDelimited = function encodeDelimited(message, writer) {
+            return this.encode(message, writer).ldelim();
+        };
+
+        /**
+         * Decodes a TensorAnnotation message from the specified reader or buffer.
+         * @function decode
+         * @memberof onnx.TensorAnnotation
+         * @static
+         * @param {$protobuf.Reader|Uint8Array} reader Reader or buffer to decode from
+         * @param {number} [length] Message length if known beforehand
+         * @returns {onnx.TensorAnnotation} TensorAnnotation
+         * @throws {Error} If the payload is not a reader or valid buffer
+         * @throws {$protobuf.util.ProtocolError} If required fields are missing
+         */
+        TensorAnnotation.decode = function decode(reader, length) {
+            if (!(reader instanceof $Reader))
+                reader = $Reader.create(reader);
+            var end = length === undefined ? reader.len : reader.pos + length, message = new $root.onnx.TensorAnnotation();
+            while (reader.pos < end) {
+                var tag = reader.uint32();
+                switch (tag >>> 3) {
+                case 1: {
+                        message.tensorName = reader.string();
+                        break;
+                    }
+                case 2: {
+                        if (!(message.quantParameterTensorNames && message.quantParameterTensorNames.length))
+                            message.quantParameterTensorNames = [];
+                        message.quantParameterTensorNames.push($root.onnx.StringStringEntryProto.decode(reader, reader.uint32()));
+                        break;
+                    }
+                default:
+                    reader.skipType(tag & 7);
+                    break;
+                }
+            }
+            return message;
+        };
+
+        /**
+         * Decodes a TensorAnnotation message from the specified reader or buffer, length delimited.
+         * @function decodeDelimited
+         * @memberof onnx.TensorAnnotation
+         * @static
+         * @param {$protobuf.Reader|Uint8Array} reader Reader or buffer to decode from
+         * @returns {onnx.TensorAnnotation} TensorAnnotation
+         * @throws {Error} If the payload is not a reader or valid buffer
+         * @throws {$protobuf.util.ProtocolError} If required fields are missing
+         */
+        TensorAnnotation.decodeDelimited = function decodeDelimited(reader) {
+            if (!(reader instanceof $Reader))
+                reader = new $Reader(reader);
+            return this.decode(reader, reader.uint32());
+        };
+
+        /**
+         * Verifies a TensorAnnotation message.
+         * @function verify
+         * @memberof onnx.TensorAnnotation
+         * @static
+         * @param {Object.<string,*>} message Plain object to verify
+         * @returns {string|null} `null` if valid, otherwise the reason why it is not
+         */
+        TensorAnnotation.verify = function verify(message) {
+            if (typeof message !== "object" || message === null)
+                return "object expected";
+            if (message.tensorName != null && message.hasOwnProperty("tensorName"))
+                if (!$util.isString(message.tensorName))
+                    return "tensorName: string expected";
+            if (message.quantParameterTensorNames != null && message.hasOwnProperty("quantParameterTensorNames")) {
+                if (!Array.isArray(message.quantParameterTensorNames))
+                    return "quantParameterTensorNames: array expected";
+                for (var i = 0; i < message.quantParameterTensorNames.length; ++i) {
+                    var error = $root.onnx.StringStringEntryProto.verify(message.quantParameterTensorNames[i]);
+                    if (error)
+                        return "quantParameterTensorNames." + error;
+                }
+            }
+            return null;
+        };
+
+        /**
+         * Creates a TensorAnnotation message from a plain object. Also converts values to their respective internal types.
+         * @function fromObject
+         * @memberof onnx.TensorAnnotation
+         * @static
+         * @param {Object.<string,*>} object Plain object
+         * @returns {onnx.TensorAnnotation} TensorAnnotation
+         */
+        TensorAnnotation.fromObject = function fromObject(object) {
+            if (object instanceof $root.onnx.TensorAnnotation)
+                return object;
+            var message = new $root.onnx.TensorAnnotation();
+            if (object.tensorName != null)
+                message.tensorName = String(object.tensorName);
+            if (object.quantParameterTensorNames) {
+                if (!Array.isArray(object.quantParameterTensorNames))
+                    throw TypeError(".onnx.TensorAnnotation.quantParameterTensorNames: array expected");
+                message.quantParameterTensorNames = [];
+                for (var i = 0; i < object.quantParameterTensorNames.length; ++i) {
+                    if (typeof object.quantParameterTensorNames[i] !== "object")
+                        throw TypeError(".onnx.TensorAnnotation.quantParameterTensorNames: object expected");
+                    message.quantParameterTensorNames[i] = $root.onnx.StringStringEntryProto.fromObject(object.quantParameterTensorNames[i]);
+                }
+            }
+            return message;
+        };
+
+        /**
+         * Creates a plain object from a TensorAnnotation message. Also converts values to other types if specified.
+         * @function toObject
+         * @memberof onnx.TensorAnnotation
+         * @static
+         * @param {onnx.TensorAnnotation} message TensorAnnotation
+         * @param {$protobuf.IConversionOptions} [options] Conversion options
+         * @returns {Object.<string,*>} Plain object
+         */
+        TensorAnnotation.toObject = function toObject(message, options) {
+            if (!options)
+                options = {};
+            var object = {};
+            if (options.arrays || options.defaults)
+                object.quantParameterTensorNames = [];
+            if (options.defaults)
+                object.tensorName = "";
+            if (message.tensorName != null && message.hasOwnProperty("tensorName"))
+                object.tensorName = message.tensorName;
+            if (message.quantParameterTensorNames && message.quantParameterTensorNames.length) {
+                object.quantParameterTensorNames = [];
+                for (var j = 0; j < message.quantParameterTensorNames.length; ++j)
+                    object.quantParameterTensorNames[j] = $root.onnx.StringStringEntryProto.toObject(message.quantParameterTensorNames[j], options);
+            }
+            return object;
+        };
+
+        /**
+         * Converts this TensorAnnotation to JSON.
+         * @function toJSON
+         * @memberof onnx.TensorAnnotation
+         * @instance
+         * @returns {Object.<string,*>} JSON object
+         */
+        TensorAnnotation.prototype.toJSON = function toJSON() {
+            return this.constructor.toObject(this, $protobuf.util.toJSONOptions);
+        };
+
+        /**
+         * Gets the default type url for TensorAnnotation
+         * @function getTypeUrl
+         * @memberof onnx.TensorAnnotation
+         * @static
+         * @param {string} [typeUrlPrefix] your custom typeUrlPrefix(default "type.googleapis.com")
+         * @returns {string} The default type url
+         */
+        TensorAnnotation.getTypeUrl = function getTypeUrl(typeUrlPrefix) {
+            if (typeUrlPrefix === undefined) {
+                typeUrlPrefix = "type.googleapis.com";
+            }
+            return typeUrlPrefix + "/onnx.TensorAnnotation";
+        };
+
+        return TensorAnnotation;
+    })();
+
+    onnx.GraphProto = (function() {
+
+        /**
+         * Properties of a GraphProto.
+         * @memberof onnx
+         * @interface IGraphProto
+         * @property {Array.<onnx.INodeProto>|null} [node] GraphProto node
+         * @property {string|null} [name] GraphProto name
+         * @property {Array.<onnx.ITensorProto>|null} [initializer] GraphProto initializer
+         * @property {Array.<onnx.ISparseTensorProto>|null} [sparseInitializer] GraphProto sparseInitializer
+         * @property {string|null} [docString] GraphProto docString
+         * @property {Array.<onnx.IValueInfoProto>|null} [input] GraphProto input
+         * @property {Array.<onnx.IValueInfoProto>|null} [output] GraphProto output
+         * @property {Array.<onnx.IValueInfoProto>|null} [valueInfo] GraphProto valueInfo
+         * @property {Array.<onnx.ITensorAnnotation>|null} [quantizationAnnotation] GraphProto quantizationAnnotation
+         */
+
+        /**
+         * Constructs a new GraphProto.
+         * @memberof onnx
+         * @classdesc Represents a GraphProto.
+         * @implements IGraphProto
+         * @constructor
+         * @param {onnx.IGraphProto=} [properties] Properties to set
+         */
+        function GraphProto(properties) {
+            this.node = [];
+            this.initializer = [];
+            this.sparseInitializer = [];
+            this.input = [];
+            this.output = [];
+            this.valueInfo = [];
+            this.quantizationAnnotation = [];
+            if (properties)
+                for (var keys = Object.keys(properties), i = 0; i < keys.length; ++i)
+                    if (properties[keys[i]] != null)
+                        this[keys[i]] = properties[keys[i]];
+        }
+
+        /**
+         * GraphProto node.
+         * @member {Array.<onnx.INodeProto>} node
+         * @memberof onnx.GraphProto
+         * @instance
+         */
+        GraphProto.prototype.node = $util.emptyArray;
+
+        /**
+         * GraphProto name.
+         * @member {string} name
+         * @memberof onnx.GraphProto
+         * @instance
+         */
+        GraphProto.prototype.name = "";
+
+        /**
+         * GraphProto initializer.
+         * @member {Array.<onnx.ITensorProto>} initializer
+         * @memberof onnx.GraphProto
+         * @instance
+         */
+        GraphProto.prototype.initializer = $util.emptyArray;
+
+        /**
+         * GraphProto sparseInitializer.
+         * @member {Array.<onnx.ISparseTensorProto>} sparseInitializer
+         * @memberof onnx.GraphProto
+         * @instance
+         */
+        GraphProto.prototype.sparseInitializer = $util.emptyArray;
+
+        /**
+         * GraphProto docString.
+         * @member {string} docString
+         * @memberof onnx.GraphProto
+         * @instance
+         */
+        GraphProto.prototype.docString = "";
+
+        /**
+         * GraphProto input.
+         * @member {Array.<onnx.IValueInfoProto>} input
+         * @memberof onnx.GraphProto
+         * @instance
+         */
+        GraphProto.prototype.input = $util.emptyArray;
+
+        /**
+         * GraphProto output.
+         * @member {Array.<onnx.IValueInfoProto>} output
+         * @memberof onnx.GraphProto
+         * @instance
+         */
+        GraphProto.prototype.output = $util.emptyArray;
+
+        /**
+         * GraphProto valueInfo.
+         * @member {Array.<onnx.IValueInfoProto>} valueInfo
+         * @memberof onnx.GraphProto
+         * @instance
+         */
+        GraphProto.prototype.valueInfo = $util.emptyArray;
+
+        /**
+         * GraphProto quantizationAnnotation.
+         * @member {Array.<onnx.ITensorAnnotation>} quantizationAnnotation
+         * @memberof onnx.GraphProto
+         * @instance
+         */
+        GraphProto.prototype.quantizationAnnotation = $util.emptyArray;
+
+        /**
+         * Creates a new GraphProto instance using the specified properties.
+         * @function create
+         * @memberof onnx.GraphProto
+         * @static
+         * @param {onnx.IGraphProto=} [properties] Properties to set
+         * @returns {onnx.GraphProto} GraphProto instance
+         */
+        GraphProto.create = function create(properties) {
+            return new GraphProto(properties);
+        };
+
+        /**
+         * Encodes the specified GraphProto message. Does not implicitly {@link onnx.GraphProto.verify|verify} messages.
+         * @function encode
+         * @memberof onnx.GraphProto
+         * @static
+         * @param {onnx.IGraphProto} message GraphProto message or plain object to encode
+         * @param {$protobuf.Writer} [writer] Writer to encode to
+         * @returns {$protobuf.Writer} Writer
+         */
+        GraphProto.encode = function encode(message, writer) {
+            if (!writer)
+                writer = $Writer.create();
+            if (message.node != null && message.node.length)
+                for (var i = 0; i < message.node.length; ++i)
+                    $root.onnx.NodeProto.encode(message.node[i], writer.uint32(/* id 1, wireType 2 =*/10).fork()).ldelim();
+            if (message.name != null && Object.hasOwnProperty.call(message, "name"))
+                writer.uint32(/* id 2, wireType 2 =*/18).string(message.name);
+            if (message.initializer != null && message.initializer.length)
+                for (var i = 0; i < message.initializer.length; ++i)
+                    $root.onnx.TensorProto.encode(message.initializer[i], writer.uint32(/* id 5, wireType 2 =*/42).fork()).ldelim();
+            if (message.docString != null && Object.hasOwnProperty.call(message, "docString"))
+                writer.uint32(/* id 10, wireType 2 =*/82).string(message.docString);
+            if (message.input != null && message.input.length)
+                for (var i = 0; i < message.input.length; ++i)
+                    $root.onnx.ValueInfoProto.encode(message.input[i], writer.uint32(/* id 11, wireType 2 =*/90).fork()).ldelim();
+            if (message.output != null && message.output.length)
+                for (var i = 0; i < message.output.length; ++i)
+                    $root.onnx.ValueInfoProto.encode(message.output[i], writer.uint32(/* id 12, wireType 2 =*/98).fork()).ldelim();
+            if (message.valueInfo != null && message.valueInfo.length)
+                for (var i = 0; i < message.valueInfo.length; ++i)
+                    $root.onnx.ValueInfoProto.encode(message.valueInfo[i], writer.uint32(/* id 13, wireType 2 =*/106).fork()).ldelim();
+            if (message.quantizationAnnotation != null && message.quantizationAnnotation.length)
+                for (var i = 0; i < message.quantizationAnnotation.length; ++i)
+                    $root.onnx.TensorAnnotation.encode(message.quantizationAnnotation[i], writer.uint32(/* id 14, wireType 2 =*/114).fork()).ldelim();
+            if (message.sparseInitializer != null && message.sparseInitializer.length)
+                for (var i = 0; i < message.sparseInitializer.length; ++i)
+                    $root.onnx.SparseTensorProto.encode(message.sparseInitializer[i], writer.uint32(/* id 15, wireType 2 =*/122).fork()).ldelim();
+            return writer;
+        };
+
+        /**
+         * Encodes the specified GraphProto message, length delimited. Does not implicitly {@link onnx.GraphProto.verify|verify} messages.
+         * @function encodeDelimited
+         * @memberof onnx.GraphProto
+         * @static
+         * @param {onnx.IGraphProto} message GraphProto message or plain object to encode
+         * @param {$protobuf.Writer} [writer] Writer to encode to
+         * @returns {$protobuf.Writer} Writer
+         */
+        GraphProto.encodeDelimited = function encodeDelimited(message, writer) {
+            return this.encode(message, writer).ldelim();
+        };
+
+        /**
+         * Decodes a GraphProto message from the specified reader or buffer.
+         * @function decode
+         * @memberof onnx.GraphProto
+         * @static
+         * @param {$protobuf.Reader|Uint8Array} reader Reader or buffer to decode from
+         * @param {number} [length] Message length if known beforehand
+         * @returns {onnx.GraphProto} GraphProto
+         * @throws {Error} If the payload is not a reader or valid buffer
+         * @throws {$protobuf.util.ProtocolError} If required fields are missing
+         */
+        GraphProto.decode = function decode(reader, length) {
+            if (!(reader instanceof $Reader))
+                reader = $Reader.create(reader);
+            var end = length === undefined ? reader.len : reader.pos + length, message = new $root.onnx.GraphProto();
+            while (reader.pos < end) {
+                var tag = reader.uint32();
+                switch (tag >>> 3) {
+                case 1: {
+                        if (!(message.node && message.node.length))
+                            message.node = [];
+                        message.node.push($root.onnx.NodeProto.decode(reader, reader.uint32()));
+                        break;
+                    }
+                case 2: {
+                        message.name = reader.string();
+                        break;
+                    }
+                case 5: {
+                        if (!(message.initializer && message.initializer.length))
+                            message.initializer = [];
+                        message.initializer.push($root.onnx.TensorProto.decode(reader, reader.uint32()));
+                        break;
+                    }
+                case 15: {
+                        if (!(message.sparseInitializer && message.sparseInitializer.length))
+                            message.sparseInitializer = [];
+                        message.sparseInitializer.push($root.onnx.SparseTensorProto.decode(reader, reader.uint32()));
+                        break;
+                    }
+                case 10: {
+                        message.docString = reader.string();
+                        break;
+                    }
+                case 11: {
+                        if (!(message.input && message.input.length))
+                            message.input = [];
+                        message.input.push($root.onnx.ValueInfoProto.decode(reader, reader.uint32()));
+                        break;
+                    }
+                case 12: {
+                        if (!(message.output && message.output.length))
+                            message.output = [];
+                        message.output.push($root.onnx.ValueInfoProto.decode(reader, reader.uint32()));
+                        break;
+                    }
+                case 13: {
+                        if (!(message.valueInfo && message.valueInfo.length))
+                            message.valueInfo = [];
+                        message.valueInfo.push($root.onnx.ValueInfoProto.decode(reader, reader.uint32()));
+                        break;
+                    }
+                case 14: {
+                        if (!(message.quantizationAnnotation && message.quantizationAnnotation.length))
+                            message.quantizationAnnotation = [];
+                        message.quantizationAnnotation.push($root.onnx.TensorAnnotation.decode(reader, reader.uint32()));
+                        break;
+                    }
+                default:
+                    reader.skipType(tag & 7);
+                    break;
+                }
+            }
+            return message;
+        };
+
+        /**
+         * Decodes a GraphProto message from the specified reader or buffer, length delimited.
+         * @function decodeDelimited
+         * @memberof onnx.GraphProto
+         * @static
+         * @param {$protobuf.Reader|Uint8Array} reader Reader or buffer to decode from
+         * @returns {onnx.GraphProto} GraphProto
+         * @throws {Error} If the payload is not a reader or valid buffer
+         * @throws {$protobuf.util.ProtocolError} If required fields are missing
+         */
+        GraphProto.decodeDelimited = function decodeDelimited(reader) {
+            if (!(reader instanceof $Reader))
+                reader = new $Reader(reader);
+            return this.decode(reader, reader.uint32());
+        };
+
+        /**
+         * Verifies a GraphProto message.
+         * @function verify
+         * @memberof onnx.GraphProto
+         * @static
+         * @param {Object.<string,*>} message Plain object to verify
+         * @returns {string|null} `null` if valid, otherwise the reason why it is not
+         */
+        GraphProto.verify = function verify(message) {
+            if (typeof message !== "object" || message === null)
+                return "object expected";
+            if (message.node != null && message.hasOwnProperty("node")) {
+                if (!Array.isArray(message.node))
+                    return "node: array expected";
+                for (var i = 0; i < message.node.length; ++i) {
+                    var error = $root.onnx.NodeProto.verify(message.node[i]);
+                    if (error)
+                        return "node." + error;
+                }
+            }
+            if (message.name != null && message.hasOwnProperty("name"))
+                if (!$util.isString(message.name))
+                    return "name: string expected";
+            if (message.initializer != null && message.hasOwnProperty("initializer")) {
+                if (!Array.isArray(message.initializer))
+                    return "initializer: array expected";
+                for (var i = 0; i < message.initializer.length; ++i) {
+                    var error = $root.onnx.TensorProto.verify(message.initializer[i]);
+                    if (error)
+                        return "initializer." + error;
+                }
+            }
+            if (message.sparseInitializer != null && message.hasOwnProperty("sparseInitializer")) {
+                if (!Array.isArray(message.sparseInitializer))
+                    return "sparseInitializer: array expected";
+                for (var i = 0; i < message.sparseInitializer.length; ++i) {
+                    var error = $root.onnx.SparseTensorProto.verify(message.sparseInitializer[i]);
+                    if (error)
+                        return "sparseInitializer." + error;
+                }
+            }
+            if (message.docString != null && message.hasOwnProperty("docString"))
+                if (!$util.isString(message.docString))
+                    return "docString: string expected";
+            if (message.input != null && message.hasOwnProperty("input")) {
+                if (!Array.isArray(message.input))
+                    return "input: array expected";
+                for (var i = 0; i < message.input.length; ++i) {
+                    var error = $root.onnx.ValueInfoProto.verify(message.input[i]);
+                    if (error)
+                        return "input." + error;
+                }
+            }
+            if (message.output != null && message.hasOwnProperty("output")) {
+                if (!Array.isArray(message.output))
+                    return "output: array expected";
+                for (var i = 0; i < message.output.length; ++i) {
+                    var error = $root.onnx.ValueInfoProto.verify(message.output[i]);
+                    if (error)
+                        return "output." + error;
+                }
+            }
+            if (message.valueInfo != null && message.hasOwnProperty("valueInfo")) {
+                if (!Array.isArray(message.valueInfo))
+                    return "valueInfo: array expected";
+                for (var i = 0; i < message.valueInfo.length; ++i) {
+                    var error = $root.onnx.ValueInfoProto.verify(message.valueInfo[i]);
+                    if (error)
+                        return "valueInfo." + error;
+                }
+            }
+            if (message.quantizationAnnotation != null && message.hasOwnProperty("quantizationAnnotation")) {
+                if (!Array.isArray(message.quantizationAnnotation))
+                    return "quantizationAnnotation: array expected";
+                for (var i = 0; i < message.quantizationAnnotation.length; ++i) {
+                    var error = $root.onnx.TensorAnnotation.verify(message.quantizationAnnotation[i]);
+                    if (error)
+                        return "quantizationAnnotation." + error;
+                }
+            }
+            return null;
+        };
+
+        /**
+         * Creates a GraphProto message from a plain object. Also converts values to their respective internal types.
+         * @function fromObject
+         * @memberof onnx.GraphProto
+         * @static
+         * @param {Object.<string,*>} object Plain object
+         * @returns {onnx.GraphProto} GraphProto
+         */
+        GraphProto.fromObject = function fromObject(object) {
+            if (object instanceof $root.onnx.GraphProto)
+                return object;
+            var message = new $root.onnx.GraphProto();
+            if (object.node) {
+                if (!Array.isArray(object.node))
+                    throw TypeError(".onnx.GraphProto.node: array expected");
+                message.node = [];
+                for (var i = 0; i < object.node.length; ++i) {
+                    if (typeof object.node[i] !== "object")
+                        throw TypeError(".onnx.GraphProto.node: object expected");
+                    message.node[i] = $root.onnx.NodeProto.fromObject(object.node[i]);
+                }
+            }
+            if (object.name != null)
+                message.name = String(object.name);
+            if (object.initializer) {
+                if (!Array.isArray(object.initializer))
+                    throw TypeError(".onnx.GraphProto.initializer: array expected");
+                message.initializer = [];
+                for (var i = 0; i < object.initializer.length; ++i) {
+                    if (typeof object.initializer[i] !== "object")
+                        throw TypeError(".onnx.GraphProto.initializer: object expected");
+                    message.initializer[i] = $root.onnx.TensorProto.fromObject(object.initializer[i]);
+                }
+            }
+            if (object.sparseInitializer) {
+                if (!Array.isArray(object.sparseInitializer))
+                    throw TypeError(".onnx.GraphProto.sparseInitializer: array expected");
+                message.sparseInitializer = [];
+                for (var i = 0; i < object.sparseInitializer.length; ++i) {
+                    if (typeof object.sparseInitializer[i] !== "object")
+                        throw TypeError(".onnx.GraphProto.sparseInitializer: object expected");
+                    message.sparseInitializer[i] = $root.onnx.SparseTensorProto.fromObject(object.sparseInitializer[i]);
+                }
+            }
+            if (object.docString != null)
+                message.docString = String(object.docString);
+            if (object.input) {
+                if (!Array.isArray(object.input))
+                    throw TypeError(".onnx.GraphProto.input: array expected");
+                message.input = [];
+                for (var i = 0; i < object.input.length; ++i) {
+                    if (typeof object.input[i] !== "object")
+                        throw TypeError(".onnx.GraphProto.input: object expected");
+                    message.input[i] = $root.onnx.ValueInfoProto.fromObject(object.input[i]);
+                }
+            }
+            if (object.output) {
+                if (!Array.isArray(object.output))
+                    throw TypeError(".onnx.GraphProto.output: array expected");
+                message.output = [];
+                for (var i = 0; i < object.output.length; ++i) {
+                    if (typeof object.output[i] !== "object")
+                        throw TypeError(".onnx.GraphProto.output: object expected");
+                    message.output[i] = $root.onnx.ValueInfoProto.fromObject(object.output[i]);
+                }
+            }
+            if (object.valueInfo) {
+                if (!Array.isArray(object.valueInfo))
+                    throw TypeError(".onnx.GraphProto.valueInfo: array expected");
+                message.valueInfo = [];
+                for (var i = 0; i < object.valueInfo.length; ++i) {
+                    if (typeof object.valueInfo[i] !== "object")
+                        throw TypeError(".onnx.GraphProto.valueInfo: object expected");
+                    message.valueInfo[i] = $root.onnx.ValueInfoProto.fromObject(object.valueInfo[i]);
+                }
+            }
+            if (object.quantizationAnnotation) {
+                if (!Array.isArray(object.quantizationAnnotation))
+                    throw TypeError(".onnx.GraphProto.quantizationAnnotation: array expected");
+                message.quantizationAnnotation = [];
+                for (var i = 0; i < object.quantizationAnnotation.length; ++i) {
+                    if (typeof object.quantizationAnnotation[i] !== "object")
+                        throw TypeError(".onnx.GraphProto.quantizationAnnotation: object expected");
+                    message.quantizationAnnotation[i] = $root.onnx.TensorAnnotation.fromObject(object.quantizationAnnotation[i]);
+                }
+            }
+            return message;
+        };
+
+        /**
+         * Creates a plain object from a GraphProto message. Also converts values to other types if specified.
+         * @function toObject
+         * @memberof onnx.GraphProto
+         * @static
+         * @param {onnx.GraphProto} message GraphProto
+         * @param {$protobuf.IConversionOptions} [options] Conversion options
+         * @returns {Object.<string,*>} Plain object
+         */
+        GraphProto.toObject = function toObject(message, options) {
+            if (!options)
+                options = {};
+            var object = {};
+            if (options.arrays || options.defaults) {
+                object.node = [];
+                object.initializer = [];
+                object.input = [];
+                object.output = [];
+                object.valueInfo = [];
+                object.quantizationAnnotation = [];
+                object.sparseInitializer = [];
+            }
+            if (options.defaults) {
+                object.name = "";
+                object.docString = "";
+            }
+            if (message.node && message.node.length) {
+                object.node = [];
+                for (var j = 0; j < message.node.length; ++j)
+                    object.node[j] = $root.onnx.NodeProto.toObject(message.node[j], options);
+            }
+            if (message.name != null && message.hasOwnProperty("name"))
+                object.name = message.name;
+            if (message.initializer && message.initializer.length) {
+                object.initializer = [];
+                for (var j = 0; j < message.initializer.length; ++j)
+                    object.initializer[j] = $root.onnx.TensorProto.toObject(message.initializer[j], options);
+            }
+            if (message.docString != null && message.hasOwnProperty("docString"))
+                object.docString = message.docString;
+            if (message.input && message.input.length) {
+                object.input = [];
+                for (var j = 0; j < message.input.length; ++j)
+                    object.input[j] = $root.onnx.ValueInfoProto.toObject(message.input[j], options);
+            }
+            if (message.output && message.output.length) {
+                object.output = [];
+                for (var j = 0; j < message.output.length; ++j)
+                    object.output[j] = $root.onnx.ValueInfoProto.toObject(message.output[j], options);
+            }
+            if (message.valueInfo && message.valueInfo.length) {
+                object.valueInfo = [];
+                for (var j = 0; j < message.valueInfo.length; ++j)
+                    object.valueInfo[j] = $root.onnx.ValueInfoProto.toObject(message.valueInfo[j], options);
+            }
+            if (message.quantizationAnnotation && message.quantizationAnnotation.length) {
+                object.quantizationAnnotation = [];
+                for (var j = 0; j < message.quantizationAnnotation.length; ++j)
+                    object.quantizationAnnotation[j] = $root.onnx.TensorAnnotation.toObject(message.quantizationAnnotation[j], options);
+            }
+            if (message.sparseInitializer && message.sparseInitializer.length) {
+                object.sparseInitializer = [];
+                for (var j = 0; j < message.sparseInitializer.length; ++j)
+                    object.sparseInitializer[j] = $root.onnx.SparseTensorProto.toObject(message.sparseInitializer[j], options);
+            }
+            return object;
+        };
+
+        /**
+         * Converts this GraphProto to JSON.
+         * @function toJSON
+         * @memberof onnx.GraphProto
+         * @instance
+         * @returns {Object.<string,*>} JSON object
+         */
+        GraphProto.prototype.toJSON = function toJSON() {
+            return this.constructor.toObject(this, $protobuf.util.toJSONOptions);
+        };
+
+        /**
+         * Gets the default type url for GraphProto
+         * @function getTypeUrl
+         * @memberof onnx.GraphProto
+         * @static
+         * @param {string} [typeUrlPrefix] your custom typeUrlPrefix(default "type.googleapis.com")
+         * @returns {string} The default type url
+         */
+        GraphProto.getTypeUrl = function getTypeUrl(typeUrlPrefix) {
+            if (typeUrlPrefix === undefined) {
+                typeUrlPrefix = "type.googleapis.com";
+            }
+            return typeUrlPrefix + "/onnx.GraphProto";
+        };
+
+        return GraphProto;
+    })();
+
+    onnx.TensorProto = (function() {
+
+        /**
+         * Properties of a TensorProto.
+         * @memberof onnx
+         * @interface ITensorProto
+         * @property {Array.<number|Long>|null} [dims] TensorProto dims
+         * @property {number|null} [dataType] TensorProto dataType
+         * @property {onnx.TensorProto.ISegment|null} [segment] TensorProto segment
+         * @property {Array.<number>|null} [floatData] TensorProto floatData
+         * @property {Array.<number>|null} [int32Data] TensorProto int32Data
+         * @property {Array.<Uint8Array>|null} [stringData] TensorProto stringData
+         * @property {Array.<number|Long>|null} [int64Data] TensorProto int64Data
+         * @property {string|null} [name] TensorProto name
+         * @property {string|null} [docString] TensorProto docString
+         * @property {Uint8Array|null} [rawData] TensorProto rawData
+         * @property {Array.<onnx.IStringStringEntryProto>|null} [externalData] TensorProto externalData
+         * @property {onnx.TensorProto.DataLocation|null} [dataLocation] TensorProto dataLocation
+         * @property {Array.<number>|null} [doubleData] TensorProto doubleData
+         * @property {Array.<number|Long>|null} [uint64Data] TensorProto uint64Data
+         */
+
+        /**
+         * Constructs a new TensorProto.
+         * @memberof onnx
+         * @classdesc Represents a TensorProto.
+         * @implements ITensorProto
+         * @constructor
+         * @param {onnx.ITensorProto=} [properties] Properties to set
+         */
+        function TensorProto(properties) {
+            this.dims = [];
+            this.floatData = [];
+            this.int32Data = [];
+            this.stringData = [];
+            this.int64Data = [];
+            this.externalData = [];
+            this.doubleData = [];
+            this.uint64Data = [];
+            if (properties)
+                for (var keys = Object.keys(properties), i = 0; i < keys.length; ++i)
+                    if (properties[keys[i]] != null)
+                        this[keys[i]] = properties[keys[i]];
+        }
+
+        /**
+         * TensorProto dims.
+         * @member {Array.<number|Long>} dims
+         * @memberof onnx.TensorProto
+         * @instance
+         */
+        TensorProto.prototype.dims = $util.emptyArray;
+
+        /**
+         * TensorProto dataType.
+         * @member {number} dataType
+         * @memberof onnx.TensorProto
+         * @instance
+         */
+        TensorProto.prototype.dataType = 0;
+
+        /**
+         * TensorProto segment.
+         * @member {onnx.TensorProto.ISegment|null|undefined} segment
+         * @memberof onnx.TensorProto
+         * @instance
+         */
+        TensorProto.prototype.segment = null;
+
+        /**
+         * TensorProto floatData.
+         * @member {Array.<number>} floatData
+         * @memberof onnx.TensorProto
+         * @instance
+         */
+        TensorProto.prototype.floatData = $util.emptyArray;
+
+        /**
+         * TensorProto int32Data.
+         * @member {Array.<number>} int32Data
+         * @memberof onnx.TensorProto
+         * @instance
+         */
+        TensorProto.prototype.int32Data = $util.emptyArray;
+
+        /**
+         * TensorProto stringData.
+         * @member {Array.<Uint8Array>} stringData
+         * @memberof onnx.TensorProto
+         * @instance
+         */
+        TensorProto.prototype.stringData = $util.emptyArray;
+
+        /**
+         * TensorProto int64Data.
+         * @member {Array.<number|Long>} int64Data
+         * @memberof onnx.TensorProto
+         * @instance
+         */
+        TensorProto.prototype.int64Data = $util.emptyArray;
+
+        /**
+         * TensorProto name.
+         * @member {string} name
+         * @memberof onnx.TensorProto
+         * @instance
+         */
+        TensorProto.prototype.name = "";
+
+        /**
+         * TensorProto docString.
+         * @member {string} docString
+         * @memberof onnx.TensorProto
+         * @instance
+         */
+        TensorProto.prototype.docString = "";
+
+        /**
+         * TensorProto rawData.
+         * @member {Uint8Array} rawData
+         * @memberof onnx.TensorProto
+         * @instance
+         */
+        TensorProto.prototype.rawData = $util.newBuffer([]);
+
+        /**
+         * TensorProto externalData.
+         * @member {Array.<onnx.IStringStringEntryProto>} externalData
+         * @memberof onnx.TensorProto
+         * @instance
+         */
+        TensorProto.prototype.externalData = $util.emptyArray;
+
+        /**
+         * TensorProto dataLocation.
+         * @member {onnx.TensorProto.DataLocation} dataLocation
+         * @memberof onnx.TensorProto
+         * @instance
+         */
+        TensorProto.prototype.dataLocation = 0;
+
+        /**
+         * TensorProto doubleData.
+         * @member {Array.<number>} doubleData
+         * @memberof onnx.TensorProto
+         * @instance
+         */
+        TensorProto.prototype.doubleData = $util.emptyArray;
+
+        /**
+         * TensorProto uint64Data.
+         * @member {Array.<number|Long>} uint64Data
+         * @memberof onnx.TensorProto
+         * @instance
+         */
+        TensorProto.prototype.uint64Data = $util.emptyArray;
+
+        /**
+         * Creates a new TensorProto instance using the specified properties.
+         * @function create
+         * @memberof onnx.TensorProto
+         * @static
+         * @param {onnx.ITensorProto=} [properties] Properties to set
+         * @returns {onnx.TensorProto} TensorProto instance
+         */
+        TensorProto.create = function create(properties) {
+            return new TensorProto(properties);
+        };
+
+        /**
+         * Encodes the specified TensorProto message. Does not implicitly {@link onnx.TensorProto.verify|verify} messages.
+         * @function encode
+         * @memberof onnx.TensorProto
+         * @static
+         * @param {onnx.ITensorProto} message TensorProto message or plain object to encode
+         * @param {$protobuf.Writer} [writer] Writer to encode to
+         * @returns {$protobuf.Writer} Writer
+         */
+        TensorProto.encode = function encode(message, writer) {
+            if (!writer)
+                writer = $Writer.create();
+            if (message.dims != null && message.dims.length) {
+                writer.uint32(/* id 1, wireType 2 =*/10).fork();
+                for (var i = 0; i < message.dims.length; ++i)
+                    writer.int64(message.dims[i]);
+                writer.ldelim();
+            }
+            if (message.dataType != null && Object.hasOwnProperty.call(message, "dataType"))
+                writer.uint32(/* id 2, wireType 0 =*/16).int32(message.dataType);
+            if (message.segment != null && Object.hasOwnProperty.call(message, "segment"))
+                $root.onnx.TensorProto.Segment.encode(message.segment, writer.uint32(/* id 3, wireType 2 =*/26).fork()).ldelim();
+            if (message.floatData != null && message.floatData.length) {
+                writer.uint32(/* id 4, wireType 2 =*/34).fork();
+                for (var i = 0; i < message.floatData.length; ++i)
+                    writer.float(message.floatData[i]);
+                writer.ldelim();
+            }
+            if (message.int32Data != null && message.int32Data.length) {
+                writer.uint32(/* id 5, wireType 2 =*/42).fork();
+                for (var i = 0; i < message.int32Data.length; ++i)
+                    writer.int32(message.int32Data[i]);
+                writer.ldelim();
+            }
+            if (message.stringData != null && message.stringData.length)
+                for (var i = 0; i < message.stringData.length; ++i)
+                    writer.uint32(/* id 6, wireType 2 =*/50).bytes(message.stringData[i]);
+            if (message.int64Data != null && message.int64Data.length) {
+                writer.uint32(/* id 7, wireType 2 =*/58).fork();
+                for (var i = 0; i < message.int64Data.length; ++i)
+                    writer.int64(message.int64Data[i]);
+                writer.ldelim();
+            }
+            if (message.name != null && Object.hasOwnProperty.call(message, "name"))
+                writer.uint32(/* id 8, wireType 2 =*/66).string(message.name);
+            if (message.rawData != null && Object.hasOwnProperty.call(message, "rawData"))
+                writer.uint32(/* id 9, wireType 2 =*/74).bytes(message.rawData);
+            if (message.doubleData != null && message.doubleData.length) {
+                writer.uint32(/* id 10, wireType 2 =*/82).fork();
+                for (var i = 0; i < message.doubleData.length; ++i)
+                    writer.double(message.doubleData[i]);
+                writer.ldelim();
+            }
+            if (message.uint64Data != null && message.uint64Data.length) {
+                writer.uint32(/* id 11, wireType 2 =*/90).fork();
+                for (var i = 0; i < message.uint64Data.length; ++i)
+                    writer.uint64(message.uint64Data[i]);
+                writer.ldelim();
+            }
+            if (message.docString != null && Object.hasOwnProperty.call(message, "docString"))
+                writer.uint32(/* id 12, wireType 2 =*/98).string(message.docString);
+            if (message.externalData != null && message.externalData.length)
+                for (var i = 0; i < message.externalData.length; ++i)
+                    $root.onnx.StringStringEntryProto.encode(message.externalData[i], writer.uint32(/* id 13, wireType 2 =*/106).fork()).ldelim();
+            if (message.dataLocation != null && Object.hasOwnProperty.call(message, "dataLocation"))
+                writer.uint32(/* id 14, wireType 0 =*/112).int32(message.dataLocation);
+            return writer;
+        };
+
+        /**
+         * Encodes the specified TensorProto message, length delimited. Does not implicitly {@link onnx.TensorProto.verify|verify} messages.
+         * @function encodeDelimited
+         * @memberof onnx.TensorProto
+         * @static
+         * @param {onnx.ITensorProto} message TensorProto message or plain object to encode
+         * @param {$protobuf.Writer} [writer] Writer to encode to
+         * @returns {$protobuf.Writer} Writer
+         */
+        TensorProto.encodeDelimited = function encodeDelimited(message, writer) {
+            return this.encode(message, writer).ldelim();
+        };
+
+        /**
+         * Decodes a TensorProto message from the specified reader or buffer.
+         * @function decode
+         * @memberof onnx.TensorProto
+         * @static
+         * @param {$protobuf.Reader|Uint8Array} reader Reader or buffer to decode from
+         * @param {number} [length] Message length if known beforehand
+         * @returns {onnx.TensorProto} TensorProto
+         * @throws {Error} If the payload is not a reader or valid buffer
+         * @throws {$protobuf.util.ProtocolError} If required fields are missing
+         */
+        TensorProto.decode = function decode(reader, length) {
+            if (!(reader instanceof $Reader))
+                reader = $Reader.create(reader);
+            var end = length === undefined ? reader.len : reader.pos + length, message = new $root.onnx.TensorProto();
+            while (reader.pos < end) {
+                var tag = reader.uint32();
+                switch (tag >>> 3) {
+                case 1: {
+                        if (!(message.dims && message.dims.length))
+                            message.dims = [];
+                        if ((tag & 7) === 2) {
+                            var end2 = reader.uint32() + reader.pos;
+                            while (reader.pos < end2)
+                                message.dims.push(reader.int64());
+                        } else
+                            message.dims.push(reader.int64());
+                        break;
+                    }
+                case 2: {
+                        message.dataType = reader.int32();
+                        break;
+                    }
+                case 3: {
+                        message.segment = $root.onnx.TensorProto.Segment.decode(reader, reader.uint32());
+                        break;
+                    }
+                case 4: {
+                        if (!(message.floatData && message.floatData.length))
+                            message.floatData = [];
+                        if ((tag & 7) === 2) {
+                            var end2 = reader.uint32() + reader.pos;
+                            while (reader.pos < end2)
+                                message.floatData.push(reader.float());
+                        } else
+                            message.floatData.push(reader.float());
+                        break;
+                    }
+                case 5: {
+                        if (!(message.int32Data && message.int32Data.length))
+                            message.int32Data = [];
+                        if ((tag & 7) === 2) {
+                            var end2 = reader.uint32() + reader.pos;
+                            while (reader.pos < end2)
+                                message.int32Data.push(reader.int32());
+                        } else
+                            message.int32Data.push(reader.int32());
+                        break;
+                    }
+                case 6: {
+                        if (!(message.stringData && message.stringData.length))
+                            message.stringData = [];
+                        message.stringData.push(reader.bytes());
+                        break;
+                    }
+                case 7: {
+                        if (!(message.int64Data && message.int64Data.length))
+                            message.int64Data = [];
+                        if ((tag & 7) === 2) {
+                            var end2 = reader.uint32() + reader.pos;
+                            while (reader.pos < end2)
+                                message.int64Data.push(reader.int64());
+                        } else
+                            message.int64Data.push(reader.int64());
+                        break;
+                    }
+                case 8: {
+                        message.name = reader.string();
+                        break;
+                    }
+                case 12: {
+                        message.docString = reader.string();
+                        break;
+                    }
+                case 9: {
+                        message.rawData = reader.bytes();
+                        break;
+                    }
+                case 13: {
+                        if (!(message.externalData && message.externalData.length))
+                            message.externalData = [];
+                        message.externalData.push($root.onnx.StringStringEntryProto.decode(reader, reader.uint32()));
+                        break;
+                    }
+                case 14: {
+                        message.dataLocation = reader.int32();
+                        break;
+                    }
+                case 10: {
+                        if (!(message.doubleData && message.doubleData.length))
+                            message.doubleData = [];
+                        if ((tag & 7) === 2) {
+                            var end2 = reader.uint32() + reader.pos;
+                            while (reader.pos < end2)
+                                message.doubleData.push(reader.double());
+                        } else
+                            message.doubleData.push(reader.double());
+                        break;
+                    }
+                case 11: {
+                        if (!(message.uint64Data && message.uint64Data.length))
+                            message.uint64Data = [];
+                        if ((tag & 7) === 2) {
+                            var end2 = reader.uint32() + reader.pos;
+                            while (reader.pos < end2)
+                                message.uint64Data.push(reader.uint64());
+                        } else
+                            message.uint64Data.push(reader.uint64());
+                        break;
+                    }
+                default:
+                    reader.skipType(tag & 7);
+                    break;
+                }
+            }
+            return message;
+        };
+
+        /**
+         * Decodes a TensorProto message from the specified reader or buffer, length delimited.
+         * @function decodeDelimited
+         * @memberof onnx.TensorProto
+         * @static
+         * @param {$protobuf.Reader|Uint8Array} reader Reader or buffer to decode from
+         * @returns {onnx.TensorProto} TensorProto
+         * @throws {Error} If the payload is not a reader or valid buffer
+         * @throws {$protobuf.util.ProtocolError} If required fields are missing
+         */
+        TensorProto.decodeDelimited = function decodeDelimited(reader) {
+            if (!(reader instanceof $Reader))
+                reader = new $Reader(reader);
+            return this.decode(reader, reader.uint32());
+        };
+
+        /**
+         * Verifies a TensorProto message.
+         * @function verify
+         * @memberof onnx.TensorProto
+         * @static
+         * @param {Object.<string,*>} message Plain object to verify
+         * @returns {string|null} `null` if valid, otherwise the reason why it is not
+         */
+        TensorProto.verify = function verify(message) {
+            if (typeof message !== "object" || message === null)
+                return "object expected";
+            if (message.dims != null && message.hasOwnProperty("dims")) {
+                if (!Array.isArray(message.dims))
+                    return "dims: array expected";
+                for (var i = 0; i < message.dims.length; ++i)
+                    if (!$util.isInteger(message.dims[i]) && !(message.dims[i] && $util.isInteger(message.dims[i].low) && $util.isInteger(message.dims[i].high)))
+                        return "dims: integer|Long[] expected";
+            }
+            if (message.dataType != null && message.hasOwnProperty("dataType"))
+                if (!$util.isInteger(message.dataType))
+                    return "dataType: integer expected";
+            if (message.segment != null && message.hasOwnProperty("segment")) {
+                var error = $root.onnx.TensorProto.Segment.verify(message.segment);
+                if (error)
+                    return "segment." + error;
+            }
+            if (message.floatData != null && message.hasOwnProperty("floatData")) {
+                if (!Array.isArray(message.floatData))
+                    return "floatData: array expected";
+                for (var i = 0; i < message.floatData.length; ++i)
+                    if (typeof message.floatData[i] !== "number")
+                        return "floatData: number[] expected";
+            }
+            if (message.int32Data != null && message.hasOwnProperty("int32Data")) {
+                if (!Array.isArray(message.int32Data))
+                    return "int32Data: array expected";
+                for (var i = 0; i < message.int32Data.length; ++i)
+                    if (!$util.isInteger(message.int32Data[i]))
+                        return "int32Data: integer[] expected";
+            }
+            if (message.stringData != null && message.hasOwnProperty("stringData")) {
+                if (!Array.isArray(message.stringData))
+                    return "stringData: array expected";
+                for (var i = 0; i < message.stringData.length; ++i)
+                    if (!(message.stringData[i] && typeof message.stringData[i].length === "number" || $util.isString(message.stringData[i])))
+                        return "stringData: buffer[] expected";
+            }
+            if (message.int64Data != null && message.hasOwnProperty("int64Data")) {
+                if (!Array.isArray(message.int64Data))
+                    return "int64Data: array expected";
+                for (var i = 0; i < message.int64Data.length; ++i)
+                    if (!$util.isInteger(message.int64Data[i]) && !(message.int64Data[i] && $util.isInteger(message.int64Data[i].low) && $util.isInteger(message.int64Data[i].high)))
+                        return "int64Data: integer|Long[] expected";
+            }
+            if (message.name != null && message.hasOwnProperty("name"))
+                if (!$util.isString(message.name))
+                    return "name: string expected";
+            if (message.docString != null && message.hasOwnProperty("docString"))
+                if (!$util.isString(message.docString))
+                    return "docString: string expected";
+            if (message.rawData != null && message.hasOwnProperty("rawData"))
+                if (!(message.rawData && typeof message.rawData.length === "number" || $util.isString(message.rawData)))
+                    return "rawData: buffer expected";
+            if (message.externalData != null && message.hasOwnProperty("externalData")) {
+                if (!Array.isArray(message.externalData))
+                    return "externalData: array expected";
+                for (var i = 0; i < message.externalData.length; ++i) {
+                    var error = $root.onnx.StringStringEntryProto.verify(message.externalData[i]);
+                    if (error)
+                        return "externalData." + error;
+                }
+            }
+            if (message.dataLocation != null && message.hasOwnProperty("dataLocation"))
+                switch (message.dataLocation) {
+                default:
+                    return "dataLocation: enum value expected";
+                case 0:
+                case 1:
+                    break;
+                }
+            if (message.doubleData != null && message.hasOwnProperty("doubleData")) {
+                if (!Array.isArray(message.doubleData))
+                    return "doubleData: array expected";
+                for (var i = 0; i < message.doubleData.length; ++i)
+                    if (typeof message.doubleData[i] !== "number")
+                        return "doubleData: number[] expected";
+            }
+            if (message.uint64Data != null && message.hasOwnProperty("uint64Data")) {
+                if (!Array.isArray(message.uint64Data))
+                    return "uint64Data: array expected";
+                for (var i = 0; i < message.uint64Data.length; ++i)
+                    if (!$util.isInteger(message.uint64Data[i]) && !(message.uint64Data[i] && $util.isInteger(message.uint64Data[i].low) && $util.isInteger(message.uint64Data[i].high)))
+                        return "uint64Data: integer|Long[] expected";
+            }
+            return null;
+        };
+
+        /**
+         * Creates a TensorProto message from a plain object. Also converts values to their respective internal types.
+         * @function fromObject
+         * @memberof onnx.TensorProto
+         * @static
+         * @param {Object.<string,*>} object Plain object
+         * @returns {onnx.TensorProto} TensorProto
+         */
+        TensorProto.fromObject = function fromObject(object) {
+            if (object instanceof $root.onnx.TensorProto)
+                return object;
+            var message = new $root.onnx.TensorProto();
+            if (object.dims) {
+                if (!Array.isArray(object.dims))
+                    throw TypeError(".onnx.TensorProto.dims: array expected");
+                message.dims = [];
+                for (var i = 0; i < object.dims.length; ++i)
+                    if ($util.Long)
+                        (message.dims[i] = $util.Long.fromValue(object.dims[i])).unsigned = false;
+                    else if (typeof object.dims[i] === "string")
+                        message.dims[i] = parseInt(object.dims[i], 10);
+                    else if (typeof object.dims[i] === "number")
+                        message.dims[i] = object.dims[i];
+                    else if (typeof object.dims[i] === "object")
+                        message.dims[i] = new $util.LongBits(object.dims[i].low >>> 0, object.dims[i].high >>> 0).toNumber();
+            }
+            if (object.dataType != null)
+                message.dataType = object.dataType | 0;
+            if (object.segment != null) {
+                if (typeof object.segment !== "object")
+                    throw TypeError(".onnx.TensorProto.segment: object expected");
+                message.segment = $root.onnx.TensorProto.Segment.fromObject(object.segment);
+            }
+            if (object.floatData) {
+                if (!Array.isArray(object.floatData))
+                    throw TypeError(".onnx.TensorProto.floatData: array expected");
+                message.floatData = [];
+                for (var i = 0; i < object.floatData.length; ++i)
+                    message.floatData[i] = Number(object.floatData[i]);
+            }
+            if (object.int32Data) {
+                if (!Array.isArray(object.int32Data))
+                    throw TypeError(".onnx.TensorProto.int32Data: array expected");
+                message.int32Data = [];
+                for (var i = 0; i < object.int32Data.length; ++i)
+                    message.int32Data[i] = object.int32Data[i] | 0;
+            }
+            if (object.stringData) {
+                if (!Array.isArray(object.stringData))
+                    throw TypeError(".onnx.TensorProto.stringData: array expected");
+                message.stringData = [];
+                for (var i = 0; i < object.stringData.length; ++i)
+                    if (typeof object.stringData[i] === "string")
+                        $util.base64.decode(object.stringData[i], message.stringData[i] = $util.newBuffer($util.base64.length(object.stringData[i])), 0);
+                    else if (object.stringData[i].length >= 0)
+                        message.stringData[i] = object.stringData[i];
+            }
+            if (object.int64Data) {
+                if (!Array.isArray(object.int64Data))
+                    throw TypeError(".onnx.TensorProto.int64Data: array expected");
+                message.int64Data = [];
+                for (var i = 0; i < object.int64Data.length; ++i)
+                    if ($util.Long)
+                        (message.int64Data[i] = $util.Long.fromValue(object.int64Data[i])).unsigned = false;
+                    else if (typeof object.int64Data[i] === "string")
+                        message.int64Data[i] = parseInt(object.int64Data[i], 10);
+                    else if (typeof object.int64Data[i] === "number")
+                        message.int64Data[i] = object.int64Data[i];
+                    else if (typeof object.int64Data[i] === "object")
+                        message.int64Data[i] = new $util.LongBits(object.int64Data[i].low >>> 0, object.int64Data[i].high >>> 0).toNumber();
+            }
+            if (object.name != null)
+                message.name = String(object.name);
+            if (object.docString != null)
+                message.docString = String(object.docString);
+            if (object.rawData != null)
+                if (typeof object.rawData === "string")
+                    $util.base64.decode(object.rawData, message.rawData = $util.newBuffer($util.base64.length(object.rawData)), 0);
+                else if (object.rawData.length >= 0)
+                    message.rawData = object.rawData;
+            if (object.externalData) {
+                if (!Array.isArray(object.externalData))
+                    throw TypeError(".onnx.TensorProto.externalData: array expected");
+                message.externalData = [];
+                for (var i = 0; i < object.externalData.length; ++i) {
+                    if (typeof object.externalData[i] !== "object")
+                        throw TypeError(".onnx.TensorProto.externalData: object expected");
+                    message.externalData[i] = $root.onnx.StringStringEntryProto.fromObject(object.externalData[i]);
+                }
+            }
+            switch (object.dataLocation) {
+            default:
+                if (typeof object.dataLocation === "number") {
+                    message.dataLocation = object.dataLocation;
+                    break;
+                }
+                break;
+            case "DEFAULT":
+            case 0:
+                message.dataLocation = 0;
+                break;
+            case "EXTERNAL":
+            case 1:
+                message.dataLocation = 1;
+                break;
+            }
+            if (object.doubleData) {
+                if (!Array.isArray(object.doubleData))
+                    throw TypeError(".onnx.TensorProto.doubleData: array expected");
+                message.doubleData = [];
+                for (var i = 0; i < object.doubleData.length; ++i)
+                    message.doubleData[i] = Number(object.doubleData[i]);
+            }
+            if (object.uint64Data) {
+                if (!Array.isArray(object.uint64Data))
+                    throw TypeError(".onnx.TensorProto.uint64Data: array expected");
+                message.uint64Data = [];
+                for (var i = 0; i < object.uint64Data.length; ++i)
+                    if ($util.Long)
+                        (message.uint64Data[i] = $util.Long.fromValue(object.uint64Data[i])).unsigned = true;
+                    else if (typeof object.uint64Data[i] === "string")
+                        message.uint64Data[i] = parseInt(object.uint64Data[i], 10);
+                    else if (typeof object.uint64Data[i] === "number")
+                        message.uint64Data[i] = object.uint64Data[i];
+                    else if (typeof object.uint64Data[i] === "object")
+                        message.uint64Data[i] = new $util.LongBits(object.uint64Data[i].low >>> 0, object.uint64Data[i].high >>> 0).toNumber(true);
+            }
+            return message;
+        };
+
+        /**
+         * Creates a plain object from a TensorProto message. Also converts values to other types if specified.
+         * @function toObject
+         * @memberof onnx.TensorProto
+         * @static
+         * @param {onnx.TensorProto} message TensorProto
+         * @param {$protobuf.IConversionOptions} [options] Conversion options
+         * @returns {Object.<string,*>} Plain object
+         */
+        TensorProto.toObject = function toObject(message, options) {
+            if (!options)
+                options = {};
+            var object = {};
+            if (options.arrays || options.defaults) {
+                object.dims = [];
+                object.floatData = [];
+                object.int32Data = [];
+                object.stringData = [];
+                object.int64Data = [];
+                object.doubleData = [];
+                object.uint64Data = [];
+                object.externalData = [];
+            }
+            if (options.defaults) {
+                object.dataType = 0;
+                object.segment = null;
+                object.name = "";
+                if (options.bytes === String)
+                    object.rawData = "";
+                else {
+                    object.rawData = [];
+                    if (options.bytes !== Array)
+                        object.rawData = $util.newBuffer(object.rawData);
+                }
+                object.docString = "";
+                object.dataLocation = options.enums === String ? "DEFAULT" : 0;
+            }
+            if (message.dims && message.dims.length) {
+                object.dims = [];
+                for (var j = 0; j < message.dims.length; ++j)
+                    if (typeof message.dims[j] === "number")
+                        object.dims[j] = options.longs === String ? String(message.dims[j]) : message.dims[j];
+                    else
+                        object.dims[j] = options.longs === String ? $util.Long.prototype.toString.call(message.dims[j]) : options.longs === Number ? new $util.LongBits(message.dims[j].low >>> 0, message.dims[j].high >>> 0).toNumber() : message.dims[j];
+            }
+            if (message.dataType != null && message.hasOwnProperty("dataType"))
+                object.dataType = message.dataType;
+            if (message.segment != null && message.hasOwnProperty("segment"))
+                object.segment = $root.onnx.TensorProto.Segment.toObject(message.segment, options);
+            if (message.floatData && message.floatData.length) {
+                object.floatData = [];
+                for (var j = 0; j < message.floatData.length; ++j)
+                    object.floatData[j] = options.json && !isFinite(message.floatData[j]) ? String(message.floatData[j]) : message.floatData[j];
+            }
+            if (message.int32Data && message.int32Data.length) {
+                object.int32Data = [];
+                for (var j = 0; j < message.int32Data.length; ++j)
+                    object.int32Data[j] = message.int32Data[j];
+            }
+            if (message.stringData && message.stringData.length) {
+                object.stringData = [];
+                for (var j = 0; j < message.stringData.length; ++j)
+                    object.stringData[j] = options.bytes === String ? $util.base64.encode(message.stringData[j], 0, message.stringData[j].length) : options.bytes === Array ? Array.prototype.slice.call(message.stringData[j]) : message.stringData[j];
+            }
+            if (message.int64Data && message.int64Data.length) {
+                object.int64Data = [];
+                for (var j = 0; j < message.int64Data.length; ++j)
+                    if (typeof message.int64Data[j] === "number")
+                        object.int64Data[j] = options.longs === String ? String(message.int64Data[j]) : message.int64Data[j];
+                    else
+                        object.int64Data[j] = options.longs === String ? $util.Long.prototype.toString.call(message.int64Data[j]) : options.longs === Number ? new $util.LongBits(message.int64Data[j].low >>> 0, message.int64Data[j].high >>> 0).toNumber() : message.int64Data[j];
+            }
+            if (message.name != null && message.hasOwnProperty("name"))
+                object.name = message.name;
+            if (message.rawData != null && message.hasOwnProperty("rawData"))
+                object.rawData = options.bytes === String ? $util.base64.encode(message.rawData, 0, message.rawData.length) : options.bytes === Array ? Array.prototype.slice.call(message.rawData) : message.rawData;
+            if (message.doubleData && message.doubleData.length) {
+                object.doubleData = [];
+                for (var j = 0; j < message.doubleData.length; ++j)
+                    object.doubleData[j] = options.json && !isFinite(message.doubleData[j]) ? String(message.doubleData[j]) : message.doubleData[j];
+            }
+            if (message.uint64Data && message.uint64Data.length) {
+                object.uint64Data = [];
+                for (var j = 0; j < message.uint64Data.length; ++j)
+                    if (typeof message.uint64Data[j] === "number")
+                        object.uint64Data[j] = options.longs === String ? String(message.uint64Data[j]) : message.uint64Data[j];
+                    else
+                        object.uint64Data[j] = options.longs === String ? $util.Long.prototype.toString.call(message.uint64Data[j]) : options.longs === Number ? new $util.LongBits(message.uint64Data[j].low >>> 0, message.uint64Data[j].high >>> 0).toNumber(true) : message.uint64Data[j];
+            }
+            if (message.docString != null && message.hasOwnProperty("docString"))
+                object.docString = message.docString;
+            if (message.externalData && message.externalData.length) {
+                object.externalData = [];
+                for (var j = 0; j < message.externalData.length; ++j)
+                    object.externalData[j] = $root.onnx.StringStringEntryProto.toObject(message.externalData[j], options);
+            }
+            if (message.dataLocation != null && message.hasOwnProperty("dataLocation"))
+                object.dataLocation = options.enums === String ? $root.onnx.TensorProto.DataLocation[message.dataLocation] === undefined ? message.dataLocation : $root.onnx.TensorProto.DataLocation[message.dataLocation] : message.dataLocation;
+            return object;
+        };
+
+        /**
+         * Converts this TensorProto to JSON.
+         * @function toJSON
+         * @memberof onnx.TensorProto
+         * @instance
+         * @returns {Object.<string,*>} JSON object
+         */
+        TensorProto.prototype.toJSON = function toJSON() {
+            return this.constructor.toObject(this, $protobuf.util.toJSONOptions);
+        };
+
+        /**
+         * Gets the default type url for TensorProto
+         * @function getTypeUrl
+         * @memberof onnx.TensorProto
+         * @static
+         * @param {string} [typeUrlPrefix] your custom typeUrlPrefix(default "type.googleapis.com")
+         * @returns {string} The default type url
+         */
+        TensorProto.getTypeUrl = function getTypeUrl(typeUrlPrefix) {
+            if (typeUrlPrefix === undefined) {
+                typeUrlPrefix = "type.googleapis.com";
+            }
+            return typeUrlPrefix + "/onnx.TensorProto";
+        };
+
+        /**
+         * DataType enum.
+         * @name onnx.TensorProto.DataType
+         * @enum {number}
+         * @property {number} UNDEFINED=0 UNDEFINED value
+         * @property {number} FLOAT=1 FLOAT value
+         * @property {number} UINT8=2 UINT8 value
+         * @property {number} INT8=3 INT8 value
+         * @property {number} UINT16=4 UINT16 value
+         * @property {number} INT16=5 INT16 value
+         * @property {number} INT32=6 INT32 value
+         * @property {number} INT64=7 INT64 value
+         * @property {number} STRING=8 STRING value
+         * @property {number} BOOL=9 BOOL value
+         * @property {number} FLOAT16=10 FLOAT16 value
+         * @property {number} DOUBLE=11 DOUBLE value
+         * @property {number} UINT32=12 UINT32 value
+         * @property {number} UINT64=13 UINT64 value
+         * @property {number} COMPLEX64=14 COMPLEX64 value
+         * @property {number} COMPLEX128=15 COMPLEX128 value
+         * @property {number} BFLOAT16=16 BFLOAT16 value
+         * @property {number} FLOAT8E4M3FN=17 FLOAT8E4M3FN value
+         * @property {number} FLOAT8E4M3FNUZ=18 FLOAT8E4M3FNUZ value
+         * @property {number} FLOAT8E5M2=19 FLOAT8E5M2 value
+         * @property {number} FLOAT8E5M2FNUZ=20 FLOAT8E5M2FNUZ value
+         */
+        TensorProto.DataType = (function() {
+            var valuesById = {}, values = Object.create(valuesById);
+            values[valuesById[0] = "UNDEFINED"] = 0;
+            values[valuesById[1] = "FLOAT"] = 1;
+            values[valuesById[2] = "UINT8"] = 2;
+            values[valuesById[3] = "INT8"] = 3;
+            values[valuesById[4] = "UINT16"] = 4;
+            values[valuesById[5] = "INT16"] = 5;
+            values[valuesById[6] = "INT32"] = 6;
+            values[valuesById[7] = "INT64"] = 7;
+            values[valuesById[8] = "STRING"] = 8;
+            values[valuesById[9] = "BOOL"] = 9;
+            values[valuesById[10] = "FLOAT16"] = 10;
+            values[valuesById[11] = "DOUBLE"] = 11;
+            values[valuesById[12] = "UINT32"] = 12;
+            values[valuesById[13] = "UINT64"] = 13;
+            values[valuesById[14] = "COMPLEX64"] = 14;
+            values[valuesById[15] = "COMPLEX128"] = 15;
+            values[valuesById[16] = "BFLOAT16"] = 16;
+            values[valuesById[17] = "FLOAT8E4M3FN"] = 17;
+            values[valuesById[18] = "FLOAT8E4M3FNUZ"] = 18;
+            values[valuesById[19] = "FLOAT8E5M2"] = 19;
+            values[valuesById[20] = "FLOAT8E5M2FNUZ"] = 20;
+            return values;
+        })();
+
+        TensorProto.Segment = (function() {
+
+            /**
+             * Properties of a Segment.
+             * @memberof onnx.TensorProto
+             * @interface ISegment
+             * @property {number|Long|null} [begin] Segment begin
+             * @property {number|Long|null} [end] Segment end
+             */
+
+            /**
+             * Constructs a new Segment.
+             * @memberof onnx.TensorProto
+             * @classdesc Represents a Segment.
+             * @implements ISegment
+             * @constructor
+             * @param {onnx.TensorProto.ISegment=} [properties] Properties to set
+             */
+            function Segment(properties) {
+                if (properties)
+                    for (var keys = Object.keys(properties), i = 0; i < keys.length; ++i)
+                        if (properties[keys[i]] != null)
+                            this[keys[i]] = properties[keys[i]];
+            }
+
+            /**
+             * Segment begin.
+             * @member {number|Long} begin
+             * @memberof onnx.TensorProto.Segment
+             * @instance
+             */
+            Segment.prototype.begin = $util.Long ? $util.Long.fromBits(0,0,false) : 0;
+
+            /**
+             * Segment end.
+             * @member {number|Long} end
+             * @memberof onnx.TensorProto.Segment
+             * @instance
+             */
+            Segment.prototype.end = $util.Long ? $util.Long.fromBits(0,0,false) : 0;
+
+            /**
+             * Creates a new Segment instance using the specified properties.
+             * @function create
+             * @memberof onnx.TensorProto.Segment
+             * @static
+             * @param {onnx.TensorProto.ISegment=} [properties] Properties to set
+             * @returns {onnx.TensorProto.Segment} Segment instance
+             */
+            Segment.create = function create(properties) {
+                return new Segment(properties);
+            };
+
+            /**
+             * Encodes the specified Segment message. Does not implicitly {@link onnx.TensorProto.Segment.verify|verify} messages.
+             * @function encode
+             * @memberof onnx.TensorProto.Segment
+             * @static
+             * @param {onnx.TensorProto.ISegment} message Segment message or plain object to encode
+             * @param {$protobuf.Writer} [writer] Writer to encode to
+             * @returns {$protobuf.Writer} Writer
+             */
+            Segment.encode = function encode(message, writer) {
+                if (!writer)
+                    writer = $Writer.create();
+                if (message.begin != null && Object.hasOwnProperty.call(message, "begin"))
+                    writer.uint32(/* id 1, wireType 0 =*/8).int64(message.begin);
+                if (message.end != null && Object.hasOwnProperty.call(message, "end"))
+                    writer.uint32(/* id 2, wireType 0 =*/16).int64(message.end);
+                return writer;
+            };
+
+            /**
+             * Encodes the specified Segment message, length delimited. Does not implicitly {@link onnx.TensorProto.Segment.verify|verify} messages.
+             * @function encodeDelimited
+             * @memberof onnx.TensorProto.Segment
+             * @static
+             * @param {onnx.TensorProto.ISegment} message Segment message or plain object to encode
+             * @param {$protobuf.Writer} [writer] Writer to encode to
+             * @returns {$protobuf.Writer} Writer
+             */
+            Segment.encodeDelimited = function encodeDelimited(message, writer) {
+                return this.encode(message, writer).ldelim();
+            };
+
+            /**
+             * Decodes a Segment message from the specified reader or buffer.
+             * @function decode
+             * @memberof onnx.TensorProto.Segment
+             * @static
+             * @param {$protobuf.Reader|Uint8Array} reader Reader or buffer to decode from
+             * @param {number} [length] Message length if known beforehand
+             * @returns {onnx.TensorProto.Segment} Segment
+             * @throws {Error} If the payload is not a reader or valid buffer
+             * @throws {$protobuf.util.ProtocolError} If required fields are missing
+             */
+            Segment.decode = function decode(reader, length) {
+                if (!(reader instanceof $Reader))
+                    reader = $Reader.create(reader);
+                var end = length === undefined ? reader.len : reader.pos + length, message = new $root.onnx.TensorProto.Segment();
+                while (reader.pos < end) {
+                    var tag = reader.uint32();
+                    switch (tag >>> 3) {
+                    case 1: {
+                            message.begin = reader.int64();
+                            break;
+                        }
+                    case 2: {
+                            message.end = reader.int64();
+                            break;
+                        }
+                    default:
+                        reader.skipType(tag & 7);
+                        break;
+                    }
+                }
+                return message;
+            };
+
+            /**
+             * Decodes a Segment message from the specified reader or buffer, length delimited.
+             * @function decodeDelimited
+             * @memberof onnx.TensorProto.Segment
+             * @static
+             * @param {$protobuf.Reader|Uint8Array} reader Reader or buffer to decode from
+             * @returns {onnx.TensorProto.Segment} Segment
+             * @throws {Error} If the payload is not a reader or valid buffer
+             * @throws {$protobuf.util.ProtocolError} If required fields are missing
+             */
+            Segment.decodeDelimited = function decodeDelimited(reader) {
+                if (!(reader instanceof $Reader))
+                    reader = new $Reader(reader);
+                return this.decode(reader, reader.uint32());
+            };
+
+            /**
+             * Verifies a Segment message.
+             * @function verify
+             * @memberof onnx.TensorProto.Segment
+             * @static
+             * @param {Object.<string,*>} message Plain object to verify
+             * @returns {string|null} `null` if valid, otherwise the reason why it is not
+             */
+            Segment.verify = function verify(message) {
+                if (typeof message !== "object" || message === null)
+                    return "object expected";
+                if (message.begin != null && message.hasOwnProperty("begin"))
+                    if (!$util.isInteger(message.begin) && !(message.begin && $util.isInteger(message.begin.low) && $util.isInteger(message.begin.high)))
+                        return "begin: integer|Long expected";
+                if (message.end != null && message.hasOwnProperty("end"))
+                    if (!$util.isInteger(message.end) && !(message.end && $util.isInteger(message.end.low) && $util.isInteger(message.end.high)))
+                        return "end: integer|Long expected";
+                return null;
+            };
+
+            /**
+             * Creates a Segment message from a plain object. Also converts values to their respective internal types.
+             * @function fromObject
+             * @memberof onnx.TensorProto.Segment
+             * @static
+             * @param {Object.<string,*>} object Plain object
+             * @returns {onnx.TensorProto.Segment} Segment
+             */
+            Segment.fromObject = function fromObject(object) {
+                if (object instanceof $root.onnx.TensorProto.Segment)
+                    return object;
+                var message = new $root.onnx.TensorProto.Segment();
+                if (object.begin != null)
+                    if ($util.Long)
+                        (message.begin = $util.Long.fromValue(object.begin)).unsigned = false;
+                    else if (typeof object.begin === "string")
+                        message.begin = parseInt(object.begin, 10);
+                    else if (typeof object.begin === "number")
+                        message.begin = object.begin;
+                    else if (typeof object.begin === "object")
+                        message.begin = new $util.LongBits(object.begin.low >>> 0, object.begin.high >>> 0).toNumber();
+                if (object.end != null)
+                    if ($util.Long)
+                        (message.end = $util.Long.fromValue(object.end)).unsigned = false;
+                    else if (typeof object.end === "string")
+                        message.end = parseInt(object.end, 10);
+                    else if (typeof object.end === "number")
+                        message.end = object.end;
+                    else if (typeof object.end === "object")
+                        message.end = new $util.LongBits(object.end.low >>> 0, object.end.high >>> 0).toNumber();
+                return message;
+            };
+
+            /**
+             * Creates a plain object from a Segment message. Also converts values to other types if specified.
+             * @function toObject
+             * @memberof onnx.TensorProto.Segment
+             * @static
+             * @param {onnx.TensorProto.Segment} message Segment
+             * @param {$protobuf.IConversionOptions} [options] Conversion options
+             * @returns {Object.<string,*>} Plain object
+             */
+            Segment.toObject = function toObject(message, options) {
+                if (!options)
+                    options = {};
+                var object = {};
+                if (options.defaults) {
+                    if ($util.Long) {
+                        var long = new $util.Long(0, 0, false);
+                        object.begin = options.longs === String ? long.toString() : options.longs === Number ? long.toNumber() : long;
+                    } else
+                        object.begin = options.longs === String ? "0" : 0;
+                    if ($util.Long) {
+                        var long = new $util.Long(0, 0, false);
+                        object.end = options.longs === String ? long.toString() : options.longs === Number ? long.toNumber() : long;
+                    } else
+                        object.end = options.longs === String ? "0" : 0;
+                }
+                if (message.begin != null && message.hasOwnProperty("begin"))
+                    if (typeof message.begin === "number")
+                        object.begin = options.longs === String ? String(message.begin) : message.begin;
+                    else
+                        object.begin = options.longs === String ? $util.Long.prototype.toString.call(message.begin) : options.longs === Number ? new $util.LongBits(message.begin.low >>> 0, message.begin.high >>> 0).toNumber() : message.begin;
+                if (message.end != null && message.hasOwnProperty("end"))
+                    if (typeof message.end === "number")
+                        object.end = options.longs === String ? String(message.end) : message.end;
+                    else
+                        object.end = options.longs === String ? $util.Long.prototype.toString.call(message.end) : options.longs === Number ? new $util.LongBits(message.end.low >>> 0, message.end.high >>> 0).toNumber() : message.end;
+                return object;
+            };
+
+            /**
+             * Converts this Segment to JSON.
+             * @function toJSON
+             * @memberof onnx.TensorProto.Segment
+             * @instance
+             * @returns {Object.<string,*>} JSON object
+             */
+            Segment.prototype.toJSON = function toJSON() {
+                return this.constructor.toObject(this, $protobuf.util.toJSONOptions);
+            };
+
+            /**
+             * Gets the default type url for Segment
+             * @function getTypeUrl
+             * @memberof onnx.TensorProto.Segment
+             * @static
+             * @param {string} [typeUrlPrefix] your custom typeUrlPrefix(default "type.googleapis.com")
+             * @returns {string} The default type url
+             */
+            Segment.getTypeUrl = function getTypeUrl(typeUrlPrefix) {
+                if (typeUrlPrefix === undefined) {
+                    typeUrlPrefix = "type.googleapis.com";
+                }
+                return typeUrlPrefix + "/onnx.TensorProto.Segment";
+            };
+
+            return Segment;
+        })();
+
+        /**
+         * DataLocation enum.
+         * @name onnx.TensorProto.DataLocation
+         * @enum {number}
+         * @property {number} DEFAULT=0 DEFAULT value
+         * @property {number} EXTERNAL=1 EXTERNAL value
+         */
+        TensorProto.DataLocation = (function() {
+            var valuesById = {}, values = Object.create(valuesById);
+            values[valuesById[0] = "DEFAULT"] = 0;
+            values[valuesById[1] = "EXTERNAL"] = 1;
+            return values;
+        })();
+
+        return TensorProto;
+    })();
+
+    onnx.SparseTensorProto = (function() {
+
+        /**
+         * Properties of a SparseTensorProto.
+         * @memberof onnx
+         * @interface ISparseTensorProto
+         * @property {onnx.ITensorProto|null} [values] SparseTensorProto values
+         * @property {onnx.ITensorProto|null} [indices] SparseTensorProto indices
+         * @property {Array.<number|Long>|null} [dims] SparseTensorProto dims
+         */
+
+        /**
+         * Constructs a new SparseTensorProto.
+         * @memberof onnx
+         * @classdesc Represents a SparseTensorProto.
+         * @implements ISparseTensorProto
+         * @constructor
+         * @param {onnx.ISparseTensorProto=} [properties] Properties to set
+         */
+        function SparseTensorProto(properties) {
+            this.dims = [];
+            if (properties)
+                for (var keys = Object.keys(properties), i = 0; i < keys.length; ++i)
+                    if (properties[keys[i]] != null)
+                        this[keys[i]] = properties[keys[i]];
+        }
+
+        /**
+         * SparseTensorProto values.
+         * @member {onnx.ITensorProto|null|undefined} values
+         * @memberof onnx.SparseTensorProto
+         * @instance
+         */
+        SparseTensorProto.prototype.values = null;
+
+        /**
+         * SparseTensorProto indices.
+         * @member {onnx.ITensorProto|null|undefined} indices
+         * @memberof onnx.SparseTensorProto
+         * @instance
+         */
+        SparseTensorProto.prototype.indices = null;
+
+        /**
+         * SparseTensorProto dims.
+         * @member {Array.<number|Long>} dims
+         * @memberof onnx.SparseTensorProto
+         * @instance
+         */
+        SparseTensorProto.prototype.dims = $util.emptyArray;
+
+        /**
+         * Creates a new SparseTensorProto instance using the specified properties.
+         * @function create
+         * @memberof onnx.SparseTensorProto
+         * @static
+         * @param {onnx.ISparseTensorProto=} [properties] Properties to set
+         * @returns {onnx.SparseTensorProto} SparseTensorProto instance
+         */
+        SparseTensorProto.create = function create(properties) {
+            return new SparseTensorProto(properties);
+        };
+
+        /**
+         * Encodes the specified SparseTensorProto message. Does not implicitly {@link onnx.SparseTensorProto.verify|verify} messages.
+         * @function encode
+         * @memberof onnx.SparseTensorProto
+         * @static
+         * @param {onnx.ISparseTensorProto} message SparseTensorProto message or plain object to encode
+         * @param {$protobuf.Writer} [writer] Writer to encode to
+         * @returns {$protobuf.Writer} Writer
+         */
+        SparseTensorProto.encode = function encode(message, writer) {
+            if (!writer)
+                writer = $Writer.create();
+            if (message.values != null && Object.hasOwnProperty.call(message, "values"))
+                $root.onnx.TensorProto.encode(message.values, writer.uint32(/* id 1, wireType 2 =*/10).fork()).ldelim();
+            if (message.indices != null && Object.hasOwnProperty.call(message, "indices"))
+                $root.onnx.TensorProto.encode(message.indices, writer.uint32(/* id 2, wireType 2 =*/18).fork()).ldelim();
+            if (message.dims != null && message.dims.length) {
+                writer.uint32(/* id 3, wireType 2 =*/26).fork();
+                for (var i = 0; i < message.dims.length; ++i)
+                    writer.int64(message.dims[i]);
+                writer.ldelim();
+            }
+            return writer;
+        };
+
+        /**
+         * Encodes the specified SparseTensorProto message, length delimited. Does not implicitly {@link onnx.SparseTensorProto.verify|verify} messages.
+         * @function encodeDelimited
+         * @memberof onnx.SparseTensorProto
+         * @static
+         * @param {onnx.ISparseTensorProto} message SparseTensorProto message or plain object to encode
+         * @param {$protobuf.Writer} [writer] Writer to encode to
+         * @returns {$protobuf.Writer} Writer
+         */
+        SparseTensorProto.encodeDelimited = function encodeDelimited(message, writer) {
+            return this.encode(message, writer).ldelim();
+        };
+
+        /**
+         * Decodes a SparseTensorProto message from the specified reader or buffer.
+         * @function decode
+         * @memberof onnx.SparseTensorProto
+         * @static
+         * @param {$protobuf.Reader|Uint8Array} reader Reader or buffer to decode from
+         * @param {number} [length] Message length if known beforehand
+         * @returns {onnx.SparseTensorProto} SparseTensorProto
+         * @throws {Error} If the payload is not a reader or valid buffer
+         * @throws {$protobuf.util.ProtocolError} If required fields are missing
+         */
+        SparseTensorProto.decode = function decode(reader, length) {
+            if (!(reader instanceof $Reader))
+                reader = $Reader.create(reader);
+            var end = length === undefined ? reader.len : reader.pos + length, message = new $root.onnx.SparseTensorProto();
+            while (reader.pos < end) {
+                var tag = reader.uint32();
+                switch (tag >>> 3) {
+                case 1: {
+                        message.values = $root.onnx.TensorProto.decode(reader, reader.uint32());
+                        break;
+                    }
+                case 2: {
+                        message.indices = $root.onnx.TensorProto.decode(reader, reader.uint32());
+                        break;
+                    }
+                case 3: {
+                        if (!(message.dims && message.dims.length))
+                            message.dims = [];
+                        if ((tag & 7) === 2) {
+                            var end2 = reader.uint32() + reader.pos;
+                            while (reader.pos < end2)
+                                message.dims.push(reader.int64());
+                        } else
+                            message.dims.push(reader.int64());
+                        break;
+                    }
+                default:
+                    reader.skipType(tag & 7);
+                    break;
+                }
+            }
+            return message;
+        };
+
+        /**
+         * Decodes a SparseTensorProto message from the specified reader or buffer, length delimited.
+         * @function decodeDelimited
+         * @memberof onnx.SparseTensorProto
+         * @static
+         * @param {$protobuf.Reader|Uint8Array} reader Reader or buffer to decode from
+         * @returns {onnx.SparseTensorProto} SparseTensorProto
+         * @throws {Error} If the payload is not a reader or valid buffer
+         * @throws {$protobuf.util.ProtocolError} If required fields are missing
+         */
+        SparseTensorProto.decodeDelimited = function decodeDelimited(reader) {
+            if (!(reader instanceof $Reader))
+                reader = new $Reader(reader);
+            return this.decode(reader, reader.uint32());
+        };
+
+        /**
+         * Verifies a SparseTensorProto message.
+         * @function verify
+         * @memberof onnx.SparseTensorProto
+         * @static
+         * @param {Object.<string,*>} message Plain object to verify
+         * @returns {string|null} `null` if valid, otherwise the reason why it is not
+         */
+        SparseTensorProto.verify = function verify(message) {
+            if (typeof message !== "object" || message === null)
+                return "object expected";
+            if (message.values != null && message.hasOwnProperty("values")) {
+                var error = $root.onnx.TensorProto.verify(message.values);
+                if (error)
+                    return "values." + error;
+            }
+            if (message.indices != null && message.hasOwnProperty("indices")) {
+                var error = $root.onnx.TensorProto.verify(message.indices);
+                if (error)
+                    return "indices." + error;
+            }
+            if (message.dims != null && message.hasOwnProperty("dims")) {
+                if (!Array.isArray(message.dims))
+                    return "dims: array expected";
+                for (var i = 0; i < message.dims.length; ++i)
+                    if (!$util.isInteger(message.dims[i]) && !(message.dims[i] && $util.isInteger(message.dims[i].low) && $util.isInteger(message.dims[i].high)))
+                        return "dims: integer|Long[] expected";
+            }
+            return null;
+        };
+
+        /**
+         * Creates a SparseTensorProto message from a plain object. Also converts values to their respective internal types.
+         * @function fromObject
+         * @memberof onnx.SparseTensorProto
+         * @static
+         * @param {Object.<string,*>} object Plain object
+         * @returns {onnx.SparseTensorProto} SparseTensorProto
+         */
+        SparseTensorProto.fromObject = function fromObject(object) {
+            if (object instanceof $root.onnx.SparseTensorProto)
+                return object;
+            var message = new $root.onnx.SparseTensorProto();
+            if (object.values != null) {
+                if (typeof object.values !== "object")
+                    throw TypeError(".onnx.SparseTensorProto.values: object expected");
+                message.values = $root.onnx.TensorProto.fromObject(object.values);
+            }
+            if (object.indices != null) {
+                if (typeof object.indices !== "object")
+                    throw TypeError(".onnx.SparseTensorProto.indices: object expected");
+                message.indices = $root.onnx.TensorProto.fromObject(object.indices);
+            }
+            if (object.dims) {
+                if (!Array.isArray(object.dims))
+                    throw TypeError(".onnx.SparseTensorProto.dims: array expected");
+                message.dims = [];
+                for (var i = 0; i < object.dims.length; ++i)
+                    if ($util.Long)
+                        (message.dims[i] = $util.Long.fromValue(object.dims[i])).unsigned = false;
+                    else if (typeof object.dims[i] === "string")
+                        message.dims[i] = parseInt(object.dims[i], 10);
+                    else if (typeof object.dims[i] === "number")
+                        message.dims[i] = object.dims[i];
+                    else if (typeof object.dims[i] === "object")
+                        message.dims[i] = new $util.LongBits(object.dims[i].low >>> 0, object.dims[i].high >>> 0).toNumber();
+            }
+            return message;
+        };
+
+        /**
+         * Creates a plain object from a SparseTensorProto message. Also converts values to other types if specified.
+         * @function toObject
+         * @memberof onnx.SparseTensorProto
+         * @static
+         * @param {onnx.SparseTensorProto} message SparseTensorProto
+         * @param {$protobuf.IConversionOptions} [options] Conversion options
+         * @returns {Object.<string,*>} Plain object
+         */
+        SparseTensorProto.toObject = function toObject(message, options) {
+            if (!options)
+                options = {};
+            var object = {};
+            if (options.arrays || options.defaults)
+                object.dims = [];
+            if (options.defaults) {
+                object.values = null;
+                object.indices = null;
+            }
+            if (message.values != null && message.hasOwnProperty("values"))
+                object.values = $root.onnx.TensorProto.toObject(message.values, options);
+            if (message.indices != null && message.hasOwnProperty("indices"))
+                object.indices = $root.onnx.TensorProto.toObject(message.indices, options);
+            if (message.dims && message.dims.length) {
+                object.dims = [];
+                for (var j = 0; j < message.dims.length; ++j)
+                    if (typeof message.dims[j] === "number")
+                        object.dims[j] = options.longs === String ? String(message.dims[j]) : message.dims[j];
+                    else
+                        object.dims[j] = options.longs === String ? $util.Long.prototype.toString.call(message.dims[j]) : options.longs === Number ? new $util.LongBits(message.dims[j].low >>> 0, message.dims[j].high >>> 0).toNumber() : message.dims[j];
+            }
+            return object;
+        };
+
+        /**
+         * Converts this SparseTensorProto to JSON.
+         * @function toJSON
+         * @memberof onnx.SparseTensorProto
+         * @instance
+         * @returns {Object.<string,*>} JSON object
+         */
+        SparseTensorProto.prototype.toJSON = function toJSON() {
+            return this.constructor.toObject(this, $protobuf.util.toJSONOptions);
+        };
+
+        /**
+         * Gets the default type url for SparseTensorProto
+         * @function getTypeUrl
+         * @memberof onnx.SparseTensorProto
+         * @static
+         * @param {string} [typeUrlPrefix] your custom typeUrlPrefix(default "type.googleapis.com")
+         * @returns {string} The default type url
+         */
+        SparseTensorProto.getTypeUrl = function getTypeUrl(typeUrlPrefix) {
+            if (typeUrlPrefix === undefined) {
+                typeUrlPrefix = "type.googleapis.com";
+            }
+            return typeUrlPrefix + "/onnx.SparseTensorProto";
+        };
+
+        return SparseTensorProto;
+    })();
+
+    onnx.TensorShapeProto = (function() {
+
+        /**
+         * Properties of a TensorShapeProto.
+         * @memberof onnx
+         * @interface ITensorShapeProto
+         * @property {Array.<onnx.TensorShapeProto.IDimension>|null} [dim] TensorShapeProto dim
+         */
+
+        /**
+         * Constructs a new TensorShapeProto.
+         * @memberof onnx
+         * @classdesc Represents a TensorShapeProto.
+         * @implements ITensorShapeProto
+         * @constructor
+         * @param {onnx.ITensorShapeProto=} [properties] Properties to set
+         */
+        function TensorShapeProto(properties) {
+            this.dim = [];
+            if (properties)
+                for (var keys = Object.keys(properties), i = 0; i < keys.length; ++i)
+                    if (properties[keys[i]] != null)
+                        this[keys[i]] = properties[keys[i]];
+        }
+
+        /**
+         * TensorShapeProto dim.
+         * @member {Array.<onnx.TensorShapeProto.IDimension>} dim
+         * @memberof onnx.TensorShapeProto
+         * @instance
+         */
+        TensorShapeProto.prototype.dim = $util.emptyArray;
+
+        /**
+         * Creates a new TensorShapeProto instance using the specified properties.
+         * @function create
+         * @memberof onnx.TensorShapeProto
+         * @static
+         * @param {onnx.ITensorShapeProto=} [properties] Properties to set
+         * @returns {onnx.TensorShapeProto} TensorShapeProto instance
+         */
+        TensorShapeProto.create = function create(properties) {
+            return new TensorShapeProto(properties);
+        };
+
+        /**
+         * Encodes the specified TensorShapeProto message. Does not implicitly {@link onnx.TensorShapeProto.verify|verify} messages.
+         * @function encode
+         * @memberof onnx.TensorShapeProto
+         * @static
+         * @param {onnx.ITensorShapeProto} message TensorShapeProto message or plain object to encode
+         * @param {$protobuf.Writer} [writer] Writer to encode to
+         * @returns {$protobuf.Writer} Writer
+         */
+        TensorShapeProto.encode = function encode(message, writer) {
+            if (!writer)
+                writer = $Writer.create();
+            if (message.dim != null && message.dim.length)
+                for (var i = 0; i < message.dim.length; ++i)
+                    $root.onnx.TensorShapeProto.Dimension.encode(message.dim[i], writer.uint32(/* id 1, wireType 2 =*/10).fork()).ldelim();
+            return writer;
+        };
+
+        /**
+         * Encodes the specified TensorShapeProto message, length delimited. Does not implicitly {@link onnx.TensorShapeProto.verify|verify} messages.
+         * @function encodeDelimited
+         * @memberof onnx.TensorShapeProto
+         * @static
+         * @param {onnx.ITensorShapeProto} message TensorShapeProto message or plain object to encode
+         * @param {$protobuf.Writer} [writer] Writer to encode to
+         * @returns {$protobuf.Writer} Writer
+         */
+        TensorShapeProto.encodeDelimited = function encodeDelimited(message, writer) {
+            return this.encode(message, writer).ldelim();
+        };
+
+        /**
+         * Decodes a TensorShapeProto message from the specified reader or buffer.
+         * @function decode
+         * @memberof onnx.TensorShapeProto
+         * @static
+         * @param {$protobuf.Reader|Uint8Array} reader Reader or buffer to decode from
+         * @param {number} [length] Message length if known beforehand
+         * @returns {onnx.TensorShapeProto} TensorShapeProto
+         * @throws {Error} If the payload is not a reader or valid buffer
+         * @throws {$protobuf.util.ProtocolError} If required fields are missing
+         */
+        TensorShapeProto.decode = function decode(reader, length) {
+            if (!(reader instanceof $Reader))
+                reader = $Reader.create(reader);
+            var end = length === undefined ? reader.len : reader.pos + length, message = new $root.onnx.TensorShapeProto();
+            while (reader.pos < end) {
+                var tag = reader.uint32();
+                switch (tag >>> 3) {
+                case 1: {
+                        if (!(message.dim && message.dim.length))
+                            message.dim = [];
+                        message.dim.push($root.onnx.TensorShapeProto.Dimension.decode(reader, reader.uint32()));
+                        break;
+                    }
+                default:
+                    reader.skipType(tag & 7);
+                    break;
+                }
+            }
+            return message;
+        };
+
+        /**
+         * Decodes a TensorShapeProto message from the specified reader or buffer, length delimited.
+         * @function decodeDelimited
+         * @memberof onnx.TensorShapeProto
+         * @static
+         * @param {$protobuf.Reader|Uint8Array} reader Reader or buffer to decode from
+         * @returns {onnx.TensorShapeProto} TensorShapeProto
+         * @throws {Error} If the payload is not a reader or valid buffer
+         * @throws {$protobuf.util.ProtocolError} If required fields are missing
+         */
+        TensorShapeProto.decodeDelimited = function decodeDelimited(reader) {
+            if (!(reader instanceof $Reader))
+                reader = new $Reader(reader);
+            return this.decode(reader, reader.uint32());
+        };
+
+        /**
+         * Verifies a TensorShapeProto message.
+         * @function verify
+         * @memberof onnx.TensorShapeProto
+         * @static
+         * @param {Object.<string,*>} message Plain object to verify
+         * @returns {string|null} `null` if valid, otherwise the reason why it is not
+         */
+        TensorShapeProto.verify = function verify(message) {
+            if (typeof message !== "object" || message === null)
+                return "object expected";
+            if (message.dim != null && message.hasOwnProperty("dim")) {
+                if (!Array.isArray(message.dim))
+                    return "dim: array expected";
+                for (var i = 0; i < message.dim.length; ++i) {
+                    var error = $root.onnx.TensorShapeProto.Dimension.verify(message.dim[i]);
+                    if (error)
+                        return "dim." + error;
+                }
+            }
+            return null;
+        };
+
+        /**
+         * Creates a TensorShapeProto message from a plain object. Also converts values to their respective internal types.
+         * @function fromObject
+         * @memberof onnx.TensorShapeProto
+         * @static
+         * @param {Object.<string,*>} object Plain object
+         * @returns {onnx.TensorShapeProto} TensorShapeProto
+         */
+        TensorShapeProto.fromObject = function fromObject(object) {
+            if (object instanceof $root.onnx.TensorShapeProto)
+                return object;
+            var message = new $root.onnx.TensorShapeProto();
+            if (object.dim) {
+                if (!Array.isArray(object.dim))
+                    throw TypeError(".onnx.TensorShapeProto.dim: array expected");
+                message.dim = [];
+                for (var i = 0; i < object.dim.length; ++i) {
+                    if (typeof object.dim[i] !== "object")
+                        throw TypeError(".onnx.TensorShapeProto.dim: object expected");
+                    message.dim[i] = $root.onnx.TensorShapeProto.Dimension.fromObject(object.dim[i]);
+                }
+            }
+            return message;
+        };
+
+        /**
+         * Creates a plain object from a TensorShapeProto message. Also converts values to other types if specified.
+         * @function toObject
+         * @memberof onnx.TensorShapeProto
+         * @static
+         * @param {onnx.TensorShapeProto} message TensorShapeProto
+         * @param {$protobuf.IConversionOptions} [options] Conversion options
+         * @returns {Object.<string,*>} Plain object
+         */
+        TensorShapeProto.toObject = function toObject(message, options) {
+            if (!options)
+                options = {};
+            var object = {};
+            if (options.arrays || options.defaults)
+                object.dim = [];
+            if (message.dim && message.dim.length) {
+                object.dim = [];
+                for (var j = 0; j < message.dim.length; ++j)
+                    object.dim[j] = $root.onnx.TensorShapeProto.Dimension.toObject(message.dim[j], options);
+            }
+            return object;
+        };
+
+        /**
+         * Converts this TensorShapeProto to JSON.
+         * @function toJSON
+         * @memberof onnx.TensorShapeProto
+         * @instance
+         * @returns {Object.<string,*>} JSON object
+         */
+        TensorShapeProto.prototype.toJSON = function toJSON() {
+            return this.constructor.toObject(this, $protobuf.util.toJSONOptions);
+        };
+
+        /**
+         * Gets the default type url for TensorShapeProto
+         * @function getTypeUrl
+         * @memberof onnx.TensorShapeProto
+         * @static
+         * @param {string} [typeUrlPrefix] your custom typeUrlPrefix(default "type.googleapis.com")
+         * @returns {string} The default type url
+         */
+        TensorShapeProto.getTypeUrl = function getTypeUrl(typeUrlPrefix) {
+            if (typeUrlPrefix === undefined) {
+                typeUrlPrefix = "type.googleapis.com";
+            }
+            return typeUrlPrefix + "/onnx.TensorShapeProto";
+        };
+
+        TensorShapeProto.Dimension = (function() {
+
+            /**
+             * Properties of a Dimension.
+             * @memberof onnx.TensorShapeProto
+             * @interface IDimension
+             * @property {number|Long|null} [dimValue] Dimension dimValue
+             * @property {string|null} [dimParam] Dimension dimParam
+             * @property {string|null} [denotation] Dimension denotation
+             */
+
+            /**
+             * Constructs a new Dimension.
+             * @memberof onnx.TensorShapeProto
+             * @classdesc Represents a Dimension.
+             * @implements IDimension
+             * @constructor
+             * @param {onnx.TensorShapeProto.IDimension=} [properties] Properties to set
+             */
+            function Dimension(properties) {
+                if (properties)
+                    for (var keys = Object.keys(properties), i = 0; i < keys.length; ++i)
+                        if (properties[keys[i]] != null)
+                            this[keys[i]] = properties[keys[i]];
+            }
+
+            /**
+             * Dimension dimValue.
+             * @member {number|Long|null|undefined} dimValue
+             * @memberof onnx.TensorShapeProto.Dimension
+             * @instance
+             */
+            Dimension.prototype.dimValue = null;
+
+            /**
+             * Dimension dimParam.
+             * @member {string|null|undefined} dimParam
+             * @memberof onnx.TensorShapeProto.Dimension
+             * @instance
+             */
+            Dimension.prototype.dimParam = null;
+
+            /**
+             * Dimension denotation.
+             * @member {string} denotation
+             * @memberof onnx.TensorShapeProto.Dimension
+             * @instance
+             */
+            Dimension.prototype.denotation = "";
+
+            // OneOf field names bound to virtual getters and setters
+            var $oneOfFields;
+
+            /**
+             * Dimension value.
+             * @member {"dimValue"|"dimParam"|undefined} value
+             * @memberof onnx.TensorShapeProto.Dimension
+             * @instance
+             */
+            Object.defineProperty(Dimension.prototype, "value", {
+                get: $util.oneOfGetter($oneOfFields = ["dimValue", "dimParam"]),
+                set: $util.oneOfSetter($oneOfFields)
+            });
+
+            /**
+             * Creates a new Dimension instance using the specified properties.
+             * @function create
+             * @memberof onnx.TensorShapeProto.Dimension
+             * @static
+             * @param {onnx.TensorShapeProto.IDimension=} [properties] Properties to set
+             * @returns {onnx.TensorShapeProto.Dimension} Dimension instance
+             */
+            Dimension.create = function create(properties) {
+                return new Dimension(properties);
+            };
+
+            /**
+             * Encodes the specified Dimension message. Does not implicitly {@link onnx.TensorShapeProto.Dimension.verify|verify} messages.
+             * @function encode
+             * @memberof onnx.TensorShapeProto.Dimension
+             * @static
+             * @param {onnx.TensorShapeProto.IDimension} message Dimension message or plain object to encode
+             * @param {$protobuf.Writer} [writer] Writer to encode to
+             * @returns {$protobuf.Writer} Writer
+             */
+            Dimension.encode = function encode(message, writer) {
+                if (!writer)
+                    writer = $Writer.create();
+                if (message.dimValue != null && Object.hasOwnProperty.call(message, "dimValue"))
+                    writer.uint32(/* id 1, wireType 0 =*/8).int64(message.dimValue);
+                if (message.dimParam != null && Object.hasOwnProperty.call(message, "dimParam"))
+                    writer.uint32(/* id 2, wireType 2 =*/18).string(message.dimParam);
+                if (message.denotation != null && Object.hasOwnProperty.call(message, "denotation"))
+                    writer.uint32(/* id 3, wireType 2 =*/26).string(message.denotation);
+                return writer;
+            };
+
+            /**
+             * Encodes the specified Dimension message, length delimited. Does not implicitly {@link onnx.TensorShapeProto.Dimension.verify|verify} messages.
+             * @function encodeDelimited
+             * @memberof onnx.TensorShapeProto.Dimension
+             * @static
+             * @param {onnx.TensorShapeProto.IDimension} message Dimension message or plain object to encode
+             * @param {$protobuf.Writer} [writer] Writer to encode to
+             * @returns {$protobuf.Writer} Writer
+             */
+            Dimension.encodeDelimited = function encodeDelimited(message, writer) {
+                return this.encode(message, writer).ldelim();
+            };
+
+            /**
+             * Decodes a Dimension message from the specified reader or buffer.
+             * @function decode
+             * @memberof onnx.TensorShapeProto.Dimension
+             * @static
+             * @param {$protobuf.Reader|Uint8Array} reader Reader or buffer to decode from
+             * @param {number} [length] Message length if known beforehand
+             * @returns {onnx.TensorShapeProto.Dimension} Dimension
+             * @throws {Error} If the payload is not a reader or valid buffer
+             * @throws {$protobuf.util.ProtocolError} If required fields are missing
+             */
+            Dimension.decode = function decode(reader, length) {
+                if (!(reader instanceof $Reader))
+                    reader = $Reader.create(reader);
+                var end = length === undefined ? reader.len : reader.pos + length, message = new $root.onnx.TensorShapeProto.Dimension();
+                while (reader.pos < end) {
+                    var tag = reader.uint32();
+                    switch (tag >>> 3) {
+                    case 1: {
+                            message.dimValue = reader.int64();
+                            break;
+                        }
+                    case 2: {
+                            message.dimParam = reader.string();
+                            break;
+                        }
+                    case 3: {
+                            message.denotation = reader.string();
+                            break;
+                        }
+                    default:
+                        reader.skipType(tag & 7);
+                        break;
+                    }
+                }
+                return message;
+            };
+
+            /**
+             * Decodes a Dimension message from the specified reader or buffer, length delimited.
+             * @function decodeDelimited
+             * @memberof onnx.TensorShapeProto.Dimension
+             * @static
+             * @param {$protobuf.Reader|Uint8Array} reader Reader or buffer to decode from
+             * @returns {onnx.TensorShapeProto.Dimension} Dimension
+             * @throws {Error} If the payload is not a reader or valid buffer
+             * @throws {$protobuf.util.ProtocolError} If required fields are missing
+             */
+            Dimension.decodeDelimited = function decodeDelimited(reader) {
+                if (!(reader instanceof $Reader))
+                    reader = new $Reader(reader);
+                return this.decode(reader, reader.uint32());
+            };
+
+            /**
+             * Verifies a Dimension message.
+             * @function verify
+             * @memberof onnx.TensorShapeProto.Dimension
+             * @static
+             * @param {Object.<string,*>} message Plain object to verify
+             * @returns {string|null} `null` if valid, otherwise the reason why it is not
+             */
+            Dimension.verify = function verify(message) {
+                if (typeof message !== "object" || message === null)
+                    return "object expected";
+                var properties = {};
+                if (message.dimValue != null && message.hasOwnProperty("dimValue")) {
+                    properties.value = 1;
+                    if (!$util.isInteger(message.dimValue) && !(message.dimValue && $util.isInteger(message.dimValue.low) && $util.isInteger(message.dimValue.high)))
+                        return "dimValue: integer|Long expected";
+                }
+                if (message.dimParam != null && message.hasOwnProperty("dimParam")) {
+                    if (properties.value === 1)
+                        return "value: multiple values";
+                    properties.value = 1;
+                    if (!$util.isString(message.dimParam))
+                        return "dimParam: string expected";
+                }
+                if (message.denotation != null && message.hasOwnProperty("denotation"))
+                    if (!$util.isString(message.denotation))
+                        return "denotation: string expected";
+                return null;
+            };
+
+            /**
+             * Creates a Dimension message from a plain object. Also converts values to their respective internal types.
+             * @function fromObject
+             * @memberof onnx.TensorShapeProto.Dimension
+             * @static
+             * @param {Object.<string,*>} object Plain object
+             * @returns {onnx.TensorShapeProto.Dimension} Dimension
+             */
+            Dimension.fromObject = function fromObject(object) {
+                if (object instanceof $root.onnx.TensorShapeProto.Dimension)
+                    return object;
+                var message = new $root.onnx.TensorShapeProto.Dimension();
+                if (object.dimValue != null)
+                    if ($util.Long)
+                        (message.dimValue = $util.Long.fromValue(object.dimValue)).unsigned = false;
+                    else if (typeof object.dimValue === "string")
+                        message.dimValue = parseInt(object.dimValue, 10);
+                    else if (typeof object.dimValue === "number")
+                        message.dimValue = object.dimValue;
+                    else if (typeof object.dimValue === "object")
+                        message.dimValue = new $util.LongBits(object.dimValue.low >>> 0, object.dimValue.high >>> 0).toNumber();
+                if (object.dimParam != null)
+                    message.dimParam = String(object.dimParam);
+                if (object.denotation != null)
+                    message.denotation = String(object.denotation);
+                return message;
+            };
+
+            /**
+             * Creates a plain object from a Dimension message. Also converts values to other types if specified.
+             * @function toObject
+             * @memberof onnx.TensorShapeProto.Dimension
+             * @static
+             * @param {onnx.TensorShapeProto.Dimension} message Dimension
+             * @param {$protobuf.IConversionOptions} [options] Conversion options
+             * @returns {Object.<string,*>} Plain object
+             */
+            Dimension.toObject = function toObject(message, options) {
+                if (!options)
+                    options = {};
+                var object = {};
+                if (options.defaults)
+                    object.denotation = "";
+                if (message.dimValue != null && message.hasOwnProperty("dimValue")) {
+                    if (typeof message.dimValue === "number")
+                        object.dimValue = options.longs === String ? String(message.dimValue) : message.dimValue;
+                    else
+                        object.dimValue = options.longs === String ? $util.Long.prototype.toString.call(message.dimValue) : options.longs === Number ? new $util.LongBits(message.dimValue.low >>> 0, message.dimValue.high >>> 0).toNumber() : message.dimValue;
+                    if (options.oneofs)
+                        object.value = "dimValue";
+                }
+                if (message.dimParam != null && message.hasOwnProperty("dimParam")) {
+                    object.dimParam = message.dimParam;
+                    if (options.oneofs)
+                        object.value = "dimParam";
+                }
+                if (message.denotation != null && message.hasOwnProperty("denotation"))
+                    object.denotation = message.denotation;
+                return object;
+            };
+
+            /**
+             * Converts this Dimension to JSON.
+             * @function toJSON
+             * @memberof onnx.TensorShapeProto.Dimension
+             * @instance
+             * @returns {Object.<string,*>} JSON object
+             */
+            Dimension.prototype.toJSON = function toJSON() {
+                return this.constructor.toObject(this, $protobuf.util.toJSONOptions);
+            };
+
+            /**
+             * Gets the default type url for Dimension
+             * @function getTypeUrl
+             * @memberof onnx.TensorShapeProto.Dimension
+             * @static
+             * @param {string} [typeUrlPrefix] your custom typeUrlPrefix(default "type.googleapis.com")
+             * @returns {string} The default type url
+             */
+            Dimension.getTypeUrl = function getTypeUrl(typeUrlPrefix) {
+                if (typeUrlPrefix === undefined) {
+                    typeUrlPrefix = "type.googleapis.com";
+                }
+                return typeUrlPrefix + "/onnx.TensorShapeProto.Dimension";
+            };
+
+            return Dimension;
+        })();
+
+        return TensorShapeProto;
+    })();
+
+    onnx.TypeProto = (function() {
+
+        /**
+         * Properties of a TypeProto.
+         * @memberof onnx
+         * @interface ITypeProto
+         * @property {onnx.TypeProto.ITensor|null} [tensorType] TypeProto tensorType
+         * @property {onnx.TypeProto.ISequence|null} [sequenceType] TypeProto sequenceType
+         * @property {onnx.TypeProto.IMap|null} [mapType] TypeProto mapType
+         * @property {onnx.TypeProto.IOptional|null} [optionalType] TypeProto optionalType
+         * @property {onnx.TypeProto.ISparseTensor|null} [sparseTensorType] TypeProto sparseTensorType
+         * @property {string|null} [denotation] TypeProto denotation
+         */
+
+        /**
+         * Constructs a new TypeProto.
+         * @memberof onnx
+         * @classdesc Represents a TypeProto.
+         * @implements ITypeProto
+         * @constructor
+         * @param {onnx.ITypeProto=} [properties] Properties to set
+         */
+        function TypeProto(properties) {
+            if (properties)
+                for (var keys = Object.keys(properties), i = 0; i < keys.length; ++i)
+                    if (properties[keys[i]] != null)
+                        this[keys[i]] = properties[keys[i]];
+        }
+
+        /**
+         * TypeProto tensorType.
+         * @member {onnx.TypeProto.ITensor|null|undefined} tensorType
+         * @memberof onnx.TypeProto
+         * @instance
+         */
+        TypeProto.prototype.tensorType = null;
+
+        /**
+         * TypeProto sequenceType.
+         * @member {onnx.TypeProto.ISequence|null|undefined} sequenceType
+         * @memberof onnx.TypeProto
+         * @instance
+         */
+        TypeProto.prototype.sequenceType = null;
+
+        /**
+         * TypeProto mapType.
+         * @member {onnx.TypeProto.IMap|null|undefined} mapType
+         * @memberof onnx.TypeProto
+         * @instance
+         */
+        TypeProto.prototype.mapType = null;
+
+        /**
+         * TypeProto optionalType.
+         * @member {onnx.TypeProto.IOptional|null|undefined} optionalType
+         * @memberof onnx.TypeProto
+         * @instance
+         */
+        TypeProto.prototype.optionalType = null;
+
+        /**
+         * TypeProto sparseTensorType.
+         * @member {onnx.TypeProto.ISparseTensor|null|undefined} sparseTensorType
+         * @memberof onnx.TypeProto
+         * @instance
+         */
+        TypeProto.prototype.sparseTensorType = null;
+
+        /**
+         * TypeProto denotation.
+         * @member {string} denotation
+         * @memberof onnx.TypeProto
+         * @instance
+         */
+        TypeProto.prototype.denotation = "";
+
+        // OneOf field names bound to virtual getters and setters
+        var $oneOfFields;
+
+        /**
+         * TypeProto value.
+         * @member {"tensorType"|"sequenceType"|"mapType"|"optionalType"|"sparseTensorType"|undefined} value
+         * @memberof onnx.TypeProto
+         * @instance
+         */
+        Object.defineProperty(TypeProto.prototype, "value", {
+            get: $util.oneOfGetter($oneOfFields = ["tensorType", "sequenceType", "mapType", "optionalType", "sparseTensorType"]),
+            set: $util.oneOfSetter($oneOfFields)
+        });
+
+        /**
+         * Creates a new TypeProto instance using the specified properties.
+         * @function create
+         * @memberof onnx.TypeProto
+         * @static
+         * @param {onnx.ITypeProto=} [properties] Properties to set
+         * @returns {onnx.TypeProto} TypeProto instance
+         */
+        TypeProto.create = function create(properties) {
+            return new TypeProto(properties);
+        };
+
+        /**
+         * Encodes the specified TypeProto message. Does not implicitly {@link onnx.TypeProto.verify|verify} messages.
+         * @function encode
+         * @memberof onnx.TypeProto
+         * @static
+         * @param {onnx.ITypeProto} message TypeProto message or plain object to encode
+         * @param {$protobuf.Writer} [writer] Writer to encode to
+         * @returns {$protobuf.Writer} Writer
+         */
+        TypeProto.encode = function encode(message, writer) {
+            if (!writer)
+                writer = $Writer.create();
+            if (message.tensorType != null && Object.hasOwnProperty.call(message, "tensorType"))
+                $root.onnx.TypeProto.Tensor.encode(message.tensorType, writer.uint32(/* id 1, wireType 2 =*/10).fork()).ldelim();
+            if (message.sequenceType != null && Object.hasOwnProperty.call(message, "sequenceType"))
+                $root.onnx.TypeProto.Sequence.encode(message.sequenceType, writer.uint32(/* id 4, wireType 2 =*/34).fork()).ldelim();
+            if (message.mapType != null && Object.hasOwnProperty.call(message, "mapType"))
+                $root.onnx.TypeProto.Map.encode(message.mapType, writer.uint32(/* id 5, wireType 2 =*/42).fork()).ldelim();
+            if (message.denotation != null && Object.hasOwnProperty.call(message, "denotation"))
+                writer.uint32(/* id 6, wireType 2 =*/50).string(message.denotation);
+            if (message.sparseTensorType != null && Object.hasOwnProperty.call(message, "sparseTensorType"))
+                $root.onnx.TypeProto.SparseTensor.encode(message.sparseTensorType, writer.uint32(/* id 8, wireType 2 =*/66).fork()).ldelim();
+            if (message.optionalType != null && Object.hasOwnProperty.call(message, "optionalType"))
+                $root.onnx.TypeProto.Optional.encode(message.optionalType, writer.uint32(/* id 9, wireType 2 =*/74).fork()).ldelim();
+            return writer;
+        };
+
+        /**
+         * Encodes the specified TypeProto message, length delimited. Does not implicitly {@link onnx.TypeProto.verify|verify} messages.
+         * @function encodeDelimited
+         * @memberof onnx.TypeProto
+         * @static
+         * @param {onnx.ITypeProto} message TypeProto message or plain object to encode
+         * @param {$protobuf.Writer} [writer] Writer to encode to
+         * @returns {$protobuf.Writer} Writer
+         */
+        TypeProto.encodeDelimited = function encodeDelimited(message, writer) {
+            return this.encode(message, writer).ldelim();
+        };
+
+        /**
+         * Decodes a TypeProto message from the specified reader or buffer.
+         * @function decode
+         * @memberof onnx.TypeProto
+         * @static
+         * @param {$protobuf.Reader|Uint8Array} reader Reader or buffer to decode from
+         * @param {number} [length] Message length if known beforehand
+         * @returns {onnx.TypeProto} TypeProto
+         * @throws {Error} If the payload is not a reader or valid buffer
+         * @throws {$protobuf.util.ProtocolError} If required fields are missing
+         */
+        TypeProto.decode = function decode(reader, length) {
+            if (!(reader instanceof $Reader))
+                reader = $Reader.create(reader);
+            var end = length === undefined ? reader.len : reader.pos + length, message = new $root.onnx.TypeProto();
+            while (reader.pos < end) {
+                var tag = reader.uint32();
+                switch (tag >>> 3) {
+                case 1: {
+                        message.tensorType = $root.onnx.TypeProto.Tensor.decode(reader, reader.uint32());
+                        break;
+                    }
+                case 4: {
+                        message.sequenceType = $root.onnx.TypeProto.Sequence.decode(reader, reader.uint32());
+                        break;
+                    }
+                case 5: {
+                        message.mapType = $root.onnx.TypeProto.Map.decode(reader, reader.uint32());
+                        break;
+                    }
+                case 9: {
+                        message.optionalType = $root.onnx.TypeProto.Optional.decode(reader, reader.uint32());
+                        break;
+                    }
+                case 8: {
+                        message.sparseTensorType = $root.onnx.TypeProto.SparseTensor.decode(reader, reader.uint32());
+                        break;
+                    }
+                case 6: {
+                        message.denotation = reader.string();
+                        break;
+                    }
+                default:
+                    reader.skipType(tag & 7);
+                    break;
+                }
+            }
+            return message;
+        };
+
+        /**
+         * Decodes a TypeProto message from the specified reader or buffer, length delimited.
+         * @function decodeDelimited
+         * @memberof onnx.TypeProto
+         * @static
+         * @param {$protobuf.Reader|Uint8Array} reader Reader or buffer to decode from
+         * @returns {onnx.TypeProto} TypeProto
+         * @throws {Error} If the payload is not a reader or valid buffer
+         * @throws {$protobuf.util.ProtocolError} If required fields are missing
+         */
+        TypeProto.decodeDelimited = function decodeDelimited(reader) {
+            if (!(reader instanceof $Reader))
+                reader = new $Reader(reader);
+            return this.decode(reader, reader.uint32());
+        };
+
+        /**
+         * Verifies a TypeProto message.
+         * @function verify
+         * @memberof onnx.TypeProto
+         * @static
+         * @param {Object.<string,*>} message Plain object to verify
+         * @returns {string|null} `null` if valid, otherwise the reason why it is not
+         */
+        TypeProto.verify = function verify(message) {
+            if (typeof message !== "object" || message === null)
+                return "object expected";
+            var properties = {};
+            if (message.tensorType != null && message.hasOwnProperty("tensorType")) {
+                properties.value = 1;
+                {
+                    var error = $root.onnx.TypeProto.Tensor.verify(message.tensorType);
+                    if (error)
+                        return "tensorType." + error;
+                }
+            }
+            if (message.sequenceType != null && message.hasOwnProperty("sequenceType")) {
+                if (properties.value === 1)
+                    return "value: multiple values";
+                properties.value = 1;
+                {
+                    var error = $root.onnx.TypeProto.Sequence.verify(message.sequenceType);
+                    if (error)
+                        return "sequenceType." + error;
+                }
+            }
+            if (message.mapType != null && message.hasOwnProperty("mapType")) {
+                if (properties.value === 1)
+                    return "value: multiple values";
+                properties.value = 1;
+                {
+                    var error = $root.onnx.TypeProto.Map.verify(message.mapType);
+                    if (error)
+                        return "mapType." + error;
+                }
+            }
+            if (message.optionalType != null && message.hasOwnProperty("optionalType")) {
+                if (properties.value === 1)
+                    return "value: multiple values";
+                properties.value = 1;
+                {
+                    var error = $root.onnx.TypeProto.Optional.verify(message.optionalType);
+                    if (error)
+                        return "optionalType." + error;
+                }
+            }
+            if (message.sparseTensorType != null && message.hasOwnProperty("sparseTensorType")) {
+                if (properties.value === 1)
+                    return "value: multiple values";
+                properties.value = 1;
+                {
+                    var error = $root.onnx.TypeProto.SparseTensor.verify(message.sparseTensorType);
+                    if (error)
+                        return "sparseTensorType." + error;
+                }
+            }
+            if (message.denotation != null && message.hasOwnProperty("denotation"))
+                if (!$util.isString(message.denotation))
+                    return "denotation: string expected";
+            return null;
+        };
+
+        /**
+         * Creates a TypeProto message from a plain object. Also converts values to their respective internal types.
+         * @function fromObject
+         * @memberof onnx.TypeProto
+         * @static
+         * @param {Object.<string,*>} object Plain object
+         * @returns {onnx.TypeProto} TypeProto
+         */
+        TypeProto.fromObject = function fromObject(object) {
+            if (object instanceof $root.onnx.TypeProto)
+                return object;
+            var message = new $root.onnx.TypeProto();
+            if (object.tensorType != null) {
+                if (typeof object.tensorType !== "object")
+                    throw TypeError(".onnx.TypeProto.tensorType: object expected");
+                message.tensorType = $root.onnx.TypeProto.Tensor.fromObject(object.tensorType);
+            }
+            if (object.sequenceType != null) {
+                if (typeof object.sequenceType !== "object")
+                    throw TypeError(".onnx.TypeProto.sequenceType: object expected");
+                message.sequenceType = $root.onnx.TypeProto.Sequence.fromObject(object.sequenceType);
+            }
+            if (object.mapType != null) {
+                if (typeof object.mapType !== "object")
+                    throw TypeError(".onnx.TypeProto.mapType: object expected");
+                message.mapType = $root.onnx.TypeProto.Map.fromObject(object.mapType);
+            }
+            if (object.optionalType != null) {
+                if (typeof object.optionalType !== "object")
+                    throw TypeError(".onnx.TypeProto.optionalType: object expected");
+                message.optionalType = $root.onnx.TypeProto.Optional.fromObject(object.optionalType);
+            }
+            if (object.sparseTensorType != null) {
+                if (typeof object.sparseTensorType !== "object")
+                    throw TypeError(".onnx.TypeProto.sparseTensorType: object expected");
+                message.sparseTensorType = $root.onnx.TypeProto.SparseTensor.fromObject(object.sparseTensorType);
+            }
+            if (object.denotation != null)
+                message.denotation = String(object.denotation);
+            return message;
+        };
+
+        /**
+         * Creates a plain object from a TypeProto message. Also converts values to other types if specified.
+         * @function toObject
+         * @memberof onnx.TypeProto
+         * @static
+         * @param {onnx.TypeProto} message TypeProto
+         * @param {$protobuf.IConversionOptions} [options] Conversion options
+         * @returns {Object.<string,*>} Plain object
+         */
+        TypeProto.toObject = function toObject(message, options) {
+            if (!options)
+                options = {};
+            var object = {};
+            if (options.defaults)
+                object.denotation = "";
+            if (message.tensorType != null && message.hasOwnProperty("tensorType")) {
+                object.tensorType = $root.onnx.TypeProto.Tensor.toObject(message.tensorType, options);
+                if (options.oneofs)
+                    object.value = "tensorType";
+            }
+            if (message.sequenceType != null && message.hasOwnProperty("sequenceType")) {
+                object.sequenceType = $root.onnx.TypeProto.Sequence.toObject(message.sequenceType, options);
+                if (options.oneofs)
+                    object.value = "sequenceType";
+            }
+            if (message.mapType != null && message.hasOwnProperty("mapType")) {
+                object.mapType = $root.onnx.TypeProto.Map.toObject(message.mapType, options);
+                if (options.oneofs)
+                    object.value = "mapType";
+            }
+            if (message.denotation != null && message.hasOwnProperty("denotation"))
+                object.denotation = message.denotation;
+            if (message.sparseTensorType != null && message.hasOwnProperty("sparseTensorType")) {
+                object.sparseTensorType = $root.onnx.TypeProto.SparseTensor.toObject(message.sparseTensorType, options);
+                if (options.oneofs)
+                    object.value = "sparseTensorType";
+            }
+            if (message.optionalType != null && message.hasOwnProperty("optionalType")) {
+                object.optionalType = $root.onnx.TypeProto.Optional.toObject(message.optionalType, options);
+                if (options.oneofs)
+                    object.value = "optionalType";
+            }
+            return object;
+        };
+
+        /**
+         * Converts this TypeProto to JSON.
+         * @function toJSON
+         * @memberof onnx.TypeProto
+         * @instance
+         * @returns {Object.<string,*>} JSON object
+         */
+        TypeProto.prototype.toJSON = function toJSON() {
+            return this.constructor.toObject(this, $protobuf.util.toJSONOptions);
+        };
+
+        /**
+         * Gets the default type url for TypeProto
+         * @function getTypeUrl
+         * @memberof onnx.TypeProto
+         * @static
+         * @param {string} [typeUrlPrefix] your custom typeUrlPrefix(default "type.googleapis.com")
+         * @returns {string} The default type url
+         */
+        TypeProto.getTypeUrl = function getTypeUrl(typeUrlPrefix) {
+            if (typeUrlPrefix === undefined) {
+                typeUrlPrefix = "type.googleapis.com";
+            }
+            return typeUrlPrefix + "/onnx.TypeProto";
+        };
+
+        TypeProto.Tensor = (function() {
+
+            /**
+             * Properties of a Tensor.
+             * @memberof onnx.TypeProto
+             * @interface ITensor
+             * @property {number|null} [elemType] Tensor elemType
+             * @property {onnx.ITensorShapeProto|null} [shape] Tensor shape
+             */
+
+            /**
+             * Constructs a new Tensor.
+             * @memberof onnx.TypeProto
+             * @classdesc Represents a Tensor.
+             * @implements ITensor
+             * @constructor
+             * @param {onnx.TypeProto.ITensor=} [properties] Properties to set
+             */
+            function Tensor(properties) {
+                if (properties)
+                    for (var keys = Object.keys(properties), i = 0; i < keys.length; ++i)
+                        if (properties[keys[i]] != null)
+                            this[keys[i]] = properties[keys[i]];
+            }
+
+            /**
+             * Tensor elemType.
+             * @member {number} elemType
+             * @memberof onnx.TypeProto.Tensor
+             * @instance
+             */
+            Tensor.prototype.elemType = 0;
+
+            /**
+             * Tensor shape.
+             * @member {onnx.ITensorShapeProto|null|undefined} shape
+             * @memberof onnx.TypeProto.Tensor
+             * @instance
+             */
+            Tensor.prototype.shape = null;
+
+            /**
+             * Creates a new Tensor instance using the specified properties.
+             * @function create
+             * @memberof onnx.TypeProto.Tensor
+             * @static
+             * @param {onnx.TypeProto.ITensor=} [properties] Properties to set
+             * @returns {onnx.TypeProto.Tensor} Tensor instance
+             */
+            Tensor.create = function create(properties) {
+                return new Tensor(properties);
+            };
+
+            /**
+             * Encodes the specified Tensor message. Does not implicitly {@link onnx.TypeProto.Tensor.verify|verify} messages.
+             * @function encode
+             * @memberof onnx.TypeProto.Tensor
+             * @static
+             * @param {onnx.TypeProto.ITensor} message Tensor message or plain object to encode
+             * @param {$protobuf.Writer} [writer] Writer to encode to
+             * @returns {$protobuf.Writer} Writer
+             */
+            Tensor.encode = function encode(message, writer) {
+                if (!writer)
+                    writer = $Writer.create();
+                if (message.elemType != null && Object.hasOwnProperty.call(message, "elemType"))
+                    writer.uint32(/* id 1, wireType 0 =*/8).int32(message.elemType);
+                if (message.shape != null && Object.hasOwnProperty.call(message, "shape"))
+                    $root.onnx.TensorShapeProto.encode(message.shape, writer.uint32(/* id 2, wireType 2 =*/18).fork()).ldelim();
+                return writer;
+            };
+
+            /**
+             * Encodes the specified Tensor message, length delimited. Does not implicitly {@link onnx.TypeProto.Tensor.verify|verify} messages.
+             * @function encodeDelimited
+             * @memberof onnx.TypeProto.Tensor
+             * @static
+             * @param {onnx.TypeProto.ITensor} message Tensor message or plain object to encode
+             * @param {$protobuf.Writer} [writer] Writer to encode to
+             * @returns {$protobuf.Writer} Writer
+             */
+            Tensor.encodeDelimited = function encodeDelimited(message, writer) {
+                return this.encode(message, writer).ldelim();
+            };
+
+            /**
+             * Decodes a Tensor message from the specified reader or buffer.
+             * @function decode
+             * @memberof onnx.TypeProto.Tensor
+             * @static
+             * @param {$protobuf.Reader|Uint8Array} reader Reader or buffer to decode from
+             * @param {number} [length] Message length if known beforehand
+             * @returns {onnx.TypeProto.Tensor} Tensor
+             * @throws {Error} If the payload is not a reader or valid buffer
+             * @throws {$protobuf.util.ProtocolError} If required fields are missing
+             */
+            Tensor.decode = function decode(reader, length) {
+                if (!(reader instanceof $Reader))
+                    reader = $Reader.create(reader);
+                var end = length === undefined ? reader.len : reader.pos + length, message = new $root.onnx.TypeProto.Tensor();
+                while (reader.pos < end) {
+                    var tag = reader.uint32();
+                    switch (tag >>> 3) {
+                    case 1: {
+                            message.elemType = reader.int32();
+                            break;
+                        }
+                    case 2: {
+                            message.shape = $root.onnx.TensorShapeProto.decode(reader, reader.uint32());
+                            break;
+                        }
+                    default:
+                        reader.skipType(tag & 7);
+                        break;
+                    }
+                }
+                return message;
+            };
+
+            /**
+             * Decodes a Tensor message from the specified reader or buffer, length delimited.
+             * @function decodeDelimited
+             * @memberof onnx.TypeProto.Tensor
+             * @static
+             * @param {$protobuf.Reader|Uint8Array} reader Reader or buffer to decode from
+             * @returns {onnx.TypeProto.Tensor} Tensor
+             * @throws {Error} If the payload is not a reader or valid buffer
+             * @throws {$protobuf.util.ProtocolError} If required fields are missing
+             */
+            Tensor.decodeDelimited = function decodeDelimited(reader) {
+                if (!(reader instanceof $Reader))
+                    reader = new $Reader(reader);
+                return this.decode(reader, reader.uint32());
+            };
+
+            /**
+             * Verifies a Tensor message.
+             * @function verify
+             * @memberof onnx.TypeProto.Tensor
+             * @static
+             * @param {Object.<string,*>} message Plain object to verify
+             * @returns {string|null} `null` if valid, otherwise the reason why it is not
+             */
+            Tensor.verify = function verify(message) {
+                if (typeof message !== "object" || message === null)
+                    return "object expected";
+                if (message.elemType != null && message.hasOwnProperty("elemType"))
+                    if (!$util.isInteger(message.elemType))
+                        return "elemType: integer expected";
+                if (message.shape != null && message.hasOwnProperty("shape")) {
+                    var error = $root.onnx.TensorShapeProto.verify(message.shape);
+                    if (error)
+                        return "shape." + error;
+                }
+                return null;
+            };
+
+            /**
+             * Creates a Tensor message from a plain object. Also converts values to their respective internal types.
+             * @function fromObject
+             * @memberof onnx.TypeProto.Tensor
+             * @static
+             * @param {Object.<string,*>} object Plain object
+             * @returns {onnx.TypeProto.Tensor} Tensor
+             */
+            Tensor.fromObject = function fromObject(object) {
+                if (object instanceof $root.onnx.TypeProto.Tensor)
+                    return object;
+                var message = new $root.onnx.TypeProto.Tensor();
+                if (object.elemType != null)
+                    message.elemType = object.elemType | 0;
+                if (object.shape != null) {
+                    if (typeof object.shape !== "object")
+                        throw TypeError(".onnx.TypeProto.Tensor.shape: object expected");
+                    message.shape = $root.onnx.TensorShapeProto.fromObject(object.shape);
+                }
+                return message;
+            };
+
+            /**
+             * Creates a plain object from a Tensor message. Also converts values to other types if specified.
+             * @function toObject
+             * @memberof onnx.TypeProto.Tensor
+             * @static
+             * @param {onnx.TypeProto.Tensor} message Tensor
+             * @param {$protobuf.IConversionOptions} [options] Conversion options
+             * @returns {Object.<string,*>} Plain object
+             */
+            Tensor.toObject = function toObject(message, options) {
+                if (!options)
+                    options = {};
+                var object = {};
+                if (options.defaults) {
+                    object.elemType = 0;
+                    object.shape = null;
+                }
+                if (message.elemType != null && message.hasOwnProperty("elemType"))
+                    object.elemType = message.elemType;
+                if (message.shape != null && message.hasOwnProperty("shape"))
+                    object.shape = $root.onnx.TensorShapeProto.toObject(message.shape, options);
+                return object;
+            };
+
+            /**
+             * Converts this Tensor to JSON.
+             * @function toJSON
+             * @memberof onnx.TypeProto.Tensor
+             * @instance
+             * @returns {Object.<string,*>} JSON object
+             */
+            Tensor.prototype.toJSON = function toJSON() {
+                return this.constructor.toObject(this, $protobuf.util.toJSONOptions);
+            };
+
+            /**
+             * Gets the default type url for Tensor
+             * @function getTypeUrl
+             * @memberof onnx.TypeProto.Tensor
+             * @static
+             * @param {string} [typeUrlPrefix] your custom typeUrlPrefix(default "type.googleapis.com")
+             * @returns {string} The default type url
+             */
+            Tensor.getTypeUrl = function getTypeUrl(typeUrlPrefix) {
+                if (typeUrlPrefix === undefined) {
+                    typeUrlPrefix = "type.googleapis.com";
+                }
+                return typeUrlPrefix + "/onnx.TypeProto.Tensor";
+            };
+
+            return Tensor;
+        })();
+
+        TypeProto.Sequence = (function() {
+
+            /**
+             * Properties of a Sequence.
+             * @memberof onnx.TypeProto
+             * @interface ISequence
+             * @property {onnx.ITypeProto|null} [elemType] Sequence elemType
+             */
+
+            /**
+             * Constructs a new Sequence.
+             * @memberof onnx.TypeProto
+             * @classdesc Represents a Sequence.
+             * @implements ISequence
+             * @constructor
+             * @param {onnx.TypeProto.ISequence=} [properties] Properties to set
+             */
+            function Sequence(properties) {
+                if (properties)
+                    for (var keys = Object.keys(properties), i = 0; i < keys.length; ++i)
+                        if (properties[keys[i]] != null)
+                            this[keys[i]] = properties[keys[i]];
+            }
+
+            /**
+             * Sequence elemType.
+             * @member {onnx.ITypeProto|null|undefined} elemType
+             * @memberof onnx.TypeProto.Sequence
+             * @instance
+             */
+            Sequence.prototype.elemType = null;
+
+            /**
+             * Creates a new Sequence instance using the specified properties.
+             * @function create
+             * @memberof onnx.TypeProto.Sequence
+             * @static
+             * @param {onnx.TypeProto.ISequence=} [properties] Properties to set
+             * @returns {onnx.TypeProto.Sequence} Sequence instance
+             */
+            Sequence.create = function create(properties) {
+                return new Sequence(properties);
+            };
+
+            /**
+             * Encodes the specified Sequence message. Does not implicitly {@link onnx.TypeProto.Sequence.verify|verify} messages.
+             * @function encode
+             * @memberof onnx.TypeProto.Sequence
+             * @static
+             * @param {onnx.TypeProto.ISequence} message Sequence message or plain object to encode
+             * @param {$protobuf.Writer} [writer] Writer to encode to
+             * @returns {$protobuf.Writer} Writer
+             */
+            Sequence.encode = function encode(message, writer) {
+                if (!writer)
+                    writer = $Writer.create();
+                if (message.elemType != null && Object.hasOwnProperty.call(message, "elemType"))
+                    $root.onnx.TypeProto.encode(message.elemType, writer.uint32(/* id 1, wireType 2 =*/10).fork()).ldelim();
+                return writer;
+            };
+
+            /**
+             * Encodes the specified Sequence message, length delimited. Does not implicitly {@link onnx.TypeProto.Sequence.verify|verify} messages.
+             * @function encodeDelimited
+             * @memberof onnx.TypeProto.Sequence
+             * @static
+             * @param {onnx.TypeProto.ISequence} message Sequence message or plain object to encode
+             * @param {$protobuf.Writer} [writer] Writer to encode to
+             * @returns {$protobuf.Writer} Writer
+             */
+            Sequence.encodeDelimited = function encodeDelimited(message, writer) {
+                return this.encode(message, writer).ldelim();
+            };
+
+            /**
+             * Decodes a Sequence message from the specified reader or buffer.
+             * @function decode
+             * @memberof onnx.TypeProto.Sequence
+             * @static
+             * @param {$protobuf.Reader|Uint8Array} reader Reader or buffer to decode from
+             * @param {number} [length] Message length if known beforehand
+             * @returns {onnx.TypeProto.Sequence} Sequence
+             * @throws {Error} If the payload is not a reader or valid buffer
+             * @throws {$protobuf.util.ProtocolError} If required fields are missing
+             */
+            Sequence.decode = function decode(reader, length) {
+                if (!(reader instanceof $Reader))
+                    reader = $Reader.create(reader);
+                var end = length === undefined ? reader.len : reader.pos + length, message = new $root.onnx.TypeProto.Sequence();
+                while (reader.pos < end) {
+                    var tag = reader.uint32();
+                    switch (tag >>> 3) {
+                    case 1: {
+                            message.elemType = $root.onnx.TypeProto.decode(reader, reader.uint32());
+                            break;
+                        }
+                    default:
+                        reader.skipType(tag & 7);
+                        break;
+                    }
+                }
+                return message;
+            };
+
+            /**
+             * Decodes a Sequence message from the specified reader or buffer, length delimited.
+             * @function decodeDelimited
+             * @memberof onnx.TypeProto.Sequence
+             * @static
+             * @param {$protobuf.Reader|Uint8Array} reader Reader or buffer to decode from
+             * @returns {onnx.TypeProto.Sequence} Sequence
+             * @throws {Error} If the payload is not a reader or valid buffer
+             * @throws {$protobuf.util.ProtocolError} If required fields are missing
+             */
+            Sequence.decodeDelimited = function decodeDelimited(reader) {
+                if (!(reader instanceof $Reader))
+                    reader = new $Reader(reader);
+                return this.decode(reader, reader.uint32());
+            };
+
+            /**
+             * Verifies a Sequence message.
+             * @function verify
+             * @memberof onnx.TypeProto.Sequence
+             * @static
+             * @param {Object.<string,*>} message Plain object to verify
+             * @returns {string|null} `null` if valid, otherwise the reason why it is not
+             */
+            Sequence.verify = function verify(message) {
+                if (typeof message !== "object" || message === null)
+                    return "object expected";
+                if (message.elemType != null && message.hasOwnProperty("elemType")) {
+                    var error = $root.onnx.TypeProto.verify(message.elemType);
+                    if (error)
+                        return "elemType." + error;
+                }
+                return null;
+            };
+
+            /**
+             * Creates a Sequence message from a plain object. Also converts values to their respective internal types.
+             * @function fromObject
+             * @memberof onnx.TypeProto.Sequence
+             * @static
+             * @param {Object.<string,*>} object Plain object
+             * @returns {onnx.TypeProto.Sequence} Sequence
+             */
+            Sequence.fromObject = function fromObject(object) {
+                if (object instanceof $root.onnx.TypeProto.Sequence)
+                    return object;
+                var message = new $root.onnx.TypeProto.Sequence();
+                if (object.elemType != null) {
+                    if (typeof object.elemType !== "object")
+                        throw TypeError(".onnx.TypeProto.Sequence.elemType: object expected");
+                    message.elemType = $root.onnx.TypeProto.fromObject(object.elemType);
+                }
+                return message;
+            };
+
+            /**
+             * Creates a plain object from a Sequence message. Also converts values to other types if specified.
+             * @function toObject
+             * @memberof onnx.TypeProto.Sequence
+             * @static
+             * @param {onnx.TypeProto.Sequence} message Sequence
+             * @param {$protobuf.IConversionOptions} [options] Conversion options
+             * @returns {Object.<string,*>} Plain object
+             */
+            Sequence.toObject = function toObject(message, options) {
+                if (!options)
+                    options = {};
+                var object = {};
+                if (options.defaults)
+                    object.elemType = null;
+                if (message.elemType != null && message.hasOwnProperty("elemType"))
+                    object.elemType = $root.onnx.TypeProto.toObject(message.elemType, options);
+                return object;
+            };
+
+            /**
+             * Converts this Sequence to JSON.
+             * @function toJSON
+             * @memberof onnx.TypeProto.Sequence
+             * @instance
+             * @returns {Object.<string,*>} JSON object
+             */
+            Sequence.prototype.toJSON = function toJSON() {
+                return this.constructor.toObject(this, $protobuf.util.toJSONOptions);
+            };
+
+            /**
+             * Gets the default type url for Sequence
+             * @function getTypeUrl
+             * @memberof onnx.TypeProto.Sequence
+             * @static
+             * @param {string} [typeUrlPrefix] your custom typeUrlPrefix(default "type.googleapis.com")
+             * @returns {string} The default type url
+             */
+            Sequence.getTypeUrl = function getTypeUrl(typeUrlPrefix) {
+                if (typeUrlPrefix === undefined) {
+                    typeUrlPrefix = "type.googleapis.com";
+                }
+                return typeUrlPrefix + "/onnx.TypeProto.Sequence";
+            };
+
+            return Sequence;
+        })();
+
+        TypeProto.Map = (function() {
+
+            /**
+             * Properties of a Map.
+             * @memberof onnx.TypeProto
+             * @interface IMap
+             * @property {number|null} [keyType] Map keyType
+             * @property {onnx.ITypeProto|null} [valueType] Map valueType
+             */
+
+            /**
+             * Constructs a new Map.
+             * @memberof onnx.TypeProto
+             * @classdesc Represents a Map.
+             * @implements IMap
+             * @constructor
+             * @param {onnx.TypeProto.IMap=} [properties] Properties to set
+             */
+            function Map(properties) {
+                if (properties)
+                    for (var keys = Object.keys(properties), i = 0; i < keys.length; ++i)
+                        if (properties[keys[i]] != null)
+                            this[keys[i]] = properties[keys[i]];
+            }
+
+            /**
+             * Map keyType.
+             * @member {number} keyType
+             * @memberof onnx.TypeProto.Map
+             * @instance
+             */
+            Map.prototype.keyType = 0;
+
+            /**
+             * Map valueType.
+             * @member {onnx.ITypeProto|null|undefined} valueType
+             * @memberof onnx.TypeProto.Map
+             * @instance
+             */
+            Map.prototype.valueType = null;
+
+            /**
+             * Creates a new Map instance using the specified properties.
+             * @function create
+             * @memberof onnx.TypeProto.Map
+             * @static
+             * @param {onnx.TypeProto.IMap=} [properties] Properties to set
+             * @returns {onnx.TypeProto.Map} Map instance
+             */
+            Map.create = function create(properties) {
+                return new Map(properties);
+            };
+
+            /**
+             * Encodes the specified Map message. Does not implicitly {@link onnx.TypeProto.Map.verify|verify} messages.
+             * @function encode
+             * @memberof onnx.TypeProto.Map
+             * @static
+             * @param {onnx.TypeProto.IMap} message Map message or plain object to encode
+             * @param {$protobuf.Writer} [writer] Writer to encode to
+             * @returns {$protobuf.Writer} Writer
+             */
+            Map.encode = function encode(message, writer) {
+                if (!writer)
+                    writer = $Writer.create();
+                if (message.keyType != null && Object.hasOwnProperty.call(message, "keyType"))
+                    writer.uint32(/* id 1, wireType 0 =*/8).int32(message.keyType);
+                if (message.valueType != null && Object.hasOwnProperty.call(message, "valueType"))
+                    $root.onnx.TypeProto.encode(message.valueType, writer.uint32(/* id 2, wireType 2 =*/18).fork()).ldelim();
+                return writer;
+            };
+
+            /**
+             * Encodes the specified Map message, length delimited. Does not implicitly {@link onnx.TypeProto.Map.verify|verify} messages.
+             * @function encodeDelimited
+             * @memberof onnx.TypeProto.Map
+             * @static
+             * @param {onnx.TypeProto.IMap} message Map message or plain object to encode
+             * @param {$protobuf.Writer} [writer] Writer to encode to
+             * @returns {$protobuf.Writer} Writer
+             */
+            Map.encodeDelimited = function encodeDelimited(message, writer) {
+                return this.encode(message, writer).ldelim();
+            };
+
+            /**
+             * Decodes a Map message from the specified reader or buffer.
+             * @function decode
+             * @memberof onnx.TypeProto.Map
+             * @static
+             * @param {$protobuf.Reader|Uint8Array} reader Reader or buffer to decode from
+             * @param {number} [length] Message length if known beforehand
+             * @returns {onnx.TypeProto.Map} Map
+             * @throws {Error} If the payload is not a reader or valid buffer
+             * @throws {$protobuf.util.ProtocolError} If required fields are missing
+             */
+            Map.decode = function decode(reader, length) {
+                if (!(reader instanceof $Reader))
+                    reader = $Reader.create(reader);
+                var end = length === undefined ? reader.len : reader.pos + length, message = new $root.onnx.TypeProto.Map();
+                while (reader.pos < end) {
+                    var tag = reader.uint32();
+                    switch (tag >>> 3) {
+                    case 1: {
+                            message.keyType = reader.int32();
+                            break;
+                        }
+                    case 2: {
+                            message.valueType = $root.onnx.TypeProto.decode(reader, reader.uint32());
+                            break;
+                        }
+                    default:
+                        reader.skipType(tag & 7);
+                        break;
+                    }
+                }
+                return message;
+            };
+
+            /**
+             * Decodes a Map message from the specified reader or buffer, length delimited.
+             * @function decodeDelimited
+             * @memberof onnx.TypeProto.Map
+             * @static
+             * @param {$protobuf.Reader|Uint8Array} reader Reader or buffer to decode from
+             * @returns {onnx.TypeProto.Map} Map
+             * @throws {Error} If the payload is not a reader or valid buffer
+             * @throws {$protobuf.util.ProtocolError} If required fields are missing
+             */
+            Map.decodeDelimited = function decodeDelimited(reader) {
+                if (!(reader instanceof $Reader))
+                    reader = new $Reader(reader);
+                return this.decode(reader, reader.uint32());
+            };
+
+            /**
+             * Verifies a Map message.
+             * @function verify
+             * @memberof onnx.TypeProto.Map
+             * @static
+             * @param {Object.<string,*>} message Plain object to verify
+             * @returns {string|null} `null` if valid, otherwise the reason why it is not
+             */
+            Map.verify = function verify(message) {
+                if (typeof message !== "object" || message === null)
+                    return "object expected";
+                if (message.keyType != null && message.hasOwnProperty("keyType"))
+                    if (!$util.isInteger(message.keyType))
+                        return "keyType: integer expected";
+                if (message.valueType != null && message.hasOwnProperty("valueType")) {
+                    var error = $root.onnx.TypeProto.verify(message.valueType);
+                    if (error)
+                        return "valueType." + error;
+                }
+                return null;
+            };
+
+            /**
+             * Creates a Map message from a plain object. Also converts values to their respective internal types.
+             * @function fromObject
+             * @memberof onnx.TypeProto.Map
+             * @static
+             * @param {Object.<string,*>} object Plain object
+             * @returns {onnx.TypeProto.Map} Map
+             */
+            Map.fromObject = function fromObject(object) {
+                if (object instanceof $root.onnx.TypeProto.Map)
+                    return object;
+                var message = new $root.onnx.TypeProto.Map();
+                if (object.keyType != null)
+                    message.keyType = object.keyType | 0;
+                if (object.valueType != null) {
+                    if (typeof object.valueType !== "object")
+                        throw TypeError(".onnx.TypeProto.Map.valueType: object expected");
+                    message.valueType = $root.onnx.TypeProto.fromObject(object.valueType);
+                }
+                return message;
+            };
+
+            /**
+             * Creates a plain object from a Map message. Also converts values to other types if specified.
+             * @function toObject
+             * @memberof onnx.TypeProto.Map
+             * @static
+             * @param {onnx.TypeProto.Map} message Map
+             * @param {$protobuf.IConversionOptions} [options] Conversion options
+             * @returns {Object.<string,*>} Plain object
+             */
+            Map.toObject = function toObject(message, options) {
+                if (!options)
+                    options = {};
+                var object = {};
+                if (options.defaults) {
+                    object.keyType = 0;
+                    object.valueType = null;
+                }
+                if (message.keyType != null && message.hasOwnProperty("keyType"))
+                    object.keyType = message.keyType;
+                if (message.valueType != null && message.hasOwnProperty("valueType"))
+                    object.valueType = $root.onnx.TypeProto.toObject(message.valueType, options);
+                return object;
+            };
+
+            /**
+             * Converts this Map to JSON.
+             * @function toJSON
+             * @memberof onnx.TypeProto.Map
+             * @instance
+             * @returns {Object.<string,*>} JSON object
+             */
+            Map.prototype.toJSON = function toJSON() {
+                return this.constructor.toObject(this, $protobuf.util.toJSONOptions);
+            };
+
+            /**
+             * Gets the default type url for Map
+             * @function getTypeUrl
+             * @memberof onnx.TypeProto.Map
+             * @static
+             * @param {string} [typeUrlPrefix] your custom typeUrlPrefix(default "type.googleapis.com")
+             * @returns {string} The default type url
+             */
+            Map.getTypeUrl = function getTypeUrl(typeUrlPrefix) {
+                if (typeUrlPrefix === undefined) {
+                    typeUrlPrefix = "type.googleapis.com";
+                }
+                return typeUrlPrefix + "/onnx.TypeProto.Map";
+            };
+
+            return Map;
+        })();
+
+        TypeProto.Optional = (function() {
+
+            /**
+             * Properties of an Optional.
+             * @memberof onnx.TypeProto
+             * @interface IOptional
+             * @property {onnx.ITypeProto|null} [elemType] Optional elemType
+             */
+
+            /**
+             * Constructs a new Optional.
+             * @memberof onnx.TypeProto
+             * @classdesc Represents an Optional.
+             * @implements IOptional
+             * @constructor
+             * @param {onnx.TypeProto.IOptional=} [properties] Properties to set
+             */
+            function Optional(properties) {
+                if (properties)
+                    for (var keys = Object.keys(properties), i = 0; i < keys.length; ++i)
+                        if (properties[keys[i]] != null)
+                            this[keys[i]] = properties[keys[i]];
+            }
+
+            /**
+             * Optional elemType.
+             * @member {onnx.ITypeProto|null|undefined} elemType
+             * @memberof onnx.TypeProto.Optional
+             * @instance
+             */
+            Optional.prototype.elemType = null;
+
+            /**
+             * Creates a new Optional instance using the specified properties.
+             * @function create
+             * @memberof onnx.TypeProto.Optional
+             * @static
+             * @param {onnx.TypeProto.IOptional=} [properties] Properties to set
+             * @returns {onnx.TypeProto.Optional} Optional instance
+             */
+            Optional.create = function create(properties) {
+                return new Optional(properties);
+            };
+
+            /**
+             * Encodes the specified Optional message. Does not implicitly {@link onnx.TypeProto.Optional.verify|verify} messages.
+             * @function encode
+             * @memberof onnx.TypeProto.Optional
+             * @static
+             * @param {onnx.TypeProto.IOptional} message Optional message or plain object to encode
+             * @param {$protobuf.Writer} [writer] Writer to encode to
+             * @returns {$protobuf.Writer} Writer
+             */
+            Optional.encode = function encode(message, writer) {
+                if (!writer)
+                    writer = $Writer.create();
+                if (message.elemType != null && Object.hasOwnProperty.call(message, "elemType"))
+                    $root.onnx.TypeProto.encode(message.elemType, writer.uint32(/* id 1, wireType 2 =*/10).fork()).ldelim();
+                return writer;
+            };
+
+            /**
+             * Encodes the specified Optional message, length delimited. Does not implicitly {@link onnx.TypeProto.Optional.verify|verify} messages.
+             * @function encodeDelimited
+             * @memberof onnx.TypeProto.Optional
+             * @static
+             * @param {onnx.TypeProto.IOptional} message Optional message or plain object to encode
+             * @param {$protobuf.Writer} [writer] Writer to encode to
+             * @returns {$protobuf.Writer} Writer
+             */
+            Optional.encodeDelimited = function encodeDelimited(message, writer) {
+                return this.encode(message, writer).ldelim();
+            };
+
+            /**
+             * Decodes an Optional message from the specified reader or buffer.
+             * @function decode
+             * @memberof onnx.TypeProto.Optional
+             * @static
+             * @param {$protobuf.Reader|Uint8Array} reader Reader or buffer to decode from
+             * @param {number} [length] Message length if known beforehand
+             * @returns {onnx.TypeProto.Optional} Optional
+             * @throws {Error} If the payload is not a reader or valid buffer
+             * @throws {$protobuf.util.ProtocolError} If required fields are missing
+             */
+            Optional.decode = function decode(reader, length) {
+                if (!(reader instanceof $Reader))
+                    reader = $Reader.create(reader);
+                var end = length === undefined ? reader.len : reader.pos + length, message = new $root.onnx.TypeProto.Optional();
+                while (reader.pos < end) {
+                    var tag = reader.uint32();
+                    switch (tag >>> 3) {
+                    case 1: {
+                            message.elemType = $root.onnx.TypeProto.decode(reader, reader.uint32());
+                            break;
+                        }
+                    default:
+                        reader.skipType(tag & 7);
+                        break;
+                    }
+                }
+                return message;
+            };
+
+            /**
+             * Decodes an Optional message from the specified reader or buffer, length delimited.
+             * @function decodeDelimited
+             * @memberof onnx.TypeProto.Optional
+             * @static
+             * @param {$protobuf.Reader|Uint8Array} reader Reader or buffer to decode from
+             * @returns {onnx.TypeProto.Optional} Optional
+             * @throws {Error} If the payload is not a reader or valid buffer
+             * @throws {$protobuf.util.ProtocolError} If required fields are missing
+             */
+            Optional.decodeDelimited = function decodeDelimited(reader) {
+                if (!(reader instanceof $Reader))
+                    reader = new $Reader(reader);
+                return this.decode(reader, reader.uint32());
+            };
+
+            /**
+             * Verifies an Optional message.
+             * @function verify
+             * @memberof onnx.TypeProto.Optional
+             * @static
+             * @param {Object.<string,*>} message Plain object to verify
+             * @returns {string|null} `null` if valid, otherwise the reason why it is not
+             */
+            Optional.verify = function verify(message) {
+                if (typeof message !== "object" || message === null)
+                    return "object expected";
+                if (message.elemType != null && message.hasOwnProperty("elemType")) {
+                    var error = $root.onnx.TypeProto.verify(message.elemType);
+                    if (error)
+                        return "elemType." + error;
+                }
+                return null;
+            };
+
+            /**
+             * Creates an Optional message from a plain object. Also converts values to their respective internal types.
+             * @function fromObject
+             * @memberof onnx.TypeProto.Optional
+             * @static
+             * @param {Object.<string,*>} object Plain object
+             * @returns {onnx.TypeProto.Optional} Optional
+             */
+            Optional.fromObject = function fromObject(object) {
+                if (object instanceof $root.onnx.TypeProto.Optional)
+                    return object;
+                var message = new $root.onnx.TypeProto.Optional();
+                if (object.elemType != null) {
+                    if (typeof object.elemType !== "object")
+                        throw TypeError(".onnx.TypeProto.Optional.elemType: object expected");
+                    message.elemType = $root.onnx.TypeProto.fromObject(object.elemType);
+                }
+                return message;
+            };
+
+            /**
+             * Creates a plain object from an Optional message. Also converts values to other types if specified.
+             * @function toObject
+             * @memberof onnx.TypeProto.Optional
+             * @static
+             * @param {onnx.TypeProto.Optional} message Optional
+             * @param {$protobuf.IConversionOptions} [options] Conversion options
+             * @returns {Object.<string,*>} Plain object
+             */
+            Optional.toObject = function toObject(message, options) {
+                if (!options)
+                    options = {};
+                var object = {};
+                if (options.defaults)
+                    object.elemType = null;
+                if (message.elemType != null && message.hasOwnProperty("elemType"))
+                    object.elemType = $root.onnx.TypeProto.toObject(message.elemType, options);
+                return object;
+            };
+
+            /**
+             * Converts this Optional to JSON.
+             * @function toJSON
+             * @memberof onnx.TypeProto.Optional
+             * @instance
+             * @returns {Object.<string,*>} JSON object
+             */
+            Optional.prototype.toJSON = function toJSON() {
+                return this.constructor.toObject(this, $protobuf.util.toJSONOptions);
+            };
+
+            /**
+             * Gets the default type url for Optional
+             * @function getTypeUrl
+             * @memberof onnx.TypeProto.Optional
+             * @static
+             * @param {string} [typeUrlPrefix] your custom typeUrlPrefix(default "type.googleapis.com")
+             * @returns {string} The default type url
+             */
+            Optional.getTypeUrl = function getTypeUrl(typeUrlPrefix) {
+                if (typeUrlPrefix === undefined) {
+                    typeUrlPrefix = "type.googleapis.com";
+                }
+                return typeUrlPrefix + "/onnx.TypeProto.Optional";
+            };
+
+            return Optional;
+        })();
+
+        TypeProto.SparseTensor = (function() {
+
+            /**
+             * Properties of a SparseTensor.
+             * @memberof onnx.TypeProto
+             * @interface ISparseTensor
+             * @property {number|null} [elemType] SparseTensor elemType
+             * @property {onnx.ITensorShapeProto|null} [shape] SparseTensor shape
+             */
+
+            /**
+             * Constructs a new SparseTensor.
+             * @memberof onnx.TypeProto
+             * @classdesc Represents a SparseTensor.
+             * @implements ISparseTensor
+             * @constructor
+             * @param {onnx.TypeProto.ISparseTensor=} [properties] Properties to set
+             */
+            function SparseTensor(properties) {
+                if (properties)
+                    for (var keys = Object.keys(properties), i = 0; i < keys.length; ++i)
+                        if (properties[keys[i]] != null)
+                            this[keys[i]] = properties[keys[i]];
+            }
+
+            /**
+             * SparseTensor elemType.
+             * @member {number} elemType
+             * @memberof onnx.TypeProto.SparseTensor
+             * @instance
+             */
+            SparseTensor.prototype.elemType = 0;
+
+            /**
+             * SparseTensor shape.
+             * @member {onnx.ITensorShapeProto|null|undefined} shape
+             * @memberof onnx.TypeProto.SparseTensor
+             * @instance
+             */
+            SparseTensor.prototype.shape = null;
+
+            /**
+             * Creates a new SparseTensor instance using the specified properties.
+             * @function create
+             * @memberof onnx.TypeProto.SparseTensor
+             * @static
+             * @param {onnx.TypeProto.ISparseTensor=} [properties] Properties to set
+             * @returns {onnx.TypeProto.SparseTensor} SparseTensor instance
+             */
+            SparseTensor.create = function create(properties) {
+                return new SparseTensor(properties);
+            };
+
+            /**
+             * Encodes the specified SparseTensor message. Does not implicitly {@link onnx.TypeProto.SparseTensor.verify|verify} messages.
+             * @function encode
+             * @memberof onnx.TypeProto.SparseTensor
+             * @static
+             * @param {onnx.TypeProto.ISparseTensor} message SparseTensor message or plain object to encode
+             * @param {$protobuf.Writer} [writer] Writer to encode to
+             * @returns {$protobuf.Writer} Writer
+             */
+            SparseTensor.encode = function encode(message, writer) {
+                if (!writer)
+                    writer = $Writer.create();
+                if (message.elemType != null && Object.hasOwnProperty.call(message, "elemType"))
+                    writer.uint32(/* id 1, wireType 0 =*/8).int32(message.elemType);
+                if (message.shape != null && Object.hasOwnProperty.call(message, "shape"))
+                    $root.onnx.TensorShapeProto.encode(message.shape, writer.uint32(/* id 2, wireType 2 =*/18).fork()).ldelim();
+                return writer;
+            };
+
+            /**
+             * Encodes the specified SparseTensor message, length delimited. Does not implicitly {@link onnx.TypeProto.SparseTensor.verify|verify} messages.
+             * @function encodeDelimited
+             * @memberof onnx.TypeProto.SparseTensor
+             * @static
+             * @param {onnx.TypeProto.ISparseTensor} message SparseTensor message or plain object to encode
+             * @param {$protobuf.Writer} [writer] Writer to encode to
+             * @returns {$protobuf.Writer} Writer
+             */
+            SparseTensor.encodeDelimited = function encodeDelimited(message, writer) {
+                return this.encode(message, writer).ldelim();
+            };
+
+            /**
+             * Decodes a SparseTensor message from the specified reader or buffer.
+             * @function decode
+             * @memberof onnx.TypeProto.SparseTensor
+             * @static
+             * @param {$protobuf.Reader|Uint8Array} reader Reader or buffer to decode from
+             * @param {number} [length] Message length if known beforehand
+             * @returns {onnx.TypeProto.SparseTensor} SparseTensor
+             * @throws {Error} If the payload is not a reader or valid buffer
+             * @throws {$protobuf.util.ProtocolError} If required fields are missing
+             */
+            SparseTensor.decode = function decode(reader, length) {
+                if (!(reader instanceof $Reader))
+                    reader = $Reader.create(reader);
+                var end = length === undefined ? reader.len : reader.pos + length, message = new $root.onnx.TypeProto.SparseTensor();
+                while (reader.pos < end) {
+                    var tag = reader.uint32();
+                    switch (tag >>> 3) {
+                    case 1: {
+                            message.elemType = reader.int32();
+                            break;
+                        }
+                    case 2: {
+                            message.shape = $root.onnx.TensorShapeProto.decode(reader, reader.uint32());
+                            break;
+                        }
+                    default:
+                        reader.skipType(tag & 7);
+                        break;
+                    }
+                }
+                return message;
+            };
+
+            /**
+             * Decodes a SparseTensor message from the specified reader or buffer, length delimited.
+             * @function decodeDelimited
+             * @memberof onnx.TypeProto.SparseTensor
+             * @static
+             * @param {$protobuf.Reader|Uint8Array} reader Reader or buffer to decode from
+             * @returns {onnx.TypeProto.SparseTensor} SparseTensor
+             * @throws {Error} If the payload is not a reader or valid buffer
+             * @throws {$protobuf.util.ProtocolError} If required fields are missing
+             */
+            SparseTensor.decodeDelimited = function decodeDelimited(reader) {
+                if (!(reader instanceof $Reader))
+                    reader = new $Reader(reader);
+                return this.decode(reader, reader.uint32());
+            };
+
+            /**
+             * Verifies a SparseTensor message.
+             * @function verify
+             * @memberof onnx.TypeProto.SparseTensor
+             * @static
+             * @param {Object.<string,*>} message Plain object to verify
+             * @returns {string|null} `null` if valid, otherwise the reason why it is not
+             */
+            SparseTensor.verify = function verify(message) {
+                if (typeof message !== "object" || message === null)
+                    return "object expected";
+                if (message.elemType != null && message.hasOwnProperty("elemType"))
+                    if (!$util.isInteger(message.elemType))
+                        return "elemType: integer expected";
+                if (message.shape != null && message.hasOwnProperty("shape")) {
+                    var error = $root.onnx.TensorShapeProto.verify(message.shape);
+                    if (error)
+                        return "shape." + error;
+                }
+                return null;
+            };
+
+            /**
+             * Creates a SparseTensor message from a plain object. Also converts values to their respective internal types.
+             * @function fromObject
+             * @memberof onnx.TypeProto.SparseTensor
+             * @static
+             * @param {Object.<string,*>} object Plain object
+             * @returns {onnx.TypeProto.SparseTensor} SparseTensor
+             */
+            SparseTensor.fromObject = function fromObject(object) {
+                if (object instanceof $root.onnx.TypeProto.SparseTensor)
+                    return object;
+                var message = new $root.onnx.TypeProto.SparseTensor();
+                if (object.elemType != null)
+                    message.elemType = object.elemType | 0;
+                if (object.shape != null) {
+                    if (typeof object.shape !== "object")
+                        throw TypeError(".onnx.TypeProto.SparseTensor.shape: object expected");
+                    message.shape = $root.onnx.TensorShapeProto.fromObject(object.shape);
+                }
+                return message;
+            };
+
+            /**
+             * Creates a plain object from a SparseTensor message. Also converts values to other types if specified.
+             * @function toObject
+             * @memberof onnx.TypeProto.SparseTensor
+             * @static
+             * @param {onnx.TypeProto.SparseTensor} message SparseTensor
+             * @param {$protobuf.IConversionOptions} [options] Conversion options
+             * @returns {Object.<string,*>} Plain object
+             */
+            SparseTensor.toObject = function toObject(message, options) {
+                if (!options)
+                    options = {};
+                var object = {};
+                if (options.defaults) {
+                    object.elemType = 0;
+                    object.shape = null;
+                }
+                if (message.elemType != null && message.hasOwnProperty("elemType"))
+                    object.elemType = message.elemType;
+                if (message.shape != null && message.hasOwnProperty("shape"))
+                    object.shape = $root.onnx.TensorShapeProto.toObject(message.shape, options);
+                return object;
+            };
+
+            /**
+             * Converts this SparseTensor to JSON.
+             * @function toJSON
+             * @memberof onnx.TypeProto.SparseTensor
+             * @instance
+             * @returns {Object.<string,*>} JSON object
+             */
+            SparseTensor.prototype.toJSON = function toJSON() {
+                return this.constructor.toObject(this, $protobuf.util.toJSONOptions);
+            };
+
+            /**
+             * Gets the default type url for SparseTensor
+             * @function getTypeUrl
+             * @memberof onnx.TypeProto.SparseTensor
+             * @static
+             * @param {string} [typeUrlPrefix] your custom typeUrlPrefix(default "type.googleapis.com")
+             * @returns {string} The default type url
+             */
+            SparseTensor.getTypeUrl = function getTypeUrl(typeUrlPrefix) {
+                if (typeUrlPrefix === undefined) {
+                    typeUrlPrefix = "type.googleapis.com";
+                }
+                return typeUrlPrefix + "/onnx.TypeProto.SparseTensor";
+            };
+
+            return SparseTensor;
+        })();
+
+        return TypeProto;
+    })();
+
+    onnx.OperatorSetIdProto = (function() {
+
+        /**
+         * Properties of an OperatorSetIdProto.
+         * @memberof onnx
+         * @interface IOperatorSetIdProto
+         * @property {string|null} [domain] OperatorSetIdProto domain
+         * @property {number|Long|null} [version] OperatorSetIdProto version
+         */
+
+        /**
+         * Constructs a new OperatorSetIdProto.
+         * @memberof onnx
+         * @classdesc Represents an OperatorSetIdProto.
+         * @implements IOperatorSetIdProto
+         * @constructor
+         * @param {onnx.IOperatorSetIdProto=} [properties] Properties to set
+         */
+        function OperatorSetIdProto(properties) {
+            if (properties)
+                for (var keys = Object.keys(properties), i = 0; i < keys.length; ++i)
+                    if (properties[keys[i]] != null)
+                        this[keys[i]] = properties[keys[i]];
+        }
+
+        /**
+         * OperatorSetIdProto domain.
+         * @member {string} domain
+         * @memberof onnx.OperatorSetIdProto
+         * @instance
+         */
+        OperatorSetIdProto.prototype.domain = "";
+
+        /**
+         * OperatorSetIdProto version.
+         * @member {number|Long} version
+         * @memberof onnx.OperatorSetIdProto
+         * @instance
+         */
+        OperatorSetIdProto.prototype.version = $util.Long ? $util.Long.fromBits(0,0,false) : 0;
+
+        /**
+         * Creates a new OperatorSetIdProto instance using the specified properties.
+         * @function create
+         * @memberof onnx.OperatorSetIdProto
+         * @static
+         * @param {onnx.IOperatorSetIdProto=} [properties] Properties to set
+         * @returns {onnx.OperatorSetIdProto} OperatorSetIdProto instance
+         */
+        OperatorSetIdProto.create = function create(properties) {
+            return new OperatorSetIdProto(properties);
+        };
+
+        /**
+         * Encodes the specified OperatorSetIdProto message. Does not implicitly {@link onnx.OperatorSetIdProto.verify|verify} messages.
+         * @function encode
+         * @memberof onnx.OperatorSetIdProto
+         * @static
+         * @param {onnx.IOperatorSetIdProto} message OperatorSetIdProto message or plain object to encode
+         * @param {$protobuf.Writer} [writer] Writer to encode to
+         * @returns {$protobuf.Writer} Writer
+         */
+        OperatorSetIdProto.encode = function encode(message, writer) {
+            if (!writer)
+                writer = $Writer.create();
+            if (message.domain != null && Object.hasOwnProperty.call(message, "domain"))
+                writer.uint32(/* id 1, wireType 2 =*/10).string(message.domain);
+            if (message.version != null && Object.hasOwnProperty.call(message, "version"))
+                writer.uint32(/* id 2, wireType 0 =*/16).int64(message.version);
+            return writer;
+        };
+
+        /**
+         * Encodes the specified OperatorSetIdProto message, length delimited. Does not implicitly {@link onnx.OperatorSetIdProto.verify|verify} messages.
+         * @function encodeDelimited
+         * @memberof onnx.OperatorSetIdProto
+         * @static
+         * @param {onnx.IOperatorSetIdProto} message OperatorSetIdProto message or plain object to encode
+         * @param {$protobuf.Writer} [writer] Writer to encode to
+         * @returns {$protobuf.Writer} Writer
+         */
+        OperatorSetIdProto.encodeDelimited = function encodeDelimited(message, writer) {
+            return this.encode(message, writer).ldelim();
+        };
+
+        /**
+         * Decodes an OperatorSetIdProto message from the specified reader or buffer.
+         * @function decode
+         * @memberof onnx.OperatorSetIdProto
+         * @static
+         * @param {$protobuf.Reader|Uint8Array} reader Reader or buffer to decode from
+         * @param {number} [length] Message length if known beforehand
+         * @returns {onnx.OperatorSetIdProto} OperatorSetIdProto
+         * @throws {Error} If the payload is not a reader or valid buffer
+         * @throws {$protobuf.util.ProtocolError} If required fields are missing
+         */
+        OperatorSetIdProto.decode = function decode(reader, length) {
+            if (!(reader instanceof $Reader))
+                reader = $Reader.create(reader);
+            var end = length === undefined ? reader.len : reader.pos + length, message = new $root.onnx.OperatorSetIdProto();
+            while (reader.pos < end) {
+                var tag = reader.uint32();
+                switch (tag >>> 3) {
+                case 1: {
+                        message.domain = reader.string();
+                        break;
+                    }
+                case 2: {
+                        message.version = reader.int64();
+                        break;
+                    }
+                default:
+                    reader.skipType(tag & 7);
+                    break;
+                }
+            }
+            return message;
+        };
+
+        /**
+         * Decodes an OperatorSetIdProto message from the specified reader or buffer, length delimited.
+         * @function decodeDelimited
+         * @memberof onnx.OperatorSetIdProto
+         * @static
+         * @param {$protobuf.Reader|Uint8Array} reader Reader or buffer to decode from
+         * @returns {onnx.OperatorSetIdProto} OperatorSetIdProto
+         * @throws {Error} If the payload is not a reader or valid buffer
+         * @throws {$protobuf.util.ProtocolError} If required fields are missing
+         */
+        OperatorSetIdProto.decodeDelimited = function decodeDelimited(reader) {
+            if (!(reader instanceof $Reader))
+                reader = new $Reader(reader);
+            return this.decode(reader, reader.uint32());
+        };
+
+        /**
+         * Verifies an OperatorSetIdProto message.
+         * @function verify
+         * @memberof onnx.OperatorSetIdProto
+         * @static
+         * @param {Object.<string,*>} message Plain object to verify
+         * @returns {string|null} `null` if valid, otherwise the reason why it is not
+         */
+        OperatorSetIdProto.verify = function verify(message) {
+            if (typeof message !== "object" || message === null)
+                return "object expected";
+            if (message.domain != null && message.hasOwnProperty("domain"))
+                if (!$util.isString(message.domain))
+                    return "domain: string expected";
+            if (message.version != null && message.hasOwnProperty("version"))
+                if (!$util.isInteger(message.version) && !(message.version && $util.isInteger(message.version.low) && $util.isInteger(message.version.high)))
+                    return "version: integer|Long expected";
+            return null;
+        };
+
+        /**
+         * Creates an OperatorSetIdProto message from a plain object. Also converts values to their respective internal types.
+         * @function fromObject
+         * @memberof onnx.OperatorSetIdProto
+         * @static
+         * @param {Object.<string,*>} object Plain object
+         * @returns {onnx.OperatorSetIdProto} OperatorSetIdProto
+         */
+        OperatorSetIdProto.fromObject = function fromObject(object) {
+            if (object instanceof $root.onnx.OperatorSetIdProto)
+                return object;
+            var message = new $root.onnx.OperatorSetIdProto();
+            if (object.domain != null)
+                message.domain = String(object.domain);
+            if (object.version != null)
+                if ($util.Long)
+                    (message.version = $util.Long.fromValue(object.version)).unsigned = false;
+                else if (typeof object.version === "string")
+                    message.version = parseInt(object.version, 10);
+                else if (typeof object.version === "number")
+                    message.version = object.version;
+                else if (typeof object.version === "object")
+                    message.version = new $util.LongBits(object.version.low >>> 0, object.version.high >>> 0).toNumber();
+            return message;
+        };
+
+        /**
+         * Creates a plain object from an OperatorSetIdProto message. Also converts values to other types if specified.
+         * @function toObject
+         * @memberof onnx.OperatorSetIdProto
+         * @static
+         * @param {onnx.OperatorSetIdProto} message OperatorSetIdProto
+         * @param {$protobuf.IConversionOptions} [options] Conversion options
+         * @returns {Object.<string,*>} Plain object
+         */
+        OperatorSetIdProto.toObject = function toObject(message, options) {
+            if (!options)
+                options = {};
+            var object = {};
+            if (options.defaults) {
+                object.domain = "";
+                if ($util.Long) {
+                    var long = new $util.Long(0, 0, false);
+                    object.version = options.longs === String ? long.toString() : options.longs === Number ? long.toNumber() : long;
+                } else
+                    object.version = options.longs === String ? "0" : 0;
+            }
+            if (message.domain != null && message.hasOwnProperty("domain"))
+                object.domain = message.domain;
+            if (message.version != null && message.hasOwnProperty("version"))
+                if (typeof message.version === "number")
+                    object.version = options.longs === String ? String(message.version) : message.version;
+                else
+                    object.version = options.longs === String ? $util.Long.prototype.toString.call(message.version) : options.longs === Number ? new $util.LongBits(message.version.low >>> 0, message.version.high >>> 0).toNumber() : message.version;
+            return object;
+        };
+
+        /**
+         * Converts this OperatorSetIdProto to JSON.
+         * @function toJSON
+         * @memberof onnx.OperatorSetIdProto
+         * @instance
+         * @returns {Object.<string,*>} JSON object
+         */
+        OperatorSetIdProto.prototype.toJSON = function toJSON() {
+            return this.constructor.toObject(this, $protobuf.util.toJSONOptions);
+        };
+
+        /**
+         * Gets the default type url for OperatorSetIdProto
+         * @function getTypeUrl
+         * @memberof onnx.OperatorSetIdProto
+         * @static
+         * @param {string} [typeUrlPrefix] your custom typeUrlPrefix(default "type.googleapis.com")
+         * @returns {string} The default type url
+         */
+        OperatorSetIdProto.getTypeUrl = function getTypeUrl(typeUrlPrefix) {
+            if (typeUrlPrefix === undefined) {
+                typeUrlPrefix = "type.googleapis.com";
+            }
+            return typeUrlPrefix + "/onnx.OperatorSetIdProto";
+        };
+
+        return OperatorSetIdProto;
+    })();
+
+    /**
+     * OperatorStatus enum.
+     * @name onnx.OperatorStatus
+     * @enum {number}
+     * @property {number} EXPERIMENTAL=0 EXPERIMENTAL value
+     * @property {number} STABLE=1 STABLE value
+     */
+    onnx.OperatorStatus = (function() {
+        var valuesById = {}, values = Object.create(valuesById);
+        values[valuesById[0] = "EXPERIMENTAL"] = 0;
+        values[valuesById[1] = "STABLE"] = 1;
+        return values;
+    })();
+
+    onnx.FunctionProto = (function() {
+
+        /**
+         * Properties of a FunctionProto.
+         * @memberof onnx
+         * @interface IFunctionProto
+         * @property {string|null} [name] FunctionProto name
+         * @property {Array.<string>|null} [input] FunctionProto input
+         * @property {Array.<string>|null} [output] FunctionProto output
+         * @property {Array.<string>|null} [attribute] FunctionProto attribute
+         * @property {Array.<onnx.IAttributeProto>|null} [attributeProto] FunctionProto attributeProto
+         * @property {Array.<onnx.INodeProto>|null} [node] FunctionProto node
+         * @property {string|null} [docString] FunctionProto docString
+         * @property {Array.<onnx.IOperatorSetIdProto>|null} [opsetImport] FunctionProto opsetImport
+         * @property {string|null} [domain] FunctionProto domain
+         */
+
+        /**
+         * Constructs a new FunctionProto.
+         * @memberof onnx
+         * @classdesc Represents a FunctionProto.
+         * @implements IFunctionProto
+         * @constructor
+         * @param {onnx.IFunctionProto=} [properties] Properties to set
+         */
+        function FunctionProto(properties) {
+            this.input = [];
+            this.output = [];
+            this.attribute = [];
+            this.attributeProto = [];
+            this.node = [];
+            this.opsetImport = [];
+            if (properties)
+                for (var keys = Object.keys(properties), i = 0; i < keys.length; ++i)
+                    if (properties[keys[i]] != null)
+                        this[keys[i]] = properties[keys[i]];
+        }
+
+        /**
+         * FunctionProto name.
+         * @member {string} name
+         * @memberof onnx.FunctionProto
+         * @instance
+         */
+        FunctionProto.prototype.name = "";
+
+        /**
+         * FunctionProto input.
+         * @member {Array.<string>} input
+         * @memberof onnx.FunctionProto
+         * @instance
+         */
+        FunctionProto.prototype.input = $util.emptyArray;
+
+        /**
+         * FunctionProto output.
+         * @member {Array.<string>} output
+         * @memberof onnx.FunctionProto
+         * @instance
+         */
+        FunctionProto.prototype.output = $util.emptyArray;
+
+        /**
+         * FunctionProto attribute.
+         * @member {Array.<string>} attribute
+         * @memberof onnx.FunctionProto
+         * @instance
+         */
+        FunctionProto.prototype.attribute = $util.emptyArray;
+
+        /**
+         * FunctionProto attributeProto.
+         * @member {Array.<onnx.IAttributeProto>} attributeProto
+         * @memberof onnx.FunctionProto
+         * @instance
+         */
+        FunctionProto.prototype.attributeProto = $util.emptyArray;
+
+        /**
+         * FunctionProto node.
+         * @member {Array.<onnx.INodeProto>} node
+         * @memberof onnx.FunctionProto
+         * @instance
+         */
+        FunctionProto.prototype.node = $util.emptyArray;
+
+        /**
+         * FunctionProto docString.
+         * @member {string} docString
+         * @memberof onnx.FunctionProto
+         * @instance
+         */
+        FunctionProto.prototype.docString = "";
+
+        /**
+         * FunctionProto opsetImport.
+         * @member {Array.<onnx.IOperatorSetIdProto>} opsetImport
+         * @memberof onnx.FunctionProto
+         * @instance
+         */
+        FunctionProto.prototype.opsetImport = $util.emptyArray;
+
+        /**
+         * FunctionProto domain.
+         * @member {string} domain
+         * @memberof onnx.FunctionProto
+         * @instance
+         */
+        FunctionProto.prototype.domain = "";
+
+        /**
+         * Creates a new FunctionProto instance using the specified properties.
+         * @function create
+         * @memberof onnx.FunctionProto
+         * @static
+         * @param {onnx.IFunctionProto=} [properties] Properties to set
+         * @returns {onnx.FunctionProto} FunctionProto instance
+         */
+        FunctionProto.create = function create(properties) {
+            return new FunctionProto(properties);
+        };
+
+        /**
+         * Encodes the specified FunctionProto message. Does not implicitly {@link onnx.FunctionProto.verify|verify} messages.
+         * @function encode
+         * @memberof onnx.FunctionProto
+         * @static
+         * @param {onnx.IFunctionProto} message FunctionProto message or plain object to encode
+         * @param {$protobuf.Writer} [writer] Writer to encode to
+         * @returns {$protobuf.Writer} Writer
+         */
+        FunctionProto.encode = function encode(message, writer) {
+            if (!writer)
+                writer = $Writer.create();
+            if (message.name != null && Object.hasOwnProperty.call(message, "name"))
+                writer.uint32(/* id 1, wireType 2 =*/10).string(message.name);
+            if (message.input != null && message.input.length)
+                for (var i = 0; i < message.input.length; ++i)
+                    writer.uint32(/* id 4, wireType 2 =*/34).string(message.input[i]);
+            if (message.output != null && message.output.length)
+                for (var i = 0; i < message.output.length; ++i)
+                    writer.uint32(/* id 5, wireType 2 =*/42).string(message.output[i]);
+            if (message.attribute != null && message.attribute.length)
+                for (var i = 0; i < message.attribute.length; ++i)
+                    writer.uint32(/* id 6, wireType 2 =*/50).string(message.attribute[i]);
+            if (message.node != null && message.node.length)
+                for (var i = 0; i < message.node.length; ++i)
+                    $root.onnx.NodeProto.encode(message.node[i], writer.uint32(/* id 7, wireType 2 =*/58).fork()).ldelim();
+            if (message.docString != null && Object.hasOwnProperty.call(message, "docString"))
+                writer.uint32(/* id 8, wireType 2 =*/66).string(message.docString);
+            if (message.opsetImport != null && message.opsetImport.length)
+                for (var i = 0; i < message.opsetImport.length; ++i)
+                    $root.onnx.OperatorSetIdProto.encode(message.opsetImport[i], writer.uint32(/* id 9, wireType 2 =*/74).fork()).ldelim();
+            if (message.domain != null && Object.hasOwnProperty.call(message, "domain"))
+                writer.uint32(/* id 10, wireType 2 =*/82).string(message.domain);
+            if (message.attributeProto != null && message.attributeProto.length)
+                for (var i = 0; i < message.attributeProto.length; ++i)
+                    $root.onnx.AttributeProto.encode(message.attributeProto[i], writer.uint32(/* id 11, wireType 2 =*/90).fork()).ldelim();
+            return writer;
+        };
+
+        /**
+         * Encodes the specified FunctionProto message, length delimited. Does not implicitly {@link onnx.FunctionProto.verify|verify} messages.
+         * @function encodeDelimited
+         * @memberof onnx.FunctionProto
+         * @static
+         * @param {onnx.IFunctionProto} message FunctionProto message or plain object to encode
+         * @param {$protobuf.Writer} [writer] Writer to encode to
+         * @returns {$protobuf.Writer} Writer
+         */
+        FunctionProto.encodeDelimited = function encodeDelimited(message, writer) {
+            return this.encode(message, writer).ldelim();
+        };
+
+        /**
+         * Decodes a FunctionProto message from the specified reader or buffer.
+         * @function decode
+         * @memberof onnx.FunctionProto
+         * @static
+         * @param {$protobuf.Reader|Uint8Array} reader Reader or buffer to decode from
+         * @param {number} [length] Message length if known beforehand
+         * @returns {onnx.FunctionProto} FunctionProto
+         * @throws {Error} If the payload is not a reader or valid buffer
+         * @throws {$protobuf.util.ProtocolError} If required fields are missing
+         */
+        FunctionProto.decode = function decode(reader, length) {
+            if (!(reader instanceof $Reader))
+                reader = $Reader.create(reader);
+            var end = length === undefined ? reader.len : reader.pos + length, message = new $root.onnx.FunctionProto();
+            while (reader.pos < end) {
+                var tag = reader.uint32();
+                switch (tag >>> 3) {
+                case 1: {
+                        message.name = reader.string();
+                        break;
+                    }
+                case 4: {
+                        if (!(message.input && message.input.length))
+                            message.input = [];
+                        message.input.push(reader.string());
+                        break;
+                    }
+                case 5: {
+                        if (!(message.output && message.output.length))
+                            message.output = [];
+                        message.output.push(reader.string());
+                        break;
+                    }
+                case 6: {
+                        if (!(message.attribute && message.attribute.length))
+                            message.attribute = [];
+                        message.attribute.push(reader.string());
+                        break;
+                    }
+                case 11: {
+                        if (!(message.attributeProto && message.attributeProto.length))
+                            message.attributeProto = [];
+                        message.attributeProto.push($root.onnx.AttributeProto.decode(reader, reader.uint32()));
+                        break;
+                    }
+                case 7: {
+                        if (!(message.node && message.node.length))
+                            message.node = [];
+                        message.node.push($root.onnx.NodeProto.decode(reader, reader.uint32()));
+                        break;
+                    }
+                case 8: {
+                        message.docString = reader.string();
+                        break;
+                    }
+                case 9: {
+                        if (!(message.opsetImport && message.opsetImport.length))
+                            message.opsetImport = [];
+                        message.opsetImport.push($root.onnx.OperatorSetIdProto.decode(reader, reader.uint32()));
+                        break;
+                    }
+                case 10: {
+                        message.domain = reader.string();
+                        break;
+                    }
+                default:
+                    reader.skipType(tag & 7);
+                    break;
+                }
+            }
+            return message;
+        };
+
+        /**
+         * Decodes a FunctionProto message from the specified reader or buffer, length delimited.
+         * @function decodeDelimited
+         * @memberof onnx.FunctionProto
+         * @static
+         * @param {$protobuf.Reader|Uint8Array} reader Reader or buffer to decode from
+         * @returns {onnx.FunctionProto} FunctionProto
+         * @throws {Error} If the payload is not a reader or valid buffer
+         * @throws {$protobuf.util.ProtocolError} If required fields are missing
+         */
+        FunctionProto.decodeDelimited = function decodeDelimited(reader) {
+            if (!(reader instanceof $Reader))
+                reader = new $Reader(reader);
+            return this.decode(reader, reader.uint32());
+        };
+
+        /**
+         * Verifies a FunctionProto message.
+         * @function verify
+         * @memberof onnx.FunctionProto
+         * @static
+         * @param {Object.<string,*>} message Plain object to verify
+         * @returns {string|null} `null` if valid, otherwise the reason why it is not
+         */
+        FunctionProto.verify = function verify(message) {
+            if (typeof message !== "object" || message === null)
+                return "object expected";
+            if (message.name != null && message.hasOwnProperty("name"))
+                if (!$util.isString(message.name))
+                    return "name: string expected";
+            if (message.input != null && message.hasOwnProperty("input")) {
+                if (!Array.isArray(message.input))
+                    return "input: array expected";
+                for (var i = 0; i < message.input.length; ++i)
+                    if (!$util.isString(message.input[i]))
+                        return "input: string[] expected";
+            }
+            if (message.output != null && message.hasOwnProperty("output")) {
+                if (!Array.isArray(message.output))
+                    return "output: array expected";
+                for (var i = 0; i < message.output.length; ++i)
+                    if (!$util.isString(message.output[i]))
+                        return "output: string[] expected";
+            }
+            if (message.attribute != null && message.hasOwnProperty("attribute")) {
+                if (!Array.isArray(message.attribute))
+                    return "attribute: array expected";
+                for (var i = 0; i < message.attribute.length; ++i)
+                    if (!$util.isString(message.attribute[i]))
+                        return "attribute: string[] expected";
+            }
+            if (message.attributeProto != null && message.hasOwnProperty("attributeProto")) {
+                if (!Array.isArray(message.attributeProto))
+                    return "attributeProto: array expected";
+                for (var i = 0; i < message.attributeProto.length; ++i) {
+                    var error = $root.onnx.AttributeProto.verify(message.attributeProto[i]);
+                    if (error)
+                        return "attributeProto." + error;
+                }
+            }
+            if (message.node != null && message.hasOwnProperty("node")) {
+                if (!Array.isArray(message.node))
+                    return "node: array expected";
+                for (var i = 0; i < message.node.length; ++i) {
+                    var error = $root.onnx.NodeProto.verify(message.node[i]);
+                    if (error)
+                        return "node." + error;
+                }
+            }
+            if (message.docString != null && message.hasOwnProperty("docString"))
+                if (!$util.isString(message.docString))
+                    return "docString: string expected";
+            if (message.opsetImport != null && message.hasOwnProperty("opsetImport")) {
+                if (!Array.isArray(message.opsetImport))
+                    return "opsetImport: array expected";
+                for (var i = 0; i < message.opsetImport.length; ++i) {
+                    var error = $root.onnx.OperatorSetIdProto.verify(message.opsetImport[i]);
+                    if (error)
+                        return "opsetImport." + error;
+                }
+            }
+            if (message.domain != null && message.hasOwnProperty("domain"))
+                if (!$util.isString(message.domain))
+                    return "domain: string expected";
+            return null;
+        };
+
+        /**
+         * Creates a FunctionProto message from a plain object. Also converts values to their respective internal types.
+         * @function fromObject
+         * @memberof onnx.FunctionProto
+         * @static
+         * @param {Object.<string,*>} object Plain object
+         * @returns {onnx.FunctionProto} FunctionProto
+         */
+        FunctionProto.fromObject = function fromObject(object) {
+            if (object instanceof $root.onnx.FunctionProto)
+                return object;
+            var message = new $root.onnx.FunctionProto();
+            if (object.name != null)
+                message.name = String(object.name);
+            if (object.input) {
+                if (!Array.isArray(object.input))
+                    throw TypeError(".onnx.FunctionProto.input: array expected");
+                message.input = [];
+                for (var i = 0; i < object.input.length; ++i)
+                    message.input[i] = String(object.input[i]);
+            }
+            if (object.output) {
+                if (!Array.isArray(object.output))
+                    throw TypeError(".onnx.FunctionProto.output: array expected");
+                message.output = [];
+                for (var i = 0; i < object.output.length; ++i)
+                    message.output[i] = String(object.output[i]);
+            }
+            if (object.attribute) {
+                if (!Array.isArray(object.attribute))
+                    throw TypeError(".onnx.FunctionProto.attribute: array expected");
+                message.attribute = [];
+                for (var i = 0; i < object.attribute.length; ++i)
+                    message.attribute[i] = String(object.attribute[i]);
+            }
+            if (object.attributeProto) {
+                if (!Array.isArray(object.attributeProto))
+                    throw TypeError(".onnx.FunctionProto.attributeProto: array expected");
+                message.attributeProto = [];
+                for (var i = 0; i < object.attributeProto.length; ++i) {
+                    if (typeof object.attributeProto[i] !== "object")
+                        throw TypeError(".onnx.FunctionProto.attributeProto: object expected");
+                    message.attributeProto[i] = $root.onnx.AttributeProto.fromObject(object.attributeProto[i]);
+                }
+            }
+            if (object.node) {
+                if (!Array.isArray(object.node))
+                    throw TypeError(".onnx.FunctionProto.node: array expected");
+                message.node = [];
+                for (var i = 0; i < object.node.length; ++i) {
+                    if (typeof object.node[i] !== "object")
+                        throw TypeError(".onnx.FunctionProto.node: object expected");
+                    message.node[i] = $root.onnx.NodeProto.fromObject(object.node[i]);
+                }
+            }
+            if (object.docString != null)
+                message.docString = String(object.docString);
+            if (object.opsetImport) {
+                if (!Array.isArray(object.opsetImport))
+                    throw TypeError(".onnx.FunctionProto.opsetImport: array expected");
+                message.opsetImport = [];
+                for (var i = 0; i < object.opsetImport.length; ++i) {
+                    if (typeof object.opsetImport[i] !== "object")
+                        throw TypeError(".onnx.FunctionProto.opsetImport: object expected");
+                    message.opsetImport[i] = $root.onnx.OperatorSetIdProto.fromObject(object.opsetImport[i]);
+                }
+            }
+            if (object.domain != null)
+                message.domain = String(object.domain);
+            return message;
+        };
+
+        /**
+         * Creates a plain object from a FunctionProto message. Also converts values to other types if specified.
+         * @function toObject
+         * @memberof onnx.FunctionProto
+         * @static
+         * @param {onnx.FunctionProto} message FunctionProto
+         * @param {$protobuf.IConversionOptions} [options] Conversion options
+         * @returns {Object.<string,*>} Plain object
+         */
+        FunctionProto.toObject = function toObject(message, options) {
+            if (!options)
+                options = {};
+            var object = {};
+            if (options.arrays || options.defaults) {
+                object.input = [];
+                object.output = [];
+                object.attribute = [];
+                object.node = [];
+                object.opsetImport = [];
+                object.attributeProto = [];
+            }
+            if (options.defaults) {
+                object.name = "";
+                object.docString = "";
+                object.domain = "";
+            }
+            if (message.name != null && message.hasOwnProperty("name"))
+                object.name = message.name;
+            if (message.input && message.input.length) {
+                object.input = [];
+                for (var j = 0; j < message.input.length; ++j)
+                    object.input[j] = message.input[j];
+            }
+            if (message.output && message.output.length) {
+                object.output = [];
+                for (var j = 0; j < message.output.length; ++j)
+                    object.output[j] = message.output[j];
+            }
+            if (message.attribute && message.attribute.length) {
+                object.attribute = [];
+                for (var j = 0; j < message.attribute.length; ++j)
+                    object.attribute[j] = message.attribute[j];
+            }
+            if (message.node && message.node.length) {
+                object.node = [];
+                for (var j = 0; j < message.node.length; ++j)
+                    object.node[j] = $root.onnx.NodeProto.toObject(message.node[j], options);
+            }
+            if (message.docString != null && message.hasOwnProperty("docString"))
+                object.docString = message.docString;
+            if (message.opsetImport && message.opsetImport.length) {
+                object.opsetImport = [];
+                for (var j = 0; j < message.opsetImport.length; ++j)
+                    object.opsetImport[j] = $root.onnx.OperatorSetIdProto.toObject(message.opsetImport[j], options);
+            }
+            if (message.domain != null && message.hasOwnProperty("domain"))
+                object.domain = message.domain;
+            if (message.attributeProto && message.attributeProto.length) {
+                object.attributeProto = [];
+                for (var j = 0; j < message.attributeProto.length; ++j)
+                    object.attributeProto[j] = $root.onnx.AttributeProto.toObject(message.attributeProto[j], options);
+            }
+            return object;
+        };
+
+        /**
+         * Converts this FunctionProto to JSON.
+         * @function toJSON
+         * @memberof onnx.FunctionProto
+         * @instance
+         * @returns {Object.<string,*>} JSON object
+         */
+        FunctionProto.prototype.toJSON = function toJSON() {
+            return this.constructor.toObject(this, $protobuf.util.toJSONOptions);
+        };
+
+        /**
+         * Gets the default type url for FunctionProto
+         * @function getTypeUrl
+         * @memberof onnx.FunctionProto
+         * @static
+         * @param {string} [typeUrlPrefix] your custom typeUrlPrefix(default "type.googleapis.com")
+         * @returns {string} The default type url
+         */
+        FunctionProto.getTypeUrl = function getTypeUrl(typeUrlPrefix) {
+            if (typeUrlPrefix === undefined) {
+                typeUrlPrefix = "type.googleapis.com";
+            }
+            return typeUrlPrefix + "/onnx.FunctionProto";
+        };
+
+        return FunctionProto;
+    })();
+
+    return onnx;
+})();
+
+module.exports = $root;
diff --git a/js/node/test/test-utils.ts b/js/node/test/test-utils.ts
index 968e8a1881810..3eef90356a335 100644
--- a/js/node/test/test-utils.ts
+++ b/js/node/test/test-utils.ts
@@ -4,10 +4,11 @@
 import assert from 'assert';
 import * as fs from 'fs-extra';
 import {jsonc} from 'jsonc';
-import * as onnx_proto from 'onnx-proto';
 import {InferenceSession, Tensor} from 'onnxruntime-common';
 import * as path from 'path';
 
+import * as onnx_proto from './ort-schema/protobuf/onnx';
+
 export const TEST_ROOT = __dirname;
 export const TEST_DATA_ROOT = path.join(TEST_ROOT, 'testdata');
 
diff --git a/js/node/tsconfig.json b/js/node/tsconfig.json
index c8eb433577515..c154c3e148ed0 100644
--- a/js/node/tsconfig.json
+++ b/js/node/tsconfig.json
@@ -1,7 +1,6 @@
 {
   "extends": "../tsconfig.json",
   "compilerOptions": {
-    "module": "CommonJS",
     "outDir": "dist"
   },
   "include": ["lib"]
diff --git a/js/package-lock.json b/js/package-lock.json
index be7b3c9cd7d30..c16a8b59a3a6f 100644
--- a/js/package-lock.json
+++ b/js/package-lock.json
@@ -6,33 +6,37 @@
     "": {
       "license": "MIT",
       "devDependencies": {
-        "@types/fs-extra": "^11.0.1",
-        "@types/mocha": "^10.0.1",
+        "@types/fs-extra": "^11.0.2",
+        "@types/mocha": "^10.0.2",
         "@types/node": "^18.14.6",
         "@types/npmlog": "^4.1.4",
-        "@typescript-eslint/eslint-plugin": "^5.54.1",
-        "@typescript-eslint/parser": "^5.54.1",
+        "@typescript-eslint/eslint-plugin": "^6.7.4",
+        "@typescript-eslint/parser": "^6.7.4",
         "clang-format": "^1.8.0",
-        "dir-compare": "^4.0.0",
-        "eslint": "^8.35.0",
+        "dir-compare": "^4.2.0",
+        "esbuild": "^0.19.3",
+        "esbuild-plugin-polyfill-node": "^0.3.0",
+        "eslint": "^8.51.0",
         "eslint-plugin-header": "^3.1.1",
-        "eslint-plugin-import": "^2.27.5",
-        "eslint-plugin-jsdoc": "^40.0.1",
+        "eslint-plugin-import": "^2.28.1",
+        "eslint-plugin-jsdoc": "^46.8.2",
         "eslint-plugin-prefer-arrow": "^1.2.3",
-        "eslint-plugin-unicorn": "^46.0.0",
-        "fs-extra": "^11.1.0",
+        "eslint-plugin-unicorn": "^48.0.1",
+        "fs-extra": "^11.1.1",
         "jszip": "^3.10.1",
         "mocha": "^10.2.0",
-        "node-polyfill-webpack-plugin": "^2.0.1",
         "npmlog": "^7.0.1",
-        "prettier": "^3.0.0",
-        "terser": "^5.16.5",
-        "ts-loader": "^9.4.2",
-        "typescript": "^4.9.5",
-        "webpack": "^5.76.0",
-        "webpack-bundle-analyzer": "^4.8.0",
-        "webpack-cli": "^5.0.1",
-        "worker-loader": "^3.0.8"
+        "prettier": "^3.0.3",
+        "typescript": "^5.2.2"
+      }
+    },
+    "node_modules/@aashutoshrathi/word-wrap": {
+      "version": "1.2.6",
+      "resolved": "https://registry.npmjs.org/@aashutoshrathi/word-wrap/-/word-wrap-1.2.6.tgz",
+      "integrity": "sha512-1Yjs2SvM8TflER/OD3cOjhWWOZb58A2t7wpE2S9XfBYTiIl+XFhQG2bjy4Pu1I+EAlCNUzRDYDdFwFYUKvXcIA==",
+      "dev": true,
+      "engines": {
+        "node": ">=0.10.0"
       }
     },
     "node_modules/@babel/code-frame": {
@@ -48,9 +52,9 @@
       }
     },
     "node_modules/@babel/helper-validator-identifier": {
-      "version": "7.19.1",
-      "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.19.1.tgz",
-      "integrity": "sha512-awrNfaMtnHUr653GgGEs++LlAvW6w+DcPrOliSMXWCKo597CwL5Acf/wWdNkf/tfEQE3mjkeD1YOVZOUV/od1w==",
+      "version": "7.22.20",
+      "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.22.20.tgz",
+      "integrity": "sha512-Y4OZ+ytlatR8AI+8KZfKuL5urKp7qey08ha31L8b3BwewJAoJamTzyvxPR/5D+KkdJCGPq/+8TukHBlY10FX9A==",
       "dev": true,
       "engines": {
         "node": ">=6.9.0"
@@ -141,33 +145,376 @@
         "node": ">=4"
       }
     },
-    "node_modules/@discoveryjs/json-ext": {
-      "version": "0.5.7",
-      "resolved": "https://registry.npmjs.org/@discoveryjs/json-ext/-/json-ext-0.5.7.tgz",
-      "integrity": "sha512-dBVuXR082gk3jsFp7Rd/JI4kytwGHecnCoTtXFb7DB6CNHp4rg5k1bhg0nWdLGLnOV71lmDzGQaLMy8iPLY0pw==",
+    "node_modules/@es-joy/jsdoccomment": {
+      "version": "0.40.1",
+      "resolved": "https://registry.npmjs.org/@es-joy/jsdoccomment/-/jsdoccomment-0.40.1.tgz",
+      "integrity": "sha512-YORCdZSusAlBrFpZ77pJjc5r1bQs5caPWtAu+WWmiSo+8XaUzseapVrfAtiRFbQWnrBxxLLEwF6f6ZG/UgCQCg==",
       "dev": true,
+      "dependencies": {
+        "comment-parser": "1.4.0",
+        "esquery": "^1.5.0",
+        "jsdoc-type-pratt-parser": "~4.0.0"
+      },
       "engines": {
-        "node": ">=10.0.0"
+        "node": ">=16"
       }
     },
-    "node_modules/@es-joy/jsdoccomment": {
-      "version": "0.36.1",
-      "resolved": "https://registry.npmjs.org/@es-joy/jsdoccomment/-/jsdoccomment-0.36.1.tgz",
-      "integrity": "sha512-922xqFsTpHs6D0BUiG4toiyPOMc8/jafnWKxz1KWgS4XzKPy2qXf1Pe6UFuNSCQqt6tOuhAWXBNuuyUhJmw9Vg==",
+    "node_modules/@esbuild/android-arm": {
+      "version": "0.19.3",
+      "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.19.3.tgz",
+      "integrity": "sha512-Lemgw4io4VZl9GHJmjiBGzQ7ONXRfRPHcUEerndjwiSkbxzrpq0Uggku5MxxrXdwJ+pTj1qyw4jwTu7hkPsgIA==",
+      "cpu": [
+        "arm"
+      ],
       "dev": true,
-      "dependencies": {
-        "comment-parser": "1.3.1",
-        "esquery": "^1.4.0",
-        "jsdoc-type-pratt-parser": "~3.1.0"
-      },
+      "optional": true,
+      "os": [
+        "android"
+      ],
+      "engines": {
+        "node": ">=12"
+      }
+    },
+    "node_modules/@esbuild/android-arm64": {
+      "version": "0.19.3",
+      "resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.19.3.tgz",
+      "integrity": "sha512-w+Akc0vv5leog550kjJV9Ru+MXMR2VuMrui3C61mnysim0gkFCPOUTAfzTP0qX+HpN9Syu3YA3p1hf3EPqObRw==",
+      "cpu": [
+        "arm64"
+      ],
+      "dev": true,
+      "optional": true,
+      "os": [
+        "android"
+      ],
+      "engines": {
+        "node": ">=12"
+      }
+    },
+    "node_modules/@esbuild/android-x64": {
+      "version": "0.19.3",
+      "resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.19.3.tgz",
+      "integrity": "sha512-FKQJKkK5MXcBHoNZMDNUAg1+WcZlV/cuXrWCoGF/TvdRiYS4znA0m5Il5idUwfxrE20bG/vU1Cr5e1AD6IEIjQ==",
+      "cpu": [
+        "x64"
+      ],
+      "dev": true,
+      "optional": true,
+      "os": [
+        "android"
+      ],
+      "engines": {
+        "node": ">=12"
+      }
+    },
+    "node_modules/@esbuild/darwin-arm64": {
+      "version": "0.19.3",
+      "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.19.3.tgz",
+      "integrity": "sha512-kw7e3FXU+VsJSSSl2nMKvACYlwtvZB8RUIeVShIEY6PVnuZ3c9+L9lWB2nWeeKWNNYDdtL19foCQ0ZyUL7nqGw==",
+      "cpu": [
+        "arm64"
+      ],
+      "dev": true,
+      "optional": true,
+      "os": [
+        "darwin"
+      ],
+      "engines": {
+        "node": ">=12"
+      }
+    },
+    "node_modules/@esbuild/darwin-x64": {
+      "version": "0.19.3",
+      "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.19.3.tgz",
+      "integrity": "sha512-tPfZiwF9rO0jW6Jh9ipi58N5ZLoSjdxXeSrAYypy4psA2Yl1dAMhM71KxVfmjZhJmxRjSnb29YlRXXhh3GqzYw==",
+      "cpu": [
+        "x64"
+      ],
+      "dev": true,
+      "optional": true,
+      "os": [
+        "darwin"
+      ],
+      "engines": {
+        "node": ">=12"
+      }
+    },
+    "node_modules/@esbuild/freebsd-arm64": {
+      "version": "0.19.3",
+      "resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.19.3.tgz",
+      "integrity": "sha512-ERDyjOgYeKe0Vrlr1iLrqTByB026YLPzTytDTz1DRCYM+JI92Dw2dbpRHYmdqn6VBnQ9Bor6J8ZlNwdZdxjlSg==",
+      "cpu": [
+        "arm64"
+      ],
+      "dev": true,
+      "optional": true,
+      "os": [
+        "freebsd"
+      ],
+      "engines": {
+        "node": ">=12"
+      }
+    },
+    "node_modules/@esbuild/freebsd-x64": {
+      "version": "0.19.3",
+      "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.19.3.tgz",
+      "integrity": "sha512-nXesBZ2Ad1qL+Rm3crN7NmEVJ5uvfLFPLJev3x1j3feCQXfAhoYrojC681RhpdOph8NsvKBBwpYZHR7W0ifTTA==",
+      "cpu": [
+        "x64"
+      ],
+      "dev": true,
+      "optional": true,
+      "os": [
+        "freebsd"
+      ],
+      "engines": {
+        "node": ">=12"
+      }
+    },
+    "node_modules/@esbuild/linux-arm": {
+      "version": "0.19.3",
+      "resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.19.3.tgz",
+      "integrity": "sha512-zr48Cg/8zkzZCzDHNxXO/89bf9e+r4HtzNUPoz4GmgAkF1gFAFmfgOdCbR8zMbzFDGb1FqBBhdXUpcTQRYS1cQ==",
+      "cpu": [
+        "arm"
+      ],
+      "dev": true,
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": ">=12"
+      }
+    },
+    "node_modules/@esbuild/linux-arm64": {
+      "version": "0.19.3",
+      "resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.19.3.tgz",
+      "integrity": "sha512-qXvYKmXj8GcJgWq3aGvxL/JG1ZM3UR272SdPU4QSTzD0eymrM7leiZH77pvY3UetCy0k1xuXZ+VPvoJNdtrsWQ==",
+      "cpu": [
+        "arm64"
+      ],
+      "dev": true,
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": ">=12"
+      }
+    },
+    "node_modules/@esbuild/linux-ia32": {
+      "version": "0.19.3",
+      "resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.19.3.tgz",
+      "integrity": "sha512-7XlCKCA0nWcbvYpusARWkFjRQNWNGlt45S+Q18UeS///K6Aw8bB2FKYe9mhVWy/XLShvCweOLZPrnMswIaDXQA==",
+      "cpu": [
+        "ia32"
+      ],
+      "dev": true,
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": ">=12"
+      }
+    },
+    "node_modules/@esbuild/linux-loong64": {
+      "version": "0.19.3",
+      "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.19.3.tgz",
+      "integrity": "sha512-qGTgjweER5xqweiWtUIDl9OKz338EQqCwbS9c2Bh5jgEH19xQ1yhgGPNesugmDFq+UUSDtWgZ264st26b3de8A==",
+      "cpu": [
+        "loong64"
+      ],
+      "dev": true,
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": ">=12"
+      }
+    },
+    "node_modules/@esbuild/linux-mips64el": {
+      "version": "0.19.3",
+      "resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.19.3.tgz",
+      "integrity": "sha512-gy1bFskwEyxVMFRNYSvBauDIWNggD6pyxUksc0MV9UOBD138dKTzr8XnM2R4mBsHwVzeuIH8X5JhmNs2Pzrx+A==",
+      "cpu": [
+        "mips64el"
+      ],
+      "dev": true,
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": ">=12"
+      }
+    },
+    "node_modules/@esbuild/linux-ppc64": {
+      "version": "0.19.3",
+      "resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.19.3.tgz",
+      "integrity": "sha512-UrYLFu62x1MmmIe85rpR3qou92wB9lEXluwMB/STDzPF9k8mi/9UvNsG07Tt9AqwPQXluMQ6bZbTzYt01+Ue5g==",
+      "cpu": [
+        "ppc64"
+      ],
+      "dev": true,
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": ">=12"
+      }
+    },
+    "node_modules/@esbuild/linux-riscv64": {
+      "version": "0.19.3",
+      "resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.19.3.tgz",
+      "integrity": "sha512-9E73TfyMCbE+1AwFOg3glnzZ5fBAFK4aawssvuMgCRqCYzE0ylVxxzjEfut8xjmKkR320BEoMui4o/t9KA96gA==",
+      "cpu": [
+        "riscv64"
+      ],
+      "dev": true,
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": ">=12"
+      }
+    },
+    "node_modules/@esbuild/linux-s390x": {
+      "version": "0.19.3",
+      "resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.19.3.tgz",
+      "integrity": "sha512-LlmsbuBdm1/D66TJ3HW6URY8wO6IlYHf+ChOUz8SUAjVTuaisfuwCOAgcxo3Zsu3BZGxmI7yt//yGOxV+lHcEA==",
+      "cpu": [
+        "s390x"
+      ],
+      "dev": true,
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": ">=12"
+      }
+    },
+    "node_modules/@esbuild/linux-x64": {
+      "version": "0.19.3",
+      "resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.19.3.tgz",
+      "integrity": "sha512-ogV0+GwEmvwg/8ZbsyfkYGaLACBQWDvO0Kkh8LKBGKj9Ru8VM39zssrnu9Sxn1wbapA2qNS6BiLdwJZGouyCwQ==",
+      "cpu": [
+        "x64"
+      ],
+      "dev": true,
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": ">=12"
+      }
+    },
+    "node_modules/@esbuild/netbsd-x64": {
+      "version": "0.19.3",
+      "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.19.3.tgz",
+      "integrity": "sha512-o1jLNe4uzQv2DKXMlmEzf66Wd8MoIhLNO2nlQBHLtWyh2MitDG7sMpfCO3NTcoTMuqHjfufgUQDFRI5C+xsXQw==",
+      "cpu": [
+        "x64"
+      ],
+      "dev": true,
+      "optional": true,
+      "os": [
+        "netbsd"
+      ],
+      "engines": {
+        "node": ">=12"
+      }
+    },
+    "node_modules/@esbuild/openbsd-x64": {
+      "version": "0.19.3",
+      "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.19.3.tgz",
+      "integrity": "sha512-AZJCnr5CZgZOdhouLcfRdnk9Zv6HbaBxjcyhq0StNcvAdVZJSKIdOiPB9az2zc06ywl0ePYJz60CjdKsQacp5Q==",
+      "cpu": [
+        "x64"
+      ],
+      "dev": true,
+      "optional": true,
+      "os": [
+        "openbsd"
+      ],
+      "engines": {
+        "node": ">=12"
+      }
+    },
+    "node_modules/@esbuild/sunos-x64": {
+      "version": "0.19.3",
+      "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.19.3.tgz",
+      "integrity": "sha512-Acsujgeqg9InR4glTRvLKGZ+1HMtDm94ehTIHKhJjFpgVzZG9/pIcWW/HA/DoMfEyXmANLDuDZ2sNrWcjq1lxw==",
+      "cpu": [
+        "x64"
+      ],
+      "dev": true,
+      "optional": true,
+      "os": [
+        "sunos"
+      ],
+      "engines": {
+        "node": ">=12"
+      }
+    },
+    "node_modules/@esbuild/win32-arm64": {
+      "version": "0.19.3",
+      "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.19.3.tgz",
+      "integrity": "sha512-FSrAfjVVy7TifFgYgliiJOyYynhQmqgPj15pzLyJk8BUsnlWNwP/IAy6GAiB1LqtoivowRgidZsfpoYLZH586A==",
+      "cpu": [
+        "arm64"
+      ],
+      "dev": true,
+      "optional": true,
+      "os": [
+        "win32"
+      ],
+      "engines": {
+        "node": ">=12"
+      }
+    },
+    "node_modules/@esbuild/win32-ia32": {
+      "version": "0.19.3",
+      "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.19.3.tgz",
+      "integrity": "sha512-xTScXYi12xLOWZ/sc5RBmMN99BcXp/eEf7scUC0oeiRoiT5Vvo9AycuqCp+xdpDyAU+LkrCqEpUS9fCSZF8J3Q==",
+      "cpu": [
+        "ia32"
+      ],
+      "dev": true,
+      "optional": true,
+      "os": [
+        "win32"
+      ],
+      "engines": {
+        "node": ">=12"
+      }
+    },
+    "node_modules/@esbuild/win32-x64": {
+      "version": "0.19.3",
+      "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.19.3.tgz",
+      "integrity": "sha512-FbUN+0ZRXsypPyWE2IwIkVjDkDnJoMJARWOcFZn4KPPli+QnKqF0z1anvfaYe3ev5HFCpRDLLBDHyOALLppWHw==",
+      "cpu": [
+        "x64"
+      ],
+      "dev": true,
+      "optional": true,
+      "os": [
+        "win32"
+      ],
       "engines": {
-        "node": "^14 || ^16 || ^17 || ^18 || ^19"
+        "node": ">=12"
       }
     },
     "node_modules/@eslint-community/eslint-utils": {
-      "version": "4.2.0",
-      "resolved": "https://registry.npmjs.org/@eslint-community/eslint-utils/-/eslint-utils-4.2.0.tgz",
-      "integrity": "sha512-gB8T4H4DEfX2IV9zGDJPOBgP1e/DbfCPDTtEqUMckpvzS1OYtva8JdFYBqMwYk7xAQ429WGF/UPqn8uQ//h2vQ==",
+      "version": "4.4.0",
+      "resolved": "https://registry.npmjs.org/@eslint-community/eslint-utils/-/eslint-utils-4.4.0.tgz",
+      "integrity": "sha512-1/sA4dwrzBAyeUoQ6oxahHKmrZvsnLCg4RfxW3ZFGGmQkSNQPFNLV9CUEFQP1x9EYXHTo5p6xdhZM1Ne9p/AfA==",
       "dev": true,
       "dependencies": {
         "eslint-visitor-keys": "^3.3.0"
@@ -179,15 +526,24 @@
         "eslint": "^6.0.0 || ^7.0.0 || >=8.0.0"
       }
     },
+    "node_modules/@eslint-community/regexpp": {
+      "version": "4.9.1",
+      "resolved": "https://registry.npmjs.org/@eslint-community/regexpp/-/regexpp-4.9.1.tgz",
+      "integrity": "sha512-Y27x+MBLjXa+0JWDhykM3+JE+il3kHKAEqabfEWq3SDhZjLYb6/BHL/JKFnH3fe207JaXkyDo685Oc2Glt6ifA==",
+      "dev": true,
+      "engines": {
+        "node": "^12.0.0 || ^14.0.0 || >=16.0.0"
+      }
+    },
     "node_modules/@eslint/eslintrc": {
-      "version": "2.0.0",
-      "resolved": "https://registry.npmjs.org/@eslint/eslintrc/-/eslintrc-2.0.0.tgz",
-      "integrity": "sha512-fluIaaV+GyV24CCu/ggiHdV+j4RNh85yQnAYS/G2mZODZgGmmlrgCydjUcV3YvxCm9x8nMAfThsqTni4KiXT4A==",
+      "version": "2.1.2",
+      "resolved": "https://registry.npmjs.org/@eslint/eslintrc/-/eslintrc-2.1.2.tgz",
+      "integrity": "sha512-+wvgpDsrB1YqAMdEUCcnTlpfVBH7Vqn6A/NT3D8WVXFIaKMlErPIZT3oCIAVCOtarRpMtelZLqJeU3t7WY6X6g==",
       "dev": true,
       "dependencies": {
         "ajv": "^6.12.4",
         "debug": "^4.3.2",
-        "espree": "^9.4.0",
+        "espree": "^9.6.0",
         "globals": "^13.19.0",
         "ignore": "^5.2.0",
         "import-fresh": "^3.2.1",
@@ -203,18 +559,18 @@
       }
     },
     "node_modules/@eslint/js": {
-      "version": "8.35.0",
-      "resolved": "https://registry.npmjs.org/@eslint/js/-/js-8.35.0.tgz",
-      "integrity": "sha512-JXdzbRiWclLVoD8sNUjR443VVlYqiYmDVT6rGUEIEHU5YJW0gaVZwV2xgM7D4arkvASqD0IlLUVjHiFuxaftRw==",
+      "version": "8.51.0",
+      "resolved": "https://registry.npmjs.org/@eslint/js/-/js-8.51.0.tgz",
+      "integrity": "sha512-HxjQ8Qn+4SI3/AFv6sOrDB+g6PpUTDwSJiQqOrnneEk8L71161srI9gjzzZvYVbzHiVg/BvcH95+cK/zfIt4pg==",
       "dev": true,
       "engines": {
         "node": "^12.22.0 || ^14.17.0 || >=16.0.0"
       }
     },
     "node_modules/@humanwhocodes/config-array": {
-      "version": "0.11.8",
-      "resolved": "https://registry.npmjs.org/@humanwhocodes/config-array/-/config-array-0.11.8.tgz",
-      "integrity": "sha512-UybHIJzJnR5Qc/MsD9Kr+RpO2h+/P1GhOwdiLPXK5TWk5sgTdu88bTD9UP+CKbPPh5Rni1u0GjAdYQLemG8g+g==",
+      "version": "0.11.11",
+      "resolved": "https://registry.npmjs.org/@humanwhocodes/config-array/-/config-array-0.11.11.tgz",
+      "integrity": "sha512-N2brEuAadi0CcdeMXUkhbZB84eskAc8MEX1By6qEchoVywSgXPIjou4rYsl0V3Hj0ZnuGycGCjdNgockbzeWNA==",
       "dev": true,
       "dependencies": {
         "@humanwhocodes/object-schema": "^1.2.1",
@@ -244,64 +600,12 @@
       "integrity": "sha512-ZnQMnLV4e7hDlUvw8H+U8ASL02SS2Gn6+9Ac3wGGLIe7+je2AeAOxPY+izIPJDfFDb7eDjev0Us8MO1iFRN8hA==",
       "dev": true
     },
-    "node_modules/@jridgewell/gen-mapping": {
-      "version": "0.3.2",
-      "resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.2.tgz",
-      "integrity": "sha512-mh65xKQAzI6iBcFzwv28KVWSmCkdRBWoOh+bYQGW3+6OZvbbN3TqMGo5hqYxQniRcH9F2VZIoJCm4pa3BPDK/A==",
-      "dev": true,
-      "dependencies": {
-        "@jridgewell/set-array": "^1.0.1",
-        "@jridgewell/sourcemap-codec": "^1.4.10",
-        "@jridgewell/trace-mapping": "^0.3.9"
-      },
-      "engines": {
-        "node": ">=6.0.0"
-      }
-    },
-    "node_modules/@jridgewell/resolve-uri": {
-      "version": "3.1.0",
-      "resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.0.tgz",
-      "integrity": "sha512-F2msla3tad+Mfht5cJq7LSXcdudKTWCVYUgw6pLFOOHSTtZlj6SWNYAp+AhuqLmWdBO2X5hPrLcu8cVP8fy28w==",
-      "dev": true,
-      "engines": {
-        "node": ">=6.0.0"
-      }
-    },
-    "node_modules/@jridgewell/set-array": {
-      "version": "1.1.2",
-      "resolved": "https://registry.npmjs.org/@jridgewell/set-array/-/set-array-1.1.2.tgz",
-      "integrity": "sha512-xnkseuNADM0gt2bs+BvhO0p78Mk762YnZdsuzFV018NoG1Sj1SCQvpSqa7XUaTam5vAGasABV9qXASMKnFMwMw==",
-      "dev": true,
-      "engines": {
-        "node": ">=6.0.0"
-      }
-    },
-    "node_modules/@jridgewell/source-map": {
-      "version": "0.3.2",
-      "resolved": "https://registry.npmjs.org/@jridgewell/source-map/-/source-map-0.3.2.tgz",
-      "integrity": "sha512-m7O9o2uR8k2ObDysZYzdfhb08VuEml5oWGiosa1VdaPZ/A6QyPkAJuwN0Q1lhULOf6B7MtQmHENS743hWtCrgw==",
-      "dev": true,
-      "dependencies": {
-        "@jridgewell/gen-mapping": "^0.3.0",
-        "@jridgewell/trace-mapping": "^0.3.9"
-      }
-    },
-    "node_modules/@jridgewell/sourcemap-codec": {
-      "version": "1.4.14",
-      "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.4.14.tgz",
-      "integrity": "sha512-XPSJHWmi394fuUuzDnGz1wiKqWfo1yXecHQMRf2l6hztTO+nPru658AyDngaBe7isIxEkRsPR3FZh+s7iVa4Uw==",
+    "node_modules/@jspm/core": {
+      "version": "2.0.1",
+      "resolved": "https://registry.npmjs.org/@jspm/core/-/core-2.0.1.tgz",
+      "integrity": "sha512-Lg3PnLp0QXpxwLIAuuJboLeRaIhrgJjeuh797QADg3xz8wGLugQOS5DpsE8A6i6Adgzf+bacllkKZG3J0tGfDw==",
       "dev": true
     },
-    "node_modules/@jridgewell/trace-mapping": {
-      "version": "0.3.17",
-      "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.17.tgz",
-      "integrity": "sha512-MCNzAp77qzKca9+W/+I0+sEpaUnZoeasnghNeVc41VZCEKaCH73Vq3BZZ/SzWIgrqE4H4ceI+p+b6C0mHf9T4g==",
-      "dev": true,
-      "dependencies": {
-        "@jridgewell/resolve-uri": "3.1.0",
-        "@jridgewell/sourcemap-codec": "1.4.14"
-      }
-    },
     "node_modules/@nodelib/fs.scandir": {
       "version": "2.1.5",
       "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz",
@@ -337,42 +641,10 @@
         "node": ">= 8"
       }
     },
-    "node_modules/@polka/url": {
-      "version": "1.0.0-next.21",
-      "resolved": "https://registry.npmjs.org/@polka/url/-/url-1.0.0-next.21.tgz",
-      "integrity": "sha512-a5Sab1C4/icpTZVzZc5Ghpz88yQtGOyNqYXcZgOssB2uuAr+wF/MvN6bgtW32q7HHrvBki+BsZ0OuNv6EV3K9g==",
-      "dev": true
-    },
-    "node_modules/@types/eslint": {
-      "version": "8.21.1",
-      "resolved": "https://registry.npmjs.org/@types/eslint/-/eslint-8.21.1.tgz",
-      "integrity": "sha512-rc9K8ZpVjNcLs8Fp0dkozd5Pt2Apk1glO4Vgz8ix1u6yFByxfqo5Yavpy65o+93TAe24jr7v+eSBtFLvOQtCRQ==",
-      "dev": true,
-      "dependencies": {
-        "@types/estree": "*",
-        "@types/json-schema": "*"
-      }
-    },
-    "node_modules/@types/eslint-scope": {
-      "version": "3.7.4",
-      "resolved": "https://registry.npmjs.org/@types/eslint-scope/-/eslint-scope-3.7.4.tgz",
-      "integrity": "sha512-9K4zoImiZc3HlIp6AVUDE4CWYx22a+lhSZMYNpbjW04+YF0KWj4pJXnEMjdnFTiQibFFmElcsasJXDbdI/EPhA==",
-      "dev": true,
-      "dependencies": {
-        "@types/eslint": "*",
-        "@types/estree": "*"
-      }
-    },
-    "node_modules/@types/estree": {
-      "version": "0.0.51",
-      "resolved": "https://registry.npmjs.org/@types/estree/-/estree-0.0.51.tgz",
-      "integrity": "sha512-CuPgU6f3eT/XgKKPqKd/gLZV1Xmvf1a2R5POBOGQa6uv82xpls89HU5zKeVoyR8XzHd1RGNOlQlvUe3CFkjWNQ==",
-      "dev": true
-    },
     "node_modules/@types/fs-extra": {
-      "version": "11.0.1",
-      "resolved": "https://registry.npmjs.org/@types/fs-extra/-/fs-extra-11.0.1.tgz",
-      "integrity": "sha512-MxObHvNl4A69ofaTRU8DFqvgzzv8s9yRtaPPm5gud9HDNvpB3GPQFvNuTWAI59B9huVGV5jXYJwbCsmBsOGYWA==",
+      "version": "11.0.2",
+      "resolved": "https://registry.npmjs.org/@types/fs-extra/-/fs-extra-11.0.2.tgz",
+      "integrity": "sha512-c0hrgAOVYr21EX8J0jBMXGLMgJqVf/v6yxi0dLaJboW9aQPh16Id+z6w2Tx1hm+piJOLv8xPfVKZCLfjPw/IMQ==",
       "dev": true,
       "dependencies": {
         "@types/jsonfile": "*",
@@ -380,9 +652,9 @@
       }
     },
     "node_modules/@types/json-schema": {
-      "version": "7.0.11",
-      "resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.11.tgz",
-      "integrity": "sha512-wOuvG1SN4Us4rez+tylwwwCV1psiNVOkJeM3AUWUNWg/jDQY2+HE/444y5gc+jBmRqASOm2Oeh5c1axHobwRKQ==",
+      "version": "7.0.13",
+      "resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.13.tgz",
+      "integrity": "sha512-RbSSoHliUbnXj3ny0CNFOoxrIDV6SUGyStHsvDqosw6CkdPV8TtWGlfecuK4ToyMEAql6pzNxgCFKanovUzlgQ==",
       "dev": true
     },
     "node_modules/@types/json5": {
@@ -401,9 +673,9 @@
       }
     },
     "node_modules/@types/mocha": {
-      "version": "10.0.1",
-      "resolved": "https://registry.npmjs.org/@types/mocha/-/mocha-10.0.1.tgz",
-      "integrity": "sha512-/fvYntiO1GeICvqbQ3doGDIP97vWmvFt83GKguJ6prmQM2iXZfFcq6YE8KteFyRtX2/h5Hf91BYvPodJKFYv5Q==",
+      "version": "10.0.2",
+      "resolved": "https://registry.npmjs.org/@types/mocha/-/mocha-10.0.2.tgz",
+      "integrity": "sha512-NaHL0+0lLNhX6d9rs+NSt97WH/gIlRHmszXbQ/8/MV/eVcFNdeJ/GYhrFuUc8K7WuPhRhTSdMkCp8VMzhUq85w==",
       "dev": true
     },
     "node_modules/@types/node": {
@@ -425,38 +697,39 @@
       "dev": true
     },
     "node_modules/@types/semver": {
-      "version": "7.3.13",
-      "resolved": "https://registry.npmjs.org/@types/semver/-/semver-7.3.13.tgz",
-      "integrity": "sha512-21cFJr9z3g5dW8B0CVI9g2O9beqaThGQ6ZFBqHfwhzLDKUxaqTIy3vnfah/UPkfOiF2pLq+tGz+W8RyCskuslw==",
+      "version": "7.5.3",
+      "resolved": "https://registry.npmjs.org/@types/semver/-/semver-7.5.3.tgz",
+      "integrity": "sha512-OxepLK9EuNEIPxWNME+C6WwbRAOOI2o2BaQEGzz5Lu2e4Z5eDnEo+/aVEDMIXywoJitJ7xWd641wrGLZdtwRyw==",
       "dev": true
     },
     "node_modules/@typescript-eslint/eslint-plugin": {
-      "version": "5.54.1",
-      "resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-5.54.1.tgz",
-      "integrity": "sha512-a2RQAkosH3d3ZIV08s3DcL/mcGc2M/UC528VkPULFxR9VnVPT8pBu0IyBAJJmVsCmhVfwQX1v6q+QGnmSe1bew==",
+      "version": "6.7.4",
+      "resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-6.7.4.tgz",
+      "integrity": "sha512-DAbgDXwtX+pDkAHwiGhqP3zWUGpW49B7eqmgpPtg+BKJXwdct79ut9+ifqOFPJGClGKSHXn2PTBatCnldJRUoA==",
       "dev": true,
       "dependencies": {
-        "@typescript-eslint/scope-manager": "5.54.1",
-        "@typescript-eslint/type-utils": "5.54.1",
-        "@typescript-eslint/utils": "5.54.1",
+        "@eslint-community/regexpp": "^4.5.1",
+        "@typescript-eslint/scope-manager": "6.7.4",
+        "@typescript-eslint/type-utils": "6.7.4",
+        "@typescript-eslint/utils": "6.7.4",
+        "@typescript-eslint/visitor-keys": "6.7.4",
         "debug": "^4.3.4",
-        "grapheme-splitter": "^1.0.4",
-        "ignore": "^5.2.0",
-        "natural-compare-lite": "^1.4.0",
-        "regexpp": "^3.2.0",
-        "semver": "^7.3.7",
-        "tsutils": "^3.21.0"
+        "graphemer": "^1.4.0",
+        "ignore": "^5.2.4",
+        "natural-compare": "^1.4.0",
+        "semver": "^7.5.4",
+        "ts-api-utils": "^1.0.1"
       },
       "engines": {
-        "node": "^12.22.0 || ^14.17.0 || >=16.0.0"
+        "node": "^16.0.0 || >=18.0.0"
       },
       "funding": {
         "type": "opencollective",
         "url": "https://opencollective.com/typescript-eslint"
       },
       "peerDependencies": {
-        "@typescript-eslint/parser": "^5.0.0",
-        "eslint": "^6.0.0 || ^7.0.0 || ^8.0.0"
+        "@typescript-eslint/parser": "^6.0.0 || ^6.0.0-alpha",
+        "eslint": "^7.0.0 || ^8.0.0"
       },
       "peerDependenciesMeta": {
         "typescript": {
@@ -465,25 +738,26 @@
       }
     },
     "node_modules/@typescript-eslint/parser": {
-      "version": "5.54.1",
-      "resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-5.54.1.tgz",
-      "integrity": "sha512-8zaIXJp/nG9Ff9vQNh7TI+C3nA6q6iIsGJ4B4L6MhZ7mHnTMR4YP5vp2xydmFXIy8rpyIVbNAG44871LMt6ujg==",
+      "version": "6.7.4",
+      "resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-6.7.4.tgz",
+      "integrity": "sha512-I5zVZFY+cw4IMZUeNCU7Sh2PO5O57F7Lr0uyhgCJmhN/BuTlnc55KxPonR4+EM3GBdfiCyGZye6DgMjtubQkmA==",
       "dev": true,
       "dependencies": {
-        "@typescript-eslint/scope-manager": "5.54.1",
-        "@typescript-eslint/types": "5.54.1",
-        "@typescript-eslint/typescript-estree": "5.54.1",
+        "@typescript-eslint/scope-manager": "6.7.4",
+        "@typescript-eslint/types": "6.7.4",
+        "@typescript-eslint/typescript-estree": "6.7.4",
+        "@typescript-eslint/visitor-keys": "6.7.4",
         "debug": "^4.3.4"
       },
       "engines": {
-        "node": "^12.22.0 || ^14.17.0 || >=16.0.0"
+        "node": "^16.0.0 || >=18.0.0"
       },
       "funding": {
         "type": "opencollective",
         "url": "https://opencollective.com/typescript-eslint"
       },
       "peerDependencies": {
-        "eslint": "^6.0.0 || ^7.0.0 || ^8.0.0"
+        "eslint": "^7.0.0 || ^8.0.0"
       },
       "peerDependenciesMeta": {
         "typescript": {
@@ -492,16 +766,16 @@
       }
     },
     "node_modules/@typescript-eslint/scope-manager": {
-      "version": "5.54.1",
-      "resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-5.54.1.tgz",
-      "integrity": "sha512-zWKuGliXxvuxyM71UA/EcPxaviw39dB2504LqAmFDjmkpO8qNLHcmzlh6pbHs1h/7YQ9bnsO8CCcYCSA8sykUg==",
+      "version": "6.7.4",
+      "resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-6.7.4.tgz",
+      "integrity": "sha512-SdGqSLUPTXAXi7c3Ob7peAGVnmMoGzZ361VswK2Mqf8UOYcODiYvs8rs5ILqEdfvX1lE7wEZbLyELCW+Yrql1A==",
       "dev": true,
       "dependencies": {
-        "@typescript-eslint/types": "5.54.1",
-        "@typescript-eslint/visitor-keys": "5.54.1"
+        "@typescript-eslint/types": "6.7.4",
+        "@typescript-eslint/visitor-keys": "6.7.4"
       },
       "engines": {
-        "node": "^12.22.0 || ^14.17.0 || >=16.0.0"
+        "node": "^16.0.0 || >=18.0.0"
       },
       "funding": {
         "type": "opencollective",
@@ -509,25 +783,25 @@
       }
     },
     "node_modules/@typescript-eslint/type-utils": {
-      "version": "5.54.1",
-      "resolved": "https://registry.npmjs.org/@typescript-eslint/type-utils/-/type-utils-5.54.1.tgz",
-      "integrity": "sha512-WREHsTz0GqVYLIbzIZYbmUUr95DKEKIXZNH57W3s+4bVnuF1TKe2jH8ZNH8rO1CeMY3U4j4UQeqPNkHMiGem3g==",
+      "version": "6.7.4",
+      "resolved": "https://registry.npmjs.org/@typescript-eslint/type-utils/-/type-utils-6.7.4.tgz",
+      "integrity": "sha512-n+g3zi1QzpcAdHFP9KQF+rEFxMb2KxtnJGID3teA/nxKHOVi3ylKovaqEzGBbVY2pBttU6z85gp0D00ufLzViQ==",
       "dev": true,
       "dependencies": {
-        "@typescript-eslint/typescript-estree": "5.54.1",
-        "@typescript-eslint/utils": "5.54.1",
+        "@typescript-eslint/typescript-estree": "6.7.4",
+        "@typescript-eslint/utils": "6.7.4",
         "debug": "^4.3.4",
-        "tsutils": "^3.21.0"
+        "ts-api-utils": "^1.0.1"
       },
       "engines": {
-        "node": "^12.22.0 || ^14.17.0 || >=16.0.0"
+        "node": "^16.0.0 || >=18.0.0"
       },
       "funding": {
         "type": "opencollective",
         "url": "https://opencollective.com/typescript-eslint"
       },
       "peerDependencies": {
-        "eslint": "*"
+        "eslint": "^7.0.0 || ^8.0.0"
       },
       "peerDependenciesMeta": {
         "typescript": {
@@ -536,12 +810,12 @@
       }
     },
     "node_modules/@typescript-eslint/types": {
-      "version": "5.54.1",
-      "resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-5.54.1.tgz",
-      "integrity": "sha512-G9+1vVazrfAfbtmCapJX8jRo2E4MDXxgm/IMOF4oGh3kq7XuK3JRkOg6y2Qu1VsTRmWETyTkWt1wxy7X7/yLkw==",
+      "version": "6.7.4",
+      "resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-6.7.4.tgz",
+      "integrity": "sha512-o9XWK2FLW6eSS/0r/tgjAGsYasLAnOWg7hvZ/dGYSSNjCh+49k5ocPN8OmG5aZcSJ8pclSOyVKP2x03Sj+RrCA==",
       "dev": true,
       "engines": {
-        "node": "^12.22.0 || ^14.17.0 || >=16.0.0"
+        "node": "^16.0.0 || >=18.0.0"
       },
       "funding": {
         "type": "opencollective",
@@ -549,21 +823,21 @@
       }
     },
     "node_modules/@typescript-eslint/typescript-estree": {
-      "version": "5.54.1",
-      "resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-5.54.1.tgz",
-      "integrity": "sha512-bjK5t+S6ffHnVwA0qRPTZrxKSaFYocwFIkZx5k7pvWfsB1I57pO/0M0Skatzzw1sCkjJ83AfGTL0oFIFiDX3bg==",
+      "version": "6.7.4",
+      "resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-6.7.4.tgz",
+      "integrity": "sha512-ty8b5qHKatlNYd9vmpHooQz3Vki3gG+3PchmtsA4TgrZBKWHNjWfkQid7K7xQogBqqc7/BhGazxMD5vr6Ha+iQ==",
       "dev": true,
       "dependencies": {
-        "@typescript-eslint/types": "5.54.1",
-        "@typescript-eslint/visitor-keys": "5.54.1",
+        "@typescript-eslint/types": "6.7.4",
+        "@typescript-eslint/visitor-keys": "6.7.4",
         "debug": "^4.3.4",
         "globby": "^11.1.0",
         "is-glob": "^4.0.3",
-        "semver": "^7.3.7",
-        "tsutils": "^3.21.0"
+        "semver": "^7.5.4",
+        "ts-api-utils": "^1.0.1"
       },
       "engines": {
-        "node": "^12.22.0 || ^14.17.0 || >=16.0.0"
+        "node": "^16.0.0 || >=18.0.0"
       },
       "funding": {
         "type": "opencollective",
@@ -576,250 +850,47 @@
       }
     },
     "node_modules/@typescript-eslint/utils": {
-      "version": "5.54.1",
-      "resolved": "https://registry.npmjs.org/@typescript-eslint/utils/-/utils-5.54.1.tgz",
-      "integrity": "sha512-IY5dyQM8XD1zfDe5X8jegX6r2EVU5o/WJnLu/znLPWCBF7KNGC+adacXnt5jEYS9JixDcoccI6CvE4RCjHMzCQ==",
+      "version": "6.7.4",
+      "resolved": "https://registry.npmjs.org/@typescript-eslint/utils/-/utils-6.7.4.tgz",
+      "integrity": "sha512-PRQAs+HUn85Qdk+khAxsVV+oULy3VkbH3hQ8hxLRJXWBEd7iI+GbQxH5SEUSH7kbEoTp6oT1bOwyga24ELALTA==",
       "dev": true,
       "dependencies": {
-        "@types/json-schema": "^7.0.9",
-        "@types/semver": "^7.3.12",
-        "@typescript-eslint/scope-manager": "5.54.1",
-        "@typescript-eslint/types": "5.54.1",
-        "@typescript-eslint/typescript-estree": "5.54.1",
-        "eslint-scope": "^5.1.1",
-        "eslint-utils": "^3.0.0",
-        "semver": "^7.3.7"
+        "@eslint-community/eslint-utils": "^4.4.0",
+        "@types/json-schema": "^7.0.12",
+        "@types/semver": "^7.5.0",
+        "@typescript-eslint/scope-manager": "6.7.4",
+        "@typescript-eslint/types": "6.7.4",
+        "@typescript-eslint/typescript-estree": "6.7.4",
+        "semver": "^7.5.4"
       },
       "engines": {
-        "node": "^12.22.0 || ^14.17.0 || >=16.0.0"
+        "node": "^16.0.0 || >=18.0.0"
       },
       "funding": {
         "type": "opencollective",
         "url": "https://opencollective.com/typescript-eslint"
       },
       "peerDependencies": {
-        "eslint": "^6.0.0 || ^7.0.0 || ^8.0.0"
+        "eslint": "^7.0.0 || ^8.0.0"
       }
     },
     "node_modules/@typescript-eslint/visitor-keys": {
-      "version": "5.54.1",
-      "resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-5.54.1.tgz",
-      "integrity": "sha512-q8iSoHTgwCfgcRJ2l2x+xCbu8nBlRAlsQ33k24Adj8eoVBE0f8dUeI+bAa8F84Mv05UGbAx57g2zrRsYIooqQg==",
+      "version": "6.7.4",
+      "resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-6.7.4.tgz",
+      "integrity": "sha512-pOW37DUhlTZbvph50x5zZCkFn3xzwkGtNoJHzIM3svpiSkJzwOYr/kVBaXmf+RAQiUDs1AHEZVNPg6UJCJpwRA==",
       "dev": true,
       "dependencies": {
-        "@typescript-eslint/types": "5.54.1",
-        "eslint-visitor-keys": "^3.3.0"
+        "@typescript-eslint/types": "6.7.4",
+        "eslint-visitor-keys": "^3.4.1"
       },
       "engines": {
-        "node": "^12.22.0 || ^14.17.0 || >=16.0.0"
+        "node": "^16.0.0 || >=18.0.0"
       },
       "funding": {
         "type": "opencollective",
         "url": "https://opencollective.com/typescript-eslint"
       }
     },
-    "node_modules/@webassemblyjs/ast": {
-      "version": "1.11.1",
-      "resolved": "https://registry.npmjs.org/@webassemblyjs/ast/-/ast-1.11.1.tgz",
-      "integrity": "sha512-ukBh14qFLjxTQNTXocdyksN5QdM28S1CxHt2rdskFyL+xFV7VremuBLVbmCePj+URalXBENx/9Lm7lnhihtCSw==",
-      "dev": true,
-      "dependencies": {
-        "@webassemblyjs/helper-numbers": "1.11.1",
-        "@webassemblyjs/helper-wasm-bytecode": "1.11.1"
-      }
-    },
-    "node_modules/@webassemblyjs/floating-point-hex-parser": {
-      "version": "1.11.1",
-      "resolved": "https://registry.npmjs.org/@webassemblyjs/floating-point-hex-parser/-/floating-point-hex-parser-1.11.1.tgz",
-      "integrity": "sha512-iGRfyc5Bq+NnNuX8b5hwBrRjzf0ocrJPI6GWFodBFzmFnyvrQ83SHKhmilCU/8Jv67i4GJZBMhEzltxzcNagtQ==",
-      "dev": true
-    },
-    "node_modules/@webassemblyjs/helper-api-error": {
-      "version": "1.11.1",
-      "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-api-error/-/helper-api-error-1.11.1.tgz",
-      "integrity": "sha512-RlhS8CBCXfRUR/cwo2ho9bkheSXG0+NwooXcc3PAILALf2QLdFyj7KGsKRbVc95hZnhnERon4kW/D3SZpp6Tcg==",
-      "dev": true
-    },
-    "node_modules/@webassemblyjs/helper-buffer": {
-      "version": "1.11.1",
-      "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-buffer/-/helper-buffer-1.11.1.tgz",
-      "integrity": "sha512-gwikF65aDNeeXa8JxXa2BAk+REjSyhrNC9ZwdT0f8jc4dQQeDQ7G4m0f2QCLPJiMTTO6wfDmRmj/pW0PsUvIcA==",
-      "dev": true
-    },
-    "node_modules/@webassemblyjs/helper-numbers": {
-      "version": "1.11.1",
-      "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-numbers/-/helper-numbers-1.11.1.tgz",
-      "integrity": "sha512-vDkbxiB8zfnPdNK9Rajcey5C0w+QJugEglN0of+kmO8l7lDb77AnlKYQF7aarZuCrv+l0UvqL+68gSDr3k9LPQ==",
-      "dev": true,
-      "dependencies": {
-        "@webassemblyjs/floating-point-hex-parser": "1.11.1",
-        "@webassemblyjs/helper-api-error": "1.11.1",
-        "@xtuc/long": "4.2.2"
-      }
-    },
-    "node_modules/@webassemblyjs/helper-wasm-bytecode": {
-      "version": "1.11.1",
-      "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-wasm-bytecode/-/helper-wasm-bytecode-1.11.1.tgz",
-      "integrity": "sha512-PvpoOGiJwXeTrSf/qfudJhwlvDQxFgelbMqtq52WWiXC6Xgg1IREdngmPN3bs4RoO83PnL/nFrxucXj1+BX62Q==",
-      "dev": true
-    },
-    "node_modules/@webassemblyjs/helper-wasm-section": {
-      "version": "1.11.1",
-      "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-wasm-section/-/helper-wasm-section-1.11.1.tgz",
-      "integrity": "sha512-10P9No29rYX1j7F3EVPX3JvGPQPae+AomuSTPiF9eBQeChHI6iqjMIwR9JmOJXwpnn/oVGDk7I5IlskuMwU/pg==",
-      "dev": true,
-      "dependencies": {
-        "@webassemblyjs/ast": "1.11.1",
-        "@webassemblyjs/helper-buffer": "1.11.1",
-        "@webassemblyjs/helper-wasm-bytecode": "1.11.1",
-        "@webassemblyjs/wasm-gen": "1.11.1"
-      }
-    },
-    "node_modules/@webassemblyjs/ieee754": {
-      "version": "1.11.1",
-      "resolved": "https://registry.npmjs.org/@webassemblyjs/ieee754/-/ieee754-1.11.1.tgz",
-      "integrity": "sha512-hJ87QIPtAMKbFq6CGTkZYJivEwZDbQUgYd3qKSadTNOhVY7p+gfP6Sr0lLRVTaG1JjFj+r3YchoqRYxNH3M0GQ==",
-      "dev": true,
-      "dependencies": {
-        "@xtuc/ieee754": "^1.2.0"
-      }
-    },
-    "node_modules/@webassemblyjs/leb128": {
-      "version": "1.11.1",
-      "resolved": "https://registry.npmjs.org/@webassemblyjs/leb128/-/leb128-1.11.1.tgz",
-      "integrity": "sha512-BJ2P0hNZ0u+Th1YZXJpzW6miwqQUGcIHT1G/sf72gLVD9DZ5AdYTqPNbHZh6K1M5VmKvFXwGSWZADz+qBWxeRw==",
-      "dev": true,
-      "dependencies": {
-        "@xtuc/long": "4.2.2"
-      }
-    },
-    "node_modules/@webassemblyjs/utf8": {
-      "version": "1.11.1",
-      "resolved": "https://registry.npmjs.org/@webassemblyjs/utf8/-/utf8-1.11.1.tgz",
-      "integrity": "sha512-9kqcxAEdMhiwQkHpkNiorZzqpGrodQQ2IGrHHxCy+Ozng0ofyMA0lTqiLkVs1uzTRejX+/O0EOT7KxqVPuXosQ==",
-      "dev": true
-    },
-    "node_modules/@webassemblyjs/wasm-edit": {
-      "version": "1.11.1",
-      "resolved": "https://registry.npmjs.org/@webassemblyjs/wasm-edit/-/wasm-edit-1.11.1.tgz",
-      "integrity": "sha512-g+RsupUC1aTHfR8CDgnsVRVZFJqdkFHpsHMfJuWQzWU3tvnLC07UqHICfP+4XyL2tnr1amvl1Sdp06TnYCmVkA==",
-      "dev": true,
-      "dependencies": {
-        "@webassemblyjs/ast": "1.11.1",
-        "@webassemblyjs/helper-buffer": "1.11.1",
-        "@webassemblyjs/helper-wasm-bytecode": "1.11.1",
-        "@webassemblyjs/helper-wasm-section": "1.11.1",
-        "@webassemblyjs/wasm-gen": "1.11.1",
-        "@webassemblyjs/wasm-opt": "1.11.1",
-        "@webassemblyjs/wasm-parser": "1.11.1",
-        "@webassemblyjs/wast-printer": "1.11.1"
-      }
-    },
-    "node_modules/@webassemblyjs/wasm-gen": {
-      "version": "1.11.1",
-      "resolved": "https://registry.npmjs.org/@webassemblyjs/wasm-gen/-/wasm-gen-1.11.1.tgz",
-      "integrity": "sha512-F7QqKXwwNlMmsulj6+O7r4mmtAlCWfO/0HdgOxSklZfQcDu0TpLiD1mRt/zF25Bk59FIjEuGAIyn5ei4yMfLhA==",
-      "dev": true,
-      "dependencies": {
-        "@webassemblyjs/ast": "1.11.1",
-        "@webassemblyjs/helper-wasm-bytecode": "1.11.1",
-        "@webassemblyjs/ieee754": "1.11.1",
-        "@webassemblyjs/leb128": "1.11.1",
-        "@webassemblyjs/utf8": "1.11.1"
-      }
-    },
-    "node_modules/@webassemblyjs/wasm-opt": {
-      "version": "1.11.1",
-      "resolved": "https://registry.npmjs.org/@webassemblyjs/wasm-opt/-/wasm-opt-1.11.1.tgz",
-      "integrity": "sha512-VqnkNqnZlU5EB64pp1l7hdm3hmQw7Vgqa0KF/KCNO9sIpI6Fk6brDEiX+iCOYrvMuBWDws0NkTOxYEb85XQHHw==",
-      "dev": true,
-      "dependencies": {
-        "@webassemblyjs/ast": "1.11.1",
-        "@webassemblyjs/helper-buffer": "1.11.1",
-        "@webassemblyjs/wasm-gen": "1.11.1",
-        "@webassemblyjs/wasm-parser": "1.11.1"
-      }
-    },
-    "node_modules/@webassemblyjs/wasm-parser": {
-      "version": "1.11.1",
-      "resolved": "https://registry.npmjs.org/@webassemblyjs/wasm-parser/-/wasm-parser-1.11.1.tgz",
-      "integrity": "sha512-rrBujw+dJu32gYB7/Lup6UhdkPx9S9SnobZzRVL7VcBH9Bt9bCBLEuX/YXOOtBsOZ4NQrRykKhffRWHvigQvOA==",
-      "dev": true,
-      "dependencies": {
-        "@webassemblyjs/ast": "1.11.1",
-        "@webassemblyjs/helper-api-error": "1.11.1",
-        "@webassemblyjs/helper-wasm-bytecode": "1.11.1",
-        "@webassemblyjs/ieee754": "1.11.1",
-        "@webassemblyjs/leb128": "1.11.1",
-        "@webassemblyjs/utf8": "1.11.1"
-      }
-    },
-    "node_modules/@webassemblyjs/wast-printer": {
-      "version": "1.11.1",
-      "resolved": "https://registry.npmjs.org/@webassemblyjs/wast-printer/-/wast-printer-1.11.1.tgz",
-      "integrity": "sha512-IQboUWM4eKzWW+N/jij2sRatKMh99QEelo3Eb2q0qXkvPRISAj8Qxtmw5itwqK+TTkBuUIE45AxYPToqPtL5gg==",
-      "dev": true,
-      "dependencies": {
-        "@webassemblyjs/ast": "1.11.1",
-        "@xtuc/long": "4.2.2"
-      }
-    },
-    "node_modules/@webpack-cli/configtest": {
-      "version": "2.0.1",
-      "resolved": "https://registry.npmjs.org/@webpack-cli/configtest/-/configtest-2.0.1.tgz",
-      "integrity": "sha512-njsdJXJSiS2iNbQVS0eT8A/KPnmyH4pv1APj2K0d1wrZcBLw+yppxOy4CGqa0OxDJkzfL/XELDhD8rocnIwB5A==",
-      "dev": true,
-      "engines": {
-        "node": ">=14.15.0"
-      },
-      "peerDependencies": {
-        "webpack": "5.x.x",
-        "webpack-cli": "5.x.x"
-      }
-    },
-    "node_modules/@webpack-cli/info": {
-      "version": "2.0.1",
-      "resolved": "https://registry.npmjs.org/@webpack-cli/info/-/info-2.0.1.tgz",
-      "integrity": "sha512-fE1UEWTwsAxRhrJNikE7v4EotYflkEhBL7EbajfkPlf6E37/2QshOy/D48Mw8G5XMFlQtS6YV42vtbG9zBpIQA==",
-      "dev": true,
-      "engines": {
-        "node": ">=14.15.0"
-      },
-      "peerDependencies": {
-        "webpack": "5.x.x",
-        "webpack-cli": "5.x.x"
-      }
-    },
-    "node_modules/@webpack-cli/serve": {
-      "version": "2.0.1",
-      "resolved": "https://registry.npmjs.org/@webpack-cli/serve/-/serve-2.0.1.tgz",
-      "integrity": "sha512-0G7tNyS+yW8TdgHwZKlDWYXFA6OJQnoLCQvYKkQP0Q2X205PSQ6RNUj0M+1OB/9gRQaUZ/ccYfaxd0nhaWKfjw==",
-      "dev": true,
-      "engines": {
-        "node": ">=14.15.0"
-      },
-      "peerDependencies": {
-        "webpack": "5.x.x",
-        "webpack-cli": "5.x.x"
-      },
-      "peerDependenciesMeta": {
-        "webpack-dev-server": {
-          "optional": true
-        }
-      }
-    },
-    "node_modules/@xtuc/ieee754": {
-      "version": "1.2.0",
-      "resolved": "https://registry.npmjs.org/@xtuc/ieee754/-/ieee754-1.2.0.tgz",
-      "integrity": "sha512-DX8nKgqcGwsc0eJSqYt5lwP4DH5FlHnmuWWBRy7X0NcaGR0ZtuyeESgMwTYVEtxmsNGY+qit4QYT/MIYTOTPeA==",
-      "dev": true
-    },
-    "node_modules/@xtuc/long": {
-      "version": "4.2.2",
-      "resolved": "https://registry.npmjs.org/@xtuc/long/-/long-4.2.2.tgz",
-      "integrity": "sha512-NuHqBY1PB/D8xU6s/thBgOAiAP7HOYDQ32+BFZILJ8ivkUkAHQnWfn6WhL79Owj1qmUnoN/YPhktdIoucipkAQ==",
-      "dev": true
-    },
     "node_modules/abort-controller": {
       "version": "3.0.0",
       "resolved": "https://registry.npmjs.org/abort-controller/-/abort-controller-3.0.0.tgz",
@@ -833,9 +904,9 @@
       }
     },
     "node_modules/acorn": {
-      "version": "8.8.2",
-      "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.8.2.tgz",
-      "integrity": "sha512-xjIYgE8HBrkpd/sJqOGNspf8uHG+NOHGOw6a/Urj8taM2EXfdNAH2oFcPeIFfsv3+kz/mJrS5VuMqbNLjCa2vw==",
+      "version": "8.10.0",
+      "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.10.0.tgz",
+      "integrity": "sha512-F0SAmZ8iUtS//m8DmCTA0jlh6TDKkHQyK6xc6V4KDTyZKA9dnvX9/3sRTVQrWm79glUAZbnmmNcdYwUIHWVybw==",
       "dev": true,
       "bin": {
         "acorn": "bin/acorn"
@@ -844,15 +915,6 @@
         "node": ">=0.4.0"
       }
     },
-    "node_modules/acorn-import-assertions": {
-      "version": "1.8.0",
-      "resolved": "https://registry.npmjs.org/acorn-import-assertions/-/acorn-import-assertions-1.8.0.tgz",
-      "integrity": "sha512-m7VZ3jwz4eK6A4Vtt8Ew1/mNbP24u0FhdyfA7fSvnJR6LMdfOYnmuIrrJAgrYfYJ10F/otaHTtrtrtmHdMNzEw==",
-      "dev": true,
-      "peerDependencies": {
-        "acorn": "^8"
-      }
-    },
     "node_modules/acorn-jsx": {
       "version": "5.3.2",
       "resolved": "https://registry.npmjs.org/acorn-jsx/-/acorn-jsx-5.3.2.tgz",
@@ -862,15 +924,6 @@
         "acorn": "^6.0.0 || ^7.0.0 || ^8.0.0"
       }
     },
-    "node_modules/acorn-walk": {
-      "version": "8.2.0",
-      "resolved": "https://registry.npmjs.org/acorn-walk/-/acorn-walk-8.2.0.tgz",
-      "integrity": "sha512-k+iyHEuPgSw6SbuDpGQM+06HQUa04DZ3o+F6CSzXMvvI5KMvnaEqXe+YVe555R9nn6GPt404fos4wcgpw12SDA==",
-      "dev": true,
-      "engines": {
-        "node": ">=0.4.0"
-      }
-    },
     "node_modules/ajv": {
       "version": "6.12.6",
       "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz",
@@ -887,15 +940,6 @@
         "url": "https://github.com/sponsors/epoberezkin"
       }
     },
-    "node_modules/ajv-keywords": {
-      "version": "3.5.2",
-      "resolved": "https://registry.npmjs.org/ajv-keywords/-/ajv-keywords-3.5.2.tgz",
-      "integrity": "sha512-5p6WTN0DdTGVQk6VjcEju19IgaHudalcfabD7yhDGeA6bcQnmL+CpveLJq/3hvfwd1aof6L386Ougkx6RfyMIQ==",
-      "dev": true,
-      "peerDependencies": {
-        "ajv": "^6.9.1"
-      }
-    },
     "node_modules/ansi-colors": {
       "version": "4.1.1",
       "resolved": "https://registry.npmjs.org/ansi-colors/-/ansi-colors-4.1.1.tgz",
@@ -948,6 +992,15 @@
       "integrity": "sha512-lYe4Gx7QT+MKGbDsA+Z+he/Wtef0BiwDOlK/XkBrdfsh9J/jPPXbX0tE9x9cl27Tmu5gg3QUbUrQYa/y+KOHPQ==",
       "dev": true
     },
+    "node_modules/are-docs-informative": {
+      "version": "0.0.2",
+      "resolved": "https://registry.npmjs.org/are-docs-informative/-/are-docs-informative-0.0.2.tgz",
+      "integrity": "sha512-ixiS0nLNNG5jNQzgZJNoUpBKdo9yTYZMGJ+QgT2jmjR7G7+QHRCc4v6LQ3NgE7EBJq+o0ams3waJwkrlBom8Ig==",
+      "dev": true,
+      "engines": {
+        "node": ">=14"
+      }
+    },
     "node_modules/are-we-there-yet": {
       "version": "4.0.0",
       "resolved": "https://registry.npmjs.org/are-we-there-yet/-/are-we-there-yet-4.0.0.tgz",
@@ -982,6 +1035,19 @@
       "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==",
       "dev": true
     },
+    "node_modules/array-buffer-byte-length": {
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/array-buffer-byte-length/-/array-buffer-byte-length-1.0.0.tgz",
+      "integrity": "sha512-LPuwb2P+NrQw3XhxGc36+XSvuBPopovXYTR9Ew++Du9Yb/bx5AzBfrIsBoj0EZUifjQU+sHL21sseZ3jerWO/A==",
+      "dev": true,
+      "dependencies": {
+        "call-bind": "^1.0.2",
+        "is-array-buffer": "^3.0.1"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/ljharb"
+      }
+    },
     "node_modules/array-includes": {
       "version": "3.1.6",
       "resolved": "https://registry.npmjs.org/array-includes/-/array-includes-3.1.6.tgz",
@@ -1010,6 +1076,25 @@
         "node": ">=8"
       }
     },
+    "node_modules/array.prototype.findlastindex": {
+      "version": "1.2.3",
+      "resolved": "https://registry.npmjs.org/array.prototype.findlastindex/-/array.prototype.findlastindex-1.2.3.tgz",
+      "integrity": "sha512-LzLoiOMAxvy+Gd3BAq3B7VeIgPdo+Q8hthvKtXybMvRV0jrXfJM/t8mw7nNlpEcVlVUnCnM2KSX4XU5HmpodOA==",
+      "dev": true,
+      "dependencies": {
+        "call-bind": "^1.0.2",
+        "define-properties": "^1.2.0",
+        "es-abstract": "^1.22.1",
+        "es-shim-unscopables": "^1.0.0",
+        "get-intrinsic": "^1.2.1"
+      },
+      "engines": {
+        "node": ">= 0.4"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/ljharb"
+      }
+    },
     "node_modules/array.prototype.flat": {
       "version": "1.3.1",
       "resolved": "https://registry.npmjs.org/array.prototype.flat/-/array.prototype.flat-1.3.1.tgz",
@@ -1046,34 +1131,25 @@
         "url": "https://github.com/sponsors/ljharb"
       }
     },
-    "node_modules/asn1.js": {
-      "version": "5.4.1",
-      "resolved": "https://registry.npmjs.org/asn1.js/-/asn1.js-5.4.1.tgz",
-      "integrity": "sha512-+I//4cYPccV8LdmBLiX8CYvf9Sp3vQsrqu2QNXRcrbiWvcx/UdlFiqUJJzxRQxgsZmvhXhn4cSKeSmoFjVdupA==",
-      "dev": true,
-      "dependencies": {
-        "bn.js": "^4.0.0",
-        "inherits": "^2.0.1",
-        "minimalistic-assert": "^1.0.0",
-        "safer-buffer": "^2.1.0"
-      }
-    },
-    "node_modules/asn1.js/node_modules/bn.js": {
-      "version": "4.12.0",
-      "resolved": "https://registry.npmjs.org/bn.js/-/bn.js-4.12.0.tgz",
-      "integrity": "sha512-c98Bf3tPniI+scsdk237ku1Dc3ujXQTSgyiPUDEOe7tRkhrqridvh8klBv0HCEso1OLOYcHuCv/cS6DNxKH+ZA==",
-      "dev": true
-    },
-    "node_modules/assert": {
-      "version": "2.0.0",
-      "resolved": "https://registry.npmjs.org/assert/-/assert-2.0.0.tgz",
-      "integrity": "sha512-se5Cd+js9dXJnu6Ag2JFc00t+HmHOen+8Q+L7O9zI0PqQXr20uk2J0XQqMxZEeo5U50o8Nvmmx7dZrl+Ufr35A==",
+    "node_modules/arraybuffer.prototype.slice": {
+      "version": "1.0.2",
+      "resolved": "https://registry.npmjs.org/arraybuffer.prototype.slice/-/arraybuffer.prototype.slice-1.0.2.tgz",
+      "integrity": "sha512-yMBKppFur/fbHu9/6USUe03bZ4knMYiwFBcyiaXB8Go0qNehwX6inYPzK9U0NeQvGxKthcmHcaR8P5MStSRBAw==",
       "dev": true,
       "dependencies": {
-        "es6-object-assign": "^1.1.0",
-        "is-nan": "^1.2.1",
-        "object-is": "^1.0.1",
-        "util": "^0.12.0"
+        "array-buffer-byte-length": "^1.0.0",
+        "call-bind": "^1.0.2",
+        "define-properties": "^1.2.0",
+        "es-abstract": "^1.22.1",
+        "get-intrinsic": "^1.2.1",
+        "is-array-buffer": "^3.0.2",
+        "is-shared-array-buffer": "^1.0.2"
+      },
+      "engines": {
+        "node": ">= 0.4"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/ljharb"
       }
     },
     "node_modules/async": {
@@ -1120,15 +1196,6 @@
         }
       ]
     },
-    "node_modules/big.js": {
-      "version": "5.2.2",
-      "resolved": "https://registry.npmjs.org/big.js/-/big.js-5.2.2.tgz",
-      "integrity": "sha512-vyL2OymJxmarO8gxMr0mhChsO9QGwhynfuu4+MHTAW6czfq9humCB7rKpUjDd9YUiDPU4mzpyupFSvOClAwbmQ==",
-      "dev": true,
-      "engines": {
-        "node": "*"
-      }
-    },
     "node_modules/binary-extensions": {
       "version": "2.2.0",
       "resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-2.2.0.tgz",
@@ -1138,12 +1205,6 @@
         "node": ">=8"
       }
     },
-    "node_modules/bn.js": {
-      "version": "5.2.1",
-      "resolved": "https://registry.npmjs.org/bn.js/-/bn.js-5.2.1.tgz",
-      "integrity": "sha512-eXRvHzWyYPBuB4NBy0cmYQjGitUrtqwbvlzP3G6VFnNRbsZQIxQ10PbKKHt8gZ/HW/D/747aDl+QkDqg3KQLMQ==",
-      "dev": true
-    },
     "node_modules/brace-expansion": {
       "version": "1.1.11",
       "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
@@ -1166,153 +1227,12 @@
         "node": ">=8"
       }
     },
-    "node_modules/brorand": {
-      "version": "1.1.0",
-      "resolved": "https://registry.npmjs.org/brorand/-/brorand-1.1.0.tgz",
-      "integrity": "sha512-cKV8tMCEpQs4hK/ik71d6LrPOnpkpGBR0wzxqr68g2m/LB2GxVYQroAjMJZRVM1Y4BCjCKc3vAamxSzOY2RP+w==",
-      "dev": true
-    },
     "node_modules/browser-stdout": {
       "version": "1.3.1",
       "resolved": "https://registry.npmjs.org/browser-stdout/-/browser-stdout-1.3.1.tgz",
       "integrity": "sha512-qhAVI1+Av2X7qelOfAIYwXONood6XlZE/fXaBSmW/T5SzLAmCgzi+eiWE7fUvbHaeNBQH13UftjpXxsfLkMpgw==",
       "dev": true
     },
-    "node_modules/browserify-aes": {
-      "version": "1.2.0",
-      "resolved": "https://registry.npmjs.org/browserify-aes/-/browserify-aes-1.2.0.tgz",
-      "integrity": "sha512-+7CHXqGuspUn/Sl5aO7Ea0xWGAtETPXNSAjHo48JfLdPWcMng33Xe4znFvQweqc/uzk5zSOI3H52CYnjCfb5hA==",
-      "dev": true,
-      "dependencies": {
-        "buffer-xor": "^1.0.3",
-        "cipher-base": "^1.0.0",
-        "create-hash": "^1.1.0",
-        "evp_bytestokey": "^1.0.3",
-        "inherits": "^2.0.1",
-        "safe-buffer": "^5.0.1"
-      }
-    },
-    "node_modules/browserify-cipher": {
-      "version": "1.0.1",
-      "resolved": "https://registry.npmjs.org/browserify-cipher/-/browserify-cipher-1.0.1.tgz",
-      "integrity": "sha512-sPhkz0ARKbf4rRQt2hTpAHqn47X3llLkUGn+xEJzLjwY8LRs2p0v7ljvI5EyoRO/mexrNunNECisZs+gw2zz1w==",
-      "dev": true,
-      "dependencies": {
-        "browserify-aes": "^1.0.4",
-        "browserify-des": "^1.0.0",
-        "evp_bytestokey": "^1.0.0"
-      }
-    },
-    "node_modules/browserify-des": {
-      "version": "1.0.2",
-      "resolved": "https://registry.npmjs.org/browserify-des/-/browserify-des-1.0.2.tgz",
-      "integrity": "sha512-BioO1xf3hFwz4kc6iBhI3ieDFompMhrMlnDFC4/0/vd5MokpuAc3R+LYbwTA9A5Yc9pq9UYPqffKpW2ObuwX5A==",
-      "dev": true,
-      "dependencies": {
-        "cipher-base": "^1.0.1",
-        "des.js": "^1.0.0",
-        "inherits": "^2.0.1",
-        "safe-buffer": "^5.1.2"
-      }
-    },
-    "node_modules/browserify-rsa": {
-      "version": "4.1.0",
-      "resolved": "https://registry.npmjs.org/browserify-rsa/-/browserify-rsa-4.1.0.tgz",
-      "integrity": "sha512-AdEER0Hkspgno2aR97SAf6vi0y0k8NuOpGnVH3O99rcA5Q6sh8QxcngtHuJ6uXwnfAXNM4Gn1Gb7/MV1+Ymbog==",
-      "dev": true,
-      "dependencies": {
-        "bn.js": "^5.0.0",
-        "randombytes": "^2.0.1"
-      }
-    },
-    "node_modules/browserify-sign": {
-      "version": "4.2.1",
-      "resolved": "https://registry.npmjs.org/browserify-sign/-/browserify-sign-4.2.1.tgz",
-      "integrity": "sha512-/vrA5fguVAKKAVTNJjgSm1tRQDHUU6DbwO9IROu/0WAzC8PKhucDSh18J0RMvVeHAn5puMd+QHC2erPRNf8lmg==",
-      "dev": true,
-      "dependencies": {
-        "bn.js": "^5.1.1",
-        "browserify-rsa": "^4.0.1",
-        "create-hash": "^1.2.0",
-        "create-hmac": "^1.1.7",
-        "elliptic": "^6.5.3",
-        "inherits": "^2.0.4",
-        "parse-asn1": "^5.1.5",
-        "readable-stream": "^3.6.0",
-        "safe-buffer": "^5.2.0"
-      }
-    },
-    "node_modules/browserify-sign/node_modules/readable-stream": {
-      "version": "3.6.1",
-      "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.1.tgz",
-      "integrity": "sha512-+rQmrWMYGA90yenhTYsLWAsLsqVC8osOw6PKE1HDYiO0gdPeKe/xDHNzIAIn4C91YQ6oenEhfYqqc1883qHbjQ==",
-      "dev": true,
-      "dependencies": {
-        "inherits": "^2.0.3",
-        "string_decoder": "^1.1.1",
-        "util-deprecate": "^1.0.1"
-      },
-      "engines": {
-        "node": ">= 6"
-      }
-    },
-    "node_modules/browserify-sign/node_modules/safe-buffer": {
-      "version": "5.2.1",
-      "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz",
-      "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==",
-      "dev": true,
-      "funding": [
-        {
-          "type": "github",
-          "url": "https://github.com/sponsors/feross"
-        },
-        {
-          "type": "patreon",
-          "url": "https://www.patreon.com/feross"
-        },
-        {
-          "type": "consulting",
-          "url": "https://feross.org/support"
-        }
-      ]
-    },
-    "node_modules/browserify-zlib": {
-      "version": "0.2.0",
-      "resolved": "https://registry.npmjs.org/browserify-zlib/-/browserify-zlib-0.2.0.tgz",
-      "integrity": "sha512-Z942RysHXmJrhqk88FmKBVq/v5tqmSkDz7p54G/MGyjMnCFFnC79XWNbg+Vta8W6Wb2qtSZTSxIGkJrRpCFEiA==",
-      "dev": true,
-      "dependencies": {
-        "pako": "~1.0.5"
-      }
-    },
-    "node_modules/browserslist": {
-      "version": "4.21.5",
-      "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.21.5.tgz",
-      "integrity": "sha512-tUkiguQGW7S3IhB7N+c2MV/HZPSCPAAiYBZXLsBhFB/PCy6ZKKsZrmBayHV9fdGV/ARIfJ14NkxKzRDjvp7L6w==",
-      "dev": true,
-      "funding": [
-        {
-          "type": "opencollective",
-          "url": "https://opencollective.com/browserslist"
-        },
-        {
-          "type": "tidelift",
-          "url": "https://tidelift.com/funding/github/npm/browserslist"
-        }
-      ],
-      "dependencies": {
-        "caniuse-lite": "^1.0.30001449",
-        "electron-to-chromium": "^1.4.284",
-        "node-releases": "^2.0.8",
-        "update-browserslist-db": "^1.0.10"
-      },
-      "bin": {
-        "browserslist": "cli.js"
-      },
-      "engines": {
-        "node": "^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7"
-      }
-    },
     "node_modules/buffer": {
       "version": "6.0.3",
       "resolved": "https://registry.npmjs.org/buffer/-/buffer-6.0.3.tgz",
@@ -1337,18 +1257,6 @@
         "ieee754": "^1.2.1"
       }
     },
-    "node_modules/buffer-from": {
-      "version": "1.1.2",
-      "resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.2.tgz",
-      "integrity": "sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==",
-      "dev": true
-    },
-    "node_modules/buffer-xor": {
-      "version": "1.0.3",
-      "resolved": "https://registry.npmjs.org/buffer-xor/-/buffer-xor-1.0.3.tgz",
-      "integrity": "sha512-571s0T7nZWK6vB67HI5dyUF7wXiNcfaPPPTl6zYCNApANjIvYJTg7hlud/+cJpdAhS7dVzqMLmfhfHR3rAcOjQ==",
-      "dev": true
-    },
     "node_modules/builtin-modules": {
       "version": "3.3.0",
       "resolved": "https://registry.npmjs.org/builtin-modules/-/builtin-modules-3.3.0.tgz",
@@ -1361,12 +1269,6 @@
         "url": "https://github.com/sponsors/sindresorhus"
       }
     },
-    "node_modules/builtin-status-codes": {
-      "version": "3.0.0",
-      "resolved": "https://registry.npmjs.org/builtin-status-codes/-/builtin-status-codes-3.0.0.tgz",
-      "integrity": "sha512-HpGFw18DgFWlncDfjTa2rcQ4W88O1mC8e8yZ2AvQY5KDaktSTwo+KRf6nHK6FRI5FyRyb/5T6+TSxfP7QyGsmQ==",
-      "dev": true
-    },
     "node_modules/call-bind": {
       "version": "1.0.2",
       "resolved": "https://registry.npmjs.org/call-bind/-/call-bind-1.0.2.tgz",
@@ -1401,22 +1303,6 @@
         "url": "https://github.com/sponsors/sindresorhus"
       }
     },
-    "node_modules/caniuse-lite": {
-      "version": "1.0.30001460",
-      "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001460.tgz",
-      "integrity": "sha512-Bud7abqjvEjipUkpLs4D7gR0l8hBYBHoa+tGtKJHvT2AYzLp1z7EmVkUT4ERpVUfca8S2HGIVs883D8pUH1ZzQ==",
-      "dev": true,
-      "funding": [
-        {
-          "type": "opencollective",
-          "url": "https://opencollective.com/browserslist"
-        },
-        {
-          "type": "tidelift",
-          "url": "https://tidelift.com/funding/github/npm/caniuse-lite"
-        }
-      ]
-    },
     "node_modules/chalk": {
       "version": "4.1.2",
       "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz",
@@ -1472,15 +1358,6 @@
         "node": ">= 6"
       }
     },
-    "node_modules/chrome-trace-event": {
-      "version": "1.0.3",
-      "resolved": "https://registry.npmjs.org/chrome-trace-event/-/chrome-trace-event-1.0.3.tgz",
-      "integrity": "sha512-p3KULyQg4S7NIHixdwbGX+nFHkoBiA4YQmyWtjb8XngSKV124nJmRysgAeujbUVb15vh+RvFUfCPqU7rXk+hZg==",
-      "dev": true,
-      "engines": {
-        "node": ">=6.0"
-      }
-    },
     "node_modules/ci-info": {
       "version": "3.8.0",
       "resolved": "https://registry.npmjs.org/ci-info/-/ci-info-3.8.0.tgz",
@@ -1496,16 +1373,6 @@
         "node": ">=8"
       }
     },
-    "node_modules/cipher-base": {
-      "version": "1.0.4",
-      "resolved": "https://registry.npmjs.org/cipher-base/-/cipher-base-1.0.4.tgz",
-      "integrity": "sha512-Kkht5ye6ZGmwv40uUDZztayT2ThLQGfnj/T71N/XzeZeo3nf8foyW7zGTsPYkEya3m5f3cAypH+qe7YOrM1U2Q==",
-      "dev": true,
-      "dependencies": {
-        "inherits": "^2.0.1",
-        "safe-buffer": "^5.0.1"
-      }
-    },
     "node_modules/clang-format": {
       "version": "1.8.0",
       "resolved": "https://registry.npmjs.org/clang-format/-/clang-format-1.8.0.tgz",
@@ -1554,20 +1421,6 @@
         "wrap-ansi": "^7.0.0"
       }
     },
-    "node_modules/clone-deep": {
-      "version": "4.0.1",
-      "resolved": "https://registry.npmjs.org/clone-deep/-/clone-deep-4.0.1.tgz",
-      "integrity": "sha512-neHB9xuzh/wk0dIHweyAXv2aPGZIVk3pLMe+/RNzINf17fe0OG96QroktYAUm7SM1PBnzTabaLboqqxDyMU+SQ==",
-      "dev": true,
-      "dependencies": {
-        "is-plain-object": "^2.0.4",
-        "kind-of": "^6.0.2",
-        "shallow-clone": "^3.0.0"
-      },
-      "engines": {
-        "node": ">=6"
-      }
-    },
     "node_modules/color-convert": {
       "version": "2.0.1",
       "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz",
@@ -1595,22 +1448,10 @@
         "color-support": "bin.js"
       }
     },
-    "node_modules/colorette": {
-      "version": "2.0.19",
-      "resolved": "https://registry.npmjs.org/colorette/-/colorette-2.0.19.tgz",
-      "integrity": "sha512-3tlv/dIP7FWvj3BsbHrGLJ6l/oKh1O3TcgBqMn+yyCagOxc23fyzDS6HypQbgxWbkpDnf52p1LuR4eWDQ/K9WQ==",
-      "dev": true
-    },
-    "node_modules/commander": {
-      "version": "2.20.3",
-      "resolved": "https://registry.npmjs.org/commander/-/commander-2.20.3.tgz",
-      "integrity": "sha512-GpVkmM8vF2vQUkj2LvZmD35JxeJOLCwJ9cUkugyk2nuhbv3+mJvpLYYt+0+USMxE+oj+ey/lJEnhZw75x/OMcQ==",
-      "dev": true
-    },
     "node_modules/comment-parser": {
-      "version": "1.3.1",
-      "resolved": "https://registry.npmjs.org/comment-parser/-/comment-parser-1.3.1.tgz",
-      "integrity": "sha512-B52sN2VNghyq5ofvUsqZjmk6YkihBX5vMSChmSK9v4ShjKf3Vk5Xcmgpw4o+iIgtrnM/u5FiMpz9VKb8lpBveA==",
+      "version": "1.4.0",
+      "resolved": "https://registry.npmjs.org/comment-parser/-/comment-parser-1.4.0.tgz",
+      "integrity": "sha512-QLyTNiZ2KDOibvFPlZ6ZngVsZ/0gYnE6uTXi5aoDg8ed3AkJAz4sEje3Y8a29hQ1s6A99MZXe47fLAXQ1rTqaw==",
       "dev": true,
       "engines": {
         "node": ">= 12.0.0"
@@ -1622,73 +1463,18 @@
       "integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==",
       "dev": true
     },
-    "node_modules/console-browserify": {
-      "version": "1.2.0",
-      "resolved": "https://registry.npmjs.org/console-browserify/-/console-browserify-1.2.0.tgz",
-      "integrity": "sha512-ZMkYO/LkF17QvCPqM0gxw8yUzigAOZOSWSHg91FH6orS7vcEj5dVZTidN2fQ14yBSdg97RqhSNwLUXInd52OTA==",
-      "dev": true
-    },
     "node_modules/console-control-strings": {
       "version": "1.1.0",
       "resolved": "https://registry.npmjs.org/console-control-strings/-/console-control-strings-1.1.0.tgz",
       "integrity": "sha512-ty/fTekppD2fIwRvnZAVdeOiGd1c7YXEixbgJTNzqcxJWKQnjJ/V1bNEEE6hygpM3WjwHFUVK6HTjWSzV4a8sQ==",
       "dev": true
     },
-    "node_modules/constants-browserify": {
-      "version": "1.0.0",
-      "resolved": "https://registry.npmjs.org/constants-browserify/-/constants-browserify-1.0.0.tgz",
-      "integrity": "sha512-xFxOwqIzR/e1k1gLiWEophSCMqXcwVHIH7akf7b/vxcUeGunlj3hvZaaqxwHsTgn+IndtkQJgSztIDWeumWJDQ==",
-      "dev": true
-    },
     "node_modules/core-util-is": {
       "version": "1.0.3",
       "resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.3.tgz",
       "integrity": "sha512-ZQBvi1DcpJ4GDqanjucZ2Hj3wEO5pZDS89BWbkcrvdxksJorwUDDZamX9ldFkp9aw2lmBDLgkObEA4DWNJ9FYQ==",
       "dev": true
     },
-    "node_modules/create-ecdh": {
-      "version": "4.0.4",
-      "resolved": "https://registry.npmjs.org/create-ecdh/-/create-ecdh-4.0.4.tgz",
-      "integrity": "sha512-mf+TCx8wWc9VpuxfP2ht0iSISLZnt0JgWlrOKZiNqyUZWnjIaCIVNQArMHnCZKfEYRg6IM7A+NeJoN8gf/Ws0A==",
-      "dev": true,
-      "dependencies": {
-        "bn.js": "^4.1.0",
-        "elliptic": "^6.5.3"
-      }
-    },
-    "node_modules/create-ecdh/node_modules/bn.js": {
-      "version": "4.12.0",
-      "resolved": "https://registry.npmjs.org/bn.js/-/bn.js-4.12.0.tgz",
-      "integrity": "sha512-c98Bf3tPniI+scsdk237ku1Dc3ujXQTSgyiPUDEOe7tRkhrqridvh8klBv0HCEso1OLOYcHuCv/cS6DNxKH+ZA==",
-      "dev": true
-    },
-    "node_modules/create-hash": {
-      "version": "1.2.0",
-      "resolved": "https://registry.npmjs.org/create-hash/-/create-hash-1.2.0.tgz",
-      "integrity": "sha512-z00bCGNHDG8mHAkP7CtT1qVu+bFQUPjYq/4Iv3C3kWjTFV10zIjfSoeqXo9Asws8gwSHDGj/hl2u4OGIjapeCg==",
-      "dev": true,
-      "dependencies": {
-        "cipher-base": "^1.0.1",
-        "inherits": "^2.0.1",
-        "md5.js": "^1.3.4",
-        "ripemd160": "^2.0.1",
-        "sha.js": "^2.4.0"
-      }
-    },
-    "node_modules/create-hmac": {
-      "version": "1.1.7",
-      "resolved": "https://registry.npmjs.org/create-hmac/-/create-hmac-1.1.7.tgz",
-      "integrity": "sha512-MJG9liiZ+ogc4TzUwuvbER1JRdgvUFSB5+VR/g5h82fGaIRWMWddtKBHi7/sVhfjQZ6SehlyhvQYrcYkaUIpLg==",
-      "dev": true,
-      "dependencies": {
-        "cipher-base": "^1.0.3",
-        "create-hash": "^1.1.0",
-        "inherits": "^2.0.1",
-        "ripemd160": "^2.0.0",
-        "safe-buffer": "^5.0.1",
-        "sha.js": "^2.4.8"
-      }
-    },
     "node_modules/cross-spawn": {
       "version": "7.0.3",
       "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz",
@@ -1703,28 +1489,6 @@
         "node": ">= 8"
       }
     },
-    "node_modules/crypto-browserify": {
-      "version": "3.12.0",
-      "resolved": "https://registry.npmjs.org/crypto-browserify/-/crypto-browserify-3.12.0.tgz",
-      "integrity": "sha512-fz4spIh+znjO2VjL+IdhEpRJ3YN6sMzITSBijk6FK2UvTqruSQW+/cCZTSNsMiZNvUeq0CqurF+dAbyiGOY6Wg==",
-      "dev": true,
-      "dependencies": {
-        "browserify-cipher": "^1.0.0",
-        "browserify-sign": "^4.0.0",
-        "create-ecdh": "^4.0.0",
-        "create-hash": "^1.1.0",
-        "create-hmac": "^1.1.0",
-        "diffie-hellman": "^5.0.0",
-        "inherits": "^2.0.1",
-        "pbkdf2": "^3.0.3",
-        "public-encrypt": "^4.0.0",
-        "randombytes": "^2.0.0",
-        "randomfill": "^1.0.3"
-      },
-      "engines": {
-        "node": "*"
-      }
-    },
     "node_modules/debug": {
       "version": "4.3.4",
       "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz",
@@ -1760,6 +1524,20 @@
       "integrity": "sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ==",
       "dev": true
     },
+    "node_modules/define-data-property": {
+      "version": "1.1.0",
+      "resolved": "https://registry.npmjs.org/define-data-property/-/define-data-property-1.1.0.tgz",
+      "integrity": "sha512-UzGwzcjyv3OtAvolTj1GoyNYzfFR+iqbGjcnBEENZVCpM4/Ng1yhGNvS3lR/xDS74Tb2wGG9WzNSNIOS9UVb2g==",
+      "dev": true,
+      "dependencies": {
+        "get-intrinsic": "^1.2.1",
+        "gopd": "^1.0.1",
+        "has-property-descriptors": "^1.0.0"
+      },
+      "engines": {
+        "node": ">= 0.4"
+      }
+    },
     "node_modules/define-properties": {
       "version": "1.2.0",
       "resolved": "https://registry.npmjs.org/define-properties/-/define-properties-1.2.0.tgz",
@@ -1782,16 +1560,6 @@
       "integrity": "sha512-bd2L678uiWATM6m5Z1VzNCErI3jiGzt6HGY8OVICs40JQq/HALfbyNJmp0UDakEY4pMMaN0Ly5om/B1VI/+xfQ==",
       "dev": true
     },
-    "node_modules/des.js": {
-      "version": "1.0.1",
-      "resolved": "https://registry.npmjs.org/des.js/-/des.js-1.0.1.tgz",
-      "integrity": "sha512-Q0I4pfFrv2VPd34/vfLrFOoRmlYj3OV50i7fskps1jZWK1kApMWWT9G6RRUeYedLcBDIhnSDaUvJMb3AhUlaEA==",
-      "dev": true,
-      "dependencies": {
-        "inherits": "^2.0.1",
-        "minimalistic-assert": "^1.0.0"
-      }
-    },
     "node_modules/diff": {
       "version": "5.0.0",
       "resolved": "https://registry.npmjs.org/diff/-/diff-5.0.0.tgz",
@@ -1801,30 +1569,13 @@
         "node": ">=0.3.1"
       }
     },
-    "node_modules/diffie-hellman": {
-      "version": "5.0.3",
-      "resolved": "https://registry.npmjs.org/diffie-hellman/-/diffie-hellman-5.0.3.tgz",
-      "integrity": "sha512-kqag/Nl+f3GwyK25fhUMYj81BUOrZ9IuJsjIcDE5icNM9FJHAVm3VcUDxdLPoQtTuUylWm6ZIknYJwwaPxsUzg==",
-      "dev": true,
-      "dependencies": {
-        "bn.js": "^4.1.0",
-        "miller-rabin": "^4.0.0",
-        "randombytes": "^2.0.0"
-      }
-    },
-    "node_modules/diffie-hellman/node_modules/bn.js": {
-      "version": "4.12.0",
-      "resolved": "https://registry.npmjs.org/bn.js/-/bn.js-4.12.0.tgz",
-      "integrity": "sha512-c98Bf3tPniI+scsdk237ku1Dc3ujXQTSgyiPUDEOe7tRkhrqridvh8klBv0HCEso1OLOYcHuCv/cS6DNxKH+ZA==",
-      "dev": true
-    },
     "node_modules/dir-compare": {
-      "version": "4.0.0",
-      "resolved": "https://registry.npmjs.org/dir-compare/-/dir-compare-4.0.0.tgz",
-      "integrity": "sha512-wC7thVKL3V656tO61rbEDE4LTeeYrUC2pAUL00AaXYghBhjjVNRyBlpH6POzb44ZuK23OSrqF6TbSC/QYeqfAg==",
+      "version": "4.2.0",
+      "resolved": "https://registry.npmjs.org/dir-compare/-/dir-compare-4.2.0.tgz",
+      "integrity": "sha512-2xMCmOoMrdQIPHdsTawECdNPwlVFB9zGcz3kuhmBO6U3oU+UQjsue0i8ayLKpgBcm+hcXPMVSGUN9d+pvJ6+VQ==",
       "dev": true,
       "dependencies": {
-        "minimatch": "^3.0.4",
+        "minimatch": "^3.0.5",
         "p-limit": "^3.1.0 "
       }
     },
@@ -1852,91 +1603,12 @@
         "node": ">=6.0.0"
       }
     },
-    "node_modules/domain-browser": {
-      "version": "4.22.0",
-      "resolved": "https://registry.npmjs.org/domain-browser/-/domain-browser-4.22.0.tgz",
-      "integrity": "sha512-IGBwjF7tNk3cwypFNH/7bfzBcgSCbaMOD3GsaY1AU/JRrnHnYgEM0+9kQt52iZxjNsjBtJYtao146V+f8jFZNw==",
-      "dev": true,
-      "engines": {
-        "node": ">=10"
-      },
-      "funding": {
-        "url": "https://bevry.me/fund"
-      }
-    },
-    "node_modules/duplexer": {
-      "version": "0.1.2",
-      "resolved": "https://registry.npmjs.org/duplexer/-/duplexer-0.1.2.tgz",
-      "integrity": "sha512-jtD6YG370ZCIi/9GTaJKQxWTZD045+4R4hTk/x1UyoqadyJ9x9CgSi1RlVDQF8U2sxLLSnFkCaMihqljHIWgMg==",
-      "dev": true
-    },
-    "node_modules/electron-to-chromium": {
-      "version": "1.4.320",
-      "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.4.320.tgz",
-      "integrity": "sha512-h70iRscrNluMZPVICXYl5SSB+rBKo22XfuIS1ER0OQxQZpKTnFpuS6coj7wY9M/3trv7OR88rRMOlKmRvDty7Q==",
-      "dev": true
-    },
-    "node_modules/elliptic": {
-      "version": "6.5.4",
-      "resolved": "https://registry.npmjs.org/elliptic/-/elliptic-6.5.4.tgz",
-      "integrity": "sha512-iLhC6ULemrljPZb+QutR5TQGB+pdW6KGD5RSegS+8sorOZT+rdQFbsQFJgvN3eRqNALqJer4oQ16YvJHlU8hzQ==",
-      "dev": true,
-      "dependencies": {
-        "bn.js": "^4.11.9",
-        "brorand": "^1.1.0",
-        "hash.js": "^1.0.0",
-        "hmac-drbg": "^1.0.1",
-        "inherits": "^2.0.4",
-        "minimalistic-assert": "^1.0.1",
-        "minimalistic-crypto-utils": "^1.0.1"
-      }
-    },
-    "node_modules/elliptic/node_modules/bn.js": {
-      "version": "4.12.0",
-      "resolved": "https://registry.npmjs.org/bn.js/-/bn.js-4.12.0.tgz",
-      "integrity": "sha512-c98Bf3tPniI+scsdk237ku1Dc3ujXQTSgyiPUDEOe7tRkhrqridvh8klBv0HCEso1OLOYcHuCv/cS6DNxKH+ZA==",
-      "dev": true
-    },
     "node_modules/emoji-regex": {
       "version": "8.0.0",
       "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz",
       "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==",
       "dev": true
     },
-    "node_modules/emojis-list": {
-      "version": "3.0.0",
-      "resolved": "https://registry.npmjs.org/emojis-list/-/emojis-list-3.0.0.tgz",
-      "integrity": "sha512-/kyM18EfinwXZbno9FyUGeFh87KC8HRQBQGildHZbEuRyWFOmv1U10o9BBp8XVZDVNNuQKyIGIu5ZYAAXJ0V2Q==",
-      "dev": true,
-      "engines": {
-        "node": ">= 4"
-      }
-    },
-    "node_modules/enhanced-resolve": {
-      "version": "5.12.0",
-      "resolved": "https://registry.npmjs.org/enhanced-resolve/-/enhanced-resolve-5.12.0.tgz",
-      "integrity": "sha512-QHTXI/sZQmko1cbDoNAa3mJ5qhWUUNAq3vR0/YiD379fWQrcfuoX1+HW2S0MTt7XmoPLapdaDKUtelUSPic7hQ==",
-      "dev": true,
-      "dependencies": {
-        "graceful-fs": "^4.2.4",
-        "tapable": "^2.2.0"
-      },
-      "engines": {
-        "node": ">=10.13.0"
-      }
-    },
-    "node_modules/envinfo": {
-      "version": "7.8.1",
-      "resolved": "https://registry.npmjs.org/envinfo/-/envinfo-7.8.1.tgz",
-      "integrity": "sha512-/o+BXHmB7ocbHEAs6F2EnG0ogybVVUdkRunTT2glZU9XAaGmhqskrvKwqXuDfNjEO0LZKWdejEEpnq8aM0tOaw==",
-      "dev": true,
-      "bin": {
-        "envinfo": "dist/cli.js"
-      },
-      "engines": {
-        "node": ">=4"
-      }
-    },
     "node_modules/error-ex": {
       "version": "1.3.2",
       "resolved": "https://registry.npmjs.org/error-ex/-/error-ex-1.3.2.tgz",
@@ -1947,18 +1619,19 @@
       }
     },
     "node_modules/es-abstract": {
-      "version": "1.21.1",
-      "resolved": "https://registry.npmjs.org/es-abstract/-/es-abstract-1.21.1.tgz",
-      "integrity": "sha512-QudMsPOz86xYz/1dG1OuGBKOELjCh99IIWHLzy5znUB6j8xG2yMA7bfTV86VSqKF+Y/H08vQPR+9jyXpuC6hfg==",
+      "version": "1.22.2",
+      "resolved": "https://registry.npmjs.org/es-abstract/-/es-abstract-1.22.2.tgz",
+      "integrity": "sha512-YoxfFcDmhjOgWPWsV13+2RNjq1F6UQnfs+8TftwNqtzlmFzEXvlUwdrNrYeaizfjQzRMxkZ6ElWMOJIFKdVqwA==",
       "dev": true,
       "dependencies": {
+        "array-buffer-byte-length": "^1.0.0",
+        "arraybuffer.prototype.slice": "^1.0.2",
         "available-typed-arrays": "^1.0.5",
         "call-bind": "^1.0.2",
         "es-set-tostringtag": "^2.0.1",
         "es-to-primitive": "^1.2.1",
-        "function-bind": "^1.1.1",
-        "function.prototype.name": "^1.1.5",
-        "get-intrinsic": "^1.1.3",
+        "function.prototype.name": "^1.1.6",
+        "get-intrinsic": "^1.2.1",
         "get-symbol-description": "^1.0.0",
         "globalthis": "^1.0.3",
         "gopd": "^1.0.1",
@@ -1966,25 +1639,30 @@
         "has-property-descriptors": "^1.0.0",
         "has-proto": "^1.0.1",
         "has-symbols": "^1.0.3",
-        "internal-slot": "^1.0.4",
-        "is-array-buffer": "^3.0.1",
+        "internal-slot": "^1.0.5",
+        "is-array-buffer": "^3.0.2",
         "is-callable": "^1.2.7",
         "is-negative-zero": "^2.0.2",
         "is-regex": "^1.1.4",
         "is-shared-array-buffer": "^1.0.2",
         "is-string": "^1.0.7",
-        "is-typed-array": "^1.1.10",
+        "is-typed-array": "^1.1.12",
         "is-weakref": "^1.0.2",
-        "object-inspect": "^1.12.2",
+        "object-inspect": "^1.12.3",
         "object-keys": "^1.1.1",
         "object.assign": "^4.1.4",
-        "regexp.prototype.flags": "^1.4.3",
+        "regexp.prototype.flags": "^1.5.1",
+        "safe-array-concat": "^1.0.1",
         "safe-regex-test": "^1.0.0",
-        "string.prototype.trimend": "^1.0.6",
-        "string.prototype.trimstart": "^1.0.6",
+        "string.prototype.trim": "^1.2.8",
+        "string.prototype.trimend": "^1.0.7",
+        "string.prototype.trimstart": "^1.0.7",
+        "typed-array-buffer": "^1.0.0",
+        "typed-array-byte-length": "^1.0.0",
+        "typed-array-byte-offset": "^1.0.0",
         "typed-array-length": "^1.0.4",
         "unbox-primitive": "^1.0.2",
-        "which-typed-array": "^1.1.9"
+        "which-typed-array": "^1.1.11"
       },
       "engines": {
         "node": ">= 0.4"
@@ -1993,12 +1671,6 @@
         "url": "https://github.com/sponsors/ljharb"
       }
     },
-    "node_modules/es-module-lexer": {
-      "version": "0.9.3",
-      "resolved": "https://registry.npmjs.org/es-module-lexer/-/es-module-lexer-0.9.3.tgz",
-      "integrity": "sha512-1HQ2M2sPtxwnvOvT1ZClHyQDiggdNjURWpY2we6aMKCQiUVxTmVs2UYPLIrD84sS+kMdUwfBSylbJPwNnBrnHQ==",
-      "dev": true
-    },
     "node_modules/es-set-tostringtag": {
       "version": "2.0.1",
       "resolved": "https://registry.npmjs.org/es-set-tostringtag/-/es-set-tostringtag-2.0.1.tgz",
@@ -2039,11 +1711,55 @@
         "url": "https://github.com/sponsors/ljharb"
       }
     },
-    "node_modules/es6-object-assign": {
-      "version": "1.1.0",
-      "resolved": "https://registry.npmjs.org/es6-object-assign/-/es6-object-assign-1.1.0.tgz",
-      "integrity": "sha512-MEl9uirslVwqQU369iHNWZXsI8yaZYGg/D65aOgZkeyFJwHYSxilf7rQzXKI7DdDuBPrBXbfk3sl9hJhmd5AUw==",
-      "dev": true
+    "node_modules/esbuild": {
+      "version": "0.19.3",
+      "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.19.3.tgz",
+      "integrity": "sha512-UlJ1qUUA2jL2nNib1JTSkifQTcYTroFqRjwCFW4QYEKEsixXD5Tik9xML7zh2gTxkYTBKGHNH9y7txMwVyPbjw==",
+      "dev": true,
+      "hasInstallScript": true,
+      "bin": {
+        "esbuild": "bin/esbuild"
+      },
+      "engines": {
+        "node": ">=12"
+      },
+      "optionalDependencies": {
+        "@esbuild/android-arm": "0.19.3",
+        "@esbuild/android-arm64": "0.19.3",
+        "@esbuild/android-x64": "0.19.3",
+        "@esbuild/darwin-arm64": "0.19.3",
+        "@esbuild/darwin-x64": "0.19.3",
+        "@esbuild/freebsd-arm64": "0.19.3",
+        "@esbuild/freebsd-x64": "0.19.3",
+        "@esbuild/linux-arm": "0.19.3",
+        "@esbuild/linux-arm64": "0.19.3",
+        "@esbuild/linux-ia32": "0.19.3",
+        "@esbuild/linux-loong64": "0.19.3",
+        "@esbuild/linux-mips64el": "0.19.3",
+        "@esbuild/linux-ppc64": "0.19.3",
+        "@esbuild/linux-riscv64": "0.19.3",
+        "@esbuild/linux-s390x": "0.19.3",
+        "@esbuild/linux-x64": "0.19.3",
+        "@esbuild/netbsd-x64": "0.19.3",
+        "@esbuild/openbsd-x64": "0.19.3",
+        "@esbuild/sunos-x64": "0.19.3",
+        "@esbuild/win32-arm64": "0.19.3",
+        "@esbuild/win32-ia32": "0.19.3",
+        "@esbuild/win32-x64": "0.19.3"
+      }
+    },
+    "node_modules/esbuild-plugin-polyfill-node": {
+      "version": "0.3.0",
+      "resolved": "https://registry.npmjs.org/esbuild-plugin-polyfill-node/-/esbuild-plugin-polyfill-node-0.3.0.tgz",
+      "integrity": "sha512-SHG6CKUfWfYyYXGpW143NEZtcVVn8S/WHcEOxk62LuDXnY4Zpmc+WmxJKN6GMTgTClXJXhEM5KQlxKY6YjbucQ==",
+      "dev": true,
+      "dependencies": {
+        "@jspm/core": "^2.0.1",
+        "import-meta-resolve": "^3.0.0"
+      },
+      "peerDependencies": {
+        "esbuild": "*"
+      }
     },
     "node_modules/escalade": {
       "version": "3.1.1",
@@ -2067,26 +1783,27 @@
       }
     },
     "node_modules/eslint": {
-      "version": "8.35.0",
-      "resolved": "https://registry.npmjs.org/eslint/-/eslint-8.35.0.tgz",
-      "integrity": "sha512-BxAf1fVL7w+JLRQhWl2pzGeSiGqbWumV4WNvc9Rhp6tiCtm4oHnyPBSEtMGZwrQgudFQ+otqzWoPB7x+hxoWsw==",
+      "version": "8.51.0",
+      "resolved": "https://registry.npmjs.org/eslint/-/eslint-8.51.0.tgz",
+      "integrity": "sha512-2WuxRZBrlwnXi+/vFSJyjMqrNjtJqiasMzehF0shoLaW7DzS3/9Yvrmq5JiT66+pNjiX4UBnLDiKHcWAr/OInA==",
       "dev": true,
       "dependencies": {
-        "@eslint/eslintrc": "^2.0.0",
-        "@eslint/js": "8.35.0",
-        "@humanwhocodes/config-array": "^0.11.8",
+        "@eslint-community/eslint-utils": "^4.2.0",
+        "@eslint-community/regexpp": "^4.6.1",
+        "@eslint/eslintrc": "^2.1.2",
+        "@eslint/js": "8.51.0",
+        "@humanwhocodes/config-array": "^0.11.11",
         "@humanwhocodes/module-importer": "^1.0.1",
         "@nodelib/fs.walk": "^1.2.8",
-        "ajv": "^6.10.0",
+        "ajv": "^6.12.4",
         "chalk": "^4.0.0",
         "cross-spawn": "^7.0.2",
         "debug": "^4.3.2",
         "doctrine": "^3.0.0",
         "escape-string-regexp": "^4.0.0",
-        "eslint-scope": "^7.1.1",
-        "eslint-utils": "^3.0.0",
-        "eslint-visitor-keys": "^3.3.0",
-        "espree": "^9.4.0",
+        "eslint-scope": "^7.2.2",
+        "eslint-visitor-keys": "^3.4.3",
+        "espree": "^9.6.1",
         "esquery": "^1.4.2",
         "esutils": "^2.0.2",
         "fast-deep-equal": "^3.1.3",
@@ -2094,23 +1811,19 @@
         "find-up": "^5.0.0",
         "glob-parent": "^6.0.2",
         "globals": "^13.19.0",
-        "grapheme-splitter": "^1.0.4",
+        "graphemer": "^1.4.0",
         "ignore": "^5.2.0",
-        "import-fresh": "^3.0.0",
         "imurmurhash": "^0.1.4",
         "is-glob": "^4.0.0",
         "is-path-inside": "^3.0.3",
-        "js-sdsl": "^4.1.4",
         "js-yaml": "^4.1.0",
         "json-stable-stringify-without-jsonify": "^1.0.1",
         "levn": "^0.4.1",
         "lodash.merge": "^4.6.2",
         "minimatch": "^3.1.2",
         "natural-compare": "^1.4.0",
-        "optionator": "^0.9.1",
-        "regexpp": "^3.2.0",
+        "optionator": "^0.9.3",
         "strip-ansi": "^6.0.1",
-        "strip-json-comments": "^3.1.0",
         "text-table": "^0.2.0"
       },
       "bin": {
@@ -2144,9 +1857,9 @@
       }
     },
     "node_modules/eslint-module-utils": {
-      "version": "2.7.4",
-      "resolved": "https://registry.npmjs.org/eslint-module-utils/-/eslint-module-utils-2.7.4.tgz",
-      "integrity": "sha512-j4GT+rqzCoRKHwURX7pddtIPGySnX9Si/cgMI5ztrcqOPtk5dDEeZ34CQVPphnqkJytlc97Vuk05Um2mJ3gEQA==",
+      "version": "2.8.0",
+      "resolved": "https://registry.npmjs.org/eslint-module-utils/-/eslint-module-utils-2.8.0.tgz",
+      "integrity": "sha512-aWajIYfsqCKRDgUfjEXNN/JlrzauMuSEy5sbd7WXbtW3EH6A6MpwEh42c7qD+MqQo9QMJ6fWLAeIJynx0g6OAw==",
       "dev": true,
       "dependencies": {
         "debug": "^3.2.7"
@@ -2179,26 +1892,28 @@
       }
     },
     "node_modules/eslint-plugin-import": {
-      "version": "2.27.5",
-      "resolved": "https://registry.npmjs.org/eslint-plugin-import/-/eslint-plugin-import-2.27.5.tgz",
-      "integrity": "sha512-LmEt3GVofgiGuiE+ORpnvP+kAm3h6MLZJ4Q5HCyHADofsb4VzXFsRiWj3c0OFiV+3DWFh0qg3v9gcPlfc3zRow==",
+      "version": "2.28.1",
+      "resolved": "https://registry.npmjs.org/eslint-plugin-import/-/eslint-plugin-import-2.28.1.tgz",
+      "integrity": "sha512-9I9hFlITvOV55alzoKBI+K9q74kv0iKMeY6av5+umsNwayt59fz692daGyjR+oStBQgx6nwR9rXldDev3Clw+A==",
       "dev": true,
       "dependencies": {
         "array-includes": "^3.1.6",
+        "array.prototype.findlastindex": "^1.2.2",
         "array.prototype.flat": "^1.3.1",
         "array.prototype.flatmap": "^1.3.1",
         "debug": "^3.2.7",
         "doctrine": "^2.1.0",
         "eslint-import-resolver-node": "^0.3.7",
-        "eslint-module-utils": "^2.7.4",
+        "eslint-module-utils": "^2.8.0",
         "has": "^1.0.3",
-        "is-core-module": "^2.11.0",
+        "is-core-module": "^2.13.0",
         "is-glob": "^4.0.3",
         "minimatch": "^3.1.2",
+        "object.fromentries": "^2.0.6",
+        "object.groupby": "^1.0.0",
         "object.values": "^1.1.6",
-        "resolve": "^1.22.1",
-        "semver": "^6.3.0",
-        "tsconfig-paths": "^3.14.1"
+        "semver": "^6.3.1",
+        "tsconfig-paths": "^3.14.2"
       },
       "engines": {
         "node": ">=4"
@@ -2229,30 +1944,32 @@
       }
     },
     "node_modules/eslint-plugin-import/node_modules/semver": {
-      "version": "6.3.0",
-      "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.0.tgz",
-      "integrity": "sha512-b39TBaTSfV6yBrapU89p5fKekE2m/NwnDocOVruQFS1/veMgdzuPcnOM34M6CwxW8jH/lxEa5rBoDeUwu5HHTw==",
+      "version": "6.3.1",
+      "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.1.tgz",
+      "integrity": "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==",
       "dev": true,
       "bin": {
         "semver": "bin/semver.js"
       }
     },
     "node_modules/eslint-plugin-jsdoc": {
-      "version": "40.0.1",
-      "resolved": "https://registry.npmjs.org/eslint-plugin-jsdoc/-/eslint-plugin-jsdoc-40.0.1.tgz",
-      "integrity": "sha512-KkiRInury7YrjjV5aCHDxwsPy6XFt5p2b2CnpDMITnWs8patNPf5kj24+VXIWw45kP6z/B0GOKfrYczB56OjQQ==",
+      "version": "46.8.2",
+      "resolved": "https://registry.npmjs.org/eslint-plugin-jsdoc/-/eslint-plugin-jsdoc-46.8.2.tgz",
+      "integrity": "sha512-5TSnD018f3tUJNne4s4gDWQflbsgOycIKEUBoCLn6XtBMgNHxQFmV8vVxUtiPxAQq8lrX85OaSG/2gnctxw9uQ==",
       "dev": true,
       "dependencies": {
-        "@es-joy/jsdoccomment": "~0.36.1",
-        "comment-parser": "1.3.1",
+        "@es-joy/jsdoccomment": "~0.40.1",
+        "are-docs-informative": "^0.0.2",
+        "comment-parser": "1.4.0",
         "debug": "^4.3.4",
         "escape-string-regexp": "^4.0.0",
-        "esquery": "^1.4.0",
-        "semver": "^7.3.8",
+        "esquery": "^1.5.0",
+        "is-builtin-module": "^3.2.1",
+        "semver": "^7.5.4",
         "spdx-expression-parse": "^3.0.1"
       },
       "engines": {
-        "node": "^14 || ^16 || ^17 || ^18 || ^19"
+        "node": ">=16"
       },
       "peerDependencies": {
         "eslint": "^7.0.0 || ^8.0.0"
@@ -2268,118 +1985,74 @@
       }
     },
     "node_modules/eslint-plugin-unicorn": {
-      "version": "46.0.0",
-      "resolved": "https://registry.npmjs.org/eslint-plugin-unicorn/-/eslint-plugin-unicorn-46.0.0.tgz",
-      "integrity": "sha512-j07WkC+PFZwk8J33LYp6JMoHa1lXc1u6R45pbSAipjpfpb7KIGr17VE2D685zCxR5VL4cjrl65kTJflziQWMDA==",
+      "version": "48.0.1",
+      "resolved": "https://registry.npmjs.org/eslint-plugin-unicorn/-/eslint-plugin-unicorn-48.0.1.tgz",
+      "integrity": "sha512-FW+4r20myG/DqFcCSzoumaddKBicIPeFnTrifon2mWIzlfyvzwyqZjqVP7m4Cqr/ZYisS2aiLghkUWaPg6vtCw==",
       "dev": true,
       "dependencies": {
-        "@babel/helper-validator-identifier": "^7.19.1",
-        "@eslint-community/eslint-utils": "^4.1.2",
-        "ci-info": "^3.6.1",
+        "@babel/helper-validator-identifier": "^7.22.5",
+        "@eslint-community/eslint-utils": "^4.4.0",
+        "ci-info": "^3.8.0",
         "clean-regexp": "^1.0.0",
-        "esquery": "^1.4.0",
+        "esquery": "^1.5.0",
         "indent-string": "^4.0.0",
-        "is-builtin-module": "^3.2.0",
+        "is-builtin-module": "^3.2.1",
         "jsesc": "^3.0.2",
         "lodash": "^4.17.21",
         "pluralize": "^8.0.0",
         "read-pkg-up": "^7.0.1",
-        "regexp-tree": "^0.1.24",
-        "regjsparser": "^0.9.1",
-        "safe-regex": "^2.1.1",
-        "semver": "^7.3.8",
+        "regexp-tree": "^0.1.27",
+        "regjsparser": "^0.10.0",
+        "semver": "^7.5.4",
         "strip-indent": "^3.0.0"
       },
       "engines": {
-        "node": ">=14.18"
+        "node": ">=16"
       },
       "funding": {
         "url": "https://github.com/sindresorhus/eslint-plugin-unicorn?sponsor=1"
       },
       "peerDependencies": {
-        "eslint": ">=8.28.0"
+        "eslint": ">=8.44.0"
       }
     },
     "node_modules/eslint-scope": {
-      "version": "5.1.1",
-      "resolved": "https://registry.npmjs.org/eslint-scope/-/eslint-scope-5.1.1.tgz",
-      "integrity": "sha512-2NxwbF/hZ0KpepYN0cNbo+FN6XoK7GaHlQhgx/hIZl6Va0bF45RQOOwhLIy8lQDbuCiadSLCBnH2CFYquit5bw==",
+      "version": "7.2.2",
+      "resolved": "https://registry.npmjs.org/eslint-scope/-/eslint-scope-7.2.2.tgz",
+      "integrity": "sha512-dOt21O7lTMhDM+X9mB4GX+DZrZtCUJPL/wlcTqxyrx5IvO0IYtILdtrQGQp+8n5S0gwSVmOf9NQrjMOgfQZlIg==",
       "dev": true,
       "dependencies": {
         "esrecurse": "^4.3.0",
-        "estraverse": "^4.1.1"
-      },
-      "engines": {
-        "node": ">=8.0.0"
-      }
-    },
-    "node_modules/eslint-utils": {
-      "version": "3.0.0",
-      "resolved": "https://registry.npmjs.org/eslint-utils/-/eslint-utils-3.0.0.tgz",
-      "integrity": "sha512-uuQC43IGctw68pJA1RgbQS8/NP7rch6Cwd4j3ZBtgo4/8Flj4eGE7ZYSZRN3iq5pVUv6GPdW5Z1RFleo84uLDA==",
-      "dev": true,
-      "dependencies": {
-        "eslint-visitor-keys": "^2.0.0"
+        "estraverse": "^5.2.0"
       },
       "engines": {
-        "node": "^10.0.0 || ^12.0.0 || >= 14.0.0"
+        "node": "^12.22.0 || ^14.17.0 || >=16.0.0"
       },
       "funding": {
-        "url": "https://github.com/sponsors/mysticatea"
-      },
-      "peerDependencies": {
-        "eslint": ">=5"
-      }
-    },
-    "node_modules/eslint-utils/node_modules/eslint-visitor-keys": {
-      "version": "2.1.0",
-      "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-2.1.0.tgz",
-      "integrity": "sha512-0rSmRBzXgDzIsD6mGdJgevzgezI534Cer5L/vyMX0kHzT/jiB43jRhd9YUlMGYLQy2zprNmoT8qasCGtY+QaKw==",
-      "dev": true,
-      "engines": {
-        "node": ">=10"
+        "url": "https://opencollective.com/eslint"
       }
     },
     "node_modules/eslint-visitor-keys": {
-      "version": "3.3.0",
-      "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-3.3.0.tgz",
-      "integrity": "sha512-mQ+suqKJVyeuwGYHAdjMFqjCyfl8+Ldnxuyp3ldiMBFKkvytrXUZWaiPCEav8qDHKty44bD+qV1IP4T+w+xXRA==",
+      "version": "3.4.3",
+      "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-3.4.3.tgz",
+      "integrity": "sha512-wpc+LXeiyiisxPlEkUzU6svyS1frIO3Mgxj1fdy7Pm8Ygzguax2N3Fa/D/ag1WqbOprdI+uY6wMUl8/a2G+iag==",
       "dev": true,
       "engines": {
         "node": "^12.22.0 || ^14.17.0 || >=16.0.0"
-      }
-    },
-    "node_modules/eslint/node_modules/eslint-scope": {
-      "version": "7.1.1",
-      "resolved": "https://registry.npmjs.org/eslint-scope/-/eslint-scope-7.1.1.tgz",
-      "integrity": "sha512-QKQM/UXpIiHcLqJ5AOyIW7XZmzjkzQXYE54n1++wb0u9V/abW3l9uQnxX8Z5Xd18xyKIMTUAyQ0k1e8pz6LUrw==",
-      "dev": true,
-      "dependencies": {
-        "esrecurse": "^4.3.0",
-        "estraverse": "^5.2.0"
       },
-      "engines": {
-        "node": "^12.22.0 || ^14.17.0 || >=16.0.0"
-      }
-    },
-    "node_modules/eslint/node_modules/estraverse": {
-      "version": "5.3.0",
-      "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.3.0.tgz",
-      "integrity": "sha512-MMdARuVEQziNTeJD8DgMqmhwR11BRQ/cBP+pLtYdSTnf3MIO8fFeiINEbX36ZdNlfU/7A9f3gUw49B3oQsvwBA==",
-      "dev": true,
-      "engines": {
-        "node": ">=4.0"
+      "funding": {
+        "url": "https://opencollective.com/eslint"
       }
     },
     "node_modules/espree": {
-      "version": "9.4.1",
-      "resolved": "https://registry.npmjs.org/espree/-/espree-9.4.1.tgz",
-      "integrity": "sha512-XwctdmTO6SIvCzd9810yyNzIrOrqNYV9Koizx4C/mRhf9uq0o4yHoCEU/670pOxOL/MSraektvSAji79kX90Vg==",
+      "version": "9.6.1",
+      "resolved": "https://registry.npmjs.org/espree/-/espree-9.6.1.tgz",
+      "integrity": "sha512-oruZaFkjorTpF32kDSI5/75ViwGeZginGGy2NoOSg3Q9bnwlnmDm4HLnkl0RE3n+njDXR037aY1+x58Z/zFdwQ==",
       "dev": true,
       "dependencies": {
-        "acorn": "^8.8.0",
+        "acorn": "^8.9.0",
         "acorn-jsx": "^5.3.2",
-        "eslint-visitor-keys": "^3.3.0"
+        "eslint-visitor-keys": "^3.4.1"
       },
       "engines": {
         "node": "^12.22.0 || ^14.17.0 || >=16.0.0"
@@ -2400,15 +2073,6 @@
         "node": ">=0.10"
       }
     },
-    "node_modules/esquery/node_modules/estraverse": {
-      "version": "5.3.0",
-      "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.3.0.tgz",
-      "integrity": "sha512-MMdARuVEQziNTeJD8DgMqmhwR11BRQ/cBP+pLtYdSTnf3MIO8fFeiINEbX36ZdNlfU/7A9f3gUw49B3oQsvwBA==",
-      "dev": true,
-      "engines": {
-        "node": ">=4.0"
-      }
-    },
     "node_modules/esrecurse": {
       "version": "4.3.0",
       "resolved": "https://registry.npmjs.org/esrecurse/-/esrecurse-4.3.0.tgz",
@@ -2421,7 +2085,7 @@
         "node": ">=4.0"
       }
     },
-    "node_modules/esrecurse/node_modules/estraverse": {
+    "node_modules/estraverse": {
       "version": "5.3.0",
       "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.3.0.tgz",
       "integrity": "sha512-MMdARuVEQziNTeJD8DgMqmhwR11BRQ/cBP+pLtYdSTnf3MIO8fFeiINEbX36ZdNlfU/7A9f3gUw49B3oQsvwBA==",
@@ -2430,15 +2094,6 @@
         "node": ">=4.0"
       }
     },
-    "node_modules/estraverse": {
-      "version": "4.3.0",
-      "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-4.3.0.tgz",
-      "integrity": "sha512-39nnKffWz8xN1BU/2c79n9nB9HDzo0niYUqx6xyqUnyoAnQyyWpOTdZEeiCch8BBu515t4wp9ZmgVfVhn9EBpw==",
-      "dev": true,
-      "engines": {
-        "node": ">=4.0"
-      }
-    },
     "node_modules/esutils": {
       "version": "2.0.3",
       "resolved": "https://registry.npmjs.org/esutils/-/esutils-2.0.3.tgz",
@@ -2466,16 +2121,6 @@
         "node": ">=0.8.x"
       }
     },
-    "node_modules/evp_bytestokey": {
-      "version": "1.0.3",
-      "resolved": "https://registry.npmjs.org/evp_bytestokey/-/evp_bytestokey-1.0.3.tgz",
-      "integrity": "sha512-/f2Go4TognH/KvCISP7OUsHn85hT9nUkxxA9BEWxFn+Oj9o8ZNLm/40hdlgSLyuOimsrTKLUMEorQexp/aPQeA==",
-      "dev": true,
-      "dependencies": {
-        "md5.js": "^1.3.4",
-        "safe-buffer": "^5.1.1"
-      }
-    },
     "node_modules/fast-deep-equal": {
       "version": "3.1.3",
       "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz",
@@ -2483,9 +2128,9 @@
       "dev": true
     },
     "node_modules/fast-glob": {
-      "version": "3.2.12",
-      "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.2.12.tgz",
-      "integrity": "sha512-DVj4CQIYYow0BlaelwK1pHl5n5cRSJfM60UA0zK891sVInoPri2Ekj7+e1CT3/3qxXenpI+nBBmQAcJPJgaj4w==",
+      "version": "3.3.1",
+      "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.3.1.tgz",
+      "integrity": "sha512-kNFPyjhh5cKjrUltxs+wFx+ZkbRaxxmZ+X0ZU31SOsxCEtP9VPgtq2teZw1DebupL5GmDaNQ6yKMMVcM41iqDg==",
       "dev": true,
       "dependencies": {
         "@nodelib/fs.stat": "^2.0.2",
@@ -2522,15 +2167,6 @@
       "integrity": "sha512-DCXu6Ifhqcks7TZKY3Hxp3y6qphY5SJZmrWMDrKcERSOXWQdMhU9Ig/PYrzyw/ul9jOIyh0N4M0tbC5hodg8dw==",
       "dev": true
     },
-    "node_modules/fastest-levenshtein": {
-      "version": "1.0.16",
-      "resolved": "https://registry.npmjs.org/fastest-levenshtein/-/fastest-levenshtein-1.0.16.tgz",
-      "integrity": "sha512-eRnCtTTtGZFpQCwhJiUOuxPQWRXVKYDn0b2PeHfXL6/Zi53SLAzAHfVhVWK2AryC/WH05kGfxhFIPvTF0SXQzg==",
-      "dev": true,
-      "engines": {
-        "node": ">= 4.9.1"
-      }
-    },
     "node_modules/fastq": {
       "version": "1.15.0",
       "resolved": "https://registry.npmjs.org/fastq/-/fastq-1.15.0.tgz",
@@ -2564,15 +2200,6 @@
         "node": ">=8"
       }
     },
-    "node_modules/filter-obj": {
-      "version": "2.0.2",
-      "resolved": "https://registry.npmjs.org/filter-obj/-/filter-obj-2.0.2.tgz",
-      "integrity": "sha512-lO3ttPjHZRfjMcxWKb1j1eDhTFsu4meeR3lnMcnBFhk6RuLhvEiuALu2TlfL310ph4lCYYwgF/ElIjdP739tdg==",
-      "dev": true,
-      "engines": {
-        "node": ">=8"
-      }
-    },
     "node_modules/find-up": {
       "version": "5.0.0",
       "resolved": "https://registry.npmjs.org/find-up/-/find-up-5.0.0.tgz",
@@ -2627,9 +2254,9 @@
       }
     },
     "node_modules/fs-extra": {
-      "version": "11.1.0",
-      "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-11.1.0.tgz",
-      "integrity": "sha512-0rcTq621PD5jM/e0a3EJoGC/1TC5ZBCERW82LQuwfGnCa1V8w7dpYH1yNu+SLb6E5dkeCBzKEyLGlFrnr+dUyw==",
+      "version": "11.1.1",
+      "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-11.1.1.tgz",
+      "integrity": "sha512-MGIE4HOvQCeUCzmlHs0vXpih4ysz4wg9qiSAu6cd42lVwPbTM1TjV7RusoyQqMmk/95gdQZX72u+YW+c3eEpFQ==",
       "dev": true,
       "dependencies": {
         "graceful-fs": "^4.2.0",
@@ -2667,15 +2294,15 @@
       "dev": true
     },
     "node_modules/function.prototype.name": {
-      "version": "1.1.5",
-      "resolved": "https://registry.npmjs.org/function.prototype.name/-/function.prototype.name-1.1.5.tgz",
-      "integrity": "sha512-uN7m/BzVKQnCUF/iW8jYea67v++2u7m5UgENbHRtdDVclOUP+FMPlCNdmk0h/ysGyo2tavMJEDqJAkJdRa1vMA==",
+      "version": "1.1.6",
+      "resolved": "https://registry.npmjs.org/function.prototype.name/-/function.prototype.name-1.1.6.tgz",
+      "integrity": "sha512-Z5kx79swU5P27WEayXM1tBi5Ze/lbIyiNgU3qyXUOf9b2rgXYyF9Dy9Cx+IQv/Lc8WCG6L82zwUPpSS9hGehIg==",
       "dev": true,
       "dependencies": {
         "call-bind": "^1.0.2",
-        "define-properties": "^1.1.3",
-        "es-abstract": "^1.19.0",
-        "functions-have-names": "^1.2.2"
+        "define-properties": "^1.2.0",
+        "es-abstract": "^1.22.1",
+        "functions-have-names": "^1.2.3"
       },
       "engines": {
         "node": ">= 0.4"
@@ -2722,13 +2349,14 @@
       }
     },
     "node_modules/get-intrinsic": {
-      "version": "1.2.0",
-      "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.2.0.tgz",
-      "integrity": "sha512-L049y6nFOuom5wGyRc3/gdTLO94dySVKRACj1RmJZBQXlbTMhtNIgkWkUHq+jYmZvKf14EW1EoJnnjbmoHij0Q==",
+      "version": "1.2.1",
+      "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.2.1.tgz",
+      "integrity": "sha512-2DcsyfABl+gVHEfCOaTrWgyt+tb6MSEGmKq+kI5HwLbIYgjgmMcV8KQ41uaKz1xxUcn9tJtgFbQUEVcEbd0FYw==",
       "dev": true,
       "dependencies": {
         "function-bind": "^1.1.1",
         "has": "^1.0.3",
+        "has-proto": "^1.0.1",
         "has-symbols": "^1.0.3"
       },
       "funding": {
@@ -2783,16 +2411,10 @@
         "node": ">=10.13.0"
       }
     },
-    "node_modules/glob-to-regexp": {
-      "version": "0.4.1",
-      "resolved": "https://registry.npmjs.org/glob-to-regexp/-/glob-to-regexp-0.4.1.tgz",
-      "integrity": "sha512-lkX1HJXwyMcprw/5YUZc2s7DrpAiHB21/V+E1rHUrVNokkvB6bqMzT0VfV6/86ZNabt1k14YOIaT7nDvOX3Iiw==",
-      "dev": true
-    },
     "node_modules/globals": {
-      "version": "13.20.0",
-      "resolved": "https://registry.npmjs.org/globals/-/globals-13.20.0.tgz",
-      "integrity": "sha512-Qg5QtVkCy/kv3FUSlu4ukeZDVf9ee0iXLAUYX13gbR17bnejFTzr4iS9bY7kwCf1NztRNm1t91fjOiyx4CSwPQ==",
+      "version": "13.23.0",
+      "resolved": "https://registry.npmjs.org/globals/-/globals-13.23.0.tgz",
+      "integrity": "sha512-XAmF0RjlrjY23MA51q3HltdlGxUpXPvg0GioKiD9X6HD28iMjo2dKC8Vqwm7lne4GNr78+RHTfliktR6ZH09wA==",
       "dev": true,
       "dependencies": {
         "type-fest": "^0.20.2"
@@ -2857,27 +2479,12 @@
       "integrity": "sha512-9ByhssR2fPVsNZj478qUUbKfmL0+t5BDVyjShtyZZLiK7ZDAArFFfopyOTj0M05wE2tJPisA4iTnnXl2YoPvOA==",
       "dev": true
     },
-    "node_modules/grapheme-splitter": {
-      "version": "1.0.4",
-      "resolved": "https://registry.npmjs.org/grapheme-splitter/-/grapheme-splitter-1.0.4.tgz",
-      "integrity": "sha512-bzh50DW9kTPM00T8y4o8vQg89Di9oLJVLW/KaOGIXJWP/iqCN6WKYkbNOF04vFLJhwcpYUh9ydh/+5vpOqV4YQ==",
+    "node_modules/graphemer": {
+      "version": "1.4.0",
+      "resolved": "https://registry.npmjs.org/graphemer/-/graphemer-1.4.0.tgz",
+      "integrity": "sha512-EtKwoO6kxCL9WO5xipiHTZlSzBm7WLT627TqC/uVRd0HKmq8NXyebnNYxDoBi7wt8eTWrUrKXCOVaFq9x1kgag==",
       "dev": true
     },
-    "node_modules/gzip-size": {
-      "version": "6.0.0",
-      "resolved": "https://registry.npmjs.org/gzip-size/-/gzip-size-6.0.0.tgz",
-      "integrity": "sha512-ax7ZYomf6jqPTQ4+XCpUGyXKHk5WweS+e05MBO4/y3WJ5RkmPXNKvX+bx1behVILVwr6JSQvZAku021CHPXG3Q==",
-      "dev": true,
-      "dependencies": {
-        "duplexer": "^0.1.2"
-      },
-      "engines": {
-        "node": ">=10"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/sindresorhus"
-      }
-    },
     "node_modules/has": {
       "version": "1.0.3",
       "resolved": "https://registry.npmjs.org/has/-/has-1.0.3.tgz",
@@ -2945,84 +2552,26 @@
       }
     },
     "node_modules/has-tostringtag": {
-      "version": "1.0.0",
-      "resolved": "https://registry.npmjs.org/has-tostringtag/-/has-tostringtag-1.0.0.tgz",
-      "integrity": "sha512-kFjcSNhnlGV1kyoGk7OXKSawH5JOb/LzUc5w9B02hOTO0dfFRjbHQKvg1d6cf3HbeUmtU9VbbV3qzZ2Teh97WQ==",
-      "dev": true,
-      "dependencies": {
-        "has-symbols": "^1.0.2"
-      },
-      "engines": {
-        "node": ">= 0.4"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/ljharb"
-      }
-    },
-    "node_modules/has-unicode": {
-      "version": "2.0.1",
-      "resolved": "https://registry.npmjs.org/has-unicode/-/has-unicode-2.0.1.tgz",
-      "integrity": "sha512-8Rf9Y83NBReMnx0gFzA8JImQACstCYWUplepDa9xprwwtmgEZUF0h/i5xSA625zB/I37EtrswSST6OXxwaaIJQ==",
-      "dev": true
-    },
-    "node_modules/hash-base": {
-      "version": "3.1.0",
-      "resolved": "https://registry.npmjs.org/hash-base/-/hash-base-3.1.0.tgz",
-      "integrity": "sha512-1nmYp/rhMDiE7AYkDw+lLwlAzz0AntGIe51F3RfFfEqyQ3feY2eI/NcwC6umIQVOASPMsWJLJScWKSSvzL9IVA==",
-      "dev": true,
-      "dependencies": {
-        "inherits": "^2.0.4",
-        "readable-stream": "^3.6.0",
-        "safe-buffer": "^5.2.0"
-      },
-      "engines": {
-        "node": ">=4"
-      }
-    },
-    "node_modules/hash-base/node_modules/readable-stream": {
-      "version": "3.6.1",
-      "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.1.tgz",
-      "integrity": "sha512-+rQmrWMYGA90yenhTYsLWAsLsqVC8osOw6PKE1HDYiO0gdPeKe/xDHNzIAIn4C91YQ6oenEhfYqqc1883qHbjQ==",
-      "dev": true,
-      "dependencies": {
-        "inherits": "^2.0.3",
-        "string_decoder": "^1.1.1",
-        "util-deprecate": "^1.0.1"
-      },
-      "engines": {
-        "node": ">= 6"
-      }
-    },
-    "node_modules/hash-base/node_modules/safe-buffer": {
-      "version": "5.2.1",
-      "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz",
-      "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==",
-      "dev": true,
-      "funding": [
-        {
-          "type": "github",
-          "url": "https://github.com/sponsors/feross"
-        },
-        {
-          "type": "patreon",
-          "url": "https://www.patreon.com/feross"
-        },
-        {
-          "type": "consulting",
-          "url": "https://feross.org/support"
-        }
-      ]
-    },
-    "node_modules/hash.js": {
-      "version": "1.1.7",
-      "resolved": "https://registry.npmjs.org/hash.js/-/hash.js-1.1.7.tgz",
-      "integrity": "sha512-taOaskGt4z4SOANNseOviYDvjEJinIkRgmp7LbKP2YTTmVxWBl87s/uzK9r+44BclBSp2X7K1hqeNfz9JbBeXA==",
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/has-tostringtag/-/has-tostringtag-1.0.0.tgz",
+      "integrity": "sha512-kFjcSNhnlGV1kyoGk7OXKSawH5JOb/LzUc5w9B02hOTO0dfFRjbHQKvg1d6cf3HbeUmtU9VbbV3qzZ2Teh97WQ==",
       "dev": true,
       "dependencies": {
-        "inherits": "^2.0.3",
-        "minimalistic-assert": "^1.0.1"
+        "has-symbols": "^1.0.2"
+      },
+      "engines": {
+        "node": ">= 0.4"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/ljharb"
       }
     },
+    "node_modules/has-unicode": {
+      "version": "2.0.1",
+      "resolved": "https://registry.npmjs.org/has-unicode/-/has-unicode-2.0.1.tgz",
+      "integrity": "sha512-8Rf9Y83NBReMnx0gFzA8JImQACstCYWUplepDa9xprwwtmgEZUF0h/i5xSA625zB/I37EtrswSST6OXxwaaIJQ==",
+      "dev": true
+    },
     "node_modules/he": {
       "version": "1.2.0",
       "resolved": "https://registry.npmjs.org/he/-/he-1.2.0.tgz",
@@ -3032,29 +2581,12 @@
         "he": "bin/he"
       }
     },
-    "node_modules/hmac-drbg": {
-      "version": "1.0.1",
-      "resolved": "https://registry.npmjs.org/hmac-drbg/-/hmac-drbg-1.0.1.tgz",
-      "integrity": "sha512-Tti3gMqLdZfhOQY1Mzf/AanLiqh1WTiJgEj26ZuYQ9fbkLomzGchCws4FyrSd4VkpBfiNhaE1On+lOz894jvXg==",
-      "dev": true,
-      "dependencies": {
-        "hash.js": "^1.0.3",
-        "minimalistic-assert": "^1.0.0",
-        "minimalistic-crypto-utils": "^1.0.1"
-      }
-    },
     "node_modules/hosted-git-info": {
       "version": "2.8.9",
       "resolved": "https://registry.npmjs.org/hosted-git-info/-/hosted-git-info-2.8.9.tgz",
       "integrity": "sha512-mxIDAb9Lsm6DoOJ7xH+5+X4y1LU/4Hi50L9C5sIswK3JzULS4bwk1FvjdBgvYR4bzT4tuUQiC15FE2f5HbLvYw==",
       "dev": true
     },
-    "node_modules/https-browserify": {
-      "version": "1.0.0",
-      "resolved": "https://registry.npmjs.org/https-browserify/-/https-browserify-1.0.0.tgz",
-      "integrity": "sha512-J+FkSdyD+0mA0N+81tMotaRMfSL9SGi+xpD3T6YApKsc3bGSXJlfXri3VyFOeYkfLRQisDk1W+jIFFKBeUBbBg==",
-      "dev": true
-    },
     "node_modules/ieee754": {
       "version": "1.2.1",
       "resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz",
@@ -3106,23 +2638,14 @@
         "url": "https://github.com/sponsors/sindresorhus"
       }
     },
-    "node_modules/import-local": {
-      "version": "3.1.0",
-      "resolved": "https://registry.npmjs.org/import-local/-/import-local-3.1.0.tgz",
-      "integrity": "sha512-ASB07uLtnDs1o6EHjKpX34BKYDSqnFerfTOJL2HvMqF70LnxpjkzDB8J44oT9pu4AMPkQwf8jl6szgvNd2tRIg==",
+    "node_modules/import-meta-resolve": {
+      "version": "3.0.0",
+      "resolved": "https://registry.npmjs.org/import-meta-resolve/-/import-meta-resolve-3.0.0.tgz",
+      "integrity": "sha512-4IwhLhNNA8yy445rPjD/lWh++7hMDOml2eHtd58eG7h+qK3EryMuuRbsHGPikCoAgIkkDnckKfWSk2iDla/ejg==",
       "dev": true,
-      "dependencies": {
-        "pkg-dir": "^4.2.0",
-        "resolve-cwd": "^3.0.0"
-      },
-      "bin": {
-        "import-local-fixture": "fixtures/cli.js"
-      },
-      "engines": {
-        "node": ">=8"
-      },
       "funding": {
-        "url": "https://github.com/sponsors/sindresorhus"
+        "type": "github",
+        "url": "https://github.com/sponsors/wooorm"
       }
     },
     "node_modules/imurmurhash": {
@@ -3173,31 +2696,6 @@
         "node": ">= 0.4"
       }
     },
-    "node_modules/interpret": {
-      "version": "3.1.1",
-      "resolved": "https://registry.npmjs.org/interpret/-/interpret-3.1.1.tgz",
-      "integrity": "sha512-6xwYfHbajpoF0xLW+iwLkhwgvLoZDfjYfoFNu8ftMoXINzwuymNLd9u/KmwtdT2GbR+/Cz66otEGEVVUHX9QLQ==",
-      "dev": true,
-      "engines": {
-        "node": ">=10.13.0"
-      }
-    },
-    "node_modules/is-arguments": {
-      "version": "1.1.1",
-      "resolved": "https://registry.npmjs.org/is-arguments/-/is-arguments-1.1.1.tgz",
-      "integrity": "sha512-8Q7EARjzEnKpt/PCD7e1cgUS0a6X8u5tdSiMqXhojOdoV9TsMsiO+9VLC5vAmO8N7/GmXn7yjR8qnA6bVAEzfA==",
-      "dev": true,
-      "dependencies": {
-        "call-bind": "^1.0.2",
-        "has-tostringtag": "^1.0.0"
-      },
-      "engines": {
-        "node": ">= 0.4"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/ljharb"
-      }
-    },
     "node_modules/is-array-buffer": {
       "version": "3.0.2",
       "resolved": "https://registry.npmjs.org/is-array-buffer/-/is-array-buffer-3.0.2.tgz",
@@ -3286,9 +2784,9 @@
       }
     },
     "node_modules/is-core-module": {
-      "version": "2.11.0",
-      "resolved": "https://registry.npmjs.org/is-core-module/-/is-core-module-2.11.0.tgz",
-      "integrity": "sha512-RRjxlvLDkD1YJwDbroBHMb+cukurkDWNyHx7D3oNB5x9rb5ogcksMC5wHCadcXoo67gVr/+3GFySh3134zi6rw==",
+      "version": "2.13.0",
+      "resolved": "https://registry.npmjs.org/is-core-module/-/is-core-module-2.13.0.tgz",
+      "integrity": "sha512-Z7dk6Qo8pOCp3l4tsX2C5ZVas4V+UxwQodwZhLopL91TX8UyyHEXafPcyoeeWuLrwzHcr3igO78wNLwHJHsMCQ==",
       "dev": true,
       "dependencies": {
         "has": "^1.0.3"
@@ -3330,21 +2828,6 @@
         "node": ">=8"
       }
     },
-    "node_modules/is-generator-function": {
-      "version": "1.0.10",
-      "resolved": "https://registry.npmjs.org/is-generator-function/-/is-generator-function-1.0.10.tgz",
-      "integrity": "sha512-jsEjy9l3yiXEQ+PsXdmBwEPcOxaXWLspKdplFUVI9vq1iZgIekeC0L167qeu86czQaxed3q/Uzuw0swL0irL8A==",
-      "dev": true,
-      "dependencies": {
-        "has-tostringtag": "^1.0.0"
-      },
-      "engines": {
-        "node": ">= 0.4"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/ljharb"
-      }
-    },
     "node_modules/is-glob": {
       "version": "4.0.3",
       "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.3.tgz",
@@ -3357,22 +2840,6 @@
         "node": ">=0.10.0"
       }
     },
-    "node_modules/is-nan": {
-      "version": "1.3.2",
-      "resolved": "https://registry.npmjs.org/is-nan/-/is-nan-1.3.2.tgz",
-      "integrity": "sha512-E+zBKpQ2t6MEo1VsonYmluk9NxGrbzpeeLC2xIViuO2EjU2xsXsBPwTr3Ykv9l08UYEVEdWeRZNouaZqF6RN0w==",
-      "dev": true,
-      "dependencies": {
-        "call-bind": "^1.0.0",
-        "define-properties": "^1.1.3"
-      },
-      "engines": {
-        "node": ">= 0.4"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/ljharb"
-      }
-    },
     "node_modules/is-negative-zero": {
       "version": "2.0.2",
       "resolved": "https://registry.npmjs.org/is-negative-zero/-/is-negative-zero-2.0.2.tgz",
@@ -3427,18 +2894,6 @@
         "node": ">=8"
       }
     },
-    "node_modules/is-plain-object": {
-      "version": "2.0.4",
-      "resolved": "https://registry.npmjs.org/is-plain-object/-/is-plain-object-2.0.4.tgz",
-      "integrity": "sha512-h5PpgXkWitc38BBMYawTYMWJHFZJVnBquFE57xFpjB8pJFiF6gZ+bU+WyI/yqXiFR5mdLsgYNaPe8uao6Uv9Og==",
-      "dev": true,
-      "dependencies": {
-        "isobject": "^3.0.1"
-      },
-      "engines": {
-        "node": ">=0.10.0"
-      }
-    },
     "node_modules/is-regex": {
       "version": "1.1.4",
       "resolved": "https://registry.npmjs.org/is-regex/-/is-regex-1.1.4.tgz",
@@ -3498,16 +2953,12 @@
       }
     },
     "node_modules/is-typed-array": {
-      "version": "1.1.10",
-      "resolved": "https://registry.npmjs.org/is-typed-array/-/is-typed-array-1.1.10.tgz",
-      "integrity": "sha512-PJqgEHiWZvMpaFZ3uTc8kHPM4+4ADTlDniuQL7cU/UDA0Ql7F70yGfHph3cLNe+c9toaigv+DFzTJKhc2CtO6A==",
+      "version": "1.1.12",
+      "resolved": "https://registry.npmjs.org/is-typed-array/-/is-typed-array-1.1.12.tgz",
+      "integrity": "sha512-Z14TF2JNG8Lss5/HMqt0//T9JeHXttXy5pH/DBU4vi98ozO2btxzq9MwYDZYnKwU8nRsz/+GVFVRDq3DkVuSPg==",
       "dev": true,
       "dependencies": {
-        "available-typed-arrays": "^1.0.5",
-        "call-bind": "^1.0.2",
-        "for-each": "^0.3.3",
-        "gopd": "^1.0.1",
-        "has-tostringtag": "^1.0.0"
+        "which-typed-array": "^1.1.11"
       },
       "engines": {
         "node": ">= 0.4"
@@ -3552,54 +3003,6 @@
       "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==",
       "dev": true
     },
-    "node_modules/isobject": {
-      "version": "3.0.1",
-      "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz",
-      "integrity": "sha512-WhB9zCku7EGTj/HQQRz5aUQEUeoQZH2bWcltRErOpymJ4boYE6wL9Tbr23krRPSZ+C5zqNSrSw+Cc7sZZ4b7vg==",
-      "dev": true,
-      "engines": {
-        "node": ">=0.10.0"
-      }
-    },
-    "node_modules/jest-worker": {
-      "version": "27.5.1",
-      "resolved": "https://registry.npmjs.org/jest-worker/-/jest-worker-27.5.1.tgz",
-      "integrity": "sha512-7vuh85V5cdDofPyxn58nrPjBktZo0u9x1g8WtjQol+jZDaE+fhN+cIvTj11GndBnMnyfrUOG1sZQxCdjKh+DKg==",
-      "dev": true,
-      "dependencies": {
-        "@types/node": "*",
-        "merge-stream": "^2.0.0",
-        "supports-color": "^8.0.0"
-      },
-      "engines": {
-        "node": ">= 10.13.0"
-      }
-    },
-    "node_modules/jest-worker/node_modules/supports-color": {
-      "version": "8.1.1",
-      "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-8.1.1.tgz",
-      "integrity": "sha512-MpUEN2OodtUzxvKQl72cUF7RQ5EiHsGvSsVG0ia9c5RbWGL2CI4C7EpPS8UTBIplnlzZiNuV56w+FuNxy3ty2Q==",
-      "dev": true,
-      "dependencies": {
-        "has-flag": "^4.0.0"
-      },
-      "engines": {
-        "node": ">=10"
-      },
-      "funding": {
-        "url": "https://github.com/chalk/supports-color?sponsor=1"
-      }
-    },
-    "node_modules/js-sdsl": {
-      "version": "4.3.0",
-      "resolved": "https://registry.npmjs.org/js-sdsl/-/js-sdsl-4.3.0.tgz",
-      "integrity": "sha512-mifzlm2+5nZ+lEcLJMoBK0/IH/bDg8XnJfd/Wq6IP+xoCjLZsTOnV2QpxlVbX9bMnkl5PdEjNtBJ9Cj1NjifhQ==",
-      "dev": true,
-      "funding": {
-        "type": "opencollective",
-        "url": "https://opencollective.com/js-sdsl"
-      }
-    },
     "node_modules/js-tokens": {
       "version": "4.0.0",
       "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz",
@@ -3619,9 +3022,9 @@
       }
     },
     "node_modules/jsdoc-type-pratt-parser": {
-      "version": "3.1.0",
-      "resolved": "https://registry.npmjs.org/jsdoc-type-pratt-parser/-/jsdoc-type-pratt-parser-3.1.0.tgz",
-      "integrity": "sha512-MgtD0ZiCDk9B+eI73BextfRrVQl0oyzRG8B2BjORts6jbunj4ScKPcyXGTbB6eXL4y9TzxCm6hyeLq/2ASzNdw==",
+      "version": "4.0.0",
+      "resolved": "https://registry.npmjs.org/jsdoc-type-pratt-parser/-/jsdoc-type-pratt-parser-4.0.0.tgz",
+      "integrity": "sha512-YtOli5Cmzy3q4dP26GraSOeAhqecewG04hoO8DY56CH4KJ9Fvv5qKWUCCo3HZob7esJQHCv6/+bnTy72xZZaVQ==",
       "dev": true,
       "engines": {
         "node": ">=12.0.0"
@@ -3693,15 +3096,6 @@
         "setimmediate": "^1.0.5"
       }
     },
-    "node_modules/kind-of": {
-      "version": "6.0.3",
-      "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-6.0.3.tgz",
-      "integrity": "sha512-dcS1ul+9tmeD95T+x28/ehLgd9mENa3LsvDTtzm3vyBEO7RPptvAD+t44WVXaUjTBRcrpFeFlC8WCruUR456hw==",
-      "dev": true,
-      "engines": {
-        "node": ">=0.10.0"
-      }
-    },
     "node_modules/levn": {
       "version": "0.4.1",
       "resolved": "https://registry.npmjs.org/levn/-/levn-0.4.1.tgz",
@@ -3730,41 +3124,6 @@
       "integrity": "sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==",
       "dev": true
     },
-    "node_modules/loader-runner": {
-      "version": "4.3.0",
-      "resolved": "https://registry.npmjs.org/loader-runner/-/loader-runner-4.3.0.tgz",
-      "integrity": "sha512-3R/1M+yS3j5ou80Me59j7F9IMs4PXs3VqRrm0TU3AbKPxlmpoY1TNscJV/oGJXo8qCatFGTfDbY6W6ipGOYXfg==",
-      "dev": true,
-      "engines": {
-        "node": ">=6.11.5"
-      }
-    },
-    "node_modules/loader-utils": {
-      "version": "2.0.4",
-      "resolved": "https://registry.npmjs.org/loader-utils/-/loader-utils-2.0.4.tgz",
-      "integrity": "sha512-xXqpXoINfFhgua9xiqD8fPFHgkoq1mmmpE92WlDbm9rNRd/EbRb+Gqf908T2DMfuHjjJlksiK2RbHVOdD/MqSw==",
-      "dev": true,
-      "dependencies": {
-        "big.js": "^5.2.2",
-        "emojis-list": "^3.0.0",
-        "json5": "^2.1.2"
-      },
-      "engines": {
-        "node": ">=8.9.0"
-      }
-    },
-    "node_modules/loader-utils/node_modules/json5": {
-      "version": "2.2.3",
-      "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.3.tgz",
-      "integrity": "sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg==",
-      "dev": true,
-      "bin": {
-        "json5": "lib/cli.js"
-      },
-      "engines": {
-        "node": ">=6"
-      }
-    },
     "node_modules/locate-path": {
       "version": "6.0.0",
       "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-6.0.0.tgz",
@@ -3820,23 +3179,6 @@
         "node": ">=10"
       }
     },
-    "node_modules/md5.js": {
-      "version": "1.3.5",
-      "resolved": "https://registry.npmjs.org/md5.js/-/md5.js-1.3.5.tgz",
-      "integrity": "sha512-xitP+WxNPcTTOgnTJcrhM0xvdPepipPSf3I8EIpGKeFLjt3PlJLIDG3u8EX53ZIubkb+5U2+3rELYpEhHhzdkg==",
-      "dev": true,
-      "dependencies": {
-        "hash-base": "^3.0.0",
-        "inherits": "^2.0.1",
-        "safe-buffer": "^5.1.2"
-      }
-    },
-    "node_modules/merge-stream": {
-      "version": "2.0.0",
-      "resolved": "https://registry.npmjs.org/merge-stream/-/merge-stream-2.0.0.tgz",
-      "integrity": "sha512-abv/qOcuPfk3URPfDzmZU1LKmuw8kT+0nIHvKrKgFrwifol/doWcdA4ZqsWQ8ENrFKkd67Mfpo/LovbIUsbt3w==",
-      "dev": true
-    },
     "node_modules/merge2": {
       "version": "1.4.1",
       "resolved": "https://registry.npmjs.org/merge2/-/merge2-1.4.1.tgz",
@@ -3859,46 +3201,6 @@
         "node": ">=8.6"
       }
     },
-    "node_modules/miller-rabin": {
-      "version": "4.0.1",
-      "resolved": "https://registry.npmjs.org/miller-rabin/-/miller-rabin-4.0.1.tgz",
-      "integrity": "sha512-115fLhvZVqWwHPbClyntxEVfVDfl9DLLTuJvq3g2O/Oxi8AiNouAHvDSzHS0viUJc+V5vm3eq91Xwqn9dp4jRA==",
-      "dev": true,
-      "dependencies": {
-        "bn.js": "^4.0.0",
-        "brorand": "^1.0.1"
-      },
-      "bin": {
-        "miller-rabin": "bin/miller-rabin"
-      }
-    },
-    "node_modules/miller-rabin/node_modules/bn.js": {
-      "version": "4.12.0",
-      "resolved": "https://registry.npmjs.org/bn.js/-/bn.js-4.12.0.tgz",
-      "integrity": "sha512-c98Bf3tPniI+scsdk237ku1Dc3ujXQTSgyiPUDEOe7tRkhrqridvh8klBv0HCEso1OLOYcHuCv/cS6DNxKH+ZA==",
-      "dev": true
-    },
-    "node_modules/mime-db": {
-      "version": "1.52.0",
-      "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz",
-      "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==",
-      "dev": true,
-      "engines": {
-        "node": ">= 0.6"
-      }
-    },
-    "node_modules/mime-types": {
-      "version": "2.1.35",
-      "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz",
-      "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==",
-      "dev": true,
-      "dependencies": {
-        "mime-db": "1.52.0"
-      },
-      "engines": {
-        "node": ">= 0.6"
-      }
-    },
     "node_modules/min-indent": {
       "version": "1.0.1",
       "resolved": "https://registry.npmjs.org/min-indent/-/min-indent-1.0.1.tgz",
@@ -3908,18 +3210,6 @@
         "node": ">=4"
       }
     },
-    "node_modules/minimalistic-assert": {
-      "version": "1.0.1",
-      "resolved": "https://registry.npmjs.org/minimalistic-assert/-/minimalistic-assert-1.0.1.tgz",
-      "integrity": "sha512-UtJcAD4yEaGtjPezWuO9wC4nwUnVH/8/Im3yEHQP4b67cXlD/Qr9hdITCU1xDbSEXg2XKNaP8jsReV7vQd00/A==",
-      "dev": true
-    },
-    "node_modules/minimalistic-crypto-utils": {
-      "version": "1.0.1",
-      "resolved": "https://registry.npmjs.org/minimalistic-crypto-utils/-/minimalistic-crypto-utils-1.0.1.tgz",
-      "integrity": "sha512-JIYlbt6g8i5jKfJ3xz7rF0LXmv2TkDxBLUkiBeZ7bAx4GnnNMr8xFpGnOxn6GhTEHx3SjRrZEoU+j04prX1ktg==",
-      "dev": true
-    },
     "node_modules/minimatch": {
       "version": "3.1.2",
       "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz",
@@ -4064,15 +3354,6 @@
         "url": "https://github.com/chalk/supports-color?sponsor=1"
       }
     },
-    "node_modules/mrmime": {
-      "version": "1.0.1",
-      "resolved": "https://registry.npmjs.org/mrmime/-/mrmime-1.0.1.tgz",
-      "integrity": "sha512-hzzEagAgDyoU1Q6yg5uI+AorQgdvMCur3FcKf7NhMKWsaYg+RnbTyHRa/9IlLF9rf455MOCtcqqrQQ83pPP7Uw==",
-      "dev": true,
-      "engines": {
-        "node": ">=10"
-      }
-    },
     "node_modules/ms": {
       "version": "2.1.2",
       "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz",
@@ -4080,134 +3361,21 @@
       "dev": true
     },
     "node_modules/nanoid": {
-      "version": "3.3.3",
-      "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.3.tgz",
-      "integrity": "sha512-p1sjXuopFs0xg+fPASzQ28agW1oHD7xDsd9Xkf3T15H3c/cifrFHVwrh74PdoklAPi+i7MdRsE47vm2r6JoB+w==",
-      "dev": true,
-      "bin": {
-        "nanoid": "bin/nanoid.cjs"
-      },
-      "engines": {
-        "node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1"
-      }
-    },
-    "node_modules/natural-compare": {
-      "version": "1.4.0",
-      "resolved": "https://registry.npmjs.org/natural-compare/-/natural-compare-1.4.0.tgz",
-      "integrity": "sha512-OWND8ei3VtNC9h7V60qff3SVobHr996CTwgxubgyQYEpg290h9J0buyECNNJexkFm5sOajh5G116RYA1c8ZMSw==",
-      "dev": true
-    },
-    "node_modules/natural-compare-lite": {
-      "version": "1.4.0",
-      "resolved": "https://registry.npmjs.org/natural-compare-lite/-/natural-compare-lite-1.4.0.tgz",
-      "integrity": "sha512-Tj+HTDSJJKaZnfiuw+iaF9skdPpTo2GtEly5JHnWV/hfv2Qj/9RKsGISQtLh2ox3l5EAGw487hnBee0sIJ6v2g==",
-      "dev": true
-    },
-    "node_modules/neo-async": {
-      "version": "2.6.2",
-      "resolved": "https://registry.npmjs.org/neo-async/-/neo-async-2.6.2.tgz",
-      "integrity": "sha512-Yd3UES5mWCSqR+qNT93S3UoYUkqAZ9lLg8a7g9rimsWmYGK8cVToA4/sF3RrshdyV3sAGMXVUmpMYOw+dLpOuw==",
-      "dev": true
-    },
-    "node_modules/node-polyfill-webpack-plugin": {
-      "version": "2.0.1",
-      "resolved": "https://registry.npmjs.org/node-polyfill-webpack-plugin/-/node-polyfill-webpack-plugin-2.0.1.tgz",
-      "integrity": "sha512-ZUMiCnZkP1LF0Th2caY6J/eKKoA0TefpoVa68m/LQU1I/mE8rGt4fNYGgNuCcK+aG8P8P43nbeJ2RqJMOL/Y1A==",
-      "dev": true,
-      "dependencies": {
-        "assert": "^2.0.0",
-        "browserify-zlib": "^0.2.0",
-        "buffer": "^6.0.3",
-        "console-browserify": "^1.2.0",
-        "constants-browserify": "^1.0.0",
-        "crypto-browserify": "^3.12.0",
-        "domain-browser": "^4.22.0",
-        "events": "^3.3.0",
-        "filter-obj": "^2.0.2",
-        "https-browserify": "^1.0.0",
-        "os-browserify": "^0.3.0",
-        "path-browserify": "^1.0.1",
-        "process": "^0.11.10",
-        "punycode": "^2.1.1",
-        "querystring-es3": "^0.2.1",
-        "readable-stream": "^4.0.0",
-        "stream-browserify": "^3.0.0",
-        "stream-http": "^3.2.0",
-        "string_decoder": "^1.3.0",
-        "timers-browserify": "^2.0.12",
-        "tty-browserify": "^0.0.1",
-        "type-fest": "^2.14.0",
-        "url": "^0.11.0",
-        "util": "^0.12.4",
-        "vm-browserify": "^1.1.2"
-      },
-      "engines": {
-        "node": ">=12"
-      },
-      "peerDependencies": {
-        "webpack": ">=5"
-      }
-    },
-    "node_modules/node-polyfill-webpack-plugin/node_modules/readable-stream": {
-      "version": "4.3.0",
-      "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-4.3.0.tgz",
-      "integrity": "sha512-MuEnA0lbSi7JS8XM+WNJlWZkHAAdm7gETHdFK//Q/mChGyj2akEFtdLZh32jSdkWGbRwCW9pn6g3LWDdDeZnBQ==",
-      "dev": true,
-      "dependencies": {
-        "abort-controller": "^3.0.0",
-        "buffer": "^6.0.3",
-        "events": "^3.3.0",
-        "process": "^0.11.10"
-      },
-      "engines": {
-        "node": "^12.22.0 || ^14.17.0 || >=16.0.0"
-      }
-    },
-    "node_modules/node-polyfill-webpack-plugin/node_modules/safe-buffer": {
-      "version": "5.2.1",
-      "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz",
-      "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==",
-      "dev": true,
-      "funding": [
-        {
-          "type": "github",
-          "url": "https://github.com/sponsors/feross"
-        },
-        {
-          "type": "patreon",
-          "url": "https://www.patreon.com/feross"
-        },
-        {
-          "type": "consulting",
-          "url": "https://feross.org/support"
-        }
-      ]
-    },
-    "node_modules/node-polyfill-webpack-plugin/node_modules/string_decoder": {
-      "version": "1.3.0",
-      "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz",
-      "integrity": "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==",
-      "dev": true,
-      "dependencies": {
-        "safe-buffer": "~5.2.0"
-      }
-    },
-    "node_modules/node-polyfill-webpack-plugin/node_modules/type-fest": {
-      "version": "2.19.0",
-      "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-2.19.0.tgz",
-      "integrity": "sha512-RAH822pAdBgcNMAfWnCBU3CFZcfZ/i1eZjwFU/dsLKumyuuP3niueg2UAukXYF0E2AAoc82ZSSf9J0WQBinzHA==",
+      "version": "3.3.3",
+      "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.3.tgz",
+      "integrity": "sha512-p1sjXuopFs0xg+fPASzQ28agW1oHD7xDsd9Xkf3T15H3c/cifrFHVwrh74PdoklAPi+i7MdRsE47vm2r6JoB+w==",
       "dev": true,
-      "engines": {
-        "node": ">=12.20"
+      "bin": {
+        "nanoid": "bin/nanoid.cjs"
       },
-      "funding": {
-        "url": "https://github.com/sponsors/sindresorhus"
+      "engines": {
+        "node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1"
       }
     },
-    "node_modules/node-releases": {
-      "version": "2.0.10",
-      "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.10.tgz",
-      "integrity": "sha512-5GFldHPXVG/YZmFzJvKK2zDSzPKhEp0+ZR5SVaoSag9fsL5YgHbUHDfnG5494ISANDcK4KwPXAx2xqVEydmd7w==",
+    "node_modules/natural-compare": {
+      "version": "1.4.0",
+      "resolved": "https://registry.npmjs.org/natural-compare/-/natural-compare-1.4.0.tgz",
+      "integrity": "sha512-OWND8ei3VtNC9h7V60qff3SVobHr996CTwgxubgyQYEpg290h9J0buyECNNJexkFm5sOajh5G116RYA1c8ZMSw==",
       "dev": true
     },
     "node_modules/normalize-package-data": {
@@ -4223,9 +3391,9 @@
       }
     },
     "node_modules/normalize-package-data/node_modules/semver": {
-      "version": "5.7.1",
-      "resolved": "https://registry.npmjs.org/semver/-/semver-5.7.1.tgz",
-      "integrity": "sha512-sauaDf/PZdVgrLTNYHRtpXa1iRiKcaebiKQ1BJdpQlWH2lCvexQdX55snPFyK7QzpudqbCI0qXFfOasHdyNDGQ==",
+      "version": "5.7.2",
+      "resolved": "https://registry.npmjs.org/semver/-/semver-5.7.2.tgz",
+      "integrity": "sha512-cBznnQ9KjJqU67B52RMC65CMarK2600WFnbkcaiwWq3xy/5haFJlshgnpjovMVJ+Hff49d8GEn0b87C5pDQ10g==",
       "dev": true,
       "bin": {
         "semver": "bin/semver"
@@ -4264,22 +3432,6 @@
         "url": "https://github.com/sponsors/ljharb"
       }
     },
-    "node_modules/object-is": {
-      "version": "1.1.5",
-      "resolved": "https://registry.npmjs.org/object-is/-/object-is-1.1.5.tgz",
-      "integrity": "sha512-3cyDsyHgtmi7I7DfSSI2LDp6SK2lwvtbg0p0R1e0RvTqF5ceGx+K2dfSjm1bKDMVCFEDAQvy+o8c6a7VujOddw==",
-      "dev": true,
-      "dependencies": {
-        "call-bind": "^1.0.2",
-        "define-properties": "^1.1.3"
-      },
-      "engines": {
-        "node": ">= 0.4"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/ljharb"
-      }
-    },
     "node_modules/object-keys": {
       "version": "1.1.1",
       "resolved": "https://registry.npmjs.org/object-keys/-/object-keys-1.1.1.tgz",
@@ -4307,6 +3459,35 @@
         "url": "https://github.com/sponsors/ljharb"
       }
     },
+    "node_modules/object.fromentries": {
+      "version": "2.0.7",
+      "resolved": "https://registry.npmjs.org/object.fromentries/-/object.fromentries-2.0.7.tgz",
+      "integrity": "sha512-UPbPHML6sL8PI/mOqPwsH4G6iyXcCGzLin8KvEPenOZN5lpCNBZZQ+V62vdjB1mQHrmqGQt5/OJzemUA+KJmEA==",
+      "dev": true,
+      "dependencies": {
+        "call-bind": "^1.0.2",
+        "define-properties": "^1.2.0",
+        "es-abstract": "^1.22.1"
+      },
+      "engines": {
+        "node": ">= 0.4"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/ljharb"
+      }
+    },
+    "node_modules/object.groupby": {
+      "version": "1.0.1",
+      "resolved": "https://registry.npmjs.org/object.groupby/-/object.groupby-1.0.1.tgz",
+      "integrity": "sha512-HqaQtqLnp/8Bn4GL16cj+CUYbnpe1bh0TtEaWvybszDG4tgxCJuRpV8VGuvNaI1fAnI4lUJzDG55MXcOH4JZcQ==",
+      "dev": true,
+      "dependencies": {
+        "call-bind": "^1.0.2",
+        "define-properties": "^1.2.0",
+        "es-abstract": "^1.22.1",
+        "get-intrinsic": "^1.2.1"
+      }
+    },
     "node_modules/object.values": {
       "version": "1.1.6",
       "resolved": "https://registry.npmjs.org/object.values/-/object.values-1.1.6.tgz",
@@ -4333,38 +3514,23 @@
         "wrappy": "1"
       }
     },
-    "node_modules/opener": {
-      "version": "1.5.2",
-      "resolved": "https://registry.npmjs.org/opener/-/opener-1.5.2.tgz",
-      "integrity": "sha512-ur5UIdyw5Y7yEj9wLzhqXiy6GZ3Mwx0yGI+5sMn2r0N0v3cKJvUmFH5yPP+WXh9e0xfyzyJX95D8l088DNFj7A==",
-      "dev": true,
-      "bin": {
-        "opener": "bin/opener-bin.js"
-      }
-    },
     "node_modules/optionator": {
-      "version": "0.9.1",
-      "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.1.tgz",
-      "integrity": "sha512-74RlY5FCnhq4jRxVUPKDaRwrVNXMqsGsiW6AJw4XK8hmtm10wC0ypZBLw5IIp85NZMr91+qd1RvvENwg7jjRFw==",
+      "version": "0.9.3",
+      "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.3.tgz",
+      "integrity": "sha512-JjCoypp+jKn1ttEFExxhetCKeJt9zhAgAve5FXHixTvFDW/5aEktX9bufBKLRRMdU7bNtpLfcGu94B3cdEJgjg==",
       "dev": true,
       "dependencies": {
+        "@aashutoshrathi/word-wrap": "^1.2.3",
         "deep-is": "^0.1.3",
         "fast-levenshtein": "^2.0.6",
         "levn": "^0.4.1",
         "prelude-ls": "^1.2.1",
-        "type-check": "^0.4.0",
-        "word-wrap": "^1.2.3"
+        "type-check": "^0.4.0"
       },
       "engines": {
         "node": ">= 0.8.0"
       }
     },
-    "node_modules/os-browserify": {
-      "version": "0.3.0",
-      "resolved": "https://registry.npmjs.org/os-browserify/-/os-browserify-0.3.0.tgz",
-      "integrity": "sha512-gjcpUc3clBf9+210TRaDWbf+rZZZEshZ+DlXMRCeAjp0xhTrnQsKHypIy1J3d5hKdUzj69t708EHtU8P6bUn0A==",
-      "dev": true
-    },
     "node_modules/p-limit": {
       "version": "3.1.0",
       "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-3.1.0.tgz",
@@ -4422,19 +3588,6 @@
         "node": ">=6"
       }
     },
-    "node_modules/parse-asn1": {
-      "version": "5.1.6",
-      "resolved": "https://registry.npmjs.org/parse-asn1/-/parse-asn1-5.1.6.tgz",
-      "integrity": "sha512-RnZRo1EPU6JBnra2vGHj0yhp6ebyjBZpmUCLHWiFhxlzvBCCpAuZ7elsBp1PVAbQN0/04VD/19rfzlBSwLstMw==",
-      "dev": true,
-      "dependencies": {
-        "asn1.js": "^5.2.0",
-        "browserify-aes": "^1.0.0",
-        "evp_bytestokey": "^1.0.0",
-        "pbkdf2": "^3.0.3",
-        "safe-buffer": "^5.1.1"
-      }
-    },
     "node_modules/parse-json": {
       "version": "5.2.0",
       "resolved": "https://registry.npmjs.org/parse-json/-/parse-json-5.2.0.tgz",
@@ -4453,12 +3606,6 @@
         "url": "https://github.com/sponsors/sindresorhus"
       }
     },
-    "node_modules/path-browserify": {
-      "version": "1.0.1",
-      "resolved": "https://registry.npmjs.org/path-browserify/-/path-browserify-1.0.1.tgz",
-      "integrity": "sha512-b7uo2UCUOYZcnF/3ID0lulOJi/bafxa1xPe7ZPsammBSpjSWQkjNxlt635YGS2MiR9GjvuXCtz2emr3jbsz98g==",
-      "dev": true
-    },
     "node_modules/path-exists": {
       "version": "4.0.0",
       "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz",
@@ -4501,28 +3648,6 @@
         "node": ">=8"
       }
     },
-    "node_modules/pbkdf2": {
-      "version": "3.1.2",
-      "resolved": "https://registry.npmjs.org/pbkdf2/-/pbkdf2-3.1.2.tgz",
-      "integrity": "sha512-iuh7L6jA7JEGu2WxDwtQP1ddOpaJNC4KlDEFfdQajSGgGPNi4OyDc2R7QnbY2bR9QjBVGwgvTdNJZoE7RaxUMA==",
-      "dev": true,
-      "dependencies": {
-        "create-hash": "^1.1.2",
-        "create-hmac": "^1.1.4",
-        "ripemd160": "^2.0.1",
-        "safe-buffer": "^5.0.1",
-        "sha.js": "^2.4.8"
-      },
-      "engines": {
-        "node": ">=0.12"
-      }
-    },
-    "node_modules/picocolors": {
-      "version": "1.0.0",
-      "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.0.0.tgz",
-      "integrity": "sha512-1fygroTLlHu66zi26VoTDv8yRgm0Fccecssto+MhsZ0D/DGW2sm8E8AjW7NU5VVTRt5GxbeZ5qBuJr+HyLYkjQ==",
-      "dev": true
-    },
     "node_modules/picomatch": {
       "version": "2.3.1",
       "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.1.tgz",
@@ -4535,70 +3660,6 @@
         "url": "https://github.com/sponsors/jonschlinkert"
       }
     },
-    "node_modules/pkg-dir": {
-      "version": "4.2.0",
-      "resolved": "https://registry.npmjs.org/pkg-dir/-/pkg-dir-4.2.0.tgz",
-      "integrity": "sha512-HRDzbaKjC+AOWVXxAU/x54COGeIv9eb+6CkDSQoNTt4XyWoIJvuPsXizxu/Fr23EiekbtZwmh1IcIG/l/a10GQ==",
-      "dev": true,
-      "dependencies": {
-        "find-up": "^4.0.0"
-      },
-      "engines": {
-        "node": ">=8"
-      }
-    },
-    "node_modules/pkg-dir/node_modules/find-up": {
-      "version": "4.1.0",
-      "resolved": "https://registry.npmjs.org/find-up/-/find-up-4.1.0.tgz",
-      "integrity": "sha512-PpOwAdQ/YlXQ2vj8a3h8IipDuYRi3wceVQQGYWxNINccq40Anw7BlsEXCMbt1Zt+OLA6Fq9suIpIWD0OsnISlw==",
-      "dev": true,
-      "dependencies": {
-        "locate-path": "^5.0.0",
-        "path-exists": "^4.0.0"
-      },
-      "engines": {
-        "node": ">=8"
-      }
-    },
-    "node_modules/pkg-dir/node_modules/locate-path": {
-      "version": "5.0.0",
-      "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-5.0.0.tgz",
-      "integrity": "sha512-t7hw9pI+WvuwNJXwk5zVHpyhIqzg2qTlklJOf0mVxGSbe3Fp2VieZcduNYjaLDoy6p9uGpQEGWG87WpMKlNq8g==",
-      "dev": true,
-      "dependencies": {
-        "p-locate": "^4.1.0"
-      },
-      "engines": {
-        "node": ">=8"
-      }
-    },
-    "node_modules/pkg-dir/node_modules/p-limit": {
-      "version": "2.3.0",
-      "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-2.3.0.tgz",
-      "integrity": "sha512-//88mFWSJx8lxCzwdAABTJL2MyWB12+eIY7MDL2SqLmAkeKU9qxRvWuSyTjm3FUmpBEMuFfckAIqEaVGUDxb6w==",
-      "dev": true,
-      "dependencies": {
-        "p-try": "^2.0.0"
-      },
-      "engines": {
-        "node": ">=6"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/sindresorhus"
-      }
-    },
-    "node_modules/pkg-dir/node_modules/p-locate": {
-      "version": "4.1.0",
-      "resolved": "https://registry.npmjs.org/p-locate/-/p-locate-4.1.0.tgz",
-      "integrity": "sha512-R79ZZ/0wAxKGu3oYMlz8jy/kbhsNrS7SKZ7PxEHBgJ5+F2mtFW2fK2cOtBh1cHYkQsbzFV7I+EoRKe6Yt0oK7A==",
-      "dev": true,
-      "dependencies": {
-        "p-limit": "^2.2.0"
-      },
-      "engines": {
-        "node": ">=8"
-      }
-    },
     "node_modules/pluralize": {
       "version": "8.0.0",
       "resolved": "https://registry.npmjs.org/pluralize/-/pluralize-8.0.0.tgz",
@@ -4618,9 +3679,9 @@
       }
     },
     "node_modules/prettier": {
-      "version": "3.0.0",
-      "resolved": "https://registry.npmjs.org/prettier/-/prettier-3.0.0.tgz",
-      "integrity": "sha512-zBf5eHpwHOGPC47h0zrPyNn+eAEIdEzfywMoYn2XPi0P44Zp0tSq64rq0xAREh4auw2cJZHo9QUob+NqCQky4g==",
+      "version": "3.0.3",
+      "resolved": "https://registry.npmjs.org/prettier/-/prettier-3.0.3.tgz",
+      "integrity": "sha512-L/4pUDMxcNa8R/EthV08Zt42WBO4h1rarVtK0K+QJG0X187OLo7l699jWw0GKuwzkPQ//jMFA/8Xm6Fh3J/DAg==",
       "dev": true,
       "bin": {
         "prettier": "bin/prettier.cjs"
@@ -4647,26 +3708,6 @@
       "integrity": "sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==",
       "dev": true
     },
-    "node_modules/public-encrypt": {
-      "version": "4.0.3",
-      "resolved": "https://registry.npmjs.org/public-encrypt/-/public-encrypt-4.0.3.tgz",
-      "integrity": "sha512-zVpa8oKZSz5bTMTFClc1fQOnyyEzpl5ozpi1B5YcvBrdohMjH2rfsBtyXcuNuwjsDIXmBYlF2N5FlJYhR29t8Q==",
-      "dev": true,
-      "dependencies": {
-        "bn.js": "^4.1.0",
-        "browserify-rsa": "^4.0.0",
-        "create-hash": "^1.1.0",
-        "parse-asn1": "^5.0.0",
-        "randombytes": "^2.0.1",
-        "safe-buffer": "^5.1.2"
-      }
-    },
-    "node_modules/public-encrypt/node_modules/bn.js": {
-      "version": "4.12.0",
-      "resolved": "https://registry.npmjs.org/bn.js/-/bn.js-4.12.0.tgz",
-      "integrity": "sha512-c98Bf3tPniI+scsdk237ku1Dc3ujXQTSgyiPUDEOe7tRkhrqridvh8klBv0HCEso1OLOYcHuCv/cS6DNxKH+ZA==",
-      "dev": true
-    },
     "node_modules/punycode": {
       "version": "2.3.0",
       "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.0.tgz",
@@ -4676,25 +3717,6 @@
         "node": ">=6"
       }
     },
-    "node_modules/querystring": {
-      "version": "0.2.0",
-      "resolved": "https://registry.npmjs.org/querystring/-/querystring-0.2.0.tgz",
-      "integrity": "sha512-X/xY82scca2tau62i9mDyU9K+I+djTMUsvwf7xnUX5GLvVzgJybOJf4Y6o9Zx3oJK/LSXg5tTZBjwzqVPaPO2g==",
-      "deprecated": "The querystring API is considered Legacy. new code should use the URLSearchParams API instead.",
-      "dev": true,
-      "engines": {
-        "node": ">=0.4.x"
-      }
-    },
-    "node_modules/querystring-es3": {
-      "version": "0.2.1",
-      "resolved": "https://registry.npmjs.org/querystring-es3/-/querystring-es3-0.2.1.tgz",
-      "integrity": "sha512-773xhDQnZBMFobEiztv8LIl70ch5MSF/jUQVlhwFyBILqq96anmoctVIYz+ZRp0qbCKATTn6ev02M3r7Ga5vqA==",
-      "dev": true,
-      "engines": {
-        "node": ">=0.4.x"
-      }
-    },
     "node_modules/queue-microtask": {
       "version": "1.2.3",
       "resolved": "https://registry.npmjs.org/queue-microtask/-/queue-microtask-1.2.3.tgz",
@@ -4724,16 +3746,6 @@
         "safe-buffer": "^5.1.0"
       }
     },
-    "node_modules/randomfill": {
-      "version": "1.0.4",
-      "resolved": "https://registry.npmjs.org/randomfill/-/randomfill-1.0.4.tgz",
-      "integrity": "sha512-87lcbR8+MhcWcUiQ+9e+Rwx8MyR2P7qnt15ynUlbm3TU/fjbgz4GsvfSUDTemtCCtVCqb4ZcEFlyPNTh9bBTLw==",
-      "dev": true,
-      "dependencies": {
-        "randombytes": "^2.0.5",
-        "safe-buffer": "^5.1.0"
-      }
-    },
     "node_modules/read-pkg": {
       "version": "5.2.0",
       "resolved": "https://registry.npmjs.org/read-pkg/-/read-pkg-5.2.0.tgz",
@@ -4863,36 +3875,24 @@
         "node": ">=8.10.0"
       }
     },
-    "node_modules/rechoir": {
-      "version": "0.8.0",
-      "resolved": "https://registry.npmjs.org/rechoir/-/rechoir-0.8.0.tgz",
-      "integrity": "sha512-/vxpCXddiX8NGfGO/mTafwjq4aFa/71pvamip0++IQk3zG8cbCj0fifNPrjjF1XMXUne91jL9OoxmdykoEtifQ==",
-      "dev": true,
-      "dependencies": {
-        "resolve": "^1.20.0"
-      },
-      "engines": {
-        "node": ">= 10.13.0"
-      }
-    },
     "node_modules/regexp-tree": {
-      "version": "0.1.24",
-      "resolved": "https://registry.npmjs.org/regexp-tree/-/regexp-tree-0.1.24.tgz",
-      "integrity": "sha512-s2aEVuLhvnVJW6s/iPgEGK6R+/xngd2jNQ+xy4bXNDKxZKJH6jpPHY6kVeVv1IeLCHgswRj+Kl3ELaDjG6V1iw==",
+      "version": "0.1.27",
+      "resolved": "https://registry.npmjs.org/regexp-tree/-/regexp-tree-0.1.27.tgz",
+      "integrity": "sha512-iETxpjK6YoRWJG5o6hXLwvjYAoW+FEZn9os0PD/b6AP6xQwsa/Y7lCVgIixBbUPMfhu+i2LtdeAqVTgGlQarfA==",
       "dev": true,
       "bin": {
         "regexp-tree": "bin/regexp-tree"
       }
     },
     "node_modules/regexp.prototype.flags": {
-      "version": "1.4.3",
-      "resolved": "https://registry.npmjs.org/regexp.prototype.flags/-/regexp.prototype.flags-1.4.3.tgz",
-      "integrity": "sha512-fjggEOO3slI6Wvgjwflkc4NFRCTZAu5CnNfBd5qOMYhWdn67nJBBu34/TkD++eeFmd8C9r9jfXJ27+nSiRkSUA==",
+      "version": "1.5.1",
+      "resolved": "https://registry.npmjs.org/regexp.prototype.flags/-/regexp.prototype.flags-1.5.1.tgz",
+      "integrity": "sha512-sy6TXMN+hnP/wMy+ISxg3krXx7BAtWVO4UouuCN/ziM9UEne0euamVNafDfvC83bRNr95y0V5iijeDQFUNpvrg==",
       "dev": true,
       "dependencies": {
         "call-bind": "^1.0.2",
-        "define-properties": "^1.1.3",
-        "functions-have-names": "^1.2.2"
+        "define-properties": "^1.2.0",
+        "set-function-name": "^2.0.0"
       },
       "engines": {
         "node": ">= 0.4"
@@ -4901,22 +3901,10 @@
         "url": "https://github.com/sponsors/ljharb"
       }
     },
-    "node_modules/regexpp": {
-      "version": "3.2.0",
-      "resolved": "https://registry.npmjs.org/regexpp/-/regexpp-3.2.0.tgz",
-      "integrity": "sha512-pq2bWo9mVD43nbts2wGv17XLiNLya+GklZ8kaDLV2Z08gDCsGpnKn9BFMepvWuHCbyVvY7J5o5+BVvoQbmlJLg==",
-      "dev": true,
-      "engines": {
-        "node": ">=8"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/mysticatea"
-      }
-    },
     "node_modules/regjsparser": {
-      "version": "0.9.1",
-      "resolved": "https://registry.npmjs.org/regjsparser/-/regjsparser-0.9.1.tgz",
-      "integrity": "sha512-dQUtn90WanSNl+7mQKcXAgZxvUe7Z0SqXlgzv0za4LwiUhyzBC58yQO3liFoUgu8GiJVInAhJjkj1N0EtQ5nkQ==",
+      "version": "0.10.0",
+      "resolved": "https://registry.npmjs.org/regjsparser/-/regjsparser-0.10.0.tgz",
+      "integrity": "sha512-qx+xQGZVsy55CH0a1hiVwHmqjLryfh7wQyF5HO07XJ9f7dQMY/gPQHhlyDkIzJKC+x2fUCpCcUODUUUFrm7SHA==",
       "dev": true,
       "dependencies": {
         "jsesc": "~0.5.0"
@@ -4960,27 +3948,6 @@
         "url": "https://github.com/sponsors/ljharb"
       }
     },
-    "node_modules/resolve-cwd": {
-      "version": "3.0.0",
-      "resolved": "https://registry.npmjs.org/resolve-cwd/-/resolve-cwd-3.0.0.tgz",
-      "integrity": "sha512-OrZaX2Mb+rJCpH/6CpSqt9xFVpN++x01XnN2ie9g6P5/3xelLAkXWVADpdz1IHD/KFfEXyE6V0U01OQ3UO2rEg==",
-      "dev": true,
-      "dependencies": {
-        "resolve-from": "^5.0.0"
-      },
-      "engines": {
-        "node": ">=8"
-      }
-    },
-    "node_modules/resolve-cwd/node_modules/resolve-from": {
-      "version": "5.0.0",
-      "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-5.0.0.tgz",
-      "integrity": "sha512-qYg9KP24dD5qka9J47d0aVky0N+b4fTU89LN9iDnjB5waksiC49rvMB0PrUJQGoTmH50XPiqOvAjDfaijGxYZw==",
-      "dev": true,
-      "engines": {
-        "node": ">=8"
-      }
-    },
     "node_modules/resolve-from": {
       "version": "4.0.0",
       "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-4.0.0.tgz",
@@ -5015,16 +3982,6 @@
         "url": "https://github.com/sponsors/isaacs"
       }
     },
-    "node_modules/ripemd160": {
-      "version": "2.0.2",
-      "resolved": "https://registry.npmjs.org/ripemd160/-/ripemd160-2.0.2.tgz",
-      "integrity": "sha512-ii4iagi25WusVoiC4B4lq7pbXfAp3D9v5CwfkY33vffw2+pkDjY1D8GaN7spsxvCSx8dkPqOZCEZyfxcmJG2IA==",
-      "dev": true,
-      "dependencies": {
-        "hash-base": "^3.0.0",
-        "inherits": "^2.0.1"
-      }
-    },
     "node_modules/run-parallel": {
       "version": "1.2.0",
       "resolved": "https://registry.npmjs.org/run-parallel/-/run-parallel-1.2.0.tgz",
@@ -5048,21 +4005,36 @@
         "queue-microtask": "^1.2.2"
       }
     },
+    "node_modules/safe-array-concat": {
+      "version": "1.0.1",
+      "resolved": "https://registry.npmjs.org/safe-array-concat/-/safe-array-concat-1.0.1.tgz",
+      "integrity": "sha512-6XbUAseYE2KtOuGueyeobCySj9L4+66Tn6KQMOPQJrAJEowYKW/YR/MGJZl7FdydUdaFu4LYyDZjxf4/Nmo23Q==",
+      "dev": true,
+      "dependencies": {
+        "call-bind": "^1.0.2",
+        "get-intrinsic": "^1.2.1",
+        "has-symbols": "^1.0.3",
+        "isarray": "^2.0.5"
+      },
+      "engines": {
+        "node": ">=0.4"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/ljharb"
+      }
+    },
+    "node_modules/safe-array-concat/node_modules/isarray": {
+      "version": "2.0.5",
+      "resolved": "https://registry.npmjs.org/isarray/-/isarray-2.0.5.tgz",
+      "integrity": "sha512-xHjhDr3cNBK0BzdUJSPXZntQUx/mwMS5Rw4A7lPJ90XGAO6ISP/ePDNuo0vhqOZU+UD5JoodwCAAoZQd3FeAKw==",
+      "dev": true
+    },
     "node_modules/safe-buffer": {
       "version": "5.1.2",
       "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz",
       "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==",
       "dev": true
     },
-    "node_modules/safe-regex": {
-      "version": "2.1.1",
-      "resolved": "https://registry.npmjs.org/safe-regex/-/safe-regex-2.1.1.tgz",
-      "integrity": "sha512-rx+x8AMzKb5Q5lQ95Zoi6ZbJqwCLkqi3XuJXp5P3rT8OEc6sZCJG5AE5dU3lsgRr/F4Bs31jSlVN+j5KrsGu9A==",
-      "dev": true,
-      "dependencies": {
-        "regexp-tree": "~0.1.1"
-      }
-    },
     "node_modules/safe-regex-test": {
       "version": "1.0.0",
       "resolved": "https://registry.npmjs.org/safe-regex-test/-/safe-regex-test-1.0.0.tgz",
@@ -5077,34 +4049,10 @@
         "url": "https://github.com/sponsors/ljharb"
       }
     },
-    "node_modules/safer-buffer": {
-      "version": "2.1.2",
-      "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz",
-      "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==",
-      "dev": true
-    },
-    "node_modules/schema-utils": {
-      "version": "3.1.1",
-      "resolved": "https://registry.npmjs.org/schema-utils/-/schema-utils-3.1.1.tgz",
-      "integrity": "sha512-Y5PQxS4ITlC+EahLuXaY86TXfR7Dc5lw294alXOq86JAHCihAIZfqv8nNCWvaEJvaC51uN9hbLGeV0cFBdH+Fw==",
-      "dev": true,
-      "dependencies": {
-        "@types/json-schema": "^7.0.8",
-        "ajv": "^6.12.5",
-        "ajv-keywords": "^3.5.2"
-      },
-      "engines": {
-        "node": ">= 10.13.0"
-      },
-      "funding": {
-        "type": "opencollective",
-        "url": "https://opencollective.com/webpack"
-      }
-    },
     "node_modules/semver": {
-      "version": "7.3.8",
-      "resolved": "https://registry.npmjs.org/semver/-/semver-7.3.8.tgz",
-      "integrity": "sha512-NB1ctGL5rlHrPJtFDVIVzTyQylMLu9N9VICA6HSFJo8MCGVTMW6gfpicwKmmK/dAjTOrqu5l63JJOpDSrAis3A==",
+      "version": "7.5.4",
+      "resolved": "https://registry.npmjs.org/semver/-/semver-7.5.4.tgz",
+      "integrity": "sha512-1bCSESV6Pv+i21Hvpxp3Dx+pSD8lIPt8uVjRrxAUt/nbswYc+tK6Y2btiULjd4+fnq15PX+nqQDC7Oft7WkwcA==",
       "dev": true,
       "dependencies": {
         "lru-cache": "^6.0.0"
@@ -5116,52 +4064,32 @@
         "node": ">=10"
       }
     },
-    "node_modules/serialize-javascript": {
-      "version": "6.0.1",
-      "resolved": "https://registry.npmjs.org/serialize-javascript/-/serialize-javascript-6.0.1.tgz",
-      "integrity": "sha512-owoXEFjWRllis8/M1Q+Cw5k8ZH40e3zhp/ovX+Xr/vi1qj6QesbyXXViFbpNvWvPNAD62SutwEXavefrLJWj7w==",
-      "dev": true,
-      "dependencies": {
-        "randombytes": "^2.1.0"
-      }
-    },
     "node_modules/set-blocking": {
       "version": "2.0.0",
       "resolved": "https://registry.npmjs.org/set-blocking/-/set-blocking-2.0.0.tgz",
       "integrity": "sha512-KiKBS8AnWGEyLzofFfmvKwpdPzqiy16LvQfK3yv/fVH7Bj13/wl3JSR1J+rfgRE9q7xUJK4qvgS8raSOeLUehw==",
       "dev": true
     },
-    "node_modules/setimmediate": {
-      "version": "1.0.5",
-      "resolved": "https://registry.npmjs.org/setimmediate/-/setimmediate-1.0.5.tgz",
-      "integrity": "sha512-MATJdZp8sLqDl/68LfQmbP8zKPLQNV6BIZoIgrscFDQ+RsvK/BxeDQOgyxKKoh0y/8h3BqVFnCqQ/gd+reiIXA==",
-      "dev": true
-    },
-    "node_modules/sha.js": {
-      "version": "2.4.11",
-      "resolved": "https://registry.npmjs.org/sha.js/-/sha.js-2.4.11.tgz",
-      "integrity": "sha512-QMEp5B7cftE7APOjk5Y6xgrbWu+WkLVQwk8JNjZ8nKRciZaByEW6MubieAiToS7+dwvrjGhH8jRXz3MVd0AYqQ==",
-      "dev": true,
-      "dependencies": {
-        "inherits": "^2.0.1",
-        "safe-buffer": "^5.0.1"
-      },
-      "bin": {
-        "sha.js": "bin.js"
-      }
-    },
-    "node_modules/shallow-clone": {
-      "version": "3.0.1",
-      "resolved": "https://registry.npmjs.org/shallow-clone/-/shallow-clone-3.0.1.tgz",
-      "integrity": "sha512-/6KqX+GVUdqPuPPd2LxDDxzX6CAbjJehAAOKlNpqqUpAqPM6HeL8f+o3a+JsyGjn2lv0WY8UsTgUJjU9Ok55NA==",
+    "node_modules/set-function-name": {
+      "version": "2.0.1",
+      "resolved": "https://registry.npmjs.org/set-function-name/-/set-function-name-2.0.1.tgz",
+      "integrity": "sha512-tMNCiqYVkXIZgc2Hnoy2IvC/f8ezc5koaRFkCjrpWzGpCd3qbZXPzVy9MAZzK1ch/X0jvSkojys3oqJN0qCmdA==",
       "dev": true,
       "dependencies": {
-        "kind-of": "^6.0.2"
+        "define-data-property": "^1.0.1",
+        "functions-have-names": "^1.2.3",
+        "has-property-descriptors": "^1.0.0"
       },
       "engines": {
-        "node": ">=8"
+        "node": ">= 0.4"
       }
     },
+    "node_modules/setimmediate": {
+      "version": "1.0.5",
+      "resolved": "https://registry.npmjs.org/setimmediate/-/setimmediate-1.0.5.tgz",
+      "integrity": "sha512-MATJdZp8sLqDl/68LfQmbP8zKPLQNV6BIZoIgrscFDQ+RsvK/BxeDQOgyxKKoh0y/8h3BqVFnCqQ/gd+reiIXA==",
+      "dev": true
+    },
     "node_modules/shebang-command": {
       "version": "2.0.0",
       "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz",
@@ -5203,46 +4131,13 @@
       "integrity": "sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ==",
       "dev": true
     },
-    "node_modules/sirv": {
-      "version": "1.0.19",
-      "resolved": "https://registry.npmjs.org/sirv/-/sirv-1.0.19.tgz",
-      "integrity": "sha512-JuLThK3TnZG1TAKDwNIqNq6QA2afLOCcm+iE8D1Kj3GA40pSPsxQjjJl0J8X3tsR7T+CP1GavpzLwYkgVLWrZQ==",
-      "dev": true,
-      "dependencies": {
-        "@polka/url": "^1.0.0-next.20",
-        "mrmime": "^1.0.0",
-        "totalist": "^1.0.0"
-      },
-      "engines": {
-        "node": ">= 10"
-      }
-    },
     "node_modules/slash": {
       "version": "3.0.0",
       "resolved": "https://registry.npmjs.org/slash/-/slash-3.0.0.tgz",
       "integrity": "sha512-g9Q1haeby36OSStwb4ntCGGGaKsaVSjQ68fBxoQcutl5fS1vuY18H3wSt3jFyFtrkx+Kz0V1G85A4MyAdDMi2Q==",
       "dev": true,
       "engines": {
-        "node": ">=8"
-      }
-    },
-    "node_modules/source-map": {
-      "version": "0.6.1",
-      "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz",
-      "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==",
-      "dev": true,
-      "engines": {
-        "node": ">=0.10.0"
-      }
-    },
-    "node_modules/source-map-support": {
-      "version": "0.5.21",
-      "resolved": "https://registry.npmjs.org/source-map-support/-/source-map-support-0.5.21.tgz",
-      "integrity": "sha512-uBHU3L3czsIyYXKX88fdrGovxdSCoTGDRZ6SYXtSRxLZUzHg5P/66Ht6uoUlHu9EZod+inXhKo3qQgwXUT/y1w==",
-      "dev": true,
-      "dependencies": {
-        "buffer-from": "^1.0.0",
-        "source-map": "^0.6.0"
+        "node": ">=8"
       }
     },
     "node_modules/spdx-correct": {
@@ -5277,56 +4172,6 @@
       "integrity": "sha512-rr+VVSXtRhO4OHbXUiAF7xW3Bo9DuuF6C5jH+q/x15j2jniycgKbxU09Hr0WqlSLUs4i4ltHGXqTe7VHclYWyA==",
       "dev": true
     },
-    "node_modules/stream-browserify": {
-      "version": "3.0.0",
-      "resolved": "https://registry.npmjs.org/stream-browserify/-/stream-browserify-3.0.0.tgz",
-      "integrity": "sha512-H73RAHsVBapbim0tU2JwwOiXUj+fikfiaoYAKHF3VJfA0pe2BCzkhAHBlLG6REzE+2WNZcxOXjK7lkso+9euLA==",
-      "dev": true,
-      "dependencies": {
-        "inherits": "~2.0.4",
-        "readable-stream": "^3.5.0"
-      }
-    },
-    "node_modules/stream-browserify/node_modules/readable-stream": {
-      "version": "3.6.1",
-      "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.1.tgz",
-      "integrity": "sha512-+rQmrWMYGA90yenhTYsLWAsLsqVC8osOw6PKE1HDYiO0gdPeKe/xDHNzIAIn4C91YQ6oenEhfYqqc1883qHbjQ==",
-      "dev": true,
-      "dependencies": {
-        "inherits": "^2.0.3",
-        "string_decoder": "^1.1.1",
-        "util-deprecate": "^1.0.1"
-      },
-      "engines": {
-        "node": ">= 6"
-      }
-    },
-    "node_modules/stream-http": {
-      "version": "3.2.0",
-      "resolved": "https://registry.npmjs.org/stream-http/-/stream-http-3.2.0.tgz",
-      "integrity": "sha512-Oq1bLqisTyK3TSCXpPbT4sdeYNdmyZJv1LxpEm2vu1ZhK89kSE5YXwZc3cWk0MagGaKriBh9mCFbVGtO+vY29A==",
-      "dev": true,
-      "dependencies": {
-        "builtin-status-codes": "^3.0.0",
-        "inherits": "^2.0.4",
-        "readable-stream": "^3.6.0",
-        "xtend": "^4.0.2"
-      }
-    },
-    "node_modules/stream-http/node_modules/readable-stream": {
-      "version": "3.6.1",
-      "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.1.tgz",
-      "integrity": "sha512-+rQmrWMYGA90yenhTYsLWAsLsqVC8osOw6PKE1HDYiO0gdPeKe/xDHNzIAIn4C91YQ6oenEhfYqqc1883qHbjQ==",
-      "dev": true,
-      "dependencies": {
-        "inherits": "^2.0.3",
-        "string_decoder": "^1.1.1",
-        "util-deprecate": "^1.0.1"
-      },
-      "engines": {
-        "node": ">= 6"
-      }
-    },
     "node_modules/string_decoder": {
       "version": "1.1.1",
       "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz",
@@ -5350,29 +4195,46 @@
         "node": ">=8"
       }
     },
+    "node_modules/string.prototype.trim": {
+      "version": "1.2.8",
+      "resolved": "https://registry.npmjs.org/string.prototype.trim/-/string.prototype.trim-1.2.8.tgz",
+      "integrity": "sha512-lfjY4HcixfQXOfaqCvcBuOIapyaroTXhbkfJN3gcB1OtyupngWK4sEET9Knd0cXd28kTUqu/kHoV4HKSJdnjiQ==",
+      "dev": true,
+      "dependencies": {
+        "call-bind": "^1.0.2",
+        "define-properties": "^1.2.0",
+        "es-abstract": "^1.22.1"
+      },
+      "engines": {
+        "node": ">= 0.4"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/ljharb"
+      }
+    },
     "node_modules/string.prototype.trimend": {
-      "version": "1.0.6",
-      "resolved": "https://registry.npmjs.org/string.prototype.trimend/-/string.prototype.trimend-1.0.6.tgz",
-      "integrity": "sha512-JySq+4mrPf9EsDBEDYMOb/lM7XQLulwg5R/m1r0PXEFqrV0qHvl58sdTilSXtKOflCsK2E8jxf+GKC0T07RWwQ==",
+      "version": "1.0.7",
+      "resolved": "https://registry.npmjs.org/string.prototype.trimend/-/string.prototype.trimend-1.0.7.tgz",
+      "integrity": "sha512-Ni79DqeB72ZFq1uH/L6zJ+DKZTkOtPIHovb3YZHQViE+HDouuU4mBrLOLDn5Dde3RF8qw5qVETEjhu9locMLvA==",
       "dev": true,
       "dependencies": {
         "call-bind": "^1.0.2",
-        "define-properties": "^1.1.4",
-        "es-abstract": "^1.20.4"
+        "define-properties": "^1.2.0",
+        "es-abstract": "^1.22.1"
       },
       "funding": {
         "url": "https://github.com/sponsors/ljharb"
       }
     },
     "node_modules/string.prototype.trimstart": {
-      "version": "1.0.6",
-      "resolved": "https://registry.npmjs.org/string.prototype.trimstart/-/string.prototype.trimstart-1.0.6.tgz",
-      "integrity": "sha512-omqjMDaY92pbn5HOX7f9IccLA+U1tA9GvtU4JrodiXFfYB7jPzzHpRzpglLAjtUV6bB557zwClJezTqnAiYnQA==",
+      "version": "1.0.7",
+      "resolved": "https://registry.npmjs.org/string.prototype.trimstart/-/string.prototype.trimstart-1.0.7.tgz",
+      "integrity": "sha512-NGhtDFu3jCEm7B4Fy0DpLewdJQOZcQ0rGbwQ/+stjnrp2i+rlKeCvos9hOIeCmqwratM47OBxY7uFZzjxHXmrg==",
       "dev": true,
       "dependencies": {
         "call-bind": "^1.0.2",
-        "define-properties": "^1.1.4",
-        "es-abstract": "^1.20.4"
+        "define-properties": "^1.2.0",
+        "es-abstract": "^1.22.1"
       },
       "funding": {
         "url": "https://github.com/sponsors/ljharb"
@@ -5447,85 +4309,12 @@
         "url": "https://github.com/sponsors/ljharb"
       }
     },
-    "node_modules/tapable": {
-      "version": "2.2.1",
-      "resolved": "https://registry.npmjs.org/tapable/-/tapable-2.2.1.tgz",
-      "integrity": "sha512-GNzQvQTOIP6RyTfE2Qxb8ZVlNmw0n88vp1szwWRimP02mnTsx3Wtn5qRdqY9w2XduFNUgvOwhNnQsjwCp+kqaQ==",
-      "dev": true,
-      "engines": {
-        "node": ">=6"
-      }
-    },
-    "node_modules/terser": {
-      "version": "5.16.5",
-      "resolved": "https://registry.npmjs.org/terser/-/terser-5.16.5.tgz",
-      "integrity": "sha512-qcwfg4+RZa3YvlFh0qjifnzBHjKGNbtDo9yivMqMFDy9Q6FSaQWSB/j1xKhsoUFJIqDOM3TsN6D5xbrMrFcHbg==",
-      "dev": true,
-      "dependencies": {
-        "@jridgewell/source-map": "^0.3.2",
-        "acorn": "^8.5.0",
-        "commander": "^2.20.0",
-        "source-map-support": "~0.5.20"
-      },
-      "bin": {
-        "terser": "bin/terser"
-      },
-      "engines": {
-        "node": ">=10"
-      }
-    },
-    "node_modules/terser-webpack-plugin": {
-      "version": "5.3.6",
-      "resolved": "https://registry.npmjs.org/terser-webpack-plugin/-/terser-webpack-plugin-5.3.6.tgz",
-      "integrity": "sha512-kfLFk+PoLUQIbLmB1+PZDMRSZS99Mp+/MHqDNmMA6tOItzRt+Npe3E+fsMs5mfcM0wCtrrdU387UnV+vnSffXQ==",
-      "dev": true,
-      "dependencies": {
-        "@jridgewell/trace-mapping": "^0.3.14",
-        "jest-worker": "^27.4.5",
-        "schema-utils": "^3.1.1",
-        "serialize-javascript": "^6.0.0",
-        "terser": "^5.14.1"
-      },
-      "engines": {
-        "node": ">= 10.13.0"
-      },
-      "funding": {
-        "type": "opencollective",
-        "url": "https://opencollective.com/webpack"
-      },
-      "peerDependencies": {
-        "webpack": "^5.1.0"
-      },
-      "peerDependenciesMeta": {
-        "@swc/core": {
-          "optional": true
-        },
-        "esbuild": {
-          "optional": true
-        },
-        "uglify-js": {
-          "optional": true
-        }
-      }
-    },
     "node_modules/text-table": {
       "version": "0.2.0",
       "resolved": "https://registry.npmjs.org/text-table/-/text-table-0.2.0.tgz",
       "integrity": "sha512-N+8UisAXDGk8PFXP4HAzVR9nbfmVJ3zYLAWiTIoqC5v5isinhr+r5uaO8+7r3BMfuNIufIsA7RdpVgacC2cSpw==",
       "dev": true
     },
-    "node_modules/timers-browserify": {
-      "version": "2.0.12",
-      "resolved": "https://registry.npmjs.org/timers-browserify/-/timers-browserify-2.0.12.tgz",
-      "integrity": "sha512-9phl76Cqm6FhSX9Xe1ZUAMLtm1BLkKj2Qd5ApyWkXzsMRaA7dgr81kf4wJmQf/hAvg8EEyJxDo3du/0KlhPiKQ==",
-      "dev": true,
-      "dependencies": {
-        "setimmediate": "^1.0.4"
-      },
-      "engines": {
-        "node": ">=0.6.0"
-      }
-    },
     "node_modules/to-regex-range": {
       "version": "5.0.1",
       "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz",
@@ -5538,32 +4327,16 @@
         "node": ">=8.0"
       }
     },
-    "node_modules/totalist": {
-      "version": "1.1.0",
-      "resolved": "https://registry.npmjs.org/totalist/-/totalist-1.1.0.tgz",
-      "integrity": "sha512-gduQwd1rOdDMGxFG1gEvhV88Oirdo2p+KjoYFU7k2g+i7n6AFFbDQ5kMPUsW0pNbfQsB/cwXvT1i4Bue0s9g5g==",
-      "dev": true,
-      "engines": {
-        "node": ">=6"
-      }
-    },
-    "node_modules/ts-loader": {
-      "version": "9.4.2",
-      "resolved": "https://registry.npmjs.org/ts-loader/-/ts-loader-9.4.2.tgz",
-      "integrity": "sha512-OmlC4WVmFv5I0PpaxYb+qGeGOdm5giHU7HwDDUjw59emP2UYMHy9fFSDcYgSNoH8sXcj4hGCSEhlDZ9ULeDraA==",
+    "node_modules/ts-api-utils": {
+      "version": "1.0.3",
+      "resolved": "https://registry.npmjs.org/ts-api-utils/-/ts-api-utils-1.0.3.tgz",
+      "integrity": "sha512-wNMeqtMz5NtwpT/UZGY5alT+VoKdSsOOP/kqHFcUW1P/VRhH2wJ48+DN2WwUliNbQ976ETwDL0Ifd2VVvgonvg==",
       "dev": true,
-      "dependencies": {
-        "chalk": "^4.1.0",
-        "enhanced-resolve": "^5.0.0",
-        "micromatch": "^4.0.0",
-        "semver": "^7.3.4"
-      },
       "engines": {
-        "node": ">=12.0.0"
+        "node": ">=16.13.0"
       },
       "peerDependencies": {
-        "typescript": "*",
-        "webpack": "^5.0.0"
+        "typescript": ">=4.2.0"
       }
     },
     "node_modules/tsconfig-paths": {
@@ -5578,33 +4351,6 @@
         "strip-bom": "^3.0.0"
       }
     },
-    "node_modules/tslib": {
-      "version": "1.14.1",
-      "resolved": "https://registry.npmjs.org/tslib/-/tslib-1.14.1.tgz",
-      "integrity": "sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg==",
-      "dev": true
-    },
-    "node_modules/tsutils": {
-      "version": "3.21.0",
-      "resolved": "https://registry.npmjs.org/tsutils/-/tsutils-3.21.0.tgz",
-      "integrity": "sha512-mHKK3iUXL+3UF6xL5k0PEhKRUBKPBCv/+RkEOpjRWxxx27KKRBmmA60A9pgOUvMi8GKhRMPEmjBRPzs2W7O1OA==",
-      "dev": true,
-      "dependencies": {
-        "tslib": "^1.8.1"
-      },
-      "engines": {
-        "node": ">= 6"
-      },
-      "peerDependencies": {
-        "typescript": ">=2.8.0 || >= 3.2.0-dev || >= 3.3.0-dev || >= 3.4.0-dev || >= 3.5.0-dev || >= 3.6.0-dev || >= 3.6.0-beta || >= 3.7.0-dev || >= 3.7.0-beta"
-      }
-    },
-    "node_modules/tty-browserify": {
-      "version": "0.0.1",
-      "resolved": "https://registry.npmjs.org/tty-browserify/-/tty-browserify-0.0.1.tgz",
-      "integrity": "sha512-C3TaO7K81YvjCgQH9Q1S3R3P3BtN3RIM8n+OvX4il1K1zgE8ZhI0op7kClgkxtutIE8hQrcrHBXvIheqKUUCxw==",
-      "dev": true
-    },
     "node_modules/type-check": {
       "version": "0.4.0",
       "resolved": "https://registry.npmjs.org/type-check/-/type-check-0.4.0.tgz",
@@ -5629,6 +4375,57 @@
         "url": "https://github.com/sponsors/sindresorhus"
       }
     },
+    "node_modules/typed-array-buffer": {
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/typed-array-buffer/-/typed-array-buffer-1.0.0.tgz",
+      "integrity": "sha512-Y8KTSIglk9OZEr8zywiIHG/kmQ7KWyjseXs1CbSo8vC42w7hg2HgYTxSWwP0+is7bWDc1H+Fo026CpHFwm8tkw==",
+      "dev": true,
+      "dependencies": {
+        "call-bind": "^1.0.2",
+        "get-intrinsic": "^1.2.1",
+        "is-typed-array": "^1.1.10"
+      },
+      "engines": {
+        "node": ">= 0.4"
+      }
+    },
+    "node_modules/typed-array-byte-length": {
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/typed-array-byte-length/-/typed-array-byte-length-1.0.0.tgz",
+      "integrity": "sha512-Or/+kvLxNpeQ9DtSydonMxCx+9ZXOswtwJn17SNLvhptaXYDJvkFFP5zbfU/uLmvnBJlI4yrnXRxpdWH/M5tNA==",
+      "dev": true,
+      "dependencies": {
+        "call-bind": "^1.0.2",
+        "for-each": "^0.3.3",
+        "has-proto": "^1.0.1",
+        "is-typed-array": "^1.1.10"
+      },
+      "engines": {
+        "node": ">= 0.4"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/ljharb"
+      }
+    },
+    "node_modules/typed-array-byte-offset": {
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/typed-array-byte-offset/-/typed-array-byte-offset-1.0.0.tgz",
+      "integrity": "sha512-RD97prjEt9EL8YgAgpOkf3O4IF9lhJFr9g0htQkm0rchFp/Vx7LW5Q8fSXXub7BXAODyUQohRMyOc3faCPd0hg==",
+      "dev": true,
+      "dependencies": {
+        "available-typed-arrays": "^1.0.5",
+        "call-bind": "^1.0.2",
+        "for-each": "^0.3.3",
+        "has-proto": "^1.0.1",
+        "is-typed-array": "^1.1.10"
+      },
+      "engines": {
+        "node": ">= 0.4"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/ljharb"
+      }
+    },
     "node_modules/typed-array-length": {
       "version": "1.0.4",
       "resolved": "https://registry.npmjs.org/typed-array-length/-/typed-array-length-1.0.4.tgz",
@@ -5644,16 +4441,16 @@
       }
     },
     "node_modules/typescript": {
-      "version": "4.9.5",
-      "resolved": "https://registry.npmjs.org/typescript/-/typescript-4.9.5.tgz",
-      "integrity": "sha512-1FXk9E2Hm+QzZQ7z+McJiHL4NW1F2EzMu9Nq9i3zAaGqibafqYwCVU6WyWAuyQRRzOlxou8xZSyXLEN8oKj24g==",
+      "version": "5.2.2",
+      "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.2.2.tgz",
+      "integrity": "sha512-mI4WrpHsbCIcwT9cF4FZvr80QUeKvsUsUvKDoR+X/7XHQH98xYD8YHZg7ANtz2GtZt/CBq2QJ0thkGJMHfqc1w==",
       "dev": true,
       "bin": {
         "tsc": "bin/tsc",
         "tsserver": "bin/tsserver"
       },
       "engines": {
-        "node": ">=4.2.0"
+        "node": ">=14.17"
       }
     },
     "node_modules/unbox-primitive": {
@@ -5680,32 +4477,6 @@
         "node": ">= 10.0.0"
       }
     },
-    "node_modules/update-browserslist-db": {
-      "version": "1.0.10",
-      "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.0.10.tgz",
-      "integrity": "sha512-OztqDenkfFkbSG+tRxBeAnCVPckDBcvibKd35yDONx6OU8N7sqgwc7rCbkJ/WcYtVRZ4ba68d6byhC21GFh7sQ==",
-      "dev": true,
-      "funding": [
-        {
-          "type": "opencollective",
-          "url": "https://opencollective.com/browserslist"
-        },
-        {
-          "type": "tidelift",
-          "url": "https://tidelift.com/funding/github/npm/browserslist"
-        }
-      ],
-      "dependencies": {
-        "escalade": "^3.1.1",
-        "picocolors": "^1.0.0"
-      },
-      "bin": {
-        "browserslist-lint": "cli.js"
-      },
-      "peerDependencies": {
-        "browserslist": ">= 4.21.0"
-      }
-    },
     "node_modules/uri-js": {
       "version": "4.4.1",
       "resolved": "https://registry.npmjs.org/uri-js/-/uri-js-4.4.1.tgz",
@@ -5715,35 +4486,6 @@
         "punycode": "^2.1.0"
       }
     },
-    "node_modules/url": {
-      "version": "0.11.0",
-      "resolved": "https://registry.npmjs.org/url/-/url-0.11.0.tgz",
-      "integrity": "sha512-kbailJa29QrtXnxgq+DdCEGlbTeYM2eJUxsz6vjZavrCYPMIFHMKQmSKYAIuUK2i7hgPm28a8piX5NTUtM/LKQ==",
-      "dev": true,
-      "dependencies": {
-        "punycode": "1.3.2",
-        "querystring": "0.2.0"
-      }
-    },
-    "node_modules/url/node_modules/punycode": {
-      "version": "1.3.2",
-      "resolved": "https://registry.npmjs.org/punycode/-/punycode-1.3.2.tgz",
-      "integrity": "sha512-RofWgt/7fL5wP1Y7fxE7/EmTLzQVnB0ycyibJ0OOHIlJqTNzglYFxVwETOcIoJqJmpDXJ9xImDv+Fq34F/d4Dw==",
-      "dev": true
-    },
-    "node_modules/util": {
-      "version": "0.12.5",
-      "resolved": "https://registry.npmjs.org/util/-/util-0.12.5.tgz",
-      "integrity": "sha512-kZf/K6hEIrWHI6XqOFUiiMa+79wE/D8Q+NCNAWclkyg3b4d2k7s0QGepNjiABc+aR3N1PAyHL7p6UcLY6LmrnA==",
-      "dev": true,
-      "dependencies": {
-        "inherits": "^2.0.3",
-        "is-arguments": "^1.0.4",
-        "is-generator-function": "^1.0.7",
-        "is-typed-array": "^1.1.3",
-        "which-typed-array": "^1.1.2"
-      }
-    },
     "node_modules/util-deprecate": {
       "version": "1.0.2",
       "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz",
@@ -5753,186 +4495,11 @@
     "node_modules/validate-npm-package-license": {
       "version": "3.0.4",
       "resolved": "https://registry.npmjs.org/validate-npm-package-license/-/validate-npm-package-license-3.0.4.tgz",
-      "integrity": "sha512-DpKm2Ui/xN7/HQKCtpZxoRWBhZ9Z0kqtygG8XCgNQ8ZlDnxuQmWhj566j8fN4Cu3/JmbhsDo7fcAJq4s9h27Ew==",
-      "dev": true,
-      "dependencies": {
-        "spdx-correct": "^3.0.0",
-        "spdx-expression-parse": "^3.0.0"
-      }
-    },
-    "node_modules/vm-browserify": {
-      "version": "1.1.2",
-      "resolved": "https://registry.npmjs.org/vm-browserify/-/vm-browserify-1.1.2.tgz",
-      "integrity": "sha512-2ham8XPWTONajOR0ohOKOHXkm3+gaBmGut3SRuu75xLd/RRaY6vqgh8NBYYk7+RW3u5AtzPQZG8F10LHkl0lAQ==",
-      "dev": true
-    },
-    "node_modules/watchpack": {
-      "version": "2.4.0",
-      "resolved": "https://registry.npmjs.org/watchpack/-/watchpack-2.4.0.tgz",
-      "integrity": "sha512-Lcvm7MGST/4fup+ifyKi2hjyIAwcdI4HRgtvTpIUxBRhB+RFtUh8XtDOxUfctVCnhVi+QQj49i91OyvzkJl6cg==",
-      "dev": true,
-      "dependencies": {
-        "glob-to-regexp": "^0.4.1",
-        "graceful-fs": "^4.1.2"
-      },
-      "engines": {
-        "node": ">=10.13.0"
-      }
-    },
-    "node_modules/webpack": {
-      "version": "5.76.0",
-      "resolved": "https://registry.npmjs.org/webpack/-/webpack-5.76.0.tgz",
-      "integrity": "sha512-l5sOdYBDunyf72HW8dF23rFtWq/7Zgvt/9ftMof71E/yUb1YLOBmTgA2K4vQthB3kotMrSj609txVE0dnr2fjA==",
-      "dev": true,
-      "dependencies": {
-        "@types/eslint-scope": "^3.7.3",
-        "@types/estree": "^0.0.51",
-        "@webassemblyjs/ast": "1.11.1",
-        "@webassemblyjs/wasm-edit": "1.11.1",
-        "@webassemblyjs/wasm-parser": "1.11.1",
-        "acorn": "^8.7.1",
-        "acorn-import-assertions": "^1.7.6",
-        "browserslist": "^4.14.5",
-        "chrome-trace-event": "^1.0.2",
-        "enhanced-resolve": "^5.10.0",
-        "es-module-lexer": "^0.9.0",
-        "eslint-scope": "5.1.1",
-        "events": "^3.2.0",
-        "glob-to-regexp": "^0.4.1",
-        "graceful-fs": "^4.2.9",
-        "json-parse-even-better-errors": "^2.3.1",
-        "loader-runner": "^4.2.0",
-        "mime-types": "^2.1.27",
-        "neo-async": "^2.6.2",
-        "schema-utils": "^3.1.0",
-        "tapable": "^2.1.1",
-        "terser-webpack-plugin": "^5.1.3",
-        "watchpack": "^2.4.0",
-        "webpack-sources": "^3.2.3"
-      },
-      "bin": {
-        "webpack": "bin/webpack.js"
-      },
-      "engines": {
-        "node": ">=10.13.0"
-      },
-      "funding": {
-        "type": "opencollective",
-        "url": "https://opencollective.com/webpack"
-      },
-      "peerDependenciesMeta": {
-        "webpack-cli": {
-          "optional": true
-        }
-      }
-    },
-    "node_modules/webpack-bundle-analyzer": {
-      "version": "4.8.0",
-      "resolved": "https://registry.npmjs.org/webpack-bundle-analyzer/-/webpack-bundle-analyzer-4.8.0.tgz",
-      "integrity": "sha512-ZzoSBePshOKhr+hd8u6oCkZVwpVaXgpw23ScGLFpR6SjYI7+7iIWYarjN6OEYOfRt8o7ZyZZQk0DuMizJ+LEIg==",
-      "dev": true,
-      "dependencies": {
-        "@discoveryjs/json-ext": "0.5.7",
-        "acorn": "^8.0.4",
-        "acorn-walk": "^8.0.0",
-        "chalk": "^4.1.0",
-        "commander": "^7.2.0",
-        "gzip-size": "^6.0.0",
-        "lodash": "^4.17.20",
-        "opener": "^1.5.2",
-        "sirv": "^1.0.7",
-        "ws": "^7.3.1"
-      },
-      "bin": {
-        "webpack-bundle-analyzer": "lib/bin/analyzer.js"
-      },
-      "engines": {
-        "node": ">= 10.13.0"
-      }
-    },
-    "node_modules/webpack-bundle-analyzer/node_modules/commander": {
-      "version": "7.2.0",
-      "resolved": "https://registry.npmjs.org/commander/-/commander-7.2.0.tgz",
-      "integrity": "sha512-QrWXB+ZQSVPmIWIhtEO9H+gwHaMGYiF5ChvoJ+K9ZGHG/sVsa6yiesAD1GC/x46sET00Xlwo1u49RVVVzvcSkw==",
-      "dev": true,
-      "engines": {
-        "node": ">= 10"
-      }
-    },
-    "node_modules/webpack-cli": {
-      "version": "5.0.1",
-      "resolved": "https://registry.npmjs.org/webpack-cli/-/webpack-cli-5.0.1.tgz",
-      "integrity": "sha512-S3KVAyfwUqr0Mo/ur3NzIp6jnerNpo7GUO6so51mxLi1spqsA17YcMXy0WOIJtBSnj748lthxC6XLbNKh/ZC+A==",
-      "dev": true,
-      "dependencies": {
-        "@discoveryjs/json-ext": "^0.5.0",
-        "@webpack-cli/configtest": "^2.0.1",
-        "@webpack-cli/info": "^2.0.1",
-        "@webpack-cli/serve": "^2.0.1",
-        "colorette": "^2.0.14",
-        "commander": "^9.4.1",
-        "cross-spawn": "^7.0.3",
-        "envinfo": "^7.7.3",
-        "fastest-levenshtein": "^1.0.12",
-        "import-local": "^3.0.2",
-        "interpret": "^3.1.1",
-        "rechoir": "^0.8.0",
-        "webpack-merge": "^5.7.3"
-      },
-      "bin": {
-        "webpack-cli": "bin/cli.js"
-      },
-      "engines": {
-        "node": ">=14.15.0"
-      },
-      "funding": {
-        "type": "opencollective",
-        "url": "https://opencollective.com/webpack"
-      },
-      "peerDependencies": {
-        "webpack": "5.x.x"
-      },
-      "peerDependenciesMeta": {
-        "@webpack-cli/generators": {
-          "optional": true
-        },
-        "webpack-bundle-analyzer": {
-          "optional": true
-        },
-        "webpack-dev-server": {
-          "optional": true
-        }
-      }
-    },
-    "node_modules/webpack-cli/node_modules/commander": {
-      "version": "9.5.0",
-      "resolved": "https://registry.npmjs.org/commander/-/commander-9.5.0.tgz",
-      "integrity": "sha512-KRs7WVDKg86PWiuAqhDrAQnTXZKraVcCc6vFdL14qrZ/DcWwuRo7VoiYXalXO7S5GKpqYiVEwCbgFDfxNHKJBQ==",
-      "dev": true,
-      "engines": {
-        "node": "^12.20.0 || >=14"
-      }
-    },
-    "node_modules/webpack-merge": {
-      "version": "5.8.0",
-      "resolved": "https://registry.npmjs.org/webpack-merge/-/webpack-merge-5.8.0.tgz",
-      "integrity": "sha512-/SaI7xY0831XwP6kzuwhKWVKDP9t1QY1h65lAFLbZqMPIuYcD9QAW4u9STIbU9kaJbPBB/geU/gLr1wDjOhQ+Q==",
-      "dev": true,
-      "dependencies": {
-        "clone-deep": "^4.0.1",
-        "wildcard": "^2.0.0"
-      },
-      "engines": {
-        "node": ">=10.0.0"
-      }
-    },
-    "node_modules/webpack-sources": {
-      "version": "3.2.3",
-      "resolved": "https://registry.npmjs.org/webpack-sources/-/webpack-sources-3.2.3.tgz",
-      "integrity": "sha512-/DyMEOrDgLKKIG0fmvtz+4dUX/3Ghozwgm6iPp8KRhvn+eQf9+Q7GWxVNMk3+uCPWfdXYC4ExGBckIXdFEfH1w==",
+      "integrity": "sha512-DpKm2Ui/xN7/HQKCtpZxoRWBhZ9Z0kqtygG8XCgNQ8ZlDnxuQmWhj566j8fN4Cu3/JmbhsDo7fcAJq4s9h27Ew==",
       "dev": true,
-      "engines": {
-        "node": ">=10.13.0"
+      "dependencies": {
+        "spdx-correct": "^3.0.0",
+        "spdx-expression-parse": "^3.0.0"
       }
     },
     "node_modules/which": {
@@ -5967,17 +4534,16 @@
       }
     },
     "node_modules/which-typed-array": {
-      "version": "1.1.9",
-      "resolved": "https://registry.npmjs.org/which-typed-array/-/which-typed-array-1.1.9.tgz",
-      "integrity": "sha512-w9c4xkx6mPidwp7180ckYWfMmvxpjlZuIudNtDf4N/tTAUB8VJbX25qZoAsrtGuYNnGw3pa0AXgbGKRB8/EceA==",
+      "version": "1.1.11",
+      "resolved": "https://registry.npmjs.org/which-typed-array/-/which-typed-array-1.1.11.tgz",
+      "integrity": "sha512-qe9UWWpkeG5yzZ0tNYxDmd7vo58HDBc39mZ0xWWpolAGADdFOzkfamWLDxkOWcvHQKVmdTyQdLD4NOfjLWTKew==",
       "dev": true,
       "dependencies": {
         "available-typed-arrays": "^1.0.5",
         "call-bind": "^1.0.2",
         "for-each": "^0.3.3",
         "gopd": "^1.0.1",
-        "has-tostringtag": "^1.0.0",
-        "is-typed-array": "^1.1.10"
+        "has-tostringtag": "^1.0.0"
       },
       "engines": {
         "node": ">= 0.4"
@@ -5995,41 +4561,6 @@
         "string-width": "^1.0.2 || 2 || 3 || 4"
       }
     },
-    "node_modules/wildcard": {
-      "version": "2.0.0",
-      "resolved": "https://registry.npmjs.org/wildcard/-/wildcard-2.0.0.tgz",
-      "integrity": "sha512-JcKqAHLPxcdb9KM49dufGXn2x3ssnfjbcaQdLlfZsL9rH9wgDQjUtDxbo8NE0F6SFvydeu1VhZe7hZuHsB2/pw==",
-      "dev": true
-    },
-    "node_modules/word-wrap": {
-      "version": "1.2.4",
-      "resolved": "https://registry.npmjs.org/word-wrap/-/word-wrap-1.2.4.tgz",
-      "integrity": "sha512-2V81OA4ugVo5pRo46hAoD2ivUJx8jXmWXfUkY4KFNw0hEptvN0QfH3K4nHiwzGeKl5rFKedV48QVoqYavy4YpA==",
-      "dev": true,
-      "engines": {
-        "node": ">=0.10.0"
-      }
-    },
-    "node_modules/worker-loader": {
-      "version": "3.0.8",
-      "resolved": "https://registry.npmjs.org/worker-loader/-/worker-loader-3.0.8.tgz",
-      "integrity": "sha512-XQyQkIFeRVC7f7uRhFdNMe/iJOdO6zxAaR3EWbDp45v3mDhrTi+++oswKNxShUNjPC/1xUp5DB29YKLhFo129g==",
-      "dev": true,
-      "dependencies": {
-        "loader-utils": "^2.0.0",
-        "schema-utils": "^3.0.0"
-      },
-      "engines": {
-        "node": ">= 10.13.0"
-      },
-      "funding": {
-        "type": "opencollective",
-        "url": "https://opencollective.com/webpack"
-      },
-      "peerDependencies": {
-        "webpack": "^4.0.0 || ^5.0.0"
-      }
-    },
     "node_modules/workerpool": {
       "version": "6.2.1",
       "resolved": "https://registry.npmjs.org/workerpool/-/workerpool-6.2.1.tgz",
@@ -6059,36 +4590,6 @@
       "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==",
       "dev": true
     },
-    "node_modules/ws": {
-      "version": "7.5.9",
-      "resolved": "https://registry.npmjs.org/ws/-/ws-7.5.9.tgz",
-      "integrity": "sha512-F+P9Jil7UiSKSkppIiD94dN07AwvFixvLIj1Og1Rl9GGMuNipJnV9JzjD6XuqmAeiswGvUmNLjr5cFuXwNS77Q==",
-      "dev": true,
-      "engines": {
-        "node": ">=8.3.0"
-      },
-      "peerDependencies": {
-        "bufferutil": "^4.0.1",
-        "utf-8-validate": "^5.0.2"
-      },
-      "peerDependenciesMeta": {
-        "bufferutil": {
-          "optional": true
-        },
-        "utf-8-validate": {
-          "optional": true
-        }
-      }
-    },
-    "node_modules/xtend": {
-      "version": "4.0.2",
-      "resolved": "https://registry.npmjs.org/xtend/-/xtend-4.0.2.tgz",
-      "integrity": "sha512-LKYU1iAXJXUgAXn9URjiu+MWhyUXHsvfp7mcuYm9dSUKK0/CjtrUwFAxD82/mCWbtLsGjFIad0wIsod4zrTAEQ==",
-      "dev": true,
-      "engines": {
-        "node": ">=0.4"
-      }
-    },
     "node_modules/y18n": {
       "version": "5.0.8",
       "resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz",
@@ -6160,6 +4661,12 @@
     }
   },
   "dependencies": {
+    "@aashutoshrathi/word-wrap": {
+      "version": "1.2.6",
+      "resolved": "https://registry.npmjs.org/@aashutoshrathi/word-wrap/-/word-wrap-1.2.6.tgz",
+      "integrity": "sha512-1Yjs2SvM8TflER/OD3cOjhWWOZb58A2t7wpE2S9XfBYTiIl+XFhQG2bjy4Pu1I+EAlCNUzRDYDdFwFYUKvXcIA==",
+      "dev": true
+    },
     "@babel/code-frame": {
       "version": "7.18.6",
       "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.18.6.tgz",
@@ -6170,9 +4677,9 @@
       }
     },
     "@babel/helper-validator-identifier": {
-      "version": "7.19.1",
-      "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.19.1.tgz",
-      "integrity": "sha512-awrNfaMtnHUr653GgGEs++LlAvW6w+DcPrOliSMXWCKo597CwL5Acf/wWdNkf/tfEQE3mjkeD1YOVZOUV/od1w==",
+      "version": "7.22.20",
+      "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.22.20.tgz",
+      "integrity": "sha512-Y4OZ+ytlatR8AI+8KZfKuL5urKp7qey08ha31L8b3BwewJAoJamTzyvxPR/5D+KkdJCGPq/+8TukHBlY10FX9A==",
       "dev": true
     },
     "@babel/highlight": {
@@ -6244,41 +4751,195 @@
         }
       }
     },
-    "@discoveryjs/json-ext": {
-      "version": "0.5.7",
-      "resolved": "https://registry.npmjs.org/@discoveryjs/json-ext/-/json-ext-0.5.7.tgz",
-      "integrity": "sha512-dBVuXR082gk3jsFp7Rd/JI4kytwGHecnCoTtXFb7DB6CNHp4rg5k1bhg0nWdLGLnOV71lmDzGQaLMy8iPLY0pw==",
-      "dev": true
-    },
     "@es-joy/jsdoccomment": {
-      "version": "0.36.1",
-      "resolved": "https://registry.npmjs.org/@es-joy/jsdoccomment/-/jsdoccomment-0.36.1.tgz",
-      "integrity": "sha512-922xqFsTpHs6D0BUiG4toiyPOMc8/jafnWKxz1KWgS4XzKPy2qXf1Pe6UFuNSCQqt6tOuhAWXBNuuyUhJmw9Vg==",
+      "version": "0.40.1",
+      "resolved": "https://registry.npmjs.org/@es-joy/jsdoccomment/-/jsdoccomment-0.40.1.tgz",
+      "integrity": "sha512-YORCdZSusAlBrFpZ77pJjc5r1bQs5caPWtAu+WWmiSo+8XaUzseapVrfAtiRFbQWnrBxxLLEwF6f6ZG/UgCQCg==",
       "dev": true,
       "requires": {
-        "comment-parser": "1.3.1",
-        "esquery": "^1.4.0",
-        "jsdoc-type-pratt-parser": "~3.1.0"
+        "comment-parser": "1.4.0",
+        "esquery": "^1.5.0",
+        "jsdoc-type-pratt-parser": "~4.0.0"
       }
     },
+    "@esbuild/android-arm": {
+      "version": "0.19.3",
+      "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.19.3.tgz",
+      "integrity": "sha512-Lemgw4io4VZl9GHJmjiBGzQ7ONXRfRPHcUEerndjwiSkbxzrpq0Uggku5MxxrXdwJ+pTj1qyw4jwTu7hkPsgIA==",
+      "dev": true,
+      "optional": true
+    },
+    "@esbuild/android-arm64": {
+      "version": "0.19.3",
+      "resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.19.3.tgz",
+      "integrity": "sha512-w+Akc0vv5leog550kjJV9Ru+MXMR2VuMrui3C61mnysim0gkFCPOUTAfzTP0qX+HpN9Syu3YA3p1hf3EPqObRw==",
+      "dev": true,
+      "optional": true
+    },
+    "@esbuild/android-x64": {
+      "version": "0.19.3",
+      "resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.19.3.tgz",
+      "integrity": "sha512-FKQJKkK5MXcBHoNZMDNUAg1+WcZlV/cuXrWCoGF/TvdRiYS4znA0m5Il5idUwfxrE20bG/vU1Cr5e1AD6IEIjQ==",
+      "dev": true,
+      "optional": true
+    },
+    "@esbuild/darwin-arm64": {
+      "version": "0.19.3",
+      "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.19.3.tgz",
+      "integrity": "sha512-kw7e3FXU+VsJSSSl2nMKvACYlwtvZB8RUIeVShIEY6PVnuZ3c9+L9lWB2nWeeKWNNYDdtL19foCQ0ZyUL7nqGw==",
+      "dev": true,
+      "optional": true
+    },
+    "@esbuild/darwin-x64": {
+      "version": "0.19.3",
+      "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.19.3.tgz",
+      "integrity": "sha512-tPfZiwF9rO0jW6Jh9ipi58N5ZLoSjdxXeSrAYypy4psA2Yl1dAMhM71KxVfmjZhJmxRjSnb29YlRXXhh3GqzYw==",
+      "dev": true,
+      "optional": true
+    },
+    "@esbuild/freebsd-arm64": {
+      "version": "0.19.3",
+      "resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.19.3.tgz",
+      "integrity": "sha512-ERDyjOgYeKe0Vrlr1iLrqTByB026YLPzTytDTz1DRCYM+JI92Dw2dbpRHYmdqn6VBnQ9Bor6J8ZlNwdZdxjlSg==",
+      "dev": true,
+      "optional": true
+    },
+    "@esbuild/freebsd-x64": {
+      "version": "0.19.3",
+      "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.19.3.tgz",
+      "integrity": "sha512-nXesBZ2Ad1qL+Rm3crN7NmEVJ5uvfLFPLJev3x1j3feCQXfAhoYrojC681RhpdOph8NsvKBBwpYZHR7W0ifTTA==",
+      "dev": true,
+      "optional": true
+    },
+    "@esbuild/linux-arm": {
+      "version": "0.19.3",
+      "resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.19.3.tgz",
+      "integrity": "sha512-zr48Cg/8zkzZCzDHNxXO/89bf9e+r4HtzNUPoz4GmgAkF1gFAFmfgOdCbR8zMbzFDGb1FqBBhdXUpcTQRYS1cQ==",
+      "dev": true,
+      "optional": true
+    },
+    "@esbuild/linux-arm64": {
+      "version": "0.19.3",
+      "resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.19.3.tgz",
+      "integrity": "sha512-qXvYKmXj8GcJgWq3aGvxL/JG1ZM3UR272SdPU4QSTzD0eymrM7leiZH77pvY3UetCy0k1xuXZ+VPvoJNdtrsWQ==",
+      "dev": true,
+      "optional": true
+    },
+    "@esbuild/linux-ia32": {
+      "version": "0.19.3",
+      "resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.19.3.tgz",
+      "integrity": "sha512-7XlCKCA0nWcbvYpusARWkFjRQNWNGlt45S+Q18UeS///K6Aw8bB2FKYe9mhVWy/XLShvCweOLZPrnMswIaDXQA==",
+      "dev": true,
+      "optional": true
+    },
+    "@esbuild/linux-loong64": {
+      "version": "0.19.3",
+      "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.19.3.tgz",
+      "integrity": "sha512-qGTgjweER5xqweiWtUIDl9OKz338EQqCwbS9c2Bh5jgEH19xQ1yhgGPNesugmDFq+UUSDtWgZ264st26b3de8A==",
+      "dev": true,
+      "optional": true
+    },
+    "@esbuild/linux-mips64el": {
+      "version": "0.19.3",
+      "resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.19.3.tgz",
+      "integrity": "sha512-gy1bFskwEyxVMFRNYSvBauDIWNggD6pyxUksc0MV9UOBD138dKTzr8XnM2R4mBsHwVzeuIH8X5JhmNs2Pzrx+A==",
+      "dev": true,
+      "optional": true
+    },
+    "@esbuild/linux-ppc64": {
+      "version": "0.19.3",
+      "resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.19.3.tgz",
+      "integrity": "sha512-UrYLFu62x1MmmIe85rpR3qou92wB9lEXluwMB/STDzPF9k8mi/9UvNsG07Tt9AqwPQXluMQ6bZbTzYt01+Ue5g==",
+      "dev": true,
+      "optional": true
+    },
+    "@esbuild/linux-riscv64": {
+      "version": "0.19.3",
+      "resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.19.3.tgz",
+      "integrity": "sha512-9E73TfyMCbE+1AwFOg3glnzZ5fBAFK4aawssvuMgCRqCYzE0ylVxxzjEfut8xjmKkR320BEoMui4o/t9KA96gA==",
+      "dev": true,
+      "optional": true
+    },
+    "@esbuild/linux-s390x": {
+      "version": "0.19.3",
+      "resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.19.3.tgz",
+      "integrity": "sha512-LlmsbuBdm1/D66TJ3HW6URY8wO6IlYHf+ChOUz8SUAjVTuaisfuwCOAgcxo3Zsu3BZGxmI7yt//yGOxV+lHcEA==",
+      "dev": true,
+      "optional": true
+    },
+    "@esbuild/linux-x64": {
+      "version": "0.19.3",
+      "resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.19.3.tgz",
+      "integrity": "sha512-ogV0+GwEmvwg/8ZbsyfkYGaLACBQWDvO0Kkh8LKBGKj9Ru8VM39zssrnu9Sxn1wbapA2qNS6BiLdwJZGouyCwQ==",
+      "dev": true,
+      "optional": true
+    },
+    "@esbuild/netbsd-x64": {
+      "version": "0.19.3",
+      "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.19.3.tgz",
+      "integrity": "sha512-o1jLNe4uzQv2DKXMlmEzf66Wd8MoIhLNO2nlQBHLtWyh2MitDG7sMpfCO3NTcoTMuqHjfufgUQDFRI5C+xsXQw==",
+      "dev": true,
+      "optional": true
+    },
+    "@esbuild/openbsd-x64": {
+      "version": "0.19.3",
+      "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.19.3.tgz",
+      "integrity": "sha512-AZJCnr5CZgZOdhouLcfRdnk9Zv6HbaBxjcyhq0StNcvAdVZJSKIdOiPB9az2zc06ywl0ePYJz60CjdKsQacp5Q==",
+      "dev": true,
+      "optional": true
+    },
+    "@esbuild/sunos-x64": {
+      "version": "0.19.3",
+      "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.19.3.tgz",
+      "integrity": "sha512-Acsujgeqg9InR4glTRvLKGZ+1HMtDm94ehTIHKhJjFpgVzZG9/pIcWW/HA/DoMfEyXmANLDuDZ2sNrWcjq1lxw==",
+      "dev": true,
+      "optional": true
+    },
+    "@esbuild/win32-arm64": {
+      "version": "0.19.3",
+      "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.19.3.tgz",
+      "integrity": "sha512-FSrAfjVVy7TifFgYgliiJOyYynhQmqgPj15pzLyJk8BUsnlWNwP/IAy6GAiB1LqtoivowRgidZsfpoYLZH586A==",
+      "dev": true,
+      "optional": true
+    },
+    "@esbuild/win32-ia32": {
+      "version": "0.19.3",
+      "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.19.3.tgz",
+      "integrity": "sha512-xTScXYi12xLOWZ/sc5RBmMN99BcXp/eEf7scUC0oeiRoiT5Vvo9AycuqCp+xdpDyAU+LkrCqEpUS9fCSZF8J3Q==",
+      "dev": true,
+      "optional": true
+    },
+    "@esbuild/win32-x64": {
+      "version": "0.19.3",
+      "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.19.3.tgz",
+      "integrity": "sha512-FbUN+0ZRXsypPyWE2IwIkVjDkDnJoMJARWOcFZn4KPPli+QnKqF0z1anvfaYe3ev5HFCpRDLLBDHyOALLppWHw==",
+      "dev": true,
+      "optional": true
+    },
     "@eslint-community/eslint-utils": {
-      "version": "4.2.0",
-      "resolved": "https://registry.npmjs.org/@eslint-community/eslint-utils/-/eslint-utils-4.2.0.tgz",
-      "integrity": "sha512-gB8T4H4DEfX2IV9zGDJPOBgP1e/DbfCPDTtEqUMckpvzS1OYtva8JdFYBqMwYk7xAQ429WGF/UPqn8uQ//h2vQ==",
+      "version": "4.4.0",
+      "resolved": "https://registry.npmjs.org/@eslint-community/eslint-utils/-/eslint-utils-4.4.0.tgz",
+      "integrity": "sha512-1/sA4dwrzBAyeUoQ6oxahHKmrZvsnLCg4RfxW3ZFGGmQkSNQPFNLV9CUEFQP1x9EYXHTo5p6xdhZM1Ne9p/AfA==",
       "dev": true,
       "requires": {
         "eslint-visitor-keys": "^3.3.0"
       }
     },
+    "@eslint-community/regexpp": {
+      "version": "4.9.1",
+      "resolved": "https://registry.npmjs.org/@eslint-community/regexpp/-/regexpp-4.9.1.tgz",
+      "integrity": "sha512-Y27x+MBLjXa+0JWDhykM3+JE+il3kHKAEqabfEWq3SDhZjLYb6/BHL/JKFnH3fe207JaXkyDo685Oc2Glt6ifA==",
+      "dev": true
+    },
     "@eslint/eslintrc": {
-      "version": "2.0.0",
-      "resolved": "https://registry.npmjs.org/@eslint/eslintrc/-/eslintrc-2.0.0.tgz",
-      "integrity": "sha512-fluIaaV+GyV24CCu/ggiHdV+j4RNh85yQnAYS/G2mZODZgGmmlrgCydjUcV3YvxCm9x8nMAfThsqTni4KiXT4A==",
+      "version": "2.1.2",
+      "resolved": "https://registry.npmjs.org/@eslint/eslintrc/-/eslintrc-2.1.2.tgz",
+      "integrity": "sha512-+wvgpDsrB1YqAMdEUCcnTlpfVBH7Vqn6A/NT3D8WVXFIaKMlErPIZT3oCIAVCOtarRpMtelZLqJeU3t7WY6X6g==",
       "dev": true,
       "requires": {
         "ajv": "^6.12.4",
         "debug": "^4.3.2",
-        "espree": "^9.4.0",
+        "espree": "^9.6.0",
         "globals": "^13.19.0",
         "ignore": "^5.2.0",
         "import-fresh": "^3.2.1",
@@ -6288,15 +4949,15 @@
       }
     },
     "@eslint/js": {
-      "version": "8.35.0",
-      "resolved": "https://registry.npmjs.org/@eslint/js/-/js-8.35.0.tgz",
-      "integrity": "sha512-JXdzbRiWclLVoD8sNUjR443VVlYqiYmDVT6rGUEIEHU5YJW0gaVZwV2xgM7D4arkvASqD0IlLUVjHiFuxaftRw==",
+      "version": "8.51.0",
+      "resolved": "https://registry.npmjs.org/@eslint/js/-/js-8.51.0.tgz",
+      "integrity": "sha512-HxjQ8Qn+4SI3/AFv6sOrDB+g6PpUTDwSJiQqOrnneEk8L71161srI9gjzzZvYVbzHiVg/BvcH95+cK/zfIt4pg==",
       "dev": true
     },
     "@humanwhocodes/config-array": {
-      "version": "0.11.8",
-      "resolved": "https://registry.npmjs.org/@humanwhocodes/config-array/-/config-array-0.11.8.tgz",
-      "integrity": "sha512-UybHIJzJnR5Qc/MsD9Kr+RpO2h+/P1GhOwdiLPXK5TWk5sgTdu88bTD9UP+CKbPPh5Rni1u0GjAdYQLemG8g+g==",
+      "version": "0.11.11",
+      "resolved": "https://registry.npmjs.org/@humanwhocodes/config-array/-/config-array-0.11.11.tgz",
+      "integrity": "sha512-N2brEuAadi0CcdeMXUkhbZB84eskAc8MEX1By6qEchoVywSgXPIjou4rYsl0V3Hj0ZnuGycGCjdNgockbzeWNA==",
       "dev": true,
       "requires": {
         "@humanwhocodes/object-schema": "^1.2.1",
@@ -6316,55 +4977,12 @@
       "integrity": "sha512-ZnQMnLV4e7hDlUvw8H+U8ASL02SS2Gn6+9Ac3wGGLIe7+je2AeAOxPY+izIPJDfFDb7eDjev0Us8MO1iFRN8hA==",
       "dev": true
     },
-    "@jridgewell/gen-mapping": {
-      "version": "0.3.2",
-      "resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.2.tgz",
-      "integrity": "sha512-mh65xKQAzI6iBcFzwv28KVWSmCkdRBWoOh+bYQGW3+6OZvbbN3TqMGo5hqYxQniRcH9F2VZIoJCm4pa3BPDK/A==",
-      "dev": true,
-      "requires": {
-        "@jridgewell/set-array": "^1.0.1",
-        "@jridgewell/sourcemap-codec": "^1.4.10",
-        "@jridgewell/trace-mapping": "^0.3.9"
-      }
-    },
-    "@jridgewell/resolve-uri": {
-      "version": "3.1.0",
-      "resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.0.tgz",
-      "integrity": "sha512-F2msla3tad+Mfht5cJq7LSXcdudKTWCVYUgw6pLFOOHSTtZlj6SWNYAp+AhuqLmWdBO2X5hPrLcu8cVP8fy28w==",
-      "dev": true
-    },
-    "@jridgewell/set-array": {
-      "version": "1.1.2",
-      "resolved": "https://registry.npmjs.org/@jridgewell/set-array/-/set-array-1.1.2.tgz",
-      "integrity": "sha512-xnkseuNADM0gt2bs+BvhO0p78Mk762YnZdsuzFV018NoG1Sj1SCQvpSqa7XUaTam5vAGasABV9qXASMKnFMwMw==",
-      "dev": true
-    },
-    "@jridgewell/source-map": {
-      "version": "0.3.2",
-      "resolved": "https://registry.npmjs.org/@jridgewell/source-map/-/source-map-0.3.2.tgz",
-      "integrity": "sha512-m7O9o2uR8k2ObDysZYzdfhb08VuEml5oWGiosa1VdaPZ/A6QyPkAJuwN0Q1lhULOf6B7MtQmHENS743hWtCrgw==",
-      "dev": true,
-      "requires": {
-        "@jridgewell/gen-mapping": "^0.3.0",
-        "@jridgewell/trace-mapping": "^0.3.9"
-      }
-    },
-    "@jridgewell/sourcemap-codec": {
-      "version": "1.4.14",
-      "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.4.14.tgz",
-      "integrity": "sha512-XPSJHWmi394fuUuzDnGz1wiKqWfo1yXecHQMRf2l6hztTO+nPru658AyDngaBe7isIxEkRsPR3FZh+s7iVa4Uw==",
+    "@jspm/core": {
+      "version": "2.0.1",
+      "resolved": "https://registry.npmjs.org/@jspm/core/-/core-2.0.1.tgz",
+      "integrity": "sha512-Lg3PnLp0QXpxwLIAuuJboLeRaIhrgJjeuh797QADg3xz8wGLugQOS5DpsE8A6i6Adgzf+bacllkKZG3J0tGfDw==",
       "dev": true
     },
-    "@jridgewell/trace-mapping": {
-      "version": "0.3.17",
-      "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.17.tgz",
-      "integrity": "sha512-MCNzAp77qzKca9+W/+I0+sEpaUnZoeasnghNeVc41VZCEKaCH73Vq3BZZ/SzWIgrqE4H4ceI+p+b6C0mHf9T4g==",
-      "dev": true,
-      "requires": {
-        "@jridgewell/resolve-uri": "3.1.0",
-        "@jridgewell/sourcemap-codec": "1.4.14"
-      }
-    },
     "@nodelib/fs.scandir": {
       "version": "2.1.5",
       "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz",
@@ -6391,42 +5009,10 @@
         "fastq": "^1.6.0"
       }
     },
-    "@polka/url": {
-      "version": "1.0.0-next.21",
-      "resolved": "https://registry.npmjs.org/@polka/url/-/url-1.0.0-next.21.tgz",
-      "integrity": "sha512-a5Sab1C4/icpTZVzZc5Ghpz88yQtGOyNqYXcZgOssB2uuAr+wF/MvN6bgtW32q7HHrvBki+BsZ0OuNv6EV3K9g==",
-      "dev": true
-    },
-    "@types/eslint": {
-      "version": "8.21.1",
-      "resolved": "https://registry.npmjs.org/@types/eslint/-/eslint-8.21.1.tgz",
-      "integrity": "sha512-rc9K8ZpVjNcLs8Fp0dkozd5Pt2Apk1glO4Vgz8ix1u6yFByxfqo5Yavpy65o+93TAe24jr7v+eSBtFLvOQtCRQ==",
-      "dev": true,
-      "requires": {
-        "@types/estree": "*",
-        "@types/json-schema": "*"
-      }
-    },
-    "@types/eslint-scope": {
-      "version": "3.7.4",
-      "resolved": "https://registry.npmjs.org/@types/eslint-scope/-/eslint-scope-3.7.4.tgz",
-      "integrity": "sha512-9K4zoImiZc3HlIp6AVUDE4CWYx22a+lhSZMYNpbjW04+YF0KWj4pJXnEMjdnFTiQibFFmElcsasJXDbdI/EPhA==",
-      "dev": true,
-      "requires": {
-        "@types/eslint": "*",
-        "@types/estree": "*"
-      }
-    },
-    "@types/estree": {
-      "version": "0.0.51",
-      "resolved": "https://registry.npmjs.org/@types/estree/-/estree-0.0.51.tgz",
-      "integrity": "sha512-CuPgU6f3eT/XgKKPqKd/gLZV1Xmvf1a2R5POBOGQa6uv82xpls89HU5zKeVoyR8XzHd1RGNOlQlvUe3CFkjWNQ==",
-      "dev": true
-    },
     "@types/fs-extra": {
-      "version": "11.0.1",
-      "resolved": "https://registry.npmjs.org/@types/fs-extra/-/fs-extra-11.0.1.tgz",
-      "integrity": "sha512-MxObHvNl4A69ofaTRU8DFqvgzzv8s9yRtaPPm5gud9HDNvpB3GPQFvNuTWAI59B9huVGV5jXYJwbCsmBsOGYWA==",
+      "version": "11.0.2",
+      "resolved": "https://registry.npmjs.org/@types/fs-extra/-/fs-extra-11.0.2.tgz",
+      "integrity": "sha512-c0hrgAOVYr21EX8J0jBMXGLMgJqVf/v6yxi0dLaJboW9aQPh16Id+z6w2Tx1hm+piJOLv8xPfVKZCLfjPw/IMQ==",
       "dev": true,
       "requires": {
         "@types/jsonfile": "*",
@@ -6434,9 +5020,9 @@
       }
     },
     "@types/json-schema": {
-      "version": "7.0.11",
-      "resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.11.tgz",
-      "integrity": "sha512-wOuvG1SN4Us4rez+tylwwwCV1psiNVOkJeM3AUWUNWg/jDQY2+HE/444y5gc+jBmRqASOm2Oeh5c1axHobwRKQ==",
+      "version": "7.0.13",
+      "resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.13.tgz",
+      "integrity": "sha512-RbSSoHliUbnXj3ny0CNFOoxrIDV6SUGyStHsvDqosw6CkdPV8TtWGlfecuK4ToyMEAql6pzNxgCFKanovUzlgQ==",
       "dev": true
     },
     "@types/json5": {
@@ -6455,9 +5041,9 @@
       }
     },
     "@types/mocha": {
-      "version": "10.0.1",
-      "resolved": "https://registry.npmjs.org/@types/mocha/-/mocha-10.0.1.tgz",
-      "integrity": "sha512-/fvYntiO1GeICvqbQ3doGDIP97vWmvFt83GKguJ6prmQM2iXZfFcq6YE8KteFyRtX2/h5Hf91BYvPodJKFYv5Q==",
+      "version": "10.0.2",
+      "resolved": "https://registry.npmjs.org/@types/mocha/-/mocha-10.0.2.tgz",
+      "integrity": "sha512-NaHL0+0lLNhX6d9rs+NSt97WH/gIlRHmszXbQ/8/MV/eVcFNdeJ/GYhrFuUc8K7WuPhRhTSdMkCp8VMzhUq85w==",
       "dev": true
     },
     "@types/node": {
@@ -6479,289 +5065,111 @@
       "dev": true
     },
     "@types/semver": {
-      "version": "7.3.13",
-      "resolved": "https://registry.npmjs.org/@types/semver/-/semver-7.3.13.tgz",
-      "integrity": "sha512-21cFJr9z3g5dW8B0CVI9g2O9beqaThGQ6ZFBqHfwhzLDKUxaqTIy3vnfah/UPkfOiF2pLq+tGz+W8RyCskuslw==",
+      "version": "7.5.3",
+      "resolved": "https://registry.npmjs.org/@types/semver/-/semver-7.5.3.tgz",
+      "integrity": "sha512-OxepLK9EuNEIPxWNME+C6WwbRAOOI2o2BaQEGzz5Lu2e4Z5eDnEo+/aVEDMIXywoJitJ7xWd641wrGLZdtwRyw==",
       "dev": true
     },
     "@typescript-eslint/eslint-plugin": {
-      "version": "5.54.1",
-      "resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-5.54.1.tgz",
-      "integrity": "sha512-a2RQAkosH3d3ZIV08s3DcL/mcGc2M/UC528VkPULFxR9VnVPT8pBu0IyBAJJmVsCmhVfwQX1v6q+QGnmSe1bew==",
+      "version": "6.7.4",
+      "resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-6.7.4.tgz",
+      "integrity": "sha512-DAbgDXwtX+pDkAHwiGhqP3zWUGpW49B7eqmgpPtg+BKJXwdct79ut9+ifqOFPJGClGKSHXn2PTBatCnldJRUoA==",
       "dev": true,
       "requires": {
-        "@typescript-eslint/scope-manager": "5.54.1",
-        "@typescript-eslint/type-utils": "5.54.1",
-        "@typescript-eslint/utils": "5.54.1",
+        "@eslint-community/regexpp": "^4.5.1",
+        "@typescript-eslint/scope-manager": "6.7.4",
+        "@typescript-eslint/type-utils": "6.7.4",
+        "@typescript-eslint/utils": "6.7.4",
+        "@typescript-eslint/visitor-keys": "6.7.4",
         "debug": "^4.3.4",
-        "grapheme-splitter": "^1.0.4",
-        "ignore": "^5.2.0",
-        "natural-compare-lite": "^1.4.0",
-        "regexpp": "^3.2.0",
-        "semver": "^7.3.7",
-        "tsutils": "^3.21.0"
+        "graphemer": "^1.4.0",
+        "ignore": "^5.2.4",
+        "natural-compare": "^1.4.0",
+        "semver": "^7.5.4",
+        "ts-api-utils": "^1.0.1"
       }
     },
     "@typescript-eslint/parser": {
-      "version": "5.54.1",
-      "resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-5.54.1.tgz",
-      "integrity": "sha512-8zaIXJp/nG9Ff9vQNh7TI+C3nA6q6iIsGJ4B4L6MhZ7mHnTMR4YP5vp2xydmFXIy8rpyIVbNAG44871LMt6ujg==",
+      "version": "6.7.4",
+      "resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-6.7.4.tgz",
+      "integrity": "sha512-I5zVZFY+cw4IMZUeNCU7Sh2PO5O57F7Lr0uyhgCJmhN/BuTlnc55KxPonR4+EM3GBdfiCyGZye6DgMjtubQkmA==",
       "dev": true,
       "requires": {
-        "@typescript-eslint/scope-manager": "5.54.1",
-        "@typescript-eslint/types": "5.54.1",
-        "@typescript-eslint/typescript-estree": "5.54.1",
+        "@typescript-eslint/scope-manager": "6.7.4",
+        "@typescript-eslint/types": "6.7.4",
+        "@typescript-eslint/typescript-estree": "6.7.4",
+        "@typescript-eslint/visitor-keys": "6.7.4",
         "debug": "^4.3.4"
       }
     },
     "@typescript-eslint/scope-manager": {
-      "version": "5.54.1",
-      "resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-5.54.1.tgz",
-      "integrity": "sha512-zWKuGliXxvuxyM71UA/EcPxaviw39dB2504LqAmFDjmkpO8qNLHcmzlh6pbHs1h/7YQ9bnsO8CCcYCSA8sykUg==",
+      "version": "6.7.4",
+      "resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-6.7.4.tgz",
+      "integrity": "sha512-SdGqSLUPTXAXi7c3Ob7peAGVnmMoGzZ361VswK2Mqf8UOYcODiYvs8rs5ILqEdfvX1lE7wEZbLyELCW+Yrql1A==",
       "dev": true,
       "requires": {
-        "@typescript-eslint/types": "5.54.1",
-        "@typescript-eslint/visitor-keys": "5.54.1"
+        "@typescript-eslint/types": "6.7.4",
+        "@typescript-eslint/visitor-keys": "6.7.4"
       }
     },
     "@typescript-eslint/type-utils": {
-      "version": "5.54.1",
-      "resolved": "https://registry.npmjs.org/@typescript-eslint/type-utils/-/type-utils-5.54.1.tgz",
-      "integrity": "sha512-WREHsTz0GqVYLIbzIZYbmUUr95DKEKIXZNH57W3s+4bVnuF1TKe2jH8ZNH8rO1CeMY3U4j4UQeqPNkHMiGem3g==",
+      "version": "6.7.4",
+      "resolved": "https://registry.npmjs.org/@typescript-eslint/type-utils/-/type-utils-6.7.4.tgz",
+      "integrity": "sha512-n+g3zi1QzpcAdHFP9KQF+rEFxMb2KxtnJGID3teA/nxKHOVi3ylKovaqEzGBbVY2pBttU6z85gp0D00ufLzViQ==",
       "dev": true,
       "requires": {
-        "@typescript-eslint/typescript-estree": "5.54.1",
-        "@typescript-eslint/utils": "5.54.1",
+        "@typescript-eslint/typescript-estree": "6.7.4",
+        "@typescript-eslint/utils": "6.7.4",
         "debug": "^4.3.4",
-        "tsutils": "^3.21.0"
+        "ts-api-utils": "^1.0.1"
       }
     },
     "@typescript-eslint/types": {
-      "version": "5.54.1",
-      "resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-5.54.1.tgz",
-      "integrity": "sha512-G9+1vVazrfAfbtmCapJX8jRo2E4MDXxgm/IMOF4oGh3kq7XuK3JRkOg6y2Qu1VsTRmWETyTkWt1wxy7X7/yLkw==",
+      "version": "6.7.4",
+      "resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-6.7.4.tgz",
+      "integrity": "sha512-o9XWK2FLW6eSS/0r/tgjAGsYasLAnOWg7hvZ/dGYSSNjCh+49k5ocPN8OmG5aZcSJ8pclSOyVKP2x03Sj+RrCA==",
       "dev": true
     },
     "@typescript-eslint/typescript-estree": {
-      "version": "5.54.1",
-      "resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-5.54.1.tgz",
-      "integrity": "sha512-bjK5t+S6ffHnVwA0qRPTZrxKSaFYocwFIkZx5k7pvWfsB1I57pO/0M0Skatzzw1sCkjJ83AfGTL0oFIFiDX3bg==",
+      "version": "6.7.4",
+      "resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-6.7.4.tgz",
+      "integrity": "sha512-ty8b5qHKatlNYd9vmpHooQz3Vki3gG+3PchmtsA4TgrZBKWHNjWfkQid7K7xQogBqqc7/BhGazxMD5vr6Ha+iQ==",
       "dev": true,
       "requires": {
-        "@typescript-eslint/types": "5.54.1",
-        "@typescript-eslint/visitor-keys": "5.54.1",
+        "@typescript-eslint/types": "6.7.4",
+        "@typescript-eslint/visitor-keys": "6.7.4",
         "debug": "^4.3.4",
         "globby": "^11.1.0",
         "is-glob": "^4.0.3",
-        "semver": "^7.3.7",
-        "tsutils": "^3.21.0"
+        "semver": "^7.5.4",
+        "ts-api-utils": "^1.0.1"
       }
     },
     "@typescript-eslint/utils": {
-      "version": "5.54.1",
-      "resolved": "https://registry.npmjs.org/@typescript-eslint/utils/-/utils-5.54.1.tgz",
-      "integrity": "sha512-IY5dyQM8XD1zfDe5X8jegX6r2EVU5o/WJnLu/znLPWCBF7KNGC+adacXnt5jEYS9JixDcoccI6CvE4RCjHMzCQ==",
+      "version": "6.7.4",
+      "resolved": "https://registry.npmjs.org/@typescript-eslint/utils/-/utils-6.7.4.tgz",
+      "integrity": "sha512-PRQAs+HUn85Qdk+khAxsVV+oULy3VkbH3hQ8hxLRJXWBEd7iI+GbQxH5SEUSH7kbEoTp6oT1bOwyga24ELALTA==",
       "dev": true,
       "requires": {
-        "@types/json-schema": "^7.0.9",
-        "@types/semver": "^7.3.12",
-        "@typescript-eslint/scope-manager": "5.54.1",
-        "@typescript-eslint/types": "5.54.1",
-        "@typescript-eslint/typescript-estree": "5.54.1",
-        "eslint-scope": "^5.1.1",
-        "eslint-utils": "^3.0.0",
-        "semver": "^7.3.7"
+        "@eslint-community/eslint-utils": "^4.4.0",
+        "@types/json-schema": "^7.0.12",
+        "@types/semver": "^7.5.0",
+        "@typescript-eslint/scope-manager": "6.7.4",
+        "@typescript-eslint/types": "6.7.4",
+        "@typescript-eslint/typescript-estree": "6.7.4",
+        "semver": "^7.5.4"
       }
     },
     "@typescript-eslint/visitor-keys": {
-      "version": "5.54.1",
-      "resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-5.54.1.tgz",
-      "integrity": "sha512-q8iSoHTgwCfgcRJ2l2x+xCbu8nBlRAlsQ33k24Adj8eoVBE0f8dUeI+bAa8F84Mv05UGbAx57g2zrRsYIooqQg==",
-      "dev": true,
-      "requires": {
-        "@typescript-eslint/types": "5.54.1",
-        "eslint-visitor-keys": "^3.3.0"
-      }
-    },
-    "@webassemblyjs/ast": {
-      "version": "1.11.1",
-      "resolved": "https://registry.npmjs.org/@webassemblyjs/ast/-/ast-1.11.1.tgz",
-      "integrity": "sha512-ukBh14qFLjxTQNTXocdyksN5QdM28S1CxHt2rdskFyL+xFV7VremuBLVbmCePj+URalXBENx/9Lm7lnhihtCSw==",
-      "dev": true,
-      "requires": {
-        "@webassemblyjs/helper-numbers": "1.11.1",
-        "@webassemblyjs/helper-wasm-bytecode": "1.11.1"
-      }
-    },
-    "@webassemblyjs/floating-point-hex-parser": {
-      "version": "1.11.1",
-      "resolved": "https://registry.npmjs.org/@webassemblyjs/floating-point-hex-parser/-/floating-point-hex-parser-1.11.1.tgz",
-      "integrity": "sha512-iGRfyc5Bq+NnNuX8b5hwBrRjzf0ocrJPI6GWFodBFzmFnyvrQ83SHKhmilCU/8Jv67i4GJZBMhEzltxzcNagtQ==",
-      "dev": true
-    },
-    "@webassemblyjs/helper-api-error": {
-      "version": "1.11.1",
-      "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-api-error/-/helper-api-error-1.11.1.tgz",
-      "integrity": "sha512-RlhS8CBCXfRUR/cwo2ho9bkheSXG0+NwooXcc3PAILALf2QLdFyj7KGsKRbVc95hZnhnERon4kW/D3SZpp6Tcg==",
-      "dev": true
-    },
-    "@webassemblyjs/helper-buffer": {
-      "version": "1.11.1",
-      "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-buffer/-/helper-buffer-1.11.1.tgz",
-      "integrity": "sha512-gwikF65aDNeeXa8JxXa2BAk+REjSyhrNC9ZwdT0f8jc4dQQeDQ7G4m0f2QCLPJiMTTO6wfDmRmj/pW0PsUvIcA==",
-      "dev": true
-    },
-    "@webassemblyjs/helper-numbers": {
-      "version": "1.11.1",
-      "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-numbers/-/helper-numbers-1.11.1.tgz",
-      "integrity": "sha512-vDkbxiB8zfnPdNK9Rajcey5C0w+QJugEglN0of+kmO8l7lDb77AnlKYQF7aarZuCrv+l0UvqL+68gSDr3k9LPQ==",
-      "dev": true,
-      "requires": {
-        "@webassemblyjs/floating-point-hex-parser": "1.11.1",
-        "@webassemblyjs/helper-api-error": "1.11.1",
-        "@xtuc/long": "4.2.2"
-      }
-    },
-    "@webassemblyjs/helper-wasm-bytecode": {
-      "version": "1.11.1",
-      "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-wasm-bytecode/-/helper-wasm-bytecode-1.11.1.tgz",
-      "integrity": "sha512-PvpoOGiJwXeTrSf/qfudJhwlvDQxFgelbMqtq52WWiXC6Xgg1IREdngmPN3bs4RoO83PnL/nFrxucXj1+BX62Q==",
-      "dev": true
-    },
-    "@webassemblyjs/helper-wasm-section": {
-      "version": "1.11.1",
-      "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-wasm-section/-/helper-wasm-section-1.11.1.tgz",
-      "integrity": "sha512-10P9No29rYX1j7F3EVPX3JvGPQPae+AomuSTPiF9eBQeChHI6iqjMIwR9JmOJXwpnn/oVGDk7I5IlskuMwU/pg==",
-      "dev": true,
-      "requires": {
-        "@webassemblyjs/ast": "1.11.1",
-        "@webassemblyjs/helper-buffer": "1.11.1",
-        "@webassemblyjs/helper-wasm-bytecode": "1.11.1",
-        "@webassemblyjs/wasm-gen": "1.11.1"
-      }
-    },
-    "@webassemblyjs/ieee754": {
-      "version": "1.11.1",
-      "resolved": "https://registry.npmjs.org/@webassemblyjs/ieee754/-/ieee754-1.11.1.tgz",
-      "integrity": "sha512-hJ87QIPtAMKbFq6CGTkZYJivEwZDbQUgYd3qKSadTNOhVY7p+gfP6Sr0lLRVTaG1JjFj+r3YchoqRYxNH3M0GQ==",
-      "dev": true,
-      "requires": {
-        "@xtuc/ieee754": "^1.2.0"
-      }
-    },
-    "@webassemblyjs/leb128": {
-      "version": "1.11.1",
-      "resolved": "https://registry.npmjs.org/@webassemblyjs/leb128/-/leb128-1.11.1.tgz",
-      "integrity": "sha512-BJ2P0hNZ0u+Th1YZXJpzW6miwqQUGcIHT1G/sf72gLVD9DZ5AdYTqPNbHZh6K1M5VmKvFXwGSWZADz+qBWxeRw==",
-      "dev": true,
-      "requires": {
-        "@xtuc/long": "4.2.2"
-      }
-    },
-    "@webassemblyjs/utf8": {
-      "version": "1.11.1",
-      "resolved": "https://registry.npmjs.org/@webassemblyjs/utf8/-/utf8-1.11.1.tgz",
-      "integrity": "sha512-9kqcxAEdMhiwQkHpkNiorZzqpGrodQQ2IGrHHxCy+Ozng0ofyMA0lTqiLkVs1uzTRejX+/O0EOT7KxqVPuXosQ==",
-      "dev": true
-    },
-    "@webassemblyjs/wasm-edit": {
-      "version": "1.11.1",
-      "resolved": "https://registry.npmjs.org/@webassemblyjs/wasm-edit/-/wasm-edit-1.11.1.tgz",
-      "integrity": "sha512-g+RsupUC1aTHfR8CDgnsVRVZFJqdkFHpsHMfJuWQzWU3tvnLC07UqHICfP+4XyL2tnr1amvl1Sdp06TnYCmVkA==",
-      "dev": true,
-      "requires": {
-        "@webassemblyjs/ast": "1.11.1",
-        "@webassemblyjs/helper-buffer": "1.11.1",
-        "@webassemblyjs/helper-wasm-bytecode": "1.11.1",
-        "@webassemblyjs/helper-wasm-section": "1.11.1",
-        "@webassemblyjs/wasm-gen": "1.11.1",
-        "@webassemblyjs/wasm-opt": "1.11.1",
-        "@webassemblyjs/wasm-parser": "1.11.1",
-        "@webassemblyjs/wast-printer": "1.11.1"
-      }
-    },
-    "@webassemblyjs/wasm-gen": {
-      "version": "1.11.1",
-      "resolved": "https://registry.npmjs.org/@webassemblyjs/wasm-gen/-/wasm-gen-1.11.1.tgz",
-      "integrity": "sha512-F7QqKXwwNlMmsulj6+O7r4mmtAlCWfO/0HdgOxSklZfQcDu0TpLiD1mRt/zF25Bk59FIjEuGAIyn5ei4yMfLhA==",
-      "dev": true,
-      "requires": {
-        "@webassemblyjs/ast": "1.11.1",
-        "@webassemblyjs/helper-wasm-bytecode": "1.11.1",
-        "@webassemblyjs/ieee754": "1.11.1",
-        "@webassemblyjs/leb128": "1.11.1",
-        "@webassemblyjs/utf8": "1.11.1"
-      }
-    },
-    "@webassemblyjs/wasm-opt": {
-      "version": "1.11.1",
-      "resolved": "https://registry.npmjs.org/@webassemblyjs/wasm-opt/-/wasm-opt-1.11.1.tgz",
-      "integrity": "sha512-VqnkNqnZlU5EB64pp1l7hdm3hmQw7Vgqa0KF/KCNO9sIpI6Fk6brDEiX+iCOYrvMuBWDws0NkTOxYEb85XQHHw==",
-      "dev": true,
-      "requires": {
-        "@webassemblyjs/ast": "1.11.1",
-        "@webassemblyjs/helper-buffer": "1.11.1",
-        "@webassemblyjs/wasm-gen": "1.11.1",
-        "@webassemblyjs/wasm-parser": "1.11.1"
-      }
-    },
-    "@webassemblyjs/wasm-parser": {
-      "version": "1.11.1",
-      "resolved": "https://registry.npmjs.org/@webassemblyjs/wasm-parser/-/wasm-parser-1.11.1.tgz",
-      "integrity": "sha512-rrBujw+dJu32gYB7/Lup6UhdkPx9S9SnobZzRVL7VcBH9Bt9bCBLEuX/YXOOtBsOZ4NQrRykKhffRWHvigQvOA==",
-      "dev": true,
-      "requires": {
-        "@webassemblyjs/ast": "1.11.1",
-        "@webassemblyjs/helper-api-error": "1.11.1",
-        "@webassemblyjs/helper-wasm-bytecode": "1.11.1",
-        "@webassemblyjs/ieee754": "1.11.1",
-        "@webassemblyjs/leb128": "1.11.1",
-        "@webassemblyjs/utf8": "1.11.1"
-      }
-    },
-    "@webassemblyjs/wast-printer": {
-      "version": "1.11.1",
-      "resolved": "https://registry.npmjs.org/@webassemblyjs/wast-printer/-/wast-printer-1.11.1.tgz",
-      "integrity": "sha512-IQboUWM4eKzWW+N/jij2sRatKMh99QEelo3Eb2q0qXkvPRISAj8Qxtmw5itwqK+TTkBuUIE45AxYPToqPtL5gg==",
+      "version": "6.7.4",
+      "resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-6.7.4.tgz",
+      "integrity": "sha512-pOW37DUhlTZbvph50x5zZCkFn3xzwkGtNoJHzIM3svpiSkJzwOYr/kVBaXmf+RAQiUDs1AHEZVNPg6UJCJpwRA==",
       "dev": true,
       "requires": {
-        "@webassemblyjs/ast": "1.11.1",
-        "@xtuc/long": "4.2.2"
+        "@typescript-eslint/types": "6.7.4",
+        "eslint-visitor-keys": "^3.4.1"
       }
     },
-    "@webpack-cli/configtest": {
-      "version": "2.0.1",
-      "resolved": "https://registry.npmjs.org/@webpack-cli/configtest/-/configtest-2.0.1.tgz",
-      "integrity": "sha512-njsdJXJSiS2iNbQVS0eT8A/KPnmyH4pv1APj2K0d1wrZcBLw+yppxOy4CGqa0OxDJkzfL/XELDhD8rocnIwB5A==",
-      "dev": true,
-      "requires": {}
-    },
-    "@webpack-cli/info": {
-      "version": "2.0.1",
-      "resolved": "https://registry.npmjs.org/@webpack-cli/info/-/info-2.0.1.tgz",
-      "integrity": "sha512-fE1UEWTwsAxRhrJNikE7v4EotYflkEhBL7EbajfkPlf6E37/2QshOy/D48Mw8G5XMFlQtS6YV42vtbG9zBpIQA==",
-      "dev": true,
-      "requires": {}
-    },
-    "@webpack-cli/serve": {
-      "version": "2.0.1",
-      "resolved": "https://registry.npmjs.org/@webpack-cli/serve/-/serve-2.0.1.tgz",
-      "integrity": "sha512-0G7tNyS+yW8TdgHwZKlDWYXFA6OJQnoLCQvYKkQP0Q2X205PSQ6RNUj0M+1OB/9gRQaUZ/ccYfaxd0nhaWKfjw==",
-      "dev": true,
-      "requires": {}
-    },
-    "@xtuc/ieee754": {
-      "version": "1.2.0",
-      "resolved": "https://registry.npmjs.org/@xtuc/ieee754/-/ieee754-1.2.0.tgz",
-      "integrity": "sha512-DX8nKgqcGwsc0eJSqYt5lwP4DH5FlHnmuWWBRy7X0NcaGR0ZtuyeESgMwTYVEtxmsNGY+qit4QYT/MIYTOTPeA==",
-      "dev": true
-    },
-    "@xtuc/long": {
-      "version": "4.2.2",
-      "resolved": "https://registry.npmjs.org/@xtuc/long/-/long-4.2.2.tgz",
-      "integrity": "sha512-NuHqBY1PB/D8xU6s/thBgOAiAP7HOYDQ32+BFZILJ8ivkUkAHQnWfn6WhL79Owj1qmUnoN/YPhktdIoucipkAQ==",
-      "dev": true
-    },
     "abort-controller": {
       "version": "3.0.0",
       "resolved": "https://registry.npmjs.org/abort-controller/-/abort-controller-3.0.0.tgz",
@@ -6772,18 +5180,11 @@
       }
     },
     "acorn": {
-      "version": "8.8.2",
-      "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.8.2.tgz",
-      "integrity": "sha512-xjIYgE8HBrkpd/sJqOGNspf8uHG+NOHGOw6a/Urj8taM2EXfdNAH2oFcPeIFfsv3+kz/mJrS5VuMqbNLjCa2vw==",
+      "version": "8.10.0",
+      "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.10.0.tgz",
+      "integrity": "sha512-F0SAmZ8iUtS//m8DmCTA0jlh6TDKkHQyK6xc6V4KDTyZKA9dnvX9/3sRTVQrWm79glUAZbnmmNcdYwUIHWVybw==",
       "dev": true
     },
-    "acorn-import-assertions": {
-      "version": "1.8.0",
-      "resolved": "https://registry.npmjs.org/acorn-import-assertions/-/acorn-import-assertions-1.8.0.tgz",
-      "integrity": "sha512-m7VZ3jwz4eK6A4Vtt8Ew1/mNbP24u0FhdyfA7fSvnJR6LMdfOYnmuIrrJAgrYfYJ10F/otaHTtrtrtmHdMNzEw==",
-      "dev": true,
-      "requires": {}
-    },
     "acorn-jsx": {
       "version": "5.3.2",
       "resolved": "https://registry.npmjs.org/acorn-jsx/-/acorn-jsx-5.3.2.tgz",
@@ -6791,12 +5192,6 @@
       "dev": true,
       "requires": {}
     },
-    "acorn-walk": {
-      "version": "8.2.0",
-      "resolved": "https://registry.npmjs.org/acorn-walk/-/acorn-walk-8.2.0.tgz",
-      "integrity": "sha512-k+iyHEuPgSw6SbuDpGQM+06HQUa04DZ3o+F6CSzXMvvI5KMvnaEqXe+YVe555R9nn6GPt404fos4wcgpw12SDA==",
-      "dev": true
-    },
     "ajv": {
       "version": "6.12.6",
       "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz",
@@ -6809,13 +5204,6 @@
         "uri-js": "^4.2.2"
       }
     },
-    "ajv-keywords": {
-      "version": "3.5.2",
-      "resolved": "https://registry.npmjs.org/ajv-keywords/-/ajv-keywords-3.5.2.tgz",
-      "integrity": "sha512-5p6WTN0DdTGVQk6VjcEju19IgaHudalcfabD7yhDGeA6bcQnmL+CpveLJq/3hvfwd1aof6L386Ougkx6RfyMIQ==",
-      "dev": true,
-      "requires": {}
-    },
     "ansi-colors": {
       "version": "4.1.1",
       "resolved": "https://registry.npmjs.org/ansi-colors/-/ansi-colors-4.1.1.tgz",
@@ -6853,6 +5241,12 @@
       "integrity": "sha512-lYe4Gx7QT+MKGbDsA+Z+he/Wtef0BiwDOlK/XkBrdfsh9J/jPPXbX0tE9x9cl27Tmu5gg3QUbUrQYa/y+KOHPQ==",
       "dev": true
     },
+    "are-docs-informative": {
+      "version": "0.0.2",
+      "resolved": "https://registry.npmjs.org/are-docs-informative/-/are-docs-informative-0.0.2.tgz",
+      "integrity": "sha512-ixiS0nLNNG5jNQzgZJNoUpBKdo9yTYZMGJ+QgT2jmjR7G7+QHRCc4v6LQ3NgE7EBJq+o0ams3waJwkrlBom8Ig==",
+      "dev": true
+    },
     "are-we-there-yet": {
       "version": "4.0.0",
       "resolved": "https://registry.npmjs.org/are-we-there-yet/-/are-we-there-yet-4.0.0.tgz",
@@ -6883,6 +5277,16 @@
       "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==",
       "dev": true
     },
+    "array-buffer-byte-length": {
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/array-buffer-byte-length/-/array-buffer-byte-length-1.0.0.tgz",
+      "integrity": "sha512-LPuwb2P+NrQw3XhxGc36+XSvuBPopovXYTR9Ew++Du9Yb/bx5AzBfrIsBoj0EZUifjQU+sHL21sseZ3jerWO/A==",
+      "dev": true,
+      "requires": {
+        "call-bind": "^1.0.2",
+        "is-array-buffer": "^3.0.1"
+      }
+    },
     "array-includes": {
       "version": "3.1.6",
       "resolved": "https://registry.npmjs.org/array-includes/-/array-includes-3.1.6.tgz",
@@ -6902,6 +5306,19 @@
       "integrity": "sha512-HGyxoOTYUyCM6stUe6EJgnd4EoewAI7zMdfqO+kGjnlZmBDz/cR5pf8r/cR4Wq60sL/p0IkcjUEEPwS3GFrIyw==",
       "dev": true
     },
+    "array.prototype.findlastindex": {
+      "version": "1.2.3",
+      "resolved": "https://registry.npmjs.org/array.prototype.findlastindex/-/array.prototype.findlastindex-1.2.3.tgz",
+      "integrity": "sha512-LzLoiOMAxvy+Gd3BAq3B7VeIgPdo+Q8hthvKtXybMvRV0jrXfJM/t8mw7nNlpEcVlVUnCnM2KSX4XU5HmpodOA==",
+      "dev": true,
+      "requires": {
+        "call-bind": "^1.0.2",
+        "define-properties": "^1.2.0",
+        "es-abstract": "^1.22.1",
+        "es-shim-unscopables": "^1.0.0",
+        "get-intrinsic": "^1.2.1"
+      }
+    },
     "array.prototype.flat": {
       "version": "1.3.1",
       "resolved": "https://registry.npmjs.org/array.prototype.flat/-/array.prototype.flat-1.3.1.tgz",
@@ -6926,36 +5343,19 @@
         "es-shim-unscopables": "^1.0.0"
       }
     },
-    "asn1.js": {
-      "version": "5.4.1",
-      "resolved": "https://registry.npmjs.org/asn1.js/-/asn1.js-5.4.1.tgz",
-      "integrity": "sha512-+I//4cYPccV8LdmBLiX8CYvf9Sp3vQsrqu2QNXRcrbiWvcx/UdlFiqUJJzxRQxgsZmvhXhn4cSKeSmoFjVdupA==",
-      "dev": true,
-      "requires": {
-        "bn.js": "^4.0.0",
-        "inherits": "^2.0.1",
-        "minimalistic-assert": "^1.0.0",
-        "safer-buffer": "^2.1.0"
-      },
-      "dependencies": {
-        "bn.js": {
-          "version": "4.12.0",
-          "resolved": "https://registry.npmjs.org/bn.js/-/bn.js-4.12.0.tgz",
-          "integrity": "sha512-c98Bf3tPniI+scsdk237ku1Dc3ujXQTSgyiPUDEOe7tRkhrqridvh8klBv0HCEso1OLOYcHuCv/cS6DNxKH+ZA==",
-          "dev": true
-        }
-      }
-    },
-    "assert": {
-      "version": "2.0.0",
-      "resolved": "https://registry.npmjs.org/assert/-/assert-2.0.0.tgz",
-      "integrity": "sha512-se5Cd+js9dXJnu6Ag2JFc00t+HmHOen+8Q+L7O9zI0PqQXr20uk2J0XQqMxZEeo5U50o8Nvmmx7dZrl+Ufr35A==",
+    "arraybuffer.prototype.slice": {
+      "version": "1.0.2",
+      "resolved": "https://registry.npmjs.org/arraybuffer.prototype.slice/-/arraybuffer.prototype.slice-1.0.2.tgz",
+      "integrity": "sha512-yMBKppFur/fbHu9/6USUe03bZ4knMYiwFBcyiaXB8Go0qNehwX6inYPzK9U0NeQvGxKthcmHcaR8P5MStSRBAw==",
       "dev": true,
       "requires": {
-        "es6-object-assign": "^1.1.0",
-        "is-nan": "^1.2.1",
-        "object-is": "^1.0.1",
-        "util": "^0.12.0"
+        "array-buffer-byte-length": "^1.0.0",
+        "call-bind": "^1.0.2",
+        "define-properties": "^1.2.0",
+        "es-abstract": "^1.22.1",
+        "get-intrinsic": "^1.2.1",
+        "is-array-buffer": "^3.0.2",
+        "is-shared-array-buffer": "^1.0.2"
       }
     },
     "async": {
@@ -6982,24 +5382,12 @@
       "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==",
       "dev": true
     },
-    "big.js": {
-      "version": "5.2.2",
-      "resolved": "https://registry.npmjs.org/big.js/-/big.js-5.2.2.tgz",
-      "integrity": "sha512-vyL2OymJxmarO8gxMr0mhChsO9QGwhynfuu4+MHTAW6czfq9humCB7rKpUjDd9YUiDPU4mzpyupFSvOClAwbmQ==",
-      "dev": true
-    },
     "binary-extensions": {
       "version": "2.2.0",
       "resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-2.2.0.tgz",
       "integrity": "sha512-jDctJ/IVQbZoJykoeHbhXpOlNBqGNcwXJKJog42E5HDPUwQTSdjCHdihjj0DlnheQ7blbT6dHOafNAiS8ooQKA==",
       "dev": true
     },
-    "bn.js": {
-      "version": "5.2.1",
-      "resolved": "https://registry.npmjs.org/bn.js/-/bn.js-5.2.1.tgz",
-      "integrity": "sha512-eXRvHzWyYPBuB4NBy0cmYQjGitUrtqwbvlzP3G6VFnNRbsZQIxQ10PbKKHt8gZ/HW/D/747aDl+QkDqg3KQLMQ==",
-      "dev": true
-    },
     "brace-expansion": {
       "version": "1.1.11",
       "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
@@ -7019,122 +5407,12 @@
         "fill-range": "^7.0.1"
       }
     },
-    "brorand": {
-      "version": "1.1.0",
-      "resolved": "https://registry.npmjs.org/brorand/-/brorand-1.1.0.tgz",
-      "integrity": "sha512-cKV8tMCEpQs4hK/ik71d6LrPOnpkpGBR0wzxqr68g2m/LB2GxVYQroAjMJZRVM1Y4BCjCKc3vAamxSzOY2RP+w==",
-      "dev": true
-    },
     "browser-stdout": {
       "version": "1.3.1",
       "resolved": "https://registry.npmjs.org/browser-stdout/-/browser-stdout-1.3.1.tgz",
       "integrity": "sha512-qhAVI1+Av2X7qelOfAIYwXONood6XlZE/fXaBSmW/T5SzLAmCgzi+eiWE7fUvbHaeNBQH13UftjpXxsfLkMpgw==",
       "dev": true
     },
-    "browserify-aes": {
-      "version": "1.2.0",
-      "resolved": "https://registry.npmjs.org/browserify-aes/-/browserify-aes-1.2.0.tgz",
-      "integrity": "sha512-+7CHXqGuspUn/Sl5aO7Ea0xWGAtETPXNSAjHo48JfLdPWcMng33Xe4znFvQweqc/uzk5zSOI3H52CYnjCfb5hA==",
-      "dev": true,
-      "requires": {
-        "buffer-xor": "^1.0.3",
-        "cipher-base": "^1.0.0",
-        "create-hash": "^1.1.0",
-        "evp_bytestokey": "^1.0.3",
-        "inherits": "^2.0.1",
-        "safe-buffer": "^5.0.1"
-      }
-    },
-    "browserify-cipher": {
-      "version": "1.0.1",
-      "resolved": "https://registry.npmjs.org/browserify-cipher/-/browserify-cipher-1.0.1.tgz",
-      "integrity": "sha512-sPhkz0ARKbf4rRQt2hTpAHqn47X3llLkUGn+xEJzLjwY8LRs2p0v7ljvI5EyoRO/mexrNunNECisZs+gw2zz1w==",
-      "dev": true,
-      "requires": {
-        "browserify-aes": "^1.0.4",
-        "browserify-des": "^1.0.0",
-        "evp_bytestokey": "^1.0.0"
-      }
-    },
-    "browserify-des": {
-      "version": "1.0.2",
-      "resolved": "https://registry.npmjs.org/browserify-des/-/browserify-des-1.0.2.tgz",
-      "integrity": "sha512-BioO1xf3hFwz4kc6iBhI3ieDFompMhrMlnDFC4/0/vd5MokpuAc3R+LYbwTA9A5Yc9pq9UYPqffKpW2ObuwX5A==",
-      "dev": true,
-      "requires": {
-        "cipher-base": "^1.0.1",
-        "des.js": "^1.0.0",
-        "inherits": "^2.0.1",
-        "safe-buffer": "^5.1.2"
-      }
-    },
-    "browserify-rsa": {
-      "version": "4.1.0",
-      "resolved": "https://registry.npmjs.org/browserify-rsa/-/browserify-rsa-4.1.0.tgz",
-      "integrity": "sha512-AdEER0Hkspgno2aR97SAf6vi0y0k8NuOpGnVH3O99rcA5Q6sh8QxcngtHuJ6uXwnfAXNM4Gn1Gb7/MV1+Ymbog==",
-      "dev": true,
-      "requires": {
-        "bn.js": "^5.0.0",
-        "randombytes": "^2.0.1"
-      }
-    },
-    "browserify-sign": {
-      "version": "4.2.1",
-      "resolved": "https://registry.npmjs.org/browserify-sign/-/browserify-sign-4.2.1.tgz",
-      "integrity": "sha512-/vrA5fguVAKKAVTNJjgSm1tRQDHUU6DbwO9IROu/0WAzC8PKhucDSh18J0RMvVeHAn5puMd+QHC2erPRNf8lmg==",
-      "dev": true,
-      "requires": {
-        "bn.js": "^5.1.1",
-        "browserify-rsa": "^4.0.1",
-        "create-hash": "^1.2.0",
-        "create-hmac": "^1.1.7",
-        "elliptic": "^6.5.3",
-        "inherits": "^2.0.4",
-        "parse-asn1": "^5.1.5",
-        "readable-stream": "^3.6.0",
-        "safe-buffer": "^5.2.0"
-      },
-      "dependencies": {
-        "readable-stream": {
-          "version": "3.6.1",
-          "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.1.tgz",
-          "integrity": "sha512-+rQmrWMYGA90yenhTYsLWAsLsqVC8osOw6PKE1HDYiO0gdPeKe/xDHNzIAIn4C91YQ6oenEhfYqqc1883qHbjQ==",
-          "dev": true,
-          "requires": {
-            "inherits": "^2.0.3",
-            "string_decoder": "^1.1.1",
-            "util-deprecate": "^1.0.1"
-          }
-        },
-        "safe-buffer": {
-          "version": "5.2.1",
-          "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz",
-          "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==",
-          "dev": true
-        }
-      }
-    },
-    "browserify-zlib": {
-      "version": "0.2.0",
-      "resolved": "https://registry.npmjs.org/browserify-zlib/-/browserify-zlib-0.2.0.tgz",
-      "integrity": "sha512-Z942RysHXmJrhqk88FmKBVq/v5tqmSkDz7p54G/MGyjMnCFFnC79XWNbg+Vta8W6Wb2qtSZTSxIGkJrRpCFEiA==",
-      "dev": true,
-      "requires": {
-        "pako": "~1.0.5"
-      }
-    },
-    "browserslist": {
-      "version": "4.21.5",
-      "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.21.5.tgz",
-      "integrity": "sha512-tUkiguQGW7S3IhB7N+c2MV/HZPSCPAAiYBZXLsBhFB/PCy6ZKKsZrmBayHV9fdGV/ARIfJ14NkxKzRDjvp7L6w==",
-      "dev": true,
-      "requires": {
-        "caniuse-lite": "^1.0.30001449",
-        "electron-to-chromium": "^1.4.284",
-        "node-releases": "^2.0.8",
-        "update-browserslist-db": "^1.0.10"
-      }
-    },
     "buffer": {
       "version": "6.0.3",
       "resolved": "https://registry.npmjs.org/buffer/-/buffer-6.0.3.tgz",
@@ -7145,30 +5423,12 @@
         "ieee754": "^1.2.1"
       }
     },
-    "buffer-from": {
-      "version": "1.1.2",
-      "resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.2.tgz",
-      "integrity": "sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==",
-      "dev": true
-    },
-    "buffer-xor": {
-      "version": "1.0.3",
-      "resolved": "https://registry.npmjs.org/buffer-xor/-/buffer-xor-1.0.3.tgz",
-      "integrity": "sha512-571s0T7nZWK6vB67HI5dyUF7wXiNcfaPPPTl6zYCNApANjIvYJTg7hlud/+cJpdAhS7dVzqMLmfhfHR3rAcOjQ==",
-      "dev": true
-    },
     "builtin-modules": {
       "version": "3.3.0",
       "resolved": "https://registry.npmjs.org/builtin-modules/-/builtin-modules-3.3.0.tgz",
       "integrity": "sha512-zhaCDicdLuWN5UbN5IMnFqNMhNfo919sH85y2/ea+5Yg9TsTkeZxpL+JLbp6cgYFS4sRLp3YV4S6yDuqVWHYOw==",
       "dev": true
     },
-    "builtin-status-codes": {
-      "version": "3.0.0",
-      "resolved": "https://registry.npmjs.org/builtin-status-codes/-/builtin-status-codes-3.0.0.tgz",
-      "integrity": "sha512-HpGFw18DgFWlncDfjTa2rcQ4W88O1mC8e8yZ2AvQY5KDaktSTwo+KRf6nHK6FRI5FyRyb/5T6+TSxfP7QyGsmQ==",
-      "dev": true
-    },
     "call-bind": {
       "version": "1.0.2",
       "resolved": "https://registry.npmjs.org/call-bind/-/call-bind-1.0.2.tgz",
@@ -7191,12 +5451,6 @@
       "integrity": "sha512-Gmy6FhYlCY7uOElZUSbxo2UCDH8owEk996gkbrpsgGtrJLM3J7jGxl9Ic7Qwwj4ivOE5AWZWRMecDdF7hqGjFA==",
       "dev": true
     },
-    "caniuse-lite": {
-      "version": "1.0.30001460",
-      "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001460.tgz",
-      "integrity": "sha512-Bud7abqjvEjipUkpLs4D7gR0l8hBYBHoa+tGtKJHvT2AYzLp1z7EmVkUT4ERpVUfca8S2HGIVs883D8pUH1ZzQ==",
-      "dev": true
-    },
     "chalk": {
       "version": "4.1.2",
       "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz",
@@ -7234,28 +5488,12 @@
         }
       }
     },
-    "chrome-trace-event": {
-      "version": "1.0.3",
-      "resolved": "https://registry.npmjs.org/chrome-trace-event/-/chrome-trace-event-1.0.3.tgz",
-      "integrity": "sha512-p3KULyQg4S7NIHixdwbGX+nFHkoBiA4YQmyWtjb8XngSKV124nJmRysgAeujbUVb15vh+RvFUfCPqU7rXk+hZg==",
-      "dev": true
-    },
     "ci-info": {
       "version": "3.8.0",
       "resolved": "https://registry.npmjs.org/ci-info/-/ci-info-3.8.0.tgz",
       "integrity": "sha512-eXTggHWSooYhq49F2opQhuHWgzucfF2YgODK4e1566GQs5BIfP30B0oenwBJHfWxAs2fyPB1s7Mg949zLf61Yw==",
       "dev": true
     },
-    "cipher-base": {
-      "version": "1.0.4",
-      "resolved": "https://registry.npmjs.org/cipher-base/-/cipher-base-1.0.4.tgz",
-      "integrity": "sha512-Kkht5ye6ZGmwv40uUDZztayT2ThLQGfnj/T71N/XzeZeo3nf8foyW7zGTsPYkEya3m5f3cAypH+qe7YOrM1U2Q==",
-      "dev": true,
-      "requires": {
-        "inherits": "^2.0.1",
-        "safe-buffer": "^5.0.1"
-      }
-    },
     "clang-format": {
       "version": "1.8.0",
       "resolved": "https://registry.npmjs.org/clang-format/-/clang-format-1.8.0.tgz",
@@ -7295,17 +5533,6 @@
         "wrap-ansi": "^7.0.0"
       }
     },
-    "clone-deep": {
-      "version": "4.0.1",
-      "resolved": "https://registry.npmjs.org/clone-deep/-/clone-deep-4.0.1.tgz",
-      "integrity": "sha512-neHB9xuzh/wk0dIHweyAXv2aPGZIVk3pLMe+/RNzINf17fe0OG96QroktYAUm7SM1PBnzTabaLboqqxDyMU+SQ==",
-      "dev": true,
-      "requires": {
-        "is-plain-object": "^2.0.4",
-        "kind-of": "^6.0.2",
-        "shallow-clone": "^3.0.0"
-      }
-    },
     "color-convert": {
       "version": "2.0.1",
       "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz",
@@ -7327,22 +5554,10 @@
       "integrity": "sha512-qiBjkpbMLO/HL68y+lh4q0/O1MZFj2RX6X/KmMa3+gJD3z+WwI1ZzDHysvqHGS3mP6mznPckpXmw1nI9cJjyRg==",
       "dev": true
     },
-    "colorette": {
-      "version": "2.0.19",
-      "resolved": "https://registry.npmjs.org/colorette/-/colorette-2.0.19.tgz",
-      "integrity": "sha512-3tlv/dIP7FWvj3BsbHrGLJ6l/oKh1O3TcgBqMn+yyCagOxc23fyzDS6HypQbgxWbkpDnf52p1LuR4eWDQ/K9WQ==",
-      "dev": true
-    },
-    "commander": {
-      "version": "2.20.3",
-      "resolved": "https://registry.npmjs.org/commander/-/commander-2.20.3.tgz",
-      "integrity": "sha512-GpVkmM8vF2vQUkj2LvZmD35JxeJOLCwJ9cUkugyk2nuhbv3+mJvpLYYt+0+USMxE+oj+ey/lJEnhZw75x/OMcQ==",
-      "dev": true
-    },
     "comment-parser": {
-      "version": "1.3.1",
-      "resolved": "https://registry.npmjs.org/comment-parser/-/comment-parser-1.3.1.tgz",
-      "integrity": "sha512-B52sN2VNghyq5ofvUsqZjmk6YkihBX5vMSChmSK9v4ShjKf3Vk5Xcmgpw4o+iIgtrnM/u5FiMpz9VKb8lpBveA==",
+      "version": "1.4.0",
+      "resolved": "https://registry.npmjs.org/comment-parser/-/comment-parser-1.4.0.tgz",
+      "integrity": "sha512-QLyTNiZ2KDOibvFPlZ6ZngVsZ/0gYnE6uTXi5aoDg8ed3AkJAz4sEje3Y8a29hQ1s6A99MZXe47fLAXQ1rTqaw==",
       "dev": true
     },
     "concat-map": {
@@ -7351,75 +5566,18 @@
       "integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==",
       "dev": true
     },
-    "console-browserify": {
-      "version": "1.2.0",
-      "resolved": "https://registry.npmjs.org/console-browserify/-/console-browserify-1.2.0.tgz",
-      "integrity": "sha512-ZMkYO/LkF17QvCPqM0gxw8yUzigAOZOSWSHg91FH6orS7vcEj5dVZTidN2fQ14yBSdg97RqhSNwLUXInd52OTA==",
-      "dev": true
-    },
     "console-control-strings": {
       "version": "1.1.0",
       "resolved": "https://registry.npmjs.org/console-control-strings/-/console-control-strings-1.1.0.tgz",
       "integrity": "sha512-ty/fTekppD2fIwRvnZAVdeOiGd1c7YXEixbgJTNzqcxJWKQnjJ/V1bNEEE6hygpM3WjwHFUVK6HTjWSzV4a8sQ==",
       "dev": true
     },
-    "constants-browserify": {
-      "version": "1.0.0",
-      "resolved": "https://registry.npmjs.org/constants-browserify/-/constants-browserify-1.0.0.tgz",
-      "integrity": "sha512-xFxOwqIzR/e1k1gLiWEophSCMqXcwVHIH7akf7b/vxcUeGunlj3hvZaaqxwHsTgn+IndtkQJgSztIDWeumWJDQ==",
-      "dev": true
-    },
     "core-util-is": {
       "version": "1.0.3",
       "resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.3.tgz",
       "integrity": "sha512-ZQBvi1DcpJ4GDqanjucZ2Hj3wEO5pZDS89BWbkcrvdxksJorwUDDZamX9ldFkp9aw2lmBDLgkObEA4DWNJ9FYQ==",
       "dev": true
     },
-    "create-ecdh": {
-      "version": "4.0.4",
-      "resolved": "https://registry.npmjs.org/create-ecdh/-/create-ecdh-4.0.4.tgz",
-      "integrity": "sha512-mf+TCx8wWc9VpuxfP2ht0iSISLZnt0JgWlrOKZiNqyUZWnjIaCIVNQArMHnCZKfEYRg6IM7A+NeJoN8gf/Ws0A==",
-      "dev": true,
-      "requires": {
-        "bn.js": "^4.1.0",
-        "elliptic": "^6.5.3"
-      },
-      "dependencies": {
-        "bn.js": {
-          "version": "4.12.0",
-          "resolved": "https://registry.npmjs.org/bn.js/-/bn.js-4.12.0.tgz",
-          "integrity": "sha512-c98Bf3tPniI+scsdk237ku1Dc3ujXQTSgyiPUDEOe7tRkhrqridvh8klBv0HCEso1OLOYcHuCv/cS6DNxKH+ZA==",
-          "dev": true
-        }
-      }
-    },
-    "create-hash": {
-      "version": "1.2.0",
-      "resolved": "https://registry.npmjs.org/create-hash/-/create-hash-1.2.0.tgz",
-      "integrity": "sha512-z00bCGNHDG8mHAkP7CtT1qVu+bFQUPjYq/4Iv3C3kWjTFV10zIjfSoeqXo9Asws8gwSHDGj/hl2u4OGIjapeCg==",
-      "dev": true,
-      "requires": {
-        "cipher-base": "^1.0.1",
-        "inherits": "^2.0.1",
-        "md5.js": "^1.3.4",
-        "ripemd160": "^2.0.1",
-        "sha.js": "^2.4.0"
-      }
-    },
-    "create-hmac": {
-      "version": "1.1.7",
-      "resolved": "https://registry.npmjs.org/create-hmac/-/create-hmac-1.1.7.tgz",
-      "integrity": "sha512-MJG9liiZ+ogc4TzUwuvbER1JRdgvUFSB5+VR/g5h82fGaIRWMWddtKBHi7/sVhfjQZ6SehlyhvQYrcYkaUIpLg==",
-      "dev": true,
-      "requires": {
-        "cipher-base": "^1.0.3",
-        "create-hash": "^1.1.0",
-        "inherits": "^2.0.1",
-        "ripemd160": "^2.0.0",
-        "safe-buffer": "^5.0.1",
-        "sha.js": "^2.4.8"
-      }
-    },
     "cross-spawn": {
       "version": "7.0.3",
       "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz",
@@ -7431,25 +5589,6 @@
         "which": "^2.0.1"
       }
     },
-    "crypto-browserify": {
-      "version": "3.12.0",
-      "resolved": "https://registry.npmjs.org/crypto-browserify/-/crypto-browserify-3.12.0.tgz",
-      "integrity": "sha512-fz4spIh+znjO2VjL+IdhEpRJ3YN6sMzITSBijk6FK2UvTqruSQW+/cCZTSNsMiZNvUeq0CqurF+dAbyiGOY6Wg==",
-      "dev": true,
-      "requires": {
-        "browserify-cipher": "^1.0.0",
-        "browserify-sign": "^4.0.0",
-        "create-ecdh": "^4.0.0",
-        "create-hash": "^1.1.0",
-        "create-hmac": "^1.1.0",
-        "diffie-hellman": "^5.0.0",
-        "inherits": "^2.0.1",
-        "pbkdf2": "^3.0.3",
-        "public-encrypt": "^4.0.0",
-        "randombytes": "^2.0.0",
-        "randomfill": "^1.0.3"
-      }
-    },
     "debug": {
       "version": "4.3.4",
       "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz",
@@ -7471,6 +5610,17 @@
       "integrity": "sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ==",
       "dev": true
     },
+    "define-data-property": {
+      "version": "1.1.0",
+      "resolved": "https://registry.npmjs.org/define-data-property/-/define-data-property-1.1.0.tgz",
+      "integrity": "sha512-UzGwzcjyv3OtAvolTj1GoyNYzfFR+iqbGjcnBEENZVCpM4/Ng1yhGNvS3lR/xDS74Tb2wGG9WzNSNIOS9UVb2g==",
+      "dev": true,
+      "requires": {
+        "get-intrinsic": "^1.2.1",
+        "gopd": "^1.0.1",
+        "has-property-descriptors": "^1.0.0"
+      }
+    },
     "define-properties": {
       "version": "1.2.0",
       "resolved": "https://registry.npmjs.org/define-properties/-/define-properties-1.2.0.tgz",
@@ -7487,48 +5637,19 @@
       "integrity": "sha512-bd2L678uiWATM6m5Z1VzNCErI3jiGzt6HGY8OVICs40JQq/HALfbyNJmp0UDakEY4pMMaN0Ly5om/B1VI/+xfQ==",
       "dev": true
     },
-    "des.js": {
-      "version": "1.0.1",
-      "resolved": "https://registry.npmjs.org/des.js/-/des.js-1.0.1.tgz",
-      "integrity": "sha512-Q0I4pfFrv2VPd34/vfLrFOoRmlYj3OV50i7fskps1jZWK1kApMWWT9G6RRUeYedLcBDIhnSDaUvJMb3AhUlaEA==",
-      "dev": true,
-      "requires": {
-        "inherits": "^2.0.1",
-        "minimalistic-assert": "^1.0.0"
-      }
-    },
     "diff": {
       "version": "5.0.0",
       "resolved": "https://registry.npmjs.org/diff/-/diff-5.0.0.tgz",
       "integrity": "sha512-/VTCrvm5Z0JGty/BWHljh+BAiw3IK+2j87NGMu8Nwc/f48WoDAC395uomO9ZD117ZOBaHmkX1oyLvkVM/aIT3w==",
       "dev": true
     },
-    "diffie-hellman": {
-      "version": "5.0.3",
-      "resolved": "https://registry.npmjs.org/diffie-hellman/-/diffie-hellman-5.0.3.tgz",
-      "integrity": "sha512-kqag/Nl+f3GwyK25fhUMYj81BUOrZ9IuJsjIcDE5icNM9FJHAVm3VcUDxdLPoQtTuUylWm6ZIknYJwwaPxsUzg==",
-      "dev": true,
-      "requires": {
-        "bn.js": "^4.1.0",
-        "miller-rabin": "^4.0.0",
-        "randombytes": "^2.0.0"
-      },
-      "dependencies": {
-        "bn.js": {
-          "version": "4.12.0",
-          "resolved": "https://registry.npmjs.org/bn.js/-/bn.js-4.12.0.tgz",
-          "integrity": "sha512-c98Bf3tPniI+scsdk237ku1Dc3ujXQTSgyiPUDEOe7tRkhrqridvh8klBv0HCEso1OLOYcHuCv/cS6DNxKH+ZA==",
-          "dev": true
-        }
-      }
-    },
     "dir-compare": {
-      "version": "4.0.0",
-      "resolved": "https://registry.npmjs.org/dir-compare/-/dir-compare-4.0.0.tgz",
-      "integrity": "sha512-wC7thVKL3V656tO61rbEDE4LTeeYrUC2pAUL00AaXYghBhjjVNRyBlpH6POzb44ZuK23OSrqF6TbSC/QYeqfAg==",
+      "version": "4.2.0",
+      "resolved": "https://registry.npmjs.org/dir-compare/-/dir-compare-4.2.0.tgz",
+      "integrity": "sha512-2xMCmOoMrdQIPHdsTawECdNPwlVFB9zGcz3kuhmBO6U3oU+UQjsue0i8ayLKpgBcm+hcXPMVSGUN9d+pvJ6+VQ==",
       "dev": true,
       "requires": {
-        "minimatch": "^3.0.4",
+        "minimatch": "^3.0.5",
         "p-limit": "^3.1.0 "
       }
     },
@@ -7550,75 +5671,12 @@
         "esutils": "^2.0.2"
       }
     },
-    "domain-browser": {
-      "version": "4.22.0",
-      "resolved": "https://registry.npmjs.org/domain-browser/-/domain-browser-4.22.0.tgz",
-      "integrity": "sha512-IGBwjF7tNk3cwypFNH/7bfzBcgSCbaMOD3GsaY1AU/JRrnHnYgEM0+9kQt52iZxjNsjBtJYtao146V+f8jFZNw==",
-      "dev": true
-    },
-    "duplexer": {
-      "version": "0.1.2",
-      "resolved": "https://registry.npmjs.org/duplexer/-/duplexer-0.1.2.tgz",
-      "integrity": "sha512-jtD6YG370ZCIi/9GTaJKQxWTZD045+4R4hTk/x1UyoqadyJ9x9CgSi1RlVDQF8U2sxLLSnFkCaMihqljHIWgMg==",
-      "dev": true
-    },
-    "electron-to-chromium": {
-      "version": "1.4.320",
-      "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.4.320.tgz",
-      "integrity": "sha512-h70iRscrNluMZPVICXYl5SSB+rBKo22XfuIS1ER0OQxQZpKTnFpuS6coj7wY9M/3trv7OR88rRMOlKmRvDty7Q==",
-      "dev": true
-    },
-    "elliptic": {
-      "version": "6.5.4",
-      "resolved": "https://registry.npmjs.org/elliptic/-/elliptic-6.5.4.tgz",
-      "integrity": "sha512-iLhC6ULemrljPZb+QutR5TQGB+pdW6KGD5RSegS+8sorOZT+rdQFbsQFJgvN3eRqNALqJer4oQ16YvJHlU8hzQ==",
-      "dev": true,
-      "requires": {
-        "bn.js": "^4.11.9",
-        "brorand": "^1.1.0",
-        "hash.js": "^1.0.0",
-        "hmac-drbg": "^1.0.1",
-        "inherits": "^2.0.4",
-        "minimalistic-assert": "^1.0.1",
-        "minimalistic-crypto-utils": "^1.0.1"
-      },
-      "dependencies": {
-        "bn.js": {
-          "version": "4.12.0",
-          "resolved": "https://registry.npmjs.org/bn.js/-/bn.js-4.12.0.tgz",
-          "integrity": "sha512-c98Bf3tPniI+scsdk237ku1Dc3ujXQTSgyiPUDEOe7tRkhrqridvh8klBv0HCEso1OLOYcHuCv/cS6DNxKH+ZA==",
-          "dev": true
-        }
-      }
-    },
     "emoji-regex": {
       "version": "8.0.0",
       "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz",
       "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==",
       "dev": true
     },
-    "emojis-list": {
-      "version": "3.0.0",
-      "resolved": "https://registry.npmjs.org/emojis-list/-/emojis-list-3.0.0.tgz",
-      "integrity": "sha512-/kyM18EfinwXZbno9FyUGeFh87KC8HRQBQGildHZbEuRyWFOmv1U10o9BBp8XVZDVNNuQKyIGIu5ZYAAXJ0V2Q==",
-      "dev": true
-    },
-    "enhanced-resolve": {
-      "version": "5.12.0",
-      "resolved": "https://registry.npmjs.org/enhanced-resolve/-/enhanced-resolve-5.12.0.tgz",
-      "integrity": "sha512-QHTXI/sZQmko1cbDoNAa3mJ5qhWUUNAq3vR0/YiD379fWQrcfuoX1+HW2S0MTt7XmoPLapdaDKUtelUSPic7hQ==",
-      "dev": true,
-      "requires": {
-        "graceful-fs": "^4.2.4",
-        "tapable": "^2.2.0"
-      }
-    },
-    "envinfo": {
-      "version": "7.8.1",
-      "resolved": "https://registry.npmjs.org/envinfo/-/envinfo-7.8.1.tgz",
-      "integrity": "sha512-/o+BXHmB7ocbHEAs6F2EnG0ogybVVUdkRunTT2glZU9XAaGmhqskrvKwqXuDfNjEO0LZKWdejEEpnq8aM0tOaw==",
-      "dev": true
-    },
     "error-ex": {
       "version": "1.3.2",
       "resolved": "https://registry.npmjs.org/error-ex/-/error-ex-1.3.2.tgz",
@@ -7629,18 +5687,19 @@
       }
     },
     "es-abstract": {
-      "version": "1.21.1",
-      "resolved": "https://registry.npmjs.org/es-abstract/-/es-abstract-1.21.1.tgz",
-      "integrity": "sha512-QudMsPOz86xYz/1dG1OuGBKOELjCh99IIWHLzy5znUB6j8xG2yMA7bfTV86VSqKF+Y/H08vQPR+9jyXpuC6hfg==",
+      "version": "1.22.2",
+      "resolved": "https://registry.npmjs.org/es-abstract/-/es-abstract-1.22.2.tgz",
+      "integrity": "sha512-YoxfFcDmhjOgWPWsV13+2RNjq1F6UQnfs+8TftwNqtzlmFzEXvlUwdrNrYeaizfjQzRMxkZ6ElWMOJIFKdVqwA==",
       "dev": true,
       "requires": {
+        "array-buffer-byte-length": "^1.0.0",
+        "arraybuffer.prototype.slice": "^1.0.2",
         "available-typed-arrays": "^1.0.5",
         "call-bind": "^1.0.2",
         "es-set-tostringtag": "^2.0.1",
         "es-to-primitive": "^1.2.1",
-        "function-bind": "^1.1.1",
-        "function.prototype.name": "^1.1.5",
-        "get-intrinsic": "^1.1.3",
+        "function.prototype.name": "^1.1.6",
+        "get-intrinsic": "^1.2.1",
         "get-symbol-description": "^1.0.0",
         "globalthis": "^1.0.3",
         "gopd": "^1.0.1",
@@ -7648,33 +5707,32 @@
         "has-property-descriptors": "^1.0.0",
         "has-proto": "^1.0.1",
         "has-symbols": "^1.0.3",
-        "internal-slot": "^1.0.4",
-        "is-array-buffer": "^3.0.1",
+        "internal-slot": "^1.0.5",
+        "is-array-buffer": "^3.0.2",
         "is-callable": "^1.2.7",
         "is-negative-zero": "^2.0.2",
         "is-regex": "^1.1.4",
         "is-shared-array-buffer": "^1.0.2",
         "is-string": "^1.0.7",
-        "is-typed-array": "^1.1.10",
+        "is-typed-array": "^1.1.12",
         "is-weakref": "^1.0.2",
-        "object-inspect": "^1.12.2",
+        "object-inspect": "^1.12.3",
         "object-keys": "^1.1.1",
         "object.assign": "^4.1.4",
-        "regexp.prototype.flags": "^1.4.3",
+        "regexp.prototype.flags": "^1.5.1",
+        "safe-array-concat": "^1.0.1",
         "safe-regex-test": "^1.0.0",
-        "string.prototype.trimend": "^1.0.6",
-        "string.prototype.trimstart": "^1.0.6",
+        "string.prototype.trim": "^1.2.8",
+        "string.prototype.trimend": "^1.0.7",
+        "string.prototype.trimstart": "^1.0.7",
+        "typed-array-buffer": "^1.0.0",
+        "typed-array-byte-length": "^1.0.0",
+        "typed-array-byte-offset": "^1.0.0",
         "typed-array-length": "^1.0.4",
         "unbox-primitive": "^1.0.2",
-        "which-typed-array": "^1.1.9"
+        "which-typed-array": "^1.1.11"
       }
     },
-    "es-module-lexer": {
-      "version": "0.9.3",
-      "resolved": "https://registry.npmjs.org/es-module-lexer/-/es-module-lexer-0.9.3.tgz",
-      "integrity": "sha512-1HQ2M2sPtxwnvOvT1ZClHyQDiggdNjURWpY2we6aMKCQiUVxTmVs2UYPLIrD84sS+kMdUwfBSylbJPwNnBrnHQ==",
-      "dev": true
-    },
     "es-set-tostringtag": {
       "version": "2.0.1",
       "resolved": "https://registry.npmjs.org/es-set-tostringtag/-/es-set-tostringtag-2.0.1.tgz",
@@ -7706,11 +5764,45 @@
         "is-symbol": "^1.0.2"
       }
     },
-    "es6-object-assign": {
-      "version": "1.1.0",
-      "resolved": "https://registry.npmjs.org/es6-object-assign/-/es6-object-assign-1.1.0.tgz",
-      "integrity": "sha512-MEl9uirslVwqQU369iHNWZXsI8yaZYGg/D65aOgZkeyFJwHYSxilf7rQzXKI7DdDuBPrBXbfk3sl9hJhmd5AUw==",
-      "dev": true
+    "esbuild": {
+      "version": "0.19.3",
+      "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.19.3.tgz",
+      "integrity": "sha512-UlJ1qUUA2jL2nNib1JTSkifQTcYTroFqRjwCFW4QYEKEsixXD5Tik9xML7zh2gTxkYTBKGHNH9y7txMwVyPbjw==",
+      "dev": true,
+      "requires": {
+        "@esbuild/android-arm": "0.19.3",
+        "@esbuild/android-arm64": "0.19.3",
+        "@esbuild/android-x64": "0.19.3",
+        "@esbuild/darwin-arm64": "0.19.3",
+        "@esbuild/darwin-x64": "0.19.3",
+        "@esbuild/freebsd-arm64": "0.19.3",
+        "@esbuild/freebsd-x64": "0.19.3",
+        "@esbuild/linux-arm": "0.19.3",
+        "@esbuild/linux-arm64": "0.19.3",
+        "@esbuild/linux-ia32": "0.19.3",
+        "@esbuild/linux-loong64": "0.19.3",
+        "@esbuild/linux-mips64el": "0.19.3",
+        "@esbuild/linux-ppc64": "0.19.3",
+        "@esbuild/linux-riscv64": "0.19.3",
+        "@esbuild/linux-s390x": "0.19.3",
+        "@esbuild/linux-x64": "0.19.3",
+        "@esbuild/netbsd-x64": "0.19.3",
+        "@esbuild/openbsd-x64": "0.19.3",
+        "@esbuild/sunos-x64": "0.19.3",
+        "@esbuild/win32-arm64": "0.19.3",
+        "@esbuild/win32-ia32": "0.19.3",
+        "@esbuild/win32-x64": "0.19.3"
+      }
+    },
+    "esbuild-plugin-polyfill-node": {
+      "version": "0.3.0",
+      "resolved": "https://registry.npmjs.org/esbuild-plugin-polyfill-node/-/esbuild-plugin-polyfill-node-0.3.0.tgz",
+      "integrity": "sha512-SHG6CKUfWfYyYXGpW143NEZtcVVn8S/WHcEOxk62LuDXnY4Zpmc+WmxJKN6GMTgTClXJXhEM5KQlxKY6YjbucQ==",
+      "dev": true,
+      "requires": {
+        "@jspm/core": "^2.0.1",
+        "import-meta-resolve": "^3.0.0"
+      }
     },
     "escalade": {
       "version": "3.1.1",
@@ -7725,26 +5817,27 @@
       "dev": true
     },
     "eslint": {
-      "version": "8.35.0",
-      "resolved": "https://registry.npmjs.org/eslint/-/eslint-8.35.0.tgz",
-      "integrity": "sha512-BxAf1fVL7w+JLRQhWl2pzGeSiGqbWumV4WNvc9Rhp6tiCtm4oHnyPBSEtMGZwrQgudFQ+otqzWoPB7x+hxoWsw==",
+      "version": "8.51.0",
+      "resolved": "https://registry.npmjs.org/eslint/-/eslint-8.51.0.tgz",
+      "integrity": "sha512-2WuxRZBrlwnXi+/vFSJyjMqrNjtJqiasMzehF0shoLaW7DzS3/9Yvrmq5JiT66+pNjiX4UBnLDiKHcWAr/OInA==",
       "dev": true,
       "requires": {
-        "@eslint/eslintrc": "^2.0.0",
-        "@eslint/js": "8.35.0",
-        "@humanwhocodes/config-array": "^0.11.8",
+        "@eslint-community/eslint-utils": "^4.2.0",
+        "@eslint-community/regexpp": "^4.6.1",
+        "@eslint/eslintrc": "^2.1.2",
+        "@eslint/js": "8.51.0",
+        "@humanwhocodes/config-array": "^0.11.11",
         "@humanwhocodes/module-importer": "^1.0.1",
         "@nodelib/fs.walk": "^1.2.8",
-        "ajv": "^6.10.0",
+        "ajv": "^6.12.4",
         "chalk": "^4.0.0",
         "cross-spawn": "^7.0.2",
         "debug": "^4.3.2",
         "doctrine": "^3.0.0",
         "escape-string-regexp": "^4.0.0",
-        "eslint-scope": "^7.1.1",
-        "eslint-utils": "^3.0.0",
-        "eslint-visitor-keys": "^3.3.0",
-        "espree": "^9.4.0",
+        "eslint-scope": "^7.2.2",
+        "eslint-visitor-keys": "^3.4.3",
+        "espree": "^9.6.1",
         "esquery": "^1.4.2",
         "esutils": "^2.0.2",
         "fast-deep-equal": "^3.1.3",
@@ -7752,42 +5845,20 @@
         "find-up": "^5.0.0",
         "glob-parent": "^6.0.2",
         "globals": "^13.19.0",
-        "grapheme-splitter": "^1.0.4",
+        "graphemer": "^1.4.0",
         "ignore": "^5.2.0",
-        "import-fresh": "^3.0.0",
         "imurmurhash": "^0.1.4",
         "is-glob": "^4.0.0",
         "is-path-inside": "^3.0.3",
-        "js-sdsl": "^4.1.4",
         "js-yaml": "^4.1.0",
         "json-stable-stringify-without-jsonify": "^1.0.1",
         "levn": "^0.4.1",
         "lodash.merge": "^4.6.2",
         "minimatch": "^3.1.2",
         "natural-compare": "^1.4.0",
-        "optionator": "^0.9.1",
-        "regexpp": "^3.2.0",
+        "optionator": "^0.9.3",
         "strip-ansi": "^6.0.1",
-        "strip-json-comments": "^3.1.0",
         "text-table": "^0.2.0"
-      },
-      "dependencies": {
-        "eslint-scope": {
-          "version": "7.1.1",
-          "resolved": "https://registry.npmjs.org/eslint-scope/-/eslint-scope-7.1.1.tgz",
-          "integrity": "sha512-QKQM/UXpIiHcLqJ5AOyIW7XZmzjkzQXYE54n1++wb0u9V/abW3l9uQnxX8Z5Xd18xyKIMTUAyQ0k1e8pz6LUrw==",
-          "dev": true,
-          "requires": {
-            "esrecurse": "^4.3.0",
-            "estraverse": "^5.2.0"
-          }
-        },
-        "estraverse": {
-          "version": "5.3.0",
-          "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.3.0.tgz",
-          "integrity": "sha512-MMdARuVEQziNTeJD8DgMqmhwR11BRQ/cBP+pLtYdSTnf3MIO8fFeiINEbX36ZdNlfU/7A9f3gUw49B3oQsvwBA==",
-          "dev": true
-        }
       }
     },
     "eslint-import-resolver-node": {
@@ -7813,9 +5884,9 @@
       }
     },
     "eslint-module-utils": {
-      "version": "2.7.4",
-      "resolved": "https://registry.npmjs.org/eslint-module-utils/-/eslint-module-utils-2.7.4.tgz",
-      "integrity": "sha512-j4GT+rqzCoRKHwURX7pddtIPGySnX9Si/cgMI5ztrcqOPtk5dDEeZ34CQVPphnqkJytlc97Vuk05Um2mJ3gEQA==",
+      "version": "2.8.0",
+      "resolved": "https://registry.npmjs.org/eslint-module-utils/-/eslint-module-utils-2.8.0.tgz",
+      "integrity": "sha512-aWajIYfsqCKRDgUfjEXNN/JlrzauMuSEy5sbd7WXbtW3EH6A6MpwEh42c7qD+MqQo9QMJ6fWLAeIJynx0g6OAw==",
       "dev": true,
       "requires": {
         "debug": "^3.2.7"
@@ -7840,26 +5911,28 @@
       "requires": {}
     },
     "eslint-plugin-import": {
-      "version": "2.27.5",
-      "resolved": "https://registry.npmjs.org/eslint-plugin-import/-/eslint-plugin-import-2.27.5.tgz",
-      "integrity": "sha512-LmEt3GVofgiGuiE+ORpnvP+kAm3h6MLZJ4Q5HCyHADofsb4VzXFsRiWj3c0OFiV+3DWFh0qg3v9gcPlfc3zRow==",
+      "version": "2.28.1",
+      "resolved": "https://registry.npmjs.org/eslint-plugin-import/-/eslint-plugin-import-2.28.1.tgz",
+      "integrity": "sha512-9I9hFlITvOV55alzoKBI+K9q74kv0iKMeY6av5+umsNwayt59fz692daGyjR+oStBQgx6nwR9rXldDev3Clw+A==",
       "dev": true,
       "requires": {
         "array-includes": "^3.1.6",
+        "array.prototype.findlastindex": "^1.2.2",
         "array.prototype.flat": "^1.3.1",
         "array.prototype.flatmap": "^1.3.1",
         "debug": "^3.2.7",
         "doctrine": "^2.1.0",
         "eslint-import-resolver-node": "^0.3.7",
-        "eslint-module-utils": "^2.7.4",
+        "eslint-module-utils": "^2.8.0",
         "has": "^1.0.3",
-        "is-core-module": "^2.11.0",
+        "is-core-module": "^2.13.0",
         "is-glob": "^4.0.3",
         "minimatch": "^3.1.2",
+        "object.fromentries": "^2.0.6",
+        "object.groupby": "^1.0.0",
         "object.values": "^1.1.6",
-        "resolve": "^1.22.1",
-        "semver": "^6.3.0",
-        "tsconfig-paths": "^3.14.1"
+        "semver": "^6.3.1",
+        "tsconfig-paths": "^3.14.2"
       },
       "dependencies": {
         "debug": {
@@ -7881,25 +5954,27 @@
           }
         },
         "semver": {
-          "version": "6.3.0",
-          "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.0.tgz",
-          "integrity": "sha512-b39TBaTSfV6yBrapU89p5fKekE2m/NwnDocOVruQFS1/veMgdzuPcnOM34M6CwxW8jH/lxEa5rBoDeUwu5HHTw==",
+          "version": "6.3.1",
+          "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.1.tgz",
+          "integrity": "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==",
           "dev": true
         }
       }
     },
     "eslint-plugin-jsdoc": {
-      "version": "40.0.1",
-      "resolved": "https://registry.npmjs.org/eslint-plugin-jsdoc/-/eslint-plugin-jsdoc-40.0.1.tgz",
-      "integrity": "sha512-KkiRInury7YrjjV5aCHDxwsPy6XFt5p2b2CnpDMITnWs8patNPf5kj24+VXIWw45kP6z/B0GOKfrYczB56OjQQ==",
+      "version": "46.8.2",
+      "resolved": "https://registry.npmjs.org/eslint-plugin-jsdoc/-/eslint-plugin-jsdoc-46.8.2.tgz",
+      "integrity": "sha512-5TSnD018f3tUJNne4s4gDWQflbsgOycIKEUBoCLn6XtBMgNHxQFmV8vVxUtiPxAQq8lrX85OaSG/2gnctxw9uQ==",
       "dev": true,
       "requires": {
-        "@es-joy/jsdoccomment": "~0.36.1",
-        "comment-parser": "1.3.1",
+        "@es-joy/jsdoccomment": "~0.40.1",
+        "are-docs-informative": "^0.0.2",
+        "comment-parser": "1.4.0",
         "debug": "^4.3.4",
         "escape-string-regexp": "^4.0.0",
-        "esquery": "^1.4.0",
-        "semver": "^7.3.8",
+        "esquery": "^1.5.0",
+        "is-builtin-module": "^3.2.1",
+        "semver": "^7.5.4",
         "spdx-expression-parse": "^3.0.1"
       }
     },
@@ -7911,71 +5986,53 @@
       "requires": {}
     },
     "eslint-plugin-unicorn": {
-      "version": "46.0.0",
-      "resolved": "https://registry.npmjs.org/eslint-plugin-unicorn/-/eslint-plugin-unicorn-46.0.0.tgz",
-      "integrity": "sha512-j07WkC+PFZwk8J33LYp6JMoHa1lXc1u6R45pbSAipjpfpb7KIGr17VE2D685zCxR5VL4cjrl65kTJflziQWMDA==",
+      "version": "48.0.1",
+      "resolved": "https://registry.npmjs.org/eslint-plugin-unicorn/-/eslint-plugin-unicorn-48.0.1.tgz",
+      "integrity": "sha512-FW+4r20myG/DqFcCSzoumaddKBicIPeFnTrifon2mWIzlfyvzwyqZjqVP7m4Cqr/ZYisS2aiLghkUWaPg6vtCw==",
       "dev": true,
       "requires": {
-        "@babel/helper-validator-identifier": "^7.19.1",
-        "@eslint-community/eslint-utils": "^4.1.2",
-        "ci-info": "^3.6.1",
+        "@babel/helper-validator-identifier": "^7.22.5",
+        "@eslint-community/eslint-utils": "^4.4.0",
+        "ci-info": "^3.8.0",
         "clean-regexp": "^1.0.0",
-        "esquery": "^1.4.0",
+        "esquery": "^1.5.0",
         "indent-string": "^4.0.0",
-        "is-builtin-module": "^3.2.0",
+        "is-builtin-module": "^3.2.1",
         "jsesc": "^3.0.2",
         "lodash": "^4.17.21",
         "pluralize": "^8.0.0",
         "read-pkg-up": "^7.0.1",
-        "regexp-tree": "^0.1.24",
-        "regjsparser": "^0.9.1",
-        "safe-regex": "^2.1.1",
-        "semver": "^7.3.8",
+        "regexp-tree": "^0.1.27",
+        "regjsparser": "^0.10.0",
+        "semver": "^7.5.4",
         "strip-indent": "^3.0.0"
       }
     },
     "eslint-scope": {
-      "version": "5.1.1",
-      "resolved": "https://registry.npmjs.org/eslint-scope/-/eslint-scope-5.1.1.tgz",
-      "integrity": "sha512-2NxwbF/hZ0KpepYN0cNbo+FN6XoK7GaHlQhgx/hIZl6Va0bF45RQOOwhLIy8lQDbuCiadSLCBnH2CFYquit5bw==",
+      "version": "7.2.2",
+      "resolved": "https://registry.npmjs.org/eslint-scope/-/eslint-scope-7.2.2.tgz",
+      "integrity": "sha512-dOt21O7lTMhDM+X9mB4GX+DZrZtCUJPL/wlcTqxyrx5IvO0IYtILdtrQGQp+8n5S0gwSVmOf9NQrjMOgfQZlIg==",
       "dev": true,
       "requires": {
         "esrecurse": "^4.3.0",
-        "estraverse": "^4.1.1"
-      }
-    },
-    "eslint-utils": {
-      "version": "3.0.0",
-      "resolved": "https://registry.npmjs.org/eslint-utils/-/eslint-utils-3.0.0.tgz",
-      "integrity": "sha512-uuQC43IGctw68pJA1RgbQS8/NP7rch6Cwd4j3ZBtgo4/8Flj4eGE7ZYSZRN3iq5pVUv6GPdW5Z1RFleo84uLDA==",
-      "dev": true,
-      "requires": {
-        "eslint-visitor-keys": "^2.0.0"
-      },
-      "dependencies": {
-        "eslint-visitor-keys": {
-          "version": "2.1.0",
-          "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-2.1.0.tgz",
-          "integrity": "sha512-0rSmRBzXgDzIsD6mGdJgevzgezI534Cer5L/vyMX0kHzT/jiB43jRhd9YUlMGYLQy2zprNmoT8qasCGtY+QaKw==",
-          "dev": true
-        }
+        "estraverse": "^5.2.0"
       }
     },
     "eslint-visitor-keys": {
-      "version": "3.3.0",
-      "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-3.3.0.tgz",
-      "integrity": "sha512-mQ+suqKJVyeuwGYHAdjMFqjCyfl8+Ldnxuyp3ldiMBFKkvytrXUZWaiPCEav8qDHKty44bD+qV1IP4T+w+xXRA==",
+      "version": "3.4.3",
+      "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-3.4.3.tgz",
+      "integrity": "sha512-wpc+LXeiyiisxPlEkUzU6svyS1frIO3Mgxj1fdy7Pm8Ygzguax2N3Fa/D/ag1WqbOprdI+uY6wMUl8/a2G+iag==",
       "dev": true
     },
     "espree": {
-      "version": "9.4.1",
-      "resolved": "https://registry.npmjs.org/espree/-/espree-9.4.1.tgz",
-      "integrity": "sha512-XwctdmTO6SIvCzd9810yyNzIrOrqNYV9Koizx4C/mRhf9uq0o4yHoCEU/670pOxOL/MSraektvSAji79kX90Vg==",
+      "version": "9.6.1",
+      "resolved": "https://registry.npmjs.org/espree/-/espree-9.6.1.tgz",
+      "integrity": "sha512-oruZaFkjorTpF32kDSI5/75ViwGeZginGGy2NoOSg3Q9bnwlnmDm4HLnkl0RE3n+njDXR037aY1+x58Z/zFdwQ==",
       "dev": true,
       "requires": {
-        "acorn": "^8.8.0",
+        "acorn": "^8.9.0",
         "acorn-jsx": "^5.3.2",
-        "eslint-visitor-keys": "^3.3.0"
+        "eslint-visitor-keys": "^3.4.1"
       }
     },
     "esquery": {
@@ -7985,14 +6042,6 @@
       "dev": true,
       "requires": {
         "estraverse": "^5.1.0"
-      },
-      "dependencies": {
-        "estraverse": {
-          "version": "5.3.0",
-          "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.3.0.tgz",
-          "integrity": "sha512-MMdARuVEQziNTeJD8DgMqmhwR11BRQ/cBP+pLtYdSTnf3MIO8fFeiINEbX36ZdNlfU/7A9f3gUw49B3oQsvwBA==",
-          "dev": true
-        }
       }
     },
     "esrecurse": {
@@ -8002,20 +6051,12 @@
       "dev": true,
       "requires": {
         "estraverse": "^5.2.0"
-      },
-      "dependencies": {
-        "estraverse": {
-          "version": "5.3.0",
-          "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.3.0.tgz",
-          "integrity": "sha512-MMdARuVEQziNTeJD8DgMqmhwR11BRQ/cBP+pLtYdSTnf3MIO8fFeiINEbX36ZdNlfU/7A9f3gUw49B3oQsvwBA==",
-          "dev": true
-        }
       }
     },
     "estraverse": {
-      "version": "4.3.0",
-      "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-4.3.0.tgz",
-      "integrity": "sha512-39nnKffWz8xN1BU/2c79n9nB9HDzo0niYUqx6xyqUnyoAnQyyWpOTdZEeiCch8BBu515t4wp9ZmgVfVhn9EBpw==",
+      "version": "5.3.0",
+      "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.3.0.tgz",
+      "integrity": "sha512-MMdARuVEQziNTeJD8DgMqmhwR11BRQ/cBP+pLtYdSTnf3MIO8fFeiINEbX36ZdNlfU/7A9f3gUw49B3oQsvwBA==",
       "dev": true
     },
     "esutils": {
@@ -8036,16 +6077,6 @@
       "integrity": "sha512-mQw+2fkQbALzQ7V0MY0IqdnXNOeTtP4r0lN9z7AAawCXgqea7bDii20AYrIBrFd/Hx0M2Ocz6S111CaFkUcb0Q==",
       "dev": true
     },
-    "evp_bytestokey": {
-      "version": "1.0.3",
-      "resolved": "https://registry.npmjs.org/evp_bytestokey/-/evp_bytestokey-1.0.3.tgz",
-      "integrity": "sha512-/f2Go4TognH/KvCISP7OUsHn85hT9nUkxxA9BEWxFn+Oj9o8ZNLm/40hdlgSLyuOimsrTKLUMEorQexp/aPQeA==",
-      "dev": true,
-      "requires": {
-        "md5.js": "^1.3.4",
-        "safe-buffer": "^5.1.1"
-      }
-    },
     "fast-deep-equal": {
       "version": "3.1.3",
       "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz",
@@ -8053,9 +6084,9 @@
       "dev": true
     },
     "fast-glob": {
-      "version": "3.2.12",
-      "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.2.12.tgz",
-      "integrity": "sha512-DVj4CQIYYow0BlaelwK1pHl5n5cRSJfM60UA0zK891sVInoPri2Ekj7+e1CT3/3qxXenpI+nBBmQAcJPJgaj4w==",
+      "version": "3.3.1",
+      "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.3.1.tgz",
+      "integrity": "sha512-kNFPyjhh5cKjrUltxs+wFx+ZkbRaxxmZ+X0ZU31SOsxCEtP9VPgtq2teZw1DebupL5GmDaNQ6yKMMVcM41iqDg==",
       "dev": true,
       "requires": {
         "@nodelib/fs.stat": "^2.0.2",
@@ -8088,12 +6119,6 @@
       "integrity": "sha512-DCXu6Ifhqcks7TZKY3Hxp3y6qphY5SJZmrWMDrKcERSOXWQdMhU9Ig/PYrzyw/ul9jOIyh0N4M0tbC5hodg8dw==",
       "dev": true
     },
-    "fastest-levenshtein": {
-      "version": "1.0.16",
-      "resolved": "https://registry.npmjs.org/fastest-levenshtein/-/fastest-levenshtein-1.0.16.tgz",
-      "integrity": "sha512-eRnCtTTtGZFpQCwhJiUOuxPQWRXVKYDn0b2PeHfXL6/Zi53SLAzAHfVhVWK2AryC/WH05kGfxhFIPvTF0SXQzg==",
-      "dev": true
-    },
     "fastq": {
       "version": "1.15.0",
       "resolved": "https://registry.npmjs.org/fastq/-/fastq-1.15.0.tgz",
@@ -8121,12 +6146,6 @@
         "to-regex-range": "^5.0.1"
       }
     },
-    "filter-obj": {
-      "version": "2.0.2",
-      "resolved": "https://registry.npmjs.org/filter-obj/-/filter-obj-2.0.2.tgz",
-      "integrity": "sha512-lO3ttPjHZRfjMcxWKb1j1eDhTFsu4meeR3lnMcnBFhk6RuLhvEiuALu2TlfL310ph4lCYYwgF/ElIjdP739tdg==",
-      "dev": true
-    },
     "find-up": {
       "version": "5.0.0",
       "resolved": "https://registry.npmjs.org/find-up/-/find-up-5.0.0.tgz",
@@ -8169,9 +6188,9 @@
       }
     },
     "fs-extra": {
-      "version": "11.1.0",
-      "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-11.1.0.tgz",
-      "integrity": "sha512-0rcTq621PD5jM/e0a3EJoGC/1TC5ZBCERW82LQuwfGnCa1V8w7dpYH1yNu+SLb6E5dkeCBzKEyLGlFrnr+dUyw==",
+      "version": "11.1.1",
+      "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-11.1.1.tgz",
+      "integrity": "sha512-MGIE4HOvQCeUCzmlHs0vXpih4ysz4wg9qiSAu6cd42lVwPbTM1TjV7RusoyQqMmk/95gdQZX72u+YW+c3eEpFQ==",
       "dev": true,
       "requires": {
         "graceful-fs": "^4.2.0",
@@ -8199,15 +6218,15 @@
       "dev": true
     },
     "function.prototype.name": {
-      "version": "1.1.5",
-      "resolved": "https://registry.npmjs.org/function.prototype.name/-/function.prototype.name-1.1.5.tgz",
-      "integrity": "sha512-uN7m/BzVKQnCUF/iW8jYea67v++2u7m5UgENbHRtdDVclOUP+FMPlCNdmk0h/ysGyo2tavMJEDqJAkJdRa1vMA==",
+      "version": "1.1.6",
+      "resolved": "https://registry.npmjs.org/function.prototype.name/-/function.prototype.name-1.1.6.tgz",
+      "integrity": "sha512-Z5kx79swU5P27WEayXM1tBi5Ze/lbIyiNgU3qyXUOf9b2rgXYyF9Dy9Cx+IQv/Lc8WCG6L82zwUPpSS9hGehIg==",
       "dev": true,
       "requires": {
         "call-bind": "^1.0.2",
-        "define-properties": "^1.1.3",
-        "es-abstract": "^1.19.0",
-        "functions-have-names": "^1.2.2"
+        "define-properties": "^1.2.0",
+        "es-abstract": "^1.22.1",
+        "functions-have-names": "^1.2.3"
       }
     },
     "functions-have-names": {
@@ -8239,13 +6258,14 @@
       "dev": true
     },
     "get-intrinsic": {
-      "version": "1.2.0",
-      "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.2.0.tgz",
-      "integrity": "sha512-L049y6nFOuom5wGyRc3/gdTLO94dySVKRACj1RmJZBQXlbTMhtNIgkWkUHq+jYmZvKf14EW1EoJnnjbmoHij0Q==",
+      "version": "1.2.1",
+      "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.2.1.tgz",
+      "integrity": "sha512-2DcsyfABl+gVHEfCOaTrWgyt+tb6MSEGmKq+kI5HwLbIYgjgmMcV8KQ41uaKz1xxUcn9tJtgFbQUEVcEbd0FYw==",
       "dev": true,
       "requires": {
         "function-bind": "^1.1.1",
         "has": "^1.0.3",
+        "has-proto": "^1.0.1",
         "has-symbols": "^1.0.3"
       }
     },
@@ -8282,16 +6302,10 @@
         "is-glob": "^4.0.3"
       }
     },
-    "glob-to-regexp": {
-      "version": "0.4.1",
-      "resolved": "https://registry.npmjs.org/glob-to-regexp/-/glob-to-regexp-0.4.1.tgz",
-      "integrity": "sha512-lkX1HJXwyMcprw/5YUZc2s7DrpAiHB21/V+E1rHUrVNokkvB6bqMzT0VfV6/86ZNabt1k14YOIaT7nDvOX3Iiw==",
-      "dev": true
-    },
     "globals": {
-      "version": "13.20.0",
-      "resolved": "https://registry.npmjs.org/globals/-/globals-13.20.0.tgz",
-      "integrity": "sha512-Qg5QtVkCy/kv3FUSlu4ukeZDVf9ee0iXLAUYX13gbR17bnejFTzr4iS9bY7kwCf1NztRNm1t91fjOiyx4CSwPQ==",
+      "version": "13.23.0",
+      "resolved": "https://registry.npmjs.org/globals/-/globals-13.23.0.tgz",
+      "integrity": "sha512-XAmF0RjlrjY23MA51q3HltdlGxUpXPvg0GioKiD9X6HD28iMjo2dKC8Vqwm7lne4GNr78+RHTfliktR6ZH09wA==",
       "dev": true,
       "requires": {
         "type-fest": "^0.20.2"
@@ -8335,21 +6349,12 @@
       "integrity": "sha512-9ByhssR2fPVsNZj478qUUbKfmL0+t5BDVyjShtyZZLiK7ZDAArFFfopyOTj0M05wE2tJPisA4iTnnXl2YoPvOA==",
       "dev": true
     },
-    "grapheme-splitter": {
-      "version": "1.0.4",
-      "resolved": "https://registry.npmjs.org/grapheme-splitter/-/grapheme-splitter-1.0.4.tgz",
-      "integrity": "sha512-bzh50DW9kTPM00T8y4o8vQg89Di9oLJVLW/KaOGIXJWP/iqCN6WKYkbNOF04vFLJhwcpYUh9ydh/+5vpOqV4YQ==",
+    "graphemer": {
+      "version": "1.4.0",
+      "resolved": "https://registry.npmjs.org/graphemer/-/graphemer-1.4.0.tgz",
+      "integrity": "sha512-EtKwoO6kxCL9WO5xipiHTZlSzBm7WLT627TqC/uVRd0HKmq8NXyebnNYxDoBi7wt8eTWrUrKXCOVaFq9x1kgag==",
       "dev": true
     },
-    "gzip-size": {
-      "version": "6.0.0",
-      "resolved": "https://registry.npmjs.org/gzip-size/-/gzip-size-6.0.0.tgz",
-      "integrity": "sha512-ax7ZYomf6jqPTQ4+XCpUGyXKHk5WweS+e05MBO4/y3WJ5RkmPXNKvX+bx1behVILVwr6JSQvZAku021CHPXG3Q==",
-      "dev": true,
-      "requires": {
-        "duplexer": "^0.1.2"
-      }
-    },
     "has": {
       "version": "1.0.3",
       "resolved": "https://registry.npmjs.org/has/-/has-1.0.3.tgz",
@@ -8387,95 +6392,38 @@
       "dev": true
     },
     "has-symbols": {
-      "version": "1.0.3",
-      "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.0.3.tgz",
-      "integrity": "sha512-l3LCuF6MgDNwTDKkdYGEihYjt5pRPbEg46rtlmnSPlUbgmB8LOIrKJbYYFBSbnPaJexMKtiPO8hmeRjRz2Td+A==",
-      "dev": true
-    },
-    "has-tostringtag": {
-      "version": "1.0.0",
-      "resolved": "https://registry.npmjs.org/has-tostringtag/-/has-tostringtag-1.0.0.tgz",
-      "integrity": "sha512-kFjcSNhnlGV1kyoGk7OXKSawH5JOb/LzUc5w9B02hOTO0dfFRjbHQKvg1d6cf3HbeUmtU9VbbV3qzZ2Teh97WQ==",
-      "dev": true,
-      "requires": {
-        "has-symbols": "^1.0.2"
-      }
-    },
-    "has-unicode": {
-      "version": "2.0.1",
-      "resolved": "https://registry.npmjs.org/has-unicode/-/has-unicode-2.0.1.tgz",
-      "integrity": "sha512-8Rf9Y83NBReMnx0gFzA8JImQACstCYWUplepDa9xprwwtmgEZUF0h/i5xSA625zB/I37EtrswSST6OXxwaaIJQ==",
-      "dev": true
-    },
-    "hash-base": {
-      "version": "3.1.0",
-      "resolved": "https://registry.npmjs.org/hash-base/-/hash-base-3.1.0.tgz",
-      "integrity": "sha512-1nmYp/rhMDiE7AYkDw+lLwlAzz0AntGIe51F3RfFfEqyQ3feY2eI/NcwC6umIQVOASPMsWJLJScWKSSvzL9IVA==",
-      "dev": true,
-      "requires": {
-        "inherits": "^2.0.4",
-        "readable-stream": "^3.6.0",
-        "safe-buffer": "^5.2.0"
-      },
-      "dependencies": {
-        "readable-stream": {
-          "version": "3.6.1",
-          "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.1.tgz",
-          "integrity": "sha512-+rQmrWMYGA90yenhTYsLWAsLsqVC8osOw6PKE1HDYiO0gdPeKe/xDHNzIAIn4C91YQ6oenEhfYqqc1883qHbjQ==",
-          "dev": true,
-          "requires": {
-            "inherits": "^2.0.3",
-            "string_decoder": "^1.1.1",
-            "util-deprecate": "^1.0.1"
-          }
-        },
-        "safe-buffer": {
-          "version": "5.2.1",
-          "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz",
-          "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==",
-          "dev": true
-        }
-      }
+      "version": "1.0.3",
+      "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.0.3.tgz",
+      "integrity": "sha512-l3LCuF6MgDNwTDKkdYGEihYjt5pRPbEg46rtlmnSPlUbgmB8LOIrKJbYYFBSbnPaJexMKtiPO8hmeRjRz2Td+A==",
+      "dev": true
     },
-    "hash.js": {
-      "version": "1.1.7",
-      "resolved": "https://registry.npmjs.org/hash.js/-/hash.js-1.1.7.tgz",
-      "integrity": "sha512-taOaskGt4z4SOANNseOviYDvjEJinIkRgmp7LbKP2YTTmVxWBl87s/uzK9r+44BclBSp2X7K1hqeNfz9JbBeXA==",
+    "has-tostringtag": {
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/has-tostringtag/-/has-tostringtag-1.0.0.tgz",
+      "integrity": "sha512-kFjcSNhnlGV1kyoGk7OXKSawH5JOb/LzUc5w9B02hOTO0dfFRjbHQKvg1d6cf3HbeUmtU9VbbV3qzZ2Teh97WQ==",
       "dev": true,
       "requires": {
-        "inherits": "^2.0.3",
-        "minimalistic-assert": "^1.0.1"
+        "has-symbols": "^1.0.2"
       }
     },
+    "has-unicode": {
+      "version": "2.0.1",
+      "resolved": "https://registry.npmjs.org/has-unicode/-/has-unicode-2.0.1.tgz",
+      "integrity": "sha512-8Rf9Y83NBReMnx0gFzA8JImQACstCYWUplepDa9xprwwtmgEZUF0h/i5xSA625zB/I37EtrswSST6OXxwaaIJQ==",
+      "dev": true
+    },
     "he": {
       "version": "1.2.0",
       "resolved": "https://registry.npmjs.org/he/-/he-1.2.0.tgz",
       "integrity": "sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw==",
       "dev": true
     },
-    "hmac-drbg": {
-      "version": "1.0.1",
-      "resolved": "https://registry.npmjs.org/hmac-drbg/-/hmac-drbg-1.0.1.tgz",
-      "integrity": "sha512-Tti3gMqLdZfhOQY1Mzf/AanLiqh1WTiJgEj26ZuYQ9fbkLomzGchCws4FyrSd4VkpBfiNhaE1On+lOz894jvXg==",
-      "dev": true,
-      "requires": {
-        "hash.js": "^1.0.3",
-        "minimalistic-assert": "^1.0.0",
-        "minimalistic-crypto-utils": "^1.0.1"
-      }
-    },
     "hosted-git-info": {
       "version": "2.8.9",
       "resolved": "https://registry.npmjs.org/hosted-git-info/-/hosted-git-info-2.8.9.tgz",
       "integrity": "sha512-mxIDAb9Lsm6DoOJ7xH+5+X4y1LU/4Hi50L9C5sIswK3JzULS4bwk1FvjdBgvYR4bzT4tuUQiC15FE2f5HbLvYw==",
       "dev": true
     },
-    "https-browserify": {
-      "version": "1.0.0",
-      "resolved": "https://registry.npmjs.org/https-browserify/-/https-browserify-1.0.0.tgz",
-      "integrity": "sha512-J+FkSdyD+0mA0N+81tMotaRMfSL9SGi+xpD3T6YApKsc3bGSXJlfXri3VyFOeYkfLRQisDk1W+jIFFKBeUBbBg==",
-      "dev": true
-    },
     "ieee754": {
       "version": "1.2.1",
       "resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz",
@@ -8504,15 +6452,11 @@
         "resolve-from": "^4.0.0"
       }
     },
-    "import-local": {
-      "version": "3.1.0",
-      "resolved": "https://registry.npmjs.org/import-local/-/import-local-3.1.0.tgz",
-      "integrity": "sha512-ASB07uLtnDs1o6EHjKpX34BKYDSqnFerfTOJL2HvMqF70LnxpjkzDB8J44oT9pu4AMPkQwf8jl6szgvNd2tRIg==",
-      "dev": true,
-      "requires": {
-        "pkg-dir": "^4.2.0",
-        "resolve-cwd": "^3.0.0"
-      }
+    "import-meta-resolve": {
+      "version": "3.0.0",
+      "resolved": "https://registry.npmjs.org/import-meta-resolve/-/import-meta-resolve-3.0.0.tgz",
+      "integrity": "sha512-4IwhLhNNA8yy445rPjD/lWh++7hMDOml2eHtd58eG7h+qK3EryMuuRbsHGPikCoAgIkkDnckKfWSk2iDla/ejg==",
+      "dev": true
     },
     "imurmurhash": {
       "version": "0.1.4",
@@ -8553,22 +6497,6 @@
         "side-channel": "^1.0.4"
       }
     },
-    "interpret": {
-      "version": "3.1.1",
-      "resolved": "https://registry.npmjs.org/interpret/-/interpret-3.1.1.tgz",
-      "integrity": "sha512-6xwYfHbajpoF0xLW+iwLkhwgvLoZDfjYfoFNu8ftMoXINzwuymNLd9u/KmwtdT2GbR+/Cz66otEGEVVUHX9QLQ==",
-      "dev": true
-    },
-    "is-arguments": {
-      "version": "1.1.1",
-      "resolved": "https://registry.npmjs.org/is-arguments/-/is-arguments-1.1.1.tgz",
-      "integrity": "sha512-8Q7EARjzEnKpt/PCD7e1cgUS0a6X8u5tdSiMqXhojOdoV9TsMsiO+9VLC5vAmO8N7/GmXn7yjR8qnA6bVAEzfA==",
-      "dev": true,
-      "requires": {
-        "call-bind": "^1.0.2",
-        "has-tostringtag": "^1.0.0"
-      }
-    },
     "is-array-buffer": {
       "version": "3.0.2",
       "resolved": "https://registry.npmjs.org/is-array-buffer/-/is-array-buffer-3.0.2.tgz",
@@ -8630,9 +6558,9 @@
       "dev": true
     },
     "is-core-module": {
-      "version": "2.11.0",
-      "resolved": "https://registry.npmjs.org/is-core-module/-/is-core-module-2.11.0.tgz",
-      "integrity": "sha512-RRjxlvLDkD1YJwDbroBHMb+cukurkDWNyHx7D3oNB5x9rb5ogcksMC5wHCadcXoo67gVr/+3GFySh3134zi6rw==",
+      "version": "2.13.0",
+      "resolved": "https://registry.npmjs.org/is-core-module/-/is-core-module-2.13.0.tgz",
+      "integrity": "sha512-Z7dk6Qo8pOCp3l4tsX2C5ZVas4V+UxwQodwZhLopL91TX8UyyHEXafPcyoeeWuLrwzHcr3igO78wNLwHJHsMCQ==",
       "dev": true,
       "requires": {
         "has": "^1.0.3"
@@ -8659,15 +6587,6 @@
       "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==",
       "dev": true
     },
-    "is-generator-function": {
-      "version": "1.0.10",
-      "resolved": "https://registry.npmjs.org/is-generator-function/-/is-generator-function-1.0.10.tgz",
-      "integrity": "sha512-jsEjy9l3yiXEQ+PsXdmBwEPcOxaXWLspKdplFUVI9vq1iZgIekeC0L167qeu86czQaxed3q/Uzuw0swL0irL8A==",
-      "dev": true,
-      "requires": {
-        "has-tostringtag": "^1.0.0"
-      }
-    },
     "is-glob": {
       "version": "4.0.3",
       "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.3.tgz",
@@ -8677,16 +6596,6 @@
         "is-extglob": "^2.1.1"
       }
     },
-    "is-nan": {
-      "version": "1.3.2",
-      "resolved": "https://registry.npmjs.org/is-nan/-/is-nan-1.3.2.tgz",
-      "integrity": "sha512-E+zBKpQ2t6MEo1VsonYmluk9NxGrbzpeeLC2xIViuO2EjU2xsXsBPwTr3Ykv9l08UYEVEdWeRZNouaZqF6RN0w==",
-      "dev": true,
-      "requires": {
-        "call-bind": "^1.0.0",
-        "define-properties": "^1.1.3"
-      }
-    },
     "is-negative-zero": {
       "version": "2.0.2",
       "resolved": "https://registry.npmjs.org/is-negative-zero/-/is-negative-zero-2.0.2.tgz",
@@ -8720,15 +6629,6 @@
       "integrity": "sha512-YWnfyRwxL/+SsrWYfOpUtz5b3YD+nyfkHvjbcanzk8zgyO4ASD67uVMRt8k5bM4lLMDnXfriRhOpemw+NfT1eA==",
       "dev": true
     },
-    "is-plain-object": {
-      "version": "2.0.4",
-      "resolved": "https://registry.npmjs.org/is-plain-object/-/is-plain-object-2.0.4.tgz",
-      "integrity": "sha512-h5PpgXkWitc38BBMYawTYMWJHFZJVnBquFE57xFpjB8pJFiF6gZ+bU+WyI/yqXiFR5mdLsgYNaPe8uao6Uv9Og==",
-      "dev": true,
-      "requires": {
-        "isobject": "^3.0.1"
-      }
-    },
     "is-regex": {
       "version": "1.1.4",
       "resolved": "https://registry.npmjs.org/is-regex/-/is-regex-1.1.4.tgz",
@@ -8767,16 +6667,12 @@
       }
     },
     "is-typed-array": {
-      "version": "1.1.10",
-      "resolved": "https://registry.npmjs.org/is-typed-array/-/is-typed-array-1.1.10.tgz",
-      "integrity": "sha512-PJqgEHiWZvMpaFZ3uTc8kHPM4+4ADTlDniuQL7cU/UDA0Ql7F70yGfHph3cLNe+c9toaigv+DFzTJKhc2CtO6A==",
+      "version": "1.1.12",
+      "resolved": "https://registry.npmjs.org/is-typed-array/-/is-typed-array-1.1.12.tgz",
+      "integrity": "sha512-Z14TF2JNG8Lss5/HMqt0//T9JeHXttXy5pH/DBU4vi98ozO2btxzq9MwYDZYnKwU8nRsz/+GVFVRDq3DkVuSPg==",
       "dev": true,
       "requires": {
-        "available-typed-arrays": "^1.0.5",
-        "call-bind": "^1.0.2",
-        "for-each": "^0.3.3",
-        "gopd": "^1.0.1",
-        "has-tostringtag": "^1.0.0"
+        "which-typed-array": "^1.1.11"
       }
     },
     "is-unicode-supported": {
@@ -8806,40 +6702,6 @@
       "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==",
       "dev": true
     },
-    "isobject": {
-      "version": "3.0.1",
-      "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz",
-      "integrity": "sha512-WhB9zCku7EGTj/HQQRz5aUQEUeoQZH2bWcltRErOpymJ4boYE6wL9Tbr23krRPSZ+C5zqNSrSw+Cc7sZZ4b7vg==",
-      "dev": true
-    },
-    "jest-worker": {
-      "version": "27.5.1",
-      "resolved": "https://registry.npmjs.org/jest-worker/-/jest-worker-27.5.1.tgz",
-      "integrity": "sha512-7vuh85V5cdDofPyxn58nrPjBktZo0u9x1g8WtjQol+jZDaE+fhN+cIvTj11GndBnMnyfrUOG1sZQxCdjKh+DKg==",
-      "dev": true,
-      "requires": {
-        "@types/node": "*",
-        "merge-stream": "^2.0.0",
-        "supports-color": "^8.0.0"
-      },
-      "dependencies": {
-        "supports-color": {
-          "version": "8.1.1",
-          "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-8.1.1.tgz",
-          "integrity": "sha512-MpUEN2OodtUzxvKQl72cUF7RQ5EiHsGvSsVG0ia9c5RbWGL2CI4C7EpPS8UTBIplnlzZiNuV56w+FuNxy3ty2Q==",
-          "dev": true,
-          "requires": {
-            "has-flag": "^4.0.0"
-          }
-        }
-      }
-    },
-    "js-sdsl": {
-      "version": "4.3.0",
-      "resolved": "https://registry.npmjs.org/js-sdsl/-/js-sdsl-4.3.0.tgz",
-      "integrity": "sha512-mifzlm2+5nZ+lEcLJMoBK0/IH/bDg8XnJfd/Wq6IP+xoCjLZsTOnV2QpxlVbX9bMnkl5PdEjNtBJ9Cj1NjifhQ==",
-      "dev": true
-    },
     "js-tokens": {
       "version": "4.0.0",
       "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz",
@@ -8856,9 +6718,9 @@
       }
     },
     "jsdoc-type-pratt-parser": {
-      "version": "3.1.0",
-      "resolved": "https://registry.npmjs.org/jsdoc-type-pratt-parser/-/jsdoc-type-pratt-parser-3.1.0.tgz",
-      "integrity": "sha512-MgtD0ZiCDk9B+eI73BextfRrVQl0oyzRG8B2BjORts6jbunj4ScKPcyXGTbB6eXL4y9TzxCm6hyeLq/2ASzNdw==",
+      "version": "4.0.0",
+      "resolved": "https://registry.npmjs.org/jsdoc-type-pratt-parser/-/jsdoc-type-pratt-parser-4.0.0.tgz",
+      "integrity": "sha512-YtOli5Cmzy3q4dP26GraSOeAhqecewG04hoO8DY56CH4KJ9Fvv5qKWUCCo3HZob7esJQHCv6/+bnTy72xZZaVQ==",
       "dev": true
     },
     "jsesc": {
@@ -8916,12 +6778,6 @@
         "setimmediate": "^1.0.5"
       }
     },
-    "kind-of": {
-      "version": "6.0.3",
-      "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-6.0.3.tgz",
-      "integrity": "sha512-dcS1ul+9tmeD95T+x28/ehLgd9mENa3LsvDTtzm3vyBEO7RPptvAD+t44WVXaUjTBRcrpFeFlC8WCruUR456hw==",
-      "dev": true
-    },
     "levn": {
       "version": "0.4.1",
       "resolved": "https://registry.npmjs.org/levn/-/levn-0.4.1.tgz",
@@ -8947,31 +6803,6 @@
       "integrity": "sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==",
       "dev": true
     },
-    "loader-runner": {
-      "version": "4.3.0",
-      "resolved": "https://registry.npmjs.org/loader-runner/-/loader-runner-4.3.0.tgz",
-      "integrity": "sha512-3R/1M+yS3j5ou80Me59j7F9IMs4PXs3VqRrm0TU3AbKPxlmpoY1TNscJV/oGJXo8qCatFGTfDbY6W6ipGOYXfg==",
-      "dev": true
-    },
-    "loader-utils": {
-      "version": "2.0.4",
-      "resolved": "https://registry.npmjs.org/loader-utils/-/loader-utils-2.0.4.tgz",
-      "integrity": "sha512-xXqpXoINfFhgua9xiqD8fPFHgkoq1mmmpE92WlDbm9rNRd/EbRb+Gqf908T2DMfuHjjJlksiK2RbHVOdD/MqSw==",
-      "dev": true,
-      "requires": {
-        "big.js": "^5.2.2",
-        "emojis-list": "^3.0.0",
-        "json5": "^2.1.2"
-      },
-      "dependencies": {
-        "json5": {
-          "version": "2.2.3",
-          "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.3.tgz",
-          "integrity": "sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg==",
-          "dev": true
-        }
-      }
-    },
     "locate-path": {
       "version": "6.0.0",
       "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-6.0.0.tgz",
@@ -9012,23 +6843,6 @@
         "yallist": "^4.0.0"
       }
     },
-    "md5.js": {
-      "version": "1.3.5",
-      "resolved": "https://registry.npmjs.org/md5.js/-/md5.js-1.3.5.tgz",
-      "integrity": "sha512-xitP+WxNPcTTOgnTJcrhM0xvdPepipPSf3I8EIpGKeFLjt3PlJLIDG3u8EX53ZIubkb+5U2+3rELYpEhHhzdkg==",
-      "dev": true,
-      "requires": {
-        "hash-base": "^3.0.0",
-        "inherits": "^2.0.1",
-        "safe-buffer": "^5.1.2"
-      }
-    },
-    "merge-stream": {
-      "version": "2.0.0",
-      "resolved": "https://registry.npmjs.org/merge-stream/-/merge-stream-2.0.0.tgz",
-      "integrity": "sha512-abv/qOcuPfk3URPfDzmZU1LKmuw8kT+0nIHvKrKgFrwifol/doWcdA4ZqsWQ8ENrFKkd67Mfpo/LovbIUsbt3w==",
-      "dev": true
-    },
     "merge2": {
       "version": "1.4.1",
       "resolved": "https://registry.npmjs.org/merge2/-/merge2-1.4.1.tgz",
@@ -9045,57 +6859,12 @@
         "picomatch": "^2.3.1"
       }
     },
-    "miller-rabin": {
-      "version": "4.0.1",
-      "resolved": "https://registry.npmjs.org/miller-rabin/-/miller-rabin-4.0.1.tgz",
-      "integrity": "sha512-115fLhvZVqWwHPbClyntxEVfVDfl9DLLTuJvq3g2O/Oxi8AiNouAHvDSzHS0viUJc+V5vm3eq91Xwqn9dp4jRA==",
-      "dev": true,
-      "requires": {
-        "bn.js": "^4.0.0",
-        "brorand": "^1.0.1"
-      },
-      "dependencies": {
-        "bn.js": {
-          "version": "4.12.0",
-          "resolved": "https://registry.npmjs.org/bn.js/-/bn.js-4.12.0.tgz",
-          "integrity": "sha512-c98Bf3tPniI+scsdk237ku1Dc3ujXQTSgyiPUDEOe7tRkhrqridvh8klBv0HCEso1OLOYcHuCv/cS6DNxKH+ZA==",
-          "dev": true
-        }
-      }
-    },
-    "mime-db": {
-      "version": "1.52.0",
-      "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz",
-      "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==",
-      "dev": true
-    },
-    "mime-types": {
-      "version": "2.1.35",
-      "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz",
-      "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==",
-      "dev": true,
-      "requires": {
-        "mime-db": "1.52.0"
-      }
-    },
     "min-indent": {
       "version": "1.0.1",
       "resolved": "https://registry.npmjs.org/min-indent/-/min-indent-1.0.1.tgz",
       "integrity": "sha512-I9jwMn07Sy/IwOj3zVkVik2JTvgpaykDZEigL6Rx6N9LbMywwUSMtxET+7lVoDLLd3O3IXwJwvuuns8UB/HeAg==",
       "dev": true
     },
-    "minimalistic-assert": {
-      "version": "1.0.1",
-      "resolved": "https://registry.npmjs.org/minimalistic-assert/-/minimalistic-assert-1.0.1.tgz",
-      "integrity": "sha512-UtJcAD4yEaGtjPezWuO9wC4nwUnVH/8/Im3yEHQP4b67cXlD/Qr9hdITCU1xDbSEXg2XKNaP8jsReV7vQd00/A==",
-      "dev": true
-    },
-    "minimalistic-crypto-utils": {
-      "version": "1.0.1",
-      "resolved": "https://registry.npmjs.org/minimalistic-crypto-utils/-/minimalistic-crypto-utils-1.0.1.tgz",
-      "integrity": "sha512-JIYlbt6g8i5jKfJ3xz7rF0LXmv2TkDxBLUkiBeZ7bAx4GnnNMr8xFpGnOxn6GhTEHx3SjRrZEoU+j04prX1ktg==",
-      "dev": true
-    },
     "minimatch": {
       "version": "3.1.2",
       "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz",
@@ -9211,12 +6980,6 @@
         }
       }
     },
-    "mrmime": {
-      "version": "1.0.1",
-      "resolved": "https://registry.npmjs.org/mrmime/-/mrmime-1.0.1.tgz",
-      "integrity": "sha512-hzzEagAgDyoU1Q6yg5uI+AorQgdvMCur3FcKf7NhMKWsaYg+RnbTyHRa/9IlLF9rf455MOCtcqqrQQ83pPP7Uw==",
-      "dev": true
-    },
     "ms": {
       "version": "2.1.2",
       "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz",
@@ -9235,92 +6998,6 @@
       "integrity": "sha512-OWND8ei3VtNC9h7V60qff3SVobHr996CTwgxubgyQYEpg290h9J0buyECNNJexkFm5sOajh5G116RYA1c8ZMSw==",
       "dev": true
     },
-    "natural-compare-lite": {
-      "version": "1.4.0",
-      "resolved": "https://registry.npmjs.org/natural-compare-lite/-/natural-compare-lite-1.4.0.tgz",
-      "integrity": "sha512-Tj+HTDSJJKaZnfiuw+iaF9skdPpTo2GtEly5JHnWV/hfv2Qj/9RKsGISQtLh2ox3l5EAGw487hnBee0sIJ6v2g==",
-      "dev": true
-    },
-    "neo-async": {
-      "version": "2.6.2",
-      "resolved": "https://registry.npmjs.org/neo-async/-/neo-async-2.6.2.tgz",
-      "integrity": "sha512-Yd3UES5mWCSqR+qNT93S3UoYUkqAZ9lLg8a7g9rimsWmYGK8cVToA4/sF3RrshdyV3sAGMXVUmpMYOw+dLpOuw==",
-      "dev": true
-    },
-    "node-polyfill-webpack-plugin": {
-      "version": "2.0.1",
-      "resolved": "https://registry.npmjs.org/node-polyfill-webpack-plugin/-/node-polyfill-webpack-plugin-2.0.1.tgz",
-      "integrity": "sha512-ZUMiCnZkP1LF0Th2caY6J/eKKoA0TefpoVa68m/LQU1I/mE8rGt4fNYGgNuCcK+aG8P8P43nbeJ2RqJMOL/Y1A==",
-      "dev": true,
-      "requires": {
-        "assert": "^2.0.0",
-        "browserify-zlib": "^0.2.0",
-        "buffer": "^6.0.3",
-        "console-browserify": "^1.2.0",
-        "constants-browserify": "^1.0.0",
-        "crypto-browserify": "^3.12.0",
-        "domain-browser": "^4.22.0",
-        "events": "^3.3.0",
-        "filter-obj": "^2.0.2",
-        "https-browserify": "^1.0.0",
-        "os-browserify": "^0.3.0",
-        "path-browserify": "^1.0.1",
-        "process": "^0.11.10",
-        "punycode": "^2.1.1",
-        "querystring-es3": "^0.2.1",
-        "readable-stream": "^4.0.0",
-        "stream-browserify": "^3.0.0",
-        "stream-http": "^3.2.0",
-        "string_decoder": "^1.3.0",
-        "timers-browserify": "^2.0.12",
-        "tty-browserify": "^0.0.1",
-        "type-fest": "^2.14.0",
-        "url": "^0.11.0",
-        "util": "^0.12.4",
-        "vm-browserify": "^1.1.2"
-      },
-      "dependencies": {
-        "readable-stream": {
-          "version": "4.3.0",
-          "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-4.3.0.tgz",
-          "integrity": "sha512-MuEnA0lbSi7JS8XM+WNJlWZkHAAdm7gETHdFK//Q/mChGyj2akEFtdLZh32jSdkWGbRwCW9pn6g3LWDdDeZnBQ==",
-          "dev": true,
-          "requires": {
-            "abort-controller": "^3.0.0",
-            "buffer": "^6.0.3",
-            "events": "^3.3.0",
-            "process": "^0.11.10"
-          }
-        },
-        "safe-buffer": {
-          "version": "5.2.1",
-          "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz",
-          "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==",
-          "dev": true
-        },
-        "string_decoder": {
-          "version": "1.3.0",
-          "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz",
-          "integrity": "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==",
-          "dev": true,
-          "requires": {
-            "safe-buffer": "~5.2.0"
-          }
-        },
-        "type-fest": {
-          "version": "2.19.0",
-          "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-2.19.0.tgz",
-          "integrity": "sha512-RAH822pAdBgcNMAfWnCBU3CFZcfZ/i1eZjwFU/dsLKumyuuP3niueg2UAukXYF0E2AAoc82ZSSf9J0WQBinzHA==",
-          "dev": true
-        }
-      }
-    },
-    "node-releases": {
-      "version": "2.0.10",
-      "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.10.tgz",
-      "integrity": "sha512-5GFldHPXVG/YZmFzJvKK2zDSzPKhEp0+ZR5SVaoSag9fsL5YgHbUHDfnG5494ISANDcK4KwPXAx2xqVEydmd7w==",
-      "dev": true
-    },
     "normalize-package-data": {
       "version": "2.5.0",
       "resolved": "https://registry.npmjs.org/normalize-package-data/-/normalize-package-data-2.5.0.tgz",
@@ -9334,9 +7011,9 @@
       },
       "dependencies": {
         "semver": {
-          "version": "5.7.1",
-          "resolved": "https://registry.npmjs.org/semver/-/semver-5.7.1.tgz",
-          "integrity": "sha512-sauaDf/PZdVgrLTNYHRtpXa1iRiKcaebiKQ1BJdpQlWH2lCvexQdX55snPFyK7QzpudqbCI0qXFfOasHdyNDGQ==",
+          "version": "5.7.2",
+          "resolved": "https://registry.npmjs.org/semver/-/semver-5.7.2.tgz",
+          "integrity": "sha512-cBznnQ9KjJqU67B52RMC65CMarK2600WFnbkcaiwWq3xy/5haFJlshgnpjovMVJ+Hff49d8GEn0b87C5pDQ10g==",
           "dev": true
         }
       }
@@ -9365,16 +7042,6 @@
       "integrity": "sha512-geUvdk7c+eizMNUDkRpW1wJwgfOiOeHbxBR/hLXK1aT6zmVSO0jsQcs7fj6MGw89jC/cjGfLcNOrtMYtGqm81g==",
       "dev": true
     },
-    "object-is": {
-      "version": "1.1.5",
-      "resolved": "https://registry.npmjs.org/object-is/-/object-is-1.1.5.tgz",
-      "integrity": "sha512-3cyDsyHgtmi7I7DfSSI2LDp6SK2lwvtbg0p0R1e0RvTqF5ceGx+K2dfSjm1bKDMVCFEDAQvy+o8c6a7VujOddw==",
-      "dev": true,
-      "requires": {
-        "call-bind": "^1.0.2",
-        "define-properties": "^1.1.3"
-      }
-    },
     "object-keys": {
       "version": "1.1.1",
       "resolved": "https://registry.npmjs.org/object-keys/-/object-keys-1.1.1.tgz",
@@ -9393,6 +7060,29 @@
         "object-keys": "^1.1.1"
       }
     },
+    "object.fromentries": {
+      "version": "2.0.7",
+      "resolved": "https://registry.npmjs.org/object.fromentries/-/object.fromentries-2.0.7.tgz",
+      "integrity": "sha512-UPbPHML6sL8PI/mOqPwsH4G6iyXcCGzLin8KvEPenOZN5lpCNBZZQ+V62vdjB1mQHrmqGQt5/OJzemUA+KJmEA==",
+      "dev": true,
+      "requires": {
+        "call-bind": "^1.0.2",
+        "define-properties": "^1.2.0",
+        "es-abstract": "^1.22.1"
+      }
+    },
+    "object.groupby": {
+      "version": "1.0.1",
+      "resolved": "https://registry.npmjs.org/object.groupby/-/object.groupby-1.0.1.tgz",
+      "integrity": "sha512-HqaQtqLnp/8Bn4GL16cj+CUYbnpe1bh0TtEaWvybszDG4tgxCJuRpV8VGuvNaI1fAnI4lUJzDG55MXcOH4JZcQ==",
+      "dev": true,
+      "requires": {
+        "call-bind": "^1.0.2",
+        "define-properties": "^1.2.0",
+        "es-abstract": "^1.22.1",
+        "get-intrinsic": "^1.2.1"
+      }
+    },
     "object.values": {
       "version": "1.1.6",
       "resolved": "https://registry.npmjs.org/object.values/-/object.values-1.1.6.tgz",
@@ -9413,32 +7103,20 @@
         "wrappy": "1"
       }
     },
-    "opener": {
-      "version": "1.5.2",
-      "resolved": "https://registry.npmjs.org/opener/-/opener-1.5.2.tgz",
-      "integrity": "sha512-ur5UIdyw5Y7yEj9wLzhqXiy6GZ3Mwx0yGI+5sMn2r0N0v3cKJvUmFH5yPP+WXh9e0xfyzyJX95D8l088DNFj7A==",
-      "dev": true
-    },
     "optionator": {
-      "version": "0.9.1",
-      "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.1.tgz",
-      "integrity": "sha512-74RlY5FCnhq4jRxVUPKDaRwrVNXMqsGsiW6AJw4XK8hmtm10wC0ypZBLw5IIp85NZMr91+qd1RvvENwg7jjRFw==",
+      "version": "0.9.3",
+      "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.3.tgz",
+      "integrity": "sha512-JjCoypp+jKn1ttEFExxhetCKeJt9zhAgAve5FXHixTvFDW/5aEktX9bufBKLRRMdU7bNtpLfcGu94B3cdEJgjg==",
       "dev": true,
       "requires": {
+        "@aashutoshrathi/word-wrap": "^1.2.3",
         "deep-is": "^0.1.3",
         "fast-levenshtein": "^2.0.6",
         "levn": "^0.4.1",
         "prelude-ls": "^1.2.1",
-        "type-check": "^0.4.0",
-        "word-wrap": "^1.2.3"
+        "type-check": "^0.4.0"
       }
     },
-    "os-browserify": {
-      "version": "0.3.0",
-      "resolved": "https://registry.npmjs.org/os-browserify/-/os-browserify-0.3.0.tgz",
-      "integrity": "sha512-gjcpUc3clBf9+210TRaDWbf+rZZZEshZ+DlXMRCeAjp0xhTrnQsKHypIy1J3d5hKdUzj69t708EHtU8P6bUn0A==",
-      "dev": true
-    },
     "p-limit": {
       "version": "3.1.0",
       "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-3.1.0.tgz",
@@ -9478,19 +7156,6 @@
         "callsites": "^3.0.0"
       }
     },
-    "parse-asn1": {
-      "version": "5.1.6",
-      "resolved": "https://registry.npmjs.org/parse-asn1/-/parse-asn1-5.1.6.tgz",
-      "integrity": "sha512-RnZRo1EPU6JBnra2vGHj0yhp6ebyjBZpmUCLHWiFhxlzvBCCpAuZ7elsBp1PVAbQN0/04VD/19rfzlBSwLstMw==",
-      "dev": true,
-      "requires": {
-        "asn1.js": "^5.2.0",
-        "browserify-aes": "^1.0.0",
-        "evp_bytestokey": "^1.0.0",
-        "pbkdf2": "^3.0.3",
-        "safe-buffer": "^5.1.1"
-      }
-    },
     "parse-json": {
       "version": "5.2.0",
       "resolved": "https://registry.npmjs.org/parse-json/-/parse-json-5.2.0.tgz",
@@ -9503,12 +7168,6 @@
         "lines-and-columns": "^1.1.6"
       }
     },
-    "path-browserify": {
-      "version": "1.0.1",
-      "resolved": "https://registry.npmjs.org/path-browserify/-/path-browserify-1.0.1.tgz",
-      "integrity": "sha512-b7uo2UCUOYZcnF/3ID0lulOJi/bafxa1xPe7ZPsammBSpjSWQkjNxlt635YGS2MiR9GjvuXCtz2emr3jbsz98g==",
-      "dev": true
-    },
     "path-exists": {
       "version": "4.0.0",
       "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz",
@@ -9537,80 +7196,13 @@
       "version": "4.0.0",
       "resolved": "https://registry.npmjs.org/path-type/-/path-type-4.0.0.tgz",
       "integrity": "sha512-gDKb8aZMDeD/tZWs9P6+q0J9Mwkdl6xMV8TjnGP3qJVJ06bdMgkbBlLU8IdfOsIsFz2BW1rNVT3XuNEl8zPAvw==",
-      "dev": true
-    },
-    "pbkdf2": {
-      "version": "3.1.2",
-      "resolved": "https://registry.npmjs.org/pbkdf2/-/pbkdf2-3.1.2.tgz",
-      "integrity": "sha512-iuh7L6jA7JEGu2WxDwtQP1ddOpaJNC4KlDEFfdQajSGgGPNi4OyDc2R7QnbY2bR9QjBVGwgvTdNJZoE7RaxUMA==",
-      "dev": true,
-      "requires": {
-        "create-hash": "^1.1.2",
-        "create-hmac": "^1.1.4",
-        "ripemd160": "^2.0.1",
-        "safe-buffer": "^5.0.1",
-        "sha.js": "^2.4.8"
-      }
-    },
-    "picocolors": {
-      "version": "1.0.0",
-      "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.0.0.tgz",
-      "integrity": "sha512-1fygroTLlHu66zi26VoTDv8yRgm0Fccecssto+MhsZ0D/DGW2sm8E8AjW7NU5VVTRt5GxbeZ5qBuJr+HyLYkjQ==",
-      "dev": true
-    },
-    "picomatch": {
-      "version": "2.3.1",
-      "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.1.tgz",
-      "integrity": "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==",
-      "dev": true
-    },
-    "pkg-dir": {
-      "version": "4.2.0",
-      "resolved": "https://registry.npmjs.org/pkg-dir/-/pkg-dir-4.2.0.tgz",
-      "integrity": "sha512-HRDzbaKjC+AOWVXxAU/x54COGeIv9eb+6CkDSQoNTt4XyWoIJvuPsXizxu/Fr23EiekbtZwmh1IcIG/l/a10GQ==",
-      "dev": true,
-      "requires": {
-        "find-up": "^4.0.0"
-      },
-      "dependencies": {
-        "find-up": {
-          "version": "4.1.0",
-          "resolved": "https://registry.npmjs.org/find-up/-/find-up-4.1.0.tgz",
-          "integrity": "sha512-PpOwAdQ/YlXQ2vj8a3h8IipDuYRi3wceVQQGYWxNINccq40Anw7BlsEXCMbt1Zt+OLA6Fq9suIpIWD0OsnISlw==",
-          "dev": true,
-          "requires": {
-            "locate-path": "^5.0.0",
-            "path-exists": "^4.0.0"
-          }
-        },
-        "locate-path": {
-          "version": "5.0.0",
-          "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-5.0.0.tgz",
-          "integrity": "sha512-t7hw9pI+WvuwNJXwk5zVHpyhIqzg2qTlklJOf0mVxGSbe3Fp2VieZcduNYjaLDoy6p9uGpQEGWG87WpMKlNq8g==",
-          "dev": true,
-          "requires": {
-            "p-locate": "^4.1.0"
-          }
-        },
-        "p-limit": {
-          "version": "2.3.0",
-          "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-2.3.0.tgz",
-          "integrity": "sha512-//88mFWSJx8lxCzwdAABTJL2MyWB12+eIY7MDL2SqLmAkeKU9qxRvWuSyTjm3FUmpBEMuFfckAIqEaVGUDxb6w==",
-          "dev": true,
-          "requires": {
-            "p-try": "^2.0.0"
-          }
-        },
-        "p-locate": {
-          "version": "4.1.0",
-          "resolved": "https://registry.npmjs.org/p-locate/-/p-locate-4.1.0.tgz",
-          "integrity": "sha512-R79ZZ/0wAxKGu3oYMlz8jy/kbhsNrS7SKZ7PxEHBgJ5+F2mtFW2fK2cOtBh1cHYkQsbzFV7I+EoRKe6Yt0oK7A==",
-          "dev": true,
-          "requires": {
-            "p-limit": "^2.2.0"
-          }
-        }
-      }
+      "dev": true
+    },
+    "picomatch": {
+      "version": "2.3.1",
+      "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.1.tgz",
+      "integrity": "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==",
+      "dev": true
     },
     "pluralize": {
       "version": "8.0.0",
@@ -9625,9 +7217,9 @@
       "dev": true
     },
     "prettier": {
-      "version": "3.0.0",
-      "resolved": "https://registry.npmjs.org/prettier/-/prettier-3.0.0.tgz",
-      "integrity": "sha512-zBf5eHpwHOGPC47h0zrPyNn+eAEIdEzfywMoYn2XPi0P44Zp0tSq64rq0xAREh4auw2cJZHo9QUob+NqCQky4g==",
+      "version": "3.0.3",
+      "resolved": "https://registry.npmjs.org/prettier/-/prettier-3.0.3.tgz",
+      "integrity": "sha512-L/4pUDMxcNa8R/EthV08Zt42WBO4h1rarVtK0K+QJG0X187OLo7l699jWw0GKuwzkPQ//jMFA/8Xm6Fh3J/DAg==",
       "dev": true
     },
     "process": {
@@ -9642,46 +7234,12 @@
       "integrity": "sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==",
       "dev": true
     },
-    "public-encrypt": {
-      "version": "4.0.3",
-      "resolved": "https://registry.npmjs.org/public-encrypt/-/public-encrypt-4.0.3.tgz",
-      "integrity": "sha512-zVpa8oKZSz5bTMTFClc1fQOnyyEzpl5ozpi1B5YcvBrdohMjH2rfsBtyXcuNuwjsDIXmBYlF2N5FlJYhR29t8Q==",
-      "dev": true,
-      "requires": {
-        "bn.js": "^4.1.0",
-        "browserify-rsa": "^4.0.0",
-        "create-hash": "^1.1.0",
-        "parse-asn1": "^5.0.0",
-        "randombytes": "^2.0.1",
-        "safe-buffer": "^5.1.2"
-      },
-      "dependencies": {
-        "bn.js": {
-          "version": "4.12.0",
-          "resolved": "https://registry.npmjs.org/bn.js/-/bn.js-4.12.0.tgz",
-          "integrity": "sha512-c98Bf3tPniI+scsdk237ku1Dc3ujXQTSgyiPUDEOe7tRkhrqridvh8klBv0HCEso1OLOYcHuCv/cS6DNxKH+ZA==",
-          "dev": true
-        }
-      }
-    },
     "punycode": {
       "version": "2.3.0",
       "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.0.tgz",
       "integrity": "sha512-rRV+zQD8tVFys26lAGR9WUuS4iUAngJScM+ZRSKtvl5tKeZ2t5bvdNFdNHBW9FWR4guGHlgmsZ1G7BSm2wTbuA==",
       "dev": true
     },
-    "querystring": {
-      "version": "0.2.0",
-      "resolved": "https://registry.npmjs.org/querystring/-/querystring-0.2.0.tgz",
-      "integrity": "sha512-X/xY82scca2tau62i9mDyU9K+I+djTMUsvwf7xnUX5GLvVzgJybOJf4Y6o9Zx3oJK/LSXg5tTZBjwzqVPaPO2g==",
-      "dev": true
-    },
-    "querystring-es3": {
-      "version": "0.2.1",
-      "resolved": "https://registry.npmjs.org/querystring-es3/-/querystring-es3-0.2.1.tgz",
-      "integrity": "sha512-773xhDQnZBMFobEiztv8LIl70ch5MSF/jUQVlhwFyBILqq96anmoctVIYz+ZRp0qbCKATTn6ev02M3r7Ga5vqA==",
-      "dev": true
-    },
     "queue-microtask": {
       "version": "1.2.3",
       "resolved": "https://registry.npmjs.org/queue-microtask/-/queue-microtask-1.2.3.tgz",
@@ -9697,16 +7255,6 @@
         "safe-buffer": "^5.1.0"
       }
     },
-    "randomfill": {
-      "version": "1.0.4",
-      "resolved": "https://registry.npmjs.org/randomfill/-/randomfill-1.0.4.tgz",
-      "integrity": "sha512-87lcbR8+MhcWcUiQ+9e+Rwx8MyR2P7qnt15ynUlbm3TU/fjbgz4GsvfSUDTemtCCtVCqb4ZcEFlyPNTh9bBTLw==",
-      "dev": true,
-      "requires": {
-        "randombytes": "^2.0.5",
-        "safe-buffer": "^5.1.0"
-      }
-    },
     "read-pkg": {
       "version": "5.2.0",
       "resolved": "https://registry.npmjs.org/read-pkg/-/read-pkg-5.2.0.tgz",
@@ -9807,42 +7355,27 @@
         "picomatch": "^2.2.1"
       }
     },
-    "rechoir": {
-      "version": "0.8.0",
-      "resolved": "https://registry.npmjs.org/rechoir/-/rechoir-0.8.0.tgz",
-      "integrity": "sha512-/vxpCXddiX8NGfGO/mTafwjq4aFa/71pvamip0++IQk3zG8cbCj0fifNPrjjF1XMXUne91jL9OoxmdykoEtifQ==",
-      "dev": true,
-      "requires": {
-        "resolve": "^1.20.0"
-      }
-    },
     "regexp-tree": {
-      "version": "0.1.24",
-      "resolved": "https://registry.npmjs.org/regexp-tree/-/regexp-tree-0.1.24.tgz",
-      "integrity": "sha512-s2aEVuLhvnVJW6s/iPgEGK6R+/xngd2jNQ+xy4bXNDKxZKJH6jpPHY6kVeVv1IeLCHgswRj+Kl3ELaDjG6V1iw==",
+      "version": "0.1.27",
+      "resolved": "https://registry.npmjs.org/regexp-tree/-/regexp-tree-0.1.27.tgz",
+      "integrity": "sha512-iETxpjK6YoRWJG5o6hXLwvjYAoW+FEZn9os0PD/b6AP6xQwsa/Y7lCVgIixBbUPMfhu+i2LtdeAqVTgGlQarfA==",
       "dev": true
     },
     "regexp.prototype.flags": {
-      "version": "1.4.3",
-      "resolved": "https://registry.npmjs.org/regexp.prototype.flags/-/regexp.prototype.flags-1.4.3.tgz",
-      "integrity": "sha512-fjggEOO3slI6Wvgjwflkc4NFRCTZAu5CnNfBd5qOMYhWdn67nJBBu34/TkD++eeFmd8C9r9jfXJ27+nSiRkSUA==",
+      "version": "1.5.1",
+      "resolved": "https://registry.npmjs.org/regexp.prototype.flags/-/regexp.prototype.flags-1.5.1.tgz",
+      "integrity": "sha512-sy6TXMN+hnP/wMy+ISxg3krXx7BAtWVO4UouuCN/ziM9UEne0euamVNafDfvC83bRNr95y0V5iijeDQFUNpvrg==",
       "dev": true,
       "requires": {
         "call-bind": "^1.0.2",
-        "define-properties": "^1.1.3",
-        "functions-have-names": "^1.2.2"
+        "define-properties": "^1.2.0",
+        "set-function-name": "^2.0.0"
       }
     },
-    "regexpp": {
-      "version": "3.2.0",
-      "resolved": "https://registry.npmjs.org/regexpp/-/regexpp-3.2.0.tgz",
-      "integrity": "sha512-pq2bWo9mVD43nbts2wGv17XLiNLya+GklZ8kaDLV2Z08gDCsGpnKn9BFMepvWuHCbyVvY7J5o5+BVvoQbmlJLg==",
-      "dev": true
-    },
     "regjsparser": {
-      "version": "0.9.1",
-      "resolved": "https://registry.npmjs.org/regjsparser/-/regjsparser-0.9.1.tgz",
-      "integrity": "sha512-dQUtn90WanSNl+7mQKcXAgZxvUe7Z0SqXlgzv0za4LwiUhyzBC58yQO3liFoUgu8GiJVInAhJjkj1N0EtQ5nkQ==",
+      "version": "0.10.0",
+      "resolved": "https://registry.npmjs.org/regjsparser/-/regjsparser-0.10.0.tgz",
+      "integrity": "sha512-qx+xQGZVsy55CH0a1hiVwHmqjLryfh7wQyF5HO07XJ9f7dQMY/gPQHhlyDkIzJKC+x2fUCpCcUODUUUFrm7SHA==",
       "dev": true,
       "requires": {
         "jsesc": "~0.5.0"
@@ -9873,23 +7406,6 @@
         "supports-preserve-symlinks-flag": "^1.0.0"
       }
     },
-    "resolve-cwd": {
-      "version": "3.0.0",
-      "resolved": "https://registry.npmjs.org/resolve-cwd/-/resolve-cwd-3.0.0.tgz",
-      "integrity": "sha512-OrZaX2Mb+rJCpH/6CpSqt9xFVpN++x01XnN2ie9g6P5/3xelLAkXWVADpdz1IHD/KFfEXyE6V0U01OQ3UO2rEg==",
-      "dev": true,
-      "requires": {
-        "resolve-from": "^5.0.0"
-      },
-      "dependencies": {
-        "resolve-from": {
-          "version": "5.0.0",
-          "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-5.0.0.tgz",
-          "integrity": "sha512-qYg9KP24dD5qka9J47d0aVky0N+b4fTU89LN9iDnjB5waksiC49rvMB0PrUJQGoTmH50XPiqOvAjDfaijGxYZw==",
-          "dev": true
-        }
-      }
-    },
     "resolve-from": {
       "version": "4.0.0",
       "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-4.0.0.tgz",
@@ -9911,16 +7427,6 @@
         "glob": "^7.1.3"
       }
     },
-    "ripemd160": {
-      "version": "2.0.2",
-      "resolved": "https://registry.npmjs.org/ripemd160/-/ripemd160-2.0.2.tgz",
-      "integrity": "sha512-ii4iagi25WusVoiC4B4lq7pbXfAp3D9v5CwfkY33vffw2+pkDjY1D8GaN7spsxvCSx8dkPqOZCEZyfxcmJG2IA==",
-      "dev": true,
-      "requires": {
-        "hash-base": "^3.0.0",
-        "inherits": "^2.0.1"
-      }
-    },
     "run-parallel": {
       "version": "1.2.0",
       "resolved": "https://registry.npmjs.org/run-parallel/-/run-parallel-1.2.0.tgz",
@@ -9930,21 +7436,32 @@
         "queue-microtask": "^1.2.2"
       }
     },
+    "safe-array-concat": {
+      "version": "1.0.1",
+      "resolved": "https://registry.npmjs.org/safe-array-concat/-/safe-array-concat-1.0.1.tgz",
+      "integrity": "sha512-6XbUAseYE2KtOuGueyeobCySj9L4+66Tn6KQMOPQJrAJEowYKW/YR/MGJZl7FdydUdaFu4LYyDZjxf4/Nmo23Q==",
+      "dev": true,
+      "requires": {
+        "call-bind": "^1.0.2",
+        "get-intrinsic": "^1.2.1",
+        "has-symbols": "^1.0.3",
+        "isarray": "^2.0.5"
+      },
+      "dependencies": {
+        "isarray": {
+          "version": "2.0.5",
+          "resolved": "https://registry.npmjs.org/isarray/-/isarray-2.0.5.tgz",
+          "integrity": "sha512-xHjhDr3cNBK0BzdUJSPXZntQUx/mwMS5Rw4A7lPJ90XGAO6ISP/ePDNuo0vhqOZU+UD5JoodwCAAoZQd3FeAKw==",
+          "dev": true
+        }
+      }
+    },
     "safe-buffer": {
       "version": "5.1.2",
       "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz",
       "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==",
       "dev": true
     },
-    "safe-regex": {
-      "version": "2.1.1",
-      "resolved": "https://registry.npmjs.org/safe-regex/-/safe-regex-2.1.1.tgz",
-      "integrity": "sha512-rx+x8AMzKb5Q5lQ95Zoi6ZbJqwCLkqi3XuJXp5P3rT8OEc6sZCJG5AE5dU3lsgRr/F4Bs31jSlVN+j5KrsGu9A==",
-      "dev": true,
-      "requires": {
-        "regexp-tree": "~0.1.1"
-      }
-    },
     "safe-regex-test": {
       "version": "1.0.0",
       "resolved": "https://registry.npmjs.org/safe-regex-test/-/safe-regex-test-1.0.0.tgz",
@@ -9956,72 +7473,38 @@
         "is-regex": "^1.1.4"
       }
     },
-    "safer-buffer": {
-      "version": "2.1.2",
-      "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz",
-      "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==",
-      "dev": true
-    },
-    "schema-utils": {
-      "version": "3.1.1",
-      "resolved": "https://registry.npmjs.org/schema-utils/-/schema-utils-3.1.1.tgz",
-      "integrity": "sha512-Y5PQxS4ITlC+EahLuXaY86TXfR7Dc5lw294alXOq86JAHCihAIZfqv8nNCWvaEJvaC51uN9hbLGeV0cFBdH+Fw==",
-      "dev": true,
-      "requires": {
-        "@types/json-schema": "^7.0.8",
-        "ajv": "^6.12.5",
-        "ajv-keywords": "^3.5.2"
-      }
-    },
     "semver": {
-      "version": "7.3.8",
-      "resolved": "https://registry.npmjs.org/semver/-/semver-7.3.8.tgz",
-      "integrity": "sha512-NB1ctGL5rlHrPJtFDVIVzTyQylMLu9N9VICA6HSFJo8MCGVTMW6gfpicwKmmK/dAjTOrqu5l63JJOpDSrAis3A==",
+      "version": "7.5.4",
+      "resolved": "https://registry.npmjs.org/semver/-/semver-7.5.4.tgz",
+      "integrity": "sha512-1bCSESV6Pv+i21Hvpxp3Dx+pSD8lIPt8uVjRrxAUt/nbswYc+tK6Y2btiULjd4+fnq15PX+nqQDC7Oft7WkwcA==",
       "dev": true,
       "requires": {
         "lru-cache": "^6.0.0"
       }
     },
-    "serialize-javascript": {
-      "version": "6.0.1",
-      "resolved": "https://registry.npmjs.org/serialize-javascript/-/serialize-javascript-6.0.1.tgz",
-      "integrity": "sha512-owoXEFjWRllis8/M1Q+Cw5k8ZH40e3zhp/ovX+Xr/vi1qj6QesbyXXViFbpNvWvPNAD62SutwEXavefrLJWj7w==",
-      "dev": true,
-      "requires": {
-        "randombytes": "^2.1.0"
-      }
-    },
     "set-blocking": {
       "version": "2.0.0",
       "resolved": "https://registry.npmjs.org/set-blocking/-/set-blocking-2.0.0.tgz",
       "integrity": "sha512-KiKBS8AnWGEyLzofFfmvKwpdPzqiy16LvQfK3yv/fVH7Bj13/wl3JSR1J+rfgRE9q7xUJK4qvgS8raSOeLUehw==",
       "dev": true
     },
+    "set-function-name": {
+      "version": "2.0.1",
+      "resolved": "https://registry.npmjs.org/set-function-name/-/set-function-name-2.0.1.tgz",
+      "integrity": "sha512-tMNCiqYVkXIZgc2Hnoy2IvC/f8ezc5koaRFkCjrpWzGpCd3qbZXPzVy9MAZzK1ch/X0jvSkojys3oqJN0qCmdA==",
+      "dev": true,
+      "requires": {
+        "define-data-property": "^1.0.1",
+        "functions-have-names": "^1.2.3",
+        "has-property-descriptors": "^1.0.0"
+      }
+    },
     "setimmediate": {
       "version": "1.0.5",
       "resolved": "https://registry.npmjs.org/setimmediate/-/setimmediate-1.0.5.tgz",
       "integrity": "sha512-MATJdZp8sLqDl/68LfQmbP8zKPLQNV6BIZoIgrscFDQ+RsvK/BxeDQOgyxKKoh0y/8h3BqVFnCqQ/gd+reiIXA==",
       "dev": true
     },
-    "sha.js": {
-      "version": "2.4.11",
-      "resolved": "https://registry.npmjs.org/sha.js/-/sha.js-2.4.11.tgz",
-      "integrity": "sha512-QMEp5B7cftE7APOjk5Y6xgrbWu+WkLVQwk8JNjZ8nKRciZaByEW6MubieAiToS7+dwvrjGhH8jRXz3MVd0AYqQ==",
-      "dev": true,
-      "requires": {
-        "inherits": "^2.0.1",
-        "safe-buffer": "^5.0.1"
-      }
-    },
-    "shallow-clone": {
-      "version": "3.0.1",
-      "resolved": "https://registry.npmjs.org/shallow-clone/-/shallow-clone-3.0.1.tgz",
-      "integrity": "sha512-/6KqX+GVUdqPuPPd2LxDDxzX6CAbjJehAAOKlNpqqUpAqPM6HeL8f+o3a+JsyGjn2lv0WY8UsTgUJjU9Ok55NA==",
-      "dev": true,
-      "requires": {
-        "kind-of": "^6.0.2"
-      }
-    },
     "shebang-command": {
       "version": "2.0.0",
       "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz",
@@ -10054,39 +7537,12 @@
       "integrity": "sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ==",
       "dev": true
     },
-    "sirv": {
-      "version": "1.0.19",
-      "resolved": "https://registry.npmjs.org/sirv/-/sirv-1.0.19.tgz",
-      "integrity": "sha512-JuLThK3TnZG1TAKDwNIqNq6QA2afLOCcm+iE8D1Kj3GA40pSPsxQjjJl0J8X3tsR7T+CP1GavpzLwYkgVLWrZQ==",
-      "dev": true,
-      "requires": {
-        "@polka/url": "^1.0.0-next.20",
-        "mrmime": "^1.0.0",
-        "totalist": "^1.0.0"
-      }
-    },
     "slash": {
       "version": "3.0.0",
       "resolved": "https://registry.npmjs.org/slash/-/slash-3.0.0.tgz",
       "integrity": "sha512-g9Q1haeby36OSStwb4ntCGGGaKsaVSjQ68fBxoQcutl5fS1vuY18H3wSt3jFyFtrkx+Kz0V1G85A4MyAdDMi2Q==",
       "dev": true
     },
-    "source-map": {
-      "version": "0.6.1",
-      "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz",
-      "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==",
-      "dev": true
-    },
-    "source-map-support": {
-      "version": "0.5.21",
-      "resolved": "https://registry.npmjs.org/source-map-support/-/source-map-support-0.5.21.tgz",
-      "integrity": "sha512-uBHU3L3czsIyYXKX88fdrGovxdSCoTGDRZ6SYXtSRxLZUzHg5P/66Ht6uoUlHu9EZod+inXhKo3qQgwXUT/y1w==",
-      "dev": true,
-      "requires": {
-        "buffer-from": "^1.0.0",
-        "source-map": "^0.6.0"
-      }
-    },
     "spdx-correct": {
       "version": "3.1.1",
       "resolved": "https://registry.npmjs.org/spdx-correct/-/spdx-correct-3.1.1.tgz",
@@ -10119,54 +7575,6 @@
       "integrity": "sha512-rr+VVSXtRhO4OHbXUiAF7xW3Bo9DuuF6C5jH+q/x15j2jniycgKbxU09Hr0WqlSLUs4i4ltHGXqTe7VHclYWyA==",
       "dev": true
     },
-    "stream-browserify": {
-      "version": "3.0.0",
-      "resolved": "https://registry.npmjs.org/stream-browserify/-/stream-browserify-3.0.0.tgz",
-      "integrity": "sha512-H73RAHsVBapbim0tU2JwwOiXUj+fikfiaoYAKHF3VJfA0pe2BCzkhAHBlLG6REzE+2WNZcxOXjK7lkso+9euLA==",
-      "dev": true,
-      "requires": {
-        "inherits": "~2.0.4",
-        "readable-stream": "^3.5.0"
-      },
-      "dependencies": {
-        "readable-stream": {
-          "version": "3.6.1",
-          "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.1.tgz",
-          "integrity": "sha512-+rQmrWMYGA90yenhTYsLWAsLsqVC8osOw6PKE1HDYiO0gdPeKe/xDHNzIAIn4C91YQ6oenEhfYqqc1883qHbjQ==",
-          "dev": true,
-          "requires": {
-            "inherits": "^2.0.3",
-            "string_decoder": "^1.1.1",
-            "util-deprecate": "^1.0.1"
-          }
-        }
-      }
-    },
-    "stream-http": {
-      "version": "3.2.0",
-      "resolved": "https://registry.npmjs.org/stream-http/-/stream-http-3.2.0.tgz",
-      "integrity": "sha512-Oq1bLqisTyK3TSCXpPbT4sdeYNdmyZJv1LxpEm2vu1ZhK89kSE5YXwZc3cWk0MagGaKriBh9mCFbVGtO+vY29A==",
-      "dev": true,
-      "requires": {
-        "builtin-status-codes": "^3.0.0",
-        "inherits": "^2.0.4",
-        "readable-stream": "^3.6.0",
-        "xtend": "^4.0.2"
-      },
-      "dependencies": {
-        "readable-stream": {
-          "version": "3.6.1",
-          "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.1.tgz",
-          "integrity": "sha512-+rQmrWMYGA90yenhTYsLWAsLsqVC8osOw6PKE1HDYiO0gdPeKe/xDHNzIAIn4C91YQ6oenEhfYqqc1883qHbjQ==",
-          "dev": true,
-          "requires": {
-            "inherits": "^2.0.3",
-            "string_decoder": "^1.1.1",
-            "util-deprecate": "^1.0.1"
-          }
-        }
-      }
-    },
     "string_decoder": {
       "version": "1.1.1",
       "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz",
@@ -10187,26 +7595,37 @@
         "strip-ansi": "^6.0.1"
       }
     },
+    "string.prototype.trim": {
+      "version": "1.2.8",
+      "resolved": "https://registry.npmjs.org/string.prototype.trim/-/string.prototype.trim-1.2.8.tgz",
+      "integrity": "sha512-lfjY4HcixfQXOfaqCvcBuOIapyaroTXhbkfJN3gcB1OtyupngWK4sEET9Knd0cXd28kTUqu/kHoV4HKSJdnjiQ==",
+      "dev": true,
+      "requires": {
+        "call-bind": "^1.0.2",
+        "define-properties": "^1.2.0",
+        "es-abstract": "^1.22.1"
+      }
+    },
     "string.prototype.trimend": {
-      "version": "1.0.6",
-      "resolved": "https://registry.npmjs.org/string.prototype.trimend/-/string.prototype.trimend-1.0.6.tgz",
-      "integrity": "sha512-JySq+4mrPf9EsDBEDYMOb/lM7XQLulwg5R/m1r0PXEFqrV0qHvl58sdTilSXtKOflCsK2E8jxf+GKC0T07RWwQ==",
+      "version": "1.0.7",
+      "resolved": "https://registry.npmjs.org/string.prototype.trimend/-/string.prototype.trimend-1.0.7.tgz",
+      "integrity": "sha512-Ni79DqeB72ZFq1uH/L6zJ+DKZTkOtPIHovb3YZHQViE+HDouuU4mBrLOLDn5Dde3RF8qw5qVETEjhu9locMLvA==",
       "dev": true,
       "requires": {
         "call-bind": "^1.0.2",
-        "define-properties": "^1.1.4",
-        "es-abstract": "^1.20.4"
+        "define-properties": "^1.2.0",
+        "es-abstract": "^1.22.1"
       }
     },
     "string.prototype.trimstart": {
-      "version": "1.0.6",
-      "resolved": "https://registry.npmjs.org/string.prototype.trimstart/-/string.prototype.trimstart-1.0.6.tgz",
-      "integrity": "sha512-omqjMDaY92pbn5HOX7f9IccLA+U1tA9GvtU4JrodiXFfYB7jPzzHpRzpglLAjtUV6bB557zwClJezTqnAiYnQA==",
+      "version": "1.0.7",
+      "resolved": "https://registry.npmjs.org/string.prototype.trimstart/-/string.prototype.trimstart-1.0.7.tgz",
+      "integrity": "sha512-NGhtDFu3jCEm7B4Fy0DpLewdJQOZcQ0rGbwQ/+stjnrp2i+rlKeCvos9hOIeCmqwratM47OBxY7uFZzjxHXmrg==",
       "dev": true,
       "requires": {
         "call-bind": "^1.0.2",
-        "define-properties": "^1.1.4",
-        "es-abstract": "^1.20.4"
+        "define-properties": "^1.2.0",
+        "es-abstract": "^1.22.1"
       }
     },
     "strip-ansi": {
@@ -10254,52 +7673,12 @@
       "integrity": "sha512-ot0WnXS9fgdkgIcePe6RHNk1WA8+muPa6cSjeR3V8K27q9BB1rTE3R1p7Hv0z1ZyAc8s6Vvv8DIyWf681MAt0w==",
       "dev": true
     },
-    "tapable": {
-      "version": "2.2.1",
-      "resolved": "https://registry.npmjs.org/tapable/-/tapable-2.2.1.tgz",
-      "integrity": "sha512-GNzQvQTOIP6RyTfE2Qxb8ZVlNmw0n88vp1szwWRimP02mnTsx3Wtn5qRdqY9w2XduFNUgvOwhNnQsjwCp+kqaQ==",
-      "dev": true
-    },
-    "terser": {
-      "version": "5.16.5",
-      "resolved": "https://registry.npmjs.org/terser/-/terser-5.16.5.tgz",
-      "integrity": "sha512-qcwfg4+RZa3YvlFh0qjifnzBHjKGNbtDo9yivMqMFDy9Q6FSaQWSB/j1xKhsoUFJIqDOM3TsN6D5xbrMrFcHbg==",
-      "dev": true,
-      "requires": {
-        "@jridgewell/source-map": "^0.3.2",
-        "acorn": "^8.5.0",
-        "commander": "^2.20.0",
-        "source-map-support": "~0.5.20"
-      }
-    },
-    "terser-webpack-plugin": {
-      "version": "5.3.6",
-      "resolved": "https://registry.npmjs.org/terser-webpack-plugin/-/terser-webpack-plugin-5.3.6.tgz",
-      "integrity": "sha512-kfLFk+PoLUQIbLmB1+PZDMRSZS99Mp+/MHqDNmMA6tOItzRt+Npe3E+fsMs5mfcM0wCtrrdU387UnV+vnSffXQ==",
-      "dev": true,
-      "requires": {
-        "@jridgewell/trace-mapping": "^0.3.14",
-        "jest-worker": "^27.4.5",
-        "schema-utils": "^3.1.1",
-        "serialize-javascript": "^6.0.0",
-        "terser": "^5.14.1"
-      }
-    },
     "text-table": {
       "version": "0.2.0",
       "resolved": "https://registry.npmjs.org/text-table/-/text-table-0.2.0.tgz",
       "integrity": "sha512-N+8UisAXDGk8PFXP4HAzVR9nbfmVJ3zYLAWiTIoqC5v5isinhr+r5uaO8+7r3BMfuNIufIsA7RdpVgacC2cSpw==",
       "dev": true
     },
-    "timers-browserify": {
-      "version": "2.0.12",
-      "resolved": "https://registry.npmjs.org/timers-browserify/-/timers-browserify-2.0.12.tgz",
-      "integrity": "sha512-9phl76Cqm6FhSX9Xe1ZUAMLtm1BLkKj2Qd5ApyWkXzsMRaA7dgr81kf4wJmQf/hAvg8EEyJxDo3du/0KlhPiKQ==",
-      "dev": true,
-      "requires": {
-        "setimmediate": "^1.0.4"
-      }
-    },
     "to-regex-range": {
       "version": "5.0.1",
       "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz",
@@ -10309,23 +7688,12 @@
         "is-number": "^7.0.0"
       }
     },
-    "totalist": {
-      "version": "1.1.0",
-      "resolved": "https://registry.npmjs.org/totalist/-/totalist-1.1.0.tgz",
-      "integrity": "sha512-gduQwd1rOdDMGxFG1gEvhV88Oirdo2p+KjoYFU7k2g+i7n6AFFbDQ5kMPUsW0pNbfQsB/cwXvT1i4Bue0s9g5g==",
-      "dev": true
-    },
-    "ts-loader": {
-      "version": "9.4.2",
-      "resolved": "https://registry.npmjs.org/ts-loader/-/ts-loader-9.4.2.tgz",
-      "integrity": "sha512-OmlC4WVmFv5I0PpaxYb+qGeGOdm5giHU7HwDDUjw59emP2UYMHy9fFSDcYgSNoH8sXcj4hGCSEhlDZ9ULeDraA==",
+    "ts-api-utils": {
+      "version": "1.0.3",
+      "resolved": "https://registry.npmjs.org/ts-api-utils/-/ts-api-utils-1.0.3.tgz",
+      "integrity": "sha512-wNMeqtMz5NtwpT/UZGY5alT+VoKdSsOOP/kqHFcUW1P/VRhH2wJ48+DN2WwUliNbQ976ETwDL0Ifd2VVvgonvg==",
       "dev": true,
-      "requires": {
-        "chalk": "^4.1.0",
-        "enhanced-resolve": "^5.0.0",
-        "micromatch": "^4.0.0",
-        "semver": "^7.3.4"
-      }
+      "requires": {}
     },
     "tsconfig-paths": {
       "version": "3.14.2",
@@ -10339,27 +7707,6 @@
         "strip-bom": "^3.0.0"
       }
     },
-    "tslib": {
-      "version": "1.14.1",
-      "resolved": "https://registry.npmjs.org/tslib/-/tslib-1.14.1.tgz",
-      "integrity": "sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg==",
-      "dev": true
-    },
-    "tsutils": {
-      "version": "3.21.0",
-      "resolved": "https://registry.npmjs.org/tsutils/-/tsutils-3.21.0.tgz",
-      "integrity": "sha512-mHKK3iUXL+3UF6xL5k0PEhKRUBKPBCv/+RkEOpjRWxxx27KKRBmmA60A9pgOUvMi8GKhRMPEmjBRPzs2W7O1OA==",
-      "dev": true,
-      "requires": {
-        "tslib": "^1.8.1"
-      }
-    },
-    "tty-browserify": {
-      "version": "0.0.1",
-      "resolved": "https://registry.npmjs.org/tty-browserify/-/tty-browserify-0.0.1.tgz",
-      "integrity": "sha512-C3TaO7K81YvjCgQH9Q1S3R3P3BtN3RIM8n+OvX4il1K1zgE8ZhI0op7kClgkxtutIE8hQrcrHBXvIheqKUUCxw==",
-      "dev": true
-    },
     "type-check": {
       "version": "0.4.0",
       "resolved": "https://registry.npmjs.org/type-check/-/type-check-0.4.0.tgz",
@@ -10375,6 +7722,42 @@
       "integrity": "sha512-Ne+eE4r0/iWnpAxD852z3A+N0Bt5RN//NjJwRd2VFHEmrywxf5vsZlh4R6lixl6B+wz/8d+maTSAkN1FIkI3LQ==",
       "dev": true
     },
+    "typed-array-buffer": {
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/typed-array-buffer/-/typed-array-buffer-1.0.0.tgz",
+      "integrity": "sha512-Y8KTSIglk9OZEr8zywiIHG/kmQ7KWyjseXs1CbSo8vC42w7hg2HgYTxSWwP0+is7bWDc1H+Fo026CpHFwm8tkw==",
+      "dev": true,
+      "requires": {
+        "call-bind": "^1.0.2",
+        "get-intrinsic": "^1.2.1",
+        "is-typed-array": "^1.1.10"
+      }
+    },
+    "typed-array-byte-length": {
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/typed-array-byte-length/-/typed-array-byte-length-1.0.0.tgz",
+      "integrity": "sha512-Or/+kvLxNpeQ9DtSydonMxCx+9ZXOswtwJn17SNLvhptaXYDJvkFFP5zbfU/uLmvnBJlI4yrnXRxpdWH/M5tNA==",
+      "dev": true,
+      "requires": {
+        "call-bind": "^1.0.2",
+        "for-each": "^0.3.3",
+        "has-proto": "^1.0.1",
+        "is-typed-array": "^1.1.10"
+      }
+    },
+    "typed-array-byte-offset": {
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/typed-array-byte-offset/-/typed-array-byte-offset-1.0.0.tgz",
+      "integrity": "sha512-RD97prjEt9EL8YgAgpOkf3O4IF9lhJFr9g0htQkm0rchFp/Vx7LW5Q8fSXXub7BXAODyUQohRMyOc3faCPd0hg==",
+      "dev": true,
+      "requires": {
+        "available-typed-arrays": "^1.0.5",
+        "call-bind": "^1.0.2",
+        "for-each": "^0.3.3",
+        "has-proto": "^1.0.1",
+        "is-typed-array": "^1.1.10"
+      }
+    },
     "typed-array-length": {
       "version": "1.0.4",
       "resolved": "https://registry.npmjs.org/typed-array-length/-/typed-array-length-1.0.4.tgz",
@@ -10387,9 +7770,9 @@
       }
     },
     "typescript": {
-      "version": "4.9.5",
-      "resolved": "https://registry.npmjs.org/typescript/-/typescript-4.9.5.tgz",
-      "integrity": "sha512-1FXk9E2Hm+QzZQ7z+McJiHL4NW1F2EzMu9Nq9i3zAaGqibafqYwCVU6WyWAuyQRRzOlxou8xZSyXLEN8oKj24g==",
+      "version": "5.2.2",
+      "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.2.2.tgz",
+      "integrity": "sha512-mI4WrpHsbCIcwT9cF4FZvr80QUeKvsUsUvKDoR+X/7XHQH98xYD8YHZg7ANtz2GtZt/CBq2QJ0thkGJMHfqc1w==",
       "dev": true
     },
     "unbox-primitive": {
@@ -10410,16 +7793,6 @@
       "integrity": "sha512-hAZsKq7Yy11Zu1DE0OzWjw7nnLZmJZYTDZZyEFHZdUhV8FkH5MCfoU1XMaxXovpyW5nq5scPqq0ZDP9Zyl04oQ==",
       "dev": true
     },
-    "update-browserslist-db": {
-      "version": "1.0.10",
-      "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.0.10.tgz",
-      "integrity": "sha512-OztqDenkfFkbSG+tRxBeAnCVPckDBcvibKd35yDONx6OU8N7sqgwc7rCbkJ/WcYtVRZ4ba68d6byhC21GFh7sQ==",
-      "dev": true,
-      "requires": {
-        "escalade": "^3.1.1",
-        "picocolors": "^1.0.0"
-      }
-    },
     "uri-js": {
       "version": "4.4.1",
       "resolved": "https://registry.npmjs.org/uri-js/-/uri-js-4.4.1.tgz",
@@ -10429,37 +7802,6 @@
         "punycode": "^2.1.0"
       }
     },
-    "url": {
-      "version": "0.11.0",
-      "resolved": "https://registry.npmjs.org/url/-/url-0.11.0.tgz",
-      "integrity": "sha512-kbailJa29QrtXnxgq+DdCEGlbTeYM2eJUxsz6vjZavrCYPMIFHMKQmSKYAIuUK2i7hgPm28a8piX5NTUtM/LKQ==",
-      "dev": true,
-      "requires": {
-        "punycode": "1.3.2",
-        "querystring": "0.2.0"
-      },
-      "dependencies": {
-        "punycode": {
-          "version": "1.3.2",
-          "resolved": "https://registry.npmjs.org/punycode/-/punycode-1.3.2.tgz",
-          "integrity": "sha512-RofWgt/7fL5wP1Y7fxE7/EmTLzQVnB0ycyibJ0OOHIlJqTNzglYFxVwETOcIoJqJmpDXJ9xImDv+Fq34F/d4Dw==",
-          "dev": true
-        }
-      }
-    },
-    "util": {
-      "version": "0.12.5",
-      "resolved": "https://registry.npmjs.org/util/-/util-0.12.5.tgz",
-      "integrity": "sha512-kZf/K6hEIrWHI6XqOFUiiMa+79wE/D8Q+NCNAWclkyg3b4d2k7s0QGepNjiABc+aR3N1PAyHL7p6UcLY6LmrnA==",
-      "dev": true,
-      "requires": {
-        "inherits": "^2.0.3",
-        "is-arguments": "^1.0.4",
-        "is-generator-function": "^1.0.7",
-        "is-typed-array": "^1.1.3",
-        "which-typed-array": "^1.1.2"
-      }
-    },
     "util-deprecate": {
       "version": "1.0.2",
       "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz",
@@ -10476,125 +7818,6 @@
         "spdx-expression-parse": "^3.0.0"
       }
     },
-    "vm-browserify": {
-      "version": "1.1.2",
-      "resolved": "https://registry.npmjs.org/vm-browserify/-/vm-browserify-1.1.2.tgz",
-      "integrity": "sha512-2ham8XPWTONajOR0ohOKOHXkm3+gaBmGut3SRuu75xLd/RRaY6vqgh8NBYYk7+RW3u5AtzPQZG8F10LHkl0lAQ==",
-      "dev": true
-    },
-    "watchpack": {
-      "version": "2.4.0",
-      "resolved": "https://registry.npmjs.org/watchpack/-/watchpack-2.4.0.tgz",
-      "integrity": "sha512-Lcvm7MGST/4fup+ifyKi2hjyIAwcdI4HRgtvTpIUxBRhB+RFtUh8XtDOxUfctVCnhVi+QQj49i91OyvzkJl6cg==",
-      "dev": true,
-      "requires": {
-        "glob-to-regexp": "^0.4.1",
-        "graceful-fs": "^4.1.2"
-      }
-    },
-    "webpack": {
-      "version": "5.76.0",
-      "resolved": "https://registry.npmjs.org/webpack/-/webpack-5.76.0.tgz",
-      "integrity": "sha512-l5sOdYBDunyf72HW8dF23rFtWq/7Zgvt/9ftMof71E/yUb1YLOBmTgA2K4vQthB3kotMrSj609txVE0dnr2fjA==",
-      "dev": true,
-      "requires": {
-        "@types/eslint-scope": "^3.7.3",
-        "@types/estree": "^0.0.51",
-        "@webassemblyjs/ast": "1.11.1",
-        "@webassemblyjs/wasm-edit": "1.11.1",
-        "@webassemblyjs/wasm-parser": "1.11.1",
-        "acorn": "^8.7.1",
-        "acorn-import-assertions": "^1.7.6",
-        "browserslist": "^4.14.5",
-        "chrome-trace-event": "^1.0.2",
-        "enhanced-resolve": "^5.10.0",
-        "es-module-lexer": "^0.9.0",
-        "eslint-scope": "5.1.1",
-        "events": "^3.2.0",
-        "glob-to-regexp": "^0.4.1",
-        "graceful-fs": "^4.2.9",
-        "json-parse-even-better-errors": "^2.3.1",
-        "loader-runner": "^4.2.0",
-        "mime-types": "^2.1.27",
-        "neo-async": "^2.6.2",
-        "schema-utils": "^3.1.0",
-        "tapable": "^2.1.1",
-        "terser-webpack-plugin": "^5.1.3",
-        "watchpack": "^2.4.0",
-        "webpack-sources": "^3.2.3"
-      }
-    },
-    "webpack-bundle-analyzer": {
-      "version": "4.8.0",
-      "resolved": "https://registry.npmjs.org/webpack-bundle-analyzer/-/webpack-bundle-analyzer-4.8.0.tgz",
-      "integrity": "sha512-ZzoSBePshOKhr+hd8u6oCkZVwpVaXgpw23ScGLFpR6SjYI7+7iIWYarjN6OEYOfRt8o7ZyZZQk0DuMizJ+LEIg==",
-      "dev": true,
-      "requires": {
-        "@discoveryjs/json-ext": "0.5.7",
-        "acorn": "^8.0.4",
-        "acorn-walk": "^8.0.0",
-        "chalk": "^4.1.0",
-        "commander": "^7.2.0",
-        "gzip-size": "^6.0.0",
-        "lodash": "^4.17.20",
-        "opener": "^1.5.2",
-        "sirv": "^1.0.7",
-        "ws": "^7.3.1"
-      },
-      "dependencies": {
-        "commander": {
-          "version": "7.2.0",
-          "resolved": "https://registry.npmjs.org/commander/-/commander-7.2.0.tgz",
-          "integrity": "sha512-QrWXB+ZQSVPmIWIhtEO9H+gwHaMGYiF5ChvoJ+K9ZGHG/sVsa6yiesAD1GC/x46sET00Xlwo1u49RVVVzvcSkw==",
-          "dev": true
-        }
-      }
-    },
-    "webpack-cli": {
-      "version": "5.0.1",
-      "resolved": "https://registry.npmjs.org/webpack-cli/-/webpack-cli-5.0.1.tgz",
-      "integrity": "sha512-S3KVAyfwUqr0Mo/ur3NzIp6jnerNpo7GUO6so51mxLi1spqsA17YcMXy0WOIJtBSnj748lthxC6XLbNKh/ZC+A==",
-      "dev": true,
-      "requires": {
-        "@discoveryjs/json-ext": "^0.5.0",
-        "@webpack-cli/configtest": "^2.0.1",
-        "@webpack-cli/info": "^2.0.1",
-        "@webpack-cli/serve": "^2.0.1",
-        "colorette": "^2.0.14",
-        "commander": "^9.4.1",
-        "cross-spawn": "^7.0.3",
-        "envinfo": "^7.7.3",
-        "fastest-levenshtein": "^1.0.12",
-        "import-local": "^3.0.2",
-        "interpret": "^3.1.1",
-        "rechoir": "^0.8.0",
-        "webpack-merge": "^5.7.3"
-      },
-      "dependencies": {
-        "commander": {
-          "version": "9.5.0",
-          "resolved": "https://registry.npmjs.org/commander/-/commander-9.5.0.tgz",
-          "integrity": "sha512-KRs7WVDKg86PWiuAqhDrAQnTXZKraVcCc6vFdL14qrZ/DcWwuRo7VoiYXalXO7S5GKpqYiVEwCbgFDfxNHKJBQ==",
-          "dev": true
-        }
-      }
-    },
-    "webpack-merge": {
-      "version": "5.8.0",
-      "resolved": "https://registry.npmjs.org/webpack-merge/-/webpack-merge-5.8.0.tgz",
-      "integrity": "sha512-/SaI7xY0831XwP6kzuwhKWVKDP9t1QY1h65lAFLbZqMPIuYcD9QAW4u9STIbU9kaJbPBB/geU/gLr1wDjOhQ+Q==",
-      "dev": true,
-      "requires": {
-        "clone-deep": "^4.0.1",
-        "wildcard": "^2.0.0"
-      }
-    },
-    "webpack-sources": {
-      "version": "3.2.3",
-      "resolved": "https://registry.npmjs.org/webpack-sources/-/webpack-sources-3.2.3.tgz",
-      "integrity": "sha512-/DyMEOrDgLKKIG0fmvtz+4dUX/3Ghozwgm6iPp8KRhvn+eQf9+Q7GWxVNMk3+uCPWfdXYC4ExGBckIXdFEfH1w==",
-      "dev": true
-    },
     "which": {
       "version": "2.0.2",
       "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz",
@@ -10618,17 +7841,16 @@
       }
     },
     "which-typed-array": {
-      "version": "1.1.9",
-      "resolved": "https://registry.npmjs.org/which-typed-array/-/which-typed-array-1.1.9.tgz",
-      "integrity": "sha512-w9c4xkx6mPidwp7180ckYWfMmvxpjlZuIudNtDf4N/tTAUB8VJbX25qZoAsrtGuYNnGw3pa0AXgbGKRB8/EceA==",
+      "version": "1.1.11",
+      "resolved": "https://registry.npmjs.org/which-typed-array/-/which-typed-array-1.1.11.tgz",
+      "integrity": "sha512-qe9UWWpkeG5yzZ0tNYxDmd7vo58HDBc39mZ0xWWpolAGADdFOzkfamWLDxkOWcvHQKVmdTyQdLD4NOfjLWTKew==",
       "dev": true,
       "requires": {
         "available-typed-arrays": "^1.0.5",
         "call-bind": "^1.0.2",
         "for-each": "^0.3.3",
         "gopd": "^1.0.1",
-        "has-tostringtag": "^1.0.0",
-        "is-typed-array": "^1.1.10"
+        "has-tostringtag": "^1.0.0"
       }
     },
     "wide-align": {
@@ -10640,28 +7862,6 @@
         "string-width": "^1.0.2 || 2 || 3 || 4"
       }
     },
-    "wildcard": {
-      "version": "2.0.0",
-      "resolved": "https://registry.npmjs.org/wildcard/-/wildcard-2.0.0.tgz",
-      "integrity": "sha512-JcKqAHLPxcdb9KM49dufGXn2x3ssnfjbcaQdLlfZsL9rH9wgDQjUtDxbo8NE0F6SFvydeu1VhZe7hZuHsB2/pw==",
-      "dev": true
-    },
-    "word-wrap": {
-      "version": "1.2.4",
-      "resolved": "https://registry.npmjs.org/word-wrap/-/word-wrap-1.2.4.tgz",
-      "integrity": "sha512-2V81OA4ugVo5pRo46hAoD2ivUJx8jXmWXfUkY4KFNw0hEptvN0QfH3K4nHiwzGeKl5rFKedV48QVoqYavy4YpA==",
-      "dev": true
-    },
-    "worker-loader": {
-      "version": "3.0.8",
-      "resolved": "https://registry.npmjs.org/worker-loader/-/worker-loader-3.0.8.tgz",
-      "integrity": "sha512-XQyQkIFeRVC7f7uRhFdNMe/iJOdO6zxAaR3EWbDp45v3mDhrTi+++oswKNxShUNjPC/1xUp5DB29YKLhFo129g==",
-      "dev": true,
-      "requires": {
-        "loader-utils": "^2.0.0",
-        "schema-utils": "^3.0.0"
-      }
-    },
     "workerpool": {
       "version": "6.2.1",
       "resolved": "https://registry.npmjs.org/workerpool/-/workerpool-6.2.1.tgz",
@@ -10685,19 +7885,6 @@
       "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==",
       "dev": true
     },
-    "ws": {
-      "version": "7.5.9",
-      "resolved": "https://registry.npmjs.org/ws/-/ws-7.5.9.tgz",
-      "integrity": "sha512-F+P9Jil7UiSKSkppIiD94dN07AwvFixvLIj1Og1Rl9GGMuNipJnV9JzjD6XuqmAeiswGvUmNLjr5cFuXwNS77Q==",
-      "dev": true,
-      "requires": {}
-    },
-    "xtend": {
-      "version": "4.0.2",
-      "resolved": "https://registry.npmjs.org/xtend/-/xtend-4.0.2.tgz",
-      "integrity": "sha512-LKYU1iAXJXUgAXn9URjiu+MWhyUXHsvfp7mcuYm9dSUKK0/CjtrUwFAxD82/mCWbtLsGjFIad0wIsod4zrTAEQ==",
-      "dev": true
-    },
     "y18n": {
       "version": "5.0.8",
       "resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz",
diff --git a/js/package.json b/js/package.json
index 82d644ae6570f..d2c689265c6ad 100644
--- a/js/package.json
+++ b/js/package.json
@@ -1,32 +1,27 @@
 {
   "devDependencies": {
-    "@types/fs-extra": "^11.0.1",
-    "@types/mocha": "^10.0.1",
+    "@types/fs-extra": "^11.0.2",
+    "@types/mocha": "^10.0.2",
     "@types/node": "^18.14.6",
     "@types/npmlog": "^4.1.4",
-    "@typescript-eslint/eslint-plugin": "^5.54.1",
-    "@typescript-eslint/parser": "^5.54.1",
+    "@typescript-eslint/eslint-plugin": "^6.7.4",
+    "@typescript-eslint/parser": "^6.7.4",
     "clang-format": "^1.8.0",
-    "dir-compare": "^4.0.0",
-    "eslint": "^8.35.0",
+    "dir-compare": "^4.2.0",
+    "esbuild": "^0.19.3",
+    "esbuild-plugin-polyfill-node": "^0.3.0",
+    "eslint": "^8.51.0",
     "eslint-plugin-header": "^3.1.1",
-    "eslint-plugin-import": "^2.27.5",
-    "eslint-plugin-jsdoc": "^40.0.1",
+    "eslint-plugin-import": "^2.28.1",
+    "eslint-plugin-jsdoc": "^46.8.2",
     "eslint-plugin-prefer-arrow": "^1.2.3",
-    "eslint-plugin-unicorn": "^46.0.0",
-    "fs-extra": "^11.1.0",
+    "eslint-plugin-unicorn": "^48.0.1",
+    "fs-extra": "^11.1.1",
     "jszip": "^3.10.1",
     "mocha": "^10.2.0",
-    "node-polyfill-webpack-plugin": "^2.0.1",
     "npmlog": "^7.0.1",
-    "prettier": "^3.0.0",
-    "terser": "^5.16.5",
-    "ts-loader": "^9.4.2",
-    "typescript": "^4.9.5",
-    "webpack": "^5.76.0",
-    "webpack-bundle-analyzer": "^4.8.0",
-    "webpack-cli": "^5.0.1",
-    "worker-loader": "^3.0.8"
+    "prettier": "^3.0.3",
+    "typescript": "^5.2.2"
   },
   "scripts": {
     "prepare": "tsc --build scripts",
diff --git a/js/react_native/e2e/package.json b/js/react_native/e2e/package.json
index 969c70c110123..cd97ec1d099e4 100644
--- a/js/react_native/e2e/package.json
+++ b/js/react_native/e2e/package.json
@@ -10,7 +10,8 @@
   },
   "dependencies": {
     "react": "^18.1.0",
-    "react-native": "^0.69.1"
+    "react-native": "^0.69.1",
+    "react-native-fs": "^2.20.0"
   },
   "devDependencies": {
     "@babel/core": "^7.17.0",
diff --git a/js/react_native/e2e/src/App.tsx b/js/react_native/e2e/src/App.tsx
index f3e415f0c5a55..8a76edabc613e 100644
--- a/js/react_native/e2e/src/App.tsx
+++ b/js/react_native/e2e/src/App.tsx
@@ -8,6 +8,7 @@ import { Image, Text, TextInput, View } from 'react-native';
 import { InferenceSession, Tensor } from 'onnxruntime-react-native';
 import MNIST, { MNISTInput, MNISTOutput, MNISTResult, } from './mnist-data-handler';
 import { Buffer } from 'buffer';
+import { readFile } from 'react-native-fs';
 
 interface State {
   session:
@@ -39,10 +40,21 @@ export default class App extends React.PureComponent<{}, State> {
         this.setState({ imagePath });
 
         const modelPath = await MNIST.getLocalModelPath();
-        const session: InferenceSession = await InferenceSession.create(modelPath);
+
+        // test creating session with path
+        console.log('Creating with path');
+        const pathSession: InferenceSession = await InferenceSession.create(modelPath);
+        pathSession.release();
+
+        // and with bytes
+        console.log('Creating with bytes');
+        const base64Str = await readFile(modelPath, 'base64');
+        const bytes = Buffer.from(base64Str, 'base64');
+        const session: InferenceSession = await InferenceSession.create(bytes);
         this.setState({ session });
 
-        void this.infer();
+        console.log('Test session created');
+        void await this.infer();
       } catch (err) {
         console.log(err.message);
       }
diff --git a/js/react_native/e2e/yarn.lock b/js/react_native/e2e/yarn.lock
index aaa35ae7895b9..9e20a286c4e27 100644
--- a/js/react_native/e2e/yarn.lock
+++ b/js/react_native/e2e/yarn.lock
@@ -39,6 +39,14 @@
   dependencies:
     "@babel/highlight" "^7.18.6"
 
+"@babel/code-frame@^7.22.13":
+  version "7.22.13"
+  resolved "https://registry.yarnpkg.com/@babel/code-frame/-/code-frame-7.22.13.tgz#e3c1c099402598483b7a8c46a721d1038803755e"
+  integrity sha512-XktuhWlJ5g+3TJXc5upd9Ks1HutSArik6jf2eAjYFyIOf4ej3RN+184cZbzDvbPnuTJIUhPKKJE3cIsYTiAT3w==
+  dependencies:
+    "@babel/highlight" "^7.22.13"
+    chalk "^2.4.2"
+
 "@babel/compat-data@^7.13.11", "@babel/compat-data@^7.17.10":
   version "7.18.5"
   resolved "https://registry.yarnpkg.com/@babel/compat-data/-/compat-data-7.18.5.tgz#acac0c839e317038c73137fbb6ef71a1d6238471"
@@ -100,15 +108,6 @@
     "@jridgewell/gen-mapping" "^0.3.0"
     jsesc "^2.5.1"
 
-"@babel/generator@^7.18.7":
-  version "7.18.7"
-  resolved "https://registry.yarnpkg.com/@babel/generator/-/generator-7.18.7.tgz#2aa78da3c05aadfc82dbac16c99552fc802284bd"
-  integrity sha512-shck+7VLlY72a2w9c3zYWuE1pwOKEiQHV7GTUbSnhyl5eu3i04t30tBY82ZRWrDfo3gkakCFtevExnxbkf2a3A==
-  dependencies:
-    "@babel/types" "^7.18.7"
-    "@jridgewell/gen-mapping" "^0.3.2"
-    jsesc "^2.5.1"
-
 "@babel/generator@^7.21.4", "@babel/generator@^7.7.2":
   version "7.21.4"
   resolved "https://registry.yarnpkg.com/@babel/generator/-/generator-7.21.4.tgz#64a94b7448989f421f919d5239ef553b37bb26bc"
@@ -119,6 +118,16 @@
     "@jridgewell/trace-mapping" "^0.3.17"
     jsesc "^2.5.1"
 
+"@babel/generator@^7.23.0":
+  version "7.23.0"
+  resolved "https://registry.yarnpkg.com/@babel/generator/-/generator-7.23.0.tgz#df5c386e2218be505b34837acbcb874d7a983420"
+  integrity sha512-lN85QRR+5IbYrMWM6Y4pE/noaQtg4pNiqeNGX60eqOfo6gtEj6uw/JagelB8vVztSd7R6M5n1+PQkDbHbBRU4g==
+  dependencies:
+    "@babel/types" "^7.23.0"
+    "@jridgewell/gen-mapping" "^0.3.2"
+    "@jridgewell/trace-mapping" "^0.3.17"
+    jsesc "^2.5.1"
+
 "@babel/helper-annotate-as-pure@^7.16.7":
   version "7.16.7"
   resolved "https://registry.yarnpkg.com/@babel/helper-annotate-as-pure/-/helper-annotate-as-pure-7.16.7.tgz#bb2339a7534a9c128e3102024c60760a3a7f3862"
@@ -220,6 +229,11 @@
   resolved "https://registry.yarnpkg.com/@babel/helper-environment-visitor/-/helper-environment-visitor-7.18.9.tgz#0c0cee9b35d2ca190478756865bb3528422f51be"
   integrity sha512-3r/aACDJ3fhQ/EVgFy0hpj8oHyHpQc+LPtJoY9SzTThAsStm4Ptegq92vqKoE3vD706ZVFWITnMnxucw+S9Ipg==
 
+"@babel/helper-environment-visitor@^7.22.20":
+  version "7.22.20"
+  resolved "https://registry.yarnpkg.com/@babel/helper-environment-visitor/-/helper-environment-visitor-7.22.20.tgz#96159db61d34a29dba454c959f5ae4a649ba9167"
+  integrity sha512-zfedSIzFhat/gFhWfHtgWvlec0nqB9YEIVrpuwjruLlXfUSnA8cJB0miHKwqDnQ7d32aKo2xt88/xZptwxbfhA==
+
 "@babel/helper-explode-assignable-expression@^7.16.7":
   version "7.16.7"
   resolved "https://registry.yarnpkg.com/@babel/helper-explode-assignable-expression/-/helper-explode-assignable-expression-7.16.7.tgz#12a6d8522fdd834f194e868af6354e8650242b7a"
@@ -243,27 +257,20 @@
     "@babel/template" "^7.18.6"
     "@babel/types" "^7.18.6"
 
-"@babel/helper-function-name@^7.21.0":
-  version "7.21.0"
-  resolved "https://registry.yarnpkg.com/@babel/helper-function-name/-/helper-function-name-7.21.0.tgz#d552829b10ea9f120969304023cd0645fa00b1b4"
-  integrity sha512-HfK1aMRanKHpxemaY2gqBmL04iAPOPRj7DxtNbiDOrJK+gdwkiNRVpCpUJYbUT+aZyemKN8brqTOxzCaG6ExRg==
-  dependencies:
-    "@babel/template" "^7.20.7"
-    "@babel/types" "^7.21.0"
-
-"@babel/helper-hoist-variables@^7.16.7":
-  version "7.16.7"
-  resolved "https://registry.yarnpkg.com/@babel/helper-hoist-variables/-/helper-hoist-variables-7.16.7.tgz#86bcb19a77a509c7b77d0e22323ef588fa58c246"
-  integrity sha512-m04d/0Op34H5v7pbZw6pSKP7weA6lsMvfiIAMeIvkY/R4xQtBSMFEigu9QTZ2qB/9l22vsxtM8a+Q8CzD255fg==
+"@babel/helper-function-name@^7.23.0":
+  version "7.23.0"
+  resolved "https://registry.yarnpkg.com/@babel/helper-function-name/-/helper-function-name-7.23.0.tgz#1f9a3cdbd5b2698a670c30d2735f9af95ed52759"
+  integrity sha512-OErEqsrxjZTJciZ4Oo+eoZqeW9UIiOcuYKRJA4ZAgV9myA+pOXhhmpfNCKjEH/auVfEYVFJ6y1Tc4r0eIApqiw==
   dependencies:
-    "@babel/types" "^7.16.7"
+    "@babel/template" "^7.22.15"
+    "@babel/types" "^7.23.0"
 
-"@babel/helper-hoist-variables@^7.18.6":
-  version "7.18.6"
-  resolved "https://registry.yarnpkg.com/@babel/helper-hoist-variables/-/helper-hoist-variables-7.18.6.tgz#d4d2c8fb4baeaa5c68b99cc8245c56554f926678"
-  integrity sha512-UlJQPkFqFULIcyW5sbzgbkxn2FKRgwWiRexcuaR8RNJRy8+LLveqPjwZV/bwrLZCN0eUHD/x8D0heK1ozuoo6Q==
+"@babel/helper-hoist-variables@^7.22.5":
+  version "7.22.5"
+  resolved "https://registry.yarnpkg.com/@babel/helper-hoist-variables/-/helper-hoist-variables-7.22.5.tgz#c01a007dac05c085914e8fb652b339db50d823bb"
+  integrity sha512-wGjk9QZVzvknA6yKIUURb8zY3grXCcOZt+/7Wcy8O2uctxhplmUPkOdlgoNhmdVee2c92JXbf1xpMtVNbfoxRw==
   dependencies:
-    "@babel/types" "^7.18.6"
+    "@babel/types" "^7.22.5"
 
 "@babel/helper-member-expression-to-functions@^7.17.7":
   version "7.17.7"
@@ -401,11 +408,23 @@
   dependencies:
     "@babel/types" "^7.18.6"
 
+"@babel/helper-split-export-declaration@^7.22.6":
+  version "7.22.6"
+  resolved "https://registry.yarnpkg.com/@babel/helper-split-export-declaration/-/helper-split-export-declaration-7.22.6.tgz#322c61b7310c0997fe4c323955667f18fcefb91c"
+  integrity sha512-AsUnxuLhRYsisFiaJwvp1QF+I3KjD5FOxut14q/GzovUe6orHLesW2C7d754kRm53h5gqrz6sFl6sxc4BVtE/g==
+  dependencies:
+    "@babel/types" "^7.22.5"
+
 "@babel/helper-string-parser@^7.19.4":
   version "7.19.4"
   resolved "https://registry.yarnpkg.com/@babel/helper-string-parser/-/helper-string-parser-7.19.4.tgz#38d3acb654b4701a9b77fb0615a96f775c3a9e63"
   integrity sha512-nHtDoQcuqFmwYNYPz3Rah5ph2p8PFeFCsZk9A/48dPc/rGocJ5J3hAAZ7pb76VWX3fZKu+uEr/FhH5jLx7umrw==
 
+"@babel/helper-string-parser@^7.22.5":
+  version "7.22.5"
+  resolved "https://registry.yarnpkg.com/@babel/helper-string-parser/-/helper-string-parser-7.22.5.tgz#533f36457a25814cf1df6488523ad547d784a99f"
+  integrity sha512-mM4COjgZox8U+JcXQwPijIZLElkgEpO5rsERVDJTc2qfCDfERyob6k5WegS14SX18IIjv+XD+GrqNumY5JRCDw==
+
 "@babel/helper-validator-identifier@^7.16.7":
   version "7.16.7"
   resolved "https://registry.yarnpkg.com/@babel/helper-validator-identifier/-/helper-validator-identifier-7.16.7.tgz#e8c602438c4a8195751243da9031d1607d247cad"
@@ -421,6 +440,11 @@
   resolved "https://registry.yarnpkg.com/@babel/helper-validator-identifier/-/helper-validator-identifier-7.19.1.tgz#7eea834cf32901ffdc1a7ee555e2f9c27e249ca2"
   integrity sha512-awrNfaMtnHUr653GgGEs++LlAvW6w+DcPrOliSMXWCKo597CwL5Acf/wWdNkf/tfEQE3mjkeD1YOVZOUV/od1w==
 
+"@babel/helper-validator-identifier@^7.22.20":
+  version "7.22.20"
+  resolved "https://registry.yarnpkg.com/@babel/helper-validator-identifier/-/helper-validator-identifier-7.22.20.tgz#c4ae002c61d2879e724581d96665583dbc1dc0e0"
+  integrity sha512-Y4OZ+ytlatR8AI+8KZfKuL5urKp7qey08ha31L8b3BwewJAoJamTzyvxPR/5D+KkdJCGPq/+8TukHBlY10FX9A==
+
 "@babel/helper-validator-option@^7.16.7":
   version "7.16.7"
   resolved "https://registry.yarnpkg.com/@babel/helper-validator-option/-/helper-validator-option-7.16.7.tgz#b203ce62ce5fe153899b617c08957de860de4d23"
@@ -487,6 +511,15 @@
     chalk "^2.0.0"
     js-tokens "^4.0.0"
 
+"@babel/highlight@^7.22.13":
+  version "7.22.20"
+  resolved "https://registry.yarnpkg.com/@babel/highlight/-/highlight-7.22.20.tgz#4ca92b71d80554b01427815e06f2df965b9c1f54"
+  integrity sha512-dkdMCN3py0+ksCgYmGG8jKeGA/8Tk+gJwSYYlFGxG5lmhfKNoAy004YpLxpS1W2J8m/EK2Ew+yOs9pVRwO89mg==
+  dependencies:
+    "@babel/helper-validator-identifier" "^7.22.20"
+    chalk "^2.4.2"
+    js-tokens "^4.0.0"
+
 "@babel/parser@^7.1.0", "@babel/parser@^7.14.7", "@babel/parser@^7.20.7", "@babel/parser@^7.21.4":
   version "7.21.4"
   resolved "https://registry.yarnpkg.com/@babel/parser/-/parser-7.21.4.tgz#94003fdfc520bbe2875d4ae557b43ddb6d880f17"
@@ -497,11 +530,16 @@
   resolved "https://registry.yarnpkg.com/@babel/parser/-/parser-7.18.5.tgz#337062363436a893a2d22faa60be5bb37091c83c"
   integrity sha512-YZWVaglMiplo7v8f1oMQ5ZPQr0vn7HPeZXxXWsxXJRjGVrzUFn9OxFQl1sb5wzfootjA/yChhW84BV+383FSOw==
 
-"@babel/parser@^7.18.6", "@babel/parser@^7.18.8":
+"@babel/parser@^7.18.6":
   version "7.18.8"
   resolved "https://registry.yarnpkg.com/@babel/parser/-/parser-7.18.8.tgz#822146080ac9c62dac0823bb3489622e0bc1cbdf"
   integrity sha512-RSKRfYX20dyH+elbJK2uqAkVyucL+xXzhqlMD5/ZXx+dAAwpyB7HsvnHe/ZUGOF+xLr5Wx9/JoXVTj6BQE2/oA==
 
+"@babel/parser@^7.22.15", "@babel/parser@^7.23.0":
+  version "7.23.0"
+  resolved "https://registry.yarnpkg.com/@babel/parser/-/parser-7.23.0.tgz#da950e622420bf96ca0d0f2909cdddac3acd8719"
+  integrity sha512-vvPKKdMemU85V9WE/l5wZEmImpCtLqbnTvqDS2U1fJ96KrxoW7KrXhNsNCblQlg8Ck4b85yxdTyelsMUgFUXiw==
+
 "@babel/plugin-proposal-async-generator-functions@^7.0.0":
   version "7.18.6"
   resolved "https://registry.yarnpkg.com/@babel/plugin-proposal-async-generator-functions/-/plugin-proposal-async-generator-functions-7.18.6.tgz#aedac81e6fc12bb643374656dd5f2605bf743d17"
@@ -1016,51 +1054,28 @@
     "@babel/parser" "^7.20.7"
     "@babel/types" "^7.20.7"
 
-"@babel/traverse@^7.13.0", "@babel/traverse@^7.14.0", "@babel/traverse@^7.16.8", "@babel/traverse@^7.18.0", "@babel/traverse@^7.18.2", "@babel/traverse@^7.18.5":
-  version "7.18.5"
-  resolved "https://registry.yarnpkg.com/@babel/traverse/-/traverse-7.18.5.tgz#94a8195ad9642801837988ab77f36e992d9a20cd"
-  integrity sha512-aKXj1KT66sBj0vVzk6rEeAO6Z9aiiQ68wfDgge3nHhA/my6xMM/7HGQUNumKZaoa2qUPQ5whJG9aAifsxUKfLA==
-  dependencies:
-    "@babel/code-frame" "^7.16.7"
-    "@babel/generator" "^7.18.2"
-    "@babel/helper-environment-visitor" "^7.18.2"
-    "@babel/helper-function-name" "^7.17.9"
-    "@babel/helper-hoist-variables" "^7.16.7"
-    "@babel/helper-split-export-declaration" "^7.16.7"
-    "@babel/parser" "^7.18.5"
-    "@babel/types" "^7.18.4"
-    debug "^4.1.0"
-    globals "^11.1.0"
-
-"@babel/traverse@^7.18.6":
-  version "7.18.8"
-  resolved "https://registry.yarnpkg.com/@babel/traverse/-/traverse-7.18.8.tgz#f095e62ab46abf1da35e5a2011f43aee72d8d5b0"
-  integrity sha512-UNg/AcSySJYR/+mIcJQDCv00T+AqRO7j/ZEJLzpaYtgM48rMg5MnkJgyNqkzo88+p4tfRvZJCEiwwfG6h4jkRg==
-  dependencies:
-    "@babel/code-frame" "^7.18.6"
-    "@babel/generator" "^7.18.7"
-    "@babel/helper-environment-visitor" "^7.18.6"
-    "@babel/helper-function-name" "^7.18.6"
-    "@babel/helper-hoist-variables" "^7.18.6"
-    "@babel/helper-split-export-declaration" "^7.18.6"
-    "@babel/parser" "^7.18.8"
-    "@babel/types" "^7.18.8"
-    debug "^4.1.0"
-    globals "^11.1.0"
-
-"@babel/traverse@^7.21.0", "@babel/traverse@^7.21.2", "@babel/traverse@^7.21.4", "@babel/traverse@^7.7.2":
-  version "7.21.4"
-  resolved "https://registry.yarnpkg.com/@babel/traverse/-/traverse-7.21.4.tgz#a836aca7b116634e97a6ed99976236b3282c9d36"
-  integrity sha512-eyKrRHKdyZxqDm+fV1iqL9UAHMoIg0nDaGqfIOd8rKH17m5snv7Gn4qgjBoFfLz9APvjFU/ICT00NVCv1Epp8Q==
-  dependencies:
-    "@babel/code-frame" "^7.21.4"
-    "@babel/generator" "^7.21.4"
-    "@babel/helper-environment-visitor" "^7.18.9"
-    "@babel/helper-function-name" "^7.21.0"
-    "@babel/helper-hoist-variables" "^7.18.6"
-    "@babel/helper-split-export-declaration" "^7.18.6"
-    "@babel/parser" "^7.21.4"
-    "@babel/types" "^7.21.4"
+"@babel/template@^7.22.15":
+  version "7.22.15"
+  resolved "https://registry.yarnpkg.com/@babel/template/-/template-7.22.15.tgz#09576efc3830f0430f4548ef971dde1350ef2f38"
+  integrity sha512-QPErUVm4uyJa60rkI73qneDacvdvzxshT3kksGqlGWYdOTIUOwJ7RDUL8sGqslY1uXWSL6xMFKEXDS3ox2uF0w==
+  dependencies:
+    "@babel/code-frame" "^7.22.13"
+    "@babel/parser" "^7.22.15"
+    "@babel/types" "^7.22.15"
+
+"@babel/traverse@^7.13.0", "@babel/traverse@^7.14.0", "@babel/traverse@^7.16.8", "@babel/traverse@^7.18.0", "@babel/traverse@^7.18.2", "@babel/traverse@^7.18.5", "@babel/traverse@^7.18.6", "@babel/traverse@^7.21.0", "@babel/traverse@^7.21.2", "@babel/traverse@^7.21.4", "@babel/traverse@^7.7.2":
+  version "7.23.2"
+  resolved "https://registry.yarnpkg.com/@babel/traverse/-/traverse-7.23.2.tgz#329c7a06735e144a506bdb2cad0268b7f46f4ad8"
+  integrity sha512-azpe59SQ48qG6nu2CzcMLbxUudtN+dOM9kDbUqGq3HXUJRlo7i8fvPoxQUzYgLZ4cMVmuZgm8vvBpNeRhd6XSw==
+  dependencies:
+    "@babel/code-frame" "^7.22.13"
+    "@babel/generator" "^7.23.0"
+    "@babel/helper-environment-visitor" "^7.22.20"
+    "@babel/helper-function-name" "^7.23.0"
+    "@babel/helper-hoist-variables" "^7.22.5"
+    "@babel/helper-split-export-declaration" "^7.22.6"
+    "@babel/parser" "^7.23.0"
+    "@babel/types" "^7.23.0"
     debug "^4.1.0"
     globals "^11.1.0"
 
@@ -1072,7 +1087,7 @@
     "@babel/helper-validator-identifier" "^7.16.7"
     to-fast-properties "^2.0.0"
 
-"@babel/types@^7.18.6", "@babel/types@^7.18.7", "@babel/types@^7.18.8":
+"@babel/types@^7.18.6":
   version "7.18.8"
   resolved "https://registry.yarnpkg.com/@babel/types/-/types-7.18.8.tgz#c5af199951bf41ba4a6a9a6d0d8ad722b30cd42f"
   integrity sha512-qwpdsmraq0aJ3osLJRApsc2ouSJCdnMeZwB0DhbtHAtRpZNZCdlbRnHIgcRKzdE1g0iOGg644fzjOBcdOz9cPw==
@@ -1089,6 +1104,15 @@
     "@babel/helper-validator-identifier" "^7.19.1"
     to-fast-properties "^2.0.0"
 
+"@babel/types@^7.22.15", "@babel/types@^7.22.5", "@babel/types@^7.23.0":
+  version "7.23.0"
+  resolved "https://registry.yarnpkg.com/@babel/types/-/types-7.23.0.tgz#8c1f020c9df0e737e4e247c0619f58c68458aaeb"
+  integrity sha512-0oIyUfKoI3mSqMvsxBdclDwxXKXAUA8v/apZbc+iSyARYou1o8ZGDxbUYyLFoW2arqS2jDGqJuZvv1d/io1axg==
+  dependencies:
+    "@babel/helper-string-parser" "^7.22.5"
+    "@babel/helper-validator-identifier" "^7.22.20"
+    to-fast-properties "^2.0.0"
+
 "@bcoe/v8-coverage@^0.2.3":
   version "0.2.3"
   resolved "https://registry.yarnpkg.com/@bcoe/v8-coverage/-/v8-coverage-0.2.3.tgz#75a2e8b51cb758a7553d6804a5932d7aace75c39"
@@ -2050,6 +2074,11 @@ balanced-match@^1.0.0:
   resolved "https://registry.yarnpkg.com/balanced-match/-/balanced-match-1.0.2.tgz#e83e3a7e3f300b34cb9d87f615fa0cbf357690ee"
   integrity sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==
 
+base-64@^0.1.0:
+  version "0.1.0"
+  resolved "https://registry.yarnpkg.com/base-64/-/base-64-0.1.0.tgz#780a99c84e7d600260361511c4877613bf24f6bb"
+  integrity sha512-Y5gU45svrR5tI2Vt/X9GPd3L0HNIKzGu202EjxrXMpuc2V2CiKgemAbUUsqYmZJvPtCXoUKjNZwBJzsNScUbXA==
+
 base64-js@^1.1.2, base64-js@^1.3.1, base64-js@^1.5.1:
   version "1.5.1"
   resolved "https://registry.yarnpkg.com/base64-js/-/base64-js-1.5.1.tgz#1b1b440160a5bf7ad40b650f095963481903930a"
@@ -2260,7 +2289,7 @@ caniuse-lite@^1.0.30001449:
   resolved "https://registry.yarnpkg.com/caniuse-lite/-/caniuse-lite-1.0.30001478.tgz#0ef8a1cf8b16be47a0f9fc4ecfc952232724b32a"
   integrity sha512-gMhDyXGItTHipJj2ApIvR+iVB5hd0KP3svMWWXDvZOmjzJJassGLMfxRkQCSYgGd2gtdL/ReeiyvMSFD1Ss6Mw==
 
-chalk@^2.0.0:
+chalk@^2.0.0, chalk@^2.4.2:
   version "2.4.2"
   resolved "https://registry.yarnpkg.com/chalk/-/chalk-2.4.2.tgz#cd42541677a54333cf541a49108c1432b44c9424"
   integrity sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ==
@@ -5205,6 +5234,14 @@ react-native-codegen@^0.69.2:
     jscodeshift "^0.13.1"
     nullthrows "^1.1.1"
 
+react-native-fs@^2.20.0:
+  version "2.20.0"
+  resolved "https://registry.yarnpkg.com/react-native-fs/-/react-native-fs-2.20.0.tgz#05a9362b473bfc0910772c0acbb73a78dbc810f6"
+  integrity sha512-VkTBzs7fIDUiy/XajOSNk0XazFE9l+QlMAce7lGuebZcag5CnjszB+u4BdqzwaQOdcYb5wsJIsqq4kxInIRpJQ==
+  dependencies:
+    base-64 "^0.1.0"
+    utf8 "^3.0.0"
+
 react-native-gradle-plugin@^0.0.7:
   version "0.0.7"
   resolved "https://registry.yarnpkg.com/react-native-gradle-plugin/-/react-native-gradle-plugin-0.0.7.tgz#96602f909745239deab7b589443f14fce5da2056"
@@ -6167,6 +6204,11 @@ utf8-byte-length@^1.0.1:
   resolved "https://registry.yarnpkg.com/utf8-byte-length/-/utf8-byte-length-1.0.4.tgz#f45f150c4c66eee968186505ab93fcbb8ad6bf61"
   integrity sha512-4+wkEYLBbWxqTahEsWrhxepcoVOJ+1z5PGIjPZxRkytcdSUaNjIjBM7Xn8E+pdSuV7SzvWovBFA54FO0JSoqhA==
 
+utf8@^3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/utf8/-/utf8-3.0.0.tgz#f052eed1364d696e769ef058b183df88c87f69d1"
+  integrity sha512-E8VjFIQ/TyQgp+TZfS6l8yp/xWppSAHzidGiRrqe4bK4XP9pTRyKFgGJpO3SN7zdX4DeomTrwaseCHovfpFcqQ==
+
 util-deprecate@^1.0.1, util-deprecate@~1.0.1:
   version "1.0.2"
   resolved "https://registry.yarnpkg.com/util-deprecate/-/util-deprecate-1.0.2.tgz#450d4dc9fa70de732762fbd2d4a28981419a0ccf"
diff --git a/js/react_native/lib/backend.ts b/js/react_native/lib/backend.ts
index b3f0c466308a5..3d3569028e636 100644
--- a/js/react_native/lib/backend.ts
+++ b/js/react_native/lib/backend.ts
@@ -1,10 +1,10 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
-import {Backend, InferenceSession, SessionHandler, Tensor,} from 'onnxruntime-common';
+import {type Backend, InferenceSession, type InferenceSessionHandler, type SessionHandler, Tensor} from 'onnxruntime-common';
 import {Platform} from 'react-native';
 
-import {binding, Binding, JSIBlob, jsiHelper} from './binding';
+import {binding, type Binding, type JSIBlob, jsiHelper} from './binding';
 
 type SupportedTypedArray = Exclude<Tensor.DataType, string[]>;
 
@@ -43,7 +43,7 @@ const normalizePath = (path: string): string => {
   return path;
 };
 
-class OnnxruntimeSessionHandler implements SessionHandler {
+class OnnxruntimeSessionHandler implements InferenceSessionHandler {
   #inferenceSession: Binding.InferenceSession;
   #key: string;
 
@@ -66,12 +66,14 @@ class OnnxruntimeSessionHandler implements SessionHandler {
       let results: Binding.ModelLoadInfoType;
       // load a model
       if (typeof this.#pathOrBuffer === 'string') {
+        // load model from model path
         results = await this.#inferenceSession.loadModel(normalizePath(this.#pathOrBuffer), options);
       } else {
+        // load model from buffer
         if (!this.#inferenceSession.loadModelFromBlob) {
           throw new Error('Native module method "loadModelFromBlob" is not defined');
         }
-        const modelBlob = jsiHelper.storeArrayBuffer(this.#pathOrBuffer);
+        const modelBlob = jsiHelper.storeArrayBuffer(this.#pathOrBuffer.buffer);
         results = await this.#inferenceSession.loadModelFromBlob(modelBlob, options);
       }
       // resolve promise if onnxruntime session is successfully created
@@ -163,8 +165,8 @@ class OnnxruntimeBackend implements Backend {
     return Promise.resolve();
   }
 
-  async createSessionHandler(pathOrBuffer: string|Uint8Array, options?: InferenceSession.SessionOptions):
-      Promise<SessionHandler> {
+  async createInferenceSessionHandler(pathOrBuffer: string|Uint8Array, options?: InferenceSession.SessionOptions):
+      Promise<InferenceSessionHandler> {
     const handler = new OnnxruntimeSessionHandler(pathOrBuffer);
     await handler.loadModel(options || {});
     return handler;
diff --git a/js/react_native/tsconfig.json b/js/react_native/tsconfig.json
index 9e929c4530982..2817d5512a186 100644
--- a/js/react_native/tsconfig.json
+++ b/js/react_native/tsconfig.json
@@ -7,7 +7,6 @@
     "allowUnreachableCode": false,
     "allowUnusedLabels": false,
     "esModuleInterop": true,
-    "importsNotUsedAsValues": "error",
     "forceConsistentCasingInFileNames": true,
     "jsx": "react",
     "lib": ["esnext"],
diff --git a/js/react_native/yarn.lock b/js/react_native/yarn.lock
index 0b47158ab705b..ff9be7fbe3a5b 100644
--- a/js/react_native/yarn.lock
+++ b/js/react_native/yarn.lock
@@ -24,6 +24,14 @@
   dependencies:
     "@babel/highlight" "^7.18.6"
 
+"@babel/code-frame@^7.22.13":
+  version "7.22.13"
+  resolved "https://registry.yarnpkg.com/@babel/code-frame/-/code-frame-7.22.13.tgz#e3c1c099402598483b7a8c46a721d1038803755e"
+  integrity sha512-XktuhWlJ5g+3TJXc5upd9Ks1HutSArik6jf2eAjYFyIOf4ej3RN+184cZbzDvbPnuTJIUhPKKJE3cIsYTiAT3w==
+  dependencies:
+    "@babel/highlight" "^7.22.13"
+    chalk "^2.4.2"
+
 "@babel/code-frame@~7.10.4":
   version "7.10.4"
   resolved "https://registry.yarnpkg.com/@babel/code-frame/-/code-frame-7.10.4.tgz#168da1a36e90da68ae8d49c0f1b48c7c6249213a"
@@ -66,13 +74,14 @@
     "@jridgewell/gen-mapping" "^0.3.0"
     jsesc "^2.5.1"
 
-"@babel/generator@^7.18.7":
-  version "7.18.7"
-  resolved "https://registry.yarnpkg.com/@babel/generator/-/generator-7.18.7.tgz#2aa78da3c05aadfc82dbac16c99552fc802284bd"
-  integrity sha512-shck+7VLlY72a2w9c3zYWuE1pwOKEiQHV7GTUbSnhyl5eu3i04t30tBY82ZRWrDfo3gkakCFtevExnxbkf2a3A==
+"@babel/generator@^7.23.0":
+  version "7.23.0"
+  resolved "https://registry.yarnpkg.com/@babel/generator/-/generator-7.23.0.tgz#df5c386e2218be505b34837acbcb874d7a983420"
+  integrity sha512-lN85QRR+5IbYrMWM6Y4pE/noaQtg4pNiqeNGX60eqOfo6gtEj6uw/JagelB8vVztSd7R6M5n1+PQkDbHbBRU4g==
   dependencies:
-    "@babel/types" "^7.18.7"
+    "@babel/types" "^7.23.0"
     "@jridgewell/gen-mapping" "^0.3.2"
+    "@jridgewell/trace-mapping" "^0.3.17"
     jsesc "^2.5.1"
 
 "@babel/helper-annotate-as-pure@^7.16.7":
@@ -160,6 +169,11 @@
   resolved "https://registry.yarnpkg.com/@babel/helper-environment-visitor/-/helper-environment-visitor-7.18.6.tgz#b7eee2b5b9d70602e59d1a6cad7dd24de7ca6cd7"
   integrity sha512-8n6gSfn2baOY+qlp+VSzsosjCVGFqWKmDF0cCWOybh52Dw3SEyoWR1KrhMJASjLwIEkkAufZ0xvr+SxLHSpy2Q==
 
+"@babel/helper-environment-visitor@^7.22.20":
+  version "7.22.20"
+  resolved "https://registry.yarnpkg.com/@babel/helper-environment-visitor/-/helper-environment-visitor-7.22.20.tgz#96159db61d34a29dba454c959f5ae4a649ba9167"
+  integrity sha512-zfedSIzFhat/gFhWfHtgWvlec0nqB9YEIVrpuwjruLlXfUSnA8cJB0miHKwqDnQ7d32aKo2xt88/xZptwxbfhA==
+
 "@babel/helper-explode-assignable-expression@^7.16.7":
   version "7.16.7"
   resolved "https://registry.yarnpkg.com/@babel/helper-explode-assignable-expression/-/helper-explode-assignable-expression-7.16.7.tgz#12a6d8522fdd834f194e868af6354e8650242b7a"
@@ -183,6 +197,14 @@
     "@babel/template" "^7.18.6"
     "@babel/types" "^7.18.6"
 
+"@babel/helper-function-name@^7.23.0":
+  version "7.23.0"
+  resolved "https://registry.yarnpkg.com/@babel/helper-function-name/-/helper-function-name-7.23.0.tgz#1f9a3cdbd5b2698a670c30d2735f9af95ed52759"
+  integrity sha512-OErEqsrxjZTJciZ4Oo+eoZqeW9UIiOcuYKRJA4ZAgV9myA+pOXhhmpfNCKjEH/auVfEYVFJ6y1Tc4r0eIApqiw==
+  dependencies:
+    "@babel/template" "^7.22.15"
+    "@babel/types" "^7.23.0"
+
 "@babel/helper-hoist-variables@^7.16.7":
   version "7.16.7"
   resolved "https://registry.yarnpkg.com/@babel/helper-hoist-variables/-/helper-hoist-variables-7.16.7.tgz#86bcb19a77a509c7b77d0e22323ef588fa58c246"
@@ -190,12 +212,12 @@
   dependencies:
     "@babel/types" "^7.16.7"
 
-"@babel/helper-hoist-variables@^7.18.6":
-  version "7.18.6"
-  resolved "https://registry.yarnpkg.com/@babel/helper-hoist-variables/-/helper-hoist-variables-7.18.6.tgz#d4d2c8fb4baeaa5c68b99cc8245c56554f926678"
-  integrity sha512-UlJQPkFqFULIcyW5sbzgbkxn2FKRgwWiRexcuaR8RNJRy8+LLveqPjwZV/bwrLZCN0eUHD/x8D0heK1ozuoo6Q==
+"@babel/helper-hoist-variables@^7.22.5":
+  version "7.22.5"
+  resolved "https://registry.yarnpkg.com/@babel/helper-hoist-variables/-/helper-hoist-variables-7.22.5.tgz#c01a007dac05c085914e8fb652b339db50d823bb"
+  integrity sha512-wGjk9QZVzvknA6yKIUURb8zY3grXCcOZt+/7Wcy8O2uctxhplmUPkOdlgoNhmdVee2c92JXbf1xpMtVNbfoxRw==
   dependencies:
-    "@babel/types" "^7.18.6"
+    "@babel/types" "^7.22.5"
 
 "@babel/helper-member-expression-to-functions@^7.17.7":
   version "7.17.7"
@@ -293,12 +315,17 @@
   dependencies:
     "@babel/types" "^7.16.7"
 
-"@babel/helper-split-export-declaration@^7.18.6":
-  version "7.18.6"
-  resolved "https://registry.yarnpkg.com/@babel/helper-split-export-declaration/-/helper-split-export-declaration-7.18.6.tgz#7367949bc75b20c6d5a5d4a97bba2824ae8ef075"
-  integrity sha512-bde1etTx6ZyTmobl9LLMMQsaizFVZrquTEHOqKeQESMKo4PlObf+8+JA25ZsIpZhT/WEd39+vOdLXAFG/nELpA==
+"@babel/helper-split-export-declaration@^7.22.6":
+  version "7.22.6"
+  resolved "https://registry.yarnpkg.com/@babel/helper-split-export-declaration/-/helper-split-export-declaration-7.22.6.tgz#322c61b7310c0997fe4c323955667f18fcefb91c"
+  integrity sha512-AsUnxuLhRYsisFiaJwvp1QF+I3KjD5FOxut14q/GzovUe6orHLesW2C7d754kRm53h5gqrz6sFl6sxc4BVtE/g==
   dependencies:
-    "@babel/types" "^7.18.6"
+    "@babel/types" "^7.22.5"
+
+"@babel/helper-string-parser@^7.22.5":
+  version "7.22.5"
+  resolved "https://registry.yarnpkg.com/@babel/helper-string-parser/-/helper-string-parser-7.22.5.tgz#533f36457a25814cf1df6488523ad547d784a99f"
+  integrity sha512-mM4COjgZox8U+JcXQwPijIZLElkgEpO5rsERVDJTc2qfCDfERyob6k5WegS14SX18IIjv+XD+GrqNumY5JRCDw==
 
 "@babel/helper-validator-identifier@^7.16.7":
   version "7.19.1"
@@ -310,6 +337,11 @@
   resolved "https://registry.yarnpkg.com/@babel/helper-validator-identifier/-/helper-validator-identifier-7.18.6.tgz#9c97e30d31b2b8c72a1d08984f2ca9b574d7a076"
   integrity sha512-MmetCkz9ej86nJQV+sFCxoGGrUbU3q02kgLciwkrt9QqEB7cP39oKEY0PakknEO0Gu20SskMRi+AYZ3b1TpN9g==
 
+"@babel/helper-validator-identifier@^7.22.20":
+  version "7.22.20"
+  resolved "https://registry.yarnpkg.com/@babel/helper-validator-identifier/-/helper-validator-identifier-7.22.20.tgz#c4ae002c61d2879e724581d96665583dbc1dc0e0"
+  integrity sha512-Y4OZ+ytlatR8AI+8KZfKuL5urKp7qey08ha31L8b3BwewJAoJamTzyvxPR/5D+KkdJCGPq/+8TukHBlY10FX9A==
+
 "@babel/helper-validator-option@^7.16.7":
   version "7.16.7"
   resolved "https://registry.yarnpkg.com/@babel/helper-validator-option/-/helper-validator-option-7.16.7.tgz#b203ce62ce5fe153899b617c08957de860de4d23"
@@ -362,16 +394,30 @@
     chalk "^2.0.0"
     js-tokens "^4.0.0"
 
+"@babel/highlight@^7.22.13":
+  version "7.22.20"
+  resolved "https://registry.yarnpkg.com/@babel/highlight/-/highlight-7.22.20.tgz#4ca92b71d80554b01427815e06f2df965b9c1f54"
+  integrity sha512-dkdMCN3py0+ksCgYmGG8jKeGA/8Tk+gJwSYYlFGxG5lmhfKNoAy004YpLxpS1W2J8m/EK2Ew+yOs9pVRwO89mg==
+  dependencies:
+    "@babel/helper-validator-identifier" "^7.22.20"
+    chalk "^2.4.2"
+    js-tokens "^4.0.0"
+
 "@babel/parser@^7.1.0", "@babel/parser@^7.13.16", "@babel/parser@^7.14.0", "@babel/parser@^7.14.7", "@babel/parser@^7.16.7", "@babel/parser@^7.18.0":
   version "7.18.3"
   resolved "https://registry.yarnpkg.com/@babel/parser/-/parser-7.18.3.tgz#39e99c7b0c4c56cef4d1eed8de9f506411c2ebc2"
   integrity sha512-rL50YcEuHbbauAFAysNsJA4/f89fGTOBRNs9P81sniKnKAr4xULe5AecolcsKbi88xu0ByWYDj/S1AJ3FSFuSQ==
 
-"@babel/parser@^7.18.6", "@babel/parser@^7.18.8":
+"@babel/parser@^7.18.6":
   version "7.18.8"
   resolved "https://registry.yarnpkg.com/@babel/parser/-/parser-7.18.8.tgz#822146080ac9c62dac0823bb3489622e0bc1cbdf"
   integrity sha512-RSKRfYX20dyH+elbJK2uqAkVyucL+xXzhqlMD5/ZXx+dAAwpyB7HsvnHe/ZUGOF+xLr5Wx9/JoXVTj6BQE2/oA==
 
+"@babel/parser@^7.22.15", "@babel/parser@^7.23.0":
+  version "7.23.0"
+  resolved "https://registry.yarnpkg.com/@babel/parser/-/parser-7.23.0.tgz#da950e622420bf96ca0d0f2909cdddac3acd8719"
+  integrity sha512-vvPKKdMemU85V9WE/l5wZEmImpCtLqbnTvqDS2U1fJ96KrxoW7KrXhNsNCblQlg8Ck4b85yxdTyelsMUgFUXiw==
+
 "@babel/plugin-bugfix-safari-id-destructuring-collision-in-function-expression@^7.17.12":
   version "7.17.12"
   resolved "https://registry.yarnpkg.com/@babel/plugin-bugfix-safari-id-destructuring-collision-in-function-expression/-/plugin-bugfix-safari-id-destructuring-collision-in-function-expression-7.17.12.tgz#1dca338caaefca368639c9ffb095afbd4d420b1e"
@@ -1182,35 +1228,28 @@
     "@babel/parser" "^7.18.6"
     "@babel/types" "^7.18.6"
 
-"@babel/traverse@^7.13.0", "@babel/traverse@^7.14.0", "@babel/traverse@^7.16.8", "@babel/traverse@^7.18.0", "@babel/traverse@^7.18.2", "@babel/traverse@^7.7.2":
-  version "7.18.2"
-  resolved "https://registry.yarnpkg.com/@babel/traverse/-/traverse-7.18.2.tgz#b77a52604b5cc836a9e1e08dca01cba67a12d2e8"
-  integrity sha512-9eNwoeovJ6KH9zcCNnENY7DMFwTU9JdGCFtqNLfUAqtUHRCOsTOqWoffosP8vKmNYeSBUv3yVJXjfd8ucwOjUA==
-  dependencies:
-    "@babel/code-frame" "^7.16.7"
-    "@babel/generator" "^7.18.2"
-    "@babel/helper-environment-visitor" "^7.18.2"
-    "@babel/helper-function-name" "^7.17.9"
-    "@babel/helper-hoist-variables" "^7.16.7"
-    "@babel/helper-split-export-declaration" "^7.16.7"
-    "@babel/parser" "^7.18.0"
-    "@babel/types" "^7.18.2"
-    debug "^4.1.0"
-    globals "^11.1.0"
-
-"@babel/traverse@^7.18.6":
-  version "7.18.8"
-  resolved "https://registry.yarnpkg.com/@babel/traverse/-/traverse-7.18.8.tgz#f095e62ab46abf1da35e5a2011f43aee72d8d5b0"
-  integrity sha512-UNg/AcSySJYR/+mIcJQDCv00T+AqRO7j/ZEJLzpaYtgM48rMg5MnkJgyNqkzo88+p4tfRvZJCEiwwfG6h4jkRg==
-  dependencies:
-    "@babel/code-frame" "^7.18.6"
-    "@babel/generator" "^7.18.7"
-    "@babel/helper-environment-visitor" "^7.18.6"
-    "@babel/helper-function-name" "^7.18.6"
-    "@babel/helper-hoist-variables" "^7.18.6"
-    "@babel/helper-split-export-declaration" "^7.18.6"
-    "@babel/parser" "^7.18.8"
-    "@babel/types" "^7.18.8"
+"@babel/template@^7.22.15":
+  version "7.22.15"
+  resolved "https://registry.yarnpkg.com/@babel/template/-/template-7.22.15.tgz#09576efc3830f0430f4548ef971dde1350ef2f38"
+  integrity sha512-QPErUVm4uyJa60rkI73qneDacvdvzxshT3kksGqlGWYdOTIUOwJ7RDUL8sGqslY1uXWSL6xMFKEXDS3ox2uF0w==
+  dependencies:
+    "@babel/code-frame" "^7.22.13"
+    "@babel/parser" "^7.22.15"
+    "@babel/types" "^7.22.15"
+
+"@babel/traverse@^7.13.0", "@babel/traverse@^7.14.0", "@babel/traverse@^7.16.8", "@babel/traverse@^7.18.0", "@babel/traverse@^7.18.2", "@babel/traverse@^7.18.6", "@babel/traverse@^7.7.2":
+  version "7.23.2"
+  resolved "https://registry.yarnpkg.com/@babel/traverse/-/traverse-7.23.2.tgz#329c7a06735e144a506bdb2cad0268b7f46f4ad8"
+  integrity sha512-azpe59SQ48qG6nu2CzcMLbxUudtN+dOM9kDbUqGq3HXUJRlo7i8fvPoxQUzYgLZ4cMVmuZgm8vvBpNeRhd6XSw==
+  dependencies:
+    "@babel/code-frame" "^7.22.13"
+    "@babel/generator" "^7.23.0"
+    "@babel/helper-environment-visitor" "^7.22.20"
+    "@babel/helper-function-name" "^7.23.0"
+    "@babel/helper-hoist-variables" "^7.22.5"
+    "@babel/helper-split-export-declaration" "^7.22.6"
+    "@babel/parser" "^7.23.0"
+    "@babel/types" "^7.23.0"
     debug "^4.1.0"
     globals "^11.1.0"
 
@@ -1222,7 +1261,7 @@
     "@babel/helper-validator-identifier" "^7.16.7"
     to-fast-properties "^2.0.0"
 
-"@babel/types@^7.18.6", "@babel/types@^7.18.7", "@babel/types@^7.18.8":
+"@babel/types@^7.18.6":
   version "7.18.8"
   resolved "https://registry.yarnpkg.com/@babel/types/-/types-7.18.8.tgz#c5af199951bf41ba4a6a9a6d0d8ad722b30cd42f"
   integrity sha512-qwpdsmraq0aJ3osLJRApsc2ouSJCdnMeZwB0DhbtHAtRpZNZCdlbRnHIgcRKzdE1g0iOGg644fzjOBcdOz9cPw==
@@ -1230,6 +1269,15 @@
     "@babel/helper-validator-identifier" "^7.18.6"
     to-fast-properties "^2.0.0"
 
+"@babel/types@^7.22.15", "@babel/types@^7.22.5", "@babel/types@^7.23.0":
+  version "7.23.0"
+  resolved "https://registry.yarnpkg.com/@babel/types/-/types-7.23.0.tgz#8c1f020c9df0e737e4e247c0619f58c68458aaeb"
+  integrity sha512-0oIyUfKoI3mSqMvsxBdclDwxXKXAUA8v/apZbc+iSyARYou1o8ZGDxbUYyLFoW2arqS2jDGqJuZvv1d/io1axg==
+  dependencies:
+    "@babel/helper-string-parser" "^7.22.5"
+    "@babel/helper-validator-identifier" "^7.22.20"
+    to-fast-properties "^2.0.0"
+
 "@bcoe/v8-coverage@^0.2.3":
   version "0.2.3"
   resolved "https://registry.yarnpkg.com/@bcoe/v8-coverage/-/v8-coverage-0.2.3.tgz#75a2e8b51cb758a7553d6804a5932d7aace75c39"
@@ -1530,6 +1578,11 @@
   resolved "https://registry.yarnpkg.com/@jridgewell/resolve-uri/-/resolve-uri-3.0.7.tgz#30cd49820a962aff48c8fffc5cd760151fca61fe"
   integrity sha512-8cXDaBBHOr2pQ7j77Y6Vp5VDT2sIqWyWQ56TjEq4ih/a4iST3dItRe8Q9fp0rrIl9DoKhWQtUQz/YpOxLkXbNA==
 
+"@jridgewell/resolve-uri@^3.1.0":
+  version "3.1.1"
+  resolved "https://registry.yarnpkg.com/@jridgewell/resolve-uri/-/resolve-uri-3.1.1.tgz#c08679063f279615a3326583ba3a90d1d82cc721"
+  integrity sha512-dSYZh7HhCDtCKm4QakX0xFpsRDqjjtZf/kjI/v3T3Nwt5r8/qz/M19F9ySyOqU94SXBmeG9ttTul+YnR4LOxFA==
+
 "@jridgewell/set-array@^1.0.0":
   version "1.1.1"
   resolved "https://registry.yarnpkg.com/@jridgewell/set-array/-/set-array-1.1.1.tgz#36a6acc93987adcf0ba50c66908bd0b70de8afea"
@@ -1545,6 +1598,19 @@
   resolved "https://registry.yarnpkg.com/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.4.13.tgz#b6461fb0c2964356c469e115f504c95ad97ab88c"
   integrity sha512-GryiOJmNcWbovBxTfZSF71V/mXbgcV3MewDe3kIMCLyIh5e7SKAeUZs+rMnJ8jkMolZ/4/VsdBmMrw3l+VdZ3w==
 
+"@jridgewell/sourcemap-codec@^1.4.14":
+  version "1.4.15"
+  resolved "https://registry.yarnpkg.com/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.4.15.tgz#d7c6e6755c78567a951e04ab52ef0fd26de59f32"
+  integrity sha512-eF2rxCRulEKXHTRiDrDy6erMYWqNw4LPdQ8UQA4huuxaQsVeRPFl2oM8oDGxMFhJUWZf9McpLtJasDDZb/Bpeg==
+
+"@jridgewell/trace-mapping@^0.3.17":
+  version "0.3.19"
+  resolved "https://registry.yarnpkg.com/@jridgewell/trace-mapping/-/trace-mapping-0.3.19.tgz#f8a3249862f91be48d3127c3cfe992f79b4b8811"
+  integrity sha512-kf37QtfW+Hwx/buWGMPcR60iF9ziHa6r/CZJIHbmcm4+0qrXiVdxegAH0F6yddEVQ7zdkjcGCgCzUu+BcbhQxw==
+  dependencies:
+    "@jridgewell/resolve-uri" "^3.1.0"
+    "@jridgewell/sourcemap-codec" "^1.4.14"
+
 "@jridgewell/trace-mapping@^0.3.9":
   version "0.3.13"
   resolved "https://registry.yarnpkg.com/@jridgewell/trace-mapping/-/trace-mapping-0.3.13.tgz#dcfe3e95f224c8fe97a87a5235defec999aa92ea"
@@ -2470,7 +2536,7 @@ caniuse-lite@^1.0.30001332:
   resolved "https://registry.yarnpkg.com/caniuse-lite/-/caniuse-lite-1.0.30001342.tgz#87152b1e3b950d1fbf0093e23f00b6c8e8f1da96"
   integrity sha512-bn6sOCu7L7jcbBbyNhLg0qzXdJ/PMbybZTH/BA6Roet9wxYRm6Tr9D0s0uhLkOZ6MSG+QU6txUgdpr3MXIVqjA==
 
-chalk@^2.0.0:
+chalk@^2.0.0, chalk@^2.4.2:
   version "2.4.2"
   resolved "https://registry.yarnpkg.com/chalk/-/chalk-2.4.2.tgz#cd42541677a54333cf541a49108c1432b44c9424"
   integrity sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ==
diff --git a/js/tsconfig.json b/js/tsconfig.json
index d199cb9d0a6f5..faf9066b3d96e 100644
--- a/js/tsconfig.json
+++ b/js/tsconfig.json
@@ -1,7 +1,7 @@
 {
   "compilerOptions": {
-    "module": "ES2015",
-    "moduleResolution": "node16",
+    "module": "Node16",
+    "moduleResolution": "Node16",
     "esModuleInterop": true,
     "target": "ES2020",
     "lib": ["ES2020", "ESNext.BigInt", "dom"],
@@ -10,7 +10,7 @@
     "noImplicitAny": true,
     "noImplicitReturns": true,
     "noImplicitThis": true,
-    "noUnusedParameters": false,
+    "noUnusedParameters": true,
     "alwaysStrict": true,
     "strictNullChecks": true,
     "pretty": true,
diff --git a/js/tsconfig.tools.json b/js/tsconfig.tools.json
index e55ef7e3eb57c..a70ca0388034d 100644
--- a/js/tsconfig.tools.json
+++ b/js/tsconfig.tools.json
@@ -1,7 +1,6 @@
 {
   "extends": "./tsconfig.json",
   "compilerOptions": {
-    "module": "CommonJS",
     "declaration": false,
     "sourceMap": false
   }
diff --git a/js/web/.npmignore b/js/web/.npmignore
index 8e08db5917149..0f018f525a8d6 100644
--- a/js/web/.npmignore
+++ b/js/web/.npmignore
@@ -4,11 +4,30 @@
 
 /dist/**/*.report.html
 
+# We remove some of the files in NPM packages because restrictions in jsdelivr:
+#
+# "Packages larger than 150 MB or single files larger than 20 MB (in the case of GitHub) are not supported"
+#
+# from https://www.jsdelivr.com/documentation
+#
+# We only include development build in the NPM package for the following targets:
+# - /dist/ort.js
+# - /dist/ort.all.js
+#
+/dist/cjs/ort.js
+/dist/esm/ort.js
+/dist/cjs/ort.all.js
+/dist/esm/ort.all.js
+/dist/**/ort.wasm.js
+/dist/**/ort.wasm-core.js
+/dist/**/ort.webgl.js
+/dist/**/ort.webgpu.js
+/dist/**/ort.training.wasm.js
+
 /types/
 
 karma.conf.js
 tsconfig.json
 tsconfig.tsbuildinfo
-webpack.config.js
 
 *.tgz
diff --git a/js/web/docs/webgl-operators.md b/js/web/docs/webgl-operators.md
index de84134ddbb3f..7c129b66bfa3d 100644
--- a/js/web/docs/webgl-operators.md
+++ b/js/web/docs/webgl-operators.md
@@ -12,6 +12,7 @@ See [Compatibility](../README.md#Compatibility) for a list of the supported plat
 | [Acos](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Acos) | [7+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Acos-7) |
 | [Acosh](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Acosh) |  |
 | [Add](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Add) | [7-12](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Add-7), [13](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Add-13), [14+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Add-14) |
+| [AffineGrid](https://github.com/onnx/onnx/blob/main/docs/Operators.md#AffineGrid) |  |
 | [And](https://github.com/onnx/onnx/blob/main/docs/Operators.md#And) | [7+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#And-7) |
 | [ArgMax](https://github.com/onnx/onnx/blob/main/docs/Operators.md#ArgMax) |  |
 | [ArgMin](https://github.com/onnx/onnx/blob/main/docs/Operators.md#ArgMin) |  |
@@ -67,6 +68,7 @@ See [Compatibility](../README.md#Compatibility) for a list of the supported plat
 | [Gather](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Gather) | [1-10](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Gather-1), [11-12](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Gather-11), [13+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Gather-13) |
 | [GatherElements](https://github.com/onnx/onnx/blob/main/docs/Operators.md#GatherElements) |  |
 | [GatherND](https://github.com/onnx/onnx/blob/main/docs/Operators.md#GatherND) |  |
+| [Gelu](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Gelu) |  |
 | [Gemm](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Gemm) | [7-8](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Gemm-7), [9-10](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Gemm-9), [11-12](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Gemm-11), [13+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Gemm-13) |
 | [GlobalAveragePool](https://github.com/onnx/onnx/blob/main/docs/Operators.md#GlobalAveragePool) | [1+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#GlobalAveragePool-1) |
 | [GlobalLpPool](https://github.com/onnx/onnx/blob/main/docs/Operators.md#GlobalLpPool) |  |
@@ -82,6 +84,7 @@ See [Compatibility](../README.md#Compatibility) for a list of the supported plat
 | [Hardmax](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Hardmax) |  |
 | [Identity](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Identity) | [1-12](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Identity-1), [13](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Identity-13), [14-15](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Identity-14), [16-18](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Identity-16), [19+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Identity-19) |
 | [If](https://github.com/onnx/onnx/blob/main/docs/Operators.md#If) |  |
+| [ImageDecoder](https://github.com/onnx/onnx/blob/main/docs/Operators.md#ImageDecoder) |  |
 | [InstanceNormalization](https://github.com/onnx/onnx/blob/main/docs/Operators.md#InstanceNormalization) | [6+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#InstanceNormalization-6) |
 | [IsInf](https://github.com/onnx/onnx/blob/main/docs/Operators.md#IsInf) |  |
 | [IsNaN](https://github.com/onnx/onnx/blob/main/docs/Operators.md#IsNaN) |  |
@@ -137,12 +140,13 @@ See [Compatibility](../README.md#Compatibility) for a list of the supported plat
 | [ReduceL2](https://github.com/onnx/onnx/blob/main/docs/Operators.md#ReduceL2) |  |
 | [ReduceLogSum](https://github.com/onnx/onnx/blob/main/docs/Operators.md#ReduceLogSum) | [1-10](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#ReduceLogSum-1), [11-12](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#ReduceLogSum-11), [13-17](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#ReduceLogSum-13), [18+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#ReduceLogSum-18) |
 | [ReduceLogSumExp](https://github.com/onnx/onnx/blob/main/docs/Operators.md#ReduceLogSumExp) |  |
-| [ReduceMax](https://github.com/onnx/onnx/blob/main/docs/Operators.md#ReduceMax) | [1-10](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#ReduceMax-1), [11](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#ReduceMax-11), [12](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#ReduceMax-12), [13-17](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#ReduceMax-13), [18+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#ReduceMax-18) |
+| [ReduceMax](https://github.com/onnx/onnx/blob/main/docs/Operators.md#ReduceMax) | [1-10](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#ReduceMax-1), [11](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#ReduceMax-11), [12](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#ReduceMax-12), [13-17](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#ReduceMax-13), [18-19](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#ReduceMax-18), [20+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#ReduceMax-20) |
 | [ReduceMean](https://github.com/onnx/onnx/blob/main/docs/Operators.md#ReduceMean) | [1-10](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#ReduceMean-1), [11-12](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#ReduceMean-11), [13-17](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#ReduceMean-13), [18+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#ReduceMean-18) |
-| [ReduceMin](https://github.com/onnx/onnx/blob/main/docs/Operators.md#ReduceMin) | [1-10](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#ReduceMin-1), [11](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#ReduceMin-11), [12](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#ReduceMin-12), [13-17](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#ReduceMin-13), [18+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#ReduceMin-18) |
+| [ReduceMin](https://github.com/onnx/onnx/blob/main/docs/Operators.md#ReduceMin) | [1-10](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#ReduceMin-1), [11](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#ReduceMin-11), [12](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#ReduceMin-12), [13-17](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#ReduceMin-13), [18-19](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#ReduceMin-18), [20+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#ReduceMin-20) |
 | [ReduceProd](https://github.com/onnx/onnx/blob/main/docs/Operators.md#ReduceProd) | [1-10](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#ReduceProd-1), [11-12](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#ReduceProd-11), [13-17](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#ReduceProd-13), [18+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#ReduceProd-18) |
 | [ReduceSum](https://github.com/onnx/onnx/blob/main/docs/Operators.md#ReduceSum) | [1-10](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#ReduceSum-1), [11-12](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#ReduceSum-11) |
 | [ReduceSumSquare](https://github.com/onnx/onnx/blob/main/docs/Operators.md#ReduceSumSquare) | [1-10](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#ReduceSumSquare-1), [11-12](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#ReduceSumSquare-11), [13-17](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#ReduceSumSquare-13), [18+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#ReduceSumSquare-18) |
+| [RegexFullMatch](https://github.com/onnx/onnx/blob/main/docs/Operators.md#RegexFullMatch) |  |
 | [Relu](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Relu) | [6-12](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Relu-6), [13](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Relu-13), [14+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Relu-14) |
 | [Reshape](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Reshape) | [5-12](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Reshape-5), [13](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Reshape-13), [14-18](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Reshape-14), [19+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Reshape-19) |
 | [Resize](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Resize) | [10](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Resize-10), [11-12](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Resize-11), [13-17](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Resize-13), [18](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Resize-18), [19+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Resize-19) |
@@ -179,7 +183,9 @@ See [Compatibility](../README.md#Compatibility) for a list of the supported plat
 | [SplitToSequence](https://github.com/onnx/onnx/blob/main/docs/Operators.md#SplitToSequence) |  |
 | [Sqrt](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Sqrt) | [6-12](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Sqrt-6), [13+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Sqrt-13) |
 | [Squeeze](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Squeeze) | [1-10](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Squeeze-1), [11-12](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Squeeze-11), [13+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Squeeze-13) |
+| [StringConcat](https://github.com/onnx/onnx/blob/main/docs/Operators.md#StringConcat) |  |
 | [StringNormalizer](https://github.com/onnx/onnx/blob/main/docs/Operators.md#StringNormalizer) |  |
+| [StringSplit](https://github.com/onnx/onnx/blob/main/docs/Operators.md#StringSplit) |  |
 | [Sub](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Sub) | [7-12](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Sub-7), [13](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Sub-13), [14+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Sub-14) |
 | [Sum](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Sum) | [6-7](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Sum-6), [8-12](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Sum-8), [13+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Sum-13) |
 | [Tan](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Tan) | [7+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Tan-7) |
diff --git a/js/web/docs/webgpu-operators.md b/js/web/docs/webgpu-operators.md
index a87a894e3b3c5..00c27fe3ab034 100644
--- a/js/web/docs/webgpu-operators.md
+++ b/js/web/docs/webgpu-operators.md
@@ -20,13 +20,17 @@ Do not modify directly.*
 | Asinh | ai.onnx(9+) |  |
 | Atan | ai.onnx(7+) |  |
 | Atanh | ai.onnx(9+) |  |
-| AveragePool | ai.onnx(7-9,10,11+); com.ms.internal.nhwc(11+) | need perf optimization; need implementing activation |
+| Attention | com.microsoft(1+) | need implementing mask and past/present |
+| AveragePool | ai.onnx(7-9,10,11+); com.ms.internal.nhwc(7-9,10,11+) | need perf optimization; need implementing activation |
+| BatchNormalization | ai.onnx(7-8,9-13,14,15+); com.ms.internal.nhwc(7-8,9-13,14,15+) |  |
+| BiasAdd | com.microsoft(1+) |  |
+| BiasSplitGelu | com.microsoft(1+) |  |
 | Cast | ai.onnx(6-8,9-12,13-18,19+) |  |
 | Ceil | ai.onnx(6-12,13+) |  |
 | Clip | ai.onnx(6-10,11,12,13+) |  |
 | Concat | ai.onnx(1-3,4-10,11-12,13+) |  |
-| Conv | ai.onnx(1-10,11+); com.ms.internal.nhwc(11+) | need perf optimization; conv3d is not supported; need implementing activation |
-| ConvTranspose | ai.onnx(1-10,11+); com.ms.internal.nhwc(11+) | need perf optimization; ConvTranspose3d is not supported; need implementing activation |
+| Conv | ai.onnx(1-10,11+); com.ms.internal.nhwc(1-10,11+) | need perf optimization; conv3d is not supported; need implementing activation |
+| ConvTranspose | ai.onnx(1-10,11+); com.ms.internal.nhwc(1-10,11+) | need perf optimization; ConvTranspose3d is not supported; need implementing activation |
 | Cos | ai.onnx(7+) |  |
 | Cosh | ai.onnx(9+) |  |
 | Div | ai.onnx(7-12,13,14+) |  |
@@ -38,6 +42,7 @@ Do not modify directly.*
 | Expand | ai.onnx(8-12,13+) |  |
 | Flatten | ai.onnx(1-8,9-10,11-12,13+) |  |
 | Floor | ai.onnx(6-12,13+) |  |
+| FusedConv | com.microsoft(1+) |  |
 | Gather | ai.onnx(1-10,11-12,13+) |  |
 | GatherElements | ai.onnx(11-12,13+) |  |
 | Gelu | com.microsoft(1+) |  |
@@ -54,14 +59,16 @@ Do not modify directly.*
 | LessOrEqual | ai.onnx(12-15,16+) |  |
 | Log | ai.onnx(6-12,13+) |  |
 | MatMul | ai.onnx(1-12,13+) |  |
-| MaxPool | ai.onnx(1-7,8-9,10,11,12+); com.ms.internal.nhwc(11,12+) | need perf optimization; need implementing activation |
+| MaxPool | ai.onnx(1-7,8-9,10,11,12+); com.ms.internal.nhwc(1-7,8-9,10,11,12+) | need perf optimization; need implementing activation |
 | MemcpyFromHost | ai.onnx(1+) |  |
 | MemcpyToHost | ai.onnx(1+) |  |
 | Mul | ai.onnx(7-12,13,14+) |  |
+| MultiHeadAttention | com.microsoft(1+) | need implementing mask and past/present |
 | Neg | ai.onnx(6-12,13+) |  |
 | Not | ai.onnx(1+) |  |
 | Pad | ai.onnx(2-10,11-12,13-17,18,19+) |  |
 | Pow | ai.onnx(7-11,12,13-14,15+) |  |
+| Range | ai.onnx(11+) |  |
 | Reciprocal | ai.onnx(6-12,13+) |  |
 | ReduceL1 | ai.onnx(1-10,11-12,13-17,18+) |  |
 | ReduceL2 | ai.onnx(1-10,11-12,13-17,18+) |  |
@@ -75,7 +82,7 @@ Do not modify directly.*
 | ReduceSumSquare | ai.onnx(1-10,11-12,13-17,18+) |  |
 | Relu | ai.onnx(6-12,13,14+) |  |
 | Reshape | ai.onnx(5-12,13,14+) | no GPU kernel |
-| Resize | ai.onnx(10,11-12,13-17,18,19+); com.ms.internal.nhwc(11-12,13-17,18,19+) | CoordinateTransformMode align_corners is not supported with downsampling |
+| Resize | ai.onnx(10,11-12,13-17,18,19+); com.ms.internal.nhwc(10,11-12,13-17,18,19+) | CoordinateTransformMode align_corners is not supported with downsampling |
 | Shape | ai.onnx(1-12,13-14,15+) | no GPU kernel; an ORT warning is generated - need to fix |
 | Sigmoid | ai.onnx(6-12,13+) |  |
 | Sin | ai.onnx(7+) |  |
@@ -93,3 +100,4 @@ Do not modify directly.*
 | Tile | ai.onnx(6-12,13+) |  |
 | Transpose | ai.onnx(1-12,13+) | need perf optimization |
 | Unsqueeze | ai.onnx(1-10,11-12,13+) |  |
+| Where | ai.onnx(9-15,16+) |  |
diff --git a/js/web/karma.conf.js b/js/web/karma.conf.js
index 35f782d1fdca3..8fce79843f617 100644
--- a/js/web/karma.conf.js
+++ b/js/web/karma.conf.js
@@ -4,7 +4,7 @@
 'use strict';
 
 const args = require('minimist')(process.argv, {});
-const bundleMode = args['bundle-mode'] || 'dev';  // 'dev'|'perf'|undefined;
+const bundleMode = args['bundle-mode'] || 'dev';  // 'dev'|'perf'
 const karmaPlugins = args['karma-plugins'] || undefined;
 const timeoutMocha = args['timeout-mocha'] || 60000;
 const forceLocalHost = !!args['force-localhost'];
@@ -19,8 +19,8 @@ if (!chromiumFlags) {
   throw new Error(`Invalid command line arg: --chromium-flags: ${chromiumFlags}`);
 }
 
-const commonFile = bundleMode === 'dev' ? '../common/dist/ort-common.js' : '../common/dist/ort-common.min.js'
-const mainFile = bundleMode === 'dev' ? 'test/ort.dev.js' : 'test/ort.perf.js';
+const ORT_FILE = bundleMode === 'dev' ? 'dist/ort.all.js' : 'dist/ort.all.min.js';
+const TEST_FILE = bundleMode === 'dev' ? 'test/ort.test.js' : 'test/ort.test.min.js';
 
 // it's a known issue that Safari does not work with "localhost" in BrowserStack:
 // https://www.browserstack.com/question/663
@@ -67,30 +67,14 @@ module.exports = function(config) {
     },
     frameworks: ['mocha'],
     files: [
-      {pattern: commonFile},
-      {pattern: mainFile},
-      {pattern: 'test/testdata-file-cache-*.json', included: false},
-      {pattern: 'test/data/**/*', included: false, nocache: true},
-      {pattern: 'dist/ort-wasm.wasm', included: false},
-      {pattern: 'dist/ort-wasm-threaded.wasm', included: false},
-      {pattern: 'dist/ort-wasm-simd.wasm', included: false},
-      {pattern: 'dist/ort-wasm-simd-threaded.wasm', included: false},
-      {pattern: 'dist/ort-wasm-simd.jsep.wasm', included: false},
-      {pattern: 'dist/ort-wasm-simd-threaded.jsep.wasm', included: false},
-      {pattern: 'dist/ort-wasm-threaded.worker.js', included: false},
+      {pattern: ORT_FILE},
+      {pattern: TEST_FILE},
+      {pattern: 'test/testdata-file-cache-*.json', included: false, watched: false},
+      {pattern: 'test/data/**/*', included: false, nocache: true, watched: false},
+      {pattern: 'dist/*.wasm', included: false, watched: false},
     ],
-    proxies: {
-      '/base/test/ort-wasm.wasm': '/base/dist/ort-wasm.wasm',
-      '/base/test/ort-wasm-threaded.wasm': '/base/dist/ort-wasm-threaded.wasm',
-      '/base/test/ort-wasm-simd.wasm': '/base/dist/ort-wasm-simd.wasm',
-      '/base/test/ort-wasm-simd-threaded.wasm': '/base/dist/ort-wasm-simd-threaded.wasm',
-      '/base/test/ort-wasm-simd.jsep.wasm': '/base/dist/ort-wasm-simd.jsep.wasm',
-      '/base/test/ort-wasm-simd-threaded.jsep.wasm': '/base/dist/ort-wasm-simd-threaded.jsep.wasm',
-      '/base/test/ort-wasm-threaded.worker.js': '/base/dist/ort-wasm-threaded.worker.js',
-    },
     plugins: karmaPlugins,
     client: {captureConsole: true, mocha: {expose: ['body'], timeout: timeoutMocha}},
-    preprocessors: {mainFile: ['sourcemap']},
     reporters: ['mocha', 'BrowserStack'],
     browsers: [],
     captureTimeout: 120000,
diff --git a/js/web/lib/backend-onnxjs.ts b/js/web/lib/backend-onnxjs.ts
index 18a068e0ced8b..7176823c9bf13 100644
--- a/js/web/lib/backend-onnxjs.ts
+++ b/js/web/lib/backend-onnxjs.ts
@@ -2,17 +2,17 @@
 // Licensed under the MIT License.
 
 /* eslint-disable import/no-internal-modules */
-import {Backend, InferenceSession, SessionHandler} from 'onnxruntime-common';
+import {Backend, InferenceSession, InferenceSessionHandler} from 'onnxruntime-common';
 
 import {Session} from './onnxjs/session';
-import {OnnxjsSessionHandler} from './onnxjs/session-handler';
+import {OnnxjsSessionHandler} from './onnxjs/session-handler-inference';
 
 class OnnxjsBackend implements Backend {
   // eslint-disable-next-line @typescript-eslint/no-empty-function
   async init(): Promise<void> {}
 
-  async createSessionHandler(pathOrBuffer: string|Uint8Array, options?: InferenceSession.SessionOptions):
-      Promise<SessionHandler> {
+  async createInferenceSessionHandler(pathOrBuffer: string|Uint8Array, options?: InferenceSession.SessionOptions):
+      Promise<InferenceSessionHandler> {
     // NOTE: Session.Config(from onnx.js) is not compatible with InferenceSession.SessionOptions(from
     // onnxruntime-common).
     //       In future we should remove Session.Config and use InferenceSession.SessionOptions.
diff --git a/js/web/lib/backend-wasm-inference.ts b/js/web/lib/backend-wasm-inference.ts
new file mode 100644
index 0000000000000..475a0243ebd3d
--- /dev/null
+++ b/js/web/lib/backend-wasm-inference.ts
@@ -0,0 +1,5 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+import {OnnxruntimeWebAssemblyBackend} from './backend-wasm';
+export const wasmBackend = new OnnxruntimeWebAssemblyBackend();
diff --git a/js/web/lib/backend-wasm-training.ts b/js/web/lib/backend-wasm-training.ts
new file mode 100644
index 0000000000000..09dac3a85311c
--- /dev/null
+++ b/js/web/lib/backend-wasm-training.ts
@@ -0,0 +1,21 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+import {InferenceSession, TrainingSessionHandler} from 'onnxruntime-common';
+
+import {OnnxruntimeWebAssemblyBackend} from './backend-wasm';
+import {OnnxruntimeWebAssemblyTrainingSessionHandler} from './wasm/session-handler-training';
+
+class OnnxruntimeTrainingWebAssemblyBackend extends OnnxruntimeWebAssemblyBackend {
+  async createTrainingSessionHandler(
+      checkpointStateUriOrBuffer: string|Uint8Array, trainModelUriOrBuffer: string|Uint8Array,
+      evalModelUriOrBuffer: string|Uint8Array, optimizerModelUriOrBuffer: string|Uint8Array,
+      options: InferenceSession.SessionOptions): Promise<TrainingSessionHandler> {
+    const handler = new OnnxruntimeWebAssemblyTrainingSessionHandler();
+    await handler.createTrainingSession(
+        checkpointStateUriOrBuffer, trainModelUriOrBuffer, evalModelUriOrBuffer, optimizerModelUriOrBuffer, options);
+    return Promise.resolve(handler);
+  }
+}
+
+export const wasmBackend = new OnnxruntimeTrainingWebAssemblyBackend();
diff --git a/js/web/lib/backend-wasm.ts b/js/web/lib/backend-wasm.ts
index ceb20044d97b6..78edcc90f55f9 100644
--- a/js/web/lib/backend-wasm.ts
+++ b/js/web/lib/backend-wasm.ts
@@ -1,11 +1,11 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
-import {Backend, env, InferenceSession, SessionHandler} from 'onnxruntime-common';
-import {cpus} from 'os';
+import {cpus} from 'node:os';
+import {Backend, env, InferenceSession, InferenceSessionHandler} from 'onnxruntime-common';
 
 import {initializeWebAssemblyInstance} from './wasm/proxy-wrapper';
-import {OnnxruntimeWebAssemblySessionHandler} from './wasm/session-handler';
+import {OnnxruntimeWebAssemblySessionHandler} from './wasm/session-handler-inference';
 
 /**
  * This function initializes all flags for WebAssembly.
@@ -32,7 +32,7 @@ export const initializeFlags = (): void => {
   }
 };
 
-class OnnxruntimeWebAssemblyBackend implements Backend {
+export class OnnxruntimeWebAssemblyBackend implements Backend {
   async init(): Promise<void> {
     // populate wasm flags
     initializeFlags();
@@ -40,14 +40,14 @@ class OnnxruntimeWebAssemblyBackend implements Backend {
     // init wasm
     await initializeWebAssemblyInstance();
   }
-  createSessionHandler(path: string, options?: InferenceSession.SessionOptions): Promise<SessionHandler>;
-  createSessionHandler(buffer: Uint8Array, options?: InferenceSession.SessionOptions): Promise<SessionHandler>;
-  async createSessionHandler(pathOrBuffer: string|Uint8Array, options?: InferenceSession.SessionOptions):
-      Promise<SessionHandler> {
+  createInferenceSessionHandler(path: string, options?: InferenceSession.SessionOptions):
+      Promise<InferenceSessionHandler>;
+  createInferenceSessionHandler(buffer: Uint8Array, options?: InferenceSession.SessionOptions):
+      Promise<InferenceSessionHandler>;
+  async createInferenceSessionHandler(pathOrBuffer: string|Uint8Array, options?: InferenceSession.SessionOptions):
+      Promise<InferenceSessionHandler> {
     const handler = new OnnxruntimeWebAssemblySessionHandler();
     await handler.loadModel(pathOrBuffer, options);
     return Promise.resolve(handler);
   }
 }
-
-export const wasmBackend = new OnnxruntimeWebAssemblyBackend();
diff --git a/js/web/lib/build-def.d.ts b/js/web/lib/build-def.d.ts
index 2049b2663ead3..fb714bf5996f1 100644
--- a/js/web/lib/build-def.d.ts
+++ b/js/web/lib/build-def.d.ts
@@ -1,35 +1,39 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
-/* eslint-disable @typescript-eslint/no-unused-vars, @typescript-eslint/naming-convention */
+/* eslint-disable @typescript-eslint/naming-convention */
 
 /**
  * The interface BuildDefinitions contains a set of flags which are defined at build time.
  *
- * Those flags are processed in terser for tree shaking to remove unused code.
+ * Those flags are processed in bundler for tree shaking to remove unused code.
  * No flags in this file should present in production build.
  */
 interface BuildDefinitions {
   /**
    * defines whether to disable the whole WebGL backend in the build.
    */
-  DISABLE_WEBGL: boolean;
+  readonly DISABLE_WEBGL: boolean;
   /**
    * defines whether to disable the whole WebGpu backend in the build.
    */
-  DISABLE_WEBGPU: boolean;
+  readonly DISABLE_WEBGPU: boolean;
   /**
    * defines whether to disable the whole WebAssembly backend in the build.
    */
-  DISABLE_WASM: boolean;
+  readonly DISABLE_WASM: boolean;
   /**
    * defines whether to disable proxy feature in WebAssembly backend in the build.
    */
-  DISABLE_WASM_PROXY: boolean;
+  readonly DISABLE_WASM_PROXY: boolean;
   /**
    * defines whether to disable multi-threading feature in WebAssembly backend in the build.
    */
-  DISABLE_WASM_THREAD: boolean;
+  readonly DISABLE_WASM_THREAD: boolean;
+  /**
+   * defines whether to disable training APIs in WebAssembly backend.
+   */
+  readonly DISABLE_TRAINING: boolean;
 }
 
-declare let BUILD_DEFS: BuildDefinitions;
+declare const BUILD_DEFS: BuildDefinitions;
diff --git a/js/web/lib/index.ts b/js/web/lib/index.ts
index d5ed536034f3e..6060271ced156 100644
--- a/js/web/lib/index.ts
+++ b/js/web/lib/index.ts
@@ -3,10 +3,13 @@
 
 /* eslint-disable @typescript-eslint/no-var-requires, @typescript-eslint/no-require-imports */
 // We use "require" instead of "import" here because import statement must be put in top level. Our current code does
-// not allow terser to tree-shaking code as expected because some codes are treated as having side effects.
-// So we import code inside the if-clause to allow terser remove the code safely.
+// not allow bundler to tree-shaking code as expected because some codes are treated as having side effects.
+// So we import code inside the if-clause to allow bundler remove the code safely.
 
 export * from 'onnxruntime-common';
+import * as ort from 'onnxruntime-common';
+export default ort;
+
 import {registerBackend, env} from 'onnxruntime-common';
 import {version} from './version';
 
@@ -16,14 +19,17 @@ if (!BUILD_DEFS.DISABLE_WEBGL) {
 }
 
 if (!BUILD_DEFS.DISABLE_WASM) {
-  const wasmBackend = require('./backend-wasm').wasmBackend;
+  const wasmBackend = BUILD_DEFS.DISABLE_TRAINING ? require('./backend-wasm-inference').wasmBackend :
+                                                    require('./backend-wasm-training').wasmBackend;
   if (!BUILD_DEFS.DISABLE_WEBGPU && typeof navigator !== 'undefined' && navigator.gpu) {
     registerBackend('webgpu', wasmBackend, 5);
   }
   registerBackend('cpu', wasmBackend, 10);
   registerBackend('wasm', wasmBackend, 10);
-  registerBackend('xnnpack', wasmBackend, 9);
-  registerBackend('webnn', wasmBackend, 9);
+  if (BUILD_DEFS.DISABLE_TRAINING) {
+    registerBackend('xnnpack', wasmBackend, 9);
+    registerBackend('webnn', wasmBackend, 9);
+  }
 }
 
 Object.defineProperty(env.versions, 'web', {value: version, enumerable: true});
diff --git a/js/web/lib/onnxjs/attribute-with-cache-key.ts b/js/web/lib/onnxjs/attribute-with-cache-key.ts
index 6608b00471e77..5d47570f267a6 100644
--- a/js/web/lib/onnxjs/attribute-with-cache-key.ts
+++ b/js/web/lib/onnxjs/attribute-with-cache-key.ts
@@ -6,13 +6,13 @@ class AttributeWithCacheKeyImpl {
     Object.assign(this, attribute);
   }
 
-  private _cacheKey: string;
+  private key: string;
   public get cacheKey(): string {
-    if (!this._cacheKey) {
-      this._cacheKey =
+    if (!this.key) {
+      this.key =
           Object.getOwnPropertyNames(this).sort().map(name => `${(this as Record<string, unknown>)[name]}`).join(';');
     }
-    return this._cacheKey;
+    return this.key;
   }
 }
 
diff --git a/js/web/lib/onnxjs/backends/backend-webgl.ts b/js/web/lib/onnxjs/backends/backend-webgl.ts
index 74716ca0edcb3..21ed7e38b9f86 100644
--- a/js/web/lib/onnxjs/backends/backend-webgl.ts
+++ b/js/web/lib/onnxjs/backends/backend-webgl.ts
@@ -72,7 +72,9 @@ export class WebGLBackend implements Backend {
 
       Logger.setWithEnv(env);
 
-      Object.defineProperty(env.webgl, 'context', {value: this.glContext.gl});
+      if (!env.webgl.context) {
+        Object.defineProperty(env.webgl, 'context', {value: this.glContext.gl});
+      }
 
       Logger.verbose(
           'WebGLBackend',
diff --git a/js/web/lib/onnxjs/backends/webgl/glsl-coordinate-lib.ts b/js/web/lib/onnxjs/backends/webgl/glsl-coordinate-lib.ts
index dd3f1b30dfb46..1f2b27c7bdea8 100644
--- a/js/web/lib/onnxjs/backends/webgl/glsl-coordinate-lib.ts
+++ b/js/web/lib/onnxjs/backends/webgl/glsl-coordinate-lib.ts
@@ -186,7 +186,7 @@ export class CoordsGlslLib extends GlslLib {
   /**
    * 1D packed output coordinates.
    */
-  protected getOutputPacked1DCoords(shape: [number], texShape: [number, number]): GlslLibRoutine {
+  protected getOutputPacked1DCoords(_shape: [number], texShape: [number, number]): GlslLibRoutine {
     const packedTexShape = texShape;
     let source = '';
     if (packedTexShape[0] === 1) {
@@ -331,7 +331,7 @@ export class CoordsGlslLib extends GlslLib {
   /**
    * Unpacked 1D output coordinates.
    */
-  protected getOutputUnpacked1DCoords(shape: [number], texShape: [number, number]): GlslLibRoutine {
+  protected getOutputUnpacked1DCoords(_shape: [number], texShape: [number, number]): GlslLibRoutine {
     const source = `
         int getOutputCoords() {
           ivec2 resTexRC = ivec2(TexCoords.xy *
@@ -641,7 +641,7 @@ export class CoordsGlslLib extends GlslLib {
     if (outRank < 2 && inRank > 0) {
       unpackedCoordsSnippet = 'coords';
     } else {
-      unpackedCoordsSnippet = inShape.map((s, i) => `coords.${fields[i + rankDiff]}`).join(', ');
+      unpackedCoordsSnippet = inShape.map((_s, i) => `coords.${fields[i + rankDiff]}`).join(', ');
     }
 
     let output = 'return outputValue;';
@@ -734,7 +734,7 @@ export class CoordsGlslLib extends GlslLib {
     if (outRank < 2 && inRank > 0) {
       unpackedCoordsSnippet = 'coords';
     } else {
-      unpackedCoordsSnippet = inputLayout.unpackedShape.map((s, i) => `coords.${fields[i + rankDiff]}`).join(', ');
+      unpackedCoordsSnippet = inputLayout.unpackedShape.map((_s, i) => `coords.${fields[i + rankDiff]}`).join(', ');
     }
     const source = `
         float ${funcName}() {
diff --git a/js/web/lib/onnxjs/backends/webgl/inference-handler.ts b/js/web/lib/onnxjs/backends/webgl/inference-handler.ts
index afb39e84f0060..0a51ff7c4029e 100644
--- a/js/web/lib/onnxjs/backends/webgl/inference-handler.ts
+++ b/js/web/lib/onnxjs/backends/webgl/inference-handler.ts
@@ -11,7 +11,7 @@ import {createPackedReshape3DProgramInfoLoader, isReshapeCheap, processDims3D} f
 import {encodeAsUint8} from './ops/uint8-encode';
 import {createUnpackProgramInfoLoader} from './ops/unpack';
 import {WebGLSessionHandler} from './session-handler';
-import {Encoder} from './texture-data-encoder';
+import {EncoderUsage} from './texture-data-encoder';
 import {calculateTextureWidthAndHeight, createTextureLayoutFromShape, createTextureLayoutFromTextureType} from './texture-layout';
 import {Artifact, ProgramInfo, ProgramInfoLoader, TextureData, TextureLayout, TextureType} from './types';
 
@@ -101,7 +101,7 @@ export class WebGLInferenceHandler implements InferenceHandler {
 
   /**
    * Create a TextureData object from a tensor.
-   * Usage = Encoder.Usage.UploadOnly.
+   * Usage = EncoderUsage.UploadOnly.
    * If a related texture data is found in cache, returns it;
    * Otherwise:
    *   Creates a new texture layout if not provided;
@@ -156,7 +156,7 @@ export class WebGLInferenceHandler implements InferenceHandler {
               buffer.set(tensor.numberData.subarray(oldOffset, oldOffset + oldRowSize), newOffset);
             }
           }
-          return this.createTextureData(adjustedLayout, tensor.type, buffer, tensor, Encoder.Usage.UploadOnly);
+          return this.createTextureData(adjustedLayout, tensor.type, buffer, tensor, EncoderUsage.UploadOnly);
         }
       }
 
@@ -164,10 +164,10 @@ export class WebGLInferenceHandler implements InferenceHandler {
         const unpackedTextureLayout =
             createTextureLayoutFromShape(this.session.layoutStrategy, tensor.dims, 1, [], {reverseWH: true});
         const unpackedTextureData = this.createTextureData(
-            unpackedTextureLayout, tensor.type, tensor.numberData, tensor, Encoder.Usage.UploadOnly);
+            unpackedTextureLayout, tensor.type, tensor.numberData, tensor, EncoderUsage.UploadOnly);
         td = this.pack(unpackedTextureData);
       } else {
-        td = this.createTextureData(layout, tensor.type, tensor.numberData, tensor, Encoder.Usage.UploadOnly);
+        td = this.createTextureData(layout, tensor.type, tensor.numberData, tensor, EncoderUsage.UploadOnly);
       }
     }
     return td;
@@ -175,7 +175,7 @@ export class WebGLInferenceHandler implements InferenceHandler {
 
   /**
    * Create a TextureData object using the given data and bind to the given tensor.
-   * Usage = Encoder.Usage.UploadOnly.
+   * Usage = EncoderUsage.UploadOnly.
    * NOTE: this function is a hack for Conv implementation. should remove this function, after rewriting Conv
    * implementation by Graph.Transformer
    * @param dataType the tensor data type
@@ -184,12 +184,12 @@ export class WebGLInferenceHandler implements InferenceHandler {
    */
   createTextureDataFromLayoutBindTensor(
       layout: TextureLayout, dataType: Tensor.DataType, data: Tensor.NumberType, tensor: Tensor): TextureData {
-    return this.createTextureData(layout, dataType, data, tensor, Encoder.Usage.UploadOnly);
+    return this.createTextureData(layout, dataType, data, tensor, EncoderUsage.UploadOnly);
   }
 
   private createTextureData(
       layout: TextureLayout, dataType: Tensor.DataType, data?: Tensor.NumberType, tensor?: Tensor,
-      usage?: Encoder.Usage): TextureData {
+      usage?: EncoderUsage): TextureData {
     Logger.verbose('InferenceHandler', `Creating TextureData: layout:[${JSON.stringify(layout)}]`);
     const texture = this.session.textureManager.createTextureFromLayout(dataType, layout, data, usage);
     return this.createTextureDataFromTexture(layout, dataType, texture, tensor);
diff --git a/js/web/lib/onnxjs/backends/webgl/ops/concat-packed.ts b/js/web/lib/onnxjs/backends/webgl/ops/concat-packed.ts
index 709f883ae12c9..d0e589a428825 100644
--- a/js/web/lib/onnxjs/backends/webgl/ops/concat-packed.ts
+++ b/js/web/lib/onnxjs/backends/webgl/ops/concat-packed.ts
@@ -12,7 +12,7 @@ import {getChannels, unpackFromChannel} from './packing-utils';
 
 const createPackedConcatProgramMetadata = (inputCount: number, cacheHint: string) => ({
   name: 'Concat (packed)',
-  inputNames: Array.from({length: inputCount}, (v, i) => `X${i}`),
+  inputNames: Array.from({length: inputCount}, (_v, i) => `X${i}`),
   inputTypes: Array(inputCount).fill(TextureType.packed),
   cacheHint
 });
diff --git a/js/web/lib/onnxjs/backends/webgl/ops/concat.ts b/js/web/lib/onnxjs/backends/webgl/ops/concat.ts
index c2b18ef86f814..f85f4032feae1 100644
--- a/js/web/lib/onnxjs/backends/webgl/ops/concat.ts
+++ b/js/web/lib/onnxjs/backends/webgl/ops/concat.ts
@@ -30,13 +30,13 @@ export const concat: OperatorImplementation<ConcatAttributes> =
 
 const createUnpackedConcatProgramMetadata = (inputCount: number, cacheHint: string) => ({
   name: 'Concat',
-  inputNames: Array.from({length: inputCount}, (v, i) => `X${i}`),
+  inputNames: Array.from({length: inputCount}, (_v, i) => `X${i}`),
   inputTypes: Array(inputCount).fill(TextureType.unpacked),
   cacheHint
 });
 
 const createUnpackedConcatProgramInfo =
-    (handler: WebGLInferenceHandler, metadata: ProgramMetadata, inputs: Tensor[], axis: number): ProgramInfo => {
+    (_handler: WebGLInferenceHandler, metadata: ProgramMetadata, inputs: Tensor[], axis: number): ProgramInfo => {
       const inputShape = inputs[0].dims.slice();
       if (axis >= inputShape.length || axis < (-1 * inputShape.length)) {
         throw new Error('axis specified for concat doesn\'t match input dimensionality');
diff --git a/js/web/lib/onnxjs/backends/webgl/ops/gather.ts b/js/web/lib/onnxjs/backends/webgl/ops/gather.ts
index 54b6ccd1a3685..bb44a20d75f34 100644
--- a/js/web/lib/onnxjs/backends/webgl/ops/gather.ts
+++ b/js/web/lib/onnxjs/backends/webgl/ops/gather.ts
@@ -30,7 +30,7 @@ const gatherProgramMetadata = {
 };
 
 const createGatherProgramInfo =
-    (handler: WebGLInferenceHandler, metadata: ProgramMetadata, inputs: Tensor[], axis: number): ProgramInfo => {
+    (_handler: WebGLInferenceHandler, metadata: ProgramMetadata, inputs: Tensor[], axis: number): ProgramInfo => {
       const inputShape = inputs[0].dims.slice();
       const indexDataShape = inputs[1].dims.slice();
       const outputShape = new Array(inputShape.length + indexDataShape.length - 1);
diff --git a/js/web/lib/onnxjs/backends/webgl/ops/im2col.ts b/js/web/lib/onnxjs/backends/webgl/ops/im2col.ts
index f74c35b612665..a1da13ec48d70 100644
--- a/js/web/lib/onnxjs/backends/webgl/ops/im2col.ts
+++ b/js/web/lib/onnxjs/backends/webgl/ops/im2col.ts
@@ -15,7 +15,7 @@ const createIm2ColProgramMetadata = (cacheHint: string) => ({
 });
 
 const createIm2ColProgramInfo =
-    (inferenceHandler: WebGLInferenceHandler, metadata: ProgramMetadata, x: Tensor, w: Tensor,
+    (_inferenceHandler: WebGLInferenceHandler, metadata: ProgramMetadata, x: Tensor, w: Tensor,
      outputShape: readonly number[], attributes: ConvAttributes): ProgramInfo => {
       const xshape = x.dims;
       const wshape = w.dims;
diff --git a/js/web/lib/onnxjs/backends/webgl/ops/image-scaler.ts b/js/web/lib/onnxjs/backends/webgl/ops/image-scaler.ts
index 1cd5288251433..efc79f686c960 100644
--- a/js/web/lib/onnxjs/backends/webgl/ops/image-scaler.ts
+++ b/js/web/lib/onnxjs/backends/webgl/ops/image-scaler.ts
@@ -35,7 +35,7 @@ const imageScalerProgramMetadata = {
 };
 
 const createImageScalerProgramInfo =
-    (handler: WebGLInferenceHandler, metadata: ProgramMetadata, inputs: Tensor[], attributes: ImageScalerAttributes):
+    (_handler: WebGLInferenceHandler, metadata: ProgramMetadata, inputs: Tensor[], attributes: ImageScalerAttributes):
         ProgramInfo => {
           const outputShape = inputs[0].dims.slice();
           const rank = outputShape.length;
diff --git a/js/web/lib/onnxjs/backends/webgl/ops/matmul-pack.ts b/js/web/lib/onnxjs/backends/webgl/ops/matmul-pack.ts
index fb3c2357ae8fe..0be6d1ba8bcd2 100644
--- a/js/web/lib/onnxjs/backends/webgl/ops/matmul-pack.ts
+++ b/js/web/lib/onnxjs/backends/webgl/ops/matmul-pack.ts
@@ -107,10 +107,10 @@ function getBcastSamplerForMatmul(
   const rankADiff = outRank - inARank;
   const rankBDiff = outRank - inBRank;
 
-  unpackedACoordsSnippet = inAShape.map((s, i) => `coords.${allGlChannels[i + rankADiff]}`);
+  unpackedACoordsSnippet = inAShape.map((_s, i) => `coords.${allGlChannels[i + rankADiff]}`);
   unpackedACoordsSnippet[inARank - 1] = 'i*2';
   unpackedACoordsSnippet.join(', ');
-  unpackedBCoordsSnippet = inBShape.map((s, i) => `coords.${allGlChannels[i + rankBDiff]}`);
+  unpackedBCoordsSnippet = inBShape.map((_s, i) => `coords.${allGlChannels[i + rankBDiff]}`);
   unpackedBCoordsSnippet[inBRank - 2] = 'i*2';
   unpackedBCoordsSnippet.join(', ');
 
diff --git a/js/web/lib/onnxjs/backends/webgl/ops/matmul.ts b/js/web/lib/onnxjs/backends/webgl/ops/matmul.ts
index 704128fb4858e..523165f29f852 100644
--- a/js/web/lib/onnxjs/backends/webgl/ops/matmul.ts
+++ b/js/web/lib/onnxjs/backends/webgl/ops/matmul.ts
@@ -117,7 +117,7 @@ export function getBiasForMatmul(
   if (outRank < 2 && inRank > 0) {
     unpackedCoordsSnippet = 'coords';
   } else {
-    unpackedCoordsSnippet = inShape.map((s, i) => `coords.${allGlChannels[i + rankDiff]}`).join(', ');
+    unpackedCoordsSnippet = inShape.map((_s, i) => `coords.${allGlChannels[i + rankDiff]}`).join(', ');
   }
   const broadcastDims = BroadcastUtil.getBroadcastDims(inShape, outShape);
   const coordsSnippet = broadcastDims.map(d => `coords.${allGlChannels[d + rankDiff]} = 0;`).join('\n');
diff --git a/js/web/lib/onnxjs/backends/webgl/ops/reduce.ts b/js/web/lib/onnxjs/backends/webgl/ops/reduce.ts
index 1a2bc7422c833..c9ea460a6f1fc 100644
--- a/js/web/lib/onnxjs/backends/webgl/ops/reduce.ts
+++ b/js/web/lib/onnxjs/backends/webgl/ops/reduce.ts
@@ -46,7 +46,7 @@ export const parseReduceAttributes: OperatorInitialization<ReduceAttributes> = (
 };
 
 const createReduceProgramInfo =
-    (handler: WebGLInferenceHandler, inputs: Tensor[], attributes: ReduceAttributes, name: string, reduceOp: ReduceOp,
+    (_handler: WebGLInferenceHandler, inputs: Tensor[], attributes: ReduceAttributes, _name: string, reduceOp: ReduceOp,
      reduceProgramMetadata: ProgramMetadata): ProgramInfo => {
       const outputShape: number[] = [];
       const iRank = inputs[0].dims.length || 1;
diff --git a/js/web/lib/onnxjs/backends/webgl/ops/shape.ts b/js/web/lib/onnxjs/backends/webgl/ops/shape.ts
index 51acf5042d8bd..c2d703ed04fa0 100644
--- a/js/web/lib/onnxjs/backends/webgl/ops/shape.ts
+++ b/js/web/lib/onnxjs/backends/webgl/ops/shape.ts
@@ -4,7 +4,7 @@
 import {Tensor} from '../../../tensor';
 import {WebGLInferenceHandler} from '../inference-handler';
 
-export const shape = (inferenceHandler: WebGLInferenceHandler, inputs: Tensor[]): Tensor[] => {
+export const shape = (_inferenceHandler: WebGLInferenceHandler, inputs: Tensor[]): Tensor[] => {
   validateInputs(inputs);
   return [new Tensor([inputs[0].dims.length], 'int32', undefined, undefined, new Int32Array(inputs[0].dims))];
 };
@@ -13,4 +13,4 @@ const validateInputs = (inputs: Tensor[]): void => {
   if (!inputs || inputs.length !== 1) {
     throw new Error('Shape requires 1 input.');
   }
-};
\ No newline at end of file
+};
diff --git a/js/web/lib/onnxjs/backends/webgl/ops/slice.ts b/js/web/lib/onnxjs/backends/webgl/ops/slice.ts
index d32a76bbc8628..81fc1b7076fdb 100644
--- a/js/web/lib/onnxjs/backends/webgl/ops/slice.ts
+++ b/js/web/lib/onnxjs/backends/webgl/ops/slice.ts
@@ -42,8 +42,8 @@ export const parseSliceAttributes: OperatorInitialization<SliceAttributes> = (no
 };
 
 const createSliceProgramInfo =
-    (inferenceHandler: WebGLInferenceHandler, input: Tensor, attributes: SliceAttributes): ProgramInfo => {
-      const axes = (attributes.axes.length === 0) ? input.dims.slice(0).map((val, i) => i) : attributes.axes;
+    (_inferenceHandler: WebGLInferenceHandler, input: Tensor, attributes: SliceAttributes): ProgramInfo => {
+      const axes = (attributes.axes.length === 0) ? input.dims.slice(0).map((_val, i) => i) : attributes.axes;
       const normalizedAxes = ShapeUtil.normalizeAxes(axes, input.dims.length);
       const starts = attributes.starts.map((start, i) => {
         if (start > input.dims[normalizedAxes[i]] - 1) {
diff --git a/js/web/lib/onnxjs/backends/webgl/ops/split.ts b/js/web/lib/onnxjs/backends/webgl/ops/split.ts
index d1bd00d47eebd..2ab14563d80e2 100644
--- a/js/web/lib/onnxjs/backends/webgl/ops/split.ts
+++ b/js/web/lib/onnxjs/backends/webgl/ops/split.ts
@@ -49,13 +49,13 @@ export const parseSplitAttributes: OperatorInitialization<SplitAttributes> = (no
 };
 
 const getProgramCount =
-    (inferenceHandler: WebGLInferenceHandler, inputs: Tensor[], axis: number, attributes: SplitAttributes): number => {
+    (_inferenceHandler: WebGLInferenceHandler, inputs: Tensor[], axis: number, attributes: SplitAttributes): number => {
       const [, offsets] = SplitUtil.splitShape(inputs[0].dims, axis, attributes.split, attributes.numOutputs);
       return offsets.length;
     };
 
 const createSplitProgramInfo =
-    (inferenceHandler: WebGLInferenceHandler, input: Tensor, attributes: SplitAttributes, axis: number, index: number):
+    (_inferenceHandler: WebGLInferenceHandler, input: Tensor, attributes: SplitAttributes, axis: number, index: number):
         ProgramInfo => {
           const [shapes, offsets] = SplitUtil.splitShape(input.dims, axis, attributes.split, attributes.numOutputs);
           const offset = offsets[index];
diff --git a/js/web/lib/onnxjs/backends/webgl/ops/sum.ts b/js/web/lib/onnxjs/backends/webgl/ops/sum.ts
index c05286d16f936..2c25b10c5872c 100644
--- a/js/web/lib/onnxjs/backends/webgl/ops/sum.ts
+++ b/js/web/lib/onnxjs/backends/webgl/ops/sum.ts
@@ -11,7 +11,7 @@ export const sum = (inferenceHandler: WebGLInferenceHandler, inputs: Tensor[]):
 
   const sumProgramMetadata = {
     name: 'Sum',
-    inputNames: inputs.map((v, i) => `X${i}`),
+    inputNames: inputs.map((_v, i) => `X${i}`),
     inputTypes: new Array(inputs.length).fill(TextureType.unpacked)
   };
 
@@ -24,7 +24,7 @@ const createSumProgramInfo =
     (inferenceHandler: WebGLInferenceHandler, inputs: Tensor[], sumProgramMetadata: ProgramMetadata): ProgramInfo => {
       const glsl = getGlsl(inferenceHandler.session.backend.glContext.version);
       const outputShape = inputs[0].dims.slice();
-      const sumLine = inputs.map((v, i) => `${glsl.texture2D}(X${i},TexCoords)`).join(' + ');
+      const sumLine = inputs.map((_v, i) => `${glsl.texture2D}(X${i},TexCoords)`).join(' + ');
       const shaderSource = `
       void main() {
         vec4 result = ${sumLine};
@@ -65,4 +65,4 @@ const validateInputs = (inputs: Tensor[]): void => {
       throw new Error('Input types are not matched.');
     }
   }
-};
\ No newline at end of file
+};
diff --git a/js/web/lib/onnxjs/backends/webgl/ops/tile.ts b/js/web/lib/onnxjs/backends/webgl/ops/tile.ts
index 42128c7abc48c..1d2cba7d9d75f 100644
--- a/js/web/lib/onnxjs/backends/webgl/ops/tile.ts
+++ b/js/web/lib/onnxjs/backends/webgl/ops/tile.ts
@@ -22,7 +22,7 @@ export const tile = (inferenceHandler: WebGLInferenceHandler, inputs: Tensor[]):
 };
 
 const createTileProgramInfo =
-    (handler: WebGLInferenceHandler, inputs: Tensor[], tileProgramMetadata: ProgramMetadata): ProgramInfo => {
+    (_handler: WebGLInferenceHandler, inputs: Tensor[], tileProgramMetadata: ProgramMetadata): ProgramInfo => {
       const inputShape = inputs[0].dims.slice();
       const outputShape = new Array(inputShape.length);
 
@@ -63,4 +63,4 @@ const validateInputs = (inputs: Tensor[]): void => {
   if (inputs[1].type !== 'int32' && inputs[1].type !== 'int16') {
     throw new Error('Invalid repeat type.');
   }
-};
\ No newline at end of file
+};
diff --git a/js/web/lib/onnxjs/backends/webgl/ops/transpose.ts b/js/web/lib/onnxjs/backends/webgl/ops/transpose.ts
index 815ff13f1f925..d3e7b3c0823be 100644
--- a/js/web/lib/onnxjs/backends/webgl/ops/transpose.ts
+++ b/js/web/lib/onnxjs/backends/webgl/ops/transpose.ts
@@ -36,7 +36,7 @@ export const parseTransposeAttributes: OperatorInitialization<TransposeAttribute
     (node: Graph.Node): TransposeAttributes => createAttributeWithCacheKey({perm: node.attributes.getInts('perm', [])});
 
 const createTransposeProgramInfo =
-    (inferenceHandler: WebGLInferenceHandler, input: Tensor, perm: number[]): ProgramInfo => {
+    (_inferenceHandler: WebGLInferenceHandler, input: Tensor, perm: number[]): ProgramInfo => {
       const inputShape = input.dims;
       perm = getAdjustedPerm(inputShape, perm);
       const unpackedOutputShape = getOutputShape(inputShape, perm);
diff --git a/js/web/lib/onnxjs/backends/webgl/texture-data-encoder.ts b/js/web/lib/onnxjs/backends/webgl/texture-data-encoder.ts
index 6ddd420c6fa81..4b0cf3f037921 100644
--- a/js/web/lib/onnxjs/backends/webgl/texture-data-encoder.ts
+++ b/js/web/lib/onnxjs/backends/webgl/texture-data-encoder.ts
@@ -11,14 +11,15 @@ export declare namespace Encoder {
   }
   export type DataType = keyof DataTypeMap;
   type DataArrayType = DataTypeMap[DataType];
+}
 
-  /* eslint-disable @typescript-eslint/naming-convention */
-  export const enum Usage {
-    Default = 0,
-    UploadOnly,
-    Download4BytesAsFloat32,
-  }
+/* eslint-disable @typescript-eslint/naming-convention */
+export const enum EncoderUsage {
+  Default = 0,
+  UploadOnly,
+  Download4BytesAsFloat32,
 }
+/* eslint-enable @typescript-eslint/naming-convention */
 
 /**
  * Abstraction for mapping data types to texture texlets
@@ -81,7 +82,7 @@ export class RedFloat32DataEncoder implements DataEncoder {
   }
   decode(buffer: Encoder.DataArrayType, dataSize: number): Float32Array {
     if (this.channelSize === 1) {
-      const filteredData = (buffer as Float32Array).filter((value, index) => index % 4 === 0).subarray(0, dataSize);
+      const filteredData = (buffer as Float32Array).filter((_value, index) => index % 4 === 0).subarray(0, dataSize);
       return filteredData;
     }
     return buffer.subarray(0, dataSize) as Float32Array;
@@ -118,7 +119,7 @@ export class RGBAFloatDataEncoder implements DataEncoder {
   }
   decode(buffer: Encoder.DataArrayType, dataSize: number): Float32Array {
     if (this.channelSize === 1) {
-      const filteredData = (buffer as Float32Array).filter((value, index) => index % 4 === 0).subarray(0, dataSize);
+      const filteredData = (buffer as Float32Array).filter((_value, index) => index % 4 === 0).subarray(0, dataSize);
       return filteredData;
     }
     return buffer.subarray(0, dataSize) as Float32Array;
diff --git a/js/web/lib/onnxjs/backends/webgl/texture-layout-strategy.ts b/js/web/lib/onnxjs/backends/webgl/texture-layout-strategy.ts
index c89ef3d23638d..f8e370747928c 100644
--- a/js/web/lib/onnxjs/backends/webgl/texture-layout-strategy.ts
+++ b/js/web/lib/onnxjs/backends/webgl/texture-layout-strategy.ts
@@ -105,7 +105,7 @@ export class PreferLogicalStrategy implements TextureLayoutStrategy {
       // tensor has 3 rows, we pretend it has 4 rows in order to account for the
       // fact that the texels containing the third row are half empty.
       logShape = logShape.map(
-          (d, i) => i >= logShape.length - 2 ? (logShape[i] % 2 === 0 ? logShape[i] : logShape[i] + 1) : logShape[i]);
+          (_d, i) => i >= logShape.length - 2 ? (logShape[i] % 2 === 0 ? logShape[i] : logShape[i] + 1) : logShape[i]);
 
       // Packed texture height is at least 2 (the channel height of a single
       // texel).
@@ -182,7 +182,7 @@ export function parseAxisParam(axis: number|number[], shape: number[]): number[]
   const rank = shape.length;
 
   // Normalize input
-  axis = axis == null ? shape.map((s, i) => i) : ([] as number[]).concat(axis);
+  axis = axis == null ? shape.map((_s, i) => i) : ([] as number[]).concat(axis);
 
   // Check for valid range
   assert(
diff --git a/js/web/lib/onnxjs/backends/webgl/texture-manager.ts b/js/web/lib/onnxjs/backends/webgl/texture-manager.ts
index 36c7fe7603aa0..effb65288dc1c 100644
--- a/js/web/lib/onnxjs/backends/webgl/texture-manager.ts
+++ b/js/web/lib/onnxjs/backends/webgl/texture-manager.ts
@@ -4,7 +4,7 @@
 import {Logger, Profiler} from '../../instrument';
 import {Tensor} from '../../tensor';
 
-import {Encoder} from './texture-data-encoder';
+import {Encoder, EncoderUsage} from './texture-data-encoder';
 import {TextureLayoutStrategy} from './texture-layout-strategy';
 import {TextureData, TextureLayout} from './types';
 import {WebGLContext} from './webgl-context';
@@ -39,11 +39,11 @@ export class TextureManager {
     }
   }
   createTextureFromLayout(
-      dataType: Tensor.DataType, layout: TextureLayout, data?: Tensor.NumberType, usage?: Encoder.Usage) {
+      dataType: Tensor.DataType, layout: TextureLayout, data?: Tensor.NumberType, usage?: EncoderUsage) {
     const textureDataType = this.toEncoderType(dataType);
 
     const encoder = this.glContext.getEncoder(textureDataType, layout.channels || 1, usage);
-    if (layout.isPacked && usage === Encoder.Usage.UploadOnly) {
+    if (layout.isPacked && usage === EncoderUsage.UploadOnly) {
       throw new Error('not implemented');
     }
     const width = layout.width;
@@ -63,7 +63,7 @@ export class TextureManager {
       if (idleTextures && idleTextures.length > 0) {
         const texture = idleTextures.pop()!;
         inUseTextures.push(texture);
-        if (usage === Encoder.Usage.UploadOnly) {
+        if (usage === EncoderUsage.UploadOnly) {
           this.glContext.updateTexture(texture, width, height, encoder, this.toTextureData(dataType, data)!);
         }
         return texture;
@@ -172,7 +172,7 @@ export class TextureManager {
         throw new Error(`TensorData type ${dataType} is not supported`);
     }
   }
-  toTextureData(dataType: Tensor.DataType, data: Tensor.NumberType|undefined): Encoder.DataArrayType|undefined {
+  toTextureData(_dataType: Tensor.DataType, data: Tensor.NumberType|undefined): Encoder.DataArrayType|undefined {
     if (!data) {
       return undefined;
     }
diff --git a/js/web/lib/onnxjs/backends/webgl/webgl-context.ts b/js/web/lib/onnxjs/backends/webgl/webgl-context.ts
index 2d5b844c57f60..744f206e38334 100644
--- a/js/web/lib/onnxjs/backends/webgl/webgl-context.ts
+++ b/js/web/lib/onnxjs/backends/webgl/webgl-context.ts
@@ -4,7 +4,7 @@
 import {env} from 'onnxruntime-common';
 
 import * as DataEncoders from './texture-data-encoder';
-import {DataEncoder, Encoder} from './texture-data-encoder';
+import {DataEncoder, Encoder, EncoderUsage} from './texture-data-encoder';
 import {repeatedTry} from './utils';
 
 export interface FenceContext {
@@ -257,14 +257,14 @@ ${shaderSource}`);
   deleteProgram(program: WebGLProgram): void {
     this.gl.deleteProgram(program);
   }
-  getEncoder(dataType: Encoder.DataType, channels: number, usage: Encoder.Usage = Encoder.Usage.Default): DataEncoder {
+  getEncoder(dataType: Encoder.DataType, channels: number, usage: EncoderUsage = EncoderUsage.Default): DataEncoder {
     if (this.version === 2) {
       return new DataEncoders.RedFloat32DataEncoder(this.gl as WebGL2RenderingContext, channels);
     }
 
     switch (dataType) {
       case 'float':
-        if (usage === Encoder.Usage.UploadOnly || this.isRenderFloat32Supported) {
+        if (usage === EncoderUsage.UploadOnly || this.isRenderFloat32Supported) {
           return new DataEncoders.RGBAFloatDataEncoder(this.gl, channels);
         } else {
           return new DataEncoders.RGBAFloatDataEncoder(
diff --git a/js/web/lib/onnxjs/execution-plan.ts b/js/web/lib/onnxjs/execution-plan.ts
index b95e639817dbf..5599087ab46f5 100644
--- a/js/web/lib/onnxjs/execution-plan.ts
+++ b/js/web/lib/onnxjs/execution-plan.ts
@@ -114,7 +114,7 @@ export class ExecutionPlan {
 
         // resolve downstream nodes
         const downstreamNodes = new Set<number>();
-        outputList.forEach((output, i) => {
+        outputList.forEach((_output, i) => {
           const j = thisOp.node.outputs[i];
           for (const currentDownstreamNodeIndex of graphValues[j].to) {
             const currentDownstreamNode = graphNodes[currentDownstreamNodeIndex];
diff --git a/js/web/lib/onnxjs/instrument.ts b/js/web/lib/onnxjs/instrument.ts
index 4c543cab157d7..4f865503d50ec 100644
--- a/js/web/lib/onnxjs/instrument.ts
+++ b/js/web/lib/onnxjs/instrument.ts
@@ -176,7 +176,7 @@ function createCategorizedLogger(category: string): Logger.CategorizedLogger {
 // NOTE: argument 'category' is put the last parameter beacause typescript
 // doesn't allow optional argument put in front of required argument. This
 // order is different from a usual logging API.
-function logInternal(severity: Logger.Severity, content: string, stack: number, category?: string) {
+function logInternal(severity: Logger.Severity, content: string, _stack: number, category?: string) {
   const config = LOGGER_CONFIG_MAP[category || ''] || LOGGER_CONFIG_MAP[''];
   if (SEVERITY_VALUE[severity] < SEVERITY_VALUE[config.minimalSeverity]) {
     return;
diff --git a/js/web/lib/onnxjs/session-handler.ts b/js/web/lib/onnxjs/session-handler-inference.ts
similarity index 88%
rename from js/web/lib/onnxjs/session-handler.ts
rename to js/web/lib/onnxjs/session-handler-inference.ts
index 0b06a7a747a44..47e50aeab673a 100644
--- a/js/web/lib/onnxjs/session-handler.ts
+++ b/js/web/lib/onnxjs/session-handler-inference.ts
@@ -1,12 +1,12 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
-import {InferenceSession, SessionHandler, Tensor} from 'onnxruntime-common';
+import {InferenceSession, InferenceSessionHandler, SessionHandler, Tensor} from 'onnxruntime-common';
 
 import {Session} from './session';
 import {Tensor as OnnxjsTensor} from './tensor';
 
-export class OnnxjsSessionHandler implements SessionHandler {
+export class OnnxjsSessionHandler implements InferenceSessionHandler {
   constructor(private session: Session) {
     this.inputNames = this.session.inputNames;
     this.outputNames = this.session.outputNames;
diff --git a/js/web/lib/onnxjs/session.ts b/js/web/lib/onnxjs/session.ts
index 790be3c740cd5..cf8793e1f26f5 100644
--- a/js/web/lib/onnxjs/session.ts
+++ b/js/web/lib/onnxjs/session.ts
@@ -1,8 +1,7 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
-import {readFile} from 'fs';
-import {promisify} from 'util';
+import {readFile} from 'node:fs/promises';
 
 import {resolveBackend, SessionHandlerType} from './backend';
 import {ExecutionPlan} from './execution-plan';
@@ -62,7 +61,7 @@ export class Session {
         const isOrtFormat = arg.endsWith('.ort');
         if (typeof process !== 'undefined' && process.versions && process.versions.node) {
           // node
-          const buf = await promisify(readFile)(arg);
+          const buf = await readFile(arg);
           this.initialize(buf, isOrtFormat);
         } else {
           // browser
diff --git a/js/web/lib/onnxjs/util.ts b/js/web/lib/onnxjs/util.ts
index 0a76d75e79bbf..d697a8b3138cf 100644
--- a/js/web/lib/onnxjs/util.ts
+++ b/js/web/lib/onnxjs/util.ts
@@ -967,7 +967,7 @@ export class ReduceUtil {
     const dims = a.dims.slice(0);
     // if axes is not set, perform reduce on all axes
     if (axes.length === 0) {
-      dims.forEach((d, ind) => axes.push(ind));
+      dims.forEach((_d, ind) => axes.push(ind));
     }
     // get a temporary broadcastable output shape
     const outputDims = ReduceUtil.calcReduceShape(dims, axes, true);
diff --git a/js/web/lib/wasm/binding/ort-wasm.d.ts b/js/web/lib/wasm/binding/ort-wasm.d.ts
index 59da1369e152e..00431a4e86d5b 100644
--- a/js/web/lib/wasm/binding/ort-wasm.d.ts
+++ b/js/web/lib/wasm/binding/ort-wasm.d.ts
@@ -1,6 +1,8 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
+import type {Tensor} from 'onnxruntime-common';
+
 export declare namespace JSEP {
   type BackendType = unknown;
   type AllocFunction = (size: number) => number;
@@ -9,11 +11,8 @@ export declare namespace JSEP {
   type DownloadFunction = (gpuDataId: number, dataOffset: number, size: number) => Promise<void>;
   type CreateKernelFunction = (name: string, kernel: number, attribute: unknown) => void;
   type ReleaseKernelFunction = (kernel: number) => void;
-  type RunFunction = (kernel: number, contextDataOffset: number, sessionState: SessionState) => number;
-  export interface SessionState {
-    sessionId: number;
-    errors: Array<Promise<string|null>>;
-  }
+  type RunFunction =
+      (kernel: number, contextDataOffset: number, sessionHandle: number, errors: Array<Promise<string|null>>) => number;
 }
 
 export interface OrtWasmModule extends EmscriptenModule {
@@ -40,14 +39,23 @@ export interface OrtWasmModule extends EmscriptenModule {
 
   _OrtFree(stringHandle: number): void;
 
-  _OrtCreateTensor(dataType: number, dataOffset: number, dataLength: number, dimsOffset: number, dimsLength: number):
-      number;
+  _OrtCreateTensor(
+      dataType: number, dataOffset: number, dataLength: number, dimsOffset: number, dimsLength: number,
+      dataLocation: number): number;
   _OrtGetTensorData(tensorHandle: number, dataType: number, dataOffset: number, dimsOffset: number, dimsLength: number):
       number;
   _OrtReleaseTensor(tensorHandle: number): void;
+  _OrtCreateBinding(sessionHandle: number): number;
+  _OrtBindInput(bindingHandle: number, nameOffset: number, tensorHandle: number): Promise<number>;
+  _OrtBindOutput(bindingHandle: number, nameOffset: number, tensorHandle: number, location: number): number;
+  _OrtClearBoundOutputs(ioBindingHandle: number): void;
+  _OrtReleaseBinding(ioBindingHandle: number): void;
+  _OrtRunWithBinding(
+      sessionHandle: number, ioBindingHandle: number, outputCount: number, outputsOffset: number,
+      runOptionsHandle: number): Promise<number>;
   _OrtRun(
       sessionHandle: number, inputNamesOffset: number, inputsOffset: number, inputCount: number,
-      outputNamesOffset: number, outputCount: number, outputsOffset: number, runOptionsHandle: number): number;
+      outputNamesOffset: number, outputCount: number, outputsOffset: number, runOptionsHandle: number): Promise<number>;
 
   _OrtCreateSessionOptions(
       graphOptimizationLevel: number, enableCpuMemArena: boolean, enableMemPattern: boolean, executionMode: number,
@@ -94,6 +102,11 @@ export interface OrtWasmModule extends EmscriptenModule {
   _OrtTrainingCopyParametersFromBuffer?
       (trainingHandle: number, parametersBuffer: number, parameterCount: number, trainableOnly: boolean): number;
 
+  _OrtTrainingGetModelInputOutputCount?
+      (trainingHandle: number, inputCount: number, outputCount: number, isEvalModel: boolean): number;
+  _OrtTrainingGetModelInputOutputName?
+      (trainingHandle: number, index: number, isInput: boolean, isEvalModel: boolean): number;
+
   _OrtTrainingReleaseSession?(trainingHandle: number): void;
   // #endregion
 
@@ -102,17 +115,67 @@ export interface OrtWasmModule extends EmscriptenModule {
   // #endregion
 
   // #region JSEP
+  /**
+   * This is the entry of JSEP initialization. This function is called once when initializing ONNX Runtime.
+   * This function initializes WebGPU backend and registers a few callbacks that will be called in C++ code.
+   */
   jsepInit?
       (backend: JSEP.BackendType, alloc: JSEP.AllocFunction, free: JSEP.FreeFunction, upload: JSEP.UploadFunction,
        download: JSEP.DownloadFunction, createKernel: JSEP.CreateKernelFunction,
        releaseKernel: JSEP.ReleaseKernelFunction, run: JSEP.RunFunction): void;
 
+  /**
+   * [exported from wasm] Specify a kernel's output when running OpKernel::Compute().
+   *
+   * @param context - specify the kernel context pointer.
+   * @param index - specify the index of the output.
+   * @param data - specify the pointer to encoded data of type and dims.
+   */
   _JsepOutput(context: number, index: number, data: number): number;
+  /**
+   * [exported from wasm] Get name of an operator node.
+   *
+   * @param kernel - specify the kernel pointer.
+   * @returns the pointer to a C-style UTF8 encoded string representing the node name.
+   */
   _JsepGetNodeName(kernel: number): number;
 
-  jsepOnRunStart?(sessionId: number): void;
-  jsepOnRunEnd?(sessionId: number): Promise<void>;
-  jsepRunPromise?: Promise<number>;
+  /**
+   * [exported from js_internal_api.js] Register a user GPU buffer for usage of a session's input or output.
+   *
+   * @param sessionId - specify the session ID.
+   * @param index - specify an integer to represent which input/output it is registering for. For input, it is the
+   *     input_index corresponding to the session's inputNames. For output, it is the inputCount + output_index
+   *     corresponding to the session's ouputNames.
+   * @param buffer - specify the GPU buffer to register.
+   * @param size - specify the original data size in byte.
+   * @returns the GPU data ID for the registered GPU buffer.
+   */
+  jsepRegisterBuffer: (sessionId: number, index: number, buffer: GPUBuffer, size: number) => number;
+  /**
+   * [exported from js_internal_api.js] Unregister all user GPU buffers for a session.
+   *
+   * @param sessionId - specify the session ID.
+   */
+  jsepUnregisterBuffers?: (sessionId: number) => void;
+  /**
+   * [exported from js_internal_api.js] Get the GPU buffer by GPU data ID.
+   *
+   * @param dataId - specify the GPU data ID
+   * @returns the GPU buffer.
+   */
+  jsepGetBuffer: (dataId: number) => GPUBuffer;
+  /**
+   * [exported from js_internal_api.js] Create a function to be used to create a GPU Tensor.
+   *
+   * @param gpuBuffer - specify the GPU buffer
+   * @param size - specify the original data size in byte.
+   * @param type - specify the tensor type.
+   * @returns the generated downloader function.
+   */
+  jsepCreateDownloader:
+      (gpuBuffer: GPUBuffer, size: number,
+       type: Tensor.GpuBufferDataTypes) => () => Promise<Tensor.DataTypeMap[Tensor.GpuBufferDataTypes]>;
   // #endregion
 }
 
diff --git a/js/web/lib/wasm/jsep/backend-webgpu.ts b/js/web/lib/wasm/jsep/backend-webgpu.ts
index 5e77a0343b4ee..e2c2bc8deccf4 100644
--- a/js/web/lib/wasm/jsep/backend-webgpu.ts
+++ b/js/web/lib/wasm/jsep/backend-webgpu.ts
@@ -1,14 +1,51 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
-import {Env} from 'onnxruntime-common';
+import {Env, Tensor} from 'onnxruntime-common';
 
 import {configureLogger, LOG_DEBUG} from './log';
-import {TensorView} from './tensor-view';
-import {createGpuDataManager, GpuDataManager} from './webgpu/gpu-data-manager';
+import {createView, TensorView} from './tensor-view';
+import {createGpuDataManager, downloadGpuData, GpuDataManager} from './webgpu/gpu-data-manager';
 import {RunFunction, WEBGPU_OP_RESOLVE_RULES} from './webgpu/op-resolve-rules';
 import {ProgramManager} from './webgpu/program-manager';
-import {ComputeContext, GpuData, ProgramInfo, ProgramInfoLoader} from './webgpu/types';
+import {ComputeContext, GpuData, ProgramInfo, ProgramInputTensorInfoDependency} from './webgpu/types';
+
+const getProgramInputTensorInfoDependencyKey =
+    (inputTensors: readonly TensorView[], inputDependencies: readonly ProgramInputTensorInfoDependency[]): string => {
+      if (inputDependencies.length !== inputTensors.length) {
+        throw new Error(`inputDependencies length ${inputDependencies.length} is not equal to inputTensors length ${
+            inputTensors.length}.`);
+      }
+
+      const inputInfos: string[] = [];
+      for (let i = 0; i < inputTensors.length; ++i) {
+        const type = inputTensors[i].dataType;
+        switch (inputDependencies[i]) {
+          case 'none': {
+            inputInfos.push('');
+            break;
+          }
+          case 'type': {
+            inputInfos.push(`${type}`);
+            break;
+          }
+          case 'rank': {
+            const rank = inputTensors[i].dims.length;
+            inputInfos.push(`${type};${rank}`);
+            break;
+          }
+          case 'dims': {
+            const dims = inputTensors[i].dims.join(',');
+            inputInfos.push(`${type};${dims}`);
+            break;
+          }
+          default:
+            throw new Error(`unsupported input dependency: ${inputDependencies[i]}`);
+        }
+      }
+
+      return inputInfos.join('|');
+    };
 
 /**
  * get a unique key representing the program from the program info, input shapes and types.
@@ -18,15 +55,19 @@ import {ComputeContext, GpuData, ProgramInfo, ProgramInfoLoader} from './webgpu/
  *
  */
 const getProgramInfoUniqueKey =
-    (programInfo: ProgramInfo|ProgramInfoLoader, inputTensors: readonly TensorView[]): string => {
+    (programInfo: ProgramInfo, inputTensors: readonly TensorView[], is1DimensionDispatch: boolean): string => {
       // final key format:
-      // <PROGRAM_NAME>[<PROGRAM_CUSTOM_CACHE_HINT>]:<INPUTS_INFO_0>|<INPUTS_INFO_1>|...
-      const inputInfos = inputTensors.map(tensor => `${tensor.dataType};${tensor.dims.join(',')}`).join('|');
+      // <PROGRAM_NAME>[<PROGRAM_CUSTOM_CACHE_HINT>]:is1DimensionDispatch:<INPUTS_INFO_0>|<INPUTS_INFO_1>|...
       let key = programInfo.name;
-      if (programInfo.cacheHint) {
-        key += '[' + programInfo.cacheHint + ']';
+      if (programInfo.shaderCache?.hint) {
+        key += '[' + programInfo.shaderCache.hint + ']';
       }
-      key += ':' + inputInfos;
+      key += ':' + is1DimensionDispatch +
+          `:${
+                 getProgramInputTensorInfoDependencyKey(
+                     inputTensors,
+                     programInfo.shaderCache?.inputDependencies ??
+                         new Array<ProgramInputTensorInfoDependency>(inputTensors.length).fill('dims'))}`;
       return key;
     };
 
@@ -87,17 +128,22 @@ export class WebGpuBackend {
    */
   kernels: Map<number, [string, string, RunFunction, [((attribute: unknown) => unknown) | undefined, unknown]]>;
 
-  commandEncoder: GPUCommandEncoder|null = null;
-  computePassEncoder: GPUComputePassEncoder|null = null;
+  private commandEncoder: GPUCommandEncoder|null = null;
+  private computePassEncoder: GPUComputePassEncoder|null = null;
   pendingDispatchNumber = 0;
 
-  supportTimestampQuery = false;
-  profilingQuerySet: GPUQuerySet;
-  profilingQueryData: GpuData;
-  profilingTimeBase?: bigint;
+  queryData?: GpuData;
+  querySet?: GPUQuerySet;
+  querySetCount = 2;
+  queryTimeBase?: bigint;
 
   env: Env;
 
+  /**
+   * a SessionID -> a Map of (InputOutputIndex -> [ID, GPUBuffer]) mapping.
+   */
+  sessionExternalDataMapping: Map<number, Map<number, [number, GPUBuffer]>> = new Map();
+
   async initialize(env: Env): Promise<void> {
     if (!navigator.gpu) {
       // WebGPU is not available.
@@ -124,11 +170,9 @@ export class WebGpuBackend {
       },
       requiredFeatures,
     };
-    // WebGPU Spec: Timestamp Queries Inside Passes
-    // https://github.com/gpuweb/gpuweb/blob/main/proposals/timestamp-query-inside-passes.md
-    if (adapter.features.has('timestamp-query-inside-passes')) {
-      this.supportTimestampQuery = true;
-      requiredFeatures.push('timestamp-query-inside-passes' as GPUFeatureName);
+
+    if (adapter.features.has('timestamp-query')) {
+      requiredFeatures.push('timestamp-query');
     }
     if (adapter.features.has('shader-f16')) {
       requiredFeatures.push('shader-f16');
@@ -153,21 +197,14 @@ export class WebGpuBackend {
       }
     };
 
-    if (this.supportTimestampQuery) {
-      this.profilingQuerySet = this.device.createQuerySet({
-        type: 'timestamp',
-        count: 2,
-      });
-    }
-
     Object.defineProperty(this.env.webgpu, 'device', {value: this.device});
   }
 
   dispose(): void {
-    // currently, we do not do anything in this function. In all known use cases, we don't have the requirement to
-    // actually dispose the WebGpuBackend instance, because it's always used as a singleton.
-    //
-    // revisit this place if we get real requirement to dispose the instance.
+    if (typeof this.querySet !== 'undefined') {
+      this.querySet.destroy();
+    }
+    this.gpuDataManager.dispose();
   }
 
   getCommandEncoder(): GPUCommandEncoder {
@@ -179,7 +216,22 @@ export class WebGpuBackend {
 
   getComputePassEncoder(): GPUComputePassEncoder {
     if (!this.computePassEncoder) {
-      this.computePassEncoder = this.getCommandEncoder().beginComputePass();
+      const computePassDescriptor: GPUComputePassDescriptor = {};
+      if (this.isQueryEnabled()) {
+        if (typeof this.querySet === 'undefined') {
+          this.querySet = this.device.createQuerySet({
+            type: 'timestamp',
+            count: this.querySetCount,
+          });
+        }
+        computePassDescriptor.timestampWrites = {
+          querySet: this.querySet,
+          beginningOfPassWriteIndex: 0,
+          endOfPassWriteIndex: 1,
+        };
+      }
+
+      this.computePassEncoder = this.getCommandEncoder().beginComputePass(computePassDescriptor);
     }
     return this.computePassEncoder;
   }
@@ -192,18 +244,27 @@ export class WebGpuBackend {
   }
 
   flush(): void {
-    this.endComputePass();
-    this.device.queue.submit([this.getCommandEncoder().finish()]);
-    this.gpuDataManager.refreshPendingBuffers();
-    this.commandEncoder = null;
-    this.pendingDispatchNumber = 0;
+    if (this.commandEncoder) {
+      this.endComputePass();
+      this.device.queue.submit([this.getCommandEncoder().finish()]);
+      this.gpuDataManager.refreshPendingBuffers();
+      this.commandEncoder = null;
+      this.pendingDispatchNumber = 0;
+    }
+  }
+
+  isQueryEnabled(): boolean {
+    if (this.device.features.has('timestamp-query') && this.env.webgpu.profilingMode === 'default') {
+      return true;
+    } else {
+      return false;
+    }
   }
 
   /**
    * run a WebGPU program.
-   * @param program either a ProgramInfo instance containing metadata including the shader code, or a function that
-   * can be called and return a ProgramInfo instance
-   * @param inputs a TensorView array. each element represents a value already exists in GPU.
+   * @param program a ProgramInfo instance
+   * @param inputTensorViews a TensorView array. each element represents a value already exists in GPU.
    * @param outputIndices an indices array. each element can be either -1 (temporary data), -2 (persistent data) or an
    * index to the kernel's output.
    * @param createKernelOutput a callback function that create a value to kernel's output with the given index
@@ -211,45 +272,36 @@ export class WebGpuBackend {
    * or persistent (owned by the current kernel)
    * @returns a TensorView array representing the result.
    */
-  run(program: ProgramInfoLoader|ProgramInfo, inputs: readonly TensorView[], outputIndices: readonly number[],
+  run(program: ProgramInfo, inputTensorViews: readonly TensorView[], outputIndices: readonly number[],
       createKernelOutput: (index: number, dataType: number, dims: readonly number[]) => TensorView,
       createIntermediateOutput: (dataType: number, dims: readonly number[]) => TensorView): TensorView[] {
-    if (inputs.length !== program.inputTypes.length) {
-      throw new Error(`Input size must be equal to ${program.inputTypes.length}.`);
-    }
-
     // create info for inputs
     const inputDatas: GpuData[] = [];
-    for (let i = 0; i < inputs.length; ++i) {
-      const gpuData = this.gpuDataManager.get(inputs[i].data);
+    for (let i = 0; i < inputTensorViews.length; ++i) {
+      const gpuData = this.gpuDataManager.get(inputTensorViews[i].data);
       if (!gpuData) {
-        throw new Error(`no GPU data for input: ${inputs[i].data}`);
+        throw new Error(`no GPU data for input: ${inputTensorViews[i].data}`);
       }
       inputDatas[i] = gpuData;
     }
 
-    const key = getProgramInfoUniqueKey(program, inputs);
-    let artifact = this.programManager.getArtifact(key);
-    const programInfo = artifact ?
-        artifact.programInfo :
-        (typeof (program as ProgramInfoLoader).get === 'function' ? (program as ProgramInfoLoader).get() :
-                                                                    (program as ProgramInfo));
+    const {outputs, dispatchGroup, programUniforms} = program.getRunData(inputTensorViews);
 
     // check output indices
-    const validatedOutputIndices = outputIndices.length === 0 ? programInfo.outputs.map((_, i) => i) : outputIndices;
-    if (validatedOutputIndices.length !== programInfo.outputs.length) {
-      throw new Error(`Output size ${validatedOutputIndices.length} must be equal to ${programInfo.outputs.length}.`);
+    const validatedOutputIndices = outputIndices.length === 0 ? outputs.map((_, i) => i) : outputIndices;
+    if (validatedOutputIndices.length !== outputs.length) {
+      throw new Error(`Output size ${validatedOutputIndices.length} must be equal to ${outputs.length}.`);
     }
 
     // create info for outputs
     const outputTensorViews: TensorView[] = [];
     const outputDatas: GpuData[] = [];
-    for (let i = 0; i < programInfo.outputs.length; ++i) {
+    for (let i = 0; i < outputs.length; ++i) {
       // value -1 and -2 are used for creating temporary and persistent outputs.
       // value -3 is used for placeholder output. So -3, -2, -1 and 0, 1, 2, ... are valid
       // output indices. see type definition of ComputeContextInputsOutputsMapping for more details.
       if (!Number.isInteger(validatedOutputIndices[i]) || validatedOutputIndices[i] < -3 ||
-          validatedOutputIndices[i] >= programInfo.outputs.length) {
+          validatedOutputIndices[i] >= outputs.length) {
         throw new Error(`Invalid output index: ${validatedOutputIndices[i]}`);
       }
       if (validatedOutputIndices[i] === -3) {
@@ -258,8 +310,8 @@ export class WebGpuBackend {
       const isTemporary = validatedOutputIndices[i] === -1;
       const isPersistent = validatedOutputIndices[i] === -2;
       const tensorView = (isTemporary || isPersistent) ?
-          createIntermediateOutput(programInfo.outputs[i].dataType, programInfo.outputs[i].dims) :
-          createKernelOutput(validatedOutputIndices[i], programInfo.outputs[i].dataType, programInfo.outputs[i].dims);
+          createIntermediateOutput(outputs[i].dataType, outputs[i].dims) :
+          createKernelOutput(validatedOutputIndices[i], outputs[i].dataType, outputs[i].dims);
       const gpuData = this.gpuDataManager.get(tensorView.data);
       if (!gpuData) {
         throw new Error(`no GPU data for output: ${tensorView.data}`);
@@ -279,18 +331,97 @@ export class WebGpuBackend {
       outputDatas.push(gpuData);
     }
 
-    const normalizedDispatchGroup = this.programManager.normalizeDispatchGroupSize(programInfo.dispatchGroup(inputs));
 
+    // load uniforms
+    // TODO: add cache for uniform (is it necessary?)
+    //
+    let uniformBufferBinding: GPUBindingResource|undefined;
+    if (programUniforms) {
+      let currentOffset = 0;
+      let preLength = 0;
+      const offsets: number[] = [];
+      let maxAlignmentOfField = 1;
+      programUniforms.forEach(v => {
+        const data = typeof v.data === 'number' ? [v.data] : v.data;
+        if (data.length === 0) {
+          return;
+        }
+        // https://www.w3.org/TR/WGSL/#alignof
+        let baseAlignment: number;
+        switch (data.length) {
+          case 1:
+            baseAlignment = 4;
+            break;
+          case 2:
+            baseAlignment = 8;
+            break;
+          case 3:
+            baseAlignment = 16;
+            break;
+          case 4:
+            baseAlignment = 16;
+            break;
+          case 5:
+            baseAlignment = 16;
+            break;
+          case 6:
+            baseAlignment = 16;
+            break;
+          default:
+            throw new Error(`unsupported data length: ${data.length}`);
+        }
+
+        if (preLength === 5 || preLength === 6) {
+          baseAlignment = 16;
+        }
+        if (baseAlignment > maxAlignmentOfField) {
+          maxAlignmentOfField = baseAlignment;
+        }
+        currentOffset = Math.ceil(currentOffset / baseAlignment) * baseAlignment;
+        preLength = data.length;
+        offsets.push(currentOffset);
+        currentOffset += data.length * 4;
+      });
+
+      currentOffset = Math.ceil(currentOffset / maxAlignmentOfField) * maxAlignmentOfField;
+      const arrayBuffer = new ArrayBuffer(currentOffset);
+      programUniforms.forEach((v, i) => {
+        const offset = offsets[i];
+        const data = typeof v.data === 'number' ? [v.data] : v.data;
+        if (v.type === 'int32') {
+          new Int32Array(arrayBuffer, offset, data.length).set(data);
+        } else if (v.type === 'uint32') {
+          new Uint32Array(arrayBuffer, offset, data.length).set(data);
+        } else {
+          new Float32Array(arrayBuffer, offset, data.length).set(data);
+        }
+      });
+
+      const uniformBufferData =
+          // eslint-disable-next-line no-bitwise
+          this.gpuDataManager.create(currentOffset, GPUBufferUsage.COPY_DST | GPUBufferUsage.UNIFORM);
+      this.device.queue.writeBuffer(uniformBufferData.buffer, 0, arrayBuffer, 0, currentOffset);
+      this.gpuDataManager.release(uniformBufferData.id);
+      uniformBufferBinding = {offset: 0, size: currentOffset, buffer: uniformBufferData.buffer};
+    }
+
+    const normalizedDispatchGroup = this.programManager.normalizeDispatchGroupSize(dispatchGroup);
+    const is1DimensionDispatch = normalizedDispatchGroup[1] === 1 && normalizedDispatchGroup[2] === 1;
+    // get program info
+    const key = getProgramInfoUniqueKey(program, inputTensorViews, is1DimensionDispatch);
+    let artifact = this.programManager.getArtifact(key);
     if (!artifact) {
-      artifact = this.programManager.build(programInfo, normalizedDispatchGroup);
+      artifact = this.programManager.build(program, normalizedDispatchGroup);
       this.programManager.setArtifact(key, artifact);
     }
 
     LOG_DEBUG(
         'info',
-        () => `[ProgramManager] run "${programInfo.name}" (key=${key}) with ${normalizedDispatchGroup[0]}x${
+        () => `[ProgramManager] run "${program.name}" (key=${key}) with ${normalizedDispatchGroup[0]}x${
             normalizedDispatchGroup[1]}x${normalizedDispatchGroup[2]}`);
-    this.programManager.run(artifact, inputs, inputDatas, outputDatas, normalizedDispatchGroup);
+    this.programManager.run(
+        artifact, inputTensorViews, outputTensorViews, inputDatas, outputDatas, normalizedDispatchGroup,
+        uniformBufferBinding);
 
     return outputTensorViews;
   }
@@ -304,12 +435,9 @@ export class WebGpuBackend {
   }
 
   async download(gpuDataId: number, getTargetBuffer: () => Uint8Array): Promise<void> {
-    const arrayBuffer = await this.gpuDataManager.download(gpuDataId);
-
     // the underlying buffer may be changed after the async function is called. so we use a getter function to make sure
     // the buffer is up-to-date.
-    const data = getTargetBuffer();
-    data.set(new Uint8Array(arrayBuffer, 0, data.byteLength));
+    await this.gpuDataManager.download(gpuDataId, getTargetBuffer);
   }
 
   alloc(size: number): number {
@@ -372,7 +500,7 @@ export class WebGpuBackend {
       kernelEntry(context, attributes[1]);
       return 0;  // ORT_OK
     } catch (e) {
-      LOG_DEBUG('warning', `[WebGPU] Kernel "[${opType}] ${nodeName}" failed. Error: ${e}`);
+      errors.push(Promise.resolve(`[WebGPU] Kernel "[${opType}] ${nodeName}" failed. ${e}`));
       return 1;  // ORT_FAIL
     } finally {
       if (useErrorScope) {
@@ -387,4 +515,40 @@ export class WebGpuBackend {
       this.currentKernelId = null;
     }
   }
+
+  // #region external buffer
+  registerBuffer(sessionId: number, index: number, buffer: GPUBuffer, size: number): number {
+    let sessionInputOutputMapping = this.sessionExternalDataMapping.get(sessionId);
+    if (!sessionInputOutputMapping) {
+      sessionInputOutputMapping = new Map();
+      this.sessionExternalDataMapping.set(sessionId, sessionInputOutputMapping);
+    }
+
+    const previousBuffer = sessionInputOutputMapping.get(index);
+    const id = this.gpuDataManager.registerExternalBuffer(buffer, size, previousBuffer?.[1]);
+    sessionInputOutputMapping.set(index, [id, buffer]);
+    return id;
+  }
+  unregisterBuffers(sessionId: number): void {
+    const sessionInputOutputMapping = this.sessionExternalDataMapping.get(sessionId);
+    if (sessionInputOutputMapping) {
+      sessionInputOutputMapping.forEach(bufferInfo => this.gpuDataManager.unregisterExternalBuffer(bufferInfo[1]));
+      this.sessionExternalDataMapping.delete(sessionId);
+    }
+  }
+  getBuffer(gpuDataId: number): GPUBuffer {
+    const gpuData = this.gpuDataManager.get(gpuDataId);
+    if (!gpuData) {
+      throw new Error(`no GPU data for buffer: ${gpuDataId}`);
+    }
+    return gpuData.buffer;
+  }
+  createDownloader(gpuBuffer: GPUBuffer, size: number, type: Tensor.GpuBufferDataTypes):
+      () => Promise<Tensor.DataType> {
+    return async () => {
+      const data = await downloadGpuData(this, gpuBuffer, size);
+      return createView(data.buffer, type);
+    };
+  }
+  // #endregion
 }
diff --git a/js/web/lib/wasm/jsep/init.ts b/js/web/lib/wasm/jsep/init.ts
index 78316cbe1c825..d66357e729d5d 100644
--- a/js/web/lib/wasm/jsep/init.ts
+++ b/js/web/lib/wasm/jsep/init.ts
@@ -3,14 +3,14 @@
 
 import {Env} from 'onnxruntime-common';
 
-import {JSEP, OrtWasmModule} from '../binding/ort-wasm';
+import {OrtWasmModule} from '../binding/ort-wasm';
 import {DataType, getTensorElementSize} from '../wasm-common';
 
 import {WebGpuBackend} from './backend-webgpu';
 import {LOG_DEBUG} from './log';
 import {TensorView} from './tensor-view';
 import {ShapeUtil} from './util';
-import {ComputeContext, ComputeContextInputsOutputsMapping, ProgramInfo, ProgramInfoLoader} from './webgpu/types';
+import {ComputeContext, ComputeContextInputsOutputsMapping, ProgramInfo} from './webgpu/types';
 
 /* eslint-disable no-bitwise */
 
@@ -90,8 +90,7 @@ class ComputeContextImpl implements ComputeContext {
     this.inputs = inputs;
   }
 
-  compute(program: ProgramInfoLoader|ProgramInfo, inputsOutputsMapping?: ComputeContextInputsOutputsMapping):
-      TensorView[] {
+  compute(program: ProgramInfo, inputsOutputsMapping?: ComputeContextInputsOutputsMapping): TensorView[] {
     // prepare inputs. inputs should always be valid data.
     const mappedInputs =
         inputsOutputsMapping?.inputs?.map(i => typeof i === 'number' ? this.inputs[i] : i) ?? this.inputs;
@@ -120,6 +119,11 @@ class ComputeContextImpl implements ComputeContext {
         this.module.HEAPU32[offset++] = dims[i];
       }
       return this.module._JsepOutput(this.opKernelContext, index, data);
+    } catch (e) {
+      throw new Error(
+          `Failed to generate kernel's output[${index}] with dims [${dims}]. ` +
+          'If you are running with pre-allocated output, please make sure the output type/dims are correct. ' +
+          `Error: ${e}`);
     } finally {
       this.module.stackRestore(stack);
     }
@@ -138,7 +142,7 @@ export const init = async(module: OrtWasmModule, env: Env): Promise<void> => {
 
     init(
         // backend
-        {backend},
+        backend,
 
         // jsepAlloc()
         (size: number) => backend.alloc(size),
@@ -178,13 +182,13 @@ export const init = async(module: OrtWasmModule, env: Env): Promise<void> => {
         (kernel: number) => backend.releaseKernel(kernel),
 
         // jsepRun
-        (kernel: number, contextDataOffset: number, sessionState: JSEP.SessionState) => {
+        (kernel: number, contextDataOffset: number, sessionHandle: number, errors: Array<Promise<string|null>>) => {
           LOG_DEBUG(
               'verbose',
-              () => `[WebGPU] jsepRun: sessionId=${sessionState.sessionId}, kernel=${kernel}, contextDataOffset=${
+              () => `[WebGPU] jsepRun: sessionHandle=${sessionHandle}, kernel=${kernel}, contextDataOffset=${
                   contextDataOffset}`);
           const context = new ComputeContextImpl(module, backend, contextDataOffset);
-          return backend.computeKernel(kernel, context, sessionState.errors);
+          return backend.computeKernel(kernel, context, errors);
         });
   }
 };
diff --git a/js/web/lib/wasm/jsep/webgpu/attribute-with-cache-key.ts b/js/web/lib/wasm/jsep/webgpu/attribute-with-cache-key.ts
index adba0fb9d022d..ad56b92c1d869 100644
--- a/js/web/lib/wasm/jsep/webgpu/attribute-with-cache-key.ts
+++ b/js/web/lib/wasm/jsep/webgpu/attribute-with-cache-key.ts
@@ -6,13 +6,13 @@ class AttributeWithCacheKeyImpl {
     Object.assign(this, attribute);
   }
 
-  private _cacheKey: string;
+  private key: string;
   public get cacheKey(): string {
-    if (!this._cacheKey) {
-      this._cacheKey =
+    if (!this.key) {
+      this.key =
           Object.getOwnPropertyNames(this).sort().map(name => `${(this as Record<string, unknown>)[name]}`).join(';');
     }
-    return this._cacheKey;
+    return this.key;
   }
 }
 
diff --git a/js/web/lib/wasm/jsep/webgpu/gpu-data-manager.ts b/js/web/lib/wasm/jsep/webgpu/gpu-data-manager.ts
index 92fdd5abc3892..6f3d9a52d9f5d 100644
--- a/js/web/lib/wasm/jsep/webgpu/gpu-data-manager.ts
+++ b/js/web/lib/wasm/jsep/webgpu/gpu-data-manager.ts
@@ -35,7 +35,7 @@ export interface GpuDataManager {
   /**
    * copy data from GPU to CPU.
    */
-  download(id: GpuDataId): Promise<ArrayBufferLike>;
+  download(id: GpuDataId, getTargetBuffer: () => Uint8Array): Promise<void>;
 
   /**
    * refresh the buffers that marked for release.
@@ -46,6 +46,19 @@ export interface GpuDataManager {
    */
   refreshPendingBuffers(): void;
 
+  /**
+   * register an external buffer for IO Binding. If the buffer is already registered, return the existing GPU data ID.
+   *
+   * GPU data manager only manages a mapping between the buffer and the GPU data ID. It will not manage the lifecycle of
+   * the external buffer.
+   */
+  registerExternalBuffer(buffer: GPUBuffer, originalSize: number, previousBuffer?: GPUBuffer): number;
+
+  /**
+   * unregister an external buffer for IO Binding.
+   */
+  unregisterExternalBuffer(buffer: GPUBuffer): void;
+
   /**
    * destroy all gpu buffers. Call this when the session.release is called.
    */
@@ -62,12 +75,56 @@ interface StorageCacheValue {
  */
 const calcNormalizedBufferSize = (size: number) => Math.ceil(size / 16) * 16;
 
-let guid = 0;
+let guid = 1;
 const createNewGpuDataId = () => guid++;
 
+/**
+ * exported standard download function. This function is used by the session to download the data from GPU, and also by
+ * factory to create GPU tensors with the capacity of downloading data from GPU.
+ *
+ * @param backend - the WebGPU backend
+ * @param gpuBuffer - the GPU buffer to download
+ * @param originalSize - the original size of the data
+ * @param getTargetBuffer - optional. If provided, the data will be copied to the target buffer. Otherwise, a new buffer
+ * will be created and returned.
+ */
+export const downloadGpuData =
+    async(backend: WebGpuBackend, gpuBuffer: GPUBuffer, originalSize: number, getTargetBuffer?: () => Uint8Array):
+        Promise<Uint8Array> => {
+          const bufferSize = calcNormalizedBufferSize(originalSize);
+          const gpuReadBuffer = backend.device.createBuffer(
+              // eslint-disable-next-line no-bitwise
+              {size: bufferSize, usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ});
+          try {
+            const commandEncoder = backend.getCommandEncoder();
+            backend.endComputePass();
+            commandEncoder.copyBufferToBuffer(
+                gpuBuffer /* source buffer */, 0 /* source offset */, gpuReadBuffer /* destination buffer */,
+                0 /* destination offset */, bufferSize /* size */
+            );
+            backend.flush();
+
+            await gpuReadBuffer.mapAsync(GPUMapMode.READ);
+
+            const arrayBuffer = gpuReadBuffer.getMappedRange();
+            if (getTargetBuffer) {
+              // if we already have a CPU buffer to accept the data, no need to clone the ArrayBuffer.
+              const targetBuffer = getTargetBuffer();
+              targetBuffer.set(new Uint8Array(arrayBuffer, 0, originalSize));
+              return targetBuffer;
+            } else {
+              // the mapped ArrayBuffer will be released when the GPU buffer is destroyed. Need to clone the
+              // ArrayBuffer.
+              return new Uint8Array(arrayBuffer.slice(0, originalSize));
+            }
+          } finally {
+            gpuReadBuffer.destroy();
+          }
+        };
+
 class GpuDataManagerImpl implements GpuDataManager {
   // GPU Data ID => GPU Data ( storage buffer )
-  storageCache: Map<GpuDataId, StorageCacheValue>;
+  private storageCache: Map<GpuDataId, StorageCacheValue>;
 
   // pending buffers for uploading ( data is unmapped )
   private buffersForUploadingPending: GPUBuffer[];
@@ -76,12 +133,19 @@ class GpuDataManagerImpl implements GpuDataManager {
 
   // The reusable storage buffers for computing.
   private freeBuffers: Map<number, GPUBuffer[]>;
+  // The reusable uniform buffers
+  private freeUniformBuffers: Map<number, GPUBuffer[]>;
+
+  // The external buffers registered users for IO Binding.
+  private externalBuffers: Map<GPUBuffer, GpuDataId>;
 
   constructor(private backend: WebGpuBackend) {
     this.storageCache = new Map();
     this.freeBuffers = new Map();
+    this.freeUniformBuffers = new Map();
     this.buffersForUploadingPending = [];
     this.buffersPending = [];
+    this.externalBuffers = new Map();
   }
 
   upload(id: GpuDataId, data: Uint8Array): void {
@@ -143,6 +207,42 @@ class GpuDataManagerImpl implements GpuDataManager {
         sourceGpuDataCache.gpuData.buffer, 0, destinationGpuDataCache.gpuData.buffer, 0, size);
   }
 
+  registerExternalBuffer(buffer: GPUBuffer, originalSize: number, previousBuffer?: GPUBuffer): number {
+    let id: number|undefined;
+    if (previousBuffer) {
+      id = this.externalBuffers.get(previousBuffer);
+      if (id === undefined) {
+        throw new Error('previous buffer is not registered');
+      }
+      if (buffer === previousBuffer) {
+        LOG_DEBUG(
+            'verbose',
+            () => `[WebGPU] GpuDataManager.registerExternalBuffer(size=${originalSize}) => id=${
+                id}, buffer is the same, skip.`);
+        return id;
+      }
+      this.externalBuffers.delete(previousBuffer);
+    } else {
+      id = createNewGpuDataId();
+    }
+
+    this.storageCache.set(id, {gpuData: {id, type: GpuDataType.default, buffer}, originalSize});
+    this.externalBuffers.set(buffer, id);
+    LOG_DEBUG(
+        'verbose',
+        () => `[WebGPU] GpuDataManager.registerExternalBuffer(size=${originalSize}) => id=${id}, registered.`);
+    return id;
+  }
+
+  unregisterExternalBuffer(buffer: GPUBuffer): void {
+    const id = this.externalBuffers.get(buffer);
+    if (id !== undefined) {
+      this.storageCache.delete(id);
+      this.externalBuffers.delete(buffer);
+      LOG_DEBUG('verbose', () => `[WebGPU] GpuDataManager.unregisterExternalBuffer() => id=${id}`);
+    }
+  }
+
   // eslint-disable-next-line no-bitwise
   create(size: number, usage = GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST): GpuData {
     const bufferSize = calcNormalizedBufferSize(size);
@@ -150,11 +250,15 @@ class GpuDataManagerImpl implements GpuDataManager {
     let gpuBuffer;
     // Currently, only storage buffers are reused.
     // eslint-disable-next-line no-bitwise
-    if ((usage & GPUBufferUsage.STORAGE) === GPUBufferUsage.STORAGE) {
-      let buffers = this.freeBuffers.get(bufferSize);
+    const isStorage = (usage & GPUBufferUsage.STORAGE) === GPUBufferUsage.STORAGE;
+    // eslint-disable-next-line no-bitwise
+    const isUniform = (usage & GPUBufferUsage.UNIFORM) === GPUBufferUsage.UNIFORM;
+    if (isStorage || isUniform) {
+      const freeBuffers = isStorage ? this.freeBuffers : this.freeUniformBuffers;
+      let buffers = freeBuffers.get(bufferSize);
       if (!buffers) {
         buffers = [];
-        this.freeBuffers.set(bufferSize, buffers);
+        freeBuffers.set(bufferSize, buffers);
       }
       if (buffers.length > 0) {
         gpuBuffer = buffers.pop() as GPUBuffer;
@@ -193,31 +297,13 @@ class GpuDataManagerImpl implements GpuDataManager {
     return cachedData.originalSize;
   }
 
-  async download(id: GpuDataId): Promise<ArrayBufferLike> {
+  async download(id: GpuDataId, getTargetBuffer: () => Uint8Array): Promise<void> {
     const cachedData = this.storageCache.get(id);
     if (!cachedData) {
       throw new Error('data does not exist');
     }
 
-    const commandEncoder = this.backend.getCommandEncoder();
-    this.backend.endComputePass();
-    const bufferSize = calcNormalizedBufferSize(cachedData.originalSize);
-    const gpuReadBuffer = this.backend.device.createBuffer(
-        // eslint-disable-next-line no-bitwise
-        {size: bufferSize, usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ});
-    commandEncoder.copyBufferToBuffer(
-        cachedData.gpuData.buffer /* source buffer */, 0 /* source offset */, gpuReadBuffer /* destination buffer */,
-        0 /* destination offset */, bufferSize /* size */
-    );
-    this.backend.flush();
-
-    return new Promise<ArrayBuffer>((resolve) => {
-      gpuReadBuffer.mapAsync(GPUMapMode.READ).then(() => {
-        const data = gpuReadBuffer.getMappedRange().slice(0);
-        gpuReadBuffer.destroy();
-        resolve(data);
-      });
-    });
+    await downloadGpuData(this.backend, cachedData.gpuData.buffer, cachedData.originalSize, getTargetBuffer);
   }
 
   refreshPendingBuffers(): void {
@@ -231,6 +317,10 @@ class GpuDataManagerImpl implements GpuDataManager {
       if ((buffer.usage & GPUBufferUsage.STORAGE) === GPUBufferUsage.STORAGE) {
         // Put the pending buffer to freeBuffers list instead of really destroying it for buffer reusing.
         this.freeBuffers.get(buffer.size)!.push(buffer);
+        // eslint-disable-next-line no-bitwise
+      } else if ((buffer.usage & GPUBufferUsage.UNIFORM) === GPUBufferUsage.UNIFORM) {
+        // Put the pending buffer to freeUniformBuffers list instead of really destroying it for buffer reusing.
+        this.freeUniformBuffers.get(buffer.size)!.push(buffer);
       } else {
         buffer.destroy();
       }
@@ -244,6 +334,11 @@ class GpuDataManagerImpl implements GpuDataManager {
         buffer.destroy();
       });
     });
+    this.freeUniformBuffers.forEach((buffers) => {
+      buffers.forEach(buffer => {
+        buffer.destroy();
+      });
+    });
 
     this.storageCache.forEach((storage) => {
       storage.gpuData.buffer.destroy();
@@ -251,6 +346,7 @@ class GpuDataManagerImpl implements GpuDataManager {
 
     this.storageCache = new Map();
     this.freeBuffers = new Map();
+    this.freeUniformBuffers = new Map();
   }
 }
 
diff --git a/js/web/lib/wasm/jsep/webgpu/op-resolve-rules.ts b/js/web/lib/wasm/jsep/webgpu/op-resolve-rules.ts
index e92e6696d9a78..80f6e3bc11195 100644
--- a/js/web/lib/wasm/jsep/webgpu/op-resolve-rules.ts
+++ b/js/web/lib/wasm/jsep/webgpu/op-resolve-rules.ts
@@ -2,6 +2,10 @@
 // Licensed under the MIT License.
 
 import {argMax, argMin, parseArgMinMaxAttributes} from './ops/argminmax';
+import {attention, parseAttentionAttributes} from './ops/attention';
+import {batchNorm} from './ops/batch-norm';
+import {biasAdd} from './ops/bias-add';
+import {biasSplitGelu} from './ops/bias-split-gelu';
 import * as binaryOps from './ops/binary-op';
 import {concat, parseConcatAttributes} from './ops/concat';
 import {conv, parseConvAttributes} from './ops/conv';
@@ -14,8 +18,10 @@ import {gemm, parseGemmAttributes} from './ops/gemm';
 import {instanceNorm, parseInstanceNormAttributes} from './ops/instance-norm';
 import {layerNorm, parseLayerNormAttributes} from './ops/layer-norm';
 import {matMul} from './ops/matmul';
+import {multiHeadAttention, parseMultiHeadAttentionAttributes} from './ops/multi-head-attentiion';
 import {pad, parsePadAttributes} from './ops/pad';
 import * as pool from './ops/pool';
+import {range} from './ops/range';
 import {parseReduceAttributes, reduceL1, reduceL2, reduceLogSum, reduceLogSumExp, reduceMax, reduceMean, reduceMin, reduceProd, reduceSum, reduceSumSquare} from './ops/reduce';
 import {parseResizeAttributes, resize} from './ops/resize';
 import {parseSkipLayerNormAttributes, skipLayerNorm} from './ops/skip-layer-norm';
@@ -25,6 +31,7 @@ import {parseSplitAttributes, split} from './ops/split';
 import {tile} from './ops/tile';
 import {parseTransposeAttributes, transpose} from './ops/transpose';
 import * as unaryOps from './ops/unary-op';
+import {where} from './ops/where';
 import {ComputeContext} from './types';
 
 export type RunFunction = (context: ComputeContext, attribute?: unknown) => void;
@@ -42,11 +49,14 @@ export const WEBGPU_OP_RESOLVE_RULES: Map<string, OperatorImplementation> = new
   ['Asinh', [unaryOps.asinh]],
   ['Atan', [unaryOps.atan]],
   ['Atanh', [unaryOps.atanh]],
+  ['Attention', [attention, parseAttentionAttributes]],
   // TODO: support new attributes for AveragePool-10
   ['AveragePool', [pool.averagePool, pool.parseAveragePoolAttributes]],
+  ['BatchNormalization', [batchNorm]],
+  ['BiasAdd', [biasAdd]],
+  ['BiasSplitGelu', [biasSplitGelu]],
   ['Cast', [unaryOps.cast, unaryOps.parseCastAttributes]],
   ['Ceil', [unaryOps.ceil]],
-  ['ClipV10', [unaryOps.clipV10]],
   ['Clip', [unaryOps.clip]],
   ['Concat', [concat, parseConcatAttributes]],
   ['Conv', [conv, parseConvAttributes]],
@@ -61,6 +71,7 @@ export const WEBGPU_OP_RESOLVE_RULES: Map<string, OperatorImplementation> = new
   ['Exp', [unaryOps.exp]],
   ['Expand', [expand]],
   ['Floor', [unaryOps.floor]],
+  ['FusedConv', [conv, parseConvAttributes]],
   ['Gather', [gather, parseGatherAttributes]],
   ['GatherElements', [gatherElements, parseGatherElementsAttributes]],
   ['Gelu', [unaryOps.gelu]],
@@ -79,10 +90,12 @@ export const WEBGPU_OP_RESOLVE_RULES: Map<string, OperatorImplementation> = new
   // TODO: support new attributes for MaxPool-8 and MaxPool-10
   ['MaxPool', [pool.maxPool, pool.parseMaxPoolAttributes]],
   ['Mul', [binaryOps.mul]],
+  ['MultiHeadAttention', [multiHeadAttention, parseMultiHeadAttentionAttributes]],
   ['Neg', [unaryOps.neg]],
   ['Not', [unaryOps.not]],
   ['Pad', [pad, parsePadAttributes]],
   ['Pow', [binaryOps.pow]],
+  ['Range', [range]],
   ['Reciprocal', [unaryOps.reciprocal]],
   ['ReduceMin', [reduceMin, parseReduceAttributes]],
   ['ReduceMean', [reduceMean, parseReduceAttributes]],
@@ -110,4 +123,5 @@ export const WEBGPU_OP_RESOLVE_RULES: Map<string, OperatorImplementation> = new
   ['ThresholdedRelu', [unaryOps.thresholdedRelu, unaryOps.parseAlphaAttributes]],
   ['Tile', [tile]],
   ['Transpose', [transpose, parseTransposeAttributes]],
+  ['Where', [where]],
 ]);
diff --git a/js/web/lib/wasm/jsep/webgpu/ops/3rd-party/activation_util.ts b/js/web/lib/wasm/jsep/webgpu/ops/3rd-party/activation_util.ts
index dd4f13e76ee04..a121bf3892a32 100644
--- a/js/web/lib/wasm/jsep/webgpu/ops/3rd-party/activation_util.ts
+++ b/js/web/lib/wasm/jsep/webgpu/ops/3rd-party/activation_util.ts
@@ -19,34 +19,21 @@
 //
 // modified to fit the needs of the project
 
-export declare type Activation = 'linear' | 'relu' | 'prelu' | 'elu' | 'relu6' | 'leakyrelu' | 'sigmoid' | 'gelu';
-
-export const typeSnippet = (component: number) => {
+export const typeSnippet = (component: number, dataType: string) => {
   switch (component) {
     case 1:
-      return 'f32';
+      return dataType;
     case 2:
-      return 'vec2<f32>';
+      return `vec2<${dataType}>`;
     case 3:
-      return 'vec3<f32>';
+      return `vec3<${dataType}>`;
     case 4:
-      return 'vec4<f32>';
+      return `vec4<${dataType}>`;
     default:
       throw new Error(`${component}-component is not supported.`);
   }
 };
 
-export const activationFnSnippet =
-    (activation?: Activation, _hasPreluActivationWeights = false, _packed = false, _coordsLength = 3): string => {
-      if (!activation) {
-        return '';
-      }
-
-      // TODO: add implementations
-      return '';
-    };
-
-export const biasActivationSnippet = (hasBias: boolean, activation?: Activation): string => `
+export const biasSnippet = (hasBias: boolean): string => `
       ${hasBias ? 'value = value + getBiasByOutputCoords(coords);' : ''}
-      ${activation ? 'value = activation(value, coords);' : ''}
       `;
diff --git a/js/web/lib/wasm/jsep/webgpu/ops/3rd-party/conv2d_mm_webgpu.ts b/js/web/lib/wasm/jsep/webgpu/ops/3rd-party/conv2d_mm_webgpu.ts
index 08b1d1f30b233..3638938df7dbe 100644
--- a/js/web/lib/wasm/jsep/webgpu/ops/3rd-party/conv2d_mm_webgpu.ts
+++ b/js/web/lib/wasm/jsep/webgpu/ops/3rd-party/conv2d_mm_webgpu.ts
@@ -21,24 +21,25 @@
 
 import {LOG_DEBUG} from '../../../log';
 import {TensorView} from '../../../tensor-view';
-import {ShapeUtil} from '../../../util';
-import {GpuDataType, ProgramInfo, ProgramMetadata} from '../../types';
+import {ProgramInfo, ProgramUniform} from '../../types';
+import {createTensorShapeVariables, inputVariable, outputVariable, ShaderHelper, tensorTypeToWsglStorageType} from '../common';
 import {ConvAttributes} from '../conv';
+import {getActivationSnippet} from '../fuse-utils';
 
-import {Activation, activationFnSnippet, biasActivationSnippet, typeSnippet} from './activation_util';
+import {biasSnippet, typeSnippet} from './activation_util';
 import {utilFunctions} from './conv_util';
 import {makeMatMulPackedSource, makeMatMulPackedVec4Source} from './matmul_packed_webgpu';
 
 const conv2dCommonSnippet =
     (isChannelsLast: boolean, fitAOuter: boolean, fitBOuter: boolean, fitInner: boolean, addBias = false,
-     activation?: Activation, hasPreluActivationWeights = false, innerElementSizeX = 4, innerElementSizeW = 4,
-     innerElementSize = 4): string => {
+     attributes: ConvAttributes, innerElementSizeX = 4, innerElementSizeW = 4, innerElementSize = 4,
+     dataType = 'f32'): string => {
       const getXSnippet = (innerElementSize: number) => {
         switch (innerElementSize) {
           case 1:
             return 'resData = x[xIndex];';
           case 3:
-            return 'resData = vec3<f32>(x[xIndex], x[xIndex + 1], x[xIndex + 2]);';
+            return `resData = vec3<${dataType}>(x[xIndex], x[xIndex + 1], x[xIndex + 2]);`;
           case 4:
             return 'resData = x[xIndex / 4];';
           default:
@@ -48,9 +49,9 @@ const conv2dCommonSnippet =
       const getWSnippet = (innerElementSize: number) => {
         switch (innerElementSize) {
           case 1:
-            return 'return w[row * wShape[3] + colIn];';
+            return 'return w[row * i32(uniforms.w_shape[3]) + colIn];';
           case 4:
-            return 'return w[row * wShape[3] / 4 + colIn];';
+            return 'return w[row * i32(uniforms.w_shape[3]) / 4 + colIn];';
           default:
             throw new Error(`innerElementSize ${innerElementSize} is not supported.`);
         }
@@ -77,13 +78,13 @@ const conv2dCommonSnippet =
       col % outWidth);
     `;
 
-      const xHeight = isChannelsLast ? 'xShape[1]' : 'xShape[2]';
-      const xWidth = isChannelsLast ? 'xShape[2]' : 'xShape[3]';
+      const xHeight = isChannelsLast ? 'i32(uniforms.x_shape[1])' : 'i32(uniforms.x_shape[2])';
+      const xWidth = isChannelsLast ? 'i32(uniforms.x_shape[2])' : 'i32(uniforms.x_shape[3])';
       const row = isChannelsLast ? 'row' : 'col';
       const col = isChannelsLast ? 'col' : 'row';
       const readXSnippet = `
-    let inChannels = wShape[2];
-    let outWidth = ${isChannelsLast ? 'outShape[2]' : 'outShape[3]'};
+    let inChannels = i32(uniforms.w_shape[2]);
+    let outWidth = ${isChannelsLast ? 'i32(uniforms.result_shape[2])' : 'i32(uniforms.result_shape[3])'};
     let outRow = ${row} / outWidth;
     let outCol = ${row} % outWidth;
 
@@ -92,12 +93,12 @@ const conv2dCommonSnippet =
     let xRow = outRow * stride[0] + dilation[0] * WRow - pad[0];
     let xCol = outCol * stride[1] + dilation[1] * WCol - pad[1];
     let xCh = ${col} % inChannels;
-    var resData = ${typeSnippet(innerElementSizeX)}(0.0);
+    var resData = ${typeSnippet(innerElementSizeX, dataType)}(0.0);
     // The bounds checking is always needed since we use it to pad zero for
     // the 'same' padding type.
     if (xRow >= 0 && xRow < ${xHeight} && xCol >= 0 && xCol < ${xWidth}) {
       ${coordASnippet}
-      let xIndex = getIndexFromCoords4D(coord, xShape);
+      let xIndex = getIndexFromCoords4D(coord, vec4<i32>(uniforms.x_shape));
       ${getXSnippet(innerElementSizeX)}
     }
     return resData;`;
@@ -107,27 +108,30 @@ const conv2dCommonSnippet =
     ${readXSnippet}` :
                                                                 `
     let col = colIn * ${innerElementSizeX};
-    if (row < dimAOuter && col < dimInner) {
+    if (row < uniforms.dimAOuter && col < uniforms.dimInner) {
       ${readXSnippet}
     }
-    return ${typeSnippet(innerElementSizeX)}(0.0);`) :
+    return ${typeSnippet(innerElementSizeX, dataType)}(0.0);`) :
                                        (fitInner && fitBOuter ? `
     let col = colIn * ${innerElementSizeX};
     ${readXSnippet}` :
                                                                 `
     let col = colIn * ${innerElementSizeX};
-    if (row < dimInner && col < dimBOuter) {
+    if (row < uniforms.dimInner && col < uniforms.dimBOuter) {
       ${readXSnippet}
     }
-    return ${typeSnippet(innerElementSizeX)}(0.0);`);
+    return ${typeSnippet(innerElementSizeX, dataType)}(0.0);`);
 
       const sampleW = `${getWSnippet(innerElementSizeW)}`;
 
-      const resType = typeSnippet(innerElementSize);
-      const aType = isChannelsLast ? typeSnippet(innerElementSizeX) : typeSnippet(innerElementSizeW);
-      const bType = isChannelsLast ? typeSnippet(innerElementSizeW) : typeSnippet(innerElementSizeX);
+      const resType = typeSnippet(innerElementSize, dataType);
+      const aType =
+          isChannelsLast ? typeSnippet(innerElementSizeX, dataType) : typeSnippet(innerElementSizeW, dataType);
+      const bType =
+          isChannelsLast ? typeSnippet(innerElementSizeW, dataType) : typeSnippet(innerElementSizeX, dataType);
+      const {activationFunction, applyActivation} = getActivationSnippet(attributes, resType);
       const userCode = `
-    ${activationFnSnippet(activation, hasPreluActivationWeights, innerElementSize === 4, 4)}
+    ${activationFunction}
     fn mm_readA(batch: i32, row : i32, colIn : i32) -> ${aType} {
       ${isChannelsLast ? sampleX : sampleW}
     }
@@ -138,12 +142,13 @@ const conv2dCommonSnippet =
 
     fn mm_write(batch: i32, row : i32, colIn : i32, valueIn : ${resType}) {
       let col = colIn * ${innerElementSize};
-      if (row < dimAOuter && col < dimBOuter)
+      if (row < uniforms.dimAOuter && col < uniforms.dimBOuter)
       {
       var value = valueIn;
-      let outWidth = ${isChannelsLast ? 'outShape[2]' : 'outShape[3]'};
+      let outWidth = ${isChannelsLast ? 'i32(uniforms.result_shape[2])' : 'i32(uniforms.result_shape[3])'};
       ${coordResSnippet}
-      ${biasActivationSnippet(addBias, activation)}
+      ${biasSnippet(addBias)}
+      ${applyActivation}
       setOutputAtCoords(coords[0], coords[1], coords[2], coords[3], value);
       }
     }`;
@@ -151,26 +156,22 @@ const conv2dCommonSnippet =
     };
 
 export const createConv2DMatMulProgramInfo =
-    (inputs: readonly TensorView[], metadata: ProgramMetadata, attributes: ConvAttributes,
-     outputShape: readonly number[], dimAOuter: number, dimBOuter: number, dimInner: number, hasBias: boolean,
-     sequentialAccessByThreads: boolean): ProgramInfo => {
+    (inputs: readonly TensorView[], attributes: ConvAttributes, outputShape: readonly number[], dimAOuter: number,
+     dimBOuter: number, dimInner: number, hasBias: boolean, sequentialAccessByThreads: boolean): ProgramInfo => {
       const isChannelsLast = attributes.format === 'NHWC';
       const inChannels = isChannelsLast ? inputs[0].dims[3] : inputs[0].dims[1];
       const batchSize = outputShape[0];
       const outWidth = isChannelsLast ? outputShape[2] : outputShape[3];
       const outHeight = isChannelsLast ? outputShape[1] : outputShape[2];
       const outChannels = isChannelsLast ? outputShape[3] : outputShape[1];
-      const isVec4 = (((inChannels % 4 === 0 || inChannels % 3 === 0) && isChannelsLast) ||
-                      (outWidth % 4 === 0 && !isChannelsLast)) &&
-          outChannels % 4 === 0;
+      // TODO: enable vec4 for NCHW
+      const isVec4 = isChannelsLast && (inChannels % 4 === 0 || inChannels % 3 === 0) && outChannels % 4 === 0;
 
       // TODO: fine tune size
       const dispatchX = isChannelsLast ? outChannels : outWidth * outHeight;
       const dispatchY = isChannelsLast ? outWidth * outHeight : outChannels;
-      const workGroupSize: [number, number, number] =
-          isVec4 ? [8, 8, 1] : [dispatchX <= 4 ? 4 : 16, dispatchX > 4 && dispatchY <= 4 ? 4 : 16, 1];
-      const elementsPerThread =
-          isVec4 ? [4, 4, 1] : [dispatchX <= 4 ? 1 : 2, dispatchX > 4 && dispatchY <= 4 ? 1 : 2, 1];
+      const workGroupSize: [number, number, number] = [8, 8, 1];
+      const elementsPerThread = dimAOuter <= 8 ? [4, 1, 1] : [4, 4, 1];
       const dispatch = [
         Math.ceil(dispatchX / workGroupSize[0] / elementsPerThread[0]),
         Math.ceil(dispatchY / workGroupSize[1] / elementsPerThread[1]),
@@ -179,7 +180,7 @@ export const createConv2DMatMulProgramInfo =
 
       LOG_DEBUG('verbose', () => `[conv2d_mm_webgpu] dispatch = ${dispatch}`);
 
-      const innerElementSize = isVec4 ? (isChannelsLast && inChannels % 4 !== 0 ? 3 : 4) : elementsPerThread[0];
+      const innerElementSize = isVec4 ? (isChannelsLast && inChannels % 4 !== 0 ? 3 : 4) : 1;
 
       const tileAOuter = workGroupSize[1] * elementsPerThread[1];
       const tileBOuter = workGroupSize[0] * elementsPerThread[0];
@@ -190,62 +191,73 @@ export const createConv2DMatMulProgramInfo =
       const fitInner = dimInner % tileInner === 0;
 
       const elementsSize = isVec4 ? [innerElementSize, 4, 4] : [1, 1, 1];
+      const t = tensorTypeToWsglStorageType(inputs[0].dataType);
+
+      // TODO: support component 2, 3.
+      const components = isVec4 ? 4 : 1;
+      const programUniforms: ProgramUniform[] =
+          [{type: 'int32', data: dimAOuter}, {type: 'int32', data: dimBOuter}, {type: 'int32', data: dimInner}];
+      const x =
+          inputVariable('x', inputs[0].dataType, inputs[0].dims.length, innerElementSize === 3 ? 1 : innerElementSize);
+      const w = inputVariable('w', inputs[1].dataType, inputs[1].dims.length, components);
+      const inputVariables = [x, w];
+
+      programUniforms.push(...createTensorShapeVariables(inputs[0].dims));
+      programUniforms.push(...createTensorShapeVariables(inputs[1].dims));
 
-      const declareInputs = [
-        `@group(0) @binding(0) var<storage, read> x: array<${isVec4 && innerElementSize === 4 ? 'vec4<f32>' : 'f32'}>;`,
-        `@group(0) @binding(1) var<storage, read> w: array<${isVec4 ? 'vec4<f32>' : 'f32'}>;`
-      ];
       let declareFunctions = `
-      fn setOutputAtIndex(flatIndex : i32, value : ${isVec4 ? 'vec4<f32>' : 'f32'}) {
-        result[flatIndex] = ${isVec4 ? 'vec4<f32>' : 'f32'}(value);
+      fn setOutputAtIndex(flatIndex : i32, value : ${isVec4 ? `vec4<${t}>` : t}) {
+        result[flatIndex] = ${isVec4 ? `vec4<${t}>` : t}(value);
       }
-      fn setOutputAtCoords(d0 : i32, d1 : i32, d2 : i32, d3 : i32, value : ${isVec4 ? 'vec4<f32>' : 'f32'}) {
+      fn setOutputAtCoords(d0 : i32, d1 : i32, d2 : i32, d3 : i32, value : ${isVec4 ? `vec4<${t}>` : t}) {
         let flatIndex = getOutputIndexFromCoords(vec4<i32>(d0, d1, d2, d3));
         setOutputAtIndex(flatIndex ${isVec4 ? '/ 4' : ''}, value);
       }`;
       if (hasBias) {
-        declareInputs.push(`@group(0) @binding(2) var<storage, read> bias: array<${isVec4 ? 'vec4<f32>' : 'f32'}>;`);
+        const bias = inputVariable('bias', inputs[2].dataType, inputs[2].dims.length, components);
+        inputVariables.push(bias);
+
+        programUniforms.push(...createTensorShapeVariables(inputs[2].dims));
+
         declareFunctions += `
-        fn getBiasByOutputCoords(coords : vec4<i32>) -> ${isVec4 ? 'vec4<f32>' : 'f32'} {
+        fn getBiasByOutputCoords(coords : vec4<i32>) -> ${isVec4 ? `vec4<${t}>` : t} {
           return bias[coords.${isChannelsLast ? 'w' : 'y'}${isVec4 ? '/ 4' : ''}];
         }`;
       }
-
+      const output = outputVariable('result', inputs[0].dataType, outputShape.length, components);
+      programUniforms.push(...createTensorShapeVariables(outputShape));
       return {
-        ...metadata,
-        outputs: [{dims: outputShape, dataType: inputs[0].dataType, gpuDataType: GpuDataType.default}],
-        dispatchGroup: () => ({x: dispatch[0], y: dispatch[1], z: dispatch[2]}),
-        getShaderSource: () => `
-        ${utilFunctions}
+        name: 'Conv2DMatMul',
+        shaderCache: {hint: attributes.cacheKey},
+        getRunData: () => ({
+          outputs: [{dims: outputShape, dataType: inputs[0].dataType}],
+          dispatchGroup: {x: dispatch[0], y: dispatch[1], z: dispatch[2]},
+          programUniforms,
+        }),
+        getShaderSource: (shaderHelper: ShaderHelper) => `
+        ${utilFunctions('uniforms.result_strides')}
         //struct Uniforms { xShape : vec4<i32>, wShape : vec4<i32>, outShape : vec4<i32>,
         //  outShapeStrides: vec3<i32>, filterDims : vec2<i32>, pad : vec2<i32>, stride : vec2<i32>,
         //  dilation : vec2<i32>, dimAOuter : i32, dimBOuter : i32, dimInner : i32 };
-        ${declareInputs.join('')}
-        @group(0) @binding(${declareInputs.length}) var<storage, read_write> result: array<${
-            isVec4 ? 'vec4<f32>' : 'f32'}>;
-        //@group(0) @binding(${declareInputs.length + 1}) var<uniform> uniforms: Uniforms;
-
-        const xShape : vec4<i32> = vec4<i32>(${inputs[0].dims.join(',')});
-        const wShape : vec4<i32> = vec4<i32>(${inputs[1].dims.join(',')});
-        const outShape : vec4<i32> = vec4<i32>(${outputShape.join(',')});
-        const outShapeStrides : vec3<i32> = vec3<i32>(${ShapeUtil.computeStrides(outputShape).slice(0, 3).join(',')});
+        ${
+            shaderHelper.registerUniform('dimAOuter', 'i32')
+                .registerUniform('dimBOuter', 'i32')
+                .registerUniform('dimInner', 'i32')
+                .declareVariables(...inputVariables, output)}
         const filterDims : vec2<i32> = vec2<i32>(${attributes.kernelShape[0]}, ${attributes.kernelShape[1]});
         const pad : vec2<i32> = vec2<i32>(${attributes.pads[0]}, ${attributes.pads[1]});
         const stride : vec2<i32> = vec2<i32>(${attributes.strides[0]}, ${attributes.strides[1]});
         const dilation : vec2<i32> = vec2<i32>(${attributes.dilations[0]}, ${attributes.dilations[1]});
-        const dimAOuter : i32 = ${dimAOuter};
-        const dimBOuter : i32 = ${dimBOuter};
-        const dimInner : i32 = ${dimInner};
         ${declareFunctions}
         ${
             conv2dCommonSnippet(
-                isChannelsLast, fitAOuter, fitBOuter, fitInner, hasBias, undefined, false, elementsSize[0],
-                elementsSize[1], elementsSize[2])}
+                isChannelsLast, fitAOuter, fitBOuter, fitInner, hasBias, attributes, elementsSize[0], elementsSize[1],
+                elementsSize[2], t)}
             ${
             isVec4 ?
-                makeMatMulPackedVec4Source(elementsPerThread, workGroupSize, undefined, !isChannelsLast, tileInner) :
+                makeMatMulPackedVec4Source(elementsPerThread, workGroupSize, t, undefined, !isChannelsLast, tileInner) :
                 makeMatMulPackedSource(
-                    elementsPerThread, workGroupSize, undefined, !isChannelsLast, tileInner, false, undefined,
+                    elementsPerThread, workGroupSize, t, undefined, !isChannelsLast, tileInner, false, undefined,
                     sequentialAccessByThreads)}`
       };
     };
diff --git a/js/web/lib/wasm/jsep/webgpu/ops/3rd-party/conv_backprop_mm_webgpu.ts b/js/web/lib/wasm/jsep/webgpu/ops/3rd-party/conv_backprop_mm_webgpu.ts
new file mode 100644
index 0000000000000..d425155857e14
--- /dev/null
+++ b/js/web/lib/wasm/jsep/webgpu/ops/3rd-party/conv_backprop_mm_webgpu.ts
@@ -0,0 +1,257 @@
+/**
+ * @license
+ * Copyright 2021 Google LLC. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * =============================================================================
+ */
+
+// sampled from [@tensorflow/tfjs] tfjs-backend-webgpu/src/conv_backprop_mm_webgpu.ts
+//
+// modified to fit the needs of the project
+
+import {LOG_DEBUG} from '../../../log';
+import {TensorView} from '../../../tensor-view';
+import {ProgramInfo, ProgramUniform} from '../../types';
+import {createTensorShapeVariables, inputVariable, outputVariable, ShaderHelper} from '../common';
+import {ConvTransposeAttributes} from '../conv-transpose';
+import {getActivationSnippet} from '../fuse-utils';
+
+import {biasSnippet, typeSnippet} from './activation_util';
+import {utilFunctions} from './conv_util';
+import {makeMatMulPackedSource, makeMatMulPackedVec4Source} from './matmul_packed_webgpu';
+
+const conv2dTransposeCommonSnippet =
+    (isChannelsLast: boolean, addBias = false, attributes: ConvTransposeAttributes, innerElementSize = 4): string => {
+      const type = typeSnippet(innerElementSize, 'f32');
+      const getWSnippet = (innerElementSize: number) => {
+        switch (innerElementSize) {
+          case 1:
+            return 'return w[getIndexFromCoords4D(coord, vec4<i32>(uniforms.w_shape))];';
+          case 4:
+            return `
+            let coord1 = vec4<i32>(coordX, coordY, col + 1, rowInner);
+            let coord2 = vec4<i32>(coordX, coordY, col + 2, rowInner);
+            let coord3 = vec4<i32>(coordX, coordY, col + 3, rowInner);
+            let v0 = w[getIndexFromCoords4D(coord, vec4<i32>(uniforms.w_shape))];
+            let v1 = w[getIndexFromCoords4D(coord1, vec4<i32>(uniforms.w_shape))];
+            let v2 = w[getIndexFromCoords4D(coord2, vec4<i32>(uniforms.w_shape))];
+            let v3 = w[getIndexFromCoords4D(coord3, vec4<i32>(uniforms.w_shape))];
+            return vec4<f32>(v0, v1, v2, v3);
+            `;
+          default:
+            throw new Error(`innerElementSize ${innerElementSize} is not supported.`);
+        }
+      };
+      const coordASnippet = isChannelsLast ? `
+      let coord = vec4<i32>(batch, iXR, iXC, xCh);
+      ` :
+                                             `
+      let coord = vec4<i32>(batch, xCh, iXR, iXC);
+      `;
+
+      const coordResSnippet = isChannelsLast ? `
+    let coords = vec4<i32>(
+      batch,
+      row / outWidth,
+      row % outWidth,
+      col);
+    ` :
+                                               `
+    let coords = vec4<i32>(
+      batch,
+      row,
+      col / outWidth,
+      col % outWidth);
+    `;
+
+      const xHeight = isChannelsLast ? 'outBackprop[1]' : 'outBackprop[2]';
+      const xWidth = isChannelsLast ? 'outBackprop[2]' : 'outBackprop[3]';
+      const row = isChannelsLast ? 'row' : 'col';
+      const col = isChannelsLast ? 'col' : 'row';
+
+      const readASnippet = `
+      let inChannels = ${isChannelsLast ? 'outBackprop[3]' : 'outBackprop[1]'};
+      let outWidth = ${isChannelsLast ? 'i32(uniforms.result_shape[2])' : 'i32(uniforms.result_shape[3])'};
+      let outRow = ${row} / outWidth;
+      let outCol = ${row} % outWidth;
+
+      let WRow = ${col} / (filterDims[1] * inChannels);
+      let WCol = ${col} / inChannels % filterDims[1];
+      let xR = f32(outRow - pads[0] + dilation[0] * WRow) / f32(strides[0]);
+      let xC = f32(outCol - pads[1] + dilation[1] * WCol) / f32(strides[1]);
+      if (xR < 0.0 || xR >= f32(${xHeight}) || fract(xR) > 0.0) {
+        return ${type}(0.0);
+      }
+      if (xC < 0.0 || xC >= f32(${xWidth}) || fract(xC) > 0.0) {
+        return ${type}(0.0);
+      }
+      let iXR = i32(xR);
+      let iXC = i32(xC);
+      let xCh = ${col} % inChannels;
+      ${coordASnippet}
+      return x[getIndexFromCoords4D(coord, vec4<i32>(uniforms.x_shape))/${innerElementSize}];`;
+
+      const sampleA = isChannelsLast ? `
+      let col = colIn * ${innerElementSize};
+      if (row < uniforms.dimAOuter && col < uniforms.dimInner) {
+        ${readASnippet}
+      }
+      return ${type}(0.0);` :
+                                       `
+      let col = colIn * ${innerElementSize};
+      if (row < uniforms.dimInner && col < uniforms.dimBOuter) {
+        ${readASnippet}
+      }
+      return ${type}(0.0);`;
+
+      const sampleW = `
+      let col = colIn * ${innerElementSize};
+      let inChannels = ${isChannelsLast ? 'outBackprop[3]' : 'outBackprop[1]'};
+      let coordX = filterDims.x - 1 - row / (filterDims[1] * inChannels);
+      let coordY = filterDims.y - 1 - (row / inChannels) % filterDims[1];
+      if (${
+          isChannelsLast ? 'row < uniforms.dimInner && col < uniforms.dimBOuter' :
+                           'row < uniforms.dimInner && col < uniforms.dimAOuter'}  && coordX >= 0 && coordY >= 0) {
+        let rowInner = row % inChannels;
+        let coord = vec4<i32>(coordX, coordY, col, rowInner);
+        ${getWSnippet(innerElementSize)}
+      }
+      return ${type}(0.0);
+      `;
+
+      const {activationFunction, applyActivation} = getActivationSnippet(attributes, type);
+      const userCode = `
+      ${activationFunction}
+  fn mm_readA(batch: i32, row : i32, colIn : i32) -> ${type} {
+    ${isChannelsLast ? sampleA : sampleW}
+  }
+
+  fn mm_readB(batch: i32, row : i32, colIn : i32) -> ${type} {
+    ${isChannelsLast ? sampleW : sampleA}
+  }
+
+  fn mm_write(batch: i32, row : i32, colIn : i32, valueInput : ${type}) {
+    let col = colIn * ${innerElementSize};
+    if (row < uniforms.dimAOuter && col < uniforms.dimBOuter) {
+      var value = valueInput;
+      let outWidth = ${isChannelsLast ? 'i32(uniforms.result_shape[2])' : 'i32(uniforms.result_shape[3])'};
+      ${coordResSnippet}
+      ${biasSnippet(addBias)}
+      ${applyActivation}
+      result[getIndexFromCoords4D(coords, vec4<i32>(uniforms.result_shape))/${innerElementSize}] = value;
+    }
+  }`;
+      return userCode;
+    };
+
+export const createConv2DTransposeMatMulProgramInfo =
+    (inputs: readonly TensorView[], attributes: ConvTransposeAttributes, outputShape: readonly number[],
+     dimAOuter: number, dimBOuter: number, dimInner: number, hasBias: boolean,
+     sequentialAccessByThreads: boolean): ProgramInfo => {
+      const isChannelsLast = attributes.format === 'NHWC';
+      const inChannels = isChannelsLast ? inputs[0].dims[3] : inputs[0].dims[1];
+      const batchSize = outputShape[0];
+      const outWidth = isChannelsLast ? outputShape[2] : outputShape[3];
+      const outHeight = isChannelsLast ? outputShape[1] : outputShape[2];
+      const outChannels = isChannelsLast ? outputShape[3] : outputShape[1];
+      const isVec4 =
+          isChannelsLast ? inChannels % 4 === 0 && outChannels % 4 === 0 : outWidth % 4 === 0 && outChannels % 4 === 0;
+
+      // TODO: fine tune size
+      const dispatchX = isChannelsLast ? outChannels : outWidth * outHeight;
+      const dispatchY = isChannelsLast ? outWidth * outHeight : outChannels;
+      const workGroupSize: [number, number, number] = isVec4 ?
+          [8, 8, 1] :
+          [(dispatchX <= 4 || dispatchY <= 4) ? 4 : 16, dispatchX > 4 && dispatchY <= 4 ? 4 : 16, 1];
+      const elementsPerThread =
+          isVec4 ? [4, 4, 1] : [dispatchX <= 4 ? 1 : 4, dispatchX > 4 && dispatchY <= 4 ? 1 : 4, 1];
+      const dispatch = [
+        Math.ceil(dispatchX / workGroupSize[0] / elementsPerThread[0]),
+        Math.ceil(dispatchY / workGroupSize[1] / elementsPerThread[1]),
+        Math.ceil(batchSize / workGroupSize[2] / elementsPerThread[2])
+      ];
+
+      LOG_DEBUG('verbose', () => `[conv_backprop_mm_webgpu] dispatch = ${dispatch}`);
+
+      const innerElementSize = isVec4 ? 4 : 1;
+      const tileInner = Math.max(workGroupSize[0] * innerElementSize, workGroupSize[1]);
+      const components = isVec4 ? 4 : 1;
+      const programUniforms: ProgramUniform[] =
+          [{type: 'int32', data: dimAOuter}, {type: 'int32', data: dimBOuter}, {type: 'int32', data: dimInner}];
+      const x = inputVariable('x', inputs[0].dataType, inputs[0].dims.length, components);
+      const w = inputVariable('w', inputs[1].dataType, inputs[1].dims.length, 1);
+      const output = outputVariable('result', inputs[0].dataType, outputShape.length, components);
+      const inputVariables = [x, w];
+      programUniforms.push(...createTensorShapeVariables(inputs[0].dims));
+      programUniforms.push(...createTensorShapeVariables(inputs[1].dims));
+
+      let declareFunctions = '';
+      if (hasBias) {
+        const bias = inputVariable('bias', inputs[2].dataType, inputs[2].dims.length, components);
+        inputVariables.push(bias);
+        programUniforms.push(...createTensorShapeVariables(inputs[2].dims));
+
+        declareFunctions += `
+        fn getBiasByOutputCoords(coords : vec4<i32>) -> ${isVec4 ? 'vec4<f32>' : 'f32'} {
+          return bias[coords.${isChannelsLast ? 'w' : 'y'}${isVec4 ? '/ 4' : ''}];
+        }`;
+      }
+
+      programUniforms.push(...createTensorShapeVariables(outputShape));
+
+      return {
+        name: 'Conv2DTransposeMatMul',
+        shaderCache: {hint: attributes.cacheKey},
+        getRunData: () => ({
+          outputs: [{dims: outputShape, dataType: inputs[0].dataType}],
+          dispatchGroup: {x: dispatch[0], y: dispatch[1], z: dispatch[2]},
+          programUniforms
+        }),
+        getShaderSource: (shaderHelper: ShaderHelper) => `
+        ${utilFunctions('uniforms.result_strides')}
+        ${
+            shaderHelper.registerUniform('dimAOuter', 'i32')
+                .registerUniform('dimBOuter', 'i32')
+                .registerUniform('dimInner', 'i32')
+                .declareVariables(...inputVariables, output)};
+        const outBackprop : vec4<i32> = vec4<i32>(${inputs[0].dims.join(',')});
+        const filterDims : vec2<i32> = vec2<i32>(${attributes.kernelShape[isChannelsLast ? 1 : 2]}, ${
+            attributes.kernelShape[isChannelsLast ? 2 : 3]});
+        const effectiveFilterDims : vec2<i32> = filterDims + vec2<i32>(
+              ${
+            attributes.dilations[0] <= 1 ?
+                0 :
+                (attributes.kernelShape[isChannelsLast ? 1 : 2] - 1) * (attributes.dilations[0] - 1)},
+              ${
+            attributes.dilations[1] <= 1 ?
+                0 :
+                (attributes.kernelShape[isChannelsLast ? 2 : 3] - 1) * (attributes.dilations[1] - 1)});
+        const pads : vec2<i32> = vec2<i32>(i32(effectiveFilterDims[0]) - 1 - (${
+            attributes.pads[0] + attributes.pads[2]})/2,
+                                         i32(effectiveFilterDims[1]) - 1 - (${
+            attributes.pads[1] + attributes.pads[3]})/2);
+        const strides : vec2<i32> = vec2<i32>(${attributes.strides[0]}, ${attributes.strides[1]});
+        const dilation : vec2<i32> = vec2<i32>(${attributes.dilations[0]}, ${attributes.dilations[1]});
+        const dimAOuter : i32 = ${dimAOuter};
+        const dimBOuter : i32 = ${dimBOuter};
+        const dimInner : i32 = ${dimInner};
+        ${declareFunctions}
+        ${conv2dTransposeCommonSnippet(isChannelsLast, hasBias, attributes, innerElementSize)}
+        ${
+            isVec4 ? makeMatMulPackedVec4Source(
+                         elementsPerThread, workGroupSize, 'f32', undefined, !isChannelsLast, tileInner) :
+                     makeMatMulPackedSource(
+                         elementsPerThread, workGroupSize, 'f32', undefined, !isChannelsLast, tileInner, false,
+                         undefined, sequentialAccessByThreads)}`
+      };
+    };
diff --git a/js/web/lib/wasm/jsep/webgpu/ops/3rd-party/conv_backprop_webgpu.ts b/js/web/lib/wasm/jsep/webgpu/ops/3rd-party/conv_backprop_webgpu.ts
index ec6df438129fb..2e6392aada454 100644
--- a/js/web/lib/wasm/jsep/webgpu/ops/3rd-party/conv_backprop_webgpu.ts
+++ b/js/web/lib/wasm/jsep/webgpu/ops/3rd-party/conv_backprop_webgpu.ts
@@ -20,13 +20,14 @@
 import {LOG_DEBUG} from '../../../log';
 import {TensorView} from '../../../tensor-view';
 import {ShapeUtil} from '../../../util';
-import {GpuDataType, ProgramInfo, ProgramMetadata} from '../../types';
-import {inputVariable, outputVariable, ShaderHelper} from '../common';
+import {ProgramInfo} from '../../types';
+import {inputVariable, outputVariable, ShaderHelper, tensorTypeToWsglStorageType} from '../common';
 import {ConvTransposeAttributes} from '../conv-transpose';
 
 const createConvTranspose2DOpProgramShaderSource =
     (shaderHelper: ShaderHelper, inputs: readonly TensorView[], attributes: ConvTransposeAttributes,
-     outputShape: readonly number[], hasBias: boolean, is1DimensionDispatch: boolean, isVec4 = false): string => {
+     outputShape: readonly number[], hasBias: boolean, is1DimensionDispatch: boolean, isVec4 = false,
+     dataType: string): string => {
       const isChannelsLast = attributes.format === 'NHWC';
       const rowDim = isChannelsLast ? 1 : 2;
       const colDim = isChannelsLast ? 2 : 3;
@@ -39,12 +40,12 @@ const createConvTranspose2DOpProgramShaderSource =
       const outputChannelsPerGroup = wShape[1];
 
       let declareFunctions = `
-  fn setOutputAtIndex(flatIndex : u32, value : ${isVec4 ? 'vec4<f32>' : 'f32'}) {
-    result[flatIndex] = ${isVec4 ? 'vec4<f32>' : 'f32'}(value);
+  fn setOutputAtIndex(flatIndex : u32, value : ${isVec4 ? `vec4<${dataType}>` : dataType}) {
+    result[flatIndex] = ${isVec4 ? `vec4<${dataType}>` : dataType}(value);
   }`;
       if (hasBias) {
         declareFunctions += `
-    fn getBiasByOutputCoords(coords : vec4<u32>) -> ${isVec4 ? 'vec4<f32>' : 'f32'} {
+    fn getBiasByOutputCoords(coords : vec4<u32>) -> ${isVec4 ? `vec4<${dataType}>` : dataType} {
       return bias[coords.${isChannelsLast ? 'w' : 'y'}${isVec4 ? '/ 4' : ''}];
     }`;
       }
@@ -66,33 +67,33 @@ const createConvTranspose2DOpProgramShaderSource =
 
         // Convolve dy(?, ?, d2) with w(:, :, d1, d2) to compute dx(xR, xC, d1).
         // ? = to be determined. : = across all values in that axis.
-        var dotProd: array<vec4<f32>, ${workPerThread}>;
+        var dotProd: array<vec4<${dataType}>, ${workPerThread}>;
         for (var i = 0; i < ${workPerThread}; i++) {
-          dotProd[i] = vec4<f32>(0.0);
+          dotProd[i] = vec4<${dataType}>(0.0);
         }
         for (var wR: u32 = 0; wR < filterDims[0]; wR = wR + 1) {
-          var dyR = (f32(dyCorner.x) + f32(wR)) / f32(strides.x);
+          var dyR = (${dataType}(dyCorner.x) + ${dataType}(wR)) / ${dataType}(strides.x);
           let wRPerm = filterDims[0] - 1 - wR;
-          if (dyR < 0.0 || dyR >= f32(outBackprop[1]) ||
+          if (dyR < 0.0 || dyR >= ${dataType}(outBackprop[1]) ||
               fract(dyR) > 0.0 || wRPerm < 0) {
             continue;
           }
           let idyR: u32 = u32(dyR);
 
           for (var wC: u32 = 0; wC < filterDims[1]; wC = wC + 1) {
-            let dyC = (f32(dyCorner.y) + f32(wC)) / f32(strides.y);
-            let dyC2 = (f32(dyCorner.y) + 1.0 + f32(wC)) / f32(strides.y);
+            let dyC = (${dataType}(dyCorner.y) + ${dataType}(wC)) / ${dataType}(strides.y);
+            let dyC2 = (${dataType}(dyCorner.y) + 1.0 + ${dataType}(wC)) / ${dataType}(strides.y);
             let wCPerm = filterDims[1] - 1 - wC;
             if (wCPerm < 0) {
               continue;
             }
             var bDyCVal = true;
             var bDyCVal2 = true;
-            if (dyC < 0.0 || dyC >= f32(outBackprop[2]) ||
+            if (dyC < 0.0 || dyC >= ${dataType}(outBackprop[2]) ||
                 fract(dyC) > 0.0) {
               bDyCVal = false;
             }
-            if (dyC2 < 0.0 || dyC2 >= f32(outBackprop[2]) ||
+            if (dyC2 < 0.0 || dyC2 >= ${dataType}(outBackprop[2]) ||
                 fract(dyC2) > 0.0) {
               bDyCVal2 = false;
             }
@@ -108,7 +109,7 @@ const createConvTranspose2DOpProgramShaderSource =
                 let wValue3 = ${w.get('u32(wRPerm)', 'u32(wCPerm)', 'd1 + 3', 'd2')};
 
                 var xValue = ${dy.get('batch', 'idyR', 'idyC', 'd2')};
-                let tmpval = vec4<f32>(dot(xValue, wValue0),
+                let tmpval = vec4<${dataType}>(dot(xValue, wValue0),
                                       dot(xValue, wValue1),
                                       dot(xValue, wValue2),
                                       dot(xValue, wValue3));
@@ -116,7 +117,7 @@ const createConvTranspose2DOpProgramShaderSource =
 
                 xValue =  ${dy.get('batch', 'idyR', 'idyC2', 'd2')};
 
-                dotProd[1] = dotProd[1] + vec4<f32>(dot(xValue, wValue0),
+                dotProd[1] = dotProd[1] + vec4<${dataType}>(dot(xValue, wValue0),
                                                     dot(xValue, wValue1),
                                                     dot(xValue, wValue2),
                                                     dot(xValue, wValue3));
@@ -130,7 +131,7 @@ const createConvTranspose2DOpProgramShaderSource =
                 let wValue3 = ${w.get('u32(wRPerm)', 'u32(wCPerm)', 'd1 + 3', 'd2')};
 
                 var xValue = ${dy.get('batch', 'idyR', 'idyC', 'd2')};
-                let tmpval = vec4<f32>(dot(xValue, wValue0),
+                let tmpval = vec4<${dataType}>(dot(xValue, wValue0),
                                       dot(xValue, wValue1),
                                       dot(xValue, wValue2),
                                       dot(xValue, wValue3));
@@ -145,7 +146,7 @@ const createConvTranspose2DOpProgramShaderSource =
                 let wValue3 = ${w.get('u32(wRPerm)', 'u32(wCPerm)', 'd1 + 3', 'd2')};
 
                 var xValue = ${dy.get('batch', 'idyR', 'idyC2', 'd2')};
-                let tmpval = vec4<f32>(dot(xValue, wValue0),
+                let tmpval = vec4<${dataType}>(dot(xValue, wValue0),
                                       dot(xValue, wValue1),
                                       dot(xValue, wValue2),
                                       dot(xValue, wValue3));
@@ -178,9 +179,9 @@ const createConvTranspose2DOpProgramShaderSource =
             if (wR % dilations.x != 0) {
               continue;
             }
-            let dyR = (f32(dyRCorner) + f32(wR)) / f32(strides[0]);
+            let dyR = (${dataType}(dyRCorner) + ${dataType}(wR)) / ${dataType}(strides[0]);
             let wRPerm = filterDims.x - 1 - wR / dilations.x;
-            if (dyR < 0.0 || dyR >= f32(outBackprop[${rowDim}]) || fract(dyR) > 0.0 ||
+            if (dyR < 0.0 || dyR >= ${dataType}(outBackprop[${rowDim}]) || fract(dyR) > 0.0 ||
                 wRPerm < 0) {
               continue;
             }
@@ -190,21 +191,21 @@ const createConvTranspose2DOpProgramShaderSource =
               if (wC % dilations.y != 0) {
                 continue;
               }
-              let dyC = (f32(dyCCorner) + f32(wC)) / f32(strides.y);
+              let dyC = (${dataType}(dyCCorner) + ${dataType}(wC)) / ${dataType}(strides.y);
               let wCPerm = filterDims.y - 1 - wC / dilations.y;
-              if (dyC < 0.0 || dyC >= f32(outBackprop[${colDim}]) ||
+              if (dyC < 0.0 || dyC >= ${dataType}(outBackprop[${colDim}]) ||
                   fract(dyC) > 0.0 || wCPerm < 0) {
                 continue;
               }
               let idyC: u32 = u32(dyC);
-
+              var inputChannel = groupId * ${inputChannelsPerGroup};
               for (var d2: u32 = 0; d2 < ${inputChannelsPerGroup}; d2 = d2 + 1) {
-                let inputChannel = groupId * ${inputChannelsPerGroup} + d2;
                 let xValue = ${
           isChannelsLast ? dy.get('batch', 'idyR', 'idyC', 'inputChannel') :
                            dy.get('batch', 'inputChannel', 'idyR', 'idyC')};
                 let wValue = ${w.get('inputChannel', 'wOutChannel', 'u32(wRPerm)', 'u32(wCPerm)')};
                 dotProd = dotProd + xValue * wValue;
+                inputChannel = inputChannel + 1;
               }
             }
           }
@@ -238,7 +239,7 @@ const createConvTranspose2DOpProgramShaderSource =
     };
 
 export const createConvTranspose2DProgramInfo =
-    (inputs: readonly TensorView[], metadata: ProgramMetadata, attributes: ConvTransposeAttributes,
+    (inputs: readonly TensorView[], attributes: ConvTransposeAttributes,
      squeezeOutputShapeFunction?: (shape: readonly number[]) => number[]): ProgramInfo => {
       const hasBias = inputs.length > 2;
       // const isChannelsLast = attributes.format === 'NHWC';
@@ -256,15 +257,19 @@ export const createConvTranspose2DProgramInfo =
       ];
       LOG_DEBUG('verbose', () => `[conv2d_backprop_webgpu] dispatch = ${dispatch}`);
 
+      const dataType = tensorTypeToWsglStorageType(inputs[0].dataType);
       return {
-        ...metadata,
-        outputs: [{
-          dims: squeezeOutputShapeFunction ? squeezeOutputShapeFunction(outputShape) : outputShape,
-          dataType: inputs[0].dataType,
-          gpuDataType: GpuDataType.default
-        }],
-        dispatchGroup: () => ({x: dispatch[0], y: dispatch[1], z: dispatch[2]}),
+        name: 'ConvTranspose2D',
+        shaderCache: {hint: attributes.cacheKey},
+        getRunData: () => ({
+          dispatchGroup: {x: dispatch[0], y: dispatch[1], z: dispatch[2]},
+          outputs: [{
+            dims: squeezeOutputShapeFunction ? squeezeOutputShapeFunction(outputShape) : outputShape,
+            dataType: inputs[0].dataType
+          }]
+        }),
         getShaderSource: (shaderHelper: ShaderHelper) => createConvTranspose2DOpProgramShaderSource(
-            shaderHelper, inputs, attributes, outputShape, hasBias, dispatch[1] === 1 && dispatch[2] === 1),
+            shaderHelper, inputs, attributes, outputShape, hasBias, dispatch[1] === 1 && dispatch[2] === 1, false,
+            dataType),
       };
     };
diff --git a/js/web/lib/wasm/jsep/webgpu/ops/3rd-party/conv_util.ts b/js/web/lib/wasm/jsep/webgpu/ops/3rd-party/conv_util.ts
index 0ba48a33fbc47..6f2c0231104dc 100644
--- a/js/web/lib/wasm/jsep/webgpu/ops/3rd-party/conv_util.ts
+++ b/js/web/lib/wasm/jsep/webgpu/ops/3rd-party/conv_util.ts
@@ -19,13 +19,13 @@
 //
 // modified to fit the needs of the project
 
-export const utilFunctions = `
+export const utilFunctions = (strideStr: string) => (`
 fn getIndexFromCoords4D(coords : vec4<i32>, shape : vec4<i32>) -> i32 {
   return dot(coords, vec4<i32>(
       shape.y * shape.z * shape.w, shape.z * shape.w, shape.w, 1));
 }
 fn getOutputIndexFromCoords(coords : vec4<i32>) -> i32 {
   return dot(coords, vec4<i32>(
-    outShapeStrides.x, outShapeStrides.y, outShapeStrides.z, 1));
+    i32(${strideStr}.x), i32(${strideStr}.y), i32(${strideStr}.z), 1));
 }
-`;
+`);
diff --git a/js/web/lib/wasm/jsep/webgpu/ops/3rd-party/matmul_packed_webgpu.ts b/js/web/lib/wasm/jsep/webgpu/ops/3rd-party/matmul_packed_webgpu.ts
index 8d43dbb378a69..a8f296ea0c865 100644
--- a/js/web/lib/wasm/jsep/webgpu/ops/3rd-party/matmul_packed_webgpu.ts
+++ b/js/web/lib/wasm/jsep/webgpu/ops/3rd-party/matmul_packed_webgpu.ts
@@ -21,9 +21,9 @@
 
 import {TensorView} from '../../../tensor-view';
 import {ShapeUtil} from '../../../util';
-import {GpuDataType, ProgramInfo, ProgramMetadata} from '../../types';
-import {getBroadcastDims, IndicesHelper, inputVariable, outputVariable, ShaderHelper} from '../common';
-import {getActicationSnippet, InternalActivationAttributes} from '../fuse-utils';
+import {ProgramInfo, ProgramInputTensorInfoDependency, ProgramUniform} from '../../types';
+import {createTensorShapeVariables, enableShapesUniforms, getBroadcastDims, IndicesHelper, inputVariable, internalVariable, outputVariable, ShaderHelper, tensorTypeToWsglStorageType} from '../common';
+import {getActivationSnippet, InternalActivationAttributes} from '../fuse-utils';
 
 import {typeSnippet} from './activation_util';
 
@@ -70,8 +70,8 @@ const calculateResultSnippet = (transposeA: boolean, innerElementSize: number) =
 };
 
 export const makeMatMulPackedVec4Source =
-    (workPerThread: number[], workgroupSize: [number, number, number], batchDims?: IndicesHelper, transposeA = false,
-     tileInner = 32, splitK = false, splitedDimInner = 32): string => {
+    (workPerThread: number[], workgroupSize: [number, number, number], type = 'f32', batchDims?: IndicesHelper,
+     transposeA = false, tileInner = 32, splitK = false, splitedDimInner = 32): string => {
       const tileAOuter = workgroupSize[1] * workPerThread[1];
       const tileBOuter = workgroupSize[0] * workPerThread[0];
       const tileAWidth = transposeA ? tileAOuter : tileInner;
@@ -90,8 +90,8 @@ export const makeMatMulPackedVec4Source =
             workPerThread[0]} must be 4.`);
       }
       return `
-var<workgroup> mm_Asub : array<array<vec${innerElementSize}<f32>, ${tileAWidth / innerElementSize}>, ${tileAHight}>;
-var<workgroup> mm_Bsub : array<array<vec4<f32>, ${tileBOuter / workPerThread[0]}>, ${tileInner}>;
+var<workgroup> mm_Asub: array<array<vec${innerElementSize}<${type}>, ${tileAWidth / innerElementSize}>, ${tileAHight}>;
+var<workgroup> mm_Bsub: array<array<vec4<${type}>, ${tileBOuter / workPerThread[0]}>, ${tileInner}>;
 
 const rowPerThread = ${workPerThread[1]};
 const colPerThread = ${workPerThread[0]};
@@ -112,10 +112,10 @@ fn main(@builtin(local_invocation_id) localId : vec3<u32>,
   ${batchDims ? `let batchIndices = ${batchDims.offsetToIndices('u32(batch)')};` : ''}
   let globalRowStart = i32(workgroupId.y) * ${tileAOuter};
 
-  let numTiles = ${splitK ? `${Math.ceil(splitedDimInner / tileInner)}` : '(dimInner - 1) / tileInner + 1'};
+  let numTiles = ${splitK ? `${Math.ceil(splitedDimInner / tileInner)}` : '(uniforms.dimInner - 1) / tileInner + 1'};
   var kStart = ${splitK ? `i32(globalId.z) * ${splitedDimInner}` : '0'};
 
-  var acc: array<vec4<f32>, rowPerThread>;
+  var acc: array<vec4<${type}>, rowPerThread>;
 
   // Loop over shared dimension.
   let tileRowB = localRow * ${rowPerThreadB};
@@ -179,8 +179,9 @@ const readDataFromSubASnippet = (transposeA: boolean) =>
 // sequentialAccessByThreads means sequential data in memory is accessed by
 // threads, instead of a single thread (default behavior).
 export const makeMatMulPackedSource =
-    (workPerThread: number[], workgroupSize: [number, number, number], batchDims?: IndicesHelper, transposeA = false,
-     tileInner = 32, splitK = false, splitedDimInner = 32, sequentialAccessByThreads = false): string => {
+    (workPerThread: number[], workgroupSize: [number, number, number], type = 'f32', batchDims?: IndicesHelper,
+     transposeA = false, tileInner = 32, splitK = false, splitedDimInner = 32,
+     sequentialAccessByThreads = false): string => {
       const tileAOuter = workPerThread[1] * workgroupSize[1];
       const tileBOuter = workPerThread[0] * workgroupSize[0];
       const tileAWidth = transposeA ? tileAOuter : tileInner;
@@ -222,7 +223,7 @@ export const makeMatMulPackedSource =
       workgroupBarrier();
 
       // Compute acc values for a single thread.
-      var BCached : array<f32, colPerThread>;
+      var BCached : array<${type}, colPerThread>;
       for (var k = 0; k < tileInner; k = k + 1) {
         for (var inner = 0; inner < colPerThread; inner = inner + 1) {
           BCached[inner] = mm_Bsub[k][localCol + inner * ${workgroupSize[0]}];
@@ -283,7 +284,7 @@ for (var t = 0; t < numTiles; t = t + 1) {
   workgroupBarrier();
 
   // Compute acc values for a single thread.
-  var BCached : array<f32, colPerThread>;
+  var BCached : array<${type}, colPerThread>;
   for (var k = 0; k < tileInner; k = k + 1) {
     for (var inner = 0; inner < colPerThread; inner = inner + 1) {
       BCached[inner] = mm_Bsub[k][tileCol + inner];
@@ -309,8 +310,8 @@ for (var innerRow = 0; innerRow < rowPerThread; innerRow = innerRow + 1) {
 `;
 
       return `
-  var<workgroup> mm_Asub : array<array<f32, ${tileAWidth}>, ${tileAHight}>;
-  var<workgroup> mm_Bsub : array<array<f32, ${tileBOuter}>, ${tileInner}>;
+  var<workgroup> mm_Asub : array<array<${type}, ${tileAWidth}>, ${tileAHight}>;
+  var<workgroup> mm_Bsub : array<array<${type}, ${tileBOuter}>, ${tileInner}>;
   const rowPerThread = ${workPerThread[1]};
   const colPerThread = ${workPerThread[0]};
   const tileInner = ${tileInner};
@@ -321,10 +322,10 @@ fn main(@builtin(local_invocation_id) localId : vec3<u32>,
         @builtin(workgroup_id) workgroupId : vec3<u32>) {
     let batch = ${splitK ? '0' : 'i32(globalId.z)'};
     ${batchDims ? `let batchIndices = ${batchDims.offsetToIndices('u32(batch)')};` : ''}
-    let numTiles = ${splitK ? `${Math.ceil(splitedDimInner / tileInner)}` : '(dimInner - 1) / tileInner + 1'};
+    let numTiles = ${splitK ? `${Math.ceil(splitedDimInner / tileInner)}` : '(uniforms.dimInner - 1) / tileInner + 1'};
     var kStart = ${splitK ? `i32(globalId.z) * ${splitedDimInner}` : '0'};
 
-    var acc : array<array<f32, colPerThread>, rowPerThread>;
+    var acc : array<array<${type}, colPerThread>, rowPerThread>;
 
     // Without this initialization strange values show up in acc.
     for (var innerRow = 0; innerRow < rowPerThread; innerRow = innerRow + 1) {
@@ -338,18 +339,16 @@ fn main(@builtin(local_invocation_id) localId : vec3<u32>,
     };
 
 const matMulReadWriteFnSource =
-    (component: number, hasBias: boolean, applyActivation: string, variables: IndicesHelper[]): string => {
-      const batchAVariable = variables[0];
-      const batchBVariable = variables[1];
-      const batchVariable = variables[2];
-      const aVariable = variables[3];
-      const bVariable = variables[4];
-      const outputVariable = variables[5];
-      const broadCastADims = getBroadcastDims(batchAVariable.shape, batchVariable.shape);
-      const broadCastBDims = getBroadcastDims(batchBVariable.shape, batchVariable.shape);
+    (component: number, hasBias: boolean, applyActivation: string, variables: IndicesHelper[],
+     batchShapes: Array<readonly number[]>, isChannelsLast = false): string => {
+      const [batchAShape, batchBShape, batchShape] = batchShapes;
+      const [batchVariable, aVariable, bVariable, outputVariable] = variables;
+      const broadCastADims = getBroadcastDims(batchAShape, batchShape);
+      const broadCastBDims = getBroadcastDims(batchBShape, batchShape);
+      const dataType = tensorTypeToWsglStorageType(variables[0].type.tensor);
       const getAIndices = () => {
-        const aRank = aVariable.shape.length;
-        const batchRank = batchVariable.shape.length;
+        const aRank = aVariable.rank;
+        const batchRank = batchVariable.rank;
         let resStr = `var aIndices: ${aVariable.type.indices};`;
         for (let i = aRank - 2 - 1, j = batchRank - 1; i >= 0; i--, j--) {
           resStr += `\naIndices[${i}] = ${batchRank > 1 ? `batchIndices[${j}]` : 'batchIndices'};`;
@@ -362,8 +361,8 @@ const matMulReadWriteFnSource =
         return resStr;
       };
       const getBIndices = () => {
-        const bRank = bVariable.shape.length;
-        const batchRank = batchVariable.shape.length;
+        const bRank = bVariable.rank;
+        const batchRank = batchVariable.rank;
         let resStr = `var bIndices: ${bVariable.type.indices};`;
         for (let i = bRank - 2 - 1, j = batchRank - 1; i >= 0; i--, j--) {
           resStr += `\nbIndices[${i}] = ${batchRank > 1 ? `batchIndices[${j}]` : 'batchIndices'};`;
@@ -377,10 +376,10 @@ const matMulReadWriteFnSource =
       };
       const source = `
     fn mm_readA(batch: i32, row: i32, colIn: i32, batchIndices: ${batchVariable.type.indices}) -> ${
-          typeSnippet(component)} {
-      var value = ${typeSnippet(component)}(0.0);
+          typeSnippet(component, dataType)} {
+      var value = ${typeSnippet(component, dataType)}(0.0);
       let col = colIn * ${component};
-      if(row < dimAOuter && col < dimInner)
+      if(row < uniforms.dimAOuter && col < uniforms.dimInner)
       {
         ${getAIndices()}
         value = ${aVariable.getByIndices('aIndices')};
@@ -389,10 +388,10 @@ const matMulReadWriteFnSource =
     }
 
     fn mm_readB(batch: i32, row: i32, colIn: i32, batchIndices: ${batchVariable.type.indices}) -> ${
-          typeSnippet(component)} {
-      var value = ${typeSnippet(component)}(0.0);
+          typeSnippet(component, dataType)} {
+      var value = ${typeSnippet(component, dataType)}(0.0);
       let col = colIn * ${component};
-      if(row < dimInner && col < dimBOuter)
+      if(row < uniforms.dimInner && col < uniforms.dimBOuter)
       {
         ${getBIndices()}
         value = ${bVariable.getByIndices('bIndices')};
@@ -400,12 +399,15 @@ const matMulReadWriteFnSource =
       return value;
     }
 
-    fn mm_write(batch: i32, row: i32, colIn: i32, valueIn: ${typeSnippet(component)}) {
+    fn mm_write(batch: i32, row: i32, colIn: i32, valueIn: ${typeSnippet(component, dataType)}) {
       let col = colIn * ${component};
-      if (row < dimAOuter && col < dimBOuter) {
+      if (row < uniforms.dimAOuter && col < uniforms.dimBOuter) {
         var value = valueIn;
         let coords = vec3<i32>(batch, row, colIn);
-        ${hasBias ? 'value = value + bias[colIn];' : ''}
+        ${
+          hasBias ?
+              `value = value + ${isChannelsLast ? 'bias[colIn]' : `${typeSnippet(component, dataType)}(bias[row])`};` :
+                                                  ''                                    }
         ${applyActivation}
         ${outputVariable.setByIndices('vec3<u32>(coords)', 'value')}
       }
@@ -415,25 +417,25 @@ const matMulReadWriteFnSource =
     };
 
 export const createMatmulProgramInfo =
-    (metadata: ProgramMetadata, inputs: readonly TensorView[], activationAttributes: InternalActivationAttributes,
-     outputShape: readonly number[], reshapedOutputShape?: readonly number[]): ProgramInfo => {
+    (inputs: readonly TensorView[], activationAttributes: InternalActivationAttributes, outputShape: readonly number[],
+     reshapedOutputShape?: readonly number[],
+     isChannelsLast = false /* only used for conv2dByMatMul*/): ProgramInfo => {
       const aShape = inputs[0].dims;
       const bShape = inputs[1].dims;
 
       const outerDimsA = aShape.slice(0, -2);
       const outerDimsB = bShape.slice(0, -2);
+
       const outerDims = reshapedOutputShape ? reshapedOutputShape.slice(0, -2) : outputShape.slice(0, -2);
-      const batchDims = inputVariable('batchDims', inputs[0].dataType, outerDims);
-      const batchADims = inputVariable('batchADims', inputs[0].dataType, outerDimsA);
-      const batchBDims = inputVariable('batchBDims', inputs[0].dataType, outerDimsB);
-      const variables = [batchADims, batchBDims, batchDims];
+      const enableBatchUniforms = enableShapesUniforms(outerDims.length);
+      const batchShapeOrRank = enableBatchUniforms ? outerDims.length : outerDims;
+      const batchDims = internalVariable('batchDims', inputs[0].dataType, batchShapeOrRank, 1);
       const batchSize = ShapeUtil.size(outerDims);
 
       const dimAOuter = aShape[aShape.length - 2];
       const dimInner = aShape[aShape.length - 1];
       const dimBOuter = bShape[bShape.length - 1];
       const isVec4 = dimInner % 4 === 0 && dimBOuter % 4 === 0;
-      const {activationFunction, applyActivation} = getActicationSnippet(activationAttributes);
 
       // TODO: fine tune size
       const elementsPerThread = dimAOuter <= 8 ? [4, 1, 1] : [4, 4, 1];
@@ -444,35 +446,83 @@ export const createMatmulProgramInfo =
         Math.ceil(batchSize / workgroupSize[2] / elementsPerThread[2])
       ];
 
+      const dataType = tensorTypeToWsglStorageType(inputs[0].dataType);
       const components = isVec4 ? 4 : 1;
-      const A = inputVariable('a', inputs[0].dataType, [...outerDimsA, dimAOuter, dimInner / components], components);
-      const B = inputVariable('b', inputs[1].dataType, [...outerDimsB, dimInner, dimBOuter / components], components);
-      const output =
-          outputVariable('result', inputs[0].dataType, [batchSize, dimAOuter, dimBOuter / components], components);
-      variables.push(A);
-      variables.push(B);
-      variables.push(output);
+
+      const aShapeTemp = [...outerDimsA, dimAOuter, dimInner / components];
+      const enableAShapesUniforms = enableShapesUniforms(aShapeTemp.length);
+      const aShapeOrRank = enableAShapesUniforms ? aShapeTemp.length : aShapeTemp;
+
+      const bShapeTemp = [...outerDimsB, dimInner, dimBOuter / components];
+      const enableBShapesUniforms = enableShapesUniforms(bShapeTemp.length);
+      const bShapeOrRank = enableBShapesUniforms ? bShapeTemp.length : bShapeTemp;
+
+      const outputShapeTemp = [batchSize, dimAOuter, dimBOuter / components];
+
+      const A = inputVariable('a', inputs[0].dataType, aShapeOrRank, components);
+      const B = inputVariable('b', inputs[1].dataType, bShapeOrRank, components);
+      const output = outputVariable('result', inputs[0].dataType, outputShapeTemp.length, components);
       const inputVariables = [A, B];
+      const programUniforms: ProgramUniform[] =
+          [{type: 'int32', data: dimAOuter}, {type: 'int32', data: dimBOuter}, {type: 'int32', data: dimInner}];
+      if (enableBatchUniforms) {
+        programUniforms.push(...createTensorShapeVariables(outerDims));
+      }
+      if (enableAShapesUniforms) {
+        programUniforms.push(...createTensorShapeVariables(aShapeTemp));
+      }
+      if (enableBShapesUniforms) {
+        programUniforms.push(...createTensorShapeVariables(bShapeTemp));
+      }
+      const inputDependencies: ProgramInputTensorInfoDependency[] = [];
+      inputDependencies.push(enableAShapesUniforms ? 'rank' : 'dims');
+      inputDependencies.push(enableBShapesUniforms ? 'rank' : 'dims');
+
       const hasBias = inputs.length > 2;
-      const declareFunctions = matMulReadWriteFnSource(components, hasBias, applyActivation, variables);
+      const {activationFunction, applyActivation} = getActivationSnippet(activationAttributes, output.type.value);
+      const declareFunctions = matMulReadWriteFnSource(
+          components, hasBias, applyActivation, [batchDims, A, B, output], [outerDimsA, outerDimsB, outerDims],
+          isChannelsLast);
       if (hasBias) {
-        inputVariables.push(inputVariable('bias', inputs[2].dataType, [dimBOuter / components], components));
+        const biasComponents = isChannelsLast ? components : 1;
+        inputVariables.push(inputVariable('bias', inputs[2].dataType, inputs[2].dims.length, biasComponents));
+        programUniforms.push(...createTensorShapeVariables(inputs[2].dims));
+
+        inputDependencies.push('rank');
       }
+      programUniforms.push(...createTensorShapeVariables(outputShapeTemp));
+
       const getShaderSource = (shaderHelper: ShaderHelper) => `
-  const dimAOuter: i32 = ${dimAOuter};
-  const dimBOuter: i32 = ${dimBOuter};
-  const dimInner: i32 = ${dimInner};
-  ${shaderHelper.declareVariables(...inputVariables, output)}
-  ${declareFunctions}
+  ${
+          shaderHelper.registerUniform('dimAOuter', 'i32')
+              .registerUniform('dimBOuter', 'i32')
+              .registerUniform('dimInner', 'i32')
+              .registerInternalVariables(batchDims)
+              .declareVariables(...inputVariables, output)}
   ${activationFunction}
+  ${declareFunctions}
   ${
-          isVec4 ? makeMatMulPackedVec4Source(elementsPerThread, workgroupSize, batchDims) :
-                   makeMatMulPackedSource(elementsPerThread, workgroupSize, batchDims)}
-                   ${batchDims.impl()}`;
+          isVec4 ? makeMatMulPackedVec4Source(elementsPerThread, workgroupSize, dataType, batchDims) :
+                   makeMatMulPackedSource(elementsPerThread, workgroupSize, dataType, batchDims)}
+                   `;
+      // TODO: turn clipMax and clipMin to uniforms.
       return {
-        ...metadata,
-        outputs: [{dims: outputShape, dataType: inputs[0].dataType, gpuDataType: GpuDataType.default}],
+        name: 'MatMul',
+        shaderCache: {
+          hint: activationAttributes.activationCacheKey + `${elementsPerThread}` +
+              `${activationAttributes.activation}` +
+              `${activationAttributes.clipMax}` +
+              `${activationAttributes.clipMin}` +
+              `${isVec4}` +
+              `${hasBias}` +
+              `${isChannelsLast}`,
+          inputDependencies
+        },
+        getRunData: () => ({
+          outputs: [{dims: outputShape, dataType: inputs[0].dataType}],
+          dispatchGroup: {x: dispatch[0], y: dispatch[1], z: dispatch[2]},
+          programUniforms
+        }),
         getShaderSource,
-        dispatchGroup: () => ({x: dispatch[0], y: dispatch[1], z: dispatch[2]})
       };
     };
diff --git a/js/web/lib/wasm/jsep/webgpu/ops/argminmax.ts b/js/web/lib/wasm/jsep/webgpu/ops/argminmax.ts
index 412e61a3cc0f9..b6c6853c8f222 100644
--- a/js/web/lib/wasm/jsep/webgpu/ops/argminmax.ts
+++ b/js/web/lib/wasm/jsep/webgpu/ops/argminmax.ts
@@ -8,7 +8,7 @@
 import {DataType} from '../../../wasm-common';
 import {TensorView} from '../../tensor-view';
 import {AttributeWithCacheKey, createAttributeWithCacheKey} from '../attribute-with-cache-key';
-import {ComputeContext, GpuDataType, ProgramInfoLoader, ProgramMetadata} from '../types';
+import {ComputeContext} from '../types';
 
 import {createReduceProgramInfo, ReduceOp} from './reduce';
 
@@ -27,31 +27,11 @@ export interface ArgMinMaxAttributes extends AttributeWithCacheKey {
   selectLastIndex: number;
 }
 
-const createArgMinMaxAttributesFromInputs =
-    (inputs: readonly TensorView[], attributes: ArgMinMaxAttributes): ArgMinMaxAttributes =>
-        createAttributeWithCacheKey(
-            {axis: attributes.axis, keepDims: attributes.keepDims, selectLastIndex: attributes.selectLastIndex});
-
-const createArgMinMaxProgramInfoLoader =
-    (inputs: readonly TensorView[], name: string, attributes: ArgMinMaxAttributes, reduceOp: ReduceOp):
-        ProgramInfoLoader => {
-          const updatedAttributes: ArgMinMaxAttributes =
-              inputs.length === 1 ? attributes : createArgMinMaxAttributesFromInputs(inputs, attributes);
-          const cacheHint = updatedAttributes.cacheKey + inputs.map(x => x.dims.toString()).join('_');
-          const metadata: ProgramMetadata = {name, inputTypes: [GpuDataType.default], cacheHint};
-          return {
-            ...metadata,
-            get: () => createReduceProgramInfo(
-                metadata, [inputs[0]], reduceOp, [updatedAttributes.axis], DataType.int64, updatedAttributes.keepDims)
-          };
-        };
-
-
 export const argMin = (context: ComputeContext, attributes: ArgMinMaxAttributes): void => {
   validateInputs(context.inputs);
   const argMinMaxOp: ReduceOp = (input, output, axes) => {
     const idxZero = [];
-    for (let k = 0; k < input.shape.length; k++) {
+    for (let k = 0; k < input.rank; k++) {
       if (axes.indexOf(k) >= 0 || axes.length === 0) {
         idxZero.push(`inputIndices[${k}] = 0;`);  // first element
       }
@@ -65,14 +45,19 @@ export const argMin = (context: ComputeContext, attributes: ArgMinMaxAttributes)
       '', output.setByOffset('global_idx', 'bestIndex')
     ];
   };
-  context.compute(createArgMinMaxProgramInfoLoader(context.inputs, 'ArgMin', attributes, argMinMaxOp), {inputs: [0]});
+
+  context.compute(
+      createReduceProgramInfo(
+          'ArgMin', {hint: attributes.cacheKey}, [context.inputs[0]], argMinMaxOp, [attributes.axis], DataType.int64,
+          attributes.keepDims),
+      {inputs: [0]});
 };
 
 export const argMax = (context: ComputeContext, attributes: ArgMinMaxAttributes): void => {
   validateInputs(context.inputs);
   const argMinMaxOp: ReduceOp = (input, output, axes) => {
     const idxZero = [];
-    for (let k = 0; k < input.shape.length; k++) {
+    for (let k = 0; k < input.rank; k++) {
       if (axes.indexOf(k) >= 0 || axes.length === 0) {
         idxZero.push(`inputIndices[${k}] = 0;`);  // first element
       }
@@ -86,7 +71,12 @@ export const argMax = (context: ComputeContext, attributes: ArgMinMaxAttributes)
       '', output.setByOffset('global_idx', 'bestIndex')
     ];
   };
-  context.compute(createArgMinMaxProgramInfoLoader(context.inputs, 'argMax', attributes, argMinMaxOp), {inputs: [0]});
+
+  context.compute(
+      createReduceProgramInfo(
+          'argMax', {hint: attributes.cacheKey}, [context.inputs[0]], argMinMaxOp, [attributes.axis], DataType.int64,
+          attributes.keepDims),
+      {inputs: [0]});
 };
 
 export const parseArgMinMaxAttributes = (attributes: Record<string, unknown>): ArgMinMaxAttributes =>
diff --git a/js/web/lib/wasm/jsep/webgpu/ops/attention.ts b/js/web/lib/wasm/jsep/webgpu/ops/attention.ts
new file mode 100644
index 0000000000000..e1f2a47301bfb
--- /dev/null
+++ b/js/web/lib/wasm/jsep/webgpu/ops/attention.ts
@@ -0,0 +1,635 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+import {TensorView} from '../../tensor-view';
+import {createAttributeWithCacheKey} from '../attribute-with-cache-key';
+import {ComputeContext, GpuDataType} from '../types';
+
+import {castToF32, fillVector, getMaxComponents, inputVariable, outputVariable, ShaderHelper, sumVector, tensorTypeToWsglStorageType} from './common';
+
+export const enum AttentionQkvFormat {
+  unknown,          // enum value not set, or depends on qkv projection implementation details
+  qkvBNSH,          // for non-packed qkv, permuted
+  qkvBSNH,          // for non-packed qkv, not permuted, used by memory efficient attention or MultiHeadAttention
+  qkvBSN3H,         // for TRT fused attention, qkv are packed
+  qkvBNSHqkvBS3NH,  // for TRT fused causal attention, data has two formats (qkv is 3BNSH, gemm_buffer is BS3NH)
+  qKvBSNHxBSN2H,    // for TRT fused cross attention, kv are packed
+  qkvTNH,           // for memory efficient attention, qkv are not packed, and paddings are removed.
+  qkvTN3H,          // for TRT fused attention, qkv are packed and paddings are removed
+}
+
+export const enum AttentionMaskType {
+  none,                  // No mask
+  mask1dKeySeqLen,       // [batch_size], key sequence length
+  mask1dEndStart,        // [2 * batch_size] with end positions and start positions
+  mask1DKeySeqLenStart,  // [3 * batch_size + 2] with [key_len[0], ..., key_len[batch_size - 1], query_start[0],
+                         // ..., query_start[batch_size - 1], query_end[batch_size - 1], key_start[0], ...,
+                         // key_start[batch_size - 1], key_end[batch_size - 1]]
+  mask2dDummy,           // dummy mask with shape [1, 1] or [batch_size, 1]. It has same effect as no mask.
+  mask2dKeyPadding,      // [batch_size, total_sequence_length]
+  mask3dAttention,       // [batch_size, sequence_length, total_sequence_length]
+  mask4dMegatron,        // Megatron causal mask with shape [batch_size, 1, max_sequence_length, max_sequence_length]
+  maskUnknown
+}
+
+export interface AttentionParameters {
+  batchSize: number;
+  sequenceLength: number;
+  pastSequenceLength: number;
+  kvSequenceLength: number;
+  totalSequenceLength: number;
+  maxSequenceLength: number;
+  inputHiddenSize: number;
+  hiddenSize: number;
+  vHiddenSize: number;
+  headSize: number;
+  vHeadSize: number;
+  numHeads: number;
+  isUnidirectional: boolean;
+  pastPresentShareBuffer: boolean;
+  maskFilterValue: number;
+  maskType: AttentionMaskType;
+  scale: number;
+  broadcastResPosBias: boolean;
+  passPastInKv: boolean;
+  qkvFormat: AttentionQkvFormat;
+}
+
+export interface AttentionAttrs {
+  numHeads: number;
+  isUnidirectional: number;
+  maskFilterValue: number;
+  scale: number;
+  doRotary: number;
+  qkvHiddenSizes: number[];
+  pastPresentShareBuffer: boolean;
+}
+
+const validateAttentionInputs = (inputs: readonly TensorView[], attributes: AttentionAttrs): AttentionParameters => {
+  // Abbreviation and Meanings:
+  //   B:    batch_size
+  //   S:    sequence_length (input sequence length of query)
+  //   P:    past_sequence_length (past sequence length of key or value)
+  //   L:    kv_sequence_length (input sequence length of key or value)
+  //   M:    max_sequence_length
+  //   T:    total_sequence_length = past_sequence_length + kv_sequence_length
+  //   N:    num_heads
+  //   H:    head size for Q and K, aka q_head_size or k_head_size or qk_head_size
+  //   H_v:  v_head_size
+  //   D_i:  input hidden size
+  //   D:    hidden size for Q and K (D = N * H), aka q_hidden_size or k_hidden_size or qk_hidden_size
+  //   D_v:  v_hidden_size = num_heads * v_head_size
+
+  // When past state is used, Q, K and V should have same hidden size (unless we split it into past_key and past_value).
+
+  // Input shapes:
+  //   input        (Q/K/V)    : (B, S, D_i)
+  //   weights      (Q/K/V)    : (D_i, D + D + D_v)
+  //   bias         (Q/K/V)    : (D + D + D_v)
+  //   mask_index              : see below
+  //   past         (K/V)      : (2, B, N, P, H) or NULL
+  //   relative_position_bias            : (B, N, S, T) or NULL
+
+  // For mask_index, the following shapes are supported:
+  //     NULL, (B, 1), (1, 1)
+  //     (B), (2 * B), (3 * B + 2)
+  //     (B, T)
+  //     (B, S, T)
+  //     (B, 1, M, M)
+  //
+  // When a model is pruned (like some attention heads are removed in Q/K/V), input_hidden_size could be larger
+  // than hidden dimension of Q, K and V.
+
+  const input = inputs[0];
+  const weights = inputs[1];
+  const bias = inputs[2];
+  const maskIndex = inputs[3];
+  const past = inputs[4];
+  const relativePositionBias = inputs[5];
+
+  if (past && relativePositionBias) {
+    throw new Error('Attention cannot have both past and relative_position_bias');
+  }
+
+  if (input.dims.length !== 3) {
+    throw new Error('Input "input" must have 3 dimensions');
+  }
+
+  const batchSize = input.dims[0];
+  const sequenceLength = input.dims[1];
+  const inputHiddenSize = input.dims[2];
+
+  if (bias.dims.length !== 1) {
+    throw new Error('Input "bias" is expected to have 1 dimensions');
+  }
+
+  if (weights.dims.length !== 2) {
+    throw new Error('Input "weights" is expected to have 2 dimensions');
+  }
+
+  if (weights.dims[0] !== inputHiddenSize) {
+    throw new Error('Input 1 dimension 0 should have same length as dimension 2 of input 0');
+  }
+
+  if (bias.dims[0] !== weights.dims[1]) {
+    throw new Error('Input "bias" dimension 0 should have same length as dimension 1 of input "weights"');
+  }
+
+  let qHiddenSize = bias.dims[0] / 3;
+  let kHiddenSize = qHiddenSize;
+  let vHiddenSize = kHiddenSize;
+  if (attributes.qkvHiddenSizes.length > 0) {
+    if (attributes.qkvHiddenSizes.length !== 3) {
+      throw new Error('qkv_hidden_sizes attribute should have 3 elements');
+    }
+    for (const sz of attributes.qkvHiddenSizes) {
+      if (sz % attributes.numHeads !== 0) {
+        throw new Error('qkv_hidden_sizes should be divisible by num_heads');
+      }
+    }
+
+    qHiddenSize = attributes.qkvHiddenSizes[0];
+    kHiddenSize = attributes.qkvHiddenSizes[1];
+    vHiddenSize = attributes.qkvHiddenSizes[2];
+  }
+
+  const kvSequenceLength = sequenceLength;
+
+  if (qHiddenSize !== kHiddenSize) {
+    throw new Error('qkv_hidden_sizes first element should be same as the second');
+  }
+
+  if (bias.dims[0] !== qHiddenSize + kHiddenSize + vHiddenSize) {
+    throw new Error('Input "bias" dimension 0 should have same length as sum of Q/K/V hidden sizes');
+  }
+
+  let pastSequenceLength = 0;
+  if (past) {
+    if (kHiddenSize !== vHiddenSize) {
+      throw new Error('Input "past" expect k_hidden_size == v_hidden_size');
+    }
+    if (past.dims.length !== 5) {
+      throw new Error('Input "past" must have 5 dimensions');
+    }
+    if (past.dims[0] !== 2) {
+      throw new Error('Input "past" first dimension must be 2');
+    }
+    if (past.dims[1] !== batchSize) {
+      throw new Error('Input "past" second dimension must be batch_size');
+    }
+    if (past.dims[2] !== attributes.numHeads) {
+      throw new Error('Input "past" third dimension must be num_heads');
+    }
+    if (past.dims[4] !== kHiddenSize / attributes.numHeads) {
+      throw new Error('Input "past" fifth dimension must be k_hidden_size / num_heads');
+    }
+
+    if (!attributes.pastPresentShareBuffer) {
+      pastSequenceLength = past.dims[3];
+    }
+    // TODO: handle past_seq_len
+  }
+
+  const totalSequenceLength = kvSequenceLength + pastSequenceLength;
+  const maxSequenceLength = -1;
+
+  const maskType = AttentionMaskType.none;
+  if (maskIndex) {
+    // maskType = AttentionMaskType.MASK_UNKNOWN;
+    // TODO: handle mask
+    throw new Error('Mask not supported');
+  }
+
+  if (past) {
+    throw new Error('past is not supported');
+  }
+  if (relativePositionBias) {
+    throw new Error('relativePositionBias is not supported');
+  }
+
+  return {
+    batchSize,
+    sequenceLength,
+    pastSequenceLength,
+    kvSequenceLength,
+    totalSequenceLength,
+    maxSequenceLength,
+    inputHiddenSize,
+    hiddenSize: qHiddenSize,
+    vHiddenSize,
+    headSize: Math.floor(qHiddenSize / attributes.numHeads),
+    vHeadSize: Math.floor(vHiddenSize / attributes.numHeads),
+    numHeads: attributes.numHeads,
+    isUnidirectional: false,
+    pastPresentShareBuffer: false,
+    maskFilterValue: attributes.maskFilterValue,
+    maskType,
+    scale: attributes.scale,
+    broadcastResPosBias: false,
+    passPastInKv: false,
+    qkvFormat: AttentionQkvFormat.qkvBNSH,
+  };
+};
+
+export const parseAttentionAttributes = (attributes: AttentionAttrs): AttentionAttrs =>
+    createAttributeWithCacheKey({...attributes});
+
+export const computeInPlaceSoftmax = (context: ComputeContext, input: TensorView, n: number, d: number) => {
+  const components = getMaxComponents(d);
+  const inputHelper = outputVariable('x', input.dataType, input.dims, components);
+
+  let threadMaxValue = 'threadMaxVector';
+  if (components === 2) {
+    threadMaxValue = 'max(threadMaxVector.x, threadMaxVector.y)';
+  } else if (components === 4) {
+    threadMaxValue = 'max(max(threadMaxVector.x, threadMaxVector.y), max(threadMaxVector.z, threadMaxVector.w))';
+  }
+  const dataType = tensorTypeToWsglStorageType(input.dataType);
+  let WG = 64;
+  const dComp = d / components;
+  if (dComp < WG) {
+    WG = 1;
+  } else if (dComp / 8 < 64) {
+    WG = Math.ceil(dComp / 8);
+  }
+  const elementsPerWG = Math.ceil(d / components / WG);
+
+  const getShaderSource = (shaderHelper: ShaderHelper) => `
+  const dInv: ${dataType} = 1 / ${d};
+  const dComp = ${d / components};
+  var<workgroup> wgMax: array<f32, ${WG}>;
+  var<workgroup> wgSum: array<f32, ${WG}>;
+
+  ${shaderHelper.declareVariables(inputHelper)}
+  @compute @workgroup_size(${WG}, 1, 1)
+  fn main(@builtin(workgroup_id) workgroup_id : vec3<u32>,
+    @builtin(local_invocation_index) local_index : u32) {
+    let localOffset = local_index * ${elementsPerWG};
+    let offset: u32 = workgroup_id.x * dComp + localOffset;
+
+    var threadMaxVector = ${fillVector('f32', components, '-3.402823e+38f')};
+    for (var i: u32 = 0; i < ${elementsPerWG} && i + localOffset < dComp; i++) {
+      threadMaxVector = max(${castToF32(dataType, components, 'x[offset + i]')}, threadMaxVector);
+    }
+    wgMax[local_index] = ${threadMaxValue};
+    workgroupBarrier();
+
+    var maxValue = -3.402823e+38f;
+    for (var i = 0u; i < ${WG}; i++) {
+      maxValue = max(wgMax[i], maxValue);
+    }
+
+    var sumVector = ${fillVector('f32', components, '0')};
+    for (var i: u32 = 0; i < ${elementsPerWG} && i + localOffset < dComp; i++) {
+      sumVector += exp(${castToF32(dataType, components, 'x[offset + i]')} - maxValue);
+    }
+    wgSum[local_index] = ${sumVector('sumVector', components)};
+    workgroupBarrier();
+
+    var sum: f32 = 0;
+    for (var i = 0u; i < ${WG}; i++) {
+      sum += wgSum[i];
+    }
+
+    if (sum == 0) {
+      for (var i: u32 = 0; i < ${elementsPerWG} && i + localOffset < dComp; i++) {
+        x[offset + i] = ${fillVector(dataType, components, 'dInv')};
+      }
+    } else {
+      for (var i: u32 = 0; i < ${elementsPerWG} && i + localOffset < dComp; i++) {
+        let f32input = ${castToF32(dataType, components, 'x[offset + i]')};
+        x[offset + i] = ${inputHelper.type.value}(exp(f32input - maxValue) / sum);
+      }
+    }
+  }`;
+
+  context.compute(
+      {
+        name: 'AttentionProbsSoftmax',
+        shaderCache: {hint: `${d}`},
+        getShaderSource,
+        getRunData: () => ({
+          outputs: [],
+          dispatchGroup: {x: n},
+        }),
+      },
+      {inputs: [input], outputs: []});
+};
+
+const computeAttentionProbs =
+    (context: ComputeContext, q: TensorView, key: TensorView, _bias: TensorView|undefined,
+     parameters: AttentionParameters, attributes: AttentionAttrs) => {
+      const probsShape = [
+        parameters.batchSize, parameters.numHeads, parameters.sequenceLength,
+        parameters.kvSequenceLength + parameters.pastSequenceLength
+      ];
+      // TODO: handle mask
+
+      const alpha = attributes.scale === 0 ? 1.0 / Math.sqrt(parameters.headSize) : attributes.scale;
+
+      const dataType = tensorTypeToWsglStorageType(q.dataType);
+
+      const components = getMaxComponents(parameters.headSize);
+      const qInput = inputVariable('q', q.dataType, q.dims, components);
+      const kInput = inputVariable('key', key.dataType, key.dims, components);
+      const output = outputVariable('output', q.dataType, probsShape);
+
+      const vectorizedHeadSize = parameters.headSize / components;
+      const M = parameters.sequenceLength;
+      const N = parameters.totalSequenceLength;
+      const K = vectorizedHeadSize;
+
+      const TILE_SIZE = 12;
+
+      const dispatch = {
+        x: Math.ceil(parameters.totalSequenceLength / TILE_SIZE),
+        y: Math.ceil(parameters.sequenceLength / TILE_SIZE),
+        z: parameters.batchSize * parameters.numHeads
+      };
+
+      const inputs = [q, key];
+      const getShaderSource = (shaderHelper: ShaderHelper) => `
+  const M: u32 = ${M}u;
+  const N: u32 = ${N}u;
+  const K: u32 = ${K}u;
+  const alpha: ${dataType} = ${alpha};
+  const beta: ${dataType} = 1.0;
+  const TILE_SIZE = ${TILE_SIZE}u;
+
+  var<workgroup> tileQ: array<${qInput.type.storage}, ${TILE_SIZE * TILE_SIZE}>;
+  var<workgroup> tileK: array<${qInput.type.storage}, ${TILE_SIZE * TILE_SIZE}>;
+
+  ${shaderHelper.declareVariables(qInput, kInput, output)}
+
+  @compute @workgroup_size(${TILE_SIZE}, ${TILE_SIZE}, 1)
+  fn main(@builtin(workgroup_id) workgroup_id : vec3<u32>,
+   @builtin(local_invocation_id) local_id : vec3<u32>, @builtin(local_invocation_index) local_index : u32) {
+   let global_idx = (workgroup_id.z * ${dispatch.x * dispatch.y}u +
+          workgroup_id.y * ${dispatch.x}u + workgroup_id.x) * ${TILE_SIZE * TILE_SIZE}u + local_index;
+
+    // x holds the N and y holds the M
+    let headIdx = workgroup_id.z;
+    let m = workgroup_id.y * TILE_SIZE;
+    let n = workgroup_id.x * TILE_SIZE;
+    let lm = m + local_id.y;
+    let ln = n + local_id.x;
+
+    let qOffset = ${parameters.sequenceLength * vectorizedHeadSize} * headIdx + m * K;
+    let kOffset = ${parameters.kvSequenceLength * vectorizedHeadSize} * headIdx + n * K;
+
+    var value = ${fillVector(dataType, components)};
+    for (var w: u32 = 0u; w < K; w += TILE_SIZE) {
+      if (m + local_id.y < M && w + local_id.x < K) {
+        tileQ[TILE_SIZE * local_id.y + local_id.x] = q[qOffset + local_id.y * K + w + local_id.x];
+      }
+      if (n + local_id.y < N && w + local_id.x < K) {
+        tileK[TILE_SIZE * local_id.y + local_id.x] = key[kOffset + local_id.y * K + w + local_id.x];
+      }
+      workgroupBarrier();
+
+      for (var k: u32 = 0u; k<TILE_SIZE && w+k < K; k++) {
+        value += tileQ[TILE_SIZE * local_id.y + k] * tileK[TILE_SIZE * local_id.x + k];
+      }
+
+      workgroupBarrier();
+    }
+
+    let headOffset = headIdx * M * N;
+    if (lm < M && ln < N) {
+      let outputIdx = headOffset + lm * N + ln;
+      output[outputIdx] = ${sumVector('value', components)} * alpha;
+    }
+  }`;
+
+      const probs = context.compute(
+          {
+            name: 'AttentionProbs',
+            shaderCache: {hint: JSON.stringify(parameters)},
+            getRunData: () => ({
+              outputs: [{dims: probsShape, dataType: q.dataType, gpuDataType: GpuDataType.default}],
+              dispatchGroup: dispatch,
+            }),
+            getShaderSource,
+          },
+          {inputs, outputs: [-1]})[0];
+
+      computeInPlaceSoftmax(
+          context, probs, parameters.batchSize * parameters.numHeads * parameters.sequenceLength,
+          parameters.totalSequenceLength);
+
+      return probs;
+    };
+
+const computeVxAttentionScore =
+    (context: ComputeContext, probs: TensorView, v: TensorView, params: AttentionParameters) => {
+      const outputShape = [params.batchSize, params.sequenceLength, params.vHiddenSize];
+
+      const probsHelper = inputVariable('probs', probs.dataType, probs.dims);
+      const vHelper = inputVariable('v', v.dataType, v.dims);
+      const output = outputVariable('output', probs.dataType, outputShape);
+
+      const dataType = tensorTypeToWsglStorageType(probs.dataType);
+
+      const TILE_SIZE = 12;
+      const dispatch = {
+        x: Math.ceil(params.vHeadSize / TILE_SIZE),
+        y: Math.ceil(params.sequenceLength / TILE_SIZE),
+        z: params.batchSize * params.numHeads
+      };
+
+      const getShaderSource = (shaderHelper: ShaderHelper) => `
+  const M: u32 = ${params.sequenceLength}u;
+  const N: u32 = ${params.vHeadSize}u;
+  const K: u32 = ${params.totalSequenceLength}u;
+  const numHeads: u32 = ${params.numHeads}u;
+  const TILE_SIZE = ${TILE_SIZE}u;
+
+  var<workgroup> tileQ: array<${probsHelper.type.storage}, ${TILE_SIZE * TILE_SIZE}>;
+  var<workgroup> tileK: array<${probsHelper.type.storage}, ${TILE_SIZE * TILE_SIZE}>;
+
+  ${shaderHelper.declareVariables(probsHelper, vHelper, output)}
+
+  @compute @workgroup_size(${TILE_SIZE}, ${TILE_SIZE}, 1)
+  fn main(@builtin(workgroup_id) workgroup_id : vec3<u32>,
+   @builtin(local_invocation_id) local_id : vec3<u32>, @builtin(local_invocation_index) local_index : u32) {
+   let global_idx = (workgroup_id.z * ${dispatch.x * dispatch.y}u +
+          workgroup_id.y * ${dispatch.x}u + workgroup_id.x) * ${TILE_SIZE * TILE_SIZE}u + local_index;
+
+   let headIdx = workgroup_id.z;
+   let m = workgroup_id.y * TILE_SIZE + local_id.y;
+   let n = workgroup_id.x * TILE_SIZE + local_id.x;
+
+   let offsetA = headIdx * (M * K) + m * K;
+   let offsetB = headIdx * (N * K) + n;
+
+   var value = ${dataType}(0);
+   for (var w: u32 = 0u; w < K; w += TILE_SIZE) {
+     if (m < M && w + local_id.x < K) {
+       tileQ[TILE_SIZE * local_id.y + local_id.x] = probs[offsetA + w + local_id.x];
+     }
+     if (n < N && w + local_id.y < K) {
+       tileK[TILE_SIZE * local_id.y + local_id.x] = v[offsetB + (w + local_id.y) * N];
+     }
+     workgroupBarrier();
+     for (var k: u32 = 0u; k<TILE_SIZE && w+k < K; k++) {
+       value += tileQ[TILE_SIZE * local_id.y + k] * tileK[TILE_SIZE * k + local_id.x];
+     }
+     workgroupBarrier();
+   }
+
+   // we need to transpose output from BNSH_v to BSND_v
+   let batchIdx = workgroup_id.z / ${params.numHeads};
+   let currentBatchHeadNumber = workgroup_id.z % ${params.numHeads};
+   let headOffset = (batchIdx * M * ${params.numHeads} + currentBatchHeadNumber) * ${params.vHeadSize};
+   if (m < M && n < N) {
+     let outputIdx = batchIdx * ${params.sequenceLength * params.vHiddenSize} + m * ${params.vHiddenSize}
+       + currentBatchHeadNumber * ${params.vHeadSize} + n;
+     output[outputIdx] = value;
+   }
+  }`;
+
+      return context.compute(
+          {
+            name: 'AttentionScore',
+            shaderCache: {hint: JSON.stringify(params)},
+            getRunData: () => ({
+              outputs: [{dims: outputShape, dataType: probs.dataType, gpuDataType: GpuDataType.default}],
+              dispatchGroup: dispatch,
+            }),
+            getShaderSource,
+          },
+          {inputs: [probs, v], outputs: [0]})[0];
+    };
+
+export const applyAttention =
+    (context: ComputeContext, q: TensorView, k: TensorView, v: TensorView, _maskIndex: TensorView|undefined,
+     _past: TensorView|undefined, _pastKey: TensorView|undefined, _pastValue: TensorView|undefined,
+     relativePositionBias: TensorView|undefined, parameters: AttentionParameters, attributes: AttentionAttrs) => {
+      const probs = computeAttentionProbs(context, q, k, relativePositionBias, parameters, attributes);
+
+      computeVxAttentionScore(context, probs, v, parameters);
+    };
+
+const prepare = (context: ComputeContext, parameters: AttentionParameters) => {
+  const outputShape = [
+    parameters.batchSize,
+    parameters.numHeads,
+    parameters.sequenceLength,
+    parameters.headSize,
+  ];
+
+  const dataType = tensorTypeToWsglStorageType(context.inputs[0].dataType);
+
+  const M = parameters.sequenceLength;
+  const K = parameters.inputHiddenSize;
+  const N = parameters.headSize;
+
+  const TILE_SIZE = 12;
+  const dispatch = {
+    x: Math.ceil(parameters.headSize / TILE_SIZE),
+    y: Math.ceil(parameters.sequenceLength / TILE_SIZE),
+    z: parameters.batchSize * parameters.numHeads
+  };
+
+  const getShaderSource = () => `
+  const M: u32 = ${M}u;
+  const K: u32 = ${K}u;
+  const N: u32 = ${N}u;
+  const numHeads: u32 = ${parameters.numHeads};
+  const ldb = ${parameters.hiddenSize + parameters.hiddenSize + parameters.vHiddenSize}u;
+  const TILE_SIZE = ${TILE_SIZE}u;
+
+  var<workgroup> tileInput: array<${dataType}, ${TILE_SIZE * TILE_SIZE}>;
+  var<workgroup> tileWeightQ: array<${dataType}, ${TILE_SIZE * TILE_SIZE}>;
+  var<workgroup> tileWeightK: array<${dataType}, ${TILE_SIZE * TILE_SIZE}>;
+  var<workgroup> tileWeightV: array<${dataType}, ${TILE_SIZE * TILE_SIZE}>;
+
+  @group(0) @binding(0) var<storage, read> input: array<${dataType}>;
+  @group(0) @binding(1) var<storage, read> weight: array<${dataType}>;
+  @group(0) @binding(2) var<storage, read> bias: array<${dataType}>;
+  @group(0) @binding(3) var<storage, read_write> outputQ: array<${dataType}>;
+  @group(0) @binding(4) var<storage, read_write> outputK: array<${dataType}>;
+  @group(0) @binding(5) var<storage, read_write> outputV: array<${dataType}>;
+
+  @compute @workgroup_size(${TILE_SIZE}, ${TILE_SIZE}, 1)
+  fn main(@builtin(workgroup_id) workgroup_id : vec3<u32>,
+   @builtin(local_invocation_id) local_id : vec3<u32>, @builtin(local_invocation_index) local_index : u32) {
+   let global_idx = (workgroup_id.z * ${dispatch.x * dispatch.y}u +
+          workgroup_id.y * ${dispatch.x}u + workgroup_id.x) * ${TILE_SIZE * TILE_SIZE}u + local_index;
+
+    let batchIndex = workgroup_id.z / ${parameters.numHeads};
+    let headNumber = workgroup_id.z % ${parameters.numHeads};
+    let m = workgroup_id.y * TILE_SIZE + local_id.y;
+    let n = workgroup_id.x * TILE_SIZE + local_id.x;
+
+    let inputOffset = batchIndex * (M * K) + m * K;
+    let biasOffsetQ = headNumber * ${parameters.headSize};
+    let biasOffsetK = ${parameters.hiddenSize} + biasOffsetQ;
+    let biasOffsetV = ${parameters.hiddenSize} + biasOffsetK;
+
+    var valueQ = ${dataType}(0);
+    var valueK = ${dataType}(0);
+    var valueV = ${dataType}(0);
+    for (var w: u32 = 0u; w < K; w += TILE_SIZE) {
+      if (m < M && w + local_id.x < K) {
+        tileInput[TILE_SIZE * local_id.y + local_id.x] = input[inputOffset + w + local_id.x];
+      }
+      if (n < N && w + local_id.y < K) {
+        let offset = n + (w + local_id.y) * ldb;
+        tileWeightQ[TILE_SIZE * local_id.y + local_id.x] = weight[biasOffsetQ + offset];
+        tileWeightK[TILE_SIZE * local_id.y + local_id.x] = weight[biasOffsetK + offset];
+        tileWeightV[TILE_SIZE * local_id.y + local_id.x] = weight[biasOffsetV + offset];
+      }
+      workgroupBarrier();
+      for (var k: u32 = 0u; k<TILE_SIZE && w+k < K; k++) {
+        let inputTileOffset = TILE_SIZE * local_id.y + k;
+        let weightTileOffset = TILE_SIZE * k + local_id.x;
+        valueQ += tileInput[inputTileOffset] * tileWeightQ[weightTileOffset];
+        valueK += tileInput[inputTileOffset] * tileWeightK[weightTileOffset];
+        valueV += tileInput[inputTileOffset] * tileWeightV[weightTileOffset];
+      }
+
+      workgroupBarrier();
+    }
+
+    let headOffset = (m * N + n) % ${parameters.headSize};
+    valueQ += bias[headOffset + biasOffsetQ];
+    valueK += bias[headOffset + biasOffsetK];
+    valueV += bias[headOffset + biasOffsetV];
+
+    let offset = workgroup_id.z * M * N;
+    if (m < M && n < N) {
+      let outputIdx = offset + m * N + n;
+      outputQ[outputIdx] = valueQ;
+      outputK[outputIdx] = valueK;
+      outputV[outputIdx] = valueV;
+    }
+  }`;
+
+  const inputs = [context.inputs[0], context.inputs[1], context.inputs[2]];
+
+  return context.compute(
+      {
+        name: 'AttentionPrepare',
+        shaderCache: {hint: JSON.stringify(parameters)},
+        getRunData: () => ({
+          outputs: [
+            {dims: outputShape, dataType: context.inputs[0].dataType, gpuDataType: GpuDataType.default},
+            {dims: outputShape, dataType: context.inputs[0].dataType, gpuDataType: GpuDataType.default},
+            {dims: outputShape, dataType: context.inputs[0].dataType, gpuDataType: GpuDataType.default},
+          ],
+          dispatchGroup: dispatch,
+        }),
+        getShaderSource,
+      },
+      {inputs, outputs: [-1, -1, -1]});
+};
+
+export const attention = (context: ComputeContext, attributes: AttentionAttrs): void => {
+  const params = validateAttentionInputs(context.inputs, attributes);
+
+  const [q, k, v] = prepare(context, params);
+
+  return applyAttention(
+      context, q, k, v, context.inputs[4], undefined, undefined, undefined, context.inputs[5], params, attributes);
+};
diff --git a/js/web/lib/wasm/jsep/webgpu/ops/batch-norm.ts b/js/web/lib/wasm/jsep/webgpu/ops/batch-norm.ts
new file mode 100644
index 0000000000000..ec9da2613f406
--- /dev/null
+++ b/js/web/lib/wasm/jsep/webgpu/ops/batch-norm.ts
@@ -0,0 +1,150 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+import {env} from 'onnxruntime-common';
+
+import {TensorView} from '../../tensor-view';
+import {ShapeUtil} from '../../util';
+import {AttributeWithCacheKey, createAttributeWithCacheKey} from '../attribute-with-cache-key';
+import {ComputeContext, ProgramInfo} from '../types';
+
+import {createTensorShapeVariables, enableShapesUniforms, getMaxComponents, inputVariable, outputVariable, ShaderHelper} from './common';
+
+export interface BatchNormAttributes extends AttributeWithCacheKey {
+  readonly epsilon: number;
+  readonly momentum: number;
+  readonly spatial: boolean;
+  readonly trainingMode: boolean;
+  readonly format: 'NHWC'|'NCHW';
+  readonly outputCount: number;
+}
+
+const validateInputs = (inputs: readonly TensorView[], attributes: BatchNormAttributes): void => {
+  if (!inputs || inputs.length !== 5) {
+    throw new Error('BatchNormalization requires 5 inputs');
+  }
+
+  const checkShapeEqual = (actual: readonly number[], expected: readonly number[], message: string) => {
+    const r = expected.length;
+    if (r !== actual.length) {
+      throw new Error(`${message}: num dimensions != ${r}`);
+    }
+    expected.forEach((v, i) => {
+      if (v !== actual[i]) {
+        throw new Error(`${message}: dim[${i}] do not match`);
+      }
+    });
+  };
+
+  if (inputs[0].dims.length > 1) {
+    const shape = attributes.format === 'NHWC' ?
+        (attributes.spatial ? inputs[0].dims.slice(-1) :
+                              inputs[0].dims.slice(-1).concat(inputs[0].dims.slice(1, inputs[0].dims.length - 1))) :
+        inputs[0].dims.slice(1, attributes.spatial ? 2 : undefined);
+    checkShapeEqual(inputs[1].dims, shape, 'Invalid input scale');
+    checkShapeEqual(inputs[2].dims, shape, 'Invalid input B');
+    checkShapeEqual(inputs[3].dims, shape, 'Invalid input mean');
+    checkShapeEqual(inputs[4].dims, shape, 'Invalid input var');
+  } else {
+    checkShapeEqual(inputs[1].dims, [1], 'Invalid input scale');
+    checkShapeEqual(inputs[2].dims, [1], 'Invalid input B');
+    checkShapeEqual(inputs[3].dims, [1], 'Invalid input mean');
+    checkShapeEqual(inputs[4].dims, [1], 'Invalid input var');
+  }
+};
+
+const createBatchNormInferenceProgramInfo =
+    (inputs: readonly TensorView[], attributes: BatchNormAttributes): ProgramInfo => {
+      const {epsilon, spatial, format} = attributes;
+      const yShape = inputs[0].dims;
+      const components = spatial ? getMaxComponents(yShape[yShape.length - 1]) : 1;
+      const cComponents = format === 'NHWC' && yShape.length > 1 ? components : 1;
+      const outputSize = ShapeUtil.size(yShape) / components;
+      // Only support uniforms for opset version >= 9 (spatial = true).
+      const useShapesUniforms = enableShapesUniforms(yShape.length) && spatial;
+      const shapeOrRank = useShapesUniforms ? yShape.length : yShape;
+      const x = inputVariable('x', inputs[0].dataType, inputs[0].dims, components);
+      const scale = inputVariable('scale', inputs[1].dataType, inputs[1].dims, cComponents);
+      const bias = inputVariable('bias', inputs[2].dataType, inputs[2].dims, cComponents);
+      const inputMean = inputVariable('inputMean', inputs[3].dataType, inputs[3].dims, cComponents);
+      const inputVar = inputVariable('inputVar', inputs[4].dataType, inputs[4].dims, cComponents);
+      const y = outputVariable('y', inputs[0].dataType, shapeOrRank, components);
+      // TODO: support inputs with different data type. Current we need to make sure all inputs have the same data type.
+      // Otherwise, the shader compilation will fail.
+      const calcCOffset = (): string => {
+        let cOffset = '';
+        if (spatial) {
+          cOffset = `let cOffset = ${
+              yShape.length === 1   ? '0u' :
+                  format === 'NHWC' ? `outputIndices[${yShape.length - 1}] / ${components}` :
+                                      'outputIndices[1]'};`;
+        } else {
+          if (format === 'NCHW') {
+            cOffset = `
+            ${y.indicesSet('outputIndices', '0', '0')}
+            let cOffset = ${y.indicesToOffset('outputIndices')};`;
+          } else {
+            // update C channel.
+            cOffset = `var cIndices = ${scale.type.indices}(0);
+                       cIndices[0] = outputIndices[${yShape.length - 1}];`;
+            // update D1 x ... x Dn channels.
+            for (let i = 1; i < scale.rank; i++) {
+              cOffset += `cIndices[${i}] = outputIndices[${i}];`;
+            }
+            cOffset += `let cOffset = ${scale.indicesToOffset('cIndices')};`;
+          }
+        }
+        return cOffset;
+      };
+      const getInferenceModeShaderSource = (helper: ShaderHelper) => `
+  const epsilon = ${epsilon};
+  ${helper.registerUniform('outputSize', 'u32').declareVariables(x, scale, bias, inputMean, inputVar, y)}
+  ${helper.mainStart()}
+  ${helper.guardAgainstOutOfBoundsWorkgroupSizes('uniforms.outputSize')}
+    var outputIndices = ${y.offsetToIndices(`global_idx * ${components}`)};
+    ${calcCOffset()}
+    let scale = ${scale.getByOffset('cOffset')};
+    let bias = ${bias.getByOffset('cOffset')};
+    let inputMean = ${inputMean.getByOffset('cOffset')};
+    let inputVar = ${inputVar.getByOffset('cOffset')};
+    let x = ${x.getByOffset('global_idx')};
+    let value = (x - inputMean) / sqrt(inputVar + epsilon) * scale + bias;
+    ${y.setByOffset('global_idx', 'value')}
+  }`;
+      return {
+        name: 'BatchNormalization',
+        shaderCache: {
+          hint: `${attributes.epsilon}_${attributes.format}_${spatial}_${components}`,
+          inputDependencies: useShapesUniforms ? ['rank', 'type', 'type', 'type', 'type'] : undefined,
+        },
+        getShaderSource: getInferenceModeShaderSource,
+        getRunData: () => ({
+          outputs: [{dims: inputs[0].dims, dataType: inputs[0].dataType}],
+          dispatchGroup: {x: Math.ceil(outputSize / 64 /* workgroup size */)},
+          programUniforms: useShapesUniforms ?
+              [
+                {type: 'uint32', data: outputSize},
+                ...createTensorShapeVariables(yShape),
+              ] :
+              [
+                {type: 'uint32', data: outputSize},
+              ],
+        }),
+      };
+    };
+
+export const parseBatchNormAttributes = (attributes: Record<string, unknown>): BatchNormAttributes =>
+    createAttributeWithCacheKey(attributes as Omit<BatchNormAttributes, keyof AttributeWithCacheKey>);
+
+export const batchNorm = (context: ComputeContext, attributes: Record<string, unknown>): void => {
+  const {inputs, outputCount} = context;
+  const updatedAttributes = parseBatchNormAttributes({...attributes, outputCount});
+  if (env.webgpu.validateInputContent) {
+    validateInputs(inputs, updatedAttributes);
+  }
+  if (attributes.trainingMode) {
+    throw new Error('BatchNormalization trainingMode is not supported yet.');
+  } else {
+    context.compute(createBatchNormInferenceProgramInfo(inputs, updatedAttributes));
+  }
+};
diff --git a/js/web/lib/wasm/jsep/webgpu/ops/bias-add.ts b/js/web/lib/wasm/jsep/webgpu/ops/bias-add.ts
new file mode 100644
index 0000000000000..e2b8412000ef9
--- /dev/null
+++ b/js/web/lib/wasm/jsep/webgpu/ops/bias-add.ts
@@ -0,0 +1,65 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+import {TensorView} from '../../tensor-view';
+import {ShapeUtil} from '../../util';
+import {ComputeContext, ProgramInfo} from '../types';
+
+import {inputVariable, outputVariable, ShaderHelper} from './common';
+
+const validateInputs = (inputs: readonly TensorView[]): void => {
+  if (inputs[0].dims.length !== 3) {
+    throw new Error('input should have 3 dimensions');
+  }
+
+  if (![320, 640, 1280].includes(inputs[0].dims[2])) {
+    throw new Error('number of channels should be 320, 640 or 1280');
+  }
+
+  if (inputs[1].dims.length !== 1) {
+    throw new Error('bias is expected to have 1 dimensions');
+  }
+
+  if (inputs[0].dims[2] !== inputs[1].dims[0]) {
+    throw new Error('last dimension of input and bias are not the same');
+  }
+};
+
+const createBiasAddProgramInfo = (inputs: readonly TensorView[]): ProgramInfo => {
+  const outputShape = inputs[0].dims;
+
+  const channels = inputs[0].dims[2];
+  // since channel number can be only 320/640/1280, it's always divisable by 4
+  const outputSize = ShapeUtil.size(outputShape) / 4;
+
+  const dataType = inputs[0].dataType;
+  const input = inputVariable('input', dataType, outputShape, 4);
+  const bias = inputVariable('bias', dataType, [channels], 4);
+  const residual = inputVariable('residual', dataType, outputShape, 4);
+  const output = outputVariable('output', dataType, outputShape, 4);
+
+  const getShaderSource = (shaderHelper: ShaderHelper) => `
+  const channels = ${channels}u / 4;
+  ${shaderHelper.declareVariables(input, bias, residual, output)}
+
+  ${shaderHelper.mainStart()}
+    ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes(outputSize)}
+    let value = ${input.getByOffset('global_idx')}
+      + ${bias.getByOffset('global_idx % channels')} + ${residual.getByOffset('global_idx')};
+    ${output.setByOffset('global_idx', 'value')}
+  }`;
+
+  return {
+    name: 'BiasAdd',
+    getRunData: () => ({
+      outputs: [{dims: outputShape, dataType: inputs[0].dataType}],
+      dispatchGroup: {x: Math.ceil(outputSize / 64 /* workgroup size */)}
+    }),
+    getShaderSource,
+  };
+};
+
+export const biasAdd = (context: ComputeContext): void => {
+  validateInputs(context.inputs);
+  context.compute(createBiasAddProgramInfo(context.inputs));
+};
diff --git a/js/web/lib/wasm/jsep/webgpu/ops/bias-split-gelu.ts b/js/web/lib/wasm/jsep/webgpu/ops/bias-split-gelu.ts
new file mode 100644
index 0000000000000..a81a7a8f1df5c
--- /dev/null
+++ b/js/web/lib/wasm/jsep/webgpu/ops/bias-split-gelu.ts
@@ -0,0 +1,73 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+import {TensorView} from '../../tensor-view';
+import {ShapeUtil} from '../../util';
+import {ComputeContext, ProgramInfo} from '../types';
+
+import {inputVariable, outputVariable, ShaderHelper, tensorTypeToWsglStorageType} from './common';
+import {erfImpl} from './unary-op';
+
+const validateInputs = (inputs: readonly TensorView[]): void => {
+  if (inputs[0].dims.length !== 3) {
+    throw new Error('input should have 3 dimensions');
+  }
+
+  if (![2560, 5120, 10240].includes(inputs[0].dims[2])) {
+    throw new Error('hidden state should be 2560, 5120 or 10240');
+  }
+
+  if (inputs[1].dims.length !== 1) {
+    throw new Error('bias is expected to have 1 dimensions');
+  }
+
+  if (inputs[0].dims[2] !== inputs[1].dims[0]) {
+    throw new Error('last dimension of input and bias are not the same');
+  }
+};
+
+const createBiasSplitGeluProgramInfo = (inputs: readonly TensorView[]): ProgramInfo => {
+  const outputShape = inputs[0].dims.slice();
+  outputShape[2] = outputShape[2] / 2;
+
+  const input = inputVariable('input', inputs[0].dataType, inputs[0].dims, 4);
+  const bias = inputVariable('bias', inputs[0].dataType, [inputs[0].dims[2]], 4);
+  const output = outputVariable('output', inputs[0].dataType, outputShape, 4);
+
+  const outputSize = ShapeUtil.size(outputShape) / 4;
+  const dataType = tensorTypeToWsglStorageType(inputs[0].dataType);
+
+  const getShaderSource = (shaderHelper: ShaderHelper) => `
+  const M_SQRT2 = sqrt(2.0);
+  const halfChannels = ${inputs[0].dims[2] / 4 / 2}u;
+
+  ${shaderHelper.declareVariables(input, bias, output)}
+
+  ${erfImpl(`vec4<${dataType}>`, dataType)}
+
+  ${shaderHelper.mainStart()}
+    ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes(outputSize)}
+    let biasIdx = global_idx % halfChannels;
+    let batchIndex = global_idx / halfChannels;
+    let inputOffset = biasIdx + batchIndex * halfChannels * 2;
+    let valueLeft = input[inputOffset] + bias[biasIdx];
+    let valueRight = input[inputOffset + halfChannels] + bias[biasIdx + halfChannels];
+    let geluRight = valueRight * 0.5 * (erf_vf32(valueRight / M_SQRT2) + 1);
+
+    ${output.setByOffset('global_idx', 'valueLeft * geluRight')}
+  }`;
+
+  return {
+    name: 'BiasSplitGelu',
+    getRunData: () => ({
+      outputs: [{dims: outputShape, dataType: inputs[0].dataType}],
+      dispatchGroup: {x: Math.ceil(outputSize / 64 /* workgroup size */)}
+    }),
+    getShaderSource,
+  };
+};
+
+export const biasSplitGelu = (context: ComputeContext): void => {
+  validateInputs(context.inputs);
+  context.compute(createBiasSplitGeluProgramInfo(context.inputs));
+};
diff --git a/js/web/lib/wasm/jsep/webgpu/ops/binary-op.ts b/js/web/lib/wasm/jsep/webgpu/ops/binary-op.ts
index 9c05080f7e118..c033c0ba05356 100644
--- a/js/web/lib/wasm/jsep/webgpu/ops/binary-op.ts
+++ b/js/web/lib/wasm/jsep/webgpu/ops/binary-op.ts
@@ -4,9 +4,9 @@
 import {DataType} from '../../../wasm-common';
 import {TensorView} from '../../tensor-view';
 import {BroadcastUtil, ShapeUtil} from '../../util';
-import {ComputeContext, GpuDataType, ProgramInfo, ProgramInfoLoader, ProgramMetadata} from '../types';
+import {ComputeContext, ProgramInfo} from '../types';
 
-import {inputVariable, outputVariable, ShaderHelper} from './common';
+import {createTensorShapeVariables, enableShapesUniforms, inputVariable, outputVariable, ShaderHelper} from './common';
 
 type BuiltinFunctionName = string;
 type BinaryCustomExpression = (expressionA: string, expressionB: string) => string;
@@ -17,11 +17,9 @@ type BinaryFunctionCall = BuiltinFunctionName|BinaryCustomExpression|{
 
 const createBinaryOpProgramShader =
     (shaderHelper: ShaderHelper, dimsA: readonly number[], dimsB: readonly number[], dimsOutput: readonly number[],
-     vectorize: boolean, doBroadcast: boolean, funcCall: BinaryFunctionCall, typeA: number, typeB: number,
-     typeOutput: number, additionalImplementation?: string) => {
-      const outputSize = ShapeUtil.size(dimsOutput);
-      const vecSize = Math.ceil(outputSize / 4);
-
+     vectorize: boolean, doBroadcast: boolean, sharedDimensionDivisibleBy4: boolean, funcCall: BinaryFunctionCall,
+     typeA: number, typeB: number, typeOutput: number, useShapesUniforms: boolean,
+     additionalImplementation?: string) => {
       let expressionScalar: BinaryCustomExpression;
       let expressionVector: BinaryCustomExpression;
       if (typeof funcCall === 'string') {
@@ -33,37 +31,20 @@ const createBinaryOpProgramShader =
         expressionVector = funcCall.vector;
       }
 
-      let broadcastImpl = '';
-      const output = outputVariable('outputData', typeOutput, dimsOutput, 4);
-      const a = inputVariable('aData', typeA, dimsA, 4);
-      const b = inputVariable('bData', typeB, dimsB, 4);
-      if (doBroadcast) {
-        const calcOffsetImpl = (dims: readonly number[]) => {
-          const strides = ShapeUtil.computeStrides(dims);
-          const offsets: string[] = [];
-          for (let i = dims.length - 1; i >= 0; i--) {
-            const idx = output.indicesGet('outputIndices', i + dimsOutput.length - dims.length);
-            offsets.push(`${strides[i]}u * (${idx} % ${dims[i]}u)`);
-          }
-          return offsets.length > 0 ? offsets.join('+') : '0u';
-        };
-
-        broadcastImpl = `
-          fn calcOffsetA(outputIndices: ${output.type.indices}) -> u32 {
-            return ${calcOffsetImpl(dimsA)};
-          }
-
-          fn calcOffsetB(outputIndices: ${output.type.indices}) -> u32 {
-            return ${calcOffsetImpl(dimsB)};
-          }
-        `;
-      }
+      const inputAShapeOrRank = useShapesUniforms ? dimsA.length : dimsA;
+      const inputBShapeOrRank = useShapesUniforms ? dimsB.length : dimsB;
+      const outputShapeOrRank = useShapesUniforms ? dimsOutput.length : dimsOutput;
+      const output = outputVariable('outputData', typeOutput, outputShapeOrRank, 4);
+      const a = inputVariable('aData', typeA, inputAShapeOrRank, 4);
+      const b = inputVariable('bData', typeB, inputBShapeOrRank, 4);
 
       let assignment: string;
       if (vectorize) {
         if (doBroadcast) {
           const isAOneElement = ShapeUtil.size(dimsA) === 1;
           const isBOneElement = ShapeUtil.size(dimsB) === 1;
+          const aLastDimDivisibleBy4 = dimsA.length > 0 && dimsA[dimsA.length - 1] % 4 === 0;
+          const bLastDimDivisibleBy4 = dimsB.length > 0 && dimsB[dimsB.length - 1] % 4 === 0;
           if (isAOneElement || isBOneElement) {
             assignment = output.setByOffset(
                 'global_idx',
@@ -73,11 +54,18 @@ const createBinaryOpProgramShader =
           } else {
             assignment = `
             let outputIndices = ${output.offsetToIndices('global_idx * 4u')};
-            let offsetA = calcOffsetA(outputIndices);
-            let offsetB = calcOffsetB(outputIndices);
+            let offsetA = ${a.broadcastedIndicesToOffset('outputIndices', output)};
+            let offsetB = ${b.broadcastedIndicesToOffset('outputIndices', output)};
             ${
                 output.setByOffset(
-                    'global_idx', expressionVector(a.getByOffset('offsetA / 4u'), b.getByOffset('offsetB / 4u')))}
+                    'global_idx',
+                    expressionVector(
+                        sharedDimensionDivisibleBy4 || aLastDimDivisibleBy4 ?
+                            a.getByOffset('offsetA / 4u') :
+                            `${a.type.value}(${a.getByOffset('offsetA / 4u')}[offsetA % 4u])`,
+                        sharedDimensionDivisibleBy4 || bLastDimDivisibleBy4 ?
+                            b.getByOffset('offsetB / 4u') :
+                            `${b.type.value}(${b.getByOffset('offsetB / 4u')}[offsetB % 4u])`))}
           `;
           }
         } else {
@@ -94,8 +82,8 @@ const createBinaryOpProgramShader =
           const expressionB = `bData[indexB${x}][componentB${x}]`;
           return `
             let outputIndices${x} = ${output.offsetToIndices(`global_idx * 4u + ${x}u`)};
-            let offsetA${x} = calcOffsetA(outputIndices${x});
-            let offsetB${x} = calcOffsetB(outputIndices${x});
+            let offsetA${x} = ${a.broadcastedIndicesToOffset(`outputIndices${x}`, output)};
+            let offsetB${x} = ${b.broadcastedIndicesToOffset(`outputIndices${x}`, output)};
             let indexA${x} = offsetA${x} / 4u;
             let indexB${x} = offsetB${x} / 4u;
             let componentA${x} = offsetA${x} % 4u;
@@ -122,28 +110,28 @@ const createBinaryOpProgramShader =
       }
 
       return `
-        ${shaderHelper.declareVariables(a, b, output)}
+        ${shaderHelper.registerUniform('vec_size', 'u32').declareVariables(a, b, output)}
 
         ${additionalImplementation ?? ''}
-        ${broadcastImpl}
 
         ${shaderHelper.mainStart()}
-        ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes(vecSize)}
+        ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes('uniforms.vec_size')}
         ${assignment}
       }`;
     };
 
 const createBinaryOpProgramInfo =
-    (metadata: ProgramMetadata, a: TensorView, b: TensorView, funcCall: BinaryFunctionCall,
+    (name: string, cacheKey: string, a: TensorView, b: TensorView, funcCall: BinaryFunctionCall,
      additionalImplementation?: string, outputDataType: number = a.dataType): ProgramInfo => {
       const isBroadcast = !ShapeUtil.areEqual(a.dims, b.dims);
       let outputShape = a.dims;
       let outputSize = ShapeUtil.size(a.dims);
 
       let vectorize = false;
+      let sharedDimensionDivisibleBy4 = false;
 
       // TODO: deal with zero-sized tensors (eg. dims=[1,0])
-
+      const cacheKeyAux = [isBroadcast];
       if (isBroadcast) {
         const calculatedShape = BroadcastUtil.calcShape(a.dims, b.dims, false);
         if (!calculatedShape) {
@@ -153,7 +141,12 @@ const createBinaryOpProgramInfo =
         outputSize = ShapeUtil.size(outputShape);
         const isAOneElement = ShapeUtil.size(a.dims) === 1;
         const isBOneElement = ShapeUtil.size(b.dims) === 1;
-
+        const aLastDimDivisibleBy4 = a.dims.length > 0 && a.dims[a.dims.length - 1] % 4 === 0;
+        const bLastDimDivisibleBy4 = b.dims.length > 0 && b.dims[b.dims.length - 1] % 4 === 0;
+        cacheKeyAux.push(isAOneElement);
+        cacheKeyAux.push(isBOneElement);
+        cacheKeyAux.push(aLastDimDivisibleBy4);
+        cacheKeyAux.push(bLastDimDivisibleBy4);
         // check whether vectorize can be enabled
         let sharedDimension = 1;
         for (let i = 1; i < outputShape.length; i++) {
@@ -165,60 +158,76 @@ const createBinaryOpProgramInfo =
             break;
           }
         }
-        if (sharedDimension % 4 === 0 || isAOneElement || isBOneElement) {
+        if (sharedDimension % 4 === 0) {
+          sharedDimensionDivisibleBy4 = true;
+          vectorize = true;
+        } else if (isAOneElement || isBOneElement || aLastDimDivisibleBy4 || bLastDimDivisibleBy4) {
           vectorize = true;
         }
       } else {
         // element-wise
         vectorize = true;
       }
-
+      cacheKeyAux.push(vectorize);
+      const useShapesUniforms = enableShapesUniforms(a.dims.length) && enableShapesUniforms(b.dims.length) &&
+          enableShapesUniforms(outputShape.length);
       return {
-        ...metadata,
+        name,
+        shaderCache: {
+          hint: cacheKey + cacheKeyAux.map((x) => x.toString()).join('_'),
+          inputDependencies: useShapesUniforms ? ['rank', 'rank'] : ['dims', 'dims'],
+        },
         getShaderSource: (shaderHelper) => createBinaryOpProgramShader(
-            shaderHelper, a.dims, b.dims, outputShape, vectorize, isBroadcast, funcCall, a.dataType, b.dataType,
-            outputDataType, additionalImplementation),
-        outputs: [{dims: outputShape, dataType: outputDataType, gpuDataType: GpuDataType.default}],
-        dispatchGroup: () => ({x: Math.ceil(outputSize / 64 /* workgroup size */ / 4 /* component size */)})
+            shaderHelper, a.dims, b.dims, outputShape, vectorize, isBroadcast, sharedDimensionDivisibleBy4, funcCall,
+            a.dataType, b.dataType, outputDataType, useShapesUniforms, additionalImplementation),
+        getRunData: () => ({
+          outputs: [{dims: outputShape, dataType: outputDataType}],
+          dispatchGroup: {x: Math.ceil(outputSize / 64 /* workgroup size */ / 4 /* component size */)},
+          programUniforms: useShapesUniforms ?
+              [
+                {type: 'uint32', data: Math.ceil(ShapeUtil.size(outputShape) / 4)},
+                ...createTensorShapeVariables(a.dims),
+                ...createTensorShapeVariables(b.dims),
+                ...createTensorShapeVariables(outputShape),
+              ] :
+              [
+                {type: 'uint32', data: Math.ceil(ShapeUtil.size(outputShape) / 4)},
+              ],
+        }),
       };
     };
 
-const createBinaryOpProgramInfoLoader =
-    (inputs: readonly TensorView[], name: string, funcCall: BinaryFunctionCall, additionalImplementation?: string,
-     cacheKey?: string, outputDataType?: number): ProgramInfoLoader => {
-      const metadata:
-          ProgramMetadata = {name, inputTypes: [GpuDataType.default, GpuDataType.default], cacheHint: cacheKey};
-      return {
-        ...metadata,
-        get: () => createBinaryOpProgramInfo(
-            metadata, inputs[0], inputs[1], funcCall, additionalImplementation, outputDataType)
-      };
+const runBinaryOp =
+    (context: ComputeContext, name: string, funcCall: BinaryFunctionCall, additionalImplementation?: string,
+     cacheKey?: string, outputDataType?: number): void => {
+      context.compute(createBinaryOpProgramInfo(
+          name, cacheKey ?? '', context.inputs[0], context.inputs[1], funcCall, additionalImplementation,
+          outputDataType));
     };
 
 export const add = (context: ComputeContext): void => {
-  context.compute(createBinaryOpProgramInfoLoader(context.inputs, 'Add', (a, b) => `${a}+${b}`));
+  runBinaryOp(context, 'Add', (a, b) => `${a}+${b}`);
 };
 
 export const div = (context: ComputeContext): void => {
-  context.compute(createBinaryOpProgramInfoLoader(context.inputs, 'Div', (a, b) => `${a}/${b}`));
+  runBinaryOp(context, 'Div', (a, b) => `${a}/${b}`);
 };
 
 export const equal = (context: ComputeContext): void => {
-  context.compute(createBinaryOpProgramInfoLoader(
-      context.inputs, 'Equal', ({scalar: (a, b) => `u32(${a}==${b})`, vector: (a, b) => `vec4<u32>(${a}==${b})`}),
-      undefined, undefined, DataType.bool));
+  runBinaryOp(
+      context, 'Equal', ({scalar: (a, b) => `u32(${a}==${b})`, vector: (a, b) => `vec4<u32>(${a}==${b})`}), undefined,
+      undefined, DataType.bool);
 };
 
 export const mul = (context: ComputeContext): void => {
-  context.compute(createBinaryOpProgramInfoLoader(context.inputs, 'Mul', (a, b) => `${a}*${b}`));
+  runBinaryOp(context, 'Mul', (a, b) => `${a}*${b}`);
 };
 
 export const pow = (context: ComputeContext): void => {
   const type = inputVariable('input', context.inputs[0].dataType, context.inputs[0].dims).type.value;
   const roundStr = type === 'i32' ? 'round' : '';
-  context.compute(createBinaryOpProgramInfoLoader(
-      context.inputs, 'Pow',
-      ({scalar: (a, b) => `pow_custom(${a},${b})`, vector: (a, b) => `pow_vector_custom(${a},${b})`}),
+  runBinaryOp(
+      context, 'Pow', ({scalar: (a, b) => `pow_custom(${a},${b})`, vector: (a, b) => `pow_vector_custom(${a},${b})`}),
       `
     fn pow_custom(a : ${type}, b : ${type}) -> ${type} {
       if (b == ${type}(0.0)) {
@@ -233,34 +242,33 @@ export const pow = (context: ComputeContext): void => {
       // TODO: implement vectorized pow
       return vec4<${type}>(pow_custom(a.x, b.x), pow_custom(a.y, b.y), pow_custom(a.z, b.z), pow_custom(a.w, b.w));
     }
-      `));
+      `);
 };
 
 export const sub = (context: ComputeContext): void => {
-  context.compute(createBinaryOpProgramInfoLoader(context.inputs, 'Sub', (a, b) => `${a}-${b}`));
+  runBinaryOp(context, 'Sub', (a, b) => `${a}-${b}`);
 };
 
 export const greater = (context: ComputeContext): void => {
-  context.compute(createBinaryOpProgramInfoLoader(
-      context.inputs, 'Greater', ({scalar: (a, b) => `u32(${a}>${b})`, vector: (a, b) => `vec4<u32>(${a}>${b})`}),
-      undefined, undefined, DataType.bool));
+  runBinaryOp(
+      context, 'Greater', ({scalar: (a, b) => `u32(${a}>${b})`, vector: (a, b) => `vec4<u32>(${a}>${b})`}), undefined,
+      undefined, DataType.bool);
 };
 
 export const less = (context: ComputeContext): void => {
-  context.compute(createBinaryOpProgramInfoLoader(
-      context.inputs, 'Less', ({scalar: (a, b) => `u32(${a}<${b})`, vector: (a, b) => `vec4<u32>(${a}<${b})`}),
-      undefined, undefined, DataType.bool));
+  runBinaryOp(
+      context, 'Less', ({scalar: (a, b) => `u32(${a}<${b})`, vector: (a, b) => `vec4<u32>(${a}<${b})`}), undefined,
+      undefined, DataType.bool);
 };
 
 export const greaterOrEqual = (context: ComputeContext): void => {
-  context.compute(createBinaryOpProgramInfoLoader(
-      context.inputs, 'GreaterOrEqual',
-      ({scalar: (a, b) => `u32(${a}>=${b})`, vector: (a, b) => `vec4<u32>(${a}>=${b})`}), undefined, undefined,
-      DataType.bool));
+  runBinaryOp(
+      context, 'GreaterOrEqual', ({scalar: (a, b) => `u32(${a}>=${b})`, vector: (a, b) => `vec4<u32>(${a}>=${b})`}),
+      undefined, undefined, DataType.bool);
 };
 
 export const lessOrEqual = (context: ComputeContext): void => {
-  context.compute(createBinaryOpProgramInfoLoader(
-      context.inputs, 'LessOrEqual', ({scalar: (a, b) => `u32(${a}<=${b})`, vector: (a, b) => `vec4<u32>(${a}<=${b})`}),
-      undefined, undefined, DataType.bool));
+  runBinaryOp(
+      context, 'LessOrEqual', ({scalar: (a, b) => `u32(${a}<=${b})`, vector: (a, b) => `vec4<u32>(${a}<=${b})`}),
+      undefined, undefined, DataType.bool);
 };
diff --git a/js/web/lib/wasm/jsep/webgpu/ops/common.ts b/js/web/lib/wasm/jsep/webgpu/ops/common.ts
index 0ab777bfbdee9..b7a391ee667bb 100644
--- a/js/web/lib/wasm/jsep/webgpu/ops/common.ts
+++ b/js/web/lib/wasm/jsep/webgpu/ops/common.ts
@@ -3,6 +3,7 @@
 
 import {DataType} from '../../../wasm-common';
 import {ShapeUtil} from '../../util';
+import {ProgramUniform} from '../types';
 
 /**
  * constant value for a workgroup size.
@@ -57,10 +58,11 @@ interface IndicesHelperTypes {
  * create an instance of an indices helper:
  * - `inputVariable()`: create an indices helper instance for an input.
  * - `outputVariable()`: create an indices helper instance for an output.
+ * - `internalVariable()`: create an indices helper instance for an internal variable.
  *
  * An indices helper instance contains helper functions for the following operations:
  * - access readonly basic information, including: `name`(the name of the input or output), `usage`(whether it's an
- * input or an output) and `shape`(the passed in shape).
+ * input, an output or an internal variable) and `shape`(the passed in shape).
  * - `type`: access readonly type information, including: `indices`(the type of indices), `value`(the type of value at
  * runtime), `storage`(the type of value at storage) and `tensor`(the tensor type as represented in TensorView).
  * - generate WGSL code for getting indices from offset. Use `offsetToIndices()` for WGSL code snippet to calculate
@@ -102,6 +104,16 @@ export interface IndicesHelper {
    */
   readonly indicesToOffset: (varIndices: string) => string;
 
+  /**
+   * WGSL code of an `u32` expression for getting original offset from broadcasted indices.
+   *
+   * @param varIndices - a `type.indices` expression representing the output indices.
+   * @param output - output IndicesHelper.
+   *
+   * @returns an `u32` expression
+   */
+  readonly broadcastedIndicesToOffset: (varIndices: string, output: IndicesHelper) => string;
+
   /**
    * WGSL code of generating an indices literal
    *
@@ -181,14 +193,24 @@ export interface IndicesHelper {
   readonly name: string;
 
   /**
-   * whether the helper is for an input or an output.
+   * whether the helper is for an input, an output or an internal variable.
+   */
+  readonly usage: 'input'|'output'|'internal';
+
+  /**
+   * the rank of the input or output.
+   */
+  readonly rank: number;
+
+  /**
+   * a string representing the variable name for the shape of the input or output.
    */
-  readonly usage: 'input'|'output';
+  readonly shape: string;
 
   /**
-   * the shape of the input or output.
+   * a string representing the variable name for the strides of the input or output.
    */
-  readonly shape: readonly number[];
+  readonly strides: string;
 }
 
 const getWgslMappedType = (type: number, components: 1|2|3|4): string|[string, string] => {
@@ -237,20 +259,88 @@ export const tensorTypeToWsglValueType = (type: DataType, components: 1|2|3|4 =
   return typeof mappedType === 'string' ? mappedType : mappedType[1];
 };
 
+export const createTensorShapeVariables = (dims: readonly number[]): ProgramUniform[] =>
+    dims.length === 0 ? [] : [{type: 'uint32', data: dims}, {type: 'uint32', data: ShapeUtil.computeStrides(dims)}];
+
+/**
+ * A helper function to get maximum vector size for specified data length
+ * @param size
+ */
+export const getMaxComponents = (size: number) => {
+  // we cannot use vec3 type since it has alignment of 16 bytes
+  if (size % 4 === 0) {
+    return 4;
+  } else if (size % 2 === 0) {
+    return 2;
+  }
+
+  return 1;
+};
+
+/**
+ * A helper function that initializes variable as a scalar or vector. e.g. f32(0) or vec4f(0,0,0,0)
+ * @param dataType
+ * @param components
+ * @param value
+ */
+export const fillVector = (dataType = 'f32', components?: number, value = '0') => {
+  if (!components || components === 1) {
+    return `${dataType}(${value})`;
+  }
+
+  return `vec${components}<${dataType}>(${value})`;
+};
+
+/**
+ * A helper function that casts value or vector to f32
+ * @param dataType
+ * @param components
+ * @param value
+ */
+export const castToF32 = (dataType: string, components: number, value: string) => {
+  if (dataType === 'f32') {
+    return value;
+  }
+  if (components === 1) {
+    return `f32(${value})`;
+  }
+
+  return `vec${components}f(${value})`;
+};
+
+/**
+ * A helper function that returns scalar or sums all components of a vector
+ * @param name
+ * @param components
+ */
+export const sumVector = (name: string, components: number) => {
+  if (components === 4) {
+    return `(${name}.x + ${name}.y + ${name}.z + ${name}.w)`;
+  } else if (components === 2) {
+    return `(${name}.x + ${name}.y)`;
+  } else if (components === 3) {
+    return `(${name}.x + ${name}.y + ${name}.z)`;
+  }
+
+  return name;
+};
+
 /**
  * A helper function to get a IndicesHelper for a given input or output.
  *
  * @param name - the name of the input or output.
  * @param tensorType - the tensor type of the input or output.
- * @param shape - the tensor shape of the input or output.
- * @param isInput - whether the helper is for an input or an output.
+ * @param shapeOrRank - the tensor shape or the rank of the input or output.
+ * @param usage - the usage of the indices helper.
  * @param components - indicates the number of components of each element. 1 for scalar, 2 for vec2, 3 for vec3, 4 for
  *    vec4.
  */
 const createIndicesHelper =
-    (name: string, tensorType: number, shape: readonly number[], isInput: boolean,
+    (name: string, tensorType: number, shapeOrRank: number|readonly number[], usage: IndicesHelper['usage'],
      components: 1|2|3|4): IndicesHelper => {
-      const rank = shape.length;
+      const useUniform = typeof shapeOrRank === 'number';
+      const rank = useUniform ? shapeOrRank : shapeOrRank.length;
+      const rankIdentity = [...new Array(rank).keys()];
       const indicesType = rank < 2 ? 'u32' : rank <= 4 ? `vec${rank}<u32>` : `array<u32, ${rank}>`;
       const mappedType = getWgslMappedType(tensorType, components);
       const valueType = typeof mappedType === 'string' ? mappedType : mappedType[1];
@@ -262,18 +352,21 @@ const createIndicesHelper =
       const implementationUsed = {
         offsetToIndices: false,
         indicesToOffset: false,
+        broadcastedIndicesToOffset: false,
         set: false,
         setByIndices: false,
         get: false,
         getByIndices: false,
       };
 
-      const strides = ShapeUtil.computeStrides(shape);
+      const uniformPrefix = useUniform ? 'uniforms.' : '';
+      const shape = `${uniformPrefix}${name}_shape`;
+      const strides = `${uniformPrefix}${name}_strides`;
       let o2iSnippet = '';
       for (let i = 0; i < rank - 1; i++) {
         o2iSnippet += `
-    let dim${i} = current / ${strides[i]}u;
-    let rest${i} = current % ${strides[i]}u;
+    let dim${i} = current / ${strides}[${i}];
+    let rest${i} = current % ${strides}[${i}];
     indices[${i}] = dim${i};
     current = rest${i};
     `;
@@ -296,7 +389,7 @@ const createIndicesHelper =
       const offsets: string[] = [];
       if (rank >= 2) {
         for (let i = rank - 1; i >= 0; i--) {
-          offsets.push(`${strides[i]}u * (indices[${i}])`);
+          offsets.push(`${strides}[${i}] * (indices[${i}])`);
         }
       }
 
@@ -329,6 +422,26 @@ const createIndicesHelper =
         }
       };
 
+      const broadcastedIndicesToOffsetImplementation: {[key: string]: string} = {};
+      const broadcastedIndicesToOffset = (varIndices: string, output: IndicesHelper) => {
+        implementationUsed.broadcastedIndicesToOffset = true;
+        const implKey = `${output.name}broadcastedIndicesTo${name}Offset`;
+        if (implKey in broadcastedIndicesToOffsetImplementation) {
+          return `${implKey}(${varIndices})`;
+        }
+        const offsets = [];
+        for (let i = rank - 1; i >= 0; i--) {
+          const idx = output.indicesGet('outputIndices', i + output.rank - rank);
+          offsets.push(`${indicesGet(strides, i)} * (${idx} % ${indicesGet(shape, i)})`);
+        }
+        broadcastedIndicesToOffsetImplementation[implKey] =
+            `fn ${implKey}(outputIndices: ${output.type.indices}) -> u32 {
+             return ${offsets.length > 0 ? offsets.join('+') : '0u'};
+           }`;
+
+        return `${implKey}(${varIndices})`;
+      };
+
       const setByOffset = (offset: number|string, value: string) => (() => {
         if (type.storage === type.value) {
           return `${name}[${offset}]=${value};`;
@@ -370,11 +483,11 @@ const createIndicesHelper =
   }`;
 
       const getImplementation = rank < 2 ? '' : (() => {
-        const params = shape.map((_, i) => `d${i}: u32`).join(', ');
-        const dims = shape.map((_, i) => `d${i}`).join(', ');
+        const functionParams = rankIdentity.map(i => `d${i}: u32`).join(', ');
+        const dimsParams = rankIdentity.map(i => `d${i}`).join(', ');
         return `
-  fn get_${name}(${params}) -> ${valueType} {
-    return get_${name}ByIndices(${indices(dims)});
+  fn get_${name}(${functionParams}) -> ${valueType} {
+    return get_${name}ByIndices(${indices(dimsParams)});
   }`;
       })();
 
@@ -413,11 +526,11 @@ const createIndicesHelper =
   }`;
 
       const setImplementation = rank < 2 ? '' : (() => {
-        const params = shape.map((_, i) => `d${i}: u32`).join(', ');
-        const dims = shape.map((_, i) => `d${i}`).join(', ');
+        const functionParams = rankIdentity.map(i => `d${i}: u32`).join(', ');
+        const dimsParams = rankIdentity.map(i => `d${i}`).join(', ');
         return `
-  fn set_${name}(${params}, value: ${valueType}) {
-    set_${name}ByIndices(${indices(dims)}, value);
+  fn set_${name}(${functionParams}, value: ${valueType}) {
+    set_${name}ByIndices(${indices(dimsParams)}, value);
   }`;
       })();
 
@@ -456,12 +569,19 @@ const createIndicesHelper =
 
       const impl = () => {
         const impls = [];
+        if (!useUniform) {
+          impls.push(`const ${shape} = ${type.indices}(${shapeOrRank.join(',')});`);
+          impls.push(`const ${strides} = ${type.indices}(${ShapeUtil.computeStrides(shapeOrRank).join(',')});`);
+        }
         if (implementationUsed.offsetToIndices) {
           impls.push(offsetToIndicesImplementation);
         }
         if (implementationUsed.indicesToOffset) {
           impls.push(indicesToOffsetImplementation);
         }
+        if (implementationUsed.broadcastedIndicesToOffset) {
+          Object.values(broadcastedIndicesToOffsetImplementation).forEach(impl => impls.push(impl));
+        }
         if (implementationUsed.set) {
           impls.push(setImplementation);
         }
@@ -482,6 +602,7 @@ const createIndicesHelper =
         type,
         offsetToIndices,
         indicesToOffset,
+        broadcastedIndicesToOffset,
         indices,
         indicesGet,
         indicesSet,
@@ -492,9 +613,11 @@ const createIndicesHelper =
         getByOffset,
         getByIndices,
         // isVec4,
-        usage: isInput ? 'input' : 'output',
+        usage,
         name,
-        shape
+        strides,
+        shape,
+        rank
       };
     };
 
@@ -503,26 +626,41 @@ const createIndicesHelper =
  *
  * @param name - the name of the input.
  * @param type - the tensor type of the input.
- * @param shape - the tensor shape of the input.
+ * @param shapeOrRank - the tensor shape or the rank of the input.
  * @param components - the number of components of the input. available values are 1, 2, 3, 4. default is 1.
  * @returns an IndicesHelper for the input.
  */
 export const inputVariable =
-    (name: string, type: number, shape: readonly number[], components: 1|2|3|4 = 1): IndicesHelper =>
-        createIndicesHelper(name, type, shape, true, components);
+    (name: string, type: number, shapeOrRank: number|readonly number[], components: 1|2|3|4 = 1): IndicesHelper =>
+        createIndicesHelper(name, type, shapeOrRank, 'input', components);
 
 /**
  * Create a IndicesHelper for an output.
  *
  * @param name - the name of the output.
  * @param type - the tensor type of the output.
- * @param shape - the tensor shape of the output.
- * @param components - the number of components of the input. available values are 1, 2, 3, 4. default is 1.
+ * @param shapeOrRank - the tensor shape or the rank of the output.
+ * @param components - the number of components of the output. available values are 1, 2, 3, 4. default is 1.
  * @returns an IndicesHelper for the output.
  */
 export const outputVariable =
-    (name: string, type: number, shape: readonly number[], components: 1|2|3|4 = 1): IndicesHelper =>
-        createIndicesHelper(name, type, shape, false, components);
+    (name: string, type: number, shapeOrRank: number|readonly number[], components: 1|2|3|4 = 1): IndicesHelper =>
+        createIndicesHelper(name, type, shapeOrRank, 'output', components);
+
+/**
+ * Create a IndicesHelper for an internal variable.
+ *
+ * @param name - the name of the variable.
+ * @param type - the tensor type of the variable.
+ * @param shapeOrRank - the tensor shape or the rank of the variable.
+ * @param components - the number of components of the variable. available values are 1, 2, 3, 4. default is 1.
+ * @returns an IndicesHelper for the variable.
+ */
+export const internalVariable =
+    (name: string, type: number, shapeOrRank: number|readonly number[], components: 1|2|3|4 = 1): IndicesHelper =>
+        createIndicesHelper(name, type, shapeOrRank, 'internal', components);
+
+export type UniformsArrayType = Array<{name: string; type: string}>;
 
 /**
  * A ShaderHelper is a helper class for generating WGSL code.
@@ -572,9 +710,28 @@ export interface ShaderHelper {
   declareVariables(...variables: IndicesHelper[]): string;
 
   /**
-   * Get additional implementation that needs to be added to the shader source.
+   * A helper function to register one uniform. Can be called multiple times to register multiple uniforms.
+   *
+   * @param name - the name of the uniform.
+   * @param type - the type of the uniform.
+   */
+  registerUniform(name: string, type: string): ShaderHelper;
+
+  /**
+   * A helper function to register multiple uniforms. Can be called multiple times to register multiple uniforms.
+   *
+   * @param uniforms - an array of uniforms. Each element of the array is an object with 2 properties: `name` and
+   *     `type`.
    */
-  readonly additionalImplementations: string;
+  registerUniforms(uniforms: UniformsArrayType): ShaderHelper;
+
+  /**
+   * A helper function to register multiple internal variables. Can be called multiple times to register multiple
+   * internal variables.
+   *
+   * @param variables - an array of IndicesHelper for the variables.
+   */
+  registerInternalVariables(...variables: IndicesHelper[]): ShaderHelper;
 }
 
 class ShaderHelperImpl implements ShaderHelper {
@@ -595,11 +752,12 @@ class ShaderHelperImpl implements ShaderHelper {
     const paramList = is1DimensionDispatch ? `@builtin(global_invocation_id) global_id : vec3<u32>,
     @builtin(local_invocation_id) local_id : vec3<u32>` :
                                              `@builtin(local_invocation_index) local_index : u32,
-    @builtin(workgroup_id) workgroup_id : vec3<u32>`;
+    @builtin(workgroup_id) workgroup_id : vec3<u32>,
+    @builtin(num_workgroups) num_workgroups : vec3<u32>`;
     const globalIdxDefinition = is1DimensionDispatch ?
         'let global_idx = global_id.x;' :
-        `let global_idx = (workgroup_id.z * ${this.normalizedDispatchGroup[0] * this.normalizedDispatchGroup[1]}u +
-          workgroup_id.y * ${this.normalizedDispatchGroup[0]}u + workgroup_id.x) * ${
+        `let global_idx = (workgroup_id.z * num_workgroups[0] * num_workgroups[1] +
+          workgroup_id.y * num_workgroups[0] + workgroup_id.x) * ${
             workgroupSizeX * workgroupSizeY * workgroupSizeZ}u + local_index;`;
 
     return `@compute @workgroup_size(${workgroupSizeX}, ${workgroupSizeY}, ${workgroupSizeZ})
@@ -608,27 +766,87 @@ class ShaderHelperImpl implements ShaderHelper {
   `;
   }
 
-  declareVariable(variable: IndicesHelper, bindingIndex: number): string {
-    this.indicesHelpers.push(variable);
+  private appendVariableUniforms(variable: IndicesHelper): void {
+    if (variable.rank !== 0) {
+      if (variable.shape.startsWith('uniforms.')) {
+        this.uniforms.push({name: variable.shape.replace('uniforms.', ''), type: variable.type.indices});
+      }
+      if (variable.strides.startsWith('uniforms.')) {
+        this.uniforms.push({name: variable.strides.replace('uniforms.', ''), type: variable.type.indices});
+      }
+    }
+  }
+
+  private declareVariable(variable: IndicesHelper, bindingIndex: number): string {
+    if (variable.usage === 'internal') {
+      throw new Error('cannot use internal variable with declareVariable(). use registerInternalVariables() instead.');
+    }
+    this.variables.push(variable);
+    this.appendVariableUniforms(variable);
+
     const access = variable.usage === 'input' ? 'read' : 'read_write';
     const storageType = variable.type.storage;
     return `@group(0) @binding(${bindingIndex}) var<storage, ${access}> ${variable.name}: array<${storageType}>;`;
   }
 
   declareVariables(...variables: IndicesHelper[]): string {
-    let i = 0;
-    return variables.filter(v => ShapeUtil.size(v.shape) > 0).map(v => this.declareVariable(v, i++)).join('\n');
+    return variables.map(v => this.declareVariable(v, this.variableIndex++)).join('\n');
+  }
+
+  private registerInternalVariable(variable: IndicesHelper): void {
+    if (variable.usage !== 'internal') {
+      throw new Error(
+          'cannot use input or output variable with registerInternalVariable(). use declareVariables() instead.');
+    }
+
+    this.internalVariables.push(variable);
+    this.appendVariableUniforms(variable);
+  }
+
+  registerInternalVariables(...variables: IndicesHelper[]): ShaderHelper {
+    variables.forEach(v => this.registerInternalVariable(v));
+    return this;
+  }
+
+  registerUniform(name: string, type: string): ShaderHelper {
+    this.uniforms.push({name, type});
+    return this;
   }
 
-  private indicesHelpers: IndicesHelper[] = [];
+  registerUniforms(additionalUniforms: UniformsArrayType): ShaderHelper {
+    this.uniforms = this.uniforms.concat(additionalUniforms);
+    return this;
+  }
+
+  private internalVariables: IndicesHelper[] = [];
+  private variables: IndicesHelper[] = [];
+  private uniforms: UniformsArrayType = [];
+  private uniformDeclaration(): string {
+    if (this.uniforms.length === 0) {
+      return '';
+    }
 
+    const uniformSnippets: string[] = [];
+    for (const {name, type} of this.uniforms) {
+      uniformSnippets.push(`${name}:${type}`);
+    }
+
+    return `
+      struct Uniforms { ${uniformSnippets.join(', ')} };
+      @group(0) @binding(${this.variableIndex}) var<uniform> uniforms: Uniforms;`;
+  }
+  private variableIndex = 0;
+
+  /**
+   * Get additional implementation that needs to be added to the shader source.
+   */
   get additionalImplementations(): string {
-    return this.indicesHelpers.map(i => i.impl()).join('\n');
+    return this.uniformDeclaration() + this.variables.map(i => i.impl()).join('\n') +
+        this.internalVariables.map(i => i.impl()).join('\n');
   }
 }
 
-export const createShaderHelper = (dispatchGroup: [number, number, number]): ShaderHelper =>
-    new ShaderHelperImpl(dispatchGroup);
+export const createShaderHelper = (dispatchGroup: [number, number, number]) => new ShaderHelperImpl(dispatchGroup);
 
 /**
  * This function comes from https://github.com/tensorflow/tfjs/blob/master/tfjs-core/src/ops/broadcast_util.ts#L18-L40
@@ -653,3 +871,6 @@ export const getBroadcastDims = (inShape: readonly number[], outShape: readonly
   }
   return dims;
 };
+
+// TODO: remove this limitation once >4D dims are supported by uniform.
+export const enableShapesUniforms = (rank: number): boolean => rank <= 4;
diff --git a/js/web/lib/wasm/jsep/webgpu/ops/concat.ts b/js/web/lib/wasm/jsep/webgpu/ops/concat.ts
index 279632c190ded..43cc4a4c080bd 100644
--- a/js/web/lib/wasm/jsep/webgpu/ops/concat.ts
+++ b/js/web/lib/wasm/jsep/webgpu/ops/concat.ts
@@ -4,9 +4,9 @@
 import {TensorView} from '../../tensor-view';
 import {ShapeUtil} from '../../util';
 import {AttributeWithCacheKey, createAttributeWithCacheKey} from '../attribute-with-cache-key';
-import {ComputeContext, GpuDataType, ProgramInfo, ProgramInfoLoader, ProgramMetadata} from '../types';
+import {ComputeContext, ProgramInfo, ProgramInputTensorInfoDependency, ProgramUniform} from '../types';
 
-import {IndicesHelper, inputVariable, outputVariable, ShaderHelper} from './common';
+import {createTensorShapeVariables, enableShapesUniforms, IndicesHelper, inputVariable, outputVariable, ShaderHelper} from './common';
 
 export interface ConcatAttributes extends AttributeWithCacheKey {
   readonly axis: number;
@@ -33,12 +33,10 @@ const validateInputs = (inputs: readonly TensorView[]): void => {
   }
 };
 
-const createConcatProgramMetadata = (inputCount: number, cacheHint: string) =>
-    ({name: 'Concat', inputTypes: Array(inputCount).fill(GpuDataType.default), cacheHint});
-
-const calculateInputIndexImpl = (numberOfTensors: number): string => `
+const calculateInputIndexImpl = (numberOfTensors: number, sizeInConcatAxisStr: string): string => `
   fn calculateInputIndex(index: u32) -> u32 {
-    for (var i: u32 = 0u; i < ${numberOfTensors}u; i += 1u ) {
+    let sizeInConcatAxis = array<u32, ${numberOfTensors}u>(${sizeInConcatAxisStr});
+    for (var i: u32 = 0u; i < ${numberOfTensors}; i += 1u ) {
       if (index < sizeInConcatAxis[i]) {
         return i;
       }
@@ -65,82 +63,107 @@ const assignOutputData = (inputs: readonly IndicesHelper[], output: IndicesHelpe
   return codeLines.join('\n');
 };
 
-const createConcatProgramInfo =
-    (metadata: ProgramMetadata, inputs: readonly TensorView[], axis: number): ProgramInfo => {
-      const inputShape = inputs[0].dims.slice();
-      if (axis >= inputShape.length || axis < (-1 * inputShape.length)) {
-        throw new Error('axis specified for concat doesn\'t match input dimensionality');
+const createConcatProgramInfo = (inputs: readonly TensorView[], axis: number): ProgramInfo => {
+  const inputShape = inputs[0].dims.slice();
+  if (axis >= inputShape.length || axis < (-1 * inputShape.length)) {
+    throw new Error('axis specified for concat doesn\'t match input dimensionality');
+  }
+  const adjustedAxis = (axis < 0) ? inputShape.length + axis : axis;
+  // ensure all of the non-concatenated axes match each other
+  // calculate the shape of the output tensor while we do that
+  const outputShape = inputShape.slice(0);
+  for (let i = 1; i < inputs.length; i++) {
+    const dataNShape = inputs[i].dims.slice();
+    for (let axisIndex = 0; axisIndex < inputShape.length; axisIndex++) {
+      // add to the placeholder for computing output shape
+      if (axisIndex === adjustedAxis) {
+        outputShape[adjustedAxis] += dataNShape[axisIndex];
       }
-      const adjustedAxis = (axis < 0) ? inputShape.length + axis : axis;
-      // ensure all of the non-concatenated axes match each other
-      // calculate the shape of the output tensor while we do that
-      const outputShape = inputShape.slice(0);
-      for (let i = 1; i < inputs.length; i++) {
-        const dataNShape = inputs[i].dims.slice();
-        for (let axisIndex = 0; axisIndex < inputShape.length; axisIndex++) {
-          // add to the placeholder for computing output shape
-          if (axisIndex === adjustedAxis) {
-            outputShape[adjustedAxis] += dataNShape[axisIndex];
-          }
-          // ensure all non-cancatenated axes match each other
-          else if (inputShape[axisIndex] !== dataNShape[axisIndex]) {
-            throw new Error('non concat dimensions must match');
-          }
-        }
+      // ensure all non-cancatenated axes match each other
+      else if (inputShape[axisIndex] !== dataNShape[axisIndex]) {
+        throw new Error('non concat dimensions must match');
       }
+    }
+  }
 
-      const outputSize = ShapeUtil.size(outputShape);
-
-      const sizeInConcatAxis = new Array<number>(inputs.length);
-      const inputVars = new Array<IndicesHelper>(inputs.length);
-      const dataType = inputs[0].dataType;
+  const outputSize = ShapeUtil.size(outputShape);
+
+  const sizeInConcatAxis = new Array<number>(inputs.length);
+  const inputVars = new Array<IndicesHelper>(inputs.length);
+  const dataType = inputs[0].dataType;
+
+  let previousSum = 0;
+  const inputDependencies: ProgramInputTensorInfoDependency[] = [];
+  const inputShapeOrRanks = [];
+  const enableInputShapesUniforms = [];
+  const programUniforms: ProgramUniform[] = [{type: 'uint32', data: outputSize}];
+  for (let i = 0; i < inputs.length; ++i) {
+    previousSum += inputs[i].dims[adjustedAxis];
+    sizeInConcatAxis[i] = previousSum;
+    enableInputShapesUniforms.push(enableShapesUniforms(inputs[i].dims.length));
+    inputShapeOrRanks.push(enableInputShapesUniforms[i] ? inputs[i].dims.length : inputs[i].dims);
+    inputVars[i] = inputVariable(`input${i}`, dataType, inputShapeOrRanks[i]);
+    inputDependencies.push(enableInputShapesUniforms[i] ? 'rank' : 'dims');
+    programUniforms.push({type: 'uint32', data: sizeInConcatAxis[i]});
+  }
+  for (let i = 0; i < inputs.length; ++i) {
+    if (enableInputShapesUniforms[i]) {
+      programUniforms.push(...createTensorShapeVariables(inputs[i].dims));
+    }
+  }
 
-      let previousSum = 0;
-      for (let i = 0; i < inputs.length; ++i) {
-        previousSum += inputs[i].dims[adjustedAxis];
-        sizeInConcatAxis[i] = previousSum;
+  const enableOutputShapesUniforms = enableShapesUniforms(outputShape.length);
+  if (enableOutputShapesUniforms) {
+    programUniforms.push(...createTensorShapeVariables(outputShape));
+  }
 
-        inputVars[i] = inputVariable(`input${i}`, dataType, inputs[i].dims);
-      }
+  const outputShapeOrRank = enableOutputShapesUniforms ? outputShape.length : outputShape;
+  const output = outputVariable('output', dataType, outputShapeOrRank);
 
-      const output = outputVariable('output', dataType, outputShape);
+  const indicesAxis = output.indicesGet('indices', adjustedAxis);
+  const sizeInConcatAxisStr =
+      Array.from(Array(sizeInConcatAxis.length).keys()).map(i => `uniforms.sizeInConcatAxis${i}`).join(',');
+  const getShaderSource = (shaderHelper: ShaderHelper) => `
 
-      const indicesAxis = output.indicesGet('indices', adjustedAxis);
-      const getShaderSource = (shaderHelper: ShaderHelper) => `
-  ${shaderHelper.declareVariables(...inputVars, output)}
+  ${(() => {
+    shaderHelper.registerUniform('outputSize', 'u32');
+    for (let i = 0; i < inputs.length; i++) {
+      shaderHelper.registerUniform(`sizeInConcatAxis${i}`, 'u32');
+    }
+    return shaderHelper.declareVariables(...inputVars, output);
+  })()}
 
-  const sizeInConcatAxis = array<u32, ${sizeInConcatAxis.length}>(${sizeInConcatAxis.map(i => `${i}u`).join(',')});
-  ${calculateInputIndexImpl(sizeInConcatAxis.length)}
+  ${calculateInputIndexImpl(sizeInConcatAxis.length, sizeInConcatAxisStr)}
 
   ${shaderHelper.mainStart()}
-    ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes(outputSize)}
+    ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes('uniforms.outputSize')}
 
     var indices = ${output.offsetToIndices('global_idx')};
 
     let inputIndex = calculateInputIndex(${indicesAxis});
     if (inputIndex != 0u) {
+      let sizeInConcatAxis = array<u32, ${sizeInConcatAxis.length}u>(${sizeInConcatAxisStr});
       ${indicesAxis} -= sizeInConcatAxis[inputIndex - 1u];
     }
 
     ${assignOutputData(inputVars, output)}
   }`;
-      return {
-        ...metadata,
-        outputs: [{dims: outputShape, dataType: inputs[0].dataType, gpuDataType: GpuDataType.default}],
-        getShaderSource,
-        dispatchGroup: () => ({x: Math.ceil(outputSize / 64 /* workgroup size */)})
-      };
-    };
-
-const createConcatProgramInfoLoader =
-    (inputs: readonly TensorView[], attributes: ConcatAttributes): ProgramInfoLoader => {
-      const metadata = createConcatProgramMetadata(inputs.length, attributes.cacheKey);
-      return {...metadata, get: () => createConcatProgramInfo(metadata, inputs, attributes.axis)};
-    };
+
+  return {
+    name: 'Concat',
+    shaderCache: {hint: `${axis}`, inputDependencies},
+    getRunData: () => ({
+      outputs: [{dims: outputShape, dataType: inputs[0].dataType}],
+      dispatchGroup: {x: Math.ceil(outputSize / 64 /* workgroup size */)},
+      programUniforms,
+    }),
+    getShaderSource,
+  };
+};
 
 export const concat = (context: ComputeContext, attributes: ConcatAttributes): void => {
   validateInputs(context.inputs);
-  context.compute(createConcatProgramInfoLoader(context.inputs, attributes));
+  context.compute(createConcatProgramInfo(context.inputs, attributes.axis));
 };
 
 export const parseConcatAttributes = (attributes: Record<string, unknown>): ConcatAttributes =>
diff --git a/js/web/lib/wasm/jsep/webgpu/ops/conv-grouped.ts b/js/web/lib/wasm/jsep/webgpu/ops/conv-grouped.ts
index 1b7b7e0b29a25..14482272bad38 100644
--- a/js/web/lib/wasm/jsep/webgpu/ops/conv-grouped.ts
+++ b/js/web/lib/wasm/jsep/webgpu/ops/conv-grouped.ts
@@ -3,21 +3,18 @@
 
 import {TensorView} from '../../tensor-view';
 import {ShapeUtil} from '../../util';
-import {GpuDataType, ProgramInfo, ProgramInfoLoader, ProgramMetadata} from '../types';
+import {ProgramInfo} from '../types';
 
 import {inputVariable, outputVariable, ShaderHelper} from './common';
 import {calculateOutputShape, ConvAttributes} from './conv';
-import {getActicationSnippet} from './fuse-utils';
+import {getActivationSnippet} from './fuse-utils';
 
-const createGroupedConvProgramMetadata = (hasBias: boolean, cacheHint: string): ProgramMetadata => ({
-  name: 'GroupedConv',
-  inputTypes: hasBias ? [GpuDataType.default, GpuDataType.default, GpuDataType.default] :
-                        [GpuDataType.default, GpuDataType.default],
-  cacheHint
-});
-
-const createGroupedConvProgramInfo =
-    (inputs: readonly TensorView[], metadata: ProgramMetadata, attributes: ConvAttributes,
+/**
+ * naive grouped conv implementation, supports 1d/2d conv
+ * @param squeezeOutputShapeFunction - an optional function to squeeze the output shape, only used in conv1d
+ */
+export const createGroupedConvProgramInfo =
+    (inputs: readonly TensorView[], attributes: ConvAttributes,
      squeezeOutputShapeFunction?: (shape: readonly number[]) => number[]): ProgramInfo => {
       const hasBias = inputs.length > 2;
       const processBias = hasBias ? 'value += b[output_channel];' : '';
@@ -25,14 +22,13 @@ const createGroupedConvProgramInfo =
       const wShape = inputs[1].dims;
       const outputChannelsPerGroup = wShape[0] / attributes.group;
 
-      const {activationFunction, applyActivation} = getActicationSnippet(attributes);
-
       const isChannelLast = attributes.format === 'NHWC';
       const outputShape = calculateOutputShape(
           xShape, wShape, attributes.dilations, attributes.pads, attributes.strides, isChannelLast);
       const outputSize = ShapeUtil.size(outputShape);
 
       const output = outputVariable('output', inputs[0].dataType, outputShape);
+      const {activationFunction, applyActivation} = getActivationSnippet(attributes, output.type.value);
       const x = inputVariable('x', inputs[0].dataType, xShape);
       const w = inputVariable('w', inputs[1].dataType, wShape);
       const inputVars = [x, w];
@@ -87,27 +83,15 @@ const createGroupedConvProgramInfo =
     ${output.setByOffset('global_idx', 'value')}
   }`;
       return {
-        ...metadata,
-        outputs: [{
-          dims: squeezeOutputShapeFunction ? squeezeOutputShapeFunction(outputShape) : outputShape,
-          dataType: inputs[0].dataType,
-          gpuDataType: GpuDataType.default
-        }],
+        name: 'GroupedConv',
+        shaderCache: {hint: attributes.cacheKey},
+        getRunData: () => ({
+          outputs: [{
+            dims: squeezeOutputShapeFunction ? squeezeOutputShapeFunction(outputShape) : outputShape,
+            dataType: inputs[0].dataType
+          }],
+          dispatchGroup: {x: Math.ceil(outputSize / 64 /* workgroup size */)},
+        }),
         getShaderSource,
-        dispatchGroup: () => ({x: Math.ceil(outputSize / 64 /* workgroup size */)})
-      };
-    };
-
-/**
- * naive grouped conv implementation, supports 1d/2d conv
- * @param squeezeOutputShapeFunction - an optional function to squeeze the output shape, only used in conv1d
- */
-export const createGroupedConvProgramInfoLoader =
-    (inputs: readonly TensorView[], attributes: ConvAttributes,
-     squeezeOutputShapeFunction?: (shape: readonly number[]) => number[]): ProgramInfoLoader => {
-      const metadata = createGroupedConvProgramMetadata(inputs.length > 2, attributes.cacheKey);
-      return {
-        ...metadata,
-        get: () => createGroupedConvProgramInfo(inputs, metadata, attributes, squeezeOutputShapeFunction)
       };
     };
diff --git a/js/web/lib/wasm/jsep/webgpu/ops/conv-transpose.ts b/js/web/lib/wasm/jsep/webgpu/ops/conv-transpose.ts
index e7d1ddf771650..e880afe09a5d8 100644
--- a/js/web/lib/wasm/jsep/webgpu/ops/conv-transpose.ts
+++ b/js/web/lib/wasm/jsep/webgpu/ops/conv-transpose.ts
@@ -1,14 +1,15 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
-import {DataType} from '../../../wasm-common';
 import {TensorView} from '../../tensor-view';
 import {createAttributeWithCacheKey} from '../attribute-with-cache-key';
-import {ComputeContext, GpuDataType, ProgramInfoLoader, ProgramMetadata} from '../types';
+import {ComputeContext} from '../types';
 
+import {createConv2DTransposeMatMulProgramInfo} from './3rd-party/conv_backprop_mm_webgpu';
 import {createConvTranspose2DProgramInfo} from './3rd-party/conv_backprop_webgpu';
 import {ConvAttributes} from './conv';
 import {parseInternalActivationAttributes} from './fuse-utils';
+import {createTransposeProgramInfo} from './transpose';
 
 const computeTotalPad =
     (inDim: number, stride: number, adj: number, kernel: number, dilation: number, outSize: number) =>
@@ -63,7 +64,7 @@ const getAdjustedConvTransposeAttributes =
     <T extends ConvTransposeAttributes>(attributes: T, inputs: readonly TensorView[]): T => {
       const kernelShape = attributes.kernelShape.slice();
       // if kernelShape is not specified in the attributes of this op, infer it from the weight tensor dims
-      if (attributes.kernelShape.length === 0 || attributes.kernelShape.reduce((a, b) => a * b, 0) === 0) {
+      if (attributes.kernelShape.length === 0 || attributes.kernelShape.reduce((a, b) => a * b, 1) === 0) {
         kernelShape.length = 0;
         for (let i = 2; i < inputs[1].dims.length; ++i) {
           kernelShape.push(inputs[1].dims[i]);
@@ -95,9 +96,11 @@ const getAdjustedConvTransposeAttributes =
 
       // always return a new object so does not modify the original attributes
       const newAttributes: T = Object.assign({}, attributes);
-      Object.assign(
-          newAttributes,
-          {kernelShape, pads, outputPadding, outputShape, dilations, strides, cacheKey: attributes.cacheKey});
+      const cacheKey = attributes.cacheKey + [
+        kernelShape.join('n,'), pads.join(','), strides.join(','), outputPadding.join(','), outputShape.join(','),
+        dilations.join(',')
+      ].join('_');
+      Object.assign(newAttributes, {kernelShape, pads, outputPadding, outputShape, dilations, strides, cacheKey});
       return newAttributes;
     };
 
@@ -197,41 +200,62 @@ const validateInputs = (inputs: readonly TensorView[], attributes: ConvTranspose
   if (attributes.outputShape.length !== 0 && attributes.outputShape.length !== inputs[0].dims.length - 2) {
     throw new Error('invalid output shape');
   }
-
-  // TODO : Need to add support for float64
-  if (inputs[0].dataType !== DataType.float || inputs[1].dataType !== DataType.float) {
-    throw new Error('ConvTranspose input(X,W) should be float tensor');
-  }
-
-  if (inputs.length === 3 && inputs[2].dataType !== DataType.float) {
-    throw new Error('ConvTranspose input(bias) should be float tensor');
-  }
 };
 
-const createConvTranspose2DProgramMetadata = (hasBias: boolean, cacheHint: string): ProgramMetadata => ({
-  name: 'ConvTranspose2D',
-  inputTypes: hasBias ? [GpuDataType.default, GpuDataType.default, GpuDataType.default] :
-                        [GpuDataType.default, GpuDataType.default],
-  cacheHint
-});
-
-const createConvTranspose2DProgramInfoLoader =
-    (inputs: readonly TensorView[], attributes: ConvTransposeAttributes,
-     squeezeOutputShapeFunction?: (shape: readonly number[]) => number[]): ProgramInfoLoader => {
-      const hasBias = inputs.length === 3;
-      const metadata = createConvTranspose2DProgramMetadata(hasBias, attributes.cacheKey);
-      return {
-        ...metadata,
-        get: () => createConvTranspose2DProgramInfo(inputs, metadata, attributes, squeezeOutputShapeFunction)
-      };
-    };
+// for transposing weight tensor from [C, M/group, KH, KW] to [KH, KW, M/group, C]
+const weightTransposePerm = [2, 3, 1, 0];
 
 const convTranspose2d =
     (context: ComputeContext, inputs: readonly TensorView[], attributes: ConvTransposeAttributes): void => {
       const adjustedAttributes = getAdjustedConvTransposeAttributes(attributes, inputs);
+      const isChannelsLast = attributes.format === 'NHWC';
+      const hasBias = inputs.length === 3;
+      if (adjustedAttributes.group !== 1) {
+        context.compute(createConvTranspose2DProgramInfo(inputs, adjustedAttributes));
+        return;
+      }
+      const outputShape = adjustedAttributes.outputShape;
+      const outHeight = outputShape[isChannelsLast ? 1 : 2];
+      const outWidth = outputShape[isChannelsLast ? 2 : 3];
+      const outChannels = outputShape[isChannelsLast ? 3 : 1];
+      const weightHeight = inputs[1].dims[2];
+      const weightWidth = inputs[1].dims[3];
+      const inputChannels = inputs[0].dims[isChannelsLast ? 3 : 1];
 
-      context.compute(createConvTranspose2DProgramInfoLoader(inputs, adjustedAttributes));
+      const dimAOuter = isChannelsLast ? outHeight * outWidth : outChannels;
+      const dimBOuter = isChannelsLast ? outChannels : outHeight * outWidth;
+      const dimInner = weightHeight * weightWidth * inputChannels;
+
+      const sequentialAccessByThreads = /* backend.adapterInfo.isIntel() */ true;
+
+
+      // STEP.1: transpose weight
+      const transposedWeight = (context.kernelCustomData.wT as TensorView | undefined) ??
+          context.compute(
+              createTransposeProgramInfo(inputs[1], weightTransposePerm),
+              {inputs: [1], outputs: [attributes.wIsConst ? -2 : -1]})[0];
+      if (attributes.wIsConst && !context.kernelCustomData.wT) {
+        context.kernelCustomData.wT = transposedWeight;
+      }
+
+      // STEP.2: prepare reshaped inputs
+      const convTransposeInputs = [inputs[0], transposedWeight];
+      if (hasBias) {
+        if (!isChannelsLast && inputs[2].dims.length === 1) {
+          convTransposeInputs.push(inputs[2].reshape([inputs[2].dims[0], 1, 1]));
+        } else {
+          convTransposeInputs.push(inputs[2]);
+        }
+      }
+
+      // STEP.3: compute matmul
+      context.compute(
+          createConv2DTransposeMatMulProgramInfo(
+              convTransposeInputs, adjustedAttributes, outputShape, dimAOuter, dimBOuter, dimInner, hasBias,
+              sequentialAccessByThreads),
+          {inputs: convTransposeInputs});
     };
+
 const convTranspose1d = (context: ComputeContext, attributes: ConvTransposeAttributes): void => {
   // extend the input to 2D by adding H dimension
   const isChannelLast = attributes.format === 'NHWC';
@@ -271,7 +295,7 @@ const convTranspose1d = (context: ComputeContext, attributes: ConvTransposeAttri
   kernelShape = [1].concat(kernelShape);
   const adjustedAttributes =
       getAdjustedConvTransposeAttributes({...attributes, pads, strides, dilations, kernelShape}, inputs);
-  context.compute(createConvTranspose2DProgramInfoLoader(
+  context.compute(createConvTranspose2DProgramInfo(
       inputs, adjustedAttributes,
       outputShape => isChannelLast ? [outputShape[0], outputShape[2], outputShape[3]] :
                                      [outputShape[0], outputShape[1], outputShape[3]]));
diff --git a/js/web/lib/wasm/jsep/webgpu/ops/conv.ts b/js/web/lib/wasm/jsep/webgpu/ops/conv.ts
index 95a64e5787841..c7ea0cffe51c3 100644
--- a/js/web/lib/wasm/jsep/webgpu/ops/conv.ts
+++ b/js/web/lib/wasm/jsep/webgpu/ops/conv.ts
@@ -1,17 +1,16 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
-import {DataType} from '../../../wasm-common';
 import {TensorView} from '../../tensor-view';
 import {PoolConvUtil} from '../../util';
 import {AttributeWithCacheKey, createAttributeWithCacheKey} from '../attribute-with-cache-key';
 import {ComputeContext} from '../types';
 
-import {createGroupedConvProgramInfoLoader} from './conv-grouped';
-import {createConv2DMatMulProgramInfoLoader} from './conv2d-mm';
+import {createConv2DMatMulProgramInfo} from './3rd-party/conv2d_mm_webgpu';
+import {createMatmulProgramInfo} from './3rd-party/matmul_packed_webgpu';
+import {createGroupedConvProgramInfo} from './conv-grouped';
 import {InternalActivationAttributes, parseInternalActivationAttributes} from './fuse-utils';
-import {createMatmulProgramInfoLoader} from './matmul';
-import {createTransposeProgramInfo, TransposeAttributes, transposeProgramMetadata} from './transpose';
+import {createTransposeProgramInfo} from './transpose';
 
 export const calculateOutputShape =
     (inputShape: readonly number[], kernelShape: readonly number[], dilations: readonly number[],
@@ -42,7 +41,7 @@ export interface ConvAttributes extends InternalActivationAttributes, AttributeW
 }
 
 // for transposing weight tensor from [M, C/group, KH, KW] to [KH, KW, C/group, M]
-const weightTransposeAttribute: TransposeAttributes = createAttributeWithCacheKey({perm: [2, 3, 1, 0]});
+const weightTransposeAttribute = [2, 3, 1, 0];
 
 const validateInputs = (inputs: readonly TensorView[], attributes: ConvAttributes): void => {
   // Refer to the below link for all input checks
@@ -93,15 +92,6 @@ const validateInputs = (inputs: readonly TensorView[], attributes: ConvAttribute
   if (attributes.kernelShape.length !== 0 && attributes.kernelShape.length !== inputs[1].dims.length - 2) {
     throw new Error('invalid kernel shape');
   }
-
-  // TODO : Need to add support for float64
-  if (inputs[0].dataType !== DataType.float || inputs[1].dataType !== DataType.float) {
-    throw new Error('Conv input(X,W) should be float tensor');
-  }
-
-  if (inputs.length === 3 && inputs[2].dataType !== DataType.float) {
-    throw new Error('Conv input(bias) should be float tensor');
-  }
 };
 
 const getAdjustedConvAttributes = <T extends ConvAttributes>(attributes: T, inputs: readonly TensorView[]): T => {
@@ -144,15 +134,14 @@ const conv2d = (context: ComputeContext, inputs: readonly TensorView[], attribut
 
   // check attributes
 
-  const hasBias = inputs.length === 3;
   // const hasPreluActivationWeights = false; /* TODO: add support for prelu activation weights */
-  const isChannelsLast = attributes.format === 'NHWC';
-  if (!isChannelsLast || attributes.group !== 1) {
-    context.compute(createGroupedConvProgramInfoLoader(inputs, adjustedAttributes));
+  if (attributes.group !== 1) {
+    context.compute(createGroupedConvProgramInfo(inputs, adjustedAttributes));
     return;
   }
 
-  // const batchSize = context.inputs[0].dims[0];
+  const isChannelsLast = attributes.format === 'NHWC';
+  const hasBias = inputs.length === 3;
   const inputHeight = inputs[0].dims[isChannelsLast ? 1 : 2];
   const inputWidth = inputs[0].dims[isChannelsLast ? 2 : 3];
   const inputChannels = inputs[0].dims[isChannelsLast ? 3 : 1];
@@ -165,57 +154,61 @@ const conv2d = (context: ComputeContext, inputs: readonly TensorView[], attribut
   const outHeight = outputShape[isChannelsLast ? 1 : 2];
   const outWidth = outputShape[isChannelsLast ? 2 : 3];
   const outChannels = outputShape[isChannelsLast ? 3 : 1];
-  const batch = outputShape[0];
 
-  const sameSize =
-      isChannelsLast && weightHeight === inputHeight && weightWidth === inputWidth && attributes.autoPad === 'VALID';
+  const sameSize = isChannelsLast && weightHeight === inputHeight && weightWidth === inputWidth &&
+      attributes.pads[0] === 0 && attributes.pads[1] === 0;
   if (sameSize ||
       (weightHeight === 1 && weightWidth === 1 && attributes.dilations[0] === 1 && attributes.dilations[1] === 1 &&
        attributes.strides[0] === 1 && attributes.strides[1] === 1 && attributes.pads[0] === 0 &&
        attributes.pads[1] === 0)) {
     // conv2dByMatMul
-    const transposedWeight = (context.kernelCustomData.wT as TensorView | undefined) ??
-        context.compute(
-            {
-              ...transposeProgramMetadata,
-              cacheHint: weightTransposeAttribute.cacheKey,
-              get: () => createTransposeProgramInfo(inputs[1], weightTransposeAttribute.perm)
-            },
-            {inputs: [1], outputs: [attributes.wIsConst ? -2 : -1]})[0];
-    if (attributes.wIsConst && !context.kernelCustomData.wT) {
-      context.kernelCustomData.wT = transposedWeight;
-    }
-
+    const batch = outputShape[0];
+    let xReshaped, wReshaped, matmulOutputShape;
     const matmulInputs = [];
-    matmulInputs.push(inputs[0].reshape([batch, inputHeight * inputWidth, inputChannels]));
-    matmulInputs.push(transposedWeight.reshape([1, inputChannels, outChannels]));
+    if (isChannelsLast) {
+      const transposedWeight = (context.kernelCustomData.wT as TensorView | undefined) ??
+          context.compute(
+              createTransposeProgramInfo(inputs[1], weightTransposeAttribute),
+              {inputs: [1], outputs: [attributes.wIsConst ? -2 : -1]})[0];
+      if (attributes.wIsConst && !context.kernelCustomData.wT) {
+        context.kernelCustomData.wT = transposedWeight;
+      }
+      if (sameSize) {
+        const sharedDim = inputHeight * inputWidth * inputChannels;
+        xReshaped = inputs[0].reshape([1, batch, sharedDim]);
+        wReshaped = transposedWeight.reshape([1, sharedDim, outChannels]);
+        matmulOutputShape = [1, batch, outChannels];
+      } else {
+        xReshaped = inputs[0].reshape([batch, inputHeight * inputWidth, inputChannels]);
+        wReshaped = transposedWeight.reshape([1, inputChannels, outChannels]);
+        matmulOutputShape = [batch, outHeight * outWidth, outChannels];
+      }
+      matmulInputs.push(xReshaped);
+      matmulInputs.push(wReshaped);
+    } else {
+      xReshaped = inputs[0].reshape([batch, inputChannels, inputHeight * inputWidth]);
+      wReshaped = inputs[1].reshape([1, outChannels, inputChannels]);
+      matmulOutputShape = [batch, outChannels, outHeight * outWidth];
+      matmulInputs.push(wReshaped);
+      matmulInputs.push(xReshaped);
+    }
     if (hasBias) {
       matmulInputs.push(inputs[2]);
     }
-    const matmulOutputShape = [batch, outHeight * outWidth, outChannels];
     context.compute(
-        createMatmulProgramInfoLoader(matmulInputs, adjustedAttributes, outputShape, matmulOutputShape),
+        createMatmulProgramInfo(matmulInputs, adjustedAttributes, outputShape, matmulOutputShape, isChannelsLast),
         {inputs: matmulInputs});
-
     return;
   }
 
   // TODO: implement conv2dWithIm2Col()
 
-  const dimAOuter = isChannelsLast ? outHeight * outWidth : outChannels;
-  const dimBOuter = isChannelsLast ? outChannels : outHeight * outWidth;
-  const dimInner = weightHeight * weightWidth * inputChannels;
-
   const sequentialAccessByThreads = /* backend.adapterInfo.isIntel() */ true;
 
   // STEP.1: transpose weight
   const transposedWeight = (context.kernelCustomData.wT as TensorView | undefined) ??
       context.compute(
-          {
-            ...transposeProgramMetadata,
-            cacheHint: weightTransposeAttribute.cacheKey,
-            get: () => createTransposeProgramInfo(inputs[1], weightTransposeAttribute.perm)
-          },
+          createTransposeProgramInfo(inputs[1], weightTransposeAttribute),
           {inputs: [1], outputs: [attributes.wIsConst ? -2 : -1]})[0];
   if (attributes.wIsConst && !context.kernelCustomData.wT) {
     context.kernelCustomData.wT = transposedWeight;
@@ -224,16 +217,15 @@ const conv2d = (context: ComputeContext, inputs: readonly TensorView[], attribut
   // STEP.2: prepare reshaped inputs
   const convInputs = [inputs[0], transposedWeight];
   if (hasBias) {
-    if (!isChannelsLast && inputs[2].dims.length === 1) {
-      convInputs.push(inputs[2].reshape([inputs[2].dims[0], 1, 1]));
-    } else {
-      convInputs.push(inputs[2]);
-    }
+    convInputs.push(inputs[2]);
   }
 
   // STEP.3: compute matmul
+  const dimAOuter = isChannelsLast ? outHeight * outWidth : outChannels;
+  const dimBOuter = isChannelsLast ? outChannels : outHeight * outWidth;
+  const dimInner = weightHeight * weightWidth * inputChannels;
   context.compute(
-      createConv2DMatMulProgramInfoLoader(
+      createConv2DMatMulProgramInfo(
           convInputs, adjustedAttributes, outputShape, dimAOuter, dimBOuter, dimInner, hasBias,
           sequentialAccessByThreads),
       {inputs: convInputs});
@@ -260,7 +252,7 @@ const conv1d = (context: ComputeContext, attributes: ConvAttributes): void => {
   const dilations = [1].concat(attributes.dilations);
   const kernelShape = [1].concat(attributes.kernelShape);
   const adjustedAttributes = getAdjustedConvAttributes({...attributes, pads, strides, dilations, kernelShape}, inputs);
-  context.compute(createGroupedConvProgramInfoLoader(
+  context.compute(createGroupedConvProgramInfo(
       inputs, adjustedAttributes,
       outputShape => isChannelLast ? [outputShape[0], outputShape[2], outputShape[3]] : []));
 };
diff --git a/js/web/lib/wasm/jsep/webgpu/ops/conv2d-mm.ts b/js/web/lib/wasm/jsep/webgpu/ops/conv2d-mm.ts
deleted file mode 100644
index 21c0b97042fbb..0000000000000
--- a/js/web/lib/wasm/jsep/webgpu/ops/conv2d-mm.ts
+++ /dev/null
@@ -1,28 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-import {TensorView} from '../../tensor-view';
-import {GpuDataType, ProgramInfoLoader, ProgramMetadata} from '../types';
-
-import {createConv2DMatMulProgramInfo} from './3rd-party/conv2d_mm_webgpu';
-import {ConvAttributes} from './conv';
-
-
-const createConv2DMatMulProgramMetadata = (hasBias: boolean, cacheHint: string): ProgramMetadata => ({
-  name: 'Conv2DMatMul',
-  inputTypes: hasBias ? [GpuDataType.default, GpuDataType.default, GpuDataType.default] :
-                        [GpuDataType.default, GpuDataType.default],
-  cacheHint
-});
-
-export const createConv2DMatMulProgramInfoLoader =
-    (inputs: readonly TensorView[], attributes: ConvAttributes, outputShape: readonly number[], dimAOuter: number,
-     dimBOuter: number, dimInner: number, hasBias: boolean, sequentialAccessByThreads: boolean): ProgramInfoLoader => {
-      const metadata = createConv2DMatMulProgramMetadata(hasBias, attributes.cacheKey);
-      return {
-        ...metadata,
-        get: () => createConv2DMatMulProgramInfo(
-            inputs, metadata, attributes, outputShape, dimAOuter, dimBOuter, dimInner, hasBias,
-            sequentialAccessByThreads)
-      };
-    };
diff --git a/js/web/lib/wasm/jsep/webgpu/ops/einsum.ts b/js/web/lib/wasm/jsep/webgpu/ops/einsum.ts
index fc9ebf004ad25..a233d37a79e65 100644
--- a/js/web/lib/wasm/jsep/webgpu/ops/einsum.ts
+++ b/js/web/lib/wasm/jsep/webgpu/ops/einsum.ts
@@ -4,7 +4,7 @@
 import {TensorView} from '../../tensor-view';
 import {ShapeUtil} from '../../util';
 import {AttributeWithCacheKey, createAttributeWithCacheKey} from '../attribute-with-cache-key';
-import {ComputeContext, GpuDataType, ProgramInfo, ProgramInfoLoader, ProgramMetadata} from '../types';
+import {ComputeContext, ProgramInfo} from '../types';
 
 import {IndicesHelper, inputVariable, outputVariable, ShaderHelper} from './common';
 
@@ -54,7 +54,7 @@ class EinsumTerm {
 }
 
 class EinsumEquation {
-  constructor(inputs: readonly TensorView[], equation: string) {
+  constructor(inputs: readonly TensorView[], public readonly equation: string) {
     this.hasEllipsis = false;
     this.symbolToInfo = new Map<string, SymbolInfo>();
     this.lhs = new Array<EinsumTerm>();
@@ -177,111 +177,101 @@ class EinsumEquation {
   outputDims: number[];                   // Output dimensions of the equation
 }  // End of class EinsumEquation
 
-
-const createEinsumProgramMetadata = (inputCount: number, cacheHint: string): ProgramMetadata =>
-    ({name: 'Einsum', inputTypes: Array(inputCount).fill(GpuDataType.default), cacheHint});
-
-const createEinsumProgramInfo =
-    (metadata: ProgramMetadata, inputs: readonly TensorView[], einsumEquation: EinsumEquation): ProgramInfo => {
-      const dataType = inputs[0].dataType;
-      const inputVars = new Array<IndicesHelper>(inputs.length);
-      for (let i = 0; i < inputs.length; ++i) {
-        inputVars[i] = inputVariable(`input${i}`, dataType, inputs[i].dims);
-      }
-      const outputShape = einsumEquation.outputDims;
-      const outputSize = ShapeUtil.size(outputShape);
-      const output = outputVariable('output', dataType, outputShape);
-      const idxCopy: string[] = [];
-      const rhsSymbols = Array.from(einsumEquation.rhs.symbolToIndices.keys());
-      const initProd = 'var prod = 1.0;';
-      const initSum = 'var sum = 0.0;';
-      const updateSum = 'sum += prod;';
-      const reduceOpsSetIndices: string[] = [];
-      const reduceOpsLoopHeaders: string[] = [];
-      const reduceOpsLoopFooters: string[] = [];
-      const reduceOpCompute: string[] = [];
-      const isReduceOpsWithoutLoop = einsumEquation.symbolToInfo.size === rhsSymbols.length;
-      einsumEquation.symbolToInfo.forEach((info, symbol) => {
-        if (rhsSymbols.includes(symbol)) {
-          const outputIndex = rhsSymbols.indexOf(symbol);
-          einsumEquation.lhs.forEach((term, i) => {
-            if (info.inputIndices.includes(i)) {
-              const indices = term.symbolToIndices.get(symbol);
-              if (indices === undefined) {
-                throw new Error('Invalid symbol error');
-              }
-              indices.forEach((index) => {
-                idxCopy.push(`${
-                    inputVars[i].indicesSet(
-                        `input${i}Indices`, index, output.indicesGet('outputIndices', outputIndex))}`);
-              });
-            }
+const createEinsumProgramInfo = (inputs: readonly TensorView[], einsumEquation: EinsumEquation): ProgramInfo => {
+  const dataType = inputs[0].dataType;
+  const inputVars = new Array<IndicesHelper>(inputs.length);
+  for (let i = 0; i < inputs.length; ++i) {
+    inputVars[i] = inputVariable(`input${i}`, dataType, inputs[i].dims);
+  }
+  const outputShape = einsumEquation.outputDims;
+  const outputSize = ShapeUtil.size(outputShape);
+  const output = outputVariable('output', dataType, outputShape);
+  const idxCopy: string[] = [];
+  const rhsSymbols = Array.from(einsumEquation.rhs.symbolToIndices.keys());
+  const initProd = 'var prod = 1.0;';
+  const initSum = 'var sum = 0.0;';
+  const updateSum = 'sum += prod;';
+  const reduceOpsSetIndices: string[] = [];
+  const reduceOpsLoopHeaders: string[] = [];
+  const reduceOpsLoopFooters: string[] = [];
+  const reduceOpCompute: string[] = [];
+  const isReduceOpsWithoutLoop = einsumEquation.symbolToInfo.size === rhsSymbols.length;
+  einsumEquation.symbolToInfo.forEach((info, symbol) => {
+    if (rhsSymbols.includes(symbol)) {
+      const outputIndex = rhsSymbols.indexOf(symbol);
+      einsumEquation.lhs.forEach((term, i) => {
+        if (info.inputIndices.includes(i)) {
+          const indices = term.symbolToIndices.get(symbol);
+          if (indices === undefined) {
+            throw new Error('Invalid symbol error');
+          }
+          indices.forEach((index) => {
+            idxCopy.push(`${
+                inputVars[i].indicesSet(`input${i}Indices`, index, output.indicesGet('outputIndices', outputIndex))}`);
           });
-        } else {
-          einsumEquation.lhs.forEach((term, i) => {
-            const info = einsumEquation.symbolToInfo.get(symbol);
-            if (info === undefined) {
-              throw new Error('Invalid symbol error');
-            }
-            if (info.inputIndices.includes(i)) {
-              const indices = term.symbolToIndices.get(symbol);
-              if (indices === undefined) {
-                throw new Error('Invalid symbol error');
-              }
-              indices.forEach((index) => {
-                reduceOpsSetIndices.push(`${inputVars[i].indicesSet(`input${i}Indices`, index, `${symbol}`)}`);
-              });
-              reduceOpCompute.push(`prod *= ${inputVars[i].getByIndices(`input${i}Indices`)};`);
-            }
+        }
+      });
+    } else {
+      einsumEquation.lhs.forEach((term, i) => {
+        const info = einsumEquation.symbolToInfo.get(symbol);
+        if (info === undefined) {
+          throw new Error('Invalid symbol error');
+        }
+        if (info.inputIndices.includes(i)) {
+          const indices = term.symbolToIndices.get(symbol);
+          if (indices === undefined) {
+            throw new Error('Invalid symbol error');
+          }
+          indices.forEach((index) => {
+            reduceOpsSetIndices.push(`${inputVars[i].indicesSet(`input${i}Indices`, index, `${symbol}`)}`);
           });
-          reduceOpsLoopHeaders.push(`for(var ${symbol}: u32 = 0; ${symbol} < ${
-              einsumEquation.symbolToInfo.get(symbol)?.dimValue}; ${symbol}++) {`);
-          reduceOpsLoopFooters.push('}');
+          reduceOpCompute.push(`prod *= ${inputVars[i].getByIndices(`input${i}Indices`)};`);
         }
       });
-      const reduceOps = isReduceOpsWithoutLoop ?
-          [
-            ...idxCopy,
-            `let sum = ${inputVars.map((inputVar, i) => inputVar.getByIndices(`input${i}Indices`)).join(' * ')};`
-          ] :
-          [
-            ...idxCopy,
-            initSum,
-            ...reduceOpsLoopHeaders,
-            ...reduceOpsSetIndices,
-            initProd,
-            ...reduceOpCompute,
-            updateSum,
-            ...reduceOpsLoopFooters,
-          ];
-      const getShaderSource = (shaderHelper: ShaderHelper) => `
+      reduceOpsLoopHeaders.push(`for(var ${symbol}: u32 = 0; ${symbol} < ${
+          einsumEquation.symbolToInfo.get(symbol)?.dimValue}; ${symbol}++) {`);
+      reduceOpsLoopFooters.push('}');
+    }
+  });
+  const reduceOps = isReduceOpsWithoutLoop ?
+      [
+        ...idxCopy,
+        `let sum = ${inputVars.map((inputVar, i) => inputVar.getByIndices(`input${i}Indices`)).join(' * ')};`
+      ] :
+      [
+        ...idxCopy,
+        initSum,
+        ...reduceOpsLoopHeaders,
+        ...reduceOpsSetIndices,
+        initProd,
+        ...reduceOpCompute,
+        updateSum,
+        ...reduceOpsLoopFooters,
+      ];
+  const getShaderSource = (shaderHelper: ShaderHelper) => `
       ${shaderHelper.declareVariables(...inputVars, output)}
 
       ${shaderHelper.mainStart()}
         ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes(outputSize)}
         var outputIndices = ${output.offsetToIndices('global_idx')};
-        ${inputVars.map((inputVar, i) => `var input${i}Indices: ${inputVars[i].type.indices};`).join('\n')}
+        ${inputVars.map((_var, i) => `var input${i}Indices: ${inputVars[i].type.indices};`).join('\n')}
         ${reduceOps.join('\n')};
         ${output.setByOffset('global_idx', 'sum')};
       }`;
-      return {
-        ...metadata,
-        outputs: [{dims: outputShape, dataType: inputs[0].dataType, gpuDataType: GpuDataType.default}],
-        getShaderSource,
-        dispatchGroup: () => ({x: Math.ceil(outputSize / 64 /* workgroup size */)})
-      };
-    };
-
-const createEinsumProgramInfoLoader =
-    (inputs: readonly TensorView[], einsumEquation: EinsumEquation, attributes: EinsumAttributes):
-        ProgramInfoLoader => {
-          const metadata = createEinsumProgramMetadata(inputs.length, attributes.cacheKey);
-          return {...metadata, get: () => createEinsumProgramInfo(metadata, inputs, einsumEquation)};
-        };
+  return {
+    name: 'Einsum',
+    shaderCache: {hint: einsumEquation.equation},
+    getRunData: () => ({
+      outputs: [{dims: outputShape, dataType: inputs[0].dataType}],
+      dispatchGroup: {x: Math.ceil(outputSize / 64 /* workgroup size */)}
+    }),
+    getShaderSource,
+  };
+};
 
 export const einsum = (context: ComputeContext, attributes: EinsumAttributes): void => {
   const einsumEquation = new EinsumEquation(context.inputs, attributes.equation);
-  context.compute(createEinsumProgramInfoLoader(context.inputs, einsumEquation, attributes));
+  context.compute(createEinsumProgramInfo(context.inputs, einsumEquation));
 };
 
 export const parseEinsumAttributes = (attributes: Record<string, unknown>): EinsumAttributes => {
diff --git a/js/web/lib/wasm/jsep/webgpu/ops/expand.ts b/js/web/lib/wasm/jsep/webgpu/ops/expand.ts
index 824ce682c0c4b..d998013352d77 100644
--- a/js/web/lib/wasm/jsep/webgpu/ops/expand.ts
+++ b/js/web/lib/wasm/jsep/webgpu/ops/expand.ts
@@ -3,14 +3,9 @@
 
 import {TensorView} from '../../tensor-view';
 import {ShapeUtil} from '../../util';
-import {ComputeContext, GpuDataType, ProgramInfo, ProgramMetadata} from '../types';
+import {ComputeContext, ProgramInfo, ProgramUniform} from '../types';
 
-import {inputVariable, outputVariable, ShaderHelper} from './common';
-
-export const expandProgramMetadata = {
-  name: 'Expand',
-  inputTypes: [GpuDataType.default]
-};
+import {createTensorShapeVariables, enableShapesUniforms, inputVariable, outputVariable, ShaderHelper} from './common';
 
 const validateInputs = (inputs: readonly TensorView[]): void => {
   if (!inputs || inputs.length !== 2) {
@@ -45,21 +40,25 @@ const calculateOutputShape = (inputShape: readonly number[], shape: readonly num
     (inputShape.length > shape.length) ? getAdjustedShape(inputShape, shape) : getAdjustedShape(shape, inputShape);
 
 
-const createExpandProgramInfo = (metadata: ProgramMetadata, inputs: readonly TensorView[]): ProgramInfo => {
+const createExpandProgramInfo = (inputs: readonly TensorView[]): ProgramInfo => {
   const inputShape = inputs[0].dims;
   const shape = Array.from(inputs[1].getBigInt64Array(), Number);
   const outputShape: number[] = calculateOutputShape(inputShape, shape);
   const outputSize = ShapeUtil.size(outputShape);
 
   const dataType = inputs[0].dataType;
-  const input = inputVariable('input', dataType, inputShape);
-  const output = outputVariable('output', dataType, outputShape);
+  const enableInputShapeUniform = enableShapesUniforms(inputShape.length);
+  const inputShapeOrRank = enableInputShapeUniform ? inputShape.length : inputShape;
+  const input = inputVariable('input', dataType, inputShapeOrRank);
+  const enableOutputShapeUniform = enableShapesUniforms(outputShape.length);
+  const outputShapeOrRank = enableOutputShapeUniform ? outputShape.length : outputShape;
+  const output = outputVariable('output', dataType, outputShapeOrRank);
 
   const getShaderSource = (shaderHelper: ShaderHelper) => `
   const inputShape = ${input.indices(...inputShape)};
-  ${shaderHelper.declareVariables(input, output)}
+  ${shaderHelper.registerUniform('vec_size', 'u32').declareVariables(input, output)}
   ${shaderHelper.mainStart()}
-  ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes(outputSize)}
+  ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes('uniforms.vec_size')}
     let outputIndices = ${output.offsetToIndices('global_idx')};
     var inputIndices: ${input.type.indices};
     for (var i = 0; i < ${inputShape.length}; i++) {
@@ -73,19 +72,26 @@ const createExpandProgramInfo = (metadata: ProgramMetadata, inputs: readonly Ten
     }
     ${output.setByOffset('global_idx', input.getByIndices('inputIndices'))}
   }`;
+  const programUniforms: ProgramUniform[] = [{type: 'uint32', data: outputSize}];
+  if (enableInputShapeUniform) {
+    programUniforms.push(...createTensorShapeVariables(inputShape));
+  }
+  if (enableOutputShapeUniform) {
+    programUniforms.push(...createTensorShapeVariables(outputShape));
+  }
   return {
-    ...metadata,
+    name: 'Expand',
+    shaderCache: {hint: `${outputShape}`, inputDependencies: [enableInputShapeUniform ? 'rank' : 'dims']},
     getShaderSource,
-    outputs: [{dims: outputShape, dataType: inputs[0].dataType, gpuDataType: GpuDataType.default}],
-    dispatchGroup: () => ({x: Math.ceil(outputSize / 64 /* workgroup size */)})
+    getRunData: () => ({
+      outputs: [{dims: outputShape, dataType: inputs[0].dataType}],
+      dispatchGroup: {x: Math.ceil(outputSize / 64 /* workgroup size */)},
+      programUniforms
+    })
   };
 };
 
 export const expand = (context: ComputeContext): void => {
   validateInputs(context.inputs);
-  const outputShape = Array.from(context.inputs[1].getBigInt64Array(), Number);
-  const cacheHint = outputShape.toString();
-  context.compute(
-      {...expandProgramMetadata, cacheHint, get: () => createExpandProgramInfo(expandProgramMetadata, context.inputs)},
-      {inputs: [0]});
+  context.compute(createExpandProgramInfo(context.inputs), {inputs: [0]});
 };
diff --git a/js/web/lib/wasm/jsep/webgpu/ops/fuse-utils.ts b/js/web/lib/wasm/jsep/webgpu/ops/fuse-utils.ts
index 92105859a8c0e..0b5c0db2b5112 100644
--- a/js/web/lib/wasm/jsep/webgpu/ops/fuse-utils.ts
+++ b/js/web/lib/wasm/jsep/webgpu/ops/fuse-utils.ts
@@ -10,17 +10,20 @@ export interface InternalActivationAttributes {
   readonly activationCacheKey: string;
 }
 
-export const getActicationSnippet =
-    (attributes: InternalActivationAttributes): {activationFunction: string; applyActivation: string} => {
+export const getActivationSnippet = (attributes: InternalActivationAttributes, valueType: string):
+    {activationFunction: string; applyActivation: string} => {
       switch (attributes.activation) {
         case 'Relu':
-          return {activationFunction: '', applyActivation: 'value = max(value, 0.0);'};
+          return {activationFunction: '', applyActivation: `value = max(value, ${valueType}(0.0));`};
         case 'Sigmoid':
-          return {activationFunction: '', applyActivation: 'value = (1.0 / (1.0 + exp(-value)));'};
+          return {
+            activationFunction: '',
+            applyActivation: `value = (${valueType}(1.0) / (${valueType}(1.0) + exp(-value)));`
+          };
         case 'Clip':
           return {
-            activationFunction:
-                `const clip_min_=f32(${attributes.clipMin!});const clip_max_=f32(${attributes.clipMax!});`,
+            activationFunction: `const clip_min_=${valueType}(${attributes.clipMin!});const clip_max_=${valueType}(${
+                attributes.clipMax!});`,
             applyActivation: 'value = clamp(value, clip_min_, clip_max_);'
           };
           // TODO: adding other activations that can be fused.
diff --git a/js/web/lib/wasm/jsep/webgpu/ops/gather-elements.ts b/js/web/lib/wasm/jsep/webgpu/ops/gather-elements.ts
index a7d355bc13704..9924a50e2ae6f 100644
--- a/js/web/lib/wasm/jsep/webgpu/ops/gather-elements.ts
+++ b/js/web/lib/wasm/jsep/webgpu/ops/gather-elements.ts
@@ -4,7 +4,7 @@
 import {TensorView} from '../../tensor-view';
 import {ShapeUtil} from '../../util';
 import {AttributeWithCacheKey, createAttributeWithCacheKey} from '../attribute-with-cache-key';
-import {ComputeContext, GpuDataType, ProgramInfo, ProgramMetadata} from '../types';
+import {ComputeContext, ProgramInfo} from '../types';
 
 import {inputVariable, outputVariable, ShaderHelper} from './common';
 
@@ -28,7 +28,7 @@ const validateInputs = (inputs: readonly TensorView[]): void => {
 };
 
 const createGatherElementsProgramInfo =
-    (metadata: ProgramMetadata, inputs: readonly TensorView[], attributes: GatherElementsAttributes): ProgramInfo => {
+    (inputs: readonly TensorView[], attributes: GatherElementsAttributes): ProgramInfo => {
       const inputShape = inputs[0].dims;
       const inputOutputDataType = inputs[0].dataType;
       const inputRank = inputShape.length;
@@ -86,10 +86,13 @@ const createGatherElementsProgramInfo =
   }`;
 
       return {
-        ...metadata,
-        outputs: [{dims: outputShape, dataType: inputs[0].dataType, gpuDataType: GpuDataType.default}],
+        name: 'GatherElements',
+        shaderCache: {hint: attributes.cacheKey},
+        getRunData: () => ({
+          outputs: [{dims: outputShape, dataType: inputs[0].dataType}],
+          dispatchGroup: {x: Math.ceil(outputSize / 64 /* workgroup size */)}
+        }),
         getShaderSource,
-        dispatchGroup: () => ({x: Math.ceil(outputSize / 64 /* workgroup size */)})
       };
     };
 
@@ -99,12 +102,5 @@ export const parseGatherElementsAttributes = (attributes: Record<string, unknown
 export const gatherElements = (context: ComputeContext, attributes: GatherElementsAttributes): void => {
   const inputs = context.inputs;
   validateInputs(inputs);
-
-  const metadata = {
-    name: 'GatherElements',
-    inputTypes: [GpuDataType.default, GpuDataType.default],
-    cacheHint: attributes.cacheKey,
-  };
-
-  context.compute(createGatherElementsProgramInfo(metadata, context.inputs, attributes));
+  context.compute(createGatherElementsProgramInfo(context.inputs, attributes));
 };
diff --git a/js/web/lib/wasm/jsep/webgpu/ops/gather.ts b/js/web/lib/wasm/jsep/webgpu/ops/gather.ts
index 47aae13d6799d..5d6d6debadb9a 100644
--- a/js/web/lib/wasm/jsep/webgpu/ops/gather.ts
+++ b/js/web/lib/wasm/jsep/webgpu/ops/gather.ts
@@ -4,9 +4,9 @@
 import {TensorView} from '../../tensor-view';
 import {ShapeUtil} from '../../util';
 import {AttributeWithCacheKey, createAttributeWithCacheKey} from '../attribute-with-cache-key';
-import {ComputeContext, GpuDataType, ProgramInfo, ProgramMetadata} from '../types';
+import {ComputeContext, ProgramInfo, ProgramInputTensorInfoDependency, ProgramUniform} from '../types';
 
-import {inputVariable, outputVariable, ShaderHelper} from './common';
+import {createTensorShapeVariables, enableShapesUniforms, inputVariable, outputVariable, ShaderHelper} from './common';
 
 export interface GatherAttributes extends AttributeWithCacheKey {
   axis: number;
@@ -18,68 +18,99 @@ const validateInputs = (inputs: readonly TensorView[]): void => {
   }
 };
 
-const createGatherProgramInfo =
-    (metadata: ProgramMetadata, inputs: readonly TensorView[], attributes: GatherAttributes): ProgramInfo => {
-      const inputShape = inputs[0].dims;
-      const indicesShape = inputs[1].dims;
-
-      const inputRank = inputShape.length;
-      const axis = ShapeUtil.normalizeAxis(attributes.axis, inputRank);
-
-      const outputShape = inputShape.slice(0);
-      outputShape.splice(axis, 1, ...indicesShape);
-
-      const axisDimLimit = inputShape[axis];
-      const outputSize = ShapeUtil.size(outputShape);
-
-      const data = inputVariable('data', inputs[0].dataType, inputs[0].dims);
-      const indices = inputVariable('inputIndices', inputs[1].dataType, inputs[1].dims);
-      const output = outputVariable('output', inputs[0].dataType, outputShape);
-      const calcDataIndices = (): string => {
-        const indicesRank = indicesShape.length;
-        let calcStr = `var indicesIndices  = ${indices.type.indices}(0);`;
-        for (let i = 0; i < indicesRank; i++) {
-          calcStr += `${indicesRank > 1 ? `indicesIndices[${i}]` : 'indicesIndices'} = ${
-              outputShape.length > 1 ? `outputIndices[${axis + i}]` : 'outputIndices'};`;
-        }
-        calcStr += `
+const createGatherProgramInfo = (inputs: readonly TensorView[], attributes: GatherAttributes): ProgramInfo => {
+  const inputShape = inputs[0].dims;
+  const indicesShape = inputs[1].dims;
+
+  const inputRank = inputShape.length;
+  const axis = ShapeUtil.normalizeAxis(attributes.axis, inputRank);
+
+  const outputShape = inputShape.slice(0);
+  outputShape.splice(axis, 1, ...indicesShape);
+
+  const axisDimLimit = inputShape[axis];
+  const outputSize = ShapeUtil.size(outputShape);
+
+  const enableInputShapesUniforms = enableShapesUniforms(inputs[0].dims.length);
+  const inputShapeOrRank = enableInputShapesUniforms ? inputs[0].dims.length : inputs[0].dims;
+  const enableIndicesShapesUniforms = enableShapesUniforms(inputs[1].dims.length);
+  const indicesShapeOrRank = enableIndicesShapesUniforms ? inputs[1].dims.length : inputs[1].dims;
+  const enableOutputShapesUniforms = enableShapesUniforms(outputShape.length);
+  const outputShapeOrRank = enableOutputShapesUniforms ? outputShape.length : outputShape;
+
+  const data = inputVariable('data', inputs[0].dataType, inputShapeOrRank);
+  const indices = inputVariable('inputIndices', inputs[1].dataType, indicesShapeOrRank);
+  const output = outputVariable('output', inputs[0].dataType, outputShapeOrRank);
+
+  const programUniforms: ProgramUniform[] =
+      [{type: 'uint32', data: outputSize}, {type: 'int32', data: axisDimLimit}, {type: 'uint32', data: axis}];
+  if (enableInputShapesUniforms) {
+    programUniforms.push(...createTensorShapeVariables(inputs[0].dims));
+  }
+  if (enableIndicesShapesUniforms) {
+    programUniforms.push(...createTensorShapeVariables(inputs[1].dims));
+  }
+  if (enableOutputShapesUniforms) {
+    programUniforms.push(...createTensorShapeVariables(outputShape));
+  }
+
+  const inputDependencies: ProgramInputTensorInfoDependency[] = [];
+  inputDependencies.push(enableInputShapesUniforms ? 'rank' : 'dims');
+  inputDependencies.push(enableIndicesShapesUniforms ? 'rank' : 'dims');
+
+  const calcDataIndices = (): string => {
+    const indicesRank = indicesShape.length;
+    let calcStr = `var indicesIndices  = ${indices.type.indices}(0);`;
+    for (let i = 0; i < indicesRank; i++) {
+      calcStr += `${indicesRank > 1 ? `indicesIndices[${i}]` : 'indicesIndices'} = ${
+          outputShape.length > 1 ? `outputIndices[uniforms.axis + ${i}]` : 'outputIndices'};`;
+    }
+    calcStr += `
         var idx = ${indices.getByIndices('indicesIndices')};
         if (idx < 0) {
-          idx = idx + ${axisDimLimit};
+          idx = idx + uniforms.axisDimLimit;
         }
         var dataIndices = ${data.type.indices}(0);
       `;
-        for (let i = 0, j = 0; i < inputRank; i++) {
-          if (i === axis) {
-            calcStr += `${inputRank > 1 ? `dataIndices[${i}]` : 'dataIndices'} = u32(idx);`;
-            j += indicesRank;
-          } else {
-            calcStr += `${inputRank > 1 ? `dataIndices[${i}]` : 'dataIndices'} = ${
-                outputShape.length > 1 ? `outputIndices[${j}]` : 'outputIndices'};`;
-            j++;
-          }
-        }
-        return calcStr;
-      };
+    for (let i = 0, j = 0; i < inputRank; i++) {
+      if (i === axis) {
+        calcStr += `${inputRank > 1 ? `dataIndices[${i}]` : 'dataIndices'} = u32(idx);`;
+        j += indicesRank;
+      } else {
+        calcStr += `${inputRank > 1 ? `dataIndices[${i}]` : 'dataIndices'} = ${
+            outputShape.length > 1 ? `outputIndices[${j}]` : 'outputIndices'};`;
+        j++;
+      }
+    }
+    return calcStr;
+  };
 
-      const getShaderSource = (shaderHelper: ShaderHelper) => `
-      ${shaderHelper.declareVariables(data, indices, output)}
+  const getShaderSource = (shaderHelper: ShaderHelper) => `
+      ${
+      shaderHelper.registerUniform('outputSize', 'u32')
+          .registerUniform('axisDimLimit', 'i32')
+          .registerUniform('axis', 'u32')
+          .declareVariables(data, indices, output)}
       ${shaderHelper.mainStart()}
-        ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes(outputSize)}
+        ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes('uniforms.outputSize')}
         let outputIndices = ${output.offsetToIndices('global_idx')};
         ${calcDataIndices()};
         let value = ${data.getByIndices('dataIndices')};
         ${output.setByOffset('global_idx', 'value')};
       }`;
-      return {
-        ...metadata,
-        outputs: [
-          {dims: outputShape, dataType: inputs[0].dataType, gpuDataType: GpuDataType.default},
-        ],
-        getShaderSource,
-        dispatchGroup: () => ({x: Math.ceil(outputSize / 64 /* workgroup size */)})
-      };
-    };
+  return {
+    name: 'Gather',
+    shaderCache: {hint: attributes.cacheKey, inputDependencies},
+    getRunData: () => ({
+      outputs: [
+        {dims: outputShape, dataType: inputs[0].dataType},
+      ],
+      dispatchGroup: {x: Math.ceil(outputSize / 64 /* workgroup size */)},
+      programUniforms
+    }),
+    getShaderSource,
+  };
+};
 
 export const parseGatherAttributes = (attributes: Record<string, unknown>): GatherAttributes =>
     createAttributeWithCacheKey({axis: attributes.axis as number});
@@ -87,12 +118,5 @@ export const parseGatherAttributes = (attributes: Record<string, unknown>): Gath
 export const gather = (context: ComputeContext, attributes: GatherAttributes): void => {
   const inputs = context.inputs;
   validateInputs(inputs);
-
-  const metadata = {
-    name: 'Gather',
-    inputTypes: [GpuDataType.default, GpuDataType.default],
-    cacheHint: attributes.cacheKey,
-  };
-
-  context.compute(createGatherProgramInfo(metadata, context.inputs, attributes));
+  context.compute(createGatherProgramInfo(context.inputs, attributes));
 };
diff --git a/js/web/lib/wasm/jsep/webgpu/ops/gemm.ts b/js/web/lib/wasm/jsep/webgpu/ops/gemm.ts
index 1a36d4a7545d6..6e9dee41ce488 100644
--- a/js/web/lib/wasm/jsep/webgpu/ops/gemm.ts
+++ b/js/web/lib/wasm/jsep/webgpu/ops/gemm.ts
@@ -4,7 +4,7 @@
 import {TensorView} from '../../tensor-view';
 import {GemmUtil, ShapeUtil} from '../../util';
 import {AttributeWithCacheKey, createAttributeWithCacheKey} from '../attribute-with-cache-key';
-import {ComputeContext, GpuDataType, ProgramInfo, ProgramInfoLoader, ProgramMetadata} from '../types';
+import {ComputeContext, ProgramInfo} from '../types';
 
 import {ShaderHelper, tensorTypeToWsglStorageType} from './common';
 
@@ -53,39 +53,38 @@ const offsetC = (m: number, n: number, dims: readonly number[]): string => {
   return offset;
 };
 
-const createGemmProgramInfo =
-    (metadata: ProgramMetadata, inputs: readonly TensorView[], attributes: GemmAttributes): ProgramInfo => {
-      const aShape = inputs[0].dims.slice();
-      const bShape = inputs[1].dims.slice();
-      const [M, N, K] = GemmUtil.getShapeOfGemmResult(
-          aShape, attributes.transA, bShape, attributes.transB, inputs.length === 3 ? inputs[2].dims : undefined);
-      const outputShape = [M, N];
-      if (!outputShape) {
-        throw new Error('Can\'t use gemm on the given tensors');
-      }
-      const outputSize = ShapeUtil.size(outputShape);
-      let line = '';
-      if (attributes.transA && attributes.transB) {
-        line = 'value += a[k * M + m] * b[n * K + k];';
-      } else if (attributes.transA && !attributes.transB) {
-        line = 'value += a[k * M + m] * b[k * N + n];';
-      } else if (!attributes.transA && attributes.transB) {
-        line = 'value += a[m * K + k] * b[n * K + k];';
-      } else if (!attributes.transA && !attributes.transB) {
-        line = 'value += a[m * K + k] * b[k * N + n];';
-      }
-
-      const dataType = tensorTypeToWsglStorageType(inputs[0].dataType);
-      const calculateAlpha = attributes.alpha === 1 ? '' : 'value *= alpha;';
-      const calculateC = inputs.length === 3 ? `value += beta * c[${offsetC(M, N, inputs[2].dims)}];` : '';
-      const inputStorageBuffersDeclarations = [
-        `@group(0) @binding(0) var<storage, read> a : array<${dataType}>;`,
-        `@group(0) @binding(1) var<storage, read> b : array<${dataType}>;`
-      ];
-      if (inputs.length === 3) {
-        inputStorageBuffersDeclarations.push(`@group(0) @binding(2) var<storage, read> c : array<${dataType}>;`);
-      }
-      const getShaderSource = (shaderHelper: ShaderHelper) => `
+const createGemmProgramInfo = (inputs: readonly TensorView[], attributes: GemmAttributes): ProgramInfo => {
+  const aShape = inputs[0].dims.slice();
+  const bShape = inputs[1].dims.slice();
+  const [M, N, K] = GemmUtil.getShapeOfGemmResult(
+      aShape, attributes.transA, bShape, attributes.transB, inputs.length === 3 ? inputs[2].dims : undefined);
+  const outputShape = [M, N];
+  if (!outputShape) {
+    throw new Error('Can\'t use gemm on the given tensors');
+  }
+  const outputSize = ShapeUtil.size(outputShape);
+  let line = '';
+  if (attributes.transA && attributes.transB) {
+    line = 'value += a[k * M + m] * b[n * K + k];';
+  } else if (attributes.transA && !attributes.transB) {
+    line = 'value += a[k * M + m] * b[k * N + n];';
+  } else if (!attributes.transA && attributes.transB) {
+    line = 'value += a[m * K + k] * b[n * K + k];';
+  } else if (!attributes.transA && !attributes.transB) {
+    line = 'value += a[m * K + k] * b[k * N + n];';
+  }
+
+  const dataType = tensorTypeToWsglStorageType(inputs[0].dataType);
+  const calculateAlpha = attributes.alpha === 1 ? '' : 'value *= alpha;';
+  const calculateC = inputs.length === 3 ? `value += beta * c[${offsetC(M, N, inputs[2].dims)}];` : '';
+  const inputStorageBuffersDeclarations = [
+    `@group(0) @binding(0) var<storage, read> a : array<${dataType}>;`,
+    `@group(0) @binding(1) var<storage, read> b : array<${dataType}>;`
+  ];
+  if (inputs.length === 3) {
+    inputStorageBuffersDeclarations.push(`@group(0) @binding(2) var<storage, read> c : array<${dataType}>;`);
+  }
+  const getShaderSource = (shaderHelper: ShaderHelper) => `
   const M: u32 = ${M}u;
   const N: u32 = ${N}u;
   const K: u32 = ${K}u;
@@ -111,28 +110,20 @@ const createGemmProgramInfo =
     output[global_id.x] = value;
 
   }`;
-      return {
-        ...metadata,
-        outputs: [{dims: outputShape, dataType: inputs[0].dataType, gpuDataType: GpuDataType.default}],
-        getShaderSource,
-        dispatchGroup: () => ({x: Math.ceil(outputSize / 64 /* workgroup size */)})
-      };
-    };
-
-const createGemmProgramInfoLoader = (inputs: readonly TensorView[], attributes: GemmAttributes): ProgramInfoLoader => {
-  const metadata = {
+  return {
     name: 'Gemm',
-    inputTypes: inputs.length === 3 ? [GpuDataType.default, GpuDataType.default, GpuDataType.default] :
-                                      [GpuDataType.default, GpuDataType.default],
-    cacheHint: attributes.cacheKey
+    shaderCache: {hint: attributes.cacheKey},
+    getRunData: () => ({
+      outputs: [{dims: outputShape, dataType: inputs[0].dataType}],
+      dispatchGroup: {x: Math.ceil(outputSize / 64 /* workgroup size */)}
+    }),
+    getShaderSource,
   };
-
-  return {...metadata, get: () => createGemmProgramInfo(metadata, inputs, attributes)};
 };
 
 export const gemm = (context: ComputeContext, attributes: GemmAttributes): void => {
   validateInputs(context.inputs);
-  context.compute(createGemmProgramInfoLoader(context.inputs, attributes));
+  context.compute(createGemmProgramInfo(context.inputs, attributes));
 };
 
 export const parseGemmAttributes = (attributes: Record<string, unknown>): GemmAttributes =>
diff --git a/js/web/lib/wasm/jsep/webgpu/ops/instance-norm.ts b/js/web/lib/wasm/jsep/webgpu/ops/instance-norm.ts
index 5a148bda0a9f7..97f633c7cf47e 100644
--- a/js/web/lib/wasm/jsep/webgpu/ops/instance-norm.ts
+++ b/js/web/lib/wasm/jsep/webgpu/ops/instance-norm.ts
@@ -1,20 +1,25 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
+import {DataType} from '../../../wasm-common';
 import {TensorView} from '../../tensor-view';
 import {ShapeUtil} from '../../util';
 import {AttributeWithCacheKey, createAttributeWithCacheKey} from '../attribute-with-cache-key';
-import {ComputeContext, GpuDataType, ProgramInfo, ProgramMetadata} from '../types';
+import {ComputeContext, ProgramInfo} from '../types';
 
-import {inputVariable, outputVariable, ShaderHelper, tensorTypeToWsglStorageType} from './common';
+import {fillVector, getMaxComponents, inputVariable, outputVariable, ShaderHelper, tensorTypeToWsglStorageType} from './common';
 
 export interface InstanceNormAttributes extends AttributeWithCacheKey {
   epsilon: number;
   format: 'NHWC'|'NCHW';
 }
 
+const metadata = {
+  name: 'InstanceNormalization'
+};
+
 const createInstanceNormProgramInfo =
-    (metadata: ProgramMetadata, inputs: readonly TensorView[], attributes: InstanceNormAttributes): ProgramInfo => {
+    (inputs: readonly TensorView[], attributes: InstanceNormAttributes): ProgramInfo => {
       const xShape = inputs[0].dims;
 
       const outputShape = xShape;
@@ -96,89 +101,179 @@ const createInstanceNormProgramInfo =
   }`;
       return {
         ...metadata,
-        outputs: [
-          {dims: outputShape, dataType: inputs[0].dataType, gpuDataType: GpuDataType.default},
-        ],
+        shaderCache: {hint: attributes.cacheKey},
+        getRunData: () => ({
+          outputs: [
+            {dims: outputShape, dataType: inputs[0].dataType},
+          ],
+          dispatchGroup: {x: normCount}
+        }),
         getShaderSource,
-        dispatchGroup: () => ({x: normCount})
       };
     };
 
+const computeMean =
+    (context: ComputeContext, input: TensorView, scale: TensorView, bias: TensorView, n: number, h: number, c: number,
+     epsilon: number) => {
+      const components = getMaxComponents(c);
+      const inputHelper = inputVariable('input', input.dataType, input.dims, components);
+      const scaleHelper = inputVariable('scale', scale.dataType, scale.dims, components);
+      const biasHelper = inputVariable('bias', bias.dataType, bias.dims, components);
+
+      const WG = 64;
+      // we will store channel scale and channel shift in [2, components] matrix
+      // or in vec2 when components == 1
+      const outputType = components === 1 ? 'vec2f' : `mat2x${components}f`;
+      const sumCastType = components === 1 ? 'f32' : `vec${components}f`;
+      const setOutputValue = (var1: string, var2: string) => `${outputType}(${var1}, ${var2})`;
+      const unitsOfWork = n * c / components;
+      const wgSize = Math.ceil(h / WG);
+
+      const getMeanShaderSource = (shaderHelper: ShaderHelper) => `
+  const H: u32 = ${h};
+  const C: u32 = ${c / components};
+  const imageSize: u32 = ${h * c / components};
+
+  ${shaderHelper.declareVariables(inputHelper)}
+  @group(0) @binding(1) var<storage, read_write> output : array<${outputType}>;
+
+  ${shaderHelper.mainStart(WG)}
+    let currentImageNumber = global_idx / ${WG} / C;
+    let currentChannelNumber = (global_idx / ${WG}) % C;
+    let wgId = global_idx % ${WG};
+    let wgOffset = wgId * ${wgSize};
+    if (wgOffset >= H) {
+        return;
+    }
+    let wgMax = min(wgOffset + ${wgSize}, H);
+
+    let offset = currentImageNumber * imageSize + currentChannelNumber;
+    var sum = ${fillVector('f32', components)};
+    var squaredSum = ${fillVector('f32', components)};
+    for (var i: u32 = wgOffset; i < wgMax; i++) {
+        let value = ${sumCastType}(input[offset + i * C]);
+        sum += value;
+        squaredSum += value * value;
+    }
+    output[global_idx] = ${setOutputValue('sum', 'squaredSum')};
+  }`;
+
+      const meanValues = context.compute(
+          {
+            name: 'InstanceNormComputeMean',
+            shaderCache: {hint: JSON.stringify({components, n, h, c})},
+            getRunData: () => ({
+              outputs: [
+                {dims: [n, c, WG, 2], dataType: DataType.float},
+              ],
+              dispatchGroup: {x: n * c / components},
+            }),
+            getShaderSource: getMeanShaderSource,
+          },
+          {inputs: [input], outputs: [-1]})[0];
+      const getShaderSource = (shaderHelper: ShaderHelper) => `
+  const H: u32 = ${h};
+  const C: u32 = ${c / components};
+  const imageSize: u32 = ${WG * c / components};
+  const epsilon: f32 = ${epsilon};
+
+  @group(0) @binding(0) var<storage, read> input : array<${outputType}>;
+  @group(0) @binding(1) var<storage, read> scale : array<${scaleHelper.type.storage}>;
+  @group(0) @binding(2) var<storage, read> bias : array<${biasHelper.type.storage}>;
+  @group(0) @binding(3) var<storage, read_write> output : array<${outputType}>;
+
+  ${shaderHelper.mainStart()}
+    ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes(unitsOfWork)}
+    let currentImageNumber = global_idx / C;
+    let currentChannelNumber = global_idx % C;
+
+    let offset = currentImageNumber * imageSize;
+    var sum = ${fillVector('f32', components)};
+    var squaredSum = ${fillVector('f32', components)};
+    for (var i: u32 = 0; i < ${WG}; i++) {
+        let value = input[offset + i + currentChannelNumber * ${WG}];
+        sum += value[0];
+        squaredSum += value[1];
+    }
+    sum = sum / f32(H);
+    squaredSum = squaredSum / f32(H);
+    let invStdDev = 1 / sqrt(squaredSum - sum * sum + epsilon);
+    let channelScale = invStdDev * ${sumCastType}(scale[currentChannelNumber]);
+    let channelShift = ${sumCastType}(bias[currentChannelNumber]) - sum * channelScale;
+
+    output[global_idx] = ${setOutputValue('channelScale', 'channelShift')};
+  }`;
+
+      return context.compute(
+          {
+            name: 'InstanceNormComputeChannelScaleShift',
+            shaderCache: {hint: JSON.stringify({components, n, h, c, epsilon})},
+            getRunData: () => ({
+              outputs: [
+                {dims: [n, c, 2], dataType: DataType.float},
+              ],
+              dispatchGroup: {x: Math.ceil(unitsOfWork / 64 /* workgroup size */)},
+            }),
+            getShaderSource,
+          },
+          {inputs: [meanValues, scale, bias], outputs: [-1]})[0];
+    };
+
 const createInstanceNormNHWCProgramInfo =
-    (metadata: ProgramMetadata, inputs: readonly TensorView[], attributes: InstanceNormAttributes): ProgramInfo => {
+    (context: ComputeContext, inputs: readonly TensorView[], attributes: InstanceNormAttributes) => {
       const xShape = inputs[0].dims;
       const outputShape = xShape;
-      const outputSize = ShapeUtil.size(outputShape);
       const N = xShape[0];
       const C = xShape[xShape.length - 1];
       const H = ShapeUtil.sizeFromDimension(xShape, 1) / C;
 
+      const components = getMaxComponents(C);
+      const outputSize = ShapeUtil.size(outputShape) / components;
+      const inputHelper = inputVariable('input', inputs[0].dataType, inputs[0].dims, components);
+      const outputHelper = outputVariable('output', inputs[0].dataType, outputShape, components);
+
       const dataType = tensorTypeToWsglStorageType(inputs[0].dataType);
+      const scaleType = components === 1 ? 'vec2f' : `mat2x${components}f`;
+      const scaleCastType = components === 1 ? dataType : `vec${components}<${dataType}>`;
+      // first compute mean
+      const channelScaleShift = computeMean(context, inputs[0], inputs[1], inputs[2], N, H, C, attributes.epsilon);
 
-      const normCount = C * N;
       const getShaderSource = (shaderHelper: ShaderHelper) => `
-  const N: u32 = ${N};
   const H: u32 = ${H};
-  const C: u32 = ${C};
-  const normSizeTyped: ${dataType} = ${H};
-  const imageSize: u32 = ${H * C};
-  const epsilon: f32 = ${attributes.epsilon};
+  const C: u32 = ${C / components};
 
-  @group(0) @binding(0) var<storage, read> x : array<${dataType}>;
-  @group(0) @binding(1) var<storage, read> scale : array<${dataType}>;
-  @group(0) @binding(2) var<storage, read> bias : array<${dataType}>;
-  @group(0) @binding(3) var<storage, read_write> output : array<${dataType}>;
+  @group(0) @binding(0) var<storage, read> input : array<${inputHelper.type.storage}>;
+  @group(0) @binding(1) var<storage, read> scaleInput : array<${scaleType}>;
+  @group(0) @binding(2) var<storage, read_write> output : array<${outputHelper.type.storage}>;
 
   ${shaderHelper.mainStart()}
-    let currentImageNumber = global_idx / C;
+    let currentImageNumber = global_idx / (C * H);
     let currentChannelNumber = global_idx % C;
 
-    // offset is channel num * N
-    let offset = currentImageNumber * imageSize;
-    if (offset >= ${outputSize}) { return; }
-    var mean: ${dataType} = 0;
-
-    for (var i: u32 = 0u; i < H; i++) {
-        mean = mean + x[offset + i * C + currentChannelNumber];
-    }
-    mean = mean / normSizeTyped;
-
-    var squaredNorm: ${dataType} = 0;
-    for (var i: u32 = 0u; i < H; i++) {
-        let deviation: f32 = x[offset + i * C + currentChannelNumber] - mean;
-        squaredNorm = squaredNorm + deviation * deviation;
-    }
-    let invStdDev = 1 / sqrt(squaredNorm / normSizeTyped + epsilon);
-    let channelScale = invStdDev * scale[currentChannelNumber];
-    let channelShift = bias[currentChannelNumber] - mean * channelScale;
-    for (var i: u32 = 0u; i < H; i++) {
-        let currentOffset = offset + i * C + currentChannelNumber;
-        output[currentOffset] = x[currentOffset] * channelScale + channelShift;
-    }
+    let scaleOffset = currentImageNumber * C + currentChannelNumber;
+    let scale = scaleInput[scaleOffset];
+    output[global_idx] = fma(input[global_idx], ${scaleCastType}(scale[0]), ${scaleCastType}(scale[1]));
   }`;
-      return {
-        ...metadata,
-        outputs: [
-          {dims: outputShape, dataType: inputs[0].dataType, gpuDataType: GpuDataType.default},
-        ],
-        getShaderSource,
-        dispatchGroup: () => ({x: Math.ceil(normCount / 64 /* workgroup size */)})
-      };
+      context.compute(
+          {
+            name: 'InstanceNormalization',
+            shaderCache: {hint: `${attributes.cacheKey}`},
+            getRunData: () => ({
+              outputs: [{dims: outputShape, dataType: inputs[0].dataType}],
+              dispatchGroup: {x: Math.ceil(outputSize / 64 /* workgroup size */)}
+            }),
+            getShaderSource,
+          },
+          {inputs: [inputs[0], channelScaleShift]});
     };
 
 export const parseInstanceNormAttributes = (attributes: InstanceNormAttributes): InstanceNormAttributes =>
     createAttributeWithCacheKey({epsilon: attributes.epsilon, format: attributes.format});
 
 export const instanceNorm = (context: ComputeContext, attributes: InstanceNormAttributes): void => {
-  const metadata = {
-    name: 'InstanceNormalization',
-    inputTypes: [GpuDataType.default, GpuDataType.default, GpuDataType.default],
-    cacheHint: attributes.cacheKey,
-  };
-
   if (attributes.format === 'NHWC') {
-    context.compute(createInstanceNormNHWCProgramInfo(metadata, context.inputs, attributes));
+    createInstanceNormNHWCProgramInfo(context, context.inputs, attributes);
   } else {
-    context.compute(createInstanceNormProgramInfo(metadata, context.inputs, attributes));
+    context.compute(createInstanceNormProgramInfo(context.inputs, attributes));
   }
 };
diff --git a/js/web/lib/wasm/jsep/webgpu/ops/layer-norm.ts b/js/web/lib/wasm/jsep/webgpu/ops/layer-norm.ts
index d6a79e9460c3f..8a9eeecf2c68d 100644
--- a/js/web/lib/wasm/jsep/webgpu/ops/layer-norm.ts
+++ b/js/web/lib/wasm/jsep/webgpu/ops/layer-norm.ts
@@ -5,9 +5,9 @@ import {DataType} from '../../../wasm-common';
 import {TensorView} from '../../tensor-view';
 import {ShapeUtil} from '../../util';
 import {AttributeWithCacheKey, createAttributeWithCacheKey} from '../attribute-with-cache-key';
-import {ComputeContext, GpuDataType, ProgramInfo, ProgramMetadata} from '../types';
+import {ComputeContext, ProgramInfo} from '../types';
 
-import {ShaderHelper, tensorTypeToWsglStorageType} from './common';
+import {castToF32, fillVector, getMaxComponents, inputVariable, outputVariable, ShaderHelper, sumVector, tensorTypeToWsglStorageType,} from './common';
 
 export interface LayerNormAttributes extends AttributeWithCacheKey {
   axis: number;
@@ -18,117 +18,109 @@ const validateInputs = (inputs: readonly TensorView[]): void => {
   if (!inputs || inputs.length < 2) {
     throw new Error('layerNorm requires at least 2 inputs.');
   }
-
-  if (inputs[0].dataType !== DataType.float || inputs[1].dataType !== DataType.float) {
-    throw new Error('inputs should be float type');
-  }
 };
 
 const createLayerNormProgramInfo =
-    (metadata: ProgramMetadata, inputs: readonly TensorView[], attributes: LayerNormAttributes, outputCount: number):
-        ProgramInfo => {
-          const xShape = inputs[0].dims;
-          const scale = inputs[1];
-          const bias = inputs[2];
-
-          const outputShape = xShape;
-          const outputSize = ShapeUtil.size(outputShape);
-          const axis = ShapeUtil.normalizeAxis(attributes.axis, xShape.length);
-          const normCount = ShapeUtil.sizeToDimension(xShape, axis);
-          const normSize = ShapeUtil.sizeFromDimension(xShape, axis);
-
-          const scaleSize = ShapeUtil.size(scale.dims);
-          const biasSize = bias ? ShapeUtil.size(bias.dims) : 0;
-          if (scaleSize !== normSize || (bias && biasSize !== normSize)) {
-            throw new Error(`Size of X.shape()[axis:] == ${normSize}.
+    (inputs: readonly TensorView[], attributes: LayerNormAttributes, outputCount: number): ProgramInfo => {
+      const xShape = inputs[0].dims;
+      const scale = inputs[1];
+      const bias = inputs[2];
+
+      const outputShape = xShape;
+      const axis = ShapeUtil.normalizeAxis(attributes.axis, xShape.length);
+      const normCount = ShapeUtil.sizeToDimension(xShape, axis);
+      const normSize = ShapeUtil.sizeFromDimension(xShape, axis);
+
+      const scaleSize = ShapeUtil.size(scale.dims);
+      const biasSize = bias ? ShapeUtil.size(bias.dims) : 0;
+      if (scaleSize !== normSize || (bias && biasSize !== normSize)) {
+        throw new Error(`Size of X.shape()[axis:] == ${normSize}.
        Size of scale and bias (if provided) must match this.
        Got scale size of ${scaleSize} and bias size of ${biasSize}`);
-          }
-
-          const meanInvStdDevDim = [];
-          for (let i = 0; i < xShape.length; ++i) {
-            if (i < axis) {
-              meanInvStdDevDim.push(xShape[i]);
-            } else {
-              meanInvStdDevDim.push(1);
-            }
-          }
-
-          const dataType = tensorTypeToWsglStorageType(inputs[0].dataType);
-
-          const hasMeanDataOutput = outputCount > 1;
-          const hasInvStdOutput = outputCount > 2;
-          let bindingIndex = 0;
-          const getShaderSource = (shaderHelper: ShaderHelper) => `
-  const normSize: u32 = ${normSize};
-  const normSizeTyped: ${dataType} = ${normSize};
+      }
+
+      const meanInvStdDevDim = [];
+      for (let i = 0; i < xShape.length; ++i) {
+        if (i < axis) {
+          meanInvStdDevDim.push(xShape[i]);
+        } else {
+          meanInvStdDevDim.push(1);
+        }
+      }
+
+      const components = getMaxComponents(normSize);
+      const dataType = tensorTypeToWsglStorageType(inputs[0].dataType);
+      const variables = [
+        inputVariable('x', inputs[0].dataType, inputs[0].dims, components),
+        inputVariable('scale', scale.dataType, scale.dims, components),
+      ];
+      if (bias) {
+        variables.push(inputVariable('bias', bias.dataType, bias.dims, components));
+      }
+      variables.push(outputVariable('output', inputs[0].dataType, outputShape, components));
+
+      const hasMeanDataOutput = outputCount > 1;
+      const hasInvStdOutput = outputCount > 2;
+
+      if (hasMeanDataOutput) {
+        variables.push(outputVariable('meanDataOutput', DataType.float, meanInvStdDevDim));
+      }
+      if (hasInvStdOutput) {
+        variables.push(outputVariable('invStdOutput', DataType.float, meanInvStdDevDim));
+      }
+
+      const getShaderSource = (shaderHelper: ShaderHelper) => `
+  const normSize: f32 = ${normSize};
+  const normSizeVectorized: u32 = ${normSize / components};
   const epsilon: f32 = ${attributes.epsilon};
 
-  @group(0) @binding(${bindingIndex++}) var<storage, read> x : array<${dataType}>;
-  @group(0) @binding(${bindingIndex++}) var<storage, read> scale : array<${dataType}>;
-  ${bias ? `@group(0) @binding(${bindingIndex++}) var<storage, read> bias : array<${dataType}>;` : ''}
-  @group(0) @binding(${bindingIndex++}) var<storage, read_write> output : array<${dataType}>;
-  ${
-              hasMeanDataOutput ?
-                  `@group(0) @binding(${bindingIndex++}) var<storage, read_write> meanDataOutput : array<${dataType}>` :
-                  ''};
-  ${
-              hasInvStdOutput ?
-                  `@group(0) @binding(${bindingIndex++}) var<storage, read_write> invStdOutput : array<${dataType}>` :
-                  ''};
-
+  ${shaderHelper.declareVariables(...variables)}
   ${shaderHelper.mainStart()}
-    let offset = global_idx * normSize;
-    if (offset >= ${outputSize}) { return; }
-    var mean: ${dataType} = 0;
-    var meanSquare: ${dataType} = 0;
-
-    for (var h: u32 = 0u; h < normSize; h++) {
-      mean = mean + x[h + offset];
-      meanSquare = meanSquare + x[h + offset] * x[h + offset];
+    ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes(normCount)}
+    let offset = global_idx * normSizeVectorized;
+    var meanVector = ${fillVector('f32', components)};
+    var meanSquareVector = ${fillVector('f32', components)};
+
+    for (var h: u32 = 0u; h < normSizeVectorized; h++) {
+      let value = ${castToF32(dataType, components, 'x[h + offset]')};
+      meanVector += value;
+      meanSquareVector += value * value;
     }
-    mean = mean / normSizeTyped;
-    meanSquare = sqrt(meanSquare / normSizeTyped - mean * mean + epsilon);
-
-    for (var j: u32 = 0; j < normSize; j++) {
-      output[j + offset] = (x[j + offset] - mean) / meanSquare * scale[j] ${bias ? '+ bias[j]' : ''};
+    let mean = ${sumVector('meanVector', components)} / normSize;
+    let meanSquare = sqrt(${sumVector('meanSquareVector', components)} 
+      / normSize - mean * mean + epsilon);
+
+    for (var j: u32 = 0; j < normSizeVectorized; j++) {
+      let f32input = ${castToF32(dataType, components, 'x[j + offset]')};
+      let f32scale = ${castToF32(dataType, components, 'scale[j]')};
+      output[j + offset] = ${variables[0].type.value}((f32input - mean) / meanSquare * f32scale
+        ${bias ? `+ ${castToF32(dataType, components, 'bias[j]')}` : ''}
+      );
     }
 
     ${hasMeanDataOutput ? 'meanDataOutput[global_idx] = mean' : ''};
     ${hasInvStdOutput ? 'invStdOutput[global_idx] = 1 / meanSquare' : ''};
   }`;
-          const outputs = [{dims: outputShape, dataType: inputs[0].dataType, gpuDataType: GpuDataType.default}];
-          if (hasMeanDataOutput) {
-            outputs.push(
-                {dims: meanInvStdDevDim, dataType: inputs[0].dataType, gpuDataType: GpuDataType.default},
-            );
-          }
-          if (hasInvStdOutput) {
-            outputs.push(
-                {dims: meanInvStdDevDim, dataType: inputs[0].dataType, gpuDataType: GpuDataType.default},
-            );
-          }
-
-          return {
-            ...metadata,
-            outputs,
-            getShaderSource,
-            dispatchGroup: () => ({x: Math.ceil(normCount / 64 /* workgroup size */)})
-          };
-        };
+      const outputs = [{dims: outputShape, dataType: inputs[0].dataType}];
+      if (hasMeanDataOutput) {
+        outputs.push({dims: meanInvStdDevDim, dataType: DataType.float});
+      }
+      if (hasInvStdOutput) {
+        outputs.push({dims: meanInvStdDevDim, dataType: DataType.float});
+      }
+
+      return {
+        name: 'LayerNormalization',
+        shaderCache: {hint: `${attributes.cacheKey}|${outputCount}|${inputs.length}`},
+        getRunData: () => ({outputs, dispatchGroup: {x: Math.ceil(normCount / 64 /* workgroup size */)}}),
+        getShaderSource,
+      };
+    };
 
 export const parseLayerNormAttributes = (attributes: LayerNormAttributes): LayerNormAttributes =>
     createAttributeWithCacheKey({axis: attributes.axis, epsilon: attributes.epsilon});
 
 export const layerNorm = (context: ComputeContext, attributes: LayerNormAttributes): void => {
   validateInputs(context.inputs);
-
-  const metadata = {
-    name: 'LayerNormalization',
-    inputTypes: context.inputs.length === 2 ? [GpuDataType.default, GpuDataType.default] :
-                                              [GpuDataType.default, GpuDataType.default, GpuDataType.default],
-    cacheHint: attributes.cacheKey + context.outputCount.toString(10) + context.inputs.length.toString(10),
-  };
-
-  context.compute(createLayerNormProgramInfo(metadata, context.inputs, attributes, context.outputCount));
+  context.compute(createLayerNormProgramInfo(context.inputs, attributes, context.outputCount));
 };
diff --git a/js/web/lib/wasm/jsep/webgpu/ops/matmul.ts b/js/web/lib/wasm/jsep/webgpu/ops/matmul.ts
index 837ac8410f291..19ca4ac5358ae 100644
--- a/js/web/lib/wasm/jsep/webgpu/ops/matmul.ts
+++ b/js/web/lib/wasm/jsep/webgpu/ops/matmul.ts
@@ -1,31 +1,11 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
-import {DataType} from '../../../wasm-common';
 import {TensorView} from '../../tensor-view';
 import {BroadcastUtil} from '../../util';
-import {ComputeContext, GpuDataType, ProgramInfoLoader} from '../types';
+import {ComputeContext} from '../types';
 
 import {createMatmulProgramInfo} from './3rd-party/matmul_packed_webgpu';
-import {InternalActivationAttributes} from './fuse-utils';
-
-
-const createMatmulProgramMetadata = (hasBias: boolean, cacheHint: string) => ({
-  name: 'MatMul',
-  inputTypes: hasBias ? [GpuDataType.default, GpuDataType.default, GpuDataType.default] :
-                        [GpuDataType.default, GpuDataType.default],
-  cacheHint
-});
-
-export const createMatmulProgramInfoLoader =
-    (inputs: readonly TensorView[], activationAttributes: InternalActivationAttributes, outputShape: readonly number[],
-     reshapedOutputShape?: readonly number[]): ProgramInfoLoader => {
-      const metadata = createMatmulProgramMetadata(inputs.length > 2, activationAttributes.activationCacheKey);
-      return {
-        ...metadata,
-        get: () => createMatmulProgramInfo(metadata, inputs, activationAttributes, outputShape, reshapedOutputShape)
-      };
-    };
 
 const validateInputs = (inputs: readonly TensorView[]): void => {
   if (!inputs || inputs.length !== 2) {
@@ -35,10 +15,6 @@ const validateInputs = (inputs: readonly TensorView[]): void => {
   if (inputs[0].dims[inputs[0].dims.length - 1] !== inputs[1].dims[inputs[1].dims.length - 2]) {
     throw new Error('shared dimension does not match.');
   }
-
-  if (inputs[0].dataType !== DataType.float || inputs[1].dataType !== DataType.float) {
-    throw new Error('inputs should be float type');
-  }
 };
 
 export const matMul = (context: ComputeContext): void => {
@@ -47,5 +23,5 @@ export const matMul = (context: ComputeContext): void => {
   if (!outputShape) {
     throw new Error('Can\'t use matmul on the given tensors');
   }
-  context.compute(createMatmulProgramInfoLoader(context.inputs, {activation: '', activationCacheKey: ''}, outputShape));
+  context.compute(createMatmulProgramInfo(context.inputs, {activation: '', activationCacheKey: ''}, outputShape));
 };
diff --git a/js/web/lib/wasm/jsep/webgpu/ops/multi-head-attentiion.ts b/js/web/lib/wasm/jsep/webgpu/ops/multi-head-attentiion.ts
new file mode 100644
index 0000000000000..b7726a36bcaad
--- /dev/null
+++ b/js/web/lib/wasm/jsep/webgpu/ops/multi-head-attentiion.ts
@@ -0,0 +1,335 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+import {TensorView} from '../../tensor-view';
+import {ShapeUtil} from '../../util';
+import {createAttributeWithCacheKey} from '../attribute-with-cache-key';
+import {ComputeContext, GpuDataType} from '../types';
+
+import {applyAttention, AttentionAttrs, AttentionMaskType, AttentionParameters, AttentionQkvFormat} from './attention';
+import {ShaderHelper, tensorTypeToWsglStorageType} from './common';
+import {createTransposeProgramInfo, TransposeAttributes} from './transpose';
+
+const validateInputs = (inputs: readonly TensorView[], attributes: AttentionAttrs): AttentionParameters => {
+  const query = inputs[0];
+  const key = inputs[1];
+  const value = inputs[2];
+  const bias = inputs[3];
+  const keyPaddingMask = inputs[4];
+  const relativePositionBias = inputs[5];
+  const pastKey = inputs[6];
+  const pastValue = inputs[7];
+
+  // Abbreviation and Meanings:
+  //   B:    batch_size
+  //   S:    sequence_length (input sequence length of query)
+  //   P:    past_sequence_length (past sequence length of key or value)
+  //   L:    kv_sequence_length (input sequence length of key or value)
+  //   M:    max_sequence_length
+  //   T:    total_sequence_length = past_sequence_length + kv_sequence_length
+  //   N:    num_heads
+  //   H:    head size for Q and K, aka q_head_size or k_head_size or qk_head_size
+  //   H_v:  v_head_size
+  //   D_i:  input hidden size
+  //   D:    hidden size for Q and K (D = N * H), aka q_hidden_size or k_hidden_size or qk_hidden_size
+  //   D_v:  v_hidden_size = num_heads * v_head_size
+
+  //     key_padding_mask (K/V)     : (B) or (2*B + 1) or (B, L) or None
+  //     relative_position_bias     : (B, 1, S, L)
+  //     past_key                   : (B, N, S*, H)
+  //     past_value                 : (B, N, S*, H)
+  // When no packing for q/k/v:
+  //     query            (Q)       : (B, S, D)
+  //     key              (K)       : (B, L, D) or (B, N, S*, H)
+  //     value            (V)       : (B, L, D_v) or (B, N, S*, H)
+  //     bias             (Q/K/V)   : (D + D + D_v)
+  // When packed kv is used:
+  //     query            (Q)       : (B, S, D)
+  //     key              (K)       : (B, L, N, 2, H)
+  //     value            (V)       : None
+  //     bias             (Q/K/V)   : None
+  // When packed qkv is used:
+  //     query            (Q)       : (B, L, N, 3, H) or (B, S, 3*D)
+  //     key              (K)       : None
+  //     value            (V)       : None
+  //     bias             (Q/K/V)   : None or (D + D + D_v)
+
+  if (query.dims.length !== 3 && query.dims.length !== 5) {
+    throw new Error('Input query is expected to have 3 or 5 dimensions');
+  }
+
+  const dmmhaPacking = false;
+  const batchSize = query.dims[0];
+  const sequenceLength = query.dims[1];
+  const hiddenSize = query.dims.length === 3 ? (dmmhaPacking ? query.dims[2] / 3 : query.dims[2]) :
+                                               attributes.numHeads * query.dims[4];
+  let kvSequenceLength = sequenceLength;
+
+  let pastSequenceLength = 0;
+  let maxSequenceLength = 0;
+  const headSize = Math.floor(hiddenSize / attributes.numHeads);
+  if (pastKey && pastValue) {
+    if (pastKey.dims.length !== 4) {
+      throw new Error('Input "past_key" is expected to have 4 dimensions');
+    }
+    if (pastValue.dims.length !== 4) {
+      throw new Error('Input "past_value" is expected to have 4 dimensions');
+    }
+    pastSequenceLength = pastKey.dims[2];
+    maxSequenceLength = pastKey.dims[2];
+  } else if (pastKey || pastValue) {
+    throw new Error('Input "past_key" and "past_value" shall be both present or both absent');
+  }
+
+  let qkvFormat: AttentionQkvFormat;
+  if (key) {
+    if (query.dims.length !== 3) {
+      throw new Error('Input "query" is expected to have 3 dimensions when key is given');
+    }
+    if (key.dims.length < 3 || key.dims.length > 5) {
+      throw new Error('Input "key" is expected to have 3, 4, or 5 dimensions');
+    }
+    if (query.dims[0] !== key.dims[0]) {
+      throw new Error('Input "query" and "key" shall have same dim 0 (batch size)');
+    }
+
+    if (key.dims.length === 3) {
+      if (key.dims[2] !== query.dims[2]) {
+        throw new Error('Input "query" and "key" shall have same dim 2 (hidden_size)');
+      }
+      qkvFormat = AttentionQkvFormat.qkvBSNH;
+      kvSequenceLength = key.dims[1];
+    } else if (key.dims.length === 5) {
+      if (key.dims[2] !== attributes.numHeads || key.dims[3] !== 2 || key.dims[4] !== headSize) {
+        throw new Error('Expect "key" shape (batch_size, kv_sequence_length, num_heads, 2, head_size) for packed kv');
+      }
+      if (value) {
+        throw new Error('Expect "value" be none when "key" has packed kv format.');
+      }
+      qkvFormat = AttentionQkvFormat.qKvBSNHxBSN2H;
+      kvSequenceLength = key.dims[1];
+    } else {  // key_dims.size() == 4 (cross-attention with past_key)
+      if (key.dims[1] !== attributes.numHeads || key.dims[3] !== headSize) {
+        throw new Error('Expect "key" shape (batch_size, num_heads, kv_sequence_length, head_size) for past_key');
+      }
+
+      qkvFormat = AttentionQkvFormat.unknown;
+      kvSequenceLength = key.dims[2];
+    }
+  } else {  // packed QKV
+    if (query.dims.length !== 3 && query.dims.length !== 5) {
+      throw new Error('Input "query" is expected to have 3 or 5 dimensions when key is empty');
+    }
+    if (query.dims.length === 5 && (query.dims[2] !== attributes.numHeads || query.dims[3] !== 3)) {
+      throw new Error('Expect "query" shape (batch_size, kv_sequence_length, num_heads, 3, head_size) for packed kv');
+    }
+
+    qkvFormat = AttentionQkvFormat.qkvBSN3H;
+  }
+
+  if (bias) {
+    if (bias.dims.length !== 1) {
+      throw new Error('Input "bias" is expected to have 1 dimension');
+    }
+
+    if (value) {
+      if (query.dims.length === 5 && query.dims[3] === 2) {
+        throw new Error('bias is not allowed for packed kv.');
+      }
+    }
+  }
+
+  let maskType: AttentionMaskType = AttentionMaskType.none;
+  if (keyPaddingMask) {
+    maskType = AttentionMaskType.maskUnknown;
+    const maskDims = keyPaddingMask.dims;
+    if (maskDims.length === 1) {
+      if (maskDims[0] === batchSize) {
+        maskType = AttentionMaskType.mask1dKeySeqLen;
+      } else if (maskDims[0] === 3 * batchSize + 2) {
+        maskType = AttentionMaskType.mask1DKeySeqLenStart;
+      }
+    } else if (maskDims.length === 2 && maskDims[0] === batchSize && maskDims[1] === kvSequenceLength) {
+      maskType = AttentionMaskType.mask2dKeyPadding;
+    }
+    if (maskType === AttentionMaskType.maskUnknown) {
+      throw new Error('Input "key_padding_mask" shape shall be (batch_size) or (batch_size, kv_sequence_length)');
+    }
+    throw new Error('Mask not supported');
+  }
+
+  let passPastInKv = false;
+  let vHiddenSize = hiddenSize;
+  if (value) {
+    if (value.dims.length !== 3 && value.dims.length !== 4) {
+      throw new Error('Input "value" is expected to have 3 or 4 dimensions');
+    }
+
+    if (query.dims[0] !== value.dims[0]) {
+      throw new Error('Input "query" and "value" shall have same dim 0 (batch_size)');
+    }
+
+    if (value.dims.length === 3) {
+      if (kvSequenceLength !== value.dims[1]) {
+        throw new Error('Input "key" and "value" shall have the same dim 1 (kv_sequence_length)');
+      }
+      vHiddenSize = value.dims[2];
+    } else {
+      if (kvSequenceLength !== value.dims[2]) {
+        throw new Error('Input "past_key" and "past_value" shall have the same dim 2 (kv_sequence_length)');
+      }
+      vHiddenSize = value.dims[1] * value.dims[3];
+      passPastInKv = true;
+    }
+  }
+
+  const totalSequenceLength = pastSequenceLength + kvSequenceLength;
+  const broadcastResPosBias = false;
+  // if (extraAddQk) {
+  //   if (extraAddQk.dims[0] === 1) {
+  //     broadcastResPosBias = true;
+  //   }
+  // }
+
+  if (keyPaddingMask) {
+    throw new Error('Key padding mask is not supported');
+  }
+  if (relativePositionBias) {
+    throw new Error('extraAddQk is not supported');
+  }
+  if (pastKey) {
+    throw new Error('pastKey is not supported');
+  }
+  if (pastValue) {
+    throw new Error('pastValue is not supported');
+  }
+
+  return {
+    batchSize,
+    sequenceLength,
+    pastSequenceLength,
+    kvSequenceLength,
+    totalSequenceLength,
+    maxSequenceLength,
+    inputHiddenSize: 0,
+    hiddenSize,
+    vHiddenSize,
+    headSize,
+    vHeadSize: Math.floor(vHiddenSize / attributes.numHeads),
+    numHeads: attributes.numHeads,
+    isUnidirectional: false,
+    pastPresentShareBuffer: false,
+    maskFilterValue: attributes.maskFilterValue,
+    maskType,
+    scale: attributes.scale,
+    broadcastResPosBias,
+    passPastInKv,
+    qkvFormat,
+  };
+};
+
+
+export const parseMultiHeadAttentionAttributes = (attributes: AttentionAttrs): AttentionAttrs =>
+    createAttributeWithCacheKey({...attributes});
+
+const weightTransposeAttribute: TransposeAttributes = createAttributeWithCacheKey({perm: [0, 2, 1, 3]});
+
+const addBiasTranspose =
+    (context: ComputeContext, qkv: TensorView, bias: TensorView, batchSize: number, sequenceLength: number,
+     hiddenSize: number, biasOffset: number) => {
+      const outputShape = [batchSize, sequenceLength, hiddenSize];
+      const outputSize = ShapeUtil.size(outputShape);
+
+      const dataType = tensorTypeToWsglStorageType(qkv.dataType);
+      const getShaderSource = (shaderHelper: ShaderHelper) => `
+  const biasOffset = ${biasOffset}u;
+  const hiddenSize = ${hiddenSize}u;
+
+  @group(0) @binding(0) var<storage, read> qkv: array<${dataType}>;
+  @group(0) @binding(1) var<storage, read> bias: array<${dataType}>;
+  @group(0) @binding(2) var<storage, read_write> qkv_with_bias: array<${dataType}>;
+
+  ${shaderHelper.mainStart()}
+    ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes(outputSize)}
+    let biasOffsetIdx = (global_idx % hiddenSize) + biasOffset;
+
+    qkv_with_bias[global_idx] = qkv[global_idx] + bias[biasOffsetIdx];
+  }`;
+
+      return context.compute(
+          {
+            name: 'MultiHeadAttentionAddBias',
+            shaderCache: {hint: JSON.stringify({batchSize, sequenceLength, hiddenSize, biasOffset})},
+            getRunData: () => ({
+              outputs: [{dims: outputShape, dataType: qkv.dataType, gpuDataType: GpuDataType.default}],
+              dispatchGroup: {x: Math.ceil(outputSize / 64 /* workgroup size */)},
+            }),
+            getShaderSource,
+          },
+          {inputs: [qkv, bias], outputs: [-1]})[0];
+    };
+
+const maybeTransposeToBNSHAndAddBias =
+    (context: ComputeContext, batchSize: number, numHeads: number, sequenceLength: number, headSize: number,
+     input: TensorView, bias?: TensorView, biasOffset?: number) => {
+      // const newDims = [];
+
+      let reshapedInput = input;
+      if (!bias) {
+        if (input.dims.length === 3) {
+          reshapedInput = input.reshape([batchSize, sequenceLength, numHeads, headSize]);
+        }
+        return context.compute(
+            createTransposeProgramInfo(reshapedInput, weightTransposeAttribute.perm),
+            {inputs: [reshapedInput], outputs: [-1]})[0];
+      } else {
+        if (sequenceLength === 1) {
+          throw new Error('AddBiasReshape is not implemented. Please export your model with packed QKV or KV');
+        } else {
+          reshapedInput =
+              addBiasTranspose(context, input, bias, batchSize, sequenceLength, numHeads * headSize, biasOffset!);
+          reshapedInput = reshapedInput.reshape([batchSize, sequenceLength, numHeads, headSize]);
+          return context.compute(
+              createTransposeProgramInfo(reshapedInput, weightTransposeAttribute.perm),
+              {inputs: [reshapedInput], outputs: [-1]})[0];
+        }
+      }
+    };
+
+export const multiHeadAttention = (context: ComputeContext, attributes: AttentionAttrs): void => {
+  const params = validateInputs(context.inputs, attributes);
+
+  if (context.inputs[0].dims.length === 5) {
+    throw new Error('Packed QKV is not implemented');
+  }
+
+  if (context.inputs[1]?.dims.length === 5) {
+    throw new Error('Packed KV is not implemented');
+  }
+
+  // applyAttention expects BNSH inputs
+  const kvBNSH = context.inputs[1] && context.inputs[2] && context.inputs[1].dims.length === 4 &&
+      context.inputs[2].dims.length === 4;
+
+  const Q = maybeTransposeToBNSHAndAddBias(
+      context, params.batchSize, params.numHeads, params.sequenceLength, params.headSize, context.inputs[0],
+      context.inputs[3], 0);
+
+  if (kvBNSH) {
+    return applyAttention(
+        context, Q, context.inputs[1], context.inputs[2], context.inputs[4], undefined, undefined, undefined,
+        context.inputs[5], params, attributes);
+  }
+
+  const K = maybeTransposeToBNSHAndAddBias(
+      context, params.batchSize, params.numHeads, params.kvSequenceLength, params.headSize, context.inputs[1],
+      context.inputs[3], params.hiddenSize);
+
+  const V = maybeTransposeToBNSHAndAddBias(
+      context, params.batchSize, params.numHeads, params.kvSequenceLength, params.vHeadSize, context.inputs[2],
+      context.inputs[3], 2 * params.hiddenSize);
+
+  applyAttention(
+      context, Q, K, V, context.inputs[4], undefined, context.inputs[6], context.inputs[7], context.inputs[5], params,
+      attributes);
+};
diff --git a/js/web/lib/wasm/jsep/webgpu/ops/pad.ts b/js/web/lib/wasm/jsep/webgpu/ops/pad.ts
index c2f89fd2845df..18859e253aa02 100644
--- a/js/web/lib/wasm/jsep/webgpu/ops/pad.ts
+++ b/js/web/lib/wasm/jsep/webgpu/ops/pad.ts
@@ -5,7 +5,7 @@ import {DataType} from '../../../wasm-common';
 import {TensorView} from '../../tensor-view';
 import {ShapeUtil} from '../../util';
 import {AttributeWithCacheKey, createAttributeWithCacheKey} from '../attribute-with-cache-key';
-import {ComputeContext, GpuDataType, ProgramInfo, ProgramInfoLoader, ProgramMetadata} from '../types';
+import {ComputeContext, ProgramInfo} from '../types';
 
 import {IndicesHelper, inputVariable, outputVariable, ShaderHelper} from './common';
 
@@ -36,8 +36,8 @@ const validateInputs = (inputs: readonly TensorView[]): void => {
 };
 
 const getPadConstant =
-    (output: IndicesHelper, outputDims: readonly number[], inputDims: readonly number[],
-     inputStrides: readonly number[], pads: number[], dataType: string, constantValue: number): string => {
+    (output: IndicesHelper, inputDims: readonly number[], inputStrides: readonly number[], pads: number[],
+     dataType: string, constantValue: number): string => {
       const inputRank = inputDims.length;
 
       let block = '';
@@ -66,8 +66,7 @@ const getPadConstant =
     };
 
 const getPadReflect =
-    (output: IndicesHelper, outputDims: readonly number[], inputDims: readonly number[],
-     inputStrides: readonly number[], pads: number[]): string => {
+    (output: IndicesHelper, inputDims: readonly number[], inputStrides: readonly number[], pads: number[]): string => {
       const inputRank = inputDims.length;
 
       let block = '';
@@ -97,8 +96,7 @@ const getPadReflect =
     };
 
 const getPadEdge =
-    (output: IndicesHelper, outputDims: readonly number[], inputDims: readonly number[],
-     inputStrides: readonly number[], pads: number[]): string => {
+    (output: IndicesHelper, inputDims: readonly number[], inputStrides: readonly number[], pads: number[]): string => {
       const inputRank = inputDims.length;
 
       let block = '';
@@ -124,8 +122,7 @@ const getPadEdge =
     };
 
 const getPadWrap =
-    (output: IndicesHelper, outputDims: readonly number[], inputDims: readonly number[],
-     inputStrides: readonly number[], pads: number[]): string => {
+    (output: IndicesHelper, inputDims: readonly number[], inputStrides: readonly number[], pads: number[]): string => {
       const inputRank = inputDims.length;
 
       let block = '';
@@ -151,18 +148,17 @@ const getPadWrap =
     };
 
 const getPadSnippet =
-    (output: IndicesHelper, outputDims: readonly number[], inputDims: readonly number[],
-     inputStrides: readonly number[], attributes: PadAttributes, dataType: string): string => {
+    (output: IndicesHelper, inputDims: readonly number[], inputStrides: readonly number[], attributes: PadAttributes,
+     dataType: string): string => {
       switch (attributes.mode) {
         case 0:
-          return getPadConstant(
-              output, outputDims, inputDims, inputStrides, attributes.pads, dataType, attributes.value);
+          return getPadConstant(output, inputDims, inputStrides, attributes.pads, dataType, attributes.value);
         case 1:
-          return getPadReflect(output, outputDims, inputDims, inputStrides, attributes.pads);
+          return getPadReflect(output, inputDims, inputStrides, attributes.pads);
         case 2:
-          return getPadEdge(output, outputDims, inputDims, inputStrides, attributes.pads);
+          return getPadEdge(output, inputDims, inputStrides, attributes.pads);
         case 3:
-          return getPadWrap(output, outputDims, inputDims, inputStrides, attributes.pads);
+          return getPadWrap(output, inputDims, inputStrides, attributes.pads);
         default:
           throw new Error('Invalid mode');
       }
@@ -179,10 +175,9 @@ const generatePadCode =
           const output = outputVariable('output', inputs[0].dataType, outputDims);
           const input = inputVariable('x', inputs[0].dataType, inputDims);
 
-          const padSnippet = getPadSnippet(output, outputDims, inputDims, inputStrides, attributes, dataType);
+          const padSnippet = getPadSnippet(output, inputDims, inputStrides, attributes, dataType);
           const padCode = `
               ${shaderHelper.declareVariables(input, output)}
-              ${output.impl()}
               ${shaderHelper.mainStart()}
               ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes(outputSize)}
 
@@ -195,21 +190,23 @@ const generatePadCode =
           return padCode;
         };
 
-const createPadProgramInfo =
-    (inputs: readonly TensorView[], metadata: ProgramMetadata, attributes: PadAttributes): ProgramInfo => {
-      const outputShape = ShapeUtil.padShape(inputs[0].dims.slice(), attributes.pads);
-      return {
-        ...metadata,
-        outputs: [{dims: outputShape, dataType: inputs[0].dataType, gpuDataType: GpuDataType.default}],
-        getShaderSource: shaderHelper => generatePadCode(shaderHelper, inputs, attributes, 'f32'),
-        dispatchGroup: () => ({x: Math.ceil(ShapeUtil.size(outputShape) / 64 /* workgroup size */)})
-      };
-    };
+const createPadProgramInfo = (inputs: readonly TensorView[], attributes: PadAttributes): ProgramInfo => {
+  const outputShape = ShapeUtil.padShape(inputs[0].dims.slice(), attributes.pads);
+  return {
+    name: 'Pad',
+    shaderCache: {hint: attributes.cacheKey},
+    getRunData: () => ({
+      outputs: [{dims: outputShape, dataType: inputs[0].dataType}],
+      dispatchGroup: {x: Math.ceil(ShapeUtil.size(outputShape) / 64 /* workgroup size */)}
+    }),
+    getShaderSource: shaderHelper => generatePadCode(shaderHelper, inputs, attributes, 'f32'),
+  };
+};
 
 const createPadAttributesFromInputs = (inputs: readonly TensorView[], attributes: PadAttributes): PadAttributes => {
   if (inputs.length > 1) {
     const bigInt64Pads = inputs[1].getBigInt64Array();
-    const value = (inputs.length >= 3) ? inputs[2].getFloat32Array()[0] : 0.0;
+    const value = (inputs.length >= 3 && inputs[2].data) ? inputs[2].getFloat32Array()[0] : 0.0;
 
     const inputRank = inputs[0].dims.length;
     const updatePads = new Int32Array(2 * inputRank).fill(0);
@@ -220,7 +217,7 @@ const createPadAttributesFromInputs = (inputs: readonly TensorView[], attributes
         updatePads[Number(axes[i]) + inputRank] = Number(bigInt64Pads[i + axes.length]);
       }
     } else {
-      bigInt64Pads.forEach((i, v) => updatePads[Number(i)] = (Number(v)));
+      bigInt64Pads.forEach((v, i) => updatePads[Number(i)] = (Number(v)));
     }
 
     const pads: number[] = [];
@@ -232,16 +229,10 @@ const createPadAttributesFromInputs = (inputs: readonly TensorView[], attributes
   }
 };
 
-const createPadProgramInfoLoader = (inputs: readonly TensorView[], attributes: PadAttributes): ProgramInfoLoader => {
-  const updatedAttributes = createPadAttributesFromInputs(inputs, attributes);
-  const metadata:
-      ProgramMetadata = {name: 'Pad', inputTypes: [GpuDataType.default], cacheHint: updatedAttributes.cacheKey};
-  return {...metadata, get: () => createPadProgramInfo(inputs, metadata, updatedAttributes)};
-};
-
 export const pad = (context: ComputeContext, attributes: PadAttributes): void => {
   validateInputs(context.inputs);
-  context.compute(createPadProgramInfoLoader(context.inputs, attributes), {inputs: [0]});
+  const updatedAttributes = createPadAttributesFromInputs(context.inputs, attributes);
+  context.compute(createPadProgramInfo(context.inputs, updatedAttributes), {inputs: [0]});
 };
 
 export const parsePadAttributes = (attributes: Record<string, unknown>): PadAttributes => {
diff --git a/js/web/lib/wasm/jsep/webgpu/ops/pool.ts b/js/web/lib/wasm/jsep/webgpu/ops/pool.ts
index 120a0e9de5490..1538644412afd 100644
--- a/js/web/lib/wasm/jsep/webgpu/ops/pool.ts
+++ b/js/web/lib/wasm/jsep/webgpu/ops/pool.ts
@@ -4,7 +4,7 @@
 import {TensorView} from '../../tensor-view';
 import {PoolConvUtil, ShapeUtil} from '../../util';
 import {AttributeWithCacheKey, createAttributeWithCacheKey} from '../attribute-with-cache-key';
-import {ComputeContext, GpuDataType, ProgramInfo, ProgramMetadata} from '../types';
+import {ComputeContext, ProgramInfo} from '../types';
 
 import {IndicesHelper, inputVariable, outputVariable, ShaderHelper} from './common';
 
@@ -18,16 +18,18 @@ const validateInputs = (inputs: readonly TensorView[]): void => {
   if (!inputs || inputs.length !== 1) {
     throw new Error('Pool ops requires 1 input.');
   }
-  if (inputs[0].dims.length !== 4) {
-    throw new Error('Pool ops supports 2-D inputs only for now.');
+  if (inputs[0].dims.length !== 4 && inputs[0].dims.length !== 3) {
+    throw new Error('Pool ops supports 1-D or 2-D inputs only for now.');
   }
 };
 
 const getAdjustedPoolAttributesAndOutputShape = <AttributeType extends AveragePoolAttributes|MaxPoolAttributes>(
     input: TensorView, attributes: AttributeType, isGlobalOperator: boolean): [AttributeType, number[]] => {
   const isChannelsLast = attributes.format === 'NHWC';
-  const inputShapeAsChannelFirst =
-      isChannelsLast ? [input.dims[0], input.dims[3], input.dims[1], input.dims[2]] : input.dims.slice();
+  const inputShapeAsChannelFirst = input.dims.slice();
+  if (isChannelsLast) {
+    inputShapeAsChannelFirst.splice(1, 0, inputShapeAsChannelFirst.pop()!);  // Move channel to the second position.
+  }
   const hasDilations = Object.hasOwnProperty.call(attributes, 'dilations');
   const kernelShape = attributes.kernelShape.slice();
   const strides = attributes.strides.slice();
@@ -44,22 +46,16 @@ const getAdjustedPoolAttributesAndOutputShape = <AttributeType extends AveragePo
   } else {
     Object.assign(newAttributes, {kernelShape, strides, pads, cacheKey: attributes.cacheKey});
   }
-  return [
-    newAttributes,
-    isChannelsLast ?
-        [
-          outputShapeAsChannelFirst[0], outputShapeAsChannelFirst[2], outputShapeAsChannelFirst[3],
-          outputShapeAsChannelFirst[1]
-        ] :
-        outputShapeAsChannelFirst
-  ];
+  const outputShapeAsChannelLast = outputShapeAsChannelFirst.slice();
+  outputShapeAsChannelLast.push(outputShapeAsChannelLast.splice(1, 1)[0]);
+  return [newAttributes, isChannelsLast ? outputShapeAsChannelLast : outputShapeAsChannelFirst];
 };
 
 const generatePoolingCode = <AttributeType extends AveragePoolAttributes|MaxPoolAttributes>(
-    shaderHelper: ShaderHelper, x: IndicesHelper, outputShape: readonly number[], attributes: AttributeType,
-    op1: string, op2: string, start: string): string => {
+    shaderHelper: ShaderHelper, x: IndicesHelper, xShape: readonly number[], outputShape: readonly number[],
+    attributes: AttributeType, op1: string, op2: string, start: string): string => {
   const isChannelsLast = attributes.format === 'NHWC';
-  const inputDims = x.shape;
+  const inputDims = xShape;
   const dataType = x.type.value;
   const rank = inputDims.length;
   const outputSize = ShapeUtil.size(outputShape);
@@ -76,22 +72,22 @@ const generatePoolingCode = <AttributeType extends AveragePoolAttributes|MaxPool
     let codeHEnd = '';
     if (pwStart + pwEnd !== 0) {
       codeW = `
-              for (var i: u32 = 0u; i < ${kw}u; i++) {
-                xIndices[${dimIdxW}] = indices[${dimIdxW}] * ${sw} - ${pwStart} + i;
-                if (xIndices[${dimIdxW}] < 0 || xIndices[${dimIdxW}] >= ${inputDims[dimIdxW]}) {
-                  pad++;
-                  continue;
-                }
-                let x_val = x[${x.indicesToOffset('xIndices')}];
-                ${op1}
-              }`;
+                for (var i: u32 = 0u; i < ${kw}u; i++) {
+                  xIndices[${dimIdxW}] = indices[${dimIdxW}] * ${sw} - ${pwStart} + i;
+                  if (xIndices[${dimIdxW}] < 0 || xIndices[${dimIdxW}] >= ${inputDims[dimIdxW]}) {
+                    pad++;
+                    continue;
+                  }
+                  let x_val = x[${x.indicesToOffset('xIndices')}];
+                  ${op1}
+                }`;
     } else {
       codeW = `
-              for (var i: u32 = 0u; i < ${kw}u; i++) {
-                xIndices[${dimIdxW}] = indices[${dimIdxW}] * ${sw} - ${pwStart} + i;
-                let x_val = x[${x.indicesToOffset('xIndices')}];
-                ${op1}
-              }`;
+                for (var i: u32 = 0u; i < ${kw}u; i++) {
+                  xIndices[${dimIdxW}] = indices[${dimIdxW}] * ${sw} - ${pwStart} + i;
+                  let x_val = x[${x.indicesToOffset('xIndices')}];
+                  ${op1}
+                }`;
     }
 
     if (attributes.kernelShape.length === 2) {
@@ -237,30 +233,32 @@ export interface AveragePoolAttributes extends PoolCommonAttributes, AttributeWi
 }
 
 const createAveragePoolProgramInfo =
-    (input: TensorView, metadata: ProgramMetadata, isGlobalOperator: boolean, attributes: AveragePoolAttributes):
-        ProgramInfo => {
-          const [adjustedAttributes, outputShape] =
-              getAdjustedPoolAttributesAndOutputShape(input, attributes, isGlobalOperator);
-          const kernelSize = ShapeUtil.size(adjustedAttributes.kernelShape);
-
-          const x = inputVariable('x', input.dataType, input.dims);
-          const dataType = x.type.value;
-
-          const op1 = 'value += x_val;';
-          let op2 = '';
-          if (adjustedAttributes.countIncludePad) {
-            op2 += `value /= ${dataType}(${kernelSize});`;
-          } else {
-            op2 += `value /= ${dataType}(${kernelSize} - pad);`;
-          }
-          return {
-            ...metadata,
-            outputs: [{dims: outputShape, dataType: input.dataType, gpuDataType: GpuDataType.default}],
-            getShaderSource: shaderHelper =>
-                generatePoolingCode(shaderHelper, x, outputShape, adjustedAttributes, op1, op2, '0.0'),
-            dispatchGroup: () => ({x: Math.ceil(ShapeUtil.size(outputShape) / 64 /* workgroup size */)})
-          };
-        };
+    (name: string, input: TensorView, isGlobalOperator: boolean, attributes: AveragePoolAttributes): ProgramInfo => {
+      const [adjustedAttributes, outputShape] =
+          getAdjustedPoolAttributesAndOutputShape(input, attributes, isGlobalOperator);
+      const kernelSize = ShapeUtil.size(adjustedAttributes.kernelShape);
+
+      const x = inputVariable('x', input.dataType, input.dims);
+      const dataType = x.type.value;
+
+      const op1 = 'value += x_val;';
+      let op2 = '';
+      if (adjustedAttributes.countIncludePad) {
+        op2 += `value /= ${dataType}(${kernelSize});`;
+      } else {
+        op2 += `value /= ${dataType}(${kernelSize} - pad);`;
+      }
+      return {
+        name,
+        shaderCache: {hint: attributes.cacheKey},
+        getRunData: () => ({
+          outputs: [{dims: outputShape, dataType: input.dataType}],
+          dispatchGroup: {x: Math.ceil(ShapeUtil.size(outputShape) / 64 /* workgroup size */)}
+        }),
+        getShaderSource: shaderHelper =>
+            generatePoolingCode(shaderHelper, x, input.dims, outputShape, adjustedAttributes, op1, op2, '0.0'),
+      };
+    };
 
 export const parseAveragePoolAttributes = (attributes: Record<string, unknown>): AveragePoolAttributes => {
   const countIncludePad = (attributes.count_include_pad as number) === 0 ? false : true;
@@ -276,9 +274,7 @@ export const parseAveragePoolAttributes = (attributes: Record<string, unknown>):
 
 export const averagePool = (context: ComputeContext, attributes: AveragePoolAttributes): void => {
   validateInputs(context.inputs);
-  const metadata = {name: 'AveragePool', inputTypes: [GpuDataType.default], cacheHint: attributes.cacheKey};
-  context.compute(
-      {...metadata, get: () => createAveragePoolProgramInfo(context.inputs[0], metadata, false, attributes)});
+  context.compute(createAveragePoolProgramInfo('AveragePool', context.inputs[0], false, attributes));
 };
 
 const globalPoolAttributes = {
@@ -300,9 +296,7 @@ export const parseGlobalAveragePoolAttributes = (attributes: Record<string, unkn
 
 export const globalAveragePool = (context: ComputeContext, attributes: AveragePoolAttributes): void => {
   validateInputs(context.inputs);
-  const metadata = {name: 'GlobalAveragePool', inputTypes: [GpuDataType.default], cacheHint: attributes.cacheKey};
-  context.compute(
-      {...metadata, get: () => createAveragePoolProgramInfo(context.inputs[0], metadata, true, attributes)});
+  context.compute(createAveragePoolProgramInfo('GlobalAveragePool', context.inputs[0], true, attributes));
 };
 
 export interface MaxPoolAttributes extends PoolCommonAttributes, AttributeWithCacheKey {
@@ -311,28 +305,29 @@ export interface MaxPoolAttributes extends PoolCommonAttributes, AttributeWithCa
 }
 
 const createMaxPoolProgramInfo =
-    (input: TensorView, metadata: ProgramMetadata, isGlobalOperator: boolean, attributes: MaxPoolAttributes):
-        ProgramInfo => {
-          const [adjustedAttributes, outputShape] =
-              getAdjustedPoolAttributesAndOutputShape(input, attributes, isGlobalOperator);
-          const op1 = `
+    (name: string, input: TensorView, isGlobalOperator: boolean, attributes: MaxPoolAttributes): ProgramInfo => {
+      const [adjustedAttributes, outputShape] =
+          getAdjustedPoolAttributesAndOutputShape(input, attributes, isGlobalOperator);
+      const op1 = `
       value = max(x_val, value);
     `;
-          const op2 = '';
-          const x = inputVariable('x', input.dataType, input.dims);
-          return {
-            ...metadata,
-            outputs: [{dims: outputShape, dataType: input.dataType, gpuDataType: GpuDataType.default}],
-            getShaderSource: shaderHelper =>
-                generatePoolingCode(shaderHelper, x, outputShape, adjustedAttributes, op1, op2, '-1e5'),
-            dispatchGroup: () => ({x: Math.ceil(ShapeUtil.size(outputShape) / 64 /* workgroup size */)})
-          };
-        };
+      const op2 = '';
+      const x = inputVariable('x', input.dataType, input.dims);
+      return {
+        name,
+        shaderCache: {hint: attributes.cacheKey},
+        getRunData: () => ({
+          outputs: [{dims: outputShape, dataType: input.dataType}],
+          dispatchGroup: {x: Math.ceil(ShapeUtil.size(outputShape) / 64 /* workgroup size */)}
+        }),
+        getShaderSource: shaderHelper =>
+            generatePoolingCode(shaderHelper, x, input.dims, outputShape, adjustedAttributes, op1, op2, '-1e5'),
+      };
+    };
 
 export const maxPool = (context: ComputeContext, attributes: MaxPoolAttributes): void => {
   validateInputs(context.inputs);
-  const metadata = {name: 'MaxPool', inputTypes: [GpuDataType.default], cacheHint: attributes.cacheKey};
-  context.compute({...metadata, get: () => createMaxPoolProgramInfo(context.inputs[0], metadata, false, attributes)});
+  context.compute(createMaxPoolProgramInfo('MaxPool', context.inputs[0], false, attributes));
 };
 
 export const parseMaxPoolAttributes = (attributes: Record<string, unknown>): MaxPoolAttributes => {
@@ -358,6 +353,5 @@ export const parseGlobalMaxPoolAttributes = (attributes: Record<string, unknown>
 
 export const globalMaxPool = (context: ComputeContext, attributes: MaxPoolAttributes): void => {
   validateInputs(context.inputs);
-  const metadata = {name: 'GlobalMaxPool', inputTypes: [GpuDataType.default], cacheHint: attributes.cacheKey};
-  context.compute({...metadata, get: () => createMaxPoolProgramInfo(context.inputs[0], metadata, true, attributes)});
+  context.compute(createMaxPoolProgramInfo('GlobalMaxPool', context.inputs[0], true, attributes));
 };
diff --git a/js/web/lib/wasm/jsep/webgpu/ops/range.ts b/js/web/lib/wasm/jsep/webgpu/ops/range.ts
new file mode 100644
index 0000000000000..9cf66111bf707
--- /dev/null
+++ b/js/web/lib/wasm/jsep/webgpu/ops/range.ts
@@ -0,0 +1,63 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+import {env} from 'onnxruntime-common';
+
+import {DataType} from '../../../wasm-common';
+import {ComputeContext, ProgramInfo} from '../types';
+
+import {outputVariable, ShaderHelper} from './common';
+
+const validateInputsContent = (start: number, limit: number, delta: number): void => {
+  const sameStartLimit = start === limit;
+  const increasingRangeNegativeStep = start < limit && delta < 0;
+  const decreasingRangePositiveStep = start > limit && delta > 0;
+
+  if (sameStartLimit || increasingRangeNegativeStep || decreasingRangePositiveStep) {
+    throw new Error('Range these inputs\' contents are invalid.');
+  }
+};
+
+const createRangeProgramInfo = (start: number, limit: number, delta: number, dataType: DataType): ProgramInfo => {
+  const numElements = Math.abs(Math.ceil((limit - start) / delta));
+  const outputShape: number[] = [numElements];
+  const outputSize = numElements;
+
+  const output = outputVariable('output', dataType, outputShape);
+  const wgslType = output.type.storage;
+
+  const getShaderSource = (shaderHelper: ShaderHelper) => `
+        ${shaderHelper.declareVariables(output)}
+        ${shaderHelper.mainStart()}
+        ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes(outputSize)}
+        output[global_idx] = ${wgslType}(${start}) + ${wgslType}(global_idx) * ${wgslType}(${delta});
+      }`;
+  return {
+    name: 'Range',
+    shaderCache: {hint: [start, limit, delta].map(x => x.toString()).join('_')},
+    getShaderSource,
+    getRunData: () => (
+        {outputs: [{dims: outputShape, dataType}],
+         dispatchGroup: {x: Math.ceil(outputSize / 64 /* workgroup size */)}})
+  };
+};
+
+export const range = (context: ComputeContext): void => {
+  let start = 0;
+  let limit = 0;
+  let delta = 0;
+  if (context.inputs[0].dataType === DataType.int32) {
+    start = context.inputs[0].getInt32Array()[0];
+    limit = context.inputs[1].getInt32Array()[0];
+    delta = context.inputs[2].getInt32Array()[0];
+  } else if (context.inputs[0].dataType === DataType.float) {
+    start = context.inputs[0].getFloat32Array()[0];
+    limit = context.inputs[1].getFloat32Array()[0];
+    delta = context.inputs[2].getFloat32Array()[0];
+  }
+  if (env.webgpu.validateInputContent) {
+    validateInputsContent(start, limit, delta);
+  }
+
+  context.compute(createRangeProgramInfo(start, limit, delta, context.inputs[0].dataType), {inputs: []});
+};
diff --git a/js/web/lib/wasm/jsep/webgpu/ops/reduce-shared.ts b/js/web/lib/wasm/jsep/webgpu/ops/reduce-shared.ts
new file mode 100644
index 0000000000000..1365d1e9a12a4
--- /dev/null
+++ b/js/web/lib/wasm/jsep/webgpu/ops/reduce-shared.ts
@@ -0,0 +1,266 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+import {DataType} from '../../../wasm-common';
+import {TensorView} from '../../tensor-view';
+import {ShapeUtil} from '../../util';
+import {ComputeContext, ProgramInfo, ProgramShaderCacheInfo} from '../types';
+
+import {inputVariable, outputVariable, ShaderHelper} from './common';
+import {createReduceAttributesFromInputs, ReduceAttributes} from './reduce';
+import {createTransposeProgramInfo} from './transpose';
+
+const reduceOps: {[key: string]: string} = {
+  max: 'select(bestValue, candidate, candidate > bestValue)',
+  min: 'select(bestValue, candidate, candidate < bestValue)',
+  mean: 'bestValue + candidate',
+  sum: 'bestValue + candidate',
+  prod: 'bestValue * candidate',
+  sumSquare: 'bestValue + candidate * candidate',
+  logSumExp: 'bestValue + exp(candidate)',
+  l1: 'bestValue + abs(candidate)',
+  l2: 'bestValue + candidate * candidate',
+  logSum: 'bestValue + candidate'
+};
+
+const reduceSharedOps: {[key: string]: string} = {
+  max: 'select(bestValue, candidate, candidate > bestValue)',
+  min: 'select(bestValue, candidate, candidate < bestValue)',
+  mean: 'bestValue + candidate',
+  sum: 'bestValue + candidate',
+  prod: 'bestValue * candidate',
+  sumSquare: 'bestValue + candidate',
+  logSumExp: 'bestValue + candidate',
+  l1: 'bestValue + candidate',
+  l2: 'bestValue + candidate',
+  logSum: 'bestValue + candidate'
+};
+
+const reduceInitValues: {[key: string]: string} = {
+  max: '_A[offset]',
+  min: '_A[offset]',
+  mean: '0',
+  sum: '0',
+  prod: '1',
+  sumSquare: '0',
+  logSumExp: '0',
+  l1: '0',
+  l2: '0',
+  logSum: '0'
+};
+
+const reduceOutputValues: {[key: string]: string} = {
+  max: 'bestValue',
+  min: 'bestValue',
+  sum: 'bestValue',
+  prod: 'bestValue',
+  sumSquare: 'bestValue',
+  logSumExp: 'log(bestValue)',
+  l1: 'bestValue',
+  l2: 'sqrt(bestValue)',
+  logSum: 'log(bestValue)'
+};
+
+const getInnerMostAxes = (numInnerAxes: number, rank: number): number[] => {
+  const res = [];
+  for (let i = rank - numInnerAxes; i < rank; ++i) {
+    res.push(i);
+  }
+  return res;
+};
+
+const computeOutAndReduceShapes = (shape: readonly number[], axes: readonly number[]): [number[], number[]] => {
+  const outputShape = [];
+  const rank = shape.length;
+  for (let dim = 0; dim < rank; dim++) {
+    if (axes.indexOf(dim) === -1) {
+      outputShape.push(shape[dim]);
+    }
+  }
+  const reduceShape = axes.map(dim => shape[dim]);
+  return [outputShape, reduceShape];
+};
+
+const expandShapeToKeepDim = (shape: number[], axes: number[]): number[] => {
+  const rank = shape.length + axes.length;
+  const expandShape = [];
+  let shapeIdx = 0;
+  for (let dim = 0; dim < rank; dim++) {
+    if (axes.indexOf(dim) === -1) {
+      expandShape.push(shape[shapeIdx++]);
+    } else {
+      expandShape.push(1);
+    }
+  }
+  return expandShape;
+};
+
+const areAxesInnerMostDims = (axes: number[], rank: number): boolean => {
+  for (let i = 0; i < axes.length; ++i) {
+    if (axes[axes.length - i - 1] !== rank - 1 - i) {
+      return false;
+    }
+  }
+  return true;
+};
+
+const getAxesPermutation = (axes: number[], rank: number): number[] => {
+  const res = [];
+  if (!areAxesInnerMostDims(axes, rank)) {
+    for (let i = 0; i < rank; ++i) {
+      if (axes.indexOf(i) === -1) {
+        res.push(i);
+      }
+    }
+    axes.forEach(axis => res.push(axis));
+  }
+  return res;
+};
+
+export const createReduceSharedProgramInfo =
+    (name: string, shaderCache: ProgramShaderCacheInfo, inputs: readonly TensorView[], reduceType: string,
+     outputDataType: DataType, outputShape: number[], reduceShape: number[]): ProgramInfo => {
+      const inputShape = inputs[0].dims;
+
+      const outputSize = ShapeUtil.size(outputShape);
+      const reduceSize = ShapeUtil.size(reduceShape);
+
+      const input = inputVariable('_A', inputs[0].dataType, inputShape);
+      const output = outputVariable('output', outputDataType, outputShape);
+
+      const workgroupSize = 32;
+
+      const sharedMemorySnippet = `
+          var<workgroup> aBestValues : array<${output.type.storage}, ${workgroupSize}>;
+       `;
+
+      const getShaderSource = (shaderHelper: ShaderHelper) => `
+        ${shaderHelper.registerUniform('reduceSize', 'u32').declareVariables(input, output)}
+        ${sharedMemorySnippet}
+        fn DIV_CEIL(a : u32, b : u32) -> u32 {
+          return ((a - 1u) / b + 1u);
+         }
+         ${shaderHelper.mainStart(workgroupSize)}
+          let local_idx = local_id.x;
+
+          let outputIndex = global_idx / ${workgroupSize};
+          let offset = outputIndex * uniforms.reduceSize;
+
+          var bestValue = ${output.type.storage}(${reduceInitValues[reduceType]});
+          let Length = uniforms.reduceSize;
+          for (var k = local_idx; k < Length; k = k + ${workgroupSize}) {
+           let candidate = ${output.type.storage}(${input.getByOffset('offset + k')});
+           bestValue = ${reduceOps[reduceType]};
+          }
+          aBestValues[local_idx] = bestValue;
+          workgroupBarrier();
+
+         var reduceSize = min(Length, ${workgroupSize}u);
+         for (var currentSize = reduceSize / 2u; reduceSize > 1u;
+             currentSize = reduceSize / 2u) {
+           let interval = DIV_CEIL(reduceSize, 2u);
+           if (local_idx < currentSize) {
+            let candidate = aBestValues[local_idx + interval];
+            bestValue = ${reduceSharedOps[reduceType]};
+            aBestValues[local_idx] = bestValue;
+           }
+           reduceSize = interval;
+           workgroupBarrier();
+         }
+
+         if (local_idx == 0u) {
+          ${
+          output.setByOffset(
+              'outputIndex',
+              `${
+                  reduceType === 'mean' ? `bestValue / ${output.type.storage}(uniforms.reduceSize)` :
+                                          `${reduceOutputValues[reduceType]}`}`)};
+         }
+        }`;
+
+      // One work group is responsible for only one element of output.
+      return {
+        name,
+        shaderCache,
+        getShaderSource,
+        getRunData: () => ({
+          outputs: [{dims: outputShape, dataType: outputDataType}],
+          dispatchGroup: {x: outputSize},
+          programUniforms: [{type: 'uint32', data: reduceSize}]
+        }),
+      };
+    };
+
+const reduceCommon =
+    (context: ComputeContext, name: string, attributes: ReduceAttributes,
+     reduceType: 'sum'|'sumSquare'|'prod'|'min'|'max'|'mean'|'logSumExp'|'l1'|'l2'|'logSum'): void => {
+      const updatedAttributes: ReduceAttributes =
+          context.inputs.length === 1 ? attributes : createReduceAttributesFromInputs(context.inputs, attributes);
+
+      let updatedAxes = updatedAttributes.axes;
+      if (updatedAxes.length === 0 && !updatedAttributes.noopWithEmptyAxes) {
+        updatedAxes = context.inputs[0].dims.map((_dim, i) => i);
+      }
+      const normalizeAxes = ShapeUtil.normalizeAxes(updatedAxes, context.inputs[0].dims.length);
+
+      let axes = normalizeAxes;
+      let input = context.inputs[0];
+      const permutedAxes = getAxesPermutation(axes, context.inputs[0].dims.length);
+      if (permutedAxes.length > 0) {
+        input = context.compute(
+            createTransposeProgramInfo(context.inputs[0], permutedAxes), {inputs: [0], outputs: [-1]})[0];
+        axes = getInnerMostAxes(axes.length, input.dims.length);
+      }
+
+      const [outputShape, reduceShape] = computeOutAndReduceShapes(input.dims, axes);
+      let finalOutputShape = outputShape;
+      if (updatedAttributes.keepDims) {
+        finalOutputShape = expandShapeToKeepDim(outputShape, normalizeAxes);
+      }
+
+      context.compute(
+          createReduceSharedProgramInfo(
+              name, {hint: updatedAttributes.cacheKey, inputDependencies: ['type']}, [input], reduceType,
+              context.inputs[0].dataType, finalOutputShape, reduceShape),
+          {inputs: [input]});
+    };
+
+export const reduceMeanShared = (context: ComputeContext, attributes: ReduceAttributes): void => {
+  reduceCommon(context, 'ReduceMeanShared', attributes, 'mean');
+};
+
+export const reduceL1Shared = (context: ComputeContext, attributes: ReduceAttributes): void => {
+  reduceCommon(context, 'ReduceL1Shared', attributes, 'l1');
+};
+
+export const reduceL2Shared = (context: ComputeContext, attributes: ReduceAttributes): void => {
+  reduceCommon(context, 'ReduceL2Shared', attributes, 'l2');
+};
+
+export const reduceLogSumExpShared = (context: ComputeContext, attributes: ReduceAttributes): void => {
+  reduceCommon(context, 'ReduceLogSumExpShared', attributes, 'logSumExp');
+};
+
+export const reduceMaxShared = (context: ComputeContext, attributes: ReduceAttributes): void => {
+  reduceCommon(context, 'ReduceMaxShared', attributes, 'max');
+};
+
+export const reduceMinShared = (context: ComputeContext, attributes: ReduceAttributes): void => {
+  reduceCommon(context, 'ReduceMinShared', attributes, 'min');
+};
+
+export const reduceProdShared = (context: ComputeContext, attributes: ReduceAttributes): void => {
+  reduceCommon(context, 'ReduceProdShared', attributes, 'prod');
+};
+
+export const reduceSumShared = (context: ComputeContext, attributes: ReduceAttributes): void => {
+  reduceCommon(context, 'ReduceSumShared', attributes, 'sum');
+};
+
+export const reduceSumSquareShared = (context: ComputeContext, attributes: ReduceAttributes): void => {
+  reduceCommon(context, 'ReduceSumSquareShared', attributes, 'sumSquare');
+};
+
+export const reduceLogSumShared = (context: ComputeContext, attributes: ReduceAttributes): void => {
+  reduceCommon(context, 'ReduceLogSumShared', attributes, 'logSum');
+};
diff --git a/js/web/lib/wasm/jsep/webgpu/ops/reduce.ts b/js/web/lib/wasm/jsep/webgpu/ops/reduce.ts
index 598b1db033c61..b5c956e57a9b1 100644
--- a/js/web/lib/wasm/jsep/webgpu/ops/reduce.ts
+++ b/js/web/lib/wasm/jsep/webgpu/ops/reduce.ts
@@ -5,9 +5,10 @@ import {DataType} from '../../../wasm-common';
 import {TensorView} from '../../tensor-view';
 import {ShapeUtil} from '../../util';
 import {AttributeWithCacheKey, createAttributeWithCacheKey} from '../attribute-with-cache-key';
-import {ComputeContext, GpuDataType, ProgramInfo, ProgramInfoLoader, ProgramMetadata} from '../types';
+import {ComputeContext, ProgramInfo, ProgramShaderCacheInfo} from '../types';
 
 import {IndicesHelper, inputVariable, outputVariable, ShaderHelper} from './common';
+import {reduceL1Shared, reduceL2Shared, reduceLogSumExpShared, reduceLogSumShared, reduceMaxShared, reduceMeanShared, reduceMinShared, reduceProdShared, reduceSumShared, reduceSumSquareShared} from './reduce-shared';
 
 const validateInputs = (inputs: readonly TensorView[]): void => {
   if (!inputs || inputs.length === 0 || inputs.length > 2) {
@@ -31,8 +32,8 @@ export type ReduceOp =
 
 const noOp: ReduceOp = (input) => ['', '', `var value = ${input.getByOffset('inputOffset')};`, ''];
 export const createReduceProgramInfo =
-    (metadata: ProgramMetadata, inputs: readonly TensorView[], reduceOp: ReduceOp, axesInput: number[],
-     outputDataType: DataType, keepDims = false, noopWithEmptyAxes = false): ProgramInfo => {
+    (name: string, shaderCache: ProgramShaderCacheInfo, inputs: readonly TensorView[], reduceOp: ReduceOp,
+     axesInput: number[], outputDataType: DataType, keepDims = false, noopWithEmptyAxes = false): ProgramInfo => {
       const outputShape: number[] = [];
       const inputShape = inputs[0].dims;
 
@@ -96,14 +97,17 @@ export const createReduceProgramInfo =
         }`;
 
       return {
-        ...metadata,
+        name,
+        shaderCache,
         getShaderSource,
-        outputs: [{dims: outputShape, dataType: outputDataType, gpuDataType: GpuDataType.default}],
-        dispatchGroup: () => ({x: Math.ceil(outputSize / 64 /* workgroup size */)})
+        getRunData: () => ({
+          outputs: [{dims: outputShape, dataType: outputDataType}],
+          dispatchGroup: {x: Math.ceil(outputSize / 64 /* workgroup size */)}
+        }),
       };
     };
 
-const createReduceAttributesFromInputs =
+export const createReduceAttributesFromInputs =
     (inputs: readonly TensorView[], attributes: ReduceAttributes): ReduceAttributes => {
       const axes: number[] = [];
       if (inputs[1].dims[0] > 0) {
@@ -113,26 +117,22 @@ const createReduceAttributesFromInputs =
           {axes, keepDims: attributes.keepDims, noopWithEmptyAxes: attributes.noopWithEmptyAxes});
     };
 
-const createReduceProgramInfoLoader =
-    (inputs: readonly TensorView[], name: string, attributes: ReduceAttributes,
-     reduceOp: ReduceOp): ProgramInfoLoader => {
+const runReduceProgram =
+    (context: ComputeContext, name: string, attributes: ReduceAttributes, reduceOp: ReduceOp): void => {
+      const inputs = context.inputs;
       const updatedAttributes: ReduceAttributes =
           inputs.length === 1 ? attributes : createReduceAttributesFromInputs(inputs, attributes);
-      const metadata: ProgramMetadata = {
-        name,
-        inputTypes: [GpuDataType.default],
-        cacheHint: updatedAttributes.cacheKey + '_' + inputs[0].dims.map(d => d.toString()).join(',')
-      };
-      return {
-        ...metadata,
-        get: () => createReduceProgramInfo(
-            metadata, [inputs[0]],
-            updatedAttributes.noopWithEmptyAxes && updatedAttributes.axes.length === 0 ? noOp : reduceOp,
-            updatedAttributes.axes, inputs[0].dataType, updatedAttributes.keepDims, updatedAttributes.noopWithEmptyAxes)
-      };
+
+      context.compute(
+          createReduceProgramInfo(
+              name, {hint: updatedAttributes.cacheKey}, [inputs[0]],
+              updatedAttributes.noopWithEmptyAxes && updatedAttributes.axes.length === 0 ? noOp : reduceOp,
+              updatedAttributes.axes, inputs[0].dataType, updatedAttributes.keepDims,
+              updatedAttributes.noopWithEmptyAxes),
+          {inputs: [0]});
     };
 
-export const reduceLogSum = (context: ComputeContext, attributes: ReduceAttributes): void => {
+const reduceLogSumNaive = (context: ComputeContext, attributes: ReduceAttributes): void => {
   validateInputs(context.inputs);
   const reduceOp: ReduceOp = (input, output) =>
       [`var value = ${output.type.storage}(0);`,
@@ -140,10 +140,10 @@ export const reduceLogSum = (context: ComputeContext, attributes: ReduceAttribut
        `value += ${input.getByOffset('inputOffset')};`,
        'value = log(value);',
   ];
-  context.compute(createReduceProgramInfoLoader(context.inputs, 'ReduceLogSum', attributes, reduceOp), {inputs: [0]});
+  runReduceProgram(context, 'ReduceLogSum', attributes, reduceOp);
 };
 
-export const reduceL1 = (context: ComputeContext, attributes: ReduceAttributes): void => {
+const reduceL1Naive = (context: ComputeContext, attributes: ReduceAttributes): void => {
   validateInputs(context.inputs);
   const reduceOp: ReduceOp = (input, output) =>
       [`var value = ${output.type.storage}(0);`,
@@ -151,10 +151,10 @@ export const reduceL1 = (context: ComputeContext, attributes: ReduceAttributes):
        `value += abs(${input.getByOffset('inputOffset')});`,
        '',
   ];
-  context.compute(createReduceProgramInfoLoader(context.inputs, 'ReduceL1', attributes, reduceOp), {inputs: [0]});
+  runReduceProgram(context, 'ReduceL1', attributes, reduceOp);
 };
 
-export const reduceL2 = (context: ComputeContext, attributes: ReduceAttributes): void => {
+const reduceL2Naive = (context: ComputeContext, attributes: ReduceAttributes): void => {
   validateInputs(context.inputs);
   const reduceOp: ReduceOp = (input, output) =>
       [`var t = ${output.type.value}(0); var value = ${output.type.value}(0);`,
@@ -162,10 +162,10 @@ export const reduceL2 = (context: ComputeContext, attributes: ReduceAttributes):
        `t = ${input.getByOffset('inputOffset')}; value += (t * t);`,
        'value = sqrt(value);',
   ];
-  context.compute(createReduceProgramInfoLoader(context.inputs, 'ReduceL2', attributes, reduceOp), {inputs: [0]});
+  runReduceProgram(context, 'ReduceL2', attributes, reduceOp);
 };
 
-export const reduceLogSumExp = (context: ComputeContext, attributes: ReduceAttributes): void => {
+const reduceLogSumExpNaive = (context: ComputeContext, attributes: ReduceAttributes): void => {
   validateInputs(context.inputs);
   const reduceOp: ReduceOp = (input, output) =>
       [`var value = ${output.type.storage}(0);`,
@@ -173,15 +173,14 @@ export const reduceLogSumExp = (context: ComputeContext, attributes: ReduceAttri
        `value += exp(${input.getByOffset('inputOffset')});`,
        'value = log(value);',
   ];
-  context.compute(
-      createReduceProgramInfoLoader(context.inputs, 'ReduceLogSumExp', attributes, reduceOp), {inputs: [0]});
+  runReduceProgram(context, 'ReduceLogSumExp', attributes, reduceOp);
 };
 
-export const reduceMax = (context: ComputeContext, attributes: ReduceAttributes): void => {
+const reduceMaxNaive = (context: ComputeContext, attributes: ReduceAttributes): void => {
   validateInputs(context.inputs);
   const reduceOp: ReduceOp = (input, _output, axes) => {
     const idxZero = [];
-    for (let k = 0; k < input.shape.length; k++) {
+    for (let k = 0; k < input.rank; k++) {
       if (axes.indexOf(k) >= 0 || axes.length === 0) {
         idxZero.push(input.indicesSet('inputIndices', k, 0));
       }
@@ -194,16 +193,17 @@ export const reduceMax = (context: ComputeContext, attributes: ReduceAttributes)
       '',
     ];
   };
-  context.compute(createReduceProgramInfoLoader(context.inputs, 'ReduceMax', attributes, reduceOp), {inputs: [0]});
+  runReduceProgram(context, 'ReduceMax', attributes, reduceOp);
 };
 
-export const reduceMean = (context: ComputeContext, attributes: ReduceAttributes): void => {
+const reduceMeanNaive = (context: ComputeContext, attributes: ReduceAttributes): void => {
   validateInputs(context.inputs);
   const reduceOp: ReduceOp = (input, output, axes) => {
     let size = 1.0;
-    for (let k = 0; k < input.shape.length; k++) {
+    for (let k = 0; k < input.rank; k++) {
       if (axes.indexOf(k) >= 0 || axes.length === 0) {
-        size *= input.shape[k];
+        // TODO: this depends on the input dims. If we want to use uniform, this need to be updated.
+        size *= context.inputs[0].dims[k];
       }
     }
 
@@ -214,14 +214,14 @@ export const reduceMean = (context: ComputeContext, attributes: ReduceAttributes
       `let value = ${output.type.value}(sum / ${size});`,
     ];
   };
-  context.compute(createReduceProgramInfoLoader(context.inputs, 'ReduceMean', attributes, reduceOp), {inputs: [0]});
+  runReduceProgram(context, 'ReduceMean', attributes, reduceOp);
 };
 
-export const reduceMin = (context: ComputeContext, attributes: ReduceAttributes): void => {
+const reduceMinNaive = (context: ComputeContext, attributes: ReduceAttributes): void => {
   validateInputs(context.inputs);
   const reduceOp: ReduceOp = (input, _output, axes) => {
     const idxZero = [];
-    for (let k = 0; k < input.shape.length; k++) {
+    for (let k = 0; k < input.rank; k++) {
       if (axes.indexOf(k) >= 0 || axes.length === 0) {
         idxZero.push(`inputIndices[${k}] = 0;`);  // first element
       }
@@ -234,10 +234,10 @@ export const reduceMin = (context: ComputeContext, attributes: ReduceAttributes)
       '',
     ];
   };
-  context.compute(createReduceProgramInfoLoader(context.inputs, 'ReduceMin', attributes, reduceOp), {inputs: [0]});
+  runReduceProgram(context, 'ReduceMin', attributes, reduceOp);
 };
 
-export const reduceProd = (context: ComputeContext, attributes: ReduceAttributes): void => {
+const reduceProdNaive = (context: ComputeContext, attributes: ReduceAttributes): void => {
   validateInputs(context.inputs);
   const reduceOp: ReduceOp = (input, output) =>
       [`var value = ${output.type.storage}(1);`,
@@ -245,10 +245,10 @@ export const reduceProd = (context: ComputeContext, attributes: ReduceAttributes
        `value *= ${input.getByOffset('inputOffset')};`,
        '',
   ];
-  context.compute(createReduceProgramInfoLoader(context.inputs, 'ReduceProd', attributes, reduceOp), {inputs: [0]});
+  runReduceProgram(context, 'ReduceProd', attributes, reduceOp);
 };
 
-export const reduceSum = (context: ComputeContext, attributes: ReduceAttributes): void => {
+const reduceSumNaive = (context: ComputeContext, attributes: ReduceAttributes): void => {
   validateInputs(context.inputs);
   const reduceOp: ReduceOp = (input, output) =>
       [`var value = ${output.type.storage}(0);`,
@@ -256,10 +256,10 @@ export const reduceSum = (context: ComputeContext, attributes: ReduceAttributes)
        `value += ${input.getByOffset('inputOffset')};`,
        '',
   ];
-  context.compute(createReduceProgramInfoLoader(context.inputs, 'ReduceSum', attributes, reduceOp), {inputs: [0]});
+  runReduceProgram(context, 'ReduceSum', attributes, reduceOp);
 };
 
-export const reduceSumSquare = (context: ComputeContext, attributes: ReduceAttributes): void => {
+const reduceSumSquareNaive = (context: ComputeContext, attributes: ReduceAttributes): void => {
   validateInputs(context.inputs);
   const reduceOp: ReduceOp = (input, output) =>
       [`var t = ${output.type.value}(0); var value = ${output.type.value}(0);`,
@@ -267,8 +267,109 @@ export const reduceSumSquare = (context: ComputeContext, attributes: ReduceAttri
        `t = ${input.getByOffset('inputOffset')}; value += t * t;`,
        '',
   ];
-  context.compute(
-      createReduceProgramInfoLoader(context.inputs, 'ReduceSumSquare', attributes, reduceOp), {inputs: [0]});
+  runReduceProgram(context, 'ReduceSumSquare', attributes, reduceOp);
+};
+
+const useNaiveReduceMethod =
+    (shape: readonly number[], axes: readonly number[], noopWithEmptyAxes: boolean): boolean => {
+      if (axes.length === 0) {
+        return noopWithEmptyAxes ? true : false;
+      }
+
+      let outputSize = 1;
+      let reduceSize = 1;
+      for (let dim = 0; dim < axes.length; dim++) {
+        if (axes.indexOf(dim) === -1) {
+          outputSize *= shape[dim];
+        } else {
+          reduceSize *= shape[dim];
+        }
+      }
+
+      // The condition data is very rough, although considering the count of Execution Unit (EU), the potential
+      // work groups in a EU and the counts of loops in the naive and shared methods, also doing experiments
+      // on some machines.
+      return reduceSize < 32 && outputSize > 1024 ? true : false;
+    };
+
+export const reduceMean = (context: ComputeContext, attributes: ReduceAttributes): void => {
+  if (useNaiveReduceMethod(context.inputs[0].dims, attributes.axes, attributes.noopWithEmptyAxes)) {
+    reduceMeanNaive(context, attributes);
+  } else {
+    reduceMeanShared(context, attributes);
+  }
+};
+
+export const reduceL1 = (context: ComputeContext, attributes: ReduceAttributes): void => {
+  if (useNaiveReduceMethod(context.inputs[0].dims, attributes.axes, attributes.noopWithEmptyAxes)) {
+    reduceL1Naive(context, attributes);
+  } else {
+    reduceL1Shared(context, attributes);
+  }
+};
+
+export const reduceL2 = (context: ComputeContext, attributes: ReduceAttributes): void => {
+  if (useNaiveReduceMethod(context.inputs[0].dims, attributes.axes, attributes.noopWithEmptyAxes)) {
+    reduceL2Naive(context, attributes);
+  } else {
+    reduceL2Shared(context, attributes);
+  }
+};
+
+export const reduceLogSumExp = (context: ComputeContext, attributes: ReduceAttributes): void => {
+  if (useNaiveReduceMethod(context.inputs[0].dims, attributes.axes, attributes.noopWithEmptyAxes)) {
+    reduceLogSumExpNaive(context, attributes);
+  } else {
+    reduceLogSumExpShared(context, attributes);
+  }
+};
+
+export const reduceMax = (context: ComputeContext, attributes: ReduceAttributes): void => {
+  if (useNaiveReduceMethod(context.inputs[0].dims, attributes.axes, attributes.noopWithEmptyAxes)) {
+    reduceMaxNaive(context, attributes);
+  } else {
+    reduceMaxShared(context, attributes);
+  }
+};
+
+export const reduceMin = (context: ComputeContext, attributes: ReduceAttributes): void => {
+  if (useNaiveReduceMethod(context.inputs[0].dims, attributes.axes, attributes.noopWithEmptyAxes)) {
+    reduceMinNaive(context, attributes);
+  } else {
+    reduceMinShared(context, attributes);
+  }
+};
+
+export const reduceProd = (context: ComputeContext, attributes: ReduceAttributes): void => {
+  if (useNaiveReduceMethod(context.inputs[0].dims, attributes.axes, attributes.noopWithEmptyAxes)) {
+    reduceProdNaive(context, attributes);
+  } else {
+    reduceProdShared(context, attributes);
+  }
+};
+
+export const reduceSum = (context: ComputeContext, attributes: ReduceAttributes): void => {
+  if (useNaiveReduceMethod(context.inputs[0].dims, attributes.axes, attributes.noopWithEmptyAxes)) {
+    reduceSumNaive(context, attributes);
+  } else {
+    reduceSumShared(context, attributes);
+  }
+};
+
+export const reduceSumSquare = (context: ComputeContext, attributes: ReduceAttributes): void => {
+  if (useNaiveReduceMethod(context.inputs[0].dims, attributes.axes, attributes.noopWithEmptyAxes)) {
+    reduceSumSquareNaive(context, attributes);
+  } else {
+    reduceSumSquareShared(context, attributes);
+  }
+};
+
+export const reduceLogSum = (context: ComputeContext, attributes: ReduceAttributes): void => {
+  if (useNaiveReduceMethod(context.inputs[0].dims, attributes.axes, attributes.noopWithEmptyAxes)) {
+    reduceLogSumNaive(context, attributes);
+  } else {
+    reduceLogSumShared(context, attributes);
+  }
 };
 
 export const parseReduceAttributes = (attributes: Record<string, unknown>): ReduceAttributes =>
diff --git a/js/web/lib/wasm/jsep/webgpu/ops/resize.ts b/js/web/lib/wasm/jsep/webgpu/ops/resize.ts
index 8b9dbbf57ac75..973a607f9377e 100644
--- a/js/web/lib/wasm/jsep/webgpu/ops/resize.ts
+++ b/js/web/lib/wasm/jsep/webgpu/ops/resize.ts
@@ -5,7 +5,7 @@
 import {TensorView} from '../../tensor-view';
 import {ShapeUtil} from '../../util';
 import {AttributeWithCacheKey, createAttributeWithCacheKey} from '../attribute-with-cache-key';
-import {ComputeContext, GpuDataType, ProgramInfo, ProgramInfoLoader, ProgramMetadata} from '../types';
+import {ComputeContext, ProgramInfo} from '../types';
 
 import {IndicesHelper, inputVariable, outputVariable, ShaderHelper} from './common';
 
@@ -105,50 +105,51 @@ const validateInputs =
       }
     };
 
-const getOriginalCoordinateFromResizedCoordinate = (coordinateTransferMode: CoordinateTransformMode): string =>
-    'fn getOriginalCoordinateFromResizedCoordinate(xResized: f32, xScale: f32, lengthResized: f32,\
-    lengthOriginal: f32, roiStart: f32, roiEnd: f32) -> f32 { ' +
+const getOriginalCoordinateFromResizedCoordinate =
+    (coordinateTransferMode: CoordinateTransformMode, dType: string): string =>
+        `fn getOriginalCoordinateFromResizedCoordinate(xResized: ${dType}, xScale: ${dType}, lengthResized: ${dType},
+     lengthOriginal: ${dType}, roiStart: ${dType}, roiEnd: ${dType}) -> ${dType} { ` +
     (() => {
-      switch (coordinateTransferMode) {
-        case 'asymmetric':
-          return 'return xResized / xScale;';
-        case 'pytorch_half_pixel':
-          return 'if (lengthResized > 1) { \
+          switch (coordinateTransferMode) {
+            case 'asymmetric':
+              return 'return xResized / xScale;';
+            case 'pytorch_half_pixel':
+              return 'if (lengthResized > 1) { \
                     return (xResized + 0.5) / xScale - 0.5; \
                   } else { \
                     return 0.0; \
                   }';
-        case 'tf_half_pixel_for_nn':
-          return 'return (xResized + 0.5) / xScale;';
-        case 'align_corners':
-          return 'if (lengthResized == 1) { \
+            case 'tf_half_pixel_for_nn':
+              return 'return (xResized + 0.5) / xScale;';
+            case 'align_corners':
+              return 'if (lengthResized == 1) { \
                     return 0.0; \
                   } else { \
                     return xResized * (lengthOriginal - 1) / (lengthResized - 1); \
                   }';
-        case 'tf_crop_and_resize':
-          return 'if (lengthResized > 1) { \
+            case 'tf_crop_and_resize':
+              return `if (lengthResized > 1) { \
                     return roiStart * (lengthOriginal - 1) + \
                           (xResized * (roiEnd - roiStart) * (lengthOriginal - 1)) / (lengthResized - 1); \
                   } else { \
-                    return 0.5 * (roiStart + roiEnd) * f32(lengthOriginal - 1); \
-                  }';
-        case 'half_pixel_symmetric':
-          return [
-            'const outputWidth = xScale * lengthResized;', 'const adjustment = lengthResized / outputWidth;',
-            'const center = lengthOriginal / 2;', 'const offset = center * (1 - adjustment);',
-            'return offset + ((xResized + 0.5) / xScale) - 0.5;'
-          ].join('\n');
-        case 'half_pixel':
-          return 'return ((xResized + 0.5) / xScale) - 0.5;';
-        default:
-          throw new Error(`Coordinate transform mode ${coordinateTransferMode} is not supported`);
-      }
-    })() +
+                    return 0.5 * (roiStart + roiEnd) * ${dType}(lengthOriginal - 1); \
+                  }`;
+            case 'half_pixel_symmetric':
+              return [
+                'const outputWidth = xScale * lengthResized;', 'const adjustment = lengthResized / outputWidth;',
+                'const center = lengthOriginal / 2;', 'const offset = center * (1 - adjustment);',
+                'return offset + ((xResized + 0.5) / xScale) - 0.5;'
+              ].join('\n');
+            case 'half_pixel':
+              return 'return ((xResized + 0.5) / xScale) - 0.5;';
+            default:
+              throw new Error(`Coordinate transform mode ${coordinateTransferMode} is not supported`);
+          }
+        })() +
     '}';
 
-const getNearestPixelFromOriginal = (nearestMode: NearestMode, opsetVersion: number): string =>
-    'fn getNearestPixelFromOriginal(xOriginal: f32, isDownSample: bool) -> f32 {' + (() => {
+const getNearestPixelFromOriginal = (nearestMode: NearestMode, opsetVersion: number, dType: string): string =>
+    `fn getNearestPixelFromOriginal(xOriginal: ${dType}, isDownSample: bool) -> ${dType} {` + (() => {
       switch (nearestMode) {
         case 'round_prefer_ceil':
           return 'if (fract(xOriginal) == 0.5) { \
@@ -218,50 +219,49 @@ const initOutputShape =
           return outputShape;
         };
 
-const adjustOutputShape =
-    (inputShape: readonly number[], outputShape: readonly number[], scales: number[], attributes: ResizeAttributes):
-        number[] => {
-          const scaleInPolicy = (() => {
-            switch (attributes.keepAspectRatioPolicy) {
-              case 'not_larger':
-                return attributes.axes.length > 0 ? Math.min(...attributes.axes.map(i => scales[i]), Number.MAX_VALUE) :
-                                                    Math.min(...scales, Number.MAX_VALUE);
-              case 'not_smaller':
-                return attributes.axes.length > 0 ? Math.max(...attributes.axes.map(i => scales[i]), Number.MIN_VALUE) :
-                                                    Math.max(...scales, Number.MIN_VALUE);
-              default:
-                throw new Error(`Keep aspect ratio policy ${attributes.keepAspectRatioPolicy} is not supported`);
-            }
-          })();
-          scales.fill(1.0, 0, scales.length);
-          const adjustedOutputShape = inputShape.slice();
-          if (attributes.axes.length > 0) {
-            attributes.axes.forEach((v) => scales[v] = scaleInPolicy);
-            attributes.axes.forEach((v) => adjustedOutputShape[v] = Math.round(inputShape[v] * scales[v]));
-          } else {
-            scales.fill(scaleInPolicy, 0, scales.length);
-            adjustedOutputShape.forEach((v, i) => adjustedOutputShape[i] = Math.round(v * scales[i]));
-          }
-          return adjustedOutputShape;
-        };
+const adjustOutputShape = (inputShape: readonly number[], scales: number[], attributes: ResizeAttributes): number[] => {
+  const scaleInPolicy = (() => {
+    switch (attributes.keepAspectRatioPolicy) {
+      case 'not_larger':
+        return attributes.axes.length > 0 ? Math.min(...attributes.axes.map(i => scales[i]), Number.MAX_VALUE) :
+                                            Math.min(...scales, Number.MAX_VALUE);
+      case 'not_smaller':
+        return attributes.axes.length > 0 ? Math.max(...attributes.axes.map(i => scales[i]), Number.MIN_VALUE) :
+                                            Math.max(...scales, Number.MIN_VALUE);
+      default:
+        throw new Error(`Keep aspect ratio policy ${attributes.keepAspectRatioPolicy} is not supported`);
+    }
+  })();
+  scales.fill(1.0, 0, scales.length);
+  const adjustedOutputShape = inputShape.slice();
+  if (attributes.axes.length > 0) {
+    attributes.axes.forEach((v) => scales[v] = scaleInPolicy);
+    attributes.axes.forEach((v) => adjustedOutputShape[v] = Math.round(inputShape[v] * scales[v]));
+  } else {
+    scales.fill(scaleInPolicy, 0, scales.length);
+    adjustedOutputShape.forEach((v, i) => adjustedOutputShape[i] = Math.round(v * scales[i]));
+  }
+  return adjustedOutputShape;
+};
 
 const calculateOriginalIndicesFromOutputIndices =
     (output: IndicesHelper, inputShape: readonly number[], outputShape: readonly number[], scales: readonly number[],
      roi: readonly number[]): string => `
-    fn calculateOriginalIndicesFromOutputIndices(outputIndices: ${output.type.indices}) -> array<f32, ${
-        outputShape.length}> {
+    fn calculateOriginalIndicesFromOutputIndices(outputIndices: ${output.type.indices}) -> array<${
+        output.type.value}, ${outputShape.length}> {
       const inputShape = array<u32, ${inputShape.length}>(${inputShape.map(i => `${i}u`).join(',')});
       const outputShape = array<u32, ${outputShape.length}>(${outputShape.map(i => `${i}u`).join(',')});
-      const scales = array<f32, ${scales.length}>(${scales.map(i => `${i}f`).join(',')});
-      const roi = array<f32, ${roi.length}>(${roi.map(i => `${i}f`).join(',')});
-      var originalIndices: array<f32, ${outputShape.length}>;
+      const scales = array<${output.type.value}, ${scales.length}>(${scales.map(i => `${i}f`).join(',')});
+      const roi = array<${output.type.value}, ${roi.length}>(${roi.map(i => `${i}f`).join(',')});
+      var originalIndices: array<${output.type.value}, ${outputShape.length}>;
       for (var i:u32 = 0; i < ${outputShape.length}; i++) {
         var outputIndex = ${outputShape.length === 1 ? 'outputIndices' : 'outputIndices[i]'};
         if (scales[i] == 1.0) {
-          originalIndices[i] = f32(outputIndex);
+          originalIndices[i] = ${output.type.value}(outputIndex);
         } else {
-          originalIndices[i] = getOriginalCoordinateFromResizedCoordinate(f32(outputIndex), scales[i],
-                f32(outputShape[i]), f32(inputShape[i]), roi[i], roi[i + ${inputShape.length}]);
+          originalIndices[i] = getOriginalCoordinateFromResizedCoordinate(${output.type.value}(outputIndex), scales[i],
+                ${output.type.value}(outputShape[i]), ${output.type.value}(inputShape[i]), roi[i], roi[i + ${
+        inputShape.length}]);
         }
       }
       return originalIndices;
@@ -273,8 +273,8 @@ const calculateInputIndicesFromOutputIndices =
     fn calculateInputIndicesFromOutputIndices(outputIndices: ${output.type.indices}) -> ${input.type.indices} {
         const inputShape = array<u32, ${inputShape.length}>(${inputShape.map(i => `${i}u`).join(',')});
         const outputShape = array<u32, ${outputShape.length}>(${outputShape.map(i => `${i}u`).join(',')});
-        const scales = array<f32, ${scales.length}>(${scales.map(i => `${i}f`).join(',')});
-        const roi = array<f32, ${roi.length}>(${roi.map(i => `${i}f`).join(',')});
+        const scales = array<${input.type.value}, ${scales.length}>(${scales.map(i => `${i}`).join(',')});
+        const roi = array<${input.type.value}, ${roi.length}>(${roi.map(i => `${i}`).join(',')});
         var inputIndices: ${input.type.indices};
         for (var i:u32 = 0; i < ${outputShape.length}; i++) {
           var outputIndex = ${outputShape.length === 1 ? 'outputIndices' : 'outputIndices[i]'};
@@ -282,12 +282,13 @@ const calculateInputIndicesFromOutputIndices =
           if (scales[i] == 1.0) {
             inputIndex = outputIndex;
           } else {
-            var original_idx = getOriginalCoordinateFromResizedCoordinate(f32(outputIndex), scales[i],
-                    f32(outputShape[i]), f32(inputShape[i]), roi[i], roi[i + ${inputShape.length}]);
-            if (!${useExtrapolation} || (original_idx >= 0 && original_idx < f32(inputShape[i]))) {
+            var original_idx = getOriginalCoordinateFromResizedCoordinate(${input.type.value}(outputIndex), scales[i],
+                    ${input.type.value}(outputShape[i]), ${input.type.value}(inputShape[i]), roi[i], roi[i + ${
+        inputShape.length}]);
+            if (!${useExtrapolation} || (original_idx >= 0 && original_idx < ${input.type.value}(inputShape[i]))) {
               if (original_idx < 0) {
                 inputIndex = 0;
-              } else if (original_idx > (f32(inputShape[i]) - 1)) {
+              } else if (original_idx > (${input.type.value}(inputShape[i]) - 1)) {
                 inputIndex = inputShape[i] - 1;
               } else {
                 inputIndex = u32(getNearestPixelFromOriginal(original_idx, scales[i] < 1));
@@ -314,12 +315,13 @@ const checkInputIndices = (input: IndicesHelper, inputShape: readonly number[]):
     }`;
 
 const bilinearInterpolation =
-    (input: IndicesHelper, output: IndicesHelper, inputShape: readonly number[], outputShape: readonly number[],
-     scales: readonly number[], useExtrapolation: boolean, extrapolationValue: number): string => {
+    (input: IndicesHelper, output: IndicesHelper, inputShape: readonly number[], scales: readonly number[],
+     useExtrapolation: boolean, extrapolationValue: number): string => {
       const [batchIdx, heightIdx, widthIdx, channelIdx] =
           inputShape.length === 2 ? [-1, 0, 1, -1] : (scales[1] === 1.0 ? [0, 2, 3, 1] : [0, 1, 2, 3]);
+      const dType = input.type.value;
       return `
-    fn getInputValue(batch: u32, channel: u32, row: u32, col: u32) -> f32 {
+    fn getInputValue(batch: u32, channel: u32, row: u32, col: u32) -> ${dType} {
       var inputIndices: ${input.type.indices};
       inputIndices[${heightIdx}] = max(0, min(row, ${inputShape[heightIdx]} - 1));
       inputIndices[${widthIdx}] = max(0, min(col, ${inputShape[widthIdx]} - 1));
@@ -330,10 +332,10 @@ const bilinearInterpolation =
       return input[${input.indicesToOffset('inputIndices')}];
     }
 
-    fn bilinearInterpolation(outputIndices: ${output.type.indices}) -> f32 {
+    fn bilinearInterpolation(outputIndices: ${output.type.indices}) -> ${dType} {
       var originalIndices = calculateOriginalIndicesFromOutputIndices(outputIndices);
-      var row:f32 = originalIndices[${heightIdx}];
-      var col:f32 = originalIndices[${widthIdx}];
+      var row:${dType} = originalIndices[${heightIdx}];
+      var col:${dType} = originalIndices[${widthIdx}];
       if (${useExtrapolation} && (row < 0 || row > (${inputShape[heightIdx]} - 1) || col < 0 || col > ${
           inputShape[widthIdx]} - 1)) {
         return ${extrapolationValue};
@@ -350,14 +352,14 @@ const bilinearInterpolation =
         channel = u32(originalIndices[${channelIdx}]);
         batch = u32(originalIndices[${batchIdx}]);
       }
-      var x11: f32 = getInputValue(batch, channel, row1, col1);
-      var x12: f32 = getInputValue(batch, channel, row1, col2);
-      var x21: f32 = getInputValue(batch, channel, row2, col1);
-      var x22: f32 = getInputValue(batch, channel, row2, col2);
-      var dx1: f32 = row - f32(row1);
-      var dx2: f32 = f32(row2 ) - row;
-      var dy1 = col - f32(col1);
-      var dy2 = f32(col2) - col;
+      var x11: ${dType} = getInputValue(batch, channel, row1, col1);
+      var x12: ${dType} = getInputValue(batch, channel, row1, col2);
+      var x21: ${dType} = getInputValue(batch, channel, row2, col1);
+      var x22: ${dType} = getInputValue(batch, channel, row2, col2);
+      var dx1: ${dType} = row - ${dType}(row1);
+      var dx2: ${dType} = ${dType}(row2) - row;
+      var dy1 = col - ${dType}(col1);
+      var dy2 = ${dType}(col2) - col;
       return (x11 * dx2 * dy2 + x12 * dx2 * dy1 + x21 * dx1 * dy2 + x22 * dx1 * dy1);
     }`;
     };
@@ -367,24 +369,24 @@ const bicubicInterpolation =
      scales: readonly number[], roi: readonly number[], cubicCoeffA: number, useExtrapolation: boolean,
      extrapolationValue: number, excludeOutside: boolean): string => {
       const [heightIdx, widthIdx] = inputShape.length === 2 ? [0, 1] : (scales[1] === 1.0) ? [2, 3] : [1, 2];
-
+      const dType = input.type.value;
       const createCubicInterpolationFunction = (idx: number): string => {
         const direction = idx === heightIdx ? 'row' : 'col';
         return `
       fn ${direction}CubicInterpolation(inputIndices: ${input.type.indices}, outputIndices: ${
-            output.type.indices}) -> f32 {
+            output.type.indices}) -> ${dType} {
         var outputIndex = ${outputShape.length === 1 ? 'outputIndices' : `outputIndices[${idx}]`};
-        var originalIdx: f32 = getOriginalCoordinateFromResizedCoordinate(f32(outputIndex), ${scales[idx]},
-        f32(${outputShape[idx]}), f32(${inputShape[idx]}), ${roi[idx]}, ${roi[idx]} + ${inputShape.length});
-        var fractOriginalIdx: f32 = originalIdx - floor(originalIdx);
+        var originalIdx: ${dType} = getOriginalCoordinateFromResizedCoordinate(${dType}(outputIndex), ${scales[idx]},
+        ${dType}(${outputShape[idx]}), ${dType}(${inputShape[idx]}), ${roi[idx]}, ${roi[idx]} + ${inputShape.length});
+        var fractOriginalIdx: ${dType} = originalIdx - floor(originalIdx);
         var coefs = getCubicInterpolationCoefs(fractOriginalIdx);
 
         if (${useExtrapolation} && (originalIdx < 0 || originalIdx > (${inputShape[idx]} - 1))) {
           return ${extrapolationValue};
         }
-        var data: array<f32, 4> = array<f32, 4>(0.0, 0.0, 0.0, 0.0);
+        var data: array<${dType}, 4> = array<${dType}, 4>(0.0, 0.0, 0.0, 0.0);
         for (var i: i32 = -1; i < 3; i++) {
-          var ${direction}: f32 = originalIdx + f32(i);
+          var ${direction}: ${dType} = originalIdx + ${dType}(i);
           if (${direction} < 0 || ${direction} >= ${inputShape[idx]}) {
             if (${excludeOutside}) {
               coefs[i + 1] = 0.0;
@@ -407,12 +409,12 @@ const bicubicInterpolation =
       return `
     ${createCubicInterpolationFunction(heightIdx)};
     ${createCubicInterpolationFunction(widthIdx)};
-  fn getCubicInterpolationCoefs(s: f32) -> array<f32, 4> {
+  fn getCubicInterpolationCoefs(s: ${dType}) -> array<${dType}, 4> {
     var absS = abs(s);
-    var coeffs: array<f32, 4> = array<f32, 4>(0.0, 0.0, 0.0, 0.0);
-    var oneMinusAbsS: f32 = 1.0 - absS;
-    var twoMinusAbsS: f32 = 2.0 - absS;
-    var onePlusAbsS: f32 = 1.0 + absS;
+    var coeffs: array<${dType}, 4> = array<${dType}, 4>(0.0, 0.0, 0.0, 0.0);
+    var oneMinusAbsS: ${dType} = 1.0 - absS;
+    var twoMinusAbsS: ${dType} = 2.0 - absS;
+    var onePlusAbsS: ${dType} = 1.0 + absS;
     coeffs[0] = ((${cubicCoeffA} * onePlusAbsS - 5 * ${cubicCoeffA}) * onePlusAbsS + 8 * ${
           cubicCoeffA}) * onePlusAbsS - 4 * ${cubicCoeffA};
     coeffs[1] = ((${cubicCoeffA} + 2) * absS - (${cubicCoeffA} + 3)) * absS * absS + 1;
@@ -422,12 +424,12 @@ const bicubicInterpolation =
     return coeffs;
   }
 
-  fn cubicInterpolation1D(x: array<f32, 4>, coefs: array<f32, 4>) -> f32 {
-    var coefsSum: f32 = coefs[0] + coefs[1] + coefs[2] + coefs[3];
+  fn cubicInterpolation1D(x: array<${dType}, 4>, coefs: array<${dType}, 4>) -> ${dType} {
+    var coefsSum: ${dType} = coefs[0] + coefs[1] + coefs[2] + coefs[3];
     return (x[0] * coefs[0] + x[1] * coefs[1]+ x[2] * coefs[2]+ x[3] * coefs[3]) / coefsSum;
   }
 
-  fn bicubicInterpolation(outputIndices: ${output.type.indices}) -> f32 {
+  fn bicubicInterpolation(outputIndices: ${output.type.indices}) -> ${dType} {
     var inputIndices: ${input.type.indices} = outputIndices;
     return colCubicInterpolation(inputIndices, outputIndices);
   }
@@ -435,8 +437,8 @@ const bicubicInterpolation =
     };
 
 const createResizeProgramInfo =
-    (metadata: ProgramMetadata, inputTensor: TensorView, attributes: ResizeAttributes, opsetVersion: number,
-     scalesInput: readonly number[], sizes: readonly number[], roiInput: readonly number[]): ProgramInfo => {
+    (inputTensor: TensorView, attributes: ResizeAttributes, opsetVersion: number, scalesInput: readonly number[],
+     sizes: readonly number[], roiInput: readonly number[]): ProgramInfo => {
       const inputShape = inputTensor.dims;
       const roi = updateRoI(roiInput, attributes.axes, inputShape.length);
 
@@ -445,7 +447,7 @@ const createResizeProgramInfo =
       if (scalesInput.length === 0) {
         scales = inputShape.map((value, index) => value === 0 ? 1.0 : outputShape[index] / value);
         if (attributes.keepAspectRatioPolicy !== 'stretch') {
-          outputShape = adjustOutputShape(inputShape, outputShape, scales, attributes);
+          outputShape = adjustOutputShape(inputShape, scales, attributes);
         }
       }
       const output = outputVariable('output', inputTensor.dataType, outputShape);
@@ -453,14 +455,16 @@ const createResizeProgramInfo =
       const outputSize = ShapeUtil.size(outputShape);
       const noScale = inputShape.length === outputShape.length && inputShape.every((d, i) => d === outputShape[i]);
       const useExtrapolation = attributes.coordinateTransformMode === 'tf_crop_and_resize';
+      const dataType = input.type.value;
       const getShaderSource = (shaderHelper: ShaderHelper) => `
-      ${getOriginalCoordinateFromResizedCoordinate(attributes.coordinateTransformMode)};
+      ${noScale ? '' : `
+      ${getOriginalCoordinateFromResizedCoordinate(attributes.coordinateTransformMode, dataType)};
       ${(() => {
         switch (attributes.mode) {
           case 'nearest':
             return `
               ${checkInputIndices(input, inputShape)};
-              ${getNearestPixelFromOriginal(attributes.nearestMode, opsetVersion)};
+              ${getNearestPixelFromOriginal(attributes.nearestMode, opsetVersion, dataType)};
               ${
                 calculateInputIndicesFromOutputIndices(
                     input, output, inputShape, outputShape, scales, roi, useExtrapolation)};
@@ -470,7 +474,7 @@ const createResizeProgramInfo =
               ${calculateOriginalIndicesFromOutputIndices(output, inputShape, outputShape, scales, roi)};
               ${
                 bilinearInterpolation(
-                    input, output, inputShape, outputShape, scales, useExtrapolation, attributes.extrapolationValue)};
+                    input, output, inputShape, scales, useExtrapolation, attributes.extrapolationValue)};
               `;
           case 'cubic':
             return `
@@ -483,23 +487,22 @@ const createResizeProgramInfo =
             throw Error('Invalid resize mode');
         }
       })()};
+      `}
       ${shaderHelper.declareVariables(input, output)}
       ${shaderHelper.mainStart()}
         ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes(outputSize)}
-        if (${noScale}) {
-          output[global_idx] = input[global_idx];
-        } else {
-          let outputIndices = ${output.offsetToIndices('global_idx')};
-          var inputIndices: ${input.type.indices};
-          ${(() => {
+        ${noScale ? 'output[global_idx] = input[global_idx];' : `
+        let outputIndices = ${output.offsetToIndices('global_idx')};
+        var inputIndices: ${input.type.indices};
+        ${(() => {
         switch (attributes.mode) {
           case 'nearest':
             return `inputIndices = calculateInputIndicesFromOutputIndices(outputIndices);
-                  if (checkInputIndices(inputIndices)) {
-                    output[global_idx] = input[${input.indicesToOffset('inputIndices')}];
-                  } else {
-                    output[global_idx] = ${attributes.extrapolationValue};
-                  }`;
+                if (checkInputIndices(inputIndices)) {
+                  output[global_idx] = input[${input.indicesToOffset('inputIndices')}];
+                } else {
+                  output[global_idx] = ${attributes.extrapolationValue};
+                }`;
           case 'linear':
             return 'output[global_idx] = bilinearInterpolation(outputIndices);';
           case 'cubic':
@@ -508,30 +511,20 @@ const createResizeProgramInfo =
             throw Error(`Unsupported resize mode: ${attributes.mode}`);
         }
       })()};
-        }
+        `}
       }`;
 
       return {
-        ...metadata,
-        getShaderSource,
-        outputs: [{dims: outputShape, dataType: inputTensor.dataType, gpuDataType: GpuDataType.default}],
-        dispatchGroup: () => ({x: Math.ceil(outputSize / 64 /* workgroup size */)})
-      };
-    };
-
-export const createResizeProgramInfoLoader =
-    (input: TensorView, attributes: ResizeAttributes, opsetVersion: number, scales: readonly number[],
-     sizes: readonly number[], roi: readonly number[]): ProgramInfoLoader => {
-      const metadata: ProgramMetadata = {
         name: 'Resize',
-        inputTypes: [GpuDataType.default],
-        cacheHint: attributes.cacheKey + opsetVersion.toString() +
-            (scales.length > 0 ? '_scales_' + scales.toString() : '') +
-            (sizes.length > 0 ? '_sizes_' + sizes.toString() : ''),
-      };
-      return {
-        ...metadata,
-        get: () => createResizeProgramInfo(metadata, input, attributes, opsetVersion, scales, sizes, roi)
+        shaderCache: {
+          hint: `${attributes.cacheKey}|${opsetVersion}|${scales.length > 0 ? scales : ''}|${
+              sizes.length > 0 ? sizes : ''}|${noScale}`
+        },
+        getShaderSource,
+        getRunData: () => ({
+          outputs: [{dims: outputShape, dataType: inputTensor.dataType}],
+          dispatchGroup: {x: Math.ceil(outputSize / 64 /* workgroup size */)}
+        })
       };
     };
 
@@ -549,7 +542,7 @@ export const resize = (context: ComputeContext, attributes: ResizeAttributes): v
   const opsetVersion = getOpsetVersionFromCustomDataBuffer(context);
   validateInputs(context.inputs, attributes, opsetVersion, scales, sizes, roi);
   context.compute(
-      createResizeProgramInfoLoader(context.inputs[0], attributes, opsetVersion, scales, sizes, roi), {inputs: [0]});
+      createResizeProgramInfo(context.inputs[0], attributes, opsetVersion, scales, sizes, roi), {inputs: [0]});
 };
 
 export const parseResizeAttributes = (attributes: Record<string, unknown>): ResizeAttributes => {
diff --git a/js/web/lib/wasm/jsep/webgpu/ops/skip-layer-norm.ts b/js/web/lib/wasm/jsep/webgpu/ops/skip-layer-norm.ts
index 7bfdd73b8af18..7e500f865c19b 100644
--- a/js/web/lib/wasm/jsep/webgpu/ops/skip-layer-norm.ts
+++ b/js/web/lib/wasm/jsep/webgpu/ops/skip-layer-norm.ts
@@ -5,9 +5,9 @@ import {DataType} from '../../../wasm-common';
 import {TensorView} from '../../tensor-view';
 import {ShapeUtil} from '../../util';
 import {AttributeWithCacheKey, createAttributeWithCacheKey} from '../attribute-with-cache-key';
-import {ComputeContext, GpuDataType, ProgramInfo, ProgramInfoLoader, ProgramMetadata} from '../types';
+import {ComputeContext, ProgramInfo} from '../types';
 
-import {ShaderHelper, tensorTypeToWsglStorageType} from './common';
+import {castToF32, fillVector, getMaxComponents, inputVariable, outputVariable, ShaderHelper, sumVector, tensorTypeToWsglStorageType,} from './common';
 
 export interface SkipLayerNormAttributes extends AttributeWithCacheKey {
   epsilon: number;
@@ -18,9 +18,6 @@ const validateInputs = (inputs: readonly TensorView[]): void => {
     throw new Error('layerNorm requires at least 3 inputs.');
   }
 
-  if (inputs[0].dataType !== DataType.float || inputs[1].dataType !== DataType.float) {
-    throw new Error('inputs should be float type');
-  }
   const input: TensorView = inputs[0];
   const skip: TensorView = inputs[1];
   const gamma: TensorView = inputs[2];
@@ -74,98 +71,91 @@ const validateInputs = (inputs: readonly TensorView[]): void => {
 };
 
 const createSkipLayerNormProgramInfo =
-    (metadata: ProgramMetadata, inputs: readonly TensorView[], attributes: SkipLayerNormAttributes, outputCount: number,
-     isTraining: boolean): ProgramInfo => {
-      const inputShape = inputs[0].dims;
-      const inputSize = ShapeUtil.size(inputShape);
-      const outputShape = inputShape;
-      const outputSize = inputSize;
-      const hiddenSize = inputShape.slice(-1)[0];
-      const meanInvStdDevDim = isTraining ? inputShape.slice(0, -1).concat(1) : [];
-      const hasBetaInput = inputs.length > 3;
-      const hasBiasInput = inputs.length > 4;
-      const dataType = tensorTypeToWsglStorageType(inputs[0].dataType);
-      const hasMeanOutput = isTraining && outputCount > 1;
-      const hasInvStdDevOutput = isTraining && outputCount > 2;
-      const hasInputSkipBiasSumOutput = outputCount > 3;
-      let bindingNumber = 0;
-      const getShaderSource = (shaderHelper: ShaderHelper) => `
-      const hiddenSize: u32 = ${hiddenSize};
+    (inputs: readonly TensorView[], attributes: SkipLayerNormAttributes, outputCount: number, isTraining: boolean):
+        ProgramInfo => {
+          const inputShape = inputs[0].dims;
+          const inputSize = ShapeUtil.size(inputShape);
+          const outputShape = inputShape;
+          const outputSize = inputSize;
+          const hiddenSize = inputShape.slice(-1)[0];
+          const meanInvStdDevDim = isTraining ? inputShape.slice(0, -1).concat(1) : [];
+          const hasBetaInput = inputs.length > 3;
+          const hasBiasInput = inputs.length > 4;
+          const hasMeanOutput = isTraining && outputCount > 1;
+          const hasInvStdDevOutput = isTraining && outputCount > 2;
+          const hasInputSkipBiasSumOutput = outputCount > 3;
+
+          const components = getMaxComponents(hiddenSize);
+          const variables = [
+            inputVariable('x', inputs[0].dataType, inputs[0].dims, components),
+            inputVariable('skip', inputs[1].dataType, inputs[1].dims, components),
+            inputVariable('gamma', inputs[2].dataType, inputs[2].dims, components),
+          ];
+          if (hasBetaInput) {
+            variables.push(inputVariable('beta', inputs[3].dataType, inputs[3].dims, components));
+          }
+          if (hasBiasInput) {
+            variables.push(inputVariable('bias', inputs[4].dataType, inputs[4].dims, components));
+          }
+          variables.push(outputVariable('output', inputs[0].dataType, outputShape, components));
+          if (hasMeanOutput) {
+            variables.push(outputVariable('meanOutput', DataType.float, meanInvStdDevDim));
+          }
+          if (hasInvStdDevOutput) {
+            variables.push(outputVariable('invStdOutput', DataType.float, meanInvStdDevDim));
+          }
+          if (hasInputSkipBiasSumOutput) {
+            variables.push(outputVariable('inputSkipBiasSum', inputs[0].dataType, outputShape, components));
+          }
+          const dataType = tensorTypeToWsglStorageType(inputs[0].dataType);
+          const getShaderSource = (shaderHelper: ShaderHelper) => `
+      const hiddenSize: f32 = ${hiddenSize};
+      const hiddenSizeVectorized: u32 = ${hiddenSize / components};
       const epsilon: f32 = ${attributes.epsilon};
 
-      @group(0) @binding(${bindingNumber++}) var<storage, read> x : array<${dataType}>;
-      @group(0) @binding(${bindingNumber++}) var<storage, read> skip : array<${dataType}>;
-      @group(0) @binding(${bindingNumber++}) var<storage, read> gamma : array<${dataType}>;
-      ${hasBetaInput ? `@group(0) @binding(${bindingNumber++}) var<storage, read> beta : array<${dataType}>;` : ''}
-      ${hasBiasInput ? `@group(0) @binding(${bindingNumber++}) var<storage, read> bias : array<${dataType}>;` : ''}
-      @group(0) @binding(${bindingNumber++}) var<storage, read_write> output : array<${dataType}>;
-      ${
-          hasMeanOutput ?
-              `@group(0) @binding(${bindingNumber++}) var<storage, read_write> meanOutput : array<${dataType}>;` :
-              ''}
-      ${
-          hasInvStdDevOutput ?
-              `@group(0) @binding(${bindingNumber++}) var<storage, read_write> invStdOutput : array<${dataType}>;` :
-              ''}
-      ${
-          hasInputSkipBiasSumOutput ?
-              `@group(0) @binding(${bindingNumber++}) var<storage, read_write> inputSkipBiasSum : array<${dataType}>;` :
-              ''}
+      ${shaderHelper.declareVariables(...variables)}
 
       ${shaderHelper.mainStart()}
         ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes(outputSize / hiddenSize)}
-        let offset = global_idx * hiddenSize;
-        var sum: f32 = 0.0;
-        var squareSum: f32 = 0.0;
-        for (var i: u32 = 0; i < hiddenSize; i++) {
+        let offset = global_idx * hiddenSizeVectorized;
+        var sum = ${fillVector('f32', components)};
+        var squareSum = ${fillVector('f32', components)};
+        for (var i: u32 = 0; i < hiddenSizeVectorized; i++) {
           let skipValue = skip[offset + i];
           let biasValue = ${hasBiasInput ? 'bias[i]' : '0.0'};
           let inputValue = x[offset + i];
           let value = inputValue + skipValue + biasValue;
           ${hasInputSkipBiasSumOutput ? 'inputSkipBiasSum[offset + i] = value;' : ''}
           output[offset + i] = value;
-          sum += value;
-          squareSum += value * value;
+          let f32Value = ${castToF32(dataType, components, 'value')};
+          sum += f32Value;
+          squareSum += f32Value * f32Value;
         }
-        let mean: f32 = sum / f32(hiddenSize);
-        let variance: f32 = sqrt(squareSum / f32(hiddenSize) - mean * mean + epsilon);
+        let mean = ${sumVector('sum', components)} / hiddenSize;
+        let variance = sqrt(${sumVector('squareSum', components)} / hiddenSize - mean * mean + epsilon);
         ${hasMeanOutput ? 'meanOutput[global_idx] = mean;' : ''}
         ${hasInvStdDevOutput ? 'invStdOutput[global_idx] = 1.0 / variance;' : ''}
-        for (var i: u32 = 0; i < hiddenSize; i++) {
-          output[offset + i] = (output[offset + i] - mean) / variance * gamma[i] + ${hasBetaInput ? 'beta[i]' : '0.0'};
+        for (var i: u32 = 0; i < hiddenSizeVectorized; i++) {
+          output[offset + i] = (output[offset + i] - ${dataType}(mean)) / ${dataType}(variance) * gamma[i]
+           + ${hasBetaInput ? 'beta[i]' : '0.0'};
         }
       }`;
-      const outputs = [{dims: outputShape, dataType: inputs[0].dataType, gpuDataType: GpuDataType.default}];
-      if (outputCount > 1) {
-        outputs.push({dims: meanInvStdDevDim, dataType: inputs[0].dataType, gpuDataType: GpuDataType.default});
-      }
-      if (outputCount > 2) {
-        outputs.push({dims: meanInvStdDevDim, dataType: inputs[0].dataType, gpuDataType: GpuDataType.default});
-      }
-      if (outputCount > 3) {
-        outputs.push({dims: inputShape, dataType: inputs[0].dataType, gpuDataType: GpuDataType.default});
-      }
-
-      return {
-        ...metadata,
-        getShaderSource,
-        outputs,
-        dispatchGroup: () => ({x: Math.ceil(outputSize / hiddenSize / 64)})
-      };
-    };
-
-const createSkipLayerNormProgramInfoLoader =
-    (inputs: readonly TensorView[], attributes: SkipLayerNormAttributes, outputCount: number, isTraining: boolean):
-        ProgramInfoLoader => {
-          const inputTypes = new Array(inputs.length).fill(GpuDataType.default);
-          const metadata: ProgramMetadata = {
-            name: 'SkipLayerNormalization',
-            inputTypes,
-            cacheHint: attributes.cacheKey,
-          };
+          const outputs = [{dims: outputShape, dataType: inputs[0].dataType}];
+          if (outputCount > 1) {
+            outputs.push({dims: meanInvStdDevDim, dataType: DataType.float});
+          }
+          if (outputCount > 2) {
+            outputs.push({dims: meanInvStdDevDim, dataType: DataType.float});
+          }
+          if (outputCount > 3) {
+            outputs.push({dims: inputShape, dataType: inputs[0].dataType});
+          }
+
           return {
-            ...metadata,
-            get: () => createSkipLayerNormProgramInfo(metadata, inputs, attributes, outputCount, isTraining)
+            name: 'SkipLayerNormalization',
+            shaderCache: {hint: attributes.cacheKey},
+            getShaderSource,
+            getRunData: () => ({outputs, dispatchGroup: {x: Math.ceil(outputSize / hiddenSize / 64)}}),
           };
         };
 
@@ -186,7 +176,7 @@ export const skipLayerNorm = (context: ComputeContext, attributes: SkipLayerNorm
     outputs.push(3);
   }
   context.compute(
-      createSkipLayerNormProgramInfoLoader(context.inputs, attributes, context.outputCount, isTraining), {outputs});
+      createSkipLayerNormProgramInfo(context.inputs, attributes, context.outputCount, isTraining), {outputs});
 };
 
 export const parseSkipLayerNormAttributes = (attributes: Record<string, unknown>): SkipLayerNormAttributes => {
diff --git a/js/web/lib/wasm/jsep/webgpu/ops/slice.ts b/js/web/lib/wasm/jsep/webgpu/ops/slice.ts
index 257b9ebc1fdac..7458579bf4340 100644
--- a/js/web/lib/wasm/jsep/webgpu/ops/slice.ts
+++ b/js/web/lib/wasm/jsep/webgpu/ops/slice.ts
@@ -5,9 +5,9 @@ import {DataType} from '../../../wasm-common';
 import {TensorView} from '../../tensor-view';
 import {ShapeUtil} from '../../util';
 import {AttributeWithCacheKey, createAttributeWithCacheKey} from '../attribute-with-cache-key';
-import {ComputeContext, GpuDataType, ProgramInfo, ProgramInfoLoader, ProgramMetadata, TensorInfo} from '../types';
+import {ComputeContext, ProgramInfo, ProgramUniform, TensorInfo} from '../types';
 
-import {IndicesHelper, inputVariable, outputVariable, ShaderHelper} from './common';
+import {createTensorShapeVariables, enableShapesUniforms, IndicesHelper, inputVariable, outputVariable, ShaderHelper, UniformsArrayType} from './common';
 
 export interface SliceAttributes extends AttributeWithCacheKey {
   readonly starts: number[];
@@ -77,118 +77,147 @@ const fixStartEndValues =
         };
 
 const calculateInputIndicesImpl =
-    (input: IndicesHelper, output: IndicesHelper, inputShape: readonly number[], outputShape: readonly number[]):
-        string => `fn calculateInputIndices(outputIndices: ${output.type.indices}) -> ${input.type.indices} {
+    (input: IndicesHelper, output: IndicesHelper, inputShape: readonly number[], outputShape: readonly number[],
+     enableInputShapeUniforms: boolean): string =>
+        `fn calculateInputIndices(outputIndices: ${output.type.indices}) -> ${input.type.indices} {
           var inputIndices: ${input.type.indices};
           var carry = 0u;
           for (var i = ${inputShape.length}; i >= 0; i--) {
+            let input_shape_i = ${
+            enableInputShapeUniforms ? `uniforms.input_shape${inputShape.length > 1 ? '[i]' : ''}` : 'inputShape[i]'};
+            let steps_i  = ${
+            enableInputShapeUniforms ? `uniforms.steps${inputShape.length > 1 ? '[i]' : ''}` : 'steps[i]'};
+            let signs_i  = ${
+            enableInputShapeUniforms ? `uniforms.signs${inputShape.length > 1 ? '[i]' : ''}` : 'signs[i]'};
+            let starts_i  = ${
+            enableInputShapeUniforms ? `uniforms.starts${inputShape.length > 1 ? '[i]' : ''}` : 'starts[i]'};
             var outputIndex = ${outputShape.length === 1 ? 'outputIndices' : 'outputIndices[i]'};
-            var inputIndex = outputIndex * steps[i] + starts[i] + carry;
-            carry = inputIndex / inputShape[i];
-            inputIndex = inputIndex % inputShape[i];
-            if (signs[i] < 0) {
-              inputIndex = inputShape[i] - inputIndex - 1u + starts[i];
+            var inputIndex = outputIndex * steps_i + starts_i + carry;
+            carry = inputIndex / input_shape_i;
+            inputIndex = inputIndex % input_shape_i;
+            if (signs_i < 0) {
+              inputIndex = input_shape_i - inputIndex - 1u + starts_i;
             }
             ${inputShape.length === 1 ? 'inputIndices' : 'inputIndices[i]'} = inputIndex;
           }
           return inputIndices;
       }`;
 
-const createSliceProgramInfo =
-    (metadata: ProgramMetadata, inputs: readonly TensorView[], attributes: SliceAttributes): ProgramInfo => {
-      const inputShape = inputs[0].dims;
-      const inputSize = ShapeUtil.size(inputShape);
-      const axes = (attributes.axes.length > 0) ? ShapeUtil.normalizeAxes(attributes.axes, inputShape.length) :
-                                                  [...Array(inputShape.length).keys()];
-      let steps = readInput(inputs, 4);
-      steps.forEach((step) => step !== 0 || (() => {
-                                throw new Error('step cannot be 0');
-                              }));
-      if (steps.length === 0) {
-        steps = Array(axes.length).fill(1);
-      }
-      const starts = attributes.starts.map((start, i) => fixStartEndValues(start, i, inputShape, axes, steps));
+const createSliceProgramInfo = (inputs: readonly TensorView[], attributes: SliceAttributes): ProgramInfo => {
+  const inputShape = inputs[0].dims;
+  const inputSize = ShapeUtil.size(inputShape);
+  const axes = (attributes.axes.length > 0) ? ShapeUtil.normalizeAxes(attributes.axes, inputShape.length) :
+                                              [...Array(inputShape.length).keys()];
+  let steps = readInput(inputs, 4);
+  steps.forEach((step) => step !== 0 || (() => {
+                            throw new Error('step cannot be 0');
+                          }));
+  if (steps.length === 0) {
+    steps = Array(axes.length).fill(1);
+  }
+  const starts = attributes.starts.map((start, i) => fixStartEndValues(start, i, inputShape, axes, steps));
 
-      const ends = attributes.ends.map((end, i) => fixStartEndValues(end, i, inputShape, axes, steps));
+  const ends = attributes.ends.map((end, i) => fixStartEndValues(end, i, inputShape, axes, steps));
 
-      if (axes.length !== inputShape.length) {
-        for (let i = 0; i < inputShape.length; ++i) {
-          if (!axes.includes(i)) {
-            starts.splice(i, 0, 0);
-            ends.splice(i, 0, inputShape[i]);
-            steps.splice(i, 0, 1);
-          }
-        }
-      }
-      const signs = steps.map(step => Math.sign(step));
-      // Convert negative steps to positive steps and reverse starts and ends
-      steps.forEach((step, i, array) => {
-        if (step < 0) {
-          const numSteps = (ends[i] - starts[i]) / step;
-          const newEnd = starts[i];
-          const newStart = newEnd + numSteps * steps[i];
-          starts[i] = newStart;
-          ends[i] = newEnd;
-          array[i] = -step;
-        }
-      });
-
-      const outputShape = inputShape.slice(0);
-      axes.forEach((axis, _) => {
-        outputShape[axis] = Math.ceil((ends[axis] - starts[axis]) / steps[axis]);
-      });
+  if (axes.length !== starts.length || axes.length !== ends.length) {
+    throw new Error('start, ends and axes should have the same number of elements');
+  }
 
-      const outputTensorInfo:
-          TensorInfo = {dims: outputShape, dataType: inputs[0].dataType, gpuDataType: GpuDataType.default};
+  if (axes.length !== inputShape.length) {
+    for (let i = 0; i < inputShape.length; ++i) {
+      if (!axes.includes(i)) {
+        starts.splice(i, 0, 0);
+        ends.splice(i, 0, inputShape[i]);
+        steps.splice(i, 0, 1);
+      }
+    }
+  }
+  const signs = steps.map(step => Math.sign(step));
+  // Convert negative steps to positive steps and reverse starts and ends
+  steps.forEach((step, i, array) => {
+    if (step < 0) {
+      const numSteps = (ends[i] - starts[i]) / step;
+      const newEnd = starts[i];
+      const newStart = newEnd + numSteps * steps[i];
+      starts[i] = newStart;
+      ends[i] = newEnd;
+      array[i] = -step;
+    }
+  });
+  // Output rank is expected to be less than or equal to the input rank.
+  const enableShapeUniforms = enableShapesUniforms(inputs[0].dims.length);
+  const inputShapeOrRank = enableShapeUniforms ? inputs[0].dims.length : inputs[0].dims;
 
-      const output = outputVariable('output', inputs[0].dataType, outputShape);
-      const input = inputVariable('input', inputs[0].dataType, inputShape);
-      const outputSize = ShapeUtil.size(outputShape);
+  const outputShape = inputShape.slice(0);
+  axes.forEach((axis, _) => {
+    outputShape[axis] = Math.ceil((ends[axis] - starts[axis]) / steps[axis]);
+  });
+  const outputShapeOrRank = enableShapeUniforms ? outputShape.length : outputShape;
+
+  const outputTensorInfo: TensorInfo = {dims: outputShape, dataType: inputs[0].dataType};
+
+  const output = outputVariable('output', inputs[0].dataType, outputShapeOrRank);
+  const input = inputVariable('input', inputs[0].dataType, inputShapeOrRank);
+  const outputSize = ShapeUtil.size(outputShape);
+  const programUniforms: ProgramUniform[] = [];
+  const uniforms: UniformsArrayType = [];
+  if (enableShapeUniforms) {
+    uniforms.push({name: 'starts', type: starts.length > 1 ? `vec${starts.length}<u32>` : 'u32'});
+    uniforms.push({name: 'signs', type: signs.length > 1 ? `vec${signs.length}<i32>` : 'i32'});
+    uniforms.push({name: 'steps', type: steps.length > 1 ? `vec${steps.length}<u32>` : 'u32'});
+    programUniforms.push({type: 'uint32', data: starts});
+    programUniforms.push({type: 'int32', data: signs});
+    programUniforms.push({type: 'uint32', data: steps});
+  }
+  uniforms.push({name: 'outputSize', type: 'u32'});
+  programUniforms.push({type: 'uint32', data: outputSize});
+  if (enableShapeUniforms) {
+    programUniforms.push(...createTensorShapeVariables(inputs[0].dims));
+    programUniforms.push(...createTensorShapeVariables(outputShape));
+  }
 
-      const getShaderSource = (shaderHelper: ShaderHelper) => `
-      ${shaderHelper.declareVariables(input, output)}
-        const signs = array<i32, ${signs.length}>(${signs.map(i => `${i}i`).join(',')});
-        const starts = array<u32, ${starts.length}>(${starts.map(i => `${i}u`).join(',')});
-        const ends = array<u32, ${ends.length}>(${ends.map(i => `${i}u`).join(',')});
-        const steps = array<u32, ${steps.length}>(${steps.map(i => `${i}u`).join(',')});
-        const inputShape = array<u32, ${inputShape.length}>(${inputShape.map(i => `${i}u`).join(',')});
+  const getShaderSource = (shaderHelper: ShaderHelper) => `
+      ${shaderHelper.registerUniforms(uniforms).declareVariables(input, output)}
+        ${enableShapeUniforms ? '' : [
+    `const signs = array<i32, ${signs.length}>(${signs.map(i => `${i}i`).join(',')});`,
+    `const starts = array<u32, ${starts.length}>(${starts.map(i => `${i}u`).join(',')});`,
+    `const steps = array<u32, ${steps.length}>(${steps.map(i => `${i}u`).join(',')});`,
+    `const inputShape = array<u32, ${inputShape.length}>(${inputShape.map(i => `${i}u`).join(',')});`
+  ].join('\n')}
 
-        ${calculateInputIndicesImpl(input, output, inputShape, outputShape)}
+        ${calculateInputIndicesImpl(input, output, inputShape, outputShape, enableShapeUniforms)}
         ${shaderHelper.mainStart()}
-          ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes(outputSize)}
+          ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes('uniforms.outputSize')}
           let outputIndices = ${output.offsetToIndices('global_idx')};
           let inputIndices = calculateInputIndices(outputIndices);
           ${output.setByOffset('global_idx', input.getByIndices('inputIndices'))}
       }`;
-      return {
-        ...metadata,
-        getShaderSource,
-        outputs: [outputTensorInfo],
-        dispatchGroup: () => ({x: Math.ceil(inputSize / 64 /* workgroup size */)})
-      };
-    };
-
-const createSliceProgramInfoLoader =
-    (inputs: readonly TensorView[], attributes: SliceAttributes): ProgramInfoLoader => {
-      const updatedAttributes = createSliceAttributesFromInputs(inputs, attributes);
-      const metadata: ProgramMetadata = {
-        name: 'Slice',
-        inputTypes: [GpuDataType.default],
-        cacheHint: updatedAttributes.cacheKey + (inputs.length > 4 ? 'steps_' + inputs[4].dims.toString() : '')
-      };
-      return {...metadata, get: () => createSliceProgramInfo(metadata, inputs, updatedAttributes)};
-    };
+  return {
+    name: 'Slice',
+    shaderCache: {
+      hint: enableShapeUniforms ? `${signs.length}_${starts.length}_${steps.length}` :
+                                  `${attributes.cacheKey} | ${inputs[4]?.dims ?? ''}`,
+      inputDependencies: [enableShapeUniforms ? 'rank' : 'dims']
+    },
+    getShaderSource,
+    getRunData: () => ({
+      outputs: [outputTensorInfo],
+      dispatchGroup: {x: Math.ceil(inputSize / 64 /* workgroup size */)},
+      programUniforms
+    })
+  };
+};
 
 export const slice = (context: ComputeContext, attributes: SliceAttributes): void => {
   validateInputs(context.inputs, attributes);
-  const programInfoLoader = createSliceProgramInfoLoader(context.inputs, attributes);
-  const program = programInfoLoader.get();
-  if (ShapeUtil.size(program.outputs[0].dims) > 0) {
-    context.compute(programInfoLoader, {inputs: [0]});
-  } else {
-    // TODO: support empty output
-    throw new Error('slice: output size is 0');
-  }
+  const updatedAttributes = createSliceAttributesFromInputs(context.inputs, attributes);
+  context.compute(createSliceProgramInfo(context.inputs, updatedAttributes), {inputs: [0]});
+  // if (ShapeUtil.size(program.outputs[0].dims) > 0) {
+  //   context.compute(programInfoLoader, {inputs: [0]});
+  // } else {
+  //   // TODO: support empty output
+  //   throw new Error('slice: output size is 0');
+  // }
 };
 
 export const parseSliceAttributes = (attributes: Record<string, unknown>): SliceAttributes => {
diff --git a/js/web/lib/wasm/jsep/webgpu/ops/softmax.ts b/js/web/lib/wasm/jsep/webgpu/ops/softmax.ts
index 495a4bcea4f47..378a7e738dac9 100644
--- a/js/web/lib/wasm/jsep/webgpu/ops/softmax.ts
+++ b/js/web/lib/wasm/jsep/webgpu/ops/softmax.ts
@@ -8,9 +8,9 @@
 import {TensorView} from '../../tensor-view';
 import {ShapeUtil} from '../../util';
 import {AttributeWithCacheKey, createAttributeWithCacheKey} from '../attribute-with-cache-key';
-import {ComputeContext, GpuDataType, ProgramInfo} from '../types';
+import {ComputeContext, ProgramInfo} from '../types';
 
-import {ShaderHelper, tensorTypeToWsglStorageType} from './common';
+import {getMaxComponents, inputVariable, outputVariable, ShaderHelper, sumVector, tensorTypeToWsglStorageType} from './common';
 
 const validateInputs = (inputs: readonly TensorView[]): void => {
   if (!inputs || inputs.length !== 1) {
@@ -22,14 +22,7 @@ export interface SoftmaxAttributes extends AttributeWithCacheKey {
   readonly axis: number;
 }
 
-export const softmaxProgramMetadata = {
-  name: 'Softmax',
-  inputTypes: [GpuDataType.default]
-};
-
-
 const createSoftmaxProgramInfo = (input: TensorView, attributes: SoftmaxAttributes): ProgramInfo => {
-  const dataType = tensorTypeToWsglStorageType(input.dataType);
   const shape = input.dims;
   const outputSize = ShapeUtil.size(shape);
   const WG = 64;
@@ -43,35 +36,49 @@ const createSoftmaxProgramInfo = (input: TensorView, attributes: SoftmaxAttribut
 
   const cols = shape[axis];
   const rows = outputSize / cols;
-
+  const components = getMaxComponents(cols);
+  const packedCols = cols / components;
+
+  const maxVector = (name: string, components: number) => {
+    if (components === 4) {
+      return `max(max(${name}.x, ${name}.y), max(${name}.z, ${name}.w))`;
+    } else if (components === 2) {
+      return `max(${name}.x, ${name}.y)`;
+    } else if (components === 3) {
+      return `max(max(${name}.x, ${name}.y), ${name}.z)`;
+    }
+
+    return name;
+  };
+  const x = inputVariable('x', input.dataType, input.dims, components);
+  const output = outputVariable('result', input.dataType, input.dims, components);
+  const valueType = x.type.value;
   // 6.2.4 in wgsl spec
-  const threadMaxDecl = dataType === 'f32' ? 'var threadMax: f32 = -3.402823e+38f;' : 'var threadMax: f16 = -65504.0h;';
-  const getShaderSource = (_shaderHelper: ShaderHelper) => `
-      var<workgroup> rowMaxShared : ${dataType};
-      var<workgroup> rowSumShared : ${dataType};
-      var<workgroup> threadShared : array<${dataType}, ${WG}>;
-
-      @group(0) @binding(0) var<storage, read> x : array<${dataType}>;
-      @group(0) @binding(1) var<storage, read_write> result : array<${dataType}>;
-
-      fn getValue(row: i32, col: i32, row_stride: i32) -> ${dataType} {
+  const threadMaxDecl = tensorTypeToWsglStorageType(input.dataType) === 'f32' ?
+      `var threadMax = ${valueType}(-3.402823e+38f);` :
+      `var threadMax = ${valueType}(-65504.0h);`;
+  const getShaderSource = (shaderHelper: ShaderHelper) => `
+      var<workgroup> rowMaxShared : ${valueType};
+      var<workgroup> rowSumShared : ${valueType};
+      var<workgroup> threadShared : array<${valueType}, ${WG}>;
+
+      fn getValue(row: i32, col: i32, row_stride: i32) -> ${valueType} {
         let index = row * row_stride + col;
         return x[index];
       }
 
-      fn setValue(row: i32, col: i32, row_stride: i32, value: ${dataType}) {
+      fn setValue(row: i32, col: i32, row_stride: i32, value: ${valueType}) {
         let index = row * row_stride + col;
         result[index] = value;
       }
-
-      @compute @workgroup_size(${WG}, 1, 1)
-      fn main(@builtin(local_invocation_id) local_id : vec3<u32>, @builtin(global_invocation_id) global_id : vec3u) {
+      ${shaderHelper.registerUniform('packedCols', 'i32').declareVariables(x, output)}
+      ${shaderHelper.mainStart()}
         let gindex = i32(global_id.x);
         let lindex = i32(local_id.x);
         const wg = ${WG};
         let row = gindex / wg;
-        let cols = ${cols};
-        let row_stride : i32 = ${cols};
+        let cols = uniforms.packedCols;
+        let row_stride : i32 = uniforms.packedCols;
 
         // find the rows max
         ${threadMaxDecl}
@@ -93,12 +100,12 @@ const createSoftmaxProgramInfo = (input: TensorView, attributes: SoftmaxAttribut
           workgroupBarrier();
         }
         if (lindex == 0) {
-          rowMaxShared = threadShared[0];
+          rowMaxShared = ${valueType}(${maxVector('threadShared[0]', components)});
         }
         workgroupBarrier();
 
         // find the rows sum
-        var threadSum: ${dataType} = 0.0;
+        var threadSum = ${valueType}(0.0);
         for (var col = lindex; col < cols; col += wg) {
           let subExp = exp(getValue(row, col, row_stride) - rowMaxShared);
           threadSum += subExp;
@@ -113,7 +120,7 @@ const createSoftmaxProgramInfo = (input: TensorView, attributes: SoftmaxAttribut
           workgroupBarrier();
         }
         if (lindex == 0) {
-          rowSumShared = threadShared[0];
+          rowSumShared = ${valueType}(${sumVector('threadShared[0]', components)});
         }
         workgroupBarrier();
 
@@ -124,21 +131,20 @@ const createSoftmaxProgramInfo = (input: TensorView, attributes: SoftmaxAttribut
         }
       }`;
   return {
-    ...softmaxProgramMetadata,
-    outputs: [{dims: shape, dataType: input.dataType, gpuDataType: GpuDataType.default}],
+    name: 'Softmax',
+    shaderCache: {hint: `${components}`, inputDependencies: ['type']},
+    getRunData: () => ({
+      outputs: [{dims: shape, dataType: input.dataType}],
+      dispatchGroup: {x: rows},
+      programUniforms: [{type: 'uint32', data: packedCols}]
+    }),
     getShaderSource,
-    dispatchGroup: () => ({x: rows})
   };
 };
 
-
 export const softmax = (context: ComputeContext, attributes: SoftmaxAttributes): void => {
   validateInputs(context.inputs);
-  context.compute({
-    ...softmaxProgramMetadata,
-    cacheHint: attributes.cacheKey,
-    get: () => createSoftmaxProgramInfo(context.inputs[0], attributes)
-  });
+  context.compute(createSoftmaxProgramInfo(context.inputs[0], attributes));
 };
 
 export const parseSoftmaxAttributes = (attributes: Record<string, unknown>): SoftmaxAttributes =>
diff --git a/js/web/lib/wasm/jsep/webgpu/ops/split.ts b/js/web/lib/wasm/jsep/webgpu/ops/split.ts
index 3367091bbac23..fd60d81b87ae1 100644
--- a/js/web/lib/wasm/jsep/webgpu/ops/split.ts
+++ b/js/web/lib/wasm/jsep/webgpu/ops/split.ts
@@ -4,7 +4,7 @@
 import {TensorView} from '../../tensor-view';
 import {ShapeUtil} from '../../util';
 import {AttributeWithCacheKey, createAttributeWithCacheKey} from '../attribute-with-cache-key';
-import {ComputeContext, GpuDataType, ProgramInfo, ProgramInfoLoader, ProgramMetadata, TensorInfo} from '../types';
+import {ComputeContext, ProgramInfo, TensorInfo} from '../types';
 
 import {IndicesHelper, inputVariable, outputVariable, ShaderHelper} from './common';
 
@@ -61,31 +61,30 @@ const writeBufferDataImpl = (outputs: readonly IndicesHelper[]) => {
       }`;
 };
 
-const createSplitProgramInfo =
-    (metadata: ProgramMetadata, inputs: readonly TensorView[], attributes: SplitAttributes): ProgramInfo => {
-      const inputShape = inputs[0].dims;
-      const inputSize = ShapeUtil.size(inputShape);
-      const dataType = inputs[0].dataType;
-      const rank = inputShape.length;
-      const axis = attributes.axis;
-      const adjustedAxis = (axis < 0) ? inputShape.length + axis : axis;
-      const outputs = new Array<IndicesHelper>(attributes.numOutputs);
-      const input = inputVariable('input', dataType, inputShape);
-      const sizeInConcatAxis = new Array<number>(attributes.numOutputs);
-      const outputsTensorInfo: TensorInfo[] = [];
-      const outputShapes: number[][] = [];
-      let previousSum = 0;
-      for (let i = 0; i < attributes.numOutputs; i++) {
-        previousSum += attributes.splitSizes[i];
-        sizeInConcatAxis[i] = previousSum;
-        const outputShape = inputShape.slice();
-        outputShape[attributes.axis] = attributes.splitSizes[i];
-        outputShapes.push(outputShape);
-        outputs[i] = outputVariable(`output${i}`, dataType, outputShapes[i]);
-        outputsTensorInfo.push({dims: outputShapes[i], dataType: inputs[0].dataType, gpuDataType: GpuDataType.default});
-      }
-      const indicesAxis = rank < 2 ? 'indices' : `indices[${adjustedAxis}]`;
-      const getShaderSource = (shaderHelper: ShaderHelper) => `
+const createSplitProgramInfo = (inputs: readonly TensorView[], attributes: SplitAttributes): ProgramInfo => {
+  const inputShape = inputs[0].dims;
+  const inputSize = ShapeUtil.size(inputShape);
+  const dataType = inputs[0].dataType;
+  const rank = inputShape.length;
+  const axis = attributes.axis;
+  const adjustedAxis = (axis < 0) ? inputShape.length + axis : axis;
+  const outputs = new Array<IndicesHelper>(attributes.numOutputs);
+  const input = inputVariable('input', dataType, inputShape);
+  const sizeInConcatAxis = new Array<number>(attributes.numOutputs);
+  const outputsTensorInfo: TensorInfo[] = [];
+  const outputShapes: number[][] = [];
+  let previousSum = 0;
+  for (let i = 0; i < attributes.numOutputs; i++) {
+    previousSum += attributes.splitSizes[i];
+    sizeInConcatAxis[i] = previousSum;
+    const outputShape = inputShape.slice();
+    outputShape[attributes.axis] = attributes.splitSizes[i];
+    outputShapes.push(outputShape);
+    outputs[i] = outputVariable(`output${i}`, dataType, outputShapes[i]);
+    outputsTensorInfo.push({dims: outputShapes[i], dataType: inputs[0].dataType});
+  }
+  const indicesAxis = rank < 2 ? 'indices' : `indices[${adjustedAxis}]`;
+  const getShaderSource = (shaderHelper: ShaderHelper) => `
   ${shaderHelper.declareVariables(input, ...outputs)}
   const sizeInConcatAxis = array<u32, ${sizeInConcatAxis.length}>(${sizeInConcatAxis.map(i => `${i}u`).join(',')});
   ${calculateOutputIndexImpl(sizeInConcatAxis.length)}
@@ -101,25 +100,22 @@ const createSplitProgramInfo =
     }
     writeBufferData(outputNumber, indices, global_idx);
   }`;
-      return {
-        ...metadata,
-        getShaderSource,
-        outputs: outputsTensorInfo,
-        dispatchGroup: () => ({x: Math.ceil(inputSize / 64 /* workgroup size */)})
-      };
-    };
-
-const createSplitProgramInfoLoader =
-    (inputs: readonly TensorView[], attributes: SplitAttributes): ProgramInfoLoader => {
-      const updatedAttributes = inputs.length === 1 ? attributes : createSplitAttributesFromInputs(inputs, attributes);
-      const metadata:
-          ProgramMetadata = {name: 'Split', inputTypes: [GpuDataType.default], cacheHint: updatedAttributes.cacheKey};
-      return {...metadata, get: () => createSplitProgramInfo(metadata, [inputs[0]], updatedAttributes)};
-    };
+  return {
+    name: 'Split',
+    shaderCache: {hint: attributes.cacheKey},
+    getShaderSource,
+    getRunData: () => ({
+      outputs: outputsTensorInfo,
+      dispatchGroup: {x: Math.ceil(inputSize / 64 /* workgroup size */)},
+    })
+  };
+};
 
 export const split = (context: ComputeContext, attributes: SplitAttributes): void => {
   validateInputs(context.inputs);
-  context.compute(createSplitProgramInfoLoader(context.inputs, attributes), {inputs: [0]});
+  const updatedAttributes =
+      context.inputs.length === 1 ? attributes : createSplitAttributesFromInputs(context.inputs, attributes);
+  context.compute(createSplitProgramInfo(context.inputs, updatedAttributes), {inputs: [0]});
 };
 
 export const parseSplitAttributes = (attributes: Record<string, unknown>): SplitAttributes => {
diff --git a/js/web/lib/wasm/jsep/webgpu/ops/tile.ts b/js/web/lib/wasm/jsep/webgpu/ops/tile.ts
index 109c29bfc8a80..e294541a775ca 100644
--- a/js/web/lib/wasm/jsep/webgpu/ops/tile.ts
+++ b/js/web/lib/wasm/jsep/webgpu/ops/tile.ts
@@ -4,15 +4,10 @@
 import {DataType} from '../../../wasm-common';
 import {TensorView} from '../../tensor-view';
 import {ShapeUtil} from '../../util';
-import {ComputeContext, GpuDataType, ProgramInfo, ProgramMetadata} from '../types';
+import {ComputeContext, ProgramInfo} from '../types';
 
 import {inputVariable, outputVariable, ShaderHelper} from './common';
 
-export const tileProgramMetadata = {
-  name: 'Tile',
-  inputTypes: [GpuDataType.default]
-};
-
 const getRepeats = (repeatsTensorView: TensorView): readonly number[] =>
     Array.from(repeatsTensorView.getBigInt64Array(), Number);
 
@@ -52,18 +47,17 @@ const getOutputShape = (inputShape: readonly number[], repeats: readonly number[
   return outputShape;
 };
 
-export const createTileProgramInfo =
-    (tileProgramMetadata: ProgramMetadata, inputs: readonly TensorView[]): ProgramInfo => {
-      const inputShape = inputs[0].dims;
-      const repeats: readonly number[] = getRepeats(inputs[1]);
-      const outputShape = getOutputShape(inputShape, repeats);
-      const outputSize = ShapeUtil.size(outputShape);
+export const createTileProgramInfo = (inputs: readonly TensorView[]): ProgramInfo => {
+  const inputShape = inputs[0].dims;
+  const repeats: readonly number[] = getRepeats(inputs[1]);
+  const outputShape = getOutputShape(inputShape, repeats);
+  const outputSize = ShapeUtil.size(outputShape);
 
-      const dataType = inputs[0].dataType;
-      const input = inputVariable('input', dataType, inputShape);
-      const output = outputVariable('output', dataType, outputShape);
+  const dataType = inputs[0].dataType;
+  const input = inputVariable('input', dataType, inputShape);
+  const output = outputVariable('output', dataType, outputShape);
 
-      const getShaderSource = (shaderHelper: ShaderHelper) => `
+  const getShaderSource = (shaderHelper: ShaderHelper) => `
       const inputShape = ${input.indices(...inputShape)};
       ${shaderHelper.declareVariables(input, output)}
       ${shaderHelper.mainStart()}
@@ -78,19 +72,18 @@ export const createTileProgramInfo =
       ${output.setByOffset('global_idx', input.getByIndices('inputIndices'))}
     }`;
 
-      return {
-        ...tileProgramMetadata,
-        outputs: [{dims: outputShape, dataType: inputs[0].dataType, gpuDataType: GpuDataType.default}],
-        getShaderSource,
-        dispatchGroup: () => ({x: Math.ceil(outputSize / 64 /* workgroup size */)})
-      };
-    };
+  return {
+    name: 'Tile',
+    shaderCache: {hint: `${repeats}`},
+    getRunData: () => ({
+      outputs: [{dims: outputShape, dataType: inputs[0].dataType}],
+      dispatchGroup: {x: Math.ceil(outputSize / 64 /* workgroup size */)},
+    }),
+    getShaderSource,
+  };
+};
 
 export const tile = (context: ComputeContext): void => {
   validateInputs(context.inputs);
-  const repeats: readonly number[] = getRepeats(context.inputs[1]);
-  const cacheHint = repeats.toString();
-  context.compute(
-      {...tileProgramMetadata, cacheHint, get: () => createTileProgramInfo(tileProgramMetadata, context.inputs)},
-      {inputs: [0]});
+  context.compute(createTileProgramInfo(context.inputs), {inputs: [0]});
 };
diff --git a/js/web/lib/wasm/jsep/webgpu/ops/transpose.ts b/js/web/lib/wasm/jsep/webgpu/ops/transpose.ts
index 38dcaeab54c54..c4d43e9f466f5 100644
--- a/js/web/lib/wasm/jsep/webgpu/ops/transpose.ts
+++ b/js/web/lib/wasm/jsep/webgpu/ops/transpose.ts
@@ -4,30 +4,25 @@
 import {TensorView} from '../../tensor-view';
 import {ShapeUtil} from '../../util';
 import {AttributeWithCacheKey, createAttributeWithCacheKey} from '../attribute-with-cache-key';
-import {ComputeContext, GpuDataType, ProgramInfo} from '../types';
+import {ComputeContext, ProgramInfo} from '../types';
 
-import {IndicesHelper, inputVariable, outputVariable, ShaderHelper} from './common';
+import {createTensorShapeVariables, enableShapesUniforms, IndicesHelper, inputVariable, outputVariable, ShaderHelper} from './common';
 
 export interface TransposeAttributes extends AttributeWithCacheKey {
   readonly perm: number[];
 }
 
-export const transposeProgramMetadata = {
-  name: 'Transpose',
-  inputTypes: [GpuDataType.default]
-};
-
 const validateInputs = (inputs: readonly TensorView[]): void => {
   if (!inputs || inputs.length !== 1) {
     throw new Error('Transpose requires 1 input.');
   }
 };
 
-const getAdjustedPerm = (inputShape: readonly number[], perm: number[]): number[] =>
-    (perm && perm.length !== inputShape.length) ? [...(inputShape.keys())].reverse() : perm;
+const getAdjustedPerm = (inputRank: number, perm: number[]): number[] =>
+    (perm && perm.length !== inputRank) ? [...(new Array(inputRank).keys())].reverse() : perm;
 
 const getOutputShape = (inputShape: readonly number[], perm: number[]): readonly number[] =>
-    ShapeUtil.sortBasedOnPerm(inputShape, getAdjustedPerm(inputShape, perm));
+    ShapeUtil.sortBasedOnPerm(inputShape, getAdjustedPerm(inputShape.length, perm));
 
 const permFunctionBody = (perm: number[], rank: number, input: IndicesHelper, output: IndicesHelper): string => {
   const reverseFunc = [];
@@ -41,26 +36,23 @@ const permFunctionBody = (perm: number[], rank: number, input: IndicesHelper, ou
 };
 
 export const createTransposeProgramInfo = (inputTensor: TensorView, permAttr: number[]): ProgramInfo => {
-  const dataType = inputTensor.dataType;
-  const inputShape = inputTensor.dims;
-  const perm = getAdjustedPerm(inputShape, permAttr);
-  const outputShape = getOutputShape(inputShape, perm);
-  const rank = inputShape.length;
-  const outputSize = ShapeUtil.size(outputShape);
-  // A dims=[${inputs[0].dims.toString()}]
-  // out Dims=[${unpackedOutputShape.toString()}]
-  // based on perm=[${perm.toString()}]
-
-  const output = outputVariable('output', dataType, outputShape);
-  const input = inputVariable('a', dataType, inputShape);
+  const inputDataType = inputTensor.dataType;
+  const inputRank = inputTensor.dims.length;
+  const perm = getAdjustedPerm(inputRank, permAttr);
+  const useShapesUniforms = enableShapesUniforms(inputRank);
+  const outputShape = getOutputShape(inputTensor.dims, perm);
+  const outShapeOrRank = useShapesUniforms ? outputShape.length : outputShape;
+  const inShapeOrRank = useShapesUniforms ? inputRank : inputTensor.dims;
+  const output = outputVariable('output', inputDataType, outShapeOrRank);
+  const input = inputVariable('a', inputDataType, inShapeOrRank);
 
   const getShaderSource = (shaderHelper: ShaderHelper) => `
-  ${shaderHelper.declareVariables(input, output)}
+  ${shaderHelper.registerUniform('output_size', 'u32').declareVariables(input, output)}
 
-  ${permFunctionBody(perm, rank, input, output)}
+  ${permFunctionBody(perm, inputRank, input, output)}
 
   ${shaderHelper.mainStart()}
-    ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes(outputSize)}
+    ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes('uniforms.output_size')}
 
     let indices = ${output.offsetToIndices('global_idx')};
     let aIndices = perm(indices);
@@ -68,20 +60,31 @@ export const createTransposeProgramInfo = (inputTensor: TensorView, permAttr: nu
     ${output.setByOffset('global_idx', input.getByIndices('aIndices'))}
   }`;
   return {
-    ...transposeProgramMetadata,
-    outputs: [{dims: outputShape, dataType: inputTensor.dataType, gpuDataType: GpuDataType.default}],
+    name: 'Transpose',
+    shaderCache: {hint: `${permAttr}`, inputDependencies: useShapesUniforms ? ['rank'] : ['dims']},
+    getRunData: (inputs) => {
+      const outputSize = ShapeUtil.size(outputShape);
+      return {
+        outputs: [{dims: outputShape, dataType: inputs[0].dataType}],
+        dispatchGroup: {x: Math.ceil(outputSize / 64 /* workgroup size */)},
+        programUniforms: useShapesUniforms ?
+            [
+              {type: 'uint32', data: outputSize},
+              ...createTensorShapeVariables(inputs[0].dims),
+              ...createTensorShapeVariables(outputShape),
+            ] :
+            [
+              {type: 'uint32', data: outputSize},
+            ],
+      };
+    },
     getShaderSource,
-    dispatchGroup: () => ({x: Math.ceil(outputSize / 64 /* workgroup size */)})
   };
 };
 
 export const transpose = (context: ComputeContext, attributes: TransposeAttributes): void => {
   validateInputs(context.inputs);
-  context.compute({
-    ...transposeProgramMetadata,
-    cacheHint: attributes.cacheKey,
-    get: () => createTransposeProgramInfo(context.inputs[0], attributes.perm)
-  });
+  context.compute(createTransposeProgramInfo(context.inputs[0], attributes.perm));
 };
 
 export const parseTransposeAttributes = (attributes: Record<string, unknown>): TransposeAttributes =>
diff --git a/js/web/lib/wasm/jsep/webgpu/ops/unary-op.ts b/js/web/lib/wasm/jsep/webgpu/ops/unary-op.ts
index f08d7a77d1099..119609e06f5a3 100644
--- a/js/web/lib/wasm/jsep/webgpu/ops/unary-op.ts
+++ b/js/web/lib/wasm/jsep/webgpu/ops/unary-op.ts
@@ -5,7 +5,7 @@ import {DataType} from '../../../wasm-common';
 import {TensorView} from '../../tensor-view';
 import {MAX_CLIP, MIN_CLIP, ShapeUtil} from '../../util';
 import {AttributeWithCacheKey, createAttributeWithCacheKey} from '../attribute-with-cache-key';
-import {ComputeContext, GpuDataType, ProgramInfo, ProgramInfoLoader, ProgramMetadata} from '../types';
+import {ComputeContext, ProgramInfo} from '../types';
 
 import {inputVariable, outputVariable, ShaderHelper, tensorTypeToWsglStorageType} from './common';
 
@@ -29,12 +29,12 @@ const createElementwiseProgramShader =
       const output = outputVariable('outputData', outputDataType, [vecSize], 4);
 
       return `
-  ${shaderHelper.declareVariables(input, output)}
+      ${shaderHelper.registerUniform('vec_size', 'u32').declareVariables(input, output)}
 
   ${additionalImplementation ?? ''}
 
   ${shaderHelper.mainStart()}
-    ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes(vecSize)}
+    ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes('uniforms.vec_size')}
 
     let a = ${input.getByOffset('global_idx')};
     ${output.setByOffset('global_idx', expression)}
@@ -42,51 +42,47 @@ const createElementwiseProgramShader =
     };
 
 const createElementwiseProgramInfo =
-    (metadata: ProgramMetadata, input: TensorView, outputDataType: number, funcCall: ElementwiseFunctionCall,
-     additionalImplementation?: string): ProgramInfo => ({
-      ...metadata,
+    (input: TensorView, name: string, funcCall: ElementwiseFunctionCall, additionalImplementation?: string,
+     cacheKey?: string, outputDataType: number = input.dataType): ProgramInfo => ({
+      name,
+      shaderCache: {hint: cacheKey, inputDependencies: ['type']},
       getShaderSource: shaderHelper => createElementwiseProgramShader(
           shaderHelper, ShapeUtil.size(input.dims), input.dataType, outputDataType, funcCall, additionalImplementation),
-      outputs: [{dims: input.dims, dataType: outputDataType, gpuDataType: GpuDataType.default}],
-      dispatchGroup: (inputTensors) =>
-          ({x: Math.ceil(ShapeUtil.size(inputTensors[0].dims) / 64 /* workgroup size */ / 4 /* vec size */)})
+      getRunData: (inputTensors) => ({
+        outputs: [{dims: input.dims, dataType: outputDataType}],
+        dispatchGroup:
+            {x: Math.ceil(ShapeUtil.size(inputTensors[0].dims) / 64 /* workgroup size */ / 4 /* vec size */)},
+        programUniforms: [
+          {type: 'uint32', data: Math.ceil(ShapeUtil.size(input.dims) / 4)},
+        ],
+      })
     });
 
-const createElementwiseProgramInfoLoader =
-    (input: TensorView, name: string, funcCall: ElementwiseFunctionCall, additionalImplementation?: string,
-     cacheKey?: string, outputDataType: number = input.dataType): ProgramInfoLoader => {
-      const metadata: ProgramMetadata = {name, inputTypes: [GpuDataType.default], cacheHint: cacheKey};
-      return {
-        ...metadata,
-        get: () => createElementwiseProgramInfo(metadata, input, outputDataType, funcCall, additionalImplementation)
-      };
-    };
-
 export const abs = (context: ComputeContext): void => {
-  context.compute(createElementwiseProgramInfoLoader(context.inputs[0], 'Abs', 'abs'));
+  context.compute(createElementwiseProgramInfo(context.inputs[0], 'Abs', 'abs'));
 };
 
 export const acos = (context: ComputeContext): void => {
-  context.compute(createElementwiseProgramInfoLoader(context.inputs[0], 'Acos', 'acos'));
+  context.compute(createElementwiseProgramInfo(context.inputs[0], 'Acos', 'acos'));
 };
 
 export const acosh = (context: ComputeContext): void => {
-  context.compute(createElementwiseProgramInfoLoader(context.inputs[0], 'Acosh', 'acosh'));
+  context.compute(createElementwiseProgramInfo(context.inputs[0], 'Acosh', 'acosh'));
 };
 
 export const asin = (context: ComputeContext): void => {
-  context.compute(createElementwiseProgramInfoLoader(context.inputs[0], 'Asin', 'asin'));
+  context.compute(createElementwiseProgramInfo(context.inputs[0], 'Asin', 'asin'));
 };
 
 export const asinh = (context: ComputeContext): void => {
-  context.compute(createElementwiseProgramInfoLoader(context.inputs[0], 'Asinh', 'asinh'));
+  context.compute(createElementwiseProgramInfo(context.inputs[0], 'Asinh', 'asinh'));
 };
 
 export const atan = (context: ComputeContext): void => {
-  context.compute(createElementwiseProgramInfoLoader(context.inputs[0], 'Atan', 'atan'));
+  context.compute(createElementwiseProgramInfo(context.inputs[0], 'Atan', 'atan'));
 };
 export const atanh = (context: ComputeContext): void => {
-  context.compute(createElementwiseProgramInfoLoader(context.inputs[0], 'Atanh', 'atanh'));
+  context.compute(createElementwiseProgramInfo(context.inputs[0], 'Atanh', 'atanh'));
 };
 
 export interface CastAttributes extends AttributeWithCacheKey {
@@ -119,8 +115,8 @@ export const cast = (context: ComputeContext, attributes: CastAttributes): void
     default:
       throw new RangeError(`not supported type (specified in attribute 'to' from 'Cast' operator): ${attributes.to}`);
   }
-  context.compute(createElementwiseProgramInfoLoader(
-      context.inputs[0], 'Cast', func, undefined, attributes.cacheKey, attributes.to));
+  context.compute(
+      createElementwiseProgramInfo(context.inputs[0], 'Cast', func, undefined, attributes.cacheKey, attributes.to));
 };
 
 export interface ClipAttributes extends AttributeWithCacheKey {
@@ -128,10 +124,17 @@ export interface ClipAttributes extends AttributeWithCacheKey {
   readonly max: number;
 }
 
-export const clipV10 = (context: ComputeContext, attributes: ClipAttributes): void => {
+const generateClipAttributesFromInputs = (inputs: readonly TensorView[]): ClipAttributes => {
+  const min = (inputs.length >= 2) ? inputs[1].getFloat32Array()[0] : MIN_CLIP;
+  const max = (inputs.length >= 3) ? inputs[2].getFloat32Array()[0] : MAX_CLIP;
+  return createAttributeWithCacheKey({min, max});
+};
+
+export const clip = (context: ComputeContext, clipAttributes: ClipAttributes): void => {
+  const attributes = context.inputs.length === 1 ? clipAttributes : generateClipAttributesFromInputs(context.inputs);
   const dataType = tensorTypeToWsglStorageType(context.inputs[0].dataType);
   context.compute(
-      createElementwiseProgramInfoLoader(
+      createElementwiseProgramInfo(
           context.inputs[0], 'Clip', a => `clamp(${a}, clip_min_, clip_max_)`, `
     const clip_min_: vec4<${dataType}> = vec4(${dataType}(${attributes.min}));
     const clip_max_: vec4<${dataType}> = vec4(${dataType}(${attributes.max}));
@@ -139,27 +142,17 @@ export const clipV10 = (context: ComputeContext, attributes: ClipAttributes): vo
           attributes.cacheKey),
       {inputs: [0]});
 };
-const generateClipAttributesFromInputs = (inputs: readonly TensorView[]): ClipAttributes => {
-  const min = (inputs.length >= 2) ? inputs[1].getFloat32Array()[0] : MIN_CLIP;
-  const max = (inputs.length >= 3) ? inputs[2].getFloat32Array()[0] : MAX_CLIP;
-  return createAttributeWithCacheKey({min, max});
-};
-
-export const clip = (context: ComputeContext): void => {
-  const attributes = generateClipAttributesFromInputs(context.inputs);
-  clipV10(context, attributes);
-};
 
 export const ceil = (context: ComputeContext): void => {
-  context.compute(createElementwiseProgramInfoLoader(context.inputs[0], 'Ceil', 'ceil'));
+  context.compute(createElementwiseProgramInfo(context.inputs[0], 'Ceil', 'ceil'));
 };
 
 export const cos = (context: ComputeContext): void => {
-  context.compute(createElementwiseProgramInfoLoader(context.inputs[0], 'Cos', 'cos'));
+  context.compute(createElementwiseProgramInfo(context.inputs[0], 'Cos', 'cos'));
 };
 
 export const cosh = (context: ComputeContext): void => {
-  context.compute(createElementwiseProgramInfoLoader(context.inputs[0], 'Cosh', 'cosh'));
+  context.compute(createElementwiseProgramInfo(context.inputs[0], 'Cosh', 'cosh'));
 };
 
 export interface AlphaAttributes extends AttributeWithCacheKey {
@@ -170,7 +163,7 @@ export const parseAlphaAttributes = (attributes: Record<string, unknown>): Alpha
     createAttributeWithCacheKey(attributes as {alpha: number});
 
 export const elu = (context: ComputeContext, attributes: AlphaAttributes): void => {
-  context.compute(createElementwiseProgramInfoLoader(
+  context.compute(createElementwiseProgramInfo(
       context.inputs[0], 'Elu', a => `elu_vf32(${a})`, `
   const elu_alpha_: f32 = f32(${attributes.alpha});
 
@@ -200,79 +193,79 @@ fn erf_vf32(v: ${dataType}) -> ${dataType} {
 
 export const erf = (context: ComputeContext): void => {
   const dataType = tensorTypeToWsglStorageType(context.inputs[0].dataType);
-  context.compute(createElementwiseProgramInfoLoader(
+  context.compute(createElementwiseProgramInfo(
       context.inputs[0], 'Erf', a => `erf_vf32(${a})`, erfImpl(`vec4<${dataType}>`, dataType)));
 };
 
 export const exp = (context: ComputeContext): void => {
-  context.compute(createElementwiseProgramInfoLoader(context.inputs[0], 'Exp', 'exp'));
+  context.compute(createElementwiseProgramInfo(context.inputs[0], 'Exp', 'exp'));
 };
 
 export const floor = (context: ComputeContext): void => {
-  context.compute(createElementwiseProgramInfoLoader(context.inputs[0], 'Floor', 'floor'));
+  context.compute(createElementwiseProgramInfo(context.inputs[0], 'Floor', 'floor'));
 };
 
 export const gelu = (context: ComputeContext): void => {
   const dataType = tensorTypeToWsglStorageType(context.inputs[0].dataType);
-  context.compute(createElementwiseProgramInfoLoader(
+  context.compute(createElementwiseProgramInfo(
       context.inputs[0], 'Gelu', a => `0.5 * ${a} * (1.0 + erf_vf32(${a} * 0.7071067811865475))`,
       erfImpl(`vec4<${dataType}>`, dataType)));
 };
 
 export const leakyRelu = (context: ComputeContext, attributes: AlphaAttributes): void => {
-  context.compute(createElementwiseProgramInfoLoader(
+  context.compute(createElementwiseProgramInfo(
       context.inputs[0], 'LeakyRelu', a => `select(leaky_relu_alpha_ * ${a}, ${a}, ${a} >= vec4<f32>(0.0))`,
       `const leaky_relu_alpha_: f32 = f32(${attributes.alpha});`, attributes.cacheKey));
 };
 
 export const not = (context: ComputeContext): void => {
-  context.compute(createElementwiseProgramInfoLoader(context.inputs[0], 'Not', a => `!${a}`));
+  context.compute(createElementwiseProgramInfo(context.inputs[0], 'Not', a => `!${a}`));
 };
 
 export const neg = (context: ComputeContext): void => {
-  context.compute(createElementwiseProgramInfoLoader(context.inputs[0], 'Neg', a => `-${a}`));
+  context.compute(createElementwiseProgramInfo(context.inputs[0], 'Neg', a => `-${a}`));
 };
 
 export const reciprocal = (context: ComputeContext): void => {
-  context.compute(createElementwiseProgramInfoLoader(context.inputs[0], 'Reciprocal', a => `1.0/${a}`));
+  context.compute(createElementwiseProgramInfo(context.inputs[0], 'Reciprocal', a => `1.0/${a}`));
 };
 
 export const relu = (context: ComputeContext): void => {
-  context.compute(createElementwiseProgramInfoLoader(
+  context.compute(createElementwiseProgramInfo(
       context.inputs[0], 'Relu', a => `select(vec4<f32>(0.0), ${a}, ${a} > vec4<f32>(0.0))`));
 };
 
 export const sigmoid = (context: ComputeContext): void => {
-  context.compute(createElementwiseProgramInfoLoader(context.inputs[0], 'Sigmoid', a => `(1.0 / (1.0 + exp(-${a})))`));
+  context.compute(createElementwiseProgramInfo(context.inputs[0], 'Sigmoid', a => `(1.0 / (1.0 + exp(-${a})))`));
 };
 
 export const sin = (context: ComputeContext): void => {
-  context.compute(createElementwiseProgramInfoLoader(context.inputs[0], 'Sin', 'sin'));
+  context.compute(createElementwiseProgramInfo(context.inputs[0], 'Sin', 'sin'));
 };
 
 export const sinh = (context: ComputeContext): void => {
-  context.compute(createElementwiseProgramInfoLoader(context.inputs[0], 'Sinh', 'sinh'));
+  context.compute(createElementwiseProgramInfo(context.inputs[0], 'Sinh', 'sinh'));
 };
 
 export const sqrt = (context: ComputeContext): void => {
-  context.compute(createElementwiseProgramInfoLoader(context.inputs[0], 'Sqrt', 'sqrt'));
+  context.compute(createElementwiseProgramInfo(context.inputs[0], 'Sqrt', 'sqrt'));
 };
 
 export const tan = (context: ComputeContext): void => {
-  context.compute(createElementwiseProgramInfoLoader(context.inputs[0], 'Tan', 'tan'));
+  context.compute(createElementwiseProgramInfo(context.inputs[0], 'Tan', 'tan'));
 };
 
 export const tanh = (context: ComputeContext): void => {
-  context.compute(createElementwiseProgramInfoLoader(context.inputs[0], 'Tanh', 'tanh'));
+  context.compute(createElementwiseProgramInfo(context.inputs[0], 'Tanh', 'tanh'));
 };
 
 export const thresholdedRelu = (context: ComputeContext, attributes: AlphaAttributes): number => {
-  context.compute(createElementwiseProgramInfoLoader(
+  context.compute(createElementwiseProgramInfo(
       context.inputs[0], 'ThresholdedRelu', a => `select(vec4<f32>(0.0), ${a}, ${a} > thresholded_relu_alpha_)`,
       `const thresholded_relu_alpha_: vec4<f32> = vec4<f32>(${attributes.alpha});`, attributes.cacheKey));
   return 0;
 };
 
 export const log = (context: ComputeContext): void => {
-  context.compute(createElementwiseProgramInfoLoader(context.inputs[0], 'Log', 'log'));
+  context.compute(createElementwiseProgramInfo(context.inputs[0], 'Log', 'log'));
 };
diff --git a/js/web/lib/wasm/jsep/webgpu/ops/where.ts b/js/web/lib/wasm/jsep/webgpu/ops/where.ts
new file mode 100644
index 0000000000000..6f66dd86b4088
--- /dev/null
+++ b/js/web/lib/wasm/jsep/webgpu/ops/where.ts
@@ -0,0 +1,106 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+import {DataType} from '../../../wasm-common';
+import {TensorView} from '../../tensor-view';
+import {BroadcastUtil, ShapeUtil} from '../../util';
+import {ComputeContext, ProgramInfo} from '../types';
+
+import {inputVariable, outputVariable, ShaderHelper} from './common';
+
+const createWhereOpProgramShader =
+    (shaderHelper: ShaderHelper, inputs: readonly TensorView[], dimsOutput: readonly number[], isBroadcast: boolean,
+     typeOutput: number) => {
+      const outputSize = ShapeUtil.size(dimsOutput);
+      const vecSize = Math.ceil(outputSize / 4);
+
+      const output = outputVariable('outputData', typeOutput, dimsOutput, 4);
+      const a = inputVariable('aData', inputs[1].dataType, inputs[1].dims, 4);
+      const b = inputVariable('bData', inputs[2].dataType, inputs[2].dims, 4);
+      const c = inputVariable('cData', inputs[0].dataType, inputs[0].dims, 4);
+
+      let assignment: string;
+      const expression = (a: string, b: string, c: string) => `select(${b}, ${a}, ${c})`;
+      if (!isBroadcast) {
+        assignment = output.setByOffset(
+            'global_idx',
+            expression(a.getByOffset('global_idx'), b.getByOffset('global_idx'), c.getByOffset('global_idx')));
+      } else {
+        const singleAssignment = (resStr: string, x: number, typeCast = '') => {
+          const expressionA = `aData[indexA${x}][componentA${x}]`;
+          const expressionB = `bData[indexB${x}][componentB${x}]`;
+          // eslint-disable-next-line no-bitwise
+          const expressionC = `bool(cData[indexC${x}] & ${0xff000000 >>> ((3 - x) * 8)}u)`;
+          return `
+            let outputIndices${x} = ${output.offsetToIndices(`global_idx * 4u + ${x}u`)};
+            let offsetA${x} = ${a.broadcastedIndicesToOffset(`outputIndices${x}`, output)};
+            let offsetB${x} = ${b.broadcastedIndicesToOffset(`outputIndices${x}`, output)};
+            let offsetC${x} = ${c.broadcastedIndicesToOffset(`outputIndices${x}`, output)};
+            let indexA${x} = offsetA${x} / 4u;
+            let indexB${x} = offsetB${x} / 4u;
+            let indexC${x} = offsetC${x} / 4u;
+            let componentA${x} = offsetA${x} % 4u;
+            let componentB${x} = offsetB${x} % 4u;
+            ${resStr}[${x}] = ${typeCast}(${expression(expressionA, expressionB, expressionC)});
+          `;
+        };
+        if (typeOutput === DataType.bool) {
+          assignment = `
+            var data = vec4<u32>(0);
+            ${singleAssignment('data', 0, 'u32')}
+            ${singleAssignment('data', 1, 'u32')}
+            ${singleAssignment('data', 2, 'u32')}
+            ${singleAssignment('data', 3, 'u32')}
+            outputData[global_idx] = dot(vec4<u32>(0x1, 0x100, 0x10000, 0x1000000), vec4<u32>(data));`;
+        } else {
+          assignment = `
+            ${singleAssignment('outputData[global_idx]', 0)}
+            ${singleAssignment('outputData[global_idx]', 1)}
+            ${singleAssignment('outputData[global_idx]', 2)}
+            ${singleAssignment('outputData[global_idx]', 3)}
+          `;
+        }
+      }
+
+      return `
+        ${shaderHelper.declareVariables(c, a, b, output)}
+        ${shaderHelper.mainStart()}
+        ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes(vecSize)}
+        ${assignment}
+      }`;
+    };
+
+const createWhereOpProgramInfo = (inputs: readonly TensorView[]): ProgramInfo => {
+  const dimsA = inputs[1].dims;
+  const dimsB = inputs[2].dims;
+  const dimsC = inputs[0].dims;
+  const outputDataType = inputs[1].dataType;
+
+  const isBroadcast = !(ShapeUtil.areEqual(dimsA, dimsB) && ShapeUtil.areEqual(dimsB, dimsC));
+  let outputShape = dimsA;
+  let outputSize = ShapeUtil.size(dimsA);
+  // TODO: deal with zero-sized tensors (eg. dims=[1,0])
+
+  if (isBroadcast) {
+    const calculatedShape = BroadcastUtil.calcShape(BroadcastUtil.calcShape(dimsA, dimsB, false)!, dimsC, false);
+    if (!calculatedShape) {
+      throw new Error('Can\'t perform where op on the given tensors');
+    }
+    outputShape = calculatedShape;
+    outputSize = ShapeUtil.size(outputShape);
+  }
+
+  return {
+    name: 'Where',
+    getShaderSource: (shaderHelper) =>
+        createWhereOpProgramShader(shaderHelper, inputs, outputShape, isBroadcast, outputDataType),
+    getRunData: () => ({
+      outputs: [{dims: outputShape, dataType: outputDataType}],
+      dispatchGroup: {x: Math.ceil(outputSize / 64 /* workgroup size */ / 4 /* vec size */)}
+    }),
+  };
+};
+
+export const where = (context: ComputeContext): void => {
+  context.compute(createWhereOpProgramInfo(context.inputs));
+};
diff --git a/js/web/lib/wasm/jsep/webgpu/program-manager.ts b/js/web/lib/wasm/jsep/webgpu/program-manager.ts
index cf2687e4c7382..0b0a545f46481 100644
--- a/js/web/lib/wasm/jsep/webgpu/program-manager.ts
+++ b/js/web/lib/wasm/jsep/webgpu/program-manager.ts
@@ -32,18 +32,12 @@ export class ProgramManager {
   setArtifact(key: unknown, artifact: Artifact): void {
     this.repo.set(key, artifact);
   }
-  run(buildArtifact: Artifact, inputsTensorView: readonly TensorView[], inputs: GpuData[], outputs: GpuData[],
-      dispatchGroup: [number, number, number]): void {
+  run(buildArtifact: Artifact, inputTensorViews: readonly TensorView[], outputTensorViews: readonly TensorView[],
+      inputs: GpuData[], outputs: GpuData[], dispatchGroup: [number, number, number],
+      uniformBufferBinding: GPUBindingResource|undefined): void {
     const device = this.backend.device;
-    const computePassEncoder = this.backend.getComputePassEncoder();
-    const profilingEnabled = this.backend.supportTimestampQuery && this.backend.env.webgpu.profilingMode === 'default';
-    if (profilingEnabled) {
-      // profiling write start timestamp
-
-      // eslint-disable-next-line @typescript-eslint/no-explicit-any
-      (computePassEncoder as any).writeTimestamp(this.backend.profilingQuerySet, 0);
-    }
 
+    const computePassEncoder = this.backend.getComputePassEncoder();
     computePassEncoder.setPipeline(buildArtifact.computePipeline);
     const entries = [];
     for (const input of inputs) {
@@ -52,6 +46,9 @@ export class ProgramManager {
     for (const output of outputs) {
       entries.push({binding: entries.length, resource: {buffer: output.buffer}});
     }
+    if (uniformBufferBinding) {
+      entries.push({binding: entries.length, resource: uniformBufferBinding});
+    }
     const bindGroup = device.createBindGroup(
         {layout: buildArtifact.computePipeline.getBindGroupLayout(0), entries, label: buildArtifact.programInfo.name});
     computePassEncoder.setBindGroup(0, bindGroup);
@@ -60,43 +57,39 @@ export class ProgramManager {
 
     this.backend.pendingDispatchNumber++;
 
-    if (profilingEnabled) {
-      // profiling write end timestamp
-
-      // eslint-disable-next-line @typescript-eslint/no-explicit-any
-      (computePassEncoder as any).writeTimestamp(this.backend.profilingQuerySet, 1);
-      if (this.backend.profilingQueryData == null) {
-        this.backend.profilingQueryData =
+    if (this.backend.isQueryEnabled()) {
+      if (typeof this.backend.queryData === 'undefined') {
+        this.backend.queryData = this.backend.gpuDataManager.create(
             // eslint-disable-next-line no-bitwise
-            this.backend.gpuDataManager.create(16, GPUBufferUsage.COPY_SRC | GPUBufferUsage.QUERY_RESOLVE);
+            this.backend.querySetCount * 8, GPUBufferUsage.COPY_SRC | GPUBufferUsage.QUERY_RESOLVE);
       }
-      // eslint-disable-next-line no-bitwise
-      const syncData = this.backend.gpuDataManager.create(16, GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST);
+      const syncData = this.backend.gpuDataManager.create(
+          // eslint-disable-next-line no-bitwise
+          this.backend.querySetCount * 8, GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST);
 
       this.backend.endComputePass();
-      this.backend.getCommandEncoder().resolveQuerySet(
-          this.backend.profilingQuerySet, 0, 2, this.backend.profilingQueryData.buffer, 0);
+      this.backend.getCommandEncoder().resolveQuerySet(this.backend.querySet!, 0, 2, this.backend.queryData.buffer, 0);
       this.backend.getCommandEncoder().copyBufferToBuffer(
-          this.backend.profilingQueryData.buffer, 0, syncData.buffer, 0, 16);
+          this.backend.queryData.buffer, 0, syncData.buffer, 0, this.backend.querySetCount * 8);
       this.backend.flush();
 
       const kernelId = this.backend.currentKernelId!;
       const kernelInfo = this.backend.kernels.get(kernelId)!;
       const kernelName = `[${kernelInfo[0]}] ${kernelInfo[1]}`;
 
-      syncData.buffer.mapAsync(GPUMapMode.READ).then(() => {
+      void syncData.buffer.mapAsync(GPUMapMode.READ).then(() => {
         const mappedData = new BigUint64Array(syncData.buffer.getMappedRange());
         const startTimeU64 = mappedData[0];
         const endTimeU64 = mappedData[1];
 
         syncData.buffer.unmap();
 
-        if (typeof this.backend.profilingTimeBase === 'undefined') {
-          this.backend.profilingTimeBase = startTimeU64;
+        if (typeof this.backend.queryTimeBase === 'undefined') {
+          this.backend.queryTimeBase = startTimeU64;
         }
 
-        const startTime = Number(startTimeU64 - this.backend.profilingTimeBase);
-        const endTime = Number(endTimeU64 - this.backend.profilingTimeBase);
+        const startTime = Number(startTimeU64 - this.backend.queryTimeBase);
+        const endTime = Number(endTimeU64 - this.backend.queryTimeBase);
 
         if (!Number.isSafeInteger(startTime) || !Number.isSafeInteger(endTime)) {
           throw new RangeError('incorrect timestamp range');
@@ -104,11 +97,11 @@ export class ProgramManager {
 
         this.backend.gpuDataManager.release(syncData.id);
         let inputShapes = '';
-        inputsTensorView.forEach((value, i) => {
+        inputTensorViews.forEach((value, i) => {
           inputShapes += `input[${i}]: [${value.dims}] | ${tensorDataTypeEnumToString(value.dataType)}, `;
         });
         let outputShapes = '';
-        buildArtifact.programInfo.outputs.forEach((value, i) => {
+        outputTensorViews.forEach((value, i) => {
           outputShapes += `output[${i}]: [${value.dims}] | ${tensorDataTypeEnumToString(value.dataType)}, `;
         });
         // eslint-disable-next-line no-console
@@ -134,7 +127,7 @@ export class ProgramManager {
     const userCode = programInfo.getShaderSource(shaderHelper);
     const code = `${extensions.join('\n')}\n${shaderHelper.additionalImplementations}\n${userCode}`;
     const shaderModule = device.createShaderModule({code, label: programInfo.name});
-    LOG_DEBUG('verbose', () => `[WebGPU] shader code: ${code}`);
+    LOG_DEBUG('verbose', () => `[WebGPU] ${programInfo.name} shader code: ${code}`);
 
     const computePipeline = device.createComputePipeline(
         {compute: {module: shaderModule, entryPoint: 'main'}, layout: 'auto', label: programInfo.name});
@@ -142,7 +135,8 @@ export class ProgramManager {
     return {programInfo, computePipeline};
   }
 
-  normalizeDispatchGroupSize(dispatchGroup: ReturnType<ProgramInfo['dispatchGroup']>): [number, number, number] {
+  normalizeDispatchGroupSize(dispatchGroup: ReturnType<ProgramInfo['getRunData']>['dispatchGroup']):
+      [number, number, number] {
     const x = typeof dispatchGroup === 'number' ? dispatchGroup : dispatchGroup.x;
     const y = typeof dispatchGroup === 'number' ? 1 : (dispatchGroup.y || 1);
     const z = typeof dispatchGroup === 'number' ? 1 : (dispatchGroup.z || 1);
diff --git a/js/web/lib/wasm/jsep/webgpu/types.ts b/js/web/lib/wasm/jsep/webgpu/types.ts
index 78f80b89774e2..23fa33a9bba8f 100644
--- a/js/web/lib/wasm/jsep/webgpu/types.ts
+++ b/js/web/lib/wasm/jsep/webgpu/types.ts
@@ -21,69 +21,95 @@ export interface GpuData {
 export interface TensorInfo {
   dims: readonly number[];
   dataType: number;
-  gpuDataType: GpuDataType;
 }
 
 
-export interface ProgramVariable {
-  type: 'float'|'int';
-  name: string;
-  arrayLength?: number;
-  data: number|number[];
+export interface ProgramUniform {
+  type: 'int32'|'float32'|'uint32';
+  data: number|readonly number[];
 }
 
+/**
+ * Represent the dependency of a program on a specific input tensor.
+ *
+ * - 'none': the shader/uniform does not depend on this input's info
+ * - 'type': the shader/uniform depends on data type of this input
+ * - 'rank': the shader/uniform depends on data type and the rank of this input
+ * - 'dims': the shader/uniform depends on data type and the dims of this input
+ * - 'data': the shader/uniform depends on data type, the dims and the data of this input
+ */
+export type ProgramInputTensorInfoDependency = 'none'|'type'|'rank'|'dims'|'data';
 
-export interface ProgramMetadata {
+/**
+ * Represent information about a program's cache for shader.
+ */
+export interface ProgramShaderCacheInfo {
   /**
-   * the name of the program. used for debugging and profiling
+   * an optional string as a cache hint in the artifact cache. If this is not specified, the cache hint will be empty.
+   *
+   * This hint string should only contains initializing-time information, such as the attributes or any information of
+   * initializers. It should NOT contain any runtime information, such as the shape of inputs.
    */
-  name: string;
+  hint?: string;
 
   /**
-   * gpu data types for each input
-   */
-  inputTypes: GpuDataType[];
-  /**
-   * an optional string as a cache hint in the artifact cache
+   * an optional list of dependencies of the program on the input tensors. If this is not specified, the program depends
+   * on 'dims' of all inputs.
    */
-  cacheHint?: string;
+  inputDependencies?: ProgramInputTensorInfoDependency[];
 }
 
 /**
- * A ProgramInfoLoader allows
+ * Represent information about a program's cache for uniform.
  */
-export interface ProgramInfoLoader extends ProgramMetadata {
+export interface ProgramUniformCacheInfo {
+  /**
+   * an optional string as a cache hint in the uniform cache. If this is not specified, the cache hint will be empty.
+   *
+   * This hint string should only contains runtime information, such as the shape of inputs.
+   */
+  hint?: string;
+
   /**
-   * a function to get the program info
+   * an optional list of dependencies of the program on the input tensors. If this is not specified, the program depends
+   * on 'none' of all inputs.
    */
-  get(): ProgramInfo;
+  inputDependencies?: ProgramInputTensorInfoDependency[];
 }
 
+
 /**
  * A set of data that represent a shader program
  */
-export interface ProgramInfo extends ProgramMetadata {
+export interface ProgramInfo {
   /**
-   * information of uniform variables
+   * the name of the program. used for debugging and profiling
    */
-  variables?: ProgramVariable[];
+  name: string;
+
   /**
-   * tensor info for outputs
+   * an optional object describing the cache information of the program shader.
+   *
+   * If this is not specified, assume hint is empty and inputDependencies are ['dims'] for all inputs.
    */
-  outputs: TensorInfo[];
+  shaderCache?: ProgramShaderCacheInfo;
+
   /**
-   * the shader's processing source code
+   * the shader's processing source code.
+   *
+   * This function will be called when shader cache missed.
    */
   getShaderSource: (shaderHelper: ShaderHelper) => string;
+
   /**
-   * default is "main"
+   * A function to get run data required to run the program.
+   *
+   * This function will be called every time the program is executed. Should keep this function as simple as possible.
    */
-  // entryPoint: string;
-
-  dispatchGroup: (inputs: readonly TensorView[]) => {
-    x: number;
-    y?: number;
-    z?: number;
+  getRunData: (inputs: readonly TensorView[]) => {
+    outputs: readonly TensorInfo[];
+    dispatchGroup: {x: number; y?: number; z?: number};
+    programUniforms?: readonly ProgramUniform[];
   };
 }
 
@@ -143,7 +169,6 @@ export interface ComputeContext {
    */
   readonly outputCount: number;
 
-  compute(program: ProgramInfoLoader|ProgramInfo, inputsOutputsMapping?: ComputeContextInputsOutputsMapping):
-      TensorView[];
+  compute(program: ProgramInfo, inputsOutputsMapping?: ComputeContextInputsOutputsMapping): TensorView[];
   output(index: number, dims: readonly number[]): number;
 }
diff --git a/js/web/lib/wasm/proxy-messages.ts b/js/web/lib/wasm/proxy-messages.ts
index e5a2d8c2351b8..efeb086256cf3 100644
--- a/js/web/lib/wasm/proxy-messages.ts
+++ b/js/web/lib/wasm/proxy-messages.ts
@@ -1,22 +1,26 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
-import {Env, InferenceSession, Tensor} from 'onnxruntime-common';
+import type {Env, InferenceSession, Tensor} from 'onnxruntime-common';
 
-/**
- *  tuple elements are: ORT element type; dims; tensor data
- */
-export type SerializableTensor = [Tensor.Type, readonly number[], Tensor.DataType];
+export type SerializableTensorMetadata =
+    [dataType: Tensor.Type, dims: readonly number[], data: Tensor.DataType, location: 'cpu'];
 
-/**
- *  tuple elements are: InferenceSession handle; input names; output names
- */
-export type SerializableSessionMetadata = [number, string[], string[]];
+export type GpuBufferMetadata = {
+  gpuBuffer: Tensor.GpuBufferType;
+  download?: () => Promise<Tensor.DataTypeMap[Tensor.GpuBufferDataTypes]>;
+  dispose?: () => void;
+};
 
-/**
- *  tuple elements are: modeldata.offset, modeldata.length
- */
-export type SerializableModeldata = [number, number];
+export type UnserializableTensorMetadata =
+    [dataType: Tensor.Type, dims: readonly number[], data: GpuBufferMetadata, location: 'gpu-buffer']|
+    [dataType: Tensor.Type, dims: readonly number[], data: Tensor.DataType, location: 'cpu-pinned'];
+
+export type TensorMetadata = SerializableTensorMetadata|UnserializableTensorMetadata;
+
+export type SerializableSessionMetadata = [sessionHandle: number, inputNames: string[], outputNames: string[]];
+
+export type SerializableModeldata = [modelDataOffset: number, modelDataLength: number];
 
 interface MessageError {
   err?: string;
@@ -58,10 +62,10 @@ interface MessageReleaseSession extends MessageError {
 interface MessageRun extends MessageError {
   type: 'run';
   in ?: {
-    sessionId: number; inputIndices: number[]; inputs: SerializableTensor[]; outputIndices: number[];
+    sessionId: number; inputIndices: number[]; inputs: SerializableTensorMetadata[]; outputIndices: number[];
     options: InferenceSession.RunOptions;
   };
-  out?: SerializableTensor[];
+  out?: SerializableTensorMetadata[];
 }
 
 interface MesssageEndProfiling extends MessageError {
@@ -69,5 +73,10 @@ interface MesssageEndProfiling extends MessageError {
   in ?: number;
 }
 
+interface MessageIsOrtEnvInitialized extends MessageError {
+  type: 'is-ort-env-initialized';
+  out?: boolean;
+}
+
 export type OrtWasmMessage = MessageInitWasm|MessageInitOrt|MessageCreateSessionAllocate|MessageCreateSessionFinalize|
-    MessageCreateSession|MessageReleaseSession|MessageRun|MesssageEndProfiling;
+    MessageCreateSession|MessageReleaseSession|MessageRun|MesssageEndProfiling|MessageIsOrtEnvInitialized;
diff --git a/js/web/lib/wasm/proxy-worker/main.ts b/js/web/lib/wasm/proxy-worker/main.ts
index 2ea3645bf387e..1cb6d9e391e4f 100644
--- a/js/web/lib/wasm/proxy-worker/main.ts
+++ b/js/web/lib/wasm/proxy-worker/main.ts
@@ -3,15 +3,47 @@
 
 /// <reference lib="webworker" />
 
-import {OrtWasmMessage} from '../proxy-messages';
-import {createSession, createSessionAllocate, createSessionFinalize, endProfiling, extractTransferableBuffers, initRuntime, releaseSession, run} from '../wasm-core-impl';
+//
+// * type hack for "HTMLImageElement"
+//
+// in typescript, the type of "HTMLImageElement" is defined in lib.dom.d.ts, which is conflict with lib.webworker.d.ts.
+// when we use webworker, the lib.webworker.d.ts will be used, which does not have HTMLImageElement defined.
+//
+// we will get the following errors complaining that HTMLImageElement is not defined:
+//
+// ====================================================================================================================
+//
+// ../common/dist/cjs/tensor-factory.d.ts:187:29 - error TS2552: Cannot find name 'HTMLImageElement'. Did you mean
+// 'HTMLLIElement'?
+//
+// 187     fromImage(imageElement: HTMLImageElement, options?: TensorFromImageElementOptions):
+// Promise<TypedTensor<'float32'> | TypedTensor<'uint8'>>;
+//                                 ~~~~~~~~~~~~~~~~
+//
+// node_modules/@webgpu/types/dist/index.d.ts:83:7 - error TS2552: Cannot find name 'HTMLImageElement'. Did you mean
+// 'HTMLLIElement'?
+//
+// 83     | HTMLImageElement
+//          ~~~~~~~~~~~~~~~~
+//
+// ====================================================================================================================
+//
+// `HTMLImageElement` is only used in type declaration and not in real code. So we define it as `unknown` here to
+// bypass the type check.
+//
+declare global {
+  type HTMLImageElement = unknown;
+}
+
+import {OrtWasmMessage, SerializableTensorMetadata} from '../proxy-messages';
+import {createSession, createSessionAllocate, createSessionFinalize, endProfiling, extractTransferableBuffers, initRuntime, isOrtEnvInitialized, releaseSession, run} from '../wasm-core-impl';
 import {initializeWebAssembly} from '../wasm-factory';
 
 self.onmessage = (ev: MessageEvent<OrtWasmMessage>): void => {
   switch (ev.data.type) {
     case 'init-wasm':
       try {
-        initializeWebAssembly(ev.data.in)
+        initializeWebAssembly(ev.data.in!)
             .then(
                 () => postMessage({type: 'init-wasm'} as OrtWasmMessage),
                 err => postMessage({type: 'init-wasm', err} as OrtWasmMessage));
@@ -21,11 +53,10 @@ self.onmessage = (ev: MessageEvent<OrtWasmMessage>): void => {
       break;
     case 'init-ort':
       try {
-        initRuntime(ev.data.in).then(() => postMessage({type: 'init-ort'} as OrtWasmMessage), err => postMessage({
-                                                                                                type: 'init-ort',
-                                                                                                err
-                                                                                              } as OrtWasmMessage));
-        postMessage({type: 'init-ort'} as OrtWasmMessage);
+        initRuntime(ev.data.in!).then(() => postMessage({type: 'init-ort'} as OrtWasmMessage), err => postMessage({
+                                                                                                 type: 'init-ort',
+                                                                                                 err
+                                                                                               } as OrtWasmMessage));
       } catch (err) {
         postMessage({type: 'init-ort', err} as OrtWasmMessage);
       }
@@ -59,8 +90,7 @@ self.onmessage = (ev: MessageEvent<OrtWasmMessage>): void => {
       break;
     case 'release':
       try {
-        const handler = ev.data.in!;
-        releaseSession(handler);
+        releaseSession(ev.data.in!);
         postMessage({type: 'release'} as OrtWasmMessage);
       } catch (err) {
         postMessage({type: 'release', err} as OrtWasmMessage);
@@ -69,10 +99,16 @@ self.onmessage = (ev: MessageEvent<OrtWasmMessage>): void => {
     case 'run':
       try {
         const {sessionId, inputIndices, inputs, outputIndices, options} = ev.data.in!;
-        run(sessionId, inputIndices, inputs, outputIndices, options)
+        run(sessionId, inputIndices, inputs, outputIndices, new Array(outputIndices.length).fill(null), options)
             .then(
                 outputs => {
-                  postMessage({type: 'run', out: outputs} as OrtWasmMessage, extractTransferableBuffers(outputs));
+                  if (outputs.some(o => o[3] !== 'cpu')) {
+                    postMessage({type: 'run', err: 'Proxy does not support non-cpu tensor location.'});
+                  } else {
+                    postMessage(
+                        {type: 'run', out: outputs} as OrtWasmMessage,
+                        extractTransferableBuffers(outputs as SerializableTensorMetadata[]));
+                  }
                 },
                 err => {
                   postMessage({type: 'run', err} as OrtWasmMessage);
@@ -90,6 +126,14 @@ self.onmessage = (ev: MessageEvent<OrtWasmMessage>): void => {
         postMessage({type: 'end-profiling', err} as OrtWasmMessage);
       }
       break;
+    case 'is-ort-env-initialized':
+      try {
+        const ortEnvInitialized = isOrtEnvInitialized();
+        postMessage({type: 'is-ort-env-initialized', out: ortEnvInitialized} as OrtWasmMessage);
+      } catch (err) {
+        postMessage({type: 'is-ort-env-initialized', err} as OrtWasmMessage);
+      }
+      break;
     default:
   }
 };
diff --git a/js/web/lib/wasm/proxy-worker/tsconfig.json b/js/web/lib/wasm/proxy-worker/tsconfig.json
new file mode 100644
index 0000000000000..ec1044612a569
--- /dev/null
+++ b/js/web/lib/wasm/proxy-worker/tsconfig.json
@@ -0,0 +1,8 @@
+{
+  "extends": "../../../tsconfig.json",
+  "compilerOptions": {
+    "lib": ["WebWorker"]
+  },
+  "include": ["main.ts", "../../build-def.d.ts"],
+  "exclude": []
+}
diff --git a/js/web/lib/wasm/proxy-wrapper.ts b/js/web/lib/wasm/proxy-wrapper.ts
index 815b223e40379..069a1fa452dbc 100644
--- a/js/web/lib/wasm/proxy-wrapper.ts
+++ b/js/web/lib/wasm/proxy-wrapper.ts
@@ -3,7 +3,7 @@
 
 import {Env, env, InferenceSession} from 'onnxruntime-common';
 
-import {OrtWasmMessage, SerializableModeldata, SerializableSessionMetadata, SerializableTensor} from './proxy-messages';
+import {OrtWasmMessage, SerializableModeldata, SerializableSessionMetadata, SerializableTensorMetadata, TensorMetadata} from './proxy-messages';
 import * as core from './wasm-core-impl';
 import {initializeWebAssembly} from './wasm-factory';
 
@@ -22,8 +22,9 @@ const createSessionAllocateCallbacks: Array<PromiseCallbacks<SerializableModelda
 const createSessionFinalizeCallbacks: Array<PromiseCallbacks<SerializableSessionMetadata>> = [];
 const createSessionCallbacks: Array<PromiseCallbacks<SerializableSessionMetadata>> = [];
 const releaseSessionCallbacks: Array<PromiseCallbacks<void>> = [];
-const runCallbacks: Array<PromiseCallbacks<SerializableTensor[]>> = [];
+const runCallbacks: Array<PromiseCallbacks<SerializableTensorMetadata[]>> = [];
 const endProfilingCallbacks: Array<PromiseCallbacks<void>> = [];
+const isOrtEnvInitializedCallbacks: Array<PromiseCallbacks<boolean>> = [];
 
 const ensureWorker = (): void => {
   if (initializing || !initialized || aborted || !proxyWorker) {
@@ -92,6 +93,13 @@ const onProxyWorkerMessage = (ev: MessageEvent<OrtWasmMessage>): void => {
         endProfilingCallbacks.shift()![0]();
       }
       break;
+    case 'is-ort-env-initialized':
+      if (ev.data.err) {
+        isOrtEnvInitializedCallbacks.shift()![1](ev.data.err);
+      } else {
+        isOrtEnvInitializedCallbacks.shift()![0](ev.data.out!);
+      }
+      break;
     default:
   }
 };
@@ -121,9 +129,18 @@ export const initializeWebAssemblyInstance = async(): Promise<void> => {
 
     return new Promise<void>((resolve, reject) => {
       proxyWorker?.terminate();
-      // eslint-disable-next-line @typescript-eslint/no-var-requires, @typescript-eslint/no-require-imports
-      proxyWorker = require('worker-loader?inline=no-fallback!./proxy-worker/main').default() as Worker;
+
+      const workerUrl = URL.createObjectURL(new Blob(
+          [
+            // This require() function is handled by esbuild plugin to load file content as string.
+            // eslint-disable-next-line @typescript-eslint/no-require-imports
+            require('./proxy-worker/main')
+          ],
+          {type: 'text/javascript'}));
+      proxyWorker = new Worker(workerUrl, {name: 'ort-wasm-proxy-worker'});
+      proxyWorker.onerror = (ev: ErrorEvent) => reject(ev);
       proxyWorker.onmessage = onProxyWorkerMessage;
+      URL.revokeObjectURL(workerUrl);
       initWasmCallbacks = [resolve, reject];
       const message: OrtWasmMessage = {type: 'init-wasm', in : env.wasm};
       proxyWorker.postMessage(message);
@@ -177,6 +194,10 @@ export const createSessionFinalize = async(modeldata: SerializableModeldata, opt
 export const createSession =
     async(model: Uint8Array, options?: InferenceSession.SessionOptions): Promise<SerializableSessionMetadata> => {
   if (!BUILD_DEFS.DISABLE_WASM_PROXY && isProxy()) {
+    // check unsupported options
+    if (options?.preferredOutputLocation) {
+      throw new Error('session option "preferredOutputLocation" is not supported for proxy.');
+    }
     ensureWorker();
     return new Promise<SerializableSessionMetadata>((resolve, reject) => {
       createSessionCallbacks.push([resolve, reject]);
@@ -202,17 +223,27 @@ export const releaseSession = async(sessionId: number): Promise<void> => {
 };
 
 export const run = async(
-    sessionId: number, inputIndices: number[], inputs: SerializableTensor[], outputIndices: number[],
-    options: InferenceSession.RunOptions): Promise<SerializableTensor[]> => {
+    sessionId: number, inputIndices: number[], inputs: TensorMetadata[], outputIndices: number[],
+    outputs: Array<TensorMetadata|null>, options: InferenceSession.RunOptions): Promise<TensorMetadata[]> => {
   if (!BUILD_DEFS.DISABLE_WASM_PROXY && isProxy()) {
+    // check inputs location
+    if (inputs.some(t => t[3] !== 'cpu')) {
+      throw new Error('input tensor on GPU is not supported for proxy.');
+    }
+    // check outputs location
+    if (outputs.some(t => t)) {
+      throw new Error('pre-allocated output tensor is not supported for proxy.');
+    }
     ensureWorker();
-    return new Promise<SerializableTensor[]>((resolve, reject) => {
+    return new Promise<SerializableTensorMetadata[]>((resolve, reject) => {
       runCallbacks.push([resolve, reject]);
-      const message: OrtWasmMessage = {type: 'run', in : {sessionId, inputIndices, inputs, outputIndices, options}};
-      proxyWorker!.postMessage(message, core.extractTransferableBuffers(inputs));
+      const serializableInputs = inputs as SerializableTensorMetadata[];  // every input is on CPU.
+      const message: OrtWasmMessage =
+          {type: 'run', in : {sessionId, inputIndices, inputs: serializableInputs, outputIndices, options}};
+      proxyWorker!.postMessage(message, core.extractTransferableBuffers(serializableInputs));
     });
   } else {
-    return core.run(sessionId, inputIndices, inputs, outputIndices, options);
+    return core.run(sessionId, inputIndices, inputs, outputIndices, outputs, options);
   }
 };
 
@@ -228,3 +259,16 @@ export const endProfiling = async(sessionId: number): Promise<void> => {
     core.endProfiling(sessionId);
   }
 };
+
+export const isOrtEnvInitialized = async(): Promise<boolean> => {
+  if (!BUILD_DEFS.DISABLE_WASM_PROXY && isProxy()) {
+    ensureWorker();
+    return new Promise<boolean>((resolve, reject) => {
+      isOrtEnvInitializedCallbacks.push([resolve, reject]);
+      const message: OrtWasmMessage = {type: 'is-ort-env-initialized'};
+      proxyWorker!.postMessage(message);
+    });
+  } else {
+    return core.isOrtEnvInitialized();
+  }
+};
diff --git a/js/web/lib/wasm/session-handler.ts b/js/web/lib/wasm/session-handler-inference.ts
similarity index 55%
rename from js/web/lib/wasm/session-handler.ts
rename to js/web/lib/wasm/session-handler-inference.ts
index d8c5ae7886fe4..3ca34d957c572 100644
--- a/js/web/lib/wasm/session-handler.ts
+++ b/js/web/lib/wasm/session-handler-inference.ts
@@ -1,17 +1,44 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
-import {readFile} from 'fs';
-import {env, InferenceSession, SessionHandler, Tensor} from 'onnxruntime-common';
-import {promisify} from 'util';
+import {readFile} from 'node:fs/promises';
+import {env, InferenceSession, InferenceSessionHandler, SessionHandler, Tensor} from 'onnxruntime-common';
 
-import {SerializableModeldata} from './proxy-messages';
-import {createSession, createSessionAllocate, createSessionFinalize, endProfiling, initializeRuntime, releaseSession, run} from './proxy-wrapper';
+import {SerializableModeldata, TensorMetadata} from './proxy-messages';
+import {createSession, createSessionAllocate, createSessionFinalize, endProfiling, initializeRuntime, isOrtEnvInitialized, releaseSession, run} from './proxy-wrapper';
+import {isGpuBufferSupportedType} from './wasm-common';
 
-let runtimeInitialized: boolean;
 let runtimeInitializationPromise: Promise<void>|undefined;
 
-export class OnnxruntimeWebAssemblySessionHandler implements SessionHandler {
+export const encodeTensorMetadata = (tensor: Tensor, getName: () => string): TensorMetadata => {
+  switch (tensor.location) {
+    case 'cpu':
+      return [tensor.type, tensor.dims, tensor.data, 'cpu'];
+    case 'gpu-buffer':
+      return [tensor.type, tensor.dims, {gpuBuffer: tensor.gpuBuffer}, 'gpu-buffer'];
+    default:
+      throw new Error(`invalid data location: ${tensor.location} for ${getName()}`);
+  }
+};
+
+export const decodeTensorMetadata = (tensor: TensorMetadata): Tensor => {
+  switch (tensor[3]) {
+    case 'cpu':
+      return new Tensor(tensor[0], tensor[2], tensor[1]);
+    case 'gpu-buffer': {
+      const dataType = tensor[0];
+      if (!isGpuBufferSupportedType(dataType)) {
+        throw new Error(`not supported data type: ${dataType} for deserializing GPU tensor`);
+      }
+      const {gpuBuffer, download, dispose} = tensor[2];
+      return Tensor.fromGpuBuffer(gpuBuffer, {dataType, dims: tensor[1], download, dispose});
+    }
+    default:
+      throw new Error(`invalid data location: ${tensor[3]}`);
+  }
+};
+
+export class OnnxruntimeWebAssemblySessionHandler implements InferenceSessionHandler {
   private sessionId: number;
 
   inputNames: string[];
@@ -29,19 +56,18 @@ export class OnnxruntimeWebAssemblySessionHandler implements SessionHandler {
   }
 
   async loadModel(pathOrBuffer: string|Uint8Array, options?: InferenceSession.SessionOptions): Promise<void> {
-    if (!runtimeInitialized) {
+    if (!(await isOrtEnvInitialized())) {
       if (!runtimeInitializationPromise) {
         runtimeInitializationPromise = initializeRuntime(env);
       }
       await runtimeInitializationPromise;
       runtimeInitializationPromise = undefined;
-      runtimeInitialized = true;
     }
 
     if (typeof pathOrBuffer === 'string') {
       if (typeof process !== 'undefined' && process.versions && process.versions.node) {
         // node
-        const model = await promisify(readFile)(pathOrBuffer);
+        const model = await readFile(pathOrBuffer);
         [this.sessionId, this.inputNames, this.outputNames] = await createSession(model, options);
       } else {
         // browser
@@ -74,25 +100,31 @@ export class OnnxruntimeWebAssemblySessionHandler implements SessionHandler {
       inputIndices.push(index);
     });
 
+    const outputArray: Array<Tensor|null> = [];
     const outputIndices: number[] = [];
     Object.entries(fetches).forEach(kvp => {
       const name = kvp[0];
-      // TODO: support pre-allocated output
+      const tensor = kvp[1];
       const index = this.outputNames.indexOf(name);
       if (index === -1) {
         throw new Error(`invalid output '${name}'`);
       }
+      outputArray.push(tensor);
       outputIndices.push(index);
     });
 
-    const outputs =
-        await run(this.sessionId, inputIndices, inputArray.map(t => [t.type, t.dims, t.data]), outputIndices, options);
+    const inputs =
+        inputArray.map((t, i) => encodeTensorMetadata(t, () => `input "${this.inputNames[inputIndices[i]]}"`));
+    const outputs = outputArray.map(
+        (t, i) => t ? encodeTensorMetadata(t, () => `output "${this.outputNames[outputIndices[i]]}"`) : null);
+
+    const results = await run(this.sessionId, inputIndices, inputs, outputIndices, outputs, options);
 
-    const result: SessionHandler.ReturnType = {};
-    for (let i = 0; i < outputs.length; i++) {
-      result[this.outputNames[outputIndices[i]]] = new Tensor(outputs[i][0], outputs[i][2], outputs[i][1]);
+    const resultMap: SessionHandler.ReturnType = {};
+    for (let i = 0; i < results.length; i++) {
+      resultMap[this.outputNames[outputIndices[i]]] = outputArray[i] ?? decodeTensorMetadata(results[i]);
     }
-    return result;
+    return resultMap;
   }
 
   startProfiling(): void {
diff --git a/js/web/lib/wasm/session-handler-training.ts b/js/web/lib/wasm/session-handler-training.ts
new file mode 100644
index 0000000000000..7de3f4dc2c89e
--- /dev/null
+++ b/js/web/lib/wasm/session-handler-training.ts
@@ -0,0 +1,137 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+import {env, InferenceSession, OnnxValue, SessionHandler, Tensor, TrainingSessionHandler} from 'onnxruntime-common';
+
+import {SerializableModeldata, TensorMetadata} from './proxy-messages';
+import {decodeTensorMetadata, encodeTensorMetadata} from './session-handler-inference';
+import {createSessionAllocate, initRuntime, isOrtEnvInitialized} from './wasm-core-impl';
+import {createCheckpointHandle, createTrainingSessionHandle, getContiguousParameters, getParametersSize, loadParametersBuffer, releaseTrainingSessionAndCheckpoint, runTrainStep} from './wasm-training-core-impl';
+
+export class OnnxruntimeWebAssemblyTrainingSessionHandler implements TrainingSessionHandler {
+  private sessionId: number;
+  private checkpointId: number;
+
+  inputNames: string[];
+  outputNames: string[];
+
+  inputEncodedNames: number[];
+  outputEncodedNames: number[];
+
+  async uriOrBufferToHeap(uriOrBuffer: string|Uint8Array): Promise<SerializableModeldata> {
+    let buffer: Uint8Array;
+    if (typeof uriOrBuffer === 'string') {
+      const response = await fetch(uriOrBuffer);
+      const arrayBuffer = await response.arrayBuffer();
+      buffer = new Uint8Array(arrayBuffer);
+    } else {
+      buffer = uriOrBuffer;
+    }
+    return createSessionAllocate(buffer);
+  }
+
+  async createTrainingSession(
+      checkpointStateUriOrBuffer: string|Uint8Array, trainModelUriOrBuffer: string|Uint8Array,
+      evalModelUriOrBuffer: string|Uint8Array, optimizerModelUriOrBuffer: string|Uint8Array,
+      options: InferenceSession.SessionOptions) {
+    if (!isOrtEnvInitialized()) {
+      await initRuntime(env);
+    }
+    const checkpointData: SerializableModeldata = await this.uriOrBufferToHeap(checkpointStateUriOrBuffer);
+    const trainModelData: SerializableModeldata = await this.uriOrBufferToHeap(trainModelUriOrBuffer);
+    // 0 is supposed to be the nullptr
+    let evalModelData: SerializableModeldata = [0, 0];
+    let optimizerModelData: SerializableModeldata = [0, 0];
+
+    if (evalModelUriOrBuffer !== '') {
+      evalModelData = await this.uriOrBufferToHeap(evalModelUriOrBuffer);
+    }
+    if (optimizerModelUriOrBuffer !== '') {
+      optimizerModelData = await this.uriOrBufferToHeap(optimizerModelUriOrBuffer);
+    }
+
+    this.checkpointId = createCheckpointHandle(checkpointData);
+    [[this.sessionId, this.inputNames, this.outputNames], this.inputEncodedNames, this.outputEncodedNames] =
+        createTrainingSessionHandle(this.checkpointId, trainModelData, evalModelData, optimizerModelData, options);
+  }
+
+  /**
+   * Helper method that converts a feeds or fetches datatype to two arrays, one of values and one that stores the
+   * corresponding name as a number referring to the index in the list of names provided.
+   *
+   * @param feeds meant to match either SessionHandler.FeedsType or SessionHandler.FetchesType
+   * @param names either inputNames or outputNames
+   * @returns a tuple of a list of values and a list of indices.
+   */
+  convertMapIntoValuesArrayAndIndicesArray<T, U>(
+      feeds: {[name: string]: T}, names: string[], mapFunc: (val: T, index: number) => U): [T[], number[], U[]] {
+    const values: T[] = [];
+    const indices: number[] = [];
+    Object.entries(feeds).forEach(kvp => {
+      const name = kvp[0];
+      const tensor = kvp[1];
+      const index = names.indexOf(name);
+      if (index === -1) {
+        throw new Error(`invalid input '${name}`);
+      }
+      values.push(tensor);
+      indices.push(index);
+    });
+
+    const uList = values.map(mapFunc);
+    return [values, indices, uList];
+  }
+
+  /**
+   * Helper method that converts the TensorMetadata that the wasm-core functions return to the
+   * SessionHandler.ReturnType. Any outputs in the provided outputArray that are falsy will be populated with the
+   * corresponding result.
+   *
+   * @param results used to populate the resultMap if there is no value for that outputName already
+   * @param outputArray used to populate the resultMap. If null or undefined, use the corresponding result from results
+   * @param outputIndices specifies which outputName the corresponding value for outputArray refers to.
+   * @returns a map of output names and OnnxValues.
+   */
+  convertTensorMetadataToReturnType(
+      results: TensorMetadata[], outputArray: Array<Tensor|null>, outputIndices: number[]): SessionHandler.ReturnType {
+    const resultMap: SessionHandler.ReturnType = {};
+    for (let i = 0; i < results.length; i++) {
+      resultMap[this.outputNames[outputIndices[i]]] = outputArray[i] ?? decodeTensorMetadata(results[i]);
+    }
+    return resultMap;
+  }
+
+  async runTrainStep(
+      feeds: SessionHandler.FeedsType, fetches: SessionHandler.FetchesType,
+      options: InferenceSession.RunOptions): Promise<SessionHandler.ReturnType> {
+    const [, inputIndices, inputs] = this.convertMapIntoValuesArrayAndIndicesArray<Tensor, TensorMetadata>(
+        feeds, this.inputNames,
+        (t, i): TensorMetadata => encodeTensorMetadata(t, () => `input "${this.inputNames[inputIndices[i]]}"`));
+
+    const [outputArray, outputIndices, outputs] =
+        this.convertMapIntoValuesArrayAndIndicesArray<Tensor|null, TensorMetadata|null>(
+            fetches, this.outputNames,
+            (t, i): TensorMetadata|null =>
+                t ? encodeTensorMetadata(t, () => `output "${this.outputNames[outputIndices[i]]}"`) : null);
+
+    const results = await runTrainStep(this.sessionId, inputIndices, inputs, outputIndices, outputs, options);
+    return this.convertTensorMetadataToReturnType(results, outputArray, outputIndices);
+  }
+
+  async getParametersSize(trainableOnly: boolean): Promise<number> {
+    return getParametersSize(this.sessionId, trainableOnly);
+  }
+
+  async loadParametersBuffer(array: Uint8Array, trainableOnly: boolean): Promise<void> {
+    await loadParametersBuffer(this.sessionId, array, trainableOnly);
+  }
+  async getContiguousParameters(trainableOnly: boolean): Promise<OnnxValue> {
+    const tensorResult = await getContiguousParameters(this.sessionId, trainableOnly);
+    return decodeTensorMetadata(tensorResult);
+  }
+
+  async dispose(): Promise<void> {
+    return releaseTrainingSessionAndCheckpoint(
+        this.checkpointId, this.sessionId, this.inputEncodedNames, this.outputEncodedNames);
+  }
+}
diff --git a/js/web/lib/wasm/session-options.ts b/js/web/lib/wasm/session-options.ts
index 2659b471733f5..45ea48a2df209 100644
--- a/js/web/lib/wasm/session-options.ts
+++ b/js/web/lib/wasm/session-options.ts
@@ -75,6 +75,19 @@ const setExecutionProviders =
                   checkLastError(`Can't set a session config entry: 'deviceType' - ${webnnOptions.deviceType}.`);
                 }
               }
+              if (webnnOptions?.numThreads) {
+                let numThreads = webnnOptions.numThreads;
+                // Just ignore invalid webnnOptions.numThreads.
+                if (typeof numThreads != 'number' || !Number.isInteger(numThreads) || numThreads < 0) {
+                  numThreads = 0;
+                }
+                const keyDataOffset = allocWasmString('numThreads', allocs);
+                const valueDataOffset = allocWasmString(numThreads.toString(), allocs);
+                if (getInstance()._OrtAddSessionConfigEntry(sessionOptionsHandle, keyDataOffset, valueDataOffset) !==
+                    0) {
+                  checkLastError(`Can't set a session config entry: 'numThreads' - ${webnnOptions.numThreads}.`);
+                }
+              }
               if (webnnOptions?.powerPreference) {
                 const keyDataOffset = allocWasmString('powerPreference', allocs);
                 const valueDataOffset = allocWasmString(webnnOptions.powerPreference, allocs);
@@ -88,6 +101,21 @@ const setExecutionProviders =
             break;
           case 'webgpu':
             epName = 'JS';
+            if (typeof ep !== 'string') {
+              const webgpuOptions = ep as InferenceSession.WebGpuExecutionProviderOption;
+              if (webgpuOptions?.preferredLayout) {
+                if (webgpuOptions.preferredLayout !== 'NCHW' && webgpuOptions.preferredLayout !== 'NHWC') {
+                  throw new Error(`preferredLayout must be either 'NCHW' or 'NHWC': ${webgpuOptions.preferredLayout}`);
+                }
+                const keyDataOffset = allocWasmString('preferredLayout', allocs);
+                const valueDataOffset = allocWasmString(webgpuOptions.preferredLayout, allocs);
+                if (getInstance()._OrtAddSessionConfigEntry(sessionOptionsHandle, keyDataOffset, valueDataOffset) !==
+                    0) {
+                  checkLastError(
+                      `Can't set a session config entry: 'preferredLayout' - ${webgpuOptions.preferredLayout}.`);
+                }
+              }
+            }
             break;
           case 'wasm':
           case 'cpu':
diff --git a/js/web/lib/wasm/wasm-common.ts b/js/web/lib/wasm/wasm-common.ts
index 389773f3e8884..b9eff45e890c4 100644
--- a/js/web/lib/wasm/wasm-common.ts
+++ b/js/web/lib/wasm/wasm-common.ts
@@ -164,3 +164,35 @@ export const logLevelStringToEnum = (logLevel?: 'verbose'|'info'|'warning'|'erro
       throw new Error(`unsupported logging level: ${logLevel}`);
   }
 };
+
+/**
+ * Check whether the given tensor type is supported by GPU buffer
+ */
+export const isGpuBufferSupportedType = (type: Tensor.Type): type is Tensor.GpuBufferDataTypes => type === 'float32' ||
+    type === 'int32' || type === 'int64' || type === 'bool' || type === 'float16' || type === 'uint32';
+
+/**
+ * Map string data location to integer value
+ */
+export const dataLocationStringToEnum = (location: Tensor.DataLocation): number => {
+  switch (location) {
+    case 'none':
+      return 0;
+    case 'cpu':
+      return 1;
+    case 'cpu-pinned':
+      return 2;
+    case 'texture':
+      return 3;
+    case 'gpu-buffer':
+      return 4;
+    default:
+      throw new Error(`unsupported data location: ${location}`);
+  }
+};
+
+/**
+ * Map integer data location to string value
+ */
+export const dataLocationEnumToString = (location: number): Tensor.DataLocation|undefined =>
+    (['none', 'cpu', 'cpu-pinned', 'texture', 'gpu-buffer'] as const)[location];
diff --git a/js/web/lib/wasm/wasm-core-impl.ts b/js/web/lib/wasm/wasm-core-impl.ts
index fcca82ab2aa54..3aacf8f4d90e0 100644
--- a/js/web/lib/wasm/wasm-core-impl.ts
+++ b/js/web/lib/wasm/wasm-core-impl.ts
@@ -3,13 +3,15 @@
 
 import {Env, InferenceSession, Tensor} from 'onnxruntime-common';
 
-import {SerializableModeldata, SerializableSessionMetadata, SerializableTensor} from './proxy-messages';
+import {SerializableModeldata, SerializableSessionMetadata, SerializableTensorMetadata, TensorMetadata} from './proxy-messages';
 import {setRunOptions} from './run-options';
 import {setSessionOptions} from './session-options';
-import {logLevelStringToEnum, tensorDataTypeEnumToString, tensorDataTypeStringToEnum, tensorTypeToTypedArrayConstructor} from './wasm-common';
+import {dataLocationStringToEnum, getTensorElementSize, isGpuBufferSupportedType, logLevelStringToEnum, tensorDataTypeEnumToString, tensorDataTypeStringToEnum, tensorTypeToTypedArrayConstructor} from './wasm-common';
 import {getInstance} from './wasm-factory';
 import {allocWasmString, checkLastError} from './wasm-utils';
 
+let ortEnvInitialized = false;
+
 /**
  * get the input/output count of the session.
  * @param sessionHandle the handle representing the session. should be non-zero.
@@ -57,15 +59,46 @@ export const initRuntime = async(env: Env): Promise<void> => {
     const initJsep = require('./jsep/init').init;
     await initJsep(getInstance(), env);
   }
+
+  ortEnvInitialized = true;
+};
+
+/**
+ * valid data locations for input/output tensors.
+ */
+type SupportedTensorDataLocationForInputOutput = 'cpu'|'cpu-pinned'|'gpu-buffer';
+
+type IOBindingState = {
+  /**
+   * the handle of IO binding.
+   */
+  readonly handle: number;
+
+  /**
+   * the preferred location for each output tensor.
+   *
+   * value is one of 'cpu', 'cpu-pinned', 'gpu-buffer'.
+   */
+  readonly outputPreferredLocations: readonly SupportedTensorDataLocationForInputOutput[];
+
+  /**
+   * enum value of the preferred location for each output tensor.
+   */
+  readonly outputPreferredLocationsEncoded: readonly number[];
 };
 
 /**
- *  tuple elements are: InferenceSession ID; inputNamesUTF8Encoded; outputNamesUTF8Encoded
+ *  tuple elements are: InferenceSession ID; inputNamesUTF8Encoded; outputNamesUTF8Encoded; bindingState
  */
-type SessionMetadata = [number, number[], number[]];
+type SessionMetadata = [
+  inferenceSessionId: number, inputNamesUTF8Encoded: number[], outputNamesUTF8Encoded: number[],
+  bindingState: IOBindingState|null
+];
 
 const activeSessions = new Map<number, SessionMetadata>();
 
+export const isOrtEnvInitialized = (): boolean => ortEnvInitialized;
+
 /**
  * allocate the memory and memcpy the model bytes, preparing for creating an instance of InferenceSession.
  * @returns a 2-elements tuple - the pointer and size of the allocated buffer
@@ -92,6 +125,7 @@ export const createSessionFinalize =
 
       let sessionHandle = 0;
       let sessionOptionsHandle = 0;
+      let ioBindingHandle = 0;
       let allocs: number[] = [];
       const inputNamesUTF8Encoded = [];
       const outputNamesUTF8Encoded = [];
@@ -108,6 +142,7 @@ export const createSessionFinalize =
 
         const inputNames = [];
         const outputNames = [];
+        const outputPreferredLocations: SupportedTensorDataLocationForInputOutput[] = [];
         for (let i = 0; i < inputCount; i++) {
           const name = wasm._OrtGetInputName(sessionHandle, i);
           if (name === 0) {
@@ -122,15 +157,45 @@ export const createSessionFinalize =
             checkLastError('Can\'t get an output name.');
           }
           outputNamesUTF8Encoded.push(name);
-          outputNames.push(wasm.UTF8ToString(name));
+          const nameString = wasm.UTF8ToString(name);
+          outputNames.push(nameString);
+
+          if (!BUILD_DEFS.DISABLE_WEBGPU) {
+            const location = typeof options?.preferredOutputLocation === 'string' ?
+                options.preferredOutputLocation :
+                options?.preferredOutputLocation?.[nameString] ?? 'cpu';
+            if (location !== 'cpu' && location !== 'cpu-pinned' && location !== 'gpu-buffer') {
+              throw new Error(`Not supported preferred output location: ${location}.`);
+            }
+            outputPreferredLocations.push(location);
+          }
         }
 
-        activeSessions.set(sessionHandle, [sessionHandle, inputNamesUTF8Encoded, outputNamesUTF8Encoded]);
+        // use IO binding only when at least one output is preffered to be on GPU.
+        let bindingState: IOBindingState|null = null;
+        if (!BUILD_DEFS.DISABLE_WEBGPU && outputPreferredLocations.some(l => l === 'gpu-buffer')) {
+          ioBindingHandle = wasm._OrtCreateBinding(sessionHandle);
+          if (ioBindingHandle === 0) {
+            checkLastError('Can\'t create IO binding.');
+          }
+
+          bindingState = {
+            handle: ioBindingHandle,
+            outputPreferredLocations,
+            outputPreferredLocationsEncoded: outputPreferredLocations.map(l => dataLocationStringToEnum(l)),
+          };
+        }
+
+        activeSessions.set(sessionHandle, [sessionHandle, inputNamesUTF8Encoded, outputNamesUTF8Encoded, bindingState]);
         return [sessionHandle, inputNames, outputNames];
       } catch (e) {
         inputNamesUTF8Encoded.forEach(buf => wasm._OrtFree(buf));
         outputNamesUTF8Encoded.forEach(buf => wasm._OrtFree(buf));
 
+        if (ioBindingHandle !== 0) {
+          wasm._OrtReleaseBinding(ioBindingHandle);
+        }
+
         if (sessionHandle !== 0) {
           wasm._OrtReleaseSession(sessionHandle);
         }
@@ -161,7 +226,13 @@ export const releaseSession = (sessionId: number): void => {
   if (!session) {
     throw new Error(`cannot release session. invalid session id: ${sessionId}`);
   }
-  const [sessionHandle, inputNamesUTF8Encoded, outputNamesUTF8Encoded] = session;
+  const [sessionHandle, inputNamesUTF8Encoded, outputNamesUTF8Encoded, ioBindingState] = session;
+
+  if (ioBindingState) {
+    wasm._OrtReleaseBinding(ioBindingState.handle);
+  }
+
+  wasm.jsepUnregisterBuffers?.(sessionId);
 
   inputNamesUTF8Encoded.forEach(buf => wasm._OrtFree(buf));
   outputNamesUTF8Encoded.forEach(buf => wasm._OrtFree(buf));
@@ -169,18 +240,84 @@ export const releaseSession = (sessionId: number): void => {
   activeSessions.delete(sessionId);
 };
 
+export const prepareInputOutputTensor =
+    (tensor: TensorMetadata|null, tensorHandles: number[], allocs: number[], sessionId: number, index: number):
+        void => {
+          if (!tensor) {
+            tensorHandles.push(0);
+            return;
+          }
+
+          const wasm = getInstance();
+
+          const dataType = tensor[0];
+          const dims = tensor[1];
+          const location = tensor[3];
+
+          let rawData: number;
+          let dataByteLength: number;
+
+          if (dataType === 'string' && location === 'gpu-buffer') {
+            throw new Error('String tensor is not supported on GPU.');
+          }
+
+          if (location === 'gpu-buffer') {
+            const gpuBuffer = tensor[2].gpuBuffer as GPUBuffer;
+            const elementSizeInBytes = getTensorElementSize(tensorDataTypeStringToEnum(dataType))!;
+            dataByteLength = dims.reduce((a, b) => a * b, 1) * elementSizeInBytes;
+            rawData = wasm.jsepRegisterBuffer(sessionId, index, gpuBuffer, dataByteLength);
+          } else {
+            const data = tensor[2];
+
+            if (Array.isArray(data)) {
+              // string tensor
+              dataByteLength = 4 * data.length;
+              rawData = wasm._malloc(dataByteLength);
+              allocs.push(rawData);
+              let dataIndex = rawData / 4;
+              for (let i = 0; i < data.length; i++) {
+                if (typeof data[i] !== 'string') {
+                  throw new TypeError(`tensor data at index ${i} is not a string`);
+                }
+                wasm.HEAPU32[dataIndex++] = allocWasmString(data[i], allocs);
+              }
+            } else {
+              dataByteLength = data.byteLength;
+              rawData = wasm._malloc(dataByteLength);
+              allocs.push(rawData);
+              wasm.HEAPU8.set(new Uint8Array(data.buffer, data.byteOffset, dataByteLength), rawData);
+            }
+          }
+
+          const stack = wasm.stackSave();
+          const dimsOffset = wasm.stackAlloc(4 * dims.length);
+          try {
+            let dimIndex = dimsOffset / 4;
+            dims.forEach(d => wasm.HEAP32[dimIndex++] = d);
+            const tensor = wasm._OrtCreateTensor(
+                tensorDataTypeStringToEnum(dataType), rawData, dataByteLength, dimsOffset, dims.length,
+                dataLocationStringToEnum(location));
+            if (tensor === 0) {
+              checkLastError(`Can't create tensor for input/output. session=${sessionId}, index=${index}.`);
+            }
+            tensorHandles.push(tensor);
+          } finally {
+            wasm.stackRestore(stack);
+          }
+        };
+
 /**
  * perform inference run
  */
 export const run = async(
-    sessionId: number, inputIndices: number[], inputs: SerializableTensor[], outputIndices: number[],
-    options: InferenceSession.RunOptions): Promise<SerializableTensor[]> => {
+    sessionId: number, inputIndices: number[], inputTensors: TensorMetadata[], outputIndices: number[],
+    outputTensors: Array<TensorMetadata|null>, options: InferenceSession.RunOptions): Promise<TensorMetadata[]> => {
   const wasm = getInstance();
   const session = activeSessions.get(sessionId);
   if (!session) {
     throw new Error(`cannot run inference. invalid session id: ${sessionId}`);
   }
-  const [sessionHandle, inputNamesUTF8Encoded, outputNamesUTF8Encoded] = session;
+  const [sessionHandle, inputNamesUTF8Encoded, outputNamesUTF8Encoded, ioBindingState] = session;
 
   const inputCount = inputIndices.length;
   const outputCount = outputIndices.length;
@@ -188,171 +325,200 @@ export const run = async(
   let runOptionsHandle = 0;
   let runOptionsAllocs: number[] = [];
 
-  const inputValues: number[] = [];
-  const inputAllocs: number[] = [];
+  const inputTensorHandles: number[] = [];
+  const outputTensorHandles: number[] = [];
+  const inputOutputAllocs: number[] = [];
+
+  const beforeRunStack = wasm.stackSave();
+  const inputValuesOffset = wasm.stackAlloc(inputCount * 4);
+  const inputNamesOffset = wasm.stackAlloc(inputCount * 4);
+  const outputValuesOffset = wasm.stackAlloc(outputCount * 4);
+  const outputNamesOffset = wasm.stackAlloc(outputCount * 4);
 
   try {
     [runOptionsHandle, runOptionsAllocs] = setRunOptions(options);
 
     // create input tensors
     for (let i = 0; i < inputCount; i++) {
-      const dataType = inputs[i][0];
-      const dims = inputs[i][1];
-      const data = inputs[i][2];
-
-      let dataOffset: number;
-      let dataByteLength: number;
-
-      if (Array.isArray(data)) {
-        // string tensor
-        dataByteLength = 4 * data.length;
-        dataOffset = wasm._malloc(dataByteLength);
-        inputAllocs.push(dataOffset);
-        let dataIndex = dataOffset / 4;
-        for (let i = 0; i < data.length; i++) {
-          if (typeof data[i] !== 'string') {
-            throw new TypeError(`tensor data at index ${i} is not a string`);
-          }
-          wasm.HEAPU32[dataIndex++] = allocWasmString(data[i], inputAllocs);
-        }
-      } else {
-        dataByteLength = data.byteLength;
-        dataOffset = wasm._malloc(dataByteLength);
-        inputAllocs.push(dataOffset);
-        wasm.HEAPU8.set(new Uint8Array(data.buffer, data.byteOffset, dataByteLength), dataOffset);
-      }
+      prepareInputOutputTensor(inputTensors[i], inputTensorHandles, inputOutputAllocs, sessionId, inputIndices[i]);
+    }
 
-      const stack = wasm.stackSave();
-      const dimsOffset = wasm.stackAlloc(4 * dims.length);
-      try {
-        let dimIndex = dimsOffset / 4;
-        dims.forEach(d => wasm.HEAP32[dimIndex++] = d);
-        const tensor = wasm._OrtCreateTensor(
-            tensorDataTypeStringToEnum(dataType), dataOffset, dataByteLength, dimsOffset, dims.length);
-        if (tensor === 0) {
-          checkLastError(`Can't create tensor for input[${i}].`);
-        }
-        inputValues.push(tensor);
-      } finally {
-        wasm.stackRestore(stack);
-      }
+    // create output tensors
+    for (let i = 0; i < outputCount; i++) {
+      prepareInputOutputTensor(
+          outputTensors[i], outputTensorHandles, inputOutputAllocs, sessionId, inputCount + outputIndices[i]);
+    }
+
+    let inputValuesIndex = inputValuesOffset / 4;
+    let inputNamesIndex = inputNamesOffset / 4;
+    let outputValuesIndex = outputValuesOffset / 4;
+    let outputNamesIndex = outputNamesOffset / 4;
+    for (let i = 0; i < inputCount; i++) {
+      wasm.HEAPU32[inputValuesIndex++] = inputTensorHandles[i];
+      wasm.HEAPU32[inputNamesIndex++] = inputNamesUTF8Encoded[inputIndices[i]];
+    }
+    for (let i = 0; i < outputCount; i++) {
+      wasm.HEAPU32[outputValuesIndex++] = outputTensorHandles[i];
+      wasm.HEAPU32[outputNamesIndex++] = outputNamesUTF8Encoded[outputIndices[i]];
     }
 
-    const beforeRunStack = wasm.stackSave();
-    const inputValuesOffset = wasm.stackAlloc(inputCount * 4);
-    const inputNamesOffset = wasm.stackAlloc(inputCount * 4);
-    const outputValuesOffset = wasm.stackAlloc(outputCount * 4);
-    const outputNamesOffset = wasm.stackAlloc(outputCount * 4);
-
-    try {
-      let inputValuesIndex = inputValuesOffset / 4;
-      let inputNamesIndex = inputNamesOffset / 4;
-      let outputValuesIndex = outputValuesOffset / 4;
-      let outputNamesIndex = outputNamesOffset / 4;
+    if (!BUILD_DEFS.DISABLE_WEBGPU && ioBindingState) {
+      const {handle, outputPreferredLocations, outputPreferredLocationsEncoded} = ioBindingState;
+
+      if (inputNamesUTF8Encoded.length !== inputCount) {
+        throw new Error(`input count from feeds (${
+            inputCount}) is expected to be always equal to model's input count (${inputNamesUTF8Encoded.length}).`);
+      }
+
+      // process inputs
       for (let i = 0; i < inputCount; i++) {
-        wasm.HEAPU32[inputValuesIndex++] = inputValues[i];
-        wasm.HEAPU32[inputNamesIndex++] = inputNamesUTF8Encoded[inputIndices[i]];
+        const index = inputIndices[i];
+        const errorCode = await wasm._OrtBindInput(handle, inputNamesUTF8Encoded[index], inputTensorHandles[i]);
+        if (errorCode !== 0) {
+          checkLastError(`Can't bind input[${i}] for session=${sessionId}.`);
+        }
       }
+
+      // process pre-allocated outputs
       for (let i = 0; i < outputCount; i++) {
-        wasm.HEAPU32[outputValuesIndex++] = 0;
-        wasm.HEAPU32[outputNamesIndex++] = outputNamesUTF8Encoded[outputIndices[i]];
+        const index = outputIndices[i];
+        const location = outputTensors[i]?.[3];  // undefined means output is not pre-allocated.
+
+        if (location) {
+          // output is pre-allocated. bind the tensor.
+          const errorCode = wasm._OrtBindOutput(handle, outputNamesUTF8Encoded[index], outputTensorHandles[i], 0);
+          if (errorCode !== 0) {
+            checkLastError(`Can't bind pre-allocated output[${i}] for session=${sessionId}.`);
+          }
+        } else {
+          // output is not pre-allocated. reset preferred location.
+          const errorCode =
+              wasm._OrtBindOutput(handle, outputNamesUTF8Encoded[index], 0, outputPreferredLocationsEncoded[index]);
+          if (errorCode !== 0) {
+            checkLastError(`Can't bind output[${i}] to ${outputPreferredLocations[i]} for session=${sessionId}.`);
+          }
+        }
       }
+    }
 
-      // jsepOnRunStart is only available when JSEP is enabled.
-      wasm.jsepOnRunStart?.(sessionId);
+    let errorCode: number;
 
-      // support RunOptions
-      let errorCode = wasm._OrtRun(
+    if (!BUILD_DEFS.DISABLE_WEBGPU && ioBindingState) {
+      errorCode = await wasm._OrtRunWithBinding(
+          sessionHandle, ioBindingState.handle, outputCount, outputValuesOffset, runOptionsHandle);
+    } else {
+      errorCode = await wasm._OrtRun(
           sessionHandle, inputNamesOffset, inputValuesOffset, inputCount, outputNamesOffset, outputCount,
           outputValuesOffset, runOptionsHandle);
+    }
 
-      const runPromise = wasm.jsepRunPromise;
-      if (runPromise) {
-        // jsepRunPromise is a Promise object. It is only available when JSEP is enabled.
-        //
-        // OrtRun() is a synchrnous call, but it internally calls async functions. Emscripten's ASYNCIFY allows it to
-        // work in this way. However, OrtRun() does not return a promise, so when code reaches here, it is earlier than
-        // the async functions are finished.
-        //
-        // To make it work, we created a Promise and resolve the promise when the C++ code actually reaches the end of
-        // OrtRun(). If the promise exists, we need to await for the promise to be resolved.
-        errorCode = await runPromise;
-      }
+    if (errorCode !== 0) {
+      checkLastError('failed to call OrtRun().');
+    }
+
+    const output: TensorMetadata[] = [];
 
-      const jsepOnRunEnd = wasm.jsepOnRunEnd;
-      if (jsepOnRunEnd) {
-        // jsepOnRunEnd is only available when JSEP is enabled.
-        //
-        // it returns a promise, which is resolved or rejected when the following async functions are finished:
-        // - collecting GPU validation errors.
-        await jsepOnRunEnd(sessionId);
+    for (let i = 0; i < outputCount; i++) {
+      const tensor = wasm.HEAPU32[outputValuesOffset / 4 + i];
+      if (tensor === outputTensorHandles[i]) {
+        // output tensor is pre-allocated. no need to copy data.
+        output.push(outputTensors[i]!);
+        continue;
       }
 
-      const output: SerializableTensor[] = [];
+      const beforeGetTensorDataStack = wasm.stackSave();
+      // stack allocate 4 pointer value
+      const tensorDataOffset = wasm.stackAlloc(4 * 4);
 
-      if (errorCode !== 0) {
-        checkLastError('failed to call OrtRun().');
-      }
+      let keepOutputTensor = false;
+      let type: Tensor.Type|undefined, dataOffset = 0;
+      try {
+        const errorCode = wasm._OrtGetTensorData(
+            tensor, tensorDataOffset, tensorDataOffset + 4, tensorDataOffset + 8, tensorDataOffset + 12);
+        if (errorCode !== 0) {
+          checkLastError(`Can't access output tensor data on index ${i}.`);
+        }
+        let tensorDataIndex = tensorDataOffset / 4;
+        const dataType = wasm.HEAPU32[tensorDataIndex++];
+        dataOffset = wasm.HEAPU32[tensorDataIndex++];
+        const dimsOffset = wasm.HEAPU32[tensorDataIndex++];
+        const dimsLength = wasm.HEAPU32[tensorDataIndex++];
+        const dims = [];
+        for (let i = 0; i < dimsLength; i++) {
+          dims.push(wasm.HEAPU32[dimsOffset / 4 + i]);
+        }
+        wasm._OrtFree(dimsOffset);
 
-      for (let i = 0; i < outputCount; i++) {
-        const tensor = wasm.HEAPU32[outputValuesOffset / 4 + i];
+        const size = dims.reduce((a, b) => a * b, 1);
+        type = tensorDataTypeEnumToString(dataType);
 
-        const beforeGetTensorDataStack = wasm.stackSave();
-        // stack allocate 4 pointer value
-        const tensorDataOffset = wasm.stackAlloc(4 * 4);
+        const preferredLocation = ioBindingState?.outputPreferredLocations[outputIndices[i]];
 
-        let type: Tensor.Type|undefined, dataOffset = 0;
-        try {
-          errorCode = wasm._OrtGetTensorData(
-              tensor, tensorDataOffset, tensorDataOffset + 4, tensorDataOffset + 8, tensorDataOffset + 12);
-          if (errorCode !== 0) {
-            checkLastError(`Can't access output tensor data on index ${i}.`);
+        if (type === 'string') {
+          if (preferredLocation === 'gpu-buffer') {
+            throw new Error('String tensor is not supported on GPU.');
           }
-          let tensorDataIndex = tensorDataOffset / 4;
-          const dataType = wasm.HEAPU32[tensorDataIndex++];
-          dataOffset = wasm.HEAPU32[tensorDataIndex++];
-          const dimsOffset = wasm.HEAPU32[tensorDataIndex++];
-          const dimsLength = wasm.HEAPU32[tensorDataIndex++];
-          const dims = [];
-          for (let i = 0; i < dimsLength; i++) {
-            dims.push(wasm.HEAPU32[dimsOffset / 4 + i]);
+          const stringData: string[] = [];
+          let dataIndex = dataOffset / 4;
+          for (let i = 0; i < size; i++) {
+            const offset = wasm.HEAPU32[dataIndex++];
+            const maxBytesToRead = i === size - 1 ? undefined : wasm.HEAPU32[dataIndex] - offset;
+            stringData.push(wasm.UTF8ToString(offset, maxBytesToRead));
           }
-          wasm._OrtFree(dimsOffset);
-
-          const size = dims.length === 0 ? 1 : dims.reduce((a, b) => a * b);
-          type = tensorDataTypeEnumToString(dataType);
-          if (type === 'string') {
-            const stringData: string[] = [];
-            let dataIndex = dataOffset / 4;
-            for (let i = 0; i < size; i++) {
-              const offset = wasm.HEAPU32[dataIndex++];
-              const maxBytesToRead = i === size - 1 ? undefined : wasm.HEAPU32[dataIndex] - offset;
-              stringData.push(wasm.UTF8ToString(offset, maxBytesToRead));
+          output.push([type, dims, stringData, 'cpu']);
+        } else {
+          // If a certain output's preferred location is GPU but the tensor is empty, we still need to create a CPU
+          // tensor for it. There is no mapping GPU buffer for an empty tensor.
+          if (preferredLocation === 'gpu-buffer' && size > 0) {
+            const gpuBuffer = wasm.jsepGetBuffer(dataOffset);
+            const elementSize = getTensorElementSize(dataType);
+            if (elementSize === undefined || !isGpuBufferSupportedType(type)) {
+              throw new Error(`Unsupported data type: ${type}`);
             }
-            output.push([type, dims, stringData]);
+
+            // do not release the tensor right now. it will be released when user calls tensor.dispose().
+            keepOutputTensor = true;
+
+            output.push([
+              type, dims, {
+                gpuBuffer,
+                download: wasm.jsepCreateDownloader(gpuBuffer, size * elementSize, type),
+                dispose: () => {
+                  wasm._OrtReleaseTensor(tensor);
+                }
+              },
+              'gpu-buffer'
+            ]);
           } else {
             const typedArrayConstructor = tensorTypeToTypedArrayConstructor(type);
             const data = new typedArrayConstructor(size);
             new Uint8Array(data.buffer, data.byteOffset, data.byteLength)
                 .set(wasm.HEAPU8.subarray(dataOffset, dataOffset + data.byteLength));
-            output.push([type, dims, data]);
-          }
-        } finally {
-          wasm.stackRestore(beforeGetTensorDataStack);
-          if (type === 'string' && dataOffset) {
-            wasm._free(dataOffset);
+            output.push([type, dims, data, 'cpu']);
           }
+        }
+      } finally {
+        wasm.stackRestore(beforeGetTensorDataStack);
+        if (type === 'string' && dataOffset) {
+          wasm._free(dataOffset);
+        }
+        if (!keepOutputTensor) {
           wasm._OrtReleaseTensor(tensor);
         }
       }
+    }
 
-      return output;
-    } finally {
-      wasm.stackRestore(beforeRunStack);
+    if (ioBindingState) {
+      wasm._OrtClearBoundOutputs(ioBindingState.handle);
     }
+
+    return output;
   } finally {
-    inputValues.forEach(v => wasm._OrtReleaseTensor(v));
-    inputAllocs.forEach(p => wasm._free(p));
+    wasm.stackRestore(beforeRunStack);
+
+    inputTensorHandles.forEach(v => wasm._OrtReleaseTensor(v));
+    outputTensorHandles.forEach(v => wasm._OrtReleaseTensor(v));
+    inputOutputAllocs.forEach(p => wasm._free(p));
 
     if (runOptionsHandle !== 0) {
       wasm._OrtReleaseRunOptions(runOptionsHandle);
@@ -380,11 +546,11 @@ export const endProfiling = (sessionId: number): void => {
   wasm._OrtFree(profileFileName);
 };
 
-export const extractTransferableBuffers = (tensors: readonly SerializableTensor[]): ArrayBufferLike[] => {
+export const extractTransferableBuffers = (tensors: readonly SerializableTensorMetadata[]): ArrayBufferLike[] => {
   const buffers: ArrayBufferLike[] = [];
   for (const tensor of tensors) {
     const data = tensor[2];
-    if (!Array.isArray(data) && data.buffer) {
+    if (!Array.isArray(data) && 'buffer' in data) {
       buffers.push(data.buffer);
     }
   }
diff --git a/js/web/lib/wasm/wasm-factory.ts b/js/web/lib/wasm/wasm-factory.ts
index 7648f0c473f07..2b7d492cc70ba 100644
--- a/js/web/lib/wasm/wasm-factory.ts
+++ b/js/web/lib/wasm/wasm-factory.ts
@@ -1,15 +1,21 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
+import * as path from 'node:path';
 import {Env} from 'onnxruntime-common';
-import * as path from 'path';
 
 import {OrtWasmModule} from './binding/ort-wasm';
 import {OrtWasmThreadedModule} from './binding/ort-wasm-threaded';
 
 /* eslint-disable @typescript-eslint/no-require-imports */
-const ortWasmFactory: EmscriptenModuleFactory<OrtWasmModule> =
-    BUILD_DEFS.DISABLE_WEBGPU ? require('./binding/ort-wasm.js') : require('./binding/ort-wasm-simd.jsep.js');
+let ortWasmFactory: EmscriptenModuleFactory<OrtWasmModule>;
+
+if (!BUILD_DEFS.DISABLE_TRAINING) {
+  ortWasmFactory = require('./binding/ort-training-wasm-simd.js');
+} else {
+  ortWasmFactory =
+      BUILD_DEFS.DISABLE_WEBGPU ? require('./binding/ort-wasm.js') : require('./binding/ort-wasm-simd.jsep.js');
+}
 
 const ortWasmFactoryThreaded: EmscriptenModuleFactory<OrtWasmModule> = !BUILD_DEFS.DISABLE_WASM_THREAD ?
     (BUILD_DEFS.DISABLE_WEBGPU ? require('./binding/ort-wasm-threaded.js') :
@@ -72,10 +78,13 @@ const isSimdSupported = (): boolean => {
 };
 
 const getWasmFileName = (useSimd: boolean, useThreads: boolean) => {
-  if (useThreads) {
-    return useSimd ? 'ort-wasm-simd-threaded.wasm' : 'ort-wasm-threaded.wasm';
+  if (useSimd) {
+    if (!BUILD_DEFS.DISABLE_TRAINING) {
+      return 'ort-training-wasm-simd.wasm';
+    }
+    return useThreads ? 'ort-wasm-simd-threaded.wasm' : 'ort-wasm-simd.wasm';
   } else {
-    return useSimd ? 'ort-wasm-simd.wasm' : 'ort-wasm.wasm';
+    return useThreads ? 'ort-wasm-threaded.wasm' : 'ort-wasm.wasm';
   }
 };
 
@@ -128,7 +137,7 @@ export const initializeWebAssembly = async(flags: Env.WebAssemblyFlags): Promise
             typeof Blob !== 'undefined') {
           return URL.createObjectURL(new Blob(
               [
-                // This require() function is handled by webpack to load file content of the corresponding .worker.js
+                // This require() function is handled by esbuild plugin to load file content as string.
                 // eslint-disable-next-line @typescript-eslint/no-require-imports
                 require('./binding/ort-wasm-threaded.worker.js')
               ],
@@ -161,7 +170,7 @@ export const initializeWebAssembly = async(flags: Env.WebAssemblyFlags): Promise
       if (typeof Blob === 'undefined') {
         config.mainScriptUrlOrBlob = path.join(__dirname, 'ort-wasm-threaded.js');
       } else {
-        const scriptSourceCode = `var ortWasmThreaded=(function(){var _scriptDir;return ${factory.toString()}})();`;
+        const scriptSourceCode = `var ortWasmThreaded=${factory.toString()};`;
         config.mainScriptUrlOrBlob = new Blob([scriptSourceCode], {type: 'text/javascript'});
       }
     }
diff --git a/js/web/lib/wasm/wasm-training-core-impl.ts b/js/web/lib/wasm/wasm-training-core-impl.ts
new file mode 100644
index 0000000000000..c0a4235113148
--- /dev/null
+++ b/js/web/lib/wasm/wasm-training-core-impl.ts
@@ -0,0 +1,455 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+import {InferenceSession, Tensor} from 'onnxruntime-common';
+
+import {SerializableModeldata, SerializableSessionMetadata, TensorMetadata} from './proxy-messages';
+import {setRunOptions} from './run-options';
+import {setSessionOptions} from './session-options';
+import {dataLocationStringToEnum, tensorDataTypeEnumToString, tensorDataTypeStringToEnum, tensorTypeToTypedArrayConstructor} from './wasm-common';
+import {prepareInputOutputTensor} from './wasm-core-impl';
+import {getInstance} from './wasm-factory';
+import {checkLastError} from './wasm-utils';
+
+const NO_TRAIN_FUNCS_MSG =
+    'Built without training API\'s enabled. Use the onnxruntime-web/training import for training ' +
+    'functionality, and make sure that all the correct artifacts are built & moved to the correct folder if ' +
+    'using a custom build. Check https://onnxruntime.ai/docs/build/web.html for more information.';
+
+/**
+ * Runs the checkLastError function which will throw an error, if the provided error code matches the specified
+ * pattern for an error code.
+ * @param errCode number to evaluated for if it's an error
+ * @param message message to pass into checkLastError
+ * @param checkNeqZero when true, treats not equal to zero as an error.
+ *                     When false, treats equal to zero as an error.
+ */
+const ifErrCodeCheckLastError = (errCode: number, message: string, checkNeqZero = true) => {
+  if (checkNeqZero && errCode !== 0) {
+    checkLastError(message);
+  } else if (!checkNeqZero && errCode === 0) {
+    checkLastError(message);
+  }
+};
+
+export const createCheckpointHandle = (checkpointData: SerializableModeldata): number => {
+  const wasm = getInstance();
+
+  const [checkpointDataOffset, checkpointDataLength] = checkpointData;
+  let checkpointHandle = 0;
+
+  try {
+    if (wasm._OrtTrainingLoadCheckpoint) {
+      checkpointHandle = wasm._OrtTrainingLoadCheckpoint(checkpointDataOffset, checkpointDataLength);
+    } else {
+      throw new Error(NO_TRAIN_FUNCS_MSG);
+    }
+
+    ifErrCodeCheckLastError(checkpointHandle, 'Error occurred when trying to create a CheckpointState', false);
+    return checkpointHandle;
+  } catch (e) {
+    if (wasm._OrtTrainingReleaseCheckpoint && checkpointHandle !== 0) {
+      wasm._OrtTrainingReleaseCheckpoint(checkpointHandle);
+    }
+    throw e;
+  } finally {
+    // free buffer from wasm heap
+    wasm._OrtFree(checkpointData[0]);
+  }
+};
+
+const getModelInputOutputCount = (trainingSessionId: number, isEvalModel: boolean): [number, number] => {
+  const wasm = getInstance();
+  const stack = wasm.stackSave();
+  try {
+    const dataOffset = wasm.stackAlloc(8);
+    if (wasm._OrtTrainingGetModelInputOutputCount) {
+      const errorCode =
+          wasm._OrtTrainingGetModelInputOutputCount(trainingSessionId, dataOffset, dataOffset + 4, isEvalModel);
+      ifErrCodeCheckLastError(errorCode, 'Can\'t get session input/output count.');
+      return [wasm.HEAP32[dataOffset / 4], wasm.HEAP32[dataOffset / 4 + 1]];
+    } else {
+      throw new Error(NO_TRAIN_FUNCS_MSG);
+    }
+  } finally {
+    wasm.stackRestore(stack);
+  }
+};
+
+const getModelInputOutputNamesLoop =
+    (trainingSessionId: number, count: number, isInput: boolean, isEvalModel: boolean): [string[], number[]] => {
+      const names = [];
+      const wasm = getInstance();
+
+      const namesUTF8Encoded = [];
+
+      for (let i = 0; i < count; i++) {
+        if (wasm._OrtTrainingGetModelInputOutputName) {
+          const name = wasm._OrtTrainingGetModelInputOutputName(trainingSessionId, i, isInput, isEvalModel);
+          ifErrCodeCheckLastError(name, `Can't get input or output name -- is input: ${isInput}, index ${i}`, false);
+
+          namesUTF8Encoded.push(name);
+          names.push(wasm.UTF8ToString(name));
+        } else {
+          throw new Error(NO_TRAIN_FUNCS_MSG);
+        }
+      }
+      return [names, namesUTF8Encoded];
+    };
+
+const getTrainingModelInputOutputNames = (trainingSessionId: number): [string[], number[], string[], number[]] => {
+  const [inputCount, outputCount] = getModelInputOutputCount(trainingSessionId, false);
+
+  const [inputNames, inputNamesUTF8Encoded] = getModelInputOutputNamesLoop(trainingSessionId, inputCount, true, false);
+  const [outputNames, outputNamesUTF8Encoded] =
+      getModelInputOutputNamesLoop(trainingSessionId, outputCount, false, false);
+
+  return [inputNames, inputNamesUTF8Encoded, outputNames, outputNamesUTF8Encoded];
+};
+
+export const createTrainingSessionHandle =
+    (checkpointHandle: number, trainModelData: SerializableModeldata, evalModelData: SerializableModeldata,
+     optimizerModelData: SerializableModeldata,
+     options: InferenceSession.SessionOptions): [SerializableSessionMetadata, number[], number[]] => {
+      const wasm = getInstance();
+
+      let trainingSessionHandle = 0;
+      let sessionOptionsHandle = 0;
+      let allocs: number[] = [];
+      let inputNamesUTF8Encoded: number[] = [];
+      let outputNamesUTF8Encoded: number[] = [];
+
+      let inputNames: string[] = [];
+      let outputNames: string[] = [];
+
+      try {
+        [sessionOptionsHandle, allocs] = setSessionOptions(options);
+        if (wasm._OrtTrainingCreateSession) {
+          trainingSessionHandle = wasm._OrtTrainingCreateSession(
+              sessionOptionsHandle, checkpointHandle, trainModelData[0], trainModelData[1], evalModelData[0],
+              evalModelData[1], optimizerModelData[0], optimizerModelData[1]);
+        } else {
+          throw new Error(NO_TRAIN_FUNCS_MSG);
+        }
+
+        ifErrCodeCheckLastError(trainingSessionHandle, 'Error occurred when trying to create a TrainingSession', false);
+
+        [inputNames, inputNamesUTF8Encoded, outputNames, outputNamesUTF8Encoded] =
+            getTrainingModelInputOutputNames(trainingSessionHandle);
+        return [[trainingSessionHandle, inputNames, outputNames], inputNamesUTF8Encoded, outputNamesUTF8Encoded];
+
+      } catch (e) {
+        if (wasm._OrtTrainingReleaseSession && trainingSessionHandle !== 0) {
+          wasm._OrtTrainingReleaseSession(trainingSessionHandle);
+        }
+        throw e;
+      } finally {
+        wasm._free(trainModelData[0]);
+        wasm._free(evalModelData[0]);
+        wasm._free(optimizerModelData[0]);
+
+        if (sessionOptionsHandle !== 0) {
+          wasm._OrtReleaseSessionOptions(sessionOptionsHandle);
+        }
+        allocs.forEach(alloc => wasm._free(alloc));
+        inputNamesUTF8Encoded.forEach(buf => wasm._OrtFree(buf));
+        outputNamesUTF8Encoded.forEach(buf => wasm._OrtFree(buf));
+      }
+    };
+
+/**
+ * Prepares input and output tensors by creating the tensors in the WASM side then creates a list of the handles of the
+ * WASM tensors.
+ *
+ * @param trainingSessionId
+ * @param indices for each tensor, the index of the input or output name that the tensor corresponds with
+ * @param tensors list of TensorMetaData
+ * @param tensorHandles should pass in an empty list of numbers; modified in-place by this method & stores the resulting
+ *                      handles of the allocated tensors on the heap
+ * @param inputOutputAllocs modified in-place by this method
+ * @param indexAdd constant to add to the index that is passed to prepareInputOutputTensor
+ */
+const createAndAllocateTensors =
+    (trainingSessionId: number, indices: number[], tensors: Array<TensorMetadata|null>, tensorHandles: number[],
+     inputOutputAllocs: number[], indexAdd: number) => {
+      const count = indices.length;
+
+      // creates the tensors
+      for (let i = 0; i < count; i++) {
+        prepareInputOutputTensor(
+            tensors[i], tensorHandles, inputOutputAllocs, trainingSessionId, indexAdd + indices[i]);
+      }
+
+      // moves to heap
+      const wasm = getInstance();
+      const valuesOffset = wasm.stackAlloc(count * 4);
+      let valuesIndex = valuesOffset / 4;
+      for (let i = 0; i < count; i++) {
+        wasm.HEAPU32[valuesIndex++] = tensorHandles[i];
+      }
+
+      return valuesOffset;
+    };
+
+/**
+ * Retrieves the information from the output tensor handles, copies to an array, and frees the WASM information
+ * associated with the tensor handle.
+ *
+ * @param outputValuesOffset
+ * @param outputCount
+ * @returns list of TensorMetadata retrieved from the output handles.
+ */
+const moveOutputToTensorMetadataArr =
+    (outputValuesOffset: number, outputCount: number, outputTensorHandles: number[],
+     outputTensors: Array<TensorMetadata|null>) => {
+      const wasm = getInstance();
+      const output: TensorMetadata[] = [];
+
+      for (let i = 0; i < outputCount; i++) {
+        const tensor = wasm.HEAPU32[outputValuesOffset / 4 + i];
+        if (tensor === outputTensorHandles[i]) {
+          // output tensor is pre-allocated. no need to copy data.
+          output.push(outputTensors[i]!);
+          continue;
+        }
+
+        const beforeGetTensorDataStack = wasm.stackSave();
+        // stack allocate 4 pointer value
+        const tensorDataOffset = wasm.stackAlloc(4 * 4);
+
+        let type: Tensor.Type|undefined, dataOffset = 0;
+        try {
+          const errorCode = wasm._OrtGetTensorData(
+              tensor, tensorDataOffset, tensorDataOffset + 4, tensorDataOffset + 8, tensorDataOffset + 12);
+          ifErrCodeCheckLastError(errorCode, `Can't access output tensor data on index ${i}.`);
+
+          let tensorDataIndex = tensorDataOffset / 4;
+          const dataType = wasm.HEAPU32[tensorDataIndex++];
+          dataOffset = wasm.HEAPU32[tensorDataIndex++];
+          const dimsOffset = wasm.HEAPU32[tensorDataIndex++];
+          const dimsLength = wasm.HEAPU32[tensorDataIndex++];
+          const dims = [];
+          for (let i = 0; i < dimsLength; i++) {
+            dims.push(wasm.HEAPU32[dimsOffset / 4 + i]);
+          }
+          wasm._OrtFree(dimsOffset);
+
+          const size = dims.reduce((a, b) => a * b, 1);
+          type = tensorDataTypeEnumToString(dataType);
+
+          if (type === 'string') {
+            const stringData: string[] = [];
+            let dataIndex = dataOffset / 4;
+            for (let i = 0; i < size; i++) {
+              const offset = wasm.HEAPU32[dataIndex++];
+              const maxBytesToRead = i === size - 1 ? undefined : wasm.HEAPU32[dataIndex] - offset;
+              stringData.push(wasm.UTF8ToString(offset, maxBytesToRead));
+            }
+            output.push([type, dims, stringData, 'cpu']);
+          } else {
+            const typedArrayConstructor = tensorTypeToTypedArrayConstructor(type);
+            const data = new typedArrayConstructor(size);
+            new Uint8Array(data.buffer, data.byteOffset, data.byteLength)
+                .set(wasm.HEAPU8.subarray(dataOffset, dataOffset + data.byteLength));
+            output.push([type, dims, data, 'cpu']);
+          }
+        } finally {
+          wasm.stackRestore(beforeGetTensorDataStack);
+          if (type === 'string' && dataOffset) {
+            wasm._free(dataOffset);
+          }
+          wasm._OrtReleaseTensor(tensor);
+        }
+      }
+
+      return output;
+    };
+
+export const runTrainStep = async(
+    trainingSessionId: number, inputIndices: number[], inputTensors: TensorMetadata[], outputIndices: number[],
+    outputTensors: Array<TensorMetadata|null>, options: InferenceSession.RunOptions): Promise<TensorMetadata[]> => {
+  const wasm = getInstance();
+
+  const inputCount = inputIndices.length;
+  const outputCount = outputIndices.length;
+
+  let runOptionsHandle = 0;
+  let runOptionsAllocs: number[] = [];
+
+  const inputTensorHandles: number[] = [];
+  const outputTensorHandles: number[] = [];
+  const inputOutputAllocs: number[] = [];
+
+  const beforeRunStack = wasm.stackSave();
+
+  try {
+    // prepare parameters by moving them to heap
+    [runOptionsHandle, runOptionsAllocs] = setRunOptions(options);
+
+    // handle inputs -- you don't want anything added to the index
+    const inputValuesOffset = createAndAllocateTensors(
+        trainingSessionId, inputIndices, inputTensors, inputTensorHandles, inputOutputAllocs, 0);
+    // handle outputs
+    // you want inputCount to be added to the index of every output tensor passed to prepareInputOutputTensor
+    const outputValuesOffset = createAndAllocateTensors(
+        trainingSessionId, outputIndices, outputTensors, outputTensorHandles, inputOutputAllocs, inputCount);
+
+    if (wasm._OrtTrainingRunTrainStep) {
+      const errorCode = wasm._OrtTrainingRunTrainStep(
+          trainingSessionId, inputValuesOffset, inputCount, outputValuesOffset, outputCount, runOptionsHandle);
+      ifErrCodeCheckLastError(errorCode, 'failed to call OrtTrainingRunTrainStep in the WebAssembly layer');
+    } else {
+      throw new Error(NO_TRAIN_FUNCS_MSG);
+    }
+
+    return moveOutputToTensorMetadataArr(outputValuesOffset, outputCount, outputTensorHandles, outputTensors);
+  } finally {
+    wasm.stackRestore(beforeRunStack);
+
+    inputTensorHandles.forEach(v => wasm._OrtReleaseTensor(v));
+    outputTensorHandles.forEach(v => wasm._OrtReleaseTensor(v));
+    inputOutputAllocs.forEach(p => wasm._free(p));
+
+    if (runOptionsHandle !== 0) {
+      wasm._OrtReleaseRunOptions(runOptionsHandle);
+    }
+    runOptionsAllocs.forEach(p => wasm._free(p));
+  }
+};
+
+export const getParametersSize = (trainingSessionId: number, trainableOnly: boolean): number => {
+  const wasm = getInstance();
+  const stack = wasm.stackSave();
+
+  try {
+    const sizeOffset = wasm.stackAlloc(4);
+    if (wasm._OrtTrainingGetParametersSize) {
+      const errorCode = wasm._OrtTrainingGetParametersSize(trainingSessionId, sizeOffset, trainableOnly);
+      ifErrCodeCheckLastError(errorCode, 'Can\'t get parameters size');
+
+      return wasm.HEAP32[sizeOffset / 4];
+    } else {
+      throw new Error(NO_TRAIN_FUNCS_MSG);
+    }
+  } finally {
+    wasm.stackRestore(stack);
+  }
+};
+
+export const getContiguousParameters =
+    async(trainingSessionId: number, trainableOnly: boolean): Promise<TensorMetadata> => {
+  const wasm = getInstance();
+  const stack = wasm.stackSave();
+
+  const tensorTypeAsString = 'float32';
+  const locationAsString = 'cpu';
+
+  const parametersSize = getParametersSize(trainingSessionId, trainableOnly);
+  let tensor = 0;
+
+  // allocates a buffer of the correct size on the WASM heap
+  const paramsByteLength = 4 * parametersSize;
+  const paramsOffset = wasm._malloc(paramsByteLength);
+
+  // handles the dimensions-related createTensor parameters
+  const dims = [parametersSize];
+
+  const dimsOffset = wasm.stackAlloc(4);
+  const dimsIndex = dimsOffset / 4;
+  wasm.HEAP32[dimsIndex] = parametersSize;
+
+  try {
+    // wraps allocated array in a tensor
+    tensor = wasm._OrtCreateTensor(
+        tensorDataTypeStringToEnum(tensorTypeAsString), paramsOffset, paramsByteLength, dimsOffset, dims.length,
+        dataLocationStringToEnum(locationAsString));
+    ifErrCodeCheckLastError(
+        tensor, `Can't create tensor for getContiguousParameters. session=${trainingSessionId}.`, false);
+
+    if (wasm._OrtTrainingCopyParametersToBuffer) {
+      const errCode = wasm._OrtTrainingCopyParametersToBuffer(trainingSessionId, tensor, parametersSize, trainableOnly);
+      ifErrCodeCheckLastError(errCode, 'Can\'t get contiguous parameters.');
+
+    } else {
+      throw new Error(NO_TRAIN_FUNCS_MSG);
+    }
+
+    // copies from WASM memory to a JavaScript typed array, which is then put into a TensorMetadata object
+    const typedArrayConstructor = tensorTypeToTypedArrayConstructor(tensorTypeAsString);
+    const data = new typedArrayConstructor(parametersSize);
+    const output: TensorMetadata[] = [];
+    new Uint8Array(data.buffer, data.byteOffset, data.byteLength)
+        .set(wasm.HEAPU8.subarray(paramsOffset, paramsOffset + paramsByteLength));
+    output.push([tensorTypeAsString, dims, data, locationAsString]);
+    if (output.length !== 1) {
+      throw new Error(`something unexpected happened in the getContiguousParameters function. Expected output length of
+     one, got ${output.length}`);
+    } else {
+      return output[0];
+    }
+  } finally {
+    if (tensor !== 0) {
+      wasm._OrtReleaseTensor(tensor);
+    }
+    wasm._free(paramsOffset);
+    wasm._free(dimsOffset);
+    wasm.stackRestore(stack);
+  }
+};
+
+export const loadParametersBuffer =
+    async(trainingSessionId: number, buffer: Uint8Array, trainableOnly: boolean): Promise<void> => {
+  const wasm = getInstance();
+  const stack = wasm.stackSave();
+
+  const tensorTypeAsString = 'float32';
+  const locationAsString = 'cpu';
+
+  // allocates & copies JavaScript buffer to WASM heap
+  const bufferByteLength = buffer.length;
+  const bufferCount = bufferByteLength / 4;
+  const bufferOffset = wasm._malloc(bufferByteLength);
+  wasm.HEAPU8.set(buffer, bufferOffset);
+
+  // allocates and handles moving dimensions information to WASM memory
+  const dimsOffset = wasm.stackAlloc(4);
+  wasm.HEAP32[dimsOffset / 4] = bufferCount;
+  const dimsLength = 1;
+  let tensor = 0;
+
+  try {
+    tensor = wasm._OrtCreateTensor(
+        tensorDataTypeStringToEnum(tensorTypeAsString), bufferOffset, bufferByteLength, dimsOffset, dimsLength,
+        dataLocationStringToEnum(locationAsString));
+    ifErrCodeCheckLastError(tensor, `Can't create tensor for input/output. session=${trainingSessionId}`, false);
+
+    if (wasm._OrtTrainingCopyParametersFromBuffer) {
+      const errCode = wasm._OrtTrainingCopyParametersFromBuffer(trainingSessionId, tensor, bufferCount, trainableOnly);
+      ifErrCodeCheckLastError(errCode, 'Can\'t copy buffer to parameters.');
+    } else {
+      throw new Error(NO_TRAIN_FUNCS_MSG);
+    }
+  } finally {
+    if (tensor !== 0) {
+      wasm._OrtReleaseTensor(tensor);
+    }
+    wasm.stackRestore(stack);
+    wasm._free(bufferOffset);
+    wasm._free(dimsOffset);
+  }
+};
+
+export const releaseTrainingSessionAndCheckpoint =
+    (checkpointId: number, sessionId: number, inputNamesUTF8Encoded: number[], outputNamesUTF8Encoded: number[]):
+        void => {
+          const wasm = getInstance();
+          inputNamesUTF8Encoded.forEach(buf => wasm._OrtFree(buf));
+          outputNamesUTF8Encoded.forEach(buf => wasm._OrtFree(buf));
+
+          if (wasm._OrtTrainingReleaseSession) {
+            wasm._OrtTrainingReleaseSession(sessionId);
+          }
+          if (wasm._OrtTrainingReleaseCheckpoint) {
+            wasm._OrtTrainingReleaseCheckpoint(checkpointId);
+          }
+        };
diff --git a/js/web/package-lock.json b/js/web/package-lock.json
index 9567bc172c9ed..890c5a0f34765 100644
--- a/js/web/package-lock.json
+++ b/js/web/package-lock.json
@@ -25,7 +25,7 @@
         "@types/minimatch": "^5.1.2",
         "@types/minimist": "^1.2.2",
         "@types/platform": "^1.3.4",
-        "@webgpu/types": "^0.1.30",
+        "@webgpu/types": "^0.1.38",
         "base64-js": "^1.5.1",
         "chai": "^4.3.7",
         "electron": "^23.1.2",
@@ -323,9 +323,9 @@
       }
     },
     "node_modules/@webgpu/types": {
-      "version": "0.1.30",
-      "resolved": "https://registry.npmjs.org/@webgpu/types/-/types-0.1.30.tgz",
-      "integrity": "sha512-9AXJSmL3MzY8ZL//JjudA//q+2kBRGhLBFpkdGksWIuxrMy81nFrCzj2Am+mbh8WoU6rXmv7cY5E3rdlyru2Qg==",
+      "version": "0.1.38",
+      "resolved": "https://registry.npmjs.org/@webgpu/types/-/types-0.1.38.tgz",
+      "integrity": "sha512-7LrhVKz2PRh+DD7+S+PVaFd5HxaWQvoMqBbsV9fNJO1pjUs1P8bM2vQVNfk+3URTqbuTI7gkXi0rfsN0IadoBA==",
       "dev": true
     },
     "node_modules/accepts": {
@@ -3767,9 +3767,9 @@
       }
     },
     "@webgpu/types": {
-      "version": "0.1.30",
-      "resolved": "https://registry.npmjs.org/@webgpu/types/-/types-0.1.30.tgz",
-      "integrity": "sha512-9AXJSmL3MzY8ZL//JjudA//q+2kBRGhLBFpkdGksWIuxrMy81nFrCzj2Am+mbh8WoU6rXmv7cY5E3rdlyru2Qg==",
+      "version": "0.1.38",
+      "resolved": "https://registry.npmjs.org/@webgpu/types/-/types-0.1.38.tgz",
+      "integrity": "sha512-7LrhVKz2PRh+DD7+S+PVaFd5HxaWQvoMqBbsV9fNJO1pjUs1P8bM2vQVNfk+3URTqbuTI7gkXi0rfsN0IadoBA==",
       "dev": true
     },
     "accepts": {
diff --git a/js/web/package.json b/js/web/package.json
index 8ae5b733e5f21..9b4531d7766fe 100644
--- a/js/web/package.json
+++ b/js/web/package.json
@@ -20,10 +20,11 @@
   },
   "scripts": {
     "preprepare": "node -e \"require('node:fs').copyFileSync('./node_modules/long/index.d.ts', './node_modules/long/umd/index.d.ts')\"",
-    "prepare": "tsc",
+    "prepare": "tsc --build ./script",
     "build:doc": "node ./script/generate-webgl-operator-md && node ./script/generate-webgpu-operator-md",
     "pull:wasm": "node ./script/pull-prebuilt-wasm-artifacts",
     "test:e2e": "node ./test/e2e/run",
+    "prebuild": "tsc -p . --noEmit && tsc -p lib/wasm/proxy-worker --noEmit",
     "build": "node ./script/build",
     "test": "tsc --build ../scripts && node ../scripts/prepare-onnx-node-tests && node ./script/test-runner-cli",
     "prepack": "node ./script/build && node ./script/prepack"
@@ -42,7 +43,7 @@
     "@types/minimatch": "^5.1.2",
     "@types/minimist": "^1.2.2",
     "@types/platform": "^1.3.4",
-    "@webgpu/types": "^0.1.30",
+    "@webgpu/types": "^0.1.38",
     "base64-js": "^1.5.1",
     "chai": "^4.3.7",
     "electron": "^23.1.2",
@@ -66,18 +67,48 @@
   "main": "dist/ort-web.node.js",
   "exports": {
     ".": {
-      "node": {
-        "types": "./types.d.ts",
-        "default": "./dist/ort-web.node.js"
-      },
+      "node": "./dist/ort.node.min.js",
       "default": {
-        "types": "./types.d.ts",
-        "default": "./dist/ort.min.js"
+        "import": "./dist/esm/ort.min.js",
+        "require": "./dist/cjs/ort.min.js",
+        "default": {
+          "development": "./dist/ort.js",
+          "default": "./dist/ort.min.js"
+        }
       }
     },
+    "./experimental": {
+      "import": "./dist/esm/ort.all.min.js",
+      "require": "./dist/cjs/ort.all.min.js",
+      "default": {
+        "development": "./dist/ort.all.js",
+        "default": "./dist/ort.all.min.js"
+      }
+    },
+    "./wasm": {
+      "import": "./dist/esm/ort.wasm.min.js",
+      "require": "./dist/cjs/ort.wasm.min.js",
+      "default": "./dist/ort.wasm.min.js"
+    },
+    "./wasm-core": {
+      "import": "./dist/esm/ort.wasm-core.min.js",
+      "require": "./dist/cjs/ort.wasm-core.min.js",
+      "default": "./dist/ort.wasm-core.min.js"
+    },
+    "./webgl": {
+      "import": "./dist/esm/ort.webgl.min.js",
+      "require": "./dist/cjs/ort.webgl.min.js",
+      "default": "./dist/ort.webgl.min.js"
+    },
     "./webgpu": {
-      "types": "./types.d.ts",
+      "import": "./dist/esm/ort.webgpu.min.js",
+      "require": "./dist/cjs/ort.webgpu.min.js",
       "default": "./dist/ort.webgpu.min.js"
+    },
+    "./training": {
+      "import": "./dist/esm/ort.training.wasm.min.js",
+      "require": "./dist/cjs/ort.training.wasm.min.js",
+      "default": "./dist/ort.training.wasm.min.js"
     }
   },
   "types": "./types.d.ts",
diff --git a/js/web/script/build.ts b/js/web/script/build.ts
index d3a5be429bfa1..5151f27582c1f 100644
--- a/js/web/script/build.ts
+++ b/js/web/script/build.ts
@@ -1,186 +1,447 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
-import {spawnSync} from 'child_process';
-import * as fs from 'fs-extra';
+import * as esbuild from 'esbuild';
 import minimist from 'minimist';
-import npmlog from 'npmlog';
-import * as path from 'path';
-
-// CMD args
-const args = minimist(process.argv);
-
-// --bundle-mode=prod (default)
-// --bundle-mode=dev
-// --bundle-mode=perf
-// --bundle-mode=node
-const MODE = args['bundle-mode'] || 'prod';
-if (['prod', 'dev', 'perf', 'node'].indexOf(MODE) === -1) {
-  throw new Error(`unknown build mode: ${MODE}`);
-}
+import * as fs from 'node:fs/promises';
+import * as path from 'node:path';
 
-// --wasm (default)
-// --no-wasm
-const WASM = typeof args.wasm === 'undefined' ? true : !!args.wasm;
-
-// -a; --analyzer
-const ANALYZER = !!args.a || !!args.analyzer;
-
-// -f; --filter=<regex>
-const FILTER = args.f || args.filter;
-
-// Path variables
-const ROOT_FOLDER = path.join(__dirname, '..');
-const WASM_BINDING_FOLDER = path.join(ROOT_FOLDER, 'lib', 'wasm', 'binding');
-const WASM_BINDING_JS_PATH = path.join(WASM_BINDING_FOLDER, 'ort-wasm.js');
-const WASM_BINDING_THREADED_JS_PATH = path.join(WASM_BINDING_FOLDER, 'ort-wasm-threaded.js');
-const WASM_BINDING_SIMD_THREADED_JSEP_JS_PATH = path.join(WASM_BINDING_FOLDER, 'ort-wasm-simd-threaded.jsep.js');
-const WASM_BINDING_THREADED_WORKER_JS_PATH = path.join(WASM_BINDING_FOLDER, 'ort-wasm-threaded.worker.js');
-const WASM_BINDING_THREADED_MIN_JS_PATH = path.join(WASM_BINDING_FOLDER, 'ort-wasm-threaded.min.js');
-const WASM_BINDING_SIMD_THREADED_JSEP_MIN_JS_PATH =
-    path.join(WASM_BINDING_FOLDER, 'ort-wasm-simd-threaded.jsep.min.js');
-const WASM_BINDING_THREADED_MIN_WORKER_JS_PATH = path.join(WASM_BINDING_FOLDER, 'ort-wasm-threaded.min.worker.js');
-
-const WASM_DIST_FOLDER = path.join(ROOT_FOLDER, 'dist');
-const WASM_WASM_PATH = path.join(WASM_DIST_FOLDER, 'ort-wasm.wasm');
-const WASM_THREADED_WASM_PATH = path.join(WASM_DIST_FOLDER, 'ort-wasm-threaded.wasm');
-const WASM_SIMD_WASM_PATH = path.join(WASM_DIST_FOLDER, 'ort-wasm-simd.wasm');
-const WASM_SIMD_THREADED_WASM_PATH = path.join(WASM_DIST_FOLDER, 'ort-wasm-simd-threaded.wasm');
-const WASM_SIMD_JSEP_WASM_PATH = path.join(WASM_DIST_FOLDER, 'ort-wasm-simd.jsep.wasm');
-const WASM_SIMD_THREADED_JSEP_WASM_PATH = path.join(WASM_DIST_FOLDER, 'ort-wasm-simd-threaded.jsep.wasm');
-const WASM_THREADED_WORKER_JS_PATH = path.join(WASM_DIST_FOLDER, 'ort-wasm-threaded.worker.js');
-const WASM_THREADED_JS_PATH = path.join(WASM_DIST_FOLDER, 'ort-wasm-threaded.js');
-const WASM_SIMD_THREADED_JSEP_JS_PATH = path.join(WASM_DIST_FOLDER, 'ort-wasm-simd-threaded.jsep.js');
-
-function validateFile(path: string): void {
-  npmlog.info('Build', `Ensure file: ${path}`);
-  if (!fs.pathExistsSync(path)) {
-    throw new Error(`file does not exist: ${path}`);
-  }
-  if (fs.statSync(path).size === 0) {
-    throw new Error(`file is empty: ${path}`);
-  }
-}
+/**
+ * @summary Build script for ort-web using esbuild.
+ */
 
-if (WASM) {
-  npmlog.info('Build', 'Validating WebAssembly artifacts...');
-  try {
-    validateFile(WASM_BINDING_JS_PATH);
-    validateFile(WASM_BINDING_THREADED_JS_PATH);
-    validateFile(WASM_BINDING_SIMD_THREADED_JSEP_JS_PATH);
-    validateFile(WASM_BINDING_THREADED_WORKER_JS_PATH);
-    validateFile(WASM_WASM_PATH);
-    validateFile(WASM_THREADED_WASM_PATH);
-    validateFile(WASM_SIMD_WASM_PATH);
-    validateFile(WASM_SIMD_THREADED_WASM_PATH);
-    validateFile(WASM_SIMD_JSEP_WASM_PATH);
-    validateFile(WASM_SIMD_THREADED_JSEP_WASM_PATH);
-  } catch (e) {
-    npmlog.error('Build', `WebAssembly files are not ready. build WASM first. ERR: ${e}`);
-    throw e;
-  }
-  npmlog.info('Build', 'Validating WebAssembly artifacts... DONE');
+const args = minimist(process.argv.slice(2));
+/**
+ * --bundle-mode=prod (default)
+ *   Build multiple ort-web bundles for production.
+ *
+ * --bundle-mode=dev
+ *   Build a single ort-web bundle for development, and a test bundle.
+ *
+ * --bundle-mode=perf
+ *   Build a single ort-web bundle for performance test, and a test bundle.
+ *
+ * --bundle-mode=node
+ *   Build a single ort-web bundle for nodejs.
+ */
+const BUNDLE_MODE: 'prod'|'dev'|'perf'|'node' = args['bundle-mode'] || 'prod';
+
+/**
+ * --debug
+ *   Enable debug mode. In this mode, esbuild metafile feature will be enabled. Simple bundle analysis will be printed.
+ *
+ * --debug=verbose
+ *   Enable debug mode. In this mode, esbuild metafile feature will be enabled. Detailed bundle analysis will be
+ * printed.
+ *
+ * --debug=save
+ *  Enable debug mode. In this mode, esbuild metafile feature will be enabled. Full bundle analysis will be saved to a
+ * file as JSON.
+ */
+const DEBUG = args.debug;  // boolean|'verbose'|'save'
 
-  const VERSION = require(path.join(__dirname, '../package.json')).version;
-  const COPYRIGHT_BANNER = `/*!
- * ONNX Runtime Web v${VERSION}
+const SOURCE_ROOT_FOLDER = path.join(__dirname, '../..');  // <ORT_ROOT>/js/
+const DEFAULT_DEFINE = {
+  'BUILD_DEFS.DISABLE_WEBGL': 'false',
+  'BUILD_DEFS.DISABLE_WEBGPU': 'false',
+  'BUILD_DEFS.DISABLE_WASM': 'false',
+  'BUILD_DEFS.DISABLE_WASM_PROXY': 'false',
+  'BUILD_DEFS.DISABLE_WASM_THREAD': 'false',
+  'BUILD_DEFS.DISABLE_TRAINING': 'true',
+};
+
+const COPYRIGHT_HEADER = `/*!
+ * ONNX Runtime Web v${require('../package.json').version}
  * Copyright (c) Microsoft Corporation. All rights reserved.
  * Licensed under the MIT License.
- */
-`;
-
-  npmlog.info('Build', 'Minimizing file "ort-wasm-threaded.js"...');
-  try {
-    const terser = spawnSync(
-        'npx',
-        [
-          'terser', WASM_BINDING_THREADED_JS_PATH, '--compress', 'passes=2', '--format', 'comments=false', '--mangle',
-          'reserved=[_scriptDir,startWorker]', '--module'
-        ],
-        {shell: true, encoding: 'utf-8', cwd: ROOT_FOLDER});
-    if (terser.status !== 0) {
-      console.error(terser.error);
-      process.exit(terser.status === null ? undefined : terser.status);
-    }
+ */`;
 
-    fs.writeFileSync(WASM_BINDING_THREADED_MIN_JS_PATH, terser.stdout);
-    fs.writeFileSync(WASM_THREADED_JS_PATH, `${COPYRIGHT_BANNER}${terser.stdout}`);
+interface OrtBuildOptions {
+  isProduction?: boolean;
+  isNode?: boolean;
+  format: 'iife'|'cjs'|'esm';
+  outputBundleName: string;
+  define?: Record<string, string>;
+}
 
-    validateFile(WASM_BINDING_THREADED_MIN_JS_PATH);
-    validateFile(WASM_THREADED_JS_PATH);
-  } catch (e) {
-    npmlog.error('Build', `Failed to run terser on ort-wasm-threaded.js. ERR: ${e}`);
-    throw e;
+async function buildBundle(options: esbuild.BuildOptions) {
+  const result = await esbuild.build({
+    logLevel: DEBUG ? (DEBUG === 'verbose' || DEBUG === 'save' ? 'verbose' : 'debug') : 'info',
+    metafile: !!DEBUG,
+    absWorkingDir: SOURCE_ROOT_FOLDER,
+    bundle: true,
+    banner: {js: COPYRIGHT_HEADER},
+    ...options
+  });
+  if (DEBUG) {
+    if (DEBUG === 'save') {
+      await fs.writeFile(
+          `${path.basename(options.outfile!)}.esbuild.metafile.json`, JSON.stringify(result.metafile!, null, 2));
+    } else {
+      console.log(await esbuild.analyzeMetafile(result.metafile!, {verbose: DEBUG === 'verbose'}));
+    }
   }
-  npmlog.info('Build', 'Minimizing file "ort-wasm-threaded.js"... DONE');
-
-  npmlog.info('Build', 'Minimizing file "ort-wasm-simd-threaded.jsep.js"...');
-  try {
-    const terser = spawnSync(
-        'npx',
-        [
-          'terser', WASM_BINDING_SIMD_THREADED_JSEP_JS_PATH, '--compress', 'passes=2', '--format', 'comments=false',
-          '--mangle', 'reserved=[_scriptDir,startWorker]', '--module'
-        ],
-        {shell: true, encoding: 'utf-8', cwd: ROOT_FOLDER});
-    if (terser.status !== 0) {
-      console.error(terser.error);
-      process.exit(terser.status === null ? undefined : terser.status);
+  return result;
+}
+
+async function minifyCode(sourceCode: string): Promise<string> {
+  const result = await esbuild.transform(sourceCode, {
+    minify: true,
+    legalComments: 'none',
+  });
+  return result.code;
+}
+
+async function buildOrt({
+  isProduction = false,
+  isNode = false,
+  format,
+  outputBundleName,
+  define = DEFAULT_DEFINE,
+}: OrtBuildOptions) {
+  // #region Plugin: resolve ignore imports
+
+  /**
+   * This plugin is used to ignore a few nodejs imports that are not used in the browser. Those imported functions are
+   * not really used in the browser because they are usually put behind a feature check. However, esbuild does not know
+   * that. It will complain about those imports are not available in the browser.
+   *
+   * This plugin will ignore those imports and replace them with empty exports.
+   */
+  const excludeNodejsImports = {
+    name: 'exclude-nodejs-imports',
+    setup(build: esbuild.PluginBuild) {
+      build.onResolve({filter: /(^node:|^worker_threads$|^fs$|^path$|^perf_hooks$|^os$)/}, args => ({
+                                                                                             namespace: 'nodejs-ignore',
+                                                                                             path: args.path,
+                                                                                             sideEffects: false,
+                                                                                           }));
+      build.onLoad({filter: /.*/, namespace: 'nodejs-ignore'}, args => {
+        switch (args.path) {
+          case 'node:fs/promises':
+          case 'node:fs':
+          case 'fs':
+            return {contents: 'export const readFile = undefined;'};
+          case 'node:os':
+          case 'os':
+            return {contents: 'export const cpus = undefined;'};
+          case 'node:path':
+          case 'path':
+            return {contents: 'export const join = undefined;'};
+          default:
+            return {contents: ''};
+        }
+      });
+    },
+  };
+  // #endregion
+
+  // #region Plugin: web assembly multi-thread worker loader
+
+  /**
+   * This plugin is used to load web assembly multi-thread worker code as string.
+   *
+   * This allows to create the worker from a Blob, so we don't need to create a separate file for the worker.
+   */
+  const wasmThreadedHandler = {
+    name: 'wasm-threaded-handler',
+    setup(build: esbuild.PluginBuild) {
+      build.onLoad({filter: /[\\/]ort-wasm-threaded\.worker\.js$/}, async args => {
+        let contents = await fs.readFile(args.path, {encoding: 'utf-8'});
+        if (isProduction) {
+          contents = await minifyCode(contents);
+        }
+        return {loader: 'text', contents};
+      });
+    },
+  };
+  // #endregion
+
+  // #region Plugin: generated emscripten .js loader
+
+  /**
+   * This plugin is used to patch the generated emscripten .js file for multi-thread build.
+   *
+   * Since we use inline worker for multi-thread, we make an optimization to use function.toString() to get the
+   * implementation of the exported `ortWasmThreaded` function to reduce the size of the bundle. However, the generated
+   * function uses a variable `_scriptDir` which is defined inside an IIFE closure. When we use function.toString(), the
+   * worker code will throw "_scriptDir is not defined" error.
+   *
+   * To fix this error, we need to patch the generated code to replace access to `_scriptDir` with `typeof _scriptDir
+   * !== "undefined" && _scriptDir`.
+   */
+  const emscriptenThreadedJsHandler = {
+    name: 'emscripten-threaded-js-handler',
+    setup(build: esbuild.PluginBuild) {
+      build.onLoad({filter: /ort-wasm.*-threaded.*\.js$/}, async args => {
+        let contents = await fs.readFile(args.path, {encoding: 'utf-8'});
+        // For debug build, Emscripten generates the following code:
+        //
+        // if (_scriptDir) {
+        //   scriptDirectory = _scriptDir;
+        // }
+        //
+        // We replace it with:
+        //
+        // if (typeof _scriptDir !== "undefined" && _scriptDir) {
+        //   scriptDirectory = _scriptDir;
+        // }
+        contents = contents.replace('if (_scriptDir) {', 'if (typeof _scriptDir !== "undefined" && _scriptDir) {');
+
+        // For release build, Emscripten generates the following code:
+        //
+        // ...,_scriptDir&&(H=_scriptDir),...
+        // We replace it with:
+        // ...,(typeof _scriptDir !== "undefined" && _scriptDir)&&(H=_scriptDir),...
+        contents =
+            contents.replace(/_scriptDir(&&\(.+=_scriptDir\))/, '(typeof _scriptDir !== "undefined" && _scriptDir)$1');
+
+        return {contents};
+      });
     }
+  };
+  // #endregion
 
-    fs.writeFileSync(WASM_BINDING_SIMD_THREADED_JSEP_MIN_JS_PATH, terser.stdout);
-    fs.writeFileSync(WASM_SIMD_THREADED_JSEP_JS_PATH, `${COPYRIGHT_BANNER}${terser.stdout}`);
+  // #region Plugin: proxy worker loader
 
-    validateFile(WASM_BINDING_SIMD_THREADED_JSEP_MIN_JS_PATH);
-    validateFile(WASM_SIMD_THREADED_JSEP_JS_PATH);
-  } catch (e) {
-    npmlog.error('Build', `Failed to run terser on ort-wasm-threaded.js. ERR: ${e}`);
-    throw e;
-  }
-  npmlog.info('Build', 'Minimizing file "ort-wasm-simd-threaded.jsep.js"... DONE');
-
-  npmlog.info('Build', 'Minimizing file "ort-wasm-threaded.worker.js"...');
-  try {
-    const terser = spawnSync(
-        'npx',
-        [
-          'terser', WASM_BINDING_THREADED_WORKER_JS_PATH, '--compress', 'passes=2', '--format', 'comments=false',
-          '--mangle', 'reserved=[_scriptDir,startWorker]', '--toplevel'
-        ],
-        {shell: true, encoding: 'utf-8'});
-    if (terser.status !== 0) {
-      console.error(terser.error);
-      process.exit(terser.status === null ? undefined : terser.status);
+  /**
+   * This plugin is used to load proxy worker code as string.
+   */
+  const proxyWorkerHandler = {
+    name: 'proxy-worker-handler',
+    setup(build: esbuild.PluginBuild) {
+      build.onResolve(
+          {filter: /proxy-worker\/main$/},
+          async args => ({path: args.path, namespace: 'proxy-worker', pluginData: args.resolveDir}));
+
+      build.onLoad({filter: /.*/, namespace: 'proxy-worker'}, async args => {
+        const result = await buildBundle({
+          entryPoints: [path.resolve(args.pluginData, args.path)],
+          outfile: `web/dist/${outputBundleName}.proxy.js`,
+          platform: 'browser',
+          plugins: [excludeNodejsImports, wasmThreadedHandler, emscriptenThreadedJsHandler],
+          define: {
+            ...build.initialOptions.define,
+            'BUILD_DEFS.DISABLE_WASM_PROXY': 'true',
+          },
+          sourcemap: isProduction ? false : 'inline',
+          minify: isProduction,
+          write: false,
+        });
+
+        return {loader: 'text', contents: result.outputFiles![0].text};
+      });
+    },
+  };
+  // #endregion
+
+  await buildBundle({
+    entryPoints: ['web/lib/index.ts'],
+    outfile: `web/dist/${outputBundleName}.js`,
+    platform: isNode ? 'node' : 'browser',
+    format,
+    globalName: 'ort',
+    plugins: isNode ? undefined :
+                      [excludeNodejsImports, proxyWorkerHandler, wasmThreadedHandler, emscriptenThreadedJsHandler],
+    external: isNode ? ['onnxruntime-common'] : undefined,
+    define,
+    sourcemap: isProduction ? 'linked' : 'inline',
+    minify: isProduction,
+  });
+}
+
+async function buildTest() {
+  const isProduction = BUNDLE_MODE === 'perf';
+
+  await buildBundle({
+    absWorkingDir: path.join(SOURCE_ROOT_FOLDER, 'web/test'),
+
+    entryPoints: ['test-main.ts'],
+    outfile: isProduction ? 'ort.test.min.js' : 'ort.test.js',
+    platform: 'browser',
+    format: 'iife',
+    define: DEFAULT_DEFINE,
+    sourcemap: isProduction ? false : 'inline',
+    sourceRoot: path.join(SOURCE_ROOT_FOLDER, 'web/test'),
+    external: ['../../node'],
+    plugins: [
+      // polyfill nodejs modules
+      require('esbuild-plugin-polyfill-node').polyfillNode({globals: false}),
+      // make "ort" external
+      {
+        name: 'make-ort-external',
+        setup(build: esbuild.PluginBuild) {
+          build.onResolve(
+              {filter: /^onnxruntime-common$/},
+              _args => ({path: 'onnxruntime-common', namespace: 'make-ort-external'}));
+          build.onLoad(
+              {filter: /.*/, namespace: 'make-ort-external'},
+              _args => ({contents: 'module.exports = globalThis.ort;'}));
+        }
+      }
+    ],
+    minify: isProduction,
+  });
+}
+
+async function main() {
+  // tasks for each esbuild bundle
+  const buildTasks: Array<Promise<void>> = [];
+  /**
+   * add one build task
+   */
+  const addBuildTask = async (task: Promise<void>) => {
+    if (DEBUG) {
+      // in DEBUG mode, build sequentially
+      await task;
+    } else {
+      buildTasks.push(task);
     }
+  };
+  /**
+   * add all 6 build tasks for web bundles. Includes:
+   * - IIFE, debug:                [name].js
+   * - IIFE, production:           [name].min.js
+   * - CJS, debug:                 cjs/[name].js
+   * - CJS, production:            cjs/[name].min.js
+   * - ESM, debug:                 esm/[name].js
+   * - ESM, production:            esm/[name].min.js
+   */
+  const addAllWebBuildTasks = async (options: Omit<OrtBuildOptions, 'format'>) => {
+    // [name].js
+    await addBuildTask(buildOrt({
+      ...options,
+      format: 'iife',
+    }));
+    // [name].min.js
+    await addBuildTask(buildOrt({
+      ...options,
+      outputBundleName: options.outputBundleName + '.min',
+      format: 'iife',
+      isProduction: true,
+    }));
+    // cjs/[name].js
+    await addBuildTask(buildOrt({
+      ...options,
+      outputBundleName: 'cjs/' + options.outputBundleName,
+      format: 'cjs',
+    }));
+    // cjs/[name].min.js
+    await addBuildTask(buildOrt({
+      ...options,
+      outputBundleName: 'cjs/' + options.outputBundleName + '.min',
+      format: 'cjs',
+      isProduction: true,
+    }));
+    // esm/[name].js
+    await addBuildTask(buildOrt({
+      ...options,
+      outputBundleName: 'esm/' + options.outputBundleName,
+      format: 'esm',
+    }));
+    // esm/[name].min.js
+    await addBuildTask(buildOrt({
+      ...options,
+      outputBundleName: 'esm/' + options.outputBundleName + '.min',
+      format: 'esm',
+      isProduction: true,
+    }));
+  };
 
-    fs.writeFileSync(WASM_BINDING_THREADED_MIN_WORKER_JS_PATH, terser.stdout);
-    fs.writeFileSync(WASM_THREADED_WORKER_JS_PATH, `${COPYRIGHT_BANNER}${terser.stdout}`);
+  if (BUNDLE_MODE === 'node' || BUNDLE_MODE === 'prod') {
+    // ort.node.min.js
+    await addBuildTask(buildOrt({
+      isProduction: true,
+      isNode: true,
+      format: 'cjs',
+      outputBundleName: 'ort.node.min',
+      define: {
+        ...DEFAULT_DEFINE,
+        'BUILD_DEFS.DISABLE_WEBGPU': 'true',
+        'BUILD_DEFS.DISABLE_WEBGL': 'true',
+        'BUILD_DEFS.DISABLE_WASM_PROXY': 'true',
+        'BUILD_DEFS.DISABLE_WASM_THREAD': 'true',
+      },
+    }));
+  }
 
-    validateFile(WASM_BINDING_THREADED_MIN_WORKER_JS_PATH);
-    validateFile(WASM_THREADED_WORKER_JS_PATH);
-  } catch (e) {
-    npmlog.error('Build', `Failed to run terser on ort-wasm-threaded.worker.js. ERR: ${e}`);
-    throw e;
+  if (BUNDLE_MODE === 'dev') {
+    // ort.all.js
+    await addBuildTask(buildOrt({
+      outputBundleName: 'ort.all',
+      format: 'iife',
+    }));
   }
-  npmlog.info('Build', 'Minimizing file "ort-wasm-threaded.worker.js"... DONE');
-}
 
-npmlog.info('Build', 'Building bundle...');
-{
-  npmlog.info('Build.Bundle', 'Running webpack to generate bundles...');
-  const webpackArgs = ['webpack', '--env', `--bundle-mode=${MODE}`];
-  if (ANALYZER) {
-    webpackArgs.push('--env', '-a');
+  if (BUNDLE_MODE === 'perf') {
+    // ort.all.min.js
+    await addBuildTask(buildOrt({
+      isProduction: true,
+      outputBundleName: 'ort.all.min',
+      format: 'iife',
+    }));
   }
-  if (FILTER) {
-    webpackArgs.push('--env', `-f=${FILTER}`);
+
+  if (BUNDLE_MODE === 'prod') {
+    // ort.all[.min].js
+    await addAllWebBuildTasks({outputBundleName: 'ort.all'});
+
+    // ort[.min].js
+    await addAllWebBuildTasks({
+      outputBundleName: 'ort',
+      define: {...DEFAULT_DEFINE, 'BUILD_DEFS.DISABLE_WEBGPU': 'true'},
+    });
+    // ort.webgpu[.min].js
+    await addAllWebBuildTasks({
+      outputBundleName: 'ort.webgpu',
+      define: {...DEFAULT_DEFINE, 'BUILD_DEFS.DISABLE_WEBGL': 'true'},
+    });
+    // ort.wasm[.min].js
+    await addAllWebBuildTasks({
+      outputBundleName: 'ort.wasm',
+      define: {...DEFAULT_DEFINE, 'BUILD_DEFS.DISABLE_WEBGPU': 'true', 'BUILD_DEFS.DISABLE_WEBGL': 'true'},
+    });
+    // ort.webgl[.min].js
+    await addAllWebBuildTasks({
+      outputBundleName: 'ort.webgl',
+      define: {...DEFAULT_DEFINE, 'BUILD_DEFS.DISABLE_WEBGPU': 'true', 'BUILD_DEFS.DISABLE_WASM': 'true'},
+    });
+    // ort.wasm-core[.min].js
+    await addAllWebBuildTasks({
+      outputBundleName: 'ort.wasm-core',
+      define: {
+        ...DEFAULT_DEFINE,
+        'BUILD_DEFS.DISABLE_WEBGPU': 'true',
+        'BUILD_DEFS.DISABLE_WEBGL': 'true',
+        'BUILD_DEFS.DISABLE_WASM_PROXY': 'true',
+        'BUILD_DEFS.DISABLE_WASM_THREAD': 'true',
+      },
+    });
+    // ort.training.wasm[.min].js
+    await addAllWebBuildTasks({
+      outputBundleName: 'ort.training.wasm',
+      define: {
+        ...DEFAULT_DEFINE,
+        'BUILD_DEFS.DISABLE_TRAINING': 'false',
+        'BUILD_DEFS.DISABLE_WEBGPU': 'true',
+        'BUILD_DEFS.DISABLE_WEBGL': 'true',
+      },
+    });
   }
-  npmlog.info('Build.Bundle', `CMD: npx ${webpackArgs.join(' ')}`);
-  const webpack = spawnSync('npx', webpackArgs, {shell: true, stdio: 'inherit', cwd: ROOT_FOLDER});
-  if (webpack.status !== 0) {
-    console.error(webpack.error);
-    process.exit(webpack.status === null ? undefined : webpack.status);
+
+  if (BUNDLE_MODE === 'dev' || BUNDLE_MODE === 'perf') {
+    await addBuildTask(buildTest());
+  }
+
+  await Promise.all(buildTasks);
+
+  if (BUNDLE_MODE === 'prod') {
+    // generate package.json files under each of the dist folders for commonJS and ESModule
+    // this trick allows typescript to import this package as different module type
+    // see also: https://evertpot.com/universal-commonjs-esm-typescript-packages/
+    await fs.writeFile(path.resolve(__dirname, '../dist/cjs', 'package.json'), '{"type": "commonjs"}');
+    await fs.writeFile(path.resolve(__dirname, '../dist/esm', 'package.json'), '{"type": "module"}');
   }
-  npmlog.info('Build.Bundle', 'Running webpack to generate bundles... DONE');
 }
-npmlog.info('Build', 'Building bundle... DONE');
+
+void main();
diff --git a/js/web/script/generate-webgpu-operator-md.ts b/js/web/script/generate-webgpu-operator-md.ts
index 7408f17004f5e..eab8175a941bd 100644
--- a/js/web/script/generate-webgpu-operator-md.ts
+++ b/js/web/script/generate-webgpu-operator-md.ts
@@ -16,6 +16,8 @@ const COMMENTS: Record<string, string> = {
   'Reshape': 'no GPU kernel',
   'Shape': 'no GPU kernel; an ORT warning is generated - need to fix',
   'Resize': 'CoordinateTransformMode align_corners is not supported with downsampling',
+  'Attention': 'need implementing mask and past/present',
+  'MultiHeadAttention': 'need implementing mask and past/present',
 };
 
 /* eslint-disable max-len */
diff --git a/js/web/script/test-runner-cli-args.ts b/js/web/script/test-runner-cli-args.ts
index f90f568879146..ee955ec8d4f17 100644
--- a/js/web/script/test-runner-cli-args.ts
+++ b/js/web/script/test-runner-cli-args.ts
@@ -51,6 +51,10 @@ Options:
  -P[=<...>], --perf[=<...>]    Generate performance number. Cannot be used with flag --debug.
                                  This flag can be used with a number as value, specifying the total count of test cases to run. The test cases may be used multiple times. Default value is 10.
  -c, --file-cache              Enable file cache.
+ -i=<...>, --io-binding=<...>  Specify the IO binding testing type. Should be one of the following:
+                                 none          (default)
+                                 gpu-tensor      use pre-allocated GPU tensors for inputs and outputs
+                                 gpu-location    use pre-allocated GPU tensors for inputs and set preferredOutputLocation to 'gpu-buffer'
 
 *** Session Options ***
  -u=<...>, --optimized-model-file-path=<...>        Specify whether to dump the optimized model.
@@ -108,7 +112,8 @@ export declare namespace TestRunnerCliArgs {
   type Mode = 'suite0'|'suite1'|'model'|'unittest'|'op';
   type Backend = 'cpu'|'webgl'|'webgpu'|'wasm'|'onnxruntime'|'xnnpack'|'webnn';
   type Environment = 'chrome'|'edge'|'firefox'|'electron'|'safari'|'node'|'bs';
-  type BundleMode = 'prod'|'dev'|'perf';
+  type BundleMode = 'dev'|'perf';
+  type IOBindingMode = 'none'|'gpu-tensor'|'gpu-location';
 }
 
 export interface TestRunnerCliArgs {
@@ -124,22 +129,19 @@ export interface TestRunnerCliArgs {
   /**
    * Bundle Mode
    *
-   * this field affects the behavior of Karma and Webpack.
+   * this field affects the behavior of Karma and build script.
    *
-   * For Karma, if flag '--bundle-mode' is not set, the default behavior is 'dev'
-   * For Webpack, if flag '--bundle-mode' is not set, the default behavior is 'prod'
-   *
-   * For running tests, the default mode is 'dev'. If flag '--perf' is set, the mode will be set to 'perf'.
-   *
-   * Mode   | Output File           | Main                 | Source Map         | Webpack Config
-   * ------ | --------------------- | -------------------- | ------------------ | --------------
-   * prod   | /dist/ort.min.js      | /lib/index.ts        | source-map         | production
-   * node   | /dist/ort-web.node.js | /lib/index.ts        | source-map         | production
-   * dev    | /test/ort.dev.js      | /test/test-main.ts   | inline-source-map  | development
-   * perf   | /test/ort.perf.js     | /test/test-main.ts   | (none)             | production
+   * Mode "perf":
+   *   - use "dist/ort.all.min.js" as main file
+   *   - use "test/ort.test.min.js" as test file
+   * Mode "dev":
+   *   - use "dist/ort.all.js" as main file
+   *   - use "test/ort.test.js" as test file
    */
   bundleMode: TestRunnerCliArgs.BundleMode;
 
+  ioBindingMode: TestRunnerCliArgs.IOBindingMode;
+
   logConfig: Test.Config['log'];
 
   /**
@@ -326,7 +328,11 @@ function parseWebgpuFlags(args: minimist.ParsedArgs): Partial<Env.WebGpuFlags> {
   if (profilingMode !== undefined && profilingMode !== 'off' && profilingMode !== 'default') {
     throw new Error('Flag "webgpu-profiling-mode" is invalid');
   }
-  return {profilingMode};
+  const validateInputContent = args['webgpu-validate-input-content'];
+  if (validateInputContent !== undefined && typeof validateInputContent !== 'boolean') {
+    throw new Error('Flag "webgpu-validate-input-content" is invalid');
+  }
+  return {profilingMode, validateInputContent};
 }
 
 function parseGlobalEnvFlags(args: minimist.ParsedArgs): NonNullable<TestRunnerCliArgs['globalEnvFlags']> {
@@ -416,6 +422,13 @@ export function parseTestRunnerCliArgs(cmdlineArgs: string[]): TestRunnerCliArgs
     logConfig.push({category: 'TestRunner.Perf', config: {minimalSeverity: 'verbose'}});
   }
 
+  // Option: -i=<...>, --io-binding=<...>
+  const ioBindingArg = args['io-binding'] || args.i;
+  const ioBindingMode = (typeof ioBindingArg !== 'string') ? 'none' : ioBindingArg;
+  if (['none', 'gpu-tensor', 'gpu-location'].indexOf(ioBindingMode) === -1) {
+    throw new Error(`not supported io binding mode ${ioBindingMode}`);
+  }
+
   // Option: -u, --optimized-model-file-path
   const optimizedModelFilePath = args['optimized-model-file-path'] || args.u || undefined;
   if (typeof optimizedModelFilePath !== 'undefined' && typeof optimizedModelFilePath !== 'string') {
@@ -455,6 +468,7 @@ export function parseTestRunnerCliArgs(cmdlineArgs: string[]): TestRunnerCliArgs
   npmlog.verbose('TestRunnerCli.Init', ` Env:               ${env}`);
   npmlog.verbose('TestRunnerCli.Init', ` Debug:             ${debug}`);
   npmlog.verbose('TestRunnerCli.Init', ` Backend:           ${backend}`);
+  npmlog.verbose('TestRunnerCli.Init', ` IO Binding Mode:   ${ioBindingMode}`);
   npmlog.verbose('TestRunnerCli.Init', 'Parsing commandline arguments... DONE');
 
   return {
@@ -467,6 +481,7 @@ export function parseTestRunnerCliArgs(cmdlineArgs: string[]): TestRunnerCliArgs
     logConfig,
     profile,
     times: perf ? times : undefined,
+    ioBindingMode: ioBindingMode as TestRunnerCliArgs['ioBindingMode'],
     optimizedModelFilePath,
     graphOptimizationLevel: graphOptimizationLevel as TestRunnerCliArgs['graphOptimizationLevel'],
     fileCache,
diff --git a/js/web/script/test-runner-cli.ts b/js/web/script/test-runner-cli.ts
index a75321d45f1ef..74a03290332a8 100644
--- a/js/web/script/test-runner-cli.ts
+++ b/js/web/script/test-runner-cli.ts
@@ -28,6 +28,7 @@ async function main() {
 
   npmlog.verbose('TestRunnerCli.Init.Config', inspect(args));
 
+  const DIST_ROOT = path.join(__dirname, '..', 'dist');
   const TEST_ROOT = path.join(__dirname, '..', 'test');
   const TEST_DATA_MODEL_NODE_ROOT = path.join(TEST_ROOT, 'data', 'node');
   const TEST_DATA_OP_ROOT = path.join(TEST_ROOT, 'data', 'ops');
@@ -257,7 +258,7 @@ async function main() {
       times?: number): Test.ModelTest {
     if (times === 0) {
       npmlog.verbose('TestRunnerCli.Init.Model', `Skip test data from folder: ${testDataRootFolder}`);
-      return {name: path.basename(testDataRootFolder), backend, modelUrl: '', cases: []};
+      return {name: path.basename(testDataRootFolder), backend, modelUrl: '', cases: [], ioBinding: args.ioBindingMode};
     }
 
     let modelUrl: string|null = null;
@@ -323,6 +324,16 @@ async function main() {
       }
     }
 
+    let ioBinding: Test.IOBindingMode;
+    if (backend !== 'webgpu' && args.ioBindingMode !== 'none') {
+      npmlog.warn(
+          'TestRunnerCli.Init.Model', `Ignoring IO Binding Mode "${args.ioBindingMode}" for backend "${backend}".`);
+      ioBinding = 'none';
+    } else {
+      ioBinding = args.ioBindingMode;
+    }
+
+
     npmlog.verbose('TestRunnerCli.Init.Model', 'Finished preparing test data.');
     npmlog.verbose('TestRunnerCli.Init.Model', '===============================================================');
     npmlog.verbose('TestRunnerCli.Init.Model', ` Model file: ${modelUrl}`);
@@ -330,7 +341,7 @@ async function main() {
     npmlog.verbose('TestRunnerCli.Init.Model', ` Test set(s): ${cases.length} (${caseCount})`);
     npmlog.verbose('TestRunnerCli.Init.Model', '===============================================================');
 
-    return {name: path.basename(testDataRootFolder), platformCondition, modelUrl, backend, cases};
+    return {name: path.basename(testDataRootFolder), platformCondition, modelUrl, backend, cases, ioBinding};
   }
 
   function tryLocateModelTestFolder(searchPattern: string): string {
@@ -390,6 +401,13 @@ async function main() {
       for (const test of tests) {
         test.backend = backend;
         test.opset = test.opset || {domain: '', version: MAX_OPSET_VERSION};
+        if (backend !== 'webgpu' && args.ioBindingMode !== 'none') {
+          npmlog.warn(
+              'TestRunnerCli.Init.Op', `Ignoring IO Binding Mode "${args.ioBindingMode}" for backend "${backend}".`);
+          test.ioBinding = 'none';
+        } else {
+          test.ioBinding = args.ioBindingMode;
+        }
       }
       npmlog.verbose('TestRunnerCli.Init.Op', 'Finished preparing test data.');
       npmlog.verbose('TestRunnerCli.Init.Op', '===============================================================');
@@ -436,9 +454,6 @@ async function main() {
     npmlog.info('TestRunnerCli.Run', '(3/4) Running build to generate bundle...');
     const buildCommand = `node ${path.join(__dirname, 'build')}`;
     const buildArgs = [`--bundle-mode=${args.env === 'node' ? 'node' : args.bundleMode}`];
-    if (args.backends.indexOf('wasm') === -1) {
-      buildArgs.push('--no-wasm');
-    }
     npmlog.info('TestRunnerCli.Run', `CMD: ${buildCommand} ${buildArgs.join(' ')}`);
     const build = spawnSync(buildCommand, buildArgs, {shell: true, stdio: 'inherit'});
     if (build.status !== 0) {
@@ -458,7 +473,14 @@ async function main() {
       npmlog.info('TestRunnerCli.Run', '(4/4) Running tsc... DONE');
 
       npmlog.info('TestRunnerCli.Run', '(4/4) Running mocha...');
-      const mochaArgs = ['mocha', path.join(TEST_ROOT, 'test-main'), `--timeout ${args.debug ? 9999999 : 60000}`];
+      const mochaArgs = [
+        'mocha',
+        '--timeout',
+        `${args.debug ? 9999999 : 60000}`,
+        '-r',
+        path.join(DIST_ROOT, 'ort.node.min.js'),
+        path.join(TEST_ROOT, 'test-main'),
+      ];
       npmlog.info('TestRunnerCli.Run', `CMD: npx ${mochaArgs.join(' ')}`);
       const mocha = spawnSync('npx', mochaArgs, {shell: true, stdio: 'inherit'});
       if (mocha.status !== 0) {
@@ -493,19 +515,13 @@ async function main() {
         karmaArgs.push('--force-localhost');
       }
       if (webgpu) {
-        if (browser.includes('Canary')) {
-          chromiumFlags.push('--enable-dawn-features=allow_unsafe_apis,use_dxc');
-        } else {
-          chromiumFlags.push('--enable-dawn-features=use_dxc');
-          chromiumFlags.push('--disable-dawn-features=disallow_unsafe_apis');
-        }
+        // flag 'allow_unsafe_apis' is required to enable experimental features like fp16 and profiling inside pass.
+        // flag 'use_dxc' is required to enable DXC compiler.
+        chromiumFlags.push('--enable-dawn-features=allow_unsafe_apis,use_dxc');
       }
       if (webnn) {
         chromiumFlags.push('--enable-experimental-web-platform-features');
       }
-      if (config.options.globalEnvFlags?.webgpu?.profilingMode === 'default') {
-        chromiumFlags.push('--disable-dawn-features=disallow_unsafe_apis');
-      }
       karmaArgs.push(`--bundle-mode=${args.bundleMode}`);
       karmaArgs.push(...chromiumFlags.map(flag => `--chromium-flags=${flag}`));
       if (browser.startsWith('Edge')) {
diff --git a/js/web/script/tsconfig.json b/js/web/script/tsconfig.json
new file mode 100644
index 0000000000000..23b1fc96eb558
--- /dev/null
+++ b/js/web/script/tsconfig.json
@@ -0,0 +1,6 @@
+{
+  "extends": "../../tsconfig.tools.json",
+  "compilerOptions": {
+    "sourceMap": true
+  }
+}
diff --git a/js/web/test/data/ops/attention.jsonc b/js/web/test/data/ops/attention.jsonc
new file mode 100644
index 0000000000000..bd4483027cc25
--- /dev/null
+++ b/js/web/test/data/ops/attention.jsonc
@@ -0,0 +1,557 @@
+[
+  {
+    "name": "Attention Basic",
+    "operator": "Attention",
+    "opset": { "domain": "com.microsoft", "version": 1 },
+    "attributes": [{ "name": "num_heads", "data": 1, "type": "int" }],
+    "cases": [
+      {
+        "name": "T[0]",
+        "inputs": [
+          {
+            "data": [1, 2, 3, 4, 5, 6, 7, 8],
+            "dims": [1, 2, 4],
+            "type": "float32"
+          },
+          {
+            "data": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
+            "dims": [4, 3],
+            "type": "float32"
+          },
+          {
+            "data": [1, 2, 3],
+            "dims": [3],
+            "type": "float32"
+          }
+        ],
+        "outputs": [
+          {
+            "data": [213, 213],
+            "dims": [1, 2, 1],
+            "type": "float32"
+          }
+        ]
+      }
+    ]
+  },
+  {
+    "name": "Attention Basic Batch 2 with 2 heads",
+    "operator": "Attention",
+    "opset": { "domain": "com.microsoft", "version": 1 },
+    "attributes": [{ "name": "num_heads", "data": 2, "type": "int" }],
+    "cases": [
+      {
+        "name": "T[0]",
+        "inputs": [
+          {
+            "data": [
+              1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+              16
+            ],
+            "dims": [2, 2, 8],
+            "type": "float32"
+          },
+          {
+            "data": [
+              1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4,
+              4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
+            ],
+            "dims": [8, 6],
+            "type": "float32"
+          },
+          {
+            "data": [1, 2, 3, 4, 5, 6],
+            "dims": [6],
+            "type": "float32"
+          }
+        ],
+        "outputs": [
+          {
+            "data": [320, 321, 320, 321, 320, 321, 320, 321],
+            "dims": [2, 2, 2],
+            "type": "float32"
+          }
+        ]
+      }
+    ]
+  },
+  {
+    "name": "Attention Basic",
+    "operator": "Attention",
+    "opset": { "domain": "com.microsoft", "version": 1 },
+    "attributes": [{ "name": "num_heads", "data": 1, "type": "int" }],
+    "cases": [
+      {
+        "name": "T[0]",
+        "inputs": [
+          {
+            "data": [0.3367, 0.1288, 0.2345, 0.2303, -1.1229, -0.1863],
+            "dims": [1, 3, 2],
+            "type": "float32"
+          },
+          {
+            "data": [2.2082, -0.638, 0.4617, 0.2674, 0.5349, 0.8094],
+            "dims": [2, 3],
+            "type": "float32"
+          },
+          {
+            "data": [1.1103, -1.6898, -0.989],
+            "dims": [3],
+            "type": "float32"
+          }
+        ],
+        "outputs": [
+          {
+            "data": [-1.328187108039856, -1.297916054725647, -0.8599594831466675],
+            "dims": [1, 3, 1],
+            "type": "float32"
+          }
+        ]
+      }
+    ]
+  },
+  {
+    "name": "Attention Basic one head, batch 2",
+    "operator": "Attention",
+    "opset": { "domain": "com.microsoft", "version": 1 },
+    "attributes": [{ "name": "num_heads", "data": 1, "type": "int" }],
+    "cases": [
+      {
+        "name": "T[0]",
+        "inputs": [
+          {
+            "data": [0.3367, 0.1288, 0.2345, 0.2303, -1.1229, -0.1863, 2.2082, -0.638, 0.4617, 0.2674, 0.5349, 0.8094],
+            "dims": [2, 3, 2],
+            "type": "float32"
+          },
+          {
+            "data": [2.2082, -0.638, 0.4617, 0.2674, 0.5349, 0.8094],
+            "dims": [2, 3],
+            "type": "float32"
+          },
+          {
+            "data": [1.1103, -1.6898, -0.989],
+            "dims": [3],
+            "type": "float32"
+          }
+        ],
+        "outputs": [
+          {
+            "data": [
+              -1.328187108039856, -1.297916054725647, -0.8599594831466675, -0.1792980432510376, -0.26380985975265503,
+              -0.25473490357398987
+            ],
+            "dims": [2, 3, 1],
+            "type": "float32"
+          }
+        ]
+      }
+    ]
+  },
+  {
+    "name": "Attention Basic 2 head, batch 1",
+    "operator": "Attention",
+    "opset": { "domain": "com.microsoft", "version": 1 },
+    "attributes": [{ "name": "num_heads", "data": 2, "type": "int" }],
+    "cases": [
+      {
+        "name": "T[0]",
+        "inputs": [
+          {
+            "data": [0.3367, 0.1288, 0.2345, 0.2303, -1.1229, -0.1863, 2.2082, -0.638, 0.4617, 0.2674, 0.5349, 0.8094],
+            "dims": [2, 3, 2],
+            "type": "float32"
+          },
+          {
+            "data": [2.2082, -0.638, 0.4617, 0.2674, 0.5349, 0.8094, 0.2345, 0.2303, 0.4617, 1.44, -2.22, 3.6643],
+            "dims": [2, 6],
+            "type": "float32"
+          },
+          {
+            "data": [1.1103, -1.6898, -0.989, -0.989, 1.1103, -1.6898],
+            "dims": [6],
+            "type": "float32"
+          }
+        ],
+        "outputs": [
+          {
+            "data": [
+              0.8701779842376709, -2.6158859729766846, 0.8710794448852539, -2.5763747692108154, 0.9005484580993652,
+              -2.182751178741455, 2.1661579608917236, -2.1045265197753906, 1.6716957092285156, -1.797281265258789,
+              1.7134947776794434, -1.765358328819275
+            ],
+            "dims": [2, 3, 2],
+            "type": "float32"
+          }
+        ]
+      }
+    ]
+  },
+  {
+    "name": "Attention Basic 5 head, batch 2",
+    "operator": "Attention",
+    "opset": { "domain": "com.microsoft", "version": 1 },
+    "attributes": [{ "name": "num_heads", "data": 5, "type": "int" }],
+    "cases": [
+      {
+        "name": "T[0]",
+        "inputs": [
+          {
+            "data": [
+              0.3367, 0.1288, 0.2345, 0.2303, -1.1229, -0.1863, 2.2082, -0.638, 0.4617, 0.2674, 0.5349, 0.8094,
+              0.8701779842376709, 0.9005484580993652, -1.9029953479766846, 0.8710794448852539, -1.9054111242294312,
+              -1.8803634643554688, 2.1661579608917236, 1.7134947776794434, -1.5250005722045898, 1.6716957092285156,
+              -1.0069535970687866, -1.486573576927185, -1.328187108039856, -1.297916054725647, -0.8599594831466675,
+              -0.1792980432510376, -0.26380985975265503, -0.25473490357398987
+            ],
+            "dims": [2, 3, 5],
+            "type": "float32"
+          },
+          {
+            "data": [
+              2.2082, -0.638, 0.4617, 0.2674, 0.5349, 0.8094, 0.2345, 0.2303, 0.4617, 1.44, -2.22, 3.6643,
+              0.8710794448852539, -1.9054111242294312, 0.9005484580993652, 0.8701779842376709, 0.9005484580993652,
+              -1.9029953479766846, 0.8710794448852539, -1.9054111242294312, -1.8803634643554688, 2.1661579608917236,
+              1.7134947776794434, -1.5250005722045898, 1.6716957092285156, -1.0069535970687866, -1.486573576927185,
+              -1.328187108039856, -1.297916054725647, -0.8599594831466675, -0.1792980432510376, -0.26380985975265503,
+              -0.25473490357398987, 2.2082, -0.638, 0.4617, 0.2674, 0.5349, 0.8094, 0.2345, 0.2303, 0.4617, 1.44, -2.22,
+              3.6643, 0.8710794448852539, -1.9054111242294312, 0.9005484580993652, 0.8701779842376709,
+              0.9005484580993652, -1.9029953479766846, 0.8710794448852539, -1.9054111242294312, -1.8803634643554688,
+              2.1661579608917236, 1.7134947776794434, -1.5250005722045898, 1.6716957092285156, -1.0069535970687866,
+              -1.486573576927185, -1.328187108039856, -1.297916054725647, -0.8599594831466675, -0.1792980432510376,
+              -0.26380985975265503, -0.25473490357398987, 2.2082, 0.8710794448852539, -1.9054111242294312,
+              0.9005484580993652, 1.9029953479766846, 0.8710794448852539, -1.9054111242294312, -1.8803634643554688,
+              2.1661579608917236
+            ],
+            "dims": [5, 15],
+            "type": "float32"
+          },
+          {
+            "data": [
+              1.1103, -1.328187108039856, -1.297916054725647, -0.8599594831466675, -0.1792980432510376,
+              -0.26380985975265503, -0.25473490357398987, -1.6898, -0.989, -1.9029953479766846, 0.8710794448852539,
+              -1.9054111242294312, -1.8803634643554688, 2.1661579608917236, 1.7134947776794434
+            ],
+            "dims": [15],
+            "type": "float32"
+          }
+        ],
+        "outputs": [
+          {
+            "data": [
+              -1.6956915855407715, -2.8863370418548584, 1.3899128437042236, 1.6789076328277588, -1.4083852767944336,
+              -1.7009180784225464, -3.1053788661956787, 3.5959298610687256, 1.1027096509933472, -0.009643087163567543,
+              -1.694351315498352, -2.9284396171569824, 1.734721302986145, 2.0606398582458496, -0.2571452260017395,
+              3.671973943710327, -5.285338401794434, -6.833454132080078, 1.7506506443023682, -2.262148380279541,
+              2.5110034942626953, 1.440049171447754, -0.9423203468322754, 1.7506506443023682, -1.86212158203125,
+              -0.5036701560020447, -5.732386589050293, -1.5674757957458496, 1.7506510019302368, -2.264472246170044
+            ],
+            "dims": [2, 3, 5],
+            "type": "float32"
+          }
+        ]
+      }
+    ]
+  },
+  {
+    "name": "Attention Basic 5 head, batch 1",
+    "operator": "Attention",
+    "opset": { "domain": "com.microsoft", "version": 1 },
+    "attributes": [{ "name": "num_heads", "data": 5, "type": "int" }],
+    "cases": [
+      {
+        "name": "T[0]",
+        "inputs": [
+          {
+            "data": [
+              0.3367, 0.1288, 0.2345, 0.2303, -1.1229, -0.1863, 2.2082, -0.638, 0.4617, 0.2674, 0.5349, 0.8094,
+              0.8701779842376709, 0.9005484580993652, -1.9029953479766846
+            ],
+            "dims": [1, 3, 5],
+            "type": "float32"
+          },
+          {
+            "data": [
+              2.2082, -0.638, 0.4617, 0.2674, 0.5349, 0.8094, 0.2345, 0.2303, 0.4617, 1.44, -2.22, 3.6643,
+              0.8710794448852539, -1.9054111242294312, 0.9005484580993652, 0.8701779842376709, 0.9005484580993652,
+              -1.9029953479766846, 0.8710794448852539, -1.9054111242294312, -1.8803634643554688, 2.1661579608917236,
+              1.7134947776794434, -1.5250005722045898, 1.6716957092285156, -1.0069535970687866, -1.486573576927185,
+              -1.328187108039856, -1.297916054725647, -0.8599594831466675, -0.1792980432510376, -0.26380985975265503,
+              -0.25473490357398987, 2.2082, -0.638, 0.4617, 0.2674, 0.5349, 0.8094, 0.2345, 0.2303, 0.4617, 1.44, -2.22,
+              3.6643, 0.8710794448852539, -1.9054111242294312, 0.9005484580993652, 0.8701779842376709,
+              0.9005484580993652, -1.9029953479766846, 0.8710794448852539, -1.9054111242294312, -1.8803634643554688,
+              2.1661579608917236, 1.7134947776794434, -1.5250005722045898, 1.6716957092285156, -1.0069535970687866,
+              -1.486573576927185, -1.328187108039856, -1.297916054725647, -0.8599594831466675, -0.1792980432510376,
+              -0.26380985975265503, -0.25473490357398987, 2.2082, 0.8710794448852539, -1.9054111242294312,
+              0.9005484580993652, 1.9029953479766846, 0.8710794448852539, -1.9054111242294312, -1.8803634643554688,
+              2.1661579608917236
+            ],
+            "dims": [5, 15],
+            "type": "float32"
+          },
+          {
+            "data": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
+            "dims": [15],
+            "type": "float32"
+          }
+        ],
+        "outputs": [
+          {
+            "data": [
+              -1.5670859813690186, -3.7310283184051514, -2.7460145950317383, 0.8121700286865234, -3.350031852722168,
+              -1.5735238790512085, -3.7310383319854736, 6.124307632446289, 0.7840213775634766, -0.7250789403915405,
+              -1.565433382987976, -3.731032371520996, -2.7436347007751465, 1.0472451448440552, -2.7828547954559326
+            ],
+            "dims": [1, 3, 5],
+            "type": "float32"
+          }
+        ]
+      }
+    ]
+  },
+  {
+    "name": "Attention Basic 5 head, batch 3",
+    "operator": "Attention",
+    "opset": { "domain": "com.microsoft", "version": 1 },
+    "attributes": [{ "name": "num_heads", "data": 5, "type": "int" }],
+    "cases": [
+      {
+        "name": "T[0]",
+        "inputs": [
+          {
+            "data": [
+              0.3367, 0.1288, 0.2345, 0.2303, -1.1229, -0.1863, 2.2082, -0.638, 0.4617, 0.2674, 0.5349, 0.8094,
+              0.8701779842376709, 0.9005484580993652, -1.9029953479766846, 0.3367, 0.1288, 0.2345, 0.2303, -1.1229,
+              -0.1863, 2.2082, -0.638, 0.4617, 0.2674, 0.5349, 0.8094, 0.8701779842376709, 0.9005484580993652,
+              -1.9029953479766846, 0.8710794448852539, -1.9054111242294312, -1.8803634643554688, 2.1661579608917236,
+              1.7134947776794434, -1.5250005722045898, 1.6716957092285156, -1.0069535970687866, -1.486573576927185,
+              -1.328187108039856, -1.297916054725647, -0.8599594831466675, -0.1792980432510376, -0.26380985975265503,
+              -0.25473490357398987
+            ],
+            "dims": [3, 3, 5],
+            "type": "float32"
+          },
+          {
+            "data": [
+              2.2082, -0.638, 0.4617, 0.2674, 0.5349, 0.8094, 0.2345, 0.2303, 0.4617, 1.44, -2.22, 3.6643,
+              0.8710794448852539, -1.9054111242294312, 0.9005484580993652, 0.8701779842376709, 0.9005484580993652,
+              -1.9029953479766846, 0.8710794448852539, -1.9054111242294312, -1.8803634643554688, 2.1661579608917236,
+              1.7134947776794434, -1.5250005722045898, 1.6716957092285156, -1.0069535970687866, -1.486573576927185,
+              -1.328187108039856, -1.297916054725647, -0.8599594831466675, -0.1792980432510376, -0.26380985975265503,
+              -0.25473490357398987, 2.2082, -0.638, 0.4617, 0.2674, 0.5349, 0.8094, 0.2345, 0.2303, 0.4617, 1.44, -2.22,
+              3.6643, 0.8710794448852539, -1.9054111242294312, 0.9005484580993652, 0.8701779842376709,
+              0.9005484580993652, -1.9029953479766846, 0.8710794448852539, -1.9054111242294312, -1.8803634643554688,
+              2.1661579608917236, 1.7134947776794434, -1.5250005722045898, 1.6716957092285156, -1.0069535970687866,
+              -1.486573576927185, -1.328187108039856, -1.297916054725647, -0.8599594831466675, -0.1792980432510376,
+              -0.26380985975265503, -0.25473490357398987, 2.2082, 0.8710794448852539, -1.9054111242294312,
+              0.9005484580993652, 1.9029953479766846, 0.8710794448852539, -1.9054111242294312, -1.8803634643554688,
+              2.1661579608917236
+            ],
+            "dims": [5, 15],
+            "type": "float32"
+          },
+          {
+            "data": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
+            "dims": [15],
+            "type": "float32"
+          }
+        ],
+        "outputs": [
+          {
+            "data": [
+              -1.5670859813690186, -3.7310283184051514, -2.7460145950317383, 0.8121700286865234, -3.350031852722168,
+              -1.5735238790512085, -3.7310383319854736, 6.124307632446289, 0.7840213775634766, -0.7250789403915405,
+              -1.565433382987976, -3.731032371520996, -2.7436347007751465, 1.0472451448440552, -2.7828547954559326,
+              -1.5670859813690186, -3.7310283184051514, -2.7460145950317383, 0.8121700286865234, -3.350031852722168,
+              -1.5735238790512085, -3.7310383319854736, 6.124307632446289, 0.7840213775634766, -0.7250789403915405,
+              -1.565433382987976, -3.731032371520996, -2.7436347007751465, 1.0472451448440552, -2.7828547954559326,
+              3.7965505123138428, -2.3799397945404053, -3.9530906677246094, 0.5844926834106445, -2.9756431579589844,
+              2.448162794113159, 4.34546422958374, 1.9380426406860352, 0.5870105624198914, -2.7368364334106445,
+              -0.4769568145275116, 4.255186557769775, -3.9529950618743896, 0.6987408995628357, -2.9756433963775635
+            ],
+            "dims": [3, 3, 5],
+            "type": "float32"
+          }
+        ]
+      }
+    ]
+  },
+  {
+    "name": "Attention Basic 5 head, batch 3",
+    "operator": "Attention",
+    "opset": { "domain": "com.microsoft", "version": 1 },
+    "attributes": [{ "name": "num_heads", "data": 5, "type": "int" }],
+    "cases": [
+      {
+        "name": "T[0]",
+        "inputs": [
+          {
+            "data": [
+              0.3367, 0.1288, 0.2345, 0.2303, -1.1229, -0.1863, 2.2082, -0.638, 0.4617, 0.2674, 0.5349, 0.8094,
+              0.8701779842376709, 0.9005484580993652, -1.9029953479766846, 0.3367, 0.1288, 0.2345, 0.2303, -1.1229,
+              -0.1863, 2.2082, -0.638, 0.4617, 0.2674, 0.5349, 0.8094, 0.8701779842376709, 0.9005484580993652,
+              -1.9029953479766846, 0.8710794448852539, -1.9054111242294312, -1.8803634643554688, 2.1661579608917236,
+              1.7134947776794434, -1.5250005722045898, 1.6716957092285156, -1.0069535970687866, -1.486573576927185,
+              -1.328187108039856, -1.297916054725647, -0.8599594831466675, -0.1792980432510376, -0.26380985975265503,
+              -0.25473490357398987, 0.3367, 0.1288, 0.2345, 0.2303, -1.1229, -0.1863, 2.2082, -0.638, 0.4617, 0.2674,
+              0.5349, 0.8094, 0.8701779842376709, 0.9005484580993652, -1.9029953479766846, 0.3367, 0.1288, 0.2345,
+              0.2303, -1.1229, -0.1863, 2.2082, -0.638, 0.4617, 0.2674, 0.5349, 0.8094, 0.8701779842376709,
+              0.9005484580993652, -1.9029953479766846, 0.8710794448852539, -1.9054111242294312, -1.8803634643554688,
+              2.1661579608917236, 1.7134947776794434, -1.5250005722045898, 1.6716957092285156, -1.0069535970687866,
+              -1.486573576927185, -1.328187108039856, -1.297916054725647, -0.8599594831466675, -0.1792980432510376,
+              -0.26380985975265503, -0.25473490357398987
+            ],
+            "dims": [3, 3, 10],
+            "type": "float32"
+          },
+          {
+            "data": [
+              2.2082, -0.638, 0.4617, 0.2674, 0.5349, 0.8094, 0.2345, 0.2303, 0.4617, 1.44, -2.22, 3.6643,
+              0.8710794448852539, -1.9054111242294312, 0.9005484580993652, 0.8701779842376709, 0.9005484580993652,
+              -1.9029953479766846, 0.8710794448852539, -1.9054111242294312, -1.8803634643554688, 2.1661579608917236,
+              1.7134947776794434, -1.5250005722045898, 1.6716957092285156, -1.0069535970687866, -1.486573576927185,
+              -1.328187108039856, -1.297916054725647, -0.8599594831466675, -0.1792980432510376, -0.26380985975265503,
+              -0.25473490357398987, 2.2082, -0.638, 0.4617, 0.2674, 0.5349, 0.8094, 0.2345, 0.2303, 0.4617, 1.44, -2.22,
+              3.6643, 0.8710794448852539, -1.9054111242294312, 0.9005484580993652, 0.8701779842376709,
+              0.9005484580993652, -1.9029953479766846, 0.8710794448852539, -1.9054111242294312, -1.8803634643554688,
+              2.1661579608917236, 1.7134947776794434, -1.5250005722045898, 1.6716957092285156, -1.0069535970687866,
+              -1.486573576927185, -1.328187108039856, -1.297916054725647, -0.8599594831466675, -0.1792980432510376,
+              -0.26380985975265503, -0.25473490357398987, 2.2082, 0.8710794448852539, -1.9054111242294312,
+              0.9005484580993652, 1.9029953479766846, 0.8710794448852539, -1.9054111242294312, -1.8803634643554688,
+              2.1661579608917236, 2.2082, -0.638, 0.4617, 0.2674, 0.5349, 0.8094, 0.2345, 0.2303, 0.4617, 1.44, -2.22,
+              3.6643, 0.8710794448852539, -1.9054111242294312, 0.9005484580993652, 0.8701779842376709,
+              0.9005484580993652, -1.9029953479766846, 0.8710794448852539, -1.9054111242294312, -1.8803634643554688,
+              2.1661579608917236, 1.7134947776794434, -1.5250005722045898, 1.6716957092285156, -1.0069535970687866,
+              -1.486573576927185, -1.328187108039856, -1.297916054725647, -0.8599594831466675, -0.1792980432510376,
+              -0.26380985975265503, -0.25473490357398987, 2.2082, -0.638, 0.4617, 0.2674, 0.5349, 0.8094, 0.2345,
+              0.2303, 0.4617, 1.44, -2.22, 3.6643, 0.8710794448852539, -1.9054111242294312, 0.9005484580993652,
+              0.8701779842376709, 0.9005484580993652, -1.9029953479766846, 0.8710794448852539, -1.9054111242294312,
+              -1.8803634643554688, 2.1661579608917236, 1.7134947776794434, -1.5250005722045898, 1.6716957092285156,
+              -1.0069535970687866, -1.486573576927185, -1.328187108039856, -1.297916054725647, -0.8599594831466675,
+              -0.1792980432510376, -0.26380985975265503, -0.25473490357398987, 2.2082, 0.8710794448852539,
+              -1.9054111242294312, 0.9005484580993652, 1.9029953479766846, 0.8710794448852539, -1.9054111242294312,
+              -1.8803634643554688, 2.1661579608917236
+            ],
+            "dims": [10, 15],
+            "type": "float32"
+          },
+          {
+            "data": [
+              -1.5670859813690186, -3.7310283184051514, -2.7460145950317383, 0.8121700286865234, -3.350031852722168,
+              -1.5735238790512085, -3.7310383319854736, 6.124307632446289, 0.7840213775634766, -0.7250789403915405,
+              -1.565433382987976, -3.731032371520996, -2.7436347007751465, 1.0472451448440552, -2.7828547954559326
+            ],
+            "dims": [15],
+            "type": "float32"
+          }
+        ],
+        "outputs": [
+          {
+            "data": [
+              -8.01101303100586, -5.782258987426758, 6.016238689422607, 0.26747000217437744, -6.992541313171387,
+              -8.011263847351074, -5.782248020172119, 5.366001129150391, 0.26747000217437744, -6.99449348449707,
+              -8.011263847351074, -5.782265663146973, 6.016238689422607, 0.26747000217437744, -6.992537021636963,
+              -6.102723598480225, -7.28973388671875, -4.578637599945068, 7.2203369140625, -6.028444766998291,
+              -6.102705478668213, -7.2897748947143555, -3.7882626056671143, 5.393260478973389, -5.754333972930908,
+              -1.3616288900375366, -7.289827823638916, -6.341128349304199, 6.329389572143555, -5.751791954040527,
+              -2.3945987224578857, -14.532954216003418, 3.969801902770996, 12.744998931884766, -11.1966552734375,
+              -2.4002532958984375, -14.538958549499512, -6.684961318969727, 12.476543426513672, -9.24352741241455,
+              -4.787771701812744, -8.640848159790039, 3.969801902770996, -0.6471102833747864, -11.1966552734375
+            ],
+            "dims": [3, 3, 5],
+            "type": "float32"
+          }
+        ]
+      }
+    ]
+  },
+  {
+    "name": "Attention Basic 1 head, batch 3",
+    "operator": "Attention",
+    "opset": { "domain": "com.microsoft", "version": 1 },
+    "attributes": [{ "name": "num_heads", "data": 1, "type": "int" }],
+    "cases": [
+      {
+        "name": "T[0]",
+        "inputs": [
+          {
+            "data": [
+              0.3367, 0.1288, 0.2345, 0.2303, -1.1229, -0.1863, 2.2082, -0.638, 0.4617, 0.2674, 0.5349, 0.8094,
+              0.8701779842376709, 0.9005484580993652, -1.9029953479766846, 0.3367, 0.1288, 0.2345, 0.2303, -1.1229,
+              -0.1863, 2.2082, -0.638, 0.4617, 0.2674, 0.5349, 0.8094, 0.8701779842376709, 0.9005484580993652,
+              -1.9029953479766846, 0.8710794448852539, -1.9054111242294312, -1.8803634643554688, 2.1661579608917236,
+              1.7134947776794434, -1.5250005722045898, 1.6716957092285156, -1.0069535970687866, -1.486573576927185,
+              -1.328187108039856, -1.297916054725647, -0.8599594831466675, -0.1792980432510376, -0.26380985975265503,
+              -0.25473490357398987, 0.3367, 0.1288, 0.2345, 0.2303, -1.1229, -0.1863, 2.2082, -0.638, 0.4617, 0.2674,
+              0.5349, 0.8094, 0.8701779842376709, 0.9005484580993652, -1.9029953479766846, 0.3367, 0.1288, 0.2345,
+              0.2303, -1.1229, -0.1863, 2.2082, -0.638, 0.4617, 0.2674, 0.5349, 0.8094, 0.8701779842376709,
+              0.9005484580993652, -1.9029953479766846, 0.8710794448852539, -1.9054111242294312, -1.8803634643554688,
+              2.1661579608917236, 1.7134947776794434, -1.5250005722045898, 1.6716957092285156, -1.0069535970687866,
+              -1.486573576927185, -1.328187108039856, -1.297916054725647, -0.8599594831466675, -0.1792980432510376,
+              -0.26380985975265503, -0.25473490357398987
+            ],
+            "dims": [3, 3, 10],
+            "type": "float32"
+          },
+          {
+            "data": [
+              2.2082, -0.638, 0.4617, 0.2674, 0.5349, 0.8094, 0.2345, 0.2303, 0.4617, 1.44, -2.22, 3.6643,
+              0.8710794448852539, -1.9054111242294312, 0.9005484580993652, 0.8701779842376709, 0.9005484580993652,
+              -1.9029953479766846, 0.8710794448852539, -1.9054111242294312, -1.8803634643554688, 2.1661579608917236,
+              1.7134947776794434, -1.5250005722045898, 1.6716957092285156, -1.0069535970687866, -1.486573576927185,
+              -1.328187108039856, -1.297916054725647, -0.8599594831466675, -0.1792980432510376, -0.26380985975265503,
+              -0.25473490357398987, 2.2082, -0.638, 0.4617, 0.2674, 0.5349, 0.8094, 0.2345, 0.2303, 0.4617, 1.44, -2.22,
+              3.6643, 0.8710794448852539, -1.9054111242294312, 0.9005484580993652, 0.8701779842376709,
+              0.9005484580993652, -1.9029953479766846, 0.8710794448852539, -1.9054111242294312, -1.8803634643554688,
+              2.1661579608917236, 1.7134947776794434, -1.5250005722045898, 1.6716957092285156, -1.0069535970687866,
+              -1.486573576927185, -1.328187108039856, -1.297916054725647, -0.8599594831466675, -0.1792980432510376,
+              -0.26380985975265503, -0.25473490357398987, 2.2082, 0.8710794448852539, -1.9054111242294312,
+              0.9005484580993652, 1.9029953479766846, 0.8710794448852539, -1.9054111242294312, -1.8803634643554688,
+              2.1661579608917236, 2.2082, -0.638, 0.4617, 0.2674, 0.5349, 0.8094, 0.2345, 0.2303, 0.4617, 1.44, -2.22,
+              3.6643, 0.8710794448852539, -1.9054111242294312, 0.9005484580993652, 0.8701779842376709,
+              0.9005484580993652, -1.9029953479766846, 0.8710794448852539, -1.9054111242294312, -1.8803634643554688,
+              2.1661579608917236, 1.7134947776794434, -1.5250005722045898, 1.6716957092285156, -1.0069535970687866,
+              -1.486573576927185, -1.328187108039856, -1.297916054725647, -0.8599594831466675, -0.1792980432510376,
+              -0.26380985975265503, -0.25473490357398987, 2.2082, -0.638, 0.4617, 0.2674, 0.5349, 0.8094, 0.2345,
+              0.2303, 0.4617, 1.44, -2.22, 3.6643, 0.8710794448852539, -1.9054111242294312, 0.9005484580993652,
+              0.8701779842376709, 0.9005484580993652, -1.9029953479766846, 0.8710794448852539, -1.9054111242294312,
+              -1.8803634643554688, 2.1661579608917236, 1.7134947776794434, -1.5250005722045898, 1.6716957092285156,
+              -1.0069535970687866, -1.486573576927185, -1.328187108039856, -1.297916054725647, -0.8599594831466675,
+              -0.1792980432510376, -0.26380985975265503, -0.25473490357398987, 2.2082, 0.8710794448852539,
+              -1.9054111242294312, 0.9005484580993652, 1.9029953479766846, 0.8710794448852539, -1.9054111242294312,
+              -1.8803634643554688, 2.1661579608917236
+            ],
+            "dims": [10, 15],
+            "type": "float32"
+          },
+          {
+            "data": [
+              -1.5670859813690186, -3.7310283184051514, -2.7460145950317383, 0.8121700286865234, -3.350031852722168,
+              -1.5735238790512085, -3.7310383319854736, 6.124307632446289, 0.7840213775634766, -0.7250789403915405,
+              -1.565433382987976, -3.731032371520996, -2.7436347007751465, 1.0472451448440552, -2.7828547954559326
+            ],
+            "dims": [15],
+            "type": "float32"
+          }
+        ],
+        "outputs": [
+          {
+            "data": [
+              -8.011263847351074, -5.7822418212890625, 6.016238689422607, 0.26747000217437744, -6.992536544799805,
+              -8.011263847351074, -5.7822418212890625, 6.016238689422607, 0.26747000217437744, -6.992536544799805,
+              -8.011263847351074, -5.7822418212890625, 6.016238689422607, 0.26747000217437744, -6.992536544799805,
+              1.3541864156723022, -7.813620090484619, -6.758509635925293, 7.597365856170654, -13.926229476928711,
+              -1.322464108467102, -7.297357559204102, -0.05962071940302849, 6.347561836242676, -5.869992256164551,
+              -1.3616288900375366, -7.28973388671875, 0.0386197566986084, 6.329389572143555, -5.751791954040527,
+              -2.400698661804199, -14.538958549499512, -7.898950576782227, 12.744998931884766, -11.1966552734375,
+              -2.400698661804199, -14.538958549499512, -7.898950576782227, 12.744998931884766, -11.1966552734375,
+              1.021930456161499, -2.373898983001709, 3.8501391410827637, -0.6108309626579285, -9.256340980529785
+            ],
+            "dims": [3, 3, 5],
+            "type": "float32"
+          }
+        ]
+      }
+    ]
+  }
+]
diff --git a/js/web/test/data/ops/batch-norm.jsonc b/js/web/test/data/ops/batch-norm.jsonc
new file mode 100644
index 0000000000000..4ea16f290dc8f
--- /dev/null
+++ b/js/web/test/data/ops/batch-norm.jsonc
@@ -0,0 +1,446 @@
+[
+  {
+    "name": "BatchNormalization with no attributes",
+    "operator": "BatchNormalization",
+    "attributes": [],
+    "cases": [
+      {
+        "name": "T[64]",
+        "inputs": [
+          {
+            "data": [
+              2.02384, -0.935186, 0.488569, -0.513934, -1.27082, -0.131913, -1.806, -0.37904, 0.667796, -1.14826,
+              1.2522, 0.0300339, 2.4758, 1.55511, 0.385341, 1.46645, -1.09355, -2.56309, 0.976015, -1.47036, 0.89486,
+              0.580989, -1.12418, -0.339189, 1.3314, 0.418893, -0.301401, -1.2983, -0.839063, 0.170261, 1.15486,
+              -0.255735, -0.589851, -0.416289, -0.952648, -0.360487, 0.253287, 0.437195, 0.32023, 0.209606, -0.279519,
+              -0.546527, 0.265286, -1.07383, -1.65879, 1.1222, 0.946612, 0.822549, 0.64689, -0.292639, -0.73995,
+              -0.694949, 1.33899, -0.0652476, 1.61791, 1.49692, -0.761145, -0.201874, -1.15431, -1.83111, -0.705267,
+              -0.143026, -0.129819, -0.799425
+            ],
+            "dims": [64],
+            "type": "float32"
+          },
+          {
+            "data": [0.241661],
+            "dims": [1],
+            "type": "float32"
+          },
+          {
+            "data": [0],
+            "dims": [1],
+            "type": "float32"
+          },
+          {
+            "data": [0],
+            "dims": [1],
+            "type": "float32"
+          },
+          {
+            "data": [1],
+            "dims": [1],
+            "type": "float32"
+          }
+        ],
+        "outputs": [
+          {
+            "data": [
+              0.489082, -0.225997, 0.118068, -0.124197, -0.307105, -0.031878, -0.436439, -0.0915989, 0.16138, -0.277489,
+              0.302606, 0.007258, 0.598301, 0.375807, 0.0931215, 0.354382, -0.264267, -0.619395, 0.235864, -0.355328,
+              0.216252, 0.140402, -0.271669, -0.0819684, 0.321747, 0.10123, -0.0728365, -0.313746, -0.202768, 0.0411454,
+              0.279085, -0.0618009, -0.142543, -0.1006, -0.230217, -0.0871152, 0.0612094, 0.105652, 0.0773867,
+              0.0506533, -0.0675486, -0.132074, 0.064109, -0.259501, -0.400863, 0.271191, 0.228758, 0.198777, 0.156327,
+              -0.0707191, -0.178816, -0.167941, 0.323581, -0.0157677, 0.390985, 0.361745, -0.183938, -0.0487849,
+              -0.27895, -0.442507, -0.170435, -0.0345637, -0.031372, -0.193189
+            ],
+            "dims": [64],
+            "type": "float32"
+          }
+        ]
+      },
+      {
+        "name": "T[2,3,4,4,4]",
+        "inputs": [
+          {
+            "data": [
+              2.02384, -0.935186, 0.488569, -0.513934, -1.27082, -0.131913, -1.806, -0.37904, 0.667796, -1.14826,
+              1.2522, 0.0300339, 2.4758, 1.55511, 0.385341, 1.46645, -1.09355, -2.56309, 0.976015, -1.47036, 0.89486,
+              0.580989, -1.12418, -0.339189, 1.3314, 0.418893, -0.301401, -1.2983, -0.839063, 0.170261, 1.15486,
+              -0.255735, -0.589851, -0.416289, -0.952648, -0.360487, 0.253287, 0.437195, 0.32023, 0.209606, -0.279519,
+              -0.546527, 0.265286, -1.07383, -1.65879, 1.1222, 0.946612, 0.822549, 0.64689, -0.292639, -0.73995,
+              -0.694949, 1.33899, -0.0652476, 1.61791, 1.49692, -0.761145, -0.201874, -1.15431, -1.83111, -0.705267,
+              -0.143026, -0.129819, -0.799425, 0.168795, 0.740422, -0.377683, 0.432598, -2.07414, -2.85251, 0.273531,
+              0.0532606, 1.31052, -0.769382, 0.9976, 0.850536, -1.53812, -0.00496016, 0.931242, 0.0517056, -0.497829,
+              0.275869, 0.860001, 1.23747, 0.179686, 1.5914, 0.740327, 0.798208, 2.12478, 1.74205, -0.322054,
+              -0.0112451, 0.204525, -0.431252, -1.3114, 0.186204, 0.780569, -1.42994, 1.63344, -0.00839034, -0.187035,
+              1.8406, 1.32053, -0.636963, 0.408944, -1.50846, -1.2076, -0.129118, -0.0441307, 1.47558, 1.07251, 1.05295,
+              -0.420297, -1.13402, -0.524053, 3.20754, -0.588935, -0.527549, 0.591928, -1.10529, 0.520412, 0.19404,
+              -1.21229, -0.399594, -0.280935, -0.363324, -0.00804771, 1.43102, -0.523222, 1.17608, -0.53195, 0.914993,
+              2.69308, -0.517211, 0.472273, -0.464725, -0.929768, -0.631145, 0.919709, -0.27391, 1.76689, 0.894897,
+              0.235798, 1.2544, 0.858985, -0.139707, 0.354544, 0.200878, 0.353255, 0.0722632, -1.56074, 1.03685,
+              1.73434, 0.193269, -0.864609, 0.842739, -0.372717, 0.584484, 0.16315, 1.60674, -0.0611289, -1.24544,
+              1.33361, -0.961942, -0.15732, -0.348637, 0.361842, 0.7386, 0.517256, 1.20406, -2.07277, -1.01983, -1.9163,
+              0.239934, 0.177979, 0.464564, 0.988822, 0.284607, -1.56099, -0.429143, 0.111043, -0.0853688, -0.319176,
+              -0.279777, 0.520971, -1.078, -0.670242, 0.065652, 0.468538, -0.825062, 0.370068, 1.68751, -1.16928,
+              -0.411782, 1.61624, -0.973004, 2.64703, -0.220014, -1.43954, -0.018692, 1.34982, -0.95197, -1.72586,
+              1.32725, 0.280984, 0.00847463, 0.512869, 0.0378154, 0.13898, 0.35758, -0.084558, 1.04045, -1.79933,
+              1.3002, 0.390457, 1.22267, 0.959344, -0.964296, -0.0935597, 0.288953, -0.158046, 0.532672, -0.500988,
+              0.25187, -2.14384, -0.633315, 1.24612, -1.41525, 0.36494, -0.00714732, -0.608963, 0.508496, 0.995365,
+              1.21159, -0.169055, -0.968783, 1.52779, -0.082381, 2.2049, 0.928655, 0.120245, 0.911429, -0.885258,
+              -1.2072, 0.770694, 2.36621, 1.08456, -1.60069, 0.0345025, 0.359559, -0.785411, 0.466532, -0.78543,
+              0.024879, 1.59337, 1.13718, -1.27073, -0.263788, -1.7702, 0.203263, 1.34631, 1.11914, -2.04911, -0.804137,
+              0.466763, 2.18386, 1.4689, 0.898297, -0.648948, 0.252202, 1.12501, -0.204563, 0.124608, 0.377214,
+              0.894327, -0.249118, 0.709188, 0.999397, -1.4079, 0.193873, 0.657753, -0.709732, 1.09897, -0.145793,
+              0.779199, 0.88378, -1.2676, 1.15709, 0.62295, -0.370894, -0.103268, -1.55949, -0.470747, 0.100394,
+              0.422334, -0.0685312, -0.434488, -0.568974, -0.256987, 2.01276, -0.923322, -0.613144, 1.50676, 0.65756,
+              1.20524, 1.10395, -0.975241, 2.44035, 1.08276, 0.330393, -0.508918, -1.25545, 0.189815, -0.156263,
+              -0.960866, 1.0859, -0.674478, 2.76743, 1.21399, 1.71666, -1.73198, -1.1062, 0.951285, -0.713336, 1.61586,
+              1.96514, 0.002603, 0.0953297, 0.949256, -1.76552, 0.372816, -0.781229, 1.50532, 1.28462, 1.31116,
+              0.731908, 1.54835, 0.371081, 0.409244, -0.106938, -1.79396, -1.61198, -0.80869, -1.10381, 1.1872,
+              -0.832439, 0.0755941, -1.09553, 0.960059, 1.44252, -0.196482, -1.07364, 0.165547, 0.630078, 1.56569,
+              -0.669592, 1.15974, 0.0953399, -0.202313, 0.812631, -0.318567, -0.16644, 0.887062, -0.0264821, -0.740725,
+              0.0797577, -1.1037, 0.90236, 1.13427, 0.364186, -2.01043, -0.415748, 0.116046, 0.369949, 0.317886,
+              0.530332, 1.48341, 0.74666, -1.64142, 0.22569, 1.18015, 1.31827, -1.33904, -0.101125
+            ],
+            "dims": [2, 3, 4, 4, 4],
+            "type": "float32"
+          },
+          {
+            "data": [0.241661, 0.960798, 0.474727],
+            "dims": [3],
+            "type": "float32"
+          },
+          {
+            "data": [0, 0, 0],
+            "dims": [3],
+            "type": "float32"
+          },
+          {
+            "data": [0, 0, 0],
+            "dims": [3],
+            "type": "float32"
+          },
+          {
+            "data": [1, 1, 1],
+            "dims": [3],
+            "type": "float32"
+          }
+        ],
+        "outputs": [
+          {
+            "data": [
+              0.489082, -0.225997, 0.118068, -0.124197, -0.307105, -0.031878, -0.436439, -0.0915989, 0.16138, -0.277489,
+              0.302606, 0.007258, 0.598301, 0.375807, 0.0931215, 0.354382, -0.264267, -0.619395, 0.235864, -0.355328,
+              0.216252, 0.140402, -0.271669, -0.0819684, 0.321747, 0.10123, -0.0728365, -0.313746, -0.202768, 0.0411454,
+              0.279085, -0.0618009, -0.142543, -0.1006, -0.230217, -0.0871152, 0.0612094, 0.105652, 0.0773867,
+              0.0506533, -0.0675486, -0.132074, 0.064109, -0.259501, -0.400863, 0.271191, 0.228758, 0.198777, 0.156327,
+              -0.0707191, -0.178816, -0.167941, 0.323581, -0.0157677, 0.390985, 0.361745, -0.183938, -0.0487849,
+              -0.27895, -0.442507, -0.170435, -0.0345637, -0.031372, -0.193189, 0.162177, 0.711393, -0.362876, 0.415637,
+              -1.99282, -2.74067, 0.262807, 0.0511725, 1.25914, -0.739217, 0.958488, 0.817189, -1.47782, -0.00476569,
+              0.894731, 0.0496784, -0.478311, 0.265053, 0.826283, 1.18895, 0.172641, 1.52901, 0.711301, 0.766913,
+              2.04147, 1.67375, -0.309427, -0.0108042, 0.196507, -0.414344, -1.25999, 0.178903, 0.749965, -1.37387,
+              1.5694, -0.00806138, -0.179702, 1.76844, 1.26875, -0.61199, 0.392911, -1.44932, -1.16025, -0.124055,
+              -0.0424004, 1.41773, 1.03046, 1.01167, -0.403818, -1.08956, -0.503507, 3.08178, -0.565845, -0.506866,
+              0.56872, -1.06196, 0.500008, 0.186433, -1.16476, -0.383928, -0.269921, -0.349079, -0.00773219, 1.37492,
+              -0.248386, 0.558316, -0.25253, 0.43437, 1.27847, -0.245533, 0.2242, -0.220617, -0.441384, -0.29962,
+              0.436609, -0.130032, 0.838785, 0.424829, 0.111939, 0.595496, 0.407781, -0.0663221, 0.168311, 0.0953618,
+              0.167699, 0.0343051, -0.74092, 0.492219, 0.823334, 0.0917494, -0.410451, 0.400069, -0.176938, 0.277469,
+              0.0774512, 0.762761, -0.0290194, -0.59124, 0.6331, -0.456657, -0.0746837, -0.165507, 0.171775, 0.350631,
+              0.245554, 0.571595, -0.983996, -0.484139, -0.909715, 0.113902, 0.0844908, 0.22054, 0.469418, 0.13511,
+              -0.741041, -0.203725, 0.0527148, -0.0405267, -0.151521, -0.132817, 0.247318, -0.511752, -0.31818,
+              0.0311666, 0.222426, -0.391677, 0.17568, 0.801104, -0.282569, -0.0995112, 0.39058, -0.235136, 0.639682,
+              -0.0531687, -0.347878, -0.0045171, 0.326198, -0.230053, -0.41707, 0.320744, 0.0679025, 0.00204798,
+              0.12394, 0.00913847, 0.0335859, 0.0864127, -0.0204343, 0.251436, -0.434827, 0.314206, 0.0943579, 0.295471,
+              0.231835, -0.233032, -0.0226096, 0.0698283, -0.0381934, 0.128725, -0.121069, 0.060867, -0.51808,
+              -0.153047, 0.301137, -0.342009, 0.0881915, -0.00172722, -0.147162, 0.122883, 0.24054, 0.292792,
+              -0.0408538, -0.234116, 0.369206, -0.0199082, 0.532835, 0.224419, 0.0290583, 0.220256, -0.213931,
+              -0.291733, 0.186246, 0.571817, 0.262095, -0.386822, 0.00833788, 0.086891, -0.189802, 0.112742, -0.189807,
+              0.00601226, 0.385054, 0.274811, -1.22091, -0.253445, -1.7008, 0.195294, 1.29353, 1.07526, -1.96877,
+              -0.772609, 0.448463, 2.09824, 1.4113, 0.863078, -0.623505, 0.242314, 1.0809, -0.196543, 0.119722,
+              0.362425, 0.859263, -0.239351, 0.681383, 0.960214, -1.3527, 0.186272, 0.631964, -0.681905, 1.05588,
+              -0.140077, 0.748649, 0.84913, -1.2179, 1.11172, 0.598526, -0.356353, -0.099219, -1.49835, -0.452291,
+              0.0964582, 0.405776, -0.0658444, -0.417454, -0.546667, -0.246911, 1.93385, -0.887121, -0.589104, 1.44769,
+              0.631779, 1.15798, 1.06067, -0.937005, 2.34467, 1.04031, 0.31744, -0.488965, -1.20623, 0.182373,
+              -0.150136, -0.923194, 1.04332, -0.648034, 2.65893, 1.1664, 1.64935, -0.822216, -0.525139, 0.451599,
+              -0.338638, 0.767087, 0.932899, 0.00123571, 0.0452554, 0.450635, -0.838136, 0.176985, -0.370868, 0.714614,
+              0.60984, 0.622438, 0.347455, 0.73504, 0.176161, 0.194278, -0.0507662, -0.851639, -0.765246, -0.383905,
+              -0.524005, 0.563593, -0.395179, 0.0358864, -0.520076, 0.455763, 0.684801, -0.093275, -0.509682, 0.0785892,
+              0.299113, 0.743272, -0.317872, 0.550556, 0.0452602, -0.0960432, 0.385776, -0.151232, -0.079013, 0.42111,
+              -0.0125717, -0.35164, 0.0378629, -0.523955, 0.428372, 0.538468, 0.172888, -0.954402, -0.197366, 0.0550898,
+              0.175624, 0.150908, 0.251761, 0.704209, 0.354458, -0.779221, 0.107141, 0.560244, 0.625814, -0.635675,
+              -0.0480064
+            ],
+            "dims": [2, 3, 4, 4, 4],
+            "type": "float32"
+          }
+        ]
+      }
+    ]
+  },
+  {
+    "name": "BatchNormalization with no attributes - NHWC",
+    "operator": "BatchNormalization",
+    "opset": { "domain": "com.ms.internal.nhwc", "version": 12 },
+    "attributes": [],
+    "cases": [
+      {
+        "name": "T[64]",
+        "inputs": [
+          {
+            "data": [
+              2.02384, -0.935186, 0.488569, -0.513934, -1.27082, -0.131913, -1.806, -0.37904, 0.667796, -1.14826,
+              1.2522, 0.0300339, 2.4758, 1.55511, 0.385341, 1.46645, -1.09355, -2.56309, 0.976015, -1.47036, 0.89486,
+              0.580989, -1.12418, -0.339189, 1.3314, 0.418893, -0.301401, -1.2983, -0.839063, 0.170261, 1.15486,
+              -0.255735, -0.589851, -0.416289, -0.952648, -0.360487, 0.253287, 0.437195, 0.32023, 0.209606, -0.279519,
+              -0.546527, 0.265286, -1.07383, -1.65879, 1.1222, 0.946612, 0.822549, 0.64689, -0.292639, -0.73995,
+              -0.694949, 1.33899, -0.0652476, 1.61791, 1.49692, -0.761145, -0.201874, -1.15431, -1.83111, -0.705267,
+              -0.143026, -0.129819, -0.799425
+            ],
+            "dims": [64],
+            "type": "float32"
+          },
+          {
+            "data": [0.241661],
+            "dims": [1],
+            "type": "float32"
+          },
+          {
+            "data": [0],
+            "dims": [1],
+            "type": "float32"
+          },
+          {
+            "data": [0],
+            "dims": [1],
+            "type": "float32"
+          },
+          {
+            "data": [1],
+            "dims": [1],
+            "type": "float32"
+          }
+        ],
+        "outputs": [
+          {
+            "data": [
+              0.489082, -0.225997, 0.118068, -0.124197, -0.307105, -0.031878, -0.436439, -0.0915989, 0.16138, -0.277489,
+              0.302606, 0.007258, 0.598301, 0.375807, 0.0931215, 0.354382, -0.264267, -0.619395, 0.235864, -0.355328,
+              0.216252, 0.140402, -0.271669, -0.0819684, 0.321747, 0.10123, -0.0728365, -0.313746, -0.202768, 0.0411454,
+              0.279085, -0.0618009, -0.142543, -0.1006, -0.230217, -0.0871152, 0.0612094, 0.105652, 0.0773867,
+              0.0506533, -0.0675486, -0.132074, 0.064109, -0.259501, -0.400863, 0.271191, 0.228758, 0.198777, 0.156327,
+              -0.0707191, -0.178816, -0.167941, 0.323581, -0.0157677, 0.390985, 0.361745, -0.183938, -0.0487849,
+              -0.27895, -0.442507, -0.170435, -0.0345637, -0.031372, -0.193189
+            ],
+            "dims": [64],
+            "type": "float32"
+          }
+        ]
+      },
+      {
+        "name": "T[2,4,4,4,3]",
+        "inputs": [
+          {
+            "data": [
+              2.02384, 0.168795, -0.523222, -0.935186, 0.740422, 1.17608, 0.488569, -0.377683, -0.53195, -0.513934,
+              0.432598, 0.914993, -1.27082, -2.07414, 2.69308, -0.131913, -2.85251, -0.517211, -1.806, 0.273531,
+              0.472273, -0.37904, 0.0532606, -0.464725, 0.667796, 1.31052, -0.929768, -1.14826, -0.769382, -0.631145,
+              1.2522, 0.9976, 0.919709, 0.0300339, 0.850536, -0.27391, 2.4758, -1.53812, 1.76689, 1.55511, -0.00496016,
+              0.894897, 0.385341, 0.931242, 0.235798, 1.46645, 0.0517056, 1.2544, -1.09355, -0.497829, 0.858985,
+              -2.56309, 0.275869, -0.139707, 0.976015, 0.860001, 0.354544, -1.47036, 1.23747, 0.200878, 0.89486,
+              0.179686, 0.353255, 0.580989, 1.5914, 0.0722632, -1.12418, 0.740327, -1.56074, -0.339189, 0.798208,
+              1.03685, 1.3314, 2.12478, 1.73434, 0.418893, 1.74205, 0.193269, -0.301401, -0.322054, -0.864609, -1.2983,
+              -0.0112451, 0.842739, -0.839063, 0.204525, -0.372717, 0.170261, -0.431252, 0.584484, 1.15486, -1.3114,
+              0.16315, -0.255735, 0.186204, 1.60674, -0.589851, 0.780569, -0.0611289, -0.416289, -1.42994, -1.24544,
+              -0.952648, 1.63344, 1.33361, -0.360487, -0.00839034, -0.961942, 0.253287, -0.187035, -0.15732, 0.437195,
+              1.8406, -0.348637, 0.32023, 1.32053, 0.361842, 0.209606, -0.636963, 0.7386, -0.279519, 0.408944, 0.517256,
+              -0.546527, -1.50846, 1.20406, 0.265286, -1.2076, -2.07277, -1.07383, -0.129118, -1.01983, -1.65879,
+              -0.0441307, -1.9163, 1.1222, 1.47558, 0.239934, 0.946612, 1.07251, 0.177979, 0.822549, 1.05295, 0.464564,
+              0.64689, -0.420297, 0.988822, -0.292639, -1.13402, 0.284607, -0.73995, -0.524053, -1.56099, -0.694949,
+              3.20754, -0.429143, 1.33899, -0.588935, 0.111043, -0.0652476, -0.527549, -0.0853688, 1.61791, 0.591928,
+              -0.319176, 1.49692, -1.10529, -0.279777, -0.761145, 0.520412, 0.520971, -0.201874, 0.19404, -1.078,
+              -1.15431, -1.21229, -0.670242, -1.83111, -0.399594, 0.065652, -0.705267, -0.280935, 0.468538, -0.143026,
+              -0.363324, -0.825062, -0.129819, -0.00804771, 0.370068, -0.799425, 1.43102, 1.68751, -1.16928, -1.27073,
+              -1.73198, -0.411782, -0.263788, -1.1062, 1.61624, -1.7702, 0.951285, -0.973004, 0.203263, -0.713336,
+              2.64703, 1.34631, 1.61586, -0.220014, 1.11914, 1.96514, -1.43954, -2.04911, 0.002603, -0.018692,
+              -0.804137, 0.0953297, 1.34982, 0.466763, 0.949256, -0.95197, 2.18386, -1.76552, -1.72586, 1.4689,
+              0.372816, 1.32725, 0.898297, -0.781229, 0.280984, -0.648948, 1.50532, 0.00847463, 0.252202, 1.28462,
+              0.512869, 1.12501, 1.31116, 0.0378154, -0.204563, 0.731908, 0.13898, 0.124608, 1.54835, 0.35758, 0.377214,
+              0.371081, -0.084558, 0.894327, 0.409244, 1.04045, -0.249118, -0.106938, -1.79933, 0.709188, -1.79396,
+              1.3002, 0.999397, -1.61198, 0.390457, -1.4079, -0.80869, 1.22267, 0.193873, -1.10381, 0.959344, 0.657753,
+              1.1872, -0.964296, -0.709732, -0.832439, -0.0935597, 1.09897, 0.0755941, 0.288953, -0.145793, -1.09553,
+              -0.158046, 0.779199, 0.960059, 0.532672, 0.88378, 1.44252, -0.500988, -1.2676, -0.196482, 0.25187,
+              1.15709, -1.07364, -2.14384, 0.62295, 0.165547, -0.633315, -0.370894, 0.630078, 1.24612, -0.103268,
+              1.56569, -1.41525, -1.55949, -0.669592, 0.36494, -0.470747, 1.15974, -0.00714732, 0.100394, 0.0953399,
+              -0.608963, 0.422334, -0.202313, 0.508496, -0.0685312, 0.812631, 0.995365, -0.434488, -0.318567, 1.21159,
+              -0.568974, -0.16644, -0.169055, -0.256987, 0.887062, -0.968783, 2.01276, -0.0264821, 1.52779, -0.923322,
+              -0.740725, -0.082381, -0.613144, 0.0797577, 2.2049, 1.50676, -1.1037, 0.928655, 0.65756, 0.90236,
+              0.120245, 1.20524, 1.13427, 0.911429, 1.10395, 0.364186, -0.885258, -0.975241, -2.01043, -1.2072, 2.44035,
+              -0.415748, 0.770694, 1.08276, 0.116046, 2.36621, 0.330393, 0.369949, 1.08456, -0.508918, 0.317886,
+              -1.60069, -1.25545, 0.530332, 0.0345025, 0.189815, 1.48341, 0.359559, -0.156263, 0.74666, -0.785411,
+              -0.960866, -1.64142, 0.466532, 1.0859, 0.22569, -0.78543, -0.674478, 1.18015, 0.024879, 2.76743, 1.31827,
+              1.59337, 1.21399, -1.33904, 1.13718, 1.71666, -0.101125
+            ],
+            "dims": [2, 4, 4, 4, 3],
+            "type": "float32"
+          },
+          {
+            "data": [0.241661, 0.960798, 0.474727],
+            "dims": [3],
+            "type": "float32"
+          },
+          {
+            "data": [0, 0, 0],
+            "dims": [3],
+            "type": "float32"
+          },
+          {
+            "data": [0, 0, 0],
+            "dims": [3],
+            "type": "float32"
+          },
+          {
+            "data": [1, 1, 1],
+            "dims": [3],
+            "type": "float32"
+          }
+        ],
+        "outputs": [
+          {
+            "data": [
+              0.489082, 0.162177, -0.248386, -0.225997, 0.711393, 0.558316, 0.118068, -0.362876, -0.25253, -0.124197,
+              0.415637, 0.43437, -0.307105, -1.99282, 1.27847, -0.031878, -2.74067, -0.245533, -0.436439, 0.262807,
+              0.2242, -0.0915989, 0.0511725, -0.220617, 0.16138, 1.25914, -0.441384, -0.277489, -0.739217, -0.29962,
+              0.302606, 0.958488, 0.436609, 0.007258, 0.817189, -0.130032, 0.598301, -1.47782, 0.838785, 0.375807,
+              -0.00476569, 0.424829, 0.0931215, 0.894731, 0.111939, 0.354382, 0.0496784, 0.595496, -0.264267, -0.478311,
+              0.407781, -0.619395, 0.265053, -0.0663221, 0.235864, 0.826283, 0.168311, -0.355328, 1.18895, 0.0953618,
+              0.216252, 0.172641, 0.167699, 0.140402, 1.52901, 0.0343051, -0.271669, 0.711301, -0.74092, -0.0819684,
+              0.766913, 0.492219, 0.321747, 2.04147, 0.823334, 0.10123, 1.67375, 0.0917494, -0.0728365, -0.309427,
+              -0.410451, -0.313746, -0.0108042, 0.400069, -0.202768, 0.196507, -0.176938, 0.0411454, -0.414344,
+              0.277469, 0.279085, -1.25999, 0.0774512, -0.0618009, 0.178903, 0.762761, -0.142543, 0.749965, -0.0290194,
+              -0.1006, -1.37387, -0.59124, -0.230217, 1.5694, 0.6331, -0.0871152, -0.00806138, -0.456657, 0.0612094,
+              -0.179702, -0.0746837, 0.105652, 1.76844, -0.165507, 0.0773867, 1.26875, 0.171775, 0.0506533, -0.61199,
+              0.350631, -0.0675486, 0.392911, 0.245554, -0.132074, -1.44932, 0.571595, 0.064109, -1.16025, -0.983996,
+              -0.259501, -0.124055, -0.484139, -0.400863, -0.0424004, -0.909715, 0.271191, 1.41773, 0.113902, 0.228758,
+              1.03046, 0.0844908, 0.198777, 1.01167, 0.22054, 0.156327, -0.403818, 0.469418, -0.0707191, -1.08956,
+              0.13511, -0.178816, -0.503507, -0.741041, -0.167941, 3.08178, -0.203725, 0.323581, -0.565845, 0.0527148,
+              -0.0157677, -0.506866, -0.0405267, 0.390985, 0.56872, -0.151521, 0.361745, -1.06196, -0.132817, -0.183938,
+              0.500008, 0.247318, -0.0487849, 0.186433, -0.511752, -0.27895, -1.16476, -0.31818, -0.442507, -0.383928,
+              0.0311666, -0.170435, -0.269921, 0.222426, -0.0345637, -0.349079, -0.391677, -0.031372, -0.00773219,
+              0.17568, -0.193189, 1.37492, 0.801104, -0.282569, -1.22091, -0.822216, -0.0995112, -0.253445, -0.525139,
+              0.39058, -1.7008, 0.451599, -0.235136, 0.195294, -0.338638, 0.639682, 1.29353, 0.767087, -0.0531687,
+              1.07526, 0.932899, -0.347878, -1.96877, 0.00123571, -0.0045171, -0.772609, 0.0452554, 0.326198, 0.448463,
+              0.450635, -0.230053, 2.09824, -0.838136, -0.41707, 1.4113, 0.176985, 0.320744, 0.863078, -0.370868,
+              0.0679025, -0.623505, 0.714614, 0.00204798, 0.242314, 0.60984, 0.12394, 1.0809, 0.622438, 0.00913847,
+              -0.196543, 0.347455, 0.0335859, 0.119722, 0.73504, 0.0864127, 0.362425, 0.176161, -0.0204343, 0.859263,
+              0.194278, 0.251436, -0.239351, -0.0507662, -0.434827, 0.681383, -0.851639, 0.314206, 0.960214, -0.765246,
+              0.0943579, -1.3527, -0.383905, 0.295471, 0.186272, -0.524005, 0.231835, 0.631964, 0.563593, -0.233032,
+              -0.681905, -0.395179, -0.0226096, 1.05588, 0.0358864, 0.0698283, -0.140077, -0.520076, -0.0381934,
+              0.748649, 0.455763, 0.128725, 0.84913, 0.684801, -0.121069, -1.2179, -0.093275, 0.060867, 1.11172,
+              -0.509682, -0.51808, 0.598526, 0.0785892, -0.153047, -0.356353, 0.299113, 0.301137, -0.099219, 0.743272,
+              -0.342009, -1.49835, -0.317872, 0.0881915, -0.452291, 0.550556, -0.00172722, 0.0964582, 0.0452602,
+              -0.147162, 0.405776, -0.0960432, 0.122883, -0.0658444, 0.385776, 0.24054, -0.417454, -0.151232, 0.292792,
+              -0.546667, -0.079013, -0.0408538, -0.246911, 0.42111, -0.234116, 1.93385, -0.0125717, 0.369206, -0.887121,
+              -0.35164, -0.0199082, -0.589104, 0.0378629, 0.532835, 1.44769, -0.523955, 0.224419, 0.631779, 0.428372,
+              0.0290583, 1.15798, 0.538468, 0.220256, 1.06067, 0.172888, -0.213931, -0.937005, -0.954402, -0.291733,
+              2.34467, -0.197366, 0.186246, 1.04031, 0.0550898, 0.571817, 0.31744, 0.175624, 0.262095, -0.488965,
+              0.150908, -0.386822, -1.20623, 0.251761, 0.00833788, 0.182373, 0.704209, 0.086891, -0.150136, 0.354458,
+              -0.189802, -0.923194, -0.779221, 0.112742, 1.04332, 0.107141, -0.189807, -0.648034, 0.560244, 0.00601226,
+              2.65893, 0.625814, 0.385054, 1.1664, -0.635675, 0.274811, 1.64935, -0.0480064
+            ],
+            "dims": [2, 4, 4, 4, 3],
+            "type": "float32"
+          }
+        ]
+      }
+    ]
+  },
+  {
+    "name": "BatchNormalization non-spatial mode",
+    "operator": "BatchNormalization",
+    "opset": { "domain": "", "version": 7 },
+    "attributes": [{ "name": "spatial", "data": 0, "type": "int" }],
+    "cases": [
+      {
+        "name": "T[3,1,2]",
+        "inputs": [
+          {
+            "data": [0.2134, 0.32434, 0.5644, 0.3234, 0.4545, 0.3445],
+            "dims": [3, 1, 2],
+            "type": "float32"
+          },
+          {
+            "data": [0.5, 0.6],
+            "dims": [1, 2],
+            "type": "float32"
+          },
+          {
+            "data": [0.2, 0.1],
+            "dims": [1, 2],
+            "type": "float32"
+          },
+          {
+            "data": [0.034, 0.342],
+            "dims": [1, 2],
+            "type": "float32"
+          },
+          {
+            "data": [1, 1],
+            "dims": [1, 2],
+            "type": "float32"
+          }
+        ],
+        "outputs": [
+          {
+            "data": [0.2897, 0.089404, 0.4652, 0.08884, 0.41025, 0.1015],
+            "dims": [3, 1, 2],
+            "type": "float32"
+          }
+        ]
+      }
+    ]
+  },
+  {
+    "name": "BatchNormalization non-spatial mode - NHWC",
+    "operator": "BatchNormalization",
+    "opset": { "domain": "com.ms.internal.nhwc", "version": 7 },
+    "attributes": [{ "name": "spatial", "data": 0, "type": "int" }],
+    "cases": [
+      {
+        "name": "T[3,2,1]",
+        "inputs": [
+          {
+            "data": [0.2134, 0.32434, 0.5644, 0.3234, 0.4545, 0.3445],
+            "dims": [3, 2, 1],
+            "type": "float32"
+          },
+          {
+            "data": [0.5, 0.6],
+            "dims": [1, 2],
+            "type": "float32"
+          },
+          {
+            "data": [0.2, 0.1],
+            "dims": [1, 2],
+            "type": "float32"
+          },
+          {
+            "data": [0.034, 0.342],
+            "dims": [1, 2],
+            "type": "float32"
+          },
+          {
+            "data": [1, 1],
+            "dims": [1, 2],
+            "type": "float32"
+          }
+        ],
+        "outputs": [
+          {
+            "data": [0.2897, 0.089404, 0.4652, 0.08884, 0.41025, 0.1015],
+            "dims": [3, 2, 1],
+            "type": "float32"
+          }
+        ]
+      }
+    ]
+  }
+]
diff --git a/js/web/test/data/ops/bias-add.jsonc b/js/web/test/data/ops/bias-add.jsonc
new file mode 100644
index 0000000000000..e89c5dd81cc23
--- /dev/null
+++ b/js/web/test/data/ops/bias-add.jsonc
@@ -0,0 +1,874 @@
+[
+  {
+    "name": "BiasAdd",
+    "operator": "BiasAdd",
+    "attributes": [],
+    "opset": { "domain": "com.microsoft", "version": 1 },
+    "cases": [
+      {
+        "name": "bias add [2,2,320]x[320]x[2,2,320]",
+        "inputs": [
+          {
+            "data": [
+              -0.43078827160569055, -1.3343044914862014, -1.3875186857333706, 0.7550491056943578, -0.015677493769832296,
+              1.2761569600658005, -0.033221806310058, 1.3590872185896297, 1.706516873231478, 1.429932535224122,
+              -0.39598259309643424, -0.6735725816667406, 0.5551536414690581, 1.9642482005713608, 0.1481069760277398,
+              -0.6706041659325672, 1.7626582023316804, 1.8587314247747715, 0.6761988056588422, -1.2865903673587722,
+              0.17451384248240753, -1.468579522325724, -1.4426371855482047, -1.101469412150644, 0.32065561169114787,
+              -1.7280217475756476, 0.8472414982170147, 1.089925472742105, -1.9188568763699578, -1.6897769809368715,
+              -0.22443847491362767, -1.592027916773227, -1.403545711986184, -0.992398507333391, 1.501364958055385,
+              1.1969883087000177, -1.742362862568375, -1.3122082038099396, -1.602285316113413, 1.7122406231186638,
+              -1.7331729420495074, -1.1660018974894326, 1.8466385944978407, 0.8059189037532883, 0.42819875266849383,
+              1.2622036788694384, 1.09253289978553, 1.7169775828443141, 0.8965924110901806, 1.731515754612638,
+              -0.710260858323644, 0.6949611010518701, 1.9392089898271783, -0.06855145845621369, 1.831602787077924,
+              1.919258334566825, -0.1750228495679611, -1.385684174878624, 0.1864456393673155, -0.7627497248975219,
+              0.5460459705476177, 1.0151263124631544, 1.0476468224745439, -1.4662684764200389, 0.8737230293064417,
+              -1.5018396956497346, 1.7171541562949724, 0.33089656971471637, -0.4497250989135697, 0.03155539207838842,
+              0.051811401515028166, -0.6417182241860351, 0.2892848388089577, 1.7313934166665597, 0.07283972370973935,
+              -1.1648958342544793, 0.31235891981372976, 0.8667946196377363, -1.8955234844794324, -0.7935013829177411,
+              1.7461436471789726, -0.7863083195781497, -0.24062975690975286, 0.5319240675275907, -1.093540712132092,
+              -0.08133035383567844, 0.24368896952864638, -0.7817379372747375, 0.09213174790435374, 0.7008130043325052,
+              1.2549161788773802, -1.3617356636970648, 0.9203525826936687, -0.17473559280037954, 0.8615190853991157,
+              1.7227407352985065, -1.6408918805405301, -1.2530895034442668, -1.561750342643168, 0.9122283638044753,
+              1.52061857140668, -1.757751307093299, 1.9174275778315568, 0.8751522548565935, -0.40373986100628567,
+              -0.9081360776169474, -1.0200280593711373, 1.8684056043061243, -0.8656213251095854, 1.2016289009297738,
+              -0.6420074354987753, 0.8277984540094971, -0.7794691695697935, -0.3682767999575489, -0.8431108361591697,
+              -1.8337997630639924, -1.497840701527875, 0.3582228355620778, -1.7648364421707035, 1.2295209603744386,
+              -1.4630737828552771, -1.0901242069115051, 0.16944674169381635, -1.1367614122844483, 1.1108472254071025,
+              -1.823264607399035, -1.9393711989501217, 1.5655717488598917, -1.2791502509294475, 0.5264599738022175,
+              1.2793776364048464, 0.6643881105536087, -1.8021867427977876, 1.2507197673418382, 1.5096918274472868,
+              -0.35787327298378635, -0.8142338946526149, 0.05554038558981045, -0.8292295885917778, -1.9802571383548457,
+              0.15711486642700123, 1.0606485766817002, -0.75648306593627, 0.7821766109776371, 1.5576015081398582,
+              1.274065256431233, -0.11137614589209388, 1.5523940512159609, -0.9225168834642474, -1.52372850136818,
+              -0.09285110141468689, 0.951570914266842, 1.4257064421442056, -1.1151817450836203, -1.0405979771695408,
+              -0.3512294660068509, 0.5544676624698717, -1.1641268738929513, -1.1024667332826468, -0.8470834386045532,
+              1.0200780368213875, -0.4111438148101385, 1.5060564363688043, -0.9264136525377458, -1.1144625610145829,
+              -0.8955596783917263, 1.1298063812792858, 1.1669470152018224, -0.9624856700584665, -0.5105634488042536,
+              0.3350150739180062, 1.8252865056498973, 1.8318837072890988, 1.247181799708187, 0.953253999223505,
+              0.33583064317486144, 0.8060935063640473, 0.025351359439778953, -1.9020713785704064, -1.4395100399524523,
+              -1.4434709436802136, 1.1887525549807645, -0.7404500263025655, 0.43567229698745447, 0.697905733863994,
+              1.4955234099092705, 1.8798334809531436, 0.28521714680739496, -1.8247616864327485, -0.8899636892784697,
+              -1.9902986052685518, -0.8612351129594646, 1.4473654540376284, -0.027164144754272534, -1.3409554794285556,
+              1.8216393608213144, 1.6287547173387606, 0.34546830263617245, -1.8554217975954366, -0.1599936883574884,
+              -1.1517327823070804, -1.1043403244478194, 0.9959357690900248, 0.47246889924079394, 1.3040046448975406,
+              1.3219564442035914, 1.5522794215260332, -1.3132538270303629, -0.6568357397541558, -0.3166961264888011,
+              1.155856117058299, -0.2856093347506521, 0.37282953339474023, 1.3624325643069906, -0.5961360461760625,
+              0.15430022597664106, 1.459554220213934, -0.4578169766295437, -1.5520231063597194, -0.9692019226485984,
+              -0.7868623417517249, -0.47648838559948903, 0.521518493577898, -0.43202750995667394, -0.2800695092149823,
+              -0.062073964072573595, -1.280928976710845, 1.653227202229826, -1.3266797422187144, -1.8026162116413413,
+              -1.6342686696555697, -1.8535208355884771, -0.6740602051435252, -1.1112082500330684, -0.2784013709506814,
+              -0.38477554625820254, -1.8030977138882127, 1.0792754568917307, 1.7171188079371378, 1.0025920437036717,
+              0.4909327973461517, 1.7583392556519017, -0.9513053055982352, 0.9836640607205354, 0.676230592061307,
+              -1.532719596602865, 1.7094356339043584, 1.275239052465298, -0.7097409099122736, -0.07447816392320394,
+              0.6054981041837371, -1.368606109962121, 0.5491138352978151, 0.9607716616110995, -0.8472145355857039,
+              -1.5962071764683028, -1.3945278279448647, 1.6804737734010837, -0.2799307641611124, -1.5199944467651827,
+              -0.6284155111805374, -1.387427601467464, -1.3689101475786654, 0.5479648937942034, -0.35058122336125397,
+              -0.01913895405776156, 1.9421209900010803, 1.521339125582216, 0.44317460300853284, 1.4673321860144632,
+              1.0844686121268872, 1.5454854860881193, -0.10676223292802778, 1.304830685737219, 1.5144316149285784,
+              -0.9206202207515908, 1.800878209478248, -1.4022748170800003, 1.8475728691446207, -0.6049511260966813,
+              1.3794138268485217, -0.5504308384418284, -1.308589620881369, -1.5124914949500843, -0.9843890898875198,
+              -0.47517812559034134, 0.019190610059508728, 1.3102818494016093, 0.02046481460734384, 0.8966955765481393,
+              -1.9544981815442322, 1.4818794231229235, 1.701164813487166, -0.1853726987447688, -0.9284129805631327,
+              -0.6665475855012435, 1.7174739766102922, -0.6866660477705144, 1.3741442823600236, 1.3144332615465366,
+              -1.6821345841035624, 1.4369774381581397, -0.40113703543299106, 1.2155358501458204, 0.3746014766056245,
+              -1.2797835093659478, -1.2733146708696594, 1.7592472733050606, 0.7411515442284751, -0.8417009900350942,
+              0.43950532643238294, -0.4849697519720575, 0.4561330711561924, -0.9993716519294349, 1.2993450426953688,
+              0.6077681589510648, -1.1884025219126162, 0.7423783440021925, 0.2164111466551839, -1.8404401979886122,
+              -1.6417250174486764, 0.33169566624656355, 1.4670345153781224, 0.05770292022609169, -0.7574653293187339,
+              -1.8332930679907182, 0.30545703812352265, 1.2645746651782588, 0.06851662509789058, -1.5007737660843672,
+              -0.7895770838504896, -0.16973917251686643, -1.1309776011957018, -0.5245832860094533, 1.2865060486729964,
+              -0.6877713804065921, 0.8603087489217494, -1.6760835939805823, -0.3421307621860663, -0.08163355960848584,
+              0.8960283575910557, -0.07414831058651483, 0.06552900030788233, -0.28343554832982676, 0.8580233436387052,
+              0.9756624200728901, -0.31454210863680565, -1.3961591961928228, 1.4357660237613095, 0.995011454280724,
+              1.7963312095595976, 0.44298210926821735, -0.4481873251636479, 0.04661154364950004, -1.0205093044441567,
+              -1.416501713829339, -1.5609202237237856, -1.9678529588325686, 0.054492766820211536, 1.9234783545287293,
+              -1.730337423937696, 1.0002270533854967, 0.912626942921996, 1.6082280213507438, -0.0007848079092749316,
+              -0.16260961343095381, -0.4951675344800499, -1.358161813033476, 0.0952066467959174, 1.1451260564923738,
+              0.5658192251004577, 1.638639222136547, 1.4561159943128157, 1.394720518360975, -1.6647338550020887,
+              -0.35227744791908844, -1.4997218325299349, 1.7269852905741923, -1.6265531305359326, 0.08177780426753678,
+              -0.2548418092940219, -0.5491200865930868, -1.3448390466679427, -1.4669459626844805, 1.6556888857778747,
+              0.9797534441152536, 0.011435434178378223, -1.0444316805293496, -0.21951923449225497, 1.2775877064028922,
+              1.3912532203430734, -0.41034439679528845, 1.6871573166908078, -0.2949753341249375, -0.19366027881779058,
+              -0.8063505536355873, -1.9683493413513462, 1.1343233715358245, 0.5863358659604163, -1.7161271586916902,
+              -1.1511237584065572, -1.026334237004506, 0.051935455424261256, -0.47999388673650234, 1.3653965992268642,
+              -1.6246601676055645, 1.0720439180368135, 1.7513891096415195, -1.9557074487732296, -0.6497504028268937,
+              1.5657013366841293, 1.480374539039186, 0.6254160749725743, -0.1386074990036681, 1.4879074098885399,
+              0.8608723264201785, 1.3707693807951804, -1.9599278528285744, -0.08178821222314703, -0.8801306486657889,
+              0.2672127121047323, -1.0905213718834403, 1.2845923771997692, 0.7483674434994203, 1.692846569262815,
+              -1.4081461907911006, 0.4847990697153808, -1.384710972149036, -0.48477323237460457, 0.5075573472017414,
+              -0.469975647682892, -1.7330175330396296, 0.4900846176343876, -0.5504384887286493, -1.3105683633488505,
+              1.7832520063588246, -0.008133111014310579, -0.4117632340667363, -1.364428903076842, 1.1502921666750634,
+              -1.297368143184646, 1.2657228492344146, -0.8339630526727912, 1.6036841813714835, 1.7388392748246462,
+              1.0764044765977916, 0.8635247292187316, 0.8915437658945047, 0.048593859303998954, 0.6397243841879581,
+              -0.32613694518247716, -0.5352551370184315, 1.0102009591194312, 1.2995349433229801, 0.873915978492783,
+              0.28688858188963984, 1.0299796543068158, -0.21558135465200134, -0.8175153872775942, -0.2228988257546698,
+              1.3651108393401516, 1.7419897496979102, -0.4287431897946634, -1.7210342396761868, -1.1328295428945232,
+              0.45940815613566865, 1.225728222018235, -1.8492110470394127, 0.11914217556997819, -0.5298005043189082,
+              0.2548881953681379, 1.4159379393782983, 1.4882705620355736, -1.778746106600524, -0.1322461839301079,
+              0.959787258509837, -1.2087764883807948, -0.656679223823172, -0.8052024037155077, 0.28857972466710535,
+              -1.9964605007110707, -1.618829946697912, 0.0003216720189040956, 1.3814498186817072, 1.267459171727996,
+              1.7808899740756745, -0.026284995100580133, 1.8415787116931153, 1.588128771971907, -1.379239932131604,
+              1.4954639033867787, -0.5365530743272542, -1.689396586297355, -1.5263365207258657, -0.15914036263121556,
+              -0.2236484993504826, -0.233339567785662, -0.6479528728470942, 1.7496861376407091, 1.9658328381866337,
+              0.7465900143344948, 0.9538445157519684, -1.233501367636971, 1.7832767928842772, -0.05854857072903297,
+              -1.4741131369232807, -0.8878273465278168, 0.7857197910561684, -1.3138933804749309, -1.6218392819603826,
+              -1.5976871654455351, 0.7862199318904839, 0.7930452731942381, -0.3851655998300325, 1.454630867585033,
+              0.07699835053088844, 1.9635265550996897, -0.9018052833921555, -1.3260562107679466, 1.591219679914813,
+              -1.9325066606486736, 1.6231422543543284, 0.06461353520395186, 1.938690006387282, 1.18237528878611,
+              0.08913168691813134, -0.24910533784113476, -0.24270271174439095, -0.058892972285330636,
+              0.3017252069250542, -1.6443343279597213, -0.35086444759952506, 1.2568748876859326, 1.2564694463685155,
+              0.5001323821158774, 1.6753738435417835, 0.12536895878538168, 0.11445707906508318, -1.4674734226308148,
+              -1.664480357904826, 1.2455086575046002, 0.016219310045000768, 0.6915540154848818, 0.8863957336976913,
+              -1.902221644800517, 1.1943261373616156, 1.5343838485344943, 0.26951920872184143, 1.1650565743686334,
+              0.8796643687219943, -0.9914368645961655, -0.4140197685853506, -0.31070218781242964, 0.18576121794650557,
+              -1.8127899542033798, 0.7364855692315055, 0.61436653795095, -1.340047295554938, -0.9787870418028497,
+              1.467065674277963, -1.3989058830195544, 0.8760353392917564, 1.103715585163978, 1.5993888842634876,
+              -1.4487285544801765, 1.0735445052923094, -1.7221746626643117, 1.3406229033114787, -0.31808486639617684,
+              0.18778714988957468, 0.3360116223452154, 0.9957693335536861, -0.7082173962327145, 0.13563127391102814,
+              -0.6103565638388746, -0.27366984218234336, 1.670182285266585, 0.7450288088790513, -0.7376055316855803,
+              -1.9155349843040552, -0.5647372929979877, 0.38985718472222164, -1.0859956865362559, -1.2774336101522819,
+              1.6716179824020179, -1.8248740137755952, -0.2462417745015948, -1.6330831414998679, 1.2633365236447727,
+              1.8405595764836615, -1.5041331151923663, 0.7795364178340263, 0.30228912276856246, -1.4199604845773672,
+              -1.9675070264789785, -1.5847567364669262, 0.8417404542568088, 1.411592224813667, -0.7798761695294161,
+              0.3836369752883826, -1.8656778968593093, -0.5928302815127546, -1.9170044216421092, -0.05015509660959072,
+              -0.2949046704017606, 0.7706683291518104, -1.0318399043402815, 1.673370077717097, -0.014510791777835763,
+              -1.7993886007705928, -1.6310599365379153, 0.47517801288072725, 1.7067820519317198, 0.2398133204231856,
+              0.8136128605181492, 1.7661699247824538, -1.7400434804598, 1.6440515677939507, 1.0630270197569356,
+              1.604276559525923, -0.6932776253265054, -0.5527120052345769, -0.15050627035753017, 1.9862178971553677,
+              -0.015247839923115514, -1.973561045412188, -0.7527150187762626, -1.8069828707308844, -1.0877483004326844,
+              1.5632037263398146, -0.7907173278699853, 0.020757273269920162, -0.9162761989634776, -0.871619440202827,
+              0.9274209555443278, 0.2788106849606935, -1.139267389509799, 1.8554359980704245, -1.2900715232363025,
+              1.5740910757851019, 1.2022687452616108, -1.3263358341556861, 1.705785771419051, -0.2966767936771886,
+              1.0184489507711243, 0.9331775150786781, -0.09496306724418613, 1.6169644094277178, -0.43071270921050253,
+              0.00617695499471882, 1.9046927055754255, -0.3800897035987143, -1.2677566100047573, -1.6177276055783159,
+              0.41451247814580316, 1.9136674265154827, -1.4827469552090689, -1.7754603228705612, -0.6490018494468908,
+              1.0832837177200183, 1.5236866225466352, -1.3663768537968632, 0.3345867923992154, -1.3264457751116838,
+              1.1302163417527789, 0.7987367218594539, -0.16727587952795364, -0.7323977510251147, -1.3812007830618693,
+              0.7210535269663234, 1.1937695850586403, 1.9659603230065574, -0.12903581925605057, 0.22599793243849042,
+              -1.055270780364089, 1.0726029698666473, -1.8921222720165147, 1.232307457573829, -1.6501289040499376,
+              -1.2203689042981933, -1.5782693671316723, 0.84090024006949, 1.1443588250664956, -0.8862909159261179,
+              -0.34912733758720993, 0.15172846490974035, 0.6801601864117464, -1.3948321257969702, 1.9080269171533608,
+              -1.851455291444804, -0.8525139187927957, -0.9629354426957466, -0.3970802170728076, 0.2714456125847784,
+              1.9355349765888947, 1.643295056500194, 0.461049347109487, -0.819228054522112, -0.7773196244370615,
+              0.27305821451894285, -0.5007383808686932, -0.5307070901422906, -1.6087255013287924, 1.708746273758031,
+              -0.18903643771546896, 0.4237658727537639, 0.7912222637914885, -0.06713232018529425, -0.9303076017910259,
+              -0.9715788400465986, 1.9239773722802864, -0.13605413264730704, 1.63369388301102, -1.4098364226383078,
+              -1.8177000155999794, -1.0786108587058756, 1.1467184875003493, 1.6942868827942474, 0.3743937965110735,
+              -0.5205992959118086, 1.9008470494268277, -0.33613881942363744, 1.408798355509151, 1.4374267676314432,
+              -1.6266431395044751, -1.2795792184706105, -1.6942133113310183, 0.44536668255477885, -0.5438457389607523,
+              -0.10839952389615792, -0.741753360770752, 1.9252204052044393, -1.298683868110981, -0.6674482773778925,
+              1.7250903096066814, -0.31851968039415723, 0.09917784413898989, -1.2300134366181839, -1.6661221342983064,
+              -1.7392500850180967, 1.1202143474944348, -0.008191271742616912, -1.9527622539466778, -1.112511983718873,
+              0.8023412387190376, -0.8667541685114779, 1.5324025634620666, -0.885176692529595, 0.28103075310068526,
+              0.5555445946177473, 0.0746344154810279, 1.8279995093264354, 1.589298526844452, -1.1728977455798502,
+              -1.2266193029402643, -0.08324504041088154, -1.5997622864209005, -1.9929554997781622, 1.649795503298476,
+              -1.825621659313522, 1.2037940913995477, -0.1478578137836779, -1.791209833936434, -1.5875938503209248,
+              -1.54554126750838, 0.22767426425123372, -0.9730859655005233, 1.01498892879457, -0.14002274071109522,
+              1.5196250773427282, -0.262915623574389, -0.8395076912698629, -1.6509617804598316, 0.4853743370864656,
+              -1.2426649457879062, -0.34746037671673236, -0.2668537063224701, -1.5727266735586252, -1.117657993368196,
+              0.38564526206938243, 1.8759791120426446, -0.9537042932874478, 1.8291799127445367, -0.005069359588546263,
+              0.9881621403906173, -1.0839433778149212, -0.4270854569893858, 1.6796287101836995, 1.4763471217127018,
+              -1.0142364314111942, -1.9083290608311998, 1.8813585092075078, -1.7461290703297596, -0.21674842672674544,
+              0.5502685329995662, -0.4411322123830974, -1.8416200674737109, -1.0844672555196375, -1.4812521607836615,
+              1.5083908944091222, -1.038918979055473, 0.7142655565242073, -1.1043893519874795, -0.9397073551320325,
+              1.335714667963627, -0.39552262419583606, 0.941189318776745, -1.973265716530558, 0.29314288363790375,
+              1.1149136203907473, -1.3280789878295156, 0.4344131789940944, -1.4976865930360805, -0.18683967016091696,
+              1.6841429470215354, 0.9990587834251263, -0.9896683309850935, 1.7344823188063838, 1.2809894053746431,
+              0.11253057753370044, 0.6202153889005659, -1.1393527545232924, -0.13267950871413792, 1.9929188473410813,
+              -0.5751171946115745, -1.5738159418363038, -0.722979035597497, -0.3845867260905491, 0.6726496775295967,
+              1.5922485171567802, -1.2068498206133187, 0.412853384927236, -1.1136351543496117, 0.4046306514337976,
+              1.2348205714395197, -0.6837477630512465, 0.4396481646202046, -1.689933367495156, 1.2059642129302333,
+              -0.9663178383992985, 1.1606541969900555, 1.6008140707565817, -0.49901900408756017, -0.3295636539441862,
+              -1.6961254597784983, -1.2594474125668986, -1.8290342261999246, -1.3195471501043112, -1.3360729012617636,
+              0.4819319828437685, -0.6044650413524977, 1.5401325916637765, 1.3108212503564856, 0.6641610189431937,
+              -1.167107917049922, 1.3717452437078013, 1.8234968598766077, -0.059000791459610014, 0.5078759939367101,
+              1.7012186950957178, -0.8153543329038886, -0.8116555600682265, 1.1042603281155614, -0.5230370384584662,
+              1.5907666644047485, 1.3585126059484214, -1.1604013321546773, 0.2832653250904853, 0.6146831527317715,
+              1.710136815942171, -0.4339250659200289, 0.5404568535663827, 0.9731252061576328, 1.667624932562064,
+              -0.8395294570873553, 0.9655900545408684, -0.8459497721445768, -0.3303605936459242, -1.0228351996527785,
+              1.64618479653826, -0.17369144560780647, -0.762588585662475, -1.420812072676508, 0.04977508086731497,
+              1.1411840603073538, 1.2658579855151144, -0.7695492083057207, -1.5802557368206935, 1.8063629418173504,
+              -1.3314629726856042, 0.926332179655863, -0.29403591774091886, -1.4368324532624133, 1.794172160269225,
+              -1.45190145484798, -0.6970484207532923, -0.7393596578156938, -1.3777134665955009, 1.698548607127142,
+              -1.5277458031231408, 0.8121596602029895, -0.4871450419173451, -0.8973481250486168, 0.999939728035903,
+              -1.622149524743996, -1.8097564354752569, -1.9184903777986992, 1.6699213455758075, -1.8966494026411178,
+              -1.5537605568683883, 1.0114197035541261, -1.7582102654477056, -1.881605728865896, 1.9424710063889181,
+              -1.387404030261334, 1.3858022761207254, 0.6698691050652563, -1.7882425787208467, 0.05463416162273482,
+              1.151588572666963, 0.9448022669750591, 1.2079032520591921, 1.3271868932748552, 0.7459734492068639,
+              1.4448931947680101, 1.741554075288172, -0.49778064496799157, -0.05357363118867564, 0.17060355537450267,
+              1.8858952797885928, 1.1828850394664219, 1.4398264570692447, -1.0269803221490008, 0.12795611159499387,
+              1.4338295119095292, 1.7680805378740985, 1.8889001943775678, -0.023646131688371597, -0.055364618226636075,
+              1.3107732868213482, -1.3726761197824935, -0.48421640176631975, 1.8520978683112554, 0.14900451528494418,
+              0.7553309487914097, 0.995210897988966, -0.2653148753757497, -1.0047335940870337, -0.15140716923573905,
+              1.0342357378533045, 0.9590192011054128, 1.3276618340182669, 1.7076552004070518, 1.3639368693762144,
+              1.0626034699464553, 1.0985888634186862, 1.0871213821052033, 0.3518298069849042, 0.6905794127769393,
+              -0.5700252629850588, -0.10814050178161683, 1.3965639143955952, 0.8292785089561896, 0.25327348015151596,
+              -1.334944218927732, 1.6209990328336517, -1.26244979705121, 1.771347153639546, -1.4436659851102362,
+              -0.5033590550326617, 1.484309499478445, -0.3804774758165417, -0.6854434358446646, 0.11814627802495625,
+              -1.8940220672649586, 1.9948327521339193, -0.31419774418955715, -1.708608376028823, -1.8717143806284637,
+              1.4405268554284918, 1.8275766986420505, 1.613878636732296, -1.6842307903910925, 0.40437384436799473,
+              -1.7974817731693786, 1.1222020737272933, 0.8616912290576968, 0.7450260507858868, -1.262819663341098,
+              -1.3890825964448705, -0.24733521021608595, -1.9566763230316564, 1.240599031645397, 1.0005078570110628,
+              -1.2429059760645886, -0.8343788480444481, 1.8180187022655172, -1.2406155795725864, 1.206905860440954,
+              1.0657535671563094, 1.0688089382594308, -0.1623146723818225, -0.9394369384107097, 0.1644095126054408,
+              -1.5836754669396766, -0.45886757503464093, 0.02309988717518241, -0.11609000844996142, -1.8627934732063016,
+              1.9415986762986073, -1.1741977923279308, 1.2850766678166368, -1.6650362245910895, 0.8711235235579853,
+              1.703790813181027, -0.20031635110543, 0.7709122587840396, 0.4676763407673441, 0.7333591027965438,
+              0.010661459873729129, -0.6248209657856156, 1.3499622620431584, -1.010555890938674, -1.9094767924575482,
+              -0.4118954261411796, -1.9764407569645153, -0.11597198863706204, -1.2058671493925148, -0.9480128119239577,
+              -0.5293044156931037, -1.0802020289569132, -0.17428061346628443, 1.79479586490669, 0.4507608914027035,
+              -1.5890677193516893, -0.4180241158854212, 1.1247910122551152, -0.10769135533882057, 0.2413054436244062,
+              1.3070197809453399, -0.7234463442247714, -0.9044481440724681, 0.808060474881219, -1.9681916392611978,
+              0.6794353030118225, -1.8140413117066592, -0.21172484209703857, -0.3970612901969721, -0.22610168646442563,
+              1.8446444889972504, -0.14161684848047962, -1.0612317380319158, 1.6805704263182024, -1.5680342684533937,
+              1.6367583739045974, 1.9603810572547848, -1.9461850695662726, -1.669279137293902, -0.4582040515383534,
+              -0.4903387593204007, 1.700009791169296, -0.1006260106974528, 0.46356012096704813, -1.148937310426322,
+              1.1291686959534264, -0.43326682883595513, -0.7294554760312604, -1.3404464141642505, 1.4428709283346048,
+              -0.31527585219642784, 0.5232849548965959, 0.840594052127317, 0.8605144687711537, -1.6471991161039679,
+              -0.32119284017874694, -1.199582906673192, 0.3080262169024959, 0.14151294810583348, -1.7354321287231,
+              0.4873457081904098, 0.30837874931057474, 1.3003825539901728, 1.9636934942685267, -1.017158000827136,
+              -1.7596484196087827, -0.6817110071876389, 1.8933995404689652, -0.27989446483984093, -1.067564992804905,
+              0.48291319348514605, -1.6740080386493696, 0.2807422201361458, -1.9110968101706796, 1.6831495761856532,
+              0.11512823651793447, -0.01736208420134222, 0.4405414565435697, -1.8085718479527353, -0.9564467319845411,
+              -1.58340676850203, -0.647955866711154, 1.6543925512752926, -0.7724666857844449, 0.5113949921548908,
+              -0.39267895944443065, 0.40761631065773063, 1.7690968773142668, -0.21764226027462374, -0.5169606846714005,
+              1.4412302687210286, -1.8896763215831802, 0.07979887381656514, -0.11436797170178448, 1.0634323241712869,
+              0.26858767414261475, 1.582753510128553, -0.010528543666477042, -0.3613643892495162, 1.391514209117238,
+              -1.4700872595733046, -1.5362821122874086, -1.818586245442571, 1.9678697208900475, -0.9362374796105595,
+              -1.4709960962767532, -1.182374325374778, 1.6385607669867177, -0.46775448579892487, 1.9576437315276696,
+              0.915531228777362, -1.0860235734385926, 1.0655104012081509, -1.7770877181643954, 0.1443659128293806,
+              -0.23955298993629803, 0.41725367891443366, -0.8558589757408512, 1.024674449305122, -0.8538581096220099,
+              -0.17121172366938264, -1.0495343198650096, -0.8461809157835463, 1.956660524400533, -0.10451516941234473,
+              -0.9119888509755709, 0.9341633453090434, 0.5765821236303488, -1.8017153374435075, -1.6959921212218267,
+              0.3565838506194048, 1.986423720658717, -1.1810787364750697, -1.3554314442606277, 1.2292087344595828,
+              1.9389467760629646, -0.06060251846881748, 0.6471281822482204, 1.028562584237319, -0.9889764039700069,
+              -0.30300382154607064, 1.8809742113734886, -1.7911374091327446, 0.4093234223382991, -0.692170253260544,
+              -0.5766217362114325, -1.234711065294488, 1.4845455677723791, 1.1142993730640143, 1.0351547495051978,
+              -0.2304804542756207, 0.7896680860540854, 0.7368394967498872, 0.6117647784314304, 0.9649509001647774,
+              -1.4794529756304886, -1.5330264541276408, -1.1347331780500776, -1.957296773370273, 1.0497217009949296,
+              -1.876577007676679, -1.8707142834400772, 1.0355676671507679, -1.3024864669068572, -0.8110172097035955,
+              -0.7956308468122133, 1.5651626086889294, -0.5950947090287055, -0.15363512205018193, -1.7408026469236138,
+              -1.8514840915078024, -0.40821529034130855, 1.2085979600022174, -0.953165571253348, -0.03303673441403454,
+              -1.6012777563202603, -1.1080907034689993, 0.4974859939502432, -1.2774517368694216, -0.20163785863448247,
+              0.8261780324851822, -1.5127015843190126, 0.14100033563999403, -1.052646319316592, 1.6782929279009817,
+              -1.5464154280508744, 1.7486715792103427, 1.7780537663957405, 1.7209562957702067, -1.5054888719925499,
+              1.5292916602749358, 1.179965119787405, 1.5170349093126205, 0.7433092687415837, 0.9522185073327041,
+              -1.2380413345480523, 1.9354870169011447, 0.625636243747028, -0.09000816987169546, -0.5335972012344188,
+              0.9674628266238745, -0.03967494279717343, -1.8591428816092792, 0.2309446236016406, -0.9041639531030761,
+              -1.9026103934874206, 1.5541589920102101, -0.45696090245009824, 1.8466298423672463, -0.36055327204706167,
+              -1.2458073226198056, -1.4410345639464017, 0.4731557626110279, 1.2307498218360111, -0.30913913858563724,
+              1.8557259220973865, 1.1724797822135766, -1.516241961681641, 1.147047572924638, 0.009295148811337306,
+              0.8291735590935811, 0.9825251314963639, -1.6702374836146134, -1.3070146895265724, -0.35977729833032246,
+              0.10882986028094521, -0.1545812635060546, 0.5966946312401102, -0.12463585998219351, 0.764026848253426,
+              -0.0653501987613172, 0.5337159207310522, -1.3783865008607394, 0.3440914524635428, -1.6128660868012537,
+              1.0505520366072156, 1.4508195056160966, -0.3811605116562866, -1.0184337989154448, 0.3472432185953034,
+              0.7934690008453043, -1.1871814411996455, 1.7160465328415073, -0.8932034391682153, 0.22684342695521842,
+              0.006601468173127678, -0.8703158970162406, 0.7854001417512366, -1.6096149032006064, 0.5734105371918883,
+              1.8323642183966413, 0.8494195484926621, 1.52159530384528, -0.43666400911265324, 0.6949749758230679,
+              0.09014001060463439, 1.1181673671725294, 0.23797216532766896, 0.9091467606318648, -0.051242214293259813,
+              -0.3492957666583303
+            ],
+            "dims": [2, 2, 320],
+            "type": "float32"
+          },
+          {
+            "data": [
+              0.0018182522274203805, 0.06756509596322857, 1.889723294866065, -0.10289095754140298, 1.5711519216894745,
+              0.027529292075774592, 0.9603256438495507, -1.497309631471758, 1.9251601219617065, 0.8851491878732389,
+              0.05078780805071137, 0.40903741455911735, -1.6644840015459215, -1.348225759557871, 1.615832737926227,
+              1.042719864089511, -1.9289326046242312, 1.3417535199012995, -1.710655801290117, -1.130165128147044,
+              -0.3755000776719024, 0.6155781582426902, -0.5883485771887473, -1.7159986811406176, -0.16333854572017525,
+              -0.06079239446971929, 1.6926064002585495, -1.8776332892248098, 1.4601970742576578, -1.3202352800423185,
+              -0.12899708506012164, -0.6003093613879029, -0.1726349092091164, 1.2394146364350664, -1.769629141089184,
+              1.4197330981295524, -0.9504267735259635, -1.240675610662361, 1.4018548317486923, 0.5332018345413356,
+              -0.16073415033536875, 0.15303724703170385, -1.8037963193841238, -1.311714810716846, -0.5740602095553404,
+              -1.0372165240096223, -1.370949121899355, 0.29661966702940035, 0.07816374250571023, -0.41396787300651905,
+              -0.3694698645575212, 0.6759765867037197, 0.2952400780995772, -0.06275069272676337, -0.9130274419561628,
+              -1.8944701092982958, 0.33465806810173593, -0.404939193749847, 1.4043718178232805, -0.5590711165263631,
+              1.2184926422968934, -0.7087036307842709, -1.6055109382118182, 1.968257767003597, 0.529695028652811,
+              -1.9967381817454308, 1.595078125176956, 0.9871155490120058, 1.6566751957870993, -1.609626148231829,
+              -1.1397801527001823, 0.02238544560446254, 1.4873497063814245, -0.4755743745599572, -1.6926423664304844,
+              -1.4161828320433028, 0.346372427157398, -0.18203459832580027, 1.4635583159542183, 1.5944148599650028,
+              1.3186726267824955, 1.5675687012032968, -1.9754809408365706, 1.44557963549327, 0.9397875688795354,
+              1.4424046221061442, -1.4458135310352649, 0.9975520389856136, -1.9027511578082317, 0.9144382540308484,
+              1.052124261689804, -1.4732678674195494, 0.29024955712503164, -1.2231252144665383, 0.34787712508784985,
+              0.3556934319800238, -1.7419738471239645, 0.8630538908485903, 0.5386452782458866, 1.7600516786463105,
+              1.8905437777505014, -1.5744952794523028, -0.7530004157782235, 1.5678919268380707, 0.034533101389558674,
+              0.7325333516090975, 0.9775064333478163, -1.6408433791748216, -0.7414398323785214, 1.6725433719876586,
+              1.0072882099919305, 1.4341931058179327, 0.7139948421146176, 0.40545031341822124, -0.11478362063979386,
+              -0.9345270890825441, 1.4281286745225614, -1.39970554180245, -1.1485396410325386, 1.1495990036520798,
+              -1.2916127094423402, 1.4211660589871826, -1.0749317173140405, 1.4370776307284663, 0.7880288709576773,
+              0.46732661965227873, 1.56798877542517, -0.9531716195760707, -1.3739051298849194, 0.9766290318098436,
+              -1.307661662111708, 1.8574559170417002, 1.35797073743995, -1.6940130226054606, 0.28491131826133387,
+              -0.36419491260352554, -1.3047662545854015, -0.9266815176320033, -1.2358711507932467, 0.9127887752631247,
+              -1.6466848327495578, 1.1607458121027339, 0.46297657760513733, -1.5495718508374514, 0.3292413137438217,
+              0.7675934897387728, 1.4008121445440214, 1.4898570624591958, 1.6030744917802648, 1.2925872420362232,
+              -0.8421561750911684, -0.3407292616133608, 0.38924919209979336, 1.6793513775487527, -1.0373013949726966,
+              -1.5337353736283532, 1.5143316995909872, -1.6320472160478126, -1.3996482770156646, 0.6337864872715988,
+              0.5406528347636357, 1.2967809902878562, -1.5182702863754916, -0.7399098341126589, 0.31978027899894723,
+              -0.4320909370805026, -0.057815767103424065, -0.42656779912656795, -0.7191461156604344, 1.732444695508872,
+              -0.16793165663622744, -1.029044319841585, -0.7379183254565955, 0.6335667491493258, -0.7407757474651113,
+              0.737814588729532, -0.41713542698826167, -0.862992043249343, -1.8537968903371889, -0.480058608858549,
+              0.04028745468513595, -0.11696118988455151, 1.2159286584219329, -1.4551651039165874, 1.8518920420484895,
+              0.8324620148383071, -1.9503205997190287, 1.3118092522348013, -0.41781057862944326, 0.47025354333711356,
+              -0.08599400306878557, -1.398138636933056, 1.799030968066016, -0.9016154689967486, -0.44642885397970034,
+              -1.6161407274817075, 0.6108393015698415, -0.9652371448534662, 1.472448459030451, -0.12097411552763226,
+              -1.7427779621544364, 0.6772588555443013, 1.239525535102806, 1.4978793781566582, 0.9794171716198061,
+              0.37480400234555056, 1.7099069435864092, 0.5339030487857208, -1.7368267186422761, 0.3401246801395308,
+              -1.495349576003802, 1.1154539341471592, -0.5739747352480027, 1.7719108709631328, 0.7087464471791378,
+              -1.407094251765498, -0.5711994993106657, 1.6197007171162792, -1.665245693725593, -1.4093290138388097,
+              0.8150971020478908, 1.1565262598728276, 0.007036682898540647, -1.067724969488646, 1.1760370444772006,
+              -0.4660822995530971, 0.18663889657333232, 0.8600384570962394, 0.07639203983671461, 1.7055162765205303,
+              -1.7134292208088802, -1.3413558800873675, -1.338677372528159, 1.4246968540400653, -1.1823984287999973,
+              1.4751654585472211, 0.5262834049380078, 1.5117343050060867, -1.409416488118043, -0.39544742603356386,
+              -0.6577586706586658, -0.5919201797053688, 0.6013534842506445, -1.1862135968111707, -1.229417973714626,
+              -1.803412419156234, -0.7655790098575235, 0.9128632433156794, 0.9036623476529559, -1.9831271121679324,
+              0.8324308647368319, -1.759507307385337, -1.7725931616787687, -1.7039303423725647, -1.4439967872268928,
+              -1.7432455401143834, -0.02216033991501387, 1.2819676717165, 0.16659457648361364, -1.167642388668959,
+              -1.7143084152722228, -0.7289345444538382, -0.02925241516287791, 1.9566358667801342, -1.2857581699546135,
+              1.0915031830445114, 0.05084200795390714, 1.083568818422366, -1.1315486700234478, -0.8881346175534794,
+              -0.63619987674788, 0.3799832019858531, 0.2477670922101094, -0.6132210208290614, -1.8451948781462812,
+              -0.22847217268867048, 0.0025467735349682386, 0.1315834394384794, 0.1776926575489597, -0.8691295174311664,
+              1.6637912565242994, 0.448901769947029, 1.233816013145204, 1.7971799993597228, 1.8719614934816882,
+              1.655937636621596, -0.27359273976124054, 0.08461142131684696, -0.2757947346097396, -0.9521228519499276,
+              1.766034536643284, 0.8831916052200137, 0.9813027219865562, -0.322591101625501, -0.20675723380495992,
+              1.0866641329284041, -0.6397672290782843, -1.9715973970816654, -0.36395252045986304, -1.4160336028155198,
+              -0.7487477697571272, 1.4091533113140509, 1.2152244001439598, 1.0139512253023701, -0.5841820989850488,
+              0.36171343432432046, 1.1810326691265303, -0.044977125366693294, -1.5719763377131732, 1.636814383280785,
+              -0.8254090686593019, -0.2739258751225844, -1.5838736296117837, 0.057544692367468286, -1.6536791042504957,
+              0.8676152862870037, -0.6012988236535559, 1.0789190140651197, -0.21655562768188386, 1.5865699400089461
+            ],
+            "dims": [320],
+            "type": "float32"
+          },
+          {
+            "data": [
+              1.535672932186043, -0.3469466691127403, 0.7594896463626952, -0.05450122463129414, 1.4639377922956394,
+              -0.6333990278356465, -0.8242789470237648, 0.5117653543833605, 1.6078505759993273, -1.410275750604895,
+              -1.6792951377646883, -1.783057576321041, 1.1956662347204423, -1.3979831191193002, 1.7644067312268517,
+              0.4240762243207543, 1.986096182518743, 0.36545941859180964, -0.8774236745093011, -0.8647372274160503,
+              0.8720148666725347, -1.022106286236455, 0.5503111675120635, 1.0204841436521281, -0.9254965061314904,
+              -1.3449432022823808, 0.006824458535456657, 0.07690008991648423, -0.8426817383905396, 0.9996621329373534,
+              -0.23056243949407484, -1.0440039859718286, 1.9168909615768683, 1.5600000104620682, -1.9890822883775865,
+              -0.3604004168107382, -0.028511959235538065, 0.8476098198214288, 0.22053970034789216, -0.42929632097288817,
+              0.6599479925924321, 0.6647860485919495, 0.10175396167639938, 0.22650892002231515, 0.4701540897019987,
+              0.624214514356682, -0.6652257805050041, 0.8518349008799753, -0.9562813618340789, -1.657496508881473,
+              -0.3312572279583206, -1.5494034812904562, 0.18877981986543801, -1.2351800795813066, -0.07918559380797063,
+              -0.09391536586009241, -0.2856357420391582, 1.9393958604954182, -0.7529216437305211, 1.525084648903749,
+              -0.07883509109638975, 1.376637107607113, 0.5783362536287875, 0.961847664027677, -1.6855455725917468,
+              -0.5830772019897683, 0.4271291901307981, -1.8373857521152086, -1.7965394924729141, 1.7115697467771378,
+              0.2565457539488545, -1.3360260284983019, 0.4353676471582455, 1.7248708601658969, -0.9750598890096729,
+              0.05312641822767361, 1.5787554531472985, 0.9667162219022503, -1.364971428290251, -0.2814850946411962,
+              0.9013643208289279, -1.4725055888862961, 0.6001425944665559, 1.2723681158746203, 1.7714493392964075,
+              1.4044899825398272, -1.5787548082153382, -1.1589036159974757, -0.4012478414167475, -0.06868641055197777,
+              -0.7534521481998526, -0.8700101449208493, -1.1662115104665567, 1.7611310737805477, -0.2501517942331226,
+              0.12866215308587936, -1.4699875001512854, 1.0395395370450604, -0.5782390952646876, 0.63115653417037,
+              0.10138581116634082, -0.07007439881121424, -0.4276277546360472, 0.418589841403306, 0.9267207479900215,
+              -1.0293664343515356, -0.1495871781602336, 1.4452889339030666, -1.7189823464809564, 1.8323799237149645,
+              1.8914008693919682, 0.3829486757403364, -0.8735369861149813, 1.602486711188683, -0.39959917784662924,
+              -0.8673792916868024, -1.2627215362178648, 1.8597348040684398, -0.8688156300975107, 0.15713415561611388,
+              -0.13148226217512082, 1.883732805180382, 0.11420203807616502, -1.6552288945493094, 1.0335466032430753,
+              1.9806710089769703, 1.988269693866676, 0.29427412741632075, 1.4966799360753749, 0.6937827119996989,
+              -1.298620046493725, -1.752952308784005, 0.46645438478103873, -0.898908219432915, 0.7139098459371658,
+              -0.16215199540773462, 0.07954281050960432, 0.795652990025399, 1.5967568490712063, -1.2445652996859247,
+              1.9127555713254205, -0.4996844935898572, -1.1156627480592256, -1.2948343944985163, 0.18276720875230268,
+              -0.748683470251498, -1.5079466014120557, -0.2494558107532141, -0.9231537960141623, -1.4121241243829443,
+              1.2059834829573104, 1.905725511300579, -0.39337905860681044, 1.8425190053973166, -1.6566221588247219,
+              -0.242919176072947, 1.2425502129492436, -1.4417507121400348, 0.015600407032383856, -0.2242098694907284,
+              0.18796276556529357, 0.08107732342066765, 0.7149451467441841, -0.20769007081368773, 0.4421202004832834,
+              1.5233025839787455, -0.6642431462292846, 1.5564028464468986, -0.1586815058735116, 0.6099306071219655,
+              0.8180887224937807, 1.9911546339103818, -0.005984685083011421, 0.6777759409892354, 1.7289968623869099,
+              0.5264262640237458, -0.511038272902959, -1.7235775305068346, -1.138944875679032, -0.9623892814614488,
+              -0.6380738572168294, 1.8832106250881075, 0.028541651530706424, 0.7394956760616829, -1.5455450050824036,
+              0.598697699776686, 1.44227094769795, -1.842926293477114, -1.9786511228960357, -1.774125089606943,
+              0.4273755931309067, -1.1833540770674968, -0.29742688579612864, -1.7932368057978882, -1.3999703979662605,
+              -0.5494229951060436, -0.7692231154827809, -1.0112160791506497, 1.633993910846237, 1.3945699010195831,
+              -0.8649776103569309, 1.921348771224042, 0.832322610715301, 1.3754060709990767, 0.3497018723561567,
+              -0.7191957838389857, -1.9794221990125722, -1.84384806203993, 1.2324522851211803, 1.7698494016317143,
+              0.006624102198243165, 1.5911519918365267, -0.762455861009844, 1.4479210196035108, 0.7818151145500849,
+              -1.9876926272814606, 1.8202062970885162, -1.6446357331454369, 0.8692666690506741, -0.7358532212979823,
+              -0.8444806659707744, -1.6015224446062994, 0.7479281419258141, -1.6523782603794155, 0.6710725185977369,
+              -1.1710932073100304, 1.4784513737588512, -1.212966513263102, -1.3741809040280142, 0.25437428444308896,
+              0.8440351752665407, -1.1722116121570672, 1.104161389783421, -1.645735790976162, -0.4286533806712738,
+              -0.37044520217626875, -1.574330285391767, 1.4899314272896893, -0.8495642882336822, -1.714377156019676,
+              -0.4893435327563349, -0.7616337581393848, -0.5339391487933929, -0.3003289730553087, 1.3489307896261735,
+              -0.14680109166432054, 0.1026969670558735, 0.32430953678969043, 0.1795871726769951, 0.9696062238740311,
+              -1.5296687271207166, -0.2770372362376037, -1.0409472934130868, -0.17306368093190905, -1.1960408781183967,
+              0.984219061951209, -0.4077661181651919, 1.7423047847942446, 0.5608878908901787, 1.4329493489434109,
+              1.8986413512869937, 0.19154199669760352, -1.3315756935180012, 1.8870822754754517, 0.5674631415439482,
+              1.1017148980678542, 0.7256621357674105, -1.8682573426264009, -1.2687446906641284, 0.5430939279068951,
+              1.8279931413962558, 0.15890686973919443, 1.394841983124743, -0.8330211159668224, -1.2412716683059033,
+              -1.1755274256803165, 0.3146422214936937, 0.5127310756940888, -0.6223681329826247, -1.3728009148038876,
+              -0.5073994704733549, -1.1727465329222264, -0.07518002339175833, -1.6218851358655701, -1.3314808424730247,
+              0.2696107099425271, -1.353815758928219, 1.6070801592460056, -0.7018653814032136, -1.594649470877921,
+              1.8662880614030657, 0.009632792539534307, 0.885433106263176, 0.7081454198732997, 0.12480572241808119,
+              -1.9002637028711113, -0.8823815470565757, -0.12794198437065507, 0.3682196882451354, -1.1962414622570767,
+              -1.101920787984521, 0.1703217046774217, 1.2755057257388405, 1.2757461273763866, -1.7253598839195732,
+              -0.3935586680170111, -1.6790297555951925, 1.1726640337873802, -0.7187759606615787, -1.5997974808572053,
+              1.9512036824878933, 0.8991982283799391, -1.516998597379371, -1.0918962406357053, 0.12845929863120276,
+              0.387447437135819, -1.6766371647631972, 0.4172435231617522, -0.8587881195399367, 0.8973805509978297,
+              0.5384910477202398, 0.22290981983700497, -1.9824980848037859, -0.19789410371539873, 1.0396641208977249,
+              1.9498654847750698, 1.752979186273122, -0.10251547854421528, -1.7031576116596918, -0.6422947693243835,
+              -0.5947775282776488, 0.25094777162345583, 0.5519773563378578, 1.1845669608153342, 0.07011886849473115,
+              -1.5689347607142432, -0.9068208446502926, 0.4518736648271817, -1.1908598340431444, 0.9123278060019366,
+              0.3808045721687314, -0.5161116183400685, 1.4633312728276353, -0.24955275031843804, -1.9270793627181808,
+              0.5510310380033525, 0.002103402836195478, -1.9722027133603266, 0.8207770388309132, -1.2709862666051333,
+              0.03660015849392373, -0.08721025552259398, 0.1480447971653538, 1.3975878551198289, -1.8688681862560603,
+              -0.2735983144132472, -0.29150197793885635, -1.349553505848272, -0.14289894302424067, -1.0632608448362548,
+              0.9197316019995538, 1.6766092374653363, 1.4333994578157911, 1.8497508886723608, -1.8365902161760914,
+              0.3329875047259945, 0.28711035354851955, 0.018743287980965917, -0.47550704561352664, 0.026002587809994537,
+              -0.9815518239812109, -0.30422655490353545, 1.1236748290508274, -1.996970334350796, -1.663190926732148,
+              -1.4930228184840004, 1.2293686779591093, -0.11228295031816327, -1.399262159949875, -1.2745774075202778,
+              1.0404471355251506, -0.9042932188930193, -0.483855773240883, -0.051899262666108115, -1.1517591694487734,
+              1.631117268451015, -0.4341760983538707, -1.5093199848977354, -1.524695207871412, -1.179033179719653,
+              1.203939869858461, 1.2278371883112191, -0.7764972190751465, 0.12469436067847539, 1.1254668267275294,
+              0.1253270059252225, 1.02529025377972, 0.37477534712132243, -0.816607896481754, 0.7652933238577306,
+              0.0816252203587613, -1.6877073529228523, 0.282188424454314, -0.48899417877023144, -1.10579595806544,
+              0.4180711569457314, 1.239608967084651, -0.8553327976952234, 0.7601553028351749, 1.017191993054694,
+              -1.561711107008871, 0.18166558203866234, 1.4575039351725838, -0.7919992885427041, -0.05528739747934974,
+              0.5393789182198327, 0.9208003955213648, 0.8037584630910892, 0.2508199691349171, 0.5025718274381168,
+              0.40223725437742086, 0.43401128486340124, -1.918673978558985, 0.38895512761013773, 0.9647875436316022,
+              0.356426573504554, 0.7676218046110401, 0.15946706730485438, 1.2727737024033576, -1.1428215846133938,
+              0.36778995418490545, -0.41392909578544224, -1.550642999283478, -1.7016418383565188, -0.3516276355010701,
+              -1.6424434547903983, 1.2296355686757101, -1.3262048004001983, 1.9866748350391505, 1.9039145370701833,
+              -0.4605978047947623, 0.37289955561548194, 1.2909351136100344, -1.4775326769813537, 1.8608708474080071,
+              0.6440656172393684, 1.4358923542702868, 1.6635530454398575, -1.7844300247360296, -1.470415868795488,
+              0.21864396672047892, 1.5488664195436606, -1.0864322992770177, -1.550780881959068, -0.8331945313037004,
+              -0.9367699280324953, -1.9013228249406309, 0.7807264098375688, 0.06677827961955263, -0.4865949947067687,
+              -1.9079733463147095, -0.8445233464370387, 0.4065074139836655, -0.3310839064029283, -0.2904445573034993,
+              -1.1753367420636245, 1.3721435340052208, 0.36660883645931097, 1.27723053302687, -0.9359216637576937,
+              -1.732231846976478, 1.0644600709999477, -1.6378422934868384, -0.8826400850725795, 1.950622879844948,
+              -1.9911319096225792, -0.6598073662934398, 1.8955996856482145, -1.4071132961709223, -0.8795225115767629,
+              -0.5029228970810946, -0.7734268477225967, -1.716542237524993, 0.04010671043366898, -0.7937284158037281,
+              1.030026939297609, 0.9801808342123648, 1.2953427689382302, 0.20610803631475605, -1.672761300291775,
+              -0.690673451769495, 1.6609033000524338, -1.897131087105456, 1.2029533984904228, -0.5681454803874688,
+              -1.3646956682920965, 0.22071074912276334, -1.4735886916157908, -0.9695144027680014, 1.626222864433485,
+              -1.8694559899308487, -1.8879003983306655, -1.0176033048635613, 1.7915586444709328, 1.8810192124623084,
+              0.5319984718680058, 0.0113238596202212, -0.09805090157632446, -0.5444299501215024, 1.1135935258682927,
+              1.17684427133796, -1.85426568001437, -1.7530946944132086, 0.3038089938756876, -1.5870230070820002,
+              1.9106333020042747, 1.3937407603560725, 0.8591788216145968, -1.612956779000272, -1.7151209190289016,
+              -0.13707423626294535, -1.4389728179178984, -0.05236093609874359, 0.9751452825232896, 1.744306648935904,
+              0.7254929535860901, -0.09824503868926815, -1.4925208247531838, -1.985227418605298, -1.405540454178178,
+              1.692915817031472, 1.2230668144021735, 0.04262811065188643, -1.1756894733009666, 1.0222275091190456,
+              0.4934708666464802, 0.08979456736565261, 0.10059671914562518, 0.7155249975927536, 0.04082949674837977,
+              -1.715826873724553, -0.979189481763262, -1.1065843508804214, 1.481429410565739, 0.5278383608268999,
+              1.4941027771635946, 1.4151786058577498, 0.07974076288029774, 0.3167509060420519, -1.269619345887964,
+              1.8667680276727765, 0.527367815431, -1.874110045435497, -1.9373013120064702, 1.5330729150450173,
+              -1.7833509822122444, -1.7182607692067773, 0.7561591559894678, -0.1056962696530368, 0.13014948563496898,
+              1.804101947913626, 0.7276195691909635, 0.021465712639121115, -0.5163553036182069, 1.1855106734783103,
+              -0.532372100695512, -0.5871635445412506, 0.161643292508721, 0.61018489160484, -0.9869821416193743,
+              -0.5318766940780302, 0.9532631042147388, 0.4597709134353236, 1.2142228742259942, -0.8224515402258339,
+              -0.879922983657166, -1.4710925151016916, -0.29851124917883975, -1.6631372706933156, -1.417993373545026,
+              0.6364481896978704, -0.6013255938328603, -0.046835161333119935, -1.7247175181005758, 0.9825982199711403,
+              -0.7264776248635592, -1.463988875635824, -0.5013956201257255, 1.1933878395314643, 1.3056455851087287,
+              -1.4398688148432273, 1.7038722585040453, 0.46568790654958114, 0.481485333420693, -1.2873930064513237,
+              -1.1475617778051763, -1.9673375617555031, 0.39874490557435127, 1.0960170584095357, -1.8987243885981488,
+              0.36983554057526735, 1.9718844590478293, 0.7894176749822801, 0.06983603687412288, -0.9000466156841869,
+              0.6428129286371904, 1.704798225993037, 0.5950045030048496, -1.1678955586471442, 1.237662010594594,
+              -1.6482921001228146, 0.7270614937877813, -1.0006186813130835, -0.5305400798817805, -1.5252716548293819,
+              0.18855276048488978, 1.5437352372976703, -0.9397215004565727, -0.4258934153954881, -1.0950445559616853,
+              1.1844079915434298, 0.024990774215178035, -0.16149461270780652, -1.4078837300903269, 0.09499589792836627,
+              0.516842370641422, 0.4800833347119191, -0.539291197739594, -1.5117979701954605, 1.354396143788092,
+              1.28278689745333, 1.8488206619245648, 1.3022599053953776, 1.6609548614809775, 0.3269713781203789,
+              -0.38485903666664, -1.464958277436181, -0.3992461504929734, 1.0699961189397085, -1.0135843210651023,
+              -1.458604697589653, 1.490121083969428, -0.2595359932483561, 0.20854389182544342, -1.7482190390121701,
+              1.3007127316326326, -1.9884730509986825, -1.4952841032131454, -1.3179011133758536, -0.388478076479009,
+              -1.1589100485370416, 0.8387145536985532, -1.3384696651494759, 1.4683529176022008, 1.303145953986827,
+              -1.3041819891109316, -0.03449749547681513, -0.6608734038387656, 1.1683787754166381, 0.5655509145236746,
+              0.37738607602963814, 1.2152762148898635, -0.29353655718583926, -1.0509092280694636, 0.7139081884804019,
+              1.5196106141395527, -0.530586207320952, -1.558831387258346, 0.01131046295330318, 1.9344117181061735,
+              -0.6850503993030497, 0.9331665418290909, 1.1688357095654807, -0.42466684124295995, -0.49121961262440816,
+              -0.11897540791552874, 0.5942162255141863, 1.7548838522451646, 0.4438013028171106, -0.28183936745813476,
+              -0.07495854303862437, 0.9303587326961971, -1.3198776631733748, -0.591718773961956, 1.1127108159764676,
+              1.3939520197540327, 0.6360105102962654, 1.3722503898910716, 0.1757960098808633, -1.8297470389548955,
+              -1.9205472381959057, 1.4666198830651629, -0.7830326162911714, -1.4564248566278515, -1.0967977812614587,
+              1.164770039819981, -0.5760771475874042, -0.7667709006388028, 1.371522788043694, 1.7326600398634024,
+              -1.8193902025531763, -1.6090197630011929, 0.09836987546776577, 1.0677415637460363, -1.5307232030478781,
+              -1.5599516580470505, -0.2609675276531007, 1.0598276568453162, 1.8794380814113483, 0.5316994667209949,
+              -0.7552146503779023, -0.4617287817040179, -0.3819745586646004, -0.42575961119349426, -1.9237942552613312,
+              -0.8825058198571423, -1.8728798417790404, 1.7802885739921077, 1.8333435291969842, -0.3098252256281784,
+              -0.029956863413143964, -1.0772837825116914, -0.08180463340649524, -0.6945910113459792, 1.1668128443816146,
+              -0.02738480437430635, 0.39293059281590104, -1.6704359314356383, 1.6869956995774205, -0.2294375604199823,
+              -0.32757809443951746, 1.9764189201357567, 1.5201484938081151, 1.087504317388186, -1.6272710803209698,
+              1.0397868469069298, -0.22176854941092294, -1.8820468396186323, -0.5303897107068192, 0.594170569473933,
+              1.6960001373937432, 1.9644545152850057, -1.7960342834770175, 1.7873883299813267, 1.3489957623885935,
+              -1.6820391707003042, -1.5713129762775537, -1.3637851932919034, 1.5936068708950781, -1.2089638711610604,
+              1.322028643928432, 0.8929678781012855, 1.8401053408016272, 1.5452683829326075, -0.9171427145163484,
+              -0.06745535875434427, 1.9379586035273615, 0.5855503730756357, 0.03549855059545948, 0.6527698319031092,
+              1.6754602207349976, 0.7728323704391817, -0.9665588877441182, 0.6173545510334506, 1.3120695172827377,
+              1.181821226786317, 0.1841309435168954, 0.32318631702986167, -0.790159398034489, -1.385019609574396,
+              -0.7118643666238835, -1.0439971536099275, 0.017584768122861583, 1.7536303032255187, -1.1965922071808155,
+              1.4548082915973595, 1.562828560283652, 0.0920828524560271, 1.892000960124009, 0.3648061373597171,
+              -1.9613669287159263, 0.841563763070833, -0.9118328355251464, 1.9226565574363006, 0.3988462224271192,
+              -1.970188432590363, -0.8264337415665439, -1.9090851263430704, 1.8428915650288547, 0.28596991752644385,
+              -1.6708643684685667, -1.0762549708362332, 1.9492472760488564, -0.17802109704659852, 1.9236671687550047,
+              1.9548632849049623, 1.9566450030001414, 1.3303550873049677, -0.5915124672929295, -0.0037832544010933944,
+              -1.6026781861800838, 1.7578833516354813, -0.2956678774270767, 1.4060455643195402, -0.7370157759032727,
+              1.8789198126787916, 1.1123902493105078, -0.8769185681462304, -1.2618214191177506, 1.8674610245111278,
+              0.5103075356648485, -0.020685118611023512, -1.407221324818173, -0.7491588381751608, -1.3743460812306214,
+              -1.7710712130536228, -0.19455369352318552, 1.3434990212660862, -0.5544338320325721, -1.324247058015053,
+              0.8874849369101687, 0.6838871095643375, -1.5617313105262172, 1.204432716341258, -1.4981479923604955,
+              -0.06499977622096687, -0.8060264147106553, -0.36092597365795775, -1.307326777195418, 1.6399424900785,
+              0.429157912433868, -0.9915570262096942, 1.5128426032058089, 1.6375586318255548, -0.1737010535017669,
+              -1.21285453753765, -1.8844155037723906, -0.2590630754224348, 1.7328565249414716, -1.260633142919116,
+              0.3637043664955444, -0.48087365705468965, -1.7001295586898113, -1.0775016378447075, 0.2620695698901221,
+              0.5015363913767086, -0.42080100290276246, 0.5338170065286052, -0.43568151602634764, -0.744286733837793,
+              1.57647267103789, -1.2491109283310529, 0.10032144655805375, -0.46919353377702855, 1.9415827315644636,
+              1.2111393515469855, 1.744725164783687, -0.6871612254817352, 1.406736078990102, -1.063724178982385,
+              -0.904699966390976, 1.5681407930006221, 0.79849818604837, -1.7759907970834616, -1.6325947440964974,
+              -1.0309733602826086, 0.29563414198237936, -1.7157737037653629, 0.2876568188935451, 0.21411659926835913,
+              -0.1601632043965786, -0.02605079418095091, 1.2041639219664182, -0.6351323647136597, 1.1149646585336592,
+              0.6657515663650084, -0.4672646227384094, -0.5117766415018226, -1.4643244157794149, 0.39081520672097003,
+              -1.502649477455031, -1.637368884151761, 0.34542161036123176, 0.060151105688381, -0.5045040651104555,
+              -1.761988723037204, -1.9197872473179176, -1.3665270161331975, 1.3928026939637972, 0.39218445250695577,
+              -0.8470063024385848, 0.009038121027233892, 0.46871439485211575, 1.459827780771806, 1.4853766551455694,
+              -1.2321752545416356, 0.3748806345040103, 0.20582729258619814, -1.4266279966077402, 1.2950255786963805,
+              1.7125611822808544, -1.407545517068188, 0.5169179018491512, 1.8595592751857541, 0.9487671455033482,
+              1.9467423989905699, -1.5919149626150748, -0.4630901723451881, -0.698284068975914, 0.6197574561950008,
+              -0.8199869405915381, -1.7196626702920055, 0.6036024034626806, -0.8348164600145518, -1.7650166093756132,
+              1.5829990521620996, -1.588645487863901, 0.7633248861408699, 0.5800948434762754, -1.7159447523887836,
+              -0.3836837699904496, -0.9746560067630572, 1.4480442893861705, -0.24403527878135645, -0.6397662241706819,
+              0.956203271386264, -1.4601856308265049, -1.5649468816584298, 1.731664582215319, 0.9679933953420496,
+              -1.9722379093946607, 0.24076423675934056, -0.19244242272389211, -1.3854799949067935, 1.3744990882455346,
+              -1.121046645776083, -0.4342567706309435, -1.7159646482293107, -0.9317859666979054, -1.698219647396134,
+              -0.8288368620433939, -1.3875410583085985, -0.16399331338641066, 1.4667160798353667, -0.020345764369364083,
+              -1.9585520591695529, 0.9886716666217517, -1.0701744437434098, -1.3248249591382057, -1.3272246201915312,
+              0.906046259715148, -0.9554587301398687, 0.16744253332193715, 0.40874734944503466, -0.7237514235199383,
+              -0.8028952942996463, 0.9478548199038599, 1.6268191787625108, -0.7376232063503751, -0.6874490141085632,
+              0.20469380737641973, -1.940886393624119, -0.9715176541080677, -0.11409081023343237, 1.8208884259904847,
+              -0.05753377002269655, -1.2533113228725696, 0.18235199190840046, 1.3670427559403047, 0.7183594427747524,
+              -1.9834311091439476, 0.09488256814231644, 0.07406140049599319, 1.0427950622016802, -0.7928805141629418,
+              1.7221214208634228, -0.06548459693275177, 1.6984102601031559, 1.8777510809050026, -1.735259524674964,
+              -1.1416240368033357, -1.7612022583614682, 1.721880360655705, 0.14372177475853665, 1.9269311955654835,
+              0.19978809107127216, 1.0299566806165856, -1.7617419918814026, -1.2737765895488096, 0.7789099525859564,
+              1.9816257384474012, 0.4482897887627919, -0.9051913536142644, -1.152506387584042, -1.8817136441487783,
+              1.1054935295772461, 1.577999662025542, 1.600449927735128, -1.4919075331081064, -1.9550057574515671,
+              0.1306184124670624, 1.4754764229533928, -0.808023880270273, -0.21695285993080393, -0.539628797891055,
+              -0.7836468765498132, -0.8815668388678288, 1.8917264703112755, 0.028934119940069003, 0.06879472114883711,
+              0.4407647131615784, -1.702696302284755, 1.7815067716148931, 1.7950168026349171, 1.1405438335719111,
+              -1.1434283018085534, -1.720238715793207, -0.7729623733152229, 0.17672006075090962, -0.14942755614865622,
+              -1.6229777838891115, 0.3793725781830055, -1.0113407389657345, -1.9280392460441265, 0.7422498462017861,
+              0.8331559663193939, 1.3063596659922263, 1.8113167679814106, -0.1401093760534291, -0.24674083884906395,
+              0.15509679692376732, 1.8667087827355466, 1.1906398286118094, -1.673307806924564, 0.41063702592861695,
+              0.2436862014560477, 0.24067383021132027, 0.22686603382511628, -1.7295093225806442, 0.6565075922933001,
+              -0.5514412373381097, -0.5236684516031653, 1.8733248509057603, 1.082970345098504, 0.3340204503283841,
+              0.5450315229688343, 1.0954041212853163, 1.565649272477267, -0.5469992182522905, 0.7193108029242588,
+              -0.9050070254533322, -0.5121370718971949, 0.962566205706815, -0.24631520617082092, 1.3340325816997325,
+              -0.8820024080231894, 0.22736077826137535, 0.2252389330707789, -1.947448723525529, -0.9518843625899782,
+              1.7502182429516546, 1.558646352665332, -0.838440251378624, 1.541757246903681, 0.44677553405529213,
+              0.9918545507928869, 1.060951650228274, -1.3653319701374311, 0.2635328688559797, -1.6894618652561055,
+              -1.9316398959917604, -1.6545844047461316, -0.8374565390669062, 0.5467667551875302, -1.1703334497283162,
+              0.79898936445238, -0.48742537394255603, 0.05126348262407365, -1.0630031367885744, -1.6755563384807575,
+              -1.7470496911251123, -1.4045037572548411, 1.697678496203098, 0.541058257415223, 1.9355948975325852,
+              -0.8470115353500489, -1.2030885197848056, 0.8919323754916997, 0.0702516207867685, 0.5155253592422371,
+              -0.3579514965668338, 1.7112737380442171, 1.9947965056065957, 1.2741397687110538, 0.09885151767767297,
+              -0.9770807797341039, -0.11682236263324342, 0.7272198637411007, -1.987824039940028, -1.1358258258310752,
+              -0.11090836034305251, -1.9915135816887366, 0.39056056844969866, 1.2932859858303178, 1.7109978939050503,
+              1.1846928384025448, -1.7330449982026206, -1.1525984164585106, 1.104166927781134, -0.28565377527521196,
+              -0.9685059019914002, 1.7093828969134002, 1.9709107005494806, 0.049031526597832276, 0.4472417265612556,
+              1.0921859039999235, 0.8763632205063123, 0.8161138478535914, -1.0720275802414108, 0.7266994153226873,
+              1.233185460886041, -1.127435043988318, 1.0918127239321773, -1.8540096367958645, 1.9681192361925266,
+              -1.176325917090126, -0.30265014266672097, -0.44524467812690727, -0.9978154618024702, -0.667478816738317,
+              0.15065079333379305, -1.0302715841959227, 0.829863553229278, 0.8134089689909292, -0.6474889076993566,
+              -1.079618527738825, -1.783292588379826, 0.748112963221554, -0.6286053844150628, 0.48331041409284303,
+              1.663305348437456, 0.18479680885937455, 0.6293791717008288, -0.6005275360880811, 1.5747695362530774,
+              1.5708476785905807, 0.5755861487097542, 1.2041008720516082, 1.6685888824542738, -0.10278064261508835,
+              0.9057150675313927, 0.6510335974298398, -0.10744692672758216, -1.7129461062136837, -1.4064873182457918,
+              0.4781316094642234, -0.37189635012197275, 0.16614793992804522, 0.03645962620683285, -1.6224894209420242,
+              -1.8138940006820983, 0.5069783696842336, 0.6849365239989318, 0.8037589654894051, 1.979213666270276,
+              1.6861127134381242, 1.2233661916798626, -0.3986509966310168, 1.274497735591801, 1.605857523477285,
+              -1.2118797206236485, -1.0066619307873124, -1.358968189183389, -1.9510798049888383, -0.9808314235618916,
+              1.742926920936518, -1.1022645984613817, 1.0929594394621382, 0.48488158650621127, -0.32877770055973077,
+              -0.47650834081572935, -0.5160849885006016, 1.3738126318494883, 0.8827072110361662, 0.48644110690758247,
+              1.0382179714335322, 1.6721919595070132, 1.341715329629717, 1.7295025892939409, -1.522344995293861,
+              -1.5965490751871654, -1.7983927509857223, -1.0759710407128011, -1.3793282201703079, -1.443902375079908,
+              -0.9426382639949571, 0.5602210832754357, -1.0965977429851064, -0.19857124750589872, -0.7431182359930233,
+              -1.2699260459939286, 1.4876549876992726, 1.6274319173214051, -0.3309529465344534, -1.9454352826883534,
+              0.12935585140981676, -1.0093456723551508, 1.7243377444859647, 0.10127369924344443, 0.697537788222963,
+              -1.521134755613331, -0.442714777461525, -1.6896188579102178, -1.9330985764980921, 1.9140786772267155,
+              -1.2925077312416482, -0.9509978830442094, -1.1889787216631778, 0.795835379830006, 1.4837581515063887,
+              -0.8597344665233413, -0.7448025823499504, 1.7455825639820093, 0.33723505300912304, -1.8208678041990423,
+              -0.12753920031860666, -1.757360720716986, 0.8256076807737855, -1.9972549760931448, 0.844750409785961,
+              -0.9594803067513551, 0.7862268813645183, 1.7393046013815212, -1.161126895447727, -1.6347790700422697,
+              -1.3348870119154936, -1.1621632421015011, 1.2696646718252413, 0.4845759791644788, 1.0668299384975475,
+              -0.6942334010657198, 1.4734240949292259, 0.4282074397978146, 1.6699946816827183, -0.6802086123370898,
+              1.9208442056043609, -1.8532082289660545, -0.1592261674427098, -1.2431462763761214, -0.7286614982674164,
+              -1.522868872353036, -0.3825873577199159, 1.431979005569458, 0.43719966684470446, 1.6478260330278633,
+              -0.06620691473965401, -0.36945868484144917, -0.3809516652838498, 1.6855172591399752, 0.31969027259376137,
+              0.09157179754578149, -1.3138870107882425, 1.4208147318276607, -0.03157398665509881, 0.03702456744844529,
+              1.4819698957492982, -1.6015809663105944, 1.8331399913105164, -0.6094891041007129, 0.9393020005799118,
+              0.6313754553821562, -0.3128111370670492, -1.324295564232262, 1.7609361120800635, 1.5935407847968914,
+              -1.280640014867119, 1.4668684416985176, 1.460389700948717, 1.0299991397017587, 1.2266139129378075
+            ],
+            "dims": [2, 2, 320],
+            "type": "float32"
+          }
+        ],
+        "outputs": [
+          {
+            "data": [
+              1.1067028045654297, -1.6136860847473145, 1.261694312095642, 0.5976569056510925, 3.0194122791290283,
+              0.6702871322631836, 0.10282492637634277, 0.3735429048538208, 5.239527702331543, 0.9048061370849609,
+              -2.0244898796081543, -2.0475926399230957, 0.08633577823638916, -0.7819606065750122, 3.528346300125122,
+              0.796191930770874, 1.819821834564209, 3.565944194793701, -1.9118807315826416, -3.2814927101135254,
+              0.6710286140441895, -1.8751076459884644, -1.4806747436523438, -1.796984076499939, -0.7681794166564941,
+              -3.1337573528289795, 2.5466723442077637, -0.710807740688324, -1.3013415336608887, -2.010349988937378,
+              -0.5839980244636536, -3.2363412380218506, 0.34071028232574463, 1.8070162534713745, -2.2573466300964355,
+              2.2563209533691406, -2.721301555633545, -1.7052738666534424, 0.020109310746192932, 1.8161461353302002,
+              -1.2339590787887573, -0.3481786847114563, 0.14459623396396637, -0.2792869210243225, 0.3242926299571991,
+              0.8492016792297363, -0.9436420798301697, 2.8654322624206543, 0.018474817276000977, -0.33994853496551514,
+              -1.4109879732131958, -0.17846572399139404, 2.4232289791107178, -1.366482138633728, 0.8393897414207458,
+              -0.06912710517644882, -0.1260005384683609, 0.14877259731292725, 0.8378958106040955, 0.2032637596130371,
+              1.6857033967971802, 1.683059811592102, 0.020472168922424316, 1.4638370275497437, -0.2821274995803833,
+              -4.081655025482178, 3.739361524581909, -0.5193736553192139, -0.5895893573760986, 0.13349902629852295,
+              -0.8314229249954224, -1.955358862876892, 2.2120022773742676, 2.9806900024414062, -2.594862461090088,
+              -2.5279524326324463, 2.2374868392944336, 1.651476263999939, -1.7969365119934082, 0.5194283723831177,
+              3.9661808013916016, -0.6912452578544617, -1.615968108177185, 3.2498717308044434, 1.6176962852478027,
+              2.765564441680908, -2.780879497528076, -0.943089485168457, -2.211867332458496, 1.5465649366378784,
+              1.5535883903503418, -3.7050137519836426, 0.04439067840576172, 0.36327028274536133, 0.9592444896697998,
+              2.2070963382720947, -4.852853298187256, 0.6495039463043213, -1.601344108581543, 3.303436756134033,
+              3.5125482082366943, -3.4023211002349854, 0.7367992997169495, 2.8616340160369873, 0.557513952255249,
+              -1.2049691677093506, -0.19210883975028992, 1.6728510856628418, -3.3260436058044434, 4.706552028656006,
+              2.2566816806793213, 2.644940137863159, -0.9390113353729248, 1.6396602392196655, -1.3574936389923096,
+              -3.6357059478759766, -1.3324334621429443, 0.8182520866394043, -3.782191753387451, 2.5362539291381836,
+              -2.8861687183380127, 2.2147746086120605, -0.7912830114364624, -1.3549126386642456, 2.932422637939453,
+              0.6247330904006958, 1.6168872117996216, 0.9066742658615112, -1.156375527381897, 2.196871757507324,
+              -1.3269041776657104, 0.7688918113708496, 0.02223837375640869, -1.3422014713287354, 2.5085129737854004,
+              -0.8842201828956604, -2.039457321166992, -0.0754881501197815, -0.4683438539505005, -2.3120336532592773,
+              0.4231855869293213, 1.7217100858688354, -1.4091691970825195, -2.062229633331299, 2.0696098804473877,
+              1.2929754257202148, -0.21851062774658203, 2.792795181274414, -0.24259614944458008, -1.6432653665542603,
+              0.2709762454032898, 2.5165672302246094, 1.4215764999389648, 2.406688690185547, -3.7345216274261475,
+              -2.1278839111328125, 3.311349868774414, -4.237924575805664, -2.4865145683288574, -0.4375068247318268,
+              1.7486937046051025, 0.9667145013809204, 0.7027313113212585, -1.8740135431289673, -0.3525621294975281,
+              0.19565200805664062, 0.40774744749069214, 2.2967820167541504, -1.8403133153915405, 1.831811785697937,
+              0.9851721525192261, 2.7873969078063965, 1.0879806280136108, 2.5585243701934814, 1.9414751529693604,
+              1.6000714302062988, -0.12208014726638794, -2.56121826171875, -4.894813060760498, -2.881957769393921,
+              -2.041257381439209, 2.9550018310546875, 0.5040202736854553, -0.27999716997146606, 1.0042527914047241,
+              2.926683187484741, 1.3717838525772095, -0.24589979648590088, -4.2212233543396, -2.1938352584838867,
+              -1.6489169597625732, -3.442727565765381, 2.948969602584839, -2.7220163345336914, -3.187354803085327,
+              -0.34392428398132324, 1.470370888710022, -1.630984902381897, 1.2510205507278442, 1.1136020421981812,
+              -3.759488344192505, 1.4942673444747925, 3.067783832550049, 3.345754384994507, 2.6331236362457275,
+              0.9775646328926086, 1.2827643156051636, -2.623198986053467, -1.1612101793289185, 1.7932779788970947,
+              -0.332869291305542, 2.42099666595459, -0.9636011123657227, 4.5822649002075195, 0.8944255113601685,
+              -3.2404866218566895, 2.7085609436035156, -0.4827519655227661, -2.3480019569396973, -3.114384174346924,
+              -0.8162459135055542, -0.9214845895767212, 1.2764832973480225, -3.152130603790283, 1.567040205001831,
+              -1.699249505996704, 0.3841613531112671, 1.300299048423767, -2.6244685649871826, 0.1572742760181427,
+              -2.503662586212158, -4.367088317871094, -0.9085763692855835, -1.3322471380233765, -1.8894531726837158,
+              0.7199447751045227, -2.851144790649414, 4.080941200256348, -0.541861891746521, -1.1072325706481934,
+              -0.6561694145202637, 0.40478527545928955, -0.8838909864425659, -0.5028785467147827, 0.7957435250282288,
+              -3.4829330444335938, 1.046553611755371, 2.5124118328094482, 0.3735085725784302, -1.0879991054534912,
+              -0.09173977375030518, -3.4051504135131836, -2.2644267082214355, -0.9162223935127258, -3.4872522354125977,
+              -2.355233669281006, -1.8244541883468628, 4.704746246337891, 0.4475516676902771, -1.2546875476837158,
+              -0.44408249855041504, -1.924820065498352, -2.729738235473633, 4.391683101654053, -1.0688762664794922,
+              2.174078941345215, 2.718625068664551, 0.7366507053375244, -1.9571187496185303, 1.1222915649414062,
+              2.276261806488037, 2.0843756198883057, 1.5358469486236572, -0.14141148328781128, -1.5720349550247192,
+              -2.324619770050049, 2.1180672645568848, -0.757960319519043, 1.402897596359253, -2.846881628036499,
+              2.5358057022094727, -1.274275541305542, -0.14995357394218445, -1.3371965885162354, -0.4439084529876709,
+              1.4503703117370605, -1.6082179546356201, 3.0019733905792236, -0.9571952819824219, -1.6500767469406128,
+              1.6778243780136108, 2.374703884124756, 3.5679006576538086, 0.20018166303634644, -1.0103645324707031,
+              -1.480147123336792, 0.19532525539398193, -2.786205530166626, 1.3784115314483643, -1.2978419065475464,
+              -3.5328032970428467, 3.0164525508880615, 2.0895931720733643, 3.5052330493927, -1.9349405765533447,
+              -1.311628818511963, -1.7713117599487305, 2.886934280395508, -1.5496007204055786, -0.8046841025352478,
+              1.5652999877929688, 0.1403025984764099, -2.6447391510009766, -2.0337233543395996, -0.22587481141090393,
+              1.8628309965133667, -3.466338634490967, 2.2385408878326416, -0.858932614326477, 0.6435102820396423,
+              -1.1014156341552734, 0.6221705675125122, 1.3742595911026, -0.24308213591575623, 1.8533508777618408,
+              0.14410161972045898, 3.0187618732452393, -0.33525052666664124, 0.290519118309021, -1.2579193115234375,
+              -1.3335667848587036, 0.4902459979057312, -2.2434842586517334, -0.6882419586181641, 2.9724576473236084,
+              -1.2139863967895508, -1.9754445552825928, 0.11754357814788818, -3.2436463832855225, -0.29947084188461304,
+              0.9013328552246094, 0.025318264961242676, 0.9405116438865662, -2.2489869594573975, -1.2323944568634033,
+              1.4659011363983154, 1.380167841911316, -5.245995044708252, 3.716740131378174, -1.5962101221084595,
+              1.7039341926574707, -0.24453751742839813, -0.47277745604515076, 2.6836142539978027, -4.659006595611572,
+              -0.2703670263290405, -2.802849054336548, -4.558082103729248, 1.3134486675262451, 1.3934195041656494,
+              -0.9713399410247803, 2.829873561859131, 0.5422300696372986, 2.14626407623291, -2.411435127258301,
+              -0.8668385744094849, -1.579006314277649, -1.0427988767623901, -0.3021366596221924, 0.7571608424186707,
+              -0.7852025032043457, 2.0103890895843506, 2.875030994415283, -0.6650004386901855, -4.240952491760254,
+              -3.7397704124450684, 0.06430482864379883, 1.2097631692886353, -1.621443510055542, -1.7518706321716309,
+              2.0040979385375977, -2.1621170043945312, -3.4342057704925537, 0.4494125247001648, 1.0336246490478516,
+              0.6141325235366821, 1.1723374128341675, -1.566636085510254, -0.0875391960144043, -1.5110716819763184,
+              1.4554129838943481, 0.839878261089325, 2.398009777069092, -0.6458553671836853, -0.7608357667922974,
+              -2.0972063541412354, -0.596686601638794, 1.327064037322998, 1.2332861423492432, 0.643580973148346,
+              0.2491741180419922, -1.1464729309082031, -1.6413570642471313, 0.4765915870666504, 1.1993881464004517,
+              0.2358156442642212, 0.8658393621444702, 1.8936083316802979, -3.0983033180236816, 1.2818799018859863,
+              1.0561144351959229, 0.18877224624156952, 2.373169422149658, -2.1537320613861084, 1.7804971933364868,
+              1.7559447288513184, 0.5495958924293518, -0.29311543703079224, 0.7076770067214966, 1.3824928998947144,
+              2.5599937438964844, -2.2310054302215576, -1.3870820999145508, 2.705214500427246, 2.692167282104492,
+              -0.3191862404346466, 2.2299273014068604, -2.8660874366760254, 0.04656076431274414, 1.0372791290283203,
+              0.9051024913787842, -0.7127535343170166, -0.346563458442688, -1.8466299772262573, -1.776979684829712,
+              -0.7937185168266296, 2.6496312618255615, -3.1376733779907227, -0.5262937545776367, 4.203805446624756,
+              -3.0495786666870117, 3.059788465499878, -0.6179596185684204, 1.5632293224334717, 4.387739181518555,
+              2.1877965927124023, 3.867405891418457, 1.6019251346588135, -3.1097412109375, 0.14593756198883057,
+              -1.4151546955108643, 2.8710670471191406, 1.281739354133606, -1.9452589750289917, 0.3256327509880066,
+              -1.0140762329101562, -2.1761093139648438, -0.36153650283813477, -1.9866083860397339, 0.20329490303993225,
+              -2.189547300338745, 2.0582122802734375, 0.44074079394340515, -3.6016898155212402, -1.0940327644348145,
+              0.05166494846343994, 3.9986839294433594, 0.007254809141159058, 2.9994473457336426, -0.17313486337661743,
+              -2.319499969482422, 2.1396687030792236, 0.23967742919921875, -0.9820348620414734, 0.7810753583908081,
+              -2.565080165863037, -0.3542521595954895, -0.39312660694122314, -3.611963987350464, 0.042843759059906006,
+              -1.9587305784225464, -1.0954759120941162, -3.2344908714294434, 0.6816467046737671, 0.7935110926628113,
+              2.3788259029388428, 0.8960800766944885, 2.7103538513183594, 1.0750906467437744, -1.3195565938949585,
+              0.6368587017059326, 0.09530603885650635, -4.324446201324463, 0.31018364429473877, -1.4680615663528442,
+              -0.8505295515060425, -0.4297642111778259, -2.9845335483551025, -1.073625087738037, 3.111997127532959,
+              -1.082578420639038, -1.0510170459747314, -1.0351759195327759, 2.1196703910827637, 3.6743626594543457,
+              -0.10965263843536377, -2.8268239498138428, 1.9994782209396362, -2.2761340141296387, -0.037992119789123535,
+              -0.5068368911743164, -2.466184377670288, 0.8389816284179688, -0.9829720854759216, -0.578821063041687,
+              0.3714909553527832, 3.968106746673584, -1.0078635215759277, -1.4665645360946655, -0.24487531185150146,
+              -3.812358856201172, 0.8614283800125122, 1.251778244972229, 4.411714553833008, 3.906099319458008,
+              1.1894285678863525, 1.3625565767288208, -1.2013204097747803, -3.780947208404541, -0.7636905908584595,
+              -1.4467679262161255, 1.9876563549041748, 0.7255282998085022, 1.8526909351348877, 2.2311062812805176,
+              0.7617504596710205, -0.3560359477996826, 1.834754467010498, -2.417194128036499, -3.032979965209961,
+              0.3447788953781128, 0.193556010723114, -0.4079936146736145, 1.300100326538086, -0.19834625720977783,
+              2.222346782684326, 3.1362013816833496, 1.2092983722686768, 1.5581995248794556, 1.3155611753463745,
+              -0.8380979299545288, -1.2280077934265137, -3.5234897136688232, -0.32684326171875, -1.4621152877807617,
+              0.428300142288208, -0.5776108503341675, 0.9278461337089539, -2.4938998222351074, 1.2017678022384644,
+              -0.2525625228881836, 1.0117347240447998, 1.7265347242355347, -0.10318005084991455, -1.492635726928711,
+              -1.2622400522232056, -3.0749173164367676, 2.4151294231414795, 1.1957623958587646, -2.7823221683502197,
+              0.6365658044815063, 0.18952512741088867, -2.0210397243499756, -0.3540761470794678, -2.876804828643799,
+              -2.8968381881713867, 0.17692947387695312, 1.728485107421875, -2.2341482639312744, -4.501170635223389,
+              -1.6425974369049072, -0.9404029250144958, -1.1620832681655884, -1.0455152988433838, 1.3684580326080322,
+              -1.4598485231399536, -1.6593886613845825, -1.0509099960327148, 1.3251757621765137, 2.258070468902588,
+              -3.5802016258239746, 2.863391876220703, 1.0157440900802612, -1.5516963005065918, -5.100094795227051,
+              -2.9607906341552734, -1.1230504512786865, 1.9419206380844116, 0.4938334822654724, -2.3842170238494873,
+              0.1679488718509674, 1.827955961227417, 0.10622924566268921, 1.8168610334396362, 0.677010178565979,
+              3.0694189071655273, 0.3993656635284424, 2.3529860973358154, -1.4582010507583618, -1.5138496160507202,
+              -1.5133174657821655, 2.0854310989379883, 1.6874661445617676, -0.6133178472518921, -0.9184160232543945,
+              3.041386842727661, -0.8360755443572998, -1.2672674655914307, 0.27318108081817627, -0.906801700592041,
+              -0.2576174736022949, 0.8814321160316467, 0.9032235145568848, 1.5922852754592896, -0.5044339895248413,
+              -1.0950052738189697, 0.9084010124206543, -2.3912510871887207, -4.171522617340088, 4.554413795471191,
+              -0.3333394527435303, 1.5956521034240723, -1.197889804840088, 0.8468800783157349, -0.39928677678108215,
+              0.7615669369697571, -3.205524444580078, 2.535108804702759, -0.4366309642791748, 2.1470766067504883,
+              -0.25451767444610596, 0.23135042190551758, 3.335973024368286, -0.19102385640144348, 0.8413820266723633,
+              2.2614195346832275, -1.1231105327606201, -1.3756293058395386, 0.17654633522033691, 0.5028480291366577,
+              0.7965704202651978, 0.867662250995636, -4.270709991455078, -1.4976004362106323, 3.333491325378418,
+              1.6522053480148315, -3.4461770057678223, -1.0945802927017212, -1.1912789344787598, 0.5186694860458374,
+              1.525572657585144, 0.4644775390625, -0.5472983121871948, -4.093353748321533, 1.6807860136032104,
+              2.2575550079345703, 0.9947443604469299, -4.168862342834473, 0.09030676633119583, 1.3352301120758057,
+              0.37972205877304077, 2.2988173961639404, 0.8671650290489197, 1.040745735168457, -3.316119432449341,
+              2.3733606338500977, -2.248332977294922, 1.7465157508850098, 0.19552722573280334, -0.9690064191818237,
+              -1.8139621019363403, 1.9242961406707764, -1.9793150424957275, -2.789724349975586, 0.18952327966690063,
+              0.5084639191627502, -0.054778456687927246, 0.2740379571914673, 2.1619865894317627, -4.095170497894287,
+              -3.142530918121338, 1.1796610355377197, -0.8848727345466614, -1.2477298974990845, -0.07429039478302002,
+              0.9135949611663818, 0.21963024139404297, -1.9909381866455078, 1.99857497215271, 1.4466471672058105,
+              -1.1016359329223633, -2.8484303951263428, -3.1158666610717773, 4.74474573135376, -1.1900646686553955,
+              -3.1329240798950195, 2.125332832336426, 1.9798109531402588, 2.6058056354522705, -2.0495054721832275,
+              0.028982579708099365, -0.5753974914550781, 2.7390692234039307, -2.3111703395843506, -5.434136390686035,
+              -3.3772997856140137, -0.37978899478912354, 3.2925407886505127, 3.671295642852783, 0.7639904022216797,
+              3.1895627975463867, 0.15414607524871826, -0.6484872102737427, 2.18841552734375, 0.4799572825431824,
+              0.1354406625032425, -2.747096300125122, -0.22751712799072266, -0.7596011161804199, 0.5766011476516724,
+              -0.017207175493240356, 2.4283714294433594, 0.5117142200469971, -0.8030692338943481, 0.44569623470306396,
+              1.076960563659668, -1.8645645380020142, -2.2490062713623047, -1.6578664779663086, 0.6149722337722778,
+              4.706758499145508, 0.38176798820495605, -4.501796722412109, 2.2427682876586914, 2.1858701705932617,
+              -1.8162599802017212, 0.9385958909988403, -3.889805316925049, 1.1331977844238281, 1.0191240310668945,
+              2.4039511680603027, 4.155160427093506, 4.143398284912109, 0.7778210639953613, -2.2585456371307373,
+              -1.085227608680725, 1.7663249969482422, -2.8071107864379883, 0.5367544293403625, -0.02325284481048584,
+              1.5876415967941284, 2.046140670776367, -3.832700490951538, 0.46683841943740845, 0.5545571446418762,
+              1.8768221139907837, 0.7790337800979614, 0.38500359654426575, -2.3040874004364014, 1.1112343072891235,
+              -2.2824416160583496, -0.026048898696899414, -0.27540627121925354, 0.5449916124343872, -2.154345989227295,
+              0.7431529760360718, -0.008791446685791016, -2.407325506210327, -0.46152830123901367, 1.6632401943206787,
+              -1.7320727109909058, 1.0486053228378296, 1.3803236484527588, 0.3680152893066406, 1.716249704360962,
+              -2.2865381240844727, 0.14729297161102295, 1.260400652885437, 4.922313213348389, 0.5643207430839539,
+              -4.42134952545166, 0.464374303817749, 0.59236741065979, 1.2845817804336548, 1.4366343021392822,
+              -0.0200042724609375, 1.6293566226959229, -1.3861595392227173, -3.4724128246307373, 2.1383941173553467,
+              -2.1009442806243896, 3.7689297199249268, -2.918327569961548, -0.27357161045074463, 0.9184791445732117,
+              1.0513062477111816, 1.9957637786865234, -3.276752233505249, -1.6878246068954468, 4.714818954467773,
+              -0.9857031106948853, -0.6153162121772766, -3.6428263187408447, -0.30243179202079773, -0.4309789538383484,
+              -0.03419780731201172, -1.6013574600219727, 2.214989185333252, -1.1272412538528442, -1.6917750835418701,
+              1.547987699508667, -0.5724269151687622, -0.47848212718963623, 1.742186427116394, -0.2213730812072754,
+              -0.8063536882400513, -3.479326009750366, 0.5662966370582581, -1.0524877309799194, 3.702444553375244,
+              -0.8636859059333801, -1.9768422842025757, 2.1982383728027344, 1.1405737400054932, 0.6146906614303589,
+              -3.5127429962158203, -0.8339279890060425, -2.914233446121216, 4.411269187927246, -2.3479251861572266,
+              -0.2184194028377533, 1.7971992492675781, -0.9596229791641235, 0.09081411361694336, -0.4546387791633606,
+              -0.38310706615448, -0.5399283170700073, -0.2518271207809448, -3.5085813999176025, 0.077769935131073,
+              -0.5233420133590698, 1.4064757823944092, 0.8371680378913879, 1.9668782949447632, 1.483221173286438,
+              1.1757903099060059, 2.9970226287841797, 0.8735387325286865, 0.24936652183532715, -0.7718344926834106,
+              -0.9049572348594666, 1.9130113124847412, 1.9097952842712402, -3.3667526245117188, 1.1342090368270874,
+              -0.1385430097579956, -0.6781283020973206, -0.57246994972229, 0.9787319898605347, 3.6297695636749268,
+              -2.3075175285339355, -0.8269498944282532, 0.8386117219924927, 2.4571895599365234, -0.9069632291793823,
+              3.1065073013305664, -0.786931037902832, 0.6695969700813293, -3.896576166152954, 1.6415526866912842,
+              -2.334099531173706, -2.991877555847168, -0.4740370512008667, -1.0762873888015747, -0.5927379131317139,
+              -2.2995433807373047, -3.4549155235290527, -2.033919334411621, 4.102828025817871, -2.922405481338501,
+              0.9117567539215088, -2.0445048809051514, -2.740710973739624, 1.5500695705413818, -1.4105217456817627,
+              -3.672469139099121, -0.38663938641548157, 0.1100814938545227, 0.43851518630981445, -1.4003627300262451,
+              0.8104124069213867, -2.6236252784729004, -0.40968263149261475, 4.816134929656982, -1.9591403007507324,
+              1.2284891605377197, -3.4595632553100586, 2.2904000282287598, -3.7264821529388428, -1.8221375942230225,
+              -0.44476717710494995, -3.0978899002075195, -1.0302362442016602, 0.49443352222442627, -4.997615814208984,
+              2.7403292655944824, -0.9162295460700989, -0.8933342695236206, 0.8124216198921204, -1.433485746383667,
+              2.224909782409668, 0.6821490526199341, 4.009047508239746, 2.2991182804107666, 2.677088499069214,
+              4.353694915771484, -2.2315590381622314, -1.5339090824127197, 1.626473307609558, 1.9017658233642578,
+              0.9766815900802612, 2.563782215118408, -0.2381199598312378, -1.5801150798797607, 2.601571559906006,
+              1.7336980104446411, 0.8148760795593262, -3.7112083435058594, -1.3511030673980713, -1.8034800291061401,
+              -2.950260877609253, -0.4626041054725647, 2.9033288955688477, 2.629671812057495, 0.1508030742406845,
+              -0.6016277074813843, 1.9043893814086914, -2.119884967803955, -3.048208236694336, 1.3438254594802856,
+              1.039656400680542, 0.0982772707939148, 0.29122409224510193, 1.8302289247512817, -1.3148270845413208,
+              1.16330885887146, 1.4336774349212646, 3.057568073272705, -0.2635994255542755, 0.3290955424308777,
+              0.09837156534194946, -0.4767574071884155, 1.7474840879440308, 0.036291711032390594, 2.057096004486084,
+              1.5909944772720337, -1.5554354190826416, 0.45638948678970337, 1.8485369682312012, 1.1001496315002441,
+              -0.448333740234375, 0.12344249337911606, -2.2758660316467285, -0.18728435039520264, -1.0710699558258057,
+              4.759674072265625, -2.308614730834961, 1.3315553665161133, -1.7046191692352295, -1.4248977899551392,
+              0.31045258045196533, 0.4682546854019165, -0.5506991147994995, -1.167902946472168, -0.033889174461364746,
+              1.2611976861953735, 3.584254264831543, -2.8943490982055664, -1.0763990879058838, -1.9304077625274658,
+              1.6052935123443604, -2.1086959838867188, 0.16277271509170532, 1.087416172027588, -4.894248962402344,
+              1.6908477544784546, 2.445591688156128, -0.8808413743972778, 1.1168533563613892, -0.35605037212371826,
+              1.0386931896209717, 1.4661989212036133, -3.5512571334838867, -1.364258050918579, -2.697364568710327,
+              -2.279731035232544, -2.2294161319732666, 2.072256088256836, -1.7556955814361572, 1.5964255332946777,
+              -1.102902889251709, 0.25835680961608887, 0.41171061992645264, 2.6033897399902344, 1.931307077407837,
+              -3.2382147312164307, -0.6146683692932129, -0.7102252244949341, 2.314451217651367, -0.697837233543396,
+              -1.0293060541152954, 1.020631194114685, -3.6274075508117676, -1.869258165359497, 0.8600494861602783,
+              -3.1400227546691895, 2.785465717315674, 1.5925650596618652, 0.5685530304908752, -2.385671377182007,
+              -2.064885377883911, 1.7148135900497437, 4.472785472869873, -1.6981213092803955, -2.8710732460021973,
+              -1.5905208587646484, 1.7118372917175293, -0.0628599226474762, -0.024645566940307617, 3.5579423904418945,
+              0.043785035610198975, 0.1394520401954651, 0.705904483795166, 0.5603584051132202, 1.9532432556152344,
+              0.17339491844177246, -0.5621342658996582, 2.166140079498291, -2.675852060317993, 3.4783935546875,
+              0.005500435829162598, -3.0466365814208984, 2.9333863258361816, -3.0374748706817627, 3.3186678886413574,
+              1.4340720176696777, -3.4607982635498047, -0.5809862613677979, -1.8670074939727783, 0.31782853603363037,
+              5.3407721519470215, -0.11647498607635498, -1.127880573272705, 1.9607118368148804, 1.6104774475097656,
+              1.291121006011963, 1.3090026378631592, -4.346621990203857, 0.9649640321731567, -0.3321942090988159,
+              -0.40106678009033203, 0.6202027797698975, 0.737052857875824, -0.6949820518493652, -1.6063098907470703,
+              -1.335120677947998, 1.2487294673919678, -1.206929087638855, -3.946974754333496, -0.038611650466918945,
+              -2.730283737182617, 1.3170448541641235, 2.586440086364746, 0.9609778523445129, 0.9639753699302673,
+              -1.0613958835601807, 2.2582998275756836, -0.341133713722229, -2.37121844291687, 1.9750676155090332,
+              -3.339621067047119, 3.8494720458984375, 1.4416704177856445, 1.2632763385772705, 0.49889105558395386,
+              -1.358637809753418, -0.9810472130775452, -2.008033514022827, -4.180141925811768, -1.8064439296722412,
+              -2.4055490493774414, 0.8236247301101685, 0.08107048273086548, 0.2551090717315674, 1.6475603580474854,
+              2.3599026203155518, 1.4368640184402466, 0.11961638927459717, 2.1902809143066406, 2.4481637477874756,
+              -3.700338363647461, 1.4484524726867676, 2.2457919120788574, 2.7918150424957275, -0.3214719891548157,
+              1.1412039995193481, 2.3801662921905518, -1.1772977113723755, -1.080161690711975, -0.10960137844085693,
+              0.23755615949630737, -1.6492403745651245, 1.5414122343063354, -3.5301568508148193, 0.8169034719467163,
+              -2.7907910346984863, 1.27809476852417, -1.339566946029663, 0.24068212509155273, 1.980497121810913,
+              -1.103304386138916, -0.9938054084777832, -1.6851425170898438, 1.5913416147232056, -1.6278176307678223,
+              0.07544970512390137, -3.0562868118286133, 0.909795343875885, -3.3362603187561035, -0.16795992851257324,
+              0.654058575630188, -0.7783452868461609, 3.801968574523926, -2.160207748413086, 2.5146727561950684,
+              3.337472915649414, -0.7981523871421814, 0.7141947746276855, -0.44521379470825195, 0.7240567803382874,
+              2.8061447143554688, -1.9281837940216064, 0.33615267276763916, -1.3853528499603271, 0.08603048324584961,
+              -1.1986116170883179, 0.8860710859298706, 0.22947335243225098, 0.5199316740036011, -2.0464673042297363,
+              -1.6756978034973145, -0.6069002151489258, 2.3337855339050293, 1.6094681024551392, 2.3820090293884277,
+              0.8262057304382324, 4.417818546295166, 1.2495514154434204, 0.5728256702423096, 1.7155016660690308,
+              -1.9175611734390259, 0.8456315994262695, -1.3211780786514282, 0.7857818603515625, -1.7515380382537842,
+              -1.1971937417984009, -2.808197259902954, 0.7553633451461792, -0.1306423544883728, -3.6146838665008545,
+              -1.5321255922317505, 1.676008939743042, 0.07836294174194336, -0.9960349798202515, 0.8668472766876221,
+              2.137298345565796, 1.2637362480163574, 2.0481185913085938, 0.06509196758270264, -1.6683127880096436,
+              -3.718193531036377, -1.9311506748199463, -3.367814064025879, 0.012331843376159668, -4.227578639984131,
+              -0.5755635499954224, 1.583990454673767, -0.86231529712677, -1.809625506401062, -0.3123876452445984,
+              -3.4403862953186035, 1.0367351770401, 1.1761391162872314, 0.16112631559371948, -4.721268653869629,
+              0.07461494207382202, 0.003129124641418457, 4.358157157897949, -0.5005528330802917, 0.24370229244232178,
+              0.4912611246109009, -0.6851872205734253, -2.718902587890625, -2.6848104000091553, -0.7679593563079834,
+              -1.1002662181854248, -0.3475220203399658, 0.002980828285217285, 0.4288327097892761, 1.283578634262085,
+              -2.613717794418335, -3.3328800201416016, 1.9472748041152954, 1.3897069692611694, -5.0092363357543945,
+              2.518123149871826, -2.1634795665740967, 3.7558064460754395, -3.4893569946289062, -1.289191484451294,
+              -3.036714792251587, 1.6393659114837646, 3.2178215980529785, -2.083487033843994, -1.6242481470108032,
+              -0.39087945222854614, -2.675858497619629, 2.548295497894287, 0.6715638041496277, 1.0268739461898804,
+              1.9520831108093262, 0.2520883083343506, 0.3550087511539459, 3.1073975563049316, 1.3005826473236084,
+              3.4222006797790527, -1.5301063060760498, -0.19925060868263245, -0.7549142241477966, -1.7461345195770264,
+              0.7768814563751221, -0.8777822256088257, 2.757373332977295, -1.4982573986053467, 2.4916207790374756,
+              2.4712767601013184, -1.3903863430023193, -3.3709828853607178, 1.6688079833984375, -0.3028743267059326,
+              -1.8443574905395508, 1.8113127946853638, 1.7428357601165771, 1.2092206478118896, -0.5405560731887817,
+              0.97336745262146, 0.36485183238983154, 0.17854809761047363, -1.6080548763275146, 4.408480644226074,
+              0.6553859114646912, 0.9348583221435547, -3.3448331356048584, 2.513455867767334, 0.03000164031982422,
+              0.705142617225647, 1.1035417318344116, 3.448455572128296, 0.7622013092041016, 2.463888168334961
+            ],
+            "dims": [2, 2, 320],
+            "type": "float32"
+          }
+        ]
+      }
+    ]
+  }
+]
diff --git a/js/web/test/data/ops/bias-split-gelu.jsonc b/js/web/test/data/ops/bias-split-gelu.jsonc
new file mode 100644
index 0000000000000..23fcb488ca4d9
--- /dev/null
+++ b/js/web/test/data/ops/bias-split-gelu.jsonc
@@ -0,0 +1,1332 @@
+[
+  {
+    "name": "BiasSplitGelu",
+    "operator": "BiasSplitGelu",
+    "attributes": [],
+    "opset": { "domain": "com.microsoft", "version": 1 },
+    "cases": [
+      {
+        "name": "bias split gelu [1,1,2560]x[2560]",
+        "inputs": [
+          {
+            "data": [
+              -0.2565546032426438, -0.4308542731494329, 0.7196725122004919, 0.049034255098233004, -0.6348569496555116,
+              1.5952359184580631, 0.8451805251092992, -0.31838310590966934, 0.8187686985927041, 0.5208222841347814,
+              -1.36437690702164, 1.9883897131851596, 1.5564695381513953, 1.6179857166855847, -1.6925162826813818,
+              1.7367654350206285, 0.7210294356326798, -0.16665830399749915, 0.7502629374213177, 0.49174373254887005,
+              0.05228599187298144, -1.8492674426703974, -0.13963175206123601, 0.3689713052652124, -0.539726857153676,
+              0.4047910328979869, -1.5364223249042084, 1.5401613243237753, -1.5533776810895263, -1.503725108181306,
+              1.4980778036916167, 0.3954579111685037, 0.6720461504101429, 1.2082071287930374, -1.9053414457057452,
+              0.5161807133471896, 1.876123128617479, 1.4026319996913186, -1.0578832074721918, -0.5667177472093234,
+              0.7427152553125103, 1.3309336325346122, -1.250604470605933, -1.5581827636425984, 1.6094888834151977,
+              0.8346945311402969, 1.964734881798945, -1.1480686695741893, -1.4692802823913134, 1.443324467502948,
+              -1.5059857923356113, -0.4324256636772654, 0.47787327572253613, 0.010830153648134555, -0.552568788591528,
+              0.976425831078207, -0.3964598095089391, -0.8550178676766054, 1.7829908691025604, -1.4386851327204164,
+              1.2586393384114123, 0.6282746108828974, -1.9228284428211788, -1.723501329370512, 0.07875604943128245,
+              -0.6805248059926141, -0.09444157676724618, -1.2578296605984862, 1.4748594927305314, -1.1326833840447499,
+              1.2899356994911972, 0.36076376234182295, 1.0687687434117796, 0.016260539139403285, 0.8790330834375988,
+              1.958147414219165, -0.6744379666162423, -0.11384066716133123, -0.8573301336636696, -1.6026243400276634,
+              0.947103941481469, -1.9714924616237095, 1.4248854638955484, 0.20743958585530908, 1.1632144973105554,
+              -1.755170686870751, -0.7194149118428639, 0.14148466161285445, -0.2721811711092048, 1.5603318294054445,
+              0.4281402047349676, 0.9777768965070042, -0.4340528216185948, -1.4027853075880978, -1.786451668170261,
+              -1.0399446061083921, 0.5016682459975055, 0.47912646038882833, -1.4994325065634833, -0.5813174761669684,
+              -1.7675659877499355, -0.5295448113440351, -0.185677936488422, 0.6133721122115032, 0.8342835422965917,
+              -1.8851704197237282, 1.8145028466620214, -0.6693817855471478, -1.2825600860901352, -1.9614837268208936,
+              1.7498654134131364, -0.9137222020716864, -0.3419963114086366, 0.4718774290086678, -1.087807747894451,
+              1.7131835407851748, -1.3262901364654693, -1.3116975227056313, -1.3834647528821646, -1.6786541344436587,
+              1.7754973215348864, -0.2608287138885421, 0.7649932766417926, -1.620043859042676, 1.6639963441569705,
+              0.8487054193287991, -0.0440434794759188, -1.7095242256175753, 1.285898353046436, -0.8541456852270608,
+              -1.0975953883519463, 1.6456192019739255, -0.9829899938198103, 1.5115291047537394, -0.1356101800325593,
+              0.3163471293050524, -0.7350393764769096, -0.2635909692970628, -0.8961556079942969, 1.35594272476665,
+              1.5346085770720466, -1.561332626472022, -0.7482744786530882, 0.7415275042470979, -1.3455806550694929,
+              -0.17403414417677876, 0.13174239791099396, 1.617942860808114, -0.21372789574955942, 0.1252618392038798,
+              0.8356601282506393, -1.4645614042274238, -1.2750059490075998, 0.30114108212769697, 0.718759572394414,
+              -1.7476419557101002, 1.3774845116886816, -1.5548787928223922, 1.2843809545104072, 0.40166155990403407,
+              0.7182073117046643, -0.9178049843649907, 0.23022861413351325, -0.16782915827518163, 0.809538498880066,
+              1.5368543357363587, -1.9144180623212463, 0.22370787233170475, 1.754935494591769, -0.300812686337701,
+              1.6324016835327004, 0.42942625235999277, -1.821577324695582, 0.7065235323323327, 1.6359230558031488,
+              -1.6698325562461855, -1.0322767487026727, 0.230813812551105, 0.10476742241238135, 1.8969745358419585,
+              1.6692111883958614, -0.13495950982519744, -1.3187891429974368, 0.33479728114140084, -0.3567915710179923,
+              -0.757581852807693, 1.0471229062934597, -0.15293599163291116, 0.8603181880710826, 1.5915256233625152,
+              -1.146248153475943, 1.4333844348196614, -0.4549776123350959, -0.24793256123671004, -1.1103266883866416,
+              -1.8671773666157563, -1.2229905095092333, 0.06266819913029753, -1.0062234212958696, 0.8035035300131232,
+              -0.7305732410705756, -0.18203039831679124, -0.31014428798975313, 0.3622048186467879, 0.45985097885235504,
+              1.4809283400531248, -0.43833106689297807, 0.04401934126909346, 1.5587830597086887, -1.6615450592134344,
+              -1.0549727490794032, 0.10619243811841983, 1.6457168234317425, 0.8542761080095067, 1.0005810856142707,
+              -1.5553877748221447, -0.9212057116120924, -0.8894180684757362, 1.2348207479698639, -1.867513894231462,
+              -0.3703762605562, 0.6409515313866843, 0.07519639585361748, -0.6450469579811191, -1.1954961398506443,
+              -1.1204107450288525, -1.7282576070578317, 1.3286514210595293, 0.9221546638897049, 0.3164348496087497,
+              1.1124119320099153, -0.1657137161424238, 1.8878952460812632, 0.1293077579568429, 0.23358535468938246,
+              1.6758942267934263, -1.4201635109571313, -0.15778983434253657, 0.7416701308494114, 0.5883762821820557,
+              1.4927448431941999, 0.6935037789859804, -0.7735384747712253, -0.003653232296928266, 1.016177625769398,
+              1.663989224847544, -1.860463076082154, 0.586684836283415, 0.35116588158246387, 1.1544623264859073,
+              1.900422405885342, -1.4569177282686354, 0.04845063476308198, 1.3176042838894286, 0.7208418723320795,
+              -1.2204473994940885, -1.4000622968397813, 0.21650376984443565, 1.060631946423273, 1.5077365306547108,
+              -1.0212630681047514, -1.9198532775330452, 0.7154901442715236, -0.2676631096599271, 1.3808441670529703,
+              -0.09885367904367648, 1.4777610290353689, 0.056817122949397, -0.21052160911698614, 1.163420787671865,
+              -0.9278068609037406, 1.6649534188139832, 0.651092543699634, -1.9558728896680586, -0.17393499677517266,
+              1.4142540331221616, -0.6414265768865439, -1.796476122987272, 1.3019592923170675, 1.8063975233357983,
+              -0.09846336861938809, -0.44669537007711746, -1.8509683249130777, -1.2535113000292872,
+              -0.26272617023889033, 1.618948771545468, -0.24333144562387243, 1.684567206060013, -0.0684671993602386,
+              -1.2513800864625626, -0.42209759253339296, -0.5204624726492675, -1.2354597847242017, -1.8741410257015954,
+              -0.4694216976292269, -0.6222814585320586, 0.21189389441688178, -0.07825818775306104, 1.7163253327182595,
+              -0.5828872666159741, 1.2859942412787504, -1.6213500187165488, 1.3292413342594758, -1.8919626255232016,
+              -0.5376250474966104, 1.8689278008038874, -1.7144714737277686, -1.0258522713372873, 1.7337707335280257,
+              0.7736026666583085, 0.5294771168151202, 0.018158706442022776, 0.06181604275240726, 0.39921503387415935,
+              1.1876014889699817, -0.3054069392890115, 0.16369159901033914, -1.7827759163094807, -1.972081887839714,
+              0.6481860379313247, -0.35352594703309315, -1.3849570316603321, 0.623606163848514, -0.05008698162477465,
+              -0.7604090786231117, 0.018133973172791862, 1.4085369489144304, 0.0006579664080073044, -0.945901823508092,
+              1.9768973551218298, 0.8031916889887203, 1.787407754969756, 1.4724476919448328, 0.4025600955058959,
+              -0.8188218839399735, 1.9450326825091242, -0.7081203957970317, 0.2756639708632225, 0.5758621242189532,
+              -1.3400477780804723, 0.9643250467598792, 1.6902983107628202, -0.9456991797498446, -1.214924995943468,
+              -1.7096508074160557, 0.6280377071410248, 0.043386564374169545, 1.4018487692877626, 1.9387558240379494,
+              0.6404760132931342, 1.9907639837009397, -0.9629506658962566, 0.44160372290058625, 0.5311677453788013,
+              -1.98443782839251, -0.8098531120098231, 0.6492275695305256, -0.8908778962502977, 1.3356991895363226,
+              0.22938352294803988, -0.7322473322214602, 1.5046263929054788, 1.3645550724874669, 0.9961702988823644,
+              0.5139435687003742, 1.4501909090527283, -0.7800082224423388, 0.35291863289434566, -1.146090063932382,
+              0.30350456267852977, 0.6439700368534709, -1.9972080095325557, -1.6099893116712707, 0.8052080484401971,
+              -1.0954234253610906, -0.7337556567039494, -0.31964730584596346, -0.0775965423638807, 1.9583401341749038,
+              -1.2551023178822023, 0.8562333066218226, 0.7147503883958679, -0.7448967487912448, 1.935551654646078,
+              -0.8599046782377142, 0.2970146461500134, 0.8930246286483605, 1.1182036059156406, -1.197201583834416,
+              0.07161352111517694, 1.9408265571293892, -1.0932315538260982, -1.599500716435764, -1.3924326798020594,
+              0.11983524432330928, -0.7679553338056699, 1.8370796383852186, -1.2704307623609719, -1.1997771708610943,
+              -1.888947914144012, 0.55632142669847, -0.34454450486308286, 1.4998341008691707, 1.5090724650893934,
+              -0.39294559939271156, -0.5834694562549148, -0.5369747247041268, 0.09250855982787431, 1.2568497810992714,
+              0.3053435200201413, 1.9380606411981267, -1.7300384067590375, 1.8430090411386875, -0.09705704918122926,
+              -0.9748664840480341, -0.13310751475233928, 1.8695019507218804, 0.6192212840529407, -1.8594506513740239,
+              -0.5527467955831558, 0.42368164563035027, -0.3623000694050589, 1.4671722318441134, -1.9926015337743834,
+              0.6390178007629022, 0.3930378886525858, -1.999173071586763, -0.10690790413254625, -1.8526590162958945,
+              -1.7626087675511721, -0.37087181562117433, 0.8387521856704474, -0.19069686876676517, -0.8188898303583647,
+              0.12674602912986455, 1.1094188774808789, -1.3557441716004606, -0.6605764711080546, 1.5447276204253857,
+              -0.8769307424824397, -0.2545000118362921, -1.0291738091907403, 1.7938016119427465, -1.9984894749397677,
+              -1.2827802315328531, 0.9376743174843778, 1.6630184567373671, 1.3959796712419958, 0.9312602680557571,
+              1.1022358675981447, -1.8339474355859497, -0.4903489710565081, -0.8895348660922506, 0.6526243952924284,
+              -1.3544145682501627, 1.7693021030400526, 0.16080394788547636, -0.012568942703699904, -1.15491020041351,
+              1.2537510862653143, -1.0969062449531704, -1.2759047604132565, 1.5734543561168284, -0.10691180117407129,
+              -0.26934435340525376, -0.9933427134070101, 1.7043012450253494, 0.02549752010787465, -0.08701034102069993,
+              -0.2674176298500974, -0.08499649536913978, -1.9065384537204872, -1.8548124018210599, 1.4114174077197168,
+              -0.1003529232852527, 1.4168803569119728, 1.2546078925285231, 0.2836297019652072, -1.741854513232557,
+              0.4335127344631058, -1.7722508801895138, 0.8906617987453824, -1.1907389082962334, -0.4570346317067928,
+              -1.65487466866219, -1.6799510105998428, -0.7345507991250928, 1.0450995087483994, 1.6718914380761127,
+              1.9492487109001955, -1.603110762963735, -1.1386896023097748, -1.2314080609240623, 0.9155801473532383,
+              -1.7678615945378713, -1.2302067186131085, -0.008026479019882515, 1.7076934044219962, 1.2976204179783615,
+              0.23979601472188072, -0.31452550079775676, 1.237656674724093, -0.5073967065142844, -1.4684831151454167,
+              -0.31751104037333455, -0.18632018989018295, 1.9021272175218185, -1.605133256102862, 0.7057429809617588,
+              1.131130449558933, -0.37696933268230914, -1.9999062126002407, 1.2947486718643644, -1.5494680428486927,
+              -0.619492250822522, -0.7119383385868714, -1.2267826883342403, -0.05700963867832698, -1.6002949090802074,
+              -1.235668500595783, 1.0038351588940309, 0.9591804998296345, 0.3500270456646062, 0.9985378178389439,
+              0.37800440518144107, -1.2954921521047265, -0.06652125976806822, -0.5529207993209635, 0.799277104478481,
+              -0.3195922296721463, 1.278513926080179, 1.7667762251394539, 1.6741322500537104, -0.7688735332563814,
+              0.3718086223078023, -1.8559862073263451, 0.6460500799921309, 0.9538174223011966, -0.9599765113376018,
+              0.2363354873707415, 1.1507344321172663, -1.5062754878549311, -0.6521456232305631, 0.26796953563726245,
+              -1.6029382384422037, -0.13330793525728346, -0.4700350464240364, 1.2426614036874923, 1.8410794059261688,
+              -0.29943068600730616, 0.23080019611542912, 1.1289988352417586, -0.9307260459011948, 1.5075366130642047,
+              -0.75430635423512, 0.7786360347486365, 0.43281144997343457, -1.9253410424489141, -1.7129308932126097,
+              -0.7115090993554389, 0.09728563961266712, 0.9612524095722907, 1.2677697737234288, 0.17580981533895557,
+              -0.07099813019347945, 0.6420914403628677, 1.929710466274961, 0.8224262711223203, 1.197715527419664,
+              -1.5306699759366271, -0.4379817520480751, -0.6193479789854166, -1.4632019512465932, -1.259260972161262,
+              0.8218577982330961, -0.5872036188804506, 0.9619144554878618, 1.9136356497081524, -0.0054634952730427955,
+              0.918959735070624, -0.5711442865104566, -1.7438335683612998, -0.09911336983085484, 0.9296115985185578,
+              1.4616523373203654, 0.46274661162826547, 1.0360815568791644, 1.2212946749865532, 0.15313173395745583,
+              0.822676634908933, 0.4284785502131063, -1.003861397431101, -0.1736541682765118, 0.6790160543077883,
+              -0.14098755750254632, 0.3123446972161563, -1.1989246192108354, -1.8129870085835993, 0.32605203312965525,
+              -1.1125265808505294, 1.7561842733362214, 1.7739812007749807, -1.4043301791912688, -0.8943531190371674,
+              -0.6297367970706924, -0.7981479808539875, 0.8794315514521189, -0.3994743473829594, -1.401573672109106,
+              -0.8912997118364743, 0.6495541504851907, 1.9230113410471903, -0.6785063630402766, 0.5089535993984642,
+              -1.8974271070828808, -0.2620702810213116, 0.1991815065696141, -1.207848636028186, -0.8986511371196979,
+              -0.7061642892592088, 1.9332451504308255, -0.9830761395756227, -1.244182804715014, -0.017993897992697683,
+              0.8786296671983029, 0.540637680299664, 0.8969343374913903, 0.7955401708949532, -1.529657800511421,
+              1.8639189521351138, 1.067177379712641, 0.7401683858489143, 0.7294620382453241, 0.5904153067983531,
+              -0.49843243603051146, -1.6804352670210738, -1.0693615378859294, -1.9090165306219946, -1.4036168596502474,
+              -1.2392401335977272, -0.7753989839018987, 1.8934648648263472, 0.3583307892541949, 0.1338886754962525,
+              1.8690213745085558, 1.455690685933316, 1.1699346906107797, 1.6059636790463596, -1.2445702023047094,
+              -0.26540435557319864, 0.7367777535407951, -0.06848824820724975, -0.35847294758367365, -1.8422274132326661,
+              -1.0257628610133205, -1.6669678072109697, 0.37716994278958094, 0.238235954755047, -0.2999780255928437,
+              -1.8840499434617515, -0.2146515363892023, 1.683440503823257, 0.11413961404491424, -1.6059835147695,
+              1.1951777078503847, -0.5174290916747344, 1.3440252628311207, 0.21500397686099326, -0.9421891710579917,
+              0.0593410986318057, 0.34006850000048683, 1.644097326727004, 1.74676874874047, -1.6742906231992345,
+              -0.33329412020743643, -1.6254771174048148, -1.6581767039373991, 0.791705460659057, 0.9383035214148672,
+              1.7805390345327456, 0.5776366760158806, 1.587860436382865, -1.5903762069130911, 0.16034052878776794,
+              -0.2414627652627388, 1.2751236768892227, 0.3209997221960421, 0.31176177950076234, 0.6234148156263783,
+              1.144504541840126, 1.1535423529138784, 0.11665655599341473, 1.9697764827003628, 0.14558312336598078,
+              0.36578124791517297, -1.769415346962682, -0.8303724165129278, 0.44703666963932154, 0.35095942056362794,
+              -1.197063815711486, -1.5390788457973201, -0.8313129454097989, 1.952907404456571, 0.30612523411761394,
+              -0.9380264922530621, 0.9822286847681259, -0.12269281399330456, 0.6557752769532215, 0.48870196679108435,
+              1.347011507573625, 1.6519563808835915, -0.7385795429014586, -1.048048723619047, -1.0859902684402032,
+              1.7187556784188924, -1.8663335499394762, 1.3448325108921972, 1.5973182779732955, -1.9246562924691855,
+              -0.896157435511153, 0.5381287932952938, -1.7180528810790427, 1.6998135385965343, -1.99001646343652,
+              1.7545133161159834, 1.9333853932807186, 0.509746393965961, -0.0675591816104566, -0.5487596842885525,
+              1.1654829640998834, 1.2418276538086106, 1.1046551528849635, 1.1541227103848648, 1.3688916432757363,
+              1.8682574813085164, -0.9855654818965549, 0.5606398966348412, 1.9470096222814695, -1.9903658034181957,
+              -0.48344153717960925, 0.6353768282033556, -0.0513621772163555, 1.097950015862886, 0.24567995461843584,
+              0.8059244051805585, 1.3521742423136072, 0.47230481196519936, 0.6133818327341913, 1.9135550317308576,
+              -1.8499763818701833, 0.5894967733509713, -1.920326871370679, 1.6405640996262383, -1.1695280806136248,
+              1.4241423736983672, -1.67349053378969, -0.8405344375321118, 1.4551541878625045, -0.9884334361969964,
+              -1.274246357510064, -0.5067514453663833, 0.5492300144792424, -1.063358889380532, 0.3303561470509724,
+              -0.9579434760073298, 0.057620205260851876, -1.4742255794760677, -1.6308108759776871, 0.050309644830520917,
+              -0.4069039662995735, -0.046184249642958086, 0.07949429360802096, 0.12024378717844808, -0.7878588154579758,
+              1.472302474285109, -0.29877233638272127, 0.3469939919860652, 1.0030914450113793, -0.033399023382078674,
+              1.337733463292298, 1.322404140400704, -0.9344843531853249, -0.7431360466648114, 1.6844959250870701,
+              1.361728870488129, 0.9343674315961268, -0.027258361373763584, 1.838512510349391, 1.6503912849225708,
+              -0.4099533673620952, 1.0611278108159041, -1.6127864143155026, 0.24393478918735934, -0.44450380494664365,
+              -1.6597118492450527, -0.6490315790577066, -1.7612694260424497, -1.4276602313620197, 0.940659328987655,
+              -1.263148442418843, 0.5147963537125868, -1.3438202624356137, -1.2843267233008957, -1.2655953556300048,
+              1.8008689611198623, -1.0529595001338974, -0.8423440348538298, 1.4068621033910276, 0.007761032105414678,
+              1.0935924640537378, -0.09622562614772612, 1.5915223843314257, -0.6702176163118629, 0.773624899360053,
+              -1.0994916609949348, 1.8469641173001987, 0.8777243936892871, -0.7039703784245424, -0.6127726098737645,
+              -0.8456557917329288, -0.21546583787614892, -0.9360415939958875, 0.5705354560747677, 1.4330326228154542,
+              1.5996006008539831, -0.07367645458791117, -1.5628497471538445, 0.6808136908868017, -0.6639347562269995,
+              0.9713336233567063, 0.914773615335891, -1.4236420984162779, -0.02242621653471044, -1.898261801965826,
+              1.8572128094883267, 1.5674319640876826, -0.825255227018233, -1.2984530858686814, -1.8947198959796596,
+              0.587966233462053, 0.7276082291256891, -0.7118337876069631, -1.9491744048376294, -1.9749840187715382,
+              0.6636341374951193, -0.8911042565498537, 1.1064802004273853, 0.5698018751524465, 0.31161591659845556,
+              -1.4542202550786563, -0.4973178781500307, -1.261090074552551, 1.202428109165787, -0.6502124259985935,
+              0.875011031676804, -1.0459657545700463, -0.42208724851625856, -0.056326319293169114, 1.033820082809723,
+              -1.7520891696720033, 0.8087980713817391, 1.4090272899721707, 1.7934272795205217, 1.625604511965781,
+              -1.1093024540507015, -1.1836309931833142, 1.2726582815916636, -1.348432635892622, -1.3318038843301023,
+              1.8025664988508767, -0.9447266569517438, -0.5044961285117484, -1.6608791173499942, -0.5404247801346038,
+              -1.8417295161873213, 1.3703119773103376, -1.762654389000578, 0.654516908110395, 1.7444654879377754,
+              0.06700313474848674, -0.11772438211787417, -1.4346934108510236, -1.3471639350541027, -1.8846130174975517,
+              1.4341206352848488, -0.1781484331915868, 1.8660723422398098, -0.14067413462914313, 0.8690053039391579,
+              0.14439977774786517, 0.39859516757572777, 1.338326821281048, -0.3540839451368738, -0.3799467769042604,
+              -1.6627115692361185, -0.27677435345149703, -0.9817434354820271, -1.7561599198792992, -0.09529627982909172,
+              1.0892976295039958, 0.8013191504371955, -1.6378286446982058, -0.2422910554223776, 1.192900022577974,
+              1.552603422632739, -1.9305123473184267, -0.20948858983751073, 1.9616644595376727, 1.6353585267617419,
+              0.8302939534322658, -1.3824308029410997, 1.4671017215695894, 1.075525400633838, -0.8516882354421424,
+              -1.764852757300301, -0.6508588489877889, -0.5864767969763536, 1.1318668208548859, -1.2774769138035325,
+              0.28054164298575834, -0.7225628233832841, 1.0930686104208345, 1.3757645119448334, -0.8436086923316299,
+              0.15317111418019813, 1.0696425581898357, 1.2977798557148477, -0.9586985895916751, -0.9572006108424791,
+              -0.5173013825178812, 0.9558842805311762, 1.4588133070282732, 1.4300025626225263, 0.9236625391445488,
+              0.6245988744867041, -1.4555968079681971, -1.9081528171774265, 1.4969305861374398, 1.8013611610843867,
+              -1.215264739954538, -0.6460349115684512, -1.7385666591205897, 0.4952215434575882, 1.7813317720717237,
+              1.4779804705753685, -1.6448372710981527, -1.7914377349318746, -0.8701351514587072, -0.6086582613844049,
+              -1.7386736832736416, -1.6630595398426538, 0.06404471619092522, 0.12472538361116214, 0.25173024087120677,
+              -1.4925192750724054, 1.1326320024353294, -0.6723638830002354, 0.9276409891081574, 0.5160132920113458,
+              -1.7619800169226787, 0.7729306209435105, 1.4852229135771582, -1.4043797098218525, -0.26979894999654697,
+              -1.5676449182307888, -0.8285263778416416, 1.7968643376162596, 1.1503154963149846, 1.3682632091264955,
+              1.9014644171021402, 1.2035803257561772, 1.5807589620662768, 0.6681530530278019, 1.7430055744872055,
+              1.4516895295108698, -0.6636088362253298, -1.3265544726243066, 0.7260245399794885, 1.0068518018712478,
+              0.2208570840730273, 0.8459119656002851, -1.8915833254450725, 1.2162158433713186, 0.9752766886721753,
+              -1.2607916054285697, 1.9003684087234687, -1.6824694825149118, -1.3545700227621689, 0.5912583336167279,
+              -0.7008114462062913, -0.022208913414461406, 1.4871167887266532, -0.47220337297808346,
+              0.001402495408155957, -1.6337795432062068, 1.6557142707874517, -0.21911880468117495, 1.994215681564901,
+              -1.4675327906481472, 1.9744129850181764, -0.10991781070844464, -1.5582267736493964, 0.9509729601966104,
+              0.27383527366630744, -1.0109967848840293, -0.2652951445752292, -0.31773126890169845, 1.9347214689284513,
+              -0.48900940865557896, 1.0348946328564068, 1.6101647718098393, 1.2224869337691553, -0.24528963586677577,
+              -0.8282995437227312, -0.74214677104667, 1.9022077987920571, 1.065511429772795, 0.5557978714258756,
+              -0.8552846035431614, 0.14131421568194025, 1.6415849500887356, -1.0979455229862056, -0.1899406250116744,
+              1.882935380340533, -0.47086245203046495, 1.1173180765349162, -0.38373005169056196, -1.9204322585926832,
+              -1.9947555620271595, 0.25610856180969677, 1.672078838942273, -1.9323275104124873, -0.7955457526088345,
+              0.9709167319971037, -0.6356155194456985, 1.6590519014057472, 0.2576466445092809, 0.06826734181142502,
+              -0.8077944062086111, 1.6116753503934271, -0.360670679232701, 0.665216992360695, -1.3443131827622485,
+              1.004588252277319, 1.4090953833875757, -1.6465558763926547, 1.5390983488230878, -1.3071804786368446,
+              0.6990024492525402, 1.0093027361688671, 1.097146827869869, -1.1912001568366906, 1.630219037886489,
+              0.5744645582929406, 0.6113886454941007, 1.9913520749447864, 0.44093025553359233, 0.08768839917963245,
+              0.8058107685571905, -1.033178784078995, -0.8225351475861098, -1.033391812065922, -1.9518439963887113,
+              -1.4013652343418483, -0.3575891026292508, 0.5391177792156094, -0.589608397581415, -0.8183723923536403,
+              1.2663801923646707, 0.2780137273448853, -0.6282934713194672, -0.3515950748704757, 0.32366450506041744,
+              1.0378037684314076, -1.4606869437722425, -1.1777315953586012, 0.20077305098986375, 1.1132819638088014,
+              -1.3403580753455646, -0.8471624166708764, -0.78670588443378, 0.411075409745707, -1.1567621787661597,
+              -1.644706858279382, -1.6923798602467333, 0.7295344690619672, -1.6826221173466598, -0.5883451091973848,
+              0.401937953067649, -1.9630301043637237, 0.9577847247199625, 1.2919304030896734, 1.225292029861409,
+              1.0377797459888836, -1.4163731758310805, -1.1848812939548496, -0.6833661697787141, 1.8924034115938815,
+              1.0815655745578274, 1.6514910861689147, 0.13638193195368675, -0.7978052236681465, 1.534221720841157,
+              -0.9153275868493269, 1.8162414196916217, -0.18899261449512395, -0.41163974536752157, -1.7273888453908217,
+              -0.2259883780746561, -1.5648477168095223, 1.483239033708478, -1.3942275569974942, -0.47490997296002035,
+              -1.7533267576457146, 1.885960859958221, -0.2403666825901727, 0.5586086137789854, -1.0921597903975728,
+              0.31058170316775424, 1.3277885858021783, 0.7148623072607876, 1.557240774068096, -0.37204592237067047,
+              1.296024489070251, 1.0544578912529046, 1.6854705704653057, 1.511922732706835, -1.7363032773317322,
+              -0.2092964598440208, 0.4481058099152593, -1.6737966633530679, -1.090792800318261, 0.6450021408332125,
+              0.20592552116749374, 0.8156159228622641, -1.568521435345816, -1.9264895113126421, 1.3354691459199177,
+              0.5736596388344699, -1.1328195208816618, -0.9748407260828982, 0.12745284809908775, 0.12755406203831754,
+              -0.402313518792381, -1.2084251106645967, -0.766591500306526, 0.5831135977679764, -1.9502950151238094,
+              0.02368696030780093, 0.9474278651806163, 1.200170490724969, -0.23626565831359603, -0.00155175398078633,
+              -1.4962274409033034, 1.4172670942494623, 1.6336646415640272, 0.9015591359514898, 0.0028804858578190817,
+              1.8517288251219313, -0.5845464836204135, 1.4764716372203344, -1.3353282541360834, 1.2177029719595032,
+              1.990053698845105, -1.5060495488639427, 0.3106662987296689, 0.2870946491789992, -1.0746935708007666,
+              0.895601302779335, -1.5268871259283276, 0.2678390558787376, 1.864513144299627, -1.509732497733654,
+              -1.5904836915336782, 1.677740063904718, -1.5200319590355935, -0.7931901348349131, -0.057131201288623146,
+              -0.5299918958977132, 1.5844048394762984, -0.9074859645216646, -0.6250324408575869, -1.7483344039680153,
+              -1.648406830815511, 1.790525414138699, 1.1087046319299025, 0.42706999037150517, -1.1252987787944102,
+              -1.7926103436944185, -1.661929519642099, 1.6441197132107996, -0.7994467312378033, 0.4924994603048294,
+              -0.08241669477843683, 1.3633290277505736, -1.5418640660707128, 0.05946840692833444, -1.6001798975438088,
+              1.1048132720023895, 1.0870814820606292, 0.5831092120375949, -0.4133915802678638, -0.9142815961432058,
+              1.9472407631770015, -1.4219510012962004, -1.1714172709742634, 1.4079274160525728, -0.34506772652017137,
+              -1.0158502812481522, -0.8168949547547397, -1.1457275079452227, 0.12910364073916902, -0.4662867248454887,
+              -1.9437241965128846, 0.07261938805201762, 1.600763502162561, 1.0777174066413018, -1.2723083195923186,
+              1.3113857387077417, -0.5228664205198017, 0.4450488409424249, -0.5683762553586282, 1.8256298065201282,
+              -0.5555324792306022, -0.5028443495682451, 1.550965251170056, -0.47857481650681066, -1.008285169693293,
+              1.9029801635553145, 0.7739617661198315, 1.8099201835531415, -0.3994817059435505, -0.8127756747385817,
+              1.4033307810724054, -1.359844813448376, -1.3846355261466767, 1.8540201060398234, 1.0970430821179962,
+              1.3778953118217157, -0.6311210216871839, -1.8270928773238353, -0.29073753592093343, 1.063407723752193,
+              0.769348666705115, 1.069807859635052, -0.13297318999054397, -0.7495627942438086, 0.4278305696495597,
+              -1.7534013899377605, 0.20503621122624516, 0.8877917416885026, 1.9219368972107151, -0.7795858126195832,
+              1.8045722365205155, 0.01848994995789255, 1.9822395081707462, 0.5682615557282436, 1.096590333951183,
+              -1.1060317246730396, 1.0869871276155, -1.8681569307257382, 1.9498301468214843, -0.5725242199723457,
+              -0.754441782550737, -0.0400922717249097, 0.010590885689596874, -0.6969977940409491, 0.6620666861327669,
+              -1.5969982725685883, 0.17340909047153819, 0.47755863566996126, -1.6291589696000264, 0.5780359168220688,
+              -0.5173306807336635, -0.9514848124225095, 1.6169705288679577, -0.42893373795490586, -1.1528283547930025,
+              -1.0400955977716713, 1.9827399061918518, -1.327376858845513, 1.6043081593845727, -0.11039938533269034,
+              -0.24997046912904874, 0.47014693724974954, 0.729145170631436, 0.7014015249399357, -0.210704593378912,
+              -0.8898579600723409, 1.127679820439794, 1.379686041589359, -0.781681363239616, 0.2858562618428895,
+              -0.7131287792008063, 1.7878252016142655, -1.0588662101910593, 0.4893786031875278, -0.8406649057821527,
+              -0.1534439859760095, 0.2331374640695536, 0.469582749149386, 0.4137313563589631, -0.9769266749700227,
+              -0.37870419628070984, -0.778681560279427, 0.9076631441596534, 0.9624606550623103, -0.10250544734763523,
+              -0.2518054637298146, -1.8389925418951307, -1.7523906632013846, -1.8251290048632933, -0.49859600328817244,
+              -1.5645964425382966, -0.46692843392327266, 0.3025335867203003, -0.8785897670006184, -1.7537151065566459,
+              1.9360855593038684, 0.03479121265661611, -1.9402430169146303, -0.8981491188900641, -0.9720525655542991,
+              -1.2872361339345169, 1.9657361835316278, -1.9654227152525223, 1.4590349841798576, 1.7417951527725704,
+              -0.7636287264036836, 1.6938802231015364, 0.3969017158868704, -0.9308527980280088, 0.44396078845267084,
+              0.8114974124677037, 0.22323733905690712, -1.157000049324795, -1.2116172131012615, -0.9832275983234169,
+              1.773233656033006, -1.6481062641009663, -1.9471951041445985, 1.1654342998679619, -1.8679076405021187,
+              1.9134708504745168, 1.9270958489182615, -0.4877809076980144, -1.4674512268342745, 0.006115322418878577,
+              1.5523105881305073, 1.008791751555858, 1.7292932521498168, -1.2446660428848375, -1.32058622408507,
+              -1.6942157582592943, 0.9218514004458749, -0.823621328629307, -1.0203195530541063, 0.07206341884947509,
+              1.214451058931207, -0.40454188129729296, 1.0066091638178039, 0.0801907243894604, -1.3250420419558173,
+              1.6740542746900093, 1.1525840942242223, -1.7538751715267296, 1.2289357346449874, 0.44273632243705485,
+              -1.480515264351281, 0.7203216915034076, 1.736757457268701, -1.6126702540429125, 1.3353291202473017,
+              -0.3386246414186953, -0.15824184756675486, -0.9082701908024067, 0.19739090770367174, -0.6056382353292964,
+              -1.5021205949442713, -0.13004055376809376, -1.9680841369920756, -1.0085004366482355, -1.4660753620146698,
+              0.5310372051600734, 0.6252656799282139, 0.28834705715856845, 1.9582690133559009, 1.0284365097891248,
+              1.5791162728965862, 1.5890315798131152, -1.5740074592032895, -1.2346249557276874, 0.09869464843015763,
+              -1.9888659899819219, 1.6245510003031853, -1.8240356816949115, -1.1160775047609688, -0.9717920085031029,
+              0.026054846056297265, -1.5990410562524549, 0.6191498034442082, 1.4318181978005144, 1.1449640789498945,
+              1.435002701727269, -1.8991365043582489, 0.9679929619919578, -1.9806014397574234, 1.4536549482052994,
+              1.2369898149063783, 0.0942097559544548, -0.44988290575276135, 0.6393419762034132, -0.6790983093222227,
+              -0.33932133359722183, -1.3323692893954657, 0.051614649124500644, -1.4850113224086279, -0.6288795685974646,
+              1.6283138375987471, -1.7789341475324125, 0.9641353407036455, -0.8859758584446853, -1.3905521072955613,
+              1.248121111974715, -0.148429098769566, -0.4602995590886545, 1.1003026112521521, -0.6827850627285645,
+              1.9771039126131766, 1.227966495460028, -1.9054616874551513, -0.17250407406937818, -1.5976498513957544,
+              0.04439303257666172, 1.51163887169748, 0.646404644877455, -1.721204116533527, -1.491970732730513,
+              0.2989738629420451, -0.9550137794523614, 1.2025118418310514, 0.17278672215112767, -1.7073672613962216,
+              -0.6869493466963412, 1.4221175269968072, 0.8136078027936655, -0.09457917718456343, -0.6500956499585442,
+              -1.0403599385456666, -1.4197371934035345, 1.2190245805380533, 1.4761477510781154, 0.8005307377799946,
+              1.5249741316932477, 1.0855648961952538, 1.9224829076060752, 0.2869108546051615, -1.5091598215762332,
+              1.4501191712612789, 1.8870984229831107, -1.7302116354922958, -0.3545753940936196, 1.442437478066842,
+              -0.7823996675200249, -1.131407428223576, -0.5013183847734197, -1.4979613863674297, -0.24415013272007524,
+              -0.6627159008601602, -1.0181273908903332, 1.0328362758216585, 1.260250651113478, 1.0138156752044916,
+              0.5371615864333288, 1.8553073783399325, 1.951357678034606, 0.7194607934648083, -1.0589630923826236,
+              -0.9620500497996112, 0.67763129815489, -1.6212754527879456, 0.8824272362591019, -1.8034693359581802,
+              1.7422096047831568, -1.010660238079712, -1.8120346724245788, 0.7326343612924848, 0.8912492318672749,
+              0.669928902828123, 1.5191540472733163, 1.9408210365763843, -0.2675149665538239, -1.5368478010395314,
+              1.1378158248590227, -0.14340274845268297, -1.0420396266499052, 1.4238975359786146, 1.0186548966434374,
+              -0.37302282044451296, -1.6665521929489486, -1.9414179347758749, 1.9845099037831098, 0.25190468995929116,
+              -1.3565033558826896, -1.430084412385555, -1.9049229412063653, 1.221645795300951, -0.5219823891712627,
+              1.9368378730562315, 0.7035479701902823, -0.3754047433995442, -0.14526093695990294, 0.17885379911634125,
+              -1.7453384743240203, 0.8465052490601339, -1.6823293227525946, -0.8161516604165566, 1.8312443096922442,
+              -1.9192510920287678, 0.37723942145518574, 1.725107407940751, 0.21381615826643507, -1.1716001801855649,
+              -1.0611345679669162, -1.9732355910502637, -0.4461777828323239, -0.23052068805350423, 1.218942575723136,
+              -0.22572812681057552, 1.9428383183668947, -0.06997110252961747, 0.2238383470247678, -1.178747770654164,
+              1.289511874981887, -1.9756722906104285, -0.42650188553983703, -0.5494388087356263, 0.6619450518994654,
+              -0.8233262341940337, -0.551580552700945, 0.5817278377322888, 1.8685269618613036, -1.3000953227319512,
+              -1.4283838275880294, 0.5999358561474102, 1.0645958312240245, 0.014353697508937557, 1.2413161019277963,
+              -0.6897291610817131, 1.12297456609934, -1.8432752527139797, 0.9084575027035138, -0.1243916597867818,
+              0.33130042087381195, 0.66895259903393, 1.6557830983974302, 0.08287276029065538, -1.5357530396168295,
+              1.2343785017416087, 0.12902705924095592, -1.3495271524839367, -0.8728580889008626, -0.5244139433889465,
+              -0.3879006179068245, 0.9188768930207276, 0.7793805226934829, -1.4393071368258976, 1.613705376668844,
+              -1.0843958887438374, 1.7124014800074736, -0.9964777990871019, -0.8670020603088275, -1.779081809150778,
+              0.12646904349631516, 1.6571805108463433, 0.12600417813102993, 0.49996900162664826, 1.9525284377831484,
+              -1.9652455426976498, 0.9494015016667543, 0.38443667884960586, -1.7724098362330505, -0.4082684743558227,
+              -1.7879879942659969, 0.08275123875162116, 1.7475525868209036, 0.6792010104118127, 0.039437186277630154,
+              1.2836761397306624, 0.43674745284168726, -0.758347022092078, -1.7870493658991657, -1.7978809807072968,
+              1.0584586913661846, -1.1157899056211305, 1.2701741054323072, 0.4374863843249699, -0.8214325870445185,
+              -0.7728421127264511, -1.5577282427526624, -0.23594830217496732, -0.5391955305244664, -0.5624792843906574,
+              -0.1938002353617021, 0.5223396103139288, 1.538375944848121, -1.9694136326451632, 0.8633772521280756,
+              0.7208433609240901, 1.7666620769537023, -0.43532694305019426, 0.411879568237544, -1.2098790166305973,
+              -0.36423075107954883, 1.8891146830740784, 1.748054742284471, -0.24142532135673545, 0.5927554266793296,
+              -0.9152877032152276, -1.8063969967782656, 0.48207466271184707, 1.213706670479203, 1.6604107491663145,
+              1.7558340275933135, -1.7932219074543312, 1.9856023833866967, 0.4913618217526574, 0.6789406744471576,
+              -1.2508048490603496, -1.3830358311750315, 1.217800508245622, -1.363187904834711, -1.9078698738816984,
+              -0.28177773798420525, 0.5582837083561785, -1.3049002248929513, 0.08913900413869147, 1.6980987887729269,
+              -1.1912978387544912, -0.8931915280846656, -0.18855808865450108, -0.45803053755017054, -0.8712638141863449,
+              1.7094200997790034, -1.663338331843227, 1.2015315071432422, 1.7919441084478605, 0.31409629120692095,
+              -0.2597694212249051, -0.9168044364289045, 1.216621830723616, -1.6662413518480443, -0.8080880254762635,
+              1.4073145445825483, 0.545639136243703, 1.4013255263406696, -0.5546097014600111, 1.0974522886316729,
+              1.5316488607435108, 1.834526451907207, -1.9144935270991548, -0.7645822781961176, 0.06654825465795611,
+              0.33190077251492234, 1.5544507881908975, -0.3538304760036741, -0.9572664339018262, 0.9528358413200575,
+              -1.6745715540252952, 0.507318989134915, -0.20165453900363595, -0.785857858404909, 0.8550126703052818,
+              -0.0012624172438071568, -0.9598028440876272, 1.2244820355277088, 0.2255077411064299, 1.8056720264962882,
+              0.037756919076751494, -1.343540974005494, -0.5094599890534655, -1.9469287581463544, -1.9209187756131971,
+              -1.9807700974543714, 1.6311835876110612, 1.3993893579005068, -0.43399651753379054, 0.5299120477626555,
+              -1.9670191857782173, -0.5248122259552108, 1.7175353497229144, 1.7289010829074938, 0.5729177740933844,
+              1.5352457320381498, 0.4553403180302915, 0.29793068106143483, -1.5797876923914638, -0.40357306624290423,
+              1.265178047524694, 1.0072829549062359, -0.1769350094041604, 0.6540198866379798, -0.568023907344485,
+              -0.9385372669098198, -0.9547551373309231, 0.9690083247728642, -0.49448270214331114, -0.23248401423341658,
+              0.8559950134661438, 1.5269704580997994, -0.6731018952946073, 1.4821498012885321, -0.086653738716163,
+              1.5513273624777053, 0.4908490299053412, 0.608900001002695, 0.9347951454225933, -1.8764423825502883,
+              -0.5383639900183281, -0.9057735867914714, -0.4471390378994089, -0.9345110033957944, -1.2485918565222498,
+              1.3231857428964897, 0.14498731120726926, 1.2862055594556612, 1.3345878394286323, -1.2756222907563037,
+              -1.3083824064683531, 1.5045780173000916, -0.3988401099234551, -1.8345796826462557, 0.9521512093444242,
+              1.37476939261499, 1.7326371780327987, -1.1085753439494548, -1.9216876869475268, 0.32444794625794593,
+              -0.35462190108024316, -1.6069584401996142, -0.6840247431355166, -0.6774334428214628, -0.6167820243082769,
+              -0.7218673863781015, 0.4950955340603276, -1.9104643839392264, -1.8658682955390713, 1.6439842658028825,
+              -1.9665747219603489, 0.877772486257161, -1.447763157358751, -1.602252241120052, 0.44016487530366266,
+              1.010332237856809, 1.1627057025546774, 1.3589381505180222, 1.6636605093405308, -0.30457004904529494,
+              1.2269612708937165, 0.6454558582632632, -0.3281883947244264, 0.8131649340163314, -1.0598316735137594,
+              -0.42299178838843243, 1.2938482948248247, -1.9502364601006974, 0.33584798243720115, 1.960552892008372,
+              -1.5357606602986342, 0.7325579192102154, -1.7068807453175427, -0.27946385324092926, -1.8639767385643475,
+              0.026428176272640158, 1.8041446105579393, -1.9753854510739615, -1.2061334845302873, 0.7372916274629793,
+              1.2179795802972748, -1.3143781251446196, 0.682430888983494, 1.765010566861033, 0.7860745956875865,
+              1.5788062358182353, -1.575242377037637, -1.6479554611466636, 1.2726425469891218, 1.3214340571366705,
+              -1.1176914021551418, -0.28104090150176475, -0.8644022366981297, -1.0077418811855443, 0.015316356685453059,
+              -1.8951979061169633, -0.5196653139834044, 0.8125953302477926, -1.823298987094664, -1.772714760583118,
+              -0.42385130914514946, 0.8535556140476261, -0.21002977910714105, -1.0489096773038966, 1.693320576211045,
+              -0.7552501574944035, -1.70051398713119, 1.7993076735058722, -1.5431556753936784, -0.44776937868310007,
+              -0.0034131973262736537, -0.08787851222156462, 0.1325414438300383, -1.9039783696720445, 0.9518867483514475,
+              -0.9603800917394327, -0.0648448360480609, 0.5218985531710549, -1.6635189965029777, 1.9924108002877654,
+              -0.26339506136937363, 1.1928641143061727, 0.3810845521550812, 1.1123779174325765, 1.2834151041594684,
+              1.7494397068262924, 0.7406099190962179, -1.0134737497144135, 1.055889762638893, -0.8394462038526926,
+              -0.28477578596133046, -1.3393678408508727, -0.012496365797505682, -0.03748743063321758,
+              -0.09254003926345167, -0.19080760280617248, 0.34473162850229677, 1.3043786112928455, 0.33958283219176444,
+              -1.0099885582662163, 1.5669995819775036, -0.2714415980354179, 1.3346740594279858, 0.602475697679103,
+              -0.27309507620431717, -0.4346043241348738, 1.12017383631626, -1.6915194091713959, -1.1923394655016413,
+              -1.8515166462293298, -0.591963112025546, 0.5023793945680568, -0.4921032884548957, -0.4819170803548376,
+              -1.4966363280675248, -1.421677402946293, -1.6850285211377178, -1.9944566759402251, 0.23292198758882154,
+              0.8553269921007791, -1.948229679796313, -0.13143026366451505, -1.6538966244541689, -0.9216045649630527,
+              1.7264765451863706, 1.8222181199058705, 1.9306597140261852, -0.42071370405153363, -0.994060577839833,
+              1.3461298706581069, 0.6155835113186514, -0.4233898713404436, 1.0628290316641253, -1.882099316617622,
+              1.0589656835123407, -1.1563854484115064, 0.06709310867074869, 0.6384730812451886, 0.10574897066907951,
+              -1.702659493082189, 1.532461866391916, -1.4099555819463054, 0.9274818832867693, 1.5037867997803227,
+              0.7264189517035895, 1.739697756396633, -0.6088742760292636, -1.6856099921156718, -1.772602909046058,
+              0.3867733521889116, 0.43414404525556716, -1.4477323883785695, -0.5550413636141851, -1.0684298188676138,
+              0.49963854275697717, -0.6428666361617692, -1.2461600431717166, 0.139396634056558, -0.8713484091078527,
+              0.9279324928307542, -0.860886224082325, 0.6052214303020085, -0.07407732844211701, 0.8385159528403303,
+              -1.2883611258333367, -1.594397303034099, -1.288952302189137, -1.5793499078181599, -0.6082222788183742,
+              0.35912648776686495, -0.607260423160894, -1.253926812856471, 0.5102013763540754, -1.6947762434136582,
+              -1.675889179914285, -1.7314605301275563, 1.5448045270744153, -0.6686717741699395, -0.5356827185210964,
+              0.12358215343481938, -0.6340124361185859, -0.2817760753415781, 1.164433875881067, -1.0118173150641265,
+              1.8268866563720367, 1.7413521495755342, -1.3276318599091654, -1.7238317321272323, -1.7921370418547173,
+              0.3558056543449375, 1.6918022479955095, 0.9222053317838856, -0.052028097382029515, 0.5122787494307435,
+              -0.32626022494247575, 0.15032399126484997, 0.8080660425425252, 0.8796007677651145, 1.6881141214849356,
+              -1.9923518519205325, 1.1594510791129853, 1.8780756936993601, -1.7055973395367428, 0.7022611016052407,
+              -1.2390916075946476, 1.489326601979406, 0.09129782431581734, -1.0503781321438632, -1.830361985060848,
+              -1.6234239674810462, -1.9400691667730579, -1.775525052900898, 0.49354389704319335, -0.294835573216341,
+              1.6593428114830902, 1.1178351651841174, 1.392043569467992, -0.23893431171828805, -1.4623844403945752,
+              -1.5343016105239773, 1.1865854046143252, -0.030035182509464242, -0.23319636854325143, 0.16118837392623053,
+              -1.4249606736799842, -0.10348851980420637, -0.512221808349385, 1.3638877591460936, 1.6510345373891697,
+              0.5403817230171999, -1.7568167360776457, 1.582586423119313, 1.8459420743954364, -0.0937741201677813,
+              -0.07681514921378785, -1.6485771749332665, 1.405045845579937, -1.380639120572158, 1.3920918243796292,
+              1.8133826455276356, -1.6212352715972855, -1.6619339109060558, 0.5860174449496753, 0.8058352539157623,
+              1.2357143629814047, 0.22041032204430522, 0.7808171688721632, -1.8606664108725086, 1.8543721730560465,
+              -1.2809372837528121, 1.8923388155485306, 0.7298982025806318, 0.08659163513354162, -1.7687987574607682,
+              -0.4763155379116144, -1.5473458996242258, 1.5866611468929568, -1.5386305359062193, 0.2081267635605304,
+              -1.381273405047125, 0.29199706105659384, 0.5721917482420489, -1.6925391452117218, 0.7916231259738984,
+              -1.4231755623012, 0.3405385083200416, 0.12077341714245371, -1.203092512782769, 0.9840212059218176,
+              0.8659573692212614, -1.7404045592057091, -1.200177869243232, 1.9929041194760506, 1.6176296085808595,
+              1.15438166620096, 0.41001019693662766, 1.545005651638757, 1.4880131034252333, 0.6483595755736316,
+              1.9934284869908785, -0.4425968131492626, 1.1431733762266791, -1.5542365498656396, -1.4707761026218984,
+              1.6864275163379014, 1.8816548331946308, -1.896526297348939, -1.2767368243378723, 1.7876805565314937,
+              -1.9714294114028945, -0.4825746666154789, -1.7490480229694754, -1.6790035262680618, -1.89004754358142,
+              0.37907912536042243, 1.2624866059593396, 0.10773840252143074, -0.6872687299448277, 1.9925905119168483,
+              1.6613584791100244, 1.3497294256582544, 0.14770827679848963, -0.386793508812298, 1.6398868686034165,
+              -0.41758827391224607, 1.7067458043950365, 1.0278221550032498, 1.7159317753776575, 0.5096805329124043,
+              0.9834561119760092, 0.09498575572277979, 1.9951803301511655, -0.378982487863734, -0.8263817670300764,
+              -1.0229038853801748, 0.3913502332282688, -0.6996774883339203, 1.8060537458340287, -1.5307723308680314,
+              1.8274255271921316, 0.3374273766186473, 0.504303338314112, -1.6080869793956403, -1.158919549492441,
+              1.5813373512787257, 0.9432537197048445, 0.7842306706160116, 1.3546056018625912, -0.9933122900788947,
+              1.0300108626811424, -1.8830535281855107, 1.2388444130694714, 1.5820432556020423, 1.7375339367053177,
+              1.6102618826367072, -0.8665078865132916, -0.6732392396345555, -0.6529773281138276, 0.10547233393450206,
+              -0.6941730912707627, 0.910541741327461, 0.9098240508320066, -1.6363195940333455, 0.5463776877988993,
+              -0.33425194180314666, 1.0461836058570837, -0.9752798490633703, 0.8152462082002794, -1.5875859561094057,
+              -1.9077898642385165, -0.6579383417965277, -1.3678615233634694, 1.285752623992095, -1.258025248987475,
+              -1.734869037268262, 1.0429220147507374, 0.7326509890514723, 1.9986880192758498, 0.6509156089882868,
+              1.3547313230945077, -0.5885137816765393, 0.17851497147549722, -0.48206201174941743, 0.7757155223228231,
+              1.5797039076642765, -1.8365491055202483, -0.5557915214323543, 0.8098649490633258, -0.9482838270042748,
+              1.7240038491867988, -1.727136737786945, 1.504375516257281, 1.8508968488624733, -1.781048252815335,
+              0.29406576456043076, 1.3929052384310125, -0.5672101671346796, 1.1612683698036195, 1.0365664134132775,
+              0.52678187508665, 0.3071827595777501, 0.7041562182045773, -1.1798720159212701, -0.3576278976339484,
+              -1.816947298419736, 1.1348446198592024, -1.5847397567297339, 0.3604508734069167, 0.46593859852538255,
+              -1.7777395155730495, 1.6287619990007203, 0.05390138452125459, -1.6910054822794427, -1.1955797005332123,
+              0.1527150449913588, 0.7396002223852989, -0.18108018677808335, 1.5581070045362102, -1.8797418765958547,
+              0.8650557162994215, -1.611948653627663, 1.1857525788641103, -1.0890741767763679, -0.7623497273737962,
+              -0.5452689372225361, 1.4851221090925062, -1.5985252342075071, 0.020384445673001572, -0.9046335836897486,
+              0.553216643980825, -1.9081035245013105, 1.1735365048310928, 0.1195026810678641, -0.13729942676911122,
+              -1.5259805046535062, 0.18920225926555112, 0.6705947977532496, 1.0030469424172317, 0.7610916391668443,
+              -1.2463291870058537, -1.0249840040963818, -0.19173741736380734, -0.6665958901111466, 0.4839557261866174,
+              -0.3485528787244352, -1.5242944187930236, -0.14435052683644933, 1.4529331704829067, 0.6458683077621643,
+              -0.20459459850901585, 0.631062246518785, -0.8791486672647375, -1.2386308323664368, -0.7619627858914138,
+              1.2107017989790583, -0.33473019147897354, -1.9130923107708755, -1.7056520026340571, 0.8880970113501672,
+              1.3775061802643274, 1.6842143744397662, 0.8956663807740703, -0.14207737810877585, 1.15151215723502,
+              1.9554721163330937, -0.7192433376047749, 1.9055716193191357, -1.7324310669463001, -0.9426609664206191,
+              1.4304692916943642, -1.4208060554924176, -1.6703510665846304, -0.11789403764850537, 1.1035841327225944,
+              1.19193146120024, -1.735646676252749, 1.9517684584286812, 1.800446091627931, 1.2643875465217338,
+              -1.2016658470474093, 1.096389364153712, 1.5201049232112753, 0.30449137858893494, -1.1869295719311896,
+              -0.8315768582782672, 0.4346498942421677, 1.8028550790017723, 1.8857086005150672, 1.1520127103731062,
+              -0.1519769293665103, -1.9810749314865586, 0.6722940736723881, -0.8875375147759952, -0.7891504144449426,
+              -1.6946353432210914, -0.4359020089062122, -0.6980599672246557, -0.07982954815920884, 0.19084722828248868,
+              0.6845828835105898, 1.361502619024912, -0.6198314937755827, 0.43206457200540616, -0.4275542957416594,
+              -0.5942951907386576, -1.2680930727916406, 0.8768595741043272, -1.621734829642608, -0.5341763533991353,
+              0.5480403433778003, 0.04432939753449716, 0.6820566847717622, 1.9586624282689744, 0.32087743739982866,
+              -0.6031550453761696, -1.9650369588045296, -0.01282494430097092, 0.39651817911017506, -1.3672271702505698,
+              0.5253796884817046, -0.9414239928629815, -0.8381706914260612, -1.9383642756327601, -1.035705422287875,
+              -1.206785655560016, 1.2381881481085397, 1.2976332585562043, 1.6406249791463123, 0.10308205600753784,
+              -0.5475238811744738, 0.29899086302238675, -0.8482038855976217, -0.5137345687600776, -0.9065955517315878,
+              1.3104099207076656, 0.5025101714926583, -0.19511985691542133, -1.7979503287642702, -1.902372134245554,
+              -0.8024648307653406, -0.8296523085064926, -0.9765719040493881, -0.32443266986702035, 0.6539733476893286,
+              0.5973369569721942, 0.9620679364522129, -1.1326092851006697, -0.5203786703966227, -1.9445684352154542,
+              1.120817472266788, -0.7832944208518358, 1.7728569415904172, 0.4746637992232019, -0.6331056497466134,
+              -0.9560458440421451, 0.774229832283905, 0.9430691283749084, 1.7415084291941643, 1.7617290689369067,
+              -1.241891773067345, -0.4745318258222131, 0.170886449126332, 0.280332317095926, 1.797798588989031,
+              1.4955462166754243, -1.9714454758512092, -1.0474834990256507, -1.4515647324997412, 0.5231202814417619,
+              -0.8247619693573318, 0.842919528017017, 0.3442132040114725, -0.7990310505752118, -0.3629900475529535,
+              0.3295033891840484, 1.7075620006843595, -1.7886707971887938, -0.4229652804748518, -1.2955282842854743,
+              -1.178898100821427, 1.0539302135104265, 0.07818911036924803, -1.572166269506143, -0.9256373147747796,
+              -0.4523743452766471, 0.870552332090627, -0.14850807893624118, 0.4287510203485754, 0.7234083474785322,
+              -1.8786486629917283, -1.3259155700503378, -0.347806801338562, 1.3631756709064557, -1.8448244492904076,
+              1.0352554853447788, 1.2734756957434143, -1.8995503975543802, 1.3929138458956896, 0.4683038046394792,
+              -0.2679051570033124, 0.8144995104717001, 1.4932193857012948, -1.6953990593425603, 1.402756253554589,
+              0.3808795564720828, 0.8990244719837941, 0.40911995577962923, -1.1361078826263524, 0.17385823702487802,
+              -0.8186745793721117, 1.7076078317166345, -1.236408907822983, 1.6848698673082323, 1.1057305891182168,
+              0.684172881489129, 0.6339043483376958, -0.5196970144327304, 0.3521577727883862, 1.650116506377742,
+              0.2682799961840745, -0.4735884200746394, -0.37060613769970896, 1.1147917782653982, 1.7887865852421685,
+              1.6978458641851324, 0.27152326646567104, -0.7479948142297932, -1.2016639963842959, 0.49444674806749767,
+              -1.3781311297932053, 1.6512147519128018, 1.7416186949120336, 0.045136221165612334, 0.5209559439848652,
+              1.259816622507529, -1.4608419771090535, 0.044609757110229076, 0.9224148305655282, 1.001777160557932,
+              0.6923994259395219, -1.7332027007479764, 0.1740919172843176, -1.6691024022127667, 0.020765839823362775,
+              -0.5411621161873468, -1.9512100832757255, 1.2205713100210618, -1.1974515945322022, -1.8219003860967407,
+              -0.20156369520917128, 0.8953615730514599, -0.6851191939445931, -0.9784599914417944, 0.6357787809646211,
+              1.2543221592701617, 1.154804559643825, -1.3175939582140597, -0.6021071761808416, -1.3594268073464137,
+              1.445454919786025, 1.6373224793127816, 0.4773095450916047, -0.5842274784088719, -0.7097055492598594,
+              1.221870791596726, 0.5841175201744218, -0.6088866249685312, -1.0239540299091434, -1.5776221817975893,
+              -1.0528530592443435, -1.7599610522525682, -0.009359693820525372, -1.5849216381687343, 1.1123817614821956,
+              0.48090947307860965, 0.5755896514757204, -0.49340183569225626, 0.38999288587119985, 1.9076108980193105,
+              1.7293269863481244, -1.3443206598919062, 0.001652373471082491, -0.3612982015066102, -1.666179661360145,
+              -1.5652606963428637, 1.2847799597537284, -1.8746367656966019, -0.5051948920106453, 1.480902450916811,
+              -0.6574579095355038, 0.41299775479925227, 0.0038296277371880905, -0.49511696025555096,
+              0.33076498894339057, 0.07531759452269782, 0.3925650285564686, 0.04553443036132965, -1.2835239354793773,
+              1.022597041467466, -0.14452142564653414, -0.6834698246070756, -0.26296934920727555, -0.1553100097881268,
+              -0.7486942835272403, 1.6436947866485108, -1.453108500403955, -1.142814165110103, 0.8561776338025142,
+              0.9114113939243937, -1.6289171667242108, -0.8765736035046707, -1.3197920400322358, -0.3734912789205094,
+              1.5370462570492807, 1.0649239894263536, 0.8535930082816217, -1.6961129271355224, 1.4179270256764838,
+              1.4041314424292972, -0.0700614584636794, -0.5005397839840926, 0.23183520631057597, -0.10990588024738823,
+              -1.147089599229715, 1.8964022235002318, 0.03743472262799674, 0.15742108441832148, -0.7185738786020632,
+              -1.1856642140796065, -1.044955084073341, 0.6700862122512099, -1.6025569167524276, 0.5441522921772082,
+              1.756702564317389, 0.1676615923964384, 1.9361822628230643, 1.7064197592361863, 1.6880082988048626,
+              1.4936942432907419, -0.5799520435760606, 0.990660775656627, -1.9229488942439295, 0.3155933315458741,
+              0.27140426916735727, -0.5632551338077167, -1.4590940918768025, 1.5983827694310806, -1.4330645192158764,
+              0.8003275119389475, -1.9841806470403869, -1.8395944973457397, -1.5552105471701392, 1.6842460594625583,
+              -1.7017713102134175, -1.3427034266215978, 0.6775219782398878, -0.3351302524035349, -0.022448660326078063,
+              -1.3500598715209335, -0.5202469644373027, 1.921619202644619, 1.3872706825409837, 1.396529188979435,
+              0.07083965303226236, 0.6914877624468483, -0.29224764274698156, -1.751452537465684, 1.0064318828131888,
+              1.5123878406285325, 0.4033025380645654, -0.9820341018265148, 1.7861648401668955, -1.0181815370408254,
+              -1.903647522119213, -0.6561248619205413, -0.8699254533593299, 1.7173299560789754, -1.9229485367712744,
+              -0.690953419069066, 1.2737422710672446, 1.1066566790733132, 0.8606336717720096, 0.8621209988717888,
+              -0.10083318929855434, 1.3453155024821513, 0.10215147045998396, 1.3751499812523518, -1.2192061255665605,
+              -1.0236206808587962, -1.533123935767816, 0.15060837984637665, 0.20593075721578913, -0.1420114293640644,
+              0.17633888228692118, -0.5175320445499185, 1.3087124943884705, 1.8324200965520019, -0.4738758668846961,
+              0.5584879952099433, 1.9477363800804062, -1.8628690990218209, 1.42344054481267, 1.9288039965215793,
+              -1.8566049760751664, -1.2884135845361993, -1.19038243691399, 0.7683237483024614, 0.4802985717653341,
+              1.4329557197969898, 0.9128985393650337, 0.9461407015680665, -1.65078779921165, 0.4192615449543542,
+              0.32096438114763437, -0.5129450234494479, 0.631644434863107, -0.47335834996781756, 1.3100891597589213,
+              -1.4790022015467565, 1.725986057822876, 0.8701026300053698, -0.6819596038865807, 0.4664857353751577,
+              -0.7112013772427535, 0.12572492597099938, -1.7769733290901675, -1.6440093209585278, 1.125533492349331,
+              0.19218031047016204, 0.02403105386135085, -1.7090825450471296, -1.312565935939367, 0.03858791868454148,
+              -1.2641036387182139, 0.904155292993349, 1.3561478854255968, -1.539635445223043, 1.9235783529452943,
+              -1.874310049415433, -1.7773733191470518, 0.11118483759793829, -1.0886413092825036
+            ],
+            "dims": [1, 1, 2560],
+            "type": "float32"
+          },
+          {
+            "data": [
+              -1.0123639551600512, -0.1262791332695521, -0.5528788189121379, -0.9891722578914903, -0.8541177111117033,
+              -1.842327110585285, -0.8753504664996115, 1.645494290881846, 0.12876899266900654, 0.5739158499727086,
+              1.1027206966256946, -1.3155458981065662, -1.1433211051679475, 1.4367855916029315, 1.4674402192278242,
+              -1.3373231059554618, 0.8170172046647917, 1.1074697240531268, -1.6004007249577086, 1.4644646696571568,
+              -1.827927680383385, 1.4548611857965401, -0.8614990138298273, 1.6706016048131325, 1.5096827794979042,
+              -0.4651782953949448, -0.8577219028996828, -1.299490913831483, -1.339145989117756, -1.0017632367283333,
+              -1.3586772742922255, -1.8799261724983776, 0.059417093938870735, -0.6646734157727456, 0.5388638764003799,
+              -0.6378909942726629, -1.1647516486356562, -0.058721485739401835, -1.814477796499844, -1.189167849669282,
+              -0.6380012350722168, 0.5285662507696332, -0.534701982091482, 0.5570990437739303, -1.755585696977759,
+              1.27238726136709, -1.9571057028071568, 0.04195657651183726, -1.9047024137637942, -0.5116506294039525,
+              0.9926189908254566, 1.7759871807627992, -1.301591689492625, -0.9524123108659142, 0.524043944088068,
+              0.5401946307447156, 1.2036398911372181, 0.3219194319137575, 1.9433711281899884, 0.33648919584354076,
+              0.9772519308773946, -0.5575502080369272, -0.7345410843307336, 0.5778449333097511, -1.9408006240005902,
+              1.9819202164371932, 0.8468700855540847, -1.3899691404202503, 0.15850835128301544, 0.49059781858603113,
+              1.7764286491001, 1.7946165130578535, -1.16168298050607, 1.032789240397304, -0.7706982026329863,
+              -1.1038032957670127, 1.1838096309519006, -1.6323318982074788, 1.1064057844588042, 0.391546384023683,
+              0.8726810963884386, -0.28563916045025906, -1.960002018275822, -1.3867181381833626, 1.921900210546779,
+              0.23545042298506935, 1.84976268271061, 1.0525315891685612, -0.7472942377034979, 0.7577710804753881,
+              0.3083793892222504, 1.327301485825914, -1.5659874146221533, -0.8978311834083046, -0.62907789098844,
+              0.4870403076019034, 0.630783738162723, -1.9216326727288857, 0.9012236985202584, -0.7852645198565309,
+              -0.6937624671663629, -0.17653350051115613, -0.5561473457869717, 0.6698782481609369, 0.23362849702887267,
+              -1.2054404784680655, 0.7698259603739315, -1.1714183267177214, 1.1184512448333477, -1.6690588449303805,
+              0.9147154841361029, -0.8955722282828678, 0.5231382201829353, -1.0773616491852902, -1.8410207920577744,
+              -1.1337885739036437, 0.5219438601865445, -0.5731516829203676, 1.5757208446602124, -1.8470713081756802,
+              -0.14985389360149082, -1.9692981274100303, -0.9532002408436595, -0.9539842568916299, 0.17378022347200517,
+              -0.41623035688267596, 1.6481595173848254, -1.026339675363599, -0.9699532421892103, 1.9461597162595536,
+              1.3640648912196438, 1.1391500889364954, -0.5595329932725566, 0.43069270118926184, 0.9216291855227485,
+              -1.440051073691266, -1.47987236009598, 0.9087155921703598, 1.2787682017568338, 1.2363303394454128,
+              -0.49711585260182556, -0.9387884495809562, -0.19011148798368716, -0.08611760612433539, 0.8656095244085549,
+              -1.4316244544821588, -1.558915825119862, -0.6738541489070196, 0.7358531504667214, -1.5716157542957179,
+              -0.3210549969144969, -0.20072745672949832, -0.2416365577548918, 1.5081023632879136, -1.547604775100064,
+              -0.6334244403609635, 0.14810360033380032, -0.7978288773497635, 0.6204344672116999, 0.4773642826492761,
+              1.2087249539596163, -0.6643075818626354, -0.4170560884596144, -1.6192321024457321, 0.7844847722786517,
+              -0.629690651133866, -0.7380758723814482, 0.303414620658204, 1.5479875220490822, -1.198103302774089,
+              0.9760982659095188, -0.7574001500859886, -1.2724614749813545, 1.0658176639069543, -0.8843666652730136,
+              -0.6427064732600343, -1.4416669867869603, 1.473657450100351, -0.8344994942004691, -1.4224435385472942,
+              -1.0338023533751777, -1.933568422908838, -1.0802998481520287, -0.38091309180010224, 1.6199945117010506,
+              -1.702101910236685, -1.4725385504086255, -1.413591341417039, 0.540278745507603, 0.3517718642795238,
+              -1.590795907883174, -1.765499823368284, 1.7366923492439614, 0.4582221558773192, -0.10581682008337268,
+              -0.18516227544796227, -0.21097779387988158, -0.1428735544745896, -1.97510241493318, 0.32449001731988947,
+              -0.1832218746003349, 0.26181337286546746, -1.1227369967165552, 0.35351574098454375, -0.21205956319428498,
+              -1.3866497212089195, 0.5946688412485415, -0.3417425538750871, -0.33633058083047906, -1.7852940300594273,
+              1.9919312461265548, -0.7629882135863388, 1.4620310920385196, 1.1115061446942711, -0.9057539166302142,
+              1.775862903430335, -1.1324751374031425, -1.5851970376790732, 0.9843604936500894, 1.5734177841900427,
+              0.9515914445205862, 0.034323622285483246, 0.8075573695504703, 0.5332420240003275, 0.7767308358623826,
+              1.0329131214994085, -0.9838298807872725, -1.7429868813963063, 0.03740922197745089, -1.3794671490283932,
+              0.9772124799843054, 1.6546060756751624, -1.345806362676182, -1.620585515255308, 1.3498272448019941,
+              -0.25283974040314394, 1.8309785362540882, 0.8336568766196351, -1.407378961144727, -0.8870392599067882,
+              1.5455801491463914, 1.1404840595611354, 1.9778865957841072, 1.9026326233043243, -1.2919286899267508,
+              1.5194536255103763, -0.40024201189426734, -0.38767200629106124, 0.37883119550528654, -0.8971148848399899,
+              1.1472506966060552, 1.6769048658537242, 0.39834963390946676, -0.8584979863189526, 1.4851684858856853,
+              -1.9898922021489387, 1.7271323520062838, 1.8848497191146922, 1.7439889790675318, 0.5311134881425099,
+              0.2302960173495876, -1.6217864910988453, 0.28492260856667784, 1.3550969896689358, 1.6762026245924515,
+              0.45464402605973575, 1.8447468286497628, 1.7489125819896838, -0.7745650567526248, 0.6473255323813127,
+              1.88270574713889, -1.4231865592800421, 1.406236181817195, -0.05820536366515672, -1.9830176146937077,
+              -0.927096735728453, -1.37521200952383, 1.6293084827507869, 0.18916714867483186, -0.3559388864834574,
+              -0.0626685044384443, 1.4510888124049117, 0.00665671994549033, 0.5852250937009087, -1.964947150735517,
+              1.086994355276114, -1.5545621604146378, -0.6702039017668291, 0.15273130009205538, -1.354848404243989,
+              0.8081822753111396, -0.2990136329330131, -0.1334268300545549, 1.2936295445817017, -0.5276761138383153,
+              0.06209853112125252, -0.35227980331045927, -0.6683541541821878, 1.5365781152706175, -0.5227637702649135,
+              -0.43751245897261537, 0.39166051967309556, 0.6145502882685348, 0.6764920150128493, -0.46478346293163764,
+              0.40093484640123567, -1.4385605602950564, 1.5318810200296449, -0.7902920012169599, -0.22815329205907098,
+              1.5159148518766017, 1.7440445423086697, -0.7705868478778743, -1.0446035338845894, 1.4407728607631372,
+              -1.7690868678646723, -1.9956594357087072, 0.9165504950260104, 1.1647979922386025, 1.7626373785022524,
+              -0.3262003585763962, -0.7291462643423232, 1.2691368673965409, 0.9833027096614515, 0.7052758987187504,
+              -1.4080008451270958, 0.2004861907693547, 1.92413536100345, 1.8633978379666134, -1.5597901041000588,
+              -1.232525418601906, -1.9326471509575835, -0.23851047841947803, -1.745957663852197, -1.027455630245683,
+              1.7842373183009093, 0.7098705198166604, -1.3523419086313861, 0.2493915779920206, 0.5836072040016118,
+              0.8452857075528275, -0.6044200471234227, 1.335947146234287, 1.9535634253874816, 1.8737477649440653,
+              1.6787256628480751, 1.3475059469256392, -0.9023420902836907, -1.815324493360138, -1.7487338231501415,
+              -0.08107787176718784, 1.178869071718574, 0.5869021791922719, 0.1289991861916615, 0.9871714466975163,
+              -0.7828180891664971, -0.9162265218952319, 0.7883323334301799, -0.7738825207321494, 1.0578051800827781,
+              -0.48483804389576335, -1.7003938250158095, -1.7474401518911815, -0.6024807198720463, 1.470072074418848,
+              -0.809852698248462, -1.8087803758512981, 1.1275613461510172, 0.9110052554791794, -1.4827836388852713,
+              -1.3641845213240744, -1.9188108209559402, 0.8859208024949954, 1.7438050845669144, 0.14476912919518536,
+              0.6121128834023981, -1.7692670213619586, 0.023661688752081744, 1.1007625036098432, 1.1758330104763122,
+              0.4546664062325476, 0.022499008403786824, 0.8120850018523171, -0.7886059301759083, 0.8107426171843777,
+              0.015751759753425354, 1.9515003227015306, -0.3285612629290764, 1.758730602588753, -1.9178063288185045,
+              -1.3319225925070368, 0.5970900239608552, 1.8634221473873263, -0.7483844730402502, 0.0851383845623852,
+              -0.10037389959678844, 1.8601880295663413, -0.5358906627108242, 1.5311027975069011, -0.7567148434480719,
+              1.7810484758849983, 1.5004941791198378, 1.238866744077014, 0.019238796977725237, 0.7314924609545477,
+              -0.6404106749076366, -0.30544348502988683, -0.7754562102568752, -1.1903829239480253, -0.7557972926946839,
+              1.418804956107497, 1.6841275666684883, -0.35403092145419013, 0.3072276436064163, 0.4941160183076647,
+              -0.010460638654985033, -0.7496577784263767, -0.05957826320949966, -0.5349743628709929,
+              0.44780861823397355, -1.9548584156880642, -0.6834407857845042, -0.6574778495500677, -0.2568872307434864,
+              -0.8179424074332058, -1.9399599284052886, -1.7438777236599172, -1.9046697213047699, 0.33576417528481173,
+              1.0390831565369494, -1.2867357835981865, -0.9105779330773034, -1.2600968940701254, 1.7546113033912878,
+              0.8638193166816803, 1.915934439034265, -0.18936860893703056, 1.6490561383957179, -1.7404200826424407,
+              0.15942118157817387, 1.174512061322961, 1.087287672904493, 1.182852158431765, -0.12430741231511089,
+              1.6711861366379157, 0.7124940145742862, -0.3946246470773911, -0.7754542725640272, 0.9539784330716907,
+              -0.5716889107776746, 1.4262896723570924, 1.4675456840569163, -1.7077488525524833, -1.6888666810589683,
+              1.2108429896458865, -0.30524840414522547, -0.18167408305726607, -0.2569749511019337, 1.2912167486614727,
+              0.6208472747047127, 0.9472464500515958, -1.1302136544927563, -1.478282134349313, 0.4848945322578242,
+              0.8298435424742152, 1.6932133553283863, -1.4458048451455756, 1.4088139925156833, 0.505348371415975,
+              -0.21105001864112882, -0.04858175142791943, 1.3570555900503694, 1.2673714070205957, 1.1469844853077413,
+              1.2000011591622064, -1.0780533577358637, 0.37698814259115565, 0.6997609434842227, -0.7604196150995675,
+              1.8410835681246196, -1.6836663805912915, -1.6352482015283725, -0.4811456756273813, 1.3045848106454878,
+              -0.823583139102726, -0.646527859067727, 1.5092372843244393, -1.0424659042584983, -1.9448695809676995,
+              0.2678845821239566, -0.6194354091338141, -0.3172475643478627, 0.16481577119936563, -1.1026554846901258,
+              -0.8352503899270465, -1.8149755146432849, 0.4839677208020152, -1.9367901959501284, 0.8680859459275245,
+              -1.2035834761537227, -0.8748603808576707, -1.8417628093555134, -1.0429294120821577, -0.2520578638761588,
+              1.7833216800296539, 1.4367696159460968, 1.8669976567111535, -1.405562858069989, 1.0576377264778563,
+              -0.4569713929987014, 1.3255011842556819, 1.6171166029195225, 1.0403552739195874, 1.2264321768656314,
+              -0.47396544132443275, 0.7118492170263346, 0.6260191876547241, -1.2179712214091793, 1.5120789908822676,
+              -1.657525645319189, -1.2991286032659461, 0.22202239748400387, -0.5389051448124622, -1.594992260705033,
+              -0.0487688918807363, 1.1512759563478916, 1.4679486318272383, 0.8813613284468369, 1.4328674044139706,
+              1.9999268579039367, -0.47950159568339323, -1.2281571927849866, -0.4554451947054856, -0.15429012922622043,
+              0.19786052464284776, -0.3680312279906497, -0.18825645901610866, 0.13700608028054084, 0.11417316734012051,
+              -0.6463349589003959, -0.3770634118502656, -1.9950465240002844, 1.4676192894281632, -1.6060448800367215,
+              -0.6182395877160713, 1.2695963682598732, 1.4459649727588744, -1.88317964468765, 1.4240536704934144,
+              -1.5317465035623874, 1.9497915396745666, -1.991995390985421, -1.4828801801030478, -0.03471214257257316,
+              1.0775554630217314, 0.38086611278178495, -0.1958126129950628, 0.3711869657718534, 1.7307000355063105,
+              1.3370240564962907, -0.6892941432270163, -0.6176252997554714, -1.2761391889511922, -1.9463694295411074,
+              -0.820841598715079, -0.42139807880407787, -0.7976620746519121, 1.934915457164431, -0.4497028214466532,
+              0.5258289450636102, -0.3002339930414486, 1.4317770429007526, -0.10670432773391081, 1.477187387671167,
+              1.6422292268536607, 0.30393726544465505, -0.16649028812518285, -1.1690831963968895, -0.7043985973685203,
+              0.47350023974648625, 1.657836032561474, 0.16219091081621606, 1.2307861721904754, -0.7270831655242516,
+              -1.0574142264570137, -1.9134290652692378, 0.1585901752075669, -1.922458865955397, -0.5216475421535529,
+              1.4438431375673586, -0.2874803852531551, 0.3370681487492808, 0.9173203757850725, -1.3751125170831138,
+              -1.014212305918492, -1.5475568694685897, -0.5834419852252983, -0.022709263779811195, 1.4145035718404255,
+              -1.9267536984073965, -1.871498186038706, 0.525620783057489, -0.48663912480579086, -1.0308451661848164,
+              -0.1369351560707246, -0.7876105221422698, 0.6955249722293555, 0.29453585260072757, -0.06514154829755281,
+              -1.6429080966047698, 0.15520901396599296, -1.518429991046033, 0.6839405241853216, -1.8300625086431346,
+              -0.15898426442765246, 1.9278290352285792, -0.7150445644634695, 0.34034454186145346, -0.2506887167667946,
+              0.2912251513885442, 0.10434269155791664, -0.5637887420304368, 0.031008416285043694, -1.0816174134360272,
+              0.05203114530680164, 0.03172694813978172, -0.7646387549793285, 0.36213414786228526, 0.0060869909269349876,
+              -1.0367311632092022, -1.0684702277942222, 1.1874407786461294, -1.9290032593242623, -1.8550268296137276,
+              -1.161269082907582, -0.18656240236501098, 1.1070044180055767, 1.6261946461581385, -0.5698373516978554,
+              0.9631347920513802, -1.0985201941795912, -0.45509125721838917, 1.6535643092193615, 1.9696290288271951,
+              1.0266341388473261, -0.23790773845234447, 0.2828088466454748, -1.6537561622154024, -0.2286353308074256,
+              0.8766588558049788, -0.8195788808423616, 0.7718354518479451, 0.46796484124644344, -1.7212327769837446,
+              0.0658435971926874, -0.6407624425160288, -0.5647885630487526, -0.5284202936353299, 1.8818650199438771,
+              0.29252062160862025, 0.09136912052125101, 0.4321630239196912, -1.1485094277982304, 1.6036235307678686,
+              0.3318334927588493, 1.7219946936827109, -0.09166860362313312, -0.9321185046623404, -0.5842230824766759,
+              0.5762857089716649, 0.6237761258836967, 1.3257989135149089, 1.65675048758645, -1.0060167288419342,
+              -0.08448091333478214, -1.2793076427969634, 0.7514972175750367, -0.4193024725154899, 1.427794959994305,
+              0.9558973375817734, -0.00039143542951691757, 1.7030425931606343, 1.8219801925309609, 1.7260980421968792,
+              -1.9249357614979115, 1.6285038041870772, -1.6118493301059527, -0.4294907666236245, -0.1659993953929053,
+              -0.5722726383494532, 1.2935829105972072, -0.06859448172655114, 0.6177602091273879, -1.3370886026529494,
+              -0.8003712871381898, 1.4776462171750593, 1.4184671800982604, 0.5433276418773598, -1.1103872044287346,
+              -0.7572146251109908, 1.1710857107940438, -1.8705799333769377, -0.31024900334903194, 0.34866139709491595,
+              1.4866168061361806, -1.9774625782466435, -1.9891386648785518, -1.7735018436923857, -1.5751766748778406,
+              0.7218521749338684, -1.9390531947989702, 0.10502871018805493, -0.6908737000286438, -0.583334654840761,
+              1.465181746808006, 0.9232784443998208, -0.37400862804876933, 0.6661364913985333, 1.8688403166211724,
+              -0.8717922772171374, 1.40123243258902, -1.9913513342271694, 0.7369262986601814, 1.2562396521830914,
+              0.7638029152143444, -1.8164465814226718, 1.7184240047901733, -0.911895923498772, -0.43161449539234464,
+              -1.09011215721989, -1.865570383600069, -1.2232212962752618, -1.943030725162366, -1.3198980808588407,
+              0.0564583162685901, -1.1298601037432707, 0.6392469941959655, -1.8442933136946733, -1.0692331296192306,
+              -0.29525834417416963, 1.1184299311108798, -1.6129180448223925, -1.2727580333965411, -0.9415967651718447,
+              -0.2597646669604643, -1.511150740860189, -1.769101860129168, -0.9185489030191736, 1.6841872338621604,
+              1.7266136417112579, -0.6047332956995355, -0.4784036452377798, -0.6987121488961696, -0.3950169895430573,
+              0.29099877073820757, -1.5250611710167732, -0.5876293953125105, -0.5938486494168753, -1.0021656820999798,
+              -1.9666708037201044, -1.272140943592933, 0.7880251982149247, 0.11964755378902137, -0.3901422866566291,
+              0.7163616669643105, -0.04395212244207691, -1.0791955402155438, -1.2675936648298745, 1.0655012879795382,
+              -1.2960016160353804, 1.1268487593724137, -1.4561611267402474, 1.7853064994708516, 0.9817883639607627,
+              -1.509195648143982, -0.5791158763214721, 0.2952226835939813, 0.978029471962218, -1.7020877610480865,
+              1.2949154364706787, -1.8669978207007674, 1.9804087372291983, -0.1920592681769353, -1.3129464854527964,
+              -0.13084211958976688, -0.25279655392730405, -1.8414897577067046, -0.7363208735547797, -0.6909260581968182,
+              -1.8811392695885178, -1.5901068180742568, 1.758878672856656, 1.5387787983193055, 1.6713805051822828,
+              0.28500585759464503, 1.3914306792968247, -1.112480424362695, 0.43326162263712487, -1.8142315585546145,
+              0.04023793859339708, -0.29805331377366073, -1.5940199056342657, 0.598129666067309, 0.625004812581027,
+              -0.911960460437454, -1.973405398299871, -0.7574758313972065, 1.6261060948310595, -1.0639316504874738,
+              -0.8549167612511983, -1.6781924250988283, -1.2461164334164385, -1.4396767476893544, 0.588676315376695,
+              -1.3923513282535058, 1.960640665522404, -1.1216598084556173, 0.29702774865635373, -1.1441990771482464,
+              -0.9733601129567919, -0.13533496827900127, 1.2875809157665516, -1.004348467034836, 1.501216437295625,
+              -1.7257128690349832, 1.3038540536955745, -0.23514094567048183, -1.0545846838443325, -1.5628126421353628,
+              1.758225843292891, -1.752217343717482, 0.8827182187480176, 1.9633500079396518, 1.4124055174643644,
+              -1.6009057792139894, -0.124257691420528, -1.6361563376854855, -0.7163857270237415, 0.5991086423774714,
+              1.7584781739562239, -1.625063774845441, -0.37572359945414213, -1.9506995916793395, -1.951072499257542,
+              -0.8315895595505731, -0.7002195813028456, 0.31048848147933406, 0.19118037223773499, -0.836819966187166,
+              0.8992259497849764, 0.2769262236848036, -0.8190887502725346, 0.5908744335005158, 0.8309308801063224,
+              1.828667115346997, 0.050270920754735826, -1.4675310474376078, 1.6474157726281966, 1.4412270025481906,
+              -0.071799132580602, 1.2723657902431542, 0.9847345744230269, -1.7967618044169429, -0.38502605748464447,
+              -0.9911154903546722, -0.9911306363398822, -0.9822148063420846, 1.404660627884402, -1.7223894428279198,
+              -1.906376932077218, 1.1267093944315762, 0.005733157947431344, 1.5499657009743384, -1.2918427917389055,
+              1.8878866260898972, -1.450986879628605, -1.0670858020560647, 1.012435839252528, -1.0904895043171203,
+              -0.7636238525274122, 1.8215658692720762, 0.802604215057829, 1.6666057955071523, -0.6857256630224935,
+              -0.5501356674470292, 0.810089459752044, -1.6169276394201413, -1.7364078843810304, -1.1867030927097977,
+              -0.5172860730134063, -0.8556026745046195, 1.7395171980402528, 0.8977518661224195, -0.715248100272647,
+              0.42642471199620147, -0.1359154018671509, -1.017818228497351, -0.00905895348889274, -1.6541703500137865,
+              0.5001119548133026, 1.7346626988667904, -1.1674654589916598, -0.735219697011062, -0.1962670855393469,
+              0.21602710767932987, 1.634800475118543, -1.614549402431435, -1.86469031751599, -1.0722793725135675,
+              -1.6751255258166085, -0.34784828468612705, -1.333517864681233, 0.2286247270719235, -0.9686327464308748,
+              1.8030391927221219, -1.260653677947687, -1.3291707209191737, -1.7464317874557151, -0.6022055677476486,
+              0.4234332187817236, -1.3942184957445614, -0.49460322127068856, 0.846493215661404, -1.3779621390020038,
+              1.6170651737934367, -1.5949106936017516, -1.993666650794867, 0.517274246668932, 0.636335391123283,
+              0.09728792236496986, 0.21871120388837983, 1.9033150412817141, 0.3761639225094582, -1.7448084623773052,
+              1.001122609393847, -0.44673552515686765, -0.6566748795770616, 1.0022029703662332, 0.49152185517814306,
+              -0.19632140501675632, 0.6593309755584418, -1.42607069406814, -0.2499327686935615, -1.4645970035455589,
+              -1.8214929827258137, 1.7849263457214155, -0.46930999932929396, 0.930852011498847, 1.4657054090327062,
+              -0.8598379219960437, 0.21923117934684644, -0.2719980917718665, 1.1382814204088554, -1.4437234293121408,
+              -0.08437654030814734, 0.9230522551879243, 0.17552818859532504, -1.3982719952892557, 0.7727609217240659,
+              -1.3512364654797944, 0.4217546307725639, 1.8959084151076748, -1.9009721763514387, -0.2084686501701638,
+              1.34507209606525, 1.4812500373319821, -0.25452451515050356, -0.7547650359872655, -1.5901912558006952,
+              -0.617303822265475, -1.568626713241147, 1.3511951372228292, 0.46680417658438866, -0.9843992974612537,
+              0.21141544095185427, 1.9555502838890186, -0.5622924926922526, -0.7074175111692584, 1.6856741408764497,
+              1.4329492504371748, -0.6904233032298688, -0.16570616327044707, -0.5819404191250754, 1.5298308400435117,
+              1.2873904282242874, 1.75253332340609, -0.3969229369696805, 0.9712496560090953, -0.984449102903409,
+              1.845837132921134, -1.23000834955623, 0.25823037305241403, 0.6562595586377551, 1.426434937488695,
+              1.2050365141327637, 0.35112023386450986, -1.423781157416867, -1.22442697877245, 0.8857806584751993,
+              -0.27941851495344316, 0.383573200806417, -1.6546531309712131, -1.0620419037179136, -1.6487673588042684,
+              -1.1583303816085477, 0.11883432462925647, 1.8623629910270258, 0.7814730455397738, 1.3892839510915138,
+              -1.2109955091247775, -1.1531820955625154, 1.4249824872214445, 1.878872910977651, 1.96640914460413,
+              -1.7574195668520565, -0.17080472539130565, -1.2334517024508829, -0.042203796430729135,
+              -0.5119900340796173, 1.8245076363940829, -1.9504677488809792, -0.15838646478579133, 1.8653261691127963,
+              0.9818831615417336, -1.1498049891062543, 1.8177635453973222, 1.0307183336158117, -0.9459693602670747,
+              -0.6967235469867861, -0.6765683802163993, -0.25117711682611343, 1.7085642032810782, 1.2354232326963057,
+              0.16304953424899615, 0.8288006493055686, 0.7426908331423769, -1.8567733398412107, 1.9363614866712426,
+              -1.1491819532508236, 0.4456745704746172, -1.5200940589370164, -1.2921806881240192, -1.8425344342113679,
+              1.385258197718982, -1.4468713069952912, -1.7194028306161009, 0.11112342320496005, -0.46993304971516636,
+              -0.3532497592673005, -0.20705867891006324, 1.1503266436423507, -0.8615322336734987, 1.4243814245202273,
+              0.44605877897824087, 1.4291899741133527, -0.5503339260133986, 1.6845285529242942, -0.11203488733585854,
+              0.7838364265704332, -0.3478757678382811, 1.4617786521240204, 0.6797786349237454, -1.4070556983478548,
+              1.5368835556999283, 0.7270943692880465, -0.3943204095728534, -1.0159540349760245, 1.3420827647018054,
+              1.4944969205796248, -1.3846348875424548, -1.1070204938214987, -1.5623431163391235, -0.7285340046827864,
+              -1.6146739312377756, 0.7914876850628412, -1.4285275663878165, -0.2102551967847095, -0.6036343290031185,
+              -0.7863519667198808, -0.5232027574551195, -1.5248951664568793, 0.6403374226115135, -1.3332654904167054,
+              -1.6013714748847017, 1.5264343369773945, -1.4659567584783701, -0.1255742527269854, -0.05787364846985188,
+              1.0630262325298858, -1.0251739144160261, 0.6341878529485214, -0.37708094500223766, 1.5820017651752964,
+              -0.9754158194342759, 0.021470140570154506, -0.270780715342573, 0.8513662098629382, 1.812913501299759,
+              -0.5507306480213909, 1.6252304329937193, 1.6166807780171624, -1.0015815783816109, -1.6525598491008084,
+              -1.7640141719015405, 1.4567526655551655, 1.4314017477260261, 1.1147464550993922, -0.8609069210724725,
+              -0.8835445478716384, -0.8807052221352922, -1.054638273386189, -0.9307483447707048, 0.6532758793412583,
+              0.2251469563444708, -1.4983410691150256, -1.355149970924261, 1.825265403016327, 0.3882557252462826,
+              -1.2005370275411478, 0.5167632305655108, 1.258505468633845, 0.09615276706317388, -0.11253109876707157,
+              1.301050271249565, -0.5926127701907813, 1.6730534476647492, 1.2040207312610214, -1.5220497985479415,
+              1.7064305519329883, -0.7793546956972763, 1.0964366887160475, -0.8642894018251441, -1.9411871186407836,
+              0.074716954760353, -0.14369204309870298, 1.8393941069756865, -1.3769308839327685, -0.30862200557660646,
+              -1.993793823331563, 0.3709006233167482, -1.6557247604820402, 0.32053951400820324, 1.418554947267494,
+              -0.14801920801346657, 0.25882446183466357, -0.30227778350472967, -1.4281993644549162, -1.2907922764091362,
+              0.9110864171884971, 0.613974184551024, 0.6697305289032087, 0.8489421527088039, 1.498148243683703,
+              -1.4269397350154973, 0.6132189565263042, 1.8741137765083877, -0.05705777446194382, -0.8855796810622429,
+              -0.8656995527854097, -0.5082467483431357, 0.4332387677470342, -0.7541429782381526, 1.305940642158534,
+              1.3554774725998202, -1.2111195490457929, -1.4381676776657422, -0.5207599119467634, -1.8228914923142483,
+              -0.1877702958456453, 0.36230894851909135, 0.17851993376959374, 1.7927379289246463, 1.1368406732884377,
+              -0.9950802434664396, 1.6043789056058433, 0.6484661390901874, 0.933455445947418, 1.0965484754420745,
+              -0.6939481831648528, 1.2328397545284568, 0.9378872541025238, 0.34445900641106775, 0.1278365294249939,
+              -1.0258774152176224, 1.0892898808290514, -1.067964672512037, 0.44689053769430487, 1.9413539674195102,
+              0.9528679183933839, 1.7587111895733223, 0.5576643641512362, -1.9897364013123235, 0.47036230007388813,
+              -1.7897636522061902, 1.5010421043239726, -0.6901446605239645, 0.45393761765945406, 0.2829324542693943,
+              0.7192447802434581, -1.1331672455879191, 0.17337802502719768, 0.49963004068736083, -0.7815511951172738,
+              0.9636628323801011, -0.8323427533238403, 1.2095900671544157, 1.934951536397083, 0.5200093238971917,
+              1.158643992086569, -1.5891421117437572, 1.1934443042649656, 0.8276249819736909, 1.4212639972350827,
+              -0.8419641775597322, 1.2661381994885206, 0.06158022749540493, 0.46246628371777465, 0.7573951522202043,
+              0.7619102543908491, -1.9354772481952196, -0.08541307148121735, 0.32286401465719994, 1.170684883004844,
+              0.7749781307966064, 0.07936620410936168, 0.2315148010116328, 1.263046672689227, 1.031153500041193,
+              -1.4759863901212418, 0.9873606549190814, 0.7777062346878534, -1.7228292836209809, -1.5782061917261343,
+              -1.9224252408205036, 1.6523756827872802, 0.2993123495389698, -1.0626357773248643, 1.0197875082835361,
+              1.9808878982391658, -1.7335070696659765, -1.3148099108168987, -1.9494880550804297, 0.17268926636423831,
+              1.242098464223531, 1.1180045679110648, -0.6736825181756254, 1.0154763824746373, -0.19957976202073535,
+              1.1193883202782464, -1.1478784045364039, -1.1330343367225462, -0.8689855158445736, 1.0116971724295913,
+              1.2506562934116516, 0.9967792918784815, -1.0610583651118048, -0.532297674201299, -0.7150119770938783,
+              -1.2232688695213954, -0.4743547399060102, -0.46434726873258025, 1.0115464987548544, -1.0177528333698103,
+              1.733606846210808, -0.8155729251191701, 0.5232276995816267, 1.7914758177269183, 1.0157926433716051,
+              -1.8351739772356694, 1.2012667794467289, 0.8545667843161366, 1.294851140223634, -1.3948286719024399,
+              -1.1466399375859053, 1.3041194739119701, 0.7377935849631942, -0.7995103884432107, 1.6159710967964172,
+              1.1620542442509896, 0.0055755030499886615, -1.0334307489311154, -1.5841651529973335, 1.4111075346503084,
+              -1.2009959445559417, 1.9720845657548844, -0.8035695524416591, -0.14331494930025102, -1.4264826197967428,
+              1.7586100751820988, -0.739427319576853, 1.0666567765029669, -1.5916344843881065, -1.4350385771936356,
+              0.42668754843352463, -1.6484548280031257, -0.37728075139212613, -0.7687371980269919, -0.8179781982796062,
+              0.465506361112749, 1.3840790073759015, 1.0880219954714985, -0.7036856119504717, -0.6963478247947235,
+              1.0979632285661367, -1.3810178288903288, 0.8156134298411253, 0.10032276394202011, -1.6008032936016479,
+              0.18932441617959217, 1.3551150619453578, 1.3534267176398442, -1.1276635081675348, 0.6608005967002919,
+              0.793182461268664, 1.398769261405878, -1.2123058565244103, -0.08803245110073288, -0.2893447181014084,
+              -0.9972961711861705, 1.5618332897004663, 1.1591927593779072, 0.511047619279922, -1.970138349713964,
+              -1.3628804504805752, 0.2782295809685751, 0.30358322411230354, 1.9514398542744873, 0.25960063763317454,
+              0.4976234205537926, 0.012047558099780531, 1.79534431915478, -1.7723391117592922, -1.9992623394682916,
+              -0.4524505481322034, 1.3804610881366495, 1.1587664810582172, 0.5111716739430667, -1.6928217537440542,
+              -1.0278751605013383, 0.20893412968684988, 0.5871739815665329, -0.8412950581167742, -0.4077765748738731,
+              1.7498754266646204, 0.8583271186920243, 0.5762482317954367, -1.8599099610537024, -0.19242912582490845,
+              1.2512291284228754, -0.8441763329152305, 0.26980735485206075, 1.4044456507515894, -0.8516268695811835,
+              1.4493090144656193, -1.3915783403234894, 0.35557624127716814, 0.17226516309619733, -0.5021504124493701,
+              -0.766188811190383, 1.1332244159180078, 0.011135590774230764, 1.8851307343362258, 0.9148262788018782,
+              -0.8299956158151707, -1.6057691996197043, 0.5678238711359924, 1.8008767630518667, -0.586304639586193,
+              0.47029839294118503, -1.463460016599707, 0.20856103503853962, 1.2545845494965118, -1.0729668619560213,
+              -0.14947337388785709, -0.20035875199434283, -0.07202935566940027, 0.05533721254453372, 1.3677731442776313,
+              1.7011893855187177, -1.7202195328563636, 1.9488792451860384, 1.3096386167232117, -0.5132153326702822,
+              0.5616165083457831, 0.4157359121447879, 1.8006481124839855, 0.230442477572935, 1.1686013774607265,
+              -1.7879670674147912, -0.842370723742838, -0.7927388944332199, -0.9586442598316518, -1.54708954114047,
+              -1.2956507442445577, -1.4031204732951874, -1.6120562181795481, 0.6283505387959369, 1.9223686678649798,
+              0.12298814371626143, 1.6278360280654836, -1.6223557147160461, 0.43054457669015456, -1.7908842288361821,
+              0.5775385836169233, -0.15097004219870414, -1.2290692851647318, -0.620782793926316, 1.1062043604891931,
+              -1.9746433547898716, 0.6382174626600765, 1.749692571795931, -1.8339775549081967, -1.464038954875173,
+              -0.9639795224425223, -0.990228592162139, -0.9403728223487793, -1.6685943188697578, 0.07041255085387288,
+              1.5308882897823413, -0.47253846241489494, 0.8106189739961147, -0.2270261582976314, 1.8799983165866934,
+              -1.2472611795739992, -0.15798247303785384, 1.1021702350596438, 1.554345756888078, -0.555551779439396,
+              -0.2519441527853594, 1.6402787480890648, 1.0543147407284197, 1.6335920152443828, 1.6300453015691483,
+              -1.0481818985064661, -0.7279789339473091, 0.4888000242214119, 0.48732772667936697, 0.8584068837243475,
+              -0.2507103566018847, -0.4408909179300613, 1.1233796101976283, -0.4632288097116861, 0.2476082637987398,
+              1.0440154957174563, -0.8869375992382329, -1.4091245527531013, 1.1244796013617524, 0.6892639663640718,
+              -0.022508581368523295, -0.1947662213695871, -0.03308275174489772, -1.5224685354120124, -1.749461912732114,
+              -0.8082369514527556, -0.25696601764524907, -0.29739460489203395, -0.7761972897539025, 0.11904461166545932,
+              0.12301125764257481, -0.42555313462267197, 1.193050472577613, 0.41286096527825666, 0.19893639265378305,
+              0.8311360551038502, 0.07310473388978878, 0.6685202630904961, 1.3372821732384246, -0.12066792119153114,
+              0.05609759368011602, 1.0451437108149522, 1.662035784060775, -1.1922702285472315, 0.6416834232307735,
+              0.6055133359661493, 0.7942484561725927, 1.7276358502409623, -1.2864330076965746, -1.0918326505522646,
+              1.1998891150473536, -0.28628797182602295, -0.5716447940943157, -0.5689380516868878, -0.12795378416769676,
+              -0.6791193619117468, 0.10038501986448622, 1.7063771482852008, 1.194191767284221, 0.6647694331145066,
+              -1.7348358815181681, 0.2817755499501846, -1.8525340342841066, 0.9851618492112006, -1.1059877587552673,
+              -1.5295669583116638, -1.0505227522752554, -0.9487207502273058, 0.809796237150997, -1.8498482833968302,
+              0.48019511430536, 1.1226579267420975, 0.742760641350614, 0.29074715598422696, 1.3377685110252564,
+              1.0867564725571857, 0.18478374884500237, -0.3164295444608616, 1.622419371685548, -1.1918941739540632,
+              -1.85979992788012, -0.12624583269268985, -0.7280639736725645, 1.7648123921702625, -0.2906781194024406,
+              -0.09262137204418774, 0.4138200526064937, 0.04327935477419942, 0.9797051661471672, 1.6815036980821159,
+              1.1825812431197686, -0.4448889424945657, -1.7444130322963716, -1.2294019413222204, 0.050868336661046065,
+              -0.28346687593229625, 1.76715189128082, 1.611534349398629, -1.1862000782338198, -0.18760651204489776,
+              -0.6355147050302099, -1.0693954841485978, -1.3343928935517813, -0.027843745181175272, 1.8949354550001445,
+              1.1947313752564401, -1.705187723165774, 0.7263276294399761, -1.7205254544798727, 1.3224746522778261,
+              1.1708741480141782, -0.521723626523988, 0.14495711998638772, 0.1539508663177509, 1.3474086945015884,
+              1.629428728782094, -0.4727414254013107, 0.6353417064465656, -0.03451981234038648, -0.5836884681507799,
+              -0.20106395276242583, -1.7734283674131142, 0.32803960400104515, 1.9133097530382273, -0.24540943135655446,
+              0.5453897283838289, -0.5427539051784143, -0.6173702928258038, 0.9933437009783477, -0.3068382522087365,
+              -0.9721176288546554, -0.2527049737425271, -1.273295878249181, 1.0233037474978097, -0.3711521481099638,
+              0.5595202642561681, 0.8557760663482039, -1.8239035041840888, 0.9147544038362989, 1.7978278449861724,
+              -1.4194290997912544, 1.963345131841125, 0.23090855457281645, -1.3144424697360408, -0.1798082266955383,
+              -1.2650654602972473, -0.18679991460173095, -0.17629662209988428, -0.18349530363472422,
+              -0.5475428774726536, -1.04729696306776, -0.6411208082297168, -0.7308839648617003, -1.3455838296857312,
+              1.3597877917216312, -1.8340993142240434, -0.6466367039451653, -1.8498009040527492, 1.50126492806735,
+              -0.8473870152631546, -0.566754366425382, -0.11291722444101016, -1.204932180016593, -1.3014629360740422,
+              0.8120263473720843, -1.8930799484123453, 0.5895700763514027, 1.7580882681692787, 1.477127335430259,
+              1.2047061381469248, 0.16485767274321272, 0.23788298792881424, 1.2386497309565767, -0.05281444305869076,
+              0.17334853252953497, -0.9480401485516037, -0.12016072866190708, -1.5973543471706693, 0.9977273313654775,
+              -1.859593065673394, -1.22159503592711, 1.1674399144996856, 1.2941842856062413, -1.1135548933000354,
+              -0.9788839012477277, 1.5718286586532253, -0.1759982975417227, 1.1031262106075763, -0.8778538891016368,
+              -1.0912009563680698, -1.4342020269338933, 0.7224191131816049, -1.2061468497546022, 1.5502197738160985,
+              -0.4251181088474105, -0.3206510906592923, -0.20288873333289725, -0.3560455119064194, -1.7508196425056557,
+              1.5986301909131457, -1.4438601542872123, -1.7892166215086185, 0.7616375057554672, 1.6373087559633968,
+              0.384206177448279, 0.4567136418012572, 1.2972172920819673, -1.1093690558377691, 1.6750979445730616,
+              0.90908880550879, 1.2770950805973325, -0.30072973792624325, -0.30858954557714835, -1.9794583286836787,
+              1.4463537661478734, 1.0183415951718624, 0.19309738632155593, -1.9449394663020856, 0.7108312298345272,
+              -1.131148529447878, 1.5259401710667637, -1.2736225271446795, 1.449852979043179, -0.5704566653927854,
+              -0.3074713127723667, 1.3993673001014262, 1.7718101827800963, -1.4492416161385497, 1.204820691151994,
+              0.537241600964693, -1.5810854626566924, -1.5966679426112087, -0.6946214128685, 0.7401275132249641,
+              -1.7160959560539784, 0.4050021853660404, -1.2105835684442203, 1.7944565918560151, -0.6818406209243566,
+              -1.2359644949792958, 0.49907683448423157, -0.8405824207686488, 0.4689270476935219, -1.4699087331797918,
+              1.43264169315706, 0.10499119180317251, -1.9830520821430992, -1.5927469176409472, -0.7632048947095695,
+              1.303303968850459, 0.5773554905161333, 0.6761130632322967, 0.9023788989770569, 1.8960847479504084,
+              0.8846144527507596, 0.9891128512774987, -0.7137249307539442, 1.267181508288493, -1.5113665535004523,
+              -0.17564834166799148, 0.9032525164747707, -0.25795858643541525, -0.2153155122989885, 0.14650372443070392,
+              -0.9984704344431465, 0.19261182317116177, -1.568804815745514, -1.8496400745041237, 0.7219284702138937,
+              0.47816525621959105, -0.3273800096758981, -1.3390312793543542, -1.9838474983553418, 1.4066433632074071,
+              1.9258390074704312, 0.4520281509654822, -0.30119846185025256, -1.8086624212334463, 0.851460380433025,
+              -0.4149432793401253, 1.4970655678791776, -0.9139304175330043, -1.1721517169571731, -1.9882366923130537,
+              0.20630155701555886, 0.0891351853539204, -0.18485046053672338, -0.5253430902979694, 1.1136150007281822,
+              -1.072256739674419, 0.5677711226994742, -1.6986682182236068, -1.373143853609375, 0.5391705517446521,
+              1.615483488858379, -0.18222418110590155, 1.5270125615115786, 1.4186450525284275, 0.6856859039097802,
+              0.5948037341597869, -1.0097732940745248, -0.7260016082299225, -0.1705798585617213, -1.4460592122059417,
+              0.2804912469966476, -0.0574570618149588, -1.4226509159038505, -1.3490817825559507, 0.7561887451342573,
+              -1.0315310280500372, 0.7865868802852711, -1.7955739447423928, -0.20476732094967787, 0.6532859024525468,
+              -1.398626809307176, 0.31416850473475755, -1.0474173751356446, 0.049027524534579925, 1.335442264825483,
+              0.5839485880852768, 0.8416818491436544, 0.7729008830376998, 1.7957935152184445, -0.20047560204525272,
+              1.9653799460331678, -0.756998178675067, -0.12357101901807699, -1.4272827743751613, 0.7149414745051672,
+              1.4783565252719182, -1.2368177109511205, -1.4571248051607144, -0.7948678149157731, -0.6295946982419727,
+              -0.022851757488315805, -0.07947620035768654, -1.3106359681202076, 0.1591438592300909, -1.4970586188027868,
+              1.3181273904865316, -1.508591213967403, 0.5722257787143228, 0.774539967054146, -0.5579675263215638,
+              -0.801690277809052, -0.8966439545169163, -0.2168181087774288, 1.8549965661558616, 0.7870136331314779,
+              1.0426166176054243, 1.2052992540989846, -0.6116512580549873, -1.7800528483131748, 0.6162047118916432,
+              -1.1406795391578877, -1.3126212462178328, -0.1255252753148266, -0.048214851156274996, -1.7513823416941525,
+              -1.9966724157135571, 1.468282137353885, 1.1596808879879097, 1.848952713577705, 1.9276331797246486,
+              -0.6082295997412146, 1.9590194651252002, -0.6705403599782791, 0.8982591946264264, -1.8582005994721253,
+              -0.6224103416017206, 1.3118474535601639, 1.927285880838153, -1.3435831019941835, -0.02035775798119932,
+              -0.258091815197548, 1.5685276792778557, -1.7504336743073416, -1.3270808448193447, 1.9609655615175043,
+              -0.5002114597187894, -0.8302889305621663, 0.6662682285835677, -1.3588868202703237, -1.4263374077454936,
+              -1.117653746556062, 1.6959423725848142, -0.3368698386266633, 0.6329184444264122, 1.4360518922995382,
+              0.2209792086889042, -1.7826312330601093, -1.9055378329489479, 1.8363537758423742, 1.8612237061845747,
+              -1.163857834211714, 0.38823573714522475, 0.9933133475252713, -1.769852560129741, 0.6303163049709841,
+              1.6352278260339865, -1.4220707937174062, 0.4996182092181929, 0.1748538915264719, -1.389807604688972,
+              -0.5041547053983226, -0.7755917479953034, 0.33822942573796055, -1.3957767536429841, -0.16066323457963172,
+              1.8426173458683097, 1.0912529333551886, 0.04454407634104118, 1.4585397734066836, 1.314915917164475,
+              1.0930141444320949, -0.9720567164640972, -0.5831452038265033, 0.8082335756515109, 0.4358913655339238,
+              0.8310387682873994, -0.8242800720840835, -0.47497624245619896, 0.000058968841639917, -1.6746583184349388,
+              0.2586283765233146, -0.03952361428650608, 1.9572062803747263, -1.364317103129661, 0.16484595584710782,
+              0.6889848970954304, 0.33625779127527444, 0.28142293472509294, -1.5510992496482494, 1.6785313595707674,
+              0.4921495479711657, -0.42294403727168906, -0.10192465238332815, 1.583070264702826, 0.6464143795128816,
+              0.7706704090619576, -0.45316577898360944, -0.6156337052461307, 0.2949317256431403, -1.1153946167003506,
+              0.23143632095143918, -1.187495465719234, -0.754948635807529, -1.090644714217727, 0.8562387289761135,
+              1.4209567719285578, -1.867698005779011, 1.3320884849513037, 0.5619380450950349, 1.8886416226851166,
+              -1.7314027359692306, -1.0362482885730966, 0.9807231768105664, -1.3689591083054822, 1.5694772951886131,
+              0.4400722090716478, 0.9539178709143741, 0.4832872148319014, 0.23471769113792984, 1.9643745055943542,
+              1.1325801513292664, -0.43752654225713705, 0.4538975778222154, -1.6157155513403065, -0.961125955159364,
+              -1.1751535270699955, 1.1277536127856669, 0.11594556933087752, -1.9276503102738447, -1.5774089828974898,
+              -1.0029301039964427, 0.4455245589428616, -0.4739643281334569, -0.8513671370845639, -1.0336436816615233,
+              0.6626347865920605, 0.7885413873550009, -0.013439463013608766, 0.6488139123172507, 1.4291110296253855,
+              -1.9761431450732667, -0.5012957954679527, -1.1585910698227027, -1.1021436093929422, 0.036919383239228054,
+              1.8089329170710071, -0.005231354013648826, -1.2082234644042886, -0.3456887578591781, -0.8017405353429492,
+              -0.5345375675659492, 0.8534420279659507, 1.7447905633469585, 0.43817127727920724, -1.8499205219957702,
+              1.4797731845522186, -1.5443888715914138, 1.0131225647292235, 0.34701885989022063, -0.41455116200553377,
+              0.40209313291363724, 0.11900781713274, -0.9935386808363758, -1.8322340002578068, 0.9811839836103111,
+              1.502154507354498, 0.8891949169357574, 0.899071159318308, 1.752337905147142, -0.04599799842734953,
+              -1.6347681983052045, -0.5522741247690393, 1.505215487771519, 0.8504281241898015, 1.8693941265525265,
+              -1.1512863792577441, -1.8748160415118837, -1.879939448107617, 0.9353149913960506, -1.077101932112896,
+              1.2322050595012843, 1.2672982902122678, 0.9384368132472858, 1.7274119921052788, -0.9601726232935137,
+              0.19420343716687505, -0.7830049935581602, -1.9099470296794694, -1.213386784368356, 1.6800660417837605,
+              -0.9282638481321719, 0.5088239004955142, -0.5528513330962577, -0.4235136044745138, 1.6316021980530238,
+              -0.3087654696690505, -0.10527992793999896, -1.4364007982935343, -0.4455364976497762, -1.3433044303003099,
+              0.6517505064656408, 1.6050250028051813, 1.6490276577492855, 1.9140119353414144, -0.7684496098140174,
+              -1.01738188731548, -1.1250647161193914, -1.6586222112755102, 1.1599068196677091, 0.795751774794466,
+              0.5733174614685748, -1.3655937932875277, -1.507254849973065, -0.2831083653801638, 1.3241227396573514,
+              1.574957068221127, -0.31194765030973937, 0.4008126582755933, 0.43635579619776443, -1.3214048572867325,
+              -0.8447194221435215, -1.1526249262582748, 0.7073544609451421, 1.17078844004073, 0.2425026449956018,
+              -1.7518561882120753, 1.6591407848437605, 0.06616038448738504, -1.928680221520497, -1.0504809684365677,
+              -1.0974712342176778, 1.6344494477175475, -0.4129201382527832, -1.7111789594333953, -1.6070549808753904,
+              -1.2456702084965565, -0.012663680475193395, -1.1305840149083926, 0.734392120651302, 0.18651679771884844,
+              -0.22974141381305735, 1.9415149817194726, 1.9280078232850126, -1.2072428658632042, -0.14782869942839927,
+              -1.6523593328098034, 0.4844141001145905, 1.0492278525622805, 0.5924539450553175, 0.848097235977705,
+              1.8881210898619676, 0.20004070245023797, 1.4305799893712425, -0.9082660328332564, -0.14268688754147085,
+              -1.1201061991671262, 1.1399839045134712, -1.5579448101377515, 0.5516078322124933, 0.3365679579810825,
+              -0.636402425334972, 0.7364990374614768, -1.2657328423109204, 1.4084870144147636, -0.5538274490613713,
+              0.43684201943536305, -0.706532199493215, -1.7678543182116737, 0.5086879667154935, -0.8888826793267235,
+              -1.0510640830474856, -1.775013227468511, -0.7345226367397419, 0.9474796694127203, -0.649964939391042,
+              -1.5189099534245498, -1.9260526549789567, -0.457330394781688, -0.9340352374682741, -1.3868164983748459,
+              0.553888202560878, 0.36818698767921365, -0.9382183717778192, -1.0829596839250488, 1.3646658325042367,
+              -1.3240476722940633, 1.9816923192707012, 0.5300123477141927, 0.0790085088366057, -0.4455760575475125,
+              0.48463653297167486, -0.7788483158746828, 0.8253771773416885, -0.6823431576948558, 1.7776737534704772,
+              0.5497713214586923, 1.4452464852137838, 0.23037431004796094, 0.31188142524786766, -1.7543267850797761,
+              0.6063452856820941, 1.207122989395999, -1.926907332363795, 0.45038239145265724, 1.0988284911574286,
+              -1.6007436457047142, -0.4728890687538678, 0.3195037474199047, 0.8855124961325762, -0.2555993730577626,
+              1.8813620496087493, 1.8900177166377103, 0.09592367474164032, 1.8974568987778628, -1.1058972953708501,
+              -1.1512017435907103, 0.40201549011430693, -0.1831060132280804, 0.22245091899613723, -1.1866541831479385,
+              -0.5451040730392975, 0.9199451579519691, -0.42060255461704177, -1.3791747236441925, -0.3024448490768936,
+              -1.6611455107283604, -0.3106541240888907, -0.9498356682876157, 1.769660410309836, 1.598216213741022,
+              0.4623205859503434, 0.03664778458072249, -0.6655252973523123, -0.7325818423601653, -1.591871681771024,
+              0.9451427301297981, -0.8203468674560934, -1.5069504221011005, 0.7243170638862324, 1.1839749702725175,
+              -0.7128348341329511, -1.5076965090949397, -1.59865172895221, 0.08910680490749368, 1.5717880586471278,
+              1.8951504684652152, 0.42550207471805646, -0.128409822054123, 0.9896766313315162, 0.34808462644009275,
+              -1.7082990487472571, -1.0459982270685435, -1.1132292311691874, -0.3022325459842996, -1.7274216318536348,
+              -0.11775921716410043, 0.7403577685290532, 1.188824227090608, 1.387282721223393, 1.0688709331799577,
+              -0.6395615121564866, 0.8142138261269114, -1.4467483751545576, 0.8996177593321626, -1.8193866881462766,
+              0.08924208518874632, -1.405297919708996, 0.31754790231458685, 0.9823851818369507, -0.49590144528424585,
+              1.4194064220328588, 0.9729299634967079, -0.46170090347918613, -1.634203532024186, -1.3139980454214522,
+              -1.8469876250843802, 0.710926322864931, -0.4029599381569682, 1.7246833539931403, -1.4088169680807,
+              -1.9165388068372708, 0.21804317359714798, 0.3898186987610348, -1.6118063668363405, 0.28583673086194583,
+              1.7015683175211391, -1.2836168642070582, 0.8463494611619371, -1.1625839799245696, 0.2640138032690018,
+              0.5041551687310717, -1.755824925370514, -1.177748867346489, -0.3829444120449246, 0.8360805034202388,
+              0.05022254918868896, 0.4276469609032256, -0.8235567451730139, -1.94062145827791, 0.35097020666890355,
+              -0.6636358495150212, -1.2587452298002964, -1.6575362996910616, 1.060467707494971, 0.9661831305087887,
+              -1.77149852066976, 0.18844425542251564, 0.0897431507375952, -0.8281105706620897, 1.5632959207508623,
+              0.07141359825195703, -1.0844701043735414, -1.905443802421968, -0.27588161311845294, -0.40342423607415956,
+              -0.34332304727825136, 0.0176022295550462, 1.94359831375926, 0.09702777017089215, -0.11695098349068722,
+              -1.2810374187149858, 0.37597456306160204, 1.7631374725308877, -0.7830266259108773, -0.5605784036815882,
+              -0.4409773606270875, -0.49636250754717803, -1.549108447216227, 0.6261185797820117, -1.260881611110821,
+              1.691411217905281, 0.899655093658585, -1.0875528162122174, -0.7120948701980732, 1.8214705523154269,
+              1.3010380076854968, -0.4492643980075144, 0.9914465230608682, 1.7590027691290615, 0.8514661670055963,
+              1.5263431803492642, -1.7260779024351258, -0.14589666108296218, 0.18011804793376918, 0.7175880982696512,
+              -1.0399388762140145, -1.0480376846250712, 1.5656146512942648, 0.4435540525930799, -0.4175857955829816,
+              -1.8218436496980575, -0.9346408060646185, 1.40089015453285, 1.6926667426168764, 0.4187248632147291,
+              -0.6755275086264145, -0.7011229448771363, -0.9528087286614646, 0.0730922604589237, 0.6252467328216804,
+              -0.5573518555770702, 0.9864121888624755, 0.6646486706800783, -0.8405364163020792, -1.0505815213688878,
+              0.8989991238262265, -0.5022516947851985, -1.784806766373471, 1.2637708002659025, 0.5065772818030325,
+              0.9973024415787677, 0.08348064671549338, -1.757630249437522, -0.2016005631449005, -1.0360120513086803,
+              1.786128872822113, -1.2720919225213843, 1.3430514506545927, -1.0762325516117865, -1.0578995596104255,
+              -0.47242526972271204, 0.05539697038437996, -1.08558757453563, -1.404337586710036, -0.059247790489052043,
+              1.3998034069978171, 1.6067367856721155, -0.5185826391883994, -0.051896682542695416, 0.11112005542023429,
+              -1.2231398633939348, 0.2886372299242277, 0.6564469519248641, 1.67404118804063, -1.538487261793886,
+              0.18551945213331145, -1.1342837192256061, -0.3318725405647953, -1.4531152273595227, -0.6934713826285721,
+              -0.24436235286417052, 0.6776292484438171, 0.8871814678850702, 0.41826798275898014, 1.0161513742931785,
+              -0.13947907673300097, -0.7736759327375049, -0.43981678279829683, 1.635191807530191, 1.3044401854805878,
+              -0.3097446711021723, 1.8125726195847056, -0.26127912212234694, 0.8564630403854094, -1.519521818793156,
+              -0.5727391479884938, 1.7015469847109976, 0.663240965083145, 0.31064120951508656, 1.4030451184981052,
+              0.3325065959732836, -0.7178902057747756, 0.6090652378284442, 0.8426138183122633, -0.580146652112278,
+              -0.6076938097212707, -1.6599273271373782, 0.29960912457791444, -1.6741835731853065, 1.5428301790607195,
+              0.8970548194971704, 1.6066845600081736, 0.5404165757730146, -1.9537941867764292, -1.5234595572340748,
+              0.5293735217702951, -0.64620260665742, 1.8818640992235771, -1.7237764606754276, -0.8040024538741264,
+              0.0642546885214017, 1.4395299343641659, -1.462587128675942, 0.011882540823848764, 0.12033421748154716,
+              0.5458210215408705, -1.5141295301316422, 1.8809343680577308, -1.8801856621666753, -1.901376259472575,
+              -1.4374202976060095, 0.8473513507453765, -0.896351895119154, 0.457751001832321, 1.876657552919962,
+              1.267733433184599, -0.30894648094866195, -1.8178016120669414, -0.7711776919446018, 0.29038564786361576,
+              1.6396189720781438, 0.9597929848181161, -0.34227788522140834, -1.4450087753233527, -1.7068508353679626,
+              0.8426935759536303, 0.7173810205823674, -0.6580236891322881, 0.8663322021812405, -0.38112472550089915,
+              -0.3331447946260786, 1.8551673806318556, -0.6731525492100126, -0.009001319785657103, 0.41039833755685784,
+              0.025091358839535616, -0.49823213412251555, -0.9827448714264726, 1.1077851800046377, 0.5740585983905078,
+              -0.7235926614954762, 0.5059901875180826, -0.850177898505664, -0.05453987892121592, 0.8633840127545733,
+              -0.3153969644106205, -1.3028092681229868, 0.7523083527030074, 1.413775575813558, 1.0697458650110754,
+              -1.1839403319780022, 1.5167022074836893, -0.36486781099211996, -1.7835462010879564, -0.6061285803342944,
+              1.9969466536022722, 1.9531672204642883, 0.7967381388222403, 1.0934589095880973, 1.6405590176012312,
+              0.3501113568054244, -0.8786338692108497, -0.0545508019996932, 1.4464849975584952, -1.8853956921596513,
+              0.25983013847132774, 0.8440414107184964, -1.8818826057620326, -0.22674619971532906, -0.35951513414106007,
+              1.6757192364875237, -0.15819503713874195, 1.6691357866915144, 1.8534771980207108, -1.8297709996602967,
+              -1.6514392305036534, -0.2343385561012088, -1.820925987823773, -1.2556074451315578, 1.0621490016055715,
+              1.2109203955756476, -1.1500152481919903, 1.0466452723330733, -0.5833431814034746, -1.0817348313127493,
+              -0.40295742758029984, 1.0986483593098368, -0.42704879342020696, 0.7065668399686658, -1.7278295290305223,
+              -0.7183051438456021, 0.15944089245801774, -1.0276995188812146, -1.6398474518024653, -1.2318071869917695,
+              0.23333723988807886, 0.4626060172442088, 1.2255286520425894, 0.9652309086097803, -1.5254810192280601,
+              -0.6683416541099767, -1.9000628944332894, -0.7244291249780632, 1.0347731523086239, -0.9009629081875952,
+              -0.11734057204667625, -0.6698286923748489, -0.9472592207823913, -1.8286232413501864, 0.2898215560382935,
+              -1.422921306299517, 0.6696091233175077, -1.014229444534946, 0.7139087775492996, -0.23241859018004174,
+              0.9620272535910068, 1.1497473812544134, -0.8640235723632799, -1.3121563547519584, -1.0396316763878488,
+              1.7769188425035614, -1.014039070903868, 0.4489833453895331, 1.0763456360970807, 0.6229672422660295,
+              -1.7464692834364435, -0.3663893352922596, -0.8769410861076103, 1.0608706111705635, -0.9262810932490861,
+              -0.5468726859924029, 1.7966956706569919, -1.0663859467239112, 0.7378193848221777, 1.068192632208282,
+              1.3312476842417755, -1.5902814653913628, -1.0131078061920382, -0.6235296781383814, 0.37504339068020176,
+              0.9126508132330242, 0.3999532385546267, 0.6552059838941, -1.6053942866342332, 1.7900258342291853,
+              0.08171912833062756, 1.6137883979635745, 1.3466843147948442, 1.8505801094553158, 1.3728813966930913,
+              -0.4473279660140852, -0.20009909626620814, 1.4067472413245437, 0.36658966226851764, 1.4566800897303072,
+              -0.11633958899045194, -1.9458410018060368, 0.5651869174802018, -0.9885077925334622, 0.24385043055374833,
+              -0.4407908079663816, -1.7015252126482139, 1.5396273477916198, -0.9801103159055833, 0.9331708410017399,
+              0.058036076446482454, 0.29277070369481883, 1.6896333641554682, -0.2872886303585469, 0.2981100430160728,
+              0.1670720357805502, -1.6828245857476496, -1.4681960401028125, -0.9933436100210251, -1.827639383468739,
+              0.08433714147463611, -1.1318904274562795, 0.9840669856671846, 0.8204547128989219, 0.5959008566248984,
+              -0.22424536381303728, 1.765380932910376, 1.050492887173749, 0.8249285352430338, 1.5823516671950122,
+              -1.4695844512182843, 1.4009128159343485, 1.0886951647082785, -0.4963319371911856, -1.8633848779197413,
+              0.660465126445569, -1.2319891082878298, 1.6547000157065659, -1.6403428022350113, -1.2308283749125177,
+              0.9142339764828238, 0.18691349086990705, -1.148271069003111, -1.266859733272054, -1.4482873768560758,
+              1.6888579757850914, 1.5392518897570104, 0.41499451567073464, -1.0517290742419663, -0.9856143466540894,
+              0.704611691207357, -0.27871441123648655, 0.445828139270918, -0.8125969294930622, 1.521716695437079,
+              0.5657668386735519, -1.813374372841099, 1.0076529676525672, -0.5864288471977783, 0.5855480422270194,
+              -1.8330974772064481, 0.9782157266479414, -1.6230556142249775, 0.5265126362718373, -1.6878701852107563,
+              -0.3955226747487526, -1.3888929741627605, 0.2905034183357449, 1.0489208524387843, -0.3118857187498678,
+              -0.6289506096761981, -0.05735383950307149, 1.8668941791416147, 0.8898345005884769, 1.7147482078759548,
+              -0.12387314928310289, 0.2298818139402634, 1.9294076224252024, -0.43580099597679656, 1.7512542893273144,
+              -1.258214124547644, 0.9779750741630782, -0.2566261319632144, -1.9813300069235993, -1.3498734101224414,
+              0.7506344777083953, 1.8867470646651894, -1.918953273635191, 1.7429571494233906, 0.7638060343526085,
+              -0.44782770384121484, -1.1300950570142518, -1.4753506380821149
+            ],
+            "dims": [2560],
+            "type": "float32"
+          }
+        ],
+        "outputs": [
+          {
+            "data": [
+              0.027313262224197388, -0.005701353773474693, 0.1959753781557083, 0.10011828690767288, -3.6098804473876953,
+              0.00864929985255003, -0.011981655843555927, -0.11036527156829834, 0.6647213101387024, 0.276733934879303,
+              -0.6354819536209106, -0.014075735583901405, -0.059462033212184906, -0.3388662040233612,
+              -0.017422985285520554, -0.043299876153469086, -0.16756349802017212, -0.07582926005125046,
+              -0.16514767706394196, 2.9962074756622314, -2.600733757019043, 0.04413439333438873, 0.07896167039871216,
+              1.1207873821258545, -0.032255738973617554, -0.09964963793754578, -2.1782073974609375,
+              -0.01814177632331848, 0.08586198836565018, 0.380964457988739, -0.01918521337211132, 0.006902141962200403,
+              0.0669674500823021, -0.09234043955802917, -1.0496017932891846, 0.020094068720936775, -0.11474193632602692,
+              -0.056350305676460266, -2.2275612354278564, -2.648808240890503, -0.017779357731342316,
+              -0.2514607608318329, 0.008559616282582283, 0.010673644952476025, -0.32376542687416077,
+              -0.16903237998485565, 0.026010606437921524, -2.163571357727051, 0.35461699962615967, 1.5194188356399536,
+              -1.1094666719436646, -0.012471643276512623, 0.0767873078584671, -0.21644049882888794,
+              -0.043257202953100204, 0.001341399853117764, -0.1367240697145462, 0.005313852336257696, 2.144134759902954,
+              0.11904949694871902, -0.26428619027137756, 0.014375614002346992, 0.06913577765226364, -4.196413516998291,
+              -5.172718524932861, 0.06162356957793236, -0.0010976337362080812, 0.21020400524139404, 4.567638397216797,
+              -0.059758733958005905, 5.990215301513672, 0.19405193626880646, 0.003011247143149376, -0.1036064475774765,
+              -0.016247211024165154, -0.12790939211845398, -0.08561908453702927, 0.25051021575927734,
+              0.07514326274394989, -0.6767844557762146, 1.5661166906356812, -4.326471328735352, 0.07481537014245987,
+              -0.7969828248023987, -0.45468205213546753, 0.21233250200748444, 3.420551061630249, -0.0759267508983612,
+              0.16086462140083313, -0.3939729928970337, 1.3957020044326782, -0.2972649931907654, -0.31666669249534607,
+              0.35118427872657776, 0.3117898404598236, 0.088602215051651, 0.17165301740169525, -0.8542330265045166,
+              -0.06893759965896606, 0.08126193284988403, 0.02327258512377739, -0.1314769983291626, 0.035079699009656906,
+              1.2096712589263916, 0.9461245536804199, -6.337772846221924, 5.575413703918457, -3.9876515865325928,
+              0.01430205162614584, -2.093717098236084, -0.056584782898426056, 0.05612698942422867, -0.01935030147433281,
+              0.0010159225203096867, -0.38109132647514343, -0.000587565591558814, 0.12273997068405151,
+              -1.4854758977890015, 0.016024703159928322, -0.05192752555012703, -0.257480651140213, -4.023406982421875,
+              0.03150588274002075, -5.065948486328125, -0.07601942121982574, -0.04482676833868027, -0.01937261037528515,
+              -6.9667582511901855, -0.05368780344724655, 0.6142992377281189, 0.3128206431865692, -0.3862888216972351,
+              0.053061991930007935, -0.24360240995883942, -0.018439287319779396, 0.1868235021829605,
+              0.005632609128952026, -0.10385553538799286, 1.2077943086624146, -0.07107469439506531, 1.771382212638855,
+              0.3696843981742859, -0.31587034463882446, 0.0002820117224473506, 0.055834002792835236,
+              -0.7621694803237915, -3.773604393005371, -0.12602387368679047, 0.8626934289932251, -4.139935493469238,
+              -0.08643748611211777, -2.25795841217041, -0.025201046839356422, -0.28647178411483765, -0.5088312029838562,
+              -2.3566224575042725, -0.20447342097759247, 0.3922976553440094, 0.047735944390296936, -0.09984598308801651,
+              4.436963081359863, 0.17725177109241486, 0.01968466490507126, -0.4080508351325989, 0.600350558757782,
+              -0.1489681750535965, -2.3178586959838867, 0.010645782575011253, -0.5052445530891418, 0.12876634299755096,
+              2.72904109954834, 0.007315368857234716, 0.503023624420166, 0.9695355892181396, 0.4959081709384918,
+              -3.562389612197876, 0.3780525028705597, -0.194877028465271, -1.0815603733062744, 0.6436595320701599,
+              -0.10088582336902618, -0.06308454275131226, -3.7394943237304688, -0.0011674398556351662,
+              -0.19378826022148132, -0.2329375147819519, 0.029814809560775757, 0.20438098907470703,
+              -0.23114298284053802, 0.026816120371222496, -7.350013256072998, -0.011900502257049084, 2.0180928707122803,
+              0.20987474918365479, -3.209254503250122, -7.5496602058410645, 0.232008695602417, 0.0027162893675267696,
+              -0.4211888611316681, 0.287914901971817, 0.028367964550852776, 0.015583046711981297, 0.07393462210893631,
+              0.6514078974723816, -0.04090245068073273, -0.004561522509902716, 0.2931022346019745, -0.4355356991291046,
+              -0.1867547184228897, -5.984931945800781, 0.044270407408475876, -0.35987964272499084,
+              -0.033762961626052856, -0.2677021622657776, -0.013161826878786087, -0.010206296108663082,
+              -4.528798580169678, 0.4174078106880188, 0.12906667590141296, 0.04690857604146004, -0.08034832775592804,
+              1.5188398361206055, 1.3247699737548828, 0.011872933246195316, 0.055544108152389526, 0.0025585023686289787,
+              -7.696174621582031, 0.030730921775102615, 0.039231084287166595, -0.4407111704349518, -0.3110845386981964,
+              2.284346342086792, -0.027610689401626587, 0.09054349362850189, 1.7885178327560425, -0.11802572757005692,
+              0.03795969486236572, 2.373623847961426, 0.11311819404363632, 0.009557336568832397, -0.02887658029794693,
+              -0.28853726387023926, -0.17708882689476013, -0.22821268439292908, 0.0237746462225914, 3.257477283477783,
+              0.2507217526435852, 0.17421714961528778, -0.12231585383415222, 0.18179824948310852, -0.3428541123867035,
+              0.024907970800995827, 0.2441745400428772, 1.13312828540802, -0.0009440237190574408, -0.594701886177063,
+              -0.008615869097411633, 4.071537017822266, 0.6198470592498779, -0.3097928464412689, -0.4404515027999878,
+              -7.008431911468506, 0.024559520184993744, 0.1267288327217102, 0.2140975296497345, 0.5778637528419495,
+              -0.03296203166246414, 0.8842242360115051, 0.16367295384407043, -0.3035202920436859, -0.09384048730134964,
+              0.6805808544158936, -0.2706672251224518, -1.429656982421875, -0.1497703641653061, 0.4302230775356293,
+              -1.864505648612976, 0.01007054653018713, 0.23598365485668182, -0.08086620271205902, 0.001842734171077609,
+              0.08458849042654037, 0.3059651553630829, -0.06515960395336151, -3.803208589553833, -0.41865429282188416,
+              0.2828770875930786, 3.459416151046753, -0.00129605398979038, -9.578699111938477, -0.06560757756233215,
+              0.026055261492729187, 0.0672057718038559, 0.08423102647066116, -1.3624160289764404, 0.013521464541554451,
+              -0.027731282636523247, -0.9650477766990662, -0.012694457545876503, -0.2116907835006714,
+              -0.10714730620384216, 0.0034909709356725216, 1.5338910818099976, 0.0006434338865801692,
+              0.1618947833776474, -0.10659407079219818, -6.774624347686768, -0.08567759394645691, 0.5162889361381531,
+              0.11074300855398178, -0.09961605817079544, -0.005474632140249014, 0.1132681593298912, 0.10878968983888626,
+              -0.4140564203262329, -6.274385452270508, -3.410104274749756, -0.2155490219593048, 0.13330507278442383,
+              -0.2973288297653198, -0.5738739371299744, 0.3465871810913086, -0.2567448318004608, -0.13507360219955444,
+              -0.014550707302987576, 0.039058394730091095, -0.25891509652137756, -0.30598220229148865,
+              -0.14163219928741455, 1.2217881679534912, -0.2967555820941925, 0.024605438113212585, -0.03864026814699173,
+              -1.4379907846450806, -3.0257911682128906, 10.609665870666504, -0.0002576113329268992, 0.1658751666545868,
+              -0.01822504773736, 0.1141287237405777, -0.3072766363620758, 2.9927172660827637, 0.42983293533325195,
+              0.9799204468727112, -0.007520963903516531, 3.565046787261963, -0.18206597864627838, 1.1247198581695557,
+              -0.0011717785382643342, 0.0026591955684125423, 3.689824104309082, 0.03598639369010925,
+              -0.09997520595788956, 0.06576227396726608, -1.7916548252105713, 0.030312752351164818, -0.4527510106563568,
+              -0.26613515615463257, 0.025749003514647484, -0.17866003513336182, 0.18729515373706818,
+              0.003528681118041277, -0.1579633355140686, 1.070467472076416, -0.20637144148349762, -0.10882926732301712,
+              -6.439236640930176, -0.25033196806907654, -0.26708030700683594, 0.036800775676965714, -1.9130735397338867,
+              0.11082696169614792, 0.10686857253313065, 7.136363506317139, -2.1805343627929688, 0.002802944276481867,
+              -1.0081117153167725, -0.08366546779870987, -0.07263432443141937, -0.011199882254004478,
+              -0.015524221584200859, -0.008838756941258907, -0.005488056223839521, 0.6502953767776489,
+              -0.010726823471486568, 0.41685575246810913, -0.23590049147605896, -0.0868658497929573,
+              -0.07914192229509354, 0.22732190787792206, 0.0985199362039566, 0.013477811589837074, 0.5970719456672668,
+              -0.12020514905452728, -0.0009808604372665286, 0.1139480322599411, -0.5872443914413452, -5.610537528991699,
+              0.14893069863319397, 0.44541916251182556, 0.599539041519165, -0.028194887563586235, -0.42580458521842957,
+              -0.24352392554283142, 0.25486475229263306, -3.251058578491211, 0.042388759553432465, -0.15446388721466064,
+              -0.01016155257821083, 0.07647261768579483, 4.707305431365967, -0.0834866315126419, -0.21240641176700592,
+              0.34789028763771057, 0.0710633248090744, 0.013448074460029602, -0.18779638409614563, 0.022113602608442307,
+              -1.8543815612792969, 0.012882213108241558, 0.0508059561252594, -2.1125378608703613, 1.00347900390625,
+              0.34287792444229126, 0.023498279973864555, 0.2604916989803314, -0.854418158531189, 0.3368889391422272,
+              -3.5361156463623047, -0.3238249719142914, 0.09940877556800842, 0.011137581430375576, -0.09505806118249893,
+              1.4575674533843994, 0.18798890709877014, 0.13481135666370392, -3.1009016036987305, -0.0046508763916790485,
+              -0.002944883657619357, 0.008598391897976398, -0.05753857269883156, -0.007956058718264103,
+              0.7023902535438538, -2.114570140838623, 0.6187217235565186, 4.448208808898926, 2.5069539546966553,
+              -0.10476846992969513, -0.04466601461172104, 0.32297447323799133, 0.06604880094528198,
+              -0.0016604098491370678, -2.8530216217041016, -1.2369211912155151, -0.02766953594982624,
+              -0.025159431621432304, 0.0029653196688741446, 0.04569535329937935, 0.03927958756685257,
+              -0.0021295847836881876, 0.024881726130843163, 0.028491219505667686, -0.0042065782472491264,
+              -0.05266435816884041, -0.08988969027996063, -0.04083021357655525, -0.040847159922122955,
+              3.154191732406616, -0.06132543459534645, -0.7507759928703308, -0.029571423307061195, 0.03537856787443161,
+              -1.4017058610916138, 0.3888748586177826, 0.9719987511634827, -0.010947618633508682, 3.847195863723755,
+              1.015498161315918, 0.012234801426529884, -0.3849196434020996, 0.5072981119155884, -0.07829593122005463,
+              0.2524659037590027, -0.13102610409259796, 0.020525088533759117, 0.15267324447631836, -0.11044808477163315,
+              0.008630136027932167, 0.0009689829312264919, 2.615210771560669, 0.3638320863246918, 0.2452821582555771,
+              0.01092306338250637, 0.03127167001366615, -3.899691104888916, 0.16573800146579742, -0.06733611971139908,
+              -0.39246127009391785, 5.207749843597412, 0.05021298676729202, 0.17778877913951874, -1.4260956048965454,
+              0.19870443642139435, -0.27705708146095276, -0.11092191934585571, -0.09528861939907074,
+              -0.5703115463256836, 0.5077508687973022, 0.3938415050506592, 0.47991737723350525, 0.7821948528289795,
+              -0.2891596853733063, -0.3829837143421173, -0.010832893662154675, 0.15224608778953552,
+              -0.00014581253344658762, 0.00025647180154919624, 0.02536843903362751, -0.06366542726755142,
+              -8.023703575134277, 0.027589797973632812, -0.1799485832452774, -0.2505863904953003, -0.3841714859008789,
+              -0.00031740960548631847, -0.04642002657055855, 0.38759565353393555, -0.05341910943388939,
+              -0.37632811069488525, 0.6983012557029724, -0.10781889408826828, -0.0007781427120789886,
+              -0.0877101942896843, -0.5221861600875854, -0.07871037721633911, -2.2496471405029297,
+              -0.042697690427303314, 2.38197922706604, 0.035262834280729294, 0.0695495679974556, 1.6927565336227417,
+              -10.396214485168457, 0.05338706448674202, -2.813828468322754, -3.691652536392212, -0.008508461527526379,
+              1.570121169090271, -0.4011033773422241, -0.24479898810386658, 0.30835238099098206, 0.1998486965894699,
+              0.0945337787270546, 0.39656326174736023, -0.23758645355701447, 0.1661674976348877, -0.04912934452295303,
+              0.024212513118982315, -1.0319569110870361, 0.04704924300312996, -0.058226123452186584,
+              -1.6492913961410522, 0.6406868100166321, -0.005447663366794586, 0.19865849614143372, -0.3373563289642334,
+              -0.03675329312682152, -0.19241032004356384, -0.43262550234794617, -0.08300381153821945,
+              -0.014068910852074623, 0.11309102177619934, -0.02719084918498993, 0.2096254676580429,
+              -0.02292095310986042, 0.4072689712047577, 0.0003724964044522494, 0.20711149275302887, 1.0793871879577637,
+              0.06120060756802559, 0.11688049882650375, -0.0023522432893514633, -0.9283630847930908, 2.477475881576538,
+              0.26047614216804504, 0.7143173813819885, -1.4795730113983154, -0.15119962394237518, -1.4587875604629517,
+              -0.03378799930214882, -0.3518248498439789, 0.1747346669435501, 0.002720446093007922, 0.865147590637207,
+              0.015568590722978115, 0.1952929049730301, 0.1818414330482483, -1.4265116453170776, 0.2012042999267578,
+              -0.2151491343975067, 0.11098571866750717, -0.16003955900669098, 7.798532962799072, 0.299221396446228,
+              -1.0280503034591675, -0.1838797777891159, 0.005458994302898645, -0.13420982658863068,
+              -0.06905319541692734, -1.8678100109100342, 0.40917104482650757, -0.09650467336177826, 0.2953720986843109,
+              0.008414564654231071, 0.1998010128736496, 0.34882158041000366, -0.17196929454803467, 0.031611330807209015,
+              0.08629407733678818, -0.1856321394443512, -0.22879824042320251, -0.09241079539060593, 0.2628664970397949,
+              0.03050280548632145, 0.15829861164093018, -0.06391621381044388, -0.01048242673277855,
+              -0.010927671566605568, 1.0013593435287476, -0.15796290338039398, -0.10746872425079346,
+              -0.0013137627393007278, -0.2024063915014267, 0.0005700114998035133, 0.03609214723110199,
+              -0.4168614447116852, 0.12660957872867584, -0.005800928454846144, -0.5319929718971252, 0.32967525720596313,
+              0.028021158650517464, -1.217489242553711, -0.09096843004226685, 2.344956636428833, 5.432365894317627,
+              0.7993219494819641, -0.15543963015079498, -0.0007157247746363282, -0.08481337875127792,
+              -0.12131065130233765, 1.1516586542129517, -0.01504613272845745, 0.03704383224248886, 0.004402496851980686,
+              -0.581766664981842, 0.07592090964317322, 0.3745843768119812, -0.4989067614078522, 0.04438084363937378,
+              3.175107479095459, -4.4077911376953125, -0.002988559426739812, 1.0332038402557373, 0.006027049385011196,
+              -0.0018258332274854183, 2.3033533096313477, -0.10134122520685196, 0.02520211599767208,
+              0.005497010890394449, 0.0003968894889112562, -0.00029831190477125347, 1.2718113660812378,
+              -0.34650272130966187, 9.8225736618042, -6.33831787109375, -0.9639870524406433, -4.028343677520752,
+              0.016925739124417305, -0.0449683852493763, 0.6271276473999023, -0.13903772830963135, -0.06179821118712425,
+              5.860689640045166, -0.005071749445050955, 0.5026626586914062, 0.3309268057346344, 2.2567200660705566,
+              -4.23521089553833, -0.01613122597336769, -0.02665529027581215, 0.04668727517127991, 3.150425434112549,
+              -0.0052042026072740555, 1.067151427268982, 0.025044972077012062, -1.325771689414978, -0.1094195619225502,
+              0.26904425024986267, 0.6204038262367249, 0.006285298615694046, 0.002915250603109598, -0.6165238618850708,
+              -4.090943813323975, 2.8669519424438477, -0.09453117847442627, -0.09316729754209518, 0.034191157668828964,
+              -6.707476615905762, -0.20231620967388153, -1.6191682815551758, 2.0373117923736572, -0.10501966625452042,
+              0.0019581259693950415, 0.21420015394687653, 0.0156276635825634, 7.224427223205566, 0.1236666664481163,
+              0.294806569814682, -0.0061331382021307945, -0.10612531006336212, -0.8333144187927246,
+              -0.001029952079989016, 0.38204053044319153, -0.03597458079457283, -1.41422438621521, -0.2833155691623688,
+              -0.0006075138808228076, -0.3701440095901489, 0.1309424191713333, 0.06839437037706375,
+              -0.0017361472127959132, -1.69569993019104, -0.20629459619522095, -0.5999218225479126, 0.114132359623909,
+              6.6828436851501465, -0.36263933777809143, 0.41539111733436584, 0.022192703559994698, -0.06610587984323502,
+              -1.683022141456604, -0.2835130989551544, 0.27643388509750366, -0.6247501373291016, -1.421617865562439,
+              -0.08159351348876953, 0.005017416086047888, -2.026592493057251, 0.0009393739746883512, 1.760980486869812,
+              0.00019237841479480267, 0.0022294363006949425, 0.22415778040885925, -0.09657209366559982,
+              -3.056180953979492, -0.24515365064144135, -1.7638490200042725, -1.900456428527832, 1.7747641801834106,
+              -0.9473960399627686, 0.27619242668151855, -0.11893711239099503, 0.7769895792007446, 0.09835439175367355,
+              0.0019296495011076331, -0.043601375073194504, 0.03626292571425438, 0.1591210663318634,
+              0.45964139699935913, -0.06853707879781723, -2.4563350677490234, -0.13421472907066345,
+              0.040955424308776855, -0.2855738699436188, 0.11433675140142441, 0.00306497560814023, -0.48573875427246094,
+              -0.046301428228616714, 0.6907474994659424, -3.983771800994873, 2.3131954669952393, 0.05256381258368492,
+              -0.0911293551325798, -1.8945766687393188, 0.03453084081411362, 1.8747694492340088, 0.3433213233947754,
+              -1.1485600471496582, 1.6418366432189941, -0.13894057273864746, -3.1275031566619873, -0.9726752638816833,
+              -0.0012102212058380246, 3.898921251296997, 0.2646528482437134, 0.01665414497256279, -0.06312943994998932,
+              -3.0655016899108887, 0.024803519248962402, -0.25584203004837036, -1.3387784957885742,
+              -0.03684002161026001, -0.524848461151123, -0.9969499707221985, -1.8777778148651123, -0.14820723235607147,
+              -8.182234764099121, 0.015234949067234993, -0.010302969254553318, 0.10785042494535446,
+              -0.02237902209162712, -2.500221014022827, -0.006121153011918068, 0.054380882531404495, 2.607618808746338,
+              -0.48403942584991455, 1.7271841764450073, -0.054084569215774536, 0.04733904451131821, 0.29113033413887024,
+              0.3090323507785797, -0.4069989323616028, 0.827186644077301, -0.8676308393478394, -0.18980173766613007,
+              0.017093969509005547, 0.05046425387263298, 0.025303032249212265, -0.9938563704490662, 0.0307749193161726,
+              -0.003506980137899518, -2.145794153213501, 0.08889109641313553, -0.2744760513305664, 0.02533753775060177,
+              -0.008416163735091686, 1.6139867305755615, -6.39102840423584, -4.842134952545166, -0.7291613817214966,
+              0.9694556593894958, 0.07247202843427658, 0.005149913020431995, 0.0029090321622788906, -0.1867554932832718,
+              0.0015806574374437332, -0.04847263917326927, -0.18512502312660217, -0.04184968024492264,
+              -1.2331782579421997, -0.6159178018569946, 0.025481248274445534, 0.11850030720233917, -0.2734290063381195,
+              0.26392263174057007, -0.2278929203748703, -2.4300358295440674, 0.0007563972030766308, 1.2603007555007935,
+              -0.009525062516331673, -2.5598459243774414, -0.1015859916806221, -0.3136966824531555,
+              -0.0023580349516123533, 3.0281076431274414, 0.0851983055472374, 0.18700700998306274,
+              -0.008541906252503395, -0.007119827438145876, 0.42274990677833557, -0.06235692277550697,
+              0.3246764540672302, 0.047069381922483444, 0.00011004792031599209, -0.49105241894721985,
+              0.041874051094055176, 0.013326031155884266, -2.525364875793457, 0.5126351714134216, -0.01582668349146843,
+              -0.3391125500202179, -0.1049877479672432, -0.36534854769706726, 0.027926098555326462,
+              0.004374756012111902, -0.10876958072185516, 0.579942524433136, 0.34367814660072327, 0.12710949778556824,
+              -0.28762391209602356, 0.028134111315011978, 0.001072783605195582, -0.430772066116333, 0.21052536368370056,
+              0.09690351784229279, 0.000786184798926115, 0.06906910240650177, -3.5896573066711426, 0.24118882417678833,
+              -3.176041841506958, 1.3121603727340698, -0.40836477279663086, -7.590582370758057, -1.9390276670455933,
+              -0.06406442821025848, 0.00011302023631287739, 0.013246525079011917, 0.21886053681373596,
+              0.090825155377388, -0.06342892348766327, -0.14027893543243408, 0.017751706764101982, 0.11045858263969421,
+              -0.05397825688123703, 0.2152465432882309, 0.14184458553791046, -1.6443814039230347, -1.023624300956726,
+              0.050706081092357635, -0.8185511231422424, -0.009972692467272282, -1.6231411695480347,
+              -0.010527506470680237, 1.5382870435714722, -2.6943516731262207, 0.965884804725647, -0.5423170924186707,
+              -2.0661613941192627, -0.4436858892440796, 0.0058816042728722095, -0.665194034576416, 0.8273401260375977,
+              0.10996203124523163, -0.1316700130701065, 0.027179520577192307, -0.2735114097595215, -0.10301132500171661,
+              -1.906333565711975, -0.32074108719825745, 0.4478001892566681, -1.1052520275115967, 0.009423047304153442,
+              0.5322814583778381, -0.004648196045309305, -0.009632693603634834, -0.7735386490821838,
+              0.005249344743788242, 0.11850841343402863, -0.0034776863176375628, -0.1439099758863449,
+              0.2767007648944855, -2.8716399669647217, -0.16290035843849182, -0.1801692247390747, 0.19117145240306854,
+              -0.7634338736534119, -0.29985561966896057, 0.009378351271152496, -0.6186265349388123,
+              -0.13845475018024445, 0.03558935597538948, -0.20145508646965027, -0.5337783694267273, 0.28876203298568726,
+              -0.5732369422912598, 0.03304499760270119, 0.8687714338302612, -0.2524224817752838, 0.4371426999568939,
+              0.03568745777010918, 0.4382450580596924, 0.03245728462934494, -0.14247629046440125, 0.7598915696144104,
+              0.30114904046058655, -0.21331092715263367, -0.0028205476701259613, 0.09227168560028076,
+              0.008056613616645336, 1.635034203529358, -0.166751429438591, -0.020675446838140488, -1.2244166135787964,
+              -0.10547340661287308, 2.802537441253662, -0.004014655947685242, 3.690307140350342, 0.0017954192589968443,
+              -0.45281466841697693, 0.020796259865164757, 1.5265557765960693, 0.20084713399410248, -0.21376214921474457,
+              -0.025406286120414734, 1.9211277961730957, -0.7583361268043518, 4.267587661743164, -4.551294803619385,
+              -0.08887865394353867, 0.07695532590150833, -0.17959536612033844, 0.5096666216850281, -9.957548141479492,
+              -0.11618410050868988, 0.09543278813362122, 0.270590603351593, 0.024046115577220917, -0.245524600148201,
+              -0.307966023683548, 2.2781827449798584, -0.14958485960960388, -0.06977607309818268, 2.3428077697753906,
+              0.8067795038223267, 0.9448233842849731, 0.35110360383987427, 0.4814533293247223, 0.00956026278436184,
+              -0.03395213186740875, 0.2255835384130478, 0.4806722402572632, 0.0005861452082172036, -0.5671629309654236,
+              0.5004423260688782, 7.04985237121582, -0.41439759731292725, -0.2847898304462433, -0.10965365916490555,
+              0.3427604138851166, 0.12897160649299622, -0.6046913266181946, -0.1840457171201706, 0.002393739065155387,
+              0.41798320412635803, 3.0662004947662354, -0.0002512158825993538, 3.1039047241210938, -0.6795744895935059,
+              0.3395420014858246, 0.33144497871398926, 6.939244270324707, 0.0011752373538911343, 0.09660591185092926,
+              0.4894343912601471, 4.00507116317749, 0.005761822685599327, 0.5680277943611145, 3.315598726272583,
+              -0.5373169779777527, 0.44444069266319275, -0.0027646978851407766, 1.6829670667648315, -2.6327450275421143,
+              7.026787757873535, 1.5361366271972656, -0.3418486714363098, -0.21611177921295166, -0.05756537616252899,
+              -0.023443035781383514, 0.23109422624111176, 0.21568432450294495, 1.236294150352478, -0.4581376612186432,
+              -4.188883304595947, 0.28338590264320374, -0.04076884314417839, -0.0198111142963171, -1.1872543096542358,
+              -0.062372151762247086, 2.7870607376098633, -0.6517040729522705, -1.7516529560089111, -1.8091800212860107,
+              -0.15286022424697876, 0.09354468435049057, 0.11334053426980972, -1.8259668350219727,
+              -0.017136069014668465, 0.12429703027009964, 0.00773972412571311, -1.0446529388427734, 0.2356342375278473,
+              0.9886437654495239, 0.18150633573532104, 0.4682118594646454, -0.11415664851665497, 0.003153775818645954,
+              0.03332524746656418, -1.1180988550186157, 4.163827896118164, -0.18159173429012299, 0.0999181717634201,
+              2.058738946914673, 4.2595014572143555, 0.010485258884727955, 0.16270849108695984, -0.9842506051063538,
+              -0.0003203578235115856, 6.040156364440918, 1.308574914932251, -0.36853301525115967, -0.29669252038002014,
+              0.2741331160068512, -0.3422505855560303, -0.7587988972663879, 2.9686927795410156, 0.8209773898124695,
+              -0.0007857486489228904, 0.02395622618496418, 0.05102493613958359, 0.15041151642799377,
+              -0.002741719363257289, -4.980742454528809, 0.2880830764770508, 0.24828404188156128, 0.15224777162075043,
+              0.13059845566749573, 1.5662918090820312, -0.8474074006080627, 0.08810389041900635, -0.2630780041217804,
+              0.43268874287605286, -0.001932788989506662, 0.012391841039061546, 3.4719245433807373,
+              -0.0024825239088386297, -0.11508434265851974, 0.40480512380599976, 0.4639693796634674, 1.0610097646713257,
+              -0.3626452386379242, -0.18480324745178223, 0.2711804509162903, 0.21260038018226624, -0.02785545215010643,
+              -0.11340377479791641, -0.027071641758084297, -0.22035427391529083, -0.10667331516742706,
+              0.16908612847328186, 0.10428506135940552, 0.1233300194144249, -0.06643304973840714, 0.2004912942647934,
+              0.09342359751462936, 0.03175133094191551, -1.5805491209030151, -0.4311752915382385, 0.5455954074859619,
+              0.15516425669193268, -0.04940091818571091, -0.1447768211364746, 0.21044854819774628, 4.243553161621094,
+              0.2046128362417221, -0.2688649892807007, 8.694140434265137, 1.6753796339035034, 0.05555065721273422,
+              -0.16497156023979187, 0.1828891634941101, 1.505862832069397, 0.08261094242334366, 3.104039430618286,
+              5.931700706481934, 0.4487259089946747, -0.011016261763870716, 0.012441087514162064, -0.5082470774650574,
+              -0.11641799658536911, 0.01356368139386177, -0.01659458689391613, 7.667582988739014, -0.346441388130188,
+              -5.981383323669434, 7.6806206703186035, -0.0383722260594368, 0.4603561460971832, -0.16010521352291107,
+              -3.173022985458374, -0.4581749737262726, 0.06485684961080551, -0.4382486939430237, 0.25564998388290405,
+              0.21537242829799652, 8.642731666564941, -0.00621763477101922, 1.9488575458526611, -0.030582817271351814,
+              0.00024394349020440131, -0.015434199944138527, 1.1591683626174927, 0.29453498125076294,
+              -0.06397286057472229, 0.2931598126888275, 3.056126594543457, 0.35364240407943726, -0.07242457568645477,
+              0.19602720439434052, 3.9850471019744873, -0.141653373837471, 0.7269659042358398, 0.020487932488322258,
+              -1.1716344356536865, -0.13872867822647095, 0.004658155608922243, 0.0002524183946661651,
+              -0.3965473771095276, 0.058615222573280334, -0.005210256204009056, -6.64661169052124,
+              -0.003978024236857891, -0.11946024745702744, -0.15917553007602692, -2.0323069095611572, 3.694988250732422,
+              -0.2484176903963089, 1.117063283920288, 0.04259220138192177, 5.167333126068115, -4.669308185577393,
+              0.0845528095960617, 0.011936561204493046, 5.875088691711426, -0.032051537185907364, -1.2815053462982178,
+              -0.010812301188707352, -0.021371034905314445, -0.0037929099053144455, 0.09110360592603683,
+              -0.2871546745300293, -0.2895969748497009, 0.2745248079299927, -2.462489366531372, -1.4751100540161133,
+              0.6493473052978516, 0.105231374502182, -0.11311662197113037, 5.5921311378479, -0.11590596288442612,
+              -4.284017562866211, -3.0435032844543457, 0.2767241597175598, -0.2098703682422638, -0.008056361228227615,
+              3.738365888595581, 0.03918733820319176, 0.5250627994537354, -0.03754068538546562, -1.1869362592697144,
+              0.0016376300482079387, -0.19201913475990295, -0.12353025376796722, -0.038338642567396164,
+              -0.5192811489105225, -0.07935589551925659, 2.1531660556793213, -0.002360888756811619, 0.3615436553955078,
+              -1.499021291732788, 0.6402538418769836, -2.886809825897217, 2.502922296524048, -0.0014745928347110748,
+              -0.09228605031967163, -0.14953434467315674, 0.2779182493686676, 2.071781635284424, -0.2248198240995407,
+              0.5830495357513428, -0.1257641464471817, -0.06734845042228699, 0.003910396713763475, -1.285413146018982,
+              -5.392889976501465, -0.0003311980399303138, 8.632763862609863, 0.1819709688425064, -0.013432486914098263,
+              -0.019152071326971054, -0.026376325637102127
+            ],
+            "dims": [1, 1, 1280],
+            "type": "float32"
+          }
+        ]
+      }
+    ]
+  }
+]
diff --git a/js/web/test/data/ops/conv-transpose.jsonc b/js/web/test/data/ops/conv-transpose.jsonc
index a249dc807fa0b..7038e2a4f8766 100644
--- a/js/web/test/data/ops/conv-transpose.jsonc
+++ b/js/web/test/data/ops/conv-transpose.jsonc
@@ -28,6 +28,37 @@
       }
     ]
   },
+  {
+    "name": "ConvTranspose without bias addition A - NHWC",
+    "inputShapeDefinitions": "rankOnly",
+    "opset": { "domain": "", "version": 17 },
+    "operator": "ConvTranspose",
+    "attributes": [{ "name": "kernel_shape", "data": [2, 2], "type": "ints" }],
+    "cases": [
+      {
+        "name": "T[0]",
+        "inputs": [
+          {
+            "data": [10, 20, 30, 40],
+            "dims": [1, 1, 2, 2],
+            "type": "float32"
+          },
+          {
+            "data": [1, 2, 3, 4],
+            "dims": [1, 1, 2, 2],
+            "type": "float32"
+          }
+        ],
+        "outputs": [
+          {
+            "data": [10, 40, 40, 60, 200, 160, 90, 240, 160],
+            "dims": [1, 1, 3, 3],
+            "type": "float32"
+          }
+        ]
+      }
+    ]
+  },
   {
     "name": "ConvTranspose without bias addition B",
     "operator": "ConvTranspose",
@@ -74,26 +105,22 @@
           },
           {
             "data": [
-              1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-              1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+              1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+              30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55,
+              56, 57, 58, 59, 60, 61, 62, 63, 64
             ],
             "dims": [4, 4, 2, 2],
             "type": "float32"
           },
           {
-            "data": [0.1, 0.2, 0.3, 0.4],
+            "data": [65, 66, 67, 68],
             "dims": [4],
             "type": "float32"
           }
         ],
         "outputs": [
           {
-            "data": [
-              100.0999984741211, 100.0999984741211, 100.0999984741211, 100.0999984741211, 100.19999694824219,
-              100.19999694824219, 100.19999694824219, 100.19999694824219, 100.30000305175781, 100.30000305175781,
-              100.30000305175781, 100.30000305175781, 100.4000015258789, 100.4000015258789, 100.4000015258789,
-              100.4000015258789
-            ],
+            "data": [3365, 3465, 3565, 3665, 3766, 3866, 3966, 4066, 4167, 4267, 4367, 4467, 4568, 4668, 4768, 4868],
             "dims": [1, 4, 2, 2],
             "type": "float32"
           }
@@ -115,7 +142,43 @@
             "type": "float32"
           },
           {
-            "data": [1, 1, 1, 1],
+            "data": [1, 2, 3, 4],
+            "dims": [1, 1, 2, 2],
+            "type": "float32"
+          },
+          {
+            "data": [5],
+            "dims": [1],
+            "type": "float32"
+          }
+        ],
+        "outputs": [
+          {
+            "data": [11, 25, 28, 19, 32, 86, 99, 55, 40, 114, 131, 67, 29, 73, 80, 41],
+            "dims": [1, 1, 4, 4],
+            "type": "float32"
+          }
+        ]
+      }
+    ]
+  },
+  {
+    "name": "ConvTranspose with bias addition B - NHWC",
+    "operator": "ConvTranspose",
+    "inputShapeDefinitions": "rankOnly",
+    "opset": { "domain": "", "version": 17 },
+    "attributes": [{ "name": "kernel_shape", "data": [2, 2], "type": "ints" }],
+    "cases": [
+      {
+        "name": "T[0]",
+        "inputs": [
+          {
+            "data": [6, 8, 7, 9, 15, 11, 8, 12, 9],
+            "dims": [1, 1, 3, 3],
+            "type": "float32"
+          },
+          {
+            "data": [1, 2, 3, 4],
             "dims": [1, 1, 2, 2],
             "type": "float32"
           },
@@ -127,7 +190,7 @@
         ],
         "outputs": [
           {
-            "data": [11, 19, 20, 12, 20, 43, 46, 23, 22, 49, 52, 25, 13, 25, 26, 14],
+            "data": [11, 25, 28, 19, 32, 86, 99, 55, 40, 114, 131, 67, 29, 73, 80, 41],
             "dims": [1, 1, 4, 4],
             "type": "float32"
           }
@@ -251,7 +314,6 @@
       }
     ]
   },
-
   {
     "name": "ConvTranspose- pointwise",
     "operator": "ConvTranspose",
@@ -285,5 +347,50 @@
         ]
       }
     ]
+  },
+  {
+    "name": "ConvTranspose with bias addition C",
+    "operator": "ConvTranspose",
+    "inputShapeDefinitions": "rankOnly",
+    "opset": { "domain": "", "version": 17 },
+    "attributes": [{ "name": "kernel_shape", "data": [1, 1], "type": "ints" }],
+    "cases": [
+      {
+        "name": "T[0]",
+        "inputs": [
+          {
+            "data": [
+              1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+              30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55,
+              56, 57, 58, 59, 60, 61, 62, 63, 64
+            ],
+            "dims": [1, 4, 4, 4],
+            "type": "float32"
+          },
+          {
+            "data": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+            "dims": [4, 4, 1, 1],
+            "type": "float32"
+          },
+          {
+            "data": [1, 2, 3, 4],
+            "dims": [4],
+            "type": "float32"
+          }
+        ],
+        "outputs": [
+          {
+            "data": [
+              1021, 1049, 1077, 1105, 1133, 1161, 1189, 1217, 1245, 1273, 1301, 1329, 1357, 1385, 1413, 1441, 1122,
+              1154, 1186, 1218, 1250, 1282, 1314, 1346, 1378, 1410, 1442, 1474, 1506, 1538, 1570, 1602, 1223, 1259,
+              1295, 1331, 1367, 1403, 1439, 1475, 1511, 1547, 1583, 1619, 1655, 1691, 1727, 1763, 1324, 1364, 1404,
+              1444, 1484, 1524, 1564, 1604, 1644, 1684, 1724, 1764, 1804, 1844, 1884, 1924
+            ],
+            "dims": [1, 4, 4, 4],
+            "type": "float32"
+          }
+        ]
+      }
+    ]
   }
 ]
diff --git a/js/web/test/data/ops/conv.jsonc b/js/web/test/data/ops/conv.jsonc
index 928192bb219f2..2e8eaaba191d0 100644
--- a/js/web/test/data/ops/conv.jsonc
+++ b/js/web/test/data/ops/conv.jsonc
@@ -125,6 +125,72 @@
       }
     ]
   },
+  {
+    "name": "conv with bias addition C - NHWC",
+    "operator": "Conv",
+    "inputShapeDefinitions": "rankOnly",
+    "opset": { "domain": "", "version": 17 },
+    "attributes": [{ "name": "kernel_shape", "data": [2, 2], "type": "ints" }],
+    "cases": [
+      {
+        "name": "T[0]",
+        "inputs": [
+          {
+            "data": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
+            "dims": [3, 1, 2, 2],
+            "type": "float32"
+          },
+          {
+            "data": [1, 1, 1, 1, 2, 3, 4, 5],
+            "dims": [2, 1, 2, 2],
+            "type": "float32"
+          },
+          {
+            "data": [5, 6],
+            "dims": [2],
+            "type": "float32"
+          }
+        ],
+        "outputs": [
+          {
+            "data": [15, 46, 31, 102, 47, 158],
+            "dims": [3, 2, 1, 1],
+            "type": "float32"
+          }
+        ]
+      },
+      {
+        "name": "inChannel = 3, outChannel = 4",
+        "inputs": [
+          {
+            "data": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 10],
+            "dims": [1, 3, 3, 3],
+            "type": "float32"
+          },
+          {
+            "data": [
+              1, 1, 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 1, 1, 1, 1, 2, 3, 4, 5, 6, 7, 8, 9,
+              10, 11, 12, 13, 14, 15, 16, 17, 1, 2, 3, 4, 5, 6, 7, 8
+            ],
+            "dims": [4, 3, 2, 2],
+            "type": "float32"
+          },
+          {
+            "data": [5, 6, 7, 8],
+            "dims": [4],
+            "type": "float32"
+          }
+        ],
+        "outputs": [
+          {
+            "data": [360, 334, 271, 323, 909, 963, 1024, 1028, 683, 655, 576, 650, 473, 508, 570, 677],
+            "dims": [1, 4, 2, 2],
+            "type": "float32"
+          }
+        ]
+      }
+    ]
+  },
   {
     "name": "conv - group - A",
     "operator": "Conv",
diff --git a/js/web/test/data/ops/expand.jsonc b/js/web/test/data/ops/expand.jsonc
index 460122b4e085c..35888e2fc3709 100644
--- a/js/web/test/data/ops/expand.jsonc
+++ b/js/web/test/data/ops/expand.jsonc
@@ -85,5 +85,34 @@
         ]
       }
     ]
+  },
+  {
+    "name": "Expand 5D - float32",
+    "operator": "Expand",
+    "attributes": [],
+    "cases": [
+      {
+        "name": "Expand 5 - float32",
+        "inputs": [
+          {
+            "data": [1],
+            "dims": [1, 1, 1, 1, 1],
+            "type": "float32"
+          },
+          {
+            "data": [1, 1, 1, 1, 6],
+            "dims": [5],
+            "type": "int64"
+          }
+        ],
+        "outputs": [
+          {
+            "data": [1, 1, 1, 1, 1, 1],
+            "dims": [1, 1, 1, 1, 6],
+            "type": "float32"
+          }
+        ]
+      }
+    ]
   }
 ]
diff --git a/js/web/test/data/ops/fused-conv.jsonc b/js/web/test/data/ops/fused-conv.jsonc
new file mode 100644
index 0000000000000..812e9d7c2def0
--- /dev/null
+++ b/js/web/test/data/ops/fused-conv.jsonc
@@ -0,0 +1,112 @@
+[
+  {
+    "name": "conv without bias addition A",
+    "operator": "FusedConv",
+    "attributes": [
+      { "name": "activation", "data": "Relu", "type": "string" },
+      { "name": "kernel_shape", "data": [2, 2], "type": "ints" }
+    ],
+    "opset": { "domain": "com.microsoft", "version": 1 },
+    "cases": [
+      {
+        "name": "T[0]",
+        "inputs": [
+          {
+            "data": [10, 20, 30, 40, 50, 60, 70, 80, 90],
+            "dims": [1, 1, 3, 3],
+            "type": "float32"
+          },
+          {
+            "data": [1, 2, 3, 4],
+            "dims": [1, 1, 2, 2],
+            "type": "float32"
+          }
+        ],
+        "outputs": [
+          {
+            "data": [370, 470, 670, 770],
+            "dims": [1, 1, 2, 2],
+            "type": "float32"
+          }
+        ]
+      },
+      {
+        "name": "T[1]",
+        "inputs": [
+          {
+            "data": [10, 20, -30, -40, -50, -60, 70, 80, 90],
+            "dims": [1, 1, 3, 3],
+            "type": "float32"
+          },
+          {
+            "data": [1, 2, 3, 4],
+            "dims": [1, 1, 2, 2],
+            "type": "float32"
+          }
+        ],
+        "outputs": [
+          {
+            "data": [0, 0, 390, 430],
+            "dims": [1, 1, 2, 2],
+            "type": "float32"
+          }
+        ]
+      }
+    ]
+  },
+  {
+    "name": "NHWC conv without bias addition A",
+    "operator": "Conv",
+    "attributes": [
+      { "name": "activation", "data": "Relu", "type": "string" },
+      { "name": "kernel_shape", "data": [2, 2], "type": "ints" }
+    ],
+    "opset": { "domain": "com.ms.internal.nhwc", "version": 11 },
+    "cases": [
+      {
+        "name": "T[2]",
+        "inputs": [
+          {
+            "data": [10, 20, 30, 40, 50, 60, 70, 80, 90],
+            "dims": [1, 3, 3, 1],
+            "type": "float32"
+          },
+          {
+            "data": [1, 2, 3, 4],
+            "dims": [1, 1, 2, 2],
+            "type": "float32"
+          }
+        ],
+        "outputs": [
+          {
+            "data": [370, 470, 670, 770],
+            "dims": [1, 2, 2, 1],
+            "type": "float32"
+          }
+        ]
+      },
+      {
+        "name": "T[3]",
+        "inputs": [
+          {
+            "data": [10, 20, -30, -40, -50, -60, 70, 80, 90],
+            "dims": [1, 3, 3, 1],
+            "type": "float32"
+          },
+          {
+            "data": [1, 2, 3, 4],
+            "dims": [1, 1, 2, 2],
+            "type": "float32"
+          }
+        ],
+        "outputs": [
+          {
+            "data": [0, 0, 390, 430],
+            "dims": [1, 2, 2, 1],
+            "type": "float32"
+          }
+        ]
+      }
+    ]
+  }
+]
diff --git a/js/web/test/data/ops/multi-head-attention.jsonc b/js/web/test/data/ops/multi-head-attention.jsonc
new file mode 100644
index 0000000000000..05687bd482e24
--- /dev/null
+++ b/js/web/test/data/ops/multi-head-attention.jsonc
@@ -0,0 +1,194 @@
+[
+  {
+    "name": "MultiHeadAttention Basic, one head",
+    "operator": "MultiHeadAttention",
+    "opset": { "domain": "com.microsoft", "version": 1 },
+    "attributes": [{ "name": "num_heads", "data": 1, "type": "int" }],
+    "cases": [
+      {
+        "name": "T[0]",
+        "inputs": [
+          {
+            "data": [1, 2, 3, 4, 5, 6, 7, 8],
+            "dims": [1, 2, 4],
+            "type": "float32"
+          },
+          {
+            "data": [1, 1, 1, 1, 2, 2, 2, 2],
+            "dims": [1, 2, 4],
+            "type": "float32"
+          },
+          {
+            "data": [1, 2, 3, 4, 5, 6, 7, 8],
+            "dims": [1, 2, 4],
+            "type": "float32"
+          }
+        ],
+        "outputs": [
+          {
+            "data": [
+              4.973228454589844, 5.973228454589844, 6.973228454589844, 7.973228454589844, 4.999990940093994,
+              5.999990940093994, 6.999990940093994, 7.999990940093994
+            ],
+            "dims": [1, 2, 4],
+            "type": "float32"
+          }
+        ]
+      }
+    ]
+  },
+  {
+    "name": "MultiHeadAttention Basic",
+    "operator": "MultiHeadAttention",
+    "opset": { "domain": "com.microsoft", "version": 1 },
+    "attributes": [{ "name": "num_heads", "data": 2, "type": "int" }],
+    "cases": [
+      {
+        "name": "T[0]",
+        "inputs": [
+          {
+            "data": [1, 2, 3, 4, 5, 6, 7, 8],
+            "dims": [1, 2, 4],
+            "type": "float32"
+          },
+          {
+            "data": [1, 1, 1, 1, 2, 2, 2, 2],
+            "dims": [1, 2, 4],
+            "type": "float32"
+          },
+          {
+            "data": [1, 2, 3, 4, 5, 6, 7, 8],
+            "dims": [1, 2, 4],
+            "type": "float32"
+          }
+        ],
+        "outputs": [
+          {
+            "data": [
+              4.571832656860352, 5.571832656860352, 6.971858501434326, 7.971858501434326, 4.998325824737549,
+              5.998325824737549, 6.999900817871094, 7.999900817871094
+            ],
+            "dims": [1, 2, 4],
+            "type": "float32"
+          }
+        ]
+      }
+    ]
+  },
+  {
+    "name": "MultiHeadAttention Basic with bias",
+    "operator": "MultiHeadAttention",
+    "opset": { "domain": "com.microsoft", "version": 1 },
+    "attributes": [{ "name": "num_heads", "data": 2, "type": "int" }],
+    "cases": [
+      {
+        "name": "T[0]",
+        "inputs": [
+          {
+            "data": [1, 2, 3, 4, 5, 6, 7, 8],
+            "dims": [1, 2, 4],
+            "type": "float32"
+          },
+          {
+            "data": [1, 1, 1, 1, 2, 2, 2, 2],
+            "dims": [1, 2, 4],
+            "type": "float32"
+          },
+          {
+            "data": [1, 2, 3, 4, 5, 6, 7, 8],
+            "dims": [1, 2, 4],
+            "type": "float32"
+          },
+          {
+            "data": [1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4],
+            "dims": [12],
+            "type": "float32"
+          }
+        ],
+        "outputs": [
+          {
+            "data": [
+              5.943336009979248, 7.94333553314209, 9.999799728393555, 11.999798774719238, 5.9997992515563965,
+              7.9997992515563965, 10, 11.999999046325684
+            ],
+            "dims": [1, 2, 4],
+            "type": "float32"
+          }
+        ]
+      }
+    ]
+  },
+  {
+    "name": "MultiHeadAttention two heads",
+    "operator": "MultiHeadAttention",
+    "opset": { "domain": "com.microsoft", "version": 1 },
+    "attributes": [{ "name": "num_heads", "data": 2, "type": "int" }],
+    "cases": [
+      {
+        "name": "T[0]",
+        "inputs": [
+          {
+            "data": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+            "dims": [1, 2, 8],
+            "type": "float32"
+          },
+          {
+            "data": [1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4],
+            "dims": [1, 2, 8],
+            "type": "float32"
+          },
+          {
+            "data": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+            "dims": [1, 2, 8],
+            "type": "float32"
+          }
+        ],
+        "outputs": [
+          {
+            "data": [
+              8.99963665008545, 9.99963665008545, 10.99963665008545, 11.999635696411133, 13, 14, 15, 16, 9, 10, 11, 12,
+              13, 14, 15, 16
+            ],
+            "dims": [1, 2, 8],
+            "type": "float32"
+          }
+        ]
+      }
+    ]
+  },
+  {
+    "name": "MultiHeadAttention two heads",
+    "operator": "MultiHeadAttention",
+    "opset": { "domain": "com.microsoft", "version": 1 },
+    "attributes": [{ "name": "num_heads", "data": 2, "type": "int" }],
+    "cases": [
+      {
+        "name": "T[1]",
+        "inputs": [
+          {
+            "data": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+            "dims": [1, 2, 8],
+            "type": "float32"
+          },
+          {
+            "data": [1, 1, 1, 1, 2, 2, 2, 2],
+            "dims": [1, 1, 8],
+            "type": "float32"
+          },
+          {
+            "data": [1, 2, 3, 4, 5, 6, 7, 8],
+            "dims": [1, 1, 8],
+            "type": "float32"
+          }
+        ],
+        "outputs": [
+          {
+            "data": [1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8],
+            "dims": [1, 2, 8],
+            "type": "float32"
+          }
+        ]
+      }
+    ]
+  }
+]
diff --git a/js/web/test/data/ops/slice.jsonc b/js/web/test/data/ops/slice.jsonc
index 9c90817a80c36..beef154a29932 100644
--- a/js/web/test/data/ops/slice.jsonc
+++ b/js/web/test/data/ops/slice.jsonc
@@ -21,6 +21,29 @@
       }
     ]
   },
+  {
+    "name": "Slice float32 with input[0] dim > 4",
+    "operator": "Slice",
+    "attributes": [],
+    "cases": [
+      {
+        "name": "T[1, 1, 1, 1, 5] T[1] T[1] T[1] (float32)",
+        "inputs": [
+          {
+            "data": [
+              0.3964604139328003, -0.8916832804679871, -1.6578896045684814, 1.960708737373352, 1.181204915046692
+            ],
+            "dims": [1, 1, 1, 1, 5],
+            "type": "float32"
+          },
+          { "data": [3], "dims": [1], "type": "int64" },
+          { "data": [4], "dims": [1], "type": "int64" },
+          { "data": [4], "dims": [1], "type": "int64" }
+        ],
+        "outputs": [{ "data": [1.960708737373352], "dims": [1, 1, 1, 1, 1], "type": "float32" }]
+      }
+    ]
+  },
   {
     "name": "Slice int32",
     "operator": "Slice",
diff --git a/js/web/test/data/ops/softmax.jsonc b/js/web/test/data/ops/softmax.jsonc
index 85d4096ee0493..98573fcd73ba2 100644
--- a/js/web/test/data/ops/softmax.jsonc
+++ b/js/web/test/data/ops/softmax.jsonc
@@ -5,7 +5,7 @@
     "attributes": [],
     "cases": [
       {
-        "name": "T[2,4]",
+        "name": "T[2,2]",
         "inputs": [
           {
             "data": [1.0, 2.0, 3.0, 4.0],
@@ -22,5 +22,32 @@
         ]
       }
     ]
+  },
+  {
+    "name": "Softmax with no attributes",
+    "operator": "Softmax",
+    "attributes": [],
+    "cases": [
+      {
+        "name": "T[2, 2, 2]",
+        "inputs": [
+          {
+            "data": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
+            "dims": [2, 2, 2],
+            "type": "float32"
+          }
+        ],
+        "outputs": [
+          {
+            "data": [
+              0.2689414322376251, 0.7310585975646973, 0.2689414322376251, 0.7310585975646973, 0.2689414322376251,
+              0.7310585975646973, 0.2689414322376251, 0.7310585975646973
+            ],
+            "dims": [2, 2, 2],
+            "type": "float32"
+          }
+        ]
+      }
+    ]
   }
 ]
diff --git a/js/web/test/data/ops/transpose.jsonc b/js/web/test/data/ops/transpose.jsonc
index 285d14018e74d..e1edfa7e41513 100644
--- a/js/web/test/data/ops/transpose.jsonc
+++ b/js/web/test/data/ops/transpose.jsonc
@@ -166,5 +166,29 @@
         ]
       }
     ]
+  },
+  {
+    "name": "Transpose 5D - perms:[4, 3, 1, 0, 2]",
+    "operator": "Transpose",
+    "attributes": [{ "name": "perm", "data": [4, 3, 1, 0, 2], "type": "ints" }],
+    "cases": [
+      {
+        "name": "T[3, 1, 2, 1, 4]",
+        "inputs": [
+          {
+            "data": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24],
+            "dims": [3, 1, 2, 1, 4],
+            "type": "float32"
+          }
+        ],
+        "outputs": [
+          {
+            "data": [1, 5, 9, 13, 17, 21, 2, 6, 10, 14, 18, 22, 3, 7, 11, 15, 19, 23, 4, 8, 12, 16, 20, 24],
+            "dims": [4, 1, 1, 3, 2],
+            "type": "float32"
+          }
+        ]
+      }
+    ]
   }
 ]
diff --git a/js/web/test/data/ops/where.jsonc b/js/web/test/data/ops/where.jsonc
new file mode 100644
index 0000000000000..047fd6fd7511b
--- /dev/null
+++ b/js/web/test/data/ops/where.jsonc
@@ -0,0 +1,172 @@
+[
+  {
+    "name": "Where with no attributes",
+    "operator": "Where",
+    "attributes": [],
+    "cases": [
+      {
+        "name": "T[3] T[3] T[3] float32 T[3] ",
+        "inputs": [
+          {
+            "data": [true, false, true, false, true, false, true, false],
+            "dims": [8],
+            "type": "bool"
+          },
+          {
+            "data": [4.0, 8.0, 7.0, 2.0, 4.0, 8.0, 7.0, 1.0],
+            "dims": [8],
+            "type": "float32"
+          },
+          {
+            "data": [1.0, 3.0, 9.0, 6.0, 1.0, 3.0, 9.0, 2.0],
+            "dims": [8],
+            "type": "float32"
+          }
+        ],
+        "outputs": [
+          {
+            "data": [4.0, 3.0, 7.0, 6.0, 4.0, 3.0, 7.0, 2.0],
+            "dims": [8],
+            "type": "float32"
+          }
+        ]
+      }
+    ]
+  },
+  {
+    "name": "Where with no attributes",
+    "operator": "Where",
+    "attributes": [],
+    "cases": [
+      {
+        "name": "T[3] T[3] T[3] int32 T[3] ",
+        "inputs": [
+          {
+            "data": [true, false, true, false, true, false, true, false],
+            "dims": [8],
+            "type": "bool"
+          },
+          {
+            "data": [4, 8, 7, 2, 4, 8, 7, 1],
+            "dims": [8],
+            "type": "int32"
+          },
+          {
+            "data": [1, 3, 9, 6, 1, 3, 9, 2],
+            "dims": [8],
+            "type": "int32"
+          }
+        ],
+        "outputs": [
+          {
+            "data": [4, 3, 7, 6, 4, 3, 7, 2],
+            "dims": [8],
+            "type": "int32"
+          }
+        ]
+      }
+    ]
+  },
+  {
+    "name": "Where with no attributes",
+    "operator": "Where",
+    "attributes": [],
+    "cases": [
+      {
+        "name": "T[3] T[3] T[3] uint32 T[3] ",
+        "inputs": [
+          {
+            "data": [true, false, true, false, true, false, true, false],
+            "dims": [8],
+            "type": "bool"
+          },
+          {
+            "data": [4, 8, 7, 2, 4, 8, 7, 1],
+            "dims": [8],
+            "type": "uint32"
+          },
+          {
+            "data": [1, 4294967295, 9, 6, 1, 3, 9, 2],
+            "dims": [8],
+            "type": "uint32"
+          }
+        ],
+        "outputs": [
+          {
+            "data": [4, 4294967295, 7, 6, 4, 3, 7, 2],
+            "dims": [8],
+            "type": "uint32"
+          }
+        ]
+      }
+    ]
+  },
+  {
+    "name": "Where with no attributes",
+    "operator": "Where",
+    "attributes": [],
+    "cases": [
+      {
+        "name": "T[3] T[3] T[3] bool T[3] ",
+        "inputs": [
+          {
+            "data": [true, false, true, false, true, false, true, false],
+            "dims": [8],
+            "type": "bool"
+          },
+          {
+            "data": [true, true, true, true, true, true, true, true],
+            "dims": [8],
+            "type": "float32"
+          },
+          {
+            "data": [true, false, true, false, true, false, true, false],
+            "dims": [8],
+            "type": "float32"
+          }
+        ],
+        "outputs": [
+          {
+            "data": [true, false, true, false, true, false, true, false],
+            "dims": [8],
+            "type": "float32"
+          }
+        ]
+      }
+    ]
+  },
+  {
+    "name": "Where with no attributes",
+    "operator": "Where",
+    "attributes": [],
+    "cases": [
+      {
+        "name": "T[3 3] T[3 3] T[1] float32 broadcast",
+        "inputs": [
+          {
+            "data": [true, true, true, true, true, false, false, false, false],
+            "dims": [3, 3],
+            "type": "bool"
+          },
+          {
+            "data": [0, 1, 2, 3, 4, 5, 6, 7, 8],
+            "dims": [3, 3],
+            "type": "float32"
+          },
+          {
+            "data": [-1.0],
+            "dims": [1],
+            "type": "float32"
+          }
+        ],
+        "outputs": [
+          {
+            "data": [0, 1, 2, 3, 4, -1, -1, -1, -1],
+            "dims": [3, 3],
+            "type": "float32"
+          }
+        ]
+      }
+    ]
+  }
+]
diff --git a/js/web/test/data/ops/where_broadcast.jsonc b/js/web/test/data/ops/where_broadcast.jsonc
new file mode 100644
index 0000000000000..ad97177bb101b
--- /dev/null
+++ b/js/web/test/data/ops/where_broadcast.jsonc
@@ -0,0 +1,84 @@
+[
+  {
+    "name": "Where with no attributes",
+    "operator": "Where",
+    "attributes": [],
+    "cases": [
+      {
+        // This failed due to: https://github.com/microsoft/onnxruntime/issues/17405.
+        "name": "T[3 6] T[3 6] T[1] float32 broadcast",
+        "inputs": [
+          {
+            "data": [
+              true,
+              true,
+              true,
+              true,
+              true,
+              false,
+              false,
+              false,
+              false,
+              false,
+              false,
+              true,
+              true,
+              true,
+              true,
+              true,
+              true,
+              true
+            ],
+            "dims": [3, 6],
+            "type": "bool"
+          },
+          {
+            "data": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17],
+            "dims": [3, 6],
+            "type": "float32"
+          },
+          {
+            "data": [-1.0],
+            "dims": [1],
+            "type": "float32"
+          }
+        ],
+        "outputs": [
+          {
+            "data": [0, 1, 2, 3, 4, -1, -1, -1, -1, -1, -1, 11, 12, 13, 14, 15, 16, 17],
+            "dims": [3, 6],
+            "type": "float32"
+          }
+        ]
+      },
+      {
+        // This failed due to: https://github.com/microsoft/onnxruntime/issues/17405.
+        "name": "T[3 1] T[3 6] T[1] float32 broadcast",
+        "inputs": [
+          {
+            "data": [true, false, true],
+            "dims": [3, 1],
+            "type": "bool"
+          },
+          {
+            "data": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17],
+            "dims": [3, 6],
+            "type": "float32"
+          },
+          {
+            "data": [-1.0],
+            "dims": [1],
+            "type": "float32"
+          }
+        ],
+        "outputs": [
+          {
+            "data": [0, 1, 2, 3, 4, 5, -1, -1, -1, -1, -1, -1, 12, 13, 14, 15, 16, 17],
+            "dims": [3, 6],
+            "type": "float32"
+          }
+        ]
+      }
+    ]
+  }
+]
diff --git a/js/web/test/suite-test-list.jsonc b/js/web/test/suite-test-list.jsonc
index 6e65645ef4756..a313adef7151b 100644
--- a/js/web/test/suite-test-list.jsonc
+++ b/js/web/test/suite-test-list.jsonc
@@ -885,10 +885,10 @@
       // // "test_qlinearmatmul_3D",
       // // "test_quantizelinear_axis",
       // // "test_quantizelinear",
-      // "test_range_float_type_positive_delta_expanded",
-      // "test_range_float_type_positive_delta",
-      // "test_range_int32_type_negative_delta_expanded",
-      // "test_range_int32_type_negative_delta",
+      "test_range_float_type_positive_delta_expanded",
+      "test_range_float_type_positive_delta",
+      "test_range_int32_type_negative_delta_expanded",
+      "test_range_int32_type_negative_delta",
       "test_reciprocal_example",
       "test_reciprocal",
       "test_reduce_l1_default_axes_keepdims_example",
@@ -1336,6 +1336,10 @@
       "add_int32.jsonc",
       //"and.jsonc",
       "asin.jsonc",
+      "attention.jsonc",
+      "batch-norm.jsonc",
+      "bias-add.jsonc",
+      "bias-split-gelu.jsonc",
       "ceil.jsonc",
       "concat.jsonc",
       "concat_int32.jsonc",
@@ -1360,6 +1364,7 @@
       "matmul-broadcast.jsonc",
       "mul.jsonc",
       "mul_int32.jsonc",
+      "multi-head-attention.jsonc",
       //"neg.jsonc",
       "neg-int32.jsonc",
       "not.jsonc",
@@ -1386,7 +1391,10 @@
       "tan.jsonc",
       "tile.jsonc",
       "transpose.jsonc",
-      "transpose_int32_uint32.jsonc"
+      "transpose_int32_uint32.jsonc",
+      "where.jsonc"
+      // Turn on this when https://github.com/microsoft/onnxruntime/issues/17405 is fixed.
+      //"where_broadcast.jsonc",
       //"xor.jsonc"
     ]
   },
diff --git a/js/web/test/test-main.ts b/js/web/test/test-main.ts
index 49d0ac225be2f..24ab0694b32b8 100644
--- a/js/web/test/test-main.ts
+++ b/js/web/test/test-main.ts
@@ -1,9 +1,10 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
-// Load onnxruntime-web and testdata-config.
+// Load onnxruntime-common and testdata-config.
 // NOTE: this need to be called before import any other library.
-const ort = require('..');
+import * as ort from 'onnxruntime-common';
+
 const ORT_WEB_TEST_CONFIG = require('./testdata-config.json') as Test.Config;
 
 import * as platform from 'platform';
@@ -57,6 +58,9 @@ if (options.globalEnvFlags) {
   if (flags.webgpu?.profilingMode !== undefined) {
     ort.env.webgpu.profilingMode = flags.webgpu.profilingMode;
   }
+  if (flags.webgpu?.validateInputContent !== undefined) {
+    ort.env.webgpu.validateInputContent = flags.webgpu.validateInputContent;
+  }
 }
 
 // Set logging configuration
diff --git a/js/web/test/test-runner.ts b/js/web/test/test-runner.ts
index 46d80a9f56f35..29acc07e118f9 100644
--- a/js/web/test/test-runner.ts
+++ b/js/web/test/test-runner.ts
@@ -14,7 +14,8 @@ import {Operator} from '../lib/onnxjs/operators';
 import {onnx} from '../lib/onnxjs/ort-schema/protobuf/onnx';
 import {Tensor} from '../lib/onnxjs/tensor';
 import {ProtoUtil} from '../lib/onnxjs/util';
-import {tensorDataTypeStringToEnum} from '../lib/wasm/wasm-common';
+import {createView} from '../lib/wasm/jsep/tensor-view';
+import {getTensorElementSize, isGpuBufferSupportedType, tensorDataTypeStringToEnum} from '../lib/wasm/wasm-common';
 
 import {base64toBuffer, createMockGraph, readFile} from './test-shared';
 import {Test} from './test-types';
@@ -136,8 +137,8 @@ async function loadTensors(
 }
 
 async function initializeSession(
-    modelFilePath: string, backendHint: string, profile: boolean, sessionOptions: ort.InferenceSession.SessionOptions,
-    fileCache?: FileCacheBuffer): Promise<ort.InferenceSession> {
+    modelFilePath: string, backendHint: string, ioBindingMode: Test.IOBindingMode, profile: boolean,
+    sessionOptions: ort.InferenceSession.SessionOptions, fileCache?: FileCacheBuffer): Promise<ort.InferenceSession> {
   const preloadModelData: Uint8Array|undefined =
       fileCache && fileCache[modelFilePath] ? fileCache[modelFilePath] : undefined;
   Logger.verbose(
@@ -146,8 +147,14 @@ async function initializeSession(
           preloadModelData ? ` [preloaded(${preloadModelData.byteLength})]` : ''}`);
 
   const profilerConfig = profile ? {maxNumberEvents: 65536} : undefined;
-  const sessionConfig =
-      {...sessionOptions, executionProviders: [backendHint], profiler: profilerConfig, enableProfiling: profile};
+  const sessionConfig = {
+    ...sessionOptions,
+    executionProviders: [backendHint],
+    profiler: profilerConfig,
+    enableProfiling: profile,
+    preferredOutputLocation: ioBindingMode === 'gpu-location' ? ('gpu-buffer' as const) : undefined
+  };
+
   let session: ort.InferenceSession;
 
   try {
@@ -157,7 +164,10 @@ async function initializeSession(
       session = await ort.InferenceSession.create(modelFilePath, sessionConfig);
     }
   } catch (e) {
-    Logger.error('TestRunner', `Failed to load model from file: ${modelFilePath}. Error: ${inspect(e)}`);
+    Logger.error(
+        'TestRunner',
+        `Failed to load model from file: ${modelFilePath}. ` +
+            `Error: ${e.message} @ ${e.fileName}:${e.lineNumber}`);
     throw e;
   }
 
@@ -181,6 +191,7 @@ export class ModelTestContext {
       readonly session: ort.InferenceSession,
       readonly backend: string,
       readonly perfData: ModelTestContext.ModelTestPerfData,
+      readonly ioBinding: Test.IOBindingMode,
       private readonly profile: boolean,
   ) {}
 
@@ -232,8 +243,8 @@ export class ModelTestContext {
       this.initializing = true;
 
       const initStart = now();
-      const session =
-          await initializeSession(modelTest.modelUrl, modelTest.backend!, profile, sessionOptions || {}, this.cache);
+      const session = await initializeSession(
+          modelTest.modelUrl, modelTest.backend!, modelTest.ioBinding, profile, sessionOptions || {}, this.cache);
       const initEnd = now();
 
       for (const testCase of modelTest.cases) {
@@ -244,6 +255,7 @@ export class ModelTestContext {
           session,
           modelTest.backend!,
           {init: initEnd - initStart, firstRun: -1, runs: [], count: 0},
+          modelTest.ioBinding,
           profile,
       );
     } finally {
@@ -481,6 +493,130 @@ export class TensorResultValidator {
   }
 }
 
+function createGpuTensorForInput(cpuTensor: ort.Tensor): ort.Tensor {
+  if (!isGpuBufferSupportedType(cpuTensor.type) || Array.isArray(cpuTensor.data)) {
+    throw new Error(`createGpuTensorForInput can not work with ${cpuTensor.type} tensor`);
+  }
+  const device = ort.env.webgpu.device as GPUDevice;
+  const gpuBuffer = device.createBuffer({
+    // eslint-disable-next-line no-bitwise
+    usage: GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE,
+    size: Math.ceil(cpuTensor.data.byteLength / 16) * 16,
+    mappedAtCreation: true
+  });
+  const arrayBuffer = gpuBuffer.getMappedRange();
+  new Uint8Array(arrayBuffer)
+      .set(new Uint8Array(cpuTensor.data.buffer, cpuTensor.data.byteOffset, cpuTensor.data.byteLength));
+  gpuBuffer.unmap();
+
+  // TODO: how to "await" for the copy to finish, so that we can get more accurate performance data?
+
+  return ort.Tensor.fromGpuBuffer(
+      gpuBuffer, {dataType: cpuTensor.type, dims: cpuTensor.dims, dispose: () => gpuBuffer.destroy()});
+}
+
+function createGpuTensorForOutput(type: ort.Tensor.Type, dims: readonly number[]) {
+  if (!isGpuBufferSupportedType(type)) {
+    throw new Error(`createGpuTensorForOutput can not work with ${type} tensor`);
+  }
+
+  const elementSizeInBytes = getTensorElementSize(tensorDataTypeStringToEnum(type))!;
+  const size = dims.reduce((a, b) => a * b, 1) * elementSizeInBytes;
+
+  const device = ort.env.webgpu.device as GPUDevice;
+  const gpuBuffer = device.createBuffer({
+    // eslint-disable-next-line no-bitwise
+    usage: GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE,
+    size: Math.ceil(size / 16) * 16
+  });
+
+  return ort.Tensor.fromGpuBuffer(gpuBuffer, {
+    dataType: type,
+    dims,
+    dispose: () => gpuBuffer.destroy(),
+    download: async () => {
+      const stagingBuffer = device.createBuffer({
+        // eslint-disable-next-line no-bitwise
+        usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST,
+        size: gpuBuffer.size
+      });
+      const encoder = device.createCommandEncoder();
+      encoder.copyBufferToBuffer(gpuBuffer, 0, stagingBuffer, 0, gpuBuffer.size);
+      device.queue.submit([encoder.finish()]);
+
+      await stagingBuffer.mapAsync(GPUMapMode.READ);
+      const arrayBuffer = stagingBuffer.getMappedRange().slice(0, size);
+      stagingBuffer.destroy();
+
+      return createView(arrayBuffer, type) as ort.Tensor.DataTypeMap[ort.Tensor.GpuBufferDataTypes];
+    }
+  });
+}
+
+export async function sessionRun(options: {
+  session: ort.InferenceSession; feeds: Record<string, ort.Tensor>;
+  outputsMetaInfo: Record<string, Pick<ort.Tensor, 'dims'|'type'>>;
+  ioBinding: Test.IOBindingMode;
+}): Promise<[number, number, ort.InferenceSession.OnnxValueMapType]> {
+  const session = options.session;
+  const feeds = options.feeds;
+  const fetches: Record<string, ort.Tensor> = {};
+
+  // currently we only support IO Binding for WebGPU
+  //
+  // For inputs, we create GPU tensors on both 'gpu-tensor' and 'gpu-location' binding testing mode.
+  // For outputs, we create GPU tensors on 'gpu-tensor' binding testing mode only.
+  //              in 'gpu-device' binding mode, outputs are not pre-allocated.
+  const shouldUploadInput = options.ioBinding === 'gpu-tensor' || options.ioBinding === 'gpu-location';
+  const shouldUploadOutput = options.ioBinding === 'gpu-tensor';
+  try {
+    if (shouldUploadInput) {
+      // replace the CPU tensors in feeds into GPU tensors
+      for (const name in feeds) {
+        if (Object.hasOwnProperty.call(feeds, name)) {
+          feeds[name] = createGpuTensorForInput(feeds[name]);
+        }
+      }
+    }
+
+    if (shouldUploadOutput) {
+      for (const name in options.outputsMetaInfo) {
+        if (Object.hasOwnProperty.call(options.outputsMetaInfo, name)) {
+          const {type, dims} = options.outputsMetaInfo[name];
+          fetches[name] = createGpuTensorForOutput(type, dims);
+        }
+      }
+    }
+
+    const start = now();
+    Logger.verbose('TestRunner', `Timestamp before session run: ${start}`);
+    const outputs = await (
+        shouldUploadOutput ? session.run(feeds, fetches) :
+                             session.run(feeds, Object.getOwnPropertyNames(options.outputsMetaInfo)));
+    const end = now();
+    Logger.verbose('TestRunner', `Timestamp after session run: ${end}`);
+
+    // download each output tensor if needed
+    for (const name in outputs) {
+      if (Object.hasOwnProperty.call(outputs, name)) {
+        const tensor = outputs[name];
+        // Tensor.getData(true) release the underlying resource
+        await tensor.getData(true);
+      }
+    }
+
+    return [start, end, outputs];
+  } finally {
+    // dispose the GPU tensors in feeds
+    for (const name in feeds) {
+      if (Object.hasOwnProperty.call(feeds, name)) {
+        const tensor = feeds[name];
+        tensor.dispose();
+      }
+    }
+  }
+}
+
 /**
  * run a single model test case. the inputs/outputs tensors should already been prepared.
  */
@@ -491,12 +627,11 @@ export async function runModelTestSet(
   const validator = new TensorResultValidator(context.backend);
   try {
     const feeds: Record<string, ort.Tensor> = {};
+    const outputsMetaInfo: Record<string, ort.Tensor> = {};
     testCase.inputs!.forEach((tensor, i) => feeds[context.session.inputNames[i]] = tensor);
-    const start = now();
-    Logger.verbose('TestRunner', `Timestamp before session run: ${start}`);
-    const outputs = await context.session.run(feeds);
-    const end = now();
-    Logger.verbose('TestRunner', `Timestamp after session run: ${end}`);
+    testCase.outputs!.forEach((tensor, i) => outputsMetaInfo[context.session.outputNames[i]] = tensor);
+    const [start, end, outputs] =
+        await sessionRun({session: context.session, feeds, outputsMetaInfo, ioBinding: context.ioBinding});
     if (context.perfData.count === 0) {
       context.perfData.firstRun = end - start;
     } else {
@@ -575,6 +710,7 @@ export class ProtoOpTestContext {
   private readonly loadedData: Uint8Array;  // model data, inputs, outputs
   session: ort.InferenceSession;
   readonly backendHint: string;
+  readonly ioBindingMode: Test.IOBindingMode;
   constructor(test: Test.OperatorTest, private readonly sessionOptions: ort.InferenceSession.SessionOptions = {}) {
     const opsetImport = onnx.OperatorSetIdProto.create(test.opset);
     const operator = test.operator;
@@ -713,6 +849,7 @@ export class ProtoOpTestContext {
     model.graph.name = test.name;
 
     this.backendHint = test.backend!;
+    this.ioBindingMode = test.ioBinding;
     this.loadedData = onnx.ModelProto.encode(model).finish();
 
     // in debug mode, open a new tab in browser for the generated onnx model.
@@ -729,8 +866,11 @@ export class ProtoOpTestContext {
     }
   }
   async init(): Promise<void> {
-    this.session = await ort.InferenceSession.create(
-        this.loadedData, {executionProviders: [this.backendHint], ...this.sessionOptions});
+    this.session = await ort.InferenceSession.create(this.loadedData, {
+      executionProviders: [this.backendHint],
+      preferredOutputLocation: this.ioBindingMode === 'gpu-location' ? ('gpu-buffer' as const) : undefined,
+      ...this.sessionOptions
+    });
   }
 
   async dispose(): Promise<void> {
@@ -739,10 +879,11 @@ export class ProtoOpTestContext {
 }
 
 async function runProtoOpTestcase(
-    session: ort.InferenceSession, testCase: Test.OperatorTestCase, validator: TensorResultValidator): Promise<void> {
+    session: ort.InferenceSession, testCase: Test.OperatorTestCase, ioBindingMode: Test.IOBindingMode,
+    validator: TensorResultValidator): Promise<void> {
   const feeds: Record<string, ort.Tensor> = {};
-  const fetches: string[] = [];
-  testCase.inputs!.forEach((input, i) => {
+  const fetches: Record<string, Pick<ort.Tensor, 'dims'|'type'>> = {};
+  testCase.inputs.forEach((input, i) => {
     if (input.data) {
       let data: number[]|BigUint64Array|BigInt64Array = input.data;
       if (input.type === 'uint64') {
@@ -756,7 +897,7 @@ async function runProtoOpTestcase(
 
   const outputs: ort.Tensor[] = [];
   const expectedOutputNames: string[] = [];
-  testCase.outputs!.forEach((output, i) => {
+  testCase.outputs.forEach((output, i) => {
     if (output.data) {
       let data: number[]|BigUint64Array|BigInt64Array = output.data;
       if (output.type === 'uint64') {
@@ -766,11 +907,11 @@ async function runProtoOpTestcase(
       }
       outputs.push(new ort.Tensor(output.type, data, output.dims));
       expectedOutputNames.push(`output_${i}`);
-      fetches.push(`output_${i}`);
+      fetches[`output_${i}`] = {dims: output.dims, type: output.type};
     }
   });
 
-  const results = await session.run(feeds, fetches);
+  const [, , results] = await sessionRun({session, feeds, outputsMetaInfo: fetches, ioBinding: ioBindingMode});
 
   const actualOutputNames = Object.getOwnPropertyNames(results);
   expect(actualOutputNames.length).to.equal(expectedOutputNames.length);
@@ -821,7 +962,8 @@ async function runOpTestcase(
 export async function runOpTest(
     testcase: Test.OperatorTestCase, context: ProtoOpTestContext|OpTestContext): Promise<void> {
   if (context instanceof ProtoOpTestContext) {
-    await runProtoOpTestcase(context.session, testcase, new TensorResultValidator(context.backendHint));
+    await runProtoOpTestcase(
+        context.session, testcase, context.ioBindingMode, new TensorResultValidator(context.backendHint));
   } else {
     await runOpTestcase(
         context.inferenceHandler, context.createOperator(), testcase, new TensorResultValidator(context.backendHint));
diff --git a/js/web/test/test-shared.ts b/js/web/test/test-shared.ts
index 68d7852ce86da..7c327e7c97ac4 100644
--- a/js/web/test/test-shared.ts
+++ b/js/web/test/test-shared.ts
@@ -2,8 +2,7 @@
 // Licensed under the MIT License.
 
 import * as base64 from 'base64-js';
-import * as fs from 'fs';
-import {promisify} from 'util';
+import * as fs from 'node:fs/promises';
 
 import {Attribute} from '../lib/onnxjs/attribute';
 import {Graph} from '../lib/onnxjs/graph';
@@ -19,7 +18,7 @@ export function bufferToBase64(buffer: Uint8Array): string {
 export async function readFile(file: string) {
   if (typeof process !== 'undefined' && process.versions && process.versions.node) {
     // node
-    return promisify(fs.readFile)(file);
+    return fs.readFile(file);
   } else {
     // browser
     const response = await fetch(file);
diff --git a/js/web/test/test-types.ts b/js/web/test/test-types.ts
index 1f95d1cd8e682..5bdc8d84cc7a5 100644
--- a/js/web/test/test-types.ts
+++ b/js/web/test/test-types.ts
@@ -43,6 +43,18 @@ export declare namespace Test {
    */
   export type PlatformCondition = string;
 
+  /**
+   * The IOBindingMode represents how to test a model with GPU data.
+   *
+   * - none: inputs will be pre-allocated as CPU tensors; no output will be pre-allocated; `preferredOutputLocation`
+   * will not be set.
+   * - gpu-location: inputs will be pre-allocated as GPU tensors; no output will be pre-allocated;
+   * `preferredOutputLocation` will be set to `gpu-buffer`.
+   * - gpu-tensor: inputs and outputs will all be pre-allocated as GPU tensors. `preferredOutputLocation`
+   * will not be set.
+   */
+  export type IOBindingMode = 'none'|'gpu-tensor'|'gpu-location';
+
   export interface ModelTestCase {
     name: string;
     dataFiles: readonly string[];
@@ -54,6 +66,7 @@ export declare namespace Test {
     name: string;
     modelUrl: string;
     backend?: string;  // value should be populated at build time
+    ioBinding: IOBindingMode;
     platformCondition?: PlatformCondition;
     cases: readonly ModelTestCase[];
   }
@@ -82,6 +95,7 @@ export declare namespace Test {
     inputShapeDefinitions?: 'none'|'rankOnly'|'static'|ReadonlyArray<InputShapeDefinition|undefined>;
     opset?: OperatorTestOpsetImport;
     backend?: string;  // value should be populated at build time
+    ioBinding: IOBindingMode;
     platformCondition?: PlatformCondition;
     attributes?: readonly AttributeValue[];
     cases: readonly OperatorTestCase[];
@@ -114,6 +128,7 @@ export declare namespace Test {
     wasm: Partial<Env.WebAssemblyFlags>;
     webgl: Partial<Env.WebGLFlags>;
     webgpu: Partial<Env.WebGpuFlags>;
+    logLevel?: Env['logLevel'];
   }
 
   /**
diff --git a/js/web/test/unittests/backends/webgl/test-conv-new.ts b/js/web/test/unittests/backends/webgl/test-conv-new.ts
index 0fddddf58181c..8c186b9b36451 100644
--- a/js/web/test/unittests/backends/webgl/test-conv-new.ts
+++ b/js/web/test/unittests/backends/webgl/test-conv-new.ts
@@ -14,7 +14,7 @@ import {conv2d} from './test-conv-utils';
 function createRandomArray(size: number): Float32Array {
   const randomTable = [0, 3, 6, 9, 2, 5, 8, 1, 4, 7];
   return new Float32Array(
-      Array.from({length: size}, (v, k) => randomTable[k % 10] * 0.1 + randomTable[Math.trunc(k / 10) % 10] * 0.01));
+      Array.from({length: size}, (_v, k) => randomTable[k % 10] * 0.1 + randomTable[Math.trunc(k / 10) % 10] * 0.01));
 }
 interface TestData {
   inputShape: number[];
diff --git a/js/web/test/unittests/backends/webgl/test-pack-unpack.ts b/js/web/test/unittests/backends/webgl/test-pack-unpack.ts
index 0b70144733227..61c21d4b689fb 100644
--- a/js/web/test/unittests/backends/webgl/test-pack-unpack.ts
+++ b/js/web/test/unittests/backends/webgl/test-pack-unpack.ts
@@ -291,7 +291,7 @@ describe('#UnitTest# - unpack - Tensor unpack', () => {
       webglInferenceHandler.session.textureManager.glContext.checkError();
       const webglTexture = createTextureFromArray(
           webglInferenceHandler.session.textureManager.glContext, testData.rawData ? testData.rawData : inputData,
-          gl.RGBA, inputTextureShape[0], inputTextureShape[1]);
+          inputTextureShape[0], inputTextureShape[1]);
       webglInferenceHandler.session.textureManager.glContext.checkError();
       const packedShape = inputTextureShape;
       const textureData = {
diff --git a/js/web/test/unittests/backends/webgl/test-utils.ts b/js/web/test/unittests/backends/webgl/test-utils.ts
index acb3f0002ce2f..092d63cd2ade4 100644
--- a/js/web/test/unittests/backends/webgl/test-utils.ts
+++ b/js/web/test/unittests/backends/webgl/test-utils.ts
@@ -4,7 +4,7 @@
 import {WebGLContext} from '../../../../lib/onnxjs/backends/webgl/webgl-context';
 
 export function createAscendingArray(size: number): Float32Array {
-  return new Float32Array(Array.from({length: size}, (v, i) => (i + 1)));
+  return new Float32Array(Array.from({length: size}, (_v, i) => (i + 1)));
 }
 
 // Returns an array by injecting 3 zeros after every element in the input array to be used for creating unpacked
@@ -19,7 +19,7 @@ export function generateArrayForUnpackedTexture(input: Float32Array): Float32Arr
 
 // create a webgl texture and fill it with the array content
 export function createTextureFromArray(
-    glContext: WebGLContext, dataArray: Float32Array, type: GLenum, width: number, height: number): WebGLTexture {
+    glContext: WebGLContext, dataArray: Float32Array, width: number, height: number): WebGLTexture {
   const gl = glContext.gl;
 
   // create the texture
diff --git a/js/web/tsconfig.json b/js/web/tsconfig.json
index 0a4d19af9981f..d60d746e9328d 100644
--- a/js/web/tsconfig.json
+++ b/js/web/tsconfig.json
@@ -1,12 +1,10 @@
 {
   "extends": "../tsconfig.json",
   "compilerOptions": {
-    "module": "CommonJS",
     "downlevelIteration": true,
     "declaration": true,
-    "declarationDir": "./types",
     "typeRoots": ["./node_modules/@webgpu/types", "./node_modules/@types", "../node_modules/@types"]
   },
-  "include": ["lib", "script", "test"],
+  "include": ["lib", "test"],
   "exclude": ["lib/wasm/proxy-worker"]
 }
diff --git a/js/web/types.d.ts b/js/web/types.d.ts
index c6cff64c8a732..b9d12cf47b5c5 100644
--- a/js/web/types.d.ts
+++ b/js/web/types.d.ts
@@ -5,6 +5,26 @@ declare module 'onnxruntime-web' {
   export * from 'onnxruntime-common';
 }
 
+declare module 'onnxruntime-web/experimental' {
+  export * from 'onnxruntime-web';
+}
+
+declare module 'onnxruntime-web/wasm' {
+  export * from 'onnxruntime-web';
+}
+
+declare module 'onnxruntime-web/wasm-core' {
+  export * from 'onnxruntime-web';
+}
+
+declare module 'onnxruntime-web/webgl' {
+  export * from 'onnxruntime-web';
+}
+
 declare module 'onnxruntime-web/webgpu' {
   export * from 'onnxruntime-web';
 }
+
+declare module 'onnxruntime-web/training' {
+  export * from 'onnxruntime-web';
+}
diff --git a/js/web/webpack.config.js b/js/web/webpack.config.js
deleted file mode 100644
index 81c69ffdcf6bf..0000000000000
--- a/js/web/webpack.config.js
+++ /dev/null
@@ -1,330 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-'use strict';
-
-const path = require('path');
-const webpack = require('webpack');
-const BundleAnalyzerPlugin = require('webpack-bundle-analyzer').BundleAnalyzerPlugin;
-const NodePolyfillPlugin = require('node-polyfill-webpack-plugin');
-const TerserPlugin = require('terser-webpack-plugin');
-const minimist = require('minimist');
-
-// commandline args
-const args = minimist(process.argv);
-const bundleMode = args['bundle-mode'] || 'prod';        // 'prod'|'dev'|'perf'|'node'|undefined;
-const useAnalyzer = !!args.a || !!args['use-analyzer'];  // -a, --use-analyzer
-const filter = args.f || args.filter;
-
-const VERSION = require(path.join(__dirname, 'package.json')).version;
-const COPYRIGHT_BANNER = `/*!
-* ONNX Runtime Web v${VERSION}
-* Copyright (c) Microsoft Corporation. All rights reserved.
-* Licensed under the MIT License.
-*/`;
-
-function terserEcmaVersionFromWebpackTarget(target) {
-  switch (target) {
-    case 'es5':
-      return 5;
-    case 'es6':
-    case 'es2015':
-      return 2015;
-    case 'es2017':
-      return 2017;
-    default:
-      throw new RangeError(`not supported ECMA version: ${target}`);
-  }
-}
-
-function defaultTerserPluginOptions(target) {
-  return {
-    extractComments: false,
-    terserOptions: {
-      ecma: terserEcmaVersionFromWebpackTarget(target),
-      format: {
-        comments: false,
-      },
-      compress: {passes: 2},
-      mangle: {reserved: ['_scriptDir', 'startWorker']}
-    }
-  };
-}
-
-const DEFAULT_BUILD_DEFS = {
-  DISABLE_WEBGL: false,
-  DISABLE_WEBGPU: true,
-  DISABLE_WASM: false,
-  DISABLE_WASM_PROXY: false,
-  DISABLE_WASM_THREAD: false,
-};
-
-// common config for release bundle
-function buildConfig({filename, format, target, mode, devtool, build_defs}) {
-  const config = {
-    target: [format === 'commonjs' ? 'node' : 'web', target],
-    entry: path.resolve(__dirname, 'lib/index.ts'),
-    output: {path: path.resolve(__dirname, 'dist'), filename, library: {type: format}},
-    resolve: {
-      extensions: ['.ts', '.js'],
-      alias: {
-        'util': false,
-      },
-      fallback: {
-        'crypto': false,
-        'fs': false,
-        'path': false,
-        'util': false,
-        'os': false,
-        'worker_threads': false,
-        'perf_hooks': false,
-      }
-    },
-    plugins: [
-      new webpack.DefinePlugin({BUILD_DEFS: build_defs}), new webpack.WatchIgnorePlugin({paths: [/\.js$/, /\.d\.ts$/]})
-    ],
-    module: {
-      rules: [
-        {test: /\.ts$/, use: [{loader: 'ts-loader', options: {compilerOptions: {target}}}]},
-        {test: /ort-wasm.*\.worker\.js$/, type: 'asset/source'}
-      ]
-    },
-    mode,
-    node: false,
-    devtool
-  };
-
-  if (useAnalyzer) {
-    config.plugins.unshift(
-        new BundleAnalyzerPlugin({analyzerMode: 'static', reportFilename: `${filename}.report.html`}));
-  }
-
-  if (mode === 'production') {
-    config.resolve.alias['./binding/ort-wasm-threaded.js'] = './binding/ort-wasm-threaded.min.js';
-    config.resolve.alias['./binding/ort-wasm-threaded-simd.jsep.js'] = './binding/ort-wasm-threaded-simd.jsep.min.js';
-    config.resolve.alias['./binding/ort-wasm-threaded.worker.js'] = './binding/ort-wasm-threaded.min.worker.js';
-
-    const options = defaultTerserPluginOptions(target);
-    options.terserOptions.format.preamble = COPYRIGHT_BANNER;
-    config.plugins.push(new TerserPlugin(options));
-
-    // add a custom plugin to check whether code contains 'BUILD_DEFS'
-    config.plugins.push({
-      apply: (compiler) => {
-        compiler.hooks.afterCompile.tap('Check BUILD_DEFS', (compilation) => {
-          for (const filename of compilation.assetsInfo.keys()) {
-            if (filename.endsWith('.js')) {
-              const asset = compilation.assets[filename];
-              if (asset) {
-                const content = asset.source();
-                if (typeof content !== 'string') {
-                  throw new Error(`content for target file '${filename}' is not string.`);
-                }
-                if (content.includes('DISABLE_WEBGL') || content.includes('DISABLE_WASM') ||
-                    content.includes('DISABLE_WASM_PROXY') || content.includes('DISABLE_WASM_THREAD')) {
-                  throw new Error(`target file '${filename}' contains data fields from "BUILD_DEFS".`);
-                }
-              }
-            }
-          }
-        });
-      }
-    });
-  } else {
-    config.plugins.push(new webpack.BannerPlugin({banner: COPYRIGHT_BANNER, raw: true}));
-  }
-
-  return config;
-}
-
-// "ort{.min}.js" config
-function buildOrtConfig(
-    {suffix = '', target = 'es2017', mode = 'production', devtool = 'source-map', build_defs = DEFAULT_BUILD_DEFS}) {
-  const config = buildConfig({filename: `ort${suffix}.js`, format: 'umd', target, mode, devtool, build_defs});
-  // set global name 'ort'
-  config.output.library.name = 'ort';
-  return config;
-}
-
-// "ort-web{.min|.node}.js" config
-function buildOrtWebConfig({
-  suffix = '',
-  format = 'umd',
-  target = 'es2017',
-  mode = 'production',
-  devtool = 'source-map',
-  build_defs = DEFAULT_BUILD_DEFS
-}) {
-  const config = buildConfig({filename: `ort-web${suffix}.js`, format, target, mode, devtool, build_defs});
-  // exclude onnxruntime-common from bundle
-  config.externals = {
-    'onnxruntime-common': {commonjs: 'onnxruntime-common', commonjs2: 'onnxruntime-common', root: 'ort'}
-  };
-  // in nodejs, treat as external dependencies
-  if (format === 'commonjs') {
-    config.externals.path = 'path';
-    config.externals.fs = 'fs';
-    config.externals.util = 'util';
-    config.externals.worker_threads = 'worker_threads';
-    config.externals.perf_hooks = 'perf_hooks';
-    config.externals.os = 'os';
-  }
-  return config;
-}
-
-function buildTestRunnerConfig({
-  suffix = '',
-  format = 'umd',
-  target = 'es2017',
-  mode = 'production',
-  devtool = 'source-map',
-  build_defs = DEFAULT_BUILD_DEFS
-}) {
-  const config = {
-    target: ['web', target],
-    entry: path.resolve(__dirname, 'test/test-main.ts'),
-    output: {
-      path: path.resolve(__dirname, 'test'),
-      filename: `ort${suffix}.js`,
-      library: {type: format},
-      devtoolNamespace: '',
-    },
-    externals: {
-      'onnxruntime-common': 'ort',
-      'fs': 'fs',
-      'perf_hooks': 'perf_hooks',
-      'worker_threads': 'worker_threads',
-      '../../node': '../../node'
-    },
-    resolve: {
-      alias: {
-        // make sure to refer to original source files instead of generated bundle in test-main.
-        '..$': '../lib/index'
-      },
-      extensions: ['.ts', '.js'],
-      fallback: {
-        './binding/ort-wasm.js': false,
-        './binding/ort-wasm-threaded.js': false,
-        './binding/ort-wasm-threaded.worker.js': false
-      }
-    },
-    plugins: [
-      new webpack.DefinePlugin({BUILD_DEFS: build_defs}),
-      new webpack.WatchIgnorePlugin({paths: [/\.js$/, /\.d\.ts$/]}),
-      new NodePolyfillPlugin({excludeAliases: ['console', 'Buffer']}),
-    ],
-    module: {
-      rules: [
-        {test: /\.ts$/, use: [{loader: 'ts-loader', options: {compilerOptions: {target}}}]},
-        {test: /ort-wasm.*\.worker\.js$/, type: 'asset/source'}
-      ]
-    },
-    mode,
-    node: false,
-    devtool,
-  };
-
-  if (mode === 'production') {
-    config.plugins.push(new TerserPlugin(defaultTerserPluginOptions(target)));
-  }
-
-  return config;
-}
-
-module.exports = () => {
-  const builds = [];
-
-  switch (bundleMode) {
-    case 'prod':
-      builds.push(
-          // ort.min.js
-          buildOrtConfig({suffix: '.min'}),
-          // ort.js
-          buildOrtConfig({mode: 'development', devtool: 'inline-source-map'}),
-          // ort.es6.min.js
-          buildOrtConfig({suffix: '.es6.min', target: 'es6'}),
-          // ort.es5.min.js
-          buildOrtConfig({suffix: '.es5.min', target: 'es5'}),
-
-          // ort.wasm.min.js
-          buildOrtConfig({
-            suffix: '.wasm.min',
-            build_defs: {
-              ...DEFAULT_BUILD_DEFS,
-              DISABLE_WEBGL: true,
-            }
-          }),
-          // ort.webgl.min.js
-          buildOrtConfig({
-            suffix: '.webgl.min',
-            build_defs: {
-              ...DEFAULT_BUILD_DEFS,
-              DISABLE_WASM: true,
-            }
-          }),
-          // ort.wasm-core.min.js
-          buildOrtConfig({
-            suffix: '.wasm-core.min',
-            build_defs: {
-              ...DEFAULT_BUILD_DEFS,
-              DISABLE_WEBGL: true,
-              DISABLE_WASM_PROXY: true,
-              DISABLE_WASM_THREAD: true,
-            }
-          }),
-          // ort.webgpu.min.js
-          buildOrtConfig({
-            suffix: '.webgpu.min',
-            build_defs: {
-              ...DEFAULT_BUILD_DEFS,
-              DISABLE_WEBGPU: false,
-            }
-          }),
-
-          // ort-web.min.js
-          buildOrtWebConfig({suffix: '.min'}),
-          // ort-web.js
-          buildOrtWebConfig({mode: 'development', devtool: 'inline-source-map'}),
-          // ort-web.es6.min.js
-          buildOrtWebConfig({suffix: '.es6.min', target: 'es6'}),
-          // ort-web.es5.min.js
-          buildOrtWebConfig({suffix: '.es5.min', target: 'es5'}),
-      );
-
-    case 'node':
-      builds.push(
-          // ort-web.node.js
-          buildOrtWebConfig({suffix: '.node', format: 'commonjs'}),
-      );
-      break;
-    case 'dev':
-      builds.push(buildTestRunnerConfig({
-        suffix: '.dev',
-        mode: 'development',
-        devtool: 'inline-source-map',
-        build_defs: {
-          ...DEFAULT_BUILD_DEFS,
-          DISABLE_WEBGPU: false,
-        }
-      }));
-      break;
-    case 'perf':
-      builds.push(buildTestRunnerConfig({
-        suffix: '.perf',
-        build_defs: {
-          ...DEFAULT_BUILD_DEFS,
-          DISABLE_WEBGPU: false,
-        }
-      }));
-      break;
-    default:
-      throw new Error(`unsupported bundle mode: ${bundleMode}`);
-  }
-
-  if (filter) {
-    const filterRegex = new RegExp(filter);
-    return builds.filter(b => filterRegex.test(b.output.filename));
-  }
-
-  return builds;
-};
diff --git a/js/webpack.shared.mjs b/js/webpack.shared.mjs
deleted file mode 100644
index d1b95722ff4de..0000000000000
--- a/js/webpack.shared.mjs
+++ /dev/null
@@ -1,94 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-'use strict';
-
-import webpack from 'webpack';
-import TerserPlugin from 'terser-webpack-plugin';
-import {resolve, dirname} from 'node:path';
-import {readFileSync} from 'node:fs';
-import {fileURLToPath} from 'node:url';
-
-/**
- * ECMAScript version for default onnxruntime JavaScript API builds
- */
-export const DEFAULT_ES_VERSION = 'es2017';
-
-// how to use "__dirname" in ESM: https://shramko.dev/blog/dirname-error
-const __dirname = dirname(fileURLToPath(import.meta.url));
-
-const terserEcmaVersionFromWebpackEsVersion = (target) => {
-  switch (target) {
-    case 'es5':
-      return 5;
-    case 'es6':
-    case 'es2015':
-      return 2015;
-    case 'es2017':
-      return 2017;
-    default:
-      throw new RangeError(`not supported ECMA version: ${target}`);
-  }
-};
-
-const getPackageFullName = (name) => {
-  switch (name) {
-    case 'common':
-      return `ONNX Runtime Common`;
-    case 'node':
-      return `ONNX Runtime Node.js Binding`;
-    case 'web':
-      return `ONNX Runtime Web`;
-    case 'react-native':
-      return `ONNX Runtime React-native`;
-    default:
-      throw new RangeError(`unknown package name: ${name}`);
-  }
-};
-
-/**
- * Get package version by reading the file "package.json" under the package folder
- * @param {'common'|'node'|'web'|'react-native'} name - the package name
- * @returns a string representing the package version
- */
-const getPackageVersion = (name) => {
-  const normalizedName = name.replace('-', '_');
-  const packageJsonFileContent = readFileSync(resolve(__dirname, normalizedName, 'package.json'));
-  const packageJson = JSON.parse(packageJsonFileContent);
-  return packageJson.version;
-};
-
-/**
- *
- * @param {'development'|'production'} mode - specify webpack build mode
- * @param {'common'|'node'|'web'|'react-native'} packageName - specify the name of the package
- * @param {'es5'|'es6'|'es2015'|'es2017'} esVersion - specify the ECMAScript version
- * @returns
- */
-export const addCopyrightBannerPlugin = (mode, packageName, esVersion) => {
-  const COPYRIGHT_BANNER = `/*!
- * ${getPackageFullName(packageName)} v${getPackageVersion(packageName)}
- * Copyright (c) Microsoft Corporation. All rights reserved.
- * Licensed under the MIT License.
- */`;
-
-  if (mode === 'production') {
-    // in 'production' mode, webpack uses terser to minimize the code.
-    // we set options.format.preamble to make sure terser generates correct copyright banner.
-    return new TerserPlugin({
-      extractComments: false,
-      terserOptions: {
-        ecma: terserEcmaVersionFromWebpackEsVersion(esVersion),
-        format: {
-          preamble: COPYRIGHT_BANNER,
-          comments: false,
-        },
-        compress: {passes: 2}
-      }
-    });
-  } else {
-    // in 'development' mode, webpack does not minimize the code.
-    // we use the webpack builtin plugin BannerPlugin to insert the banner.
-    return new webpack.BannerPlugin({banner: COPYRIGHT_BANNER, raw: true});
-  }
-};
diff --git a/onnxruntime/__init__.py b/onnxruntime/__init__.py
index fd147eaa11f3f..57219c50f39aa 100644
--- a/onnxruntime/__init__.py
+++ b/onnxruntime/__init__.py
@@ -42,6 +42,7 @@
     from onnxruntime.capi._pybind_state import get_build_info  # noqa: F401
     from onnxruntime.capi._pybind_state import get_device  # noqa: F401
     from onnxruntime.capi._pybind_state import get_version_string  # noqa: F401
+    from onnxruntime.capi._pybind_state import has_collective_ops  # noqa: F401
     from onnxruntime.capi._pybind_state import set_default_logger_severity  # noqa: F401
     from onnxruntime.capi._pybind_state import set_default_logger_verbosity  # noqa: F401
     from onnxruntime.capi._pybind_state import set_seed  # noqa: F401
@@ -60,7 +61,6 @@
 from onnxruntime.capi.onnxruntime_inference_collection import OrtDevice  # noqa: F401
 from onnxruntime.capi.onnxruntime_inference_collection import OrtValue  # noqa: F401
 from onnxruntime.capi.onnxruntime_inference_collection import SparseTensor  # noqa: F401
-from onnxruntime.capi.training import *  # noqa: F403
 
 # TODO: thiagofc: Temporary experimental namespace for new PyTorch front-end
 try:  # noqa: SIM105
diff --git a/onnxruntime/contrib_ops/cpu/aten_ops/aten_op.cc b/onnxruntime/contrib_ops/cpu/aten_ops/aten_op.cc
index 945c3aebce579..d0abf58922f88 100644
--- a/onnxruntime/contrib_ops/cpu/aten_ops/aten_op.cc
+++ b/onnxruntime/contrib_ops/cpu/aten_ops/aten_op.cc
@@ -32,8 +32,10 @@ Status ATen::Compute(OpKernelContext* p_ctx) const {
   aten_ops::ATenOperatorExecutor::Instance()(op_name_, overload_name_, input_size, dlpack_inputs.get(), output_size,
                                              dlpack_outputs.get());
   for (size_t i = 0; i < output_size; ++i) {
-    ORT_RETURN_IF_ERROR(
-        p_ctx_internal->SetOutputMLValue(static_cast<int>(i), dlpack::DlpackToOrtValue(dlpack_outputs[i])));
+    if (dlpack_outputs[i]) {
+      ORT_RETURN_IF_ERROR(
+          p_ctx_internal->SetOutputMLValue(static_cast<int>(i), dlpack::DlpackToOrtValue(dlpack_outputs[i])));
+    }
   }
 
   return Status::OK();
diff --git a/onnxruntime/contrib_ops/cpu/aten_ops/aten_op_executor.h b/onnxruntime/contrib_ops/cpu/aten_ops/aten_op_executor.h
index be9650d96b004..d72868cd8fa9f 100644
--- a/onnxruntime/contrib_ops/cpu/aten_ops/aten_op_executor.h
+++ b/onnxruntime/contrib_ops/cpu/aten_ops/aten_op_executor.h
@@ -10,7 +10,7 @@ namespace onnxruntime {
 namespace contrib {
 namespace aten_ops {
 
-typedef bool (*IsTensorArgumentFunc)(const char* op_name, const char* overload_name, size_t index);
+typedef bool (*IsCpuArgumentFunc)(const char* op_name, const char* overload_name, size_t index, bool is_input);
 typedef void (*ExecuteATenOperatorFunc)(const char* op_name, const char* overload_name, size_t input_size,
                                         DLManagedTensor** dlpack_inputs, size_t output_size,
                                         DLManagedTensor** dlpack_outputs);
@@ -22,17 +22,17 @@ class ATenOperatorExecutor {
     return instance;
   }
 
-  void Initialize(void* p_is_tensor_argument_func_raw, void* p_execute_aten_op_func_raw) {
-    ORT_ENFORCE(p_is_tensor_argument_func_raw && p_execute_aten_op_func_raw);
-    p_is_tensor_argument_func_ = reinterpret_cast<IsTensorArgumentFunc>(p_is_tensor_argument_func_raw);
+  void Initialize(void* p_is_cpu_argument_func_raw, void* p_execute_aten_op_func_raw) {
+    ORT_ENFORCE(p_is_cpu_argument_func_raw && p_execute_aten_op_func_raw);
+    p_is_cpu_argument_func_ = reinterpret_cast<IsCpuArgumentFunc>(p_is_cpu_argument_func_raw);
     p_execute_aten_op_func_ = reinterpret_cast<ExecuteATenOperatorFunc>(p_execute_aten_op_func_raw);
   }
 
   bool IsInitialized() { return p_execute_aten_op_func_ != nullptr; }
 
-  bool IsTensorArgument(const std::string& op_name, const std::string& overload_name, size_t index) {
-    ORT_ENFORCE(p_is_tensor_argument_func_, "ATenOperatorExecutor is not initialized.");
-    return p_is_tensor_argument_func_(op_name.c_str(), overload_name.c_str(), index);
+  bool IsCpuArgument(const std::string& op_name, const std::string& overload_name, size_t index, bool is_input) {
+    ORT_ENFORCE(p_is_cpu_argument_func_, "ATenOperatorExecutor is not initialized.");
+    return p_is_cpu_argument_func_(op_name.c_str(), overload_name.c_str(), index, is_input);
   }
 
   void operator()(const std::string& op_name, const std::string& overload_name, size_t input_size,
@@ -43,7 +43,7 @@ class ATenOperatorExecutor {
   }
 
  private:
-  IsTensorArgumentFunc p_is_tensor_argument_func_ = nullptr;
+  IsCpuArgumentFunc p_is_cpu_argument_func_ = nullptr;
   ExecuteATenOperatorFunc p_execute_aten_op_func_ = nullptr;
 };
 
diff --git a/onnxruntime/contrib_ops/cpu/bert/attention_common.h b/onnxruntime/contrib_ops/cpu/bert/attention_common.h
index 4c9c15d07a9b8..a7f83469a768d 100644
--- a/onnxruntime/contrib_ops/cpu/bert/attention_common.h
+++ b/onnxruntime/contrib_ops/cpu/bert/attention_common.h
@@ -55,6 +55,7 @@ struct AttentionParameters {
   int v_hidden_size;          // hidden size of V
   int v_head_size;            // hidden size per head of V
   int num_heads;
+  int num_splits;
   bool is_unidirectional;
   bool past_present_share_buffer;
   bool do_rotary;
@@ -82,6 +83,27 @@ struct PackedAttentionParameters {
   bool broadcast_res_pos_bias;
 };
 
+// Parameters deduced from node attributes and inputs/outputs.
+struct GroupQueryAttentionParameters {
+  int batch_size;
+  int sequence_length;          // sequence length of input query, key, value
+  int seqlen_past_kv_cache;     // sequence length of past kv tensor
+  int seqlen_present_kv_cache;  // sequence length of present kv tensor
+  int hidden_size;
+  int num_heads;
+  int head_size;
+  int kv_hidden_size;
+  int kv_num_heads;
+  int num_splits;          // number of splits for splitkv
+  bool is_unidirectional;  // causal
+  int local_window_size;
+  bool kv_share_buffer;
+  bool is_prompt;  // determines if seqlens_k is past or kv sequence length tensor
+  float scale;
+  AttentionQkvFormat qkv_format;
+  AttentionQkvFormat past_kv_format;
+};
+
 namespace attention {
 // Environment variable to enable or disable TRT fused self attention kernel. Default is 0 (enabled).
 constexpr const char* kDisableFusedSelfAttention = "ORT_DISABLE_FUSED_ATTENTION";
diff --git a/onnxruntime/contrib_ops/cpu/bert/multihead_attention.cc b/onnxruntime/contrib_ops/cpu/bert/multihead_attention.cc
index 0b55cb7804c61..694c40bf3eda6 100644
--- a/onnxruntime/contrib_ops/cpu/bert/multihead_attention.cc
+++ b/onnxruntime/contrib_ops/cpu/bert/multihead_attention.cc
@@ -16,7 +16,6 @@
 
 #include <unsupported/Eigen/SpecialFunctions>
 #include <vector>
-#include <iostream>
 
 using onnxruntime::concurrency::ThreadPool;
 
diff --git a/onnxruntime/contrib_ops/cpu/bert/multihead_attention_helper.h b/onnxruntime/contrib_ops/cpu/bert/multihead_attention_helper.h
index 1dc85e6d345d7..00e82c9844b3d 100644
--- a/onnxruntime/contrib_ops/cpu/bert/multihead_attention_helper.h
+++ b/onnxruntime/contrib_ops/cpu/bert/multihead_attention_helper.h
@@ -103,7 +103,8 @@ Status CheckInputs(const T* query,
     }
     if (past_key_dims[2] != past_value_dims[2]) {
       return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
-                             "Input 'past_key' and 'past_value' shall have same dim 2 (past_sequence_length)");
+                             "Input 'past_key' and 'past_value' shall have same dim 2 (past_sequence_length). ",
+                             past_key_dims[2], " vs ", past_value_dims[2]);
     }
     if (past_key_dims[3] != head_size) {
       return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
@@ -205,6 +206,7 @@ Status CheckInputs(const T* query,
     }
   }
 
+  int total_sequence_length = past_sequence_length + kv_sequence_length;
   AttentionMaskType mask_type = AttentionMaskType::MASK_NONE;
   if (key_padding_mask != nullptr) {
     mask_type = AttentionMaskType::MASK_UNKNOWN;
@@ -215,13 +217,21 @@ Status CheckInputs(const T* query,
       } else if (mask_dims[0] == static_cast<int64_t>(3) * static_cast<int64_t>(batch_size) + static_cast<int64_t>(2)) {
         mask_type = AttentionMaskType::MASK_1D_KEY_SEQ_LEN_START;
       }
-    } else if (mask_dims.size() == 2 && mask_dims[0] == static_cast<int64_t>(batch_size) && mask_dims[1] == static_cast<int64_t>(kv_sequence_length)) {
+    } else if (mask_dims.size() == 2 && mask_dims[0] == static_cast<int64_t>(batch_size) &&
+               mask_dims[1] == static_cast<int64_t>(kv_sequence_length)) {
+      mask_type = AttentionMaskType::MASK_2D_KEY_PADDING;
+    } else if (mask_dims.size() == 2 && mask_dims[0] == static_cast<int64_t>(batch_size) &&
+               mask_dims[1] == static_cast<int64_t>(total_sequence_length)) {
       mask_type = AttentionMaskType::MASK_2D_KEY_PADDING;
+    } else if (mask_dims.size() == 3 && mask_dims[0] == static_cast<int64_t>(batch_size) &&
+               mask_dims[1] == static_cast<int64_t>(sequence_length) &&
+               mask_dims[2] == static_cast<int64_t>(total_sequence_length)) {
+      mask_type = AttentionMaskType::MASK_3D_ATTENTION;
     }
 
     if (mask_type == AttentionMaskType::MASK_UNKNOWN) {
       return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
-                             "Input 'key_padding_mask' shape shall be (batch_size) or (batch_size, kv_sequence_length)");
+                             "Input 'key_padding_mask' shape shall be 1D, 2D, or 3D");
     }
   }
 
@@ -256,7 +266,6 @@ Status CheckInputs(const T* query,
     }
   }
 
-  int total_sequence_length = past_sequence_length + kv_sequence_length;
   bool broadcast_res_pos_bias = false;
   if (relative_position_bias != nullptr) {
     const auto& relative_position_bias_dims = relative_position_bias->Shape().GetDims();
diff --git a/onnxruntime/contrib_ops/cpu/bert/rotary_embedding.cc b/onnxruntime/contrib_ops/cpu/bert/rotary_embedding.cc
new file mode 100644
index 0000000000000..47f462d75fcc4
--- /dev/null
+++ b/onnxruntime/contrib_ops/cpu/bert/rotary_embedding.cc
@@ -0,0 +1,124 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "contrib_ops/cpu/bert/rotary_embedding.h"
+#include "contrib_ops/cpu/bert/rotary_embedding_helper.h"
+
+#include "core/platform/threadpool.h"
+
+using onnxruntime::concurrency::ThreadPool;
+using namespace onnxruntime::contrib::rotary_embedding_helper;
+
+namespace onnxruntime {
+namespace contrib {
+
+// These ops are internal-only, so register outside of onnx
+ONNX_OPERATOR_TYPED_KERNEL_EX(
+    RotaryEmbedding,
+    kMSDomain,
+    1,
+    float,
+    kCpuExecutionProvider,
+    KernelDefBuilder()
+        .TypeConstraint("T", DataTypeImpl::GetTensorType<float>())
+        .TypeConstraint("M", DataTypeImpl::GetTensorType<int64_t>()),
+    RotaryEmbedding<float>);
+
+template <typename T>
+RotaryEmbedding<T>::RotaryEmbedding(const OpKernelInfo& info) : OpKernel(info) {
+  scale = info.GetAttrOrDefault<float>("scale", 1.0);
+  interleaved = (info.GetAttrOrDefault<int64_t>("interleaved", 0) == 1);
+}
+
+template <typename T>
+Status RotaryEmbedding<T>::Compute(OpKernelContext* context) const {
+  const Tensor* input = context->Input<Tensor>(0);
+  const Tensor* position_ids = context->Input<Tensor>(1);
+  const Tensor* cos_cache = context->Input<Tensor>(2);
+  const Tensor* sin_cache = context->Input<Tensor>(3);
+
+  RotaryParameters parameters = {};
+  ORT_RETURN_IF_ERROR(rotary_embedding_helper::CheckInputs<Tensor>(input,
+                                                                   position_ids,
+                                                                   cos_cache,
+                                                                   sin_cache,
+                                                                   &parameters));
+
+  Tensor* output = context->Output(0, input->Shape());
+
+  if (parameters.sequence_length > parameters.max_sequence_length) {
+    // Launch update_cos_sin_cache kernel with scale
+    ORT_NOT_IMPLEMENTED("Updating cos_cache and sin_cache in RotaryEmbedding is not currently supported");
+  }
+
+  const T* input_src = input->Data<T>();
+  const int64_t* pos_ids_data = position_ids->Data<int64_t>();
+  const T* cos_cache_data = cos_cache->Data<T>();
+  const T* sin_cache_data = sin_cache->Data<T>();
+  T* output_dest = output->MutableData<T>();
+
+  const int batch_size = parameters.batch_size;
+  const int sequence_length = parameters.sequence_length;
+  const int num_heads = parameters.num_heads;
+  const int head_size = parameters.head_size;
+  const int position_ids_format = parameters.position_ids_format;
+  const int half_head_size = head_size / 2;
+  // Default input tensor shape is [batch, seq_len, hidden_size]
+  int head_stride = head_size;
+  int seq_stride = num_heads * head_stride;
+  int batch_stride = sequence_length * seq_stride;
+  if (parameters.transposed) {
+    // Transposed input tensor shape is [batch, num_heads, seq_len, head_size]
+    seq_stride = head_size;
+    head_stride = sequence_length * seq_stride;
+    batch_stride = num_heads * head_stride;
+  }
+
+  AllocatorPtr allocator;
+  ORT_RETURN_IF_ERROR(context->GetTempSpaceAllocator(&allocator));
+  auto* tp = context->GetOperatorThreadPool();
+
+  const int loop_len = batch_size * sequence_length * num_heads;
+  const double cost = static_cast<double>(head_size);
+  ThreadPool::TryParallelFor(tp, loop_len, cost, [&](std::ptrdiff_t begin, std::ptrdiff_t end) {
+    for (std::ptrdiff_t ptr = begin; ptr != end; ++ptr) {
+      const int b = static_cast<int>((ptr / num_heads) / sequence_length);
+      const int s = static_cast<int>((ptr / num_heads) % sequence_length);
+      const int n = static_cast<int>(ptr % num_heads);
+
+      const int block_offset = b * batch_stride + s * seq_stride + n * head_stride;
+
+      const T* input_data = input_src + block_offset;
+      T* output_data = output_dest + block_offset;
+
+      // Cache is (M, H/2)
+      const int position_id = (position_ids_format == 0)
+                                  ? static_cast<int>(pos_ids_data[0]) + s
+                                  : static_cast<int>(pos_ids_data[b * sequence_length + s]);
+      const int cache_offset = position_id * half_head_size;
+      const T* cos_data = cos_cache_data + cache_offset;
+      const T* sin_data = sin_cache_data + cache_offset;
+
+      int cache_idx = 0;
+      T sign = 0;
+      int j = 0;
+      for (int i = 0; i < head_size; i++) {
+        if (interleaved) {
+          cache_idx = (i / 2) % half_head_size;
+          sign = (i % 2 == 0) ? static_cast<T>(-1) : static_cast<T>(1);
+          j = (i % 2 == 0) ? i + 1 : i - 1;  // i - sign
+        } else {
+          cache_idx = i % half_head_size;
+          sign = (i < half_head_size) ? static_cast<T>(-1) : static_cast<T>(1);
+          j = (i + half_head_size) % head_size;
+        }
+        output_data[i] = input_data[i] * cos_data[cache_idx] + sign * input_data[j] * sin_data[cache_idx];
+      }
+    }
+  });
+
+  return Status::OK();
+}
+
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cpu/bert/rotary_embedding.h b/onnxruntime/contrib_ops/cpu/bert/rotary_embedding.h
new file mode 100644
index 0000000000000..be834a66cdc69
--- /dev/null
+++ b/onnxruntime/contrib_ops/cpu/bert/rotary_embedding.h
@@ -0,0 +1,23 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+#include "core/common/common.h"
+#include "core/framework/op_kernel.h"
+
+namespace onnxruntime {
+namespace contrib {
+
+template <typename T>
+class RotaryEmbedding final : public OpKernel {
+ public:
+  RotaryEmbedding(const OpKernelInfo& info);
+  Status Compute(OpKernelContext* context) const override;
+
+ protected:
+  float scale;
+  bool interleaved;
+};
+
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cpu/bert/rotary_embedding_helper.h b/onnxruntime/contrib_ops/cpu/bert/rotary_embedding_helper.h
new file mode 100644
index 0000000000000..7b2e8289f7b06
--- /dev/null
+++ b/onnxruntime/contrib_ops/cpu/bert/rotary_embedding_helper.h
@@ -0,0 +1,131 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+#include "core/common/common.h"
+#include "core/providers/common.h"
+
+namespace onnxruntime {
+namespace contrib {
+namespace rotary_embedding_helper {
+
+// Parameters deduced from node attributes and inputs/outputs.
+struct RotaryParameters {
+  int batch_size;           // Batch size used by input
+  int sequence_length;      // Sequence length used by input
+  int hidden_size;          // Hidden size used by input
+  int head_size;            // Head size used by cos/sin cache * 2
+  int num_heads;            // num_heads = hidden_size / head_size
+  int max_sequence_length;  // Sequence length used by cos/sin cache
+  int position_ids_format;  // Format of position ids - 0 is (1), 1 is (batch_size, sequence_length)
+  bool transposed;          // Whether the input tensor has been transposed into (batch, num_heads, seq_len, hidden)
+};
+
+template <typename T>
+Status CheckInputs(const T* input,
+                   const T* position_ids,
+                   const T* cos_cache,
+                   const T* sin_cache,
+                   void* parameters) {
+  //    input        : (batch_size, sequence_length, hidden_size)
+  //    position ids : (1) or (batch_size, sequence_length)
+  //    cos cache    : (max_sequence_length, head_size / 2)
+  //    sin cache    : (max_sequence_length, head_size / 2)
+
+  // Check input
+  const auto& input_dims = input->Shape().GetDims();
+  if (input_dims.size() != 3 && input_dims.size() != 4) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Input 'x' is expected to have 3 or 4 dimensions, got ",
+                           input_dims.size());
+  }
+  // Check position_ids
+  const auto& position_ids_dims = position_ids->Shape().GetDims();
+  if (!onnxruntime::IsScalarOr1ElementVector(position_ids) && position_ids_dims.size() != 2) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Input 'position_ids' is expected to have 0, 1, or 2 ",
+                           "dimensions, got ", position_ids_dims.size());
+  }
+  // Check cos_cache and sin_cache
+  const auto& cos_cache_dims = cos_cache->Shape().GetDims();
+  if (cos_cache_dims.size() != 2) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Input 'cos_cache' is expected to have 2 dimensions, got ",
+                           cos_cache_dims.size());
+  }
+  const auto& sin_cache_dims = sin_cache->Shape().GetDims();
+  if (sin_cache_dims.size() != 2) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Input 'sin_cache' is expected to have 2 dimensions, got ",
+                           sin_cache_dims.size());
+  }
+  if (cos_cache_dims[0] != sin_cache_dims[0] || cos_cache_dims[1] != sin_cache_dims[1]) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Inputs 'cos_cache' and 'sin_cache' are expected to have ",
+                           "the same shape");
+  }
+
+  // Get attributes from inputs
+  int batch_size = static_cast<int>(input_dims[0]);
+  int sequence_length = static_cast<int>(input_dims[1]);
+  int hidden_size = static_cast<int>(input_dims[2]);
+
+  bool transposed = false;
+  if (input_dims.size() == 4) {
+    // input is [batch, num_heads, seq, head_size]
+    sequence_length = static_cast<int>(input_dims[2]);
+    hidden_size = static_cast<int>(input_dims[1]) * static_cast<int>(input_dims[3]);
+    transposed = true;
+  }
+  int max_sequence_length = static_cast<int>(cos_cache_dims[0]);
+  int head_size = static_cast<int>(cos_cache_dims[1]) * 2;
+  int num_heads = hidden_size / head_size;
+  int position_ids_format = -1;
+
+  // Check position_ids input shapes
+  if (!onnxruntime::IsScalarOr1ElementVector(position_ids)) {
+    if (batch_size != static_cast<int>(position_ids_dims[0])) {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Input 'position_ids' dimension 0 should be of size ",
+                             "batch_size, got ", position_ids_dims[0]);
+    }
+    if (sequence_length != static_cast<int>(position_ids_dims[1])) {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Input 'position_ids' dimension 1 should be of size ",
+                             "sequence_length, got ", position_ids_dims[1]);
+    }
+    position_ids_format = 1;
+  } else {
+    position_ids_format = 0;
+  }
+  // Check cos_cache input shapes
+  if (max_sequence_length != static_cast<int>(cos_cache_dims[0])) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Input 'cos_cache' dimension 0 should be same as ",
+                           "max_sequence_length, got ", cos_cache_dims[0]);
+  }
+  if ((head_size / 2) != static_cast<int>(cos_cache_dims[1])) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Input 'cos_cache' dimension 1 should be same as ",
+                           "head_size / 2, got ", cos_cache_dims[1]);
+  }
+  // Check sin_cache input shapes
+  if (max_sequence_length != static_cast<int>(sin_cache_dims[0])) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Input 'sin_cache' dimension 0 should be same as ",
+                           "max_sequence_length, got ", sin_cache_dims[0]);
+  }
+  if ((head_size / 2) != static_cast<int>(sin_cache_dims[1])) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Input 'sin_cache' dimension 1 should be same as ",
+                           "head_size / 2, got ", sin_cache_dims[1]);
+  }
+
+  // Set rotary parameters
+  if (parameters != nullptr) {
+    RotaryParameters* output_parameters = reinterpret_cast<RotaryParameters*>(parameters);
+    output_parameters->batch_size = batch_size;
+    output_parameters->sequence_length = sequence_length;
+    output_parameters->hidden_size = hidden_size;
+    output_parameters->head_size = head_size;
+    output_parameters->num_heads = num_heads;
+    output_parameters->max_sequence_length = max_sequence_length;
+    output_parameters->position_ids_format = position_ids_format;
+    output_parameters->transposed = transposed;
+  }
+
+  return Status::OK();
+}
+
+}  // namespace rotary_embedding_helper
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cpu/cpu_contrib_kernels.cc b/onnxruntime/contrib_ops/cpu/cpu_contrib_kernels.cc
index 0ec5088808656..f9d9b13f0fedc 100644
--- a/onnxruntime/contrib_ops/cpu/cpu_contrib_kernels.cc
+++ b/onnxruntime/contrib_ops/cpu/cpu_contrib_kernels.cc
@@ -13,12 +13,14 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1,
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, GridSample);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, Attention);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, BeamSearch);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, WhisperBeamSearch);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, EmbedLayerNormalization);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, ExpandDims);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, FusedConv);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, FusedGemm);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, GreedySearch);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, MultiHeadAttention);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, RotaryEmbedding);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, Sampling);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, AttnLSTM);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, string, Tokenizer);
@@ -27,6 +29,8 @@ class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, WordC
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, GatherND);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, TransposeMatMul);  // backward compatibility
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, FusedMatMul);
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, MatMulNBits);
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, MatMulBnb4);
 #ifndef ORT_MINIMAL_BUILD
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, MatMulFpQ4);
 #endif
@@ -122,6 +126,8 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain,
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, double, SimplifiedLayerNormalization);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, SkipLayerNormalization);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, double, SkipLayerNormalization);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, SkipSimplifiedLayerNormalization);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, double, SkipSimplifiedLayerNormalization);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, Inverse);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, Trilu);
 
@@ -244,12 +250,14 @@ Status RegisterCpuContribKernels(KernelRegistry& kernel_registry) {
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, GridSample)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, Attention)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, BeamSearch)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, WhisperBeamSearch)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, EmbedLayerNormalization)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, ExpandDims)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, FusedConv)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, FusedGemm)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, GreedySearch)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, MultiHeadAttention)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, RotaryEmbedding)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, Sampling)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, AttnLSTM)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, string, Tokenizer)>,
@@ -262,6 +270,8 @@ Status RegisterCpuContribKernels(KernelRegistry& kernel_registry) {
     BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, MurmurHash3)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, TransposeMatMul)>,  // backward compatibility
     BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, FusedMatMul)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, MatMulNBits)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, MatMulBnb4)>,
 #ifndef ORT_MINIMAL_BUILD
     BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, MatMulFpQ4)>,
 #endif
@@ -295,6 +305,8 @@ Status RegisterCpuContribKernels(KernelRegistry& kernel_registry) {
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, double, SimplifiedLayerNormalization)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, SkipLayerNormalization)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, double, SkipLayerNormalization)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, SkipSimplifiedLayerNormalization)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, double, SkipSimplifiedLayerNormalization)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, Inverse)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, Trilu)>,
 
diff --git a/onnxruntime/contrib_ops/cpu/quantization/blockwise_quant_block_bnb4.h b/onnxruntime/contrib_ops/cpu/quantization/blockwise_quant_block_bnb4.h
new file mode 100644
index 0000000000000..cb8e97a592d8c
--- /dev/null
+++ b/onnxruntime/contrib_ops/cpu/quantization/blockwise_quant_block_bnb4.h
@@ -0,0 +1,202 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+
+#include <cstdint>
+#include <algorithm>
+#include <cmath>
+
+namespace onnxruntime {
+namespace contrib {
+
+#if defined(_MSC_VER)
+#define FORCEINLINE __forceinline
+#else
+#define FORCEINLINE __attribute__((always_inline)) inline
+#endif
+
+typedef enum Bnb_DataType_t {
+  FP4 = 0,
+  NF4 = 1,
+} Bnb_DataType_t;
+
+FORCEINLINE uint8_t QuantizeOneFP4(float x) {
+  // FP4 with bias of 3
+  // first bit is a sign
+  // subnormals
+  // 0b000 = 0
+  // 0b001 = 0.0625
+  // 0b110 = 2
+  // 0b111 = 3
+  // 0b100 = 4
+  // 0b101 = 6
+  // 0b010 = 8
+  // 0b011 = 12
+
+  // we do a binary search
+  // the pivots are divided by 12 (the FP4 absmax)
+  // since we assum input data is in [-1.0, 1.0]
+
+  // !be careful here, its easy to make a mistake
+  // that is difficult to noice if you add an extra
+  // zero somewhere!
+
+  uint8_t sign = x < 0 ? 0b1000 : 0b0000;
+  x = fabsf(x);
+  if (x > 0.29166667f) {
+    if (x > 0.583333f) {
+      if (x > 0.8333333f) {
+        return 0b0011 + sign;
+      } else {
+        return 0b0010 + sign;
+      }
+    } else if (x > 0.4166667f) {
+      return 0b101 + sign;
+    } else {
+      return 0b100 + sign;
+    }
+  } else if (x > 0.0859375f) {
+    if (x > 0.20833333f) {
+      return 0b0111 + sign;
+    } else {
+      return 0b0110 + sign;
+    }
+  } else if (x > 0.00260417f) {
+    return 0b0001 + sign;
+  } else {
+    return 0b0000 + sign;
+  }
+}
+
+FORCEINLINE uint8_t QuantizeOneNF4(float x) {
+  if (x > 0.03979014977812767f) {
+    if (x > 0.3893125355243683f) {      // 1
+      if (x > 0.6427869200706482f) {    // 11
+        if (x > 0.8614784181118011f) {  // 111
+          return 0b1111;
+        } else {
+          return 0b1110;
+        }
+      } else if (x > 0.5016634166240692f) {  // 110
+        return 0b1101;
+      } else {
+        return 0b1100;
+      }
+    } else if (x > 0.2035212516784668f) {  // 10
+      if (x > 0.2920137718319893f) {       // 101
+        return 0b1011;
+      } else {
+        return 0b1010;
+      }
+    } else if (x > 0.1202552504837513f) {  // 100
+      return 0b1001;
+    } else {
+      return 0b1000;
+    }
+  } else if (x > -0.33967943489551544f) {  // 0
+    if (x > -0.13791173323988914f) {       // 01
+      if (x > -0.045525018125772476f) {    // 011
+        return 0b0111;
+      } else {
+        return 0b0110;
+      }
+    } else if (x > -0.23460740596055984f) {  // 010
+      return 0b0101;
+    } else {
+      return 0b0100;
+    }
+  } else if (x > -0.6106329262256622f) {  // 00
+    if (x > -0.4599952697753906f) {       // 001
+      return 0b0011;
+    } else {
+      return 0b0010;
+    }
+  } else if (x > -0.8480964004993439f) {  // 000
+    return 0b0001;
+  } else {
+    return 0b0000;
+  }
+}
+
+template <int32_t DATA_TYPE>
+FORCEINLINE uint8_t QuantizeOneBnb4(float x) {
+  if constexpr (DATA_TYPE == FP4)
+    return QuantizeOneFP4(x);
+  else
+    return QuantizeOneNF4(x);
+}
+
+template <typename T, int32_t block_size, int32_t DATA_TYPE>
+FORCEINLINE void QuantizeBlockBnb4(const T* src, uint8_t* dst, T& absmax_block, int32_t block_idx, int32_t numel) {
+  float local_absmax = 0.0f;
+
+  int32_t block_len = std::min(block_size, numel - block_idx * block_size);
+  int32_t src_offset = block_idx * block_size;
+  int32_t dst_offset = block_idx * block_size / 2;
+
+  for (int32_t idx = 0; idx < block_len; idx++) {
+    const float v = static_cast<float>(src[src_offset + idx]);
+    local_absmax = fmaxf(local_absmax, fabsf(v));
+  }
+
+  absmax_block = static_cast<T>(local_absmax);
+  const float reciprocal_absmax = local_absmax ? 1.0f / local_absmax : 0.0f;
+
+  for (int32_t idx = 0; idx < block_len; idx += 2) {
+    const float v0 = static_cast<float>(src[src_offset + idx]) * reciprocal_absmax;
+    const uint8_t vi0 = QuantizeOneBnb4<DATA_TYPE>(v0);
+
+    const float v1 = (idx + 1 < block_len) ? static_cast<float>(src[src_offset + idx + 1]) * reciprocal_absmax : 0;
+    const uint8_t vi1 = QuantizeOneBnb4<DATA_TYPE>(v1);
+
+    dst[dst_offset + idx / 2] = (vi0 << 4) | vi1;
+  }
+}
+
+static float fp4_qaunt_map[16] = {0.00000000f, 5.208333333e-03f, 0.66666667f, 1.00000000f,
+                                  0.33333333f, 0.50000000f, 0.16666667f, 0.25000000f,
+                                  -0.00000000f, -5.208333333e-03f, -0.66666667f, -1.00000000f,
+                                  -0.33333333f, -0.50000000f, -0.16666667f, -0.25000000f};
+
+static float nf4_qaunt_map[16] = {-1.0f,
+                                  -0.6961928009986877f,
+                                  -0.5250730514526367f,
+                                  -0.39491748809814453f,
+                                  -0.28444138169288635f,
+                                  -0.18477343022823334f,
+                                  -0.09105003625154495f,
+                                  0.0f,
+                                  0.07958029955625534f,
+                                  0.16093020141124725f,
+                                  0.24611230194568634f,
+                                  0.33791524171829224f,
+                                  0.44070982933044434f,
+                                  0.5626170039176941f,
+                                  0.7229568362236023f,
+                                  1.0f};
+
+template <typename T, int32_t DATA_TYPE>
+FORCEINLINE T DequantizeOneBnb4(uint8_t x) {
+  if constexpr (DATA_TYPE == FP4)
+    return static_cast<T>(fp4_qaunt_map[x]);
+  else
+    return static_cast<T>(nf4_qaunt_map[x]);
+}
+
+template <typename T, int32_t block_size, int32_t DATA_TYPE>
+FORCEINLINE void DequantizeBlockBnb4(const uint8_t* src, T* dst, T absmax_block, int32_t block_idx, int32_t numel) {
+  int32_t block_len = std::min(block_size, numel - block_idx * block_size);
+  int32_t src_offset = block_idx * block_size / 2;
+  int32_t dst_offset = block_idx * block_size;
+
+  for (int32_t idx = 0; idx < block_len; idx += 2) {
+    const uint8_t val = src[src_offset + idx / 2];
+
+    dst[dst_offset + idx] = DequantizeOneBnb4<T, DATA_TYPE>(val >> 4) * absmax_block;
+    if (idx + 1 < block_len) dst[dst_offset + idx + 1] = DequantizeOneBnb4<T, DATA_TYPE>(val & 0xF) * absmax_block;
+  }
+}
+
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cpu/quantization/dequantize_blockwise_bnb4.h b/onnxruntime/contrib_ops/cpu/quantization/dequantize_blockwise_bnb4.h
new file mode 100644
index 0000000000000..5ddb77e5b5ee3
--- /dev/null
+++ b/onnxruntime/contrib_ops/cpu/quantization/dequantize_blockwise_bnb4.h
@@ -0,0 +1,143 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+
+#include "blockwise_quant_block_bnb4.h"
+
+#include <vector>
+
+#include "core/common/safeint.h"
+#include "core/framework/float16.h"
+#include "core/platform/threadpool.h"
+#include <iostream>
+
+namespace onnxruntime {
+namespace contrib {
+
+template <typename T, int32_t block_size, int32_t DATA_TYPE>
+void QuantizeBlockwiseBnb4(
+    uint8_t* dst,  // shape: [(N * K + 1) / 2]
+    const T* src,  // shape: [N, K]
+    T* absmax,     // shape: [(N * K + block_size - 1) / block_size]
+    int32_t N,
+    int32_t K,
+    onnxruntime::concurrency::ThreadPool* thread_pool) {
+  int32_t numel = N * K;
+  int32_t total_block_count = (numel + block_size - 1) / block_size;
+
+  concurrency::ThreadPool::TryBatchParallelFor(
+      thread_pool,
+      total_block_count,
+      [&](ptrdiff_t block_idx) {
+        QuantizeBlockBnb4<T, block_size, DATA_TYPE>(
+            src,
+            dst,
+            absmax[block_idx],
+            static_cast<int32_t>(block_idx),
+            numel);
+      },
+      0);
+}
+
+#define QuantizeBlockwiseBn4DataTyped(block_size, quant_type)                       \
+  if (quant_type == FP4)                                                            \
+    QuantizeBlockwiseBnb4<T, block_size, FP4>(dst, src, absmax, N, K, thread_pool); \
+  else                                                                              \
+    QuantizeBlockwiseBnb4<T, block_size, NF4>(dst, src, absmax, N, K, thread_pool);
+
+template <typename T>
+void QuantizeBlockwiseBnb4(
+    uint8_t* dst,  // shape: [(N * K + 1) / 2]
+    const T* src,  // shape: [N, K]
+    T* absmax,     // shape: [(N * K + block_size - 1) / block_size]
+    int32_t block_size,
+    int32_t quant_type,
+    int32_t N,
+    int32_t K,
+    onnxruntime::concurrency::ThreadPool* thread_pool) {
+  ORT_ENFORCE(
+      quant_type == FP4 || quant_type == NF4,
+      "Invalid quant_type, only 0 (FP4) and 1 (NF4) are supported.");
+
+  if (block_size == 16) {
+    QuantizeBlockwiseBn4DataTyped(16, quant_type);
+  } else if (block_size == 32) {
+    QuantizeBlockwiseBn4DataTyped(32, quant_type);
+  } else if (block_size == 64) {
+    QuantizeBlockwiseBn4DataTyped(64, quant_type);
+  } else if (block_size == 128) {
+    QuantizeBlockwiseBn4DataTyped(128, quant_type);
+  } else if (block_size == 256) {
+    QuantizeBlockwiseBn4DataTyped(256, quant_type);
+  } else {
+    ORT_NOT_IMPLEMENTED("only block size 16, 32, 64, 128, 256 are supported.");
+  }
+}
+
+#undef QuantizeBlockwiseBn4DataTyped
+
+template <typename T, int32_t block_size, int32_t DATA_TYPE>
+void DequantizeBlockwiseBnb4(
+    T* dst,              // shape: [N, K]
+    const uint8_t* src,  // shape: [(N * K + 1) / 2)]
+    const T* absmax,     // shape: [(N * K + block_size - 1) / block_size]
+    int32_t N,
+    int32_t K,
+    onnxruntime::concurrency::ThreadPool* thread_pool) {
+  int32_t numel = N * K;
+  int32_t total_block_count = (numel + block_size - 1) / block_size;
+
+  concurrency::ThreadPool::TryBatchParallelFor(
+      thread_pool,
+      total_block_count,
+      [&](ptrdiff_t block_idx) {
+        DequantizeBlockBnb4<T, block_size, DATA_TYPE>(
+            src,
+            dst,
+            absmax[block_idx],
+            static_cast<int32_t>(block_idx),
+            numel);
+      },
+      0);
+}
+
+#define DequantizeBlockwiseBn4DataTyped(block_size, quant_type)                       \
+  if (quant_type == FP4)                                                              \
+    DequantizeBlockwiseBnb4<T, block_size, FP4>(dst, src, absmax, N, K, thread_pool); \
+  else                                                                                \
+    DequantizeBlockwiseBnb4<T, block_size, NF4>(dst, src, absmax, N, K, thread_pool);
+
+template <typename T>
+void DequantizeBlockwiseBnb4(
+    T* dst,              // shape: [N, K]
+    const uint8_t* src,  // shape: [(N * K + 1) / 2)]
+    const T* absmax,     // shape: [(N * K + block_size - 1) / block_size]
+    int32_t block_size,
+    int32_t quant_type,
+    int32_t N,
+    int32_t K,
+    onnxruntime::concurrency::ThreadPool* thread_pool) {
+  ORT_ENFORCE(
+      quant_type == FP4 || quant_type == NF4,
+      "Invalid quant_type, only 0 (FP4) and 1 (NF4) are supported.");
+
+  if (block_size == 16) {
+    DequantizeBlockwiseBn4DataTyped(16, quant_type);
+  } else if (block_size == 32) {
+    DequantizeBlockwiseBn4DataTyped(32, quant_type);
+  } else if (block_size == 64) {
+    DequantizeBlockwiseBn4DataTyped(64, quant_type);
+  } else if (block_size == 128) {
+    DequantizeBlockwiseBn4DataTyped(128, quant_type);
+  } else if (block_size == 256) {
+    DequantizeBlockwiseBn4DataTyped(256, quant_type);
+  } else {
+    ORT_NOT_IMPLEMENTED("only block size 16, 32, 64, 128, 256 are supported.");
+  }
+}
+
+#undef DequantizeBlockwiseBn4DataTyped
+
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cpu/quantization/matmul_bnb4.cc b/onnxruntime/contrib_ops/cpu/quantization/matmul_bnb4.cc
new file mode 100644
index 0000000000000..b898c956b6e6a
--- /dev/null
+++ b/onnxruntime/contrib_ops/cpu/quantization/matmul_bnb4.cc
@@ -0,0 +1,113 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "core/common/safeint.h"
+#include "core/framework/op_kernel.h"
+#include "core/providers/cpu/math/matmul_helper.h"
+#include "core/providers/common.h"
+#include "dequantize_blockwise_bnb4.h"
+#include "core/mlas/inc/mlas.h"
+
+namespace onnxruntime {
+namespace contrib {
+
+class MatMulBnb4 final : public OpKernel {
+ public:
+  MatMulBnb4(const OpKernelInfo& info) : OpKernel(info) {
+    ORT_ENFORCE(Status::OK() == info.GetAttr<int64_t>("K", &K_));
+    ORT_ENFORCE(Status::OK() == info.GetAttr<int64_t>("N", &N_));
+    ORT_ENFORCE(Status::OK() == info.GetAttr<int64_t>("block_size", &block_size_));
+    ORT_ENFORCE(Status::OK() == info.GetAttr<int64_t>("quant_type", &quant_type_));
+    ORT_ENFORCE(
+        quant_type_ == FP4 || quant_type_ == NF4,
+        "Invalid quant_type, only 0 (FP4) and 1 (NF4) are supported.");
+    is_training_mode_ = static_cast<bool>(info.GetAttrOrDefault("training_mode", static_cast<int64_t>(0)));
+    transB_ = static_cast<bool>(info.GetAttrOrDefault("transB", static_cast<int64_t>(1)));
+  }
+
+  Status Compute(OpKernelContext* context) const override;
+
+ private:
+  int64_t K_;
+  int64_t N_;
+  int64_t block_size_;
+  int64_t quant_type_;
+  bool is_training_mode_;
+  bool transB_;
+};
+
+Status MatMulBnb4::Compute(OpKernelContext* ctx) const {
+  concurrency::ThreadPool* thread_pool = ctx->GetOperatorThreadPool();
+
+  const Tensor* a = ctx->Input<Tensor>(0);
+  const Tensor* b_quant = ctx->Input<Tensor>(1);
+  const Tensor* absmax = ctx->Input<Tensor>(2);
+
+  const float* a_data = a->Data<float>();
+  const uint8_t* b_quant_data = b_quant->Data<uint8_t>();
+  const float* absmax_data = absmax->Data<float>();
+
+  AllocatorPtr allocator;
+  auto status = ctx->GetTempSpaceAllocator(&allocator);
+  ORT_RETURN_IF_ERROR(status);
+  auto tmp_b_data_ptr = IAllocator::MakeUniquePtr<float>(allocator, SafeInt<size_t>(K_) * N_);
+  DequantizeBlockwiseBnb4<float>(
+      tmp_b_data_ptr.get(),
+      b_quant_data,
+      absmax_data,
+      static_cast<int32_t>(block_size_),
+      static_cast<int32_t>(quant_type_),
+      static_cast<int32_t>(N_),
+      static_cast<int32_t>(K_),
+      thread_pool);
+
+  constexpr bool transa = false;
+  const bool transb = transB_;
+  TensorShape b_shape({N_, K_});
+  MatMulComputeHelper helper;
+  ORT_RETURN_IF_ERROR(helper.Compute(a->Shape(), b_shape, transa, transb));
+
+  Tensor* y = ctx->Output(0, helper.OutputShape());
+
+  // Bail out early if the output is going to be empty
+  if (y->Shape().Size() == 0) return Status::OK();
+
+  auto* y_data = y->MutableData<float>();
+
+  const size_t max_len = helper.OutputOffsets().size();
+  const size_t M = static_cast<size_t>(helper.M());
+  const size_t N = static_cast<size_t>(helper.N());
+  const size_t K = static_cast<size_t>(helper.K());
+  const size_t lda = helper.Lda(transa);
+  const size_t ldb = helper.Ldb(transb);
+
+  // TODO: implement with native kernel
+  std::vector<MLAS_SGEMM_DATA_PARAMS> data(max_len);
+  for (size_t i = 0; i < max_len; i++) {
+    data[i].BIsPacked = false;
+    data[i].A = a_data + helper.LeftOffsets()[i];
+    data[i].lda = lda;
+    data[i].B = tmp_b_data_ptr.get() + helper.RightOffsets()[i];
+    data[i].ldb = ldb;
+    data[i].C = y_data + helper.OutputOffsets()[i];
+    data[i].ldc = N;
+    data[i].alpha = 1.f;
+    data[i].beta = 0.0f;
+  }
+  MlasGemmBatch(CblasNoTrans, CblasTrans, M, N, K, data.data(), max_len, thread_pool);
+
+  return Status::OK();
+}
+
+ONNX_OPERATOR_KERNEL_EX(
+    MatMulBnb4,
+    kMSDomain,
+    1,
+    kCpuExecutionProvider,
+    KernelDefBuilder()
+        .TypeConstraint("T1", DataTypeImpl::GetTensorType<float>())
+        .TypeConstraint("T2", DataTypeImpl::GetTensorType<uint8_t>()),
+    MatMulBnb4);
+
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cpu/quantization/matmul_nbits.cc b/onnxruntime/contrib_ops/cpu/quantization/matmul_nbits.cc
new file mode 100644
index 0000000000000..320a05bb97dac
--- /dev/null
+++ b/onnxruntime/contrib_ops/cpu/quantization/matmul_nbits.cc
@@ -0,0 +1,151 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "core/common/narrow.h"
+#include "core/common/safeint.h"
+#include "core/framework/op_kernel.h"
+#include "core/mlas/inc/mlas.h"
+#include "core/mlas/inc/mlas_qnbit.h"
+#include "core/mlas/inc/mlas_q4.h"
+#include "core/providers/cpu/math/matmul_helper.h"
+#include "core/providers/common.h"
+
+namespace onnxruntime {
+namespace contrib {
+
+class MatMulNBits final : public OpKernel {
+ public:
+  MatMulNBits(const OpKernelInfo& info)
+      : OpKernel(info),
+        K_{narrow<size_t>(info.GetAttr<int64_t>("K"))},
+        N_{narrow<size_t>(info.GetAttr<int64_t>("N"))},
+        block_size_{narrow<size_t>(info.GetAttr<int64_t>("block_size"))},
+        nbits_{narrow<size_t>(info.GetAttr<int64_t>("bits"))} {
+    ORT_ENFORCE(nbits_ == 4,
+                "Only 4b quantization is supported for MatMulNBits op, additional bits support is planned.");
+  }
+
+  Status Compute(OpKernelContext* context) const override;
+
+ private:
+  const size_t K_;
+  const size_t N_;
+  const size_t block_size_;
+  const size_t nbits_;
+  const bool column_wise_quant_{true};
+};
+
+Status MatMulNBits::Compute(OpKernelContext* ctx) const {
+  concurrency::ThreadPool* thread_pool = ctx->GetOperatorThreadPool();
+
+  const Tensor* a = ctx->Input<Tensor>(0);
+  const Tensor* b = ctx->Input<Tensor>(1);
+  const Tensor* scales = ctx->Input<Tensor>(2);
+  const Tensor* zero_points = ctx->Input<Tensor>(3);
+
+  const auto* a_data = a->Data<float>();
+  const uint8_t* b_data = b->Data<uint8_t>();
+  const auto* scales_data = scales->Data<float>();
+  const auto* zero_points_data = zero_points == nullptr ? nullptr : zero_points->Data<uint8_t>();
+
+  TensorShape b_shape({static_cast<int64_t>(N_), static_cast<int64_t>(K_)});
+
+  MatMulComputeHelper helper;
+  ORT_RETURN_IF_ERROR(helper.Compute(a->Shape(), b_shape, false, true));
+
+  Tensor* y = ctx->Output(0, helper.OutputShape());
+
+  // Bail out early if the output is going to be empty
+  if (y->Shape().Size() == 0)
+    return Status::OK();
+
+  auto* y_data = y->MutableData<float>();
+
+  const size_t batch_count = helper.OutputOffsets().size();
+  const size_t M = static_cast<size_t>(helper.M());
+  const size_t N = static_cast<size_t>(helper.N());
+  const size_t K = static_cast<size_t>(helper.K());
+  const size_t lda = helper.Lda(false);
+
+  if (MlasIsSQNBitGemmAvailable(nbits_, block_size_)) {
+    // number of bytes or elements between adjacent matrices
+    size_t b_data_matrix_stride_in_bytes, b_scale_matrix_stride, b_zero_point_matrix_stride_in_bytes;
+    MlasBlockwiseQuantizedBufferSizes(static_cast<int>(nbits_), static_cast<int>(block_size_), /* columnwise */ true,
+                                      static_cast<int>(K), static_cast<int>(N),
+                                      b_data_matrix_stride_in_bytes, b_scale_matrix_stride,
+                                      &b_zero_point_matrix_stride_in_bytes);
+
+    const size_t b_matrix_size = K * N;
+
+    InlinedVector<MLAS_SQNBIT_GEMM_DATA_PARAMS> data(batch_count);
+    for (size_t i = 0; i < batch_count; ++i) {
+      const size_t b_matrix_offset = helper.RightOffsets()[i] / b_matrix_size;
+
+      data[i].A = a_data + helper.LeftOffsets()[i];
+      data[i].lda = lda;
+      data[i].QuantBData = b_data + b_matrix_offset * b_data_matrix_stride_in_bytes;
+      data[i].QuantBScale = scales_data + b_matrix_offset * b_scale_matrix_stride;
+      data[i].QuantBZeroPoint = zero_points_data != nullptr
+                                    ? zero_points_data + b_matrix_offset * b_zero_point_matrix_stride_in_bytes
+                                    : nullptr;
+      data[i].C = y_data + helper.OutputOffsets()[i];
+      data[i].ldc = N;
+    }
+
+    MlasSQNBitGemmBatch(M, N, K, batch_count, nbits_, block_size_, data.data(), thread_pool);
+
+    return Status::OK();
+  }
+
+  const size_t ldb = helper.Ldb(true);
+
+  AllocatorPtr allocator;
+  ORT_RETURN_IF_ERROR(ctx->GetTempSpaceAllocator(&allocator));
+  auto tmp_b_data_ptr = IAllocator::MakeUniquePtr<float>(allocator, SafeInt<size_t>(K_) * N_);
+  // dequantize b, only 4b quantization is supported for now
+  MlasDequantizeBlockwise<float, 4>(
+      tmp_b_data_ptr.get(),               // dequantized output
+      b_data,                             // quantized input
+      scales_data,                        // quantization scales
+      zero_points_data,                   // quantization zero points
+      static_cast<int32_t>(block_size_),  // quantization block size
+      column_wise_quant_,                 // columnwise quantization or row-wise
+      static_cast<int32_t>(K_),           // number of rows in quantized input
+      static_cast<int32_t>(N_),           // number of columns in quantized input
+      thread_pool);
+
+#if 0  // for debug
+  auto tm_b_data_ptr_trans = IAllocator::MakeUniquePtr<float>(allocator, SafeInt<size_t>(K_) * N_);
+  MlasTranspose(tmp_b_data_ptr.get(), tm_b_data_ptr_trans.get(), N_, K_);
+#endif
+
+  std::vector<MLAS_SGEMM_DATA_PARAMS> data(batch_count);
+  for (size_t i = 0; i < batch_count; i++) {
+    data[i].BIsPacked = false;
+    data[i].A = a_data + helper.LeftOffsets()[i];
+    data[i].lda = lda;
+    data[i].B = tmp_b_data_ptr.get() + helper.RightOffsets()[i];
+    data[i].ldb = ldb;
+    data[i].C = y_data + helper.OutputOffsets()[i];
+    data[i].ldc = N;
+    data[i].alpha = 1.f;
+    data[i].beta = 0.0f;
+  }
+  MlasGemmBatch(CblasNoTrans, CblasTrans,
+                M, N, K, data.data(), batch_count, thread_pool);
+
+  return Status::OK();
+}
+
+ONNX_OPERATOR_KERNEL_EX(
+    MatMulNBits,
+    kMSDomain,
+    1,
+    kCpuExecutionProvider,
+    KernelDefBuilder()
+        .TypeConstraint("T1", DataTypeImpl::GetTensorType<float>())
+        .TypeConstraint("T2", DataTypeImpl::GetTensorType<uint8_t>()),
+    MatMulNBits);
+
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cpu/skip_layer_norm.cc b/onnxruntime/contrib_ops/cpu/skip_layer_norm.cc
index e86a12d9fb873..4e103c2556a7a 100644
--- a/onnxruntime/contrib_ops/cpu/skip_layer_norm.cc
+++ b/onnxruntime/contrib_ops/cpu/skip_layer_norm.cc
@@ -20,20 +20,29 @@ namespace contrib {
       kCpuExecutionProvider,                                      \
       KernelDefBuilder()                                          \
           .TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), \
-      SkipLayerNorm<T>);
+      SkipLayerNorm<T, false>);                                   \
+  ONNX_OPERATOR_TYPED_KERNEL_EX(                                  \
+      SkipSimplifiedLayerNormalization,                           \
+      kMSDomain,                                                  \
+      1,                                                          \
+      T,                                                          \
+      kCpuExecutionProvider,                                      \
+      KernelDefBuilder()                                          \
+          .TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), \
+      SkipLayerNorm<T, true>);
 
 REGISTER_KERNEL_TYPED(float)
 REGISTER_KERNEL_TYPED(double)
 
-template <typename T>
-SkipLayerNorm<T>::SkipLayerNorm(const OpKernelInfo& op_kernel_info)
+template <typename T, bool simplified>
+SkipLayerNorm<T, simplified>::SkipLayerNorm(const OpKernelInfo& op_kernel_info)
     : OpKernel(op_kernel_info) {
   ORT_ENFORCE(op_kernel_info.GetAttr<float>("epsilon", &epsilon_).IsOK());
   ORT_ENFORCE(epsilon_ >= 0);
 }
 
-template <typename T>
-Status SkipLayerNorm<T>::Compute(OpKernelContext* p_ctx) const {
+template <typename T, bool simplified>
+Status SkipLayerNorm<T, simplified>::Compute(OpKernelContext* p_ctx) const {
   const Tensor* input = p_ctx->Input<Tensor>(0);
   const Tensor* skip = p_ctx->Input<Tensor>(1);
   const Tensor* gamma = p_ctx->Input<Tensor>(2);
@@ -102,10 +111,16 @@ Status SkipLayerNorm<T>::Compute(OpKernelContext* p_ctx) const {
         }
 
         mean = mean / hidden_size;
-        mean_square = sqrt(mean_square / hidden_size - mean * mean + epsilon_);
+        if (simplified) {
+          mean_square = sqrt(mean_square / hidden_size + epsilon_);
+        } else {
+          mean_square = sqrt(mean_square / hidden_size - mean * mean + epsilon_);
+        }
 
         for (int64_t h = 0; h < hidden_size; h++) {
-          if (nullptr == beta_data) {
+          if (simplified) {
+            p_output[h] = p_output[h] / mean_square * gamma_data[h];
+          } else if (nullptr == beta_data) {
             p_output[h] = (p_output[h] - mean) / mean_square * gamma_data[h];
           } else {
             p_output[h] = (p_output[h] - mean) / mean_square * gamma_data[h] + beta_data[h];
diff --git a/onnxruntime/contrib_ops/cpu/skip_layer_norm.h b/onnxruntime/contrib_ops/cpu/skip_layer_norm.h
index 7723541cb6b18..69edf4609e340 100644
--- a/onnxruntime/contrib_ops/cpu/skip_layer_norm.h
+++ b/onnxruntime/contrib_ops/cpu/skip_layer_norm.h
@@ -10,7 +10,7 @@
 namespace onnxruntime {
 namespace contrib {
 
-template <typename T>
+template <typename T, bool simplified>
 class SkipLayerNorm final : public OpKernel {
  public:
   SkipLayerNorm(const OpKernelInfo& op_kernel_info);
diff --git a/onnxruntime/contrib_ops/cpu/transformers/beam_search.cc b/onnxruntime/contrib_ops/cpu/transformers/beam_search.cc
index c391f47e1927b..93cda00e5a3c3 100644
--- a/onnxruntime/contrib_ops/cpu/transformers/beam_search.cc
+++ b/onnxruntime/contrib_ops/cpu/transformers/beam_search.cc
@@ -52,28 +52,38 @@ namespace contrib {
       kCpuExecutionProvider,                                      \
       (*KernelDefBuilder::Create())                               \
           .TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), \
-      transformers::BeamSearch);
+      transformers::BeamSearch);                                  \
+                                                                  \
+  ONNX_OPERATOR_TYPED_KERNEL_EX(                                  \
+      WhisperBeamSearch,                                          \
+      kMSDomain,                                                  \
+      1,                                                          \
+      T,                                                          \
+      kCpuExecutionProvider,                                      \
+      (*KernelDefBuilder::Create())                               \
+          .TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), \
+      transformers::WhisperBeamSearch);
 
 REGISTER_KERNEL_TYPED(float)
 
 namespace transformers {
 
 void BeamSearch::Init(const OpKernelInfo& info) {
-  parameters_.ParseFromAttributes(info);
+  parameters_->ParseFromAttributes(info);
 
   // Model_type could be either 0 (GPT-2), 1 (encoder-decoder like T5), or 2 (Whisper)
-  ORT_ENFORCE(parameters_.model_type == IGenerationParameters::kModelTypeGpt ||
-              parameters_.model_type == IGenerationParameters::kModelTypeT5 ||
-              parameters_.model_type == IGenerationParameters::kModelTypeWhisper);
+  ORT_ENFORCE(parameters_->model_type == IGenerationParameters::kModelTypeGpt ||
+              parameters_->model_type == IGenerationParameters::kModelTypeT5 ||
+              parameters_->model_type == IGenerationParameters::kModelTypeWhisper);
 
   ONNX_NAMESPACE::GraphProto proto;
 
-  if (parameters_.model_type != IGenerationParameters::kModelTypeGpt) {
+  if (parameters_->model_type != IGenerationParameters::kModelTypeGpt) {
     // Make sure the encoder sub-graph attribute is present for the T5 and Whisper models.
     ORT_ENFORCE(info.GetAttr<ONNX_NAMESPACE::GraphProto>("encoder", &proto).IsOK());
   }
 
-  if (parameters_.model_type == IGenerationParameters::kModelTypeGpt) {
+  if (parameters_->model_type == IGenerationParameters::kModelTypeGpt) {
     // Check if the init_decoder sub-graph attribute is present for the GPT2 model.
     if (info.GetAttr<ONNX_NAMESPACE::GraphProto>("init_decoder", &proto).IsOK()) {
       has_init_decoder_ = true;
@@ -90,11 +100,11 @@ Status BeamSearch::SetupSubgraphExecutionInfo(const SessionState& session_state,
                                               const std::string& attribute_name,
                                               const SessionState& subgraph_session_state) {
   const auto& node = Node();
-  if (parameters_.model_type == IGenerationParameters::kModelTypeGpt) {
+  if (parameters_->model_type == IGenerationParameters::kModelTypeGpt) {
     if (attribute_name == "decoder") {
       ORT_ENFORCE(gpt_subgraph_ == nullptr, "SetupSubgraphExecutionInfo should only be called once for each subgraph.");
       auto res = gpt_details::CreateGptSubgraphAndUpdateParameters(node, session_state, attribute_name,
-                                                                   subgraph_session_state, parameters_);
+                                                                   subgraph_session_state, *parameters_);
 
       auto status = res.first;
       if (!status.IsOK()) {
@@ -109,7 +119,7 @@ Status BeamSearch::SetupSubgraphExecutionInfo(const SessionState& session_state,
       // updated once for the 'decoder' attribute). In future, find a way to update 'parameters' only once based on only one subgraph
       // attribute.
       auto res = gpt_details::CreateGptSubgraphAndUpdateParameters(node, session_state, attribute_name,
-                                                                   subgraph_session_state, parameters_);
+                                                                   subgraph_session_state, *parameters_);
 
       auto status = res.first;
       if (!status.IsOK()) {
@@ -119,7 +129,7 @@ Status BeamSearch::SetupSubgraphExecutionInfo(const SessionState& session_state,
       init_run_gpt_subgraph_ = std::move(res.second);
       init_run_decoder_feeds_fetches_manager_ = init_run_gpt_subgraph_->GetFeedsFetchesManager();
     }
-  } else if (parameters_.model_type == IGenerationParameters::kModelTypeT5) {
+  } else if (parameters_->model_type == IGenerationParameters::kModelTypeT5) {
     if (attribute_name == "encoder") {
       ORT_ENFORCE(t5_encoder_subgraph_ == nullptr,
                   "SetupSubgraphExecutionInfo should only be called once for each subgraph.");
@@ -129,7 +139,7 @@ Status BeamSearch::SetupSubgraphExecutionInfo(const SessionState& session_state,
       ORT_RETURN_IF_ERROR(t5_encoder_subgraph_->Setup(session_state, subgraph_session_state));
       encoder_feeds_fetches_manager_ = t5_encoder_subgraph_->GetFeedsFetchesManager();
 
-      if (parameters_.decoder_start_token_id < 0) {
+      if (parameters_->decoder_start_token_id < 0) {
         ORT_RETURN_IF(t5_encoder_subgraph_->num_subgraph_inputs != 2,
                       "Encoder subgraph shall have 2 inputs when decoder_start_token_id attribute is empty");
       } else {
@@ -144,12 +154,12 @@ Status BeamSearch::SetupSubgraphExecutionInfo(const SessionState& session_state,
                                                                  subgraph_session_state.GetGraphViewer());
       ORT_RETURN_IF_ERROR(t5_decoder_subgraph_->Setup(session_state, subgraph_session_state));
       decoder_feeds_fetches_manager_ = t5_decoder_subgraph_->GetFeedsFetchesManager();
-      parameters_.SetSubgraphParameters(t5_decoder_subgraph_->vocab_size,
-                                        t5_decoder_subgraph_->num_heads,
-                                        t5_decoder_subgraph_->head_size,
-                                        t5_decoder_subgraph_->num_layers);
+      parameters_->SetSubgraphParameters(t5_decoder_subgraph_->vocab_size,
+                                         t5_decoder_subgraph_->num_heads,
+                                         t5_decoder_subgraph_->head_size,
+                                         t5_decoder_subgraph_->num_layers);
     }
-  } else if (parameters_.model_type == IGenerationParameters::kModelTypeWhisper) {
+  } else if (parameters_->model_type == IGenerationParameters::kModelTypeWhisper) {
     if (attribute_name == "encoder") {
       ORT_ENFORCE(whisper_encoder_subgraph_ == nullptr,
                   "SetupSubgraphExecutionInfo should only be called once for each subgraph.");
@@ -169,10 +179,10 @@ Status BeamSearch::SetupSubgraphExecutionInfo(const SessionState& session_state,
                                                                            subgraph_session_state.GetGraphViewer());
       ORT_RETURN_IF_ERROR(whisper_decoder_subgraph_->Setup(session_state, subgraph_session_state));
       decoder_feeds_fetches_manager_ = whisper_decoder_subgraph_->GetFeedsFetchesManager();
-      parameters_.SetSubgraphParameters(whisper_decoder_subgraph_->vocab_size,
-                                        whisper_decoder_subgraph_->num_heads,
-                                        whisper_decoder_subgraph_->head_size,
-                                        whisper_decoder_subgraph_->num_layers);
+      parameters_->SetSubgraphParameters(whisper_decoder_subgraph_->vocab_size,
+                                         whisper_decoder_subgraph_->num_heads,
+                                         whisper_decoder_subgraph_->head_size,
+                                         whisper_decoder_subgraph_->num_layers);
     }
   }
 
@@ -197,9 +207,9 @@ Status BeamSearch::Compute(OpKernelContext* ctx) const {
   concurrency::ThreadPool* thread_pool = ctx->GetOperatorThreadPool();
 
   // Make a copy of parameters since we will update it based on inputs later
-  BeamSearchParameters parameters = parameters_;
+  BeamSearchParameters parameters = *parameters_;
 
-  if (parameters_.model_type == IGenerationParameters::kModelTypeGpt) {
+  if (parameters.model_type == IGenerationParameters::kModelTypeGpt) {
     if (!gpt_subgraph_->IsOutputFloat16()) {  // Output float32
       BeamSearchGpt<float> impl{
           *ctx_internal,
@@ -253,7 +263,7 @@ Status BeamSearch::Compute(OpKernelContext* ctx) const {
   ORT_ENFORCE(encoder_session_state, "Subgraph SessionState was not found for 'encoder' attribute.");
   ORT_ENFORCE(encoder_feeds_fetches_manager_, "CreateFeedsFetchesManager must be called prior to execution of graph.");
 
-  if (parameters_.model_type == IGenerationParameters::kModelTypeT5) {
+  if (parameters.model_type == IGenerationParameters::kModelTypeT5) {
     // Subgraph has constraint that the output is either float or float16
     if (!t5_decoder_subgraph_->IsOutputFloat16()) {
       BeamSearchT5<float> impl{
@@ -303,7 +313,7 @@ Status BeamSearch::Compute(OpKernelContext* ctx) const {
   }
 
   // Change the CreateEncoderInputs function for Whisper shapes
-  if (parameters_.model_type == IGenerationParameters::kModelTypeWhisper) {
+  if (parameters.model_type == IGenerationParameters::kModelTypeWhisper) {
     // Subgraph has constraint that the output is either float or float16
     if (!whisper_decoder_subgraph_->IsOutputFloat16()) {
       BeamSearchWhisper<float> impl{
@@ -319,7 +329,10 @@ Status BeamSearch::Compute(OpKernelContext* ctx) const {
           update_decoder_feeds_func_ ? update_decoder_feeds_func_ : GenerationCpuDeviceHelper::UpdateDecoderFeeds<float>,
           expand_buffer_float_func_ ? expand_buffer_float_func_ : GenerationCpuDeviceHelper::ExpandBuffer<float>,
           expand_buffer_float16_func_ ? expand_buffer_float16_func_ : GenerationCpuDeviceHelper::ExpandBuffer<MLFloat16>,
-          create_beam_scorer_func_};
+          create_beam_scorer_func_,
+          update_decoder_cross_qk_func_ ? update_decoder_cross_qk_func_ : GenerationCpuDeviceHelper::UpdateDecoderCrossQK,
+          finalize_decoder_cross_qk_func_ ? finalize_decoder_cross_qk_func_ : GenerationCpuDeviceHelper::FinalizeDecoderCrossQK};
+
 #ifdef USE_CUDA
       ORT_RETURN_IF_ERROR(impl.InitializeCuda(reorder_past_state_func_, init_cache_indir_func_, cuda_device_prop_, cuda_device_arch_));
 #endif
@@ -340,7 +353,10 @@ Status BeamSearch::Compute(OpKernelContext* ctx) const {
           update_decoder_feeds_fp16_func_ ? update_decoder_feeds_fp16_func_ : GenerationCpuDeviceHelper::UpdateDecoderFeeds<MLFloat16>,
           expand_buffer_float_func_,
           expand_buffer_float16_func_,
-          create_beam_scorer_func_};
+          create_beam_scorer_func_,
+          update_decoder_cross_qk_func_ ? update_decoder_cross_qk_func_ : GenerationCpuDeviceHelper::UpdateDecoderCrossQK,
+          finalize_decoder_cross_qk_func_ ? finalize_decoder_cross_qk_func_ : GenerationCpuDeviceHelper::FinalizeDecoderCrossQK};
+
 #ifdef USE_CUDA
       ORT_RETURN_IF_ERROR(impl.InitializeCuda(reorder_past_state_func_, init_cache_indir_func_, cuda_device_prop_, cuda_device_arch_));
 #endif
@@ -354,6 +370,10 @@ Status BeamSearch::Compute(OpKernelContext* ctx) const {
   ORT_THROW("Model type is not supported.");
 }
 
+Status WhisperBeamSearch::Compute(OpKernelContext* ctx) const {
+  return BeamSearch::Compute(ctx);
+}
+
 }  // namespace transformers
 }  // namespace contrib
 }  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cpu/transformers/beam_search.h b/onnxruntime/contrib_ops/cpu/transformers/beam_search.h
index 93b7e08fabf94..fad7dcc75bcab 100644
--- a/onnxruntime/contrib_ops/cpu/transformers/beam_search.h
+++ b/onnxruntime/contrib_ops/cpu/transformers/beam_search.h
@@ -25,11 +25,12 @@ using namespace onnxruntime::controlflow;  // namespace of IControlFlowKernel
 
 class BeamSearch : public IControlFlowKernel {
  public:
-  BeamSearch(const OpKernelInfo& info)
+  BeamSearch(const OpKernelInfo& info, std::unique_ptr<BeamSearchParameters> param = std::make_unique<BeamSearchParameters>())
       : IControlFlowKernel(info),
         encoder_feeds_fetches_manager_(nullptr),
         decoder_feeds_fetches_manager_(nullptr),
         dumper_(nullptr) {
+    parameters_.swap(param);
     Init(info);
   }
 
@@ -88,12 +89,16 @@ class BeamSearch : public IControlFlowKernel {
       const GenerationDeviceHelper::UpdateDecoderFeedsFunc<MLFloat16>& update_decoder_feeds_fp16_func,
       const GenerationDeviceHelper::ExpandBufferFunc<int32_t>& expand_buffer_int32_func,
       const GenerationDeviceHelper::ExpandBufferFunc<float>& expand_buffer_float_func,
-      const GenerationDeviceHelper::ExpandBufferFunc<MLFloat16>& expand_buffer_float16_func) {
+      const GenerationDeviceHelper::ExpandBufferFunc<MLFloat16>& expand_buffer_float16_func,
+      const GenerationDeviceHelper::UpdateDecoderCrossQKFunc& update_decoder_cross_qk_func,
+      const GenerationDeviceHelper::FinalizeDecoderCrossQKFunc& finalize_decoder_cross_qk_func) {
     update_decoder_feeds_func_ = update_decoder_feeds_func;
     update_decoder_feeds_fp16_func_ = update_decoder_feeds_fp16_func;
     expand_buffer_int32_func_ = expand_buffer_int32_func;
     expand_buffer_float_func_ = expand_buffer_float_func;
     expand_buffer_float16_func_ = expand_buffer_float16_func;
+    update_decoder_cross_qk_func_ = update_decoder_cross_qk_func;
+    finalize_decoder_cross_qk_func_ = finalize_decoder_cross_qk_func;
   }
 
 #ifdef USE_CUDA
@@ -101,7 +106,7 @@ class BeamSearch : public IControlFlowKernel {
   int cuda_device_arch_ = 0;
 #endif
 
- private:
+ protected:
   // Device specific functions
   GenerationDeviceHelper::AddToFeedsFunc add_to_feeds_func_;
   GenerationDeviceHelper::TopkFunc topk_func_;
@@ -172,9 +177,21 @@ class BeamSearch : public IControlFlowKernel {
 
   IConsoleDumper* dumper_;
 
-  BeamSearchParameters parameters_;
+  std::unique_ptr<BeamSearchParameters> parameters_;
 
   bool has_init_decoder_ = false;
+
+  GenerationDeviceHelper::UpdateDecoderCrossQKFunc update_decoder_cross_qk_func_;
+
+  GenerationDeviceHelper::FinalizeDecoderCrossQKFunc finalize_decoder_cross_qk_func_;
+};
+
+class WhisperBeamSearch : public BeamSearch {
+ public:
+  WhisperBeamSearch(const OpKernelInfo& info)
+      : BeamSearch(info, std::unique_ptr<BeamSearchParameters>(new WhisperBeamSearchParameters())) {}
+
+  Status Compute(OpKernelContext* ctx) const override;
 };
 
 }  // namespace transformers
diff --git a/onnxruntime/contrib_ops/cpu/transformers/beam_search_impl_base.h b/onnxruntime/contrib_ops/cpu/transformers/beam_search_impl_base.h
index 8832b4314bad3..29b38fc234de5 100644
--- a/onnxruntime/contrib_ops/cpu/transformers/beam_search_impl_base.h
+++ b/onnxruntime/contrib_ops/cpu/transformers/beam_search_impl_base.h
@@ -17,34 +17,35 @@ struct BeamSearchState : IBeamSearchState<T> {
   BeamSearchState(const IGenerationParameters& parameters,
                   AllocatorPtr allocator,
                   int has_decoder_masked_attention,
-                  bool use_position) {
+                  bool use_position,
+                  Stream* stream) {
     size_t batch_beam_size = SafeInt<size_t>(parameters.batch_size) * parameters.num_beams;
 
     size_t next_token_size = SafeInt<size_t>(batch_beam_size) * parameters.vocab_size;
-    this->next_token_logits = AllocateBuffer<T>(allocator, next_token_logits_buffer_, next_token_size);
-    this->next_token_scores = AllocateBuffer<float>(allocator, next_token_scores_buffer_, next_token_size);
-    this->next_tokens = AllocateBuffer<int32_t>(allocator, next_tokens_buffer_, SafeInt<size_t>(2) * batch_beam_size);
-    this->next_indices = AllocateBuffer<int32_t>(allocator, next_indices_buffer_, SafeInt<size_t>(2) * batch_beam_size);
-    this->next_scores = AllocateBuffer<float>(allocator, next_scores_buffer_, SafeInt<size_t>(2) * batch_beam_size);
+    this->next_token_logits = AllocateBuffer<T>(allocator, next_token_logits_buffer_, next_token_size, stream);
+    this->next_token_scores = AllocateBuffer<float>(allocator, next_token_scores_buffer_, next_token_size, stream);
+    this->next_tokens = AllocateBuffer<int32_t>(allocator, next_tokens_buffer_, SafeInt<size_t>(2) * batch_beam_size, stream);
+    this->next_indices = AllocateBuffer<int32_t>(allocator, next_indices_buffer_, SafeInt<size_t>(2) * batch_beam_size, stream);
+    this->next_scores = AllocateBuffer<float>(allocator, next_scores_buffer_, SafeInt<size_t>(2) * batch_beam_size, stream);
 
     constexpr size_t max_parts_of_vocab = 128;
     size_t topk_buffer_size = SafeInt<size_t>(batch_beam_size) * (max_parts_of_vocab + 1) * parameters.num_beams * 2 * 2;
-    this->topk_buffer = AllocateBuffer<float>(allocator, topk_temp_buffer_, topk_buffer_size);
+    this->topk_buffer = AllocateBuffer<float>(allocator, topk_temp_buffer_, topk_buffer_size, stream);
 
     if (allocator->Info().device.Type() == OrtDevice::GPU) {
       size_t sequences_elements = SafeInt<size_t>(2) * batch_beam_size * parameters.max_length;
-      this->sequences_device = AllocateBuffer<int32_t>(allocator, sequences_device_buffer_, sequences_elements);
+      this->sequences_device = AllocateBuffer<int32_t>(allocator, sequences_device_buffer_, sequences_elements, stream);
     }
 
     if (use_position) {
-      this->next_positions = AllocateBuffer<int32_t>(allocator, next_positions_buffer_, batch_beam_size);
+      this->next_positions = AllocateBuffer<int32_t>(allocator, next_positions_buffer_, batch_beam_size, stream);
     }
 
-    this->beam_scores = AllocateBuffer<float>(allocator, beam_scores_buffer_, batch_beam_size);
+    this->beam_scores = AllocateBuffer<float>(allocator, beam_scores_buffer_, batch_beam_size, stream);
 
     if (parameters.output_scores) {
       size_t elements = SafeInt<size_t>(parameters.max_length - parameters.sequence_length) * parameters.batch_size * parameters.num_beams * parameters.vocab_size;
-      this->scores = AllocateBuffer<float>(allocator, scores_buffer_, elements);
+      this->scores = AllocateBuffer<float>(allocator, scores_buffer_, elements, stream);
       this->remaining_scores = this->scores;
     }
 
@@ -68,35 +69,38 @@ struct BeamSearchState : IBeamSearchState<T> {
   }
 
  private:
-  BufferUniquePtr next_token_logits_buffer_;
-  BufferUniquePtr next_token_scores_buffer_;
-  BufferUniquePtr next_tokens_buffer_;
-  BufferUniquePtr next_indices_buffer_;
-  BufferUniquePtr next_scores_buffer_;
-  BufferUniquePtr next_positions_buffer_;
-  BufferUniquePtr beam_scores_buffer_;
-  BufferUniquePtr scores_buffer_;
-  BufferUniquePtr topk_temp_buffer_;
-  BufferUniquePtr sequences_device_buffer_;
+  IAllocatorUniquePtr<void> next_token_logits_buffer_;
+  IAllocatorUniquePtr<void> next_token_scores_buffer_;
+  IAllocatorUniquePtr<void> next_tokens_buffer_;
+  IAllocatorUniquePtr<void> next_indices_buffer_;
+  IAllocatorUniquePtr<void> next_scores_buffer_;
+  IAllocatorUniquePtr<void> next_positions_buffer_;
+  IAllocatorUniquePtr<void> beam_scores_buffer_;
+  IAllocatorUniquePtr<void> scores_buffer_;
+  IAllocatorUniquePtr<void> topk_temp_buffer_;
+  IAllocatorUniquePtr<void> sequences_device_buffer_;
 };
 
 struct BeamSearchCpuState : IBeamSearchCpuState {
   Sequences sequences;
 
-  BeamSearchCpuState(const IGenerationParameters& parameters, AllocatorPtr allocator, bool is_cuda)
+  BeamSearchCpuState(const IGenerationParameters& parameters, AllocatorPtr allocator, bool is_cuda, Stream* stream)
       : parameters_{parameters} {
-    sequence_lengths = AllocateBuffer<int32_t>(allocator, sequence_lengths_buffer_, batch_beam_size_);
+    sequence_lengths = AllocateBuffer<int32_t>(allocator, sequence_lengths_buffer_, batch_beam_size_, stream);
 
     size_t sequences_bytes = SafeInt<size_t>(2) * batch_beam_size_ * parameters.max_length;
-    sequences_space = AllocateBuffer<int32_t>(allocator, sequences_space_buffer_, sequences_bytes, true /* fill */);
+    sequences_space = AllocateBuffer<int32_t>(allocator, sequences_space_buffer_, sequences_bytes, stream, true /* fill */);
     sequences.Init(sequences_space, batch_beam_size_, parameters.sequence_length, parameters.max_length);
 
     if (is_cuda) {
       // buffers used by CUDA operator but not by CPU operator.
-      topk_scores = AllocateBuffer<float>(allocator, topk_scores_buffer_, 2 * static_cast<size_t>(batch_beam_size_));
-      topk_tokens = AllocateBuffer<int32_t>(allocator, topk_tokens_buffer_, 2 * static_cast<size_t>(batch_beam_size_));
-      topk_indices = AllocateBuffer<int32_t>(allocator, topk_indices_buffer_, 2 * static_cast<size_t>(batch_beam_size_));
-      final_beam_scores = AllocateBuffer<float>(allocator, final_beam_scores_buffer_, batch_beam_size_);
+      topk_scores = AllocateBuffer<float>(allocator, topk_scores_buffer_, 2 * static_cast<size_t>(batch_beam_size_), stream);
+      topk_tokens = AllocateBuffer<int32_t>(allocator, topk_tokens_buffer_, 2 * static_cast<size_t>(batch_beam_size_), stream);
+      topk_indices = AllocateBuffer<int32_t>(allocator, topk_indices_buffer_, 2 * static_cast<size_t>(batch_beam_size_), stream);
+      final_beam_scores = AllocateBuffer<float>(allocator, final_beam_scores_buffer_, batch_beam_size_, stream);
+
+      size_t next_token_size = SafeInt<size_t>(batch_beam_size_) * parameters.vocab_size;
+      next_token_scores = AllocateBuffer<float>(allocator, next_token_scores_buffer_, next_token_size, stream);
     }
   }
 
@@ -124,12 +128,13 @@ struct BeamSearchCpuState : IBeamSearchCpuState {
   const IGenerationParameters& parameters_;
   const int batch_beam_size_{parameters_.batch_size * parameters_.num_beams};
 
-  BufferUniquePtr final_beam_scores_buffer_;
-  BufferUniquePtr sequence_lengths_buffer_;
-  BufferUniquePtr topk_scores_buffer_;
-  BufferUniquePtr topk_tokens_buffer_;
-  BufferUniquePtr topk_indices_buffer_;
-  BufferUniquePtr sequences_space_buffer_;
+  IAllocatorUniquePtr<void> final_beam_scores_buffer_;
+  IAllocatorUniquePtr<void> sequence_lengths_buffer_;
+  IAllocatorUniquePtr<void> topk_scores_buffer_;
+  IAllocatorUniquePtr<void> topk_tokens_buffer_;
+  IAllocatorUniquePtr<void> topk_indices_buffer_;
+  IAllocatorUniquePtr<void> sequences_space_buffer_;
+  IAllocatorUniquePtr<void> next_token_scores_buffer_;
 };
 
 // Base class of beam search implementation that is common for GPT-2, T5, and Whisper.
diff --git a/onnxruntime/contrib_ops/cpu/transformers/beam_search_impl_gpt.h b/onnxruntime/contrib_ops/cpu/transformers/beam_search_impl_gpt.h
index 205d94fae9fab..56d950ca2f41e 100644
--- a/onnxruntime/contrib_ops/cpu/transformers/beam_search_impl_gpt.h
+++ b/onnxruntime/contrib_ops/cpu/transformers/beam_search_impl_gpt.h
@@ -215,7 +215,8 @@ Status BeamSearchGpt<T>::Execute(const FeedsFetchesManager* init_run_feeds_fetch
 
   BeamSearchCpuState cpu_state{*parameters,
                                this->cpu_allocator_,
-                               this->IsCuda()};
+                               this->IsCuda(),
+                               this->ort_stream_};
 
   // buffer in GPU for input_ids, position_ids and attention_mask
   IAllocatorUniquePtr<char> buffer;
@@ -240,7 +241,8 @@ Status BeamSearchGpt<T>::Execute(const FeedsFetchesManager* init_run_feeds_fetch
   BeamSearchState<T> beam_state{*parameters,
                                 this->temp_space_allocator_,
                                 gpt_subgraph_.has_decoder_masked_attention_,
-                                true /* use_position */};
+                                true /* use_position */,
+                                this->ort_stream_};
 
   init_beam_state_func_(&beam_state,
                         cpu_state.sequence_lengths,
diff --git a/onnxruntime/contrib_ops/cpu/transformers/beam_search_impl_t5.h b/onnxruntime/contrib_ops/cpu/transformers/beam_search_impl_t5.h
index 14a0db57c45de..94547887d3a90 100644
--- a/onnxruntime/contrib_ops/cpu/transformers/beam_search_impl_t5.h
+++ b/onnxruntime/contrib_ops/cpu/transformers/beam_search_impl_t5.h
@@ -144,7 +144,8 @@ Status BeamSearchT5<T>::Execute(const FeedsFetchesManager& encoder_feeds_fetches
 
   BeamSearchCpuState cpu_state{*parameters,
                                this->cpu_allocator_,
-                               this->IsCuda()};
+                               this->IsCuda(),
+                               this->ort_stream_};
 
   IAllocatorUniquePtr<char> buffer;
 
@@ -195,7 +196,8 @@ Status BeamSearchT5<T>::Execute(const FeedsFetchesManager& encoder_feeds_fetches
   BeamSearchState<T> beam_state{*parameters,
                                 this->temp_space_allocator_,
                                 decoder_subgraph_.has_decoder_masked_attention_,
-                                false /* use_position */};
+                                false /* use_position */,
+                                this->ort_stream_};
 
   init_beam_state_func_(&beam_state,
                         cpu_state.sequence_lengths,
diff --git a/onnxruntime/contrib_ops/cpu/transformers/beam_search_impl_whisper.h b/onnxruntime/contrib_ops/cpu/transformers/beam_search_impl_whisper.h
index 198dec011c56f..91b93a125ad7a 100644
--- a/onnxruntime/contrib_ops/cpu/transformers/beam_search_impl_whisper.h
+++ b/onnxruntime/contrib_ops/cpu/transformers/beam_search_impl_whisper.h
@@ -36,7 +36,9 @@ class BeamSearchWhisper : public BeamSearchBase<T> {
                     const GenerationDeviceHelper::UpdateDecoderFeedsFunc<T>& update_decoder_feeds_func,
                     const GenerationDeviceHelper::ExpandBufferFunc<float>& expand_buffer_float_func,
                     const GenerationDeviceHelper::ExpandBufferFunc<MLFloat16>& expand_buffer_float16_func,
-                    const GenerationDeviceHelper::CreateBeamScorer& create_beam_scorer_func)
+                    const GenerationDeviceHelper::CreateBeamScorer& create_beam_scorer_func,
+                    const GenerationDeviceHelper::UpdateDecoderCrossQKFunc& update_decoder_cross_qk_func,
+                    const GenerationDeviceHelper::FinalizeDecoderCrossQKFunc& finalize_decoder_cross_qk_func)
       : BeamSearchBase<T>(context, decoder_session_state, thread_pool,
                           ort_stream, cuda_dumper, params,
                           topk_func, process_logits_func, device_copy_func, device_copy_int32_func),
@@ -49,7 +51,11 @@ class BeamSearchWhisper : public BeamSearchBase<T> {
         update_decoder_feeds_func_(update_decoder_feeds_func),
         expand_buffer_float_func_(expand_buffer_float_func),
         expand_buffer_float16_func_(expand_buffer_float16_func),
-        create_beam_scorer_func_(create_beam_scorer_func) {}
+        create_beam_scorer_func_(create_beam_scorer_func),
+        update_decoder_cross_qk_func_(update_decoder_cross_qk_func),
+        finalize_decoder_cross_qk_func_(finalize_decoder_cross_qk_func),
+        cuda_device_prop_(nullptr),
+        cuda_device_arch_(0) {}
 
 #ifdef USE_CUDA
   Status InitializeCuda(
@@ -95,6 +101,8 @@ class BeamSearchWhisper : public BeamSearchBase<T> {
   GenerationDeviceHelper::ExpandBufferFunc<MLFloat16> expand_buffer_float16_func_;
   GenerationDeviceHelper::CreateBeamScorer create_beam_scorer_func_;
 
+  const GenerationDeviceHelper::UpdateDecoderCrossQKFunc update_decoder_cross_qk_func_;
+  const GenerationDeviceHelper::FinalizeDecoderCrossQKFunc finalize_decoder_cross_qk_func_;
   const void* cuda_device_prop_ = nullptr;
   int cuda_device_arch_ = 0;
 };
@@ -122,6 +130,17 @@ Status BeamSearchWhisper<T>::Execute(const FeedsFetchesManager& encoder_feeds_fe
   TensorShape scores_shape(&scores_dims[0], sizeof(scores_dims) / sizeof(scores_dims[0]));
   Tensor* output_scores = this->context_.Output(2, scores_shape);
 
+  if (parameters->no_speech_probs_output_id > 0) {
+    TensorShape no_speech_probs_shape{parameters->batch_size};
+    Tensor* no_speech_probs = this->context_.Output(parameters->no_speech_probs_output_id, no_speech_probs_shape);
+    if (no_speech_probs && no_speech_probs->MutableData<T>()) {
+      ORT_ENFORCE(parameters->no_speech_token >= 0 && parameters->no_speech_token < parameters->vocab_size,
+                  "no_speech_token id out of range, it is ", parameters->no_speech_token,
+                  ", vocab_size is ", parameters->vocab_size);
+      this->parameters_->no_speech_probs = (void*)no_speech_probs->MutableData<T>();
+    }
+  }
+
   // Update the flag to indicate whether scores exists in output
   this->parameters_->output_scores = (output_scores != nullptr);
 
@@ -136,7 +155,8 @@ Status BeamSearchWhisper<T>::Execute(const FeedsFetchesManager& encoder_feeds_fe
 
   BeamSearchCpuState cpu_state{*parameters,
                                this->cpu_allocator_,
-                               this->IsCuda()};
+                               this->IsCuda(),
+                               this->ort_stream_};
 
   IAllocatorUniquePtr<char> buffer;
 
@@ -188,7 +208,8 @@ Status BeamSearchWhisper<T>::Execute(const FeedsFetchesManager& encoder_feeds_fe
   BeamSearchState<T> beam_state{*parameters,
                                 this->temp_space_allocator_,
                                 decoder_subgraph_.has_decoder_masked_attention_,
-                                false /* use_position */};
+                                false /* use_position */,
+                                this->ort_stream_};
 
   init_beam_state_func_(&beam_state,
                         cpu_state.sequence_lengths,
@@ -222,6 +243,16 @@ Status BeamSearchWhisper<T>::Execute(const FeedsFetchesManager& encoder_feeds_fe
   std::vector<OrtValue> decoder_feeds;
   int current_length = parameters->sequence_length;
 
+  // for decoder subgraph output cross qk
+  int64_t frames_of_k = 0LL;
+  Tensor* cross_qk_output = nullptr;  // output tensor
+  int64_t cross_qk_layer_head_pair_count = 0LL;
+  OrtValue cross_qk_buffer_value;
+  float* cross_qk_buffer_data = nullptr;
+  std::vector<int32_t> cross_qk_all_layer_heads;
+  const int32_t* cross_qk_layer_head_pairs = nullptr;
+  IAllocatorUniquePtr<float*> qk_layer_pointers;  // if needed, device array hold the cross qk data pointers, shape of [num_layers]
+
   std::vector<OrtValue> decoder_fetches;
 
   if (current_length + 1 < parameters->max_length) {
@@ -265,6 +296,41 @@ Status BeamSearchWhisper<T>::Execute(const FeedsFetchesManager& encoder_feeds_fe
       }
     }
 
+    if (decoder_subgraph_.output_cross_qk_) {
+      ORT_ENFORCE(decoder_subgraph_.has_decoder_masked_attention_, "decoder subgraph: output_cross_qk could only work with has_decoder_masked_attention");
+      ORT_ENFORCE(decoder_subgraph_.past_present_share_buffer_, "decoder subgraph: output_cross_qk could only work with past_present_share_buffer");
+
+      cross_qk_layer_head_pair_count = parameters->num_layers * parameters->num_heads;
+      const auto* input_tensor_cross_qk_layer_head = this->context_.template Input<Tensor>(parameters->cross_qk_layer_head_input_id);
+      ORT_ENFORCE(input_tensor_cross_qk_layer_head != nullptr, "Must specify input cross_qk_layer_head");
+      cross_qk_layer_head_pair_count = input_tensor_cross_qk_layer_head->Shape()[0];
+      cross_qk_layer_head_pairs = input_tensor_cross_qk_layer_head->template Data<int32_t>();  // it is on GPU
+
+      size_t decoder_input_first_cross_key = static_cast<size_t>(decoder_subgraph_.GetFirstPastInputIndex()) + (2 * decoder_subgraph_.num_layers);
+      auto first_cross_attention_key = decoder_feeds[decoder_input_first_cross_key].GetMutable<Tensor>();
+      frames_of_k = first_cross_attention_key->Shape()[2];
+
+      TensorShape layer_cross_qk_shape{
+          static_cast<int64_t>(parameters->BatchBeamSize()),
+          static_cast<int64_t>(parameters->num_heads),
+          1LL,
+          static_cast<int64_t>(frames_of_k)};
+      for (int layer = 0; layer < decoder_subgraph_.num_layers; layer++) {
+        OrtValue cross_qk_value;
+        Tensor::InitOrtValue(DataTypeImpl::GetType<float>(), layer_cross_qk_shape, this->temp_space_allocator_, cross_qk_value);
+        decoder_fetches.emplace_back(cross_qk_value);
+      }
+
+      TensorShape cross_qk_shape{
+          static_cast<int64_t>(parameters->batch_size),
+          static_cast<int64_t>(parameters->num_beams),
+          cross_qk_layer_head_pair_count,
+          static_cast<int64_t>(parameters->max_length),
+          frames_of_k};
+      Tensor::InitOrtValue(DataTypeImpl::GetType<float>(), cross_qk_shape, this->temp_space_allocator_, cross_qk_buffer_value);
+      cross_qk_buffer_data = cross_qk_buffer_value.GetMutable<Tensor>()->MutableData<float>();
+    }
+
     if (decoder_subgraph_.has_decoder_masked_attention_) {
       size_t offset = static_cast<size_t>(decoder_subgraph_.GetFirstPastInputIndex());
       // Need to check cross attention's past key tensor size, suppose all layers cross attention key size are same
@@ -316,6 +382,21 @@ Status BeamSearchWhisper<T>::Execute(const FeedsFetchesManager& encoder_feeds_fe
 
     ORT_RETURN_IF_ERROR(status);
 
+    if (decoder_subgraph_.output_cross_qk_) {
+      int decoder_output_first_cross_qk = decoder_subgraph_.GetFirstPresentOutputIndex() + (2 * decoder_subgraph_.num_layers);
+      ORT_RETURN_IF_ERROR(this->update_decoder_cross_qk_func_(
+          iteration_counter,
+          this->ort_stream_,
+          &decoder_fetches[decoder_output_first_cross_qk],
+          qk_layer_pointers,
+          parameters->num_layers,
+          static_cast<int>(cross_qk_layer_head_pair_count),
+          cross_qk_layer_head_pairs,
+          cross_qk_buffer_data,
+          parameters->max_length,
+          this->temp_space_allocator_));
+    }
+
 #ifdef DEBUG_GENERATION
     for (int i = 0; i <= decoder_subgraph_.GetFirstPresentOutputIndex(); i++) {
       dumper->Print("decoder_fetches", i, true);
@@ -383,6 +464,35 @@ Status BeamSearchWhisper<T>::Execute(const FeedsFetchesManager& encoder_feeds_fe
     }
   }
 
+  if (decoder_subgraph_.output_cross_qk_) {
+    TensorShape cross_qk_shape{
+        static_cast<int64_t>(parameters->batch_size),
+        static_cast<int64_t>(parameters->num_return_sequences),
+        cross_qk_layer_head_pair_count,
+        static_cast<int64_t>(iteration_counter - 1),
+        frames_of_k};
+    cross_qk_output = this->context_.Output(parameters->cross_qk_output_id, cross_qk_shape);
+
+    size_t cache_indir_input_offset = static_cast<size_t>(decoder_subgraph_.GetFirstPastInputIndex()) + 4 * static_cast<size_t>(decoder_subgraph_.num_layers) + 2;
+    const int* cache_indir_data = decoder_feeds[cache_indir_input_offset].GetMutable<Tensor>()->Data<int32_t>();
+    auto beam_indices = this->beam_scorer_->GetNextIndicesGPU();  // currently only support on GPU
+    ORT_RETURN_IF_ERROR(this->finalize_decoder_cross_qk_func_(
+        this->ort_stream_,
+        iteration_counter,
+        parameters->sequence_length,
+        parameters->batch_size,
+        parameters->num_beams,
+        parameters->max_length,
+        static_cast<int>(cross_qk_layer_head_pair_count),
+        cross_qk_layer_head_pairs,
+        static_cast<int>(frames_of_k),
+        cross_qk_buffer_data,
+        cross_qk_output->MutableData<float>(),
+        parameters->num_return_sequences,
+        cache_indir_data,
+        beam_indices));
+  }
+
   gsl::span<const float> final_beam_scores = beam_state.beam_scores;
   this->beam_scorer_->Finalize(cpu_state.sequences,
                                final_beam_scores,
diff --git a/onnxruntime/contrib_ops/cpu/transformers/beam_search_parameters.cc b/onnxruntime/contrib_ops/cpu/transformers/beam_search_parameters.cc
index 76011a5c89b66..3962486d5b5eb 100644
--- a/onnxruntime/contrib_ops/cpu/transformers/beam_search_parameters.cc
+++ b/onnxruntime/contrib_ops/cpu/transformers/beam_search_parameters.cc
@@ -47,6 +47,23 @@ void BeamSearchParameters::ParseFromInputs(OpKernelContext* context) {
   }
   batch_size = static_cast<int>(dims[0]);
 
+  extra_decoding_ids = gsl::span<int32_t>();
+  if (this->model_type == IGenerationParameters::kModelTypeWhisper && extra_decoding_ids_input_id > 0) {
+    const Tensor* extra_decoder_tensor = context->Input<Tensor>(extra_decoding_ids_input_id);
+    if (extra_decoder_tensor != nullptr) {
+      const auto& extra_decoder_tensor_dims = extra_decoder_tensor->Shape().GetDims();
+      ORT_ENFORCE(extra_decoder_tensor_dims.size() == 2,
+                  "extra_decoder_tensor shall have 2 dimensions. Got ",
+                  extra_decoder_tensor_dims.size());
+      ORT_ENFORCE(extra_decoder_tensor_dims[0] == batch_size,
+                  "extra_decoder_tensor first dim not same as batch_size. Got ",
+                  extra_decoder_tensor_dims[0], ", expecting ", batch_size);
+      if (extra_decoder_tensor->Shape().Size() > 0) {
+        extra_decoding_ids = gsl::span<const int32_t>(extra_decoder_tensor->Data<int32_t>(), (size_t)extra_decoder_tensor->Shape().Size());
+      }
+    }
+  }
+
   if (this->model_type == IGenerationParameters::kModelTypeGpt) {
     sequence_length = static_cast<int>(dims[1]);
   } else if (this->model_type == IGenerationParameters::kModelTypeWhisper) {
@@ -119,6 +136,18 @@ void BeamSearchParameters::SetSubgraphParameters(int vocabulary_size, int heads,
   num_layers = layers;
 }
 
+void WhisperBeamSearchParameters::ParseFromAttributes(const OpKernelInfo& info) {
+  BeamSearchParameters::ParseFromAttributes(info);
+  model_type = static_cast<int>(info.GetAttrOrDefault<int64_t>("model_type", IGenerationParameters::kModelTypeWhisper));
+  ORT_ENFORCE(model_type == IGenerationParameters::kModelTypeWhisper);
+
+  no_speech_token = static_cast<int>(info.GetAttrOrDefault<int64_t>("no_speech_token", -1LL));
+  cross_qk_layer_head_input_id = 12;
+  extra_decoding_ids_input_id = 13;
+  cross_qk_output_id = 3;
+  no_speech_probs_output_id = 4;
+}
+
 }  // namespace transformers
 }  // namespace contrib
 }  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cpu/transformers/beam_search_parameters.h b/onnxruntime/contrib_ops/cpu/transformers/beam_search_parameters.h
index 0cb2b39976cc3..87bc6cdbfe72c 100644
--- a/onnxruntime/contrib_ops/cpu/transformers/beam_search_parameters.h
+++ b/onnxruntime/contrib_ops/cpu/transformers/beam_search_parameters.h
@@ -11,17 +11,23 @@ namespace contrib {
 namespace transformers {
 
 struct BeamSearchParameters : public IGenerationParameters {
+  virtual ~BeamSearchParameters() {}
+
   Status Validate() const;
 
   int BatchBeamSize() const { return batch_size * num_beams; }
 
-  void ParseFromAttributes(const OpKernelInfo& info);
+  virtual void ParseFromAttributes(const OpKernelInfo& info);
 
   void ParseFromInputs(OpKernelContext* context);
 
   void SetSubgraphParameters(int vocab_size, int num_heads, int head_size, int num_layers);
 };
 
+struct WhisperBeamSearchParameters : public BeamSearchParameters {
+  void ParseFromAttributes(const OpKernelInfo& info) override;
+};
+
 }  // namespace transformers
 }  // namespace contrib
 }  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cpu/transformers/generate_impl_base.h b/onnxruntime/contrib_ops/cpu/transformers/generate_impl_base.h
index e889281abb023..680cb23fd887a 100644
--- a/onnxruntime/contrib_ops/cpu/transformers/generate_impl_base.h
+++ b/onnxruntime/contrib_ops/cpu/transformers/generate_impl_base.h
@@ -33,24 +33,43 @@ gsl::span<T> AllocateBuffer(AllocatorPtr allocator,
   return span;
 }
 
+template <typename T>
+gsl::span<T> AllocateBuffer(AllocatorPtr allocator,
+                            IAllocatorUniquePtr<void>& buffer,
+                            size_t elements,
+                            Stream* stream,
+                            bool fill = false,
+                            T fill_value = T{}) {
+  size_t bytes = SafeInt<size_t>(sizeof(T)) * elements;
+  buffer = IAllocator::MakeUniquePtr<void>(allocator, bytes, false, stream);
+  T* first = reinterpret_cast<T*>(buffer.get());
+  auto span = gsl::make_span(first, elements);
+
+  if (fill) {
+    std::fill_n(first, elements, fill_value);
+  }
+
+  return span;
+}
+
 template <typename ElementType>
 inline void AllocateTempBufferForGetGreedySearchTopOne(
     int32_t batch_size,
     AllocatorPtr allocator,
-    BufferUniquePtr& buffer,
+    IAllocatorUniquePtr<void>& buffer,
     gsl::span<ElementType>& stage_1_scores,  // shape (batch_size, parts_of_vocab)
     gsl::span<int32_t>& stage_1_tokens,      // shape (batch_size, parts_of_vocab)
     gsl::span<ElementType>& output_scores,   // shape (batch_size)
-    gsl::span<int32_t>& output_tokens        // shape (batch_size)
-) {
+    gsl::span<int32_t>& output_tokens,       // shape (batch_size)
+    Stream* stream) {
   constexpr size_t kMaxPartsPerVocab = 128;
   const size_t stage_1_element_size = kMaxPartsPerVocab * batch_size;
   const size_t output_element_size = batch_size;
 
   // Note: use float to allocate buffer for temporary value buffer to avoid unalignment
-  void* topk_data = allocator->Alloc((stage_1_element_size + output_element_size) * (sizeof(float) + sizeof(int32_t)));
-  BufferUniquePtr temp_buffer(topk_data, BufferDeleter(allocator));
-  buffer = std::move(temp_buffer);
+  size_t bytes = (stage_1_element_size + output_element_size) * (sizeof(float) + sizeof(int32_t));
+  buffer = IAllocator::MakeUniquePtr<void>(allocator, bytes, false, stream);
+  void* topk_data = buffer.get();
 
   ElementType* stage_1_scores_data = reinterpret_cast<ElementType*>(topk_data);
   stage_1_scores = gsl::make_span<ElementType>(stage_1_scores_data, stage_1_element_size);
diff --git a/onnxruntime/contrib_ops/cpu/transformers/generation_device_helper.cc b/onnxruntime/contrib_ops/cpu/transformers/generation_device_helper.cc
index 88348ad88dc27..927d3a58e5a6f 100644
--- a/onnxruntime/contrib_ops/cpu/transformers/generation_device_helper.cc
+++ b/onnxruntime/contrib_ops/cpu/transformers/generation_device_helper.cc
@@ -1084,6 +1084,38 @@ template Status CreateWhisperEncoderInputs<MLFloat16>(
     OrtValue& encoder_input_features,
     OrtValue& decoder_input_ids);
 
+Status UpdateDecoderCrossQK(
+    [[maybe_unused]] int iteration_number,
+    [[maybe_unused]] Stream* tream,
+    [[maybe_unused]] OrtValue* cross_qks,
+    [[maybe_unused]] IAllocatorUniquePtr<float*>& qk_layer_pointers,
+    [[maybe_unused]] int num_layers,
+    [[maybe_unused]] int cross_qk_layer_head_pair_count,
+    [[maybe_unused]] const int* cross_qk_layer_head_pairs,
+    [[maybe_unused]] float* cross_qk_buffer_data,
+    [[maybe_unused]] int max_length,
+    [[maybe_unused]] AllocatorPtr allocator) {
+  return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "CPU beam search current not support output cross QK.");
+}
+
+Status FinalizeDecoderCrossQK(
+    [[maybe_unused]] Stream* stream,
+    [[maybe_unused]] int iteration_number,
+    [[maybe_unused]] int context_decoding_len,
+    [[maybe_unused]] int batch_size,
+    [[maybe_unused]] int num_beams,
+    [[maybe_unused]] int max_length,
+    [[maybe_unused]] int cross_qk_layer_head_pair_count,
+    [[maybe_unused]] const int* cross_qk_layer_head_pairs,
+    [[maybe_unused]] int frames_of_k,
+    [[maybe_unused]] const float* cross_qk_buffer_data,
+    [[maybe_unused]] float* cross_qk_output,
+    [[maybe_unused]] int num_return_sequences,
+    [[maybe_unused]] const int* cache_indir_data,
+    [[maybe_unused]] gsl::span<const int32_t> beam_indices) {
+  return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "CPU beam search current not support output cross QK.");
+}
+
 }  // namespace GenerationCpuDeviceHelper
 }  // namespace contrib
 }  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cpu/transformers/generation_device_helper.h b/onnxruntime/contrib_ops/cpu/transformers/generation_device_helper.h
index ba1b0b662f1a5..6dfdc6b027671 100644
--- a/onnxruntime/contrib_ops/cpu/transformers/generation_device_helper.h
+++ b/onnxruntime/contrib_ops/cpu/transformers/generation_device_helper.h
@@ -204,6 +204,35 @@ using ExpandBufferFunc = std::function<Status(
     OrtValue& expanded,
     bool only_copy_shape,
     int max_sequence_length)>;
+
+using UpdateDecoderCrossQKFunc = std::function<Status(
+    int iteration_number,
+    Stream* stream,
+    OrtValue* cross_qks,
+    IAllocatorUniquePtr<float*>& qk_layer_pointers,
+    int num_layers,
+    int cross_qk_layer_head_pair_count,
+    const int* cross_qk_layer_head_pairs,
+    float* cross_qk_buffer_data,
+    int max_length,
+    AllocatorPtr allocator)>;
+
+using FinalizeDecoderCrossQKFunc = std::function<Status(
+    Stream* stream,
+    int iteration_number,
+    int context_decoding_len,
+    int batch_size,
+    int num_beams,
+    int max_length,
+    int cross_qk_layer_head_pair_count,
+    const int* cross_qk_layer_head_pairs,
+    int frames_of_k,
+    const float* cross_qk_buffer_data,
+    float* cross_qk_output,
+    int num_return_sequences,
+    const int* cache_indir_data,
+    gsl::span<const int32_t> beam_indices)>;
+
 }  // namespace GenerationDeviceHelper
 
 // These are CPU specific device helper implementations
@@ -368,6 +397,34 @@ Status ExpandBuffer(
     bool only_copy_shape,
     int max_sequence_length);
 
+Status UpdateDecoderCrossQK(
+    int iteration_number,
+    Stream* stream,
+    OrtValue* cross_qks,
+    IAllocatorUniquePtr<float*>& qk_layer_pointers,
+    int num_layers,
+    int cross_qk_layer_head_pair_count,
+    const int* cross_qk_layer_head_pairs,
+    float* cross_qk_buffer_data,
+    int max_length,
+    AllocatorPtr allocator);
+
+Status FinalizeDecoderCrossQK(
+    Stream* stream,
+    int iteration_number,
+    int context_decoding_len,
+    int batch_size,
+    int num_beams,
+    int max_length,
+    int cross_qk_layer_head_pair_count,
+    const int* cross_qk_layer_head_pairs,
+    int frames_of_k,
+    const float* cross_qk_buffer_data,
+    float* cross_qk_output,
+    int num_return_sequences,
+    const int* cache_indir_data,
+    gsl::span<const int32_t> beam_indices);
+
 }  // namespace GenerationCpuDeviceHelper
 }  // namespace contrib
 }  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cpu/transformers/generation_shared.h b/onnxruntime/contrib_ops/cpu/transformers/generation_shared.h
index 719dd302d274d..f6faf2e325f8f 100644
--- a/onnxruntime/contrib_ops/cpu/transformers/generation_shared.h
+++ b/onnxruntime/contrib_ops/cpu/transformers/generation_shared.h
@@ -53,6 +53,7 @@ struct IBeamSearchCpuState {
   gsl::span<int32_t> topk_tokens;      // shape (batch_size, 2*num_beams), tokens of topk candidates.
   gsl::span<int32_t> topk_indices;     // shape (batch_size, 2*num_beams), beam indices of topk candidates.
   gsl::span<float> final_beam_scores;  // shape (batch_size, num_beams)
+  gsl::span<float> next_token_scores;  // shape (batch_size, num_beams * vocab_size)
 };
 
 template <typename T>
@@ -175,6 +176,17 @@ struct IGenerationParameters {
   int seed = 0;
   int min_tokens_to_keep = 1;
   bool custom_sampling = false;
+
+  // Parameters for whisper model
+  bool decoder_output_cross_qk = false;
+  gsl::span<const int32_t> extra_decoding_ids;
+  int32_t no_speech_token = -1;
+  void* no_speech_probs = nullptr;
+
+  int cross_qk_layer_head_input_id = -1;
+  int extra_decoding_ids_input_id = -1;
+  int cross_qk_output_id = -1;
+  int no_speech_probs_output_id = -1;
 };
 
 }  // namespace transformers
diff --git a/onnxruntime/contrib_ops/cpu/transformers/greedy_search_impl_base.h b/onnxruntime/contrib_ops/cpu/transformers/greedy_search_impl_base.h
index be974ed2159d9..9f372e5b3a673 100644
--- a/onnxruntime/contrib_ops/cpu/transformers/greedy_search_impl_base.h
+++ b/onnxruntime/contrib_ops/cpu/transformers/greedy_search_impl_base.h
@@ -20,26 +20,27 @@ struct SamplingState : public ISamplingState<T> {
             int vocab_size,
             int max_iter,
             int seed,
-            bool is_cuda) {
+            bool is_cuda,
+            Stream* stream) {
     int total_count = batch_size * vocab_size;
 
-    this->h_softmaxed_score = AllocateBuffer<float>(cpu_allocator, h_softmaxed_score_buffer_, SafeInt<size_t>(total_count));
+    this->h_softmaxed_score = AllocateBuffer<float>(cpu_allocator, h_softmaxed_score_buffer_, SafeInt<size_t>(total_count), stream);
 
     this->generator = std::default_random_engine{gsl::narrow_cast<uint32_t>(seed)};
 
     if (is_cuda) {
-      this->d_index_in = AllocateBuffer<int>(allocator, d_index_in_buffer_, SafeInt<size_t>(total_count));
-      this->d_index_out = AllocateBuffer<int>(allocator, d_index_out_buffer_, SafeInt<size_t>(total_count));
-      this->d_offset = AllocateBuffer<int>(allocator, d_offset_buffer_, SafeInt<size_t>(batch_size + 1));
-      this->d_sorted_score = AllocateBuffer<T>(allocator, d_sorted_score_buffer_, SafeInt<size_t>(total_count));
-      this->d_sorted_softmaxed_score = AllocateBuffer<float>(allocator, d_sorted_softmaxed_score_buffer_, SafeInt<size_t>(total_count));
-      this->d_softmaxed_score = AllocateBuffer<float>(allocator, d_softmaxed_score_buffer_, SafeInt<size_t>(total_count));
-      this->d_sampled = AllocateBuffer<float>(allocator, d_sampled_buffer_, SafeInt<size_t>(batch_size));
-      this->h_sampled_all = AllocateBuffer<float>(cpu_allocator, h_sampled_all_buffer_, SafeInt<size_t>(batch_size * max_iter));
-      this->d_indices = AllocateBuffer<int32_t>(allocator, d_indices_buffer_, SafeInt<size_t>(batch_size));
+      this->d_index_in = AllocateBuffer<int>(allocator, d_index_in_buffer_, SafeInt<size_t>(total_count), stream);
+      this->d_index_out = AllocateBuffer<int>(allocator, d_index_out_buffer_, SafeInt<size_t>(total_count), stream);
+      this->d_offset = AllocateBuffer<int>(allocator, d_offset_buffer_, SafeInt<size_t>(batch_size + 1), stream);
+      this->d_sorted_score = AllocateBuffer<T>(allocator, d_sorted_score_buffer_, SafeInt<size_t>(total_count), stream);
+      this->d_sorted_softmaxed_score = AllocateBuffer<float>(allocator, d_sorted_softmaxed_score_buffer_, SafeInt<size_t>(total_count), stream);
+      this->d_softmaxed_score = AllocateBuffer<float>(allocator, d_softmaxed_score_buffer_, SafeInt<size_t>(total_count), stream);
+      this->d_sampled = AllocateBuffer<float>(allocator, d_sampled_buffer_, SafeInt<size_t>(batch_size), stream);
+      this->h_sampled_all = AllocateBuffer<float>(cpu_allocator, h_sampled_all_buffer_, SafeInt<size_t>(batch_size * max_iter), stream);
+      this->d_indices = AllocateBuffer<int32_t>(allocator, d_indices_buffer_, SafeInt<size_t>(batch_size), stream);
       this->temp_storage_bytes = 0;
       // TODO: Do not allocate this buffer if there's no presence_mask
-      this->d_presence_mask = AllocateBuffer<int>(allocator, d_presence_mask_buffer_, SafeInt<size_t>(total_count));
+      this->d_presence_mask = AllocateBuffer<int>(allocator, d_presence_mask_buffer_, SafeInt<size_t>(total_count), stream);
 
       std::uniform_real_distribution<float> distribution(0.0, 1.0);
       static_cast<void>(distribution(this->generator));
@@ -48,25 +49,25 @@ struct SamplingState : public ISamplingState<T> {
       }
     } else {
       // TODO: Some buffer can be reused for CPU
-      this->sorted_scores = AllocateBuffer<T>(cpu_allocator, sorted_scores_buffer_, SafeInt<size_t>(total_count));
-      this->cumulative_probs = AllocateBuffer<T>(cpu_allocator, cumulative_probs_buffer_, SafeInt<size_t>(total_count));
+      this->sorted_scores = AllocateBuffer<T>(cpu_allocator, sorted_scores_buffer_, SafeInt<size_t>(total_count), stream);
+      this->cumulative_probs = AllocateBuffer<T>(cpu_allocator, cumulative_probs_buffer_, SafeInt<size_t>(total_count), stream);
     }
   }
 
  private:
-  BufferUniquePtr d_index_in_buffer_;
-  BufferUniquePtr d_index_out_buffer_;
-  BufferUniquePtr d_offset_buffer_;
-  BufferUniquePtr d_sorted_score_buffer_;
-  BufferUniquePtr d_sorted_softmaxed_score_buffer_;
-  BufferUniquePtr d_softmaxed_score_buffer_;
-  BufferUniquePtr h_softmaxed_score_buffer_;
-  BufferUniquePtr d_sampled_buffer_;
-  BufferUniquePtr h_sampled_all_buffer_;
-  BufferUniquePtr d_indices_buffer_;
-  BufferUniquePtr d_presence_mask_buffer_;
-  BufferUniquePtr sorted_scores_buffer_;
-  BufferUniquePtr cumulative_probs_buffer_;
+  IAllocatorUniquePtr<void> d_index_in_buffer_;
+  IAllocatorUniquePtr<void> d_index_out_buffer_;
+  IAllocatorUniquePtr<void> d_offset_buffer_;
+  IAllocatorUniquePtr<void> d_sorted_score_buffer_;
+  IAllocatorUniquePtr<void> d_sorted_softmaxed_score_buffer_;
+  IAllocatorUniquePtr<void> d_softmaxed_score_buffer_;
+  IAllocatorUniquePtr<void> h_softmaxed_score_buffer_;
+  IAllocatorUniquePtr<void> d_sampled_buffer_;
+  IAllocatorUniquePtr<void> h_sampled_all_buffer_;
+  IAllocatorUniquePtr<void> d_indices_buffer_;
+  IAllocatorUniquePtr<void> d_presence_mask_buffer_;
+  IAllocatorUniquePtr<void> sorted_scores_buffer_;
+  IAllocatorUniquePtr<void> cumulative_probs_buffer_;
 };
 
 template <typename T>
@@ -82,24 +83,25 @@ struct GreedySearchState : public IGreedySearchState<T> {
             int num_heads,
             int head_size,
             bool has_decoder_masked_self_attention,
-            bool is_cuda) {
+            bool is_cuda,
+            Stream* stream) {
     // below buffers are on cpu
     this->sequences_space = AllocateBuffer<int32_t>(cpu_allocator,
                                                     sequences_space_buffer_,
-                                                    SafeInt<size_t>(2) * batch_size * max_length);
+                                                    SafeInt<size_t>(2) * batch_size * max_length, stream);
     memset(this->sequences_space.data(), 0, this->sequences_space.size_bytes());
     this->sequences.Init(this->sequences_space, static_cast<int>(batch_size), sequence_length, max_length);
 
-    this->sequence_lengths = AllocateBuffer<int32_t>(cpu_allocator, sequence_lengths_buffer_, batch_size);
-    this->eos_meet = AllocateBuffer<bool>(cpu_allocator, eos_meet_buffer_, batch_size);
+    this->sequence_lengths = AllocateBuffer<int32_t>(cpu_allocator, sequence_lengths_buffer_, batch_size, stream);
+    this->eos_meet = AllocateBuffer<bool>(cpu_allocator, eos_meet_buffer_, batch_size, stream);
     memset(this->eos_meet.data(), 0, this->eos_meet.size_bytes());
 
-    this->next_tokens = AllocateBuffer<int32_t>(cpu_allocator, next_tokens_buffer_, SafeInt<size_t>(batch_size));
+    this->next_tokens = AllocateBuffer<int32_t>(cpu_allocator, next_tokens_buffer_, SafeInt<size_t>(batch_size), stream);
 
     // below buffers are on cpu or cuda
     size_t next_token_size = SafeInt<size_t>(batch_size) * vocab_size;
-    this->next_token_scores = AllocateBuffer<T>(allocator, next_token_scores_buffer_, next_token_size);
-    this->next_positions = AllocateBuffer<int32_t>(allocator, next_positions_buffer_, batch_size);
+    this->next_token_scores = AllocateBuffer<T>(allocator, next_token_scores_buffer_, next_token_size, stream);
+    this->next_positions = AllocateBuffer<int32_t>(allocator, next_positions_buffer_, batch_size, stream);
 
     if (is_cuda) {
       AllocateTempBufferForGetGreedySearchTopOne<T>(
@@ -109,7 +111,8 @@ struct GreedySearchState : public IGreedySearchState<T> {
           this->temp_topk_scores_buffer,
           this->temp_topk_tokens_buffer,
           this->topk_scores_buffer,
-          this->topk_tokens_buffer);
+          this->topk_tokens_buffer,
+          stream);
 
       // If at all we need to, we only need to re-order past state for CUDA as
       //`DecoderMaskedSelfAttention` is only supported on CUDA
@@ -137,14 +140,14 @@ struct GreedySearchState : public IGreedySearchState<T> {
   }
 
  private:
-  BufferUniquePtr sequences_space_buffer_;
-  BufferUniquePtr sequence_lengths_buffer_;
-  BufferUniquePtr next_token_scores_buffer_;
-  BufferUniquePtr next_tokens_buffer_;
-  BufferUniquePtr next_positions_buffer_;
-  BufferUniquePtr eos_meet_buffer_;
-  BufferUniquePtr temp_topk_buffer_;
-  BufferUniquePtr staging_for_past_state_reorder_buffer_;
+  IAllocatorUniquePtr<void> sequences_space_buffer_;
+  IAllocatorUniquePtr<void> sequence_lengths_buffer_;
+  IAllocatorUniquePtr<void> next_token_scores_buffer_;
+  IAllocatorUniquePtr<void> next_tokens_buffer_;
+  IAllocatorUniquePtr<void> next_positions_buffer_;
+  IAllocatorUniquePtr<void> eos_meet_buffer_;
+  IAllocatorUniquePtr<void> temp_topk_buffer_;
+  IAllocatorUniquePtr<void> staging_for_past_state_reorder_buffer_;
 };
 
 // Base class of gready search implementation that is common for both GPT-2 and Bart/T5.
diff --git a/onnxruntime/contrib_ops/cpu/transformers/greedy_search_impl_gpt.h b/onnxruntime/contrib_ops/cpu/transformers/greedy_search_impl_gpt.h
index 4504b099e32bd..69d25eaabbe02 100644
--- a/onnxruntime/contrib_ops/cpu/transformers/greedy_search_impl_gpt.h
+++ b/onnxruntime/contrib_ops/cpu/transformers/greedy_search_impl_gpt.h
@@ -211,7 +211,8 @@ Status GreedySearchGpt<T, ParametersT>::Execute(const FeedsFetchesManager* init_
                     static_cast<int>(parameters->num_heads),
                     static_cast<int>(parameters->head_size),
                     gpt_subgraph_.has_decoder_masked_attention_,
-                    this->IsCuda());
+                    this->IsCuda(),
+                    this->ort_stream_);
 
   SamplingState<T> sampling_state;
   if (std::is_same<ParametersT, SamplingParameters>::value) {
@@ -221,7 +222,8 @@ Status GreedySearchGpt<T, ParametersT>::Execute(const FeedsFetchesManager* init_
                         static_cast<int>(parameters->vocab_size),
                         static_cast<int>(parameters->max_length - parameters->sequence_length),
                         parameters->seed,
-                        this->IsCuda());
+                        this->IsCuda(),
+                        this->ort_stream_);
   }
 
   IAllocatorUniquePtr<char> buffer;
diff --git a/onnxruntime/contrib_ops/cpu/transformers/greedy_search_parameters.h b/onnxruntime/contrib_ops/cpu/transformers/greedy_search_parameters.h
index f1150fdba00e8..4ef0c180eba34 100644
--- a/onnxruntime/contrib_ops/cpu/transformers/greedy_search_parameters.h
+++ b/onnxruntime/contrib_ops/cpu/transformers/greedy_search_parameters.h
@@ -13,7 +13,7 @@ namespace transformers {
 struct GreedySearchParameters : public BeamSearchParameters {
   int BatchBeamSize() const { return batch_size; }
 
-  void ParseFromAttributes(const OpKernelInfo& info);
+  void ParseFromAttributes(const OpKernelInfo& info) override;
 
   void ParseFromInputs(OpKernelContext* context);
 };
diff --git a/onnxruntime/contrib_ops/cpu/transformers/logits_processor.cc b/onnxruntime/contrib_ops/cpu/transformers/logits_processor.cc
index 9f77c32f0c7cc..f39f090c78b0c 100644
--- a/onnxruntime/contrib_ops/cpu/transformers/logits_processor.cc
+++ b/onnxruntime/contrib_ops/cpu/transformers/logits_processor.cc
@@ -17,20 +17,6 @@ namespace onnxruntime {
 namespace contrib {
 namespace transformers {
 
-template <typename T>
-gsl::span<T> NextTokenScores<T>::GetScores(int batch_beam_index) {
-  assert(batch_beam_index >= 0 && batch_beam_index < batch_beam_size);
-  return scores.subspan(static_cast<gsl::index>(batch_beam_index) * vocab_size, vocab_size);
-}
-
-template <typename T>
-void NextTokenScores<T>::SetScore(int token_id, T score) {
-  assert(token_id >= 0 && token_id < vocab_size);
-  for (int i = 0; i < batch_beam_size; i++) {
-    scores[static_cast<gsl::index>(i) * vocab_size + token_id] = score;
-  }
-}
-
 #ifdef DEBUG_GENERATION
 template <typename T>
 void DumpScores(const char* name, const NextTokenScores<T>& next_token_scores) {
@@ -238,128 +224,6 @@ void PresencePenaltyLogitsProcessor<T>::Process(const ISequences*,
 #endif
 }
 
-template <typename T>
-TimestampLogitsProcessor<T>::TimestampLogitsProcessor(int eos_token_id, int max_initial_timestamp_index)
-    : eos_token_id_(eos_token_id), max_initial_timestamp_index_(max_initial_timestamp_index) {}
-
-template <typename T>
-void TimestampLogitsProcessor<T>::Process(const ISequences* sequences,
-                                          NextTokenScores<T>& next_token_scores) {
-  const int beg_token_id_ = eos_token_id_ + 107;
-  const int not_token_id_ = eos_token_id_ + 106;
-  const int solm_token_id_ = eos_token_id_ + 105;
-  const int sot_token_id_ = eos_token_id_ + 1;
-  constexpr int translate_token_id_ = 50358;
-  constexpr int transcribe_token_id_ = 50359;
-
-  const int batch_beam_size = next_token_scores.batch_beam_size;
-  const int vocab_size = next_token_scores.vocab_size;
-  for (int i = 0; i < batch_beam_size; i++) {
-    gsl::span<T> beam_token_scores = next_token_scores.GetScores(i);
-    gsl::span<const int32_t> sequence = sequences->GetSequence(i);
-    const size_t seq_length = sequence.size();
-
-    // Find first timestamp
-    size_t sample_begin = 0;
-    for (size_t j = 0; j < seq_length; j++) {
-      sample_begin++;
-      if (sequence[j] >= beg_token_id_) {
-        break;
-      }
-    }
-
-    // Suppress tokens
-    for (int j = 0; j < vocab_size; j++) {
-      // Suppress notimestamps and solm tokens
-      if (j == not_token_id_ || j == solm_token_id_) {
-        beam_token_scores[j] = std::numeric_limits<T>::lowest();
-      }
-
-      // Suppress sot, translate and transcribe tokens
-      if (seq_length > sample_begin) {
-        if (j == sot_token_id_ || j == translate_token_id_ || j == transcribe_token_id_) {
-          beam_token_scores[j] = std::numeric_limits<T>::lowest();
-        }
-      }
-    }
-
-    // Timestamps should be in pair except the first one
-    const bool last_was_timestamp = seq_length > 0 && sequence.back() >= beg_token_id_;
-    const bool penultimate_was_timestamp = seq_length <= sample_begin || sequence[seq_length - 2] >= beg_token_id_;
-    if (last_was_timestamp) {
-      if (penultimate_was_timestamp) {
-        // If timestamps show up in pair, or it's the first timestamp, no more timestamp is generated
-        for (int j = beg_token_id_; j < vocab_size; j++) {
-          beam_token_scores[j] = std::numeric_limits<T>::lowest();
-        }
-      } else {
-        // If timestamp doesn't show up in pair, generate timestamp
-        for (int j = 0; j < eos_token_id_; j++) {
-          beam_token_scores[j] = std::numeric_limits<T>::lowest();
-        }
-      }
-    }
-
-    // Find timestamp tokens
-    std::vector<int32_t> timestamps;
-    for (const auto& word_id : sequence) {
-      if (word_id >= beg_token_id_) {
-        timestamps.push_back(word_id);
-      }
-    }
-
-    // Timestamps will not decrease
-    const size_t timestamps_len = timestamps.size();
-    if (timestamps_len > 0) {
-      int timestamp_last = 0;
-      if (last_was_timestamp && !penultimate_was_timestamp) {
-        // For single timestamp at the end, next timestamp must not be smaller
-        timestamp_last = timestamps.back();
-      } else {
-        // For paired timestamp at the end, next timestamp must be greater
-        timestamp_last = timestamps.back() + 1;
-      }
-
-      for (int j = beg_token_id_; j < timestamp_last; j++) {
-        beam_token_scores[j] = std::numeric_limits<T>::lowest();
-      }
-    }
-
-    if (seq_length == sample_begin) {
-      const int last_allowed = beg_token_id_ + max_initial_timestamp_index_;
-      for (int j = last_allowed + 1; j < vocab_size; j++) {
-        beam_token_scores[j] = std::numeric_limits<T>::lowest();
-      }
-    }
-
-    // Caculate logsumexp on timestamps
-    float timestamp_logprob = std::numeric_limits<T>::lowest();
-    {
-      float logsumexp = 0.0f;
-      const float logprob_max = *std::max_element(beam_token_scores.begin() + beg_token_id_, beam_token_scores.end());
-      for (int j = beg_token_id_; j < vocab_size; ++j) {
-        if (beam_token_scores[j] > std::numeric_limits<T>::lowest()) {
-          logsumexp += expf(beam_token_scores[j] - logprob_max);
-        }
-      }
-      if (logsumexp > 0.0f) {
-        timestamp_logprob = logf(logsumexp) + logprob_max;
-      }
-    }
-
-    const float max_text_token_logprob = *std::max_element(beam_token_scores.begin(), beam_token_scores.begin() + beg_token_id_);
-    if (timestamp_logprob > max_text_token_logprob) {
-      for (int j = 0; j < beg_token_id_; ++j) {
-        beam_token_scores[j] = std::numeric_limits<T>::lowest();
-      }
-    }
-  }
-
-#ifdef DEBUG_GENERATION
-  DumpScores("TimestampLogitsProcessor", next_token_scores);
-#endif
-}
-
 void LogitsProcessorList::Init(const BeamSearchParameters& parameters) {
   LogitsProcessorInitImpl<BeamSearchParameters>(parameters);
 }
diff --git a/onnxruntime/contrib_ops/cpu/transformers/logits_processor.h b/onnxruntime/contrib_ops/cpu/transformers/logits_processor.h
index 664c497a106d4..4688ff272cee9 100644
--- a/onnxruntime/contrib_ops/cpu/transformers/logits_processor.h
+++ b/onnxruntime/contrib_ops/cpu/transformers/logits_processor.h
@@ -6,6 +6,7 @@
 #include "core/common/inlined_containers.h"
 #include "contrib_ops/cpu/transformers/sequences.h"
 #include "contrib_ops/cpu/transformers/beam_search_parameters.h"
+#include "contrib_ops/cpu/transformers/dump_tensor.h"
 #include "contrib_ops/cpu/transformers/greedy_search_parameters.h"
 #include "contrib_ops/cpu/transformers/sampling_parameters.h"
 #include "contrib_ops/cpu/transformers/generation_shared.h"
@@ -20,9 +21,17 @@ struct NextTokenScores {
   int batch_beam_size;
   int vocab_size;
 
-  gsl::span<T> GetScores(int batch_beam_index);
+  gsl::span<T> GetScores(int batch_beam_index) {
+    assert(batch_beam_index >= 0 && batch_beam_index < batch_beam_size);
+    return scores.subspan(static_cast<gsl::index>(batch_beam_index) * vocab_size, vocab_size);
+  }
 
-  void SetScore(int token_id, T score);
+  void SetScore(int token_id, T score) {
+    assert(token_id >= 0 && token_id < vocab_size);
+    for (int i = 0; i < batch_beam_size; i++) {
+      scores[static_cast<gsl::index>(i) * vocab_size + token_id] = score;
+    }
+  }
 };
 
 // Interface for all scorers for beam search or beam sample.
@@ -141,10 +150,126 @@ class PresencePenaltyLogitsProcessor : public ILogitsProcessor<T> {
 template <typename T>
 class TimestampLogitsProcessor : public ILogitsProcessor<T> {
  public:
-  TimestampLogitsProcessor(int eos_token_id, int max_initial_timestamp_index);
+  TimestampLogitsProcessor(int eos_token_id, int max_initial_timestamp_index)
+      : eos_token_id_(eos_token_id), max_initial_timestamp_index_(max_initial_timestamp_index) {}
 
   void Process(const ISequences* sequences,
-               NextTokenScores<T>& next_token_scores) override;
+               NextTokenScores<T>& next_token_scores) override {
+    // TODO: translate_token_id_ and transcribe_token_id_ need to support both multilingual and English-only models.
+    const int beg_token_id_ = eos_token_id_ + 107;
+    const int not_token_id_ = eos_token_id_ + 106;
+    const int solm_token_id_ = eos_token_id_ + 105;
+    const int sot_token_id_ = eos_token_id_ + 1;
+    constexpr int translate_token_id_ = 50358;
+    constexpr int transcribe_token_id_ = 50359;
+
+    const int batch_beam_size = next_token_scores.batch_beam_size;
+    const int vocab_size = next_token_scores.vocab_size;
+    for (int i = 0; i < batch_beam_size; i++) {
+      gsl::span<T> beam_token_scores = next_token_scores.GetScores(i);
+      gsl::span<const int32_t> sequence = sequences->GetSequence(i);
+      const size_t seq_length = sequence.size();
+
+      // Find first timestamp
+      size_t sample_begin = 0;
+      for (size_t j = 0; j < seq_length; j++) {
+        sample_begin++;
+        if (sequence[j] >= beg_token_id_) {
+          break;
+        }
+      }
+
+      // Suppress tokens
+      for (int j = 0; j < vocab_size; j++) {
+        // Suppress notimestamps and solm tokens
+        if (j == not_token_id_ || j == solm_token_id_) {
+          beam_token_scores[j] = std::numeric_limits<T>::lowest();
+        }
+
+        // Suppress sot, translate and transcribe tokens
+        if (seq_length > sample_begin) {
+          if (j == sot_token_id_ || j == translate_token_id_ || j == transcribe_token_id_) {
+            beam_token_scores[j] = std::numeric_limits<T>::lowest();
+          }
+        }
+      }
+
+      // Timestamps should be in pair except the first one
+      const bool last_was_timestamp = seq_length > 0 && sequence.back() >= beg_token_id_;
+      const bool penultimate_was_timestamp = seq_length <= sample_begin || sequence[seq_length - 2] >= beg_token_id_;
+      if (last_was_timestamp) {
+        if (penultimate_was_timestamp) {
+          // If timestamps show up in pair, or it's the first timestamp, no more timestamp is generated
+          for (int j = beg_token_id_; j < vocab_size; j++) {
+            beam_token_scores[j] = std::numeric_limits<T>::lowest();
+          }
+        } else {
+          // If timestamp doesn't show up in pair, generate timestamp
+          for (int j = 0; j < eos_token_id_; j++) {
+            beam_token_scores[j] = std::numeric_limits<T>::lowest();
+          }
+        }
+      }
+
+      // Find timestamp tokens
+      std::vector<int32_t> timestamps;
+      for (const auto& word_id : sequence) {
+        if (word_id >= beg_token_id_) {
+          timestamps.push_back(word_id);
+        }
+      }
+
+      // Timestamps will not decrease
+      const size_t timestamps_len = timestamps.size();
+      if (timestamps_len > 0) {
+        int timestamp_last = 0;
+        if (last_was_timestamp && !penultimate_was_timestamp) {
+          // For single timestamp at the end, next timestamp must not be smaller
+          timestamp_last = timestamps.back();
+        } else {
+          // For paired timestamp at the end, next timestamp must be greater
+          timestamp_last = timestamps.back() + 1;
+        }
+
+        for (int j = beg_token_id_; j < timestamp_last; j++) {
+          beam_token_scores[j] = std::numeric_limits<T>::lowest();
+        }
+      }
+
+      if (seq_length == sample_begin) {
+        const int last_allowed = beg_token_id_ + max_initial_timestamp_index_;
+        for (int j = last_allowed + 1; j < vocab_size; j++) {
+          beam_token_scores[j] = std::numeric_limits<T>::lowest();
+        }
+      }
+
+      // Caculate logsumexp on timestamps
+      float timestamp_logprob = std::numeric_limits<T>::lowest();
+      {
+        float logsumexp = 0.0f;
+        const float logprob_max = *std::max_element(beam_token_scores.begin() + beg_token_id_, beam_token_scores.end());
+        for (int j = beg_token_id_; j < vocab_size; ++j) {
+          if (beam_token_scores[j] > std::numeric_limits<T>::lowest()) {
+            logsumexp += expf(beam_token_scores[j] - logprob_max);
+          }
+        }
+        if (logsumexp > 0.0f) {
+          timestamp_logprob = logf(logsumexp) + logprob_max;
+        }
+      }
+
+      const float max_text_token_logprob = *std::max_element(beam_token_scores.begin(), beam_token_scores.begin() + beg_token_id_);
+      if (timestamp_logprob > max_text_token_logprob) {
+        for (int j = 0; j < beg_token_id_; ++j) {
+          beam_token_scores[j] = std::numeric_limits<T>::lowest();
+        }
+      }
+    }
+
+#ifdef DEBUG_GENERATION
+    DumpScores("TimestampLogitsProcessor", next_token_scores);
+#endif
+  }
 
  private:
   int eos_token_id_;
diff --git a/onnxruntime/contrib_ops/cpu/transformers/sampling_parameters.h b/onnxruntime/contrib_ops/cpu/transformers/sampling_parameters.h
index af203abc15d01..7779a73feffba 100644
--- a/onnxruntime/contrib_ops/cpu/transformers/sampling_parameters.h
+++ b/onnxruntime/contrib_ops/cpu/transformers/sampling_parameters.h
@@ -11,7 +11,7 @@ namespace contrib {
 namespace transformers {
 
 struct SamplingParameters : public GreedySearchParameters {
-  void ParseFromAttributes(const OpKernelInfo& info);
+  void ParseFromAttributes(const OpKernelInfo& info) override;
 
   void ParseFromInputs(OpKernelContext* context);
 };
diff --git a/onnxruntime/contrib_ops/cpu/transformers/subgraph_base.h b/onnxruntime/contrib_ops/cpu/transformers/subgraph_base.h
index 3c11d2d324a85..487a35c55a85f 100644
--- a/onnxruntime/contrib_ops/cpu/transformers/subgraph_base.h
+++ b/onnxruntime/contrib_ops/cpu/transformers/subgraph_base.h
@@ -45,6 +45,7 @@ class Subgraph {
   int num_layers;
   bool past_present_share_buffer_;
   bool has_decoder_masked_attention_;
+  bool output_cross_qk_ = false;
 
   // Setup execution
   Status Setup(const SessionState& session_state,
diff --git a/onnxruntime/contrib_ops/cpu/transformers/subgraph_t5_decoder.cc b/onnxruntime/contrib_ops/cpu/transformers/subgraph_t5_decoder.cc
index 28acd81ae95fd..4d61ce71c69be 100644
--- a/onnxruntime/contrib_ops/cpu/transformers/subgraph_t5_decoder.cc
+++ b/onnxruntime/contrib_ops/cpu/transformers/subgraph_t5_decoder.cc
@@ -172,7 +172,7 @@ Status T5DecoderSubgraph::CreateInitialFeeds(
   int32_t* input_ids_data = input_ids.GetMutable<Tensor>()->MutableData<int32_t>();
   AllocatorPtr buffer_allocator = std::make_shared<onnxruntime::CPUAllocator>();
   size_t total_size = static_cast<size_t>(static_cast<long long>(cur_len) * batch_beam_size * sizeof(int));
-  auto seq_copy = IAllocator::MakeUniquePtr<int>(buffer_allocator, total_size);
+  auto seq_copy = IAllocator::MakeUniquePtr<int>(buffer_allocator, total_size, false, stream);
   int* seq_copy_ptr = seq_copy.get();
 
   if (!use_sequence_as_input_ids_) {
diff --git a/onnxruntime/contrib_ops/cpu/transformers/subgraph_t5_decoder.h b/onnxruntime/contrib_ops/cpu/transformers/subgraph_t5_decoder.h
index 085d8f3903976..83dae49c7dcbd 100644
--- a/onnxruntime/contrib_ops/cpu/transformers/subgraph_t5_decoder.h
+++ b/onnxruntime/contrib_ops/cpu/transformers/subgraph_t5_decoder.h
@@ -5,6 +5,7 @@
 
 #include "contrib_ops/cpu/transformers/subgraph_base.h"
 #include "contrib_ops/cpu/transformers/sequences.h"
+#include "core/framework/op_kernel.h"
 
 namespace onnxruntime {
 namespace contrib {
@@ -20,6 +21,13 @@ class T5DecoderSubgraph : public Subgraph {
                                         has_hidden_state_(false),
                                         use_sequence_as_input_ids_(true) {
     first_present_output_index_ = 1;
+
+    // Currently just using parent node's attribute. Maybe better to find it purely in subgraph.
+    const auto& attributes = node_in.GetAttributes();
+    if (attributes.find("decoder_output_cross_qk") != attributes.end()) {
+      auto& attr = attributes.at("decoder_output_cross_qk");
+      output_cross_qk_ = (attr.i() != 0LL);
+    }
   }
 
   // Create inputs for first inference of decoder subgraph.
@@ -62,7 +70,7 @@ class T5DecoderSubgraph : public Subgraph {
     return first_present_output_index_;
   }
 
-  bool UseSequenceAsInputIds() const {
+  inline bool UseSequenceAsInputIds() const {
     return use_sequence_as_input_ids_;
   }
 
diff --git a/onnxruntime/contrib_ops/cpu/transformers/subgraph_whisper_decoder.cc b/onnxruntime/contrib_ops/cpu/transformers/subgraph_whisper_decoder.cc
index 887a6a8984b83..7d0c62b618ee2 100644
--- a/onnxruntime/contrib_ops/cpu/transformers/subgraph_whisper_decoder.cc
+++ b/onnxruntime/contrib_ops/cpu/transformers/subgraph_whisper_decoder.cc
@@ -70,8 +70,15 @@ Status WhisperDecoderSubgraph::Validate(const std::vector<const NodeArg*>& subgr
                   "number of inputs expected to be kFirstPastInputIndex + 4 * layers + 1, got:", num_subgraph_inputs);
   }
 
-  ORT_RETURN_IF(num_subgraph_outputs < 3 || (num_subgraph_outputs - first_present_output_index_) % 2 != 0,
-                "number of outputs expected to be 1 + 2 * layers, got:", num_subgraph_outputs);
+  if (!output_cross_qk_) {
+    ORT_RETURN_IF(num_subgraph_outputs < 3 || (num_subgraph_outputs - first_present_output_index_) % 2 != 0,
+                  "number of outputs expected to be first_present_output_index_",
+                  first_present_output_index_, " + 2 * layers, got:", num_subgraph_outputs);
+  } else {
+    ORT_RETURN_IF(num_subgraph_outputs < 4 || (num_subgraph_outputs - first_present_output_index_) % 3 != 0,
+                  "When outputing cross qk, number of outputs expected to be first_present_output_index_",
+                  first_present_output_index_, " + 3 * layers, got:", num_subgraph_outputs);
+  }
 
   ORT_RETURN_IF(subgraph_inputs[0]->Name() != "input_ids",
                 "decoder subgraph input 0 shall be named as input_ids, got: ", subgraph_inputs[0]->Name());
@@ -90,7 +97,8 @@ Status WhisperDecoderSubgraph::Validate(const std::vector<const NodeArg*>& subgr
 
   // Save parameters related to the subgraph.
   ORT_RETURN_IF_ERROR(GetParameters(past_shape, logits_shape, false));
-  num_layers = (static_cast<int>(subgraph_outputs.size()) - first_present_output_index_) / 2;
+
+  num_layers = (static_cast<int>(subgraph_outputs.size()) - first_present_output_index_) / (output_cross_qk_ ? 3 : 2);
 
   // If input_ids's shape is ['batch_size', 1] then use next token as input_ids.
   // Otherwise in the case of shape ['batch_size', 'sequence'], use sequence as input_ids.
@@ -112,12 +120,7 @@ Status WhisperDecoderSubgraph::Validate(const std::vector<const NodeArg*>& subgr
 
   for (int i = first_past_input_index_; i < first_past_input_index_ + 4 * num_layers; i++) {
     ORT_RETURN_IF(subgraph_inputs[i]->TypeAsProto()->tensor_type().elem_type() != float_type,
-                  "decoder subgraph past inputs shall have same data type as that of encoder_hidden_states");
-  }
-
-  for (int i = 0; i < num_subgraph_outputs; i++) {
-    ORT_RETURN_IF(subgraph_outputs[i]->TypeAsProto()->tensor_type().elem_type() != float_type,
-                  "decoder subgraph output shall have same data type as that of encoder_hidden_states");
+                  "decoder subgraph past inputs shall have same data type as that of encoder_hidden_states.");
   }
 
   is_output_float16_ = (subgraph_outputs[0]->TypeAsProto()->tensor_type().elem_type() == float16_type);
@@ -166,7 +169,7 @@ Status WhisperDecoderSubgraph::CreateInitialFeeds(
 
   AllocatorPtr buffer_allocator = std::make_shared<onnxruntime::CPUAllocator>();
   size_t total_size = static_cast<size_t>(static_cast<long long>(cur_len) * batch_beam_size * sizeof(int));
-  auto seq_copy = IAllocator::MakeUniquePtr<int>(buffer_allocator, total_size);
+  auto seq_copy = IAllocator::MakeUniquePtr<int>(buffer_allocator, total_size, false, stream);
   int* seq_copy_ptr = seq_copy.get();
 
   if (!use_sequence_as_input_ids_) {
diff --git a/onnxruntime/contrib_ops/cuda/bert/add_bias_transpose.cu b/onnxruntime/contrib_ops/cuda/bert/add_bias_transpose.cu
index d846f55f1e28d..626e4c0b87a3c 100644
--- a/onnxruntime/contrib_ops/cuda/bert/add_bias_transpose.cu
+++ b/onnxruntime/contrib_ops/cuda/bert/add_bias_transpose.cu
@@ -287,9 +287,9 @@ __global__ void AddBiasTransposeQKV(int M, const T* input, const T* biases, T* o
     T* k_smem = q_smem + rotary_embedding_dim;
 
     const int half_rotary_dim = rotary_embedding_dim / 2;
-    const int half_idx        = (head_idx) / half_rotary_dim;
-    const int intra_half_idx  = (head_idx) % half_rotary_dim;
-    const int smem_pitch      = half_rotary_dim;
+    const int half_idx = (head_idx) / half_rotary_dim;
+    const int intra_half_idx = (head_idx) % half_rotary_dim;
+    const int smem_pitch = half_rotary_dim;
 
     if (do_rotary) {
       *reinterpret_cast<Vec_t*>(q_smem + half_idx * smem_pitch + intra_half_idx) = q;
@@ -441,7 +441,6 @@ __global__ void AddBiasTransposeQKVLarge(const int head_size, const T* input, co
   }
 }
 
-
 template <typename T>
 __global__ void AddBiasTransposeCutlass(const T* input, const T* biases, T* output, int v_head_size) {
   // Format 3 for cutlass memory efficient attention
@@ -651,7 +650,7 @@ void InvokeAddBiasTranspose(
     if (format != 1 && format != 2 && format != 3) {
       ORT_THROW("format must be 1, 2 or 3 for rotary attention");
     }
-    if (qk_head_size != 64 && qk_head_size !=128) {
+    if (qk_head_size != 64 && qk_head_size != 128) {
       ORT_THROW("qk_head_size must be 64 or 128 for rotary attention");
     }
     if (v_head_size != -1 && qk_head_size != v_head_size) {
diff --git a/onnxruntime/contrib_ops/cuda/bert/attention.cc b/onnxruntime/contrib_ops/cuda/bert/attention.cc
index f0385ea5abdfb..bf6431cf1afb2 100644
--- a/onnxruntime/contrib_ops/cuda/bert/attention.cc
+++ b/onnxruntime/contrib_ops/cuda/bert/attention.cc
@@ -135,32 +135,52 @@ Status Attention<T>::ComputeInternal(OpKernelContext* context) const {
   if (use_flash_attention && parameters.sequence_length < min_seq_len_for_flash_attention_packed_qkv_) {
     use_flash_attention = false;
   }
+  // Allocate buffers
+  size_t softmax_lse_accum_bytes = 0;
+  size_t out_accum_bytes = 0;
+  if (use_flash_attention) {
+    using namespace std;
+    auto [num_splits, slse_accum_bytes, o_accum_bytes] = onnxruntime::flash::get_num_splits_and_buffer_sizes(
+        parameters.batch_size, parameters.sequence_length, parameters.kv_sequence_length, parameters.num_heads,
+        parameters.head_size, device_prop.multiProcessorCount);
+    parameters.num_splits = num_splits;
+    softmax_lse_accum_bytes = slse_accum_bytes;
+    out_accum_bytes = o_accum_bytes;
+  }
+  auto softmax_lse_accum_buffer = GetScratchBuffer<void>(softmax_lse_accum_bytes, context->GetComputeStream());
+  auto out_accum_buffer = GetScratchBuffer<void>(out_accum_bytes, context->GetComputeStream());
 #else
   constexpr bool use_flash_attention = false;
+  auto softmax_lse_accum_buffer = GetScratchBuffer<void>(0, context->GetComputeStream());  // nullptr
+  auto out_accum_buffer = GetScratchBuffer<void>(0, context->GetComputeStream());          // nullptr
 #endif
 
   if (!use_flash_attention) {
-    if (is_unidirectional_ && enable_fused_causal_attention_) {  // GPT
-      // GPT fused kernels requires left side padding. mask can be:
-      //     none (no padding), 1D sequence lengths or 2d mask.
-      // Fused kernels don't support different sequence lengths of q and kv, so only apply to the first token
-      // where past state is empty.
-      bool is_mask_2d_key_padding = parameters.mask_type == AttentionMaskType::MASK_2D_KEY_PADDING;
-      bool use_causal_fused_runner = (nullptr == mask_index || is_mask_1d_seq_len || is_mask_2d_key_padding) &&
-                                     nullptr == relative_position_bias &&
-                                     parameters.past_sequence_length == 0 &&
-                                     parameters.hidden_size == parameters.v_hidden_size &&
-                                     FusedMHARunnerFP16v2::is_supported(sm, parameters.head_size, sequence_length,
-                                                                        enable_trt_flash_attention_, true);
-      if (use_causal_fused_runner) {
-        // Here we assume that num_heads, head_size and is_unidirectional does not change for an Attention node.
-        if (nullptr == fused_fp16_runner_.get()) {
-          fused_fp16_runner_ = FusedMHARunnerFP16v2::Create(num_heads_, parameters.head_size, sm, is_unidirectional_,
-                                                            enable_trt_flash_attention_, parameters.scale);
+    if (is_unidirectional_) {  // GPT
+      if (enable_fused_causal_attention_) {
+        // GPT fused kernels requires left side padding. mask can be:
+        //     none (no padding), 1D sequence lengths or 2d mask.
+        // Fused kernels don't support different sequence lengths of q and kv, so only apply to the first token
+        // where past state is empty.
+        bool is_mask_2d_key_padding = parameters.mask_type == AttentionMaskType::MASK_2D_KEY_PADDING;
+        bool use_causal_fused_runner = (nullptr == mask_index || is_mask_1d_seq_len || is_mask_2d_key_padding) &&
+                                       nullptr == relative_position_bias &&
+                                       parameters.past_sequence_length == 0 &&
+                                       parameters.hidden_size == parameters.v_hidden_size &&
+                                       FusedMHARunnerFP16v2::is_supported(sm, parameters.head_size, sequence_length,
+                                                                          enable_trt_flash_attention_, true);
+        if (use_causal_fused_runner) {
+          // Here we assume that num_heads, head_size and is_unidirectional does not change for an Attention node.
+          if (nullptr == fused_fp16_runner_.get()) {
+            std::call_once(fused_fp16_runner_created_, [&]() {
+              fused_fp16_runner_ = FusedMHARunnerFP16v2::Create(num_heads_, parameters.head_size, sm, is_unidirectional_,
+                                                                enable_trt_flash_attention_, parameters.scale);
+            });
+          }
+
+          // Here we assume all causal kernels can be loaded into shared memory. TODO: add a function to check.
+          fused_runner = fused_fp16_runner_.get();
         }
-
-        // Here we assume all causal kernels can be loaded into shared memory. TODO: add a function to check.
-        fused_runner = fused_fp16_runner_.get();
       }
     } else {  // BERT
       bool use_fused_runner = !disable_fused_self_attention_ &&
@@ -175,8 +195,10 @@ Status Attention<T>::ComputeInternal(OpKernelContext* context) const {
       if (use_fused_runner) {
         // Here we assume that num_heads, head_size and is_unidirectional does not change for an Attention node.
         if (nullptr == fused_fp16_runner_.get()) {
-          fused_fp16_runner_ = FusedMHARunnerFP16v2::Create(num_heads_, parameters.head_size, sm, is_unidirectional_,
-                                                            enable_trt_flash_attention_, parameters.scale);
+          std::call_once(fused_fp16_runner_created_, [&]() {
+            fused_fp16_runner_ = FusedMHARunnerFP16v2::Create(num_heads_, parameters.head_size, sm, is_unidirectional_,
+                                                              enable_trt_flash_attention_, parameters.scale);
+          });
         }
 
         // In case some kernel not loaded due to shared memory limit, we need to double check here.
@@ -213,11 +235,12 @@ Status Attention<T>::ComputeInternal(OpKernelContext* context) const {
 
   typedef typename ToCudaType<T>::MappedType CudaT;
 
-  IAllocatorUniquePtr<T> gemm_buffer;
+  AllocatorPtr allocator;
+  ORT_RETURN_IF_ERROR(context->GetTempSpaceAllocator(&allocator));
   int m = batch_size * sequence_length;
   int n = (parameters.hidden_size + parameters.hidden_size + parameters.v_hidden_size);
   int k = parameters.input_hidden_size;
-  gemm_buffer = GetScratchBuffer<T>(static_cast<size_t>(m) * n, context->GetComputeStream());
+  IAllocatorUniquePtr<void> gemm_buffer = IAllocator::MakeUniquePtr<void>(allocator, static_cast<size_t>(m * n) * sizeof(T), false, context->GetComputeStream());
 
   CudaT one = ToCudaType<T>::FromFloat(1.0f);
   CudaT zero = ToCudaType<T>::FromFloat(0.0f);
@@ -244,7 +267,8 @@ Status Attention<T>::ComputeInternal(OpKernelContext* context) const {
                                                    use_flash_attention,
                                                    use_fused_cross_attention,
                                                    use_memory_efficient_attention);
-  auto work_space = GetScratchBuffer<void>(workSpaceSize, context->GetComputeStream());
+  IAllocatorUniquePtr<void> work_space = IAllocator::MakeUniquePtr<void>(allocator, workSpaceSize, false, context->GetComputeStream());
+  ;
 
   typedef typename ToCudaType<T>::MappedType CudaT;
   AttentionData<CudaT> data;
@@ -271,6 +295,12 @@ Status Attention<T>::ComputeInternal(OpKernelContext* context) const {
   data.fused_runner = reinterpret_cast<void*>(fused_runner);
   data.use_flash_attention = use_flash_attention;
   data.use_memory_efficient_attention = use_memory_efficient_attention;
+  if (softmax_lse_accum_buffer != nullptr) {
+    data.softmax_lse_accum = reinterpret_cast<CudaT*>(softmax_lse_accum_buffer.get());
+  }
+  if (out_accum_buffer != nullptr) {
+    data.out_accum = reinterpret_cast<CudaT*>(out_accum_buffer.get());
+  }
 
   return QkvToContext<CudaT>(device_prop, cublas, context->GetComputeStream(), parameters, data);
 }
diff --git a/onnxruntime/contrib_ops/cuda/bert/attention.h b/onnxruntime/contrib_ops/cuda/bert/attention.h
index 455e55ba05a66..acafb379d713f 100644
--- a/onnxruntime/contrib_ops/cuda/bert/attention.h
+++ b/onnxruntime/contrib_ops/cuda/bert/attention.h
@@ -4,6 +4,7 @@
 #pragma once
 
 #include <memory>
+#include <mutex>
 #include "core/providers/cuda/cuda_kernel.h"
 #include "contrib_ops/cpu/bert/attention_base.h"
 #include "contrib_ops/cuda/bert/tensorrt_fused_multihead_attention/mha_runner.h"
@@ -28,6 +29,7 @@ class Attention final : public CudaKernel, public AttentionBase {
   bool disable_memory_efficient_attention_;
   int min_seq_len_for_flash_attention_packed_qkv_;
   mutable std::unique_ptr<MHARunner> fused_fp16_runner_;
+  mutable std::once_flag fused_fp16_runner_created_;
 };
 
 }  // namespace cuda
diff --git a/onnxruntime/contrib_ops/cuda/bert/attention_impl.cu b/onnxruntime/contrib_ops/cuda/bert/attention_impl.cu
index b4a4ae208ceb1..83c426e7e6ed7 100644
--- a/onnxruntime/contrib_ops/cuda/bert/attention_impl.cu
+++ b/onnxruntime/contrib_ops/cuda/bert/attention_impl.cu
@@ -316,7 +316,9 @@ Status FlashAttention(
   ORT_RETURN_IF_ERROR(onnxruntime::flash::mha_fwd(
       device_prop, stream, query, key, value, data.output, reinterpret_cast<void*>(data.scratch),
       parameters.batch_size, parameters.num_heads, parameters.num_heads, parameters.head_size,
-      parameters.sequence_length, parameters.total_sequence_length, scale, parameters.is_unidirectional));
+      parameters.sequence_length, parameters.total_sequence_length, scale, parameters.is_unidirectional,
+      parameters.num_splits, reinterpret_cast<void*>(data.softmax_lse_accum), reinterpret_cast<void*>(data.out_accum),
+      true));
 
   DUMP_TENSOR("flash attention output", data.output,
               parameters.batch_size, parameters.sequence_length, parameters.num_heads, parameters.v_head_size);
@@ -372,6 +374,7 @@ Status EfficientAttention(
   p.num_heads = parameters.num_heads;
   p.sequence_length = parameters.sequence_length;
   p.kv_sequence_length = parameters.total_sequence_length;
+  p.max_sequence_length = parameters.total_sequence_length;
   p.qk_head_size = parameters.head_size;
   p.v_head_size = parameters.v_head_size;
   p.causal = parameters.is_unidirectional;
@@ -393,10 +396,12 @@ Status EfficientAttention(
   p.attn_bias = nullptr == data.relative_position_bias ? nullptr : data.relative_position_bias;
   p.is_attn_bias_batched = !parameters.broadcast_res_pos_bias;
   p.output = data.output;
+  p.is_kv_bsnh = true;
   p.workspace = MemoryEfficientAttentionParams::need_workspace(parameters.v_head_size, sizeof(T) == sizeof(float))
                     ? data.scratch
                     : nullptr;
   p.stream = stream;
+  p.has_custom_right_padding = false;
   run_memory_efficient_attention(p);
   DUMP_TENSOR("efficient attention output", data.output,
               parameters.batch_size, parameters.sequence_length, parameters.num_heads, parameters.v_head_size);
diff --git a/onnxruntime/contrib_ops/cuda/bert/attention_impl.h b/onnxruntime/contrib_ops/cuda/bert/attention_impl.h
index d0a5fb51a25d6..3e78978c3cc43 100644
--- a/onnxruntime/contrib_ops/cuda/bert/attention_impl.h
+++ b/onnxruntime/contrib_ops/cuda/bert/attention_impl.h
@@ -88,6 +88,11 @@ struct AttentionData {
   T* v = nullptr;
   T* scratch = nullptr;
   AttentionQkvFormat qkv_format = AttentionQkvFormat::Q_K_V_BSNH;
+
+  // Flash buffers
+  T* softmax_lse = nullptr;
+  T* softmax_lse_accum = nullptr;
+  T* out_accum = nullptr;
 };
 
 template <typename T>
diff --git a/onnxruntime/contrib_ops/cuda/bert/bert_padding.cu b/onnxruntime/contrib_ops/cuda/bert/bert_padding.cu
index 2af748d8d4a62..32ed961a68049 100644
--- a/onnxruntime/contrib_ops/cuda/bert/bert_padding.cu
+++ b/onnxruntime/contrib_ops/cuda/bert/bert_padding.cu
@@ -367,32 +367,32 @@ __global__ void __launch_bounds__(kMAX_THREADS_PER_BLOCK)
                            const int* attention_masks,
                            const int batch_size,
                            const int sequence_length) {
-    typedef cub::BlockReduce<int, kMAX_THREADS_PER_BLOCK> BlockReduce;
-    __shared__ typename BlockReduce::TempStorage temp_storage;
-
-    const int batch_id = blockIdx.x;
-    const int* batch_mask = attention_masks + (batch_id * sequence_length);
-    const bool leftmost_non_zero = (batch_mask[0] != 0);
-    int biggest_position = 0;
-
-    for (int i = threadIdx.x; i < sequence_length; i += blockDim.x) {
-      if (leftmost_non_zero == (batch_mask[i] != 0)) {
-        biggest_position = i;
-      } else {
-        break;
-      }
+  typedef cub::BlockReduce<int, kMAX_THREADS_PER_BLOCK> BlockReduce;
+  __shared__ typename BlockReduce::TempStorage temp_storage;
+
+  const int batch_id = blockIdx.x;
+  const int* batch_mask = attention_masks + (batch_id * sequence_length);
+  const bool leftmost_non_zero = (batch_mask[0] != 0);
+  int biggest_position = 0;
+
+  for (int i = threadIdx.x; i < sequence_length; i += blockDim.x) {
+    if (leftmost_non_zero == (batch_mask[i] != 0)) {
+      biggest_position = i;
+    } else {
+      break;
     }
+  }
 
-    int last_leading_position = BlockReduce(temp_storage).Reduce(biggest_position, cub::Max(), blockDim.x);
+  int last_leading_position = BlockReduce(temp_storage).Reduce(biggest_position, cub::Max(), blockDim.x);
 
-    if (threadIdx.x == 0) {
-      int batch_offset = batch_id * sequence_length;
-      trt_mha_padding_offset[2 * batch_id] = batch_offset;
-      trt_mha_padding_offset[2 * batch_id + 1] = batch_offset + last_leading_position + 1;
-      if (batch_id == gridDim.x - 1) {
-        trt_mha_padding_offset[2 * batch_id + 2] = batch_offset + sequence_length;
-      }
+  if (threadIdx.x == 0) {
+    int batch_offset = batch_id * sequence_length;
+    trt_mha_padding_offset[2 * batch_id] = batch_offset;
+    trt_mha_padding_offset[2 * batch_id + 1] = batch_offset + last_leading_position + 1;
+    if (batch_id == gridDim.x - 1) {
+      trt_mha_padding_offset[2 * batch_id + 2] = batch_offset + sequence_length;
     }
+  }
 }
 
 // only support simple left padding with mask 0s on leading left,
diff --git a/onnxruntime/contrib_ops/cuda/bert/cutlass_fmha/fmha_launch_template.h b/onnxruntime/contrib_ops/cuda/bert/cutlass_fmha/fmha_launch_template.h
index ed330b0fca332..db78722cc0e4c 100644
--- a/onnxruntime/contrib_ops/cuda/bert/cutlass_fmha/fmha_launch_template.h
+++ b/onnxruntime/contrib_ops/cuda/bert/cutlass_fmha/fmha_launch_template.h
@@ -16,6 +16,133 @@ namespace onnxruntime {
 namespace contrib {
 namespace cuda {
 
+template <typename AttentionKernel, int kQueriesPerBlock>
+struct RightPaddingBatchHook {
+  using scalar_t = typename AttentionKernel::scalar_t;
+  using accum_t = typename AttentionKernel::accum_t;
+  using lse_scalar_t = typename AttentionKernel::lse_scalar_t;
+  using output_t = typename AttentionKernel::output_t;
+  using output_accum_t = typename AttentionKernel::output_accum_t;
+
+  static constexpr bool kSupportsDropout = AttentionKernel::kSupportsDropout;
+  static constexpr bool kSupportsBias = AttentionKernel::kSupportsBias;
+  static constexpr int kKeysPerBlock = AttentionKernel::kKeysPerBlock;
+  static constexpr bool kIsAligned = AttentionKernel::kIsAligned;
+  static constexpr bool kSingleValueIteration = AttentionKernel::kSingleValueIteration;
+  static constexpr int32_t kAlignLSE = AttentionKernel::kAlignLSE;  // block size of backward
+  static constexpr bool kPreloadV = AttentionKernel::kPreloadV;
+  static constexpr bool kKeepOutputInRF = AttentionKernel::kKeepOutputInRF;
+  static constexpr bool kNeedsOutputAccumulatorBuffer = AttentionKernel::kNeedsOutputAccumulatorBuffer;
+
+  template <typename Params>
+  static CUTLASS_DEVICE bool AdvanceToBlockForGQA(Params& p) {
+    auto batch_id = blockIdx.z;
+    auto head_id = blockIdx.y;
+    auto query_start = blockIdx.x * kQueriesPerBlock;
+
+    auto lse_dim = ceil_div((int32_t)(p.num_queries), kAlignLSE) * kAlignLSE;
+
+    // Advance to current batch - in case of different sequence lengths
+    if (p.seqlen_k_ptr) {
+      p.num_keys = p.seqlen_k_ptr[batch_id];
+    }
+
+    if (query_start >= p.num_queries) {
+      return false;
+    }
+
+    // Advance to the current batch / head / query_start
+    p.query_ptr += batch_id * p.q_strideB + query_start * p.q_strideM + head_id * p.q_strideH;
+    p.key_ptr += batch_id * p.k_strideB + head_id * p.k_strideH;
+    p.value_ptr += batch_id * p.v_strideB + head_id * p.v_strideH;
+    p.output_ptr += int64_t(batch_id * p.num_queries) * p.o_strideM + int64_t(query_start) * p.o_strideM + head_id * p.head_dim_value;
+
+    if (kSupportsBias && p.attn_bias_ptr != nullptr) {
+      p.attn_bias_ptr += (batch_id * p.bias_strideB) + (head_id * p.bias_strideH);
+    }
+    if (p.output_accum_ptr != nullptr) {
+      p.output_accum_ptr += int64_t(batch_id * p.num_queries) * (p.head_dim_value * p.num_heads) +
+                            int64_t(query_start) * (p.head_dim_value * p.num_heads) +
+                            head_id * p.head_dim_value;
+    } else {
+      // Accumulate directly in the destination buffer (eg for f32)
+      p.output_accum_ptr = (accum_t*)(p.output_ptr);
+    }
+
+    if (p.logsumexp_ptr != nullptr) {
+      // lse[batch_id, head_id, query_start]
+      p.logsumexp_ptr +=
+          batch_id * lse_dim * p.num_heads + head_id * lse_dim + query_start;
+    }
+
+    // Custom masking
+    if (p.causal_diagonal_ptr) {
+      p.causal_diagonal_offset = p.causal_diagonal_ptr[batch_id];
+    }
+    if (p.custom_mask_type == AttentionKernel::CausalFromBottomRight) {
+      p.causal_diagonal_offset += p.num_keys - p.num_queries;
+    }
+    if (p.custom_mask_type == AttentionKernel::CausalFromTopLeft ||
+        p.custom_mask_type == AttentionKernel::CausalFromBottomRight) {
+      // the bottom row of the current block is query_start + kQueriesPerBlock
+      // the last active key is then query_start + causal_diagonal_offset +
+      // kQueriesPerBlock so num_keys is the min between actual num_keys and
+      // this to avoid extra computations
+      p.num_keys = cutlass::fast_min(
+          int32_t(query_start + p.causal_diagonal_offset + kQueriesPerBlock),
+          p.num_keys);
+    }
+
+    p.num_queries -= query_start;
+    p.num_batches = 0;  // no longer used after
+
+    // If num_queries == 1, and there is only one key head we're wasting
+    // 15/16th of tensor core compute In that case :
+    //  - we only launch kernels for head_id % kQueriesPerBlock == 0
+    //  - we iterate over heads instead of queries (strideM = strideH)
+    if (p.num_queries == 1 && p.k_strideH == 0 && p.v_strideH == 0) {
+      if (head_id % kQueriesPerBlock != 0)
+        return false;
+      p.q_strideM = p.q_strideH;
+      p.num_queries = p.num_heads;
+      p.num_heads = 1;  // unused but here for intent
+      // remove causal since n_query = 1
+      // otherwise, offset would change with head !
+      p.custom_mask_type = AttentionKernel::NoCustomMask;
+      p.o_strideM = p.head_dim_value;
+    }
+
+    // Make sure the compiler knows these variables are the same on all
+    // the threads of the warp.
+    p.query_ptr = warp_uniform(p.query_ptr);
+    p.key_ptr = warp_uniform(p.key_ptr);
+    p.value_ptr = warp_uniform(p.value_ptr);
+    if (kSupportsBias) {
+      p.attn_bias_ptr = warp_uniform(p.attn_bias_ptr);
+    }
+    p.output_ptr = warp_uniform(p.output_ptr);
+    p.output_accum_ptr = warp_uniform(p.output_accum_ptr);
+    p.logsumexp_ptr = warp_uniform(p.logsumexp_ptr);
+    p.num_queries = warp_uniform(p.num_queries);
+    p.num_keys = warp_uniform(p.num_keys);
+    p.num_heads = warp_uniform(p.num_heads);
+    p.head_dim = warp_uniform(p.head_dim);
+    p.head_dim_value = warp_uniform(p.head_dim_value);
+    p.o_strideM = warp_uniform(p.o_strideM);
+    p.custom_mask_type = warp_uniform(p.custom_mask_type);
+    return true;
+  }
+};
+
+template <typename AK, int kQueriesPerBlock>
+__global__ void __launch_bounds__(AK::kNumThreads, AK::kMinBlocksPerSm)
+    attention_kernel_batched_impl_right_padding(typename AK::Params p) {
+  if (!RightPaddingBatchHook<AK, kQueriesPerBlock>::AdvanceToBlockForGQA(p)) {
+    return;
+  }
+  AK::attention_kernel(p);
+}
+
 template <typename T, typename ArchTag, bool is_aligned, int queries_per_block, int keys_per_block, bool single_value_iteration>
 void LaunchCutlassFmha(const MemoryEfficientAttentionParams& params) {
   using Attention = AttentionKernel<T, ArchTag, is_aligned, queries_per_block, keys_per_block, single_value_iteration>;
@@ -51,28 +178,52 @@ void LaunchCutlassFmha(const MemoryEfficientAttentionParams& params) {
     p.num_keys = params.kv_sequence_length;
 
     if (params.causal) {
-      p.custom_mask_type = Attention::CausalFromTopLeft;
+      p.custom_mask_type = Attention::CausalFromBottomRight;
     }
 
-    // Input format is BxSxNxH, output is BxSxNxH
-    p.q_strideH = params.qk_head_size;
-    p.k_strideH = params.qk_head_size;
-    p.v_strideH = params.v_head_size;
-    p.bias_strideH = nullptr == params.attn_bias ? 0 : p.num_queries * p.num_keys;
-
-    p.q_strideM = params.num_heads * params.qk_head_size;
-    p.k_strideM = params.num_heads * params.qk_head_size;
-    p.v_strideM = params.num_heads * params.v_head_size;
-    p.o_strideM = params.num_heads * params.v_head_size;
-    p.bias_strideM = nullptr == params.attn_bias ? 0 : p.num_keys;
-
-    p.q_strideB = static_cast<int64_t>(p.q_strideM) * params.sequence_length;
-    p.k_strideB = static_cast<int64_t>(p.k_strideM) * params.kv_sequence_length;
-    p.v_strideB = static_cast<int64_t>(p.v_strideM) * params.kv_sequence_length;
-    p.bias_strideB = params.is_attn_bias_batched ? static_cast<int64_t>(p.bias_strideH) * params.num_heads : 0;
+    // We use max_sequence_length to calculate KV stride
+    if (params.is_kv_bsnh) {
+      // Input Q, K, V format is BxSxNxH, output is BxSxNxH
+      p.q_strideH = params.qk_head_size;
+      p.k_strideH = params.qk_head_size;
+      p.v_strideH = params.v_head_size;
+      p.bias_strideH = nullptr == params.attn_bias ? 0 : p.num_queries * p.num_keys;
+
+      p.q_strideM = params.num_heads * params.qk_head_size;
+      p.k_strideM = params.num_heads * params.qk_head_size;
+      p.v_strideM = params.num_heads * params.v_head_size;
+      p.o_strideM = params.num_heads * params.v_head_size;
+      p.bias_strideM = nullptr == params.attn_bias ? 0 : p.num_keys;
+
+      p.q_strideB = static_cast<int64_t>(p.q_strideM) * params.sequence_length;
+      p.k_strideB = static_cast<int64_t>(p.k_strideM) * params.max_sequence_length;
+      p.v_strideB = static_cast<int64_t>(p.v_strideM) * params.max_sequence_length;
+      p.bias_strideB = params.is_attn_bias_batched ? static_cast<int64_t>(p.bias_strideH) * params.num_heads : 0;
+    } else {
+      // Input K, V format is BxNxSxH, Input Q is BxSxNxH, output is BxSxNxH
+      p.q_strideH = params.qk_head_size;
+      p.k_strideH = params.max_sequence_length * params.qk_head_size;
+      p.v_strideH = params.max_sequence_length * params.v_head_size;
+      p.bias_strideH = nullptr == params.attn_bias ? 0 : p.num_queries * p.num_keys;
+
+      p.q_strideM = params.num_heads * params.qk_head_size;
+      p.k_strideM = params.qk_head_size;
+      p.v_strideM = params.v_head_size;
+      p.o_strideM = params.num_heads * params.v_head_size;
+      p.bias_strideM = nullptr == params.attn_bias ? 0 : p.num_keys;
+
+      p.q_strideB = params.num_heads * params.qk_head_size * params.sequence_length;
+      p.k_strideB = params.num_heads * params.qk_head_size * params.max_sequence_length;
+      p.v_strideB = params.num_heads * params.v_head_size * params.max_sequence_length;
+      p.bias_strideB = params.is_attn_bias_batched ? static_cast<int64_t>(p.bias_strideH) * params.num_heads : 0;
+    }
+  }
+
+  auto kernel_fn = attention_kernel_batched_impl<Attention>;
+  if (params.has_custom_right_padding) {
+    kernel_fn = attention_kernel_batched_impl_right_padding<Attention, queries_per_block>;
   }
 
-  constexpr auto kernel_fn = attention_kernel_batched_impl<Attention>;
   int smem_bytes = sizeof(typename Attention::SharedStorage);
   if (smem_bytes > 0xc000) {
     ORT_ENFORCE(params.sm >= 70, "This kernel requires too much shared memory on this machine!");
diff --git a/onnxruntime/contrib_ops/cuda/bert/cutlass_fmha/memory_efficient_attention.h b/onnxruntime/contrib_ops/cuda/bert/cutlass_fmha/memory_efficient_attention.h
index f725be8d7cf89..484b783db1724 100644
--- a/onnxruntime/contrib_ops/cuda/bert/cutlass_fmha/memory_efficient_attention.h
+++ b/onnxruntime/contrib_ops/cuda/bert/cutlass_fmha/memory_efficient_attention.h
@@ -14,10 +14,12 @@ namespace cuda {
 struct MemoryEfficientAttentionParams {
   int32_t sm;
   bool is_half;
+  bool is_kv_bsnh = true;
   int32_t batch_size;
   int32_t num_heads;
   int32_t sequence_length;
   int32_t kv_sequence_length;
+  int32_t max_sequence_length;
   int32_t qk_head_size;
   int32_t v_head_size;
   bool causal;
@@ -41,6 +43,8 @@ struct MemoryEfficientAttentionParams {
   static bool need_workspace(size_t v_head_size, bool is_float) {
     return (v_head_size > 128 && !is_float);
   }
+
+  bool has_custom_right_padding = false;
 };
 
 void run_memory_efficient_attention(const MemoryEfficientAttentionParams& params);
diff --git a/onnxruntime/contrib_ops/cuda/bert/decoder_masked_multihead_attention.cc b/onnxruntime/contrib_ops/cuda/bert/decoder_masked_multihead_attention.cc
index 4bdc6db30b036..54aad9cbaf387 100644
--- a/onnxruntime/contrib_ops/cuda/bert/decoder_masked_multihead_attention.cc
+++ b/onnxruntime/contrib_ops/cuda/bert/decoder_masked_multihead_attention.cc
@@ -22,6 +22,7 @@ static constexpr int kBeamWidthInputIndex = 8;
 static constexpr int kCacheIndirectionInputIndex = 9;
 static constexpr int kPastInputIndex = 5;
 static constexpr int kPresentOutputIndex = 1;
+static constexpr int kQKOutputIndex = 3;
 static constexpr int kBiasIndex = 10;
 
 #define REGISTER_KERNEL_TYPED(T1, T2)                                         \
@@ -50,6 +51,7 @@ DecoderMaskedMultiHeadAttention<T1, T2>::DecoderMaskedMultiHeadAttention(const O
   mask_filter_value_ = info.GetAttrOrDefault<float>("mask_filter_value", -10000.0f);
   scale_ = info.GetAttrOrDefault<float>("scale", 0.0f);
   past_present_share_buffer_ = info.GetAttrOrDefault<int64_t>("past_present_share_buffer", 0LL);
+  output_qk_ = info.GetAttrOrDefault<int64_t>("output_qk", 0LL);
 }
 
 template <typename T1, typename T2>
@@ -98,7 +100,7 @@ Status DecoderMaskedMultiHeadAttention<T1, T2>::ComputeInternal(OpKernelContext*
 
   // This kernel is for decoding only (i.e.) sequence length has to be 1
   if (sequence_length != 1) {
-    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Input sequence length should be 1 to use DecoderMaskedMultiHeadAttention");
+    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Input sequence length should be 1 to use DecoderMaskedMultiHeadAttention. Actual length is ", sequence_length);
   }
 
   if (parameters.head_size != parameters.v_head_size) {
@@ -125,6 +127,7 @@ Status DecoderMaskedMultiHeadAttention<T1, T2>::ComputeInternal(OpKernelContext*
   TensorShape present_shape(present_dims);
   Tensor* present_key = context->Output(kPresentOutputIndex, present_shape);
   Tensor* present_value = context->Output(kPresentOutputIndex + 1, present_shape);
+  Tensor* cross_qk = nullptr;
 
   auto cuda_stream = Stream(context);
 
@@ -191,6 +194,13 @@ Status DecoderMaskedMultiHeadAttention<T1, T2>::ComputeInternal(OpKernelContext*
     parameters.v_cache = present_value_data;
   }
 
+  if (output_qk_) {
+    int64_t qk_dims[] = {parameters.batch_size, parameters.num_heads, 1, parameters.total_sequence_length};
+    TensorShape qk_shape(&qk_dims[0], sizeof(qk_dims) / sizeof(qk_dims[0]));
+    cross_qk = context->Output(kQKOutputIndex, qk_shape);
+    parameters.out_qk = cross_qk->MutableData<float>();
+  }
+
   parameters.out = output->MutableDataRaw();
 
   // Scale
diff --git a/onnxruntime/contrib_ops/cuda/bert/decoder_masked_multihead_attention.h b/onnxruntime/contrib_ops/cuda/bert/decoder_masked_multihead_attention.h
index 8200a66db383f..b5476e6b54c44 100644
--- a/onnxruntime/contrib_ops/cuda/bert/decoder_masked_multihead_attention.h
+++ b/onnxruntime/contrib_ops/cuda/bert/decoder_masked_multihead_attention.h
@@ -22,6 +22,7 @@ class DecoderMaskedMultiHeadAttention final : public CudaKernel {
   float mask_filter_value_;
   float scale_;
   bool past_present_share_buffer_;
+  bool output_qk_;
 };
 
 }  // namespace cuda
diff --git a/onnxruntime/contrib_ops/cuda/bert/embed_layer_norm_impl.cu b/onnxruntime/contrib_ops/cuda/bert/embed_layer_norm_impl.cu
index a2dfca8cd6f09..ae53eca541fa5 100644
--- a/onnxruntime/contrib_ops/cuda/bert/embed_layer_norm_impl.cu
+++ b/onnxruntime/contrib_ops/cuda/bert/embed_layer_norm_impl.cu
@@ -86,10 +86,10 @@ __global__ void MaskIndexKernel(int sequence_length, const int* mask, int* mask_
 }
 
 inline Status ComputeMaskIndex(cudaStream_t stream,
-                             const int sequence_length,
-                             const int batch_size,
-                             const int* mask,
-                             int* mask_index) {
+                               const int sequence_length,
+                               const int batch_size,
+                               const int* mask,
+                               int* mask_index) {
   // Mask idx is of length batch_size and assumes the valid region is contiguous starting
   // from the beginning of the sequence
 
@@ -133,7 +133,7 @@ __global__ void EmbedLayerNormKernel(
     }
     if (nullptr == position_ids) {
       position_id = blockIdx.x;
-    } else if (broadcast_position_ids){
+    } else if (broadcast_position_ids) {
       position_id = position_ids[sequence_position % gridDim.x];
     } else {
       position_id = position_ids[sequence_position];
@@ -212,13 +212,12 @@ Status LaunchEmbedLayerNormKernel(
     void* embedding_sum,
     const int* position_ids,
     const bool broadcast_position_ids) {
-
   if (mask_index != nullptr) {
     if (nullptr == input_mask) {
       CUDA_RETURN_IF_ERROR(cudaMemsetAsync(mask_index, 0, sizeof(int) * batch_size, stream));
     } else {
       ORT_RETURN_IF_ERROR(
-        ComputeMaskIndex(stream, sequence_length, batch_size, input_mask, static_cast<int*>(mask_index)));
+          ComputeMaskIndex(stream, sequence_length, batch_size, input_mask, static_cast<int*>(mask_index)));
     }
   }
 
diff --git a/onnxruntime/contrib_ops/cuda/bert/fast_gelu_impl.cu b/onnxruntime/contrib_ops/cuda/bert/fast_gelu_impl.cu
index 1b0de47a834ec..c9498eb1bcd7b 100644
--- a/onnxruntime/contrib_ops/cuda/bert/fast_gelu_impl.cu
+++ b/onnxruntime/contrib_ops/cuda/bert/fast_gelu_impl.cu
@@ -66,7 +66,7 @@ __global__ void FastGeluKernel2(const half2 a, const half2 b, const half2 c, int
 
 template <>
 Status LaunchFastGeluKernel(const cudaDeviceProp& prop, cudaStream_t stream, int input_length, int bias_length,
-                          const float* input, const float* bias, float* output, bool /*use_half2*/) {
+                            const float* input, const float* bias, float* output, bool /*use_half2*/) {
   constexpr int blockSize = 256;
   const int gridSize = (input_length + blockSize - 1) / blockSize;
   FastGeluKernel<float, blockSize><<<gridSize, blockSize, 0, stream>>>(A, B, C, input_length, bias_length,
@@ -77,7 +77,7 @@ Status LaunchFastGeluKernel(const cudaDeviceProp& prop, cudaStream_t stream, int
 
 template <>
 Status LaunchFastGeluKernel(const cudaDeviceProp& prop, cudaStream_t stream, int input_length, int bias_length,
-                          const half* input, const half* bias, half* output, bool use_half2) {
+                            const half* input, const half* bias, half* output, bool use_half2) {
   constexpr int blockSize = 256;
   if (use_half2 && 0 == (bias_length & 1) && prop.major >= 7) {
     const int n = input_length / 2;
@@ -101,7 +101,7 @@ Status LaunchFastGeluKernel(const cudaDeviceProp& prop, cudaStream_t stream, int
 
 template <>
 Status LaunchFastGeluKernel(const cudaDeviceProp& prop, cudaStream_t stream, int input_length, int bias_length,
-                          const BFloat16* input, const BFloat16* bias, BFloat16* output, bool /*use_half2*/) {
+                            const BFloat16* input, const BFloat16* bias, BFloat16* output, bool /*use_half2*/) {
   constexpr int blockSize = 256;
 
   // remove nv_bfloat162 implementation for now to fix build issue
diff --git a/onnxruntime/contrib_ops/cuda/bert/fastertransformer_decoder_attention/decoder_masked_multihead_attention_impl.cu b/onnxruntime/contrib_ops/cuda/bert/fastertransformer_decoder_attention/decoder_masked_multihead_attention_impl.cu
index c8877a5e3f872..33e7a33494778 100644
--- a/onnxruntime/contrib_ops/cuda/bert/fastertransformer_decoder_attention/decoder_masked_multihead_attention_impl.cu
+++ b/onnxruntime/contrib_ops/cuda/bert/fastertransformer_decoder_attention/decoder_masked_multihead_attention_impl.cu
@@ -427,6 +427,15 @@ __global__ void masked_multihead_attention_kernel(DecoderMaskedMultiHeadAttentio
   // Compute the logits and start the sum.
   float sum = 0.f;
   int sum_tlength = params.is_cross_attention ? tlength - 1 : tlength;
+
+  if (params.out_qk != nullptr) {
+    // store cross qk before softmax, out_qk has shape [B(batchxbeam), #Head, 1, total_sequence_length]
+    float* target = ((float*)params.out_qk) + ((int64_t)bhi * tlength);
+    for (int ti = tidx; ti <= sum_tlength; ti += THREADS_PER_BLOCK) {
+      target[ti] = (float)(qk_smem[ti]);
+    }
+  }
+
   for (int ti = tidx; ti <= sum_tlength; ti += THREADS_PER_BLOCK) {
     // This is a deviation from FasterTransformer kernel implementation
     // but this aligns with ORT's other Attention kernels which strives to
diff --git a/onnxruntime/contrib_ops/cuda/bert/fastertransformer_decoder_attention/decoder_masked_multihead_attention_impl.h b/onnxruntime/contrib_ops/cuda/bert/fastertransformer_decoder_attention/decoder_masked_multihead_attention_impl.h
index 6d7f368db4dd4..4b408dafa2d81 100644
--- a/onnxruntime/contrib_ops/cuda/bert/fastertransformer_decoder_attention/decoder_masked_multihead_attention_impl.h
+++ b/onnxruntime/contrib_ops/cuda/bert/fastertransformer_decoder_attention/decoder_masked_multihead_attention_impl.h
@@ -37,6 +37,7 @@ struct DecoderMaskedMultiHeadAttentionParams : AttentionParameters {
   void* v_cache = nullptr;
 
   void* out = nullptr;
+  void* out_qk = nullptr;
 
   const int32_t* cache_indir = nullptr;
   const int32_t* mask = nullptr;  // [B, total_sequence_length]
diff --git a/onnxruntime/contrib_ops/cuda/bert/flash_attention/block_info.h b/onnxruntime/contrib_ops/cuda/bert/flash_attention/block_info.h
index 9db98061bbd66..811b1be7d4315 100644
--- a/onnxruntime/contrib_ops/cuda/bert/flash_attention/block_info.h
+++ b/onnxruntime/contrib_ops/cuda/bert/flash_attention/block_info.h
@@ -12,9 +12,13 @@ struct BlockInfo {
   template <typename Params>
   __device__ BlockInfo(const Params& params, const int bidb)
       : sum_s_q(!Varlen || params.cu_seqlens_q == nullptr ? -1 : params.cu_seqlens_q[bidb]),
-        sum_s_k(!Varlen || params.cu_seqlens_k == nullptr ? -1 : params.cu_seqlens_k[bidb]),
-        actual_seqlen_q(!Varlen || params.cu_seqlens_q == nullptr ? params.seqlen_q : params.cu_seqlens_q[bidb + 1] - sum_s_q),
-        actual_seqlen_k(!Varlen || params.cu_seqlens_k == nullptr ? params.seqlen_k : params.cu_seqlens_k[bidb + 1] - sum_s_k) {
+        sum_s_k(!Varlen || params.cu_seqlens_k == nullptr || !params.is_seqlens_k_cumulative ? -1 : params.cu_seqlens_k[bidb]),
+        actual_seqlen_q(!Varlen || params.cu_seqlens_q == nullptr ? params.seqlen_q : params.cu_seqlens_q[bidb + 1] - sum_s_q)
+        // If is_seqlens_k_cumulative, then seqlen_k is cu_seqlens_k[bidb + 1] - cu_seqlens_k[bidb].
+        // Otherwise it's cu_seqlens_k[bidb], i.e., we use cu_seqlens_k to store the sequence lengths of K.
+        ,
+        seqlen_k_cache(!Varlen || params.cu_seqlens_k == nullptr ? params.seqlen_k : (params.is_seqlens_k_cumulative ? params.cu_seqlens_k[bidb + 1] - sum_s_k : params.cu_seqlens_k[bidb])),
+        actual_seqlen_k(seqlen_k_cache + (params.knew_ptr == nullptr ? 0 : params.seqlen_knew)) {
   }
 
   template <typename index_t>
@@ -30,6 +34,8 @@ struct BlockInfo {
   const int sum_s_q;
   const int sum_s_k;
   const int actual_seqlen_q;
+  // We have to have seqlen_k_cache declared before actual_seqlen_k, otherwise actual_seqlen_k is set to 0.
+  const int seqlen_k_cache;
   const int actual_seqlen_k;
 };
 
diff --git a/onnxruntime/contrib_ops/cuda/bert/flash_attention/flash.h b/onnxruntime/contrib_ops/cuda/bert/flash_attention/flash.h
index 9394a19c9897a..cbe536c6ce45a 100644
--- a/onnxruntime/contrib_ops/cuda/bert/flash_attention/flash.h
+++ b/onnxruntime/contrib_ops/cuda/bert/flash_attention/flash.h
@@ -18,68 +18,112 @@ constexpr int D_DIM = 2;
 struct Qkv_params {
   using index_t = uint32_t;
   // The QKV matrices.
-  void* __restrict__ q_ptr;
-  void* __restrict__ k_ptr;
-  void* __restrict__ v_ptr;
+  void* __restrict__ q_ptr = nullptr;
+  void* __restrict__ k_ptr = nullptr;
+  void* __restrict__ v_ptr = nullptr;
 
   // The stride between rows of the Q, K and V matrices.
-  index_t q_batch_stride;
-  index_t k_batch_stride;
-  index_t v_batch_stride;
-  index_t q_row_stride;
-  index_t k_row_stride;
-  index_t v_row_stride;
-  index_t q_head_stride;
-  index_t k_head_stride;
-  index_t v_head_stride;
+  index_t q_batch_stride = 0;
+  index_t k_batch_stride = 0;
+  index_t v_batch_stride = 0;
+  index_t q_row_stride = 0;
+  index_t k_row_stride = 0;
+  index_t v_row_stride = 0;
+  index_t q_head_stride = 0;
+  index_t k_head_stride = 0;
+  index_t v_head_stride = 0;
 
   // The number of heads.
-  int h, h_k;
+  int h = 0;
+  int h_k = 0;
   // In the case of multi-query and grouped-query attention (MQA/GQA), nheads_k could be
   // different from nheads (query).
-  int h_h_k_ratio;  // precompute h / h_k,
+  int h_h_k_ratio = 0;  // precompute h / h_k,
 };
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 
 struct Flash_fwd_params : public Qkv_params {
   // The O matrix (output).
-  void* __restrict__ o_ptr;
+  void* __restrict__ o_ptr = nullptr;
+  void* __restrict__ oaccum_ptr = nullptr;
 
   // The stride between rows of O.
-  index_t o_batch_stride;
-  index_t o_row_stride;
-  index_t o_head_stride;
+  index_t o_batch_stride = 0;
+  index_t o_row_stride = 0;
+  index_t o_head_stride = 0;
 
   // The pointer to the P matrix.
-  void* __restrict__ p_ptr;
+  void* __restrict__ p_ptr = nullptr;
 
   // The pointer to the softmax sum.
-  void* __restrict__ softmax_lse_ptr;
+  void* __restrict__ softmax_lse_ptr = nullptr;
+  void* __restrict__ softmax_lseaccum_ptr = nullptr;
 
   // The dimensions.
-  int b, seqlen_q, seqlen_k, d, seqlen_q_rounded, seqlen_k_rounded, d_rounded;
+  int b = 0;
+  int seqlen_q = 0;
+  int seqlen_k = 0;
+  int seqlen_knew = 0;
+  int d = 0;
+  int seqlen_q_rounded = 0;
+  int seqlen_k_rounded = 0;
+  int d_rounded = 0;
+  int rotary_dim = 0;
 
   // The scaling factors for the kernel.
-  float scale_softmax;
-  float scale_softmax_log2;
+  float scale_softmax = 0.0;
+  float scale_softmax_log2 = 0.0;
 
   // array of length b+1 holding starting offset of each sequence.
-  int* __restrict__ cu_seqlens_q;
-  int* __restrict__ cu_seqlens_k;
+  int* __restrict__ cu_seqlens_q = nullptr;
+  int* __restrict__ cu_seqlens_k = nullptr;
 
-  int* __restrict__ blockmask;
+  int* __restrict__ blockmask = nullptr;
+
+  // The K_new and V_new matrices.
+  void* __restrict__ knew_ptr = nullptr;
+  void* __restrict__ vnew_ptr = nullptr;
+
+  // The stride between rows of the Q, K and V matrices.
+  index_t knew_batch_stride = 0;
+  index_t vnew_batch_stride = 0;
+  index_t knew_row_stride = 0;
+  index_t vnew_row_stride = 0;
+  index_t knew_head_stride = 0;
+  index_t vnew_head_stride = 0;
+
+  // The cos and sin matrices for rotary embedding.
+  void* __restrict__ rotary_cos_ptr = nullptr;
+  void* __restrict__ rotary_sin_ptr = nullptr;
+
+  // The indices to index into the KV cache.
+  int* __restrict__ cache_batch_idx = nullptr;
+
+  // Local window size
+  int window_size_left = -1;
+  int window_size_right = -1;
 
   bool is_bf16 = false;
-  bool is_causal;
+  bool is_causal = false;
+
+  // If is_seqlens_k_cumulative, then seqlen_k is cu_seqlens_k[bidb + 1] - cu_seqlens_k[bidb].
+  // Otherwise it's cu_seqlens_k[bidb], i.e., we use cu_seqlens_k to store the sequence lengths of K.
+  bool is_seqlens_k_cumulative = true;
+
+  bool is_rotary_interleaved = false;
 
-  const cudaDeviceProp* dprops;
+  int num_splits = 0;  // For split-KV version
+
+  const cudaDeviceProp* dprops = nullptr;
 };
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 
 template <typename T, int Headdim>
 void run_mha_fwd_(Flash_fwd_params& params, cudaStream_t stream);
+template <typename T, int Headdim>
+void run_mha_fwd_splitkv_dispatch(Flash_fwd_params& params, cudaStream_t stream);
 
 }  // namespace flash
-}  // namespace onnxruntime
\ No newline at end of file
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/bert/flash_attention/flash_api.cc b/onnxruntime/contrib_ops/cuda/bert/flash_attention/flash_api.cc
index 87831d1eddfe9..76190aad68fdb 100644
--- a/onnxruntime/contrib_ops/cuda/bert/flash_attention/flash_api.cc
+++ b/onnxruntime/contrib_ops/cuda/bert/flash_attention/flash_api.cc
@@ -34,24 +34,39 @@ void set_params_fprop(Flash_fwd_params& params,
                       void* p_d,
                       void* softmax_lse_d,
                       float softmax_scale,
-                      bool is_causal) {
+                      bool is_causal,
+                      bool kv_bsnh = true,
+                      int window_size_left = -1,
+                      int window_size_right = -1) {
   // Set the pointers and strides.
   params.q_ptr = q;
   params.k_ptr = k;
   params.v_ptr = v;
   params.o_ptr = out;
 
-  // All stride are in elements, not bytes.
-  params.q_row_stride = num_heads * head_size;
-  params.k_row_stride = num_heads_k * head_size;
-  params.v_row_stride = num_heads * head_size;
-  params.q_head_stride = head_size;
-  params.k_head_stride = head_size;
-  params.v_head_stride = head_size;
-  params.o_row_stride = num_heads * head_size;
-  params.o_head_stride = head_size;
   params.is_bf16 = false;
 
+  // All stride are in elements, not bytes.
+  if (kv_bsnh) {
+    params.q_row_stride = num_heads * head_size;
+    params.k_row_stride = num_heads_k * head_size;
+    params.v_row_stride = num_heads_k * head_size;
+    params.q_head_stride = head_size;
+    params.k_head_stride = head_size;
+    params.v_head_stride = head_size;
+    params.o_row_stride = num_heads * head_size;
+    params.o_head_stride = head_size;
+  } else {
+    params.q_row_stride = num_heads * head_size;
+    params.k_row_stride = head_size;
+    params.v_row_stride = head_size;
+    params.q_head_stride = head_size;
+    params.k_head_stride = seqlen_k * head_size;
+    params.v_head_stride = seqlen_k * head_size;
+    params.o_row_stride = num_heads * head_size;
+    params.o_head_stride = head_size;
+  }
+
   if (cu_seqlens_q_d == nullptr) {
     params.q_batch_stride = seqlen_q * num_heads * head_size;    // stride(0)
     params.k_batch_stride = seqlen_k * num_heads_k * head_size;  // stride(0)
@@ -89,7 +104,22 @@ void set_params_fprop(Flash_fwd_params& params,
   params.scale_softmax = softmax_scale;
   params.scale_softmax_log2 = softmax_scale * M_LOG2E;
 
+  // In our API, causal/unidirectional determines if we only look at prior tokens. However, the flash API seperates
+  // local and causal, meaning when we have local window size
   params.is_causal = is_causal;
+  if (is_causal && (window_size_left >= 0 || window_size_right != 0)) {
+    params.is_causal = false;
+  }
+  if (window_size_left < 0 && window_size_right >= 0) {
+    window_size_left = seqlen_k;
+  }
+  if (window_size_left >= 0 && window_size_right < 0) {
+    window_size_right = seqlen_k;
+  }
+  params.window_size_left = window_size_left;
+  params.window_size_right = window_size_right;
+
+  params.is_seqlens_k_cumulative = true;
 }
 
 size_t get_softmax_lse_size(int seqlen, int batch_size, int num_heads) {
@@ -97,14 +127,104 @@ size_t get_softmax_lse_size(int seqlen, int batch_size, int num_heads) {
   return bytes;
 }
 
-void run_mha_fwd(Flash_fwd_params& params, cudaStream_t stream) {
+size_t get_softmax_lse_accum_size(int num_splits, int batch_size, int num_heads, int seqlen_q) {
+  size_t bytes = sizeof(float) * num_splits * batch_size * seqlen_q * num_heads;
+  return bytes;
+}
+
+size_t get_out_accum_size(int num_splits, int batch_size, int num_heads, int seqlen_q, int head_size_rounded) {
+  size_t bytes = sizeof(float) * num_splits * batch_size * seqlen_q * num_heads * head_size_rounded;
+  return bytes;
+}
+
+void run_mha_fwd(Flash_fwd_params& params, cudaStream_t stream, bool force_split_kernel = false) {
   FP16_SWITCH(!params.is_bf16, [&] {
     FWD_HEADDIM_SWITCH(params.d, [&] {
-      run_mha_fwd_<elem_type, kHeadDim>(params, stream);
+      if (params.num_splits <= 1 && !force_split_kernel) {  // If we don't set it num_splits == 0
+        run_mha_fwd_<elem_type, kHeadDim>(params, stream);
+      } else {
+        run_mha_fwd_splitkv_dispatch<elem_type, kHeadDim>(params, stream);
+      }
     });
   });
 }
 
+// Find the number of splits that maximizes the occupancy. For example, if we have
+// batch * n_heads = 48 and we have 108 SMs, having 2 splits (efficiency = 0.89) is
+// better than having 3 splits (efficiency = 0.67). However, we also don't want too many
+// splits as that would incur more HBM reads/writes.
+// So we find the best efficiency, then find the smallest number of splits that gets 85%
+// of the best efficiency.
+int num_splits_heuristic(int batch_size, int seqlen_q, int seqlen_k, int num_heads, int head_size, int num_SMs,
+                         int max_splits) {
+  // This needs to match with run_mha_fwd_splitkv_dispatch
+  const int block_n = head_size <= 64 ? 256 : (head_size <= 128 ? 128 : 64);
+  const int num_n_blocks = (seqlen_k + block_n - 1) / block_n;
+  // Technically kBlockM = 64 only for the splitKV kernels, not the standard kernel.
+  // In any case we don't expect seqlen_q to be larger than 64 for inference.
+  const int num_m_blocks = (seqlen_q + 64 - 1) / 64;
+  int batch_nheads_mblocks = batch_size * num_heads * num_m_blocks;
+  // If we have enough to almost fill the SMs, then just use 1 split
+  if (batch_nheads_mblocks >= 0.8f * num_SMs) {
+    return 1;
+  }
+  max_splits = std::min({max_splits, num_SMs, num_n_blocks});
+  float max_efficiency = 0.f;
+  std::vector<float> efficiency;
+  efficiency.reserve(max_splits);
+  auto ceildiv = [](int a, int b) { return (a + b - 1) / b; };
+  // Some splits are not eligible. For example, if we have 64 blocks and choose 11 splits,
+  // we'll have 6 * 10 + 4 blocks. If we choose 12 splits, we'll have 6 * 11 + (-2) blocks
+  // (i.e. it's 11 splits anyway).
+  // So we check if the number of blocks per split is the same as the previous num_splits.
+  auto is_split_eligible = [&ceildiv, &num_n_blocks](int num_splits) {
+    return num_splits == 1 || ceildiv(num_n_blocks, num_splits) != ceildiv(num_n_blocks, num_splits - 1);
+  };
+  for (int num_splits = 1; num_splits <= max_splits; num_splits++) {
+    if (!is_split_eligible(num_splits)) {
+      efficiency.push_back(0.f);
+    } else {
+      float n_waves = float(batch_nheads_mblocks * num_splits) / num_SMs;
+      float eff = n_waves / ceil(n_waves);
+      // printf("num_splits = %d, eff = %f\n", num_splits, eff);
+      if (eff > max_efficiency) {
+        max_efficiency = eff;
+      }
+      efficiency.push_back(eff);
+    }
+  }
+  for (int num_splits = 1; num_splits <= max_splits; num_splits++) {
+    if (!is_split_eligible(num_splits)) {
+      continue;
+    }
+    if (efficiency[num_splits - 1] >= 0.85 * max_efficiency) {
+      // printf("num_splits chosen = %d\n", num_splits);
+      return num_splits;
+    }
+  }
+  return 1;
+}
+
+// Returns (num_splits, softmax_lse_accum bytes, out_accum bytes)
+std::tuple<int, int, int> get_num_splits_and_buffer_sizes(int batch_size, int seqlen_q, int seqlen_k, int num_heads,
+                                                          int head_size, int num_SMs) {
+  int max_splits = 128;
+  // split kv buffers
+  int num_splits = num_splits_heuristic(batch_size, seqlen_q, seqlen_k, num_heads, head_size,
+                                        num_SMs, max_splits);
+  if (num_splits > 1) {
+    // softmax_lse_accum buffer
+    int softmax_lse_accum_bytes = get_softmax_lse_accum_size(num_splits, batch_size, num_heads, seqlen_q);
+    // out_accum buffer
+    auto round_multiple = [](int x, int m) { return (x + m - 1) / m * m; };
+    const int head_size_rounded = round_multiple(head_size, 32);
+    int out_accum_bytes = get_out_accum_size(num_splits, batch_size, num_heads, seqlen_q, head_size_rounded);
+    return {num_splits, softmax_lse_accum_bytes, out_accum_bytes};
+  } else {
+    return {0, 0, 0};
+  }
+}
+
 Status mha_fwd(const cudaDeviceProp& dprops,
                cudaStream_t stream,
                void* q,            // batch_size x seqlen_q x num_heads x head_size
@@ -119,14 +239,18 @@ Status mha_fwd(const cudaDeviceProp& dprops,
                int seqlen_q,
                int seqlen_k,
                float softmax_scale,
-               bool is_causal) {
+               bool is_causal,
+               int num_splits,
+               void* softmax_lse_accum,  // num_splits x batch_size x seqlen_q x num_heads
+               void* out_accum,          // num_splits x batch_size x seqlen_q x num_heads x head_size_rounded
+               bool kv_bsnh,
+               int local_window_size) {
   auto round_multiple = [](int x, int m) { return (x + m - 1) / m * m; };
   const int head_size_rounded = round_multiple(head_size, 32);
   const int seqlen_q_rounded = round_multiple(seqlen_q, 128);
   const int seqlen_k_rounded = round_multiple(seqlen_k, 128);
 
   Flash_fwd_params params;
-  params.dprops = &dprops;
   set_params_fprop(params,
                    batch_size,
                    seqlen_q, seqlen_k,
@@ -139,7 +263,28 @@ Status mha_fwd(const cudaDeviceProp& dprops,
                    nullptr,
                    softmax_lse,
                    softmax_scale,
-                   is_causal);
+                   is_causal,
+                   kv_bsnh,
+                   local_window_size,
+                   is_causal ? 0 : -1);
+  params.dprops = &dprops;
+  params.knew_ptr = nullptr;
+  params.vnew_ptr = nullptr;
+  params.knew_batch_stride = 0;
+  params.vnew_batch_stride = 0;
+  params.knew_row_stride = 0;
+  params.vnew_row_stride = 0;
+  params.knew_head_stride = 0;
+  params.vnew_head_stride = 0;
+
+  params.num_splits = num_splits;
+  if (params.num_splits > 1 && softmax_lse_accum != nullptr && out_accum != nullptr) {
+    params.softmax_lseaccum_ptr = softmax_lse_accum;
+    params.oaccum_ptr = out_accum;
+  } else {
+    params.softmax_lseaccum_ptr = nullptr;
+    params.oaccum_ptr = nullptr;
+  }
 
   run_mha_fwd(params, stream);
   return Status::OK();
@@ -168,7 +313,6 @@ Status mha_varlen_fwd(const cudaDeviceProp& dprops,
   const int seqlen_k_rounded = round_multiple(max_seqlen_k, 128);
 
   Flash_fwd_params params;
-  params.dprops = &dprops;
   set_params_fprop(params,
                    batch_size,
                    max_seqlen_q, max_seqlen_k,
@@ -181,7 +325,16 @@ Status mha_varlen_fwd(const cudaDeviceProp& dprops,
                    nullptr,
                    softmax_lse,
                    softmax_scale,
-                   is_causal);
+                   is_causal,
+                   true,
+                   -1,
+                   is_causal ? 0 : -1);
+  params.dprops = &dprops;
+  params.num_splits = 0;
+  params.softmax_lseaccum_ptr = nullptr;
+  params.oaccum_ptr = nullptr;
+  params.knew_ptr = nullptr;
+  params.vnew_ptr = nullptr;
   run_mha_fwd(params, stream);
   return Status::OK();
 }
@@ -192,6 +345,103 @@ bool is_supported(const cudaDeviceProp& dprops, int head_size, int num_heads, in
   return (is_sm8x || is_sm90) && (head_size % 8 == 0) && (head_size <= 256) && (num_heads % num_heads_k == 0);
 }
 
+// This API is used when past key and value are present... since cached, these are assumed to have sequence length
+// of max_sequence_length, so seqlen_k == max_sequence_length. The actual past sequence length is held in seqlens_k_.
+Status mha_fwd_kvcache(const cudaDeviceProp& dprops,
+                       cudaStream_t stream,
+                       void* q,            // batch_size x seqlen_q x num_heads x head_size
+                       void* kcache,       // batch_size x seqlen_k x num_heads_k x head_size or batch_size x num_heads_k seqlen_k x head_size
+                       void* vcache,       // batch_size x seqlen_k x num_heads_k x head_size or batch_size x num_heads_k seqlen_k x head_size
+                       void* k,            // (optional) batch_size x seqlen_k_new x num_heads_k x head_size
+                       void* v,            // (optional) batch_size x seqlen_k_new x num_heads_k x head_size
+                       void* out,          // batch_size x seqlen_q x num_heads x head_size
+                       void* softmax_lse,  // batch_size x num_heads x seqlen_q
+                       void* seqlens_k_,   // batch_size
+                       int batch_size,
+                       int num_heads,
+                       int num_heads_k,
+                       int head_size,
+                       int seqlen_q,
+                       int seqlen_k,
+                       int seqlen_k_new,
+                       const float softmax_scale,
+                       bool is_causal,
+                       bool past_bsnh,  // otherwise bnsh
+                       int num_splits,
+                       void* softmax_lse_accum,  // num_splits x batch_size x seqlen_q x num_heads
+                       void* out_accum,          // num_splits x batch_size x seqlen_q x num_heads x head_size_rounded
+                       int local_window_size) {
+  // if (seqlen_q == 1) {
+  //   is_causal = false;
+  // }  // causal=true is the same as causal=false in this case
+
+  auto round_multiple = [](int x, int m) { return (x + m - 1) / m * m; };
+  const int head_size_rounded = round_multiple(head_size, 32);
+  const int seqlen_q_rounded = round_multiple(seqlen_q, 128);
+  const int seqlen_k_rounded = round_multiple(seqlen_k, 128);
+
+  Flash_fwd_params params;
+  set_params_fprop(params,
+                   batch_size,
+                   seqlen_q, seqlen_k,
+                   seqlen_q_rounded, seqlen_k_rounded,
+                   num_heads, num_heads_k,
+                   head_size, head_size_rounded,
+                   q, kcache, vcache, out,
+                   /*cu_seqlens_q_d=*/nullptr,
+                   /*cu_seqlens_k_d=*/nullptr,
+                   /*p_ptr=*/nullptr,
+                   softmax_lse,
+                   softmax_scale,
+                   is_causal,
+                   past_bsnh,
+                   local_window_size,
+                   is_causal ? 0 : -1);
+  params.dprops = &dprops;
+
+  if (k != nullptr && v != nullptr) {
+    params.seqlen_knew = seqlen_k_new;
+    params.knew_ptr = k;
+    params.vnew_ptr = v;
+    // All stride are in elements, not bytes.
+    params.knew_batch_stride = seqlen_k_new * num_heads_k * head_size;
+    params.vnew_batch_stride = seqlen_k_new * num_heads_k * head_size;
+    params.knew_row_stride = num_heads_k * head_size;
+    params.vnew_row_stride = num_heads_k * head_size;
+    params.knew_head_stride = head_size;
+    params.vnew_head_stride = head_size;
+  } else {
+    params.seqlen_knew = 0;
+    params.knew_ptr = nullptr;
+    params.vnew_ptr = nullptr;
+    params.knew_batch_stride = 0;
+    params.vnew_batch_stride = 0;
+    params.knew_row_stride = 0;
+    params.vnew_row_stride = 0;
+    params.knew_head_stride = 0;
+    params.vnew_head_stride = 0;
+  }
+
+  params.is_seqlens_k_cumulative = seqlens_k_ == nullptr;
+  if (seqlens_k_ != nullptr) {
+    params.cu_seqlens_k = static_cast<int*>(seqlens_k_);
+  }
+
+  params.num_splits = num_splits;
+  if (params.num_splits > 1 && softmax_lse_accum != nullptr && out_accum != nullptr) {
+    params.softmax_lseaccum_ptr = softmax_lse_accum;
+    params.oaccum_ptr = out_accum;
+  } else {
+    params.softmax_lseaccum_ptr = nullptr;
+    params.oaccum_ptr = nullptr;
+  }
+
+  // Only split kernel supports appending to KV cache
+  run_mha_fwd(params, stream, /*force_split_kernel=*/k != nullptr);
+
+  return Status::OK();
+}
+
 }  // namespace flash
 }  // namespace onnxruntime
 
diff --git a/onnxruntime/contrib_ops/cuda/bert/flash_attention/flash_api.h b/onnxruntime/contrib_ops/cuda/bert/flash_attention/flash_api.h
index 2ae46d34c373a..efc1f565c4fa0 100644
--- a/onnxruntime/contrib_ops/cuda/bert/flash_attention/flash_api.h
+++ b/onnxruntime/contrib_ops/cuda/bert/flash_attention/flash_api.h
@@ -31,9 +31,11 @@
 #if USE_FLASH_ATTENTION
 
 #include "core/providers/cuda/cuda_common.h"
+#include <tuple>
 
 namespace onnxruntime {
 namespace flash {
+
 Status mha_fwd(const cudaDeviceProp& dprops,
                cudaStream_t stream,
                void* q,            // batch_size x seqlen_q x num_heads x head_size
@@ -48,7 +50,12 @@ Status mha_fwd(const cudaDeviceProp& dprops,
                int seqlen_q,
                int seqlen_k,
                float softmax_scale,
-               bool is_causal);
+               bool is_causal,
+               int num_splits = 0,
+               void* softmax_lse_accum = nullptr,  // num_splits x batch_size x seqlen_q x num_heads
+               void* out_accum = nullptr,          // num_splits x batch_size x seqlen_q x num_heads x head_size_rounded
+               bool kv_bsnh = true,
+               int local_window_size = -1);
 
 Status mha_varlen_fwd(const cudaDeviceProp& dprops,
                       cudaStream_t stream,
@@ -68,8 +75,36 @@ Status mha_varlen_fwd(const cudaDeviceProp& dprops,
                       float softmax_scale,
                       bool is_causal);
 
+Status mha_fwd_kvcache(const cudaDeviceProp& dprops,
+                       cudaStream_t stream,
+                       void* q,            // batch_size x seqlen_q x num_heads x head_size
+                       void* kcache,       // batch_size x seqlen_k x num_heads_k x head_size or batch_size x num_heads_k seqlen_k x x head_size
+                       void* vcache,       // batch_size x seqlen_k x num_heads_k x head_size or batch_size x num_heads_k seqlen_k x x head_size
+                       void* k,            // batch_size x seqlen_k_new x num_heads_k x head_size
+                       void* v,            // batch_size x seqlen_k_new x num_heads_k x head_size
+                       void* out,          // batch_size x seqlen_q x num_heads x head_size
+                       void* softmax_lse,  // batch_size x num_heads x seqlen_q
+                       void* seqlens_k_,   // batch_size
+                       int batch_size,
+                       int num_heads,
+                       int num_heads_k,
+                       int head_size,
+                       int seqlen_q,
+                       int seqlen_k,
+                       int seqlen_k_new,
+                       const float softmax_scale,
+                       bool is_causal,
+                       bool past_bsnh,  // otherwise bnsh
+                       int num_splits = 0,
+                       void* softmax_lse_accum = nullptr,  // num_splits x batch_size x seqlen_q x num_heads
+                       void* out_accum = nullptr,          // num_splits x batch_size x seqlen_q x num_heads x head_size_rounded
+                       int local_window_size = -1);
+
 size_t get_softmax_lse_size(int max_seqlen_q, int batch_size, int num_heads);
 
+std::tuple<int, int, int> get_num_splits_and_buffer_sizes(int batch_size, int seqlen_q, int seqlen_k, int num_heads,
+                                                          int head_size, int num_SMs);
+
 bool is_supported(const cudaDeviceProp& dprops, int head_size, int num_heads, int num_heads_k);
 
 }  // namespace flash
diff --git a/onnxruntime/contrib_ops/cuda/bert/flash_attention/flash_fwd_kernel.h b/onnxruntime/contrib_ops/cuda/bert/flash_attention/flash_fwd_kernel.h
index b5af31e432d42..028233f66850f 100644
--- a/onnxruntime/contrib_ops/cuda/bert/flash_attention/flash_fwd_kernel.h
+++ b/onnxruntime/contrib_ops/cuda/bert/flash_attention/flash_fwd_kernel.h
@@ -29,47 +29,6 @@ using namespace cute;
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 
-template <int MMA_M,
-          class... Args,
-          class TiledMMA>
-CUTE_HOST_DEVICE auto
-make_tiled_copy_A_warpcontiguousM(Copy_Atom<Args...> const& copy_atom,
-                                  TiledMMA const& tiled_mma) {
-  using TileShape_MNK = typename TiledMMA::TiledShape_MNK;
-  using AtomShape_MNK = typename TiledMMA::AtomShape_MNK;
-  constexpr int AtomShape_M = decltype(cute::size<0>(AtomShape_MNK{}))::value;
-  constexpr int kNWarps = decltype(cute::size<0>(TileShape_MNK{}))::value / AtomShape_M;
-  constexpr int MMAStride_M = MMA_M * AtomShape_M;
-  auto t = make_tile(cute::Layout<cute::Shape<cute::Int<AtomShape_M>, cute::Int<kNWarps>>,
-                                  cute::Stride<_1, cute::Int<MMAStride_M>>>{},
-                     make_layout(cute::size<2>(TileShape_MNK{})));
-
-  return make_tiled_copy_impl(copy_atom, tiled_mma.get_layoutA_TV(), t);
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////
-
-template <int MMA_M,
-          class... Args,
-          class TiledMMA>
-CUTE_HOST_DEVICE auto
-make_tiled_copy_C_warpcontiguousM(Copy_Atom<Args...> const& copy_atom,
-                                  TiledMMA const& tiled_mma) {
-  using TileShape_MNK = typename TiledMMA::TiledShape_MNK;
-  using AtomShape_MNK = typename TiledMMA::AtomShape_MNK;
-  constexpr int AtomShape_M = decltype(cute::size<0>(AtomShape_MNK{}))::value;
-  constexpr int kNWarps = decltype(cute::size<0>(TileShape_MNK{}))::value / AtomShape_M;
-  constexpr int MMAStride_M = MMA_M * AtomShape_M;
-  auto t = make_tile(cute::Layout<cute::Shape<cute::Int<AtomShape_M>, cute::Int<kNWarps>>,
-                                  cute::Stride<_1, cute::Int<MMAStride_M>>>{},
-                     // TODO: Shouldn't this be size<1>?
-                     make_layout(cute::size<2>(TileShape_MNK{})));
-  // if (cute::thread0()) {printf("make_tiled_copy_C_warpcontiguousM "); print(t); printf("\n");  }
-  return make_tiled_copy_impl(copy_atom, tiled_mma.get_layoutC_TV(), t);
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////
-
 template <bool Is_first, bool Check_inf = false, typename Tensor0, typename Tensor1, typename Tensor2>
 inline __device__ void softmax_rescale_o(Tensor0& scores, Tensor1& scores_max, Tensor1& scores_sum,
                                          Tensor2& acc_o, float softmax_scale_log2) {
@@ -79,7 +38,7 @@ inline __device__ void softmax_rescale_o(Tensor0& scores, Tensor1& scores_max, T
     flash::reduce_sum(scores, scores_sum);
   } else {
     cute::Tensor scores_max_prev = make_fragment_like(scores_max);
-    copy(scores_max, scores_max_prev);
+    cute::copy(scores_max, scores_max_prev);
     flash::template reduce_max</*zero_init=*/false>(scores, scores_max);
     // Reshape acc_o from (MMA=4, MMA_M, MMA_K) to (nrow=(2, MMA_M), ncol=(2, MMA_K))
     cute::Tensor acc_o_rowcol = make_tensor(acc_o.data(), flash::convert_layout_acc_rowcol(acc_o.layout()));
@@ -109,7 +68,7 @@ inline __device__ void softmax_rescale_o(Tensor0& scores, Tensor1& scores_max, T
 
 template <typename Engine0, typename Layout0, typename Engine1, typename Layout1, typename TiledCopy>
 inline __device__ void write_softmax_to_gmem(
-    cute::Tensor<Engine0, Layout0> const& tOrP, cute::Tensor<Engine1, Layout1>& tPgP, TiledCopy gmem_thr_copy_P) {
+    cute::Tensor<Engine0, Layout0> const& tOrP, cute::Tensor<Engine1, Layout1>& tPgP, TiledCopy gmem_tiled_copy_P) {
   // Reshape tOrP from (8, MMA_M, MMA_N) to (8, MMA_M * MMA_N)
   cute::Layout l = tOrP.layout();
   cute::Tensor tPrP = make_tensor(tOrP.data(), make_layout(get<0>(l), make_layout(get<1>(l), get<2>(l))));
@@ -117,13 +76,13 @@ inline __device__ void write_softmax_to_gmem(
   CUTE_STATIC_ASSERT_V(cute::size<1>(tPrP) == cute::size<1>(tPgP));
 #pragma unroll
   for (int mi = 0; mi < cute::size<1>(tPrP); ++mi) {
-    copy(gmem_thr_copy_P, tPrP(_, mi), tPgP(_, mi, 0));
+    cute::copy(gmem_tiled_copy_P, tPrP(_, mi), tPgP(_, mi, 0));
   }
 };
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 
-template <typename Kernel_traits, bool Is_causal, bool Is_even_MN, bool Is_even_K, bool Return_softmax, typename Params>
+template <typename Kernel_traits, bool Is_causal, bool Is_local, bool Is_even_MN, bool Is_even_K, bool Return_softmax, typename Params>
 inline __device__ void compute_attn_1rowblock(const Params& params, const int bidb, const int bidh, const int m_block) {
   using Element = typename Kernel_traits::Element;
   using ElementAccum = typename Kernel_traits::ElementAccum;
@@ -144,9 +103,50 @@ inline __device__ void compute_attn_1rowblock(const Params& params, const int bi
   const BlockInfo</*Varlen=*/!Is_even_MN> binfo(params, bidb);
   if (m_block * kBlockM >= binfo.actual_seqlen_q || binfo.actual_seqlen_k == 0) return;
 
+  const int n_block_min = !Is_local ? 0 : std::max(0, (m_block * kBlockM + binfo.actual_seqlen_k - binfo.actual_seqlen_q - params.window_size_left) / kBlockN);
   int n_block_max = cute::ceil_div(binfo.actual_seqlen_k, kBlockN);
-  if (Is_causal) {
-    n_block_max = std::min(n_block_max, cute::ceil_div((m_block + 1) * kBlockM + binfo.actual_seqlen_k - binfo.actual_seqlen_q, kBlockN));
+  if (Is_causal || Is_local) {
+    n_block_max = std::min(n_block_max,
+                           cute::ceil_div((m_block + 1) * kBlockM + binfo.actual_seqlen_k - binfo.actual_seqlen_q + params.window_size_right, kBlockN));
+    // We exit early and write 0 to gO and gLSE.
+    // Otherwise we might read OOB elements from gK and gV.
+    if (n_block_max <= n_block_min) {
+      const index_t row_offset_o = binfo.q_offset(params.o_batch_stride, params.o_row_stride, bidb) + m_block * kBlockM * params.o_row_stride + bidh * params.o_head_stride;
+      const index_t row_offset_lse = (bidb * params.h + bidh) * params.seqlen_q + m_block * kBlockM;
+      Tensor gO = make_tensor(make_gmem_ptr(reinterpret_cast<Element*>(params.o_ptr) + row_offset_o),
+                              Shape<Int<kBlockM>, Int<kHeadDim>>{},
+                              make_stride(params.o_row_stride, _1{}));
+      Tensor gLSE = make_tensor(make_gmem_ptr(reinterpret_cast<ElementAccum*>(params.softmax_lse_ptr) + row_offset_lse),
+                                Shape<Int<kBlockM>>{}, Stride<_1>{});
+
+      typename Kernel_traits::GmemTiledCopyO gmem_tiled_copy_O;
+      auto gmem_thr_copy_O = gmem_tiled_copy_O.get_thread_slice(tidx);
+      Tensor tOgO = gmem_thr_copy_O.partition_D(gO);
+      Tensor tOrO = make_tensor<Element>(shape(tOgO));
+      clear(tOrO);
+      // Construct identity layout for sO
+      Tensor cO = make_identity_tensor(make_shape(size<0>(gO), size<1>(gO)));  // (BLK_M,BLK_K) -> (blk_m,blk_k)
+      // Repeat the partitioning with identity layouts
+      Tensor tOcO = gmem_thr_copy_O.partition_D(cO);
+      Tensor tOpO = make_tensor<bool>(make_shape(size<2>(tOgO)));
+      if (!Is_even_K) {
+#pragma unroll
+        for (int k = 0; k < size(tOpO); ++k) {
+          tOpO(k) = get<1>(tOcO(0, 0, k)) < params.d;
+        }
+      }
+      // Clear_OOB_K must be false since we don't want to write zeros to gmem
+      flash::copy<Is_even_MN, Is_even_K, /*Clear_OOB_MN=*/false, /*Clear_OOB_K=*/false>(
+          gmem_tiled_copy_O, tOrO, tOgO, tOcO, tOpO, binfo.actual_seqlen_q - m_block * kBlockM);
+#pragma unroll
+      for (int m = 0; m < size<1>(tOgO); ++m) {
+        const int row = get<0>(tOcO(0, m, 0));
+        if (row < binfo.actual_seqlen_q - m_block * kBlockM && get<1>(tOcO(0, m, 0)) == 0) {
+          gLSE(row) = INFINITY;
+        }
+      }
+      return;
+    }
   }
 
   // We iterate over the blocks in reverse order. This is because the last block is the only one
@@ -158,7 +158,6 @@ inline __device__ void compute_attn_1rowblock(const Params& params, const int bi
   const index_t row_offset_k = binfo.k_offset(params.k_batch_stride, params.k_row_stride, bidb) + (n_block_max - 1) * kBlockN * params.k_row_stride + (bidh / params.h_h_k_ratio) * params.k_head_stride;
   const index_t row_offset_v = binfo.k_offset(params.v_batch_stride, params.v_row_stride, bidb) + (n_block_max - 1) * kBlockN * params.v_row_stride + (bidh / params.h_h_k_ratio) * params.v_head_stride;
   const index_t row_offset_p = ((bidb * params.h + bidh) * params.seqlen_q_rounded + m_block * kBlockM) * params.seqlen_k_rounded + (n_block_max - 1) * kBlockN;
-
   cute::Tensor gQ = make_tensor(make_gmem_ptr(reinterpret_cast<Element*>(params.q_ptr) + row_offset_q),
                                 cute::Shape<cute::Int<kBlockM>, cute::Int<kHeadDim>>{},
                                 make_stride(params.q_row_stride, _1{}));
@@ -293,9 +292,9 @@ inline __device__ void compute_attn_1rowblock(const Params& params, const int bi
 
   // If not even_N, then seqlen_k might end in the middle of a block. In that case we need to
   // mask 2 blocks (e.g. when kBlockM == kBlockN), not just 1.
-  constexpr int n_masking_steps = !Is_causal
+  constexpr int n_masking_steps = (!Is_causal && !Is_local)
                                       ? 1
-                                      : (Is_even_MN ? cute::ceil_div(kBlockM, kBlockN) : cute::ceil_div(kBlockM, kBlockN) + 1);
+                                      : ((Is_even_MN && Is_causal) ? cute::ceil_div(kBlockM, kBlockN) : cute::ceil_div(kBlockM, kBlockN) + 1);
 #pragma unroll
   for (int masking_step = 0; masking_step < n_masking_steps; ++masking_step, --n_block) {
     cute::Tensor acc_s = partition_fragment_C(tiled_mma, cute::Shape<cute::Int<kBlockM>, cute::Int<kBlockN>>{});  // (MMA=4, MMA_M, MMA_N)
@@ -325,22 +324,22 @@ inline __device__ void compute_attn_1rowblock(const Params& params, const int bi
     // We don't put the masking before the matmul S = Q K^T because we don't clear sK
     // for rows outside actual_seqlen_k. So those rows could have Inf / NaN, and the matmul
     // can produce Inf / NaN.
-    if (!Is_causal) {
+    if (!Is_causal && !Is_local) {
       if (!Is_even_MN) {
         flash::apply_mask(scores, binfo.actual_seqlen_k - n_block * kBlockN);
       }
     } else {
       // I can't get the stride from idx_row
-      flash::apply_mask_causal(scores, n_block * kBlockN, binfo.actual_seqlen_k,
-                               // m_block * kBlockM + get<0>(idx_row(0)),
-                               m_block * kBlockM + (tidx / 32) * 16 + (tidx % 32) / 4,
-                               binfo.actual_seqlen_q,
-                               kNWarps * 16);
+      flash::apply_mask_local</*HasWSLeft=*/Is_local>(scores, n_block * kBlockN, binfo.actual_seqlen_k,
+                                                      // m_block * kBlockM + get<0>(idx_row(0)),
+                                                      m_block * kBlockM + (tidx / 32) * 16 + (tidx % 32) / 4,
+                                                      binfo.actual_seqlen_q, kNWarps * 16,
+                                                      params.window_size_left, params.window_size_right);
     }
 
     flash::cp_async_wait<0>();
     __syncthreads();
-    if (n_block > 0) {
+    if (n_block > n_block_min) {
       // Advance gK
       tKgK.data() = tKgK.data() + (-int(kBlockN * params.k_row_stride));
       flash::copy</*Is_even_MN=*/true, Is_even_K>(gmem_tiled_copy_QKV, tKgK, tKsK, tKVcKV, tKVpKV);
@@ -351,8 +350,8 @@ inline __device__ void compute_attn_1rowblock(const Params& params, const int bi
 
     // TODO: when we have key_padding_mask we'll need to Check_inf
     masking_step == 0
-        ? softmax_rescale_o</*Is_first=*/true, /*Check_inf=*/Is_causal>(scores, scores_max, scores_sum, acc_o, params.scale_softmax_log2)
-        : softmax_rescale_o</*Is_first=*/false, /*Check_inf=*/Is_causal>(scores, scores_max, scores_sum, acc_o, params.scale_softmax_log2);
+        ? softmax_rescale_o</*Is_first=*/true, /*Check_inf=*/Is_causal || Is_local>(scores, scores_max, scores_sum, acc_o, params.scale_softmax_log2)
+        : softmax_rescale_o</*Is_first=*/false, /*Check_inf=*/Is_causal || Is_local>(scores, scores_max, scores_sum, acc_o, params.scale_softmax_log2);
 
     // Convert scores from fp32 to fp16/bf16
     cute::Tensor rP = flash::convert_type<Element>(scores);
@@ -369,14 +368,14 @@ inline __device__ void compute_attn_1rowblock(const Params& params, const int bi
     flash::gemm_A_in_regs(acc_o, tOrP, tOrVt, tOsVt, tiled_mma, smem_tiled_copy_V, smem_thr_copy_V);
 
     // This check is at the end of the loop since we always have at least 1 iteration
-    if (n_masking_steps > 1 && n_block <= 0) {
+    if (n_masking_steps > 1 && n_block <= n_block_min) {
       --n_block;
       break;
     }
   }
 
   // These are the iterations where we don't need masking on S
-  for (; n_block >= 0; --n_block) {
+  for (; n_block >= n_block_min; --n_block) {
     cute::Tensor acc_s = partition_fragment_C(tiled_mma, cute::Shape<cute::Int<kBlockM>, cute::Int<kBlockN>>{});  // (MMA=4, MMA_M, MMA_N)
     clear(acc_s);
     flash::cp_async_wait<0>();
@@ -392,7 +391,7 @@ inline __device__ void compute_attn_1rowblock(const Params& params, const int bi
 
     flash::cp_async_wait<0>();
     __syncthreads();
-    if (n_block > 0) {
+    if (n_block > n_block_min) {
       // Advance gK
       tKgK.data() = tKgK.data() + (-int(kBlockN * params.k_row_stride));
       flash::copy</*Is_even_MN=*/true, Is_even_K>(gmem_tiled_copy_QKV, tKgK, tKsK, tKVcKV, tKVpKV);
@@ -402,8 +401,15 @@ inline __device__ void compute_attn_1rowblock(const Params& params, const int bi
     }
 
     // Reshape acc_s from (MMA=4, MMA_M, MMA_N) to (nrow=(2, MMA_M), ncol=(2, MMA_N))
-    cute::Tensor scores = make_tensor(acc_s.data(), flash::convert_layout_acc_rowcol(acc_s.layout()));
-    softmax_rescale_o</*Is_first=*/false>(scores, scores_max, scores_sum, acc_o, params.scale_softmax_log2);
+    Tensor scores = make_tensor(acc_s.data(), flash::convert_layout_acc_rowcol(acc_s.layout()));
+    if (Is_local && n_block * kBlockN < (m_block + 1) * kBlockM + binfo.actual_seqlen_k - binfo.actual_seqlen_q + params.window_size_right) {
+      flash::apply_mask_local(
+          scores, n_block * kBlockN, binfo.actual_seqlen_k,
+          m_block * kBlockM + (tidx / 32) * 16 + (tidx % 32) / 4,
+          binfo.actual_seqlen_q, kNWarps * 16,
+          params.window_size_left, params.window_size_right);
+    }
+    softmax_rescale_o</*Is_first=*/false, /*Check_inf=*/Is_local>(scores, scores_max, scores_sum, acc_o, params.scale_softmax_log2);
 
     cute::Tensor rP = flash::convert_type<Element>(scores);
     // Reshape rP from (nrow=(2, MMA_M), ncol=(2, MMA_N)) to ((2, 2, 2), MMA_M, MMA_N / 2)
@@ -504,7 +510,546 @@ inline __device__ void compute_attn_1rowblock(const Params& params, const int bi
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 
-template <typename Kernel_traits, bool Is_causal, bool Is_even_MN, bool Is_even_K, bool Return_softmax, typename Params>
+template <typename Kernel_traits, bool Is_causal, bool Is_local, bool Is_even_MN, bool Is_even_K, bool Split, bool Append_KV, typename Params>
+inline __device__ void compute_attn_1rowblock_splitkv(const Params& params, const int bidb, const int bidh, const int m_block, const int n_split_idx, const int num_n_splits) {
+  using Element = typename Kernel_traits::Element;
+  using ElementAccum = typename Kernel_traits::ElementAccum;
+  using index_t = typename Kernel_traits::index_t;
+
+  // Shared memory.
+  extern __shared__ char smem_[];
+
+  // The thread index.
+  const int tidx = threadIdx.x;
+
+  constexpr int kBlockM = Kernel_traits::kBlockM;
+  constexpr int kBlockN = Kernel_traits::kBlockN;
+  constexpr int kHeadDim = Kernel_traits::kHeadDim;
+  constexpr int kNWarps = Kernel_traits::kNWarps;
+
+  using GmemTiledCopyO = std::conditional_t<
+      !Split,
+      typename Kernel_traits::GmemTiledCopyOaccum,
+      typename Kernel_traits::GmemTiledCopyO>;
+  using ElementO = std::conditional_t<!Split, Element, ElementAccum>;
+
+  const BlockInfo</*Varlen=*/!Is_even_MN> binfo(params, bidb);
+  // if (threadIdx.x == 0 && blockIdx.y == 0 && blockIdx.z == 0) { printf("Is_even_MN = %d, is_cumulativ = %d, seqlen_k_cache = %d, actual_seqlen_k = %d\n", Is_even_MN, params.is_seqlens_k_cumulative, binfo.seqlen_k_cache, binfo.actual_seqlen_k); }
+  // if (threadIdx.x == 0 && blockIdx.y == 1 && blockIdx.z == 0) { printf("params.knew_ptr = %p, seqlen_k_cache + seqlen_knew = %d\n", params.knew_ptr, binfo.seqlen_k_cache + (params.knew_ptr == nullptr ? 0 : params.seqlen_knew)); }
+  if (m_block * kBlockM >= binfo.actual_seqlen_q) return;
+
+  const int n_blocks_per_split = ((params.seqlen_k + kBlockN - 1) / kBlockN + num_n_splits - 1) / num_n_splits;
+  const int n_block_min = !Is_local
+                              ? n_split_idx * n_blocks_per_split
+                              : std::max(n_split_idx * n_blocks_per_split, (m_block * kBlockM + binfo.actual_seqlen_k - binfo.actual_seqlen_q - params.window_size_left) / kBlockN);
+  int n_block_max = std::min(cute::ceil_div(binfo.actual_seqlen_k, kBlockN), (n_split_idx + 1) * n_blocks_per_split);
+  if (Is_causal || Is_local) {
+    n_block_max = std::min(n_block_max,
+                           cute::ceil_div((m_block + 1) * kBlockM + binfo.actual_seqlen_k - binfo.actual_seqlen_q + params.window_size_right, kBlockN));
+  }
+  if (n_block_min >= n_block_max) {  // This also covers the case where n_block_max <= 0
+    // We exit early and write 0 to gOaccum and -inf to gLSEaccum.
+    // Otherwise we might read OOB elements from gK and gV,
+    // or get wrong results when we combine gOaccum from different blocks.
+    const index_t row_offset_o = binfo.q_offset(params.o_batch_stride, params.o_row_stride, bidb) + m_block * kBlockM * params.o_row_stride + bidh * params.o_head_stride;
+    const index_t row_offset_oaccum = (((n_split_idx * params.b + bidb) * params.h + bidh) * params.seqlen_q + m_block * kBlockM) * params.d_rounded;
+    const index_t row_offset_lseaccum = ((n_split_idx * params.b + bidb) * params.h + bidh) * params.seqlen_q + m_block * kBlockM;
+    Tensor gOaccum = make_tensor(make_gmem_ptr(reinterpret_cast<ElementO*>(Split ? params.oaccum_ptr : params.o_ptr) + (Split ? row_offset_oaccum : row_offset_o)),
+                                 Shape<Int<kBlockM>, Int<kHeadDim>>{},
+                                 make_stride(Split ? kHeadDim : params.o_row_stride, _1{}));
+    Tensor gLSEaccum = make_tensor(make_gmem_ptr(reinterpret_cast<ElementAccum*>(Split ? params.softmax_lseaccum_ptr : params.softmax_lse_ptr) + row_offset_lseaccum),
+                                   Shape<Int<kBlockM>>{}, Stride<_1>{});
+
+    GmemTiledCopyO gmem_tiled_copy_Oaccum;
+    auto gmem_thr_copy_Oaccum = gmem_tiled_copy_Oaccum.get_thread_slice(tidx);
+    Tensor tOgOaccum = gmem_thr_copy_Oaccum.partition_D(gOaccum);
+    Tensor tOrOaccum = make_tensor<ElementO>(shape(tOgOaccum));
+    clear(tOrOaccum);
+    // Construct identity layout for sO
+    Tensor cO = make_identity_tensor(make_shape(size<0>(gOaccum), size<1>(gOaccum)));  // (BLK_M,BLK_K) -> (blk_m,blk_k)
+    // Repeat the partitioning with identity layouts
+    Tensor tOcO = gmem_thr_copy_Oaccum.partition_D(cO);
+    Tensor tOpO = make_tensor<bool>(make_shape(size<2>(tOgOaccum)));
+    if (!Is_even_K) {
+#pragma unroll
+      for (int k = 0; k < size(tOpO); ++k) {
+        tOpO(k) = get<1>(tOcO(0, 0, k)) < params.d;
+      }
+    }
+    // Clear_OOB_K must be false since we don't want to write zeros to gmem
+    flash::copy<Is_even_MN, Is_even_K, /*Clear_OOB_MN=*/false, /*Clear_OOB_K=*/false>(
+        gmem_tiled_copy_Oaccum, tOrOaccum, tOgOaccum, tOcO, tOpO, binfo.actual_seqlen_q - m_block * kBlockM);
+#pragma unroll
+    for (int m = 0; m < size<1>(tOgOaccum); ++m) {
+      const int row = get<0>(tOcO(0, m, 0));
+      if (row < binfo.actual_seqlen_q - m_block * kBlockM && get<1>(tOcO(0, m, 0)) == 0) {
+        gLSEaccum(row) = Split ? -INFINITY : INFINITY;
+      }
+    }
+    return;
+  }
+
+  // We iterate over the blocks in reverse order. This is because the last block is the only one
+  // that needs masking when we read K and V from global memory. Moreover, iterating in reverse
+  // might save us 1 register (we just need n_block instead of both n_block and n_block_max).
+
+  const index_t row_offset_q = binfo.q_offset(params.q_batch_stride, params.q_row_stride, bidb) + m_block * kBlockM * params.q_row_stride + bidh * params.q_head_stride;
+  // We move K and V to the last block.
+  const int bidb_cache = params.cache_batch_idx == nullptr ? bidb : params.cache_batch_idx[bidb];
+  const index_t row_offset_k = binfo.k_offset(params.k_batch_stride, params.k_row_stride, bidb_cache) + (n_block_max - 1) * kBlockN * params.k_row_stride + (bidh / params.h_h_k_ratio) * params.k_head_stride;
+  const index_t row_offset_v = binfo.k_offset(params.v_batch_stride, params.v_row_stride, bidb_cache) + (n_block_max - 1) * kBlockN * params.v_row_stride + (bidh / params.h_h_k_ratio) * params.v_head_stride;
+
+  Tensor gQ = make_tensor(make_gmem_ptr(reinterpret_cast<Element*>(params.q_ptr) + row_offset_q),
+                          Shape<Int<kBlockM>, Int<kHeadDim>>{},
+                          make_stride(params.q_row_stride, _1{}));
+  Tensor gK = make_tensor(make_gmem_ptr(reinterpret_cast<Element*>(params.k_ptr) + row_offset_k),
+                          Shape<Int<kBlockN>, Int<kHeadDim>>{},
+                          make_stride(params.k_row_stride, _1{}));
+  // if (threadIdx.x == 0 && blockIdx.y == 0 && blockIdx.z == 0) { printf("k_ptr = %p, row_offset_k = %d, gK_ptr = %p\n", params.k_ptr, row_offset_k, gK.data()); }
+  Tensor gV = make_tensor(make_gmem_ptr(reinterpret_cast<Element*>(params.v_ptr) + row_offset_v),
+                          Shape<Int<kBlockN>, Int<kHeadDim>>{},
+                          make_stride(params.v_row_stride, _1{}));
+
+  Tensor sQ = make_tensor(make_smem_ptr(reinterpret_cast<Element*>(smem_)),
+                          typename Kernel_traits::SmemLayoutQ{});
+  Tensor sK = make_tensor(sQ.data() + size(sQ), typename Kernel_traits::SmemLayoutKV{});
+  Tensor sV = make_tensor(sK.data() + size(sK), typename Kernel_traits::SmemLayoutKV{});
+  Tensor sVt = make_tensor(sV.data(), typename Kernel_traits::SmemLayoutVtransposed{});
+  Tensor sVtNoSwizzle = make_tensor(sV.data(), typename Kernel_traits::SmemLayoutVtransposedNoSwizzle{});
+
+  typename Kernel_traits::GmemTiledCopyQKV gmem_tiled_copy_QKV;
+  auto gmem_thr_copy_QKV = gmem_tiled_copy_QKV.get_thread_slice(tidx);
+
+  Tensor tQgQ = gmem_thr_copy_QKV.partition_S(gQ);
+  Tensor tQsQ = gmem_thr_copy_QKV.partition_D(sQ);
+  Tensor tKgK = gmem_thr_copy_QKV.partition_S(gK);  // (KCPY, KCPY_N, KCPY_K)
+  Tensor tKsK = gmem_thr_copy_QKV.partition_D(sK);
+  Tensor tVgV = gmem_thr_copy_QKV.partition_S(gV);  // (VCPY, VCPY_N, VCPY_K)
+  Tensor tVsV = gmem_thr_copy_QKV.partition_D(sV);
+
+  typename Kernel_traits::TiledMma tiled_mma;
+  auto thr_mma = tiled_mma.get_thread_slice(tidx);
+  Tensor tSrQ = thr_mma.partition_fragment_A(sQ);             // (MMA,MMA_M,MMA_K)
+  Tensor tSrK = thr_mma.partition_fragment_B(sK);             // (MMA,MMA_N,MMA_K)
+  Tensor tOrVt = thr_mma.partition_fragment_B(sVtNoSwizzle);  // (MMA, MMA_K,MMA_N)
+
+  Tensor acc_o = partition_fragment_C(tiled_mma, Shape<Int<kBlockM>, Int<kHeadDim>>{});  // MMA, MMA_M, MMA_K
+
+  //
+  // Copy Atom retiling
+  //
+
+  auto smem_tiled_copy_Q = make_tiled_copy_A(typename Kernel_traits::SmemCopyAtom{}, tiled_mma);
+  auto smem_thr_copy_Q = smem_tiled_copy_Q.get_thread_slice(tidx);
+  Tensor tSsQ = smem_thr_copy_Q.partition_S(sQ);
+
+  auto smem_tiled_copy_K = make_tiled_copy_B(typename Kernel_traits::SmemCopyAtom{}, tiled_mma);
+  auto smem_thr_copy_K = smem_tiled_copy_K.get_thread_slice(tidx);
+  Tensor tSsK = smem_thr_copy_K.partition_S(sK);
+
+  auto smem_tiled_copy_V = make_tiled_copy_B(typename Kernel_traits::SmemCopyAtomTransposed{}, tiled_mma);
+  auto smem_thr_copy_V = smem_tiled_copy_V.get_thread_slice(tidx);
+  Tensor tOsVt = smem_thr_copy_V.partition_S(sVt);
+
+  // TODO: this might need to change if we change the mma instruction in SM70
+  Tensor scores_max = make_tensor<ElementAccum>(Shape<Int<2 * size<1>(acc_o)>>{});
+  Tensor scores_sum = make_fragment_like(scores_max);
+
+  //
+  // PREDICATES
+  //
+
+  // // Allocate predicate tensors for m and n
+  // Tensor tQpQ = make_tensor<bool>(make_shape(size<1>(tQsQ), size<2>(tQsQ)), Stride<_1,_0>{});
+  // Tensor tKVpKV = make_tensor<bool>(make_shape(size<1>(tKsK), size<2>(tKsK)), Stride<_1,_0>{});
+
+  // Construct identity layout for sQ and sK
+  Tensor cQ = make_identity_tensor(make_shape(size<0>(sQ), size<1>(sQ)));   // (BLK_M,BLK_K) -> (blk_m,blk_k)
+  Tensor cKV = make_identity_tensor(make_shape(size<0>(sK), size<1>(sK)));  // (BLK_N,BLK_K) -> (blk_n,blk_k)
+
+  // Repeat the partitioning with identity layouts
+  Tensor tQcQ = gmem_thr_copy_QKV.partition_S(cQ);     // (ACPY,ACPY_M,ACPY_K) -> (blk_m,blk_k)
+  Tensor tKVcKV = gmem_thr_copy_QKV.partition_S(cKV);  // (BCPY,BCPY_N,BCPY_K) -> (blk_n,blk_k)
+
+  // Allocate predicate tensors for k
+  Tensor tQpQ = make_tensor<bool>(make_shape(size<2>(tQsQ)));
+  Tensor tKVpKV = make_tensor<bool>(make_shape(size<2>(tKsK)));
+
+  // Set predicates for k bounds
+  if (!Is_even_K) {
+#pragma unroll
+    for (int k = 0; k < size(tQpQ); ++k) {
+      tQpQ(k) = get<1>(tQcQ(0, 0, k)) < params.d;
+    }
+#pragma unroll
+    for (int k = 0; k < size(tKVpKV); ++k) {
+      tKVpKV(k) = get<1>(tKVcKV(0, 0, k)) < params.d;
+    }
+  }
+
+  // Prologue
+  // Copy from Knew to K, optionally apply rotary embedding.
+  typename Kernel_traits::GmemTiledCopyRotcossin gmem_tiled_copy_rotary;
+  auto gmem_thr_copy_rotary = gmem_tiled_copy_rotary.get_thread_slice(tidx);
+  typename Kernel_traits::GmemTiledCopyRotcossinCont gmem_tiled_copy_rotary_cont;
+  auto gmem_thr_copy_rotary_cont = gmem_tiled_copy_rotary_cont.get_thread_slice(tidx);
+  if constexpr (Append_KV) {
+    // Even if we have MQA / GQA, all threadblocks responsible for the same KV head are writing to
+    // gmem. Technically it's a race condition, but they all write the same content anyway, and it's safe.
+    // We want to do this so that all threadblocks can proceed right after they finish writing the KV cache.
+    const index_t row_offset_cossin = ((n_block_max - 1) * kBlockN) * (params.rotary_dim / 2);
+    Tensor gCos = make_tensor(make_gmem_ptr(reinterpret_cast<Element*>(params.rotary_cos_ptr) + row_offset_cossin),
+                              Shape<Int<kBlockN>, Int<kHeadDim / 2>>{},
+                              make_stride(params.rotary_dim / 2, _1{}));
+    Tensor gSin = make_tensor(make_gmem_ptr(reinterpret_cast<Element*>(params.rotary_sin_ptr) + row_offset_cossin),
+                              Shape<Int<kBlockN>, Int<kHeadDim / 2>>{},
+                              make_stride(params.rotary_dim / 2, _1{}));
+    Tensor gCosCont = make_tensor(make_gmem_ptr(reinterpret_cast<Element*>(params.rotary_cos_ptr) + row_offset_cossin),
+                                  Shape<Int<kBlockN>, Int<kHeadDim>>{},
+                                  make_stride(params.rotary_dim / 2, _1{}));
+    Tensor gSinCont = make_tensor(make_gmem_ptr(reinterpret_cast<Element*>(params.rotary_sin_ptr) + row_offset_cossin),
+                                  Shape<Int<kBlockN>, Int<kHeadDim>>{},
+                                  make_stride(params.rotary_dim / 2, _1{}));
+    Tensor tRgCos = gmem_thr_copy_rotary.partition_S(gCos);
+    Tensor tRgSin = gmem_thr_copy_rotary.partition_S(gSin);
+    Tensor tRgCosCont = gmem_thr_copy_rotary_cont.partition_S(gCosCont);
+    Tensor tRgSinCont = gmem_thr_copy_rotary_cont.partition_S(gSinCont);
+    // if (cute::thread(0, 0)) { printf("rotary_cos_ptr = %p, gCos.data() = %p, tRgCos.data() = %p, rotary_dim = %d\n", params.rotary_cos_ptr, gCos.data(), tRgCos.data(), params.rotary_dim); }
+    // if (cute::thread(8, 0)) { print_tensor(gCos); }
+    // if (cute::thread(0, 0)) { print_tensor(tRgCos); }
+
+    const index_t row_offset_knew = binfo.k_offset(params.knew_batch_stride, params.knew_row_stride, bidb) + ((n_block_max - 1) * kBlockN) * params.knew_row_stride + (bidh / params.h_h_k_ratio) * params.knew_head_stride;
+    const index_t row_offset_vnew = binfo.k_offset(params.vnew_batch_stride, params.vnew_row_stride, bidb) + ((n_block_max - 1) * kBlockN) * params.vnew_row_stride + (bidh / params.h_h_k_ratio) * params.vnew_head_stride;
+    // Subtract seqlen_k_cache * row stride so that conceptually gK and gKnew "line up". When we access them,
+    // e.g. if gK has 128 rows and gKnew has 64 rows, we access gK[:128] and gKNew[128:128 + 64].
+    // This maps to accessing the first 64 rows of knew_ptr.
+    Tensor gKnew = make_tensor(make_gmem_ptr(reinterpret_cast<Element*>(params.knew_ptr) + row_offset_knew - binfo.seqlen_k_cache * params.knew_row_stride),
+                               Shape<Int<kBlockN>, Int<kHeadDim>>{},
+                               make_stride(params.knew_row_stride, _1{}));
+    // if (threadIdx.x == 0 && blockIdx.y == 0 && blockIdx.z == 0) { printf("knew_ptr = %p, row_offset_knew = %d, gKnew_ptr = %p\n", params.knew_ptr, row_offset_knew, gKnew.data()); }
+    Tensor gVnew = make_tensor(make_gmem_ptr(reinterpret_cast<Element*>(params.vnew_ptr) + row_offset_vnew - binfo.seqlen_k_cache * params.vnew_row_stride),
+                               Shape<Int<kBlockN>, Int<kHeadDim>>{},
+                               make_stride(params.vnew_row_stride, _1{}));
+    Tensor tKgKnew = gmem_thr_copy_QKV.partition_S(gKnew);  // (KCPY, KCPY_N, KCPY_K)
+    Tensor tVgVnew = gmem_thr_copy_QKV.partition_S(gVnew);  // (VCPY, VCPY_N, VCPY_K)
+
+    const int n_block_copy_min = std::max(n_block_min, binfo.seqlen_k_cache / kBlockN);
+    for (int n_block = n_block_max - 1; n_block >= n_block_copy_min; n_block--) {
+      flash::copy_w_min_idx<Is_even_K>(
+          tVgVnew, tVgV, tKVcKV, tKVpKV, binfo.actual_seqlen_k - n_block * kBlockN, binfo.seqlen_k_cache - n_block * kBlockN);
+      tVgV.data() = tVgV.data() + (-int(kBlockN * params.v_row_stride));
+      tVgVnew.data() = tVgVnew.data() + (-int(kBlockN * params.vnew_row_stride));
+      if (params.rotary_dim == 0) {
+        flash::copy_w_min_idx<Is_even_K>(
+            tKgKnew, tKgK, tKVcKV, tKVpKV, binfo.actual_seqlen_k - n_block * kBlockN, binfo.seqlen_k_cache - n_block * kBlockN);
+      } else {
+        if (params.is_rotary_interleaved) {
+          // Don't clear OOB_K because we're writing to global memory
+          flash::copy_rotary_interleaved<Is_even_K, /*Clear_OOB_K=*/false>(
+              tKgKnew, tKgK, tRgCos, tRgSin, tKVcKV, binfo.actual_seqlen_k - n_block * kBlockN,
+              binfo.seqlen_k_cache - n_block * kBlockN, params.d, params.rotary_dim);
+          tRgCos.data() = tRgCos.data() + (-int(kBlockN * params.rotary_dim / 2));
+          tRgSin.data() = tRgSin.data() + (-int(kBlockN * params.rotary_dim / 2));
+        } else {
+          // Don't clear OOB_K because we're writing to global memory
+          flash::copy_rotary_contiguous<Is_even_K, /*Clear_OOB_K=*/false>(
+              tKgKnew, tKgK, tRgCosCont, tRgSinCont, tKVcKV, binfo.actual_seqlen_k - n_block * kBlockN,
+              binfo.seqlen_k_cache - n_block * kBlockN, params.d, params.rotary_dim);
+          tRgCosCont.data() = tRgCosCont.data() + (-int(kBlockN * params.rotary_dim / 2));
+          tRgSinCont.data() = tRgSinCont.data() + (-int(kBlockN * params.rotary_dim / 2));
+        }
+      }
+      tKgK.data() = tKgK.data() + (-int(kBlockN * params.k_row_stride));
+      tKgKnew.data() = tKgKnew.data() + (-int(kBlockN * params.knew_row_stride));
+    }
+    // Need this before we can read in K again, so that we'll see the updated K values.
+    __syncthreads();
+    if (n_block_max > n_block_copy_min) {
+      tKgK.data() = tKgK.data() + (n_block_max - n_block_copy_min) * kBlockN * params.k_row_stride;
+      tVgV.data() = tVgV.data() + (n_block_max - n_block_copy_min) * kBlockN * params.v_row_stride;
+    }
+  }
+
+  // Read Q from gmem to smem, optionally apply rotary embedding.
+  Tensor tQrQ = make_fragment_like(tQgQ);
+  if (!Append_KV || params.rotary_dim == 0) {
+    // We don't need to clear the sQ smem tiles since we'll only write out the valid outputs
+    flash::copy<Is_even_MN, Is_even_K>(gmem_tiled_copy_QKV, tQgQ, tQsQ, tQcQ, tQpQ,
+                                       binfo.actual_seqlen_q - m_block * kBlockM);
+  } else {
+    const index_t row_offset_cossin = (binfo.seqlen_k_cache + (Is_causal || Is_local ? m_block * kBlockM : 0)) * (params.rotary_dim / 2);
+    // If not causal, all the queries get the same the cos/sin, taken at location seqlen_k_cache.
+    // We do this by setting the row stride of gCos / gSin to 0.
+    Tensor gCos = make_tensor(make_gmem_ptr(reinterpret_cast<Element*>(params.rotary_cos_ptr) + row_offset_cossin),
+                              Shape<Int<kBlockM>, Int<kHeadDim / 2>>{},
+                              make_stride(Is_causal || Is_local ? params.rotary_dim / 2 : 0, _1{}));
+    Tensor gSin = make_tensor(make_gmem_ptr(reinterpret_cast<Element*>(params.rotary_sin_ptr) + row_offset_cossin),
+                              Shape<Int<kBlockM>, Int<kHeadDim / 2>>{},
+                              make_stride(Is_causal || Is_local ? params.rotary_dim / 2 : 0, _1{}));
+    Tensor gCosCont = make_tensor(make_gmem_ptr(reinterpret_cast<Element*>(params.rotary_cos_ptr) + row_offset_cossin),
+                                  Shape<Int<kBlockM>, Int<kHeadDim>>{},
+                                  make_stride(Is_causal || Is_local ? params.rotary_dim / 2 : 0, _1{}));
+    Tensor gSinCont = make_tensor(make_gmem_ptr(reinterpret_cast<Element*>(params.rotary_sin_ptr) + row_offset_cossin),
+                                  Shape<Int<kBlockM>, Int<kHeadDim>>{},
+                                  make_stride(Is_causal || Is_local ? params.rotary_dim / 2 : 0, _1{}));
+    Tensor tRgCos = gmem_thr_copy_rotary.partition_S(gCos);
+    Tensor tRgSin = gmem_thr_copy_rotary.partition_S(gSin);
+    Tensor tRgCosCont = gmem_thr_copy_rotary_cont.partition_S(gCosCont);
+    Tensor tRgSinCont = gmem_thr_copy_rotary_cont.partition_S(gSinCont);
+    if (params.is_rotary_interleaved) {
+      flash::copy_rotary_interleaved<Is_even_K>(
+          tQgQ, tQsQ, tRgCos, tRgSin, tQcQ, binfo.actual_seqlen_q - m_block * kBlockM,
+          0, params.d, params.rotary_dim);
+    } else {
+      flash::copy_rotary_contiguous<Is_even_K>(
+          tQgQ, tQsQ, tRgCosCont, tRgSinCont, tQcQ, binfo.actual_seqlen_q - m_block * kBlockM,
+          0, params.d, params.rotary_dim);
+    }
+  }
+
+  int n_block = n_block_max - 1;
+  // We don't need to clear the sK smem tiles since we'll mask out the scores anyway.
+  flash::copy<Is_even_MN, Is_even_K>(gmem_tiled_copy_QKV, tKgK, tKsK, tKVcKV, tKVpKV,
+                                     binfo.actual_seqlen_k - n_block * kBlockN);
+  cute::cp_async_fence();
+
+  // flash::cp_async_wait<0>();
+  // __syncthreads();
+  // if (tidx == 0 && blockIdx.y == 0 && blockIdx.z == 0) { print(tKsK); }
+  // __syncthreads();
+
+  clear(acc_o);
+
+  // For performance reason, we separate out two kinds of iterations:
+  // those that need masking on S, and those that don't.
+  // We need masking on S for the very last block when K and V has length not multiple of kBlockN.
+  // We also need masking on S if it's causal, for the last ceil_div(kBlockM, kBlockN) blocks.
+  // We will have at least 1 "masking" iteration.
+
+  // If not even_N, then seqlen_k might end in the middle of a block. In that case we need to
+  // mask 2 blocks (e.g. when kBlockM == kBlockN), not just 1.
+  constexpr int n_masking_steps = (!Is_causal && !Is_local)
+                                      ? 1
+                                      : ((Is_even_MN && Is_causal) ? cute::ceil_div(kBlockM, kBlockN) : cute::ceil_div(kBlockM, kBlockN) + 1);
+#pragma unroll
+  for (int masking_step = 0; masking_step < n_masking_steps; ++masking_step, --n_block) {
+    Tensor acc_s = partition_fragment_C(tiled_mma, Shape<Int<kBlockM>, Int<kBlockN>>{});  // (MMA=4, MMA_M, MMA_N)
+    clear(acc_s);
+    flash::cp_async_wait<0>();
+    __syncthreads();
+
+    // Advance gV
+    if (masking_step > 0) {
+      tVgV.data() = tVgV.data() + (-int(kBlockN * params.v_row_stride));
+      flash::copy</*Is_even_MN=*/true, Is_even_K>(gmem_tiled_copy_QKV, tVgV, tVsV, tKVcKV, tKVpKV);
+    } else {
+      // Clear the smem tiles to account for predicated off loads
+      flash::copy<Is_even_MN, Is_even_K, /*Clear_OOB_MN=*/true>(
+          gmem_tiled_copy_QKV, tVgV, tVsV, tKVcKV, tKVpKV, binfo.actual_seqlen_k - n_block * kBlockN);
+    }
+    cute::cp_async_fence();
+
+    flash::gemm(
+        acc_s, tSrQ, tSrK, tSsQ, tSsK, tiled_mma, smem_tiled_copy_Q, smem_tiled_copy_K,
+        smem_thr_copy_Q, smem_thr_copy_K);
+    // if (cute::thread0()) { print(acc_s); }
+
+    // Reshape acc_s from (MMA=4, MMA_M, MMA_N) to (nrow=(2, MMA_M), ncol=(2, MMA_N))
+    Tensor scores = make_tensor(acc_s.data(), flash::convert_layout_acc_rowcol(acc_s.layout()));
+    // if (cute::thread0()) { print(scores); }
+    // We don't put the masking before the matmul S = Q K^T because we don't clear sK
+    // for rows outside actual_seqlen_k. So those rows could have Inf / NaN, and the matmul
+    // can produce Inf / NaN.
+    if (!Is_causal && !Is_local) {
+      if (!Is_even_MN) {
+        flash::apply_mask(scores, binfo.actual_seqlen_k - n_block * kBlockN);
+      }
+    } else {
+      flash::apply_mask_local(scores, n_block * kBlockN, binfo.actual_seqlen_k,
+                              m_block * kBlockM + (tidx / 32) * 16 + (tidx % 32) / 4,
+                              binfo.actual_seqlen_q, kNWarps * 16,
+                              params.window_size_left, params.window_size_right);
+    }
+
+    flash::cp_async_wait<0>();
+    __syncthreads();
+    // if (tidx == 0 && blockIdx.y == 0 && blockIdx.z == 0) { print(tVsV); }
+    // __syncthreads();
+
+    if (n_block > n_block_min) {
+      // Advance gK
+      tKgK.data() = tKgK.data() + (-int(kBlockN * params.k_row_stride));
+      flash::copy</*Is_even_MN=*/true, Is_even_K>(gmem_tiled_copy_QKV, tKgK, tKsK, tKVcKV, tKVpKV);
+      // This cp_async_fence needs to be in the if block, otherwise the synchronization
+      // isn't right and we get race conditions.
+      cute::cp_async_fence();
+    }
+
+    // We have key_padding_mask so we'll need to Check_inf
+    masking_step == 0
+        ? softmax_rescale_o</*Is_first=*/true, /*Check_inf=*/Is_causal || Is_local || !Is_even_MN>(scores, scores_max, scores_sum, acc_o, params.scale_softmax_log2)
+        : softmax_rescale_o</*Is_first=*/false, /*Check_inf=*/Is_causal || Is_local || !Is_even_MN>(scores, scores_max, scores_sum, acc_o, params.scale_softmax_log2);
+    // if (cute::thread0()) { print(scores_max); print(scores_sum); print(scores); }
+
+    // Convert scores from fp32 to fp16/bf16
+    Tensor rP = flash::convert_type<Element>(scores);
+    // Reshape rP from (nrow=(2, MMA_M), ncol=(2, MMA_N)) to ((2, 2, 2), MMA_M, MMA_N / 2)
+    // if using m16n8k16 or ((2, 2, 1), MMA_M, MMA_N) if using m16n8k8.
+    Tensor tOrP = make_tensor(rP.data(), flash::convert_layout_rowcol_Aregs<Kernel_traits::TiledMma>(rP.layout()));
+
+    flash::gemm_A_in_regs(acc_o, tOrP, tOrVt, tOsVt, tiled_mma, smem_tiled_copy_V, smem_thr_copy_V);
+    // if (cute::thread0()) { print(scores); }
+
+    // This check is at the end of the loop since we always have at least 1 iteration
+    if (n_masking_steps > 1 && n_block <= n_block_min) {
+      --n_block;
+      break;
+    }
+  }
+
+  // These are the iterations where we don't need masking on S
+  for (; n_block >= n_block_min; --n_block) {
+    Tensor acc_s = partition_fragment_C(tiled_mma, Shape<Int<kBlockM>, Int<kBlockN>>{});  // (MMA=4, MMA_M, MMA_N)
+    clear(acc_s);
+    flash::cp_async_wait<0>();
+    __syncthreads();
+    // Advance gV
+    tVgV.data() = tVgV.data() + (-int(kBlockN * params.v_row_stride));
+    flash::copy</*Is_even_MN=*/true, Is_even_K>(gmem_tiled_copy_QKV, tVgV, tVsV, tKVcKV, tKVpKV);
+    cute::cp_async_fence();
+
+    flash::gemm(
+        acc_s, tSrQ, tSrK, tSsQ, tSsK, tiled_mma, smem_tiled_copy_Q, smem_tiled_copy_K,
+        smem_thr_copy_Q, smem_thr_copy_K);
+
+    flash::cp_async_wait<0>();
+    __syncthreads();
+    if (n_block > n_block_min) {
+      // Advance gK
+      tKgK.data() = tKgK.data() + (-int(kBlockN * params.k_row_stride));
+      flash::copy</*Is_even_MN=*/true, Is_even_K>(gmem_tiled_copy_QKV, tKgK, tKsK, tKVcKV, tKVpKV);
+      // This cp_async_fence needs to be in the if block, otherwise the synchronization
+      // isn't right and we get race conditions.
+      cute::cp_async_fence();
+    }
+
+    // Reshape acc_s from (MMA=4, MMA_M, MMA_N) to (nrow=(2, MMA_M), ncol=(2, MMA_N))
+    Tensor scores = make_tensor(acc_s.data(), flash::convert_layout_acc_rowcol(acc_s.layout()));
+    if (Is_local && n_block * kBlockN < (m_block + 1) * kBlockM + binfo.actual_seqlen_k - binfo.actual_seqlen_q + params.window_size_right) {
+      flash::apply_mask_local(
+          scores, n_block * kBlockN, binfo.actual_seqlen_k,
+          m_block * kBlockM + (tidx / 32) * 16 + (tidx % 32) / 4,
+          binfo.actual_seqlen_q, kNWarps * 16,
+          params.window_size_left, params.window_size_right);
+    }
+    softmax_rescale_o</*Is_first=*/false, /*Check_inf=*/Is_local>(scores, scores_max, scores_sum, acc_o, params.scale_softmax_log2);
+
+    Tensor rP = flash::convert_type<Element>(scores);
+    // Reshape rP from (nrow=(2, MMA_M), ncol=(2, MMA_N)) to ((2, 2, 2), MMA_M, MMA_N / 2)
+    // if using m16n8k16 or ((2, 2, 1), MMA_M, MMA_N) if using m16n8k8.
+    Tensor tOrP = make_tensor(rP.data(), flash::convert_layout_rowcol_Aregs<Kernel_traits::TiledMma>(rP.layout()));
+
+    flash::gemm_A_in_regs(acc_o, tOrP, tOrVt, tOsVt, tiled_mma, smem_tiled_copy_V, smem_thr_copy_V);
+  }
+
+  // Epilogue
+
+  // Reshape acc_o from (MMA=4, MMA_M, MMA_K) to (nrow=(2, MMA_M), ncol=(2, MMA_K))
+  Tensor acc_o_rowcol = make_tensor(acc_o.data(), flash::convert_layout_acc_rowcol(acc_o.layout()));
+  // if (cute::thread0()) { print(acc_o_rowcol); }
+  Tensor lse = make_fragment_like(scores_sum);
+#pragma unroll
+  for (int mi = 0; mi < size<0>(acc_o_rowcol); ++mi) {
+    float sum = scores_sum(mi);
+    float inv_sum = (sum == 0.f || sum != sum) ? 1.f : 1.f / sum;
+    lse(mi) = (sum == 0.f || sum != sum) ? (Split ? -INFINITY : INFINITY) : scores_max(mi) * params.scale_softmax + __logf(sum);
+    float scale = inv_sum;
+#pragma unroll
+    for (int ni = 0; ni < size<1>(acc_o_rowcol); ++ni) {
+      acc_o_rowcol(mi, ni) *= scale;
+    }
+  }
+  // if (cute::thread0()) { print(lse); }
+  // if (cute::thread0()) { print(acc_o_rowcol); }
+
+  Tensor sOaccum = make_tensor(make_smem_ptr(reinterpret_cast<ElementO*>(smem_)), typename Kernel_traits::SmemLayoutO{});  // (SMEM_M,SMEM_N)
+  // Partition sO to match the accumulator partitioning
+  using SmemTiledCopyO = std::conditional_t<
+      !Split,
+      typename Kernel_traits::SmemCopyAtomO,
+      typename Kernel_traits::SmemCopyAtomOaccum>;
+  auto smem_tiled_copy_Oaccum = make_tiled_copy_C(SmemTiledCopyO{}, tiled_mma);
+  auto smem_thr_copy_Oaccum = smem_tiled_copy_Oaccum.get_thread_slice(tidx);
+  Tensor rO = flash::convert_type<ElementO>(acc_o);
+  Tensor taccOrOaccum = smem_thr_copy_Oaccum.retile_S(rO);          // ((Atom,AtomNum), MMA_M, MMA_N)
+  Tensor taccOsOaccum = smem_thr_copy_Oaccum.partition_D(sOaccum);  // ((Atom,AtomNum),PIPE_M,PIPE_N)
+
+  // sOaccum is larger than sQ, so we need to syncthreads here
+  // TODO: allocate enough smem for sOaccum
+  if constexpr (Split) {
+    __syncthreads();
+  }
+
+  cute::copy(smem_tiled_copy_Oaccum, taccOrOaccum, taccOsOaccum);
+
+  const index_t row_offset_o = binfo.q_offset(params.o_batch_stride, params.o_row_stride, bidb) + m_block * kBlockM * params.o_row_stride + bidh * params.o_head_stride;
+  const index_t row_offset_oaccum = (((n_split_idx * params.b + bidb) * params.h + bidh) * params.seqlen_q + m_block * kBlockM) * params.d_rounded;
+  const index_t row_offset_lseaccum = ((n_split_idx * params.b + bidb) * params.h + bidh) * params.seqlen_q + m_block * kBlockM;
+
+  Tensor gOaccum = make_tensor(make_gmem_ptr(reinterpret_cast<ElementO*>(Split ? params.oaccum_ptr : params.o_ptr) + (Split ? row_offset_oaccum : row_offset_o)),
+                               Shape<Int<kBlockM>, Int<kHeadDim>>{},
+                               make_stride(Split ? kHeadDim : params.o_row_stride, _1{}));
+  Tensor gLSEaccum = make_tensor(make_gmem_ptr(reinterpret_cast<ElementAccum*>(Split ? params.softmax_lseaccum_ptr : params.softmax_lse_ptr) + row_offset_lseaccum),
+                                 Shape<Int<kBlockM>>{}, Stride<_1>{});
+  // if (tidx == 0) { printf("row_offset_o = %d, bidh = %d, gOaccum = %p\n", row_offset_o, bidh, gOaccum.data()); }
+
+  GmemTiledCopyO gmem_tiled_copy_Oaccum;
+  auto gmem_thr_copy_Oaccum = gmem_tiled_copy_Oaccum.get_thread_slice(tidx);
+  Tensor tOsOaccum = gmem_thr_copy_Oaccum.partition_S(sOaccum);  // ((Atom,AtomNum),ATOM_M,ATOM_N)
+  Tensor tOgOaccum = gmem_thr_copy_Oaccum.partition_D(gOaccum);
+
+  __syncthreads();
+
+  Tensor tOrOaccum = make_tensor<ElementO>(shape(tOgOaccum));
+  cute::copy(gmem_tiled_copy_Oaccum, tOsOaccum, tOrOaccum);
+
+  Tensor caccO = make_identity_tensor(Shape<Int<kBlockM>, Int<kHeadDim>>{});  // (BLK_M,BLK_K) -> (blk_m,blk_k)
+  Tensor taccOcO = thr_mma.partition_C(caccO);                                // (MMA,MMA_M,MMA_K)
+  static_assert(decltype(size<0>(taccOcO))::value == 4);
+  // Convert to ((2, 2), MMA_M, MMA_K) then take only the row indices.
+  Tensor taccOcO_row = logical_divide(taccOcO, Shape<_2>{})(make_coord(0, _), _, 0);
+  CUTE_STATIC_ASSERT_V(size(lse) == size(taccOcO_row));  // MMA_M
+  if (get<1>(taccOcO_row(0)) == 0) {
+#pragma unroll
+    for (int mi = 0; mi < size(lse); ++mi) {
+      const int row = get<0>(taccOcO_row(mi));
+      if (row < binfo.actual_seqlen_q - m_block * kBlockM) {
+        gLSEaccum(row) = lse(mi);
+      }
+    }
+  }
+
+  // Construct identity layout for sO
+  Tensor cO = make_identity_tensor(make_shape(size<0>(sOaccum), size<1>(sOaccum)));  // (BLK_M,BLK_K) -> (blk_m,blk_k)
+  // Repeat the partitioning with identity layouts
+  Tensor tOcO = gmem_thr_copy_Oaccum.partition_D(cO);  // (ACPY,ACPY_M,ACPY_K) -> (blk_m,blk_k)
+  Tensor tOpO = make_tensor<bool>(make_shape(size<2>(tOgOaccum)));
+  if (!Is_even_K) {
+#pragma unroll
+    for (int k = 0; k < size(tOpO); ++k) {
+      tOpO(k) = get<1>(tOcO(0, 0, k)) < params.d;
+    }
+  }
+  // Clear_OOB_K must be false since we don't want to write zeros to gmem
+  flash::copy<Is_even_MN, Is_even_K, /*Clear_OOB_MN=*/false, /*Clear_OOB_K=*/false>(
+      gmem_tiled_copy_Oaccum, tOrOaccum, tOgOaccum, tOcO, tOpO, binfo.actual_seqlen_q - m_block * kBlockM);
+  // __syncthreads();
+  // if (cute::thread0()) { print(tOgOaccum); }
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+template <typename Kernel_traits, bool Is_causal, bool Is_local, bool Is_even_MN, bool Is_even_K, bool Return_softmax, typename Params>
 inline __device__ void compute_attn(const Params& params) {
   const int m_block = blockIdx.x;
   // The block index for the batch.
@@ -520,10 +1065,194 @@ inline __device__ void compute_attn(const Params& params) {
   // the attention matrix. This way, as long as we have the batch, head, and the location of
   // the 16 x 32 block within the attention matrix, we can generate the exact same dropout pattern.
 
-  flash::compute_attn_1rowblock<Kernel_traits, Is_causal, Is_even_MN, Is_even_K, Return_softmax>(params, bidb, bidh, m_block);
+  flash::compute_attn_1rowblock<Kernel_traits, Is_causal, Is_local, Is_even_MN, Is_even_K, Return_softmax>(params, bidb, bidh, m_block);
 }
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
+
+template <typename Kernel_traits, bool Is_causal, bool Is_local, bool Is_even_MN, bool Is_even_K, bool Split, bool Append_KV, typename Params>
+inline __device__ void compute_attn_splitkv(const Params& params) {
+  const int m_block = blockIdx.x;
+  // The block index for the batch.
+  const int bidb = Split ? blockIdx.z / params.h : blockIdx.y;
+  // The block index for the head.
+  const int bidh = Split ? blockIdx.z - bidb * params.h : blockIdx.z;
+  const int n_split_idx = Split ? blockIdx.y : 0;
+  const int num_n_splits = Split ? gridDim.y : 1;
+  flash::compute_attn_1rowblock_splitkv<Kernel_traits, Is_causal, Is_local, Is_even_MN, Is_even_K, Split, Append_KV>(params, bidb, bidh, m_block, n_split_idx, num_n_splits);
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+template <typename Kernel_traits, int kBlockM, int Log_max_splits, bool Is_even_K, typename Params>
+inline __device__ void combine_attn_seqk_parallel(const Params& params) {
+  using Element = typename Kernel_traits::Element;
+  using ElementAccum = typename Kernel_traits::ElementAccum;
+  using index_t = typename Kernel_traits::index_t;
+  constexpr int kMaxSplits = 1 << Log_max_splits;
+  constexpr int kHeadDim = Kernel_traits::kHeadDim;
+  constexpr int kNThreads = Kernel_traits::kNThreads;
+
+  static_assert(kMaxSplits <= 128, "kMaxSplits must be <= 128");
+  static_assert(kBlockM == 4 || kBlockM == 8 || kBlockM == 16 || kBlockM == 32, "kBlockM must be 4, 8, 16 or 32");
+  static_assert(kNThreads == 128, "We assume that each block has 128 threads");
+
+  // Shared memory.
+  // kBlockM + 1 instead of kBlockM to reduce bank conflicts.
+  __shared__ ElementAccum sLSE[kMaxSplits][kBlockM + 1];
+
+  // The thread and block index.
+  const int tidx = threadIdx.x;
+  const int bidx = blockIdx.x;
+
+  const index_t row_offset_lse = bidx * kBlockM;
+  Tensor gLSEaccum = make_tensor(make_gmem_ptr(reinterpret_cast<ElementAccum*>(params.softmax_lseaccum_ptr) + row_offset_lse),
+                                 Shape<Int<kMaxSplits>, Int<kBlockM>>{},
+                                 make_stride(params.b * params.h * params.seqlen_q, _1{}));
+  Tensor gLSE = make_tensor(make_gmem_ptr(reinterpret_cast<ElementAccum*>(params.softmax_lse_ptr) + row_offset_lse),
+                            Shape<Int<kBlockM>>{}, Stride<_1>{});
+  constexpr int kNLsePerThread = (kMaxSplits * kBlockM + kNThreads - 1) / kNThreads;
+
+  // Read the LSE values from gmem and store them in shared memory, then tranpose them.
+  constexpr int kRowsPerLoadLSE = kNThreads / kBlockM;
+#pragma unroll
+  for (int l = 0; l < kNLsePerThread; ++l) {
+    const int row = l * kRowsPerLoadLSE + tidx / kBlockM;
+    const int col = tidx % kBlockM;
+    ElementAccum lse = (row < params.num_splits && col < params.b * params.h * params.seqlen_q - bidx * kBlockM) ? gLSEaccum(row, col) : -INFINITY;
+    if (row < kMaxSplits) {
+      sLSE[row][col] = lse;
+    }
+    // if (bidx == 0 && tidx < 32) { printf("tidx = %d, row = %d, col = %d, lse = %f\n", tidx, row, col, lse_accum(l)); }
+  }
+  // if (bidx == 1 && tidx < 32) { printf("tidx = %d, row_offset_lse = %d, lse = %f\n", tidx, row_offset_lse, lse_accum(0)); }
+  __syncthreads();
+  Tensor lse_accum = make_tensor<ElementAccum>(Shape<Int<kNLsePerThread>>{});
+  constexpr int kRowsPerLoadTranspose = std::min(kRowsPerLoadLSE, kMaxSplits);
+  // To make sure that kMaxSplits is within 1 warp: we decide how many elements within kMaxSplits
+  // each thread should hold. If kMaxSplits = 16, then each thread holds 2 elements (128 threads,
+  // 16 rows, so each time we load we can load 8 rows).
+  // constexpr int kThreadsPerSplit = kMaxSplits / kRowsPerLoadTranspose;
+  // static_assert(kThreadsPerSplit <= 32);
+  static_assert(kRowsPerLoadTranspose <= 32);
+  static_assert(kNLsePerThread * kRowsPerLoadTranspose <= kMaxSplits);
+#pragma unroll
+  for (int l = 0; l < kNLsePerThread; ++l) {
+    const int row = l * kRowsPerLoadTranspose + tidx % kRowsPerLoadTranspose;
+    const int col = tidx / kRowsPerLoadTranspose;
+    lse_accum(l) = (row < kMaxSplits && col < kBlockM) ? sLSE[row][col] : -INFINITY;
+    // if (bidx == 0 && tidx < 32) { printf("tidx = %d, row = %d, col = %d, lse = %f\n", tidx, row, col, lse_accum(l)); }
+  }
+
+  // Compute the logsumexp of the LSE along the split dimension.
+  ElementAccum lse_max = lse_accum(0);
+#pragma unroll
+  for (int l = 1; l < kNLsePerThread; ++l) {
+    lse_max = max(lse_max, lse_accum(l));
+  }
+  MaxOp<float> max_op;
+  lse_max = Allreduce<kRowsPerLoadTranspose>::run(lse_max, max_op);
+  lse_max = lse_max == -INFINITY ? 0.0f : lse_max;  // In case all local LSEs are -inf
+  float lse_sum = expf(lse_accum(0) - lse_max);
+#pragma unroll
+  for (int l = 1; l < kNLsePerThread; ++l) {
+    lse_sum += expf(lse_accum(l) - lse_max);
+  }
+  SumOp<float> sum_op;
+  lse_sum = Allreduce<kRowsPerLoadTranspose>::run(lse_sum, sum_op);
+  // For the case where all local lse == -INFINITY, we want to set lse_logsum to INFINITY. Otherwise
+  // lse_logsum is log(0.0) = -INFINITY and we get NaN when we do lse_accum(l) - lse_logsum.
+  ElementAccum lse_logsum = (lse_sum == 0.f || lse_sum != lse_sum) ? INFINITY : logf(lse_sum) + lse_max;
+  // if (bidx == 0 && tidx < 32) { printf("tidx = %d, lse = %f, lse_max = %f, lse_logsum = %f\n", tidx, lse_accum(0), lse_max, lse_logsum); }
+  if (tidx % kRowsPerLoadTranspose == 0 && tidx / kRowsPerLoadTranspose < kBlockM) {
+    gLSE(tidx / kRowsPerLoadTranspose) = lse_logsum;
+  }
+// Store the scales exp(lse - lse_logsum) in shared memory.
+#pragma unroll
+  for (int l = 0; l < kNLsePerThread; ++l) {
+    const int row = l * kRowsPerLoadTranspose + tidx % kRowsPerLoadTranspose;
+    const int col = tidx / kRowsPerLoadTranspose;
+    if (row < params.num_splits && col < kBlockM) {
+      sLSE[row][col] = expf(lse_accum(l) - lse_logsum);
+    }
+  }
+  __syncthreads();
+
+  const index_t row_offset_oaccum = bidx * kBlockM * params.d_rounded;
+  Tensor gOaccum = make_tensor(make_gmem_ptr(reinterpret_cast<ElementAccum*>(params.oaccum_ptr) + row_offset_oaccum),
+                               Shape<Int<kBlockM>, Int<kHeadDim>>{},
+                               Stride<Int<kHeadDim>, _1>{});
+  constexpr int kBlockN = kNThreads / kBlockM;
+  using GmemLayoutAtomOaccum = Layout<Shape<Int<kBlockM>, Int<kBlockN>>, Stride<Int<kBlockN>, _1>>;
+  using GmemTiledCopyOaccum = decltype(make_tiled_copy(Copy_Atom<DefaultCopy, ElementAccum>{},
+                                                       GmemLayoutAtomOaccum{},
+                                                       Layout<Shape<_1, _4>>{}));  // Val layout, 4 vals per store
+  GmemTiledCopyOaccum gmem_tiled_copy_Oaccum;
+  auto gmem_thr_copy_Oaccum = gmem_tiled_copy_Oaccum.get_thread_slice(tidx);
+  Tensor tOgOaccum = gmem_thr_copy_Oaccum.partition_S(gOaccum);
+  Tensor tOrO = make_tensor<ElementAccum>(shape(tOgOaccum));
+  Tensor tOrOaccum = make_tensor<ElementAccum>(shape(tOgOaccum));
+  clear(tOrO);
+
+  // Predicates
+  Tensor cOaccum = make_identity_tensor(Shape<Int<kBlockM>, Int<kHeadDim>>{});
+  // Repeat the partitioning with identity layouts
+  Tensor tOcOaccum = gmem_thr_copy_Oaccum.partition_S(cOaccum);
+  Tensor tOpOaccum = make_tensor<bool>(make_shape(size<2>(tOgOaccum)));
+  if (!Is_even_K) {
+#pragma unroll
+    for (int k = 0; k < size(tOpOaccum); ++k) {
+      tOpOaccum(k) = get<1>(tOcOaccum(0, 0, k)) < params.d;
+    }
+  }
+  // Load Oaccum in then scale and accumulate to O
+  for (int split = 0; split < params.num_splits; ++split) {
+    flash::copy</*Is_even_MN=*/false, Is_even_K>(
+        gmem_tiled_copy_Oaccum, tOgOaccum, tOrOaccum, tOcOaccum, tOpOaccum, params.b * params.h * params.seqlen_q - bidx * kBlockM);
+#pragma unroll
+    for (int m = 0; m < size<1>(tOrOaccum); ++m) {
+      int row = get<0>(tOcOaccum(0, m, 0));
+      ElementAccum lse_scale = sLSE[split][row];
+#pragma unroll
+      for (int k = 0; k < size<2>(tOrOaccum); ++k) {
+#pragma unroll
+        for (int i = 0; i < size<0>(tOrOaccum); ++i) {
+          tOrO(i, m, k) += lse_scale * tOrOaccum(i, m, k);
+        }
+      }
+      // if (cute::thread0()) { printf("lse_scale = %f, %f\n", sLSE[split][0], sLSE[split][1]); print(tOrOaccum); print(tOrO); }
+    }
+    tOgOaccum.data() = tOgOaccum.data() + params.b * params.h * params.seqlen_q * params.d_rounded;
+  }
+  // if (cute::thread0()) { print(tOrO); }
+
+  Tensor rO = flash::convert_type<Element>(tOrO);
+// Write to gO
+#pragma unroll
+  for (int m = 0; m < size<1>(rO); ++m) {
+    const int idx = bidx * kBlockM + get<0>(tOcOaccum(0, m, 0));
+    if (idx < params.b * params.h * params.seqlen_q) {
+      const int batch_idx = idx / (params.h * params.seqlen_q);
+      const int head_idx = (idx - batch_idx * (params.h * params.seqlen_q)) / params.seqlen_q;
+      // The index to the rows of Q
+      const int row = idx - batch_idx * (params.h * params.seqlen_q) - head_idx * params.seqlen_q;
+      auto o_ptr = reinterpret_cast<Element*>(params.o_ptr) + batch_idx * params.o_batch_stride + head_idx * params.o_head_stride + row * params.o_row_stride;
+#pragma unroll
+      for (int k = 0; k < size<2>(rO); ++k) {
+        if (Is_even_K || tOpOaccum(k)) {
+          const int col = get<1>(tOcOaccum(0, m, k));
+          Tensor gO = make_tensor(make_gmem_ptr(o_ptr + col),
+                                  Shape<Int<decltype(size<0>(rO))::value>>{}, Stride<_1>{});
+          // TODO: Should check if this is using vectorized store, but it seems pretty fast
+          copy(rO(_, m, k), gO);
+          // if (bidx == 0 && tidx == 0) { printf("tidx = %d, idx = %d, batch_idx = %d, head_idx = %d, row = %d, col = %d\n", tidx, idx, batch_idx, head_idx, row, col); print(rO(_, m, k)); print(gO); }
+          // reinterpret_cast<uint64_t *>(o_ptr)[col / 4] = recast<uint64_t>(rO)(0, m, k);
+        }
+      }
+    }
+  }
+}
+
 }  // namespace flash
 }  // namespace onnxruntime
 
diff --git a/onnxruntime/contrib_ops/cuda/bert/flash_attention/flash_fwd_launch_template.h b/onnxruntime/contrib_ops/cuda/bert/flash_attention/flash_fwd_launch_template.h
index e633ef4d45fbb..87d189a803f8a 100644
--- a/onnxruntime/contrib_ops/cuda/bert/flash_attention/flash_fwd_launch_template.h
+++ b/onnxruntime/contrib_ops/cuda/bert/flash_attention/flash_fwd_launch_template.h
@@ -10,9 +10,33 @@
 namespace onnxruntime {
 namespace flash {
 
-template <typename Kernel_traits, bool Is_causal, bool Is_even_MN, bool Is_even_K, bool Return_softmax>
+template <typename Kernel_traits, bool Is_causal, bool Is_local, bool Is_even_MN, bool Is_even_K, bool Return_softmax>
 __global__ void flash_fwd_kernel(Flash_fwd_params params) {
-  flash::compute_attn<Kernel_traits, Is_causal, Is_even_MN, Is_even_K, Return_softmax>(params);
+  static_assert(!(Is_causal && Is_local));  // If Is_local is true, Is_causal should be false
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800
+  flash::compute_attn<Kernel_traits, Is_causal, Is_local, Is_even_MN, Is_even_K, Return_softmax>(params);
+#else
+  (void)params;
+#endif
+}
+
+template <typename Kernel_traits, bool Is_causal, bool Is_local, bool Is_even_MN, bool Is_even_K, bool Split, bool Append_KV>
+__global__ void flash_fwd_splitkv_kernel(Flash_fwd_params params) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800
+  flash::compute_attn_splitkv<Kernel_traits, Is_causal, Is_local, Is_even_MN, Is_even_K, Split, Append_KV>(params);
+#else
+  (void)params;
+#endif
+}
+
+template <typename Kernel_traits, int kBlockM, int Log_max_splits, bool Is_even_K>
+__global__ void flash_fwd_splitkv_combine_kernel(Flash_fwd_params params) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800
+  static_assert(Log_max_splits >= 1);
+  flash::combine_attn_seqk_parallel<Kernel_traits, kBlockM, Log_max_splits, Is_even_K>(params);
+#else
+  (void)params;
+#endif
 }
 
 template <typename Kernel_traits, bool Is_causal>
@@ -25,35 +49,100 @@ void run_flash_fwd(Flash_fwd_params& params, cudaStream_t stream) {
 
   const int num_m_block = (params.seqlen_q + Kernel_traits::kBlockM - 1) / Kernel_traits::kBlockM;
   dim3 grid(num_m_block, params.b, params.h);
-  // We also use is_even_N to set Unpadded in the BlockInfo constructor, so we need to check
-  // for cu_seqlens_q as well.
   const bool is_even_MN = params.cu_seqlens_q == nullptr && params.cu_seqlens_k == nullptr && params.seqlen_k % Kernel_traits::kBlockN == 0 && params.seqlen_q % Kernel_traits::kBlockM == 0;
   const bool is_even_K = params.d == Kernel_traits::kHeadDim;
   BOOL_SWITCH(is_even_MN, IsEvenMNConst, [&] {
     BOOL_SWITCH(is_even_K, IsEvenKConst, [&] {
-      // Will only return softmax if dropout, to reduce compilation time.
-      auto kernel = &flash_fwd_kernel<Kernel_traits, Is_causal, IsEvenMNConst, IsEvenKConst, false>;
-      // auto kernel = &flash_fwd_kernel<Kernel_traits, Is_causal, IsEvenMNConst, true, ReturnSoftmaxConst>;
-      if (smem_size >= 48 * 1024) {
-        cudaFuncSetAttribute(
-            kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size);
-        // ORT_ENFORCE(cudaFuncSetAttribute(
-        //     kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size));
-      }
-      int ctas_per_sm;
-      cudaOccupancyMaxActiveBlocksPerMultiprocessor(
-          &ctas_per_sm, kernel, Kernel_traits::kNThreads, smem_size);
-      // cudaError status_ = cudaOccupancyMaxActiveBlocksPerMultiprocessor(
-      //     &ctas_per_sm, kernel, Kernel_traits::kNThreads, smem_size);
-      //  printf("smem_size = %d, CTAs per SM = %d\n", int(smem_size), ctas_per_sm);
-      kernel<<<grid, Kernel_traits::kNThreads, smem_size, stream>>>(params);
+      BOOL_SWITCH(params.window_size_left >= 0 || params.window_size_right >= 0, Is_local, [&] {
+        // Will only return softmax if dropout, to reduce compilation time.
+        // If not IsEvenKConst, we also set IsEvenMNConst to false to reduce number of templates.
+        // If head dim > 128, set IsEvenMNConst to false to reduce number of templates
+        // If Is_local, set Is_causal to false
+        auto kernel = &flash_fwd_kernel < Kernel_traits, Is_causal && !Is_local, Is_local, IsEvenMNConst && IsEvenKConst && !Is_local && Kernel_traits::kHeadDim <= 128, IsEvenKConst, false > ;
+        // auto kernel = &flash_fwd_kernel<Kernel_traits, Is_causal, IsEvenMNConst, true, ReturnSoftmaxConst>;
+        if (smem_size >= 48 * 1024) {
+          cudaFuncSetAttribute(
+              kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size);
+          // ORT_ENFORCE(cudaFuncSetAttribute(
+          //     kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size));
+        }
+        // int ctas_per_sm;
+        // cudaError status_ = cudaOccupancyMaxActiveBlocksPerMultiprocessor(
+        //     &ctas_per_sm, kernel, Kernel_traits::kNThreads, smem_size);
+        //  printf("smem_size = %d, CTAs per SM = %d\n", int(smem_size), ctas_per_sm);
+        kernel<<<grid, Kernel_traits::kNThreads, smem_size, stream>>>(params);
+      });
+    });
+  });
+}
+
+template <typename Kernel_traits>
+void run_flash_splitkv_fwd(Flash_fwd_params& params, cudaStream_t stream) {
+  static_assert(!Kernel_traits::Is_Q_in_regs, "SplitKV implementation does not support Is_Q_in_regs");
+  static_assert(!Kernel_traits::Share_Q_K_smem, "SplitKV implementation does not support Share_Q_K_smem");
+  constexpr size_t smem_size = Kernel_traits::kSmemSize;
+  const int num_m_block = (params.seqlen_q + Kernel_traits::kBlockM - 1) / Kernel_traits::kBlockM;
+  dim3 grid(num_m_block, params.num_splits > 1 ? params.num_splits : params.b, params.num_splits > 1 ? params.b * params.h : params.h);
+  const bool is_even_MN = params.cu_seqlens_q == nullptr && params.cu_seqlens_k == nullptr && params.seqlen_k % Kernel_traits::kBlockN == 0 && params.seqlen_q % Kernel_traits::kBlockM == 0;
+  const bool is_even_K = params.d == Kernel_traits::kHeadDim;
+  BOOL_SWITCH(params.is_causal, Is_causal, [&] {
+    BOOL_SWITCH(is_even_MN, IsEvenMNConst, [&] {
+      BOOL_SWITCH(is_even_K, IsEvenKConst, [&] {
+        BOOL_SWITCH(params.window_size_left >= 0 || params.window_size_right >= 0, Is_local, [&] {
+          BOOL_SWITCH(params.num_splits > 1, Split, [&] {
+            BOOL_SWITCH(params.knew_ptr != nullptr, Append_KV, [&] {
+              // If Append_KV, then we must have seqlen_offsets, which means cu_seqlens_k != nullptr.
+              // printf("About to launch, Split = %d, Append_KV = %d, knew_ptr = %p\n", Split, Append_KV, params.knew_ptr);
+              auto kernel = &flash_fwd_splitkv_kernel < Kernel_traits, Is_causal && !Is_local, Is_local, IsEvenMNConst && !Append_KV && IsEvenKConst && !Is_local && Kernel_traits::kHeadDim <= 128, IsEvenKConst, Split, Append_KV > ;
+              // auto kernel = &flash_fwd_splitkv_kernel<Kernel_traits, Is_causal, false, true, Split, Append_KV>;
+              // auto kernel = &flash_fwd_splitkv_kernel<Kernel_traits, Is_causal, false, IsEvenKConst>;
+              if (smem_size >= 48 * 1024) {
+                cudaFuncSetAttribute(
+                    kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size);
+              }
+              kernel<<<grid, Kernel_traits::kNThreads, smem_size, stream>>>(params);
+            });
+          });
+        });
+      });
     });
   });
+  if (params.num_splits > 1) {
+    // We want kBlockM to be as small as possible for more parallelism.
+    // With 128 threads we can load 512 elements at a time, so if headdim is divisible by 128, kBlockM = 4.
+    // If headdim is divisible by 64, then we set kBlockM = 8, etc.
+    constexpr static int kBlockM = Kernel_traits::kHeadDim % 128 == 0 ? 4 : (Kernel_traits::kHeadDim % 64 == 0 ? 8 : 16);
+    dim3 grid_combine((params.b * params.h * params.seqlen_q + kBlockM - 1) / kBlockM);
+    BOOL_SWITCH(is_even_K, IsEvenKConst, [&] {
+      if (params.num_splits <= 2) {
+        flash_fwd_splitkv_combine_kernel<Kernel_traits, kBlockM, 1, IsEvenKConst><<<grid_combine, Kernel_traits::kNThreads, 0, stream>>>(params);
+      } else if (params.num_splits <= 4) {
+        flash_fwd_splitkv_combine_kernel<Kernel_traits, kBlockM, 2, IsEvenKConst><<<grid_combine, Kernel_traits::kNThreads, 0, stream>>>(params);
+      } else if (params.num_splits <= 8) {
+        flash_fwd_splitkv_combine_kernel<Kernel_traits, kBlockM, 3, IsEvenKConst><<<grid_combine, Kernel_traits::kNThreads, 0, stream>>>(params);
+      } else if (params.num_splits <= 16) {
+        flash_fwd_splitkv_combine_kernel<Kernel_traits, kBlockM, 4, IsEvenKConst><<<grid_combine, Kernel_traits::kNThreads, 0, stream>>>(params);
+      } else if (params.num_splits <= 32) {
+        flash_fwd_splitkv_combine_kernel<Kernel_traits, kBlockM, 5, IsEvenKConst><<<grid_combine, Kernel_traits::kNThreads, 0, stream>>>(params);
+      } else if (params.num_splits <= 64) {
+        flash_fwd_splitkv_combine_kernel<Kernel_traits, kBlockM, 6, IsEvenKConst><<<grid_combine, Kernel_traits::kNThreads, 0, stream>>>(params);
+      } else if (params.num_splits <= 128) {
+        flash_fwd_splitkv_combine_kernel<Kernel_traits, kBlockM, 7, IsEvenKConst><<<grid_combine, Kernel_traits::kNThreads, 0, stream>>>(params);
+      }
+    });
+  }
+}
+
+template <typename T, int Headdim>
+void run_mha_fwd_splitkv_dispatch(Flash_fwd_params& params, cudaStream_t stream) {
+  constexpr int kBlockM = 64;  // Fixed for all head dimensions
+  constexpr int kBlockN = Headdim <= 64 ? 256 : (Headdim <= 128 ? 128 : 64);
+  run_flash_splitkv_fwd<Flash_fwd_kernel_traits<Headdim, kBlockM, kBlockN, 4, false, false, T>>(params, stream);
 }
 
 template <typename T>
 void run_mha_fwd_hdim32(Flash_fwd_params& params, cudaStream_t stream) {
-  constexpr int Headdim = 32;
+  constexpr static int Headdim = 32;
   BOOL_SWITCH(params.is_causal, Is_causal, [&] {
     run_flash_fwd<Flash_fwd_kernel_traits<Headdim, 128, 128, 4, false, false, T>, Is_causal>(params, stream);
   });
@@ -61,7 +150,7 @@ void run_mha_fwd_hdim32(Flash_fwd_params& params, cudaStream_t stream) {
 
 template <typename T>
 void run_mha_fwd_hdim64(Flash_fwd_params& params, cudaStream_t stream) {
-  constexpr int Headdim = 64;
+  constexpr static int Headdim = 64;
   BOOL_SWITCH(params.is_causal, Is_causal, [&] {
     // Using 8 warps is 18% slower for seqlen=2k, 2 warps is 5% slower
     // Using block size (64 x 256) is 27% slower for seqlen=2k
@@ -97,8 +186,8 @@ void run_mha_fwd_hdim96(Flash_fwd_params& params, cudaStream_t stream) {
 
 template <typename T>
 void run_mha_fwd_hdim128(Flash_fwd_params& params, cudaStream_t stream) {
-  constexpr int Headdim = 128;
-  const bool is_sm8x = params.dprops->major == 8 && params.dprops->minor > 0;
+  constexpr static int Headdim = 128;
+  bool is_sm8x = params.dprops->major == 8 && params.dprops->minor > 0;
   BOOL_SWITCH(params.is_causal, Is_causal, [&] {
     // For sm86 or sm89, 64 x 64 is the fastest for causal (because it's square),
     // and 128 x 32 (48 KB smem) is the fastest for non-causal since we get 2 CTAs per SM.
@@ -124,8 +213,8 @@ void run_mha_fwd_hdim128(Flash_fwd_params& params, cudaStream_t stream) {
 
 template <typename T>
 void run_mha_fwd_hdim160(Flash_fwd_params& params, cudaStream_t stream) {
-  constexpr int Headdim = 160;
-  const bool is_sm8x = params.dprops->major == 8 && params.dprops->minor > 0;
+  constexpr static int Headdim = 160;
+  bool is_sm8x = params.dprops->major == 8 && params.dprops->minor > 0;
   BOOL_SWITCH(params.is_causal, Is_causal, [&] {
     // For A100, H100, 128 x 32 is the fastest.
     // For sm86 or sm89, 64 x 64 is the fastest for causal (because it's square),
@@ -164,12 +253,11 @@ void run_mha_fwd_hdim192(Flash_fwd_params& params, cudaStream_t stream) {
 
 template <typename T>
 void run_mha_fwd_hdim224(Flash_fwd_params& params, cudaStream_t stream) {
-  constexpr size_t Headdim = 224;
-  constexpr size_t threshold = 2 * Headdim * (128 + 2 * 64);
-  size_t max_smem_per_block = params.dprops->sharedMemPerBlockOptin;
+  constexpr static int Headdim = 224;
+  int max_smem_per_block = params.dprops->sharedMemPerBlockOptin;
   //  printf("max_smem_per_block = %d\n", max_smem_per_block);
   BOOL_SWITCH(params.is_causal, Is_causal, [&] {
-    if (max_smem_per_block >= threshold) {  // 112 KB
+    if (max_smem_per_block >= 2 * Headdim * (128 + 2 * 64)) {  // 112 KB
       run_flash_fwd<Flash_fwd_kernel_traits<Headdim, 128, 64, 8, false, false, T>, Is_causal>(params, stream);
     } else {
       run_flash_fwd<Flash_fwd_kernel_traits<Headdim, 64, 64, 4, false, false, T>, Is_causal>(params, stream);
@@ -185,16 +273,14 @@ void run_mha_fwd_hdim224(Flash_fwd_params& params, cudaStream_t stream) {
 
 template <typename T>
 void run_mha_fwd_hdim256(Flash_fwd_params& params, cudaStream_t stream) {
-  constexpr size_t Headdim = 256;
-  constexpr size_t min_threshold = 2 * Headdim * (128 + 2 * 64);
-  constexpr size_t max_threshold = 4 * Headdim * (64 + 2 * 64);
+  constexpr static int Headdim = 256;
   size_t max_smem_per_sm = params.dprops->sharedMemPerMultiprocessor;
   size_t max_smem_per_block = params.dprops->sharedMemPerBlockOptin;
   //  printf("max_smem_per_sm = %d, max_smem_per_block = %d\n", max_smem_per_sm, max_smem_per_block);
   BOOL_SWITCH(params.is_causal, Is_causal, [&] {
     // For A100, we want to run with 128 x 64 (128KB smem).
     // For H100 we want to run with 64 x 64 (96KB smem) since then we can get 2 CTAs per SM.
-    if (max_smem_per_block >= min_threshold && max_smem_per_sm < max_threshold) {
+    if (max_smem_per_block >= 2 * Headdim * (128 + 2 * 64) && max_smem_per_sm < 4 * Headdim * (64 + 2 * 64)) {
       run_flash_fwd<Flash_fwd_kernel_traits<Headdim, 128, 64, 8, false, false, T>, Is_causal>(params, stream);
     } else {
       run_flash_fwd<Flash_fwd_kernel_traits<Headdim, 64, 64, 4, false, false, T>, Is_causal>(params, stream);
diff --git a/onnxruntime/contrib_ops/cuda/bert/flash_attention/flash_fwd_split_hdim128_fp16_sm80.cu b/onnxruntime/contrib_ops/cuda/bert/flash_attention/flash_fwd_split_hdim128_fp16_sm80.cu
new file mode 100644
index 0000000000000..68ae2ea759813
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/bert/flash_attention/flash_fwd_split_hdim128_fp16_sm80.cu
@@ -0,0 +1,15 @@
+// Copyright (c) 2023, Tri Dao.
+// Splitting the different head dimensions to different files to speed up compilation.
+
+#if USE_FLASH_ATTENTION
+
+#include "contrib_ops/cuda/bert/flash_attention/flash_fwd_launch_template.h"
+
+namespace onnxruntime {
+namespace flash {
+
+template void run_mha_fwd_splitkv_dispatch<cutlass::half_t, 128>(Flash_fwd_params& params, cudaStream_t stream);
+
+}  // namespace flash
+}  // namespace onnxruntime
+#endif
diff --git a/onnxruntime/contrib_ops/cuda/bert/flash_attention/flash_fwd_split_hdim160_fp16_sm80.cu b/onnxruntime/contrib_ops/cuda/bert/flash_attention/flash_fwd_split_hdim160_fp16_sm80.cu
new file mode 100644
index 0000000000000..94564a6aba8f3
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/bert/flash_attention/flash_fwd_split_hdim160_fp16_sm80.cu
@@ -0,0 +1,15 @@
+// Copyright (c) 2023, Tri Dao.
+// Splitting the different head dimensions to different files to speed up compilation.
+
+#if USE_FLASH_ATTENTION
+
+#include "contrib_ops/cuda/bert/flash_attention/flash_fwd_launch_template.h"
+
+namespace onnxruntime {
+namespace flash {
+
+template void run_mha_fwd_splitkv_dispatch<cutlass::half_t, 160>(Flash_fwd_params& params, cudaStream_t stream);
+
+}  // namespace flash
+}  // namespace onnxruntime
+#endif
diff --git a/onnxruntime/contrib_ops/cuda/bert/flash_attention/flash_fwd_split_hdim192_fp16_sm80.cu b/onnxruntime/contrib_ops/cuda/bert/flash_attention/flash_fwd_split_hdim192_fp16_sm80.cu
new file mode 100644
index 0000000000000..ec9e9e738c5b3
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/bert/flash_attention/flash_fwd_split_hdim192_fp16_sm80.cu
@@ -0,0 +1,15 @@
+// Copyright (c) 2023, Tri Dao.
+// Splitting the different head dimensions to different files to speed up compilation.
+
+#if USE_FLASH_ATTENTION
+
+#include "contrib_ops/cuda/bert/flash_attention/flash_fwd_launch_template.h"
+
+namespace onnxruntime {
+namespace flash {
+
+template void run_mha_fwd_splitkv_dispatch<cutlass::half_t, 192>(Flash_fwd_params& params, cudaStream_t stream);
+
+}  // namespace flash
+}  // namespace onnxruntime
+#endif
diff --git a/onnxruntime/contrib_ops/cuda/bert/flash_attention/flash_fwd_split_hdim224_fp16_sm80.cu b/onnxruntime/contrib_ops/cuda/bert/flash_attention/flash_fwd_split_hdim224_fp16_sm80.cu
new file mode 100644
index 0000000000000..e6c4ff5d95584
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/bert/flash_attention/flash_fwd_split_hdim224_fp16_sm80.cu
@@ -0,0 +1,15 @@
+// Copyright (c) 2023, Tri Dao.
+// Splitting the different head dimensions to different files to speed up compilation.
+
+#if USE_FLASH_ATTENTION
+
+#include "contrib_ops/cuda/bert/flash_attention/flash_fwd_launch_template.h"
+
+namespace onnxruntime {
+namespace flash {
+
+template void run_mha_fwd_splitkv_dispatch<cutlass::half_t, 224>(Flash_fwd_params& params, cudaStream_t stream);
+
+}  // namespace flash
+}  // namespace onnxruntime
+#endif
diff --git a/onnxruntime/contrib_ops/cuda/bert/flash_attention/flash_fwd_split_hdim256_fp16_sm80.cu b/onnxruntime/contrib_ops/cuda/bert/flash_attention/flash_fwd_split_hdim256_fp16_sm80.cu
new file mode 100644
index 0000000000000..552966852cdbe
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/bert/flash_attention/flash_fwd_split_hdim256_fp16_sm80.cu
@@ -0,0 +1,15 @@
+// Copyright (c) 2023, Tri Dao.
+// Splitting the different head dimensions to different files to speed up compilation.
+
+#if USE_FLASH_ATTENTION
+
+#include "contrib_ops/cuda/bert/flash_attention/flash_fwd_launch_template.h"
+
+namespace onnxruntime {
+namespace flash {
+
+template void run_mha_fwd_splitkv_dispatch<cutlass::half_t, 256>(Flash_fwd_params& params, cudaStream_t stream);
+
+}  // namespace flash
+}  // namespace onnxruntime
+#endif
diff --git a/onnxruntime/contrib_ops/cuda/bert/flash_attention/flash_fwd_split_hdim32_fp16_sm80.cu b/onnxruntime/contrib_ops/cuda/bert/flash_attention/flash_fwd_split_hdim32_fp16_sm80.cu
new file mode 100644
index 0000000000000..e9f191a4828d6
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/bert/flash_attention/flash_fwd_split_hdim32_fp16_sm80.cu
@@ -0,0 +1,15 @@
+// Copyright (c) 2023, Tri Dao.
+// Splitting the different head dimensions to different files to speed up compilation.
+
+#if USE_FLASH_ATTENTION
+
+#include "contrib_ops/cuda/bert/flash_attention/flash_fwd_launch_template.h"
+
+namespace onnxruntime {
+namespace flash {
+
+template void run_mha_fwd_splitkv_dispatch<cutlass::half_t, 32>(Flash_fwd_params& params, cudaStream_t stream);
+
+}  // namespace flash
+}  // namespace onnxruntime
+#endif
diff --git a/onnxruntime/contrib_ops/cuda/bert/flash_attention/flash_fwd_split_hdim64_fp16_sm80.cu b/onnxruntime/contrib_ops/cuda/bert/flash_attention/flash_fwd_split_hdim64_fp16_sm80.cu
new file mode 100644
index 0000000000000..d628a556680ad
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/bert/flash_attention/flash_fwd_split_hdim64_fp16_sm80.cu
@@ -0,0 +1,15 @@
+// Copyright (c) 2023, Tri Dao.
+// Splitting the different head dimensions to different files to speed up compilation.
+
+#if USE_FLASH_ATTENTION
+
+#include "contrib_ops/cuda/bert/flash_attention/flash_fwd_launch_template.h"
+
+namespace onnxruntime {
+namespace flash {
+
+template void run_mha_fwd_splitkv_dispatch<cutlass::half_t, 64>(Flash_fwd_params& params, cudaStream_t stream);
+
+}  // namespace flash
+}  // namespace onnxruntime
+#endif
diff --git a/onnxruntime/contrib_ops/cuda/bert/flash_attention/flash_fwd_split_hdim96_fp16_sm80.cu b/onnxruntime/contrib_ops/cuda/bert/flash_attention/flash_fwd_split_hdim96_fp16_sm80.cu
new file mode 100644
index 0000000000000..88b6cc0fb1e22
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/bert/flash_attention/flash_fwd_split_hdim96_fp16_sm80.cu
@@ -0,0 +1,15 @@
+// Copyright (c) 2023, Tri Dao.
+// Splitting the different head dimensions to different files to speed up compilation.
+
+#if USE_FLASH_ATTENTION
+
+#include "contrib_ops/cuda/bert/flash_attention/flash_fwd_launch_template.h"
+
+namespace onnxruntime {
+namespace flash {
+
+template void run_mha_fwd_splitkv_dispatch<cutlass::half_t, 96>(Flash_fwd_params& params, cudaStream_t stream);
+
+}  // namespace flash
+}  // namespace onnxruntime
+#endif
diff --git a/onnxruntime/contrib_ops/cuda/bert/flash_attention/kernel_traits.h b/onnxruntime/contrib_ops/cuda/bert/flash_attention/kernel_traits.h
index 0c967faa85c45..1c0ed7f2fc2e8 100644
--- a/onnxruntime/contrib_ops/cuda/bert/flash_attention/kernel_traits.h
+++ b/onnxruntime/contrib_ops/cuda/bert/flash_attention/kernel_traits.h
@@ -111,7 +111,8 @@ struct Flash_fwd_kernel_traits : public Base {
   using SmemLayoutO = decltype(tile_to_shape(
       SmemLayoutAtomO{},
       Shape<Int<kBlockM>, Int<kHeadDim>>{}));
-  using SmemCopyAtomO = Copy_Atom<DefaultCopy, elem_type>;
+  using SmemCopyAtomO = Copy_Atom<DefaultCopy, Element>;
+  using SmemCopyAtomOaccum = Copy_Atom<DefaultCopy, ElementAccum>;
 
   static constexpr int kSmemQCount = cute::size(SmemLayoutQ{});
   static constexpr int kSmemKVCount = cute::size(SmemLayoutKV{}) * 2;
@@ -139,18 +140,35 @@ struct Flash_fwd_kernel_traits : public Base {
       DefaultCopy>;
   using GmemTiledCopyQKV = decltype(make_tiled_copy(Copy_Atom<Gmem_copy_struct, elem_type>{},
                                                     GmemLayoutAtom{},
-                                                    Layout<Shape<_1, _8>>{}));  // Val layout, 8 vals per read
+                                                    cute::Layout<cute::Shape<_1, _8>>{}));  // Val layout, 8 vals per read
   using GmemTiledCopyO = decltype(make_tiled_copy(Copy_Atom<DefaultCopy, elem_type>{},
                                                   GmemLayoutAtom{},
-                                                  Layout<Shape<_1, _8>>{}));  // Val layout, 8 vals per store
+                                                  cute::Layout<cute::Shape<_1, _8>>{}));  // Val layout, 8 vals per store
   static constexpr int kGmemThreadsPerRowP = kBlockN / kGmemElemsPerLoad;
   static_assert(kNThreads % kGmemThreadsPerRowP == 0, "kNThreads must be a multiple of kGmemThreadsPerRowP");
-  using GmemLayoutAtomP = Layout<Shape<Int<kNThreads / kGmemThreadsPerRowP>, Int<kGmemThreadsPerRowP>>,
-                                 Stride<Int<kGmemThreadsPerRowP>, _1>>;
+  using GmemLayoutAtomP = cute::Layout<cute::Shape<cute::Int<kNThreads / kGmemThreadsPerRowP>, cute::Int<kGmemThreadsPerRowP>>,
+                                       cute::Stride<cute::Int<kGmemThreadsPerRowP>, _1>>;
 
   using GmemTiledCopyP = decltype(make_tiled_copy(Copy_Atom<DefaultCopy, elem_type>{},
                                                   GmemLayoutAtomP{},
-                                                  Layout<Shape<_1, _8>>{}));  // Val layout, 8 vals per store
+                                                  cute::Layout<cute::Shape<_1, _8>>{}));  // Val layout, 8 vals per store
+
+  using GmemLayoutAtomOaccum = std::conditional_t<
+      kBlockKSmem == 32,
+      cute::Layout<cute::Shape<_16, _8>,  // Thread layout, 8 threads per row
+                   cute::Stride<_8, _1>>,
+      cute::Layout<cute::Shape<_8, _16>,  // Thread layout, 16 threads per row
+                   cute::Stride<_16, _1>>>;
+  using GmemTiledCopyOaccum = decltype(make_tiled_copy(Copy_Atom<DefaultCopy, ElementAccum>{},
+                                                       GmemLayoutAtomOaccum{},
+                                                       Layout<Shape<_1, _4>>{}));  // Val layout, 4 vals per store
+  using GmemLayoutAtomRotcossin = GmemLayoutAtom;
+  using GmemTiledCopyRotcossin = decltype(make_tiled_copy(Copy_Atom<UniversalCopy<uint64_t>, Element>{},
+                                                          GmemLayoutAtomRotcossin{},
+                                                          Layout<Shape<_1, _4>>{}));  // Val layout, 4 vals per load
+  using GmemTiledCopyRotcossinCont = decltype(make_tiled_copy(Copy_Atom<DefaultCopy, Element>{},
+                                                              GmemLayoutAtomRotcossin{},
+                                                              Layout<Shape<_1, _8>>{}));  // Val layout, 8 vals per load
 };
 
 // Is_V_in_regs is an option to reduce smem usage, but will increase register pressue.
@@ -289,13 +307,13 @@ struct Flash_bwd_kernel_traits : public Base {
   static constexpr int kSmemdSCount = cute::size(SmemLayoutPdS{});
   static constexpr int kSmemPCount = cute::size(SmemLayoutPdS{});
   static constexpr int kSmemdQCount = cute::size(SmemLayoutdQ{});
-  static constexpr int kSmemdPsumCount = kBlockM;
+  //   static constexpr int kSmemdPsumCount = kBlockM;
   static constexpr int kSmemQdOSize = kSmemQdOCount * sizeof(Element);
   static constexpr int kSmemKVSize = kSmemKVCount * sizeof(Element);
   static constexpr int kSmemdSSize = kSmemdSCount * sizeof(Element);
   static constexpr int kSmemPSize = kSmemPCount * sizeof(Element);
   static constexpr int kSmemdQSize = kSmemdQCount * sizeof(Element);
-  static constexpr int kSmemdPsumSize = kSmemdPsumCount * sizeof(ElementAccum);
+  //   static constexpr int kSmemdPsumSize = kSmemdPsumCount * sizeof(ElementAccum);
   static constexpr int kSmemSize = kSmemQdOSize + (!Is_V_in_regs
                                                        ? kSmemKVSize + kSmemdSSize + std::max(kSmemPSize, kSmemdQSize)
                                                        : std::max(kSmemKVSize, kSmemKVSize / 2 + kSmemdSSize + std::max(kSmemPSize, kSmemdQSize)));
diff --git a/onnxruntime/contrib_ops/cuda/bert/flash_attention/softmax.h b/onnxruntime/contrib_ops/cuda/bert/flash_attention/softmax.h
index 842edf3a98a86..8017f83bbb01d 100644
--- a/onnxruntime/contrib_ops/cuda/bert/flash_attention/softmax.h
+++ b/onnxruntime/contrib_ops/cuda/bert/flash_attention/softmax.h
@@ -139,10 +139,11 @@ inline __device__ void apply_mask(Tensor<Engine, Layout>& tensor, const int max_
   }
 }
 
-template <typename Engine, typename Layout>
-inline __device__ void apply_mask_causal(Tensor<Engine, Layout>& tensor, const int col_idx_offset_,
-                                         const int max_seqlen_k, const int row_idx_offset_,
-                                         const int max_seqlen_q, const int warp_row_stride) {
+template <bool HasWSLeft = true, typename Engine, typename Layout>
+inline __device__ void apply_mask_local(Tensor<Engine, Layout>& tensor, const int col_idx_offset_,
+                                        const int max_seqlen_k, const int row_idx_offset_,
+                                        const int max_seqlen_q, const int warp_row_stride,
+                                        const int window_size_left, const int window_size_right) {
   // tensor has shape (ncol=(2, MMA_M), nrow=(2, MMA_N))
   static_assert(Layout::rank == 2, "Only support 2D Tensor");
   const int lane_id = threadIdx.x % 32;
@@ -155,14 +156,15 @@ inline __device__ void apply_mask_causal(Tensor<Engine, Layout>& tensor, const i
 #pragma unroll
     for (int i = 0; i < size<0, 0>(tensor); ++i) {
       const int row_idx = row_idx_base + i * 8;
-      const int col_idx_limit = std::min(max_seqlen_k, row_idx + 1 + max_seqlen_k - max_seqlen_q);
+      const int col_idx_limit_left = std::max(0, row_idx + max_seqlen_k - max_seqlen_q - window_size_left);
+      const int col_idx_limit_right = std::min(max_seqlen_k, row_idx + 1 + max_seqlen_k - max_seqlen_q + window_size_right);
 #pragma unroll
       for (int nj = 0; nj < size<1, 1>(tensor); ++nj) {
         const int col_idx_base = col_idx_offset + nj * 8;
 #pragma unroll
         for (int j = 0; j < size<1, 0>(tensor); ++j) {
           const int col_idx = col_idx_base + j;
-          if (col_idx >= col_idx_limit) {
+          if (col_idx >= col_idx_limit_right || (HasWSLeft && col_idx < col_idx_limit_left)) {
             tensor(make_coord(i, mi), make_coord(j, nj)) = -INFINITY;
           }
         }
@@ -176,6 +178,15 @@ inline __device__ void apply_mask_causal(Tensor<Engine, Layout>& tensor, const i
   }
 }
 
+template <typename Engine, typename Layout>
+inline __device__ void apply_mask_causal(Tensor<Engine, Layout>& tensor, const int col_idx_offset_,
+                                         const int max_seqlen_k, const int row_idx_offset_,
+                                         const int max_seqlen_q, const int warp_row_stride) {
+  // Causal masking is equivalent to local masking with window_size_left = infinity and window_size_right = 0
+  apply_mask_local</*HasWSLeft=*/false>(tensor, col_idx_offset_, max_seqlen_k, row_idx_offset_,
+                                        max_seqlen_q, warp_row_stride, -1, 0);
+}
+
 template <typename Engine0, typename Layout0, typename Engine1, typename Layout1>
 inline __device__ void apply_mask_causal_w_idx(
     Tensor<Engine0, Layout0>& tensor, Tensor<Engine1, Layout1> const& idx_rowcol,
diff --git a/onnxruntime/contrib_ops/cuda/bert/flash_attention/utils.h b/onnxruntime/contrib_ops/cuda/bert/flash_attention/utils.h
index 49ee687419d0e..271112c5e890a 100644
--- a/onnxruntime/contrib_ops/cuda/bert/flash_attention/utils.h
+++ b/onnxruntime/contrib_ops/cuda/bert/flash_attention/utils.h
@@ -96,46 +96,6 @@ inline __device__ uint32_t convert_relu2<cutlass::bfloat16_t>(const float2 x) {
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 
-template <typename T>
-inline __device__ float2 half2_unpack(uint32_t a);
-
-template <>
-inline __device__ float2 half2_unpack<__half>(uint32_t a) {
-  return __half22float2(reinterpret_cast<__half2(&)>(a));
-}
-
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800
-template <>
-inline __device__ float2 half2_unpack<__nv_bfloat16>(uint32_t a) {
-  return __bfloat1622float2(reinterpret_cast<__nv_bfloat162(&)>(a));
-}
-#endif
-
-////////////////////////////////////////////////////////////////////////////////////////////////////
-
-// Convert two half2's or bf162's into float, then take their dot product.
-template <typename T>
-inline __device__ float hfma2_to_float(const uint32_t a, const uint32_t b) {
-  float2 af = flash::half2_unpack<T>(a);
-  float2 bf = flash::half2_unpack<T>(b);
-  return af.x * bf.x + af.y * bf.y;
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////
-
-// Converted two vectors of 8 half's or bf16's into float, then take their dot product.
-template <typename T>
-inline __device__ float hmulsum8(const uint4 a, const uint4 b) {
-  float sum;
-  sum = flash::hfma2_to_float<T>(a.x, b.x);
-  sum += flash::hfma2_to_float<T>(a.y, b.y);
-  sum += flash::hfma2_to_float<T>(a.z, b.z);
-  sum += flash::hfma2_to_float<T>(a.w, b.w);
-  return sum;
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////
-
 template <typename T>
 struct MaxOp {
   __device__ inline T operator()(T const& x, T const& y) { return x > y ? x : y; }
@@ -245,7 +205,10 @@ inline __device__ auto convert_layout_acc_rowcol(Layout acc_layout) {
   static_assert(decltype(size<0>(acc_layout))::value == 4);
   static_assert(decltype(rank(acc_layout))::value == 3);
   auto l = logical_divide(acc_layout, Shape<_2>{});  // ((2, 2), MMA_M, MMA_N)
-  return make_layout(make_layout(get<0, 1>(l), get<1>(l)), make_layout(get<0, 0>(l), get<2>(l)));
+                                                     // TD [2023-08-13]: Idk why but get<0, 1>(l) doesn't work for Cutlass 3.2, I'm getting
+  // "int_tuple.hpp(74): error: conversion to inaccessible base class"
+  // return make_layout(make_layout(get<0, 1>(l), get<1>(l)), make_layout(get<0, 0>(l), get<2>(l)));
+  return make_layout(make_layout(get<1>(get<0>(l)), get<1>(l)), make_layout(get<0>(get<0>(l)), get<2>(l)));
 };
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -261,9 +224,13 @@ inline __device__ auto convert_layout_rowcol_Aregs(Layout rowcol_layout) {
   static_assert(mma_shape_K == 8 || mma_shape_K == 16);
   constexpr int MMA_N_divisor = mma_shape_K == 8 ? 1 : 2;
   auto l = logical_divide(rowcol_layout, Shape<X, Shape<X, Int<MMA_N_divisor>>>{});  // ((2, MMA_M), (2, (2, MMA_N / 2)))
-  return make_layout(make_layout(get<1, 0>(l), get<0, 0>(l), get<1, 1, 0>(l)),
-                     get<0, 1>(l),
-                     get<1, 1, 1>(l));
+                                                                                     // TD [2023-08-13]: Same error as above on Cutlass 3.2
+  // return make_layout(make_layout(get<1, 0>(l), get<0, 0>(l), get<1, 1, 0>(l)),
+  //                    get<0, 1>(l),
+  //                    get<1, 1, 1>(l));
+  return make_layout(make_layout(get<0>(get<1>(l)), get<0>(get<0>(l)), get<0>(get<1>(get<1>(l)))),
+                     get<1>(get<0>(l)),
+                     get<1>(get<1>(get<1>(l))));
 };
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -338,9 +305,9 @@ CUTE_HOST_DEVICE void cp_async_wait() {
 template <bool Is_even_MN = true, bool Is_even_K = true, bool Clear_OOB_MN = false, bool Clear_OOB_K = true,
           typename TiledCopy, typename Engine0, typename Layout0, typename Engine1, typename Layout1,
           typename Engine2, typename Layout2, typename Engine3, typename Layout3>
-inline __device__ void copy(TiledCopy thr_copy, Tensor<Engine0, Layout0> const& S,
+inline __device__ void copy(TiledCopy tiled_copy, Tensor<Engine0, Layout0> const& S,
                             Tensor<Engine1, Layout1>& D, Tensor<Engine2, Layout2> const& identity_MN,
-                            Tensor<Engine3, Layout3> const& predicate_K, int max_MN = 0) {
+                            Tensor<Engine3, Layout3> const& predicate_K, const int max_MN = 0) {
   CUTE_STATIC_ASSERT_V(rank(S) == Int<3>{});
   CUTE_STATIC_ASSERT_V(rank(D) == Int<3>{});
   CUTE_STATIC_ASSERT_V(size<0>(S) == size<0>(D));  // MMA
@@ -354,13 +321,176 @@ inline __device__ void copy(TiledCopy thr_copy, Tensor<Engine0, Layout0> const&
 #pragma unroll
       for (int k = 0; k < size<2>(S); ++k) {
         if (Is_even_K || predicate_K(k)) {
-          copy(thr_copy, S(_, m, k), D(_, m, k));
+          cute::copy(tiled_copy, S(_, m, k), D(_, m, k));
         } else if (Clear_OOB_K) {
-          clear(D(_, m, k));
+          cute::clear(D(_, m, k));
         }
       }
     } else if (Clear_OOB_MN) {
-      clear(D(_, m, _));
+      cute::clear(D(_, m, _));
+    }
+  }
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+template <bool Is_even_K = true,
+          typename Engine0, typename Layout0, typename Engine1, typename Layout1,
+          typename Engine2, typename Layout2, typename Engine3, typename Layout3>
+inline __device__ void copy_w_min_idx(Tensor<Engine0, Layout0> const& S,
+                                      Tensor<Engine1, Layout1>& D, Tensor<Engine2, Layout2> const& identity_MN,
+                                      Tensor<Engine3, Layout3> const& predicate_K,
+                                      const int max_MN = 0, const int min_MN = 0) {
+  CUTE_STATIC_ASSERT_V(rank(S) == Int<3>{});
+  CUTE_STATIC_ASSERT_V(rank(D) == Int<3>{});
+  CUTE_STATIC_ASSERT_V(size<0>(S) == size<0>(D));  // MMA
+  CUTE_STATIC_ASSERT_V(size<1>(S) == size<1>(D));  // MMA_M
+  CUTE_STATIC_ASSERT_V(size<2>(S) == size<2>(D));  // MMA_K
+// if (threadIdx.x == 0 && blockIdx.z == 0) { printf("blockIdx.y = %d, max_MN = %d, min_MN = %d\n", blockIdx.y, max_MN, min_MN); }
+#pragma unroll
+  for (int m = 0; m < size<1>(S); ++m) {
+    // if (threadIdx.x == 0 && blockIdx.z == 0) { printf("blockIdx.y = %d, m = %d\n", blockIdx.y, get<0>(identity_MN(0, m, 0))); }
+    if (get<0>(identity_MN(0, m, 0)) >= min_MN && get<0>(identity_MN(0, m, 0)) < max_MN) {
+// if (threadIdx.x == 0 && blockIdx.z == 0) { printf("Inner loop, blockIdx.y = %d, m = %d\n", blockIdx.y, get<0>(identity_MN(0, m, 0))); }
+#pragma unroll
+      for (int k = 0; k < size<2>(S); ++k) {
+        if (Is_even_K || predicate_K(k)) {
+          cute::copy(S(_, m, k), D(_, m, k));
+        }
+      }
+    }
+  }
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+template <bool Is_even_K = true, bool Clear_OOB_K = true,
+          typename Engine0, typename Layout0, typename Engine1, typename Layout1,
+          typename Engine2, typename Layout2, typename Engine3, typename Layout3>
+inline __device__ void copy_rotary_interleaved(Tensor<Engine0, Layout0> const& S,
+                                               Tensor<Engine1, Layout1>& D,
+                                               Tensor<Engine2, Layout2> const& Cos,
+                                               Tensor<Engine2, Layout2> const& Sin,
+                                               Tensor<Engine3, Layout3> const& identity_MN,
+                                               const int max_MN, const int min_MN,
+                                               const int dim, const int rotary_dim) {
+  CUTE_STATIC_ASSERT_V(rank(S) == Int<3>{});
+  CUTE_STATIC_ASSERT_V(rank(D) == Int<3>{});
+  CUTE_STATIC_ASSERT_V(size<0>(S) == size<0>(D));      // MMA
+  CUTE_STATIC_ASSERT_V(size<1>(S) == size<1>(D));      // MMA_M
+  CUTE_STATIC_ASSERT_V(size<2>(S) == size<2>(D));      // MMA_K
+  CUTE_STATIC_ASSERT_V(size<1>(S) == size<1>(Cos));    // MMA_M
+  CUTE_STATIC_ASSERT_V(size<2>(S) == size<2>(Cos));    // MMA_K
+  CUTE_STATIC_ASSERT_V(size<1>(S) == size<1>(Sin));    // MMA_M
+  CUTE_STATIC_ASSERT_V(size<2>(S) == size<2>(Sin));    // MMA_K
+  CUTE_STATIC_ASSERT_V(size<0>(Cos) == size<0>(Sin));  // MMA_K
+  static_assert(decltype(size<0>(S))::value == decltype(size<0>(Cos))::value * 2);
+  static_assert(decltype(size<0>(Cos))::value % 2 == 0);  // Since we do fast conversion from fp16/bf16 to fp32
+  Tensor rCos = make_fragment_like(Cos);
+  Tensor rSin = make_fragment_like(Sin);
+  Tensor rS = make_fragment_like(S);
+#pragma unroll
+  for (int m = 0; m < size<1>(S); ++m) {
+    if (get<0>(identity_MN(0, m, 0)) >= min_MN && get<0>(identity_MN(0, m, 0)) < max_MN) {
+#pragma unroll
+      for (int k = 0; k < size<2>(S); ++k) {
+        if (Is_even_K || get<1>(identity_MN(0, 0, k)) < dim) {
+          cute::copy(S(_, m, k), rS(_, m, k));
+          if (get<1>(identity_MN(0, 0, k)) < rotary_dim) {
+            cute::copy(Cos(_, m, k), rCos(_, m, k));
+            cute::copy(Sin(_, m, k), rSin(_, m, k));
+            Tensor S_fp32 = convert_type<float>(rS(_, m, k));
+            Tensor cos_fp32 = convert_type<float>(rCos(_, m, k));
+            Tensor sin_fp32 = convert_type<float>(rSin(_, m, k));
+#pragma unroll
+            for (int i = 0; i < size<0>(rS) / 2; ++i) {
+              float real = S_fp32(2 * i) * cos_fp32(i) - S_fp32(2 * i + 1) * sin_fp32(i);
+              float imag = S_fp32(2 * i) * sin_fp32(i) + S_fp32(2 * i + 1) * cos_fp32(i);
+              S_fp32(2 * i) = real;
+              S_fp32(2 * i + 1) = imag;
+            }
+            // Idk but I need to copy for the convert_type to work
+            Tensor S_fp32_copy = make_fragment_like(S_fp32);
+            cute::copy(S_fp32, S_fp32_copy);
+            using T = typename Engine0::value_type;
+            Tensor S_og_type = convert_type<T>(S_fp32_copy);
+            cute::copy(S_og_type, rS(_, m, k));
+          }
+          cute::copy(rS(_, m, k), D(_, m, k));
+        } else if (Clear_OOB_K) {
+          cute::clear(D(_, m, k));
+        }
+      }
+    }
+  }
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+template <bool Is_even_K = true, bool Clear_OOB_K = true,
+          typename Engine0, typename Layout0, typename Engine1, typename Layout1,
+          typename Engine2, typename Layout2, typename Engine3, typename Layout3>
+inline __device__ void copy_rotary_contiguous(Tensor<Engine0, Layout0> const& S,
+                                              Tensor<Engine1, Layout1>& D,
+                                              Tensor<Engine2, Layout2> const& Cos,
+                                              Tensor<Engine2, Layout2> const& Sin,
+                                              Tensor<Engine3, Layout3> const& identity_MN,
+                                              const int max_MN, const int min_MN,
+                                              const int dim, const int rotary_dim) {
+  CUTE_STATIC_ASSERT_V(rank(S) == Int<3>{});
+  CUTE_STATIC_ASSERT_V(rank(D) == Int<3>{});
+  CUTE_STATIC_ASSERT_V(size<0>(S) == size<0>(D));    // MMA
+  CUTE_STATIC_ASSERT_V(size<1>(S) == size<1>(D));    // MMA_M
+  CUTE_STATIC_ASSERT_V(size<2>(S) == size<2>(D));    // MMA_K
+  CUTE_STATIC_ASSERT_V(size<1>(S) == size<1>(Cos));  // MMA_M
+  CUTE_STATIC_ASSERT_V(size<2>(S) == size<2>(Cos));  // MMA_K
+  CUTE_STATIC_ASSERT_V(size<1>(S) == size<1>(Sin));  // MMA_M
+  CUTE_STATIC_ASSERT_V(size<2>(S) == size<2>(Sin));  // MMA_K
+  CUTE_STATIC_ASSERT_V(size<0>(S) == size<0>(Cos));  // MMA
+  CUTE_STATIC_ASSERT_V(size<0>(Cos) == size<0>(Sin));
+  static_assert(decltype(size<0>(Cos))::value % 2 == 0);  // Since we do fast conversion from fp16/bf16 to fp32
+  Tensor rCos = make_fragment_like(Cos);
+  Tensor rSin = make_fragment_like(Sin);
+  Tensor rS = make_fragment_like(S);
+  Tensor rS_other = make_fragment_like(rS(_, 0, 0));
+#pragma unroll
+  for (int m = 0; m < size<1>(S); ++m) {
+    if (get<0>(identity_MN(0, m, 0)) >= min_MN && get<0>(identity_MN(0, m, 0)) < max_MN) {
+#pragma unroll
+      for (int k = 0; k < size<2>(S); ++k) {
+        if (Is_even_K || get<1>(identity_MN(0, 0, k)) < dim) {
+          cute::copy(S(_, m, k), rS(_, m, k));
+          if (get<1>(identity_MN(0, 0, k)) < rotary_dim) {
+            const bool is_left = get<1>(identity_MN(0, 0, k)) < rotary_dim / 2;
+            Tensor gS_other = make_tensor(S(_, m, k).data() + (is_left ? rotary_dim / 2 : -rotary_dim / 2), S(_, m, k).layout());
+            cute::copy(gS_other, rS_other);
+            // if (cute::thread0()) { print_tensor(rS(_, m, k)); print_tensor(rS_other); }
+            Tensor gCos = make_tensor(Cos(_, m, k).data() + (is_left ? 0 : -rotary_dim / 2), Cos(_, m, k).layout());
+            Tensor gSin = make_tensor(Sin(_, m, k).data() + (is_left ? 0 : -rotary_dim / 2), Sin(_, m, k).layout());
+            cute::copy(gCos, rCos(_, m, k));
+            cute::copy(gSin, rSin(_, m, k));
+            // if (cute::thread0()) { print_tensor(rCos(_, m, k)); print_tensor(rSin(_, m, k)); }
+            Tensor S_fp32 = convert_type<float>(rS(_, m, k));
+            Tensor S_other_fp32 = convert_type<float>(rS_other);
+            Tensor cos_fp32 = convert_type<float>(rCos(_, m, k));
+            Tensor sin_fp32 = convert_type<float>(rSin(_, m, k));
+#pragma unroll
+            for (int i = 0; i < size<0>(rS); ++i) {
+              S_fp32(i) = S_fp32(i) * cos_fp32(i) + S_other_fp32(i) * (is_left ? -sin_fp32(i) : sin_fp32(i));
+            }
+            // Idk but I need to copy for the convert_type to work
+            Tensor S_fp32_copy = make_fragment_like(S_fp32);
+            cute::copy(S_fp32, S_fp32_copy);
+            using T = typename Engine0::value_type;
+            Tensor S_og_type = convert_type<T>(S_fp32_copy);
+            cute::copy(S_og_type, rS(_, m, k));
+            // if (cute::thread0()) { print_tensor(rS(_, m, k)); }
+          }
+          cute::copy(rS(_, m, k), D(_, m, k));
+        } else if (Clear_OOB_K) {
+          cute::clear(D(_, m, k));
+        }
+      }
     }
   }
 }
diff --git a/onnxruntime/contrib_ops/cuda/bert/group_query_attention.cc b/onnxruntime/contrib_ops/cuda/bert/group_query_attention.cc
new file mode 100644
index 0000000000000..93892169f6c79
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/bert/group_query_attention.cc
@@ -0,0 +1,240 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "core/providers/cuda/cuda_common.h"
+#include "core/platform/env_var_utils.h"
+#include "contrib_ops/cuda/bert/group_query_attention_impl.h"
+#include "contrib_ops/cuda/bert/group_query_attention.h"
+#include "contrib_ops/cuda/bert/group_query_attention_helper.h"
+#include "contrib_ops/cuda/bert/cutlass_fmha/memory_efficient_attention.h"
+#include "contrib_ops/cuda/bert/flash_attention/flash_api.h"
+
+using namespace onnxruntime::cuda;
+using namespace ::onnxruntime::common;
+using namespace ONNX_NAMESPACE;
+
+namespace onnxruntime {
+namespace contrib {
+namespace cuda {
+
+#define REGISTER_KERNEL_TYPED(T)                                         \
+  ONNX_OPERATOR_TYPED_KERNEL_EX(                                         \
+      GroupQueryAttention,                                               \
+      kMSDomain,                                                         \
+      1,                                                                 \
+      T,                                                                 \
+      kCudaExecutionProvider,                                            \
+      (*KernelDefBuilder::Create())                                      \
+          .TypeConstraint("T", DataTypeImpl::GetTensorType<T>())         \
+          .TypeConstraint("M", {DataTypeImpl::GetTensorType<int32_t>()}) \
+          .MayInplace(3, 1)                                              \
+          .MayInplace(4, 2)                                              \
+          .InputMemoryType(OrtMemTypeCPUInput, 6),                       \
+      GroupQueryAttention<T>);
+
+// REGISTER_KERNEL_TYPED(float)
+REGISTER_KERNEL_TYPED(MLFloat16)
+
+template <typename T>
+GroupQueryAttention<T>::GroupQueryAttention(const OpKernelInfo& info)
+    : CudaKernel(info) {
+  int64_t num_heads = 0;
+  int64_t kv_num_heads = 0;
+  ORT_ENFORCE(info.GetAttr("num_heads", &num_heads).IsOK() && num_heads > 0);
+  ORT_ENFORCE(info.GetAttr("kv_num_heads", &kv_num_heads).IsOK() && kv_num_heads > 0 && num_heads % kv_num_heads == 0);
+  num_heads_ = static_cast<int>(num_heads);
+  kv_num_heads_ = static_cast<int>(kv_num_heads);
+  is_past_bsnh_ = false;  // info.GetAttrOrDefault<int64_t>("is_past_bsnh", 1) == 1;
+  local_window_size_ = static_cast<int>(info.GetAttrOrDefault<int64_t>("local_window_size", -1));
+  scale_ = info.GetAttrOrDefault<float>("scale", 0.0f);
+
+#if USE_FLASH_ATTENTION
+  disable_flash_attention_ = sizeof(T) != 2 ||
+                             ParseEnvironmentVariableWithDefault<bool>(attention::kDisableFlashAttention, false);
+#else
+  disable_flash_attention_ = true;
+#endif
+
+#if USE_MEMORY_EFFICIENT_ATTENTION
+  disable_memory_efficient_attention_ = sizeof(T) != 2 ||
+                                        ParseEnvironmentVariableWithDefault<bool>(attention::kDisableMemoryEfficientAttention, false);
+#else
+  disable_memory_efficient_attention_ = true;
+#endif
+}
+
+template <typename T>
+Status GroupQueryAttention<T>::ComputeInternal(OpKernelContext* context) const {
+  const Tensor* query = context->Input<Tensor>(0);
+  const Tensor* key = context->Input<Tensor>(1);
+  const Tensor* value = context->Input<Tensor>(2);
+  const Tensor* past_key = context->Input<Tensor>(3);
+  const Tensor* past_value = context->Input<Tensor>(4);
+  const Tensor* seqlens_k = context->Input<Tensor>(5);
+  const Tensor* total_seqlen = context->Input<Tensor>(6);
+
+  auto& device_prop = GetDeviceProp();
+  GroupQueryAttentionParameters parameters;
+  typedef typename ToCudaType<T>::MappedType CudaT;
+  GroupQueryAttentionData<CudaT> data;
+
+  ORT_RETURN_IF_ERROR(group_query_attention_helper::CheckInputs(query,
+                                                                key,
+                                                                value,
+                                                                past_key,
+                                                                past_value,
+                                                                &parameters,
+                                                                num_heads_,
+                                                                kv_num_heads_,
+                                                                seqlens_k,
+                                                                total_seqlen,
+                                                                is_past_bsnh_,
+                                                                scale_,
+                                                                device_prop.maxThreadsPerBlock));
+  parameters.local_window_size = local_window_size_;
+  int sequence_length = parameters.sequence_length;
+
+  TensorShapeVector output_shape(3);
+  output_shape[0] = static_cast<int64_t>(parameters.batch_size);
+  output_shape[1] = static_cast<int64_t>(sequence_length);
+  output_shape[2] = static_cast<int64_t>(parameters.hidden_size);
+  Tensor* output = context->Output(0, output_shape);
+
+#if USE_FLASH_ATTENTION
+  bool use_flash_attention = !disable_flash_attention_ &&
+                             onnxruntime::flash::is_supported(device_prop,
+                                                              parameters.head_size,
+                                                              parameters.num_heads,
+                                                              parameters.kv_num_heads);
+  // Allocate buffers
+  size_t softmax_lse_bytes = 0;
+  size_t softmax_lse_accum_bytes = 0;
+  size_t out_accum_bytes = 0;
+  if (use_flash_attention) {
+    // softmax buffer
+    softmax_lse_bytes = onnxruntime::flash::get_softmax_lse_size(parameters.sequence_length, parameters.batch_size, parameters.num_heads);
+    // split kv buffer
+    using namespace std;
+    auto [num_splits, slse_accum_bytes, o_accum_bytes] = onnxruntime::flash::get_num_splits_and_buffer_sizes(
+        parameters.batch_size, parameters.sequence_length, parameters.sequence_length, parameters.num_heads,
+        parameters.head_size, device_prop.multiProcessorCount);
+    parameters.num_splits = num_splits;
+    softmax_lse_accum_bytes = slse_accum_bytes;
+    out_accum_bytes = o_accum_bytes;
+  }
+  auto softmax_lse_buffer = GetScratchBuffer<void>(softmax_lse_bytes, context->GetComputeStream());
+  auto softmax_lse_accum_buffer = GetScratchBuffer<void>(softmax_lse_accum_bytes, context->GetComputeStream());
+  auto out_accum_buffer = GetScratchBuffer<void>(out_accum_bytes, context->GetComputeStream());
+#else
+  constexpr bool use_flash_attention = false;
+  auto softmax_lse_buffer = GetScratchBuffer<void>(0, context->GetComputeStream());        // nullptr
+  auto softmax_lse_accum_buffer = GetScratchBuffer<void>(0, context->GetComputeStream());  // nullptr
+  auto out_accum_buffer = GetScratchBuffer<void>(0, context->GetComputeStream());          // nullptr
+#endif
+
+#if USE_MEMORY_EFFICIENT_ATTENTION
+  int sm = (device_prop.major * 10) + device_prop.minor;
+  bool use_memory_efficient_attention =
+      !use_flash_attention &&
+      !disable_memory_efficient_attention_ &&
+      local_window_size_ == -1 &&
+      (parameters.head_size & 7) == 0 &&
+      parameters.sequence_length <= parameters.seqlen_past_kv_cache + parameters.sequence_length &&
+      (sizeof(T) == 2 || parameters.sequence_length >= attention::kMinSeqLenForMemoryEfficientAttentionFp32) &&
+      has_memory_efficient_attention(sm, sizeof(T) == 2);
+  // allocate buffers
+  size_t kv_buffer_bytes = 0;
+  // need a buffer if we must ungroup kv
+  const bool needs_buff = (parameters.num_heads != parameters.kv_num_heads);
+  if (use_memory_efficient_attention && needs_buff) {
+    kv_buffer_bytes = (sizeof(T) * parameters.batch_size * parameters.num_heads * parameters.seqlen_present_kv_cache * parameters.head_size);
+  }
+  size_t fmha_buffer_bytes = 0;
+  if (use_memory_efficient_attention && MemoryEfficientAttentionParams::need_workspace(parameters.head_size, sizeof(T) == sizeof(float))) {
+    fmha_buffer_bytes = (parameters.batch_size * parameters.sequence_length * parameters.num_heads * parameters.head_size * sizeof(float));
+  }
+  auto k_buffer = GetScratchBuffer<void>(kv_buffer_bytes, context->GetComputeStream());
+  auto v_buffer = GetScratchBuffer<void>(kv_buffer_bytes, context->GetComputeStream());
+  auto fmha_buffer = GetScratchBuffer<void>(fmha_buffer_bytes, context->GetComputeStream());
+#else
+  constexpr bool use_memory_efficient_attention = false;
+  auto k_buffer = GetScratchBuffer<void>(0, context->GetComputeStream());
+  auto v_buffer = GetScratchBuffer<void>(0, context->GetComputeStream());
+  auto fmha_buffer = GetScratchBuffer<void>(0, context->GetComputeStream());
+#endif
+
+  // seqlens_k buffer
+  size_t seqlens_k_bytes = 0;
+  seqlens_k_bytes = sizeof(int) * parameters.batch_size;
+  auto seqlens_k_buffer = GetScratchBuffer<void>(seqlens_k_bytes, context->GetComputeStream());
+
+  std::vector<int64_t> present_dims;
+  if (parameters.past_kv_format == AttentionQkvFormat::Q_K_V_BSNH) {
+    present_dims = {
+        parameters.batch_size, parameters.seqlen_present_kv_cache, parameters.kv_num_heads, parameters.head_size};
+  } else {  // BNSH
+    present_dims = {
+        parameters.batch_size, parameters.kv_num_heads, parameters.seqlen_present_kv_cache, parameters.head_size};
+  }
+  TensorShape present_shape(present_dims);
+  Tensor* present_key = context->Output(1, present_shape);
+  Tensor* present_value = context->Output(2, present_shape);
+
+  data.query = reinterpret_cast<const CudaT*>(query->Data<T>());
+  data.key = reinterpret_cast<const CudaT*>(key->Data<T>());
+  data.value = reinterpret_cast<const CudaT*>(value->Data<T>());
+  data.past_key = (nullptr == past_key) ? nullptr : reinterpret_cast<const CudaT*>(past_key->Data<T>());
+  data.past_value = (nullptr == past_value) ? nullptr : reinterpret_cast<const CudaT*>(past_value->Data<T>());
+  data.output = reinterpret_cast<CudaT*>(output->MutableData<T>());
+  data.present_key = (nullptr == present_key) ? nullptr : reinterpret_cast<CudaT*>(present_key->MutableData<T>());
+  data.present_value = (nullptr == present_value) ? nullptr : reinterpret_cast<CudaT*>(present_value->MutableData<T>());
+  data.seqlens_k = const_cast<int*>(seqlens_k->Data<int>());
+  data.use_flash_attention = use_flash_attention;
+  data.use_memory_efficient_attention = use_memory_efficient_attention;
+  if (data.past_key == data.present_key) {
+    parameters.kv_share_buffer = true;
+  } else {
+    parameters.kv_share_buffer = false;
+  }
+  // Flash Buffers
+  if (softmax_lse_buffer != nullptr) {
+    data.softmax_lse = reinterpret_cast<CudaT*>(softmax_lse_buffer.get());
+  }
+  if (softmax_lse_accum_buffer != nullptr) {
+    data.softmax_lse_accum = reinterpret_cast<CudaT*>(softmax_lse_accum_buffer.get());
+  }
+  if (out_accum_buffer != nullptr) {
+    data.out_accum = reinterpret_cast<CudaT*>(out_accum_buffer.get());
+  }
+  if (seqlens_k_buffer != nullptr) {
+    data.seqlens_k_total = reinterpret_cast<int*>(seqlens_k_buffer.get());
+  }
+  // Memory Efficient Buffers
+  if (k_buffer != nullptr) {
+    data.k = reinterpret_cast<CudaT*>(k_buffer.get());
+    data.v = reinterpret_cast<CudaT*>(v_buffer.get());
+  }
+  if (fmha_buffer != nullptr) {
+    data.fmha_buffer = reinterpret_cast<CudaT*>(fmha_buffer.get());
+  }
+  if (k_buffer != nullptr) {
+    data.k = reinterpret_cast<CudaT*>(k_buffer.get());
+    data.v = reinterpret_cast<CudaT*>(v_buffer.get());
+  }
+  if (k_buffer != nullptr) {
+    data.k = reinterpret_cast<CudaT*>(k_buffer.get());
+    data.v = reinterpret_cast<CudaT*>(v_buffer.get());
+  }
+  if (fmha_buffer != nullptr) {
+    data.fmha_buffer = reinterpret_cast<CudaT*>(fmha_buffer.get());
+  }
+
+  cublasHandle_t cublas = GetCublasHandle(context);
+
+  return QkvToContext<CudaT>(
+      device_prop, cublas, context->GetComputeStream(), parameters, data);
+}
+
+}  // namespace cuda
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/bert/group_query_attention.h b/onnxruntime/contrib_ops/cuda/bert/group_query_attention.h
new file mode 100644
index 0000000000000..54a8127e29e7b
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/bert/group_query_attention.h
@@ -0,0 +1,34 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+
+#include <memory>
+#include "core/providers/cuda/cuda_kernel.h"
+#include "contrib_ops/cuda/bert/group_query_attention_impl.h"
+
+namespace onnxruntime {
+namespace contrib {
+namespace cuda {
+
+using namespace onnxruntime::cuda;
+
+template <typename T>
+class GroupQueryAttention final : public CudaKernel {
+ public:
+  GroupQueryAttention(const OpKernelInfo& info);
+  Status ComputeInternal(OpKernelContext* context) const override;
+
+ protected:
+  int num_heads_;     // number of attention heads
+  int kv_num_heads_;  // different for k and v for group query attention
+  int local_window_size_;
+  bool is_past_bsnh_;
+  float scale_;
+  bool disable_flash_attention_;
+  bool disable_memory_efficient_attention_;
+};
+
+}  // namespace cuda
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/bert/group_query_attention_helper.h b/onnxruntime/contrib_ops/cuda/bert/group_query_attention_helper.h
new file mode 100644
index 0000000000000..2cb9955807f26
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/bert/group_query_attention_helper.h
@@ -0,0 +1,228 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+
+#include "core/common/common.h"
+#include "core/providers/common.h"
+#include "contrib_ops/cpu/bert/attention_common.h"
+
+namespace onnxruntime {
+namespace contrib {
+namespace group_query_attention_helper {
+
+Status CheckInputs(const Tensor* query,
+                   const Tensor* key,
+                   const Tensor* value,
+                   const Tensor* past_key,
+                   const Tensor* past_value,
+                   void* parameters,
+                   int num_heads,
+                   int kv_num_heads,
+                   const Tensor* seqlens_k,
+                   const Tensor* total_seqlen,
+                   bool is_past_bsnh,
+                   float scale) {
+  // Note: Here S* is past_cache_sequence_length, S- is past_sequence_length, S+ is sequence_length
+  //     past_key                   : (B, N_k, S*, H) or (B, N_k, S-, H)
+  //     past_value                 : (B, N_k, S*, H) or (B, N_k, S-, H)
+  // no packing for q/k/v:
+  //     query            (Q)       : (B, S, D)
+  //     key              (K)       : (B, S, D_kv)
+  //     value            (V)       : (B, S, D_kv)
+  ORT_UNUSED_PARAMETER(value);
+
+  AttentionQkvFormat qkv_format = Q_K_V_BSNH;
+  AttentionQkvFormat past_kv_format = is_past_bsnh ? Q_K_V_BSNH : Q_K_V_BNSH;
+
+  const auto& query_dims = query->Shape().GetDims();
+  const auto& key_dims = key->Shape().GetDims();
+
+  if (query_dims.size() != 3) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Input 'query' is expected to have 3 dimensions, got ",
+                           query_dims.size());
+  }
+
+  int batch_size = static_cast<int>(query_dims[0]);
+  int sequence_length = static_cast<int>(query_dims[1]);
+  int q_hidden_size = static_cast<int>(query_dims[2]);
+  int head_size = static_cast<int>(q_hidden_size) / num_heads;
+
+  int kv_hidden_size = static_cast<int>(key_dims[2]);
+
+  int32_t past_sequence_length = 0;
+  if (past_key != nullptr && past_value != nullptr) {
+    const auto& past_key_dims = past_key->Shape().GetDims();
+    const auto& past_value_dims = past_value->Shape().GetDims();
+
+    if (past_key_dims.size() != 4) {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                             "Input 'past_key' is expected to have 4 dimensions, got ",
+                             past_key_dims.size());
+    }
+    if (past_value_dims.size() != 4) {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                             "Input 'past_value' is expected to have 4 dimensions, got ",
+                             past_value_dims.size());
+    }
+
+    if (past_key_dims[0] != batch_size) {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                             "Input 'past_key' dimension 0 should be batch_size, got ",
+                             past_key_dims[0]);
+    }
+    if (past_value_dims[0] != batch_size) {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                             "Input 'past_value' dimension 0 should be batch_size, got ",
+                             past_value_dims[0]);
+    }
+
+    // BNSH
+    if (!is_past_bsnh) {
+      if (past_key_dims[2] != past_value_dims[2]) {
+        return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                               "BNSH Input 'past_key' and 'past_value' should have same dimension 2 (max sequence"
+                               "length or past sequence length), got ",
+                               past_key_dims[1]);
+      }
+      if (past_key_dims[1] != kv_num_heads) {
+        return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                               "Input 'past_key' shall have kv_num_heads");
+      }
+      if (past_value_dims[1] != kv_num_heads) {
+        return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                               "Input 'past_value' shall have kv_num_heads");
+      }
+      // We assume all sequence in past kv are right-padded to max or past sequence length
+      past_sequence_length = static_cast<int>(past_key_dims[2]);
+      // BSNH
+    } else {
+      if (past_key_dims[1] != past_value_dims[1]) {
+        return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                               "BNSH Input 'past_key' and 'past_value' should have same dimension 1 (max sequence"
+                               "length or past sequence length), got ",
+                               past_key_dims[1]);
+      }
+      if (past_key_dims[2] != kv_num_heads) {
+        return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                               "Input 'past_key' shall have kv_num_heads");
+      }
+      if (past_value_dims[2] != kv_num_heads) {
+        return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                               "Input 'past_value' shall have kv_num_heads");
+      }
+      // We assume all sequence in past kv are right-padded to max or past sequence length
+      past_sequence_length = static_cast<int>(past_key_dims[1]);
+    }
+
+    if (past_key_dims[3] != head_size) {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                             "Input 'past_key' dimension 3 should be same as head_size, got ",
+                             past_key_dims[3]);
+    }
+    if (past_value_dims[3] != head_size) {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                             "Input 'past_value' dimension 3 should be same as head_size, got ",
+                             past_value_dims[3]);
+    }
+  } else if (past_key != nullptr || past_value != nullptr) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                           "Input 'past_key' and 'past_value' shall be both present or both absent.");
+  }
+
+  if (key_dims.size() != 3) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Input 'key' is expected to have 3 dimensions, got ",
+                           key_dims.size());
+  }
+  if (query_dims[0] != key_dims[0]) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                           "Input 'query' and 'key' shall have same dim 0 (batch size)");
+  }
+
+  if (num_heads % kv_num_heads != 0) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                           "num_heads must be a multiple of kv_num_heads. Got num_heads % kv_num_heads == ",
+                           num_heads % kv_num_heads);
+  }
+
+  const auto& value_dims = value->Shape().GetDims();
+  if (value_dims.size() != 3) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Input 'value' is expected to have 3 dimensions, got ",
+                           value_dims.size());
+  }
+
+  if (query_dims[0] != value_dims[0]) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                           "Input 'query' and 'value' shall have same dim 0 (batch_size)");
+  }
+
+  if (static_cast<int64_t>(sequence_length) != value_dims[1]) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                           "Input 'query,' 'key,' and 'value' shall have the same dim 1 (sequence_length)");
+  }
+
+  if (value_dims[2] != kv_hidden_size) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Input 'value' is expected to have same hidden size as key.");
+  }
+
+  // Check seqlens_k tensor (holding past seqlen for token gen)
+  const auto& seqlens_dim = seqlens_k->Shape().GetDims();
+  if (seqlens_dim.size() != 1 && seqlens_dim[0] != batch_size) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                           "seqlens_k must be shape (batch_size).");
+  }
+
+  // Set present sequence length and kv_share_buffer from input total_seqlen tensor
+  if (!onnxruntime::IsScalarOr1ElementVector(total_seqlen)) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                           "total_sequence_length tensor must be of one element.");
+  }
+  int total_sequence_length = *((*total_seqlen).template Data<int32_t>());
+  int present_sequence_length = std::max(total_sequence_length, past_sequence_length);
+
+  bool is_prompt = sequence_length != 1;
+
+  if (parameters != nullptr) {
+    GroupQueryAttentionParameters* output_parameters = reinterpret_cast<GroupQueryAttentionParameters*>(parameters);
+    output_parameters->batch_size = batch_size;
+    output_parameters->sequence_length = sequence_length;                  // sequence length of Q
+    output_parameters->seqlen_past_kv_cache = past_sequence_length;        // max sequence length of past kv tensors
+    output_parameters->seqlen_present_kv_cache = present_sequence_length;  // max sequence length of present kv tensors
+    output_parameters->hidden_size = q_hidden_size;
+    output_parameters->num_heads = num_heads;
+    output_parameters->head_size = q_hidden_size / num_heads;
+    output_parameters->kv_hidden_size = kv_hidden_size;
+    output_parameters->kv_num_heads = kv_num_heads;
+    output_parameters->is_unidirectional = true;
+    output_parameters->is_prompt = is_prompt;
+    output_parameters->scale = scale;
+    output_parameters->qkv_format = qkv_format;
+    output_parameters->past_kv_format = past_kv_format;
+  }
+
+  return Status::OK();
+}
+
+Status CheckInputs(const Tensor* query,
+                   const Tensor* key,
+                   const Tensor* value,
+                   const Tensor* past_key,
+                   const Tensor* past_value,
+                   void* parameters,
+                   int num_heads,
+                   int kv_num_heads,
+                   const Tensor* seqlens_k,
+                   const Tensor* total_seqlen,
+                   bool is_past_bsnh,
+                   float scale,
+                   int max_threads_per_block) {
+  if (max_threads_per_block > 0 && num_heads > max_threads_per_block) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "num_heads should be no larger than ", max_threads_per_block);
+  }
+
+  return CheckInputs(query, key, value, past_key, past_value, parameters, num_heads, kv_num_heads, seqlens_k, total_seqlen, is_past_bsnh, scale);
+}
+
+}  // namespace group_query_attention_helper
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/bert/group_query_attention_impl.cu b/onnxruntime/contrib_ops/cuda/bert/group_query_attention_impl.cu
new file mode 100644
index 0000000000000..b22ccb68c1e7b
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/bert/group_query_attention_impl.cu
@@ -0,0 +1,718 @@
+/*
+ The implementation of this file is based on our Multi-Head Attention impl.cu file,
+ which is based on qkvToContext plugin in TensorRT demo:
+ https://github.com/NVIDIA/TensorRT/tree/release/5.1/demo/BERT/
+
+Copyright 2019 NVIDIA Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// Modifications:
+// (1) support GPT-2 past state, unidirectional mask (causal)
+// (2) use flash attention kernel from (https://github.com/Dao-AILab/flash-attention)
+// (3) support different number of heads for Q and KV
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include <cassert>
+#include <cuda_fp16.h>
+#include <cub/cub.cuh>
+#include "core/providers/cuda/cu_inc/common.cuh"
+#include "core/providers/cuda/cuda_common.h"
+#include "core/providers/cuda/shared_inc/fpgeneric.h"
+#include "contrib_ops/cuda/bert/attention_softmax.h"
+#include "contrib_ops/cuda/bert/transformer_common.h"
+#include "contrib_ops/cuda/bert/add_bias_transpose.h"
+#include "contrib_ops/cpu/bert/attention_base.h"
+#include "contrib_ops/cuda/bert/bert_padding.h"
+#include "contrib_ops/cuda/transformers/dump_cuda_tensor.h"
+#include "contrib_ops/cuda/bert/cutlass_fmha/memory_efficient_attention.h"
+#include "contrib_ops/cuda/bert/flash_attention/flash_api.h"
+#include "contrib_ops/cuda/bert/group_query_attention_impl.h"
+#include "contrib_ops/cuda/bert/attention_impl.h"
+#include "core/providers/cuda/shared_inc/cuda_call.h"
+#include <cublas_v2.h>
+
+using namespace onnxruntime::cuda;
+
+namespace onnxruntime {
+namespace contrib {
+namespace cuda {
+
+////////// Auxiliary Kernels for KV prep
+
+// Kernel for seqlens_k
+__global__ void repeat_seqlen(int32_t* seqlens_k, int32_t seqlen, int batch_size) {
+  int id = blockDim.x * blockIdx.x + threadIdx.x;
+  if (id < batch_size) seqlens_k[id] = seqlen;
+}
+
+// Kernel to append new and past kv in either BSNH or BNSH format
+// Adapted from ConcatTensorToTensor kernel in attention_kv_cache.cu file
+template <typename T>
+__global__ void ConcatNewToPastKV(const int new_seqlen,
+                                  const int past_buffer_seqlen,
+                                  const T* past_kv,
+                                  const T* new_kv,
+                                  T* present_kv,
+                                  const int* seqlens_k,
+                                  const bool is_bsnh) {  // refers to past; otherwise bnsh
+  const int h = threadIdx.x;
+  const int n = threadIdx.y;
+  const int s = blockIdx.x;
+  const int b = blockIdx.y;
+
+  const int present_buffer_seqlen = gridDim.x;
+  const int num_heads = blockDim.y;
+  const int H = blockDim.x;
+
+  const int present_batch_stride = present_buffer_seqlen * num_heads * H;
+  const int row_stride = is_bsnh ? num_heads * H : H;
+  const int present_head_stride = is_bsnh ? H : present_buffer_seqlen * H;
+
+  // past_kv:     BPNH or BNPH
+  // new_kv:      BLNH
+  // present_kv:  BTNH or BNTH, where T = P + L
+  const int past_seqlen = seqlens_k == nullptr ? 0 : seqlens_k[b];
+
+  int out_offset = b * present_batch_stride + s * row_stride + n * present_head_stride + h;
+  if (s < past_seqlen) {
+    const int past_batch_stride = past_buffer_seqlen * num_heads * H;
+    const int past_head_stride = is_bsnh ? H : past_buffer_seqlen * H;
+    const int in_offset = b * past_batch_stride + s * row_stride + n * past_head_stride + h;
+    present_kv[out_offset] = past_kv[in_offset];
+  } else if (s < past_seqlen + new_seqlen) {
+    // Note: new KV always BSNH
+    const int new_batch_stride = new_seqlen * num_heads * H;
+    const int new_row_stride = num_heads * H;
+    const int new_head_stride = H;
+    const int in_offset = b * new_batch_stride + (s - past_seqlen) * new_row_stride + n * new_head_stride + h;
+    present_kv[out_offset] = new_kv[in_offset];
+  }
+}
+
+// Use when (H*)*num_heads > 1024
+template <typename T>
+__global__ void ConcatNewToPastKVLarge(const int new_seqlen,
+                                       const int past_buffer_seqlen,
+                                       const int H,
+                                       const int num_heads,
+                                       const T* past_kv,
+                                       const T* new_kv,
+                                       T* present_kv,
+                                       const int* seqlens_k,
+                                       const bool is_bsnh) {
+  int i = threadIdx.x + (blockDim.x * blockIdx.x);
+  if (i < H * num_heads) {
+    const int h = i % H;
+    const int n = i / H;
+    const int s = blockIdx.y;
+    const int b = blockIdx.z;
+    const int present_buffer_seqlen = gridDim.y;
+
+    const int present_batch_stride = present_buffer_seqlen * num_heads * H;
+    const int row_stride = is_bsnh ? num_heads * H : H;
+    const int present_head_stride = is_bsnh ? H : present_buffer_seqlen * H;
+
+    // past_kv:     BPNH or BNPH
+    // new_kv:      BLNH
+    // present_kv:  BTNH or BNTH, where T = P + L
+    const int past_seqlen = seqlens_k == nullptr ? 0 : seqlens_k[b];
+
+    int out_offset = b * present_batch_stride + s * row_stride + n * present_head_stride + h;
+    if (s < past_seqlen) {
+      const int past_batch_stride = past_buffer_seqlen * num_heads * H;
+      const int past_head_stride = is_bsnh ? H : past_buffer_seqlen * H;
+      const int in_offset = b * past_batch_stride + s * row_stride + n * past_head_stride + h;
+      present_kv[out_offset] = past_kv[in_offset];
+    } else if (s < past_seqlen + new_seqlen) {
+      const int new_batch_stride = new_seqlen * num_heads * H;
+      const int new_row_stride = num_heads * H;
+      const int new_head_stride = H;
+      const int in_offset = b * new_batch_stride + (s - past_seqlen) * new_row_stride + n * new_head_stride + h;
+      present_kv[out_offset] = new_kv[in_offset];
+    }
+  }
+}
+
+// Concat new to past in present. Supports past BSNH or past BNSH
+template <typename T>
+Status LaunchConcatNewToPastKV(contrib::GroupQueryAttentionParameters& parameters,
+                               GroupQueryAttentionData<T>& data,
+                               cudaStream_t stream,
+                               const int max_threads_per_block) {
+  const int batch_size = parameters.batch_size;
+  const int kv_sequence_length = parameters.sequence_length;
+  const int past_sequence_length = parameters.seqlen_past_kv_cache;
+  const int present_sequence_length = parameters.seqlen_present_kv_cache;
+  const int kv_num_heads = parameters.kv_num_heads;
+  const int head_size = parameters.head_size;
+  const int* seqlens_k = parameters.is_prompt ? nullptr : reinterpret_cast<const int*>(data.seqlens_k);
+
+  AttentionQkvFormat past_kv_format = parameters.past_kv_format;
+
+  assert(past_kv_format == AttentionQkvFormat::Q_K_V_BSNH || past_kv_format == AttentionQkvFormat::Q_K_V_BNSH);
+  const int H = head_size / 4;  // divide by 4 so kernel can operate on 4 float16 elements at a time.
+  if (H * kv_num_heads <= max_threads_per_block) {
+    const dim3 grid(present_sequence_length, batch_size, 1);
+    const dim3 block(H, kv_num_heads, 1);
+    ConcatNewToPastKV<float2><<<grid, block, 0, stream>>>(kv_sequence_length,
+                                                          past_sequence_length,
+                                                          reinterpret_cast<const float2*>(data.past_key),
+                                                          reinterpret_cast<const float2*>(data.key),
+                                                          reinterpret_cast<float2*>(data.present_key),
+                                                          seqlens_k,
+                                                          past_kv_format == AttentionQkvFormat::Q_K_V_BSNH);
+    ConcatNewToPastKV<float2><<<grid, block, 0, stream>>>(kv_sequence_length,
+                                                          past_sequence_length,
+                                                          reinterpret_cast<const float2*>(data.past_value),
+                                                          reinterpret_cast<const float2*>(data.value),
+                                                          reinterpret_cast<float2*>(data.present_value),
+                                                          seqlens_k,
+                                                          past_kv_format == AttentionQkvFormat::Q_K_V_BSNH);
+  } else {
+    int steps = (H * kv_num_heads + 255) / 256;
+    const dim3 grid(steps, present_sequence_length, batch_size);
+    const dim3 block(256, 1, 1);
+    ConcatNewToPastKVLarge<float2><<<grid, block, 0, stream>>>(kv_sequence_length,
+                                                               past_sequence_length,
+                                                               H,
+                                                               kv_num_heads,
+                                                               reinterpret_cast<const float2*>(data.past_key),
+                                                               reinterpret_cast<const float2*>(data.key),
+                                                               reinterpret_cast<float2*>(data.present_key),
+                                                               seqlens_k,
+                                                               past_kv_format == AttentionQkvFormat::Q_K_V_BSNH);
+    ConcatNewToPastKVLarge<float2><<<grid, block, 0, stream>>>(kv_sequence_length,
+                                                               past_sequence_length,
+                                                               H,
+                                                               kv_num_heads,
+                                                               reinterpret_cast<const float2*>(data.past_value),
+                                                               reinterpret_cast<const float2*>(data.value),
+                                                               reinterpret_cast<float2*>(data.present_value),
+                                                               seqlens_k,
+                                                               past_kv_format == AttentionQkvFormat::Q_K_V_BSNH);
+  }
+  return CUDA_CALL(cudaGetLastError());
+}
+
+// Kernel to append new kv to kv buffer in place
+template <typename T>
+__global__ void ConcatKVInPlace(const int max_seqlen,
+                                T* kv_buff,
+                                const T* new_kv,
+                                const int* seqlens_k,
+                                const bool is_bsnh) {  // refers to kv buff; otherwise bnsh
+  const int h = threadIdx.x;
+  const int n = threadIdx.y;
+  const int s = blockIdx.x;
+  const int b = blockIdx.y;
+
+  const int new_seqlen = gridDim.x;
+  const int num_heads = blockDim.y;
+  const int H = blockDim.x;
+
+  const int present_batch_stride = max_seqlen * num_heads * H;
+  const int present_row_stride = is_bsnh ? num_heads * H : H;
+  const int present_head_stride = is_bsnh ? H : max_seqlen * H;
+
+  // kv_buff:     BTNH or BNTH with buffered memory for new
+  // new_kv:      BLNH
+
+  const int past_seq_len = seqlens_k == nullptr ? 0 : seqlens_k[b];
+
+  int out_offset = b * present_batch_stride + (s + past_seq_len) * present_row_stride + n * present_head_stride + h;
+  // Note: new KV always BSNH
+  const int new_batch_stride = new_seqlen * num_heads * H;
+  const int new_row_stride = num_heads * H;
+  const int new_head_stride = H;
+  const int in_offset = b * new_batch_stride + s * new_row_stride + n * new_head_stride + h;
+  kv_buff[out_offset] = new_kv[in_offset];
+}
+
+template <typename T>
+__global__ void ConcatKVInPlaceLarge(const int max_seqlen,
+                                     const int H,
+                                     const int num_heads,
+                                     T* kv_buff,
+                                     const T* new_kv,
+                                     const int* seqlens_k,
+                                     const bool is_bsnh) {  // refers to kv buff; otherwise bnsh
+  int i = threadIdx.x + (blockDim.x * blockIdx.x);
+  if (i < H * num_heads) {
+    const int h = i % H;
+    const int n = i / H;
+    const int s = blockIdx.y;
+    const int b = blockIdx.z;
+    const int new_seqlen = gridDim.y;
+
+    const int present_batch_stride = max_seqlen * num_heads * H;
+    const int present_row_stride = is_bsnh ? num_heads * H : H;
+    const int present_head_stride = is_bsnh ? H : max_seqlen * H;
+
+    // kv_buff:     BTNH or BNTH with buffered memory for new
+    // new_kv:      BLNH
+
+    const int past_seq_len = seqlens_k == nullptr ? 0 : seqlens_k[b];
+
+    int out_offset = b * present_batch_stride + (s + past_seq_len) * present_row_stride + n * present_head_stride + h;
+    // Note: new KV always BSNH
+    const int new_batch_stride = new_seqlen * num_heads * H;
+    const int new_row_stride = num_heads * H;
+    const int new_head_stride = H;
+    const int in_offset = b * new_batch_stride + s * new_row_stride + n * new_head_stride + h;
+    kv_buff[out_offset] = new_kv[in_offset];
+  }
+}
+
+// Concat new to kv buffer in place
+template <typename T>
+Status LaunchConcatKVInPlace(contrib::GroupQueryAttentionParameters& parameters,
+                             GroupQueryAttentionData<T>& data,
+                             cudaStream_t stream,
+                             const int max_threads_per_block) {
+  const int batch_size = parameters.batch_size;
+  const int kv_sequence_length = parameters.sequence_length;
+  const int present_sequence_length = parameters.seqlen_present_kv_cache;
+  const int kv_num_heads = parameters.kv_num_heads;
+  const int head_size = parameters.head_size;
+
+  // Indicates past sequence_length of each sequence
+  const int* seqlens_k = parameters.is_prompt ? nullptr : reinterpret_cast<const int*>(data.seqlens_k);
+
+  AttentionQkvFormat past_kv_format = parameters.past_kv_format;
+  assert(past_kv_format == AttentionQkvFormat::Q_K_V_BSNH || past_kv_format == AttentionQkvFormat::Q_K_V_BNSH);
+  const int H = head_size / 4;
+  if (H * kv_num_heads <= max_threads_per_block) {
+    const dim3 grid(kv_sequence_length, batch_size, 1);
+    const dim3 block(H, kv_num_heads, 1);
+    ConcatKVInPlace<float2><<<grid, block, 0, stream>>>(present_sequence_length,
+                                                        reinterpret_cast<float2*>(data.present_key),
+                                                        reinterpret_cast<const float2*>(data.key),
+                                                        seqlens_k,
+                                                        past_kv_format == AttentionQkvFormat::Q_K_V_BSNH);
+    ConcatKVInPlace<float2><<<grid, block, 0, stream>>>(present_sequence_length,
+                                                        reinterpret_cast<float2*>(data.present_value),
+                                                        reinterpret_cast<const float2*>(data.value),
+                                                        seqlens_k,
+                                                        past_kv_format == AttentionQkvFormat::Q_K_V_BSNH);
+  } else {
+    int steps = int(ceil(float(H * kv_num_heads) / 256.0));
+    const dim3 grid(steps, kv_sequence_length, batch_size);
+    const dim3 block(256, 1, 1);
+    ConcatKVInPlaceLarge<float2><<<grid, block, 0, stream>>>(present_sequence_length,
+                                                             H,
+                                                             kv_num_heads,
+                                                             reinterpret_cast<float2*>(data.present_key),
+                                                             reinterpret_cast<const float2*>(data.key),
+                                                             seqlens_k,
+                                                             past_kv_format == AttentionQkvFormat::Q_K_V_BSNH);
+    ConcatKVInPlaceLarge<float2><<<grid, block, 0, stream>>>(present_sequence_length,
+                                                             H,
+                                                             kv_num_heads,
+                                                             reinterpret_cast<float2*>(data.present_value),
+                                                             reinterpret_cast<const float2*>(data.value),
+                                                             seqlens_k,
+                                                             past_kv_format == AttentionQkvFormat::Q_K_V_BSNH);
+  }
+  return CUDA_CALL(cudaGetLastError());
+}
+
+// Kernel for use with memory efficient kernel... kv_in is grouped and of bnsh or bsnh... kv_out is ungrouped and bsnh
+template <typename T>
+__global__ void Ungroup(const T* kv_in,
+                        T* kv_out,
+                        const int in_seqlen,
+                        const int kv_num_heads,
+                        const bool is_bsnh) {
+  const int h = threadIdx.x;
+  const int out_n = threadIdx.y;
+  const int s = blockIdx.x;
+  const int b = blockIdx.y;
+
+  const int out_seqlen = gridDim.x;
+  const int q_num_heads = blockDim.y;
+  const int H = blockDim.x;
+
+  const int q_kv_head_ratio = q_num_heads / kv_num_heads;
+  const int out_batch_stride = out_seqlen * q_num_heads * H;
+  const int out_row_stride = is_bsnh ? q_num_heads * H : H;
+  const int out_head_stride = is_bsnh ? H : out_seqlen * H;
+
+  const int in_batch_stride = in_seqlen * kv_num_heads * H;
+  const int in_row_stride = is_bsnh ? kv_num_heads * H : H;
+  const int in_head_stride = is_bsnh ? H : in_seqlen * H;
+  const int in_n = out_n / q_kv_head_ratio;
+
+  const int out_offset = out_batch_stride * b + out_row_stride * s + out_head_stride * out_n + h;
+  const int in_offset = in_batch_stride * b + in_row_stride * s + in_head_stride * in_n + h;
+  kv_out[out_offset] = kv_in[in_offset];
+}
+
+template <typename T>
+__global__ void UngroupLarge(const T* kv_in,
+                             T* kv_out,
+                             const int H,
+                             const int in_seqlen,
+                             const int q_num_heads,
+                             const int kv_num_heads,
+                             const bool is_bsnh) {
+  int i = threadIdx.x + (blockDim.x * blockIdx.x);  // index along H * q_num_heads elements
+  if (i < H * q_num_heads) {
+    const int out_seqlen = gridDim.y;
+    const int s = blockIdx.y;
+    const int b = blockIdx.z;
+
+    const int q_kv_head_ratio = q_num_heads / kv_num_heads;
+    const int out_batch_stride = out_seqlen * q_num_heads * H;
+    const int out_row_stride = is_bsnh ? q_num_heads * H : H;
+    const int out_head_stride = is_bsnh ? H : out_seqlen * H;
+
+    const int in_batch_stride = in_seqlen * kv_num_heads * H;
+    const int in_row_stride = is_bsnh ? kv_num_heads * H : H;
+    const int in_head_stride = is_bsnh ? H : in_seqlen * H;
+
+    const int h = i % H;
+    const int out_n = i / H;
+    const int in_n = out_n / q_kv_head_ratio;
+    const int out_offset = out_batch_stride * b + out_row_stride * s + out_head_stride * out_n + h;
+    const int in_offset = in_batch_stride * b + in_row_stride * s + in_head_stride * in_n + h;
+    kv_out[out_offset] = kv_in[in_offset];
+  }
+}
+
+// Ungroup kv or present kv for use in Memory Efficient kernel. If present kv is not null and is BNSH, transposes it.
+Status LaunchUngroup(contrib::GroupQueryAttentionParameters& parameters,
+                     float2* k_buff, float2* v_buff,
+                     const float2* k_og, const float2* v_og,
+                     const int buff_seqlen, const int og_seqlen,
+                     const bool is_bsnh,
+                     cudaStream_t stream,
+                     const int max_threads_per_block) {
+  const int batch_size = parameters.batch_size;
+  const int num_heads = parameters.num_heads;
+  const int kv_num_heads = parameters.kv_num_heads;
+  const int head_size = parameters.head_size;
+
+  const int H = head_size / 4;
+  if (H * num_heads <= max_threads_per_block) {
+    const dim3 grid(buff_seqlen, batch_size, 1);
+    const dim3 block(H, num_heads, 1);
+    Ungroup<float2><<<grid, block, 0, stream>>>(k_og,
+                                                k_buff,
+                                                og_seqlen,
+                                                kv_num_heads,
+                                                is_bsnh);
+    Ungroup<float2><<<grid, block, 0, stream>>>(v_og,
+                                                v_buff,
+                                                og_seqlen,
+                                                kv_num_heads,
+                                                is_bsnh);
+  } else {
+    int steps = int(ceil(float(H * num_heads) / 256.0));
+    const dim3 grid(steps, buff_seqlen, batch_size);
+    const dim3 block(256, 1, 1);
+    UngroupLarge<float2><<<grid, block, 0, stream>>>(k_og,
+                                                     k_buff,
+                                                     H,
+                                                     og_seqlen,
+                                                     num_heads,
+                                                     kv_num_heads,
+                                                     is_bsnh);
+    UngroupLarge<float2><<<grid, block, 0, stream>>>(v_og,
+                                                     v_buff,
+                                                     H,
+                                                     og_seqlen,
+                                                     num_heads,
+                                                     kv_num_heads,
+                                                     is_bsnh);
+  }
+  return CUDA_CALL(cudaGetLastError());
+}
+
+
+__global__ void PastToTotalSeqlen(int32_t* seqlens_k,
+                                  int32_t* seqlens_k_buff,
+                                  const int add_seqlen) {
+  seqlens_k_buff[threadIdx.x] = seqlens_k[threadIdx.x] + add_seqlen;
+}
+
+// Convert Past to Total sequence length tensor
+Status LaunchGetSeqlenBuff(contrib::GroupQueryAttentionParameters& parameters, int32_t* seqlens_k,
+                           int32_t* seqlens_k_buff, bool is_total, cudaStream_t stream,
+                               const int threads_per_block) {
+  if (parameters.is_prompt) {
+    return Status::OK();
+  }
+  const int batch_size = parameters.batch_size;
+  const int add_seqlen = is_total ? parameters.sequence_length : 0;
+
+  const dim3 grid(1, 1, 1);
+  // TODO(aciddelgado): unlikely but could have a bigger batch_size than max_threads
+  const dim3 block(batch_size, 1, 1);
+
+  // TODO(aciddelgado): small version
+  PastToTotalSeqlen<<<grid, block, 0, stream>>>(seqlens_k, seqlens_k_buff, add_seqlen);
+
+  return CUDA_CALL(cudaGetLastError());
+}
+
+////////// Launch Kernels
+
+#if USE_FLASH_ATTENTION
+template <typename T>
+Status FlashAttention(
+    const cudaDeviceProp& device_prop,
+    cudaStream_t stream,
+    contrib::GroupQueryAttentionParameters& parameters,
+    GroupQueryAttentionData<T>& data,
+    float scale) {
+  const int max_threads_per_block = device_prop.maxThreadsPerBlock;
+  const int batch_size = parameters.batch_size;
+  const int sequence_length = parameters.sequence_length;
+  const int kv_sequence_length = parameters.sequence_length;
+  const int present_sequence_length = parameters.seqlen_present_kv_cache;
+  const int num_heads = parameters.num_heads;
+  const int kv_num_heads = parameters.kv_num_heads;
+  const int head_size = parameters.head_size;
+  AttentionQkvFormat past_kv_format = parameters.past_kv_format;
+
+  void* query = reinterpret_cast<void*>(const_cast<T*>(data.query));
+  void* key = reinterpret_cast<void*>(const_cast<T*>(data.key));
+  void* value = reinterpret_cast<void*>(const_cast<T*>(data.value));
+
+  bool is_causal = true;
+
+  // Note: seqlens_k is past sequence length for flash
+  if (parameters.is_prompt) {
+    // Launch kernel to copy seqlen
+    constexpr int thr_per_blk = 256;
+    int blk_in_grid = (batch_size + thr_per_blk -1) / thr_per_blk;
+    repeat_seqlen<<<blk_in_grid, thr_per_blk, 0, stream>>>(data.seqlens_k_total, parameters.sequence_length, batch_size);
+  }
+
+  void* seqlens_k = reinterpret_cast<void*>(data.seqlens_k);
+
+  if (parameters.kv_share_buffer) {
+    // Share buffer case
+    if (data.past_key == nullptr || data.past_key != data.present_key) {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                             "Past and present kv shall share the same tensor when kv_share_buffer is on.");
+    }
+
+    if (parameters.is_prompt) {
+      ORT_RETURN_IF_ERROR(LaunchConcatKVInPlace(parameters, data, stream, max_threads_per_block));
+      key = nullptr;
+      value = nullptr;
+      seqlens_k = reinterpret_cast<void*>(data.seqlens_k_total);
+    }
+
+    void* present_key = reinterpret_cast<void*>(const_cast<T*>(data.present_key));
+    void* present_value = reinterpret_cast<void*>(const_cast<T*>(data.present_value));
+
+    DUMP_TENSOR_INIT();
+    DUMP_TENSOR("seqlens_k", reinterpret_cast<int*>(seqlens_k), batch_size, 1);
+
+    bool past_bsnh = past_kv_format == AttentionQkvFormat::Q_K_V_BSNH;
+    ORT_RETURN_IF_ERROR(onnxruntime::flash::mha_fwd_kvcache(
+        device_prop, stream, query, present_key, present_value, key, value, data.output, reinterpret_cast<void*>(data.softmax_lse),
+        seqlens_k, batch_size, num_heads, kv_num_heads,
+        head_size, sequence_length, present_sequence_length, kv_sequence_length,
+        scale, is_causal, past_bsnh, parameters.num_splits, reinterpret_cast<void*>(data.softmax_lse_accum),
+        reinterpret_cast<void*>(data.out_accum), parameters.local_window_size));
+  } else {
+    // Not share buffer case
+    // Note that Flash Attention kv-caching operates in place on a buffer... therefore this path is inneficient
+    if (data.past_key != nullptr && data.past_key == data.present_key) {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                             "Past and present kv share the same tensor but kv_share_buffer is not on.");
+    }
+
+    ORT_RETURN_IF_ERROR(LaunchConcatNewToPastKV(parameters, data, stream, max_threads_per_block));
+
+    if (!parameters.is_prompt) {
+      ORT_RETURN_IF_ERROR(LaunchGetSeqlenBuff(parameters, data.seqlens_k, data.seqlens_k_total, true, stream, 256));
+    }
+
+    seqlens_k = reinterpret_cast<void*>(data.seqlens_k_total);
+
+    void* present_key = reinterpret_cast<void*>(const_cast<T*>(data.present_key));
+    void* present_value = reinterpret_cast<void*>(const_cast<T*>(data.present_value));
+
+    DUMP_TENSOR_INIT();
+    DUMP_TENSOR("seqlens_k", reinterpret_cast<int*>(seqlens_k), batch_size, 1);
+    DUMP_TENSOR("Q", data.query, batch_size, sequence_length, num_heads, head_size);
+    DUMP_TENSOR("K", data.present_key, batch_size, kv_num_heads, present_sequence_length, head_size);
+    DUMP_TENSOR("V", data.present_value, batch_size, kv_num_heads, present_sequence_length, head_size);
+
+    bool past_bsnh = past_kv_format == AttentionQkvFormat::Q_K_V_BSNH;
+    ORT_RETURN_IF_ERROR(onnxruntime::flash::mha_fwd_kvcache(
+        device_prop, stream, query, present_key, present_value, nullptr, nullptr, data.output, reinterpret_cast<void*>(data.softmax_lse),
+        seqlens_k, batch_size, num_heads, kv_num_heads,
+        head_size, sequence_length, present_sequence_length, 0,
+        scale, is_causal, past_bsnh, parameters.num_splits, reinterpret_cast<void*>(data.softmax_lse_accum),
+        reinterpret_cast<void*>(data.out_accum), parameters.local_window_size));
+  }
+
+  DUMP_TENSOR_INIT();
+  DUMP_TENSOR("flash attention output", data.output, batch_size, sequence_length, num_heads, head_size);
+
+  return Status::OK();
+}
+#endif
+
+#if USE_MEMORY_EFFICIENT_ATTENTION
+template <typename T>
+Status EfficientAttention(
+    const cudaDeviceProp& device_prop,
+    cudaStream_t stream,
+    contrib::GroupQueryAttentionParameters& parameters,
+    GroupQueryAttentionData<T>& data,
+    float scale) {
+  const int max_threads_per_block = device_prop.maxThreadsPerBlock;
+  const int batch_size = parameters.batch_size;
+  const int sequence_length = parameters.sequence_length;
+  const int present_sequence_length = parameters.seqlen_present_kv_cache;
+  const int num_heads = parameters.num_heads;
+  const int kv_num_heads = parameters.kv_num_heads;
+  const int head_size = parameters.head_size;
+  AttentionQkvFormat past_kv_format = parameters.past_kv_format;
+
+  const void* query = reinterpret_cast<const void*>(data.query);
+  const void* key = reinterpret_cast<const void*>(data.key);
+  const void* value = reinterpret_cast<const void*>(data.value);
+
+  if (parameters.is_prompt) {
+    // Launch kernel to copy seqlen
+    constexpr int thr_per_blk = 256;
+    int blk_in_grid = (batch_size + thr_per_blk - 1) / thr_per_blk;
+    repeat_seqlen<<<blk_in_grid, thr_per_blk, 0, stream>>>(data.seqlens_k_total, parameters.sequence_length, batch_size);
+  } else {
+    ORT_RETURN_IF_ERROR(LaunchGetSeqlenBuff(parameters, data.seqlens_k, data.seqlens_k_total, true, stream, 256));
+  }
+
+  if (parameters.kv_share_buffer) {
+    // Share buffer case
+    if (data.past_key == nullptr || data.past_key != data.present_key) {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                             "Past and present kv shall share the same tensor when kv_share_buffer is on.");
+    }
+    // Concatenate new kv in place
+    ORT_RETURN_IF_ERROR(LaunchConcatKVInPlace(parameters, data, stream, max_threads_per_block));
+  } else {
+    // Not share buffer case
+    if (data.past_key != nullptr && data.past_key == data.present_key) {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                             "Past and present kv share the same tensor but kv_share_buffer is not on.");
+    }
+    // Copy past and concat new KV to present buffer
+    ORT_RETURN_IF_ERROR(LaunchConcatNewToPastKV(parameters, data, stream, max_threads_per_block));
+  }
+
+  // Ungroup if grouped, otherwise use present kv directly
+  const bool is_bsnh = past_kv_format == AttentionQkvFormat::Q_K_V_BSNH;
+  if (num_heads == kv_num_heads) {
+    // Use present kv directly if not grouped
+    key = reinterpret_cast<const void*>(data.present_key);
+    value = reinterpret_cast<const void*>(data.present_value);
+  } else {
+    // Otherwise we use intermediate buffers to run memory efficient attention... best avoid this path
+    float2* k_buff = reinterpret_cast<float2*>(data.k);
+    float2* v_buff = reinterpret_cast<float2*>(data.v);
+    const float2* k_og = reinterpret_cast<const float2*>(data.present_key);
+    const float2* v_og = reinterpret_cast<const float2*>(data.present_value);
+    ORT_RETURN_IF_ERROR(LaunchUngroup(parameters, k_buff, v_buff, k_og, v_og, present_sequence_length,
+                                      present_sequence_length, is_bsnh, stream, max_threads_per_block));
+    key = reinterpret_cast<const void*>(data.k);
+    value = reinterpret_cast<const void*>(data.v);
+  }
+
+  DUMP_TENSOR_INIT();
+  DUMP_TENSOR("seqlens_k", data.seqlens_k_total, batch_size, 1);
+
+  MemoryEfficientAttentionParams p;
+  p.sm = device_prop.major * 10 + device_prop.minor;
+  p.is_half = sizeof(T) == 2;
+  p.batch_size = batch_size;
+  p.num_heads = num_heads;
+  p.sequence_length = sequence_length;
+  p.kv_sequence_length = present_sequence_length;  // TOTALLY UNNECESSARY IF WE HAVE SEQLENS_K, maybe remove
+  p.max_sequence_length = present_sequence_length;
+  p.qk_head_size = head_size;
+  p.v_head_size = head_size;
+  p.causal = true;
+  p.scale = scale;
+  p.seqlen_k_ptr = data.seqlens_k_total;  // Note: seqlens_k is total sequence length for efficient
+  p.seqstart_q_ptr = nullptr;
+  p.seqstart_k_ptr = nullptr;
+  p.query = query;
+  p.key = key;
+  p.value = value;
+  p.attn_bias = nullptr;
+  p.is_attn_bias_batched = false;
+  p.is_kv_bsnh = past_kv_format == AttentionQkvFormat::Q_K_V_BSNH;
+  p.output = data.output;
+  p.workspace = MemoryEfficientAttentionParams::need_workspace(p.v_head_size, sizeof(T) == sizeof(float))
+                    ? data.fmha_buffer
+                    : nullptr;
+  p.stream = stream;
+  p.has_custom_right_padding = true;
+  run_memory_efficient_attention(p);
+
+  DUMP_TENSOR_INIT();
+  DUMP_TENSOR("efficient attention output", data.output, batch_size, sequence_length, num_heads, head_size);
+
+  return Status::OK();
+}
+#endif
+
+////////// API Functions
+
+template <typename T>
+Status QkvToContext(
+    const cudaDeviceProp& device_prop,
+    cublasHandle_t& cublas,
+    Stream* ort_stream,
+    contrib::GroupQueryAttentionParameters& parameters,
+    GroupQueryAttentionData<T>& data) {
+  auto stream = static_cast<cudaStream_t>(ort_stream->GetHandle());
+  const float scale = parameters.scale == 0.0f ? 1.f / sqrt(static_cast<float>(parameters.head_size)) : parameters.scale;
+
+#if USE_FLASH_ATTENTION
+  if (data.use_flash_attention) {
+    return FlashAttention(device_prop, stream, parameters, data, scale);
+  }
+#endif
+
+#if USE_MEMORY_EFFICIENT_ATTENTION
+  if (data.use_memory_efficient_attention) {
+    return EfficientAttention(device_prop, stream, parameters, data, scale);
+  }
+#endif
+
+  return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Unfused Group Query Attention not implemented yet.");
+}
+
+template struct GroupQueryAttentionData<half>;
+
+template Status QkvToContext<half>(
+    const cudaDeviceProp& device_prop,
+    cublasHandle_t& cublas,
+    Stream* ort_stream,
+    contrib::GroupQueryAttentionParameters& parameters,
+    GroupQueryAttentionData<half>& data);
+
+}  // namespace cuda
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/bert/group_query_attention_impl.h b/onnxruntime/contrib_ops/cuda/bert/group_query_attention_impl.h
new file mode 100644
index 0000000000000..de32d7ea93163
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/bert/group_query_attention_impl.h
@@ -0,0 +1,52 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+#include "core/providers/cuda/shared_inc/cuda_utils.h"
+#include <cuda_fp16.h>
+#include <cublas_v2.h>
+#include "contrib_ops/cpu/bert/attention_common.h"
+#include "core/framework/allocator.h"
+
+namespace onnxruntime {
+namespace contrib {
+namespace cuda {
+
+template <typename T>
+struct GroupQueryAttentionData {
+  // Input Tensors
+  const T* query = nullptr;
+  const T* key = nullptr;
+  const T* value = nullptr;
+  const T* past_key = nullptr;
+  const T* past_value = nullptr;
+  int* seqlens_k = nullptr;
+  // Flash buffers
+  T* softmax_lse = nullptr;
+  T* softmax_lse_accum = nullptr;
+  T* out_accum = nullptr;
+  int* seqlens_k_total = nullptr;
+  // Memory Efficient buffers
+  T* fmha_buffer = nullptr;
+  T* k = nullptr;
+  T* v = nullptr;
+  // Output Tensors
+  T* output = nullptr;
+  T* present_key = nullptr;
+  T* present_value = nullptr;
+  // Kernel Flags
+  bool use_flash_attention = false;
+  bool use_memory_efficient_attention = false;
+};
+
+template <typename T>
+Status QkvToContext(
+    const cudaDeviceProp& device_prop,
+    cublasHandle_t& cublas,
+    Stream* stream,
+    contrib::GroupQueryAttentionParameters& parameters,
+    GroupQueryAttentionData<T>& data);
+
+}  // namespace cuda
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/bert/layer_norm.cuh b/onnxruntime/contrib_ops/cuda/bert/layer_norm.cuh
index 5c083d64ee542..ff3178b56c2a6 100644
--- a/onnxruntime/contrib_ops/cuda/bert/layer_norm.cuh
+++ b/onnxruntime/contrib_ops/cuda/bert/layer_norm.cuh
@@ -147,14 +147,16 @@ __device__ inline void LayerNormSmall(const T* input_v, const cub::KeyValuePair<
   __shared__ T rsigma;  // 1 / std.dev.
   T beta_v[ILP], gamma_v[ILP], output_v[ILP];
 
-  if (beta != nullptr) {
-    VecT* beta_val = reinterpret_cast<VecT*>(&beta_v);
-    *beta_val = *reinterpret_cast<const VecT*>(&beta[threadIdx.x * ILP]);
-  }
-  VecT* gamma_val = reinterpret_cast<VecT*>(&gamma_v);
-  *gamma_val = *reinterpret_cast<const VecT*>(&gamma[threadIdx.x * ILP]);
+  const bool is_valid = ILP * threadIdx.x < ld;
+  if (is_valid) {
+    if (beta != nullptr) {
+      VecT* beta_val = reinterpret_cast<VecT*>(&beta_v);
+      *beta_val = *reinterpret_cast<const VecT*>(&beta[threadIdx.x * ILP]);
+    }
 
-  VecT* output_val = reinterpret_cast<VecT*>(&output_v);
+    VecT* gamma_val = reinterpret_cast<VecT*>(&gamma_v);
+    *gamma_val = *reinterpret_cast<const VecT*>(&gamma[threadIdx.x * ILP]);
+  }
 
   KeyValuePairSum pair_sum;
   const cub::KeyValuePair<T, T> sum_kv = BlockReduce(temp_storage).Reduce(thread_data, pair_sum);
@@ -165,13 +167,15 @@ __device__ inline void LayerNormSmall(const T* input_v, const cub::KeyValuePair<
   }
   __syncthreads();
 
-  if (ILP * threadIdx.x < ld) {
+  if (is_valid) {
 #pragma unroll
     for (int i = 0; i < ILP; i++) {
       output_v[i] = (beta != nullptr)
                         ? gamma_v[i] * (input_v[i] - mu) * rsigma + beta_v[i]
                         : gamma_v[i] * (input_v[i] - mu) * rsigma;
     }
+
+    VecT* output_val = reinterpret_cast<VecT*>(&output_v);
     *(reinterpret_cast<VecT*>(&output[idx])) = *output_val;
   }
 }
@@ -186,12 +190,15 @@ __device__ inline void SimplifiedLayerNormSmall(const T* input_v, const T& threa
   using BlockReduce = cub::BlockReduce<T, TPB>;
   __shared__ typename BlockReduce::TempStorage temp_storage;
   __shared__ T rsigma;  // 1 / std.dev.
-  T gamma_v[ILP], output_v[ILP];
 
-  VecT* gamma_val = reinterpret_cast<VecT*>(&gamma_v);
-  *gamma_val = *reinterpret_cast<const VecT*>(&gamma[threadIdx.x * ILP]);
+  const bool is_valid = ILP * threadIdx.x < ld;
 
-  VecT* output_val = reinterpret_cast<VecT*>(&output_v);
+  T gamma_v[ILP], output_v[ILP];
+
+  if (is_valid) {
+    VecT* gamma_val = reinterpret_cast<VecT*>(&gamma_v);
+    *gamma_val = *reinterpret_cast<const VecT*>(&gamma[threadIdx.x * ILP]);
+  }
 
   const T sum = BlockReduce(temp_storage).Sum(thread_data);
 
@@ -200,11 +207,13 @@ __device__ inline void SimplifiedLayerNormSmall(const T* input_v, const T& threa
   }
   __syncthreads();
 
-  if (ILP * threadIdx.x < ld) {
+  if (is_valid) {
 #pragma unroll
     for (int i = 0; i < ILP; i++) {
       output_v[i] = gamma_v[i] * input_v[i] * rsigma;
     }
+
+    VecT* output_val = reinterpret_cast<VecT*>(&output_v);
     *(reinterpret_cast<VecT*>(&output[idx])) = *output_val;
   }
 }
diff --git a/onnxruntime/contrib_ops/cuda/bert/longformer_attention_impl.cu b/onnxruntime/contrib_ops/cuda/bert/longformer_attention_impl.cu
index de3c3fb6ca065..f00239460071b 100644
--- a/onnxruntime/contrib_ops/cuda/bert/longformer_attention_impl.cu
+++ b/onnxruntime/contrib_ops/cuda/bert/longformer_attention_impl.cu
@@ -924,55 +924,55 @@ Status LongformerQkvToContext(
 
   if (disable_compact_memory) {
     ORT_RETURN_IF_ERROR(LaunchLongformerSoftmaxSimpleKernel(
-            stream,
-            cublas,
-            workspace,
-            q,
-            k,
-            v,
-            attention_mask,
-            global_q,
-            global_k,
-            global_v,
-            global_attention,
-            global_index,
-            batch_global_num,
-            pinned_buffer,
-            temp_output,
-            rsqrt_head_size,
-            batch_size,
-            sequence_length,
-            num_heads,
-            head_size,
-            window,
-            element_size));
+        stream,
+        cublas,
+        workspace,
+        q,
+        k,
+        v,
+        attention_mask,
+        global_q,
+        global_k,
+        global_v,
+        global_attention,
+        global_index,
+        batch_global_num,
+        pinned_buffer,
+        temp_output,
+        rsqrt_head_size,
+        batch_size,
+        sequence_length,
+        num_heads,
+        head_size,
+        window,
+        element_size));
   } else {
     ORT_ENFORCE(max_num_global <= window);
     ORT_RETURN_IF_ERROR(LaunchLongformerSoftmaxKernel(
-            stream,
-            cublas,
-            workspace,
-            q,
-            k,
-            v,
-            attention_mask,
-            max_num_global,
-            compact_global_q,
-            global_q,
-            global_k,
-            global_v,
-            global_attention,
-            global_index,
-            batch_global_num,
-            pinned_buffer,
-            temp_output,
-            rsqrt_head_size,
-            batch_size,
-            sequence_length,
-            num_heads,
-            head_size,
-            window,
-            element_size));
+        stream,
+        cublas,
+        workspace,
+        q,
+        k,
+        v,
+        attention_mask,
+        max_num_global,
+        compact_global_q,
+        global_q,
+        global_k,
+        global_v,
+        global_attention,
+        global_index,
+        batch_global_num,
+        pinned_buffer,
+        temp_output,
+        rsqrt_head_size,
+        batch_size,
+        sequence_length,
+        num_heads,
+        head_size,
+        window,
+        element_size));
   }
 
   // The temp_output is BxNxSxH, transpose it to final output BxSxNxH
diff --git a/onnxruntime/contrib_ops/cuda/bert/multihead_attention.cc b/onnxruntime/contrib_ops/cuda/bert/multihead_attention.cc
index 25f3f59165e43..ebd66d8c6528e 100644
--- a/onnxruntime/contrib_ops/cuda/bert/multihead_attention.cc
+++ b/onnxruntime/contrib_ops/cuda/bert/multihead_attention.cc
@@ -153,8 +153,24 @@ Status MultiHeadAttention<T>::ComputeInternal(OpKernelContext* context) const {
       parameters.sequence_length < min_seq_len_for_flash_attention_packed_qkv_) {
     use_flash_attention = false;
   }
+  // Allocate buffers
+  size_t softmax_lse_accum_bytes = 0;
+  size_t out_accum_bytes = 0;
+  if (use_flash_attention) {
+    using namespace std;
+    auto [num_splits, slse_accum_bytes, o_accum_bytes] = onnxruntime::flash::get_num_splits_and_buffer_sizes(
+        parameters.batch_size, parameters.sequence_length, parameters.kv_sequence_length, parameters.num_heads,
+        parameters.head_size, device_prop.multiProcessorCount);
+    parameters.num_splits = num_splits;
+    softmax_lse_accum_bytes = slse_accum_bytes;
+    out_accum_bytes = o_accum_bytes;
+  }
+  auto softmax_lse_accum_buffer = GetScratchBuffer<void>(softmax_lse_accum_bytes, context->GetComputeStream());
+  auto out_accum_buffer = GetScratchBuffer<void>(out_accum_bytes, context->GetComputeStream());
 #else
   constexpr bool use_flash_attention = false;
+  auto softmax_lse_accum_buffer = GetScratchBuffer<void>(0, context->GetComputeStream());  // nullptr
+  auto out_accum_buffer = GetScratchBuffer<void>(0, context->GetComputeStream());          // nullptr
 #endif
 
   bool use_fused_cross_attention = !use_flash_attention &&
@@ -194,8 +210,10 @@ Status MultiHeadAttention<T>::ComputeInternal(OpKernelContext* context) const {
     // Here we assume that num_heads and head_size does not change for a MultiHeadAttention node.
     if (nullptr == fused_fp16_runner_.get()) {
       constexpr bool is_unidirectional = false;
-      fused_fp16_runner_ = FusedMHARunnerFP16v2::Create(
-          num_heads_, parameters.head_size, sm, is_unidirectional, enable_trt_flash_attention_, parameters.scale);
+      std::call_once(fused_fp16_runner_created_, [&]() {
+        fused_fp16_runner_ = FusedMHARunnerFP16v2::Create(num_heads_, parameters.head_size, sm, is_unidirectional,
+                                                          enable_trt_flash_attention_, parameters.scale);
+      });
     }
 
     // In case some kernel not loaded due to shared memory limit, we need to double check here.
@@ -289,6 +307,12 @@ Status MultiHeadAttention<T>::ComputeInternal(OpKernelContext* context) const {
   data.use_memory_efficient_attention = use_memory_efficient_attention;
   data.cumulated_sequence_length_q_cache = &(this->cumulated_sequence_length_q_cache_);
   data.cumulated_sequence_length_kv_cache = &(this->cumulated_sequence_length_kv_cache_);
+  if (softmax_lse_accum_buffer != nullptr) {
+    data.softmax_lse_accum = reinterpret_cast<CudaT*>(softmax_lse_accum_buffer.get());
+  }
+  if (out_accum_buffer != nullptr) {
+    data.out_accum = reinterpret_cast<CudaT*>(out_accum_buffer.get());
+  }
 
   cublasHandle_t cublas = GetCublasHandle(context);
 
diff --git a/onnxruntime/contrib_ops/cuda/bert/multihead_attention.h b/onnxruntime/contrib_ops/cuda/bert/multihead_attention.h
index 33fa3d50e4564..c162f7133cc1c 100644
--- a/onnxruntime/contrib_ops/cuda/bert/multihead_attention.h
+++ b/onnxruntime/contrib_ops/cuda/bert/multihead_attention.h
@@ -32,6 +32,7 @@ class MultiHeadAttention final : public CudaKernel {
   bool disable_memory_efficient_attention_;
   int min_seq_len_for_flash_attention_packed_qkv_;
   mutable std::unique_ptr<MHARunner> fused_fp16_runner_;
+  mutable std::once_flag fused_fp16_runner_created_;
   mutable const FusedMultiHeadCrossAttentionKernel* fused_fp16_cross_attention_kernel_;
   mutable CumulatedSequenceLengthCache cumulated_sequence_length_q_cache_;
   mutable CumulatedSequenceLengthCache cumulated_sequence_length_kv_cache_;
diff --git a/onnxruntime/contrib_ops/cuda/bert/packed_attention.h b/onnxruntime/contrib_ops/cuda/bert/packed_attention.h
index 0cdd8021de4a1..f00c112fc73d2 100644
--- a/onnxruntime/contrib_ops/cuda/bert/packed_attention.h
+++ b/onnxruntime/contrib_ops/cuda/bert/packed_attention.h
@@ -24,10 +24,11 @@ class TrtFusedAttention {
  protected:
   MHARunner* GetFusedRunner(const cudaDeviceProp& device_prop, const PackedAttentionParameters& parameters) const;
 
- private:
+ protected:
   bool disable_fused_runner_;
   bool enable_trt_flash_attention_;
   mutable std::unique_ptr<MHARunner> fused_fp16_runner_;
+  mutable std::once_flag fused_fp16_runner_created_;
 };
 
 template <typename T>
diff --git a/onnxruntime/contrib_ops/cuda/bert/packed_attention_impl.cu b/onnxruntime/contrib_ops/cuda/bert/packed_attention_impl.cu
index aba0efdbd7d5f..3b52320839403 100644
--- a/onnxruntime/contrib_ops/cuda/bert/packed_attention_impl.cu
+++ b/onnxruntime/contrib_ops/cuda/bert/packed_attention_impl.cu
@@ -507,10 +507,12 @@ Status FusedScaledDotProductAttentionCutlass(
   MemoryEfficientAttentionParams p;
   p.sm = device_prop.major * 10 + device_prop.minor;
   p.is_half = sizeof(T) == 2;
+  p.is_kv_bsnh = true;
   p.batch_size = parameters.batch_size;
   p.num_heads = parameters.num_heads;
   p.sequence_length = parameters.sequence_length;
   p.kv_sequence_length = parameters.sequence_length;
+  p.max_sequence_length = parameters.sequence_length;
   p.qk_head_size = parameters.head_size;
   p.v_head_size = parameters.v_head_size;
   p.causal = false;
@@ -527,6 +529,7 @@ Status FusedScaledDotProductAttentionCutlass(
   p.output = data.output;
   p.workspace = MemoryEfficientAttentionParams::need_workspace(v_head_size, sizeof(T) == sizeof(float)) ? accum_workspace : nullptr;
   p.stream = stream;
+  p.has_custom_right_padding = false;
   run_memory_efficient_attention(p);
 
   DUMP_TENSOR("PackedAttention cutlass output", data.output, parameters.token_count, num_heads, v_head_size);
diff --git a/onnxruntime/contrib_ops/cuda/bert/packed_multihead_attention_impl.cu b/onnxruntime/contrib_ops/cuda/bert/packed_multihead_attention_impl.cu
index e09fd9e6b36e5..8a508241d80ba 100644
--- a/onnxruntime/contrib_ops/cuda/bert/packed_multihead_attention_impl.cu
+++ b/onnxruntime/contrib_ops/cuda/bert/packed_multihead_attention_impl.cu
@@ -688,6 +688,7 @@ Status FusedAttentionCutlass(
   p.num_heads = parameters.num_heads;
   p.sequence_length = parameters.sequence_length;
   p.kv_sequence_length = parameters.sequence_length;
+  p.max_sequence_length = parameters.sequence_length;
   p.qk_head_size = parameters.head_size;
   p.v_head_size = parameters.v_head_size;
   p.causal = false;
@@ -702,10 +703,12 @@ Status FusedAttentionCutlass(
   p.attn_bias = data.relative_position_bias;
   p.is_attn_bias_batched = !parameters.broadcast_res_pos_bias;
   p.output = data.output;
+  p.is_kv_bsnh = true;
   p.workspace = MemoryEfficientAttentionParams::need_workspace(v_head_size, sizeof(T) == sizeof(float))
                     ? (data.workspace + (data.no_qkv_workspace ? 0 : (elements_qk + elements_qk + elements_v)))
                     : nullptr;
   p.stream = stream;
+  p.has_custom_right_padding = false;
   run_memory_efficient_attention(p);
 
   DUMP_TENSOR_INIT();
diff --git a/onnxruntime/contrib_ops/cuda/bert/rotary_embedding.cc b/onnxruntime/contrib_ops/cuda/bert/rotary_embedding.cc
new file mode 100644
index 0000000000000..2d12e975d88d7
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/bert/rotary_embedding.cc
@@ -0,0 +1,85 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "core/providers/cuda/cuda_common.h"
+#include "contrib_ops/cpu/bert/rotary_embedding_helper.h"
+#include "contrib_ops/cuda/bert/rotary_embedding.h"
+#include "contrib_ops/cuda/bert/rotary_embedding_impl.h"
+
+using namespace onnxruntime::cuda;
+using namespace ::onnxruntime::common;
+using namespace ONNX_NAMESPACE;
+using namespace onnxruntime::contrib::rotary_embedding_helper;
+
+namespace onnxruntime {
+namespace contrib {
+namespace cuda {
+
+#define REGISTER_KERNEL_TYPED(T)                                        \
+  ONNX_OPERATOR_TYPED_KERNEL_EX(                                        \
+      RotaryEmbedding,                                                  \
+      kMSDomain,                                                        \
+      1,                                                                \
+      T,                                                                \
+      kCudaExecutionProvider,                                           \
+      (*KernelDefBuilder::Create())                                     \
+          .TypeConstraint("T", DataTypeImpl::GetTensorType<T>())        \
+          .TypeConstraint("M", DataTypeImpl::GetTensorType<int64_t>()), \
+      RotaryEmbedding<T>);
+
+REGISTER_KERNEL_TYPED(float)
+REGISTER_KERNEL_TYPED(MLFloat16)
+
+template <typename T>
+RotaryEmbedding<T>::RotaryEmbedding(const OpKernelInfo& info) : CudaKernel(info) {
+  scale = info.GetAttrOrDefault<float>("scale", 1.0);
+  interleaved = (info.GetAttrOrDefault<int64_t>("interleaved", 0) == 1);
+}
+
+template <typename T>
+Status RotaryEmbedding<T>::ComputeInternal(OpKernelContext* context) const {
+  const Tensor* input = context->Input<Tensor>(0);
+  const Tensor* position_ids = context->Input<Tensor>(1);
+  const Tensor* cos_cache = context->Input<Tensor>(2);
+  const Tensor* sin_cache = context->Input<Tensor>(3);
+
+  RotaryParameters parameters = {};
+  ORT_RETURN_IF_ERROR(rotary_embedding_helper::CheckInputs<Tensor>(input,
+                                                                   position_ids,
+                                                                   cos_cache,
+                                                                   sin_cache,
+                                                                   &parameters));
+
+  Tensor* output = context->Output(0, input->Shape());
+
+  if (parameters.sequence_length > parameters.max_sequence_length) {
+    // Launch update_cos_sin_cache kernel with scale
+    ORT_NOT_IMPLEMENTED("Updating cos_cache and sin_cache in RotaryEmbedding is not currently supported");
+  }
+
+  // Launch rotary embedding kernel
+  typedef typename ToCudaType<T>::MappedType CudaT;
+  auto& device_prop = GetDeviceProp();
+  return LaunchRotaryEmbeddingKernel<CudaT>(
+      Stream(context),
+      reinterpret_cast<CudaT*>(output->template MutableData<T>()),
+      reinterpret_cast<const CudaT*>(input->template Data<T>()),
+      position_ids->Data<int64_t>(),
+      reinterpret_cast<const CudaT*>(cos_cache->template Data<T>()),
+      reinterpret_cast<const CudaT*>(sin_cache->template Data<T>()),
+      parameters.batch_size,
+      parameters.sequence_length,
+      parameters.num_heads,
+      parameters.head_size,
+      parameters.max_sequence_length,
+      parameters.position_ids_format,
+      interleaved,
+      device_prop.maxThreadsPerBlock,
+      parameters.transposed);
+
+  return Status::OK();
+}
+
+}  // namespace cuda
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/bert/rotary_embedding.h b/onnxruntime/contrib_ops/cuda/bert/rotary_embedding.h
new file mode 100644
index 0000000000000..6dab2ad56749e
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/bert/rotary_embedding.h
@@ -0,0 +1,27 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+#include "core/common/common.h"
+#include "core/providers/cuda/cuda_kernel.h"
+
+namespace onnxruntime {
+namespace contrib {
+namespace cuda {
+
+using namespace onnxruntime::cuda;
+
+template <typename T>
+class RotaryEmbedding final : public CudaKernel {
+ public:
+  RotaryEmbedding(const OpKernelInfo& info);
+  Status ComputeInternal(OpKernelContext* context) const override;
+
+ protected:
+  float scale;
+  bool interleaved;
+};
+
+}  // namespace cuda
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/bert/rotary_embedding_impl.cu b/onnxruntime/contrib_ops/cuda/bert/rotary_embedding_impl.cu
new file mode 100644
index 0000000000000..e1b83bd8caf54
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/bert/rotary_embedding_impl.cu
@@ -0,0 +1,158 @@
+/*
+Copyright (c) Microsoft Corporation.
+Licensed under the MIT License.
+*/
+
+/*
+Kernel implementation for rotary embeddings.
+*/
+
+#include <cuda_fp16.h>
+#include "core/providers/cuda/cu_inc/common.cuh"
+#include "contrib_ops/cuda/bert/rotary_embedding_impl.h"
+
+using namespace onnxruntime::cuda;
+
+namespace onnxruntime {
+namespace contrib {
+namespace cuda {
+
+template <typename T>
+__global__ void RotaryEmbeddingBSNH(T* output,                   // BxSxNxH
+                                    const T* input,              // BxSxNxH
+                                    const T* cos_cache,          // Mx(H/2)
+                                    const T* sin_cache,          // Mx(H/2)
+                                    const int64_t* position_ids, // (1) or BxS
+                                    const int sequence_length,
+                                    const int num_heads,
+                                    const int head_size,
+                                    const int position_ids_format,
+                                    const bool interleaved,
+                                    const int batch_stride,
+                                    const int seq_stride,
+                                    const int head_stride) {
+  // B = batch size, S = sequence length, N = num heads, H = head size, M = max sequence length
+  // Use .x in innermost loop to access global memory efficiently
+  
+  const int b = blockIdx.z;
+  const int s = blockIdx.y;
+  const int n = blockIdx.x;
+
+  const int i = threadIdx.x;
+
+  const int block_offset = b * batch_stride + s * seq_stride + n * head_stride;
+
+  const T* input_data = input + block_offset;
+  T* output_data = output + block_offset;
+
+  // Cache is (M, H/2)
+  const int half_head_size = head_size / 2;
+  const int position_id = (position_ids_format == 0) ? \
+                          static_cast<int>(position_ids[0]) + s \
+                          : static_cast<int>(position_ids[b * sequence_length + s]);
+  const int cache_offset = position_id * half_head_size;
+  const T* cos_data = cos_cache + cache_offset;
+  const T* sin_data = sin_cache + cache_offset;
+
+  int cache_idx = 0;
+  T sign = 0;
+  int j = 0;
+  if (interleaved) {
+    cache_idx = (i / 2) % half_head_size;
+    sign = (i % 2 == 0) ? -1 : 1;
+    j = (i % 2 == 0) ? i+1 : i-1;  // i - sign
+  } else {
+    cache_idx = i % half_head_size;
+    sign = (i < half_head_size) ? -1 : 1;
+    j = (i + half_head_size) % head_size;
+  }
+  output_data[i] = input_data[i] * cos_data[cache_idx] + sign * input_data[j] * sin_data[cache_idx];
+}
+
+
+template <typename T>
+Status LaunchRotaryEmbeddingKernel(
+    cudaStream_t stream,
+    T* output,
+    const T* input,
+    const int64_t* position_ids,
+    const T* cos_cache,
+    const T* sin_cache,
+    const int batch_size,
+    const int sequence_length,
+    const int num_heads,
+    const int head_size,
+    const int max_sequence_length,
+    const int position_ids_format,
+    const bool interleaved,
+    const int max_threads_per_block,
+    const bool transposed) {
+
+  constexpr int smem_size = 0;
+  const dim3 grid(num_heads, sequence_length, batch_size);
+  const dim3 block(head_size, 1, 1);
+
+  // Note: Current implementation assumes head_size <= max_threads_per_block
+  // because head_size is currently large for LLaMA-2. For smaller head_size
+  // and num_heads values, we can create a block as `block(num_heads, head_size, 1)`
+  // instead. This will require kernel changes to support.
+
+  // Default input tensor shape is [batch, seq, hidden_size]
+  int head_stride = head_size;
+  int seq_stride = num_heads * head_stride;
+  int batch_stride = sequence_length * seq_stride;
+  if (transposed) {
+    // When transposed, input tensor shape is [batch, num_heads, seq, head_size]
+    seq_stride = head_size;
+    head_stride = sequence_length * seq_stride;
+    batch_stride = num_heads * head_stride;
+  }
+
+  assert(head_size <= max_threads_per_block);
+  RotaryEmbeddingBSNH<<<grid, block, smem_size, stream>>>(
+    output, input, cos_cache, sin_cache, position_ids,
+    sequence_length, num_heads, head_size, position_ids_format, interleaved,
+    batch_stride, seq_stride, head_stride
+  );
+
+  return CUDA_CALL(cudaGetLastError());
+}
+
+template Status LaunchRotaryEmbeddingKernel<float>(
+    cudaStream_t stream,
+    float* output,
+    const float* input,
+    const int64_t* position_ids,
+    const float* cos_cache,
+    const float* sin_cache,
+    const int batch_size,
+    const int sequence_length,
+    const int num_heads,
+    const int head_size,
+    const int max_sequence_length,
+    const int position_ids_format,
+    const bool interleaved,
+    const int max_threads_per_block,
+    const bool transposed);
+
+template Status LaunchRotaryEmbeddingKernel<half>(
+    cudaStream_t stream,
+    half* output,
+    const half* input,
+    const int64_t* position_ids,
+    const half* cos_cache,
+    const half* sin_cache,
+    const int batch_size,
+    const int sequence_length,
+    const int num_heads,
+    const int head_size,
+    const int max_sequence_length,
+    const int position_ids_format,
+    const bool interleaved,
+    const int max_threads_per_block,
+    const bool transposed);
+
+
+}  // namespace cuda
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/bert/rotary_embedding_impl.h b/onnxruntime/contrib_ops/cuda/bert/rotary_embedding_impl.h
new file mode 100644
index 0000000000000..ee1ccc43dcbff
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/bert/rotary_embedding_impl.h
@@ -0,0 +1,32 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+#include "core/common/common.h"
+#include "core/providers/cuda/shared_inc/cuda_utils.h"
+
+namespace onnxruntime {
+namespace contrib {
+namespace cuda {
+
+template <typename T>
+Status LaunchRotaryEmbeddingKernel(
+    cudaStream_t stream,
+    T* output,
+    const T* input,
+    const int64_t* position_ids,
+    const T* cos_cache,
+    const T* sin_cache,
+    const int batch_size,
+    const int sequence_length,
+    const int num_heads,
+    const int head_size,
+    const int max_sequence_length,
+    const int position_ids_format,
+    const bool interleaved,
+    const int max_threads_per_block,
+    const bool transposed);
+
+}  // namespace cuda
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/bert/skip_layer_norm.cc b/onnxruntime/contrib_ops/cuda/bert/skip_layer_norm.cc
index 78174181acdc8..3299bc2cb11de 100644
--- a/onnxruntime/contrib_ops/cuda/bert/skip_layer_norm.cc
+++ b/onnxruntime/contrib_ops/cuda/bert/skip_layer_norm.cc
@@ -3,6 +3,7 @@
 
 #include "core/providers/cuda/cuda_common.h"
 #include "core/providers/cuda/nn/layer_norm_impl.h"
+#include "core/common/narrow.h"
 #include "skip_layer_norm.h"
 #include "skip_layer_norm_impl.h"
 #include "contrib_ops/cpu/skip_layer_norm_helper.h"
@@ -50,6 +51,11 @@ template <typename T, bool Simplified>
 Status SkipLayerNorm<T, Simplified>::ComputeInternal(OpKernelContext* ctx) const {
   const Tensor* input = ctx->Input<Tensor>(0);
   const Tensor* skip = ctx->Input<Tensor>(1);
+  if (strict_ && skip->Shape() != input->Shape()) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                           "'input' and 'skip' shall have same shape when enable_skip_layer_norm_strict_mode is True");
+  }
+
   const Tensor* gamma = ctx->Input<Tensor>(2);
 
   const Tensor* beta = Simplified ? nullptr : ctx->Input<Tensor>(3);
@@ -57,16 +63,13 @@ Status SkipLayerNorm<T, Simplified>::ComputeInternal(OpKernelContext* ctx) const
 
   Tensor* output = ctx->Output(0, input->Shape());
 
-  // For inferencing, we support one more optional output which is the sum
-  // of the input and skip tensors
-  Tensor* skip_input_bias_add_output = ctx->Output(3, input->Shape());
+  // Optional output for the sum of skip, input and bias tensors (It is also the input of Layer Normalization).
+  Tensor* sum_output = ctx->Output(3, input->Shape());
 
   const auto& input_dims = input->Shape().GetDims();
   size_t input_dims_size = input_dims.size();
-  const auto& skip_dims = skip->Shape().GetDims();
-  size_t skip_dims_size = skip_dims.size();
 
-  int hidden_size = static_cast<int>(input_dims[input_dims_size - 1]);
+  int hidden_size = onnxruntime::narrow<int>(input_dims[input_dims_size - 1]);
 
   ORT_RETURN_IF_ERROR(onnxruntime::contrib::skip_layer_norm_helper::CheckInputs<Tensor>(input,
                                                                                         skip,
@@ -76,12 +79,15 @@ Status SkipLayerNorm<T, Simplified>::ComputeInternal(OpKernelContext* ctx) const
                                                                                         hidden_size,
                                                                                         input_dims_size));
 
-  const bool skip_broadcasted = (skip_dims[0] == 1 || skip_dims_size == 2) ? true : false;
-  const int skip_size = static_cast<int>(skip_dims[skip_dims_size - 1] * skip_dims[skip_dims_size - 2]);
+  int row_count = onnxruntime::narrow<int>(input->Shape().SizeToDimension(input_dims_size - 1));
+  if (row_count == 0) {
+    return Status::OK();
+  }
 
-  int row_count = gsl::narrow<int>(input->Shape().SizeToDimension(input_dims_size - 1));
   typedef typename ToCudaType<T>::MappedType CudaT;
 
+  const int skip_size = onnxruntime::narrow<int>(skip->Shape().Size());
+
   if (strict_) {
     HostApplyLayerNorm<CudaT, float, CudaT, Simplified>(
         GetDeviceProp(),
@@ -97,21 +103,20 @@ Status SkipLayerNorm<T, Simplified>::ComputeInternal(OpKernelContext* ctx) const
         (beta != nullptr) ? reinterpret_cast<const CudaT*>(beta->Data<T>()) : nullptr,  // beta
         reinterpret_cast<const CudaT*>(skip->Data<T>()),                                // skip or residual to add
         (bias != nullptr) ? reinterpret_cast<const CudaT*>(bias->Data<T>()) : nullptr,  // bias to add
-        skip_input_bias_add_output != nullptr ? reinterpret_cast<CudaT*>(skip_input_bias_add_output->MutableData<T>()) : nullptr);
+        sum_output != nullptr ? reinterpret_cast<CudaT*>(sum_output->MutableData<T>()) : nullptr);
   } else {
     LaunchSkipLayerNormKernel<CudaT, Simplified>(
         Stream(ctx),
         reinterpret_cast<CudaT*>(output->MutableData<T>()),
-        skip_input_bias_add_output != nullptr ? reinterpret_cast<CudaT*>(skip_input_bias_add_output->MutableData<T>()) : nullptr,
+        sum_output != nullptr ? reinterpret_cast<CudaT*>(sum_output->MutableData<T>()) : nullptr,
         reinterpret_cast<const CudaT*>(input->Data<T>()),
         reinterpret_cast<const CudaT*>(skip->Data<T>()),
+        (bias != nullptr) ? reinterpret_cast<const CudaT*>(bias->Data<T>()) : nullptr,
         reinterpret_cast<const CudaT*>(gamma->Data<T>()),
         (beta != nullptr) ? reinterpret_cast<const CudaT*>(beta->Data<T>()) : nullptr,
-        (bias != nullptr) ? reinterpret_cast<const CudaT*>(bias->Data<T>()) : nullptr,
         epsilon_,
         hidden_size,
         row_count,
-        skip_broadcasted,
         skip_size);
   }
 
diff --git a/onnxruntime/contrib_ops/cuda/bert/skip_layer_norm_impl.cu b/onnxruntime/contrib_ops/cuda/bert/skip_layer_norm_impl.cu
index f2ee076a8a03d..50c8e4b5e0398 100644
--- a/onnxruntime/contrib_ops/cuda/bert/skip_layer_norm_impl.cu
+++ b/onnxruntime/contrib_ops/cuda/bert/skip_layer_norm_impl.cu
@@ -51,61 +51,68 @@ half maybe2half(float x) {
 
 // Using only power of 2 numbers will lead to waste of compute for same size such as 768, which is a very common case
 // in BERT. Ideally we can step by wrap_size * num_unroll, but listing too many steps will cause long compile time.
-constexpr int kSizes[] = {32, 64, 128, 384, 768, 1024, 2048};
+constexpr int kSizes[] = {128, 320, 384, 640, 768, 1024, 1280, 2048, 4096, 5120, 8192};
+constexpr size_t kNumOfSizes = sizeof(kSizes) / sizeof(kSizes[0]);
+constexpr int kMaxSize = kSizes[kNumOfSizes - 1];
 constexpr int kMinBlockSize = 32;
-constexpr int kMaxBlockSize = 256;
+constexpr int kMaxBlockSize = 1024;
 
 int NextSize(int x) {
-  size_t len = sizeof(kSizes) / sizeof(kSizes[0]);
-  for (size_t i = 0; i < len; ++i) {
+  for (size_t i = 0; i < kNumOfSizes; ++i) {
     if (x <= kSizes[i]) {
       return kSizes[i];
     }
   }
-  return kSizes[len - 1];
+  return kMaxSize + 1;
 }
 
-template <typename T, int NumUnroll>
-bool CanVectorized(T* output, T* skip_input_bias_add_output, const T* input, const T* skip, const T* gamma,
-                   const T* beta, const T* bias, const int ld, const int next_size) {
-  constexpr int alignment = std::alignment_of<aligned_vector<T, NumUnroll>>::value;
-  return ld % NumUnroll == 0 && reinterpret_cast<uint64_t>(output) % alignment == 0 &&
-         reinterpret_cast<uint64_t>(skip_input_bias_add_output) % alignment == 0 &&
-         reinterpret_cast<uint64_t>(input) % alignment == 0 && reinterpret_cast<uint64_t>(skip) % alignment == 0 &&
-         reinterpret_cast<uint64_t>(gamma) % alignment == 0 && reinterpret_cast<uint64_t>(beta) % alignment == 0 &&
-         reinterpret_cast<uint64_t>(bias) % alignment == 0 && next_size / NumUnroll >= kMinBlockSize &&
-         next_size / NumUnroll <= kMaxBlockSize;
+bool CanVectorized(void* output, void* sum_output, const void* input, const void* skip, const void* bias,
+                   const void* gamma, const void* beta, const int ld, const int next_size, int num_unroll, int element_size) {
+  int alignment = element_size * num_unroll;
+  return ld % num_unroll == 0 &&
+         reinterpret_cast<uint64_t>(output) % alignment == 0 &&
+         reinterpret_cast<uint64_t>(sum_output) % alignment == 0 &&
+         reinterpret_cast<uint64_t>(input) % alignment == 0 &&
+         reinterpret_cast<uint64_t>(skip) % alignment == 0 &&
+         reinterpret_cast<uint64_t>(bias) % alignment == 0 &&
+         reinterpret_cast<uint64_t>(gamma) % alignment == 0 &&
+         reinterpret_cast<uint64_t>(beta) % alignment == 0 &&
+         next_size / num_unroll >= kMinBlockSize &&
+         next_size / num_unroll <= kMaxBlockSize;
 }
 }  // namespace
 
 template <typename T, unsigned TPB, bool Simplified>
 __global__ void SkipLayerNormKernel(
-    const int ld, const T* input, const T* skip,
-    const T* beta, const T* gamma, const T* bias,
-    const T epsilon, T* output, T* skip_input_bias_add_output, const bool skip_broadcasted, int skip_size) {
+    T* output, T* sum_output, const T* input, const T* skip, const T* bias, const T* gamma, const T* beta, T epsilon,
+    const int ld, int skip_size) {
   const T reverse_ld = T(1.f / ld);
   const int offset = blockIdx.x * ld;
+  const bool has_bias = (bias != nullptr);
 
+  // Reduce sum of x and x^2, and the results are divided by ld.
   KeyValuePairSum pair_sum;
-  // reduce x and x^2
   cub::KeyValuePair<T, T> thread_data(0, 0);
 
-
   for (int i = threadIdx.x; i < ld; i += TPB) {
     const int idx = offset + i;
 
-    const T skip_data = skip_broadcasted ? skip[idx % skip_size] : skip[idx];
-    const T val = (bias == nullptr) ? input[idx] + skip_data : input[idx] + skip_data + bias[i];
+    T val = input[idx];
+    if (has_bias) {
+      val += bias[i];
+    }
+    val += skip[idx % skip_size];
 
     const T rldval = reverse_ld * val;
     thread_data = pair_sum(thread_data, cub::KeyValuePair<T, T>(rldval, rldval * val));
 
-    if (skip_input_bias_add_output != nullptr) {
-      skip_input_bias_add_output[idx] = val;
+    if (sum_output != nullptr) {
+      sum_output[idx] = val;
     }
 
     output[idx] = val;
   }
+
   if (Simplified) {
     SimplifiedLayerNorm<T, TPB>(thread_data.value, ld, offset, gamma, epsilon, output);
     return;
@@ -116,106 +123,114 @@ __global__ void SkipLayerNormKernel(
 // Vectorized kernel
 template <typename T, unsigned TPB, int ILP, bool Simplified>
 __global__ void SkipLayerNormKernelSmall(
-    const int ld, const T* input, const T* skip, const T* beta, const T* gamma,
-    const T* bias, const T epsilon, T* output, T* skip_input_bias_add_output,
-    bool hasBias, bool hasSkipInputBiasAdditionOutput, const bool skip_broadcasted, const int skip_size) {
+    T* output, T* sum_output, const T* input, const T* skip, const T* bias, const T* gamma, const T* beta, T epsilon,
+    int ld, int skip_size) {
   const T rld = T(1.f / ld);
-  const int idx = blockIdx.x * ld + threadIdx.x * ILP;  // grid_size = n / ld
+  const int idx = blockIdx.x * ld + threadIdx.x * ILP;
 
   using VecT = aligned_vector<T, ILP>;
+  T sum_v[ILP];
 
-  T input_v[ILP], skip_v[ILP], bias_v[ILP], skip_input_bias_add_output_v[ILP];
+  cub::KeyValuePair<T, T> thread_data(T(0.f), T(0.f));
 
-  VecT* input_val = reinterpret_cast<VecT*>(&input_v);
-  *input_val = *reinterpret_cast<const VecT*>(&input[idx]);
+  if (ILP * threadIdx.x < ld) {  // load data under this guard to avoid reading out-of-bounds
+    T skip_v[ILP], bias_v[ILP];
 
-  VecT* skip_val = reinterpret_cast<VecT*>(&skip_v);
-  if (skip_broadcasted){
-  *skip_val = *reinterpret_cast<const VecT*>(&skip[idx % skip_size]);
-  }else{
-  *skip_val = *reinterpret_cast<const VecT*>(&skip[idx]);
-  }
+    // load input to sum_v
+    VecT* sum_val = reinterpret_cast<VecT*>(&sum_v);
+    *sum_val = *reinterpret_cast<const VecT*>(&input[idx]);
 
-  if (hasBias) {
-    VecT* bias_val = reinterpret_cast<VecT*>(&bias_v);
-    *bias_val = *reinterpret_cast<const VecT*>(&bias[threadIdx.x * ILP]);
-  }
+    VecT* skip_val = reinterpret_cast<VecT*>(&skip_v);
+    *skip_val = *reinterpret_cast<const VecT*>(&skip[idx % skip_size]);
 
-  cub::KeyValuePair<T, T> thread_data(T(0.f), T(0.f));
+    const bool has_bias = (bias != nullptr);
+    if (has_bias) {
+      VecT* bias_val = reinterpret_cast<VecT*>(&bias_v);
+      *bias_val = *reinterpret_cast<const VecT*>(&bias[threadIdx.x * ILP]);
+    }
 
-  if (ILP * threadIdx.x < ld) {
     T rldval_sum = T(0.f);
     T rldvalsq_sum = T(0.f);
+    const bool has_sum_output = (sum_output != nullptr);
+
 #pragma unroll
     for (int i = 0; i < ILP; i++) {
-      input_v[i] += hasBias ? skip_v[i] + bias_v[i] : skip_v[i];
-
-      if (hasSkipInputBiasAdditionOutput) {
-        skip_input_bias_add_output_v[i] = input_v[i];
+      if (has_bias) {
+        sum_v[i] += bias_v[i];
       }
+      sum_v[i] += skip_v[i];
 
-      const T rldval = rld * input_v[i];
+      const T rldval = rld * sum_v[i];
       rldval_sum += rldval;
-      rldvalsq_sum += rldval * input_v[i];
+      rldvalsq_sum += rldval * sum_v[i];
     }
 
-    if (hasSkipInputBiasAdditionOutput) {
-      *(reinterpret_cast<VecT*>(&skip_input_bias_add_output[idx])) = *reinterpret_cast<VecT*>(&skip_input_bias_add_output_v);
+    if (has_sum_output) {
+      *(reinterpret_cast<VecT*>(&sum_output[idx])) = *reinterpret_cast<VecT*>(&sum_v);
     }
 
     thread_data = cub::KeyValuePair<T, T>(rldval_sum, rldvalsq_sum);
   }
 
   if (Simplified) {
-    SimplifiedLayerNormSmall<T, TPB, ILP>(input_v, thread_data.value, ld, idx, gamma, epsilon, output);
+    SimplifiedLayerNormSmall<T, TPB, ILP>(sum_v, thread_data.value, ld, idx, gamma, epsilon, output);
     return;
   }
-  LayerNormSmall<T, TPB, ILP>(input_v, thread_data, ld, idx, beta, gamma, epsilon, output);
+  LayerNormSmall<T, TPB, ILP>(sum_v, thread_data, ld, idx, beta, gamma, epsilon, output);
 }
 
 template <typename T, bool Simplified>
 void LaunchSkipLayerNormKernel(
-    cudaStream_t stream, T* output, T* skip_input_bias_add_output, const T* input, const T* skip, const T* gamma,
-    const T* beta, const T* bias, float epsilon, int ld, int row_count, bool skip_broadcasted, int skip_size) {
-  if (row_count == 0) {
-    return;
-  }
-
-  bool hasBias = (bias == nullptr) ? false : true;
-  bool hasSkipInputBiasAdditionOutput = (skip_input_bias_add_output == nullptr) ? false : true;
-
+    cudaStream_t stream, T* output, T* sum_output,
+    const T* input, const T* skip, const T* bias, const T* gamma, const T* beta, float epsilon,
+    int ld, int row_count, int skip_size) {
   const int next_size = NextSize(ld);
   const int grid_size = row_count;
-  bool flag_vec2 =
-      CanVectorized<T, 2>(output, skip_input_bias_add_output, input, skip, gamma, beta, bias, ld, next_size);
-  bool flag_vec4 =
-      CanVectorized<T, 4>(output, skip_input_bias_add_output, input, skip, gamma, beta, bias, ld, next_size);
+  bool can_unroll_vec4 = CanVectorized(output, sum_output, input,
+                                       skip, bias, gamma,
+                                       beta, ld, next_size,
+                                       4, sizeof(T));
+  bool can_unroll_vec8 = CanVectorized(output, sum_output, input,
+                                       skip, bias, gamma,
+                                       beta, ld, next_size,
+                                       8, sizeof(T));
+
+#define LAUNCH_SKIP_LAYER_NORM_KERNEL_SMALL(num_unroll)                                                  \
+  SkipLayerNormKernelSmall<T, block_size, num_unroll, Simplified><<<grid_size, block_size, 0, stream>>>( \
+      output, sum_output, input, skip, bias, gamma, beta, maybe2half<T>(epsilon), ld, skip_size)
+
+#define LAUNCH_SKIP_LAYER_NORM_KERNEL()                                                 \
+  SkipLayerNormKernel<T, block_size, Simplified><<<grid_size, block_size, 0, stream>>>( \
+      output, sum_output, input, skip, bias, gamma, beta, maybe2half<T>(epsilon), ld, skip_size)
+
+#define CASE_NEXT_SIZE(next_size_value)                                         \
+  case next_size_value: {                                                       \
+    static_assert(next_size_value >= kSizes[0] && next_size_value <= kMaxSize); \
+    if constexpr (next_size_value >= 320) {                                 \
+      if (can_unroll_vec8) {                                                    \
+        constexpr int block_size = next_size_value / 8;                         \
+        LAUNCH_SKIP_LAYER_NORM_KERNEL_SMALL(8);                                 \
+      } else {                                                                  \
+        constexpr int block_size = 256;                                         \
+        LAUNCH_SKIP_LAYER_NORM_KERNEL();                                        \
+      }                                                                         \
+    } else {                                                                    \
+      if (can_unroll_vec4) {                                                    \
+        constexpr int block_size = next_size_value / 4;                         \
+        LAUNCH_SKIP_LAYER_NORM_KERNEL_SMALL(4);                                 \
+      } else {                                                                  \
+        if (next_size_value <= kMaxBlockSize) {                                 \
+          constexpr int block_size = next_size_value;                           \
+          LAUNCH_SKIP_LAYER_NORM_KERNEL_SMALL(1);                               \
+        } else {                                                                \
+          constexpr int block_size = 256;                                       \
+          LAUNCH_SKIP_LAYER_NORM_KERNEL();                                      \
+        }                                                                       \
+      }                                                                         \
+    }                                                                           \
+  } break
 
   switch (next_size) {
-#define LAUNCH_SKIP_LAYER_NORM_KERNEL_SMALL(num_unroll)                                                          \
-  SkipLayerNormKernelSmall<T, block_size, num_unroll, Simplified>                                                \
-      <<<grid_size, block_size, 0, stream>>>(ld, input, skip, beta, gamma, bias, maybe2half<T>(epsilon), output, \
-                                             skip_input_bias_add_output, hasBias, hasSkipInputBiasAdditionOutput, skip_broadcasted, skip_size)
-#define LAUNCH_SKIP_LAYER_NORM_KERNEL()                                                       \
-  SkipLayerNormKernel<T, kMaxBlockSize, Simplified><<<grid_size, kMaxBlockSize, 0, stream>>>( \
-      ld, input, skip, beta, gamma, bias, maybe2half<T>(epsilon), output, skip_input_bias_add_output, skip_broadcasted, skip_size)
-#define CASE_NEXT_SIZE(next_size_value)               \
-  case next_size_value: {                             \
-    if (flag_vec4) {                                  \
-      constexpr int block_size = next_size_value / 4; \
-      LAUNCH_SKIP_LAYER_NORM_KERNEL_SMALL(4);         \
-    } else if (flag_vec2) {                           \
-      constexpr int block_size = next_size_value / 2; \
-      LAUNCH_SKIP_LAYER_NORM_KERNEL_SMALL(2);         \
-    } else {                                          \
-      if (next_size_value <= kMaxBlockSize) {         \
-        constexpr int block_size = next_size_value;   \
-        LAUNCH_SKIP_LAYER_NORM_KERNEL_SMALL(1);       \
-      } else {                                        \
-        LAUNCH_SKIP_LAYER_NORM_KERNEL();              \
-      }                                               \
-    }                                                 \
-  } break
     CASE_NEXT_SIZE(kSizes[0]);
     CASE_NEXT_SIZE(kSizes[1]);
     CASE_NEXT_SIZE(kSizes[2]);
@@ -223,18 +238,27 @@ void LaunchSkipLayerNormKernel(
     CASE_NEXT_SIZE(kSizes[4]);
     CASE_NEXT_SIZE(kSizes[5]);
     CASE_NEXT_SIZE(kSizes[6]);
+    CASE_NEXT_SIZE(kSizes[7]);
+    CASE_NEXT_SIZE(kSizes[8]);
+    CASE_NEXT_SIZE(kSizes[9]);
+    CASE_NEXT_SIZE(kSizes[10]);
+    default: {
+      constexpr int block_size = 256;
+      LAUNCH_SKIP_LAYER_NORM_KERNEL();
+      break;
+    }
+  }
+
 #undef CASE_NEXT_SIZE
 #undef LAUNCH_SKIP_LAYER_NORM_KERNEL
 #undef LAUNCH_SKIP_LAYER_NORM_KERNEL_SMALL
-  }
 }
 
-#define SKIPLAYERNORM_IMPL(T, Simplified)                                                               \
-  template void LaunchSkipLayerNormKernel<T, Simplified>(cudaStream_t stream, T * output,               \
-                                                         T * skip_input_bias_add_output,                \
-                                                         const T* input, const T* skip, const T* gamma, \
-                                                         const T* beta, const T* bias, float epsilon,   \
-                                                         int ld, int row_count, bool skip_broadcasted, int skip_size);
+#define SKIPLAYERNORM_IMPL(T, Simplified)                                                                 \
+  template void LaunchSkipLayerNormKernel<T, Simplified>(cudaStream_t stream, T * output, T * sum_output, \
+                                                         const T* input, const T* skip, const T* bias,    \
+                                                         const T* gamma, const T* beta, float epsilon,    \
+                                                         int ld, int row_count, int skip_size);
 SKIPLAYERNORM_IMPL(float, true);
 SKIPLAYERNORM_IMPL(float, false);
 SKIPLAYERNORM_IMPL(half, true);
diff --git a/onnxruntime/contrib_ops/cuda/bert/skip_layer_norm_impl.h b/onnxruntime/contrib_ops/cuda/bert/skip_layer_norm_impl.h
index ffb5850c827fe..9727dd6236ec8 100644
--- a/onnxruntime/contrib_ops/cuda/bert/skip_layer_norm_impl.h
+++ b/onnxruntime/contrib_ops/cuda/bert/skip_layer_norm_impl.h
@@ -11,18 +11,17 @@ namespace cuda {
 template <typename T, bool Simplified>
 void LaunchSkipLayerNormKernel(
     cudaStream_t stream,
-    T* output,                      // normalized output tensor
-    T* skip_input_bias_add_output,  // sum of the input and skip (and bias if it exists) tensors output
-    const T* input,                 // input tensor
-    const T* skip,                  // skip tensor
-    const T* gamma,                 // Layer normalization gamma tensor
-    const T* beta,                  // Layer normalization beta tensor
-    const T* bias,                  // Layer normalization beta tensor
-    float epsilon,                  // Layer normalization epsilon
-    int hidden_size,                // hidden size, it is the leading dimension (ld)
-    int row_count,                  // number of rows. That is total number of elements divided by hidden size.
-    bool skip_broadcasted,          // determines if broadcasting should be implemented
-    int skip_size);                 // determines size of the skip tensor
+    T* output,        // normalized output tensor
+    T* sum_output,    // sum of the input and skip (and bias if it exists) tensors output
+    const T* input,   // input tensor
+    const T* skip,    // skip tensor
+    const T* bias,    // bias tensor
+    const T* gamma,   // Layer normalization gamma tensor
+    const T* beta,    // Layer normalization beta tensor
+    float epsilon,    // Layer normalization epsilon
+    int hidden_size,  // hidden size, it is the leading dimension (ld)
+    int row_count,    // number of rows. That is total number of elements divided by hidden size.
+    int skip_size);   // number of elements of the skip tensor
 
 }  // namespace cuda
 }  // namespace contrib
diff --git a/onnxruntime/contrib_ops/cuda/collective/distributed_expand.cc b/onnxruntime/contrib_ops/cuda/collective/distributed_expand.cc
new file mode 100644
index 0000000000000..3cfa3ab959343
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/collective/distributed_expand.cc
@@ -0,0 +1,110 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+// Distributed computation.
+#include "distributed_expand.h"
+#include "sharding.h"
+#include "sharding_spec.h"
+#include "nccl_kernels.h"
+#include "mpi_include.h"
+
+// ORT system.
+#include "core/providers/cuda/tensor/expand.h"
+
+// std C++.
+#include <iostream>
+
+namespace onnxruntime {
+namespace contrib {
+namespace cuda {
+
+#if defined(ORT_USE_NCCL)
+
+template <typename T>
+DistributedExpand<T>::DistributedExpand(const OpKernelInfo& info) : DistributedKernel(info) {}
+
+template <typename T>
+Status DistributedExpand<T>::ComputeInternal(OpKernelContext* context) const {
+  ORT_ENFORCE(context != nullptr);
+  // Assumptions.
+  //  - Shape is not sharded.
+  // Algorithm.
+  //  - Compute logical output shape.
+  //  - Compute local output shape.
+  //  - Expand from local input to local output.
+
+  auto input_tensor = context->Input<Tensor>(0);
+  auto shape_tensor = context->Input<Tensor>(1);
+  const auto& input_sharding_spec = input_shard_specs_.at(0);
+  const auto& shape_sharding_spec = input_shard_specs_.at(1);
+  const auto& output_sharding_spec = output_shard_specs_.at(0);
+
+  ORT_ENFORCE(shape_sharding_spec.HasNoShard(),
+              "It's not worth to shard Shape tensor. "
+              "If sharding shape is needed, please submit a feature request.");
+  // Compute logical input shape.
+  const auto original_input_shape = ComputeOriginShape(input_tensor->Shape(), input_sharding_spec);
+
+  // Compute logical output shape.
+  // This `shape_tensor` stores the logical output shape.
+  const auto* p_shape = shape_tensor->Data<int64_t>();
+  TensorShapeVector original_output_dims{p_shape, p_shape + shape_tensor->Shape().Size()};
+  TensorShape original_output_shape(original_output_dims);
+  ORT_ENFORCE(
+      onnxruntime::cuda::ComputeOutputShape(
+          Node().Name(),
+          original_input_shape,
+          original_output_dims, original_output_shape)
+          .IsOK());
+
+  // Compute local output shape.
+  const auto local_output_shape = ComputeShardShape(original_output_shape, output_sharding_spec);
+
+  auto output_tensor = context->Output(0, local_output_shape);
+
+  return FuncExpand(
+      this,
+      context,
+      input_tensor,
+      shape_tensor,
+      output_tensor);
+}
+
+ONNX_OPERATOR_TYPED_KERNEL_EX(
+    DistributedExpand,
+    kMSDomain,
+    1,
+    int64_t,
+    kCudaExecutionProvider,
+    (*KernelDefBuilder::Create())
+        .TypeConstraint("T", DataTypeImpl::GetTensorType<int64_t>())
+        .InputMemoryType(OrtMemTypeCPUInput, 1),
+    DistributedExpand<int64_t>);
+
+ONNX_OPERATOR_TYPED_KERNEL_EX(
+    DistributedExpand,
+    kMSDomain,
+    1,
+    float,
+    kCudaExecutionProvider,
+    (*KernelDefBuilder::Create())
+        .TypeConstraint("T", DataTypeImpl::GetTensorType<float>())
+        .InputMemoryType(OrtMemTypeCPUInput, 1),
+    DistributedExpand<float>);
+
+ONNX_OPERATOR_TYPED_KERNEL_EX(
+    DistributedExpand,
+    kMSDomain,
+    1,
+    MLFloat16,
+    kCudaExecutionProvider,
+    (*KernelDefBuilder::Create())
+        .TypeConstraint("T", DataTypeImpl::GetTensorType<MLFloat16>())
+        .InputMemoryType(OrtMemTypeCPUInput, 1),
+    DistributedExpand<MLFloat16>);
+
+#endif
+
+}  // namespace cuda
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/collective/distributed_expand.h b/onnxruntime/contrib_ops/cuda/collective/distributed_expand.h
new file mode 100644
index 0000000000000..dedb1bdc5aa36
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/collective/distributed_expand.h
@@ -0,0 +1,35 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "sharding_spec.h"
+#include "sharding.h"
+#include "core/providers/cuda/cuda_kernel.h"
+
+#include <algorithm>
+#include <tuple>
+#include <optional>
+#include <string>
+#include <nccl.h>
+#include <sstream>
+
+#pragma once
+
+namespace onnxruntime {
+namespace contrib {
+namespace cuda {
+
+#if defined(ORT_USE_NCCL)
+
+template <typename T>
+class DistributedExpand final : public DistributedKernel {
+ public:
+  explicit DistributedExpand(const OpKernelInfo& info);
+
+  Status ComputeInternal(OpKernelContext* context) const override;
+};
+
+#endif
+
+}  // namespace cuda
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/collective/distributed_matmul.cc b/onnxruntime/contrib_ops/cuda/collective/distributed_matmul.cc
new file mode 100644
index 0000000000000..9008edbf3db30
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/collective/distributed_matmul.cc
@@ -0,0 +1,292 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+// Distributed computation.
+#include "sharding.h"
+#include "distributed_matmul.h"
+#include "mpi_include.h"
+
+// ORT system.
+#include "core/providers/cpu/tensor/slice.h"
+#include "core/providers/cuda/tensor/slice.h"
+#include "core/providers/cuda/math/matmul.h"
+#include "core/providers/cuda/tensor/transpose.h"
+#include "core/providers/cuda/cuda_check_memory.h"
+
+// std C++.
+#include <iostream>
+
+namespace onnxruntime {
+namespace contrib {
+namespace cuda {
+
+#if defined(ORT_USE_NCCL)
+
+static TensorShape InferMatmulOutputShape(
+    const TensorShape& shape_A,
+    const TensorShape& shape_B) {
+  // left_shape: [M, K]
+  // right_shape: [K, N]
+  // output_shape: [M, N]
+  ORT_ENFORCE(
+      shape_A.NumDimensions() >= 2 && shape_B.NumDimensions() >= 2,
+      "1-D tensor is not supported by this MatMul.");
+  ORT_ENFORCE(
+      shape_A.NumDimensions() == shape_B.NumDimensions(),
+      "A and B must have the same rank after shape broadcasting.");
+  size_t rank = shape_A.NumDimensions();
+  std::vector<int64_t> shape_Y(rank, 0);
+  for (size_t i = 0; i < rank; ++i) {
+    const int64_t dim_A = shape_A[i];
+    const int64_t dim_B = shape_B[i];
+    if (i == rank - 1) {
+      shape_Y[i] = dim_B;
+    } else if (i == rank - 2) {
+      shape_Y[i] = dim_A;
+    } else if (dim_A == 1 && dim_B >= 1) {
+      // dim_A is 1.
+      // dim_B can be either 1 or other positive integer.
+      // due ot shape broadcast.
+      shape_Y[i] = dim_B;
+    } else if (dim_B == 1 && dim_A >= 1) {
+      // dim_B is 1.
+      // dim_A can be either 1 or other positive integer.
+      // due ot shape broadcast.
+      shape_Y[i] = dim_A;
+    } else {
+      ORT_ENFORCE(dim_A == dim_B, "Broadcasting can only happen when one of dim_A and dim_B is 1.");
+      shape_Y[i] = dim_A;
+    }
+  }
+  return TensorShape(shape_Y);
+};
+
+template <typename T>
+DistributedMatMul<T>::DistributedMatMul(const OpKernelInfo& info) : DistributedKernel(info) {
+}
+
+template <typename T>
+Status DistributedMatMul<T>::ComputeInternal(OpKernelContext* context) const {
+  const auto tensor_shard_A = context->Input<Tensor>(0);
+  const auto tensor_shard_B = context->Input<Tensor>(1);
+  const auto& tensor_shard_shape_A = tensor_shard_A->Shape();
+  const auto& tensor_shard_shape_B = tensor_shard_B->Shape();
+
+  auto rank_A = tensor_shard_shape_A.NumDimensions();
+  auto rank_B = tensor_shard_shape_B.NumDimensions();
+  // TODO(wechi): Fix MatMul(1-D, *) and MatMul(*, 1-D) cases.
+  ORT_ENFORCE(rank_A >= 2 && rank_B >= 2, "Broadcast rule for 1-D tensor is different than other cases.");
+
+  const TensorPartitionSpec& spec_A = input_shard_specs_[0];
+  const TensorPartitionSpec& spec_B = input_shard_specs_[1];
+  const TensorPartitionSpec& spec_Y = output_shard_specs_[0];
+
+  const auto tensor_shape_A = ComputeOriginShape(tensor_shard_shape_A, spec_A);
+  const auto tensor_shape_B = ComputeOriginShape(tensor_shard_shape_B, spec_B);
+
+  TensorShape normalized_shape_A;
+  TensorShape normalized_shape_B;
+  std::tie(normalized_shape_A, normalized_shape_B) = NormalizeShapes(tensor_shape_A, tensor_shape_B);
+
+  TensorPartitionSpec normalized_spec_A;
+  TensorPartitionSpec normalized_spec_B;
+  std::tie(normalized_spec_A, normalized_spec_B) = NormalizeTensorPartitionSpecs(spec_A, spec_B);
+
+  const auto tensor_shape_Y = InferMatmulOutputShape(normalized_shape_A, normalized_shape_B);
+  const auto tensor_shard_shape_Y = ComputeShardShape(tensor_shape_Y, spec_Y);
+
+  // Case 1: A is not sharded, B is sharded.
+  //  1. shard on -1: MatMul(RR, RS) -> RS
+  //  2. shard on -2: MatMul(RR, SR) -> MatMul(RS, SR) + AllReduce -> RR
+  //  3. shard on other axis
+  if (normalized_spec_A.HasNoShard() && normalized_spec_B.HasShard()) {
+    if (normalized_spec_B.OnlyShardAxis(-1)) {
+      // Case 1-1
+      // MatMul(RR, RS) -> RS
+      ORT_ENFORCE(spec_Y.OnlyShardAxis(-1), "Not supported yet.");
+      auto tensor_shard_Y = context->Output(0, tensor_shard_shape_Y);
+      ORT_ENFORCE(onnxruntime::cuda::FuncMatMul<T>(
+                      this, context, tensor_shard_A, tensor_shard_B, 1.0, false, false, false, false, tensor_shard_Y) == Status::OK());
+    } else if (normalized_spec_B.OnlyShardAxis(-2)) {
+      // Case 1-2
+      // MatMul(RR, SR) -> MatMul(RS, SR) + AllReduce -> RR
+      auto tmp_spec_A = CreateTensorShardSpec(spec_A.device_mesh, 0, -1, rank_A);
+      auto tmp_tensor_shard_A = ReshardTensor(this, context, spec_A, tmp_spec_A, nccl_->Rank(), tensor_shard_A);
+
+      auto tensor_shard_Y = context->Output(0, tensor_shard_shape_Y);
+      ORT_ENFORCE(onnxruntime::cuda::FuncMatMul<T>(
+                      this, context, tmp_tensor_shard_A.get(), tensor_shard_B, 1.0, false, false, false, false, tensor_shard_Y) == Status::OK());
+      ORT_ENFORCE(FuncAllReduce(
+                      nccl_->Comm(), Stream(context), tensor_shard_Y, tensor_shard_Y) == Status::OK());
+    } else {
+      // Case 1-3
+      ORT_THROW("Not supported yet.");
+    }
+  }
+
+  // Case 2: A is sharded, B is not sharded.
+  //  1. shard on -1: MatMul(RS, RR) -> MatMul(RS, SR) -> MatMul(RS, SR) + AllReduce -> RR
+  //  2. shard on -2: MatMul(SR, RR) -> SR
+  //  3. shard on other axis: : MatMul(SRR, RRR) -> MatMul(SRR, SRR) -> SRR
+  if (spec_A.HasShard() && spec_B.HasNoShard()) {
+    if (spec_A.OnlyShardAxis(-1) && spec_Y.HasNoShard()) {
+      // Case 2-1
+      // Y is not really sharded in this case.
+      auto tensor_shard_Y = context->Output(0, tensor_shard_shape_Y);
+
+      // TODO: Support cases with multi-dimension device mesh.
+      TensorPartitionSpec new_spec_B = CreateTensorShardSpec(spec_B.device_mesh, 0, -2, rank_B);
+      auto tensor_reshard_B = ShardTensor(this, context, new_spec_B, nccl_->Rank(), tensor_shard_B);
+
+      ORT_ENFORCE(onnxruntime::cuda::FuncMatMul<T>(
+                      this, context, tensor_shard_A, tensor_reshard_B.get(), 1.0, false, false, false, false, tensor_shard_Y) == Status::OK());
+
+      ORT_ENFORCE(FuncAllReduce(
+                      nccl_->Comm(), Stream(context), tensor_shard_Y, tensor_shard_Y) == Status::OK());
+      return Status::OK();
+    } else if (spec_A.OnlyShardAxis(-2) && spec_Y.OnlyShardAxis(-2)) {
+      // Case 2-2
+      auto tensor_shard_Y = context->Output(0, tensor_shard_shape_Y);
+      ORT_ENFORCE(onnxruntime::cuda::FuncMatMul<T>(
+                      this, context, tensor_shard_A, tensor_shard_B, 1.0, false, false, false, false, tensor_shard_Y) == Status::OK());
+      return Status::OK();
+    } else if (spec_A.GetPartitionAxis() < gsl::narrow<int64_t>(tensor_shape_A.NumDimensions()) - 2 && normalized_spec_A.GetPartitionAxis() == spec_Y.GetPartitionAxis()) {
+      // Case 2-3
+      if (normalized_shape_B[normalized_spec_A.GetPartitionAxis()] == 1) {
+        // Case 2-3-1.
+        // B is broadcasted to along sharding axis in A.
+        // E.g., MatMul(A(SRR), B(RR)) where normalized_shape_A = [2, 3, 4] and normalized_shape_B = [1, 4, 3].
+        // No resharding is required.
+        auto tensor_shard_Y = context->Output(0, tensor_shard_shape_Y);
+        ORT_ENFORCE(onnxruntime::cuda::FuncMatMul<T>(
+                        this, context, tensor_shard_A, tensor_shard_B, 1.0, false, false, false, false, tensor_shard_Y) == Status::OK());
+        return Status::OK();
+      } else {
+        // Case 2-3-2.
+        // No broadcasting
+        // Allocate tensor based on shape sharded non-matrix axis.
+        // MatMul(SRR, RRR) -> MatMul(SRR, SRR) -> SRR
+        auto tensor_shard_Y = context->Output(0, tensor_shard_shape_Y);
+        TensorPartitionSpec new_spec_B = CreateTensorShardSpec(
+            spec_B.device_mesh,
+            0,
+            spec_A.GetNegativePartitionAxis(),
+            rank_B);
+        auto tensor_reshard_B = ShardTensor(this, context, new_spec_B, nccl_->Rank(), tensor_shard_B);
+        ORT_ENFORCE(onnxruntime::cuda::FuncMatMul<T>(
+                        this, context, tensor_shard_A, tensor_reshard_B.get(), 1.0, false, false, false, false, tensor_shard_Y) == Status::OK());
+        return Status::OK();
+      }
+    } else {
+      ORT_THROW("Not supported yet.");
+    }
+  }
+
+  // Case 3: A is sharded, B is sharded.
+  //  1. shard on (-1, -1): MatMul(RS, RS) -> MatMul(RS, SR) + AllReduce -> RR
+  //                                       -> MatMul(RR, RS) -> RS
+  //  2. shard on (-1, -2): MatMul(RS, SR) -> MatMul(RS, SR) + AllReduce -> RR
+  //  3. shard on (-2, -1): MatMul(SR, RS) -> MatMul(RS, SR) + AllReduce -> RR
+  //  4. shard on (-2, -2): MatMul(SR, SR) -> MatMul(RS, SR) + AllReduce -> RR
+  //  5. shard on other axes
+  if (spec_A.HasShard() && spec_B.HasShard()) {
+    if (spec_A.OnlyShardAxis(-1) && spec_B.OnlyShardAxis(-1)) {
+      // Case 3-1
+      if (spec_Y.HasNoShard()) {
+        // Case 3-1-1
+        auto tmp_spec_B = CreateTensorShardSpec(spec_B.device_mesh, 0, -2, rank_B);
+        auto tmp_tensor_shard_B = ReshardTensor(this, context, spec_B, tmp_spec_B, nccl_->Rank(), tensor_shard_B);
+        auto tensor_shard_Y = context->Output(0, tensor_shard_shape_Y);
+        ORT_ENFORCE(onnxruntime::cuda::FuncMatMul<T>(
+                        this, context, tensor_shard_A, tmp_tensor_shard_B.get(), 1.0, false, false, false, false, tensor_shard_Y) == Status::OK());
+        ORT_ENFORCE(FuncAllReduce(
+                        nccl_->Comm(), Stream(context), tensor_shard_Y, tensor_shard_Y) == Status::OK());
+      } else if (spec_Y.OnlyShardAxis(-1)) {
+        // Cas 3-1-2
+        auto tmp_spec_A = TensorPartitionSpec::CreateAllReplica(spec_A);
+        auto tmp_tensor_shard_A = ReshardTensor(this, context, spec_A, tmp_spec_A, nccl_->Rank(), tensor_shard_A);
+        auto tensor_shard_Y = context->Output(0, tensor_shard_shape_Y);
+        ORT_ENFORCE(onnxruntime::cuda::FuncMatMul<T>(
+                        this, context, tmp_tensor_shard_A.get(), tensor_shard_B, 1.0, false, false, false, false, tensor_shard_Y) == Status::OK());
+      } else {
+        ORT_THROW("Not supported yet.");
+      }
+    } else if (spec_A.OnlyShardAxis(-1) && spec_B.OnlyShardAxis(-2) && spec_Y.HasNoShard()) {
+      // Case 3-2
+      auto tensor_shard_Y = context->Output(0, tensor_shard_shape_Y);
+
+      auto status = onnxruntime::cuda::FuncMatMul<T>(
+          this, context, tensor_shard_A, tensor_shard_B, 1.0, false, false, false, false, tensor_shard_Y);
+      ORT_ENFORCE(status == Status::OK(), status.ErrorMessage());
+
+      status = FuncAllReduce(
+          nccl_->Comm(), Stream(context), tensor_shard_Y, tensor_shard_Y);
+      ORT_ENFORCE(status == Status::OK(), status.ErrorMessage());
+    } else if (spec_A.OnlyShardAxis(-2) && spec_B.OnlyShardAxis(-1)) {
+      // Case 3-3:
+      // MatMul(SR, RS) -> MatMul(RS, SR) + AllReduce -> RR
+      ORT_ENFORCE(spec_Y.HasNoShard(), "Not supported yet.");
+
+      // A[RS]
+      auto tmp_spec_A = CreateTensorShardSpec(spec_A.device_mesh, 0, -1, rank_A);
+      auto tmp_tensor_shard_A = ReshardTensor(this, context, spec_A, tmp_spec_A, nccl_->Rank(), tensor_shard_A);
+
+      // B[SR]
+      auto tmp_spec_B = CreateTensorShardSpec(spec_B.device_mesh, 0, -2, rank_B);
+      auto tmp_tensor_shard_B = ReshardTensor(this, context, spec_B, tmp_spec_B, nccl_->Rank(), tensor_shard_B);
+
+      // Allocate Y[RR]
+      auto tensor_shard_Y = context->Output(0, tensor_shard_shape_Y);
+
+      // Run local MatMul(A[RS], B[SR])
+      ORT_ENFORCE(onnxruntime::cuda::FuncMatMul<T>(
+                      this, context, tmp_tensor_shard_A.get(), tmp_tensor_shard_B.get(), 1.0, false, false, false, false, tensor_shard_Y) == Status::OK());
+      ORT_ENFORCE(FuncAllReduce(
+                      nccl_->Comm(), Stream(context), tensor_shard_Y, tensor_shard_Y) == Status::OK());
+    } else if (spec_A.OnlyShardAxis(-2) && spec_B.OnlyShardAxis(-2)) {
+      // Case 3-4
+      // MatMul(SR, SR) -> MatMul(RS, SR) + AllReduce -> RR
+      ORT_ENFORCE(spec_Y.HasNoShard(), "Not supported yet.");
+      auto tmp_spec_A = CreateTensorShardSpec(spec_A.device_mesh, 0, -1, rank_A);
+      auto tmp_tensor_shard_A = ReshardTensor(this, context, spec_A, tmp_spec_A, nccl_->Rank(), tensor_shard_A);
+      auto tensor_sard_Y = context->Output(0, tensor_shard_shape_Y);
+      ORT_ENFORCE(onnxruntime::cuda::FuncMatMul<T>(
+                      this, context, tmp_tensor_shard_A.get(), tensor_shard_B, 1.0, false, false, false, false, tensor_sard_Y) == Status::OK());
+    } else {
+      // Case 3-5
+      ORT_THROW("Not supported yet.");
+    }
+  }
+
+  // Case 4: A is not sharded, B is not sharded.
+  //  - Easy!
+  return Status::OK();
+}
+
+ONNX_OPERATOR_TYPED_KERNEL_EX(
+    DistributedMatMul,
+    kMSDomain,
+    1,
+    float,
+    kCudaExecutionProvider,
+    (*KernelDefBuilder::Create())
+        .AllocateInputsContiguously()
+        .TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
+    DistributedMatMul<float>);
+
+ONNX_OPERATOR_TYPED_KERNEL_EX(
+    DistributedMatMul,
+    kMSDomain,
+    1,
+    MLFloat16,
+    kCudaExecutionProvider,
+    (*KernelDefBuilder::Create())
+        .AllocateInputsContiguously()
+        .TypeConstraint("T", DataTypeImpl::GetTensorType<MLFloat16>()),
+    DistributedMatMul<MLFloat16>);
+
+#endif
+
+}  // namespace cuda
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/collective/distributed_matmul.h b/onnxruntime/contrib_ops/cuda/collective/distributed_matmul.h
new file mode 100644
index 0000000000000..da07f9a8b2c7b
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/collective/distributed_matmul.h
@@ -0,0 +1,32 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+#include "sharding.h"
+
+#include <algorithm>
+#include <tuple>
+#include <optional>
+#include <string>
+#include <nccl.h>
+#include <sstream>
+
+#pragma once
+
+namespace onnxruntime {
+namespace contrib {
+namespace cuda {
+
+#if defined(ORT_USE_NCCL)
+
+template <typename T>
+class DistributedMatMul final : public DistributedKernel {
+ public:
+  explicit DistributedMatMul(const OpKernelInfo& info);
+
+  Status ComputeInternal(OpKernelContext* context) const override;
+};
+
+#endif
+
+}  // namespace cuda
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/collective/distributed_reduce.cc b/onnxruntime/contrib_ops/cuda/collective/distributed_reduce.cc
new file mode 100644
index 0000000000000..967f30a304ac2
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/collective/distributed_reduce.cc
@@ -0,0 +1,175 @@
+
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+// Distributed computation.
+#include "distributed_reduce.h"
+#include "sharding.h"
+#include "sharding_spec.h"
+#include "nccl_kernels.h"
+#include "mpi_include.h"
+
+// ORT system.
+#include "core/providers/cuda/cudnn_common.h"
+#include "core/providers/cuda/reduction/reduction_ops.h"
+
+// std C++.
+#include <iostream>
+
+namespace onnxruntime {
+namespace contrib {
+namespace cuda {
+
+#if defined(ORT_USE_NCCL)
+
+template <typename T>
+DistributedReduceBase<T>::DistributedReduceBase(
+    const OpKernelInfo& info,
+    cudnnReduceTensorOp_t cudnn_reduce_op) : DistributedKernel(info) {
+  keepdims_ = info.GetAttrOrDefault<int64_t>("keepdims", 1);
+  cudnn_reduce_op_ = cudnn_reduce_op;
+};
+
+template <typename T>
+Status DistributedReduceBase<T>::ComputeInternal(OpKernelContext* context) const {
+  const auto& input_sharding_spec = input_shard_specs_.at(0);
+  const auto& axes_sharding_spec = input_shard_specs_.at(1);
+  const auto& output_sharding_spec = output_shard_specs_.at(0);
+
+  ORT_ENFORCE(axes_sharding_spec.HasNoShard(),
+              "It's not worthy to shard axes tensor. "
+              "If sharding axes is needed, please submit a feature request.");
+
+  const Tensor* input_tensor = context->Input<Tensor>(0);
+  const Tensor* axes_tensor = context->Input<Tensor>(1);
+  ORT_ENFORCE(axes_tensor->Shape().NumDimensions() == 1, "Axes tensor must be an 1-D tensor.");
+  auto axes_span = axes_tensor->DataAsSpan<int64_t>();
+
+  // Case 1: empty axes means treating this reduction as an identity.
+  if (axes_span.empty()) {
+    ORT_ENFORCE(
+        input_sharding_spec == output_sharding_spec,
+        "Input and output sharding specs should be the same. Otherwise, resharding is needed.");
+    auto* output_tensor = context->Output(0, input_tensor->Shape());
+    CUDA_RETURN_IF_ERROR(cudaMemcpyAsync(output_tensor->MutableData<T>(), input_tensor->Data<T>(), input_tensor->SizeInBytes(),
+                                         cudaMemcpyDeviceToDevice, Stream(context)));
+    return Status::OK();
+  }
+
+  // Case 2: this is a valid reduction. Let's prepare for it.
+
+  bool sharding_on_reduced_axes = false;
+  for (auto axis_it = axes_span.begin(); input_sharding_spec.HasShard() && axis_it != axes_span.end(); ++axis_it) {
+    if (*axis_it == input_sharding_spec.GetPartitionAxis()) {
+      sharding_on_reduced_axes = true;
+      break;
+    }
+  }
+
+  if (sharding_on_reduced_axes) {
+    // Case 2-1: sharding on reduced axes.
+    ORT_THROW(onnxruntime::common::ONNXRUNTIME, onnxruntime::common::FAIL, "Not implemented. Resharding is required to make reduced axes replica.");
+  } else {
+    // Case 2-2: sharding on passing-through axes or no shard.
+    ORT_ENFORCE(
+        input_sharding_spec == output_sharding_spec,
+        "Input and output sharding specs should be the same. Otherwise, resharding is needed.");
+    onnxruntime::cuda::PrepareReduceMetadata metadata;
+    ORT_RETURN_IF_ERROR(
+        onnxruntime::cuda::PrepareForReduce(input_tensor, keepdims_, axes_span, metadata));
+    auto output_tensor = context->Output(0, metadata.squeezed_output_dims);
+
+    // Fast reduction is not deterministic, so sometimes we want to turn it off.
+    const bool enable_fast_but_non_deterministic_reduction = !context->GetUseDeterministicCompute();
+    return onnxruntime::cuda::ReduceComputeCore<T, CUDNN_REDUCE_TENSOR_NO_INDICES>(
+        /* GPU allocator */ Info().GetAllocator(OrtMemType::OrtMemTypeDefault),
+        *input_tensor, metadata, *output_tensor, cudnn_reduce_op_, axes_span,
+        /* calculate_log */ false, /* calculate_sqt */ false, /* log_sum_exp_ */ false,
+        enable_fast_but_non_deterministic_reduction, context->GetComputeStream());
+  }
+  return Status::OK();
+}
+
+template <typename T>
+DistributedReduceSum<T>::DistributedReduceSum(
+    const OpKernelInfo& info) : DistributedReduceBase<T>(info, CUDNN_REDUCE_TENSOR_ADD){};
+
+template <typename T>
+DistributedReduceMean<T>::DistributedReduceMean(
+    const OpKernelInfo& info) : DistributedReduceBase<T>(info, CUDNN_REDUCE_TENSOR_AVG){};
+
+template <typename T>
+DistributedReduceMax<T>::DistributedReduceMax(
+    const OpKernelInfo& info) : DistributedReduceBase<T>(info, CUDNN_REDUCE_TENSOR_MAX){};
+
+// ReduceSum
+ONNX_OPERATOR_TYPED_KERNEL_EX(
+    DistributedReduceSum,
+    kMSDomain,
+    1,
+    float,
+    kCudaExecutionProvider,
+    (*KernelDefBuilder::Create())
+        .TypeConstraint("T", DataTypeImpl::GetTensorType<float>())
+        .InputMemoryType(OrtMemTypeCPUInput, 1),
+    DistributedReduceSum<float>);
+ONNX_OPERATOR_TYPED_KERNEL_EX(
+    DistributedReduceSum,
+    kMSDomain,
+    1,
+    MLFloat16,
+    kCudaExecutionProvider,
+    (*KernelDefBuilder::Create())
+        .TypeConstraint("T", DataTypeImpl::GetTensorType<MLFloat16>())
+        .InputMemoryType(OrtMemTypeCPUInput, 1),
+    DistributedReduceSum<MLFloat16>);
+
+// ReduceMean
+ONNX_OPERATOR_TYPED_KERNEL_EX(
+    DistributedReduceMean,
+    kMSDomain,
+    1,
+    float,
+    kCudaExecutionProvider,
+    (*KernelDefBuilder::Create())
+        .TypeConstraint("T", DataTypeImpl::GetTensorType<float>())
+        .InputMemoryType(OrtMemTypeCPUInput, 1),
+    DistributedReduceMean<float>);
+ONNX_OPERATOR_TYPED_KERNEL_EX(
+    DistributedReduceMean,
+    kMSDomain,
+    1,
+    MLFloat16,
+    kCudaExecutionProvider,
+    (*KernelDefBuilder::Create())
+        .TypeConstraint("T", DataTypeImpl::GetTensorType<MLFloat16>())
+        .InputMemoryType(OrtMemTypeCPUInput, 1),
+    DistributedReduceMean<MLFloat16>);
+
+// ReduceMax
+ONNX_OPERATOR_TYPED_KERNEL_EX(
+    DistributedReduceMax,
+    kMSDomain,
+    1,
+    float,
+    kCudaExecutionProvider,
+    (*KernelDefBuilder::Create())
+        .TypeConstraint("T", DataTypeImpl::GetTensorType<float>())
+        .InputMemoryType(OrtMemTypeCPUInput, 1),
+    DistributedReduceMax<float>);
+ONNX_OPERATOR_TYPED_KERNEL_EX(
+    DistributedReduceMax,
+    kMSDomain,
+    1,
+    MLFloat16,
+    kCudaExecutionProvider,
+    (*KernelDefBuilder::Create())
+        .TypeConstraint("T", DataTypeImpl::GetTensorType<MLFloat16>())
+        .InputMemoryType(OrtMemTypeCPUInput, 1),
+    DistributedReduceMax<MLFloat16>);
+
+#endif
+
+}  // namespace cuda
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/collective/distributed_reduce.h b/onnxruntime/contrib_ops/cuda/collective/distributed_reduce.h
new file mode 100644
index 0000000000000..2939852c75c60
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/collective/distributed_reduce.h
@@ -0,0 +1,59 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "sharding_spec.h"
+#include "sharding.h"
+#include "core/providers/cuda/cuda_kernel.h"
+
+#include <algorithm>
+#include <tuple>
+#include <optional>
+#include <string>
+#include <nccl.h>
+#include <sstream>
+
+#pragma once
+
+namespace onnxruntime {
+namespace contrib {
+namespace cuda {
+
+#if defined(ORT_USE_NCCL)
+
+template <typename T>
+class DistributedReduceBase : public DistributedKernel {
+ public:
+  explicit DistributedReduceBase(const OpKernelInfo& info, cudnnReduceTensorOp_t cudnn_reduce_op);
+
+  Status ComputeInternal(OpKernelContext* context) const override;
+
+ private:
+  // ONNX attribute. If true, reduced axes are retained as dimensions with size one.
+  // Otherwise, drop reduced axes.
+  bool keepdims_;
+  cudnnReduceTensorOp_t cudnn_reduce_op_;
+};
+
+template <typename T>
+class DistributedReduceSum final : public DistributedReduceBase<T> {
+ public:
+  explicit DistributedReduceSum(const OpKernelInfo& info);
+};
+
+template <typename T>
+class DistributedReduceMean final : public DistributedReduceBase<T> {
+ public:
+  explicit DistributedReduceMean(const OpKernelInfo& info);
+};
+
+template <typename T>
+class DistributedReduceMax final : public DistributedReduceBase<T> {
+ public:
+  explicit DistributedReduceMax(const OpKernelInfo& info);
+};
+
+#endif
+
+}  // namespace cuda
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/collective/distributed_reshape.cc b/onnxruntime/contrib_ops/cuda/collective/distributed_reshape.cc
new file mode 100644
index 0000000000000..e413ccf580870
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/collective/distributed_reshape.cc
@@ -0,0 +1,861 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+// Distributed computation.
+#include "distributed_reshape.h"
+#include "sharding.h"
+#include "sharding_spec.h"
+#include "nccl_kernels.h"
+#include "mpi_include.h"
+
+// ORT system.
+#include "core/providers/cuda/tensor/transpose.h"
+#include "core/providers/cuda/cuda_check_memory.h"
+
+// std C++.
+#include <iostream>
+
+namespace onnxruntime {
+namespace contrib {
+namespace cuda {
+
+#if defined(ORT_USE_NCCL)
+
+// Return true if src_shape[src_begin:src_end] is the same as
+// dst_shape[dst_begin:dst_end]. Otherwise, return false.
+// TODO: replace std::vector with gsl::span.
+bool CompareSubVectors(
+    const std::vector<int64_t>& src_shape,
+    const std::vector<int64_t>& dst_shape,
+    size_t src_begin, size_t src_end,
+    size_t dst_begin, size_t dst_end) {
+  if (src_end - src_begin != dst_end - dst_begin) {
+    // Sub-vectors have different lengths.
+    return false;
+  }
+  for (size_t src_index = src_begin, dst_index = dst_begin;
+       src_index < src_end && dst_index < dst_end;
+       ++src_index, ++dst_index) {
+    if (src_shape[src_index] != dst_shape[dst_index]) {
+      // Sub-vectors have different elements.
+      return false;
+    }
+  }
+  // Sub-vectors have same length and same elements.
+  return true;
+}
+
+// TODO: replace std::vector with gsl::span.
+std::tuple<bool, size_t, size_t, size_t> IsTwoAxisFusion(
+    const std::vector<int64_t>& src_shape,
+    const std::vector<int64_t>& dst_shape) {
+  // Return values:
+  // - bool: whether two consecutive axes are fused.
+  // - size_t: the axis in destination shape formed by fusing two source axes.
+  // - size_t: the first axis fused.
+  // - size_t: the length of fusion. In two-axis fusion considered by this
+  //   function, the length of fusion is always 2.
+  const size_t src_rank = src_shape.size();
+  const size_t dst_rank = dst_shape.size();
+  if (src_rank < 2 || dst_rank < 1) {
+    return std::make_tuple(false, -1, -1, -1);
+  }
+  if (src_rank - 1 != dst_rank) {
+    return std::make_tuple(false, -1, -1, -1);
+  }
+  for (size_t i_src = 0; i_src < src_rank; ++i_src) {
+    if (i_src + 1 > src_rank - 1) {
+      // We are at src_shape[i] and we need
+      // src_shape[i + 1] to fuse.
+      // If we are at the last axis, we cannot fuse.
+      break;
+    }
+    const int64_t prod = src_shape[i_src] * src_shape[i_src + 1];
+
+    for (size_t i_dst = 0; i_dst < dst_rank; ++i_dst) {
+      // Check if shape[i_src:i_src+2] (i.e., shape[i_src] and shape[i_src+1])
+      // for source tensor are fused into shape[i_dst] for destination tensor.
+      if (prod != dst_shape[i_dst]) {
+        continue;
+      }
+      // Check if corresponding dimensions before fusion area
+      // are the same.
+      const bool prefix_shape_match = CompareSubVectors(
+          src_shape,
+          dst_shape,
+          // Represent src_shape[0:i_src].
+          0, i_src,
+          // Represent dst_shape[0:i_dst].
+          0, i_dst);
+      const bool suffix_shape_match = CompareSubVectors(
+          src_shape,
+          dst_shape,
+          // Represent src_shape[i_src+2:].
+          i_src + 2, src_rank,
+          // Represent dst_shape[i_dst+1:].
+          i_dst + 1, dst_rank);
+      if (prefix_shape_match && suffix_shape_match) {
+        return std::make_tuple(
+            true, i_dst, i_src, 2);
+      }
+    }
+  }
+  return std::make_tuple(false, 0, 0, 0);
+}
+
+std::tuple<bool, size_t, size_t, size_t> IsTwoAxisDecomposition(
+    const std::vector<int64_t>& src_shape,
+    const std::vector<int64_t>& dst_shape) {
+  // Return values:
+  // - bool: whether one source axis is decomposed into two consecutive destination axes.
+  // - size_t: the axis in source shape decomposed into two consecutive destination axes.
+  // - size_t: the first axis the source axis decomposed into.
+  // - size_t: the number of decomposed axes. It's always 2 in this function.
+  return IsTwoAxisFusion(dst_shape, src_shape);
+}
+
+std::vector<int64_t> RepeatVector(const std::vector<int64_t>& vec, int64_t repeat) {
+  std::vector<int64_t> new_vec;
+  for (int64_t i = 0; i < repeat; ++i) {
+    new_vec.insert(new_vec.end(), vec.begin(), vec.end());
+  }
+  return new_vec;
+}
+
+DeviceMesh CreateInterleaveDeviceMesh(
+    const DeviceMesh& source_mesh, const int64_t repeat) {
+  // Given a 1-D device mesh [0, 1] and repeat=2,
+  // return 1-D device mesh [0, 1, 0, 1].
+  if (source_mesh.device_mesh_shape.size() != 1) {
+    throw std::runtime_error("Source mesh shape 1-D.");
+  }
+
+  // Mesh to return.
+  DeviceMesh new_mesh;
+
+  std::vector<int64_t>& elements = new_mesh.device_mesh_elements;
+  for (int64_t i = 0; i < repeat; ++i) {
+    elements.insert(
+        elements.end(),
+        source_mesh.device_mesh_elements.begin(),
+        source_mesh.device_mesh_elements.end());
+  }
+
+  // source mesh must be 1-D so we only care its 1st dimension.
+  new_mesh.device_mesh_shape.push_back(source_mesh.device_mesh_shape[0] * repeat);
+
+  return new_mesh;
+}
+
+std::tuple<bool, TensorPartitionSpec> ComputeNativeSpecForTwoAxisFusion(
+    const TensorPartitionSpec& src_spec,
+    const std::vector<int64_t>& src_shape,
+    const std::vector<int64_t>& dst_shape,
+    const int64_t fused_axis_in_src,
+    const int64_t fusion_axis_in_dst) {
+  // TODO(wechi): use device mesh stride to support non-1 stride.
+  // Example: S[0]R, shape=[2, 3], device_mesh=[0, 1] -> S[0], shape = [6], device_mesh=[0, 1]
+  // Example: RS[0], shape=[2, 3], device_mesh=[0, 1] -> S[0], shape = [6], device_mesh=[0, 1, 0, 1]
+  // Example: S[0]RR, shape=[2, 3, 5], device_mesh=[0, 1] -> S[0]R, shape = [2, 15], device_mesh=[0, 1]
+  ORT_ENFORCE(src_spec.CountShardingAxes() == 1, "Tensor to be reshaped has too many sharding axes.");
+  ORT_ENFORCE(src_spec.device_mesh.device_mesh_shape.size() == 1, "Source device mesh be 1-D.");
+
+  if (src_spec.HasNoShard()) {
+    return std::make_tuple(true, TensorPartitionSpec::CreateAllReplica(dst_shape.size(), src_spec.device_mesh));
+  } else if (src_spec.HasShard() && src_spec.OnlyShardAxis(fused_axis_in_src)) {
+    // Example: S[0]R, shape=[2, 3], device_mesh=[0, 1] -> S[0], shape = [6], device_mesh=[0, 1]
+    // Example 1:
+    //  - logical input shape: [2, 8]
+    //  - logical output shape: [16]
+    //  - input sharding spec: S[0]R, device_mesh=[0, 1]
+    // 1. Device allocation of the original input tensor:
+    //  - Logical tensor.
+    //    [[0, 0, 0, 0, 0, 0, 0, 0], (device assignment)
+    //     [1, 1, 1, 1, 1, 1, 1, 1]]
+    //    [[ 0,  1,  2,  3,  4,  5,  6,  7], (values)
+    //     [ 8,  9, 10, 11, 12, 13, 14, 15]]
+    //  - Device 0's local tensor (shape: [2, 4]).
+    //    [[ 0,  1,  2,  3,  4,  5,  6,  7]]
+    //  - Device 1's local tensor (shape: [2, 4]).
+    //    [[ 8,  9, 10, 11, 12, 13, 14, 15]]
+    // 2. Deduce local output shape:
+    //  - In the logical Reshape, the 1st and 2nd logical axes are fused,
+    //    so are the corresponding local axes.
+    //  - Local output shape: [8] by fusing both axes in shape [2, 4].
+    // 3. Run local reshape (reshape from shape [2, 4] to shape [8]):
+    //  - Device 0's local output tensor.
+    //    [ 0,  1,  2,  3,  4,  5,  6,  7]
+    //  - Device 1's local output tensor.
+    //    [ 8,  9, 10, 11, 12, 13, 14, 15]
+    // 4. Determine native output sharding spec from local output tensors.
+    //  - Logical output tensor:
+    //    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
+    //  - Device assignment by comparing local tensors and logical output tensor:
+    //    [0, 0, 0, 0, 0, 0, 0, 0, 1, 1,  1,  1,  1,  1,  1,  1]
+    //  - S[0] with device_mesh = [0, 1] = input device mesh.
+    // 5. Native output sharding spec:
+    //  - S[0] with device_mesh [0, 1]
+    //
+    // Example 2:
+    //  - logical input shape: [8, 2]
+    //  - logical output shape: [16]
+    //  - input sharding spec: S[0]R, device_mesh=[0, 1]
+    // 1. Device allocation of the original input tensor:
+    //  - Logical tensor.
+    //    [[0, 0], (device assignment)
+    //     [0, 0],
+    //     [0, 0],
+    //     [0, 0],
+    //     [1, 1],
+    //     [1, 1],
+    //     [1, 1],
+    //     [1, 1]]
+    //    [[ 0,  1], (values)
+    //     [ 2,  3],
+    //     [ 4,  5],
+    //     [ 6,  7],
+    //     [ 8,  9],
+    //     [10, 11],
+    //     [12, 13],
+    //     [14, 15]]
+    //  - Device 0's local tensor (shape: [4, 2]).
+    //    [[ 0,  1],
+    //     [ 2,  3],
+    //     [ 4,  5],
+    //     [ 6,  7]]
+    //  - Device 1's local tensor (shape: [4, 2]).
+    //    [[ 8,  9],
+    //     [10, 11],
+    //     [12, 13],
+    //     [14, 15]]
+    // 2. Deduce local output shape:
+    //  - In the logical Reshape, the 1st and 2nd logical axes are fused,
+    //    so are the corresponding local axes.
+    //  - Local output shape: [8] by fusing both axes in shape [4, 2].
+    // 3. Run local reshape (reshape from shape [4, 2] to shape [8]):
+    //  - Device 0's local output tensor.
+    //    [ 0,  1,  2,  3,  4,  5,  6,  7]
+    //  - Device 1's local output tensor.
+    //    [ 8,  9, 10, 11, 12, 13, 14, 15]
+    // 4. Determine native output sharding spec from local output tensors.
+    //  - Logical output tensor:
+    //    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
+    //  - Device assignment by comparing local tensors and logical output tensor:
+    //    [0, 0, 0, 0, 0, 0, 0, 0, 1, 1,  1,  1,  1,  1,  1,  1]
+    //  - S[0] with device_mesh = [0, 1] = input device mesh.
+    // 5. Native output sharding spec:
+    //  - S[0] with device_mesh [0, 1]
+    //
+    // Example 3:
+    //  - logical input shape: [8, 2]
+    //  - logical output shape: [16]
+    //  - input sharding spec: S[0]R, device_mesh=[0, 1, 0, 1]
+    // 1. Device allocation of the original input tensor:
+    //  - Logical tensor.
+    //    [[0, 0], (device assignment)
+    //     [0, 0],
+    //     [1, 1],
+    //     [1, 1],
+    //     [0, 0],
+    //     [0, 0],
+    //     [1, 1],
+    //     [1, 1]]
+    //    [[ 0,  1], (values)
+    //     [ 2,  3],
+    //     [ 4,  5],
+    //     [ 6,  7],
+    //     [ 8,  9],
+    //     [10, 11],
+    //     [12, 13],
+    //     [14, 15]]
+    //  - Device 0's local tensor (shape: [4, 2]).
+    //    [[ 0,  1],
+    //     [ 2,  3],
+    //     [ 8,  9],
+    //     [10, 11]]
+    //  - Device 1's local tensor (shape: [4, 2]).
+    //    [[ 4,  5],
+    //     [ 6,  7],
+    //     [12, 13],
+    //     [14, 15]]
+    // 2. Deduce local output shape:
+    //  - In the logical Reshape, the 1st and 2nd logical axes are fused,
+    //    so are the corresponding local axes.
+    //  - Local output shape: [8] by fusing both axes in shape [4, 2].
+    // 3. Run local reshape (reshape from shape [4, 2] to shape [8]):
+    //  - Device 0's local output tensor.
+    //    [ 0,  1,  2,  3,  8,  9, 10, 11]
+    //  - Device 1's local output tensor.
+    //    [ 4,  5,  6,  7, 12, 13, 14, 15]
+    // 4. Determine native output sharding spec from local output tensors.
+    //  - Logical output tensor:
+    //    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
+    //  - Device assignment by comparing local tensors and logical output tensor:
+    //    [0, 0, 0, 0, 1, 1, 1, 1, 0, 0,  0,  0,  1,  1,  1,  1]
+    //  - S[0] with device_mesh = [0, 1] = input device mesh.
+    // 5. Native output sharding spec:
+    //  - S[0] with device_mesh [0, 1, 0, 1]
+
+    // Reuse original device mesh but shard the fusion axis in output tensor.
+    auto dst_spec = TensorPartitionSpec::CreateOneTensorAxisOneDeviceMeshAxisSharding(
+        dst_shape.size(), src_spec.device_mesh, fusion_axis_in_dst, /* 1-D mesh */ 0);
+    return std::make_tuple(true, dst_spec);
+  } else if (src_spec.HasShard() && src_spec.OnlyShardAxis(fused_axis_in_src + 1)) {
+    // Example 1 of determining native output sharding spec:
+    //  - logical input shape: [3, 4]
+    //  - logical output shape: [12]
+    //  - input sharding spec: RS[0], device_mesh=[0, 1, 0, 1]
+    // 1. Device allocation of the original input tensor:
+    //  - Logical tensor.
+    //    [[0, 1, 0, 1], (device assignment)
+    //     [0, 1, 0, 1],
+    //     [0, 1, 0, 1]]
+    //    [[0, 1, 2, 3], (values)
+    //     [4, 5, 6, 7],
+    //     [8, 9, 10, 11]],
+    //  - Device 0's local tensor.
+    //    [[0, 0],
+    //     [0, 0],
+    //     [0, 0]]
+    //    [[0, 2],
+    //     [4, 6],
+    //     [8, 10]],
+    //  - Device 1's local tensor.
+    //    [[1, 1],
+    //     [1, 1],
+    //     [1, 1]]
+    //    [[1, 3],
+    //     [5, 7],
+    //     [9, 11]],
+    // 2. Deduce local output shape:
+    //  - In the logical Reshape, the 1st and 2nd logical axes are fused,
+    //    so are the corresponding local axes.
+    //  - Local output shape: [6] by fusing both axes in shape [3, 2].
+    // 3. Run local reshape (reshape from [3, 2] to [6]):
+    //  - Device 0's local output tensor.
+    //    [0, 0, 0, 0, 0, 0]
+    //    [0, 2, 4, 6, 8, 10]
+    //  - Device 1's local output tensor.
+    //    [1, 1, 1, 1, 1, 1]
+    //    [1, 3, 5, 7, 9, 11]
+    // 4. Determine native output sharding spec by comparing local output tensors and logical tensor.
+    //  - Logical output tensor:
+    //    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+    //  - S[0] with device_mesh = [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1] = [0, 1, 0, 1] * (first fused dimension).
+    // 5. Native output sharding spec:
+    //  - S[0] with device_mesh = [0, 1, 0, 1] * (first fused dimension) = [0, 1, 0, 1] * 3
+    //
+    // Example 2 of determining native output sharding spec:
+    //  - logical input shape: [3, 8]
+    //  - logical output shape: [24]
+    //  - input sharding spec: RS[0], device_mesh=[0, 1, 0, 1]
+    // 1. Device allocation of the original input tensor:
+    //  - Logical tensor.
+    //    [[0, 0, 1, 1, 0, 0, 1, 1], (device assignment)
+    //     [0, 0, 1, 1, 0, 0, 1, 1],
+    //     [0, 0, 1, 1, 0, 0, 1, 1]]
+    //    [[ 0,  1,  2,  3,  4,  5,  6,  7], (values)
+    //     [ 8,  9, 10, 11, 12, 13, 14, 15],
+    //     [16, 17, 18, 19, 20, 21, 22, 23]]
+    //  - Device 0's local tensor (shape: [3, 4]).
+    //    [[0, 0, 0, 0],
+    //     [0, 0, 0, 0],
+    //     [0, 0, 0, 0]]
+    //    [[ 0,  1,  4,  5],
+    //     [ 8,  9, 12, 13],
+    //     [16, 17, 20, 21]]
+    //  - Device 1's local tensor (shape: [3, 4]).
+    //    [[1, 1, 1, 1],
+    //     [1, 1, 1, 1],
+    //     [1, 1, 1, 1]]
+    //    [[ 2,  3,  6,  7],
+    //     [10, 11, 14, 15],
+    //     [18, 19, 22, 23]]
+    // 2. Deduce local output shape:
+    //  - In the logical Reshape, the 1st and 2nd logical axes are fused,
+    //    so are the corresponding local axes.
+    //  - Local output shape: [12] by fusing both axes in shape [3, 4].
+    // 3. Run local reshape (reshape from [3, 4] to [12]):
+    //  - Device 0's local output tensor .
+    //    [0, 1, 4, 5,  8,  9, 12, 13, 16, 17, 20, 21]
+    //  - Device 1's local output tensor .
+    //    [2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23]
+    // 4. Determine native output sharding spec from local output tensors.
+    //  - [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]
+    //  - [0, 0, 1, 1, 0, 0, 1, 1, 0, 0,  1,  1,  0,  0,  1,  1,  0,  0,  1,  1,  0,  0,  1,  1]
+    //  - S[0] with device_mesh = [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1] = .
+    // 5. Native output sharding spec:
+    //  - S[0] with device_mesh = [0, 1, 0, 1] * (first fused dimension) = [0, 1, 0, 1] * 3
+    //
+    // Example 3:
+    //  - logical input shape: [2, 8]
+    //  - logical output shape: [16]
+    //  - input sharding spec: RS[0], device_mesh=[0, 1, 0, 1]
+    // 1. Device allocation of the original input tensor:
+    //  - Logical tensor.
+    //    [[0, 0, 1, 1, 0, 0, 1, 1], (device assignment)
+    //     [0, 0, 1, 1, 0, 0, 1, 1]]
+    //    [[ 0,  1,  2,  3,  4,  5,  6,  7], (values)
+    //     [ 8,  9, 10, 11, 12, 13, 14, 15]]
+    //  - Device 0's local tensor (shape: [2, 4]).
+    //    [[0, 0, 0, 0],
+    //     [0, 0, 0, 0]]
+    //    [[ 0,  1,  4,  5],
+    //     [ 8,  9, 12, 13]]
+    //  - Device 1's local tensor (shape: [2, 4]).
+    //    [[1, 1, 1, 1],
+    //     [1, 1, 1, 1]]
+    //    [[ 2,  3,  6,  7],
+    //     [10, 11, 14, 15]]
+    // 2. Deduce local output shape:
+    //  - In the logical Reshape, the 1st and 2nd logical axes are fused,
+    //    so are the corresponding local axes.
+    //  - Local output shape: [8] by fusing both axes in shape [2, 4].
+    // 3. Run local reshape (reshape from [2, 4] to [8]):
+    //  - Device 0's local output tensor .
+    //    [ 0,  1,  4,  5,  8,  9, 12, 13]
+    //  - Device 1's local output tensor .
+    //    [ 2,  3,  6,  7, 10, 11, 14, 15]
+    // 4. Determine native output sharding spec from local output tensors.
+    //  - [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
+    //  - [0, 0, 1, 1, 0, 0, 1, 1, 0, 0,  1,  1,  0,  0,  1,  1]
+    //  - S[0] with device_mesh = [0, 1, 0, 1, 0, 1, 0, 1] = [0, 1, 0, 1] * (first fused dimension).
+    // 5. Native output sharding spec:
+    //  - S[0] with device_mesh = [0, 1, 0, 1] * (first fused dimension) = [0, 1, 0, 1] * 2
+    //
+    // Example 4:
+    //  - logical input shape: [2, 8]
+    //  - logical output shape: [16]
+    //  - input sharding spec: RS[0], device_mesh=[0, 1]
+    // 1. Device allocation of the original input tensor:
+    //  - Logical tensor.
+    //    [[0, 0, 0, 0, 1, 1, 1, 1], (device assignment)
+    //     [0, 0, 0, 0, 1, 1, 1, 1]]
+    //    [[ 0,  1,  2,  3,  4,  5,  6,  7], (values)
+    //     [ 8,  9, 10, 11, 12, 13, 14, 15]]
+    //  - Device 0's local tensor (shape: [2, 4]).
+    //    [[0, 0, 0, 0],
+    //     [0, 0, 0, 0]]
+    //    [[ 0,  1,  2,  3],
+    //     [ 8,  9, 10, 11]]
+    //  - Device 1's local tensor (shape: [2, 4]).
+    //    [[1, 1, 1, 1],
+    //     [1, 1, 1, 1]]
+    //    [[ 4,  5,  6,  7],
+    //     [12, 13, 14, 15]]
+    // 2. Deduce local output shape:
+    //  - In the logical Reshape, the 1st and 2nd logical axes are fused,
+    //    so are the corresponding local axes.
+    //  - Local output shape: [8] by fusing both axes in shape [2, 4].
+    // 3. Run local reshape (reshape from [2, 4] to [8]):
+    //  - Device 0's local output tensor .
+    //    [ 0,  1,  2,  3,  8,  9, 10, 11]
+    //  - Device 1's local output tensor .
+    //    [ 4,  5,  6,  7, 12, 13, 14, 15]
+    // 4. Determine native output sharding spec from local output tensors.
+    //  - [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
+    //  - [0, 0, 0, 0, 1, 1, 1, 1, 0, 0,  0,  0,  1,  1,  1,  1]
+    //  - S[0] with device_mesh = [0, 1, 0, 1] = [0, 1] * (first fused dimension).
+    // 5. Native output sharding spec:
+    //  - S[0] with device_mesh = [0, 1] * (first fused dimension) = [0, 1] * 2 = [0, 1, 0, 1]
+
+    // The output device mesh is the repeats of the original device.
+    // Let's use Python syntax. If the original device mesh is [0, 1, 0, 1], and
+    // the first fused dimension is 3, then the output device mesh is [0, 1, 0, 1] * 3.
+    auto dst_device_mesh = DeviceMesh::Create1D(
+        src_spec.device_mesh.device_mesh_elements,
+        src_shape[fused_axis_in_src]);
+    // Sharding happens in the fusion axis with the new device mesh.
+    auto dst_spec = TensorPartitionSpec::CreateOneTensorAxisOneDeviceMeshAxisSharding(
+        dst_shape.size(), dst_device_mesh, fusion_axis_in_dst, /* 1-D mesh */ 0);
+    return std::make_tuple(true, dst_spec);
+  } else if (src_spec.HasShard() && (src_spec.GetPartitionAxis() < fused_axis_in_src || src_spec.GetPartitionAxis() > fused_axis_in_src + 1)) {
+    // It's two-axis fusion but the fused axes is not sharded.
+    // Example: S[0]RR, shape=[2, 3, 5], device_mesh=[0, 1] -> S[0]R, shape = [2, 15], device_mesh=[0, 1]
+    auto dst_spec = TensorPartitionSpec::CreateByDropAxes(
+        src_spec, {fused_axis_in_src + 1});
+    return std::make_tuple(true, dst_spec);
+  } else {
+    return std::make_tuple(false, TensorPartitionSpec());
+  }
+}
+
+// Arguments:
+//  - device_elements: a vector of device IDs.
+//    It should only contain unique device IDs or
+//    repeats of a list of unique device IDs. Otherwise,
+//    (0, 0) is returned.
+// Returns:
+//  - count per device ID (all device IDs should have the same count)
+//  - number of unique device IDs
+// Examples:
+//  - [0, 1] -> (2, 1)
+//  - [0, 1, 2, 0, 1, 2] -> (2, 3)
+std::tuple<int64_t, int64_t> ComputeRepeatAndRepeatStride(
+    const std::vector<int64_t>& device_elements) {
+  int64_t first_device_id = device_elements.at(0);
+  int64_t first_device_id_count = 0;
+  for (size_t i = 0; i < device_elements.size(); ++i) {
+    if (device_elements.at(i) == first_device_id) {
+      ++first_device_id_count;
+    }
+  }
+  size_t repeat_stride = device_elements.size() / first_device_id_count;
+
+  // Check if the device mesh pattern is supported.
+  // Supported examples: [0, 1, 2] and [0, 1, 0, 1, 0, 1].
+  // Unsupported examples: [0, 1, 2, 1, 2, 0] and [0, 1, 2, 0].
+  for (size_t repeat = 0; repeat < first_device_id_count; ++repeat) {
+    for (size_t device_id = 0; device_id < repeat_stride; ++device_id) {
+      ORT_ENFORCE(
+          device_elements.at(repeat * repeat_stride + device_id) == device_elements.at(device_id),
+          "Unsupported device mesh pattern.");
+    }
+  }
+
+  // If device_mesh=[0, 1, 2, 0, 1, 2], returns (2, 3), which means
+  //  - each device repeats twice for "2" in (2, 3).
+  //  - there are 3 unique devices for "3" in (2, 3).
+  return std::make_tuple(first_device_id_count, repeat_stride);
+}
+
+std::tuple<bool, TensorPartitionSpec> ComputeNativeSpecForTwoAxisDecomposition(
+    const TensorPartitionSpec& src_spec,
+    const std::vector<int64_t>& src_shape,
+    const std::vector<int64_t>& dst_shape,
+    const int64_t decomposed_axis_in_src,
+    const int64_t decomposition_axis_in_dst) {
+  // TODO(wechi): use device mesh stride to support non-1 stride.
+  // Example: S[0], shape=[8], device_mesh=[0, 1] -> S[0]R
+  // Example: S[0], shape=[8], device_mesh=[0, 1] -> RS[0]
+  // Example: S[0], shape=[8], device_mesh=[0, 1, 0, 1] -> S[0]R
+  // Example: S[0], shape=[8], device_mesh=[0, 1, 0, 1] -> RS[0]
+  // Example: RS[0]R, shape=[8], device_mesh=[0, 1] -> RS[0]RR
+  // Example: RS[0]R, shape=[8], device_mesh=[0, 1] -> RRS[0]R
+  if (src_spec.CountShardingAxes() != 1) {
+    throw std::runtime_error("Too many sharding axes.");
+  }
+  if (src_spec.device_mesh.device_mesh_shape.size() != 1) {
+    throw std::runtime_error("Source device mesh be 1-D.");
+  }
+
+  if (src_spec.HasNoShard()) {
+    return std::make_tuple(true, TensorPartitionSpec::CreateAllReplica(dst_shape.size(), src_spec.device_mesh));
+  } else if (src_spec.OnlyShardAxis(decomposed_axis_in_src)) {
+    const int64_t device_stride = src_shape[decomposed_axis_in_src] / src_spec.device_mesh.device_mesh_shape[0];
+    if (device_stride >= dst_shape[decomposition_axis_in_dst + 1] && device_stride % dst_shape[decomposition_axis_in_dst + 1] == 0) {
+      // Since 2nd decomposition dimension is a factor of device stride,
+      // Sharding happens at 1st decomposition axis in dst.
+      // device_stride = 10
+      // S[0], shape=[20], device=[0, 1] -> S[0]R, shape=[2, 10], device=[0, 1]
+      //
+      // device_stride = 8
+      // S[0], shape=[16], device=[0, 1] -> RS[0], shape=[1, 16], device=[0, 1]
+      //
+      // device_stride = 8
+      // S[0], shape=[16], device=[0, 1] -> S[0]R, shape=[4, 4], device=[0, 1]
+      std::vector<AxisPartitionSpec> dst_axis_specs;
+      for (size_t src_axis = 0; src_axis < src_shape.size(); ++src_axis) {
+        if (src_axis != decomposed_axis_in_src) {
+          // Sharding spec is copied if the axis is not decomposed.
+          // E.g, shape [5, 6] -> Reshape -> shape [5, 3, 2]
+          // The spec for "5" is copied.
+          dst_axis_specs.push_back(AxisPartitionSpec::CreateCopy(src_spec.GetAxisSpec(src_axis)));
+        } else if (dst_shape[decomposition_axis_in_dst] == 1) {
+          // S[0] -> RS[0]
+          // E.g., shape [5] -> Reshape -> shape [1, 5]
+          // The spec for "5" is copied and "1" is replica.
+          // This reshape only adds a dummy new axis without affecting
+          // the underlying sharding status.
+          dst_axis_specs.push_back(AxisPartitionSpec::CreateReplica());
+          dst_axis_specs.push_back(AxisPartitionSpec::CreateShard(0));
+        } else {
+          // S[0] -> S[0]R
+          // E.g., shape [5] -> Reshape -> shape [5, 1]
+          dst_axis_specs.push_back(AxisPartitionSpec::CreateShard(0));
+          dst_axis_specs.push_back(AxisPartitionSpec::CreateReplica());
+        }
+      }
+      // Now, we know sharding happens at decomposed_axis_in_src axis in destination tensor.
+      // - effective_device_stride along decomposed_axis_in_src: device_stride / dst_shape[decomposed_axis_in_src + 1]
+      // - The original device patterns repeats: dst_shape[decomposed_axis_in_src] / effective_device_stride times.
+      const int64_t effective_device_stride = device_stride / dst_shape[decomposed_axis_in_src + 1];
+      // How many times a device ID changes along decomposed_axis_in_src axis in destination tensor.
+      const int64_t number_of_device_changes = dst_shape[decomposed_axis_in_src] / effective_device_stride;
+      if ((size_t)number_of_device_changes != src_spec.device_mesh.device_mesh_elements.size()) {
+        throw std::runtime_error("Not supported. Resharding is required.");
+      }
+      auto dst_device_mesh = CreateInterleaveDeviceMesh(
+          src_spec.device_mesh, 1);
+      return std::make_tuple(true, TensorPartitionSpec::Create(dst_axis_specs, dst_device_mesh));
+    } else if (dst_shape[decomposition_axis_in_dst + 1] > device_stride && dst_shape[decomposition_axis_in_dst + 1] % device_stride == 0) {
+      // Since 2nd decomposition dimension is a multiple of device stride,
+      // sharding happens at 2nd decomposition axis in dst.
+      // stride = 4
+      // S[0], shape=[8], device=[0, 1] -> S[0]R, shape=[4, 2], device=[0, 1]
+      //
+      // stride = 8
+      // S[0], shape=[32], device=[0, 1, 0, 1] -> RS[0], shape=[2, 16], device=[0, 1]
+      std::vector<AxisPartitionSpec> dst_axis_specs;
+      // How many times a device ID appears.
+      // E.g., [0, 1, 0, 1, 0, 1] -> 3
+      int64_t repeats = 0;
+      // Number of unique devices.
+      // E.g., [0, 1, 0, 1, 0, 1] -> 2
+      int64_t repeat_stride = 0;
+      DeviceMesh dst_device_mesh;
+      std::tie(repeats, repeat_stride) = ComputeRepeatAndRepeatStride(src_spec.device_mesh.device_mesh_elements);
+      for (size_t src_axis = 0; src_axis < src_shape.size(); ++src_axis) {
+        if (src_axis != decomposed_axis_in_src) {
+          dst_axis_specs.push_back(AxisPartitionSpec::CreateCopy(src_spec.GetAxisSpec(src_axis)));
+        } else if (dst_shape[decomposition_axis_in_dst] == 1) {
+          // S[0] -> RS[0]
+          // E.g., shape [5] -> Reshape -> shape [1, 5]
+          // In this case "1" is added as a dummy axis without affecting
+          // the underlying sharding status, so we just copy the spec
+          // for input "5" to output "5".
+          dst_axis_specs.push_back(AxisPartitionSpec::CreateReplica());
+          dst_axis_specs.push_back(AxisPartitionSpec::CreateShard(0));
+          dst_device_mesh = src_spec.device_mesh;
+        } else if (dst_shape[decomposition_axis_in_dst + 1] == 1) {
+          // S[0] -> S[0]R
+          // E.g., shape [5] -> Reshape -> shape [5, 1]
+          // In this case "1" is added as a dummy axis without affecting
+          // the underlying sharding status, so we just copy the spec
+          // for input "5" to output "5".
+          dst_axis_specs.push_back(AxisPartitionSpec::CreateShard(0));
+          dst_axis_specs.push_back(AxisPartitionSpec::CreateReplica());
+          dst_device_mesh = src_spec.device_mesh;
+        } else if (repeats == 1 && dst_shape[decomposition_axis_in_dst + 1] == device_stride * repeat_stride) {
+          // S[0] -> RS[0]
+          dst_axis_specs.push_back(AxisPartitionSpec::CreateReplica());
+          dst_axis_specs.push_back(AxisPartitionSpec::CreateShard(0));
+          dst_device_mesh = src_spec.device_mesh;
+        } else if (repeats != 1 && dst_shape[decomposition_axis_in_dst + 1] % (device_stride * repeat_stride) == 0) {
+          // S[0] -> RS[0]
+          dst_axis_specs.push_back(AxisPartitionSpec::CreateReplica());
+          dst_axis_specs.push_back(AxisPartitionSpec::CreateShard(0));
+          // Extract [0, 1] from [0, 1, 0, 1].
+          std::vector<int64_t> unique_device_mesh_elements(
+              src_spec.device_mesh.device_mesh_elements.begin(),
+              src_spec.device_mesh.device_mesh_elements.begin() + repeat_stride);
+          // Compute new repeats.
+          // Example of repeats change from 2 to 1:
+          //  [16]-shape tensor                      [2, 8]-shape tensor
+          //  with 1-D device mesh     -> Reshape -> with 1-D device mesh
+          //  [0, 1, 0, 1] (repeats=2)               [0, 1] (repeats=1)
+          const int64_t new_repeat = dst_shape[decomposition_axis_in_dst + 1] / (device_stride * repeat_stride);
+          dst_device_mesh.device_mesh_shape.push_back(repeat_stride);
+          dst_device_mesh.device_mesh_elements = RepeatVector(unique_device_mesh_elements, new_repeat);
+        } else {
+          throw std::runtime_error("Not supported. Resharding is required.");
+        }
+      }
+      return std::make_tuple(true, TensorPartitionSpec::Create(dst_axis_specs, dst_device_mesh));
+    } else {
+      // Not supported. Resharding is required.
+      return std::make_tuple(false, TensorPartitionSpec());
+    }
+  } else {
+    // Source tensor is sharded on non-decomposed axis.
+    std::vector<AxisPartitionSpec> dst_axis_specs;
+    for (size_t src_axis = 0; src_axis < src_shape.size(); ++src_axis) {
+      if (src_axis != decomposed_axis_in_src) {
+        dst_axis_specs.push_back(AxisPartitionSpec::CreateCopy(src_spec.GetAxisSpec(src_axis)));
+      } else {
+        // R -> RR
+        dst_axis_specs.push_back(AxisPartitionSpec::CreateReplica());
+        dst_axis_specs.push_back(AxisPartitionSpec::CreateReplica());
+      }
+    }
+
+    return std::make_tuple(true, TensorPartitionSpec::Create(dst_axis_specs, src_spec.device_mesh));
+  }
+}
+
+// Arguments:
+//  global_data_shape: logical shape of Reshape's 1st input.
+//  global_shape_span: logical content of Reshape's 2nd input.
+// Returns:
+//  logical shape of Reshape's output.
+inline TensorShape InferDistributedReshapeLogicalOutputShape(
+    const TensorShape& global_data_shape,
+    const gsl::span<const int64_t>& global_shape_span,
+    const int64_t allow_zero) {
+  return onnxruntime::cuda::InferReshapeOutputShape(
+      global_data_shape,
+      global_shape_span,
+      allow_zero);
+}
+
+template <typename T>
+DistributedReshape<T>::DistributedReshape(const OpKernelInfo& info) : DistributedKernel(info) {
+  allow_zero_ = info.GetAttrOrDefault("allowzero", static_cast<int64_t>(0));
+}
+
+template <typename T>
+Status DistributedReshape<T>::ComputeInternal(OpKernelContext* context) const {
+  ORT_ENFORCE(context != nullptr);
+  auto data_tensor = context->Input<Tensor>(0);
+  auto shape_tensor = context->Input<Tensor>(1);
+  const auto& data_sharding_spec = input_shard_specs_.at(0);
+  const auto& shape_sharding_spec = input_shard_specs_.at(1);
+  const auto& output_sharding_spec = output_shard_specs_.at(0);
+
+  if (data_sharding_spec.HasNoShard() && shape_sharding_spec.HasNoShard() && output_sharding_spec.HasNoShard()) {
+    // Case: all inputs and outputs are not sharded.
+    const auto target_shape = onnxruntime::cuda::InferReshapeOutputShape(
+        data_tensor,
+        shape_tensor,
+        allow_zero_);
+
+    auto output_tensor = context->Output(0, target_shape);
+
+    // Copy data from input from output.
+    return FuncReshape(
+        this,
+        context,
+        data_tensor,
+        shape_tensor,
+        allow_zero_,
+        output_tensor);
+  } else {
+    ORT_ENFORCE(shape_sharding_spec.HasNoShard(),
+                "Shape tensor should not be sharded because it will trigger communication. "
+                "If sharding shape is needed, please request this feature on Github.");
+    ORT_ENFORCE(shape_tensor->Shape().NumDimensions() == 1, "Shape must be a 1-D tensor.");
+    const auto original_data_shape = ComputeOriginShape(data_tensor->Shape(), data_sharding_spec);
+    const auto original_output_shape = InferDistributedReshapeLogicalOutputShape(
+        original_data_shape,
+        shape_tensor->template DataAsSpan<int64_t>(),
+        allow_zero_);
+
+    // TODO: remove below code after replacing std::vector with TensorShape in other APIs.
+    std::vector<int64_t> src_shape(original_data_shape.GetDims().begin(), original_data_shape.GetDims().end());
+    std::vector<int64_t> dst_shape(original_output_shape.GetDims().begin(), original_output_shape.GetDims().end());
+
+    // Case: Two axis fusion
+    bool is_two_axis_fusion = false;
+    size_t two_axis_fusion_axis_in_dst = 0;
+    size_t two_axis_fusion_first_fused_axis_in_src = 0;
+    size_t two_axis_fusion_fused_axis_count = 0;
+    std::tie(
+        is_two_axis_fusion,
+        two_axis_fusion_axis_in_dst,
+        two_axis_fusion_first_fused_axis_in_src,
+        two_axis_fusion_fused_axis_count) = IsTwoAxisFusion(src_shape, dst_shape);
+
+    if (is_two_axis_fusion) {
+      bool is_supported = false;
+      TensorPartitionSpec native_dst_spec;
+      std::tie(is_supported, native_dst_spec) = ComputeNativeSpecForTwoAxisFusion(
+          data_sharding_spec,
+          src_shape,
+          dst_shape,
+          two_axis_fusion_first_fused_axis_in_src,
+          two_axis_fusion_axis_in_dst);
+
+      if (is_supported && native_dst_spec == output_sharding_spec) {
+        // In this case, we can apply Reshape with local shape on local tensor without resharding.
+        // Those local output tensors match the output tensors defined by
+        // sharding the logical tensor following the native sharding spec.
+        TensorShape local_shape = ComputeShardShape(original_output_shape, native_dst_spec);
+        auto output_tensor = context->Output(0, local_shape);
+        return FuncReshape(
+            this,
+            context,
+            data_tensor,
+            shape_tensor,
+            allow_zero_,
+            output_tensor);
+      } else {
+        // TODO: Reshape outputs from `native_dst_spec` to `output_sharding_spec`.
+        return Status(common::ONNXRUNTIME, common::NOT_IMPLEMENTED, "Encounter unsupported reshape pattern.");
+      }
+    }
+
+    // Case: Two axis decomposition
+    bool is_two_axis_decomposition = false;
+    size_t two_axis_decomposition_decomposed_axis_in_src = 0;
+    size_t two_axis_decomposition_first_factor_axis_in_dst = 0;
+    size_t two_axis_decomposition_factor_axis_count_in_dst = 0;
+    std::tie(
+        is_two_axis_decomposition,
+        two_axis_decomposition_decomposed_axis_in_src,
+        two_axis_decomposition_first_factor_axis_in_dst,
+        two_axis_decomposition_factor_axis_count_in_dst) = IsTwoAxisDecomposition(src_shape, dst_shape);
+
+    if (is_two_axis_decomposition) {
+      bool is_supported = false;
+      TensorPartitionSpec native_dst_spec;
+      std::tie(is_supported, native_dst_spec) = ComputeNativeSpecForTwoAxisDecomposition(
+          data_sharding_spec,
+          src_shape,
+          dst_shape,
+          two_axis_decomposition_decomposed_axis_in_src,
+          two_axis_decomposition_first_factor_axis_in_dst);
+
+      if (is_supported && native_dst_spec == output_sharding_spec) {
+        // In this case, we can apply Reshape with local shape on local tensor without resharding.
+        // Those local output tensors match the output tensors defined by
+        // sharding the logical tensor following the native sharding spec.
+        TensorShape local_shape = ComputeShardShape(original_output_shape, native_dst_spec);
+        auto output_tensor = context->Output(0, local_shape);
+        return FuncReshape(
+            this,
+            context,
+            data_tensor,
+            shape_tensor,
+            allow_zero_,
+            output_tensor);
+      } else {
+        // TODO: Reshape outputs from `native_dst_spec` to `output_sharding_spec`.
+        return Status(common::ONNXRUNTIME, common::NOT_IMPLEMENTED, "Encounter unsupported reshape pattern.");
+      }
+    }
+  }
+
+  return Status(common::ONNXRUNTIME, common::NOT_IMPLEMENTED, "Encounter unsupported reshape pattern.");
+}
+
+ONNX_OPERATOR_TYPED_KERNEL_EX(
+    DistributedReshape,
+    kMSDomain,
+    1,
+    int64_t,
+    kCudaExecutionProvider,
+    (*KernelDefBuilder::Create())
+        .AllocateInputsContiguously()
+        .TypeConstraint("T", DataTypeImpl::GetTensorType<int64_t>())
+        .InputMemoryType(OrtMemTypeCPUInput, 1),
+    DistributedReshape<int64_t>);
+
+ONNX_OPERATOR_TYPED_KERNEL_EX(
+    DistributedReshape,
+    kMSDomain,
+    1,
+    float,
+    kCudaExecutionProvider,
+    (*KernelDefBuilder::Create())
+        .AllocateInputsContiguously()
+        .TypeConstraint("T", DataTypeImpl::GetTensorType<float>())
+        .InputMemoryType(OrtMemTypeCPUInput, 1),
+    DistributedReshape<float>);
+
+ONNX_OPERATOR_TYPED_KERNEL_EX(
+    DistributedReshape,
+    kMSDomain,
+    1,
+    MLFloat16,
+    kCudaExecutionProvider,
+    (*KernelDefBuilder::Create())
+        .AllocateInputsContiguously()
+        .TypeConstraint("T", DataTypeImpl::GetTensorType<MLFloat16>())
+        .InputMemoryType(OrtMemTypeCPUInput, 1),
+    DistributedReshape<MLFloat16>);
+
+#endif
+
+}  // namespace cuda
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/collective/distributed_reshape.h b/onnxruntime/contrib_ops/cuda/collective/distributed_reshape.h
new file mode 100644
index 0000000000000..e251c3cdc38d7
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/collective/distributed_reshape.h
@@ -0,0 +1,40 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "sharding_spec.h"
+#include "sharding.h"
+#include "core/framework/tensor_shape.h"
+#include "core/providers/cuda/tensor/reshape.h"
+#include "core/providers/cuda/cuda_kernel.h"
+
+#include <algorithm>
+#include <tuple>
+#include <optional>
+#include <string>
+#include <nccl.h>
+#include <sstream>
+
+#pragma once
+
+namespace onnxruntime {
+namespace contrib {
+namespace cuda {
+
+#if defined(ORT_USE_NCCL)
+
+template <typename T>
+class DistributedReshape final : public DistributedKernel {
+ public:
+  explicit DistributedReshape(const OpKernelInfo& info);
+
+  Status ComputeInternal(OpKernelContext* context) const override;
+
+ private:
+  int64_t allow_zero_;
+};
+
+#endif
+
+}  // namespace cuda
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/collective/distributed_slice.cc b/onnxruntime/contrib_ops/cuda/collective/distributed_slice.cc
new file mode 100644
index 0000000000000..5768dba791292
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/collective/distributed_slice.cc
@@ -0,0 +1,181 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+// Distributed computation.
+#include "distributed_slice.h"
+#include "mpi_include.h"
+
+// ORT system.
+#include "core/providers/cpu/tensor/slice.h"
+#include "core/providers/cuda/tensor/slice.h"
+#include "core/providers/cuda/math/matmul.h"
+#include "core/providers/cuda/tensor/transpose.h"
+#include "core/providers/cuda/cuda_check_memory.h"
+
+namespace onnxruntime {
+namespace contrib {
+namespace cuda {
+
+#if defined(ORT_USE_NCCL)
+template <typename T, typename Tind>
+DistributedSlice<T, Tind>::DistributedSlice(const OpKernelInfo& info) : DistributedKernel(info) {
+}
+
+template <typename T, typename Tind>
+Status DistributedSlice<T, Tind>::ComputeInternal(OpKernelContext* context) const {
+  const auto tensor_shard_data = context->Input<Tensor>(0);
+  const auto tensor_shard_starts = context->Input<Tensor>(1);
+  const auto tensor_shard_ends = context->Input<Tensor>(2);
+
+  const TensorPartitionSpec& spec_data = input_shard_specs_[0];
+  const TensorPartitionSpec& spec_starts = input_shard_specs_[1];
+  const TensorPartitionSpec& spec_ends = input_shard_specs_[2];
+  const TensorPartitionSpec& spec_Y = output_shard_specs_[0];
+
+  const auto tensor_shard_axes = context->Input<Tensor>(3);
+  const TensorPartitionSpec& spec_axes = input_shard_specs_[3];
+
+  if (spec_starts.HasShard() ||
+      spec_ends.HasShard() ||
+      spec_axes.HasShard() ||
+      (input_shard_specs_.size() > 4 && input_shard_specs_[4].HasShard()))
+    ORT_THROW("DistributedSlice: shard on starts / ends / axes / steps are not supported yet.");
+
+  std::vector<int64_t> input_starts;
+  std::vector<int64_t> input_ends;
+  auto starts_data = tensor_shard_starts->DataAsSpan<Tind>();
+  input_starts.resize(starts_data.size());
+  std::copy(starts_data.begin(), starts_data.end(), input_starts.begin());
+  auto ends_data = tensor_shard_ends->DataAsSpan<Tind>();
+  input_ends.resize(ends_data.size());
+  std::copy(ends_data.begin(), ends_data.end(), input_ends.begin());
+
+  std::vector<int64_t> input_axes;
+  if (tensor_shard_axes) {
+    auto axes_data = tensor_shard_axes->DataAsSpan<Tind>();
+    input_axes.resize(axes_data.size());
+    std::copy(axes_data.begin(), axes_data.end(), input_axes.begin());
+  }
+
+  std::vector<int64_t> input_steps;
+  const auto tensor_shard_steps = context->Input<Tensor>(4);
+  if (tensor_shard_steps) {
+    const TensorPartitionSpec& spec_steps = input_shard_specs_[4];
+    if (spec_steps.HasShard())
+      ORT_THROW("Not supported yet.");
+
+    auto steps_data = tensor_shard_steps->DataAsSpan<Tind>();
+    input_steps.resize(steps_data.size());
+    std::copy(steps_data.begin(), steps_data.end(), input_steps.begin());
+  }
+
+  if (spec_data.GetPartitionAxis() != -1 &&
+      std::find(input_axes.begin(), input_axes.end(), spec_data.GetPartitionAxis()) != input_axes.end()) {
+    // shard on slice axes, reshard first
+    auto tmp_spec_data = TensorPartitionSpec::CreateAllReplica(spec_data);
+    auto tensor_data = ReshardTensor(this, context, spec_data, tmp_spec_data, nccl_->Rank(), tensor_shard_data);
+
+    const auto& input_shape = tensor_data->Shape();
+    const auto input_dimensions = input_shape.GetDims();
+    if (input_dimensions.empty()) return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Cannot slice scalars");
+
+    SliceOp::PrepareForComputeMetadata compute_metadata(input_dimensions);
+    ORT_RETURN_IF_ERROR(SliceBase::PrepareForCompute(input_starts, input_ends, input_axes, input_steps, compute_metadata));
+    TensorShape output_shape(compute_metadata.output_dims_);
+
+    if (spec_Y.HasNoShard()) {
+      ORT_RETURN_IF_ERROR(FuncSlice(this,
+                                    context,
+                                    tensor_data.get(),
+                                    input_starts,
+                                    input_ends,
+                                    input_axes,
+                                    input_steps,
+                                    context->Output(0, output_shape)));
+    } else {
+      AllocatorPtr alloc;
+      ORT_ENFORCE(context->GetTempSpaceAllocator(&alloc) == Status::OK());
+      auto dst_tensor = Tensor::Create(tensor_data->DataType(), output_shape, alloc);
+      ORT_RETURN_IF_ERROR(FuncSlice(this,
+                                    context,
+                                    tensor_data.get(),
+                                    input_starts,
+                                    input_ends,
+                                    input_axes,
+                                    input_steps,
+                                    dst_tensor.get()));
+      auto tmp_spec_output = TensorPartitionSpec::CreateAllReplica(spec_Y);
+      ReshardTensor(this, context, tmp_spec_output, spec_Y, nccl_->Rank(), dst_tensor.get(), 0);
+    }
+  } else {
+    const auto& input_shape = tensor_shard_data->Shape();
+    const auto input_dimensions = input_shape.GetDims();
+    if (input_dimensions.empty()) return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Cannot slice scalars");
+
+    SliceOp::PrepareForComputeMetadata compute_metadata(input_dimensions);
+    ORT_RETURN_IF_ERROR(SliceBase::PrepareForCompute(input_starts, input_ends, input_axes, input_steps, compute_metadata));
+    TensorShape output_shape(compute_metadata.output_dims_);
+
+    if (spec_Y.GetPartitionAxis() == spec_data.GetPartitionAxis()) {
+      ORT_RETURN_IF_ERROR(FuncSlice(this,
+                                    context,
+                                    tensor_shard_data,
+                                    input_starts,
+                                    input_ends,
+                                    input_axes,
+                                    input_steps,
+                                    context->Output(0, output_shape)));
+    } else {
+      AllocatorPtr alloc;
+      ORT_ENFORCE(context->GetTempSpaceAllocator(&alloc) == Status::OK());
+      auto dst_tensor = Tensor::Create(tensor_shard_data->DataType(), output_shape, alloc);
+      ORT_RETURN_IF_ERROR(FuncSlice(this,
+                                    context,
+                                    tensor_shard_data,
+                                    input_starts,
+                                    input_ends,
+                                    input_axes,
+                                    input_steps,
+                                    dst_tensor.get()));
+      ReshardTensor(this, context, spec_data, spec_Y, nccl_->Rank(), dst_tensor.get(), 0);
+    }
+  }
+
+  return Status::OK();
+}
+
+ONNX_OPERATOR_TYPED_KERNEL_EX(
+    DistributedSlice,
+    kMSDomain,
+    1,
+    float,
+    kCudaExecutionProvider,
+    (*KernelDefBuilder::Create())
+        .InputMemoryType(OrtMemTypeCPUInput, 1)
+        .InputMemoryType(OrtMemTypeCPUInput, 2)
+        .InputMemoryType(OrtMemTypeCPUInput, 3)
+        .InputMemoryType(OrtMemTypeCPUInput, 4)
+        .TypeConstraint("T", DataTypeImpl::GetTensorType<float>())
+        .TypeConstraint("Tind", DataTypeImpl::GetTensorType<int64_t>()),
+    DistributedSlice<float, int64_t>);
+
+ONNX_OPERATOR_TYPED_KERNEL_EX(
+    DistributedSlice,
+    kMSDomain,
+    1,
+    MLFloat16,
+    kCudaExecutionProvider,
+    (*KernelDefBuilder::Create())
+        .InputMemoryType(OrtMemTypeCPUInput, 1)
+        .InputMemoryType(OrtMemTypeCPUInput, 2)
+        .InputMemoryType(OrtMemTypeCPUInput, 3)
+        .InputMemoryType(OrtMemTypeCPUInput, 4)
+        .TypeConstraint("T", DataTypeImpl::GetTensorType<MLFloat16>())
+        .TypeConstraint("Tind", DataTypeImpl::GetTensorType<int64_t>()),
+    DistributedSlice<MLFloat16, int64_t>);
+
+#endif
+
+}  // namespace cuda
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/collective/distributed_slice.h b/onnxruntime/contrib_ops/cuda/collective/distributed_slice.h
new file mode 100644
index 0000000000000..48c77eee241de
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/collective/distributed_slice.h
@@ -0,0 +1,32 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+#pragma once
+
+#include <algorithm>
+#include <tuple>
+#include <optional>
+#include <string>
+#include <nccl.h>
+#include <sstream>
+
+#include "sharding.h"
+
+namespace onnxruntime {
+namespace contrib {
+namespace cuda {
+
+#if defined(ORT_USE_NCCL)
+
+template <typename T, typename Tind>
+class DistributedSlice final : public DistributedKernel {
+ public:
+  explicit DistributedSlice(const OpKernelInfo& info);
+
+  Status ComputeInternal(OpKernelContext* context) const override;
+};
+
+#endif
+
+}  // namespace cuda
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/collective/distributed_squeeze.cc b/onnxruntime/contrib_ops/cuda/collective/distributed_squeeze.cc
new file mode 100644
index 0000000000000..c3cae2d0bf8ca
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/collective/distributed_squeeze.cc
@@ -0,0 +1,96 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+// Distributed computation.
+#include "distributed_squeeze.h"
+#include "mpi_include.h"
+
+// ORT system.
+#include "core/providers/cuda/cuda_check_memory.h"
+
+namespace onnxruntime {
+namespace contrib {
+namespace cuda {
+
+#if defined(ORT_USE_NCCL)
+template <typename T, typename Tind>
+DistributedSqueeze<T, Tind>::DistributedSqueeze(const OpKernelInfo& info) : DistributedKernel(info) {
+}
+
+template <typename T, typename Tind>
+Status DistributedSqueeze<T, Tind>::ComputeInternal(OpKernelContext* context) const {
+  auto input_tensor = context->Input<Tensor>(0);
+  auto axes_tensor = context->Input<Tensor>(1);
+  auto axes_span = axes_tensor->DataAsSpan<Tind>();
+
+  const TensorPartitionSpec& input_spec = input_shard_specs_[0];
+  const TensorPartitionSpec& axes_spec = input_shard_specs_[1];
+  const TensorPartitionSpec& output_spec = output_shard_specs_[0];
+
+  ORT_ENFORCE(axes_spec.HasNoShard(), "Axes tensor cannot be sharded.");
+
+  // Non-negative collection of axes to drop.
+  std::vector<Tind> axes;
+  for (const auto axis : axes_span) {
+    axes.push_back(axis >= 0 ? axis : axis + input_tensor->Shape().NumDimensions());
+  }
+  // Shape after dropping axes.
+  auto dims = input_tensor->Shape().AsShapeVector();
+  // Sort in descending order so that we can drop axes from the end.
+  std::sort(axes.begin(), axes.end(), [](Tind a, Tind b) { return a > b; });
+  for (const auto axis : axes) {
+    ORT_ENFORCE(input_tensor->Shape()[axis] == 1, "Cannot squeeze non-singleton dimension.");
+    dims.erase(dims.begin() + axis);
+  }
+  auto native_output_spec = TensorPartitionSpec::CreateByDropAxes(
+      input_spec,
+      axes);
+  ORT_ENFORCE(
+      output_spec == native_output_spec,
+      "Re-sharding is required but not supported yet for this case.");
+  auto output_tensor = context->Output(0, dims);
+  CUDA_RETURN_IF_ERROR(cudaMemcpyAsync(
+      output_tensor->MutableDataRaw(),
+      input_tensor->DataRaw(),
+      input_tensor->SizeInBytes(), cudaMemcpyDeviceToDevice, Stream(context)));
+  return Status::OK();
+}
+
+ONNX_OPERATOR_TYPED_KERNEL_EX(
+    DistributedSqueeze,
+    kMSDomain,
+    1,
+    float,
+    kCudaExecutionProvider,
+    (*KernelDefBuilder::Create())
+        .InputMemoryType(OrtMemTypeCPUInput, 1)
+        .TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
+    DistributedSqueeze<float, int64_t>);
+
+ONNX_OPERATOR_TYPED_KERNEL_EX(
+    DistributedSqueeze,
+    kMSDomain,
+    1,
+    MLFloat16,
+    kCudaExecutionProvider,
+    (*KernelDefBuilder::Create())
+        .InputMemoryType(OrtMemTypeCPUInput, 1)
+        .TypeConstraint("T", DataTypeImpl::GetTensorType<MLFloat16>()),
+    DistributedSqueeze<MLFloat16, int64_t>);
+
+ONNX_OPERATOR_TYPED_KERNEL_EX(
+    DistributedSqueeze,
+    kMSDomain,
+    1,
+    int64_t,
+    kCudaExecutionProvider,
+    (*KernelDefBuilder::Create())
+        .InputMemoryType(OrtMemTypeCPUInput, 1)
+        .TypeConstraint("T", DataTypeImpl::GetTensorType<int64_t>()),
+    DistributedSqueeze<int64_t, int64_t>);
+
+#endif
+
+}  // namespace cuda
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/collective/distributed_squeeze.h b/onnxruntime/contrib_ops/cuda/collective/distributed_squeeze.h
new file mode 100644
index 0000000000000..5b81d9c4792bd
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/collective/distributed_squeeze.h
@@ -0,0 +1,32 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+#pragma once
+
+#include <algorithm>
+#include <tuple>
+#include <optional>
+#include <string>
+#include <nccl.h>
+#include <sstream>
+
+#include "sharding.h"
+
+namespace onnxruntime {
+namespace contrib {
+namespace cuda {
+
+#if defined(ORT_USE_NCCL)
+
+template <typename T, typename Tind>
+class DistributedSqueeze final : public DistributedKernel {
+ public:
+  explicit DistributedSqueeze(const OpKernelInfo& info);
+
+  Status ComputeInternal(OpKernelContext* context) const override;
+};
+
+#endif
+
+}  // namespace cuda
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/collective/distributed_unsqueeze.cc b/onnxruntime/contrib_ops/cuda/collective/distributed_unsqueeze.cc
new file mode 100644
index 0000000000000..a78f19101b0da
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/collective/distributed_unsqueeze.cc
@@ -0,0 +1,95 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+// Distributed computation.
+#include "distributed_unsqueeze.h"
+#include "mpi_include.h"
+
+// ORT system.
+#include "core/providers/cuda/cuda_check_memory.h"
+
+namespace onnxruntime {
+namespace contrib {
+namespace cuda {
+
+#if defined(ORT_USE_NCCL)
+template <typename T, typename Tind>
+DistributedUnsqueeze<T, Tind>::DistributedUnsqueeze(const OpKernelInfo& info) : DistributedKernel(info) {
+}
+
+template <typename T, typename Tind>
+Status DistributedUnsqueeze<T, Tind>::ComputeInternal(OpKernelContext* context) const {
+  auto input_tensor = context->Input<Tensor>(0);
+  auto axes_tensor = context->Input<Tensor>(1);
+  auto axes_span = axes_tensor->DataAsSpan<Tind>();
+
+  const TensorPartitionSpec& input_spec = input_shard_specs_[0];
+  const TensorPartitionSpec& axes_spec = input_shard_specs_[1];
+  const TensorPartitionSpec& output_spec = output_shard_specs_[0];
+
+  ORT_ENFORCE(axes_spec.HasNoShard(), "Axes tensor cannot be sharded.");
+
+  std::vector<int64_t> axes(axes_span.begin(), axes_span.end());
+  std::sort(axes.begin(), axes.end());
+  auto dims = input_tensor->Shape().AsShapeVector();
+  auto native_output_spec = input_spec;
+  for (auto axis : axes) {
+    if (axis < 0) {
+      axis += input_tensor->Shape().NumDimensions() + 1;
+    }
+    dims.insert(dims.begin() + axis, 1);
+    native_output_spec = TensorPartitionSpec::CreateByInsertOneAxis(
+        native_output_spec,
+        axis);
+  }
+  ORT_ENFORCE(
+      output_spec == native_output_spec,
+      "Re-sharding is required but not supported yet for this case. ",
+      "Specified: ", output_spec.ToString(),
+      " Actual: ", native_output_spec.ToString());
+  auto output_tensor = context->Output(0, dims);
+  CUDA_RETURN_IF_ERROR(cudaMemcpyAsync(
+      output_tensor->MutableDataRaw(),
+      input_tensor->DataRaw(),
+      input_tensor->SizeInBytes(), cudaMemcpyDeviceToDevice, Stream(context)));
+  return Status::OK();
+}
+
+ONNX_OPERATOR_TYPED_KERNEL_EX(
+    DistributedUnsqueeze,
+    kMSDomain,
+    1,
+    float,
+    kCudaExecutionProvider,
+    (*KernelDefBuilder::Create())
+        .InputMemoryType(OrtMemTypeCPUInput, 1)
+        .TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
+    DistributedUnsqueeze<float, int64_t>);
+
+ONNX_OPERATOR_TYPED_KERNEL_EX(
+    DistributedUnsqueeze,
+    kMSDomain,
+    1,
+    MLFloat16,
+    kCudaExecutionProvider,
+    (*KernelDefBuilder::Create())
+        .InputMemoryType(OrtMemTypeCPUInput, 1)
+        .TypeConstraint("T", DataTypeImpl::GetTensorType<MLFloat16>()),
+    DistributedUnsqueeze<MLFloat16, int64_t>);
+
+ONNX_OPERATOR_TYPED_KERNEL_EX(
+    DistributedUnsqueeze,
+    kMSDomain,
+    1,
+    int64_t,
+    kCudaExecutionProvider,
+    (*KernelDefBuilder::Create())
+        .InputMemoryType(OrtMemTypeCPUInput, 1)
+        .TypeConstraint("T", DataTypeImpl::GetTensorType<int64_t>()),
+    DistributedUnsqueeze<int64_t, int64_t>);
+
+#endif
+
+}  // namespace cuda
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/collective/distributed_unsqueeze.h b/onnxruntime/contrib_ops/cuda/collective/distributed_unsqueeze.h
new file mode 100644
index 0000000000000..005093ef78fb9
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/collective/distributed_unsqueeze.h
@@ -0,0 +1,32 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+#pragma once
+
+#include <algorithm>
+#include <tuple>
+#include <optional>
+#include <string>
+#include <nccl.h>
+#include <sstream>
+
+#include "sharding.h"
+
+namespace onnxruntime {
+namespace contrib {
+namespace cuda {
+
+#if defined(ORT_USE_NCCL)
+
+template <typename T, typename Tind>
+class DistributedUnsqueeze final : public DistributedKernel {
+ public:
+  explicit DistributedUnsqueeze(const OpKernelInfo& info);
+
+  Status ComputeInternal(OpKernelContext* context) const override;
+};
+
+#endif
+
+}  // namespace cuda
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/collective/nccl_kernels.cc b/onnxruntime/contrib_ops/cuda/collective/nccl_kernels.cc
index bb924a0d49cfe..574a3133de815 100644
--- a/onnxruntime/contrib_ops/cuda/collective/nccl_kernels.cc
+++ b/onnxruntime/contrib_ops/cuda/collective/nccl_kernels.cc
@@ -1,10 +1,24 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
+#include <unistd.h>
+#include <time.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <netinet/tcp.h>
+#include <netdb.h>
+#include <cstdint>
+#include <memory>
+#include <string>
 
 #include "nccl_kernels.h"
 #include "mpi_include.h"
+#include "core/providers/cpu/tensor/slice.h"
+#include "core/providers/cuda/tensor/slice.h"
+#include "core/providers/cuda/math/matmul.h"
 #include "core/providers/cuda/tensor/transpose.h"
 #include "core/providers/cuda/cuda_check_memory.h"
+#include "core/platform/env_var_utils.h"
 
 namespace onnxruntime {
 namespace contrib {
@@ -35,21 +49,160 @@ static ncclDataType_t GetNcclDataType(onnxruntime::MLDataType type) {
   }
 }
 
-#ifdef USE_MPI
-static Status CreateNcclCommByMPI(int world_size, int rank, ncclComm_t* comm) {
+namespace IPC {
+#define FLLOG LOGS_DEFAULT(VERBOSE)
+#define FLLOGERRNO LOGS_DEFAULT(WARNING) << "error:" << strerror(errno)
+#define FLLOGGAI LOGS_DEFAULT(WARNING) << "error:" << gai_strerror(ret)
+
+typedef std::shared_ptr<struct addrinfo> AddrInfoPtr;
+
+int CreateSocket(bool is_server) {
+  int sockfd = -1;
+
+  struct addrinfo hints;
+  struct addrinfo* result = nullptr;
+  AddrInfoPtr result_ptr(result, [](struct addrinfo* p) { if(p){freeaddrinfo(p);} });
+
+  memset(&hints, 0, sizeof(struct addrinfo));
+  hints.ai_family = AF_UNSPEC;     /* Allow IPv4 or IPv6 */
+  hints.ai_socktype = SOCK_STREAM; /* TCP socket. use SOCK_DGRAM for UDP */
+  hints.ai_flags = AI_PASSIVE;     /* For wildcard IP address */
+  hints.ai_protocol = 0;           /* Any protocol */
+
+  std::string rank0_ip = ParseEnvironmentVariableWithDefault<std::string>("RANK0_IP", "localhost");
+  std::string port_number = ParseEnvironmentVariableWithDefault<std::string>("RANK0_PORT", "18888");
+
+  int ret = getaddrinfo(is_server ? nullptr : rank0_ip.c_str(), port_number.c_str(), &hints, &result);
+  if (ret != 0) {
+    FLLOGGAI << " getaddrinfo failed\n";
+    return sockfd;
+  }
+
+  for (struct addrinfo* rp = result; rp != nullptr; rp = rp->ai_next) {
+    sockfd = socket(rp->ai_family, rp->ai_socktype, rp->ai_protocol);
+    if (sockfd == -1) {
+      continue;
+    }
+
+    int on = 1;
+    int rc = setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, (char*)&on, sizeof(on));
+    if (rc < 0) {
+      FLLOGERRNO << ("setsockopt() failed\n");
+      close(sockfd);
+      continue;
+    }
+
+    if (is_server) {
+      if (bind(sockfd, rp->ai_addr, rp->ai_addrlen) == 0) {
+        FLLOG << "Listening on port " << port_number << " for the other GPU processores...\n";
+      } else {
+        FLLOGERRNO << ("bind failed\n");
+        close(sockfd);
+        sockfd = -1;
+      }
+    } else {
+      time_t start_time = time(0);
+      int conn_ret = connect(sockfd, rp->ai_addr, rp->ai_addrlen);
+      while (time(0) - start_time < 40 && conn_ret < 0) {
+        FLLOGERRNO << (" waiting the RANK 0 ready...\n"); /* terminate */
+        sleep(1);
+        conn_ret = connect(sockfd, rp->ai_addr, rp->ai_addrlen);
+      }
+      if (conn_ret < 0) {
+        close(sockfd);
+        sockfd = -1;
+        FLLOGERRNO << ("connect failed with timeout\n"); /* terminate */
+      } else {
+        FLLOG << "connect to " << rank0_ip << ":" << port_number << "success \n";
+      }
+    }
+    break;
+  }
+  return sockfd;
+}
+
+int WriteOnRank0(ncclUniqueId* nccl_id, int word_size) {
+  int fd = CreateSocket(true);
+  if (fd < 0) {
+    FLLOGERRNO << (" create socket\n"); /* terminate */
+    return -1;
+  }
+
+  /* listen to the socket */
+  if (listen(fd, word_size) < 0) {
+    FLLOGERRNO << ("listen\n"); /* terminate */
+    return -1;
+  }
+
+  word_size--;  // rank 0 is not in word_size
+  while (word_size-- > 0) {
+    int client_fd = accept(fd, nullptr, nullptr); /* accept blocks */
+    if (client_fd < 0) {
+      FLLOGERRNO << ("accept\n"); /* terminate */
+      return -1;
+    }
+    FLLOG << ("Accepted new GPU\n");
+    if (write(client_fd, (nccl_id), sizeof(ncclUniqueId)) != sizeof(ncclUniqueId)) {
+      FLLOGERRNO << ("write\n"); /* terminate */
+      return -1;
+    }
+    close(client_fd);
+  }
+  close(fd);
+  return 0;
+}
+
+int ReadFromRank0(ncclUniqueId* nccl_id) {
+  int sockfd = CreateSocket(false);
+  if (sockfd < 0) {
+    FLLOGERRNO << ("socket");
+    return -1;
+  }
+
+  if (read(sockfd, (nccl_id), sizeof(ncclUniqueId)) != sizeof(ncclUniqueId)) {
+    FLLOGERRNO << ("read"); /* terminate */
+    return -1;
+  }
+
+  close(sockfd);
+  return 0;
+}
+
+int IPC_Bcast(ncclUniqueId* nccl_id, int rank, int world_size) {
+  if (rank == 0) {
+    if (WriteOnRank0(nccl_id, world_size) != 0) {
+      return (-1);
+    }
+  } else if (ReadFromRank0(nccl_id) != 0) {
+    return (-1);
+  }
+
+  return 0;
+}
+}  // namespace IPC
+
+static Status CreateNcclCommunicator(int world_size, int rank, ncclComm_t* comm, bool is_launched_by_mpi) {
   // Create new NCCL communicator
   ncclUniqueId nccl_id;
   if (rank == 0) {
     NCCL_RETURN_IF_ERROR(ncclGetUniqueId(&nccl_id));
   }
-  MPI_CHECK(MPI_Bcast(&nccl_id, sizeof(nccl_id), MPI_BYTE, 0, MPI_COMM_WORLD));
+  if (is_launched_by_mpi) {
+#ifdef USE_MPI
+    MPI_CHECK(MPI_Bcast(&nccl_id, sizeof(nccl_id), MPI_BYTE, 0, MPI_COMM_WORLD));
+#else
+    return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Please compile ORT with USE_MPI.");
+#endif
+  } else if (IPC::IPC_Bcast(&nccl_id, rank, world_size) != 0) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "IPC_Bcast nccl_id failed with :", strerror(errno));
+  }
   NCCL_RETURN_IF_ERROR(ncclCommInitRank(comm, world_size, nccl_id, rank));
 
   return Status::OK();
 }
-#endif
 
 NcclContext::NcclContext() {
+  world_size_ = -1;
 #ifdef USE_MPI
   int is_mpi_initialized = 0;
   MPI_Initialized(&is_mpi_initialized);
@@ -62,14 +215,19 @@ NcclContext::NcclContext() {
   MPI_Comm_size(MPI_COMM_WORLD, &world_size_);
 
   MPI_Comm_rank(MPI_COMM_WORLD, &rank_);
+#endif
+  // world_size_ would be zero if MPI is being compiled but not launched by MPI.
+  bool is_launched_by_mpi = true;
+  if (world_size_ < 1) {
+    is_launched_by_mpi = false;
+    world_size_ = ParseEnvironmentVariableWithDefault<int32_t>("LOCAL_WORLD_SIZE", -1);
+    rank_ = ParseEnvironmentVariableWithDefault<int32_t>("LOCAL_RANK", -1);
+    ORT_ENFORCE(world_size_ != -1 && rank_ != -1);
+  }
 
   // Initialize global Parallel Group NCCL Communicator
-  auto ret = CreateNcclCommByMPI(world_size_, rank_, &comm_);
+  auto ret = CreateNcclCommunicator(world_size_, rank_, &comm_, is_launched_by_mpi);
   ORT_ENFORCE(ret.IsOK());
-
-#else
-  ORT_THROW("ORT must be built with MPI to use NCCL.");
-#endif
 }
 
 NcclContext::~NcclContext() {
@@ -246,6 +404,116 @@ ONNX_OPERATOR_KERNEL_EX(
         .TypeConstraint("T", DataTypeImpl::AllTensorTypes()),
     AllToAll);
 
+Status FuncAllReduce(
+    ncclComm_t comm,
+    cudaStream_t stream,
+    const Tensor* input,
+    Tensor* output) {
+  const void* input_data = input->DataRaw();
+  const auto input_shape = input->Shape();
+  int64_t input_count = input_shape.Size();
+
+  void* output_data = output->MutableDataRaw();
+
+  ncclDataType_t dtype = GetNcclDataType(input->DataType());
+  NCCL_RETURN_IF_ERROR(ncclAllReduce(input_data, output_data, input_count, dtype, ncclSum, comm, stream));
+  return Status::OK();
+}
+
+static std::vector<size_t> CalculatePermToSwapAxes(
+    const int64_t axis,
+    const int64_t another_axis,
+    const size_t rank) {
+  // This is for swapping axis and another_axis.
+  // NCCL's AllGather only gathers along axis 0. If gathering along another axis is needed,
+  // we need to call transpose. E.g.,
+  // Case 1:
+  //  AllGather(axis=0)
+  // Case 2:
+  //  AllGather(axis=3) = Transpose(perm=[3, 1, 2, 0]) -> AllGather(axis=0) -> Transpose(perm=[3, 1, 2, 0])
+  std::vector<size_t> permutation(rank);
+  std::iota(std::begin(permutation), std::end(permutation), 0);
+  permutation[axis] = another_axis;
+  permutation[another_axis] = axis;
+  return permutation;
+}
+
+void FuncAllGather(
+    const NcclKernel* nccl_kernel,
+    OpKernelContext* ctx,
+    const Tensor* input,
+    const int64_t group_size,
+    const int64_t axis,
+    Tensor* output) {
+  ORT_ENFORCE(output->Shape().Size() == input->Shape().Size() * group_size, "Input and output shapes mismatch.");
+  ORT_ENFORCE(group_size >= 0, "group_size should be non-negative.");
+  ORT_ENFORCE(axis >= 0, "axis should be non-negative.");
+  AllocatorPtr alloc;
+  ORT_ENFORCE(ctx->GetTempSpaceAllocator(&alloc) == Status::OK(), "Fail to find allocator.");
+  if (axis == 0) {
+    const void* input_data = input->DataRaw();
+    const auto input_shape = input->Shape();
+    void* output_data = output->MutableDataRaw();
+    ncclAllGather(
+        input_data,
+        output_data,
+        input_shape.Size(),
+        GetNcclDataType(input->DataType()),
+        nccl_kernel->Comm(),
+        nccl_kernel->Stream(ctx));
+  } else {
+    const auto source_shape = input->Shape();
+    TensorShape transposed_shape(source_shape);
+    transposed_shape[0] = source_shape[axis];
+    transposed_shape[axis] = source_shape[0];
+
+    auto transposed_buffer = Tensor::Create(input->DataType(), transposed_shape, alloc);
+
+    // swap axis 0 and axis axis
+    std::vector<size_t> perm = CalculatePermToSwapAxes(0, axis, source_shape.NumDimensions());
+
+    ORT_ENFORCE(onnxruntime::cuda::Transpose::DoTranspose(nccl_kernel->GetDeviceProp(),
+                                                          nccl_kernel->Stream(ctx),
+                                                          nccl_kernel->GetCublasHandle(ctx),
+                                                          perm, *input, *transposed_buffer) == Status::OK());
+
+    TensorShape gathered_shape(transposed_shape);
+    gathered_shape[0] = group_size * transposed_shape[0];
+    auto gathered_buffer = Tensor::Create(input->DataType(), gathered_shape, alloc);
+
+    ncclAllGather(
+        transposed_buffer->DataRaw(),
+        gathered_buffer->MutableDataRaw(),
+        transposed_shape.Size(),
+        GetNcclDataType(input->DataType()),
+        nccl_kernel->Comm(),
+        nccl_kernel->Stream(ctx));
+
+    ORT_ENFORCE(gathered_buffer->Shape().Size() == output->Shape().Size());
+    ORT_ENFORCE(onnxruntime::cuda::Transpose::DoTranspose(nccl_kernel->GetDeviceProp(),
+                                                          nccl_kernel->Stream(ctx),
+                                                          nccl_kernel->GetCublasHandle(ctx),
+                                                          perm, *gathered_buffer, *output) == Status::OK());
+  }
+}
+
+std::unique_ptr<Tensor> FuncAllGather(
+    const NcclKernel* nccl_kernel,
+    OpKernelContext* ctx,
+    const Tensor* input,
+    const int64_t group_size,
+    const int64_t axis) {
+  ORT_ENFORCE(group_size >= 0);
+  ORT_ENFORCE(axis >= 0);
+  AllocatorPtr alloc;
+  ORT_ENFORCE(ctx->GetTempSpaceAllocator(&alloc) == Status::OK());
+  TensorShape output_shape(input->Shape());
+  output_shape[axis] = group_size * output_shape[axis];
+  auto output = Tensor::Create(input->DataType(), output_shape, alloc);
+  FuncAllGather(nccl_kernel, ctx, input, group_size, axis, output.get());
+  return output;
+}
+
 }  // namespace cuda
 }  // namespace contrib
 }  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/collective/nccl_kernels.h b/onnxruntime/contrib_ops/cuda/collective/nccl_kernels.h
index 24df69ea50224..7fc26e6be57b9 100644
--- a/onnxruntime/contrib_ops/cuda/collective/nccl_kernels.h
+++ b/onnxruntime/contrib_ops/cuda/collective/nccl_kernels.h
@@ -6,7 +6,12 @@
 #include "core/providers/cuda/cuda_kernel.h"
 
 #if defined(ORT_USE_NCCL)
+#include <algorithm>
+#include <tuple>
+#include <optional>
+#include <string>
 #include <nccl.h>
+#include <sstream>
 #endif
 
 namespace onnxruntime {
@@ -44,6 +49,10 @@ class NcclKernel : public ::onnxruntime::cuda::CudaKernel {
  public:
   explicit NcclKernel(const OpKernelInfo& info);
 
+  ncclComm_t Comm() const {
+    return nccl_->Comm();
+  }
+
  protected:
   NcclContext* nccl_ = nullptr;
 };
@@ -81,6 +90,27 @@ class AllToAll final : public NcclKernel {
   int64_t group_size_ = -1;
 };
 
+Status FuncAllReduce(
+    ncclComm_t comm,
+    cudaStream_t stream,
+    const Tensor* input,
+    Tensor* output);
+
+void FuncAllGather(
+    const NcclKernel* nccl_kernel,
+    OpKernelContext* ctx,
+    const Tensor* input,
+    const int64_t group_size,
+    const int64_t axis,
+    Tensor* output);
+
+std::unique_ptr<Tensor> FuncAllGather(
+    const NcclKernel* nccl_kernel,
+    OpKernelContext* ctx,
+    const Tensor* input,
+    const int64_t group_size,
+    const int64_t axis);
+
 }  // namespace cuda
 }  // namespace contrib
 }  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/collective/sharding.cc b/onnxruntime/contrib_ops/cuda/collective/sharding.cc
new file mode 100644
index 0000000000000..b6b509023a1a9
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/collective/sharding.cc
@@ -0,0 +1,300 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "sharding.h"
+#include "mpi_include.h"
+#include "sharding_spec.h"
+
+#include <vector>
+#include <string>
+#include "core/providers/cpu/tensor/slice.h"
+#include "core/providers/cuda/tensor/slice.h"
+#include "core/providers/cuda/math/matmul.h"
+#include "core/providers/cuda/tensor/transpose.h"
+#include "core/providers/cuda/cuda_check_memory.h"
+
+namespace onnxruntime {
+namespace contrib {
+namespace cuda {
+
+#if defined(ORT_USE_NCCL)
+
+void GatherTensor(
+    // Use OpKernel and do a pointer cast to unify functional calls with other eps.
+    // TODO: remove CudaKernel and OpKernelContext.
+    const NcclKernel* nccl_kernel,
+    // Do NOT use ctx to access inputs and outputs.
+    // Inputs and outputs are passed in as function arguments.
+    OpKernelContext* ctx,
+    const TensorPartitionSpec& spec,
+    const Tensor* tensor,
+    Tensor* gathered) {
+  const int64_t shard_axis = spec.GetPartitionAxis();
+  const int64_t shard_count = spec.GetUniqueDeviceCount(shard_axis);
+
+  FuncAllGather(
+      nccl_kernel,
+      ctx,
+      tensor,
+      shard_count,
+      shard_axis,
+      gathered);
+}
+
+std::unique_ptr<Tensor> GatherTensor(
+    // Use OpKernel and do a pointer cast to unify functional calls with other eps.
+    // TODO: remove CudaKernel and OpKernelContext.
+    const NcclKernel* nccl_kernel,
+    // Do NOT use ctx to access inputs and outputs.
+    // Inputs and outputs are passed in as function arguments.
+    OpKernelContext* ctx,
+    const TensorPartitionSpec& spec,
+    const Tensor* tensor) {
+  const int64_t shard_axis = spec.GetPartitionAxis();
+  const int64_t shard_count = spec.GetUniqueDeviceCount(shard_axis);
+  TensorShape gathered_shape(tensor->Shape());
+  gathered_shape[shard_axis] *= shard_count;
+
+  AllocatorPtr alloc;
+  ORT_ENFORCE(ctx->GetTempSpaceAllocator(&alloc) == Status::OK());
+  auto gathered = Tensor::Create(tensor->DataType(), gathered_shape, alloc);
+
+  FuncAllGather(
+      nccl_kernel,
+      ctx,
+      tensor,
+      shard_count,
+      shard_axis,
+      gathered.get());
+
+  return gathered;
+}
+
+void ShardTensor(
+    // Use OpKernel and do a pointer cast to unify functional calls with other eps.
+    // TODO: remove CudaKernel and OpKernelContext.
+    const NcclKernel* nccl_kernel,
+    // Do NOT use ctx to access inputs and outputs.
+    // Inputs and outputs are passed in as function arguments.
+    OpKernelContext* ctx,
+    const TensorPartitionSpec& spec,
+    const int64_t device_id,
+    const Tensor* tensor,
+    Tensor* shard_tensor) {
+  const int64_t shard_axis = spec.GetPartitionAxis();
+  const int64_t shard_count = spec.GetUniqueDeviceCount(shard_axis);
+  TensorShape shard_shape = ComputeShardShape(
+      tensor->Shape(),
+      shard_axis,
+      shard_count);
+  const int64_t shard_dim = shard_shape[shard_axis];
+  const std::vector<int64_t> starts = {shard_dim * device_id};
+  const std::vector<int64_t> ends = {shard_dim * (device_id + 1)};
+  const std::vector<int64_t> axes = {shard_axis};
+  const std::vector<int64_t> steps = {1};
+
+  ORT_ENFORCE(FuncSlice(
+                  nccl_kernel,
+                  ctx,
+                  tensor,
+                  starts,
+                  ends,
+                  axes,
+                  steps,
+                  shard_tensor) == Status::OK());
+}
+
+std::unique_ptr<Tensor> ShardTensor(
+    const NcclKernel* nccl_kernel,
+    OpKernelContext* ctx,
+    const TensorPartitionSpec& spec,
+    const int64_t device_id,
+    const Tensor* tensor) {
+  // Shard all-replica tensor per spec.
+
+  AllocatorPtr alloc;
+  ORT_ENFORCE(ctx->GetTempSpaceAllocator(&alloc) == Status::OK());
+
+  TensorShape shard_shape = ComputeShardShape(
+      tensor->Shape(),
+      spec.GetPartitionAxis(),
+      spec.GetUniqueDeviceCount(spec.GetPartitionAxis()));
+  auto shard_buffer = Tensor::Create(tensor->DataType(), shard_shape, alloc);
+
+  // Shard with pre-allocated buffer.
+  ShardTensor(
+      nccl_kernel,
+      ctx,
+      spec,
+      device_id,
+      tensor,
+      shard_buffer.get());
+
+  return shard_buffer;
+}
+
+void ReshardTensor(
+    // Use OpKernel and do a pointer cast to unify functional calls with other eps.
+    // TODO: remove CudaKernel and OpKernelContext.
+    const NcclKernel* nccl_kernel,
+    // Do NOT use ctx to access inputs and outputs.
+    // Inputs and outputs are passed in as function arguments.
+    OpKernelContext* ctx,
+    const TensorPartitionSpec& src_spec,
+    const TensorPartitionSpec& dst_spec,
+    const int64_t device_id,
+    const Tensor* src,
+    Tensor* dst) {
+  if (src_spec.HasShard() && dst_spec.HasNoShard()) {
+    GatherTensor(
+        nccl_kernel,
+        ctx,
+        src_spec,
+        src,
+        dst);
+    return;
+  } else if (src_spec.HasNoShard() && dst_spec.HasShard()) {
+    ShardTensor(
+        nccl_kernel,
+        ctx,
+        dst_spec,
+        device_id,
+        src,
+        dst);
+  } else if (src_spec.HasShard() && dst_spec.HasShard()) {
+    int64_t src_axis = src_spec.GetPartitionAxis();
+    int64_t dst_axis = dst_spec.GetPartitionAxis();
+    ORT_ENFORCE(src_axis != dst_axis, "No reshard is needed. Don't call this function.");
+
+    auto all_replica_buffer = GatherTensor(
+        nccl_kernel,
+        ctx,
+        src_spec,
+        src);
+
+    ShardTensor(
+        nccl_kernel,
+        ctx,
+        dst_spec,
+        device_id,
+        all_replica_buffer.get(),
+        dst);
+  } else {
+    ORT_THROW("Not supported yet. Probably resharding is not needed.");
+  }
+}
+
+std::unique_ptr<Tensor> ReshardTensor(
+    // Use OpKernel and do a pointer cast to unify functional calls with other eps.
+    // TODO: remove CudaKernel and OpKernelContext.
+    const NcclKernel* nccl_kernel,
+    // Do NOT use ctx to access inputs and outputs.
+    // Inputs and outputs are passed in as function arguments.
+    OpKernelContext* ctx,
+    const TensorPartitionSpec& src_spec,
+    const TensorPartitionSpec& dst_spec,
+    const int64_t device_id,
+    const Tensor* src) {
+  // Implement ReshardTensor but returning a unique_ptr to Tensor instead.
+  const auto origin_shape = ComputeOriginShape(src->Shape(), src_spec);
+  const auto dst_shape = ComputeShardShape(origin_shape, dst_spec);
+  ORT_ENFORCE(CanShard(origin_shape, dst_spec), "Cannot shard tensor. Shape:", origin_shape, ", sharding spec: ", dst_spec.ToString());
+
+  AllocatorPtr alloc;
+  ORT_ENFORCE(ctx->GetTempSpaceAllocator(&alloc) == Status::OK());
+  auto dst = Tensor::Create(src->DataType(), dst_shape, alloc);
+  ReshardTensor(
+      nccl_kernel,
+      ctx,
+      src_spec,
+      dst_spec,
+      device_id,
+      src,
+      dst.get());
+  return dst;
+}
+
+void ReshardTensor(
+    const NcclKernel* nccl_kernel,
+    OpKernelContext* ctx,
+    const TensorPartitionSpec& src_spec,
+    const TensorPartitionSpec& dst_spec,
+    const int64_t device_id,
+    const Tensor* src,
+    int output_idx) {
+  // Implement ReshardTensor but returning a unique_ptr to Tensor instead.
+  const auto origin_shape = ComputeOriginShape(src->Shape(), src_spec);
+  const auto dst_shape = ComputeShardShape(origin_shape, dst_spec);
+  ORT_ENFORCE(CanShard(origin_shape, dst_spec), "Cannot shard tensor. Shape:", origin_shape, ", sharding spec: ", dst_spec.ToString());
+
+  auto* dst = ctx->Output(output_idx, dst_shape);
+  ReshardTensor(
+      nccl_kernel,
+      ctx,
+      src_spec,
+      dst_spec,
+      device_id,
+      src,
+      dst);
+}
+
+DistributedKernel::DistributedKernel(const OpKernelInfo& info) : NcclKernel(info) {
+  // input_device_mesh_shapes[i] is the shape of device mesh for the i-th input.
+  // E.g., device_mesh_shapes = ["[2]", "[1]"] means the first input is
+  // stored on a 1-D mesh with 2 devices and the second input on another 1-D
+  // mesh with 1 device.
+  std::vector<std::string> attr_input_device_mesh_shapes;
+  ORT_ENFORCE(info.GetAttrs<std::string>("input_device_mesh_shapes", attr_input_device_mesh_shapes).IsOK());
+
+  // input_device_mesh_elements[i] is the flattened device mesh for the i-th input.
+  // Note that its actual shape is input_device_mesh_shapes[i].
+  // Example:
+  //  Assume
+  //   device_mesh_shapes = ["[2]", "[1]"]
+  //   device_mesh_elements = ["[0,1]", "[0]"]
+  //  Then the first input is stored on a 1-D mesh with 2 devices and the second
+  //  input on another 1-D mesh with 1 device.
+  std::vector<std::string> attr_input_device_mesh_elements;
+  ORT_ENFORCE(info.GetAttrs<std::string>("input_device_mesh_elements", attr_input_device_mesh_elements).IsOK());
+
+  // input_shard_specs[i] is the sharding spec of the i-th input; e.g.,
+  // "RR" if the i-th input is not sharded.
+  std::vector<std::string> input_shard_specs;
+  ORT_ENFORCE(info.GetAttrs<std::string>("input_shard_specs", input_shard_specs).IsOK());
+
+  ORT_ENFORCE(attr_input_device_mesh_shapes.size() == attr_input_device_mesh_elements.size());
+  ORT_ENFORCE(attr_input_device_mesh_shapes.size() == input_shard_specs.size());
+
+  // Begin parsing sharding metadata for inputs.
+  for (size_t i = 0; i < input_shard_specs.size(); ++i) {
+    auto device_mesh_shape = ParseStringAsInt64Vector(attr_input_device_mesh_shapes[i]);
+    auto device_mesh_elements = ParseStringAsInt64Vector(attr_input_device_mesh_elements[i]);
+    auto spec = CreateTensorPartitionSpec(input_shard_specs[i], device_mesh_shape, device_mesh_elements);
+    input_shard_specs_.push_back(spec);
+  }
+
+  std::vector<std::string> attr_output_device_mesh_shapes;
+  ORT_ENFORCE(info.GetAttrs<std::string>("output_device_mesh_shapes", attr_output_device_mesh_shapes).IsOK());
+
+  std::vector<std::string> attr_output_device_mesh_elements;
+  ORT_ENFORCE(info.GetAttrs<std::string>("output_device_mesh_elements", attr_output_device_mesh_elements).IsOK());
+
+  std::vector<std::string> output_shard_specs;
+  ORT_ENFORCE(info.GetAttrs<std::string>("output_shard_specs", output_shard_specs).IsOK());
+
+  ORT_ENFORCE(attr_output_device_mesh_shapes.size() == attr_output_device_mesh_elements.size());
+  ORT_ENFORCE(attr_output_device_mesh_shapes.size() == output_shard_specs.size());
+
+  for (size_t i = 0; i < output_shard_specs.size(); ++i) {
+    auto device_mesh_shape = ParseStringAsInt64Vector(attr_output_device_mesh_shapes[i]);
+    auto device_mesh_elements = ParseStringAsInt64Vector(attr_output_device_mesh_elements[i]);
+    auto spec = CreateTensorPartitionSpec(output_shard_specs[i], device_mesh_shape, device_mesh_elements);
+    output_shard_specs_.push_back(spec);
+  }
+}
+
+#endif
+
+}  // namespace cuda
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/collective/sharding.h b/onnxruntime/contrib_ops/cuda/collective/sharding.h
new file mode 100644
index 0000000000000..81a0f72f0c32f
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/collective/sharding.h
@@ -0,0 +1,84 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+#pragma once
+
+#include "sharding_spec.h"
+#include "nccl_kernels.h"
+
+namespace onnxruntime {
+namespace contrib {
+namespace cuda {
+
+#if defined(ORT_USE_NCCL)
+
+void GatherTensor(
+    const NcclKernel* nccl_kernel,
+    OpKernelContext* ctx,
+    const TensorPartitionSpec& spec,
+    const Tensor* tensor,
+    Tensor* gathered);
+
+std::unique_ptr<Tensor> GatherTensor(
+    const NcclKernel* nccl_kernel,
+    OpKernelContext* ctx,
+    const TensorPartitionSpec& spec,
+    const Tensor* tensor);
+
+void ShardTensor(
+    const NcclKernel* nccl_kernel,
+    OpKernelContext* ctx,
+    const TensorPartitionSpec& spec,
+    const int64_t device_id,
+    const Tensor* tensor,
+    Tensor* shard_tensor);
+
+std::unique_ptr<Tensor> ShardTensor(
+    const NcclKernel* nccl_kernel,
+    OpKernelContext* ctx,
+    const TensorPartitionSpec& spec,
+    const int64_t device_id,
+    const Tensor* tensor);
+
+void ReshardTensor(
+    const NcclKernel* nccl_kernel,
+    OpKernelContext* ctx,
+    const TensorPartitionSpec& src_spec,
+    const TensorPartitionSpec& dst_spec,
+    const int64_t device_id,
+    const Tensor* src,
+    Tensor* dst);
+
+// Output from ctx
+void ReshardTensor(
+    const NcclKernel* nccl_kernel,
+    OpKernelContext* ctx,
+    const TensorPartitionSpec& src_spec,
+    const TensorPartitionSpec& dst_spec,
+    const int64_t device_id,
+    const Tensor* src,
+    int output_idx);
+
+std::unique_ptr<Tensor> ReshardTensor(
+    const NcclKernel* nccl_kernel,
+    OpKernelContext* ctx,
+    const TensorPartitionSpec& src_spec,
+    const TensorPartitionSpec& dst_spec,
+    const int64_t device_id,
+    const Tensor* src);
+
+class TensorPartitionSpec;
+
+class DistributedKernel : public NcclKernel {
+ public:
+  explicit DistributedKernel(const OpKernelInfo& info);
+
+ protected:
+  std::vector<TensorPartitionSpec> input_shard_specs_;
+  std::vector<TensorPartitionSpec> output_shard_specs_;
+};
+
+#endif
+
+}  // namespace cuda
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/collective/sharding_spec.cc b/onnxruntime/contrib_ops/cuda/collective/sharding_spec.cc
new file mode 100644
index 0000000000000..20c936e1b6718
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/collective/sharding_spec.cc
@@ -0,0 +1,223 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "sharding_spec.h"
+
+#include "core/common/common.h"
+#include "core/common/gsl.h"
+#include "core/framework/tensor_shape.h"
+
+#include <cctype>
+#include <sstream>
+#include <vector>
+
+namespace onnxruntime {
+namespace contrib {
+namespace cuda {
+
+#if defined(ORT_USE_NCCL)
+
+void ValidateAxisIndex(const int64_t axis, const int64_t rank) {
+  int64_t adjusted_axis = axis;
+  if (axis < 0) {
+    adjusted_axis = axis + rank;
+  } else {
+    adjusted_axis = axis;
+  }
+  ORT_ENFORCE(adjusted_axis >= 0 && adjusted_axis < rank, "axis,", axis, ", should be in [", -rank, ",", rank, ").");
+}
+
+std::vector<int64_t> ParseStringAsInt64Vector(const std::string& str) {
+  if (str.empty() || str.front() != '[' || str.back() != ']') {
+    throw std::invalid_argument("Invalid input string format");
+  }
+  // Parsed vector.
+  // If input is "[0, 1, 2]", result should be {0, 1, 2}.
+  std::vector<int64_t> result;
+  // Skip '[' and ']'
+  std::istringstream iss(str.substr(1, str.size() - 2));
+
+  // Extract integers separated by ',' or whitespaces.
+  int64_t num = -1;
+  while (/* Read one number at a time */ iss >> num) {
+    result.push_back(num);
+    // Skip the comma
+    if (iss.peek() == ',') {
+      iss.ignore();
+    }
+  }
+  return result;
+}
+
+DeviceMesh CreateDeviceMesh(
+    std::vector<int64_t> device_mesh_shape,
+    std::vector<int64_t> device_mesh_elements) {
+  DeviceMesh device_mesh;
+  device_mesh.device_mesh_shape = device_mesh_shape;
+  device_mesh.device_mesh_elements = device_mesh_elements;
+  return device_mesh;
+}
+
+TensorPartitionSpec CreateTensorPartitionSpec(std::string spec_string, std::vector<int64_t> device_mesh_shape, std::vector<int64_t> device_mesh_elements) {
+  // "S[0]R"
+  std::vector<AxisPartitionSpec> axis_specs;
+  size_t dim_index = 0;
+  size_t token_index = 0;
+  while (token_index < spec_string.size()) {
+    char token = spec_string.at(token_index);
+    if (token == 'R') {
+      AxisPartitionSpec axis_spec = AxisPartitionSpec::CreateReplica();
+      axis_specs.push_back(axis_spec);
+      ++token_index;
+      ++dim_index;
+    } else if (token == 'S') {
+      std::stringstream ss;
+      // Next should be "[".
+      ++token_index;
+      char left_bracket = spec_string.at(token_index);
+      ORT_ENFORCE(left_bracket == '[', "Invalid partition token: ", left_bracket, " in ", spec_string);
+      // Move to digit part.
+      ++token_index;
+      while (spec_string.at(token_index) != ']') {
+        // Now token_index should points to the first digit of
+        // axis index.
+        char digit = spec_string.at(token_index);
+        ORT_ENFORCE(std::isdigit(digit), "Invalid partition token: ", token, " in ", spec_string);
+        ss << digit;
+        // Loaded a digit. Go to next token.
+        ++token_index;
+      }
+      int device_mesh_index = 0;
+      ss >> device_mesh_index;
+      AxisPartitionSpec axis_spec = AxisPartitionSpec::CreateShard(device_mesh_index);
+      axis_specs.push_back(axis_spec);
+      // Skip "]".
+      char right_bracket = spec_string.at(token_index);
+      ORT_ENFORCE(right_bracket == ']', "Invalid partition token: ", token, " in ", spec_string);
+      ++token_index;
+    } else {
+      throw std::invalid_argument("Invalid partition token: " + token);
+    }
+  }
+  DeviceMesh device_mesh = CreateDeviceMesh(device_mesh_shape, device_mesh_elements);
+  return TensorPartitionSpec::Create(axis_specs, device_mesh);
+}
+
+TensorPartitionSpec CreateTensorShardSpec(
+    const DeviceMesh& device_mesh,
+    int64_t device_mesh_axis,
+    int64_t shard_axis,
+    int64_t tensor_rank) {
+  if (shard_axis < 0) {
+    shard_axis += tensor_rank;
+  }
+  std::vector<AxisPartitionSpec> axis_specs;
+  for (int64_t i = 0; i < tensor_rank; ++i) {
+    if (i == shard_axis) {
+      axis_specs.push_back(AxisPartitionSpec::CreateShard(device_mesh_axis));
+    } else {
+      axis_specs.push_back(AxisPartitionSpec::CreateReplica());
+    }
+  }
+  return TensorPartitionSpec::Create(axis_specs, device_mesh);
+}
+
+TensorShape ComputeOriginShape(const TensorShape& shard_shape, const TensorPartitionSpec& spec) {
+  ORT_ENFORCE(gsl::narrow<int64_t>(shard_shape.NumDimensions()) == spec.Rank(), "Shard shape and spec rank mismatch.");
+  if (spec.HasNoShard()) {
+    return shard_shape;
+  }
+  TensorShape shape(shard_shape);
+  const int64_t axis = spec.GetPartitionAxis();
+  shape[axis] *= spec.GetUniqueDeviceCount(axis);
+  return shape;
+}
+
+TensorShape ComputeShardShape(const TensorShape& shape, const TensorPartitionSpec& spec) {
+  ORT_ENFORCE(gsl::narrow<int64_t>(shape.NumDimensions()) == spec.Rank(), "Shape and spec rank mismatch.");
+  TensorShape shard_shape(shape);
+  if (spec.HasNoShard()) {
+    return shard_shape;
+  }
+  const int64_t axis = spec.GetPartitionAxis();
+  const int64_t unique_device_count = spec.GetUniqueDeviceCount(axis);
+  ORT_ENFORCE(shard_shape[axis] % unique_device_count == 0, "Number of shards must be divisible by sharded axis' dimension.");
+  // If a [8, 16]-tensor is shared by device mesh [0, 1, 0, 1] along axis=1 (2nd axis),
+  // the local tensors on device 0 & 1 have same shape [8, 8 (from 16/2)] instead of
+  // [8, 4 (from 16/4)]. The reason is that
+  //  - First, the original tensor are split into 4 sub-tensors [8, 4] along the 2nd axis.
+  //  - The 1st and 3rd sub-tensors are concatenated along axis=1 to one tensor on device 0.
+  //  - The 2nd and 4th sub-tensors are concatenated along axis=1 to one tensor on device 1.
+  shard_shape[axis] /= unique_device_count;
+  return shard_shape;
+}
+
+TensorShape ComputeShardShape(const TensorShape source_shape, int64_t shard_axis, int64_t shard_count) {
+  if (shard_axis < 0) {
+    shard_axis += gsl::narrow<int64_t>(source_shape.NumDimensions());
+  }
+  TensorShape shard_shape(source_shape);
+  ORT_ENFORCE(shard_axis < gsl::narrow<int64_t>(source_shape.NumDimensions()), "Shard axis must be less than the number of dimensions of the source tensor.");
+  ORT_ENFORCE(source_shape[shard_axis] % shard_count == 0, "Number of shards must be divisible by sharded axis' dimension.");
+  shard_shape[shard_axis] = source_shape[shard_axis] / shard_count;
+  return shard_shape;
+}
+
+std::tuple<TensorShape, TensorShape> NormalizeShapes(const TensorShape& left, const TensorShape& right) {
+  if (left.NumDimensions() > right.NumDimensions()) {
+    std::vector<int64_t> right_vector(right.NumDimensions(), 0);
+    right.CopyDims(right_vector.data(), right.NumDimensions());
+    // Fill 1's to right shape. E.g.,
+    // left: [1, 2, 3, 4], right: [5, 6, 7] -> left: [1, 2, 3, 4], right: [1, 5, 6, 7]
+    right_vector.insert(right_vector.begin(), left.NumDimensions() - right.NumDimensions(), 1);
+    return std::make_tuple(left, TensorShape(right_vector));
+  } else if (left.NumDimensions() < right.NumDimensions()) {
+    std::vector<int64_t> left_vector(left.NumDimensions(), 0);
+    left.CopyDims(left_vector.data(), left.NumDimensions());
+    // Fill 1's to left shape. E.g.,
+    // left: [1, 2, 3], right: [4, 5, 6, 7] -> left: [1, 2, 3, 1], right: [4, 5, 6, 7]
+    left_vector.insert(left_vector.begin(), right.NumDimensions() - left.NumDimensions(), 1);
+    return std::make_tuple(TensorShape(left_vector), TensorShape(right));
+  } else {
+    return std::make_tuple(TensorShape(left), TensorShape(right));
+  }
+}
+
+std::tuple<TensorPartitionSpec, TensorPartitionSpec> NormalizeTensorPartitionSpecs(
+    const TensorPartitionSpec& left, const TensorPartitionSpec& right) {
+  // TODO: Make it to modify left and right instead of returning new values.
+  if (left.axis_specs.size() > right.axis_specs.size()) {
+    auto new_right = TensorPartitionSpec::Create(right.axis_specs, right.device_mesh);
+    new_right.axis_specs.insert(new_right.axis_specs.begin(), left.axis_specs.size() - right.axis_specs.size(), AxisPartitionSpec::CreateReplica());
+    return std::make_tuple(left, new_right);
+  } else if (left.axis_specs.size() < right.axis_specs.size()) {
+    auto new_left = TensorPartitionSpec::Create(left.axis_specs, left.device_mesh);
+    new_left.axis_specs.insert(new_left.axis_specs.begin(), right.axis_specs.size() - left.axis_specs.size(), AxisPartitionSpec::CreateReplica());
+    return std::make_tuple(new_left, right);
+  } else {
+    return std::make_tuple(left, right);
+  }
+}
+
+bool CanShard(const TensorShape& shape, const TensorPartitionSpec& spec) {
+  if (spec.HasNoShard()) {
+    return true;
+  }
+  if (gsl::narrow<int64_t>(shape.NumDimensions()) != spec.Rank()) {
+    return false;
+  }
+  const int64_t axis = spec.GetPartitionAxis();
+  if (axis < 0 || gsl::narrow<size_t>(axis) >= shape.NumDimensions()) {
+    return false;
+  }
+  if (shape[axis] % spec.GetDeviceCount(axis) != 0) {
+    return false;
+  }
+  return true;
+}
+
+#endif
+
+}  // namespace cuda
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/collective/sharding_spec.h b/onnxruntime/contrib_ops/cuda/collective/sharding_spec.h
new file mode 100644
index 0000000000000..5abc50a61c9a3
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/collective/sharding_spec.h
@@ -0,0 +1,492 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+#pragma once
+
+#include "core/common/common.h"
+#include "core/framework/tensor_shape.h"
+
+#include <iostream>
+#include <sstream>
+#include <vector>
+
+namespace onnxruntime {
+namespace contrib {
+namespace cuda {
+
+#if defined(ORT_USE_NCCL)
+
+class DeviceMesh {
+ public:
+  // [Device Mesh and Tensor Sharding for Tensor Parallel]
+  // Device mesh is a tensor of device indices.
+  // A tensor can then be partitioned along specific mesh axes.
+  //
+  // Assume we have 4 GPUs indexed by 0, 1, 2, and 3.
+  // Let's consider some examples.
+  //  1. 1D device mesh [0, 1, 2, 3]. In this case,
+  //     device_mesh_shape is [4] and device_mesh_elements
+  //     is [0, 1, 2, 3].
+  //     If we want to shard a 2-D tensor along its axis 1, the
+  //     corresponding sharding spec is a string "RS[0]".
+  //  2. 2D device mesh [[0, 1], [2, 3]]. In this case,
+  //     device_mesh_shape is [2, 2] and device_mesh_elements
+  //     is [0, 1, 2, 3].
+  //     If we want to shard a 2-D tensor's
+  //     rows along mesh axis 1 and
+  //     columns along mesh axis 0, the
+  //     corresponding sharding spec is a string "S[1]S[0]".
+  //     If that 2-D tensor's value is np.array([[5, 6], [7, 8]]),
+  //     GPU 0/1/2/3 owns 5/7/6/8.  Below is a visualization the sharding
+  //     proccess.
+  //     - Start with a 2-D device mesh [[0, 1], [2, 3]] and
+  //       a 2-D tensor [[5, 6], [7, 8]]
+  //       - GPU: [[0, 1], [2, 3]], Tensor: [[5, 6], [7, 8]]
+  //     - Split GPU mesh along axis 1 and tensor along
+  //       axis 0 for "S[1]" in "S[1]S[0]"
+  //       - GPU: [[0], [2]], Tensor: [[5, 6]]
+  //         GPU: [[1], [3]], Tensor: [[7, 8]]
+  //     - Split GPU mesh along axis 0 and tensor along
+  //       axis 1 for "S[0]" in "S[1]S[0]"
+  //       - GPU: [[0]], Tensor: [[5]]
+  //       - GPU: [[2]], Tensor: [[6]]
+  //       - GPU: [[1]], Tensor: [[7]]
+  //       - GPU: [[3]], Tensor: [[8]]
+
+  // Actual shape of device mesh represented by `device_mesh_elements`.
+  std::vector<int64_t> device_mesh_shape;
+
+  // Flattened device mesh.
+  std::vector<int64_t> device_mesh_elements;
+
+  // Helper to debug and generate error message; e.g.,
+  // "DeviceMesh{Shape: [2,2,], Elements: [0,1,2,3,]}".
+  std::string ToString() const {
+    std::ostringstream os;
+    os << "DeviceMesh{Shape: [";
+    for (const auto& shape : device_mesh_shape)
+      os << shape << ",";
+    os << "], Elements: [";
+    for (const auto& element : device_mesh_elements)
+      os << element << ",";
+    os << "]}";
+    return os.str();
+  }
+
+  // Call this in GDB to visualize the mesh.
+  void Print() const {
+    std::cout << ToString() << std::endl;
+  }
+
+  static DeviceMesh Create1D(std::vector<int64_t> device_mesh_elements, size_t repeats = 1) {
+    DeviceMesh device_mesh;
+    device_mesh.device_mesh_shape.push_back(device_mesh_elements.size() * repeats);
+    for (size_t i = 0; i < repeats; ++i) {
+      device_mesh.device_mesh_elements.insert(
+          device_mesh.device_mesh_elements.end(),
+          device_mesh_elements.begin(),
+          device_mesh_elements.end());
+    }
+    return device_mesh;
+  }
+
+  // If the two meshes have the same shape and elements, return true.
+  // Otherwise, return false.
+  bool operator==(const DeviceMesh& other) const {
+    if (device_mesh_shape.size() != other.device_mesh_shape.size() ||
+        device_mesh_elements.size() != other.device_mesh_elements.size()) {
+      return false;
+    }
+
+    for (size_t i = 0; i < device_mesh_elements.size(); ++i) {
+      if (device_mesh_elements.at(i) != other.device_mesh_elements.at(i)) {
+        return false;
+      }
+    }
+    for (size_t i = 0; i < device_mesh_shape.size(); ++i) {
+      if (device_mesh_shape.at(i) != other.device_mesh_shape.at(i)) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  bool operator!=(const DeviceMesh& other) const {
+    return !(*this == other);
+  }
+};
+
+class AxisPartitionSpec {
+  // [Device Mesh and Tensor Sharding for Tensor Parallel]
+  // This class is the in-memory representation of
+  //  1. if a tensor is sharded or not (aka replica), and
+  //  2. which tensor axis is shard by which device mesh axis.
+  // Let's consider sharding 2-D tensor along column axis on
+  // device mesh [0, 1] as an example.
+  // The required sharding spec RS[0] can be represented by
+  // - AxisPartitionSpec(Condition::Replica, -1)
+  // - AxisPartitionSpec(Condition::Shard, 0)
+ public:
+  // Status of a tensor axis.
+  // A tensor axis can be either sharded or replicated
+  // along a device mesh axis.
+  enum class Condition { Replica,
+                         Shard };
+
+  // This field tells if a tensor axis is sharded or not.
+  Condition cond;
+
+  // If a tensor axis is sharded, this field tells which device
+  // mesh axis to distribute the shards along.
+  // If a tensor axis is not sharded, this field is ignored.
+  int device_mesh_axis;
+
+  // A helper to construct a replica spec for a tensor axis.
+  static AxisPartitionSpec CreateReplica() {
+    return AxisPartitionSpec(Condition::Replica, -1);
+  }
+
+  // A helper to construct a sharding spec for a tensor axis.
+  // This tensor axis is sharded along `device_mesh_axis` in device mesh.
+  static AxisPartitionSpec CreateShard(int device_mesh_axis) {
+    return AxisPartitionSpec(Condition::Shard, device_mesh_axis);
+  }
+
+  static AxisPartitionSpec CreateCopy(const AxisPartitionSpec& spec) {
+    return AxisPartitionSpec(spec.cond, spec.device_mesh_axis);
+  }
+
+  // A normal ctor.
+  // TODO(wechi): Consider to hide it and revise the `public` members/functions
+  // exposed to the user.
+  AxisPartitionSpec(Condition cond_, int device_mesh_axis_) : cond(cond_), device_mesh_axis(device_mesh_axis_) {}
+
+  // Helper to debug and generate error message; e.g.,
+  // "RS[0]".
+  std::string ToString() const {
+    std::ostringstream os;
+    os << (cond == Condition::Replica ? "R" : "S");
+    if (cond == Condition::Shard) os << "[" << device_mesh_axis << "]";
+    return os.str();
+  }
+
+  // Call this in GDB to visualize the spec.
+  void Print() const {
+    std::cout << ToString() << std::endl;
+  }
+
+  bool operator==(const AxisPartitionSpec& other) const {
+    return cond == other.cond && device_mesh_axis == other.device_mesh_axis;
+  }
+
+  bool operator!=(const AxisPartitionSpec& other) const {
+    return !(*this == other);
+  }
+};
+
+// Return true if `axis` is a valid axis index for a tensor of rank `rank`.
+// Negative `axis` is allowed (e.g., -1 for the last axis).
+void ValidateAxisIndex(const int64_t axis, const int64_t rank);
+
+class TensorPartitionSpec {
+  // [Device Mesh and Tensor Sharding for Tensor Parallel]
+  // TensorPartitionSpec holds a collection of AxisPartitionSpec and an
+  // associated DeviceMesh. It is responsible for determining how a tensor
+  // should be partitioned across a device mesh.
+  //
+  // Example 1: RS[0]
+  // In this scenario, `axis_specs` would contain two `AxisPartitionSpec` objects.
+  // - The first object is a Replica, denoting that the first axis of the tensor is
+  //   not sharded but is instead replicated.
+  // - The second object is a Shard along the 0-th axis of the device mesh. It denotes
+  //   that the second axis of the tensor is sharded along the first axis of the
+  //   device mesh.
+  //
+  // Example 2: S[0]RR
+  // In this scenario, `axis_specs` would contain three `AxisPartitionSpec` objects.
+  // - The first object is a Shard along the 0-th axis of the device mesh, indicating
+  //   that the first axis of the tensor is sharded along the first axis of the
+  //   device mesh.
+  // - The second and third objects are Replicas, indicating that the second and third
+  //   axes of the tensor are not sharded but are instead replicated.
+ public:
+  // axis_specs[i]: AxisPartitionSpec for tensor axis i. For a 2-D tensor,
+  //                axis_specs[0] is for row axis and axis_specs[1] is for
+  //                column axis. axis_specs[i].device_mesh_axis = j means that
+  //                tensor axis i is sharded along device mesh axis j.
+  std::vector<AxisPartitionSpec> axis_specs;
+
+  // device_mesh: DeviceMesh for sharding the associated tensor.
+  // Read [Device Mesh and Tensor Sharding for Tensor Parallel] in DeviceMesh's comment.
+  DeviceMesh device_mesh;
+
+  // Replacement of ctor.
+  static TensorPartitionSpec Create(
+      const std::vector<AxisPartitionSpec>& axis_specs, const DeviceMesh& device_mesh) {
+    TensorPartitionSpec spec;
+    spec.axis_specs = axis_specs;
+    spec.device_mesh = device_mesh;
+    return spec;
+  }
+
+  // Copy-construct `spec` but with all tensor axes replicated.
+  // The new spec have the same number of axis specs and the same device mesh.
+  static TensorPartitionSpec CreateAllReplica(
+      const TensorPartitionSpec& spec) {
+    TensorPartitionSpec new_spec = spec;
+    new_spec.axis_specs[spec.GetPartitionAxis()] = AxisPartitionSpec::CreateReplica();
+    return new_spec;
+  }
+
+  // TODO(wechi): Create a halper to copy-construct a new spec with different sharding axis.
+  // static TensorPartitionSpec CreateReshard(
+  //     const TensorPartitionSpec& spec, int64_t new_shard_axis) {
+  // }
+
+  // Copy-construct `spec` but with all tensor axes replicated.
+  // The new spec have the same number of axis specs and the same device mesh.
+  static TensorPartitionSpec CreateAllReplica(
+      const size_t rank, const DeviceMesh& device_mesh) {
+    std::vector<AxisPartitionSpec> axis_specs(rank, AxisPartitionSpec::CreateReplica());
+    return TensorPartitionSpec::Create(axis_specs, device_mesh);
+  }
+
+  static TensorPartitionSpec CreateOneTensorAxisOneDeviceMeshAxisSharding(
+      const size_t rank, const DeviceMesh& device_mesh, const size_t tensor_axis, const size_t device_mesh_axis) {
+    std::vector<AxisPartitionSpec> axis_specs(rank, AxisPartitionSpec::CreateReplica());
+    axis_specs[tensor_axis] = AxisPartitionSpec::CreateShard(device_mesh_axis);
+    return TensorPartitionSpec::Create(axis_specs, device_mesh);
+  }
+
+  static TensorPartitionSpec CreateByDropAxes(
+      const TensorPartitionSpec& spec, const std::vector<int64_t>& axes_to_drop) {
+    std::vector<AxisPartitionSpec> axis_specs;
+    for (size_t i = 0; i < spec.axis_specs.size(); ++i) {
+      if (std::find(axes_to_drop.begin(), axes_to_drop.end(), i) != axes_to_drop.end()) {
+        // This axis, i, is in axes_to_drop. Let's not copy its spec.
+        continue;
+      }
+      axis_specs.push_back(spec.axis_specs[i]);
+    }
+    return TensorPartitionSpec::Create(axis_specs, spec.device_mesh);
+  }
+
+  static TensorPartitionSpec CreateByInsertOneAxis(
+      const TensorPartitionSpec& spec,
+      const size_t axis_to_insert) {
+    std::vector<AxisPartitionSpec> axis_specs(spec.axis_specs);
+    axis_specs.insert(axis_specs.begin() + axis_to_insert, AxisPartitionSpec::CreateReplica());
+    return TensorPartitionSpec::Create(axis_specs, spec.device_mesh);
+  }
+
+  // Helper to debug and generate error message; e.g.,
+  // "TensorPartitionSpec{RS[0], Device Mesh: DeviceMesh{Shape: [4,], Elements: [0,1,2,3,]}}".
+  std::string ToString() const {
+    std::ostringstream os;
+    os << "TensorPartitionSpec{";
+    for (const auto& spec : axis_specs)
+      os << spec.ToString();
+    os << ", DeviceMesh: " << device_mesh.ToString() << "}";
+    return os.str();
+  }
+
+  // Call this in GDB to visualize the spec.
+  void Print() const {
+    std::cout << ToString() << std::endl;
+  }
+
+  // Return true if at least one tensor axis is sharded.
+  // Otherwise, return false.
+  bool HasShard() const {
+    for (const auto& spec : axis_specs)
+      if (spec.cond == AxisPartitionSpec::Condition::Shard) return true;
+    return false;
+  }
+
+  // Return true if no tensor axis is sharded.
+  // Otherwise, return false.
+  bool HasNoShard() const {
+    return !HasShard();
+  }
+
+  // Return true if the only sharded tensor axis is `axis`.
+  // Otherwise, return false.
+  bool OnlyShardAxis(int64_t axis) const {
+    ValidateAxisIndex(axis, Rank());
+    if (axis < 0) {
+      axis += Rank();
+    }
+    bool answer = true;
+    for (int64_t i = 0; i < Rank(); ++i) {
+      if (i == axis && axis_specs[i].cond != AxisPartitionSpec::Condition::Shard) {
+        answer = false;
+      } else if (i != axis && axis_specs[i].cond == AxisPartitionSpec::Condition::Shard) {
+        answer = false;
+      }
+    }
+    return answer;
+  }
+
+  // Rank of the owing tensor of this spec.
+  int64_t Rank() const {
+    return gsl::narrow<int64_t>(axis_specs.size());
+  }
+
+  // Return the number of sharded tensor axes.
+  // Currently we only support one sharded tensor axis, so
+  // we may assert the returned value is 1 in related APIs.
+  int64_t CountShardingAxes() const {
+    int64_t count = 0;
+    for (const auto& spec : axis_specs)
+      if (spec.cond == AxisPartitionSpec::Condition::Shard) count++;
+    return count;
+  }
+
+  // Return the AxisPartitionSpec for `axis`-th tensor axis.
+  const AxisPartitionSpec& GetAxisSpec(int64_t axis) const {
+    ValidateAxisIndex(axis, Rank());
+    if (axis < 0) {
+      axis += Rank();
+    }
+    return axis_specs.at(axis);
+  }
+
+  // Get the first sharded tensor axis' sharding spec.
+  const AxisPartitionSpec& GetPartitionAxisSpec() const {
+    // TODO: support multiple sharding axes.
+    ORT_ENFORCE(CountShardingAxes() == 1, "TensorPartitionSpec must have exactly one sharding axis.");
+    return GetAxisSpec(GetPartitionAxis());
+  }
+
+  // Get the first sharded tensor axis' index.
+  // E.g., spec "RS[0]" should return 1, spec "S[0]R" should return 0, spec "RR" should return -1.
+  // Returned value -1 means no sharded tensor axis.
+  int64_t GetPartitionAxis() const {
+    // TODO: support multiple sharding axes.
+    ORT_ENFORCE(CountShardingAxes() == 1, "TensorPartitionSpec must have exactly one sharding axis.");
+    for (int64_t i = 0; i < gsl::narrow<int64_t>(axis_specs.size()); ++i) {
+      if (axis_specs[i].cond == AxisPartitionSpec::Condition::Shard) {
+        return i;
+      }
+    }
+    return -1;
+  }
+
+  // Similarly to GetPartitionAxis(), but returns the negative index of the first sharded tensor axis.
+  // E.g., spec "RS[0]" should return -1, spec "S[0]R" should return -2, and spec "RR" should return 0.
+  // Returned value 0 means no sharded tensor axis.
+  int64_t GetNegativePartitionAxis() const {
+    // TODO: support multiple sharding axes.
+    ORT_ENFORCE(CountShardingAxes() == 1, "TensorPartitionSpec must have exactly one sharding axis.");
+    for (int64_t i = 0; i < gsl::narrow<int64_t>(axis_specs.size()); ++i) {
+      if (axis_specs[i].cond == AxisPartitionSpec::Condition::Shard) {
+        return i - axis_specs.size();
+      }
+    }
+    return 0;
+  }
+
+  // Return the number of shards along the first sharded tensor axis.
+  // This value matches the number of devices along the associated mesh axis.
+  // Return 1 if there is no sharding.
+  int64_t GetDeviceCount(int64_t axis) const {
+    ValidateAxisIndex(axis, Rank());
+    auto axis_spec = GetAxisSpec(axis);
+    if (axis_spec.cond == AxisPartitionSpec::Condition::Replica) {
+      return 1;
+    } else {
+      return device_mesh.device_mesh_shape.at(axis_spec.device_mesh_axis);
+    }
+  }
+
+  // Similar to GetDeviceCount(), but returns the number of unique devices
+  // along the first sharded tensor axis.
+  int64_t GetUniqueDeviceCount(int64_t axis) const {
+    ValidateAxisIndex(axis, Rank());
+    auto axis_spec = GetAxisSpec(axis);
+    if (axis_spec.cond == AxisPartitionSpec::Condition::Replica) {
+      return 1;
+    } else {
+      std::set<int64_t> device_ids(
+          device_mesh.device_mesh_elements.begin(),
+          device_mesh.device_mesh_elements.end());
+      return device_ids.size();
+    }
+  }
+
+  bool operator==(const TensorPartitionSpec& other) const {
+    if (axis_specs.size() != other.axis_specs.size()) {
+      return false;
+    }
+    for (size_t i = 0; i < axis_specs.size(); ++i) {
+      if (!(axis_specs.at(i) == other.axis_specs.at(i))) {
+        return false;
+      }
+    }
+    return device_mesh == other.device_mesh;
+  }
+
+  bool operator!=(const TensorPartitionSpec& other) const {
+    return !(*this == other);
+  }
+};
+
+// Parse "[0, 1, 2, 3]" as std::vector<int64_t>{0, 1, 2, 3}.
+std::vector<int64_t> ParseStringAsInt64Vector(const std::string& str);
+
+DeviceMesh CreateDeviceMesh(
+    std::vector<int64_t> device_mesh_shape,
+    std::vector<int64_t> device_mesh_elements);
+
+TensorPartitionSpec CreateTensorPartitionSpec(
+    std::string spec_string,
+    std::vector<int64_t> device_mesh_shape,
+    std::vector<int64_t> device_mesh_elements);
+
+TensorPartitionSpec CreateTensorShardSpec(
+    const DeviceMesh& device_mesh,
+    int64_t device_mesh_axis,
+    int64_t shard_axis,
+    int64_t tensor_rank);
+
+// Return the shape of the original tensor before sharding.
+// E.g., assume tensor shard's shape is [5, 7] and sharding spec is "S[0]R"
+// with 1-D device mesh [0, 1, 2].
+// This function returns [15, 7].
+//
+// `shard_shape`: the shape of a shard.
+// `spec`: the sharding spec of the original tensor.
+TensorShape ComputeOriginShape(const TensorShape& shard_shape, const TensorPartitionSpec& spec);
+
+// Return the shape of a shard.
+// E.g., assume tensor's shape is [15, 7] and sharding spec is "S[0]R"
+// with 1-D device mesh [0, 1, 2].
+// This function returns [5, 7].
+//
+// `shape`: the shape of the original tensor.
+// `spec`: the sharding spec of the original tensor.
+TensorShape ComputeShardShape(const TensorShape& shape, const TensorPartitionSpec& spec);
+
+// Similarly to ComputeShardShape(), but takes a shard axis and counts of all tensor shards
+// instead of a spec.
+TensorShape ComputeShardShape(const TensorShape source_shape, int64_t shard_axis, int64_t shard_count);
+
+// Prepend 1's to `shape` to make `left` and `right` have the same rank.
+// E.g., if `left` is [3, 7] and `right` is [5, 6, 7], this function returns [1, 3, 7] and [5, 6, 7].
+std::tuple<TensorShape, TensorShape> NormalizeShapes(const TensorShape& left, const TensorShape& right);
+
+// Prepend `R` (aks replicating axis) to `spec` to make `left` and `right` have the same rank.
+// E.g., if `left` is S[0]R and `right` is `RRR`, this function returns `RS[0]R` and `RRR`.
+std::tuple<TensorPartitionSpec, TensorPartitionSpec> NormalizeTensorPartitionSpecs(
+    const TensorPartitionSpec& left, const TensorPartitionSpec& right);
+
+// Return true if `shape` can be sharded according to `spec`.
+// Otherwise, return false.
+// Note that an axis is shardable along a device mesh axis only if
+// the dimension of the axis is divisible by the number of devices along the device mesh axis.
+bool CanShard(const TensorShape& shape, const TensorPartitionSpec& spec);
+
+#endif
+
+}  // namespace cuda
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/conv_transpose_with_dynamic_pads.h b/onnxruntime/contrib_ops/cuda/conv_transpose_with_dynamic_pads.h
index 6f7a04d059034..a768b2a7d8a24 100644
--- a/onnxruntime/contrib_ops/cuda/conv_transpose_with_dynamic_pads.h
+++ b/onnxruntime/contrib_ops/cuda/conv_transpose_with_dynamic_pads.h
@@ -1,4 +1,5 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
+// Copyright (c) 2023 NVIDIA Corporation.
 // Licensed under the MIT License.
 
 #pragma once
@@ -10,12 +11,12 @@ namespace contrib {
 namespace cuda {
 
 template <typename T>
-class ConvTransposeWithDynamicPads : public ::onnxruntime::cuda::ConvTranspose<T> {
+class ConvTransposeWithDynamicPads : public ::onnxruntime::cuda::ConvTranspose<T, false> {
  public:
-  ConvTransposeWithDynamicPads(const OpKernelInfo& info) : ::onnxruntime::cuda::ConvTranspose<T>(info) {}
+  ConvTransposeWithDynamicPads(const OpKernelInfo& info) : ::onnxruntime::cuda::ConvTranspose<T, false>(info) {}
 
   Status ComputeInternal(OpKernelContext* context) const override {
-    return ::onnxruntime::cuda::ConvTranspose<T>::DoConvTranspose(context, true);
+    return ::onnxruntime::cuda::ConvTranspose<T, false>::DoConvTranspose(context, true);
   }
 };
 }  // namespace cuda
diff --git a/onnxruntime/contrib_ops/cuda/cuda_contrib_kernels.cc b/onnxruntime/contrib_ops/cuda/cuda_contrib_kernels.cc
index 86c1cb93e8b6f..108eea1a73fe9 100644
--- a/onnxruntime/contrib_ops/cuda/cuda_contrib_kernels.cc
+++ b/onnxruntime/contrib_ops/cuda/cuda_contrib_kernels.cc
@@ -65,12 +65,16 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, PackedMultiHeadAttention);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16, PackedMultiHeadAttention);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, BeamSearch);
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, WhisperBeamSearch);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, ConvTransposeWithDynamicPads);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, float, Crop);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, double, Crop);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, MLFloat16, Crop);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, MoE);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16, MoE);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, MultiHeadAttention);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16, MultiHeadAttention);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16, GroupQueryAttention);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, DecoderAttention);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16, DecoderAttention);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, int32_t, DynamicSlice);
@@ -89,10 +93,13 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, float, ParametricSoftplus);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, double, ParametricSoftplus);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, MLFloat16, ParametricSoftplus);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, RotaryEmbedding);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16, RotaryEmbedding);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, Sampling);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, float, ScaledTanh);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, double, ScaledTanh);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, MLFloat16, ScaledTanh);
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, SkipGroupNorm);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, SkipLayerNormalization);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16, SkipLayerNormalization);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, SkipSimplifiedLayerNormalization);
@@ -112,7 +119,14 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain,
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, float_float_MLFloat16, SimplifiedLayerNormalization);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, MLFloat16_float_float, SimplifiedLayerNormalization);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, Inverse);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16, MatMulNBits);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, MatMulNBits);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, BFloat16, MatMulBnb4);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16, MatMulBnb4);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, MatMulBnb4);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, Trilu);
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, UnfoldTensor);
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, DynamicTimeWarping);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, int8_t_MLFloat16, QuantizeLinear);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, uint8_t_MLFloat16, QuantizeLinear);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, int8_t_MLFloat16, DequantizeLinear);
@@ -134,6 +148,7 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16, DecoderMaskedSelfAttention);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, DecoderMaskedMultiHeadAttention);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16, DecoderMaskedMultiHeadAttention);
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, GemmFloat8);
 
 #ifdef ENABLE_ATEN
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kPytorchAtenDomain, 1, ATen);
@@ -145,10 +160,41 @@ class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kPytorchAtenDomain
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, ShrunkenGather);
 #endif
 
-#if defined(USE_MPI) && defined(ORT_USE_NCCL)
+#if defined(ORT_USE_NCCL)
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, AllReduce);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, AllGather);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, AllToAll);
+
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, DistributedMatMul);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16, DistributedMatMul);
+
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, DistributedSlice);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16, DistributedSlice);
+
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, int64_t, DistributedReshape);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, DistributedReshape);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16, DistributedReshape);
+
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, int64_t, DistributedExpand);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, DistributedExpand);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16, DistributedExpand);
+
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, DistributedReduceSum);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16, DistributedReduceSum);
+
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, DistributedReduceMax);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16, DistributedReduceMax);
+
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, DistributedReduceMean);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16, DistributedReduceMean);
+
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, int64_t, DistributedUnsqueeze);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, DistributedUnsqueeze);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16, DistributedUnsqueeze);
+
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, int64_t, DistributedSqueeze);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, DistributedSqueeze);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16, DistributedSqueeze);
 #endif
 
 template <>
@@ -212,12 +258,16 @@ Status RegisterCudaContribKernels(KernelRegistry& kernel_registry) {
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, PackedMultiHeadAttention)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16, PackedMultiHeadAttention)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, BeamSearch)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, WhisperBeamSearch)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, ConvTransposeWithDynamicPads)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, float, Crop)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, double, Crop)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, MLFloat16, Crop)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, MoE)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16, MoE)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, MultiHeadAttention)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16, MultiHeadAttention)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16, GroupQueryAttention)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, DecoderAttention)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16, DecoderAttention)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, int32_t, DynamicSlice)>,
@@ -236,10 +286,13 @@ Status RegisterCudaContribKernels(KernelRegistry& kernel_registry) {
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, float, ParametricSoftplus)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, double, ParametricSoftplus)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, MLFloat16, ParametricSoftplus)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, RotaryEmbedding)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16, RotaryEmbedding)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, Sampling)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, float, ScaledTanh)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, double, ScaledTanh)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, MLFloat16, ScaledTanh)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, SkipGroupNorm)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, SkipLayerNormalization)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16, SkipLayerNormalization)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, SkipSimplifiedLayerNormalization)>,
@@ -259,6 +312,11 @@ Status RegisterCudaContribKernels(KernelRegistry& kernel_registry) {
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, float_float_MLFloat16, SimplifiedLayerNormalization)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, MLFloat16_float_float, SimplifiedLayerNormalization)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, Inverse)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16, MatMulNBits)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, MatMulNBits)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, BFloat16, MatMulBnb4)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16, MatMulBnb4)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, MatMulBnb4)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, BiasSoftmax)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, BiasDropout)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, BitmaskDropout)>,
@@ -270,6 +328,8 @@ Status RegisterCudaContribKernels(KernelRegistry& kernel_registry) {
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, uint8_t_MLFloat16, DequantizeLinear)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float_int8_t, QAttention)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16_int8_t, QAttention)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, UnfoldTensor)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, DynamicTimeWarping)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, Trilu)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, BFloat16, FastGelu)>,
     // TransposedMatMul is still here for backward compatibility
@@ -287,6 +347,7 @@ Status RegisterCudaContribKernels(KernelRegistry& kernel_registry) {
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16, DecoderMaskedSelfAttention)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, DecoderMaskedMultiHeadAttention)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16, DecoderMaskedMultiHeadAttention)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, GemmFloat8)>,
 
 #ifdef ENABLE_ATEN
     BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kPytorchAtenDomain, 1, ATen)>,
@@ -298,10 +359,41 @@ Status RegisterCudaContribKernels(KernelRegistry& kernel_registry) {
     BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, ShrunkenGather)>,
 #endif
 
-#if defined(USE_MPI) && defined(ORT_USE_NCCL)
+#if defined(ORT_USE_NCCL)
     BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, AllReduce)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, AllGather)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, AllToAll)>,
+
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, DistributedMatMul)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16, DistributedMatMul)>,
+
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, DistributedSlice)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16, DistributedSlice)>,
+
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, int64_t, DistributedReshape)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, DistributedReshape)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16, DistributedReshape)>,
+
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, int64_t, DistributedExpand)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, DistributedExpand)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16, DistributedExpand)>,
+
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, DistributedReduceSum)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16, DistributedReduceSum)>,
+
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, DistributedReduceMax)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16, DistributedReduceMax)>,
+
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, DistributedReduceMean)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16, DistributedReduceMean)>,
+
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, int64_t, DistributedUnsqueeze)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, DistributedUnsqueeze)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16, DistributedUnsqueeze)>,
+
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, int64_t, DistributedSqueeze)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, DistributedSqueeze)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16, DistributedSqueeze)>,
 #endif
 
   };
diff --git a/onnxruntime/contrib_ops/cuda/diffusion/bias_add.cc b/onnxruntime/contrib_ops/cuda/diffusion/bias_add.cc
index a38dfd34cc977..274bc9a730d87 100644
--- a/onnxruntime/contrib_ops/cuda/diffusion/bias_add.cc
+++ b/onnxruntime/contrib_ops/cuda/diffusion/bias_add.cc
@@ -44,11 +44,6 @@ Status BiasAdd<T>::ComputeInternal(OpKernelContext* context) const {
                            "The input is expected to have 3 dimensions, got ", input_dims.size());
   }
 
-  if (input_dims[2] != 320 && input_dims[2] != 640 && input_dims[2] != 1280) {
-    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
-                           "Number of channels should be 320, 640 or 1280, got ", input_dims[2]);
-  }
-
   const Tensor* bias = context->Input<Tensor>(1);
   const auto& bias_dims = bias->Shape().GetDims();
   if (bias_dims.size() != 1) {
diff --git a/onnxruntime/contrib_ops/cuda/diffusion/bias_add_impl.cu b/onnxruntime/contrib_ops/cuda/diffusion/bias_add_impl.cu
index 2983cc99e30b1..8e8068b5e56ca 100644
--- a/onnxruntime/contrib_ops/cuda/diffusion/bias_add_impl.cu
+++ b/onnxruntime/contrib_ops/cuda/diffusion/bias_add_impl.cu
@@ -42,6 +42,17 @@ __global__ void BiasAddKernel(T const* input, T const* bias, T const* residual,
   }
 }
 
+template <typename T, unsigned TPB>
+__global__ void BiasAddLargeKernel(
+    int32_t const ld, const T* input, const T* bias, const T* residual, T* output) {
+  int32_t const offset = blockIdx.x * ld;
+
+  for (int32_t i = threadIdx.x; i < ld; i += TPB) {
+    int32_t const base_offset = offset + i;
+    output[base_offset] = input[base_offset] + bias[i] + residual[base_offset];
+  }
+}
+
 template __global__ void BiasAddKernel<float, 320, 320>(float const*, float const*, float const*, float*);
 template __global__ void BiasAddKernel<float, 640, 320>(float const*, float const*, float const*, float*);
 template __global__ void BiasAddKernel<float, 1280, 320>(float const*, float const*, float const*, float*);
@@ -52,19 +63,19 @@ template __global__ void BiasAddKernel<half, 1280, 320>(half const*, half const*
 template <typename T>
 void LaunchBiasAddKernel(cudaStream_t stream, int32_t grid_size, int32_t num_channels,
                          T const* input, T const* bias, T const* residual, T* output) {
-  constexpr int32_t TPB = 320;  // thread per block
   switch (num_channels) {
     case 320:
-      (BiasAddKernel<T, 320, TPB>)<<<grid_size, TPB, 0, stream>>>(input, bias, residual, output);
+      (BiasAddKernel<T, 320, 320>)<<<grid_size, 320, 0, stream>>>(input, bias, residual, output);
       break;
     case 640:
-      (BiasAddKernel<T, 640, TPB>)<<<grid_size, TPB, 0, stream>>>(input, bias, residual, output);
+      (BiasAddKernel<T, 640, 320>)<<<grid_size, 320, 0, stream>>>(input, bias, residual, output);
       break;
     case 1280:
-      (BiasAddKernel<T, 1280, TPB>)<<<grid_size, TPB, 0, stream>>>(input, bias, residual, output);
+      (BiasAddKernel<T, 1280, 320>)<<<grid_size, 320, 0, stream>>>(input, bias, residual, output);
       break;
     default:
-      ORT_NOT_IMPLEMENTED("Not implemented");
+      BiasAddLargeKernel<T, 256><<<grid_size, 256, 0, stream>>>(num_channels, input, bias, residual, output);
+      break;
   }
 }
 
diff --git a/onnxruntime/contrib_ops/cuda/diffusion/bias_split_gelu.cc b/onnxruntime/contrib_ops/cuda/diffusion/bias_split_gelu.cc
index 2b13cdbd803ef..cb02bd8541623 100644
--- a/onnxruntime/contrib_ops/cuda/diffusion/bias_split_gelu.cc
+++ b/onnxruntime/contrib_ops/cuda/diffusion/bias_split_gelu.cc
@@ -39,9 +39,13 @@ Status BiasSplitGelu<T>::ComputeInternal(OpKernelContext* context) const {
                            "input is expected to have 3 dimensions, got ", input_dims.size());
   }
 
-  if (input_dims[2] != 2560 && input_dims[2] != 5120 && input_dims[2] != 10240) {
+  if (input_dims[2] != 2560 &&
+      input_dims[2] != 5120 &&
+      input_dims[2] != 6144 &&
+      input_dims[2] != 10240 &&
+      input_dims[2] != 12288) {
     return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
-                           "hidden size should be 2560, 5120 or 10240, got ", input_dims[2]);
+                           "hidden size should be 2560, 5120, 6144, 10240 or 12288, got ", input_dims[2]);
   }
 
   const Tensor* bias = context->Input<Tensor>(1);
diff --git a/onnxruntime/contrib_ops/cuda/diffusion/bias_split_gelu_impl.cu b/onnxruntime/contrib_ops/cuda/diffusion/bias_split_gelu_impl.cu
index 19e05a9573f7c..3ae9611d4dfad 100644
--- a/onnxruntime/contrib_ops/cuda/diffusion/bias_split_gelu_impl.cu
+++ b/onnxruntime/contrib_ops/cuda/diffusion/bias_split_gelu_impl.cu
@@ -65,6 +65,12 @@ void LaunchBiasSplitGeluKernel(cudaStream_t stream, int32_t grid_size, int32_t h
     case 5120:
       (biasSplitGeluKernel<T, 5120, TPB>)<<<grid_size, TPB, 0, stream>>>(input, bias, output);
       break;
+    case 3072:
+      (biasSplitGeluKernel<T, 3072, TPB>)<<<grid_size, TPB, 0, stream>>>(input, bias, output);
+      break;
+    case 6144:
+      (biasSplitGeluKernel<T, 6144, TPB>)<<<grid_size, TPB, 0, stream>>>(input, bias, output);
+      break;
     default:
       ORT_NOT_IMPLEMENTED("Not implemented");
   }
@@ -73,9 +79,13 @@ void LaunchBiasSplitGeluKernel(cudaStream_t stream, int32_t grid_size, int32_t h
 template __global__ void biasSplitGeluKernel<float, 1280, 256>(float const*, float const*, float*);
 template __global__ void biasSplitGeluKernel<float, 2560, 256>(float const*, float const*, float*);
 template __global__ void biasSplitGeluKernel<float, 5120, 256>(float const*, float const*, float*);
+template __global__ void biasSplitGeluKernel<float, 3072, 256>(float const*, float const*, float*);
+template __global__ void biasSplitGeluKernel<float, 6144, 256>(float const*, float const*, float*);
 template __global__ void biasSplitGeluKernel<half, 1280, 256>(half const*, half const*, half*);
 template __global__ void biasSplitGeluKernel<half, 2560, 256>(half const*, half const*, half*);
 template __global__ void biasSplitGeluKernel<half, 5120, 256>(half const*, half const*, half*);
+template __global__ void biasSplitGeluKernel<half, 3072, 256>(half const*, half const*, half*);
+template __global__ void biasSplitGeluKernel<half, 6144, 256>(half const*, half const*, half*);
 
 template void LaunchBiasSplitGeluKernel<float>(cudaStream_t stream, int32_t grid_size, int32_t half_hidden_size,
                                                float const* input, float const* bias, float* output);
diff --git a/onnxruntime/contrib_ops/cuda/diffusion/group_norm.cc b/onnxruntime/contrib_ops/cuda/diffusion/group_norm.cc
index 301b2e76b1b2d..87e88ac31c998 100644
--- a/onnxruntime/contrib_ops/cuda/diffusion/group_norm.cc
+++ b/onnxruntime/contrib_ops/cuda/diffusion/group_norm.cc
@@ -1,6 +1,5 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
-
 #include "core/providers/cuda/cuda_common.h"
 #include "contrib_ops/cuda/diffusion/group_norm.h"
 #include "contrib_ops/cuda/diffusion/group_norm_impl.h"
@@ -15,14 +14,22 @@ ONNX_OPERATOR_KERNEL_EX(
     GroupNorm, kMSDomain, 1, kCudaExecutionProvider,
     (*KernelDefBuilder::Create()).TypeConstraint("T", BuildKernelDefConstraints<GROUP_NORM_TYPES>()), GroupNorm);
 
+ONNX_OPERATOR_KERNEL_EX(
+    SkipGroupNorm, kMSDomain, 1, kCudaExecutionProvider,
+    (*KernelDefBuilder::Create()).TypeConstraint("T", BuildKernelDefConstraints<GROUP_NORM_TYPES>()), GroupNorm);
+
 using namespace ONNX_NAMESPACE;
 
 namespace {
+
 template <typename T>
 struct DispatchGroupNorm {
   Status operator()(cudaStream_t stream,
                     Tensor* output,
+                    Tensor* add_out,
                     const Tensor* input,
+                    const Tensor* skip,
+                    const Tensor* bias,
                     const Tensor* gamma,
                     const Tensor* beta,
                     void* workspace,
@@ -32,12 +39,17 @@ struct DispatchGroupNorm {
                     int height,
                     int width,
                     int num_groups,
-                    bool use_swish_activation) {
+                    bool use_swish_activation,
+                    bool broadcast_skip,
+                    int channels_per_block) {
     typedef typename ToCudaType<T>::MappedType CudaT;
     return LaunchGroupNormKernel<CudaT>(
         stream,
         reinterpret_cast<CudaT*>(output->MutableData<T>()),
+        add_out == nullptr ? nullptr : reinterpret_cast<CudaT*>(add_out->MutableData<T>()),
         reinterpret_cast<const CudaT*>(input->Data<T>()),
+        skip == nullptr ? nullptr : reinterpret_cast<const CudaT*>(skip->Data<T>()),
+        bias == nullptr ? nullptr : reinterpret_cast<const CudaT*>(bias->Data<T>()),
         gamma->Data<float>(),
         beta->Data<float>(),
         workspace,
@@ -47,13 +59,21 @@ struct DispatchGroupNorm {
         height,
         width,
         num_groups,
-        use_swish_activation);
+        use_swish_activation,
+        broadcast_skip,
+        channels_per_block);
   }
 };
 
 }  // namespace
 
 GroupNorm::GroupNorm(const OpKernelInfo& op_info) : CudaKernel(op_info) {
+  has_skip_ = false;
+  const std::string& op_name = op_info.GetKernelDef().OpName();
+  if (op_name == "SkipGroupNorm") {
+    has_skip_ = true;
+  }
+
   epsilon_ = op_info.GetAttrOrDefault<float>("epsilon", 1e-5f);
   ORT_ENFORCE(epsilon_ >= 0);
 
@@ -68,6 +88,23 @@ GroupNorm::GroupNorm(const OpKernelInfo& op_info) : CudaKernel(op_info) {
   use_swish_activation_ = (activation == 1);
 
   channels_last_ = (op_info.GetAttrOrDefault<int64_t>("channels_last", static_cast<int64_t>(1)) != 0);
+
+  channels_per_block_ = 0;
+}
+
+Status GroupNorm::PrePack(const Tensor& tensor, int input_idx, AllocatorPtr /*alloc*/,
+                          bool& is_packed, PrePackedWeights* /*prepacked_weights*/) {
+  is_packed = false;
+
+  // Compute and cache cPerBlock using number of channels from gamma tensor shape.
+  if (input_idx == 1) {
+    auto gamma_shape = tensor.Shape();
+    if (gamma_shape.NumDimensions() == 1) {
+      channels_per_block_ = GetChannelsPerBlock(static_cast<int>(gamma_shape[0]), num_groups_);
+    }
+  }
+
+  return Status::OK();
 }
 
 Status GroupNorm::ComputeInternal(OpKernelContext* context) const {
@@ -77,22 +114,38 @@ Status GroupNorm::ComputeInternal(OpKernelContext* context) const {
   Tensor* output = context->Output(0, input->Shape());
 
   if (!channels_last_) {
-    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+    return ORT_MAKE_STATUS(ONNXRUNTIME, NOT_IMPLEMENTED,
                            "only the channels_last layout is supported");
   }
 
+  if (!gamma->IsDataType<float>() || !beta->IsDataType<float>()) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, NOT_IMPLEMENTED,
+                           "GroupNorm only supports gamma and beta in float type");
+  }
+
   const auto& input_dims = input->Shape().GetDims();
   if (input_dims.size() != 4) {
     return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
                            "input is expected to have 4 dimensions, got ", input_dims.size());
   }
 
+  // Only support NHWC format right now.
+  int batch_size = static_cast<int>(input_dims[0]);
+  int height = static_cast<int>(input_dims[1]);
+  int width = static_cast<int>(input_dims[2]);
+  int num_channels = static_cast<int>(input_dims[3]);
+
+  if (num_channels % num_groups_ != 0) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                           "number of channels should be divisiable by num_groups");
+  }
+
   const auto& gamma_dims = gamma->Shape().GetDims();
   if (gamma_dims.size() != 1) {
     return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
                            "gamma is expected to have 1 dimension, got ", gamma_dims.size());
   }
-  if (gamma_dims[0] != input_dims[3]) {
+  if (gamma_dims[0] != num_channels) {
     return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
                            "Number of channels in gamma and input does not match");
   }
@@ -102,22 +155,11 @@ Status GroupNorm::ComputeInternal(OpKernelContext* context) const {
     return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
                            "beta is expected to have 1 dimension, got ", beta_dims.size());
   }
-  if (beta_dims[0] != input_dims[3]) {
+  if (beta_dims[0] != num_channels) {
     return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
                            "Number of channels in beta and input does not match");
   }
 
-  // Input and output format is NHWC
-  int batch_size = static_cast<int>(input_dims[0]);
-  int num_channels = static_cast<int>(input_dims[3]);
-  int height = static_cast<int>(input_dims[1]);
-  int width = static_cast<int>(input_dims[2]);
-
-  if (num_channels % num_groups_ != 0) {
-    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
-                           "number of channels should be divisiable by num_groups");
-  }
-
   if (context->GetUseDeterministicCompute()) {
     static std::once_flag log_warning;
     std::call_once(log_warning, []() {
@@ -125,17 +167,59 @@ Status GroupNorm::ComputeInternal(OpKernelContext* context) const {
     });
   }
 
-  auto workspace = GetScratchBuffer<void>(GetGroupNormWorkspaceSizeInBytes(), context->GetComputeStream());
+  const Tensor* skip = nullptr;
+  const Tensor* bias = nullptr;
+  Tensor* add_out = nullptr;
+
+  bool broadcast_skip = false;
+  if (has_skip_) {
+    skip = context->Input<Tensor>(3);
+    bias = context->Input<Tensor>(4);
+    add_out = context->Output(1, input->Shape());
+
+    if (bias != nullptr) {  // Bias is optional
+      // If provided, bias has shape (C).
+      const auto& bias_dims = bias->Shape().GetDims();
+      if (bias_dims.size() != 1) {
+        return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                               "bias is expected to have 1 dimension, got ", bias_dims.size());
+      }
+      if (bias_dims[0] != num_channels) {
+        return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                               "Number of channels in bias and input does not match");
+      }
+    }
+
+    // Check whether skip can be broadcasted to input shape.
+    if (skip->Shape() != input->Shape()) {
+      const auto& dims = skip->Shape().GetDims();
+      // The shape of ship can be (N, C) or (N, 1, 1, C) for broadcast.
+      const bool b2 = (dims.size() == 2 && dims[0] == batch_size && dims[1] == num_channels);
+      const bool b4 = (dims.size() == 4 && dims[0] == batch_size &&
+                       dims[1] == 1 && dims[2] == 1 && dims[3] == num_channels);
+      broadcast_skip = b2 || b4;
+      if (!broadcast_skip) {
+        return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                               "skip shape is expected to be (N, H, W, C) or (N, 1, 1, C) or (N, C)");
+      }
+    }
+  }
+
+  auto workspace = GetScratchBuffer<void>(GetGroupNormWorkspaceSizeInBytes(batch_size, num_groups_),
+                                          context->GetComputeStream());
 
   utils::MLTypeCallDispatcher<GROUP_NORM_TYPES> dispatcher(input->GetElementType());
-  return dispatcher.InvokeRet<Status, DispatchGroupNorm>(Stream(context), output, input, gamma, beta, workspace.get(),
+  return dispatcher.InvokeRet<Status, DispatchGroupNorm>(Stream(context), output, add_out, input, skip, bias,
+                                                         gamma, beta, workspace.get(),
                                                          epsilon_,
                                                          batch_size,
                                                          num_channels,
                                                          height,
                                                          width,
                                                          num_groups_,
-                                                         use_swish_activation_);
+                                                         use_swish_activation_,
+                                                         broadcast_skip,
+                                                         channels_per_block_);
 }
 
 }  // namespace cuda
diff --git a/onnxruntime/contrib_ops/cuda/diffusion/group_norm.h b/onnxruntime/contrib_ops/cuda/diffusion/group_norm.h
index 52c006e6bdb96..b408b3c1ee79b 100644
--- a/onnxruntime/contrib_ops/cuda/diffusion/group_norm.h
+++ b/onnxruntime/contrib_ops/cuda/diffusion/group_norm.h
@@ -16,11 +16,16 @@ class GroupNorm final : public CudaKernel {
   GroupNorm(const OpKernelInfo& op_kernel_info);
   Status ComputeInternal(OpKernelContext* context) const override;
 
+  Status PrePack(const Tensor& tensor, int input_idx, AllocatorPtr alloc,
+                 bool& is_packed, PrePackedWeights* prepacked_weights) override;
+
  private:
-  bool use_swish_activation_;
+  bool use_swish_activation_;  // use SiLU (also known as Swish) activation after group normalization?
   float epsilon_;
   int num_groups_;
   bool channels_last_;
+  bool has_skip_;  // true for SkipGroupNorm operator; false for GroupNorm
+  int channels_per_block_;
 };
 
 }  // namespace cuda
diff --git a/onnxruntime/contrib_ops/cuda/diffusion/group_norm_impl.cu b/onnxruntime/contrib_ops/cuda/diffusion/group_norm_impl.cu
index 01ba078b4be77..48b161552ce0c 100644
--- a/onnxruntime/contrib_ops/cuda/diffusion/group_norm_impl.cu
+++ b/onnxruntime/contrib_ops/cuda/diffusion/group_norm_impl.cu
@@ -16,18 +16,45 @@
  */
 
 // The CUDA kernel is modified from GroupNorm plugin of TensorRT 8.5
+// Modifications: heuristic channels per block; support epsilon; support skip and bias; update coding style.
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
 #include <cuda_fp16.h>
 #include <cuda_runtime_api.h>
 #include <cub/cub.cuh>
 #include "core/providers/cuda/cuda_common.h"
+#include "core/providers/cuda/cu_inc/common.cuh"
 #include "contrib_ops/cuda/diffusion/group_norm_impl.h"
 #include "contrib_ops/cuda/transformers/dump_cuda_tensor.h"
 
+using namespace onnxruntime::cuda;
+
 namespace onnxruntime {
 namespace contrib {
 namespace cuda {
 
-static inline int32_t divUp(int32_t m, int32_t n) {
+namespace {
+
+// TODO: Similar to SkipLayerNorm kernel, read/write up to 8 channels at same time.
+constexpr static int32_t CHANNELS_PER_THREAD = 2;
+
+constexpr static int kSizes[] = {128, 256, 320, 384, 512};
+constexpr static size_t kNumOfSizes = sizeof(kSizes) / sizeof(kSizes[0]);
+constexpr static int kMaxSize = kSizes[kNumOfSizes - 1];
+
+int NextSize(int x) {
+  for (size_t i = 0; i < kNumOfSizes; ++i) {
+    if (x <= kSizes[i]) {
+      return kSizes[i];
+    }
+  }
+
+  return x;
+}
+}  // namespace
+
+static inline int32_t DivUp(int32_t m, int32_t n) {
   return (m + n - 1) / n;
 }
 
@@ -41,14 +68,14 @@ struct GroupSums {
   // The sum.
   float sum;
   // The sum of squares.
-  float sumSq;
+  float sum_sq;
 };
 
 struct GroupSumsOp {
   inline __device__ GroupSums operator()(GroupSums const& a, GroupSums const& b) {
     GroupSums dst;
     dst.sum = b.flag ? b.sum : (a.sum + b.sum);
-    dst.sumSq = b.flag ? b.sumSq : (a.sumSq + b.sumSq);
+    dst.sum_sq = b.flag ? b.sum_sq : (a.sum_sq + b.sum_sq);
     dst.flag = a.flag + b.flag;
     return dst;
   }
@@ -56,54 +83,85 @@ struct GroupSumsOp {
 
 template <typename T>
 struct GroupNormNHWCParams {
-  // The output buffer. Layout NHWC.
+  // The output buffer. Shape is (n, h, w, c).
   T* dst;
-  // The input buffer. Layout NHWC.
+
+  // Optional output of element-wise add result of src, skip and bias. Shape is (n, h, w, c).
+  T* add_out;
+
+  // The input buffer. Shape is (n, h, w, c).
   T const* src;
+
+  // Optional input buffer for skip tensor. Shape is (n, h, w, c) or (n, 1, 1, c) or (n, c).
+  T const* skip;
+
+  // Optional input buffer for bias tensor. Shape is (c).
+  T const* bias;
+
   // The gamma scaling factor.
   float const* gamma;
+
   // The beta term to add in GN.
   float const* beta;
-  // The temporary buffer to do the global parallel reduction. Size:
-  // BLOCKS_PER_BATCH x C x 2.
-  float* redBuffer;
+
+  // The temporary buffer to do the global parallel reduction. Shape is (n, 2, g), where g is number of groups.
+  float* group_sum_buffer;
 
   // The number of instances in the batch.
   int32_t n;
+
   // The height and width of each activation map.
   int32_t h;
   int32_t w;
-  // The number of channels.
+
+  // Number of channels.
   int32_t c;
-  // The number of groups.
+
+  // Number of groups.
   int32_t groups;
-  // Do we apply the Swish activation function?
-  bool withSwish;
+
+  // Do we apply the SiLU activation function?
+  bool use_silu;
 
   // Precomputed values and parameters to control the execution of the kernels.
 
-  // The number of activations per instance (h * w) and the number of
-  // activations per block.
+  // Number of activations per instance (h * w)
   int32_t hw;
-  int32_t hwPerBlock;
-  // The number of channels per group and blocks per activation in the C
-  // dimension.
-  int32_t cPerBlock;
-  int32_t cPerGroup;
+
+  // Number of activations per block
+  int32_t hw_per_block;
+
+  // Number of channels per block in the C dimension.
+  int32_t channels_per_block;
+
+  // Number of channels per group in the C dimension.
+  int32_t channels_per_group;
 
   // The precomputed stride between instances.
   int32_t hwc;
-  // The inverse of hwc in floats (to compute mean/var).
-  float invHWC;
+  // The inverse of hw*channels_per_group to compute mean of a group.
+  float inv_hw_channels_per_group;
   // The precomputed number of groups per block.
-  int32_t groupsPerBlock;
+  int32_t groups_per_block;
+
+  // Number of threads per block
+  int32_t threads_per_block;
+
+  // Epsilon to get stable variance in normalization.
+  float epsilon;
+
+  // Whether skip need broadcast. True if shape of skip is (N, C) or (N, 1, 1, C); False otherwise.
+  bool broadcast_skip;
+
+  // For SkipGroupNorm, it points to the intermediate result of adding skip and bias.
+  T* skip_workspace;
 };
 
 template <typename T>
-inline __device__ void UpdateSum(const T* src, int64_t offset, float& sum, float& sumSq);
+inline __device__ void UpdateSum(const T* src, int64_t offset, float& sum, float& sum_sq);
 
 template <>
-inline __device__ void UpdateSum(const half* src, int64_t offset, float& sum, float& sumSq) {
+inline __device__ void UpdateSum(const half* src, int64_t offset, float& sum, float& sum_sq) {
   // Fetch two channels per thread.
   __half2 h2 = *reinterpret_cast<__half2 const*>(&src[offset]);
 
@@ -113,11 +171,11 @@ inline __device__ void UpdateSum(const half* src, int64_t offset, float& sum, fl
   sum += f2.x + f2.y;
 
   // Update the sum of squares.
-  sumSq += f2.x * f2.x + f2.y * f2.y;
+  sum_sq += f2.x * f2.x + f2.y * f2.y;
 }
 
 template <>
-inline __device__ void UpdateSum(const float* src, int64_t offset, float& sum, float& sumSq) {
+inline __device__ void UpdateSum(const float* src, int64_t offset, float& sum, float& sum_sq) {
   // Fetch two channels per thread.
   float2 f2 = *reinterpret_cast<float2 const*>(&src[offset]);
 
@@ -125,119 +183,220 @@ inline __device__ void UpdateSum(const float* src, int64_t offset, float& sum, f
   sum += f2.x + f2.y;
 
   // Update the sum of squares.
-  sumSq += f2.x * f2.x + f2.y * f2.y;
+  sum_sq += f2.x * f2.x + f2.y * f2.y;
+}
+
+// Sum for SkipGroupNorm: add_out[offset] = src[offset] + skip[skip_offset] + bias[bias_offset]
+template <typename T>
+inline __device__ void AddSkipBias(T* add_out, const T* src, const T* skip, const T* bias,
+                                   int64_t offset, int64_t skip_offset, int64_t bias_offset, float& sum, float& sum_sq);
+
+template <>
+inline __device__ void AddSkipBias(half* add_out, const half* src, const half* skip, const half* bias,
+                                   int64_t offset, int64_t skip_offset, int64_t bias_offset, float& sum, float& sum_sq) {
+  // Fetch two channels per thread.
+  __half2 h2 = *reinterpret_cast<__half2 const*>(&src[offset]);
+  __half2 s = *reinterpret_cast<__half2 const*>(&skip[skip_offset]);
+  __half2 b = *reinterpret_cast<__half2 const*>(&bias[bias_offset]);
+  h2 = h2 + b;
+  h2 = h2 + s;
+
+  *reinterpret_cast<__half2*>(&add_out[offset]) = h2;
+
+  float2 f2 = __half22float2(h2);
+  sum += f2.x + f2.y;
+  sum_sq += f2.x * f2.x + f2.y * f2.y;
+}
+
+template <>
+inline __device__ void AddSkipBias(float* add_out, const float* src, const float* skip, const float* bias,
+                                   int64_t offset, int64_t skip_offset, int64_t bias_offset, float& sum, float& sum_sq) {
+  float2 f2 = *reinterpret_cast<float2 const*>(&src[offset]);
+  float2 s = *reinterpret_cast<float2 const*>(&skip[skip_offset]);
+  float2 b = *reinterpret_cast<float2 const*>(&bias[bias_offset]);
+  f2.x += s.x + b.x;
+  f2.y += s.y + b.y;
+
+  *reinterpret_cast<float2*>(&add_out[offset]) = f2;
+
+  sum += f2.x + f2.y;
+  sum_sq += f2.x * f2.x + f2.y * f2.y;
+}
+
+// Sum for SkipGroupNorm without bias: add_out[offset] = src[offset] + skip[skip_offset]
+template <typename T>
+inline __device__ void AddSkip(T* add_out, const T* src, const T* skip,
+                               int64_t offset, int64_t skip_offset, float& sum, float& sum_sq);
+
+template <>
+inline __device__ void AddSkip(half* add_out, const half* src, const half* skip,
+                               int64_t offset, int64_t skip_offset, float& sum, float& sum_sq) {
+  __half2 h2 = *reinterpret_cast<__half2 const*>(&src[offset]);
+  __half2 s = *reinterpret_cast<__half2 const*>(&skip[skip_offset]);
+  h2 = h2 + s;
+
+  *reinterpret_cast<__half2*>(&add_out[offset]) = h2;
+
+  float2 f2 = __half22float2(h2);
+  sum += f2.x + f2.y;
+  sum_sq += f2.x * f2.x + f2.y * f2.y;
+}
+
+template <>
+inline __device__ void AddSkip(float* add_out, const float* src, const float* skip,
+                               int64_t offset, int64_t skip_offset, float& sum, float& sum_sq) {
+  float2 f2 = *reinterpret_cast<float2 const*>(&src[offset]);
+  float2 s = *reinterpret_cast<float2 const*>(&skip[skip_offset]);
+  f2.x += s.x;
+  f2.y += s.y;
+  *reinterpret_cast<float2*>(&add_out[offset]) = f2;
+  sum += f2.x + f2.y;
+  sum_sq += f2.x * f2.x + f2.y * f2.y;
 }
 
-template <typename T, int32_t tTHREADS_PER_BLOCK>
-__global__ void groupNormNHWCSumKernel(GroupNormNHWCParams<T> params) {
+template <typename T, int32_t THREADS_PER_BLOCK>
+__global__ void GroupNormNHWCSumKernel(GroupNormNHWCParams<T> params) {
   // The object in charge of doing the sums for the different blocks.
-  typedef cub::BlockScan<GroupSums, tTHREADS_PER_BLOCK> BlockScan;
+  typedef cub::BlockScan<GroupSums, THREADS_PER_BLOCK> BlockScan;
 
   // Allocate shared memory for BlockScan.
-  __shared__ typename BlockScan::TempStorage tempStorage;
-  // Allocate shared memory for the groups. We could reduce the amount of shared
-  // memory reserved.
-  __shared__ float2 smem[tTHREADS_PER_BLOCK];
+  __shared__ typename BlockScan::TempStorage temp_storage;
+
+  // Allocate shared memory for the groups. We could reduce the amount of shared memory reserved.
+  __shared__ float2 smem[THREADS_PER_BLOCK];
 
   // The instance in the batch.
   int32_t ni = blockIdx.z;
-  // The channel loaded by that thread (2 channels per thread for F16x2).
-  int32_t ci = blockIdx.x * params.cPerBlock + threadIdx.x * 2;
+
+  // The channel loaded by that thread.
+  int32_t ci = blockIdx.x * params.channels_per_block + threadIdx.x * CHANNELS_PER_THREAD;
+
+  if (ci >= params.c || threadIdx.x * CHANNELS_PER_THREAD >= params.channels_per_block) {
+    return;
+  }
 
   // The first activation loaded by that block.
-  int32_t hwBegin = blockIdx.y * params.hwPerBlock;
+  int32_t hw_begin = blockIdx.y * params.hw_per_block;
   // The last activation loaded by that block.
-  int32_t hwEnd = min(hwBegin + params.hwPerBlock, params.hw);
+  int32_t hw_end = min(hw_begin + params.hw_per_block, params.hw);
 
   // The sums.
   float sum = 0.F;
-  float sumSq = 0.F;
+  float sum_sq = 0.F;
 
   // Iterate over the activations to compute the sums.
-  if (ci < params.c) {
-    for (int32_t hwi = hwBegin; hwi < hwEnd; ++hwi) {
-      // The offset.
-      int64_t offset = static_cast<int64_t>(ni) * params.hwc + static_cast<int64_t>(hwi) * params.c + ci;
-      UpdateSum(params.src, offset, sum, sumSq);
+  int64_t offset = static_cast<int64_t>(ni) * params.hwc + static_cast<int64_t>(hw_begin) * params.c + ci;
+  if (params.skip != nullptr) {
+    // SkipGroupNorm: skip is (n, h, w, c) or (n, 1, 1, c) or (n, c),  bias is (c), and add_out is (n, h, w, c)
+    const int64_t bias_offset = static_cast<int64_t>(ci);
+    T* add_out = params.skip_workspace;
+    if (params.broadcast_skip) {
+      const int64_t skip_offset = static_cast<int64_t>(ni) * params.c + ci;
+
+      if (params.bias != nullptr) {
+        for (int32_t hwi = hw_begin; hwi < hw_end; ++hwi, offset += params.c) {
+          AddSkipBias(add_out, params.src, params.skip, params.bias, offset, skip_offset, bias_offset, sum, sum_sq);
+        }
+      } else {
+        for (int32_t hwi = hw_begin; hwi < hw_end; ++hwi, offset += params.c) {
+          AddSkip(add_out, params.src, params.skip, offset, skip_offset, sum, sum_sq);
+        }
+      }
+    } else {
+      if (params.bias != nullptr) {
+        for (int32_t hwi = hw_begin; hwi < hw_end; ++hwi, offset += params.c) {
+          AddSkipBias(add_out, params.src, params.skip, params.bias, offset, offset, bias_offset, sum, sum_sq);
+        }
+      } else {
+        for (int32_t hwi = hw_begin; hwi < hw_end; ++hwi, offset += params.c) {
+          AddSkip(add_out, params.src, params.skip, offset, offset, sum, sum_sq);
+        }
+      }
+    }
+  } else {  // GroupNorm
+    for (int32_t hwi = hw_begin; hwi < hw_end; ++hwi, offset += params.c) {
+      UpdateSum(params.src, offset, sum, sum_sq);
     }
   }
 
-  // The group that thread works on and the channel in the group (modulus).
-  int32_t gi = threadIdx.x * 2 / params.cPerGroup;
-  int32_t cj = threadIdx.x * 2 - params.cPerGroup * gi;
+  // The group index relative to the first group within the same block.
+  int32_t gi = threadIdx.x * CHANNELS_PER_THREAD / params.channels_per_group;
+  // The channel in the group.
+  int32_t cj = ci % params.channels_per_group;
 
   // The data for the summations.
-  GroupSums inp{cj == 0 ? 1 : 0, sum, sumSq};
+  GroupSums inp{cj == 0 ? 1 : 0, sum, sum_sq};
 
-  // Do the segmented scan.
+  // Do the segmented scan. InclusiveScan is not deterministic.
   GroupSums out;
-  BlockScan(tempStorage).InclusiveScan(inp, out, GroupSumsOp());
+  BlockScan(temp_storage).InclusiveScan(inp, out, GroupSumsOp());
 
-  // Store the results for the groups in shared memory (to produce coalesced
-  // stores later).
-  if (cj == params.cPerGroup - 2) {  //2 channels per thread
-    smem[gi] = make_float2(out.sum, out.sumSq);
+  // Store the results for the groups in shared memory (to produce coalesced stores later).
+  // For each group, only the last thread of that group is picked to save sum to shared memory.
+  if (cj == params.channels_per_group - CHANNELS_PER_THREAD) {
+    smem[gi] = make_float2(out.sum, out.sum_sq);
   }
 
   // Make sure the data is in shared memory.
   __syncthreads();
 
-  // The global group index.
-  int32_t gj = blockIdx.x * params.groupsPerBlock + threadIdx.x;
-
   // Threads that have nothing left to do, exit.
-  if (threadIdx.x >= params.groupsPerBlock || gj >= params.groups) {
+  if (threadIdx.x >= params.groups_per_block) {
     return;
   }
 
-  // The first threads (those storing to global memory, load the values).
-  float2 sums = smem[threadIdx.x];
-
-  // Store to global memory.
-  atomicAdd(&params.redBuffer[(2 * ni + 0) * params.groups + gj], sums.x);
-  atomicAdd(&params.redBuffer[(2 * ni + 1) * params.groups + gj], sums.y);
+  // The global group index.
+  // Use neighboring threads for coalesced write.
+  int32_t gj = blockIdx.x * params.groups_per_block + threadIdx.x;
+
+  if (gj < params.groups) {
+    float2 sums = smem[threadIdx.x];
+    const int index = (2 * ni) * params.groups + gj;
+    atomicAdd(&params.group_sum_buffer[index], sums.x);
+    atomicAdd(&params.group_sum_buffer[index + params.groups], sums.y);
+  }
 }
 
 template <typename T>
-void groupNormNHWCSum(GroupNormNHWCParams<T> const& params, cudaStream_t stream) {
-  // Make sure the values are as we expect.
-  ORT_ENFORCE(params.c % params.cPerBlock == 0 && params.hw % params.hwPerBlock == 0);
-  // Make sure a group does not span multiple blocks.
-  ORT_ENFORCE(params.cPerBlock % params.cPerGroup == 0);
-
+void GroupNormNHWCSum(GroupNormNHWCParams<T> const& params, cudaStream_t stream) {
   dim3 grid;
 
   // The number of blocks to compute all the channels.
-  grid.x = params.c / params.cPerBlock;
+  grid.x = DivUp(params.c, params.channels_per_block);
+
   // The number of blocks to compute all the activations in a given instance.
-  grid.y = divUp(params.hw, params.hwPerBlock);
+  grid.y = DivUp(params.hw, params.hw_per_block);
+
   // The number of instances.
   grid.z = params.n;
 
-  switch (params.cPerBlock) {
-    case 320:
-      groupNormNHWCSumKernel<T, 160><<<grid, 160, 0, stream>>>(params);
+  // Threads_per_block is half of values in kSizes since CHANNELS_PER_THREAD = 2.
+  switch (params.threads_per_block) {
+    case 256:
+      GroupNormNHWCSumKernel<T, 256><<<grid, 256, 0, stream>>>(params);
       break;
-    case 480:
-      groupNormNHWCSumKernel<T, 256><<<grid, 256, 0, stream>>>(params);
+    case 192:
+      GroupNormNHWCSumKernel<T, 192><<<grid, 192, 0, stream>>>(params);
       break;
-    case 256:
-      groupNormNHWCSumKernel<T, 128><<<grid, 128, 0, stream>>>(params);
+    case 160:
+      GroupNormNHWCSumKernel<T, 160><<<grid, 160, 0, stream>>>(params);
       break;
     case 128:
-      groupNormNHWCSumKernel<T, 64><<<grid, 64, 0, stream>>>(params);
+      GroupNormNHWCSumKernel<T, 128><<<grid, 128, 0, stream>>>(params);
+      break;
+    case 64:
+      GroupNormNHWCSumKernel<T, 64><<<grid, 64, 0, stream>>>(params);
       break;
-    default:
-      ORT_NOT_IMPLEMENTED("Not implemented");
   }
 }
 
 template <typename T>
-__device__ void computeGroupNorm(const T* src, T* dst, int64_t offset, float mean, float invStdDev, float2& gammaF2, float2& betaF2, bool swish);
+__device__ void ComputeGroupNorm(const T* src, T* dst, int64_t offset, float mean, float inv_std_dev,
+                                 float2& gamma_f2, float2& beta_f2, bool silu);
 
 template <>
-__device__ void computeGroupNorm(const half* src, half* dst, int64_t offset, float mean, float invStdDev,
-                                 float2& gammaF2, float2& betaF2, bool swish) {
+__device__ void ComputeGroupNorm(const half* src, half* dst, int64_t offset, float mean, float inv_std_dev,
+                                 float2& gamma_f2, float2& beta_f2, bool silu) {
   // Fetch two channels per thread.
   __half2 h2 = *reinterpret_cast<__half2 const*>(&src[offset]);
 
@@ -245,15 +404,15 @@ __device__ void computeGroupNorm(const half* src, half* dst, int64_t offset, flo
   float2 f2 = __half22float2(h2);
 
   // Normalize the channels.
-  f2.x = (f2.x - mean) * invStdDev;
-  f2.y = (f2.y - mean) * invStdDev;
+  f2.x = (f2.x - mean) * inv_std_dev;
+  f2.y = (f2.y - mean) * inv_std_dev;
 
   // Scale by gamma and add beta.
-  f2.x = gammaF2.x * f2.x + betaF2.x;
-  f2.y = gammaF2.y * f2.y + betaF2.y;
+  f2.x = gamma_f2.x * f2.x + beta_f2.x;
+  f2.y = gamma_f2.y * f2.y + beta_f2.y;
 
-  // Apply Swish if needed.
-  if (swish) {
+  // Apply SiLU activation if needed.
+  if (silu) {
     f2.x = f2.x * sigmoid(f2.x);
     f2.y = f2.y * sigmoid(f2.y);
   }
@@ -262,21 +421,21 @@ __device__ void computeGroupNorm(const half* src, half* dst, int64_t offset, flo
 }
 
 template <>
-__device__ void computeGroupNorm(const float* src, float* dst, int64_t offset, float mean, float invStdDev,
-                                 float2& gammaF2, float2& betaF2, bool swish) {
+__device__ void ComputeGroupNorm(const float* src, float* dst, int64_t offset, float mean, float inv_std_dev,
+                                 float2& gamma_f2, float2& beta_f2, bool silu) {
   // Fetch two channels per thread.
   float2 f2 = *reinterpret_cast<float2 const*>(&src[offset]);
 
   // Normalize the channels.
-  f2.x = (f2.x - mean) * invStdDev;
-  f2.y = (f2.y - mean) * invStdDev;
+  f2.x = (f2.x - mean) * inv_std_dev;
+  f2.y = (f2.y - mean) * inv_std_dev;
 
   // Scale by gamma and add beta.
-  f2.x = gammaF2.x * f2.x + betaF2.x;
-  f2.y = gammaF2.y * f2.y + betaF2.y;
+  f2.x = gamma_f2.x * f2.x + beta_f2.x;
+  f2.y = gamma_f2.y * f2.y + beta_f2.y;
 
-  // Apply Swish if needed.
-  if (swish) {
+  // Apply SiLU activation if needed.
+  if (silu) {
     f2.x = f2.x * sigmoid(f2.x);
     f2.y = f2.y * sigmoid(f2.y);
   }
@@ -284,110 +443,142 @@ __device__ void computeGroupNorm(const float* src, float* dst, int64_t offset, f
   *reinterpret_cast<float2*>(&dst[offset]) = f2;
 }
 
-template <typename T, int32_t tTHREADS_PER_BLOCK>
-__global__ void groupNormNHWCScaleKernel(GroupNormNHWCParams<T> params) {
-  // The channel loaded by that thread (2 channels per thread for F16x2).
-  int32_t ci = blockIdx.x * params.cPerBlock + threadIdx.x * 2;
-  if (ci >= params.c) {
+template <typename T>
+__global__ void GroupNormNHWCScaleKernel(GroupNormNHWCParams<T> params) {
+  // The channel loaded by that thread.
+  int32_t ci = blockIdx.x * params.channels_per_block + threadIdx.x * CHANNELS_PER_THREAD;
+  if (ci >= params.c || threadIdx.x * CHANNELS_PER_THREAD >= params.channels_per_block) {
     return;
   }
 
   // The instance in the batch.
   int32_t ni = blockIdx.z;
 
-  // The group that thread works on and the channel in the group (modulus).
-  int32_t gi = ci / params.cPerGroup;
+  // The group that thread works on.
+  int32_t gi = ci / params.channels_per_group;
 
   // Load the sum and sum of squares for the group.
-  float sum = 0.F, sumSq = 0.F;
+  float sum = 0.F, sum_sq = 0.F;
   if (gi < params.groups) {
-    sum = params.redBuffer[(2 * ni + 0) * params.groups + gi];
-    sumSq = params.redBuffer[(2 * ni + 1) * params.groups + gi];
+    const int index = (2 * ni) * params.groups + gi;
+    sum = params.group_sum_buffer[index];
+    sum_sq = params.group_sum_buffer[index + params.groups];
   }
 
-  // Load gamma/beta.
-  float2 gammaF2 = *reinterpret_cast<float2 const*>(&params.gamma[ci]);
-  float2 betaF2 = *reinterpret_cast<float2 const*>(&params.beta[ci]);
+  // Load gamma/beta. Fetch two per thread.
+  float2 gamma_f2 = *reinterpret_cast<float2 const*>(&params.gamma[ci]);
+  float2 beta_f2 = *reinterpret_cast<float2 const*>(&params.beta[ci]);
 
   // Compute the mean.
-  float mean = sum * params.invHWC;
+  float mean = sum * params.inv_hw_channels_per_group;
   // Compute the variance.
-  float var = sumSq * params.invHWC - (mean * mean);
+  float var = sum_sq * params.inv_hw_channels_per_group - (mean * mean);
   // Compute the inverse of the stddev.
-  float invStdDev = var <= 0.F ? 1.F : rsqrtf(var);
+  float inv_std_dev = rsqrtf(var + params.epsilon);
 
-  // The first activation loaded by that block.
-  int32_t hwBegin = blockIdx.y * params.hwPerBlock;
-  // The last activation loaded by that block.
-  int32_t hwEnd = min(hwBegin + params.hwPerBlock, params.hw);
+  int32_t hw_begin = blockIdx.y * params.hw_per_block;
+  int32_t hw_end = min(hw_begin + params.hw_per_block, params.hw);
 
-  // Iterate over the activations to compute the sums.
-  for (int32_t hwi = hwBegin; hwi < hwEnd; ++hwi) {
-    // The src/dst offset.
-    int64_t offset = (int64_t)ni * params.hwc + hwi * params.c + ci;
-
-    // Fetch two channels per thread.
-    computeGroupNorm<T>(params.src, params.dst, offset, mean, invStdDev, gammaF2, betaF2, params.withSwish);
+  const T* input = (params.skip != nullptr) ? params.skip_workspace : params.src;
+  int64_t offset = static_cast<int64_t>(ni) * params.hwc + static_cast<int64_t>(hw_begin) * params.c + ci;
+  for (int32_t hwi = hw_begin; hwi < hw_end; ++hwi, offset += params.c) {
+    ComputeGroupNorm<T>(input, params.dst, offset, mean, inv_std_dev, gamma_f2, beta_f2, params.use_silu);
   }
 }
 
 template <typename T>
-void groupNormNHWCScale(GroupNormNHWCParams<T> const& params, cudaStream_t stream) {
-  // Make sure the dimensions are aligned with what we expect.
-  ORT_ENFORCE(params.c % params.cPerBlock == 0);
-  // Make sure a group does not span multiple blocks.
-  ORT_ENFORCE(params.cPerBlock % params.cPerGroup == 0);
-
+void GroupNormNHWCScale(GroupNormNHWCParams<T> const& params, cudaStream_t stream) {
   dim3 grid;
 
   // The number of blocks to compute all the channels.
-  grid.x = params.c / params.cPerBlock;
+  grid.x = DivUp(params.c, params.channels_per_block);
   // The number of blocks to compute all the activations in a given instance.
-  grid.y = divUp(params.hw, params.hwPerBlock);
+  grid.y = DivUp(params.hw, params.hw_per_block);
   // The number of instances.
   grid.z = params.n;
 
-  switch (params.cPerBlock) {
-    case 320:
-      groupNormNHWCScaleKernel<T, 160><<<grid, 160, 0, stream>>>(params);
+  // Threads_per_block is half of values in kSizes since CHANNELS_PER_THREAD = 2.
+  switch (params.threads_per_block) {
+    case 256:
+      GroupNormNHWCScaleKernel<T><<<grid, 256, 0, stream>>>(params);
       break;
-    case 480:
-      groupNormNHWCScaleKernel<T, 256><<<grid, 256, 0, stream>>>(params);
+    case 192:
+      GroupNormNHWCScaleKernel<T><<<grid, 192, 0, stream>>>(params);
       break;
-    case 256:
-      groupNormNHWCScaleKernel<T, 128><<<grid, 128, 0, stream>>>(params);
+    case 160:
+      GroupNormNHWCScaleKernel<T><<<grid, 160, 0, stream>>>(params);
       break;
     case 128:
-      groupNormNHWCScaleKernel<T, 64><<<grid, 64, 0, stream>>>(params);
+      GroupNormNHWCScaleKernel<T><<<grid, 128, 0, stream>>>(params);
+      break;
+    case 64:
+      GroupNormNHWCScaleKernel<T><<<grid, 64, 0, stream>>>(params);
       break;
-    default:
-      ORT_NOT_IMPLEMENTED("Not implemented");
   }
 }
 
-int32_t findMaxDivisor(int32_t n, int32_t maxAllowedDivisor) {
-  int32_t maxDivisor = -1;
+int32_t FindMaxDivisor(int32_t n, int32_t max_allowed_divisor) {
+  int32_t max_divisor = -1;
   for (int32_t i = 1; i <= std::sqrt(n); i++) {
     if (n % i == 0) {
       int32_t divisor1 = n / i;
       int32_t divisor2 = i;
 
-      if (divisor1 > maxDivisor && divisor1 < maxAllowedDivisor) {
-        maxDivisor = divisor1;
+      if (divisor1 > max_divisor && divisor1 < max_allowed_divisor) {
+        max_divisor = divisor1;
       }
-      if (divisor2 > maxDivisor && divisor2 < maxAllowedDivisor) {
-        maxDivisor = divisor2;
+      if (divisor2 > max_divisor && divisor2 < max_allowed_divisor) {
+        max_divisor = divisor2;
       }
     }
   }
-  return maxDivisor;
+  return max_divisor;
+}
+
+// Find proper channels per block based on a cost function: The cost is number of channels corresponding to
+// extra threads allocated but no channels assigned to them to work on. If cost is zero, every thread has
+// work to do so it is ideal case.
+int FindChannelsPerBlock(int num_channels, int channels_per_group) {
+  int min_cost = -1;
+  int best_candidate = -1;
+  for (size_t i = kNumOfSizes; i > 0; --i) {
+    if (kSizes[i - 1] < channels_per_group) {
+      break;
+    }
+
+    int channels_per_block = kSizes[i - 1] / channels_per_group * channels_per_group;
+    int blocks = (num_channels + channels_per_block - 1) / channels_per_block;
+    int cost = blocks * kSizes[i - 1] - num_channels;
+    if (cost == 0) {
+      return channels_per_block;
+    }
+
+    if (min_cost == -1 || cost < min_cost) {
+      min_cost = cost;
+      best_candidate = channels_per_block;
+    }
+  }
+
+  return best_candidate;
+}
+
+int GetChannelsPerBlock(int num_channels, int num_groups) {
+  int32_t channels_per_group = num_channels / num_groups;
+  int32_t channels_per_block = channels_per_group;
+  if (channels_per_group < kMaxSize / 2) {
+    channels_per_block = FindChannelsPerBlock(num_channels, channels_per_group);
+  }
+  return channels_per_block;
 }
 
 template <typename T>
 Status LaunchGroupNormKernel(
     cudaStream_t stream,
     T* output,
+    T* add_out,
     const T* input,
+    const T* skip,
+    const T* bias,
     const float* gamma,
     const float* beta,
     void* workspace,
@@ -397,79 +588,94 @@ Status LaunchGroupNormKernel(
     int height,
     int width,
     int num_groups,
-    bool use_swish_activation) {
-  if (batch_size > static_cast<int>(kMaxGroupNormBatchSize)) {
-    return ORT_MAKE_STATUS(ONNXRUNTIME, StatusCode::NOT_IMPLEMENTED,
-                           "only support batch_size <= 32. Got", batch_size);
-  }
+    bool use_silu,
+    bool broadcast_skip,
+    int channels_per_block) {
+  GroupNormNHWCParams<T> params;
 
-  if (num_groups != static_cast<int>(kGroupNormNumberOfGroups)) {
-    return ORT_MAKE_STATUS(ONNXRUNTIME, StatusCode::NOT_IMPLEMENTED,
-                           "only num_groups=32 is supported. Got", num_groups);
+  int32_t channels_per_group = num_channels / num_groups;
+  // channels_per_block is computed in PrePack.
+  // If the gamma is not initializer, channels_per_block might be zero after PrePack. In that happens, compute it here.
+  if (channels_per_block < channels_per_group) {
+    channels_per_block = GetChannelsPerBlock(num_channels, num_groups);
   }
 
-  GroupNormNHWCParams<T> params;
-  int32_t cPerBlock = 320;
-  int32_t maxBlocksPerHW = 1024;
-  switch (num_channels) {
-    case 960:
-    case 1920:
-      cPerBlock = 480;
-      break;
-    case 512:
-    case 256:
-      cPerBlock = 256;
-      break;
-    case 128:
-      cPerBlock = 128;
-      break;
-    default:
-      cPerBlock = 320;
+  // TODO: Update the kernel to support CHANNELS_PER_THREAD==1 and other corner cases
+  if (channels_per_block % channels_per_group != 0 ||
+      channels_per_block > kMaxSize ||
+      (channels_per_group % CHANNELS_PER_THREAD != 0)) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, NOT_IMPLEMENTED,
+                           "GroupNorm in CUDA does not support the input: n=", batch_size,
+                           " h=", height,
+                           " w=", width,
+                           " c=", num_channels,
+                           " groups=", num_groups);
   }
 
-  params.withSwish = use_swish_activation;
+  params.use_silu = use_silu;
   params.dst = output;
+  params.add_out = add_out;
   params.src = input;
+  params.skip = skip;
+  params.bias = bias;
   params.gamma = gamma;
   params.beta = beta;
-  params.redBuffer = reinterpret_cast<float*>(workspace);
+  params.group_sum_buffer = reinterpret_cast<float*>(workspace);
   params.n = batch_size;
   params.h = height;
   params.w = width;
   params.c = num_channels;
   params.groups = num_groups;
   params.hw = params.h * params.w;
-  const int32_t blocksPerHW = findMaxDivisor(params.hw, maxBlocksPerHW);
-  params.hwPerBlock = divUp(params.hw, blocksPerHW);
-  params.cPerBlock = cPerBlock;
-  params.cPerGroup = params.c / params.groups;
+
+  // This will allocate as many blocks as possible to partition HW.
+  // For Stable Diffusion, latent hw is 4K ~ 16K. This will allocate 1024 blocks, and each handles 4~16 hw.
+  // TODO: tune this logic to find proper blocks when hw is small.
+  constexpr int32_t max_blocks_per_hw = 1024;
+  const int32_t blocks_per_hw = FindMaxDivisor(params.hw, max_blocks_per_hw);
+  params.hw_per_block = DivUp(params.hw, blocks_per_hw);
+
+  params.channels_per_block = channels_per_block;
+  params.channels_per_group = channels_per_group;
   params.hwc = params.hw * params.c;
-  params.invHWC = 1.F / (float)(params.hw * params.cPerGroup);
-  params.groupsPerBlock = cPerBlock / params.cPerGroup;
+  params.inv_hw_channels_per_group = 1.F / (float)(params.hw * params.channels_per_group);
+  params.groups_per_block = channels_per_block / params.channels_per_group;
+  params.epsilon = epsilon;
+  params.broadcast_skip = broadcast_skip;
 
-  DUMP_TENSOR_INIT();
-  DUMP_TENSOR("input", input, batch_size, num_channels, height * width);
-  DUMP_TENSOR("gamma", gamma, 1, num_channels);
-  DUMP_TENSOR("beta", beta, 1, num_channels);
-  cudaMemsetAsync(params.redBuffer, 0, GetGroupNormWorkspaceSizeInBytes(), stream);
-  groupNormNHWCSum<T>(params, stream);
-  DUMP_TENSOR("workspace", params.redBuffer, batch_size, num_groups, 2);
+  // Workspace for SkipGroupNorm to store intermediate results of src+skip+bias.
+  params.skip_workspace = (params.add_out != nullptr) ? params.add_out : params.dst;
+
+  params.threads_per_block = NextSize(channels_per_block) / CHANNELS_PER_THREAD;
+
+  CUDA_RETURN_IF_ERROR(cudaMemsetAsync(
+      params.group_sum_buffer, 0, GetGroupNormWorkspaceSizeInBytes(batch_size, num_groups), stream));
+
+  GroupNormNHWCSum<T>(params, stream);
   CUDA_RETURN_IF_ERROR(cudaGetLastError());
-  groupNormNHWCScale<T>(params, stream);
+
+  DUMP_TENSOR_INIT();
+  DUMP_TENSOR("workspace", params.group_sum_buffer, batch_size, 2, num_groups);
+
+  GroupNormNHWCScale<T>(params, stream);
   CUDA_RETURN_IF_ERROR(cudaGetLastError());
-  DUMP_TENSOR("output", output, batch_size, num_channels, height * width);
+
   return Status::OK();
 }
 
-template Status LaunchGroupNormKernel<half>(cudaStream_t stream, half* output,
-                                            const half* input, const float* gamma, const float* beta, void* workspace,
+template Status LaunchGroupNormKernel<half>(cudaStream_t stream, half* output, half* add_out,
+                                            const half* input, const half* skip, const half* bias,
+                                            const float* gamma, const float* beta, void* workspace,
                                             float epsilon, int batch_size, int num_channels,
-                                            int height, int width, int num_groups, bool swish);
+                                            int height, int width, int num_groups, bool silu,
+                                            bool broadcast_skip, int channels_per_block);
 
-template Status LaunchGroupNormKernel<float>(cudaStream_t stream, float* output,
-                                             const float* input, const float* gamma, const float* beta, void* workspace,
+template Status LaunchGroupNormKernel<float>(cudaStream_t stream, float* output, float* add_out,
+                                             const float* input, const float* skip, const float* bias,
+                                             const float* gamma, const float* beta, void* workspace,
                                              float epsilon, int batch_size, int num_channels,
-                                             int height, int width, int num_groups, bool swish);
+                                             int height, int width, int num_groups, bool silu,
+                                             bool broadcast_skip, int channels_per_block);
 }  // namespace cuda
 }  // namespace contrib
 }  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/diffusion/group_norm_impl.h b/onnxruntime/contrib_ops/cuda/diffusion/group_norm_impl.h
index c7e9245050ee6..9532aeecb2f57 100644
--- a/onnxruntime/contrib_ops/cuda/diffusion/group_norm_impl.h
+++ b/onnxruntime/contrib_ops/cuda/diffusion/group_norm_impl.h
@@ -12,29 +12,33 @@ namespace onnxruntime {
 namespace contrib {
 namespace cuda {
 
-constexpr size_t kMaxGroupNormBatchSize = 32;
-constexpr size_t kGroupNormNumberOfGroups = 32;
-
-constexpr size_t GetGroupNormWorkspaceSizeInBytes() {
+constexpr size_t GetGroupNormWorkspaceSizeInBytes(size_t batch_size, size_t num_groups) {
   // Two buffers for sum and squared sum
-  return (sizeof(float) * 2) * kMaxGroupNormBatchSize * kGroupNormNumberOfGroups;
+  return (sizeof(float) * 2) * batch_size * num_groups;
 }
 
+int GetChannelsPerBlock(int num_channels, int num_groups);
+
 template <typename T>
 Status LaunchGroupNormKernel(
     cudaStream_t stream,
-    T* output,                 // normalized output tensor
-    const T* input,            // input tensor
-    const float* gamma,        // gamma (also known as weight or scale)
-    const float* beta,         // beta (also known as bias)
-    void* workspace,           // Work space
-    float epsilon,             // epsilon used normalization
-    int batch_size,            // N
-    int num_channels,          // C
-    int height,                // H
-    int width,                 // W
-    int num_groups,            // number of groups
-    bool use_swish_activation  // Whether there is Swish activation after group normalization
+    T* output,              // normalized output tensor. Shape is (n, h, w, c)
+    T* add_out,             // optional output tensor for element-wise sum of input + skip + bias. Shape is (n, h, w, c)
+    const T* input,         // input tensor. Shape is (n, h, w, c)
+    const T* skip,          // optional skip tensor. Shape is (n, h, w, c)
+    const T* bias,          // optional bias tensor. Shape is (c) for SkipGroupNorm or (n, c) for BiasGroupNorm
+    const float* gamma,     // gamma (also known as weight or scale). Shape is (c)
+    const float* beta,      // beta (also known as bias). Shape is (c)
+    void* workspace,        // Work space
+    float epsilon,          // epsilon used normalization
+    int batch_size,         // N
+    int num_channels,       // C
+    int height,             // H
+    int width,              // W
+    int num_groups,         // number of groups
+    bool use_silu,          // Whether there is Sigmoid Linear Unit (SiLU) activation after group normalization
+    bool broadcast_skip,    // Whether skip need broadcast. When skip has shape (n, c) or (n, 1, 1, c), it need broadcast.
+    int channels_per_block  // Pre-computed channels per block.
 );
 
 }  // namespace cuda
diff --git a/onnxruntime/contrib_ops/cuda/math/gemm_float8.cc b/onnxruntime/contrib_ops/cuda/math/gemm_float8.cc
new file mode 100644
index 0000000000000..6cdccdb1becb1
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/math/gemm_float8.cc
@@ -0,0 +1,76 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include <string>
+#include "core/providers/cuda/math/gemm.h"
+#include "core/providers/cuda/cuda_common.h"
+#include "core/providers/cuda/shared_inc/fpgeneric.h"
+#include "core/providers/cpu/math/gemm_helper.h"
+#include "contrib_ops/cuda/math/gemm_float8.h"
+
+using namespace ONNX_NAMESPACE;
+
+namespace onnxruntime {
+namespace contrib {
+namespace cuda {
+
+#if !defined(DISABLE_FLOAT8_TYPES)
+#define GEMM_FLOAT8_CONSTRAINTS BuildKernelDefConstraints<Float8E4M3FN, Float8E5M2, MLFloat16, BFloat16, float>()
+#else
+#define GEMM_FLOAT8_CONSTRAINTS BuildKernelDefConstraints<MLFloat16, BFloat16, float>()
+#endif
+
+#define REGISTER_KERNEL()                                            \
+  ONNX_OPERATOR_KERNEL_EX(                                           \
+      GemmFloat8,                                                    \
+      kMSDomain,                                                     \
+      1,                                                             \
+      kCudaExecutionProvider,                                        \
+      (*KernelDefBuilder::Create())                                  \
+          .TypeConstraint("TA", GEMM_FLOAT8_CONSTRAINTS)             \
+          .TypeConstraint("TB", GEMM_FLOAT8_CONSTRAINTS)             \
+          .TypeConstraint("TR", GEMM_FLOAT8_CONSTRAINTS)             \
+          .TypeConstraint("TS", BuildKernelDefConstraints<float>()), \
+      GemmFloat8);
+
+REGISTER_KERNEL()
+
+GemmFloat8::GemmFloat8(const OpKernelInfo& info) : CudaKernel(info) {
+  transA_ = info.GetAttrOrDefault<int64_t>("transA", 0);
+  transB_ = info.GetAttrOrDefault<int64_t>("transB", 0);
+  dtype_ = info.GetAttrOrDefault<int64_t>("dtype", ONNX_NAMESPACE::TensorProto_DataType_FLOAT);
+  auto& device_prop = GetDeviceProp();
+  sm_count_ = device_prop.multiProcessorCount;
+  alpha_ = info.GetAttrOrDefault<float>("alpha", 1);
+  beta_ = info.GetAttrOrDefault<float>("beta", 0);
+
+#if (CUDA_VERSION < 12000)
+  ORT_ENFORCE(beta_ == 0, "CUDA < 12.0 does not support bias, beta must be 0.");
+#endif
+
+  std::string stemp = info.GetAttrOrDefault<std::string>("activation", "NONE");
+  if (stemp == "NONE") {
+    epilogue_ = CUBLASLT_EPILOGUE_DEFAULT;
+  } else if (stemp == "RELU") {
+    epilogue_ = CUBLASLT_EPILOGUE_RELU;
+  } else if (stemp == "GELU") {
+    epilogue_ = CUBLASLT_EPILOGUE_GELU;
+  } else {
+    ORT_THROW("Unexpected value for activation: '", stemp, "'.");
+  }
+}
+
+Status GemmFloat8::SetCheck(const TensorShape& a_shape, const TensorShape& b_shape, int& M, int& N, int& K) const {
+  GemmHelper helper(a_shape, transA_, b_shape, transB_, TensorShape({}));
+  if (!helper.State().IsOK())
+    return helper.State();
+
+  M = gsl::narrow_cast<int>(helper.M());
+  N = gsl::narrow_cast<int>(helper.N());
+  K = gsl::narrow_cast<int>(helper.K());
+  return helper.State();
+}
+
+}  // namespace cuda
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/math/gemm_float8.cu b/onnxruntime/contrib_ops/cuda/math/gemm_float8.cu
new file mode 100644
index 0000000000000..56b541f5256bf
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/math/gemm_float8.cu
@@ -0,0 +1,411 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+//
+// The operator calls function 'cublasLtMatmul'
+// (https://docs.nvidia.com/cuda/cublas/index.html?highlight=cublasLtMatmul#cublasltmatmul).
+// It lets the function checks what configuration is valid or not. If not, the error message
+// shows the error message 'CUBLAS_STATUS_NOT_SUPPORTED'. NVIDIA documentation provides
+// information on what attribute or type must be modified.
+// This operator requires CUDA_VERSION >= 11.8 for float 8 and CUDA_VERSION >= 12.0
+// for beta != 0.
+
+#include <algorithm>
+#include <utility>
+#include <cuda_runtime.h>
+#include "contrib_ops/cuda/math/gemm_float8.h"
+#include "core/providers/cuda/cu_inc/common.cuh"
+#include "core/providers/cuda/shared_inc/cuda_utils.h"
+
+namespace onnxruntime {
+namespace contrib {
+namespace cuda {
+
+// It must exist somewhere already.
+int32_t TypeSize(int32_t element_type) {
+  switch (element_type) {
+    case ONNX_NAMESPACE::TensorProto_DataType_FLOAT:
+      return 4;
+    case ONNX_NAMESPACE::TensorProto_DataType_BFLOAT16:
+    case ONNX_NAMESPACE::TensorProto_DataType_FLOAT16:
+      return 2;
+#if !defined(DISABLE_FLOAT8_TYPES)
+    case ONNX_NAMESPACE::TensorProto_DataType_FLOAT8E4M3FN:
+    case ONNX_NAMESPACE::TensorProto_DataType_FLOAT8E5M2:
+      return 1;
+#endif
+    default:
+      ORT_THROW("Unexpected element_type=", element_type, ".");
+  }
+}
+
+void GemmFloat8::SetParams(const TensorShape& a_shape, const TensorShape& b_shape,
+                           int& M, int& N, int& K, int& lda, int& ldb, int& ldd) const {
+  int m_idx = transA_ ? 1 : 0;
+  int k_idx = 1 - m_idx;
+  int n_idx = transB_ ? 0 : 1;
+
+  M = static_cast<int>(a_shape[m_idx]);
+  K = static_cast<int>(a_shape[k_idx]);
+  N = static_cast<int>(b_shape[n_idx]);
+  lda = static_cast<int>(a_shape[1]);
+  ldb = static_cast<int>(b_shape[1]);
+  ldd = static_cast<int>(b_shape[n_idx]);
+}
+
+template <typename TValue>
+int32_t GetTypeAndShape(const TValue* input,
+                        TensorShape& shape,
+                        bool swap = false) {
+  shape = input->Shape();
+  ORT_ENFORCE(shape.NumDimensions() == 2);
+  if (swap) {
+    std::swap(shape[0], shape[1]);
+  }
+  return input->GetElementType();
+}
+
+Status GemmFloat8::ComputeInternal(OpKernelContext* ctx) const {
+  const Tensor* input_A = nullptr;
+  const Tensor* input_B = nullptr;
+  const Tensor* input_C = nullptr;
+  const Tensor* scale_A = nullptr;
+  const Tensor* scale_B = nullptr;
+  const Tensor* scale_Y = nullptr;
+  bool has_scales = false;
+  bool has_bias = false;
+  int n_inputs = ctx->InputCount();
+
+  input_A = ctx->Input<Tensor>(0);
+  input_B = ctx->Input<Tensor>(1);
+  if (n_inputs == 3) {
+    input_C = ctx->Input<Tensor>(2);
+    has_bias = true;
+  } else if (n_inputs > 3) {
+    ORT_ENFORCE(n_inputs >= 5, "Unexpected number of inputs=", n_inputs, ".");
+    has_scales = true;
+    scale_A = ctx->Input<Tensor>(3);
+    scale_B = ctx->Input<Tensor>(4);
+    scale_Y = n_inputs < 6 ? nullptr : ctx->Input<Tensor>(5);
+    ORT_ENFORCE(scale_A->GetElementType() == ONNX_NAMESPACE::TensorProto_DataType_FLOAT);
+    ORT_ENFORCE(scale_B->GetElementType() == ONNX_NAMESPACE::TensorProto_DataType_FLOAT);
+    ORT_ENFORCE(scale_Y == nullptr || scale_Y->GetElementType() == ONNX_NAMESPACE::TensorProto_DataType_FLOAT);
+    if (ctx->Input<Tensor>(2) != nullptr) {
+      input_C = ctx->Input<Tensor>(2);
+      has_bias = true;
+      ORT_ENFORCE(input_C->GetElementType() == dtype_, "Bias type must be equal to dtype.");
+    }
+  }
+
+  auto first_type = input_A->GetElementType();
+#if !defined(DISABLE_FLOAT8_TYPES)
+  bool is_float8 = first_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT8E4M3FN || first_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT8E5M2;
+  if (!is_float8)
+#endif
+    return ComputeRowMajor(ctx, n_inputs, has_bias, has_scales, input_A, input_B,
+                           input_C, scale_A, scale_B, scale_Y);
+#if !defined(DISABLE_FLOAT8_TYPES)
+  return ComputeColMajor(ctx, n_inputs, has_bias, has_scales, input_A, input_B,
+                         input_C, scale_A, scale_B, scale_Y);
+#endif
+}
+
+Status GemmFloat8::ComputeRowMajor(
+    OpKernelContext* ctx, int n_inputs, bool has_bias, bool has_scales,
+    const Tensor* input_A, const Tensor* input_B,
+    const Tensor* input_C, const Tensor* scale_A,
+    const Tensor* scale_B, const Tensor* scale_Y) const {
+  TensorShape shape_A, shape_B, shape_C, shape_Y;
+  int32_t dtype_A, dtype_B, dtype_C, dtype_Y;
+  dtype_A = GetTypeAndShape(input_A, shape_A);
+  dtype_B = GetTypeAndShape(input_B, shape_B);
+
+  int M, N, K, lda, ldb, ldd;
+  SetParams(shape_A, shape_B, M, N, K, lda, ldb, ldd);
+
+  TensorShape dimensions{M, N};
+  Tensor* Y = ctx->Output(0, dimensions);
+  dtype_Y = GetTypeAndShape(Y, shape_Y);
+  dtype_C = has_bias ? GetTypeAndShape(input_C, shape_C)
+                     : ONNX_NAMESPACE::TensorProto_DataType_FLOAT;
+  return ComputeGemm(ctx, n_inputs, has_bias, has_scales, dtype_A, dtype_B, dtype_C,
+                     dtype_Y, shape_A, shape_B, shape_C, shape_Y, transA_, transB_,
+                     input_A->DataRaw(), input_B->DataRaw(),
+                     has_bias ? input_C->DataRaw() : nullptr,
+                     has_scales ? scale_A->DataRaw() : nullptr,
+                     has_scales ? scale_B->DataRaw() : nullptr,
+                     has_scales && scale_Y != nullptr ? scale_Y->DataRaw() : nullptr,
+                     Y->MutableDataRaw(), M, N, K, lda, ldb, ldd, true);
+}
+
+Status GemmFloat8::ComputeColMajor(
+    OpKernelContext* ctx, int n_inputs, bool has_bias, bool has_scales,
+    const Tensor* input_A, const Tensor* input_B,
+    const Tensor* input_C, const Tensor* scale_A,
+    const Tensor* scale_B, const Tensor* scale_Y) const {
+  TensorShape shape_A, shape_B, shape_C, shape_Y;
+  int32_t dtype_A, dtype_B, dtype_C, dtype_Y;
+  dtype_A = GetTypeAndShape(input_A, shape_A);
+  dtype_B = GetTypeAndShape(input_B, shape_B);
+
+  int M, N, K, lda, ldb, ldd;
+  SetParams(shape_A, shape_B, M, N, K, lda, ldb, ldd);
+
+  std::swap(shape_A[0], shape_A[1]);
+  std::swap(shape_B[0], shape_B[1]);
+
+  TensorShape dimensions{M, N};
+  Tensor* Y = ctx->Output(0, dimensions);
+  dtype_Y = GetTypeAndShape(Y, shape_Y);
+  dtype_C = has_bias ? GetTypeAndShape(input_C, shape_C, true)
+                     : ONNX_NAMESPACE::TensorProto_DataType_FLOAT;
+
+  return ComputeGemm(ctx, n_inputs, has_bias, has_scales, dtype_B, dtype_A, dtype_C,
+                     dtype_Y, shape_B, shape_A, shape_C, shape_Y, transB_, transA_,
+                     input_B->DataRaw(), input_A->DataRaw(),
+                     has_bias ? input_C->DataRaw() : nullptr,
+                     has_scales ? scale_B->DataRaw() : nullptr,
+                     has_scales ? scale_A->DataRaw() : nullptr,
+                     has_scales && scale_Y != nullptr ? scale_Y->DataRaw() : nullptr,
+                     Y->MutableDataRaw(), N, M, K, ldb, lda, ldd, false);
+}
+
+Status GemmFloat8::ComputeGemm(
+    OpKernelContext* ctx, int n_inputs, bool has_bias, bool has_scales,
+    int32_t dtype_A, int32_t dtype_B,
+    int32_t dtype_C, int32_t dtype_Y,
+    const TensorShape& shape_A, const TensorShape& shape_B,
+    const TensorShape& shape_C, const TensorShape& shape_Y,
+    bool trans_A, bool trans_B, const void* p_input_a, const void* p_input_b,
+    const void* p_input_c, const void* p_scale_a, const void* p_scale_b,
+    const void* p_scale_y, void* p_output_y, int M, int N, int K, int lda,
+    int ldb, int ldd, bool row_major_compute) const {
+  cudaStream_t stream = Stream(ctx);
+  CUDA_RETURN_IF_ERROR(cudaStreamSynchronize(stream));
+
+  cublasLtHandle_t cublasLt;
+  CUBLAS_RETURN_IF_ERROR(cublasLtCreate(&cublasLt));
+
+  cublasLtMatmulDesc_t operationDesc = nullptr;
+  cublasLtMatrixLayout_t Adesc = nullptr, Bdesc = nullptr, Cdesc = nullptr,
+                         Ddesc = nullptr;
+
+  // Create matrix descriptors. Not setting any extra attributes.
+  cudaDataType_t a_cuda_type = onnxruntime::cuda::ToCudaDataType(dtype_A);
+  cudaDataType_t b_cuda_type = onnxruntime::cuda::ToCudaDataType(dtype_B);
+  cudaDataType_t d_cuda_type = onnxruntime::cuda::ToCudaDataType(dtype_Y);
+  cudaDataType_t scale_cuda_type =
+      onnxruntime::cuda::ToCudaDataType(ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT);
+  cudaDataType_t bias_cuda_type = onnxruntime::cuda::ToCudaDataType(dtype_C);
+
+  cublasComputeType_t compute_type;
+  switch (d_cuda_type) {
+    case CUDA_R_16F:
+      switch (a_cuda_type) {
+#if !defined(DISABLE_FLOAT8_TYPES)
+#if CUDA_VERSION < 11080
+#error CUDA_R_8F_E4M3 (float 8 types) is defined with CUDA>=11.8. Set flag DISABLE_FLOAT8_TYPES.
+#endif
+        case CUDA_R_8F_E4M3:
+        case CUDA_R_8F_E5M2:
+          compute_type = CUBLAS_COMPUTE_32F_FAST_TF32;
+          break;
+#endif
+        default:
+          compute_type = CUBLAS_COMPUTE_32F_FAST_16F;
+          break;
+      }
+      break;
+    case CUDA_R_16BF:
+      compute_type = CUBLAS_COMPUTE_32F_FAST_16BF;
+      break;
+    case CUDA_R_32F:
+      compute_type = CUBLAS_COMPUTE_32F_FAST_TF32;
+      break;
+    default:
+      ORT_THROW("Unable to determine computeType in operator GemmFloat8.");
+  }
+
+  CUBLAS_RETURN_IF_ERROR(cublasLtMatrixLayoutCreate(
+      &Adesc, a_cuda_type, trans_A ? K : M, trans_A ? M : K, lda));
+  CUBLAS_RETURN_IF_ERROR(cublasLtMatrixLayoutCreate(
+      &Bdesc, b_cuda_type, trans_B ? N : K, trans_B ? K : N, ldb));
+  CUBLAS_RETURN_IF_ERROR(
+      cublasLtMatrixLayoutCreate(&Ddesc, d_cuda_type, M, N, ldd));
+
+  if (row_major_compute) {
+    cublasLtOrder_t matrixOrder = CUBLASLT_ORDER_ROW;
+    CUBLAS_RETURN_IF_ERROR(
+        cublasLtMatrixLayoutSetAttribute(Adesc, CUBLASLT_MATRIX_LAYOUT_ORDER,
+                                         &matrixOrder, sizeof(matrixOrder)));
+    CUBLAS_RETURN_IF_ERROR(
+        cublasLtMatrixLayoutSetAttribute(Bdesc, CUBLASLT_MATRIX_LAYOUT_ORDER,
+                                         &matrixOrder, sizeof(matrixOrder)));
+  }
+
+  CUBLAS_RETURN_IF_ERROR(
+      cublasLtMatmulDescCreate(&operationDesc, compute_type, scale_cuda_type));
+  cublasOperation_t ctransa = trans_A ? CUBLAS_OP_T : CUBLAS_OP_N;
+  cublasOperation_t ctransb = trans_B ? CUBLAS_OP_T : CUBLAS_OP_N;
+  CUBLAS_RETURN_IF_ERROR(cublasLtMatmulDescSetAttribute(
+      operationDesc, CUBLASLT_MATMUL_DESC_TRANSA, &ctransa, sizeof(ctransa)));
+  CUBLAS_RETURN_IF_ERROR(cublasLtMatmulDescSetAttribute(
+      operationDesc, CUBLASLT_MATMUL_DESC_TRANSB, &ctransb, sizeof(ctransb)));
+
+  if (sm_count_ != 0) {
+    int math_sm_count = static_cast<int>(sm_count_);
+    CUBLAS_RETURN_IF_ERROR(cublasLtMatmulDescSetAttribute(
+        operationDesc, CUBLASLT_MATMUL_DESC_SM_COUNT_TARGET, &math_sm_count,
+        sizeof(math_sm_count)));
+  }
+
+  if (has_scales) {
+    // gemm float 8
+    const int8_t ifast_accumulation_mode = 1;
+    CUBLAS_RETURN_IF_ERROR(cublasLtMatmulDescSetAttribute(
+        operationDesc,
+        cublasLtMatmulDescAttributes_t::CUBLASLT_MATMUL_DESC_FAST_ACCUM,
+        &ifast_accumulation_mode, sizeof(ifast_accumulation_mode)));
+    CUBLAS_RETURN_IF_ERROR(cublasLtMatmulDescSetAttribute(
+        operationDesc, CUBLASLT_MATMUL_DESC_A_SCALE_POINTER, &p_scale_a,
+        sizeof(p_scale_a)));
+    CUBLAS_RETURN_IF_ERROR(cublasLtMatmulDescSetAttribute(
+        operationDesc, CUBLASLT_MATMUL_DESC_B_SCALE_POINTER, &p_scale_b,
+        sizeof(p_scale_b)));
+    CUBLAS_RETURN_IF_ERROR(cublasLtMatmulDescSetAttribute(
+        operationDesc, CUBLASLT_MATMUL_DESC_D_SCALE_POINTER, &p_scale_y,
+        sizeof(p_scale_b)));
+
+    // float 8
+#if !defined(DISABLE_FLOAT8_TYPES)
+    if (dtype_Y == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT8E4M3FN ||
+        dtype_Y == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT8E5M2) {
+      // For FP8 output, cuBLAS requires C_type to be same as bias_type
+      CUBLAS_RETURN_IF_ERROR(
+          cublasLtMatrixLayoutCreate(&Cdesc, bias_cuda_type, M, N, ldd));
+      CUBLAS_RETURN_IF_ERROR(cublasLtMatmulDescSetAttribute(
+          operationDesc, CUBLASLT_MATMUL_DESC_BIAS_DATA_TYPE, &bias_cuda_type,
+          sizeof(bias_cuda_type)));
+    } else {
+      CUBLAS_RETURN_IF_ERROR(
+          cublasLtMatrixLayoutCreate(&Cdesc, d_cuda_type, M, N, ldd));
+    }
+#else
+    CUBLAS_RETURN_IF_ERROR(
+        cublasLtMatrixLayoutCreate(&Cdesc, d_cuda_type, M, N, ldd));
+#endif
+  } else {
+    CUBLAS_RETURN_IF_ERROR(
+        cublasLtMatrixLayoutCreate(&Cdesc, d_cuda_type, M, N, ldd));
+  }
+
+  if (row_major_compute) {
+    cublasLtOrder_t matrixOrder = CUBLASLT_ORDER_ROW;
+    CUBLAS_RETURN_IF_ERROR(
+        cublasLtMatrixLayoutSetAttribute(Cdesc, CUBLASLT_MATRIX_LAYOUT_ORDER,
+                                         &matrixOrder, sizeof(matrixOrder)));
+    CUBLAS_RETURN_IF_ERROR(
+        cublasLtMatrixLayoutSetAttribute(Ddesc, CUBLASLT_MATRIX_LAYOUT_ORDER,
+                                         &matrixOrder, sizeof(matrixOrder)));
+  }
+
+  cublasLtMatmulDescSetAttribute(operationDesc, CUBLASLT_MATMUL_DESC_EPILOGUE,
+                                 &epilogue_, sizeof(epilogue_));
+
+  // See
+  // https://docs.nvidia.com/cuda/cublas/index.html?highlight=cublasLtMatmulPreferenceAttributes_t#cublasltmatmulpreferenceattributes-t
+  // The workspace should be allocated once from OpKernelContext assuming
+  // only one cuda function is running at a time (which is not necessarily true
+  // with H100).
+  size_t workspaceSize = static_cast<size_t>(1 << 25);  // suggested fixed value 32Mb
+  cublasLtMatmulPreference_t preference = nullptr;
+  cublasLtMatmulPreferenceCreate(&preference);
+  cublasLtMatmulPreferenceSetAttribute(preference,
+                                       CUBLASLT_MATMUL_PREF_MAX_WORKSPACE_BYTES,
+                                       &workspaceSize, sizeof(workspaceSize));
+
+  // https://docs.nvidia.com/cuda/cublas/index.html?highlight=cublasLtMatmulAlgoGetHeuristic#cublasltmatmulalgogetheuristic
+  cublasLtMatmulHeuristicResult_t heuristicResult = {};
+  int returnedResults = 0;
+  cublasStatus_t cuda_status = cublasLtMatmulAlgoGetHeuristic(
+      cublasLt, operationDesc, Adesc, Bdesc, Cdesc, Ddesc, preference, 1,
+      &heuristicResult, &returnedResults);
+  ORT_ENFORCE(
+      returnedResults > 0 && cuda_status == CUBLAS_STATUS_SUCCESS,
+      " Unable to find any suitable algorithm due to ",
+      onnxruntime::cuda::cublasGetErrorEnum(cuda_status),
+      ", returnedResults=", returnedResults,
+      ", alpha=", alpha_, ", beta=", beta_, ", n_inputs=", n_inputs,
+      ", A_type=", onnxruntime::cuda::CudaDataTypeToString(a_cuda_type),
+      ", B_type=", onnxruntime::cuda::CudaDataTypeToString(b_cuda_type),
+      ", C_type=", onnxruntime::cuda::CudaDataTypeToString(bias_cuda_type),
+      ", result_type=", onnxruntime::cuda::CudaDataTypeToString(d_cuda_type),
+      ", bias_type=", onnxruntime::cuda::CudaDataTypeToString(bias_cuda_type),
+      ", scale_type=", onnxruntime::cuda::CudaDataTypeToString(scale_cuda_type),
+      ", computeType=", onnxruntime::cuda::CublasComputeTypeToString(compute_type),
+      ", epilogue=", epilogue_, ", smCount=", sm_count_, ", transA=", trans_A,
+      ", transB=", trans_B,
+      ", fastAccumulationMode=", 1,
+      ", shape_A=", shape_A[0], "x", shape_A[1], ", shape_B=", shape_B[0], "x",
+      shape_B[1], ", shape_C=", (shape_C.NumDimensions() > 0 ? shape_C[0] : 0), "x",
+      (shape_C.NumDimensions() > 1 ? shape_C[1] : 0), ", M=", M, ", N=", N, ", K=", K,
+      ", lda=", lda, ", ldb=", ldb, ", ldd=", ldd,
+      ", workspaceSize=", workspaceSize, ", rowMajorCompute=", (row_major_compute ? 1 : 0),
+      ". Check NVIDIA documentation to see what combination is valid: ",
+      "https://docs.nvidia.com/cuda/cublas/"
+      "index.html?highlight=cublasLtMatmulAlgoGetHeuristic#"
+      "cublasltmatmulalgogetheuristic. CUDA>=11.8 is required to use float 8 types.");
+
+  void* workspace = nullptr;
+  if (workspaceSize > 0) {
+    CUDA_RETURN_IF_ERROR(cudaMalloc(reinterpret_cast<void**>(&workspace), workspaceSize));
+  }
+  // https://docs.nvidia.com/cuda/cublas/index.html?highlight=cublasLtMatmul#cublasltmatmul
+  const void* bias = has_bias ? p_input_c : p_output_y;
+  cuda_status = cublasLtMatmul(
+      cublasLt, operationDesc, static_cast<const void*>(&alpha_), /* alpha */
+      p_input_a,                                                  /* A */
+      Adesc, p_input_b,                                           /* B */
+      Bdesc, static_cast<const void*>(&beta_),                    /* beta */
+      bias,                                                       /* C */
+      Cdesc, p_output_y,                                          /* Y */
+      Ddesc, &heuristicResult.algo,                               /* algo */
+      workspace,                                                  /* workspace */
+      workspaceSize, stream);                                     /* stream */
+  ORT_ENFORCE(
+      cuda_status == CUBLAS_STATUS_SUCCESS,
+      " Unable to run cublasLtMatmul due to ",
+      onnxruntime::cuda::cublasGetErrorEnum(cuda_status),
+      ", returnedResults=", returnedResults, ", alpha=", alpha_,
+      ", n_inputs=", n_inputs, ", A_type=",
+      onnxruntime::cuda::CudaDataTypeToString(a_cuda_type),
+      ", B_type=", onnxruntime::cuda::CudaDataTypeToString(b_cuda_type),
+      ", result_type=", onnxruntime::cuda::CudaDataTypeToString(d_cuda_type),
+      ", bias_type=", onnxruntime::cuda::CudaDataTypeToString(bias_cuda_type),
+      ", scale_type=", onnxruntime::cuda::CudaDataTypeToString(scale_cuda_type),
+      ", computeType=", onnxruntime::cuda::CublasComputeTypeToString(compute_type),
+      ", epilogue=", epilogue_, ", smCount=", sm_count_, ", transA=", trans_A,
+      ", transB=", trans_B,
+      ", fastAccumulationMode=", 1,
+      ", shape_A=", shape_A[0], "x", shape_A[1], ", shape_B=", shape_B[0], "x",
+      shape_B[1], ", M=", M, ", N=", N, ", K=", K, ", lda=", lda, ", ldb=", ldb,
+      ", ldd=", ldd, ", workspaceSize=", workspaceSize,
+      ", rowMajorCompute=", (row_major_compute ? 1 : 0),
+      ". CUDA>=11.8 is required to use float 8 types.");
+
+  if (workspaceSize > 0) {
+    CUDA_RETURN_IF_ERROR(cudaFree(workspace));
+  }
+
+  CUBLAS_RETURN_IF_ERROR(cublasLtMatmulPreferenceDestroy(preference));
+  CUBLAS_RETURN_IF_ERROR(cublasLtMatrixLayoutDestroy(Ddesc));
+  CUBLAS_RETURN_IF_ERROR(cublasLtMatrixLayoutDestroy(Cdesc));
+  CUBLAS_RETURN_IF_ERROR(cublasLtMatrixLayoutDestroy(Bdesc));
+  CUBLAS_RETURN_IF_ERROR(cublasLtMatrixLayoutDestroy(Adesc));
+  CUBLAS_RETURN_IF_ERROR(cublasLtMatmulDescDestroy(operationDesc));
+  CUBLAS_RETURN_IF_ERROR(cublasLtDestroy(cublasLt));
+  return Status::OK();
+}
+
+}  // namespace cuda
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/math/gemm_float8.h b/onnxruntime/contrib_ops/cuda/math/gemm_float8.h
new file mode 100644
index 0000000000000..e84ccd55b2003
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/math/gemm_float8.h
@@ -0,0 +1,65 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+
+#include "cublas_v2.h"
+#include "core/providers/cuda/cuda_kernel.h"
+
+namespace onnxruntime {
+namespace contrib {
+namespace cuda {
+
+// Calls https://docs.nvidia.com/cuda/cublas/index.html#cublasltmatmul.
+// D = alpha*(A*B)
+class GemmFloat8 final : public onnxruntime::cuda::CudaKernel {
+ public:
+  GemmFloat8(const OpKernelInfo& info);
+
+  Status ComputeInternal(OpKernelContext* context) const override;
+
+ private:
+  void SetParams(const TensorShape& shape_a,
+                 const TensorShape& shape_b,
+                 int& M, int& N, int& K,
+                 int& lda, int& ldb, int& ldd) const;
+  Status SetCheck(const TensorShape& shape_a,
+                  const TensorShape& shape_b,
+                  int& M, int& N, int& K) const;
+
+  Status ComputeRowMajor(OpKernelContext* ctx, int n_inputs, bool has_bias,
+                         bool has_scales, const Tensor* input_A,
+                         const Tensor* input_B, const Tensor* input_C,
+                         const Tensor* scale_A, const Tensor* scale_B,
+                         const Tensor* scale_Y) const;
+  Status ComputeColMajor(OpKernelContext* ctx, int n_inputs, bool has_bias,
+                         bool has_scales, const Tensor* input_A,
+                         const Tensor* input_B, const Tensor* input_C,
+                         const Tensor* scale_A, const Tensor* scale_B,
+                         const Tensor* scale_Y) const;
+
+  Status ComputeGemm(
+      OpKernelContext* ctx, int n_inputs, bool has_bias, bool has_scales,
+      int32_t dtype_A, int32_t dtype_b,
+      int32_t dtype_c, int32_t dtype_Y,
+      const TensorShape& shape_A, const TensorShape& shape_B,
+      const TensorShape& shape_C, const TensorShape& shape_Y,
+      bool transa, bool transb, const void* p_input_a, const void* p_input_b,
+      const void* p_input_c, const void* p_scale_a, const void* p_scale_b,
+      const void* p_scale_y, void* p_output_y, int M, int N, int K, int lda,
+      int ldb, int ldd, bool row_major_compute) const;
+
+  float alpha_;
+  float beta_;
+  bool transA_;
+  bool transB_;
+  int64_t sm_count_;
+  int64_t dtype_;
+  cublasLtEpilogue_t epilogue_;
+
+  // TODO(xadupre): add epilogue (= activation function, Relu or Gelu are available).
+};
+
+}  // namespace cuda
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/moe/ft_moe/compute_occupancy.h b/onnxruntime/contrib_ops/cuda/moe/ft_moe/compute_occupancy.h
new file mode 100644
index 0000000000000..86136ea244e23
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/moe/ft_moe/compute_occupancy.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <cuda_runtime_api.h>
+
+#include "core/providers/cuda/shared_inc/cuda_call.h"
+#include "cutlass/device_kernel.h"
+
+using namespace onnxruntime;
+
+namespace ort_fastertransformer {
+
+template <typename GemmKernel>
+inline int compute_occupancy_for_kernel() {
+  int smem_size = int(sizeof(typename GemmKernel::SharedStorage));
+
+  if (smem_size > (48 << 10)) {
+    cudaError_t status =
+        cudaFuncSetAttribute(cutlass::Kernel<GemmKernel>, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size);
+    if (status == cudaError::cudaErrorInvalidValue) {
+      // Clear the error bit since we can ignore this.
+      // This should mean that smem_size > cudaDevAttrMaxSharedMemoryPerBlockOptin. In that case, we return an
+      // occupancy of 0. This will cause the heuristic to ignore this configuration.
+      status = cudaGetLastError();
+      return 0;
+    }
+    CUDA_CALL_THROW(status);
+  }
+
+  int max_active_blocks = -1;
+  CUDA_CALL_THROW(cudaOccupancyMaxActiveBlocksPerMultiprocessor(&max_active_blocks, cutlass::Kernel<GemmKernel>,
+                                                                GemmKernel::kThreadCount, smem_size));
+
+  return max_active_blocks;
+}
+
+}  // namespace ort_fastertransformer
diff --git a/onnxruntime/contrib_ops/cuda/moe/ft_moe/cutlass_heuristic.cc b/onnxruntime/contrib_ops/cuda/moe/ft_moe/cutlass_heuristic.cc
new file mode 100644
index 0000000000000..5d4c6793ec995
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/moe/ft_moe/cutlass_heuristic.cc
@@ -0,0 +1,187 @@
+/*
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "cutlass_heuristic.h"
+
+#include <cuda_runtime_api.h>
+#include <vector>
+#include <stdexcept>
+
+namespace ort_fastertransformer {
+
+struct TileShape {
+  int m;
+  int n;
+};
+
+TileShape get_cta_shape_for_config(CutlassTileConfig tile_config) {
+  switch (tile_config) {
+    case CutlassTileConfig::CtaShape32x128x64_WarpShape32x32x64:
+      return TileShape{32, 128};
+    case CutlassTileConfig::CtaShape64x128x64_WarpShape32x64x64:
+    case CutlassTileConfig::CtaShape64x128x64_WarpShape64x32x64:
+      return TileShape{64, 128};
+    case CutlassTileConfig::CtaShape128x128x8_WarpShape64x64x8:
+    case CutlassTileConfig::CtaShape128x128x64_WarpShape64x32x64:
+    case CutlassTileConfig::CtaShape128x128x64_WarpShape128x32x64:
+      return TileShape{128, 128};
+    default:
+      ORT_THROW("[FT Error][get_grid_shape_for_config] Invalid config");
+  }
+}
+
+bool is_valid_split_k_factor(const int64_t m, const int64_t n, const int64_t k, const TileShape tile_shape,
+                             const int split_k_factor, const size_t workspace_bytes, const bool is_weight_only) {
+  // All tile sizes have a k_tile of 64.
+  static constexpr int k_tile = 64;
+
+  // For weight-only quant, we need k and k_elements_per_split to be a multiple of cta_k
+  if (is_weight_only) {
+    if ((k % k_tile) != 0) {
+      return false;
+    }
+
+    if ((k % split_k_factor) != 0) {
+      return false;
+    }
+
+    const int k_elements_per_split = static_cast<int>(k / split_k_factor);
+    if ((k_elements_per_split % k_tile) != 0) {
+      return false;
+    }
+  }
+
+  // Check that the workspace has sufficient space for this split-k factor
+  const int ctas_in_m_dim = static_cast<int>((m + tile_shape.m - 1) / tile_shape.m);
+  const int ctas_in_n_dim = static_cast<int>((n + tile_shape.n - 1) / tile_shape.n);
+  const int required_ws_bytes = split_k_factor == 1 ? 0 : sizeof(int) * ctas_in_m_dim * ctas_in_n_dim;
+
+  if (required_ws_bytes > workspace_bytes) {
+    return false;
+  }
+
+  return true;
+}
+
+std::vector<CutlassTileConfig> get_candidate_tiles(const bool is_weight_only, const bool simt_configs_only) {
+  std::vector<CutlassTileConfig> simt_configs{CutlassTileConfig::CtaShape128x128x8_WarpShape64x64x8};
+
+  std::vector<CutlassTileConfig> square_configs{CutlassTileConfig::CtaShape32x128x64_WarpShape32x32x64,
+                                                CutlassTileConfig::CtaShape64x128x64_WarpShape32x64x64,
+                                                CutlassTileConfig::CtaShape128x128x64_WarpShape64x32x64};
+
+  std::vector<CutlassTileConfig> quant_B_configs{CutlassTileConfig::CtaShape32x128x64_WarpShape32x32x64,
+                                                 CutlassTileConfig::CtaShape64x128x64_WarpShape64x32x64,
+                                                 CutlassTileConfig::CtaShape128x128x64_WarpShape128x32x64};
+
+  const std::vector<CutlassTileConfig> allowed_configs = is_weight_only ? quant_B_configs : square_configs;
+  return simt_configs_only ? simt_configs : allowed_configs;
+}
+
+std::vector<CutlassGemmConfig> get_candidate_configs(int sm, const bool is_weight_only, const bool simt_configs_only) {
+  std::vector<CutlassTileConfig> tiles = get_candidate_tiles(is_weight_only, simt_configs_only);
+
+  std::vector<CutlassGemmConfig> candidate_configs;
+  const int min_stages = 2;
+  const int max_stages = sm >= 80 ? 4 : 2;
+
+  for (const auto& tile_config : tiles) {
+    for (int stages = min_stages; stages <= max_stages; ++stages) {
+      CutlassGemmConfig config{tile_config, SplitKStyle::NO_SPLIT_K, 1, stages};
+      candidate_configs.push_back(config);
+    }
+  }
+
+  return candidate_configs;
+}
+
+CutlassGemmConfig estimate_best_config_from_occupancies(const std::vector<CutlassGemmConfig>& candidate_configs,
+                                                        const std::vector<int>& occupancies, const int64_t m,
+                                                        const int64_t n, const int64_t k, const int64_t,
+                                                        const int split_k_limit, const size_t workspace_bytes,
+                                                        const int multi_processor_count, const int is_weight_only) {
+  if (occupancies.size() != candidate_configs.size()) {
+    ORT_THROW(
+        "[FT Error][estimate_best_config_from_occupancies] occpancies and "
+        "candidate configs vectors must have equal length.");
+  }
+
+  CutlassGemmConfig best_config;
+  // Score will be [0, 1]. The objective is to minimize this score.
+  // It represents the fraction of SM resources unused in the last wave.
+  float config_score = 1.0f;
+  int config_waves = INT_MAX;
+  int current_m_tile = 0;
+
+  const int max_split_k = n >= multi_processor_count * 256 ? 1 : split_k_limit;
+  for (int ii = 0; ii < candidate_configs.size(); ++ii) {
+    CutlassGemmConfig candidate_config = candidate_configs[ii];
+    TileShape tile_shape = get_cta_shape_for_config(candidate_config.tile_config);
+    int occupancy = occupancies[ii];
+
+    if (occupancy == 0) {
+      continue;
+    }
+
+    // Keep small tile sizes when possible.
+    if (best_config.tile_config != CutlassTileConfig::ChooseWithHeuristic && m < current_m_tile &&
+        current_m_tile < tile_shape.m) {
+      continue;
+    }
+
+    const int ctas_in_m_dim = static_cast<int>((m + tile_shape.m - 1) / tile_shape.m);
+    const int ctas_in_n_dim = static_cast<int>((n + tile_shape.n - 1) / tile_shape.n);
+
+    for (int split_k_factor = 1; split_k_factor <= max_split_k; ++split_k_factor) {
+      if (is_valid_split_k_factor(m, n, k, tile_shape, split_k_factor, workspace_bytes, is_weight_only)) {
+        const int ctas_per_wave = occupancy * multi_processor_count;
+        const int ctas_for_problem = ctas_in_m_dim * ctas_in_n_dim * split_k_factor;
+
+        const int num_waves_total = (ctas_for_problem + ctas_per_wave - 1) / ctas_per_wave;
+        const float num_waves_fractional = ctas_for_problem / float(ctas_per_wave);
+        const float current_score = float(num_waves_total) - num_waves_fractional;
+
+        const float score_slack = 0.1f;
+        if (current_score < config_score ||
+            ((config_waves > num_waves_total) && (current_score < config_score + score_slack))) {
+          config_score = current_score;
+          config_waves = num_waves_total;
+          SplitKStyle split_style = split_k_factor > 1 ? SplitKStyle::SPLIT_K_SERIAL : SplitKStyle::NO_SPLIT_K;
+          best_config =
+              CutlassGemmConfig{candidate_config.tile_config, split_style, split_k_factor, candidate_config.stages};
+          current_m_tile = tile_shape.m;
+        } else if (current_score == config_score &&
+                   (best_config.stages < candidate_config.stages || split_k_factor < best_config.split_k_factor ||
+                    current_m_tile < tile_shape.m)) {
+          // Prefer deeper pipeline or smaller split-k
+          SplitKStyle split_style = split_k_factor > 1 ? SplitKStyle::SPLIT_K_SERIAL : SplitKStyle::NO_SPLIT_K;
+          best_config =
+              CutlassGemmConfig{candidate_config.tile_config, split_style, split_k_factor, candidate_config.stages};
+          current_m_tile = tile_shape.m;
+          config_waves = num_waves_total;
+        }
+      }
+    }
+  }
+
+  if (best_config.tile_config == CutlassTileConfig::ChooseWithHeuristic) {
+    ORT_THROW("[FT Error] Heurisitc failed to find a valid config.");
+  }
+
+  return best_config;
+}
+
+}  // namespace ort_fastertransformer
diff --git a/onnxruntime/contrib_ops/cuda/moe/ft_moe/cutlass_heuristic.h b/onnxruntime/contrib_ops/cuda/moe/ft_moe/cutlass_heuristic.h
new file mode 100644
index 0000000000000..e70efe0503b55
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/moe/ft_moe/cutlass_heuristic.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "ft_gemm_configs.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <vector>
+
+#include "core/common/common.h"
+
+using namespace onnxruntime;
+
+namespace ort_fastertransformer {
+
+std::vector<CutlassGemmConfig> get_candidate_configs(int sm, const bool is_weight_only, const bool simt_configs_only);
+
+CutlassGemmConfig estimate_best_config_from_occupancies(const std::vector<CutlassGemmConfig>& candidate_configs,
+                                                        const std::vector<int>& occupancies, const int64_t m,
+                                                        const int64_t n, const int64_t k, const int64_t num_experts,
+                                                        const int split_k_limit, const size_t workspace_bytes,
+                                                        const int multi_processor_count, const int is_weight_only);
+
+}  // namespace ort_fastertransformer
diff --git a/onnxruntime/contrib_ops/cuda/moe/ft_moe/epilogue_helpers.h b/onnxruntime/contrib_ops/cuda/moe/ft_moe/epilogue_helpers.h
new file mode 100644
index 0000000000000..78d206bf1d9bc
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/moe/ft_moe/epilogue_helpers.h
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file epilogue_helpers.h
+ *
+ * This file includes types for the epilogues. The empty structs exist so we can signal to template
+ * code the type of epilogue we want to run, and let the underlying code specify the details such as
+ * element types, accumulator type and elements per vector access.
+ *
+ */
+
+#pragma once
+
+#include "cutlass/array.h"
+#include "cutlass/cutlass.h"
+#include "cutlass/epilogue/thread/activation.h"
+#include "cutlass/epilogue/thread/scale_type.h"
+#include "cutlass/functional.h"
+#include "cutlass/half.h"
+#include "cutlass/numeric_conversion.h"
+#include "cutlass/numeric_types.h"
+#include "cutlass/epilogue/thread/linear_combination.h"
+#include "cutlass/epilogue/thread/linear_combination_generic.h"
+#include "cutlass/epilogue/thread/linear_combination_relu.h"
+#include "cutlass/epilogue/thread/linear_combination_silu.h"
+
+namespace cutlass {
+namespace epilogue {
+namespace thread {
+
+__forceinline__ __device__ float copysignf_pos(float a, float b) {
+  float r;
+  r = __int_as_float(__float_as_int(a) | (__float_as_int(b) & 0x80000000));
+  return r;
+}
+
+__forceinline__ __device__ float tanh_opt(float x) {
+#if (__CUDACC_VER_MAJOR__ < 11) || (__CUDA_ARCH__ < 750)
+  const float exp_val = -1.f * fabs(2 * x);
+  return copysignf_pos((1.0f - __expf(exp_val)) / (__expf(exp_val) + 1.0f), x);
+#else
+  return fast_tanh(x);
+#endif
+}
+
+template <>
+struct GELU_taylor<float> {
+  static const bool kIsHeavy = true;
+  CUTLASS_DEVICE
+  float operator()(float const& z) const {
+    float k0 = float(0.7978845608028654);
+    float k1 = float(0.044715);
+
+    return float(
+        cutlass::constants::half<float>() * z *
+        (cutlass::constants::one<float>() + tanh_opt(k0 * z * (cutlass::constants::one<float>() + k1 * z * z))));
+  }
+
+  using Params = LinearCombinationGenericParams<float>;
+
+  CUTLASS_DEVICE
+  float operator()(float const& scalar, Params const& params_) const { return this->operator()(scalar); }
+};
+
+}  // namespace thread
+}  // namespace epilogue
+}  // namespace cutlass
+
+namespace ort_fastertransformer {
+
+struct EpilogueOpBiasSilu {};
+
+struct EpilogueOpBiasReLU {};
+
+struct EpilogueOpBiasFtGelu {};
+
+struct EpilogueOpBias {};
+
+struct EpilogueOpNoBias {};
+
+template <typename ElementType, int ElementsPerVectorAccess, typename ElementAccumulator, typename Op>
+struct Epilogue {};
+
+template <typename ElementType, int ElementsPerVectorAccess, typename ElementAccumulator>
+struct Epilogue<ElementType, ElementsPerVectorAccess, ElementAccumulator, EpilogueOpBiasSilu> {
+  using Op = cutlass::epilogue::thread::LinearCombinationSilu<ElementType, ElementsPerVectorAccess, ElementAccumulator,
+                                                              ElementAccumulator,
+                                                              cutlass::epilogue::thread::ScaleType::NoBetaScaling>;
+};
+
+template <typename ElementType, int ElementsPerVectorAccess, typename ElementAccumulator>
+struct Epilogue<ElementType, ElementsPerVectorAccess, ElementAccumulator, EpilogueOpBiasReLU> {
+  using Op = cutlass::epilogue::thread::LinearCombinationRelu<ElementType, ElementsPerVectorAccess, ElementAccumulator,
+                                                              ElementAccumulator,
+                                                              cutlass::epilogue::thread::ScaleType::NoBetaScaling>;
+};
+
+template <typename ElementType, int ElementsPerVectorAccess, typename ElementAccumulator>
+struct Epilogue<ElementType, ElementsPerVectorAccess, ElementAccumulator, EpilogueOpBiasFtGelu> {
+  using Op = cutlass::epilogue::thread::LinearCombinationGeneric<
+      cutlass::epilogue::thread::GELU_taylor, ElementType, ElementsPerVectorAccess, ElementAccumulator,
+      ElementAccumulator, cutlass::epilogue::thread::ScaleType::NoBetaScaling,
+      cutlass::FloatRoundStyle::round_to_nearest, true>;
+};
+
+template <typename ElementType, int ElementsPerVectorAccess, typename ElementAccumulator>
+struct Epilogue<ElementType, ElementsPerVectorAccess, ElementAccumulator, EpilogueOpBias> {
+  using Op = cutlass::epilogue::thread::LinearCombination<ElementType, ElementsPerVectorAccess, ElementAccumulator,
+                                                          ElementAccumulator,
+                                                          cutlass::epilogue::thread::ScaleType::NoBetaScaling>;
+};
+
+template <typename ElementType, int ElementsPerVectorAccess, typename ElementAccumulator>
+struct Epilogue<ElementType, ElementsPerVectorAccess, ElementAccumulator, EpilogueOpNoBias> {
+  using Op =
+      cutlass::epilogue::thread::LinearCombination<ElementType, ElementsPerVectorAccess, ElementAccumulator,
+                                                   ElementAccumulator, cutlass::epilogue::thread::ScaleType::Default>;
+};
+
+}  // namespace ort_fastertransformer
diff --git a/onnxruntime/contrib_ops/cuda/moe/ft_moe/ft_gemm_configs.h b/onnxruntime/contrib_ops/cuda/moe/ft_moe/ft_gemm_configs.h
new file mode 100644
index 0000000000000..a5faad423fad9
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/moe/ft_moe/ft_gemm_configs.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+namespace ort_fastertransformer {
+// Note: The shapes are in the format MxNxK. The K shape of the runtime config MUST match the K shape
+//       in the kernel layout details when doing weight only quantization.
+enum class CutlassTileConfig {
+  // Signals that we should run heuristics do choose a config
+  Undefined,
+
+  // Signals that we should run heuristics do choose a config
+  ChooseWithHeuristic,
+
+  // SiMT config
+  CtaShape128x128x8_WarpShape64x64x8,
+
+  // TensorCore configs CTA_N = 128, CTA_K = 64
+  // Warp configs for M=32
+  CtaShape32x128x64_WarpShape32x32x64,
+
+  // Warp configs for M=64
+  CtaShape64x128x64_WarpShape32x64x64,
+  CtaShape64x128x64_WarpShape64x32x64,
+
+  // Warp configs for M=128
+  CtaShape128x128x64_WarpShape64x32x64,
+  CtaShape128x128x64_WarpShape128x32x64
+};
+
+enum class SplitKStyle {
+  NO_SPLIT_K,
+  SPLIT_K_SERIAL,
+  // SPLIT_K_PARALLEL // Not supported yet
+};
+
+struct CutlassGemmConfig {
+  CutlassTileConfig tile_config = CutlassTileConfig::ChooseWithHeuristic;
+  SplitKStyle split_k_style = SplitKStyle::NO_SPLIT_K;
+  int split_k_factor = -1;
+  int stages = -1;
+};
+
+}  // namespace ort_fastertransformer
diff --git a/onnxruntime/contrib_ops/cuda/moe/ft_moe/gemm_moe_problem_visitor.h b/onnxruntime/contrib_ops/cuda/moe/ft_moe/gemm_moe_problem_visitor.h
new file mode 100644
index 0000000000000..311ed323cb90c
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/moe/ft_moe/gemm_moe_problem_visitor.h
@@ -0,0 +1,79 @@
+/***************************************************************************************************
+ * Copyright (c) 2017 - 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ **************************************************************************************************/
+
+/*! \file
+    \brief Scheduler for grouped GEMM
+*/
+
+#pragma once
+
+#include "cutlass/cutlass.h"
+#include "cutlass/gemm/gemm.h"
+#include "cutlass/gemm/kernel/gemm_grouped_problem_visitor.h"
+#include "cutlass/matrix_coord.h"
+
+#include "moe_problem_visitor.h"
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+
+namespace cutlass {
+namespace gemm {
+namespace kernel {
+
+/// Visitor class to abstract away the algorithm for iterating over tiles
+template <typename ThreadblockShape, GroupScheduleMode GroupScheduleMode_, int PrefetchTileCount, int ThreadCount,
+          bool Transposed = false>
+struct GemmMoeProblemVisitor
+    : public MoeProblemVisitor<detail::GemmGroupedProblemSizeHelper<ThreadblockShape, Transposed>, ThreadblockShape,
+                               GroupScheduleMode_, PrefetchTileCount, ThreadCount> {
+  static bool const kTransposed = Transposed;
+
+  using ProblemSizeHelper = detail::GemmGroupedProblemSizeHelper<ThreadblockShape, Transposed>;
+  using Base =
+      MoeProblemVisitor<ProblemSizeHelper, ThreadblockShape, GroupScheduleMode_, PrefetchTileCount, ThreadCount>;
+  using Params = typename Base::Params;
+  using SharedStorage = typename Base::SharedStorage;
+
+  //
+  // Methods
+  //
+  CUTLASS_DEVICE
+  GemmMoeProblemVisitor(Params const& params_, SharedStorage& shared_storage_, int32_t block_idx)
+      : Base(params_, shared_storage_, block_idx) {}
+};
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+
+}  // namespace kernel
+}  // namespace gemm
+}  // namespace cutlass
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/onnxruntime/contrib_ops/cuda/moe/ft_moe/layout_traits_helper.h b/onnxruntime/contrib_ops/cuda/moe/ft_moe/layout_traits_helper.h
new file mode 100644
index 0000000000000..eb33a98e4246f
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/moe/ft_moe/layout_traits_helper.h
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+  This file exists so that we use the same weight layout for MoE grouped gemm and regular gemm when the weight is
+  quantized. The preprocessing code reads this template to know how to organize the quantized weight matrices
+  to be consumed by CUTLASS.
+
+  Note that for int4, ThreadBlockK MUST be 64.
+
+ */
+
+#pragma once
+
+#include "cutlass/layout/matrix.h"
+#include "cutlass/numeric_types.h"
+#include "cutlass/arch/arch.h"
+#include "cutlass/arch/mma.h"
+#include "cutlass/platform/platform.h"
+#include "cutlass/cutlass.h"
+#include "cutlass/gemm/gemm.h"
+
+namespace cutlass {
+namespace gemm {
+namespace kernel {
+
+template <typename TypeB, typename Arch, typename Enable = void>
+struct LayoutDetailsB {};
+
+// Volta specialiations. Volta will dequantize before STS, so we need a different operator
+template <typename TypeB>
+struct LayoutDetailsB<TypeB, arch::Sm70> {
+  static constexpr int ThreadblockK = 64;
+  using Layout = layout::RowMajor;
+  static constexpr int ElementsPerAccess = 8;
+  using Operator = cutlass::arch::OpMultiplyAdd;
+};
+
+// Specializations for Turing+ when B is FP16. These are currently only used for MoE networks.
+// TODO - Switch this to column major for weights since gemms should be more performant.
+template <typename Arch>
+struct LayoutDetailsB<half_t, Arch, typename platform::enable_if<Arch::kMinComputeCapability >= 75>::type> {
+  static constexpr int ThreadblockK = 64;
+  using Layout = layout::RowMajor;
+  static constexpr int ElementsPerAccess = 128 / cutlass::sizeof_bits<half_t>::value;
+  using Operator = cutlass::arch::OpMultiplyAdd;
+};
+
+template <typename TypeA, typename TypeB, typename arch, typename Enable = void>
+struct MixedGemmArchTraits {};
+
+template <typename arch>
+struct MixedGemmArchTraits<float, float, arch> {
+  static constexpr int Stages = 2;
+  using OperatorClass = cutlass::arch::OpClassSimt;
+  using AccType = float;
+  using LayoutB = cutlass::layout::RowMajor;
+
+  static constexpr int ElementsPerAccessA = 1;
+  static constexpr int ElementsPerAccessB = 1;
+  static constexpr int ElementsPerAccessC = 1;
+  static constexpr int ThreadblockK = 8;
+  using InstructionShape = cutlass::gemm::GemmShape<1, 1, 1>;
+
+  using Operator = cutlass::arch::OpMultiplyAdd;
+};
+
+// ========================= Volta Traits ===========================
+// Volta will always dequantize after the global memory load.
+// This will instantiate any HMMA tensorcore kernels for Volta.
+template <typename TypeA, typename TypeB>
+struct MixedGemmArchTraits<
+    TypeA, TypeB, cutlass::arch::Sm70,
+    typename cutlass::platform::enable_if<cutlass::platform::is_same<TypeA, cutlass::half_t>::value>::type> {
+ private:
+  using LayoutDetails = LayoutDetailsB<TypeB, cutlass::arch::Sm70>;
+
+ public:
+  static constexpr int ThreadblockK = LayoutDetails::ThreadblockK;
+
+  using OperatorClass = cutlass::arch::OpClassTensorOp;
+  using AccType = float;
+  using LayoutB = typename LayoutDetails::Layout;
+
+  static constexpr int ElementsPerAccessA = 128 / cutlass::sizeof_bits<TypeA>::value;
+  static constexpr int ElementsPerAccessB = LayoutDetails::ElementsPerAccess;
+  static constexpr int ElementsPerAccessC = 128 / cutlass::sizeof_bits<TypeA>::value;
+  using InstructionShape = cutlass::gemm::GemmShape<8, 8, 4>;
+
+  using Operator = typename LayoutDetails::Operator;
+};
+
+// ======================= Turing Traits ==============================
+template <typename TypeA, typename TypeB>
+struct MixedGemmArchTraits<
+    TypeA, TypeB, cutlass::arch::Sm75,
+    typename cutlass::platform::enable_if<cutlass::platform::is_same<TypeA, cutlass::half_t>::value>::type> {
+ private:
+  using LayoutDetails = LayoutDetailsB<TypeB, cutlass::arch::Sm75>;
+
+ public:
+  static constexpr int ThreadblockK = LayoutDetails::ThreadblockK;
+
+  using OperatorClass = cutlass::arch::OpClassTensorOp;
+  using AccType = float;
+  using LayoutB = typename LayoutDetails::Layout;
+
+  static constexpr int ElementsPerAccessA = 128 / cutlass::sizeof_bits<TypeA>::value;
+  static constexpr int ElementsPerAccessB = LayoutDetails::ElementsPerAccess;
+  static constexpr int ElementsPerAccessC = 128 / cutlass::sizeof_bits<TypeA>::value;
+  using InstructionShape = cutlass::gemm::GemmShape<16, 8, 8>;
+
+  using Operator = typename LayoutDetails::Operator;
+};
+
+// ======================= Ampere Traits ==============================
+template <typename TypeA, typename TypeB>
+struct MixedGemmArchTraits<
+    TypeA, TypeB, cutlass::arch::Sm80,
+    typename cutlass::platform::enable_if<cutlass::platform::is_same<TypeA, cutlass::half_t>::value>::type> {
+ private:
+  using LayoutDetails = LayoutDetailsB<TypeB, cutlass::arch::Sm80>;
+
+ public:
+  static constexpr int ThreadblockK = LayoutDetails::ThreadblockK;
+
+  using OperatorClass = cutlass::arch::OpClassTensorOp;
+  using AccType = float;
+  using LayoutB = typename LayoutDetails::Layout;
+
+  static constexpr int ElementsPerAccessA = 128 / cutlass::sizeof_bits<TypeA>::value;
+  static constexpr int ElementsPerAccessB = LayoutDetails::ElementsPerAccess;
+  static constexpr int ElementsPerAccessC = 128 / cutlass::sizeof_bits<TypeA>::value;
+  using InstructionShape = cutlass::gemm::GemmShape<16, 8, 16>;
+
+  using Operator = typename LayoutDetails::Operator;
+};
+
+}  // namespace kernel
+}  // namespace gemm
+}  // namespace cutlass
\ No newline at end of file
diff --git a/onnxruntime/contrib_ops/cuda/moe/ft_moe/moe_cutlass_kernel.h b/onnxruntime/contrib_ops/cuda/moe/ft_moe/moe_cutlass_kernel.h
new file mode 100644
index 0000000000000..bfe30b71170d8
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/moe/ft_moe/moe_cutlass_kernel.h
@@ -0,0 +1,463 @@
+/***************************************************************************************************
+ * Copyright (c) 2017-2021, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are permitted
+ * provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright notice, this list of
+ *       conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright notice, this list of
+ *       conditions and the following disclaimer in the documentation and/or other materials
+ *       provided with the distribution.
+ *     * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
+ *       to endorse or promote products derived from this software without specific prior written
+ *       permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ **************************************************************************************************/
+
+#pragma once
+
+#include "cutlass/complex.h"
+#include "cutlass/cutlass.h"
+#include "cutlass/fast_math.h"
+#include "cutlass/gemm/gemm.h"
+#include "cutlass/matrix_coord.h"
+#include "cutlass/semaphore.h"
+
+#include "cutlass/gemm/kernel/gemm_transpose_operands.h"
+#include "cutlass/layout/matrix.h"
+#include "cutlass/trace.h"
+
+#include "gemm_moe_problem_visitor.h"
+#include "tile_interleaved_layout.h"
+/////////////////////////////////////////////////////////////////////////////////////////////////
+
+namespace cutlass {
+namespace gemm {
+namespace kernel {
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+// This section exists to that we can use the same kernel code for regular gemm and dequantizing gemms.
+// It will dispatch to the dequantizing gemm if the Mma type has an Iterator for scales in global.
+template <typename...>
+using void_t = void;
+
+template <typename Mma, typename = void>
+struct use_dq_gemm : platform::false_type {};
+
+template <typename Mma>
+struct use_dq_gemm<Mma, void_t<typename Mma::IteratorScale>> : platform::true_type {};
+
+// SFINAE overload for dequantizing gemm
+template <typename Mma, typename ElementScale, typename platform::enable_if<use_dq_gemm<Mma>::value, bool>::type = true>
+CUTLASS_DEVICE static void run_mma(Mma mma, int gemm_k_iterations, typename Mma::FragmentC& accum,
+                                   typename Mma::IteratorA iterator_A, typename Mma::IteratorB iterator_B,
+                                   typename Mma::FragmentC const& src_accum, ElementScale* weight_scale_ptr,
+                                   MatrixCoord scale_extent, const int thread_idx, MatrixCoord tb_offset_scale) {
+  typename Mma::IteratorScale iterator_scale(Mma::IteratorScale::Layout(scale_extent.column()), weight_scale_ptr,
+                                             scale_extent, thread_idx, tb_offset_scale);
+
+  mma(gemm_k_iterations, accum, iterator_A, iterator_B, iterator_scale, src_accum);
+}
+
+// SFINAE overload for normal gemm. This completely ignores the scale parameters
+template <typename Mma, typename ElementScale,
+          typename platform::enable_if<!use_dq_gemm<Mma>::value, bool>::type = true>
+CUTLASS_DEVICE static void run_mma(Mma mma, int gemm_k_iterations, typename Mma::FragmentC& accum,
+                                   typename Mma::IteratorA iterator_A, typename Mma::IteratorB iterator_B,
+                                   typename Mma::FragmentC const& src_accum, ElementScale* weight_scale_ptr,
+                                   MatrixCoord scale_extent, const int thread_idx, MatrixCoord tb_offset_scale) {
+  mma(gemm_k_iterations, accum, iterator_A, iterator_B, src_accum);
+}
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+
+template <typename Mma_,                        ///! Threadblock-scoped matrix multiply-accumulate
+          typename Epilogue_,                   ///! Epilogue
+          typename ThreadblockSwizzle_,         ///! Threadblock swizzling function
+          typename KernelArch,                  ///! The Architecture this kernel is compiled for. Used since SIMT kernels lose
+                                                /// top-level
+                                                /// arch.
+          GroupScheduleMode GroupScheduleMode_  ///! Type of scheduling to perform
+          >
+struct MoeFCGemm {
+ public:
+  using Mma = Mma_;
+  using Epilogue = Epilogue_;
+  using EpilogueOutputOp = typename Epilogue::OutputOp;
+  using ThreadblockSwizzle = ThreadblockSwizzle_;
+  static GroupScheduleMode const kGroupScheduleMode = GroupScheduleMode_;
+  static bool const kTransposed = false;
+
+  // Optional transpose
+  using MapArguments =
+      kernel::detail::MapArguments<typename Mma::IteratorA::Element, typename Mma::IteratorA::Layout, Mma::kTransformA,
+                                   Mma::IteratorA::AccessType::kElements, typename Mma::IteratorB::Element,
+                                   typename Mma::IteratorB::Layout, Mma::kTransformB,
+                                   Mma::IteratorB::AccessType::kElements, typename Mma::LayoutC, kTransposed>;
+
+  // Public-facing type definitions related to operand element type, layout, and complex conjugate
+  // operation. Must interact with the 'kTransposed' notion.
+  static_assert(!kTransposed, "Transpose problem not supported");
+  using ElementA = typename MapArguments::ElementA;
+  using LayoutA = typename MapArguments::LayoutA;
+  using ElementB = typename MapArguments::ElementB;
+  using LayoutB = typename MapArguments::LayoutB;
+  using ElementC = typename Epilogue::OutputTileIterator::Element;
+  using LayoutC = typename MapArguments::LayoutC;
+  using ElementScale = ElementC;
+
+  static ComplexTransform const kTransformA = MapArguments::kTransformA;
+  static ComplexTransform const kTransformB = MapArguments::kTransformB;
+
+  // Type definitions about the mainloop.
+  using Operator = typename Mma::Operator;
+  using OperatorClass = typename Mma::Operator::OperatorClass;
+  using ThreadblockShape = typename Mma::Shape;
+  using WarpShape = typename Mma::Operator::Shape;
+  using InstructionShape = typename Mma::Policy::Operator::InstructionShape;
+  using ArchTag = typename Mma::ArchTag;
+
+  static int const kStages = Mma::kStages;
+  static int const kAlignmentA = MapArguments::kAlignmentA;
+  static int const kAlignmentB = MapArguments::kAlignmentB;
+  static int const kAlignmentC = Epilogue::OutputTileIterator::kElementsPerAccess;
+
+  /// Warp count (concept: GemmShape)
+  using WarpCount = typename Mma::WarpCount;
+  static int const kThreadCount = 32 * WarpCount::kCount;
+
+  using ProblemVisitor =
+      GemmMoeProblemVisitor<ThreadblockShape, kGroupScheduleMode, kThreadCount, kThreadCount, kTransposed>;
+
+  //
+  // Structures
+  //
+
+  /// Argument structure
+  struct Arguments {
+    //
+    // Data members
+    //
+
+    int problem_count;
+    int threadblock_count;
+
+    typename EpilogueOutputOp::Params output_op;
+
+    ElementA* ptr_A;
+    ElementB* ptr_B;
+    ElementScale* weight_scales;
+    ElementC* ptr_C;
+    ElementC* ptr_D;
+
+    int64_t* total_rows_before_expert;
+    int64_t gemm_n;
+    int64_t gemm_k;
+
+    // Only used by device-level operator
+    GemmCoord* host_problem_sizes;
+
+    //
+    // Methods
+    //
+
+    /// Default ctor
+    CUTLASS_HOST_DEVICE
+    Arguments()
+        : problem_count(0),
+          threadblock_count(0),
+          ptr_A(nullptr),
+          ptr_B(nullptr),
+          weight_scales(nullptr),
+          ptr_C(nullptr),
+          ptr_D(nullptr),
+          total_rows_before_expert(nullptr),
+          gemm_n(0),
+          gemm_k(0),
+          host_problem_sizes(nullptr) {}
+
+    /// Ctor
+    CUTLASS_HOST_DEVICE
+    Arguments(int problem_count, int threadblock_count, typename EpilogueOutputOp::Params output_op,
+              const ElementA* ptr_A, const ElementB* ptr_B, const ElementScale* weight_scales, const ElementC* ptr_C,
+              ElementC* ptr_D, int64_t* total_rows_before_expert, int64_t gemm_n, int64_t gemm_k,
+              GemmCoord* host_problem_sizes = nullptr)
+        : problem_count(problem_count),
+          threadblock_count(threadblock_count),
+          output_op(output_op),
+          ptr_A(const_cast<ElementA*>(ptr_A)),
+          ptr_B(const_cast<ElementB*>(ptr_B)),
+          weight_scales(const_cast<ElementScale*>(weight_scales)),
+          ptr_C(const_cast<ElementC*>(ptr_C)),
+          ptr_D(ptr_D),
+          total_rows_before_expert(total_rows_before_expert),
+          gemm_n(gemm_n),
+          gemm_k(gemm_k),
+          host_problem_sizes(nullptr) {
+      if (platform::is_same<uint8_t, ElementB>::value || platform::is_same<uint4b_t, ElementB>::value) {
+        assert(weight_scales);
+      }
+    }
+  };
+
+  //
+  // Structure for precomputing values in host memory and passing to kernels
+  //
+
+  /// Parameters structure
+  struct Params {
+    typename ProblemVisitor::Params problem_visitor;
+    int threadblock_count;
+
+    typename EpilogueOutputOp::Params output_op;
+
+    ElementA* ptr_A;
+    ElementB* ptr_B;
+    ElementScale* weight_scales;
+    ElementC* ptr_C;
+    ElementC* ptr_D;
+
+    //
+    // Methods
+    //
+
+    CUTLASS_HOST_DEVICE
+    Params() : ptr_A(nullptr), ptr_B(nullptr), weight_scales(nullptr), ptr_C(nullptr), ptr_D(nullptr) {}
+
+    CUTLASS_HOST_DEVICE
+    Params(Arguments const& args, void* workspace = nullptr, int tile_count = 0)
+        : problem_visitor(args.total_rows_before_expert, args.gemm_n, args.gemm_k, args.problem_count, workspace,
+                          tile_count),
+          threadblock_count(args.threadblock_count),
+          output_op(args.output_op),
+          ptr_A(args.ptr_A),
+          ptr_B(args.ptr_B),
+          weight_scales(args.weight_scales),
+          ptr_C(args.ptr_C),
+          ptr_D(args.ptr_D) {}
+
+    CUTLASS_HOST_DEVICE
+    void update(Arguments const& args, void* workspace = nullptr, int tile_count = 0) {
+      problem_visitor = typename ProblemVisitor::Params(args.total_rows_before_expert, args.gemm_n, args.gemm_k,
+                                                        args.problem_count, workspace, tile_count);
+      threadblock_count = args.threadblock_count;
+      output_op = args.output_op;
+      ptr_A = args.ptr_A;
+      ptr_B = args.ptr_B;
+      weight_scales = args.weight_scales;
+      ptr_C = args.ptr_C;
+      ptr_D = args.ptr_D;
+    }
+  };
+
+  /// Shared memory storage structure
+  union SharedStorage {
+    typename ProblemVisitor::SharedStorage problem_visitor;
+    typename Mma::SharedStorage main_loop;
+    typename Epilogue::SharedStorage epilogue;
+  };
+
+ public:
+  //
+  // Methods
+  //
+
+  CUTLASS_DEVICE
+  MoeFCGemm() {}
+
+  /// Determines whether kernel satisfies alignment
+  static Status can_implement(cutlass::gemm::GemmCoord const& problem_size) { return Status::kSuccess; }
+
+  static Status can_implement(Arguments const& args) {
+    if (args.weight_scales != nullptr) {
+      CUTLASS_TRACE_HOST(
+          "MoeFCGemm::can_implement() - weight scales are ignored for all types except uint8_t and uint4b_t");
+      return Status::kInvalid;
+    }
+    return Status::kSuccess;
+  }
+
+  static size_t get_extra_workspace_size(Arguments const& args, cutlass::gemm::GemmCoord const& grid_tiled_shape) {
+    return 0;
+  }
+
+  // The dummy template parameter is not used and exists so that we can compile this code using
+  // a standard earlier than C++17. Prior to C++17, fully specialized templates HAD to exists in
+  // a namespace
+  template <bool B, typename dummy = void>
+  struct KernelRunner {
+    CUTLASS_DEVICE
+    static void run_kernel(Params const& params, SharedStorage& shared_storage) { CUTLASS_NOT_IMPLEMENTED(); }
+  };
+
+  template <typename dummy>
+  struct KernelRunner<true, dummy> {
+    CUTLASS_DEVICE
+    static void run_kernel(Params const& params, SharedStorage& shared_storage) {
+      //
+      // These types shadow the type-level definitions and support the ability to implement
+      // a 'transposed' GEMM that computes the transposed problems.
+      //
+      using ElementA = typename Mma::IteratorA::Element;
+      using LayoutA = typename Mma::IteratorA::Layout;
+      using ElementB = typename Mma::IteratorB::Element;
+      using LayoutB = typename Mma::IteratorB::Layout;
+      using ElementC = typename Epilogue::OutputTileIterator::Element;
+      using LayoutC = typename Epilogue::OutputTileIterator::Layout;
+      static constexpr int kInterleave = Mma::IteratorB::Shape::kRow / Mma::Shape::kK;
+      static_assert(platform::is_same<LayoutB, layout::RowMajor>::value && kInterleave == 1 ||
+                        platform::is_same<LayoutB, layout::ColumnMajor>::value && kInterleave >= 1,
+                    "B must be row major/col major OR col major interleaved.");
+
+      //
+      // Problem visitor.
+      //
+      ProblemVisitor problem_visitor(params.problem_visitor, shared_storage.problem_visitor, blockIdx.x);
+
+      const int64_t gemm_k = params.problem_visitor.gemm_k;
+      const int64_t gemm_n = params.problem_visitor.gemm_n;
+      int64_t bytes_per_expert_matrix = (gemm_k * gemm_n / 8) * cutlass::sizeof_bits<ElementB>::value;
+
+      // Outer 'persistent' loop to iterate over tiles
+      while (problem_visitor.next_tile()) {
+        GemmCoord problem_size = problem_visitor.problem_size();
+        int32_t problem_idx = problem_visitor.problem_index();
+        int32_t cta_idx = int32_t(problem_visitor.threadblock_idx());
+
+        GemmCoord grid_shape = problem_visitor.grid_shape(problem_size);
+
+        cutlass::gemm::GemmCoord threadblock_offset(int(cta_idx / grid_shape.n()) * Mma::Shape::kM,
+                                                    int(cta_idx % grid_shape.n()) * Mma::Shape::kN, 0);
+
+        // Load element pointers. Exchange pointers and strides if working on the transpose
+        const int64_t rows_to_jump =
+            problem_idx == 0 ? 0 : params.problem_visitor.last_row_for_problem[problem_idx - 1];
+        ElementA* ptr_A = reinterpret_cast<ElementA*>(params.ptr_A) + rows_to_jump * gemm_k;
+        typename LayoutA::LongIndex ldm_A = gemm_k;
+
+        char* byte_ptr_B = ((char*)params.ptr_B) + problem_idx * bytes_per_expert_matrix;
+        ElementB* ptr_B = reinterpret_cast<ElementB*>(byte_ptr_B);
+        typename LayoutB::LongIndex ldm_B =
+            platform::is_same<layout::RowMajor, LayoutB>::value ? gemm_n : gemm_k * kInterleave;
+
+        // Compute initial location in logical coordinates
+        cutlass::MatrixCoord tb_offset_A{
+            threadblock_offset.m(),
+            0,
+        };
+
+        cutlass::MatrixCoord tb_offset_B{0, threadblock_offset.n() / kInterleave};
+
+        cutlass::MatrixCoord tb_offset_scale{0, threadblock_offset.n()};
+
+        // Compute position within threadblock
+        int thread_idx = threadIdx.x;
+
+        // Construct iterators to A and B operands
+        typename Mma::IteratorA iterator_A(LayoutA(ldm_A), ptr_A, {problem_size.m(), problem_size.k()}, thread_idx,
+                                           tb_offset_A);
+
+        typename Mma::IteratorB iterator_B(LayoutB(ldm_B), ptr_B,
+                                           {problem_size.k() * kInterleave, problem_size.n() / kInterleave}, thread_idx,
+                                           tb_offset_B);
+
+        typename Mma::FragmentC accumulators;
+
+        accumulators.clear();
+
+        // Broadcast the warp_id computed by lane 0 to ensure dependent code
+        // is compiled as warp-uniform.
+        int warp_idx = __shfl_sync(0xffffffff, threadIdx.x / 32, 0);
+
+        int lane_idx = threadIdx.x % 32;
+
+        //
+        // Matrix multiply phase
+        //
+
+        // Construct thread-scoped matrix multiply
+        Mma mma(shared_storage.main_loop, thread_idx, warp_idx, lane_idx);
+
+        // Compute threadblock-scoped matrix multiply-add
+        int gemm_k_iterations = (problem_size.k() + Mma::Shape::kK - 1) / Mma::Shape::kK;
+
+        // Wait for all threads to finish their epilogue phases from the previous tile.
+        __syncthreads();
+
+        // Compute threadblock-scoped matrix multiply-add
+        ElementScale* weight_scale_ptr = params.weight_scales + problem_idx * problem_size.n();
+        run_mma<Mma>(mma, gemm_k_iterations, accumulators, iterator_A, iterator_B, accumulators, weight_scale_ptr,
+                     {1, problem_size.n()}, thread_idx, tb_offset_scale);
+
+        //
+        // Epilogue
+        //
+
+        EpilogueOutputOp output_op(params.output_op);
+
+        ElementC* ptr_C = reinterpret_cast<ElementC*>(params.ptr_C) + problem_idx * gemm_n;
+        ElementC* ptr_D = reinterpret_cast<ElementC*>(params.ptr_D) + rows_to_jump * gemm_n;
+
+        LayoutC layout_C(0);
+        LayoutC layout_D(gemm_n);
+
+        typename Epilogue::OutputTileIterator::Params params_C(layout_C);
+        typename Epilogue::OutputTileIterator::Params params_D(layout_D);
+
+        // Tile iterator loading from source tensor.
+        typename Epilogue::OutputTileIterator iterator_C(params_C, ptr_C, problem_size.mn(), thread_idx,
+                                                         threadblock_offset.mn());
+
+        // Tile iterator writing to destination tensor.
+        typename Epilogue::OutputTileIterator iterator_D(params_D, ptr_D, problem_size.mn(), thread_idx,
+                                                         threadblock_offset.mn());
+
+        Epilogue epilogue(shared_storage.epilogue, thread_idx, warp_idx, lane_idx);
+
+        // Execute the epilogue operator to update the destination tensor.
+        epilogue(output_op, iterator_D, accumulators, iterator_C);
+
+        // Next tile
+        problem_visitor.advance(gridDim.x);
+      }
+    }
+  };
+
+  /*
+    To improve compilation speed, we do not compile the device operator if the CUDA_ARCH does not correspond
+    to the ArchTag of the cutlass kernel operator.
+  */
+  /// Executes one GEMM
+  CUTLASS_DEVICE
+  void operator()(Params const& params, SharedStorage& shared_storage) {
+#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 700) && (__CUDA_ARCH__ < 750)
+    static constexpr bool compile_needed = platform::is_same<KernelArch, arch::Sm70>::value;
+    KernelRunner<compile_needed>::run_kernel(params, shared_storage);
+#elif defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 750) && (__CUDA_ARCH__ < 800)
+    static constexpr bool compile_needed = platform::is_same<KernelArch, arch::Sm75>::value;
+    KernelRunner<compile_needed>::run_kernel(params, shared_storage);
+#elif defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 800) && (__CUDA_ARCH__ < 900)
+    static constexpr bool compile_needed = platform::is_same<KernelArch, arch::Sm80>::value;
+    KernelRunner<compile_needed>::run_kernel(params, shared_storage);
+#else
+    CUTLASS_NOT_IMPLEMENTED();
+#endif
+  }
+};
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+
+}  // namespace kernel
+}  // namespace gemm
+}  // namespace cutlass
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/onnxruntime/contrib_ops/cuda/moe/ft_moe/moe_gemm_kernels.h b/onnxruntime/contrib_ops/cuda/moe/ft_moe/moe_gemm_kernels.h
new file mode 100644
index 0000000000000..60608f462fde5
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/moe/ft_moe/moe_gemm_kernels.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cuda_runtime_api.h>
+#include "ft_gemm_configs.h"
+
+namespace ort_fastertransformer {
+
+enum class ActivationType { Gelu,
+                            Relu,
+                            Silu,
+                            GeGLU,
+                            ReGLU,
+                            SiGLU,
+                            Identity,
+                            InvalidType };
+
+template <typename T, /*The type used for activations/scales/compute*/
+          typename WeightType /* The type for the MoE weights */>
+class MoeGemmRunner {
+ public:
+  MoeGemmRunner();
+
+  void initialize(int sm);
+
+  void moe_gemm_bias_act(const T* A, const WeightType* B, const T* weight_scales, const T* biases, T* C,
+                         int64_t* total_rows_before_expert, int64_t total_rows, int64_t gemm_n, int64_t gemm_k,
+                         int num_experts, ActivationType activation_type, cudaStream_t stream);
+
+  void moe_gemm(const T* A, const WeightType* B, const T* weight_scales, T* C, int64_t* total_rows_before_expert,
+                int64_t total_rows, int64_t gemm_n, int64_t gemm_k, int num_experts, cudaStream_t stream);
+
+ private:
+  template <typename EpilogueTag>
+  void dispatch_to_arch(const T* A, const WeightType* B, const T* weight_scales, const T* biases, T* C,
+                        int64_t* total_rows_before_expert, int64_t total_rows, int64_t gemm_n, int64_t gemm_k,
+                        int num_experts, CutlassGemmConfig gemm_config, cudaStream_t stream, int* occupancy = nullptr);
+
+  template <typename EpilogueTag>
+  void run_gemm(const T* A, const WeightType* B, const T* weight_scales, const T* biases, T* C,
+                int64_t* total_rows_before_expert, int64_t total_rows, int64_t gemm_n, int64_t gemm_k, int num_experts,
+                cudaStream_t stream);
+
+ private:
+  int sm_;
+  int multi_processor_count_;
+};
+
+}  // namespace ort_fastertransformer
diff --git a/onnxruntime/contrib_ops/cuda/moe/ft_moe/moe_gemm_kernels_fp16_fp16.cu b/onnxruntime/contrib_ops/cuda/moe/ft_moe/moe_gemm_kernels_fp16_fp16.cu
new file mode 100644
index 0000000000000..1d9a249db4237
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/moe/ft_moe/moe_gemm_kernels_fp16_fp16.cu
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moe_gemm_kernels_template.h"
+
+namespace ort_fastertransformer {
+template class MoeGemmRunner<half, half>;
+}  // namespace ort_fastertransformer
diff --git a/onnxruntime/contrib_ops/cuda/moe/ft_moe/moe_gemm_kernels_fp32_fp32.cu b/onnxruntime/contrib_ops/cuda/moe/ft_moe/moe_gemm_kernels_fp32_fp32.cu
new file mode 100644
index 0000000000000..7b250e6ca9060
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/moe/ft_moe/moe_gemm_kernels_fp32_fp32.cu
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "moe_gemm_kernels_template.h"
+
+namespace ort_fastertransformer {
+template class MoeGemmRunner<float, float>;
+}  // namespace ort_fastertransformer
diff --git a/onnxruntime/contrib_ops/cuda/moe/ft_moe/moe_gemm_kernels_template.h b/onnxruntime/contrib_ops/cuda/moe/ft_moe/moe_gemm_kernels_template.h
new file mode 100644
index 0000000000000..66950c9b65970
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/moe/ft_moe/moe_gemm_kernels_template.h
@@ -0,0 +1,428 @@
+/*
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Ignore CUTLASS warnings about type punning
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wstrict-aliasing"
+#endif
+
+#include "cutlass/array.h"
+#include "cutlass/numeric_conversion.h"
+#include "cutlass/layout/matrix.h"
+#include "cutlass/numeric_types.h"
+#include "cutlass/gemm/device/gemm_grouped.h"
+#include "cutlass/gemm/kernel/default_gemm_grouped.h"
+#include "cutlass/cutlass.h"
+#include "cutlass/gemm/gemm.h"
+#include "cutlass/arch/arch.h"
+#include "cutlass/epilogue/thread/linear_combination_relu.h"
+
+#include "compute_occupancy.h"
+#include "epilogue_helpers.h"
+#include "layout_traits_helper.h"
+#include "moe_cutlass_kernel.h"
+
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+
+#include "cutlass_heuristic.h"
+#include "moe_gemm_kernels.h"
+
+#include <cuda.h>
+#include <cuda_fp16.h>
+#include <math.h>
+#include <sstream>
+
+namespace ort_fastertransformer {
+
+// ============================= Variable batched Gemm things ===========================
+template <typename T, typename WeightType, typename arch, typename EpilogueTag, typename ThreadblockShape,
+          typename WarpShape, int Stages>
+void generic_moe_gemm_kernelLauncher(const T* A, const WeightType* B, const T* weight_scales, const T* biases, T* C,
+                                     int64_t* total_rows_before_expert, int64_t gemm_n, int64_t gemm_k, int num_experts,
+                                     CutlassGemmConfig gemm_config, const int multi_processor_count,
+                                     cudaStream_t stream, int* kernel_occupancy = nullptr) {
+  if (gemm_config.split_k_style != SplitKStyle::NO_SPLIT_K) {
+    ORT_THROW("[FT Error][MoeGemm] Grouped gemm does not support split-k");
+  }
+
+  static_assert(cutlass::platform::is_same<T, half>::value || cutlass::platform::is_same<T, float>::value,
+                "Specialized for half, float");
+
+  static_assert(cutlass::platform::is_same<T, WeightType>::value ||
+                    cutlass::platform::is_same<WeightType, uint8_t>::value ||
+                    cutlass::platform::is_same<WeightType, cutlass::uint4b_t>::value,
+                "");
+
+  // The cutlass type for the input elements. This is needed to convert to cutlass::half_t if necessary.
+  using ElementType_ =
+      typename cutlass::platform::conditional<cutlass::platform::is_same<T, half>::value, cutlass::half_t, T>::type;
+  using ElementType = ElementType_;
+
+  using CutlassWeightType_ =
+      typename cutlass::platform::conditional<cutlass::platform::is_same<WeightType, half>::value, cutlass::half_t,
+                                              WeightType>::type;
+  using CutlassWeightType = CutlassWeightType_;
+
+  // We need separate config for each architecture since we will target different tensorcore instructions. For float,
+  // we do not target TCs.
+  using MixedGemmArchTraits = cutlass::gemm::kernel::MixedGemmArchTraits<ElementType, CutlassWeightType, arch>;
+  using ElementAccumulator = typename MixedGemmArchTraits::AccType;
+
+  using EpilogueOp =
+      typename Epilogue<ElementType, MixedGemmArchTraits::ElementsPerAccessC, ElementAccumulator, EpilogueTag>::Op;
+
+  // Finally, set up the kernel.
+  using GemmKernel_ = typename cutlass::gemm::kernel::DefaultGemmGrouped<
+      ElementType, cutlass::layout::RowMajor, cutlass::ComplexTransform::kNone, MixedGemmArchTraits::ElementsPerAccessA,
+      CutlassWeightType, typename MixedGemmArchTraits::LayoutB, cutlass::ComplexTransform::kNone,
+      MixedGemmArchTraits::ElementsPerAccessB, ElementType, cutlass::layout::RowMajor, ElementAccumulator,
+      typename MixedGemmArchTraits::OperatorClass, arch, ThreadblockShape, WarpShape,
+      typename MixedGemmArchTraits::InstructionShape, EpilogueOp,
+      cutlass::gemm::threadblock::GemmBatchedIdentityThreadblockSwizzle, Stages,
+      cutlass::gemm::kernel::GroupScheduleMode::kDeviceOnly, typename MixedGemmArchTraits::Operator>::GemmKernel;
+
+  using GemmKernel = cutlass::gemm::kernel::MoeFCGemm<typename GemmKernel_::Mma, typename GemmKernel_::Epilogue,
+                                                      typename GemmKernel_::ThreadblockSwizzle,
+                                                      arch,  // Ensure top level arch is used for dispatch
+                                                      GemmKernel_::kGroupScheduleMode>;
+
+  using GemmGrouped = cutlass::gemm::device::GemmGrouped<GemmKernel>;
+
+  if (kernel_occupancy != nullptr) {
+    *kernel_occupancy = compute_occupancy_for_kernel<GemmKernel>();
+    return;
+  }
+  int occupancy = std::min(2, GemmGrouped::maximum_active_blocks());
+  if (occupancy == 0) {
+    ORT_THROW("[FT Error][MoE Runner] GPU lacks the shared memory resources to run GroupedGEMM kernel");
+  }
+  const int threadblock_count = multi_processor_count * occupancy;
+
+  typename EpilogueOp::Params epilogue_op(ElementAccumulator(1.f), ElementAccumulator(0.f));
+
+  typename GemmGrouped::Arguments args(
+      num_experts, threadblock_count, epilogue_op, reinterpret_cast<const ElementType*>(A),
+      reinterpret_cast<const CutlassWeightType*>(B), reinterpret_cast<const ElementType*>(weight_scales),
+      reinterpret_cast<const ElementType*>(biases), reinterpret_cast<ElementType*>(C), total_rows_before_expert, gemm_n,
+      gemm_k);
+
+  GemmGrouped gemm;
+
+  auto can_implement = gemm.can_implement(args);
+  if (can_implement != cutlass::Status::kSuccess) {
+    std::string err_msg =
+        "MoEFC kernel will fail for params. Error: " + std::string(cutlassGetStatusString(can_implement));
+    ORT_THROW("[FT Error][MoE Runner] " + err_msg);
+  }
+
+  auto init_status = gemm.initialize(args);
+  if (init_status != cutlass::Status::kSuccess) {
+    std::string err_msg = "Failed to initialize cutlass variable batched gemm. Error: " +
+                          std::string(cutlassGetStatusString(init_status));
+    ORT_THROW("[FT Error][MoE Runner] " + err_msg);
+  }
+
+  auto run_status = gemm.run(stream);
+  if (run_status != cutlass::Status::kSuccess) {
+    std::string err_msg =
+        "Failed to run cutlass variable batched gemm. Error: " + std::string(cutlassGetStatusString(run_status));
+    ORT_THROW("[FT Error][MoE Runner] " + err_msg);
+  }
+}
+
+template <typename T, typename WeightType, typename arch, typename EpilogueTag, typename ThreadblockShape,
+          typename WarpShape, int Stages, typename Enable = void>
+struct dispatch_stages {
+  static void dispatch(const T* A, const WeightType* B, const T* weight_scales, const T* biases, T* C,
+                       int64_t* total_rows_before_expert, int64_t gemm_n, int64_t gemm_k, int num_experts,
+                       CutlassGemmConfig gemm_config, int multi_processor_count, cudaStream_t stream,
+                       int* occupancy = nullptr) {
+    std::string err_msg = "Cutlass fpA_intB gemm. Not instantiates for arch " +
+                          std::to_string(arch::kMinComputeCapability) + " with stages set to " + std::to_string(Stages);
+    ORT_THROW("[FT Error][dispatch_stages::dispatch] " + err_msg);
+  }
+};
+
+template <typename T, typename WeightType, typename arch, typename EpilogueTag, typename ThreadblockShape,
+          typename WarpShape>
+struct dispatch_stages<T, WeightType, arch, EpilogueTag, ThreadblockShape, WarpShape, 2> {
+  static void dispatch(const T* A, const WeightType* B, const T* weight_scales, const T* biases, T* C,
+                       int64_t* total_rows_before_expert, int64_t gemm_n, int64_t gemm_k, int num_experts,
+                       CutlassGemmConfig gemm_config, int multi_processor_count, cudaStream_t stream,
+                       int* occupancy = nullptr) {
+    generic_moe_gemm_kernelLauncher<T, WeightType, arch, EpilogueTag, ThreadblockShape, WarpShape, 2>(
+        A, B, weight_scales, biases, C, total_rows_before_expert, gemm_n, gemm_k, num_experts, gemm_config,
+        multi_processor_count, stream, occupancy);
+  }
+};
+
+template <typename T, typename WeightType, typename EpilogueTag, typename ThreadblockShape, typename WarpShape,
+          int Stages>
+struct dispatch_stages<T, WeightType, cutlass::arch::Sm80, EpilogueTag, ThreadblockShape, WarpShape, Stages,
+                       typename std::enable_if<(Stages > 2)>::type> {
+  static void dispatch(const T* A, const WeightType* B, const T* weight_scales, const T* biases, T* C,
+                       int64_t* total_rows_before_expert, int64_t gemm_n, int64_t gemm_k, int num_experts,
+                       CutlassGemmConfig gemm_config, int multi_processor_count, cudaStream_t stream,
+                       int* occupancy = nullptr) {
+    generic_moe_gemm_kernelLauncher<T, WeightType, cutlass::arch::Sm80, EpilogueTag, ThreadblockShape, WarpShape,
+                                    Stages>(A, B, weight_scales, biases, C, total_rows_before_expert, gemm_n, gemm_k,
+                                            num_experts, gemm_config, multi_processor_count, stream, occupancy);
+  }
+};
+
+template <typename T, typename WeightType, typename arch, typename EpilogueTag, typename ThreadblockShape,
+          typename WarpShape>
+void dispatch_gemm_config(const T* A, const WeightType* B, const T* weight_scales, const T* biases, T* C,
+                          int64_t* total_rows_before_expert, int64_t gemm_n, int64_t gemm_k, int num_experts,
+                          CutlassGemmConfig gemm_config, int multi_processor_count, cudaStream_t stream,
+                          int* occupancy = nullptr) {
+  switch (gemm_config.stages) {
+    case 2:
+      using DispatcherStages2 = dispatch_stages<T, WeightType, arch, EpilogueTag, ThreadblockShape, WarpShape, 2>;
+      DispatcherStages2::dispatch(A, B, weight_scales, biases, C, total_rows_before_expert, gemm_n, gemm_k, num_experts,
+                                  gemm_config, multi_processor_count, stream, occupancy);
+      break;
+    case 3:
+      using DispatcherStages3 = dispatch_stages<T, WeightType, arch, EpilogueTag, ThreadblockShape, WarpShape, 3>;
+      DispatcherStages3::dispatch(A, B, weight_scales, biases, C, total_rows_before_expert, gemm_n, gemm_k, num_experts,
+                                  gemm_config, multi_processor_count, stream, occupancy);
+      break;
+    case 4:
+      using DispatcherStages4 = dispatch_stages<T, WeightType, arch, EpilogueTag, ThreadblockShape, WarpShape, 4>;
+      DispatcherStages4::dispatch(A, B, weight_scales, biases, C, total_rows_before_expert, gemm_n, gemm_k, num_experts,
+                                  gemm_config, multi_processor_count, stream, occupancy);
+      break;
+    default:
+      std::string err_msg = "dispatch_gemm_config does not support stages " + std::to_string(gemm_config.stages);
+      ORT_THROW("[FT Error][MoE][dispatch_gemm_config] " + err_msg);
+      break;
+  }
+}
+
+// This overload will handle tensorop gemms. It is disabled via SFINAE for fp32.
+// This overload is only enabled when T == WeightType.
+template <
+    typename T, typename WeightType, typename arch, typename EpilogueTag,
+    typename std::enable_if<!std::is_same<T, float>::value && std::is_same<T, WeightType>::value>::type* = nullptr>
+void dispatch_moe_gemm_to_cutlass(const T* A, const WeightType* B, const T* weight_scales, const T* biases, T* C,
+                                  int64_t* total_rows_before_expert, int64_t total_rows, int64_t gemm_n, int64_t gemm_k,
+                                  int num_experts, CutlassGemmConfig gemm_config, int sm_version,
+                                  int multi_processor_count, cudaStream_t stream, int* occupancy = nullptr) {
+  switch (gemm_config.tile_config) {
+    case CutlassTileConfig::CtaShape32x128x64_WarpShape32x32x64:
+      dispatch_gemm_config<T, WeightType, arch, EpilogueTag, cutlass::gemm::GemmShape<32, 128, 64>,
+                           cutlass::gemm::GemmShape<32, 32, 64>>(A, B, weight_scales, biases, C,
+                                                                 total_rows_before_expert, gemm_n, gemm_k, num_experts,
+                                                                 gemm_config, multi_processor_count, stream, occupancy);
+      break;
+    case CutlassTileConfig::CtaShape64x128x64_WarpShape32x64x64:
+      dispatch_gemm_config<T, WeightType, arch, EpilogueTag, cutlass::gemm::GemmShape<64, 128, 64>,
+                           cutlass::gemm::GemmShape<32, 64, 64>>(A, B, weight_scales, biases, C,
+                                                                 total_rows_before_expert, gemm_n, gemm_k, num_experts,
+                                                                 gemm_config, multi_processor_count, stream, occupancy);
+      break;
+    case CutlassTileConfig::CtaShape128x128x64_WarpShape64x32x64:
+      dispatch_gemm_config<T, WeightType, arch, EpilogueTag, cutlass::gemm::GemmShape<128, 128, 64>,
+                           cutlass::gemm::GemmShape<64, 32, 64>>(A, B, weight_scales, biases, C,
+                                                                 total_rows_before_expert, gemm_n, gemm_k, num_experts,
+                                                                 gemm_config, multi_processor_count, stream, occupancy);
+      break;
+    case CutlassTileConfig::Undefined:
+      ORT_THROW("[FT Error][dispatch_moe_gemm_to_cutlass] gemm config undefined.");
+      break;
+    case CutlassTileConfig::ChooseWithHeuristic:
+      ORT_THROW("[FT Error][dispatch_moe_gemm_to_cutlass] gemm config should have already been set by heuristic.");
+      break;
+    default:
+      ORT_THROW("[FT Error][dispatch_moe_gemm_to_cutlass] Config is invalid for same type MoE tensorop GEMM.");
+      break;
+  }
+}
+
+// Tensorop GEMM overload
+// Overload for quantize MoE GEMMs. We disable some warp configs here since they will not be used and we can improve
+// compile time
+template <
+    typename T, typename WeightType, typename arch, typename EpilogueTag,
+    typename std::enable_if<!std::is_same<T, float>::value && !std::is_same<T, WeightType>::value>::type* = nullptr>
+void dispatch_moe_gemm_to_cutlass(const T* A, const WeightType* B, const T* weight_scales, const T* biases, T* C,
+                                  int64_t* total_rows_before_expert, int64_t total_rows, int64_t gemm_n, int64_t gemm_k,
+                                  int num_experts, CutlassGemmConfig gemm_config, int sm_version,
+                                  int multi_processor_count, cudaStream_t stream, int* occupancy = nullptr) {
+  switch (gemm_config.tile_config) {
+    case CutlassTileConfig::CtaShape32x128x64_WarpShape32x32x64:
+      dispatch_gemm_config<T, WeightType, arch, EpilogueTag, cutlass::gemm::GemmShape<32, 128, 64>,
+                           cutlass::gemm::GemmShape<32, 32, 64>>(A, B, weight_scales, biases, C,
+                                                                 total_rows_before_expert, gemm_n, gemm_k, num_experts,
+                                                                 gemm_config, multi_processor_count, stream, occupancy);
+      break;
+    case CutlassTileConfig::CtaShape64x128x64_WarpShape64x32x64:
+      dispatch_gemm_config<T, WeightType, arch, EpilogueTag, cutlass::gemm::GemmShape<64, 128, 64>,
+                           cutlass::gemm::GemmShape<64, 32, 64>>(A, B, weight_scales, biases, C,
+                                                                 total_rows_before_expert, gemm_n, gemm_k, num_experts,
+                                                                 gemm_config, multi_processor_count, stream, occupancy);
+      break;
+    case CutlassTileConfig::CtaShape128x128x64_WarpShape128x32x64:
+      dispatch_gemm_config<T, WeightType, arch, EpilogueTag, cutlass::gemm::GemmShape<128, 128, 64>,
+                           cutlass::gemm::GemmShape<128, 32, 64>>(
+          A, B, weight_scales, biases, C, total_rows_before_expert, gemm_n, gemm_k, num_experts, gemm_config,
+          multi_processor_count, stream, occupancy);
+      break;
+    case CutlassTileConfig::Undefined:
+      ORT_THROW("[FT Error][dispatch_moe_gemm_to_cutlass] gemm config undefined.");
+      break;
+    case CutlassTileConfig::ChooseWithHeuristic:
+      ORT_THROW("[FT Error][dispatch_moe_gemm_to_cutlass] gemm config should have already been set by heuristic.");
+      break;
+    default:
+      ORT_THROW("[FT Error][dispatch_moe_gemm_to_cutlass] Config is invalid for mixed type tensorop GEMM.");
+      break;
+  }
+}
+
+// This overload will handle simt gemms. It is disabled via SFINAE for tensorop.
+template <typename T, typename WeightType, typename arch, typename EpilogueTag,
+          typename std::enable_if<std::is_same<T, float>::value>::type* = nullptr>
+void dispatch_moe_gemm_to_cutlass(const T* A, const WeightType* B, const T* weight_scales, const T* biases, T* C,
+                                  int64_t* total_rows_before_expert, int64_t total_rows, int64_t gemm_n, int64_t gemm_k,
+                                  int num_experts, CutlassGemmConfig gemm_config, int sm_version,
+                                  int multi_processor_count, cudaStream_t stream, int* occupancy = nullptr) {
+  switch (gemm_config.tile_config) {
+    case CutlassTileConfig::CtaShape128x128x8_WarpShape64x64x8:
+      dispatch_gemm_config<T, WeightType, arch, EpilogueTag, cutlass::gemm::GemmShape<128, 128, 8>,
+                           cutlass::gemm::GemmShape<64, 64, 8>>(A, B, weight_scales, biases, C,
+                                                                total_rows_before_expert, gemm_n, gemm_k, num_experts,
+                                                                gemm_config, multi_processor_count, stream, occupancy);
+      break;
+    case CutlassTileConfig::Undefined:
+      ORT_THROW("[FT Error][dispatch_moe_gemm_to_cutlass][SIMT] gemm config undefined.");
+      break;
+    case CutlassTileConfig::ChooseWithHeuristic:
+      ORT_THROW(
+          "[FT Error][dispatch_moe_gemm_to_cutlass][SIMT] gemm config should have already been set by heuristic.");
+      break;
+    default:
+      ORT_THROW("[FT Error][dispatch_moe_gemm_to_cutlass][SIMT] Unsupported config for float MoE gemm.");
+      break;
+  }
+}
+
+template <typename T, typename WeightType>
+MoeGemmRunner<T, WeightType>::MoeGemmRunner() {}
+
+template <typename T, typename WeightType>
+void MoeGemmRunner<T, WeightType>::initialize(int sm_version) {
+  int device{-1};
+  cudaGetDevice(&device);
+  sm_ = sm_version;
+  cudaDeviceGetAttribute(&multi_processor_count_, cudaDevAttrMultiProcessorCount, device);
+}
+
+template <typename T, typename WeightType>
+template <typename EpilogueTag>
+void MoeGemmRunner<T, WeightType>::dispatch_to_arch<EpilogueTag>(const T* A, const WeightType* B,
+                                                                 const T* weight_scales, const T* biases, T* C,
+                                                                 int64_t* total_rows_before_expert, int64_t total_rows,
+                                                                 int64_t gemm_n, int64_t gemm_k, int num_experts,
+                                                                 CutlassGemmConfig gemm_config, cudaStream_t stream,
+                                                                 int* occupancy) {
+  if (sm_ >= 70 && sm_ < 75) {
+    dispatch_moe_gemm_to_cutlass<T, WeightType, cutlass::arch::Sm70, EpilogueTag>(
+        A, B, weight_scales, biases, C, total_rows_before_expert, total_rows, gemm_n, gemm_k, num_experts, gemm_config,
+        sm_, multi_processor_count_, stream, occupancy);
+  } else if (sm_ >= 75 && sm_ < 80) {
+    dispatch_moe_gemm_to_cutlass<T, WeightType, cutlass::arch::Sm75, EpilogueTag>(
+        A, B, weight_scales, biases, C, total_rows_before_expert, total_rows, gemm_n, gemm_k, num_experts, gemm_config,
+        sm_, multi_processor_count_, stream, occupancy);
+  } else if (sm_ >= 80 && sm_ < 90) {
+    dispatch_moe_gemm_to_cutlass<T, WeightType, cutlass::arch::Sm80, EpilogueTag>(
+        A, B, weight_scales, biases, C, total_rows_before_expert, total_rows, gemm_n, gemm_k, num_experts, gemm_config,
+        sm_, multi_processor_count_, stream, occupancy);
+  } else {
+    ORT_THROW("[FT Error][MoE][GEMM Dispatch] Arch unsupported for MoE GEMM");
+  }
+}
+
+template <typename T, typename WeightType>
+template <typename EpilogueTag>
+void MoeGemmRunner<T, WeightType>::run_gemm<EpilogueTag>(const T* A, const WeightType* B, const T* weight_scales,
+                                                         const T* biases, T* C, int64_t* total_rows_before_expert,
+                                                         int64_t total_rows, int64_t gemm_n, int64_t gemm_k,
+                                                         int num_experts, cudaStream_t stream) {
+  static constexpr bool is_weight_only = !std::is_same<T, WeightType>::value;
+  static constexpr bool only_simt_configs = std::is_same<T, float>::value;
+  std::vector<CutlassGemmConfig> candidate_configs = get_candidate_configs(sm_, is_weight_only, only_simt_configs);
+  std::vector<int> occupancies(candidate_configs.size());
+
+  for (size_t ii = 0; ii < candidate_configs.size(); ++ii) {
+    dispatch_to_arch<EpilogueTag>(A, B, weight_scales, biases, C, total_rows_before_expert, total_rows, gemm_n, gemm_k,
+                                  num_experts, candidate_configs[ii], stream, &occupancies[ii]);
+  }
+
+  static constexpr int workspace_bytes = 0;  // No workspace for MoE GEMMs.
+  static constexpr int split_k_limit = 1;    // MoE GEMM does not support split-k.
+  CutlassGemmConfig chosen_config =
+      estimate_best_config_from_occupancies(candidate_configs, occupancies, total_rows, gemm_n, gemm_k, num_experts,
+                                            split_k_limit, workspace_bytes, multi_processor_count_, is_weight_only);
+
+  dispatch_to_arch<EpilogueTag>(A, B, weight_scales, biases, C, total_rows_before_expert, total_rows, gemm_n, gemm_k,
+                                num_experts, chosen_config, stream);
+}
+
+template <typename T, typename WeightType>
+void MoeGemmRunner<T, WeightType>::moe_gemm_bias_act(const T* A, const WeightType* B, const T* weight_scales,
+                                                     const T* biases, T* C, int64_t* total_rows_before_expert,
+                                                     int64_t total_rows, int64_t gemm_n, int64_t gemm_k,
+                                                     int num_experts, ActivationType activation_type,
+                                                     cudaStream_t stream) {
+  switch (activation_type) {
+    case ActivationType::Relu:
+      run_gemm<EpilogueOpBiasReLU>(A, B, weight_scales, biases, C, total_rows_before_expert, total_rows, gemm_n, gemm_k,
+                                   num_experts, stream);
+      break;
+    case ActivationType::Gelu:
+      run_gemm<EpilogueOpBiasFtGelu>(A, B, weight_scales, biases, C, total_rows_before_expert, total_rows, gemm_n,
+                                     gemm_k, num_experts, stream);
+      break;
+    case ActivationType::Silu:
+      run_gemm<EpilogueOpBiasSilu>(A, B, weight_scales, biases, C, total_rows_before_expert, total_rows, gemm_n, gemm_k,
+                                   num_experts, stream);
+      break;
+    case ActivationType::Identity:
+      run_gemm<EpilogueOpBias>(A, B, weight_scales, biases, C, total_rows_before_expert, total_rows, gemm_n, gemm_k,
+                               num_experts, stream);
+      break;
+    case ActivationType::InvalidType:
+      ORT_THROW("[FT Error][MoE Runner] Invalid activation type for MoE GEMM");
+      break;
+    default: {
+      ORT_THROW("[FT Error][MoE Runner] Invalid activation type for MoE GEMM");
+    }
+  }
+}
+
+template <typename T, typename WeightType>
+void MoeGemmRunner<T, WeightType>::moe_gemm(const T* A, const WeightType* B, const T* weight_scales, T* C,
+                                            int64_t* total_rows_before_expert, int64_t total_rows, int64_t gemm_n,
+                                            int64_t gemm_k, int num_experts, cudaStream_t stream) {
+  run_gemm<EpilogueOpNoBias>(A, B, weight_scales, nullptr, C, total_rows_before_expert, total_rows, gemm_n, gemm_k,
+                             num_experts, stream);
+}
+
+}  // namespace ort_fastertransformer
diff --git a/onnxruntime/contrib_ops/cuda/moe/ft_moe/moe_kernel.cu b/onnxruntime/contrib_ops/cuda/moe/ft_moe/moe_kernel.cu
new file mode 100644
index 0000000000000..398ce4ee9880f
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/moe/ft_moe/moe_kernel.cu
@@ -0,0 +1,830 @@
+/*
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cuda.h>
+#include <cuda_fp16.h>
+#include <math.h>
+#include <sstream>
+#include <algorithm>
+
+// Ignore CUTLASS warnings about type punning
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wstrict-aliasing"
+#endif
+
+#include "cutlass/array.h"
+#include "cutlass/numeric_conversion.h"
+#include "cutlass/numeric_types.h"
+
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+
+#include "moe_kernel.h"
+
+#if CUDA_VERSION >= 11000
+#include <cub/cub.cuh>
+#include <cub/device/device_radix_sort.cuh>
+#include <cub/util_type.cuh>
+#else
+#include "cub/cub.cuh"
+#include "cub/device/device_radix_sort.cuh"
+#include "cub/util_type.cuh"
+#endif
+
+namespace ort_fastertransformer {
+
+static constexpr int WARP_SIZE = 32;
+
+// ====================== Softmax things ===============================
+// We have our own implementation of softmax here so we can support transposing the output
+// in the softmax kernel when we extend this module to support expert-choice routing.
+template <typename T, int TPB>
+__launch_bounds__(TPB) __global__
+    void moe_softmax(const T* input, const bool* finished, T* output, const int num_cols) {
+  using BlockReduce = cub::BlockReduce<float, TPB>;
+  __shared__ typename BlockReduce::TempStorage tmpStorage;
+
+  __shared__ float normalizing_factor;
+  __shared__ float float_max;
+
+  const int thread_row_offset = blockIdx.x * num_cols;
+
+  cub::Sum sum;
+  float threadData(-FLT_MAX);
+
+  // Don't touch finished rows.
+  if ((finished != nullptr) && finished[blockIdx.x]) {
+    return;
+  }
+
+  for (int ii = threadIdx.x; ii < num_cols; ii += TPB) {
+    const int idx = thread_row_offset + ii;
+    threadData = max(static_cast<float>(input[idx]), threadData);
+  }
+
+  const float maxElem = BlockReduce(tmpStorage).Reduce(threadData, cub::Max());
+  if (threadIdx.x == 0) {
+    float_max = maxElem;
+  }
+  __syncthreads();
+
+  threadData = 0;
+
+  for (int ii = threadIdx.x; ii < num_cols; ii += TPB) {
+    const int idx = thread_row_offset + ii;
+    threadData += exp((static_cast<float>(input[idx]) - float_max));
+  }
+
+  const auto Z = BlockReduce(tmpStorage).Reduce(threadData, sum);
+
+  if (threadIdx.x == 0) {
+    normalizing_factor = 1.f / Z;
+  }
+  __syncthreads();
+
+  for (int ii = threadIdx.x; ii < num_cols; ii += TPB) {
+    const int idx = thread_row_offset + ii;
+    const float val = exp((static_cast<float>(input[idx]) - float_max)) * normalizing_factor;
+    output[idx] = T(val);
+  }
+}
+
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 530
+template <typename T, int TPB>
+__launch_bounds__(TPB) __global__ void moe_top_k(const T*, const bool*, T*, int*, int*, int, const int) {
+  // Does not support pre-Kepler architectures
+  ;
+}
+#else
+template <typename T, int TPB>
+__launch_bounds__(TPB) __global__ void moe_top_k(const T* inputs_after_softmax, const bool* finished, T* output,
+                                                 int* indices, int* source_rows, int num_experts, int k) {
+  using cub_kvp = cub::KeyValuePair<int, T>;
+  using BlockReduce = cub::BlockReduce<cub_kvp, TPB>;
+  __shared__ typename BlockReduce::TempStorage tmpStorage;
+
+  cub_kvp thread_kvp;
+  cub::ArgMax arg_max;
+
+  int num_rows = gridDim.x;
+  const int block_row = blockIdx.x;
+
+  const bool should_process_row = finished ? !finished[block_row] : true;
+  const int thread_read_offset = blockIdx.x * num_experts;
+  for (int k_idx = 0; k_idx < k; ++k_idx) {
+    thread_kvp.key = 0;
+    thread_kvp.value = T(-1.f);  // This is OK because inputs are probabilities
+
+    cub_kvp inp_kvp;
+    for (int expert = threadIdx.x; expert < num_experts; expert += TPB) {
+      const int idx = thread_read_offset + expert;
+      inp_kvp.key = expert;
+      inp_kvp.value = inputs_after_softmax[idx];
+
+      for (int prior_k = 0; prior_k < k_idx; ++prior_k) {
+        const int prior_winning_expert = indices[k * block_row + prior_k];
+
+        if (prior_winning_expert == expert) {
+          inp_kvp = thread_kvp;
+        }
+      }
+
+      thread_kvp = arg_max(inp_kvp, thread_kvp);
+    }
+
+    const cub_kvp result_kvp = BlockReduce(tmpStorage).Reduce(thread_kvp, arg_max);
+    if (threadIdx.x == 0) {
+      const int idx = k * block_row + k_idx;
+      output[idx] = result_kvp.value;
+      indices[idx] = should_process_row ? result_kvp.key : num_experts;
+      source_rows[idx] = k_idx * num_rows + block_row;
+    }
+    __syncthreads();
+  }
+}
+#endif
+
+// ====================== TopK softmax things ===============================
+
+/*
+  A Top-K gating softmax written to exploit when the number of experts in the MoE layers
+  are a small power of 2. This allows us to cleanly share the rows among the threads in
+  a single warp and eliminate communication between warps (so no need to use shared mem).
+
+  It fuses the softmax, max and argmax into a single kernel.
+
+  Limitations:
+  1) This implementation is intended for when the number of experts is a small power of 2.
+  2) This implementation assumes k is small, but will work for any k.
+*/
+
+template <typename T, int VPT, int NUM_EXPERTS, int WARPS_PER_CTA, int BYTES_PER_LDG>
+__launch_bounds__(WARPS_PER_CTA* WARP_SIZE) __global__
+    void topk_gating_softmax(const T* input, const bool* finished, T* output, int num_rows, int* indices,
+                             int* source_rows, int k) {
+  // We begin by enforcing compile time assertions and setting up compile time constants.
+  static_assert(VPT == (VPT & -VPT), "VPT must be power of 2");
+  static_assert(NUM_EXPERTS == (NUM_EXPERTS & -NUM_EXPERTS), "NUM_EXPERTS must be power of 2");
+  static_assert(BYTES_PER_LDG == (BYTES_PER_LDG & -BYTES_PER_LDG), "BYTES_PER_LDG must be power of 2");
+  static_assert(BYTES_PER_LDG <= 16, "BYTES_PER_LDG must be leq 16");
+
+  // Number of bytes each thread pulls in per load
+  static constexpr int ELTS_PER_LDG = BYTES_PER_LDG / sizeof(T);
+  static constexpr int ELTS_PER_ROW = NUM_EXPERTS;
+  static constexpr int THREADS_PER_ROW = ELTS_PER_ROW / VPT;
+  static constexpr int LDG_PER_THREAD = VPT / ELTS_PER_LDG;
+
+  // Restrictions based on previous section.
+  static_assert(VPT % ELTS_PER_LDG == 0, "The elements per thread must be a multiple of the elements per ldg");
+  static_assert(WARP_SIZE % THREADS_PER_ROW == 0, "The threads per row must cleanly divide the threads per warp");
+  static_assert(THREADS_PER_ROW == (THREADS_PER_ROW & -THREADS_PER_ROW), "THREADS_PER_ROW must be power of 2");
+  static_assert(THREADS_PER_ROW <= WARP_SIZE, "THREADS_PER_ROW can be at most warp size");
+
+  // We have NUM_EXPERTS elements per row. We specialize for small #experts
+  static constexpr int ELTS_PER_WARP = WARP_SIZE * VPT;
+  static constexpr int ROWS_PER_WARP = ELTS_PER_WARP / ELTS_PER_ROW;
+  static constexpr int ROWS_PER_CTA = WARPS_PER_CTA * ROWS_PER_WARP;
+
+  // Restrictions for previous section.
+  static_assert(ELTS_PER_WARP % ELTS_PER_ROW == 0, "The elts per row must cleanly divide the total elt per warp");
+
+  // ===================== From this point, we finally start computing run-time variables. ========================
+
+  // Compute CTA and warp rows. We pack multiple rows into a single warp, and a block contains WARPS_PER_CTA warps.
+  // This, each block processes a chunk of rows. We start by computing the start row for each block.
+  const int cta_base_row = blockIdx.x * ROWS_PER_CTA;
+
+  // Now, using the base row per thread block, we compute the base row per warp.
+  const int warp_base_row = cta_base_row + threadIdx.y * ROWS_PER_WARP;
+
+  // The threads in a warp are split into sub-groups that will work on a row.
+  // We compute row offset for each thread sub-group
+  const int thread_row_in_warp = threadIdx.x / THREADS_PER_ROW;
+  const int thread_row = warp_base_row + thread_row_in_warp;
+
+  // Threads with indices out of bounds should early exit here.
+  if (thread_row >= num_rows) return;
+  const bool should_process_row = finished ? !finished[thread_row] : true;
+
+  // We finally start setting up the read pointers for each thread. First, each thread jumps to the start of the
+  // row it will read.
+  const T* thread_row_ptr = input + thread_row * ELTS_PER_ROW;
+
+  // Now, we compute the group each thread belong to in order to determine the first column to start loads.
+  const int thread_group_idx = threadIdx.x % THREADS_PER_ROW;
+  const int first_elt_read_by_thread = thread_group_idx * ELTS_PER_LDG;
+  const T* thread_read_ptr = thread_row_ptr + first_elt_read_by_thread;
+
+  // Determine the pointer type to use to read in the data depending on the BYTES_PER_LDG template param. In theory,
+  // this can support all powers of 2 up to 16.
+  using AccessType = cutlass::AlignedArray<T, ELTS_PER_LDG>;
+
+  // Finally, we pull in the data from global mem
+  cutlass::Array<T, VPT> row_chunk_input;
+  AccessType* row_chunk_vec_ptr = reinterpret_cast<AccessType*>(&row_chunk_input);
+  const AccessType* vec_thread_read_ptr = reinterpret_cast<const AccessType*>(thread_read_ptr);
+#pragma unroll
+  for (int ii = 0; ii < LDG_PER_THREAD; ++ii) {
+    row_chunk_vec_ptr[ii] = vec_thread_read_ptr[ii * THREADS_PER_ROW];
+  }
+
+  using ComputeType = float;
+  using Converter = cutlass::NumericArrayConverter<ComputeType, T, VPT>;
+  Converter compute_type_converter;
+  cutlass::Array<ComputeType, VPT> row_chunk = compute_type_converter(row_chunk_input);
+
+  // First, we perform a max reduce within the thread. We can do the max in fp16 safely (I think) and just
+  // convert to float afterwards for the exp + sum reduction.
+  ComputeType thread_max = row_chunk[0];
+#pragma unroll
+  for (int ii = 1; ii < VPT; ++ii) {
+    thread_max = max(thread_max, row_chunk[ii]);
+  }
+
+// Now, we find the max within the thread group and distribute among the threads. We use a butterfly reduce.
+#pragma unroll
+  for (int mask = THREADS_PER_ROW / 2; mask > 0; mask /= 2) {
+    thread_max = max(thread_max, __shfl_xor_sync(0xFFFFFFFF, thread_max, mask, THREADS_PER_ROW));
+  }
+
+  // From this point, thread max in all the threads have the max within the row.
+  // Now, we subtract the max from each element in the thread and take the exp. We also compute the thread local sum.
+  float row_sum = 0;
+#pragma unroll
+  for (int ii = 0; ii < VPT; ++ii) {
+    row_chunk[ii] = expf(row_chunk[ii] - thread_max);
+    row_sum += row_chunk[ii];
+  }
+
+// Now, we perform the sum reduce within each thread group. Similar to the max reduce, we use a bufferfly pattern.
+#pragma unroll
+  for (int mask = THREADS_PER_ROW / 2; mask > 0; mask /= 2) {
+    row_sum += __shfl_xor_sync(0xFFFFFFFF, row_sum, mask, THREADS_PER_ROW);
+  }
+
+  // From this point, all threads have the max and the sum for their rows in the thread_max and thread_sum variables
+  // respectively. Finally, we can scale the rows for the softmax. Technically, for top-k gating we don't need to
+  // compute the entire softmax row. We can likely look at the maxes and only compute for the top-k values in the row.
+  // However, this kernel will likely not be a bottle neck and it seems better to closer match torch and find the
+  // argmax after computing the softmax.
+  const float reciprocal_row_sum = 1.f / row_sum;
+
+#pragma unroll
+  for (int ii = 0; ii < VPT; ++ii) {
+    row_chunk[ii] = row_chunk[ii] * reciprocal_row_sum;
+  }
+
+  // Now, softmax_res contains the softmax of the row chunk. Now, I want to find the topk elements in each row, along
+  // with the max index.​
+  int start_col = first_elt_read_by_thread;
+  static constexpr int COLS_PER_GROUP_LDG = ELTS_PER_LDG * THREADS_PER_ROW;
+
+  for (int k_idx = 0; k_idx < k; ++k_idx) {
+    // First, each thread does the local argmax
+    float max_val = row_chunk[0];
+    int expert = start_col;
+#pragma unroll
+    for (int ldg = 0, col = start_col; ldg < LDG_PER_THREAD; ++ldg, col += COLS_PER_GROUP_LDG) {
+#pragma unroll
+      for (int ii = 0; ii < ELTS_PER_LDG; ++ii) {
+        float val = row_chunk[ldg * ELTS_PER_LDG + ii];
+
+        // No check on the experts here since columns with the smallest index are processed first and only
+        // updated if > (not >=)
+        if (val > max_val) {
+          max_val = val;
+          expert = col + ii;
+        }
+      }
+    }
+
+// Now, we perform the argmax reduce. We use the butterfly pattern so threads reach consensus about the max.
+// This will be useful for K > 1 so that the threads can agree on "who" had the max value. That thread can
+// then blank out their max with -inf and the warp can run more iterations...
+#pragma unroll
+    for (int mask = THREADS_PER_ROW / 2; mask > 0; mask /= 2) {
+      float other_max = __shfl_xor_sync(0xFFFFFFFF, max_val, mask, THREADS_PER_ROW);
+      int other_expert = __shfl_xor_sync(0xFFFFFFFF, expert, mask, THREADS_PER_ROW);
+
+      // We want lower indices to "win" in every thread so we break ties this way
+      if (other_max > max_val || (other_max == max_val && other_expert < expert)) {
+        max_val = other_max;
+        expert = other_expert;
+      }
+    }
+
+    // Write the max for this k iteration to global memory.
+    if (thread_group_idx == 0) {
+      // The lead thread from each sub-group will write out the final results to global memory. (This will be a
+      // single) thread per row of the input/output matrices.
+      const int idx = k * thread_row + k_idx;
+      output[idx] = T(max_val);
+      indices[idx] = should_process_row ? expert : NUM_EXPERTS;
+      source_rows[idx] = k_idx * num_rows + thread_row;
+    }
+
+    // Finally, we clear the value in the thread with the current max if there is another iteration to run.
+    if (k_idx + 1 < k) {
+      const int ldg_group_for_expert = expert / COLS_PER_GROUP_LDG;
+      const int thread_to_clear_in_group = (expert / ELTS_PER_LDG) % THREADS_PER_ROW;
+
+      // Only the thread in the group which produced the max will reset the "winning" value to -inf.
+      if (thread_group_idx == thread_to_clear_in_group) {
+        const int offset_for_expert = expert % ELTS_PER_LDG;
+        // Safe to set to any negative value since row_chunk values must be between 0 and 1.
+        row_chunk[ldg_group_for_expert * ELTS_PER_LDG + offset_for_expert] = ComputeType(-10000.f);
+      }
+    }
+  }
+}
+
+namespace detail {
+// Constructs some constants needed to partition the work across threads at compile time.
+template <typename T, int EXPERTS, int BYTES_PER_LDG>
+struct TopkConstants {
+  static constexpr int ELTS_PER_LDG = BYTES_PER_LDG / sizeof(T);
+  static_assert(EXPERTS / (ELTS_PER_LDG * WARP_SIZE) == 0 || EXPERTS % (ELTS_PER_LDG * WARP_SIZE) == 0, "");
+  static constexpr int VECs_PER_THREAD = std::max(1, (int)EXPERTS / (ELTS_PER_LDG * WARP_SIZE));
+  static constexpr int VPT = VECs_PER_THREAD * ELTS_PER_LDG;
+  static constexpr int THREADS_PER_ROW = EXPERTS / VPT;
+  static constexpr int ROWS_PER_WARP = WARP_SIZE / THREADS_PER_ROW;
+};
+}  // namespace detail
+
+template <typename T, int EXPERTS, int WARPS_PER_TB>
+void topk_gating_softmax_launcher_helper(const T* input, const bool* finished, T* output, int* indices, int* source_row,
+                                         int num_rows, int num_experts, int k, cudaStream_t stream) {
+  static constexpr unsigned long MAX_BYTES_PER_LDG = 16;
+
+  static constexpr int BYTES_PER_LDG = std::min((int)MAX_BYTES_PER_LDG, (int)sizeof(T) * EXPERTS);
+  using Constants = detail::TopkConstants<T, EXPERTS, BYTES_PER_LDG>;
+  static constexpr int VPT = Constants::VPT;
+  static constexpr int ROWS_PER_WARP = Constants::ROWS_PER_WARP;
+  const int num_warps = (num_rows + ROWS_PER_WARP - 1) / ROWS_PER_WARP;
+  const int num_blocks = (num_warps + WARPS_PER_TB - 1) / WARPS_PER_TB;
+
+  dim3 block_dim(WARP_SIZE, WARPS_PER_TB);
+  topk_gating_softmax<T, VPT, EXPERTS, WARPS_PER_TB, BYTES_PER_LDG>
+      <<<num_blocks, block_dim, 0, stream>>>(input, finished, output, num_rows, indices, source_row, k);
+}
+
+template <typename T>
+void topk_gating_softmax_kernelLauncher(const T* input, const bool* finished, T* output, T* softmax_temp_output,
+                                        int* indices, int* source_row, int num_rows, int num_experts,
+                                        int k, cudaStream_t stream) {
+  static constexpr int WARPS_PER_TB = 4;
+
+  switch (num_experts) {
+    case 2: {
+      topk_gating_softmax_launcher_helper<T, 2, WARPS_PER_TB>(input, finished, output, indices, source_row, num_rows,
+                                                              num_experts, k, stream);
+      break;
+    }
+    case 4: {
+      topk_gating_softmax_launcher_helper<T, 4, WARPS_PER_TB>(input, finished, output, indices, source_row, num_rows,
+                                                              num_experts, k, stream);
+      break;
+    }
+    case 8: {
+      topk_gating_softmax_launcher_helper<T, 8, WARPS_PER_TB>(input, finished, output, indices, source_row, num_rows,
+                                                              num_experts, k, stream);
+      break;
+    }
+    case 16: {
+      topk_gating_softmax_launcher_helper<T, 16, WARPS_PER_TB>(input, finished, output, indices, source_row, num_rows,
+                                                               num_experts, k, stream);
+      break;
+    }
+    case 32: {
+      topk_gating_softmax_launcher_helper<T, 32, WARPS_PER_TB>(input, finished, output, indices, source_row, num_rows,
+                                                               num_experts, k, stream);
+      break;
+    }
+    case 64: {
+      topk_gating_softmax_launcher_helper<T, 64, WARPS_PER_TB>(input, finished, output, indices, source_row, num_rows,
+                                                               num_experts, k, stream);
+      break;
+    }
+    case 128: {
+      topk_gating_softmax_launcher_helper<T, 128, WARPS_PER_TB>(input, finished, output, indices, source_row, num_rows,
+                                                                num_experts, k, stream);
+      break;
+    }
+    case 256: {
+      topk_gating_softmax_launcher_helper<T, 256, WARPS_PER_TB>(input, finished, output, indices, source_row, num_rows,
+                                                                num_experts, k, stream);
+      break;
+    }
+    default: {
+      static constexpr int TPB = 256;
+      moe_softmax<T, TPB><<<num_rows, TPB, 0, stream>>>(input, finished, softmax_temp_output, num_experts);
+      moe_top_k<T, TPB>
+          <<<num_rows, TPB, 0, stream>>>(softmax_temp_output, finished, output, indices, source_row, num_experts, k);
+    }
+  }
+}
+
+// ========================== CUB Sorting things ====================================
+CubKeyValueSorter::CubKeyValueSorter() : num_experts_(0), num_bits_(sizeof(int) * 8) {}
+
+CubKeyValueSorter::CubKeyValueSorter(int num_experts)
+    : num_experts_(num_experts), num_bits_((int)log2(num_experts) + 1) {}
+
+void CubKeyValueSorter::update_num_experts(int num_experts) {
+  num_experts_ = num_experts;
+  num_bits_ = (int)log2(num_experts) + 1;
+}
+
+size_t CubKeyValueSorter::getWorkspaceSize(const size_t num_key_value_pairs) {
+  num_key_value_pairs_ = num_key_value_pairs;
+  size_t required_storage = 0;
+  int* null_int = nullptr;
+  cub::DeviceRadixSort::SortPairs(NULL, required_storage, null_int, null_int, null_int, null_int,
+                                  (int)num_key_value_pairs, 0, num_bits_);
+  return required_storage;
+}
+
+void CubKeyValueSorter::run(void* workspace, const size_t workspace_size, const int* keys_in, int* keys_out,
+                            const int* values_in, int* values_out, const size_t num_key_value_pairs,
+                            cudaStream_t stream) {
+  size_t expected_ws_size = getWorkspaceSize(num_key_value_pairs);
+  size_t actual_ws_size = workspace_size;
+
+  if (expected_ws_size > workspace_size) {
+    ORT_THROW("Error. The allocated workspace is too small to run this problem. Expected workspace size of at least ",
+              expected_ws_size, " but got problem size ", workspace_size, "\n");
+  }
+  cub::DeviceRadixSort::SortPairs(workspace, actual_ws_size, keys_in, keys_out, values_in, values_out,
+                                  (int)num_key_value_pairs, 0, num_bits_, stream);
+}
+
+// ============================== Infer GEMM sizes =================================
+__device__ inline int find_total_elts_leq_target(const int* sorted_indices, const int arr_length, const int target) {
+  int64_t low = 0, high = arr_length - 1, target_location = -1;
+  while (low <= high) {
+    int64_t mid = (low + high) / 2;
+
+    if (sorted_indices[mid] > target) {
+      high = mid - 1;
+    } else {
+      low = mid + 1;
+      target_location = mid;
+    }
+  }
+  return target_location + 1;
+}
+
+// Sets up the gemm assuming the inputs, experts and outputs are stored in row major order.
+// Assumes we want to perform output = matmul(inputs, experts) + bias
+__global__ void compute_total_rows_before_expert_kernel(const int* sorted_experts, const int sorted_experts_len,
+                                                        const int64_t num_experts, int64_t* total_rows_before_expert) {
+  // First, compute the global tid. We only need 1 thread per expert.
+  const int expert = blockIdx.x * blockDim.x + threadIdx.x;
+  if (expert >= num_experts) return;
+
+  // This should construct the last index where each expert occurs.
+  total_rows_before_expert[expert] = find_total_elts_leq_target(sorted_experts, sorted_experts_len, expert);
+}
+
+template <typename T, typename WeightType, typename Enable>
+CutlassMoeFCRunner<T, WeightType, Enable>::CutlassMoeFCRunner(int sm_version) {
+  moe_gemm_runner_.initialize(sm_version);
+}
+
+template <typename T, typename WeightType, typename Enable>
+size_t CutlassMoeFCRunner<T, WeightType, Enable>::getWorkspaceSize(int num_rows, const int hidden_size,
+                                                                   const int inter_size, int num_experts,
+                                                                   int k) {
+  const int buf_size = static_cast<int>(pad_to_multiple_of_16(k * num_rows * hidden_size));
+  const int interbuf_size = static_cast<int>(pad_to_multiple_of_16(k * num_rows * inter_size));
+  const int padded_experts = static_cast<int>(pad_to_multiple_of_16(num_experts));
+  const int num_moe_inputs = static_cast<int>(pad_to_multiple_of_16(k * num_rows));
+  int num_softmax_outs = 0;
+
+  const bool is_pow_2 = (num_experts != 0) && ((num_experts & (num_experts - 1)) == 0);
+  if (!is_pow_2 || num_experts > 256) {
+    num_softmax_outs = static_cast<int>(pad_to_multiple_of_16(num_rows * num_experts));
+  }
+
+  // softmax output, permuted_rows and permuted_experts have moved to outside of moe kernel, allocate them
+  // in Encoder or Decoder before invoking FfnLayer forward.
+  size_t total_ws_bytes = 3 * num_moe_inputs * sizeof(int);  // source_rows_, permuted_rows_, permuted_experts_
+  total_ws_bytes += buf_size * sizeof(T);                    // permuted_data
+  total_ws_bytes += padded_experts * sizeof(int64_t);        // Hold total_rows_before_expert_
+  total_ws_bytes += num_softmax_outs * sizeof(T);
+  const int bytes_for_fc1_result = interbuf_size * sizeof(T);
+  const int sorter_ws_size_bytes = static_cast<int>(pad_to_multiple_of_16(sorter_.getWorkspaceSize(num_rows)));
+  sorter_.update_num_experts(num_experts);
+
+  int bytes_for_intermediate_and_sorting = bytes_for_fc1_result;
+  if (sorter_ws_size_bytes > bytes_for_fc1_result) {
+    int remaining_bytes = static_cast<int>(pad_to_multiple_of_16(sorter_ws_size_bytes - bytes_for_fc1_result));
+    bytes_for_intermediate_and_sorting += remaining_bytes;
+  }
+
+  total_ws_bytes += bytes_for_intermediate_and_sorting;  // intermediate (fc1) output + cub sorting workspace
+  return total_ws_bytes;
+}
+
+template <typename T, typename WeightType, typename Enable>
+void CutlassMoeFCRunner<T, WeightType, Enable>::configure_ws_ptrs(char* ws_ptr, int num_rows,
+                                                                  const int hidden_size, const int inter_size,
+                                                                  int num_experts, int k) {
+  const int buf_size = static_cast<int>(pad_to_multiple_of_16(k * num_rows * hidden_size));
+  const int interbuf_size = static_cast<int>(pad_to_multiple_of_16(k * num_rows * inter_size));
+  const int padded_experts = static_cast<int>(pad_to_multiple_of_16(num_experts));
+  const int num_moe_inputs = static_cast<int>(pad_to_multiple_of_16(k * num_rows));
+  // const int num_softmax_outs = pad_to_multiple_of_16(num_rows * num_experts);
+
+  source_rows_ = (int*)ws_ptr;
+  permuted_rows_ = source_rows_ + num_moe_inputs;
+  permuted_experts_ = permuted_rows_ + num_moe_inputs;
+  permuted_data_ = (T*)(permuted_experts_ + num_moe_inputs);
+
+  total_rows_before_expert_ = (int64_t*)(permuted_data_ + buf_size);
+
+  fc1_result_ = (T*)(total_rows_before_expert_ + padded_experts);
+
+  const bool is_pow_2 = (num_experts != 0) && ((num_experts & (num_experts - 1)) == 0);
+  if (!is_pow_2 || num_experts > 256) {
+    softmax_out_ = (T*)(fc1_result_ + interbuf_size);
+  } else {
+    softmax_out_ = nullptr;
+  }
+}
+
+template <typename T, typename WeightType, typename Enable>
+void CutlassMoeFCRunner<T, WeightType, Enable>::run_moe_fc(
+    const T* input_activations, const T* gating_output, const WeightType* fc1_expert_weights, const T* fc1_scales,
+    const T* fc1_expert_biases, ActivationType fc1_activation_type, const WeightType* fc2_expert_weights,
+    const T* fc2_scales, int num_rows, const int hidden_size, const int inter_size, int num_experts,
+    int k, char* workspace_ptr, T* fc2_result, const bool* finished, int active_rows, T* expert_scales,
+    int* expanded_source_row_to_expanded_dest_row, int* expert_for_source_row, cudaStream_t stream) {
+  static constexpr bool scales_required =
+      std::is_same<WeightType, uint8_t>::value || std::is_same<WeightType, cutlass::uint4b_t>::value;
+
+  if (scales_required) {
+    if (fc1_scales == nullptr) {
+      ORT_THROW("[FT Error][Run MoE FC] Scales expected but scale for first matmul is a null pointer");
+    } else if (fc2_scales == nullptr) {
+      ORT_THROW("[FT Error][Run MoE FC] Scales expected but scale for second matmul is a null pointer");
+    }
+  } else {
+    if (fc1_scales != nullptr) {
+      ORT_THROW("[FT Error][Run MoE FC] Scales are ignored for fp32/fp16/bf16 but received scale for FC1");
+    } else if (fc2_scales != nullptr) {
+      ORT_THROW("[FT Error][Run MoE FC] Scales are ignored for fp32/fp16/bf16 but received scale for FC2");
+    }
+  }
+
+  configure_ws_ptrs(workspace_ptr, num_rows, hidden_size, inter_size, num_experts, k);
+  topk_gating_softmax_kernelLauncher<T>(gating_output, finished, expert_scales, softmax_out_, expert_for_source_row,
+                                        source_rows_, num_rows, num_experts, k, stream);
+
+  const int sorter_ws_size_bytes = static_cast<int>(pad_to_multiple_of_16(sorter_.getWorkspaceSize(k * num_rows)));
+  sorter_.run((void*)fc1_result_, sorter_ws_size_bytes, expert_for_source_row, permuted_experts_, source_rows_,
+              permuted_rows_, k * num_rows, stream);
+
+  initialize_moe_routing_kernelLauncher(input_activations, permuted_data_, permuted_rows_,
+                                        expanded_source_row_to_expanded_dest_row, num_rows, active_rows, hidden_size, k,
+                                        stream);
+
+  const int expanded_active_expert_rows = k * active_rows;
+  compute_total_rows_before_expert(permuted_experts_, expanded_active_expert_rows, num_experts,
+                                   total_rows_before_expert_, stream);
+
+  moe_gemm_runner_.moe_gemm_bias_act(permuted_data_, fc1_expert_weights, fc1_scales, fc1_expert_biases, fc1_result_,
+                                     total_rows_before_expert_, expanded_active_expert_rows, inter_size, hidden_size,
+                                     num_experts, fc1_activation_type, stream);
+
+  moe_gemm_runner_.moe_gemm(fc1_result_, fc2_expert_weights, fc2_scales, fc2_result, total_rows_before_expert_,
+                            expanded_active_expert_rows, hidden_size, inter_size, num_experts, stream);
+}
+
+template <typename T, typename WeightType, typename Enable>
+void CutlassMoeFCRunner<T, WeightType, Enable>::run_moe_fc(
+    const T* input_activations, const T* gating_output, const WeightType* fc1_expert_weights, const T* fc1_scales,
+    const T* fc1_expert_biases, ActivationType fc1_activation_type, const WeightType* fc2_expert_weights,
+    const T* fc2_scales, int num_rows, const int hidden_size, const int inter_size, int num_experts,
+    int k, char* workspace_ptr, T* fc2_result, T* expert_scales, int* expanded_source_row_to_expanded_dest_row,
+    int* expert_for_source_row, cudaStream_t stream) {
+  run_moe_fc(input_activations, gating_output, fc1_expert_weights, fc1_scales, fc1_expert_biases, fc1_activation_type,
+             fc2_expert_weights, fc2_scales, num_rows, hidden_size, inter_size, num_experts, k, workspace_ptr,
+             fc2_result, nullptr, num_rows, expert_scales, expanded_source_row_to_expanded_dest_row,
+             expert_for_source_row, stream);
+}
+
+template <typename T, typename WeightType, typename Enable>
+void CutlassMoeFCRunner<T, WeightType, Enable>::compute_total_rows_before_expert(const int* sorted_indices,
+                                                                                 const int total_indices,
+                                                                                 int num_experts,
+                                                                                 int64_t* total_rows_before_expert,
+                                                                                 cudaStream_t stream) {
+  const int threads = std::min(1024, num_experts);
+  const int blocks = (num_experts + threads - 1) / threads;
+
+  compute_total_rows_before_expert_kernel<<<blocks, threads, 0, stream>>>(sorted_indices, total_indices, num_experts,
+                                                                          total_rows_before_expert);
+}
+
+// ========================== Permutation things =======================================
+
+// Duplicated and permutes rows for MoE. In addition, reverse the permutation map to help with finalizing routing.
+
+// "expanded_x_row" simply means that the number of values is num_rows x k. It is "expanded" since we will have to
+// duplicate some rows in the input matrix to match the dimensions. Duplicates will always get routed to separate
+// experts in the end.
+
+// Note that the expanded_dest_row_to_expanded_source_row map referred to here has indices in the range (0,
+// k*rows_in_input - 1). However, it is set up so that index 0, rows_in_input, 2*rows_in_input ... (k-1)*rows_in_input
+// all map to row 0 in the original matrix. Thus, to know where to read in the source matrix, we simply take the modulus
+// of the expanded index.
+
+template <typename T>
+__global__ void initialize_moe_routing_kernel(const T* unpermuted_input, T* permuted_output,
+                                              const int* expanded_dest_row_to_expanded_source_row,
+                                              int* expanded_source_row_to_expanded_dest_row, int num_rows,
+                                              int active_rows, int cols) {
+  // Reverse permutation map.
+  // I do this so that later, we can use the source -> dest map to do the k-way reduction and unpermuting. I need the
+  // reverse map for that reduction to allow each threadblock to do 1 k-way reduce without atomics later in MoE. 1
+  // thread block will be responsible for all k summations.
+  const int expanded_dest_row = blockIdx.x;
+  const int expanded_source_row = expanded_dest_row_to_expanded_source_row[expanded_dest_row];
+  if (threadIdx.x == 0) {
+    expanded_source_row_to_expanded_dest_row[expanded_source_row] = expanded_dest_row;
+  }
+
+  if (blockIdx.x < active_rows) {
+    // Duplicate and permute rows
+    const int source_row = expanded_source_row % num_rows;
+
+    const T* source_row_ptr = unpermuted_input + source_row * cols;
+    T* dest_row_ptr = permuted_output + expanded_dest_row * cols;
+
+    for (int tid = threadIdx.x; tid < cols; tid += blockDim.x) {
+      dest_row_ptr[tid] = source_row_ptr[tid];
+    }
+  }
+}
+
+template <typename T>
+void initialize_moe_routing_kernelLauncher(const T* unpermuted_input, T* permuted_output,
+                                           const int* expanded_dest_row_to_expanded_source_row,
+                                           int* expanded_source_row_to_expanded_dest_row, int num_rows,
+                                           int active_rows, int cols, int k, cudaStream_t stream) {
+  const int blocks = num_rows * k;
+  const int threads = std::min(cols, 1024);
+  initialize_moe_routing_kernel<T>
+      <<<blocks, threads, 0, stream>>>(unpermuted_input, permuted_output, expanded_dest_row_to_expanded_source_row,
+                                       expanded_source_row_to_expanded_dest_row, num_rows, k * active_rows, cols);
+}
+
+// Final kernel to unpermute and scale
+// This kernel unpermutes the original data, does the k-way reduction and performs the final skip connection.
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 530
+template <typename T, int RESIDUAL_NUM>
+__global__ void finalize_moe_routing_kernel(const T*, T*, const T*, const T*, const T*, const T*, const int*,
+                                            const int*, int, const int) {
+  // Does not support pre-Kepler architectures
+  ;
+}
+#else
+template <typename T, int RESIDUAL_NUM>
+__global__ void finalize_moe_routing_kernel(const T* expanded_permuted_rows, T* reduced_unpermuted_output,
+                                            const T* skip_1, const T* skip_2, const T* bias, const T* scales,
+                                            const int* expanded_source_row_to_expanded_dest_row,
+                                            const int* expert_for_source_row, int cols, int k) {
+  const int original_row = blockIdx.x;
+  int num_rows = gridDim.x;
+  T* reduced_row_ptr = reduced_unpermuted_output + original_row * cols;
+
+  const T* skip_1_row_ptr = nullptr;
+  if (RESIDUAL_NUM == 1) {
+    skip_1_row_ptr = skip_1 + original_row * cols;
+  }
+  const T* skip_2_row_ptr = nullptr;
+  if (RESIDUAL_NUM == 2) {
+    skip_2_row_ptr = skip_2 + original_row * cols;
+  }
+
+  for (int tid = threadIdx.x; tid < cols; tid += blockDim.x) {
+    T thread_output;
+    if (RESIDUAL_NUM == 0) {
+      thread_output = T(0);
+    } else if (RESIDUAL_NUM == 1) {
+      thread_output = skip_1_row_ptr[tid];
+    } else if (RESIDUAL_NUM == 2) {
+      thread_output = skip_1_row_ptr[tid] + skip_2_row_ptr[tid];
+    }
+    for (int k_idx = 0; k_idx < k; ++k_idx) {
+      const int expanded_original_row = original_row + k_idx * num_rows;
+      const int expanded_permuted_row = expanded_source_row_to_expanded_dest_row[expanded_original_row];
+
+      const int64_t k_offset = original_row * k + k_idx;
+      const T row_scale = scales[k_offset];
+      const T* expanded_permuted_rows_row_ptr = expanded_permuted_rows + expanded_permuted_row * cols;
+
+      const int expert_idx = expert_for_source_row[k_offset];
+      const T* bias_ptr = bias + expert_idx * cols;
+
+      thread_output = thread_output + row_scale * (expanded_permuted_rows_row_ptr[tid] + bias_ptr[tid]);
+    }
+    reduced_row_ptr[tid] = thread_output;
+  }
+}
+#endif
+
+template <typename T>
+void finalize_moe_routing_kernelLauncher(const T* expanded_permuted_rows, T* reduced_unpermuted_output, const T* bias,
+                                         const T* scales, const int* expanded_source_row_to_expanded_dest_row,
+                                         const int* expert_for_source_row, int num_rows, int cols,
+                                         int k, cudaStream_t stream) {
+  const int blocks = num_rows;
+  const int threads = std::min(cols, 1024);
+  finalize_moe_routing_kernel<T, 0><<<blocks, threads, 0, stream>>>(
+      expanded_permuted_rows, reduced_unpermuted_output, nullptr, nullptr, bias, scales,
+      expanded_source_row_to_expanded_dest_row, expert_for_source_row, cols, k);
+}
+
+template <typename T>
+void finalize_moe_routing_kernelLauncher(const T* expanded_permuted_rows, T* reduced_unpermuted_output, const T* skip,
+                                         const T* bias, const T* scales,
+                                         const int* expanded_source_row_to_expanded_dest_row,
+                                         const int* expert_for_source_row, int num_rows, int cols,
+                                         int k, cudaStream_t stream) {
+  const int blocks = num_rows;
+  const int threads = std::min(cols, 1024);
+  finalize_moe_routing_kernel<T, 1>
+      <<<blocks, threads, 0, stream>>>(expanded_permuted_rows, reduced_unpermuted_output, skip, nullptr, bias, scales,
+                                       expanded_source_row_to_expanded_dest_row, expert_for_source_row, cols, k);
+}
+
+template <typename T>
+void finalize_moe_routing_kernelLauncher(const T* expanded_permuted_rows, T* reduced_unpermuted_output, const T* skip_1,
+                                         const T* skip_2, const T* bias, const T* scales,
+                                         const int* expanded_source_row_to_expanded_dest_row,
+                                         const int* expert_for_source_row, int num_rows, int cols,
+                                         int k, cudaStream_t stream) {
+  const int blocks = num_rows;
+  const int threads = std::min(cols, 1024);
+  if (skip_2 == nullptr) {
+    finalize_moe_routing_kernel<T, 1><<<blocks, threads, 0, stream>>>(
+        expanded_permuted_rows, reduced_unpermuted_output, skip_1, skip_2, bias, scales,
+        expanded_source_row_to_expanded_dest_row, expert_for_source_row, cols, k);
+  } else {
+    finalize_moe_routing_kernel<T, 2><<<blocks, threads, 0, stream>>>(
+        expanded_permuted_rows, reduced_unpermuted_output, skip_1, skip_2, bias, scales,
+        expanded_source_row_to_expanded_dest_row, expert_for_source_row, cols, k);
+  }
+}
+
+// ========================= TopK Softmax specializations ===========================
+template void topk_gating_softmax_kernelLauncher(const float*, const bool*, float*, float*, int*, int*, int,
+                                                 int, int, cudaStream_t);
+template void topk_gating_softmax_kernelLauncher(const half*, const bool*, half*, half*, int*, int*, int,
+                                                 int, int, cudaStream_t);
+
+// ==================== Variable batched GEMM specializations ==================================
+template class CutlassMoeFCRunner<float, float>;
+template class CutlassMoeFCRunner<half, half>;
+
+// ===================== Specializations for init routing =========================
+template void initialize_moe_routing_kernelLauncher(const float*, float*, const int*, int*, int, int,
+                                                    int, int, cudaStream_t);
+template void initialize_moe_routing_kernelLauncher(const half*, half*, const int*, int*, int, int,
+                                                    int, int, cudaStream_t);
+
+// ==================== Specializations for final routing ===================================
+template void finalize_moe_routing_kernelLauncher(const float*, float*, const float*, const float*, const int*,
+                                                  const int*, int, int, int, cudaStream_t);
+template void finalize_moe_routing_kernelLauncher(const half*, half*, const half*, const half*, const int*, const int*,
+                                                  int, int, int, cudaStream_t);
+template void finalize_moe_routing_kernelLauncher(const float*, float*, const float*, const float*, const float*,
+                                                  const int*, const int*, int, int, int,
+                                                  cudaStream_t);
+template void finalize_moe_routing_kernelLauncher(const half*, half*, const half*, const half*, const half*, const int*,
+                                                  const int*, int, int, int, cudaStream_t);
+template void finalize_moe_routing_kernelLauncher(const float*, float*, const float*, const float*, const float*,
+                                                  const float*, const int*, const int*, int, int, int,
+                                                  cudaStream_t);
+template void finalize_moe_routing_kernelLauncher(const half*, half*, const half*, const half*, const half*,
+                                                  const half*, const int*, const int*, int, int, int,
+                                                  cudaStream_t);
+
+}  // namespace ort_fastertransformer
diff --git a/onnxruntime/contrib_ops/cuda/moe/ft_moe/moe_kernel.h b/onnxruntime/contrib_ops/cuda/moe/ft_moe/moe_kernel.h
new file mode 100644
index 0000000000000..5cefe4fa5dc47
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/moe/ft_moe/moe_kernel.h
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "moe_gemm_kernels.h"
+#include <cuda_runtime_api.h>
+
+#include "core/common/common.h"
+
+using namespace onnxruntime;
+
+namespace ort_fastertransformer {
+
+static inline size_t pad_to_multiple_of_16(size_t input) {
+  static constexpr int ALIGNMENT = 16;
+  return ALIGNMENT * ((input + ALIGNMENT - 1) / ALIGNMENT);
+}
+
+/*
+  Launches the topk gating softmax required for the MoE layers.
+
+  Params:
+  input - a [num_rows x num_experts]
+  finished - [num_rows] vector with 1 if the sentence at this row is done translating and 0 otherwise.
+  output - a buffer of shape [num_rows x k] containing the top-k values of the softmax for each row.
+  indices - a matrix of shape [num_rows x k] containing the top-k experts each row should get routed to.
+  source_rows - a matrix of shape [num_rows x k] used internally for permuting. source_rows[row][k] =  k * num_rows +
+  row. It is constructed like this so we can track where each of the original rows end up in order to perform the
+                "k-way" reduction later in the routing.
+
+  num_rows - The number of rows in the matrix
+  num_experts - The number of expert layers present
+  k - k value in topk
+*/
+template <typename T>
+void topk_gating_softmax_kernelLauncher(const T* input, const bool* finished, T* output, T* softmax_temp_out,
+                                        int* indices, int* source_row, int num_rows, int num_experts,
+                                        int k, cudaStream_t stream);
+
+class CubKeyValueSorter {
+ public:
+  CubKeyValueSorter();
+
+  CubKeyValueSorter(int num_experts);
+
+  void update_num_experts(int num_experts);
+
+  size_t getWorkspaceSize(const size_t num_key_value_pairs);
+
+  void run(void* workspace, const size_t workspace_size, const int* keys_in, int* keys_out, const int* values_in,
+           int* values_out, const size_t num_key_value_pairs, cudaStream_t stream);
+
+ private:
+  size_t num_key_value_pairs_;
+  int num_experts_;
+  int num_bits_;
+};
+
+template <typename T>
+void initialize_moe_routing_kernelLauncher(const T* unpermuted_input, T* permuted_output,
+                                           const int* expanded_dest_row_to_expanded_source_row,
+                                           int* expanded_source_row_to_expanded_dest_row, int num_rows,
+                                           int active_rows, int cols, int k, cudaStream_t stream);
+
+template <typename T>
+void finalize_moe_routing_kernelLauncher(const T* expanded_permuted_rows, T* reduced_unpermuted_output, const T* bias,
+                                         const T* scales, const int* expanded_source_row_to_expanded_dest_row,
+                                         const int* expert_for_source_row, int num_rows, int cols,
+                                         int k, cudaStream_t stream);
+
+template <typename T>
+void finalize_moe_routing_kernelLauncher(const T* expanded_permuted_rows, T* reduced_unpermuted_output, const T* skip,
+                                         const T* bias, const T* scales,
+                                         const int* expanded_source_row_to_expanded_dest_row,
+                                         const int* expert_for_source_row, int num_rows, int cols,
+                                         int k, cudaStream_t stream);
+
+template <typename T>
+void finalize_moe_routing_kernelLauncher(const T* expanded_permuted_rows, T* reduced_unpermuted_output, const T* skip_1,
+                                         const T* skip_2, const T* bias, const T* scales,
+                                         const int* expanded_source_row_to_expanded_dest_row,
+                                         const int* expert_for_source_row, int num_rows, int cols,
+                                         int k, cudaStream_t stream);
+
+// Assumes inputs activations are row major. Weights need to be preprocessed by th_op/weight_quantize.cc .
+// Nested in a class to avoid multiple calls to cudaGetDeviceProperties as this call can be expensive.
+// Avoid making several duplicates of this class.
+template <typename T,          /*The type used for activations/scales/compute*/
+          typename WeightType, /* The type for the MoE weights */
+          typename Enable = void>
+class CutlassMoeFCRunner {
+ public:
+  CutlassMoeFCRunner(int sm_version);
+
+  size_t getWorkspaceSize(int num_rows, int hidden_size, int inter_size, int num_experts, int k);
+
+  void run_moe_fc(const T* input_activations, const T* gating_output, const WeightType* fc1_expert_weights,
+                  const T* fc1_scales, const T* fc1_expert_biases, ActivationType fc1_activation_type,
+                  const WeightType* fc2_expert_weights, const T* fc2_scales, int num_rows, int hidden_size,
+                  int inter_size, int num_experts, int k, char* workspace_ptr, T* fc2_result,
+                  T* expert_scales, int* expanded_source_row_to_expanded_dest_row, int* expert_for_source_row,
+                  cudaStream_t stream);
+
+  void run_moe_fc(const T* input_activations, const T* gating_output, const WeightType* fc1_expert_weights,
+                  const T* fc1_scales, const T* fc1_expert_biases, ActivationType fc1_activation_type,
+                  const WeightType* fc2_expert_weights, const T* fc2_scales, int num_rows, int hidden_size,
+                  int inter_size, int num_experts, int k, char* workspace_ptr, T* fc2_result,
+                  const bool* finished, int active_rows, T* expert_scales,
+                  int* expanded_source_row_to_expanded_dest_row, int* expert_for_source_row, cudaStream_t stream);
+
+  void compute_total_rows_before_expert(const int* sorted_indices, int total_indices, int num_experts,
+                                        int64_t* total_rows_before_expert, cudaStream_t stream);
+
+ private:
+  void configure_ws_ptrs(char* ws_ptr, int num_rows, int hidden_size, int inter_size, int num_experts, int k);
+
+ private:
+  CubKeyValueSorter sorter_;
+  MoeGemmRunner<T, WeightType> moe_gemm_runner_;
+
+  // Pointers
+  int* source_rows_;
+  int* permuted_rows_;
+  int* permuted_experts_;
+  char* sorter_ws_;
+  T* permuted_data_;
+  T* softmax_out_;
+
+  int64_t* total_rows_before_expert_;
+
+  T* fc1_result_;
+};
+
+template <typename WeightType>
+class CutlassMoeFCRunner<float, WeightType, typename std::enable_if_t<!std::is_same<float, WeightType>::value>> {
+ public:
+  CutlassMoeFCRunner(int sm_version);
+
+  size_t getWorkspaceSize(int num_rows, int hidden_size, int inter_size, int num_experts, int k) {
+    return 0;
+  }
+};
+
+}  // namespace ort_fastertransformer
\ No newline at end of file
diff --git a/onnxruntime/contrib_ops/cuda/moe/ft_moe/moe_problem_visitor.h b/onnxruntime/contrib_ops/cuda/moe/ft_moe/moe_problem_visitor.h
new file mode 100644
index 0000000000000..00f977c615df6
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/moe/ft_moe/moe_problem_visitor.h
@@ -0,0 +1,290 @@
+/***************************************************************************************************
+ * Copyright (c) 2017 - 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ **************************************************************************************************/
+
+/*! \file
+    \brief Base scheduler for grouped problems, using MoE
+*/
+
+#pragma once
+
+#include "cutlass/gemm/kernel/grouped_problem_visitor.h"
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+
+namespace cutlass {
+namespace gemm {
+namespace kernel {
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+
+/// Visitor class to abstract away the algorithm for iterating over tiles
+template <typename ProblemSizeHelper, typename ThreadblockShape_>
+struct BaseMoeProblemVisitor {
+  using ThreadblockShape = ThreadblockShape_;
+
+  struct ProblemInfo {
+    static int32_t const kNoPrefetchEntry = -1;
+    int32_t problem_idx;
+    int32_t problem_start;
+
+    CUTLASS_DEVICE
+    ProblemInfo() : problem_idx(kNoPrefetchEntry), problem_start(kNoPrefetchEntry) {}
+
+    CUTLASS_DEVICE
+    ProblemInfo(int32_t problem_idx_, int32_t problem_start_)
+        : problem_idx(problem_idx_), problem_start(problem_start_) {}
+  };
+
+  struct Params {
+    int64_t const* last_row_for_problem;
+    int64_t gemm_n;
+    int64_t gemm_k;
+    int32_t problem_count;
+    void const* workspace;
+    int32_t tile_count;
+
+    //
+    // Methods
+    //
+
+    /// Ctor
+    CUTLASS_HOST_DEVICE
+    Params()
+        : last_row_for_problem(nullptr), gemm_n(0), gemm_k(0), problem_count(0), workspace(nullptr), tile_count(0) {}
+
+    /// Ctor
+    CUTLASS_HOST_DEVICE
+    Params(int64_t const* last_row_for_problem, int64_t gemm_n, int64_t gemm_k, int32_t problem_count,
+           void const* workspace = nullptr, int32_t tile_count = 0)
+        : last_row_for_problem(last_row_for_problem),
+          gemm_n(gemm_n),
+          gemm_k(gemm_k),
+          problem_count(problem_count),
+          workspace(workspace),
+          tile_count(tile_count) {}
+  };
+
+  Params const& params;
+  int32_t tile_idx;
+  int32_t problem_tile_start;
+  int32_t problem_idx;
+
+  //
+  // Methods
+  //
+  CUTLASS_DEVICE
+  BaseMoeProblemVisitor(Params const& params_, int32_t block_idx)
+      : params(params_), tile_idx(block_idx), problem_tile_start(0), problem_idx(0) {}
+
+  /// Get the grid shape
+  CUTLASS_HOST_DEVICE
+  static cutlass::gemm::GemmCoord grid_shape(const cutlass::gemm::GemmCoord& problem) {
+    return cutlass::gemm::GemmCoord(((problem.m() - 1 + ThreadblockShape::kM) / ThreadblockShape::kM),
+                                    ((problem.n() - 1 + ThreadblockShape::kN) / ThreadblockShape::kN), 1);
+  }
+
+  /// Gets the global tile index
+  CUTLASS_HOST_DEVICE
+  int32_t tile_index() const { return tile_idx; }
+
+  /// Gets the index of the problem
+  CUTLASS_HOST_DEVICE
+  int32_t problem_index() const { return problem_idx; }
+
+  CUTLASS_HOST_DEVICE
+  int32_t threadblock_idx() const { return tile_idx - problem_tile_start; }
+
+  CUTLASS_DEVICE
+  void advance(int32_t grid_size) { tile_idx += grid_size; }
+
+  CUTLASS_HOST_DEVICE
+  static void possibly_transpose_problem(cutlass::gemm::GemmCoord& problem) {
+    ProblemSizeHelper::possibly_transpose_problem(problem);
+  }
+
+  /// Returns the problem size for the current problem
+  CUTLASS_HOST_DEVICE
+  cutlass::gemm::GemmCoord problem_size() const { return problem_size(problem_idx); }
+
+  CUTLASS_HOST_DEVICE
+  cutlass::gemm::GemmCoord problem_size(int idx) const {
+    const int64_t prev_problem_row = idx == 0 ? 0 : params.last_row_for_problem[idx - 1];
+    const int64_t current_problem_row = params.last_row_for_problem[idx];
+    const int64_t gemm_m = current_problem_row - prev_problem_row;
+    GemmCoord problem(GemmCoord::Index(gemm_m), GemmCoord::Index(params.gemm_n), GemmCoord::Index(params.gemm_k));
+    ProblemSizeHelper::possibly_transpose_problem(problem);
+    return problem;
+  }
+
+  CUTLASS_HOST_DEVICE
+  static int32_t tile_count(const cutlass::gemm::GemmCoord& grid) { return ProblemSizeHelper::tile_count(grid); }
+
+  static int32_t group_tile_count(const cutlass::gemm::GemmCoord* host_problem_sizes_ptr, int32_t problem_count) {
+    int32_t total_tiles = 0;
+    for (int32_t i = 0; i < problem_count; ++i) {
+      auto problem = host_problem_sizes_ptr[i];
+      possibly_transpose_problem(problem);
+      auto grid = grid_shape(problem);
+      total_tiles += tile_count(grid);
+    }
+
+    return total_tiles;
+  }
+};
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+
+template <typename ProblemSizeHelper, typename ThreadblockShape, GroupScheduleMode GroupScheduleMode_,
+          int PrefetchTileCount, int ThreadCount>
+struct MoeProblemVisitor;
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+// ProblemVisitor that performs all scheduling on device
+//
+template <typename ProblemSizeHelper, typename ThreadblockShape, int PrefetchTileCount, int ThreadCount>
+struct MoeProblemVisitor<ProblemSizeHelper, ThreadblockShape, GroupScheduleMode::kDeviceOnly, PrefetchTileCount,
+                         ThreadCount> : public BaseMoeProblemVisitor<ProblemSizeHelper, ThreadblockShape> {
+  using Base = BaseMoeProblemVisitor<ProblemSizeHelper, ThreadblockShape>;
+  using Params = typename Base::Params;
+  static int const kThreadCount = ThreadCount;
+  static bool const kRequiresPrecomputation = false;
+  static int const kThreadsPerWarp = 32;
+
+  struct SharedStorage {};
+
+  // Final tile of the problem loaded by this thread. Each thread will hold
+  // a separate value.
+  int32_t problem_ending_tile;
+
+  SharedStorage& shared_storage;
+
+  //
+  // Methods
+  //
+  CUTLASS_DEVICE
+  MoeProblemVisitor(Params const& params_, SharedStorage& shared_storage_, int32_t block_idx)
+      : Base(params_, block_idx), problem_ending_tile(0), shared_storage(shared_storage_) {
+    this->problem_idx = -1 * kThreadsPerWarp;
+    this->problem_tile_start = 0;
+  }
+
+  CUTLASS_DEVICE
+  bool next_tile() {
+    // Check whether the tile to compute is within the range of the current problem.
+    int32_t problem_tile_end = __shfl_sync(0xffffffff, problem_ending_tile, this->problem_idx % kThreadsPerWarp);
+    if (this->tile_idx < problem_tile_end) {
+      return true;
+    }
+
+    // Check whether the tile to compute is within the current group of problems fetched by the warp.
+    // The last tile for this group is the final tile of the problem held by the final thread in the warp.
+    int32_t group_tile_end = __shfl_sync(0xffffffff, problem_ending_tile, kThreadsPerWarp - 1);
+
+    // Keep the starting problem for this group in `problem_idx`. This is done to reduce
+    // register pressure. The starting problem for this group is simply the first problem
+    // in the group most recently fetched by the warp.
+    int32_t& group_problem_start = this->problem_idx;
+    group_problem_start = (this->problem_idx / kThreadsPerWarp) * kThreadsPerWarp;
+
+    // Keep the starting tile for this group in `problem_tile_start`. This is done to reduce
+    // register pressure.
+    int32_t& group_tile_start = this->problem_tile_start;
+
+    // Each thread in the warp processes a separate problem to advance until
+    // reaching a problem whose starting tile is less less than tile_idx.
+    while (group_tile_end <= this->tile_idx) {
+      group_problem_start += kThreadsPerWarp;
+      if (group_problem_start > this->params.problem_count) {
+        return false;
+      }
+
+      // Since `group_tile_start` is a reference to `this->problem_tile_start`, this
+      // also sets `this->problem_tile_start`. The fact that `this->problem_tile_start`
+      // is also set here is used later in `next_tile`.
+      group_tile_start = group_tile_end;
+
+      int lane_idx = threadIdx.x % kThreadsPerWarp;
+      int32_t lane_problem = group_problem_start + lane_idx;
+
+      // Compute the number of tiles in the problem assigned to each thread.
+      problem_ending_tile = 0;
+      if (lane_problem < this->params.problem_count) {
+        cutlass::gemm::GemmCoord problem = this->problem_size(lane_problem);
+        cutlass::gemm::GemmCoord grid = this->grid_shape(problem);
+        problem_ending_tile = this->tile_count(grid);
+      }
+
+      // Compute a warp-wide inclusive prefix sum to compute the ending tile index of
+      // each thread's problem.
+      CUTLASS_PRAGMA_UNROLL
+      for (int i = 1; i < kThreadsPerWarp; i <<= 1) {
+        int32_t val = __shfl_up_sync(0xffffffff, problem_ending_tile, i);
+        if (lane_idx >= i) {
+          problem_ending_tile += val;
+        }
+      }
+
+      // The total tile count for this group is now in the final position of the prefix sum
+      int32_t tiles_in_group = __shfl_sync(0xffffffff, problem_ending_tile, kThreadsPerWarp - 1);
+
+      problem_ending_tile += group_tile_start;
+      group_tile_end += tiles_in_group;
+    }
+
+    // The next problem to process is the first one that does not have ending tile position
+    // that is greater than or equal to tile index.
+    int32_t problem_idx_in_group = __popc(__ballot_sync(0xffffffff, problem_ending_tile <= this->tile_idx));
+
+    this->problem_idx = group_problem_start + problem_idx_in_group;
+
+    // The starting tile for this problem is the ending tile of the previous problem. In cases
+    // where `problem_idx_in_group` is the first problem in the group, we do not need to reset
+    // `problem_tile_start`, because it is set to the previous group's ending tile in the while
+    // loop above.
+    if (problem_idx_in_group > 0) {
+      this->problem_tile_start = __shfl_sync(0xffffffff, problem_ending_tile, problem_idx_in_group - 1);
+    }
+
+    return true;
+  }
+
+  static size_t get_workspace_size(const cutlass::gemm::GemmCoord* host_problem_sizes_ptr, int32_t problem_count,
+                                   int32_t block_count) {
+    return 0;
+  }
+
+  static void host_precompute(const cutlass::gemm::GemmCoord* host_problem_sizes_ptr, int32_t problem_count,
+                              int32_t block_count, void* host_workspace_ptr) {}
+};
+
+}  // namespace kernel
+}  // namespace gemm
+}  // namespace cutlass
diff --git a/onnxruntime/contrib_ops/cuda/moe/ft_moe/tile_interleaved_layout.h b/onnxruntime/contrib_ops/cuda/moe/ft_moe/tile_interleaved_layout.h
new file mode 100644
index 0000000000000..3505bea24e4d9
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/moe/ft_moe/tile_interleaved_layout.h
@@ -0,0 +1,61 @@
+/***************************************************************************************************
+ * Copyright (c) 2017 - 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ **************************************************************************************************/
+/*! \file
+    \brief Defines new layouts needed for MoE
+*/
+#pragma once
+
+#include "cutlass/cutlass.h"
+#include "cutlass/fast_math.h"
+#include "cutlass/matrix_coord.h"
+#include "cutlass/pitch_linear_coord.h"
+
+namespace cutlass {
+namespace layout {
+
+template <int RowsPerTile, int ColumnsInterleaved>
+class ColumnMajorTileInterleave {
+  static constexpr int kRowsPerTile = RowsPerTile;
+  static constexpr int kColumnsInterleaved = ColumnsInterleaved;
+};
+
+template <class T>
+struct IsColumnMajorTileInterleave {
+  static constexpr bool value = false;
+};
+
+template <int U, int V>
+struct IsColumnMajorTileInterleave<ColumnMajorTileInterleave<U, V>> {
+  static constexpr bool value = true;
+};
+
+}  // namespace layout
+}  // namespace cutlass
diff --git a/onnxruntime/contrib_ops/cuda/moe/moe.cc b/onnxruntime/contrib_ops/cuda/moe/moe.cc
new file mode 100644
index 0000000000000..6f2ffe7a0cc43
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/moe/moe.cc
@@ -0,0 +1,197 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "core/common/safeint.h"
+#include "core/providers/cuda/cuda_common.h"
+#include "moe.h"
+
+using namespace onnxruntime::cuda;
+using namespace ::onnxruntime::common;
+using namespace ONNX_NAMESPACE;
+
+namespace onnxruntime {
+namespace contrib {
+namespace cuda {
+
+#define REGISTER_KERNEL_TYPED(T)                                  \
+  ONNX_OPERATOR_TYPED_KERNEL_EX(                                  \
+      MoE,                                                        \
+      kMSDomain,                                                  \
+      1,                                                          \
+      T,                                                          \
+      kCudaExecutionProvider,                                     \
+      (*KernelDefBuilder::Create())                               \
+          .MayInplace(0, 0)                                       \
+          .TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), \
+      MoE<T>);
+
+REGISTER_KERNEL_TYPED(float)
+REGISTER_KERNEL_TYPED(MLFloat16)
+
+using namespace ONNX_NAMESPACE;
+
+template <typename T>
+Status MoE<T>::ComputeInternal(OpKernelContext* context) const {
+  const Tensor* input = context->Input<Tensor>(0);
+  const Tensor* router_probs = context->Input<Tensor>(1);
+  const Tensor* fc1_experts_weights = context->Input<Tensor>(2);
+  const Tensor* fc2_experts_weights = context->Input<Tensor>(3);
+  const Tensor* fc1_experts_bias_optional = context->Input<Tensor>(4);
+  const Tensor* fc2_experts_bias_optional = context->Input<Tensor>(5);
+
+  const auto& input_dims = input->Shape().GetDims();
+  const auto& router_probs_dims = router_probs->Shape().GetDims();
+  const auto& fc1_experts_weights_dims = fc1_experts_weights->Shape().GetDims();
+  const auto& fc2_experts_weights_dims = fc2_experts_weights->Shape().GetDims();
+
+  const int64_t num_rows = input_dims.size() == 2 ? input_dims[0] : input_dims[0] * input_dims[1];
+  const int64_t hidden_size = input_dims[input_dims.size() - 1];
+  const int64_t num_experts = fc1_experts_weights_dims[0];
+  const int64_t inter_size = fc1_experts_weights_dims[2];
+
+  // TODO: refactor to helper function.
+  if (fc1_experts_weights_dims.size() != 3) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "fc1_experts_weights_dims must be 3D, got ",
+                           fc1_experts_weights_dims.size());
+  }
+  if (fc2_experts_weights_dims.size() != 3) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "fc2_experts_weights_dims must be 3D, got ",
+                           fc2_experts_weights_dims.size());
+  }
+  if (fc1_experts_weights_dims[1] != hidden_size) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                           "fc1_experts_weights_dims[1] must be equal to hidden_size, got ",
+                           fc1_experts_weights_dims[1], " and ", hidden_size);
+  }
+  if (fc2_experts_weights_dims[1] != inter_size) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                           "fc2_experts_weights_dims[1] must be equal to inter_size, got ", fc2_experts_weights_dims[1],
+                           " and ", inter_size);
+  }
+  if (fc1_experts_weights_dims[2] != inter_size) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                           "fc1_experts_weights_dims[2] must be equal to inter_size, got ", fc1_experts_weights_dims[2],
+                           " and ", inter_size);
+  }
+  if (fc2_experts_weights_dims[2] != hidden_size) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                           "fc2_experts_weights_dims[2] must be equal to hidden_size, got ",
+                           fc2_experts_weights_dims[2], " and ", hidden_size);
+  }
+  if (router_probs_dims.size() != 2) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "router_probs_dims must be 2D, got ",
+                           router_probs_dims.size());
+  }
+  if (router_probs_dims[0] != num_rows) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "router_probs_dims[0] must be equal to num_rows, got ",
+                           router_probs_dims[0], " and ", num_rows);
+  }
+  if (router_probs_dims[1] != num_experts) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "router_probs_dims[1] must be equal to num_experts, got ",
+                           router_probs_dims[1], " and ", num_experts);
+  }
+  if (fc1_experts_bias_optional != nullptr && fc2_experts_bias_optional == nullptr) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "fc1_experts_bias is set but fc2_experts_bias is not set");
+  }
+  if (fc1_experts_bias_optional == nullptr && fc2_experts_bias_optional != nullptr) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "fc1_experts_bias is not set but fc2_experts_bias is set");
+  }
+  if (fc1_experts_bias_optional != nullptr && fc2_experts_bias_optional != nullptr) {
+    const auto& fc1_experts_bias_dims = fc1_experts_bias_optional->Shape().GetDims();
+    const auto& fc2_experts_bias_dims = fc2_experts_bias_optional->Shape().GetDims();
+    if (fc1_experts_bias_dims.size() != 2) {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "fc1_experts_bias_dims must be 2D, got ",
+                             fc1_experts_bias_dims.size());
+    }
+    if (fc2_experts_bias_dims.size() != 2) {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "fc2_experts_bias_dims must be 2D, got ",
+                             fc2_experts_bias_dims.size());
+    }
+    if (fc1_experts_bias_dims[0] != num_experts) {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                             "fc1_experts_bias_dims[0] must be equal to num_experts, got ", fc1_experts_bias_dims[0],
+                             " and ", num_experts);
+    }
+    if (fc2_experts_bias_dims[0] != num_experts) {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                             "fc2_experts_bias_dims[0] must be equal to num_experts, got ", fc2_experts_bias_dims[0],
+                             " and ", num_experts);
+    }
+    if (fc1_experts_bias_dims[1] != inter_size) {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                             "fc1_experts_bias_dims[1] must be equal to inter_size, got ", fc1_experts_bias_dims[1],
+                             " and ", inter_size);
+    }
+    if (fc2_experts_bias_dims[1] != hidden_size) {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                             "fc2_experts_bias_dims[1] must be equal to hidden_size, got ", fc2_experts_bias_dims[1],
+                             " and ", hidden_size);
+    }
+  }
+
+  typedef typename ToCudaType<T>::MappedType CudaT;
+  auto stream = context->GetComputeStream();
+
+  auto& device_prop = GetDeviceProp();
+  const int sm = device_prop.major * 10 + device_prop.minor;
+
+  ort_fastertransformer::CutlassMoeFCRunner<CudaT, CudaT> moe_runner(sm);
+
+  size_t ws_size =
+      moe_runner.getWorkspaceSize(static_cast<int>(num_rows), static_cast<int>(hidden_size),
+                                  static_cast<int>(inter_size), static_cast<int>(num_experts), static_cast<int>(k_));
+  size_t fc2_output_size = k_ * num_rows * hidden_size * sizeof(CudaT);
+  size_t expert_scales_size = k_ * num_rows * sizeof(CudaT);
+  size_t expanded_source_row_to_expanded_dest_row_size = k_ * num_rows * sizeof(int);
+  size_t expert_for_source_row_size = k_ * num_rows * sizeof(int);
+
+  AllocatorPtr allocator;
+  ORT_RETURN_IF_ERROR(context->GetTempSpaceAllocator(&allocator));
+
+  // TODO: allocate one buffer and reuse it.
+  IAllocatorUniquePtr<void> work_space = IAllocator::MakeUniquePtr<void>(allocator, ws_size, false, stream);
+  IAllocatorUniquePtr<void> fc2_output = IAllocator::MakeUniquePtr<void>(allocator, fc2_output_size, false, stream);
+  IAllocatorUniquePtr<void> expert_scales =
+      IAllocator::MakeUniquePtr<void>(allocator, expert_scales_size, false, stream);
+  IAllocatorUniquePtr<void> expanded_source_row_to_expanded_dest_row =
+      IAllocator::MakeUniquePtr<void>(allocator, expanded_source_row_to_expanded_dest_row_size, false, stream);
+  IAllocatorUniquePtr<void> expert_for_source_row =
+      IAllocator::MakeUniquePtr<void>(allocator, expert_for_source_row_size, false, stream);
+
+  // fc1_scales and fc2_scales are used in quantized MoE
+  const CudaT* fc1_scales_ptr = nullptr;
+  const CudaT* fc2_scales_ptr = nullptr;
+
+  moe_runner.run_moe_fc(reinterpret_cast<const CudaT*>(input->template Data<T>()),
+                        reinterpret_cast<const CudaT*>(router_probs->template Data<T>()),
+                        reinterpret_cast<const CudaT*>(fc1_experts_weights->template Data<T>()),
+                        std::move(fc1_scales_ptr),
+                        fc1_experts_bias_optional == nullptr
+                            ? nullptr
+                            : reinterpret_cast<const CudaT*>(fc1_experts_bias_optional->template Data<T>()),
+                        activation_type_, reinterpret_cast<const CudaT*>(fc2_experts_weights->template Data<T>()),
+                        std::move(fc2_scales_ptr), static_cast<int>(num_rows), static_cast<int>(hidden_size),
+                        static_cast<int>(inter_size), static_cast<int>(num_experts), static_cast<int>(k_),
+                        reinterpret_cast<char*>(work_space.get()), reinterpret_cast<CudaT*>(fc2_output.get()),
+                        reinterpret_cast<CudaT*>(expert_scales.get()),
+                        reinterpret_cast<int*>(expanded_source_row_to_expanded_dest_row.get()),
+                        reinterpret_cast<int*>(expert_for_source_row.get()), Stream(context));
+
+  Tensor* output = context->Output(0, input->Shape());
+
+  ort_fastertransformer::finalize_moe_routing_kernelLauncher(
+      reinterpret_cast<CudaT*>(fc2_output.get()), reinterpret_cast<CudaT*>(output->template MutableData<T>()),
+      fc2_experts_bias_optional == nullptr
+          ? nullptr
+          : reinterpret_cast<const CudaT*>(fc2_experts_bias_optional->template Data<T>()),
+      reinterpret_cast<CudaT*>(expert_scales.get()),
+      reinterpret_cast<int*>(expanded_source_row_to_expanded_dest_row.get()),
+      reinterpret_cast<int*>(expert_for_source_row.get()), static_cast<int>(num_rows), static_cast<int>(hidden_size),
+      static_cast<int>(k_), Stream(context));
+
+  return Status::OK();
+}
+
+}  // namespace cuda
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/moe/moe.h b/onnxruntime/contrib_ops/cuda/moe/moe.h
new file mode 100644
index 0000000000000..8035568693814
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/moe/moe.h
@@ -0,0 +1,45 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+
+#include "contrib_ops/cuda/moe/ft_moe/moe_kernel.h"
+#include "core/common/common.h"
+#include "core/providers/cuda/cuda_kernel.h"
+
+namespace onnxruntime {
+namespace contrib {
+namespace cuda {
+
+using namespace onnxruntime::cuda;
+
+template <typename T>
+class MoE final : public CudaKernel {
+ public:
+  explicit MoE(const OpKernelInfo& op_kernel_info) : CudaKernel(op_kernel_info) {
+    ORT_ENFORCE(op_kernel_info.GetAttr<int64_t>("k", &k_).IsOK());
+
+    std::string activation_type_str;
+    ORT_ENFORCE(op_kernel_info.GetAttr<std::string>("activation_type", &activation_type_str).IsOK());
+    if (activation_type_str == "relu") {
+      activation_type_ = ort_fastertransformer::ActivationType::Relu;
+    } else if (activation_type_str == "gelu") {
+      activation_type_ = ort_fastertransformer::ActivationType::Gelu;
+    } else if (activation_type_str == "silu") {
+      activation_type_ = ort_fastertransformer::ActivationType::Silu;
+    } else if (activation_type_str == "identity") {
+      activation_type_ = ort_fastertransformer::ActivationType::Identity;
+    } else {
+      ORT_THROW("Unsupported MoE activation type: ", activation_type_str);
+    }
+  }
+  Status ComputeInternal(OpKernelContext* ctx) const override;
+
+ private:
+  int64_t k_;
+  ort_fastertransformer::ActivationType activation_type_;
+};
+
+}  // namespace cuda
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/quantization/dequantize_blockwise.cu b/onnxruntime/contrib_ops/cuda/quantization/dequantize_blockwise.cu
new file mode 100644
index 0000000000000..7921315ab52e1
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/quantization/dequantize_blockwise.cu
@@ -0,0 +1,375 @@
+// Modifications: scaling is moved from masked softmax to the gemm before that.
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include <cub/cub.cuh>
+#include <cublas_v2.h>
+#include <cuda_fp16.h>
+#include <cmath>
+#include <math_constants.h>
+#include "core/providers/cuda/cu_inc/common.cuh"
+#include "core/providers/cuda/cuda_common.h"
+#include "dequantize_blockwise.cuh"
+
+using namespace onnxruntime::cuda;
+using namespace cub;
+
+namespace onnxruntime {
+namespace contrib {
+namespace cuda {
+
+
+__device__ __forceinline__ void DequantizeEightElements(uint32_t values_quant, half scale, half zp, half* output) {
+  half2 scale_half2 = {scale, scale};
+  half zp_adjust = -scale * __short2half_rn(zp);
+  half2 zp_adjust2 = {zp_adjust, zp_adjust};
+
+  alignas(16) half2 results[4];
+  half v0 = __uint2half_rn(values_quant & 0xF);
+  half v1 = __uint2half_rn((values_quant >> 4) & 0xF);
+  results[0] = __halves2half2(v0, v1) * scale_half2 + zp_adjust2;
+
+  half v2 = __uint2half_rn((values_quant >> 8) & 0xF);
+  half v3 = __uint2half_rn((values_quant >> 12) & 0xF);
+  results[1] = __halves2half2(v2, v3) * scale_half2 + zp_adjust2;
+
+  half v4 = __uint2half_rn((values_quant >> 16) & 0xF);
+  half v5 = __uint2half_rn((values_quant >> 20) & 0xF);
+  results[2] = __halves2half2(v4, v5) * scale_half2 + zp_adjust2;
+
+  half v6 = __uint2half_rn((values_quant >> 24) & 0xF);
+  half v7 = __uint2half_rn((values_quant >> 28) & 0xF);
+  results[3] = __halves2half2(v6, v7) * scale_half2 + zp_adjust2;
+  *(reinterpret_cast<float4*>(output)) = *(reinterpret_cast<float4*>(results));
+}
+
+__device__ __forceinline__ void DequantizeEightElements(uint32_t values_quant, float scale, float zp, float* output) {
+  float zp_adjust = -scale * zp;
+  output[0] = float(values_quant & 0xF) * scale + zp_adjust;
+  output[1] = float((values_quant >> 4) & 0xF) * scale + zp_adjust;
+  output[2] = float((values_quant >> 8) & 0xF) * scale + zp_adjust;
+  output[3] = float((values_quant >> 12) & 0xF) * scale + zp_adjust;
+  output[4] = float((values_quant >> 16) & 0xF) * scale + zp_adjust;
+  output[5] = float((values_quant >> 20) & 0xF) * scale + zp_adjust;
+  output[6] = float((values_quant >> 24) & 0xF) * scale + zp_adjust;
+  output[7] = float((values_quant >> 28) & 0xF) * scale + zp_adjust;
+}
+
+template <class T>
+__global__ void Dequantize4BitsKernel(
+    T* output,
+    const uint8_t* quant_data,
+    const T* scale_data,
+    const uint8_t* zero_points,
+    int block_size,
+    int blocks_per_K,
+    int blocks_per_threadblock,
+    int shift) {
+  int block_id = blockIdx.x * blocks_per_threadblock + ((threadIdx.x * 8) >> shift);
+  int n_idx = block_id / blocks_per_K;
+  int kb_idx = block_id % blocks_per_K;
+  int element_offset = block_id * block_size + ((threadIdx.x * 8) & ((1 << shift) - 1));
+  uint32_t quant_value = *(reinterpret_cast<const uint32_t*>(quant_data + element_offset / 2));
+  T scale = *(scale_data + block_id);
+  uint8_t zp = 8;
+  if (zero_points) {
+    zp = zero_points[n_idx * ((blocks_per_K + 1)/2) + kb_idx / 2];
+    zp = (kb_idx & 0x01) ? (zp >> 4) : (zp & 0x0f);
+  }
+
+  output = output + element_offset;
+  DequantizeEightElements(quant_value, scale, static_cast<T>(zp), output);
+}
+
+template <class T>
+Status Dequantize4Bits(
+    T* output,
+    const uint8_t* quant_data,
+    const T* scales_data,
+    const uint8_t* zero_points,  // shape: [N, (block_per_K + 1)/2]
+    int k,
+    int n,
+    int block_size,
+    cudaStream_t stream) {
+  // k is padded and equal to block_per_K * block_size
+  ORT_ENFORCE(k % block_size == 0, "k must be a multiplier of block_size");
+  constexpr int element_per_thread = 8;
+  int blocks_per_threadblock = GridDim::maxThreadsPerBlock * element_per_thread / block_size;
+  int blocks_per_K = k / block_size;
+  int blocks_per_grid = static_cast<int>(CeilDiv(n * blocks_per_K, blocks_per_threadblock));
+  int shift = static_cast<int>(log2f(float(block_size)));
+
+  Dequantize4BitsKernel<<<blocks_per_grid, GridDim::maxThreadsPerBlock, 0, stream>>>(
+      output,
+      quant_data,
+      scales_data,
+      zero_points,
+      block_size,
+      blocks_per_K,
+      blocks_per_threadblock,
+      shift);
+
+  return Status::OK();
+}
+
+template Status Dequantize4Bits<float>(
+    float* output,
+    const uint8_t* quant_data,
+    const float* scales_data,
+    const uint8_t* zero_points,
+    int k,
+    int n,
+    int block_size,
+    cudaStream_t stream);
+
+template Status Dequantize4Bits<half>(
+    half* output,
+    const uint8_t* quant_data,
+    const half* scales_data,
+    const uint8_t* zero_points,
+    int k,
+    int n,
+    int block_size,
+    cudaStream_t stream);
+
+
+///////////////////////////////////////////////////////////////////////////////
+// A more general block-wise dequantization implementation that supports
+// different block sizes and block orientations (row-wise/column-wise).
+
+template <
+  int Row_,    ///< rows of a matrix
+  int Column_  ///< columns of a matrix
+  >
+struct Shape2D {
+  static int const kRow = Row_;              ///< rows of a matrix
+  static int const kColumn = Column_;        ///< columns of a matrix
+  static int const kCount = Row_ * Column_;  ///< total number of elements in a matrix
+};
+
+/**
+ * @brief Blockwise quantization constants
+ * @tparam ElementT       source data type, e.g. fp32/fp16
+ * @tparam block_size     number of elemenets quantized together
+ * @tparam qbits          number of bits in each quantized element
+ * @tparam Columnwise     true:  elements in a block come from one single column
+ *                        false: elements in a block come from one single row
+ */
+template <
+  typename ElementT,
+  int32_t block_size,
+  int32_t qbits,
+  bool Columnwise>
+struct BlkQuantTraits {
+  // number of qbit elements to pack into whole bytes
+  static constexpr int kPackSize = (qbits == 8) ? 1 : (qbits == 4) ? 2 : (qbits == 2) ? 4 : 0;
+  static_assert(kPackSize != 0, "Packing to whole bytes not supported for this qbits!");
+
+  using QuantBlk = std::conditional_t<Columnwise, Shape2D<block_size, 1>, Shape2D<1, block_size>>;
+  using ThreadBlk = Shape2D<QuantBlk::kRow * kPackSize, QuantBlk::kColumn>;
+};
+
+template <
+  typename ElementT,
+  int32_t block_size,
+  int32_t qbits,
+  bool Columnwise>
+__global__
+void dequantizeThread(ElementT* dst,
+                      const uint8_t* weights,
+                      const ElementT* scales,
+                      const uint8_t* zero_points,
+                      int rows,
+                      int columns,
+                      int thrd_row_blks) {
+  using QuantBlk = typename BlkQuantTraits<ElementT, block_size, qbits, Columnwise>::QuantBlk;
+  using ThreadBlk = typename BlkQuantTraits<ElementT, block_size, qbits, Columnwise>::ThreadBlk;
+
+  // !! 4b specific code
+  static_assert(qbits == 4, "Only 4b block quantization is supported!");
+
+  const auto block_idx = blockIdx.x * blockDim.x + threadIdx.x;
+  const auto row_blks = (rows + QuantBlk::kRow - 1) / QuantBlk::kRow;
+
+  const auto meta_rows = (rows + QuantBlk::kRow - 1) / QuantBlk::kRow;
+
+  // quantized matrix is stored in column major, packed by column
+  const auto q_rows = (meta_rows * QuantBlk::kRow * qbits + 7) / 8;
+
+  int32_t r_blk_idx = static_cast<int32_t>(block_idx % thrd_row_blks);
+  int32_t c_blk_idx = static_cast<int32_t>(block_idx / thrd_row_blks);
+
+  int32_t r = r_blk_idx * ThreadBlk::kRow;
+  int32_t c = c_blk_idx * ThreadBlk::kColumn;
+
+  int32_t r_end = std::min(r + ThreadBlk::kRow, rows);
+  int32_t c_end = std::min(c + ThreadBlk::kColumn, columns);
+
+  // for 4b quant, kPackSize = 2, so we have 2 scales and 2 offsets
+  const ElementT scale_buf[2] = {
+      scales[(c / QuantBlk::kColumn) * row_blks + r / QuantBlk::kRow],
+      ((r/QuantBlk::kRow) < (meta_rows - 1))
+          ? scales[(c / QuantBlk::kColumn) * row_blks + r / QuantBlk::kRow + 1]
+          : static_cast<ElementT>(0.0f)};
+  const uint8_t zp_pair = (zero_points == nullptr)
+        ? 0x88
+        : zero_points[(c / QuantBlk::kColumn) * ((row_blks + 1) / 2) + (r / QuantBlk::kRow) / 2];
+  const uint16_t zp_buf[2] = {(uint16_t)(zp_pair & 0x0f), (uint16_t)((zp_pair >> 4) & 0x0f)};
+  const ElementT adjust_buf[2] = {(-scale_buf[0]) * static_cast<ElementT>(zp_buf[0]),
+                                  (-scale_buf[1]) * static_cast<ElementT>(zp_buf[1])};
+
+  for (int32_t j = c; j < c_end; ++j) {
+    const uint8_t* q_ptr = weights + j * q_rows;
+    for (int32_t i = r; i < (r_end - 1); i += 2) {
+      const auto scale0 = scale_buf[(i - r) / QuantBlk::kRow];
+      const auto adjust0 = adjust_buf[(i - r) / QuantBlk::kRow];
+
+      const auto scale1 = scale_buf[(i + 1 - r) / QuantBlk::kRow];;
+      const auto adjust1 = adjust_buf[(i + 1 - r) / QuantBlk::kRow];
+
+      const auto vi = q_ptr[i / 2];
+
+      if constexpr (std::is_same<ElementT, half>::value) {
+        half2 scale_half2 = {scale0, scale1};
+        half2 zp_adjust2 = {adjust0, adjust1};
+
+        half2 v = {__ushort2half_rn(vi & 0xF), __ushort2half_rn((vi >> 4) & 0xF)};
+        half2 results = v * scale_half2 + zp_adjust2;
+
+        dst[j * rows + i] = results.x;
+        dst[j * rows + (i + 1)] = results.y;
+      } else {
+        static_assert(std::is_same<ElementT, float>::value, "Only float and half are supported!");
+        const uint8_t vi0 = vi & 0xf;
+        const uint8_t vi1 = vi >> 4;
+        dst[j * rows + i] = static_cast<float>(vi0) * scale0 + adjust0;;
+        dst[j * rows + (i + 1)] = static_cast<float>(vi1) * scale1 + adjust1;
+      }
+    }
+
+    if ((r_end & 1) && (r_end > r)) {
+      const auto scale0 = scale_buf[(r_end - 1 - r) / QuantBlk::kRow];
+      const auto adjust0 = adjust_buf[(r_end - 1 - r) / QuantBlk::kRow];
+
+      const auto vi = q_ptr[(r_end - 1) / 2];
+      const uint8_t vi0 = vi & 0xf;
+
+      dst[j * rows + (r_end - 1)] = static_cast<ElementT>(vi0) * scale0 + adjust0;
+    }
+  }
+}
+
+template <
+  typename ElementT,
+  int32_t block_size,
+  int32_t qbits,
+  bool Columnwise>
+static void dequantize(ElementT* dst, const uint8_t* weights, const ElementT* scales,
+                        const uint8_t* zero_points, int32_t rows, int32_t columns,
+                        cudaStream_t stream) {
+  using QuantBlk = typename BlkQuantTraits<ElementT, block_size, qbits, Columnwise>::QuantBlk;
+  using ThreadBlk = typename BlkQuantTraits<ElementT, block_size, qbits, Columnwise>::ThreadBlk;
+
+  // Thread partitioning
+  const auto thrd_row_blks = (rows + ThreadBlk::kRow - 1) / ThreadBlk::kRow;
+  const auto thrd_col_blks = (columns + ThreadBlk::kColumn - 1) / ThreadBlk::kColumn;
+  const auto total_thrd_blks = thrd_row_blks * thrd_col_blks;
+
+  const auto grids = (total_thrd_blks + GridDim::maxThreadsPerBlock - 1) / GridDim::maxThreadsPerBlock;
+  dequantizeThread<ElementT, block_size, qbits, Columnwise><<<grids, GridDim::maxThreadsPerBlock, 0, stream>>>(
+      dst,
+      weights,
+      scales,
+      zero_points,
+      rows,
+      columns,
+      thrd_row_blks);
+}
+
+
+template <typename T>
+Status
+DequantizeBlockwise4b(
+    T* dst,
+    const uint8_t* src,
+    const T* scales,
+    const uint8_t* zero_points,
+    int block_size,
+    bool columnwise,
+    int rows,
+    int columns,
+    cudaStream_t stream) {
+  switch (block_size) {
+    case 16:
+      if (columnwise) {
+        dequantize<T, 16, 4, true>(dst, src, scales, zero_points, rows, columns, stream);
+      } else {
+        dequantize<T, 16, 4, false>(dst, src, scales, zero_points, rows, columns, stream);
+      }
+      return Status::OK();
+    case 32:
+      if (columnwise) {
+        dequantize<T, 32, 4, true>(dst, src, scales, zero_points, rows, columns, stream);
+      } else {
+        dequantize<T, 32, 4, false>(dst, src, scales, zero_points, rows, columns, stream);
+      }
+      return Status::OK();
+    case 64:
+      if (columnwise) {
+        dequantize<T, 64, 4, true>(dst, src, scales, zero_points, rows, columns, stream);
+      } else {
+        dequantize<T, 64, 4, false>(dst, src, scales, zero_points, rows, columns, stream);
+      }
+      return Status::OK();
+    case 128:
+      if (columnwise) {
+        dequantize<T, 128, 4, true>(dst, src, scales, zero_points, rows,
+                                                        columns, stream);
+      } else {
+        dequantize<T, 128, 4, false>(dst, src, scales, zero_points,
+                                                            rows, columns, stream);
+      }
+      return Status::OK();
+    case 256:
+      if (columnwise) {
+        dequantize<T, 256, 4, true>(dst, src, scales, zero_points, rows,
+                                                        columns, stream);
+      } else {
+        dequantize<T, 256, 4, false>(dst, src, scales, zero_points,
+                                                            rows, columns, stream);
+      }
+      return Status::OK();
+    default:
+      // Only block size 16, 32, 64, 128, 256 are supported.
+      return Status(::onnxruntime::common::ONNXRUNTIME, ::onnxruntime::common::FAIL,
+                    "Unsupported block size for blockwise quantization.");
+  }
+}
+
+template
+Status DequantizeBlockwise4b<float>(
+    float* dst,
+    const uint8_t* src,
+    const float* scales,
+    const uint8_t* zero_points,
+    int block_size,
+    bool columnwise,
+    int rows,
+    int columns,
+    cudaStream_t stream);
+
+template
+Status DequantizeBlockwise4b<half>(
+    half* dst,
+    const uint8_t* src,
+    const half* scales,
+    const uint8_t* zero_points,
+    int block_size,
+    bool columnwise,
+    int rows,
+    int columns,
+    cudaStream_t stream);
+
+}  // namespace cuda
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/quantization/dequantize_blockwise.cuh b/onnxruntime/contrib_ops/cuda/quantization/dequantize_blockwise.cuh
new file mode 100644
index 0000000000000..f9c09c55fd893
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/quantization/dequantize_blockwise.cuh
@@ -0,0 +1,50 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+#include "core/providers/cuda/shared_inc/cuda_utils.h"
+
+namespace onnxruntime {
+namespace contrib {
+namespace cuda {
+template <class T>
+Status Dequantize4Bits(
+    T* output,
+    const uint8_t* quant_data,
+    const T* scales_data,
+    const uint8_t* zero_points,
+    int k,
+    int n,
+    int block_size,
+    cudaStream_t stream);
+
+
+/**
+ * @brief Dequantize a block-wise quantized matrix, and store the result in a
+ *        column major matrix for use in subsequent GEMM. This implementation supports
+ *        columnwise and rowwise block orientation.
+ * @param[out] dst           pointer to the dequantized matrix, column major: [columns, rows]
+ * @param[in]  qelements     pointer to the quantized elements, column major: [columns, rows]
+ * @param[in]  scales        pointer to the scales of quantized blocks, column major layout
+ * @param[in]  zero_points   pointer to the zero points of quantized blocks, packed column major
+ *                           scales
+ * @param[in]  block_size    size of the quantized block
+ * @param[in]  columnwise    whether the quantized matrix is columnwise or rowwise quantized
+ * @param[in]  rows
+ * @param[in]  columns
+ */
+template <typename T>
+Status DequantizeBlockwise4b(
+    T* dst,
+    const uint8_t* qelements,
+    const T* scales,
+    const uint8_t* zero_points,
+    int block_size,
+    bool columnwise,
+    int rows,
+    int columns,
+    cudaStream_t stream);
+
+}  // namespace cuda
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/quantization/dequantize_blockwise_bnb4.cu b/onnxruntime/contrib_ops/cuda/quantization/dequantize_blockwise_bnb4.cu
new file mode 100644
index 0000000000000..2f74dd41f0759
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/quantization/dequantize_blockwise_bnb4.cu
@@ -0,0 +1,161 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include <cub/cub.cuh>
+#include <cuda_fp16.h>
+#include "core/providers/cuda/cuda_common.h"
+#include "contrib_ops/cpu/quantization/blockwise_quant_block_bnb4.h"
+#include "dequantize_blockwise_bnb4.cuh"
+
+namespace onnxruntime {
+namespace contrib {
+namespace cuda {
+
+template <class T>
+Status SetBnbQuantMap(int quant_type, T* quant_map_buffer, cudaStream_t stream) {
+  ORT_ENFORCE(
+      quant_type == FP4 || quant_type == NF4,
+      "Invalid quant_type, only 0 (FP4) and 1 (NF4) are supported.");
+
+  T host_quant_map[16];
+  switch (quant_type) {
+    case FP4:
+      for (int i = 0; i < 16; i++) host_quant_map[i] = static_cast<T>(fp4_qaunt_map[i]);
+      break;
+    case NF4:
+      for (int i = 0; i < 16; i++) host_quant_map[i] = static_cast<T>(nf4_qaunt_map[i]);
+      break;
+  }
+  CUDA_CALL_THROW(cudaMemcpyAsync(quant_map_buffer, host_quant_map, sizeof(T) * 16, cudaMemcpyHostToDevice, stream));
+
+  return Status::OK();
+}
+
+template Status SetBnbQuantMap<float>(int quant_type, float* quant_map_buffer, cudaStream_t stream);
+
+template Status SetBnbQuantMap<half>(int quant_type, half* quant_map_buffer, cudaStream_t stream);
+
+template Status SetBnbQuantMap<BFloat16>(int quant_type, BFloat16* quant_map_buffer, cudaStream_t stream);
+
+template <class T, int TILE_SIZE, int THREADS, int NUM_PER_TH>
+__global__ void kDequantizeBlockwise(
+    const T* quant_map,
+    T* output,
+    const uint8_t* quant_data,
+    const T* absmax,
+    const int block_size,
+    const int n) {
+  const int n_load = (gridDim.x * TILE_SIZE);
+  int valid_items_load = 0;
+  int valid_items_store = 0;
+  const int base_idx = (blockIdx.x * TILE_SIZE);
+
+  T vals[NUM_PER_TH * 2];
+  uint8_t qvals[NUM_PER_TH];
+  T local_abs_max = T(0.0f);
+
+  typedef cub::BlockLoad<uint8_t, THREADS, NUM_PER_TH, cub::BLOCK_LOAD_WARP_TRANSPOSE> LoadChar;
+  typedef cub::BlockStore<T, THREADS, NUM_PER_TH * 2, cub::BLOCK_STORE_WARP_TRANSPOSE> StoreT;
+
+  __shared__ typename LoadChar::TempStorage loadchar;
+  __shared__ typename StoreT::TempStorage storet;
+
+  for (unsigned int i = base_idx; i < n_load; i += gridDim.x * TILE_SIZE) {
+    valid_items_load = (n + 1) / 2 - i > TILE_SIZE ? TILE_SIZE : (n + 1) / 2 - i;
+    valid_items_store = n - i * 2 > TILE_SIZE * 2 ? TILE_SIZE * 2 : n - i * 2;
+
+    local_abs_max = absmax[(i + threadIdx.x * NUM_PER_TH) / (block_size)];
+
+    __syncthreads();
+    LoadChar(loadchar).Load(&(quant_data[i]), qvals, valid_items_load, 128);
+
+    #pragma unroll NUM_PER_TH
+    for (int j = 0; j < NUM_PER_TH; j++) {
+      vals[j * 2] = ScalarMul(quant_map[qvals[j] >> 4], local_abs_max);
+      vals[j * 2 + 1] = ScalarMul(quant_map[qvals[j] & 0x0F], local_abs_max);
+    }
+
+    __syncthreads();
+    StoreT(storet).Store(&(output[i * 2]), vals, valid_items_store);
+  }
+}
+
+template <class T>
+void CallkDequantizeBlockwise(
+    const T* quant_map,
+    T* output,
+    const uint8_t* quant_data,
+    const T* absmax,
+    int block_size,
+    int numel,
+    cudaStream_t stream) {
+  int tile_size = 1024;
+  kDequantizeBlockwise<T, 512, 64, 8><<<(numel + tile_size - 1) / tile_size, 64, 0, stream>>>(
+      quant_map,
+      output,
+      quant_data,
+      absmax,
+      block_size / 2,
+      numel);
+}
+
+template <class T>
+Status DequantizeBnb4(
+    const T* quant_map,
+    T* output,
+    const uint8_t* quant_data,
+    const T* absmax,
+    int block_size,
+    int numel,
+    cudaStream_t stream) {
+  CallkDequantizeBlockwise<T>(quant_map, output, quant_data, absmax, block_size, numel, stream);
+
+  return Status::OK();
+}
+
+template Status DequantizeBnb4<float>(
+    const float* quant_map,
+    float* output,
+    const uint8_t* quant_data,
+    const float* absmax,
+    int block_size,
+    int numel,
+    cudaStream_t stream);
+
+template Status DequantizeBnb4<half>(
+    const half* quant_map,
+    half* output,
+    const uint8_t* quant_data,
+    const half* absmax,
+    int block_size,
+    int numel,
+    cudaStream_t stream);
+
+template <>
+Status DequantizeBnb4<BFloat16>(
+    const BFloat16* quant_map,
+    BFloat16* output,
+    const uint8_t* quant_data,
+    const BFloat16* absmax,
+    int block_size,
+    int numel,
+    cudaStream_t stream) {
+  #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 800
+    CallkDequantizeBlockwise<nv_bfloat16>(
+        reinterpret_cast<const nv_bfloat16*>(quant_map),
+        reinterpret_cast<nv_bfloat16*>(output),
+        quant_data,
+        reinterpret_cast<const nv_bfloat16*>(absmax),
+        block_size,
+        numel,
+        stream);
+  #else
+    CallkDequantizeBlockwise<BFloat16>(quant_map, output, quant_data, absmax, block_size, numel, stream);
+  #endif
+
+  return Status::OK();
+}
+
+}  // namespace cuda
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/quantization/dequantize_blockwise_bnb4.cuh b/onnxruntime/contrib_ops/cuda/quantization/dequantize_blockwise_bnb4.cuh
new file mode 100644
index 0000000000000..a0d38c9853cd6
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/quantization/dequantize_blockwise_bnb4.cuh
@@ -0,0 +1,58 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+#include "core/providers/cuda/shared_inc/cuda_utils.h"
+
+namespace onnxruntime {
+namespace contrib {
+namespace cuda {
+
+template <class T>
+Status SetBnbQuantMap(int quant_type, T* quant_map_buffer, cudaStream_t stream);
+
+// templated scalar multiply function
+template <class T>
+__device__ inline T ScalarMul(T a, T b);
+
+template <>
+__device__ inline float ScalarMul(float a, float b) {
+  return a * b;
+}
+
+template <>
+__device__ inline half ScalarMul(half a, half b) {
+  #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 530
+    return a * b;
+  #else
+    // half multiplication not supported
+    return static_cast<half>(static_cast<float>(a) * static_cast<float>(b));
+  #endif
+}
+
+template <>
+__device__ inline BFloat16 ScalarMul(BFloat16 a, BFloat16 b) {
+  return a * b;
+}
+
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800
+// will use the native bfloat16 multiply instruction on sm_80+
+template <>
+__device__ inline nv_bfloat16 ScalarMul(nv_bfloat16 a, nv_bfloat16 b) {
+  return a * b;
+}
+#endif
+
+template <class T>
+Status DequantizeBnb4(
+    const T* quant_map,
+    T* output,
+    const uint8_t* quant_data,
+    const T* absmax,
+    int block_size,
+    int numel,
+    cudaStream_t stream);
+
+}  // namespace cuda
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/quantization/matmul_bnb4.cc b/onnxruntime/contrib_ops/cuda/quantization/matmul_bnb4.cc
new file mode 100644
index 0000000000000..bbcb7de99781f
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/quantization/matmul_bnb4.cc
@@ -0,0 +1,161 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "core/common/safeint.h"
+#include "core/providers/cuda/cuda_kernel.h"
+#include "core/providers/cuda/shared_inc/fpgeneric.h"
+#include "core/providers/cpu/math/matmul_helper.h"
+#include "contrib_ops/cpu/quantization/blockwise_quant_block_bnb4.h"
+#include "matmul_bnb4.cuh"
+#include "dequantize_blockwise_bnb4.cuh"
+
+namespace onnxruntime {
+namespace contrib {
+namespace cuda {
+using namespace onnxruntime::cuda;
+
+template <typename T>
+class MatMulBnb4 final : public CudaKernel {
+ public:
+  MatMulBnb4(const OpKernelInfo& info) : CudaKernel(info) {
+    ORT_ENFORCE(Status::OK() == info.GetAttr<int64_t>("K", &K_));
+    ORT_ENFORCE(Status::OK() == info.GetAttr<int64_t>("N", &N_));
+    ORT_ENFORCE(Status::OK() == info.GetAttr<int64_t>("block_size", &block_size_));
+    ORT_ENFORCE(Status::OK() == info.GetAttr<int64_t>("quant_type", &quant_type_));
+    ORT_ENFORCE(
+        quant_type_ == FP4 || quant_type_ == NF4,
+        "Invalid quant_type, only 0 (FP4) and 1 (NF4) are supported.");
+
+    is_training_mode_ = static_cast<bool>(info.GetAttrOrDefault("training_mode", static_cast<int64_t>(0)));
+    transB_ = static_cast<bool>(info.GetAttrOrDefault("transB", static_cast<int64_t>(1)));
+  }
+
+  Status ComputeInternal(OpKernelContext* context) const override;
+
+ private:
+  int64_t K_;
+  int64_t N_;
+  int64_t block_size_;
+  int64_t quant_type_;
+  bool is_training_mode_;
+  bool transB_;
+};
+
+template <typename T>
+Status MatMulBnb4<T>::ComputeInternal(OpKernelContext* ctx) const {
+  const Tensor* a = ctx->Input<Tensor>(0);
+  const Tensor* b_quant = ctx->Input<Tensor>(1);
+  const Tensor* absmax = ctx->Input<Tensor>(2);
+
+  const auto* a_data = a->Data<T>();
+  const uint8_t* b_quant_data = b_quant->Data<uint8_t>();
+  const auto* absmax_data = absmax->Data<T>();
+
+  typedef typename ToCudaType<T>::MappedType CudaT;
+
+  // TODO: find a better way to create the quant_map without using a buffer
+  // don't want to use malloc directly so asking from the caller
+  // can create a __device__ static array for float but doesn't work for half
+  IAllocatorUniquePtr<T> quant_map_buffer = GetScratchBuffer<T>(16, ctx->GetComputeStream());
+  auto* quant_map_buffer_data = quant_map_buffer.get();
+  ORT_RETURN_IF_ERROR(SetBnbQuantMap<CudaT>(
+      SafeInt<int>(quant_type_),
+      reinterpret_cast<CudaT*>(quant_map_buffer_data),
+      static_cast<cudaStream_t>(ctx->GetComputeStream()->GetHandle())));
+
+  constexpr bool transa = false;
+  const bool transb = transB_;
+  MatMulComputeHelper helper;
+  TensorShape b_shape({N_, K_});
+  ORT_RETURN_IF_ERROR(
+      helper.Compute(a->Shape(), b_shape, transa, transb));
+
+  Tensor* Y = ctx->Output(0, helper.OutputShape());
+  // Bail out early if the output is going to be empty
+  if (Y->Shape().Size() == 0) return Status::OK();
+
+  bool is_4bit_done = !is_training_mode_  // skip inference specific handle if in training mode
+                      && TryMatMulBnb4(
+                             reinterpret_cast<const CudaT*>(quant_map_buffer_data),
+                             reinterpret_cast<CudaT*>(Y->MutableData<T>()),
+                             reinterpret_cast<const CudaT*>(a_data),
+                             b_quant_data,
+                             reinterpret_cast<const CudaT*>(absmax_data),
+                             SafeInt<int>(helper.M()),
+                             SafeInt<int>(helper.N()),
+                             SafeInt<int>(helper.K()),
+                             SafeInt<int>(block_size_),
+                             static_cast<cudaStream_t>(ctx->GetComputeStream()->GetHandle()));
+
+  if (!is_4bit_done) {
+    IAllocatorUniquePtr<T> b_dequant_ptr = GetScratchBuffer<T>(N_ * K_, ctx->GetComputeStream());
+    auto* b_dequant_data = b_dequant_ptr.get();
+    ORT_RETURN_IF_ERROR(DequantizeBnb4<CudaT>(
+        reinterpret_cast<const CudaT*>(quant_map_buffer_data),
+        reinterpret_cast<CudaT*>(b_dequant_data),
+        b_quant_data,
+        reinterpret_cast<const CudaT*>(absmax_data),
+        SafeInt<int>(block_size_),
+        SafeInt<int>(N_ * K_),
+        static_cast<cudaStream_t>(ctx->GetComputeStream()->GetHandle())));
+
+    const CudaT alpha = ToCudaType<T>::FromFloat(1.f);
+    const CudaT zero = ToCudaType<T>::FromFloat(0.f);
+
+    CUBLAS_RETURN_IF_ERROR(cublasGemmHelper(
+        GetCublasHandle(ctx),
+        transb ? CUBLAS_OP_T : CUBLAS_OP_N,  // transB
+        CUBLAS_OP_N,                         // transA
+        SafeInt<int>(helper.N()),
+        SafeInt<int>(helper.M()),
+        SafeInt<int>(helper.K()),
+        &alpha,
+        reinterpret_cast<const CudaT*>(b_dequant_data),
+        helper.Ldb(transb),  // ldb
+        reinterpret_cast<const CudaT*>(a_data),
+        helper.Lda(transa),  // lda
+        &zero,
+        reinterpret_cast<CudaT*>(Y->MutableData<T>()),
+        helper.Ldc(),
+        GetDeviceProp()));
+  }
+
+  return Status::OK();
+}
+
+ONNX_OPERATOR_TYPED_KERNEL_EX(
+    MatMulBnb4,
+    kMSDomain,
+    1,
+    float,
+    kCudaExecutionProvider,
+    (*KernelDefBuilder::Create())
+        .TypeConstraint("T1", DataTypeImpl::GetTensorType<float>())
+        .TypeConstraint("T2", DataTypeImpl::GetTensorType<uint8_t>()),
+    MatMulBnb4<float>);
+
+ONNX_OPERATOR_TYPED_KERNEL_EX(
+    MatMulBnb4,
+    kMSDomain,
+    1,
+    MLFloat16,
+    kCudaExecutionProvider,
+    (*KernelDefBuilder::Create())
+        .TypeConstraint("T1", DataTypeImpl::GetTensorType<MLFloat16>())
+        .TypeConstraint("T2", DataTypeImpl::GetTensorType<uint8_t>()),
+    MatMulBnb4<MLFloat16>);
+
+ONNX_OPERATOR_TYPED_KERNEL_EX(
+    MatMulBnb4,
+    kMSDomain,
+    1,
+    BFloat16,
+    kCudaExecutionProvider,
+    (*KernelDefBuilder::Create())
+        .TypeConstraint("T1", DataTypeImpl::GetTensorType<BFloat16>())
+        .TypeConstraint("T2", DataTypeImpl::GetTensorType<uint8_t>()),
+    MatMulBnb4<BFloat16>);
+
+}  // namespace cuda
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/quantization/matmul_bnb4.cu b/onnxruntime/contrib_ops/cuda/quantization/matmul_bnb4.cu
new file mode 100644
index 0000000000000..098e3618beddd
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/quantization/matmul_bnb4.cu
@@ -0,0 +1,268 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include <type_traits>
+
+#include <cub/cub.cuh>
+#include <cublas_v2.h>
+#include <cuda_fp16.h>
+#include "contrib_ops/cuda/quantization/dequantize_blockwise_bnb4.cuh"
+#include "matmul_bnb4.cuh"
+
+namespace onnxruntime {
+namespace contrib {
+namespace cuda {
+
+template <class T>
+__device__ inline float ScalarMulFloatOut(T a, T b);
+
+template <>
+__device__ inline float ScalarMulFloatOut(float a, float b) {
+  return a * b;
+}
+
+template <>
+__device__ inline float ScalarMulFloatOut(half a, half b) {
+  #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 530
+    return static_cast<float>(a * b);
+  #else
+    // half multiplication not supported
+    return static_cast<float>(a) * static_cast<float>(b);
+  #endif
+}
+
+template <>
+__device__ inline float ScalarMulFloatOut(BFloat16 a, BFloat16 b) {
+  return a * b;
+}
+
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800
+// will use the native bfloat16 multiply instruction on sm_80+
+template <>
+__device__ inline float ScalarMulFloatOut(nv_bfloat16 a, nv_bfloat16 b) {
+  return static_cast<float>(a * b);
+}
+#endif
+
+#define num_values_4bit 32
+template <class T, int THREADS, int BITS>
+__global__ void kgemm_4bit_inference_naive(
+    int M,
+    int N,
+    int K,
+    const T* __restrict__ A,
+    const uint8_t* B,
+    const T* absmax,
+    const T* datatype,
+    T* out,
+    int lda,
+    int ldb,
+    int ldc,
+    int block_size) {
+  // per threadblock:
+  // load step-by-step in chunks of [32,warps]: 1x32 * [32,warps] -> [1,warps]
+  // 4 warps -> 4 loads per iter
+  // 1x32 * 32x4 -> 1x4 outputs per thread block
+  typedef cub::WarpReduce<float> WarpReduce;
+  __shared__ typename WarpReduce::TempStorage temp_storage[THREADS / 32];
+
+  const int warp_idx = threadIdx.x / 32;
+  const int warp_lane = threadIdx.x % 32;
+  const int row_B = (THREADS / 32) * blockIdx.x + warp_idx;
+  const int num_values_8bit = num_values_4bit / 2;
+  float local_C = 0.0f;
+
+  uint8_t local_B_4bit[num_values_8bit];
+  T local_B[num_values_4bit / 4];
+  T local_A[num_values_4bit / 4];
+  __shared__ T quant_map[16];
+  T local_absmax = T(0.0f);
+
+  for (int i = threadIdx.x; i < 16; i++) quant_map[i] = T(datatype[i]);
+  __syncthreads();
+
+  // A: [1, K]
+  // B: [N, K]
+  for (int inner_idx = warp_lane * num_values_4bit; inner_idx < K; inner_idx += 32 * num_values_4bit) {
+    int inner_idx_halved = inner_idx / 2;
+    int offset_B = ldb * row_B;
+    int absidx = ((2 * offset_B) + inner_idx) / block_size;
+    local_absmax = absmax[absidx];
+
+    if (row_B < N) {
+      if ((inner_idx_halved + num_values_8bit) < (K / 2)) {
+        // this is the most important for performance considerations
+        reinterpret_cast<int4(&)[num_values_8bit]>(local_B_4bit)[0] =
+            reinterpret_cast<const int4*>(B)[(offset_B + (inner_idx_halved)) / (num_values_8bit)];
+      } else {
+        #pragma unroll
+        for (int j = 0; j < (num_values_8bit); j++)
+          if ((inner_idx_halved) + j < (K / 2))
+            local_B_4bit[j] = B[offset_B + inner_idx_halved + j];
+          else
+            local_B_4bit[j] = 0b01110111;
+      }
+    } else {
+      #pragma unroll
+      for (int j = 0; j < (num_values_8bit); j++) local_B_4bit[j] = 0b01110111;
+    }
+
+    for (int i = 0; i < 4; i++) {
+      #pragma unroll
+      for (int k = 0; k < num_values_8bit / 4; k++) {
+        local_B[k * 2] = ScalarMul(quant_map[local_B_4bit[(i * num_values_8bit / 4) + k] >> 4], local_absmax);
+        local_B[k * 2 + 1] = ScalarMul(quant_map[local_B_4bit[(i * num_values_8bit / 4) + k] & 0x0F], local_absmax);
+      }
+
+      if (inner_idx + (num_values_4bit / 4) + (i * num_values_4bit / 4) < K) {
+        // this is also relatively important for performance
+        if (BITS == 16) {
+          reinterpret_cast<int4(&)[num_values_4bit]>(local_A)[0] =
+              reinterpret_cast<const int4*>(A)[inner_idx / (num_values_4bit / 4) + i];
+        } else {
+          reinterpret_cast<int4(&)[num_values_4bit]>(local_A)[0] =
+              reinterpret_cast<const int4*>(A)[inner_idx / (num_values_4bit / 8) + (2 * i) + 0];
+          reinterpret_cast<int4(&)[num_values_4bit]>(local_A)[1] =
+              reinterpret_cast<const int4*>(A)[inner_idx / (num_values_4bit / 8) + (2 * i) + 1];
+        }
+      } else {
+        #pragma unroll
+        for (int k = 0; k < num_values_4bit / 4; k++) {
+          if (inner_idx + (i * num_values_4bit / 4) + k < K)
+            local_A[k] = A[inner_idx + k + (i * num_values_4bit / 4)];
+          else
+            local_A[k] = T(0.0f);
+        }
+      }
+
+      // accumulate in float; small performance hit for Ampere, but lower error for outputs
+      #pragma unroll
+      for (int k = 0; k < num_values_4bit / 4; k++) {
+        local_C += ScalarMulFloatOut(local_A[k], local_B[k]);
+      }
+    }
+  }
+
+  local_C = WarpReduce(temp_storage[warp_idx]).Sum(local_C);
+
+  if (row_B < N && warp_lane == 0) out[row_B] = T(local_C);
+}
+
+bool CheckDims(int m, int k, int block_size) {
+  if (k % block_size != 0 || m > 1) {
+    return false;
+  }
+  // supported block_sizes are [4096, 2048, 1024, 512, 256, 128, 64, 32]
+  if (block_size % 32 != 0 || block_size > 4096) {
+    return false;
+  }
+  return true;
+}
+
+template <class T>
+void Callkgemm_4bit_inference_naive(
+    const T* quant_map,
+    T* output,
+    const T* a_data,
+    const uint8_t* b_data_quant,
+    const T* absmax,
+    int m,
+    int n,
+    int k,
+    int block_size,
+    cudaStream_t stream) {
+  int lda = k;
+  int ldb = (k + 1) / 2;
+  int ldc = n;
+  int num_blocks = (n + 3) / 4;
+
+  constexpr int bits = std::is_same_v<T, float> ? 32 : 16;
+  kgemm_4bit_inference_naive<T, 128, bits><<<num_blocks, 128, 0, stream>>>(
+      m, n, k, a_data, b_data_quant, absmax, quant_map, output, lda, ldb, ldc, block_size);
+}
+
+template <class T>
+bool TryMatMulBnb4(
+    const T* quant_map,
+    T* output,
+    const T* a_data,
+    const uint8_t* b_data_quant,
+    const T* absmax,
+    int m,
+    int n,
+    int k,
+    int block_size,
+    cudaStream_t stream) {
+  if (!CheckDims(m, k, block_size)) {
+    return false;
+  }
+
+  Callkgemm_4bit_inference_naive<T>(
+      quant_map, output, a_data, b_data_quant, absmax, m, n, k, block_size, stream);
+
+  return true;
+}
+
+template bool TryMatMulBnb4<float>(
+    const float* quant_map,
+    float* output,
+    const float* a_data,
+    const uint8_t* b_data_quant,
+    const float* absmax,
+    int m,
+    int n,
+    int k,
+    int block_size,
+    cudaStream_t stream);
+
+template bool TryMatMulBnb4<half>(
+    const half* quant_map,
+    half* output,
+    const half* a_data,
+    const uint8_t* b_data_quant,
+    const half* absmax,
+    int m,
+    int n,
+    int k,
+    int block_size,
+    cudaStream_t stream);
+
+template <>
+bool TryMatMulBnb4<BFloat16>(
+    const BFloat16* quant_map,
+    BFloat16* output,
+    const BFloat16* a_data,
+    const uint8_t* b_data_quant,
+    const BFloat16* absmax,
+    int m,
+    int n,
+    int k,
+    int block_size,
+    cudaStream_t stream) {
+  if (!CheckDims(m, k, block_size)) {
+    return false;
+  }
+
+  #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 800
+    Callkgemm_4bit_inference_naive<nv_bfloat16>(
+        reinterpret_cast<const nv_bfloat16*>(quant_map),
+        reinterpret_cast<nv_bfloat16*>(output),
+        reinterpret_cast<const nv_bfloat16*>(a_data),
+        b_data_quant,
+        reinterpret_cast<const nv_bfloat16*>(absmax),
+        m,
+        n,
+        k,
+        block_size,
+        stream);
+  #else
+    Callkgemm_4bit_inference_naive<BFloat16>(
+        quant_map, output, a_data, b_data_quant, absmax, m, n, k, block_size, stream);
+  #endif
+
+  return true;
+}
+
+}  // namespace cuda
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/quantization/matmul_bnb4.cuh b/onnxruntime/contrib_ops/cuda/quantization/matmul_bnb4.cuh
new file mode 100644
index 0000000000000..743234282fbf3
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/quantization/matmul_bnb4.cuh
@@ -0,0 +1,26 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+#include "core/providers/cuda/shared_inc/cuda_utils.h"
+
+namespace onnxruntime {
+namespace contrib {
+namespace cuda {
+
+template <class T>
+bool TryMatMulBnb4(
+    const T* quant_map,
+    T* output,
+    const T* a_data,
+    const uint8_t* b_data_quant,
+    const T* absmax,
+    int m,
+    int n,
+    int k,
+    int block_size,
+    cudaStream_t stream);
+
+}  // namespace cuda
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/quantization/matmul_nbits.cc b/onnxruntime/contrib_ops/cuda/quantization/matmul_nbits.cc
new file mode 100644
index 0000000000000..5b0e61e197014
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/quantization/matmul_nbits.cc
@@ -0,0 +1,169 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+//
+// This module define MatMulFp32Q4 operator, it is basically
+// matmul float32 with right hand side being a 2-D matrix
+// pre-packed and block-compacted into int4
+//
+
+#include "core/common/safeint.h"
+#include "core/providers/cuda/cuda_kernel.h"
+#include "core/providers/cuda/shared_inc/fpgeneric.h"
+#include "core/providers/cpu/math/matmul_helper.h"
+#include "matmul_nbits.cuh"
+#include "dequantize_blockwise.cuh"
+
+namespace onnxruntime {
+namespace contrib {
+namespace cuda {
+using namespace onnxruntime::cuda;
+
+template <typename T>
+class MatMulNBits final : public CudaKernel {
+ public:
+  MatMulNBits(const OpKernelInfo& info) : CudaKernel(info) {
+    ORT_ENFORCE(Status::OK() == info.GetAttr<int64_t>("K", &K_));
+    ORT_ENFORCE(Status::OK() == info.GetAttr<int64_t>("N", &N_));
+    ORT_ENFORCE(Status::OK() == info.GetAttr<int64_t>("block_size", &block_size_));
+    ORT_ENFORCE(Status::OK() == info.GetAttr<int64_t>("bits", &nbits_));
+    ORT_ENFORCE(nbits_ == 4,
+                "Only 4b quantization is supported for MatMulNBits op,"
+                " additional bits support is planned.");
+  }
+
+  Status ComputeInternal(OpKernelContext* context) const override;
+
+ private:
+  int64_t K_;
+  int64_t N_;
+  int64_t block_size_;
+  int64_t nbits_;
+  bool column_wise_quant_blk_{true};
+};
+
+template <typename T>
+Status MatMulNBits<T>::ComputeInternal(OpKernelContext* ctx) const {
+  const Tensor* a = ctx->Input<Tensor>(0);
+  const Tensor* b = ctx->Input<Tensor>(1);
+  const Tensor* scales = ctx->Input<Tensor>(2);
+  const Tensor* zero_points = ctx->Input<Tensor>(3);
+
+  const auto* a_data = a->Data<T>();
+  const uint8_t* blob_data = b->Data<uint8_t>();
+  const auto* scales_data = scales->Data<T>();
+  const auto* zero_points_data = zero_points == nullptr ? nullptr : zero_points->Data<uint8_t>();
+
+  typedef typename ToCudaType<T>::MappedType CudaT;
+
+  constexpr bool transa = false;
+  constexpr bool transb = true;
+  MatMulComputeHelper helper;
+  TensorShape b_shape({N_, K_});
+  ORT_RETURN_IF_ERROR(
+      helper.Compute(a->Shape(), b_shape, transa, transb));
+
+  Tensor* Y = ctx->Output(0, helper.OutputShape());
+  // Bail out early if the output is going to be empty
+  if (Y->Shape().Size() == 0) return Status::OK();
+
+  bool is_4bit_done = TryMatMul4Bits(
+      reinterpret_cast<CudaT*>(Y->MutableData<T>()),
+      reinterpret_cast<const CudaT*>(a_data),
+      blob_data,
+      reinterpret_cast<const CudaT*>(scales_data),
+      zero_points_data,
+      SafeInt<int>(helper.M()),
+      SafeInt<int>(helper.N()),
+      SafeInt<int>(helper.K()),
+      SafeInt<int>(block_size_),
+      SafeInt<int>(GetDeviceProp().sharedMemPerBlock),
+      static_cast<cudaStream_t>(ctx->GetComputeStream()->GetHandle()));
+  if (!is_4bit_done) {
+    int64_t K_padded = (K_ + block_size_ - 1) / block_size_ * block_size_;
+    IAllocatorUniquePtr<T> b_data_ptr = GetScratchBuffer<T>(N_ * K_padded, ctx->GetComputeStream());
+    auto* b_data = b_data_ptr.get();
+    if (column_wise_quant_blk_) {
+      // column-wise block
+      ORT_RETURN_IF_ERROR(Dequantize4Bits(
+          reinterpret_cast<CudaT*>(b_data),
+          blob_data,
+          reinterpret_cast<const CudaT*>(scales_data),
+          zero_points_data,
+          SafeInt<int>(K_padded),
+          SafeInt<int>(N_),
+          SafeInt<int>(block_size_),
+          static_cast<cudaStream_t>(ctx->GetComputeStream()->GetHandle())));
+    } else {
+      // row-wise block
+      K_padded = K_;
+
+      ORT_RETURN_IF_ERROR(DequantizeBlockwise4b(
+          reinterpret_cast<CudaT*>(b_data),
+          blob_data,
+          reinterpret_cast<const CudaT*>(scales_data),
+          zero_points_data,
+          SafeInt<int>(block_size_),
+          column_wise_quant_blk_,
+          SafeInt<int>(K_),
+          SafeInt<int>(N_),
+          static_cast<cudaStream_t>(ctx->GetComputeStream()->GetHandle())));
+    }
+#if 0
+  cudaStreamSynchronize(static_cast<cudaStream_t>(ctx->GetComputeStream()->GetHandle()));
+  T* b_data_cpu = new T[K_ * N_];
+  cudaMemcpy(b_data_cpu, b_data, K_ * N_ * sizeof(T), cudaMemcpyDeviceToHost);
+  delete[] b_data_cpu;
+#endif
+
+    const CudaT alpha = ToCudaType<T>::FromFloat(1.f);
+    const CudaT zero = ToCudaType<T>::FromFloat(0.f);
+
+    if (helper.OutputOffsets().size() == 1) {
+      CUBLAS_RETURN_IF_ERROR(cublasGemmHelper(
+          GetCublasHandle(ctx),
+          CUBLAS_OP_T,
+          CUBLAS_OP_N,
+          SafeInt<int>(helper.N()),
+          SafeInt<int>(helper.M()),
+          SafeInt<int>(helper.K()),
+          &alpha,
+          reinterpret_cast<const CudaT*>(b_data),
+          SafeInt<int>(K_padded),
+          reinterpret_cast<const CudaT*>(a_data),
+          helper.Lda(transa),
+          &zero,
+          reinterpret_cast<CudaT*>(Y->MutableData<T>()),
+          helper.Ldc(),
+          GetDeviceProp()));
+    }
+  }
+
+  return Status::OK();
+}
+
+ONNX_OPERATOR_TYPED_KERNEL_EX(
+    MatMulNBits,
+    kMSDomain,
+    1,
+    float,
+    kCudaExecutionProvider,
+    (*KernelDefBuilder::Create())
+        .TypeConstraint("T1", DataTypeImpl::GetTensorType<float>())
+        .TypeConstraint("T2", DataTypeImpl::GetTensorType<uint8_t>()),
+    MatMulNBits<float>);
+
+ONNX_OPERATOR_TYPED_KERNEL_EX(
+    MatMulNBits,
+    kMSDomain,
+    1,
+    MLFloat16,
+    kCudaExecutionProvider,
+    (*KernelDefBuilder::Create())
+        .TypeConstraint("T1", DataTypeImpl::GetTensorType<MLFloat16>())
+        .TypeConstraint("T2", DataTypeImpl::GetTensorType<uint8_t>()),
+    MatMulNBits<MLFloat16>);
+
+}  // namespace cuda
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/quantization/matmul_nbits.cu b/onnxruntime/contrib_ops/cuda/quantization/matmul_nbits.cu
new file mode 100644
index 0000000000000..f2600a506285d
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/quantization/matmul_nbits.cu
@@ -0,0 +1,229 @@
+// Modifications: scaling is moved from masked softmax to the gemm before that.
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include <cub/cub.cuh>
+#include <cublas_v2.h>
+#include <cuda_fp16.h>
+#include <math_constants.h>
+#include "core/providers/cuda/cu_inc/common.cuh"
+#include "core/providers/cuda/cuda_common.h"
+#include "matmul_nbits.cuh"
+
+using namespace onnxruntime::cuda;
+using namespace cub;
+
+namespace onnxruntime {
+namespace contrib {
+namespace cuda {
+
+__device__ __forceinline__ float AccumulateEightElements(uint32_t values_quant, half scale, uint8_t zp, const half* a) {
+  half2 scale_half2 = {scale, scale};
+  half zp_adjust = -scale * __short2half_rn(zp);
+  half2 zp_adjust2 = {zp_adjust, zp_adjust};
+  uint4 vec_a = *(reinterpret_cast<const uint4*>(a));
+
+  half2 element01 = __halves2half2(__uint2half_rn(values_quant & 0xF), __uint2half_rn((values_quant >> 4) & 0xF));
+  half2 v0 = element01 * scale_half2 + zp_adjust2;
+
+  half2 element23 = __halves2half2(__uint2half_rn((values_quant >> 8) & 0xF), __uint2half_rn((values_quant >> 12) & 0xF));
+  half2 v1 = element23 * scale_half2 + zp_adjust2;
+
+  half2 element45 = __halves2half2(__uint2half_rn((values_quant >> 16) & 0xF), __uint2half_rn((values_quant >> 20) & 0xF));
+  half2 v2 = element45 * scale_half2 + zp_adjust2;
+
+  half2 element67 = __halves2half2(__uint2half_rn((values_quant >> 24) & 0xF), __uint2half_rn((values_quant >> 28) & 0xF));
+  half2 v3 = element67 * scale_half2 + zp_adjust2;
+
+  v0 = v0 * (*(reinterpret_cast<half2*>(&(vec_a.x))));
+  v1 = v1 * (*(reinterpret_cast<half2*>(&(vec_a.y))));
+  v2 = v2 * (*(reinterpret_cast<half2*>(&(vec_a.z)))) + v0;
+  v3 = v3 * (*(reinterpret_cast<half2*>(&(vec_a.w)))) + v1;
+  v3 = v2 + v3;
+  return float(v3.x) + float(v3.y);
+}
+
+__device__ __forceinline__ float AccumulateEightElements(uint32_t values_quant, float scale, uint8_t zp, const float* a) {
+  float4 a_vec_0 = *(reinterpret_cast<const float4*>(a));
+  float4 a_vec_1 = *(reinterpret_cast<const float4*>(a + 4));
+
+  float zp_adjust = -scale * zp;
+  float v0 = float(values_quant & 0xF) * scale + zp_adjust;
+  float v1 = float((values_quant >> 4) & 0xF) * scale + zp_adjust;
+  float v2 = float((values_quant >> 8) & 0xF) * scale + zp_adjust;
+  float v3 = float((values_quant >> 12) & 0xF) * scale + zp_adjust;
+  float v4 = float((values_quant >> 16) & 0xF) * scale + zp_adjust;
+  float v5 = float((values_quant >> 20) & 0xF) * scale + zp_adjust;
+  float v6 = float((values_quant >> 24) & 0xF) * scale + zp_adjust;
+  float v7 = float((values_quant >> 28) & 0xF) * scale + zp_adjust;
+
+  v0 = v0 * a_vec_0.x;
+  v1 = v1 * a_vec_0.y;
+  v2 = v2 * a_vec_0.z;
+  v3 = v3 * a_vec_0.w;
+  v4 = v4 * a_vec_1.x + v0;
+  v5 = v5 * a_vec_1.y + v1;
+  v6 = v6 * a_vec_1.z + v2;
+  v7 = v7 * a_vec_1.w + v3;
+  return v4 + v5 + v6 + v7;
+}
+
+constexpr int kColsPerThreadBlock = 8;
+constexpr int kWarpSize = 32;
+
+// kernel for 4bits quantized gemv, i.e., computing A(1,K) x B(K, N)
+// B(K, N) is quantized blockwise with 4bits and stored as [N, (K + block_size - 1)/block_size, blob]
+// The thread block size is (kWarpSize, kColsPerThreadBlock) and grid size is (N/kColsPerThreadBlock, 1)
+// Each thread block computes [1, K] x [kColsPerThreadBlock, (K + block_size - 1)/block_size, blob],
+//     i.e., computing kColsPerThreadBlock per block and a warp reduce (1, K) x (K)
+template <class T, int block_size>
+__global__ void MatMulFloatInt4Kernel(
+    T* output,
+    const T* a_data,
+    const uint8_t* b_data_quant,
+    const T* scales_data,
+    const uint8_t* zero_points,
+    int m,
+    int n,
+    int k,
+    int blocks_per_K) {
+  int n_block_id = blockIdx.x;
+  int m_id = blockIdx.y;
+  int lane_id = threadIdx.x;
+  int warp_id = threadIdx.y;
+  int n_id = n_block_id * kColsPerThreadBlock + warp_id;
+  int thread_id = warp_id * kWarpSize + lane_id;
+  constexpr int k_per_iter = 256;
+  int k_iter = k / k_per_iter;
+
+  // blocks_per_k is the number of scales and zero points on the k dim
+  const int b_zp_k = (blocks_per_K + 1)/ 2;
+
+  extern __shared__ char shared_buffer[];
+
+  // load scale to shared buffer
+  T* b_scale_vec = (T*)shared_buffer;
+  uint8_t* b_zp_vec = reinterpret_cast<uint8_t*>(b_scale_vec + kColsPerThreadBlock * blocks_per_K);
+  int offset = n_block_id * kColsPerThreadBlock * blocks_per_K;
+  for (int i = thread_id; i < kColsPerThreadBlock * blocks_per_K; i += kColsPerThreadBlock * kWarpSize) {
+    b_scale_vec[i] = scales_data[offset + i];
+  }
+
+  int zp_offset = n_block_id * kColsPerThreadBlock * b_zp_k;
+  for (int i = thread_id; i < kColsPerThreadBlock * b_zp_k; i += kColsPerThreadBlock * kWarpSize) {
+    b_zp_vec[i] = zero_points != nullptr ? zero_points[zp_offset + i] : uint8_t(0x88);
+  }
+  __syncthreads();
+
+  a_data += m_id * k;
+  b_data_quant += n_id * blocks_per_K * (block_size / 2);
+
+  const int scale_col_offset = warp_id * blocks_per_K;
+  const int zp_col_offset = warp_id * b_zp_k;
+
+  float sum = 0.f;
+  int k_id = 0;
+  for (; k_id < (k & 0xffffff00); k_id += k_per_iter) {
+    const int t_k = k_id + (lane_id << 3);  // k index for this thread
+    const int t_meta_k = t_k / block_size;  // k index for this thread, points to the scale and zero point
+    uint32_t value = *(reinterpret_cast<const uint32_t*>(b_data_quant + (t_k >> 1)));
+    T scale = b_scale_vec[scale_col_offset + t_meta_k];
+    uint8_t zp = b_zp_vec[zp_col_offset + t_meta_k/2];
+    zp = (t_meta_k & 0x01) ? (zp >> 4) : (zp & 0x0f);
+    sum += AccumulateEightElements(value, scale, zp, a_data + k_id + (lane_id << 3));
+  }
+
+  // handle reminder
+  if (k_id + lane_id * 8 < k) {
+    const int t_k = k_id + (lane_id << 3);  // k index for this thread
+    const int t_meta_k = t_k / block_size;  // k index for this thread, points to the scale and zero point
+    uint32_t value = *(reinterpret_cast<const uint32_t*>(b_data_quant + k_iter * 128 + lane_id * 4));
+    T scale = b_scale_vec[scale_col_offset + t_meta_k];
+    uint8_t zp = b_zp_vec[zp_col_offset + t_meta_k/2];
+    zp = (t_meta_k & 0x01) ? (zp >> 4) : (zp & 0x0f);
+    sum += AccumulateEightElements(value, scale, zp, a_data + k_id + (lane_id << 3));
+  }
+
+  // warp reduction
+  for (int i = 16; i > 0; i = i / 2) {
+    sum += __shfl_down_sync(0xffffffff, sum, i);
+  }
+
+  if (lane_id == 0) {
+    output[m_id * n + n_id] = sum;
+  }
+}
+
+template <class T>
+bool TryMatMul4Bits(
+    T* output,
+    const T* a_data,
+    const uint8_t* b_data_quant,
+    const T* scales_data,
+    const uint8_t* zero_points,
+    int m,
+    int n,
+    int k,
+    int block_size,
+    int shared_mem_per_block,
+    cudaStream_t stream) {
+  if (n % kColsPerThreadBlock != 0 || k % 8 != 0 || m > 1) {
+    return false;
+  }
+  dim3 blocks((n + kColsPerThreadBlock - 1) / kColsPerThreadBlock, m);
+  dim3 threads(kWarpSize, kColsPerThreadBlock);
+  int blocks_per_K = (k + block_size - 1) / block_size;
+  int blocks_per_thread_block = blocks_per_K * kColsPerThreadBlock;
+  int shared_mem_size = sizeof(T) * blocks_per_thread_block + blocks_per_thread_block / 2;
+  if (shared_mem_size > shared_mem_per_block) {
+    return false;
+  }
+
+  if (16 == block_size) {
+    MatMulFloatInt4Kernel<T, 16><<<blocks, threads, shared_mem_size, stream>>>(
+        output, a_data, b_data_quant, scales_data, zero_points, m, n, k, blocks_per_K);
+  } else if (32 == block_size) {
+    MatMulFloatInt4Kernel<T, 32><<<blocks, threads, shared_mem_size, stream>>>(
+        output, a_data, b_data_quant, scales_data, zero_points, m, n, k, blocks_per_K);
+  } else if (64 == block_size) {
+    MatMulFloatInt4Kernel<T, 64><<<blocks, threads, shared_mem_size, stream>>>(
+        output, a_data, b_data_quant, scales_data, zero_points, m, n, k, blocks_per_K);
+  } else if (128 == block_size) {
+    MatMulFloatInt4Kernel<T, 128><<<blocks, threads, shared_mem_size, stream>>>(
+        output, a_data, b_data_quant, scales_data, zero_points, m, n, k, blocks_per_K);
+  } else {
+    ORT_THROW("block size ", block_size, " is not supported");
+  }
+
+  return true;
+}
+
+template bool TryMatMul4Bits<float>(
+    float* output,
+    const float* a_data,
+    const uint8_t* b_data_quant,
+    const float* scales_data,
+    const uint8_t* zero_points,
+    int m,
+    int n,
+    int k,
+    int block_size,
+    int shared_mem_per_block,
+    cudaStream_t stream);
+
+template bool TryMatMul4Bits<half>(
+    half* output,
+    const half* a_data,
+    const uint8_t* b_data_quant,
+    const half* scales_data,
+    const uint8_t* zero_points,
+    int m,
+    int n,
+    int k,
+    int block_size,
+    int shared_mem_per_block,
+    cudaStream_t stream);
+
+}  // namespace cuda
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/quantization/matmul_nbits.cuh b/onnxruntime/contrib_ops/cuda/quantization/matmul_nbits.cuh
new file mode 100644
index 0000000000000..9ccbe4c4d97a8
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/quantization/matmul_nbits.cuh
@@ -0,0 +1,27 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+#include "core/providers/cuda/shared_inc/cuda_utils.h"
+
+namespace onnxruntime {
+namespace contrib {
+namespace cuda {
+
+template <class T>
+bool TryMatMul4Bits(
+    T* output,
+    const T* a_data,
+    const uint8_t* b_data_quant,
+    const T* scales_data,
+    const uint8_t* zero_points,
+    int m,
+    int n,
+    int k,
+    int block_size,
+    int shared_mem_per_block,
+    cudaStream_t stream);
+
+}  // namespace cuda
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/tensor/dynamic_time_warping.cc b/onnxruntime/contrib_ops/cuda/tensor/dynamic_time_warping.cc
new file mode 100644
index 0000000000000..381316f605fc9
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/tensor/dynamic_time_warping.cc
@@ -0,0 +1,56 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "contrib_ops/cuda/tensor/dynamic_time_warping.h"
+#include "contrib_ops/cuda/tensor/dynamic_time_warping_impl.h"
+#include "core/providers/cpu/tensor/utils.h"
+
+#include <vector>
+#include <numeric>
+
+using namespace onnxruntime::common;
+
+namespace onnxruntime {
+namespace contrib {
+namespace cuda {
+
+ONNX_OPERATOR_KERNEL_EX(
+    DynamicTimeWarping,
+    kMSDomain,
+    1,
+    kCudaExecutionProvider,
+    (*KernelDefBuilder::Create())
+        .TypeConstraint("F", DataTypeImpl::GetTensorType<float>())
+        .TypeConstraint("I", DataTypeImpl::GetTensorType<int32_t>()),
+    DynamicTimeWarping);
+
+Status DynamicTimeWarping::ComputeInternal(OpKernelContext* ctx) const {
+  const Tensor& input_tensor = *ctx->Input<Tensor>(0);
+  const auto& input_dims = input_tensor.Shape().GetDims();
+  int rank = SafeInt<int>(input_dims.size());
+  ORT_ENFORCE(rank == 2 || (rank == 3 && input_dims[0] == 1), "Currently input rank must be 2, or (3 with first dim equal to 1), but got:", rank);
+
+  const size_t rows = SafeInt<size_t>(input_dims[rank == 3 ? 1 : 0]);
+  const size_t cols = SafeInt<size_t>(input_dims[rank == 3 ? 2 : 1]);
+  size_t max_index_len = 0;
+
+  size_t buffer_size_in_bytes = GetDynamicTimeWarpingBufferSize(1, rows, cols, max_index_len);
+  IAllocatorUniquePtr<int8_t> buffer = GetScratchBuffer<int8_t>(buffer_size_in_bytes, ctx->GetComputeStream());
+
+  size_t result_len = 0;
+  ORT_RETURN_IF_ERROR(LaunchDynamicTimeWarping(
+      this->Stream(ctx), this->GetDeviceProp(), 1, rows, cols,
+      input_tensor.Data<float>(), buffer.get(), result_len));
+
+  Tensor* output_tensor = ctx->Output(0, TensorShape{2LL, SafeInt<int64_t>(result_len)});
+
+  return CUDA_CALL(cudaMemcpy2DAsync(
+      output_tensor->MutableData<int32_t>(), result_len * sizeof(int32_t),
+      buffer.get() + ((max_index_len - result_len) * sizeof(int32_t)), max_index_len * sizeof(int32_t),
+      result_len * sizeof(int32_t), 2,
+      cudaMemcpyDeviceToDevice, this->Stream(ctx)));
+}
+
+}  // namespace cuda
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/tensor/dynamic_time_warping.h b/onnxruntime/contrib_ops/cuda/tensor/dynamic_time_warping.h
new file mode 100644
index 0000000000000..3083e19aff6f2
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/tensor/dynamic_time_warping.h
@@ -0,0 +1,26 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+#include "core/providers/cuda/cuda_kernel.h"
+#include <core/common/safeint.h>
+
+namespace onnxruntime {
+namespace contrib {
+namespace cuda {
+
+using onnxruntime::OpKernelContext;
+using onnxruntime::OpKernelInfo;
+using onnxruntime::cuda::CudaKernel;
+class DynamicTimeWarping final : public CudaKernel {
+ public:
+  DynamicTimeWarping(const OpKernelInfo& info) : CudaKernel(info) {}
+
+  ~DynamicTimeWarping() = default;
+
+  Status ComputeInternal(OpKernelContext* context) const override;
+};
+
+}  // namespace cuda
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/tensor/dynamic_time_warping_impl.cu b/onnxruntime/contrib_ops/cuda/tensor/dynamic_time_warping_impl.cu
new file mode 100644
index 0000000000000..7c3f2963207e6
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/tensor/dynamic_time_warping_impl.cu
@@ -0,0 +1,142 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "contrib_ops/cuda/tensor/dynamic_time_warping_impl.h"
+#include "core/providers/cuda/cu_inc/common.cuh"
+#include "core/common/common.h"
+#include <core/common/safeint.h>
+#include <cfloat>
+
+using namespace onnxruntime::cuda;
+
+namespace onnxruntime {
+namespace contrib {
+namespace cuda {
+
+__global__ void DynamicTimeWarpingInitCost(float* cost_buffer, int8_t* trace_buffer, size_t cols_plus_1) {
+    int r = blockIdx.x;
+    cost_buffer += cols_plus_1 * r;
+    for (size_t i = threadIdx.x; i < cols_plus_1; i += blockDim.x) {
+        cost_buffer[i] = FLT_MAX;
+    }
+    if (r == 0) {
+      for (size_t i = threadIdx.x; i < cols_plus_1; i += blockDim.x) {
+        trace_buffer[i] = 2;
+      }
+    }
+    if (threadIdx.x == 0) trace_buffer[cols_plus_1 * r] = 1;
+    if (threadIdx.x == 0 && r == 0) *cost_buffer = 0.0f;
+}
+
+__global__ void DynamicTimeWarpingKernel(
+    size_t rows,
+    size_t cols,
+    size_t max_index_len,
+    const float* input,
+    float* cost_buffer,
+    int8_t* trace_buffer,
+    int32_t* result_buffer,
+    size_t* result_len_device
+) {
+  const int diag_max = static_cast<int>(rows + cols);
+  for (int d = 1; d <= diag_max; d++) {
+    for (int c = threadIdx.x + 1; c <= cols; c += blockDim.x) {
+        int r = d - c;
+        if (r >= 1 && r <= rows) {
+            int cost_idx = ((r - 1) * (cols + 1) + (c - 1)); //[r - 1, c - 1]
+            const float c0 = cost_buffer[cost_idx];
+            const float c1 = cost_buffer[cost_idx + 1]; // [r - 1, c]
+            const float c2 = cost_buffer[cost_idx + cols + 1]; // [r, c - 1]
+
+            float cost;
+            int8_t t;
+            if (c0 < c1 && c0 < c2) {
+                cost = c0;
+                t = 0;
+            } else if (c1 < c0 && c1 < c2) {
+                cost = c1;
+                t = 1;
+            } else {
+                cost = c2;
+                t = 2;
+            }
+            cost_idx += ((cols + 1) + 1);
+            cost_buffer[cost_idx] = cost + input[(r - 1) * cols + (c - 1)];
+            trace_buffer[cost_idx] = t;
+        }
+    }
+    __syncthreads();
+  }
+
+  //back tracing, reverse append to result buffer
+  if (threadIdx.x == 0) {
+    int r = rows - 1;
+    int c = cols - 1;
+    int pos = static_cast<int>(max_index_len); // reverse put
+    while (r >= 0 && c >= 0) {
+        --pos;
+        result_buffer[pos] = r;
+        result_buffer[max_index_len + pos] = c;
+        const int trace_index = (r + 1) * (cols + 1) + (c + 1);
+        int8_t t = trace_buffer[trace_index];
+        switch (t) {
+        case 0: r -= 1; c -= 1; break;
+        case 1: r -= 1; break;
+        default: c -= 1; break;
+        }
+    }
+    *result_len_device = max_index_len - static_cast<size_t>(pos);
+  }
+}
+
+size_t GetDynamicTimeWarpingBufferSize(size_t batch, size_t rows, size_t cols, size_t& max_index_len) {
+  max_index_len = rows + cols + 1;
+  size_t cost_buffer_size = ((rows + 1) * (cols + 1));
+  return batch * max_index_len * 2 * sizeof(int32_t) + // two index arrays
+         sizeof(int64_t) + // final index array length
+         batch* cost_buffer_size * sizeof(float) + // cost buffer
+         batch* cost_buffer_size * sizeof(int8_t); // trace buffer
+}
+
+Status LaunchDynamicTimeWarping(
+    cudaStream_t stream,
+    const cudaDeviceProp& device_prop,
+    size_t batch,
+    size_t rows,
+    size_t cols,
+    const float* input,
+    void* buffer,
+    size_t& result_len
+) {
+  ORT_ENFORCE(batch == 1);
+  size_t max_index_len = rows + cols + 1;
+  int32_t* result_buffer = (int32_t*)buffer;
+  size_t* result_len_device_buf = (size_t*)(result_buffer + (batch * max_index_len * 2));
+  float* cost_buffer = (float*)(result_len_device_buf + 1);
+  int8_t* trace_buffer = (int8_t*)(cost_buffer + ((rows + 1) * (cols + 1)));
+
+  dim3 block(device_prop.maxThreadsPerBlock);
+  dim3 grid_init((unsigned)SafeInt<unsigned>(rows + 1), (unsigned)SafeInt<unsigned>(batch));
+  DynamicTimeWarpingInitCost<<<grid_init, block, 0, stream>>>(cost_buffer, trace_buffer, cols+1);
+  ORT_RETURN_IF_ERROR(CUDA_CALL(cudaGetLastError()));
+
+  dim3 grid(1, (unsigned)SafeInt<unsigned>(batch));
+  DynamicTimeWarpingKernel<<<grid, block, 0, stream>>>(
+    rows,
+    cols,
+    max_index_len,
+    input,
+    cost_buffer,
+    trace_buffer,
+    result_buffer,
+    result_len_device_buf);
+  ORT_RETURN_IF_ERROR(CUDA_CALL(cudaGetLastError()));
+
+  ORT_RETURN_IF_ERROR(CUDA_CALL(cudaMemcpyAsync(&result_len, result_len_device_buf, sizeof(size_t), cudaMemcpyDeviceToHost, stream)));
+  ORT_RETURN_IF_ERROR(CUDA_CALL(cudaGetLastError()));
+  return CUDA_CALL(cudaStreamSynchronize(stream));
+}
+
+}  // namespace cuda
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/tensor/dynamic_time_warping_impl.h b/onnxruntime/contrib_ops/cuda/tensor/dynamic_time_warping_impl.h
new file mode 100644
index 0000000000000..cb4a0dfb16807
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/tensor/dynamic_time_warping_impl.h
@@ -0,0 +1,25 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+#include "core/providers/cuda/shared_inc/cuda_utils.h"
+
+namespace onnxruntime {
+namespace contrib {
+namespace cuda {
+
+size_t GetDynamicTimeWarpingBufferSize(size_t batch, size_t rows, size_t cols, size_t& max_index_len);
+
+Status LaunchDynamicTimeWarping(
+    cudaStream_t stream,
+    const cudaDeviceProp& device_prop,
+    size_t batch,
+    size_t rows,
+    size_t cols,
+    const float* input,
+    void* buffer,
+    size_t& result_len);
+
+}  // namespace cuda
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/tensor/unfold.cc b/onnxruntime/contrib_ops/cuda/tensor/unfold.cc
new file mode 100644
index 0000000000000..c38c8c5317f0a
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/tensor/unfold.cc
@@ -0,0 +1,55 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "contrib_ops/cuda/tensor/unfold.h"
+#include "contrib_ops/cuda/tensor/unfold_impl.h"
+#include "core/providers/cpu/tensor/utils.h"
+
+#include <vector>
+#include <numeric>
+
+using namespace onnxruntime::common;
+
+namespace onnxruntime {
+namespace contrib {
+namespace cuda {
+
+ONNX_OPERATOR_KERNEL_EX(
+    UnfoldTensor,
+    kMSDomain,
+    1,
+    kCudaExecutionProvider,
+    (*KernelDefBuilder::Create())
+        .TypeConstraint("T", DataTypeImpl::AllTensorTypes()),
+    UnfoldTensor);
+
+Status UnfoldTensor::ComputeInternal(OpKernelContext* ctx) const {
+  const Tensor& input_tensor = *ctx->Input<Tensor>(0);
+  const auto& input_dims = input_tensor.Shape().GetDims();
+  int rank = SafeInt<int>(input_dims.size());
+
+  int dim = SafeInt<int>(HandleNegativeAxis(dim_, rank));
+  ORT_ENFORCE(dim < rank, "input rank:", rank, " is not bigger than attribut specified dim: ", dim);
+  ORT_ENFORCE(input_dims[dim] >= size_, "dimsize:", input_dims[dim], " is less than unfold size:", size_);
+
+  int64_t leading_dims = std::accumulate(input_dims.begin(), input_dims.begin() + dim, 1LL, std::multiplies<int64_t>());
+  int64_t tailing_dims = std::accumulate(input_dims.begin() + (dim + 1), input_dims.end(), 1LL, std::multiplies<int64_t>());
+
+  std::vector<int64_t> output_dims(rank + 1, 0);
+  std::copy(input_dims.begin(), input_dims.end(), output_dims.begin());
+  output_dims[dim] = (input_dims[dim] - size_) / step_ + 1;
+  output_dims.back() = size_;
+  TensorShape output_shape(output_dims);
+  Tensor* output_tensor = ctx->Output(0, output_shape);
+
+  cudaStream_t stream = this->Stream(ctx);
+  const cudaDeviceProp& device_prop = this->GetDeviceProp();
+  size_t element_size = input_tensor.DataType()->Size();
+  return LaunchUnfoldTensor(
+      stream, device_prop, element_size, input_tensor.DataRaw(), output_tensor->MutableDataRaw(),
+      leading_dims, input_dims[dim], tailing_dims, size_, step_);
+}
+
+}  // namespace cuda
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/tensor/unfold.h b/onnxruntime/contrib_ops/cuda/tensor/unfold.h
new file mode 100644
index 0000000000000..1717687593470
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/tensor/unfold.h
@@ -0,0 +1,39 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+#include "core/providers/cuda/cuda_kernel.h"
+#include <core/common/safeint.h>
+
+namespace onnxruntime {
+namespace contrib {
+namespace cuda {
+
+using onnxruntime::OpKernelContext;
+using onnxruntime::OpKernelInfo;
+using onnxruntime::cuda::CudaKernel;
+class UnfoldTensor final : public CudaKernel {
+ public:
+  UnfoldTensor(const OpKernelInfo& info) : CudaKernel(info) {
+    dim_ = SafeInt<int>(info.GetAttrOrDefault<int64_t>("dim", -1LL));
+    step_ = SafeInt<int>(info.GetAttrOrDefault<int64_t>("step", 1LL));
+    ORT_ENFORCE(step_ > 0, "step must greater than zero!");
+
+    int64_t temp_size;
+    ORT_ENFORCE(info.GetAttr("size", &temp_size).IsOK());
+    size_ = SafeInt<int>(temp_size);
+  }
+
+  ~UnfoldTensor() = default;
+
+  Status ComputeInternal(OpKernelContext* context) const override;
+
+ private:
+  int dim_;
+  int size_;
+  int step_;
+};
+
+}  // namespace cuda
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/tensor/unfold_impl.cu b/onnxruntime/contrib_ops/cuda/tensor/unfold_impl.cu
new file mode 100644
index 0000000000000..a3c93ceb33c46
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/tensor/unfold_impl.cu
@@ -0,0 +1,101 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "contrib_ops/cuda/tensor/unfold_impl.h"
+#include "core/providers/cuda/cu_inc/common.cuh"
+#include "core/common/common.h"
+#include <core/common/safeint.h>
+
+using namespace onnxruntime::cuda;
+
+namespace onnxruntime {
+namespace contrib {
+namespace cuda {
+
+template <typename T>
+__global__ void UnfoldTensorKernel(
+    const T* input,
+    T* output,
+    int64_t N,
+    int64_t unfold_size, // stride_tailing_dim_dst
+    int64_t tailing_dims_size, // stride_fold_dim_dst = tailing_dims_size * unfold_size, stride_append_dim_src = tailing_dims_size
+    int64_t stride_leading_dst,
+    int64_t stride_fold_dim_src,
+    int64_t stride_leading_src
+) {
+  int64_t idx = (int64_t)blockIdx.x * blockDim.x + threadIdx.x;
+  if (idx >= N) return;
+
+  const int64_t idx_leading = idx / stride_leading_dst;
+  int64_t n = idx % stride_leading_dst;
+  const int64_t stride_fold_dim_dst = tailing_dims_size * unfold_size;
+  const int64_t idx_fold = n / stride_fold_dim_dst;
+  n %= stride_fold_dim_dst;
+  const int64_t idx_tailing = n / unfold_size;
+  const int64_t idx_append = n % unfold_size;
+
+  int64_t idx_src = idx_leading * stride_leading_src + idx_fold * stride_fold_dim_src + idx_tailing + idx_append * tailing_dims_size;
+  output[idx] = input[idx_src];
+}
+
+
+Status LaunchUnfoldTensor(
+    cudaStream_t stream,
+    const cudaDeviceProp& device_prop,
+    size_t element_size,
+    const void* input,
+    void* output,
+    int64_t leading_dims_size,
+    int64_t unfold_dim_size,
+    int64_t tailing_dims_size,
+    int64_t unfold_size,
+    int64_t step_size
+) {
+  int64_t TPB = device_prop.maxThreadsPerBlock;
+  int64_t unfold_dim_size_dst = (unfold_dim_size - unfold_size) / step_size + 1;
+  int64_t N = leading_dims_size * unfold_dim_size_dst * tailing_dims_size * unfold_size;
+  int64_t num_blocks = (N + TPB - 1) / TPB;
+
+  int64_t stride_leading_dst = unfold_size * tailing_dims_size * unfold_dim_size_dst;
+
+  int64_t stride_fold_dim_src = tailing_dims_size * step_size;
+  int64_t stride_leading_src = tailing_dims_size * unfold_dim_size;
+
+  dim3 block((unsigned)SafeInt<unsigned>(TPB));
+  dim3 grid((unsigned)SafeInt<unsigned>(num_blocks));
+  switch (element_size) {
+    case 1:
+        UnfoldTensorKernel<int8_t><<<grid, block, 0, stream>>>(
+            (const int8_t*)input, (int8_t*)output, N, unfold_size,
+            tailing_dims_size, stride_leading_dst, stride_fold_dim_src, stride_leading_src);
+        break;
+    case 2:
+        UnfoldTensorKernel<int16_t><<<grid, block, 0, stream>>>(
+            (const int16_t*)input, (int16_t*)output, N, unfold_size,
+            tailing_dims_size, stride_leading_dst, stride_fold_dim_src, stride_leading_src);
+        break;
+    case 4:
+        UnfoldTensorKernel<int32_t><<<grid, block, 0, stream>>>(
+            (const int32_t*)input, (int32_t*)output, N, unfold_size,
+            tailing_dims_size, stride_leading_dst, stride_fold_dim_src, stride_leading_src);
+        break;
+    case 8:
+        UnfoldTensorKernel<int64_t><<<grid, block, 0, stream>>>(
+            (const int64_t*)input, (int64_t*)output, N, unfold_size,
+            tailing_dims_size, stride_leading_dst, stride_fold_dim_src, stride_leading_src);
+        break;
+    case 16:
+        UnfoldTensorKernel<float4><<<grid, block, 0, stream>>>(
+            (const float4*)input, (float4*)output, N, unfold_size,
+            tailing_dims_size, stride_leading_dst, stride_fold_dim_src, stride_leading_src);
+        break;
+    default:
+        return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "Unsupported element_size");
+  }
+
+  return CUDA_CALL(cudaGetLastError());
+}
+
+}  // namespace cuda
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/tensor/unfold_impl.h b/onnxruntime/contrib_ops/cuda/tensor/unfold_impl.h
new file mode 100644
index 0000000000000..9e82dccdec23c
--- /dev/null
+++ b/onnxruntime/contrib_ops/cuda/tensor/unfold_impl.h
@@ -0,0 +1,25 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+#include "core/providers/cuda/shared_inc/cuda_utils.h"
+
+namespace onnxruntime {
+namespace contrib {
+namespace cuda {
+
+Status LaunchUnfoldTensor(
+    cudaStream_t stream,
+    const cudaDeviceProp& device_prop,
+    size_t element_size,
+    const void* input,
+    void* output,
+    int64_t leading_dims_size,
+    int64_t tailing_dims_size,
+    int64_t dim_size,
+    int64_t unfold_size,
+    int64_t step_size);
+
+}  // namespace cuda
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/transformers/beam_search.cc b/onnxruntime/contrib_ops/cuda/transformers/beam_search.cc
index d18460e016444..2a90e4911f286 100644
--- a/onnxruntime/contrib_ops/cuda/transformers/beam_search.cc
+++ b/onnxruntime/contrib_ops/cuda/transformers/beam_search.cc
@@ -33,6 +33,28 @@ ONNX_OPERATOR_KERNEL_EX(
                               DataTypeImpl::GetTensorType<MLFloat16>()}),
     BeamSearch);
 
+ONNX_OPERATOR_KERNEL_EX(
+    WhisperBeamSearch,
+    kMSDomain,
+    1,
+    kCudaExecutionProvider,
+    (*KernelDefBuilder::Create())
+        .InputMemoryType(OrtMemTypeCPUInput, 0)    // 'input_ids' needs to be on CPU
+        .InputMemoryType(OrtMemTypeCPUInput, 1)    // 'max_length' needs to be on CPU
+        .InputMemoryType(OrtMemTypeCPUInput, 2)    // 'min_length' needs to be on CPU
+        .InputMemoryType(OrtMemTypeCPUInput, 3)    // 'num_beams' needs to be on CPU
+        .InputMemoryType(OrtMemTypeCPUInput, 4)    // 'num_return_sequences' needs to be on CPU
+        .InputMemoryType(OrtMemTypeCPUInput, 5)    // 'length_penalty' needs to be on CPU
+        .InputMemoryType(OrtMemTypeCPUInput, 6)    // 'repetition_penalty' needs to be on CPU
+        .InputMemoryType(OrtMemTypeCPUInput, 9)    // 'attention_mask' needs to be on CPU
+        .InputMemoryType(OrtMemTypeCPUInput, 10)   // 'decoder_input_ids' needs to be on CPU
+        .InputMemoryType(OrtMemTypeCPUInput, 11)   // 'logits_processor' needs to be on CPU
+        .OutputMemoryType(OrtMemTypeCPUOutput, 0)  // 'sequences' output on CPU
+        .OutputMemoryType(OrtMemTypeCPUOutput, 1)  // 'sequences_scores' output on CPU
+        .TypeConstraint("T", {DataTypeImpl::GetTensorType<float>(),
+                              DataTypeImpl::GetTensorType<MLFloat16>()}),
+    WhisperBeamSearch);
+
 transformers::CudaTensorConsoleDumper g_cuda_dumper;
 
 BeamSearch::BeamSearch(const OpKernelInfo& info)
@@ -58,7 +80,9 @@ BeamSearch::BeamSearch(const OpKernelInfo& info)
                                   GenerationCudaDeviceHelper::UpdateDecoderFeeds<MLFloat16>,
                                   GenerationCudaDeviceHelper::ExpandBuffer<int32_t>,
                                   GenerationCudaDeviceHelper::ExpandBuffer<float>,
-                                  GenerationCudaDeviceHelper::ExpandBuffer<MLFloat16>);
+                                  GenerationCudaDeviceHelper::ExpandBuffer<MLFloat16>,
+                                  GenerationCudaDeviceHelper::UpdateDecoderCrossQK,
+                                  GenerationCudaDeviceHelper::FinalizeDecoderCrossQK);
 
   SetConsoleDumper(&g_cuda_dumper);
 
@@ -87,6 +111,60 @@ Status BeamSearch::Compute(OpKernelContext* context) const {
   return s;
 }
 
+WhisperBeamSearch::WhisperBeamSearch(const OpKernelInfo& info)
+    : onnxruntime::contrib::transformers::WhisperBeamSearch(info) {
+  SetDeviceHelpers(GenerationCudaDeviceHelper::AddToFeeds,
+                   GenerationCudaDeviceHelper::TopK,
+                   GenerationCudaDeviceHelper::DeviceCopy<float>,
+                   GenerationCudaDeviceHelper::DeviceCopy<int32_t>,
+                   GenerationCudaDeviceHelper::ProcessLogits<float>,
+                   GenerationCudaDeviceHelper::ProcessLogits<MLFloat16>,
+                   GenerationCudaDeviceHelper::InitBeamState<float>,
+                   GenerationCudaDeviceHelper::InitBeamState<MLFloat16>,
+                   GenerationCudaDeviceHelper::CreateBeamScorer);
+
+#ifndef USE_ROCM
+  SetDeviceHelpers_Cuda(GenerationCudaDeviceHelper::ReorderPastState, GenerationCudaDeviceHelper::InitCacheIndir);
+#endif
+
+  SetDeviceHelpers_Gpt(GenerationCudaDeviceHelper::UpdateGptFeeds<float>,
+                       GenerationCudaDeviceHelper::UpdateGptFeeds<MLFloat16>);
+
+  SetDeviceHelpers_EncoderDecoder(GenerationCudaDeviceHelper::UpdateDecoderFeeds<float>,
+                                  GenerationCudaDeviceHelper::UpdateDecoderFeeds<MLFloat16>,
+                                  GenerationCudaDeviceHelper::ExpandBuffer<int32_t>,
+                                  GenerationCudaDeviceHelper::ExpandBuffer<float>,
+                                  GenerationCudaDeviceHelper::ExpandBuffer<MLFloat16>,
+                                  GenerationCudaDeviceHelper::UpdateDecoderCrossQK,
+                                  GenerationCudaDeviceHelper::FinalizeDecoderCrossQK);
+
+  SetConsoleDumper(&g_cuda_dumper);
+
+#ifndef USE_ROCM
+  cuda_device_prop_ = &reinterpret_cast<const CUDAExecutionProvider*>(info.GetExecutionProvider())->GetDeviceProp();
+
+  cuda_device_arch_ = static_cast<const cudaDeviceProp*>(cuda_device_prop_)->major * 100 +
+                      static_cast<const cudaDeviceProp*>(cuda_device_prop_)->minor * 10;
+#endif
+}
+
+Status WhisperBeamSearch::ComputeInternal(OpKernelContext* context) const {
+  return onnxruntime::contrib::transformers::WhisperBeamSearch::Compute(context);
+}
+
+Status WhisperBeamSearch::Compute(OpKernelContext* context) const {
+  auto s = ComputeInternal(context);
+
+  if (s.IsOK()) {
+    auto err = cudaGetLastError();
+    if (err != cudaSuccess) {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "CUDA error ", cudaGetErrorName(err), ":", cudaGetErrorString(err));
+    }
+  }
+
+  return s;
+}
+
 }  // namespace cuda
 }  // namespace contrib
 }  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/transformers/beam_search.h b/onnxruntime/contrib_ops/cuda/transformers/beam_search.h
index dda8271e3a6a0..a4370abd8af46 100644
--- a/onnxruntime/contrib_ops/cuda/transformers/beam_search.h
+++ b/onnxruntime/contrib_ops/cuda/transformers/beam_search.h
@@ -21,6 +21,16 @@ class BeamSearch final : public onnxruntime::contrib::transformers::BeamSearch {
   Status ComputeInternal(OpKernelContext* context) const;
 };
 
+class WhisperBeamSearch final : public onnxruntime::contrib::transformers::WhisperBeamSearch {
+ public:
+  WhisperBeamSearch(const OpKernelInfo& info);
+
+  Status Compute(OpKernelContext* context) const override;
+
+ private:
+  Status ComputeInternal(OpKernelContext* context) const;
+};
+
 }  // namespace cuda
 }  // namespace contrib
 }  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/transformers/generation_cuda_impl.cu b/onnxruntime/contrib_ops/cuda/transformers/generation_cuda_impl.cu
index 07a8896210d2c..dbd7fb010462d 100644
--- a/onnxruntime/contrib_ops/cuda/transformers/generation_cuda_impl.cu
+++ b/onnxruntime/contrib_ops/cuda/transformers/generation_cuda_impl.cu
@@ -1315,6 +1315,296 @@ template void BufferExpansionKernelLauncher(const int32_t* input,
                                             int chunk_size,
                                             cudaStream_t stream);
 
+// Support head_size up to 128
+constexpr unsigned int kTileSize = 32;
+constexpr unsigned int kSeqTileSize = 16;
+
+__global__ void ReorderPastStatesKernel(float4* out_buffer,
+                                        const float4* in_buffer,
+                                        int batch_size,
+                                        int num_heads,
+                                        int max_length,
+                                        int chunked_head_size) {
+  __shared__ float4 tile[kSeqTileSize][kTileSize + 1];
+
+  const int b = blockIdx.z;
+  const int n = blockIdx.y;
+  const int s_base = blockIdx.x * kSeqTileSize;
+  const int s = s_base + threadIdx.y;
+  const int base_offset = (b * num_heads + n) * max_length * chunked_head_size;
+
+  if (s < max_length) {
+    const int in_offset = base_offset + s * chunked_head_size + threadIdx.x;
+    tile[threadIdx.y][threadIdx.x] = in_buffer[in_offset];
+  }
+
+  __syncthreads();
+
+  const int tidx = threadIdx.x + threadIdx.y * chunked_head_size;
+  const int tidx_x = tidx % kSeqTileSize;
+  const int tidx_y = tidx / kSeqTileSize;
+
+  const int s2 = s_base + tidx_x;
+
+  if (s2 < max_length) {
+    const int out_offset = base_offset + tidx_y * max_length + s2;
+    out_buffer[out_offset] = tile[tidx_x][tidx_y];
+  }
+}
+
+void ReorderPastStatesKernelLauncher(void* out_buffer,
+                                     const void* in_buffer,
+                                     int batch_size,
+                                     int num_heads,
+                                     int max_length,
+                                     int head_size,
+                                     int chunk_size,
+                                     cudaStream_t stream) {
+  //[B, N, max_length, H2(head_size/chunk_size), equv_chunk_size] -> [B, N, H2(head_size/chunk_size), max_length, equv_chunk_size]
+  const int chunked_head_size = head_size / chunk_size;
+  const dim3 block(chunked_head_size, kSeqTileSize);
+  const dim3 grid((max_length + kSeqTileSize - 1) / kSeqTileSize, num_heads, batch_size);
+  if (chunk_size == 4 || chunk_size == 8) {
+    ReorderPastStatesKernel<<<grid, block, 0, stream>>>(reinterpret_cast<float4*>(out_buffer),
+                                                        reinterpret_cast<const float4*>(in_buffer),
+                                                        batch_size,
+                                                        num_heads,
+                                                        max_length,
+                                                        chunked_head_size);
+  } else {
+    ORT_THROW("ReorderPastStatesKernelLauncher only support float or half");
+  }
+}
+
+template <typename T>
+__global__ void CopyCrossQKSingleDecodeStepKernel(
+    T* target, // shape [batchxbeam, layer_head_pair_count, max_length, frame]
+    T** qk_layer_pointers,
+    int token_index,
+    int num_layers,
+    int num_heads,
+    const int* cross_qk_layer_head_pairs,
+    int frames,
+    int max_length
+) {
+  const int pair = blockIdx.x;
+  const int layer_head_pair_count = gridDim.x;
+  const int bbm = blockIdx.y;
+  cross_qk_layer_head_pairs += (pair * 2);
+  const int layer = *cross_qk_layer_head_pairs;
+  const int head = *(cross_qk_layer_head_pairs + 1);
+
+  target += ((int64_t)bbm * layer_head_pair_count + pair) * max_length * frames + ((int64_t)token_index * frames);
+  T* src = qk_layer_pointers[layer] + ((int64_t)bbm * num_heads + head) * frames;
+
+  for (int tid = threadIdx.x; tid < frames; tid += blockDim.x) {
+    target[tid] = src[tid]; // use vectorized read write in future if needed
+  }
+}
+
+void LaunchCopyCrossQKSingleDecodeStep(
+    cudaStream_t stream,
+    float* cross_qk_buffer_data,
+    float** qk_layer_pointers,
+    int token_index,
+    int batchxbeam,
+    int num_layers,
+    int num_heads,
+    int cross_qk_layer_head_pair_count,
+    const int* cross_qk_layer_head_pairs,
+    int frames,
+    int max_length
+) {
+  dim3 block(512);
+  dim3 grid(cross_qk_layer_head_pair_count, batchxbeam);
+  typedef typename ToCudaType<float>::MappedType CudaT;
+
+  CopyCrossQKSingleDecodeStepKernel<<<grid, block, 0, stream>>>(
+      (CudaT*)cross_qk_buffer_data,
+      (CudaT**)qk_layer_pointers,
+      token_index,
+      num_layers,
+      num_heads,
+      cross_qk_layer_head_pairs,
+      frames,
+      max_length
+  );
+}
+
+
+template <typename T>
+__global__ void CopyDecoderCrossQKAllStepsKernel(
+    int context_decoding_len,
+    int num_beams,
+    int num_return_sequences,
+    int max_length,
+    int frames_of_k,
+    const T* cross_qk_buffer_data, // [batch, num_beams, layer_head_pair_count, max_length, frames]
+    T* cross_qk_output, // [batch, num_return_sequences, layer_head_pair_count, total_decoding_length, frames]
+    const int* cache_indir_data, // [batch, num_beams, max_length]
+    const int32_t* beam_indices
+) {
+  const int pair = blockIdx.y;
+  const int layer_head_pair_count = gridDim.y;
+  const int total_decoding_length = gridDim.x;
+  const int token_decoding_index = blockIdx.x;
+  const int br = blockIdx.z;
+  const int batch = br / num_return_sequences;
+  const int ret_seq_id = br % num_return_sequences;
+
+  // get the real beam index, as the cache_indir_data did not updated in last token
+  const int src_beam = beam_indices[batch * num_beams + ret_seq_id] % num_beams;
+
+  const int64_t offset_in_cache = ((int64_t)batch * num_beams + src_beam) * max_length + token_decoding_index + context_decoding_len;
+  int bm_mapped = ((num_beams <= 1) ? 0: ((token_decoding_index == total_decoding_length - 1) ?  ret_seq_id : cache_indir_data[offset_in_cache]));
+  int bi_src = batch * num_beams + bm_mapped;
+
+  T* target =  cross_qk_output +
+          (((int64_t)br * layer_head_pair_count + (int64_t)pair) * total_decoding_length + token_decoding_index) * frames_of_k;
+  const T* src = cross_qk_buffer_data +
+          ((int64_t)bi_src * layer_head_pair_count * max_length + (int64_t)pair * max_length + token_decoding_index) * frames_of_k;
+  for (int tid = threadIdx.x; tid < frames_of_k; tid += blockDim.x) {
+    target[tid] = src[tid]; // use vectorized read write in future if needed
+  }
+}
+
+void LaunchFinalizeCrossQK(
+    cudaStream_t stream,
+    int iteration_number,
+    int context_decoding_len,
+    int batch_size,
+    int num_beams,
+    int max_length,
+    int cross_qk_layer_head_pair_count,
+    [[maybe_unused]] const int* cross_qk_layer_head_pairs,
+    int frames_of_k,
+    const float* cross_qk_buffer_data,
+    float* cross_qk_output,
+    int num_return_sequences,
+    const int* cache_indir_data,
+    const int32_t* beam_indices
+) {
+  int64_t br = (int64_t)batch_size * num_return_sequences;
+  ORT_ENFORCE(br < 65536L && cross_qk_layer_head_pair_count < 65536);
+  const int total_decoding_length = iteration_number - 1;
+  dim3 block(512);
+  dim3 grid(total_decoding_length, cross_qk_layer_head_pair_count, (unsigned)br);
+  typedef typename ToCudaType<float>::MappedType CudaT;
+
+  CopyDecoderCrossQKAllStepsKernel<<<grid, block, 0, stream>>>(
+    context_decoding_len,
+    num_beams,
+    num_return_sequences,
+    max_length,
+    frames_of_k,
+    (const CudaT*)cross_qk_buffer_data,
+    (CudaT*)cross_qk_output,
+    cache_indir_data,
+    beam_indices);
+}
+
+template <int ElementsPerThreads>
+__global__ void ForceDecodingIdsKernel(
+    float* beam_scores,
+    const int vocab_size,
+    const int32_t* force_ids,
+    int id_len,
+    int step
+) {
+  const int num_beams = gridDim.y;
+  const int beam = blockIdx.y;
+  const int batch = blockIdx.z;
+  beam_scores += (((int64_t)batch * num_beams + beam)* vocab_size); // move to (batch, beam)
+  const int32_t id_wanted = force_ids[((int64_t)batch * id_len) + step];
+  if (id_wanted < 0 || id_wanted >= vocab_size) return;
+
+  const int32_t elements_per_block = (int32_t)blockDim.x * ElementsPerThreads;
+  const int32_t block_start_id = blockIdx.x * elements_per_block;
+
+  int32_t token_id = block_start_id + (int)threadIdx.x;
+  #pragma unroll
+  for (int elem = 0; elem < ElementsPerThreads; elem++) {
+    if (token_id < vocab_size) {
+      beam_scores[token_id] = ((token_id == id_wanted) ? 0.0f : cub::FpLimits<float>::Lowest());
+    }
+    token_id += (int)blockDim.x;
+  }
+}
+
+
+void LaunchForceDecodingIds(
+    float* beam_scores,
+    const int batch_size,
+    const int num_beams,
+    const int vocab_size,
+    const int32_t* force_ids,
+    int id_len,
+    int step,
+    cudaStream_t stream
+) {
+  dim3 blocks(512);
+  constexpr int ElementsPerThreads = 4;
+  unsigned gridx = static_cast<unsigned>((vocab_size + 512 * ElementsPerThreads - 1) / (512 * ElementsPerThreads));
+  dim3 grids(gridx, num_beams, batch_size);
+  ForceDecodingIdsKernel<ElementsPerThreads><<<grids, blocks, 0, stream>>>(
+    beam_scores, vocab_size, force_ids, id_len, step
+  );
+}
+
+template <typename T>
+__global__ void SaveNoSpeechProbsKernel(
+    T* result_no_speech_probs,
+    const float* probs,
+    const int batch_size,
+    const int num_beams,
+    const int vocab_size,
+    const int no_speech_token_id
+) {
+  int b = blockIdx.x * blockDim.x + threadIdx.x;
+  if (b < batch_size) {
+    int64_t src_offset = b * num_beams * vocab_size + no_speech_token_id;
+    result_no_speech_probs[b] = (T)(probs[src_offset]);
+  }
+}
+
+template <typename T>
+void LaunchSaveNoSpeechProbs(
+    T* result_no_speech_probs,      /* [batch]*/
+    const float* probs,             /* [batch, num_beams, vocab_size]*/
+    const int batch_size,
+    const int num_beams,
+    const int vocab_size,
+    const int no_speech_token_id,
+    cudaStream_t stream
+) {
+  int tpb = 256;
+  int bpg = (batch_size + 255) / 256;
+
+  typedef typename ToCudaType<T>::MappedType CudaT;
+  SaveNoSpeechProbsKernel<CudaT><<<bpg, tpb, 0, stream>>>(
+    (CudaT*)result_no_speech_probs, probs, batch_size, num_beams, vocab_size, no_speech_token_id);
+}
+
+template void LaunchSaveNoSpeechProbs<float>(
+    float* result_no_speech_probs,
+    const float* probs,
+    const int batch_size,
+    const int num_beams,
+    const int vocab_size,
+    const int no_speech_token_id,
+    cudaStream_t stream
+);
+
+template void LaunchSaveNoSpeechProbs<MLFloat16>(
+    MLFloat16* result_no_speech_probs,
+    const float* probs,
+    const int batch_size,
+    const int num_beams,
+    const int vocab_size,
+    const int no_speech_token_id,
+    cudaStream_t stream
+);
+
 }  // namespace cuda
 }  // namespace contrib
 }  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/transformers/generation_cuda_impl.h b/onnxruntime/contrib_ops/cuda/transformers/generation_cuda_impl.h
index 8c52f6fd52385..5ed5949196b29 100644
--- a/onnxruntime/contrib_ops/cuda/transformers/generation_cuda_impl.h
+++ b/onnxruntime/contrib_ops/cuda/transformers/generation_cuda_impl.h
@@ -213,6 +213,64 @@ void BufferExpansionKernelLauncher(const T* input,
                                    int chunk_size,
                                    cudaStream_t stream);
 
+void ReorderPastStatesKernelLauncher(void* out_buffer,
+                                     const void* in_buffer,
+                                     int batch_size,
+                                     int num_heads,
+                                     int max_length,
+                                     int head_size,
+                                     int chunk_size,
+                                     cudaStream_t stream);
+
+void LaunchCopyCrossQKSingleDecodeStep(
+    cudaStream_t stream,
+    float* cross_qk_buffer_data,
+    float** qk_layer_pointers,
+    int token_index,
+    int batchxbeam,
+    int num_layers,
+    int num_heads,
+    int cross_qk_layer_head_pair_count,
+    const int* cross_qk_layer_head_pairs,
+    int frames,
+    int max_length);
+
+void LaunchFinalizeCrossQK(
+    cudaStream_t stream,
+    int iteration_number,
+    int context_decoding_len,
+    int batch_size,
+    int num_beams,
+    int max_length,
+    int cross_qk_layer_head_pair_count,
+    const int* cross_qk_layer_head_pairs,
+    int frames_of_k,
+    const float* cross_qk_buffer_data,
+    float* cross_qk_output,
+    int num_return_sequences,
+    const int* cache_indir_data,
+    const int32_t* beam_indices);
+
+void LaunchForceDecodingIds(
+    float* beam_scores,
+    const int batch_size,
+    const int num_beams,
+    const int vocab_size,
+    const int32_t* force_ids,
+    int id_len,
+    int step,
+    cudaStream_t stream);
+
+template <typename T>
+void LaunchSaveNoSpeechProbs(
+    T* result_no_speech_probs, /* [batch]*/
+    const float* probs,        /* [batch, num_beams, vocab_size]*/
+    const int batch_size,
+    const int num_beams,
+    const int vocab_size,
+    const int no_speech_token_id,
+    cudaStream_t stream);
+
 }  // namespace cuda
 }  // namespace contrib
 }  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/transformers/generation_device_helper.cc b/onnxruntime/contrib_ops/cuda/transformers/generation_device_helper.cc
index e4de33499c6ca..380d561bbb23c 100644
--- a/onnxruntime/contrib_ops/cuda/transformers/generation_device_helper.cc
+++ b/onnxruntime/contrib_ops/cuda/transformers/generation_device_helper.cc
@@ -13,6 +13,8 @@
 #include <cuda_runtime.h>
 #include "contrib_ops/cuda/transformers/generation_cuda_impl.h"
 #include "contrib_ops/cuda/transformers/dump_cuda_tensor.h"
+#include "contrib_ops/cpu/transformers/logits_processor.h"
+#include "contrib_ops/cpu/transformers/generation_shared.h"
 #include "contrib_ops/cpu/transformers/subgraph_t5_decoder.h"
 #include "contrib_ops/cpu/transformers/subgraph_gpt.h"
 #include "contrib_ops/cuda/transformers/beam_search_topk.h"
@@ -56,19 +58,23 @@ namespace GenerationCudaDeviceHelper {
 // It might be better to forcefully require the same type since cast node generates
 // extra overhead.
 Status ReorderPastState(
-    const void* cuda_device_prop,
+    const void*,
     Tensor& past_state,
     Tensor& past_state_staging,
     Stream* stream) {
   ORT_ENFORCE(stream);
   cudaStream_t cuda_stream = reinterpret_cast<cudaStream_t>(stream->GetHandle());
-  cublasHandle_t cublas_handle = static_cast<CudaStream*>(stream)->cublas_handle_;
 
   const auto& past_state_shape = past_state.Shape();
 
   const auto& past_state_dims = past_state_shape.GetDims();
   const bool packed_past = past_state_dims.size() == 5;
 
+  size_t batch_size = packed_past ? past_state_dims[1] : past_state_dims[0];
+  size_t num_heads = packed_past ? past_state_dims[2] : past_state_dims[1];
+  size_t max_length = packed_past ? past_state_dims[3] : past_state_dims[2];
+  size_t head_size = packed_past ? past_state_dims[4] : past_state_dims[3];
+
   // Copy the 'K' values into the temp staging buffer
   size_t past_state_size = packed_past ? past_state.SizeInBytes() / 2 : past_state.SizeInBytes();
   void* past_state_staging_buffer = past_state_staging.MutableDataRaw();
@@ -79,27 +85,16 @@ Status ReorderPastState(
   // [B, N, head_size / x, max_length, x], where x = 16 / sizeof(T)
   int64_t chunk_size = static_cast<int64_t>(16 / past_state.DataType()->Size());
 
-  std::vector<size_t> permutation_vector = {0, 1, 3, 2, 4};
-  gsl::span<size_t> permutation(permutation_vector.data(), 5);
-
-  // "Fake" the shapes of the input and output tensors of the Transpose operation to suit our need
-  size_t offset = packed_past ? 1 : 0;
-  TensorShape transpose_input_shape_override = {past_state_shape[offset],
-                                                past_state_shape[offset + 1],
-                                                past_state_shape[offset + 2],
-                                                past_state_shape[offset + 3] / chunk_size,
-                                                chunk_size};
-
-  TensorShape transpose_output_shape_override = {past_state_shape[offset], past_state_shape[offset + 1],
-                                                 past_state_shape[offset + 3] / chunk_size, past_state_shape[offset + 2],
-                                                 chunk_size};
-
-  // TODO(hasesh): Explore perf tuning for this Transpose operation
-  return onnxruntime::cuda::Transpose::DoTranspose(*static_cast<const cudaDeviceProp*>(cuda_device_prop), cuda_stream,
-                                                   cublas_handle, permutation,
-                                                   past_state_staging, past_state,
-                                                   &transpose_input_shape_override,
-                                                   &transpose_output_shape_override);
+  cuda::ReorderPastStatesKernelLauncher(past_state.MutableDataRaw(),
+                                        past_state_staging_buffer,
+                                        static_cast<int>(batch_size),
+                                        static_cast<int>(num_heads),
+                                        static_cast<int>(max_length),
+                                        static_cast<int>(head_size),
+                                        static_cast<int>(chunk_size),
+                                        cuda_stream);
+
+  return Status::OK();
 }
 
 Status InitCacheIndir(Tensor& cache_indir, Stream* stream) {
@@ -210,7 +205,7 @@ Status AddToFeeds(Stream* ort_stream,
   ORT_ENFORCE(total_bytes > 0);
 
   cudaStream_t stream = ort_stream ? static_cast<cudaStream_t>(ort_stream->GetHandle()) : nullptr;
-  auto pinned_buffer = IAllocator::MakeUniquePtr<void>(host_allocator, total_bytes);
+  auto pinned_buffer = IAllocator::MakeUniquePtr<void>(host_allocator, total_bytes, false, ort_stream);
   char* pinned_data = static_cast<char*>(pinned_buffer.get());
   // Copy tensors to one pinned memory buffer (so that we only need copy to GPU once)
   char* destination = pinned_data;
@@ -426,11 +421,21 @@ Status ProcessLogits(const OrtValue& logits,                                 //
   dumper->Print("next_token_scores after softmax", next_token_scores.data(), batch_size, num_beams, vocab_size);
 #endif
 
+  const bool is_whisper_model = (parameters->model_type == onnxruntime::contrib::transformers::IGenerationParameters::kModelTypeWhisper);
+  if (step == 1 && is_whisper_model && parameters->no_speech_probs) {
+    cuda::LaunchSaveNoSpeechProbs<T>(
+        (T*)parameters->no_speech_probs, Y_data, batch_size, num_beams, vocab_size, parameters->no_speech_token, cuda_stream);
+  }
+
+  // NOTE: currently we treat extra decoding ids are same
+  int extra_decoding_len = static_cast<int>(parameters->extra_decoding_ids.size() / parameters->batch_size);
+  const bool need_handle_extra_decoding_ids = is_whisper_model && (!parameters->extra_decoding_ids.empty()) && (extra_decoding_len >= step);
+
   cuda::LaunchLogitsProcessKernel<float>(
       next_token_scores.data(),
       parameters->vocab_mask.data(),
-      step > 1 ? nullptr : parameters->prefix_vocab_mask.data(),  // prefix vocab mask is applied to first step only.
-      nullptr,                                                    // parameters->presence_mask.data(),
+      (step > extra_decoding_len + 1) ? nullptr : parameters->prefix_vocab_mask.data(),  // prefix vocab mask is applied to first step only.
+      nullptr,                                                                           // parameters->presence_mask.data(),
       parameters->presence_penalty,
       parameters->temperature,
       parameters->batch_size,
@@ -445,6 +450,50 @@ Status ProcessLogits(const OrtValue& logits,                                 //
       parameters->no_repeat_ngram_size,
       cuda_stream);
 
+  // Whisper time stamp generation.
+  // TODO: implement it on GPU
+  bool gen_timestamp = is_whisper_model &&
+                       (parameters->logits_processor == onnxruntime::contrib::transformers::IGenerationParameters::kLogitsProcessorTypeWhisper);
+  if (gen_timestamp) {
+    // Copy next token scores to cpu memory, copy Sequences to cpu
+    std::vector<float> cpu_next_token_scores(next_token_scores.size());
+    gsl::span<float> cpu_next_token_scores_span(cpu_next_token_scores.data(), cpu_next_token_scores.size());
+    CUDA_RETURN_IF_ERROR(cudaMemcpyAsync(cpu_next_token_scores.data(),
+                                         next_token_scores.data(),
+                                         next_token_scores.size_bytes(),
+                                         cudaMemcpyDeviceToHost,
+                                         cuda_stream));
+    CUDA_RETURN_IF_ERROR(cudaMemcpyAsync(const_cast<int32_t*>(sequences->GetSequence(0).data()),
+                                         sequences->GetCurrentDeviceSequences().data(),
+                                         sequences->GetSequence(0).size_bytes() * batch_beam_size,
+                                         cudaMemcpyDeviceToHost,
+                                         cuda_stream));
+    constexpr int max_initial_timestamp_index = 50;
+    onnxruntime::contrib::transformers::TimestampLogitsProcessor<float> time_logit_processor(parameters->eos_token_id, max_initial_timestamp_index);
+    onnxruntime::contrib::transformers::NextTokenScores<float> next_token_scores_timestamp({cpu_next_token_scores_span, batch_beam_size, vocab_size});
+
+    CUDA_RETURN_IF_ERROR(cudaStreamSynchronize(cuda_stream));
+    time_logit_processor.Process(sequences, next_token_scores_timestamp);
+    CUDA_RETURN_IF_ERROR(cudaMemcpyAsync(next_token_scores.data(),
+                                         cpu_next_token_scores.data(),
+                                         next_token_scores.size_bytes(),
+                                         cudaMemcpyHostToDevice,
+                                         cuda_stream));
+    CUDA_RETURN_IF_ERROR(cudaStreamSynchronize(cuda_stream));
+  }
+
+  if (need_handle_extra_decoding_ids && !parameters->extra_decoding_ids.empty()) {
+    cuda::LaunchForceDecodingIds(
+        next_token_scores.data(),
+        parameters->batch_size,
+        parameters->num_beams,
+        parameters->vocab_size,
+        parameters->extra_decoding_ids.data(),
+        static_cast<int>(parameters->extra_decoding_ids.size() / parameters->batch_size),
+        step - 1,
+        cuda_stream);
+  }
+
 #ifdef DEBUG_GENERATION
   dumper->Print("next_token_scores after logits process", next_token_scores.data(), batch_size, num_beams, vocab_size);
 #endif
@@ -807,13 +856,11 @@ Status GreedySearchProcessLogits(
 
   // Sequences generated by beam scorer is currently stored in CPU.
   // Copy sequences to device only when repetition penalty or no repeat ngram is used in kernel
-  BufferUniquePtr sequences_buffer;
+  IAllocatorUniquePtr<void> sequences_buffer;
   int current_sequence_length = sequences->GetSequenceLength();
   if (parameters->repetition_penalty != 1.0f) {
     size_t bytes = SafeInt<size_t>(sizeof(int32_t)) * batch_beam_size * parameters->max_length;
-    void* data = allocator->Alloc(bytes);
-    BufferUniquePtr temp_buffer(data, BufferDeleter(allocator));
-    sequences_buffer = std::move(temp_buffer);
+    sequences_buffer = IAllocator::MakeUniquePtr<void>(allocator, bytes, false, stream);
     CUDA_RETURN_IF_ERROR(cudaMemcpyAsync(sequences_buffer.get(), sequences->GetSequence(0).data(), bytes,
                                          cudaMemcpyHostToDevice, cuda_stream));
   }
@@ -1196,14 +1243,14 @@ Status UpdateDecoderFeeds(
 
   if (past_present_share_buffer) {
     // Update past sequence length input
-    const ptrdiff_t past_sequence_length_idx = 2 * (static_cast<ptrdiff_t>(last_outputs.size()) - t5_decoder_first_present_output_idx) + t5_decoder_first_past_input_idx;
+    const ptrdiff_t past_sequence_length_idx = 2 * num_present_tensors + t5_decoder_first_past_input_idx;
     *(next_inputs[past_sequence_length_idx].GetMutable<Tensor>()->MutableData<int32_t>()) = current_length - 1;
 
     // Update beam search specific input for DecoderMaskedSelfAttention (cache indirection) if present
 
     // If the last input is not `past_sequence_length`, then the beam search specific inputs
     // for `DecoderMaskedSelfAttention` is present
-    if (need_cache_indir) {
+    if (need_cache_indir && num_beams > 1) {
       ORT_ENFORCE(!beam_indices_gpu.empty(), "Beam indices must be present on CUDA while using DecoderMaskedMultiHeadAttention with BeamSearch");
 
       // The cache indirection feed comes 2 feeds after the `past_sequence_length` feed
@@ -1528,6 +1575,93 @@ template Status ExpandBuffer<MLFloat16>(
     OrtValue& expanded,
     bool only_copy_shape,
     int max_sequence_length);
+
+Status UpdateDecoderCrossQK(
+    int iteration_number,
+    Stream* stream,
+    OrtValue* cross_qks,
+    IAllocatorUniquePtr<float*>& qk_layer_pointers,
+    int num_layers,
+    int cross_qk_layer_head_pair_count,
+    const int* cross_qk_layer_head_pairs,
+    float* cross_qk_buffer_data,
+    int max_length,
+    AllocatorPtr allocator) {
+  cudaStream_t cuda_stream = stream ? static_cast<cudaStream_t>(stream->GetHandle()) : nullptr;
+
+  if (qk_layer_pointers.get() == nullptr) {
+    // Put all the qk pointers into gpu, as they did not change in following decoding steps
+    // also this help to use single kernel to process each step
+    qk_layer_pointers = IAllocator::MakeUniquePtr<float*>(allocator, static_cast<size_t>(num_layers), false, stream);
+    std::vector<float*> qk_layer_data(num_layers, nullptr);
+    for (int layer = 0; layer < num_layers; layer++) {
+      qk_layer_data[layer] = cross_qks[layer].GetMutable<Tensor>()->MutableData<float>();
+    }
+    CUDA_RETURN_IF_ERROR(cudaMemcpyAsync((void*)qk_layer_pointers.get(), qk_layer_data.data(), sizeof(qk_layer_data[0]) * num_layers,
+                                         cudaMemcpyHostToDevice, cuda_stream));
+  }
+
+  auto cross_qk_layer_shape = cross_qks[0].GetMutable<Tensor>()->Shape();
+  int64_t batchxbeam = cross_qk_layer_shape[0];
+  int64_t num_heads = cross_qk_layer_shape[1];
+  int64_t frames = cross_qk_layer_shape[3];
+
+  cuda::LaunchCopyCrossQKSingleDecodeStep(
+      cuda_stream,
+      cross_qk_buffer_data,
+      qk_layer_pointers.get(),
+      iteration_number - 2,
+      static_cast<int>(batchxbeam),
+      num_layers,
+      static_cast<int>(num_heads),
+      cross_qk_layer_head_pair_count,
+      cross_qk_layer_head_pairs,
+      static_cast<int>(frames),
+      max_length);
+
+  CUDA_RETURN_IF_ERROR(cudaGetLastError());
+
+  return Status::OK();
+}
+
+Status FinalizeDecoderCrossQK(
+    Stream* stream,
+    int iteration_number,
+    int context_decoding_len,
+    int batch_size,
+    int num_beams,
+    int max_length,
+    int cross_qk_layer_head_pair_count,
+    const int* cross_qk_layer_head_pairs,
+    int frames_of_k,
+    const float* cross_qk_buffer_data,
+    float* cross_qk_output,
+    int num_return_sequences,
+    const int* cache_indir_data,
+    gsl::span<const int32_t> beam_indices_gpu) {
+  cudaStream_t cuda_stream = stream ? static_cast<cudaStream_t>(stream->GetHandle()) : nullptr;
+
+  cuda::LaunchFinalizeCrossQK(
+      cuda_stream,
+      iteration_number,
+      context_decoding_len,
+      batch_size,
+      num_beams,
+      max_length,
+      cross_qk_layer_head_pair_count,
+      cross_qk_layer_head_pairs,
+      frames_of_k,
+      cross_qk_buffer_data,
+      cross_qk_output,
+      num_return_sequences,
+      cache_indir_data,
+      beam_indices_gpu.data());
+
+  CUDA_RETURN_IF_ERROR(cudaGetLastError());
+
+  return Status::OK();
+}
+
 }  // namespace GenerationCudaDeviceHelper
 }  // namespace contrib
 }  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/transformers/generation_device_helper.h b/onnxruntime/contrib_ops/cuda/transformers/generation_device_helper.h
index f5f062d7a101b..7a718eb9f66c1 100644
--- a/onnxruntime/contrib_ops/cuda/transformers/generation_device_helper.h
+++ b/onnxruntime/contrib_ops/cuda/transformers/generation_device_helper.h
@@ -150,6 +150,34 @@ Status ExpandBuffer(
     bool only_copy_shape,
     int max_sequence_length = 0);
 
+Status UpdateDecoderCrossQK(
+    int iteration_number,
+    Stream* stream,
+    OrtValue* cross_qks,
+    IAllocatorUniquePtr<float*>& qk_layer_pointers,
+    int num_layers,
+    int cross_qk_layer_head_pair_count,
+    const int* cross_qk_layer_head_pairs,
+    float* cross_qk_buffer_data,
+    int max_length,
+    AllocatorPtr allocator);
+
+Status FinalizeDecoderCrossQK(
+    Stream* stream,
+    int iteration_number,
+    int context_decoding_len,
+    int batch_size,
+    int num_beams,
+    int max_length,
+    int cross_qk_layer_head_pair_count,
+    const int* cross_qk_layer_head_pairs,
+    int frames_of_k,
+    const float* cross_qk_buffer_data,
+    float* cross_qk_output,
+    int num_return_sequences,
+    const int* cache_indir_data,
+    gsl::span<const int32_t> beam_indices);
+
 }  // namespace GenerationCudaDeviceHelper
 }  // namespace contrib
 }  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cuda/transformers/greedy_search.cc b/onnxruntime/contrib_ops/cuda/transformers/greedy_search.cc
index d9014ca8f5c24..812ab0b1bcae6 100644
--- a/onnxruntime/contrib_ops/cuda/transformers/greedy_search.cc
+++ b/onnxruntime/contrib_ops/cuda/transformers/greedy_search.cc
@@ -48,10 +48,12 @@ GreedySearch::GreedySearch(const OpKernelInfo& info)
 
   SetConsoleDumper(&g_cuda_dumper_greedysearch);
 
+#ifndef USE_ROCM
   cuda_device_prop_ = &reinterpret_cast<const CUDAExecutionProvider*>(info.GetExecutionProvider())->GetDeviceProp();
 
   cuda_device_arch_ = static_cast<const cudaDeviceProp*>(cuda_device_prop_)->major * 100 +
                       static_cast<const cudaDeviceProp*>(cuda_device_prop_)->minor * 10;
+#endif
 }
 
 Status GreedySearch::ComputeInternal(OpKernelContext* context) const {
diff --git a/onnxruntime/contrib_ops/js/bert/attention.cc b/onnxruntime/contrib_ops/js/bert/attention.cc
new file mode 100644
index 0000000000000..723ff00aa815e
--- /dev/null
+++ b/onnxruntime/contrib_ops/js/bert/attention.cc
@@ -0,0 +1,24 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "attention.h"
+#include "core/providers/js/js_data_types.h"
+
+namespace onnxruntime {
+namespace contrib {
+namespace js {
+
+using onnxruntime::js::JsepSupportedFloatTypes;
+
+ONNX_OPERATOR_KERNEL_EX(
+    Attention,
+    kMSDomain,
+    1,
+    kJsExecutionProvider,
+    (*KernelDefBuilder::Create())
+        .TypeConstraint("T", JsepSupportedFloatTypes()),
+    Attention);
+
+}  // namespace js
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/js/bert/attention.h b/onnxruntime/contrib_ops/js/bert/attention.h
new file mode 100644
index 0000000000000..0fa823befa9b2
--- /dev/null
+++ b/onnxruntime/contrib_ops/js/bert/attention.h
@@ -0,0 +1,47 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+
+#include "contrib_ops/cpu/bert/attention_base.h"
+#include "core/providers/js/js_kernel.h"
+
+namespace onnxruntime {
+namespace contrib {
+namespace js {
+
+using onnxruntime::contrib::AttentionBase;
+using onnxruntime::js::JsKernel;
+
+class Attention : public JsKernel, AttentionBase {
+ public:
+  explicit Attention(const OpKernelInfo& info) : JsKernel(info), AttentionBase(info, false) {
+    std::vector<int32_t> qkv_sizes(qkv_hidden_sizes_.size());
+    if (qkv_hidden_sizes_.size() > 0) {
+      std::transform(qkv_hidden_sizes_.begin(), qkv_hidden_sizes_.end(), qkv_sizes.begin(),
+                     [](int64_t sz) { return gsl::narrow_cast<int32_t>(sz); });
+    }
+
+    JSEP_INIT_KERNEL_ATTRIBUTE(Attention, ({
+                                 "numHeads" : $1,
+                                 "isUnidirectional" : $2,
+                                 "maskFilterValue" : $3,
+                                 "scale" : $4,
+                                 "doRotary" : $5,
+                                 "qkvHiddenSizes" : $6 ? (Array.from(HEAP32.subarray(Number($7), Number($7) + $6))) : [],
+                                 "pastPresentShareBuffer" : !!$8,
+                               }),
+                               static_cast<int32_t>(num_heads_),
+                               static_cast<int32_t>(is_unidirectional_),
+                               static_cast<int32_t>(mask_filter_value_),
+                               static_cast<int32_t>(scale_),
+                               static_cast<int32_t>(do_rotary_),
+                               static_cast<int32_t>(qkv_hidden_sizes_.size()),
+                               reinterpret_cast<uintptr_t>((qkv_sizes.size() > 0) ? qkv_sizes.data() : nullptr) >> 2,
+                               static_cast<int32_t>(past_present_share_buffer_));
+  }
+};
+
+}  // namespace js
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/js/bert/multi_head_attention.cc b/onnxruntime/contrib_ops/js/bert/multi_head_attention.cc
new file mode 100644
index 0000000000000..c43f8b7f18465
--- /dev/null
+++ b/onnxruntime/contrib_ops/js/bert/multi_head_attention.cc
@@ -0,0 +1,24 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "multi_head_attention.h"
+#include "core/providers/js/js_data_types.h"
+
+namespace onnxruntime {
+namespace contrib {
+namespace js {
+
+using onnxruntime::js::JsepSupportedFloatTypes;
+
+ONNX_OPERATOR_KERNEL_EX(
+    MultiHeadAttention,
+    kMSDomain,
+    1,
+    kJsExecutionProvider,
+    (*KernelDefBuilder::Create())
+        .TypeConstraint("T", JsepSupportedFloatTypes()),
+    MultiHeadAttention);
+
+}  // namespace js
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/js/bert/multi_head_attention.h b/onnxruntime/contrib_ops/js/bert/multi_head_attention.h
new file mode 100644
index 0000000000000..6c63a2ffed4b2
--- /dev/null
+++ b/onnxruntime/contrib_ops/js/bert/multi_head_attention.h
@@ -0,0 +1,36 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+
+#include "contrib_ops/cpu/bert/attention_base.h"
+#include "core/providers/js/js_kernel.h"
+
+namespace onnxruntime {
+namespace contrib {
+namespace js {
+
+using onnxruntime::contrib::AttentionBase;
+using onnxruntime::js::JsKernel;
+
+class MultiHeadAttention : public JsKernel, AttentionBase {
+ public:
+  explicit MultiHeadAttention(const OpKernelInfo& info) : JsKernel(info), AttentionBase(info, false) {
+    JSEP_INIT_KERNEL_ATTRIBUTE(MultiHeadAttention, ({
+                                 "numHeads" : $1,
+                                 "isUnidirectional" : $2,
+                                 "maskFilterValue" : $3,
+                                 "scale" : $4,
+                                 "doRotary" : $5,
+                               }),
+                               static_cast<int32_t>(num_heads_),
+                               static_cast<int32_t>(is_unidirectional_),
+                               static_cast<int32_t>(mask_filter_value_),
+                               static_cast<int32_t>(scale_),
+                               static_cast<int32_t>(do_rotary_));
+  }
+};
+
+}  // namespace js
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/js/bias_add.cc b/onnxruntime/contrib_ops/js/bias_add.cc
new file mode 100644
index 0000000000000..9e70dead6a5da
--- /dev/null
+++ b/onnxruntime/contrib_ops/js/bias_add.cc
@@ -0,0 +1,23 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "bias_add.h"
+
+namespace onnxruntime {
+namespace contrib {
+namespace js {
+
+using onnxruntime::js::JsepSupportedFloatTypes;
+
+ONNX_OPERATOR_KERNEL_EX(
+    BiasAdd,
+    kMSDomain,
+    1,
+    kJsExecutionProvider,
+    (*KernelDefBuilder::Create())
+        .TypeConstraint("T", JsepSupportedFloatTypes()),
+    BiasAdd);
+
+}  // namespace js
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/js/bias_add.h b/onnxruntime/contrib_ops/js/bias_add.h
new file mode 100644
index 0000000000000..62a4df9bcdf34
--- /dev/null
+++ b/onnxruntime/contrib_ops/js/bias_add.h
@@ -0,0 +1,17 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+
+#include "core/providers/js/js_kernel.h"
+
+namespace onnxruntime {
+namespace contrib {
+namespace js {
+
+using onnxruntime::js::JsKernel;
+JSEP_KERNEL_IMPL(BiasAdd, BiasAdd);
+
+}  // namespace js
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/js/bias_split_gelu.cc b/onnxruntime/contrib_ops/js/bias_split_gelu.cc
new file mode 100644
index 0000000000000..e16aa4367d1c7
--- /dev/null
+++ b/onnxruntime/contrib_ops/js/bias_split_gelu.cc
@@ -0,0 +1,23 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "bias_split_gelu.h"
+
+namespace onnxruntime {
+namespace contrib {
+namespace js {
+
+using onnxruntime::js::JsepSupportedFloatTypes;
+
+ONNX_OPERATOR_KERNEL_EX(
+    BiasSplitGelu,
+    kMSDomain,
+    1,
+    kJsExecutionProvider,
+    (*KernelDefBuilder::Create())
+        .TypeConstraint("T", JsepSupportedFloatTypes()),
+    BiasSplitGelu);
+
+}  // namespace js
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/js/bias_split_gelu.h b/onnxruntime/contrib_ops/js/bias_split_gelu.h
new file mode 100644
index 0000000000000..3b3b41c0ca1f3
--- /dev/null
+++ b/onnxruntime/contrib_ops/js/bias_split_gelu.h
@@ -0,0 +1,17 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+
+#include "core/providers/js/js_kernel.h"
+
+namespace onnxruntime {
+namespace contrib {
+namespace js {
+
+using onnxruntime::js::JsKernel;
+JSEP_KERNEL_IMPL(BiasSplitGelu, BiasSplitGelu);
+
+}  // namespace js
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/js/fused_conv.cc b/onnxruntime/contrib_ops/js/fused_conv.cc
new file mode 100644
index 0000000000000..76402f0681976
--- /dev/null
+++ b/onnxruntime/contrib_ops/js/fused_conv.cc
@@ -0,0 +1,20 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "core/providers/js/operators/conv.h"
+namespace onnxruntime {
+namespace contrib {
+namespace js {
+
+ONNX_OPERATOR_KERNEL_EX(
+    FusedConv,
+    kMSDomain,
+    1,
+    kJsExecutionProvider,
+    KernelDefBuilder()
+        .TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
+    onnxruntime::js::Conv<false, true>);
+
+}  // namespace js
+}  // namespace contrib
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/js/js_contrib_kernels.cc b/onnxruntime/contrib_ops/js/js_contrib_kernels.cc
index 0bf6a4a168e68..498a9f5679eb5 100644
--- a/onnxruntime/contrib_ops/js/js_contrib_kernels.cc
+++ b/onnxruntime/contrib_ops/js/js_contrib_kernels.cc
@@ -7,8 +7,13 @@ namespace onnxruntime {
 namespace contrib {
 namespace js {
 
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSDomain, 1, Attention);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSDomain, 1, Gelu);
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSDomain, 1, MultiHeadAttention);
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSDomain, 1, BiasSplitGelu);
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSDomain, 1, BiasAdd);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSDomain, 1, SkipLayerNormalization);
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSDomain, 1, FusedConv);
 
 template <>
 KernelCreateInfo BuildKernelCreateInfo<void>() {
@@ -18,8 +23,14 @@ KernelCreateInfo BuildKernelCreateInfo<void>() {
 
 Status RegisterJsContribKernels(KernelRegistry& kernel_registry) {
   static const BuildKernelCreateInfoFn function_table[] = {
+      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSDomain, 1, Attention)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSDomain, 1, Gelu)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSDomain, 1, SkipLayerNormalization)>};
+      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSDomain, 1, MultiHeadAttention)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSDomain, 1, BiasAdd)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSDomain, 1, BiasSplitGelu)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSDomain, 1,
+                                                            SkipLayerNormalization)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSDomain, 1, FusedConv)>};
 
   for (auto& function_table_entry : function_table) {
     KernelCreateInfo info = function_table_entry();
diff --git a/onnxruntime/contrib_ops/js/skip_layer_norm.cc b/onnxruntime/contrib_ops/js/skip_layer_norm.cc
index ee315f9b31e3b..f949326e1dc95 100644
--- a/onnxruntime/contrib_ops/js/skip_layer_norm.cc
+++ b/onnxruntime/contrib_ops/js/skip_layer_norm.cc
@@ -7,14 +7,16 @@ namespace onnxruntime {
 namespace contrib {
 namespace js {
 
+using onnxruntime::js::JsepSupportedFloatTypes;
+
 ONNX_OPERATOR_KERNEL_EX(
     SkipLayerNormalization,
     kMSDomain,
     1,
     kJsExecutionProvider,
     (*KernelDefBuilder::Create())
-        .TypeConstraint("T", DataTypeImpl::GetTensorType<float>())
-        .TypeConstraint("U", DataTypeImpl::GetTensorType<float>()),
+        .TypeConstraint("T", JsepSupportedFloatTypes())
+        .TypeConstraint("U", JsepSupportedFloatTypes()),
     SkipLayerNorm);
 
 }  // namespace js
diff --git a/onnxruntime/contrib_ops/rocm/bert/batched_gemm_softmax_gemm_permute_pipelines.cuh b/onnxruntime/contrib_ops/rocm/bert/batched_gemm_softmax_gemm_permute_pipelines.cuh
index 246b66078537a..78983ac95e672 100644
--- a/onnxruntime/contrib_ops/rocm/bert/batched_gemm_softmax_gemm_permute_pipelines.cuh
+++ b/onnxruntime/contrib_ops/rocm/bert/batched_gemm_softmax_gemm_permute_pipelines.cuh
@@ -838,7 +838,7 @@ auto GetCKGemmSoftmaxGemmPermuteTypeStringAndOps() {
           Nop{});
 
       TUNABLE_OP_RETURN_UNSUPPORTED_ARGUMENT_IF(!impl->IsSupportedArgument(arg.get()),
-                                                impl->GetTypeString(), " does not support ", params->Signature());
+                                                impl->GetTypeString(), " does not support the params");
 
       if constexpr (USE_MASK) {
         ORT_RETURN_IF_ERROR(GemmSoftmaxGemmPermuteTunableOp<T>::LaunchConvertToFilledMaskValue(params));
diff --git a/onnxruntime/contrib_ops/rocm/bert/gemm_fast_gelu_ck.cuh b/onnxruntime/contrib_ops/rocm/bert/gemm_fast_gelu_ck.cuh
index cbf24ee2f5487..ea9040aa7875f 100644
--- a/onnxruntime/contrib_ops/rocm/bert/gemm_fast_gelu_ck.cuh
+++ b/onnxruntime/contrib_ops/rocm/bert/gemm_fast_gelu_ck.cuh
@@ -58,7 +58,7 @@ auto GetCKGemmAddFastGeluTypeStringAndOps() {
       auto zero = ToHipType<T>::FromFloat(0.0f);
       TUNABLE_OP_RETURN_UNSUPPORTED_ARGUMENT_IF(
           params->alpha != one || params->beta != zero || params->bias == nullptr,
-          impl->GetTypeString(), " only supports alpha == 1 and beta == 0 and bias != nullptr", params->Signature());
+          impl->GetTypeString(), " only supports alpha == 1 and beta == 0 and bias != nullptr");
 
       auto nop = Nop{};
       auto addfastgelu = AddFastGelu{};
@@ -67,7 +67,7 @@ auto GetCKGemmAddFastGeluTypeStringAndOps() {
                                            params->lda, params->ldb, std::array<ck::index_t, 1>{0}, params->ldc,
                                            nop, nop, addfastgelu);
       TUNABLE_OP_RETURN_UNSUPPORTED_ARGUMENT_IF(!impl->IsSupportedArgument(arg.get()),
-                                                impl->GetTypeString(), " does not support ", params->Signature());
+                                                impl->GetTypeString(), " does not support the params");
       invoker->Run(arg.get(), StreamConfig{params->StreamHandle()});
       return Status::OK();
     };
@@ -95,7 +95,7 @@ auto GetCKGemmFastGeluTypeStringAndOps() {
 
       TUNABLE_OP_RETURN_UNSUPPORTED_ARGUMENT_IF(
           params->alpha != one || params->beta != zero || params->bias != nullptr,
-          impl->GetTypeString(), " only supports alpha == 1 and beta == 0 and bias == nullptr", params->Signature());
+          impl->GetTypeString(), " only supports alpha == 1 and beta == 0 and bias == nullptr");
 
       auto nop = Nop{};
       auto fastgelu = FastGelu{};
@@ -108,7 +108,7 @@ auto GetCKGemmFastGeluTypeStringAndOps() {
                                            params->ldc,
                                            nop, nop, fastgelu);
       TUNABLE_OP_RETURN_UNSUPPORTED_ARGUMENT_IF(!impl->IsSupportedArgument(arg.get()),
-                                                impl->GetTypeString(), " does not support ", params->Signature());
+                                                impl->GetTypeString(), " does not support the params");
       invoker->Run(arg.get(), StreamConfig{params->StreamHandle()});
       return Status::OK();
     };
diff --git a/onnxruntime/contrib_ops/rocm/diffusion/group_norm.cc b/onnxruntime/contrib_ops/rocm/diffusion/group_norm.cc
index c665da89af36c..e82e15a304f4c 100644
--- a/onnxruntime/contrib_ops/rocm/diffusion/group_norm.cc
+++ b/onnxruntime/contrib_ops/rocm/diffusion/group_norm.cc
@@ -72,6 +72,12 @@ GroupNorm::GroupNorm(const OpKernelInfo& op_info) : RocmKernel(op_info) {
   channels_last_ = (op_info.GetAttrOrDefault<int64_t>("channels_last", static_cast<int64_t>(1)) != 0);
 }
 
+Status GroupNorm::PrePack(const Tensor& /*tensor*/, int /*input_idx*/, AllocatorPtr /*alloc*/,
+                          bool& is_packed, PrePackedWeights* /*prepacked_weights*/) {
+  is_packed = false;
+  return Status::OK();
+}
+
 Status GroupNorm::ComputeInternal(OpKernelContext* context) const {
   const Tensor* input = context->Input<Tensor>(0);
   const Tensor* gamma = context->Input<Tensor>(1);
diff --git a/onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck.cuh b/onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck.cuh
index e87813fb19956..fb7091592c16e 100644
--- a/onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck.cuh
+++ b/onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck.cuh
@@ -34,17 +34,17 @@ constexpr int NumReduceDim = 3;
 
 template <typename T, typename AccT, bool WithSwish>
 auto GetCKGroupNormNHWCTypeStringAndOps() {
-  using InDataType = typename CKDataTypeAdaptor<T>::type;
-  using OutDataType = typename CKDataTypeAdaptor<T>::type;
-  using AccDataType = typename CKDataTypeAdaptor<AccT>::type;
+  using XDataType = typename CKDataTypeAdaptor<T>::type;
+  using YDataType = typename CKDataTypeAdaptor<T>::type;
+  using SaveMeanInvStdDataType = typename CKDataTypeAdaptor<AccT>::type;
   using GammaDataType = float;
   using BetaDataType = float;
 
   using Activation = std::conditional_t<WithSwish, Swish, Pass>;
 
   std::vector<std::pair<std::string, onnxruntime::rocm::tunable::Op<GroupNormNHWCParams<T>>>> ret;
-  for (auto&& impl : internal::GetDeviceGroupNormInstances<InDataType, GammaDataType, BetaDataType, AccDataType,
-                                                           OutDataType, Activation, Rank, NumReduceDim>()) {
+  for (auto&& impl : internal::GetDeviceGroupNormInstances<XDataType, GammaDataType, BetaDataType, YDataType,
+                                                           SaveMeanInvStdDataType, Activation, Rank, NumReduceDim>()) {
     std::string swish_suffix = WithSwish ? "_Swish" : "_Pass";
     auto type_string = onnxruntime::MakeString(impl->GetTypeString()) + swish_suffix;
     auto invoker = impl->MakeInvokerPointer();
@@ -69,6 +69,8 @@ auto GetCKGroupNormNHWCTypeStringAndOps() {
                                            gamma_beta_strides,  // gammaStrides
                                            gamma_beta_strides,  // betaStrides
                                            in_out_strides,      // yStrides
+                                           {0, 0},              // saveMeanStrides
+                                           {0, 0},              // saveInvStdStrides
                                            reduce_dims,         // reduceDims
                                            params->epsilon,
                                            params->src,
@@ -79,7 +81,7 @@ auto GetCKGroupNormNHWCTypeStringAndOps() {
                                            nullptr,
                                            activation);
       TUNABLE_OP_RETURN_UNSUPPORTED_ARGUMENT_IF(!impl->IsSupportedArgument(arg.get()),
-                                                impl->GetTypeString(), " does not support ", params->Signature());
+                                                impl->GetTypeString(), " does not support the params");
       invoker->Run(arg.get(), StreamConfig{params->StreamHandle()});
       return Status::OK();
     };
diff --git a/onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh b/onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh
index 88443478cf521..19b081881dcec 100644
--- a/onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh
+++ b/onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh
@@ -6,8 +6,8 @@
 
 #ifdef USE_COMPOSABLE_KERNEL
 #include "ck/ck.hpp"
-#include "ck/tensor_operation/gpu/device/device_normalization.hpp"
-#include "ck/tensor_operation/gpu/device/impl/device_normalization_impl.hpp"
+#include "ck/tensor_operation/gpu/device/device_normalization_fwd.hpp"
+#include "ck/tensor_operation/gpu/device/impl/device_normalization_fwd_impl.hpp"
 #include "ck/utility/data_type.hpp"
 
 namespace onnxruntime {
@@ -21,102 +21,104 @@ using F32 = float;
 using Swish = ck::tensor_operation::element_wise::Swish;
 using Pass = ck::tensor_operation::element_wise::PassThrough;
 
-using ck::tensor_operation::device::DeviceNormalization;      // the interface
-using ck::tensor_operation::device::DeviceNormalizationImpl;  // the implementation
+using ck::tensor_operation::device::DeviceNormalizationFwd;      // the interface
+using ck::tensor_operation::device::DeviceNormalizationFwdImpl;  // the implementation
+
+// See https://github.com/ROCmSoftwarePlatform/composable_kernel/blob/1fefd82ed8/library/src/tensor_operation_instance/gpu/normalization_fwd/normalization_fwd_instance_common.hpp
 
 template <typename OutElementwise, ck::index_t Rank, ck::index_t Reduce>
 using device_normalization_f32_instances = std::tuple<
     // clang-format off
-        // XDataType, GammaDataType, BetaDataType, ComputeDataType, YDataType, OutElementwise, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XYSrcVectorDim, XSrcVectorSize, GammaSrcVectorSize, BetaSrcVectorSize, YDstVectorSize>
-        DeviceNormalizationImpl<F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 128, 1, 128, 1, 1, 1, 1, 1, 1, 1, 1, 1>,   // irregular size
-        DeviceNormalizationImpl<F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 256, 1, 256, 1, 1, 1, 1, 1, 1, 1, 1, 1>,   // irregular size
-        DeviceNormalizationImpl<F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 512, 1, 512, 1, 1, 1, 1, 1, 1, 1, 1, 1>,   // irregular size
-        DeviceNormalizationImpl<F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 1024, 1, 1024, 1, 1, 1, 1, 1, 1, 1, 1, 1>, // irregular size
-        DeviceNormalizationImpl<F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 256, 1, 256, 1, 2, 1, 2, 1, 2, 1, 2, 2>,   // irregular size
-        DeviceNormalizationImpl<F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 128, 1, 128, 1, 4, 1, 4, 1, 4, 1, 4, 4>,
-        DeviceNormalizationImpl<F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 128, 1, 128, 1, 8, 1, 4, 1, 4, 1, 4, 4>,
-        DeviceNormalizationImpl<F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 128, 1, 128, 1, 16, 1, 4, 1, 4, 1, 4, 4>,
-        DeviceNormalizationImpl<F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 128, 1, 128, 1, 32, 1, 4, 1, 4, 1, 4, 4>,
-        DeviceNormalizationImpl<F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 256, 1, 256, 1, 4, 1, 4, 1, 4, 1, 4, 4>,
-        DeviceNormalizationImpl<F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 256, 1, 256, 1, 8, 1, 4, 1, 4, 1, 4, 4>,
-        DeviceNormalizationImpl<F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 256, 1, 256, 1, 16, 1, 4, 1, 4, 1, 4, 4>,
-        DeviceNormalizationImpl<F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 256, 1, 256, 2, 16, 1, 4, 1, 4, 1, 4, 4>,
-        DeviceNormalizationImpl<F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 256, 1, 256, 1, 32, 1, 4, 1, 4, 1, 4, 4>,
-        DeviceNormalizationImpl<F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 512, 1, 512, 1, 4, 1, 4, 1, 4, 1, 4, 4>,
-        DeviceNormalizationImpl<F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 512, 1, 512, 1, 8, 1, 4, 1, 4, 1, 4, 4>,
-        DeviceNormalizationImpl<F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 512, 1, 512, 2, 8, 1, 4, 1, 4, 1, 4, 4>,
-        DeviceNormalizationImpl<F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 1024, 1, 1024, 1, 4, 1, 4, 1, 4, 1, 4, 4>,
-        DeviceNormalizationImpl<F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 1024, 1, 1024, 1, 8, 1, 4, 1, 4, 1, 4, 4>
+        // XDataType, GammaDataType, BetaDataType, ComputeDataType, YDataType, SaveMeanInvStdDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XYSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorSize, SaveMeanInvStdScalarPerVector>
+        DeviceNormalizationFwdImpl<F32, F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 128, 1, 128, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1>,   // irregular size
+        DeviceNormalizationFwdImpl<F32, F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 256, 1, 256, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1>,   // irregular size
+        DeviceNormalizationFwdImpl<F32, F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 512, 1, 512, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1>,   // irregular size
+        DeviceNormalizationFwdImpl<F32, F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 1024, 1, 1024, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1>, // irregular size
+        DeviceNormalizationFwdImpl<F32, F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 256, 1, 256, 1, 2, 1, 2, 1, 2, 1, 2, 2, 1>,   // irregular size
+        DeviceNormalizationFwdImpl<F32, F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 128, 1, 128, 1, 4, 1, 4, 1, 4, 1, 4, 4, 1>,
+        DeviceNormalizationFwdImpl<F32, F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 128, 1, 128, 1, 8, 1, 4, 1, 4, 1, 4, 4, 1>,
+        DeviceNormalizationFwdImpl<F32, F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 128, 1, 128, 1, 16, 1, 4, 1, 4, 1, 4, 4, 1>,
+        DeviceNormalizationFwdImpl<F32, F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 128, 1, 128, 1, 32, 1, 4, 1, 4, 1, 4, 4, 1>,
+        DeviceNormalizationFwdImpl<F32, F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 256, 1, 256, 1, 4, 1, 4, 1, 4, 1, 4, 4, 1>,
+        DeviceNormalizationFwdImpl<F32, F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 256, 1, 256, 1, 8, 1, 4, 1, 4, 1, 4, 4, 1>,
+        DeviceNormalizationFwdImpl<F32, F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 256, 1, 256, 1, 16, 1, 4, 1, 4, 1, 4, 4, 1>,
+        DeviceNormalizationFwdImpl<F32, F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 256, 1, 256, 2, 16, 1, 4, 1, 4, 1, 4, 4, 2>,
+        DeviceNormalizationFwdImpl<F32, F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 256, 1, 256, 1, 32, 1, 4, 1, 4, 1, 4, 4, 1>,
+        DeviceNormalizationFwdImpl<F32, F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 512, 1, 512, 1, 4, 1, 4, 1, 4, 1, 4, 4, 1>,
+        DeviceNormalizationFwdImpl<F32, F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 512, 1, 512, 1, 8, 1, 4, 1, 4, 1, 4, 4, 1>,
+        DeviceNormalizationFwdImpl<F32, F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 512, 1, 512, 2, 8, 1, 4, 1, 4, 1, 4, 4, 2>,
+        DeviceNormalizationFwdImpl<F32, F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 1024, 1, 1024, 1, 4, 1, 4, 1, 4, 1, 4, 4, 1>,
+        DeviceNormalizationFwdImpl<F32, F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 1024, 1, 1024, 1, 8, 1, 4, 1, 4, 1, 4, 4, 1>
     // clang-format on
     >;
 
 template <typename OutElementwise, ck::index_t Rank, ck::index_t Reduce>
-using device_normalization_f16_instances = std::tuple<
+using device_normalization_f16_instances =
     // clang-format off
-        // XDataType, GammaDataType, BetaDataType, ComputeDataType, YDataType, OutElementwise, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XYSrcVectorDim, XSrcVectorSize, GammaSrcVectorSize, BetaSrcVectorSize, YDstVectorSize>
-        DeviceNormalizationImpl<F16, F32, F32, F32, F16, OutElementwise, Rank, Reduce, 128, 1, 128, 1, 1, 1, 1, 1, 1, 1, 1, 1>,    // irregular size
-        DeviceNormalizationImpl<F16, F32, F32, F32, F16, OutElementwise, Rank, Reduce, 256, 1, 256, 1, 1, 1, 1, 1, 1, 1, 1, 1>,    // irregular size
-        DeviceNormalizationImpl<F16, F32, F32, F32, F16, OutElementwise, Rank, Reduce, 512, 1, 512, 1, 1, 1, 1, 1, 1, 1, 1, 1>,    // irregular size
-        DeviceNormalizationImpl<F16, F32, F32, F32, F16, OutElementwise, Rank, Reduce, 1024, 1, 1024, 1, 1, 1, 1, 1, 1, 1, 1, 1>,  // irregular size
-        DeviceNormalizationImpl<F16, F32, F32, F32, F16, OutElementwise, Rank, Reduce, 256, 1, 256, 1, 2, 1, 2, 1, 2, 1, 2, 2>,    // irregular size
-        DeviceNormalizationImpl<F16, F32, F32, F32, F16, OutElementwise, Rank, Reduce, 128, 1, 128, 1, 4, 1, 4, 1, 4, 1, 4, 4>,
-        DeviceNormalizationImpl<F16, F32, F32, F32, F16, OutElementwise, Rank, Reduce, 128, 1, 128, 1, 8, 1, 4, 1, 4, 1, 4, 4>,
-        DeviceNormalizationImpl<F16, F32, F32, F32, F16, OutElementwise, Rank, Reduce, 128, 1, 128, 1, 16, 1, 4, 1, 4, 1, 4, 4>,
-        DeviceNormalizationImpl<F16, F32, F32, F32, F16, OutElementwise, Rank, Reduce, 128, 1, 128, 1, 32, 1, 4, 1, 4, 1, 4, 4>,
-        DeviceNormalizationImpl<F16, F32, F32, F32, F16, OutElementwise, Rank, Reduce, 256, 1, 256, 1, 4, 1, 4, 1, 4, 1, 4, 4>,
-        DeviceNormalizationImpl<F16, F32, F32, F32, F16, OutElementwise, Rank, Reduce, 256, 1, 256, 1, 8, 1, 4, 1, 4, 1, 4, 4>,
-        DeviceNormalizationImpl<F16, F32, F32, F32, F16, OutElementwise, Rank, Reduce, 256, 1, 256, 1, 16, 1, 4, 1, 4, 1, 4, 4>,
-        DeviceNormalizationImpl<F16, F32, F32, F32, F16, OutElementwise, Rank, Reduce, 256, 1, 256, 2, 16, 1, 4, 1, 4, 1, 4, 4>,
-        DeviceNormalizationImpl<F16, F32, F32, F32, F16, OutElementwise, Rank, Reduce, 256, 1, 256, 1, 32, 1, 4, 1, 4, 1, 4, 4>,
-        DeviceNormalizationImpl<F16, F32, F32, F32, F16, OutElementwise, Rank, Reduce, 512, 1, 512, 1, 4, 1, 4, 1, 4, 1, 4, 4>,
-        DeviceNormalizationImpl<F16, F32, F32, F32, F16, OutElementwise, Rank, Reduce, 512, 1, 512, 1, 8, 1, 4, 1, 4, 1, 4, 4>,
-        DeviceNormalizationImpl<F16, F32, F32, F32, F16, OutElementwise, Rank, Reduce, 512, 1, 512, 2, 8, 1, 4, 1, 4, 1, 4, 4>,
-        DeviceNormalizationImpl<F16, F32, F32, F32, F16, OutElementwise, Rank, Reduce, 1024, 1, 1024, 1, 4, 1, 4, 1, 4, 1, 4, 4>,
-        DeviceNormalizationImpl<F16, F32, F32, F32, F16, OutElementwise, Rank, Reduce, 1024, 1, 1024, 1, 8, 1, 4, 1, 4, 1, 4, 4>
+    std::tuple <
+        // XDataType, GammaDataType, BetaDataType, ComputeDataType, YDataType, SaveMeanInvStdDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XYSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorSize, SaveMeanInvStdScalarPerVector>
+        DeviceNormalizationFwdImpl<F16, F32, F32, F32, F16, F32, OutElementwise, Rank, Reduce, 128, 1, 128, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1>,   // irregular size
+        DeviceNormalizationFwdImpl<F16, F32, F32, F32, F16, F32, OutElementwise, Rank, Reduce, 256, 1, 256, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1>,   // irregular size
+        DeviceNormalizationFwdImpl<F16, F32, F32, F32, F16, F32, OutElementwise, Rank, Reduce, 512, 1, 512, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1>,   // irregular size
+        DeviceNormalizationFwdImpl<F16, F32, F32, F32, F16, F32, OutElementwise, Rank, Reduce, 1024, 1, 1024, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1>, // irregular size
+        DeviceNormalizationFwdImpl<F16, F32, F32, F32, F16, F32, OutElementwise, Rank, Reduce, 256, 1, 256, 1, 2, 1, 2, 1, 2, 1, 2, 2, 1>,   // irregular size
+        DeviceNormalizationFwdImpl<F16, F32, F32, F32, F16, F32, OutElementwise, Rank, Reduce, 256, 1, 256, 1, 4, 1, 4, 1, 4, 1, 4, 4, 1>,   // irregular size
+        DeviceNormalizationFwdImpl<F16, F32, F32, F32, F16, F32, OutElementwise, Rank, Reduce, 64, 1, 64, 1, 8, 1, 8, 1, 8, 1, 8, 8, 1>,
+        DeviceNormalizationFwdImpl<F16, F32, F32, F32, F16, F32, OutElementwise, Rank, Reduce, 128, 1, 128, 1, 8, 1, 8, 1, 8, 1, 8, 8, 1>,
+        DeviceNormalizationFwdImpl<F16, F32, F32, F32, F16, F32, OutElementwise, Rank, Reduce, 128, 1, 128, 1, 16, 1, 8, 1, 8, 1, 8, 8, 1>,
+        DeviceNormalizationFwdImpl<F16, F32, F32, F32, F16, F32, OutElementwise, Rank, Reduce, 128, 1, 128, 1, 32, 1, 8, 1, 8, 1, 8, 8, 1>,
+        DeviceNormalizationFwdImpl<F16, F32, F32, F32, F16, F32, OutElementwise, Rank, Reduce, 256, 1, 256, 1, 8, 1, 8, 1, 8, 1, 8, 8, 1>,
+        DeviceNormalizationFwdImpl<F16, F32, F32, F32, F16, F32, OutElementwise, Rank, Reduce, 256, 1, 256, 1, 16, 1, 8, 1, 8, 1, 8, 8, 1>,
+        DeviceNormalizationFwdImpl<F16, F32, F32, F32, F16, F32, OutElementwise, Rank, Reduce, 256, 1, 256, 2, 16, 1, 8, 1, 8, 1, 8, 8, 2>,
+        DeviceNormalizationFwdImpl<F16, F32, F32, F32, F16, F32, OutElementwise, Rank, Reduce, 256, 1, 256, 1, 32, 1, 8, 1, 8, 1, 8, 8, 1>,
+        DeviceNormalizationFwdImpl<F16, F32, F32, F32, F16, F32, OutElementwise, Rank, Reduce, 512, 1, 512, 1, 8, 1, 8, 1, 8, 1, 8, 8, 1>,
+        DeviceNormalizationFwdImpl<F16, F32, F32, F32, F16, F32, OutElementwise, Rank, Reduce, 512, 1, 512, 1, 16, 1, 8, 1, 8, 1, 8, 8, 1>,
+        DeviceNormalizationFwdImpl<F16, F32, F32, F32, F16, F32, OutElementwise, Rank, Reduce, 1024, 1, 1024, 1, 8, 1, 8, 1, 8, 1, 8, 8, 1>,
+        DeviceNormalizationFwdImpl<F16, F32, F32, F32, F16, F32, OutElementwise, Rank, Reduce, 1024, 1, 1024, 1, 16, 1, 8, 1, 8, 1, 8, 8, 1>
     // clang-format on
     >;
 
 // Use this function to get implementation
-template <typename InDataType,
+template <typename XDataType,
           typename GammaDataType,
           typename BetaDataType,
-          typename AccDataType,
-          typename OutDataType,
+          typename YDataType,
+          typename SaveMeanInvStdDataType,
           typename YElementwiseOperation,
           ck::index_t Rank,
           ck::index_t NumReduceDim>
-std::vector<std::unique_ptr<DeviceNormalization<InDataType,
-                                                GammaDataType,
-                                                BetaDataType,
-                                                AccDataType,
-                                                OutDataType,
-                                                YElementwiseOperation,
-                                                Rank,
-                                                NumReduceDim>>>
+std::vector<std::unique_ptr<DeviceNormalizationFwd<XDataType,
+                                                   GammaDataType,
+                                                   BetaDataType,
+                                                   YDataType,
+                                                   SaveMeanInvStdDataType,
+                                                   YElementwiseOperation,
+                                                   Rank,
+                                                   NumReduceDim>>>
 GetDeviceGroupNormInstances() {
   return {};
 }
 
 template <>
-std::vector<std::unique_ptr<DeviceNormalization<
-    F16, F32, F32, F32, F16, Swish, 5, 3>>>
+std::vector<std::unique_ptr<DeviceNormalizationFwd<
+    F16, F32, F32, F16, F32, Swish, 5, 3>>>
 GetDeviceGroupNormInstances<
-    F16, F32, F32, F32, F16, Swish, 5, 3>();
+    F16, F32, F32, F16, F32, Swish, 5, 3>();
 
 template <>
-std::vector<std::unique_ptr<DeviceNormalization<
-    F16, F32, F32, F32, F16, Pass, 5, 3>>>
+std::vector<std::unique_ptr<DeviceNormalizationFwd<
+    F16, F32, F32, F16, F32, Pass, 5, 3>>>
 GetDeviceGroupNormInstances<
-    F16, F32, F32, F32, F16, Pass, 5, 3>();
+    F16, F32, F32, F16, F32, Pass, 5, 3>();
 
 template <>
-std::vector<std::unique_ptr<DeviceNormalization<
+std::vector<std::unique_ptr<DeviceNormalizationFwd<
     F32, F32, F32, F32, F32, Swish, 5, 3>>>
 GetDeviceGroupNormInstances<
     F32, F32, F32, F32, F32, Swish, 5, 3>();
 
 template <>
-std::vector<std::unique_ptr<DeviceNormalization<
+std::vector<std::unique_ptr<DeviceNormalizationFwd<
     F32, F32, F32, F32, F32, Pass, 5, 3>>>
 GetDeviceGroupNormInstances<
     F32, F32, F32, F32, F32, Pass, 5, 3>();
diff --git a/onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl_fp16.cu b/onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl_fp16.cu
index d1dd78e3452da..6718f29268031 100644
--- a/onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl_fp16.cu
+++ b/onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl_fp16.cu
@@ -4,7 +4,6 @@
 #ifdef USE_COMPOSABLE_KERNEL
 #include "contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh"
 #include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
-#include "ck/tensor_operation/gpu/device/impl/device_normalization_impl.hpp"
 
 namespace onnxruntime {
 namespace contrib {
@@ -12,9 +11,9 @@ namespace rocm {
 namespace internal {
 
 template <>
-std::vector<std::unique_ptr<DeviceNormalization<F16, F32, F32, F32, F16, Swish, 5, 3>>>
-GetDeviceGroupNormInstances<F16, F32, F32, F32, F16, Swish, 5, 3>() {
-  std::vector<std::unique_ptr<DeviceNormalization<F16, F32, F32, F32, F16, Swish, 5, 3>>> instances;
+std::vector<std::unique_ptr<DeviceNormalizationFwd<F16, F32, F32, F16, F32, Swish, 5, 3>>>
+GetDeviceGroupNormInstances<F16, F32, F32, F16, F32, Swish, 5, 3>() {
+  std::vector<std::unique_ptr<DeviceNormalizationFwd<F16, F32, F32, F16, F32, Swish, 5, 3>>> instances;
   ck::tensor_operation::device::instance::add_device_operation_instances(
       instances,
       device_normalization_f16_instances<Swish, 5, 3>{});
@@ -23,9 +22,9 @@ GetDeviceGroupNormInstances<F16, F32, F32, F32, F16, Swish, 5, 3>() {
 }
 
 template <>
-std::vector<std::unique_ptr<DeviceNormalization<F16, F32, F32, F32, F16, Pass, 5, 3>>>
-GetDeviceGroupNormInstances<F16, F32, F32, F32, F16, Pass, 5, 3>() {
-  std::vector<std::unique_ptr<DeviceNormalization<F16, F32, F32, F32, F16, Pass, 5, 3>>> instances;
+std::vector<std::unique_ptr<DeviceNormalizationFwd<F16, F32, F32, F16, F32, Pass, 5, 3>>>
+GetDeviceGroupNormInstances<F16, F32, F32, F16, F32, Pass, 5, 3>() {
+  std::vector<std::unique_ptr<DeviceNormalizationFwd<F16, F32, F32, F16, F32, Pass, 5, 3>>> instances;
   ck::tensor_operation::device::instance::add_device_operation_instances(
       instances,
       device_normalization_f16_instances<Pass, 5, 3>{});
diff --git a/onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl_fp32.cu b/onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl_fp32.cu
index 97baed34a341d..9b0ccab17b4c1 100644
--- a/onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl_fp32.cu
+++ b/onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl_fp32.cu
@@ -4,7 +4,6 @@
 #ifdef USE_COMPOSABLE_KERNEL
 #include "contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh"
 #include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
-#include "ck/tensor_operation/gpu/device/impl/device_normalization_impl.hpp"
 
 namespace onnxruntime {
 namespace contrib {
@@ -12,9 +11,9 @@ namespace rocm {
 namespace internal {
 
 template <>
-std::vector<std::unique_ptr<DeviceNormalization<F32, F32, F32, F32, F32, Swish, 5, 3>>>
+std::vector<std::unique_ptr<DeviceNormalizationFwd<F32, F32, F32, F32, F32, Swish, 5, 3>>>
 GetDeviceGroupNormInstances<F32, F32, F32, F32, F32, Swish, 5, 3>() {
-  std::vector<std::unique_ptr<DeviceNormalization<F32, F32, F32, F32, F32, Swish, 5, 3>>> instances;
+  std::vector<std::unique_ptr<DeviceNormalizationFwd<F32, F32, F32, F32, F32, Swish, 5, 3>>> instances;
   ck::tensor_operation::device::instance::add_device_operation_instances(
       instances,
       device_normalization_f32_instances<Swish, 5, 3>{});
@@ -23,9 +22,9 @@ GetDeviceGroupNormInstances<F32, F32, F32, F32, F32, Swish, 5, 3>() {
 }
 
 template <>
-std::vector<std::unique_ptr<DeviceNormalization<F32, F32, F32, F32, F32, Pass, 5, 3>>>
+std::vector<std::unique_ptr<DeviceNormalizationFwd<F32, F32, F32, F32, F32, Pass, 5, 3>>>
 GetDeviceGroupNormInstances<F32, F32, F32, F32, F32, Pass, 5, 3>() {
-  std::vector<std::unique_ptr<DeviceNormalization<F32, F32, F32, F32, F32, Pass, 5, 3>>> instances;
+  std::vector<std::unique_ptr<DeviceNormalizationFwd<F32, F32, F32, F32, F32, Pass, 5, 3>>> instances;
   ck::tensor_operation::device::instance::add_device_operation_instances(
       instances,
       device_normalization_f32_instances<Pass, 5, 3>{});
diff --git a/onnxruntime/contrib_ops/rocm/diffusion/group_norm_triton.cuh b/onnxruntime/contrib_ops/rocm/diffusion/group_norm_triton.cuh
index 526d220d4be24..b7b9441ac997d 100644
--- a/onnxruntime/contrib_ops/rocm/diffusion/group_norm_triton.cuh
+++ b/onnxruntime/contrib_ops/rocm/diffusion/group_norm_triton.cuh
@@ -77,7 +77,7 @@ auto GetTritonGroupNormNHWCTypeStringAndOps() {
           params->epsilon};
 
       // Grid dim is (batch_count, groups, 1)
-      return LaunchTritonKernel(params->stream, i, params->n, params->groups, 1, &args, sizeof(args));
+      return LaunchTritonKernel(params->StreamHandle(), i, params->n, params->groups, 1, &args, sizeof(args));
     };
     ret.emplace_back(std::make_pair(metadata->name, std::move(impl)));
   }
diff --git a/onnxruntime/contrib_ops/rocm/rocm_contrib_kernels.cc b/onnxruntime/contrib_ops/rocm/rocm_contrib_kernels.cc
index 7bc0f99081169..0f8fe68de717a 100644
--- a/onnxruntime/contrib_ops/rocm/rocm_contrib_kernels.cc
+++ b/onnxruntime/contrib_ops/rocm/rocm_contrib_kernels.cc
@@ -29,6 +29,14 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, FusedMatMul);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, double, FusedMatMul);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16, FusedMatMul);
+// class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, RelativePositionBias);
+// class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16, RelativePositionBias);
+// class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, GatedRelativePositionBias);
+// class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16, GatedRelativePositionBias);
+// class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, RemovePadding);
+// class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16, RemovePadding);
+// class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, RestorePadding);
+// class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16, RestorePadding);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, Rfft);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, double, Rfft);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16, Rfft);
@@ -52,6 +60,10 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, MLFloat16, Affine);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, Attention);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16, Attention);
+// class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, PackedAttention);
+// class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16, PackedAttention);
+// class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, PackedMultiHeadAttention);
+// class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16, PackedMultiHeadAttention);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, BeamSearch);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, ConvTransposeWithDynamicPads);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, float, Crop);
@@ -61,12 +73,11 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16, MultiHeadAttention);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, DecoderAttention);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16, DecoderAttention);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, DecoderMaskedMultiHeadAttention);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16, DecoderMaskedMultiHeadAttention);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, int32_t, DynamicSlice);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, int64_t, DynamicSlice);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, EmbedLayerNormalization);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16, EmbedLayerNormalization);
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, GreedySearch);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, GroupNorm);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16, NhwcConv);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, NhwcConv);
@@ -113,6 +124,17 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, BFloat16, FastGelu);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, BFloat16, TransposeMatMul);  // backward compatibility
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, BFloat16, FusedMatMul);
+// class ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, QOrderedMatMul);
+// class ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, QOrderedLayerNormalization);
+// class ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, QOrderedGelu);
+// class ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, QuantizeWithOrder);
+// class ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, DequantizeWithOrder);
+// class ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, QOrderedAttention);
+// class ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, QOrderedLongformerAttention);
+// class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, DecoderMaskedSelfAttention);
+// class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16, DecoderMaskedSelfAttention);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, DecoderMaskedMultiHeadAttention);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16, DecoderMaskedMultiHeadAttention);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, GemmFastGelu);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16, GemmFastGelu);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, BFloat16, GemmFastGelu);
@@ -139,6 +161,7 @@ KernelCreateInfo BuildKernelCreateInfo<void>() {
   return info;
 }
 
+// clang-format off
 Status RegisterRocmContribKernels(KernelRegistry& kernel_registry) {
   static const BuildKernelCreateInfoFn function_table[] = {
     BuildKernelCreateInfo<void>,  // default entry to avoid the list become empty after ops-reducing
@@ -162,70 +185,73 @@ Status RegisterRocmContribKernels(KernelRegistry& kernel_registry) {
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, FusedMatMul)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, double, FusedMatMul)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16, FusedMatMul)>,
-    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, Rfft)>,
-    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, double, Rfft)>,
-    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16, Rfft)>,
-    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, Irfft)>,
-    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, double, Irfft)>,
-    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16, Irfft)>,
-    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, ComplexMul)>,
-    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16, ComplexMul)>,
-    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, ComplexMulConj)>,
-    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16, ComplexMulConj)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain,
-                                                          1, NGramRepeatBlock)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, RelativePositionBias)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16, RelativePositionBias)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, GatedRelativePositionBias)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16, GatedRelativePositionBias)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, RemovePadding)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16, RemovePadding)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, RestorePadding)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16, RestorePadding)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, Rfft)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, double, Rfft)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16, Rfft)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, Irfft)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, double, Irfft)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16, Irfft)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, ComplexMul)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16, ComplexMul)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, ComplexMulConj)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16, ComplexMulConj)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, NGramRepeatBlock)>,
 
     // These ops were experimental ops in onnx domain which have been removed now. We add them here as
     // contrib ops to maintain backward compatibility
-    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, float, Affine)>,
-    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, double, Affine)>,
-    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, MLFloat16, Affine)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, float, Affine)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, double, Affine)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, MLFloat16, Affine)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, Attention)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16, Attention)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, PackedAttention)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16, PackedAttention)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, PackedMultiHeadAttention)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16, PackedMultiHeadAttention)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, BeamSearch)>,
-
-    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, ConvTransposeWithDynamicPads)>,
-    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, float, Crop)>,
-    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, double, Crop)>,
-    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, MLFloat16, Crop)>,
-
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, ConvTransposeWithDynamicPads)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, float, Crop)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, double, Crop)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, MLFloat16, Crop)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, MultiHeadAttention)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16, MultiHeadAttention)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain,
-                                                                1, float, DecoderAttention)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain,
-                                                                1, MLFloat16, DecoderAttention)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, DecoderMaskedMultiHeadAttention)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16, DecoderMaskedMultiHeadAttention)>,
-    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, int32_t, DynamicSlice)>,
-    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, int64_t, DynamicSlice)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, DecoderAttention)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16, DecoderAttention)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, int32_t, DynamicSlice)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, int64_t, DynamicSlice)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, EmbedLayerNormalization)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16, EmbedLayerNormalization)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, GreedySearch)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, GroupNorm)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16, NhwcConv)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, NhwcConv)>,
-
-    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, float, ImageScaler)>,
-    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, double, ImageScaler)>,
-    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, MLFloat16, ImageScaler)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain,
-                                                                1, float, LongformerAttention)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain,
-                                                                1, MLFloat16, LongformerAttention)>,
-    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, float, ParametricSoftplus)>,
-    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, double, ParametricSoftplus)>,
-    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, MLFloat16, ParametricSoftplus)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, float, ImageScaler)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, double, ImageScaler)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, MLFloat16, ImageScaler)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, LongformerAttention)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16, LongformerAttention)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, float, ParametricSoftplus)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, double, ParametricSoftplus)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, MLFloat16, ParametricSoftplus)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, Sampling)>,
-    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, float, ScaledTanh)>,
-    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, double, ScaledTanh)>,
-    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, MLFloat16, ScaledTanh)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, float, ScaledTanh)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, double, ScaledTanh)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, MLFloat16, ScaledTanh)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, SkipLayerNormalization)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16, SkipLayerNormalization)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, SkipSimplifiedLayerNormalization)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16, SkipSimplifiedLayerNormalization)>,
-    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, float, ThresholdedRelu)>,
-    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, double, ThresholdedRelu)>,
-    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, MLFloat16, ThresholdedRelu)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, float, ThresholdedRelu)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, double, ThresholdedRelu)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, MLFloat16, ThresholdedRelu)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 16, float_float_float, LayerNormalization)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 16, double_double_double, LayerNormalization)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 16, MLFloat16_float_MLFloat16, LayerNormalization)>,
@@ -238,7 +264,6 @@ Status RegisterRocmContribKernels(KernelRegistry& kernel_registry) {
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, float_float_MLFloat16, SimplifiedLayerNormalization)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, MLFloat16_float_float, SimplifiedLayerNormalization)>,
     // BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, Inverse)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, Trilu)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, BiasSoftmax)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, BiasDropout)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, BitmaskDropout)>,
@@ -249,16 +274,25 @@ Status RegisterRocmContribKernels(KernelRegistry& kernel_registry) {
     // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, int8_t_MLFloat16, DequantizeLinear)>,
     // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, uint8_t_MLFloat16, DequantizeLinear)>,
     // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float_int8_t, QAttention)>,
-    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16_int8_t, QAttention)>
-
+    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16_int8_t, QAttention)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, Trilu)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, BFloat16, FastGelu)>,
     // TransposedMatMul is still here for backward compatibility
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, BFloat16, TransposeMatMul)>,  // backward compatibility
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, BFloat16, FusedMatMul)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain,
-                                                                1, float, FusedConv)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain,
-                                                                1, MLFloat16, FusedConv)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16, FusedConv)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, FusedConv)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, QOrderedMatMul)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, QOrderedLayerNormalization)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, QOrderedGelu)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, QuantizeWithOrder)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, DequantizeWithOrder)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, QOrderedAttention)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, QOrderedLongformerAttention)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, DecoderMaskedSelfAttention)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16, DecoderMaskedSelfAttention)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, DecoderMaskedMultiHeadAttention)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16, DecoderMaskedMultiHeadAttention)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, GemmFastGelu)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16, GemmFastGelu)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, BFloat16, GemmFastGelu)>,
@@ -278,6 +312,7 @@ Status RegisterRocmContribKernels(KernelRegistry& kernel_registry) {
     BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, AllGather)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, AllToAll)>,
 #endif
+
   };
 
   for (auto& function_table_entry : function_table) {
@@ -289,6 +324,7 @@ Status RegisterRocmContribKernels(KernelRegistry& kernel_registry) {
 
   return Status::OK();
 }
+// clang-format on
 
 }  // namespace rocm
 }  // namespace contrib
diff --git a/onnxruntime/core/common/cpuid_info.cc b/onnxruntime/core/common/cpuid_info.cc
index a23409292bb74..655d5014f3d60 100644
--- a/onnxruntime/core/common/cpuid_info.cc
+++ b/onnxruntime/core/common/cpuid_info.cc
@@ -22,6 +22,14 @@
 #define HWCAP_ASIMDDP (1 << 20)
 #endif
 
+#ifndef HWCAP2_I8MM
+#define HWCAP2_I8MM (1 << 13)
+#endif
+
+#ifndef HWCAP2_SVEI8MM
+#define HWCAP2_SVEI8MM (1 << 9)
+#endif
+
 #endif  // ARM
 
 #endif  // Linux
@@ -135,38 +143,41 @@ void CPUIDInfo::ArmLinuxInit() {
     LOGS_DEFAULT(WARNING) << "Failed to init pytorch cpuinfo library, may cause CPU EP performance degradation due to undetected CPU features.";
     return;
   }
+  is_hybrid_ = cpuinfo_get_uarchs_count() > 1;
+  has_arm_neon_dot_ = cpuinfo_has_arm_neon_dot();
+  has_fp16_ = cpuinfo_has_arm_neon_fp16_arith();
+  has_arm_neon_i8mm_ = cpuinfo_has_arm_i8mm();
+  has_arm_sve_i8mm_ = cpuinfo_has_arm_sve() && cpuinfo_has_arm_i8mm();
+
+  const uint32_t core_cnt = cpuinfo_get_cores_count();
+  core_uarchs_.resize(core_cnt, cpuinfo_uarch_unknown);
+  is_armv8_narrow_ld_.resize(core_cnt, false);
+  for (uint32_t c = 0; c < core_cnt; c++) {
+    const struct cpuinfo_processor* proc = cpuinfo_get_processor(c);
+    if (proc == nullptr) {
+      continue;
+    }
+    const struct cpuinfo_core* corep = proc->core;
+    if (corep == nullptr) {
+      continue;
+    }
+    auto coreid = proc->linux_id;
+    auto uarch = corep->uarch;
+    core_uarchs_[coreid] = uarch;
+    if (uarch == cpuinfo_uarch_cortex_a53 || uarch == cpuinfo_uarch_cortex_a55r0 ||
+        uarch == cpuinfo_uarch_cortex_a55) {
+      is_armv8_narrow_ld_[coreid] = true;
+    }
+  }
 #else
   pytorch_cpuinfo_init_ = false;
-#endif
+  has_arm_neon_dot_ = ((getauxval(AT_HWCAP) & HWCAP_ASIMDDP) != 0);
+  has_fp16_ |= has_arm_neon_dot_;
 
-  if (pytorch_cpuinfo_init_) {
-    is_hybrid_ = cpuinfo_get_uarchs_count() > 1;
-    has_arm_neon_dot_ = cpuinfo_has_arm_neon_dot();
-    has_fp16_ = cpuinfo_has_arm_neon_fp16_arith();
-    const uint32_t core_cnt = cpuinfo_get_cores_count();
-    core_uarchs_.resize(core_cnt, cpuinfo_uarch_unknown);
-    is_armv8_narrow_ld_.resize(core_cnt, false);
-    for (uint32_t c = 0; c < core_cnt; c++) {
-      const struct cpuinfo_processor* proc = cpuinfo_get_processor(c);
-      if (proc == nullptr) {
-        continue;
-      }
-      const struct cpuinfo_core* corep = proc->core;
-      if (corep == nullptr) {
-        continue;
-      }
-      auto coreid = proc->linux_id;
-      auto uarch = corep->uarch;
-      core_uarchs_[coreid] = uarch;
-      if (uarch == cpuinfo_uarch_cortex_a53 || uarch == cpuinfo_uarch_cortex_a55r0 ||
-          uarch == cpuinfo_uarch_cortex_a55) {
-        is_armv8_narrow_ld_[coreid] = true;
-      }
-    }
-  } else {
-    has_arm_neon_dot_ = ((getauxval(AT_HWCAP) & HWCAP_ASIMDDP) != 0);
-    has_fp16_ |= has_arm_neon_dot_;
-  }
+  has_arm_neon_i8mm_ = ((getauxval(AT_HWCAP2) & HWCAP2_I8MM) != 0);
+  has_arm_sve_i8mm_ = ((getauxval(AT_HWCAP2) & HWCAP2_SVEI8MM) != 0);
+
+#endif
 }
 
 #elif defined(_WIN32)
@@ -260,6 +271,9 @@ void CPUIDInfo::ArmWindowsInit() {
 
   has_arm_neon_dot_ = (IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE) != 0);
   has_fp16_ |= has_arm_neon_dot_;
+  /* TODO: implement them when hw+sw is available for testing these features */
+  has_arm_neon_i8mm_ = false;
+  has_arm_sve_i8mm_ = false;
 }
 
 #endif /* (arm or arm64) and windows */
diff --git a/onnxruntime/core/common/cpuid_info.h b/onnxruntime/core/common/cpuid_info.h
index 386db347c669d..a15c75104b83a 100644
--- a/onnxruntime/core/common/cpuid_info.h
+++ b/onnxruntime/core/common/cpuid_info.h
@@ -28,6 +28,8 @@ class CPUIDInfo {
 
   // ARM
   bool HasArmNeonDot() const { return has_arm_neon_dot_; }
+  bool HasArmNeon_I8MM() const { return has_arm_neon_i8mm_; }
+  bool HasArmSVE_I8MM() const { return has_arm_sve_i8mm_; }
 
   uint32_t GetCurrentCoreIdx() const;
 
@@ -121,6 +123,8 @@ class CPUIDInfo {
 
   bool has_arm_neon_dot_{false};
   bool has_fp16_{false};
+  bool has_arm_neon_i8mm_{false};
+  bool has_arm_sve_i8mm_{false};
 
 #ifdef CPUIDINFO_ARCH_X86
 
diff --git a/onnxruntime/core/common/cpuid_uarch.cc b/onnxruntime/core/common/cpuid_uarch.cc
index 52baad739441b..16634b2bc8744 100644
--- a/onnxruntime/core/common/cpuid_uarch.cc
+++ b/onnxruntime/core/common/cpuid_uarch.cc
@@ -3,7 +3,8 @@
 
 #include "core/common/cpuid_uarch.h"
 
-#include "core/common/logging/logging.h"
+#include <iostream>  // For std::cerr.
+                     // Writing to stderr instead of logging because logger may not be initialized yet.
 
 namespace onnxruntime {
 
@@ -137,7 +138,7 @@ void decodeMIDR(
               break;
               // #endif /* ARM */
             default:
-              LOGS_DEFAULT(WARNING) << "unknown ARM CPU part 0x" << std::hex << midr_get_part(midr) << " ignored";
+              std::cerr << "unknown ARM CPU part 0x" << std::hex << midr_get_part(midr) << " ignored\n";
           }
       }
       break;
@@ -156,7 +157,7 @@ void decodeMIDR(
           break;
           // #endif
         default:
-          LOGS_DEFAULT(WARNING) << "unknown Broadcom CPU part 0x" << std::hex << midr_get_part(midr) << " ignored";
+          std::cerr << "unknown Broadcom CPU part 0x" << std::hex << midr_get_part(midr) << " ignored\n";
       }
       break;
       // #if (defined(_M_ARM64) || defined(__aarch64__)) && !defined(__ANDROID__)
@@ -172,7 +173,7 @@ void decodeMIDR(
           *uarch = cpuinfo_uarch_thunderx2;
           break;
         default:
-          LOGS_DEFAULT(WARNING) << "unknown Cavium CPU part 0x" << std::hex << midr_get_part(midr) << " ignored";
+          std::cerr << "unknown Cavium CPU part 0x" << std::hex << midr_get_part(midr) << " ignored\n";
       }
       break;
       // #endif
@@ -187,7 +188,7 @@ void decodeMIDR(
           *uarch = cpuinfo_uarch_cortex_a76;
           break;
         default:
-          LOGS_DEFAULT(WARNING) << "unknown Huawei CPU part 0x" << std::hex << midr_get_part(midr) << " ignored";
+          std::cerr << "unknown Huawei CPU part 0x" << std::hex << midr_get_part(midr) << " ignored\n";
       }
       break;
       // #if defined(_M_ARM) || defined(__arm__)
@@ -199,7 +200,7 @@ void decodeMIDR(
           *uarch = cpuinfo_uarch_xscale;
           break;
         default:
-          LOGS_DEFAULT(WARNING) << "unknown Intel CPU part 0x" << std::hex << midr_get_part(midr) << " ignored";
+          std::cerr << "unknown Intel CPU part 0x" << std::hex << midr_get_part(midr) << " ignored\n";
       }
       break;
       // #endif /* ARM */
@@ -215,7 +216,7 @@ void decodeMIDR(
           *uarch = cpuinfo_uarch_carmel;
           break;
         default:
-          LOGS_DEFAULT(WARNING) << "unknown Nvidia CPU part 0x" << std::hex << midr_get_part(midr) << " ignored";
+          std::cerr << "unknown Nvidia CPU part 0x" << std::hex << midr_get_part(midr) << " ignored\n";
       }
       break;
 #if !defined(__ANDROID__)
@@ -225,7 +226,7 @@ void decodeMIDR(
           *uarch = cpuinfo_uarch_xgene;
           break;
         default:
-          LOGS_DEFAULT(WARNING) << "unknown Applied Micro CPU part 0x" << std::hex << midr_get_part(midr) << " ignored";
+          std::cerr << "unknown Applied Micro CPU part 0x" << std::hex << midr_get_part(midr) << " ignored\n";
       }
       break;
 #endif
@@ -297,7 +298,7 @@ void decodeMIDR(
           break;
           // #endif /* ARM64 && !defined(__ANDROID__) */
         default:
-          LOGS_DEFAULT(WARNING) << "unknown Qualcomm CPU part 0x" << std::hex << midr_get_part(midr) << " ignored";
+          std::cerr << "unknown Qualcomm CPU part 0x" << std::hex << midr_get_part(midr) << " ignored\n";
       }
       break;
     case 'S':
@@ -343,8 +344,9 @@ void decodeMIDR(
           *uarch = cpuinfo_uarch_exynos_m5;
           break;
         default:
-          LOGS_DEFAULT(WARNING) << "unknown Samsung CPU variant 0x"
-                                << std::hex << midr_get_variant(midr) << " part 0x" << std::hex << midr_get_part(midr) << " ignored";
+          std::cerr << "unknown Samsung CPU variant 0x"
+                    << std::hex << midr_get_variant(midr) << " part 0x" << std::hex << midr_get_part(midr)
+                    << " ignored\n";
       }
       break;
       // #if defined(_M_ARM) || defined(__arm__)
@@ -355,12 +357,12 @@ void decodeMIDR(
           *uarch = cpuinfo_uarch_pj4;
           break;
         default:
-          LOGS_DEFAULT(WARNING) << "unknown Marvell CPU part 0x" << std::hex << midr_get_part(midr) << " ignored";
+          std::cerr << "unknown Marvell CPU part 0x" << std::hex << midr_get_part(midr) << " ignored\n";
       }
       break;
       // #endif /* ARM */
     default:
-      LOGS_DEFAULT(WARNING) << "unknown CPU uarch from MIDR value: 0x" << std::hex << midr;
+      std::cerr << "unknown CPU uarch from MIDR value: 0x" << std::hex << midr << "\n";
   }
 }
 
diff --git a/onnxruntime/core/common/string_utils.h b/onnxruntime/core/common/string_utils.h
index 6e0eb460d2a63..eca1221e84cb8 100644
--- a/onnxruntime/core/common/string_utils.h
+++ b/onnxruntime/core/common/string_utils.h
@@ -3,6 +3,7 @@
 
 #pragma once
 
+#include <string>
 #include <string_view>
 #include <vector>
 
@@ -37,5 +38,32 @@ inline InlinedVector<std::string_view> SplitString(std::string_view string_to_sp
   return result;
 }
 
+/**
+ * Trim a string from start inplace.
+ * @param s The string to trim.
+ */
+inline void TrimStringFromLeft(std::string& s) {
+  s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](unsigned char ch) { return !std::isspace(ch); }));
+}
+
+/**
+ * Trim a string from end inplace.
+ * @param s The string to trim.
+ */
+inline void TrimStringFromRight(std::string& s) {
+  s.erase(std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) { return !std::isspace(ch); }).base(), s.end());
+}
+
+/**
+ * Trim a string from both ends.
+ * @param s The string to trim.
+ * @return The trimmed string.
+ */
+inline std::string TrimString(std::string s) {
+  TrimStringFromRight(s);
+  TrimStringFromLeft(s);
+  return s;
+}
+
 }  // namespace utils
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/common/threadpool.cc b/onnxruntime/core/common/threadpool.cc
index f29ab19608934..10e117267e14b 100644
--- a/onnxruntime/core/common/threadpool.cc
+++ b/onnxruntime/core/common/threadpool.cc
@@ -562,7 +562,7 @@ static ptrdiff_t CalculateParallelForBlock(const ptrdiff_t n, const Eigen::Tenso
   constexpr ptrdiff_t max_oversharding_factor = 4;
   ptrdiff_t block_size = Eigen::numext::mini(
       n,
-      Eigen::numext::maxi<ptrdiff_t>(Eigen::divup<ptrdiff_t>(n, max_oversharding_factor * num_threads), static_cast<ptrdiff_t>(block_size_f)));
+      Eigen::numext::maxi<ptrdiff_t>(Eigen::numext::div_ceil<ptrdiff_t>(n, max_oversharding_factor * num_threads), static_cast<ptrdiff_t>(block_size_f)));
   const ptrdiff_t max_block_size = Eigen::numext::mini(n, 2 * block_size);
 
   if (block_align) {
@@ -571,19 +571,19 @@ static ptrdiff_t CalculateParallelForBlock(const ptrdiff_t n, const Eigen::Tenso
     block_size = Eigen::numext::mini(n, new_block_size);
   }
 
-  ptrdiff_t block_count = Eigen::divup(n, block_size);
+  ptrdiff_t block_count = Eigen::numext::div_ceil(n, block_size);
 
   // Calculate parallel efficiency as fraction of total CPU time used for
   // computations:
   double max_efficiency =
-      static_cast<double>(block_count) / (Eigen::divup<ptrdiff_t>(block_count, num_threads) * num_threads);
+      static_cast<double>(block_count) / (Eigen::numext::div_ceil<ptrdiff_t>(block_count, num_threads) * num_threads);
 
   // Now try to increase block size up to max_block_size as long as it
   // doesn't decrease parallel efficiency.
   for (ptrdiff_t prev_block_count = block_count; max_efficiency < 1.0 && prev_block_count > 1;) {
     // This is the next block size that divides size into a smaller number
     // of blocks than the current block_size.
-    ptrdiff_t coarser_block_size = Eigen::divup(n, prev_block_count - 1);
+    ptrdiff_t coarser_block_size = Eigen::numext::div_ceil(n, prev_block_count - 1);
     if (block_align) {
       ptrdiff_t new_block_size = block_align(coarser_block_size);
       assert(new_block_size >= coarser_block_size);
@@ -593,11 +593,11 @@ static ptrdiff_t CalculateParallelForBlock(const ptrdiff_t n, const Eigen::Tenso
       break;  // Reached max block size. Stop.
     }
     // Recalculate parallel efficiency.
-    const ptrdiff_t coarser_block_count = Eigen::divup(n, coarser_block_size);
+    const ptrdiff_t coarser_block_count = Eigen::numext::div_ceil(n, coarser_block_size);
     assert(coarser_block_count < prev_block_count);
     prev_block_count = coarser_block_count;
     const double coarser_efficiency =
-        static_cast<double>(coarser_block_count) / (Eigen::divup<ptrdiff_t>(coarser_block_count, num_threads) * num_threads);
+        static_cast<double>(coarser_block_count) / (Eigen::numext::div_ceil<ptrdiff_t>(coarser_block_count, num_threads) * num_threads);
     if (coarser_efficiency + 0.01 >= max_efficiency) {
       // Taking it.
       block_size = coarser_block_size;
diff --git a/onnxruntime/core/framework/allocation_planner.cc b/onnxruntime/core/framework/allocation_planner.cc
index 0bf27fdf5e5dc..9556e056dedc0 100644
--- a/onnxruntime/core/framework/allocation_planner.cc
+++ b/onnxruntime/core/framework/allocation_planner.cc
@@ -320,7 +320,7 @@ class PlannerImpl {
       return false;
     }
 
-    const auto& alias_map = ci.kernel_def->Alias();
+    const auto alias_map = GetAliasMap(node, ci);
     auto input_args = node.InputDefs();
     for (auto& pair : alias_map) {
       if (pair.second == output_arg_num) {
@@ -829,6 +829,34 @@ class PlannerImpl {
     return p_provider->GetOrtDeviceByMemType(utils::IsInputOnCpu(node, &kernel_create_info, input_index) ? OrtMemTypeCPUInput : OrtMemTypeDefault);
   }
 
+  std::vector<std::pair<int, int>> GetAliasMap(const Node& node, const KernelCreateInfo& kernel_create_info) {
+    ORT_ENFORCE(kernel_create_info.kernel_def != nullptr, "KernelDef is null for node: ", node.Name());
+#ifdef ENABLE_TRAINING_TORCH_INTEROP
+    if ((node.OpType().compare("PythonOp") == 0 || node.OpType().compare("PythonOpGrad") == 0) &&
+        node.Domain() == kMSDomain) {
+      const auto& attrs = node.GetAttributes();
+      auto attr_it = attrs.find("tensor_reuse_map");
+      if (attr_it != attrs.end()) {
+        const auto& inplace_map = attr_it->second.ints();
+        std::vector<std::pair<int, int>> alias_map;
+        alias_map.reserve(inplace_map.size());
+        for (int i = 0; i < inplace_map.size(); ++i) {
+          int output_index = i;
+          int input_index = inplace_map[i];
+          if (input_index == -1) {
+            // skip because no reuse for this output
+            continue;
+          }
+          alias_map.emplace_back(std::make_pair(input_index, output_index));
+        }
+        return alias_map;
+      }
+    }
+#endif
+
+    return kernel_create_info.kernel_def->Alias();
+  }
+
   void GeneratePlanForWeightsHelper(const GraphViewer& graph_viewer,
                                     const InitializedTensorSet& weights,
                                     const KernelCreateInfoMap& kernel_create_info_map,
@@ -1084,7 +1112,7 @@ class PlannerImpl {
         }
 
         bool found_reusable = false;
-        const auto& alias_map = ci.kernel_def->Alias();
+        const auto alias_map = GetAliasMap(*node, ci);
         auto input_args = node->InputDefs();
         for (auto* input_arg : input_args) {
           OrtValueIndex input_idx_global{};
diff --git a/onnxruntime/core/framework/fallback_cpu_capability.cc b/onnxruntime/core/framework/fallback_cpu_capability.cc
index 3d971e6aa29a2..ef68b88187e08 100644
--- a/onnxruntime/core/framework/fallback_cpu_capability.cc
+++ b/onnxruntime/core/framework/fallback_cpu_capability.cc
@@ -9,6 +9,7 @@
 #include "onnx/defs/data_type_utils.h"
 
 #include "core/framework/op_kernel.h"
+#include "core/framework/utils.h"
 
 using namespace ONNX_NAMESPACE::Utils;
 
@@ -77,7 +78,7 @@ std::unordered_set<NodeIndex> GetCpuPreferredNodes(const onnxruntime::GraphViewe
     ORT_THROW_IF_ERROR(node->ForEachWithIndex(
         node->OutputDefs(),
         [&](const NodeArg& node_arg, size_t out_index) {
-          if (kernel_info->kernel_def->IsOutputOnCpu(out_index)) {
+          if (utils::IsOutputOnCpu(*node, kernel_info, out_index)) {
             cpu_output_args.insert(&node_arg);
             auto consumer_nodes = graph.GetConsumerNodes(node_arg.Name());
             for (auto& consumer_node : consumer_nodes) {
diff --git a/onnxruntime/core/framework/graph_partitioner.cc b/onnxruntime/core/framework/graph_partitioner.cc
index dede1ecc95885..e4fe0c7564548 100644
--- a/onnxruntime/core/framework/graph_partitioner.cc
+++ b/onnxruntime/core/framework/graph_partitioner.cc
@@ -13,7 +13,9 @@
 #include "core/framework/kernel_registry_manager.h"
 #include "core/framework/kernel_registry.h"
 #include "core/graph/function.h"
+#include "core/graph/function_utils.h"
 #include "core/graph/graph_viewer.h"
+#include "core/graph/model.h"
 
 // uncomment this line to count non-CUDA ops in ONNX domain
 // #define COUNT_NON_CUDA_OPS
@@ -129,6 +131,21 @@ struct GetCapabilityForEPParams {
   std::reference_wrapper<const layout_transformation::DebugGraphFn> debug_graph_fn;
 #endif  // !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD)
 };
+
+auto get_capabilities = [](const IExecutionProvider& ep,
+                           const GraphViewer& graph_viewer,
+                           const IExecutionProvider::IKernelLookup& kernel_lookup) {
+  auto capabilities = ep.GetCapability(graph_viewer, kernel_lookup);
+
+  // In theory an EP could return an empty capability. Remove those.
+  capabilities.erase(std::remove_if(capabilities.begin(), capabilities.end(),
+                                    [](const std::unique_ptr<ComputeCapability>& capability) {
+                                      return !capability || !capability->sub_graph;
+                                    }),
+                     capabilities.end());
+
+  return capabilities;
+};
 }  // namespace
 
 static Status GetCapabilityForEP(const GetCapabilityForEPParams& params) {
@@ -143,21 +160,6 @@ static Status GetCapabilityForEP(const GetCapabilityForEPParams& params) {
   }
 #endif  // !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD)
 
-  auto get_capabilities = [](const IExecutionProvider& ep,
-                             const GraphViewer& graph_viewer,
-                             const IExecutionProvider::IKernelLookup& kernel_lookup) {
-    auto capabilities = ep.GetCapability(graph_viewer, kernel_lookup);
-
-    // In theory an EP could return an empty capability. Remove those.
-    capabilities.erase(std::remove_if(capabilities.begin(), capabilities.end(),
-                                      [](const std::unique_ptr<ComputeCapability>& capability) {
-                                        return !capability || !capability->sub_graph;
-                                      }),
-                       capabilities.end());
-
-    return capabilities;
-  };
-
   const auto& kernel_registry_mgr = params.kernel_registry_mgr.get();
   const auto kernel_registries_for_ep = kernel_registry_mgr.GetKernelRegistriesByProviderType(ep_type);
   const KernelLookup kernel_lookup{ep_type,
@@ -177,9 +179,9 @@ static Status GetCapabilityForEP(const GetCapabilityForEPParams& params) {
   }
 
 #if !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD)
-  // Run layout transformer only for EPs other than CPU EP and provided the preferred layout is NHWC
+  // Run layout transformer for EPs with preferred layout of NHWC
   // CPU EP layout transformation happens later when level 3 transformers are run.
-  if (params.mode != GraphPartitioner::Mode::kAssignOnly &&
+  if (params.mode != GraphPartitioner::Mode::kAssignOnly && params.transform_layout.get() &&
       current_ep.GetPreferredLayout() == DataLayout::NHWC) {
     for (auto& capability : capabilities) {
       TryAssignNodes(graph, *capability->sub_graph, ep_type);
@@ -239,6 +241,26 @@ static Status GetCapabilityForEP(const GetCapabilityForEPParams& params) {
 }
 
 #if !defined(ORT_MINIMAL_BUILD)
+
+// This function queries the capabilities for a given EP, but it does not assign the nodes.
+// It also does not perform layout transformation. This will be done during normal partitioning.
+static Status GetCapabilityForEPForAotInlining(const GraphViewer& graph_viewer,
+                                               const KernelRegistryManager& kernel_registry_mgr,
+                                               const IExecutionProvider& current_ep,
+                                               std::vector<std::unique_ptr<ComputeCapability>>& capabilities) {
+  const auto& ep_type = current_ep.Type();
+
+  const auto kernel_registries_for_ep = kernel_registry_mgr.GetKernelRegistriesByProviderType(ep_type);
+  const KernelLookup kernel_lookup{ep_type,
+                                   kernel_registries_for_ep,
+                                   kernel_registry_mgr.GetKernelTypeStrResolver()};
+
+  // TODO: Provide EP with a capability to look inside the functions.
+  capabilities = get_capabilities(current_ep, graph_viewer, kernel_lookup);
+
+  return Status::OK();
+}
+
 /**
  * Check if a node can be placed on a specific provider.
  * Do nothing if the node is already assigned
@@ -518,7 +540,7 @@ static Status InlineNodes(Graph& graph, bool& modified_graph) {
   // successfully inlined, we re-run the partitioner on the modified graph.
   // NOTE: Inlining the function will change the nodes in the Graph instance, so we can't do that while iterating
   // using graph.Nodes().
-  std::vector<Node*> nodes_to_inline;
+  InlinedVector<Node*> nodes_to_inline;
   for (auto& node : graph.Nodes()) {
     if (node.GetExecutionProviderType().empty() && node.CanBeInlined()) {
       nodes_to_inline.push_back(&node);
@@ -533,6 +555,85 @@ static Status InlineNodes(Graph& graph, bool& modified_graph) {
   return Status::OK();
 }
 
+static Status InlineFunctionsAOTImpl(const ExecutionProviders& execution_providers,
+                                     const KernelRegistryManager& kernel_registry_mgr,
+                                     Graph& graph,
+                                     InlinedHashSet<std::string>& not_inlined,
+                                     size_t& inlined_count) {
+  // handle testing edge case where optimizers or constant lifting results in graph with no nodes.
+  // doing it here saves all providers checking for this in GetCapability
+  if (graph.NumberOfNodes() == 0) {
+    return Status::OK();
+  }
+
+  for (auto& node : graph.Nodes()) {
+    for (auto& entry : node.GetAttributeNameToMutableSubgraphMap()) {
+      Graph* subgraph = entry.second;
+      // we pass through the FuncManager from the top level graph
+      ORT_RETURN_IF_ERROR(InlineFunctionsAOTImpl(execution_providers,
+                                                 kernel_registry_mgr,
+                                                 *subgraph,
+                                                 not_inlined,
+                                                 inlined_count));
+    }
+  }
+
+  // Gather the candidates
+  InlinedVector<NodeIndex> inline_candidates;
+  for (auto& node : graph.Nodes()) {
+    if (node.CanBeInlined()) {
+      inline_candidates.push_back(node.Index());
+    }
+  }
+
+  if (inline_candidates.empty()) {
+    return Status::OK();
+  }
+
+  // Find out all the nodes that are already taken
+  const GraphViewer graph_viewer(graph);
+
+  InlinedHashSet<NodeIndex> claimed_by_ep;
+  for (const auto& ep : execution_providers) {
+    std::vector<std::unique_ptr<ComputeCapability>> capabilities;
+    ORT_RETURN_IF_ERROR(GetCapabilityForEPForAotInlining(graph_viewer, kernel_registry_mgr, *ep, capabilities));
+    for (auto& capability : capabilities) {
+      const auto& nodes = capability->sub_graph->nodes;
+      if (nodes.size() == 1) {
+        // Single node capability.
+        ORT_IGNORE_RETURN_VALUE(claimed_by_ep.insert(nodes[0]));
+      } else {
+        // Make sure none is claimed by other EPs mirroring the logic in PartitionOnnxFormatModelImpl.
+        if (std::all_of(nodes.cbegin(), nodes.cend(), [&claimed_by_ep](NodeIndex node_index) {
+              return claimed_by_ep.count(node_index) == 0;
+            })) {
+          claimed_by_ep.insert(nodes.cbegin(), nodes.cend());
+        }
+      }
+    }
+  }
+
+  // TODO: Insert version check. We need to collect all the versions
+  // that imported by the model. If the version is not supported by
+  // the model, we can not inline it.
+
+  for (auto node_index : inline_candidates) {
+    auto* node = graph.GetNode(node_index);
+    if (node != nullptr) {
+      if (claimed_by_ep.count(node_index) == 0) {
+        ORT_RETURN_IF_ERROR(graph.InlineFunction(*node));
+        ++inlined_count;
+      } else {
+        // OpType is the same as function name.
+        auto function_id = function_utils::GetFunctionIdentifier(node->Domain(), node->OpType());
+        ORT_IGNORE_RETURN_VALUE(not_inlined.insert(std::move(function_id)));
+      }
+    }
+  }
+
+  return Status::OK();
+}
+
 static Status PartitionOnnxFormatModel(const PartitionParams& partition_params, GraphPartitioner::Mode mode,
                                        const ExecutionProviders& execution_providers,
                                        KernelRegistryManager& kernel_registry_manager) {
@@ -693,6 +794,50 @@ static Status PartitionOrtFormatModel(const PartitionParams& partition_params,
   return Status::OK();
 }
 
+#ifndef ORT_MINIMAL_BUILD
+
+Status GraphPartitioner::InlineFunctionsAOT(Model& model,
+                                            const ExecutionProviders& execution_providers,
+                                            const KernelRegistryManager& kernel_registry_manager,
+                                            const logging::Logger& logger) const {
+  const auto local_functions_num = model.GetModelLocalFunctionTemplates().size();
+  const bool is_there_local_functions = local_functions_num > 0;
+
+  if (!is_there_local_functions) {
+    LOGS(logger, INFO) << "This model does not have any local functions defined. AOT Inlining is not performed";
+    return Status::OK();
+  }
+
+  auto& graph = model.MainGraph();
+  InlinedHashSet<std::string> not_inlined;
+  do {
+    size_t inlined_count = 0;
+    ORT_RETURN_IF_ERROR(InlineFunctionsAOTImpl(execution_providers,
+                                               kernel_registry_manager,
+                                               graph,
+                                               not_inlined,
+                                               inlined_count));
+
+    if (inlined_count == 0) {
+      break;
+    }
+
+    ORT_RETURN_IF_ERROR(graph.Resolve());
+  } while (true);
+
+  model.RemoveLocalFunctionsProtos(not_inlined);
+
+  LOGS(logger, INFO)
+      << "AOT inlining completed. (" << (local_functions_num - model.GetModelLocalFunctionTemplates().size())
+      << ") functions of ("
+      << local_functions_num
+      << ") pruned.";
+
+  return Status::OK();
+}
+
+#endif
+
 Status GraphPartitioner::Partition(Graph& graph, FuncManager& func_mgr,
                                    const layout_transformation::TransformLayoutFunction& transform_layout_function,
                                    Mode mode,
diff --git a/onnxruntime/core/framework/graph_partitioner.h b/onnxruntime/core/framework/graph_partitioner.h
index 36a27e906c651..4fc85c2588260 100644
--- a/onnxruntime/core/framework/graph_partitioner.h
+++ b/onnxruntime/core/framework/graph_partitioner.h
@@ -12,6 +12,7 @@ namespace onnxruntime {
 
 class ExecutionProviders;
 class KernelRegistryManager;
+class Model;
 
 class GraphPartitioner {
  public:
@@ -33,6 +34,28 @@ class GraphPartitioner {
                    Mode mode = Mode::kNormal,
                    const layout_transformation::DebugGraphFn& debug_graph_fn = {}) const;
 
+#ifndef ORT_MINIMAL_BUILD
+  /// <summary>
+  // Ahead of Time Function inlining. The main purpose of the function is to inline as many
+  // functions as possible and delete locally defined functions to reduce the size of the model.
+  // This would make other optimizations to be more effective.
+  //
+  // This function performs GetCapability on the graph and its subgraphs bottom up
+  // and inlines any functions that are not claimed by any of the execution providers.
+  // This function does not attempt to run layout transformation, and it does not assign EPs.
+  // The latter will be done by graph partitioning after Level1 optimizations are done.
+  /// </summary>
+  /// <param name="model">model instance</param>
+  /// <param name="execution_providers">execution providers considered</param>
+  /// <param name="kernel_registry_manager">registry manager</param>
+  /// <param name="logger">session logger</param>
+  /// <returns></returns>
+  Status InlineFunctionsAOT(Model& model,
+                            const ExecutionProviders& execution_providers,
+                            const KernelRegistryManager& kernel_registry_manager,
+                            const logging::Logger& logger) const;
+#endif
+
  private:
   ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(GraphPartitioner);
 
diff --git a/onnxruntime/core/framework/kernel_registry_manager.cc b/onnxruntime/core/framework/kernel_registry_manager.cc
index 38c8a4a4e3d5e..b2ef853119588 100644
--- a/onnxruntime/core/framework/kernel_registry_manager.cc
+++ b/onnxruntime/core/framework/kernel_registry_manager.cc
@@ -62,9 +62,15 @@ Status KernelRegistryManager::SearchKernelRegistry(const Node& node,
 
   auto create_error_message = [&node, &status](const std::string& prefix) {
     std::ostringstream errormsg;
-    errormsg << prefix << node.OpType() << "(" << node.SinceVersion() << ")";
-    if (!node.Name().empty()) errormsg << " (node " << node.Name() << "). ";
-    if (!status.IsOK()) errormsg << status.ErrorMessage();
+    errormsg << prefix;
+    const auto& domain = node.Domain();
+    if (!domain.empty()) {
+      errormsg << domain << ".";
+    }
+    errormsg << node.OpType() << "(" << node.SinceVersion() << ")"
+             << " (node:'" << node.Name() << "' ep:'" << node.GetExecutionProviderType() << "'). ";
+    if (!status.IsOK())
+      errormsg << status.ErrorMessage();
 
     return errormsg.str();
   };
diff --git a/onnxruntime/core/framework/kernel_type_str_resolver_utils.cc b/onnxruntime/core/framework/kernel_type_str_resolver_utils.cc
index ea93db58339c7..4f5fa9910b5df 100644
--- a/onnxruntime/core/framework/kernel_type_str_resolver_utils.cc
+++ b/onnxruntime/core/framework/kernel_type_str_resolver_utils.cc
@@ -53,128 +53,200 @@ Status AddLayoutTransformationRequiredOpsToKernelTypeStrResolver(KernelTypeStrRe
   // clang-format off
   constexpr uint8_t kLayoutTransformationRequiredOpsKernelTypeStrResolverBytes[] = {
       0x10, 0x00, 0x00, 0x00, 0x6b, 0x74, 0x73, 0x72, 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00,
-      0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0xbc, 0x06, 0x00, 0x00,
-      0x4c, 0x02, 0x00, 0x00, 0xe0, 0x01, 0x00, 0x00, 0xe0, 0x00, 0x00, 0x00, 0x14, 0x06, 0x00, 0x00,
-      0x88, 0x01, 0x00, 0x00, 0xb8, 0x05, 0x00, 0x00, 0x1c, 0x05, 0x00, 0x00, 0x18, 0x07, 0x00, 0x00,
-      0xcc, 0x04, 0x00, 0x00, 0x0c, 0x01, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00, 0x54, 0x05, 0x00, 0x00,
-      0x3c, 0x06, 0x00, 0x00, 0xf8, 0x02, 0x00, 0x00, 0x7c, 0x02, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
-      0x38, 0x03, 0x00, 0x00, 0xec, 0xf8, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+      0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0xb4, 0x00, 0x00, 0x00,
+      0x4c, 0x0b, 0x00, 0x00, 0xac, 0x08, 0x00, 0x00, 0xd0, 0x0a, 0x00, 0x00, 0x10, 0x06, 0x00, 0x00,
+      0xa8, 0x07, 0x00, 0x00, 0x18, 0x03, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00,
+      0x44, 0x07, 0x00, 0x00, 0x9c, 0x01, 0x00, 0x00, 0xf8, 0x07, 0x00, 0x00, 0x78, 0x09, 0x00, 0x00,
+      0x14, 0x01, 0x00, 0x00, 0x50, 0x06, 0x00, 0x00, 0x60, 0x02, 0x00, 0x00, 0xf4, 0x08, 0x00, 0x00,
+      0x8c, 0x03, 0x00, 0x00, 0x9c, 0x02, 0x00, 0x00, 0x84, 0x06, 0x00, 0x00, 0xcc, 0x03, 0x00, 0x00,
+      0x60, 0x05, 0x00, 0x00, 0xb8, 0x01, 0x00, 0x00, 0x1c, 0x03, 0x00, 0x00, 0x08, 0x04, 0x00, 0x00,
+      0xe0, 0x09, 0x00, 0x00, 0x8c, 0xf4, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+      0x01, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3a, 0x49, 0x64, 0x65,
+      0x6e, 0x74, 0x69, 0x74, 0x79, 0x3a, 0x31, 0x34, 0x00, 0x00, 0x00, 0x00, 0xb4, 0xf4, 0xff, 0xff,
+      0x08, 0x07, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+      0x04, 0x00, 0x00, 0x00, 0xda, 0xf4, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, 0x9c, 0xf4, 0xff, 0xff,
+      0xd8, 0xf4, 0xff, 0xff, 0x18, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+      0x60, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+      0x3a, 0x44, 0x65, 0x71, 0x75, 0x61, 0x6e, 0x74, 0x69, 0x7a, 0x65, 0x4c, 0x69, 0x6e, 0x65, 0x61,
+      0x72, 0x3a, 0x31, 0x30, 0x00, 0x00, 0x00, 0x00, 0x10, 0xf5, 0xff, 0xff, 0xa4, 0x0a, 0x00, 0x00,
+      0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xfc, 0xf4, 0xff, 0xff,
+      0x01, 0x00, 0x00, 0x00, 0x2c, 0xf5, 0xff, 0xff, 0xb0, 0x0a, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+      0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x4e, 0xf5, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01,
+      0x48, 0xf5, 0xff, 0xff, 0xc8, 0x0a, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+      0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x38, 0xf5, 0xff, 0xff, 0x02, 0x00, 0x00, 0x00,
+      0x30, 0xf5, 0xff, 0xff, 0x6c, 0xf5, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+      0x02, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00,
+      0x3a, 0x51, 0x75, 0x61, 0x6e, 0x74, 0x69, 0x7a, 0x65, 0x4c, 0x69, 0x6e, 0x65, 0x61, 0x72, 0x3a,
+      0x31, 0x39, 0x00, 0x00, 0x9c, 0xf5, 0xff, 0xff, 0x3c, 0x09, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+      0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xc2, 0xf5, 0xff, 0xff,
+      0x00, 0x00, 0x00, 0x01, 0x94, 0xf5, 0xff, 0xff, 0x02, 0x00, 0x00, 0x00, 0xc4, 0xf5, 0xff, 0xff,
+      0xe8, 0x08, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+      0x04, 0x00, 0x00, 0x00, 0xb4, 0xf5, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0xac, 0xf5, 0xff, 0xff,
+      0xe8, 0xf5, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+      0x18, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3a, 0x49, 0x64, 0x65, 0x6e, 0x74, 0x69, 0x74,
+      0x79, 0x3a, 0x31, 0x39, 0x00, 0x00, 0x00, 0x00, 0x10, 0xf6, 0xff, 0xff, 0xac, 0x05, 0x00, 0x00,
+      0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+      0x36, 0xf6, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, 0xf8, 0xf5, 0xff, 0xff, 0x34, 0xf6, 0xff, 0xff,
+      0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00,
+      0x50, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x63, 0x6f, 0x6d, 0x2e, 0x6d, 0x69, 0x63, 0x72,
+      0x6f, 0x73, 0x6f, 0x66, 0x74, 0x3a, 0x44, 0x65, 0x71, 0x75, 0x61, 0x6e, 0x74, 0x69, 0x7a, 0x65,
+      0x4c, 0x69, 0x6e, 0x65, 0x61, 0x72, 0x3a, 0x31, 0x00, 0x00, 0x00, 0x00, 0x74, 0xf6, 0xff, 0xff,
+      0x38, 0x08, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+      0x04, 0x00, 0x00, 0x00, 0x64, 0xf6, 0xff, 0xff, 0x02, 0x00, 0x00, 0x00, 0x5c, 0xf6, 0xff, 0xff,
+      0x98, 0xf6, 0xff, 0xff, 0x40, 0x08, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+      0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xbe, 0xf6, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01,
+      0x90, 0xf6, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0xc0, 0xf6, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00,
+      0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
+      0x3a, 0x53, 0x71, 0x75, 0x65, 0x65, 0x7a, 0x65, 0x3a, 0x31, 0x31, 0x00, 0xe4, 0xf6, 0xff, 0xff,
+      0x2c, 0x09, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+      0x04, 0x00, 0x00, 0x00, 0x0a, 0xf7, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, 0xcc, 0xf6, 0xff, 0xff,
+      0x08, 0xf7, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+      0x18, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x3a, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x70, 0x6f,
+      0x73, 0x65, 0x3a, 0x31, 0x33, 0x00, 0x00, 0x00, 0x30, 0xf7, 0xff, 0xff, 0xe0, 0x08, 0x00, 0x00,
+      0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+      0x56, 0xf7, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, 0x18, 0xf7, 0xff, 0xff, 0x54, 0xf7, 0xff, 0xff,
+      0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+      0x0b, 0x00, 0x00, 0x00, 0x3a, 0x49, 0x64, 0x65, 0x6e, 0x74, 0x69, 0x74, 0x79, 0x3a, 0x31, 0x00,
+      0x78, 0xf7, 0xff, 0xff, 0x98, 0x08, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+      0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x9e, 0xf7, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01,
+      0x60, 0xf7, 0xff, 0xff, 0x9c, 0xf7, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
       0x01, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x63, 0x6f, 0x6d, 0x2e,
       0x6d, 0x69, 0x63, 0x72, 0x6f, 0x73, 0x6f, 0x66, 0x74, 0x3a, 0x4e, 0x68, 0x77, 0x63, 0x4d, 0x61,
-      0x78, 0x50, 0x6f, 0x6f, 0x6c, 0x3a, 0x31, 0x00, 0x20, 0xf9, 0xff, 0xff, 0xf0, 0x06, 0x00, 0x00,
+      0x78, 0x50, 0x6f, 0x6f, 0x6c, 0x3a, 0x31, 0x00, 0xd0, 0xf7, 0xff, 0xff, 0x40, 0x08, 0x00, 0x00,
       0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
-      0x0e, 0xf9, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, 0x08, 0xf9, 0xff, 0xff, 0x44, 0xf9, 0xff, 0xff,
+      0xf6, 0xf7, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, 0xb8, 0xf7, 0xff, 0xff, 0xf4, 0xf7, 0xff, 0xff,
       0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
       0x0c, 0x00, 0x00, 0x00, 0x3a, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x70, 0x6f, 0x73, 0x65, 0x3a, 0x31,
-      0x00, 0x00, 0x00, 0x00, 0x6c, 0xf9, 0xff, 0xff, 0xa4, 0x06, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
-      0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x5a, 0xf9, 0xff, 0xff,
-      0x00, 0x00, 0x00, 0x01, 0x54, 0xf9, 0xff, 0xff, 0x90, 0xf9, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00,
-      0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
-      0x3a, 0x49, 0x64, 0x65, 0x6e, 0x74, 0x69, 0x74, 0x79, 0x3a, 0x31, 0x00, 0xb4, 0xf9, 0xff, 0xff,
-      0x5c, 0x06, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
-      0x04, 0x00, 0x00, 0x00, 0xa2, 0xf9, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, 0x9c, 0xf9, 0xff, 0xff,
+      0x00, 0x00, 0x00, 0x00, 0x1c, 0xf8, 0xff, 0xff, 0xf4, 0x07, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+      0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x42, 0xf8, 0xff, 0xff,
+      0x00, 0x00, 0x00, 0x01, 0x04, 0xf8, 0xff, 0xff, 0x40, 0xf8, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00,
+      0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00,
+      0x3a, 0x55, 0x6e, 0x73, 0x71, 0x75, 0x65, 0x65, 0x7a, 0x65, 0x3a, 0x31, 0x31, 0x00, 0x00, 0x00,
+      0x68, 0xf8, 0xff, 0xff, 0xa8, 0x07, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+      0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x8e, 0xf8, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01,
+      0x50, 0xf8, 0xff, 0xff, 0x8c, 0xf8, 0xff, 0xff, 0x28, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+      0x07, 0x00, 0x00, 0x00, 0xf4, 0x00, 0x00, 0x00, 0xc8, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00,
+      0x0c, 0x01, 0x00, 0x00, 0x94, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00,
+      0x1b, 0x00, 0x00, 0x00, 0x63, 0x6f, 0x6d, 0x2e, 0x6d, 0x69, 0x63, 0x72, 0x6f, 0x73, 0x6f, 0x66,
+      0x74, 0x3a, 0x51, 0x4c, 0x69, 0x6e, 0x65, 0x61, 0x72, 0x43, 0x6f, 0x6e, 0x76, 0x3a, 0x31, 0x00,
+      0xd8, 0xf8, 0xff, 0xff, 0xdc, 0x06, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+      0x04, 0x00, 0x00, 0x00, 0xc4, 0xf8, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0xf4, 0xf8, 0xff, 0xff,
+      0x08, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x54, 0x33, 0x00, 0x00,
+      0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x22, 0xf9, 0xff, 0xff,
+      0x00, 0x00, 0x00, 0x01, 0xf4, 0xf8, 0xff, 0xff, 0x07, 0x00, 0x00, 0x00, 0x24, 0xf9, 0xff, 0xff,
+      0xe4, 0x04, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+      0x10, 0xf9, 0xff, 0xff, 0x06, 0x00, 0x00, 0x00, 0x40, 0xf9, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00,
+      0x10, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x77, 0x5f, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x00,
+      0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x38, 0xf9, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00,
+      0x68, 0xf9, 0xff, 0xff, 0x70, 0x05, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+      0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x58, 0xf9, 0xff, 0xff, 0x05, 0x00, 0x00, 0x00,
+      0x60, 0xf9, 0xff, 0xff, 0x03, 0x00, 0x00, 0x00, 0x90, 0xf9, 0xff, 0xff, 0x1c, 0x05, 0x00, 0x00,
+      0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+      0x80, 0xf9, 0xff, 0xff, 0x02, 0x00, 0x00, 0x00, 0x78, 0xf9, 0xff, 0xff, 0xb4, 0xf9, 0xff, 0xff,
+      0x08, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x54, 0x34, 0x00, 0x00,
+      0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xa8, 0xf9, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00,
       0xd8, 0xf9, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
-      0x34, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x3a, 0x53, 0x71, 0x75,
-      0x65, 0x65, 0x7a, 0x65, 0x3a, 0x31, 0x33, 0x00, 0x00, 0xfa, 0xff, 0xff, 0xb4, 0x01, 0x00, 0x00,
-      0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x48, 0xfa, 0xff, 0xff,
-      0x01, 0x00, 0x00, 0x00, 0x1c, 0xfa, 0xff, 0xff, 0xf4, 0x05, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
-      0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x0a, 0xfa, 0xff, 0xff,
-      0x00, 0x00, 0x00, 0x01, 0x04, 0xfa, 0xff, 0xff, 0x40, 0xfa, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00,
-      0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
-      0x3a, 0x49, 0x64, 0x65, 0x6e, 0x74, 0x69, 0x74, 0x79, 0x3a, 0x31, 0x34, 0x00, 0x00, 0x00, 0x00,
-      0x68, 0xfa, 0xff, 0xff, 0x3c, 0x04, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
-      0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x56, 0xfa, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01,
-      0x50, 0xfa, 0xff, 0xff, 0x8c, 0xfa, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
-      0x02, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00,
-      0x3a, 0x47, 0x61, 0x74, 0x68, 0x65, 0x72, 0x3a, 0x31, 0x33, 0x00, 0x00, 0xb4, 0xfa, 0xff, 0xff,
-      0x00, 0x05, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
-      0xfc, 0xfa, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0xd0, 0xfa, 0xff, 0xff, 0x40, 0x05, 0x00, 0x00,
+      0x38, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x3a, 0x55, 0x6e, 0x73,
+      0x71, 0x75, 0x65, 0x65, 0x7a, 0x65, 0x3a, 0x31, 0x33, 0x00, 0x00, 0x00, 0x04, 0xfa, 0xff, 0xff,
+      0x84, 0x03, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+      0xf0, 0xf9, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x20, 0xfa, 0xff, 0xff, 0xf0, 0x05, 0x00, 0x00,
       0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
-      0xbe, 0xfa, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, 0xb8, 0xfa, 0xff, 0xff, 0xf4, 0xfa, 0xff, 0xff,
+      0x46, 0xfa, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, 0x08, 0xfa, 0xff, 0xff, 0x44, 0xfa, 0xff, 0xff,
       0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00,
       0x14, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x3a, 0x47, 0x61, 0x74, 0x68, 0x65, 0x72, 0x3a,
-      0x31, 0x31, 0x00, 0x00, 0x1c, 0xfb, 0xff, 0xff, 0x98, 0x04, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
-      0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x64, 0xfb, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00,
-      0x38, 0xfb, 0xff, 0xff, 0xd8, 0x04, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
-      0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x26, 0xfb, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01,
-      0x20, 0xfb, 0xff, 0xff, 0x5c, 0xfb, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
-      0x02, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00,
-      0x3a, 0x55, 0x6e, 0x73, 0x71, 0x75, 0x65, 0x65, 0x7a, 0x65, 0x3a, 0x31, 0x33, 0x00, 0x00, 0x00,
-      0x88, 0xfb, 0xff, 0xff, 0x88, 0x04, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
-      0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x76, 0xfb, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01,
-      0x70, 0xfb, 0xff, 0xff, 0xac, 0xfb, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
-      0x04, 0x00, 0x00, 0x00, 0x61, 0x78, 0x65, 0x73, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
-      0x04, 0x00, 0x00, 0x00, 0x00, 0xfc, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0xd4, 0xfb, 0xff, 0xff,
-      0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
-      0x0d, 0x00, 0x00, 0x00, 0x3a, 0x55, 0x6e, 0x73, 0x71, 0x75, 0x65, 0x65, 0x7a, 0x65, 0x3a, 0x31,
-      0x31, 0x00, 0x00, 0x00, 0xfc, 0xfb, 0xff, 0xff, 0x14, 0x04, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
-      0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xea, 0xfb, 0xff, 0xff,
-      0x00, 0x00, 0x00, 0x01, 0xe4, 0xfb, 0xff, 0xff, 0x20, 0xfc, 0xff, 0xff, 0x28, 0x00, 0x00, 0x00,
-      0x04, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x38, 0x01, 0x00, 0x00, 0xdc, 0x00, 0x00, 0x00,
-      0xa8, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00,
-      0x48, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x63, 0x6f, 0x6d, 0x2e, 0x6d, 0x69, 0x63, 0x72,
-      0x6f, 0x73, 0x6f, 0x66, 0x74, 0x3a, 0x51, 0x4c, 0x69, 0x6e, 0x65, 0x61, 0x72, 0x43, 0x6f, 0x6e,
-      0x76, 0x3a, 0x31, 0x00, 0x6c, 0xfc, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
-      0x02, 0x00, 0x00, 0x00, 0x54, 0x34, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
-      0xbc, 0xfc, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, 0x90, 0xfc, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00,
-      0x10, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x79, 0x5f, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x00,
-      0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xe4, 0xfc, 0xff, 0xff, 0x06, 0x00, 0x00, 0x00,
-      0xb8, 0xfc, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
-      0x78, 0x5f, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
-      0x0c, 0xfd, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0xe0, 0xfc, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00,
-      0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x54, 0x33, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
-      0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xd6, 0xfc, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01,
-      0x3c, 0xfd, 0xff, 0xff, 0x07, 0x00, 0x00, 0x00, 0x10, 0xfd, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00,
-      0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x54, 0x32, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
-      0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x64, 0xfd, 0xff, 0xff, 0x05, 0x00, 0x00, 0x00,
-      0x6c, 0xfd, 0xff, 0xff, 0x03, 0x00, 0x00, 0x00, 0x40, 0xfd, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00,
-      0x10, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x77, 0x5f, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x00,
-      0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x94, 0xfd, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00,
-      0x68, 0xfd, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
-      0x54, 0x31, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
-      0xbc, 0xfd, 0xff, 0xff, 0x02, 0x00, 0x00, 0x00, 0x58, 0xfd, 0xff, 0xff, 0x94, 0xfd, 0xff, 0xff,
-      0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
-      0x0b, 0x00, 0x00, 0x00, 0x3a, 0x53, 0x71, 0x75, 0x65, 0x65, 0x7a, 0x65, 0x3a, 0x31, 0x31, 0x00,
-      0xb8, 0xfd, 0xff, 0xff, 0x58, 0x02, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
-      0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xa6, 0xfd, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01,
-      0xa0, 0xfd, 0xff, 0xff, 0xdc, 0xfd, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
-      0x01, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3a, 0x49, 0x64, 0x65,
-      0x6e, 0x74, 0x69, 0x74, 0x79, 0x3a, 0x31, 0x39, 0x00, 0x00, 0x00, 0x00, 0x04, 0xfe, 0xff, 0xff,
-      0xa0, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
-      0x04, 0x00, 0x00, 0x00, 0xf2, 0xfd, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, 0xec, 0xfd, 0xff, 0xff,
-      0x28, 0xfe, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
-      0x18, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x3a, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x70, 0x6f,
-      0x73, 0x65, 0x3a, 0x31, 0x33, 0x00, 0x00, 0x00, 0x50, 0xfe, 0xff, 0xff, 0xc0, 0x01, 0x00, 0x00,
-      0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
-      0x3e, 0xfe, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, 0x38, 0xfe, 0xff, 0xff, 0x74, 0xfe, 0xff, 0xff,
-      0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
-      0x0c, 0x00, 0x00, 0x00, 0x3a, 0x49, 0x64, 0x65, 0x6e, 0x74, 0x69, 0x74, 0x79, 0x3a, 0x31, 0x36,
-      0x00, 0x00, 0x00, 0x00, 0x9c, 0xfe, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
-      0x01, 0x00, 0x00, 0x00, 0x56, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
-      0x04, 0x00, 0x00, 0x00, 0x92, 0xfe, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, 0x8c, 0xfe, 0xff, 0xff,
-      0xc8, 0xfe, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
-      0x18, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3a, 0x49, 0x64, 0x65, 0x6e, 0x74, 0x69, 0x74,
-      0x79, 0x3a, 0x31, 0x33, 0x00, 0x00, 0x00, 0x00, 0xf0, 0xfe, 0xff, 0xff, 0x20, 0x01, 0x00, 0x00,
+      0x31, 0x31, 0x00, 0x00, 0x6c, 0xfa, 0xff, 0xff, 0xc4, 0x04, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+      0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x58, 0xfa, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00,
+      0x88, 0xfa, 0xff, 0xff, 0x88, 0x05, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+      0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xae, 0xfa, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01,
+      0x70, 0xfa, 0xff, 0xff, 0xac, 0xfa, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+      0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x3a, 0x53, 0x71, 0x75,
+      0x65, 0x65, 0x7a, 0x65, 0x3a, 0x31, 0x00, 0x00, 0xd0, 0xfa, 0xff, 0xff, 0x40, 0x05, 0x00, 0x00,
       0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
-      0xde, 0xfe, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, 0xd8, 0xfe, 0xff, 0xff, 0x14, 0xff, 0xff, 0xff,
+      0xf6, 0xfa, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, 0xb8, 0xfa, 0xff, 0xff, 0xf4, 0xfa, 0xff, 0xff,
       0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
       0x0c, 0x00, 0x00, 0x00, 0x3a, 0x55, 0x6e, 0x73, 0x71, 0x75, 0x65, 0x65, 0x7a, 0x65, 0x3a, 0x31,
-      0x00, 0x00, 0x00, 0x00, 0x3c, 0xff, 0xff, 0xff, 0xd4, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
-      0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x2a, 0xff, 0xff, 0xff,
-      0x00, 0x00, 0x00, 0x01, 0x24, 0xff, 0xff, 0xff, 0x60, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00,
+      0x00, 0x00, 0x00, 0x00, 0x1c, 0xfb, 0xff, 0xff, 0xf4, 0x04, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+      0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x42, 0xfb, 0xff, 0xff,
+      0x00, 0x00, 0x00, 0x01, 0x04, 0xfb, 0xff, 0xff, 0x40, 0xfb, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00,
+      0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+      0x3a, 0x49, 0x64, 0x65, 0x6e, 0x74, 0x69, 0x74, 0x79, 0x3a, 0x31, 0x33, 0x00, 0x00, 0x00, 0x00,
+      0x68, 0xfb, 0xff, 0xff, 0xa8, 0x04, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+      0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x8e, 0xfb, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01,
+      0x50, 0xfb, 0xff, 0xff, 0x8c, 0xfb, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+      0x01, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3a, 0x49, 0x64, 0x65,
+      0x6e, 0x74, 0x69, 0x74, 0x79, 0x3a, 0x31, 0x36, 0x00, 0x00, 0x00, 0x00, 0xb4, 0xfb, 0xff, 0xff,
+      0x08, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x56, 0x00, 0x00, 0x00,
+      0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xe2, 0xfb, 0xff, 0xff,
+      0x00, 0x00, 0x00, 0x01, 0xa4, 0xfb, 0xff, 0xff, 0xe0, 0xfb, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00,
       0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00,
+      0x0a, 0x00, 0x00, 0x00, 0x3a, 0x47, 0x61, 0x74, 0x68, 0x65, 0x72, 0x3a, 0x31, 0x33, 0x00, 0x00,
+      0x08, 0xfc, 0xff, 0xff, 0x08, 0x04, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+      0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x2e, 0xfc, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01,
+      0xf0, 0xfb, 0xff, 0xff, 0x2c, 0xfc, 0xff, 0xff, 0x04, 0x03, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+      0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x18, 0xfc, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00,
+      0x48, 0xfc, 0xff, 0xff, 0x18, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+      0x24, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00,
+      0x3a, 0x51, 0x75, 0x61, 0x6e, 0x74, 0x69, 0x7a, 0x65, 0x4c, 0x69, 0x6e, 0x65, 0x61, 0x72, 0x3a,
+      0x31, 0x30, 0x00, 0x00, 0x7c, 0xfc, 0xff, 0xff, 0x30, 0x02, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+      0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x58, 0xfc, 0xff, 0xff, 0x94, 0xfc, 0xff, 0xff,
+      0x44, 0x02, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+      0x04, 0x00, 0x00, 0x00, 0xba, 0xfc, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, 0x8c, 0xfc, 0xff, 0xff,
+      0x02, 0x00, 0x00, 0x00, 0xbc, 0xfc, 0xff, 0xff, 0x4c, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+      0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xa8, 0xfc, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00,
+      0xd8, 0xfc, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+      0x4c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3a, 0x44, 0x65, 0x71,
+      0x75, 0x61, 0x6e, 0x74, 0x69, 0x7a, 0x65, 0x4c, 0x69, 0x6e, 0x65, 0x61, 0x72, 0x3a, 0x31, 0x39,
+      0x00, 0x00, 0x00, 0x00, 0x0c, 0xfd, 0xff, 0xff, 0xcc, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+      0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x32, 0xfd, 0xff, 0xff,
+      0x00, 0x00, 0x00, 0x01, 0x04, 0xfd, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x34, 0xfd, 0xff, 0xff,
+      0x78, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+      0x04, 0x00, 0x00, 0x00, 0x24, 0xfd, 0xff, 0xff, 0x02, 0x00, 0x00, 0x00, 0x1c, 0xfd, 0xff, 0xff,
+      0x58, 0xfd, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+      0x40, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x3a, 0x53, 0x71, 0x75,
+      0x65, 0x65, 0x7a, 0x65, 0x3a, 0x31, 0x33, 0x00, 0x80, 0xfd, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00,
+      0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x61, 0x78, 0x65, 0x73, 0x00, 0x00, 0x00, 0x00,
+      0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x78, 0xfd, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00,
+      0xa8, 0xfd, 0xff, 0xff, 0x68, 0x02, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+      0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xce, 0xfd, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01,
+      0x90, 0xfd, 0xff, 0xff, 0xcc, 0xfd, 0xff, 0xff, 0x18, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+      0x03, 0x00, 0x00, 0x00, 0x4c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+      0x12, 0x00, 0x00, 0x00, 0x3a, 0x51, 0x75, 0x61, 0x6e, 0x74, 0x69, 0x7a, 0x65, 0x4c, 0x69, 0x6e,
+      0x65, 0x61, 0x72, 0x3a, 0x31, 0x33, 0x00, 0x00, 0x00, 0xfe, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00,
+      0x10, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x79, 0x5f, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x00,
+      0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xf8, 0xfd, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00,
+      0x28, 0xfe, 0xff, 0xff, 0x84, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+      0x04, 0x00, 0x00, 0x00, 0x04, 0xfe, 0xff, 0xff, 0x40, 0xfe, 0xff, 0xff, 0x98, 0x00, 0x00, 0x00,
+      0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+      0x66, 0xfe, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, 0x38, 0xfe, 0xff, 0xff, 0x02, 0x00, 0x00, 0x00,
+      0x68, 0xfe, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+      0x2c, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x63, 0x6f, 0x6d, 0x2e,
+      0x6d, 0x69, 0x63, 0x72, 0x6f, 0x73, 0x6f, 0x66, 0x74, 0x3a, 0x51, 0x75, 0x61, 0x6e, 0x74, 0x69,
+      0x7a, 0x65, 0x4c, 0x69, 0x6e, 0x65, 0x61, 0x72, 0x3a, 0x31, 0x00, 0x00, 0xa4, 0xfe, 0xff, 0xff,
+      0x08, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x54, 0x31, 0x00, 0x00,
+      0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x9c, 0xfe, 0xff, 0xff,
+      0x01, 0x00, 0x00, 0x00, 0x94, 0xfe, 0xff, 0xff, 0xd0, 0xfe, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00,
+      0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x54, 0x32, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+      0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xfe, 0xfe, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01,
+      0xd0, 0xfe, 0xff, 0xff, 0x02, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00,
+      0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
       0x09, 0x00, 0x00, 0x00, 0x3a, 0x47, 0x61, 0x74, 0x68, 0x65, 0x72, 0x3a, 0x31, 0x00, 0x00, 0x00,
-      0x88, 0xff, 0xff, 0xff, 0x88, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
-      0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x76, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01,
-      0x70, 0xff, 0xff, 0xff, 0xac, 0xff, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
-      0x04, 0x00, 0x00, 0x00, 0x54, 0x69, 0x6e, 0x64, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
-      0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x08, 0x00, 0x00, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00,
-      0x01, 0x00, 0x00, 0x00, 0xdc, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
-      0x01, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x3a, 0x53, 0x71, 0x75,
-      0x65, 0x65, 0x7a, 0x65, 0x3a, 0x31, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x04, 0x00, 0x08, 0x00,
+      0x28, 0xff, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+      0x54, 0x69, 0x6e, 0x64, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+      0x20, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x50, 0xff, 0xff, 0xff, 0xc0, 0x00, 0x00, 0x00,
+      0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+      0x76, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, 0x38, 0xff, 0xff, 0xff, 0x74, 0xff, 0xff, 0xff,
+      0x18, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x84, 0x00, 0x00, 0x00,
+      0x24, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3a, 0x44, 0x65, 0x71,
+      0x75, 0x61, 0x6e, 0x74, 0x69, 0x7a, 0x65, 0x4c, 0x69, 0x6e, 0x65, 0x61, 0x72, 0x3a, 0x31, 0x33,
+      0x00, 0x00, 0x00, 0x00, 0xac, 0xff, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+      0x07, 0x00, 0x00, 0x00, 0x78, 0x5f, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x00, 0x01, 0x00, 0x00, 0x00,
+      0x04, 0x00, 0x00, 0x00, 0xa4, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0xd4, 0xff, 0xff, 0xff,
+      0x08, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x79, 0x00, 0x00, 0x00,
+      0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x07, 0x00,
+      0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x08, 0x00, 0x0c, 0x00, 0x04, 0x00, 0x08, 0x00,
       0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
       0x54, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
-      0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x07, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
+      0x08, 0x00, 0x08, 0x00, 0x00, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
       0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
   };
   // clang-format on
diff --git a/onnxruntime/core/framework/op_node_proto_helper.cc b/onnxruntime/core/framework/op_node_proto_helper.cc
index 38d67eb0e0c72..c3deb94300e78 100644
--- a/onnxruntime/core/framework/op_node_proto_helper.cc
+++ b/onnxruntime/core/framework/op_node_proto_helper.cc
@@ -182,7 +182,7 @@ ORT_DEFINE_GET_ATTRS_SPAN_SPECIALIZATION(float, floats)
 ORT_DEFINE_GET_ATTRS_SPAN_SPECIALIZATION(int64_t, ints)
 
 template <typename Impl_t>
-MUST_USE_RESULT Status OpNodeProtoHelper<Impl_t>::GetAttrs(const std::string& name, TensorShapeVector& out) const {
+Status OpNodeProtoHelper<Impl_t>::GetAttrs(const std::string& name, TensorShapeVector& out) const {
   gsl::span<const int64_t> span;
   Status status = this->GetAttrsAsSpan<int64_t>(name, span);
   if (status.IsOK()) {
@@ -193,7 +193,7 @@ MUST_USE_RESULT Status OpNodeProtoHelper<Impl_t>::GetAttrs(const std::string& na
 }
 
 template <typename Impl_t>
-MUST_USE_RESULT Status OpNodeProtoHelper<Impl_t>::GetAttrsStringRefs(
+Status OpNodeProtoHelper<Impl_t>::GetAttrsStringRefs(
     const std::string& name,
     std::vector<std::reference_wrapper<const std::string>>& refs) const {
   const AttributeProto* attr = TryGetAttribute(name);
diff --git a/onnxruntime/core/framework/session_options.h b/onnxruntime/core/framework/session_options.h
index aff90b8d40bde..8deeb4c2b8b64 100644
--- a/onnxruntime/core/framework/session_options.h
+++ b/onnxruntime/core/framework/session_options.h
@@ -148,6 +148,10 @@ struct SessionOptions {
   std::shared_ptr<LibraryHandles> custom_op_libs;
   void AddCustomOpLibraryHandle(PathString library_name, void* library_handle);
 #endif
+
+  // User specified logging func and param
+  OrtLoggingFunction user_logging_function = nullptr;
+  void* user_logging_param = nullptr;
 };
 
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/framework/session_state.cc b/onnxruntime/core/framework/session_state.cc
index f0e5fbbd38721..6244d426450a2 100644
--- a/onnxruntime/core/framework/session_state.cc
+++ b/onnxruntime/core/framework/session_state.cc
@@ -1046,7 +1046,8 @@ Status SessionState::CreateSubgraphSessionState() {
       auto subgraph_session_state =
           std::make_unique<SessionState>(*subgraph, execution_providers_,
                                          thread_pool_, inter_op_thread_pool_, data_transfer_mgr_,
-                                         logger_, profiler_, sess_options_, nullptr, allocators_);
+                                         logger_, profiler_, sess_options_,
+                                         prepacked_weights_container_, allocators_);
 
       // Pass fused function manager to subgraph
       subgraph_session_state->fused_funcs_mgr_.SetFusedFuncs(fused_funcs_mgr_);
diff --git a/onnxruntime/core/framework/session_state_utils.cc b/onnxruntime/core/framework/session_state_utils.cc
index df3a7afebc176..df11fe8302aef 100644
--- a/onnxruntime/core/framework/session_state_utils.cc
+++ b/onnxruntime/core/framework/session_state_utils.cc
@@ -455,11 +455,10 @@ common::Status SaveInputOutputNamesToNodeMapping(const onnxruntime::GraphViewer&
   // utils::CopyOneInputAcrossDevices is happy.
 
   auto& input_map = session_state.GetInputNodeInfoMap();
-  auto end_map = input_map.cend();
 
   for (const auto& graph_input : graph_inputs) {
     const auto& name = graph_input->Name();
-    if (input_map.find(name) == end_map) {
+    if (input_map.find(name) == input_map.cend()) {
       // dummy entry for an input that we didn't find a use of in the graph. log it in case that's a bug.
       // utils::CopyOneInputAcrossDevices will use the input OrtValue as is given we don't believe it's used anywhere.
       LOGS(session_state.Logger(), INFO) << (graph.IsSubgraph() ? "Subgraph" : "Graph") << " input with name "
diff --git a/onnxruntime/core/framework/tensor.cc b/onnxruntime/core/framework/tensor.cc
index a9e5038e19e02..36f03a9b1046a 100644
--- a/onnxruntime/core/framework/tensor.cc
+++ b/onnxruntime/core/framework/tensor.cc
@@ -27,80 +27,68 @@ int64_t GetSizeFromStrides(const TensorShape& shape, gsl::span<const int64_t> st
 }  // namespace
 #endif
 
-size_t Tensor::CalculateTensorStorageSize(MLDataType p_type,
-                                          const TensorShape& shape,
-                                          gsl::span<const int64_t> strides) {
-#ifdef ENABLE_STRIDED_TENSORS
-  int64_t shape_size = 1;
-  if (shape.NumDimensions() > 0 && !strides.empty()) {
-    ORT_ENFORCE(shape.NumDimensions() == strides.size(), "Length of strides doesn't match with tensor dimension size.");
-    shape_size = GetSizeFromStrides(shape, strides);
-  } else {
-    shape_size = shape.Size();
-  }
-#else
-  ORT_ENFORCE(strides.empty(), "Strided tensor is supported for training only for now.");
+size_t Tensor::CalculateTensorStorageSize(MLDataType elt_type, const TensorShape& shape) {
   int64_t shape_size = shape.Size();
-#endif
-  if (shape_size < 0) ORT_THROW("shape.Size() must >=0");
+  if (shape_size < 0)
+    ORT_THROW("shape.Size() must >=0");
 
   if (shape_size > 0) {
     SafeInt<size_t> len = 0;
-    if (!IAllocator::CalcMemSizeForArray(SafeInt<size_t>(shape_size), p_type->Size(), &len))
+    if (!IAllocator::CalcMemSizeForArray(SafeInt<size_t>(shape_size), elt_type->Size(), &len))
       ORT_THROW("tensor failed memory size calculation");
 
     return len;
   }
+
   return 0;
 }
 
-Tensor::Tensor(MLDataType p_type, const TensorShape& shape, void* p_data, const OrtMemoryInfo& alloc,
+Tensor::Tensor(MLDataType elt_type, const TensorShape& shape, void* p_data, const OrtMemoryInfo& location,
                ptrdiff_t offset, gsl::span<const int64_t> strides)
-    : alloc_info_(alloc) {
-  ORT_ENFORCE(p_type != nullptr);
-  Init(p_type, shape, p_data, nullptr, offset, strides);
+    : alloc_info_(location) {
+  ORT_ENFORCE(elt_type != nullptr);
+  Init(elt_type, shape, p_data, nullptr, offset, strides);
 }
 
-Tensor::Tensor(MLDataType p_type, const TensorShape& shape, std::shared_ptr<IAllocator> allocator,
-               gsl::span<const int64_t> strides)
+Tensor::Tensor(MLDataType elt_type, const TensorShape& shape, std::shared_ptr<IAllocator> allocator)
     : alloc_info_(allocator->Info()) {
-  ORT_ENFORCE(p_type != nullptr);
-  size_t len = Tensor::CalculateTensorStorageSize(p_type, shape, strides);
+  ORT_ENFORCE(elt_type != nullptr);
+  size_t len = Tensor::CalculateTensorStorageSize(elt_type, shape);
 
   void* p_data = nullptr;
   if (len > 0) {
     p_data = allocator->Alloc(len);
   }
-  Init(p_type, shape, p_data, allocator, 0L, strides);
+  Init(elt_type, shape, p_data, allocator, 0L);
 }
 
-Tensor::Tensor(MLDataType p_type, const TensorShape& shape, void* p_data, std::shared_ptr<IAllocator> deleter,
+Tensor::Tensor(MLDataType elt_type, const TensorShape& shape, void* p_data, std::shared_ptr<IAllocator> deleter,
                ptrdiff_t offset, gsl::span<const int64_t> strides)
     : alloc_info_(deleter->Info()) {
-  ORT_ENFORCE(p_type != nullptr);
-  Init(p_type, shape, p_data, deleter, offset, strides);
+  ORT_ENFORCE(elt_type != nullptr);
+  Init(elt_type, shape, p_data, deleter, offset, strides);
 }
 
 void Tensor::InitOrtValue(MLDataType elt_type, const TensorShape& shape, std::shared_ptr<IAllocator> allocator,
-                          OrtValue& ort_value, gsl::span<const int64_t> strides) {
-  auto p_tensor = std::make_unique<Tensor>(elt_type, shape, std::move(allocator), strides);
+                          OrtValue& ort_value) {
+  auto p_tensor = std::make_unique<Tensor>(elt_type, shape, std::move(allocator));
   auto ml_tensor = DataTypeImpl::GetType<Tensor>();
   ort_value.Init(p_tensor.release(), ml_tensor, ml_tensor->GetDeleteFunc());
 }
 
-void Tensor::InitOrtValue(MLDataType p_type, const TensorShape& shape, void* p_data, const OrtMemoryInfo& location,
+void Tensor::InitOrtValue(MLDataType elt_type, const TensorShape& shape, void* p_data, const OrtMemoryInfo& location,
                           OrtValue& ort_value, ptrdiff_t offset, gsl::span<const int64_t> strides) {
   auto ml_tensor = DataTypeImpl::GetType<Tensor>();
-  auto p_tensor = std::make_unique<Tensor>(p_type, shape, p_data, location, offset, strides);
+  auto p_tensor = std::make_unique<Tensor>(elt_type, shape, p_data, location, offset, strides);
   ort_value.Init(p_tensor.release(), ml_tensor, ml_tensor->GetDeleteFunc());
 }
 
-void Tensor::InitOrtValue(MLDataType p_type, const TensorShape& shape,
+void Tensor::InitOrtValue(MLDataType elt_type, const TensorShape& shape,
                           void* p_data, std::shared_ptr<IAllocator> allocator,
                           OrtValue& ort_value, ptrdiff_t offset,
                           gsl::span<const int64_t> strides) {
   auto ml_tensor = DataTypeImpl::GetType<Tensor>();
-  auto p_tensor = std::make_unique<Tensor>(p_type, shape, p_data, std::move(allocator), offset, strides);
+  auto p_tensor = std::make_unique<Tensor>(elt_type, shape, p_data, std::move(allocator), offset, strides);
   ort_value.Init(p_tensor.release(), ml_tensor, ml_tensor->GetDeleteFunc());
 }
 
@@ -123,27 +111,31 @@ size_t Tensor::SizeInBytes() const {
   return ret;
 }
 
-void Tensor::Init(MLDataType p_type, const TensorShape& shape, void* p_raw_data, AllocatorPtr deleter, ptrdiff_t offset,
-                  gsl::span<const int64_t> strides) {
+void Tensor::Init(MLDataType elt_type, const TensorShape& shape, void* p_raw_data, AllocatorPtr deleter,
+                  ptrdiff_t offset, gsl::span<const int64_t> strides) {
   int64_t shape_size = shape.Size();
-  if (shape_size < 0) ORT_THROW("shape.Size() must >=0");
-  dtype_ = p_type->AsPrimitiveDataType();
+  if (shape_size < 0)
+    ORT_THROW("shape.Size() must >=0");
+
+  dtype_ = elt_type->AsPrimitiveDataType();
   ORT_ENFORCE(dtype_ != nullptr,
-              "Tensor is expected to contain one of the primitive data types. Got: ", DataTypeImpl::ToString(p_type));
+              "Tensor is expected to contain one of the primitive data types. Got: ",
+              DataTypeImpl::ToString(elt_type));
   shape_ = shape;
   p_data_ = p_raw_data;
-  // if caller passed in a deleter, that means this tensor own this buffer
-  // we will release the buffer when this tensor is deconstructed.
+  // if caller passed in a deleter we now own p_data_ and must free it in the dtor
   buffer_deleter_ = std::move(deleter);
   // for string tensors, if this tensor own the buffer (caller passed in the deleter)
   // do the placement new for strings on pre-allocated buffer.
   if (buffer_deleter_ && IsDataTypeString()) {
     utils::ConstructStrings(p_data_, shape_size);
   }
+
   byte_offset_ = offset;
+
 #ifdef ENABLE_STRIDED_TENSORS
   if (shape.NumDimensions() > 0 && !strides.empty()) {
-    ORT_ENFORCE(shape.NumDimensions() == strides.size(), "Length of strides doesn't match with tensor dimension size.");
+    ORT_ENFORCE(shape.NumDimensions() == strides.size(), "Length of strides doesn't match tensor dimension size.");
     strides_.assign(strides.begin(), strides.end());
     is_contiguous_ = CheckIsContiguous();
   }
@@ -156,13 +148,21 @@ Tensor::Tensor(Tensor&& other) noexcept
     : p_data_(other.p_data_),
       buffer_deleter_(other.buffer_deleter_),
       shape_(other.shape_),
+#ifdef ENABLE_STRIDED_TENSORS
+      strides_(other.strides_),
+      is_contiguous_(other.is_contiguous_),
+#endif
       dtype_(other.dtype_),
       alloc_info_(other.alloc_info_),
       byte_offset_(other.byte_offset_) {
-  other.dtype_ = DataTypeImpl::GetType<float>()->AsPrimitiveDataType();
-  other.shape_ = TensorShape(std::vector<int64_t>(1, 0));
   other.p_data_ = nullptr;
   other.buffer_deleter_ = nullptr;
+  other.dtype_ = DataTypeImpl::GetType<float>()->AsPrimitiveDataType();
+  other.shape_ = TensorShape(std::vector<int64_t>(1, 0));
+#ifdef ENABLE_STRIDED_TENSORS
+  other.strides_ = {};
+  other.is_contiguous_ = true;
+#endif
   other.byte_offset_ = 0;
 }
 
@@ -170,19 +170,28 @@ Tensor& Tensor::operator=(Tensor&& other) noexcept {
   if (this != &other) {
     ReleaseBuffer();
 
-    dtype_ = other.dtype_;
+    p_data_ = other.p_data_;
+    buffer_deleter_ = other.buffer_deleter_;
     shape_ = other.shape_;
+#ifdef ENABLE_STRIDED_TENSORS
+    strides_ = other.strides_;
+    is_contiguous_ = other.is_contiguous_;
+#endif
+    dtype_ = other.dtype_;
     alloc_info_ = other.alloc_info_;
     byte_offset_ = other.byte_offset_;
-    p_data_ = other.p_data_;
-    buffer_deleter_ = other.buffer_deleter_;
 
-    other.dtype_ = DataTypeImpl::GetType<float>()->AsPrimitiveDataType();
-    other.shape_ = TensorShape(std::vector<int64_t>(1, 0));
     other.p_data_ = nullptr;
-    other.byte_offset_ = 0;
     other.buffer_deleter_ = nullptr;
+    other.shape_ = TensorShape(std::vector<int64_t>(1, 0));
+#ifdef ENABLE_STRIDED_TENSORS
+    other.strides_ = {};
+    other.is_contiguous_ = true;
+#endif
+    other.dtype_ = DataTypeImpl::GetType<float>()->AsPrimitiveDataType();
+    other.byte_offset_ = 0;
   }
+
   return *this;
 }
 
diff --git a/onnxruntime/core/framework/tensor_type_and_shape.cc b/onnxruntime/core/framework/tensor_type_and_shape.cc
index f3e1acbbe523d..6e11bfe1ac8ea 100644
--- a/onnxruntime/core/framework/tensor_type_and_shape.cc
+++ b/onnxruntime/core/framework/tensor_type_and_shape.cc
@@ -85,6 +85,16 @@ ORT_API_STATUS_IMPL(OrtApis::GetSymbolicDimensions,
   return nullptr;
 }
 
+ORT_API_STATUS_IMPL(OrtApis::SetSymbolicDimensions,
+                    _In_ struct OrtTensorTypeAndShapeInfo* info,
+                    _In_ const char** names, _In_ size_t dim_params_length) {
+  info->dim_params.clear();
+  for (size_t idx = 0; idx < dim_params_length; ++idx) {
+    info->dim_params.push_back(names[idx]);
+  }
+  return nullptr;
+}
+
 ORT_API_STATUS_IMPL(OrtApis::GetTensorShapeElementCount,
                     _In_ const OrtTensorTypeAndShapeInfo* this_ptr, _Out_ size_t* out) {
   API_IMPL_BEGIN
diff --git a/onnxruntime/core/framework/tensorprotoutils.cc b/onnxruntime/core/framework/tensorprotoutils.cc
index 08ed811d9ac38..fd32aaedcc2ee 100644
--- a/onnxruntime/core/framework/tensorprotoutils.cc
+++ b/onnxruntime/core/framework/tensorprotoutils.cc
@@ -87,6 +87,7 @@ bool operator!=(const ONNX_NAMESPACE::TensorShapeProto_Dimension& l,
 
 }  // namespace ONNX_NAMESPACE
 
+namespace onnxruntime {
 namespace {
 
 // This function doesn't support string tensors
@@ -162,9 +163,9 @@ static Status GetExternalDataInfo(const ONNX_NAMESPACE::TensorProto& tensor_prot
 // Uses the tensor_proto_dir to construct the full path for external data. If tensor_proto_dir == nullptr
 // then uses the current directory instead.
 // This function does not unpack string_data of an initializer tensor
-static Status ReadExternalDataForTensor(const ONNX_NAMESPACE::TensorProto& tensor_proto,
-                                        const ORTCHAR_T* tensor_proto_dir,
-                                        std::vector<uint8_t>& unpacked_tensor) {
+Status ReadExternalDataForTensor(const ONNX_NAMESPACE::TensorProto& tensor_proto,
+                                 const ORTCHAR_T* tensor_proto_dir,
+                                 std::vector<uint8_t>& unpacked_tensor) {
   std::basic_string<ORTCHAR_T> external_file_path;
   onnxruntime::FileOffsetType file_offset;
   SafeInt<size_t> tensor_byte_size;
@@ -184,10 +185,52 @@ static Status ReadExternalDataForTensor(const ONNX_NAMESPACE::TensorProto& tenso
 
   return Status::OK();
 }
+
+// TODO(unknown): Change the current interface to take Path object for model path
+// so that validating and manipulating path for reading external data becomes easy
+Status TensorProtoToOrtValueImpl(const Env& env, const ORTCHAR_T* model_path,
+                                 const ONNX_NAMESPACE::TensorProto& tensor_proto,
+                                 const MemBuffer* m, AllocatorPtr alloc,
+                                 OrtValue& value) {
+  if (m && m->GetBuffer() == nullptr) {
+    return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "MemBuffer has not been allocated.");
+  }
+
+  // to construct a Tensor with std::string we need to pass an allocator to the Tensor ctor
+  // as the contents of each string needs to be allocated and freed separately.
+  ONNXTensorElementDataType ele_type = utils::GetTensorElementType(tensor_proto);
+  if (ele_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING && (m || !alloc)) {
+    return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "string tensor requires allocator to be provided.");
+  }
+
+  // Note: We permit an empty tensor_shape_vec, and treat it as a scalar (a tensor of size 1).
+  TensorShape tensor_shape = GetTensorShapeFromTensorProto(tensor_proto);
+  const DataTypeImpl* const type = DataTypeImpl::TensorTypeFromONNXEnum(tensor_proto.data_type())->GetElementType();
+
+  std::unique_ptr<Tensor> tensor;
+
+  if (m) {
+    tensor = std::make_unique<Tensor>(type, tensor_shape, m->GetBuffer(), m->GetAllocInfo());
+
+    if (tensor->SizeInBytes() > m->GetLen()) {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "The preallocated buffer is too small. Requires ",
+                             tensor->SizeInBytes(), ", Got ", m->GetLen());
+    }
+  } else {
+    tensor = std::make_unique<Tensor>(type, tensor_shape, alloc);
+  }
+
+  ORT_RETURN_IF_ERROR(TensorProtoToTensor(env, model_path, tensor_proto, *tensor));
+
+  auto ml_tensor = DataTypeImpl::GetType<Tensor>();
+  value.Init(tensor.release(), ml_tensor, ml_tensor->GetDeleteFunc());
+  return Status::OK();
+}
+
 }  // namespace
 
-namespace onnxruntime {
 namespace utils {
+
 #if !defined(ORT_MINIMAL_BUILD)
 static Status UnpackTensorWithExternalDataImpl(const ONNX_NAMESPACE::TensorProto& tensor,
                                                const ORTCHAR_T* tensor_proto_dir,
@@ -810,7 +853,7 @@ Status GetExtDataFromTensorProto(const Env& env, const ORTCHAR_T* model_path,
  * @param env
  * @param model_path
  * @param tensor_proto  tensor data in protobuf format
- * @param tensorp       pre-allocated tensor object, where we store the data
+ * @param tensor        pre-allocated tensor object, where we store the data
  * @return
  */
 Status TensorProtoToTensor(const Env& env, const ORTCHAR_T* model_path,
@@ -824,7 +867,7 @@ Status TensorProtoToTensor(const Env& env, const ORTCHAR_T* model_path,
   const DataTypeImpl* const source_type = DataTypeImpl::TensorTypeFromONNXEnum(tensor_proto.data_type())->GetElementType();
   if (source_type->Size() > tensor.DataType()->Size()) {
     return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "TensorProto type ", DataTypeImpl::ToString(source_type),
-                           " can not be writen into Tensor type ", DataTypeImpl::ToString(tensor.DataType()));
+                           " can not be written into Tensor type ", DataTypeImpl::ToString(tensor.DataType()));
   }
 
   // find raw data in proto buf
@@ -900,44 +943,18 @@ Status TensorProtoToTensor(const Env& env, const ORTCHAR_T* model_path,
   return Status::OK();
 }
 
-#ifdef _MSC_VER
-#pragma warning(push)
-#pragma warning(disable : 6239)
-#endif
-// TODO: Change the current interface to take Path object for model path
-// so that validating and manipulating path for reading external data becomes easy
-Status TensorProtoToMLValue(const Env& env, const ORTCHAR_T* model_path,
-                            const ONNX_NAMESPACE::TensorProto& tensor_proto,
-                            const MemBuffer& m, OrtValue& value) {
-  if (m.GetBuffer() == nullptr) {
-    return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT,
-                  "TensorProtoToMLValue() must take a pre-allocated MemBuffer!");
-  }
-
-  ONNXTensorElementDataType ele_type = utils::GetTensorElementType(tensor_proto);
-  if (ele_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING) {
-    return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "string tensor can not use pre-allocated buffer");
-  }
-
-  // Note: We permit an empty tensor_shape_vec, and treat it as a scalar (a tensor of size 1).
-  TensorShape tensor_shape = GetTensorShapeFromTensorProto(tensor_proto);
-  const DataTypeImpl* const type = DataTypeImpl::TensorTypeFromONNXEnum(tensor_proto.data_type())->GetElementType();
-  std::unique_ptr<Tensor> tensorp = std::make_unique<Tensor>(type, tensor_shape, m.GetBuffer(), m.GetAllocInfo());
-  if (tensorp->SizeInBytes() > m.GetLen()) {
-    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "The preallocated buffer is too small. Requires ",
-                           tensorp->SizeInBytes(), ", Got ", m.GetLen());
-  }
-
-  ORT_RETURN_IF_ERROR(TensorProtoToTensor(env, model_path, tensor_proto, *tensorp));
+Status TensorProtoToOrtValue(const Env& env, const ORTCHAR_T* model_path,
+                             const ONNX_NAMESPACE::TensorProto& tensor_proto,
+                             const MemBuffer& m, OrtValue& value) {
+  return TensorProtoToOrtValueImpl(env, model_path, tensor_proto, &m, nullptr, value);
+}
 
-  auto ml_tensor = DataTypeImpl::GetType<Tensor>();
-  value.Init(tensorp.release(), ml_tensor, ml_tensor->GetDeleteFunc());
-  return Status::OK();
+Status TensorProtoToOrtValue(const Env& env, const ORTCHAR_T* model_path,
+                             const ONNX_NAMESPACE::TensorProto& tensor_proto,
+                             AllocatorPtr alloc, OrtValue& value) {
+  return TensorProtoToOrtValueImpl(env, model_path, tensor_proto, nullptr, alloc, value);
 }
-#ifdef _MSC_VER
-#pragma warning(pop)
-#pragma warning(disable : 6239)
-#endif
+
 #define CASE_TYPE(X)                             \
   case ONNX_NAMESPACE::TensorProto_DataType_##X: \
     return ONNX_TENSOR_ELEMENT_DATA_TYPE_##X;
diff --git a/onnxruntime/core/framework/tensorprotoutils.h b/onnxruntime/core/framework/tensorprotoutils.h
index 6f1b60dd2982e..000502ba47594 100644
--- a/onnxruntime/core/framework/tensorprotoutils.h
+++ b/onnxruntime/core/framework/tensorprotoutils.h
@@ -39,13 +39,28 @@ TensorShape GetTensorShapeFromTensorShapeProto(const ONNX_NAMESPACE::TensorShape
 TensorShape GetTensorShapeFromTensorProto(const ONNX_NAMESPACE::TensorProto& tensor_proto);
 
 /**
- * deserialize a TensorProto into a preallocated memory buffer.
- * \param tensor_proto_path A local file path of where the 'input' was loaded from. Can be NULL if the tensor proto doesn't
- *                        have any external data or it was loaded from current working dir. This path could be either a
- *                        relative path or an absolute path.
+ * deserialize a TensorProto into a preallocated memory buffer on CPU.
+ * \param tensor_proto_path A local file path of where the 'input' was loaded from.
+ *                          Can be NULL if the tensor proto doesn't have external data or it was loaded from
+ *                          the current working dir. This path could be either a relative path or an absolute path.
+ * \return Status::OK on success with 'value' containing the Tensor in CPU based memory.
  */
-common::Status TensorProtoToMLValue(const Env& env, const ORTCHAR_T* tensor_proto_path,
-                                    const ONNX_NAMESPACE::TensorProto& input, const MemBuffer& m, OrtValue& value);
+common::Status TensorProtoToOrtValue(const Env& env, const ORTCHAR_T* tensor_proto_path,
+                                     const ONNX_NAMESPACE::TensorProto& input,
+                                     const MemBuffer& m, OrtValue& value);
+
+/**
+ * deserialize a TensorProto into a buffer on CPU allocated using 'alloc'.
+ * \param tensor_proto_path A local file path of where the 'input' was loaded from.
+ *                          Can be NULL if the tensor proto doesn't have external data or it was loaded from
+ *                          the current working dir. This path could be either a relative path or an absolute path.
+ * \param alloc             Allocator to use for allocating the buffer. Must allocate CPU based memory.
+ * \return Status::OK on success with 'value' containing the Tensor in CPU based memory.
+ */
+common::Status TensorProtoToOrtValue(const Env& env, const ORTCHAR_T* tensor_proto_path,
+                                     const ONNX_NAMESPACE::TensorProto& input,
+                                     AllocatorPtr alloc, OrtValue& value);
+
 /**
  * @brief Deserialize a TensorProto into a preallocated empty Tensor
  * @param env
diff --git a/onnxruntime/core/framework/tunable.h b/onnxruntime/core/framework/tunable.h
index 96b4cc53a022c..6d2dd641f6bc6 100644
--- a/onnxruntime/core/framework/tunable.h
+++ b/onnxruntime/core/framework/tunable.h
@@ -232,14 +232,15 @@ class TunableOp {
     return timer.Duration() / num_iter;
   }
 
-  static bool IsSupported(Op<ParamsT>& op, const ParamsT* param) {
-    Status status = op.IsSupported(param);
+  // Filter all Status, only OK and TUNABLE_OP_UNSUPPORTED is left, other error status will be thrown, and to be
+  // processed by onnxruntime. We return Status to avoid the construction of op and params signature string.
+  static Status IsSupported(Op<ParamsT>& op, const ParamsT* params) {
+    Status status = op.IsSupported(params);
     if (status.Category() == common::StatusCategory::NONE && status.Code() == common::StatusCode::INVALID_ARGUMENT) {
-      LOGS_DEFAULT(VERBOSE) << "unsupported reason: " << status.ErrorMessage();
-      return false;
+      return status;
     }
     ORT_THROW_IF_ERROR(status);
-    return true;
+    return status;
   }
 
  protected:
@@ -250,9 +251,9 @@ class TunableOp {
   int FindFastestImpl(const ParamsT* params, const std::vector<Op<ParamsT>>& candidates) {
     ITuningContext* ctx = params->TuningContext();
     auto op_sig = Signature();
-    auto param_sig = params->Signature();
-    LOGS_DEFAULT(VERBOSE) << "FindFastestImpl for " << op_sig << '(' << param_sig << ')';
-    auto min_time = std::numeric_limits<double>::infinity();
+    auto params_sig = params->Signature();
+    LOGS_DEFAULT(VERBOSE) << "finding fastest for " << op_sig << '(' << params_sig << ')';
+    auto min_duration_ms = std::numeric_limits<double>::infinity();
     int id = -1;
 
     constexpr const int max_tuning_iter = 100;
@@ -260,30 +261,32 @@ class TunableOp {
 
     for (size_t i = 0; i < candidates.size(); i++) {
       auto& candidate = const_cast<Op<ParamsT>&>(candidates[i]);
-      if (!IsSupported(candidate, params)) {
-        LOGS_DEFAULT(VERBOSE) << "FindFastestImpl found unsupported " << op_sig << '(' << param_sig << ") id=" << i;
+      auto status = IsSupported(candidate, params);
+      if (!status.IsOK()) {
+        LOGS_DEFAULT(VERBOSE) << "├──unsupported id=" << i << ", " << op_sig << '(' << params_sig << ")";
+        LOGS_DEFAULT(VERBOSE) << "│  reason: " << status.ErrorMessage();
         continue;
       }
 
       WarmUp(candidate, params);
 
       auto approx_duration = Profile(candidate, params, approx_num_iter);
-      if (approx_duration > 2 * min_time) {
-        LOGS_DEFAULT(VERBOSE) << "FindFastestImpl skip slow instance " << op_sig << '(' << param_sig << ") id=" << i;
+      if (approx_duration > 2 * min_duration_ms) {
+        LOGS_DEFAULT(VERBOSE) << "├──skip slow instance id=" << i;
         continue;
       }
       int tuning_iter = std::max(1, int(std::min(double(max_tuning_iter), ctx->GetMaxTuningDurationMs() / approx_duration)));
 
-      LOGS_DEFAULT(VERBOSE) << "FindFastestImpl run instance " << op_sig << '(' << param_sig << ") id=" << i << " " << tuning_iter << " times.";
-
-      auto time = Profile(candidate, params, tuning_iter);
-      if (time < min_time) {
-        min_time = time;
+      auto duration_ms = Profile(candidate, params, tuning_iter);
+      if (duration_ms < min_duration_ms) {
+        LOGS_DEFAULT(VERBOSE) << "├──found better instance, new best id=" << i << ", old id=" << id << ". "
+                              << duration_ms << "ms, " << tuning_iter << " iters.";
+        min_duration_ms = duration_ms;
         id = static_cast<int>(i);
       }
     }
     ORT_ENFORCE(id >= 0, "Could not find viable op");
-    LOGS_DEFAULT(VERBOSE) << "FindFastestImpl for " << op_sig << '(' << param_sig << ") found fastest with id=" << id;
+    LOGS_DEFAULT(VERBOSE) << "└──found fastest with id=" << id << " for " << op_sig << '(' << params_sig << ")";
     std::this_thread::sleep_for(std::chrono::milliseconds(50));
     return id;
   }
diff --git a/onnxruntime/core/framework/utils.cc b/onnxruntime/core/framework/utils.cc
index b6dd8517341bb..23fe5e1cd3d96 100644
--- a/onnxruntime/core/framework/utils.cc
+++ b/onnxruntime/core/framework/utils.cc
@@ -68,6 +68,7 @@ bool ProviderIsCpuBased(const std::string& provider_type) {
          provider_type == onnxruntime::kSnpeExecutionProvider ||
          provider_type == onnxruntime::kQnnExecutionProvider ||
          provider_type == onnxruntime::kXnnpackExecutionProvider ||
+         provider_type == onnxruntime::kAzureExecutionProvider ||
          provider_type == onnxruntime::utils::kInternalTestingExecutionProvider;
 }
 
@@ -1024,7 +1025,32 @@ bool IsInputOnCpu(const Node& node, const KernelCreateInfo* p_kci, size_t index)
       overload_name = attrs.at("overload_name").s();
     }
 
-    return !contrib::aten_ops::ATenOperatorExecutor::Instance().IsTensorArgument(op_name, overload_name, index);
+    return contrib::aten_ops::ATenOperatorExecutor::Instance().IsCpuArgument(op_name, overload_name, index, true);
+  }
+#else
+  ORT_UNUSED_PARAMETER(node);
+#endif
+
+  return false;
+}
+
+bool IsOutputOnCpu(const Node& node, const KernelCreateInfo* p_kci, size_t index) {
+  if (p_kci && p_kci->kernel_def->IsOutputOnCpu(index)) {
+    return true;
+  }
+
+#ifdef ENABLE_ATEN
+  if (node.GetExecutionProviderType() == kCudaExecutionProvider && node.OpType() == "ATen" &&
+      node.Domain() == kPytorchAtenDomain) {
+    const auto& attrs = node.GetAttributes();
+    ORT_ENFORCE(utils::HasString(attrs.at("operator")));
+    std::string op_name = attrs.at("operator").s();
+    std::string overload_name = "";
+    if (attrs.find("overload_name") != attrs.end() && utils::HasString(attrs.at("overload_name"))) {
+      overload_name = attrs.at("overload_name").s();
+    }
+
+    return contrib::aten_ops::ATenOperatorExecutor::Instance().IsCpuArgument(op_name, overload_name, index, false);
   }
 #else
   ORT_UNUSED_PARAMETER(node);
diff --git a/onnxruntime/core/framework/utils.h b/onnxruntime/core/framework/utils.h
index ea6a629f87cb8..f0b1b9109d405 100644
--- a/onnxruntime/core/framework/utils.h
+++ b/onnxruntime/core/framework/utils.h
@@ -121,6 +121,7 @@ common::Status ExecuteSubgraph(const SessionState& session_state, const FeedsFet
                                bool sync_subgraph_fetches = false);
 
 bool IsInputOnCpu(const Node& node, const KernelCreateInfo* p_kci, size_t index);
+bool IsOutputOnCpu(const Node& node, const KernelCreateInfo* p_kci, size_t index);
 
 template <typename T>
 constexpr ONNXTensorElementDataType GetONNXTensorElementDataType() {
diff --git a/onnxruntime/core/graph/contrib_ops/bert_defs.cc b/onnxruntime/core/graph/contrib_ops/bert_defs.cc
index e5956a575d73d..b97fb0d2899fc 100644
--- a/onnxruntime/core/graph/contrib_ops/bert_defs.cc
+++ b/onnxruntime/core/graph/contrib_ops/bert_defs.cc
@@ -171,10 +171,7 @@ void MultiHeadAttentionTypeAndShapeInference(ONNX_NAMESPACE::InferenceContext& c
       *output_shape.add_dim() = query_dims[1];
       *output_shape.add_dim() = query_dims[2] * query_dims[4];
       updateOutputShape(ctx, 0, output_shape);
-      return;
-    }
-
-    if (hasInputShape(ctx, 2)) {
+    } else if (hasInputShape(ctx, 2)) {
       auto& value_shape = getInputShape(ctx, 2);
       auto& value_dims = value_shape.dim();
       if (value_dims.size() != 3 && value_dims.size() != 4) {
@@ -192,10 +189,7 @@ void MultiHeadAttentionTypeAndShapeInference(ONNX_NAMESPACE::InferenceContext& c
                                     ? (dmmha_packing ? value_dims[2] / 3 : value_dims[2])
                                     : value_dims[1] * value_dims[3];
       updateOutputShape(ctx, 0, output_shape);
-      return;
-    }
-
-    if (hasInputShape(ctx, 1)) {
+    } else if (hasInputShape(ctx, 1)) {
       auto& key_shape = getInputShape(ctx, 1);
       if (key_shape.dim().size() == 5) {  // packed KV
         ONNX_NAMESPACE::propagateShapeAndTypeFromFirstInput(ctx);
@@ -217,7 +211,7 @@ void MultiHeadAttentionTypeAndShapeInference(ONNX_NAMESPACE::InferenceContext& c
         propagateElemTypeFromInputToOutput(ctx, static_cast<size_t>(past_key_index) + 1, 2);
       } else {
         if (sequence_length > 0 && past_dims[2].has_dim_value()) {
-          int64_t total_sequence_length = sequence_length + past_shape.dim(3).dim_value();
+          int64_t total_sequence_length = sequence_length + past_dims[2].dim_value();
 
           ONNX_NAMESPACE::TensorShapeProto present_shape;
           for (auto& dim : past_dims) {
@@ -233,6 +227,59 @@ void MultiHeadAttentionTypeAndShapeInference(ONNX_NAMESPACE::InferenceContext& c
   }
 }
 
+void GroupQueryAttentionTypeAndShapeInference(ONNX_NAMESPACE::InferenceContext& ctx, int past_key_index) {
+  // Output 0 has shape (batch_size, sequence_length, hidden_size)
+
+  // Q, K and V:
+  //   Input 0 (query) has shape (batch_size, sequence_length, hidden_size)
+  //   Input 1 (key) has shape (batch_size, kv_sequence_length, kv_hidden_size)
+  //   Input 2 (value) has shape (batch_size, kv_sequence_length, kv_hidden_size)
+
+  // Type inference
+  ONNX_NAMESPACE::propagateElemTypeFromInputToOutput(ctx, 0, 0);
+
+  // Shape inference
+  if (hasInputShape(ctx, 0)) {
+    auto& query_shape = getInputShape(ctx, 0);
+    auto& query_dims = query_shape.dim();
+
+    if (query_dims.size() != 3) {
+      fail_shape_inference("Inputs 0 (query) shall be 3 dimensions");
+    }
+
+    if (hasInputShape(ctx, 2)) {
+      auto& value_shape = getInputShape(ctx, 2);
+      auto& value_dims = value_shape.dim();
+      if (value_dims.size() != 3) {
+        fail_shape_inference("Inputs 2 (value) shall be 3 dimensions");
+      }
+
+      ONNX_NAMESPACE::TensorShapeProto output_shape;
+      *output_shape.add_dim() = query_dims[0];
+      *output_shape.add_dim() = query_dims[1];
+      *output_shape.add_dim() = query_dims[2];
+      updateOutputShape(ctx, 0, output_shape);
+      return;
+    } else {
+      fail_shape_inference("Missing input 2 (value)");
+    }
+  }
+
+  if (ctx.getNumOutputs() > 1) {  // has present output
+    if (hasInputShape(ctx, past_key_index)) {
+      auto& past_shape = getInputShape(ctx, past_key_index);
+      auto& past_dims = past_shape.dim();
+      if (past_dims.size() != 4) {
+        fail_shape_inference("The past_key input shall be 4 dimensions");
+      }
+      ONNX_NAMESPACE::propagateElemTypeFromInputToOutput(ctx, past_key_index, 1);
+      ONNX_NAMESPACE::propagateElemTypeFromInputToOutput(ctx, static_cast<size_t>(past_key_index) + 1, 2);
+      ONNX_NAMESPACE::propagateShapeFromInputToOutput(ctx, past_key_index, 1);
+      ONNX_NAMESPACE::propagateShapeFromInputToOutput(ctx, static_cast<size_t>(past_key_index) + 1, 2);
+    }
+  }
+}
+
 constexpr const char* Attention_ver1_doc = R"DOC(
 Multi-Head Attention that can be either unidirectional (like GPT-2) or bidirectional (like BERT).
 
@@ -746,6 +793,10 @@ ONNX_MS_OPERATOR_SET_SCHEMA(
               "The value to be filled in the attention mask. Default value is -10000.0f",
               AttributeProto::FLOAT,
               OPTIONAL_VALUE)
+        .Attr("output_qk",
+              "Need output the cross attention MatMul(Q, K)",
+              AttributeProto::INT,
+              OPTIONAL_VALUE)
         .Input(0,
                "query",
                "Query with shape (batch_size, 1, hidden_size) or packed QKV with shape "
@@ -823,7 +874,7 @@ ONNX_MS_OPERATOR_SET_SCHEMA(
                 "T")
         .Output(1,
                 "present_key",
-                "past state for key with shape (batch_size, num_heads, total_sequence_length, head_size). "
+                "present state for key with shape (batch_size, num_heads, total_sequence_length, head_size). "
                 "If past_present_share_buffer is set, "
                 "its shape is (batch_size, num_heads, max_sequence_length, head_size), "
                 "while effective_seq_length = (past_sequence_length + kv_sequence_length).",
@@ -831,12 +882,18 @@ ONNX_MS_OPERATOR_SET_SCHEMA(
                 OpSchema::Optional)
         .Output(2,
                 "present_value",
-                "past state for value with shape (batch_size, num_heads, total_sequence_length, head_size). "
+                "present state for value with shape (batch_size, num_heads, total_sequence_length, head_size). "
                 "If past_present_share_buffer is set, "
                 "its shape is (batch_size, num_heads, max_sequence_length, head_size), "
                 "while effective_seq_length = (past_sequence_length + kv_sequence_length).",
                 "T",
                 OpSchema::Optional)
+        .Output(3,
+                "qk",
+                "normalized Q * K, of shape (batch_size, num_heads, 1, head_size). ",
+                "V",
+                OpSchema::Optional)
+        .TypeConstraint("V", {"tensor(float)"}, "Constrain qk output types to float32 tensors.")
         .TypeConstraint("T",
                         {"tensor(float)", "tensor(float16)"},
                         "Constrain input and output types to float tensors.")
@@ -889,7 +946,8 @@ ONNX_MS_OPERATOR_SET_SCHEMA(
                OpSchema::Optional)
         .Input(4,
                "key_padding_mask",
-               "Key padding mask with shape (batch_size) or (3 * batch_size + 2) or (batch_size, kv_sequence_length)",
+               "Key padding mask with shape (batch_size), (3 * batch_size + 2), (batch_size, kv_sequence_length), (batch_size, total_sequence_length), "
+               "or (batch_size, sequence_length, total_sequence_length)",
                "M",
                OpSchema::Optional)
         .Input(5,
@@ -930,6 +988,80 @@ ONNX_MS_OPERATOR_SET_SCHEMA(
           MultiHeadAttentionTypeAndShapeInference(ctx, 6);
         }));
 
+constexpr const char* GroupQueryAttention_ver1_doc = R"DOC(
+Group Query Self/Cross Attention.
+
+Supports different number of heads for q and kv. Only supports causal or local attention.
+)DOC";
+
+ONNX_MS_OPERATOR_SET_SCHEMA(
+    GroupQueryAttention, 1,
+    OpSchema()
+        .SetDoc(GroupQueryAttention_ver1_doc)
+        .Attr("num_heads", "Number of attention heads for q", AttributeProto::INT)
+        .Attr("kv_num_heads", "Number of attention heads for k and v", AttributeProto::INT)
+        .Attr("scale",
+              "Custom scale will be used if specified. Default value is 1/sqrt(head_size)",
+              AttributeProto::FLOAT,
+              OPTIONAL_VALUE)
+        .Attr("local_window_size",
+              "left_window_size for local attention (like Mistral). Default value is -1 meaning unused.",
+              AttributeProto::INT,
+              static_cast<int64_t>(-1))
+        .Input(0,
+               "query",
+               "Query with shape (batch_size, sequence_length, hidden_size)",
+               "T")
+        .Input(1,
+               "key",
+               "Key with shape (batch_size, kv_sequence_length, kv_hidden_size) ",
+               "T")
+        .Input(2,
+               "value",
+               "Value with shape (batch_size, kv_sequence_length, kv_hidden_size)",
+               "T")
+        .Input(3,
+               "past_key",
+               "past state key with support for format BNSH. When past_key uses same tensor as present_key"
+               "(k-v cache), it is of length max_sequence_length... otherwise of length past_sequence_length.",
+               "T",
+               OpSchema::Optional)
+        .Input(4,
+               "past_value",
+               "past state value with support for format BNSH. When past_value uses same tensor as present_value"
+               "(k-v cache), it is of length max_sequence_length... otherwise of length past_sequence_length.",
+               "T",
+               OpSchema::Optional)
+        .Input(5,
+               "seqlens_k",
+               "1d Tensor of shape (batch_size). Indicates past sequence lengths for token generation case.",
+               "M")
+        .Input(6,
+               "total_sequence_length",
+               "Scalar tensor of total sequence length (past + new).",
+               "M")
+        .Output(0,
+                "output",
+                "3D output tensor with shape (batch_size, sequence_length, hidden_size)",
+                "T")
+        .Output(1,
+                "present_key",
+                "present state key with support for format BNSH. When past_key uses same tensor as present_key"
+                "(k-v buffer), it is of length max_sequence_length... otherwise of length past_sequence_length +"
+                "kv_sequence_length.",
+                "T")
+        .Output(2,
+                "present_value",
+                "present state value with support for format BNSH. When past_value uses same tensor as present_value"
+                "(k-v buffer), it is of length max_sequence_length... otherwise of length past_sequence_length +"
+                "kv_sequence_length.",
+                "T")
+        .TypeConstraint("T", {"tensor(float16)"}, "Constrain input and output to float tensors.")
+        .TypeConstraint("M", {"tensor(int32)"}, "Constrain mask to int tensor.")
+        .TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
+          GroupQueryAttentionTypeAndShapeInference(ctx, 3);
+        }));
+
 constexpr const char* Longformer_Attention_doc = R"DOC(
 Longformer Self Attention with a local context and a global context. Tokens attend locally: Each token
 attends to its W previous tokens and W succeeding tokens with W being the window length. A selected few tokens
@@ -994,6 +1126,49 @@ ONNX_MS_OPERATOR_SET_SCHEMA(
           DecoderAttentionTypeAndShapeInference(ctx);
         }));
 
+constexpr const char* RotaryEmbedding_ver1_doc = R"DOC(
+RotaryEmbedding is the implementation of rotary positional embeddings (RoPE). The positions are represented as rotation matrices
+that are multiplied to query and key before the inner product of query and key is taken.
+)DOC";
+ONNX_MS_OPERATOR_SET_SCHEMA(
+    RotaryEmbedding, 1,
+    OpSchema()
+        .SetDoc(RotaryEmbedding_ver1_doc)
+        .Attr("scale",
+              "Custom scale will be used if specified. Default value is 1.0",
+              AttributeProto::FLOAT,
+              OPTIONAL_VALUE)
+        .Attr("interleaved",
+              "Rotate using interleaved pattern. Default value is 0 (False).",
+              AttributeProto::INT,
+              OPTIONAL_VALUE)
+        .Input(0,
+               "input",
+               "3D tensor with shape (batch_size, sequence_length, hidden_size) or 4D with shape (batch_size, num_heads, sequence_length, head_size)",
+               "T")
+        .Input(1,
+               "position_ids",
+               "1D tensor with shape (1) or 2D tensor with shape (batch_size, sequence_length)",
+               "M")
+        .Input(2,
+               "cos_cache",
+               "2D tensor with shape (max_sequence_length, head_size / 2).",
+               "T")
+        .Input(3,
+               "sin_cache",
+               "2D tensor with shape (max_sequence_length, head_size / 2).",
+               "T")
+        .Output(0,
+                "output",
+                "tensor with same shape as input.",
+                "T")
+        .TypeConstraint("T", {"tensor(float)", "tensor(float16)"}, "Constrain input and output types to float tensors.")
+        .TypeConstraint("M", {"tensor(int64)"}, "Constrain input and output types to integer tensors")
+        .TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
+          propagateElemTypeFromInputToOutput(ctx, 0, 0);
+          propagateShapeFromInputToOutput(ctx, 0, 0);
+        }));
+
 constexpr const char* EmbedLayerNormalization_ver1_doc = R"DOC(
 EmbedLayerNormalization is the fusion of embedding layer in BERT model, with optional mask processing.
 The embedding layer takes input_ids (word IDs) and segment_ids (sentence IDs) to look up word_embedding, position_embedding,
diff --git a/onnxruntime/core/graph/contrib_ops/collective_defs.cc b/onnxruntime/core/graph/contrib_ops/collective_defs.cc
index 9e63e0d5e83f6..59adfc523c860 100644
--- a/onnxruntime/core/graph/contrib_ops/collective_defs.cc
+++ b/onnxruntime/core/graph/contrib_ops/collective_defs.cc
@@ -79,6 +79,397 @@ void RegisterCollectiveOps() {
       .TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
         propagateShapeAndTypeFromFirstInput(ctx);
       });
+
+  ONNX_CONTRIB_OPERATOR_SCHEMA(DistributedMatMul)
+      .SetDomain(kMSDomain)
+      .SinceVersion(1)
+      .Attr("input_device_mesh_elements",
+            "device_mesh_elements[i] defines the device mesh's value for the i-th input. "
+            "E.g., device_mesh_elements=[\"[0, 1]\", \"[0, 1]\"] means the 1st and the 2nd "
+            " inputs are stored on the 0-th and the 1st devices, respectively.",
+            AttributeProto::STRINGS)
+      .Attr("input_device_mesh_shapes",
+            "device_mesh_shape[i] defines the device mesh's shape for the i-th input.",
+            AttributeProto::STRINGS)
+      .Attr("input_shard_specs",
+            "The sharding spec of inputs. "
+            "E.g., if input_shard_specs[i] is \"RRR\", the i-th input is a unsharded 3-D tensor.",
+            AttributeProto::STRINGS)
+      .Attr("output_device_mesh_elements",
+            "Similar to input_device_mesh_elments but for outputs.",
+            AttributeProto::STRINGS)
+      .Attr("output_device_mesh_shapes",
+            "Similar to input_device_mesh_shapes but for outputs.",
+            AttributeProto::STRINGS)
+      .Attr("output_shard_specs",
+            "Similar to input_shard_specs but for outputs.",
+            AttributeProto::STRINGS)
+      .Input(0, "A", "N-dimensional matrix A", "T", OpSchema::Single, true, 1, OpSchema::Differentiable)
+      .Input(1, "B", "N-dimensional matrix B", "T", OpSchema::Single, true, 1, OpSchema::Differentiable)
+      .Output(0, "Y", "Matrix multiply results from A * B", "T", OpSchema::Single, true, 1, OpSchema::Differentiable)
+      .TypeConstraint(
+          "T",
+          {
+              "tensor(float16)",
+              "tensor(float)",
+          },
+          "Constrain input and output types to float tensors.");
+
+  ONNX_CONTRIB_OPERATOR_SCHEMA(DistributedSlice)
+      .SetDomain(kMSDomain)
+      .SinceVersion(1)
+      .Attr("input_device_mesh_elements",
+            "device_mesh_elements[i] defines the device mesh's value for the i-th input. "
+            "E.g., device_mesh_elements=[\"[0, 1]\", \"[0, 1]\"] means the 1st and the 2nd "
+            " inputs are stored on the 0-th and the 1st devices, respectively.",
+            AttributeProto::STRINGS)
+      .Attr("input_device_mesh_shapes",
+            "device_mesh_shape[i] defines the device mesh's shape for the i-th input.",
+            AttributeProto::STRINGS)
+      .Attr("input_shard_specs",
+            "The sharding spec of inputs. "
+            "E.g., if input_shard_specs[i] is \"RRR\", the i-th input is a unsharded 3-D tensor.",
+            AttributeProto::STRINGS)
+      .Attr("output_device_mesh_elements",
+            "Similar to input_device_mesh_elments but for outputs.",
+            AttributeProto::STRINGS)
+      .Attr("output_device_mesh_shapes",
+            "Similar to input_device_mesh_shapes but for outputs.",
+            AttributeProto::STRINGS)
+      .Attr("output_shard_specs",
+            "Similar to input_shard_specs but for outputs.",
+            AttributeProto::STRINGS)
+      .Input(
+          0,
+          "data",
+          "Tensor of data to extract slices from.",
+          "T",
+          OpSchema::Single,
+          true,
+          1,
+          OpSchema::Differentiable)
+      .Input(
+          1,
+          "starts",
+          "1-D tensor of starting indices of corresponding axis in `axes`",
+          "Tind",
+          OpSchema::Single,
+          true,
+          1,
+          OpSchema::NonDifferentiable)
+      .Input(
+          2,
+          "ends",
+          "1-D tensor of ending indices (exclusive) of corresponding axis in `axes`",
+          "Tind",
+          OpSchema::Single,
+          true,
+          1,
+          OpSchema::NonDifferentiable)
+      .Input(
+          3,
+          "axes",
+          "1-D tensor of axes that `starts` and `ends` apply to. Negative value means counting dimensions "
+          "from the back. Accepted range is [-r, r-1] where r = rank(data). Behavior is undefined if an "
+          "axis is repeated.",
+          "Tind",
+          OpSchema::Optional,
+          true,
+          1,
+          OpSchema::NonDifferentiable)
+      .Input(
+          4,
+          "steps",
+          "1-D tensor of slice step of corresponding axis in `axes`. "
+          "Negative value means slicing backward. 'steps' cannot be 0. "
+          "Defaults to 1s.",
+          "Tind",
+          OpSchema::Optional,
+          true,
+          1,
+          OpSchema::NonDifferentiable)
+      .Output(0, "output", "Sliced data tensor.", "T", OpSchema::Single, true, 1, OpSchema::Differentiable)
+      .TypeConstraint("T", OpSchema::all_tensor_types_ir4(), "Constrain input and output types to all tensor types.")
+      .TypeConstraint("Tind", {"tensor(int32)", "tensor(int64)"}, "Constrain indices to integer types");
+
+  ONNX_CONTRIB_OPERATOR_SCHEMA(DistributedReshape)
+      .SetDomain(kMSDomain)
+      .SinceVersion(1)
+      .Attr("input_device_mesh_elements",
+            "device_mesh_elements[i] defines the device mesh's value for the i-th input. "
+            "E.g., device_mesh_elements=[\"[0, 1]\", \"[0, 1]\"] means the 1st and the 2nd "
+            " inputs are stored on the 0-th and the 1st devices, respectively.",
+            AttributeProto::STRINGS)
+      .Attr("input_device_mesh_shapes",
+            "device_mesh_shape[i] defines the device mesh's shape for the i-th input.",
+            AttributeProto::STRINGS)
+      .Attr("input_shard_specs",
+            "The sharding spec of inputs. "
+            "E.g., if input_shard_specs[i] is \"RRR\", the i-th input is a unsharded 3-D tensor.",
+            AttributeProto::STRINGS)
+      .Attr("output_device_mesh_elements",
+            "Similar to input_device_mesh_elments but for outputs.",
+            AttributeProto::STRINGS)
+      .Attr("output_device_mesh_shapes",
+            "Similar to input_device_mesh_shapes but for outputs.",
+            AttributeProto::STRINGS)
+      .Attr("output_shard_specs",
+            "Similar to input_shard_specs but for outputs.",
+            AttributeProto::STRINGS)
+      .Attr(
+          "allowzero",
+          "(Optional) By default, when any value in the 'shape' input is equal to zero "
+          "the corresponding dimension value is copied from the input tensor dynamically. "
+          "allowzero=1 indicates that if any value in the 'shape' input is set to zero, "
+          "the zero value is honored, similar to NumPy.",
+          AttributeProto::INT,
+          static_cast<int64_t>(0))
+      .Input(0, "data", "An input tensor.", "T", OpSchema::Single, true, 1, OpSchema::Differentiable)
+      .Input(
+          1,
+          "shape",
+          "Specified shape for output.",
+          "tensor(int64)",
+          OpSchema::Single,
+          true,
+          1,
+          OpSchema::NonDifferentiable)
+      .Output(0, "reshaped", "Reshaped data.", "T", OpSchema::Single, true, 1, OpSchema::Differentiable)
+      .TypeConstraint("T", OpSchema::all_tensor_types_ir4(), "Constrain input and output types to all tensor types.");
+
+  ONNX_CONTRIB_OPERATOR_SCHEMA(DistributedExpand)
+      .SetDomain(kMSDomain)
+      .SinceVersion(1)
+      .Attr("input_device_mesh_elements",
+            "device_mesh_elements[i] defines the device mesh's value for the i-th input. "
+            "E.g., device_mesh_elements=[\"[0, 1]\", \"[0, 1]\"] means the 1st and the 2nd "
+            " inputs are stored on the 0-th and the 1st devices, respectively.",
+            AttributeProto::STRINGS)
+      .Attr("input_device_mesh_shapes",
+            "device_mesh_shape[i] defines the device mesh's shape for the i-th input.",
+            AttributeProto::STRINGS)
+      .Attr("input_shard_specs",
+            "The sharding spec of inputs. "
+            "E.g., if input_shard_specs[i] is \"RRR\", the i-th input is a unsharded 3-D tensor.",
+            AttributeProto::STRINGS)
+      .Attr("output_device_mesh_elements",
+            "Similar to input_device_mesh_elments but for outputs.",
+            AttributeProto::STRINGS)
+      .Attr("output_device_mesh_shapes",
+            "Similar to input_device_mesh_shapes but for outputs.",
+            AttributeProto::STRINGS)
+      .Attr("output_shard_specs",
+            "Similar to input_shard_specs but for outputs.",
+            AttributeProto::STRINGS)
+      .Input(0, "input", "Input tensor", "T", OpSchema::Single, true, 1, OpSchema::Differentiable)
+      .Input(
+          1,
+          "shape",
+          "A 1-D tensor indicates the shape you want to expand to, following the broadcast rule",
+          "tensor(int64)",
+          OpSchema::Single,
+          true,
+          1,
+          OpSchema::NonDifferentiable)
+      .Output(0, "output", "Output tensor", "T", OpSchema::Single, true, 1, OpSchema::Differentiable)
+      .TypeConstraint("T", OpSchema::all_tensor_types_ir4(), "Constrain input and output types to all tensors.");
+
+  ONNX_CONTRIB_OPERATOR_SCHEMA(DistributedReduceSum)
+      .SetDomain(kMSDomain)
+      .SinceVersion(1)
+      .Attr("input_device_mesh_elements",
+            "device_mesh_elements[i] defines the device mesh's value for the i-th input. "
+            "E.g., device_mesh_elements=[\"[0, 1]\", \"[0, 1]\"] means the 1st and the 2nd "
+            " inputs are stored on the 0-th and the 1st devices, respectively.",
+            AttributeProto::STRINGS)
+      .Attr("input_device_mesh_shapes",
+            "device_mesh_shape[i] defines the device mesh's shape for the i-th input.",
+            AttributeProto::STRINGS)
+      .Attr("input_shard_specs",
+            "The sharding spec of inputs. "
+            "E.g., if input_shard_specs[i] is \"RRR\", the i-th input is a unsharded 3-D tensor.",
+            AttributeProto::STRINGS)
+      .Attr("output_device_mesh_elements",
+            "Similar to input_device_mesh_elments but for outputs.",
+            AttributeProto::STRINGS)
+      .Attr("output_device_mesh_shapes",
+            "Similar to input_device_mesh_shapes but for outputs.",
+            AttributeProto::STRINGS)
+      .Attr("output_shard_specs",
+            "Similar to input_shard_specs but for outputs.",
+            AttributeProto::STRINGS)
+      .Attr("keepdims",
+            "Keep the reduced dimension or not, default 1 mean keep reduced dimension.",
+            AttributeProto::INT,
+            static_cast<int64_t>(1))
+      .Input(0, "input", "Input tensor", "T", OpSchema::Single, true, 1, OpSchema::Differentiable)
+      .Input(
+          1,
+          "shape",
+          "A 1-D tensor indicates the shape you want to expand to, following the broadcast rule",
+          "tensor(int64)",
+          OpSchema::Single,
+          true,
+          1,
+          OpSchema::NonDifferentiable)
+      .Output(0, "output", "Output tensor", "T", OpSchema::Single, true, 1, OpSchema::Differentiable)
+      .TypeConstraint("T", OpSchema::all_tensor_types_ir4(), "Constrain input and output types to all tensors.");
+
+  ONNX_CONTRIB_OPERATOR_SCHEMA(DistributedReduceMax)
+      .SetDomain(kMSDomain)
+      .SinceVersion(1)
+      .Attr("input_device_mesh_elements",
+            "device_mesh_elements[i] defines the device mesh's value for the i-th input. "
+            "E.g., device_mesh_elements=[\"[0, 1]\", \"[0, 1]\"] means the 1st and the 2nd "
+            " inputs are stored on the 0-th and the 1st devices, respectively.",
+            AttributeProto::STRINGS)
+      .Attr("input_device_mesh_shapes",
+            "device_mesh_shape[i] defines the device mesh's shape for the i-th input.",
+            AttributeProto::STRINGS)
+      .Attr("input_shard_specs",
+            "The sharding spec of inputs. "
+            "E.g., if input_shard_specs[i] is \"RRR\", the i-th input is a unsharded 3-D tensor.",
+            AttributeProto::STRINGS)
+      .Attr("output_device_mesh_elements",
+            "Similar to input_device_mesh_elments but for outputs.",
+            AttributeProto::STRINGS)
+      .Attr("output_device_mesh_shapes",
+            "Similar to input_device_mesh_shapes but for outputs.",
+            AttributeProto::STRINGS)
+      .Attr("output_shard_specs",
+            "Similar to input_shard_specs but for outputs.",
+            AttributeProto::STRINGS)
+      .Attr("keepdims",
+            "Keep the reduced dimension or not, default 1 mean keep reduced dimension.",
+            AttributeProto::INT,
+            static_cast<int64_t>(1))
+      .Input(0, "input", "Input tensor", "T", OpSchema::Single, true, 1, OpSchema::Differentiable)
+      .Input(
+          1,
+          "shape",
+          "A 1-D tensor indicates the shape you want to expand to, following the broadcast rule",
+          "tensor(int64)",
+          OpSchema::Single,
+          true,
+          1,
+          OpSchema::NonDifferentiable)
+      .Output(0, "output", "Output tensor", "T", OpSchema::Single, true, 1, OpSchema::Differentiable)
+      .TypeConstraint("T", OpSchema::all_tensor_types_ir4(), "Constrain input and output types to all tensors.");
+
+  ONNX_CONTRIB_OPERATOR_SCHEMA(DistributedReduceMean)
+      .SetDomain(kMSDomain)
+      .SinceVersion(1)
+      .Attr("input_device_mesh_elements",
+            "device_mesh_elements[i] defines the device mesh's value for the i-th input. "
+            "E.g., device_mesh_elements=[\"[0, 1]\", \"[0, 1]\"] means the 1st and the 2nd "
+            " inputs are stored on the 0-th and the 1st devices, respectively.",
+            AttributeProto::STRINGS)
+      .Attr("input_device_mesh_shapes",
+            "device_mesh_shape[i] defines the device mesh's shape for the i-th input.",
+            AttributeProto::STRINGS)
+      .Attr("input_shard_specs",
+            "The sharding spec of inputs. "
+            "E.g., if input_shard_specs[i] is \"RRR\", the i-th input is a unsharded 3-D tensor.",
+            AttributeProto::STRINGS)
+      .Attr("output_device_mesh_elements",
+            "Similar to input_device_mesh_elments but for outputs.",
+            AttributeProto::STRINGS)
+      .Attr("output_device_mesh_shapes",
+            "Similar to input_device_mesh_shapes but for outputs.",
+            AttributeProto::STRINGS)
+      .Attr("output_shard_specs",
+            "Similar to input_shard_specs but for outputs.",
+            AttributeProto::STRINGS)
+      .Attr("keepdims",
+            "Keep the reduced dimension or not, default 1 mean keep reduced dimension.",
+            AttributeProto::INT,
+            static_cast<int64_t>(1))
+      .Input(0, "input", "Input tensor", "T", OpSchema::Single, true, 1, OpSchema::Differentiable)
+      .Input(
+          1,
+          "shape",
+          "A 1-D tensor indicates the shape you want to expand to, following the broadcast rule",
+          "tensor(int64)",
+          OpSchema::Single,
+          true,
+          1,
+          OpSchema::NonDifferentiable)
+      .Output(0, "output", "Output tensor", "T", OpSchema::Single, true, 1, OpSchema::Differentiable)
+      .TypeConstraint("T", OpSchema::all_tensor_types_ir4(), "Constrain input and output types to all tensors.");
+
+  ONNX_CONTRIB_OPERATOR_SCHEMA(DistributedUnsqueeze)
+      .SetDomain(kMSDomain)
+      .SinceVersion(1)
+      .Attr("input_device_mesh_elements",
+            "device_mesh_elements[i] defines the device mesh's value for the i-th input. "
+            "E.g., device_mesh_elements=[\"[0, 1]\", \"[0, 1]\"] means the 1st and the 2nd "
+            " inputs are stored on the 0-th and the 1st devices, respectively.",
+            AttributeProto::STRINGS)
+      .Attr("input_device_mesh_shapes",
+            "device_mesh_shape[i] defines the device mesh's shape for the i-th input.",
+            AttributeProto::STRINGS)
+      .Attr("input_shard_specs",
+            "The sharding spec of inputs. "
+            "E.g., if input_shard_specs[i] is \"RRR\", the i-th input is a unsharded 3-D tensor.",
+            AttributeProto::STRINGS)
+      .Attr("output_device_mesh_elements",
+            "Similar to input_device_mesh_elments but for outputs.",
+            AttributeProto::STRINGS)
+      .Attr("output_device_mesh_shapes",
+            "Similar to input_device_mesh_shapes but for outputs.",
+            AttributeProto::STRINGS)
+      .Attr("output_shard_specs",
+            "Similar to input_shard_specs but for outputs.",
+            AttributeProto::STRINGS)
+      .Input(0, "input", "Input tensor", "T", OpSchema::Single, true, 1, OpSchema::Differentiable)
+      .Input(
+          1,
+          "axes",
+          "A 1-D tensor indicates the axes to add.",
+          "tensor(int64)",
+          OpSchema::Single,
+          true,
+          1,
+          OpSchema::NonDifferentiable)
+      .Output(0, "output", "Output tensor", "T", OpSchema::Single, true, 1, OpSchema::Differentiable)
+      .TypeConstraint("T", OpSchema::all_tensor_types_ir4(), "Constrain input and output types to all tensors.");
+
+  ONNX_CONTRIB_OPERATOR_SCHEMA(DistributedSqueeze)
+      .SetDomain(kMSDomain)
+      .SinceVersion(1)
+      .Attr("input_device_mesh_elements",
+            "device_mesh_elements[i] defines the device mesh's value for the i-th input. "
+            "E.g., device_mesh_elements=[\"[0, 1]\", \"[0, 1]\"] means the 1st and the 2nd "
+            " inputs are stored on the 0-th and the 1st devices, respectively.",
+            AttributeProto::STRINGS)
+      .Attr("input_device_mesh_shapes",
+            "device_mesh_shape[i] defines the device mesh's shape for the i-th input.",
+            AttributeProto::STRINGS)
+      .Attr("input_shard_specs",
+            "The sharding spec of inputs. "
+            "E.g., if input_shard_specs[i] is \"RRR\", the i-th input is a unsharded 3-D tensor.",
+            AttributeProto::STRINGS)
+      .Attr("output_device_mesh_elements",
+            "Similar to input_device_mesh_elments but for outputs.",
+            AttributeProto::STRINGS)
+      .Attr("output_device_mesh_shapes",
+            "Similar to input_device_mesh_shapes but for outputs.",
+            AttributeProto::STRINGS)
+      .Attr("output_shard_specs",
+            "Similar to input_shard_specs but for outputs.",
+            AttributeProto::STRINGS)
+      .Input(0, "input", "Input tensor", "T", OpSchema::Single, true, 1, OpSchema::Differentiable)
+      .Input(
+          1,
+          "axes",
+          "A 1-D tensor indicates the axes to add.",
+          "tensor(int64)",
+          OpSchema::Single,
+          true,
+          1,
+          OpSchema::NonDifferentiable)
+      .Output(0, "output", "Output tensor", "T", OpSchema::Single, true, 1, OpSchema::Differentiable)
+      .TypeConstraint("T", OpSchema::all_tensor_types_ir4(), "Constrain input and output types to all tensors.");
 }
 
 }  // namespace contrib
diff --git a/onnxruntime/core/graph/contrib_ops/contrib_defs.cc b/onnxruntime/core/graph/contrib_ops/contrib_defs.cc
index a79203a94a3a7..4c0d78f0ee297 100644
--- a/onnxruntime/core/graph/contrib_ops/contrib_defs.cc
+++ b/onnxruntime/core/graph/contrib_ops/contrib_defs.cc
@@ -415,6 +415,7 @@ void BeamSearchShapeInference(ONNX_NAMESPACE::InferenceContext& ctx) {
   // output 0 (sequences) shape: (batch_size, num_return_sequences, max_length)
   // output 1 (sequences_scores) shape: (batch_size, num_return_sequences)
   // output 2 (scores) shape: (max_length - sequence_length, batch_size, num_beams, vocab_size)
+  // output 3 (cross_attention): shape: (batch_size, num_return_sequences, Layers, Heads, max_length, Frames)
   if (!hasInputShape(ctx, 0)) {
     return;
   }
@@ -1060,6 +1061,78 @@ ONNX_MS_OPERATOR_SET_SCHEMA(GridSample, 1,
                                   updateOutputShape(ctx, 0, {N, C, H_out, W_out});
                                 }));
 
+ONNX_MS_OPERATOR_SET_SCHEMA(
+    UnfoldTensor, 1,
+    OpSchema()
+        .SetDoc("Returns a tensor which contains all slices of size size from input tensor in the dimension dim. "
+                "Step between two slices is given by step. "
+                "If sizedim is the size of dimension dim for input tensor, the size of dimension dim in "
+                "the returned tensor will be (sizedim - size) / step + 1. "
+                "An additional dimension of size size is appended in the returned tensor.")
+        .Attr("dim", "specify the dimension to unfold", AttributeProto::INT, static_cast<int64_t>(-1))
+        .Attr("size", "specify the size", AttributeProto::INT)
+        .Attr("step", "specify the step.", AttributeProto::INT, static_cast<int64_t>(1))
+        .Input(0, "input", "input tensor", "T")
+        .Output(0, "output", "Output tensor.", "T")
+        .TypeConstraint("T", OpSchema::all_tensor_types_ir4(), "Allow inputs and outputs to be any kind of tensor.")
+        .TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
+          propagateElemTypeFromInputToOutput(ctx, 0, 0);
+
+          if (!hasInputShape(ctx, 0)) return;
+          auto& input_shape = getInputShape(ctx, 0);
+          const int rank = input_shape.dim_size();
+          int64_t dim = getAttribute(ctx, "dim", -1);
+          dim = HandleNegativeAxis(dim, rank);
+          if (!input_shape.dim(static_cast<int>(dim)).has_dim_value()) {
+            return;
+          }
+          int64_t dim_size = input_shape.dim(static_cast<int>(dim)).dim_value();
+
+          const int64_t step = getAttribute(ctx, "step", -1);
+          if (step <= 0) {
+            fail_shape_inference("size attribute in UnfoldTensor must greater than 0.")
+          }
+          int64_t size = -1;
+          auto size_proto = ctx.getAttribute("size");
+          if (!(size_proto)) {
+            fail_shape_inference("size attribute in UnfoldTensor not specified!")
+          }
+          size = size_proto->i();
+          if (size > dim_size || size <= 0) {
+            fail_shape_inference("size attribute in UnfoldTensor not positive and less than the dim size!")
+          }
+
+          ONNX_NAMESPACE::TensorShapeProto output_shape;
+          for (int d = 0; d < rank; d++) {
+            if (d == dim) {
+              output_shape.add_dim()->set_dim_value((dim_size - size) / step + 1);
+            } else {
+              *output_shape.add_dim() = input_shape.dim(d);
+            }
+          }
+          output_shape.add_dim()->set_dim_value(size);
+          updateOutputShape(ctx, 0, output_shape);
+        }));
+
+ONNX_MS_OPERATOR_SET_SCHEMA(
+    DynamicTimeWarping, 1,
+    OpSchema()
+        .SetDoc("Input is cost matrix where each value in input[r][c] is the cost for pass the point (r, c). From current point"
+                "(r, c),  points (r+1, c), (r+1, c+1) or (r, c+1) could be arrived in next move. Given such cost matrix, return "
+                "dynamic time wrapping of shape [2, x], where the path made by all points (output[0][t], output[1][t])"
+                "have the lowest cost among all paths from (0, 0) to (M-1, N-1).")
+        .Input(0, "input", "Input cost tensor, it must be 2D tensor of shape M x N, or 1 x M x N", "F")
+        .Output(0, "output", "Output tensor. shape is [2, x], where max(M, N) <= x < M + N", "I")
+        .TypeConstraint("F", {"tensor(float)"}, "Constrain to float tensors.")
+        .TypeConstraint("I", {"tensor(int32)"}, "Constrain to integer types.")
+        .TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
+          updateOutputElemType(ctx, 0, ONNX_NAMESPACE::TensorProto::INT32);
+          ONNX_NAMESPACE::TensorShapeProto resultShape;
+          resultShape.add_dim()->set_dim_value(2);
+          resultShape.add_dim();
+          updateOutputShape(ctx, 0, resultShape);
+        }));
+
 ONNX_MS_OPERATOR_SET_SCHEMA(BeamSearch, 1,
                             OpSchema()
                                 .SetDoc("Beam Search for text generation. Supports GPT-2 decoder.")
@@ -1110,6 +1183,122 @@ ONNX_MS_OPERATOR_SET_SCHEMA(BeamSearch, 1,
                                   BeamSearchShapeInference(ctx);
                                 }));
 
+ONNX_MS_OPERATOR_SET_SCHEMA(WhisperBeamSearch, 1,
+                            OpSchema()
+                                .SetDoc("Beam Search for whisper model, especiall with cross_qk features etc.")
+                                .Attr("eos_token_id", "The id of the end-of-sequence token", AttributeProto::INT)
+                                .Attr("pad_token_id", "The id of the padding token", AttributeProto::INT)
+                                .Attr("decoder_start_token_id", "The id of the token that indicates decoding starts.", AttributeProto::INT, static_cast<int64_t>(-1))
+                                .Attr("no_repeat_ngram_size", "no repeat ngrams size", AttributeProto::INT, static_cast<int64_t>(0))
+                                .Attr("early_stopping", "early stop or not", AttributeProto::INT, static_cast<int64_t>(0))
+                                .Attr("model_type", "Must be 2 for whisper", AttributeProto::INT, static_cast<int64_t>(2))
+                                .Attr("encoder", "The subgraph for initialization of encoder and decoder. It will be called once before decoder subgraph.", AttributeProto::GRAPH, OPTIONAL_VALUE)
+                                .Attr("init_decoder",
+                                      "The subgraph for the first decoding run. It will be called once before `decoder` subgraph. "
+                                      "This is relevant only for the GPT2 model. If this attribute is missing, the `decoder` subgraph will be used for all decoding runs",
+                                      AttributeProto::GRAPH, OPTIONAL_VALUE)
+                                .Attr("decoder", "Decoder subgraph to execute in a loop.", AttributeProto::GRAPH)
+                                .Attr("vocab_size",
+                                      "Size of the vocabulary. "
+                                      "If not provided, it will be inferred from the decoder subgraph's output shape",
+                                      AttributeProto::INT, static_cast<int64_t>(-1))
+                                .Attr("decoder_output_cross_qk", "If nozero, decoder subgraph contains output Q*K from cross attentions. Default 0.", AttributeProto::INT, OPTIONAL_VALUE)
+                                .Attr("no_speech_token",
+                                      "The token in whisper model that marks all sequence empty. With this model, whisper could output no_speech_prob after. Default -1.",
+                                      AttributeProto::INT, OPTIONAL_VALUE)
+                                .Input(0, "input_ids", "The sequence used as a prompt for the generation in the encoder subgraph. Shape is (batch_size, sequence_length)", "F")
+                                .Input(1, "max_length", "The maximum length of the sequence to be generated. Shape is (1)", "I")
+                                .Input(2, "min_length", "The minimum length below which the score of eos_token_id is set to -Inf. Shape is (1)", "I", OpSchema::Optional)
+                                .Input(3, "num_beams", "Number of beams for beam search. 1 means no beam search. Shape is (1)", "I")
+                                .Input(4, "num_return_sequences", "The number of returned sequences in the batch. Shape is (1)", "I")
+                                .Input(5, "length_penalty",
+                                       "Exponential penalty to the length. Default value 1.0 means no penalty."
+                                       "Value > 1.0 encourages longer sequences, while values < 1.0 produces shorter sequences."
+                                       "Shape is (1,)",
+                                       "T", OpSchema::Optional)
+                                .Input(6, "repetition_penalty", "The parameter for repetition penalty. Default value 1.0 means no penalty. Accepts value > 0.0. Shape is (1)", "T", OpSchema::Optional)
+                                .Input(7, "vocab_mask", "Mask of vocabulary. Words that masked with 0 are not allowed to be generated, and 1 is allowed. Shape is (vacab_size)", "M", OpSchema::Optional)
+                                .Input(8, "prefix_vocab_mask", "Mask of vocabulary for first step. Words that masked with 0 are not allowed to be generated, and 1 is allowed. Shape is (batch_size, vocab_size)", "M", OpSchema::Optional)
+                                .Input(9, "attention_mask", "Custom attention mask. Shape is (batch_size, sequence_length)", "I", OpSchema::Optional)
+                                .Input(10, "decoder_input_ids", "The forced input id sequence for the decoder subgraph. Shape is (batch_size, initial_sequence_length)", "I", OpSchema::Optional)
+                                .Input(11, "logits_processor", "Specific logits processor for different types of beamsearch models. Default value 0 means no specific logit processor. Accepts value >= 0. Shape is (1)", "I", OpSchema::Optional)
+                                .Input(12, "cross_qk_layer_head",
+                                       "Only keep this list of (layer, head) of QK in the final cross_qk output when use_cross_qk is set. Default collect all"
+                                       "its shape is (number of (layer, head) to keep, 2), i.e., [[layer_id1, head_id1], [layer_id2, head_id2]......]",
+                                       "I", OpSchema::Optional)
+                                .Input(13, "extra_decoding_ids",
+                                       "Part of the decoder_input_ids that we need cross qk for it. it is of shape  (batch_size, extra_decoding_ids_len)."
+                                       "In such case, we should remove this from the tail of the decoder_input_ids, and put it here. ids < 0 in it (for multiple batch) "
+                                       "are treated as stop of the extra_decoding_ids for corresponding batch.",
+                                       "I", OpSchema::Optional)
+                                .Output(0, "sequences", "Word IDs of generated sequences. Shape is (batch_size, num_return_sequences, max_sequence_length)", "I")
+                                .Output(1, "sequences_scores", "Final beam score of the generated sequences. Shape is (batch_size, num_return_sequences)", "T", OpSchema::Optional)
+                                .Output(2, "scores",
+                                        "Processed beam scores for each vocabulary token at each generation step."
+                                        "Beam scores consisting of log softmax scores for each vocabulary token and sum of log softmax of previously generated tokens in this beam."
+                                        "Shape is (max_length - sequence_length, batch_size, num_beams, vocab_size)",
+                                        "T", OpSchema::Optional)
+                                .Output(3, "cross_qk",
+                                        "Output the accumulated stacked Q*K in cross attentions. Let H = number of Head of cross attention, "
+                                        "F = the frames or kv-seq-len of the cross attention input, T = real decoded token length, L = number of layers,"
+                                        "B = batch size, R = num_return_sequences. It then should return tensor of shape [B, R, L*H, T, F]."
+                                        "If cross_qk_layer_head is given, shape is [B, R, cross_qk_layer_head.shape[0], T, F]",
+                                        "V", OpSchema::Optional)
+                                .Output(4, "non_speech_probs",
+                                        "For whisper model, output the probabilities from logits after encoder and context decoding for the no_speech_token."
+                                        "Currently we treat the last token's logits is what we need, in future extra graph logic may be add to the encoder/context-decoder subgraph."
+                                        "The prob is save before logits may be updated by extra-decoding-ids. The shape of non_speech_probs is [B]",
+                                        "T", OpSchema::Optional)
+                                .TypeConstraint("T", {"tensor(float)", "tensor(float16)"}, "Constrain to float tensors.")
+                                .TypeConstraint("F", {"tensor(float)", "tensor(int32)", "tensor(float16)"}, "Constrain input type to float or int tensors.")
+                                .TypeConstraint("I", {"tensor(int32)"}, "Constrain to integer types")
+                                .TypeConstraint("M", {"tensor(int32)"}, "Constrain mask to integer types")
+                                .TypeConstraint("V", {"tensor(float)"}, "Constrain cross_qk to float32 tensors.")
+                                .TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
+                                  BeamSearchShapeInference(ctx);
+                                  if (ctx.getNumOutputs() > 3) {
+                                    ONNX_NAMESPACE::updateOutputElemType(ctx, 3, ONNX_NAMESPACE::TensorProto::FLOAT);
+                                  }
+                                  if (!hasInputShape(ctx, 0)) {
+                                    return;
+                                  }
+                                  auto& input_ids_shape = getInputShape(ctx, 0);
+                                  auto& input_ids_dims = input_ids_shape.dim();
+                                  int64_t batch_size = input_ids_dims[0].dim_value();
+                                  int64_t sequence_length = input_ids_dims[1].dim_value();
+
+                                  const auto max_length = ctx.getInputData(1);
+                                  const auto num_return_sequences = ctx.getInputData(4);
+                                  if (max_length == nullptr || num_return_sequences == nullptr) {  // not initializer
+                                    return;
+                                  }
+                                  int max_length_value = 0;
+                                  if (!ParseScalar(max_length, max_length_value) || max_length_value <= 0) {
+                                    fail_shape_inference("Failed to parse max_length or it is not positive integer scalar");
+                                  }
+
+                                  int num_return_sequences_value = 0;
+                                  if (!ParseScalar(num_return_sequences, num_return_sequences_value) || num_return_sequences_value <= 0) {
+                                    fail_shape_inference("Failed to parse num_return_sequences or it is not positive integer scalar");
+                                  }
+
+                                  if (ctx.getNumOutputs() > 3) {
+                                    ONNX_NAMESPACE::TensorShapeProto cross_attn_shape;
+                                    cross_attn_shape.add_dim()->set_dim_value(batch_size);
+                                    cross_attn_shape.add_dim()->set_dim_value(num_return_sequences_value);
+                                    cross_attn_shape.add_dim();  // num of layer is unknown, no need to calc it from subgraph here
+                                    cross_attn_shape.add_dim();  // num of head is unknown, no need to calc it from subgraph here
+                                    cross_attn_shape.add_dim()->set_dim_value(max_length_value);
+                                    cross_attn_shape.add_dim()->set_dim_value(sequence_length);
+                                    updateOutputShape(ctx, 3, cross_attn_shape);
+                                  }
+                                  if (ctx.getNumOutputs() > 4) {
+                                    ONNX_NAMESPACE::TensorShapeProto non_speech_probs_shape;
+                                    non_speech_probs_shape.add_dim()->set_dim_value(batch_size);
+                                    updateOutputShape(ctx, 4, non_speech_probs_shape);
+                                  }
+                                }));
+
 ONNX_MS_OPERATOR_SET_SCHEMA(GreedySearch, 1,
                             OpSchema()
                                 .SetDoc("Greedy Search for text generation.")
@@ -1186,6 +1375,27 @@ ONNX_MS_OPERATOR_SET_SCHEMA(Sampling, 1,
                                   GreedySearchShapeInference(ctx);
                                 }));
 
+constexpr const char* MoE_ver1_doc = R"DOC(
+      Mixture of experts. Examples: Switch transformer(https://arxiv.org/pdf/2101.03961.pdf) use top 1,
+      GLaM(https://arxiv.org/abs/2112.06905) activates top 2 FFN, and Vision MOE(https://arxiv.org/pdf/2106.05974.pdf)
+      usually uses top 32 experts.
+      )DOC";
+
+ONNX_MS_OPERATOR_SET_SCHEMA(MoE, 1,
+                            OpSchema()
+                                .SetDoc(MoE_ver1_doc)
+                                .Attr("activation_type", "Activation function to use. Choose from relu, gelu, silu and identity. Default is relu", AttributeProto::STRING, std::string("relu"))
+                                .Attr("k", "Number of top experts to select from expert pool", AttributeProto::INT, static_cast<int64_t>(1))
+                                .Input(0, "input", "2D input tensor with shape (num_rows, hidden_size) or 3D input tensor with shape (batch_size, sequence_length, hidden_size)", "T")
+                                .Input(1, "router_probs", "2D input tensor with shape (num_rows, num_experts)", "T")
+                                .Input(2, "fc1_experts_weights", "3D input tensor with shape (num_experts, hidden_size, inter_size)", "T")
+                                .Input(3, "fc2_experts_weights", "3D input tensor with shape (num_experts, inter_size, hidden_size)", "T")
+                                .Input(4, "fc1_experts_bias", "2D optional input tensor with shape (num_experts, inter_size)", "T", OpSchema::Optional)
+                                .Input(5, "fc2_experts_bias", "2D optional input tensor with shape (num_experts, hidden_size)", "T", OpSchema::Optional)
+                                .Output(0, "output", "2D input tensor with shape (num_rows, hidden_size) or 3D input tensor with shape (batch_size, sequence_length, hidden_size)", "T")
+                                .TypeConstraint("T", {"tensor(float)", "tensor(float16)"}, "Constrain input and output types to float or float16 tensors.")
+                                .TypeAndShapeInferenceFunction(ONNX_NAMESPACE::propagateShapeAndTypeFromFirstInput));
+
 ONNX_MS_OPERATOR_SET_SCHEMA(SampleOp, 1,
                             OpSchema()
                                 .Input(0, "X", "input", "T")
@@ -2384,6 +2594,154 @@ ONNX_MS_OPERATOR_SET_SCHEMA(CropAndResize, 1,
         a fixed size = [crop_height, crop_width]. The result is a 4-D tensor [num_boxes, crop_height, crop_width, depth].
         The resizing is corner aligned.)DOC"));
 
+#if !defined(DISABLE_FLOAT8_TYPES)
+#define GEMM_FLOAT8_TYPES \
+  { "tensor(float8e4m3fn)", "tensor(float8e5m2)", "tensor(float16)", "tensor(bfloat16)", "tensor(float)" }
+#else
+#define GEMM_FLOAT8_TYPES \
+  { "tensor(float16)", "tensor(bfloat16)", "tensor(float)" }
+#endif
+
+ONNX_MS_OPERATOR_SET_SCHEMA(GemmFloat8, 1,
+                            OpSchema()
+                                .SetDoc(R"DOC(Generic Gemm for float and float 8.)DOC")
+                                .Attr(
+                                    "transA",
+                                    "Whether A should be transposed. Float 8 only supprted transA=0.",
+                                    AttributeProto::INT,
+                                    static_cast<int64_t>(0))
+                                .Attr(
+                                    "transB",
+                                    "Whether B should be transposed. Float 8 only supprted transB=1.",
+                                    AttributeProto::INT,
+                                    static_cast<int64_t>(0))
+                                .Attr(
+                                    "alpha",
+                                    "Scalar multiplier for the product of input tensors A * B.",
+                                    AttributeProto::FLOAT,
+                                    1.0f)
+                                .Attr(
+                                    "beta",
+                                    "Scalar multiplier for the product of input bias C.",
+                                    AttributeProto::FLOAT,
+                                    0.0f)
+                                .Attr(
+                                    "dtype",
+                                    "Output Type. Same definition as attribute 'to' for operator Cast.",
+                                    AttributeProto::INT,
+                                    static_cast<int64_t>(1))
+                                .Attr(
+                                    "activation",
+                                    "Activation function, RELU or GELU or NONE (default).",
+                                    AttributeProto::STRING,
+                                    OPTIONAL_VALUE)
+                                .Input(
+                                    0,
+                                    "A",
+                                    "Input tensor A. "
+                                    "The shape of A should be (M, K) if transA is 0, "
+                                    "or (K, M) if transA is non-zero.",
+                                    "TA")
+                                .Input(
+                                    1,
+                                    "B",
+                                    "Input tensor B. "
+                                    "The shape of B should be (K, N) if transB is 0, "
+                                    "or (N, K) if transB is non-zero.",
+                                    "TB")
+                                .Input(
+                                    2,
+                                    "C",
+                                    "Input tensor C.",
+                                    "TC",
+                                    OpSchema::Optional)
+                                .Input(
+                                    3,
+                                    "scaleA",
+                                    "Scale of tensor A if A is float 8 tensor",
+                                    "TS",
+                                    OpSchema::Optional)
+                                .Input(
+                                    4,
+                                    "scaleB",
+                                    "Scale of tensor B if B is float 8 tensor",
+                                    "TS",
+                                    OpSchema::Optional)
+                                .Input(
+                                    5,
+                                    "scaleY",
+                                    "Scale of the output tensor if A or B is float 8.",
+                                    "TS",
+                                    OpSchema::Optional)
+                                .Output(0, "Y", "Output tensor of shape (M, N).", "TR")
+                                .TypeConstraint(
+                                    "TA",
+                                    GEMM_FLOAT8_TYPES,
+                                    "Constrain type to input A.")
+                                .TypeConstraint(
+                                    "TB",
+                                    GEMM_FLOAT8_TYPES,
+                                    "Constrain type to input B.")
+                                .TypeConstraint(
+                                    "TC",
+                                    {"tensor(float16)", "tensor(bfloat16)", "tensor(float)"},
+                                    "Constrain type to input C.")
+                                .TypeConstraint(
+                                    "TR",
+                                    GEMM_FLOAT8_TYPES,
+                                    "Constrain type to result type.")
+                                .TypeConstraint("TS", {"tensor(float)"},
+                                                "Constrain type for all input scales (scaleA, scaleB, scaleY).")
+                                .TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
+                                  propagateElemTypeFromAttributeToOutput(ctx, "dtype", 0, TensorProto::FLOAT);
+                                  if (!hasNInputShapes(ctx, 2)) {
+                                    return;
+                                  }
+                                  auto transAAttr = ctx.getAttribute("transA");
+                                  bool transA = transAAttr ? static_cast<int>(transAAttr->i()) != 0 : false;
+                                  auto transBAttr = ctx.getAttribute("transB");
+                                  bool transB = transBAttr ? static_cast<int>(transBAttr->i()) != 0 : false;
+                                  auto& first_input_shape = getInputShape(ctx, 0);
+                                  auto& second_input_shape = getInputShape(ctx, 1);
+                                  if (first_input_shape.dim_size() != 2) {
+                                    fail_shape_inference("First input does not have rank 2");
+                                  }
+                                  if (second_input_shape.dim_size() != 2) {
+                                    fail_shape_inference("Second input does not have rank 2");
+                                  }
+                                  updateOutputShape(ctx, 0, {first_input_shape.dim(transA ? 1 : 0), second_input_shape.dim(transB ? 0 : 1)});
+                                }));
+
+static void MatmulWithQuantWeightShapeInference(ONNX_NAMESPACE::InferenceContext& ctx,
+                                                int64_t K,
+                                                int64_t N,
+                                                bool transB) {
+  int input_a_idx = 0;
+  if (!hasInputShape(ctx, input_a_idx)) {
+    return;
+  }
+
+  const auto& a_shape = ctx.getInputType(input_a_idx)->tensor_type().shape();
+  if (a_shape.dim_size() == 0) {
+    fail_shape_inference("Input tensors of wrong rank (0).");
+  }
+
+  // TODO: check B shape
+
+  const auto& dim_last = a_shape.dim(a_shape.dim_size() - 1);
+  ONNX_NAMESPACE::TensorShapeProto resultShape;
+  if (dim_last.has_dim_value() && dim_last.dim_value() != (transB ? K : N)) {
+    fail_shape_inference("Incompatible dimensions for matrix multiplication");
+  }
+
+  for (int i = 0; i < a_shape.dim_size() - 1; ++i) {
+    *resultShape.add_dim() = a_shape.dim(i);
+  }
+  resultShape.add_dim()->set_dim_value(transB ? N : K);
+
+  *ctx.getOutputType(0)->mutable_tensor_type()->mutable_shape() = resultShape;
+}
+
 void RegisterContribSchemas() {
   ONNX_CONTRIB_OPERATOR_SCHEMA_ELSEWHERE(AttnLSTM, RegisterAttnLSTMContribOpSchema);
   ONNX_CONTRIB_OPERATOR_SCHEMA_ELSEWHERE(Range, RegisterRangeOpSchema);
@@ -2844,6 +3202,83 @@ void RegisterContribSchemas() {
         propagateElemTypeFromInputToOutput(ctx, 0, 0);
       });
 
+  ONNX_CONTRIB_OPERATOR_SCHEMA(EPContext)
+      .SetDomain(kMSDomain)
+      .SinceVersion(1)
+      .SetDoc("Onnx node container for EP context.")
+      .Attr(
+          "main_context",
+          "Usually each single EPContext associate with a graph partition."
+          "But for some case like QNN, it has single EPContext contains all partitions."
+          "In that case, the node with ep_cache_context should set main_context=1. Other nodes set main_context=0 and skip ep_cache_context."
+          "The path is relative to this Onnx file. Default is 1.",
+          AttributeProto::INT,
+          static_cast<int64_t>(1))
+      .Attr(
+          "ep_cache_context",
+          "payload of the execution provider context if embed_mode=1, or path to the context file if embed_mode=0.",
+          AttributeProto::STRING,
+          OPTIONAL_VALUE)
+      .Attr(
+          "embed_mode",
+          "1: indicate ep_cache_context is the context content. 0: indicate ep_cache_context is the file path to the context content."
+          "The path is relative to this Onnx file. Default is 1.",
+          AttributeProto::INT,
+          static_cast<int64_t>(1))
+      .Attr(
+          "ep_sdk_version",
+          "(Optional) SDK version used to convert the model.",
+          AttributeProto::STRING,
+          OPTIONAL_VALUE)
+      .Attr(
+          "partition_name",
+          "(Optional) partitioned graph name.",
+          AttributeProto::STRING,
+          OPTIONAL_VALUE)
+      .Attr(
+          "source",
+          "(Optional) the source used to generate the engine/context cache file. Ort EP or native SDK tool chain",
+          AttributeProto::STRING,
+          OPTIONAL_VALUE)
+      .Attr("notes", "(Optional) Some notes for the model", AttributeProto::STRING, OPTIONAL_VALUE)
+      .AllowUncheckedAttributes()
+      .Input(
+          0,
+          "inputs",
+          "List of tensors for inputs",
+          "T",
+          OpSchema::Variadic,
+          true,
+          1,
+          OpSchema::NonDifferentiable)
+      .Output(
+          0,
+          "outputs",
+          "One or more outputs, list of tensors for outputs",
+          "T",
+          OpSchema::Variadic,
+          true,
+          1,
+          OpSchema::NonDifferentiable)
+      .TypeConstraint(
+          "T",
+          {"tensor(int8)",
+           "tensor(int16)",
+           "tensor(int32)",
+           "tensor(int64)",
+           "tensor(uint8)",
+           "tensor(uint16)",
+           "tensor(uint32)",
+           "tensor(uint64)",
+           "tensor(float16)",
+           "tensor(float)",
+           "tensor(double)"},
+          "Constrain input and output types.")
+      .TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
+        // Type inference
+        propagateElemTypeFromInputToOutput(ctx, 0, 0);
+      });
+
   static const char* BitmaskDropout_ver1_doc = R"DOC(
 BitmaskDropout takes an input floating-point tensor, an optional input ratio (floating-point scalar) and an optional input training_mode (boolean scalar).
 It produces two tensor outputs: output (floating-point tensor) and mask (optional `Tensor<uint32>`). If `training_mode` is true then the output Y will be a random dropout.
@@ -2895,6 +3330,119 @@ This op functions in much the same was as Dropout-11 and Dropout-13 do, execpt t
         }
       });
 
+  static const char* MatMulNBits_ver1_doc = R"DOC(
+MatMulNBits is a MatMul with weight quantized with N bits(e.g., 2, 3, 4, 5, 6, 7).It does Matrix Multiplication like MatMul (https://github.com/onnx/onnx/blob/main/docs/Operators.md#matmul) with differences:
+  1. Input B is a 2D constant Matrix. Its input feature count and output feature count are specified by attribute 'K' and 'N'.
+  2. Input B is quantized with x bits which is specified by attribute 'bits'. It is quantized blockwisely along dimension 0 (e.g. column) with block size specified by attribute block_size.
+     And block_size is not an arbitrary number and must be a power of 2 and not smaller than 16, like 16, 32, 64, 128,..
+  3. Input B's scale and zero point are specified by input scales and zero_points.
+
+Input B is stored as uint8_t with shape: [N][n_blocks_per_col][blob_size] in which:
+- n_blocks_per_col = (K + block_size - 1) / block_size
+- blob_size = block_size / 8 * bits
+
+  For a block blob. It is stored in format:
+  struct Blob {
+    uint8 one_bits[(bits & 0x1) * 1 * block_size / 8];  // highest 1 bit for 3, 5, 7 bits quantization
+    uint8 two_bits[(bits & 0x2) * 2 * block_size / 8];  // high 2 bits for 2, 6, 7 bits quantization
+    uint8 four_bits[(bits & 0x4) * 4 * block_size / 8]; // low 4 bits for 4, 5, 6 bits quantization
+  }
+
+Input scales is stored in same type as original type of B(float32, float16) with shape like: [N * n_blocks_per_col]
+Input zero_points is stored as uint8_t. If bits <= 4, two zero points are stored as one unit8_t. If bits > 4, one zero point is stored with one unit8_t. Thus, its shape is:
+  - [(N * n_blocks_per_col + 1) / 2] if bits <=4
+  - [N * n_blocks_per_col] if bits > 4
+
+)DOC";
+
+  ONNX_CONTRIB_OPERATOR_SCHEMA(MatMulNBits)
+      .SetDomain(kMSDomain)
+      .SinceVersion(1)
+      .SetDoc(MatMulNBits_ver1_doc)
+      .Attr("K", "size of each input feature", AttributeProto::INT)
+      .Attr("N", "size of each output feature", AttributeProto::INT)
+      .Attr("bits", "number of bits used for weight quantization (default 4)", AttributeProto::INT)
+      .Attr("block_size", "number of groupsize used for weight quantization,(default 128). It needs to be a power of 2 and not smaller than 16.", AttributeProto::INT)
+      .Input(0, "A", "The input tensor, not quantized", "T1")
+      .Input(1, "B", "1-dimensional data blob", "T2")
+      .Input(2, "scales", "quantization scale", "T1")
+      .Input(3, "zero_points", "quantization zero points", "T2", OpSchema::Optional)
+      .Output(0, "Y", "tensor. The output tensor has the same rank as the input. ", "T1")
+      .TypeConstraint("T1", {"tensor(float)", "tensor(float16)"}, "Constrain input and output types to float/half_float tensors.")
+      .TypeConstraint("T2", {"tensor(uint8)"}, "Constrain quantized weight types to uint8.")
+      .TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
+        // Type inference
+        propagateElemTypeFromInputToOutput(ctx, 0, 0);
+        // Shape inference
+        int64_t in_features = getAttribute(ctx, "K", -1);
+        int64_t out_features = getAttribute(ctx, "N", -1);
+        MatmulWithQuantWeightShapeInference(ctx, in_features, out_features, true);
+      });
+
+  static const char* MatMulBnb4_ver1_doc = R"DOC(
+MatMulBnb4 is a MatMul with weight quantized with 4 bits using either FP4 or NF4 data type (https://arxiv.org/pdf/2305.14314.pdf). It does Matrix Multiplication like MatMul (https://github.com/onnx/onnx/blob/main/docs/Operators.md#matmul) with differences:
+  1. Input B is a 2D constant Matrix. Its input feature count and output feature count are specified by attribute 'K' and 'N'.
+  2. Input B is quantized with 4 bits with quantization data type specified by attribute 'quant_type'. It is transposed, flattened and quantized blockwisely with block size specified by attribute 'block_size'.
+     And block_size is not an arbitrary number and must be a power of 2 and not smaller than 16, like 16, 32, 64, 128,..
+  3. Input B's quantization constants or scales are specified by input 'absmax'.
+
+  Input B is stored as uint8_t with shape: [(N * K + 1) / 2].
+  Input absmax is stored in same type as original type of B(float32, float16) with shape like: [(N * K + block_size - 1) / block_size].
+
+
+  1. (Default value) transB=True (Majorly used for forward pass)
+    Shape of A: [D0, D1, ..., Dn, K]
+    Shape of Dequanted B: [N, K], this is aligned with how PyTorch defined the linear weight, .e.g [out_features, in_features].
+
+    The computation math:
+      dequant_B = dequant(B, absmax, quant_type, block_size)
+      transposed_dequant_B = dequant_B^T
+      output = A @ transposed_dequant_B
+
+    Shape of output: [D0, D1, ..., Dn, N]
+
+  2. transB=False (Majorly used for backward pass)
+    Shape of A: [D0, D1, ..., Dn, N]
+    Shape of Dequanted B: [N, K], this is aligned with how PyTorch defined the linear weight, .e.g [out_features, in_features].
+
+    The computation math:
+      dequant_B = dequant(B, absmax, quant_type, block_size)
+      output = A @ dequant_B
+
+    Shape of output: [D0, D1, ..., Dn, K]
+
+)DOC";
+
+  ONNX_CONTRIB_OPERATOR_SCHEMA(MatMulBnb4)
+      .SetDomain(kMSDomain)
+      .SinceVersion(1)
+      .SetDoc(MatMulBnb4_ver1_doc)
+      .Attr("K", "size of each input feature", AttributeProto::INT)
+      .Attr("N", "size of each output feature", AttributeProto::INT)
+      .Attr("block_size", "number of groupsize used for weight quantization. It needs to be a power of 2 and not smaller than 16.", AttributeProto::INT)
+      .Attr("quant_type", "quantization data type. 0 for FP4, 1 for NF4.", AttributeProto::INT)
+      .Attr("training_mode",
+            "Indicate if the ops run in training_mode, by default, False.",
+            AttributeProto::INT,
+            static_cast<int64_t>(0))
+      .Attr("transB", "Whether B should be transposed on the last two dimensions before doing multiplication. Default to be 1.",
+            AttributeProto::INT, static_cast<int64_t>(1))
+      .Input(0, "A", "The input tensor, not quantized", "T1")
+      .Input(1, "B", "1-dimensional quantized data for weight", "T2")
+      .Input(2, "absmax", "quantization constants", "T1")
+      .Output(0, "Y", "tensor. The output tensor has the same rank as the input. ", "T1")
+      .TypeConstraint("T1", {"tensor(float)", "tensor(float16)", "tensor(bfloat16)"}, "Constrain input and output types to float/half_float/brain_float tensors.")
+      .TypeConstraint("T2", {"tensor(uint8)"}, "Constrain quantized weight types to uint8.")
+      .TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
+        // Type inference
+        propagateElemTypeFromInputToOutput(ctx, 0, 0);
+        // Shape inference
+        int64_t in_features = getAttribute(ctx, "K", -1);
+        int64_t out_features = getAttribute(ctx, "N", -1);
+        bool transB = getAttribute(ctx, "transB", 1) != 0;
+        MatmulWithQuantWeightShapeInference(ctx, in_features, out_features, transB);
+      });
+
 #ifdef ENABLE_ATEN
   ONNX_CONTRIB_OPERATOR_SCHEMA(ATen)
       .SetDomain(kPytorchAtenDomain)
@@ -3003,7 +3551,7 @@ Having this op allows runtime to do operator re-ordering to reduce compute FLOPs
   }
 #endif
 
-#ifdef USE_MPI
+#ifdef ORT_USE_NCCL
   RegisterCollectiveOps();
 #endif
 }
diff --git a/onnxruntime/core/graph/contrib_ops/contrib_defs.h b/onnxruntime/core/graph/contrib_ops/contrib_defs.h
index 2d5b4f8e76cc2..5b3904669f9fc 100644
--- a/onnxruntime/core/graph/contrib_ops/contrib_defs.h
+++ b/onnxruntime/core/graph/contrib_ops/contrib_defs.h
@@ -53,7 +53,7 @@ void RegisterContribSchemas();
 void RegisterNchwcSchemas();
 void RegisterQuantizationSchemas();
 
-#if defined(USE_MPI)
+#if defined(ORT_USE_NCCL)
 void RegisterCollectiveOps();
 #endif
 
diff --git a/onnxruntime/core/graph/contrib_ops/diffusion_defs.cc b/onnxruntime/core/graph/contrib_ops/diffusion_defs.cc
index c2f5edaa6149b..f81c3b8e0182c 100644
--- a/onnxruntime/core/graph/contrib_ops/diffusion_defs.cc
+++ b/onnxruntime/core/graph/contrib_ops/diffusion_defs.cc
@@ -42,7 +42,7 @@ ONNX_MS_OPERATOR_SET_SCHEMA(
               "The number of groups of channels. It should be a divisor of the number of channels C",
               AttributeProto::INT)
         .Attr("activation",
-              "Activation after group normalization: 0 for None, 1 for Swish",
+              "Activation after group normalization: 0 for None, 1 for SiLU",
               AttributeProto::INT)
         .Attr("channels_last",
               "1 if the input and output are in the NHWC layout, 0 if it is in the NCHW layout. Defaults to 1.",
@@ -68,6 +68,85 @@ ONNX_MS_OPERATOR_SET_SCHEMA(
         .TypeConstraint("M", {"tensor(float16)", "tensor(float)"}, "Constrain gamma and beta to float tensors.")
         .TypeAndShapeInferenceFunction(ONNX_NAMESPACE::propagateShapeAndTypeFromFirstInput));
 
+constexpr const char* SkipGroupNorm_ver1_doc = R"DOC(
+This operator element-wise adds x, skip and bias, then apply group normalization and optional activation.
+
+This operator transforms input according to
+  s = x + skip + bias
+  y = gamma * (s - mean) / sqrt(variance + epsilon) + beta
+
+The input channels are separated into num_groups groups, each containing num_channels / num_groups channels.
+The num_channels must be divisible by num_groups.
+The mean and standard-deviation of s are calculated separately over the each group.
+The weight and bias are per-channel affine transform parameter vectors of size num_channels.
+
+The activation attribute can be used to enable activation after group normalization.
+)DOC";
+
+ONNX_MS_OPERATOR_SET_SCHEMA(
+    SkipGroupNorm, 1,
+    OpSchema()
+        .SetDoc(SkipGroupNorm_ver1_doc)
+        .Attr("epsilon", "The epsilon value to use to avoid division by zero",
+              AttributeProto::FLOAT, static_cast<float>(1e-5))
+        .Attr("groups",
+              "The number of groups of channels. It should be a divisor of the number of channels C",
+              AttributeProto::INT)
+        .Attr("activation",
+              "Activation after group normalization: 0 for None, 1 for SiLU",
+              AttributeProto::INT)
+        .Attr("channels_last",
+              "1 if the input and output are in the NHWC layout, 0 if it is in the NCHW layout. Defaults to 1.",
+              AttributeProto::INT,
+              static_cast<int64_t>(1))
+        .Input(0,
+               "X",
+               "Input data tensor. Dimensions are (N x H x W x C) when channels_last is 1 "
+               " or (N x C x H x W) otherwise, where N is the batch size, C is the number of channels,"
+               " and H and W are the height and width of the data",
+               "T")
+        .Input(1,
+               "gamma",
+               "1D gamma tensor for normalization with shape (C), where C is number of channels",
+               "M")
+        .Input(2,
+               "beta",
+               "1D beta tensor for normalization with shape (C), where C is number of channels",
+               "M")
+        .Input(3,
+               "skip",
+               "4D or 2D skip tensor. The shape can be (N x H x W x C) or (N x 1 x 1 x C) or (N x C)",
+               "T")
+        .Input(4,
+               "bias",
+               "1D bias tensor. Dimensions are (C), where C is number of channels",
+               "T",
+               OpSchema::Optional)
+        .Output(0,
+                "Y",
+                "The output tensor of the same shape as X",
+                "T")
+        .Output(1,
+                "S",
+                "The element-wise sum of input x, skip and bias tensors. It has the same shape as X",
+                "T",
+                OpSchema::Optional)
+        .TypeConstraint("T", {"tensor(float16)", "tensor(float)"}, "Constrain input X, skip, bias and output Y, S types to float tensors.")
+        .TypeConstraint("M", {"tensor(float16)", "tensor(float)"}, "Constrain gamma and beta to float tensors.")
+        .TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
+          propagateElemTypeFromInputToOutput(ctx, 0, 0);
+          if (ctx.getNumOutputs() > 1) {
+            propagateElemTypeFromInputToOutput(ctx, 0, 1);
+          }
+
+          if (hasInputShape(ctx, 0)) {
+            propagateShapeFromInputToOutput(ctx, 0, 0);
+            if (ctx.getNumOutputs() > 1) {
+              propagateShapeFromInputToOutput(ctx, 0, 1);
+            }
+          }
+        }));
+
 constexpr const char* BiasSplitGelu_ver1_doc = R"DOC(
 A fusion used in diffusion model that after adding bias, hidden state is sliced into two tensors of same size, then left
 tensor multiplies the Gelu activation result of right tensor.
diff --git a/onnxruntime/core/graph/contrib_ops/internal_nhwc_onnx_schemas.cc b/onnxruntime/core/graph/contrib_ops/internal_nhwc_onnx_schemas.cc
index 3ce7c40e754dc..c8960578f9e3d 100644
--- a/onnxruntime/core/graph/contrib_ops/internal_nhwc_onnx_schemas.cc
+++ b/onnxruntime/core/graph/contrib_ops/internal_nhwc_onnx_schemas.cc
@@ -90,13 +90,18 @@ void RegisterNHWCSchemaWithActivation(const RegistrationFunc& f, ::ONNX_NAMESPAC
 void OpSet_Internal_NHWC_ONNX::ForEachSchema(const std::function<void(ONNX_NAMESPACE::OpSchema&&)>& fn) {
   // if the operator may be fused with an activation, use the WITH_ACTIVATION variant to add optional attributes
   // for the activation parameters.
-  // For now we only register operators from opset 11 on. Models can easily have their opset updated using ONNX tools
+  // We mainly register operators from opset 11 on . Models can easily have their opset updated using ONNX tools
   // so supporting older opsets is unnecessary.
+  // Older opsets are included on a per-operator basis as needed.
 
   // NOTE: This should be in sync with GetLayoutSensitiveOps in
-  // /onnxruntime/core/optimizer/transpose_optimizer/transpose_optimizer.cc
+  // /onnxruntime/core/optimizer/transpose_optimization/transpose_optimizer.cc
+  REGISTER_NHWC_SCHEMA_WITH_ACTIVATION(fn, AveragePool, 7);
+  REGISTER_NHWC_SCHEMA_WITH_ACTIVATION(fn, AveragePool, 10);
   REGISTER_NHWC_SCHEMA_WITH_ACTIVATION(fn, AveragePool, 11);
+  REGISTER_NHWC_SCHEMA_WITH_ACTIVATION(fn, AveragePool, 19);
 
+  REGISTER_NHWC_SCHEMA_WITH_ACTIVATION(fn, BatchNormalization, 7);
   REGISTER_NHWC_SCHEMA_WITH_ACTIVATION(fn, BatchNormalization, 9);
   REGISTER_NHWC_SCHEMA_WITH_ACTIVATION(fn, BatchNormalization, 14);
   REGISTER_NHWC_SCHEMA_WITH_ACTIVATION(fn, BatchNormalization, 15);
@@ -106,16 +111,18 @@ void OpSet_Internal_NHWC_ONNX::ForEachSchema(const std::function<void(ONNX_NAMES
 
   REGISTER_NHWC_SCHEMA_WITH_ACTIVATION(fn, InstanceNormalization, 6);
 
+  REGISTER_NHWC_SCHEMA_WITH_ACTIVATION(fn, Conv, 1);
   REGISTER_NHWC_SCHEMA_WITH_ACTIVATION(fn, Conv, 11);
 
-  REGISTER_NHWC_SCHEMA_WITH_ACTIVATION(fn, ConvTranspose, 11);
   REGISTER_NHWC_SCHEMA_WITH_ACTIVATION(fn, ConvTranspose, 1);
+  REGISTER_NHWC_SCHEMA_WITH_ACTIVATION(fn, ConvTranspose, 11);
 
   REGISTER_NHWC_SCHEMA(fn, GlobalAveragePool, 1);
   REGISTER_NHWC_SCHEMA(fn, GlobalLpPool, 2);
   REGISTER_NHWC_SCHEMA(fn, GlobalMaxPool, 1);
 
   REGISTER_NHWC_SCHEMA(fn, GridSample, 16);
+  REGISTER_NHWC_SCHEMA(fn, GridSample, 20);
 
   REGISTER_NHWC_SCHEMA(fn, LRN, 1);
   REGISTER_NHWC_SCHEMA(fn, LRN, 13);
@@ -123,6 +130,9 @@ void OpSet_Internal_NHWC_ONNX::ForEachSchema(const std::function<void(ONNX_NAMES
   REGISTER_NHWC_SCHEMA(fn, LpPool, 11);
   REGISTER_NHWC_SCHEMA(fn, LpPool, 18);
 
+  REGISTER_NHWC_SCHEMA_WITH_ACTIVATION(fn, MaxPool, 1);
+  REGISTER_NHWC_SCHEMA_WITH_ACTIVATION(fn, MaxPool, 8);
+  REGISTER_NHWC_SCHEMA_WITH_ACTIVATION(fn, MaxPool, 10);
   REGISTER_NHWC_SCHEMA_WITH_ACTIVATION(fn, MaxPool, 11);
   REGISTER_NHWC_SCHEMA_WITH_ACTIVATION(fn, MaxPool, 12);
 
diff --git a/onnxruntime/core/graph/contrib_ops/ms_opset.h b/onnxruntime/core/graph/contrib_ops/ms_opset.h
index 3c31997286254..5eef1b33a24dd 100644
--- a/onnxruntime/core/graph/contrib_ops/ms_opset.h
+++ b/onnxruntime/core/graph/contrib_ops/ms_opset.h
@@ -47,6 +47,7 @@ class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, QOrderedLongformerAttent
 // Others
 class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, Attention);
 class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, BeamSearch);
+class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, WhisperBeamSearch);
 class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, BiasDropout);
 class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, BitmaskBiasDropout);
 class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, BiasGelu);
@@ -82,7 +83,9 @@ class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, MatMulInteger16);
 class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, MatMulFpQ4);
 #endif
 class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, MaxpoolWithMask);
+class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, MoE);
 class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, MultiHeadAttention);
+class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, GroupQueryAttention);
 class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, MurmurHash3);
 class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, NGramRepeatBlock);
 class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, Pad);
@@ -93,8 +96,10 @@ class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, GatedRelativePositionBia
 class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, RemovePadding);
 class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, RestorePadding);
 class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, Rfft);
+class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, RotaryEmbedding);
 class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, SampleOp);
 class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, Sampling);
+class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, SkipGroupNorm);
 class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, SkipLayerNormalization);
 class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, SkipSimplifiedLayerNormalization);
 class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, SparseToDenseMatMul);
@@ -102,11 +107,14 @@ class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, Tokenizer);
 class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, TorchEmbedding);
 class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, TransposeMatMul);
 class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, Trilu);
+class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, UnfoldTensor);
+class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, DynamicTimeWarping);
 class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, Unique);
 class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, WordConvEmbedding);
 class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, GemmFastGelu);
 class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, DecoderMaskedSelfAttention);
 class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, DecoderMaskedMultiHeadAttention);
+class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, GemmFloat8);
 
 class OpSet_Microsoft_ver1 {
  public:
@@ -146,6 +154,7 @@ class OpSet_Microsoft_ver1 {
 
     fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, Attention)>());
     fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, BeamSearch)>());
+    fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, WhisperBeamSearch)>());
     fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, BiasDropout)>());
     fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, BitmaskBiasDropout)>());
     fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, BiasGelu)>());
@@ -181,7 +190,9 @@ class OpSet_Microsoft_ver1 {
     fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, MatMulFpQ4)>());
 #endif
     fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, MaxpoolWithMask)>());
+    fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, MoE)>());
     fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, MultiHeadAttention)>());
+    fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, GroupQueryAttention)>());
     fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, MurmurHash3)>());
     fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, NGramRepeatBlock)>());
     fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, Pad)>());
@@ -194,8 +205,10 @@ class OpSet_Microsoft_ver1 {
     fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, RemovePadding)>());
     fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, RestorePadding)>());
     fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, Rfft)>());
+    fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, RotaryEmbedding)>());
     fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, SampleOp)>());
     fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, Sampling)>());
+    fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, SkipGroupNorm)>());
     fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, SkipLayerNormalization)>());
     fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, SkipSimplifiedLayerNormalization)>());
     fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, SparseToDenseMatMul)>());
@@ -203,11 +216,14 @@ class OpSet_Microsoft_ver1 {
     fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, TorchEmbedding)>());
     fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, TransposeMatMul)>());
     fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, Trilu)>());
+    fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, UnfoldTensor)>());
+    fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, DynamicTimeWarping)>());
     fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, Unique)>());
     fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, WordConvEmbedding)>());
     fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, GemmFastGelu)>());
     fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, DecoderMaskedSelfAttention)>());
     fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, DecoderMaskedMultiHeadAttention)>());
+    fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, GemmFloat8)>());
   }
 };
 }  // namespace contrib
diff --git a/onnxruntime/core/graph/function_utils.cc b/onnxruntime/core/graph/function_utils.cc
index 7477f48088a15..a266c9ab04a2e 100644
--- a/onnxruntime/core/graph/function_utils.cc
+++ b/onnxruntime/core/graph/function_utils.cc
@@ -373,7 +373,8 @@ class Inliner {
   // Replace given name with a unique version of the name, and cache the
   // renaming-binding in current scope.
   void make_unique(std::string& name) {
-    auto new_name = prefix_ + name;
+    auto new_name{prefix_};
+    new_name.append("_").append(name);
     auto& current_scope = rename_scopes_.back();
     current_scope[name] = new_name;
     name = std::move(new_name);
@@ -410,7 +411,7 @@ class Inliner {
       std::string rename_as = actuals.Get(i);
       if constexpr (isOutput) {
         if (rename_as.empty())
-          rename_as.assign(prefix_).append(formal);
+          rename_as.assign(prefix_).append("_").append(formal);
       }
       current_scope[formal] = rename_as;
       if (!rename_as.empty())
@@ -420,7 +421,7 @@ class Inliner {
       std::string& formal = *formals.Mutable(i);
       std::string rename_as;
       if constexpr (isOutput) {
-        rename_as.assign(prefix_).append(formal);
+        rename_as.assign(prefix_).append("_").append(formal);
       }
       current_scope[formal] = rename_as;
       if (!rename_as.empty())
@@ -431,7 +432,7 @@ class Inliner {
   // Process a node:
   void transform(NodeProto& n) {
     if (!n.name().empty())
-      n.set_name(prefix_ + n.name());
+      n.set_name(prefix_ + "_" + n.name());
 
     for (auto& x : *n.mutable_input()) {
       rename(x, false);
diff --git a/onnxruntime/core/graph/graph.cc b/onnxruntime/core/graph/graph.cc
index 383c1d689d3c3..d489a59c4b798 100644
--- a/onnxruntime/core/graph/graph.cc
+++ b/onnxruntime/core/graph/graph.cc
@@ -860,18 +860,18 @@ Status Node::LoadEdgesFromOrtFormat(const onnxruntime::fbs::NodeEdge& fbs_node_e
 }
 
 #if !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD) || defined(ORT_MINIMAL_BUILD_CUSTOM_OPS)
-void Node::Init(const std::string& name,
-                const std::string& op_type,
-                const std::string& description,
-                const std::vector<NodeArg*>& input_args,
-                const std::vector<NodeArg*>& output_args,
+void Node::Init(std::string_view name,
+                std::string_view op_type,
+                std::string_view description,
+                gsl::span<NodeArg* const> input_args,
+                gsl::span<NodeArg* const> output_args,
                 const NodeAttributes* attributes,
-                const std::string& domain) {
+                std::string_view domain) {
   name_ = name;
   op_type_ = op_type;
   description_ = description;
-  definitions_.input_defs = input_args;
-  definitions_.output_defs = output_args;
+  definitions_.input_defs.assign(input_args.begin(), input_args.end());
+  definitions_.output_defs.assign(output_args.begin(), output_args.end());
   domain_ = domain;
   can_be_saved_ = true;
   priority_ = 0;
@@ -984,6 +984,7 @@ bool Node::ClearAttribute(const std::string& attr_name) {
   graph_->SetGraphProtoSyncNeeded();
   return attributes_.erase(attr_name) > 0;
 }
+
 #endif  // !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD)
 
 int Node::PruneRemovableAttributes(gsl::span<const std::string> removable_attributes) {
@@ -1145,7 +1146,8 @@ Graph::Graph(const Model& owning_model,
              IOnnxRuntimeOpSchemaCollectionPtr schema_registry,
              const logging::Logger& logger,
              bool strict_shape_type_inference)
-    : Graph(owning_model, graph_proto, domain_to_version, ir_version, schema_registry, nullptr, nullptr, logger, strict_shape_type_inference) {}
+    : Graph(owning_model, graph_proto, domain_to_version, ir_version,
+            schema_registry, nullptr, nullptr, logger, strict_shape_type_inference) {}
 
 Graph::Graph(const Model& owning_model,
              GraphProto* graph_proto, const std::unordered_map<std::string, int>& domain_to_version, Version ir_version,
@@ -3261,8 +3263,8 @@ Node& Graph::AddNode(const std::string& name,
                      gsl::span<NodeArg* const> output_args,
                      const NodeAttributes* attributes,
                      const std::string& domain) {
-  std::vector<NodeArg*> inputs;
-  std::vector<NodeArg*> outputs;
+  InlinedVector<NodeArg*> inputs;
+  InlinedVector<NodeArg*> outputs;
   inputs.resize(input_args.size());
   outputs.resize(output_args.size());
   int i = 0;
@@ -3907,7 +3909,8 @@ Node& Graph::CreateFusedSubGraphNode(const IndexedSubGraph& sub_graph, const std
   // kernel lookup works as per usual, if not using an existing schema.
   if (sub_graph.schema_source == IndexedSubGraph::SourceOfSchema::EXISTING) {
     ORT_ENFORCE(SetOpSchemaFromRegistryForNode(fused_node),
-                "Schema was not found for fused node. Domain:", fused_node.Domain(), " OpType:", fused_node.OpType());
+                "Schema was not found for fused node. Domain:", fused_node.Domain(), " OpType:", fused_node.OpType(),
+                " SinceVersion:", fused_node.SinceVersion());
   } else if (IndexedSubGraph::SourceOfSchema::REUSE_OR_CREATE == sub_graph.schema_source) {
     auto schema_key = GenerateSchemaKey(sub_graph);
     if (reusable_fused_schema_map_.count(schema_key) == 0) {
@@ -4019,69 +4022,424 @@ Node& Graph::FuseSubGraph(const IndexedSubGraph& sub_graph,
   return fused_node;
 }
 
-Status Graph::InlineFunction(Node& callnode) {
-  const auto& model_path = ModelPath();
-  auto output_edges = callnode.GetRelationships().output_edges;
-  for (const auto& output_edge : output_edges) {
-    RemoveEdge(callnode.Index(), output_edge.GetNode().Index(), output_edge.GetSrcArgIndex(), output_edge.GetDstArgIndex());
+Status Graph::AddConstantProtoAsInitializer(const ONNX_NAMESPACE::NodeProto& node_proto,
+                                            std::optional<std::string_view> new_name) {
+  const gsl::not_null<TensorProto*> tensor{graph_proto_->add_initializer()};
+  ORT_RETURN_IF_ERROR(utils::ConstantNodeProtoToTensorProto(node_proto, ModelPath(), *tensor, node_proto.output(0)));
+
+  if (new_name.has_value()) {
+    tensor->set_name(std::string(new_name.value()));
   }
 
-  // create a uniq_identifier to append to every node name and intermediate input\outputs
-  // to make sure there are no unintended duplicates
-  std::stringstream ss;
-  ss << "_inline_" << callnode.OpType();
-  auto uniq_identifier = GenerateNodeName(ss.str());
-  // Replace a (function-call) node by an inlined graph.
-  if (!callnode.GetFunctionBody()) {
-    // This is the normal use-case: inlining a FunctionProto (representing
-    // a model-local function or a schema-defined function).
-    FunctionProto inlined_fp;
-    ORT_ENFORCE(callnode.TryGetFunctionProto(inlined_fp), "Node has no function body and cannot be inlined.");
-    function_utils::Specialize(inlined_fp, callnode, uniq_identifier);
+  auto insert_result = name_to_initial_tensor_.emplace(tensor->name(), tensor);
+  ORT_ENFORCE(insert_result.second, "Constant node name: ", tensor->name(),
+              " conflicts with graph initializer. Check that the node names have been made unique.");
+  if (GetNodeArg(tensor->name()) == nullptr) {
+    TypeProto t{TypeProtoFromTensorProto(*tensor)};
+    ORT_IGNORE_RETURN_VALUE(GetOrCreateNodeArg(tensor->name(), &t));
+  }
+
+#if !defined(DISABLE_SPARSE_TENSORS)
+  if (node_proto.attribute(0).type() == AttributeProto_AttributeType_SPARSE_TENSOR) {
+    ORT_IGNORE_RETURN_VALUE(sparse_tensor_names_.emplace(tensor->name()));
+  }
+#endif
 
-    auto to_node_arg = [this](const std::string& name) {
-      return &this->GetOrCreateNodeArg(name, nullptr);
-    };
+  return Status::OK();
+}
 
-    // Process constant nodes first and create NodeArg for these as they become initializers
-    // It is important for the initializers to have NodeArg created, first they are needed
-    // if the initializer is unused and removed, second if the node depends on the initializer,
-    // we can have Type attached to it.
-    for (const auto& inlined_node : inlined_fp.node()) {
-      if (inlined_node.op_type() == kConstant) {
-        // Copy constant nodes _value to name_to_initial_tensor_
-        const gsl::not_null<TensorProto*> tensor{graph_proto_->add_initializer()};
-        ORT_RETURN_IF_ERROR(utils::ConstantNodeProtoToTensorProto(inlined_node, model_path, *tensor, inlined_node.output(0)));
-        auto insert_result = name_to_initial_tensor_.emplace(tensor->name(), tensor);
-        ORT_ENFORCE(insert_result.second, "Constant node name: ", tensor->name(), " in inlined function: ",
-                    inlined_fp.name(), " conflicts with graph initializer. Check Specializing code.");
-        TypeProto t{TypeProtoFromTensorProto(*tensor)};
-        ORT_IGNORE_RETURN_VALUE(GetOrCreateNodeArg(tensor->name(), &t));
+static void ReassignSubgraphDependentNodeArgs(const InlinedHashMap<std::string, NodeArg*>& name_to_nodearg,
+                                              Graph& graph) {
+  for (auto& node : graph.Nodes()) {
+    if (node.ContainsSubgraph()) {
+      for (auto& [name, subgraph] : node.GetAttributeNameToMutableSubgraphMap()) {
+        ReassignSubgraphDependentNodeArgs(name_to_nodearg, *subgraph);
       }
     }
 
-    for (const auto& inlined_node : inlined_fp.node()) {
-      if (inlined_node.op_type() != kConstant) {
-        InlinedVector<onnxruntime::NodeArg*> inputs;
-        InlinedVector<onnxruntime::NodeArg*> outputs;
+    // NodeArgs need to be updated
+    for (auto& input_def : node.MutableInputDefs()) {
+      if (input_def->Exists()) {
+        auto hit = name_to_nodearg.find(input_def->Name());
+        if (hit != name_to_nodearg.cend()) {
+          // Make sure we create a local to this subgraph definition
+          const auto* new_name_arg = hit->second;
+          input_def = &graph.GetOrCreateNodeArg(new_name_arg->Name(), input_def->TypeAsProto());
+        }
+      }
+    }
+  }
+}
+
+Status Graph::InlineIfSubgraph(bool condition_value, Node& if_node, const logging::Logger& logger) {
+  static const std::string then_branch{"then_branch"};
+  static const std::string else_branch{"else_branch"};
+  Graph* sub_graph;
+  if (condition_value) {
+    sub_graph = if_node.GetMutableGraphAttribute(then_branch);
+  } else {
+    sub_graph = if_node.GetMutableGraphAttribute(else_branch);
+  }
+
+  if (sub_graph == nullptr) {
+    auto str = MakeString("Unable to constant fold If node: '", if_node.Name(), "' Unable to fetch: ",
+                          (condition_value ? then_branch : else_branch));
+    LOGS(logger, WARNING) << str;
+    return Status::OK();
+  }
+
+  Graph& graph_to_inline = *sub_graph;
+
+  std::string unique_id{"_if_"};
+  if (condition_value) {
+    unique_id.append(then_branch);
+  } else {
+    unique_id.append(else_branch);
+  }
+
+  unique_id = GenerateNodeName(unique_id);
+
+  auto make_unique = [&unique_id](const std::string& name) {
+    return unique_id + '_' + name;
+  };
+
+  // Check if the name is an input or implicit input.
+  // These are not renamed, and we do not need to adjust subgraphs for them.
+  // Implicit inputs would cover both If node input and implicit inputs.
+  // Reason: there are no explicit inputs to the subgraphs, and the subgraph's
+  // implicit inputs must be covered by the implicit inputs of the If node.
+  InlinedHashMap<std::string_view, NodeArg*> outer_scope_values;
+  const auto& if_implicit_inputs = if_node.MutableImplicitInputDefs();
+  outer_scope_values.reserve(if_implicit_inputs.size());
+
+  for (auto* input : if_implicit_inputs) {
+    const auto& name = input->Name();
+    ORT_IGNORE_RETURN_VALUE(outer_scope_values.emplace(name, input));
+  }
+
+  // Name mapping from the graph to inline to the graph we are inlining into
+  // we also use this to process any subgraphs in the graph we are inlining
+  InlinedHashMap<std::string, NodeArg*> name_to_nodearg;
+
+  // We are going to map the outputs of the graph to inline to the outputs of the If node.
+  // They are assumed to be in the same order.
+  const auto& node_output_defs = if_node.MutableOutputDefs();
+  const auto& graph_output_defs = graph_to_inline.GetOutputs();
+  for (size_t i = 0; i < graph_output_defs.size(); ++i) {
+    name_to_nodearg.emplace(graph_output_defs[i]->Name(), node_output_defs[i]);
+  }
+
+  // Move initializers from the subgraph to the destination graph.
+  for (int i = 0, limit = graph_to_inline.graph_proto_->initializer_size(); i < limit; ++i) {
+    auto* initializer = graph_to_inline.graph_proto_->mutable_initializer(i);
+    const std::string src_name = initializer->name();
+
+#if !defined(DISABLE_SPARSE_TENSORS)
+    bool has_sparse_origin = false;
+    if (!graph_to_inline.sparse_tensor_names_.empty()) {
+      auto hit = graph_to_inline.sparse_tensor_names_.find(src_name);
+      if (hit != graph_to_inline.sparse_tensor_names_.cend()) {
+        has_sparse_origin = true;
+        // Erase the entry that will be invalidated
+        graph_to_inline.sparse_tensor_names_.erase(hit);
+      }
+    }
+#endif
+
+    graph_to_inline.name_to_initial_tensor_.erase(src_name);
+    const gsl::not_null<TensorProto*> tensor{graph_proto_->add_initializer()};
+    *tensor = std::move(*initializer);
+
+    // Check if this is an output of the graph
+    auto hit = name_to_nodearg.find(src_name);
+    if (hit != name_to_nodearg.cend()) {
+      // We rename it to If node output.
+      tensor->set_name(hit->second->Name());
+    } else {
+      NodeArg* node_arg = graph_to_inline.GetNodeArg(src_name);
+      assert(node_arg != nullptr);
+      auto new_name = GenerateNodeArgName(make_unique(src_name));
+      NodeArg& new_arg = GetOrCreateNodeArg(new_name, node_arg->TypeAsProto());
+      ORT_IGNORE_RETURN_VALUE(name_to_nodearg.emplace(src_name, &new_arg));
+      tensor->set_name(std::move(new_name));
+    }
+
+    auto insert_result = name_to_initial_tensor_.emplace(tensor->name(), tensor);
+    ORT_ENFORCE(insert_result.second, "Initializer name: ", tensor->name(), " from graph: ",
+                graph_to_inline.Name(), " conflicts with graph initializer. Check name generation above.");
+
+#if !defined(DISABLE_SPARSE_TENSORS)
+    if (has_sparse_origin) {
+      ORT_IGNORE_RETURN_VALUE(sparse_tensor_names_.emplace(tensor->name()));
+    }
+#endif
+  }
+
+  // Look up nodes that would be providing input to our nodes (implicit and explicit)
+  // and any nodes that take the output of our nodes (used to be If output)
+  // Map of NodeArg name to pair of Node* and input index in the destination node
+  using NodeAndIndex = std::pair<gsl::not_null<Node*>, int>;
+  using ArgNameToNodeMap = InlinedHashMap<std::string_view, NodeAndIndex>;
+  ArgNameToNodeMap input_args;
+  // Map of NodeArg name to pair of Node* and output index in the source node.
+  ArgNameToNodeMap output_args;
+
+  auto map_defs = [](Node& node, ArgNameToNodeMap& map, bool input) {
+    const auto defs = (input) ? node.InputDefs() : node.OutputDefs();
+    map.reserve(map.size() + defs.size());
+    int arg_pos = -1;
+    for (auto* node_arg : defs) {
+      ++arg_pos;
+      if (node_arg->Exists()) {
+        map.emplace(node_arg->Name(), std::make_pair(&node, arg_pos));
+      }
+    }
+  };
+
+  const bool is_this_main_graph = (parent_graph_ == nullptr);
+  // Map the inputs and outputs of the If node to the nodes in the graph to inline.
+  if (!is_this_main_graph) {
+    for (auto& node : Nodes()) {
+      if (node.Index() == if_node.Index()) {
+        continue;
+      }
+      map_defs(node, input_args, true);
+      map_defs(node, output_args, false);
+    }
+  }
+
+  auto* non_existing_arg = &GetOrCreateNodeArg(std::string(), nullptr);
+  // We want to make sure we get nodes in topological order
+  // because Constant folding may cause the nodes appear in
+  // a different order.
+  InlinedVector<Node*> new_nodes;
+  GraphViewer graph(graph_to_inline);
+  for (const auto node_idx : graph.GetNodesInTopologicalOrder()) {
+    // GraphViewer filters out nullptrs
+    auto* node = graph_to_inline.GetNode(node_idx);
+    assert(node->OpType() != kConstant);
+
+    // Inputs
+    // Chop off trailing non-existing defs, but preserve non-existing in the middle
+    auto& input_defs = node->MutableInputDefs();
+    auto last_existing = std::find_if(input_defs.rbegin(), input_defs.rend(),
+                                      [](const NodeArg* node_arg) { return node_arg->Exists(); });
+    input_defs.resize(std::distance(input_defs.begin(), last_existing.base()));
+
+    InlinedVector<NodeArg*> new_input_defs;
+    for (auto* input_def : node->InputDefs()) {
+      if (input_def->Exists()) {
+        // Check if this is one of the implicit graph inputs
+        // then re-assign the def to the outer scope value.
+        const auto& input_name = input_def->Name();
+        auto outer_hit = outer_scope_values.find(input_name);
+        if (outer_hit != outer_scope_values.cend()) {
+          // get/create local definition
+          NodeArg* outer_arg = outer_hit->second;
+          auto& this_scope_arg = GetOrCreateNodeArg(outer_arg->Name(), input_def->TypeAsProto());
+          new_input_defs.push_back(&this_scope_arg);
+        } else {
+          auto hit = name_to_nodearg.find(input_name);
+          if (hit != name_to_nodearg.cend()) {
+            // This is other node output in the dest graph,
+            // constant node or initializer that was renamed.
+            new_input_defs.push_back(hit->second);
+          } else {
+            ORT_THROW("Node's: ", node->Name(), " input: ", input_name,
+                      " is not If node's input or previous node output in this subgraph");
+          }
+        }
+      } else {
+        new_input_defs.push_back(non_existing_arg);
+      }
+    }
+
+    // Outputs
+    // Chop off trailing non-existing defs
+    auto& output_defs = node->MutableOutputDefs();
+    last_existing = std::find_if(output_defs.rbegin(), output_defs.rend(),
+                                 [](const NodeArg* node_arg) { return node_arg->Exists(); });
+    output_defs.resize(std::distance(output_defs.begin(), last_existing.base()));
+
+    InlinedVector<NodeArg*> new_output_defs;
+    for (auto* output_def : node->OutputDefs()) {
+      if (output_def->Exists()) {
+        const auto& output_name = output_def->Name();
+        auto hit = name_to_nodearg.find(output_name);
+        if (hit != name_to_nodearg.cend()) {
+          // This is one of the If node outputs, simply reassign the def.
+          // If node defs are already in the destination graph
+          new_output_defs.push_back(hit->second);
+        } else {
+          // We generate an output to downstream nodes.
+          auto new_name = GenerateNodeArgName(make_unique(output_name));
+          NodeArg& new_arg = GetOrCreateNodeArg(new_name, output_def->TypeAsProto());
+          new_output_defs.push_back(&new_arg);
+          ORT_IGNORE_RETURN_VALUE(name_to_nodearg.emplace(output_name, &new_arg));
+        }
+      } else {
+        new_output_defs.push_back(non_existing_arg);
+      }
+    }
+
+    const auto new_node_name = GenerateNodeName(make_unique(node->OpType()));
+    Node& new_node = AddNode(new_node_name, node->OpType(), node->Description(),
+                             new_input_defs,
+                             new_output_defs,
+                             nullptr,
+                             node->Domain());
 
-        for (const auto& tensor_name : inlined_node.input())
-          inputs.push_back(to_node_arg(tensor_name));
+    new_node.SetSinceVersion(node->SinceVersion());
+    new_node.op_ = node->op_;
 
-        for (const auto& tensor_name : inlined_node.output())
-          outputs.push_back(to_node_arg(tensor_name));
+    if (!is_this_main_graph) {
+      map_defs(new_node, input_args, true);
+      map_defs(new_node, output_args, false);
+      new_nodes.push_back(&new_node);
+    }
+
+    if (node->ContainsSubgraph()) {
+      auto& subgraphs = node->MutableSubgraphs();
+
+      // Check if any of this node implicit inputs of this graph is in the renaming map
+      // that would mean they come from the destination graph, not from the parent
+      // of the destination graph.
+      int renames_subgraph_names = 0;
+      auto& implicit_defs = node->MutableImplicitInputDefs();
+      for (auto& input_def : implicit_defs) {
+        auto hit = name_to_nodearg.find(input_def->Name());
+        if (hit != name_to_nodearg.cend()) {
+          input_def = hit->second;
+          ++renames_subgraph_names;
+        }
+      }
+
+      for (auto& subgraph : subgraphs) {
+        if (renames_subgraph_names > 0) {
+          // We need to rename the subgraph node names
+          // because they may refer to the implicit inputs
+          // that were renamed.
+          ReassignSubgraphDependentNodeArgs(name_to_nodearg, *subgraph);
+        }
+        subgraph->parent_node_ = &new_node;
+        subgraph->parent_graph_ = this;
+      }
+
+      new_node.MutableSubgraphs() = std::move(subgraphs);
+      new_node.GetMutableMapOfAttributeNameToSubgraph() = std::move(node->GetMutableMapOfAttributeNameToSubgraph());
+      new_node.MutableImplicitInputDefs() = std::move(implicit_defs);
+    }
+
+    new_node.GetMutableAttributes() = std::move(node->GetMutableAttributes());
+  }
 
-        onnxruntime::NodeAttributes new_attr_map;
-        new_attr_map.reserve(inlined_node.attribute_size());
-        for (const auto& node_attr : inlined_node.attribute()) {
-          onnx::AttributeProto attr_copy = node_attr;
-          new_attr_map[node_attr.name()] = std::move(attr_copy);
+  // Let's rebuild local connections, so next time a GraphViewer is able to perform topological sort.
+  // We only need to do so if this graph is not the main graph, because the main graph is going to resolve
+  // and it is not possible to inline the same nodes again.
+  if (!is_this_main_graph) {
+    for (auto* node : new_nodes) {
+      int arg_pos = -1;
+      for (auto* input_def : node->InputDefs()) {
+        ++arg_pos;
+        auto hit = output_args.find(input_def->Name());
+        if (hit != output_args.cend()) {
+          // The input to this node is an output from a previous node in this graph.
+          // Create relationship between this node (node), and the node providing the output (output_node).
+          const auto& [producer, src_idx] = hit->second;
+          AddEdge(producer->Index(), node->Index(), src_idx, arg_pos);
         }
-        AddNode(inlined_node.name(), inlined_node.op_type(),
-                inlined_node.doc_string(), inputs, outputs, &new_attr_map, inlined_node.domain());
       }
+
+      // Check if any of the outputs for inlined nodes are inputs to other nodes in the graph.
+      // (outputs of If node)
+      arg_pos = -1;
+      for (auto& output_def : node->OutputDefs()) {
+        ++arg_pos;
+        auto hit = input_args.find(output_def->Name());
+        if (hit != input_args.cend()) {
+          // The output of this node is an input to another node in this graph.
+          // Create relationship between this node (node), and the node using the input (input_node).
+          const auto& [consumer, dst_idx] = hit->second;
+          AddEdge(node->Index(), consumer->Index(), arg_pos, dst_idx);
+        }
+      }
+    }
+  }
+
+  LOGS(logger, INFO) << "Constant folded (inlined) " << (condition_value ? then_branch : else_branch)
+                     << " for If node: " << if_node.Name();
+
+  return Status::OK();
+}
+
+Status Graph::InlineFunctionProto(const ONNX_NAMESPACE::FunctionProto& func_to_inline) {
+  auto to_node_arg = [this](const std::string& name) {
+    return &this->GetOrCreateNodeArg(name, nullptr);
+  };
+
+  // Process constant nodes first and create NodeArg for these as they become initializers
+  // It is important for the initializers to have NodeArg created, first they are needed
+  // if the initializer is unused and removed, second if the node depends on the initializer,
+  // we can have Type attached to it.
+  InlinedVector<const NodeProto*> non_constant_nodes;
+  non_constant_nodes.reserve(func_to_inline.node_size());
+  for (const auto& inlined_node : func_to_inline.node()) {
+    if (inlined_node.op_type() == kConstant) {
+      // Copy constant nodes _value to name_to_initial_tensor_
+      ORT_RETURN_IF_ERROR(AddConstantProtoAsInitializer(inlined_node, std::nullopt));
+    } else {
+      non_constant_nodes.push_back(&inlined_node);
     }
+  }
+
+  for (const auto* inlined_node : non_constant_nodes) {
+    InlinedVector<onnxruntime::NodeArg*> inputs;
+    InlinedVector<onnxruntime::NodeArg*> outputs;
+
+    for (const auto& tensor_name : inlined_node->input())
+      inputs.push_back(to_node_arg(tensor_name));
+
+    for (const auto& tensor_name : inlined_node->output())
+      outputs.push_back(to_node_arg(tensor_name));
+
+    onnxruntime::NodeAttributes new_attr_map;
+    new_attr_map.reserve(inlined_node->attribute_size());
+    for (const auto& node_attr : inlined_node->attribute()) {
+      new_attr_map.insert_or_assign(node_attr.name(), node_attr);
+    }
+    ORT_IGNORE_RETURN_VALUE(AddNode(inlined_node->name(), inlined_node->op_type(),
+                                    inlined_node->doc_string(), inputs, outputs,
+                                    &new_attr_map, inlined_node->domain()));
+  }
+
+  return Status::OK();
+}
+
+Status Graph::InlineFunction(Node& callnode) {
+  // Remove output edges. Requirement for RemoveNode() below.
+  auto output_edges = callnode.GetRelationships().output_edges;  // copy so RemoveEdge doesn't invalidate iterator
+  for (const auto& output_edge : output_edges) {
+    RemoveEdge(callnode.Index(), output_edge.GetNode().Index(), output_edge.GetSrcArgIndex(), output_edge.GetDstArgIndex());
+  }
+
+  // create a uniq_identifier to append to every node name and intermediate input\outputs
+  // to make sure there are no unintended duplicates
+  std::string base_uniq_identifier{"_inlfunc_"};
+  base_uniq_identifier.append(callnode.OpType());
+  const auto uniq_identifier = GenerateNodeName(base_uniq_identifier);
 
+  // Replace a (function-call) node by an inlined graph.
+  if (!callnode.GetFunctionBody()) {
+    // This is the normal use-case: inlining a FunctionProto (representing
+    // a model-local function or a schema-defined function).
+    ONNX_NAMESPACE::FunctionProto inlined_fp;
+    ORT_ENFORCE(callnode.TryGetFunctionProto(inlined_fp), "Node has no function body and cannot be inlined.");
+
+    // Make all the names unique and resolve nested graphs inputs to the outer scope.
+    function_utils::Specialize(inlined_fp, callnode, uniq_identifier);
+
+    // In this case, global Resolve() will take care of everything.
+    ORT_RETURN_IF_ERROR(InlineFunctionProto(inlined_fp));
   } else {
     // Uncommon scenario. Inlining a node representing a fused sub-graph.
     // TODO: Unclear that this feature is needed. Can this be removed?
@@ -4115,15 +4473,7 @@ Status Graph::InlineFunction(Node& callnode) {
         // Copy constant nodes _value to name_to_initial_tensor_
         ONNX_NAMESPACE::NodeProto subgraph_node_proto{};
         subgraph_node.ToProto(subgraph_node_proto);
-        const gsl::not_null<TensorProto*> tensor{graph_proto_->add_initializer()};
-        ORT_RETURN_IF_ERROR(utils::ConstantNodeProtoToTensorProto(subgraph_node_proto, model_path, *tensor, subgraph_node_proto.output(0)));
-        auto insert_result = name_to_initial_tensor_.emplace(tensor->name(), tensor);
-        ORT_ENFORCE(insert_result.second, "Constant node name: ", tensor->name(), " in inlined subgraph: ",
-                    subgraph.Name(), " conflicts with graph initializer. Check Specializing code.");
-        if (GetNodeArg(tensor->name()) == nullptr) {
-          TypeProto t{TypeProtoFromTensorProto(*tensor)};
-          ORT_IGNORE_RETURN_VALUE(GetOrCreateNodeArg(tensor->name(), &t));
-        }
+        ORT_RETURN_IF_ERROR(AddConstantProtoAsInitializer(subgraph_node_proto, std::nullopt));
       }
     }
 
diff --git a/onnxruntime/core/graph/model.cc b/onnxruntime/core/graph/model.cc
index 05747a7e5124d..b3935e69ad7b1 100644
--- a/onnxruntime/core/graph/model.cc
+++ b/onnxruntime/core/graph/model.cc
@@ -41,6 +41,35 @@ namespace onnxruntime {
 
 #if !defined(ORT_MINIMAL_BUILD)
 
+void Model::RemoveLocalFunctionsProtos(const InlinedHashSet<std::string>& retained) {
+  auto* local_functions = model_proto_.mutable_functions();
+  if (retained.empty()) {
+    model_local_function_templates_maps_.clear();
+    model_local_functions_.clear();
+    local_functions->erase(local_functions->begin(), local_functions->end());
+  } else {
+    const auto retained_end = retained.cend();
+    for (auto it = model_local_functions_.begin();
+         it != model_local_functions_.end();) {
+      if (retained.find(it->first) == retained_end) {
+        model_local_function_templates_maps_.erase(it->first);
+        it = model_local_functions_.erase(it);
+      } else {
+        ++it;
+      }
+    }
+
+    for (auto it = local_functions->begin(); it != local_functions->end();) {
+      const auto function_id = function_utils::GetFunctionIdentifier(it->domain(), it->name());
+      if (retained.find(function_id) == retained_end) {
+        it = local_functions->erase(it);
+      } else {
+        ++it;
+      }
+    }
+  }
+}
+
 static constexpr int DEFAULT_PROTOBUF_BLOCK_SIZE = 4 * 1024 * 1024;
 
 Model::Model(const std::string& graph_name,
@@ -95,10 +124,10 @@ Model::Model(const std::string& graph_name,
   for (auto& func : model_local_functions) {
     auto func_ptr = model_proto_.add_functions();
     func_ptr->CopyFrom(func);
-    model_local_functions_.insert_or_assign(function_utils::GetFunctionIdentifier(func_ptr->domain(), func_ptr->name()), func_ptr);
+    model_local_functions_.insert_or_assign(function_utils::GetFunctionIdentifier(func_ptr->domain(), func_ptr->name()),
+                                            func_ptr);
   }
 
-  model_local_function_templates_.reserve(model_proto_.functions().size());
   model_local_function_templates_maps_.reserve(model_proto_.functions().size());
   for (auto& func : model_proto_.functions()) {
     auto func_schema_ptr = function_utils::CreateSchema(func.domain(),
@@ -111,8 +140,8 @@ Model::Model(const std::string& graph_name,
     auto func_template_ptr = std::make_unique<FunctionTemplate>();
     func_template_ptr->op_schema_ = std::move(func_schema_ptr);
     func_template_ptr->onnx_func_proto_ = &func;
-    model_local_function_templates_.push_back(std::move(func_template_ptr));
-    model_local_function_templates_maps_[function_utils::GetFunctionIdentifier(func.domain(), func.name())] = model_local_function_templates_.back().get();
+    model_local_function_templates_maps_.insert_or_assign(function_utils::GetFunctionIdentifier(func.domain(), func.name()),
+                                                          std::move(func_template_ptr));
   }
 
   // need to call private ctor so can't use make_shared
@@ -203,6 +232,14 @@ Model::Model(ModelProto&& model_proto, const PathString& model_path,
     }
   }
 
+  // special-case the internal NHWC domain as it must match the ONNX opset if not explicitly imported
+  if (domain_to_version.find(kMSInternalNHWCDomain) == domain_to_version.end()) {
+    auto onnx_version = domain_to_version.find(kOnnxDomain);
+    if (onnx_version != domain_to_version.end()) {
+      domain_to_version[kMSInternalNHWCDomain] = onnx_version->second;
+    }
+  }
+
   auto domain_map = allow_official_onnx_release_only_final
                         ? schema_registry->GetLastReleasedOpsetVersions(false)
                         : schema_registry->GetLatestOpsetVersions(false);
@@ -220,7 +257,6 @@ Model::Model(ModelProto&& model_proto, const PathString& model_path,
     model_local_functions_.insert_or_assign(function_utils::GetFunctionIdentifier(func.domain(), func.name()), &func);
   }
 
-  model_local_function_templates_.reserve(model_proto_.functions().size());
   model_local_function_templates_maps_.reserve(model_proto_.functions().size());
   for (auto& func : model_proto_.functions()) {
     auto func_schema_ptr = function_utils::CreateSchema(func.domain(),
@@ -233,9 +269,7 @@ Model::Model(ModelProto&& model_proto, const PathString& model_path,
     auto func_template_ptr = std::make_unique<FunctionTemplate>();
     func_template_ptr->op_schema_ = std::move(func_schema_ptr);
     func_template_ptr->onnx_func_proto_ = &func;
-    model_local_function_templates_.push_back(std::move(func_template_ptr));
-    model_local_function_templates_maps_[function_utils::GetFunctionIdentifier(func.domain(), func.name())] =
-        model_local_function_templates_.back().get();
+    model_local_function_templates_maps_.insert_or_assign(function_utils::GetFunctionIdentifier(func.domain(), func.name()), std::move(func_template_ptr));
   }
 
   // create instance. need to call private ctor so can't use make_unique
@@ -244,7 +278,7 @@ Model::Model(ModelProto&& model_proto, const PathString& model_path,
                          logger, options.strict_shape_type_inference));
 }
 
-const InlinedHashMap<std::string, FunctionTemplate*>& Model::GetModelLocalFunctionTemplates() const {
+const NodeHashMap<std::string, std::unique_ptr<FunctionTemplate>>& Model::GetModelLocalFunctionTemplates() const {
   return model_local_function_templates_maps_;
 }
 
@@ -332,7 +366,7 @@ const Graph& Model::MainGraph() const noexcept {
 }
 
 #if !defined(ORT_MINIMAL_BUILD)
-ModelProto Model::ToProto() {
+ModelProto Model::ToProto() const {
   // We want to return back the original proto
   // To that end invoke const overload of ToGraphProto()
   // that returns by value and, therefore, allows us to filter
@@ -346,7 +380,7 @@ ModelProto Model::ToProto() {
 
 ModelProto Model::ToGraphProtoWithExternalInitializers(const std::string& external_file_name,
                                                        const PathString& file_path,
-                                                       size_t initializer_size_threshold) {
+                                                       size_t initializer_size_threshold) const {
   ModelProto result(model_proto_);
   const auto& graph = *graph_;
   *(result.mutable_graph()) = graph.ToGraphProtoWithExternalInitializers(external_file_name,
diff --git a/onnxruntime/core/graph/model.h b/onnxruntime/core/graph/model.h
index 6bdb68dd734f0..4ce6660b794bc 100644
--- a/onnxruntime/core/graph/model.h
+++ b/onnxruntime/core/graph/model.h
@@ -139,7 +139,7 @@ class Model {
   // Returns empty string if not specified.
   const std::string GraphDocString() const;
 
-  const InlinedHashMap<std::string, FunctionTemplate*>& GetModelLocalFunctionTemplates() const;
+  const NodeHashMap<std::string, std::unique_ptr<FunctionTemplate>>& GetModelLocalFunctionTemplates() const;
 
 #else
   // Get model's IR version.
@@ -182,14 +182,14 @@ class Model {
 
 #if !defined(ORT_MINIMAL_BUILD)
   // Get model's serialization proto data.
-  ONNX_NAMESPACE::ModelProto ToProto();
+  ONNX_NAMESPACE::ModelProto ToProto() const;
 
   // Get model's serialization proto data.
   // Save initializer larger than the given threshold (in bytes) into an external binary file
   // with the given name. This function is useful to avoid hitting the size limit of protobuf files.
   ONNX_NAMESPACE::ModelProto ToGraphProtoWithExternalInitializers(const std::string& external_file_name,
                                                                   const PathString& file_path,
-                                                                  size_t initializer_size_threshold);
+                                                                  size_t initializer_size_threshold) const;
 
 #ifdef _WIN32
   static common::Status Save(Model& model, const std::wstring& file_path);
@@ -291,6 +291,13 @@ class Model {
   common::Status SaveToOrtFormat(flatbuffers::FlatBufferBuilder& builder,
                                  flatbuffers::Offset<onnxruntime::fbs::Model>& model) const;
 
+  /// <summary>
+  /// Frees local function definitions in the model, excluding those in the `retained` set.
+  /// Called from GraphPartitioner::InlineFunctionsAOT.
+  /// </summary>
+  /// <param name="retained">contains function IDs that should not be removed.</param>
+  void RemoveLocalFunctionsProtos(const InlinedHashSet<std::string>& retained);
+
 #endif  // !defined(ORT_MINIMAL_BUILD)
 
   static common::Status LoadFromOrtFormat(const onnxruntime::fbs::Model& fbs_model,
@@ -312,14 +319,12 @@ class Model {
   // this map will be used for the local functions' schema's type/shape inference.
   // This container is used by ONNX code and must be an std::unordered_map.
   std::unordered_map<std::string, const ONNX_NAMESPACE::FunctionProto*> model_local_functions_;
-  // this is the container that host the generated schemas for model local functions.
-  // the generated schemare will be used for graph resolving and type/shape inference.
-  // those schemas' type/shape inference will reference to the model_local_functions_ as context,
-  // so need to keep them with same lifetime.
-  InlinedVector<std::unique_ptr<FunctionTemplate>> model_local_function_templates_;
   // this is the map from function id to the local function template.
   // this map will be used by graph to instantiate the function body.
-  InlinedHashMap<std::string, FunctionTemplate*> model_local_function_templates_maps_;
+  // Defined as a node based map so the memory is released when not all of the functions
+  // are inlined and removed.
+  NodeHashMap<std::string, std::unique_ptr<FunctionTemplate>> model_local_function_templates_maps_;
+
 #else
   // properties that would normally come from ModelProto
   std::string producer_version_;
diff --git a/onnxruntime/core/mickey/README.md b/onnxruntime/core/mickey/README.md
new file mode 100644
index 0000000000000..7e8d30cd1805b
--- /dev/null
+++ b/onnxruntime/core/mickey/README.md
@@ -0,0 +1,6 @@
+# About Mickey
+
+Playful name for a template library of high performance cuda code that
+are often shared by various AI operators. The intention is to make this
+header files only, with no binary impact unless it is instantiated
+where it is needed.
diff --git a/onnxruntime/core/mickey/blk_q4/prepack_sm80.h b/onnxruntime/core/mickey/blk_q4/prepack_sm80.h
new file mode 100644
index 0000000000000..e291ab39e8aa3
--- /dev/null
+++ b/onnxruntime/core/mickey/blk_q4/prepack_sm80.h
@@ -0,0 +1,325 @@
+/**
+ * Copyright (c) Microsoft Corporation. All rights reserved.
+ * Licensed under the MIT License.
+ *
+ * Module Name:
+ *    prepack_sm80.h
+ *
+ * Abstract:
+ *    Prepack weights and quantization parameters (scales and offsets) for
+ *    GEMM, where activations are fp16 or bf16, and weights are block-wise
+ *    4b quantized values, specifically for Ampere GPUs.
+ *
+ *    Prepacking enables faster loading of weights and quantization parameters
+ *    into tensor cores, and faster dequantization of weights.
+ *
+ *    Only supports fp16 for now, bfloat16 support will be added later.
+ */
+
+#pragma once
+
+#include "core/common/common.h"
+#include "core/util/matrix_layout.h"
+
+namespace onnxruntime {
+namespace cuda {
+
+/**
+ * @brief Blockwise quantization methods
+ * @tparam ElementT       source data type, fp16
+ * @tparam block_size     number of elemenets quantized together
+ * @tparam qbits          number of bits in each quantized element
+ * @tparam Columnwise     true:  elements in a block come from one single column
+ *                        false: elements in a block come from one single row
+ */
+template <
+    typename ElementT,
+    int block_size,
+    int qbits,
+    bool Columnwise,
+    bool ExtraBoundsCheck = false>
+struct BlockwiseQuantization {
+  static_assert(qbits == 4, "Only 4b block quantization is supported!");
+  static_assert(sizeof(ElementT) == 2, "Only 16b floating point types are supported!");
+
+  using QuantBlocking =
+      std::conditional_t<Columnwise,
+                         MatrixShape<block_size, 1>,
+                         MatrixShape<1, block_size>>;
+
+  using ElementW = uint8_t;  // <- Weight is int4, uint8 for two of them
+  // We pack 4 weights into one 16b element, so we can leverage cutlass tile iterators
+  // for async share memory loading, and minimizing bank conflict during matrix loading
+  using ElementWPack = ElementT;
+  using LayoutWPack = ColumnMajorLayout;  // <- layout of packed weight, must be column major
+
+  // Current Ampere kernel use 8b zero point, need to shrink it to 4b in the future
+  using ElementQOffset = uint8_t;
+
+  // Layout of the quantization parameters (scales and zero points)
+  // Major on the dimension that has the most parameters per squarish weight block.
+  // E.g. for column-wise quantization, a [64, 64] block has [2, 64] parameters,
+  // where each row has more data, so we use row major layout so that warp threads
+  // can use less load instructions to load more parameters.
+  using LayoutQmeta =
+      typename std::conditional<Columnwise,
+                                RowMajorLayout, ColumnMajorLayout>::type;
+
+  /**
+   * @brief  Get quantized weight tensor dimensions.
+   * Actual weight type is int4, we use ElementW = uint8 to avoid possible compilation
+   * troubles. Since the layout is column major, we are packing 2 weights in a column
+   * into one int8
+   */
+  static inline auto get_quant_weights_shape(int rows, int columns) {
+    return make_Position(rows / 2, columns);
+  }
+
+  static inline auto get_quant_meta_shape(int rows, int columns) {
+    return make_Position(rows / QuantBlocking::kRow, columns / QuantBlocking::kColumn);
+  }
+
+  /**
+   * @brief Prepack weight matrix to facilitate matrix loading, depending on MMA
+   * instruction layout.
+   *
+   * The weight matrix is int4, yet we want to leverage existing fp16/bf16
+   * tile loading and MMA layout code in CUTLASS. So we group 4 int4 into 2
+   * bytes, pretending it's fp16. This grouping must be done in a way to be
+   * easily unpacked into tiles that match the MMA instruction layout.
+   * For MMA instruction <16, 8, 16>, each instruction processes 2 8x8 tiles,
+   * vertically stacked on the K dimension. And MmaTensorOpMultiplicandTileIterator
+   * loads a <InstructionShape::kK, WarpShape::kN> tile.
+   *
+   * So we stack 2x2 tiles on a 3rd dimeansion, and reshape them in a HWC fashion:
+   * T0, T2
+   * T1, T3
+   * ==>
+   * T0[0, 0], T1[0, 0], T2[0, 0], T3[0, 0]
+   * T0[1, 0], T1[1, 0], T2[1, 0], T3[1, 0]
+   * T0[2, 0], T1[2, 0], T2[2, 0], T3[2, 0]
+   * T0[3, 0], T1[3, 0], T2[3, 0], T3[3, 0]
+   * ...
+   * T0[0, 7], T1[0, 7], T2[0, 7], T3[0, 7]
+   * T0[1, 7], T1[1, 7], T2[1, 7], T3[1, 7]
+   * T0[2, 7], T1[2, 7], T2[2, 7], T3[2, 7]
+   * T0[3, 7], T1[3, 7], T2[3, 7], T3[3, 7]
+   *
+   * This pack a 8x16 int8 tile into a 16x8 int8 tile, i.e. a 8x8 16b tile
+   */
+  static void prepack_weights(
+      int rows,
+      int columns,
+      const gsl::span<uint8_t const>& weights,     // <- int4 weights, column major
+      const gsl::span<uint8_t>& weights_prepacked  // <- int4 prepacked weights tensor, same size buffer
+  ) {
+    ORT_ENFORCE((rows % 16) == 0 && (columns % 16) == 0 &&
+                    (rows % QuantBlocking::kRow) == 0 &&
+                    (columns % QuantBlocking::kColumn) == 0,
+                "Does not support odd number of rows or columns!");
+    ORT_ENFORCE(weights.size() == size_t(rows * columns / 2),
+                "Weight tensor shape mismatch!");
+    ORT_ENFORCE(weights_prepacked.size() == weights.size(),
+                "Prepacked Weight tensor buffer should be the same size!");
+
+    const MatrixRef<uint8_t const, ColumnMajorLayout, ExtraBoundsCheck>
+        tensor_weight(weights, make_Position(rows / 2, columns));
+    const MatrixRef<uint8_t, LayoutWPack, ExtraBoundsCheck>
+        tensor_weight_prepacked(weights_prepacked, make_Position(rows, columns / 2));
+
+    // TODO(fuchen)!! parallized this.
+    auto t0_base = make_Position(0, 0);
+    auto t1_base = make_Position(4, 0);
+    auto t2_base = make_Position(0, 8);
+    auto t3_base = make_Position(4, 8);
+    for (int col_dtile = 0; col_dtile < columns / 16; ++col_dtile) {
+      for (int row_dtile = 0; row_dtile < rows / 16; ++row_dtile) {
+        // Packing from a 8x16 tile to a 16x8 tile
+        auto dtile_base = make_Position(row_dtile * 8, col_dtile * 16);
+        auto packed_tile_base = make_Position(row_dtile * 16, col_dtile * 8);
+        for (int col = 0; col < 8; ++col) {
+          for (int row = 0; row < 4; ++row) {
+            auto cord = make_Position(row, col);
+            auto packed_cord = packed_tile_base + make_Position(row * 4, col);  // packed tile is 16x8
+            uint8_t buf[4];
+            buf[0] = tensor_weight.at(dtile_base + t0_base + cord);
+            buf[1] = tensor_weight.at(dtile_base + t1_base + cord);
+            buf[2] = tensor_weight.at(dtile_base + t2_base + cord);
+            buf[3] = tensor_weight.at(dtile_base + t3_base + cord);
+
+            // [0, 1, 2, 3, 4, 5, 6, 7] => [0, 2, 4, 6, 1, 3, 5, 7] so that each pair of adjacent weights
+            // are in different b16 register at the same positions. This makes it easier to convert to
+            // fp16x2 format in a b32 register
+
+            tensor_weight_prepacked.at(packed_cord) = (buf[0] & 0x0f) | ((buf[1] & 0x0f) << 4);
+            tensor_weight_prepacked.at(packed_cord + make_Position(1, 0)) = (buf[2] & 0x0f) | ((buf[3] & 0x0f) << 4);
+            tensor_weight_prepacked.at(packed_cord + make_Position(2, 0)) = ((buf[0] & 0xf0) >> 4) | (buf[1] & 0xf0);
+            tensor_weight_prepacked.at(packed_cord + make_Position(3, 0)) = ((buf[2] & 0xf0) >> 4) | (buf[3] & 0xf0);
+          }
+        }
+      }
+    }
+  }
+
+  /**
+   * @brief We rearrange the values of the quantization scale and offset tensors
+   * to facilitate faster loading to tensor core, only 16b gemm, and (1,n)
+   * block quantization.
+   */
+  static constexpr bool ShouldRearrangeMeta = sizeof(ElementT) == 2 && QuantBlocking::kRow == 1;
+
+  static void prepack_quant_scales(
+      size_t rows,
+      size_t columns,
+      const gsl::span<ElementT const>& scales,     // <- quant scales, column major layout
+      const gsl::span<ElementT>& scales_prepacked  // <- quant scales prepacked, same size buffer
+  ) {
+    auto meta_shape = get_quant_meta_shape(rows, columns);
+    ORT_ENFORCE(scales.size() == size_t(meta_shape.product()),
+                "Quantization scale tensor shape mismatch!");
+    ORT_ENFORCE(scales_prepacked.size() == size_t(meta_shape.product()),
+                "Prepacked quantization scale tensor buffer should be the same size!");
+
+    MatrixRef<ElementT const, ColumnMajorLayout, ExtraBoundsCheck> tensor_scale(scales, meta_shape);
+    MatrixRef<ElementT, LayoutQmeta, ExtraBoundsCheck> tensor_scale_prepacked(scales_prepacked, meta_shape);
+
+    // Only prepacking scale and offset tensors for a often used special case:
+    //    16b gemm (2 elements per 32b register, operand tile shape 8x8)
+    //    2 B operand tiles per mma instruction stacked on k dimension
+    //    (1,n) quantization blocking
+    if constexpr (sizeof(ElementT) == 2 && QuantBlocking::kRow == 1) {
+      // In Ampere tensor op, each operand B tile is 8 x 8, in a warp of 32 threads, each thread
+      // holds a fragment of the tile containing 2 elements in the k dimension. Most often we use
+      // mma instruction shape of 16x8x16, which means 2 B tiles are stacked in the k dimension,
+      // as shown below (T stands for thread):
+      // T0, T4, T8, T12
+      // T1, T5, T9, T13
+      // T2, T6, T10, T14
+      // T3, T7, T11, T15
+      // T0, T4, T8, T12
+      // T1, T5, T9, T13
+      // T2, T6, T10, T14
+      // T3, T7, T11, T15
+      //
+      // We need to deliver quantization scale and offset elements to the corresponding threads,
+      // so we can perform dequantization efficiently. With a column major layout, each thread
+      // needs two separate loads for a mma instruction, due to the tile fragment layout shown
+      // above. To reduce the number of loads, we rearrange each column as below, so we can use
+      // a single load to load fragments for two tiles:
+      // T0        T0
+      // T1        T0
+      // T2        T1
+      // T3   =>   T1
+      // T0        T2
+      // T1        T2
+      // T2        T3
+      // T3        T3
+
+      for (int col = 0; col < tensor_scale.shape()[1]; ++col) {
+        for (int row_blk = 0; row_blk < tensor_scale.shape()[0]; row_blk += 16) {
+          for (int thread_id = 0; thread_id < 4; thread_id++) {
+            const int dst_idx = row_blk + thread_id * 4;
+            const int src_idx = row_blk + thread_id * 2;
+            tensor_scale_prepacked.at(dst_idx + 0, col) = tensor_scale.at(src_idx + 0, col);
+            tensor_scale_prepacked.at(dst_idx + 1, col) = tensor_scale.at(src_idx + 1, col);
+            tensor_scale_prepacked.at(dst_idx + 2, col) = tensor_scale.at(src_idx + 8, col);
+            tensor_scale_prepacked.at(dst_idx + 3, col) = tensor_scale.at(src_idx + 9, col);
+          }
+        }
+      }
+    } else {
+      // In all other cases, we don't prepack scale or offset
+      // Potential transpose if the prepacked layout is different from the original layout
+      for (int col = 0; col < tensor_scale.shape()[1]; ++col) {
+        for (int row = 0; row < tensor_scale.shape()[0]; ++row) {
+          tensor_scale_prepacked.at(row, col) = tensor_scale.at(row, col);
+        }
+      }
+    }
+  }
+
+  static void prepack_quant_offsets(
+      size_t rows,
+      size_t columns,
+      const gsl::span<uint8_t const>& offsets,     // <- quant offsets, int4, column major layout
+      const gsl::span<uint8_t>& offsets_prepacked  // <- quant offsets prepacked, double size buffer
+  ) {
+    auto meta_shape = get_quant_meta_shape(rows, columns);
+
+    ORT_ENFORCE((rows % 16) == 0 && (columns % 16) == 0,
+                "Does not support odd number of rows or columns!");
+    ORT_ENFORCE(offsets_prepacked.size() == size_t(meta_shape.product()),
+                "Wrong buffer size for prepacked quantization offsets!");
+    ORT_ENFORCE(offsets.size() == size_t(((meta_shape[0] + 1) / 2) * meta_shape[1]),
+                "Quantization offset tensor shape mismatch!");
+
+    MatrixRef<uint8_t const, ColumnMajorLayout, ExtraBoundsCheck>
+        tensor_offset(offsets, make_Position((meta_shape[0] + 1) / 2, meta_shape[1]));
+    MatrixRef<uint8_t, LayoutQmeta, ExtraBoundsCheck> tensor_offset_prepacked(offsets_prepacked, meta_shape);
+
+    // Only prepacking scale and offset tensors for a often used special case:
+    //    16b gemm (2 elements per 32b register, operand tile shape 8x8)
+    //    2 B operand tiles per mma instruction stacked on k dimension
+    //    (1,n) quantization blocking
+    if constexpr (sizeof(ElementT) == 2 && QuantBlocking::kRow == 1) {
+      // In Ampere tensor op, each operand B tile is 8 x 8, in a warp of 32 threads, each thread
+      // holds a fragment of the tile containing 2 elements in the k dimension. Most often we use
+      // mma instruction shape of 16x8x16, which means 2 B tiles are stacked in the k dimension,
+      // as shown below (T stands for thread):
+      // T0, T4, T8, T12
+      // T1, T5, T9, T13
+      // T2, T6, T10, T14
+      // T3, T7, T11, T15
+      // T0, T4, T8, T12
+      // T1, T5, T9, T13
+      // T2, T6, T10, T14
+      // T3, T7, T11, T15
+      //
+      // We need to deliver quantization scale and offset elements to the corresponding threads,
+      // so we can perform dequantization efficiently. With a column major layout, each thread
+      // needs two separate loads for a mma instruction, due to the tile fragment layout shown
+      // above. To reduce the number of loads, we rearrange each column as below, so we can use
+      // a single load to load fragments for two tiles:
+      // T0        T0
+      // T1        T0
+      // T2        T1
+      // T3   =>   T1
+      // T0        T2
+      // T1        T2
+      // T2        T3
+      // T3        T3
+      for (int col = 0; col < meta_shape[1]; ++col) {
+        for (int row_blk = 0; row_blk < meta_shape[0]; row_blk += 16) {
+          for (int thread_id = 0; thread_id < 4; thread_id++) {
+            const int dst_idx = row_blk + thread_id * 4;
+            const int src_idx = row_blk + thread_id * 2;
+            // [a, b, c, d] => [a, c, b, d] so that adjacent weights are in their own
+            // 16b element: [a, x, b, x] and [x, c, x, d], which makes it easier to
+            // convert to fp16x2 format in a b32 register
+            uint8_t pair01 = tensor_offset.at(src_idx / 2, col);
+            uint8_t pair89 = tensor_offset.at((src_idx + 8) / 2, col);
+            tensor_offset_prepacked.at(dst_idx + 0, col) = pair01 & 0xf;
+            tensor_offset_prepacked.at(dst_idx + 1, col) = pair89 & 0xf;
+            tensor_offset_prepacked.at(dst_idx + 2, col) = pair01 >> 4;
+            tensor_offset_prepacked.at(dst_idx + 3, col) = pair89 >> 4;
+          }
+        }
+      }
+    } else {
+      // In all other cases, we don't prepack scale or offset
+      // Potential transpose if the prepacked layout is different from the original layout
+      for (int col = 0; col < meta_shape[1]; ++col) {
+        for (int row = 0; row < meta_shape[0]; row += 2) {
+          uint8_t pair01 = tensor_offset.at(row / 2, col);
+          tensor_offset_prepacked.at(row + 0, col) = pair01 & 0xf;
+          if (row + 1 < meta_shape[0]) {
+            tensor_offset_prepacked.at(row + 1, col) = pair01 >> 4;
+          }
+        }
+      }
+    }
+  }
+};
+
+}  // namespace cuda
+}  // namespace onnxruntime
diff --git a/onnxruntime/core/mlas/.clang-format b/onnxruntime/core/mlas/.clang-format
index 4a89ef98cf049..16ad8bd8a7234 100644
--- a/onnxruntime/core/mlas/.clang-format
+++ b/onnxruntime/core/mlas/.clang-format
@@ -2,10 +2,12 @@
 
 BasedOnStyle: Google
 IndentWidth: 4
-ColumnLimit: 100
+# Setting ColumnLimit to 0 so developer choices about where to break lines are maintained.
+# Developers are responsible for adhering to the 120 character maximum.
+ColumnLimit: 0
+AlignAfterOpenBracket: BlockIndent
 AlwaysBreakAfterReturnType: TopLevel
 AlwaysBreakTemplateDeclarations: Yes
 BinPackParameters: false
 BreakBeforeBraces: Linux
 ...
-
diff --git a/onnxruntime/core/mlas/inc/mlas_gemm_postprocessor.h b/onnxruntime/core/mlas/inc/mlas_gemm_postprocessor.h
new file mode 100644
index 0000000000000..7ea29eb091318
--- /dev/null
+++ b/onnxruntime/core/mlas/inc/mlas_gemm_postprocessor.h
@@ -0,0 +1,33 @@
+/*++
+
+Copyright (c) Microsoft Corporation. All rights reserved.
+
+Licensed under the MIT License.
+
+Module Name:
+
+    mlas_gemm_postprocessor.h
+
+Abstract:
+
+    This module contains a base class for custom postprocessing following a
+    GEMM.
+
+--*/
+
+#pragma once
+
+template<typename T>
+class MLAS_GEMM_POSTPROCESSOR
+{
+   public:
+    virtual void Process(T* C,                  /**< the address of matrix to process */
+                         size_t RangeStartM,    /**< the start row index of matrix */
+                         size_t RangeStartN,    /**< the start col index of matrix */
+                         size_t RangeCountM,    /**< the element count per row to process */
+                         size_t RangeCountN,    /**< the element count per col to process */
+                         size_t ldc             /**< the leading dimension of matrix */
+    ) const = 0;
+
+    virtual ~MLAS_GEMM_POSTPROCESSOR() {}
+};
diff --git a/onnxruntime/core/mlas/inc/mlas_q4.h b/onnxruntime/core/mlas/inc/mlas_q4.h
index 65b48a3009e72..316344ad8c214 100644
--- a/onnxruntime/core/mlas/inc/mlas_q4.h
+++ b/onnxruntime/core/mlas/inc/mlas_q4.h
@@ -21,6 +21,7 @@ Module Name:
 #pragma once
 
 #include "mlas.h"
+#include "mlas_gemm_postprocessor.h"
 
 #include <math.h>
 #include <algorithm>
@@ -39,7 +40,7 @@ typedef enum {
  * @brief Computes the number of bytes required to pack and int4-quantize
  *        a weight matrix
  * @param QType  type of block quantization
- * @param N      the number of columns of matrix B. 
+ * @param N      the number of columns of matrix B.
  * @param K      the number of rows of matrix B.
  * @return size of the packing buffer, 0 if the operation is not yet supported.
 */
@@ -53,11 +54,11 @@ MlasQ4GemmPackBSize(
 
 /**
  * @brief Prepack and Quantize fp32 weight tensor to int4 blocks
- * 
+ *
  * @param QType      type of block quantization
  * @param PackedBuf  destination buffer
  * @param FpData     the pointer to fp32 matrix
- * @param N          the number of columns of matrix B. 
+ * @param N          the number of columns of matrix B.
  * @param K          the number of rows of matrix B.
  * @param ldb        leading dimension of B
 */
@@ -95,22 +96,6 @@ MlasQ4GemmUnPackB(
     );
 
 
-template<typename T>
-class MLAS_GEMM_POSTPROCESSOR
-{
-   public:
-    virtual void Process(T*,         /**< the address of matrix to process */
-                         size_t,     /**< the start row index of matrix */
-                         size_t,     /**< the start col index of matrix */
-                         size_t,     /**< the element count per row to process */
-                         size_t,     /**< the element count per col to process */
-                         size_t      /**< the leading dimension of matrix */
-    ) const = 0;
-
-    virtual ~MLAS_GEMM_POSTPROCESSOR() {}
-};
-
-
 /**
  * @brief Data parameters for Q4 GEMM routine
  *        C = A * B + Bias
@@ -229,3 +214,147 @@ MlasQ8Q4GemmBatch(
     const MLAS_Q8Q4_GEMM_DATA_PARAMS* DataParams,
     MLAS_THREADPOOL* ThreadPool
     );
+
+
+////////////////////////////////////////////////////////////
+// Blockwise quantization and dequantization where quantization
+// parameters are packed into separate buffers.
+//
+
+/**
+ * @brief For quantization type <T, block_size, columnwise>, and
+ *        matrix shape [rows, columns], compute the shape of the
+ *        quantization parameter matrix [meta_rows, meta_cols]
+*/
+template <typename T, int qbits>
+void
+MlasBlockwiseQuantMetaShape(
+    int block_size,
+    bool columnwise,
+    int rows,
+    int columns,
+    int& meta_rows,
+    int& meta_cols
+    );
+
+/**
+ * @brief For quantization type <T, block_size, columnwise>, and
+ * matrix shape [rows, columns], compute the shape of the
+ * quantized matrix [q_rows, q_cols]. The quantized matrix
+ * is in column major layout, with bits packed on the column.
+ *
+ * @tparam T
+ * @tparam qbits
+ * @param block_size
+ * @param columnwise
+ * @param rows
+ * @param columns
+ * @param q_rows
+ * @param q_cols
+*/
+template <typename T, int qbits>
+void
+MlasBlockwiseQuantizedShape(
+    int block_size,
+    bool columnwise,
+    int rows,
+    int columns,
+    int& q_rows,
+    int& q_cols
+    );
+
+/**
+ * @brief Compute the sizes of the quantized data and quantization parameter buffers.
+ *
+ * @param qbits                             The bit width of each quantized value.
+ * @param block_size                        The number of quantized values in a block.
+ * @param columnwise                        Whether a block contains values from a matrix column (true) or row (false).
+ * @param rows                              Number of matrix rows.
+ * @param columns                           Number of matrix columns.
+ * @param[out] q_data_size_in_bytes         The size in bytes of the quantized data.
+ * @param[out] q_scale_num_elements         The size in elements of the scale quantization parameters.
+ * @param[out] q_zero_point_size_in_bytes   The size in bytes of the zero point quantization parameters. Optional.
+ *
+ * If the qbits or block_size values are unsupported the output sizes will be zero.
+ */
+void MLASCALL
+MlasBlockwiseQuantizedBufferSizes(
+    int qbits,
+    int block_size,
+    bool columnwise,
+    int rows,
+    int columns,
+    size_t& q_data_size_in_bytes,
+    size_t& q_scale_num_elements,
+    size_t* q_zero_point_size_in_bytes
+);
+
+
+/**
+ * @brief Blockwise 4 bits quantization, resulting elements and quantization
+ *        parameters (scales, zero points) are packed into separate matrices
+ *        all in column major layout for faster access during subsequent matrix
+ *        multiplication.
+ *
+ * @tparam ElementT             type of the input matrix element, usually floating point
+ * @tparam qbits                number of bits used for quantization, 4 for int4
+ *
+ * @param dst                   points to the quantized matrix, shape [rows, columns] column major
+ * @param scales                points to the scales matrix, column major
+ * @param zero_points           points to the zero_points matrix, column major
+ * @param src                   points to the floating point matrix, to be quantized, row major shape [rows, columns]
+ * @param block_size            size of the block to quantize, elements from the same block share the same scale and zero point
+ * @param columnwise            true when elements in a block are from the same column, false when elements in a block are from the same row
+ * @param rows
+ * @param columns
+ * @param leading_dimension
+ * @param thread_pool
+*/
+template <typename ElementT, int qbits>
+void
+MlasQuantizeBlockwise(
+    uint8_t* dst,
+    ElementT* scales,
+    uint8_t* zero_points,
+    const ElementT* src,
+    int block_size,
+    bool columnwise,
+    int rows,
+    int columns,
+    int leading_dimension,
+    MLAS_THREADPOOL* thread_pool
+    );
+
+
+/**
+ * @brief Blockwise 4 bits dequantization, quantized elements and quantization
+ *        parameters (scales, zero points) are from separate matrices packed
+ *        in column major layout.  Output is a floating point matrix in column
+ *        major layout for faster access during subsequent matrix multiplication.
+ *
+ * @tparam ElementT     type of the dequantized matrix element, usually floating point
+ * @tparam qbits        number of bits used for quantization, 4 for int4
+ *
+ * @param dst           points to dequantized matrix shape [rows, columns] column major
+ * @param src           points to quantized matrix, column major
+ * @param scales        points to quantization scales, column major
+ * @param zero_points   points to quantization zero points, column major
+ * @param block_size    size of the block to quantize, elements from the same block share the same scale and zero point
+ * @param columnwise    true when elements in a block are from the same column, false when elements in a block are from the same row
+ * @param rows
+ * @param columns
+ * @param thread_pool
+*/
+template <typename ElementT, int qbits>
+void
+MlasDequantizeBlockwise(
+    ElementT* dst,
+    const uint8_t* src,
+    const ElementT* scales,
+    const uint8_t* zero_points,
+    int block_size,
+    bool columnwise,
+    int rows,
+    int columns,
+    MLAS_THREADPOOL* thread_pool
+    );
diff --git a/onnxruntime/core/mlas/inc/mlas_qnbit.h b/onnxruntime/core/mlas/inc/mlas_qnbit.h
new file mode 100644
index 0000000000000..9620dd42d1da9
--- /dev/null
+++ b/onnxruntime/core/mlas/inc/mlas_qnbit.h
@@ -0,0 +1,79 @@
+/*++
+
+Copyright (c) Microsoft Corporation. All rights reserved.
+
+Licensed under the MIT License.
+
+Module Name:
+
+    mlas_qnbit.h
+
+Abstract:
+
+    This module contains the public data structures and procedure prototypes
+    for blocked n-bit quantized GEMM.
+
+    N-bit block quantization is used to compress weight tensors of large
+    language models.
+
+--*/
+
+#pragma once
+
+#include "mlas.h"
+#include "mlas_gemm_postprocessor.h"
+
+/**
+ * @brief Data parameters for float/n-bit quantized int GEMM routine.
+ */
+struct MLAS_SQNBIT_GEMM_DATA_PARAMS {
+    const float* A = nullptr;                ///< address of A (float32 matrix)
+    size_t lda = 0;                          ///< leading dimension of A
+    const void* QuantBData = nullptr;        ///< address of quantized B (quantized n-bit int values)
+    const float* QuantBScale = nullptr;      ///< address of scale values of quantized B, one per block
+    const void* QuantBZeroPoint = nullptr;   ///< optional address of zero point values of quantized B, one per block
+    bool IsBPacked = false;                  ///< whether B values are packed in an optimized format for the computation
+    const float* Bias = nullptr;             ///< optional address of Bias, vector size N
+    float* C = nullptr;                      ///< address of result matrix
+    size_t ldc = 0;                          ///< leading dimension of C
+
+    ///< optional post processing to apply to result matrix
+    MLAS_GEMM_POSTPROCESSOR<float>* PostProcessor = nullptr;
+};
+
+/**
+ * @brief Batched GEMM:  C = A * B + Bias
+ *        A must be a float32 matrix
+ *        B must be a quantized and packed n-bit int matrix
+ *
+ * @param[in]       M               row size of matrix A and C
+ * @param[in]       N               column size of matrix B and C
+ * @param[in]       K               column size of matrix A and row size of matrix B
+ * @param[in]       BatchN          number of batches
+ * @param[in]       BlkBitWidth     quantized value bit width (e.g., 4 means 4 bit ints)
+ * @param[in]       BlkLen          number of quantized values per block
+ * @param[inout]    DataParams      An array (size BatchN) of parameter blocks
+ * @param[in]       ThreadPool      optional thread pool to use
+ */
+void MLASCALL
+MlasSQNBitGemmBatch(
+    size_t M,
+    size_t N,
+    size_t K,
+    size_t BatchN,
+    size_t BlkBitWidth,
+    size_t BlkLen,
+    const MLAS_SQNBIT_GEMM_DATA_PARAMS* DataParams,
+    MLAS_THREADPOOL* ThreadPool = nullptr
+);
+
+/**
+ * @brief Determines whether a float32/quantized n-bit int GEMM implementation is available on the current platform.
+ * @param[in]   BlkBitWidth     quantized value bit width (e.g., 4 means 4 bit ints)
+ * @param[in]   BlkLen          number of quantized values per block
+ */
+bool MLASCALL
+MlasIsSQNBitGemmAvailable(
+    size_t BlkBitWidth,
+    size_t BlkLen
+);
diff --git a/onnxruntime/core/mlas/lib/aarch64/QgemmS8S8KernelSmmla.S b/onnxruntime/core/mlas/lib/aarch64/QgemmS8S8KernelSmmla.S
new file mode 100644
index 0000000000000..e18846c89030e
--- /dev/null
+++ b/onnxruntime/core/mlas/lib/aarch64/QgemmS8S8KernelSmmla.S
@@ -0,0 +1,922 @@
+/*++
+
+Copyright (c) Microsoft Corporation. All rights reserved.
+Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+
+Licensed under the MIT License.
+
+Module Name:
+
+    QgemmS8S8KernelSmmla.s
+
+Abstract:
+
+    This module implements the kernels for the Int8 precision matrix/matrix
+    multiply operation (QGEMM).
+
+--*/
+
+#include "asmmacro.h"
+
+        .text
+
+//
+// Stack frame layout for the smmla kernel. d8-d15, x19-x30 need save
+//
+        .equ  .LMlasQgemmKernel_backup_x19_x20,    0
+        .equ  .LMlasQgemmKernel_backup_x21_x22,    16
+        .equ  .LMlasQgemmKernel_backup_x23_x24,    32
+        .equ  .LMlasQgemmKernel_backup_x25_x26,    48
+        .equ  .LMlasQgemmKernel_backup_x27_x28,    64
+        .equ  .LMlasQgemmKernel_backup_d8_d9,      80
+        .equ  .LMlasQgemmKernel_backup_d10_d11,    96
+        .equ  .LMlasQgemmKernel_backup_d12_d13,    112
+        .equ  .LMlasQgemmKernel_backup_d14_d15,    128
+        .equ  .LMlasQgemmKernel_SavedRegisters,    144
+        .equ  .LMlasQgemmKernel_SavedRegisters_Neg, -144
+
+
+//
+// Init Row Accumulators
+//
+// Generates the code to initialize the accumulators for a single row of the output
+// block.
+//
+//
+//  Accumulators are initialized to ZeroPointB * RowSum + ColumnSum
+//  x7 for RowSumsBuffer pointer
+//  x10 for ColumnSumBuffer pointer
+//  x11 for ZeroPointB buffer pointer
+//
+//  v12~v13 for RowSums values
+//  v14~v15 for ColumnSums values
+//  v0~v3 for ZeroPointB values
+//
+        .macro  InitRowAccumulators Columns, Vec1Reg, Vec2Reg, Vec3Reg, Vec4Reg, RowSumReg
+
+        mul     v7.4s, v\RowSumReg\().4s, v8.4s
+        mov     v\Vec1Reg\().16b, v7.16b
+        add     v\Vec1Reg\().4s, v\Vec1Reg\().4s, v0.4s
+.if \Columns\() > 2
+        mul     v7.4s, v\RowSumReg\().4s, v9.4s
+        mov     v\Vec2Reg\().16b, v7.16b
+        add     v\Vec2Reg\().4s, v\Vec2Reg\().4s, v1.4s
+.endif
+.if \Columns\() > 4
+        mul     v7.4s, v\RowSumReg\().4s, v10.4s
+        mov     v\Vec3Reg\().16b, v7.16b
+        add     v\Vec3Reg\().4s, v\Vec3Reg\().4s, v2.4s
+.endif
+.if \Columns\() > 6
+        mul     v7.4s, v\RowSumReg\().4s, v11.4s
+        mov     v\Vec4Reg\().16b, v7.16b
+        add     v\Vec4Reg\().4s, v\Vec4Reg\().4s, v3.4s
+.endif
+
+        .endm
+
+
+//
+// InitBlockAccumulators
+//
+// Generates the code to initialize the accumulators for 8x8 output
+// block.
+//
+        .macro  InitBlockAccumulators Mode, Columns, Rows
+
+        ld1     {v14.4s},[x10],#16            // load ColumnSumBuffer[0]
+.if \Columns\() > 4
+        ld1     {v15.4s},[x10],#16            // load ColumnSumBuffer[4]
+.endif
+        // v4~v7 will be set to matrixB after this, so, they can used now
+        dup     v4.4s,v14.s[0]              // broadcast column
+        dup     v5.4s,v14.s[1]
+        dup     v6.4s,v14.s[2]
+        dup     v7.4s,v14.s[3]
+
+        zip1    v0.4s, v4.4s, v5.4s
+        zip2    v1.4s, v6.4s, v7.4s
+.if \Columns\() > 4
+        dup     v4.4s,v15.s[0]              // broadcast column
+        dup     v5.4s,v15.s[1]
+        dup     v6.4s,v15.s[2]
+        dup     v7.4s,v15.s[3]
+
+        zip1    v2.4s, v4.4s, v5.4s
+        zip2    v3.4s, v6.4s, v7.4s
+.endif
+
+        // v8~v11 will anyway get set in MatrixA loading, so they are free to use now
+        movi    v8.4s, #1
+        movi    v9.4s, #1
+        movi    v10.4s, #1
+        movi    v11.4s, #1
+
+        cbz     x11,.L\Mode\().InitBlock\Columns\().x\Rows\().SkipScaleByZeroPointB
+
+        ld1     {v4.4s},[x11],#16           // load ZeroPointB[0]
+        ld1     {v5.4s},[x11],#16           // load ZeroPointB[4]
+
+        dup     v6.4s, v4.s[0]
+        dup     v7.4s, v4.s[1]
+        zip1    v8.4s, v6.4s, v7.4s
+
+        dup     v6.4s, v4.s[2]
+        dup     v7.4s, v4.s[3]
+        zip1    v9.4s, v6.4s, v7.4s
+
+        dup     v6.4s, v5.s[0]
+        dup     v7.4s, v5.s[1]
+        zip1    v10.4s, v6.4s, v7.4s
+
+        dup     v6.4s, v5.s[2]
+        dup     v7.4s, v5.s[3]
+        zip1    v11.4s, v6.4s, v7.4s
+
+.L\Mode\().InitBlock\Columns\().x\Rows\().SkipScaleByZeroPointB:
+        dup     v4.4s, v12.s[0]           //boardcast RowSums
+        dup     v5.4s, v12.s[1]
+
+        uzp1    v6.2d, v4.2d, v5.2d
+
+        InitRowAccumulators \Columns\(),16,17,18,19,6
+.if \Rows\() > 2
+        dup     v4.4s, v12.s[2]           //boardcast RowSums
+        dup     v5.4s, v12.s[3]
+
+        uzp1    v6.2d, v4.2d, v5.2d
+
+        InitRowAccumulators \Columns\(),20,21,22,23,6
+.endif
+.if \Rows\() > 4
+        dup     v4.4s,v13.s[0]         // broadcast row sums
+        dup     v5.4s,v13.s[1]
+
+        uzp1    v6.2d, v4.2d, v5.2d
+
+        InitRowAccumulators \Columns\(),24,25,26,27,6
+.endif
+.if \Rows\() > 6
+        dup     v4.4s,v13.s[2]         // broadcast row sums
+        dup     v5.4s,v13.s[3]
+
+        uzp1    v6.2d, v4.2d, v5.2d
+        InitRowAccumulators \Columns\(),28,29,30,31,6
+.endif
+
+        .endm
+
+
+// LoadPackedMatrixABy16Elements
+//
+// Generates the code to load 16 elements from matrix A.
+//
+        .macro  LoadPackedMatrixABy16Elements Rows
+.if \Rows\() == 1
+        ldr     q8,[x0],#8
+.else
+        ldr     q8,[x0],#16
+
+.if \Rows\() > 2
+        ldr     q9,[x0],#16
+.endif
+
+.if \Rows\() > 4
+        ldr     q10,[x0],#16
+.endif
+
+.if \Rows\() > 6
+        ldr     q11,[x0],#16
+.endif
+.endif
+        .endm
+
+
+//
+// MultiplyAccumulateRow
+//
+// Generates the code to multiply and accumulate a single row of the output
+// block.
+//
+
+        .macro  MultiplyAccumulateRow Columns, MatrixAReg, Vec1Reg, Vec2Reg, Vec3Reg, Vec4Reg
+
+        smmla   v\Vec1Reg\().4s, \MatrixAReg\().16b, v4.16b
+.if \Columns\() > 2
+        smmla   v\Vec2Reg\().4s, \MatrixAReg\().16b, v5.16b
+.endif
+.if \Columns\() > 4
+        smmla   v\Vec3Reg\().4s, \MatrixAReg\().16b, v6.16b
+.endif
+.if \Columns\() > 6
+        smmla   v\Vec4Reg\().4s, \MatrixAReg\().16b, v7.16b
+.endif
+
+        .endm
+
+//
+// MultiplyAccumulateBlock
+//
+// Generates the code to multiply and accumulate into the output block.
+//
+
+        .macro  MultiplyAccumulateBlock Columns, Rows
+
+        MultiplyAccumulateRow \Columns\(),v8,16,17,18,19
+.if \Rows\() > 2
+        MultiplyAccumulateRow \Columns\(),v9,20,21,22,23
+.endif
+.if \Rows\() > 4
+        MultiplyAccumulateRow \Columns\(),v10,24,25,26,27
+.endif
+.if \Rows\() > 6
+        MultiplyAccumulateRow \Columns\(),v11,28,29,30,31
+.endif
+
+        .endm
+
+//
+// ComputeBlockLoop
+//
+// Generates the code to loop over K entries of the input matrices to produce
+// the output block.
+//
+
+        .macro  ComputeBlockLoop Mode, Columns, Rows
+
+        InitBlockAccumulators \Mode\(), \Columns\(),\Rows\()
+
+        sub     x9,x3,#1                   //  block count to process
+        tbnz    x9,#63,.L\Mode\().ProcessRemaining\Columns\().x\Rows\().Blocks
+
+.L\Mode\().Compute\Columns\().x\Rows\().BlockBy4Loop:
+
+        LoadPackedMatrixABy16Elements \Rows\()
+        ld1     {v4.16b - v7.16b}, [x1], #64
+        MultiplyAccumulateBlock \Columns\(),\Rows\()
+
+        sub     x9,x9,#1
+        tbz     x9,#63,.L\Mode\().Compute\Columns\().x\Rows\().BlockBy4Loop
+.L\Mode\().ProcessRemaining\Columns\().x\Rows\().Blocks:
+        add     x9,x9,#1                    // correct for over-subtract above
+        cbz     x9,.L\Mode\().Output\Columns\().x\Rows\().Block
+
+.L\Mode\().Compute\Columns\().x\Rows\().BlockBy4PaddedLoop:
+        LoadPackedMatrixABy16Elements \Rows\()
+        ld1     {v4.16b - v7.16b}, [x1], #64
+        MultiplyAccumulateBlock \Columns\(),\Rows\()
+
+.L\Mode\().Output\Columns\().x\Rows\().Block:
+
+        .endm
+
+
+//
+// OutputRow2Element
+// OutputRow4Element
+// OutputRow6Element
+// OutputRow8Element
+// OutputRow10Element
+// OutputRow12Element
+// OutputRow14Element
+// OutputRow16Element
+//
+// Generates the code to store elements to the output block.
+//
+
+        .macro  OutputRow2Element Mode, AddrReg1, AddrReg2, Vec1Reg, Vec2Reg, Vec3Reg, Vec4Reg, last_row
+
+.ifeqs "\Mode\()","Add"
+        ldr     s8,[\AddrReg1\()],#0
+.if \last_row\() == 0
+        ldr     s9,[\AddrReg2\()],#0
+.else
+        mov     x27,#0
+        mov     v9.D[0],x27
+        mov     v9.D[1],x27
+.endif
+        mov     v8.S[2], v9.S[0]
+        add     v8.4s,v8.4s,v\Vec1Reg\().4s
+
+        mov     w27, v8.S[0]
+        str     w27, [\AddrReg1\()],#4
+
+.if \last_row\() == 0
+        mov     w27, v8.S[2]
+        str     w27, [\AddrReg2\()],#4
+.endif
+
+.else
+        mov     w27, v\Vec1Reg\().S[0]
+        str     w27, [\AddrReg1\()],#4
+
+.if \last_row\() == 0
+        mov     w27, v\Vec1Reg\().S[2]
+        str     w27, [\AddrReg2\()],#4
+.endif
+
+.endif
+
+        .endm
+
+
+        .macro  OutputRow4Element Mode, AddrReg1, AddrReg2, Vec1Reg, Vec2Reg, Vec3Reg, Vec4Reg, last_row
+
+.ifeqs "\Mode\()","Add"
+        ldr     d8,[\AddrReg1\()],#0
+.if \last_row\() == 0
+        ldr     d9,[\AddrReg2\()],#0
+.else
+        mov     x27,#0
+        mov     v9.D[0],x27
+        mov     v9.D[1],x27
+.endif
+
+        mov     v8.D[1], v9.D[0]
+
+        add     v8.4s,v8.4s,v\Vec1Reg\().4s
+
+        mov     x27, v8.D[0]
+        mov     x28, v8.D[1]
+
+        str     x27, [\AddrReg1\()],#8
+.if \last_row\() == 0
+        str     x28, [\AddrReg2\()],#8
+.endif
+
+.else
+        mov     x27, v\Vec1Reg\().D[0]
+        mov     x28, v\Vec1Reg\().D[1]
+
+        str     x27, [\AddrReg1\()],#8
+.if \last_row\() == 0
+        str     x28, [\AddrReg2\()],#8
+.endif
+
+.endif
+
+        .endm
+
+
+        .macro  OutputRow6Element Mode, AddrReg1, AddrReg2, Vec1Reg, Vec2Reg, Vec3Reg, Vec4Reg, last_row
+
+.ifeqs "\Mode\()","Add"
+        ldr     d8,[\AddrReg1\()],#8
+        ldr     w28,[\AddrReg1\()],#-8
+        mov     v8.S[2], w28
+.if \last_row\() == 0
+        ldr     d9,[\AddrReg2\()],#8
+        ldr     w27,[\AddrReg2\()],#-8
+        mov     v9.S[2], w27
+.else
+        mov     x27,#0
+        mov     v9.D[0],x27
+        mov     v9.D[1],x27
+.endif
+        uzp1    v4.2d,v\Vec1Reg\().2d,v\Vec2Reg\().2d
+        uzp2    v5.2d,v\Vec1Reg\().2d,v\Vec2Reg\().2d
+
+        add     v8.4s,v8.4s,v4.4s
+        add     v9.4s,v9.4s,v5.4s
+
+        mov     x27, v8.D[0]
+        str     x27, [\AddrReg1\()],#8
+        mov     w27, v8.S[2]
+        str     w27, [\AddrReg1\()],#4
+
+.if \last_row\() == 0
+        mov     x27, v9.D[0]
+        str     x27, [\AddrReg2\()],#8
+        mov     w27, v9.S[2]
+        str     w27, [\AddrReg2\()],#4
+.endif
+
+.else
+        uzp1    v4.2d, v\Vec1Reg\().2d,v\Vec2Reg\().2d
+        uzp2    v5.2d, v\Vec1Reg\().2d,v\Vec2Reg\().2d
+
+        mov     x27, v4.D[0]
+        str     x27, [\AddrReg1\()],#8
+        mov     w27, v4.S[2]
+        str     w27, [\AddrReg1\()],#4
+
+.if \last_row\() == 0
+        mov     x27, v5.D[0]
+        str     x27, [\AddrReg2\()],#8
+        mov     w27, v5.S[2]
+        str     w27, [\AddrReg2\()],#4
+.endif
+
+.endif
+
+        .endm
+
+
+        .macro  OutputRow8Element Mode, AddrReg1, AddrReg2, Vec1Reg, Vec2Reg, Vec3Reg, Vec4Reg, last_row
+
+.ifeqs "\Mode\()","Add"
+        ldr     q8,[\AddrReg1\()],#0
+.if \last_row\() == 0
+        ldr     q9,[\AddrReg2\()],#0
+.else
+        mov     x27,#0
+        mov     v9.D[0],x27
+        mov     v9.D[1],x27
+.endif
+        uzp1    v4.2d,v\Vec1Reg\().2d,v\Vec2Reg\().2d
+        uzp2    v5.2d,v\Vec1Reg\().2d,v\Vec2Reg\().2d
+
+        add     v8.4s,v8.4s,v4.4s
+        add     v9.4s,v9.4s,v5.4s
+
+        str     q8,[\AddrReg1\()],#16
+.if \last_row\() == 0
+        str     q9,[\AddrReg2\()],#16
+.endif
+
+.else
+        uzp1    v4.2d, v\Vec1Reg\().2d,v\Vec2Reg\().2d
+        uzp2    v5.2d, v\Vec1Reg\().2d,v\Vec2Reg\().2d
+
+        str     q4,[\AddrReg1\()],#16
+.if \last_row\() == 0
+        str     q5,[\AddrReg2\()],#16
+.endif
+
+.endif
+
+        .endm
+
+
+        .macro  OutputRow10Element Mode, AddrReg1, AddrReg2, Vec1Reg, Vec2Reg, Vec3Reg, Vec4Reg, last_row
+
+.ifeqs "\Mode\()","Add"
+        ldr     q8,[\AddrReg1\()],#16
+        ldr     w28, [\AddrReg1\()],#-16
+
+.if \last_row\() == 0
+        ldr     q9,[\AddrReg2\()],#16
+        ldr     w27,[\AddrReg2\()],#-16
+.else
+        mov     x27,#0
+        mov     v9.D[0],x27
+        mov     v9.D[1],x27
+.endif
+        uzp1    v4.2d,v\Vec1Reg\().2d,v\Vec2Reg\().2d
+        uzp2    v5.2d,v\Vec1Reg\().2d,v\Vec2Reg\().2d
+
+        add     v8.4s,v8.4s,v4.4s
+        add     v9.4s,v9.4s,v5.4s
+
+        str     q8,[\AddrReg1\()],#16
+.if \last_row\() == 0
+        str     q9,[\AddrReg2\()],#16
+.endif
+        mov     v8.S[0], w28
+        mov     v8.S[2], w27
+
+        add     v8.4s,v8.4s,v\Vec3Reg\().4s
+
+        mov     w27, v8.S[0]
+        mov     w28, v8.S[2]
+
+        str     w27, [\AddrReg1\()],#4
+.if \last_row\() == 0
+        str     w28, [\AddrReg2\()],#4
+.endif
+
+.else
+        uzp1    v4.2d, v\Vec1Reg\().2d,v\Vec2Reg\().2d
+        uzp2    v5.2d, v\Vec1Reg\().2d,v\Vec2Reg\().2d
+
+        str     q4,[\AddrReg1\()],#16
+.if \last_row\() == 0
+        str     q5,[\AddrReg2\()],#16
+.endif
+        mov     w27, v\Vec3Reg\().S[0]
+        mov     w28, v\Vec3Reg\().S[2]
+
+        str     w27, [\AddrReg1\()],#4
+.if \last_row\() == 0
+        str     w28, [\AddrReg2\()],#4
+.endif
+.endif
+
+.endm
+
+
+        .macro  OutputRow12Element Mode, AddrReg1, AddrReg2, Vec1Reg, Vec2Reg, Vec3Reg, Vec4Reg, last_row
+
+.ifeqs "\Mode\()","Add"
+        ldr     q8,[\AddrReg1\()],#16
+        ldr     d10,[\AddrReg1\()],#-16
+.if \last_row\() == 0
+        ldr     q9,[\AddrReg2\()],#16
+        ldr     d11,[\AddrReg2\()],#-16
+.else
+        mov     x27,#0
+        mov     v9.D[0],x27
+        mov     v9.D[1],x27
+        mov     v11.D[0],x27
+.endif
+        uzp1    v4.2d,v\Vec1Reg\().2d,v\Vec2Reg\().2d
+        uzp2    v5.2d,v\Vec1Reg\().2d,v\Vec2Reg\().2d
+
+        add     v8.4s,v8.4s,v4.4s
+        add     v9.4s,v9.4s,v5.4s
+
+        str     q8,[\AddrReg1\()],#16
+.if \last_row\() == 0
+        str     q9,[\AddrReg2\()],#16
+.endif
+
+        mov     v10.D[1], v11.D[0]
+
+        add     v10.4s,v10.4s,v\Vec3Reg\().4s
+
+        mov     x27, v10.D[0]
+        mov     x28, v10.D[1]
+
+        str     x27, [\AddrReg1\()],#8
+.if \last_row\() == 0
+        str     x28, [\AddrReg2\()],#8
+.endif
+
+.else
+        uzp1    v4.2d, v\Vec1Reg\().2d,v\Vec2Reg\().2d
+        uzp2    v5.2d, v\Vec1Reg\().2d,v\Vec2Reg\().2d
+
+        str     q4,[\AddrReg1\()],#16
+.if \last_row\() == 0
+        str     q5,[\AddrReg2\()],#16
+.endif
+        mov     x27, v\Vec3Reg\().D[0]
+        mov     x28, v\Vec3Reg\().D[1]
+
+        str     x27, [\AddrReg1\()],#8
+.if \last_row\() == 0
+        str     x28, [\AddrReg2\()],#8
+.endif
+.endif
+
+        .endm
+
+       .macro  OutputRow14Element Mode, AddrReg1, AddrReg2, Vec1Reg, Vec2Reg, Vec3Reg, Vec4Reg, last_row
+
+.ifeqs "\Mode\()","Add"
+        ldr     q8,[\AddrReg1\()],#16
+        ldr     d10,[\AddrReg1\()],#8
+        ldr     w28, [\AddrReg1\()],#-24
+        mov     v10.S[2], w28
+.if \last_row\() == 0
+        ldr     q9,[\AddrReg2\()],#16
+        ldr     d11,[\AddrReg2\()],#8
+        ldr     w27,[\AddrReg2\()],#-24
+        mov     v11.S[2], w27
+.else
+        mov     x27,#0
+        mov     v9.D[0],x27
+        mov     v9.D[1],x27
+
+        mov     v11.D[0],x27
+        mov     v11.D[1],x27
+.endif
+        uzp1    v4.2d,v\Vec1Reg\().2d,v\Vec2Reg\().2d
+        uzp2    v5.2d,v\Vec1Reg\().2d,v\Vec2Reg\().2d
+
+        uzp1    v6.2d, v\Vec3Reg\().2d,v\Vec4Reg\().2d
+        uzp2    v7.2d, v\Vec3Reg\().2d,v\Vec4Reg\().2d
+
+        add     v8.4s,v8.4s,v4.4s
+        add     v9.4s,v9.4s,v5.4s
+        add     v10.4s,v10.4s,v6.4s
+        add     v11.4s,v11.4s,v7.4s
+
+        str     q8,[\AddrReg1\()],#16
+
+        mov     x27, v10.D[0]
+        str     x27, [\AddrReg1\()],#8
+        mov     w27, v10.S[2]
+        str     w27, [\AddrReg1\()],#4
+
+.if \last_row\() == 0
+        str     q9,[\AddrReg2\()],#16
+        mov     x27, v11.D[0]
+        str     x27, [\AddrReg2\()],#8
+        mov     w27, v11.S[2]
+        str     w27, [\AddrReg2\()],#4
+.endif
+
+.else
+        uzp1    v4.2d, v\Vec1Reg\().2d,v\Vec2Reg\().2d
+        uzp2    v5.2d, v\Vec1Reg\().2d,v\Vec2Reg\().2d
+        uzp1    v6.2d, v\Vec3Reg\().2d,v\Vec4Reg\().2d
+        uzp2    v7.2d, v\Vec3Reg\().2d,v\Vec4Reg\().2d
+
+        str     q4,[\AddrReg1\()],#16
+        mov     x27, v6.D[0]
+        str     x27, [\AddrReg1\()],#8
+        mov     w27, v6.S[2]
+        str     w27, [\AddrReg1\()],#4
+
+.if \last_row\() == 0
+        str     q5,[\AddrReg2\()],#16
+        mov     x27, v7.D[0]
+        str     x27, [\AddrReg2\()],#8
+        mov     w27, v7.S[2]
+        str     w27, [\AddrReg2\()],#4
+.endif
+.endif
+
+        .endm
+
+
+        .macro  OutputRow16Element Mode, AddrReg1, AddrReg2, Vec1Reg, Vec2Reg, Vec3Reg, Vec4Reg, last_row
+
+.ifeqs "\Mode\()","Add"
+        ldp     q8,q10,[\AddrReg1\()],#0
+.if \last_row\() == 0
+        ldp     q9,q11,[\AddrReg2\()],#0
+.else
+        mov     x27,#0
+        mov     v9.D[0],x27
+        mov     v9.D[1],x27
+
+        mov     v11.D[0],x27
+        mov     v11.D[1],x27
+.endif
+        uzp1    v4.2d,v\Vec1Reg\().2d,v\Vec2Reg\().2d
+        uzp2    v5.2d,v\Vec1Reg\().2d,v\Vec2Reg\().2d
+
+        uzp1    v6.2d, v\Vec3Reg\().2d,v\Vec4Reg\().2d
+        uzp2    v7.2d, v\Vec3Reg\().2d,v\Vec4Reg\().2d
+
+        add     v8.4s,v8.4s,v4.4s
+        add     v9.4s,v9.4s,v5.4s
+        add     v10.4s,v10.4s,v6.4s
+        add     v11.4s,v11.4s,v7.4s
+
+        stp     q8,q10,[\AddrReg1\()],#32
+.if \last_row\() == 0
+        stp     q9,q11,[\AddrReg2\()],#32
+.endif
+.else
+        uzp1    v4.2d, v\Vec1Reg\().2d,v\Vec2Reg\().2d
+        uzp2    v5.2d, v\Vec1Reg\().2d,v\Vec2Reg\().2d
+        uzp1    v6.2d, v\Vec3Reg\().2d,v\Vec4Reg\().2d
+        uzp2    v7.2d, v\Vec3Reg\().2d,v\Vec4Reg\().2d
+
+        stp     q4,q6,[\AddrReg1\()],#32
+.if \last_row\() == 0
+        stp     q5,q7,[\AddrReg2\()],#32
+.endif
+.endif
+
+        .endm
+
+//
+// OutputBlock
+//
+// Generates the code to store the output block.
+//
+
+        .macro  OutputBlock Mode, Columns, Rows
+
+        OutputRow\Columns\()Element \Mode\(),x2,x13,16,17,18,19,(\Rows\() == 1)
+
+.if \Rows\() > 2
+        OutputRow\Columns\()Element \Mode\(),x14,x15,20,21,22,23,(\Rows\() == 3)
+.endif
+
+.if \Rows\() > 4
+        OutputRow\Columns\()Element \Mode\(),x16,x17,24,25,26,27,(\Rows\() == 5)
+.endif
+
+.if \Rows\() > 6
+        OutputRow\Columns\()Element \Mode\(),x18,x19,28,29,30,31,(\Rows\() == 7)
+.endif
+
+        .endm
+//
+// ProcessRows
+//
+// Generates the code to process a compute and store the output block for a
+// fixed number of rows.
+//
+
+        .macro  ProcessRows Mode, Rows
+        mov     x4,#\Rows\()                   // return number of rows handled
+        cmp     x5,#6
+        ble     .L\Mode\().ProcessNextColumnLoop6x\Rows\()
+
+.L\Mode\().ProcessNextColumnLoop8x\Rows\():
+        ComputeBlockLoop \Mode\(),8,\Rows\()
+
+        sub     x5,x5,#8
+        cmp     x5,#0
+        blt     .L\Mode\().Output14ElementsOnlyFor\Rows\()
+        OutputBlock \Mode\(),16,\Rows\()
+        mov     x0,x8               // reload matrix A
+        cmp     x5,#6
+        bgt     .L\Mode\().ProcessNextColumnLoop8x\Rows\()
+        cbz     x5,.L\Mode\().ExitKernel
+
+.L\Mode\().ProcessNextColumnLoop6x\Rows\():
+
+        cmp     x5,#4
+        ble     .L\Mode\().ProcessNextColumnLoop4x\Rows\()
+        ComputeBlockLoop \Mode\(),6,\Rows\()
+        sub     x5,x5,#6
+        cmp     x5,#0
+        blt     .L\Mode\().Output10ElementsOnlyFor\Rows\()
+        OutputBlock \Mode\(),12,\Rows\()
+        mov     x0,x8               // reload matrix A
+        cmp     x5,#4
+        bgt     .L\Mode\().ProcessNextColumnLoop6x\Rows\()
+        b       .L\Mode\().ExitKernel
+
+.L\Mode\().ProcessNextColumnLoop4x\Rows\():
+        cmp     x5,#2
+        ble     .L\Mode\().ProcessNextColumnLoop2x\Rows\()
+        ComputeBlockLoop \Mode\(),4,\Rows\()
+        sub     x5,x5,#4
+        cmp     x5,#0
+        blt     .L\Mode\().Output6ElementsOnlyFor\Rows\()
+        OutputBlock \Mode\(),8,\Rows\()
+        mov     x0,x8               // reload matrix A
+        cmp     x5,#2
+        bgt     .L\Mode\().ProcessNextColumnLoop4x\Rows\()
+        b       .L\Mode\().ExitKernel
+
+.L\Mode\().ProcessNextColumnLoop2x\Rows\():
+        ComputeBlockLoop \Mode\(),2,\Rows\()
+        sub     x5,x5,#2
+        cmp     x5,#0
+        blt     .L\Mode\().Output2ElementsOnlyFor\Rows\()
+        OutputBlock \Mode\(),4,\Rows\()
+        mov     x0,x8               // reload matrix A
+        cmp     x5,#2
+        b       .L\Mode\().ExitKernel
+
+.L\Mode\().Output14ElementsOnlyFor\Rows\():
+	OutputBlock \Mode\(),14,\Rows\()
+        b       .L\Mode\().ExitKernel
+
+
+.L\Mode\().Output10ElementsOnlyFor\Rows\():
+        OutputBlock \Mode\(),10,\Rows\()
+        b       .L\Mode\().ExitKernel
+
+
+.L\Mode\().Output6ElementsOnlyFor\Rows\():
+        OutputBlock \Mode\(),6,\Rows\()
+        b       .L\Mode\().ExitKernel
+
+
+.L\Mode\().Output2ElementsOnlyFor\Rows\():
+        OutputBlock \Mode\(),2,\Rows\()
+        b       .L\Mode\().ExitKernel
+
+        .endm
+
+
+/*++
+
+Routine Description:
+
+    This routine is an inner kernel to compute matrix multiplication for a
+    set of rows.
+
+Arguments:
+
+    A (x0) - Supplies the address of matrix A. The matrix data has been packed
+        using MlasGemmQuantCopyPackA<MLAS_GEMM_S8S8_KERNEL_SMMLA>.
+
+    B (x1) - Supplies the address of matrix B. The matrix data has been packed
+        using MlasGemmQuantCopyPackB<MLAS_GEMM_S8S8_KERNEL_SMMLA>.
+
+    C (x2) - Supplies the address of matrix C.
+
+    PackedCountK (x3) - Supplies the number of packed columns from matrix A and
+        the number of packed rows from matrix B to iterate over.
+
+    CountM (x4) - Supplies the maximum number of rows that can be processed for
+        matrix A and matrix C. The actual number of rows handled for this
+        invocation depends on the kernel implementation.
+
+    CountN (x5) - Supplies the number of columns from matrix B and matrix C to
+        iterate over.
+
+    ldc (x6) - Supplies the first dimension of matrix C.
+
+    RowSumBuffer (x7) - Supplies the sum of each row from matrix A. These values
+        have been pre-scaled by the zero point offset of matrix B if the offset
+        is per-tensor (ZeroPointB is nullptr). Otherwise, these values must be
+        scaled by the per-column zero point offsets of matrix B. These values are
+        accumulated into every row of matrix C.
+
+    ColumnSumBuffer - Supplies the sum of each column from matrix B multiplied
+        by the zero point offset of matrix A. These values are accumulated into
+        every column of matrix C.
+
+    ZeroPointB - Optionally supplies the per-column zero point offsets of matrix
+        B, else nullptr if the matrix B is using per-tensor quantization.
+
+Return Value:
+
+    Returns the number of rows handled.
+
+--*/
+
+       .macro  QgemmS8S8KernelSmmlaFunction Mode
+
+        FUNCTION_ENTRY MlasGemmS8S8KernelSmmla\Mode\()
+
+        ldr     x10,[sp, #0]
+        ldr     x11,[sp,#8]
+
+        stp     x19, x20, [sp, #.LMlasQgemmKernel_SavedRegisters_Neg]!
+        stp     x21, x22, [sp, #.LMlasQgemmKernel_backup_x21_x22]
+        stp     x23, x24, [sp, #.LMlasQgemmKernel_backup_x23_x24]
+        stp     x25, x26, [sp, #.LMlasQgemmKernel_backup_x25_x26]
+        stp     x27, x28, [sp, #.LMlasQgemmKernel_backup_x27_x28]
+        stp     d8, d9, [sp, #.LMlasQgemmKernel_backup_d8_d9]
+        stp     d10, d11, [sp, #.LMlasQgemmKernel_backup_d10_d11]
+        stp     d12, d13, [sp, #.LMlasQgemmKernel_backup_d12_d13]
+        stp     d14, d15, [sp, #.LMlasQgemmKernel_backup_d14_d15]
+
+        add     x13,x2,x6,lsl #2            // compute matrix C plus 1 row
+        add     x14,x13,x6,lsl #2           // compute matrix C plus 2 rows
+        add     x15,x14,x6,lsl #2           // compute matrix C plus 3 rows
+        add     x16,x15,x6,lsl #2           // compute matrix C plus 4 rows
+        add     x17,x16,x6,lsl #2           // compute matrix C plus 5 rows
+        add     x18,x17,x6,lsl #2           // compute matrix C plus 6 rows
+        add     x19,x18,x6,lsl #2           // compute matrix C plus 7 rows
+
+        mov     x8,x0                       // save matrix A
+
+//
+// Process 8 rows of the matrices.
+//
+        ld1     {v12.4s},[x7],#16            // load row sum 1 ~ 4
+        cmp     x4,#8
+        blt     .L\Mode\().ProcessCountMLessThan8
+        ld1     {v13.4s},[x7],#16            // load row sum 5 ~ 8
+        ProcessRows \Mode\(),8
+
+//
+// Restore non-volatile registers and return.
+//
+
+.L\Mode\().ExitKernel:
+        mov     x0,x4
+
+        ldp     d14, d15, [sp, #.LMlasQgemmKernel_backup_d14_d15]
+        ldp     d12, d13, [sp, #.LMlasQgemmKernel_backup_d12_d13]
+        ldp     d10, d11, [sp, #.LMlasQgemmKernel_backup_d10_d11]
+        ldp     d8, d9, [sp, #.LMlasQgemmKernel_backup_d8_d9]
+        ldp     x27, x28, [sp, #.LMlasQgemmKernel_backup_x27_x28]
+        ldp     x25, x26, [sp, #.LMlasQgemmKernel_backup_x25_x26]
+        ldp     x23, x24, [sp, #.LMlasQgemmKernel_backup_x23_x24]
+        ldp     x21, x22, [sp, #.LMlasQgemmKernel_backup_x21_x22]
+        ldp     x19, x20, [sp], #.LMlasQgemmKernel_SavedRegisters
+
+        ret
+
+//
+// Process 4 rows of the matrix.
+//
+
+.L\Mode\().ProcessCountMLessThan8:
+        cmp     x4,#4
+        blt     .L\Mode\().ProcessCountMLessThan4
+        ProcessRows \Mode\(),4
+        b       .L\Mode\().ExitKernel
+
+//
+// Process 2 row of the matrix.
+//
+
+.L\Mode\().ProcessCountMLessThan4:
+        cmp     x4,#2
+        blt     .L\Mode\().ProcessCountMLessThan2
+
+        ProcessRows \Mode\(),2
+        b       .L\Mode\().ExitKernel
+
+
+//
+// Process the last row of the matrix.
+//
+
+.L\Mode\().ProcessCountMLessThan2:
+        ProcessRows \Mode\(),1
+        b       .L\Mode\().ExitKernel
+
+
+        .endm
+
+        QgemmS8S8KernelSmmlaFunction Zero
+        QgemmS8S8KernelSmmlaFunction Add
+
+        .end
diff --git a/onnxruntime/core/mlas/lib/aarch64/QgemmU8X8KernelUmmla.S b/onnxruntime/core/mlas/lib/aarch64/QgemmU8X8KernelUmmla.S
new file mode 100644
index 0000000000000..baf6e21e6ff06
--- /dev/null
+++ b/onnxruntime/core/mlas/lib/aarch64/QgemmU8X8KernelUmmla.S
@@ -0,0 +1,922 @@
+/*++
+
+Copyright (c) Microsoft Corporation. All rights reserved.
+Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+
+Licensed under the MIT License.
+
+Module Name:
+
+    QgemmU8X8KernelUmmla.s
+
+Abstract:
+
+    This module implements the kernels for the Int8 precision matrix/matrix
+    multiply operation (QGEMM).
+
+--*/
+
+#include "asmmacro.h"
+
+        .text
+
+//
+// Stack frame layout for the ummla kernel. d8-d15, x19-x30 need save
+//
+        .equ  .LMlasQgemmKernel_backup_x19_x20,    0
+        .equ  .LMlasQgemmKernel_backup_x21_x22,    16
+        .equ  .LMlasQgemmKernel_backup_x23_x24,    32
+        .equ  .LMlasQgemmKernel_backup_x25_x26,    48
+        .equ  .LMlasQgemmKernel_backup_x27_x28,    64
+        .equ  .LMlasQgemmKernel_backup_d8_d9,      80
+        .equ  .LMlasQgemmKernel_backup_d10_d11,    96
+        .equ  .LMlasQgemmKernel_backup_d12_d13,    112
+        .equ  .LMlasQgemmKernel_backup_d14_d15,    128
+        .equ  .LMlasQgemmKernel_SavedRegisters,    144
+        .equ  .LMlasQgemmKernel_SavedRegisters_Neg, -144
+
+
+//
+// Init Row Accumulators
+//
+// Generates the code to initialize the accumulators for a single row of the output
+// block.
+//
+//
+//  Accumulators are initialized to ZeroPointB * RowSum + ColumnSum
+//  x7 for RowSumsBuffer pointer
+//  x10 for ColumnSumBuffer pointer
+//  x11 for ZeroPointB buffer pointer
+//
+//  v12~v13 for RowSums values
+//  v14~v15 for ColumnSums values
+//  v0~v3 for ZeroPointB values
+//
+        .macro  InitRowAccumulators Columns, Vec1Reg, Vec2Reg, Vec3Reg, Vec4Reg, RowSumReg
+
+        mul     v7.4s, v\RowSumReg\().4s, v8.4s
+        mov     v\Vec1Reg\().16b, v7.16b
+        add     v\Vec1Reg\().4s, v\Vec1Reg\().4s, v0.4s
+.if \Columns\() > 2
+        mul     v7.4s, v\RowSumReg\().4s, v9.4s
+        mov     v\Vec2Reg\().16b, v7.16b
+        add     v\Vec2Reg\().4s, v\Vec2Reg\().4s, v1.4s
+.endif
+.if \Columns\() > 4
+        mul     v7.4s, v\RowSumReg\().4s, v10.4s
+        mov     v\Vec3Reg\().16b, v7.16b
+        add     v\Vec3Reg\().4s, v\Vec3Reg\().4s, v2.4s
+.endif
+.if \Columns\() > 6
+        mul     v7.4s, v\RowSumReg\().4s, v11.4s
+        mov     v\Vec4Reg\().16b, v7.16b
+        add     v\Vec4Reg\().4s, v\Vec4Reg\().4s, v3.4s
+.endif
+
+        .endm
+
+
+//
+// InitBlockAccumulators
+//
+// Generates the code to initialize the accumulators for 8x8 output
+// block.
+//
+        .macro  InitBlockAccumulators Mode, Columns, Rows
+
+        ld1     {v14.4s},[x10],#16            // load ColumnSumBuffer[0]
+.if \Columns\() > 4
+        ld1     {v15.4s},[x10],#16            // load ColumnSumBuffer[4]
+.endif
+        // v4~v7 will be set to matrixB after this, so, they can used now
+        dup     v4.4s,v14.s[0]              // broadcast column
+        dup     v5.4s,v14.s[1]
+        dup     v6.4s,v14.s[2]
+        dup     v7.4s,v14.s[3]
+
+        zip1    v0.4s, v4.4s, v5.4s
+        zip2    v1.4s, v6.4s, v7.4s
+.if \Columns\() > 4
+        dup     v4.4s,v15.s[0]              // broadcast column
+        dup     v5.4s,v15.s[1]
+        dup     v6.4s,v15.s[2]
+        dup     v7.4s,v15.s[3]
+
+        zip1    v2.4s, v4.4s, v5.4s
+        zip2    v3.4s, v6.4s, v7.4s
+.endif
+
+        // v8~v11 will anyway get set in MatrixA loading, so they are free to use now
+        movi    v8.4s, #1
+        movi    v9.4s, #1
+        movi    v10.4s, #1
+        movi    v11.4s, #1
+
+        cbz     x11,.L\Mode\().InitBlock\Columns\().x\Rows\().SkipScaleByZeroPointB
+
+        ld1     {v4.4s},[x11],#16           // load ZeroPointB[0]
+        ld1     {v5.4s},[x11],#16           // load ZeroPointB[4]
+
+        dup     v6.4s, v4.s[0]
+        dup     v7.4s, v4.s[1]
+        zip1    v8.4s, v6.4s, v7.4s
+
+        dup     v6.4s, v4.s[2]
+        dup     v7.4s, v4.s[3]
+        zip1    v9.4s, v6.4s, v7.4s
+
+        dup     v6.4s, v5.s[0]
+        dup     v7.4s, v5.s[1]
+        zip1    v10.4s, v6.4s, v7.4s
+
+        dup     v6.4s, v5.s[2]
+        dup     v7.4s, v5.s[3]
+        zip1    v11.4s, v6.4s, v7.4s
+
+.L\Mode\().InitBlock\Columns\().x\Rows\().SkipScaleByZeroPointB:
+        dup     v4.4s, v12.s[0]           //boardcast RowSums
+        dup     v5.4s, v12.s[1]
+
+        uzp1    v6.2d, v4.2d, v5.2d
+
+        InitRowAccumulators \Columns\(),16,17,18,19,6
+.if \Rows\() > 2
+        dup     v4.4s, v12.s[2]           //boardcast RowSums
+        dup     v5.4s, v12.s[3]
+
+        uzp1    v6.2d, v4.2d, v5.2d
+
+        InitRowAccumulators \Columns\(),20,21,22,23,6
+.endif
+.if \Rows\() > 4
+        dup     v4.4s,v13.s[0]         // broadcast row sums
+        dup     v5.4s,v13.s[1]
+
+        uzp1    v6.2d, v4.2d, v5.2d
+
+        InitRowAccumulators \Columns\(),24,25,26,27,6
+.endif
+.if \Rows\() > 6
+        dup     v4.4s,v13.s[2]         // broadcast row sums
+        dup     v5.4s,v13.s[3]
+
+        uzp1    v6.2d, v4.2d, v5.2d
+        InitRowAccumulators \Columns\(),28,29,30,31,6
+.endif
+
+        .endm
+
+
+// LoadPackedMatrixABy16Elements
+//
+// Generates the code to load 16 elements from matrix A.
+//
+        .macro  LoadPackedMatrixABy16Elements Rows
+.if \Rows\() == 1
+        ldr     q8,[x0],#8
+.else
+        ldr     q8,[x0],#16
+
+.if \Rows\() > 2
+        ldr     q9,[x0],#16
+.endif
+
+.if \Rows\() > 4
+        ldr     q10,[x0],#16
+.endif
+
+.if \Rows\() > 6
+        ldr     q11,[x0],#16
+.endif
+.endif
+        .endm
+
+
+//
+// MultiplyAccumulateRow
+//
+// Generates the code to multiply and accumulate a single row of the output
+// block.
+//
+
+        .macro  MultiplyAccumulateRow Columns, MatrixAReg, Vec1Reg, Vec2Reg, Vec3Reg, Vec4Reg
+
+        ummla   v\Vec1Reg\().4s, \MatrixAReg\().16b, v4.16b
+.if \Columns\() > 2
+        ummla   v\Vec2Reg\().4s, \MatrixAReg\().16b, v5.16b
+.endif
+.if \Columns\() > 4
+	ummla   v\Vec3Reg\().4s, \MatrixAReg\().16b, v6.16b
+.endif
+.if \Columns\() > 6
+        ummla   v\Vec4Reg\().4s, \MatrixAReg\().16b, v7.16b
+.endif
+
+        .endm
+
+//
+// MultiplyAccumulateBlock
+//
+// Generates the code to multiply and accumulate into the output block.
+//
+
+        .macro  MultiplyAccumulateBlock Columns, Rows
+
+        MultiplyAccumulateRow \Columns\(),v8,16,17,18,19
+.if \Rows\() > 2
+        MultiplyAccumulateRow \Columns\(),v9,20,21,22,23
+.endif
+.if \Rows\() > 4
+        MultiplyAccumulateRow \Columns\(),v10,24,25,26,27
+.endif
+.if \Rows\() > 6
+        MultiplyAccumulateRow \Columns\(),v11,28,29,30,31
+.endif
+
+        .endm
+
+//
+// ComputeBlockLoop
+//
+// Generates the code to loop over K entries of the input matrices to produce
+// the output block.
+//
+
+        .macro  ComputeBlockLoop Mode, Columns, Rows
+
+        InitBlockAccumulators \Mode\(), \Columns\(),\Rows\()
+
+        sub     x9,x3,#1                   //  block count to process
+        tbnz    x9,#63,.L\Mode\().ProcessRemaining\Columns\().x\Rows\().Blocks
+
+.L\Mode\().Compute\Columns\().x\Rows\().BlockBy4Loop:
+
+        LoadPackedMatrixABy16Elements \Rows\()
+        ld1     {v4.16b - v7.16b}, [x1], #64
+        MultiplyAccumulateBlock \Columns\(),\Rows\()
+
+        sub     x9,x9,#1
+        tbz     x9,#63,.L\Mode\().Compute\Columns\().x\Rows\().BlockBy4Loop
+.L\Mode\().ProcessRemaining\Columns\().x\Rows\().Blocks:
+        add     x9,x9,#1                    // correct for over-subtract above
+        cbz     x9,.L\Mode\().Output\Columns\().x\Rows\().Block
+
+.L\Mode\().Compute\Columns\().x\Rows\().BlockBy4PaddedLoop:
+        LoadPackedMatrixABy16Elements \Rows\()
+        ld1     {v4.16b - v7.16b}, [x1], #64
+        MultiplyAccumulateBlock \Columns\(),\Rows\()
+
+.L\Mode\().Output\Columns\().x\Rows\().Block:
+
+        .endm
+
+
+//
+// OutputRow2Element
+// OutputRow4Element
+// OutputRow6Element
+// OutputRow8Element
+// OutputRow10Element
+// OutputRow12Element
+// OutputRow14Element
+// OutputRow16Element
+//
+// Generates the code to store elements to the output block.
+//
+
+        .macro  OutputRow2Element Mode, AddrReg1, AddrReg2, Vec1Reg, Vec2Reg, Vec3Reg, Vec4Reg, last_row
+
+.ifeqs "\Mode\()","Add"
+        ldr     s8,[\AddrReg1\()],#0
+.if \last_row\() == 0
+        ldr     s9,[\AddrReg2\()],#0
+.else
+        mov     x27,#0
+        mov     v9.D[0],x27
+        mov     v9.D[1],x27
+.endif
+        mov     v8.S[2], v9.S[0]
+        add     v8.4s,v8.4s,v\Vec1Reg\().4s
+
+        mov     w27, v8.S[0]
+        str     w27, [\AddrReg1\()],#4
+
+.if \last_row\() == 0
+        mov     w27, v8.S[2]
+        str     w27, [\AddrReg2\()],#4
+.endif
+
+.else
+        mov     w27, v\Vec1Reg\().S[0]
+        str     w27, [\AddrReg1\()],#4
+
+.if \last_row\() == 0
+        mov    w27, v\Vec1Reg\().S[2]
+        str    w27, [\AddrReg2\()],#4
+.endif
+
+.endif
+
+        .endm
+
+
+        .macro  OutputRow4Element Mode, AddrReg1, AddrReg2, Vec1Reg, Vec2Reg, Vec3Reg, Vec4Reg, last_row
+
+.ifeqs "\Mode\()","Add"
+        ldr     d8,[\AddrReg1\()],#0
+.if \last_row\() == 0
+        ldr     d9,[\AddrReg2\()],#0
+.else
+        mov     x27,#0
+        mov     v9.D[0],x27
+        mov     v9.D[1],x27
+.endif
+
+        mov     v8.D[1], v9.D[0]
+
+        add     v8.4s,v8.4s,v\Vec1Reg\().4s
+
+        mov     x27, v8.D[0]
+        mov     x28, v8.D[1]
+
+        str     x27, [\AddrReg1\()],#8
+.if \last_row\() == 0
+        str     x28, [\AddrReg2\()],#8
+.endif
+
+.else
+        mov     x27, v\Vec1Reg\().D[0]
+        mov     x28, v\Vec1Reg\().D[1]
+
+        str     x27, [\AddrReg1\()],#8
+.if \last_row\() == 0
+        str     x28, [\AddrReg2\()],#8
+.endif
+
+.endif
+
+        .endm
+
+
+        .macro  OutputRow6Element Mode, AddrReg1, AddrReg2, Vec1Reg, Vec2Reg, Vec3Reg, Vec4Reg, last_row
+
+.ifeqs "\Mode\()","Add"
+        ldr     d8,[\AddrReg1\()],#8
+        ldr     w28,[\AddrReg1\()],#-8
+        mov     v8.S[2], w28
+.if \last_row\() == 0
+        ldr     d9,[\AddrReg2\()],#8
+        ldr     w27,[\AddrReg2\()],#-8
+        mov     v9.S[2], w27
+.else
+        mov     x27,#0
+        mov     v9.D[0],x27
+        mov     v9.D[1],x27
+.endif
+        uzp1    v4.2d,v\Vec1Reg\().2d,v\Vec2Reg\().2d
+        uzp2    v5.2d,v\Vec1Reg\().2d,v\Vec2Reg\().2d
+
+        add     v8.4s,v8.4s,v4.4s
+        add     v9.4s,v9.4s,v5.4s
+
+        mov     x27, v8.D[0]
+        str     x27, [\AddrReg1\()],#8
+        mov     w27, v8.S[2]
+        str     w27, [\AddrReg1\()],#4
+
+.if \last_row\() == 0
+        mov     x27, v9.D[0]
+        str     x27, [\AddrReg2\()],#8
+        mov     w27, v9.S[2]
+        str     w27, [\AddrReg2\()],#4
+.endif
+
+.else
+        uzp1    v4.2d, v\Vec1Reg\().2d,v\Vec2Reg\().2d
+        uzp2    v5.2d, v\Vec1Reg\().2d,v\Vec2Reg\().2d
+
+        mov     x27, v4.D[0]
+        str     x27, [\AddrReg1\()],#8
+        mov     w27, v4.S[2]
+        str     w27, [\AddrReg1\()],#4
+
+.if \last_row\() == 0
+        mov     x27, v5.D[0]
+        str     x27, [\AddrReg2\()],#8
+        mov     w27, v5.S[2]
+        str     w27, [\AddrReg2\()],#4
+.endif
+
+.endif
+
+        .endm
+
+
+        .macro  OutputRow8Element Mode, AddrReg1, AddrReg2, Vec1Reg, Vec2Reg, Vec3Reg, Vec4Reg, last_row
+
+.ifeqs "\Mode\()","Add"
+        ldr     q8,[\AddrReg1\()],#0
+.if \last_row\() == 0
+        ldr     q9,[\AddrReg2\()],#0
+.else
+        mov     x27,#0
+        mov     v9.D[0],x27
+        mov     v9.D[1],x27
+.endif
+        uzp1    v4.2d,v\Vec1Reg\().2d,v\Vec2Reg\().2d
+        uzp2    v5.2d,v\Vec1Reg\().2d,v\Vec2Reg\().2d
+
+        add     v8.4s,v8.4s,v4.4s
+        add     v9.4s,v9.4s,v5.4s
+
+        str     q8,[\AddrReg1\()],#16
+.if \last_row\() == 0
+        str     q9,[\AddrReg2\()],#16
+.endif
+
+.else
+        uzp1    v4.2d, v\Vec1Reg\().2d,v\Vec2Reg\().2d
+        uzp2    v5.2d, v\Vec1Reg\().2d,v\Vec2Reg\().2d
+
+        str     q4,[\AddrReg1\()],#16
+.if \last_row\() == 0
+        str     q5,[\AddrReg2\()],#16
+.endif
+
+.endif
+
+        .endm
+
+
+        .macro  OutputRow10Element Mode, AddrReg1, AddrReg2, Vec1Reg, Vec2Reg, Vec3Reg, Vec4Reg, last_row
+
+.ifeqs "\Mode\()","Add"
+        ldr     q8,[\AddrReg1\()],#16
+        ldr     w28, [\AddrReg1\()],#-16
+
+.if \last_row\() == 0
+        ldr     q9,[\AddrReg2\()],#16
+        ldr     w27,[\AddrReg2\()],#-16
+.else
+        mov     x27,#0
+        mov     v9.D[0],x27
+        mov     v9.D[1],x27
+.endif
+        uzp1    v4.2d,v\Vec1Reg\().2d,v\Vec2Reg\().2d
+        uzp2    v5.2d,v\Vec1Reg\().2d,v\Vec2Reg\().2d
+
+        add     v8.4s,v8.4s,v4.4s
+        add     v9.4s,v9.4s,v5.4s
+
+        str     q8,[\AddrReg1\()],#16
+.if \last_row\() == 0
+        str     q9,[\AddrReg2\()],#16
+.endif
+        mov     v8.S[0], w28
+        mov     v8.S[2], w27
+
+        add     v8.4s,v8.4s,v\Vec3Reg\().4s
+
+        mov     w27, v8.S[0]
+        mov     w28, v8.S[2]
+
+        str     w27, [\AddrReg1\()],#4
+.if \last_row\() == 0
+        str     w28, [\AddrReg2\()],#4
+.endif
+
+.else
+        uzp1    v4.2d, v\Vec1Reg\().2d,v\Vec2Reg\().2d
+        uzp2    v5.2d, v\Vec1Reg\().2d,v\Vec2Reg\().2d
+
+        str     q4,[\AddrReg1\()],#16
+.if \last_row\() == 0
+        str     q5,[\AddrReg2\()],#16
+.endif
+        mov     w27, v\Vec3Reg\().S[0]
+        mov     w28, v\Vec3Reg\().S[2]
+
+        str     w27, [\AddrReg1\()],#4
+.if \last_row\() == 0
+        str     w28, [\AddrReg2\()],#4
+.endif
+.endif
+
+.endm
+
+
+        .macro  OutputRow12Element Mode, AddrReg1, AddrReg2, Vec1Reg, Vec2Reg, Vec3Reg, Vec4Reg, last_row
+
+.ifeqs "\Mode\()","Add"
+        ldr     q8,[\AddrReg1\()],#16
+        ldr     d10,[\AddrReg1\()],#-16
+.if \last_row\() == 0
+        ldr     q9,[\AddrReg2\()],#16
+        ldr     d11,[\AddrReg2\()],#-16
+.else
+        mov     x27,#0
+        mov     v9.D[0],x27
+        mov     v9.D[1],x27
+        mov     v11.D[0],x27
+.endif
+        uzp1    v4.2d,v\Vec1Reg\().2d,v\Vec2Reg\().2d
+        uzp2    v5.2d,v\Vec1Reg\().2d,v\Vec2Reg\().2d
+
+        add     v8.4s,v8.4s,v4.4s
+        add     v9.4s,v9.4s,v5.4s
+
+        str     q8,[\AddrReg1\()],#16
+.if \last_row\() == 0
+        str     q9,[\AddrReg2\()],#16
+.endif
+
+        mov v10.D[1], v11.D[0]
+
+        add     v10.4s,v10.4s,v\Vec3Reg\().4s
+
+        mov     x27, v10.D[0]
+        mov     x28, v10.D[1]
+
+        str     x27, [\AddrReg1\()],#8
+.if \last_row\() == 0
+        str     x28, [\AddrReg2\()],#8
+.endif
+
+.else
+        uzp1    v4.2d, v\Vec1Reg\().2d,v\Vec2Reg\().2d
+        uzp2    v5.2d, v\Vec1Reg\().2d,v\Vec2Reg\().2d
+
+        str     q4,[\AddrReg1\()],#16
+.if \last_row\() == 0
+        str     q5,[\AddrReg2\()],#16
+.endif
+        mov     x27, v\Vec3Reg\().D[0]
+        mov     x28, v\Vec3Reg\().D[1]
+
+        str     x27, [\AddrReg1\()],#8
+.if \last_row\() == 0
+        str     x28, [\AddrReg2\()],#8
+.endif
+.endif
+
+        .endm
+
+       .macro  OutputRow14Element Mode, AddrReg1, AddrReg2, Vec1Reg, Vec2Reg, Vec3Reg, Vec4Reg, last_row
+
+.ifeqs "\Mode\()","Add"
+        ldr     q8,[\AddrReg1\()],#16
+        ldr     d10,[\AddrReg1\()],#8
+        ldr     w28, [\AddrReg1\()],#-24
+        mov     v10.S[2], w28
+.if \last_row\() == 0
+        ldr     q9,[\AddrReg2\()],#16
+        ldr     d11,[\AddrReg2\()],#8
+        ldr     w27,[\AddrReg2\()],#-24
+        mov     v11.S[2], w27
+.else
+        mov     x27,#0
+        mov     v9.D[0],x27
+        mov     v9.D[1],x27
+
+        mov     v11.D[0],x27
+        mov     v11.D[1],x27
+.endif
+        uzp1    v4.2d,v\Vec1Reg\().2d,v\Vec2Reg\().2d
+        uzp2    v5.2d,v\Vec1Reg\().2d,v\Vec2Reg\().2d
+
+        uzp1    v6.2d, v\Vec3Reg\().2d,v\Vec4Reg\().2d
+        uzp2    v7.2d, v\Vec3Reg\().2d,v\Vec4Reg\().2d
+
+        add     v8.4s,v8.4s,v4.4s
+        add     v9.4s,v9.4s,v5.4s
+        add     v10.4s,v10.4s,v6.4s
+        add     v11.4s,v11.4s,v7.4s
+
+        str     q8,[\AddrReg1\()],#16
+
+        mov     x27, v10.D[0]
+        str     x27, [\AddrReg1\()],#8
+        mov     w27, v10.S[2]
+        str     w27, [\AddrReg1\()],#4
+
+.if \last_row\() == 0
+        str     q9,[\AddrReg2\()],#16
+        mov     x27, v11.D[0]
+        str     x27, [\AddrReg2\()],#8
+        mov     w27, v11.S[2]
+        str     w27, [\AddrReg2\()],#4
+.endif
+
+.else
+        uzp1    v4.2d, v\Vec1Reg\().2d,v\Vec2Reg\().2d
+        uzp2    v5.2d, v\Vec1Reg\().2d,v\Vec2Reg\().2d
+        uzp1    v6.2d, v\Vec3Reg\().2d,v\Vec4Reg\().2d
+        uzp2    v7.2d, v\Vec3Reg\().2d,v\Vec4Reg\().2d
+
+        str     q4,[\AddrReg1\()],#16
+        mov     x27, v6.D[0]
+        str     x27, [\AddrReg1\()],#8
+        mov     w27, v6.S[2]
+        str     w27, [\AddrReg1\()],#4
+
+.if \last_row\() == 0
+        str     q5,[\AddrReg2\()],#16
+        mov     x27, v7.D[0]
+        str     x27, [\AddrReg2\()],#8
+        mov     w27, v7.S[2]
+        str     w27, [\AddrReg2\()],#4
+.endif
+.endif
+
+        .endm
+
+
+        .macro  OutputRow16Element Mode, AddrReg1, AddrReg2, Vec1Reg, Vec2Reg, Vec3Reg, Vec4Reg, last_row
+
+.ifeqs "\Mode\()","Add"
+        ldp     q8,q10,[\AddrReg1\()],#0
+.if \last_row\() == 0
+        ldp     q9,q11,[\AddrReg2\()],#0
+.else
+        mov     x27,#0
+        mov     v9.D[0],x27
+        mov     v9.D[1],x27
+
+        mov     v11.D[0],x27
+        mov     v11.D[1],x27
+.endif
+        uzp1    v4.2d,v\Vec1Reg\().2d,v\Vec2Reg\().2d
+        uzp2    v5.2d,v\Vec1Reg\().2d,v\Vec2Reg\().2d
+
+        uzp1    v6.2d, v\Vec3Reg\().2d,v\Vec4Reg\().2d
+        uzp2    v7.2d, v\Vec3Reg\().2d,v\Vec4Reg\().2d
+
+        add     v8.4s,v8.4s,v4.4s
+        add     v9.4s,v9.4s,v5.4s
+        add     v10.4s,v10.4s,v6.4s
+        add     v11.4s,v11.4s,v7.4s
+
+        stp     q8,q10,[\AddrReg1\()],#32
+.if \last_row\() == 0
+        stp     q9,q11,[\AddrReg2\()],#32
+.endif
+.else
+        uzp1    v4.2d, v\Vec1Reg\().2d,v\Vec2Reg\().2d
+        uzp2    v5.2d, v\Vec1Reg\().2d,v\Vec2Reg\().2d
+        uzp1    v6.2d, v\Vec3Reg\().2d,v\Vec4Reg\().2d
+        uzp2    v7.2d, v\Vec3Reg\().2d,v\Vec4Reg\().2d
+
+        stp     q4,q6,[\AddrReg1\()],#32
+.if \last_row\() == 0
+        stp     q5,q7,[\AddrReg2\()],#32
+.endif
+.endif
+
+        .endm
+
+//
+// OutputBlock
+//
+// Generates the code to store the output block.
+//
+
+        .macro  OutputBlock Mode, Columns, Rows
+
+        OutputRow\Columns\()Element \Mode\(),x2,x13,16,17,18,19,(\Rows\() == 1)
+
+.if \Rows\() > 2
+        OutputRow\Columns\()Element \Mode\(),x14,x15,20,21,22,23,(\Rows\() == 3)
+.endif
+
+.if \Rows\() > 4
+        OutputRow\Columns\()Element \Mode\(),x16,x17,24,25,26,27,(\Rows\() == 5)
+.endif
+
+.if \Rows\() > 6
+        OutputRow\Columns\()Element \Mode\(),x18,x19,28,29,30,31,(\Rows\() == 7)
+.endif
+
+        .endm
+//
+// ProcessRows
+//
+// Generates the code to process a compute and store the output block for a
+// fixed number of rows.
+//
+
+        .macro  ProcessRows Mode, Rows
+        mov     x4,#\Rows\()                   // return number of rows handled
+        cmp     x5,#6
+        ble     .L\Mode\().ProcessNextColumnLoop6x\Rows\()
+
+.L\Mode\().ProcessNextColumnLoop8x\Rows\():
+        ComputeBlockLoop \Mode\(),8,\Rows\()
+
+        sub     x5,x5,#8
+        cmp     x5,#0
+        blt     .L\Mode\().Output14ElementsOnlyFor\Rows\()
+        OutputBlock \Mode\(),16,\Rows\()
+        mov     x0,x8               // reload matrix A
+        cmp     x5,#6
+        bgt     .L\Mode\().ProcessNextColumnLoop8x\Rows\()
+        cbz     x5,.L\Mode\().ExitKernel
+
+.L\Mode\().ProcessNextColumnLoop6x\Rows\():
+
+        cmp     x5,#4
+        ble     .L\Mode\().ProcessNextColumnLoop4x\Rows\()
+        ComputeBlockLoop \Mode\(),6,\Rows\()
+        sub     x5,x5,#6
+        cmp     x5,#0
+        blt     .L\Mode\().Output10ElementsOnlyFor\Rows\()
+        OutputBlock \Mode\(),12,\Rows\()
+        mov     x0,x8               // reload matrix A
+        cmp     x5,#4
+        bgt     .L\Mode\().ProcessNextColumnLoop6x\Rows\()
+        b       .L\Mode\().ExitKernel
+
+.L\Mode\().ProcessNextColumnLoop4x\Rows\():
+        cmp     x5,#2
+        ble     .L\Mode\().ProcessNextColumnLoop2x\Rows\()
+        ComputeBlockLoop \Mode\(),4,\Rows\()
+        sub     x5,x5,#4
+        cmp     x5,#0
+        blt     .L\Mode\().Output6ElementsOnlyFor\Rows\()
+        OutputBlock \Mode\(),8,\Rows\()
+        mov     x0,x8               // reload matrix A
+        cmp     x5,#2
+        bgt     .L\Mode\().ProcessNextColumnLoop4x\Rows\()
+        b       .L\Mode\().ExitKernel
+
+.L\Mode\().ProcessNextColumnLoop2x\Rows\():
+        ComputeBlockLoop \Mode\(),2,\Rows\()
+        sub     x5,x5,#2
+        cmp     x5,#0
+        blt     .L\Mode\().Output2ElementsOnlyFor\Rows\()
+        OutputBlock \Mode\(),4,\Rows\()
+        mov     x0,x8               // reload matrix A
+        cmp     x5,#2
+        b       .L\Mode\().ExitKernel
+
+.L\Mode\().Output14ElementsOnlyFor\Rows\():
+	OutputBlock \Mode\(),14,\Rows\()
+        b       .L\Mode\().ExitKernel
+
+
+.L\Mode\().Output10ElementsOnlyFor\Rows\():
+        OutputBlock \Mode\(),10,\Rows\()
+        b       .L\Mode\().ExitKernel
+
+
+.L\Mode\().Output6ElementsOnlyFor\Rows\():
+        OutputBlock \Mode\(),6,\Rows\()
+        b       .L\Mode\().ExitKernel
+
+
+.L\Mode\().Output2ElementsOnlyFor\Rows\():
+        OutputBlock \Mode\(),2,\Rows\()
+        b       .L\Mode\().ExitKernel
+
+        .endm
+
+
+/*++
+
+Routine Description:
+
+    This routine is an inner kernel to compute matrix multiplication for a
+    set of rows.
+
+Arguments:
+
+    A (x0) - Supplies the address of matrix A. The matrix data has been packed
+        using MlasGemmQuantCopyPackA<MLAS_GEMM_U8X8_KERNEL_UMMLA>.
+
+    B (x1) - Supplies the address of matrix B. The matrix data has been packed
+        using MlasGemmQuantCopyPackB<MLAS_GEMM_U8X8_KERNEL_UMMLA>.
+
+    C (x2) - Supplies the address of matrix C.
+
+    PackedCountK (x3) - Supplies the number of packed columns from matrix A and
+        the number of packed rows from matrix B to iterate over.
+
+    CountM (x4) - Supplies the maximum number of rows that can be processed for
+        matrix A and matrix C. The actual number of rows handled for this
+        invocation depends on the kernel implementation.
+
+    CountN (x5) - Supplies the number of columns from matrix B and matrix C to
+        iterate over.
+
+    ldc (x6) - Supplies the first dimension of matrix C.
+
+    RowSumBuffer (x7) - Supplies the sum of each row from matrix A. These values
+        have been pre-scaled by the zero point offset of matrix B if the offset
+        is per-tensor (ZeroPointB is nullptr). Otherwise, these values must be
+        scaled by the per-column zero point offsets of matrix B. These values are
+        accumulated into every row of matrix C.
+
+    ColumnSumBuffer - Supplies the sum of each column from matrix B multiplied
+        by the zero point offset of matrix A. These values are accumulated into
+        every column of matrix C.
+
+    ZeroPointB - Optionally supplies the per-column zero point offsets of matrix
+        B, else nullptr if the matrix B is using per-tensor quantization.
+
+Return Value:
+
+    Returns the number of rows handled.
+
+--*/
+
+       .macro  QgemmU8X8KernelUmmlaFunction Mode
+
+        FUNCTION_ENTRY MlasGemmU8X8KernelUmmla\Mode\()
+
+        ldr     x10,[sp, #0]
+        ldr     x11,[sp,#8]
+
+        stp     x19, x20, [sp, #.LMlasQgemmKernel_SavedRegisters_Neg]!
+        stp     x21, x22, [sp, #.LMlasQgemmKernel_backup_x21_x22]
+        stp     x23, x24, [sp, #.LMlasQgemmKernel_backup_x23_x24]
+        stp     x25, x26, [sp, #.LMlasQgemmKernel_backup_x25_x26]
+        stp     x27, x28, [sp, #.LMlasQgemmKernel_backup_x27_x28]
+        stp     d8, d9, [sp, #.LMlasQgemmKernel_backup_d8_d9]
+        stp     d10, d11, [sp, #.LMlasQgemmKernel_backup_d10_d11]
+        stp     d12, d13, [sp, #.LMlasQgemmKernel_backup_d12_d13]
+        stp     d14, d15, [sp, #.LMlasQgemmKernel_backup_d14_d15]
+
+        add     x13,x2,x6,lsl #2            // compute matrix C plus 1 row
+        add     x14,x13,x6,lsl #2           // compute matrix C plus 2 rows
+        add     x15,x14,x6,lsl #2           // compute matrix C plus 3 rows
+        add     x16,x15,x6,lsl #2           // compute matrix C plus 4 rows
+        add     x17,x16,x6,lsl #2           // compute matrix C plus 5 rows
+        add     x18,x17,x6,lsl #2           // compute matrix C plus 6 rows
+        add     x19,x18,x6,lsl #2           // compute matrix C plus 7 rows
+
+        mov     x8,x0                       // save matrix A
+
+//
+// Process 8 rows of the matrices.
+//
+        ld1     {v12.4s},[x7],#16            // load row sum 1 ~ 4
+        cmp     x4,#8
+        blt     .L\Mode\().ProcessCountMLessThan8
+        ld1     {v13.4s},[x7],#16            // load row sum 5 ~ 8
+        ProcessRows \Mode\(),8
+
+//
+// Restore non-volatile registers and return.
+//
+
+.L\Mode\().ExitKernel:
+        mov     x0,x4
+
+        ldp     d14, d15, [sp, #.LMlasQgemmKernel_backup_d14_d15]
+        ldp     d12, d13, [sp, #.LMlasQgemmKernel_backup_d12_d13]
+        ldp     d10, d11, [sp, #.LMlasQgemmKernel_backup_d10_d11]
+        ldp     d8, d9, [sp, #.LMlasQgemmKernel_backup_d8_d9]
+        ldp     x27, x28, [sp, #.LMlasQgemmKernel_backup_x27_x28]
+        ldp     x25, x26, [sp, #.LMlasQgemmKernel_backup_x25_x26]
+        ldp     x23, x24, [sp, #.LMlasQgemmKernel_backup_x23_x24]
+        ldp     x21, x22, [sp, #.LMlasQgemmKernel_backup_x21_x22]
+        ldp     x19, x20, [sp], #.LMlasQgemmKernel_SavedRegisters
+
+        ret
+
+//
+// Process 4 rows of the matrix.
+//
+
+.L\Mode\().ProcessCountMLessThan8:
+        cmp     x4,#4
+        blt     .L\Mode\().ProcessCountMLessThan4
+        ProcessRows \Mode\(),4
+        b       .L\Mode\().ExitKernel
+
+//
+// Process 2 row of the matrix.
+//
+
+.L\Mode\().ProcessCountMLessThan4:
+        cmp     x4,#2
+        blt     .L\Mode\().ProcessCountMLessThan2
+
+        ProcessRows \Mode\(),2
+        b       .L\Mode\().ExitKernel
+
+
+//
+// Process the last row of the matrix.
+//
+
+.L\Mode\().ProcessCountMLessThan2:
+        ProcessRows \Mode\(),1
+        b       .L\Mode\().ExitKernel
+
+
+        .endm
+
+        QgemmU8X8KernelUmmlaFunction Zero
+        QgemmU8X8KernelUmmlaFunction Add
+
+        .end
diff --git a/onnxruntime/core/mlas/lib/mlasi.h b/onnxruntime/core/mlas/lib/mlasi.h
index b6ac4a1ca1d6c..6c859e4e4f44b 100644
--- a/onnxruntime/core/mlas/lib/mlasi.h
+++ b/onnxruntime/core/mlas/lib/mlasi.h
@@ -184,11 +184,17 @@ class MLASCPUIDInfo
 
     bool IsCurrentCoreArmv8NarrowLd() const { return false; }
 
+    bool HasArmNeon_I8MM() const { return has_arm_neon_i8mm_; }
+
+    bool HasArmSVE_I8MM() const { return has_arm_sve_i8mm_; }
+
    private:
     MLASCPUIDInfo();
 
     bool has_arm_neon_dot_{false};
     bool has_fp16_{false};
+    bool has_arm_neon_i8mm_{false};
+    bool has_arm_sve_i8mm_{false};
 };
 using MLAS_CPUIDINFO = MLASCPUIDInfo;
 
@@ -856,6 +862,8 @@ extern const MLAS_GEMM_QUANT_DISPATCH MlasGemmU8X8DispatchNeon;
 extern const MLAS_GEMM_QUANT_DISPATCH MlasGemmX8S8DispatchNeon;
 extern const MLAS_GEMM_QUANT_DISPATCH MlasGemmU8X8DispatchUdot;
 extern const MLAS_GEMM_QUANT_DISPATCH MlasGemmS8S8DispatchSdot;
+extern const MLAS_GEMM_QUANT_DISPATCH MlasGemmU8X8DispatchUmmla;
+extern const MLAS_GEMM_QUANT_DISPATCH MlasGemmS8S8DispatchSmmla;
 extern const MLAS_GEMM_QUANT_DISPATCH MlasGemmU8X8DispatchWasmSimd;
 extern const MLAS_GEMM_QUANT_DISPATCH MlasGemmQuantDispatchDefault;
 extern const MLAS_GEMM_QUANT_DISPATCH MlasGemm8X8DispatchPOWER10;
@@ -882,14 +890,30 @@ extern const MLAS_CONV_SYM_DISPATCH MlasConvSymS8DispatchNeon;
 extern const MLAS_CONV_SYM_DISPATCH MlasConvSymU8DispatchDot;
 extern const MLAS_CONV_SYM_DISPATCH MlasConvSymS8DispatchDot;
 
+//
+// Quantized 8-bit integer/quantized 4-bit integer matrix/matrix multiply dispatch structure.
+//
+
 struct MLAS_Q8Q4GEMM_DISPATCH;
 
 extern const MLAS_Q8Q4GEMM_DISPATCH MlasQ8Q4GemmDispatchAvx512vnni;
 
+//
+// Float/quantized 4-bit integer matrix/matrix multiply dispatch structure.
+//
+
 struct MLAS_FPQ4GEMM_DISPATCH;
 
 extern const MLAS_FPQ4GEMM_DISPATCH MlasFpQ4GemmDispatchAvx512;
 
+//
+// Float/quantized n-bit integer matrix/matrix multiply dispatch structure.
+//
+
+struct MLAS_SQNBIT_GEMM_DISPATCH;
+
+extern const MLAS_SQNBIT_GEMM_DISPATCH MlasSQNBitGemmDispatchNeon;
+
 //
 // Quantized depthwise convolution kernels.
 //
@@ -1021,6 +1045,8 @@ struct MLAS_PLATFORM {
 
     const MLAS_FPQ4GEMM_DISPATCH* FpQ4GemmDispatch{nullptr};
     const MLAS_Q8Q4GEMM_DISPATCH* Q8Q4GemmDispatch{nullptr};
+
+    const MLAS_SQNBIT_GEMM_DISPATCH* SQNBitGemmDispatch{nullptr};
 };
 
 inline
@@ -1069,6 +1095,23 @@ MlasTrySimpleParallel(
     const std::function<void(std::ptrdiff_t tid)>& Work
     );
 
+
+/**
+ * @brief Distribute many iterations of work over a thread pool if supported.
+ * This function is for small workloads in non-performance critical situation.
+ *
+ * @param ThreadPool [IN]          Optional thread pool. Ignored when using OpenMP
+ * @param Iterations [IN]          Total number of iterations
+ * @param Work [IN]                Logic for computing a range of iterations [begin, end)
+ */
+void
+MlasTryBatchParallel(
+	MLAS_THREADPOOL * ThreadPool,
+	const std::ptrdiff_t Iterations,
+	const std::function<void(std::ptrdiff_t tid)>& Work
+    );
+
+
 inline
 ptrdiff_t
 MlasGetMaximumThreadCount(
diff --git a/onnxruntime/core/mlas/lib/platform.cpp b/onnxruntime/core/mlas/lib/platform.cpp
index 96bc1d8010bed..fec56c6ee063f 100644
--- a/onnxruntime/core/mlas/lib/platform.cpp
+++ b/onnxruntime/core/mlas/lib/platform.cpp
@@ -52,6 +52,14 @@ MLASCPUIDInfo::MLASCPUIDInfo()
 #define HWCAP_ASIMDDP (1 << 20)
 #endif
 
+#ifndef HWCAP2_I8MM
+#define HWCAP2_I8MM (1 << 13)
+#endif
+
+#ifndef HWCAP2_SVEI8MM
+#define HWCAP2_SVEI8MM (1 << 9)
+#endif
+
 #if defined(BUILD_MLAS_NO_ONNXRUNTIME)
 MLASCPUIDInfo::MLASCPUIDInfo()
 {
@@ -59,6 +67,9 @@ MLASCPUIDInfo::MLASCPUIDInfo()
 
     // raw hack! Need CPUIDInfo implementation for more precise detection
     has_fp16_ = has_arm_neon_dot_;
+
+    has_arm_neon_i8mm_ = ((getauxval(AT_HWCAP2) & HWCAP2_I8MM) != 0);
+    has_arm_sve_i8mm_ = ((getauxval(AT_HWCAP2) & HWCAP2_SVEI8MM) != 0);
 }
 #endif
 
@@ -449,6 +460,7 @@ Return Value:
     this->SymmQgemmDispatch = &MlasSymmQgemmS8DispatchNeon;
     this->ConvSymU8S8Dispatch = &MlasConvSymU8DispatchNeon;
     this->ConvSymS8S8Dispatch = &MlasConvSymS8DispatchNeon;
+    this->SQNBitGemmDispatch = &MlasSQNBitGemmDispatchNeon;
 
     //
     // Check if the processor supports ASIMD dot product instructions.
@@ -458,12 +470,16 @@ Return Value:
 
 #if defined(_WIN32)
     HasDotProductInstructions = (IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE) != 0);
-#elif !defined(__APPLE__)  // The next few lines result in an EXC_BAD_INSTRUCTION runtime error on a M1 Mac so we
-                           // disable it there.
-    uint64_t isar0_el1;
-    asm("mrs %[reg], ID_AA64ISAR0_EL1\n" : [reg] "=r"(isar0_el1) : :);
-    HasDotProductInstructions = ((isar0_el1 >> 44) & 0xfu) == 0x1u;
 #else
+    // Use the cpuinfo value which is read from sysctl and has some additional special cases.
+    // https://github.com/pytorch/cpuinfo/blob/959002f82d7962a473d8bf301845f2af720e0aa4/src/arm/mach/init.c#L369-L379
+    // Do NOT use ID_AA64ISAR0_EL1. It causes illegal instruction errors on Mac M1 and ARMv8-A chips
+    // as well as failing on other ARM chips as it is an EL1 level register that requires extra
+    // privileges to read.
+    //
+    // uint64_t isar0_el1;
+    // asm("mrs %[reg], ID_AA64ISAR0_EL1\n" : [reg] "=r"(isar0_el1) : :);
+    // HasDotProductInstructions = ((isar0_el1 >> 44) & 0xfu) == 0x1u;
     HasDotProductInstructions = MLAS_CPUIDINFO::GetCPUIDInfo().HasArmNeonDot();
 #endif
 
@@ -476,6 +492,17 @@ Return Value:
         this->ConvSymS8S8Dispatch = &MlasConvSymS8DispatchDot;
     }
 
+#if defined(__linux__)
+    //
+    // Check if the processor supports ASIMD I8MM instructions.
+    //
+    if (MLAS_CPUIDINFO::GetCPUIDInfo().HasArmNeon_I8MM()) {
+        this->GemmU8U8Dispatch = &MlasGemmU8X8DispatchUmmla;
+        this->GemmU8S8Dispatch = &MlasGemmU8X8DispatchUmmla;
+        this->GemmS8S8Dispatch = &MlasGemmS8S8DispatchSmmla;
+    }
+#endif
+
 #endif // MLAS_TARGET_ARM64
 #if defined(MLAS_TARGET_POWER)
     this->GemmFloatKernel = MlasSgemmKernel;
diff --git a/onnxruntime/core/mlas/lib/q4_dq.cpp b/onnxruntime/core/mlas/lib/q4_dq.cpp
index 85c0d13006126..b5784ecb56d01 100644
--- a/onnxruntime/core/mlas/lib/q4_dq.cpp
+++ b/onnxruntime/core/mlas/lib/q4_dq.cpp
@@ -294,3 +294,790 @@ MlasQ4GemmUnPackB(
             return MlasQ4GemmUnPackBImpl<MLAS_Q4TYPE_BLK1>(FpData, PackedBuf, N, K, ldb);
     }
 }
+
+
+
+/***************************************************************
+ * The quantization format that pack data and quantization
+ * parameters into separate buffers.
+ */
+
+
+template <
+    int Row_,    ///< rows of a matrix
+    int Column_  ///< columns of a matrix
+    >
+struct Shape2D {
+    static int const kRow = Row_;              ///< rows of a matrix
+    static int const kColumn = Column_;        ///< columns of a matrix
+    static int const kCount = Row_ * Column_;  ///< total number of elements in a matrix
+};
+
+
+template <int qbits>
+struct BitsTraits {
+    static_assert(qbits <= 8, "Only BitsTraits are for small number of bits!");
+
+    static constexpr int kBits = qbits;
+    static constexpr int kMax = (1 << qbits) - 1;
+    static constexpr int kMid = 1 << (qbits - 1);
+    static constexpr float kMaxFp = static_cast<float>(kMax);
+
+    // number of qbit elements to pack into whole bytes
+    static constexpr int kPackSize = (qbits == 8) ? 1 : (qbits == 4) ? 2 : (qbits == 2) ? 4 : 0;
+    static_assert(kPackSize != 0, "Packing to whole bytes not supported for this qbits!");
+};
+
+
+/**
+ * @brief Rectify min/max from a set of weights, and convert to scale and zero point
+ *        for quantization
+ * @tparam ScaleT   type of scale, usually floating point of various bits
+ * @tparam qbits  number of int bits used for zero point value
+ * @param[in]   min
+ * @param[in]   max
+ * @param[out]  scale
+ * @param[out]  zp
+ */
+template <typename ScaleT, int qbits>
+MLAS_FORCEINLINE
+void
+range2scalezp(float min, float max, ScaleT& scale, uint8_t& zp)
+{
+    constexpr int zp_max = BitsTraits<qbits>::kMax;
+    constexpr float zp_max_fp = BitsTraits<qbits>::kMaxFp;
+
+    min = std::min(min, 0.0f);
+    max = std::max(max, 0.0f);
+
+    float scale_f = (max - min) / zp_max;
+
+    float zero_point_fp = min;
+    if (scale_f != 0.0f) {
+        zero_point_fp = 0.f - min / scale_f;
+    }
+
+    if (zero_point_fp < 0.0f) {
+        zp = 0;
+    } else if (zero_point_fp > zp_max_fp) {
+        zp = zp_max;
+    } else {
+        zp = (uint8_t)roundf(zero_point_fp);
+    }
+    scale = ScaleT(scale_f);
+}
+
+template <typename ScaleT, int qbits>
+MLAS_FORCEINLINE
+void
+range2scale(float min, float max, ScaleT& scale)
+{
+    constexpr int mid_v = BitsTraits<qbits>::kMid;
+    constexpr float mid_fp = static_cast<float>(-mid_v);
+
+    max = fabsf(max) > fabsf(min) ? max : min;
+
+    scale = ScaleT(max / mid_fp);
+};
+
+
+/**
+ * @brief Blockwise quantization methods
+ * @tparam ElementT       source data type, e.g. fp32/fp16
+ * @tparam block_size     number of elemenets quantized together
+ * @tparam qbits          number of bits in each quantized element
+ * @tparam Columnwise     true:  elements in a block come from one single column
+ *                        false: elements in a block come from one single row
+ */
+template <
+    typename ElementT,
+    int32_t block_size,
+    int32_t qbits,
+    bool Columnwise>
+struct BlockwiseQuantizer {
+    // To support other qbits, need to add bit packing code for
+    // storing to dst and zero points
+    static_assert(qbits == 4, "Only 4b block quantization is supported!");
+
+    using QuantBlk = std::conditional_t<Columnwise, Shape2D<block_size, 1>, Shape2D<1, block_size>>;
+    using ThreadBlk = Shape2D<QuantBlk::kRow * BitsTraits<qbits>::kPackSize, QuantBlk::kColumn>;
+
+    static
+    MLAS_FORCEINLINE
+    void quantizeMetaShape(int rows, int columns, int& meta_rows, int& meta_cols)
+    {
+        meta_rows = (rows + QuantBlk::kRow - 1) / QuantBlk::kRow;
+        meta_cols = (columns + QuantBlk::kColumn - 1) / QuantBlk::kColumn;
+    }
+
+    static
+    MLAS_FORCEINLINE
+    void quantizedShape(int rows, int columns, int& q_rows, int& q_cols) {
+        int meta_rows;
+        int meta_cols;
+        quantizeMetaShape(rows, columns, meta_rows, meta_cols);
+
+        // quantized matrix is stored in column major, packed by column
+        q_rows = (meta_rows * QuantBlk::kRow * qbits + 7) / 8;
+        q_cols = meta_cols * QuantBlk::kColumn;
+    }
+
+    static MLAS_FORCEINLINE void quantizedBufferSizes(
+        int rows, int columns, size_t& data_bytes, size_t& scale_num_elements, size_t* zero_point_bytes
+    )
+    {
+        int meta_rows, meta_cols;
+        quantizeMetaShape(rows, columns, meta_rows, meta_cols);
+        int q_rows, q_cols;
+        quantizedShape(rows, columns, q_rows, q_cols);
+
+        data_bytes = q_rows * q_cols;
+        scale_num_elements = meta_rows * meta_cols;
+
+        if (zero_point_bytes) {
+            // this works for qbits == 4 but may need to be updated for other qbits values
+            *zero_point_bytes = ((meta_rows * qbits + 7) / 8) * meta_cols;
+        }
+    }
+
+    /**
+     * @brief Quantized a Matrix shape [rows, columns], resulting quantized
+     *        and packed data are stored in column major (transposed)
+     * @param[out] dst           pointer to the quantized weights, column major: [columns, rows]
+     * @param[out] scale         pointer to the scales, column major: [columns/QuantBlk::kColumn, rows/QuantBlk::kRow]
+     * @param[out] zero_points   pointer to the zero points, same shape as scale
+     * @param[in]  src           pointer to the source matrix, row major: [rows, columns]
+     * @param rows
+     * @param columns
+     * @param leadingDimension   stride of the source matrix, i.e. distance from one row to the next
+     */
+    static void quantizeAndTranspose(
+        uint8_t* dst,
+        ElementT* scales,
+        uint8_t* zero_points,
+        const ElementT* src,
+        int32_t rows,
+        int32_t columns,
+        int32_t leadingDimension,
+        MLAS_THREADPOOL* thread_pool)
+    {
+        // Thread partitioning
+        const auto thrd_row_blks = (rows + ThreadBlk::kRow - 1) / ThreadBlk::kRow;
+        const auto thrd_col_blks = (columns + ThreadBlk::kColumn - 1) / ThreadBlk::kColumn;
+        const auto total_thrd_blks = thrd_row_blks * thrd_col_blks;
+
+        const auto row_blks = (rows + QuantBlk::kRow - 1) / QuantBlk::kRow;
+
+        int q_rows, q_cols;
+        quantizedShape(rows, columns, q_rows, q_cols);
+
+        MlasTryBatchParallel(
+            thread_pool, total_thrd_blks,
+            [&](ptrdiff_t block_idx) {
+                uint8_t zp_bytes[BitsTraits<qbits>::kPackSize];
+                std::fill_n(zp_bytes, BitsTraits<qbits>::kPackSize, (uint8_t)8);
+
+                const int32_t r_blk_idx = static_cast<int32_t>(block_idx / thrd_col_blks);
+                const int32_t c_blk_idx = static_cast<int32_t>(block_idx % thrd_col_blks);
+
+                const int32_t r = r_blk_idx * ThreadBlk::kRow;
+                const int32_t c = c_blk_idx * ThreadBlk::kColumn;
+
+                const int32_t r_end = std::min(r + ThreadBlk::kRow, rows);
+                const int32_t c_end = std::min(c + ThreadBlk::kColumn, columns);
+
+                const int meta_row = r / QuantBlk::kRow;
+                const int meta_col = c / QuantBlk::kColumn;
+
+                // compute scale and zero point
+                for (int kpack = 0; kpack < BitsTraits<qbits>::kPackSize; kpack++) {
+
+                    // scan a single block to extract range [min, max]
+                    float min = std::numeric_limits<float>::max();
+                    float max = -min;
+                    const int row_start = r + kpack * QuantBlk::kRow;
+                    const int row_end = std::min(row_start + QuantBlk::kRow, r_end);
+                    for (int i = row_start; i < row_end; ++i) {
+                        for (int j = c; j < c_end; ++j) {
+                            const float v = static_cast<float>(src[i * leadingDimension + j]);
+                            if (v < min) min = v;
+                            if (v > max) max = v;
+                        }
+                    }
+
+                    // store scale and zero point at quant parameter matrix position
+                    if (row_start < row_end) {
+                        const int32_t meta_idx = meta_col * row_blks + meta_row + kpack;
+                        if (zero_points == nullptr) {
+                            range2scale<ElementT, qbits>(min, max, scales[meta_idx]);
+                        } else {
+                            range2scalezp<ElementT, qbits>(min, max, scales[meta_idx], zp_bytes[kpack]);
+                        }
+                    }
+                }
+
+                // !! 4b specific code as we need to pack 2 4b numbers into one byte
+                if (zero_points != nullptr) {
+                    const int32_t meta_idx = meta_col * ((row_blks + 1) / 2) + meta_row / 2;
+                    zero_points[meta_idx] = (zp_bytes[0] & 0xf) | (zp_bytes[1] << 4);
+                }
+
+                for (int32_t j = c; j < c_end; ++j) {
+                    const int32_t meta_c = j / QuantBlk::kColumn;
+                    for (int32_t i = r; i < r_end; i += 2) {
+                        const int32_t meta_r = i / QuantBlk::kRow;
+                        const float scale = static_cast<float>(scales[meta_c * row_blks + meta_r]);
+                        const float reciprocal_scale = scale ? 1.0f / scale : 0.0f;
+                        const int8_t zp = zp_bytes[meta_r & 1];
+                        const int8_t zp1 = zp_bytes[((i + 1) / QuantBlk::kRow) & 1];
+
+                        const float v0 = static_cast<float>(src[i * leadingDimension + j]);
+                        const uint8_t vi0 = (uint8_t)std::clamp(roundf(v0 * reciprocal_scale + zp),
+                                                                0.0f, BitsTraits<qbits>::kMaxFp);
+
+                        uint8_t vi1 = (uint8_t)zp;
+                        if (i + 1 < r_end) {
+                            float reciprocal_scale1 = reciprocal_scale;
+                            if constexpr (QuantBlk::kRow == 1) {
+                                const float scale1 =
+                                    static_cast<float>(scales[meta_c * row_blks + meta_r + 1]);
+                                reciprocal_scale1 = scale1 ? 1.0f / scale1 : 0.0f;
+                            }
+                            const float v1 = static_cast<float>(src[(i + 1) * leadingDimension + j]);
+                            vi1 = (uint8_t)std::clamp(roundf(v1 * reciprocal_scale1 + zp1), 0.0f,
+                                                      BitsTraits<qbits>::kMaxFp);
+                        }
+
+                        // !! 4b specific code
+                        dst[j * q_rows + i / 2] = (vi0 & 0xf) | (vi1 << 4);
+                    }
+                }
+            });
+    }
+
+    /**
+     * @brief Dequantize a column major quantized matrix, and store the result in a column major
+     * matrix for use in GEMM
+     * @param[out] dst           pointer to the dequantized matrix, column major: [columns, rows]
+     * @param[in]  weights       pointer to the quantized weights, column major: [columns, rows]
+     * @param[in]  scales        pointer to the scales of quantized blocks, column major layout
+     * @param[in]  zero_points   pointer to the zero points of quantized blocks, packed column major
+     *                           scales
+     * @param[in]  rows
+     * @param[in]  columns
+     */
+    static void dequantize(
+        ElementT* dst,
+        const uint8_t* weights,
+        const ElementT* scales,
+        const uint8_t* zero_points,
+        int32_t rows,
+        int32_t columns,
+        MLAS_THREADPOOL* thread_pool)
+    {
+        // Thread partitioning
+        const auto thrd_row_blks = (rows + ThreadBlk::kRow - 1) / ThreadBlk::kRow;
+        const auto thrd_col_blks = (columns + ThreadBlk::kColumn - 1) / ThreadBlk::kColumn;
+        const auto total_thrd_blks = thrd_row_blks * thrd_col_blks;
+
+        const auto row_blks = (rows + QuantBlk::kRow - 1) / QuantBlk::kRow;
+
+        int q_rows, q_cols;
+        quantizedShape(rows, columns, q_rows, q_cols);
+
+        MlasTryBatchParallel(
+            thread_pool, total_thrd_blks,
+            [&](ptrdiff_t block_idx) {
+                int32_t r_blk_idx = static_cast<int32_t>(block_idx / thrd_col_blks);
+                int32_t c_blk_idx = static_cast<int32_t>(block_idx % thrd_col_blks);
+
+                int32_t r = r_blk_idx * ThreadBlk::kRow;
+                int32_t c = c_blk_idx * ThreadBlk::kColumn;
+
+                int32_t r_end = std::min(r + ThreadBlk::kRow, rows);
+                int32_t c_end = std::min(c + ThreadBlk::kColumn, columns);
+
+                for (int32_t j = c; j < c_end; ++j) {
+                    const int32_t meta_col = j / QuantBlk::kColumn;
+
+                    // !! 4b specific code
+                    // the whole loop is 4b specific due to sub 8 bit packing
+                    // and unpacking. We can potentially make this qbits generic
+                    // by wraping the packing/unpacking code like cutlass::Array
+                    for (int32_t i = r; i < r_end; i += 2) {
+                        const int32_t meta_row = i / QuantBlk::kRow;
+
+                        const float scale0 =
+                            static_cast<float>(scales[meta_col * row_blks + meta_row]);
+
+                        const int zp_pair =
+                            (zero_points == nullptr)
+                                ? 0x88
+                                : zero_points[meta_col * ((row_blks + 1) / 2) + meta_row / 2];
+                        const int zp0 = (meta_row & 1) ? (zp_pair >> 4) : (zp_pair & 0xf);
+
+                        const uint8_t vi0 = weights[j * q_rows + i / 2] & 0xf;
+                        const float v0 = (static_cast<float>(vi0) - zp0) * scale0;
+
+                        dst[j * rows + i] = static_cast<ElementT>(v0);
+                        if ((i + 1) < r_end) {
+                            float scale1 = scale0;
+                            int zp1 = zp0;
+                            if constexpr (QuantBlk::kRow == 1) {
+                                scale1 =
+                                    static_cast<float>(scales[meta_col * row_blks + meta_row + 1]);
+                                zp1 = (zp_pair >> 4) & 0xf;
+                            }
+                            const uint8_t vi1 = weights[j * q_rows + i / 2] >> 4;
+                            const float v1 = (static_cast<float>(vi1) - zp1) * scale1;
+                            dst[j * rows + (i + 1)] = static_cast<ElementT>(v1);
+                        }
+                    }
+                }
+            });
+    }
+};
+
+
+template <typename T, int qbits>
+void
+MlasBlockwiseQuantMetaShape(
+    int block_size,
+    bool columnwise,
+    int rows,
+    int columns,
+    int& meta_rows,
+    int& meta_cols
+    )
+{
+    switch (block_size) {
+        case 16: {
+            if (columnwise) {
+                BlockwiseQuantizer<T, 16, qbits, true>::quantizeMetaShape(rows, columns, meta_rows, meta_cols);
+            } else {
+                BlockwiseQuantizer<T, 16, qbits, false>::quantizeMetaShape(rows, columns, meta_rows, meta_cols);
+            }
+            break;
+        }
+        case 32: {
+            if (columnwise) {
+                BlockwiseQuantizer<T, 32, qbits, true>::quantizeMetaShape(rows, columns, meta_rows, meta_cols);
+            } else {
+                BlockwiseQuantizer<T, 32, qbits, false>::quantizeMetaShape(
+                                    rows, columns, meta_rows, meta_cols);
+            }
+            break;
+        }
+        case 64: {
+            if (columnwise) {
+                BlockwiseQuantizer<T, 64, qbits, true>::quantizeMetaShape(rows, columns, meta_rows,
+                                                                      meta_cols);
+            } else {
+                BlockwiseQuantizer<T, 64, qbits, false>::quantizeMetaShape(rows, columns, meta_rows,
+                                                                       meta_cols);
+            }
+            break;
+        }
+        case 128: {
+            if (columnwise) {
+                BlockwiseQuantizer<T, 128, qbits, true>::quantizeMetaShape(rows, columns, meta_rows,
+                                                                      meta_cols);
+            } else {
+                BlockwiseQuantizer<T, 128, qbits, false>::quantizeMetaShape(rows, columns, meta_rows,
+                                                                       meta_cols);
+            }
+            break;
+        }
+        case 256: {
+            if (columnwise) {
+                BlockwiseQuantizer<T, 256, qbits, true>::quantizeMetaShape(rows, columns, meta_rows,
+                                                                      meta_cols);
+            } else {
+                BlockwiseQuantizer<T, 256, qbits, false>::quantizeMetaShape(rows, columns, meta_rows,
+                                                                       meta_cols);
+            }
+            break;
+        }
+        default:
+            meta_rows = 0;
+            meta_cols = 0;
+            break;
+    }
+}
+
+
+
+template <typename T, int qbits>
+void
+MlasBlockwiseQuantizedShape(
+    int block_size,
+    bool columnwise,
+    int rows,
+    int columns,
+    int& q_rows,
+    int& q_cols
+    )
+{
+    switch (block_size) {
+        case 16: {
+            if (columnwise) {
+                BlockwiseQuantizer<T, 16, qbits, true>::quantizedShape(rows, columns, q_rows, q_cols);
+            } else {
+                BlockwiseQuantizer<T, 16, qbits, false>::quantizedShape(rows, columns, q_rows, q_cols);
+            }
+            break;
+        }
+        case 32: {
+            if (columnwise) {
+                BlockwiseQuantizer<T, 32, qbits, true>::quantizedShape(rows, columns, q_rows, q_cols);
+            } else {
+                BlockwiseQuantizer<T, 32, qbits, false>::quantizedShape(
+                                    rows, columns, q_rows, q_cols);
+            }
+            break;
+        }
+        case 64: {
+            if (columnwise) {
+                BlockwiseQuantizer<T, 64, qbits, true>::quantizedShape(rows, columns, q_rows, q_cols);
+            } else {
+                BlockwiseQuantizer<T, 64, qbits, false>::quantizedShape(rows, columns, q_rows, q_cols);
+            }
+            break;
+        }
+        case 128: {
+            if (columnwise) {
+                BlockwiseQuantizer<T, 128, qbits, true>::quantizedShape(rows, columns, q_rows, q_cols);
+            } else {
+                BlockwiseQuantizer<T, 128, qbits, false>::quantizedShape(rows, columns, q_rows, q_cols);
+            }
+            break;
+        }
+        case 256: {
+            if (columnwise) {
+                BlockwiseQuantizer<T, 256, qbits, true>::quantizedShape(rows, columns, q_rows, q_cols);
+            } else {
+                BlockwiseQuantizer<T, 256, qbits, false>::quantizedShape(rows, columns, q_rows, q_cols);
+            }
+            break;
+        }
+        default:
+            q_rows = 0;
+            q_cols = 0;
+            break;
+    }
+}
+
+
+template
+void
+MlasBlockwiseQuantMetaShape<float, 4>(
+    int block_size,
+    bool columnwise,
+    int rows,
+    int columns,
+    int& meta_rows,
+    int& meta_cols
+    );
+
+template
+void
+MlasBlockwiseQuantMetaShape<MLAS_FP16, 4>(
+    int block_size,
+    bool columnwise,
+    int rows,
+    int columns,
+    int& meta_rows,
+    int& meta_cols
+    );
+
+template
+void
+MlasBlockwiseQuantizedShape<float, 4>(
+    int block_size,
+    bool columnwise,
+    int rows,
+    int columns,
+    int& q_rows,
+    int& q_cols
+    );
+
+template
+void
+MlasBlockwiseQuantizedShape<MLAS_FP16, 4>(
+    int block_size,
+    bool columnwise,
+    int rows,
+    int columns,
+    int& q_rows,
+    int& q_cols
+    );
+
+void MLASCALL
+MlasBlockwiseQuantizedBufferSizes(
+    int qbits,
+    int block_size,
+    bool columnwise,
+    int rows,
+    int columns,
+    size_t& q_data_size_in_bytes,
+    size_t& q_scale_num_elements,
+    size_t* q_zero_point_size_in_bytes
+)
+{
+    q_data_size_in_bytes = q_scale_num_elements = 0;
+    if (q_zero_point_size_in_bytes) {
+        *q_zero_point_size_in_bytes = 0;
+    }
+
+    if (qbits == 4) {
+        switch (block_size) {
+            case 16:
+                if (columnwise) {
+                    BlockwiseQuantizer<float, 16, 4, true>::quantizedBufferSizes(
+                        rows, columns, q_data_size_in_bytes, q_scale_num_elements, q_zero_point_size_in_bytes
+                    );
+                } else {
+                    BlockwiseQuantizer<float, 16, 4, false>::quantizedBufferSizes(
+                        rows, columns, q_data_size_in_bytes, q_scale_num_elements, q_zero_point_size_in_bytes
+                    );
+                }
+                break;
+
+            case 32:
+                if (columnwise) {
+                    BlockwiseQuantizer<float, 32, 4, true>::quantizedBufferSizes(
+                        rows, columns, q_data_size_in_bytes, q_scale_num_elements, q_zero_point_size_in_bytes
+                    );
+                } else {
+                    BlockwiseQuantizer<float, 32, 4, false>::quantizedBufferSizes(
+                        rows, columns, q_data_size_in_bytes, q_scale_num_elements, q_zero_point_size_in_bytes
+                    );
+                }
+                break;
+
+            case 64:
+                if (columnwise) {
+                    BlockwiseQuantizer<float, 64, 4, true>::quantizedBufferSizes(
+                        rows, columns, q_data_size_in_bytes, q_scale_num_elements, q_zero_point_size_in_bytes
+                    );
+                } else {
+                    BlockwiseQuantizer<float, 64, 4, false>::quantizedBufferSizes(
+                        rows, columns, q_data_size_in_bytes, q_scale_num_elements, q_zero_point_size_in_bytes
+                    );
+                }
+                break;
+
+            case 128:
+                if (columnwise) {
+                    BlockwiseQuantizer<float, 128, 4, true>::quantizedBufferSizes(
+                        rows, columns, q_data_size_in_bytes, q_scale_num_elements, q_zero_point_size_in_bytes
+                    );
+                } else {
+                    BlockwiseQuantizer<float, 128, 4, false>::quantizedBufferSizes(
+                        rows, columns, q_data_size_in_bytes, q_scale_num_elements, q_zero_point_size_in_bytes
+                    );
+                }
+                break;
+
+            case 256:
+                if (columnwise) {
+                    BlockwiseQuantizer<float, 256, 4, true>::quantizedBufferSizes(
+                        rows, columns, q_data_size_in_bytes, q_scale_num_elements, q_zero_point_size_in_bytes
+                    );
+                } else {
+                    BlockwiseQuantizer<float, 256, 4, false>::quantizedBufferSizes(
+                        rows, columns, q_data_size_in_bytes, q_scale_num_elements, q_zero_point_size_in_bytes
+                    );
+                }
+                break;
+
+            default:
+                // Only block size 16, 32, 64, 128, 256 are supported.
+                break;
+        }
+    }
+}
+
+
+template <typename T, int qbits>
+void
+MlasQuantizeBlockwise(
+    uint8_t* dst,
+    T* scales,
+    uint8_t* zero_points,
+    const T* src,
+    int block_size,
+    bool columnwise,
+    int rows,
+    int columns,
+    int leading_dimension,
+    MLAS_THREADPOOL* thread_pool
+    )
+{
+    switch (block_size) {
+        case 16:
+            if (columnwise) {
+                BlockwiseQuantizer<T, 16, qbits, true>::quantizeAndTranspose(
+                    dst, scales, zero_points, src, rows, columns, leading_dimension, thread_pool);
+            } else {
+                BlockwiseQuantizer<T, 16, qbits, false>::quantizeAndTranspose(
+                    dst, scales, zero_points, src, rows, columns, leading_dimension, thread_pool);
+            }
+            break;
+
+        case 32:
+            if (columnwise) {
+                BlockwiseQuantizer<T, 32, qbits, true>::quantizeAndTranspose(
+                    dst, scales, zero_points, src, rows, columns, leading_dimension, thread_pool);
+            } else {
+                BlockwiseQuantizer<T, 32, qbits, false>::quantizeAndTranspose(
+                    dst, scales, zero_points, src, rows, columns, leading_dimension, thread_pool);
+            }
+            break;
+
+        case 64:
+            if (columnwise) {
+                BlockwiseQuantizer<T, 64, qbits, true>::quantizeAndTranspose(
+                    dst, scales, zero_points, src, rows, columns, leading_dimension, thread_pool);
+            } else {
+                BlockwiseQuantizer<T, 64, qbits, false>::quantizeAndTranspose(
+                    dst, scales, zero_points, src, rows, columns, leading_dimension, thread_pool);
+            }
+            break;
+
+        case 128:
+            if (columnwise) {
+                BlockwiseQuantizer<T, 128, qbits, true>::quantizeAndTranspose(
+                    dst, scales, zero_points, src, rows, columns, leading_dimension, thread_pool);
+            } else {
+                BlockwiseQuantizer<T, 128, qbits, false>::quantizeAndTranspose(
+                    dst, scales, zero_points, src, rows, columns, leading_dimension, thread_pool);
+            }
+            break;
+
+        case 256:
+            if (columnwise) {
+                BlockwiseQuantizer<T, 256, qbits, true>::quantizeAndTranspose(
+                    dst, scales, zero_points, src, rows, columns, leading_dimension, thread_pool);
+            } else {
+                BlockwiseQuantizer<T, 256, qbits, false>::quantizeAndTranspose(
+                    dst, scales, zero_points, src, rows, columns, leading_dimension, thread_pool);
+            }
+            break;
+
+        default:
+            // Only block size 16, 32, 64, 128, 256 are supported.
+            break;
+    }
+}
+
+template
+void
+MlasQuantizeBlockwise<float, 4>(
+    uint8_t* dst,
+    float* scales,
+    uint8_t* zero_points,
+    const float* src,
+    int block_size,
+    bool columnwise,
+    int rows,
+    int columns,
+    int leading_dimension,
+    MLAS_THREADPOOL* thread_pool
+    );
+
+template
+void
+MlasQuantizeBlockwise<MLAS_FP16, 4>(
+    uint8_t* dst,
+    MLAS_FP16* scales,
+    uint8_t* zero_points,
+    const MLAS_FP16* src,
+    int block_size,
+    bool columnwise,
+    int rows,
+    int columns,
+    int leading_dimension,
+    MLAS_THREADPOOL* thread_pool
+    );
+
+
+template <typename T, int qbits>
+void
+MlasDequantizeBlockwise(
+    T* dst,
+    const uint8_t* src,
+    const T* scales,
+    const uint8_t* zero_points,
+    int block_size,
+    bool columnwise,
+    int rows,
+    int columns,
+    MLAS_THREADPOOL* thread_pool
+    )
+{
+    switch (block_size) {
+        case 16:
+            if (columnwise) {
+                BlockwiseQuantizer<T, 16, qbits, true>::dequantize(dst, src, scales, zero_points, rows,
+                                                               columns, thread_pool);
+            } else {
+                BlockwiseQuantizer<T, 16, qbits, false>::dequantize(dst, src, scales, zero_points, rows,
+                                                                columns, thread_pool);
+            }
+            break;
+        case 32:
+            if (columnwise) {
+                BlockwiseQuantizer<T, 32, qbits, true>::dequantize(dst, src, scales, zero_points, rows,
+                                                               columns, thread_pool);
+            } else {
+                BlockwiseQuantizer<T, 32, qbits, false>::dequantize(dst, src, scales, zero_points, rows,
+                                                                columns, thread_pool);
+            }
+            break;
+        case 64:
+            if (columnwise) {
+                BlockwiseQuantizer<T, 64, qbits, true>::dequantize(dst, src, scales, zero_points, rows,
+                                                               columns, thread_pool);
+            } else {
+                BlockwiseQuantizer<T, 64, qbits, false>::dequantize(dst, src, scales, zero_points, rows,
+                                                                columns, thread_pool);
+            }
+            break;
+        case 128:
+            if (columnwise) {
+                BlockwiseQuantizer<T, 128, qbits, true>::dequantize(dst, src, scales, zero_points, rows,
+                                                                columns, thread_pool);
+            } else {
+                BlockwiseQuantizer<T, 128, qbits, false>::dequantize(dst, src, scales, zero_points,
+                                                                 rows, columns, thread_pool);
+            }
+            break;
+        case 256:
+            if (columnwise) {
+                BlockwiseQuantizer<T, 256, qbits, true>::dequantize(dst, src, scales, zero_points, rows,
+                                                                columns, thread_pool);
+            } else {
+                BlockwiseQuantizer<T, 256, qbits, false>::dequantize(dst, src, scales, zero_points,
+                                                                 rows, columns, thread_pool);
+            }
+            break;
+        default:
+            // Only block size 16, 32, 64, 128, 256 are supported.
+            break;
+    }
+}
+
+template
+void
+MlasDequantizeBlockwise<float, 4>(
+    float* dst,
+    const uint8_t* src,
+    const float* scales,
+    const uint8_t* zero_points,
+    int block_size,
+    bool columnwise,
+    int rows,
+    int columns,
+    MLAS_THREADPOOL* thread_pool
+    );
diff --git a/onnxruntime/core/mlas/lib/q4gemm.h b/onnxruntime/core/mlas/lib/q4gemm.h
index 1562f9c0b4236..b1b51dd53c4fc 100644
--- a/onnxruntime/core/mlas/lib/q4gemm.h
+++ b/onnxruntime/core/mlas/lib/q4gemm.h
@@ -90,7 +90,7 @@ MlasQ4GemmOperation(
 
                 if (DataParams->OutputProcessor != nullptr) {
                     DataParams->OutputProcessor->Process(
-                        DataParams->C, RangeStartM + RangeCountM - RowsRemaining, RangeStartN,
+                        DataParams->C, RangeStartM + RangeCountM - RowsRemaining, RangeStartN + n,
                         RowsHandled, CountN, ldc);
                 }
 
diff --git a/onnxruntime/core/mlas/lib/qgemm_kernel_smmla.cpp b/onnxruntime/core/mlas/lib/qgemm_kernel_smmla.cpp
new file mode 100644
index 0000000000000..c41f43ca22d18
--- /dev/null
+++ b/onnxruntime/core/mlas/lib/qgemm_kernel_smmla.cpp
@@ -0,0 +1,964 @@
+/*++
+
+Copyright (c) Microsoft Corporation. All rights reserved.
+Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+
+Licensed under the MIT License.
+
+Module Name:
+
+    qgemm_kernel_smmla.cpp
+
+Abstract:
+
+    This module implements smmla QGEMM kernel.
+
+--*/
+
+#include "mlasi.h"
+#include "qgemm.h"
+
+//
+// Define the prototypes of the NEON SMMLA routines written in assembly.
+//
+
+extern "C" {
+
+size_t MLASCALL
+MlasGemmS8S8KernelSmmlaZero(const uint8_t* A,
+                            const uint8_t* B,
+                            int32_t* C,
+                            size_t PackedCountK,
+                            size_t CountM,
+                            size_t CountN,
+                            size_t ldc,
+                            const int32_t* RowSumVector,
+                            const int32_t* ColumnSumVector,
+                            const int32_t* ZeroPointB);
+
+size_t MLASCALL
+MlasGemmS8S8KernelSmmlaAdd(const uint8_t* A,
+                           const uint8_t* B,
+                           int32_t* C,
+                           size_t PackedCountK,
+                           size_t CountM,
+                           size_t CountN,
+                           size_t ldc,
+                           const int32_t* RowSumVector,
+                           const int32_t* ColumnSumVector,
+                           const int32_t* ZeroPointB);
+}
+
+struct MLAS_GEMM_S8S8_KERNEL_SMMLA {
+    typedef uint8_t PackedAType;
+    typedef uint8_t PackedBType;
+    typedef int8_t OffsetAType;
+    typedef int8_t OffsetBType;
+
+    static constexpr size_t PackedK = 8;
+    static constexpr MLAS_GEMM_QUANT_STRIDES Strides{24, 128, 256};
+    static constexpr MLAS_GEMM_QUANT_STRIDES PackedStrides{24, 128, 384};
+};
+
+constexpr size_t MLAS_GEMM_S8S8_KERNEL_SMMLA::PackedK;
+constexpr MLAS_GEMM_QUANT_STRIDES MLAS_GEMM_S8S8_KERNEL_SMMLA::Strides;
+constexpr MLAS_GEMM_QUANT_STRIDES MLAS_GEMM_S8S8_KERNEL_SMMLA::PackedStrides;
+
+template <>
+MLAS_FORCEINLINE int32_t
+MlasGemmQuantFixupZeroPointB<MLAS_GEMM_S8S8_KERNEL_SMMLA>(int32_t ZeroPointB, bool BIsSigned)
+{
+    MLAS_UNREFERENCED_PARAMETER(BIsSigned);
+    return ZeroPointB;
+}
+
+template <>
+void
+MlasGemmQuantCopyPackA<MLAS_GEMM_S8S8_KERNEL_SMMLA>(
+    MLAS_GEMM_S8S8_KERNEL_SMMLA::PackedAType* D_uint8_t,
+    const uint8_t* A,
+    size_t lda,
+    size_t CountM,
+    size_t CountK,
+    int32_t* RowSumBuffer,
+    bool AIsSigned)
+{
+    int8_t* D = reinterpret_cast<int8_t*>(D_uint8_t);
+    MLAS_UNREFERENCED_PARAMETER(AIsSigned);
+    int8_t PaddedMatrixAData[64];
+
+    //
+    // Process 8 rows of matrix A.
+    //
+    // MMLA kernels load 8x8 block of A with four vector registers. So A is packed
+    // a series of 64 byte vectors where eight rows are interleaved with the
+    // following pattern:
+    //
+    //      [ A0 A1 A2 A3 A4 A5 A6 A7 ]
+    //      [ B0 B1 B2 B3 B4 B5 B6 B7 ]
+    //      [ C0 C1 C2 C3 C4 C5 C6 C7 ]
+    //      [ D0 D1 D2 D3 D4 D5 D6 D7 ]
+    //      [ E0 E1 E2 E3 E4 E5 E6 E7 ]
+    //      [ F0 F1 F2 F3 F4 F5 F6 F7 ]
+    //      [ G0 G1 G2 G3 G4 G5 G6 G7 ]
+    //      [ H0 H1 H2 H3 H4 H5 H6 H7 ]
+    //
+    //      ...
+    //
+    // This pattern is repeated (CountK / 8) times.
+    //
+    // If CountK is not aligned to a multiple of eight, then the vector is padded
+    // with zeroes.
+    //
+
+    while (CountM >= 8) {
+        const int8_t* a0 = reinterpret_cast<const int8_t*>(A);
+        const int8_t* a1 = a0 + lda;
+        const int8_t* a2 = a0 + lda * 2;
+        const int8_t* a3 = a0 + lda * 3;
+        const int8_t* a4 = a0 + lda * 4;
+        const int8_t* a5 = a0 + lda * 5;
+        const int8_t* a6 = a0 + lda * 6;
+        const int8_t* a7 = a0 + lda * 7;
+
+        size_t k = CountK;
+        int32x4_t RowSums0 = vmovq_n_s32(0);
+        int32x4_t RowSums1 = vmovq_n_s32(0);
+
+        while (k >= 16) {
+            int64x2_t v0 = vld1q_s64(reinterpret_cast<const int64_t*>(a0));
+            a0 += 16;
+            int64x2_t v1 = vld1q_s64(reinterpret_cast<const int64_t*>(a1));
+            a1 += 16;
+            int64x2_t v2 = vld1q_s64(reinterpret_cast<const int64_t*>(a2));
+            a2 += 16;
+            int64x2_t v3 = vld1q_s64(reinterpret_cast<const int64_t*>(a3));
+            a3 += 16;
+            int64x2_t v4 = vld1q_s64(reinterpret_cast<const int64_t*>(a4));
+            a4 += 16;
+            int64x2_t v5 = vld1q_s64(reinterpret_cast<const int64_t*>(a5));
+            a5 += 16;
+            int64x2_t v6 = vld1q_s64(reinterpret_cast<const int64_t*>(a6));
+            a6 += 16;
+            int64x2_t v7 = vld1q_s64(reinterpret_cast<const int64_t*>(a7));
+            a7 += 16;
+
+            int64x2_t z0 = vzip1q_s64(v0, v1);
+            int64x2_t z1 = vzip2q_s64(v0, v1);
+            int64x2_t z2 = vzip1q_s64(v2, v3);
+            int64x2_t z3 = vzip2q_s64(v2, v3);
+
+            int64x2_t z4 = vzip1q_s64(v4, v5);
+            int64x2_t z5 = vzip2q_s64(v4, v5);
+            int64x2_t z6 = vzip1q_s64(v6, v7);
+            int64x2_t z7 = vzip2q_s64(v6, v7);
+
+            vst1q_s8(&D[0], vreinterpretq_s8_s64(z0));
+            vst1q_s8(&D[16], vreinterpretq_s8_s64(z2));
+            vst1q_s8(&D[32], vreinterpretq_s8_s64(z4));
+            vst1q_s8(&D[48], vreinterpretq_s8_s64(z6));
+            vst1q_s8(&D[64], vreinterpretq_s8_s64(z1));
+            vst1q_s8(&D[80], vreinterpretq_s8_s64(z3));
+            vst1q_s8(&D[96], vreinterpretq_s8_s64(z5));
+            vst1q_s8(&D[112], vreinterpretq_s8_s64(z7));
+
+            int32x4_t RowSums0L_pada = vmovq_n_s32(0);
+            RowSums0L_pada = vpadalq_s16(RowSums0L_pada, vpaddlq_s8(vreinterpretq_s8_s64(z0)));
+            RowSums0L_pada = vpadalq_s16(RowSums0L_pada, vpaddlq_s8(vreinterpretq_s8_s64(z1)));
+
+            int32x4_t RowSums0L_ext = vextq_s32(RowSums0L_pada, RowSums0L_pada, 1);
+            int32x4_t RowSums0L_add = vaddq_s32(RowSums0L_pada, RowSums0L_ext);
+            int32x2_t RowSums0L = {vdups_laneq_s32(RowSums0L_add, 0),
+                                   vdups_laneq_s32(RowSums0L_add, 2)};
+
+            int32x4_t RowSums0H_pada = vmovq_n_s32(0);
+            RowSums0H_pada = vpadalq_s16(RowSums0H_pada, vpaddlq_s8(vreinterpretq_s8_s64(z2)));
+            RowSums0H_pada = vpadalq_s16(RowSums0H_pada, vpaddlq_s8(vreinterpretq_s8_s64(z3)));
+
+            int32x4_t RowSums0H_ext = vextq_s32(RowSums0H_pada, RowSums0H_pada, 1);
+            int32x4_t RowSums0H_add = vaddq_s32(RowSums0H_pada, RowSums0H_ext);
+            int32x2_t RowSums0H = {vdups_laneq_s32(RowSums0H_add, 0),
+                                   vdups_laneq_s32(RowSums0H_add, 2)};
+
+            RowSums0 = vaddq_s32(RowSums0, vcombine_s32(RowSums0L, RowSums0H));
+
+            int32x4_t RowSums1L_pada = vmovq_n_s32(0);
+            RowSums1L_pada = vpadalq_s16(RowSums1L_pada, vpaddlq_s8(vreinterpretq_s8_s64(z4)));
+            RowSums1L_pada = vpadalq_s16(RowSums1L_pada, vpaddlq_s8(vreinterpretq_s8_s64(z5)));
+
+            int32x4_t RowSums1L_ext = vextq_s32(RowSums1L_pada, RowSums1L_pada, 1);
+            int32x4_t RowSums1L_add = vaddq_s32(RowSums1L_pada, RowSums1L_ext);
+            int32x2_t RowSums1L = {vdups_laneq_s32(RowSums1L_add, 0),
+                                   vdups_laneq_s32(RowSums1L_add, 2)};
+
+            int32x4_t RowSums1H_pada = vmovq_n_s32(0);
+            RowSums1H_pada = vpadalq_s16(RowSums1H_pada, vpaddlq_s8(vreinterpretq_s8_s64(z6)));
+            RowSums1H_pada = vpadalq_s16(RowSums1H_pada, vpaddlq_s8(vreinterpretq_s8_s64(z7)));
+
+            int32x4_t RowSums1H_ext = vextq_s32(RowSums1H_pada, RowSums1H_pada, 1);
+            int32x4_t RowSums1H_add = vaddq_s32(RowSums1H_pada, RowSums1H_ext);
+            int32x2_t RowSums1H = {vdups_laneq_s32(RowSums1H_add, 0),
+                                   vdups_laneq_s32(RowSums1H_add, 2)};
+
+            RowSums1 = vaddq_s32(RowSums1, vcombine_s32(RowSums1L, RowSums1H));
+
+            D += 128;
+            k -= 16;
+        }
+
+        while (k >= 8) {
+            int64x1_t v0 = *reinterpret_cast<const int64x1_t*>(a0);
+            a0 += 8;
+            int64x1_t v1 = *reinterpret_cast<const int64x1_t*>(a1);
+            a1 += 8;
+            int64x1_t v2 = *reinterpret_cast<const int64x1_t*>(a2);
+            a2 += 8;
+            int64x1_t v3 = *reinterpret_cast<const int64x1_t*>(a3);
+            a3 += 8;
+            int64x1_t v4 = *reinterpret_cast<const int64x1_t*>(a4);
+            a4 += 8;
+            int64x1_t v5 = *reinterpret_cast<const int64x1_t*>(a5);
+            a5 += 8;
+            int64x1_t v6 = *reinterpret_cast<const int64x1_t*>(a6);
+            a6 += 8;
+            int64x1_t v7 = *reinterpret_cast<const int64x1_t*>(a7);
+            a7 += 8;
+
+            *reinterpret_cast<int64x1_t*>(&D[0]) = v0;
+            *reinterpret_cast<int64x1_t*>(&D[8]) = v1;
+            *reinterpret_cast<int64x1_t*>(&D[16]) = v2;
+            *reinterpret_cast<int64x1_t*>(&D[24]) = v3;
+            *reinterpret_cast<int64x1_t*>(&D[32]) = v4;
+            *reinterpret_cast<int64x1_t*>(&D[40]) = v5;
+            *reinterpret_cast<int64x1_t*>(&D[48]) = v6;
+            *reinterpret_cast<int64x1_t*>(&D[56]) = v7;
+
+            int64x2_t z01 = vcombine_s64(v0, v1);
+            int64x2_t z23 = vcombine_s64(v2, v3);
+            int64x2_t z45 = vcombine_s64(v4, v5);
+            int64x2_t z67 = vcombine_s64(v6, v7);
+
+            int32x4_t RowSums0L_pada = vmovq_n_s32(0);
+            RowSums0L_pada = vpadalq_s16(RowSums0L_pada, vpaddlq_s8(vreinterpretq_s8_s64(z01)));
+
+            int32x4_t RowSums0L_ext = vextq_s32(RowSums0L_pada, RowSums0L_pada, 1);
+            int32x4_t RowSums0L_add = vaddq_s32(RowSums0L_pada, RowSums0L_ext);
+            int32x2_t RowSums0L = {vdups_laneq_s32(RowSums0L_add, 0),
+                                   vdups_laneq_s32(RowSums0L_add, 2)};
+
+            int32x4_t RowSums0H_pada = vmovq_n_s32(0);
+            RowSums0H_pada = vpadalq_s16(RowSums0H_pada, vpaddlq_s8(vreinterpretq_s8_s64(z23)));
+
+            int32x4_t RowSums0H_ext = vextq_s32(RowSums0H_pada, RowSums0H_pada, 1);
+            int32x4_t RowSums0H_add = vaddq_s32(RowSums0H_pada, RowSums0H_ext);
+            int32x2_t RowSums0H = {vdups_laneq_s32(RowSums0H_add, 0),
+                                   vdups_laneq_s32(RowSums0H_add, 2)};
+
+            RowSums0 = vaddq_s32(RowSums0, vcombine_s32(RowSums0L, RowSums0H));
+
+            int32x4_t RowSums1L_pada = vmovq_n_s32(0);
+            RowSums1L_pada = vpadalq_s16(RowSums1L_pada, vpaddlq_s8(vreinterpretq_s8_s64(z45)));
+
+            int32x4_t RowSums1L_ext = vextq_s32(RowSums1L_pada, RowSums1L_pada, 1);
+            int32x4_t RowSums1L_add = vaddq_s32(RowSums1L_pada, RowSums1L_ext);
+            int32x2_t RowSums1L = {vdups_laneq_s32(RowSums1L_add, 0),
+                                   vdups_laneq_s32(RowSums1L_add, 2)};
+
+            int32x4_t RowSums1H_pada = vmovq_n_s32(0);
+            RowSums1H_pada = vpadalq_s16(RowSums1H_pada, vpaddlq_s8(vreinterpretq_s8_s64(z67)));
+
+            int32x4_t RowSums1H_ext = vextq_s32(RowSums1H_pada, RowSums1H_pada, 1);
+            int32x4_t RowSums1H_add = vaddq_s32(RowSums1H_pada, RowSums1H_ext);
+            int32x2_t RowSums1H = {vdups_laneq_s32(RowSums1H_add, 0),
+                                   vdups_laneq_s32(RowSums1H_add, 2)};
+
+            RowSums1 = vaddq_s32(RowSums1, vcombine_s32(RowSums1L, RowSums1H));
+
+            D += 64;
+            k -= 8;
+        }
+
+        if (k > 0) {
+            //
+            // zero pad the remaining columns to 8
+            //
+            int8_t* d = D;
+
+            vst1q_s8(d, vmovq_n_s8(0));
+            vst1q_s8(&d[16], vmovq_n_s8(0));
+            vst1q_s8(&d[32], vmovq_n_s8(0));
+            vst1q_s8(&d[48], vmovq_n_s8(0));
+
+            while (k > 0) {
+                d[0] = *a0++;
+                d[8] = *a1++;
+                d[16] = *a2++;
+                d[24] = *a3++;
+                d[32] = *a4++;
+                d[40] = *a5++;
+                d[48] = *a6++;
+                d[56] = *a7++;
+                d += 1;
+                k -= 1;
+            }
+            d = D;
+            int64x1_t v0 = *reinterpret_cast<const int64x1_t*>(d);
+            d = d + 8;
+            int64x1_t v1 = *reinterpret_cast<const int64x1_t*>(d);
+            d = d + 8;
+            int64x1_t v2 = *reinterpret_cast<const int64x1_t*>(d);
+            d = d + 8;
+            int64x1_t v3 = *reinterpret_cast<const int64x1_t*>(d);
+            d = d + 8;
+            int64x1_t v4 = *reinterpret_cast<const int64x1_t*>(d);
+            d = d + 8;
+            int64x1_t v5 = *reinterpret_cast<const int64x1_t*>(d);
+            d = d + 8;
+            int64x1_t v6 = *reinterpret_cast<const int64x1_t*>(d);
+            d = d + 8;
+            int64x1_t v7 = *reinterpret_cast<const int64x1_t*>(d);
+            d = d + 8;
+
+            int64x2_t z01 = vcombine_s64(v0, v1);
+            int64x2_t z23 = vcombine_s64(v2, v3);
+            int64x2_t z45 = vcombine_s64(v4, v5);
+            int64x2_t z67 = vcombine_s64(v6, v7);
+
+            int32x4_t RowSums0L_pada = vmovq_n_s32(0);
+            RowSums0L_pada = vpadalq_s16(RowSums0L_pada, vpaddlq_s8(vreinterpretq_s8_s64(z01)));
+
+            int32x4_t RowSums0L_ext = vextq_s32(RowSums0L_pada, RowSums0L_pada, 1);
+            int32x4_t RowSums0L_add = vaddq_s32(RowSums0L_pada, RowSums0L_ext);
+            int32x2_t RowSums0L = {vdups_laneq_s32(RowSums0L_add, 0),
+                                   vdups_laneq_s32(RowSums0L_add, 2)};
+
+            int32x4_t RowSums0H_pada = vmovq_n_s32(0);
+            RowSums0H_pada = vpadalq_s16(RowSums0H_pada, vpaddlq_s8(vreinterpretq_s8_s64(z23)));
+
+            int32x4_t RowSums0H_ext = vextq_s32(RowSums0H_pada, RowSums0H_pada, 1);
+            int32x4_t RowSums0H_add = vaddq_s32(RowSums0H_pada, RowSums0H_ext);
+            int32x2_t RowSums0H = {vdups_laneq_s32(RowSums0H_add, 0),
+                                   vdups_laneq_s32(RowSums0H_add, 2)};
+
+            RowSums0 = vaddq_s32(RowSums0, vcombine_s32(RowSums0L, RowSums0H));
+
+            int32x4_t RowSums1L_pada = vmovq_n_s32(0);
+            RowSums1L_pada = vpadalq_s16(RowSums1L_pada, vpaddlq_s8(vreinterpretq_s8_s64(z45)));
+
+            int32x4_t RowSums1L_ext = vextq_s32(RowSums1L_pada, RowSums1L_pada, 1);
+            int32x4_t RowSums1L_add = vaddq_s32(RowSums1L_pada, RowSums1L_ext);
+            int32x2_t RowSums1L = {vdups_laneq_s32(RowSums1L_add, 0),
+                                   vdups_laneq_s32(RowSums1L_add, 2)};
+
+            int32x4_t RowSums1H_pada = vmovq_n_s32(0);
+            RowSums1H_pada = vpadalq_s16(RowSums1H_pada, vpaddlq_s8(vreinterpretq_s8_s64(z67)));
+
+            int32x4_t RowSums1H_ext = vextq_s32(RowSums1H_pada, RowSums1H_pada, 1);
+            int32x4_t RowSums1H_add = vaddq_s32(RowSums1H_pada, RowSums1H_ext);
+            int32x2_t RowSums1H = {vdups_laneq_s32(RowSums1H_add, 0),
+                                   vdups_laneq_s32(RowSums1H_add, 2)};
+
+            RowSums1 = vaddq_s32(RowSums1, vcombine_s32(RowSums1L, RowSums1H));
+
+            D += 64;
+        }
+
+        vst1q_s32(RowSumBuffer, RowSums0);
+        vst1q_s32(&RowSumBuffer[4], RowSums1);
+
+        RowSumBuffer += 8;
+
+        A = A + lda * 8;
+        CountM -= 8;
+    }
+
+    //
+    // Process four rows of matrix A.
+    //
+    // The buffer is packed as a series of 32 byte vectors where four rows are
+    // interleaved with the following pattern:
+    //
+    //      [ A0 A1 A2 A3 A4 A5 A6 A7 ]
+    //      [ B0 B1 B2 B3 B4 B5 B6 B7 ]
+    //      [ C0 C1 C2 C3 C4 C5 C6 C7 ]
+    //      [ D0 D1 D2 D3 D4 D5 D6 D7 ]
+    //
+    // This pattern is repeated (CountK / 8) times.
+    //
+    // If CountK is not aligned to a multiple of eight, then the vector is padded
+    // with zeroes.
+    //
+
+    if (CountM >= 4) {
+        const int8_t* a0 = reinterpret_cast<const int8_t*>(A);
+        const int8_t* a1 = a0 + lda;
+        const int8_t* a2 = a1 + lda;
+        const int8_t* a3 = a2 + lda;
+
+        size_t k = CountK;
+        int32x4_t RowSums = vmovq_n_s32(0);
+
+        while (k >= 16) {
+            int64x2_t v0 = vld1q_s64(reinterpret_cast<const int64_t*>(a0));
+            a0 += 16;
+            int64x2_t v1 = vld1q_s64(reinterpret_cast<const int64_t*>(a1));
+            a1 += 16;
+            int64x2_t v2 = vld1q_s64(reinterpret_cast<const int64_t*>(a2));
+            a2 += 16;
+            int64x2_t v3 = vld1q_s64(reinterpret_cast<const int64_t*>(a3));
+            a3 += 16;
+
+            int64x2_t z0 = vzip1q_s64(v0, v1);
+            int64x2_t z1 = vzip2q_s64(v0, v1);
+            int64x2_t z2 = vzip1q_s64(v2, v3);
+            int64x2_t z3 = vzip2q_s64(v2, v3);
+
+            vst1q_s8(&D[0], vreinterpretq_s8_s64(z0));
+            vst1q_s8(&D[16], vreinterpretq_s8_s64(z2));
+            vst1q_s8(&D[32], vreinterpretq_s8_s64(z1));
+            vst1q_s8(&D[48], vreinterpretq_s8_s64(z3));
+
+            int32x4_t RowSumsL_pada = vmovq_n_s32(0);
+            RowSumsL_pada = vpadalq_s16(RowSumsL_pada, vpaddlq_s8(vreinterpretq_s8_s64(z0)));
+            RowSumsL_pada = vpadalq_s16(RowSumsL_pada, vpaddlq_s8(vreinterpretq_s8_s64(z1)));
+
+            int32x4_t RowSumsL_ext = vextq_s32(RowSumsL_pada, RowSumsL_pada, 1);
+            int32x4_t RowSumsL_add = vaddq_s32(RowSumsL_pada, RowSumsL_ext);
+            int32x2_t RowSumsL = {vdups_laneq_s32(RowSumsL_add, 0),
+                                  vdups_laneq_s32(RowSumsL_add, 2)};
+
+            int32x4_t RowSumsH_pada = vmovq_n_s32(0);
+            RowSumsH_pada = vpadalq_s16(RowSumsH_pada, vpaddlq_s8(vreinterpretq_s8_s64(z2)));
+            RowSumsH_pada = vpadalq_s16(RowSumsH_pada, vpaddlq_s8(vreinterpretq_s8_s64(z3)));
+
+            int32x4_t RowSumsH_ext = vextq_s32(RowSumsH_pada, RowSumsH_pada, 1);
+            int32x4_t RowSumsH_add = vaddq_s32(RowSumsH_pada, RowSumsH_ext);
+            int32x2_t RowSumsH = {vdups_laneq_s32(RowSumsH_add, 0),
+                                  vdups_laneq_s32(RowSumsH_add, 2)};
+
+            RowSums = vaddq_s32(RowSums, vcombine_s32(RowSumsL, RowSumsH));
+
+            D += 64;
+            k -= 16;
+        }
+
+        while (k >= 8) {
+            int64x1_t v0 = *reinterpret_cast<const int64x1_t*>(a0);
+            a0 += 8;
+            int64x1_t v1 = *reinterpret_cast<const int64x1_t*>(a1);
+            a1 += 8;
+            int64x1_t v2 = *reinterpret_cast<const int64x1_t*>(a2);
+            a2 += 8;
+            int64x1_t v3 = *reinterpret_cast<const int64x1_t*>(a3);
+            a3 += 8;
+
+            *reinterpret_cast<int64x1_t*>(&D[0]) = v0;
+            *reinterpret_cast<int64x1_t*>(&D[8]) = v1;
+            *reinterpret_cast<int64x1_t*>(&D[16]) = v2;
+            *reinterpret_cast<int64x1_t*>(&D[24]) = v3;
+
+            int64x2_t z01 = vcombine_s64(v0, v1);
+            int64x2_t z23 = vcombine_s64(v2, v3);
+
+            int32x4_t RowSumsL_pada = vmovq_n_s32(0);
+            RowSumsL_pada = vpadalq_s16(RowSumsL_pada, vpaddlq_s8(vreinterpretq_s8_s64(z01)));
+
+            int32x4_t RowSumsL_ext = vextq_s32(RowSumsL_pada, RowSumsL_pada, 1);
+            int32x4_t RowSumsL_add = vaddq_s32(RowSumsL_pada, RowSumsL_ext);
+            int32x2_t RowSumsL = {vdups_laneq_s32(RowSumsL_add, 0),
+                                  vdups_laneq_s32(RowSumsL_add, 2)};
+
+            int32x4_t RowSumsH_pada = vmovq_n_s32(0);
+            RowSumsH_pada = vpadalq_s16(RowSumsH_pada, vpaddlq_s8(vreinterpretq_s8_s64(z23)));
+
+            int32x4_t RowSumsH_ext = vextq_s32(RowSumsH_pada, RowSumsH_pada, 1);
+            int32x4_t RowSumsH_add = vaddq_s32(RowSumsH_pada, RowSumsH_ext);
+            int32x2_t RowSumsH = {vdups_laneq_s32(RowSumsH_add, 0),
+                                  vdups_laneq_s32(RowSumsH_add, 2)};
+
+            RowSums = vaddq_s32(RowSums, vcombine_s32(RowSumsL, RowSumsH));
+
+            D += 32;
+            k -= 8;
+        }
+
+        if (k > 0) {
+            //
+            // Copy the remaining bytes with zero padding.
+            //
+            int8_t* d = D;
+
+            vst1q_s8(d, vmovq_n_s8(0));
+            vst1q_s8(&d[16], vmovq_n_s8(0));
+
+            while (k > 0) {
+                d[0] = *a0++;
+                d[8] = *a1++;
+                d[16] = *a2++;
+                d[24] = *a3++;
+                d += 1;
+                k -= 1;
+            }
+
+            d = D;
+            int64x1_t v0 = *reinterpret_cast<const int64x1_t*>(d);
+            d = d + 8;
+            int64x1_t v1 = *reinterpret_cast<const int64x1_t*>(d);
+            d = d + 8;
+            int64x1_t v2 = *reinterpret_cast<const int64x1_t*>(d);
+            d = d + 8;
+            int64x1_t v3 = *reinterpret_cast<const int64x1_t*>(d);
+            d = d + 8;
+
+            int64x2_t z01 = vcombine_s64(v0, v1);
+            int64x2_t z23 = vcombine_s64(v2, v3);
+
+            int32x4_t RowSums0L_pada = vmovq_n_s32(0);
+            RowSums0L_pada = vpadalq_s16(RowSums0L_pada, vpaddlq_s8(vreinterpretq_s8_s64(z01)));
+
+            int32x4_t RowSums0L_ext = vextq_s32(RowSums0L_pada, RowSums0L_pada, 1);
+            int32x4_t RowSums0L_add = vaddq_s32(RowSums0L_pada, RowSums0L_ext);
+            int32x2_t RowSums0L = {vdups_laneq_s32(RowSums0L_add, 0),
+                                   vdups_laneq_s32(RowSums0L_add, 2)};
+
+            int32x4_t RowSums0H_pada = vmovq_n_s32(0);
+            RowSums0H_pada = vpadalq_s16(RowSums0H_pada, vpaddlq_s8(vreinterpretq_s8_s64(z23)));
+
+            int32x4_t RowSums0H_ext = vextq_s32(RowSums0H_pada, RowSums0H_pada, 1);
+            int32x4_t RowSums0H_add = vaddq_s32(RowSums0H_pada, RowSums0H_ext);
+            int32x2_t RowSums0H = {vdups_laneq_s32(RowSums0H_add, 0),
+                                   vdups_laneq_s32(RowSums0H_add, 2)};
+
+            RowSums = vaddq_s32(RowSums, vcombine_s32(RowSums0L, RowSums0H));
+
+            D += 32;
+        }
+
+        vst1q_s32(RowSumBuffer, RowSums);
+        RowSumBuffer += 4;
+
+        A = A + lda * 4;
+        CountM -= 4;
+    }
+
+    //
+    // Process two rows of matrix A.
+    //
+    // The buffer is packed as a series of 16 byte vectors where two rows are
+    // interleaved with the following pattern:
+    //
+    //      [ A0 A1 A2 A3 A4 A5 A6 A7 ]
+    //      [ B0 B1 B2 B3 B4 B5 B6 B7 ]
+    //
+    // This pattern is repeated (CountK / 8) times.
+    //
+    // If CountK is not aligned to a multiple of eight, then the vector is padded
+    // with zeroes.
+    //
+
+    if (CountM >= 2) {
+        const int8_t* a0 = reinterpret_cast<const int8_t*>(A);
+        const int8_t* a1 = a0 + lda;
+
+        size_t k = CountK;
+        int32x2_t RowSums = vmov_n_s32(0);
+
+        while (k >= 16) {
+            int64x2_t v0 = vld1q_s64(reinterpret_cast<const int64_t*>(a0));
+            a0 += 16;
+            int64x2_t v1 = vld1q_s64(reinterpret_cast<const int64_t*>(a1));
+            a1 += 16;
+
+            int64x2_t z0 = vzip1q_s64(v0, v1);
+            int64x2_t z1 = vzip2q_s64(v0, v1);
+
+            vst1q_s8(&D[0], vreinterpretq_s8_s64(z0));
+            vst1q_s8(&D[16], vreinterpretq_s8_s64(z1));
+
+            int32x4_t RowSumsL_pada = vmovq_n_s32(0);
+            RowSumsL_pada = vpadalq_s16(RowSumsL_pada, vpaddlq_s8(vreinterpretq_s8_s64(z0)));
+            RowSumsL_pada = vpadalq_s16(RowSumsL_pada, vpaddlq_s8(vreinterpretq_s8_s64(z1)));
+
+            int32x4_t RowSumsL_ext = vextq_s32(RowSumsL_pada, RowSumsL_pada, 1);
+            int32x4_t RowSumsL_add = vaddq_s32(RowSumsL_pada, RowSumsL_ext);
+            int32x2_t RowSumsL = {vdups_laneq_s32(RowSumsL_add, 0),
+                                  vdups_laneq_s32(RowSumsL_add, 2)};
+
+            RowSums = vadd_s32(RowSums, RowSumsL);
+
+            D += 32;
+            k -= 16;
+        }
+
+        while (k >= 8) {
+            int64x1_t v0 = *reinterpret_cast<const int64x1_t*>(a0);
+            a0 += 8;
+            int64x1_t v1 = *reinterpret_cast<const int64x1_t*>(a1);
+            a1 += 8;
+
+            *reinterpret_cast<int64x1_t*>(&D[0]) = v0;
+            *reinterpret_cast<int64x1_t*>(&D[8]) = v1;
+
+            int64x2_t z01 = vcombine_s64(v0, v1);
+            int32x4_t RowSumsL_pada = vmovq_n_s32(0);
+            RowSumsL_pada = vpadalq_s16(RowSumsL_pada, vpaddlq_s8(vreinterpretq_s8_s64(z01)));
+
+            int32x4_t RowSumsL_ext = vextq_s32(RowSumsL_pada, RowSumsL_pada, 1);
+            int32x4_t RowSumsL_add = vaddq_s32(RowSumsL_pada, RowSumsL_ext);
+            int32x2_t RowSumsL = {vdups_laneq_s32(RowSumsL_add, 0),
+                                  vdups_laneq_s32(RowSumsL_add, 2)};
+
+            RowSums = vadd_s32(RowSums, RowSumsL);
+
+            D += 16;
+            k -= 8;
+        }
+
+        if (k > 0) {
+            //
+            // Zero pad the remaining elements to make 8 columns.
+            //
+
+            int8_t* d = PaddedMatrixAData;
+            vst1q_s8(PaddedMatrixAData, vmovq_n_s8(0));
+
+            while (k > 0) {
+                d[0] = *a0++;
+                d[8] = *a1++;
+
+                d += 1;
+                k -= 1;
+            }
+
+            d = PaddedMatrixAData;
+            int64x1_t v0 = *reinterpret_cast<const int64x1_t*>(d);
+            d = d + 8;
+            int64x1_t v1 = *reinterpret_cast<const int64x1_t*>(d);
+            d = d + 8;
+
+            int64x2_t z01 = vcombine_s64(v0, v1);
+            int32x4_t RowSumsL_pada = vmovq_n_s32(0);
+            RowSumsL_pada = vpadalq_s16(RowSumsL_pada, vpaddlq_s8(vreinterpretq_s8_s64(z01)));
+
+            int32x4_t RowSumsL_ext = vextq_s32(RowSumsL_pada, RowSumsL_pada, 1);
+            int32x4_t RowSumsL_add = vaddq_s32(RowSumsL_pada, RowSumsL_ext);
+            int32x2_t RowSumsL = {vdups_laneq_s32(RowSumsL_add, 0),
+                                  vdups_laneq_s32(RowSumsL_add, 2)};
+
+            RowSums = vadd_s32(RowSums, RowSumsL);
+
+            int8x16_t PackedVector = vld1q_s8(PaddedMatrixAData);
+            vst1q_s8(D, PackedVector);
+
+            D += 16;
+        }
+
+        vst1_s32(RowSumBuffer, RowSums);
+        RowSumBuffer += 2;
+
+        A = A + lda * 2;
+        CountM -= 2;
+    }
+
+    //
+    // Process one row of matrix A.
+    //
+    // The buffer is packed as a series of 8 byte with the following pattern:
+    //
+    //      [ A0 A1 A2 A3 A4 A5 A6 A7 ]
+    //
+    // This pattern is repeated (CountK / 8) times.
+    //
+    // If CountK is not aligned to a multiple of 8, then the vector is padded
+    // with zeroes.
+    //
+
+    if (CountM > 0) {
+        // No need to pad the rows to 2, the .S takes care of zero pdding
+        const int8_t* a = reinterpret_cast<const int8_t*>(A);
+        size_t k = CountK;
+        int32x4_t RowSums = vmovq_n_s32(0);
+
+        while (k >= 16) {
+            int8x16_t v = vld1q_s8(a);
+            a += 16;
+
+            vst1q_s8(D, v);
+
+            RowSums = vpadalq_s16(RowSums, vpaddlq_s8(v));
+
+            D += 16;
+            k -= 16;
+        }
+
+        if (k > 0) {
+            //
+            // Copy the remaining bytes to the zero padded stack buffer.
+            //
+
+            vst1q_s8(PaddedMatrixAData, vmovq_n_s8(0));
+
+            for (size_t kk = 0; kk < k; kk++) {
+                PaddedMatrixAData[kk] = a[kk];
+            }
+
+            int8x16_t v = vld1q_s8(PaddedMatrixAData);
+            vst1q_s8(D, v);
+
+            RowSums = vpadalq_s16(RowSums, vpaddlq_s8(v));
+        }
+
+        *RowSumBuffer = int32_t(vaddvq_s32(RowSums));
+    }
+}
+
+MLAS_FORCEINLINE
+void
+MlasGemmS8S8CopyPackBProcessSmmla(int8_t* D, int8x8_t BytesRow[8], int32x4_t ColumnSums[2])
+{
+    int8x16_t v02 = vcombine_s8(BytesRow[0], BytesRow[2]);
+    int8x16_t v13 = vcombine_s8(BytesRow[1], BytesRow[3]);
+
+    int8x16_t v46 = vcombine_s8(BytesRow[4], BytesRow[6]);
+    int8x16_t v57 = vcombine_s8(BytesRow[5], BytesRow[7]);
+
+    int8x16x2_t zw1 = vzipq_s8(v02, v13);
+    int16x8x2_t zd1 = vzipq_s16(vreinterpretq_s16_s8(zw1.val[0]), vreinterpretq_s16_s8(zw1.val[1]));
+
+    int8x16x2_t zw2 = vzipq_s8(v46, v57);
+    int16x8x2_t zd2 = vzipq_s16(vreinterpretq_s16_s8(zw2.val[0]), vreinterpretq_s16_s8(zw2.val[1]));
+
+    int32x4x2_t zd3 =
+        vzipq_s32(vreinterpretq_s32_s16(zd1.val[0]), vreinterpretq_s32_s16(zd2.val[0]));
+    int32x4x2_t zd4 =
+        vzipq_s32(vreinterpretq_s32_s16(zd1.val[1]), vreinterpretq_s32_s16(zd2.val[1]));
+
+    vst1q_s8(&D[0], vreinterpretq_s8_s32(zd3.val[0]));
+    vst1q_s8(&D[16], vreinterpretq_s8_s32(zd3.val[1]));
+    vst1q_s8(&D[32], vreinterpretq_s8_s32(zd4.val[0]));
+    vst1q_s8(&D[48], vreinterpretq_s8_s32(zd4.val[1]));
+
+    int32x4_t ColSums0L_pada = vmovq_n_s32(0);
+    ColSums0L_pada = vpadalq_s16(ColSums0L_pada, vpaddlq_s8(vreinterpretq_s8_s32(zd3.val[0])));
+    int32x4_t ColSums0L_ext = vextq_s32(ColSums0L_pada, ColSums0L_pada, 1);
+    int32x4_t ColSums0L_add = vaddq_s32(ColSums0L_pada, ColSums0L_ext);
+    int32x2_t ColSums0L = {vdups_laneq_s32(ColSums0L_add, 0), vdups_laneq_s32(ColSums0L_add, 2)};
+
+    int32x4_t ColSums0H_pada = vmovq_n_s32(0);
+    ColSums0H_pada = vpadalq_s16(ColSums0H_pada, vpaddlq_s8(vreinterpretq_s8_s32(zd3.val[1])));
+    int32x4_t ColSums0H_ext = vextq_s32(ColSums0H_pada, ColSums0H_pada, 1);
+    int32x4_t ColSums0H_add = vaddq_s32(ColSums0H_pada, ColSums0H_ext);
+    int32x2_t ColSums0H = {vdups_laneq_s32(ColSums0H_add, 0), vdups_laneq_s32(ColSums0H_add, 2)};
+
+    ColumnSums[0] = vaddq_s32(ColumnSums[0], vcombine_s32(ColSums0L, ColSums0H));
+
+    int32x4_t ColSums1L_pada = vmovq_n_s32(0);
+    ColSums1L_pada = vpadalq_s16(ColSums1L_pada, vpaddlq_s8(vreinterpretq_s8_s32(zd4.val[0])));
+    int32x4_t ColSums1L_ext = vextq_s32(ColSums1L_pada, ColSums1L_pada, 1);
+    int32x4_t ColSums1L_add = vaddq_s32(ColSums1L_pada, ColSums1L_ext);
+    int32x2_t ColSums1L = {vdups_laneq_s32(ColSums1L_add, 0), vdups_laneq_s32(ColSums1L_add, 2)};
+
+    int32x4_t ColSums1H_pada = vmovq_n_s32(0);
+    ColSums1H_pada = vpadalq_s16(ColSums1H_pada, vpaddlq_s8(vreinterpretq_s8_s32(zd4.val[1])));
+    int32x4_t ColSums1H_ext = vextq_s32(ColSums1H_pada, ColSums1H_pada, 1);
+    int32x4_t ColSums1H_add = vaddq_s32(ColSums1H_pada, ColSums1H_ext);
+    int32x2_t ColSums1H = {vdups_laneq_s32(ColSums1H_add, 0), vdups_laneq_s32(ColSums1H_add, 2)};
+
+    ColumnSums[1] = vaddq_s32(ColumnSums[1], vcombine_s32(ColSums1L, ColSums1H));
+}
+
+template <>
+void
+MlasGemmQuantCopyPackB<MLAS_GEMM_S8S8_KERNEL_SMMLA>(MLAS_GEMM_S8S8_KERNEL_SMMLA::PackedBType* Dst,
+                                                    const uint8_t* B,
+                                                    size_t ldb,
+                                                    size_t CountN,
+                                                    size_t CountK,
+                                                    int32_t* ColumnSumBuffer,
+                                                    bool BIsSigned)
+{
+    MLAS_UNREFERENCED_PARAMETER(BIsSigned);
+    int8_t* D = reinterpret_cast<int8_t*>(Dst);
+    const int8x16_t ZeroVector = vmovq_n_s8(0);
+    int8x8_t BytesRow[8];
+
+    //
+    // Copy data from matrix B into the destination buffer 8x2 blocks at a
+    // time.
+    //
+    //
+    while (CountN >= 8) {
+        const int8_t* b = reinterpret_cast<const int8_t*>(B);
+        size_t k = CountK;
+        int32x4_t ColumnSums[2];
+
+        ColumnSums[0] = vmovq_n_s32(0);
+        ColumnSums[1] = vmovq_n_s32(0);
+
+        while (k >= 8) {
+            BytesRow[0] = vld1_s8(&b[ldb * 0]);
+            BytesRow[1] = vld1_s8(&b[ldb * 1]);
+            BytesRow[2] = vld1_s8(&b[ldb * 2]);
+            BytesRow[3] = vld1_s8(&b[ldb * 3]);
+            BytesRow[4] = vld1_s8(&b[ldb * 4]);
+            BytesRow[5] = vld1_s8(&b[ldb * 5]);
+            BytesRow[6] = vld1_s8(&b[ldb * 6]);
+            BytesRow[7] = vld1_s8(&b[ldb * 7]);
+
+            MlasGemmS8S8CopyPackBProcessSmmla(D, BytesRow, ColumnSums);
+
+            D += 64;
+            b += ldb * 8;
+            k -= 8;
+        }
+
+        if (k > 0) {
+            // Pad k to 8
+
+            BytesRow[0] = vld1_s8(&b[ldb * 0]);
+            BytesRow[1] = (k >= 2) ? vld1_s8(&b[ldb * 1]) : vget_low_s8(ZeroVector);
+            BytesRow[2] = (k >= 3) ? vld1_s8(&b[ldb * 2]) : vget_low_s8(ZeroVector);
+            BytesRow[3] = (k >= 4) ? vld1_s8(&b[ldb * 3]) : vget_low_s8(ZeroVector);
+            BytesRow[4] = (k >= 5) ? vld1_s8(&b[ldb * 4]) : vget_low_s8(ZeroVector);
+            BytesRow[5] = (k >= 6) ? vld1_s8(&b[ldb * 5]) : vget_low_s8(ZeroVector);
+            BytesRow[6] = (k >= 7) ? vld1_s8(&b[ldb * 6]) : vget_low_s8(ZeroVector);
+            BytesRow[7] = vget_low_s8(ZeroVector);
+
+            MlasGemmS8S8CopyPackBProcessSmmla(D, BytesRow, ColumnSums);
+
+            D += 64;
+        }
+
+        // Zero pad the output buffer to a multiple of PackedK if the above
+        // processed an odd number of four row bundles.
+        //
+        vst1q_s32(&ColumnSumBuffer[0], ColumnSums[0]);
+        vst1q_s32(&ColumnSumBuffer[4], ColumnSums[1]);
+
+        ColumnSumBuffer += 8;
+
+        B += 8;
+        CountN -= 8;
+    }
+
+    //
+    // Process the remaining columns of matrix B.
+    //
+
+    if (CountN > 0) {
+        const int8_t* b = reinterpret_cast<const int8_t*>(B);
+        size_t k = CountK;
+        int8_t PaddedMatrixBData[64];
+        int32x4_t ColumnSums[2];
+
+        vst1q_s8(&PaddedMatrixBData[0], ZeroVector);
+        vst1q_s8(&PaddedMatrixBData[16], ZeroVector);
+        vst1q_s8(&PaddedMatrixBData[32], ZeroVector);
+        vst1q_s8(&PaddedMatrixBData[48], ZeroVector);
+
+        ColumnSums[0] = vmovq_n_s32(0);
+        ColumnSums[1] = vmovq_n_s32(0);
+
+        //
+        // Interleave rows of matrix B using an intermediate zero padded stack
+        // buffer and write to the packed buffer.
+        //
+
+        while (k > 0) {
+            const int8_t* bcopy0 = &b[ldb * 0];
+            const int8_t* bcopy1 = &b[ldb * 1];
+            const int8_t* bcopy2 = &b[ldb * 2];
+            const int8_t* bcopy3 = &b[ldb * 3];
+            const int8_t* bcopy4 = &b[ldb * 4];
+            const int8_t* bcopy5 = &b[ldb * 5];
+            const int8_t* bcopy6 = &b[ldb * 6];
+            const int8_t* bcopy7 = &b[ldb * 7];
+
+            if (k >= 8) {
+                b += ldb * 8;
+                k -= 8;
+
+            } else {
+                vst1q_s8(&PaddedMatrixBData[0], ZeroVector);
+                vst1q_s8(&PaddedMatrixBData[16], ZeroVector);
+                vst1q_s8(&PaddedMatrixBData[32], ZeroVector);
+                vst1q_s8(&PaddedMatrixBData[48], ZeroVector);
+
+                bcopy1 = (k >= 2) ? bcopy1 : &PaddedMatrixBData[56];
+                bcopy2 = (k >= 3) ? bcopy2 : &PaddedMatrixBData[56];
+                bcopy3 = (k >= 4) ? bcopy3 : &PaddedMatrixBData[56];
+                bcopy4 = (k >= 5) ? bcopy4 : &PaddedMatrixBData[56];
+                bcopy5 = (k >= 6) ? bcopy5 : &PaddedMatrixBData[56];
+                bcopy6 = (k >= 7) ? bcopy6 : &PaddedMatrixBData[56];
+                bcopy7 = &PaddedMatrixBData[56];
+
+                k = 0;
+            }
+
+            int8_t* padded = PaddedMatrixBData;
+            int8_t* padded_end = padded + CountN;
+            do {
+                padded[0] = *bcopy0++;
+                padded[8] = *bcopy1++;
+                padded[16] = *bcopy2++;
+                padded[24] = *bcopy3++;
+                padded[32] = *bcopy4++;
+                padded[40] = *bcopy5++;
+                padded[48] = *bcopy6++;
+                padded[56] = *bcopy7++;
+
+            } while (++padded < padded_end);
+
+            BytesRow[0] = vld1_s8(&PaddedMatrixBData[0]);
+            BytesRow[1] = vld1_s8(&PaddedMatrixBData[8]);
+            BytesRow[2] = vld1_s8(&PaddedMatrixBData[16]);
+            BytesRow[3] = vld1_s8(&PaddedMatrixBData[24]);
+            BytesRow[4] = vld1_s8(&PaddedMatrixBData[32]);
+            BytesRow[5] = vld1_s8(&PaddedMatrixBData[40]);
+            BytesRow[6] = vld1_s8(&PaddedMatrixBData[48]);
+            BytesRow[7] = vld1_s8(&PaddedMatrixBData[56]);
+
+            MlasGemmS8S8CopyPackBProcessSmmla(D, BytesRow, ColumnSums);
+
+            D += 64;
+        }
+
+        vst1q_s32(&ColumnSumBuffer[0], ColumnSums[0]);
+        vst1q_s32(&ColumnSumBuffer[4], ColumnSums[1]);
+    }
+}
+
+template <>
+MLAS_FORCEINLINE size_t
+MlasGemmQuantKernel<MLAS_GEMM_S8S8_KERNEL_SMMLA>(const MLAS_GEMM_S8S8_KERNEL_SMMLA::PackedAType* A,
+                                                 const MLAS_GEMM_S8S8_KERNEL_SMMLA::PackedBType* B,
+                                                 int32_t* C,
+                                                 size_t PackedCountK,
+                                                 size_t CountM,
+                                                 size_t CountN,
+                                                 size_t ldc,
+                                                 const int32_t* RowSumBuffer,
+                                                 const int32_t* ColumnSumBuffer,
+                                                 const int32_t* ZeroPointB,
+                                                 bool ZeroMode)
+{
+    size_t RowsHandled;
+
+    if (ZeroMode) {
+        RowsHandled = MlasGemmS8S8KernelSmmlaZero(A, B, C, PackedCountK, CountM, CountN, ldc,
+                                                  RowSumBuffer, ColumnSumBuffer, ZeroPointB);
+    } else {
+        RowsHandled = MlasGemmS8S8KernelSmmlaAdd(A, B, C, PackedCountK, CountM, CountN, ldc,
+                                                 RowSumBuffer, ColumnSumBuffer, ZeroPointB);
+    }
+
+    return RowsHandled;
+}
+
+const MLAS_GEMM_QUANT_DISPATCH MlasGemmS8S8DispatchSmmla = {
+    MlasGemmQuantOperation<MLAS_GEMM_S8S8_KERNEL_SMMLA>,
+    MlasGemmQuantPackedOperation<MLAS_GEMM_S8S8_KERNEL_SMMLA>,
+    MlasGemmQuantCopyPackB<MLAS_GEMM_S8S8_KERNEL_SMMLA>,
+    MLAS_GEMM_S8S8_KERNEL_SMMLA::PackedK,
+    MLAS_GEMM_S8S8_KERNEL_SMMLA::PackedStrides.K,
+    8};
diff --git a/onnxruntime/core/mlas/lib/qgemm_kernel_ummla.cpp b/onnxruntime/core/mlas/lib/qgemm_kernel_ummla.cpp
new file mode 100644
index 0000000000000..3936154432ac7
--- /dev/null
+++ b/onnxruntime/core/mlas/lib/qgemm_kernel_ummla.cpp
@@ -0,0 +1,967 @@
+/*++
+
+Copyright (c) Microsoft Corporation. All rights reserved.
+Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+
+Licensed under the MIT License.
+
+Module Name:
+
+    qgemm_kernel_ummla.cpp
+
+Abstract:
+
+    This module implements ummla QGEMM kernel.
+
+--*/
+
+#include "mlasi.h"
+#include "qgemm.h"
+
+//
+// Define the prototypes of the NEON UMMLA routines written in assembly.
+//
+
+extern "C" {
+
+size_t MLASCALL
+MlasGemmU8X8KernelUmmlaZero(const uint8_t* A,
+                            const uint8_t* B,
+                            int32_t* C,
+                            size_t PackedCountK,
+                            size_t CountM,
+                            size_t CountN,
+                            size_t ldc,
+                            const int32_t* RowSumVector,
+                            const int32_t* ColumnSumVector,
+                            const int32_t* ZeroPointB);
+
+size_t MLASCALL
+MlasGemmU8X8KernelUmmlaAdd(const uint8_t* A,
+                           const uint8_t* B,
+                           int32_t* C,
+                           size_t PackedCountK,
+                           size_t CountM,
+                           size_t CountN,
+                           size_t ldc,
+                           const int32_t* RowSumVector,
+                           const int32_t* ColumnSumVector,
+                           const int32_t* ZeroPointB);
+}
+
+struct MLAS_GEMM_U8X8_KERNEL_UMMLA {
+    typedef uint8_t PackedAType;
+    typedef uint8_t PackedBType;
+    typedef uint8_t OffsetAType;
+    typedef uint8_t OffsetBType;
+
+    static constexpr size_t PackedK = 8;
+    static constexpr MLAS_GEMM_QUANT_STRIDES Strides{24, 128, 256};
+    static constexpr MLAS_GEMM_QUANT_STRIDES PackedStrides{24, 128, 384};
+};
+
+constexpr size_t MLAS_GEMM_U8X8_KERNEL_UMMLA::PackedK;
+constexpr MLAS_GEMM_QUANT_STRIDES MLAS_GEMM_U8X8_KERNEL_UMMLA::Strides;
+constexpr MLAS_GEMM_QUANT_STRIDES MLAS_GEMM_U8X8_KERNEL_UMMLA::PackedStrides;
+
+template <>
+MLAS_FORCEINLINE int32_t
+MlasGemmQuantFixupZeroPointB<MLAS_GEMM_U8X8_KERNEL_UMMLA>(int32_t ZeroPointB, bool BIsSigned)
+{
+    if (BIsSigned) {
+        ZeroPointB = MLAS_GEMM_U8X8_KERNEL_UMMLA::OffsetBType(ZeroPointB ^ 0x80);
+    }
+
+    return ZeroPointB;
+}
+
+template <>
+void
+MlasGemmQuantCopyPackA<MLAS_GEMM_U8X8_KERNEL_UMMLA>(MLAS_GEMM_U8X8_KERNEL_UMMLA::PackedAType* D,
+                                                    const uint8_t* A,
+                                                    size_t lda,
+                                                    size_t CountM,
+                                                    size_t CountK,
+                                                    int32_t* RowSumBuffer,
+                                                    bool AIsSigned)
+{
+    MLAS_UNREFERENCED_PARAMETER(AIsSigned);
+    uint8_t PaddedMatrixAData[64];
+
+    //
+    // Process 8 rows of matrix A.
+    //
+    // MMLA kernels load 8x8 block of A with four vector registers. So A is packed
+    // a series of 64 byte vectors where eight rows are interleaved with the
+    // following pattern:
+    //
+    //      [ A0 A1 A2 A3 A4 A5 A6 A7 ]
+    //      [ B0 B1 B2 B3 B4 B5 B6 B7 ]
+    //      [ C0 C1 C2 C3 C4 C5 C6 C7 ]
+    //      [ D0 D1 D2 D3 D4 D5 D6 D7 ]
+    //      [ E0 E1 E2 E3 E4 E5 E6 E7 ]
+    //      [ F0 F1 F2 F3 F4 F5 F6 F7 ]
+    //      [ G0 G1 G2 G3 G4 G5 G6 G7 ]
+    //      [ H0 H1 H2 H3 H4 H5 H6 H7 ]
+    //
+    //      ...
+    //
+    // This pattern is repeated (CountK / 8) times.
+    //
+    // If CountK is not aligned to a multiple of eight, then the vector is padded
+    // with zeroes.
+    //
+
+    while (CountM >= 8) {
+        const uint8_t* a0 = A;
+        const uint8_t* a1 = a0 + lda;
+        const uint8_t* a2 = a0 + lda * 2;
+        const uint8_t* a3 = a0 + lda * 3;
+        const uint8_t* a4 = a0 + lda * 4;
+        const uint8_t* a5 = a0 + lda * 5;
+        const uint8_t* a6 = a0 + lda * 6;
+        const uint8_t* a7 = a0 + lda * 7;
+
+        size_t k = CountK;
+        uint32x4_t RowSums0 = vmovq_n_u32(0);
+        uint32x4_t RowSums1 = vmovq_n_u32(0);
+
+        while (k >= 16) {
+            uint64x2_t v0 = vld1q_u64(reinterpret_cast<const uint64_t*>(a0));
+            a0 += 16;
+            uint64x2_t v1 = vld1q_u64(reinterpret_cast<const uint64_t*>(a1));
+            a1 += 16;
+            uint64x2_t v2 = vld1q_u64(reinterpret_cast<const uint64_t*>(a2));
+            a2 += 16;
+            uint64x2_t v3 = vld1q_u64(reinterpret_cast<const uint64_t*>(a3));
+            a3 += 16;
+            uint64x2_t v4 = vld1q_u64(reinterpret_cast<const uint64_t*>(a4));
+            a4 += 16;
+            uint64x2_t v5 = vld1q_u64(reinterpret_cast<const uint64_t*>(a5));
+            a5 += 16;
+            uint64x2_t v6 = vld1q_u64(reinterpret_cast<const uint64_t*>(a6));
+            a6 += 16;
+            uint64x2_t v7 = vld1q_u64(reinterpret_cast<const uint64_t*>(a7));
+            a7 += 16;
+
+            uint64x2_t z0 = vzip1q_u64(v0, v1);
+            uint64x2_t z1 = vzip2q_u64(v0, v1);
+            uint64x2_t z2 = vzip1q_u64(v2, v3);
+            uint64x2_t z3 = vzip2q_u64(v2, v3);
+
+            uint64x2_t z4 = vzip1q_u64(v4, v5);
+            uint64x2_t z5 = vzip2q_u64(v4, v5);
+            uint64x2_t z6 = vzip1q_u64(v6, v7);
+            uint64x2_t z7 = vzip2q_u64(v6, v7);
+
+            vst1q_u8(&D[0], vreinterpretq_u8_u64(z0));
+            vst1q_u8(&D[16], vreinterpretq_u8_u64(z2));
+            vst1q_u8(&D[32], vreinterpretq_u8_u64(z4));
+            vst1q_u8(&D[48], vreinterpretq_u8_u64(z6));
+            vst1q_u8(&D[64], vreinterpretq_u8_u64(z1));
+            vst1q_u8(&D[80], vreinterpretq_u8_u64(z3));
+            vst1q_u8(&D[96], vreinterpretq_u8_u64(z5));
+            vst1q_u8(&D[112], vreinterpretq_u8_u64(z7));
+
+            uint32x4_t RowSums0L_pada = vmovq_n_u32(0);
+            RowSums0L_pada = vpadalq_u16(RowSums0L_pada, vpaddlq_u8(vreinterpretq_u8_u64(z0)));
+            RowSums0L_pada = vpadalq_u16(RowSums0L_pada, vpaddlq_u8(vreinterpretq_u8_u64(z1)));
+
+            uint32x4_t RowSums0L_ext = vextq_u32(RowSums0L_pada, RowSums0L_pada, 1);
+            uint32x4_t RowSums0L_add = vaddq_u32(RowSums0L_pada, RowSums0L_ext);
+            uint32x2_t RowSums0L = {vdups_laneq_u32(RowSums0L_add, 0),
+                                    vdups_laneq_u32(RowSums0L_add, 2)};
+
+            uint32x4_t RowSums0H_pada = vmovq_n_u32(0);
+            RowSums0H_pada = vpadalq_u16(RowSums0H_pada, vpaddlq_u8(vreinterpretq_u8_u64(z2)));
+            RowSums0H_pada = vpadalq_u16(RowSums0H_pada, vpaddlq_u8(vreinterpretq_u8_u64(z3)));
+
+            uint32x4_t RowSums0H_ext = vextq_u32(RowSums0H_pada, RowSums0H_pada, 1);
+            uint32x4_t RowSums0H_add = vaddq_u32(RowSums0H_pada, RowSums0H_ext);
+            uint32x2_t RowSums0H = {vdups_laneq_u32(RowSums0H_add, 0),
+                                    vdups_laneq_u32(RowSums0H_add, 2)};
+
+            RowSums0 = vaddq_u32(RowSums0, vcombine_u32(RowSums0L, RowSums0H));
+
+            uint32x4_t RowSums1L_pada = vmovq_n_u32(0);
+            RowSums1L_pada = vpadalq_u16(RowSums1L_pada, vpaddlq_u8(vreinterpretq_u8_u64(z4)));
+            RowSums1L_pada = vpadalq_u16(RowSums1L_pada, vpaddlq_u8(vreinterpretq_u8_u64(z5)));
+
+            uint32x4_t RowSums1L_ext = vextq_u32(RowSums1L_pada, RowSums1L_pada, 1);
+            uint32x4_t RowSums1L_add = vaddq_u32(RowSums1L_pada, RowSums1L_ext);
+            uint32x2_t RowSums1L = {vdups_laneq_u32(RowSums1L_add, 0),
+                                    vdups_laneq_u32(RowSums1L_add, 2)};
+
+            uint32x4_t RowSums1H_pada = vmovq_n_u32(0);
+            RowSums1H_pada = vpadalq_u16(RowSums1H_pada, vpaddlq_u8(vreinterpretq_u8_u64(z6)));
+            RowSums1H_pada = vpadalq_u16(RowSums1H_pada, vpaddlq_u8(vreinterpretq_u8_u64(z7)));
+
+            uint32x4_t RowSums1H_ext = vextq_u32(RowSums1H_pada, RowSums1H_pada, 1);
+            uint32x4_t RowSums1H_add = vaddq_u32(RowSums1H_pada, RowSums1H_ext);
+            uint32x2_t RowSums1H = {vdups_laneq_u32(RowSums1H_add, 0),
+                                    vdups_laneq_u32(RowSums1H_add, 2)};
+
+            RowSums1 = vaddq_u32(RowSums1, vcombine_u32(RowSums1L, RowSums1H));
+
+            D += 128;
+            k -= 16;
+        }
+
+        while (k >= 8) {
+            uint64x1_t v0 = *reinterpret_cast<const uint64x1_t*>(a0);
+            a0 += 8;
+            uint64x1_t v1 = *reinterpret_cast<const uint64x1_t*>(a1);
+            a1 += 8;
+            uint64x1_t v2 = *reinterpret_cast<const uint64x1_t*>(a2);
+            a2 += 8;
+            uint64x1_t v3 = *reinterpret_cast<const uint64x1_t*>(a3);
+            a3 += 8;
+            uint64x1_t v4 = *reinterpret_cast<const uint64x1_t*>(a4);
+            a4 += 8;
+            uint64x1_t v5 = *reinterpret_cast<const uint64x1_t*>(a5);
+            a5 += 8;
+            uint64x1_t v6 = *reinterpret_cast<const uint64x1_t*>(a6);
+            a6 += 8;
+            uint64x1_t v7 = *reinterpret_cast<const uint64x1_t*>(a7);
+            a7 += 8;
+
+            *reinterpret_cast<uint64x1_t*>(&D[0]) = v0;
+            *reinterpret_cast<uint64x1_t*>(&D[8]) = v1;
+            *reinterpret_cast<uint64x1_t*>(&D[16]) = v2;
+            *reinterpret_cast<uint64x1_t*>(&D[24]) = v3;
+            *reinterpret_cast<uint64x1_t*>(&D[32]) = v4;
+            *reinterpret_cast<uint64x1_t*>(&D[40]) = v5;
+            *reinterpret_cast<uint64x1_t*>(&D[48]) = v6;
+            *reinterpret_cast<uint64x1_t*>(&D[56]) = v7;
+
+            uint64x2_t z01 = vcombine_u64(v0, v1);
+            uint64x2_t z23 = vcombine_u64(v2, v3);
+            uint64x2_t z45 = vcombine_u64(v4, v5);
+            uint64x2_t z67 = vcombine_u64(v6, v7);
+
+            uint32x4_t RowSums0L_pada = vmovq_n_u32(0);
+            RowSums0L_pada = vpadalq_u16(RowSums0L_pada, vpaddlq_u8(vreinterpretq_u8_u64(z01)));
+
+            uint32x4_t RowSums0L_ext = vextq_u32(RowSums0L_pada, RowSums0L_pada, 1);
+            uint32x4_t RowSums0L_add = vaddq_u32(RowSums0L_pada, RowSums0L_ext);
+            uint32x2_t RowSums0L = {vdups_laneq_u32(RowSums0L_add, 0),
+                                    vdups_laneq_u32(RowSums0L_add, 2)};
+
+            uint32x4_t RowSums0H_pada = vmovq_n_u32(0);
+            RowSums0H_pada = vpadalq_u16(RowSums0H_pada, vpaddlq_u8(vreinterpretq_u8_u64(z23)));
+
+            uint32x4_t RowSums0H_ext = vextq_u32(RowSums0H_pada, RowSums0H_pada, 1);
+            uint32x4_t RowSums0H_add = vaddq_u32(RowSums0H_pada, RowSums0H_ext);
+            uint32x2_t RowSums0H = {vdups_laneq_u32(RowSums0H_add, 0),
+                                    vdups_laneq_u32(RowSums0H_add, 2)};
+
+            RowSums0 = vaddq_u32(RowSums0, vcombine_u32(RowSums0L, RowSums0H));
+
+            uint32x4_t RowSums1L_pada = vmovq_n_u32(0);
+            RowSums1L_pada = vpadalq_u16(RowSums1L_pada, vpaddlq_u8(vreinterpretq_u8_u64(z45)));
+
+            uint32x4_t RowSums1L_ext = vextq_u32(RowSums1L_pada, RowSums1L_pada, 1);
+            uint32x4_t RowSums1L_add = vaddq_u32(RowSums1L_pada, RowSums1L_ext);
+            uint32x2_t RowSums1L = {vdups_laneq_u32(RowSums1L_add, 0),
+                                    vdups_laneq_u32(RowSums1L_add, 2)};
+
+            uint32x4_t RowSums1H_pada = vmovq_n_u32(0);
+            RowSums1H_pada = vpadalq_u16(RowSums1H_pada, vpaddlq_u8(vreinterpretq_u8_u64(z67)));
+
+            uint32x4_t RowSums1H_ext = vextq_u32(RowSums1H_pada, RowSums1H_pada, 1);
+            uint32x4_t RowSums1H_add = vaddq_u32(RowSums1H_pada, RowSums1H_ext);
+            uint32x2_t RowSums1H = {vdups_laneq_u32(RowSums1H_add, 0),
+                                    vdups_laneq_u32(RowSums1H_add, 2)};
+
+            RowSums1 = vaddq_u32(RowSums1, vcombine_u32(RowSums1L, RowSums1H));
+
+            D += 64;
+            k -= 8;
+        }
+
+        if (k > 0) {
+            //
+            // zero pad the remaining columns to 8
+            //
+            uint8_t* d = D;
+
+            vst1q_u8(d, vmovq_n_u8(0));
+            vst1q_u8(&d[16], vmovq_n_u8(0));
+            vst1q_u8(&d[32], vmovq_n_u8(0));
+            vst1q_u8(&d[48], vmovq_n_u8(0));
+
+            while (k > 0) {
+                d[0] = *a0++;
+                d[8] = *a1++;
+                d[16] = *a2++;
+                d[24] = *a3++;
+                d[32] = *a4++;
+                d[40] = *a5++;
+                d[48] = *a6++;
+                d[56] = *a7++;
+                d += 1;
+                k -= 1;
+            }
+            d = D;
+            uint64x1_t v0 = *reinterpret_cast<const uint64x1_t*>(d);
+            d = d + 8;
+            uint64x1_t v1 = *reinterpret_cast<const uint64x1_t*>(d);
+            d = d + 8;
+            uint64x1_t v2 = *reinterpret_cast<const uint64x1_t*>(d);
+            d = d + 8;
+            uint64x1_t v3 = *reinterpret_cast<const uint64x1_t*>(d);
+            d = d + 8;
+            uint64x1_t v4 = *reinterpret_cast<const uint64x1_t*>(d);
+            d = d + 8;
+            uint64x1_t v5 = *reinterpret_cast<const uint64x1_t*>(d);
+            d = d + 8;
+            uint64x1_t v6 = *reinterpret_cast<const uint64x1_t*>(d);
+            d = d + 8;
+            uint64x1_t v7 = *reinterpret_cast<const uint64x1_t*>(d);
+            d = d + 8;
+
+            uint64x2_t z01 = vcombine_u64(v0, v1);
+            uint64x2_t z23 = vcombine_u64(v2, v3);
+            uint64x2_t z45 = vcombine_u64(v4, v5);
+            uint64x2_t z67 = vcombine_u64(v6, v7);
+
+            uint32x4_t RowSums0L_pada = vmovq_n_u32(0);
+            RowSums0L_pada = vpadalq_u16(RowSums0L_pada, vpaddlq_u8(vreinterpretq_u8_u64(z01)));
+
+            uint32x4_t RowSums0L_ext = vextq_u32(RowSums0L_pada, RowSums0L_pada, 1);
+            uint32x4_t RowSums0L_add = vaddq_u32(RowSums0L_pada, RowSums0L_ext);
+            uint32x2_t RowSums0L = {vdups_laneq_u32(RowSums0L_add, 0),
+                                    vdups_laneq_u32(RowSums0L_add, 2)};
+
+            uint32x4_t RowSums0H_pada = vmovq_n_u32(0);
+            RowSums0H_pada = vpadalq_u16(RowSums0H_pada, vpaddlq_u8(vreinterpretq_u8_u64(z23)));
+
+            uint32x4_t RowSums0H_ext = vextq_u32(RowSums0H_pada, RowSums0H_pada, 1);
+            uint32x4_t RowSums0H_add = vaddq_u32(RowSums0H_pada, RowSums0H_ext);
+            uint32x2_t RowSums0H = {vdups_laneq_u32(RowSums0H_add, 0),
+                                    vdups_laneq_u32(RowSums0H_add, 2)};
+
+            RowSums0 = vaddq_u32(RowSums0, vcombine_u32(RowSums0L, RowSums0H));
+
+            uint32x4_t RowSums1L_pada = vmovq_n_u32(0);
+            RowSums1L_pada = vpadalq_u16(RowSums1L_pada, vpaddlq_u8(vreinterpretq_u8_u64(z45)));
+
+            uint32x4_t RowSums1L_ext = vextq_u32(RowSums1L_pada, RowSums1L_pada, 1);
+            uint32x4_t RowSums1L_add = vaddq_u32(RowSums1L_pada, RowSums1L_ext);
+            uint32x2_t RowSums1L = {vdups_laneq_u32(RowSums1L_add, 0),
+                                    vdups_laneq_u32(RowSums1L_add, 2)};
+
+            uint32x4_t RowSums1H_pada = vmovq_n_u32(0);
+            RowSums1H_pada = vpadalq_u16(RowSums1H_pada, vpaddlq_u8(vreinterpretq_u8_u64(z67)));
+
+            uint32x4_t RowSums1H_ext = vextq_u32(RowSums1H_pada, RowSums1H_pada, 1);
+            uint32x4_t RowSums1H_add = vaddq_u32(RowSums1H_pada, RowSums1H_ext);
+            uint32x2_t RowSums1H = {vdups_laneq_u32(RowSums1H_add, 0),
+                                    vdups_laneq_u32(RowSums1H_add, 2)};
+
+            RowSums1 = vaddq_u32(RowSums1, vcombine_u32(RowSums1L, RowSums1H));
+
+            D += 64;
+        }
+
+        vst1q_s32(RowSumBuffer, vreinterpretq_s32_u32(RowSums0));
+        vst1q_s32(&RowSumBuffer[4], vreinterpretq_s32_u32(RowSums1));
+
+        RowSumBuffer += 8;
+
+        A = A + lda * 8;
+        CountM -= 8;
+    }
+
+    //
+    // Process four rows of matrix A.
+    //
+    // The buffer is packed as a series of 32 byte vectors where four rows are
+    // interleaved with the following pattern:
+    //
+    //      [ A0 A1 A2 A3 A4 A5 A6 A7 ]
+    //      [ B0 B1 B2 B3 B4 B5 B6 B7 ]
+    //      [ C0 C1 C2 C3 C4 C5 C6 C7 ]
+    //      [ D0 D1 D2 D3 D4 D5 D6 D7 ]
+    //
+    // This pattern is repeated (CountK / 8) times.
+    //
+    // If CountK is not aligned to a multiple of eight, then the vector is padded
+    // with zeroes.
+    //
+
+    if (CountM >= 4) {
+        const uint8_t* a0 = A;
+        const uint8_t* a1 = a0 + lda;
+        const uint8_t* a2 = a1 + lda;
+        const uint8_t* a3 = a2 + lda;
+
+        size_t k = CountK;
+        uint32x4_t RowSums = vmovq_n_u32(0);
+
+        while (k >= 16) {
+            uint64x2_t v0 = vld1q_u64(reinterpret_cast<const uint64_t*>(a0));
+            a0 += 16;
+            uint64x2_t v1 = vld1q_u64(reinterpret_cast<const uint64_t*>(a1));
+            a1 += 16;
+            uint64x2_t v2 = vld1q_u64(reinterpret_cast<const uint64_t*>(a2));
+            a2 += 16;
+            uint64x2_t v3 = vld1q_u64(reinterpret_cast<const uint64_t*>(a3));
+            a3 += 16;
+
+            uint64x2_t z0 = vzip1q_u64(v0, v1);
+            uint64x2_t z1 = vzip2q_u64(v0, v1);
+            uint64x2_t z2 = vzip1q_u64(v2, v3);
+            uint64x2_t z3 = vzip2q_u64(v2, v3);
+
+            vst1q_u8(&D[0], vreinterpretq_u8_u64(z0));
+            vst1q_u8(&D[16], vreinterpretq_u8_u64(z2));
+            vst1q_u8(&D[32], vreinterpretq_u8_u64(z1));
+            vst1q_u8(&D[48], vreinterpretq_u8_u64(z3));
+
+            uint32x4_t RowSumsL_pada = vmovq_n_u32(0);
+            RowSumsL_pada = vpadalq_u16(RowSumsL_pada, vpaddlq_u8(vreinterpretq_u8_u64(z0)));
+            RowSumsL_pada = vpadalq_u16(RowSumsL_pada, vpaddlq_u8(vreinterpretq_u8_u64(z1)));
+
+            uint32x4_t RowSumsL_ext = vextq_u32(RowSumsL_pada, RowSumsL_pada, 1);
+            uint32x4_t RowSumsL_add = vaddq_u32(RowSumsL_pada, RowSumsL_ext);
+            uint32x2_t RowSumsL = {vdups_laneq_u32(RowSumsL_add, 0),
+                                   vdups_laneq_u32(RowSumsL_add, 2)};
+
+            uint32x4_t RowSumsH_pada = vmovq_n_u32(0);
+            RowSumsH_pada = vpadalq_u16(RowSumsH_pada, vpaddlq_u8(vreinterpretq_u8_u64(z2)));
+            RowSumsH_pada = vpadalq_u16(RowSumsH_pada, vpaddlq_u8(vreinterpretq_u8_u64(z3)));
+
+            uint32x4_t RowSumsH_ext = vextq_u32(RowSumsH_pada, RowSumsH_pada, 1);
+            uint32x4_t RowSumsH_add = vaddq_u32(RowSumsH_pada, RowSumsH_ext);
+            uint32x2_t RowSumsH = {vdups_laneq_u32(RowSumsH_add, 0),
+                                   vdups_laneq_u32(RowSumsH_add, 2)};
+
+            RowSums = vaddq_u32(RowSums, vcombine_u32(RowSumsL, RowSumsH));
+
+            D += 64;
+            k -= 16;
+        }
+
+        while (k >= 8) {
+            uint64x1_t v0 = *reinterpret_cast<const uint64x1_t*>(a0);
+            a0 += 8;
+            uint64x1_t v1 = *reinterpret_cast<const uint64x1_t*>(a1);
+            a1 += 8;
+            uint64x1_t v2 = *reinterpret_cast<const uint64x1_t*>(a2);
+            a2 += 8;
+            uint64x1_t v3 = *reinterpret_cast<const uint64x1_t*>(a3);
+            a3 += 8;
+
+            *reinterpret_cast<uint64x1_t*>(&D[0]) = v0;
+            *reinterpret_cast<uint64x1_t*>(&D[8]) = v1;
+            *reinterpret_cast<uint64x1_t*>(&D[16]) = v2;
+            *reinterpret_cast<uint64x1_t*>(&D[24]) = v3;
+
+            uint64x2_t z01 = vcombine_u64(v0, v1);
+            uint64x2_t z23 = vcombine_u64(v2, v3);
+
+            uint32x4_t RowSumsL_pada = vmovq_n_u32(0);
+            RowSumsL_pada = vpadalq_u16(RowSumsL_pada, vpaddlq_u8(vreinterpretq_u8_u64(z01)));
+
+            uint32x4_t RowSumsL_ext = vextq_u32(RowSumsL_pada, RowSumsL_pada, 1);
+            uint32x4_t RowSumsL_add = vaddq_u32(RowSumsL_pada, RowSumsL_ext);
+            uint32x2_t RowSumsL = {vdups_laneq_u32(RowSumsL_add, 0),
+                                   vdups_laneq_u32(RowSumsL_add, 2)};
+
+            uint32x4_t RowSumsH_pada = vmovq_n_u32(0);
+            RowSumsH_pada = vpadalq_u16(RowSumsH_pada, vpaddlq_u8(vreinterpretq_u8_u64(z23)));
+
+            uint32x4_t RowSumsH_ext = vextq_u32(RowSumsH_pada, RowSumsH_pada, 1);
+            uint32x4_t RowSumsH_add = vaddq_u32(RowSumsH_pada, RowSumsH_ext);
+            uint32x2_t RowSumsH = {vdups_laneq_u32(RowSumsH_add, 0),
+                                   vdups_laneq_u32(RowSumsH_add, 2)};
+
+            RowSums = vaddq_u32(RowSums, vcombine_u32(RowSumsL, RowSumsH));
+
+            D += 32;
+            k -= 8;
+        }
+
+        if (k > 0) {
+            //
+            // Copy the remaining bytes with zero padding.
+            //
+            uint8_t* d = D;
+
+            vst1q_u8(d, vmovq_n_u8(0));
+            vst1q_u8(&d[16], vmovq_n_u8(0));
+
+            while (k > 0) {
+                d[0] = *a0++;
+                d[8] = *a1++;
+                d[16] = *a2++;
+                d[24] = *a3++;
+                d += 1;
+                k -= 1;
+            }
+
+            d = D;
+            uint64x1_t v0 = *reinterpret_cast<const uint64x1_t*>(d);
+            d = d + 8;
+            uint64x1_t v1 = *reinterpret_cast<const uint64x1_t*>(d);
+            d = d + 8;
+            uint64x1_t v2 = *reinterpret_cast<const uint64x1_t*>(d);
+            d = d + 8;
+            uint64x1_t v3 = *reinterpret_cast<const uint64x1_t*>(d);
+            d = d + 8;
+
+            uint64x2_t z01 = vcombine_u64(v0, v1);
+            uint64x2_t z23 = vcombine_u64(v2, v3);
+
+            uint32x4_t RowSums0L_pada = vmovq_n_u32(0);
+            RowSums0L_pada = vpadalq_u16(RowSums0L_pada, vpaddlq_u8(vreinterpretq_u8_u64(z01)));
+
+            uint32x4_t RowSums0L_ext = vextq_u32(RowSums0L_pada, RowSums0L_pada, 1);
+            uint32x4_t RowSums0L_add = vaddq_u32(RowSums0L_pada, RowSums0L_ext);
+            uint32x2_t RowSums0L = {vdups_laneq_u32(RowSums0L_add, 0),
+                                    vdups_laneq_u32(RowSums0L_add, 2)};
+
+            uint32x4_t RowSums0H_pada = vmovq_n_u32(0);
+            RowSums0H_pada = vpadalq_u16(RowSums0H_pada, vpaddlq_u8(vreinterpretq_u8_u64(z23)));
+
+            uint32x4_t RowSums0H_ext = vextq_u32(RowSums0H_pada, RowSums0H_pada, 1);
+            uint32x4_t RowSums0H_add = vaddq_u32(RowSums0H_pada, RowSums0H_ext);
+            uint32x2_t RowSums0H = {vdups_laneq_u32(RowSums0H_add, 0),
+                                    vdups_laneq_u32(RowSums0H_add, 2)};
+
+            RowSums = vaddq_u32(RowSums, vcombine_u32(RowSums0L, RowSums0H));
+
+            D += 32;
+        }
+
+        vst1q_s32(RowSumBuffer, vreinterpretq_s32_u32(RowSums));
+        RowSumBuffer += 4;
+
+        A = A + lda * 4;
+        CountM -= 4;
+    }
+
+    //
+    // Process two rows of matrix A.
+    //
+    // The buffer is packed as a series of 16 byte vectors where two rows are
+    // interleaved with the following pattern:
+    //
+    //      [ A0 A1 A2 A3 A4 A5 A6 A7 ]
+    //      [ B0 B1 B2 B3 B4 B5 B6 B7 ]
+    //
+    // This pattern is repeated (CountK / 8) times.
+    //
+    // If CountK is not aligned to a multiple of eight, then the vector is padded
+    // with zeroes.
+    //
+
+    if (CountM >= 2) {
+        const uint8_t* a0 = A;
+        const uint8_t* a1 = a0 + lda;
+
+        size_t k = CountK;
+        uint32x2_t RowSums = vmov_n_u32(0);
+
+        while (k >= 16) {
+            uint64x2_t v0 = vld1q_u64(reinterpret_cast<const uint64_t*>(a0));
+            a0 += 16;
+            uint64x2_t v1 = vld1q_u64(reinterpret_cast<const uint64_t*>(a1));
+            a1 += 16;
+
+            uint64x2_t z0 = vzip1q_u64(v0, v1);
+            uint64x2_t z1 = vzip2q_u64(v0, v1);
+
+            vst1q_u8(&D[0], vreinterpretq_u8_u64(z0));
+            vst1q_u8(&D[16], vreinterpretq_u8_u64(z1));
+
+            uint32x4_t RowSumsL_pada = vmovq_n_u32(0);
+            RowSumsL_pada = vpadalq_u16(RowSumsL_pada, vpaddlq_u8(vreinterpretq_u8_u64(z0)));
+            RowSumsL_pada = vpadalq_u16(RowSumsL_pada, vpaddlq_u8(vreinterpretq_u8_u64(z1)));
+
+            uint32x4_t RowSumsL_ext = vextq_u32(RowSumsL_pada, RowSumsL_pada, 1);
+            uint32x4_t RowSumsL_add = vaddq_u32(RowSumsL_pada, RowSumsL_ext);
+            uint32x2_t RowSumsL = {vdups_laneq_u32(RowSumsL_add, 0),
+                                   vdups_laneq_u32(RowSumsL_add, 2)};
+
+            RowSums = vadd_u32(RowSums, RowSumsL);
+
+            D += 32;
+            k -= 16;
+        }
+
+        while (k >= 8) {
+            uint64x1_t v0 = *reinterpret_cast<const uint64x1_t*>(a0);
+            a0 += 8;
+            uint64x1_t v1 = *reinterpret_cast<const uint64x1_t*>(a1);
+            a1 += 8;
+
+            *reinterpret_cast<uint64x1_t*>(&D[0]) = v0;
+            *reinterpret_cast<uint64x1_t*>(&D[8]) = v1;
+
+            uint64x2_t z01 = vcombine_u64(v0, v1);
+            uint32x4_t RowSumsL_pada = vmovq_n_u32(0);
+            RowSumsL_pada = vpadalq_u16(RowSumsL_pada, vpaddlq_u8(vreinterpretq_u8_u64(z01)));
+
+            uint32x4_t RowSumsL_ext = vextq_u32(RowSumsL_pada, RowSumsL_pada, 1);
+            uint32x4_t RowSumsL_add = vaddq_u32(RowSumsL_pada, RowSumsL_ext);
+            uint32x2_t RowSumsL = {vdups_laneq_u32(RowSumsL_add, 0),
+                                   vdups_laneq_u32(RowSumsL_add, 2)};
+
+            RowSums = vadd_u32(RowSums, RowSumsL);
+
+            D += 16;
+            k -= 8;
+        }
+
+        if (k > 0) {
+            //
+            // Zero pad the remaining elements to make 8 columns.
+            //
+
+            uint8_t* d = PaddedMatrixAData;
+            vst1q_u8(PaddedMatrixAData, vmovq_n_u8(0));
+
+            while (k > 0) {
+                d[0] = *a0++;
+                d[8] = *a1++;
+
+                d += 1;
+                k -= 1;
+            }
+
+            d = PaddedMatrixAData;
+            uint64x1_t v0 = *reinterpret_cast<const uint64x1_t*>(d);
+            d = d + 8;
+            uint64x1_t v1 = *reinterpret_cast<const uint64x1_t*>(d);
+            d = d + 8;
+
+            uint64x2_t z01 = vcombine_u64(v0, v1);
+            uint32x4_t RowSumsL_pada = vmovq_n_u32(0);
+            RowSumsL_pada = vpadalq_u16(RowSumsL_pada, vpaddlq_u8(vreinterpretq_u8_u64(z01)));
+
+            uint32x4_t RowSumsL_ext = vextq_u32(RowSumsL_pada, RowSumsL_pada, 1);
+            uint32x4_t RowSumsL_add = vaddq_u32(RowSumsL_pada, RowSumsL_ext);
+            uint32x2_t RowSumsL = {vdups_laneq_u32(RowSumsL_add, 0),
+                                   vdups_laneq_u32(RowSumsL_add, 2)};
+
+            RowSums = vadd_u32(RowSums, RowSumsL);
+
+            uint8x16_t PackedVector = vld1q_u8(PaddedMatrixAData);
+            vst1q_u8(D, PackedVector);
+
+            D += 16;
+        }
+
+        vst1_s32(RowSumBuffer, vreinterpret_s32_u32(RowSums));
+        RowSumBuffer += 2;
+
+        A = A + lda * 2;
+        CountM -= 2;
+    }
+
+    //
+    // Process one row of matrix A.
+    //
+    // The buffer is packed as a series of 8 byte with the following pattern:
+    //
+    //      [ A0 A1 A2 A3 A4 A5 A6 A7 ]
+    //
+    // This pattern is repeated (CountK / 8) times.
+    //
+    // If CountK is not aligned to a multiple of 8, then the vector is padded
+    // with zeroes.
+    //
+
+    if (CountM > 0) {
+        // No need to pad the rows to 2, the .S takes care of zero pdding
+        const uint8_t* a = A;
+        size_t k = CountK;
+        uint32x4_t RowSums = vmovq_n_u32(0);
+
+        while (k >= 16) {
+            uint8x16_t v = vld1q_u8(a);
+            a += 16;
+
+            vst1q_u8(D, v);
+
+            RowSums = vpadalq_u16(RowSums, vpaddlq_u8(v));
+
+            D += 16;
+            k -= 16;
+        }
+
+        if (k > 0) {
+            //
+            // Copy the remaining bytes to the zero padded stack buffer.
+            //
+
+            vst1q_u8(PaddedMatrixAData, vmovq_n_u8(0));
+
+            for (size_t kk = 0; kk < k; kk++) {
+                PaddedMatrixAData[kk] = a[kk];
+            }
+
+            uint8x16_t v = vld1q_u8(PaddedMatrixAData);
+            vst1q_u8(D, v);
+
+            RowSums = vpadalq_u16(RowSums, vpaddlq_u8(v));
+        }
+
+        *RowSumBuffer = int32_t(vaddvq_u32(RowSums));
+    }
+}
+
+MLAS_FORCEINLINE
+void
+MlasGemmU8X8CopyPackBProcessUmmla(MLAS_GEMM_U8X8_KERNEL_UMMLA::PackedBType* D,
+                                  uint8x8_t BytesRow[8],
+                                  uint8x16_t BitFlipVector,
+                                  uint32x4_t ColumnSums[2])
+{
+    uint8x16_t v02 = veorq_u8(vcombine_u8(BytesRow[0], BytesRow[2]), BitFlipVector);
+    uint8x16_t v13 = veorq_u8(vcombine_u8(BytesRow[1], BytesRow[3]), BitFlipVector);
+
+    uint8x16_t v46 = veorq_u8(vcombine_u8(BytesRow[4], BytesRow[6]), BitFlipVector);
+    uint8x16_t v57 = veorq_u8(vcombine_u8(BytesRow[5], BytesRow[7]), BitFlipVector);
+
+    uint8x16x2_t zw1 = vzipq_u8(v02, v13);
+    uint16x8x2_t zd1 =
+        vzipq_u16(vreinterpretq_u16_u8(zw1.val[0]), vreinterpretq_u16_u8(zw1.val[1]));
+
+    uint8x16x2_t zw2 = vzipq_u8(v46, v57);
+    uint16x8x2_t zd2 =
+        vzipq_u16(vreinterpretq_u16_u8(zw2.val[0]), vreinterpretq_u16_u8(zw2.val[1]));
+
+    uint32x4x2_t zd3 =
+        vzipq_u32(vreinterpretq_u32_u16(zd1.val[0]), vreinterpretq_u32_u16(zd2.val[0]));
+    uint32x4x2_t zd4 =
+        vzipq_u32(vreinterpretq_u32_u16(zd1.val[1]), vreinterpretq_u32_u16(zd2.val[1]));
+
+    vst1q_u8(&D[0], vreinterpretq_u8_u32(zd3.val[0]));
+    vst1q_u8(&D[16], vreinterpretq_u8_u32(zd3.val[1]));
+    vst1q_u8(&D[32], vreinterpretq_u8_u32(zd4.val[0]));
+    vst1q_u8(&D[48], vreinterpretq_u8_u32(zd4.val[1]));
+
+    uint32x4_t ColSums0L_pada = vmovq_n_u32(0);
+    ColSums0L_pada = vpadalq_u16(ColSums0L_pada, vpaddlq_u8(vreinterpretq_u8_u32(zd3.val[0])));
+    uint32x4_t ColSums0L_ext = vextq_u32(ColSums0L_pada, ColSums0L_pada, 1);
+    uint32x4_t ColSums0L_add = vaddq_u32(ColSums0L_pada, ColSums0L_ext);
+    uint32x2_t ColSums0L = {vdups_laneq_u32(ColSums0L_add, 0), vdups_laneq_u32(ColSums0L_add, 2)};
+
+    uint32x4_t ColSums0H_pada = vmovq_n_u32(0);
+    ColSums0H_pada = vpadalq_u16(ColSums0H_pada, vpaddlq_u8(vreinterpretq_u8_u32(zd3.val[1])));
+    uint32x4_t ColSums0H_ext = vextq_u32(ColSums0H_pada, ColSums0H_pada, 1);
+    uint32x4_t ColSums0H_add = vaddq_u32(ColSums0H_pada, ColSums0H_ext);
+    uint32x2_t ColSums0H = {vdups_laneq_u32(ColSums0H_add, 0), vdups_laneq_u32(ColSums0H_add, 2)};
+
+    ColumnSums[0] = vaddq_u32(ColumnSums[0], vcombine_u32(ColSums0L, ColSums0H));
+
+    uint32x4_t ColSums1L_pada = vmovq_n_u32(0);
+    ColSums1L_pada = vpadalq_u16(ColSums1L_pada, vpaddlq_u8(vreinterpretq_u8_u32(zd4.val[0])));
+    uint32x4_t ColSums1L_ext = vextq_u32(ColSums1L_pada, ColSums1L_pada, 1);
+    uint32x4_t ColSums1L_add = vaddq_u32(ColSums1L_pada, ColSums1L_ext);
+    uint32x2_t ColSums1L = {vdups_laneq_u32(ColSums1L_add, 0), vdups_laneq_u32(ColSums1L_add, 2)};
+
+    uint32x4_t ColSums1H_pada = vmovq_n_u32(0);
+    ColSums1H_pada = vpadalq_u16(ColSums1H_pada, vpaddlq_u8(vreinterpretq_u8_u32(zd4.val[1])));
+    uint32x4_t ColSums1H_ext = vextq_u32(ColSums1H_pada, ColSums1H_pada, 1);
+    uint32x4_t ColSums1H_add = vaddq_u32(ColSums1H_pada, ColSums1H_ext);
+    uint32x2_t ColSums1H = {vdups_laneq_u32(ColSums1H_add, 0), vdups_laneq_u32(ColSums1H_add, 2)};
+
+    ColumnSums[1] = vaddq_u32(ColumnSums[1], vcombine_u32(ColSums1L, ColSums1H));
+}
+
+template <>
+void
+MlasGemmQuantCopyPackB<MLAS_GEMM_U8X8_KERNEL_UMMLA>(MLAS_GEMM_U8X8_KERNEL_UMMLA::PackedBType* D,
+                                                    const uint8_t* B,
+                                                    size_t ldb,
+                                                    size_t CountN,
+                                                    size_t CountK,
+                                                    int32_t* ColumnSumBuffer,
+                                                    bool BIsSigned)
+{
+    const uint8x16_t BitFlipVector = vdupq_n_u8(BIsSigned ? 0x80 : 0);
+    uint8x8_t BytesRow[8];
+
+    //
+    // Copy data from matrix B into the destination buffer 8x2 blocks at a
+    // time.
+    //
+    //
+    while (CountN >= 8) {
+        const uint8_t* b = B;
+        size_t k = CountK;
+        uint32x4_t ColumnSums[2];
+        ColumnSums[0] = vmovq_n_u32(0);
+        ColumnSums[1] = vmovq_n_u32(0);
+
+        while (k >= 8) {
+            BytesRow[0] = vld1_u8(&b[ldb * 0]);
+            BytesRow[1] = vld1_u8(&b[ldb * 1]);
+            BytesRow[2] = vld1_u8(&b[ldb * 2]);
+            BytesRow[3] = vld1_u8(&b[ldb * 3]);
+            BytesRow[4] = vld1_u8(&b[ldb * 4]);
+            BytesRow[5] = vld1_u8(&b[ldb * 5]);
+            BytesRow[6] = vld1_u8(&b[ldb * 6]);
+            BytesRow[7] = vld1_u8(&b[ldb * 7]);
+
+            MlasGemmU8X8CopyPackBProcessUmmla(D, BytesRow, BitFlipVector, ColumnSums);
+
+            D += 64;
+            b += ldb * 8;
+            k -= 8;
+        }
+
+        if (k > 0) {
+            // Pad k to 8
+
+            BytesRow[0] = vld1_u8(&b[ldb * 0]);
+            BytesRow[1] = (k >= 2) ? vld1_u8(&b[ldb * 1]) : vget_low_u8(BitFlipVector);
+            BytesRow[2] = (k >= 3) ? vld1_u8(&b[ldb * 2]) : vget_low_u8(BitFlipVector);
+            BytesRow[3] = (k >= 4) ? vld1_u8(&b[ldb * 3]) : vget_low_u8(BitFlipVector);
+            BytesRow[4] = (k >= 5) ? vld1_u8(&b[ldb * 4]) : vget_low_u8(BitFlipVector);
+            BytesRow[5] = (k >= 6) ? vld1_u8(&b[ldb * 5]) : vget_low_u8(BitFlipVector);
+            BytesRow[6] = (k >= 7) ? vld1_u8(&b[ldb * 6]) : vget_low_u8(BitFlipVector);
+            BytesRow[7] = vget_low_u8(BitFlipVector);
+
+            MlasGemmU8X8CopyPackBProcessUmmla(D, BytesRow, BitFlipVector, ColumnSums);
+
+            D += 64;
+        }
+
+        // Zero pad the output buffer to a multiple of PackedK if the above
+        // processed an odd number of four row bundles.
+        //
+        vst1q_s32(&ColumnSumBuffer[0], vreinterpretq_s32_u32(ColumnSums[0]));
+        vst1q_s32(&ColumnSumBuffer[4], vreinterpretq_s32_u32(ColumnSums[1]));
+
+        ColumnSumBuffer += 8;
+
+        B += 8;
+        CountN -= 8;
+    }
+
+    //
+    // Process the remaining columns of matrix B.
+    //
+
+    if (CountN > 0) {
+        const uint8_t* b = B;
+        size_t k = CountK;
+        uint8_t PaddedMatrixBData[64];
+        uint32x4_t ColumnSums[2];
+
+        vst1q_u8(&PaddedMatrixBData[0], BitFlipVector);
+        vst1q_u8(&PaddedMatrixBData[16], BitFlipVector);
+        vst1q_u8(&PaddedMatrixBData[32], BitFlipVector);
+        vst1q_u8(&PaddedMatrixBData[48], BitFlipVector);
+
+        ColumnSums[0] = vmovq_n_u32(0);
+        ColumnSums[1] = vmovq_n_u32(0);
+
+        //
+        // Interleave rows of matrix B using an intermediate zero padded stack
+        // buffer and write to the packed buffer.
+        //
+
+        while (k > 0) {
+            const uint8_t* bcopy0 = &b[ldb * 0];
+            const uint8_t* bcopy1 = &b[ldb * 1];
+            const uint8_t* bcopy2 = &b[ldb * 2];
+            const uint8_t* bcopy3 = &b[ldb * 3];
+            const uint8_t* bcopy4 = &b[ldb * 4];
+            const uint8_t* bcopy5 = &b[ldb * 5];
+            const uint8_t* bcopy6 = &b[ldb * 6];
+            const uint8_t* bcopy7 = &b[ldb * 7];
+
+            if (k >= 8) {
+                b += ldb * 8;
+                k -= 8;
+
+            } else {
+                vst1q_u8(&PaddedMatrixBData[0], BitFlipVector);
+                vst1q_u8(&PaddedMatrixBData[16], BitFlipVector);
+                vst1q_u8(&PaddedMatrixBData[32], BitFlipVector);
+                vst1q_u8(&PaddedMatrixBData[48], BitFlipVector);
+
+                bcopy1 = (k >= 2) ? bcopy1 : &PaddedMatrixBData[56];
+                bcopy2 = (k >= 3) ? bcopy2 : &PaddedMatrixBData[56];
+                bcopy3 = (k >= 4) ? bcopy3 : &PaddedMatrixBData[56];
+                bcopy4 = (k >= 5) ? bcopy4 : &PaddedMatrixBData[56];
+                bcopy5 = (k >= 6) ? bcopy5 : &PaddedMatrixBData[56];
+                bcopy6 = (k >= 7) ? bcopy6 : &PaddedMatrixBData[56];
+                bcopy7 = &PaddedMatrixBData[56];
+
+                k = 0;
+            }
+
+            uint8_t* padded = PaddedMatrixBData;
+            uint8_t* padded_end = padded + CountN;
+            do {
+                padded[0] = *bcopy0++;
+                padded[8] = *bcopy1++;
+                padded[16] = *bcopy2++;
+                padded[24] = *bcopy3++;
+                padded[32] = *bcopy4++;
+                padded[40] = *bcopy5++;
+                padded[48] = *bcopy6++;
+                padded[56] = *bcopy7++;
+
+            } while (++padded < padded_end);
+
+            BytesRow[0] = vld1_u8(&PaddedMatrixBData[0]);
+            BytesRow[1] = vld1_u8(&PaddedMatrixBData[8]);
+            BytesRow[2] = vld1_u8(&PaddedMatrixBData[16]);
+            BytesRow[3] = vld1_u8(&PaddedMatrixBData[24]);
+            BytesRow[4] = vld1_u8(&PaddedMatrixBData[32]);
+            BytesRow[5] = vld1_u8(&PaddedMatrixBData[40]);
+            BytesRow[6] = vld1_u8(&PaddedMatrixBData[48]);
+            BytesRow[7] = vld1_u8(&PaddedMatrixBData[56]);
+
+            MlasGemmU8X8CopyPackBProcessUmmla(D, BytesRow, BitFlipVector, ColumnSums);
+
+            D += 64;
+        }
+
+        vst1q_s32(&ColumnSumBuffer[0], vreinterpretq_s32_u32(ColumnSums[0]));
+        vst1q_s32(&ColumnSumBuffer[4], vreinterpretq_s32_u32(ColumnSums[1]));
+    }
+}
+
+template <>
+MLAS_FORCEINLINE size_t
+MlasGemmQuantKernel<MLAS_GEMM_U8X8_KERNEL_UMMLA>(const MLAS_GEMM_U8X8_KERNEL_UMMLA::PackedAType* A,
+                                                 const MLAS_GEMM_U8X8_KERNEL_UMMLA::PackedBType* B,
+                                                 int32_t* C,
+                                                 size_t PackedCountK,
+                                                 size_t CountM,
+                                                 size_t CountN,
+                                                 size_t ldc,
+                                                 const int32_t* RowSumBuffer,
+                                                 const int32_t* ColumnSumBuffer,
+                                                 const int32_t* ZeroPointB,
+                                                 bool ZeroMode)
+{
+    size_t RowsHandled;
+
+    if (ZeroMode) {
+        RowsHandled = MlasGemmU8X8KernelUmmlaZero(A, B, C, PackedCountK, CountM, CountN, ldc,
+                                                  RowSumBuffer, ColumnSumBuffer, ZeroPointB);
+    } else {
+        RowsHandled = MlasGemmU8X8KernelUmmlaAdd(A, B, C, PackedCountK, CountM, CountN, ldc,
+                                                 RowSumBuffer, ColumnSumBuffer, ZeroPointB);
+    }
+
+    return RowsHandled;
+}
+
+const MLAS_GEMM_QUANT_DISPATCH MlasGemmU8X8DispatchUmmla = {
+    MlasGemmQuantOperation<MLAS_GEMM_U8X8_KERNEL_UMMLA>,
+    MlasGemmQuantPackedOperation<MLAS_GEMM_U8X8_KERNEL_UMMLA>,
+    MlasGemmQuantCopyPackB<MLAS_GEMM_U8X8_KERNEL_UMMLA>,
+    MLAS_GEMM_U8X8_KERNEL_UMMLA::PackedK,
+    MLAS_GEMM_U8X8_KERNEL_UMMLA::PackedStrides.K,
+    8};
diff --git a/onnxruntime/core/mlas/lib/sqnbitgemm.cpp b/onnxruntime/core/mlas/lib/sqnbitgemm.cpp
new file mode 100644
index 0000000000000..f964b1affec31
--- /dev/null
+++ b/onnxruntime/core/mlas/lib/sqnbitgemm.cpp
@@ -0,0 +1,144 @@
+/*++
+
+Copyright (c) Microsoft Corporation. All rights reserved.
+
+Licensed under the MIT License.
+
+Module Name:
+
+    sqnbitgemm.cpp
+
+Abstract:
+
+    This module implements the float/quantized n-bit integer matrix
+    multiplication hardware agnostic entrypoint, MlasSQNBitGemmBatch.
+--*/
+
+#include "sqnbitgemm.h"
+
+namespace
+{
+
+// Get quantization variant based on `BlkBitWidth` and `BlkLen`.
+// Return -1 if the input values are unsupported.
+int32_t
+GetDispatchQuantVariant(size_t BlkBitWidth, size_t BlkLen)
+{
+    int32_t type = -1;
+    if (BlkBitWidth == 4 && BlkLen == 16) {
+        type = QuantVariant_BitWidth4_BlockSize16;
+    } else if (BlkBitWidth == 4 && BlkLen == 32) {
+        type = QuantVariant_BitWidth4_BlockSize32;
+    } else if (BlkBitWidth == 4 && BlkLen == 64) {
+        type = QuantVariant_BitWidth4_BlockSize64;
+    } else if (BlkBitWidth == 4 && BlkLen == 128) {
+        type = QuantVariant_BitWidth4_BlockSize128;
+    } else if (BlkBitWidth == 4 && BlkLen == 256) {
+        type = QuantVariant_BitWidth4_BlockSize256;
+    }
+
+    return type;
+}
+
+}  // namespace
+
+void MLASCALL
+MlasSQNBitGemmBatch(
+    const size_t M,
+    const size_t N,
+    const size_t K,
+    const size_t BatchN,
+    const size_t BlkBitWidth,
+    const size_t BlkLen,
+    const MLAS_SQNBIT_GEMM_DATA_PARAMS* DataParams,
+    MLAS_THREADPOOL* ThreadPool
+)
+{
+    const int32_t QuantVariant = GetDispatchQuantVariant(BlkBitWidth, BlkLen);
+    MLAS_SQNBIT_GEMM_OPERATION* const Operation = GetMlasPlatform().SQNBitGemmDispatch->Operations[QuantVariant];
+
+    if (ThreadPool == nullptr) {
+        for (size_t gemm_i = 0; gemm_i < BatchN; gemm_i++) {
+            auto Data = &DataParams[gemm_i];
+            Operation(K, Data, 0, M, 0, N);
+        }
+        return;
+    }
+
+    //
+    // Compute the number of target threads given the complexity of the SGEMM
+    // operation. Small requests should run using the single threaded path.
+    //
+
+    const double Complexity = double(M) * double(N) * double(K) * double(BatchN);
+
+    ptrdiff_t TargetThreadCount = ptrdiff_t(Complexity / double(MLAS_QGEMM_THREAD_COMPLEXITY)) + 1;
+
+    ptrdiff_t MaximumThreadCount = MlasGetMaximumThreadCount(ThreadPool) * 8;
+
+    if (TargetThreadCount >= MaximumThreadCount) {
+        TargetThreadCount = MaximumThreadCount;
+    }
+
+    ptrdiff_t ThreadsPerGemm = TargetThreadCount / BatchN;
+    if (ThreadsPerGemm < 1) {
+        ThreadsPerGemm = 1;
+    }
+
+    constexpr size_t StrideM = 128;
+
+    size_t nc = N;
+    if (ThreadsPerGemm > 1) {
+        // more than one thread per GEMM
+
+        const size_t BlockedM = MlasDivRoundup(M, StrideM);
+        const size_t max_nc = MlasDivRoundup(N * BlockedM, ThreadsPerGemm);
+        if (max_nc < nc) {
+            nc = std::min(
+                nc, MlasDivRoundup(max_nc, MLAS_QGEMM_STRIDEN_THREAD_ALIGN) *
+                        MLAS_QGEMM_STRIDEN_THREAD_ALIGN
+            );
+        }
+    }
+    const size_t StrideN = nc;
+
+    const size_t ThreadCountM = MlasDivRoundup(M, StrideM);
+    const size_t ThreadCountN = MlasDivRoundup(N, StrideN);
+    ThreadsPerGemm = ThreadCountM * ThreadCountN;
+
+    MlasTrySimpleParallel(ThreadPool, ThreadsPerGemm * BatchN, [&](ptrdiff_t tid) {
+        const auto gemm_i = tid / ThreadsPerGemm;
+        const auto blk_i = tid % ThreadsPerGemm;
+        auto Data = &DataParams[gemm_i];
+
+        const ptrdiff_t ThreadIdN = blk_i / ThreadCountM;
+        const ptrdiff_t ThreadIdM = blk_i % ThreadCountM;
+
+        const size_t RangeStartM = ThreadIdM * StrideM;
+        const size_t RangeCountM = std::min(M - RangeStartM, (size_t)StrideM);
+
+        const size_t RangeStartN = ThreadIdN * StrideN;
+        const size_t RangeCountN = std::min(N - RangeStartN, (size_t)StrideN);
+
+        Operation(K, Data, RangeStartM, RangeCountM, RangeStartN, RangeCountN);
+    });
+}
+
+bool MLASCALL
+MlasIsSQNBitGemmAvailable(
+    size_t BlkBitWidth,
+    size_t BlkLen
+)
+{
+    const int32_t QuantVariant = GetDispatchQuantVariant(BlkBitWidth, BlkLen);
+    if (QuantVariant == -1) {
+        return false;
+    }
+
+    if (GetMlasPlatform().SQNBitGemmDispatch == nullptr ||
+        GetMlasPlatform().SQNBitGemmDispatch->Operations[QuantVariant] == nullptr) {
+        return false;
+    }
+
+    return true;
+}
diff --git a/onnxruntime/core/mlas/lib/sqnbitgemm.h b/onnxruntime/core/mlas/lib/sqnbitgemm.h
new file mode 100644
index 0000000000000..f8f7dcd43699f
--- /dev/null
+++ b/onnxruntime/core/mlas/lib/sqnbitgemm.h
@@ -0,0 +1,287 @@
+/*++
+
+Copyright (c) Microsoft Corporation. All rights reserved.
+
+Licensed under the MIT License.
+
+Module Name:
+
+    sqnbitgemm.h
+
+Abstract:
+
+    This module includes:
+
+    - Declaration of the set of template functions used to implement a kernel
+    for a matrix/matrix multiplication, A*B, where A is a float matrix and B is
+    a n-bit quantized integer matrix (QNBitGemm).
+
+    - A shared kernel driver function template, MlasSQNBitGemmOperation.
+
+    - Kernel dispatch structure.
+
+    The B matrix is block quantized, which means that its values are grouped
+    into blocks which each have one scale and optional zero point. Each
+    quantized value in B is n-bits wide.
+
+--*/
+
+#pragma once
+
+#include "mlas_qnbit.h"
+#include "mlasi.h"
+
+//
+// Kernel implementation template declarations
+//
+
+/**
+ * @brief Multiply float matrix A with quantized n-bit integer matrix B.
+ *        B is block quantized and column major.
+ *        This kernel handles the special case where M, the number of rows of A and C, is 1.
+ *
+ * @tparam BlkBitWidth  Bit width of each value in a block.
+ * @tparam BlkLen       Number of values in a block.
+ * @tparam KernelType   Hardware-specific kernel type.
+ *
+ * @param       A                   Supplies the A matrix.
+ * @param       QuantBData          Supplies the quantized B matrix block data.
+ * @param       QuantBScale         Supplies the quantized B matrix block scale values.
+ * @param       QuantBZeroPoint     Supplies the quantized B matrix block zero point values. Optional.
+ * @param[out]  C                   Supplies the output C matrix.
+ * @param       CountN              Number of columns of B and C.
+ * @param       CountK              Number of columns of A and rows of B.
+ * @param       BlockStrideQuantB   Number of blocks between adjacent columns of the quantized B matrix.
+ * @param       Bias                Bias vector of length N.
+ */
+template <size_t BlkBitWidth, size_t BlkLen, typename KernelType>
+MLAS_FORCEINLINE void
+MlasSQNBitGemmM1Kernel(
+    const float* A,
+    const uint8_t* QuantBData,
+    const float* QuantBScale,
+    const uint8_t* QuantBZeroPoint,
+    float* C,
+    size_t CountN,
+    size_t CountK,
+    size_t BlockStrideQuantB,
+    const float* Bias
+);
+
+/**
+ * @brief Dequantize B into the format expected by the Sgemm kernel.
+ *        B is block quantized and column major.
+ *        This is equivalent to dequantizing B and then running
+ *        MlasSgemmCopyPackB.
+ *
+ * @tparam BlkBitWidth  Bit width of each value in a block.
+ * @tparam BlkLen       Number of values in a block.
+ * @tparam KernelType   Hardware-specific kernel type.
+ *
+ * @param[out]  FpData              Supplies the output buffer for the dequantized B float data.
+ * @param       QuantBData          Supplies the quantized B matrix block data.
+ * @param       QuantBScale         Supplies the quantized B matrix block scale values.
+ * @param       QuantBZeroPoint     Supplies the quantized B matrix block zero point values. Optional.
+ * @param       CountN              Number of columns of B.
+ * @param       CountK              Number of rows of B.
+ * @param       BlockStrideQuantB   Number of blocks between adjacent columns of the quantized B matrix.
+ */
+template <size_t BlkBitWidth, size_t BlkLen, typename KernelType>
+MLAS_FORCEINLINE void
+MlasQNBitBlkDequantBForSgemm(
+    float* FpData,
+    const uint8_t* QuantBData,
+    const float* QuantBScale,
+    const uint8_t* QuantBZeroPoint,
+    size_t CountN,
+    size_t CountK,
+    size_t BlockStrideQuantB
+);
+
+//
+// MlasQNBitGemmOperation and helpers
+//
+
+constexpr MLAS_FORCEINLINE size_t
+MlasQNBitBlkDataSizeInBytes(size_t BlkBitWidth, size_t BlkLen)
+{
+    return BlkLen * BlkBitWidth / 8;
+}
+
+template <size_t BlkBitWidth>
+constexpr MLAS_FORCEINLINE size_t
+MlasQNBitZeroPointsForBlksSizeInBytes(size_t BlkCount)
+{
+    if constexpr (BlkBitWidth <= 4) {
+        return MlasDivRoundup(BlkCount, 2);  // 2 blocks per byte
+    } else {
+        return BlkCount;
+    }
+}
+
+MLAS_FORCEINLINE void
+MlasAddBiasForGemm(const float* Bias, float* C, size_t CountM, size_t CountN, size_t ldc)
+{
+    for (size_t m = 0; m < CountM; m++) {
+        const float* bias = Bias;
+        float* sum = C;
+        for (size_t n = 0; n < CountN; n += 4) {
+            if (CountN - n < 4) {
+                for (size_t nn = n; nn < CountN; nn++) {
+                    *sum += *bias;
+                    sum++;
+                    bias++;
+                }
+                break;
+            }
+
+            MLAS_FLOAT32X4 acc_x = MlasLoadFloat32x4(sum);
+            acc_x = MlasAddFloat32x4(acc_x, MlasLoadFloat32x4(bias));
+            MlasStoreFloat32x4(sum, acc_x);
+            bias += 4;
+            sum += 4;
+        }
+        C += ldc;
+    }
+}
+
+template <size_t BlkBitWidth, size_t BlkLen, typename KernelType>
+MLAS_FORCEINLINE void MLASCALL
+MlasSQNBitGemmOperation(
+    const size_t K,
+    const MLAS_SQNBIT_GEMM_DATA_PARAMS* const DataParams,
+    const size_t RangeStartM,
+    const size_t RangeCountM,
+    const size_t RangeStartN,
+    const size_t RangeCountN
+)
+{
+    const size_t lda = DataParams->lda;
+    const size_t ldc = DataParams->ldc;
+
+    const size_t k_blks = MlasDivRoundup(K, BlkLen);
+    const size_t ldb = k_blks * MlasQNBitBlkDataSizeInBytes(BlkBitWidth, BlkLen);
+    const size_t k_blks_zp_bytes = MlasQNBitZeroPointsForBlksSizeInBytes<BlkBitWidth>(k_blks);
+
+    const float* A = DataParams->A + RangeStartM * lda;
+
+    const uint8_t* QuantBData = static_cast<const uint8_t*>(DataParams->QuantBData) + RangeStartN * ldb;
+    const float* QuantBScale = DataParams->QuantBScale + RangeStartN * k_blks;
+    const uint8_t* QuantBZeroPoint =
+        (DataParams->QuantBZeroPoint == nullptr)
+            ? nullptr
+            : static_cast<const uint8_t*>(DataParams->QuantBZeroPoint) + RangeStartN * k_blks_zp_bytes;
+
+    float* C = DataParams->C + RangeStartM * ldc + RangeStartN;
+
+    const float* Bias = (DataParams->Bias == nullptr) ? nullptr : DataParams->Bias + RangeStartN;
+
+    if (RangeCountM == 1) {
+        size_t CountN;
+        for (size_t n = 0; n < RangeCountN; n += CountN) {
+            CountN = std::min(RangeCountN - n, size_t{128});
+
+            const float* a_row = A;
+            const uint8_t* b_col = QuantBData + n * ldb;
+            const float* b_col_scale = QuantBScale + n * k_blks;
+            const uint8_t* b_col_zp =
+                (QuantBZeroPoint == nullptr) ? nullptr : QuantBZeroPoint + n * k_blks_zp_bytes;
+            float* c_blk = C + n;
+            const float* bias = (Bias == nullptr) ? nullptr : Bias + n;
+
+            MlasSQNBitGemmM1Kernel<BlkBitWidth, BlkLen, KernelType>(
+                a_row, b_col, b_col_scale, b_col_zp, c_blk, CountN, K, k_blks, bias
+            );
+
+            if (DataParams->PostProcessor != nullptr) {
+                DataParams->PostProcessor->Process(
+                    DataParams->C, RangeStartM, RangeStartN + n,
+                    RangeCountM, CountN, ldc
+                );
+            }
+        }
+        return;
+    }
+
+    constexpr size_t StrideN = 32;
+    size_t bufsize = k_blks * BlkLen * StrideN * sizeof(float);
+    MlasThreadedBufAlloc(bufsize);
+    auto* dequant_b = reinterpret_cast<float*>(ThreadedBufHolder.get());
+    //
+    // Step through each slice of matrix B along the N dimension.
+    //
+
+    size_t CountN;
+    for (size_t n = 0; n < RangeCountN; n += CountN) {
+        CountN = std::min(RangeCountN - n, StrideN);
+
+        //
+        // Step through each slice of matrix A along the M dimension.
+        //
+        const float* a_row = A;
+        const uint8_t* b_col = QuantBData + n * ldb;
+        const float* b_col_scale = QuantBScale + n * k_blks;
+        const uint8_t* b_col_zp =
+            (QuantBZeroPoint == nullptr) ? nullptr : QuantBZeroPoint + n * k_blks_zp_bytes;
+        float* c_blk = C + n;
+        const float* bias = (Bias == nullptr) ? nullptr : Bias + n;
+
+        MlasQNBitBlkDequantBForSgemm<BlkBitWidth, BlkLen, KernelType>(
+            dequant_b, b_col, b_col_scale, b_col_zp, CountN, K, k_blks
+        );
+
+        size_t RowsRemaining = RangeCountM;
+        while (RowsRemaining > 0) {
+#if defined(MLAS_TARGET_AMD64_IX86) || defined(MLAS_TARGET_POWER)
+            auto RowsHandled = GetMlasPlatform().GemmFloatKernel(
+                a_row, dequant_b, c_blk, K, RowsRemaining, CountN, lda, ldc, 1.f, true
+            );
+#else
+            auto RowsHandled = MlasSgemmKernelZero(a_row, dequant_b, c_blk, K, RowsRemaining, CountN, lda, ldc, 1.f);
+#endif
+
+            if (bias) {
+                MlasAddBiasForGemm(bias, c_blk, RowsHandled, CountN, ldc);
+            }
+            if (DataParams->PostProcessor != nullptr) {
+                DataParams->PostProcessor->Process(
+                    DataParams->C, RangeStartM + RangeCountM - RowsRemaining, RangeStartN,
+                    RowsHandled, CountN, ldc
+                );
+            }
+
+            c_blk += ldc * RowsHandled;
+            a_row += lda * RowsHandled;
+            RowsRemaining -= RowsHandled;
+        }
+    }
+}
+
+//
+// Kernel dispatch structure.
+//
+
+typedef void(MLASCALL MLAS_SQNBIT_GEMM_OPERATION)(
+    size_t K,
+    const MLAS_SQNBIT_GEMM_DATA_PARAMS* DataParams,
+    size_t RangeStartM,
+    size_t RangeCountM,
+    size_t RangeStartN,
+    size_t RangeCountN
+);
+
+enum QuantVariant {
+    QuantVariant_BitWidth4_BlockSize16,
+    QuantVariant_BitWidth4_BlockSize32,
+    QuantVariant_BitWidth4_BlockSize64,
+    QuantVariant_BitWidth4_BlockSize128,
+    QuantVariant_BitWidth4_BlockSize256,
+    QuantVariantCount,  // Keep this element last and ensure that its value is the number of other QuantVariant values.
+                        // Its value is used as an array size.
+};
+
+struct MLAS_SQNBIT_GEMM_DISPATCH {
+    MLAS_SQNBIT_GEMM_OPERATION* Operations[QuantVariantCount] = {
+        // Initialized to nullptrs. Overwrite in hardware-specific kernel implementation.
+    };
+};
diff --git a/onnxruntime/core/mlas/lib/sqnbitgemm_kernel_neon.cpp b/onnxruntime/core/mlas/lib/sqnbitgemm_kernel_neon.cpp
new file mode 100644
index 0000000000000..63afe57dd9137
--- /dev/null
+++ b/onnxruntime/core/mlas/lib/sqnbitgemm_kernel_neon.cpp
@@ -0,0 +1,489 @@
+/*++
+
+Copyright (c) Microsoft Corporation. All rights reserved.
+
+Licensed under the MIT License.
+
+Module Name:
+
+    sqnbitgemm_kernel_neon.h
+
+Abstract:
+
+    This module implements the float/quantized n-bit integer matrix
+    multiplication kernels for ARM NEON.
+
+--*/
+
+#include "sqnbitgemm.h"
+
+#include <arm_neon.h>
+
+#include <algorithm>
+#include <cassert>
+#include <utility>
+
+//
+// Hardware-specific kernel type.
+//
+struct MLAS_SQNBIT_GEMM_KERNEL_NEON {
+};
+
+namespace
+{
+
+template <typename IterationFn, size_t... Indices>
+MLAS_FORCEINLINE void
+UnrolledLoopIterations(IterationFn&& f, std::index_sequence<Indices...> /* indices */)
+{
+    (f(Indices), ...);
+}
+
+template <size_t N, typename IterationFn>
+MLAS_FORCEINLINE void
+UnrolledLoop(IterationFn&& f)
+{
+    UnrolledLoopIterations(std::forward<IterationFn>(f), std::make_index_sequence<N>());
+}
+
+MLAS_FORCEINLINE float32x4_t
+FoldAccumulators(float32x4_t a0, float32x4_t a1, float32x4_t a2, float32x4_t a3)
+{
+    // aN: aN_0 aN_1 aN_2 aN_3
+
+    float32x4_t b0 = vzip1q_f32(a0, a1);  // a0_0 a1_0 a0_1 a1_1
+    float32x4_t b1 = vzip2q_f32(a0, a1);  // a0_2 a1_2 a0_3 a1_3
+    float32x4_t b2 = vzip1q_f32(a2, a3);  // a2_0 a3_0 a2_1 a3_1
+    float32x4_t b3 = vzip2q_f32(a2, a3);  // a2_2 a3_2 a2_3 a3_3
+
+    // a0_0 a1_0 a2_0 a3_0
+    a0 = vreinterpretq_f32_f64(vzip1q_f64(vreinterpretq_f64_f32(b0), vreinterpretq_f64_f32(b2)));
+    // a0_1 a1_1 a2_1 a3_1
+    a1 = vreinterpretq_f32_f64(vzip2q_f64(vreinterpretq_f64_f32(b0), vreinterpretq_f64_f32(b2)));
+    // a0_2 a1_2 a3_2 a3_2
+    a2 = vreinterpretq_f32_f64(vzip1q_f64(vreinterpretq_f64_f32(b1), vreinterpretq_f64_f32(b3)));
+    // a0_3 a1_3 a2_3 a3_3
+    a3 = vreinterpretq_f32_f64(vzip2q_f64(vreinterpretq_f64_f32(b1), vreinterpretq_f64_f32(b3)));
+
+    return vaddq_f32(vaddq_f32(a0, a1), vaddq_f32(a2, a3));
+}
+
+template <size_t Capacity>
+MLAS_FORCEINLINE void
+LoadData(const float* src, size_t count, float32x4_t (& dst)[Capacity / 4])
+{
+    static_assert(Capacity % 4 == 0, "Capacity must be divisible by 4.");
+
+    assert(count <= Capacity);
+
+    size_t vi = 0;  // vector index
+
+    // handle 4 values at a time
+    while (count > 3) {
+        dst[vi] = vld1q_f32(src);
+
+        vi += 1;
+        src += 4;
+        count -= 4;
+    }
+
+    // handle remaining values
+    if (count > 0) {
+        dst[vi] = vsetq_lane_f32(src[0], dst[vi], 0);
+
+        if (count > 1) {
+            dst[vi] = vsetq_lane_f32(src[1], dst[vi], 1);
+
+            if (count > 2) {
+                dst[vi] = vsetq_lane_f32(src[2], dst[vi], 2);
+            }
+        }
+    }
+}
+
+template <size_t BlkBitWidth, size_t BlkLen, size_t NCols>
+MLAS_FORCEINLINE void
+ComputeDotProducts(
+    const float* ARowPtr,
+    const uint8_t* QuantBDataColPtr,
+    const float* QuantBScaleColPtr,
+    const uint8_t* QuantBZeroPointColPtr,
+    float* SumPtr,
+    size_t CountK,
+    size_t StrideQuantBData,
+    size_t StrideQuantBScale,
+    size_t StrideQuantBZeroPoint,
+    const float* BiasPtr
+)
+{
+    static_assert(NCols == 1 || NCols == 4, "NCols must be 1 or 4");
+
+    const uint8x8_t LowMask = vdup_n_u8(0x0F);
+
+    // Manual conversion to float takes place in two steps:
+    // 1. Map 4-bit values from [0, 15] to float values from [16.0f, 31.0f].
+    //    This target float range is convenient because the 4-bit source values can be placed directly into the
+    //    target float bits.
+    // 2. Subtract the conversion offset of 16 from the float result.
+
+    // The high 16 bits of an IEEE 754 32-bit float used as a template for creating float values.
+    constexpr uint16_t float_high_half_template = 0b0'10000011'0000000;
+    //                                           sign|exponent|partial mantissa
+    //                                              +|131: 2^4|~~~~ <- 4 bits go here
+
+    const uint16x8_t float_high_half_template_v = vdupq_n_u16(float_high_half_template);
+
+    float32x4_t acc[NCols]{};
+
+    const uint8_t* QuantBData = QuantBDataColPtr;
+    const float* QuantBScale = QuantBScaleColPtr;
+    size_t QuantBZeroPointIdx = 0;  // track half byte increments with this index instead of a pointer
+
+    for (size_t k = 0; k < CountK; k += BlkLen) {
+        const size_t k_blk_len = std::min(CountK - k, BlkLen);
+
+        float scale[NCols];
+        UnrolledLoop<NCols>(
+            [&](size_t i) { scale[i] = QuantBScale[i * StrideQuantBScale]; }
+        );
+
+        float offset[NCols];  // Includes zero point and float conversion offset of 16.
+        if (QuantBZeroPointColPtr != nullptr) {
+            UnrolledLoop<NCols>([&](size_t i) {
+                const uint8_t zp_packed =
+                    QuantBZeroPointColPtr[i * StrideQuantBZeroPoint + QuantBZeroPointIdx / 2];
+                const uint8_t zp = ((QuantBZeroPointIdx & 1) == 1) ? (zp_packed >> 4) : (zp_packed & 0x0F);
+                offset[i] = 16.0f + zp;
+            });
+        } else {
+            UnrolledLoop<NCols>([&](size_t i) {
+                constexpr float zp = 8.0f;
+                offset[i] = 16.0f + zp;
+            });
+        }
+
+        constexpr size_t SubBlkLen = 16;  // number of block elements to process in one iteration
+
+        for (size_t k_idx_in_blk = 0; k_idx_in_blk < k_blk_len; k_idx_in_blk += SubBlkLen) {
+            // load A row vector elements
+
+            // load `SubBlkLen` elements from A, padded with 0's if there aren't enough
+            const size_t k_subblk_len = std::min(k_blk_len - k_idx_in_blk, SubBlkLen);
+            float32x4_t av[4]{};
+            LoadData<SubBlkLen>(ARowPtr + k + k_idx_in_blk, k_subblk_len, av);
+
+            // load B column vectors
+            uint8x8_t bv_packed[NCols];
+            UnrolledLoop<NCols>([&](size_t i) {
+                const size_t b_data_block_offset = k_idx_in_blk * BlkBitWidth / 8;
+                bv_packed[i] = vld1_u8(QuantBData + i * StrideQuantBData + b_data_block_offset);
+            });
+
+            uint8x8_t bv_u8_unzipped[NCols][2];
+            UnrolledLoop<NCols>([&](size_t i) {
+                bv_u8_unzipped[i][0] = vand_u8(bv_packed[i], LowMask);
+                bv_u8_unzipped[i][1] = vand_u8(vshr_n_u8(bv_packed[i], 4), LowMask);
+            });
+
+            uint8x8_t bv_u8[NCols][2];
+            UnrolledLoop<NCols>([&](size_t i) {
+                bv_u8[i][0] = vzip1_u8(bv_u8_unzipped[i][0], bv_u8_unzipped[i][1]);
+                bv_u8[i][1] = vzip2_u8(bv_u8_unzipped[i][0], bv_u8_unzipped[i][1]);
+            });
+
+            // dequantize B
+
+            // shift left 3 and widen to 16 bits
+            uint16x8_t bv_u16[NCols][2];
+            UnrolledLoop<NCols>([&](size_t i) {
+                constexpr int shift = 3;
+                bv_u16[i][0] = vshll_n_u8(bv_u8[i][0], shift);
+                bv_u16[i][1] = vshll_n_u8(bv_u8[i][1], shift);
+            });
+
+            // combine 4 bits with float high half template
+            UnrolledLoop<NCols>([&](size_t i) {
+                bv_u16[i][0] = vorrq_u16(bv_u16[i][0], float_high_half_template_v);
+                bv_u16[i][1] = vorrq_u16(bv_u16[i][1], float_high_half_template_v);
+            });
+
+            // `SubBlkLen` floats of B
+            float32x4_t bv[NCols][4];
+
+            // shift left 16, widen to 32 bits, and reinterpret as float
+            UnrolledLoop<NCols>([&](size_t i) {
+                constexpr int shift = 16;
+                bv[i][0] = vreinterpretq_f32_u32(vshll_n_u16(vget_low_u16(bv_u16[i][0]), shift));
+                bv[i][1] = vreinterpretq_f32_u32(vshll_high_n_u16(bv_u16[i][0], shift));
+
+                bv[i][2] = vreinterpretq_f32_u32(vshll_n_u16(vget_low_u16(bv_u16[i][1]), shift));
+                bv[i][3] = vreinterpretq_f32_u32(vshll_high_n_u16(bv_u16[i][1], shift));
+            });
+
+            // subtract float conversion offset (16) and zero point
+            UnrolledLoop<NCols>([&](size_t i) {
+                const float32x4_t offset_v = vdupq_n_f32(offset[i]);
+                UnrolledLoop<4>([&](size_t j) { bv[i][j] = vsubq_f32(bv[i][j], offset_v); });
+            });
+
+            // multiply by scale
+            UnrolledLoop<NCols>([&](size_t i) {
+                const float32x4_t scale_v = vdupq_n_f32(scale[i]);
+                UnrolledLoop<4>([&](size_t j) { bv[i][j] = vmulq_f32(bv[i][j], scale_v); });
+            });
+
+            // c[m,n] += a[m,k] * b[k,n]
+            UnrolledLoop<4>([&](size_t j) {
+                UnrolledLoop<NCols>([&](size_t i) { acc[i] = vfmaq_f32(acc[i], av[j], bv[i][j]); });
+            });
+        }
+
+        // increment pointers to next block
+        QuantBData += MlasQNBitBlkDataSizeInBytes(BlkBitWidth, BlkLen);
+        QuantBScale += 1;
+        QuantBZeroPointIdx += 1;
+    }
+
+    if constexpr (NCols == 4) {
+        float32x4_t sum = FoldAccumulators(acc[0], acc[1], acc[2], acc[3]);
+
+        if (BiasPtr != nullptr) {
+            sum = vaddq_f32(sum, vld1q_f32(BiasPtr));
+        }
+
+        vst1q_f32(SumPtr, sum);
+    } else {
+        for (size_t i = 0; i < NCols; ++i) {
+            SumPtr[i] = vaddvq_f32(acc[i]);
+            if (BiasPtr != nullptr) {
+                SumPtr[i] += BiasPtr[i];
+            }
+        }
+    }
+}
+
+}  // namespace
+
+//
+// MlasSQNBitGemmKernel and helpers.
+//
+
+template <size_t BlkBitWidth, size_t BlkLen>
+MLAS_FORCEINLINE void
+MlasSQNBitGemmM1KernelNeon(
+    const float* A,
+    const uint8_t* QuantBData,
+    const float* QuantBScale,
+    const uint8_t* QuantBZeroPoint,
+    float* C,
+    size_t CountN,
+    size_t CountK,
+    size_t BlockStrideQuantB,
+    const float* Bias
+)
+{
+    constexpr size_t NCols = 4;
+
+    const float* ARowPtr = A;
+    float* CRowPtr = C;
+
+    const size_t BlockCountK = BlockStrideQuantB;
+
+    const size_t StrideQuantBData = BlockCountK * MlasQNBitBlkDataSizeInBytes(BlkBitWidth, BlkLen);
+    const size_t StrideQuantBScale = BlockCountK;
+    const size_t StrideQuantBZeroPoint = MlasQNBitZeroPointsForBlksSizeInBytes<BlkBitWidth>(BlockCountK);
+
+    const float* BiasPtr = Bias;
+
+    const uint8_t* QuantBDataColPtr = QuantBData;
+    const float* QuantBScaleColPtr = QuantBScale;
+    const uint8_t* QuantBZeroPointColPtr = QuantBZeroPoint;
+
+    float* SumPtr = CRowPtr;
+
+    int64_t nblk = static_cast<int64_t>(CountN) - NCols;
+
+    while (nblk >= 0) {
+        ComputeDotProducts<BlkBitWidth, BlkLen, NCols>(
+            ARowPtr, QuantBDataColPtr, QuantBScaleColPtr, QuantBZeroPointColPtr, SumPtr, CountK,
+            StrideQuantBData, StrideQuantBScale, StrideQuantBZeroPoint,
+            BiasPtr
+        );
+
+        // move to next `NCols` columns
+
+        QuantBDataColPtr += NCols * StrideQuantBData;
+        QuantBScaleColPtr += NCols * StrideQuantBScale;
+        if (QuantBZeroPointColPtr != nullptr) {
+            QuantBZeroPointColPtr += NCols * StrideQuantBZeroPoint;
+        }
+
+        BiasPtr += BiasPtr != nullptr ? NCols : 0;
+        SumPtr += NCols;
+
+        nblk -= NCols;
+    }
+
+    // left over columns less than `NCols`?
+    nblk += NCols;
+    for (int64_t n = 0; n < nblk; ++n) {
+        ComputeDotProducts<BlkBitWidth, BlkLen, 1>(
+            ARowPtr, QuantBDataColPtr, QuantBScaleColPtr, QuantBZeroPointColPtr, SumPtr, CountK,
+            StrideQuantBData, StrideQuantBScale, StrideQuantBZeroPoint,
+            BiasPtr
+        );
+
+        // move to next column
+
+        QuantBDataColPtr += StrideQuantBData;
+        QuantBScaleColPtr += StrideQuantBScale;
+        if (QuantBZeroPointColPtr != nullptr) {
+            QuantBZeroPointColPtr += StrideQuantBZeroPoint;
+        }
+
+        BiasPtr += BiasPtr != nullptr ? 1 : 0;
+        SumPtr += 1;
+    }
+}
+
+#define SPECIALIZE_SQNBIT_GEMM_M1_KERNEL(BlkBitWidth, BlkLen)                  \
+    template <>                                                                \
+    MLAS_FORCEINLINE void                                                      \
+    MlasSQNBitGemmM1Kernel<BlkBitWidth, BlkLen, MLAS_SQNBIT_GEMM_KERNEL_NEON>( \
+        const float* A,                                                        \
+        const uint8_t* QuantBData,                                             \
+        const float* QuantBScale,                                              \
+        const uint8_t* QuantBZeroPoint,                                        \
+        float* C,                                                              \
+        size_t CountN,                                                         \
+        size_t CountK,                                                         \
+        size_t BlockStrideQuantB,                                              \
+        const float* Bias                                                      \
+    )                                                                          \
+    {                                                                          \
+        return MlasSQNBitGemmM1KernelNeon<BlkBitWidth, BlkLen>(                \
+            A, QuantBData, QuantBScale, QuantBZeroPoint, C, CountN, CountK,    \
+            BlockStrideQuantB, Bias                                            \
+        );                                                                     \
+    }
+
+SPECIALIZE_SQNBIT_GEMM_M1_KERNEL(4, 16)
+SPECIALIZE_SQNBIT_GEMM_M1_KERNEL(4, 32)
+SPECIALIZE_SQNBIT_GEMM_M1_KERNEL(4, 64)
+SPECIALIZE_SQNBIT_GEMM_M1_KERNEL(4, 128)
+SPECIALIZE_SQNBIT_GEMM_M1_KERNEL(4, 256)
+
+#undef SPECIALIZE_SQNBIT_GEMM_M1_KERNEL
+
+//
+// MlasQNBitBlkDequantBForSgemm and helpers.
+//
+
+template <size_t BlkBitWidth, size_t BlkLen>
+MLAS_FORCEINLINE void
+MlasQNBitBlkDequantBForSgemmNeon(
+    float* FpData,
+    const uint8_t* QuantBData,
+    const float* QuantBScale,
+    const uint8_t* QuantBZeroPoint,
+    size_t CountN,
+    size_t CountK,
+    size_t BlockStrideQuantB
+)
+{
+    auto impl0_reference = [&]() {
+        static_assert(BlkBitWidth == 4);
+
+        float* Dst = FpData;
+
+        const uint8_t* QuantBDataCol = QuantBData;
+        const float* QuantBScaleCol = QuantBScale;
+        const uint8_t* QuantBZeroPointCol = QuantBZeroPoint;
+
+        for (size_t n = 0; n < CountN; n += 16) {
+            const size_t nnlen = std::min(CountN - n, size_t{16});
+
+            for (size_t nn = 0; nn < nnlen; ++nn) {
+                for (size_t k = 0, k_blk_idx = 0; k < CountK; k += BlkLen, k_blk_idx += 1) {
+                    const size_t kklen = std::min(CountK - k, BlkLen);
+
+                    const uint8_t* b_data =
+                        QuantBDataCol + k_blk_idx * MlasQNBitBlkDataSizeInBytes(BlkBitWidth, BlkLen);
+                    const float b_s = QuantBScaleCol[k_blk_idx];
+                    const uint8_t b_z =
+                        (QuantBZeroPointCol != nullptr)
+                            ? ((k_blk_idx & 1) == 1)
+                                  ? QuantBZeroPointCol[k_blk_idx / 2] >> 4
+                                  : QuantBZeroPointCol[k_blk_idx / 2] & 0x0F
+                            : 8;
+
+                    for (size_t kk = 0; kk < kklen; ++kk) {
+                        const uint8_t b_packed = b_data[kk / 2];
+                        const uint8_t b_byte = ((kk & 1) == 1) ? b_packed >> 4 : b_packed & 0x0F;
+                        const float b_value = (b_byte - b_z) * b_s;
+
+                        Dst[(k + kk) * 16 + nn] = b_value;
+                    }
+                }
+
+                QuantBDataCol += BlockStrideQuantB * MlasQNBitBlkDataSizeInBytes(BlkBitWidth, BlkLen);
+                QuantBScaleCol += BlockStrideQuantB;
+                if (QuantBZeroPointCol != nullptr) {
+                    QuantBZeroPointCol += MlasQNBitZeroPointsForBlksSizeInBytes<BlkBitWidth>(BlockStrideQuantB);
+                }
+            }
+
+            // zero out any remaining columns
+
+            if (nnlen < 16) {
+                for (size_t k = 0; k < CountK; ++k) {
+                    std::fill_n(Dst + (k * 16) + nnlen, 16 - nnlen, 0.0f);
+                }
+            }
+
+            Dst += CountK * 16;
+        }
+    };
+
+    impl0_reference();
+}
+
+#define SPECIALIZE_QNBIT_BLK_DEQUANT_B_FOR_SGEMM(BlkBitWidth, BlkLen)                           \
+    template <>                                                                                 \
+    MLAS_FORCEINLINE void                                                                       \
+    MlasQNBitBlkDequantBForSgemm<BlkBitWidth, BlkLen, MLAS_SQNBIT_GEMM_KERNEL_NEON>(            \
+        float* FpData,                                                                          \
+        const uint8_t* QuantBData,                                                              \
+        const float* QuantBScale,                                                               \
+        const uint8_t* QuantBZeroPoint,                                                         \
+        size_t CountN,                                                                          \
+        size_t CountK,                                                                          \
+        size_t BlockStrideQuantB                                                                \
+    )                                                                                           \
+    {                                                                                           \
+        MlasQNBitBlkDequantBForSgemmNeon<BlkBitWidth, BlkLen>(                                  \
+            FpData, QuantBData, QuantBScale, QuantBZeroPoint, CountN, CountK, BlockStrideQuantB \
+        );                                                                                      \
+    }
+
+SPECIALIZE_QNBIT_BLK_DEQUANT_B_FOR_SGEMM(4, 16)
+SPECIALIZE_QNBIT_BLK_DEQUANT_B_FOR_SGEMM(4, 32)
+SPECIALIZE_QNBIT_BLK_DEQUANT_B_FOR_SGEMM(4, 64)
+SPECIALIZE_QNBIT_BLK_DEQUANT_B_FOR_SGEMM(4, 128)
+SPECIALIZE_QNBIT_BLK_DEQUANT_B_FOR_SGEMM(4, 256)
+
+#undef SPECIALIZE_QNBIT_BLK_DEQUANT_B_FOR_SGEMM
+
+//
+// Kernel dispatch structure definition.
+//
+
+const MLAS_SQNBIT_GEMM_DISPATCH MlasSQNBitGemmDispatchNeon = []() {
+    MLAS_SQNBIT_GEMM_DISPATCH d;
+    d.Operations[QuantVariant_BitWidth4_BlockSize16] = MlasSQNBitGemmOperation<4, 16, MLAS_SQNBIT_GEMM_KERNEL_NEON>;
+    d.Operations[QuantVariant_BitWidth4_BlockSize32] = MlasSQNBitGemmOperation<4, 32, MLAS_SQNBIT_GEMM_KERNEL_NEON>;
+    d.Operations[QuantVariant_BitWidth4_BlockSize64] = MlasSQNBitGemmOperation<4, 64, MLAS_SQNBIT_GEMM_KERNEL_NEON>;
+    d.Operations[QuantVariant_BitWidth4_BlockSize128] = MlasSQNBitGemmOperation<4, 128, MLAS_SQNBIT_GEMM_KERNEL_NEON>;
+    d.Operations[QuantVariant_BitWidth4_BlockSize256] = MlasSQNBitGemmOperation<4, 256, MLAS_SQNBIT_GEMM_KERNEL_NEON>;
+    return d;
+}();
diff --git a/onnxruntime/core/mlas/lib/threading.cpp b/onnxruntime/core/mlas/lib/threading.cpp
index ecdc5250ebf0e..dc5daf998d3be 100644
--- a/onnxruntime/core/mlas/lib/threading.cpp
+++ b/onnxruntime/core/mlas/lib/threading.cpp
@@ -93,3 +93,41 @@ MlasTrySimpleParallel(
     MLAS_THREADPOOL::TrySimpleParallelFor(ThreadPool, Iterations, Work);
 #endif
 }
+
+
+void
+MlasTryBatchParallel(
+	MLAS_THREADPOOL * ThreadPool,
+	const std::ptrdiff_t Iterations,
+	const std::function<void(std::ptrdiff_t tid)>& Work)
+{
+    //
+    // Execute the routine directly if only one iteration is specified.
+    //
+    if (Iterations == 1) {
+        Work(0);
+        return;
+    }
+
+#if defined(BUILD_MLAS_NO_ONNXRUNTIME)
+    MLAS_UNREFERENCED_PARAMETER(ThreadPool);
+
+    //
+    // Fallback to OpenMP or a serialized implementation.
+    //
+
+    //
+    // Execute the routine for the specified number of iterations.
+    //
+    for (ptrdiff_t tid = 0; tid < Iterations; tid++) {
+        Work(tid);
+    }
+#else
+    //
+    // Schedule the threaded iterations using the thread pool object.
+    //
+
+    MLAS_THREADPOOL::TryBatchParallelFor(ThreadPool, Iterations, Work, 0);
+#endif
+
+}
\ No newline at end of file
diff --git a/onnxruntime/core/optimizer/compute_optimizer/upstream_gather.cc b/onnxruntime/core/optimizer/compute_optimizer/upstream_gather.cc
index 094ea1e24dd92..9c98ed6d3e114 100644
--- a/onnxruntime/core/optimizer/compute_optimizer/upstream_gather.cc
+++ b/onnxruntime/core/optimizer/compute_optimizer/upstream_gather.cc
@@ -338,8 +338,8 @@ std::optional<SliceInfo> IsSupportedGather(Graph& graph, Node& node,
   auto axis = static_cast<int>(node.GetAttributes().at("axis").i());
   axis = axis < 0 ? axis + data_rank : axis;
   size_t dim_size = static_cast<size_t>(indices_shape->dim_size());
-  bool is_single_value_1d_tensor = dim_size != 0 && (dim_size == 1 && utils::HasDimValue(indices_shape->dim(0)) &&
-                                                     indices_shape->dim(0).dim_value() == 1);
+  bool is_single_value_1d_tensor = dim_size == 1 && utils::HasDimValue(indices_shape->dim(0)) &&
+                                   indices_shape->dim(0).dim_value() == 1;
   if (dim_size != 0 && !is_single_value_1d_tensor) {
     if (dim_size == 1 && utils::HasDimValue(data_shape->dim(axis)) &&
         data_shape->dim(axis).dim_value() > indices_shape->dim(0).dim_value()) {
diff --git a/onnxruntime/core/optimizer/compute_optimizer/upstream_reshape_actors.cc b/onnxruntime/core/optimizer/compute_optimizer/upstream_reshape_actors.cc
index 716b027068ba1..23f7c45fba4ba 100644
--- a/onnxruntime/core/optimizer/compute_optimizer/upstream_reshape_actors.cc
+++ b/onnxruntime/core/optimizer/compute_optimizer/upstream_reshape_actors.cc
@@ -3,6 +3,7 @@
 
 #ifdef ENABLE_TRAINING
 
+#include <onnx/defs/attr_proto_util.h>
 #include "core/optimizer/utils.h"
 #include "core/optimizer/compute_optimizer/upstream_reshape_actors.h"
 
@@ -282,6 +283,23 @@ bool LayerNormalizationReshapeActor::PreCheck(
   return propagate_input_indices.size() > 0;
 }
 
+bool LayerNormalizationReshapeActor::PostProcess(
+    Graph& /* graph */, Node& current_node, const ReshapeInfo& /* info_without_node */,
+    const logging::Logger& /* logger */,
+    std::vector<int>& /* propagate_input_indices */,
+    const std::unordered_map<int, std::vector<DimCompare>>& /* all_input_cmp_rets */,
+    const std::unordered_map<int, ReshapeInfo>& /* new_reshape_infos */) {
+  auto axis = static_cast<int64_t>(current_node.GetAttributes().at("axis").i());
+  // When Reshape(from 3D to 2D, with the first two dimensions be merged) upstream a LayerNormalization,
+  // The axis attribute of LayerNormalization should be decreased by 1 if it is greater than 1.
+  if (axis > 1) {
+    auto new_axis = axis - 1;
+    auto& attributes = current_node.GetMutableAttributes();
+    attributes["axis"] = ONNX_NAMESPACE::MakeAttribute("axis", static_cast<int64_t>(new_axis));
+  }
+  return true;
+}
+
 template class SimplePointwiseReshapeActor<true>;
 template class SimplePointwiseReshapeActor<false>;
 
diff --git a/onnxruntime/core/optimizer/compute_optimizer/upstream_reshape_actors.h b/onnxruntime/core/optimizer/compute_optimizer/upstream_reshape_actors.h
index 05bcbabe9ba4c..de50a56fd8781 100644
--- a/onnxruntime/core/optimizer/compute_optimizer/upstream_reshape_actors.h
+++ b/onnxruntime/core/optimizer/compute_optimizer/upstream_reshape_actors.h
@@ -111,13 +111,11 @@ class UpStreamReshapeOperatorActorBase : public UpStreamOperatorActorBase {
    * So far, we don't have requirements to override PostProcess function.
 
    */
-  bool PostProcess(Graph& /* graph */, Node& /* current_node */, const ReshapeInfo& /* info_without_node */,
-                   const logging::Logger& /* logger */,
-                   std::vector<int>& /* propagate_input_indices */,
-                   const std::unordered_map<int, std::vector<DimCompare>>& /* all_input_cmp_rets */,
-                   const std::unordered_map<int, ReshapeInfo>& /* new_reshape_infos */) {
-    return true;
-  }
+  virtual bool PostProcess(Graph& /* graph */, Node& /* current_node */, const ReshapeInfo& /* info_without_node */,
+                           const logging::Logger& /* logger */,
+                           std::vector<int>& /* propagate_input_indices */,
+                           const std::unordered_map<int, std::vector<DimCompare>>& /* all_input_cmp_rets */,
+                           const std::unordered_map<int, ReshapeInfo>& /* new_reshape_infos */) = 0;
 };
 
 // The inputs are broad-cast-able. The outputs should have the same shape (fully broadcasted shape)
@@ -133,6 +131,14 @@ class SimplePointwiseReshapeActor : public UpStreamReshapeOperatorActorBase {
                 std::vector<int>& propagate_input_indices,
                 std::unordered_map<int, std::vector<DimCompare>>& all_input_cmp_rets,
                 std::function<void(Node& node)>& shape_update_func) override;
+
+  bool PostProcess(Graph& /* graph */, Node& /* current_node */, const ReshapeInfo& /* info_without_node */,
+                   const logging::Logger& /* logger */,
+                   std::vector<int>& /* propagate_input_indices */,
+                   const std::unordered_map<int, std::vector<DimCompare>>& /* all_input_cmp_rets */,
+                   const std::unordered_map<int, ReshapeInfo>& /* new_reshape_infos */) override {
+    return true;
+  }
 };
 
 class MatMulReshapeActor : public UpStreamReshapeOperatorActorBase {
@@ -145,6 +151,14 @@ class MatMulReshapeActor : public UpStreamReshapeOperatorActorBase {
                 std::vector<int>& propagate_input_indices,
                 std::unordered_map<int, std::vector<DimCompare>>& all_input_cmp_rets,
                 std::function<void(Node& node)>& shape_update_func) override;
+
+  bool PostProcess(Graph& /* graph */, Node& /* current_node */, const ReshapeInfo& /* info_without_node */,
+                   const logging::Logger& /* logger */,
+                   std::vector<int>& /* propagate_input_indices */,
+                   const std::unordered_map<int, std::vector<DimCompare>>& /* all_input_cmp_rets */,
+                   const std::unordered_map<int, ReshapeInfo>& /* new_reshape_infos */) override {
+    return true;
+  }
 };
 
 class LayerNormalizationReshapeActor : public UpStreamReshapeOperatorActorBase {
@@ -157,6 +171,12 @@ class LayerNormalizationReshapeActor : public UpStreamReshapeOperatorActorBase {
                 std::vector<int>& propagate_input_indices,
                 std::unordered_map<int, std::vector<DimCompare>>& all_input_cmp_rets,
                 std::function<void(Node& node)>& shape_update_func) override;
+
+  bool PostProcess(Graph& /* graph */, Node& current_node, const ReshapeInfo& /* info_without_node */,
+                   const logging::Logger& /* logger */,
+                   std::vector<int>& /* propagate_input_indices */,
+                   const std::unordered_map<int, std::vector<DimCompare>>& /* all_input_cmp_rets */,
+                   const std::unordered_map<int, ReshapeInfo>& /* new_reshape_infos */) override;
 };
 
 /**
diff --git a/onnxruntime/core/optimizer/constant_folding.cc b/onnxruntime/core/optimizer/constant_folding.cc
index f46273f2680a9..e3a2f2d74c0d4 100644
--- a/onnxruntime/core/optimizer/constant_folding.cc
+++ b/onnxruntime/core/optimizer/constant_folding.cc
@@ -4,6 +4,7 @@
 #include <limits>
 
 #include "core/optimizer/constant_folding.h"
+#include "core/optimizer/initializer.h"
 #include "core/optimizer/utils.h"
 #include "core/graph/graph_utils.h"
 #include "core/optimizer/optimizer_execution_frame.h"
@@ -90,6 +91,45 @@ static bool ConstantFoldShapeNode(Graph& graph, Node& node) {
   return is_concrete_shape;  // convert to constant if this is true
 }
 
+// This function inlines the appropriate subgraph. It does not literally fold it.
+static Status ConstantFoldIfNode(Graph& graph, Node& if_node, const logging::Logger& logger, bool& folded) {
+  folded = false;
+  // First, find out which subgraph to inline
+  // We need to fetch the constant argument.
+  assert(if_node.InputDefs().size() == 1);
+  const auto* condition_def = if_node.InputDefs()[0];
+
+  // We need to check if the condition is a constant.
+  constexpr bool check_outer_scope_true = true;
+  const ONNX_NAMESPACE::TensorProto* initializer =
+      graph.GetConstantInitializer(condition_def->Name(), check_outer_scope_true);
+  if (initializer == nullptr) {
+    return Status::OK();
+  }
+
+  // This is a boolean initializer with a single element.
+  Initializer condition{*initializer};
+  ORT_RETURN_IF_NOT(condition.size() == 1, "If node condition initializer: `", condition_def->Name(),
+                    "' is expected to have a single boolean element");
+
+  const bool condition_value = *condition.data<bool>();
+
+  auto status = graph.InlineIfSubgraph(condition_value, if_node, logger);
+
+  if (!status.IsOK()) {
+    LOGS(logger, WARNING) << "Unable to constant fold. InlineIfSubgraph failed "
+                          << " node '" << if_node.Name() << "': "
+                          << status.ErrorMessage();
+    return status;
+  }
+
+  graph_utils::RemoveNodeOutputEdges(graph, if_node);
+  graph.RemoveNode(if_node.Index());
+
+  folded = true;
+  return status;
+}
+
 Status ConstantFolding::ApplyImpl(Graph& graph, bool& modified, int graph_level, const logging::Logger& logger) const {
   bool have_updated_nodes = false;
   GraphViewer graph_viewer(graph);
@@ -118,7 +158,20 @@ Status ConstantFolding::ApplyImpl(Graph& graph, bool& modified, int graph_level,
     }
 
     bool converted_to_constant = false;
-    if (node->OpType().compare("Shape") == 0) {
+    if (node->OpType().compare("If") == 0) {
+      // This process constant folds the If node only,
+      // but inlines the nodes of the corresponding branch graph.
+      // It does not convert the node to a constant in a common sense.
+      // We call it constant folding because the `If` node constant condition
+      // may enable us to inline the corresponding branch graph.
+      bool folded = false;
+      ORT_RETURN_IF_ERROR(ConstantFoldIfNode(graph, *node, logger, folded));
+      if (folded) {
+        // Node removal is done within ConstantFoldIfNode()
+        modified = true;
+        have_updated_nodes = true;
+      }
+    } else if (node->OpType().compare("Shape") == 0) {
       converted_to_constant = ConstantFoldShapeNode(graph, *node);
     } else {
       InitializedTensorSet constant_inputs;
diff --git a/onnxruntime/core/optimizer/conv_activation_fusion.cc b/onnxruntime/core/optimizer/conv_activation_fusion.cc
index c090ab2a6cc9b..d27603e4ab3a1 100644
--- a/onnxruntime/core/optimizer/conv_activation_fusion.cc
+++ b/onnxruntime/core/optimizer/conv_activation_fusion.cc
@@ -4,7 +4,7 @@
 #include "core/optimizer/conv_activation_fusion.h"
 
 #include <string_view>
-
+#include <string>
 #include "core/common/inlined_containers.h"
 #include "core/framework/tensorprotoutils.h"
 #include "core/mlas/inc/mlas.h"
@@ -174,9 +174,29 @@ using NTO = NodesToOptimize;
 
 class FuseConvActivationAction : public ReplaceWithNew {
  private:
-  std::string OpType(const RuntimeState&) const override { return "FusedConv"; }
+  std::string OpType(const RuntimeState& runtime_state) const override {
+    const auto& domain = runtime_state.selected_nodes.Target().Domain();
+    const auto& op_type = runtime_state.selected_nodes.Target().OpType();
+    if (domain == kOnnxDomain) {
+      if (op_type == "Conv") {
+        return "FusedConv";
+      }
+    } else if (domain == kMSDomain) {
+      if (op_type == "NhwcConv") {
+        return "NhwcFusedConv";
+      }
+    } else if (domain == kMSInternalNHWCDomain) {
+      if (op_type == "Conv") {
+        return "Conv";
+      }
+    }
+    ORT_THROW("Unsupported operator: ", op_type, " and domain: ", domain);
+  }
 
-  std::string Domain(const RuntimeState&) const override { return kMSDomain; }
+  std::string Domain(const RuntimeState& runtime_state) const override {
+    auto domain = runtime_state.selected_nodes.Target().Domain();
+    return domain == kOnnxDomain ? kMSDomain : domain;
+  }
 
   NodeAttributes ExtraAttributes(const RuntimeState& state) const override {
     NodeAttributes extra_fused_conv_attributes;
@@ -260,8 +280,11 @@ void RegisterConvActivationFusionRules(SelectorActionRegistry& registry) {
   const auto name = "ConvAct";
   auto action = std::make_unique<actions::FuseConvActivationAction>();
 #if !defined(ORT_MINIMAL_BUILD)
+  const std::string msInternalNHWCDomainConv = SelectorActionRegistry::OpVersionsMapKey("Conv", kMSInternalNHWCDomain);
+  const std::string msDomainConv = SelectorActionRegistry::OpVersionsMapKey("NhwcConv", kMSDomain);
   auto selector = std::make_unique<selectors::ConvActivationSelector>();
-  registry.RegisterSelectorAndAction(name, {{"Conv", {1, 11}}},
+
+  registry.RegisterSelectorAndAction(name, {{"Conv", {1, 11}}, {msInternalNHWCDomainConv, {11}}, {msDomainConv, {1}}},
                                      std::move(selector), std::move(action));
 #else
   registry.RegisterAction(name, std::move(action));
diff --git a/onnxruntime/core/optimizer/conv_add_act_fusion.cc b/onnxruntime/core/optimizer/conv_add_act_fusion.cc
index 7c8bfeaec5f0f..6f90eaf07ef4d 100644
--- a/onnxruntime/core/optimizer/conv_add_act_fusion.cc
+++ b/onnxruntime/core/optimizer/conv_add_act_fusion.cc
@@ -287,12 +287,9 @@ class FuseConvAddActivationAction : public ReplaceWithNew {
 void RegisterConvAddActivationFusionRules(SelectorActionRegistry& registry) {
   auto action = std::make_unique<actions::FuseConvAddActivationAction>();
   auto selector = std::make_unique<selectors::ConvAddActivationSelector>();
-  registry.RegisterSelectorAndAction("ConvAddAct", {{"Conv", {1, 11}}},
+  std::string msDomainNhwcFusedConv = SelectorActionRegistry::OpVersionsMapKey("NhwcFusedConv", kMSDomain);
+  registry.RegisterSelectorAndAction("ConvAddAct", {{"Conv", {1, 11}}, {msDomainNhwcFusedConv, {1, 11}}},
                                      std::move(selector), std::move(action));
-  auto action_nhwc = std::make_unique<actions::FuseConvAddActivationAction>();
-  auto selector_nhwc = std::make_unique<selectors::ConvAddActivationSelector>();
-  registry.RegisterSelectorAndAction("NhwcFusedConvAct", {{"NhwcFusedConv", {1, 11}}},
-                                     std::move(selector_nhwc), std::move(action_nhwc));
 }
 
 SelectorActionRegistry CreateSelectorActionRegistry() {
diff --git a/onnxruntime/core/optimizer/gather_fusion.cc b/onnxruntime/core/optimizer/gather_fusion.cc
index b994028cbca13..4903bc1d6b961 100644
--- a/onnxruntime/core/optimizer/gather_fusion.cc
+++ b/onnxruntime/core/optimizer/gather_fusion.cc
@@ -9,7 +9,8 @@
 
 namespace onnxruntime {
 
-bool GatherToSplitFusion::IsSupportedGather(const Graph& graph, const Node& node, int64_t& index, int64_t& axis, int64_t& indices_n_dims) const {
+bool GatherToSplitFusion::IsSupportedGather(const Graph& graph, const Node& node, int64_t& index, int64_t& axis,
+                                            int64_t& indices_n_dims) const {
   if (!graph_utils::IsSupportedOptypeVersionAndDomain(node, "Gather", {1, 11, 13}) ||
       !graph_utils::IsSupportedProvider(node, GetCompatibleExecutionProviders())) {
     return false;
@@ -53,6 +54,22 @@ Status GatherToSplitFusion::ApplyImpl(Graph& graph, bool& modified, int graph_le
   GraphViewer graph_viewer(graph);
   const auto& node_topology_list = graph_viewer.GetNodesInTopologicalOrder();
 
+  InlinedVector<const NodeArg*> node_args;
+  for (auto node_arg : graph.GetInputs()) {
+    if (node_arg && graph.GetConsumerNodes(node_arg->Name()).size() > 1) {
+      node_args.push_back(node_arg);
+    }
+  }
+
+  for (auto entry : graph.GetAllInitializedTensors()) {
+    if (graph.GetConsumerNodes(entry.first).size() > 1) {
+      auto node_arg = graph.GetNodeArg(entry.first);
+      if (node_arg) {
+        node_args.push_back(node_arg);
+      }
+    }
+  }
+
   for (auto node_index : node_topology_list) {
     auto* p_node = graph.GetNode(node_index);
     if (p_node == nullptr) continue;  // we removed the node as part of an earlier fusion
@@ -73,7 +90,11 @@ Status GatherToSplitFusion::ApplyImpl(Graph& graph, bool& modified, int graph_le
     size_t output_count = node.GetOutputEdgesCount();
     if (output_count <= 1) continue;
 
-    auto shape = node.MutableOutputDefs()[0]->Shape();
+    node_args.push_back(node.OutputDefs()[0]);
+  }
+
+  for (const NodeArg* node_arg : node_args) {
+    auto shape = node_arg->Shape();
     if (!shape) continue;
     int64_t rank = static_cast<int64_t>(shape->dim_size());
 
@@ -81,11 +102,14 @@ Status GatherToSplitFusion::ApplyImpl(Graph& graph, bool& modified, int graph_le
     bool first_edge = true;
     int64_t split_axis = 0;
     int64_t indices_n_dims = -1;
-    InlinedVector<NodeArg*> gather_outputs(output_count, nullptr);
+    auto consumers = graph.GetConsumerNodes(node_arg->Name());
+    size_t consumer_count = consumers.size();
+    InlinedVector<NodeArg*> gather_outputs(consumer_count, nullptr);
     InlinedVector<std::reference_wrapper<Node>> nodes_to_fuse;
-    for (auto it = node.OutputNodesBegin(); it != node.OutputNodesEnd(); ++it) {
+    for (auto consumer : consumers) {
       int64_t index, axis, dims;
-      if (!IsSupportedGather(graph, *it, index, axis, dims)) {
+      if (!consumer || consumer->InputDefs()[0] != node_arg ||
+          !IsSupportedGather(graph, *consumer, index, axis, dims)) {
         can_fuse = false;
         break;
       }
@@ -99,7 +123,7 @@ Status GatherToSplitFusion::ApplyImpl(Graph& graph, bool& modified, int graph_le
       if (axis < 0) axis += rank;
       if (first_edge) {
         auto dim = shape->dim(static_cast<int>(axis));
-        if (!utils::HasDimValue(dim) || dim.dim_value() != static_cast<int64_t>(output_count)) {
+        if (!utils::HasDimValue(dim) || dim.dim_value() != static_cast<int64_t>(consumer_count)) {
           can_fuse = false;
           break;
         }
@@ -109,12 +133,12 @@ Status GatherToSplitFusion::ApplyImpl(Graph& graph, bool& modified, int graph_le
         can_fuse = false;
         break;
       }
-      if (index < 0) index += static_cast<int64_t>(output_count);
-      if (index < 0 || index >= static_cast<int64_t>(output_count) || gather_outputs[static_cast<size_t>(index)]) {
+      if (index < 0) index += static_cast<int64_t>(consumer_count);
+      if (index < 0 || index >= static_cast<int64_t>(consumer_count) || gather_outputs[static_cast<size_t>(index)]) {
         can_fuse = false;
         break;
       }
-      Node& gather_node = *graph.GetNode(it->Index());
+      Node& gather_node = *graph.GetNode(consumer->Index());
       nodes_to_fuse.emplace_back(gather_node);
       gather_outputs[static_cast<size_t>(index)] = gather_node.MutableOutputDefs()[0];
     }
@@ -122,8 +146,8 @@ Status GatherToSplitFusion::ApplyImpl(Graph& graph, bool& modified, int graph_le
     if (!can_fuse) continue;
 
     ONNX_NAMESPACE::TypeProto split_output_type;
-    const ONNX_NAMESPACE::TensorProto_DataType element_type = static_cast<ONNX_NAMESPACE::TensorProto_DataType>(
-        node.MutableOutputDefs()[0]->TypeAsProto()->tensor_type().elem_type());
+    const ONNX_NAMESPACE::TensorProto_DataType element_type =
+        static_cast<ONNX_NAMESPACE::TensorProto_DataType>(node_arg->TypeAsProto()->tensor_type().elem_type());
     split_output_type.mutable_tensor_type()->set_elem_type(element_type);
     for (int64_t i = 0; i < rank; ++i) {
       if (i == split_axis) {
@@ -136,16 +160,17 @@ Status GatherToSplitFusion::ApplyImpl(Graph& graph, bool& modified, int graph_le
     InlinedVector<NodeArg*> split_outputs;
     bool add_squeeze_node = indices_n_dims == 0;
     if (add_squeeze_node) {
-      for (size_t i = 0; i < output_count; ++i) {
+      for (size_t i = 0; i < consumer_count; ++i) {
         split_outputs.emplace_back(
             &graph.GetOrCreateNodeArg(graph.GenerateNodeArgName("split" + std::to_string(i)), &split_output_type));
       }
     }
 
-    Node& split_node = graph.AddNode(graph.GenerateNodeName("Split"), "Split", "Split for Fused Gather nodes",
-                                     {node.MutableOutputDefs()[0]}, add_squeeze_node ? split_outputs : gather_outputs);
+    Node& split_node =
+        graph.AddNode(graph.GenerateNodeName("Split"), "Split", "Split for Fused Gather nodes",
+                      {graph.GetNodeArg(node_arg->Name())}, add_squeeze_node ? split_outputs : gather_outputs);
     split_node.AddAttribute("axis", split_axis);
-    split_node.SetExecutionProviderType(node.GetExecutionProviderType());
+    split_node.SetExecutionProviderType(nodes_to_fuse[0].get().GetExecutionProviderType());
 
     // Squeeze-11, Squeee-13, Split-13, Split-18 have different schemas.
     int onnx_opset_version = -1;
@@ -155,16 +180,16 @@ Status GatherToSplitFusion::ApplyImpl(Graph& graph, bool& modified, int graph_le
 
     if (onnx_opset_version < 13) {
       if (add_squeeze_node) {
-        for (size_t i = 0; i < output_count; ++i) {
+        for (size_t i = 0; i < consumer_count; ++i) {
           Node& squeeze_node = graph.AddNode(graph.GenerateNodeName("Squeeze" + std::to_string(i)), "Squeeze",
                                              "Squeeze for Fused Gather nodes", {split_outputs[i]}, {gather_outputs[i]});
           squeeze_node.AddAttribute("axes", std::vector<int64_t>{split_axis});
-          squeeze_node.SetExecutionProviderType(node.GetExecutionProviderType());
+          squeeze_node.SetExecutionProviderType(nodes_to_fuse[0].get().GetExecutionProviderType());
         }
       }
     } else {
       if (onnx_opset_version >= 18) {
-        split_node.AddAttribute("num_outputs", static_cast<int64_t>(output_count));
+        split_node.AddAttribute("num_outputs", static_cast<int64_t>(consumer_count));
       }
 
       if (add_squeeze_node) {
@@ -176,11 +201,11 @@ Status GatherToSplitFusion::ApplyImpl(Graph& graph, bool& modified, int graph_le
         axes_initializer_proto.set_raw_data(axes_value.data(), axes_value.size() * sizeof(int64_t));
         NodeArg* axes_arg = &graph_utils::AddInitializer(graph, axes_initializer_proto);
 
-        for (size_t i = 0; i < output_count; ++i) {
+        for (size_t i = 0; i < consumer_count; ++i) {
           Node& squeeze_node =
               graph.AddNode(graph.GenerateNodeName("Squeeze" + std::to_string(i)), "Squeeze",
                             "Squeeze for Fused Gather nodes", {split_outputs[i], axes_arg}, {gather_outputs[i]});
-          squeeze_node.SetExecutionProviderType(node.GetExecutionProviderType());
+          squeeze_node.SetExecutionProviderType(nodes_to_fuse[0].get().GetExecutionProviderType());
         }
       }
     }
diff --git a/onnxruntime/core/optimizer/graph_transformer.cc b/onnxruntime/core/optimizer/graph_transformer.cc
index ba580b8105875..37093496a66fa 100644
--- a/onnxruntime/core/optimizer/graph_transformer.cc
+++ b/onnxruntime/core/optimizer/graph_transformer.cc
@@ -12,6 +12,7 @@ Status GraphTransformer::Apply(Graph& graph, bool& modified, const logging::Logg
   // ORT_RETURN_IF_ERROR(graph.Resolve());
 
   auto status = ApplyImpl(graph, modified, 0, logger);
+  LOGS(logger, INFO) << "GraphTransformer " << Name() << " modified: " << modified << " with status: " << status;
   ORT_RETURN_IF_ERROR(status);
 
 #if !defined(ORT_MINIMAL_BUILD)
diff --git a/onnxruntime/core/optimizer/graph_transformer_utils.cc b/onnxruntime/core/optimizer/graph_transformer_utils.cc
index 54511aa02a57c..3d6251a694cfb 100644
--- a/onnxruntime/core/optimizer/graph_transformer_utils.cc
+++ b/onnxruntime/core/optimizer/graph_transformer_utils.cc
@@ -50,6 +50,8 @@
 #include "core/optimizer/matmul_integer_to_float.h"
 #include "core/optimizer/matmul_scale_fusion.h"
 #include "core/optimizer/matmul_transpose_fusion.h"
+#include "core/optimizer/matmul_bn_fusion.h"
+#include "core/optimizer/pad_fusion.h"
 #include "core/optimizer/nchwc_transformer.h"
 #include "core/optimizer/noop_elimination.h"
 #include "core/optimizer/not_where_fusion.h"
@@ -75,7 +77,6 @@
 #include "orttraining/core/optimizer/bias_softmax_dropout_fusion.h"
 #include "orttraining/core/optimizer/bitmask_dropout_replacement.h"
 #include "orttraining/core/optimizer/sce_loss_grad_bias_fusion.h"
-#include "orttraining/core/optimizer/memory_optimizer.h"
 #endif
 #ifdef ENABLE_TRITON
 #include "orttraining/core/optimizer/triton_fusion.h"
@@ -127,6 +128,8 @@ InlinedVector<std::unique_ptr<RewriteRule>> GenerateRewriteRules(
       rules.push_back(std::make_unique<ConvAddFusion>());
       rules.push_back(std::make_unique<ConvMulFusion>());
       rules.push_back(std::make_unique<ConvBNFusion>());
+      rules.push_back(std::make_unique<PadFusion>());
+      rules.push_back(std::make_unique<MatmulBNFusion>());
       rules.push_back(std::make_unique<ClipQuantFusion>());
       rules.push_back(std::make_unique<ReluQuantFusion>());
       break;
@@ -189,6 +192,8 @@ InlinedVector<std::unique_ptr<GraphTransformer>> GenerateTransformers(
   const InlinedHashSet<std::string_view> cpu_ep = {onnxruntime::kCpuExecutionProvider};
 #endif
   const InlinedHashSet<std::string_view> dml_ep = {onnxruntime::kDmlExecutionProvider};
+  AllocatorPtr cpu_allocator = std::make_shared<CPUAllocator>();
+
   switch (level) {
     case TransformerLevel::Level1: {
       // RewriteRule optimizations are the simplest (they generally remove unnecessary nodes and are cheap to run)
@@ -240,13 +245,14 @@ InlinedVector<std::unique_ptr<GraphTransformer>> GenerateTransformers(
 
       // run TransposeOptimizer last as it works in a slightly different way by moving Transpose nodes around.
       // shouldn't affect the end result - just easier to debug any issue if it's last.
-      // local CPU allocator is enough as this allocator is finally passed to a local tensor.
-      // We will also benefit by using a local allocator as we don't need to pass allocator as parameter for EP API refactor
-      AllocatorPtr cpu_allocator = std::make_shared<CPUAllocator>();
       transformers.emplace_back(std::make_unique<TransposeOptimizer>(std::move(cpu_allocator)));
     } break;
 
     case TransformerLevel::Level2: {
+      // we run TransposeOptimizer again in Level2 for some CPU EP specific optimizations that can only be
+      // applied once nodes are assigned to the CPU EP (which happens between level 1 and level 2).
+      transformers.emplace_back(std::make_unique<TransposeOptimizer>(std::move(cpu_allocator), kCpuExecutionProvider));
+
       const bool enable_quant_qdq_cleanup =
           session_options.config_options.GetConfigOrDefault(kOrtSessionOptionsEnableQuantQDQCleanup, "0") == "1";
 #if !defined(DISABLE_CONTRIB_OPS)
@@ -265,11 +271,12 @@ InlinedVector<std::unique_ptr<GraphTransformer>> GenerateTransformers(
                                                                       onnxruntime::kCudaExecutionProvider,
                                                                       onnxruntime::kRocmExecutionProvider,
                                                                       onnxruntime::kDmlExecutionProvider};
-      const InlinedHashSet<std::string_view> cpu_cuda_rocm_acl_armnn_eps = {onnxruntime::kCpuExecutionProvider,
-                                                                            onnxruntime::kCudaExecutionProvider,
-                                                                            onnxruntime::kRocmExecutionProvider,
-                                                                            onnxruntime::kAclExecutionProvider,
-                                                                            onnxruntime::kArmNNExecutionProvider};
+      const InlinedHashSet<std::string_view> cpu_cuda_rocm_acl_armnn_js_eps = {onnxruntime::kCpuExecutionProvider,
+                                                                               onnxruntime::kCudaExecutionProvider,
+                                                                               onnxruntime::kRocmExecutionProvider,
+                                                                               onnxruntime::kAclExecutionProvider,
+                                                                               onnxruntime::kArmNNExecutionProvider,
+                                                                               onnxruntime::kJsExecutionProvider};
 
 #ifdef MLAS_TARGET_AMD64_IX86
       const bool avx2_precision_mode =
@@ -291,7 +298,7 @@ InlinedVector<std::unique_ptr<GraphTransformer>> GenerateTransformers(
       transformers.emplace_back(std::make_unique<MatMulIntegerToFloatFusion>(cpu_ep));
       transformers.emplace_back(std::make_unique<DynamicQuantizeMatMulFusion>(cpu_ep));
 
-      transformers.emplace_back(std::make_unique<ConvActivationFusion>(cpu_cuda_rocm_acl_armnn_eps));
+      transformers.emplace_back(std::make_unique<ConvActivationFusion>(cpu_cuda_rocm_acl_armnn_js_eps));
 
       transformers.emplace_back(std::make_unique<GeluFusion>(cpu_cuda_dml_rocm_eps));
       transformers.emplace_back(std::make_unique<LayerNormFusion>(cpu_cuda_dml_rocm_eps));
@@ -346,18 +353,6 @@ InlinedVector<std::unique_ptr<GraphTransformer>> GenerateTransformers(
       // fusions might be prevented if this one removes a Q/DQ node too early.
       transformers.emplace_back(std::make_unique<QDQFinalCleanupTransformer>(enable_quant_qdq_cleanup));
 
-#ifdef ENABLE_TRAINING
-      // Put memory optimization transformer at last (which is done after most of fusions are done) by intention.
-      // Known issue: after memory optimization is completed, if some fusion happens, it is possible that the
-      // node priority got changed. This may disorder the execution order of nodes to recompute.
-      // TODO(pengwa): need to fix this issue.
-      const std::string enable_memory_optimizer =
-          session_options.config_options.GetConfigOrDefault(kOrtSessionOptionsMemoryOptimizerEnabler, "");
-      const std::string probe_level =
-          session_options.config_options.GetConfigOrDefault(kOrtSessionOptionsMemoryOptimizerProbeLevel, "0");
-      transformers.emplace_back(std::make_unique<MemoryOptimizer>(enable_memory_optimizer, probe_level));
-#endif
-
     } break;
 
     case TransformerLevel::Level3: {
@@ -366,16 +361,16 @@ InlinedVector<std::unique_ptr<GraphTransformer>> GenerateTransformers(
       if (MlasNchwcGetBlockSize() > 1) {
         transformers.emplace_back(std::make_unique<NchwcTransformer>());
       }
-      AllocatorPtr cpu_allocator = std::make_shared<CPUAllocator>();
+
       auto cpu_registry = cpu_execution_provider.GetKernelRegistry();
       auto nhwc_transformer = std::make_unique<NhwcTransformer>(std::move(cpu_allocator), std::move(cpu_registry));
       if (nhwc_transformer->IsActive()) {
         transformers.emplace_back(std::move(nhwc_transformer));
       }
-      // NCHWCtransformer should have a higher priority versus this. Because NCHWCtransformer also do the similar things
-      // of fusion patterns and target on CPU. However, NCHWCtransformer will reorder the layout to nchwc which is only available for
-      // x86-64 cpu, not edge cpu like arm. But This transformer could be used by opencl-ep/cpu-ep. So
-      // we will prefer NhwcTransformer once ort runs on x86-64 CPU, otherwise ConvAddActivationFusion is enabled.
+
+      // NchwcTransformer must have a higher priority than ConvAddActivationFusion. NchwcTransformer does similar
+      // fusions targeting CPU but also reorders the layout to NCHWc which is expected to be more efficient but is
+      // only available on x86-64.
       // PR #6351 implemented similar fusion-pattern for CUDA only, and can only fuse conv-add-relu,
       // while we can fuse more activation.
       transformers.emplace_back(std::make_unique<ConvAddActivationFusion>(cpu_ep));
diff --git a/onnxruntime/core/optimizer/initializer.cc b/onnxruntime/core/optimizer/initializer.cc
index 9cdc0d9ef0473..9e807ddc7be59 100644
--- a/onnxruntime/core/optimizer/initializer.cc
+++ b/onnxruntime/core/optimizer/initializer.cc
@@ -3,22 +3,23 @@
 
 #include "core/optimizer/initializer.h"
 
-#include "core/common/gsl.h"
+#include <functional>
+#include <memory>
 
+#include "core/common/gsl.h"
 #include "core/common/path.h"
 #include "core/framework/tensorprotoutils.h"
 #include "core/framework/tensor_external_data_info.h"
 #include "core/platform/env.h"
 
-#include <functional>
-
 namespace onnxruntime {
 
 Initializer::Initializer(ONNX_NAMESPACE::TensorProto_DataType data_type,
                          std::string_view name,
                          gsl::span<const int64_t> dims)
     : name_(name),
-      data_(DataTypeImpl::TensorTypeFromONNXEnum(data_type)->GetElementType(), dims, std::make_shared<CPUAllocator>()) {
+      data_(DataTypeImpl::TensorTypeFromONNXEnum(data_type)->GetElementType(), dims,
+            std::make_shared<CPUAllocator>()) {
   if (!data_.IsDataTypeString()) {
     memset(data_.MutableDataRaw(), 0, data_.SizeInBytes());
   }
@@ -39,7 +40,8 @@ Initializer::Initializer(const ONNX_NAMESPACE::TensorProto& tensor_proto, const
   auto proto_shape = utils::GetTensorShapeFromTensorProto(tensor_proto);
 
   // This must be pre-allocated
-  Tensor w(DataTypeImpl::TensorTypeFromONNXEnum(proto_data_type)->GetElementType(), proto_shape, std::make_shared<CPUAllocator>());
+  Tensor w(DataTypeImpl::TensorTypeFromONNXEnum(proto_data_type)->GetElementType(), proto_shape,
+           std::make_shared<CPUAllocator>());
   ORT_THROW_IF_ERROR(utils::TensorProtoToTensor(Env::Default(), model_path.ToPathString().c_str(), tensor_proto, w));
   data_ = std::move(w);
 }
@@ -289,7 +291,11 @@ Initializer& Initializer::sqrt() {
 namespace {
 template <typename T>
 struct ScaleByAxis {
-  void operator()(Tensor& data, const Tensor& scalers, const size_t block_size, const size_t num_blocks) const {
+  void operator()(Tensor& data,
+                  const Tensor& scalers,
+                  const size_t block_size,
+                  const size_t num_blocks,
+                  const bool column_major) const {
     ToNumeric<T> to_numeric;
     const auto scaler_size = scalers.Shape().Size();
     T* dst = data.MutableData<T>();
@@ -301,24 +307,32 @@ struct ScaleByAxis {
       }
     } else {
       for (size_t block_offset = 0, i = 0; i < num_blocks; i++) {
-        const auto numeric_scaler = to_numeric(scalers_data[i]);
-        for (size_t j = 0; j < block_size; ++j, ++block_offset) {
-          dst[block_offset] = T(to_numeric(dst[block_offset]) * numeric_scaler);
+        if (column_major) {
+          for (size_t j = 0; j < block_size; ++j, ++block_offset) {
+            const auto numeric_scaler = to_numeric(scalers_data[j]);
+            dst[block_offset] = T(to_numeric(dst[block_offset]) * numeric_scaler);
+          }
+        } else {
+          const auto numeric_scaler = to_numeric(scalers_data[i]);
+          for (size_t j = 0; j < block_size; ++j, ++block_offset) {
+            dst[block_offset] = T(to_numeric(dst[block_offset]) * numeric_scaler);
+          }
         }
       }
     }
   }
 };
-
 }  // namespace
 
-void Initializer::scale_by_axis(const Initializer& scalers, int axis) {
+void Initializer::scale_by_axis(const Initializer& scalers, int axis, bool column_major) {
   ORT_ENFORCE(axis >= 0, "Axis must be non-negative");
   const size_t block_size = narrow<size_t>(data_.Shape().SizeFromDimension(gsl::narrow_cast<size_t>(axis)));
   const size_t num_blocks = size() / block_size;
-  ORT_ENFORCE(scalers.size() == 1 || scalers.size() == num_blocks, "Invalid other(scalers) size");
+  ORT_ENFORCE(scalers.size() == 1 ||
+                  (column_major ? scalers.size() == block_size : scalers.size() == num_blocks),
+              "Invalid other(scalers) size");
   utils::MLTypeCallDispatcher<MLFloat16, BFloat16, float, double, int32_t, int64_t> t_disp(data_.GetElementType());
-  t_disp.Invoke<ScaleByAxis>(data_, scalers.data_, block_size, num_blocks);
+  t_disp.Invoke<ScaleByAxis>(data_, scalers.data_, block_size, num_blocks, column_major);
 }
 #endif  // ORT_EXTENDED_MINIMAL_BUILD
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/optimizer/initializer.h b/onnxruntime/core/optimizer/initializer.h
index dfe054ba1aced..78e3fd6a3d24e 100644
--- a/onnxruntime/core/optimizer/initializer.h
+++ b/onnxruntime/core/optimizer/initializer.h
@@ -86,7 +86,7 @@ class Initializer final {
 
   Initializer& sqrt();
 
-  void scale_by_axis(const Initializer& other, int axis);
+  void scale_by_axis(const Initializer& other, int axis, bool column_major = false);
 #endif  // ORT_EXTENDED_MINIMAL_BUILD
  private:
   std::string name_;
diff --git a/onnxruntime/core/optimizer/insert_cast_transformer.cc b/onnxruntime/core/optimizer/insert_cast_transformer.cc
index 7c087ec77d9fe..959fcd6efdc3c 100644
--- a/onnxruntime/core/optimizer/insert_cast_transformer.cc
+++ b/onnxruntime/core/optimizer/insert_cast_transformer.cc
@@ -32,7 +32,7 @@ onnxruntime::NodeArg* AddCastNode(onnxruntime::Graph& graph,
                                   int64_t to_type,
                                   onnxruntime::ProviderType providerType) {
   // insert cast op to cast input
-  std::string node_name = graph.GenerateNodeName("InsertedCast_" + old_arg->Name());
+  std::string node_name = graph.GenerateNodeName("InsertedPrecisionFreeCast_" + old_arg->Name());
 
   auto* new_arg = &graph.GetOrCreateNodeArg(node_name, new_type);
 
@@ -235,7 +235,8 @@ enum TypeGroup {
   Unknown = -1,
   Bool = 0,
   Integer = 1,
-  Float = 2,
+  Unsigned = 2,
+  Float = 3,
 };
 
 TypeGroup GetTypeGroup(DataType type) {
@@ -243,11 +244,14 @@ TypeGroup GetTypeGroup(DataType type) {
     return Bool;
   }
 
-  if (*type == "tensor(int16)" || *type == "tensor(int32)" || *type == "tensor(int64)" || *type == "tensor(int8)" ||
-      *type == "tensor(uint16)" || *type == "tensor(uint32)" || *type == "tensor(uint64)" || *type == "tensor(uint8)") {
+  if (*type == "tensor(int16)" || *type == "tensor(int32)" || *type == "tensor(int64)" || *type == "tensor(int8)") {
     return Integer;
   }
 
+  if (*type == "tensor(uint16)" || *type == "tensor(uint32)" || *type == "tensor(uint64)" || *type == "tensor(uint8)") {
+    return Unsigned;
+  }
+
   if (*type == "tensor(bfloat16)" || *type == "tensor(double)" || *type == "tensor(float)" || *type == "tensor(float16)") {
     return Float;
   }
@@ -255,6 +259,22 @@ TypeGroup GetTypeGroup(DataType type) {
   return Unknown;
 }
 
+int BitLength(DataType type) {
+  if (*type == "tensor(bool)") {
+    return 1;
+  } else if (*type == "tensor(uint8)" || *type == "tensor(int8)") {
+    return 8;
+  } else if (*type == "tensor(int16)" || *type == "tensor(uint16)" || *type == "tensor(bfloat16)" || *type == "tensor(float16)") {
+    return 16;
+  } else if (*type == "tensor(int32)" || *type == "tensor(uint32)" || *type == "tensor(float)") {
+    return 32;
+  } else if (*type == "tensor(int64)" || *type == "tensor(uint64)" || *type == "tensor(double)") {
+    return 64;
+  } else {
+    return -1;
+  }
+}
+
 /** Transformer to remove duplicate Cast nodes. */
 class RemoveDuplicateCastTransformer : public GraphTransformer {
  public:
@@ -262,6 +282,48 @@ class RemoveDuplicateCastTransformer : public GraphTransformer {
   }
 
  private:
+  static bool UnsafeCast(DataType src_type, DataType dst_type, const Node& node) {
+    // This is not a complete cast optimisation pass, and is more conservative than it could be.
+    // For instance, certain integral -> floating point casts could be optimised but this is left to an explicit cast optimisation pass.
+
+    // The comparison with "InsertedPrecisionFreeCast_" reflects cast nodes that are inserted by InsertCastTransformer.
+    // Such casts should not be considered as loss of precision - the inserted upcasts (f16 -> f32) and downcasts (f32 -> f16) are inserted to support kernels when on a CPU EP without F16 support.
+    auto src_type_group = GetTypeGroup(src_type);
+    auto dst_type_group = GetTypeGroup(dst_type);
+    if (Unknown == src_type_group || Unknown == dst_type_group) {
+      return true;
+    }
+
+    // Do not remove any signed -> unsigned cast.
+    if ((src_type_group != Bool && src_type_group != Unsigned) && Unsigned == dst_type_group) {
+      return true;
+    }
+
+    // Do not remove any floating point -> non floating point cast.
+    if (Float == src_type_group && Float != dst_type_group) {
+      return true;
+    }
+
+    auto src_bit_length = BitLength(src_type);
+    auto dst_bit_length = BitLength(dst_type);
+
+    // unsigned integer -> integer cast may overflow if the destination integer is smaller or equal to the source integer.
+    if (Unsigned == src_type_group && Integer == dst_type_group) {
+      return dst_bit_length <= src_bit_length;
+    }
+
+    // integral -> floating cast may overflow if integer cannot be encoded in the mantissa. This check could be more precise.
+    if ((Integer == src_type_group || Unsigned == src_type_group) && Float == dst_type_group) {
+      return dst_bit_length <= src_bit_length;
+    }
+
+    if ((*src_type == "tensor(float16)" && *dst_type == "tensor(bfloat16)") || (*src_type == "tensor(bfloat16)" && *dst_type == "tensor(float16)")) {
+      return true;
+    }
+
+    return src_bit_length > dst_bit_length && (node.Name().compare(0, 26, "InsertedPrecisionFreeCast_"));
+  }
+
   Status ApplyImpl(Graph& graph, bool& modified, int graph_level, const logging::Logger& logger) const override {
     auto output_args = graph.GetOutputs();
     InlinedHashSet<const onnxruntime::NodeArg*> graph_outputs;
@@ -293,17 +355,8 @@ class RemoveDuplicateCastTransformer : public GraphTransformer {
         //     - for each consumer cast node, it meets above condition for this optimization.
         auto src_type = node.InputDefs()[0]->Type();
         auto dst_type = node.OutputDefs()[0]->Type();
-        TypeGroup src_type_group = GetTypeGroup(src_type);
-        TypeGroup dst_type_group = GetTypeGroup(dst_type);
-        if (src_type_group == Unknown || dst_type_group == Unknown) {
-          continue;
-        }
-
-        bool loss_precision_cast = false;
-        if (src_type_group > dst_type_group) {
-          loss_precision_cast = true;
-        }
 
+        bool loss_precision_cast = UnsafeCast(src_type, dst_type, node);
         size_t num_children = node.GetOutputEdgesCount();
 
         bool inconsistent_casts = false;
@@ -312,10 +365,7 @@ class RemoveDuplicateCastTransformer : public GraphTransformer {
           if (output_node.OpType() == "Cast") {
             auto src_type1 = output_node.InputDefs()[0]->Type();
             auto dst_type1 = output_node.OutputDefs()[0]->Type();
-            TypeGroup src_type_group1 = GetTypeGroup(src_type1);
-            TypeGroup dst_type_group1 = GetTypeGroup(dst_type1);
-            if (src_type_group1 == Unknown || dst_type_group1 == Unknown ||
-                (loss_precision_cast && dst_type_group1 > src_type_group1)) {
+            if (loss_precision_cast && UnsafeCast(dst_type1, src_type1, output_node)) {
               inconsistent_casts = true;
               break;
             }
diff --git a/onnxruntime/core/optimizer/layout_transformation/layout_transformation.cc b/onnxruntime/core/optimizer/layout_transformation/layout_transformation.cc
index 2d12c407e6e31..4505d4afdf1e0 100644
--- a/onnxruntime/core/optimizer/layout_transformation/layout_transformation.cc
+++ b/onnxruntime/core/optimizer/layout_transformation/layout_transformation.cc
@@ -13,27 +13,102 @@ using namespace onnx_transpose_optimization;
 
 namespace onnxruntime {
 namespace layout_transformation {
+namespace {
+// Cost check for aggressively pushing the Transpose nodes involved in the layout transformation further out.
+CostCheckResult PostLayoutTransformCostCheck(const api::GraphRef& graph, const api::NodeRef& node,
+                                             const std::vector<int64_t>& perm,
+                                             const std::unordered_set<std::string>& outputs_leading_to_transpose) {
+  // we aggressively push the layout transpose nodes.
+  // Exception: pushing through a Concat can result in Transpose nodes being added to multiple other inputs which
+  // can potentially be worse for performance. Use the cost check in that case.
+  if (node.OpType() != "Concat" &&
+      (perm == ChannelFirstToLastPerm(perm.size()) || perm == ChannelLastToFirstPerm(perm.size()))) {
+    return CostCheckResult::kPushTranspose;
+  }
+
+  // for other nodes use the default ORT cost check
+  return OrtEPCostCheck(graph, node, perm, outputs_leading_to_transpose);
+}
+
+#if defined(USE_CUDA) && ENABLE_CUDA_NHWC_OPS
+const std::unordered_set<std::string_view>& GetCUDALayoutSensitiveOps() {
+  static std::unordered_set<std::string_view> cuda_nhwc_ops = []() {
+    return std::unordered_set<std::string_view>{
+        "BatchNormalization",
+        "Conv",
+        "ConvTranspose",
+        "GlobalMaxPool",
+        "MaxPool",
+        "GlobalAveragePool",
+        "AveragePool",
+    };
+  }();
+  return cuda_nhwc_ops;
+}
+#endif
+
+/// <summary>
+/// Default function for checking if a node should have its layout changed. Allows EP specific adjustments to the
+/// default set of layout sensitive operators if required.
+///
+/// Longer term, if required, the EP API could allow the EP to provide a delegate to plugin EP specific logic so we
+/// don't hardcode it here.
+/// </summary>
+/// <param name="node">Node to check</param>
+/// <returns>true if the node should have its layout converted to NHWC.</returns>
+bool ConvertNodeLayout(const api::NodeRef& node) {
+  // skip if op is not an ONNX or contrib op
+  auto domain = node.Domain();
+  if (domain != kOnnxDomain && domain != kMSDomain) {
+    return false;
+  }
+
+  const auto& layout_sensitive_ops = GetORTLayoutSensitiveOps();
+
+  // handle special cases
+#if defined(USE_JSEP)
+  // TODO(fs-eire): Remove special case handing of JSEP once NHWC Resize implementation is fixed
+  if (node.GetExecutionProviderType() == kJsExecutionProvider) {
+    if (node.OpType() == "Resize") {
+      // leave Resize as-is pending bugfix for NHWC implementation. this means the node will remain in the ONNX domain
+      // with the original input layout.
+      return false;
+    }
+  }
+#endif
+
+#if defined(USE_CUDA) && ENABLE_CUDA_NHWC_OPS
+  if (node.GetExecutionProviderType() == kCudaExecutionProvider) {
+    if (layout_sensitive_ops.count(node.OpType())) {
+      const auto& cuda_nhwc_ops = GetCUDALayoutSensitiveOps();
+      if (!cuda_nhwc_ops.count(node.OpType())) {
+        return false;
+      }
+    }
+  }
+#endif
+
+  return layout_sensitive_ops.count(node.OpType()) != 0;
+}
+}  // namespace
 
 // Layout sensitive NCHW ops. TransformLayoutForEP will wrap these with Transpose nodes to convert the input
 // data to NHWC and output data back to NCHW, and move the op to the internal NHWC domain (kMSInternalNHWCDomain).
-// The EP requesting these ops MUST be able to handle the node with the operator in the kMSInternalNHWCDomain.
+// The EP requesting these ops MUST be able to handle the node with the operator in the kMSInternalNHWCDomain domain.
 // Once all the layout sensitive ops requested by the EP are wrapped the transpose optimizer will attempt to remove
 // as many of the layout transposes as possible.
 const std::unordered_set<std::string_view>& GetORTLayoutSensitiveOps() {
   static std::unordered_set<std::string_view> ort_layout_sensitive_ops = []() {
     const auto& layout_sensitive_ops = onnx_transpose_optimization::GetLayoutSensitiveOps();
     std::unordered_set<std::string_view> ort_specific_ops =
-    { "FusedConv",
-      "QLinearAveragePool",
-      "QLinearGlobalAveragePool"
-#if defined(USE_CUDA) || defined(USE_ROCM) || defined(USE_QNN) || defined(USE_WEBNN)
-      // The CUDA/ROCM Resize kernel is layout sensitive as it only handles NCHW input.
-      // The CPU kernel and ONNX spec are not limited to handling NCHW input so are not layout sensitive, and
-      // onnx_layout_transformation::HandleResize is used.
-      ,
-      "Resize"
-#endif
-    };
+        {
+            "FusedConv",
+            "QLinearAveragePool",
+            "QLinearGlobalAveragePool",
+            // Whilst the ONNX spec doesn't specify a layout for Resize, we treat it as layout sensitive by default
+            // as EPs tend to only support one layout.
+            "Resize",
+        };
 
     ort_specific_ops.insert(layout_sensitive_ops.cbegin(), layout_sensitive_ops.cend());
     return ort_specific_ops;
@@ -42,45 +117,21 @@ const std::unordered_set<std::string_view>& GetORTLayoutSensitiveOps() {
   return ort_layout_sensitive_ops;
 }
 
-// Cost check for aggressively pushing the Transpose nodes involved in the layout transformation further out.
-static CostCheckResult
-PostLayoutTransformCostCheck(const api::GraphRef& graph, const api::NodeRef& node,
-                             const std::vector<int64_t>& perm,
-                             const std::unordered_set<std::string>& outputs_leading_to_transpose) {
-  // we aggressively push the layout transpose nodes.
-  // Exception: pushing through a Concat can result in Transpose nodes being added to multiple other inputs which
-  // can potentially be worse for performance. Use the cost check in that case.
-  if (node.OpType() != "Concat" &&
-      (perm == ChannelFirstToLastPerm(perm.size()) || perm == ChannelLastToFirstPerm(perm.size()))) {
-    return CostCheckResult::kPushTranspose;
-  }
-
-  // for other nodes use the default ORT cost check
-  return OrtEPCostCheck(graph, node, perm, outputs_leading_to_transpose);
-}
-
 Status TransformLayoutForEP(Graph& graph, bool& modified, const IExecutionProvider& execution_provider,
                             AllocatorPtr cpu_allocator,
                             const DebugGraphFn& debug_graph_fn) {
   // We pass in nullptr for the new_node_ep param as new nodes will be assigned by the graph partitioner after
   // TransformLayoutForEP returns.
-  // sub graph recurse will be added later.
+  // sub graph recurse will be added later
   auto api_graph = MakeApiGraph(graph, cpu_allocator, /*new_node_ep*/ nullptr);
-  const auto& layout_sensitive_ops = GetORTLayoutSensitiveOps();
 
   // to convert to NHWC we need to wrap layout sensitive nodes to Transpose from NCHW to NHWC and back.
   for (auto& node : api_graph->Nodes()) {
-    if (layout_sensitive_ops.count(node->OpType())) {
-      if (node->GetExecutionProviderType() != execution_provider.Type()) {
-        continue;
-      }
-
-      auto domain = node->Domain();
-      // Skip if domain is incorrect
-      if (domain != kOnnxDomain && domain != kMSDomain) {
-        continue;
-      }
+    if (node->GetExecutionProviderType() != execution_provider.Type()) {
+      continue;
+    }
 
+    if (ConvertNodeLayout(*node)) {
       // if already transformed then change the domain to kMSInternalNHWCDomain this way the EP
       // knows this op is in the expected format.
       if (node->GetAttributeIntDefault("channels_last", 0) == 1) {
@@ -137,7 +188,6 @@ Status TransformLayoutForEP(Graph& graph, bool& modified, const IExecutionProvid
         WrapTransposesAroundNode(*api_graph, *node, {&input_perm}, {&output_perm});
       }
 
-      // TODO: Technically Resize doesn't need to change domain as the ONNX Resize spec is not layout sensitive.
       SwapNodeOpTypeAndDomain(*api_graph, *node, node->OpType(), kMSInternalNHWCDomain);
       modified = true;
     }
diff --git a/onnxruntime/core/optimizer/layout_transformation/layout_transformation_potentially_added_ops.h b/onnxruntime/core/optimizer/layout_transformation/layout_transformation_potentially_added_ops.h
index 91e21b655f8bd..cfa02c916b73f 100644
--- a/onnxruntime/core/optimizer/layout_transformation/layout_transformation_potentially_added_ops.h
+++ b/onnxruntime/core/optimizer/layout_transformation/layout_transformation_potentially_added_ops.h
@@ -20,6 +20,10 @@ inline constexpr std::array kLayoutTransformationPotentiallyAddedOps = {
     // @@region_begin(extended_minimal_build_required_kernels)@@
 
     // kOnnxDomain ops
+    OpIdentifierWithStringViews{kOnnxDomain, "DequantizeLinear", 10},
+    OpIdentifierWithStringViews{kOnnxDomain, "DequantizeLinear", 13},
+    OpIdentifierWithStringViews{kOnnxDomain, "DequantizeLinear", 19},
+    // OpIdentifierWithStringViews{kOnnxDomain, "DequantizeLinear", 21}, pending CPU EP adding support
     OpIdentifierWithStringViews{kOnnxDomain, "Gather", 1},
     OpIdentifierWithStringViews{kOnnxDomain, "Gather", 11},
     OpIdentifierWithStringViews{kOnnxDomain, "Gather", 13},
@@ -28,6 +32,10 @@ inline constexpr std::array kLayoutTransformationPotentiallyAddedOps = {
     OpIdentifierWithStringViews{kOnnxDomain, "Identity", 14},
     OpIdentifierWithStringViews{kOnnxDomain, "Identity", 16},
     OpIdentifierWithStringViews{kOnnxDomain, "Identity", 19},
+    OpIdentifierWithStringViews{kOnnxDomain, "QuantizeLinear", 10},
+    OpIdentifierWithStringViews{kOnnxDomain, "QuantizeLinear", 13},
+    OpIdentifierWithStringViews{kOnnxDomain, "QuantizeLinear", 19},
+    // OpIdentifierWithStringViews{kOnnxDomain, "QuantizeLinear", 21}, pending CPU EP adding support
     OpIdentifierWithStringViews{kOnnxDomain, "Squeeze", 1},
     OpIdentifierWithStringViews{kOnnxDomain, "Squeeze", 11},
     OpIdentifierWithStringViews{kOnnxDomain, "Squeeze", 13},
@@ -39,8 +47,10 @@ inline constexpr std::array kLayoutTransformationPotentiallyAddedOps = {
 
 #if !defined(DISABLE_CONTRIB_OPS)
     // kMSDomain ops
+    OpIdentifierWithStringViews{kMSDomain, "DequantizeLinear", 1},
     OpIdentifierWithStringViews{kMSDomain, "NhwcMaxPool", 1},
     OpIdentifierWithStringViews{kMSDomain, "QLinearConv", 1},
+    OpIdentifierWithStringViews{kMSDomain, "QuantizeLinear", 1},
 #endif  // !defined(DISABLE_CONTRIB_OPS)
 
     // @@region_end(extended_minimal_build_required_kernels)@@
diff --git a/onnxruntime/core/optimizer/matmul_bn_fusion.cc b/onnxruntime/core/optimizer/matmul_bn_fusion.cc
new file mode 100644
index 0000000000000..e944522c9c338
--- /dev/null
+++ b/onnxruntime/core/optimizer/matmul_bn_fusion.cc
@@ -0,0 +1,230 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "core/optimizer/matmul_bn_fusion.h"
+#include "core/graph/graph_utils.h"
+#include "core/optimizer/initializer.h"
+#include "core/optimizer/utils.h"
+
+namespace onnxruntime {
+
+namespace {
+const std::vector<std::pair<std::string, InlinedVector<ONNX_NAMESPACE::OperatorSetVersion>>> ignorable_nodes{
+    {"Reshape", {1, 5, 13, 14, 19}},
+    {"Transpose", {1, 13}}};
+const std::pair<std::string, InlinedVector<ONNX_NAMESPACE::OperatorSetVersion>> dest = {"BatchNormalization", {1, 6, 7, 9, 14, 15}};
+}  // namespace
+
+bool NodeIsIgnorable(const Graph& graph, const Node& root_node, NodeIndex curr_node_index) {
+  const Node* curr_node = graph.GetNode(curr_node_index);
+
+  // curr_node has different execution provider then it's parent or
+  // has output edge != 1 (this condition will handle the case when ignorable node
+  // is graph output i.e. a graph like this "MatMul->Transpose")
+  if (curr_node->GetExecutionProviderType() != root_node.GetExecutionProviderType() ||
+      curr_node->GetOutputEdgesCount() != 1) {
+    return false;
+  }
+
+  // curr_node can be any of the ignorable_nodes.
+  for (size_t index = 0; index < ignorable_nodes.size(); index++) {
+    if (graph_utils::IsSupportedOptypeVersionAndDomain(*curr_node, ignorable_nodes[index].first, ignorable_nodes[index].second)) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+std::optional<NodeIndex> MatchPath(const Graph& graph, const Node& root_node, NodeIndex curr_node_index) {
+  while (NodeIsIgnorable(graph, root_node, curr_node_index)) {
+    curr_node_index = graph.GetNode(curr_node_index)->OutputNodesBegin()->Index();
+  }
+
+  // curr_node is neither ignorable nor dest
+  const Node* curr_node = graph.GetNode(curr_node_index);
+  if (curr_node->OpType() != dest.first) {
+    return std::nullopt;
+  }
+
+  if (curr_node->GetExecutionProviderType() == root_node.GetExecutionProviderType() &&
+      graph_utils::IsSupportedOptypeVersionAndDomain(*curr_node, dest.first, dest.second)) {
+    return curr_node_index;
+  }
+
+  // either curr_node has different execution provider or
+  // has invalid opset.
+  return std::nullopt;
+}
+
+/*
+ *   Given a MatMul node, it will verify the following pattern.
+ *                MatMul                  GEMM
+ *                  |                       |
+ *               Reshape ^     --->      Reshape ^
+ *                  |                       |
+ *             Transpose ^             Transpose ^
+ *                  |
+ *        BatchNormalization
+ * Note: ^ means there can be 0 or any occurrences of that node.
+ * Few example fusable pattern:
+ *  - MatMul -> Reshape -> Transpose -> BatchNormalization              ---> GEMM -> Reshape -> Transpose
+ *  - MatMul -> Reshape -> BatchNormalization                           ---> GEMM -> Reshape
+ *  - MatMul -> Transpose -> BatchNormalization                         ---> GEMM -> Transpose
+ *  - MatMul -> Reshape -> Reshape -> BatchNormalization                ---> GEMM -> Reshape -> Reshape
+ *  - MatMul -> Reshape -> Transpose -> Reshape -> BatchNormalization   ---> GEMM -> Reshape -> Transpose -> Reshape
+ *  - MatMul -> BatchNormalization                                      ---> GEMM
+ * Other Conditions:
+ *   - B tensor of MatMul should be constant.
+ *   - scale, B, mean, var tensors of BatchNormalization should be constant.
+ *   - Every node in the path, except the BatchNormalization, should have only 1 output edge.
+ */
+bool MatmulBNFusion::SatisfyCondition(const Graph& graph, const Node& node, const logging::Logger&) const {
+  if (!graph_utils::IsSupportedOptypeVersionAndDomain(node, "MatMul", {1, 9, 13}) ||
+      node.GetOutputEdgesCount() != 1) {
+    return false;
+  }
+
+  if (graph.NodeProducesGraphOutput(node)) {
+    return false;
+  }
+
+  // because <node> is not producing graph output, it means it will have a child node
+  NodeIndex child_node_index = node.OutputNodesBegin()->Index();
+  std::optional<NodeIndex> batch_norm_index = MatchPath(graph, node, child_node_index);
+  if (!batch_norm_index.has_value()) {
+    return false;
+  }
+
+  const Node* batch_norm_node = graph.GetNode(*batch_norm_index);
+
+  // Check that the appropriate inputs to the Matmul and BN nodes are constants.
+  if (!graph_utils::NodeArgIsConstant(graph, *node.InputDefs()[1]) ||
+      !graph_utils::NodeArgIsConstant(graph, *batch_norm_node->InputDefs()[1]) ||
+      !graph_utils::NodeArgIsConstant(graph, *batch_norm_node->InputDefs()[2]) ||
+      !graph_utils::NodeArgIsConstant(graph, *batch_norm_node->InputDefs()[3]) ||
+      !graph_utils::NodeArgIsConstant(graph, *batch_norm_node->InputDefs()[4])) {
+    return false;
+  }
+
+  // First output from BN is required. Others are optional. If any optional outputs exist we can't fuse.
+  const auto& output_defs = batch_norm_node->OutputDefs();
+  if (output_defs.size() > 1) {
+    for (size_t i = 1, end = output_defs.size(); i < end; ++i) {
+      if (output_defs[i] != nullptr && output_defs[i]->Exists()) {
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
+
+/*
+ * BatchNormalization: [https://learn.microsoft.com/en-us/windows/win32/api/directml/ns-directml-dml_batch_normalization_operator_desc]
+ *   Scale * ((Input - Mean) / sqrt(Variance + Epsilon)) + Bias // ignore the FusedActivation in the above definition, that's very specific to DML
+ * Expanding out the terms:
+ *   Output = (Scale / sqrt(Variance + Epsilon)) * Input + (Scale / sqrt(Variance + Epsilon)) * -Mean + Bias
+ * Here,
+ *   [Scale/sqrt(Variance + Epsilon)] is constant, and let's call it `alpha`
+ *   [(Scale / sqrt(Variance + Epsilon)) * -Mean + Bias] is also constant, and let's call it `beta`
+ * Output = alpha * Input + beta, Input = B tensor of MatMul.
+ *
+ */
+Status MatmulBNFusion::Apply(Graph& graph, Node& matmul_node, RewriteRuleEffect& rule_effect, const logging::Logger&) const {
+  NodeIndex child_node_index = matmul_node.OutputNodesBegin()->Index();
+  NodeIndex batch_norm_node_index = MatchPath(graph, matmul_node, child_node_index).value();
+
+  Node& batch_norm_node = *graph.GetNode(batch_norm_node_index);  // need mutable node, that's why extracting node from graph
+
+  // only perform fusion if epsilon is present and is of float_32 type
+  auto epsilon_attribute = batch_norm_node.GetAttributes().find("epsilon");
+  if (epsilon_attribute == batch_norm_node.GetAttributes().end() ||
+      epsilon_attribute->second.type() != ONNX_NAMESPACE::AttributeProto_AttributeType_FLOAT) {
+    return Status::OK();
+  }
+  const float epsilon = epsilon_attribute->second.f();
+
+  const onnx::TensorProto* scale_tensor = graph_utils::GetConstantInitializer(graph, batch_norm_node.InputDefs()[1]->Name());
+  ORT_ENFORCE(scale_tensor);
+  const onnx::TensorProto* bias_tensor = graph_utils::GetConstantInitializer(graph, batch_norm_node.InputDefs()[2]->Name());
+  ORT_ENFORCE(bias_tensor);
+  const onnx::TensorProto* mean_tensor = graph_utils::GetConstantInitializer(graph, batch_norm_node.InputDefs()[3]->Name());
+  ORT_ENFORCE(mean_tensor);
+  const onnx::TensorProto* var_tensor = graph_utils::GetConstantInitializer(graph, batch_norm_node.InputDefs()[4]->Name());
+  ORT_ENFORCE(var_tensor);
+  const onnx::TensorProto* matmul_b_tensor = graph_utils::GetConstantInitializer(graph, matmul_node.InputDefs()[1]->Name());
+  ORT_ENFORCE(matmul_b_tensor);
+
+  if (!optimizer_utils::IsFloatingPointDataType(*matmul_b_tensor) ||
+      !optimizer_utils::IsFloatingPointDataType(*scale_tensor) ||
+      !optimizer_utils::IsFloatingPointDataType(*bias_tensor) ||
+      !optimizer_utils::IsFloatingPointDataType(*mean_tensor) ||
+      !optimizer_utils::IsFloatingPointDataType(*var_tensor) ||
+      scale_tensor->dims_size() != 1 ||
+      bias_tensor->dims_size() != 1 ||
+      mean_tensor->dims_size() != 1 ||
+      var_tensor->dims_size() != 1 ||
+      scale_tensor->dims(0) != matmul_b_tensor->dims(1) ||
+      bias_tensor->dims(0) != matmul_b_tensor->dims(1) ||
+      mean_tensor->dims(0) != matmul_b_tensor->dims(1) ||
+      var_tensor->dims(0) != matmul_b_tensor->dims(1)) {
+    return Status::OK();
+  }
+
+  /*
+   * temp = scale / sqrt(var + epsilon)
+   * output = (temp * Input) - ((temp * mean) + bias)
+   */
+  Initializer scale(*scale_tensor, graph.ModelPath());
+  Initializer bias(*bias_tensor, graph.ModelPath());
+  Initializer mean(*mean_tensor, graph.ModelPath());
+  Initializer var(*var_tensor, graph.ModelPath());
+  Initializer matmul_b(*matmul_b_tensor, graph.ModelPath());
+
+  var.add(epsilon);
+  var.sqrt();
+  scale.div(var);  // this is the temp
+  matmul_b.scale_by_axis(scale, 1, true);
+
+  mean.mul(scale);
+  bias.sub(mean);
+
+  // create B tensorProto for new Gemm node from <matmulB> initializer.
+  ONNX_NAMESPACE::TensorProto new_gemm_b_tensor(*matmul_b_tensor);
+  matmul_b.ToProto(new_gemm_b_tensor);
+  const std::string new_gemm_b_name = graph.GenerateNodeArgName("MatMulBnFusion_GemmB_" + matmul_b_tensor->name());
+  new_gemm_b_tensor.set_name(new_gemm_b_name);
+  NodeArg& new_gemm_b_node_arg = graph_utils::AddInitializer(graph, new_gemm_b_tensor);
+
+  // create bias tensorProto for new Gemm node from <bias> initializer.
+  ONNX_NAMESPACE::TensorProto new_gemm_bias_tensor(*bias_tensor);
+  bias.ToProto(new_gemm_bias_tensor);
+  const std::string new_gemm_bias_name = graph.GenerateNodeArgName("MatMulBnFusion_GemmBias");
+  new_gemm_bias_tensor.set_name(new_gemm_bias_name);
+  NodeArg& new_gemm_bias_node_arg = graph_utils::AddInitializer(graph, new_gemm_bias_tensor);
+
+  Node& gemm_node = graph.AddNode(
+      graph.GenerateNodeArgName("MatMulBnFusion_Gemm"),
+      "Gemm",
+      "Generated from Matmul BatchNormalization fusion",
+      {matmul_node.MutableInputDefs()[0], &new_gemm_b_node_arg, &new_gemm_bias_node_arg},
+      matmul_node.MutableOutputDefs(),
+      nullptr,
+      kOnnxDomain);
+
+  // Remove MatMul node.
+  Node* node = graph.GetNode(matmul_node.Index());
+  graph_utils::RemoveNodeOutputEdges(graph, *node);
+  graph.RemoveNode(matmul_node.Index());
+
+  // Delete optional empty output defs.
+  // Delete BatchNormalization node and update the input of the child of BatchNormalization
+  batch_norm_node.MutableOutputDefs().resize(1);
+  NodeIndex batch_norm_parent_index = graph.GetNode(child_node_index)->OpType() == "BatchNormalization" ? gemm_node.Index() : batch_norm_node.InputNodesBegin()->Index();
+  graph_utils::FinalizeNodeFusion(graph, *graph.GetNode(batch_norm_parent_index), batch_norm_node);
+
+  rule_effect = RewriteRuleEffect::kRemovedCurrentNode;
+  return Status::OK();
+}
+}  // namespace onnxruntime
\ No newline at end of file
diff --git a/onnxruntime/core/optimizer/matmul_bn_fusion.h b/onnxruntime/core/optimizer/matmul_bn_fusion.h
new file mode 100644
index 0000000000000..7a43483cf37d4
--- /dev/null
+++ b/onnxruntime/core/optimizer/matmul_bn_fusion.h
@@ -0,0 +1,27 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+
+#include "core/optimizer/rewrite_rule.h"
+
+namespace onnxruntime {
+/*
+ *   This fusion submerges a BatchNormalization operator to it's super
+ *   precedding MatMul operator, if and only if MatmulBNFusion::SatisfyCondition()
+ *   is true.
+ */
+class MatmulBNFusion : public RewriteRule {
+ public:
+  MatmulBNFusion() : RewriteRule("MatMul_BatchNormalization_Fusion") {}
+
+  std::vector<std::string> TargetOpTypes() const noexcept override {
+    return {"MatMul"};
+  }
+
+ private:
+  bool SatisfyCondition(const Graph& graph, const Node& node, const logging::Logger& logger) const override;
+
+  Status Apply(Graph& graph, Node& matmul_node, RewriteRuleEffect& rule_effect, const logging::Logger& logger) const override;
+};
+}  // namespace onnxruntime
\ No newline at end of file
diff --git a/onnxruntime/core/optimizer/optimizer_execution_frame.cc b/onnxruntime/core/optimizer/optimizer_execution_frame.cc
index fc7e694b6a69b..46041bca9dcc1 100644
--- a/onnxruntime/core/optimizer/optimizer_execution_frame.cc
+++ b/onnxruntime/core/optimizer/optimizer_execution_frame.cc
@@ -49,19 +49,13 @@ OptimizerExecutionFrame::Info::Info(const std::vector<const Node*>& nodes,
     InitializedTensorSet::const_iterator it = initialized_tensor_set.find(arg.Name());
     if (it != initialized_tensor_set.cend()) {
       const auto& tensor_proto = *(it->second);
-      size_t cpu_tensor_length;
-      ORT_RETURN_IF_ERROR(utils::GetSizeInBytesFromTensorProto<0>(tensor_proto, &cpu_tensor_length));
       OrtValue ort_value;
-      std::unique_ptr<char[]> data = std::make_unique<char[]>(cpu_tensor_length);
-      std::unique_ptr<Tensor> p_tensor;
-      ORT_RETURN_IF_ERROR(utils::TensorProtoToMLValue(Env::Default(),
-                                                      model_path.IsEmpty() ? nullptr : model_path.ToPathString().c_str(),
-                                                      tensor_proto,
-                                                      MemBuffer(data.get(), cpu_tensor_length, allocator_ptr_->Info()),
-                                                      ort_value));
-
-      initializers_[idx] = ort_value;
-      buffer_for_initialized_tensors_[idx] = std::move(data);
+      ORT_RETURN_IF_ERROR(
+          utils::TensorProtoToOrtValue(Env::Default(),
+                                       model_path.IsEmpty() ? nullptr : model_path.ToPathString().c_str(),
+                                       tensor_proto, allocator_ptr_, ort_value));
+
+      initializers_[idx] = std::move(ort_value);
     }
 
     return Status::OK();
@@ -72,7 +66,6 @@ OptimizerExecutionFrame::Info::Info(const std::vector<const Node*>& nodes,
   ort_value_name_idx_map_.Reserve(num_inputs_outputs);
   ort_value_idx_nodearg_map_.reserve(num_inputs_outputs);
   initializers_.reserve(initialized_tensor_set.size());
-  buffer_for_initialized_tensors_.reserve(initialized_tensor_set.size());
 
   for (auto* node : nodes) {
     ORT_THROW_IF_ERROR(onnxruntime::Node::ForEachWithIndex(node->InputDefs(), initialize_maps));
diff --git a/onnxruntime/core/optimizer/optimizer_execution_frame.h b/onnxruntime/core/optimizer/optimizer_execution_frame.h
index e1b8a91545f64..13cf9e652c404 100644
--- a/onnxruntime/core/optimizer/optimizer_execution_frame.h
+++ b/onnxruntime/core/optimizer/optimizer_execution_frame.h
@@ -70,7 +70,6 @@ class OptimizerExecutionFrame final : public IExecutionFrame {
     OrtValueNameIdxMap ort_value_name_idx_map_;
     std::unordered_map<int, const NodeArg*> ort_value_idx_nodearg_map_;
     std::unordered_map<int, OrtValue> initializers_;
-    InlinedHashMap<int, std::unique_ptr<char[]>> buffer_for_initialized_tensors_;
     std::unique_ptr<NodeIndexInfo> node_index_info_;
     const IExecutionProvider& execution_provider_;
     const std::function<bool(const std::string&)>& is_sparse_initializer_func_;
diff --git a/onnxruntime/core/optimizer/pad_fusion.cc b/onnxruntime/core/optimizer/pad_fusion.cc
new file mode 100644
index 0000000000000..b25e7618802dd
--- /dev/null
+++ b/onnxruntime/core/optimizer/pad_fusion.cc
@@ -0,0 +1,128 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "core/optimizer/pad_fusion.h"
+#include "core/graph/graph_utils.h"
+#include "core/optimizer/initializer.h"
+#include "core/optimizer/utils.h"
+
+namespace onnxruntime {
+
+/*
+ * It matches following pattern:
+ *     Pad
+ *      |
+ *   Conv/MaxPool
+ */
+bool PadFusion::SatisfyCondition(const Graph& graph, const Node& node, const logging::Logger&) const {
+  // if Pad has input axis, don't fuse it.
+  if (!graph_utils::IsSupportedOptypeVersionAndDomain(node, "Pad", {1, 2, 11, 13, 18, 19}) ||
+      node.GetOutputEdgesCount() != 1 ||
+      node.InputDefs().size() > 3) {
+    return false;
+  }
+
+  if (graph.NodeProducesGraphOutput(node)) {
+    return false;
+  }
+
+  const Node& child_node = *node.OutputNodesBegin();
+  if (!graph_utils::IsSupportedOptypeVersionAndDomain(child_node, "Conv", {1, 11}) &&
+      !graph_utils::IsSupportedOptypeVersionAndDomain(child_node, "MaxPool", {1, 8, 10, 11, 12})) {
+    return false;
+  }
+
+  // Don't fuse if MaxPool has optional output indices tensor because output indices tensor
+  // does not incorporate pad values. Basically if we allow the fusion, then dimension values
+  // of input tensor < dimension values of input tensor without fusion.
+  // This will cause the range of values for output indices tensor to be less than what it
+  // should have been.
+
+  if (child_node.OutputDefs().size() > 1) {
+    return false;
+  }
+
+  // conv or maxpool node must use explicit padding to perform this fusion.
+  if (child_node.GetAttributes().find("auto_pad") != child_node.GetAttributes().end() &&
+      child_node.GetAttributes().at("auto_pad").s() != "NOTSET") {
+    return false;
+  }
+
+  const NodeAttributes& pad_attributes = node.GetAttributes();
+  if (pad_attributes.find("mode") != pad_attributes.end() &&
+      pad_attributes.at("mode").s() != "constant") {
+    return false;
+  }
+
+  // Since opset 11, <pads> and <constant_value> moved to inputs.
+  // Both of these should be initializer because we have to verify the values.
+  if (node.SinceVersion() >= 11) {
+    if (!graph_utils::NodeArgIsConstant(graph, *node.InputDefs()[1]) ||
+        (node.InputDefs().size() > 2 && !graph_utils::NodeArgIsConstant(graph, *node.InputDefs()[2]))) {
+      return false;
+    }
+
+    // constant_value should be zero because Conv and MaxPool allow only 0 as padding value.
+    if (node.InputDefs().size() > 2) {
+      const auto* pad_constant_value_proto = graph_utils::GetConstantInitializer(graph, node.InputDefs()[2]->Name());
+      Initializer pad_constant_value{*pad_constant_value_proto, graph.ModelPath()};
+      if (std::any_of(pad_constant_value.DataAsByteSpan().begin(), pad_constant_value.DataAsByteSpan().end(), [](const uint8_t byte) { return byte != 0; })) {
+        return false;
+      }
+    }
+  } else {
+    if (pad_attributes.find("value") != pad_attributes.end() &&
+        pad_attributes.at("value").f() != 0.0) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+/*
+ * - For 1st two dimension Pads array's value should be zero and for rest of them values should >= 0
+ */
+Status PadFusion::Apply(Graph& graph, Node& pad_node, RewriteRuleEffect& rule_effect, const logging::Logger&) const {
+  std::vector<int64_t> pads_values;
+
+  if (pad_node.SinceVersion() >= 11) {
+    const auto* pads_proto = graph_utils::GetConstantInitializer(graph, pad_node.InputDefs()[1]->Name());
+    Initializer pads{*pads_proto, graph.ModelPath()};
+    pads_values.assign(pads.DataAsSpan<int64_t>().begin(), pads.DataAsSpan<int64_t>().end());
+  } else {
+    pads_values.assign(pad_node.GetAttributes().at("pads").ints().begin(), pad_node.GetAttributes().at("pads").ints().end());
+  }
+
+  assert(static_cast<uint32_t>(pads_values.size()) == (2 * static_cast<uint32_t>(pad_node.InputDefs()[0]->Shape()->dim_size())));
+
+  uint32_t pads_size = static_cast<uint32_t>(pads_values.size());
+  // check if padding is applied only on feature dims
+  if (pads_values[0] != 0 || pads_values[1] != 0 || pads_values[pads_size / 2] != 0 ||
+      pads_values[pads_size / 2 + 1] != 0) {
+    return Status::OK();
+  }
+
+  // check if padding is only positive
+  if (std::any_of(pads_values.begin(), pads_values.end(), [](int64_t value) { return value < 0; })) {
+    return Status::OK();
+  }
+
+  Node& child_node = *graph.GetNode(pad_node.OutputNodesBegin()->Index());
+  auto child_pads = child_node.GetMutableAttributes()["pads"].mutable_ints();
+  uint32_t child_pads_size = static_cast<uint32_t>(child_pads->size());
+
+  for (uint32_t pads_index = 2, child_index = 0; pads_index < pads_size / 2; pads_index++, child_index++) {
+    child_pads->Set(child_index, child_pads->Get(child_index) + pads_values[pads_index]);
+    uint32_t mirrored_child_index = child_index + (child_pads_size / 2);
+    uint32_t mirrored_pad_index = pads_index + (pads_size / 2);
+    child_pads->Set(mirrored_child_index, child_pads->Get(mirrored_child_index) + pads_values[mirrored_pad_index]);
+  }
+
+  graph_utils::RemoveNodeOutputEdges(graph, pad_node);
+  graph_utils::ReplaceNodeInput(child_node, 0, *pad_node.MutableInputDefs()[0]);
+  graph.RemoveNode(pad_node.Index());
+  rule_effect = RewriteRuleEffect::kRemovedCurrentNode;
+  return Status::OK();
+}
+}  // namespace onnxruntime
\ No newline at end of file
diff --git a/onnxruntime/core/optimizer/pad_fusion.h b/onnxruntime/core/optimizer/pad_fusion.h
new file mode 100644
index 0000000000000..a1b6978a83d1e
--- /dev/null
+++ b/onnxruntime/core/optimizer/pad_fusion.h
@@ -0,0 +1,27 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+
+#include "core/optimizer/rewrite_rule.h"
+
+namespace onnxruntime {
+/*
+ *   This fusion submerges a Pad operator to it's child
+ *   Conv or MaxPool operator, if and only if PadFusion::SatisfyCondition()
+ *   is true.
+ */
+class PadFusion : public RewriteRule {
+ public:
+  PadFusion() : RewriteRule("Pad_Fusion") {}
+
+  std::vector<std::string> TargetOpTypes() const noexcept override {
+    return {"Pad"};
+  }
+
+ private:
+  bool SatisfyCondition(const Graph& graph, const Node& node, const logging::Logger& logger) const override;
+
+  Status Apply(Graph& graph, Node& matmul_node, RewriteRuleEffect& rule_effect, const logging::Logger& logger) const override;
+};
+}  // namespace onnxruntime
\ No newline at end of file
diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_actions.cc b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_actions.cc
index f42766267b0f9..3d2a81ce7f8cd 100644
--- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_actions.cc
+++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_actions.cc
@@ -87,12 +87,19 @@ std::vector<NodeAndMoveInfo> WhereMoves() {
       MoveAll(q, ArgType::kOutput)};
   return moves;
 }
-QDQReplaceWithNew SplitReplacer() {
+QDQReplaceWithNew SplitReplacer(bool has_split_as_input) {
   NTO::NodeLocation dq{NTO::NodeType::kInput, 0};
+  NTO::NodeLocation target{NTO::NodeType::kTarget, 0};
   NTO::NodeLocation q{NTO::NodeType::kOutput, 0};
-  std::vector<NodeAndMoveInfo> moves{
-      MoveAndAppend(dq, ArgType::kInput, 0, ArgType::kInput),
-      MoveAll(q, ArgType::kOutput)};
+  std::vector<NodeAndMoveInfo> moves{MoveAndAppend(dq, ArgType::kInput, 0, ArgType::kInput)};
+
+  if (has_split_as_input) {
+    // Move the optional split input to the new node.
+    moves.push_back(MoveAndAppend(target, ArgType::kInput, 1, ArgType::kInput, true));
+  }
+
+  moves.push_back(MoveAll(q, ArgType::kOutput));
+
   return QDQReplaceWithNew(kOnnxDomain, "Split", std::move(moves));
 }
 
@@ -247,7 +254,12 @@ MatMulReplaceWithQLinear::MatMulReplaceWithQLinear()
 }
 
 Status SplitReplaceWithQuant::Run(Graph& graph, const NodesToOptimize& selected_nodes) const {
-  return SplitReplacer().Run(graph, selected_nodes);
+  const auto& target_node = selected_nodes.Target();
+  const auto& input_defs = target_node.InputDefs();
+
+  // The 'split' attribute became an optional input at opset 13.
+  bool has_split_as_input = target_node.SinceVersion() >= 13 && input_defs.size() == 2;
+  return SplitReplacer(has_split_as_input).Run(graph, selected_nodes);
 }
 
 Status MatMulReplaceWithQLinear::Run(Graph& graph, const NodesToOptimize& selected_nodes) const {
diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc
index 0e383c3031ca6..29178fe87f75c 100644
--- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc
+++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc
@@ -20,7 +20,7 @@ void SplitQDQRules(SelectorActionRegistry& qdq_selector_action_registry) {
   const std::string action_name{"dropSplitQDQ"};
   std::unique_ptr<Action> action = std::make_unique<QDQ::SplitReplaceWithQuant>();
 #if !defined(ORT_MINIMAL_BUILD)
-  std::unique_ptr<NodeSelector> selector = std::make_unique<QDQ::OutputVariadicSelector>();
+  std::unique_ptr<NodeSelector> selector = std::make_unique<QDQ::SplitSelector>(true /*req_equal_quant_params*/);
   qdq_selector_action_registry.RegisterSelectorAndAction(action_name,
                                                          {{"Split", {}}},
                                                          std::move(selector),
diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.cc b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.cc
index 5015e48fdb7b8..15b501c667046 100644
--- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.cc
+++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.cc
@@ -253,7 +253,39 @@ void InputVariadicSelector::UpdateBuilder(NodesToOptimizeIndicesBuilder& builder
   builder.num_input_defs = 1;  // set to 1 as the first input is variadic
 }
 
-void OutputVariadicSelector::UpdateBuilder(NodesToOptimizeIndicesBuilder& builder) const {
+bool SplitNodeGroupSelector::Check(const GraphViewer& graph_viewer,
+                                   const Node& node,
+                                   const std::vector<const Node*>& dq_nodes,
+                                   const std::vector<const Node*>& q_nodes) const {
+  if (!CheckQDQNodes(graph_viewer, node, dq_nodes, q_nodes, 1)) {
+    return false;
+  }
+
+  auto get_const_initializer = [&graph_viewer](const std::string& initializer_name) {
+    return graph_viewer.GetConstantInitializer(initializer_name, true);
+  };
+
+  const Node& dq_node = *dq_nodes.front();
+  int32_t dt_input = dq_node.InputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
+
+  // All Q outputs should have same data type and (optionally) equal quantization parameters as the input.
+  for (size_t q_idx = 0; q_idx < q_nodes.size(); q_idx++) {
+    const Node& q_node = *q_nodes[q_idx];
+
+    if (dt_input != q_node.OutputDefs()[0]->TypeAsProto()->tensor_type().elem_type()) {
+      return false;
+    }
+
+    if (req_equal_quant_params_ &&
+        !IsQDQPairSupported(q_node, dq_node, get_const_initializer, graph_viewer.ModelPath())) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+void SplitSelector::UpdateBuilder(NodesToOptimizeIndicesBuilder& builder) const {
   builder.num_output_defs = 1;  // set to 1 as the first output is variadic
 }
 
@@ -443,7 +475,6 @@ bool InstanceAndLayerNormalizationNodeGroupSelector::Check(const GraphViewer& gr
   }
 
   int32_t dt_input = dq_nodes[0]->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
-  int32_t dt_scale = dq_nodes[1]->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
   int32_t dt_bias = 0;
   bool has_bias = false;
   // bias is optional for LayerNorm
@@ -453,9 +484,9 @@ bool InstanceAndLayerNormalizationNodeGroupSelector::Check(const GraphViewer& gr
   }
   int32_t dt_output = q_nodes[0]->OutputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
 
-  // Input, output, and scale need to be the same type. The bias is int32.
+  // Input, output, need to be the same type. The bias is int32.
+  // Scale can be different with input for a16w8 case
   return (dt_input == dt_output) &&
-         (dt_input == dt_scale) &&
          (has_bias ? dt_bias == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32 : true);
 }
 
diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.h b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.h
index be7f7e0288eda..d0d7fb2c2af17 100644
--- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.h
+++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.h
@@ -115,6 +115,24 @@ class VariadicNodeGroupSelector : public NodeGroupSelector {
   bool allow_16bit_;
 };
 
+// DQ node -> Split -> multiple Q nodes with equal quantization types.
+// Optionally, the selector can require all input and output quantization parameters to be
+// equal and constant.
+class SplitNodeGroupSelector : public NodeGroupSelector {
+ public:
+  explicit SplitNodeGroupSelector(bool req_equal_quant_params = false)
+      : req_equal_quant_params_(req_equal_quant_params) {}
+
+ private:
+  bool Check(const GraphViewer& graph_viewer, const Node& node,
+             const std::vector<const Node*>& dq_nodes,
+             const std::vector<const Node*>& q_nodes) const override;
+
+  bool req_equal_quant_params_;  // If true, only selects a node group if the input and output
+                                 // quantization parameters are all equal/constant, which enables the
+                                 // optimizer to drop the Q/DQ ops if the group is assigned to the CPU EP.
+};
+
 // DQ nodes for X, W and optionally B -> node -> Q
 class ConvNodeGroupSelector : public NodeGroupSelector {
  public:
@@ -288,10 +306,11 @@ class InputVariadicSelector : public BaseSelector {
   void UpdateBuilder(NodesToOptimizeIndicesBuilder&) const override;
 };
 
-//  DQ -> node -> Variadic Q nodes
-class OutputVariadicSelector : public BaseSelector {
+//  DQ -> Split -> variadic Q nodes
+class SplitSelector : public BaseSelector {
  public:
-  OutputVariadicSelector() : BaseSelector(std::make_unique<VariadicNodeGroupSelector>()) {}
+  SplitSelector(bool req_equal_quant_params = false)
+      : BaseSelector(std::make_unique<SplitNodeGroupSelector>(req_equal_quant_params)) {}
 
   void UpdateBuilder(NodesToOptimizeIndicesBuilder&) const override;
 };
diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/shared/utils.cc b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/shared/utils.cc
index 3f1b2f0458bc0..544fe82a268c8 100644
--- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/shared/utils.cc
+++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/shared/utils.cc
@@ -27,14 +27,17 @@ void Selectors::RegisterSelector(const OpVersionsAndSelector::OpVersionsMap& ops
 }
 
 /* static methods to return different operator's OpVersionMap */
+
+// These are operators that do not change the data and therefore the input DQ and
+// output Q have the same scale and zero_point.
 static const OpVersionsAndSelector::OpVersionsMap GetMiscOpVersionsMap() {
   return {{"Gather", {}},
           {"Reshape", {}},
+          {"Expand", {}},
           {"Flatten", {}},
           {"Transpose", {}},
           {"MaxPool", {12}},
           {"Resize", {}},
-          {"Split", {}},
           {"Squeeze", {}},
           {"Unsqueeze", {}},
           {"Tile", {}}};
@@ -80,7 +83,8 @@ static const OpVersionsAndSelector::OpVersionsMap GetUnaryOpVersionsMap() {
           {"Neg", {}},
           {"DepthToSpace", {}},
           {"SpaceToDepth", {}},
-          {"Clip", {}}};
+          {"Clip", {}},
+          {"LpNormalization", {}}};
 }
 static const OpVersionsAndSelector::OpVersionsMap GetBinaryOpVersionsMap() {
   return {{"Add", {}},
@@ -96,6 +100,9 @@ static const OpVersionsAndSelector::OpVersionsMap GetVariadicOpVersionsMap() {
           {"Max", {}},
           {"Min", {}}};
 }
+static const OpVersionsAndSelector::OpVersionsMap GetSplitOpVersionsMap() {
+  return {{"Split", {}}};
+}
 static const OpVersionsAndSelector::OpVersionsMap GetConvOpVersionsMap() {
   return {{"Conv", {}}};
 }
@@ -169,6 +176,13 @@ void RegisterVariadicSelectors(Selectors& qdq_selectors) {
                                  std::move(selector));
 }
 
+void RegisterSplitSelector(Selectors& qdq_selectors) {
+  /* register selectors for Split op */
+  std::unique_ptr<NodeGroupSelector> selector = std::make_unique<SplitNodeGroupSelector>();
+  qdq_selectors.RegisterSelector(GetSplitOpVersionsMap(),
+                                 std::move(selector));
+}
+
 void RegisterConvSelector(Selectors& qdq_selectors) {
   /* register selector for conv op */
   std::unique_ptr<NodeGroupSelector> selector = std::make_unique<ConvNodeGroupSelector>();
@@ -246,6 +260,7 @@ void SelectorManager::CreateSelectors() {
   RegisterUnarySelectors(qdq_selectors_);
   RegisterBinarySelectors(qdq_selectors_);
   RegisterVariadicSelectors(qdq_selectors_);
+  RegisterSplitSelector(qdq_selectors_);
   RegisterConvSelector(qdq_selectors_);
   RegisterConvTransposeSelector(qdq_selectors_);
   RegisterMatMulSelector(qdq_selectors_);
diff --git a/onnxruntime/core/optimizer/selectors_actions/selector_action_transformer.cc b/onnxruntime/core/optimizer/selectors_actions/selector_action_transformer.cc
index e182b6c695d2f..546d52b6f1682 100644
--- a/onnxruntime/core/optimizer/selectors_actions/selector_action_transformer.cc
+++ b/onnxruntime/core/optimizer/selectors_actions/selector_action_transformer.cc
@@ -3,9 +3,10 @@
 
 #include "core/optimizer/selectors_actions/selector_action_transformer.h"
 
-#include <cassert>
 #include <algorithm>
+#include <cassert>
 #include <iterator>
+#include <string>
 #include <utility>
 
 #include "core/graph/op_identifier_utils.h"
@@ -56,9 +57,9 @@ const SelectorActionRegistry::Entry* SelectorActionRegistry::LookUp(const std::s
 }
 
 #if !defined(ORT_MINIMAL_BUILD)
-auto SelectorActionRegistry::LookUpByOpType(const std::string& op_type) const
+auto SelectorActionRegistry::LookUpByOpTypeAndDomain(const std::string& op_type, const std::string& domain) const
     -> std::vector<gsl::not_null<const Entry*>> {
-  const auto [range_begin, range_end] = op_type_to_entry_.equal_range(op_type);
+  const auto [range_begin, range_end] = op_type_to_entry_.equal_range(OpVersionsMapKey(op_type, domain));
   std::vector<gsl::not_null<const Entry*>> result{};
   result.reserve(std::distance(range_begin, range_end));
   std::transform(range_begin, range_end, std::back_inserter(result),
@@ -93,20 +94,15 @@ static Status MatchAndProcess(
   Status status = Status::OK();
 
   do {
-    // TODO: for now this just needs to support ONNX and Micrsoft Domain ops.
-    // If we ever had a transformer that was going to target non-ONNX ops,
-    // we'd need to rework a few things to include the op domain in the matches
-    if (node.Domain() != kOnnxDomain && node.Domain() != kMSDomain) {
-      break;
-    }
-
     std::optional<NodesToOptimizeIndices> node_selection_opt{};
     const SelectorActionRegistry::Entry* selector_action_entry_ptr = nullptr;
 
-    const auto selector_action_entries = selector_action_registry.LookUpByOpType(node.OpType());
+    const auto selector_action_entries =
+        selector_action_registry.LookUpByOpTypeAndDomain(node.OpType(), node.Domain());
+    std::string key = SelectorActionRegistry::OpVersionsMapKey(node.OpType(), node.Domain());
     for (const auto& entry : selector_action_entries) {
       // check the supported versions if specified
-      const auto& versions = entry->ops_and_versions.find(node.OpType())->second;
+      const auto& versions = entry->ops_and_versions.find(key)->second;
       if (!versions.empty()) {
         if (std::find(versions.cbegin(), versions.cend(), node.SinceVersion()) == versions.cend()) {
           continue;
diff --git a/onnxruntime/core/optimizer/selectors_actions/selector_action_transformer.h b/onnxruntime/core/optimizer/selectors_actions/selector_action_transformer.h
index 7eb162cc693f1..5caa949ebbe93 100644
--- a/onnxruntime/core/optimizer/selectors_actions/selector_action_transformer.h
+++ b/onnxruntime/core/optimizer/selectors_actions/selector_action_transformer.h
@@ -38,8 +38,20 @@ struct NodeSelector {
 // class to manage a set of selector and associated actions
 class SelectorActionRegistry {
  public:
+  // The key is a string representing the op, optionally specifying the domain using ':' as the
+  // separator with domain as the first part and operator as the second part, "<domain>:<operator>" or "<operator>".
+  // For ops in kOnnxDomain, the domain should be left unspecified ("<operator>").
+  // For ops in other domains, the domain should be specified ("<domain>:<operator>").
+  // Ex: "Conv", "com.microsoft:Conv", "com.ms.internal.nhwc:Conv"
   using OpVersionsMap = std::unordered_map<std::string, std::vector<ONNX_NAMESPACE::OperatorSetVersion>>;
 
+  // Helper function to create a key to OpVersionsMap using domain and op_type.
+  static std::string OpVersionsMapKey(std::string_view op_type, std::string_view domain = kOnnxDomain) {
+    return (domain == kOnnxDomain)
+               ? std::string{op_type}
+               : std::string{domain} + ":" + std::string{op_type};
+  }
+
   struct Entry {
     Entry(const std::string& name_in,
 #if !defined(ORT_MINIMAL_BUILD)
@@ -95,14 +107,15 @@ class SelectorActionRegistry {
 
 #if !defined(ORT_MINIMAL_BUILD)
   // return registered Entry or nullptr if not found
-  auto LookUpByOpType(const std::string& op_type) const -> std::vector<gsl::not_null<const Entry*>>;
+  auto LookUpByOpTypeAndDomain(const std::string& op_type,
+                               const std::string& domain) const -> std::vector<gsl::not_null<const Entry*>>;
 #endif  // !defined(ORT_MINIMAL_BUILD)
 
  private:
   std::unordered_map<std::string, const Entry> name_to_entry_;
 
 #if !defined(ORT_MINIMAL_BUILD)
-  // auxiliary mapping to enable lookup by op type
+  // auxiliary mapping to enable lookup by op type or "domain:op type"
   std::unordered_multimap<std::string, const Entry*> op_type_to_entry_;
 #endif  // !defined(ORT_MINIMAL_BUILD)
 };
diff --git a/onnxruntime/core/optimizer/transformer_memcpy.cc b/onnxruntime/core/optimizer/transformer_memcpy.cc
index 07f391f2ae430..0d7ab70eba613 100644
--- a/onnxruntime/core/optimizer/transformer_memcpy.cc
+++ b/onnxruntime/core/optimizer/transformer_memcpy.cc
@@ -2,6 +2,7 @@
 // Licensed under the MIT License.
 
 #include "transformer_memcpy.h"
+#include "core/common/logging/logging.h"
 #include "core/framework/kernel_registry_manager.h"
 #include "core/framework/execution_providers.h"
 #include "core/framework/utils.h"
@@ -16,12 +17,12 @@ class TransformerMemcpyImpl {
   TransformerMemcpyImpl(onnxruntime::Graph& graph, const std::string& provider)
       : graph_(graph), provider_(provider) {}
 
-  bool ModifyGraph(const KernelRegistryManager& schema_registries);
+  bool ModifyGraph(const KernelRegistryManager& schema_registries, const logging::Logger& logger, int& copy_node_counter);
 
  private:
   void ProcessDefs(onnxruntime::Node& node, const KernelRegistryManager& kernel_registries, InitializedTensorSet& initializers_consumed);
   void BuildDefsMapping(const onnxruntime::NodeArg* arg, const KernelRegistryManager& kernel_registries);
-  void AddCopyNode(onnxruntime::NodeArg* arg, bool is_input);
+  void AddCopyNode(onnxruntime::NodeArg* arg, bool is_input, const logging::Logger& logger);
   bool ProcessInitializers(const KernelRegistryManager& kernel_registries, const InitializedTensorSet& initializers_consumed);
 
  private:
@@ -61,11 +62,21 @@ static const onnx::TensorProto* GetInitializer(const Graph& graph, const std::st
 
 // very simple GraphTransformer that uses TransformerMemcpyImpl for each graph
 // and mainly provides the subgraph recursion functionality
-common::Status MemcpyTransformer::ApplyImpl(Graph& graph, bool& modified, int graph_level, const logging::Logger& logger) const {
+common::Status MemcpyTransformer::ApplyImpl(Graph& graph, bool& modified, int graph_level,
+                                            const logging::Logger& logger) const {
   for (auto& provider : provider_types_) {
     if (!utils::ProviderIsCpuBased(provider)) {
       TransformerMemcpyImpl copy_impl(graph, provider);
-      auto current_modified = copy_impl.ModifyGraph(registry_manager_);
+
+      int copy_node_counter = 0;
+      auto current_modified = copy_impl.ModifyGraph(registry_manager_, logger, copy_node_counter);
+      if (copy_node_counter > 0 && provider == kCudaExecutionProvider) {
+        LOGS(logger, WARNING) << copy_node_counter << " Memcpy nodes are added to the graph " << graph.Name()
+                              << " for " << provider
+                              << ". It might have negative impact on performance (including unable to run CUDA graph). "
+                              << "Set session_options.log_severity_level=1 to see the detail logs before this message.";
+      }
+
       modified = modified || current_modified;
       break;
     }
@@ -111,7 +122,9 @@ This transformer does not currently optimize copies between, e.g., two different
 
 */
 
-bool TransformerMemcpyImpl::ModifyGraph(const KernelRegistryManager& kernel_registries) {
+bool TransformerMemcpyImpl::ModifyGraph(const KernelRegistryManager& kernel_registries,
+                                        const logging::Logger& logger,
+                                        int& copy_node_counter) {
   bool modified = false;
   InitializedTensorSet initializers_consumed;
   // find defs that require copy
@@ -137,19 +150,22 @@ bool TransformerMemcpyImpl::ModifyGraph(const KernelRegistryManager& kernel_regi
     // For inputs we need to create a copy node only when the input is connected to both provider
     // and non-provider nodes. Otherwise utils::CopyInputsAcrossDevices() will do the job.
     if (provider_input_defs_.count(arg) && non_provider_input_defs_.count(arg)) {
-      AddCopyNode(const_cast<onnxruntime::NodeArg*>(arg), true);
+      AddCopyNode(const_cast<onnxruntime::NodeArg*>(arg), true, logger);
+      copy_node_counter++;
       modified = true;
     }
 
   for (auto arg : non_provider_output_defs_)
     if (provider_input_defs_.count(arg)) {
-      AddCopyNode(arg, true);
+      AddCopyNode(arg, true, logger);
+      copy_node_counter++;
       modified = true;
     }
 
   for (auto arg : provider_output_defs_)
     if (non_provider_input_defs_.count(arg)) {
-      AddCopyNode(arg, false);
+      AddCopyNode(arg, false, logger);
+      copy_node_counter++;
       modified = true;
     }
 
@@ -176,7 +192,8 @@ bool TransformerMemcpyImpl::ModifyGraph(const KernelRegistryManager& kernel_regi
         // (the name will be the same as the parent node's implicit input)
         const auto* node_arg_in_current_graph_level = *provider_input_defs_.find(arg);
 
-        AddCopyNode(const_cast<onnxruntime::NodeArg*>(node_arg_in_current_graph_level), true);
+        AddCopyNode(const_cast<onnxruntime::NodeArg*>(node_arg_in_current_graph_level), true, logger);
+        copy_node_counter++;
         modified = true;
       }
     }
@@ -232,7 +249,7 @@ void TransformerMemcpyImpl::ProcessDefs(onnxruntime::Node& node, const KernelReg
       if (!arg->Exists())
         continue;
 
-      if (kci && kci->kernel_def->IsOutputOnCpu(i))
+      if (utils::IsOutputOnCpu(node, kci, i))
         non_provider_output_defs_.insert(arg);
       else
         provider_output_defs_.insert(arg);
@@ -291,13 +308,13 @@ void TransformerMemcpyImpl::BuildDefsMapping(const onnxruntime::NodeArg* arg, co
         if (!kci || !utils::IsInputOnCpu(it, kci, arg_input_index)) provider_input_nodes_[arg].insert(&it);
       }
       if (arg_output_index != -1) {
-        if (!kci || !kci->kernel_def->IsOutputOnCpu(arg_output_index)) provider_output_nodes_[arg].insert(&it);
+        if (!kci || !utils::IsOutputOnCpu(it, kci, arg_output_index)) provider_output_nodes_[arg].insert(&it);
       }
     }
   }
 }
 
-void TransformerMemcpyImpl::AddCopyNode(onnxruntime::NodeArg* arg, bool is_input) {
+void TransformerMemcpyImpl::AddCopyNode(onnxruntime::NodeArg* arg, bool is_input, const logging::Logger& logger) {
   // create unique name for new def
   std::string new_def_name = graph_.GenerateNodeArgName(arg->Name() + "_" + provider_);
 
@@ -309,6 +326,9 @@ void TransformerMemcpyImpl::AddCopyNode(onnxruntime::NodeArg* arg, bool is_input
   std::string new_node_name = graph_.GenerateNodeName("Memcpy");
 
   const auto op_name = is_input ? "MemcpyFromHost" : "MemcpyToHost";
+  LOGS(logger, INFO) << "Add " << op_name << (is_input ? " after " : " before ") << arg->Name()
+                     << " for " << provider_;
+
   auto& new_node = graph_.AddNode(new_node_name, op_name, "Copy from/to host memory",
                                   std::vector<onnxruntime::NodeArg*>{src_arg},
                                   std::vector<onnxruntime::NodeArg*>{dst_arg});
@@ -384,8 +404,8 @@ bool TransformerMemcpyImpl::ProcessInitializers(const KernelRegistryManager& ker
     // normally initializers are only inputs, but things may change with ops like assign
     ORT_THROW_IF_ERROR(Node::ForEachWithIndex(
         p_node->OutputDefs(),
-        [kci, &dup_replacements](const onnxruntime::NodeArg& arg, size_t index) {
-          if (kci->kernel_def->IsOutputOnCpu(index)) {
+        [kci, &p_node, &dup_replacements](const onnxruntime::NodeArg& arg, size_t index) {
+          if (utils::IsOutputOnCpu(*p_node, kci, index)) {
             ORT_ENFORCE(dup_replacements.find(&arg) == dup_replacements.end());
           }
           return Status::OK();
diff --git a/onnxruntime/core/optimizer/transpose_optimization/onnx_transpose_optimization.cc b/onnxruntime/core/optimizer/transpose_optimization/onnx_transpose_optimization.cc
index 2c11bf144999e..c479b685f9267 100644
--- a/onnxruntime/core/optimizer/transpose_optimization/onnx_transpose_optimization.cc
+++ b/onnxruntime/core/optimizer/transpose_optimization/onnx_transpose_optimization.cc
@@ -4,9 +4,11 @@
 #include "onnx_transpose_optimization.h"
 
 #include <algorithm>
+#include <cassert>
 #include <iostream>
 #include <unordered_map>
 #include <unordered_set>
+#include <utility>
 #include <vector>
 
 #include "core/common/gsl.h"
@@ -17,6 +19,9 @@ namespace onnx_transpose_optimization {
 
 /////// <Helper Utils> ///////
 /* Small utilities for editing nodes and manipulating axes/permutations */
+static constexpr bool IsOnnxDomain(std::string_view domain) {
+  return (domain == onnxruntime::kOnnxDomain) || (domain == onnxruntime::kOnnxDomainAlias);
+}
 
 static std::vector<int64_t> DataInt64(api::TensorRef& tensor) {
   std::vector<uint8_t> raw_data = tensor.Data();
@@ -93,6 +98,19 @@ static std::unique_ptr<api::NodeRef> MakeSqueezeOrUnsqueeze(int64_t opset, api::
   return graph.AddNode(op_type, inputs, /*num_outputs*/ 1);
 }
 
+// Use to create a QuantizeLinear or DequantizeLinear node. Does not update output ValueInfo. Adds axis if needed.
+static std::unique_ptr<api::NodeRef> MakeQOrDQ(api::GraphRef& graph, std::string_view domain, std::string_view op_type,
+                                               std::vector<std::string_view> inputs,
+                                               std::optional<int64_t> axis) {
+  std::unique_ptr<api::NodeRef> node = graph.AddNode(op_type, inputs, /* num_outputs */ 1, domain);
+  // only set if provided and not the default
+  if (axis && axis != 1) {
+    node->SetAttributeInt("axis", *axis);
+  }
+
+  return node;
+}
+
 // Returns whether perm is a valid permutation (contains each value from 0 to perm.size() - 1 exactly once)
 static bool IsValidPerm(const std::vector<int64_t>& perm) {
   size_t rank = perm.size();
@@ -117,6 +135,263 @@ static std::optional<std::vector<int64_t>> GetPermAttrIfValid(const api::NodeRef
   return perm;
 }
 
+static inline bool NormalizeAndValidateAxis(int64_t& axis, size_t rank) {
+  int64_t rank_int = gsl::narrow_cast<int64_t>(rank);
+  if (axis < 0) {
+    axis += rank_int;
+  }
+
+  return axis >= 0 && axis < rank_int;
+}
+
+/// <summary>
+/// Check if an output value has a single consumer that is a node.
+/// </summary>
+/// <param name="single_consumer">Consumer node if found.</param>
+/// <returns>True if there is a single consumer node.</returns>
+static bool OutputValueHasSingleConsumerNode(const api::GraphRef& graph, const api::NodeRef& node, size_t output_idx,
+                                             std::unique_ptr<api::NodeRef>& single_consumer) {
+  auto value = node.Outputs()[output_idx];
+  auto consumers = graph.GetValueConsumers(value);
+
+  if (consumers->comprehensive && (consumers->nodes.size() == 1)) {
+    single_consumer = std::move(consumers->nodes[0]);
+  } else {
+    single_consumer.reset();
+  }
+
+  return single_consumer != nullptr;
+}
+
+/// return the DQ node if value_name is produced by a DQ node
+static std::unique_ptr<api::NodeRef> GetDQIfProducingValue(const api::GraphRef& graph, std::string_view value_name) {
+  auto maybe_dq_node = graph.GetNodeProducingOutput(value_name);
+
+  return (maybe_dq_node != nullptr && maybe_dq_node->OpType() == "DequantizeLinear") ? std::move(maybe_dq_node)
+                                                                                     : std::unique_ptr<api::NodeRef>();
+}
+
+/// <summary>
+/// Return a DequantizeLinear node if it's input is a constant initializer and it has a single consumer.
+/// In this case the initializer can be updated in-place by UnsqueezeInput or TransposeInput.
+/// </summary>
+/// <param name="graph">Current graph</param>
+/// <param name="value_name">Value to check if produced by a DQ node who's input is a constant initializer</param>
+/// <returns>NodeRef for DQ node if it meets the requirements.</returns>
+static std::unique_ptr<api::NodeRef> GetDQWithConstInitializerInputAndSingleConsumer(const api::GraphRef& graph,
+                                                                                     std::string_view value_name) {
+  std::unique_ptr<api::NodeRef> result;
+  auto dq_node = GetDQIfProducingValue(graph, value_name);
+
+  if (dq_node) {
+    do {
+      auto dq_input = dq_node->Inputs()[0];
+      auto dq_constant = graph.GetConstant(dq_input);
+
+      // input to DQ must be a constant initializer
+      if (!dq_constant) {
+        break;
+      }
+
+      // For now keep it simple and don't support per-axis quantization as that would require updating the axis of
+      // the DQ node during TransposeInputImpl and UnsqueezeInput.
+      auto dq_scale = graph.GetConstant(dq_node->Inputs()[1]);
+      if (!dq_scale || dq_scale->NumElements() != 1) {
+        break;
+      }
+
+      // need to know all the initializer consumers as we're potentially going to modify it directly
+      auto initializer_consumers = graph.GetValueConsumers(dq_input);
+      if (!initializer_consumers->comprehensive) {
+        break;
+      }
+
+      std::unique_ptr<api::NodeRef> consumer;
+      if (!OutputValueHasSingleConsumerNode(graph, *dq_node, 0, consumer)) {
+        break;
+      }
+
+      result = std::move(dq_node);
+    } while (false);
+  }
+
+  return result;
+}
+
+/// <summary>
+/// Insert a Q -> DQ pair after the node following the DQ by using scale and zp info from the preceding DQ node.
+/// DQ -> next node => DQ -> next node -> Q -> DQ.
+/// This is only called for Transpose and Unsqueeze nodes.
+/// </summary>
+/// <param name="dq_node">DQ node.</param>
+/// <param name="next_node">Node following DQ node.</param>
+/// <param name="new_dq_node">New DQ node at end of DQ -> next_node -> Q -> DQ.</param>
+/// <returns>True if insert was successful.</returns>
+static bool MakeQDQNodeUnit(api::GraphRef& graph, const api::NodeRef& dq_node) {
+  std::unique_ptr<api::NodeRef> single_consumer_node;
+  if (!OutputValueHasSingleConsumerNode(graph, dq_node, 0, single_consumer_node)) {
+    // should never happen as caller should have checked previously
+    return false;
+  }
+
+  auto& next_node = *single_consumer_node;
+  assert(next_node.OpType() == "Transpose" || next_node.OpType() == "Unsqueeze");
+
+  const auto dq_domain = dq_node.Domain();
+  const auto& dq_inputs = dq_node.Inputs();
+  const bool is_transpose = next_node.OpType() == "Transpose";
+
+  const auto scale_input = dq_inputs[1];
+  const auto scale_value_info = graph.GetValueInfo(scale_input);
+  std::optional<std::string_view> zp_input;
+  std::optional<std::unique_ptr<api::ValueInfoRef>> zp_value_info;
+
+  auto scale_shape = scale_value_info->Shape();
+  if (!scale_shape && is_transpose) {
+    // axis potentially needs updating due to the transpose but we don't have the required info to do it.
+    return false;
+  }
+
+  if (dq_inputs.size() > 2) {
+    zp_input = dq_inputs[2];
+    zp_value_info = graph.GetValueInfo(zp_input.value());
+  }
+
+  // per-axis quantization if not a scalar (shape is empty for scalar).
+  // note there could be an axis value as the onnx spec says that is ignored for per-tensor quantization,
+  // so we have to check the shape.
+  auto update_dq_axis = scale_shape && !scale_shape->empty();
+  int64_t axis = dq_node.GetAttributeIntDefault("axis", 1);
+
+  if (update_dq_axis && is_transpose) {
+    // update axis.
+    auto perm = GetPermAttrIfValid(next_node);
+    assert(perm.has_value());  // onnx shape inferencing checks that `perm` is valid
+    NormalizeAndValidateAxis(axis, scale_shape->size());
+    axis = InvertPerm(*perm)[gsl::narrow_cast<size_t>(axis)];
+  }
+
+  auto next_node_output_name = next_node.Outputs()[0];
+  auto next_node_output_shape = graph.GetValueInfo(next_node_output_name)->Shape();
+
+  // setup Q node inputs. we don't connect it to next_node yet as we will move the output of that to the new DQ first.
+  std::vector<std::string_view> inputs = {"", scale_input};
+  if (zp_input) {
+    inputs.push_back(zp_input.value());
+  }
+
+  // Add Q
+  auto new_q_node = MakeQOrDQ(graph, dq_domain, "QuantizeLinear", inputs, axis);
+  auto q_node_outputs = new_q_node->Outputs();
+
+  // copy value info from the dq input for the type information, and update the shape to match next_node's output
+  graph.CopyValueInfo(dq_node.Inputs()[0], q_node_outputs[0]);  // Q produces same type as the dq_node input
+  auto q_node_value_info = graph.GetValueInfo(q_node_outputs[0]);
+  q_node_value_info->SetShape(next_node_output_shape ? &*next_node_output_shape : nullptr);
+
+  // update input to connect the DQ to the Q we just added. re-use scale and zp.
+  inputs[0] = new_q_node->Outputs()[0];
+
+  // Add DQ
+  auto new_dq_node = MakeQOrDQ(graph, dq_domain, "DequantizeLinear", inputs, axis);
+  auto dq_node_outputs = new_dq_node->Outputs();
+
+  // straight copy of value info as the type and shape are the same as next_node's output
+  graph.CopyValueInfo(next_node_output_name, dq_node_outputs[0]);
+
+  // move next_node output to the new DQ node in case it was a graph output, and connect next_node with the new Q node
+  graph.MoveOutput(next_node, 0, *new_dq_node, 0);
+  auto new_next_node_output_name = next_node.Outputs()[0];
+  new_q_node->SetInput(0, new_next_node_output_name);
+  graph.CopyValueInfo(dq_node_outputs[0], new_next_node_output_name);
+
+  return true;
+}
+
+/// <summary>
+/// Check if a DQ -> Q pair have matching type/scale/zero point.
+/// If there's no operator between them, and they match, they are redundant and can be removed.
+/// </summary>
+/// <returns>True if they match.</returns>
+static bool CheckQDQNodePairMatch(const api::GraphRef& graph,
+                                  const api::NodeRef& dq_node, const api::NodeRef& q_node) {
+  bool match = false;
+
+  do {
+    if (dq_node.Domain() != q_node.Domain()) {
+      break;
+    }
+
+    auto t1 = graph.GetValueInfo(dq_node.Inputs()[0])->DType();
+    auto t2 = graph.GetValueInfo(q_node.Outputs()[0])->DType();
+
+    if (t1 == api::DataType::UNDEFINED || t2 == api::DataType::UNDEFINED || t1 != t2) {
+      break;
+    }
+
+    auto dq_scale = dq_node.Inputs()[1];
+    auto q_scale = q_node.Inputs()[1];
+
+    if (dq_scale != q_scale) {
+      auto dq_scale_value = graph.GetConstant(dq_scale);
+      auto q_scale_value = graph.GetConstant(q_scale);
+      if (!dq_scale_value || !q_scale_value) {
+        break;  // non-const input
+      }
+
+      if (dq_scale_value->Data() != q_scale_value->Data()) {
+        break;
+      }
+    }
+
+    auto dq_zp = dq_node.Inputs().size() > 2 ? dq_node.Inputs()[2] : "";
+    auto q_zp = q_node.Inputs().size() > 2 ? q_node.Inputs()[2] : "";
+
+    if (dq_zp != q_zp) {
+      std::optional<std::unique_ptr<api::TensorRef>> dq_scale_value;
+      std::optional<std::unique_ptr<api::TensorRef>> q_scale_value;
+      if (dq_zp != "") {
+        dq_scale_value = graph.GetConstant(dq_zp);
+        if (!dq_scale_value.value()) {
+          break;  // non-const input
+        }
+      }
+
+      if (q_zp != "") {
+        q_scale_value = graph.GetConstant(q_zp);
+        if (!q_scale_value.value()) {
+          break;  // non-const input
+        }
+      }
+
+      if (dq_scale_value.has_value() && q_scale_value.has_value()) {
+        if (dq_scale_value->get()->Data() != q_scale_value->get()->Data()) {
+          break;
+        }
+      } else {
+        // check the input with a value matches the default zp value of 0
+        if (dq_scale_value.has_value()) {
+          auto data = dq_scale_value->get()->Data();
+          if (!std::all_of(data.begin(), data.end(), [](auto value) { return value == 0; })) {
+            break;
+          }
+        } else {
+          // q_scale_value must have a value to get here
+          auto data = q_scale_value->get()->Data();
+          if (!std::all_of(data.begin(), data.end(), [](auto value) { return value == 0; })) {
+            break;
+          }
+        }
+      }
+    }
+
+    match = true;
+
+  } while (false);
+
+  return match;
+}
+
 // Adds rank to negative axes and checks that axes are unique and within [0, rank). Returns false if invalid.
 static bool NormalizeAndValidateAxes(std::vector<int64_t>& axes, size_t rank) {
   int64_t rank_int = gsl::narrow_cast<int64_t>(rank);
@@ -134,15 +409,6 @@ static bool NormalizeAndValidateAxes(std::vector<int64_t>& axes, size_t rank) {
   return true;
 }
 
-static inline bool NormalizeAndValidateAxis(int64_t& axis, size_t rank) {
-  int64_t rank_int = gsl::narrow_cast<int64_t>(rank);
-  if (axis < 0) {
-    axis += rank_int;
-  }
-
-  return axis >= 0 && axis < rank_int;
-}
-
 // Read int64 data from attribute or input, depending on whether model opset < provided opset
 static std::optional<std::vector<int64_t>> ReadFromAttrOrInput(OptimizerCtx& ctx, api::NodeRef& node,
                                                                std::string_view attr_name, size_t inp_index,
@@ -345,6 +611,12 @@ static std::vector<int64_t> SortedAxesForTransposedInput(const std::vector<int64
   return new_axes;
 }
 
+static void UpdateDQNodeInputAndShape(api::GraphRef& graph, api::NodeRef& dq, std::string_view new_input) {
+  dq.SetInput(0, new_input);
+  auto new_shape = *graph.GetValueInfo(new_input)->Shape();
+  graph.GetValueInfo(dq.Outputs()[0])->SetShape(&new_shape);
+}
+
 /////// </Helper Utils> ///////
 
 /////// <Core Helpers> ///////
@@ -357,51 +629,102 @@ static std::string_view HelpHandleUnsqueeze(HandlerArgs& args, const std::vector
 // broadcasting.
 static void UnsqueezeInput(OptimizerCtx& ctx, api::NodeRef& node, size_t i, const std::vector<int64_t>& axes) {
   std::string_view input = node.Inputs()[i];
-  // Remove this node as a consumer
-  node.SetInput(i, "");
 
   std::unique_ptr<api::TensorRef> constant = ctx.graph.GetLocalConstant(input);
-  auto consumers = ctx.graph.GetValueConsumers(input);
+
+  // allow a constant initializer coming via a DQ node with a single consumer
+  std::unique_ptr<api::NodeRef> dq_node;
+  std::string_view constant_dq_input;
+
+  if (!constant) {
+    // look past a DQ node for a constant initializer. essentially we pretend the DQ node doesn't exist
+    // to enable directly making changes to the initializer. any nodes added for other consumers of the initializer
+    // in 'Case 1' are prior to the DQ so we don't break up any QDQ node units.
+    dq_node = GetDQWithConstInitializerInputAndSingleConsumer(ctx.graph, input);
+    if (dq_node) {
+      // underlying string for the input name is in the Node so it's safe to store in string_view constant_dq_input
+      constant_dq_input = dq_node->Inputs()[0];
+      constant = ctx.graph.GetLocalConstant(constant_dq_input);
+      // remove the DQ node as a consumer of the initializer while we modify things
+      dq_node->SetInput(0, "");
+    }
+  }
+
+  // Clear the input, which also removes this node's input as a consumer of the value.
+  // NOTE: the node may have multiple inputs consuming the value.
+  node.SetInput(i, "");
+  auto value_to_modify = dq_node ? constant_dq_input : input;
+  auto consumers = ctx.graph.GetValueConsumers(value_to_modify);
 
   // Case 1: input is a constant with a known list of consumer nodes
   if (constant != nullptr && consumers->comprehensive) {
-    // We will reshape the initializer. If there are existing consumers, still reshape it but add Squeeze nodes
+    // We will reshape the initializer. If there are existing consumers, reshape it and add Squeeze nodes
     // to counteract its effect. If they later Unsqueeze the same input, the Squeeze nodes will simply be deleted
     // (see Case 2).
     if (consumers->nodes.size() > 0) {
-      auto squeeze_ptr = MakeSqueezeOrUnsqueeze(ctx.opset, ctx.graph, "Squeeze", input, axes);
+      auto squeeze_ptr = MakeSqueezeOrUnsqueeze(ctx.opset, ctx.graph, "Squeeze", value_to_modify, axes);
       api::NodeRef& squeeze = *squeeze_ptr;
       std::string_view sq_out = squeeze.Outputs()[0];
-      ctx.graph.CopyValueInfo(input, sq_out);
-      ReplaceValueReferences(consumers->nodes, input, sq_out);
+      ctx.graph.CopyValueInfo(value_to_modify, sq_out);
+      ReplaceValueReferences(consumers->nodes, value_to_modify, sq_out);
     }
+
     auto new_shape = UnsqueezeShape(constant->Shape(), axes);
-    ctx.graph.ReshapeInitializer(input, new_shape);
-    node.SetInput(i, input);
+    ctx.graph.ReshapeInitializer(value_to_modify, new_shape);
+
+    if (dq_node) {
+      UpdateDQNodeInputAndShape(ctx.graph, *dq_node, constant_dq_input);
+    }
+
+    node.SetInput(i, input);  // restore the original connection
     return;
   }
 
   // Case 2: input is a Squeeze node with matching axes
   std::unique_ptr<api::NodeRef> inp_node = ctx.graph.GetNodeProducingOutput(input);
+
+  // look past a DQ node for a Squeeze to cancel
+  if (inp_node && inp_node->OpType() == "DequantizeLinear") {
+    dq_node = std::move(inp_node);
+    auto dq_input = dq_node->Inputs()[0];
+    inp_node = ctx.graph.GetNodeProducingOutput(dq_input);
+    consumers = ctx.graph.GetValueConsumers(dq_input);
+  }
+
   if (inp_node != nullptr && inp_node->IsOp("Squeeze")) {
     const std::vector<std::string_view>& inp_node_inputs = inp_node->Inputs();
     std::optional<std::vector<int64_t>> squeeze_axes = std::nullopt;
     squeeze_axes = ReadFromAttrOrInput(ctx, *inp_node, "axes", /*inp_index*/ 1, /*opset*/ 13);
     if (squeeze_axes != std::nullopt && *squeeze_axes == axes) {
+      if (dq_node) {
+        UpdateDQNodeInputAndShape(ctx.graph, *dq_node, inp_node_inputs[0]);
+        node.SetInput(i, dq_node->Outputs()[0]);
+      } else {
+        node.SetInput(i, inp_node_inputs[0]);
+      }
+
       // Remove the Squeeze node if possible
-      if (consumers->comprehensive && consumers->nodes.size() == 0) {
+      // if there's a DQ node the `consumers` list still includes it so allow for that.
+      // in that case UpdateDQNodeInputAndShape already updated the input of the DQ node so it's safe to remove it.
+      if (consumers->comprehensive && consumers->nodes.size() == size_t(dq_node ? 1 : 0)) {
         ctx.graph.RemoveNode(*inp_node);
+
         if (ctx.opset >= 13 && !ctx.graph.HasValueConsumers(inp_node_inputs[1])) {
           ctx.graph.RemoveInitializer(inp_node_inputs[1]);
         }
       }
-      node.SetInput(i, inp_node_inputs[0]);
+
       return;
     }
 
     // Axes don't match. Fall through to Case 3.
   }
 
+  // any DQ node special casing doesn't apply anymore, so go back to the original inp_node
+  if (dq_node) {
+    inp_node = std::move(dq_node);
+  }
+
   // Case 3: Add an Unsqueeze node.
   auto unsqueeze_ptr = MakeSqueezeOrUnsqueeze(ctx.opset, ctx.graph, "Unsqueeze", input, axes);
   api::NodeRef& unsqueeze = *unsqueeze_ptr;
@@ -426,6 +749,10 @@ static void UnsqueezeInput(OptimizerCtx& ctx, api::NodeRef& node, size_t i, cons
   }
 
   node.SetInput(i, unsq_out);
+
+  if (inp_node != nullptr && inp_node->OpType() == "DequantizeLinear") {
+    MakeQDQNodeUnit(ctx.graph, *inp_node);
+  }
 }
 
 static void Permute1DConstant(api::GraphRef& graph, api::NodeRef& node, api::TensorRef& constant,
@@ -453,83 +780,164 @@ static void Permute1DConstant(api::GraphRef& graph, api::NodeRef& node, api::Ten
 
 // Replaces ith input to node with transposed value. Might create a new Transpose node, find an existing one,
 // or transpose an initializer.
-void TransposeInput(api::GraphRef& graph, api::NodeRef& node, size_t i,
-                    const std::vector<int64_t>& perm, const std::vector<int64_t>& perm_inv) {
+static void TransposeInputImpl(api::GraphRef& graph, api::NodeRef& node, size_t i,
+                               const std::vector<int64_t>& perm, const std::vector<int64_t>& perm_inv) {
   std::string_view input = node.Inputs()[i];
-  // Remove this node as a consumer
-  node.SetInput(i, "");
+
   // Only local constants are editable
   std::unique_ptr<api::TensorRef> constant = graph.GetLocalConstant(input);
-  auto consumers = graph.GetValueConsumers(input);
+
+  // allow a constant initializer coming via a DQ node with a single consumer
+  std::unique_ptr<api::NodeRef> dq_node;
+  std::string_view constant_dq_input;
+
+  if (!constant) {
+    // look past a DQ node for a constant initializer. essentially we pretend the DQ node doesn't exist
+    // to enable directly making changes to the initializer. any nodes added for other consumers of the initializer
+    // in 'Case 1' are prior to the DQ so we don't break up any QDQ node units.
+    dq_node = GetDQWithConstInitializerInputAndSingleConsumer(graph, input);
+    if (dq_node) {
+      // underlying string for the input name is in the Node so it's safe to store in string_view constant_dq_input
+      constant_dq_input = dq_node->Inputs()[0];
+      constant = graph.GetLocalConstant(constant_dq_input);
+      // remove the DQ node as a consumer of the initializer while we modify things
+      dq_node->SetInput(0, "");
+    }
+  }
+
+  // Clear the input, which also removes this node's input as a consumer of the value.
+  // NOTE: the node may have multiple inputs consuming the value.
+  node.SetInput(i, "");
+
+  auto constant_to_modify = dq_node ? constant_dq_input : input;
+  auto consumers = graph.GetValueConsumers(constant_to_modify);
 
   // Case 1: input is a constant with a known list of consumer nodes
   if (constant != nullptr && consumers->comprehensive) {
-    // Input is scalar, return early.
-    if (constant->Shape().size() == 1 && constant->Shape()[0] == 0) {
+    // we modify the initializer in-place and need to reconnect things up when we're done. this helper will
+    // do that when it goes out of scope. if we have manually reconnected, input or constant_dq_input is
+    // set to an empty string.
+    auto reconnect_nodes = gsl::finally([i, &node, &dq_node, &input, &constant_dq_input] {
+      if (!input.empty()) {
+        node.SetInput(i, input);
+      }
+
+      if (!constant_dq_input.empty()) {
+        dq_node->SetInput(0, constant_dq_input);
+      }
+    });
+
+    // If there is only one element return early as the transpose won't change the data
+    if (constant->NumElements() == 1) {
       return;
     }
+
     // This is a special case where the constant is 1D with length == perm.
-    // TODO: TransposeInitializer should be updated to handle this case.
+    //   e.g. it provides a set of values that are relative to the input axes like the `sizes` input for Resize
     // Permute1DConstant permutes the constant and adds a new initializer. The old initializer is removed only if
     // there are no other consumers.
     if (constant->Shape().size() == 1 && constant->Shape()[0] == gsl::narrow_cast<int64_t>(perm.size())) {
-      Permute1DConstant(graph, node, *constant, i, input, perm);
+      auto& node_to_update = dq_node ? *dq_node : node;
+      Permute1DConstant(graph, node_to_update, *constant, i, constant_to_modify, perm);
+
+      // unset updated input so reconnect_nodes doesn't change it back
+      if (dq_node) {
+        constant_dq_input = "";
+      } else {
+        input = "";
+      }
+
       return;
     }
+
     if (consumers->nodes.size() > 0) {
       // Transpose the initializer. If there are existing consumers, add Transpose nodes to them using perm_inv
       // to counteract the effect. These Transposes will hopefully be optimized out later.
-      auto transpose_inv_ptr = MakeTranspose(graph, input, perm_inv);
+      auto transpose_inv_ptr = MakeTranspose(graph, constant_to_modify, perm_inv);
       api::NodeRef& transpose_inv = *transpose_inv_ptr;
       std::string_view transpose_out = transpose_inv.Outputs()[0];
-      graph.CopyValueInfo(input, transpose_out);
-      ReplaceValueReferences(consumers->nodes, input, transpose_out);
+      graph.CopyValueInfo(constant_to_modify, transpose_out);
+      ReplaceValueReferences(consumers->nodes, constant_to_modify, transpose_out);
+    }
+
+    graph.TransposeInitializer(constant_to_modify, perm);
+
+    if (dq_node) {
+      UpdateDQNodeInputAndShape(graph, *dq_node, constant_to_modify);
+      constant_dq_input = "";  // DQ input was already updated so we don't need reconnect_nodes to handle it
     }
-    graph.TransposeInitializer(input, perm);
-    node.SetInput(i, input);
+
     return;
   }
 
   // Case 2: input is a Transpose node
   std::unique_ptr<api::NodeRef> inp_node = graph.GetNodeProducingOutput(input);
+
+  // Look past a DQ for the Transpose
+  if (inp_node && inp_node->OpType() == "DequantizeLinear") {
+    dq_node = std::move(inp_node);
+    auto dq_input = dq_node->Inputs()[0];
+    inp_node = graph.GetNodeProducingOutput(dq_input);
+    consumers = graph.GetValueConsumers(dq_input);
+  }
+
   if (inp_node != nullptr && inp_node->IsOp("Transpose")) {
     std::optional<std::vector<int64_t>> perm2 = GetPermAttrIfValid(*inp_node);
     if (perm2 != std::nullopt && perm2->size() == perm.size()) {
       // If they cancel, use pre_transpose_value and remove Transpose if possible.
       if (*perm2 == perm_inv) {
         std::string_view pre_transpose_value = inp_node->Inputs()[0];
-        if (consumers->comprehensive && consumers->nodes.size() == 0) {
+
+        if (dq_node) {
+          UpdateDQNodeInputAndShape(graph, *dq_node, pre_transpose_value);
+          node.SetInput(i, dq_node->Outputs()[0]);
+        } else {
+          node.SetInput(i, pre_transpose_value);
+        }
+
+        // Remove the Transpose node if possible
+        // if there's a DQ node the `consumers` list still includes it so allow for that.
+        // in that case UpdateDQNodeInputAndShape already updated the input of the DQ node so it's safe to remove it.
+        if (consumers->comprehensive && consumers->nodes.size() == size_t(dq_node ? 1 : 0)) {
           graph.RemoveNode(*inp_node);
         }
-        node.SetInput(i, pre_transpose_value);
-        return;
-      } else if (*perm2 == perm) {
-        // we are trying to add a duplicate transpose.
-        // do nothing and return
+
         return;
       }
 
-      // Otherwise, compose the perm and Transpose pre_transpose_value. Cost is the same and we may be able to remove
-      // the other Transpose.
-      const std::vector<int64_t>& perm_combined = ComposePerm(*perm2, perm);
-      auto transpose_ptr = MakeTranspose(graph, inp_node->Inputs()[0], perm_combined);
-      api::NodeRef& transpose = *transpose_ptr;
-      std::string_view transpose_out = transpose.Outputs()[0];
-      graph.CopyValueInfo(input, transpose_out);
-      graph.GetValueInfo(transpose_out)->PermuteDims(perm);
-      if (consumers->comprehensive && consumers->nodes.size() == 0) {
-        graph.RemoveNode(*inp_node);
+      if (!dq_node) {
+        // Otherwise, compose the perm and Transpose pre_transpose_value. Cost is the same and we may be able to remove
+        // the other Transpose.
+        const std::vector<int64_t>& perm_combined = ComposePerm(*perm2, perm);
+        auto transpose_ptr = MakeTranspose(graph, inp_node->Inputs()[0], perm_combined);
+        api::NodeRef& transpose = *transpose_ptr;
+        std::string_view transpose_out = transpose.Outputs()[0];
+        graph.CopyValueInfo(input, transpose_out);
+        graph.GetValueInfo(transpose_out)->PermuteDims(perm);
+
+        if (consumers->comprehensive && consumers->nodes.size() == 0) {
+          graph.RemoveNode(*inp_node);
+        }
+
+        node.SetInput(i, transpose_out);
+
+        return;
+      } else {
+        // fall through to regular processing if the Transpose prior to the DQ doesn't cancel out cleanly
       }
-      node.SetInput(i, transpose_out);
-      return;
     }
   }
 
-  // Case 3: A Transpose op might already exist
-  for (size_t j = 0; j < consumers->nodes.size(); ++j) {
-    api::NodeRef& consumer = *consumers->nodes[j];
-    if (consumer.IsOp("Transpose") && GetPermAttrIfValid(consumer) == perm) {
-      node.SetInput(i, consumer.Outputs()[0]);
+  // any DQ node special casing doesn't apply anymore, so go back to the original inp_node
+  if (dq_node) {
+    inp_node = std::move(dq_node);
+    consumers = graph.GetValueConsumers(input);
+  }
+
+  // Case 3: A Transpose op with the same perms might already exist
+  for (auto& consumer : consumers->nodes) {
+    if (consumer->IsOp("Transpose") && GetPermAttrIfValid(*consumer) == perm) {
+      node.SetInput(i, consumer->Outputs()[0]);
       return;
     }
   }
@@ -540,11 +948,29 @@ void TransposeInput(api::GraphRef& graph, api::NodeRef& node, size_t i,
   std::string_view transpose_out = transpose.Outputs()[0];
   graph.CopyValueInfo(input, transpose_out);
   graph.GetValueInfo(transpose_out)->PermuteDims(perm);
+
   node.SetInput(i, transpose_out);
+
+  if (inp_node && inp_node->OpType() == "DequantizeLinear") {
+    MakeQDQNodeUnit(graph, *inp_node);
+  }
+}
+
+// this TransposeInput is used by the layout transformer to wrap a node in Transpose ops.
+// there's no OptimizerCtx in that scenario
+void TransposeInput(api::GraphRef& graph, api::NodeRef& node, size_t i,
+                    const std::vector<int64_t>& perm,
+                    const std::vector<int64_t>& perm_inv) {
+  TransposeInputImpl(graph, node, i, perm, perm_inv);
+}
+
+static void TransposeInput(OptimizerCtx& ctx, api::NodeRef& node, size_t i, const std::vector<int64_t>& perm,
+                           const std::vector<int64_t>& perm_inv) {
+  TransposeInputImpl(ctx.graph, node, i, perm, perm_inv);
 }
 
 // Unsqueezes inputs of node to have uniform rank. Returns false if input ranks are unknown or exceed the target rank.
-static bool NormalizeInputRanks(OptimizerCtx ctx, api::NodeRef& node, size_t target_rank,
+static bool NormalizeInputRanks(OptimizerCtx& ctx, api::NodeRef& node, size_t target_rank,
                                 const std::vector<size_t>& input_indices) {
   auto inputs = node.Inputs();
 
@@ -579,7 +1005,7 @@ void TransposeInputs(OptimizerCtx& ctx, api::NodeRef& node, const std::vector<in
                      const std::vector<size_t>& input_indices) {
   auto perm_inv = InvertPerm(perm);
   for (size_t j : input_indices) {
-    TransposeInput(ctx.graph, node, j, perm, perm_inv);
+    TransposeInput(ctx, node, j, perm, perm_inv);
   }
 }
 
@@ -670,30 +1096,95 @@ static bool CanLikelyRemoveTranspose(const api::GraphRef& graph, api::NodeRef& t
   return true;
 }
 
+// return true if
+//   - the value is a constant initializer
+//   - the value is the output of a DQ node who's input is a constant initializer
+//     - UnsqueezeInput/TransposeInput can look past the DQ to update the constant initializer directly
+//     - DQ node is currently ignored if it uses per-channel quantization
+//       - supporting per-channel quantization requires modifying the scales and zero point data, which can be done
+//         if/when there's a use-case to justify the development cost.
+//   - the input was originally connected to a shared constant initializer that was updated in place by UnsqueezeInput
+//     or TransposeInput, and usage by this node had Squeeze/Transpose nodes inserted to counteract the effect of the
+//     in-place update. if we push the same transpose through this node it should cancel out that Squeeze/Transpose
+//
+// in all these cases we expect pushing the transpose through to not require a runtime Transpose node
+static bool IsConstant(const api::GraphRef& graph, std::string_view value_name) {
+  std::unique_ptr<api::NodeRef> producer_node = graph.GetNodeProducingOutput(value_name);
+
+  if (!producer_node) {
+    // initializer or graph input.
+    // initializer may or may not be constant depending on whether it has a matching graph input
+    std::unique_ptr<api::TensorRef> constant = graph.GetConstant(value_name);
+    return constant != nullptr;
+  }
+
+  // look past a DQ node
+  if (producer_node->OpType() == "DequantizeLinear") {
+    std::unique_ptr<api::NodeRef> dq_node = GetDQWithConstInitializerInputAndSingleConsumer(graph, value_name);
+    if (dq_node != nullptr) {
+      // DQ node pointing to an constant initializer
+      return true;
+    }
+  }
+
+  return false;
+}
+
 // Estimates the cost of transposing an input. Currently uses rank heuristic. Negative if transpose is removed.
 // Feel free to improve as needed.
 static int EstimateTransposeValueCost(const api::GraphRef& graph, std::string_view input,
-                                      const std::vector<int64_t>& perm_inv,
-                                      const HandlerMap& extended_handlers) {
+                                      const std::vector<int64_t>& perm_inv, const HandlerMap& extended_handlers) {
   // Case 1: Transposing constants probably costs nothing.
-  std::unique_ptr<api::TensorRef> constant = graph.GetConstant(input);
-  if (constant != nullptr) {
+  if (IsConstant(graph, input)) {
     return 0;
   }
 
   // Case 2: Transposing a transpose either cancels it or composes the permutations.
-  std::unique_ptr<api::NodeRef> node = graph.GetNodeProducingOutput(input);
-  if (node != nullptr && node->IsOp("Transpose")) {
-    std::optional<std::vector<int64_t>> perm2 = GetPermAttrIfValid(*node);
-    if (perm2 != std::nullopt) {
-      if (*perm2 == perm_inv && CanLikelyRemoveTranspose(graph, *node, extended_handlers)) {
-        return -EstimateValueRank(graph, input);
-      } else {
-        return 0;
+  std::unique_ptr<api::NodeRef> producer_node = graph.GetNodeProducingOutput(input);
+
+  if (producer_node != nullptr) {
+    // this handles cancelling out a Transpose or Squeeze added to a shared initializer that was updated
+    // by TransposeInputImpl Case 1 or UnqueezeInput Case 1.
+    //   - if a shared initializer is not broadcast, we have <updated initializer> -> Transpose -> DQ
+    //   - if a shared initializer is broadcast, we have <updated initializer> -> Transpose -> Squeeze -> DQ and need
+    //     to look slightly further in the hopes of finding the Transpose.
+    //     - in practice it's only necessary if the operator that we're looking to push the transpose through has
+    //       more than 2 inputs, and at least one of them is broadcastable. When there are 2 inputs the input with
+    //       the Transpose will have a negative weight. If we don't look past DQ -> Squeeze to find the Transpose
+    //       on the other input the positive weight of the broadcast initializer will always be less as it's based on
+    //       rank, so the total cost estimate will always be negative and we'll push the Transpose.
+    //       onnx::Where may be the only operator that requires the look past Squeeze.
+    //
+    // look past a DQ as we do that in the TransposeInput/UnsqueezeInput handling.
+    // match onnx and contrib ops domain for Q/DQ while we have those ops in both domains.
+    if (producer_node->OpType() == "DequantizeLinear") {
+      auto dq_input_node = graph.GetNodeProducingOutput(producer_node->Inputs()[0]);
+      if (dq_input_node != nullptr) {
+        if (dq_input_node->OpType() == "Squeeze") {
+          auto squeeze_input_node = graph.GetNodeProducingOutput(dq_input_node->Inputs()[0]);
+          if (squeeze_input_node->OpType() == "Transpose") {
+            // we only want to set this if it is a Transpose as otherwise we're invalidating the cost given it is
+            // rank based and the Squeeze will change that.
+            producer_node = std::move(squeeze_input_node);
+          }
+        } else {
+          // DQ doesn't change the rank so we don't need to check the OpType of the DQ input
+          producer_node = std::move(dq_input_node);
+        }
       }
     }
-  }
 
+    if (producer_node->IsOp("Transpose")) {
+      std::optional<std::vector<int64_t>> perm2 = GetPermAttrIfValid(*producer_node);
+      if (perm2 != std::nullopt) {
+        if (*perm2 == perm_inv && CanLikelyRemoveTranspose(graph, *producer_node, extended_handlers)) {
+          return -EstimateValueRank(graph, input);
+        } else {
+          return 0;
+        }
+      }
+    }
+  }
   // Case 3: We will likely need to add a transpose.
   return EstimateValueRank(graph, input);
 }
@@ -708,6 +1199,7 @@ static int EstimateTransposeInputsCost(const api::GraphRef& graph, const api::No
   for (size_t j : input_indices) {
     cost += EstimateTransposeValueCost(graph, inputs[j], perm_inv, extended_handlers);
   }
+
   return cost;
 }
 
@@ -734,8 +1226,10 @@ static bool HandleSimpleNodeBase(HandlerArgs& args, bool broadcast_inputs) {
   if (broadcast_inputs && !NormalizeInputRanks(args.ctx, args.node, rank, args.transposible_inputs)) {
     return false;
   }
+
   TransposeInputs(args.ctx, args.node, args.perm_inv, args.transposible_inputs);
   TransposeOutputs(args.ctx, args.node, args.perm);
+
   return true;
 }
 
@@ -907,38 +1401,29 @@ static void PermuteInput(api::GraphRef& graph, api::NodeRef& node, size_t i, con
   size_t rank = perm.size();
   int64_t rank_int = gsl::narrow_cast<int64_t>(rank);
 
-  std::string_view input = node.Inputs()[i];
-  auto constant = graph.GetConstant(input);
+  std::string_view input_name = node.Inputs()[i];
+  auto constant = graph.GetConstant(input_name);
   if (constant != nullptr) {
     auto shape = constant->Shape();
     if (shape.size() == 1 && (shape[0] == rank_int || shape[0] == 0)) {
-      Permute1DConstant(graph, node, *constant, i, input, perm);
+      Permute1DConstant(graph, node, *constant, i, input_name, perm);
       return;
     }
   }
 
+  // we don't check for a DQ input here as PermuteInput is only used for Resize (roi/scales/sizes) and Pad (pads)
+  // inputs that would never be quantized.
   std::string_view gather_indices_const = AddInitializerInt64(graph, /*shape*/ {rank_int}, perm);
-  std::vector<std::string_view> gather_inputs{input, gather_indices_const};
+  std::vector<std::string_view> gather_inputs{input_name, gather_indices_const};
   auto gather_ptr = graph.AddNode("Gather", gather_inputs, /*num_outputs*/ 1);
   api::NodeRef& gather = *gather_ptr;
   std::string_view gather_output = gather.Outputs()[0];
-  graph.CopyValueInfo(input, gather_output);
+  graph.CopyValueInfo(input_name, gather_output);
   gather.SetAttributeInt("axis", 0);
   node.SetInput(i, gather_output);
 }
 
-static bool HandleResize([[maybe_unused]] HandlerArgs& args) {
-#if defined(USE_CUDA) || defined(USE_ROCM) || defined(USE_QNN) || defined(USE_WEBNN)
-  // The CUDA Resize kernel requires that the input is NCHW, so we can't push a Transpose through a Resize
-  // in ORT builds with CUDA enabled.
-  // The ROCm EP is generated from the CUDA EP kernel so the same applies to builds with ROCm enabled.
-  // The QNN EP requires the input to be NHWC, so the Resize handler is also not enabled for QNN builds.
-  //
-  // TODO: Remove this special case once the CUDA Resize kernel is implemented "generically" (i.e.) aligning with the
-  // generic nature of the ONNX spec.
-  // See https://github.com/microsoft/onnxruntime/pull/10824 for a similar fix applied to the CPU Resize kernel.
-  return false;
-#else
+bool HandleResize([[maybe_unused]] HandlerArgs& args) {
   auto inputs = args.node.Inputs();
   int64_t rank_int = gsl::narrow_cast<int64_t>(args.perm.size());
 
@@ -964,10 +1449,10 @@ static bool HandleResize([[maybe_unused]] HandlerArgs& args) {
   TransposeOutputs(args.ctx, args.node, args.perm);
 
   return true;
-#endif
 }
 
-constexpr HandlerInfo resize_handler = {&FirstInput, &HandleResize};
+// Not currently registered by default.
+// constexpr HandlerInfo resize_handler = {&FirstInput, &HandleResize};
 
 static bool HandlePad(HandlerArgs& args) {
   size_t rank = args.perm.size();
@@ -1719,8 +2204,11 @@ static const std::unordered_map<std::string_view, const HandlerInfo&> handler_ma
     {"Split", split_handler},
     {"Shape", shape_handler},
     {"Pad", pad_handler},
-    {"Resize", resize_handler},
-    {"ReduceSum", reduce_op_handler},
+
+    // Execution providers tend to only implement Resize for specific layouts. Due to that, it's safer to not
+    // push a Transpose through a Resize unless the EP specifically checks that it can handle the change via an
+    // extended handler.
+    // {"Resize", resize_handler},
 
     {"ReduceLogSum", reduce_op_handler},
     {"ReduceLogSumExp", reduce_op_handler},
@@ -1728,6 +2216,7 @@ static const std::unordered_map<std::string_view, const HandlerInfo&> handler_ma
     {"ReduceMean", reduce_op_handler},
     {"ReduceMin", reduce_op_handler},
     {"ReduceProd", reduce_op_handler},
+    {"ReduceSum", reduce_op_handler},
     {"ReduceSumSquare", reduce_op_handler},
     {"ReduceL1", reduce_op_handler},
     {"ReduceL2", reduce_op_handler},
@@ -1749,14 +2238,6 @@ static const std::unordered_map<std::string_view, const HandlerInfo&> handler_ma
     {"Reshape", reshape_handler},
 };
 
-constexpr bool IsOnnxDomain(std::string_view domain) {
-  return (domain == onnxruntime::kOnnxDomain) || (domain == onnxruntime::kOnnxDomainAlias);
-}
-
-constexpr bool IsMSDomain(std::string_view domain) {
-  return domain == onnxruntime::kMSDomain;
-}
-
 static const HandlerInfo* GetHandler(api::NodeRef& node, const HandlerMap& extended_handlers) {
   std::string key;
   auto domain = node.Domain();
@@ -1817,12 +2298,12 @@ static int CalculateCost(const api::GraphRef& graph, const api::NodeRef& node,
 }
 
 // Default cost check. Returns `true` if pushing the Transpose through the node is considered to be beneficial.
-static bool ShouldPushTranspose(const api::GraphRef& graph, const api::NodeRef& node,
-                                const std::vector<int64_t>& perm,
-                                const std::unordered_set<std::string>& outputs_leading_to_transpose,
-                                const HandlerInfo& info,
-                                const std::vector<size_t> transposable_input_indices,
-                                const HandlerMap& extended_handlers) {
+static bool DefaultCostCheck(const api::GraphRef& graph, const api::NodeRef& node,
+                             const std::vector<int64_t>& perm,
+                             const std::unordered_set<std::string>& outputs_leading_to_transpose,
+                             const HandlerInfo& info,
+                             const std::vector<size_t> transposable_input_indices,
+                             const HandlerMap& extended_handlers) {
   if (node.IsOp("Transpose")) {
     return true;
   }
@@ -1854,8 +2335,8 @@ bool ProcessTranspose(OptimizerCtx& ctx, api::NodeRef& transpose, api::NodeRef&
   }
 
   if (cost == CostCheckResult::kFallThrough) {
-    cost = ShouldPushTranspose(ctx.graph, node, perm, outputs_leading_to_transpose, *info, input_indices,
-                               ctx.extended_handlers)
+    cost = DefaultCostCheck(ctx.graph, node, perm, outputs_leading_to_transpose, *info, input_indices,
+                            ctx.extended_handlers)
                ? CostCheckResult::kPushTranspose
                : CostCheckResult::kStop;
   }
@@ -2009,75 +2490,99 @@ OptimizeResult OptimizeImpl(OptimizerCtx& ctx) {
       }
     }
   }
-
   if (!have_dq) {
     result.graph_modified = changed;
     return result;
   }
 
-  // Run second optimization pass.
-  // If any transpose succeeds a DQ node, move it above the DQ node if it's not part of a QDQ node group.
-  // In QDQ models this helps to preserve the QDQ node group when a Transpose was pushed across a DQ into
-  // an existing QDQ node group.
-  // In all other scenarios this is beneficial as well because moving transpose above DQ node is more efficient as
-  // transpose node now handles less data.
+  // Run 'fix up' pass for QDQ node units.
+  //
+  // Repair broken QDQ node unit from Transpose being blocked on Op inside a QDQ node unit.
+  //   DQ -> Transpose ->            Op -> Q =>
+  //   DQ -> Transpose -> Q -> DQ -> Op -> Q
+  //
+  // Create QDQ node unit for Transpose after DQ that provides graph output.
+  //   DQ -> Transpose ->            graph output =>
+  //   DQ -> Transpose -> Q -> DQ -> graph output
+  //
+  // Remove empty DQ -> Q pair from moving a Transpose downstream or a Transpose being cancelled out.
+  //   DQ -> Q -> consumer node =>
+  //              consumer node
+
   auto graph_nodes = ctx.graph.Nodes();
   for (size_t i = 1; i < graph_nodes.size(); i++) {
-    const auto& node = *graph_nodes[i];
+    auto& node = *graph_nodes[i];
 
     if (!can_modify_node(node)) {
       continue;
     }
 
-    if (node.OpType() == "Transpose") {
-      auto& transpose_node = *graph_nodes[i];
-      auto dq_node = ctx.graph.GetNodeProducingOutput(transpose_node.Inputs()[0]);
-      if (!dq_node || dq_node->OpType() != "DequantizeLinear") {
+    for (size_t i_idx = 0, i_end = node.Inputs().size(); i_idx < i_end; ++i_idx) {
+      // any change requires a DQ as the input to the current node
+      auto input_node = ctx.graph.GetNodeProducingOutput(node.Inputs()[i_idx]);
+      if (!input_node || input_node->OpType() != "DequantizeLinear") {
         continue;
       }
 
-      // Check if Transpose node is the only consumer of dq node
-      auto consumers_of_dq_node = ctx.graph.GetValueConsumers(dq_node->Outputs()[0]);
-      if (!consumers_of_dq_node->comprehensive || consumers_of_dq_node->nodes.size() > 1) {
-        continue;
-      }
+      auto& dq_node = *input_node;
+      std::unique_ptr<api::NodeRef> single_consumer_node;
+
+      // remove empty DQ -> Q before a consumer node if the DQ and Q have matching types, scale and zp.
+      if (node.OpType() == "QuantizeLinear") {
+        // we don't need to check scale and zp inputs, and we may remove nodes invalidating `node` if we
+        // continue with the loop of inputs so set i_end to bail
+        i_end = 1;
+
+        auto& q_node = node;
+        if (OutputValueHasSingleConsumerNode(ctx.graph, dq_node, 0, single_consumer_node) &&
+            OutputValueHasSingleConsumerNode(ctx.graph, q_node, 0, single_consumer_node) &&
+            CheckQDQNodePairMatch(ctx.graph, dq_node, q_node)) {
+          // connect Q consumer to DQ input
+          for (size_t j_idx = 0, j_end = single_consumer_node->Inputs().size(); j_idx < j_end; ++j_idx) {
+            if (single_consumer_node->Inputs()[j_idx] == q_node.Outputs()[0]) {
+              single_consumer_node->SetInput(j_idx, dq_node.Inputs()[0]);
+              // break; in theory the Q might be providing multiple inputs.
+            }
+          }
 
-      auto consumers_of_transpose_node = ctx.graph.GetValueConsumers(transpose_node.Outputs()[0]);
-      bool is_part_of_qdq_group = std::find_if(consumers_of_transpose_node->nodes.cbegin(),
-                                               consumers_of_transpose_node->nodes.cend(),
-                                               [](const std::unique_ptr<api::NodeRef>& node) {
-                                                 return node->OpType() == "QuantizeLinear";
-                                               }) != consumers_of_transpose_node->nodes.cend();
-      if (is_part_of_qdq_group) {
-        continue;
-      }
+          // disconnect other nodes and remove
+          dq_node.SetInput(0, "");
+          q_node.SetInput(0, "");
+          ctx.graph.RemoveNode(dq_node);
+          ctx.graph.RemoveNode(q_node);
 
-      // Update Dequantize Node and move the transpose above it
-      auto perm = GetPermAttrIfValid(transpose_node);
-      if (!perm.has_value()) {
-        continue;
+          changed = true;
+          continue;
+        }
       }
 
-      // we're moving the Transpose to before the DQ, so we need to use the inverse permutations to update the axis
-      // attribute correctly when doing per-axis dequantization
-      std::string_view dq_domain = dq_node->Domain();
-      std::vector<int64_t> perm_inv = InvertPerm(*perm);
+      // DQ -> Transpose => DQ -> Transpose -> Q -> DQ if needed
+      if (node.OpType() == "Transpose") {
+        auto& transpose_node = node;
+
+        // GetValueConsumers sets `comprehensive` to false for graph outputs and implicit inputs.
+        // we know Transpose doesn't have implicit inputs so if nodes are empty it can only be a graph output.
+        auto transpose_output = transpose_node.Outputs()[0];
+        auto consumers = ctx.graph.GetValueConsumers(transpose_output);
+        if (consumers->nodes.empty()) {
+          // DQ -> Transpose -> graph output
+        } else {
+          if (consumers->nodes.size() > 1) {
+            // unexpected to have DQ -> Transpose -> multiple consumers
+            continue;
+          }
 
-      if (IsOnnxDomain(dq_domain) && !HandleQuantizeDequantizeAxis(ctx.graph, perm_inv, *dq_node, ctx.opset)) {
-        continue;
-      }
+          if (consumers->nodes[0]->OpType() == "QuantizeLinear") {
+            // already in QDQ node unit
+            continue;
+          }
+        }
 
-      if (IsMSDomain(dq_domain) && !TransposeQuantizeDequantizeAxis(ctx.graph, perm_inv, *dq_node)) {
-        continue;
+        // Add Q -> DQ after the DQ -> Transpose
+        if (MakeQDQNodeUnit(ctx.graph, dq_node)) {
+          changed = true;
+        }
       }
-
-      TransposeFirstInput(ctx, *dq_node, *perm);
-
-      // remove existing transpose node
-      transpose_node.SetInput(0, "");
-      ctx.graph.MoveOutput(transpose_node, 0, *dq_node, 0);
-      ctx.graph.RemoveNode(transpose_node);
-      changed = true;
     }
   }
 
diff --git a/onnxruntime/core/optimizer/transpose_optimization/onnx_transpose_optimization.h b/onnxruntime/core/optimizer/transpose_optimization/onnx_transpose_optimization.h
index 1a54e7834a4ae..6d1f1f8535ba4 100644
--- a/onnxruntime/core/optimizer/transpose_optimization/onnx_transpose_optimization.h
+++ b/onnxruntime/core/optimizer/transpose_optimization/onnx_transpose_optimization.h
@@ -3,6 +3,7 @@
 
 #pragma once
 
+#include <unordered_map>
 #include <vector>
 
 // implementation details of the transpose optimizer API defined in optimizer_api.h.
@@ -38,6 +39,8 @@ struct HandlerInfo {
   bool transposes_outputs = true;
 };
 
+using NodeIdToInputIdxsMap = std::unordered_map<int64_t, std::vector<size_t>>;
+
 struct OptimizerCtx {
   int64_t opset;
   api::GraphRef& graph;
@@ -69,6 +72,7 @@ bool HandleSimpleNodeWithAxis(HandlerArgs& args, std::optional<int64_t> default_
 
 // base handlers that are used by extended handlers. add from transpose_optimizer.cc as needed.
 bool HandleReduceOps(HandlerArgs& args);
+bool HandleResize([[maybe_unused]] HandlerArgs& args);
 
 void TransposeInput(api::GraphRef& graph, api::NodeRef& node, size_t i,
                     const std::vector<int64_t>& perm,
diff --git a/onnxruntime/core/optimizer/transpose_optimization/optimizer_api.h b/onnxruntime/core/optimizer/transpose_optimization/optimizer_api.h
index 40a03f24f7648..c45aaef0cf02f 100644
--- a/onnxruntime/core/optimizer/transpose_optimization/optimizer_api.h
+++ b/onnxruntime/core/optimizer/transpose_optimization/optimizer_api.h
@@ -242,6 +242,12 @@ class NodeRef {
   /// <returns>since version or default value -1</returns>
   virtual int SinceVersion() const = 0;
 
+  /// <summary>
+  /// Get the unique id of the node.
+  /// </summary>
+  /// <returns>Id</returns>
+  virtual int64_t Id() const = 0;
+
   virtual ~NodeRef(){};
 };
 
@@ -436,13 +442,20 @@ class GraphRef {
     return !unused;
   }
 
+  /// <summary>
+  /// Is the value a graph output.
+  /// </summary>
+  /// <param name="name">Value name.</param>
+  /// <returns>True if output of the Graph.</returns>
+  virtual bool IsGraphOutput(std::string_view name) const = 0;
+
   virtual ~GraphRef(){};
 };
 
 }  // namespace api
 
 constexpr int64_t kMinSupportedOpset = 7;
-constexpr int64_t kMaxSupportedOpset = 19;
+constexpr int64_t kMaxSupportedOpset = 20;
 
 // enum of results that a CostCheckFn can return.
 enum class CostCheckResult {
diff --git a/onnxruntime/core/optimizer/transpose_optimization/ort_optimizer_api_impl.cc b/onnxruntime/core/optimizer/transpose_optimization/ort_optimizer_api_impl.cc
index b30c94d7b3e40..d9f08ffe1171e 100644
--- a/onnxruntime/core/optimizer/transpose_optimization/ort_optimizer_api_impl.cc
+++ b/onnxruntime/core/optimizer/transpose_optimization/ort_optimizer_api_impl.cc
@@ -95,7 +95,8 @@ class ApiNode final : public api::NodeRef {
   void ClearAttribute(std::string_view name) override;
   void SetInput(size_t i, std::string_view name) override;
   std::string_view GetExecutionProviderType() const override;
-  virtual int SinceVersion() const override;
+  int SinceVersion() const override;
+  int64_t Id() const override;
 
  private:
   ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(ApiNode);
@@ -106,10 +107,17 @@ class ApiGraph final : public api::GraphRef {
   onnxruntime::Graph& graph_;
   AllocatorPtr cpu_allocator_;
   const char* new_node_ep_;
+  std::unordered_set<std::string_view> graph_outputs_;  // graph_.GetOutputs() names for efficient lookup
 
  public:
   explicit ApiGraph(onnxruntime::Graph& graph, AllocatorPtr cpu_allocator, const char* new_node_ep)
-      : graph_(graph), cpu_allocator_(std::move(cpu_allocator)), new_node_ep_(new_node_ep) {}
+      : graph_(graph), cpu_allocator_(std::move(cpu_allocator)), new_node_ep_(new_node_ep) {
+    const auto& graph_outputs = graph_.GetOutputs();
+    graph_outputs_.reserve(graph_outputs.size());
+    for (const auto* output : graph_outputs) {
+      graph_outputs_.insert(output->Name());
+    }
+  }
 
   onnxruntime::Graph& Graph() {
     return graph_;
@@ -137,6 +145,7 @@ class ApiGraph final : public api::GraphRef {
   void MoveOutput(api::NodeRef& src_node, size_t src_idx, api::NodeRef& dst_node, size_t dst_idx) override;
   void CopyValueInfo(std::string_view src_name, std::string_view dst_name) override;
   bool HasValueConsumers(std::string_view name) const override;
+  bool IsGraphOutput(std::string_view name) const override;
 
  private:
   ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(ApiGraph);
@@ -417,6 +426,10 @@ int ApiNode::SinceVersion() const {
   return node_.SinceVersion();
 }
 
+int64_t ApiNode::Id() const {
+  return node_.Index();
+}
+
 // </ApiNode>
 
 std::optional<int64_t> ApiGraph::Opset(std::string_view domain) const {
@@ -442,6 +455,10 @@ std::vector<std::unique_ptr<api::NodeRef>> ApiGraph::Nodes() const {
   return nodes;
 }
 
+bool ApiGraph::IsGraphOutput(std::string_view name) const {
+  return graph_outputs_.find(name) != graph_outputs_.end();
+}
+
 std::unique_ptr<api::TensorRef> ApiGraph::GetConstant(std::string_view name) const {
   const auto* tensor = graph_.GetConstantInitializer(std::string(name), /*check_outer_scope*/ true);
   if (tensor == nullptr) {
@@ -489,11 +506,8 @@ std::unique_ptr<api::ValueConsumers> ApiGraph::GetValueConsumers(std::string_vie
     }
   }
 
-  const auto& graph_outputs = graph_.GetOutputs();
-  for (const auto* output : graph_outputs) {
-    if (output->Name() == name) {
-      consumers->comprehensive = false;
-    }
+  if (IsGraphOutput(name)) {
+    consumers->comprehensive = false;
   }
 
   return consumers;
@@ -505,14 +519,7 @@ bool ApiGraph::HasValueConsumers(std::string_view name) const {
     return true;
   }
 
-  const auto& graph_outputs = graph_.GetOutputs();
-  for (const auto* output : graph_outputs) {
-    if (output->Name() == name) {
-      return true;
-    }
-  }
-
-  return false;
+  return IsGraphOutput(name);
 }
 
 std::unique_ptr<api::NodeRef> ApiGraph::GetNodeProducingOutput(std::string_view name) const {
@@ -699,10 +706,6 @@ static std::optional<int> GetLayoutTransformationPotentiallyAddedOpSinceVersion(
 // Based on the opset version imported for this model, returns the since version for the node.
 static int GetSinceVersionForNewOp(std::string_view op_type, std::string_view domain,
                                    const std::unordered_map<std::string, int>& domain_to_version_map) {
-  // TODO do we need this check? we will also check kLayoutTransformationPotentiallyAddedOps
-  ORT_ENFORCE(domain == kOnnxDomain, "Transpose optimizer is expected to add only onnx domain ops. Domain: ",
-              domain, " provided for op: ", op_type);
-
   const auto opset_import_iter = domain_to_version_map.find(std::string(domain));
   ORT_ENFORCE(opset_import_iter != domain_to_version_map.end(), domain, " domain not found in opset imports.");
 
diff --git a/onnxruntime/core/optimizer/transpose_optimization/ort_transpose_optimization.cc b/onnxruntime/core/optimizer/transpose_optimization/ort_transpose_optimization.cc
index ead82a6b56741..8eaac3d34c3af 100644
--- a/onnxruntime/core/optimizer/transpose_optimization/ort_transpose_optimization.cc
+++ b/onnxruntime/core/optimizer/transpose_optimization/ort_transpose_optimization.cc
@@ -5,12 +5,35 @@
 
 #include <algorithm>
 #include "core/graph/constants.h"
+#include "core/framework/utils.h"
 #include "core/optimizer/transpose_optimization/ort_optimizer_utils.h"
 
 using namespace onnx_transpose_optimization;
 
 namespace onnxruntime {
 
+static bool EPAwareHandleResize(HandlerArgs& args) {
+  // Whilst Resize is not technically layout sensitive, execution providers typically implement handling for only one
+  // layout. Due to that, only push a Transpose through a Resize once it is assigned and we know it's being handled
+  // by an EP that supports multiple layouts. Currently that's the CPU and XNNPACK EPs.
+  const auto ep_type = args.node.GetExecutionProviderType();
+  if (ep_type == kCpuExecutionProvider) {
+    // allow NCHW <-> NHWC for now. not clear any other sort of transpose has a valid usage in a real model
+    int64_t rank_int = gsl::narrow_cast<int64_t>(args.perm.size());
+    if (rank_int == 4) {
+      static const std::vector<int64_t> nchw_to_nhwc_perm{0, 2, 3, 1};
+      static const std::vector<int64_t> nhwc_to_nchw_perm{0, 3, 1, 2};
+      if (args.perm == nchw_to_nhwc_perm || args.perm == nhwc_to_nchw_perm) {
+        return HandleResize(args);
+      }
+    }
+  }
+
+  return false;
+}
+
+constexpr HandlerInfo ep_aware_resize_handler = {&FirstInput, &EPAwareHandleResize};
+
 static bool HandleQLinearConcat(HandlerArgs& args) {
   return HandleSimpleNodeWithAxis(args);
 }
@@ -62,7 +85,7 @@ static bool HandleMaxPool(HandlerArgs& args) {
   ORT_UNUSED_PARAMETER(args);
   return false;
 #else
-  if (args.node.GetExecutionProviderType() != "CPUExecutionProvider") {
+  if (args.node.GetExecutionProviderType() != kCpuExecutionProvider) {
     return false;
   }
 
@@ -103,6 +126,7 @@ static bool HandleContribQuantizeDequantizeLinear(HandlerArgs& args) {
 }
 
 constexpr HandlerInfo max_pool_op_handler = {&FirstInput, &HandleMaxPool};
+
 constexpr HandlerInfo node_1_inp_handler = {&FirstInput, &HandleSimpleNode};
 constexpr HandlerInfo reduce_op_handler = {&FirstInput, &HandleReduceOps};
 constexpr HandlerInfo contrib_quantize_dequantize_linear_handler = {&FirstInput,
@@ -113,6 +137,7 @@ const HandlerMap& OrtExtendedHandlers() {
   static const HandlerMap extended_handler_map = []() {
     HandlerMap map = {
         {"MaxPool", max_pool_op_handler},
+        {"Resize", ep_aware_resize_handler},
         {"com.microsoft.QuantizeLinear", contrib_quantize_dequantize_linear_handler},
         {"com.microsoft.DequantizeLinear", contrib_quantize_dequantize_linear_handler},
         {"com.microsoft.QLinearAdd", q_linear_binary_op_handler},
diff --git a/onnxruntime/core/optimizer/transpose_optimization/ort_transpose_optimization.h b/onnxruntime/core/optimizer/transpose_optimization/ort_transpose_optimization.h
index 0a5dbd6d13d06..8245d8c3b4eae 100644
--- a/onnxruntime/core/optimizer/transpose_optimization/ort_transpose_optimization.h
+++ b/onnxruntime/core/optimizer/transpose_optimization/ort_transpose_optimization.h
@@ -10,7 +10,6 @@ namespace onnxruntime {
 /// <summary>
 /// Get the extended handlers for ORT specific transpose optimization.
 /// These include handlers for contrib ops, and where we have an NHWC version of a layout sensitive op.
-/// Extends the handlers returned by OrtHandlers.
 /// </summary>
 /// <returns>HandlerMap</returns>
 const onnx_transpose_optimization::HandlerMap& OrtExtendedHandlers();
diff --git a/onnxruntime/core/optimizer/transpose_optimizer.cc b/onnxruntime/core/optimizer/transpose_optimizer.cc
index 33e3f5eeaf0fa..092df9cc7dcfb 100644
--- a/onnxruntime/core/optimizer/transpose_optimizer.cc
+++ b/onnxruntime/core/optimizer/transpose_optimizer.cc
@@ -18,10 +18,18 @@ namespace onnxruntime {
 
 Status TransposeOptimizer::ApplyImpl(Graph& graph, bool& modified, int graph_level,
                                      const logging::Logger& logger) const {
-  auto api_graph = MakeApiGraph(graph, cpu_allocator_, /*new_node_ep*/ nullptr);
-
-  OptimizeResult result = onnx_transpose_optimization::Optimize(*api_graph, "", /* default cost check*/ nullptr,
-                                                                OrtExtendedHandlers());
+  OptimizeResult result;
+
+  if (ep_.empty()) {
+    // basic usage - no EP specific optimizations
+    auto api_graph = MakeApiGraph(graph, cpu_allocator_, /*new_node_ep*/ nullptr);
+    result = onnx_transpose_optimization::Optimize(*api_graph, "", /* default cost check*/ nullptr,
+                                                   OrtExtendedHandlers());
+  } else {
+    // EP specific optimizations enabled. Currently only used for CPU EP.
+    auto api_graph = MakeApiGraph(graph, cpu_allocator_, /*new_node_ep*/ ep_.c_str());
+    result = onnx_transpose_optimization::Optimize(*api_graph, ep_, OrtEPCostCheck, OrtExtendedHandlers());
+  }
 
   if (result.error_msg) {
     // currently onnx_layout_transformation::Optimize only fails if we hit an unsupported opset.
diff --git a/onnxruntime/core/optimizer/transpose_optimizer.h b/onnxruntime/core/optimizer/transpose_optimizer.h
index 1ae6d611d2f0e..97d7ab4d0e220 100644
--- a/onnxruntime/core/optimizer/transpose_optimizer.h
+++ b/onnxruntime/core/optimizer/transpose_optimizer.h
@@ -15,10 +15,14 @@ Push transposes through ops and eliminate them.
 class TransposeOptimizer : public GraphTransformer {
  private:
   AllocatorPtr cpu_allocator_;
+  const std::string ep_;
 
  public:
-  explicit TransposeOptimizer(AllocatorPtr cpu_allocator) noexcept
-      : GraphTransformer("TransposeOptimizer"), cpu_allocator_(std::move(cpu_allocator)) {}
+  explicit TransposeOptimizer(AllocatorPtr cpu_allocator,
+                              const std::string& ep = {}) noexcept
+      : GraphTransformer(ep.empty() ? "TransposeOptimizer" : "TransposeOptimizer_" + ep),
+        cpu_allocator_(std::move(cpu_allocator)),
+        ep_{ep} {}
 
   Status ApplyImpl(Graph& graph, bool& modified, int graph_level, const logging::Logger& logger) const override;
 
diff --git a/onnxruntime/core/platform/windows/debug_alloc.cc b/onnxruntime/core/platform/windows/debug_alloc.cc
index b08d189f79866..ff6a059607367 100644
--- a/onnxruntime/core/platform/windows/debug_alloc.cc
+++ b/onnxruntime/core/platform/windows/debug_alloc.cc
@@ -12,7 +12,7 @@
 //
 #ifndef NDEBUG
 #ifdef ONNXRUNTIME_ENABLE_MEMLEAK_CHECK
-constexpr int c_callstack_limit = 16;  // Maximum depth of callstack in leak trace
+constexpr int c_callstack_limit = 32;  // Maximum depth of callstack in leak trace
 #define VALIDATE_HEAP_EVERY_ALLOC 0    // Call HeapValidate on every new/delete
 
 #pragma warning(disable : 4073)  // initializers put in library initialization area (this is intentional)
@@ -223,6 +223,11 @@ Memory_LeakCheck::~Memory_LeakCheck() {
     //     empty_group_names = new std::map<int, string>; });
     if (string.find("RtlRunOnceExecuteOnce") == std::string::npos &&
         string.find("re2::RE2::Init") == std::string::npos &&
+        string.find("dynamic initializer for 'FLAGS_") == std::string::npos &&
+        string.find("AbslFlagDefaultGenForgtest_") == std::string::npos &&
+        string.find("AbslFlagDefaultGenForundefok::Gen") == std::string::npos &&
+        string.find("::SetProgramUsageMessage") == std::string::npos &&
+        string.find("testing::internal::ParseGoogleTestFlagsOnly") == std::string::npos &&
         string.find("testing::internal::Mutex::ThreadSafeLazyInit") == std::string::npos &&
         string.find("testing::internal::ThreadLocalRegistryImpl::GetThreadLocalsMapLocked") == std::string::npos &&
         string.find("testing::internal::ThreadLocalRegistryImpl::GetValueOnCurrentThread") == std::string::npos &&
diff --git a/onnxruntime/core/platform/windows/env.cc b/onnxruntime/core/platform/windows/env.cc
index f02c61daabeed..45648010baf86 100644
--- a/onnxruntime/core/platform/windows/env.cc
+++ b/onnxruntime/core/platform/windows/env.cc
@@ -32,7 +32,7 @@ limitations under the License.
 #include "core/common/span_utils.h"
 #include "core/platform/env.h"
 #include "core/platform/scoped_resource.h"
-#include "unsupported/Eigen/CXX11/src/ThreadPool/ThreadPoolInterface.h"
+#include <unsupported/Eigen/CXX11/ThreadPool>
 #include <wil/Resource.h>
 
 #include "core/platform/path_lib.h"  // for LoopDir()
diff --git a/onnxruntime/core/platform/windows/stacktrace.cc b/onnxruntime/core/platform/windows/stacktrace.cc
index cac6f4f29043b..d7d423e4a483e 100644
--- a/onnxruntime/core/platform/windows/stacktrace.cc
+++ b/onnxruntime/core/platform/windows/stacktrace.cc
@@ -10,7 +10,6 @@
 #include <stacktrace>
 #endif
 #endif
-#include <stacktrace>
 
 #include "core/common/logging/logging.h"
 #include "core/common/gsl.h"
diff --git a/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.h b/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.h
index b142db86a7902..b4132d3b770ec 100644
--- a/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.h
+++ b/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.h
@@ -51,7 +51,7 @@ class BaseOpBuilder : public IOpBuilder {
   virtual bool HasSupportedInputsImpl(const Node& node, const logging::Logger& logger) const;
 
   virtual int GetMinSupportedOpSet(const Node& /* node */) const { return 1; }
-  virtual int GetMaxSupportedOpSet(const Node& /* node */) const { return 19; }
+  virtual int GetMaxSupportedOpSet(const Node& /* node */) const { return 20; }
 
  private:
   bool HasSupportedOpSet(const Node& node, const logging::Logger& logger) const;
diff --git a/onnxruntime/core/providers/coreml/builders/impl/softmax_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/softmax_op_builder.cc
new file mode 100644
index 0000000000000..c454a2a779f6e
--- /dev/null
+++ b/onnxruntime/core/providers/coreml/builders/impl/softmax_op_builder.cc
@@ -0,0 +1,128 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "core/providers/coreml/builders/impl/base_op_builder.h"
+
+#include "core/framework/tensorprotoutils.h"
+#include "core/providers/common.h"
+#include "core/providers/coreml/shape_utils.h"
+#include "core/providers/shared/utils/utils.h"
+
+#ifdef __APPLE__
+#include "core/providers/coreml/builders/model_builder.h"
+#endif
+#include "core/providers/coreml/builders/op_builder_factory.h"
+
+namespace onnxruntime {
+namespace coreml {
+
+class SoftmaxOpBuilder : public BaseOpBuilder {
+  // Add operator related
+#ifdef __APPLE__
+ private:
+  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node,
+                               const logging::Logger& logger) const override;
+#endif
+
+  // Operator support related
+ private:
+  bool IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params,
+                         const logging::Logger& logger) const override;
+};
+
+// Add operator related
+
+#ifdef __APPLE__
+
+Status SoftmaxOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,
+                                               const Node& node,
+                                               const logging::Logger& logger) const {
+  std::unique_ptr<COREML_SPEC::NeuralNetworkLayer> layer = CreateNNLayer(model_builder, node);
+  const auto& input_name = node.InputDefs()[0]->Name();
+  const auto& output_name = node.OutputDefs()[0]->Name();
+
+  std::vector<int64_t> data_shape;
+  ORT_RETURN_IF_NOT(GetStaticShape(*node.InputDefs()[0], data_shape, logger), "Failed to get input shape.");
+
+  NodeAttrHelper helper(node);
+  int32_t axis_default_value = (node.SinceVersion() < 13) ? 1 : -1;
+  const auto axis = helper.Get("axis", axis_default_value);
+  const auto axis_nonnegative = HandleNegativeAxis(axis, data_shape.size());
+
+  if (node.SinceVersion() >= 13 || (data_shape.size() == 2)) {
+    auto* coreml_softmaxnd = layer->mutable_softmaxnd();
+    coreml_softmaxnd->set_axis(axis);
+    *layer->mutable_input()->Add() = input_name;
+    *layer->mutable_output()->Add() = output_name;
+    model_builder.AddLayer(std::move(layer));
+  } else {
+    // note: if opsets < 13, onnx Softmax coerces the input shape to be 2D based on axis.
+    // we need to manually reshape to 2D and apply SoftmaxND to axis -1 to achieve equivalent results for CoreML.
+    TensorShape input_shape(data_shape);
+    const auto size_to_dimension = input_shape.SizeToDimension(axis_nonnegative);
+    const auto size_from_dimension = input_shape.SizeFromDimension(axis_nonnegative);
+
+    TensorShapeVector target_shape;
+    target_shape.push_back(size_to_dimension);
+    target_shape.push_back(size_from_dimension);
+
+    const auto reshape1_output_name = model_builder.GetUniqueName(MakeString(node.Name(), "reshape1_output"));
+    {  // Add reshape layer
+      const auto softmax_reshape1_layer_name =
+          model_builder.GetUniqueName(MakeString(node.Name(), "_Softmax_reshape1"));
+      auto reshape_layer = CreateNNLayer(softmax_reshape1_layer_name);
+      *reshape_layer->mutable_reshapestatic()->mutable_targetshape() = {target_shape.cbegin(), target_shape.cend()};
+      *reshape_layer->mutable_input()->Add() = input_name;
+      *reshape_layer->mutable_output()->Add() = reshape1_output_name;
+      model_builder.AddLayer(std::move(reshape_layer));
+    }
+    const auto softmax_output_name = model_builder.GetUniqueName(MakeString(node.Name(), "softmax_output"));
+    {
+      auto* coreml_softmaxnd = layer->mutable_softmaxnd();
+      coreml_softmaxnd->set_axis(-1);
+      *layer->mutable_input()->Add() = reshape1_output_name;
+      *layer->mutable_output()->Add() = softmax_output_name;
+      model_builder.AddLayer(std::move(layer));
+    }
+    {
+      // Add reshape back layer
+      const auto softmax_reshape2_layer_name =
+          model_builder.GetUniqueName(MakeString(node.Name(), "_Softmax_reshape2"));
+      auto reshape_layer = CreateNNLayer(softmax_reshape2_layer_name);
+      *reshape_layer->mutable_reshapestatic()->mutable_targetshape() = {data_shape.cbegin(), data_shape.cend()};
+      *reshape_layer->mutable_input()->Add() = softmax_output_name;
+      *reshape_layer->mutable_output()->Add() = output_name;
+      model_builder.AddLayer(std::move(reshape_layer));
+    }
+  }
+
+  return Status::OK();
+}
+
+#endif
+
+// Operator support related
+
+bool SoftmaxOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& /* input_params */,
+                                         const logging::Logger& logger) const {
+  const auto& input_defs = node.InputDefs();
+  std::vector<int64_t> input_shape;
+  if (!GetStaticShape(*input_defs[0], input_shape, logger))
+    return false;
+
+  const TensorShape shape(input_shape);
+  if (shape.Size() == 0) {
+    LOGS(logger, VERBOSE) << "Empty input data is not supported.";
+    return false;
+  }
+
+  return true;
+}
+
+void CreateSoftmaxOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations) {
+  op_registrations.builders.push_back(std::make_unique<SoftmaxOpBuilder>());
+  op_registrations.op_builder_map.emplace(op_type, op_registrations.builders.back().get());
+}
+
+}  // namespace coreml
+}  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/coreml/builders/impl/split_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/split_op_builder.cc
new file mode 100644
index 0000000000000..815f68128ffaf
--- /dev/null
+++ b/onnxruntime/core/providers/coreml/builders/impl/split_op_builder.cc
@@ -0,0 +1,189 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "core/providers/coreml/builders/impl/base_op_builder.h"
+
+#include "core/optimizer/initializer.h"
+#include "core/providers/common.h"
+#include "core/providers/coreml/builders/helper.h"
+#include "core/providers/coreml/builders/op_builder_factory.h"
+#include "core/providers/coreml/shape_utils.h"
+#include "core/providers/shared/utils/utils.h"
+
+#if defined(__APPLE__)
+#include "core/providers/coreml/builders/model_builder.h"
+#endif
+
+namespace onnxruntime {
+namespace coreml {
+
+class SplitOpBuilder : public BaseOpBuilder {
+  // Add operator related
+#ifdef __APPLE__
+ private:
+  void AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const override;
+
+ private:
+  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node,
+                               const logging::Logger& logger) const override;
+#endif
+
+  // Operator support related
+ private:
+  bool IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params,
+                         const logging::Logger& logger) const override;
+
+  // Split opset 13- uses "split" as attribute. Currently it's not supported.
+  int GetMinSupportedOpSet(const Node& /* node */) const override { return 13; }
+};
+
+// Add operator related
+
+#ifdef __APPLE__
+
+void SplitOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const {
+  const auto& input_defs = node.InputDefs();
+
+  if (input_defs.size() > 1 && input_defs[1]->Exists()) {  // optional second input "split"
+    model_builder.AddInitializerToSkip(input_defs[1]->Name());
+  }
+}
+
+Status SplitOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,
+                                             const Node& node,
+                                             const logging::Logger& logger) const {
+  const auto& input_defs = node.InputDefs();
+
+  std::vector<int64_t> data_shape;
+  ORT_RETURN_IF_NOT(GetShape(*node.InputDefs()[0], data_shape, logger), "Failed to get input shape.");
+
+  NodeAttrHelper helper(node);
+  const auto axis = helper.Get("axis", 0);
+
+  // attribute introduced since opset 18
+  uint64_t num_outputs;
+
+  std::unique_ptr<COREML_SPEC::NeuralNetworkLayer> layer = CreateNNLayer(model_builder, node);
+  auto* coreml_splitnd = layer->mutable_splitnd();
+  coreml_splitnd->set_axis(axis);
+
+  if (input_defs.size() > 1) {
+    // if "split" is explicitly provided as an input
+    const auto& split_tensor = *model_builder.GetInitializerTensors().at(input_defs[1]->Name());
+    Initializer unpacked_tensor(split_tensor);
+    auto split_span = unpacked_tensor.DataAsSpan<uint64_t>();
+    auto split_sizes = split_span.size();
+    num_outputs = narrow<uint64_t>(split_sizes);
+    for (size_t i = 0; i < split_sizes; i++) {
+      coreml_splitnd->add_splitsizes(split_span[i]);
+    }
+  } else if (node.SinceVersion() < 18) {
+    num_outputs = narrow<uint64_t>(node.OutputDefs().size());
+    coreml_splitnd->set_numsplits(num_outputs);
+  } else {
+    // note: for opset 18+ 'num_outputs' is a required attribute
+    num_outputs = narrow<uint64_t>(helper.GetInt("num_outputs").value());
+    // note: checked in IsOpSupportedImpl that ensures the dim value at splitting axis exists
+    auto split_dim_size = data_shape[HandleNegativeAxis(axis, data_shape.size())];
+    uint64_t chunk_size = narrow<uint64_t>((split_dim_size + num_outputs - 1) / num_outputs);
+    uint64_t remainder = split_dim_size % chunk_size;
+    if (remainder) {
+      // uneven
+      auto split_sizes = InlinedVector<uint64_t>(num_outputs, chunk_size);
+      split_sizes.back() = remainder;
+      for (size_t i = 0; i < split_sizes.size(); i++) {
+        coreml_splitnd->add_splitsizes(split_sizes[i]);
+      }
+    } else {
+      // even
+      coreml_splitnd->set_numsplits(num_outputs);
+    }
+  }
+
+  *layer->mutable_input()->Add() = node.InputDefs()[0]->Name();
+  // variadic number of outputs. Calculated based on the length of the given splitSizes if provided.
+  // Otherwise, uses attribute value 'num_outputs'.
+  for (uint64_t i = 0; i < num_outputs; i++) {
+    *layer->mutable_output()->Add() = node.OutputDefs()[i]->Name();
+  }
+  model_builder.AddLayer(std::move(layer));
+
+  return Status::OK();
+}
+
+#endif
+
+// Operator support related
+
+bool SplitOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params,
+                                       const logging::Logger& logger) const {
+  const auto& input_defs = node.InputDefs();
+  const auto& initializers = input_params.graph_viewer.GetAllInitializedTensors();
+
+  NodeAttrHelper helper(node);
+  const auto axis = helper.Get("axis", 0);
+
+  std::vector<int64_t> input_shape;
+  if (!GetShape(*input_defs[0], input_shape, logger))
+    return false;
+
+  const auto split_dims_at_axis = input_shape[HandleNegativeAxis(axis, input_shape.size())];
+  if (input_defs.size() > 1 && input_defs[1]->Exists()) {
+    if (!CheckIsConstantInitializer(*input_defs[1], input_params.graph_viewer, logger, "'split'")) {
+      return false;
+    }
+    const auto split_shape = *input_defs[1]->Shape();
+    if (split_shape.dim_size() < 2) {
+      LOGS(logger, VERBOSE) << "CoreML SplitND requires to produce at least 2 outputs.";
+      return false;
+    }
+    const auto& splits_tensor = *initializers.at(input_defs[1]->Name());
+    Initializer unpacked_tensor(splits_tensor);
+    auto splits_span = unpacked_tensor.DataAsSpan<uint64_t>();
+    int sum_of_splits = std::accumulate(splits_span.begin(), splits_span.end(), 0);
+    if (sum_of_splits != split_dims_at_axis) {
+      LOGS(logger, VERBOSE) << "Mismatch between the sum of 'split'. Expected: "
+                            << split_dims_at_axis
+                            << "Actual: "
+                            << sum_of_splits;
+      return false;
+    }
+    auto it = std::find(splits_span.begin(), splits_span.end(), 0);
+    if (it != splits_span.end()) {
+      LOGS(logger, VERBOSE) << "Invalid value in 'splits' input.";
+      return false;
+    }
+    if (split_dims_at_axis == -1) {
+      LOGS(logger, VERBOSE) << "Dim at the splitting axis is not allowed to be dynamic.";
+      return false;
+    }
+  } else {
+    if (node.SinceVersion() >= 18) {
+      const auto num_outputs = helper.GetInt("num_outputs");
+      if (!num_outputs.has_value()) {
+        LOGS(logger, VERBOSE) << "No 'num_outputs' provided. For split 18+, num_outputs is a required attribute.";
+        return false;
+      }
+      if (num_outputs.value() < 2) {
+        LOGS(logger, VERBOSE) << "Invalid num_outputs. The value cannot be lower than 2.\n"
+                              << "CoreML SplitND requires at least 2 outputs. num_outputs: " << num_outputs.value();
+        return false;
+      }
+      if (num_outputs.value() != static_cast<int32_t>(node.OutputDefs().size()) || num_outputs.value() > split_dims_at_axis) {
+        LOGS(logger, VERBOSE) << "Invalid num_outputs provided.\n."
+                              << "The value should be smaller or equal to the size of dimension being split. num_outputs: "
+                              << num_outputs.value();
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
+void CreateSplitOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations) {
+  op_registrations.builders.push_back(std::make_unique<SplitOpBuilder>());
+  op_registrations.op_builder_map.emplace(op_type, op_registrations.builders.back().get());
+}
+
+}  // namespace coreml
+}  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/coreml/builders/op_builder_factory.cc b/onnxruntime/core/providers/coreml/builders/op_builder_factory.cc
index c1b09cec8a30a..2c06659852134 100644
--- a/onnxruntime/core/providers/coreml/builders/op_builder_factory.cc
+++ b/onnxruntime/core/providers/coreml/builders/op_builder_factory.cc
@@ -122,6 +122,14 @@ static OpBuilderRegistrations CreateOpBuilderRegistrations() {
     CreateSliceOpBuilder("Slice", op_registrations);
   }
 
+  {  // Softmax
+    CreateSoftmaxOpBuilder("Softmax", op_registrations);
+  }
+
+  {  // Split
+    CreateSplitOpBuilder("Split", op_registrations);
+  }
+
   return op_registrations;
 }
 
diff --git a/onnxruntime/core/providers/coreml/builders/op_builder_factory.h b/onnxruntime/core/providers/coreml/builders/op_builder_factory.h
index b2c8dc765d33d..d72420bcfff88 100644
--- a/onnxruntime/core/providers/coreml/builders/op_builder_factory.h
+++ b/onnxruntime/core/providers/coreml/builders/op_builder_factory.h
@@ -36,6 +36,8 @@ void CreateReshapeOpBuilder(const std::string& op_type, OpBuilderRegistrations&
 void CreateResizeOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
 void CreateShapeOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
 void CreateSliceOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
+void CreateSoftmaxOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
+void CreateSplitOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
 void CreateSqueezeOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
 void CreateTransposeOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
 void CreateUnaryOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
diff --git a/onnxruntime/core/providers/coreml/model/model.mm b/onnxruntime/core/providers/coreml/model/model.mm
index 60e0b1c061a43..155201ad4c39c 100644
--- a/onnxruntime/core/providers/coreml/model/model.mm
+++ b/onnxruntime/core/providers/coreml/model/model.mm
@@ -8,6 +8,7 @@
 
 #include <algorithm>
 #include <cstdint>
+#include <optional>
 #include <unordered_map>
 #include <vector>
 
@@ -31,6 +32,13 @@
 using namespace onnxruntime::coreml;
 
 namespace {
+// Converts a UTF8 const char* to an NSString. Throws on failure.
+NSString* _Nonnull Utf8StringToNSString(const char* utf8_str) {
+  NSString* result = [NSString stringWithUTF8String:utf8_str];
+  ORT_ENFORCE(result != nil, "NSString conversion failed.");
+  return result;
+}
+
 /**
  * Computes the static output shape used to allocate the output tensor.
  * `inferred_shape` is the inferred shape known at model compile time. It may contain dynamic dimensions (-1).
@@ -151,24 +159,79 @@ Status CreateInputFeatureProvider(const std::unordered_map<std::string, OnnxTens
                                                               deallocator:^(void* /* bytes */) {
                                                               }
                                                                     error:&error];
-    ORT_RETURN_IF(error != nil,
+    ORT_RETURN_IF(error != nil || multi_array == nil,
                   "Failed to create MLMultiArray for feature: ", name,
-                  ", error: ", [[error localizedDescription] UTF8String]);
+                  (error != nil) ? MakeString(", error: ", [[error localizedDescription] UTF8String]) : "");
 
     MLFeatureValue* feature_value = [MLFeatureValue featureValueWithMultiArray:multi_array];
-    NSString* feature_name = [NSString stringWithUTF8String:name.c_str()];
+    NSString* feature_name = Utf8StringToNSString(name.c_str());
     feature_dictionary[feature_name] = feature_value;
   }
 
   auto* feature_provider = [[MLDictionaryFeatureProvider alloc] initWithDictionary:feature_dictionary
                                                                              error:&error];
-  ORT_RETURN_IF(error != nil,
-                "Failed to create MLDictionaryFeatureProvider, error: ", [[error localizedDescription] UTF8String]);
+  ORT_RETURN_IF(error != nil || feature_provider == nil,
+                "Failed to create MLDictionaryFeatureProvider",
+                (error != nil) ? MakeString(", error: ", [[error localizedDescription] UTF8String]) : "");
 
   *feature_provider_out = feature_provider;
   conversion_buffers_out = std::move(conversion_buffers);
   return Status::OK();
 }
+
+bool IsArrayContiguous(const MLMultiArray* array) {
+  int64_t batch_stride = [array.strides[0] longLongValue];
+  const auto* shape = array.shape;
+  int64_t batch_elems = 1;
+  for (unsigned long i = 1; i < shape.count; i++) batch_elems *= [shape[i] longLongValue];
+  return batch_stride == batch_elems;
+}
+
+Status CopyMLMultiArrayBuffer(const void* mlmultiarray_buffer, void* tensor_buffer,
+                              const MLMultiArray* array_info,
+                              const OnnxTensorInfo* tensor_info,
+                              const std::optional<unsigned long> mlmultiarray_buffer_size) {
+  if (mlmultiarray_buffer == nullptr) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "mlmultiarray_buffer has no data");
+  }
+
+  const size_t num_elements = array_info.count;
+  const auto onnx_data_type = tensor_info->data_type;
+  switch (onnx_data_type) {
+    case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: {
+      const auto output_data_byte_size = num_elements * sizeof(float);
+      ORT_RETURN_IF_NOT(!mlmultiarray_buffer_size || mlmultiarray_buffer_size == output_data_byte_size,
+                        "CoreML output buffer size and expected output size differ");
+      memcpy(tensor_buffer, mlmultiarray_buffer, output_data_byte_size);
+      break;
+    }
+    case ONNX_NAMESPACE::TensorProto_DataType_INT32: {
+      const auto output_data_byte_size = num_elements * sizeof(int32_t);
+      ORT_RETURN_IF_NOT(!mlmultiarray_buffer_size || mlmultiarray_buffer_size == output_data_byte_size,
+                        "CoreML output buffer size and expected output size differ");
+      memcpy(tensor_buffer, mlmultiarray_buffer, output_data_byte_size);
+      break;
+    }
+    // For this case, since Coreml Spec only uses int32 for model output while onnx provides
+    // int64 for model output data type. We are doing a type casting (int32 -> int64) here
+    // when copying the model to ORT
+    case ONNX_NAMESPACE::TensorProto_DataType_INT64: {
+      ORT_RETURN_IF_NOT(array_info.dataType == MLMultiArrayDataTypeInt32,
+                        "CoreML output data type is not MLMultiArrayDataTypeInt32");
+      ORT_RETURN_IF_NOT(!mlmultiarray_buffer_size || mlmultiarray_buffer_size == num_elements * sizeof(int32_t),
+                        "CoreML output buffer size and expected output size differ");
+      const auto model_output_span = gsl::span{static_cast<const int32_t*>(mlmultiarray_buffer), num_elements};
+      const auto output_span = gsl::span{static_cast<int64_t*>(tensor_buffer), num_elements};
+      std::transform(model_output_span.begin(), model_output_span.end(), output_span.begin(),
+                     [](int32_t v) { return static_cast<int64_t>(v); });
+      break;
+    }
+    default:
+      return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
+                             "Output data type is not supported, actual type: ", onnx_data_type);
+  }
+  return Status::OK();
+}
 }  // namespace
 
 NS_ASSUME_NONNULL_BEGIN
@@ -196,7 +259,7 @@ - (Status)predict:(const std::unordered_map<std::string, OnnxTensorData>&)inputs
                               get_output_tensor_mutable_raw_data_fn
     API_AVAILABLE_OS_VERSIONS;
 
-@property MLModel* model API_AVAILABLE_OS_VERSIONS;
+@property(nullable) MLModel* model API_AVAILABLE_OS_VERSIONS;
 
 @end
 
@@ -240,14 +303,17 @@ - (void)dealloc {
 }
 
 - (Status)loadModel {
-  NSError* error = nil;
   NSURL* modelUrl = [NSURL URLWithString:coreml_model_path_];
-  NSAssert(modelUrl != nil, @"modelUrl must not be nil");
+  if (modelUrl == nil) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to create model URL from path");
+  }
+
+  NSError* error = nil;
   NSURL* compileUrl = [MLModel compileModelAtURL:modelUrl error:&error];
 
   if (error != nil) {
-    return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Error compiling model ",
-                           [[error localizedDescription] cStringUsingEncoding:NSUTF8StringEncoding]);
+    return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Error compiling model: ",
+                           [[error localizedDescription] UTF8String]);
   }
 
   compiled_model_path_ = [compileUrl path];
@@ -258,9 +324,9 @@ - (Status)loadModel {
                             : MLComputeUnitsAll;
   _model = [MLModel modelWithContentsOfURL:compileUrl configuration:config error:&error];
 
-  if (error != NULL) {
-    return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Error Creating MLModel ",
-                           [[error localizedDescription] cStringUsingEncoding:NSUTF8StringEncoding]);
+  if (error != nil || _model == nil) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to create MLModel",
+                           (error != nil) ? MakeString(", error: ", [[error localizedDescription] UTF8String]) : "");
   }
 
   return Status::OK();
@@ -272,7 +338,7 @@ - (Status)predict:(const std::unordered_map<std::string, OnnxTensorData>&)inputs
   Status status = Status::OK();
   ORT_TRY {
     if (_model == nil) {
-      return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "model is not loaded");
+      return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Model is not loaded");
     }
 
     id<MLFeatureProvider> input_features;
@@ -287,20 +353,20 @@ - (Status)predict:(const std::unordered_map<std::string, OnnxTensorData>&)inputs
 
     if (error != nil) {
       return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Error executing model: ",
-                             [[error localizedDescription] cStringUsingEncoding:NSUTF8StringEncoding]);
+                             [[error localizedDescription] UTF8String]);
     }
 
     for (const auto& [output_name, output_tensor_info] : outputs) {
       MLFeatureValue* output_value =
-          [output_features featureValueForName:[NSString stringWithUTF8String:output_name.c_str()]];
+          [output_features featureValueForName:Utf8StringToNSString(output_name.c_str())];
 
       if (output_value == nil) {
         return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "output_features has no value for ", output_name);
       }
 
-      auto* data = [output_value multiArrayValue];
+      MLMultiArray* data = [output_value multiArrayValue];
 
-      const auto coreml_static_output_shape = [&]() {
+      const auto coreml_static_output_shape = [data]() {
         InlinedVector<int64_t> result;
         result.reserve(data.shape.count);
         for (NSNumber* dim in data.shape) {
@@ -324,41 +390,21 @@ - (Status)predict:(const std::unordered_map<std::string, OnnxTensorData>&)inputs
                                  ") do not match");
         }
 
-        const void* model_output_buffer = data.dataPointer;
-
-        if (model_output_buffer == nullptr) {
-          return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "model_output_buffer has no data for ", output_name);
-        }
-
-        const auto onnx_data_type = output_tensor_info.data_type;
-        switch (onnx_data_type) {
-          case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: {
-            const auto output_data_byte_size = num_elements * sizeof(float);
-            memcpy(output_buffer, model_output_buffer, output_data_byte_size);
-            break;
-          }
-          case ONNX_NAMESPACE::TensorProto_DataType_INT32: {
-            const auto output_data_byte_size = num_elements * sizeof(int32_t);
-            memcpy(output_buffer, model_output_buffer, output_data_byte_size);
-            break;
-          }
-          // For this case, since Coreml Spec only uses int32 for model output while onnx provides
-          // int64 for model output data type. We are doing a type casting (int32 -> int64) here
-          // when copying the model to ORT
-          case ONNX_NAMESPACE::TensorProto_DataType_INT64: {
-            ORT_RETURN_IF_NOT(data.dataType == MLMultiArrayDataTypeInt32,
-                              "CoreML output data type is not MLMultiArrayDataTypeInt32");
-
-            const auto model_output_span = gsl::span{static_cast<const int32_t*>(model_output_buffer), num_elements};
-            const auto output_span = gsl::span{static_cast<int64_t*>(output_buffer), num_elements};
-            std::transform(model_output_span.begin(), model_output_span.end(), output_span.begin(),
-                           [](int32_t v) { return static_cast<int64_t>(v); });
-            break;
-          }
-          default:
-            return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
-                                   "Output data type is not supported, actual type: ", onnx_data_type);
+        ORT_RETURN_IF_NOT(IsArrayContiguous(data),
+                          "Non-contiguous output MLMultiArray is not currently supported");
+        __block Status copy_status;
+        const auto* tensor_info = &output_tensor_info;
+        // `getBytesWithHandler` replaces deprecated `.dataPointer` on new versions
+        if (@available(macOS 12.3, iOS 15.4, *)) {
+          [data getBytesWithHandler:^(const void* bytes, NSInteger size) {
+            copy_status = CopyMLMultiArrayBuffer(bytes, output_buffer, data, tensor_info, size);
+          }];
+        } else {
+          // disable size check as old API does not return buffer length
+          copy_status = CopyMLMultiArrayBuffer(data.dataPointer, output_buffer, data, tensor_info, std::nullopt);
         }
+        if (!copy_status.IsOK())
+          return copy_status;
       }
     }
   }
@@ -417,7 +463,7 @@ Status Predict(const std::unordered_map<std::string, OnnxTensorData>& inputs,
     return status;
   }
 
-  return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Execution::LoadModel requires macos 10.15+ or ios 13+ ");
+  return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Execution::LoadModel requires macos 10.15+ or ios 13+");
 }
 
 Status Execution::Predict(const std::unordered_map<std::string, OnnxTensorData>& inputs,
@@ -433,7 +479,7 @@ Status Predict(const std::unordered_map<std::string, OnnxTensorData>& inputs,
     }
   }
 
-  return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Execution::LoadModel requires macos 10.15+ or ios 13+ ");
+  return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Execution::Predict requires macos 10.15+ or ios 13+");
 }
 
 Model::Model(const std::string& path, const logging::Logger& logger, uint32_t coreml_flags)
diff --git a/onnxruntime/core/providers/cpu/cpu_execution_provider.cc b/onnxruntime/core/providers/cpu/cpu_execution_provider.cc
index 18010960e11c8..4553e7ee18913 100644
--- a/onnxruntime/core/providers/cpu/cpu_execution_provider.cc
+++ b/onnxruntime/core/providers/cpu/cpu_execution_provider.cc
@@ -273,7 +273,7 @@ class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDoma
 
 // Opset 9
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, 10, Compress);
-class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, ConstantOfShape);
+class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, 19, ConstantOfShape);
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, 12, MeanVarianceNormalization);
 class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, 12, float, Greater);
 class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, 12, double, Greater);
@@ -365,7 +365,7 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain,
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 10, Slice);
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 11, Dropout);
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 10, NonMaxSuppression);
-class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, IsInf);
+class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 19, IsInf);
 class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 15, float, RoiAlign);
 class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 15, double, RoiAlign);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, ReverseSequence);
@@ -682,9 +682,9 @@ class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, Ga
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, 15, ScatterND);
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, 15, ScatterElements);
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, 13, Identity);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, float, IsNaN);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, double, IsNaN);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, MLFloat16, IsNaN);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, 19, float, IsNaN);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, 19, double, IsNaN);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, 19, MLFloat16, IsNaN);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, bool, NonZero);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, float, NonZero);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, int32_t, NonZero);
@@ -798,7 +798,7 @@ class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDoma
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 16, 18, If);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 16, float, RoiAlign);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 16, double, RoiAlign);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 16, float, GridSample);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 16, 19, float, GridSample);
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 16, 17, ScatterElements);
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 16, 17, ScatterND);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 16, string, Where);
@@ -958,6 +958,23 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain,
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 19, Scan);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 19, Shape);
 
+// Opset 20
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, ConstantOfShape);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, float, GridSample);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, double, GridSample);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, float, AffineGrid);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, double, AffineGrid);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, float, IsNaN);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, double, IsNaN);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, MLFloat16, IsNaN);
+#if !defined(DISABLE_FLOAT8_TYPES)
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, Float8E4M3FN, IsNaN);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, Float8E4M3FNUZ, IsNaN);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, Float8E5M2, IsNaN);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, Float8E5M2FNUZ, IsNaN);
+#endif
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, IsInf);
+
 // !!PLEASE READ BELOW!! Following that, add new entries above this comment
 
 /*  *** IMPORTANT! ***
@@ -1332,7 +1349,7 @@ Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) {
     // Opset 9
     BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, 10,
                                                                     Compress)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, ConstantOfShape)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, 19, ConstantOfShape)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, 12,
                                                                     MeanVarianceNormalization)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, 12, float,
@@ -1489,7 +1506,7 @@ Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) {
                                                                     Dropout)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 10,
                                                                     NonMaxSuppression)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, IsInf)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 19, IsInf)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 15, float,
                                                                           RoiAlign)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 15, double,
@@ -1978,12 +1995,12 @@ Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) {
     BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, 15, ScatterElements)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, 15, ScatterND)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, 13, Identity)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, float,
-                                                                IsNaN)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, double,
-                                                                IsNaN)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, MLFloat16,
-                                                                IsNaN)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, 19, float,
+                                                                          IsNaN)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, 19, double,
+                                                                          IsNaN)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, 19, MLFloat16,
+                                                                          IsNaN)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, bool,
                                                                 NonZero)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, float,
@@ -2168,8 +2185,8 @@ Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) {
                                                                 RoiAlign)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 16, double,
                                                                 RoiAlign)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 16, float,
-                                                                GridSample)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 16, 19, float,
+                                                                          GridSample)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 16, 17, ScatterElements)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 16, 17, ScatterND)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 16, string, Where)>,
@@ -2383,6 +2400,23 @@ Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) {
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 19, uint8_t, Resize)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 19, Scan)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 19, Shape)>,
+
+    // Opset 20
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, ConstantOfShape)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, float, GridSample)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, double, GridSample)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, float, AffineGrid)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, double, AffineGrid)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, float, IsNaN)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, double, IsNaN)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, MLFloat16, IsNaN)>,
+#if !defined(DISABLE_FLOAT8_TYPES)
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, Float8E4M3FN, IsNaN)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, Float8E4M3FNUZ, IsNaN)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, Float8E5M2, IsNaN)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, Float8E5M2FNUZ, IsNaN)>,
+#endif
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, IsInf)>,
   };
 
   for (auto& function_table_entry : function_table) {
diff --git a/onnxruntime/core/providers/cpu/cpu_provider_shared.cc b/onnxruntime/core/providers/cpu/cpu_provider_shared.cc
index ddcc04cf4a45c..9c55d37f550f4 100644
--- a/onnxruntime/core/providers/cpu/cpu_provider_shared.cc
+++ b/onnxruntime/core/providers/cpu/cpu_provider_shared.cc
@@ -115,6 +115,12 @@ struct ProviderHostCPUImpl : ProviderHostCPU {
   Status PrepareOutputShape(const Tensor* indices, const int64_t depth_val, const int64_t axis, int64_t& prefix_dim_size, int64_t& suffix_dim_size, TensorShapeVector& output_shape) override { return onnxruntime::PrepareOutputShape(indices, depth_val, axis, prefix_dim_size, suffix_dim_size, output_shape); }
 
   // From cpu/tensor/slice.h (direct)
+  Status SliceBase__FlattenOutputDims(gsl::span<const int64_t> input_dimensions, gsl::span<const int64_t> output_dims,
+                                      TensorShapeVector& starts, TensorShapeVector& ends, TensorShapeVector& steps,
+                                      TensorShapeVector*& p_flattened_input_dims, TensorShapeVector*& p_flattened_output_dims) override {
+    return SliceBase::FlattenOutputDims(input_dimensions, output_dims, starts, ends, steps, p_flattened_input_dims, p_flattened_output_dims);
+  }
+
   Status SliceBase__PrepareForCompute(gsl::span<const int64_t> raw_starts,
                                       gsl::span<const int64_t> raw_ends,
                                       gsl::span<const int64_t> raw_axes,
@@ -239,6 +245,26 @@ struct ProviderHostCPUImpl : ProviderHostCPU {
                                                                             subgraph_session_state);
   }
 
+  Status WhisperBeamSearch__Compute(const contrib::transformers::WhisperBeamSearch* p, OpKernelContext* ctx) override {
+    return p->contrib::transformers::WhisperBeamSearch::Compute(ctx);
+  }
+
+  void BeamSearchParameters__ParseFromAttributes(contrib::transformers::BeamSearchParameters* p, const OpKernelInfo& info) override {
+    p->contrib::transformers::BeamSearchParameters::ParseFromAttributes(info);
+  }
+
+  void GreedySearchParameters__ParseFromAttributes(contrib::transformers::GreedySearchParameters* p, const OpKernelInfo& info) override {
+    p->contrib::transformers::GreedySearchParameters::ParseFromAttributes(info);
+  }
+
+  void SamplingParameters__ParseFromAttributes(contrib::transformers::SamplingParameters* p, const OpKernelInfo& info) override {
+    p->contrib::transformers::SamplingParameters::ParseFromAttributes(info);
+  }
+
+  void WhisperBeamSearchParameters__ParseFromAttributes(contrib::transformers::WhisperBeamSearchParameters* p, const OpKernelInfo& info) override {
+    p->contrib::transformers::WhisperBeamSearchParameters::ParseFromAttributes(info);
+  }
+
   void GreedySearch__Init(contrib::transformers::GreedySearch* p, const OpKernelInfo& info) override {
     p->contrib::transformers::GreedySearch::Init(info);
   }
diff --git a/onnxruntime/core/providers/cpu/cpu_provider_shared.h b/onnxruntime/core/providers/cpu/cpu_provider_shared.h
index 7d4620f0039eb..8dee1cd620282 100644
--- a/onnxruntime/core/providers/cpu/cpu_provider_shared.h
+++ b/onnxruntime/core/providers/cpu/cpu_provider_shared.h
@@ -8,8 +8,13 @@ class LongformerAttentionBase;
 class AttentionBase;
 namespace transformers {
 class BeamSearch;
+class WhisperBeamSearch;
 class GreedySearch;
 class Sampling;
+struct BeamSearchParameters;
+struct GreedySearchParameters;
+struct SamplingParameters;
+struct WhisperBeamSearchParameters;
 }  // namespace transformers
 }  // namespace contrib
 
@@ -63,6 +68,10 @@ struct ProviderHostCPU {
   virtual Status PrepareOutputShape(const Tensor* indices, const int64_t depth_val, const int64_t axis, int64_t& prefix_dim_size, int64_t& suffix_dim_size, TensorShapeVector& output_shape) = 0;
 
   // From cpu/tensor/slice.h
+  virtual Status SliceBase__FlattenOutputDims(gsl::span<const int64_t> input_dimensions, gsl::span<const int64_t> output_dims,
+                                              TensorShapeVector& starts, TensorShapeVector& ends, TensorShapeVector& steps,
+                                              TensorShapeVector*& p_flattened_input_dims, TensorShapeVector*& p_flattened_output_dims) = 0;
+
   virtual Status SliceBase__PrepareForCompute(gsl::span<const int64_t> raw_starts,
                                               gsl::span<const int64_t> raw_ends,
                                               gsl::span<const int64_t> raw_axes,
@@ -165,6 +174,15 @@ struct ProviderHostCPU {
                                                         const SessionState& session_state,
                                                         const std::string& attribute_name,
                                                         const SessionState& subgraph_session_state) = 0;
+  virtual Status WhisperBeamSearch__Compute(const contrib::transformers::WhisperBeamSearch* p, OpKernelContext* ctx) = 0;
+
+  virtual void BeamSearchParameters__ParseFromAttributes(contrib::transformers::BeamSearchParameters* p, const OpKernelInfo& info) = 0;
+
+  virtual void GreedySearchParameters__ParseFromAttributes(contrib::transformers::GreedySearchParameters* p, const OpKernelInfo& info) = 0;
+
+  virtual void SamplingParameters__ParseFromAttributes(contrib::transformers::SamplingParameters* p, const OpKernelInfo& info) = 0;
+
+  virtual void WhisperBeamSearchParameters__ParseFromAttributes(contrib::transformers::WhisperBeamSearchParameters* p, const OpKernelInfo& info) = 0;
 
   // GreedySearch
   virtual void GreedySearch__Init(contrib::transformers::GreedySearch* p, const OpKernelInfo& info) = 0;
diff --git a/onnxruntime/core/providers/cpu/generator/constant_of_shape.cc b/onnxruntime/core/providers/cpu/generator/constant_of_shape.cc
index 920db5ed34dd1..a93da12ccf595 100644
--- a/onnxruntime/core/providers/cpu/generator/constant_of_shape.cc
+++ b/onnxruntime/core/providers/cpu/generator/constant_of_shape.cc
@@ -11,11 +11,16 @@ ORT_SPECIFY_OP_KERNEL_ARG_DEFAULT_TYPE_LIST_ALL_OPSETS(
     kCpuExecutionProvider, kOnnxDomain, ConstantOfShape, Output, 0,
     ConstantOfShapeDefaultOutputTypes);
 
+ORT_SPECIFY_OP_KERNEL_ARG_DEFAULT_TYPE_LIST(
+    kCpuExecutionProvider, kOnnxDomain, ConstantOfShape, 20, Output, 0,
+    ConstantOfShapeDefaultOutputTypesOpset20);
+
 // pytorch converter uses ConstantOfShape with int64 to create Pad input
 // https://github.com/pytorch/pytorch/blob/044b519a80459f6787f6723c1c091a18b153d184/torch/onnx/symbolic_opset11.py#L449
 ORT_SPECIFY_OP_KERNEL_ARG_REQUIRED_TYPES_ALL_OPSETS(
     kCpuExecutionProvider, kOnnxDomain, ConstantOfShape, Output, 0,
     int64_t);
+
 }  // namespace op_kernel_type_control
 
 namespace {
@@ -24,6 +29,10 @@ using EnabledOutputTypes =
     ORT_OP_KERNEL_ARG_ENABLED_TYPE_LIST_ALL_OPSETS(
         kCpuExecutionProvider, kOnnxDomain, ConstantOfShape, Output, 0);
 
+using EnabledOutputTypesOpset20 =
+    ORT_OP_KERNEL_ARG_ENABLED_TYPE_LIST(
+        kCpuExecutionProvider, kOnnxDomain, ConstantOfShape, 20, Output, 0);
+
 class ConstantOfShape final : public ConstantOfShapeBase<EnabledOutputTypes>, public OpKernel {
  public:
   explicit ConstantOfShape(const OpKernelInfo& info) : ConstantOfShapeBase(info), OpKernel(info) {}
@@ -66,13 +75,22 @@ Status ConstantOfShape::Compute(OpKernelContext* ctx) const {
 
 }  // namespace
 
-ONNX_CPU_OPERATOR_KERNEL(
+ONNX_CPU_OPERATOR_VERSIONED_KERNEL(
     ConstantOfShape,
     9,
+    19,
     KernelDefBuilder()
         .TypeConstraint("T1", DataTypeImpl::GetTensorType<int64_t>())
         .TypeConstraint("T2",
                         BuildKernelDefConstraintsFromTypeList<EnabledOutputTypes>()),
     ConstantOfShape);
 
+ONNX_CPU_OPERATOR_KERNEL(
+    ConstantOfShape,
+    20,
+    KernelDefBuilder()
+        .TypeConstraint("T1", DataTypeImpl::GetTensorType<int64_t>())
+        .TypeConstraint("T2",
+                        BuildKernelDefConstraintsFromTypeList<EnabledOutputTypesOpset20>()),
+    ConstantOfShape);
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/cpu/generator/constant_of_shape_base.h b/onnxruntime/core/providers/cpu/generator/constant_of_shape_base.h
index d96ff06e3d6d8..9aa73c714daea 100644
--- a/onnxruntime/core/providers/cpu/generator/constant_of_shape_base.h
+++ b/onnxruntime/core/providers/cpu/generator/constant_of_shape_base.h
@@ -23,6 +23,18 @@ using ConstantOfShapeDefaultOutputTypes =
         uint8_t, uint16_t, uint32_t, uint64_t,
         bool>;
 
+using ConstantOfShapeDefaultOutputTypesOpset20 =
+    TypeList<
+        BFloat16,
+        MLFloat16,
+        float, double,
+#if !defined(DISABLE_FLOAT8_TYPES)
+        Float8E4M3FN, Float8E4M3FNUZ, Float8E5M2, Float8E5M2FNUZ,
+#endif
+        int8_t, int16_t, int32_t, int64_t,
+        uint8_t, uint16_t, uint32_t, uint64_t,
+        bool>;
+
 template <typename EnabledOutputTypeList = ConstantOfShapeDefaultOutputTypes>
 class ConstantOfShapeBase {
  protected:
diff --git a/onnxruntime/core/providers/cpu/generator/random.cc b/onnxruntime/core/providers/cpu/generator/random.cc
index b63c0d2161ad5..dfa27f1f44d5a 100644
--- a/onnxruntime/core/providers/cpu/generator/random.cc
+++ b/onnxruntime/core/providers/cpu/generator/random.cc
@@ -428,4 +428,14 @@ template Status MultinomialComputeShared<int64_t>(AllocatorPtr& alloc,
                                                   std::default_random_engine& generator,
                                                   Tensor& Y);
 
+#if !defined(DISABLE_CONTRIB_OPS)
+// used by onnxruntime/contrib_ops/cpu/transformers/sampling_cpu_helper.h
+template Status MultinomialComputeShared<int32_t>(AllocatorPtr& alloc,
+                                                  const Tensor& X,
+                                                  const int64_t batch_size,
+                                                  const int64_t num_classes,
+                                                  const int64_t num_samples,
+                                                  std::default_random_engine& generator,
+                                                  Tensor& Y);
+#endif
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/cpu/math/element_wise_ops.cc b/onnxruntime/core/providers/cpu/math/element_wise_ops.cc
index 3192c8573c5c0..1d524a90302e7 100644
--- a/onnxruntime/core/providers/cpu/math/element_wise_ops.cc
+++ b/onnxruntime/core/providers/cpu/math/element_wise_ops.cc
@@ -967,7 +967,7 @@ Status Xor::Compute(OpKernelContext* context) const {
       },
       [](BroadcastHelper& per_iter_bh) {
         per_iter_bh.OutputEigen<bool>() =
-            per_iter_bh.EigenInput0<bool>().array() ^ per_iter_bh.EigenInput1<bool>().array();
+            per_iter_bh.EigenInput0<bool>().array() != per_iter_bh.EigenInput1<bool>().array();
       }};
 
   UntypedBroadcastTwo(*context, funcs, 1.0);
diff --git a/onnxruntime/core/providers/cpu/nn/batch_norm_helper.h b/onnxruntime/core/providers/cpu/nn/batch_norm_helper.h
index 8507d87fd2442..a5d46aff83b50 100644
--- a/onnxruntime/core/providers/cpu/nn/batch_norm_helper.h
+++ b/onnxruntime/core/providers/cpu/nn/batch_norm_helper.h
@@ -1,4 +1,5 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
+// Copyright (c) 2023 NVIDIA Corporation.
 // Licensed under the MIT License.
 
 #pragma once
@@ -22,11 +23,17 @@ class BatchNormHelper {
                                        const Tensor* B,
                                        const Tensor* mean,
                                        const Tensor* var,
-                                       bool is_spatial = true) {
+                                       bool is_spatial = true,
+                                       bool is_nhwc = false) {
     const auto& x_dims = X->Shape().GetDims();
 
     // If x_dims size < 2, num_channels defaults to 1.
-    int64_t num_channels = x_dims.size() > 1 ? x_dims[1] : 1;
+    int64_t num_channels;
+    if (is_nhwc) {
+      num_channels = x_dims.size() > 1 ? x_dims[x_dims.size() - 1] : 1;
+    } else {
+      num_channels = x_dims.size() > 1 ? x_dims[1] : 1;
+    }
     // the first 2 are respectively - N and C.
     int num_feature_dims = x_dims.size() > 1 ? static_cast<int>(x_dims.size() - 2) : 0;
 
@@ -109,7 +116,7 @@ class BatchNormHelper {
     return common::Status::OK();
   }
 
-  static void NormalizeDims(const TensorShape& x_shape, std::vector<int64_t>& new_dims) {
+  static void NormalizeDims(const TensorShape& x_shape, std::vector<int64_t>& new_dims, bool is_nhwc = false) {
     new_dims.clear();
     auto orig_dims = x_shape.GetDims();
     ORT_ENFORCE(orig_dims.size() < 6,
@@ -122,13 +129,19 @@ class BatchNormHelper {
 
     auto rank = x_shape.NumDimensions();
     auto num_samples = rank > 0 ? orig_dims[0] : 1;  // NCHW
-    auto num_channels = rank > 1 ? orig_dims[1] : 1;
-    auto height = rank > 2 ? orig_dims[2] : 1;
+    const size_t channel_dim = is_nhwc ? rank - 1 : 1;
+    const size_t height_dim = is_nhwc ? 1 : 2;
+    auto num_channels = rank > 1 ? orig_dims[channel_dim] : 1;
+    auto height = rank > 2 ? orig_dims[height_dim] : 1;
     int64_t width = 1;
-    new_dims = {num_samples, num_channels, height, width};
+    if (is_nhwc) {
+      new_dims = {num_samples, height, width, num_channels};
+    } else {
+      new_dims = {num_samples, num_channels, height, width};
+    }
   }
 };
 }  // namespace onnxruntime
 #if defined(_MSC_VER) && !defined(__clang__)
 #pragma warning(pop)
-#endif
\ No newline at end of file
+#endif
diff --git a/onnxruntime/core/providers/cpu/nn/conv_transpose_attributes.h b/onnxruntime/core/providers/cpu/nn/conv_transpose_attributes.h
index a4d67ec63f0c2..4b3b934834ac8 100644
--- a/onnxruntime/core/providers/cpu/nn/conv_transpose_attributes.h
+++ b/onnxruntime/core/providers/cpu/nn/conv_transpose_attributes.h
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 /* Modifications Copyright (c) Microsoft. */
+// Copyright (c) 2023 NVIDIA Corporation.
 
 #pragma once
 
@@ -44,17 +45,19 @@ struct ConvTransposeAttributes : public ConvAttributes {
   };
 
   Status PrepareForCompute(OpKernelContext* context, bool has_bias, Prepare& p,
-                           bool dynamic_padding = false, const TensorShape* filter_shape = nullptr) const {
+                           bool dynamic_padding = false, const TensorShape* filter_shape = nullptr,
+                           bool is_nhwc = false) const {
     const Tensor* X = context->Input<Tensor>(0);
     const Tensor* F = (filter_shape != nullptr) ? nullptr : context->Input<Tensor>(1);
     const TensorShape& F_Shape = (filter_shape != nullptr) ? *filter_shape : F->Shape();
     const Tensor* Pads = dynamic_padding ? context->Input<Tensor>(2) : nullptr;
     const Tensor* B = has_bias ? (dynamic_padding ? context->Input<Tensor>(3) : context->Input<Tensor>(2)) : nullptr;
-    TensorShape input_shape = X->Shape().Slice(2);
 
-    const int64_t num_input_channels = X->Shape()[1];
+    const int rank = static_cast<int>(X->Shape().NumDimensions());
+    TensorShape input_shape = X->Shape().Slice(is_nhwc ? 1 : 2, is_nhwc ? rank - 1 : rank);
+    const int64_t num_input_channels = is_nhwc ? X->Shape()[rank - 1] : X->Shape()[1];
     const int64_t N = X->Shape()[0];
-    const int64_t num_output_channels_multiplier = F_Shape[1];
+    const int64_t num_output_channels_multiplier = is_nhwc ? F_Shape[3] : F_Shape[1];
     const int64_t num_output_channels = num_output_channels_multiplier * group;
 
     // input validations
@@ -85,7 +88,7 @@ struct ConvTransposeAttributes : public ConvAttributes {
     }
 
     TensorShapeVector kernel_shape;
-    ORT_RETURN_IF_ERROR(ComputeKernelShape(F_Shape, kernel_shape));
+    ORT_RETURN_IF_ERROR(ComputeKernelShape(F_Shape, kernel_shape, is_nhwc));
 
     TensorShapeVector local_output_padding(output_padding);
     if (local_output_padding.empty()) {
@@ -115,7 +118,7 @@ struct ConvTransposeAttributes : public ConvAttributes {
     TensorShapeVector Y_dims;
 
     ComputePadsAndOutputShape(input_shape, num_output_channels, kernel_shape,
-                              local_strides, local_dilations, local_output_padding, N, &local_pads, &Y_dims);
+                              local_strides, local_dilations, local_output_padding, N, &local_pads, &Y_dims, is_nhwc);
     TensorShape Yshape(Y_dims);
     Tensor* Y = context->Output(0, Yshape);
 
@@ -137,9 +140,14 @@ struct ConvTransposeAttributes : public ConvAttributes {
   void ComputePadsAndOutputShape(TensorShape input_shape, int64_t output_channel,
                                  const TensorShapeVector& kernel_shape, const TensorShapeVector& p_strides,
                                  const TensorShapeVector& p_dilations, const TensorShapeVector& p_output_padding, const int64_t N,
-                                 ConvPadVector* p_pads, TensorShapeVector* output_shape_p) const {
+                                 ConvPadVector* p_pads, TensorShapeVector* output_shape_p,
+                                 bool is_nhwc = false) const {
     size_t output_shape_size = output_shape.size();
-    output_shape_p->insert(output_shape_p->begin(), {N, output_channel});
+    if (is_nhwc) {
+      output_shape_p->insert(output_shape_p->begin(), {N});
+    } else {
+      output_shape_p->insert(output_shape_p->begin(), {N, output_channel});
+    }
 
     size_t rank = input_shape.NumDimensions();
     for (size_t dim = 0; dim < rank; ++dim) {
@@ -163,6 +171,9 @@ struct ConvTransposeAttributes : public ConvAttributes {
       ORT_ENFORCE(dim_size > 0, "Invalid input shape: ", input_shape.ToString());
       output_shape_p->push_back(dim_size);
     }
+    if (is_nhwc) {
+      output_shape_p->push_back(output_channel);
+    }
   }
 
   TensorShapeVector output_padding;
diff --git a/onnxruntime/core/providers/cpu/nn/instance_norm_helper.h b/onnxruntime/core/providers/cpu/nn/instance_norm_helper.h
index 48e54ac7eeefb..9a2a710fd291a 100644
--- a/onnxruntime/core/providers/cpu/nn/instance_norm_helper.h
+++ b/onnxruntime/core/providers/cpu/nn/instance_norm_helper.h
@@ -1,4 +1,5 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
+// Copyright (c) 2023 NVIDIA Corporation.
 // Licensed under the MIT License.
 
 #pragma once
@@ -8,13 +9,16 @@
 #include "core/framework/tensor.h"
 #endif
 #include <sstream>
+#include <utility>
 
 namespace onnxruntime {
 
 class InstanceNormHelper {
  public:
-  static common::Status ValidateInputs(const Tensor* input, const Tensor* scale, const Tensor* B) {
-    if (input->Shape().NumDimensions() < 3) {
+  static common::Status ValidateInputs(const Tensor* input, const Tensor* scale, const Tensor* B,
+                                       bool is_nhwc = false) {
+    const auto rank = input->Shape().NumDimensions();
+    if (rank < 3) {
       std::ostringstream ostr;
       ostr << "Invalid input data: number of dimensions is less than 3: " << input->Shape().NumDimensions();
       return common::Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, ostr.str());
@@ -24,10 +28,13 @@ class InstanceNormHelper {
       ostr << "Invalid input scale: number of dimensions is not 1: " << scale->Shape().NumDimensions();
       return common::Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, ostr.str());
     }
-    if (scale->Shape().Size() != input->Shape().GetDims()[1]) {
+    auto in_dims = input->Shape().GetDims();
+    auto in_channels = is_nhwc ? in_dims[rank - 1] : in_dims[1];
+
+    if (scale->Shape().Size() != in_channels) {
       std::ostringstream ostr;
-      ostr << "Mismatch between input data and scale: size of scale != input channel count "
-           << scale->Shape().Size() << " vs. " << input->Shape().GetDims()[1];
+      ostr << "Mismatch between input data and scale: size of scale != input channel count " << scale->Shape().Size()
+           << " vs. " << in_channels;
       return common::Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, ostr.str());
     }
 
@@ -37,10 +44,10 @@ class InstanceNormHelper {
       return common::Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, ostr.str());
     }
 
-    if (B->Shape().Size() != input->Shape().GetDims()[1]) {
+    if (B->Shape().Size() != in_channels) {
       std::ostringstream ostr;
-      ostr << "Mismatch between input data and B: size of B != input channel count "
-           << B->Shape().Size() << " vs. " << input->Shape().GetDims()[1];
+      ostr << "Mismatch between input data and B: size of B != input channel count " << B->Shape().Size() << " vs. "
+           << in_channels;
       return common::Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, ostr.str());
     }
 
diff --git a/onnxruntime/core/providers/cpu/nn/pool_attributes.h b/onnxruntime/core/providers/cpu/nn/pool_attributes.h
index 54f41f09f4b24..118cb4a3ba4bd 100644
--- a/onnxruntime/core/providers/cpu/nn/pool_attributes.h
+++ b/onnxruntime/core/providers/cpu/nn/pool_attributes.h
@@ -1,4 +1,5 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
+// Copyright (c) 2023 NVIDIA Corporation.
 // Licensed under the MIT License.
 
 #pragma once
@@ -98,28 +99,34 @@ struct PoolAttributes {
 
   TensorShapeVector SetOutputSize(const TensorShape& input_shape,
                                   int64_t output_channel,
-                                  TensorShapeVector* actual_pads) const {
+                                  TensorShapeVector* actual_pads,
+                                  bool is_nhwc = false) const {
     ORT_ENFORCE(input_shape.Size() > 0 || input_shape[0] == 0,
                 "Invalid input shape. Only N can be zero. Got:", input_shape);
     TensorShapeVector output_dims;
     int64_t N = input_shape[0];
-    InferOutputSize(input_shape.GetDims(), &output_dims, actual_pads);
-
-    output_dims.insert(output_dims.begin(), {N, output_channel});
-
+    InferOutputSize(input_shape.GetDims(), &output_dims, actual_pads, is_nhwc);
+    if (is_nhwc) {
+      output_dims.insert(output_dims.begin(), N);
+      output_dims.push_back(output_channel);
+    } else {
+      output_dims.insert(output_dims.begin(), {N, output_channel});
+    }
     return output_dims;
   }
 
   void InferOutputSize(gsl::span<const int64_t> input_dims,
                        TensorShapeVector* output_dims,
-                       TensorShapeVector* actual_pads) const {
+                       TensorShapeVector* actual_pads,
+                       bool is_nhwc = false) const {
     ORT_ENFORCE(input_dims.size() >= 2);
     if (global_pooling) {
       output_dims->assign(input_dims.size() - 2, 1);
     } else {
       for (size_t dim = 0; dim < input_dims.size() - 2; ++dim) {
         int64_t dim_size = 0;
-        ComputeSizePadDilations(static_cast<int>(input_dims[dim + 2]),
+        auto spatial_dim = is_nhwc ? input_dims[dim + 1] : input_dims[dim + 2];
+        ComputeSizePadDilations(static_cast<int>(spatial_dim),
                                 strides[dim],
                                 kernel_shape[dim],
                                 &actual_pads->at(dim),
diff --git a/onnxruntime/core/providers/cpu/nn/tfidfvectorizer.cc b/onnxruntime/core/providers/cpu/nn/tfidfvectorizer.cc
index f36b75c508da0..eb245a4c9ba0c 100644
--- a/onnxruntime/core/providers/cpu/nn/tfidfvectorizer.cc
+++ b/onnxruntime/core/providers/cpu/nn/tfidfvectorizer.cc
@@ -141,14 +141,11 @@ struct TfIdfVectorizer::Impl {
   Impl(const Impl&) = delete;
   Impl& operator=(const Impl&) = delete;
 
-  void IncrementCount(size_t ngram_id, size_t row_num,
-                      std::vector<uint32_t>& frequencies) const {
+  inline size_t OutputIdToIncrement(size_t ngram_id) const {
     assert(ngram_id != 0);
     --ngram_id;
     assert(ngram_id < ngram_indexes_.size());
-    size_t output_idx = row_num * output_size_ + SafeInt<size_t>(ngram_indexes_[ngram_id]);
-    assert(output_idx < frequencies.size());
-    ++frequencies[output_idx];
+    return SafeInt<size_t>(ngram_indexes_[ngram_id]);
   }
 };
 
@@ -252,77 +249,17 @@ TfIdfVectorizer::TfIdfVectorizer(const OpKernelInfo& info) : OpKernel(info), imp
 
 TfIdfVectorizer::~TfIdfVectorizer() = default;
 
-void TfIdfVectorizer::OutputResult(OpKernelContext* ctx, size_t B, const std::vector<uint32_t>& frequences) const {
-  const Impl& impl = *impl_;
-  std::vector<int64_t> output_dims;
-  if (B == 0) {
-    output_dims.push_back(impl.output_size_);
-    B = 1;  // For use in the loops below
-  } else {
-    output_dims.push_back(B);
-    output_dims.push_back(impl.output_size_);
-  }
-
-  const auto row_size = impl.output_size_;
-
-  TensorShape output_shape(output_dims);
-  assert(frequences.size() == static_cast<size_t>(output_shape.Size()));
-
-  auto Y = ctx->Output(0, output_shape);
-  auto output_data = Y->MutableData<float>();
-  const auto& w = impl.weights_;
-  switch (impl.weighting_criteria_) {
-    case kTF: {
-      for (auto f : frequences) {
-        *output_data++ = static_cast<float>(f);
-      }
-    } break;
-    case kIDF: {
-      if (!w.empty()) {
-        const auto* freqs = frequences.data();
-        for (size_t batch = 0; batch < B; ++batch) {
-          for (size_t i = 0; i < row_size; ++i) {
-            *output_data++ = (*freqs++ > 0) ? w[i] : 0;
-          }
-        }
-      } else {
-        for (auto f : frequences) {
-          *output_data++ = (f > 0) ? 1.0f : 0;
-        }
-      }
-    } break;
-    case kTFIDF: {
-      if (!w.empty()) {
-        const auto* freqs = frequences.data();
-        for (size_t batch = 0; batch < B; ++batch) {
-          for (size_t i = 0; i < row_size; ++i) {
-            *output_data++ = *freqs++ * w[i];
-          }
-        }
-      } else {
-        for (auto f : frequences) {
-          *output_data++ = static_cast<float>(f);
-        }
-      }
-    } break;
-    case kNone:  // fall-through
-    default:
-      assert(false);
-  }
-}
-
-void TfIdfVectorizer::ComputeImpl(OpKernelContext* ctx, ptrdiff_t row_num, size_t row_size,
-                                  std::vector<uint32_t>& frequencies) const {
-  auto X = ctx->Input<Tensor>(0);
-  const auto elem_size = X->DataType()->Size();
-
-  const void* const row_begin = AdvanceElementPtr(X->DataRaw(), row_num * row_size, elem_size);
+void TfIdfVectorizer::ComputeImpl(const void* x_data_raw, size_t elem_size, ptrdiff_t row_num, size_t row_size,
+                                  bool is_input_string, gsl::span<float> output_data,
+                                  std::function<void(size_t, gsl::span<float>&)>& fn_weight) const {
+  const void* const row_begin = AdvanceElementPtr(x_data_raw, row_num * row_size, elem_size);
   const void* const row_end = AdvanceElementPtr(row_begin, row_size, elem_size);
 
   const auto& impl = *impl_;
   const auto max_gram_length = impl.max_gram_length_;
   const auto max_skip_distance = impl.max_skip_count_ + 1;  // Convert to distance
   auto start_ngram_size = impl.min_gram_length_;
+  size_t output_idx;
 
   for (auto skip_distance = 1; skip_distance <= max_skip_distance; ++skip_distance) {
     auto ngram_start = row_begin;
@@ -336,7 +273,7 @@ void TfIdfVectorizer::ComputeImpl(OpKernelContext* ctx, ptrdiff_t row_num, size_
       }
 
       auto ngram_item = ngram_start;
-      if (X->IsDataTypeString()) {
+      if (is_input_string) {
         const std::string* str_item = reinterpret_cast<const std::string*>(ngram_item);
         const StrMap* str_map = &impl.str_map_;
         for (auto ngram_size = 1;
@@ -349,7 +286,8 @@ void TfIdfVectorizer::ComputeImpl(OpKernelContext* ctx, ptrdiff_t row_num, size_
             break;
           }
           if (ngram_size >= start_ngram_size && hit->second->id_ != 0) {
-            impl.IncrementCount(hit->second->id_, row_num, frequencies);
+            output_idx = impl.OutputIdToIncrement(hit->second->id_);
+            fn_weight(output_idx, output_data);
           }
           str_map = &hit->second->leafs_;
         }
@@ -360,13 +298,14 @@ void TfIdfVectorizer::ComputeImpl(OpKernelContext* ctx, ptrdiff_t row_num, size_
              ngram_size <= max_gram_length &&
              ngram_item < ngram_row_end;
              ++ngram_size, ngram_item = AdvanceElementPtr(ngram_item, skip_distance, elem_size)) {
-          int64_t val = (X->IsDataType<int32_t>()) ? int64_t{*reinterpret_cast<const int32_t*>(ngram_item)} : *reinterpret_cast<const int64_t*>(ngram_item);
+          int64_t val = (elem_size == 4) ? int64_t{*reinterpret_cast<const int32_t*>(ngram_item)} : *reinterpret_cast<const int64_t*>(ngram_item);
           auto hit = int_map->find(val);
           if (hit == int_map->end()) {
             break;
           }
           if (ngram_size >= start_ngram_size && hit->second->id_ != 0) {
-            impl.IncrementCount(hit->second->id_, row_num, frequencies);
+            output_idx = impl.OutputIdToIncrement(hit->second->id_);
+            fn_weight(output_idx, output_data);
           }
           int_map = &hit->second->leafs_;
         }
@@ -412,31 +351,76 @@ Status TfIdfVectorizer::Compute(OpKernelContext* ctx) const {
   }
 
   assert((num_rows * C) == total_items);
-  // Frequency holder allocate [B..output_size_]
-  // and init all to zero
-  std::vector<uint32_t> frequencies;
-  frequencies.resize(num_rows * impl_->output_size_, 0);
+  const Impl& impl = *impl_;
+  TensorShapeVector output_dims;
+  if (B == 0) {
+    output_dims.push_back(impl.output_size_);
+    B = 1;  // For use in the loops below
+  } else {
+    output_dims.push_back(B);
+    output_dims.push_back(impl.output_size_);
+  }
+  TensorShape output_shape(output_dims);
+
+  auto Y = ctx->Output(0, output_shape);
+  auto output_data = Y->MutableData<float>();
+  const bool is_input_string = X->IsDataTypeString();
 
   if (total_items == 0 ||
-      (X->IsDataTypeString() && impl_->str_map_.empty()) ||
+      (is_input_string && impl_->str_map_.empty()) ||
       ((X->IsDataType<int32_t>() || X->IsDataType<int64_t>()) && impl_->int64_map_.empty())) {
     // TfidfVectorizer may receive an empty input when it follows a Tokenizer
     // (for example for a string containing only stopwords).
     // TfidfVectorizer returns a zero tensor of shape
     // {b_dim, output_size} when b_dim is the number of received observations
     // and output_size the is the maximum value in ngram_indexes attribute plus 1.
-    OutputResult(ctx, B, frequencies);
+    memset(output_data, 0, static_cast<size_t>(output_shape.Size() * sizeof(float)));
     return Status::OK();
   }
 
-  std::function<void(ptrdiff_t)> fn = [this, ctx, C, &frequencies](ptrdiff_t row_num) {
-    ComputeImpl(ctx, row_num, C, frequencies);
-  };
+  auto x_data_raw = ctx->Input<Tensor>(0)->DataRaw();
+  const auto elem_size = X->DataType()->Size();
+  int32_t num_batches = std::min<int32_t>(concurrency::ThreadPool::DegreeOfParallelism(ctx->GetOperatorThreadPool()) * 2, num_rows);
 
-  concurrency::ThreadPool::TryBatchParallelFor(ctx->GetOperatorThreadPool(), num_rows, std::move(fn), 0);
+  const auto& w = impl.weights_;
+  std::function<void(size_t, gsl::span<float>&)> fn_weight;
 
-  OutputResult(ctx, B, frequencies);
+  switch (impl.weighting_criteria_) {
+    case kTF:
+      fn_weight = [](size_t i, gsl::span<float>& out) { out[i] += 1.0f; };
+      break;
+    case kIDF:
+      if (!w.empty()) {
+        fn_weight = [&w](size_t i, gsl::span<float>& out) { out[i] = w[i]; };
+      } else {
+        fn_weight = [](size_t i, gsl::span<float>& out) { out[i] = 1.0f; };
+      }
+      break;
+    case kTFIDF:
+      if (!w.empty()) {
+        fn_weight = [&w](size_t i, gsl::span<float>& out) { out[i] += w[i]; };
+      } else {
+        fn_weight = [](size_t i, gsl::span<float>& out) { out[i] += 1.0f; };
+      }
+      break;
+    case kNone:  // fall-through
+    default:
+      assert(false);
+  }
+
+  std::function<void(ptrdiff_t)> fn = [this, C, output_data, x_data_raw, elem_size,
+                                       is_input_string, num_batches, num_rows, &fn_weight](ptrdiff_t batch_num) {
+    // Frequency holder allocate [B..output_size_] and init all to zero.
+    auto work = concurrency::ThreadPool::PartitionWork(batch_num, num_batches, static_cast<size_t>(num_rows));
+    std::vector<uint32_t> frequencies(this->impl_->output_size_);
+    for (auto row_num = work.start; row_num < work.end; ++row_num) {
+      auto out = gsl::span<float>(output_data + row_num * this->impl_->output_size_, this->impl_->output_size_);
+      std::fill(out.begin(), out.end(), 0.0f);
+      ComputeImpl(x_data_raw, elem_size, row_num, C, is_input_string, out, fn_weight);
+    }
+  };
 
+  concurrency::ThreadPool::TrySimpleParallelFor(ctx->GetOperatorThreadPool(), num_batches, std::move(fn));
   return Status::OK();
 }
 
diff --git a/onnxruntime/core/providers/cpu/nn/tfidfvectorizer.h b/onnxruntime/core/providers/cpu/nn/tfidfvectorizer.h
index 45db40d893231..14488d91c23e9 100644
--- a/onnxruntime/core/providers/cpu/nn/tfidfvectorizer.h
+++ b/onnxruntime/core/providers/cpu/nn/tfidfvectorizer.h
@@ -19,11 +19,8 @@ class TfIdfVectorizer final : public OpKernel {
   Status Compute(OpKernelContext* ctx) const override;
 
  private:
-  void ComputeImpl(OpKernelContext* ctx, ptrdiff_t row_num, size_t row_size,
-                   std::vector<uint32_t>& frequencies) const;
-
-  // Apply weighing criteria and output
-  void OutputResult(OpKernelContext* ctx, size_t b_dim, const std::vector<uint32_t>& frequences) const;
+  void ComputeImpl(const void* x_data_raw, size_t elem_size, ptrdiff_t row_num, size_t row_size, bool is_input_string,
+                   gsl::span<float> output_data, std::function<void(size_t, gsl::span<float>&)>& fn_weight) const;
 
   struct Impl;
   std::unique_ptr<Impl> impl_;
diff --git a/onnxruntime/core/providers/cpu/quantization/qlinearconv.cc b/onnxruntime/core/providers/cpu/quantization/qlinearconv.cc
index e9fc8d857b831..21a256eee6f14 100644
--- a/onnxruntime/core/providers/cpu/quantization/qlinearconv.cc
+++ b/onnxruntime/core/providers/cpu/quantization/qlinearconv.cc
@@ -77,7 +77,8 @@ class QLinearConv : public OpKernel {
     W_zero_point_value = W_zero_point_data[0];
     for (int64_t i = 1; i < W_zero_point_size; i++) {
       ORT_ENFORCE(W_zero_point_data[i] == W_zero_point_value,
-                  "QLinearConv : zero point of per-channel filter must be same");
+                  "QLinearConv : zero point of per-channel filter must be same. "
+                  "This happens by design if the quantization is symmetric.");
     }
   }
 
diff --git a/onnxruntime/core/providers/cpu/reduction/reduction_ops.cc b/onnxruntime/core/providers/cpu/reduction/reduction_ops.cc
index ce834e371fdef..3c83394fb0bf4 100644
--- a/onnxruntime/core/providers/cpu/reduction/reduction_ops.cc
+++ b/onnxruntime/core/providers/cpu/reduction/reduction_ops.cc
@@ -688,21 +688,23 @@ FastReduceKind OptimizeShapeForFastReduce(gsl::span<const int64_t> input_shape,
   return FastReduceKind::kNone;
 }
 
-void ValidateCommonFastReduce(const Tensor* axes_tensor) {
-  ORT_ENFORCE(axes_tensor != nullptr, "Axes input is null");
-  ORT_ENFORCE(axes_tensor->Shape().NumDimensions() == 1,
-              "An axes tensor must be a vector tensor.");
-}
-
 // template <typename T, typename TVAL>
 bool CommonFastReduceCopy(OpKernelContext* ctx, TensorShapeVector& input_axes, bool noop_with_empty_axes) {
   if (ctx->InputCount() == 2) {
     // second input holds the axes.
+    // the argument is optional
     const Tensor* axes_tensor = ctx->Input<Tensor>(1);
-    ValidateCommonFastReduce(axes_tensor);
-    auto nDims = static_cast<size_t>(axes_tensor->Shape()[0]);
-    const auto* data = axes_tensor->Data<int64_t>();
-    input_axes.insert(input_axes.begin(), data, data + nDims);
+
+    if (axes_tensor != nullptr) {
+      ORT_ENFORCE(axes_tensor->Shape().NumDimensions() == 1,
+                  "An axes tensor must be a vector tensor.");
+
+      const auto data_span = axes_tensor->DataAsSpan<int64_t>();
+      input_axes.assign(data_span.begin(), data_span.end());
+    } else {
+      input_axes.clear();
+    }
+
     if (input_axes.empty() && noop_with_empty_axes) {
       const Tensor* input = ctx->Input<Tensor>(0);
       auto* output = ctx->Output(0, input->Shape());
diff --git a/onnxruntime/core/providers/cpu/sequence/sequence_ops.cc b/onnxruntime/core/providers/cpu/sequence/sequence_ops.cc
index 4759938cd8250..8064bc0a58cb1 100644
--- a/onnxruntime/core/providers/cpu/sequence/sequence_ops.cc
+++ b/onnxruntime/core/providers/cpu/sequence/sequence_ops.cc
@@ -334,27 +334,14 @@ Status SequenceConstruct::Compute(OpKernelContext* context) const {
 
 // SplitToSequence
 
-namespace op_kernel_type_control {
-ORT_SPECIFY_OP_KERNEL_ARG_DEFAULT_TYPES_ALL_OPSETS(
-    kCpuExecutionProvider, kOnnxDomain, SplitToSequence, Input, 0,
-    float, double, int32_t, int64_t, std::string);
-}  // namespace op_kernel_type_control
-
-namespace {
-using EnabledSplitToSequenceDataTypes = ORT_OP_KERNEL_ARG_ENABLED_TYPE_LIST_ALL_OPSETS(
-    kCpuExecutionProvider, kOnnxDomain, SplitToSequence, Input, 0);
-}  // namespace
-
 ONNX_CPU_OPERATOR_KERNEL(
     SplitToSequence,
     11,
     KernelDefBuilder()
         .TypeConstraint("T",
-                        BuildKernelDefConstraintsFromTypeList<EnabledSplitToSequenceDataTypes>())
+                        BuildKernelDefConstraints<float, MLFloat16, double, int32_t, int64_t, std::string>())
         .TypeConstraint("S", DataTypeImpl::AllSequenceTensorTypes())
-        .TypeConstraint("I", std::vector<MLDataType>{
-                                 DataTypeImpl::GetTensorType<int32_t>(),
-                                 DataTypeImpl::GetTensorType<int64_t>()}),
+        .TypeConstraint("I", BuildKernelDefConstraints<int32_t, int64_t>()),
     SplitToSequence);
 
 SplitToSequence::SplitToSequence(const OpKernelInfo& info) : OpKernel(info) {
@@ -366,29 +353,14 @@ Status SplitToSequence::Compute(OpKernelContext* context) const {
   const Tensor& input = *context->Input<Tensor>(0);
   const Tensor* p_split_input = context->Input<Tensor>(1);
 
-  Status status;
-
-  if (input.IsDataType<float>())
-    status = ComputeImpl<float>(*context, input, p_split_input);
-  else if (input.IsDataType<double>())
-    status = ComputeImpl<double>(*context, input, p_split_input);
-  else if (input.IsDataType<int32_t>())
-    status = ComputeImpl<int32_t>(*context, input, p_split_input);
-  else if (input.IsDataType<int64_t>())
-    status = ComputeImpl<int64_t>(*context, input, p_split_input);
-  else if (input.IsDataTypeString())
-    status = ComputeImpl<std::string>(*context, input, p_split_input);
-  else
-    status = ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "SplitToSequence operator does not support ", input.DataType(), " yet");
-
-  return status;
+  return ComputeImpl(*context, input, p_split_input);
 }
 
 Status SplitToSequence::PrepareForCompute(const TensorShape& input_shape, int64_t split_scalar, bool is_split_input_scalar,
                                           int64_t& num_outputs, int64_t& axis, int& before_dims,
                                           int& after_dims_including_split_axis, int& after_dims_excluding_split,
                                           bool& is_uneven_split, int& num_remaining_splits,
-                                          std::vector<int64_t>& split_sizes) const {
+                                          InlinedVector<int64_t>& split_sizes) const {
   auto input_dims = input_shape.GetDims();
   const auto num_dimensions = gsl::narrow_cast<int64_t>(input_shape.NumDimensions());
   axis = HandleNegativeAxis(axis_, num_dimensions);  // handle negative and enforce axis is valid
@@ -416,7 +388,7 @@ Status SplitToSequence::PrepareForCompute(const TensorShape& input_shape, int64_
       // populate split_sizes with the same size for each output
       num_outputs = split_dim_size;
       // https://github.com/onnx/onnx/issues/2396
-      split_sizes = std::vector<int64_t>(static_cast<size_t>(num_outputs), DEFAULT_LENGTH_EACH_OUTPUT_);
+      split_sizes = InlinedVector<int64_t>(static_cast<size_t>(num_outputs), DEFAULT_LENGTH_EACH_OUTPUT_);
     } else {
       auto split_size_sum = std::accumulate(split_sizes.cbegin(), split_sizes.cend(), 0LL);
       if (split_size_sum != split_dim_size) {
@@ -453,7 +425,7 @@ static int64_t GetScalarSplitInput(const Tensor& tensor) {
   return retval;
 }
 
-static void GetSplitSizesInput(const Tensor& tensor, std::vector<int64_t>& split_sizes) {
+static void GetSplitSizesInput(const Tensor& tensor, InlinedVector<int64_t>& split_sizes) {
   auto num_elems = tensor.Shape().Size();
   split_sizes.reserve(onnxruntime::narrow<size_t>(num_elems));
   if (tensor.IsDataType<int32_t>()) {
@@ -467,13 +439,8 @@ static void GetSplitSizesInput(const Tensor& tensor, std::vector<int64_t>& split
   }
 }
 
-template <typename T>
 Status SplitToSequence::ComputeImpl(OpKernelContext& context, const Tensor& input,
                                     const Tensor* p_split_input) const {
-  if (!utils::HasType<EnabledSplitToSequenceDataTypes, T>()) {
-    return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Data type is not supported in this build.");
-  }
-
   auto& input_shape = input.Shape();
   int64_t num_outputs = 0;
   int64_t axis = axis_;
@@ -484,7 +451,9 @@ Status SplitToSequence::ComputeImpl(OpKernelContext& context, const Tensor& inpu
   bool is_split_input_scalar = false;
   bool is_uneven_split = false;
   int num_remaining_splits = 0;
-  std::vector<int64_t> split_sizes;
+  InlinedVector<int64_t> split_sizes;
+  const bool is_string_type = input.IsDataTypeString();
+  const size_t element_size = (is_string_type) ? 0U : input.DataType()->Size();
 
   // figure out split_scalar or split_sizes
   if (p_split_input) {
@@ -520,8 +489,8 @@ Status SplitToSequence::ComputeImpl(OpKernelContext& context, const Tensor& inpu
 
   // copy dimensions so we can update the selected axis in place
   auto output_dimensions = input_shape.AsShapeVector();
-  int64_t input_offset = 0;
-  const T* input_data = input.Data<T>();
+  SafeInt<size_t> input_offset = 0;
+  const void* input_data = input.DataRaw();
   for (int i = 0; i < num_outputs; ++i) {
     // update size of dimension for axis we're splitting on while considering uneven split
     int split_size;
@@ -535,20 +504,50 @@ Status SplitToSequence::ComputeImpl(OpKernelContext& context, const Tensor& inpu
     AllocatorPtr alloc;
     ORT_RETURN_IF_ERROR(context.GetTempSpaceAllocator(&alloc));
     Tensor output_tensor(input.DataType(), onnxruntime::TensorShape(output_dimensions), alloc);
-    T* output_data = output_tensor.MutableData<T>();
-
-    ::onnxruntime::math::CopyMatrix<T>(
-        before_dims,                                       // M
-        split_size * after_dims_excluding_split,           // N
-        static_cast<const T*>(input_data + input_offset),  // A
-        after_dims_including_split_axis,                   // lda
-        static_cast<T*>(output_data),                      // B
-        split_size * after_dims_excluding_split,           // ldb
-        [](const T* src, T* dst, size_t count) {
-          copy_data<T>(src, dst, count);
-        });
-
-    input_offset += static_cast<int64_t>(split_size) * after_dims_excluding_split;  // offset by the N data we used in this iteration
+    void* output_data = output_tensor.MutableDataRaw();
+
+    const auto M = before_dims;
+    const auto* A = static_cast<const char*>(input_data) + static_cast<size_t>(input_offset * element_size);
+    const auto lda = after_dims_including_split_axis;
+    auto* B = output_data;
+
+    const auto N = split_size * after_dims_excluding_split;
+    const auto ldb = N;
+
+    if (is_string_type) {
+      const auto* src = reinterpret_cast<const std::string*>(A);
+      auto* dst = reinterpret_cast<std::string*>(B);
+      if (lda == N) {
+        copy_data<std::string>(src, dst, static_cast<size_t>(M * N));
+      } else {
+        size_t lda_offset = 0;
+        size_t ldb_offset = 0;
+        for (size_t idx = 0; idx < static_cast<size_t>(M); ++idx,
+                    lda_offset += lda, ldb_offset += ldb) {
+          copy_data<std::string>(src + lda_offset, dst + ldb_offset, static_cast<size_t>(N));
+        }
+      }
+    } else {
+      if (lda == N) {
+        // if the data is contiguous, we can just copy the data
+        const size_t bytes_to_copy = static_cast<size_t>(N) * static_cast<size_t>(M) * element_size;
+        memcpy(B, A, bytes_to_copy);
+      } else {
+        // otherwise we need to copy each row
+        const size_t row_bytes = SafeInt<size_t>(N) * element_size;
+        const auto lda_bytes_inc = SafeInt<size_t>(lda) * element_size;
+        const auto ldb_bytes_inc = SafeInt<size_t>(ldb) * element_size;
+        SafeInt<size_t> lda_bytes_offset = 0;
+        SafeInt<size_t> ldb_bytes_offset = 0;
+        for (size_t idx = 0; idx < static_cast<size_t>(M); ++idx,
+                    lda_bytes_offset += lda_bytes_inc, ldb_bytes_offset += ldb_bytes_inc) {
+          memcpy(reinterpret_cast<char*>(B) + static_cast<size_t>(ldb_bytes_offset),
+                 reinterpret_cast<const char*>(A) + static_cast<size_t>(lda_bytes_offset), row_bytes);
+        }
+      }
+    }
+
+    input_offset += SafeInt<size_t>(split_size) * after_dims_excluding_split;  // offset by the N data we used in this iteration
 
     // if keep_dims = 0, reshape the tensor by dropping the dimension corresponding to 'axis'
     if (use_keep_dims && keepdims_ == 0) {
diff --git a/onnxruntime/core/providers/cpu/sequence/sequence_ops.h b/onnxruntime/core/providers/cpu/sequence/sequence_ops.h
index 9466d3f0fd108..ccca226fb07ee 100644
--- a/onnxruntime/core/providers/cpu/sequence/sequence_ops.h
+++ b/onnxruntime/core/providers/cpu/sequence/sequence_ops.h
@@ -60,13 +60,12 @@ class SplitToSequence final : public OpKernel {
   Status Compute(OpKernelContext* context) const override;
 
  private:
-  template <typename T>
   Status ComputeImpl(OpKernelContext& context, const Tensor& input, const Tensor* p_split_input) const;
   Status PrepareForCompute(const TensorShape& input_shape, int64_t split_scalar, bool is_split_input_scalar,
                            int64_t& num_outputs, int64_t& axis, int& before_dims,
                            int& after_dims_including_split_axis, int& after_dims_excluding_split,
                            bool& is_uneven_split, int& num_remaining_splits,
-                           std::vector<int64_t>& split_sizes) const;
+                           InlinedVector<int64_t>& split_sizes) const;
   int64_t axis_{};
   int64_t keepdims_{1};
   const int64_t DEFAULT_LENGTH_EACH_OUTPUT_ = 1;
diff --git a/onnxruntime/core/providers/cpu/tensor/affine_grid.cc b/onnxruntime/core/providers/cpu/tensor/affine_grid.cc
new file mode 100644
index 0000000000000..15900ba553983
--- /dev/null
+++ b/onnxruntime/core/providers/cpu/tensor/affine_grid.cc
@@ -0,0 +1,151 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "core/providers/cpu/tensor/affine_grid.h"
+
+#include "core/common/common.h"
+#include "core/providers/op_kernel_type_control.h"
+#include "core/util/math_cpuonly.h"
+#include <iostream>
+#include "Eigen/src/Core/Map.h"
+#include <Eigen/Dense>
+#include "core/common/eigen_common_wrapper.h"
+
+namespace onnxruntime {
+
+#define REGISTER_KERNEL_TYPED(T)                                         \
+  ONNX_CPU_OPERATOR_TYPED_KERNEL(                                        \
+      AffineGrid,                                                        \
+      20,                                                                \
+      T,                                                                 \
+      KernelDefBuilder()                                                 \
+          .TypeConstraint("T1", DataTypeImpl::GetTensorType<T>())        \
+          .TypeConstraint("T2", DataTypeImpl::GetTensorType<int64_t>()), \
+      AffineGrid<T>);
+
+REGISTER_KERNEL_TYPED(float)
+REGISTER_KERNEL_TYPED(double)
+
+template <typename T>
+void generate_base_grid_2d(int64_t H, int64_t W, bool align_corners, Eigen::Matrix<T, Eigen::Dynamic, 2>& base_grid) {
+  Eigen::VectorXf row_vec = Eigen::VectorXf::LinSpaced(static_cast<Eigen::Index>(W), -1, 1);
+  if (!align_corners) {
+    row_vec = row_vec * (W - 1) / W;
+  }
+  Eigen::VectorXf col_vec = Eigen::VectorXf::LinSpaced(static_cast<Eigen::Index>(H), -1, 1);
+  if (!align_corners) {
+    col_vec = col_vec * (H - 1) / H;
+  }
+
+  base_grid.resize(static_cast<Eigen::Index>(H * W), 2);
+  for (Eigen::Index j = 0; j < H; j++) {
+    for (Eigen::Index i = 0; i < W; i++) {
+      base_grid.row(j * static_cast<Eigen::Index>(W) + i) << row_vec(i), col_vec(j);
+    }
+  }
+}
+
+template <typename T>
+void generate_base_grid_3d(int64_t D, int64_t H, int64_t W, bool align_corners, Eigen::Matrix<T, Eigen::Dynamic, 3>& base_grid) {
+  Eigen::VectorXf row_vec = Eigen::VectorXf::LinSpaced(static_cast<Eigen::Index>(W), -1, 1);
+  if (!align_corners) {
+    row_vec = row_vec * (W - 1) / W;
+  }
+  Eigen::VectorXf col_vec = Eigen::VectorXf::LinSpaced(static_cast<Eigen::Index>(H), -1, 1);
+  if (!align_corners) {
+    col_vec = col_vec * (H - 1) / H;
+  }
+  Eigen::VectorXf slice_vec = Eigen::VectorXf::LinSpaced(static_cast<Eigen::Index>(D), -1, 1);
+  if (!align_corners) {
+    slice_vec = slice_vec * (D - 1) / D;
+  }
+
+  base_grid.resize(static_cast<Eigen::Index>(D * H * W), 3);
+  for (Eigen::Index k = 0; k < D; k++) {
+    for (Eigen::Index j = 0; j < H; j++) {
+      for (Eigen::Index i = 0; i < W; i++) {
+        base_grid.row(k * static_cast<Eigen::Index>(H * W) + j * static_cast<Eigen::Index>(W) + i) << row_vec(i), col_vec(j), slice_vec(k);
+      }
+    }
+  }
+}
+
+template <typename T>
+void affine_grid_generator_2d(const Tensor* theta, const Eigen::Matrix<T, 2, Eigen::Dynamic>& base_grid_transposed, int64_t batch_num, int64_t H, int64_t W, Tensor* grid) {
+  const Eigen::StorageOptions option = Eigen::RowMajor;
+  auto theta_batch_offset = batch_num * 2 * 3;
+  const T* theta_data = theta->Data<T>() + theta_batch_offset;
+  const Eigen::Matrix<T, 2, 2, option> theta_R{{theta_data[0], theta_data[1]}, {theta_data[3], theta_data[4]}};
+  const Eigen::Array<T, 2, 1> theta_T(theta_data[2], theta_data[5]);
+
+  auto grid_batch_offset = batch_num * H * W * 2;
+  T* grid_data = grid->MutableData<T>() + grid_batch_offset;
+  Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, 2, option>> grid_matrix(grid_data, narrow<size_t>(H * W), 2);
+  grid_matrix = ((theta_R * base_grid_transposed).array().colwise() + theta_T).matrix().transpose();
+}
+
+template <typename T>
+void affine_grid_generator_3d(const Tensor* theta, const Eigen::Matrix<T, 3, Eigen::Dynamic>& base_grid_transposed, int64_t batch_num, int64_t D, int64_t H, int64_t W, Tensor* grid) {
+  const Eigen::StorageOptions option = Eigen::RowMajor;
+  auto theta_batch_offset = batch_num * 3 * 4;
+  const T* theta_data = theta->Data<T>() + theta_batch_offset;
+  const Eigen::Matrix<T, 3, 3, option> theta_R{
+      {theta_data[0], theta_data[1], theta_data[2]},
+      {theta_data[4], theta_data[5], theta_data[6]},
+      {theta_data[8], theta_data[9], theta_data[10]}};
+  const Eigen::Array<T, 3, 1> theta_T(theta_data[3], theta_data[7], theta_data[11]);
+
+  auto grid_batch_offset = batch_num * D * H * W * 3;
+  T* grid_data = grid->MutableData<T>() + grid_batch_offset;
+  Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, 3, option>> grid_matrix(grid_data, narrow<size_t>(D * H * W), 3);
+  grid_matrix = ((theta_R * base_grid_transposed).array().colwise() + theta_T).matrix().transpose();
+}
+
+template <typename T>
+Status AffineGrid<T>::Compute(OpKernelContext* context) const {
+  const Tensor* theta = context->Input<Tensor>(0);
+  const TensorShape& theta_shape = theta->Shape();
+  if (theta_shape.NumDimensions() != 3) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "AffineGrid : Input theta tensor dimension is not 3");
+  }
+
+  const Tensor* size = context->Input<Tensor>(1);
+  const TensorShape& size_shape = size->Shape();
+  const int64_t* size_data = size->Data<int64_t>();
+
+  if (size_shape.GetDims()[0] == 4 /*&& get_check_2d_grid_sample_consistency(theta_shape, size_shape, N, C, H, W)*/) {
+    int64_t N = size_data[0], H = size_data[2], W = size_data[3];
+
+    TensorShape grid_shape{N, H, W, 2};
+    auto grid = context->Output(0, grid_shape);
+
+    Eigen::Matrix<T, Eigen::Dynamic, 2> base_grid;
+    generate_base_grid_2d(H, W, align_corners_, base_grid);
+    Eigen::Matrix<T, 2, Eigen::Dynamic> base_grid_transposed = base_grid.transpose();
+
+    std::function<void(ptrdiff_t)> fn = [theta, base_grid_transposed, H, W, grid](ptrdiff_t batch_num) {
+      affine_grid_generator_2d(theta, base_grid_transposed, batch_num, H, W, grid);
+    };
+
+    concurrency::ThreadPool::TryBatchParallelFor(context->GetOperatorThreadPool(), narrow<size_t>(N), std::move(fn), 0);
+  } else if (size_shape.GetDims()[0] == 5 /*&& get_check_2d_grid_sample_consistency(theta_shape, size_shape, N, C, H, W)*/) {
+    int64_t N = size_data[0], D = size_data[2], H = size_data[3], W = size_data[4];
+
+    TensorShape grid_shape{N, D, H, W, 3};
+    auto grid = context->Output(0, grid_shape);
+
+    Eigen::Matrix<T, Eigen::Dynamic, 3> base_grid;
+    generate_base_grid_3d(D, H, W, align_corners_, base_grid);
+    Eigen::Matrix<T, 3, Eigen::Dynamic> base_grid_transposed = base_grid.transpose();
+
+    std::function<void(ptrdiff_t)> fn = [theta, base_grid_transposed, D, H, W, grid](ptrdiff_t batch_num) {
+      affine_grid_generator_3d(theta, base_grid_transposed, batch_num, D, H, W, grid);
+    };
+
+    concurrency::ThreadPool::TryBatchParallelFor(context->GetOperatorThreadPool(), narrow<size_t>(N), std::move(fn), 0);
+  } else {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "AffineGrid : Invalidate size - length of size should be 4 or 5.");
+  }
+  return Status::OK();
+}
+}  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/cpu/tensor/affine_grid.h b/onnxruntime/core/providers/cpu/tensor/affine_grid.h
new file mode 100644
index 0000000000000..5ffe660e986f2
--- /dev/null
+++ b/onnxruntime/core/providers/cpu/tensor/affine_grid.h
@@ -0,0 +1,25 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+
+#include "core/common/common.h"
+#include "core/framework/op_kernel.h"
+
+namespace onnxruntime {
+
+template <typename T>
+class AffineGrid final : public OpKernel {
+ public:
+  AffineGrid(const OpKernelInfo& info) : OpKernel(info) {
+    int64_t align_corners = info.GetAttrOrDefault<int64_t>("align_corners", 0);
+    align_corners_ = (align_corners != 0);
+  }
+
+  Status Compute(OpKernelContext* context) const override;
+
+ private:
+  bool align_corners_;
+};
+
+}  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/cpu/tensor/grid_sample.cc b/onnxruntime/core/providers/cpu/tensor/grid_sample.cc
index c58a7d8337114..a83ba378d7f1e 100644
--- a/onnxruntime/core/providers/cpu/tensor/grid_sample.cc
+++ b/onnxruntime/core/providers/cpu/tensor/grid_sample.cc
@@ -11,17 +11,23 @@
 
 namespace onnxruntime {
 
-#define REGISTER_KERNEL_TYPED(T)                                   \
-  ONNX_CPU_OPERATOR_TYPED_KERNEL(                                  \
-      GridSample,                                                  \
-      16,                                                          \
-      T,                                                           \
-      KernelDefBuilder()                                           \
-          .TypeConstraint("T1", DataTypeImpl::GetTensorType<T>())  \
-          .TypeConstraint("T2", DataTypeImpl::GetTensorType<T>()), \
-      GridSample<T>);
+#define REGISTER_KERNEL_TYPED(T)                                                                       \
+  ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_EX(GridSample, kOnnxDomain, 16, 19, T, kCpuExecutionProvider,   \
+                                          KernelDefBuilder()                                           \
+                                              .TypeConstraint("T1", DataTypeImpl::GetTensorType<T>())  \
+                                              .TypeConstraint("T2", DataTypeImpl::GetTensorType<T>()), \
+                                          GridSample<T>);
+
+#define REGISTER_KERNEL_TYPED_20(T)                                                          \
+  ONNX_OPERATOR_TYPED_KERNEL_EX(GridSample, kOnnxDomain, 20, T, kCpuExecutionProvider,       \
+                                KernelDefBuilder()                                           \
+                                    .TypeConstraint("T1", DataTypeImpl::GetTensorType<T>())  \
+                                    .TypeConstraint("T2", DataTypeImpl::GetTensorType<T>()), \
+                                GridSample<T>);
 
 REGISTER_KERNEL_TYPED(float)
+REGISTER_KERNEL_TYPED_20(float)
+REGISTER_KERNEL_TYPED_20(double)
 
 // Restore normalized location to actual image location
 //   When align_corners is true:
@@ -44,16 +50,15 @@ T GsDenormalize(T n, int64_t length, bool align_corners) {
 }
 
 // Reflect by the near border till within the borders
-// Use float for borders to avoid potential issues with integer T
 template <typename T>
-T GsReflect(T x, float x_min, float x_max) {
-  float dx = {};
-  float fx = static_cast<float>(x);
-  float range = x_max - x_min;
+T GsReflect(T x, T x_min, T x_max) {
+  T dx = {};
+  T fx = static_cast<T>(x);
+  T range = x_max - x_min;
   if (fx < x_min) {
     dx = x_min - fx;
     int n = static_cast<int>(dx / range);
-    float r = dx - n * range;
+    T r = dx - n * range;
     if (n % 2 == 0) {
       fx = x_min + r;
     } else {
@@ -62,7 +67,7 @@ T GsReflect(T x, float x_min, float x_max) {
   } else if (fx > x_max) {
     dx = fx - x_max;
     int n = static_cast<int>(dx / range);
-    float r = dx - n * range;
+    T r = dx - n * range;
     if (n % 2 == 0) {
       fx = x_max - r;
     } else {
@@ -75,9 +80,9 @@ T GsReflect(T x, float x_min, float x_max) {
 
 // Calculate cubic convolution interpolation coefficients
 // ROBERT G. KEYS https://ieeexplore.ieee.org/document/1163711
-// Use float to avoid potential issues with integer T
-void GsGetCubicCoeffs(float x, float coeffs[4]) {
-  constexpr float cubic_alpha = -0.75f;
+template <typename T>
+void GsGetCubicCoeffs(T x, T coeffs[4]) {
+  constexpr T cubic_alpha = -0.75f;
   x = std::abs(x);
   coeffs[0] = ((cubic_alpha * (x + 1) - 5 * cubic_alpha) * (x + 1) + 8 * cubic_alpha) * (x + 1) - 4 * cubic_alpha;
   coeffs[1] = ((cubic_alpha + 2) * x - (cubic_alpha + 3)) * x * x + 1;
@@ -86,9 +91,9 @@ void GsGetCubicCoeffs(float x, float coeffs[4]) {
 }
 
 template <typename T>
-T GsBicubicInterpolate(T p[4][4], float x, float y) {
-  float v[4] = {};
-  float coeffs[4] = {};
+T GsBicubicInterpolate(T p[4][4], T x, T y) {
+  T v[4] = {};
+  T coeffs[4] = {};
   GsGetCubicCoeffs(x, coeffs);
   for (int64_t i = 0; i < 4; i++) {
     v[i] = coeffs[0] * p[i][0] + coeffs[1] * p[i][1] + coeffs[2] * p[i][2] + coeffs[3] * p[i][3];
@@ -98,7 +103,7 @@ T GsBicubicInterpolate(T p[4][4], float x, float y) {
 }
 
 template <typename T>
-T GridSample<T>::PixelAtGrid(const T* image, int64_t r, int64_t c, int64_t H, int64_t W, float border[/* 4 */]) const {
+T GridSample<T>::PixelAtGrid(const T* image, int64_t r, int64_t c, int64_t H, int64_t W, T border[/* 4 */]) const {
   T pixel = {};  // default 0
   if (padding_mode_ == Zeros) {
     if (c >= 0 && c < W && r >= 0 && r < H) {
@@ -116,6 +121,27 @@ T GridSample<T>::PixelAtGrid(const T* image, int64_t r, int64_t c, int64_t H, in
   return pixel;
 }
 
+template <typename T>
+T GridSample<T>::PixelAtGrid3D(const T* image, int64_t d, int64_t h, int64_t w, int64_t D, int64_t H, int64_t W, T border[/* 6 */]) const {
+  T pixel = {};  // default 0
+  if (padding_mode_ == Zeros) {
+    if (w >= 0 && w < W && h >= 0 && h < H && d >= 0 && d < D) {
+      pixel = image[d * H * W + h * W + w];
+    }
+  } else if (padding_mode_ == Border) {
+    w = std::clamp<int64_t>(w, 0, W - 1);
+    h = std::clamp<int64_t>(h, 0, H - 1);
+    d = std::clamp<int64_t>(d, 0, D - 1);
+    pixel = image[d * H * W + h * W + w];
+  } else {  // (padding_mode_ == Reflection)
+    w = static_cast<int64_t>(GsReflect(static_cast<T>(w), border[0], border[3]));
+    h = static_cast<int64_t>(GsReflect(static_cast<T>(h), border[1], border[4]));
+    d = static_cast<int64_t>(GsReflect(static_cast<T>(d), border[2], border[5]));
+    pixel = image[d * H * W + h * W + w];
+  }
+  return pixel;
+}
+
 // When grid sampling, padding is applied before interpolation.
 // For instance, in bilinear mode and zeros padding-mode, pixel p at actual
 // image location (-0.5, -0.5)
@@ -134,113 +160,203 @@ Status GridSample<T>::Compute(OpKernelContext* context) const {
   const auto& input_dims = input->Shape();
   const auto& grid_dims = grid->Shape();
 
-  if (input_dims.NumDimensions() != 4 || grid_dims.NumDimensions() != 4) {
-    return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "Only 4-D tensor is supported");
-  }
+  int64_t data_dims = input_dims.NumDimensions() - 2;
+  ORT_ENFORCE(static_cast<int64_t>(grid_dims.NumDimensions()) == data_dims + 2,
+              "grid dimensions must be ", data_dims + 2, "for input dimension of ", data_dims);
+
+  ORT_ENFORCE(grid_dims[grid_dims.NumDimensions() - 1] == data_dims,
+              "Last dimension of grid: ", grid_dims[grid_dims.NumDimensions() - 1], ", expect ", data_dims);
+
+  ORT_ENFORCE(input_dims.NumDimensions() == 4 || input_dims.NumDimensions() == 5, "Only 4-D or 5-D tensor is supported");
 
   auto N = input_dims[0];
   auto C = input_dims[1];
-  auto H_in = input_dims[2];
-  auto W_in = input_dims[3];
-  auto H_out = grid_dims[1];
-  auto W_out = grid_dims[2];
   ORT_ENFORCE(grid_dims[0] == N, "Grid batch size ", grid_dims[0], " does not match input batch size ", N);
-  ORT_ENFORCE(grid_dims[3] == 2, "Last dimension of grid: ", grid_dims[3], ", expect 2");
 
-  TensorShape Y_shape = {N, C, H_out, W_out};
-  auto& Y = *context->Output(0, Y_shape);
-  // Return early if the output tensor is going to be of size 0
-  if (Y.Shape().Size() == 0) {
-    return Status::OK();
+  if (input_dims.NumDimensions() == 5) {
+    ORT_ENFORCE(mode_ != Cubic, "Only support GridSample Cubic mode in 4-D cases.");
   }
 
-  // Force float here to avoid possible issue in integer T case
-  float x_min = -0.5f;
-  float x_max = W_in - 0.5f;
-  float y_min = -0.5f;
-  float y_max = H_in - 0.5f;
-
-  if (align_corners_) {
-    x_min = 0.f;
-    x_max = W_in - 1.f;
-    y_min = 0.f;
-    y_max = H_in - 1.f;
-  }
-  float border[] = {x_min, y_min, x_max, y_max};  // l-t-r-b
-
-  concurrency::ThreadPool* tp = H_out * W_out > 64 ? context->GetOperatorThreadPool() : nullptr;
-  for (int64_t n = 0; n < N; n++) {
-    const T* grid_data = grid->Data<T>() + n * (H_out * W_out) * 2;
-    concurrency::ThreadPool::TrySimpleParallelFor(
-        tp, onnxruntime::narrow<std::ptrdiff_t>(C),
-        [&](std::ptrdiff_t c) {
-          const T* X_data = input->Data<T>() + (n * C + c) * (H_in * W_in);
-          T* Y_data = Y.MutableData<T>() + (n * C + c) * (H_out * W_out);
-
-          for (int64_t oy = 0; oy < H_out; oy++) {
-            for (int64_t ox = 0; ox < W_out; ox++) {
-              const T* gridpoint = grid_data + (oy * W_out + ox) * 2;
-              T* Y_gridpoint = Y_data + oy * W_out + ox;
-              auto nx = gridpoint[0];  // normalized location
-              auto ny = gridpoint[1];
-              auto x = GsDenormalize<T>(nx, W_in, align_corners_);  // actual location
-              auto y = GsDenormalize<T>(ny, H_in, align_corners_);
-
-              if (mode_ == Nearest) {
-                x = static_cast<T>(std::nearbyintf(static_cast<float>(x)));
-                y = static_cast<T>(std::nearbyintf(static_cast<float>(y)));
-              }
+  if (data_dims == 2) {
+    // sample 2d;
+    auto H_in = input_dims[2];
+    auto W_in = input_dims[3];
+    auto H_out = grid_dims[1];
+    auto W_out = grid_dims[2];
+    TensorShape Y_shape = {N, C, H_out, W_out};
+    auto& Y = *context->Output(0, Y_shape);
+    // Return early if the output tensor is going to be of size 0
+    if (Y.Shape().Size() == 0) {
+      return Status::OK();
+    }
 
-              if (x < x_min || x > x_max || y < y_min || y > y_max) {  // out of bound
-                if (padding_mode_ == Border) {
-                  // use original border in both align_corner cases
-                  x = std::clamp(x, static_cast<T>(0), static_cast<T>(W_in - 1));
-                  y = std::clamp(y, static_cast<T>(0), static_cast<T>(H_in - 1));
-                } else if (padding_mode_ == Reflection) {
-                  x = GsReflect(x, x_min, x_max);
-                  y = GsReflect(y, y_min, y_max);
-                }
-              }  // out of bound
+    T x_min = -0.5f;
+    T x_max = W_in - 0.5f;
+    T y_min = -0.5f;
+    T y_max = H_in - 0.5f;
 
-              if (mode_ == Nearest) {
-                // x, y are integers in all padding modes
-                *Y_gridpoint = PixelAtGrid(X_data, static_cast<int64_t>(y), static_cast<int64_t>(x), H_in, W_in, border);
-                continue;
-              }
+    if (align_corners_) {
+      x_min = 0.f;
+      x_max = W_in - 1.f;
+      y_min = 0.f;
+      y_max = H_in - 1.f;
+    }
+    T border[] = {x_min, y_min, x_max, y_max};  // l-t-r-b
 
-              if (mode_ == Bilinear) {
-                int64_t x1 = static_cast<int64_t>(std::floor(x));
-                int64_t y1 = static_cast<int64_t>(std::floor(y));
-                int64_t x2 = x1 + 1;
-                int64_t y2 = y1 + 1;
-
-                T p11 = PixelAtGrid(X_data, y1, x1, H_in, W_in, border);
-                T p12 = PixelAtGrid(X_data, y1, x2, H_in, W_in, border);
-                T p21 = PixelAtGrid(X_data, y2, x1, H_in, W_in, border);
-                T p22 = PixelAtGrid(X_data, y2, x2, H_in, W_in, border);
-
-                T dx2 = static_cast<T>(x2) - x;
-                T dx1 = x - static_cast<T>(x1);
-                T dy2 = static_cast<T>(y2) - y;
-                T dy1 = y - static_cast<T>(y1);
-                *Y_gridpoint = dy2 * (dx2 * p11 + dx1 * p12) + dy1 * (dx2 * p21 + dx1 * p22);
+    concurrency::ThreadPool* tp = H_out * W_out > 64 ? context->GetOperatorThreadPool() : nullptr;
+    for (int64_t n = 0; n < N; n++) {
+      const T* grid_data = grid->Data<T>() + n * (H_out * W_out) * 2;
+      concurrency::ThreadPool::TrySimpleParallelFor(
+          tp, onnxruntime::narrow<std::ptrdiff_t>(C),
+          [&](std::ptrdiff_t c) {
+            const T* X_data = input->Data<T>() + (n * C + c) * (H_in * W_in);
+            T* Y_data = Y.MutableData<T>() + (n * C + c) * (H_out * W_out);
+
+            for (int64_t oy = 0; oy < H_out; oy++) {
+              for (int64_t ox = 0; ox < W_out; ox++) {
+                const T* gridpoint = grid_data + (oy * W_out + ox) * 2;
+                T* Y_gridpoint = Y_data + oy * W_out + ox;
+                auto nx = gridpoint[0];  // normalized location
+                auto ny = gridpoint[1];
+                auto x = GsDenormalize<T>(nx, W_in, align_corners_);  // actual location
+                auto y = GsDenormalize<T>(ny, H_in, align_corners_);
+
+                if (mode_ == Nearest) {
+                  x = static_cast<T>(std::nearbyint(static_cast<T>(x)));
+                  y = static_cast<T>(std::nearbyint(static_cast<T>(y)));
+                  // x, y are integers in all padding modes
+                  *Y_gridpoint = PixelAtGrid(X_data, static_cast<int64_t>(y), static_cast<int64_t>(x), H_in, W_in, border);
+                } else if (mode_ == Linear) {
+                  int64_t x1 = static_cast<int64_t>(std::floor(x));
+                  int64_t y1 = static_cast<int64_t>(std::floor(y));
+                  int64_t x2 = x1 + 1;
+                  int64_t y2 = y1 + 1;
+
+                  T p11 = PixelAtGrid(X_data, y1, x1, H_in, W_in, border);
+                  T p12 = PixelAtGrid(X_data, y1, x2, H_in, W_in, border);
+                  T p21 = PixelAtGrid(X_data, y2, x1, H_in, W_in, border);
+                  T p22 = PixelAtGrid(X_data, y2, x2, H_in, W_in, border);
+
+                  T dx2 = static_cast<T>(x2) - x;
+                  T dx1 = x - static_cast<T>(x1);
+                  T dy2 = static_cast<T>(y2) - y;
+                  T dy1 = y - static_cast<T>(y1);
+                  *Y_gridpoint = dy2 * (dx2 * p11 + dx1 * p12) + dy1 * (dx2 * p21 + dx1 * p22);
+                } else if (mode_ == Cubic) {
+                  int64_t x0 = static_cast<int64_t>(std::floor(x)) - 1;  // top-left corner of the bbox
+                  int64_t y0 = static_cast<int64_t>(std::floor(y)) - 1;
+
+                  T p[4][4] = {};  // [H][W]
+                  for (int64_t h = 0; h < 4; h++) {
+                    for (int64_t w = 0; w < 4; w++) {
+                      p[h][w] = PixelAtGrid(X_data, h + y0, w + x0, H_in, W_in, border);
+                    }
+                  }
+                  T dx = static_cast<T>(x - x0 - 1);
+                  T dy = static_cast<T>(y - y0 - 1);
+                  *Y_gridpoint = GsBicubicInterpolate(p, dx, dy);
+                }
               }
-              if (mode_ == Bicubic) {
-                int64_t x0 = static_cast<int64_t>(std::floor(x)) - 1;  // top-left corner of the bbox
-                int64_t y0 = static_cast<int64_t>(std::floor(y)) - 1;
-                T p[4][4] = {};  // [H][W]
-                for (int64_t h = 0; h < 4; h++) {
-                  for (int64_t w = 0; w < 4; w++) {
-                    p[h][w] = PixelAtGrid(X_data, h + y0, w + x0, H_in, W_in, border);
+            }
+          });
+    }
+  } else if (data_dims == 3) {
+    // sample 3d;
+    auto D_in = input_dims[2];
+    auto H_in = input_dims[3];
+    auto W_in = input_dims[4];
+    auto D_out = grid_dims[1];
+    auto H_out = grid_dims[2];
+    auto W_out = grid_dims[3];
+    TensorShape Y_shape = {N, C, D_out, H_out, W_out};
+    auto& Y = *context->Output(0, Y_shape);
+    // Return early if the output tensor is going to be of size 0
+    if (Y.Shape().Size() == 0) {
+      return Status::OK();
+    }
+
+    T x_min = -0.5f;
+    T x_max = W_in - 0.5f;
+    T y_min = -0.5f;
+    T y_max = H_in - 0.5f;
+    T z_min = -0.5f;
+    T z_max = D_in - 0.5f;
+
+    if (align_corners_) {
+      x_min = 0.f;
+      x_max = W_in - 1.f;
+      y_min = 0.f;
+      y_max = H_in - 1.f;
+      z_min = 0.f;
+      z_max = D_in - 1.f;
+    }
+    T border[] = {x_min, y_min, z_min, x_max, y_max, z_max};
+
+    concurrency::ThreadPool* tp = D_out * H_out * W_out > 64 ? context->GetOperatorThreadPool() : nullptr;
+    for (int64_t n = 0; n < N; n++) {
+      const T* grid_data = grid->Data<T>() + n * (D_out * H_out * W_out) * 3;
+      concurrency::ThreadPool::TrySimpleParallelFor(
+          tp, onnxruntime::narrow<std::ptrdiff_t>(C),
+          [&](std::ptrdiff_t c) {
+            const T* X_data = input->Data<T>() + (n * C + c) * (D_in * H_in * W_in);
+            T* Y_data = Y.MutableData<T>() + (n * C + c) * (D_out * H_out * W_out);
+
+            for (int64_t oz = 0; oz < D_out; oz++) {
+              for (int64_t oy = 0; oy < H_out; oy++) {
+                for (int64_t ox = 0; ox < W_out; ox++) {
+                  const T* gridpoint = grid_data + (oz * H_out * W_out + oy * W_out + ox) * 3;
+                  T* Y_gridpoint = Y_data + oz * H_out * W_out + oy * W_out + ox;
+                  auto nx = gridpoint[0];  // normalized location
+                  auto ny = gridpoint[1];
+                  auto nz = gridpoint[2];
+                  auto x = GsDenormalize<T>(nx, W_in, align_corners_);  // actual location
+                  auto y = GsDenormalize<T>(ny, H_in, align_corners_);
+                  auto z = GsDenormalize<T>(nz, D_in, align_corners_);
+
+                  if (mode_ == Nearest) {
+                    x = static_cast<T>(std::nearbyint(static_cast<T>(x)));
+                    y = static_cast<T>(std::nearbyint(static_cast<T>(y)));
+                    z = static_cast<T>(std::nearbyint(static_cast<T>(z)));
+
+                    // x, y are integers in all padding modes
+                    *Y_gridpoint = PixelAtGrid3D(X_data, static_cast<int64_t>(z), static_cast<int64_t>(y), static_cast<int64_t>(x),
+                                                 D_in, H_in, W_in, border);
+                  } else if (mode_ == Linear) {
+                    int64_t x1 = static_cast<int64_t>(std::floor(x));
+                    int64_t y1 = static_cast<int64_t>(std::floor(y));
+                    int64_t z1 = static_cast<int64_t>(std::floor(z));
+                    int64_t x2 = x1 + 1;
+                    int64_t y2 = y1 + 1;
+                    int64_t z2 = z1 + 1;
+
+                    T dx2 = static_cast<T>(x2) - x;
+                    T dx1 = x - static_cast<T>(x1);
+                    T dy2 = static_cast<T>(y2) - y;
+                    T dy1 = y - static_cast<T>(y1);
+                    T dz2 = static_cast<T>(z2) - z;
+                    T dz1 = z - static_cast<T>(z1);
+
+                    T p111 = PixelAtGrid3D(X_data, z1, y1, x1, D_in, H_in, W_in, border);
+                    T p112 = PixelAtGrid3D(X_data, z1, y1, x2, D_in, H_in, W_in, border);
+                    T p121 = PixelAtGrid3D(X_data, z1, y2, x1, D_in, H_in, W_in, border);
+                    T p122 = PixelAtGrid3D(X_data, z1, y2, x2, D_in, H_in, W_in, border);
+                    T Y_gridpoint_z1 = dy2 * (dx2 * p111 + dx1 * p112) + dy1 * (dx2 * p121 + dx1 * p122);
+
+                    T p211 = PixelAtGrid3D(X_data, z2, y1, x1, D_in, H_in, W_in, border);
+                    T p212 = PixelAtGrid3D(X_data, z2, y1, x2, D_in, H_in, W_in, border);
+                    T p221 = PixelAtGrid3D(X_data, z2, y2, x1, D_in, H_in, W_in, border);
+                    T p222 = PixelAtGrid3D(X_data, z2, y2, x2, D_in, H_in, W_in, border);
+                    T Y_gridpoint_z2 = dy2 * (dx2 * p211 + dx1 * p212) + dy1 * (dx2 * p221 + dx1 * p222);
+                    *Y_gridpoint = dz2 * Y_gridpoint_z1 + dz1 * Y_gridpoint_z2;
                   }
                 }
-                T dx = static_cast<T>(x - x0 - 1);
-                T dy = static_cast<T>(y - y0 - 1);
-                *Y_gridpoint = GsBicubicInterpolate(p, static_cast<float>(dx), static_cast<float>(dy));
               }
             }
-          }
-        });
+          });
+    }
+  } else {
+    // shall not reach here due to above checks
+    return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "Only support GirdSample in 4-D or 5-D cases.");
   }
   return Status::OK();
 }
diff --git a/onnxruntime/core/providers/cpu/tensor/grid_sample.h b/onnxruntime/core/providers/cpu/tensor/grid_sample.h
index 2dd828b3ae3f1..dee0c4701ee21 100644
--- a/onnxruntime/core/providers/cpu/tensor/grid_sample.h
+++ b/onnxruntime/core/providers/cpu/tensor/grid_sample.h
@@ -15,37 +15,52 @@ template <typename T>
 class GridSample final : public OpKernel {
  public:
   explicit GridSample(const OpKernelInfo& info) : OpKernel(info) {
-    std::string mode_str = info.GetAttrOrDefault<std::string>("mode", "bilinear");
-    std::string padding_mode_str = info.GetAttrOrDefault<std::string>("padding_mode", "zeros");
-    align_corners_ = static_cast<bool>(info.GetAttrOrDefault<int64_t>("align_corners", 0));
-    ORT_ENFORCE(mode_str == "bilinear" || mode_str == "nearest" || mode_str == "bicubic",
-                "mode \"", mode_str, "\" not supported, expect bilinear, nearest or bicubic");
-    ORT_ENFORCE(padding_mode_str == "zeros" || padding_mode_str == "border" || padding_mode_str == "reflection",
-                "padding_mode \"", padding_mode_str, "\" not supported, expect zeros, border or reflection");
-    if (mode_str == "bicubic") {
-      mode_ = Bicubic;
-    } else if (mode_str == "nearest") {
-      mode_ = Nearest;
+    int start_version = info.node().SinceVersion();
+    if (start_version >= 20) {
+      std::string mode_str = info.GetAttrOrDefault<std::string>("mode", "linear");
+      if (mode_str == "cubic") {
+        mode_ = Cubic;
+      } else if (mode_str == "nearest") {
+        mode_ = Nearest;
+      } else if (mode_str == "linear") {
+        mode_ = Linear;
+      } else {
+        ORT_THROW("mode \"", mode_str, "\" not supported, expect linear, nearest or cubic");
+      }
     } else {
-      mode_ = Bilinear;
+      std::string mode_str = info.GetAttrOrDefault<std::string>("mode", "bilinear");
+      if (mode_str == "bicubic") {
+        mode_ = Cubic;
+      } else if (mode_str == "nearest") {
+        mode_ = Nearest;
+      } else if (mode_str == "bilinear") {
+        mode_ = Linear;
+      } else {
+        ORT_THROW("mode \"", mode_str, "\" not supported, expect bilinear, nearest or bicubic");
+      }
     }
+
+    std::string padding_mode_str = info.GetAttrOrDefault<std::string>("padding_mode", "zeros");
+    align_corners_ = static_cast<bool>(info.GetAttrOrDefault<int64_t>("align_corners", 0));
     if (padding_mode_str == "reflection") {
       padding_mode_ = Reflection;
     } else if (padding_mode_str == "border") {
       padding_mode_ = Border;
-    } else {
+    } else if (padding_mode_str == "zeros") {
       padding_mode_ = Zeros;
+    } else {
+      ORT_THROW("padding_mode \"", padding_mode_str, "\" not supported, expect zeros, border or reflection");
     }
   }
 
   Status Compute(OpKernelContext* context) const override;
 
  private:
-  enum GridSampleInterpolationMode {
-    Bilinear,
+  typedef enum {
+    Linear,
+    Cubic,
     Nearest,
-    Bicubic
-  };
+  } GridSampleInterpolationMode;
 
   enum GridSamplePaddingMode {
     Zeros,
@@ -53,9 +68,10 @@ class GridSample final : public OpKernel {
     Reflection
   };
 
-  T PixelAtGrid(const T* image, int64_t r, int64_t c, int64_t H, int64_t W, float border[/* 4 */]) const;
+  T PixelAtGrid(const T* image, int64_t r, int64_t c, int64_t H, int64_t W, T border[/* 4 */]) const;
+  T PixelAtGrid3D(const T* image, int64_t d, int64_t h, int64_t w, int64_t D, int64_t H, int64_t W, T border[/* 6 */]) const;
 
-  GridSampleInterpolationMode mode_{Bilinear};
+  GridSampleInterpolationMode mode_{Linear};
   GridSamplePaddingMode padding_mode_{Zeros};
   bool align_corners_{0};
 };
diff --git a/onnxruntime/core/providers/cpu/tensor/isinf.cc b/onnxruntime/core/providers/cpu/tensor/isinf.cc
index bc99caa8036cf..1b449f46927a2 100644
--- a/onnxruntime/core/providers/cpu/tensor/isinf.cc
+++ b/onnxruntime/core/providers/cpu/tensor/isinf.cc
@@ -14,15 +14,38 @@ namespace onnxruntime {
 // https://github.com/onnx/onnx/blob/main/docs/Operators.md#IsInf
 
 namespace op_kernel_type_control {
-ORT_SPECIFY_OP_KERNEL_ARG_DEFAULT_TYPES_ALL_OPSETS(
-    kCpuExecutionProvider, kOnnxDomain, IsInf, Input, 0,
-    float, double);
+using IsInfTypesOpset10 = TypeList<float, double>;
+
+ORT_SPECIFY_OP_KERNEL_ARG_DEFAULT_TYPE_LIST(
+    kCpuExecutionProvider, kOnnxDomain, IsInf, 10, Input, 0,
+    IsInfTypesOpset10);
+
+using IsInfTypesOpset20 =
+    TypeList<
+        float,
+        double
+#if !defined(DISABLE_FLOAT8_TYPES)
+        ,
+        Float8E4M3FN, Float8E4M3FNUZ, Float8E5M2, Float8E5M2FNUZ
+#endif
+        >;
+
+ORT_SPECIFY_OP_KERNEL_ARG_DEFAULT_TYPE_LIST(
+    kCpuExecutionProvider,
+    kOnnxDomain,
+    IsInf,
+    20,
+    Input,
+    0,
+    IsInfTypesOpset20);
 }  // namespace op_kernel_type_control
 
 class IsInf final : public OpKernel {
  public:
-  using EnabledDataTypes = ORT_OP_KERNEL_ARG_ENABLED_TYPE_LIST_ALL_OPSETS(kCpuExecutionProvider, kOnnxDomain,
-                                                                          IsInf, Input, 0);
+  using EnabledDataTypes10 = ORT_OP_KERNEL_ARG_ENABLED_TYPE_LIST(kCpuExecutionProvider, kOnnxDomain,
+                                                                 IsInf, 10, Input, 0);
+  using EnabledDataTypes20 = ORT_OP_KERNEL_ARG_ENABLED_TYPE_LIST(kCpuExecutionProvider, kOnnxDomain,
+                                                                 IsInf, 20, Input, 0);
 
   explicit IsInf(const OpKernelInfo& info);
   Status Compute(OpKernelContext* context) const override;
@@ -30,14 +53,25 @@ class IsInf final : public OpKernel {
  private:
   int64_t detect_positive_{1};
   int64_t detect_negative_{1};
+  int opset_;
 };
 
-ONNX_CPU_OPERATOR_KERNEL(
+ONNX_CPU_OPERATOR_VERSIONED_KERNEL(
     IsInf,
     10,
+    19,
     KernelDefBuilder()
         .TypeConstraint("T1",
-                        BuildKernelDefConstraintsFromTypeList<IsInf::EnabledDataTypes>())
+                        BuildKernelDefConstraintsFromTypeList<IsInf::EnabledDataTypes10>())
+        .TypeConstraint("T2", DataTypeImpl::GetTensorType<bool>()),
+    IsInf);
+
+ONNX_CPU_OPERATOR_KERNEL(
+    IsInf,
+    20,
+    KernelDefBuilder()
+        .TypeConstraint("T1",
+                        BuildKernelDefConstraintsFromTypeList<IsInf::EnabledDataTypes20>())
         .TypeConstraint("T2", DataTypeImpl::GetTensorType<bool>()),
     IsInf);
 
@@ -46,6 +80,7 @@ IsInf::IsInf(const OpKernelInfo& info) : OpKernel(info) {
   ORT_ENFORCE(status.IsOK(), "Failed to obtain detect_positive");
   status = info.GetAttr("detect_negative", &detect_negative_);
   ORT_ENFORCE(status.IsOK(), "Failed to obtain detect_negative");
+  opset_ = info.node().SinceVersion();
 }
 
 namespace isinf_internal {
@@ -78,6 +113,49 @@ struct ComputeDispatchTarget {
     }
   }
 };
+
+#if !defined(DISABLE_FLOAT8_TYPES)
+template <>
+struct ComputeDispatchTarget<Float8E4M3FN> {
+  void operator()(const Tensor&, Tensor& Y, bool, bool) const {
+    EigenMap<bool>(Y).array() = false;
+  }
+};
+
+template <>
+struct ComputeDispatchTarget<Float8E4M3FNUZ> {
+  void operator()(const Tensor&, Tensor& Y, bool, bool) const {
+    EigenMap<bool>(Y).array() = false;
+  }
+};
+
+template <>
+struct ComputeDispatchTarget<Float8E5M2> {
+  void operator()(const Tensor& X, Tensor& Y, bool detect_positive, bool detect_negative) const {
+    auto& dims = X.Shape();
+    auto input = ConstEigenVectorMap<uint8_t>(static_cast<const uint8_t*>(static_cast<const void*>(X.Data<Float8E5M2>())), onnxruntime::narrow<size_t>(dims.Size()));
+    auto output = EigenMap<bool>(Y);
+
+    // S.11111.00
+    if (detect_positive && detect_negative) {
+      output.array() = input.array() == 0b01111100 || input.array() == 0b11111100;
+    } else if (detect_positive) {
+      output.array() = input.array() == 0b01111100;
+    } else if (detect_negative) {
+      output.array() = input.array() == 0b11111100;
+    } else {
+      output.array() = false;
+    }
+  }
+};
+
+template <>
+struct ComputeDispatchTarget<Float8E5M2FNUZ> {
+  void operator()(const Tensor&, Tensor& Y, bool, bool) const {
+    EigenMap<bool>(Y).array() = false;
+  }
+};
+#endif
 }  // namespace isinf_internal
 
 Status IsInf::Compute(OpKernelContext* context) const {
@@ -88,8 +166,13 @@ Status IsInf::Compute(OpKernelContext* context) const {
 
   using namespace isinf_internal;
 
-  utils::MLTypeCallDispatcherFromTypeList<EnabledDataTypes> dispatcher{X.GetElementType()};
-  dispatcher.Invoke<ComputeDispatchTarget>(X, Y, detect_positive_ != 0, detect_negative_ != 0);
+  if (opset_ < 20) {
+    utils::MLTypeCallDispatcherFromTypeList<EnabledDataTypes10> dispatcher{X.GetElementType()};
+    dispatcher.Invoke<ComputeDispatchTarget>(X, Y, detect_positive_ != 0, detect_negative_ != 0);
+  } else {
+    utils::MLTypeCallDispatcherFromTypeList<EnabledDataTypes20> dispatcher{X.GetElementType()};
+    dispatcher.Invoke<ComputeDispatchTarget>(X, Y, detect_positive_ != 0, detect_negative_ != 0);
+  }
 
   return Status::OK();
 }
diff --git a/onnxruntime/core/providers/cpu/tensor/isnan.cc b/onnxruntime/core/providers/cpu/tensor/isnan.cc
index 33d0f8eb6c1ae..34495e382278a 100644
--- a/onnxruntime/core/providers/cpu/tensor/isnan.cc
+++ b/onnxruntime/core/providers/cpu/tensor/isnan.cc
@@ -20,10 +20,20 @@ namespace onnxruntime {
           .TypeConstraint("T2", DataTypeImpl::GetTensorType<bool>()),     \
       IsNaN<data_type>);
 
+#define ADD_TYPED_ISNAN_OP_13(data_type)                                  \
+  ONNX_CPU_OPERATOR_VERSIONED_TYPED_KERNEL(                               \
+      IsNaN,                                                              \
+      13, 19,                                                             \
+      data_type,                                                          \
+      KernelDefBuilder()                                                  \
+          .TypeConstraint("T1", DataTypeImpl::GetTensorType<data_type>()) \
+          .TypeConstraint("T2", DataTypeImpl::GetTensorType<bool>()),     \
+      IsNaN<data_type>);
+
 #define ADD_TYPED_ISNAN_OP(data_type)                                     \
   ONNX_CPU_OPERATOR_TYPED_KERNEL(                                         \
       IsNaN,                                                              \
-      13,                                                                 \
+      20,                                                                 \
       data_type,                                                          \
       KernelDefBuilder()                                                  \
           .TypeConstraint("T1", DataTypeImpl::GetTensorType<data_type>()) \
@@ -33,10 +43,20 @@ namespace onnxruntime {
 ADD_TYPED_ISNAN_OP_9(float);
 ADD_TYPED_ISNAN_OP_9(double);
 ADD_TYPED_ISNAN_OP_9(MLFloat16);
+ADD_TYPED_ISNAN_OP_13(float);
+ADD_TYPED_ISNAN_OP_13(double);
+ADD_TYPED_ISNAN_OP_13(MLFloat16);
 ADD_TYPED_ISNAN_OP(float);
 ADD_TYPED_ISNAN_OP(double);
 ADD_TYPED_ISNAN_OP(MLFloat16);
 
+#if !defined(DISABLE_FLOAT8_TYPES)
+ADD_TYPED_ISNAN_OP(Float8E4M3FN);
+ADD_TYPED_ISNAN_OP(Float8E4M3FNUZ);
+ADD_TYPED_ISNAN_OP(Float8E5M2);
+ADD_TYPED_ISNAN_OP(Float8E5M2FNUZ);
+#endif
+
 template <typename T>
 Status IsNaN<T>::Compute(OpKernelContext* context) const {
   const auto* X_ptr = context->Input<Tensor>(0);
@@ -70,4 +90,63 @@ Status IsNaN<MLFloat16>::Compute(OpKernelContext* context) const {
 
   return Status::OK();
 }
+
+#if !defined(DISABLE_FLOAT8_TYPES)
+template <>
+Status IsNaN<Float8E4M3FN>::Compute(OpKernelContext* context) const {
+  const auto* X = context->Input<Tensor>(0);
+  auto& dims = X->Shape();
+  auto& Y = *context->Output(0, dims);
+
+  auto input = ConstEigenVectorMap<uint8_t>(static_cast<const uint8_t*>(static_cast<const void*>(X->Data<Float8E4M3FN>())), onnxruntime::narrow<size_t>(dims.Size()));
+  auto output = EigenMap<bool>(Y);
+
+  // S.1111.111
+  std::transform(input.begin(), input.end(), output.begin(), [](uint8_t c) { return (c & 0x7f) == 0x7f; });
+  return Status::OK();
+}
+
+template <>
+Status IsNaN<Float8E4M3FNUZ>::Compute(OpKernelContext* context) const {
+  const auto* X = context->Input<Tensor>(0);
+  auto X_data = X->Data<Float8E4M3FNUZ>();
+  auto& dims = X->Shape();
+  auto shape_size = dims.Size();
+  auto& Y = *context->Output(0, dims);
+
+  // 1.0000.000
+  EigenMap<bool>(Y) =
+      ConstEigenVectorMap<uint8_t>(static_cast<const uint8_t*>(static_cast<const void*>(X_data)), onnxruntime::narrow<size_t>(shape_size)).array() == 0x80;
+
+  return Status::OK();
+}
+
+template <>
+Status IsNaN<Float8E5M2>::Compute(OpKernelContext* context) const {
+  const auto* X = context->Input<Tensor>(0);
+  auto& dims = X->Shape();
+  auto& Y = *context->Output(0, dims);
+
+  auto input = ConstEigenVectorMap<uint8_t>(static_cast<const uint8_t*>(static_cast<const void*>(X->Data<Float8E5M2>())), onnxruntime::narrow<size_t>(dims.Size()));
+  auto output = EigenMap<bool>(Y);
+
+  // S.11111.{01, 10, 11}
+  std::transform(input.begin(), input.end(), output.begin(), [](uint8_t c) { return ((c & 0x7c) == 0x7c) && ((c & 0x03) != 0x00); });
+  return Status::OK();
+}
+
+template <>
+Status IsNaN<Float8E5M2FNUZ>::Compute(OpKernelContext* context) const {
+  const auto* X = context->Input<Tensor>(0);
+  auto X_data = X->Data<Float8E5M2FNUZ>();
+  auto& dims = X->Shape();
+  auto shape_size = dims.Size();
+  auto& Y = *context->Output(0, dims);
+
+  // 1.0000.000
+  EigenMap<bool>(Y) = ConstEigenVectorMap<uint8_t>(static_cast<const uint8_t*>(static_cast<const void*>(X_data)), onnxruntime::narrow<size_t>(shape_size)).array() == 0x80;
+
+  return Status::OK();
+}
+#endif
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/cpu/tensor/slice.cc b/onnxruntime/core/providers/cpu/tensor/slice.cc
index a8cb74a62e02d..e0cd74343b83d 100644
--- a/onnxruntime/core/providers/cpu/tensor/slice.cc
+++ b/onnxruntime/core/providers/cpu/tensor/slice.cc
@@ -76,9 +76,9 @@ ONNX_CPU_OPERATOR_KERNEL(
 // e.g. if input shape is { 2, 2, 2, 2, 2 }, output shape is { 2, 2, 1, 2, 2 },
 // and the 'steps' value for all dims is 1 except dim-2, then the input shape is coalesced to { 4, 2, 4 }
 // and the output shape is coalesced to { 4, 1, 4 }.
-static void FlattenOutputDims(gsl::span<const int64_t> input_dimensions, gsl::span<const int64_t> output_dims,
-                              TensorShapeVector& starts, TensorShapeVector& ends, TensorShapeVector& steps,
-                              TensorShapeVector*& p_flattened_input_dims, TensorShapeVector*& p_flattened_output_dims) {
+Status SliceBase::FlattenOutputDims(gsl::span<const int64_t> input_dimensions, gsl::span<const int64_t> output_dims,
+                                    TensorShapeVector& starts, TensorShapeVector& ends, TensorShapeVector& steps,
+                                    TensorShapeVector*& p_flattened_input_dims, TensorShapeVector*& p_flattened_output_dims) {
   size_t cur = 0;
   size_t nxt = 0;
   while (true) {
@@ -131,6 +131,8 @@ static void FlattenOutputDims(gsl::span<const int64_t> input_dimensions, gsl::sp
     ends.resize(cur);
     steps.resize(cur);
   }
+
+  return Status::OK();
 }
 
 // Slice V1-9 & DynamicSlice
@@ -138,9 +140,9 @@ Status SliceBase::PrepareForCompute(gsl::span<const int64_t> raw_starts, gsl::sp
                                     gsl::span<const int64_t> raw_axes,
                                     SliceOp::PrepareForComputeMetadata& compute_metadata) {
   ORT_RETURN_IF_ERROR(SliceOp::PrepareForComputeHelper(raw_starts, raw_ends, raw_axes, compute_metadata));
-  FlattenOutputDims(compute_metadata.input_dimensions_, compute_metadata.output_dims_, compute_metadata.starts_,
-                    compute_metadata.ends_, compute_metadata.steps_, compute_metadata.p_flattened_input_dims_,
-                    compute_metadata.p_flattened_output_dims_);
+  ORT_RETURN_IF_ERROR(FlattenOutputDims(compute_metadata.input_dimensions_, compute_metadata.output_dims_, compute_metadata.starts_,
+                                        compute_metadata.ends_, compute_metadata.steps_, compute_metadata.p_flattened_input_dims_,
+                                        compute_metadata.p_flattened_output_dims_));
   return Status::OK();
 }
 
@@ -149,9 +151,9 @@ Status SliceBase::PrepareForCompute(gsl::span<const int64_t> raw_starts, gsl::sp
                                     gsl::span<const int64_t> raw_axes, gsl::span<const int64_t> raw_steps,
                                     SliceOp::PrepareForComputeMetadata& compute_metadata) {
   ORT_RETURN_IF_ERROR(SliceOp::PrepareForComputeHelper(raw_starts, raw_ends, raw_axes, raw_steps, compute_metadata));
-  FlattenOutputDims(compute_metadata.input_dimensions_, compute_metadata.output_dims_, compute_metadata.starts_,
-                    compute_metadata.ends_, compute_metadata.steps_, compute_metadata.p_flattened_input_dims_,
-                    compute_metadata.p_flattened_output_dims_);
+  ORT_RETURN_IF_ERROR(FlattenOutputDims(compute_metadata.input_dimensions_, compute_metadata.output_dims_, compute_metadata.starts_,
+                                        compute_metadata.ends_, compute_metadata.steps_, compute_metadata.p_flattened_input_dims_,
+                                        compute_metadata.p_flattened_output_dims_));
 
   return Status::OK();
 }
diff --git a/onnxruntime/core/providers/cpu/tensor/slice.h b/onnxruntime/core/providers/cpu/tensor/slice.h
index 28e76aca4ea21..1503a87931bcf 100644
--- a/onnxruntime/core/providers/cpu/tensor/slice.h
+++ b/onnxruntime/core/providers/cpu/tensor/slice.h
@@ -38,6 +38,10 @@ class SliceBase {
                                      TensorShapeVector& input_axes,
                                      TensorShapeVector& input_steps);
 
+  static Status FlattenOutputDims(gsl::span<const int64_t> input_dimensions, gsl::span<const int64_t> output_dims,
+                                  TensorShapeVector& starts, TensorShapeVector& ends, TensorShapeVector& steps,
+                                  TensorShapeVector*& p_flattened_input_dims, TensorShapeVector*& p_flattened_output_dims);
+
  protected:
   SliceBase(const OpKernelInfo& info, bool dynamic = false)
       : dynamic_(dynamic) {
diff --git a/onnxruntime/core/providers/cpu/tensor/upsamplebase.h b/onnxruntime/core/providers/cpu/tensor/upsamplebase.h
index c13c9d42dd392..0b3ce6f477843 100644
--- a/onnxruntime/core/providers/cpu/tensor/upsamplebase.h
+++ b/onnxruntime/core/providers/cpu/tensor/upsamplebase.h
@@ -239,7 +239,7 @@ class UpsampleBase {
     if (coordinate_transform_mode_name == "half_pixel_symmetric") {
       return HALF_PIXEL_SYMMETRIC;
     }
-    ORT_THROW("coordinate_transform_mode:[" + coordinate_transform_mode_name + "] is not supportted!");
+    ORT_THROW("coordinate_transform_mode:[" + coordinate_transform_mode_name + "] is not supported!");
   }
 
   GetOriginalCoordinateFunc GetOriginalCoordinateFromResizedCoordinate(
@@ -352,7 +352,7 @@ class UpsampleBase {
                             (scales.size() == 4 && scales[0] == 1 && scales[3] == 1) ||
                             scales.size() == 3 ||
                             (scales.size() == 5 && scales[0] == 1 && scales[1] == 1),
-                        "'Linear' mode only support:\n"
+                        "'Linear' mode only supports:\n"
                         "  * 2-D inputs or\n"
                         "  * 3-D inputs ('Bilinear', 'Trilinear') or\n"
                         "  * 4-D inputs with the corresponding outermost 2 scale values being 1"
diff --git a/onnxruntime/core/providers/cuda/cuda_common.cc b/onnxruntime/core/providers/cuda/cuda_common.cc
index 57477f167c555..33f2938940e4d 100644
--- a/onnxruntime/core/providers/cuda/cuda_common.cc
+++ b/onnxruntime/core/providers/cuda/cuda_common.cc
@@ -27,5 +27,91 @@ const HalfGemmOptions* HalfGemmOptions::GetInstance() {
   return &instance;
 }
 
+const char* cublasGetErrorEnum(cublasStatus_t error) {
+  switch (error) {
+    case CUBLAS_STATUS_SUCCESS:
+      return "CUBLAS_STATUS_SUCCESS";
+    case CUBLAS_STATUS_NOT_INITIALIZED:
+      return "CUBLAS_STATUS_NOT_INITIALIZED";
+    case CUBLAS_STATUS_ALLOC_FAILED:
+      return "CUBLAS_STATUS_ALLOC_FAILED";
+    case CUBLAS_STATUS_INVALID_VALUE:
+      return "CUBLAS_STATUS_INVALID_VALUE";
+    case CUBLAS_STATUS_ARCH_MISMATCH:
+      return "CUBLAS_STATUS_ARCH_MISMATCH";
+    case CUBLAS_STATUS_MAPPING_ERROR:
+      return "CUBLAS_STATUS_MAPPING_ERROR";
+    case CUBLAS_STATUS_EXECUTION_FAILED:
+      return "CUBLAS_STATUS_EXECUTION_FAILED";
+    case CUBLAS_STATUS_INTERNAL_ERROR:
+      return "CUBLAS_STATUS_INTERNAL_ERROR";
+    case CUBLAS_STATUS_NOT_SUPPORTED:
+      return "CUBLAS_STATUS_NOT_SUPPORTED";
+    case CUBLAS_STATUS_LICENSE_ERROR:
+      return "CUBLAS_STATUS_LICENSE_ERROR";
+    default:
+      return "<unknown>";
+  }
+}
+
+const char* CudaDataTypeToString(cudaDataType_t dt) {
+  switch (dt) {
+    case CUDA_R_16F:
+      return "CUDA_R_16F";
+    case CUDA_R_16BF:
+      return "CUDA_R_16BF";
+    case CUDA_R_32F:
+      return "CUDA_R_32F";
+#if !defined(DISABLE_FLOAT8_TYPES)
+    // Note: CUDA_R_8F_E4M3 is defined with CUDA>=11.8
+    case CUDA_R_8F_E4M3:
+      return "CUDA_R_8F_E4M3";
+    case CUDA_R_8F_E5M2:
+      return "CUDA_R_8F_E5M2";
+#endif
+    default:
+      return "<unknown>";
+  }
+}
+
+const char* CublasComputeTypeToString(cublasComputeType_t ct) {
+  switch (ct) {
+    case CUBLAS_COMPUTE_16F:
+      return "CUBLAS_COMPUTE_16F";
+    case CUBLAS_COMPUTE_32F:
+      return "CUBLAS_COMPUTE_32F";
+    case CUBLAS_COMPUTE_32F_FAST_16F:
+      return "CUBLAS_COMPUTE_32F_FAST_16F";
+    case CUBLAS_COMPUTE_32F_FAST_16BF:
+      return "CUBLAS_COMPUTE_32F_FAST_16BF";
+    case CUBLAS_COMPUTE_32F_FAST_TF32:
+      return "CUBLAS_COMPUTE_32F_FAST_TF32";
+    case CUBLAS_COMPUTE_64F:
+      return "CUBLAS_COMPUTE_64F";
+    default:
+      return "<unknown>";
+  }
+}
+
+// It must exist somewhere already.
+cudaDataType_t ToCudaDataType(int32_t element_type) {
+  switch (element_type) {
+    case ONNX_NAMESPACE::TensorProto_DataType_FLOAT:
+      return CUDA_R_32F;
+    case ONNX_NAMESPACE::TensorProto_DataType_FLOAT16:
+      return CUDA_R_16F;
+    case ONNX_NAMESPACE::TensorProto_DataType_BFLOAT16:
+      return CUDA_R_16BF;
+#if !defined(DISABLE_FLOAT8_TYPES)
+    case ONNX_NAMESPACE::TensorProto_DataType_FLOAT8E4M3FN:
+      return CUDA_R_8F_E4M3;
+    case ONNX_NAMESPACE::TensorProto_DataType_FLOAT8E5M2:
+      return CUDA_R_8F_E5M2;
+#endif
+    default:
+      ORT_THROW("Unexpected element_type=", element_type, ".");
+  }
+}
+
 }  // namespace cuda
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/cuda/cuda_common.h b/onnxruntime/core/providers/cuda/cuda_common.h
index fa258961f1155..707099bac3ce0 100644
--- a/onnxruntime/core/providers/cuda/cuda_common.h
+++ b/onnxruntime/core/providers/cuda/cuda_common.h
@@ -11,6 +11,7 @@
 
 #include "core/providers/shared_library/provider_api.h"
 #include "core/common/status.h"
+#include "core/framework/float8.h"
 #include "core/framework/float16.h"
 #include "core/providers/cuda/cuda_pch.h"
 #include "core/providers/cuda/shared_inc/cuda_call.h"
@@ -48,6 +49,37 @@ class ToCudaType<MLFloat16> {
   }
 };
 
+template <>
+class ToCudaType<BFloat16> {
+ public:
+  typedef BFloat16 MappedType;
+  static MappedType FromFloat(float f) {
+    return MappedType(f);
+  }
+};
+
+#if !defined(DISABLE_FLOAT8_TYPES)
+
+template <>
+class ToCudaType<Float8E4M3FN> {
+ public:
+  typedef Float8E4M3FN MappedType;
+  static MappedType FromFloat(float f) {
+    return MappedType(f);
+  }
+};
+
+template <>
+class ToCudaType<Float8E5M2> {
+ public:
+  typedef Float8E5M2 MappedType;
+  static MappedType FromFloat(float f) {
+    return MappedType(f);
+  }
+};
+
+#endif
+
 inline bool CalculateFdmStrides(gsl::span<fast_divmod> p, const std::vector<int64_t>& dims) {
   int stride = 1;
   if (dims.empty() || p.size() < dims.size())
@@ -152,5 +184,13 @@ class HalfGemmOptions {
   static HalfGemmOptions instance;
 };
 
+const char* cublasGetErrorEnum(cublasStatus_t error);
+
+const char* CudaDataTypeToString(cudaDataType_t dt);
+
+const char* CublasComputeTypeToString(cublasComputeType_t ct);
+
+cudaDataType_t ToCudaDataType(int32_t element_type);
+
 }  // namespace cuda
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/cuda/cuda_execution_provider.cc b/onnxruntime/core/providers/cuda/cuda_execution_provider.cc
index ad892eab3b843..d8a0792209b0f 100644
--- a/onnxruntime/core/providers/cuda/cuda_execution_provider.cc
+++ b/onnxruntime/core/providers/cuda/cuda_execution_provider.cc
@@ -1,4 +1,5 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
+// Copyright (c) 2023 NVIDIA Corporation.
 // Licensed under the MIT License.
 
 #include "core/common/inlined_containers.h"
@@ -15,6 +16,10 @@
 #include "contrib_ops/cuda/cuda_contrib_kernels.h"
 #endif
 
+#ifdef ENABLE_CUDA_NHWC_OPS
+#include "core/providers/cuda/cuda_nhwc_kernels.h"
+#endif
+
 #ifdef ENABLE_TRAINING_OPS
 #include "orttraining/training_ops/cuda/cuda_training_kernels.h"
 #endif
@@ -233,6 +238,10 @@ CUDAExecutionProvider::CUDAExecutionProvider(const CUDAExecutionProviderInfo& in
     : IExecutionProvider{onnxruntime::kCudaExecutionProvider, OrtDevice(OrtDevice::GPU, OrtDevice::MemType::DEFAULT, info.device_id)},
       info_{info},
       tuning_context_(this, &info_.tunable_op) {
+#ifndef ENABLE_CUDA_NHWC_OPS
+  ORT_ENFORCE(info_.prefer_nhwc == 0, "This build does not support NHWC layout");
+#endif
+
   CUDA_CALL_THROW(cudaSetDevice(info_.device_id));
 
   // must wait GPU idle, otherwise cudaGetDeviceProperties might fail
@@ -250,7 +259,7 @@ CUDAExecutionProvider::CUDAExecutionProvider(const CUDAExecutionProviderInfo& in
     if (info.external_allocator_info.UseExternalAllocator()) {
       use_ep_level_unified_stream_ = true;
       stream_ = nullptr;
-    } else if (info.enable_cuda_graph) {
+    } else if (info.enable_cuda_graph || info.use_ep_level_unified_stream) {
       // current cuda graph implementation only works with single stream
       // use EP level unified stream for all the reqeust
       CUDA_CALL_THROW(cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking));
@@ -271,6 +280,10 @@ CUDAExecutionProvider::CUDAExecutionProvider(const CUDAExecutionProviderInfo& in
 #endif
 }
 
+DataLayout CUDAExecutionProvider::GetPreferredLayout() const {
+  return this->IsNHWCPreferred() ? DataLayout::NHWC : DataLayout::NCHW;
+}
+
 CUDAExecutionProvider::~CUDAExecutionProvider() {
   // clean up thread local context caches
   {
@@ -373,7 +386,7 @@ Status CUDAExecutionProvider::OnRunStart() {
   // always set CUDA device when session::Run() in case it runs in a worker thread
   CUDA_RETURN_IF_ERROR(cudaSetDevice(GetDeviceId()));
   if (IsGraphCaptureEnabled() && GetPerThreadContext().IsGraphCaptureAllowed() && !GetPerThreadContext().IsGraphCaptured()) {
-    LOGS_DEFAULT(INFO) << "Capturing the cuda graph for this model";
+    LOGS(*GetLogger(), INFO) << "Capturing the cuda graph for this model";
     GetPerThreadContext().CaptureBegin();
   }
   return Status::OK();
@@ -632,51 +645,54 @@ class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kO
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, float, GlobalMaxPool);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, double, GlobalMaxPool);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, MLFloat16, GlobalMaxPool);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, float, ArgMax);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, double, ArgMax);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, ArgMax);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, float, ArgMin);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, double, ArgMin);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, ArgMin);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, float, ReduceL1);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, double, ReduceL1);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, ReduceL1);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, int32_t, ReduceL1);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, float, ReduceL2);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, double, ReduceL2);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, ReduceL2);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, int32_t, ReduceL2);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, float, ReduceMax);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, double, ReduceMax);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, ReduceMax);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, int32_t, ReduceMax);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, int64_t, ReduceMax);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, float, ReduceMean);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, double, ReduceMean);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, ReduceMean);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, int32_t, ReduceMean);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, float, ReduceMin);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, double, ReduceMin);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, ReduceMin);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, int32_t, ReduceMin);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, float, ReduceProd);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, double, ReduceProd);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, ReduceProd);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, int32_t, ReduceProd);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, float, ReduceSum);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, double, ReduceSum);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, ReduceSum);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, int32_t, ReduceSum);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, int64_t, ReduceSum);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, float, ReduceLogSum);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, double, ReduceLogSum);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, ReduceLogSum);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, float, ReduceSumSquare);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, double, ReduceSumSquare);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, ReduceSumSquare);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, float, ReduceLogSumExp);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, double, ReduceLogSumExp);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, ReduceLogSumExp);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 11, float, ArgMax);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 11, double, ArgMax);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 11, MLFloat16, ArgMax);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 11, float, ArgMin);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 11, double, ArgMin);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 11, MLFloat16, ArgMin);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, float, ReduceL1);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, double, ReduceL1);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, MLFloat16, ReduceL1);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, int32_t, ReduceL1);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, float, ReduceL2);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, double, ReduceL2);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, MLFloat16, ReduceL2);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, int32_t, ReduceL2);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, float, ReduceMax);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, double, ReduceMax);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, MLFloat16, ReduceMax);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, int32_t, ReduceMax);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, int64_t, ReduceMax);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, float, ReduceMean);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, double, ReduceMean);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, MLFloat16, ReduceMean);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, int32_t, ReduceMean);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, float, ReduceMin);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, double, ReduceMin);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, MLFloat16, ReduceMin);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, int32_t, ReduceMin);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, int64_t, ReduceMin);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, int8_t, ReduceMin);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, uint8_t, ReduceMin);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, float, ReduceProd);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, double, ReduceProd);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, MLFloat16, ReduceProd);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, int32_t, ReduceProd);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 12, float, ReduceSum);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 12, double, ReduceSum);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 12, MLFloat16, ReduceSum);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 12, int32_t, ReduceSum);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 12, int64_t, ReduceSum);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, float, ReduceLogSum);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, double, ReduceLogSum);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, MLFloat16, ReduceLogSum);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, float, ReduceSumSquare);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, double, ReduceSumSquare);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, MLFloat16, ReduceSumSquare);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, float, ReduceLogSumExp);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, double, ReduceLogSumExp);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, MLFloat16, ReduceLogSumExp);
 class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 6, 8, float, Cast);
 class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 6, 8, double, Cast);
 class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 6, 8, MLFloat16, Cast);
@@ -811,12 +827,6 @@ class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDom
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 10, 12, Mod);
 
 // opset 11
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 11, float, ArgMax);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 11, double, ArgMax);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 11, MLFloat16, ArgMax);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 11, float, ArgMin);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 11, double, ArgMin);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 11, MLFloat16, ArgMin);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, Compress);
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, Concat);
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, Flatten);
@@ -830,45 +840,6 @@ class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDom
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, Loop);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, NonMaxSuppression);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, Range);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, float, ReduceL1);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, double, ReduceL1);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, MLFloat16, ReduceL1);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, int32_t, ReduceL1);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, float, ReduceL2);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, double, ReduceL2);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, MLFloat16, ReduceL2);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, int32_t, ReduceL2);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, float, ReduceLogSum);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, double, ReduceLogSum);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, MLFloat16, ReduceLogSum);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, float, ReduceLogSumExp);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, double, ReduceLogSumExp);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, MLFloat16, ReduceLogSumExp);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 11, float, ReduceMax);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 11, double, ReduceMax);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 11, MLFloat16, ReduceMax);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 11, int32_t, ReduceMax);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 11, int64_t, ReduceMax);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, float, ReduceMean);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, double, ReduceMean);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, MLFloat16, ReduceMean);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, int32_t, ReduceMean);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 11, float, ReduceMin);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 11, double, ReduceMin);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 11, MLFloat16, ReduceMin);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 11, int32_t, ReduceMin);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, float, ReduceProd);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, double, ReduceProd);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, MLFloat16, ReduceProd);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, int32_t, ReduceProd);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, float, ReduceSum);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, double, ReduceSum);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, MLFloat16, ReduceSum);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, int32_t, ReduceSum);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, int64_t, ReduceSum);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, float, ReduceSumSquare);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, double, ReduceSumSquare);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, MLFloat16, ReduceSumSquare);
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 15, Scan);
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, ScatterElements);
 class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, int32_t, Slice);
@@ -946,22 +917,6 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain,
 
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 12, 12, Pow);
 
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 12, 12, float, ReduceMax);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 12, 12, double, ReduceMax);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 12, 12, MLFloat16, ReduceMax);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 12, 12, int32_t, ReduceMax);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 12, 12, int64_t, ReduceMax);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 12, 12, int8_t, ReduceMax);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 12, 12, uint8_t, ReduceMax);
-
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 12, 12, float, ReduceMin);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 12, 12, double, ReduceMin);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 12, 12, MLFloat16, ReduceMin);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 12, 12, int32_t, ReduceMin);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 12, 12, int64_t, ReduceMin);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 12, 12, int8_t, ReduceMin);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 12, 12, uint8_t, ReduceMin);
-
 class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 12, 12, int64_t, GatherND);
 
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 12, 12, Dropout);
@@ -1016,6 +971,7 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain,
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, float, Neg);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, double, Neg);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, MLFloat16, Neg);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, BFloat16, Neg);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, float, Floor);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, double, Floor);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, MLFloat16, Floor);
@@ -1115,50 +1071,36 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain,
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, float, Gemm);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, double, Gemm);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, MLFloat16, Gemm);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, float, ReduceL1);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, double, ReduceL1);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, MLFloat16, ReduceL1);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, int32_t, ReduceL1);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, float, ReduceL2);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, double, ReduceL2);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, MLFloat16, ReduceL2);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, int32_t, ReduceL2);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, float, ReduceLogSum);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, double, ReduceLogSum);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, MLFloat16, ReduceLogSum);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, float, ReduceLogSumExp);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, double, ReduceLogSumExp);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, MLFloat16, ReduceLogSumExp);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, float, ReduceMax);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, double, ReduceMax);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, MLFloat16, ReduceMax);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, int32_t, ReduceMax);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, int64_t, ReduceMax);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, int8_t, ReduceMax);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, uint8_t, ReduceMax);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, float, ReduceMean);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, double, ReduceMean);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, MLFloat16, ReduceMean);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, int32_t, ReduceMean);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, 13, float, ReduceMin);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, 13, double, ReduceMin);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, 13, MLFloat16, ReduceMin);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, 13, int32_t, ReduceMin);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, 13, int64_t, ReduceMin);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, 13, int8_t, ReduceMin);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, 13, uint8_t, ReduceMin);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, float, ReduceProd);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, double, ReduceProd);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, MLFloat16, ReduceProd);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, int32_t, ReduceProd);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, float, ReduceL1);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, double, ReduceL1);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, MLFloat16, ReduceL1);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, int32_t, ReduceL1);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, float, ReduceL2);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, double, ReduceL2);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, MLFloat16, ReduceL2);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, int32_t, ReduceL2);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, float, ReduceLogSum);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, double, ReduceLogSum);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, MLFloat16, ReduceLogSum);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, float, ReduceLogSumExp);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, double, ReduceLogSumExp);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, MLFloat16, ReduceLogSumExp);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, float, ReduceMean);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, double, ReduceMean);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, MLFloat16, ReduceMean);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, int32_t, ReduceMean);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, float, ReduceProd);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, double, ReduceProd);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, MLFloat16, ReduceProd);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, int32_t, ReduceProd);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, float, ReduceSum);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, double, ReduceSum);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, MLFloat16, ReduceSum);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, int32_t, ReduceSum);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, int64_t, ReduceSum);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, float, ReduceSumSquare);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, double, ReduceSumSquare);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, MLFloat16, ReduceSumSquare);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, float, ReduceSumSquare);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, double, ReduceSumSquare);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, MLFloat16, ReduceSumSquare);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, int64_t, GatherND);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, Dropout);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, float, Resize);
@@ -1257,13 +1199,13 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain,
 class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, 14, float, BatchNormalization);
 class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, 14, double, BatchNormalization);
 class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, 14, MLFloat16, BatchNormalization);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, float, ReduceMin);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, double, ReduceMin);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, MLFloat16, ReduceMin);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, int32_t, ReduceMin);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, int8_t, ReduceMin);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, uint8_t, ReduceMin);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, int64_t, ReduceMin);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, float, ReduceMin);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, double, ReduceMin);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, MLFloat16, ReduceMin);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, int32_t, ReduceMin);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, int8_t, ReduceMin);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, uint8_t, ReduceMin);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, int64_t, ReduceMin);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, Trilu);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, BFloat16, Add);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, BFloat16, Sub);
@@ -1287,6 +1229,7 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain,
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 16, MLFloat16, PRelu);
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 16, 18, Scan);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 16, MLFloat16, Where);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 16, BFloat16, Where);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 16, float, Where);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 16, double_t, Where);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 16, int32_t, Where);
@@ -1316,6 +1259,12 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain,
 // Opset 18
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, Split);
 
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, float, ReduceMax);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, double, ReduceMax);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, MLFloat16, ReduceMax);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, int32_t, ReduceMax);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, int64_t, ReduceMax);
+
 // Opset 19
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 19, float, Cast);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 19, double, Cast);
@@ -1581,51 +1530,51 @@ static Status RegisterCudaKernels(KernelRegistry& kernel_registry) {
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, float, GlobalMaxPool)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, double, GlobalMaxPool)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, MLFloat16, GlobalMaxPool)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, float, ArgMax)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, double, ArgMax)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, ArgMax)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, float, ArgMin)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, double, ArgMin)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, ArgMin)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, float, ReduceL1)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, double, ReduceL1)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, ReduceL1)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, int32_t, ReduceL1)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, float, ReduceL2)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, double, ReduceL2)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, ReduceL2)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, int32_t, ReduceL2)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, float, ReduceMax)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, double, ReduceMax)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, ReduceMax)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, int32_t, ReduceMax)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, int64_t, ReduceMax)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, float, ReduceMean)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, double, ReduceMean)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, ReduceMean)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, int32_t, ReduceMean)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, float, ReduceMin)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, double, ReduceMin)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, ReduceMin)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, int32_t, ReduceMin)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, float, ReduceProd)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, double, ReduceProd)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, ReduceProd)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, int32_t, ReduceProd)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, float, ReduceSum)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, double, ReduceSum)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, ReduceSum)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, int32_t, ReduceSum)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, int64_t, ReduceSum)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, float, ReduceLogSum)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, double, ReduceLogSum)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, ReduceLogSum)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, float, ReduceSumSquare)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, double, ReduceSumSquare)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, ReduceSumSquare)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, float, ReduceLogSumExp)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, double, ReduceLogSumExp)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, ReduceLogSumExp)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 11, float, ArgMin)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 11, double, ArgMin)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 11, MLFloat16, ArgMin)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, float, ReduceL1)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, double, ReduceL1)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, MLFloat16, ReduceL1)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, int32_t, ReduceL1)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, float, ReduceL2)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, double, ReduceL2)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, MLFloat16, ReduceL2)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, int32_t, ReduceL2)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, float, ReduceMax)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, double, ReduceMax)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, MLFloat16, ReduceMax)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, int32_t, ReduceMax)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, int64_t, ReduceMax)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, float, ReduceMean)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, double, ReduceMean)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, MLFloat16, ReduceMean)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, int32_t, ReduceMean)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, float, ReduceMin)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, double, ReduceMin)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, MLFloat16, ReduceMin)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, int32_t, ReduceMin)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, int64_t, ReduceMin)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, int8_t, ReduceMin)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, uint8_t, ReduceMin)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, float, ReduceProd)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, double, ReduceProd)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, MLFloat16, ReduceProd)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, int32_t, ReduceProd)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 12, float, ReduceSum)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 12, double, ReduceSum)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 12, MLFloat16, ReduceSum)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 12, int32_t, ReduceSum)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 12, int64_t, ReduceSum)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, float, ReduceLogSum)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, double, ReduceLogSum)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, MLFloat16, ReduceLogSum)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, float, ReduceSumSquare)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, double, ReduceSumSquare)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, MLFloat16, ReduceSumSquare)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, float, ReduceLogSumExp)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, double, ReduceLogSumExp)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 17, MLFloat16, ReduceLogSumExp)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 6, 8, float, Cast)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 6, 8, double, Cast)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 6, 8, MLFloat16, Cast)>,
@@ -1764,12 +1713,9 @@ static Status RegisterCudaKernels(KernelRegistry& kernel_registry) {
     BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 10, 12, Mod)>,
 
     // opset 11
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 11, float, ArgMax)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 11, double, ArgMax)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 11, MLFloat16, ArgMax)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 11, float, ArgMin)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 11, double, ArgMin)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 11, MLFloat16, ArgMin)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 11, float, ArgMax)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 11, double, ArgMax)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 11, MLFloat16, ArgMax)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, Compress)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, Concat)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, Flatten)>,
@@ -1783,45 +1729,6 @@ static Status RegisterCudaKernels(KernelRegistry& kernel_registry) {
     BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, Loop)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, NonMaxSuppression)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, Range)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, float, ReduceL1)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, double, ReduceL1)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, MLFloat16, ReduceL1)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, int32_t, ReduceL1)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, float, ReduceL2)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, double, ReduceL2)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, MLFloat16, ReduceL2)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, int32_t, ReduceL2)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, float, ReduceLogSum)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, double, ReduceLogSum)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, MLFloat16, ReduceLogSum)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, float, ReduceLogSumExp)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, double, ReduceLogSumExp)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, MLFloat16, ReduceLogSumExp)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 11, float, ReduceMax)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 11, double, ReduceMax)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 11, MLFloat16, ReduceMax)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 11, int32_t, ReduceMax)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 11, int64_t, ReduceMax)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, float, ReduceMean)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, double, ReduceMean)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, MLFloat16, ReduceMean)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, int32_t, ReduceMean)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 11, float, ReduceMin)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 11, double, ReduceMin)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 11, MLFloat16, ReduceMin)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 11, int32_t, ReduceMin)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, float, ReduceProd)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, double, ReduceProd)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, MLFloat16, ReduceProd)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, int32_t, ReduceProd)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, float, ReduceSum)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, double, ReduceSum)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, MLFloat16, ReduceSum)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, int32_t, ReduceSum)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, int64_t, ReduceSum)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, float, ReduceSumSquare)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, double, ReduceSumSquare)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, MLFloat16, ReduceSumSquare)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 15, Scan)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, ScatterElements)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, int32_t, Slice)>,
@@ -1895,22 +1802,6 @@ static Status RegisterCudaKernels(KernelRegistry& kernel_registry) {
 
     BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 12, 12, Pow)>,
 
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 12, 12, float, ReduceMax)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 12, 12, double, ReduceMax)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 12, 12, MLFloat16, ReduceMax)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 12, 12, int32_t, ReduceMax)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 12, 12, int64_t, ReduceMax)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 12, 12, int8_t, ReduceMax)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 12, 12, uint8_t, ReduceMax)>,
-
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 12, 12, float, ReduceMin)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 12, 12, double, ReduceMin)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 12, 12, MLFloat16, ReduceMin)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 12, 12, int32_t, ReduceMin)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 12, 12, int64_t, ReduceMin)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 12, 12, int8_t, ReduceMin)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 12, 12, uint8_t, ReduceMin)>,
-
     BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 12, 12, int64_t, GatherND)>,
 
     BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 12, 12, Dropout)>,
@@ -1965,6 +1856,7 @@ static Status RegisterCudaKernels(KernelRegistry& kernel_registry) {
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, float, Neg)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, double, Neg)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, MLFloat16, Neg)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, BFloat16, Neg)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, float, Floor)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, double, Floor)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, MLFloat16, Floor)>,
@@ -2064,50 +1956,36 @@ static Status RegisterCudaKernels(KernelRegistry& kernel_registry) {
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, MLFloat16, Gemm)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, float, Gemm)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, double, Gemm)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, float, ReduceL1)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, double, ReduceL1)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, MLFloat16, ReduceL1)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, int32_t, ReduceL1)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, float, ReduceL2)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, double, ReduceL2)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, MLFloat16, ReduceL2)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, int32_t, ReduceL2)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, float, ReduceLogSum)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, double, ReduceLogSum)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, MLFloat16, ReduceLogSum)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, float, ReduceLogSumExp)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, double, ReduceLogSumExp)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, MLFloat16, ReduceLogSumExp)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, float, ReduceMax)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, double, ReduceMax)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, MLFloat16, ReduceMax)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, int32_t, ReduceMax)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, int64_t, ReduceMax)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, int8_t, ReduceMax)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, uint8_t, ReduceMax)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, float, ReduceMean)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, double, ReduceMean)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, MLFloat16, ReduceMean)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, int32_t, ReduceMean)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, 13, float, ReduceMin)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, 13, double, ReduceMin)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, 13, MLFloat16, ReduceMin)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, 13, int32_t, ReduceMin)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, 13, int64_t, ReduceMin)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, 13, int8_t, ReduceMin)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, 13, uint8_t, ReduceMin)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, float, ReduceProd)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, double, ReduceProd)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, MLFloat16, ReduceProd)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, int32_t, ReduceProd)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, float, ReduceL1)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, double, ReduceL1)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, MLFloat16, ReduceL1)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, int32_t, ReduceL1)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, float, ReduceL2)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, double, ReduceL2)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, MLFloat16, ReduceL2)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, int32_t, ReduceL2)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, float, ReduceLogSum)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, double, ReduceLogSum)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, MLFloat16, ReduceLogSum)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, float, ReduceLogSumExp)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, double, ReduceLogSumExp)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, MLFloat16, ReduceLogSumExp)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, float, ReduceMean)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, double, ReduceMean)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, MLFloat16, ReduceMean)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, int32_t, ReduceMean)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, float, ReduceProd)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, double, ReduceProd)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, MLFloat16, ReduceProd)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, int32_t, ReduceProd)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, float, ReduceSum)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, double, ReduceSum)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, MLFloat16, ReduceSum)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, int32_t, ReduceSum)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, int64_t, ReduceSum)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, float, ReduceSumSquare)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, double, ReduceSumSquare)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, MLFloat16, ReduceSumSquare)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, float, ReduceSumSquare)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, double, ReduceSumSquare)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, MLFloat16, ReduceSumSquare)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, int64_t, GatherND)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, Dropout)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, float, Resize)>,
@@ -2206,13 +2084,13 @@ static Status RegisterCudaKernels(KernelRegistry& kernel_registry) {
     BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, 14, float, BatchNormalization)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, 14, double, BatchNormalization)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, 14, MLFloat16, BatchNormalization)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, float, ReduceMin)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, double, ReduceMin)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, MLFloat16, ReduceMin)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, int32_t, ReduceMin)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, int8_t, ReduceMin)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, uint8_t, ReduceMin)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, int64_t, ReduceMin)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, float, ReduceMin)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, double, ReduceMin)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, MLFloat16, ReduceMin)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, int32_t, ReduceMin)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, int8_t, ReduceMin)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, uint8_t, ReduceMin)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, int64_t, ReduceMin)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, BFloat16, Add)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, BFloat16, Sub)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 14, BFloat16, Mul)>,
@@ -2236,6 +2114,7 @@ static Status RegisterCudaKernels(KernelRegistry& kernel_registry) {
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 16, MLFloat16, PRelu)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 16, 18, Scan)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 16, MLFloat16, Where)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 16, BFloat16, Where)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 16, float, Where)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 16, double_t, Where)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 16, int32_t, Where)>,
@@ -2264,6 +2143,11 @@ static Status RegisterCudaKernels(KernelRegistry& kernel_registry) {
 
     // Opset 18
     BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, Split)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, float, ReduceMax)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, double, ReduceMax)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, MLFloat16, ReduceMax)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, int32_t, ReduceMax)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 18, int64_t, ReduceMax)>,
 
     // Opset 19
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 19, float, Cast)>,
@@ -2330,6 +2214,10 @@ static Status RegisterCudaKernels(KernelRegistry& kernel_registry) {
   ORT_RETURN_IF_ERROR(::onnxruntime::contrib::cuda::RegisterCudaContribKernels(kernel_registry));
 #endif
 
+#ifdef ENABLE_CUDA_NHWC_OPS
+  ORT_RETURN_IF_ERROR(::onnxruntime::cuda::RegisterCudaNhwcKernels(kernel_registry));
+#endif
+
 #ifdef ENABLE_TRAINING_OPS
   ORT_RETURN_IF_ERROR(::onnxruntime::cuda::RegisterCudaTrainingKernels(kernel_registry));
 #endif
@@ -2410,7 +2298,7 @@ static bool RNNNeedFallbackToCPU(const onnxruntime::Node& node,
   return false;
 }
 
-static bool ConvTransposeNeedFallbackToCPU(const onnxruntime::Node& node) {
+static bool ConvTransposeNeedFallbackToCPU(const onnxruntime::Node& node, const logging::Logger& logger) {
   const auto& node_attributes = node.GetAttributes();
   // Check attributes
   for (auto& attr : node_attributes) {
@@ -2428,7 +2316,7 @@ static bool ConvTransposeNeedFallbackToCPU(const onnxruntime::Node& node) {
       int rank = pads_size / 2;
       for (int i = 0; i < rank; i++) {
         if (pads.Get(i) != pads.Get(i + rank)) {
-          LOGS_DEFAULT(WARNING) << "Dropping the ConvTranspose node: " << node.Name()
+          LOGS(logger, WARNING) << "Dropping the ConvTranspose node: " << node.Name()
                                 << " to CPU because it requires asymmetric padding which the CUDA EP"
                                 << " currently does not support";
           return true;
@@ -2450,7 +2338,7 @@ static bool ConvTransposeNeedFallbackToCPU(const onnxruntime::Node& node) {
       // symmetric padding.
       // TODO: Remove this after we have supported asymmetric padding in the CUDA ConvTranspose kernel
       if (auto_pad_attr == "SAME_UPPER" || auto_pad_attr == "SAME_LOWER") {
-        LOGS_DEFAULT(WARNING) << "Dropping the ConvTranspose node: " << node.Name()
+        LOGS(logger, WARNING) << "Dropping the ConvTranspose node: " << node.Name()
                               << " to CPU because it uses the auto_pad attribute which may lead to asymmetric padding which"
                               << " the CUDA EP currently does not support";
         return true;
@@ -2487,6 +2375,9 @@ std::vector<std::unique_ptr<ComputeCapability>>
 CUDAExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph,
                                      const IKernelLookup& kernel_lookup) const {
   InlinedVector<NodeIndex> candidates;
+  // A subset of the above vector. A subset of the tentative_nodes might be moved to CPU.
+  InlinedVector<NodeIndex> tentative_nodes;
+  const logging::Logger& logger = *GetLogger();
   for (auto& node_index : graph.GetNodesInTopologicalOrder()) {
     const auto* p_node = graph.GetNode(node_index);
     if (p_node == nullptr)
@@ -2494,13 +2385,16 @@ CUDAExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph,
 
     const auto& node = *p_node;
     if (!node.GetExecutionProviderType().empty()) {
+      if (node.GetExecutionProviderType() == kCudaExecutionProvider) {
+        candidates.push_back(node.Index());
+      }
       continue;
     }
 
     const KernelCreateInfo* cuda_kernel_def = kernel_lookup.LookUpKernel(node);
     // none of the provided registries has a CUDA kernel for this node
     if (cuda_kernel_def == nullptr) {
-      LOGS_DEFAULT(INFO) << "CUDA kernel not found in registries for Op type: " << node.OpType() << " node name: " << node.Name();
+      LOGS(logger, INFO) << "CUDA kernel not found in registries for Op type: " << node.OpType() << " node name: " << node.Name();
       continue;
     }
 
@@ -2520,7 +2414,7 @@ CUDAExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph,
       not_supported = RNNNeedFallbackToCPU(node, activations_supported, node.OpType());
       force_inside = !not_supported;
     } else if ("ConvTranspose" == node.OpType()) {
-      not_supported = ConvTransposeNeedFallbackToCPU(node);
+      not_supported = ConvTransposeNeedFallbackToCPU(node, logger);
       force_inside = !not_supported;
     } else if ("Cast" == node.OpType()) {
       not_supported = CastNeedFallbackToCPU(node);
@@ -2529,9 +2423,10 @@ CUDAExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph,
 
     if (!force_inside && not_supported) {
       if (not_supported) {
-        LOGS_DEFAULT(WARNING) << "CUDA kernel not supported. Fallback to CPU execution provider for Op type: " << node.OpType() << " node name: " << node.Name();
+        LOGS(logger, WARNING) << "CUDA kernel not supported. Fallback to CPU execution provider for Op type: " << node.OpType() << " node name: " << node.Name();
       }
     } else {
+      tentative_nodes.push_back(node.Index());
       candidates.push_back(node.Index());
     }
   }
@@ -2539,7 +2434,7 @@ CUDAExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph,
   // For CUDA EP, exclude the subgraph that is preferred to be placed in CPU
   // These are usually shape related computation subgraphs
   // Following logic can be extended for other EPs
-  auto cpu_nodes = GetCpuPreferredNodes(graph, kernel_lookup, candidates);
+  auto cpu_nodes = GetCpuPreferredNodes(graph, kernel_lookup, tentative_nodes);
   std::vector<std::unique_ptr<ComputeCapability>> result;
   for (auto& node_index : candidates) {
     if (cpu_nodes.count(node_index) > 0)
diff --git a/onnxruntime/core/providers/cuda/cuda_execution_provider.h b/onnxruntime/core/providers/cuda/cuda_execution_provider.h
index c9e510b7f472b..d0bb2321edf0a 100644
--- a/onnxruntime/core/providers/cuda/cuda_execution_provider.h
+++ b/onnxruntime/core/providers/cuda/cuda_execution_provider.h
@@ -1,4 +1,5 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
+// Copyright (c) 2023 NVIDIA Corporation.
 // Licensed under the MIT License.
 
 #pragma once
@@ -32,6 +33,8 @@ class CUDAExecutionProvider : public IExecutionProvider {
 
   Status OnRunEnd(bool sync_stream) override;
 
+  DataLayout GetPreferredLayout() const override;
+
   const void* GetExecutionHandle() const noexcept override {
     // The CUDA interface does not return anything interesting.
     return nullptr;
@@ -49,6 +52,12 @@ class CUDAExecutionProvider : public IExecutionProvider {
     return GetPerThreadContext().CudnnHandle();
   }
 
+  cudaStream_t ComputeStream() {
+    // this will return the CUDA EP level stream which can differ from the actual compute tasks stream
+    // the compute task stream is supplied within OpKernelContext during inference
+    return stream_;
+  }
+
   template <typename T>
   const T* GetConstOnes(size_t count, cudaStream_t stream) {
     return GetPerThreadContext().template GetConstOnes<T>(count, stream);
@@ -68,6 +77,7 @@ class CUDAExecutionProvider : public IExecutionProvider {
   bool GetCudnnConvUseMaxWorkspace() const { return info_.cudnn_conv_use_max_workspace; }
   bool GetCudnnConv1dPadToNc1d() const { return info_.cudnn_conv1d_pad_to_nc1d; }
   bool IsSkipLayerNormInStrictMode() const { return info_.enable_skip_layer_norm_strict_mode; }
+  bool IsNHWCPreferred() const { return info_.prefer_nhwc; }
 
   ProviderOptions GetProviderOptions() const override {
     return CUDAExecutionProviderInfo::ToProviderOptions(info_);
diff --git a/onnxruntime/core/providers/cuda/cuda_execution_provider_info.cc b/onnxruntime/core/providers/cuda/cuda_execution_provider_info.cc
index ca88b3474b758..daa3b5ff3d72f 100644
--- a/onnxruntime/core/providers/cuda/cuda_execution_provider_info.cc
+++ b/onnxruntime/core/providers/cuda/cuda_execution_provider_info.cc
@@ -1,4 +1,5 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
+// Copyright (c) 2023 NVIDIA Corporation.
 // Licensed under the MIT License.
 
 #include "core/providers/shared_library/provider_api.h"
@@ -29,6 +30,8 @@ constexpr const char* kTunableOpEnable = "tunable_op_enable";
 constexpr const char* kTunableOpTuningEnable = "tunable_op_tuning_enable";
 constexpr const char* kTunableOpMaxTuningDurationMs = "tunable_op_max_tuning_duration_ms";
 constexpr const char* kEnableSkipLayerNormStrictMode = "enable_skip_layer_norm_strict_mode";
+constexpr const char* kPreferNCHWMode = "prefer_nhwc";
+constexpr const char* KUseEPLevelUnifiedStream = "use_ep_level_unified_stream";
 }  // namespace provider_option_names
 }  // namespace cuda
 
@@ -99,6 +102,8 @@ CUDAExecutionProviderInfo CUDAExecutionProviderInfo::FromProviderOptions(const P
           .AddAssignmentToReference(cuda::provider_option_names::kEnableCudaGraph, info.enable_cuda_graph)
           .AddAssignmentToReference(cuda::provider_option_names::kCudnnConv1dPadToNc1d, info.cudnn_conv1d_pad_to_nc1d)
           .AddAssignmentToReference(cuda::provider_option_names::kEnableSkipLayerNormStrictMode, info.enable_skip_layer_norm_strict_mode)
+          .AddAssignmentToReference(cuda::provider_option_names::kPreferNCHWMode, info.prefer_nhwc)
+          .AddAssignmentToReference(cuda::provider_option_names::KUseEPLevelUnifiedStream, info.use_ep_level_unified_stream)
           .AddValueParser(
               cuda::provider_option_names::kTunableOpEnable,
               [&info](const std::string& value_str) -> Status {
@@ -144,6 +149,8 @@ ProviderOptions CUDAExecutionProviderInfo::ToProviderOptions(const CUDAExecution
       {cuda::provider_option_names::kTunableOpTuningEnable, MakeStringWithClassicLocale(info.tunable_op.tuning_enable)},
       {cuda::provider_option_names::kTunableOpMaxTuningDurationMs, MakeStringWithClassicLocale(info.tunable_op.max_tuning_duration_ms)},
       {cuda::provider_option_names::kEnableSkipLayerNormStrictMode, MakeStringWithClassicLocale(info.enable_skip_layer_norm_strict_mode)},
+      {cuda::provider_option_names::kPreferNCHWMode, MakeStringWithClassicLocale(info.prefer_nhwc)},
+      {cuda::provider_option_names::KUseEPLevelUnifiedStream, MakeStringWithClassicLocale(info.use_ep_level_unified_stream)},
   };
 
   return options;
@@ -162,6 +169,8 @@ ProviderOptions CUDAExecutionProviderInfo::ToProviderOptions(const OrtCUDAProvid
       {cuda::provider_option_names::kTunableOpEnable, MakeStringWithClassicLocale(info.tunable_op_enable)},
       {cuda::provider_option_names::kTunableOpTuningEnable, MakeStringWithClassicLocale(info.tunable_op_tuning_enable)},
       {cuda::provider_option_names::kTunableOpMaxTuningDurationMs, MakeStringWithClassicLocale(info.tunable_op_max_tuning_duration_ms)},
+      {cuda::provider_option_names::kPreferNCHWMode, MakeStringWithClassicLocale(info.prefer_nhwc)},
+      {cuda::provider_option_names::KUseEPLevelUnifiedStream, MakeStringWithClassicLocale(info.use_ep_level_unified_stream)},
   };
 
   return options;
diff --git a/onnxruntime/core/providers/cuda/cuda_execution_provider_info.h b/onnxruntime/core/providers/cuda/cuda_execution_provider_info.h
index 789b02b0e1d8c..b286f5a9161b0 100644
--- a/onnxruntime/core/providers/cuda/cuda_execution_provider_info.h
+++ b/onnxruntime/core/providers/cuda/cuda_execution_provider_info.h
@@ -1,4 +1,5 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
+// Copyright (c) 2023 NVIDIA Corporation.
 // Licensed under the MIT License.
 
 #pragma once
@@ -71,6 +72,9 @@ struct CUDAExecutionProviderInfo {
   cuda::TunableOpInfo tunable_op{};
 
   bool enable_skip_layer_norm_strict_mode{false};
+  bool prefer_nhwc{false};
+
+  bool use_ep_level_unified_stream{false};
 
   static CUDAExecutionProviderInfo FromProviderOptions(const ProviderOptions& options);
   static ProviderOptions ToProviderOptions(const CUDAExecutionProviderInfo& info);
diff --git a/onnxruntime/core/providers/cuda/cuda_kernel.h b/onnxruntime/core/providers/cuda/cuda_kernel.h
index 58517c2850baf..e3106e41e77c8 100644
--- a/onnxruntime/core/providers/cuda/cuda_kernel.h
+++ b/onnxruntime/core/providers/cuda/cuda_kernel.h
@@ -170,10 +170,10 @@ class CudaKernel : public OpKernel {
     return provider_->PerThreadDefaultCudnnHandle();
   }
 
- protected:
-  template <typename T>
-  inline const T* GetConstOnes(size_t count, cudaStream_t stream) const {
-    return provider_->template GetConstOnes<T>(count, stream);
+  inline cudaStream_t DefaultCudaStream() const {
+    // this will return the CUDA EP level stream which can differ from the actual compute tasks stream
+    // the compute task stream is supplied within OpKernelContext during inference
+    return provider_->ComputeStream();
   }
 
   inline Status CopyTensor(const Tensor& src, Tensor& dst, onnxruntime::Stream& stream) const {
@@ -181,6 +181,12 @@ class CudaKernel : public OpKernel {
     return gpu_data_transfer->CopyTensorAsync(src, dst, stream);
   }
 
+ protected:
+  template <typename T>
+  inline const T* GetConstOnes(size_t count, cudaStream_t stream) const {
+    return provider_->template GetConstOnes<T>(count, stream);
+  }
+
   inline int GetDeviceId() const { return provider_->GetDeviceId(); }
 
  private:
diff --git a/onnxruntime/core/providers/cuda/cuda_nhwc_kernels.cc b/onnxruntime/core/providers/cuda/cuda_nhwc_kernels.cc
new file mode 100644
index 0000000000000..f416caecd115f
--- /dev/null
+++ b/onnxruntime/core/providers/cuda/cuda_nhwc_kernels.cc
@@ -0,0 +1,169 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Copyright (c) 2023 NVIDIA Corporation.
+// Licensed under the MIT License.
+
+#ifdef ENABLE_CUDA_NHWC_OPS
+
+#include <utility>
+
+#include "core/providers/shared_library/provider_api.h"
+#include "core/providers/cuda/cuda_fwd.h"
+
+#include "core/providers/cuda/cuda_nhwc_kernels.h"
+
+namespace onnxruntime::cuda {
+
+// When adding new supported NHWC operations make sure to also integrate them into: ConvertNodeLayout
+// in onnxruntime/core/optimizer/layout_transformation/layout_transformation.cc
+
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSInternalNHWCDomain, 7, 8, float,
+                                                      BatchNormalization);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSInternalNHWCDomain, 7, 8, MLFloat16,
+                                                      BatchNormalization);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSInternalNHWCDomain, 9, 13, float,
+                                                      BatchNormalization);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSInternalNHWCDomain, 9, 13, MLFloat16,
+                                                      BatchNormalization);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSInternalNHWCDomain, 1, 10, float,
+                                                      Conv);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSInternalNHWCDomain, 1, 10, MLFloat16,
+                                                      Conv);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSInternalNHWCDomain, 1, 10, float,
+                                                      ConvTranspose);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSInternalNHWCDomain, 1, 10, MLFloat16,
+                                                      ConvTranspose);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSInternalNHWCDomain, 7, 9, float,
+                                                      AveragePool);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSInternalNHWCDomain, 7, 9, MLFloat16,
+                                                      AveragePool);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSInternalNHWCDomain, 1, float, GlobalAveragePool);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSInternalNHWCDomain, 1, MLFloat16,
+                                            GlobalAveragePool);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSInternalNHWCDomain, 1, 7, float,
+                                                      MaxPool);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSInternalNHWCDomain, 1, 7, MLFloat16,
+                                                      MaxPool);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSInternalNHWCDomain, 8, 9, float,
+                                                      MaxPool);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSInternalNHWCDomain, 8, 9, MLFloat16,
+                                                      MaxPool);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSInternalNHWCDomain, 1, float, GlobalMaxPool);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSInternalNHWCDomain, 1, MLFloat16, GlobalMaxPool);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSInternalNHWCDomain, 10, 10, float,
+                                                      AveragePool);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSInternalNHWCDomain, 10, 10, MLFloat16,
+                                                      AveragePool);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSInternalNHWCDomain, 10, 10, float,
+                                                      MaxPool);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSInternalNHWCDomain, 10, 10, MLFloat16,
+                                                      MaxPool);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSInternalNHWCDomain, 11, float, Conv);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSInternalNHWCDomain, 11, MLFloat16, Conv);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSInternalNHWCDomain, 11, float, ConvTranspose);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSInternalNHWCDomain, 11, MLFloat16,
+                                            ConvTranspose);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSInternalNHWCDomain, 11, float, AveragePool);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSInternalNHWCDomain, 11, MLFloat16, AveragePool);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSInternalNHWCDomain, 11, 11, float,
+                                                      MaxPool);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSInternalNHWCDomain, 11, 11, MLFloat16,
+                                                      MaxPool);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSInternalNHWCDomain, 12, float, MaxPool);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSInternalNHWCDomain, 12, MLFloat16, MaxPool);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSInternalNHWCDomain, 14, 14, float,
+                                                      BatchNormalization);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSInternalNHWCDomain, 14, 14, MLFloat16,
+                                                      BatchNormalization);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSInternalNHWCDomain, 15, float,
+                                            BatchNormalization);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSInternalNHWCDomain, 15, MLFloat16,
+                                            BatchNormalization);
+
+Status RegisterCudaNhwcKernels(KernelRegistry& kernel_registry) {
+  static const BuildKernelCreateInfoFn nhwc_function_table[] = {
+      BuildKernelCreateInfo<void>,  // default entry to avoid the list become empty after ops-reducing
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(
+          kCudaExecutionProvider, kMSInternalNHWCDomain, 7, 8, MLFloat16, BatchNormalization)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(
+          kCudaExecutionProvider, kMSInternalNHWCDomain, 7, 8, float, BatchNormalization)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(
+          kCudaExecutionProvider, kMSInternalNHWCDomain, 9, 13, MLFloat16, BatchNormalization)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(
+          kCudaExecutionProvider, kMSInternalNHWCDomain, 9, 13, float, BatchNormalization)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(
+          kCudaExecutionProvider, kMSInternalNHWCDomain, 14, 14, MLFloat16, BatchNormalization)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(
+          kCudaExecutionProvider, kMSInternalNHWCDomain, 14, 14, float, BatchNormalization)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSInternalNHWCDomain, 15,
+                                                                  MLFloat16, BatchNormalization)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSInternalNHWCDomain, 15,
+                                                                  float, BatchNormalization)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(
+          kCudaExecutionProvider, kMSInternalNHWCDomain, 1, 10, MLFloat16, Conv)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider,
+                                                                            kMSInternalNHWCDomain, 1, 10, float, Conv)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSInternalNHWCDomain, 11,
+                                                                  float, Conv)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSInternalNHWCDomain, 11,
+                                                                  MLFloat16, Conv)>,
+
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(
+          kCudaExecutionProvider, kMSInternalNHWCDomain, 7, 9, float, AveragePool)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(
+          kCudaExecutionProvider, kMSInternalNHWCDomain, 7, 9, MLFloat16, AveragePool)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSInternalNHWCDomain, 1,
+                                                                  float, GlobalAveragePool)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSInternalNHWCDomain, 1,
+                                                                  MLFloat16, GlobalAveragePool)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(
+          kCudaExecutionProvider, kMSInternalNHWCDomain, 1, 7, float, MaxPool)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(
+          kCudaExecutionProvider, kMSInternalNHWCDomain, 1, 7, MLFloat16, MaxPool)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(
+          kCudaExecutionProvider, kMSInternalNHWCDomain, 8, 9, float, MaxPool)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(
+          kCudaExecutionProvider, kMSInternalNHWCDomain, 8, 9, MLFloat16, MaxPool)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSInternalNHWCDomain, 1,
+                                                                  float, GlobalMaxPool)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSInternalNHWCDomain, 1,
+                                                                  MLFloat16, GlobalMaxPool)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(
+          kCudaExecutionProvider, kMSInternalNHWCDomain, 10, 10, float, AveragePool)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(
+          kCudaExecutionProvider, kMSInternalNHWCDomain, 10, 10, MLFloat16, AveragePool)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(
+          kCudaExecutionProvider, kMSInternalNHWCDomain, 10, 10, float, MaxPool)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(
+          kCudaExecutionProvider, kMSInternalNHWCDomain, 10, 10, MLFloat16, MaxPool)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSInternalNHWCDomain, 11,
+                                                                  float, AveragePool)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSInternalNHWCDomain, 11,
+                                                                  MLFloat16, AveragePool)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(
+          kCudaExecutionProvider, kMSInternalNHWCDomain, 11, 11, float, MaxPool)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(
+          kCudaExecutionProvider, kMSInternalNHWCDomain, 11, 11, MLFloat16, MaxPool)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSInternalNHWCDomain, 12,
+                                                                  float, MaxPool)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSInternalNHWCDomain, 12,
+                                                                  MLFloat16, MaxPool)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSInternalNHWCDomain, 11,
+                                                                  float, ConvTranspose)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSInternalNHWCDomain, 11,
+                                                                  MLFloat16, ConvTranspose)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(
+          kCudaExecutionProvider, kMSInternalNHWCDomain, 1, 10, float, ConvTranspose)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(
+          kCudaExecutionProvider, kMSInternalNHWCDomain, 1, 10, MLFloat16, ConvTranspose)>,
+  };
+
+  for (auto& function_table_entry : nhwc_function_table) {
+    KernelCreateInfo info = function_table_entry();
+    if (info.kernel_def != nullptr) {  // filter disabled entries where type is void
+      ORT_RETURN_IF_ERROR(kernel_registry.Register(std::move(info)));
+    }
+  }
+  return Status::OK();
+}
+}  // namespace onnxruntime::cuda
+#endif
diff --git a/onnxruntime/core/providers/cuda/cuda_nhwc_kernels.h b/onnxruntime/core/providers/cuda/cuda_nhwc_kernels.h
new file mode 100644
index 0000000000000..0b3a6d5cff0c7
--- /dev/null
+++ b/onnxruntime/core/providers/cuda/cuda_nhwc_kernels.h
@@ -0,0 +1,13 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Copyright (c) 2023 NVIDIA Corporation.
+// Licensed under the MIT License.
+
+#pragma once
+
+#include "core/common/status.h"
+
+namespace onnxruntime::cuda {
+
+onnxruntime::common::Status RegisterCudaNhwcKernels(onnxruntime::KernelRegistry& kernel_registry);
+
+}  // namespace onnxruntime::cuda
diff --git a/onnxruntime/core/providers/cuda/cuda_provider_factory.cc b/onnxruntime/core/providers/cuda/cuda_provider_factory.cc
index 5a11f2529f38e..892e8d5329eba 100644
--- a/onnxruntime/core/providers/cuda/cuda_provider_factory.cc
+++ b/onnxruntime/core/providers/cuda/cuda_provider_factory.cc
@@ -1,4 +1,5 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
+// Copyright (c) 2023 NVIDIA Corporation.
 // Licensed under the MIT License.
 
 #include "core/providers/shared_library/provider_api.h"
@@ -217,11 +218,13 @@ struct CUDA_Provider : Provider {
     info.default_memory_arena_cfg = params->default_memory_arena_cfg;
     info.cudnn_conv_use_max_workspace = params->cudnn_conv_use_max_workspace != 0;
     info.enable_cuda_graph = params->enable_cuda_graph != 0;
+    info.prefer_nhwc = params->prefer_nhwc;
     info.cudnn_conv1d_pad_to_nc1d = params->cudnn_conv1d_pad_to_nc1d != 0;
     info.tunable_op.enable = params->tunable_op_enable;
     info.tunable_op.tuning_enable = params->tunable_op_tuning_enable;
     info.tunable_op.max_tuning_duration_ms = params->tunable_op_max_tuning_duration_ms;
     info.enable_skip_layer_norm_strict_mode = params->enable_skip_layer_norm_strict_mode != 0;
+    info.use_ep_level_unified_stream = params->use_ep_level_unified_stream != 0;
 
     return std::make_shared<CUDAProviderFactory>(info);
   }
@@ -243,7 +246,7 @@ struct CUDA_Provider : Provider {
     cuda_options.arena_extend_strategy = internal_options.arena_extend_strategy;
     cuda_options.do_copy_in_default_stream = internal_options.do_copy_in_default_stream;
     cuda_options.has_user_compute_stream = internal_options.has_user_compute_stream;
-    // The 'has_user_compute_stream' of the OrtCUDAProviderOptionsV2 instance can be set byC API UpdateCUDAProviderOptionsWithValue() as well.
+    // The 'has_user_compute_stream' of the OrtCUDAProviderOptionsV2 instance can be set by C API UpdateCUDAProviderOptionsWithValue() as well.
     // We only set the 'has_user_compute_stream' of the OrtCUDAProviderOptionsV2 instance if it is provided in options
     if (options.find("has_user_compute_stream") != options.end()) {
       cuda_options.user_compute_stream = internal_options.user_compute_stream;
@@ -253,6 +256,8 @@ struct CUDA_Provider : Provider {
     cuda_options.enable_cuda_graph = internal_options.enable_cuda_graph;
     cuda_options.cudnn_conv1d_pad_to_nc1d = internal_options.cudnn_conv1d_pad_to_nc1d;
     cuda_options.enable_skip_layer_norm_strict_mode = internal_options.enable_skip_layer_norm_strict_mode;
+    cuda_options.prefer_nhwc = internal_options.prefer_nhwc;
+    cuda_options.use_ep_level_unified_stream = internal_options.use_ep_level_unified_stream;
   }
 
   ProviderOptions GetProviderOptions(const void* provider_options) override {
diff --git a/onnxruntime/core/providers/cuda/cuda_stream_handle.cc b/onnxruntime/core/providers/cuda/cuda_stream_handle.cc
index e855a515f445a..5f1dbd30f6a3e 100644
--- a/onnxruntime/core/providers/cuda/cuda_stream_handle.cc
+++ b/onnxruntime/core/providers/cuda/cuda_stream_handle.cc
@@ -7,6 +7,25 @@
 
 namespace onnxruntime {
 
+DeferredCpuAllocator::DeferredCpuAllocator(CudaStream& cuda_stream) : cuda_stream_(cuda_stream) {
+  OrtAllocator::version = ORT_API_VERSION;
+  OrtAllocator::Alloc =
+      [](OrtAllocator* this_, size_t size) {
+        auto self = reinterpret_cast<DeferredCpuAllocator*>(this_);
+        return self->cuda_stream_.GetCpuAllocator()->Alloc(size);
+      };
+  OrtAllocator::Free =
+      [](OrtAllocator* this_, void* p) {
+        auto self = reinterpret_cast<DeferredCpuAllocator*>(this_);
+        self->cuda_stream_.EnqueDeferredCPUBuffer(p);
+      };
+  OrtAllocator::Info =
+      [](const OrtAllocator* this_) {
+        auto self = reinterpret_cast<const DeferredCpuAllocator*>(this_);
+        return &self->cuda_stream_.GetCpuAllocator()->Info();
+      };
+}
+
 struct CudaNotification : public synchronize::Notification {
   CudaNotification(Stream& s) : Notification(s) {
     CUDA_CALL_THROW(cudaEventCreateWithFlags(&event_, cudaEventDisableTiming));
@@ -46,7 +65,8 @@ CudaStream::CudaStream(cudaStream_t stream,
                        cublasHandle_t external_cublas_handle) : Stream(stream, device),
                                                                 own_stream_(own_flag),
                                                                 cpu_allocator_(cpu_allocator),
-                                                                release_cpu_buffer_on_cuda_stream_(release_cpu_buffer_on_cuda_stream) {
+                                                                release_cpu_buffer_on_cuda_stream_(release_cpu_buffer_on_cuda_stream),
+                                                                deferred_cpu_allocator_(*this) {
   if (own_flag) {
     CUBLAS_CALL_THROW(cublasCreate(&cublas_handle_));
     CUBLAS_CALL_THROW(cublasSetStream(cublas_handle_, stream));
@@ -162,6 +182,9 @@ void* CudaStream::GetResource(int version, int id) const {
     case CudaResource::cublas_handle_t:
       return reinterpret_cast<void*>(cublas_handle_);
       break;
+    case CudaResource::deferred_cpu_allocator_t:
+      return const_cast<DeferredCpuAllocator*>(&deferred_cpu_allocator_);
+      break;
     default:
       break;
   }
diff --git a/onnxruntime/core/providers/cuda/cuda_stream_handle.h b/onnxruntime/core/providers/cuda/cuda_stream_handle.h
index 9c62b029b7a36..917702fae08f1 100644
--- a/onnxruntime/core/providers/cuda/cuda_stream_handle.h
+++ b/onnxruntime/core/providers/cuda/cuda_stream_handle.h
@@ -9,6 +9,13 @@
 
 namespace onnxruntime {
 
+struct CudaStream;
+
+struct DeferredCpuAllocator : public OrtAllocator {
+  DeferredCpuAllocator(CudaStream&);
+  CudaStream& cuda_stream_;
+};
+
 struct CudaStream : Stream {
   CudaStream(cudaStream_t stream,
              const OrtDevice& device,
@@ -36,10 +43,13 @@ struct CudaStream : Stream {
 
   void* GetResource(int version, int id) const override;
 
+  onnxruntime::IAllocator* GetCpuAllocator() const { return cpu_allocator_.get(); }
+
  private:
   std::vector<void*> deferred_cpu_buffers_;
   AllocatorPtr cpu_allocator_;
   bool release_cpu_buffer_on_cuda_stream_{true};
+  DeferredCpuAllocator deferred_cpu_allocator_;
 };
 
 void RegisterCudaStreamHandles(IStreamCommandHandleRegistry& stream_handle_registry,
diff --git a/onnxruntime/core/providers/cuda/cudnn_common.cc b/onnxruntime/core/providers/cuda/cudnn_common.cc
index fc02a6509bf24..4df59a98b12e5 100644
--- a/onnxruntime/core/providers/cuda/cudnn_common.cc
+++ b/onnxruntime/core/providers/cuda/cudnn_common.cc
@@ -1,7 +1,10 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
+// Copyright (c) 2023 NVIDIA Corporation.
 // Licensed under the MIT License.
 
-#include "cudnn_common.h"
+#include <utility>
+
+#include "core/providers/cuda/cudnn_common.h"
 #include "core/common/inlined_containers.h"
 #include "core/common/gsl.h"
 #include "shared_inc/cuda_call.h"
@@ -27,7 +30,7 @@ Status CudnnTensor::CreateTensorIfNeeded() {
   return Status::OK();
 }
 
-Status CudnnTensor::Set(gsl::span<const int64_t> input_dims, cudnnDataType_t dataType) {
+Status CudnnTensor::Set(gsl::span<const int64_t> input_dims, cudnnDataType_t dataType, bool is_nhwc) {
   ORT_RETURN_IF_ERROR(CreateTensorIfNeeded());
 
   int rank = gsl::narrow_cast<int>(input_dims.size());
@@ -38,6 +41,10 @@ Status CudnnTensor::Set(gsl::span<const int64_t> input_dims, cudnnDataType_t dat
     dims[i] = gsl::narrow_cast<int>(input_dims[i]);
     strides[i] = gsl::narrow_cast<int>(pitches[i]);
   }
+  if (is_nhwc) {
+    std::swap(dims[1], dims[rank - 1]);
+    std::swap(strides[1], strides[rank - 1]);
+  }
   CUDNN_RETURN_IF_ERROR(cudnnSetTensorNdDescriptor(tensor_, dataType, static_cast<int>(rank), dims.data(), strides.data()));
   return Status::OK();
 }
diff --git a/onnxruntime/core/providers/cuda/cudnn_common.h b/onnxruntime/core/providers/cuda/cudnn_common.h
index ba75ab4f2c029..8a94a334ee688 100644
--- a/onnxruntime/core/providers/cuda/cudnn_common.h
+++ b/onnxruntime/core/providers/cuda/cudnn_common.h
@@ -1,4 +1,5 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
+// Copyright (c) 2023 NVIDIA Corporation.
 // Licensed under the MIT License.
 
 #pragma once
@@ -16,7 +17,7 @@ class CudnnTensor final {
   ~CudnnTensor();
   ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(CudnnTensor);
 
-  Status Set(gsl::span<const int64_t> input_dims, cudnnDataType_t dataType);
+  Status Set(gsl::span<const int64_t> input_dims, cudnnDataType_t dataType, bool is_nhwc = false);
   Status Set(const CudnnTensor& x_desc, cudnnBatchNormMode_t mode);
   // Set 4D tensor format (for NHWC)
   Status Set(cudnnTensorFormat_t format, cudnnDataType_t dataType, int n, int c, int h, int w);
diff --git a/onnxruntime/core/providers/cuda/math/matmul.cc b/onnxruntime/core/providers/cuda/math/matmul.cc
index 899d506f840a2..e4c37c52a1780 100644
--- a/onnxruntime/core/providers/cuda/math/matmul.cc
+++ b/onnxruntime/core/providers/cuda/math/matmul.cc
@@ -119,6 +119,161 @@ Status MatMul<T>::ComputeInternal(OpKernelContext* ctx) const {
   return ComputeDefault(ctx, helper);
 }
 
+template <typename T>
+Status FuncMatMul(
+    const CudaKernel* cuda_kernel,
+    OpKernelContext* ctx,
+    const Tensor* A,
+    const Tensor* B,
+    float alpha,
+    bool trans_A,
+    bool trans_B,
+    bool trans_batch_A,
+    bool trans_batch_B,
+    Tensor* Y) {
+  typedef typename ToCudaType<T>::MappedType CudaT;
+
+  // Ignore the transpose flag if rank of input being 1.
+  // Be noted: numpy.transpose on vector does not change anything.
+  if (A->Shape().NumDimensions() == 1) {
+    trans_A = false;
+  }
+  if (B->Shape().NumDimensions() == 1) {
+    trans_B = false;
+  }
+
+  const CudaT cuda_alpha = ToCudaType<T>::FromFloat(alpha);
+  const CudaT cuda_zero = ToCudaType<T>::FromFloat(0.0f);
+
+  cublasOperation_t cuda_trans_A = trans_A ? CUBLAS_OP_T : CUBLAS_OP_N;
+  cublasOperation_t cuda_trans_B = trans_B ? CUBLAS_OP_T : CUBLAS_OP_N;
+
+  MatMulComputeHelper helper;
+  ORT_RETURN_IF_ERROR(
+      helper.Compute(A->Shape(), B->Shape(), trans_A, trans_B, trans_batch_A, trans_batch_B, false));
+  const int lda = helper.Lda(trans_A);
+  const int ldb = helper.Ldb(trans_B);
+  const int ldc = helper.Ldc();
+  int64_t stride_A, stride_B, stride_C, batch_count;
+  auto& device_prop = cuda_kernel->GetDeviceProp();
+
+  if (helper.OutputOffsets().size() == 1) {
+    CUBLAS_RETURN_IF_ERROR(cublasGemmHelper(
+        cuda_kernel->GetCublasHandle(ctx),
+        cuda_trans_B,
+        cuda_trans_A,
+        static_cast<int>(helper.N()),
+        static_cast<int>(helper.M()),
+        static_cast<int>(helper.K()),
+        &cuda_alpha,
+        reinterpret_cast<const CudaT*>(B->Data<T>()),
+        ldb,
+        reinterpret_cast<const CudaT*>(A->Data<T>()),
+        lda,
+        &cuda_zero,
+        reinterpret_cast<CudaT*>(Y->MutableData<T>()),
+        ldc,
+        device_prop));
+    return Status::OK();
+  } else if (CanUseStridedBatchedGemm(A->Shape(), B->Shape(),
+                                      trans_A, trans_B, trans_batch_B, trans_batch_B, stride_A, stride_B, stride_C, batch_count)) {
+    CUBLAS_RETURN_IF_ERROR(cublasGemmStridedBatchedHelper(cuda_kernel->GetCublasHandle(ctx),
+                                                          cuda_trans_B,
+                                                          cuda_trans_A,
+                                                          static_cast<int>(helper.N()),
+                                                          static_cast<int>(helper.M()),
+                                                          static_cast<int>(helper.K()),
+                                                          &cuda_alpha,
+                                                          reinterpret_cast<const CudaT*>(B->Data<T>()),
+                                                          ldb,
+                                                          stride_B,
+                                                          reinterpret_cast<const CudaT*>(A->Data<T>()),
+                                                          lda,
+                                                          stride_A,
+                                                          &cuda_zero,
+                                                          reinterpret_cast<CudaT*>(Y->MutableData<T>()),
+                                                          ldc,
+                                                          stride_C,
+                                                          static_cast<int>(batch_count),
+                                                          device_prop));
+
+    return Status::OK();
+  }
+
+  // Fill offsets when needed.
+  helper.FillOffsets();
+  CudaKernel::CudaAsyncBuffer<const CudaT*> A_arrays(cuda_kernel, helper.LeftOffsets().size());
+  CudaKernel::CudaAsyncBuffer<const CudaT*> B_arrays(cuda_kernel, helper.RightOffsets().size());
+  CudaKernel::CudaAsyncBuffer<CudaT*> Y_arrays(cuda_kernel, helper.OutputOffsets().size());
+  MatMulComputeHelper::OffsetToArrays(reinterpret_cast<const CudaT*>(A->Data<T>()), helper.LeftOffsets(), A_arrays.CpuSpan());
+  MatMulComputeHelper::OffsetToArrays(reinterpret_cast<const CudaT*>(B->Data<T>()), helper.RightOffsets(), B_arrays.CpuSpan());
+  MatMulComputeHelper::OffsetToArrays(reinterpret_cast<CudaT*>(Y->MutableData<T>()), helper.OutputOffsets(), Y_arrays.CpuSpan());
+  ORT_RETURN_IF_ERROR(A_arrays.CopyToGpu(ctx->GetComputeStream()));
+  ORT_RETURN_IF_ERROR(B_arrays.CopyToGpu(ctx->GetComputeStream()));
+  ORT_RETURN_IF_ERROR(Y_arrays.CopyToGpu(ctx->GetComputeStream()));
+
+  // TF32 provides a huge performance gain for training and inference while preserving FP32 levels of accuracy.
+  // It requires Ampere or newer GPU, and pointers of matrics shall be aligned (ideal alignment is 16-byte).
+  // Assume that start memory of input/output tensor is aligned, we only check offsets of sub-matrix per batch here.
+  cublasMath_t mode = (std::is_same<T, float>::value && device_prop.major >= 8 && helper.IsBatchedGemmAligned())
+                          ? CUBLAS_TF32_TENSOR_OP_MATH
+                          : CUBLAS_DEFAULT_MATH;
+  CublasMathModeSetter math_mode_setter(device_prop, cuda_kernel->GetCublasHandle(ctx), mode);
+
+  // note that onnxruntime OrtValue is row major, while cublas is column major,
+  // so swap left/right operands
+  CUBLAS_RETURN_IF_ERROR(cublasGemmBatchedHelper(
+      cuda_kernel->GetCublasHandle(ctx),
+      cuda_trans_B,
+      cuda_trans_A,
+      static_cast<int>(helper.N()),
+      static_cast<int>(helper.M()),
+      static_cast<int>(helper.K()),
+      &cuda_alpha,
+      B_arrays.GpuPtr(),
+      ldb,
+      A_arrays.GpuPtr(),
+      lda,
+      &cuda_zero,
+      Y_arrays.GpuPtr(),
+      ldc,
+      static_cast<int>(helper.OutputOffsets().size()),
+      device_prop));
+  return Status::OK();
+}
+
+template Status FuncMatMul<float>(
+    // Use OpKernel and do a pointer cast to unify functional calls with other eps.
+    // TODO: remove CudaKernel and OpKernelContext.
+    const CudaKernel* cuda_kernel,
+    // Do NOT use ctx to access inputs and outputs.
+    // Inputs and outputs are passed in as function arguments.
+    OpKernelContext* ctx,
+    const Tensor* A,
+    const Tensor* B,
+    float alpha,
+    bool trans_A,
+    bool trans_B,
+    bool trans_batch_A,
+    bool trans_batch_B,
+    Tensor* Y);
+
+template Status FuncMatMul<MLFloat16>(
+    // Use OpKernel and do a pointer cast to unify functional calls with other eps.
+    // TODO: remove CudaKernel and OpKernelContext.
+    const CudaKernel* cuda_kernel,
+    // Do NOT use ctx to access inputs and outputs.
+    // Inputs and outputs are passed in as function arguments.
+    OpKernelContext* ctx,
+    const Tensor* A,
+    const Tensor* B,
+    float alpha,
+    bool trans_A,
+    bool trans_B,
+    bool trans_batch_A,
+    bool trans_batch_B,
+    Tensor* Y);
+
 template <typename T>
 Status MatMul<T>::ComputeDefault(OpKernelContext* ctx, MatMulComputeHelper& helper) const {
   typedef typename ToCudaType<T>::MappedType CudaT;
diff --git a/onnxruntime/core/providers/cuda/math/matmul.h b/onnxruntime/core/providers/cuda/math/matmul.h
index 5ea7b30777402..26de1044eeb23 100644
--- a/onnxruntime/core/providers/cuda/math/matmul.h
+++ b/onnxruntime/core/providers/cuda/math/matmul.h
@@ -31,5 +31,23 @@ class MatMul final : public CudaKernel {
   const bool trans_batch_a_;
   const bool trans_batch_b_;
 };
+
+template <typename T>
+Status FuncMatMul(
+    // Use OpKernel and do a pointer cast to unify functional calls with other eps.
+    // TODO: remove CudaKernel and OpKernelContext.
+    const CudaKernel* cuda_kernel,
+    // Do NOT use ctx to access inputs and outputs.
+    // Inputs and outputs are passed in as function arguments.
+    OpKernelContext* ctx,
+    const Tensor* A,
+    const Tensor* B,
+    float alpha,
+    bool trans_A,
+    bool trans_B,
+    bool trans_batch_A,
+    bool trans_batch_B,
+    Tensor* Y);
+
 }  // namespace cuda
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/cuda/math/unary_elementwise_ops.cc b/onnxruntime/core/providers/cuda/math/unary_elementwise_ops.cc
index 9ede1f8d90ecc..655877f425054 100644
--- a/onnxruntime/core/providers/cuda/math/unary_elementwise_ops.cc
+++ b/onnxruntime/core/providers/cuda/math/unary_elementwise_ops.cc
@@ -99,6 +99,7 @@ Status UnaryElementwise::Prepare(OpKernelContext* context, UnaryElementwisePrepa
 // F: float
 // D: double
 // O: bool
+// X: BFloat16
 
 #define UNARY_OP_VERSIONED_HFD(name, startver, endver)        \
   UNARY_OP_VERSIONED_TYPED(name, startver, endver, MLFloat16) \
@@ -124,12 +125,18 @@ Status UnaryElementwise::Prepare(OpKernelContext* context, UnaryElementwisePrepa
   UNARY_OP_TYPED(name, ver, float)     \
   UNARY_OP_TYPED(name, ver, double)
 
+#define UNARY_OP_HFDX(name, ver)       \
+  UNARY_OP_TYPED(name, ver, MLFloat16) \
+  UNARY_OP_TYPED(name, ver, BFloat16)  \
+  UNARY_OP_TYPED(name, ver, float)     \
+  UNARY_OP_TYPED(name, ver, double)
+
 #define UNARY_OP_CSILHFD(name, ver)  \
   UNARY_OP_TYPED(name, ver, int8_t)  \
   UNARY_OP_TYPED(name, ver, int16_t) \
   UNARY_OP_TYPED(name, ver, int32_t) \
   UNARY_OP_TYPED(name, ver, int64_t) \
-  UNARY_OP_HFD(name, ver)
+  UNARY_OP_HFDX(name, ver)
 
 #define UNARY_OP_BWUZCSILHFD(name, ver) \
   UNARY_OP_TYPED(name, ver, uint8_t)    \
diff --git a/onnxruntime/core/providers/cuda/math/unary_elementwise_ops_impl.cu b/onnxruntime/core/providers/cuda/math/unary_elementwise_ops_impl.cu
index 1298d53338337..5c3db4a499972 100644
--- a/onnxruntime/core/providers/cuda/math/unary_elementwise_ops_impl.cu
+++ b/onnxruntime/core/providers/cuda/math/unary_elementwise_ops_impl.cu
@@ -53,13 +53,14 @@ UNARY_OPS()
 // F: float
 // D: double
 // O: bool
+// X: BFloat16
 
 #define SPECIALIZED_UNARY_ELEMENTWISE_IMPL_HFD(name) \
   SPECIALIZED_UNARY_ELEMENTWISE_IMPL(name, half)     \
   SPECIALIZED_UNARY_ELEMENTWISE_IMPL(name, float)    \
   SPECIALIZED_UNARY_ELEMENTWISE_IMPL(name, double)
 
-#define SPECIALIZED_UNARY_ELEMENTWISE_IMPL_HFDB(name) \
+#define SPECIALIZED_UNARY_ELEMENTWISE_IMPL_HFDX(name) \
   SPECIALIZED_UNARY_ELEMENTWISE_IMPL_HFD(name)        \
   SPECIALIZED_UNARY_ELEMENTWISE_IMPL(name, BFloat16)
 
@@ -68,7 +69,7 @@ UNARY_OPS()
   SPECIALIZED_UNARY_ELEMENTWISE_IMPL(name, int16_t)      \
   SPECIALIZED_UNARY_ELEMENTWISE_IMPL(name, int32_t)      \
   SPECIALIZED_UNARY_ELEMENTWISE_IMPL(name, int64_t)      \
-  SPECIALIZED_UNARY_ELEMENTWISE_IMPL_HFD(name)
+  SPECIALIZED_UNARY_ELEMENTWISE_IMPL_HFDX(name)
 
 #define SPECIALIZED_UNARY_ELEMENTWISE_IMPL_BWUZCSILHFD(name) \
   SPECIALIZED_UNARY_ELEMENTWISE_IMPL(name, uint8_t)          \
@@ -83,8 +84,8 @@ SPECIALIZED_UNARY_ELEMENTWISE_IMPL_HFD(Floor)
 SPECIALIZED_UNARY_ELEMENTWISE_IMPL_HFD(Ceil)
 SPECIALIZED_UNARY_ELEMENTWISE_IMPL_HFD(Reciprocal)
 SPECIALIZED_UNARY_ELEMENTWISE_IMPL_HFD(Sqrt)
-SPECIALIZED_UNARY_ELEMENTWISE_IMPL_HFDB(Log)
-SPECIALIZED_UNARY_ELEMENTWISE_IMPL_HFDB(Exp)
+SPECIALIZED_UNARY_ELEMENTWISE_IMPL_HFDX(Log)
+SPECIALIZED_UNARY_ELEMENTWISE_IMPL_HFDX(Exp)
 SPECIALIZED_UNARY_ELEMENTWISE_IMPL_HFD(Erf)
 SPECIALIZED_UNARY_ELEMENTWISE_IMPL_HFD(Round)
 SPECIALIZED_UNARY_ELEMENTWISE_IMPL_HFD(Sin)
diff --git a/onnxruntime/core/providers/cuda/nn/batch_norm.cc b/onnxruntime/core/providers/cuda/nn/batch_norm.cc
index 4f22b5298a30a..c468971e1e426 100644
--- a/onnxruntime/core/providers/cuda/nn/batch_norm.cc
+++ b/onnxruntime/core/providers/cuda/nn/batch_norm.cc
@@ -1,4 +1,5 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
+// Copyright (c) 2023 NVIDIA Corporation.
 // Licensed under the MIT License.
 
 #include "batch_norm.h"
@@ -11,38 +12,38 @@ using namespace std;
 namespace onnxruntime {
 namespace cuda {
 
-#define REGISTER_KERNEL_TYPED(T)                                   \
+#define REGISTER_KERNEL_TYPED(T, DOMAIN, NHWC)                     \
   ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_EX(                         \
       BatchNormalization,                                          \
-      kOnnxDomain,                                                 \
+      DOMAIN,                                                      \
       7, 8,                                                        \
       T,                                                           \
       kCudaExecutionProvider,                                      \
       (*KernelDefBuilder::Create())                                \
           .TypeConstraint("T", DataTypeImpl::GetTensorType<T>()),  \
-      BatchNorm<T>);                                               \
+      BatchNorm<T, NHWC>);                                         \
   ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_EX(                         \
       BatchNormalization,                                          \
-      kOnnxDomain,                                                 \
+      DOMAIN,                                                      \
       9, 13,                                                       \
       T,                                                           \
       kCudaExecutionProvider,                                      \
       (*KernelDefBuilder::Create())                                \
           .TypeConstraint("T", DataTypeImpl::GetTensorType<T>()),  \
-      BatchNorm<T>);                                               \
+      BatchNorm<T, NHWC>);                                         \
   ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_EX(                         \
       BatchNormalization,                                          \
-      kOnnxDomain,                                                 \
+      DOMAIN,                                                      \
       14, 14,                                                      \
       T,                                                           \
       kCudaExecutionProvider,                                      \
       (*KernelDefBuilder::Create())                                \
           .TypeConstraint("T", DataTypeImpl::GetTensorType<T>())   \
           .TypeConstraint("U", DataTypeImpl::GetTensorType<T>()),  \
-      BatchNorm<T>);                                               \
+      BatchNorm<T, NHWC>);                                         \
   ONNX_OPERATOR_TYPED_KERNEL_EX(                                   \
       BatchNormalization,                                          \
-      kOnnxDomain,                                                 \
+      DOMAIN,                                                      \
       15,                                                          \
       T,                                                           \
       kCudaExecutionProvider,                                      \
@@ -50,10 +51,10 @@ namespace cuda {
           .TypeConstraint("T", DataTypeImpl::GetTensorType<T>())   \
           .TypeConstraint("T1", DataTypeImpl::GetTensorType<T>())  \
           .TypeConstraint("T2", DataTypeImpl::GetTensorType<T>()), \
-      BatchNorm<T>);
+      BatchNorm<T, NHWC>);
 
-template <typename T>
-Status BatchNorm<T>::ComputeInternal(OpKernelContext* p_op_kernel_context) const {
+template <typename T, bool NHWC>
+Status BatchNorm<T, NHWC>::ComputeInternal(OpKernelContext* p_op_kernel_context) const {
   typedef typename ToCudaType<T>::MappedType CudaT;
 
   const Tensor* X = p_op_kernel_context->Input<Tensor>(0);
@@ -62,7 +63,7 @@ Status BatchNorm<T>::ComputeInternal(OpKernelContext* p_op_kernel_context) const
   const Tensor* mean = p_op_kernel_context->Input<Tensor>(3);
   const Tensor* var = p_op_kernel_context->Input<Tensor>(4);
 
-  ORT_RETURN_IF_ERROR(BatchNormHelper::ValidateInputs(X, scale, B, mean, var, spatial_ == 1));
+  ORT_RETURN_IF_ERROR(BatchNormHelper::ValidateInputs(X, scale, B, mean, var, spatial_ == 1, NHWC));
 
   const TensorShape& x_shape = X->Shape();
   const TensorShape& channel_shape = mean->Shape();
@@ -87,7 +88,7 @@ Status BatchNorm<T>::ComputeInternal(OpKernelContext* p_op_kernel_context) const
   CudnnTensor data_desc;
   vector<int64_t> new_dims;
   BatchNormHelper::NormalizeDims(x_shape, new_dims);
-  ORT_RETURN_IF_ERROR(data_desc.Set(new_dims, CudnnTensor::GetDataType<CudaT>()));
+  ORT_RETURN_IF_ERROR(data_desc.Set(new_dims, CudnnTensor::GetDataType<CudaT>(), NHWC));
 
   // For half data type, the alpha, beta, scale, B, mean, var need to be float type
   if (X->IsDataType<MLFloat16>()) {
@@ -97,7 +98,7 @@ Status BatchNorm<T>::ComputeInternal(OpKernelContext* p_op_kernel_context) const
     ORT_RETURN_IF_ERROR(bn_tensor_desc.Set(data_desc, cudnn_batch_norm_mode_));
 
     // Convert the scale, B, mean, var to float
-    const int64_t C = x_shape.GetDims()[1];
+    const int64_t C = x_shape.GetDims()[NHWC ? 3 : 1];
     auto f_scale = GetScratchBuffer<float>(C, p_op_kernel_context->GetComputeStream());
     auto f_B = GetScratchBuffer<float>(C, p_op_kernel_context->GetComputeStream());
     auto f_mean = GetScratchBuffer<float>(C, p_op_kernel_context->GetComputeStream());
@@ -175,13 +176,17 @@ Status BatchNorm<T>::ComputeInternal(OpKernelContext* p_op_kernel_context) const
   return Status::OK();
 }
 
-#define SPECIALIZED_COMPUTE(T) \
-  REGISTER_KERNEL_TYPED(T)     \
-  template Status BatchNorm<T>::ComputeInternal(OpKernelContext* ctx) const;
+#define SPECIALIZED_COMPUTE(T, DOMAIN, NHWC) \
+  REGISTER_KERNEL_TYPED(T, DOMAIN, NHWC)     \
+  template Status BatchNorm<T, NHWC>::ComputeInternal(OpKernelContext* ctx) const;
 
-SPECIALIZED_COMPUTE(float)
-SPECIALIZED_COMPUTE(double)
-SPECIALIZED_COMPUTE(MLFloat16)
+SPECIALIZED_COMPUTE(float, kOnnxDomain, false)
+SPECIALIZED_COMPUTE(double, kOnnxDomain, false)
+SPECIALIZED_COMPUTE(MLFloat16, kOnnxDomain, false)
 
+#ifdef ENABLE_CUDA_NHWC_OPS
+SPECIALIZED_COMPUTE(float, kMSInternalNHWCDomain, true)
+SPECIALIZED_COMPUTE(MLFloat16, kMSInternalNHWCDomain, true)
+#endif
 }  // namespace cuda
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/cuda/nn/batch_norm.h b/onnxruntime/core/providers/cuda/nn/batch_norm.h
index 99da7652a1d24..4eb9fb74d3761 100644
--- a/onnxruntime/core/providers/cuda/nn/batch_norm.h
+++ b/onnxruntime/core/providers/cuda/nn/batch_norm.h
@@ -1,4 +1,5 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
+// Copyright (c) 2023 NVIDIA Corporation.
 // Licensed under the MIT License.
 
 #pragma once
@@ -9,7 +10,7 @@
 namespace onnxruntime {
 namespace cuda {
 
-template <typename T>
+template <typename T, bool NCHW>
 class BatchNorm final : public CudaKernel {
  public:
   BatchNorm(const OpKernelInfo& op_kernel_info)
diff --git a/onnxruntime/core/providers/cuda/nn/conv.cc b/onnxruntime/core/providers/cuda/nn/conv.cc
index 81db3c4186282..82f3503919237 100644
--- a/onnxruntime/core/providers/cuda/nn/conv.cc
+++ b/onnxruntime/core/providers/cuda/nn/conv.cc
@@ -1,38 +1,47 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
+// Copyright (c) 2023 NVIDIA Corporation.
 // Licensed under the MIT License.
 
+#include <utility>
+
 #include "core/providers/cuda/nn/conv.h"
 #include "core/common/span_utils.h"
 #include "core/providers/cuda/cuda_common.h"
 #include "core/providers/cuda/shared_inc/fpgeneric.h"
 #include "core/providers/cuda/tensor/slice.h"
+#include "core/providers/cuda/tensor/transpose.h"
 
 namespace onnxruntime {
 namespace cuda {
 
 // Op Set 11 for Conv only update document to clearify default dilations and strides value.
 // which are already convered by op set 11 cpu versoin, so simply add declaration.
-#define REGISTER_KERNEL_TYPED(T)                                                           \
+#define REGISTER_KERNEL_TYPED(T, DOMAIN, NHWC)                                             \
   ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_EX(                                                 \
       Conv,                                                                                \
-      kOnnxDomain,                                                                         \
+      DOMAIN,                                                                              \
       1, 10,                                                                               \
       T,                                                                                   \
       kCudaExecutionProvider,                                                              \
       (*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), \
-      Conv<T, false>);                                                                     \
+      Conv<T, NHWC>);                                                                      \
   ONNX_OPERATOR_TYPED_KERNEL_EX(                                                           \
       Conv,                                                                                \
-      kOnnxDomain,                                                                         \
+      DOMAIN,                                                                              \
       11,                                                                                  \
       T,                                                                                   \
       kCudaExecutionProvider,                                                              \
       (*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), \
-      Conv<T, false>);
+      Conv<T, NHWC>);
+
+REGISTER_KERNEL_TYPED(float, kOnnxDomain, false)
+REGISTER_KERNEL_TYPED(double, kOnnxDomain, false)
+REGISTER_KERNEL_TYPED(MLFloat16, kOnnxDomain, false)
 
-REGISTER_KERNEL_TYPED(float)
-REGISTER_KERNEL_TYPED(double)
-REGISTER_KERNEL_TYPED(MLFloat16)
+#ifdef ENABLE_CUDA_NHWC_OPS
+REGISTER_KERNEL_TYPED(float, kMSInternalNHWCDomain, true)
+REGISTER_KERNEL_TYPED(MLFloat16, kMSInternalNHWCDomain, true)
+#endif
 
 template <typename T, bool NHWC>
 const cudnnConvolutionFwdAlgo_t Conv<T, NHWC>::kAllAlgos[] = {
@@ -86,6 +95,39 @@ Status SliceOutUnwantedOutputSection(cudaStream_t stream,
   return SliceCuda::Impl(stream, input_data, input_dims, output_data, compute_metadata, element_size);
 }
 
+template <typename T, bool NHWC>
+Status Conv<T, NHWC>::PrePack(const Tensor& tensor, int input_idx, AllocatorPtr alloc,
+                              bool& is_packed, [[maybe_unused]] PrePackedWeights* prepacked_weights) {
+  is_packed = false;
+  // only layout of weight input is adjusted via PrePack
+  if (NHWC && is_nhwc_domain_) {  // InputTensors::IN_W
+    if (input_idx == 1) {
+      // Transpose from {M, C/group, kH, kW} to {M, kH, kW, C/group}
+      auto orig_shape = tensor.Shape();
+
+      InlinedVector<size_t> perm{0, 2, 3, 1};
+      gsl::span<size_t> permutation(perm.data(), 4);
+      TensorShapeVector new_dims{orig_shape[0],
+                                 orig_shape[2],
+                                 orig_shape[3],
+                                 orig_shape[1]};
+      W_ = Tensor::Create(tensor.DataType(), TensorShape(new_dims), std::move(alloc));
+
+      auto status = cuda::Transpose::DoTranspose(GetDeviceProp(),
+                                                 DefaultCudaStream(),
+                                                 DefaultCublasHandle(),
+                                                 permutation, tensor, *W_);
+      if (!status.IsOK()) {
+        return status;
+      }
+      CUDA_CALL_THROW(cudaStreamSynchronize(DefaultCudaStream()));
+      is_packed = true;
+    }
+  }
+
+  return Status::OK();
+}
+
 template <typename T, bool NHWC>
 Status Conv<T, NHWC>::UpdateState(OpKernelContext* context, bool bias_expected) const {
   // set X
@@ -95,7 +137,12 @@ Status Conv<T, NHWC>::UpdateState(OpKernelContext* context, bool bias_expected)
   s_.x_data = reinterpret_cast<const CudaT*>(X->Data<T>());
   s_.element_size = X->DataType()->Size();
   // set W
-  const Tensor* W = context->Input<Tensor>(1);
+  const Tensor* W;
+  if (!W_) {
+    W = context->Input<Tensor>(1);
+  } else {
+    W = W_.get();
+  }
   const TensorShape& w_shape = W->Shape();
   auto w_dims = w_shape.AsShapeVector();
   s_.w_data = reinterpret_cast<const CudaT*>(W->Data<T>());
diff --git a/onnxruntime/core/providers/cuda/nn/conv.h b/onnxruntime/core/providers/cuda/nn/conv.h
index 07825b93204ca..bcaa4d855b81e 100644
--- a/onnxruntime/core/providers/cuda/nn/conv.h
+++ b/onnxruntime/core/providers/cuda/nn/conv.h
@@ -1,13 +1,16 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
+// Copyright (c) 2023 NVIDIA Corporation.
 // Licensed under the MIT License.
 
 #pragma once
 
+#include <list>
+#include <memory>
+
 #include "core/platform/ort_mutex.h"
 #include "core/providers/cuda/cuda_kernel.h"
 #include "core/providers/cuda/cudnn_common.h"
 #include "core/providers/cpu/nn/conv_attributes.h"
-#include <list>
 
 namespace onnxruntime {
 
@@ -187,8 +190,12 @@ class Conv : public CudaKernel {
   Conv(const OpKernelInfo& info) : CudaKernel(info), conv_attrs_(info) {
     auto pads_size = conv_attrs_.pads.size();
     ORT_ENFORCE(pads_size % 2 == 0);
+    is_nhwc_domain_ = info.node().Domain() == kMSInternalNHWCDomain;
   }
 
+  Status PrePack(const Tensor& tensor, int input_idx, AllocatorPtr alloc,
+                 bool& is_packed, [[maybe_unused]] PrePackedWeights* prepacked_weights) override;
+
   Status ComputeInternal(OpKernelContext* context) const override;
 
  protected:
@@ -201,6 +208,8 @@ class Conv : public CudaKernel {
   mutable CudnnConvState<cudnnConvolutionFwdAlgoPerf_t> s_;
   constexpr static auto kDefaultConvAlgo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM;
   static const cudnnConvolutionFwdAlgo_t kAllAlgos[];
+  std::unique_ptr<Tensor> W_;
+  bool is_nhwc_domain_;  // prepack is only needed for the Conv in kMSInternalNHWCDomain
 };
 
 Status SliceOutUnwantedOutputSection(cudaStream_t stream,
diff --git a/onnxruntime/core/providers/cuda/nn/conv_transpose.cc b/onnxruntime/core/providers/cuda/nn/conv_transpose.cc
index 2b3326c528659..55dceaa2698e8 100644
--- a/onnxruntime/core/providers/cuda/nn/conv_transpose.cc
+++ b/onnxruntime/core/providers/cuda/nn/conv_transpose.cc
@@ -1,7 +1,11 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
+// Copyright (c) 2023 NVIDIA Corporation.
 // Licensed under the MIT License.
 
+#include <utility>
+
 #include "conv_transpose.h"
+#include "core/providers/cuda/tensor/transpose.h"
 
 // To suppress FP static analyzer warnings:
 // https://msdata.visualstudio.com/Vienna/_workitems/edit/1944928 and
@@ -17,35 +21,59 @@ namespace cuda {
 
 // Op Set 11 for ConvTranspose only update document to clarify default dilations and strides value.
 // which are already covered by op set 11 cpu version, so simply add declaration.
-#define REGISTER_KERNEL_TYPED(T)                                                           \
-  ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_EX(                                                 \
-      ConvTranspose,                                                                       \
-      kOnnxDomain,                                                                         \
-      1, 10,                                                                               \
-      T,                                                                                   \
-      kCudaExecutionProvider,                                                              \
-      (*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), \
-      ConvTranspose<T>);                                                                   \
-  ONNX_OPERATOR_TYPED_KERNEL_EX(                                                           \
-      ConvTranspose,                                                                       \
-      kOnnxDomain,                                                                         \
-      11,                                                                                  \
-      T,                                                                                   \
-      kCudaExecutionProvider,                                                              \
-      (*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), \
-      ConvTranspose<T>);
-
-REGISTER_KERNEL_TYPED(float)
-REGISTER_KERNEL_TYPED(double)
-REGISTER_KERNEL_TYPED(MLFloat16)
-
-template <typename T>
-Status ConvTranspose<T>::ComputeInternal(OpKernelContext* context) const {
+#define REGISTER_KERNEL_TYPED(T, DOMAIN, NHWC)                                                                       \
+  ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_EX(                                                                           \
+      ConvTranspose, DOMAIN, 1, 10, T, kCudaExecutionProvider,                                                       \
+      (*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), ConvTranspose<T, NHWC>);  \
+  ONNX_OPERATOR_TYPED_KERNEL_EX(ConvTranspose, DOMAIN, 11, T, kCudaExecutionProvider,                                \
+                                (*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), \
+                                ConvTranspose<T, NHWC>);
+
+REGISTER_KERNEL_TYPED(float, kOnnxDomain, false)
+REGISTER_KERNEL_TYPED(double, kOnnxDomain, false)
+REGISTER_KERNEL_TYPED(MLFloat16, kOnnxDomain, false)
+
+#ifdef ENABLE_CUDA_NHWC_OPS
+REGISTER_KERNEL_TYPED(float, kMSInternalNHWCDomain, true)
+REGISTER_KERNEL_TYPED(MLFloat16, kMSInternalNHWCDomain, true)
+#endif
+
+template <typename T, bool NHWC>
+Status ConvTranspose<T, NHWC>::ComputeInternal(OpKernelContext* context) const {
   return DoConvTranspose(context, false);
 }
 
-template <typename T>
-Status ConvTranspose<T>::DoConvTranspose(OpKernelContext* context, bool dynamic_padding) const {
+template <typename T, bool NHWC>
+Status ConvTranspose<T, NHWC>::PrePack(const Tensor& tensor, int input_idx, AllocatorPtr alloc, bool& is_packed,
+                                       [[maybe_unused]] PrePackedWeights* prepacked_weights) {
+  is_packed = false;
+  // only layout of weight input is adjusted via PrePack
+  if (NHWC) {  // InputTensors::IN_W
+    if (input_idx == 1) {
+      // Transpose from {M, C/group, kH, kW} to {M, kH, kW, C/group}
+      auto orig_shape = tensor.Shape();
+
+      InlinedVector<size_t> perm{0, 2, 3, 1};
+      gsl::span<size_t> permutation(perm.data(), 4);
+      TensorShapeVector new_dims{orig_shape[0], orig_shape[2], orig_shape[3], orig_shape[1]};
+      W_ = Tensor::Create(tensor.DataType(), TensorShape(new_dims), std::move(alloc));
+
+      auto status = cuda::Transpose::DoTranspose(GetDeviceProp(), DefaultCudaStream(), DefaultCublasHandle(),
+                                                 permutation, tensor, *W_);
+
+      if (!status.IsOK()) {
+        return status;
+      }
+      CUDA_CALL_THROW(cudaStreamSynchronize(DefaultCudaStream()));
+      is_packed = true;
+    }
+  }
+
+  return Status::OK();
+}
+
+template <typename T, bool NHWC>
+Status ConvTranspose<T, NHWC>::DoConvTranspose(OpKernelContext* context, bool dynamic_padding) const {
   typedef typename ToCudaType<T>::MappedType CudaT;
 
   const Tensor* X = context->Input<Tensor>(0);
@@ -59,7 +87,12 @@ Status ConvTranspose<T>::DoConvTranspose(OpKernelContext* context, bool dynamic_
     return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Input X must be 3-, 4- or 5-dimensional.",
                            " X: ", X->Shape().ToString().c_str());
   }
-  const Tensor* W = context->Input<Tensor>(1);
+  const Tensor* W;
+  if (!W_) {
+    W = context->Input<Tensor>(1);
+  } else {
+    W = W_.get();
+  }
   const TensorShape& w_shape = W->Shape();
   TensorShapeVector w_dims = w_shape.AsShapeVector();
   auto w_data = reinterpret_cast<const CudaT*>(W->Data<T>());
@@ -80,8 +113,7 @@ Status ConvTranspose<T>::DoConvTranspose(OpKernelContext* context, bool dynamic_
     bool input_dims_changed = (s_.last_x_dims.AsShapeVector() != x_dims);
     bool w_dims_changed = (s_.last_w_dims.AsShapeVector() != w_dims);
     if (input_dims_changed || w_dims_changed) {
-      if (input_dims_changed)
-        s_.last_x_dims = gsl::make_span(x_dims);
+      if (input_dims_changed) s_.last_x_dims = gsl::make_span(x_dims);
 
       if (w_dims_changed) {
         s_.last_w_dims = gsl::make_span(w_dims);
@@ -89,7 +121,8 @@ Status ConvTranspose<T>::DoConvTranspose(OpKernelContext* context, bool dynamic_
       }
 
       ConvTransposeAttributes::Prepare p;
-      ORT_RETURN_IF_ERROR(conv_transpose_attrs_.PrepareForCompute(context, has_bias, p, dynamic_padding));
+      ORT_RETURN_IF_ERROR(
+          conv_transpose_attrs_.PrepareForCompute(context, has_bias, p, dynamic_padding, &w_shape, NHWC));
 
       auto y_dims = p.Y->Shape().AsShapeVector();
       if (x_dimensions == 3) {
@@ -102,8 +135,15 @@ Status ConvTranspose<T>::DoConvTranspose(OpKernelContext* context, bool dynamic_
       }
       s_.y_dims = gsl::make_span(y_dims);
 
-      if (w_dims_changed)
-        ORT_RETURN_IF_ERROR(s_.w_desc.Set(w_dims, CudnnTensor::GetDataType<CudaT>()));
+      if (w_dims_changed) {
+        if (NHWC) {
+          ORT_RETURN_IF_ERROR(s_.w_desc.Set(CUDNN_TENSOR_NHWC, CudnnTensor::GetDataType<CudaT>(),
+                                            static_cast<int>(w_dims[0]), static_cast<int>(w_dims[3]),
+                                            static_cast<int>(w_dims[1]), static_cast<int>(w_dims[2])));
+        } else {
+          ORT_RETURN_IF_ERROR(s_.w_desc.Set(w_dims, CudnnTensor::GetDataType<CudaT>()));
+        }
+      }
 
       // Special case when there is a dim value of 0 in the shape.
       // Return only after we have cached the following for subsequent runs :
@@ -112,31 +152,39 @@ Status ConvTranspose<T>::DoConvTranspose(OpKernelContext* context, bool dynamic_
       if (p.Y->Shape().Size() == 0) {
         return Status::OK();
       }
-
-      ORT_RETURN_IF_ERROR(s_.x_tensor.Set(x_dims, CudnnTensor::GetDataType<CudaT>()));
-      ORT_RETURN_IF_ERROR(s_.y_tensor.Set(y_dims, CudnnTensor::GetDataType<CudaT>()));
+      if (NHWC) {
+        ORT_RETURN_IF_ERROR(s_.x_tensor.Set(CUDNN_TENSOR_NHWC, CudnnTensor::GetDataType<CudaT>(),
+                                            static_cast<int>(x_dims[0]), static_cast<int>(x_dims[3]),
+                                            static_cast<int>(x_dims[1]), static_cast<int>(x_dims[2])));
+        ORT_RETURN_IF_ERROR(s_.y_tensor.Set(CUDNN_TENSOR_NHWC, CudnnTensor::GetDataType<CudaT>(),
+                                            static_cast<int>(y_dims[0]), static_cast<int>(y_dims[3]),
+                                            static_cast<int>(y_dims[1]), static_cast<int>(y_dims[2])));
+      } else {
+        ORT_RETURN_IF_ERROR(s_.x_tensor.Set(x_dims, CudnnTensor::GetDataType<CudaT>()));
+        ORT_RETURN_IF_ERROR(s_.y_tensor.Set(y_dims, CudnnTensor::GetDataType<CudaT>()));
+      }
 
       cudnnConvolutionMode_t mode = CUDNN_CROSS_CORRELATION;
       ORT_RETURN_IF_ERROR(s_.conv_desc.Set(p.kernel_shape.size(), p.pads, p.strides, p.dilations,
-                                           gsl::narrow_cast<int>(conv_transpose_attrs_.group),
-                                           mode, CudnnTensor::GetDataType<CudaT>()));
+                                           gsl::narrow_cast<int>(conv_transpose_attrs_.group), mode,
+                                           CudnnTensor::GetDataType<CudaT>()));
 
       if (has_bias) {
         const auto& b_shape = p.B->Shape();
         ORT_RETURN_IF_NOT(b_shape.NumDimensions() == 1, "bias should be 1D");
         TensorShapeVector b_dims(2 + p.kernel_shape.size());
-        b_dims[0] = 1;           // N
-        b_dims[1] = b_shape[0];  // C
-        for (size_t i = 0; i < p.kernel_shape.size(); i++)
-          b_dims[2 + i] = 1;
+        b_dims[0] = 1;                      // N
+        b_dims[NHWC ? 3 : 1] = b_shape[0];  // C
+        for (size_t i = 0; i < p.kernel_shape.size(); i++) b_dims[(NHWC ? 1 : 2) + i] = 1;
 
-        ORT_RETURN_IF_ERROR(s_.b_tensor.Set(b_dims, CudnnTensor::GetDataType<CudaT>()));
+        ORT_RETURN_IF_ERROR(s_.b_tensor.Set(b_dims, CudnnTensor::GetDataType<CudaT>(), NHWC));
       }
 
       y_data = reinterpret_cast<CudaT*>(p.Y->MutableData<T>());
 
       if (!s_.cached_benchmark_results.contains(x_dims)) {
-        IAllocatorUniquePtr<void> algo_search_workspace = GetScratchBuffer<void>(AlgoSearchWorkspaceSize, context->GetComputeStream());
+        IAllocatorUniquePtr<void> algo_search_workspace =
+            GetScratchBuffer<void>(AlgoSearchWorkspaceSize, context->GetComputeStream());
 
         // set math type to tensor core before algorithm search
         if constexpr (std::is_same<T, MLFloat16>::value)
@@ -145,19 +193,8 @@ Status ConvTranspose<T>::DoConvTranspose(OpKernelContext* context, bool dynamic_
         cudnnConvolutionBwdDataAlgoPerf_t perf;
         int algo_count = 1;
         CUDNN_RETURN_IF_ERROR(cudnnFindConvolutionBackwardDataAlgorithmEx(
-            GetCudnnHandle(context),
-            s_.w_desc,
-            w_data,
-            s_.x_tensor,
-            x_data,
-            s_.conv_desc,
-            s_.y_tensor,
-            y_data,
-            1,
-            &algo_count,
-            &perf,
-            algo_search_workspace.get(),
-            AlgoSearchWorkspaceSize));
+            GetCudnnHandle(context), s_.w_desc, w_data, s_.x_tensor, x_data, s_.conv_desc, s_.y_tensor, y_data, 1,
+            &algo_count, &perf, algo_search_workspace.get(), AlgoSearchWorkspaceSize));
         s_.cached_benchmark_results.insert(x_dims, {perf.algo, perf.memory, perf.mathType});
       }
 
@@ -188,26 +225,15 @@ Status ConvTranspose<T>::DoConvTranspose(OpKernelContext* context, bool dynamic_
 
     IAllocatorUniquePtr<void> workspace = GetScratchBuffer<void>(s_.workspace_bytes, context->GetComputeStream());
 
-    CUDNN_RETURN_IF_ERROR(
-        cudnnConvolutionBackwardData(
-            GetCudnnHandle(context),
-            &alpha,
-            s_.w_desc,
-            w_data,
-            s_.x_tensor,
-            x_data,
-            s_.conv_desc,
-            s_.algo,
-            workspace.get(),
-            s_.workspace_bytes,
-            &beta,
-            s_.y_tensor,
-            y_data));
+    CUDNN_RETURN_IF_ERROR(cudnnConvolutionBackwardData(GetCudnnHandle(context), &alpha, s_.w_desc, w_data, s_.x_tensor,
+                                                       x_data, s_.conv_desc, s_.algo, workspace.get(),
+                                                       s_.workspace_bytes, &beta, s_.y_tensor, y_data));
 
     if (has_bias) {
       const Tensor* B = dynamic_padding ? context->Input<Tensor>(3) : context->Input<Tensor>(2);
       auto b_data = reinterpret_cast<const CudaT*>(B->Data<T>());
-      CUDNN_RETURN_IF_ERROR(cudnnAddTensor(GetCudnnHandle(context), &alpha, s_.b_tensor, b_data, &alpha, s_.y_tensor, y_data));
+      CUDNN_RETURN_IF_ERROR(
+          cudnnAddTensor(GetCudnnHandle(context), &alpha, s_.b_tensor, b_data, &alpha, s_.y_tensor, y_data));
     }
   }
 
diff --git a/onnxruntime/core/providers/cuda/nn/conv_transpose.h b/onnxruntime/core/providers/cuda/nn/conv_transpose.h
index 165d548d27fa2..77c9d94162b6b 100644
--- a/onnxruntime/core/providers/cuda/nn/conv_transpose.h
+++ b/onnxruntime/core/providers/cuda/nn/conv_transpose.h
@@ -1,8 +1,11 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
+// Copyright (c) 2023 NVIDIA Corporation.
 // Licensed under the MIT License.
 
 #pragma once
 
+#include <memory>
+
 #include "core/providers/cuda/cuda_common.h"
 #include "core/providers/cuda/cuda_kernel.h"
 #include "core/providers/cuda/cudnn_common.h"
@@ -12,10 +15,12 @@
 namespace onnxruntime {
 namespace cuda {
 
-template <typename T>
+template <typename T, bool NHWC>
 class ConvTranspose : public CudaKernel {
  public:
   ConvTranspose(const OpKernelInfo& info) : CudaKernel(info), conv_transpose_attrs_(info){};
+  Status PrePack(const Tensor& tensor, int input_idx, AllocatorPtr alloc,
+                 bool& is_packed, [[maybe_unused]] PrePackedWeights* prepacked_weights) override;
   Status ComputeInternal(OpKernelContext* context) const override;
   Status DoConvTranspose(OpKernelContext* context, bool dynamic_padding) const;
 
@@ -23,6 +28,7 @@ class ConvTranspose : public CudaKernel {
   ConvTransposeAttributes conv_transpose_attrs_;
 
   mutable CudnnConvState<cudnnConvolutionBwdDataAlgoPerf_t> s_;
+  std::unique_ptr<Tensor> W_;
 };
 
 }  // namespace cuda
diff --git a/onnxruntime/core/providers/cuda/nn/layer_norm_impl.cu b/onnxruntime/core/providers/cuda/nn/layer_norm_impl.cu
index 4cc560a1178ef..679b8b6b78886 100644
--- a/onnxruntime/core/providers/cuda/nn/layer_norm_impl.cu
+++ b/onnxruntime/core/providers/cuda/nn/layer_norm_impl.cu
@@ -104,17 +104,17 @@ __device__ void cuWelfordMuSigma2(
     const int numx = blockDim.x * blockDim.y;
     const int thrx = threadIdx.x + threadIdx.y * blockDim.x;
     const T* lvals = vals + i1 * n2;
-    const T* skip_vals = (skip != NULL) ? skip + i1 * n2 : NULL;
+    const T* skip_vals = (skip != nullptr) ? skip + i1 * n2 : nullptr;
     int l = 4 * thrx;
     for (; l + 3 < n2; l += 4 * numx) {
       for (int k = 0; k < 4; ++k) {
         U curr = static_cast<U>(lvals[l + k]);
 
-        if (bias != NULL) {
+        if (bias != nullptr) {
           curr += static_cast<U>(bias[l + k]);
         }
 
-        if (skip_vals != NULL) {
+        if (skip_vals != nullptr) {
           curr += static_cast<U>(skip_vals[l + k]);
         }
 
@@ -124,11 +124,11 @@ __device__ void cuWelfordMuSigma2(
     for (; l < n2; ++l) {
       U curr = static_cast<U>(lvals[l]);
 
-      if (bias != NULL) {
+      if (bias != nullptr) {
         curr += static_cast<U>(bias[l]);
       }
 
-      if (skip_vals != NULL) {
+      if (skip_vals != nullptr) {
         curr += static_cast<U>(skip_vals[l]);
       }
 
@@ -301,7 +301,7 @@ namespace {
 //      {
 //          extern __device__ void error(void);
 //          error();
-//          return NULL;
+//          return nullptr;
 //      }
 //  };
 // https://github.com/NVIDIA/apex/issues/246
@@ -338,9 +338,7 @@ __global__ void cuApplyLayerNorm(
     const V* __restrict__ beta,
     const T* __restrict__ skip,
     const T* __restrict__ bias,
-    T* __restrict__ skip_input_bias_add_output,
-    const bool skip_broadcasted,
-    const int skip_size) {
+    T* __restrict__ skip_input_bias_add_output) {
   // Assumptions:
   // 1) blockDim.x == GPU_WARP_SIZE
   // 2) Tensors are contiguous
@@ -350,38 +348,35 @@ __global__ void cuApplyLayerNorm(
     U* buf = shared.getPointer();
     U mu, sigma2;
     cuWelfordMuSigma2<T, U, simplified>(vals, n1, n2, i1, mu, sigma2, buf, skip, bias);
-    const T* lvals = vals + i1 * n2;
-    const T* skip_vals = (skip != NULL) ? skip + i1 * n2 : NULL;
-    V* ovals = output_vals + i1 * n2;
-    T* skip_input_bias_add_ovals = (skip_input_bias_add_output != NULL) ? skip_input_bias_add_output + i1 * n2 : NULL;
+    const int offset = i1 * n2;
+    const T* lvals = vals + offset;
+    const T* skip_vals = (skip != nullptr) ? skip + offset : nullptr;
+
+    V* ovals = output_vals + offset;
+    T* skip_input_bias_add_ovals = (skip_input_bias_add_output != nullptr) ? skip_input_bias_add_output + offset : nullptr;
     U c_inv_std_dev = rsqrt(sigma2 + epsilon);
     const int numx = blockDim.x * blockDim.y;
     const int thrx = threadIdx.x + threadIdx.y * blockDim.x;
     for (int i = thrx; i < n2; i += numx) {
       U curr = static_cast<U>(lvals[i]);
 
-
-
-      if (bias != NULL) {
+      if (bias != nullptr) {
         curr += static_cast<U>(bias[i]);
       }
 
-      if (skip_vals != NULL && skip_broadcasted) {
-        int skip_i = i % skip_size;
-        curr += static_cast<U>(skip_vals[skip_i]);  //Calculates index for the second dimension of the skip tensor
-      }else if (skip_vals != NULL){
+      if (skip_vals != nullptr) {
         curr += static_cast<U>(skip_vals[i]);
       }
 
-      U gamma_i = (gamma != NULL) ? (U)gamma[i] : (U)1;
-      U beta_i = (beta != NULL) ? (U)beta[i] : (U)0;
+      U gamma_i = (gamma != nullptr) ? (U)gamma[i] : (U)1;
+      U beta_i = (beta != nullptr) ? (U)beta[i] : (U)0;
       if (simplified) {
         ovals[i] = static_cast<V>(gamma_i * c_inv_std_dev * curr);
       } else {
         ovals[i] = static_cast<V>(gamma_i * c_inv_std_dev * (curr - mu) + beta_i);
       }
 
-      if (skip_input_bias_add_ovals != NULL) {
+      if (skip_input_bias_add_ovals != nullptr) {
         skip_input_bias_add_ovals[i] = static_cast<T>(curr);
       }
     }
@@ -418,9 +413,7 @@ void HostApplyLayerNorm(
     const V* beta,
     const T* skip,
     const T* bias,
-    T* skip_input_bias_add_output,
-    const bool skip_broadcasted,
-    const int skip_size) {
+    T* skip_input_bias_add_output) {
   const int maxGridY = prop.maxGridSize[1];
   const int warp_size = prop.warpSize;
   ORT_ENFORCE(warp_size == GPU_WARP_SIZE_HOST);
@@ -452,17 +445,14 @@ void HostApplyLayerNorm(
       n1, n2,
       U(epsilon),
       gamma, beta,
-      skip, bias, skip_input_bias_add_output,
-      skip_broadcasted,
-      skip_size);
+      skip, bias, skip_input_bias_add_output);
 }
 
 #define LAYERNORM_LINEAR_IMPL(T, U, V, simplified)                                                                    \
   template void HostApplyLayerNorm<T, U, V, simplified>(const cudaDeviceProp& prop, cudaStream_t stream, V* output,   \
                                                         U* mean, U* inv_std_dev, const T* input, int n1, int n2,      \
                                                         double epsilon, const V* gamma, const V* beta, const T* skip, \
-                                                        const T* bias, T* skip_input_bias_add_output, const bool skip_broadcasted, \
-                                                        const int skip_size);
+                                                        const T* bias, T* skip_input_bias_add_output);
 
 LAYERNORM_LINEAR_IMPL(float, float, float, true)
 LAYERNORM_LINEAR_IMPL(half, float, half, true)
diff --git a/onnxruntime/core/providers/cuda/nn/layer_norm_impl.h b/onnxruntime/core/providers/cuda/nn/layer_norm_impl.h
index d0d5db8ba3587..e3952eefae35d 100644
--- a/onnxruntime/core/providers/cuda/nn/layer_norm_impl.h
+++ b/onnxruntime/core/providers/cuda/nn/layer_norm_impl.h
@@ -43,9 +43,7 @@ void HostApplyLayerNorm(
     const V* beta,
     const T* skip = nullptr,
     const T* bias = nullptr,
-    T* skip_input_bias_add_output = nullptr,
-    const bool skip_broadcasted = false,
-    const int skip_size = 0);
+    T* skip_input_bias_add_output = nullptr);
 
 }  // namespace cuda
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/cuda/nn/pool.cc b/onnxruntime/core/providers/cuda/nn/pool.cc
index e632ef20bce43..8bc96958693bc 100644
--- a/onnxruntime/core/providers/cuda/nn/pool.cc
+++ b/onnxruntime/core/providers/cuda/nn/pool.cc
@@ -1,4 +1,5 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
+// Copyright (c) 2023 NVIDIA Corporation.
 // Licensed under the MIT License.
 
 #include "core/providers/shared_library/provider_api.h"
@@ -11,92 +12,99 @@ using namespace onnxruntime::common;
 namespace onnxruntime {
 namespace cuda {
 
-#define POOLING_KERNEL(op_name, data_type, pool_type, since_version)                               \
+#define POOLING_KERNEL(op_name, data_type, pool_type, since_version, op_domain, nhwc)              \
   ONNX_OPERATOR_TYPED_KERNEL_EX(                                                                   \
-      op_name,                                                                                     \
-      kOnnxDomain,                                                                                 \
-      since_version,                                                                               \
-      data_type,                                                                                   \
-      kCudaExecutionProvider,                                                                      \
+      op_name, op_domain, since_version, data_type, kCudaExecutionProvider,                        \
       (*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<data_type>()), \
-      Pool<data_type, pool_type>);
-
-#define POOLING_KERNEL_VERSIONED(op_name, data_type, pool_type, since_version, end_version) \
-  ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_EX(                                                  \
-      op_name,                                                                              \
-      kOnnxDomain,                                                                          \
-      since_version,                                                                        \
-      end_version,                                                                          \
-      data_type,                                                                            \
-      kCudaExecutionProvider,                                                               \
-      (*KernelDefBuilder::Create())                                                         \
-          .TypeConstraint("T", DataTypeImpl::GetTensorType<data_type>()),                   \
-      Pool<data_type, pool_type>);
-
-#define POOLING_KERNEL_WITH_INDICES(op_name, data_type, pool_type, since_version) \
-  ONNX_OPERATOR_TYPED_KERNEL_EX(                                                  \
-      op_name,                                                                    \
-      kOnnxDomain,                                                                \
-      since_version,                                                              \
-      data_type,                                                                  \
-      kCudaExecutionProvider,                                                     \
-      (*KernelDefBuilder::Create())                                               \
-          .TypeConstraint("T", DataTypeImpl::GetTensorType<data_type>())          \
-          .TypeConstraint("I", DataTypeImpl::GetTensorType<int64_t>()),           \
-      Pool<data_type, pool_type>);
-
-#define POOLING_KERNEL_VERSIONED_WITH_INDICES(op_name, data_type, pool_type, since_version, end_version) \
-  ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_EX(                                                               \
-      op_name,                                                                                           \
-      kOnnxDomain,                                                                                       \
-      since_version,                                                                                     \
-      end_version,                                                                                       \
-      data_type,                                                                                         \
-      kCudaExecutionProvider,                                                                            \
-      (*KernelDefBuilder::Create())                                                                      \
-          .TypeConstraint("T", DataTypeImpl::GetTensorType<data_type>())                                 \
-          .TypeConstraint("I", DataTypeImpl::GetTensorType<int64_t>()),                                  \
-      Pool<data_type, pool_type>);
-
-POOLING_KERNEL_VERSIONED(AveragePool, float, AveragePool, 7, 9)
-POOLING_KERNEL_VERSIONED(AveragePool, double, AveragePool, 7, 9)
-POOLING_KERNEL_VERSIONED(AveragePool, MLFloat16, AveragePool, 7, 9)
-POOLING_KERNEL_VERSIONED(AveragePool, float, AveragePool, 10, 10)
-POOLING_KERNEL_VERSIONED(AveragePool, double, AveragePool, 10, 10)
-POOLING_KERNEL_VERSIONED(AveragePool, MLFloat16, AveragePool, 10, 10)
+      Pool<data_type, pool_type, nhwc>);
+
+#define POOLING_KERNEL_VERSIONED(op_name, data_type, pool_type, since_version, end_version, op_domain, nhwc) \
+  ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_EX(                                                                   \
+      op_name, op_domain, since_version, end_version, data_type, kCudaExecutionProvider,                     \
+      (*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<data_type>()),           \
+      Pool<data_type, pool_type, nhwc>);
+
+#define POOLING_KERNEL_WITH_INDICES(op_name, data_type, pool_type, since_version, op_domain, nhwc)    \
+  ONNX_OPERATOR_TYPED_KERNEL_EX(op_name, op_domain, since_version, data_type, kCudaExecutionProvider, \
+                                (*KernelDefBuilder::Create())                                         \
+                                    .TypeConstraint("T", DataTypeImpl::GetTensorType<data_type>())    \
+                                    .TypeConstraint("I", DataTypeImpl::GetTensorType<int64_t>()),     \
+                                Pool<data_type, pool_type, nhwc>);
+
+#define POOLING_KERNEL_VERSIONED_WITH_INDICES(op_name, data_type, pool_type, since_version, end_version, op_domain, \
+                                              nhwc)                                                                 \
+  ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_EX(op_name, op_domain, since_version, end_version, data_type,                \
+                                          kCudaExecutionProvider,                                                   \
+                                          (*KernelDefBuilder::Create())                                             \
+                                              .TypeConstraint("T", DataTypeImpl::GetTensorType<data_type>())        \
+                                              .TypeConstraint("I", DataTypeImpl::GetTensorType<int64_t>()),         \
+                                          Pool<data_type, pool_type, nhwc>);
+
+POOLING_KERNEL_VERSIONED(AveragePool, float, AveragePool, 7, 9, kOnnxDomain, false)
+POOLING_KERNEL_VERSIONED(AveragePool, double, AveragePool, 7, 9, kOnnxDomain, false)
+POOLING_KERNEL_VERSIONED(AveragePool, MLFloat16, AveragePool, 7, 9, kOnnxDomain, false)
+POOLING_KERNEL_VERSIONED(AveragePool, float, AveragePool, 10, 10, kOnnxDomain, false)
+POOLING_KERNEL_VERSIONED(AveragePool, double, AveragePool, 10, 10, kOnnxDomain, false)
+POOLING_KERNEL_VERSIONED(AveragePool, MLFloat16, AveragePool, 10, 10, kOnnxDomain, false)
 // AveragePool and MaxPool op set 11 only update spec document on default value for dilations and strides.
-POOLING_KERNEL(AveragePool, float, AveragePool, 11)
-POOLING_KERNEL(AveragePool, double, AveragePool, 11)
-POOLING_KERNEL(AveragePool, MLFloat16, AveragePool, 11)
-POOLING_KERNEL(GlobalAveragePool, float, AveragePool, 1)
-POOLING_KERNEL(GlobalAveragePool, double, AveragePool, 1)
-POOLING_KERNEL(GlobalAveragePool, MLFloat16, AveragePool, 1)
-POOLING_KERNEL_VERSIONED(MaxPool, float, MaxPool<1>, 1, 7)
-POOLING_KERNEL_VERSIONED(MaxPool, double, MaxPool<1>, 1, 7)
-POOLING_KERNEL_VERSIONED(MaxPool, MLFloat16, MaxPool<1>, 1, 7)
-POOLING_KERNEL_VERSIONED_WITH_INDICES(MaxPool, float, MaxPool<8>, 8, 9)
-POOLING_KERNEL_VERSIONED_WITH_INDICES(MaxPool, double, MaxPool<8>, 8, 9)
-POOLING_KERNEL_VERSIONED_WITH_INDICES(MaxPool, MLFloat16, MaxPool<8>, 8, 9)
-POOLING_KERNEL_VERSIONED_WITH_INDICES(MaxPool, float, MaxPool<8>, 10, 10)
-POOLING_KERNEL_VERSIONED_WITH_INDICES(MaxPool, double, MaxPool<8>, 10, 10)
-POOLING_KERNEL_VERSIONED_WITH_INDICES(MaxPool, MLFloat16, MaxPool<8>, 10, 10)
-POOLING_KERNEL_VERSIONED_WITH_INDICES(MaxPool, float, MaxPool<8>, 11, 11)
-POOLING_KERNEL_VERSIONED_WITH_INDICES(MaxPool, double, MaxPool<8>, 11, 11)
-POOLING_KERNEL_VERSIONED_WITH_INDICES(MaxPool, MLFloat16, MaxPool<8>, 11, 11)
-POOLING_KERNEL_WITH_INDICES(MaxPool, float, MaxPool<8>, 12)
-POOLING_KERNEL_WITH_INDICES(MaxPool, double, MaxPool<8>, 12)
-POOLING_KERNEL_WITH_INDICES(MaxPool, MLFloat16, MaxPool<8>, 12)
-POOLING_KERNEL_WITH_INDICES(MaxPool, int8_t, MaxPool<8>, 12)
-POOLING_KERNEL_WITH_INDICES(MaxPool, uint8_t, MaxPool<8>, 12)
-
-POOLING_KERNEL(GlobalMaxPool, float, MaxPool<1>, 1)
-POOLING_KERNEL(GlobalMaxPool, double, MaxPool<1>, 1)
-POOLING_KERNEL(GlobalMaxPool, MLFloat16, MaxPool<1>, 1)
+POOLING_KERNEL(AveragePool, float, AveragePool, 11, kOnnxDomain, false)
+POOLING_KERNEL(AveragePool, double, AveragePool, 11, kOnnxDomain, false)
+POOLING_KERNEL(AveragePool, MLFloat16, AveragePool, 11, kOnnxDomain, false)
+POOLING_KERNEL(GlobalAveragePool, float, AveragePool, 1, kOnnxDomain, false)
+POOLING_KERNEL(GlobalAveragePool, double, AveragePool, 1, kOnnxDomain, false)
+POOLING_KERNEL(GlobalAveragePool, MLFloat16, AveragePool, 1, kOnnxDomain, false)
+POOLING_KERNEL_VERSIONED(MaxPool, float, MaxPool<1>, 1, 7, kOnnxDomain, false)
+POOLING_KERNEL_VERSIONED(MaxPool, double, MaxPool<1>, 1, 7, kOnnxDomain, false)
+POOLING_KERNEL_VERSIONED(MaxPool, MLFloat16, MaxPool<1>, 1, 7, kOnnxDomain, false)
+POOLING_KERNEL_VERSIONED_WITH_INDICES(MaxPool, float, MaxPool<8>, 8, 9, kOnnxDomain, false)
+POOLING_KERNEL_VERSIONED_WITH_INDICES(MaxPool, double, MaxPool<8>, 8, 9, kOnnxDomain, false)
+POOLING_KERNEL_VERSIONED_WITH_INDICES(MaxPool, MLFloat16, MaxPool<8>, 8, 9, kOnnxDomain, false)
+POOLING_KERNEL_VERSIONED_WITH_INDICES(MaxPool, float, MaxPool<8>, 10, 10, kOnnxDomain, false)
+POOLING_KERNEL_VERSIONED_WITH_INDICES(MaxPool, double, MaxPool<8>, 10, 10, kOnnxDomain, false)
+POOLING_KERNEL_VERSIONED_WITH_INDICES(MaxPool, MLFloat16, MaxPool<8>, 10, 10, kOnnxDomain, false)
+POOLING_KERNEL_VERSIONED_WITH_INDICES(MaxPool, float, MaxPool<8>, 11, 11, kOnnxDomain, false)
+POOLING_KERNEL_VERSIONED_WITH_INDICES(MaxPool, double, MaxPool<8>, 11, 11, kOnnxDomain, false)
+POOLING_KERNEL_VERSIONED_WITH_INDICES(MaxPool, MLFloat16, MaxPool<8>, 11, 11, kOnnxDomain, false)
+POOLING_KERNEL_WITH_INDICES(MaxPool, float, MaxPool<8>, 12, kOnnxDomain, false)
+POOLING_KERNEL_WITH_INDICES(MaxPool, double, MaxPool<8>, 12, kOnnxDomain, false)
+POOLING_KERNEL_WITH_INDICES(MaxPool, MLFloat16, MaxPool<8>, 12, kOnnxDomain, false)
+POOLING_KERNEL_WITH_INDICES(MaxPool, int8_t, MaxPool<8>, 12, kOnnxDomain, false)
+POOLING_KERNEL_WITH_INDICES(MaxPool, uint8_t, MaxPool<8>, 12, kOnnxDomain, false)
+
+POOLING_KERNEL(GlobalMaxPool, float, MaxPool<1>, 1, kOnnxDomain, false)
+POOLING_KERNEL(GlobalMaxPool, double, MaxPool<1>, 1, kOnnxDomain, false)
+POOLING_KERNEL(GlobalMaxPool, MLFloat16, MaxPool<1>, 1, kOnnxDomain, false)
+
+// NHWC variants
+#ifdef ENABLE_CUDA_NHWC_OPS
+POOLING_KERNEL_VERSIONED(MaxPool, float, MaxPool<1>, 1, 7, kMSInternalNHWCDomain, true)
+POOLING_KERNEL_VERSIONED(MaxPool, MLFloat16, MaxPool<1>, 1, 7, kMSInternalNHWCDomain, true)
+POOLING_KERNEL_VERSIONED_WITH_INDICES(MaxPool, float, MaxPool<8>, 8, 9, kMSInternalNHWCDomain, true)
+POOLING_KERNEL_VERSIONED_WITH_INDICES(MaxPool, MLFloat16, MaxPool<8>, 8, 9, kMSInternalNHWCDomain, true)
+POOLING_KERNEL_VERSIONED_WITH_INDICES(MaxPool, float, MaxPool<8>, 10, 10, kMSInternalNHWCDomain, true)
+POOLING_KERNEL_VERSIONED_WITH_INDICES(MaxPool, MLFloat16, MaxPool<8>, 10, 10, kMSInternalNHWCDomain, true)
+POOLING_KERNEL_VERSIONED_WITH_INDICES(MaxPool, float, MaxPool<8>, 11, 11, kMSInternalNHWCDomain, true)
+POOLING_KERNEL_VERSIONED_WITH_INDICES(MaxPool, MLFloat16, MaxPool<8>, 11, 11, kMSInternalNHWCDomain, true)
+POOLING_KERNEL_WITH_INDICES(MaxPool, float, MaxPool<8>, 12, kMSInternalNHWCDomain, true)
+POOLING_KERNEL_WITH_INDICES(MaxPool, MLFloat16, MaxPool<8>, 12, kMSInternalNHWCDomain, true)
+
+POOLING_KERNEL(GlobalMaxPool, float, MaxPool<1>, 1, kMSInternalNHWCDomain, true)
+POOLING_KERNEL(GlobalMaxPool, MLFloat16, MaxPool<1>, 1, kMSInternalNHWCDomain, true)
+
+POOLING_KERNEL_VERSIONED(AveragePool, float, AveragePool, 7, 9, kMSInternalNHWCDomain, true)
+POOLING_KERNEL_VERSIONED(AveragePool, MLFloat16, AveragePool, 7, 9, kMSInternalNHWCDomain, true)
+POOLING_KERNEL_VERSIONED(AveragePool, float, AveragePool, 10, 10, kMSInternalNHWCDomain, true)
+POOLING_KERNEL_VERSIONED(AveragePool, MLFloat16, AveragePool, 10, 10, kMSInternalNHWCDomain, true)
+// AveragePool and MaxPool op set 11 only update spec document on default value for dilations
+POOLING_KERNEL(AveragePool, float, AveragePool, 11, kMSInternalNHWCDomain, true)
+POOLING_KERNEL(AveragePool, MLFloat16, AveragePool, 11, kMSInternalNHWCDomain, true)
+POOLING_KERNEL(GlobalAveragePool, float, AveragePool, 1, kMSInternalNHWCDomain, true)
+POOLING_KERNEL(GlobalAveragePool, MLFloat16, AveragePool, 1, kMSInternalNHWCDomain, true)
+#endif
 
 class CudnnPoolingDescriptor final {
  public:
-  CudnnPoolingDescriptor() : desc_(nullptr) {
-  }
+  CudnnPoolingDescriptor() : desc_(nullptr) {}
 
   ~CudnnPoolingDescriptor() {
     if (desc_ != nullptr) {
@@ -108,12 +116,9 @@ class CudnnPoolingDescriptor final {
   CudnnPoolingDescriptor(const CudnnPoolingDescriptor&) = delete;
   CudnnPoolingDescriptor& operator=(const CudnnPoolingDescriptor&) = delete;
 
-  Status Set(cudnnPoolingMode_t mode,
-             const gsl::span<const int64_t>& kernel_shape,
-             const gsl::span<const int64_t>& pads,
-             const gsl::span<const int64_t>& strides) {
-    if (!desc_)
-      CUDNN_RETURN_IF_ERROR(cudnnCreatePoolingDescriptor(&desc_));
+  Status Set(cudnnPoolingMode_t mode, const gsl::span<const int64_t>& kernel_shape,
+             const gsl::span<const int64_t>& pads, const gsl::span<const int64_t>& strides) {
+    if (!desc_) CUDNN_RETURN_IF_ERROR(cudnnCreatePoolingDescriptor(&desc_));
 
     int rank = gsl::narrow_cast<int>(kernel_shape.size());
     InlinedVector<int> window(rank);
@@ -128,14 +133,8 @@ class CudnnPoolingDescriptor final {
     for (int i = 0; i < rank; i++) {
       stride[i] = gsl::narrow_cast<int>(strides[i]);
     }
-    CUDNN_RETURN_IF_ERROR(SetPoolingNdDescriptorHelper(
-        desc_,
-        mode,
-        CUDNN_PROPAGATE_NAN,
-        rank,
-        window.data(),
-        padding.data(),
-        stride.data()));
+    CUDNN_RETURN_IF_ERROR(SetPoolingNdDescriptorHelper(desc_, mode, CUDNN_PROPAGATE_NAN, rank, window.data(),
+                                                       padding.data(), stride.data()));
 
     return Status::OK();
   }
@@ -146,8 +145,8 @@ class CudnnPoolingDescriptor final {
   cudnnPoolingDescriptor_t desc_;
 };
 
-template <typename T, typename PoolType>
-Status Pool<T, PoolType>::ComputeInternal(OpKernelContext* context) const {
+template <typename T, typename PoolType, bool NHWC>
+Status Pool<T, PoolType, NHWC>::ComputeInternal(OpKernelContext* context) const {
   typedef typename ToCudaType<T>::MappedType CudaT;
   const Tensor* X = context->Input<Tensor>(0);
   const TensorShape& x_shape = X->Shape();
@@ -166,13 +165,12 @@ Status Pool<T, PoolType>::ComputeInternal(OpKernelContext* context) const {
     pads.assign(kernel_shape.size(), 0);
     strides.assign(kernel_shape.size(), 1);
   }
-
-  auto y_dims = pool_attrs_.SetOutputSize(x_shape, x_shape[1], &pads);
+  auto out_channel = NHWC ? x_shape[3] : x_shape[1];
+  auto y_dims = pool_attrs_.SetOutputSize(x_shape, out_channel, &pads, NHWC);
   TensorShape y_shape(y_dims);
   Tensor* Y = context->Output(0, y_shape);
   // special case when there is a dim value of 0 in the shape.
-  if (y_shape.Size() == 0)
-    return Status::OK();
+  if (y_shape.Size() == 0) return Status::OK();
 
   auto x_data = reinterpret_cast<const CudaT*>(X->Data<T>());
   auto y_data = reinterpret_cast<CudaT*>(Y->MutableData<T>());
@@ -181,12 +179,19 @@ Status Pool<T, PoolType>::ComputeInternal(OpKernelContext* context) const {
   TensorShapeVector y_dims_cudnn(y_dims);
   if (kernel_shape.size() < 2) {
     // cudnn only takes 4D or 5D input, so pad dimensions if needed
-    x_dims_cudnn.push_back(1);
-    y_dims_cudnn.push_back(1);
+    if (NHWC) {
+      x_dims_cudnn.insert(x_dims_cudnn.begin() + 1, 1);
+      y_dims_cudnn.insert(y_dims_cudnn.begin() + 1, 1);
+      kernel_shape.insert(kernel_shape.begin() + 1, 1);
+      strides.insert(strides.begin() + 1, 1);
+    } else {
+      x_dims_cudnn.push_back(1);
+      y_dims_cudnn.push_back(1);
+      kernel_shape.push_back(1);
+      strides.push_back(1);
+    }
     pads.insert(pads.begin() + kernel_shape.size(), 0);
     pads.insert(pads.end(), 0);
-    kernel_shape.push_back(1);
-    strides.push_back(1);
   }
 
   cudnnPoolingMode_t mode = CUDNN_POOLING_MAX;
@@ -203,8 +208,8 @@ Status Pool<T, PoolType>::ComputeInternal(OpKernelContext* context) const {
     const auto beta = Consts<float>::Zero;
     CudnnTensor x_tensor;
     CudnnTensor y_tensor;
-    ORT_RETURN_IF_ERROR(x_tensor.Set(x_dims_cudnn, CudnnTensor::GetDataType<float>()));
-    ORT_RETURN_IF_ERROR(y_tensor.Set(y_dims_cudnn, CudnnTensor::GetDataType<float>()));
+    ORT_RETURN_IF_ERROR(x_tensor.Set(x_dims_cudnn, CudnnTensor::GetDataType<float>(), NHWC));
+    ORT_RETURN_IF_ERROR(y_tensor.Set(y_dims_cudnn, CudnnTensor::GetDataType<float>(), NHWC));
 
     const auto input_count = x_shape.Size();
     const auto output_count = y_shape.Size();
@@ -212,24 +217,26 @@ Status Pool<T, PoolType>::ComputeInternal(OpKernelContext* context) const {
     IAllocatorUniquePtr<float> temp_X = GetScratchBuffer<float>(input_count, context->GetComputeStream());
     auto temp_Y = GetScratchBuffer<float>(output_count, context->GetComputeStream());
     Impl_Cast<CudaT, float>(Stream(context), reinterpret_cast<const CudaT*>(x_data), temp_X.get(), input_count);
-    CUDNN_RETURN_IF_ERROR(PoolingForwardHelper(GetCudnnHandle(context), pooling_desc, &alpha, x_tensor, temp_X.get(), &beta, y_tensor, temp_Y.get()));
+    CUDNN_RETURN_IF_ERROR(PoolingForwardHelper(GetCudnnHandle(context), pooling_desc, &alpha, x_tensor, temp_X.get(),
+                                               &beta, y_tensor, temp_Y.get()));
     Impl_Cast<float, CudaT>(Stream(context), temp_Y.get(), y_data, output_count);
   } else {
     const auto alpha = Consts<CudaT>::One;
     const auto beta = Consts<CudaT>::Zero;
     CudnnTensor x_tensor;
     CudnnTensor y_tensor;
-    ORT_RETURN_IF_ERROR(x_tensor.Set(x_dims_cudnn, CudnnTensor::GetDataType<CudaT>()));
-    ORT_RETURN_IF_ERROR(y_tensor.Set(y_dims_cudnn, CudnnTensor::GetDataType<CudaT>()));
+    ORT_RETURN_IF_ERROR(x_tensor.Set(x_dims_cudnn, CudnnTensor::GetDataType<CudaT>(), NHWC));
+    ORT_RETURN_IF_ERROR(y_tensor.Set(y_dims_cudnn, CudnnTensor::GetDataType<CudaT>(), NHWC));
 
-    CUDNN_RETURN_IF_ERROR(PoolingForwardHelper(GetCudnnHandle(context), pooling_desc, &alpha, x_tensor, x_data, &beta, y_tensor, y_data));
+    CUDNN_RETURN_IF_ERROR(
+        PoolingForwardHelper(GetCudnnHandle(context), pooling_desc, &alpha, x_tensor, x_data, &beta, y_tensor, y_data));
   }
 
   return Status::OK();
 }
 
-template <typename T>
-Status Pool<T, MaxPool<8>>::ComputeInternal(OpKernelContext* context) const {
+template <typename T, bool NHWC>
+Status Pool<T, MaxPool<8>, NHWC>::ComputeInternal(OpKernelContext* context) const {
   typedef typename ToCudaType<T>::MappedType CudaT;
   const Tensor* X = context->Input<Tensor>(0);
   const TensorShape& x_shape = X->Shape();
@@ -248,13 +255,12 @@ Status Pool<T, MaxPool<8>>::ComputeInternal(OpKernelContext* context) const {
     pads.assign(kernel_shape.size(), 0);
     strides.assign(kernel_shape.size(), 1);
   }
-
-  auto y_dims = this->pool_attrs_.SetOutputSize(x_shape, x_shape[1], &pads);
+  auto out_channel = NHWC ? x_shape[3] : x_shape[1];
+  auto y_dims = this->pool_attrs_.SetOutputSize(x_shape, out_channel, &pads, NHWC);
   Tensor* Y = context->Output(0, TensorShape(y_dims));
 
   // special case when there is a dim value of 0 in the shape.
-  if (Y->Shape().Size() == 0)
-    return Status::OK();
+  if (Y->Shape().Size() == 0) return Status::OK();
 
   auto x_data = reinterpret_cast<const CudaT*>(X->Data<T>());
   auto y_data = reinterpret_cast<CudaT*>(Y->MutableData<T>());
@@ -262,20 +268,10 @@ Status Pool<T, MaxPool<8>>::ComputeInternal(OpKernelContext* context) const {
   Tensor* I = context->Output(1, TensorShape(y_dims));
   if (nullptr != I || !this->pool_attrs_.default_dilations) {
     auto i_data = nullptr == I ? nullptr : I->MutableData<int64_t>();
-    MaxPoolWithIndex<CudaT>(
-        this->Stream(context),
-        x_shape,
-        TensorShape(y_dims),
-        kernel_shape,
-        strides,
-        pads,
-        this->pool_attrs_.dilations,
-        this->pool_attrs_.storage_order,
-        x_data,
-        y_data,
-        i_data);
+    MaxPoolWithIndex<CudaT>(this->Stream(context), x_shape, TensorShape(y_dims), kernel_shape, strides, pads,
+                            this->pool_attrs_.dilations, this->pool_attrs_.storage_order, x_data, y_data, i_data);
   } else {
-    ORT_RETURN_IF_ERROR((Pool<T, MaxPool<1>>::ComputeInternal(context)));
+    ORT_RETURN_IF_ERROR((Pool<T, MaxPool<1>, NHWC>::ComputeInternal(context)));
   }
   return Status::OK();
 }
diff --git a/onnxruntime/core/providers/cuda/nn/pool.h b/onnxruntime/core/providers/cuda/nn/pool.h
index fb223c18d2625..8b5152a1565a9 100644
--- a/onnxruntime/core/providers/cuda/nn/pool.h
+++ b/onnxruntime/core/providers/cuda/nn/pool.h
@@ -1,4 +1,5 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
+// Copyright (c) 2023 NVIDIA Corporation.
 // Licensed under the MIT License.
 
 #pragma once
@@ -10,7 +11,7 @@
 namespace onnxruntime {
 namespace cuda {
 
-template <typename T, typename PoolType>
+template <typename T, typename PoolType, bool NHWC>
 class Pool : public CudaKernel, public PoolBase {
  public:
   Pool(const OpKernelInfo& info) : CudaKernel(info), PoolBase(info) {}
@@ -18,10 +19,10 @@ class Pool : public CudaKernel, public PoolBase {
   Status ComputeInternal(OpKernelContext* context) const override;
 };
 
-template <typename T>
-class Pool<T, MaxPool<8>> final : public Pool<T, MaxPool<1>> {
+template <typename T, bool NHWC>
+class Pool<T, MaxPool<8>, NHWC> final : public Pool<T, MaxPool<1>, NHWC> {
  public:
-  Pool(const OpKernelInfo& info) : Pool<T, MaxPool<1>>(info) {}
+  explicit Pool(const OpKernelInfo& info) : Pool<T, MaxPool<1>, NHWC>(info) {}
 
   Status ComputeInternal(OpKernelContext* context) const override;
 };
diff --git a/onnxruntime/core/providers/cuda/reduction/reduction_ops.cc b/onnxruntime/core/providers/cuda/reduction/reduction_ops.cc
index 2f057d53d5607..bc78e577c5052 100644
--- a/onnxruntime/core/providers/cuda/reduction/reduction_ops.cc
+++ b/onnxruntime/core/providers/cuda/reduction/reduction_ops.cc
@@ -16,140 +16,29 @@ using namespace onnxruntime::common;
 namespace onnxruntime {
 namespace cuda {
 
-// opset 11 explicitly added support for negative axis. implementation already allowed it.
-#define REGISTER_KERNEL_TYPED(name, T)                                                     \
+#define REGISTER_KERNEL_UNTIL_VERSIONED_TYPED(name, T, end)                                \
   ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_EX(                                                 \
       name,                                                                                \
       kOnnxDomain,                                                                         \
-      1, 10,                                                                               \
-      T,                                                                                   \
-      kCudaExecutionProvider,                                                              \
-      (*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), \
-      name<T>);                                                                            \
-  ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_EX(                                                 \
-      name,                                                                                \
-      kOnnxDomain,                                                                         \
-      11, 12,                                                                              \
-      T,                                                                                   \
-      kCudaExecutionProvider,                                                              \
-      (*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), \
-      name<T>);                                                                            \
-  ONNX_OPERATOR_TYPED_KERNEL_EX(                                                           \
-      name,                                                                                \
-      kOnnxDomain,                                                                         \
-      13,                                                                                  \
+      1, end,                                                                              \
       T,                                                                                   \
       kCudaExecutionProvider,                                                              \
       (*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), \
       name<T>);
 
-#define REGISTER_KERNEL_VERSIONED_TYPED_12(name, T)                                        \
-  ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_EX(                                                 \
-      name,                                                                                \
-      kOnnxDomain,                                                                         \
-      1, 10,                                                                               \
-      T,                                                                                   \
-      kCudaExecutionProvider,                                                              \
-      (*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), \
-      name<T>);                                                                            \
-  ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_EX(                                                 \
-      name,                                                                                \
-      kOnnxDomain,                                                                         \
-      11, 11,                                                                              \
-      T,                                                                                   \
-      kCudaExecutionProvider,                                                              \
-      (*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), \
-      name<T>);                                                                            \
-  ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_EX(                                                 \
-      name,                                                                                \
-      kOnnxDomain,                                                                         \
-      12, 12,                                                                              \
-      T,                                                                                   \
-      kCudaExecutionProvider,                                                              \
-      (*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), \
+#define REGISTER_KERNEL_TYPED_AXES_INPUT(name, T, version)                                                                        \
+  ONNX_OPERATOR_TYPED_KERNEL_EX(                                                                                                  \
+      name,                                                                                                                       \
+      kOnnxDomain,                                                                                                                \
+      version,                                                                                                                    \
+      T,                                                                                                                          \
+      kCudaExecutionProvider,                                                                                                     \
+      (*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<T>()).InputMemoryType(OrtMemTypeCPUInput, 1), \
       name<T>);
 
-// Register those with changes in OpSet12.
-#define REGISTER_KERNEL_TYPED_13_WITH_VERSIONED_12(name, T)                                \
-  REGISTER_KERNEL_VERSIONED_TYPED_12(name, T)                                              \
-  ONNX_OPERATOR_TYPED_KERNEL_EX(                                                           \
-      name,                                                                                \
-      kOnnxDomain,                                                                         \
-      13,                                                                                  \
-      T,                                                                                   \
-      kCudaExecutionProvider,                                                              \
-      (*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), \
-      name<T>);
-
-#define REGISTER_KERNEL_VERSIONED_TYPED_13(name, T)                                        \
-  REGISTER_KERNEL_VERSIONED_TYPED_12(name, T)                                              \
-  ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_EX(                                                 \
-      name,                                                                                \
-      kOnnxDomain,                                                                         \
-      13, 13,                                                                              \
-      T,                                                                                   \
-      kCudaExecutionProvider,                                                              \
-      (*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), \
-      name<T>);
-
-// Register ReduceMin int64_t support in OpSet14.
-#define REGISTER_KERNEL_TYPED_14(name, T)                                                  \
-  ONNX_OPERATOR_TYPED_KERNEL_EX(                                                           \
-      name,                                                                                \
-      kOnnxDomain,                                                                         \
-      14,                                                                                  \
-      T,                                                                                   \
-      kCudaExecutionProvider,                                                              \
-      (*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), \
-      name<T>);
-
-// CUDA ArgMax/ArgMin doesn't have OpSet12+ implementation (with select_last_index attr) yet
-#define REGISTER_KERNEL_VERSIONED_TYPED_11(name, T)                                        \
-  ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_EX(                                                 \
-      name,                                                                                \
-      kOnnxDomain,                                                                         \
-      1, 10,                                                                               \
-      T,                                                                                   \
-      kCudaExecutionProvider,                                                              \
-      (*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), \
-      name<T>);                                                                            \
-  ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_EX(                                                 \
-      name,                                                                                \
-      kOnnxDomain,                                                                         \
-      11, 11,                                                                              \
-      T,                                                                                   \
-      kCudaExecutionProvider,                                                              \
-      (*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), \
-      name<T>);
-
-// Register with the latest version 13
-#define REGISTER_KERNEL_TYPED_13(name, T)                                                  \
-  ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_EX(                                                 \
-      name,                                                                                \
-      kOnnxDomain,                                                                         \
-      1, 10,                                                                               \
-      T,                                                                                   \
-      kCudaExecutionProvider,                                                              \
-      (*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), \
-      name<T>);                                                                            \
-  ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_EX(                                                 \
-      name,                                                                                \
-      kOnnxDomain,                                                                         \
-      11, 12,                                                                              \
-      T,                                                                                   \
-      kCudaExecutionProvider,                                                              \
-      (*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), \
-      name<T>);                                                                            \
-  ONNX_OPERATOR_TYPED_KERNEL_EX(                                                           \
-      name,                                                                                \
-      kOnnxDomain,                                                                         \
-      13,                                                                                  \
-      T,                                                                                   \
-      kCudaExecutionProvider,                                                              \
-      (*KernelDefBuilder::Create())                                                        \
-          .InputMemoryType(OrtMemTypeCPUInput, 1)                                          \
-          .TypeConstraint("T", DataTypeImpl::GetTensorType<T>()),                          \
-      name<T>);
+#define REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(name, T, last, cur) \
+  REGISTER_KERNEL_UNTIL_VERSIONED_TYPED(name, T, last)                      \
+  REGISTER_KERNEL_TYPED_AXES_INPUT(name, T, cur)
 
 // TODO ReduceKernel::ReduceKernelShared() is still used by some other training classes though it's not used here - this should be refactored.
 template <bool allow_multi_axes>
@@ -725,6 +614,30 @@ Status ReduceComputeCore(const AllocatorPtr& gpu_allocator, const Tensor& input,
   return Status::OK();
 }
 
+template Status ReduceComputeCore<float, CUDNN_REDUCE_TENSOR_NO_INDICES>(
+    const AllocatorPtr& gpu_allocator, const Tensor& input, PrepareReduceMetadata& prepare_reduce_metadata,
+    /*out*/ Tensor& output, cudnnReduceTensorOp_t cudnn_reduce_op,
+    gsl::span<const int64_t> axes,
+    bool calculate_log, bool calculate_sqt, bool log_sum_exp, bool fast_reduction,
+    Stream* ort_stream,
+    const TensorShape* input_shape_override);
+
+template Status ReduceComputeCore<double, CUDNN_REDUCE_TENSOR_NO_INDICES>(
+    const AllocatorPtr& gpu_allocator, const Tensor& input, PrepareReduceMetadata& prepare_reduce_metadata,
+    /*out*/ Tensor& output, cudnnReduceTensorOp_t cudnn_reduce_op,
+    gsl::span<const int64_t> axes,
+    bool calculate_log, bool calculate_sqt, bool log_sum_exp, bool fast_reduction,
+    Stream* ort_stream,
+    const TensorShape* input_shape_override);
+
+template Status ReduceComputeCore<MLFloat16, CUDNN_REDUCE_TENSOR_NO_INDICES>(
+    const AllocatorPtr& gpu_allocator, const Tensor& input, PrepareReduceMetadata& prepare_reduce_metadata,
+    /*out*/ Tensor& output, cudnnReduceTensorOp_t cudnn_reduce_op,
+    gsl::span<const int64_t> axes,
+    bool calculate_log, bool calculate_sqt, bool log_sum_exp, bool fast_reduction,
+    Stream* ort_stream,
+    const TensorShape* input_shape_override);
+
 template <bool allow_multi_axes>
 template <typename T, cudnnReduceTensorIndices_t ReduceTensorIndices>
 Status ReduceKernel<allow_multi_axes>::ComputeImpl(OpKernelContext* ctx, cudnnReduceTensorOp_t cudnn_reduce_op) const {
@@ -917,69 +830,76 @@ template std::unique_ptr<Tensor> ReduceCompute<MLFloat16, CUDNN_REDUCE_TENSOR_NO
 
 }  // namespace ReductionOps
 
-#define REGISTER_KERNEL_HFD(name)        \
-  REGISTER_KERNEL_TYPED(name, MLFloat16) \
-  REGISTER_KERNEL_TYPED(name, float)     \
-  REGISTER_KERNEL_TYPED(name, double)    \
-  REGISTER_KERNEL_TYPED(name, BFloat16)
-
-#define REGISTER_KERNEL_HFD_VERSIONED_11(name)        \
-  REGISTER_KERNEL_VERSIONED_TYPED_11(name, MLFloat16) \
-  REGISTER_KERNEL_VERSIONED_TYPED_11(name, float)     \
-  REGISTER_KERNEL_VERSIONED_TYPED_11(name, double)
-
-REGISTER_KERNEL_HFD_VERSIONED_11(ArgMax)
-REGISTER_KERNEL_HFD_VERSIONED_11(ArgMin)
-REGISTER_KERNEL_HFD(ReduceL1)
-REGISTER_KERNEL_HFD(ReduceL2)
-
-REGISTER_KERNEL_TYPED_13_WITH_VERSIONED_12(ReduceMax, MLFloat16)
-REGISTER_KERNEL_TYPED_13_WITH_VERSIONED_12(ReduceMax, float)
-REGISTER_KERNEL_TYPED_13_WITH_VERSIONED_12(ReduceMax, double)
-REGISTER_KERNEL_TYPED_13_WITH_VERSIONED_12(ReduceMax, int32_t)
-REGISTER_KERNEL_TYPED_13_WITH_VERSIONED_12(ReduceMax, int64_t)
-REGISTER_KERNEL_TYPED_13_WITH_VERSIONED_12(ReduceMax, int8_t)
-REGISTER_KERNEL_TYPED_13_WITH_VERSIONED_12(ReduceMax, uint8_t)
-
-REGISTER_KERNEL_HFD(ReduceMean)
-
-REGISTER_KERNEL_VERSIONED_TYPED_13(ReduceMin, MLFloat16)
-REGISTER_KERNEL_VERSIONED_TYPED_13(ReduceMin, float)
-REGISTER_KERNEL_VERSIONED_TYPED_13(ReduceMin, double)
-REGISTER_KERNEL_VERSIONED_TYPED_13(ReduceMin, int32_t)
-REGISTER_KERNEL_VERSIONED_TYPED_13(ReduceMin, int64_t)
-REGISTER_KERNEL_VERSIONED_TYPED_13(ReduceMin, int8_t)
-REGISTER_KERNEL_VERSIONED_TYPED_13(ReduceMin, uint8_t)
-
-REGISTER_KERNEL_TYPED_14(ReduceMin, MLFloat16)
-REGISTER_KERNEL_TYPED_14(ReduceMin, float)
-REGISTER_KERNEL_TYPED_14(ReduceMin, double)
-REGISTER_KERNEL_TYPED_14(ReduceMin, int32_t)
-REGISTER_KERNEL_TYPED_14(ReduceMin, int8_t)
-REGISTER_KERNEL_TYPED_14(ReduceMin, uint8_t)
-REGISTER_KERNEL_TYPED_14(ReduceMin, int64_t)
-
-REGISTER_KERNEL_HFD(ReduceProd)
-
-REGISTER_KERNEL_TYPED_13(ReduceSum, MLFloat16)
-REGISTER_KERNEL_TYPED_13(ReduceSum, float)
-REGISTER_KERNEL_TYPED_13(ReduceSum, double)
-REGISTER_KERNEL_TYPED_13(ReduceSum, int32_t)
-REGISTER_KERNEL_TYPED_13(ReduceSum, int64_t)
-REGISTER_KERNEL_TYPED_13(ReduceSum, BFloat16)
-
-REGISTER_KERNEL_HFD(ReduceLogSum)
-REGISTER_KERNEL_HFD(ReduceSumSquare)
-REGISTER_KERNEL_HFD(ReduceLogSumExp)
-
-#define REGISTER_KERNEL_INT32(name) \
-  REGISTER_KERNEL_TYPED(name, int32_t)
-
-REGISTER_KERNEL_INT32(ReduceL1)
-REGISTER_KERNEL_INT32(ReduceL2)
-REGISTER_KERNEL_INT32(ReduceMean)
-
-REGISTER_KERNEL_INT32(ReduceProd)
+// CUDA ArgMax/ArgMin doesn't have OpSet12+ implementation (with select_last_index attr) yet
+REGISTER_KERNEL_UNTIL_VERSIONED_TYPED(ArgMax, MLFloat16, 11)
+REGISTER_KERNEL_UNTIL_VERSIONED_TYPED(ArgMax, float, 11)
+REGISTER_KERNEL_UNTIL_VERSIONED_TYPED(ArgMax, double, 11)
+
+REGISTER_KERNEL_UNTIL_VERSIONED_TYPED(ArgMin, MLFloat16, 11)
+REGISTER_KERNEL_UNTIL_VERSIONED_TYPED(ArgMin, float, 11)
+REGISTER_KERNEL_UNTIL_VERSIONED_TYPED(ArgMin, double, 11)
+
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceMax, MLFloat16, 17, 18)
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceMax, float, 17, 18)
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceMax, double, 17, 18)
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceMax, int32_t, 17, 18)
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceMax, int64_t, 17, 18)
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceMax, int8_t, 17, 18)
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceMax, uint8_t, 17, 18)
+
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceMean, MLFloat16, 17, 18)
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceMean, float, 17, 18)
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceMean, double, 17, 18)
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceMean, BFloat16, 17, 18)
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceMean, int32_t, 17, 18)
+
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceMin, MLFloat16, 17, 18)
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceMin, float, 17, 18)
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceMin, double, 17, 18)
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceMin, int32_t, 17, 18)
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceMin, int64_t, 17, 18)
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceMin, int8_t, 17, 18)
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceMin, uint8_t, 17, 18)
+
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceProd, MLFloat16, 17, 18)
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceProd, float, 17, 18)
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceProd, double, 17, 18)
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceProd, BFloat16, 17, 18)
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceProd, int32_t, 17, 18)
+
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceSum, MLFloat16, 12, 13)
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceSum, float, 12, 13)
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceSum, double, 12, 13)
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceSum, int32_t, 12, 13)
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceSum, int64_t, 12, 13)
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceSum, BFloat16, 12, 13)
+
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceLogSum, MLFloat16, 17, 18)
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceLogSum, float, 17, 18)
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceLogSum, double, 17, 18)
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceLogSum, BFloat16, 17, 18)
+
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceSumSquare, MLFloat16, 17, 18)
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceSumSquare, float, 17, 18)
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceSumSquare, double, 17, 18)
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceSumSquare, BFloat16, 17, 18)
+
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceLogSumExp, MLFloat16, 17, 18)
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceLogSumExp, float, 17, 18)
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceLogSumExp, double, 17, 18)
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceLogSumExp, BFloat16, 17, 18)
+
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceL1, MLFloat16, 17, 18)
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceL1, float, 17, 18)
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceL1, double, 17, 18)
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceL1, BFloat16, 17, 18)
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceL1, int32_t, 17, 18)
+
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceL2, MLFloat16, 17, 18)
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceL2, float, 17, 18)
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceL2, double, 17, 18)
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceL2, BFloat16, 17, 18)
+REGISTER_KERNEL_TYPED_AXES_INPUT_WITH_VERSIONED(ReduceL2, int32_t, 17, 18)
 
 }  // namespace cuda
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/cuda/tensor/cast_op.cu b/onnxruntime/core/providers/cuda/tensor/cast_op.cu
index 7542fb55757c6..f2c2e6d7458f9 100644
--- a/onnxruntime/core/providers/cuda/tensor/cast_op.cu
+++ b/onnxruntime/core/providers/cuda/tensor/cast_op.cu
@@ -141,7 +141,7 @@ struct CastSat<Float8E5M2, half> {
 
 #endif
 
-#endif
+#endif  // DISABLE_FLOAT8_TYPES
 
 template <int NumThreadsPerBlock, int NumElementsPerThread, typename OutT, typename InT>
 __global__ void CastKernelStd(const InT* input, OutT* output, CUDA_LONG N, CastStd<OutT, InT> cast) {
diff --git a/onnxruntime/core/providers/cuda/tensor/expand.cc b/onnxruntime/core/providers/cuda/tensor/expand.cc
index e9634df205842..806ecfa1aab17 100644
--- a/onnxruntime/core/providers/cuda/tensor/expand.cc
+++ b/onnxruntime/core/providers/cuda/tensor/expand.cc
@@ -142,6 +142,86 @@ Status Expand::ComputeInternal(OpKernelContext* ctx) const {
       input_strides);
 }
 
+Status FuncExpand(
+    const CudaKernel* cuda_kernel,
+    OpKernelContext* ctx,
+    const Tensor* input_data_tensor,
+    const Tensor* /*input_shape_tensor*/,
+    Tensor* output_tensor) {
+  TensorShape output_shape = output_tensor->Shape();
+
+#ifdef ENABLE_STRIDED_TENSORS
+  // Strided output.
+  if (input_data_tensor->DataRaw() == output_tensor->DataRaw()) {
+    gsl::span<const int64_t> input_strides = input_data_tensor->Strides();
+    TensorShapeVector output_strides =
+        ComputeOutputStrides(input_data_tensor->Shape(), input_strides, output_shape);
+    output_tensor->SetShapeAndStrides(output_shape, output_strides);
+    return Status::OK();
+  }
+#endif
+
+  auto output_dims = output_shape.AsShapeVector();
+  auto input_dims = input_data_tensor->Shape().AsShapeVector();
+
+  CalcEffectiveDims(input_dims, output_dims);
+  int rank = gsl::narrow_cast<int>(output_dims.size());
+
+  TensorPitches original_input_strides(input_dims);
+  TensorPitches original_output_strides(output_dims);
+
+  TArray<int64_t> input_strides(rank);
+  for (auto i = 0; i < rank; i++) {
+    input_strides[i] = input_dims[i] == 1 ? 0 : original_input_strides[i];
+  }
+
+  TArray<fast_divmod> output_strides(rank);
+  for (auto i = 0; i < rank; i++) {
+    output_strides[i] = fast_divmod(static_cast<int>(original_output_strides[i]));
+  }
+
+  return ExpandImpl(
+      cuda_kernel->Stream(ctx),
+      input_data_tensor->DataType()->Size(),
+      gsl::narrow_cast<int>(output_shape.Size()),
+      gsl::narrow_cast<int>(input_data_tensor->Shape().Size()),
+      input_data_tensor->DataRaw(),
+      output_tensor->MutableDataRaw(),
+      output_strides,
+      input_strides);
+}
+
+std::unique_ptr<Tensor> FuncExpand(
+    const CudaKernel* cuda_kernel,
+    OpKernelContext* ctx,
+    const Tensor* input_data_tensor,
+    const Tensor* input_shape_tensor) {
+  // new shape to be expanded to
+  const auto* p_shape = input_shape_tensor->Data<int64_t>();
+  TensorShapeVector output_dims{p_shape, p_shape + input_shape_tensor->Shape().Size()};
+  TensorShape output_shape(output_dims);
+
+  ORT_ENFORCE(
+      ComputeOutputShape(
+          cuda_kernel->Node().Name(),
+          input_data_tensor->Shape(),
+          output_dims, output_shape)
+          .IsOK());
+
+  // Pre-allocate output.
+  AllocatorPtr alloc;
+  ORT_ENFORCE(ctx->GetTempSpaceAllocator(&alloc).IsOK());
+  auto output_tensor = Tensor::Create(input_data_tensor->DataType(), output_shape, alloc);
+
+  // Only assign output values when output tensor is non-empty
+  // because empty tensor doesn't own any data.
+  if (output_shape.Size() > 0) {
+    ORT_ENFORCE(FuncExpand(cuda_kernel, ctx, input_data_tensor, input_shape_tensor, output_tensor.get()).IsOK());
+  }
+
+  return output_tensor;
+}
+
 #ifdef ENABLE_STRIDED_TENSORS
 #define CREATE_EXPAND_KERNEL_DEF (*KernelDefBuilder::Create()).MayStridedOutput(0, 0)
 #else
diff --git a/onnxruntime/core/providers/cuda/tensor/expand.h b/onnxruntime/core/providers/cuda/tensor/expand.h
index 4cf4c14e61058..a0b12790017f6 100644
--- a/onnxruntime/core/providers/cuda/tensor/expand.h
+++ b/onnxruntime/core/providers/cuda/tensor/expand.h
@@ -20,5 +20,18 @@ Status ComputeOutputShape(
     const TensorShape& rhs_shape,
     TensorShape& out_shape);
 
+Status FuncExpand(
+    const CudaKernel* cuda_kernel,
+    OpKernelContext* ctx,
+    const Tensor* input_data_tensor,
+    const Tensor* /*input_shape_tensor*/,
+    Tensor* output_tensor);
+
+std::unique_ptr<Tensor> FuncExpand(
+    const CudaKernel* cuda_kernel,
+    OpKernelContext* ctx,
+    const Tensor* input_data_tensor,
+    const Tensor* input_shape_tensor);
+
 }  // namespace cuda
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/cuda/tensor/quantize_linear.cu b/onnxruntime/core/providers/cuda/tensor/quantize_linear.cu
index ad2a44793fe26..1da308811fa48 100644
--- a/onnxruntime/core/providers/cuda/tensor/quantize_linear.cu
+++ b/onnxruntime/core/providers/cuda/tensor/quantize_linear.cu
@@ -104,7 +104,7 @@ struct RoundSat<half, Float8E5M2> {
 
 #endif
 
-#endif
+#endif  // DISABLE_FLOAT8_TYPES 
 
 template <>
 struct RoundStd<half, int8_t> {
@@ -189,7 +189,7 @@ __global__ void QuantizeLinearKernelAxisSat(const InT* input, OutT* output, cons
   }
 }
 
-#endif
+#endif  // DISABLE_FLOAT8_TYPES
 
 template <class OutT, class InT>
 Status CudaQuantizeLinearStd(cudaStream_t stream, const InT* input, OutT* output, const InT* scale, const OutT* zero_point, size_t num_of_element) {
diff --git a/onnxruntime/core/providers/cuda/tensor/reshape.cc b/onnxruntime/core/providers/cuda/tensor/reshape.cc
index 3c6d900cee9a4..ab364c274a32d 100644
--- a/onnxruntime/core/providers/cuda/tensor/reshape.cc
+++ b/onnxruntime/core/providers/cuda/tensor/reshape.cc
@@ -6,6 +6,81 @@
 namespace onnxruntime {
 namespace cuda {
 
+TensorShape InferReshapeOutputShape(
+    const TensorShape& data_tensor_shape,        // Data tensor's shape.
+    const gsl::span<const int64_t>& shape_span,  // Shape that data tensor reshape to.
+    bool allow_zero) {
+  TensorShapeVector shape_vector(shape_span.begin(), shape_span.end());
+  ReshapeHelper helper(data_tensor_shape, shape_vector, allow_zero);
+  return TensorShape(shape_vector);
+}
+
+TensorShape InferReshapeOutputShape(const Tensor* src, const Tensor* shape, bool allow_zero) {
+  ORT_ENFORCE(shape != nullptr, "Cannot reshape to a null shape.");
+  ORT_ENFORCE(shape->Shape().NumDimensions() == 1, "Shape must be an 1-D tensor.");
+  ORT_ENFORCE(shape->Location().device.Type() == OrtDevice::CPU, "Shape must be on CPU.");
+
+  return InferReshapeOutputShape(
+      src->Shape(),
+      shape->template DataAsSpan<int64_t>(),
+      allow_zero);
+}
+
+Status FuncReshape(
+    const CudaKernel* cuda_kernel,
+    OpKernelContext* ctx,
+    const Tensor* X,
+    const Tensor* shape,
+    const bool /*allow_zero*/,
+    Tensor* Y) {
+  if (!X) return Status(common::ONNXRUNTIME, common::FAIL, "Missing data tensor to be reshaped.");
+  if (!shape) return Status(common::ONNXRUNTIME, common::FAIL, "Missing shape tensor for reshaping.");
+  if (shape->Shape().NumDimensions() != 1) {
+    return ORT_MAKE_STATUS(
+        ONNXRUNTIME, FAIL, "The shape tensor for reshaping must be a vector, but got ", shape->Shape(), ".");
+  }
+  if (shape->Location().device.Type() != OrtDevice::CPU) {
+    return Status(common::ONNXRUNTIME, common::FAIL, "Shape tensor must be on CPU.");
+  }
+
+  const void* src_data = X->DataRaw();
+  void* dst_data = Y->MutableDataRaw();
+  // If source and target pointers are not equal (non-inplace operation), we need to copy the data.
+  if (src_data != dst_data) {
+    ORT_ENFORCE(ctx->GetComputeStream());
+    ORT_RETURN_IF_ERROR(cuda_kernel->CopyTensor(*X, *Y, *ctx->GetComputeStream()));
+  }
+
+  return Status::OK();
+}
+
+std::unique_ptr<Tensor> FuncReshape(
+    const CudaKernel* cuda_kernel,
+    OpKernelContext* ctx,
+    const Tensor* X,
+    const Tensor* shape,
+    const bool allow_zero) {
+  // TODO(wechi): Study if Tensor can be created as view to existing tensor.
+  // This feature can refine code for re-sharding and shape broadcasting.
+
+  ORT_ENFORCE(X != nullptr, "Missing data tensor to be reshaped.");
+  ORT_ENFORCE(shape != nullptr, "Missing shape tensor for reshaping.");
+  ORT_ENFORCE(shape->Shape().NumDimensions() == 1, "The shape tensor for reshaping must be a vector, but got ", shape->Shape(), ".");
+  ORT_ENFORCE(shape->Location().device.Type() == OrtDevice::CPU, "Shape tensor must be on CPU.");
+
+  // Calculate output's shape.
+  auto dst_shape = InferReshapeOutputShape(X, shape, allow_zero);
+
+  // Pre-allocate output.
+  AllocatorPtr alloc;
+  ORT_ENFORCE(ctx->GetTempSpaceAllocator(&alloc).IsOK());
+  auto Y = Tensor::Create(X->DataType(), dst_shape, alloc);
+
+  // Do reshape. It's equivalent to memcpy.
+  ORT_ENFORCE(FuncReshape(cuda_kernel, ctx, X, shape, allow_zero, Y.get()).IsOK());
+  return Y;
+}
+
 ONNX_OPERATOR_KERNEL_EX(
     Reshape,
     kOnnxDomain,
diff --git a/onnxruntime/core/providers/cuda/tensor/reshape.h b/onnxruntime/core/providers/cuda/tensor/reshape.h
index 01e933e65888f..8f33265071ed3 100644
--- a/onnxruntime/core/providers/cuda/tensor/reshape.h
+++ b/onnxruntime/core/providers/cuda/tensor/reshape.h
@@ -10,6 +10,39 @@
 namespace onnxruntime {
 namespace cuda {
 
+// Deduce output shape from ONNX Reshape's inputs.
+//
+// Arguments:
+//  data_tensor_shape: The shape of the data tensor (i.e., 1st input).
+//  shape_span: Elements in the shape tensor (i.e., 2nd input).
+//
+// Returns:
+//  The output shape of this Reshape. No symbolic values such as "-1" or "0".
+TensorShape InferReshapeOutputShape(
+    const TensorShape& data_tensor_shape,
+    const gsl::span<const int64_t>& shape_span,
+    bool allow_zero);
+
+TensorShape InferReshapeOutputShape(
+    const Tensor* src,
+    const Tensor* shape,
+    bool allow_zero);
+
+Status FuncReshape(
+    const CudaKernel* cuda_kernel,
+    OpKernelContext* ctx,
+    const Tensor* X,
+    const Tensor* shape,
+    const bool /*allow_zero*/,
+    Tensor* Y);
+
+std::unique_ptr<Tensor> FuncReshape(
+    const CudaKernel* cuda_kernel,
+    OpKernelContext* ctx,
+    const Tensor* X,
+    const Tensor* shape,
+    const bool allow_zero);
+
 class Reshape final : public CudaKernel {
  public:
   Reshape(const OpKernelInfo& info) : CudaKernel(info),
@@ -18,27 +51,11 @@ class Reshape final : public CudaKernel {
 
   Status ComputeInternal(OpKernelContext* context) const override {
     // Copy the second input tensor into the shape vector
-    const Tensor* shapeTensor = context->Input<Tensor>(1);
-    if (shapeTensor == nullptr) return Status(common::ONNXRUNTIME, common::FAIL, "input count mismatch");
-    if (shapeTensor->Shape().NumDimensions() != 1) return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "A shape tensor must be a vector tensor, got ", shapeTensor->Shape().NumDimensions(), " dimensions");
-    auto data_span = shapeTensor->template DataAsSpan<int64_t>();
-    TensorShapeVector shape(data_span.begin(), data_span.end());
-    const Tensor* X = context->Input<Tensor>(0);
-    if (X == nullptr) return Status(common::ONNXRUNTIME, common::FAIL, "input count mismatch");
-    const TensorShape& X_shape = X->Shape();
-
-    ReshapeHelper helper(X_shape, shape, allow_zero_);
-
-    Tensor* Y = context->Output(0, TensorShape(shape));
-    const void* source = X->DataRaw();
-    void* target = Y->MutableDataRaw();
-    // If source and target pointers are not equal (non-inplace operation), we need to copy the data.
-    if (target != source) {
-      ORT_ENFORCE(context->GetComputeStream());
-      ORT_RETURN_IF_ERROR(CopyTensor(*X, *Y, *context->GetComputeStream()));
-    }
-
-    return Status::OK();
+    const Tensor* data_tensor = context->Input<Tensor>(0);
+    const Tensor* shape_tensor = context->Input<Tensor>(1);
+    const auto target_shape = InferReshapeOutputShape(data_tensor, shape_tensor, allow_zero_);
+    Tensor* output_tensor = context->Output(0, target_shape);
+    return FuncReshape(this, context, data_tensor, shape_tensor, allow_zero_, output_tensor);
   }
 
  private:
diff --git a/onnxruntime/core/providers/cuda/tensor/slice.cc b/onnxruntime/core/providers/cuda/tensor/slice.cc
index 440b19bce9fb6..db285ba547b6a 100644
--- a/onnxruntime/core/providers/cuda/tensor/slice.cc
+++ b/onnxruntime/core/providers/cuda/tensor/slice.cc
@@ -3,6 +3,7 @@
 
 #include "core/providers/cuda/tensor/slice.h"
 #include "core/providers/cpu/tensor/utils.h"
+#include "core/providers/cpu/tensor/slice_helper.h"
 #include "core/providers/cuda/tensor/slice_impl.h"
 
 namespace onnxruntime {
@@ -235,5 +236,58 @@ Status Slice<dynamic>::CallSliceImp(size_t element_size, size_t dimension_count,
                       output_shape);
 }
 
+Status FuncSlice(
+    // Use OpKernel and do a pointer cast to unify functional calls with other eps.
+    // TODO: remove CudaKernel and OpKernelContext.
+    const CudaKernel* cuda_kernel,
+    // Do NOT use ctx to access inputs and outputs.
+    // Inputs and outputs are passed in as function arguments.
+    OpKernelContext* ctx,
+    const Tensor* input,
+    const std::vector<int64_t>& starts,
+    const std::vector<int64_t>& ends,
+    const std::vector<int64_t>& axes,
+    const std::vector<int64_t>& steps,
+    Tensor* output) {
+  gsl::span<const int64_t> starts_span = gsl::make_span(starts.data(), starts.size());
+  gsl::span<const int64_t> ends_span = gsl::make_span(ends.data(), ends.size());
+  gsl::span<const int64_t> axes_span = gsl::make_span(axes.data(), axes.size());
+  gsl::span<const int64_t> steps_span = gsl::make_span(steps.data(), steps.size());
+  const auto& input_shape = input->Shape();
+  const auto input_dimensions = input_shape.GetDims();
+
+  SliceOp::PrepareForComputeMetadata compute_metadata(input_dimensions);
+
+  ORT_RETURN_IF_ERROR(
+      SliceOp::PrepareForComputeHelper(starts_span, ends_span, axes_span, steps_span, compute_metadata));
+
+  ORT_RETURN_IF_ERROR(SliceBase::FlattenOutputDims(compute_metadata.input_dimensions_, compute_metadata.output_dims_, compute_metadata.starts_,
+                                                   compute_metadata.ends_, compute_metadata.steps_, compute_metadata.p_flattened_input_dims_,
+                                                   compute_metadata.p_flattened_output_dims_));
+
+  TensorShape output_shape(compute_metadata.output_dims_);
+
+  TArray<int64_t> starts_buffer(compute_metadata.starts_);
+  TArray<int64_t> steps_buffer(compute_metadata.steps_);
+  TArray<int64_t> input_strides;
+  TArray<fast_divmod> output_strides;
+
+  ORT_RETURN_IF_ERROR(SliceCuda::ComputeSliceStrides(input_shape, input_strides, output_strides, compute_metadata));
+
+  ORT_RETURN_IF_ERROR(SliceImpl(
+      cuda_kernel->Stream(ctx),
+      input->DataType()->Size(),
+      gsl::narrow_cast<int32_t>(input_dimensions.size()),
+      starts_buffer,
+      steps_buffer,
+      input_strides,
+      output_strides,
+      input->DataRaw(),
+      output->MutableDataRaw(),
+      output_shape.Size()));
+
+  return Status::OK();
+}
+
 }  // namespace cuda
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/cuda/tensor/slice.h b/onnxruntime/core/providers/cuda/tensor/slice.h
index 444e37c2167e8..d5c53611d3421 100644
--- a/onnxruntime/core/providers/cuda/tensor/slice.h
+++ b/onnxruntime/core/providers/cuda/tensor/slice.h
@@ -38,5 +38,20 @@ class Slice : public CudaKernel, public SliceBase {
                               const TArray<fast_divmod>& output_strides, OpKernelContext* ctx,
                               const TensorShape& output_shape) const;
 };
+
+Status FuncSlice(
+    // Use OpKernel and do a pointer cast to unify functional calls with other eps.
+    // TODO: remove CudaKernel and OpKernelContext.
+    const CudaKernel* cuda_kernel,
+    // Do NOT use ctx to access inputs and outputs.
+    // Inputs and outputs are passed in as function arguments.
+    OpKernelContext* ctx,
+    const Tensor* input,
+    const std::vector<int64_t>& starts,
+    const std::vector<int64_t>& ends,
+    const std::vector<int64_t>& axes,
+    const std::vector<int64_t>& steps,
+    Tensor* output);
+
 }  // namespace cuda
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/cuda/tensor/where.cc b/onnxruntime/core/providers/cuda/tensor/where.cc
index b3f92c913a84b..4d98b3a8a145e 100644
--- a/onnxruntime/core/providers/cuda/tensor/where.cc
+++ b/onnxruntime/core/providers/cuda/tensor/where.cc
@@ -216,5 +216,6 @@ SPECIALIZED_COMPUTE(int64_t)
 SPECIALIZED_COMPUTE(float)
 SPECIALIZED_COMPUTE(double_t)
 SPECIALIZED_COMPUTE(MLFloat16)
+SPECIALIZED_COMPUTE(BFloat16)
 }  // namespace cuda
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/cuda/tensor/where_impl.cu b/onnxruntime/core/providers/cuda/tensor/where_impl.cu
index 0fbd2062ca1b2..d7909454e922c 100644
--- a/onnxruntime/core/providers/cuda/tensor/where_impl.cu
+++ b/onnxruntime/core/providers/cuda/tensor/where_impl.cu
@@ -238,6 +238,7 @@ SPECIALIZED_IMPL(int64_t)
 SPECIALIZED_IMPL(float)
 SPECIALIZED_IMPL(double_t)
 SPECIALIZED_IMPL(half)
+SPECIALIZED_IMPL(BFloat16)
 
 }  // namespace cuda
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/inc/DmlExecutionProvider.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/inc/DmlExecutionProvider.h
index 52018500b134c..cdb0338157561 100644
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/inc/DmlExecutionProvider.h
+++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/inc/DmlExecutionProvider.h
@@ -3,6 +3,9 @@
 
 #pragma once
 interface IMLOperatorRegistry;
+interface IDMLDevice;
+interface ID3D12CommandQueue;
+interface ID3D12Resource;
 
 #include "core/common/status.h"
 #include "core/framework/data_transfer.h"
@@ -28,7 +31,8 @@ namespace Dml
     std::unique_ptr<onnxruntime::IExecutionProvider> CreateExecutionProvider(
         IDMLDevice* dmlDevice,
         ID3D12CommandQueue* commandQueue,
-        bool enableMetacommands = true);
+        bool enableMetacommands,
+        bool enableDynamicGraphFusion);
 
     ID3D12Resource* GetD3D12ResourceFromAllocation(onnxruntime::IAllocator* allocator, void* ptr);
     void FlushContext(onnxruntime::IExecutionProvider* provider);
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/inc/IWinmlExecutionProvider.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/inc/IWinmlExecutionProvider.h
index 04381b6ce355c..074f13b309181 100644
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/inc/IWinmlExecutionProvider.h
+++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/inc/IWinmlExecutionProvider.h
@@ -7,11 +7,14 @@
 #include <functional>
 #include <variant>
 #include <optional>
+#include <wrl/client.h>
 
 #include "core/framework/op_kernel.h"
+#include "core/providers/dml/DmlExecutionProvider/src/DmlEdgeShapes.h"
 
 struct AbstractOperatorDesc;
 interface IMLOperatorTensor;
+interface IDMLOperator;
 struct DML_INPUT_GRAPH_EDGE_DESC;
 struct DML_OUTPUT_GRAPH_EDGE_DESC;
 struct DML_INTERMEDIATE_GRAPH_EDGE_DESC;
@@ -92,6 +95,8 @@ namespace Windows::AI::MachineLearning::Adapter
         const onnxruntime::Node& node,
         MLOperatorTensorGetter& constantInputGetter,
         const void* executionHandle,
+        const EdgeShapes* inputShapesOverrides,
+        /*out*/ EdgeShapes* outputShapes,
         /*out*/ DmlGraphNodeCreateInfo* graphNodeCreateInfo
         )>;
 
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/AbiCustomRegistry.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/AbiCustomRegistry.cpp
index ede3e7f2c2257..eb068087de4ad 100644
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/AbiCustomRegistry.cpp
+++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/AbiCustomRegistry.cpp
@@ -491,6 +491,8 @@ HRESULT STDMETHODCALLTYPE AbiCustomRegistry::RegisterOperatorKernel(
                     const onnxruntime::Node& node,
                     MLOperatorTensorGetter& constantInputGetter,
                     const void* executionHandle,
+                    const EdgeShapes* inputShapesOverrides,
+                    /*out*/ EdgeShapes* outputShapes,
                     /*out*/ DmlGraphNodeCreateInfo* graphNodeCreateInfo
                 )
                 {
@@ -498,15 +500,15 @@ HRESULT STDMETHODCALLTYPE AbiCustomRegistry::RegisterOperatorKernel(
                     onnxruntime::OpNodeProtoHelper<onnxruntime::ProtoHelperNodeContext> protoHelper(&nodeContext);
 
                     // Use the same list of required constant inputs for the shape inferrer and the kernel.
-                    EdgeShapes outputShapes;
-                    InferAndVerifyOutputSizes(node, &defaultAttributesCapture, shapeInferrerCapture.Get(), constantCpuInputCapture, constantInputGetter, nullptr, outputShapes);
+                    InferAndVerifyOutputSizes(node, &defaultAttributesCapture, shapeInferrerCapture.Get(), constantCpuInputCapture, constantInputGetter, inputShapesOverrides, *outputShapes);
 
                     // Create the kernel while allowing input shape and output shape queries according to options
                     ComPtr<DmlGraphOpKernelInfoWrapper> kernelInfoWrapper = wil::MakeOrThrow<DmlGraphOpKernelInfoWrapper>(
                             &protoHelper,
                             executionHandle,
                             true,
-                            &outputShapes,
+                            inputShapesOverrides,
+                            outputShapes,
                             &defaultAttributesCapture,
                             graphNodeCreateInfo,
                             constantCpuInputCapture,
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlCommittedResourceAllocator.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlCommittedResourceAllocator.cpp
index d9bfdc3473ca7..b696aefecf664 100644
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlCommittedResourceAllocator.cpp
+++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlCommittedResourceAllocator.cpp
@@ -13,7 +13,7 @@ namespace Dml
         ComPtr<ID3D12Resource> resource;
         auto buffer = CD3DX12_RESOURCE_DESC::Buffer(size, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS);
         ORT_THROW_IF_FAILED(m_device->CreateCommittedResource(
-            &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT),
+            unmove_ptr(CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT)),
             D3D12_HEAP_FLAG_NONE,
             &buffer,
             D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlCommon.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlCommon.h
index 9bf8c58f7a3ec..c4d260b9736df 100644
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlCommon.h
+++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlCommon.h
@@ -6,6 +6,11 @@
 #include <assert.h>
 #include "core/providers/dml/OperatorAuthorHelper/Common.h"
 
+template <typename T>
+auto unmove_ptr(T&& t) {
+    return &static_cast<T&>(t);
+}
+
 namespace Dml
 {
     using namespace OperatorHelper;
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlEdgeShapes.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlEdgeShapes.h
new file mode 100644
index 0000000000000..5ff70493252bd
--- /dev/null
+++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlEdgeShapes.h
@@ -0,0 +1,42 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+
+namespace Windows::AI::MachineLearning::Adapter
+{
+    // edges and unused edges have an empty array of dimensions.
+    class EdgeShapes
+    {
+    public:
+        EdgeShapes() = default;
+
+        EdgeShapes(size_t count) : m_shapes(count) {}
+
+        const std::vector<uint32_t>& GetShape(size_t edgeIndex) const
+        {
+            return m_shapes[edgeIndex];
+        }
+
+        std::vector<uint32_t>& GetMutableShape(size_t edgeIndex)
+        {
+            return m_shapes[edgeIndex];
+        }
+
+        size_t EdgeCount() const { return m_shapes.size(); }
+
+        void Reset(size_t edge_count)
+        {
+            m_shapes.clear();
+            m_shapes.resize(edge_count);
+        }
+
+        bool operator!=(const EdgeShapes& other) const noexcept
+        {
+            return (m_shapes != other.m_shapes);
+        }
+
+    private:
+        std::vector<std::vector<uint32_t>> m_shapes;
+    };
+}
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlGraphFusionHelper.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlGraphFusionHelper.cpp
index 51b93efb3a646..4f7ec188140b5 100644
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlGraphFusionHelper.cpp
+++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlGraphFusionHelper.cpp
@@ -1,7 +1,7 @@
 #pragma once
 
 #include "DmlGraphFusionHelper.h"
-
+#include "DmlRuntimeFusedGraphKernel.h"
 
 namespace Dml
 {
@@ -103,6 +103,36 @@ namespace DmlGraphFusionHelper
         ORT_THROW_IF_FAILED(resourceUnk->QueryInterface(resource));
     }
 
+    std::tuple<std::unique_ptr<std::byte[]>, std::vector<uint8_t>, std::byte*, size_t> UnpackInitializer(
+        const onnxruntime::Graph& graph,
+        const ONNX_NAMESPACE::TensorProto* initializer)
+    {
+        std::unique_ptr<std::byte[]> unpackedTensor;
+        std::vector<uint8_t> unpackedExternalTensor;
+        std::byte* tensorPtr = nullptr;
+        size_t tensorByteSize = 0;
+
+        // The tensor may be stored as raw data or in typed fields.
+        if (initializer->data_location() == onnx::TensorProto_DataLocation_EXTERNAL)
+        {
+            THROW_IF_NOT_OK(onnxruntime::utils::UnpackInitializerData(*initializer, graph.ModelPath(), unpackedExternalTensor));
+            tensorPtr = reinterpret_cast<std::byte*>(unpackedExternalTensor.data());
+            tensorByteSize = unpackedExternalTensor.size();
+        }
+        else if (initializer->has_raw_data())
+        {
+            tensorPtr = (std::byte*)(initializer->raw_data().c_str());
+            tensorByteSize = initializer->raw_data().size();
+        }
+        else
+        {
+            std::tie(unpackedTensor, tensorByteSize) = Windows::AI::MachineLearning::Adapter::UnpackTensor(*initializer, graph.ModelPath());
+            tensorPtr = unpackedTensor.get();
+        }
+
+        return std::make_tuple(std::move(unpackedTensor), std::move(unpackedExternalTensor), tensorPtr, tensorByteSize);
+    }
+
     void ProcessInputData(
         const ExecutionProviderImpl* providerImpl,
         const std::vector<uint8_t>& isInputsUploadedByDmlEP,
@@ -161,32 +191,11 @@ namespace DmlGraphFusionHelper
             auto iter = initializerNameToInitializerMap.find(subGraphInputArgNames[i]);
             if (iter != initializerNameToInitializerMap.end())
             {
-                std::byte* tensorPtr = nullptr;
-                size_t tensorByteSize = 0;
-                std::vector<uint8_t> unpackedExternalTensor;
-
-                std::unique_ptr<std::byte[]> unpackedTensor;
-
-                //auto& initializer = iter->second;
                 auto* initializer = iter->second.first;
+                auto [unpackedTensor, unpackedExternalTensor, tensorPtr, tensorByteSize] = UnpackInitializer(graph, initializer);
 
-                // The tensor may be stored as raw data or in typed fields.
-                if (initializer->data_location() == onnx::TensorProto_DataLocation_EXTERNAL)
-                {
-                    THROW_IF_NOT_OK(onnxruntime::utils::UnpackInitializerData(*initializer, graph.ModelPath(), unpackedExternalTensor));
-                    tensorPtr = reinterpret_cast<std::byte*>(unpackedExternalTensor.data());
-                    tensorByteSize = unpackedExternalTensor.size();
-                }
-                else if (initializer->has_raw_data())
+                if (initializer->data_location() != onnx::TensorProto_DataLocation_EXTERNAL && !initializer->has_raw_data())
                 {
-                    tensorPtr = (std::byte*)(initializer->raw_data().c_str());
-                    tensorByteSize = initializer->raw_data().size();
-                }
-                else
-                {
-                    std::tie(unpackedTensor, tensorByteSize) = Windows::AI::MachineLearning::Adapter::UnpackTensor(*initializer, graph.ModelPath());
-                    tensorPtr = unpackedTensor.get();
-
                     // Free the initializer if this is the last usage of it.
                     if (initializerToLastInputIndexMap[initializer] == i)
                     {
@@ -501,5 +510,173 @@ namespace DmlGraphFusionHelper
 
         graph.FinalizeFuseSubGraph(indexedSubGraph, fusedNode);
     }
+
+    void RegisterDynamicKernel(
+        onnxruntime::Graph& graph,
+        onnxruntime::KernelRegistry* registryForPartitionKernels,
+        const ExecutionProviderImpl* providerImpl,
+        std::unordered_map<const onnxruntime::Node*, GraphNodeProperties> graphNodePropertyMap,
+        const std::unordered_set<std::string>& dynamicCpuInputMap,
+        std::shared_ptr<const onnxruntime::IndexedSubGraph> indexedSubGraph,
+        std::unordered_map<std::string, std::pair<const ONNX_NAMESPACE::TensorProto*, bool>>&& isInitializerTransferable)
+    {
+        struct NodeInfo
+        {
+            std::string name;
+            std::string opType;
+            std::string description;
+            std::string domain;
+            onnxruntime::NodeAttributes attributes;
+            std::vector<onnxruntime::NodeArg*> inputDefPointers;
+            std::vector<onnxruntime::NodeArg*> outputDefPointers;
+        };
+
+        auto partitionNodePropsMap = DmlGraphFusionHelper::CreatePartitionNodePropsMap(
+            graph,
+            *indexedSubGraph,
+            std::move(graphNodePropertyMap));
+
+        auto modelPath = graph.ModelPath();
+
+        const gsl::span<const std::string> subGraphInputArgNames = indexedSubGraph->GetMetaDef()->inputs;
+        const gsl::span<const std::string> subGraphOutputArgNames = indexedSubGraph->GetMetaDef()->outputs;
+
+        std::vector<NodeInfo> nodesInfo;
+        nodesInfo.reserve(indexedSubGraph->nodes.size());
+
+        std::vector<const onnxruntime::NodeArg*> subgraphInputs;
+        subgraphInputs.reserve(subGraphInputArgNames.size());
+
+        std::vector<const onnxruntime::NodeArg*> subgraphOutputs;
+        subgraphOutputs.reserve(subGraphOutputArgNames.size());
+
+        std::vector<onnxruntime::NodeAttributes> nodeAttributes;
+        nodeAttributes.reserve(indexedSubGraph->nodes.size());
+
+        std::vector<std::shared_ptr<onnxruntime::NodeArg>> intermediateNodeArgs;
+
+        for (size_t sortedNodeIndex : indexedSubGraph->nodes)
+        {
+            auto node = graph.GetNode(sortedNodeIndex);
+
+            nodeAttributes.push_back(node->GetAttributes());
+
+            NodeInfo nodeInfo{};
+            nodeInfo.name = node->Name();
+            nodeInfo.opType = node->OpType();
+            nodeInfo.description = node->Description();
+            nodeInfo.domain = node->Domain();
+            nodeInfo.attributes = node->GetAttributes();
+            nodeInfo.inputDefPointers.reserve(node->InputDefs().size());
+            nodeInfo.outputDefPointers.reserve(node->OutputDefs().size());
+
+            for (const onnxruntime::NodeArg* inputDef : node->InputDefs())
+            {
+                intermediateNodeArgs.emplace_back(std::make_shared<onnxruntime::NodeArg>(inputDef->Name(), inputDef->TypeAsProto()));
+                nodeInfo.inputDefPointers.push_back(intermediateNodeArgs.back().get());
+            }
+
+            for (const onnxruntime::NodeArg* outputDef : node->OutputDefs())
+            {
+                intermediateNodeArgs.emplace_back(std::make_shared<onnxruntime::NodeArg>(outputDef->Name(), outputDef->TypeAsProto()));
+                nodeInfo.outputDefPointers.push_back(intermediateNodeArgs.back().get());
+            }
+
+            nodesInfo.push_back(std::move(nodeInfo));
+        }
+
+        for (const std::string& graphInputName : subGraphInputArgNames)
+        {
+            subgraphInputs.push_back(graph.GetNodeArg(graphInputName));
+        }
+
+        for (const std::string& graphOutputName : subGraphOutputArgNames)
+        {
+            subgraphOutputs.push_back(graph.GetNodeArg(graphOutputName));
+        }
+
+        // We need to keep the initializers alive since they will be freed once the nodes are removed from the graph
+        std::vector<ONNX_NAMESPACE::TensorProto> ownedInitializers;
+        ownedInitializers.reserve(isInitializerTransferable.size());
+
+        for (auto& kvp : isInitializerTransferable)
+        {
+            auto [unpackedTensor, unpackedExternalTensor, tensorPtr, tensorByteSize] = UnpackInitializer(graph, kvp.second.first);
+
+            ONNX_NAMESPACE::TensorProto tensorProto;
+            tensorProto.set_data_type(kvp.second.first->data_type());
+            tensorProto.set_raw_data(tensorPtr, tensorByteSize);
+            tensorProto.set_name(kvp.second.first->name());
+
+            for (int i = 0; i < kvp.second.first->dims_size(); ++i)
+            {
+                tensorProto.add_dims(kvp.second.first->dims(i));
+            }
+            ownedInitializers.push_back(std::move(tensorProto));
+            kvp.second.first = &ownedInitializers.back();
+        }
+
+        // lamda captures for the kernel registration
+        auto fused_kernel_func = [
+            indexedSubGraph,
+            &modelPath,
+            nodesInfo = std::move(nodesInfo),
+            intermediateNodeArgs = std::move(intermediateNodeArgs),
+            subgraphInputs = std::move(subgraphInputs),
+            subgraphOutputs = std::move(subgraphOutputs),
+            partitionNodePropsMap = std::move(partitionNodePropsMap),
+            ownedInitializers = std::move(ownedInitializers)] (onnxruntime::FuncManager& func_mgr, const onnxruntime::OpKernelInfo& info, std::unique_ptr<onnxruntime::OpKernel>& out) mutable ->onnxruntime::Status
+        {
+            std::vector<std::shared_ptr<onnxruntime::Node>> subgraphNodes;
+            subgraphNodes.reserve(nodesInfo.size());
+
+            for (const NodeInfo& nodeInfo : nodesInfo)
+            {
+                subgraphNodes.emplace_back(std::make_shared<onnxruntime::Node>(
+                    nodeInfo.name,
+                    nodeInfo.opType,
+                    nodeInfo.description,
+                    nodeInfo.inputDefPointers,
+                    nodeInfo.outputDefPointers,
+                    &nodeInfo.attributes,
+                    nodeInfo.domain));
+            }
+
+            out.reset(CreateRuntimeFusedGraphKernel(
+                info,
+                indexedSubGraph,
+                modelPath,
+                std::move(subgraphNodes),
+                std::move(subgraphInputs),
+                std::move(subgraphOutputs),
+                std::move(intermediateNodeArgs),
+                std::move(partitionNodePropsMap),
+                std::move(ownedInitializers)));
+            return Status::OK();
+        };
+
+        // build the kernel definition on the fly, and register it to the fused_kernel_regisitry.
+        onnxruntime::KernelDefBuilder builder;
+        builder.SetName(indexedSubGraph->GetMetaDef()->name)
+            .SetDomain(indexedSubGraph->GetMetaDef()->domain)
+            .SinceVersion(indexedSubGraph->GetMetaDef()->since_version)
+            .Provider(onnxruntime::kDmlExecutionProvider);
+
+        // Force the CPU inputs to be allocated on the CPU
+        for (int i = 0; i < subGraphInputArgNames.size(); ++i)
+        {
+            if (dynamicCpuInputMap.find(subGraphInputArgNames[i]) != dynamicCpuInputMap.end())
+            {
+                builder.InputMemoryType(OrtMemTypeCPUInput, i);
+            }
+        }
+
+        ORT_THROW_IF_ERROR(registryForPartitionKernels->Register(builder, fused_kernel_func));
+
+        auto& fusedNode = graph.BeginFuseSubGraph(*indexedSubGraph, indexedSubGraph->GetMetaDef()->name);
+        fusedNode.SetExecutionProviderType(onnxruntime::kDmlExecutionProvider);
+
+        graph.FinalizeFuseSubGraph(*indexedSubGraph, fusedNode);
+    }
 }
 }
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlGraphFusionHelper.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlGraphFusionHelper.h
index 030cffc2a8794..f8f6162aaa1e0 100644
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlGraphFusionHelper.h
+++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlGraphFusionHelper.h
@@ -80,5 +80,14 @@ namespace DmlGraphFusionHelper
         std::vector<uint8_t>&& isInputsUploadedByDmlEP,
         const GraphDescBuilder::GraphDesc& graphDesc,
         Microsoft::WRL::ComPtr<IDMLCompiledOperator> compiledExecutionPlanOperator);
+
+    void RegisterDynamicKernel(
+        onnxruntime::Graph& graph,
+        onnxruntime::KernelRegistry* registryForPartitionKernels,
+        const ExecutionProviderImpl* providerImpl,
+        std::unordered_map<const onnxruntime::Node*, GraphNodeProperties> graphNodePropertyMap,
+        const std::unordered_set<std::string>& dynamicCpuInputMap,
+        std::shared_ptr<const onnxruntime::IndexedSubGraph> indexedSubGraph,
+        std::unordered_map<std::string, std::pair<const ONNX_NAMESPACE::TensorProto*, bool>>&& isInitializerTransferable);
 }
 }
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlGraphFusionTransformer.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlGraphFusionTransformer.cpp
index 4813707cdf50c..679738b639ec9 100644
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlGraphFusionTransformer.cpp
+++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlGraphFusionTransformer.cpp
@@ -15,6 +15,18 @@
 
 namespace Dml
 {
+    namespace
+    {
+        struct CompiledPartitionInfo
+        {
+            Microsoft::WRL::ComPtr<IDMLCompiledOperator> compiledOperator;
+            onnxruntime::IndexedSubGraph indexedSubGraph;
+            std::vector<uint8_t> isInputsUploadedByDmlEP;
+            GraphDescBuilder::GraphDesc graphDesc;
+            std::unordered_map<std::string, std::pair<const ONNX_NAMESPACE::TensorProto*, bool>> isInitializerTransferable;
+        };
+    }
+
     DmlGraphFusionTransformer::DmlGraphFusionTransformer(
         const std::string& name,
         const onnxruntime::IExecutionProvider* provider
@@ -24,15 +36,6 @@ namespace Dml
     {
     }
 
-    struct CompiledPartitionInfo
-    {
-        Microsoft::WRL::ComPtr<IDMLCompiledOperator> compiledOperator;
-        onnxruntime::IndexedSubGraph indexedSubGraph;
-        std::vector<uint8_t> isInputsUploadedByDmlEP;
-        GraphDescBuilder::GraphDesc graphDesc;
-        std::unordered_map<std::string, std::pair<const ONNX_NAMESPACE::TensorProto*, bool>> isInitializerTransferable;
-    };
-
     onnxruntime::common::Status DmlGraphFusionTransformer::ApplyImpl(
         onnxruntime::Graph& graph,
         bool& modified,
@@ -87,6 +90,7 @@ namespace Dml
         {
             // Initializers needed by any graph partition
             std::unordered_set<std::string> requiredInitializerMap;
+            std::unordered_set<std::string> dynamicCpuInputMap;
             std::unordered_map<const onnxruntime::Node*, GraphNodeProperties> graphNodePropertyMap;
             onnxruntime::GraphViewer graphViewer(graph);
             std::vector<std::unique_ptr<GraphPartition>> partitions = BuildPartitions(
@@ -96,8 +100,10 @@ namespace Dml
                 m_providerImpl->GetSupportedDeviceDataTypeMask(),
                 graphNodePropertyMap,
                 requiredInitializerMap,
+                dynamicCpuInputMap,
                 additionalSplittingNodes,
-                implicitInputDefs);
+                implicitInputDefs,
+                false);
 
             // Reset the splitting nodes for the current iteration
             additionalSplittingNodes.clear();
@@ -190,17 +196,48 @@ namespace Dml
                         std::move(graphNodePropertyMap));
 
                     // Convert partitionONNXGraph into DML EP GraphDesc
+                    auto modelPath = graph.ModelPath();
+
+                    const gsl::span<const std::string> subGraphInputArgNames = indexedSubGraph.GetMetaDef()->inputs;
+                    const gsl::span<const std::string> subGraphOutputArgNames = indexedSubGraph.GetMetaDef()->outputs;
+
+                    std::vector<const onnxruntime::Node*> subgraphNodes;
+                    subgraphNodes.reserve(indexedSubGraph.nodes.size());
+
+                    std::vector<const onnxruntime::NodeArg*> subgraphInputs;
+                    subgraphInputs.reserve(subGraphInputArgNames.size());
+
+                    std::vector<const onnxruntime::NodeArg*> subgraphOutputs;
+                    subgraphOutputs.reserve(subGraphOutputArgNames.size());
+
+                    for (size_t sortedNodeIndex : indexedSubGraph.nodes)
+                    {
+                        subgraphNodes.push_back(graph.GetNode(sortedNodeIndex));
+                    }
+
+                    for (const std::string& graphInputName : subGraphInputArgNames)
+                    {
+                        subgraphInputs.push_back(graph.GetNodeArg(graphInputName));
+                    }
+
+                    for (const std::string& graphOutputName : subGraphOutputArgNames)
+                    {
+                        subgraphOutputs.push_back(graph.GetNodeArg(graphOutputName));
+                    }
+
                     ComPtr<IDMLDevice> device;
                     ORT_THROW_IF_FAILED(m_providerImpl->GetDmlDevice(device.GetAddressOf()));
                     GraphDescBuilder::GraphDesc graphDesc = GraphDescBuilder::BuildGraphDesc(
                         isInputsUploadedByDmlEP.data(),
                         isInputsUploadedByDmlEP.size(),
                         isInitializerTransferable,
-                        graph,
-                        indexedSubGraph,
                         partitionNodePropsMap,
                         device.Get(),
-                        m_providerImpl);
+                        m_providerImpl,
+                        modelPath,
+                        subgraphNodes,
+                        subgraphInputs,
+                        subgraphOutputs);
 
                     // Compile the operator
                     auto compiledPartition = DmlGraphFusionHelper::TryCreateCompiledOperator(
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlRuntimeFusedGraphKernel.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlRuntimeFusedGraphKernel.cpp
new file mode 100644
index 0000000000000..5c7b7bff1e370
--- /dev/null
+++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlRuntimeFusedGraphKernel.cpp
@@ -0,0 +1,369 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "precomp.h"
+
+#include "core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.h"
+#include "core/providers/dml/DmlExecutionProvider/src/DmlRuntimeFusedGraphKernel.h"
+#include "core/providers/dml/DmlExecutionProvider/src/DmlGraphFusionHelper.h"
+
+using namespace Windows::AI::MachineLearning::Adapter;
+
+namespace Dml
+{
+    class DmlRuntimeFusedGraphKernel : public onnxruntime::OpKernel
+    {
+    public:
+        DmlRuntimeFusedGraphKernel() = delete;
+
+        DmlRuntimeFusedGraphKernel(
+            const onnxruntime::OpKernelInfo& kernelInfo,
+            std::shared_ptr<const onnxruntime::IndexedSubGraph> indexedSubGraph,
+            const onnxruntime::Path& modelPath,
+            std::vector<std::shared_ptr<onnxruntime::Node>>&& subgraphNodes,
+            std::vector<const onnxruntime::NodeArg*>&& subgraphInputs,
+            std::vector<const onnxruntime::NodeArg*>&& subgraphOutputs,
+            std::vector<std::shared_ptr<onnxruntime::NodeArg>>&& intermediateNodeArgs,
+            std::unordered_map<std::string, GraphNodeProperties>&& partitionNodePropsMap,
+            std::vector<ONNX_NAMESPACE::TensorProto>&& ownedInitializers)
+        : OpKernel(kernelInfo),
+          m_indexedSubGraph(std::move(indexedSubGraph)),
+          m_modelPath(modelPath),
+          m_subgraphNodes(std::move(subgraphNodes)),
+          m_subgraphInputs(std::move(subgraphInputs)),
+          m_subgraphOutputs(std::move(subgraphOutputs)),
+          m_intermediateNodeArgs(std::move(intermediateNodeArgs)),
+          m_partitionNodePropsMap(std::move(partitionNodePropsMap)),
+          m_ownedInitializers(std::move(ownedInitializers))
+        {
+            for (const auto& initializer : m_ownedInitializers)
+            {
+                m_isInitializerTransferable[initializer.name()] = std::make_pair(&initializer, false);
+            }
+
+            // Get the execution provider interfaces
+            auto executionHandle = kernelInfo.GetExecutionProvider()->GetExecutionHandle();
+            if (executionHandle)
+            {
+                // We assume the execution object inherits IUnknown as its first base
+                ComPtr<IUnknown> providerExecutionObject = const_cast<IUnknown*>(static_cast<const IUnknown*>(executionHandle));
+
+                // Get the WinML-specific execution provider interface from the execution object.
+                ORT_THROW_IF_FAILED(providerExecutionObject.As(&m_provider));
+                ORT_THROW_IF_FAILED(providerExecutionObject.As(&m_winmlProvider));
+            }
+
+            m_subgraphNodePointers.reserve(m_subgraphNodes.size());
+
+            for (auto& subgraphNode : m_subgraphNodes)
+            {
+                m_subgraphNodePointers.push_back(subgraphNode.get());
+            }
+        }
+
+        void TranslateAndCompileGraph(
+            const onnxruntime::OpKernelInfo& kernelInfo,
+            std::vector<Microsoft::WRL::ComPtr<ID3D12Resource>>& initializeResourceRefs,
+            std::vector<DML_BUFFER_BINDING> initInputBindings) const
+        {
+            // Allocate a persistent resource and initialize the operator
+            UINT64 persistentResourceSize = m_compiledExecutionPlanOperator->GetBindingProperties().PersistentResourceSize;
+            if (persistentResourceSize > 0)
+            {
+                ORT_THROW_IF_FAILED(m_provider->AllocatePooledResource(
+                    static_cast<size_t>(persistentResourceSize),
+                    AllocatorRoundingMode::Disabled,
+                    m_persistentResource.ReleaseAndGetAddressOf(),
+                    m_persistentResourceAllocatorUnk.ReleaseAndGetAddressOf()));
+
+                m_persistentResourceBinding = DML_BUFFER_BINDING { m_persistentResource.Get(), 0, persistentResourceSize };
+            }
+
+            ORT_THROW_IF_FAILED(m_provider->InitializeOperator(
+                m_compiledExecutionPlanOperator.Get(),
+                m_persistentResourceBinding ? &*m_persistentResourceBinding : nullptr,
+                gsl::make_span(initInputBindings)));
+
+            std::for_each(
+                initializeResourceRefs.begin(),
+                initializeResourceRefs.end(),
+                [&](ComPtr<ID3D12Resource>& resource){ m_winmlProvider->QueueReference(WRAP_GRAPHICS_UNKNOWN(resource).Get()); }
+            );
+        }
+
+        onnxruntime::Status Compute(onnxruntime::OpKernelContext* kernelContext) const override
+        {
+            ORT_THROW_HR_IF(E_UNEXPECTED, static_cast<ptrdiff_t>(m_subgraphInputs.size()) != kernelContext->InputCount());
+
+            bool recompileNeeded = m_compiledExecutionPlanOperator == nullptr;
+
+            for (int inputIndex = 0; inputIndex < kernelContext->InputCount(); ++inputIndex)
+            {
+                const auto& input = kernelContext->RequiredInput<onnxruntime::Tensor>(inputIndex);
+                const std::string& inputName = m_subgraphInputs[inputIndex]->Name();
+                auto shapeIter = m_inferredInputShapes.find(inputName);
+
+                if (shapeIter == m_inferredInputShapes.end())
+                {
+                    m_inferredInputShapes[inputName] = input.Shape();
+                    recompileNeeded = true;
+                }
+                else if (shapeIter->second != input.Shape())
+                {
+                    shapeIter->second = input.Shape();
+                    recompileNeeded = true;
+                }
+
+                // If we have CPU inputs that are not initializers (i.e. they were computed at runtime), add them to the initializer list
+                if (input.Location().device.Type() == OrtDevice::CPU)
+                {
+                    auto inputProto = onnxruntime::utils::TensorToTensorProto(input, inputName);
+
+                    // We can only avoid recompiling the graph when all CPU inputs are identical
+                    auto initializerIter = m_isInitializerTransferable.find(inputName);
+
+                    if (initializerIter != m_isInitializerTransferable.end())
+                    {
+                        if (initializerIter->second.first->raw_data().length() == inputProto.raw_data().length())
+                        {
+                            for (int i = 0; i < inputProto.raw_data().length(); ++i)
+                            {
+                                if (initializerIter->second.first->raw_data()[i] != inputProto.raw_data()[i])
+                                {
+                                    recompileNeeded = true;
+                                    break;
+                                }
+                            }
+                        }
+                        else
+                        {
+                            recompileNeeded = true;
+                        }
+                    }
+                    else
+                    {
+                        recompileNeeded = true;
+                    }
+
+                    m_ownedCpuInputs.push_back(std::make_unique<ONNX_NAMESPACE::TensorProto>(std::move(inputProto)));
+                    m_isInitializerTransferable[inputName] = std::make_pair(m_ownedCpuInputs.back().get(), false);
+                }
+            }
+
+            if (recompileNeeded)
+            {
+                // Go through all the node args and replace their shapes with the real ones
+                for (auto& nodeArg : m_intermediateNodeArgs)
+                {
+                    auto iter = m_inferredInputShapes.find(nodeArg->Name());
+                    if (iter != m_inferredInputShapes.end())
+                    {
+                        auto tensorShape = *nodeArg->Shape();
+                        ORT_THROW_HR_IF(E_UNEXPECTED, tensorShape.dim_size() != static_cast<ptrdiff_t>(iter->second.NumDimensions()));
+
+                        for (int i = 0; i < tensorShape.dim_size(); ++i)
+                        {
+                            tensorShape.mutable_dim(i)->set_dim_value(iter->second.GetDims()[i]);
+                        }
+
+                        nodeArg->SetShape(tensorShape);
+                    }
+                }
+
+                // Populate input bindings for operator initialization
+                const uint32_t fusedNodeInputCount = gsl::narrow_cast<uint32_t>(m_indexedSubGraph->GetMetaDef()->inputs.size());
+                std::vector<Microsoft::WRL::ComPtr<ID3D12Resource>> initializeResourceRefs; // For lifetime control
+                std::vector<DML_BUFFER_BINDING> initInputBindings(fusedNodeInputCount);
+                std::vector<uint8_t> isInputsUploadedByDmlEP(fusedNodeInputCount);
+                auto providerImpl = static_cast<const ExecutionProvider*>(Info().GetExecutionProvider())->GetImpl();
+
+                // Convert partitionONNXGraph into DML EP GraphDesc
+                ComPtr<IDMLDevice> device;
+                ORT_THROW_IF_FAILED(providerImpl->GetDmlDevice(device.GetAddressOf()));
+                GraphDescBuilder::GraphDesc graphDesc = GraphDescBuilder::BuildGraphDesc(
+                    isInputsUploadedByDmlEP.data(),
+                    isInputsUploadedByDmlEP.size(),
+                    m_isInitializerTransferable,
+                    m_partitionNodePropsMap,
+                    device.Get(),
+                    providerImpl,
+                    m_modelPath,
+                    m_subgraphNodePointers,
+                    m_subgraphInputs,
+                    m_subgraphOutputs);
+
+                m_outputShapes = graphDesc.outputShapes;
+
+                // Walk through each graph edge and mark used inputs
+                m_inputsUsed.resize(fusedNodeInputCount, false);
+                for (const DML_INPUT_GRAPH_EDGE_DESC& edge : graphDesc.inputEdges)
+                {
+                    m_inputsUsed[edge.GraphInputIndex] = true;
+                }
+
+                // Compile the operator
+                m_compiledExecutionPlanOperator = DmlGraphFusionHelper::TryCreateCompiledOperator(
+                    graphDesc,
+                    *m_indexedSubGraph,
+                    providerImpl);
+
+                // Queue references to objects which must be kept alive until resulting GPU work completes
+                m_winmlProvider->QueueReference(m_compiledExecutionPlanOperator.Get());
+
+                TranslateAndCompileGraph(
+                    Info(),
+                    initializeResourceRefs,
+                    initInputBindings);
+            }
+
+            // Wrap tensors as required by Dml::IExecutionProvider::ExecuteOperator
+            OpKernelContextWrapper contextWrapper(
+                kernelContext,
+                Info().GetExecutionProvider(),
+                true,
+                nullptr);
+
+            ORT_THROW_IF_FAILED(m_provider->AddUAVBarrier());
+
+            // Get input resources for execution, excluding those which were specified as owned by DML and provided
+            // at initialization instead.
+            std::vector<ComPtr<IMLOperatorTensor>> inputTensors(kernelContext->InputCount());
+            std::vector<ID3D12Resource*> inputPtrs(kernelContext->InputCount());
+
+            for (int i = 0; i < kernelContext->InputCount(); ++i)
+            {
+                if (!m_inputsUsed[i])
+                {
+                    continue;
+                }
+
+                ORT_THROW_IF_FAILED(contextWrapper.GetInputTensor(i, inputTensors[i].GetAddressOf()));
+                inputPtrs[i] = m_provider->DecodeResource(MLOperatorTensor(inputTensors[i].Get()).GetDataInterface().Get());
+            }
+
+            auto outputTensors = contextWrapper.GetOutputTensors(m_outputShapes);
+            ExecuteOperator(
+                m_compiledExecutionPlanOperator.Get(),
+                m_persistentResourceBinding ? &*m_persistentResourceBinding : nullptr,
+                inputPtrs,
+                outputTensors);
+
+            ORT_THROW_IF_FAILED(m_provider->AddUAVBarrier());
+
+            return onnxruntime::Status::OK();
+        }
+
+        void ExecuteOperator(
+            IDMLCompiledOperator* op,
+            _In_opt_ const DML_BUFFER_BINDING* persistentResourceBinding,
+            gsl::span<ID3D12Resource*> inputTensors,
+            gsl::span<IMLOperatorTensor*> outputTensors) const
+        {
+            auto FillBindingsFromTensors = [this](auto& bufferBindings, auto& bindingDescs,  gsl::span<IMLOperatorTensor*>& tensors)
+            {
+                for (IMLOperatorTensor* tensor : tensors)
+                {
+                    if (tensor)
+                    {
+                        assert(tensor->IsDataInterface());
+                        ID3D12Resource* resource = m_provider->DecodeResource(MLOperatorTensor(tensor).GetDataInterface().Get());
+                        D3D12_RESOURCE_DESC resourceDesc = resource->GetDesc();
+                        bufferBindings.push_back({ resource, 0, resourceDesc.Width });
+                        bindingDescs.push_back({ DML_BINDING_TYPE_BUFFER, &bufferBindings.back() });
+                    }
+                    else
+                    {
+                        bufferBindings.push_back({ nullptr, 0, 0 });
+                        bindingDescs.push_back({ DML_BINDING_TYPE_NONE, nullptr });
+                    }
+                }
+            };
+
+            auto FillBindingsFromBuffers = [](auto& bufferBindings, auto& bindingDescs,  gsl::span<ID3D12Resource*>& resources)
+            {
+                for (ID3D12Resource* resource : resources)
+                {
+                    if (resource)
+                    {
+                        D3D12_RESOURCE_DESC resourceDesc = resource->GetDesc();
+                        bufferBindings.push_back({ resource, 0, resourceDesc.Width });
+                        bindingDescs.push_back({ DML_BINDING_TYPE_BUFFER, &bufferBindings.back() });
+                    }
+                    else
+                    {
+                        bufferBindings.push_back({ nullptr, 0, 0 });
+                        bindingDescs.push_back({ DML_BINDING_TYPE_NONE, nullptr });
+                    }
+                }
+            };
+
+            std::vector<DML_BUFFER_BINDING> inputBufferBindings;
+            inputBufferBindings.reserve(inputTensors.size());
+            std::vector<DML_BINDING_DESC> inputBindings;
+            inputBindings.reserve(inputTensors.size());
+            FillBindingsFromBuffers(inputBufferBindings, inputBindings, inputTensors);
+
+            std::vector<DML_BUFFER_BINDING> outputBufferBindings;
+            outputBufferBindings.reserve(outputTensors.size());
+            std::vector<DML_BINDING_DESC> outputBindings;
+            outputBindings.reserve(outputTensors.size());
+            FillBindingsFromTensors(outputBufferBindings, outputBindings, outputTensors);
+
+            ORT_THROW_IF_FAILED(m_provider->ExecuteOperator(
+                op,
+                persistentResourceBinding,
+                inputBindings,
+                outputBindings));
+        }
+
+    private:
+        ComPtr<IWinmlExecutionProvider> m_winmlProvider;
+        ComPtr<Dml::IExecutionProvider> m_provider;
+
+        mutable std::optional<DML_BUFFER_BINDING> m_persistentResourceBinding;
+        std::shared_ptr<const onnxruntime::IndexedSubGraph> m_indexedSubGraph;
+        const onnxruntime::Path& m_modelPath;
+
+        std::vector<std::shared_ptr<onnxruntime::Node>> m_subgraphNodes;
+        std::vector<const onnxruntime::NodeArg*> m_subgraphInputs;
+        std::vector<const onnxruntime::NodeArg*> m_subgraphOutputs;
+        mutable std::vector<std::shared_ptr<onnxruntime::NodeArg>> m_intermediateNodeArgs;
+        std::unordered_map<std::string, GraphNodeProperties> m_partitionNodePropsMap;
+        std::vector<ONNX_NAMESPACE::TensorProto> m_ownedInitializers;
+        mutable std::unordered_map<std::string, std::pair<const ONNX_NAMESPACE::TensorProto*, bool>> m_isInitializerTransferable;
+        std::vector<const onnxruntime::Node*> m_subgraphNodePointers;
+
+        // Bindings from previous executions of a re-used command list
+        mutable std::vector<std::unique_ptr<ONNX_NAMESPACE::TensorProto>> m_ownedCpuInputs;
+        mutable ComPtr<IDMLCompiledOperator> m_compiledExecutionPlanOperator;
+        mutable std::vector<bool> m_inputsUsed;
+        mutable ComPtr<ID3D12Resource> m_persistentResource;
+        mutable ComPtr<IUnknown> m_persistentResourceAllocatorUnk; // Controls when the persistent resource is returned to the allocator
+        mutable Windows::AI::MachineLearning::Adapter::EdgeShapes m_outputShapes;
+        mutable std::unordered_map<std::string, onnxruntime::TensorShape> m_inferredInputShapes;
+    };
+
+    onnxruntime::OpKernel* CreateRuntimeFusedGraphKernel(
+        const onnxruntime::OpKernelInfo& info,
+        std::shared_ptr<const onnxruntime::IndexedSubGraph> indexedSubGraph,
+        const onnxruntime::Path& modelPath,
+        std::vector<std::shared_ptr<onnxruntime::Node>>&& subgraphNodes,
+        std::vector<const onnxruntime::NodeArg*>&& subgraphInputs,
+        std::vector<const onnxruntime::NodeArg*>&& subgraphOutputs,
+        std::vector<std::shared_ptr<onnxruntime::NodeArg>>&& intermediateNodeArgs,
+        std::unordered_map<std::string, GraphNodeProperties>&& partitionNodePropsMap,
+        std::vector<ONNX_NAMESPACE::TensorProto>&& ownedInitializers)
+    {
+        return new DmlRuntimeFusedGraphKernel(
+            info,
+            std::move(indexedSubGraph),
+            modelPath,
+            std::move(subgraphNodes),
+            std::move(subgraphInputs),
+            std::move(subgraphOutputs),
+            std::move(intermediateNodeArgs),
+            std::move(partitionNodePropsMap),
+            std::move(ownedInitializers)
+        );
+    }
+} // namespace Dml
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlRuntimeFusedGraphKernel.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlRuntimeFusedGraphKernel.h
new file mode 100644
index 0000000000000..d679c5aa5667c
--- /dev/null
+++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlRuntimeFusedGraphKernel.h
@@ -0,0 +1,21 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "core/framework/op_kernel.h"
+#include "GraphDescBuilder.h"
+#include "DmlRuntimeGraphFusionTransformer.h"
+
+namespace Dml
+{
+    onnxruntime::OpKernel* CreateRuntimeFusedGraphKernel(
+        const onnxruntime::OpKernelInfo& info,
+        std::shared_ptr<const onnxruntime::IndexedSubGraph> indexedSubGraph,
+        const onnxruntime::Path& modelPath,
+        std::vector<std::shared_ptr<onnxruntime::Node>>&& subgraphNodes,
+        std::vector<const onnxruntime::NodeArg*>&& subgraphInputs,
+        std::vector<const onnxruntime::NodeArg*>&& subgraphOutputs,
+        std::vector<std::shared_ptr<onnxruntime::NodeArg>>&& intermediateNodeArgs,
+        std::unordered_map<std::string, GraphNodeProperties>&& partitionNodePropsMap,
+        std::vector<ONNX_NAMESPACE::TensorProto>&& ownedInitializers
+    );
+} // namespace Dml
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlRuntimeGraphFusionTransformer.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlRuntimeGraphFusionTransformer.cpp
new file mode 100644
index 0000000000000..6318b0d5e2865
--- /dev/null
+++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlRuntimeGraphFusionTransformer.cpp
@@ -0,0 +1,161 @@
+#pragma once
+
+#include "precomp.h"
+#include "GraphDescBuilder.h"
+#include "ExecutionProvider.h"
+#include "DmlRuntimeGraphFusionTransformer.h"
+#include "GraphPartitioner.h"
+#include "core/framework/kernel_type_str_resolver.h"
+#include "core/framework/kernel_lookup.h"
+#include "core/optimizer/constant_sharing.h"
+#include "DmlRuntimeFusedGraphKernel.h"
+#include "MLOperatorAuthorImpl.h"
+#include "DmlGraphFusionHelper.h"
+
+namespace Dml
+{
+    namespace
+    {
+        struct CompiledPartitionInfo
+        {
+            std::shared_ptr<onnxruntime::IndexedSubGraph> indexedSubGraph;
+            std::unordered_map<std::string, std::pair<const ONNX_NAMESPACE::TensorProto*, bool>> isInitializerTransferable;
+        };
+    }
+
+    DmlRuntimeGraphFusionTransformer::DmlRuntimeGraphFusionTransformer(
+        const std::string& name,
+        const onnxruntime::IExecutionProvider* provider
+    )
+        :onnxruntime::GraphTransformer(name),
+         m_providerImpl(static_cast<const ExecutionProvider*>(provider)->GetImpl())
+    {
+    }
+
+    onnxruntime::common::Status DmlRuntimeGraphFusionTransformer::ApplyImpl(
+        onnxruntime::Graph& graph,
+        bool& modified,
+        int graphLevel,
+        const onnxruntime::logging::Logger& logger) const
+    {
+        return ApplyImplHelper(graph, modified, graphLevel, logger, {});
+    }
+
+    onnxruntime::common::Status DmlRuntimeGraphFusionTransformer::ApplyImplHelper(
+        onnxruntime::Graph& graph,
+        bool& modified,
+        int graphLevel,
+        const onnxruntime::logging::Logger& logger,
+        const std::unordered_map<std::string, const onnxruntime::NodeArg*>& implicitInputDefs) const
+    {
+        onnxruntime::ProviderType providerType = onnxruntime::kDmlExecutionProvider;
+        const gsl::not_null<const onnxruntime::KernelRegistry*> registry = m_providerImpl->GetKernelRegistry().get();
+        const auto kernelTypeStrResolver = onnxruntime::OpSchemaKernelTypeStrResolver{};
+        const auto kernelLookup = onnxruntime::KernelLookup(
+            providerType,
+            gsl::make_span(&registry, 1),
+            kernelTypeStrResolver);
+
+        onnxruntime::GraphViewer graphViewer(graph);
+        const auto& nodeTopologyList = graphViewer.GetNodesInTopologicalOrder();
+
+        for (auto nodeIndex : nodeTopologyList)
+        {
+            auto* node = graph.GetNode(nodeIndex);
+            if (!node)
+            {
+                continue;  // node was removed
+            }
+
+            std::unordered_map<std::string, const onnxruntime::NodeArg*> subgraphImplicitInputDefs;
+            for (const onnxruntime::NodeArg* inputDef : node->ImplicitInputDefs())
+            {
+                subgraphImplicitInputDefs[inputDef->Name()] = inputDef;
+            }
+
+            for (auto& entry : node->GetAttributeNameToMutableSubgraphMap())
+            {
+                auto& subgraph = *entry.second;
+                ORT_RETURN_IF_ERROR(ApplyImplHelper(subgraph, modified, graphLevel + 1, logger, subgraphImplicitInputDefs));
+            }
+        }
+
+        // Initializers needed by any graph partition
+        std::vector<onnxruntime::NodeIndex> additionalSplittingNodes;
+        std::unordered_map<const onnxruntime::Node*, GraphNodeProperties> graphNodePropertyMap;
+        std::unordered_set<std::string> requiredInitializerMap;
+        std::unordered_set<std::string> dynamicCpuInputMap;
+        std::vector<std::unique_ptr<GraphPartition>> partitions = BuildPartitions(
+            graphViewer,
+            *m_providerImpl->GetInternalRegistrationInfoMap(),
+            kernelLookup,
+            m_providerImpl->GetSupportedDeviceDataTypeMask(),
+            graphNodePropertyMap,
+            requiredInitializerMap,
+            dynamicCpuInputMap,
+            additionalSplittingNodes,
+            implicitInputDefs,
+            true);
+
+        // Reset the splitting nodes for the current iteration
+        additionalSplittingNodes.clear();
+
+        // Reset the compiled operators for the current iteration
+        std::vector<std::shared_ptr<CompiledPartitionInfo>> compiledPartitionInfos(partitions.size());
+
+        // Create a map between each initialized tensor and the partition(s) it is part of.
+        auto initializerPartitionMap = DmlGraphFusionHelper::GetInitializerToPartitionMap(graphViewer, partitions);
+
+        for (uint32_t partitionIndex = 0; partitionIndex < partitions.size(); ++partitionIndex)
+        {
+            auto& partition = partitions[partitionIndex];
+
+            if (partition->GetRootMergedPartition() != partition.get() ||
+                !partition->IsDmlPartition())
+            {
+                continue;
+            }
+
+            if (partition->IsDmlGraphPartition())
+            {
+                std::unordered_map<std::string, std::pair<const ONNX_NAMESPACE::TensorProto*, bool>> isInitializerTransferable;
+
+                std::string partitionKernelPrefix = std::to_string(m_providerImpl->GetPartitionKernelPrefixVal()) + "_";
+                m_providerImpl->IncreasePartitionKernelPrefixVal();
+
+                // populate isInitializerTransferable
+                for (const auto& input : partition->GetInputs())
+                {
+                    const onnx::TensorProto* tensor = nullptr;
+                    if (graph.GetInitializedTensor(input, tensor) && requiredInitializerMap.find(input) != requiredInitializerMap.end())
+                    {
+                        isInitializerTransferable[input] = {tensor, false};
+                    }
+                }
+
+                compiledPartitionInfos[partitionIndex] = std::make_shared<CompiledPartitionInfo>();
+                compiledPartitionInfos[partitionIndex]->indexedSubGraph = std::make_shared<onnxruntime::IndexedSubGraph>(
+                    DmlGraphFusionHelper::CreateIndexedSubGraph(partition.get(), partitionIndex, partitionKernelPrefix));
+                compiledPartitionInfos[partitionIndex]->isInitializerTransferable = std::move(isInitializerTransferable);
+            }
+        }
+
+        for (auto&& compiledPartitionInfo : compiledPartitionInfos)
+        {
+            // Null compiled operators were not DML partitions
+            if (compiledPartitionInfo)
+            {
+                DmlGraphFusionHelper::RegisterDynamicKernel(
+                    graph,
+                    m_providerImpl->GetKernelRegistry().get(),
+                    m_providerImpl,
+                    graphNodePropertyMap,
+                    dynamicCpuInputMap,
+                    std::move(compiledPartitionInfo->indexedSubGraph),
+                    std::move(compiledPartitionInfo->isInitializerTransferable));
+            }
+        }
+
+        return onnxruntime::common::Status::OK();
+    }
+}
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlRuntimeGraphFusionTransformer.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlRuntimeGraphFusionTransformer.h
new file mode 100644
index 0000000000000..cfa743e1f2b85
--- /dev/null
+++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlRuntimeGraphFusionTransformer.h
@@ -0,0 +1,42 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+#pragma once
+
+#include <string>
+#include <unordered_map>
+#include "core/optimizer/graph_transformer.h"
+#include "core/framework/execution_providers.h"
+
+namespace Dml
+{
+class ExecutionProviderImpl;
+
+class DmlRuntimeGraphFusionTransformer : public onnxruntime::GraphTransformer
+{
+public:
+    DmlRuntimeGraphFusionTransformer(
+        const std::string& name,
+        const onnxruntime::IExecutionProvider* provider
+    );
+
+public:
+    static inline const char* const DML_GRAPH_FUSION_NODE_NAME_PREFIX = "DmlRuntimeFusedNode_";
+    static inline const char* const DML_GRAPH_FUSION_NODE_DOMAIN = "DmlRuntimeFusedNodeDomain";
+
+private:
+    onnxruntime::common::Status ApplyImpl(onnxruntime::Graph& graph,
+                                          bool& modified,
+                                          int graphLevel,
+                                          const onnxruntime::logging::Logger& logger) const final;
+
+    onnxruntime::common::Status ApplyImplHelper(
+        onnxruntime::Graph& graph,
+        bool& modified,
+        int graphLevel,
+        const onnxruntime::logging::Logger& logger,
+        const std::unordered_map<std::string, const onnxruntime::NodeArg*>& implicitInputDefs) const;
+
+private:
+    const ExecutionProviderImpl* m_providerImpl = nullptr;
+};
+}
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/ExecutionProvider.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/ExecutionProvider.cpp
index f97b72aa2d385..8644b8d56a426 100644
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/ExecutionProvider.cpp
+++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/ExecutionProvider.cpp
@@ -67,7 +67,8 @@ namespace Dml
     ExecutionProvider::ExecutionProvider(
         IDMLDevice* dmlDevice,
         ID3D12CommandQueue* commandQueue,
-        bool enableMetacommands) :
+        bool enableMetacommands,
+        bool enableDynamicGraphFusion) :
             IExecutionProvider(onnxruntime::kDmlExecutionProvider, OrtDevice(OrtDevice::GPU, OrtDevice::MemType::DEFAULT, 0))
     {
         D3D12_COMMAND_LIST_TYPE queueType = commandQueue->GetDesc().Type;
@@ -80,7 +81,7 @@ namespace Dml
         ComPtr<ID3D12Device> device;
         GRAPHICS_THROW_IF_FAILED(commandQueue->GetDevice(IID_GRAPHICS_PPV_ARGS(device.GetAddressOf())));
 
-        m_impl = wil::MakeOrThrow<ExecutionProviderImpl>(dmlDevice, device.Get(), commandQueue, enableMetacommands);
+        m_impl = wil::MakeOrThrow<ExecutionProviderImpl>(dmlDevice, device.Get(), commandQueue, enableMetacommands, enableDynamicGraphFusion);
     }
 
     std::vector<std::unique_ptr<onnxruntime::ComputeCapability>>
@@ -147,12 +148,12 @@ namespace Dml
 // Task 24384515: Update ORT AIInfra release agent pool to install 19H1 SDK on VM bootstrap
 #define D3D_FEATURE_LEVEL_1_0_CORE_PRIVATE ((D3D_FEATURE_LEVEL)0x1000)
 
-    ExecutionProviderImpl::ExecutionProviderImpl(IDMLDevice* dmlDevice, ID3D12Device* d3d12Device, ID3D12CommandQueue* queue, bool enableMetacommands)
+    ExecutionProviderImpl::ExecutionProviderImpl(IDMLDevice* dmlDevice, ID3D12Device* d3d12Device, ID3D12CommandQueue* queue, bool enableMetacommands, bool enableDynamicGraphFusion)
         : m_d3d12Device(d3d12Device),
           m_dmlDevice(dmlDevice),
-          m_areMetacommandsEnabled(enableMetacommands)
+          m_areMetacommandsEnabled(enableMetacommands),
+          m_dynamicGraphFusionEnabled(enableDynamicGraphFusion)
     {
-
         D3D12_FEATURE_DATA_FEATURE_LEVELS featureLevels = {};
 
         D3D_FEATURE_LEVEL featureLevelsList[] = {
@@ -636,7 +637,7 @@ namespace Dml
 
     bool IsCpuOnDmlOperator(const onnxruntime::Node& node)
     {
-        auto cpuOnDmlOperators = std::array<char*, 8>{
+        auto cpuOnDmlOperators = std::array<const char*, 8>{
             "SequenceAt",
             "SequenceConstruct",
             "SequenceEmpty",
@@ -659,7 +660,7 @@ namespace Dml
 
     bool IsDmlSequenceOperator(const onnxruntime::Node& node)
     {
-        auto sequence_ops = std::array<char*, 1>{
+        auto sequence_ops = std::array<const char*, 1>{
             "ConcatFromSequence"
         };
 
@@ -675,7 +676,7 @@ namespace Dml
 
     bool IsCustomOpShader(const onnxruntime::Node& node)
     {
-        auto custom_ops = std::array<char*, 3>{
+        auto custom_ops = std::array<const char*, 3>{
             "DFT",
             "STFT",
             "GridSample"
@@ -1093,6 +1094,11 @@ namespace Dml
         return m_areMetacommandsEnabled;
     }
 
+    bool ExecutionProviderImpl::DynamicGraphFusionEnabled() const noexcept
+    {
+        return m_dynamicGraphFusionEnabled;
+    }
+
     std::shared_ptr<const Windows::AI::MachineLearning::Adapter::InternalRegistrationInfoMap>
     ExecutionProviderImpl::GetInternalRegistrationInfoMap() const
     {
@@ -1129,9 +1135,10 @@ namespace Dml
     std::unique_ptr<onnxruntime::IExecutionProvider> CreateExecutionProvider(
         IDMLDevice* dmlDevice,
         ID3D12CommandQueue* commandQueue,
-        bool enableMetacommands)
+        bool enableMetacommands,
+        bool enableDynamicGraphFusion)
     {
-        return std::make_unique<Dml::ExecutionProvider>(dmlDevice, commandQueue, enableMetacommands);
+        return std::make_unique<Dml::ExecutionProvider>(dmlDevice, commandQueue, enableMetacommands, enableDynamicGraphFusion);
     }
 
     ID3D12Resource* GetD3D12ResourceFromAllocation(onnxruntime::IAllocator* allocator, void* ptr)
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/ExecutionProvider.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/ExecutionProvider.h
index 31b893a2f25d7..3aaa11cdee479 100644
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/ExecutionProvider.h
+++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/ExecutionProvider.h
@@ -5,6 +5,7 @@
 
 #include "GraphTransformer.h"
 #include "core/providers/dml/DmlExecutionProvider/inc/IWinmlExecutionProvider.h"
+#include "core/providers/dml/DmlExecutionProvider/src/IExecutionProvider.h"
 
 #include <wrl/client.h>
 #include <wrl/implements.h>
@@ -34,7 +35,8 @@ namespace Dml
             IDMLDevice* dmlDevice,
             ID3D12Device* d3d12Device,
             ID3D12CommandQueue* queue,
-            bool enableMetacommands = true);
+            bool enableMetacommands,
+            bool enableDynamicGraphFusion);
 
         void ReleaseCompletedReferences();
 
@@ -150,6 +152,7 @@ namespace Dml
         STDMETHOD_(bool, IsMcdmDevice)() const noexcept final;
 
         STDMETHOD_(bool, MetacommandsEnabled)() const noexcept final;
+        bool DynamicGraphFusionEnabled() const noexcept;
         std::shared_ptr<onnxruntime::IAllocator> GetGpuAllocator();
         std::shared_ptr<onnxruntime::IAllocator> GetCpuInputAllocator();
 
@@ -184,6 +187,7 @@ namespace Dml
         ComPtr<IDMLDevice> m_dmlDevice;
         bool m_isMcdmDevice = false;
         bool m_areMetacommandsEnabled = true;
+        bool m_dynamicGraphFusionEnabled = false;
         bool m_native16BitShaderOpsSupported = false;
         std::shared_ptr<ExecutionContext> m_context;
         std::unique_ptr<PooledUploadHeap> m_uploadHeap;
@@ -236,7 +240,8 @@ namespace Dml
         explicit ExecutionProvider(
             IDMLDevice* dmlDevice,
             ID3D12CommandQueue* commandQueue,
-            bool enableMetacommands = true
+            bool enableMetacommands,
+            bool enableDynamicGraphFusion
         );
 
         std::unique_ptr<onnxruntime::IDataTransfer> GetDataTransfer() const final override
@@ -299,9 +304,9 @@ namespace Dml
             return m_impl.Get();
         }
 
-        void MetacommandsEnabled()
+        bool DynamicGraphFusionEnabled() const
         {
-            m_impl->MetacommandsEnabled();
+            return m_impl->DynamicGraphFusionEnabled();
         }
 
         virtual std::vector<onnxruntime::AllocatorPtr> CreatePreferredAllocators() override
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphDescBuilder.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphDescBuilder.cpp
index 636f46428ce99..3fc8f415e5a58 100644
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphDescBuilder.cpp
+++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphDescBuilder.cpp
@@ -147,14 +147,14 @@ namespace Dml::GraphDescBuilder
         const uint8_t* isConstGpuGraphInput,
         const size_t isConstGpuGraphInputCount,
         const std::unordered_map<std::string, std::pair<const ONNX_NAMESPACE::TensorProto*, bool>>& isInitializerTransferable,
-        const onnxruntime::Graph& graph,
-        const onnxruntime::IndexedSubGraph& indexedSubGraph,
         const std::unordered_map<std::string, GraphNodeProperties>& graphNodePropertyMap,
         IDMLDevice* device,
-        const void* executionHandle)
+        const void* executionHandle,
+        const onnxruntime::Path& modelPath,
+        gsl::span<const onnxruntime::Node* const> subgraphNodes,
+        gsl::span<const onnxruntime::NodeArg* const> subgraphInputs,
+        gsl::span<const onnxruntime::NodeArg* const> subgraphOutputs)
     {
-        const gsl::span<const std::string> subGraphInputArgNames = indexedSubGraph.GetMetaDef()->inputs;
-        const gsl::span<const std::string> subGraphOutputArgNames = indexedSubGraph.GetMetaDef()->outputs;
         struct NodeAndIndex
         {
             uint32_t nodeIndex; // The index of the node itself
@@ -164,12 +164,14 @@ namespace Dml::GraphDescBuilder
         // Map from Lotus node argument names to the new node and index where it will be produced
         std::unordered_map<std::string, NodeAndIndex> nameToNodeAndIndexMap;
 
+        std::unordered_map<std::string, EdgeShapes> nodeOutputShapes;
+
         // Map from Lotus node argument names to input indices of the fused kernel node.
         std::unordered_map<std::string, uint32_t> nameToDmlFusedNodeInputIndex;
 
-        for (size_t inputIndex = 0; inputIndex < subGraphInputArgNames.size(); ++inputIndex)
+        for (size_t inputIndex = 0; inputIndex < subgraphInputs.size(); ++inputIndex)
         {
-            const onnxruntime::NodeArg* graphInput = graph.GetNodeArg(subGraphInputArgNames[inputIndex]);
+            const onnxruntime::NodeArg* graphInput = subgraphInputs[inputIndex];
 
             if (!graphInput)
             {
@@ -196,13 +198,11 @@ namespace Dml::GraphDescBuilder
         const uint32_t minNodeCountToReuseCommandList = 5;
         bool reuseCommandList = false;
 
-        if (indexedSubGraph.nodes.size() >= minNodeCountToReuseCommandList)
+        if (subgraphNodes.size() >= minNodeCountToReuseCommandList)
         {
             reuseCommandList = true;
         }
 
-        auto modelPath = graph.ModelPath();
-
         auto constantCpuGraphInputGetter = [&isInitializerTransferable, &modelPath](const std::string& argName)
         {
             ComPtr<OnnxTensorWrapper> tensorWrapper;
@@ -219,9 +219,11 @@ namespace Dml::GraphDescBuilder
 
         // Iterate through each node and create a corresponding node in the new graph
         // We can iterate the nodes in any order because the edge connectivity will take care of the topological order
-        for (size_t sortedNodeIndex : indexedSubGraph.nodes)
+        std::unordered_map<std::string, std::vector<uint32_t>> inferredOutputShapes;
+
+        for (const onnxruntime::Node* subgraphNode : subgraphNodes)
         {
-            const onnxruntime::Node& node = *graph.GetNode(sortedNodeIndex);
+            const onnxruntime::Node& node = *subgraphNode;
 
             const GraphNodeProperties& graphNodeProps = graphNodePropertyMap.find(GetUniqueNodeName(node))->second;
             const auto& requiredConstantCpuInputs = graphNodeProps.internalRegInfo->requiredConstantCpuInputs;
@@ -244,14 +246,45 @@ namespace Dml::GraphDescBuilder
                 return tensor;
             };
 
+            EdgeShapes inputShapesOverrides(node.InputDefs().size());
+
+            // Override the input shapes with shapes that were previously inferred
+            for (int inputIndex = 0; inputIndex < node.InputDefs().size(); ++inputIndex)
+            {
+                auto inputDef = node.InputDefs()[inputIndex];
+
+                auto outputShapesIter = inferredOutputShapes.find(inputDef->Name());
+                if (outputShapesIter != inferredOutputShapes.end())
+                {
+                    inputShapesOverrides.GetMutableShape(inputIndex) = outputShapesIter->second;
+                }
+                else if (inputDef->HasTensorOrScalarShape())
+                {
+                    for (int i = 0; i < inputDef->Shape()->dim_size(); ++i)
+                    {
+                        ORT_THROW_HR_IF(E_INVALIDARG, !inputDef->Shape()->dim(i).has_dim_value());
+                        inputShapesOverrides.GetMutableShape(inputIndex).push_back(gsl::narrow_cast<uint32_t>(inputDef->Shape()->dim(i).dim_value()));
+                    }
+                }
+            }
+
+            EdgeShapes outputShapes;
             DmlGraphNodeCreateInfo graphNodeCreateInfo;
             graphNodeProps.internalRegInfo->graphNodeFactoryRegistration->factory(
                 node,
                 constantCpuNodeInputGetter,
                 executionHandle,
+                &inputShapesOverrides,
+                /*out*/ &outputShapes,
                 /*out*/ &graphNodeCreateInfo
             );
 
+            ORT_THROW_HR_IF(E_UNEXPECTED, outputShapes.EdgeCount() != node.OutputDefs().size());
+            for (int i = 0; i < node.OutputDefs().size(); ++i)
+            {
+                inferredOutputShapes[node.OutputDefs()[i]->Name()] = outputShapes.GetShape(i);
+            }
+
             // Create a map between operatorGraphNodeIndex to mainGraphNodeIndex.
             std::unordered_map<uint32_t, uint32_t> operatorGraphNodeIndexToMainGraphNodeIndexMap;
             uint32_t graphNodeCount = gsl::narrow_cast<uint32_t>(graphNodes.size());
@@ -347,6 +380,8 @@ namespace Dml::GraphDescBuilder
                         operatorGraphNodeIndexToMainGraphNodeIndexMap[operatorGraphOutputEdge.FromNodeIndex],
                         operatorGraphOutputEdge.FromNodeOutputIndex
                     };
+
+                    nodeOutputShapes[arg->Name()] = outputShapes;
                 }
             }
 
@@ -367,10 +402,12 @@ namespace Dml::GraphDescBuilder
             }
         }
 
+        EdgeShapes graphOutputShapes(subgraphOutputs.size());
+
         // Add graph output nodes, which might be in a different order from the encapsulating node
-        for (size_t outputIndex = 0; outputIndex < subGraphOutputArgNames.size(); ++outputIndex)
+        for (size_t outputIndex = 0; outputIndex < subgraphOutputs.size(); ++outputIndex)
         {
-            const onnxruntime::NodeArg* graphOutput = graph.GetNodeArg(subGraphOutputArgNames[outputIndex]);
+            const onnxruntime::NodeArg* graphOutput = subgraphOutputs[outputIndex];
 
             ORT_THROW_HR_IF_NULL_MSG(E_POINTER, graphOutput, "FusedNode's nodeArgList does not contain one of the nodeArg");
             const auto& outputNodeAndIndex = nameToNodeAndIndexMap.at(graphOutput->Name());
@@ -380,6 +417,7 @@ namespace Dml::GraphDescBuilder
             edge.FromNodeOutputIndex = outputNodeAndIndex.targetIndex;
             edge.GraphOutputIndex = gsl::narrow_cast<uint32_t>(outputIndex);
             graphOutputEdges.push_back(edge);
+            graphOutputShapes.GetMutableShape(outputIndex) = nodeOutputShapes[graphOutput->Name()].GetShape(outputNodeAndIndex.targetIndex);
         }
 
         RemoveUnconnectedNodes(graphNodes, graphInputEdges, graphIntermediateEdges, graphOutputEdges);
@@ -390,6 +428,7 @@ namespace Dml::GraphDescBuilder
         graphDesc.outputEdges = std::move(graphOutputEdges);
         graphDesc.intermediateEdges = std::move(graphIntermediateEdges);
         graphDesc.reuseCommandList = reuseCommandList;
+        graphDesc.outputShapes = std::move(graphOutputShapes);
         return graphDesc;
     }
 }
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphDescBuilder.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphDescBuilder.h
index 5c04962e55557..0039678c00e59 100644
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphDescBuilder.h
+++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphDescBuilder.h
@@ -9,10 +9,10 @@ namespace Dml
 {
     struct GraphNodeProperties
     {
-        std::shared_ptr<const Windows::AI::MachineLearning::Adapter::InternalRegistrationInfo> 
+        std::shared_ptr<const Windows::AI::MachineLearning::Adapter::InternalRegistrationInfo>
             internalRegInfo;
 
-        // These are currently passed from the partitioning step since the only DML operators current 
+        // These are currently passed from the partitioning step since the only DML operators current
         // supporting graph nodes don't customize the order of edges or shapes, other than coercing
         // dimension count.  This will change as the supported set of operators as graph nodes increases.
         Windows::AI::MachineLearning::Adapter::EdgeShapes inputShapes;
@@ -38,16 +38,19 @@ namespace Dml
             std::vector<DML_OUTPUT_GRAPH_EDGE_DESC> outputEdges;
             std::vector<DML_INTERMEDIATE_GRAPH_EDGE_DESC> intermediateEdges;
             bool reuseCommandList;
+            Windows::AI::MachineLearning::Adapter::EdgeShapes outputShapes;
         };
 
         GraphDesc BuildGraphDesc(
             const uint8_t* isConstGpuGraphInput,
             const size_t isConstGpuGraphInputCount,
             const std::unordered_map<std::string, std::pair<const ONNX_NAMESPACE::TensorProto*, bool>>& isInitializerTransferable,
-            const onnxruntime::Graph& graph,
-            const onnxruntime::IndexedSubGraph& indexedSubGraph,
             const std::unordered_map<std::string, GraphNodeProperties>& graphNodePropertyMap,
             IDMLDevice* device,
-            const void* executionHandle);
+            const void* executionHandle,
+            const onnxruntime::Path& modelPath,
+            gsl::span<const onnxruntime::Node* const> subgraphNodes,
+            gsl::span<const onnxruntime::NodeArg* const> subgraphInputs,
+            gsl::span<const onnxruntime::NodeArg* const> subgraphOutputs);
     }
-}
\ No newline at end of file
+}
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphPartitioner.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphPartitioner.cpp
index 18943878ccedc..f7a4743801d81 100644
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphPartitioner.cpp
+++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphPartitioner.cpp
@@ -151,6 +151,8 @@ namespace Dml
         _In_opt_ const std::unordered_map<std::string, GraphPartition*>* nodeNameToPartitionMap,
         _Inout_ std::unordered_map<const onnxruntime::Node*, GraphNodeProperties>& dmlNodePropertyMap,
         _Inout_ std::unordered_set<std::string>& requiredInitializerMap,
+        _Inout_ std::unordered_set<std::string>& dynamicCpuInputMap,
+        bool allowDmlGraphDynamicShapes,
         _Out_ bool* isDmlGraphNode
         )
     {
@@ -172,36 +174,68 @@ namespace Dml
 
             if (internalRegInfo && internalRegInfo->graphNodeFactoryRegistration)
             {
-                bool requiredCpuInputsConstant = true;
-                for (uint32_t inputIndex : internalRegInfo->requiredConstantCpuInputs)
+                if (allowDmlGraphDynamicShapes)
                 {
-                    if (inputIndex >= node.InputDefs().size() || !node.InputDefs()[inputIndex]->Exists())
+                    for (uint32_t inputIndex : internalRegInfo->requiredConstantCpuInputs)
                     {
-                        continue;
-                    }
+                        if (inputIndex >= node.InputDefs().size() || !node.InputDefs()[inputIndex]->Exists())
+                        {
+                            continue;
+                        }
 
-                    const onnx::TensorProto* tensor = nullptr;
-                    const std::string& inputName = node.InputDefs()[inputIndex]->Name();
+                        const onnx::TensorProto* tensor = nullptr;
+                        const std::string& inputName = node.InputDefs()[inputIndex]->Name();
 
-                    if (!graph.GetInitializedTensor(inputName, tensor))
-                    {
-                        requiredCpuInputsConstant = false;
-                        break;
+                        if (graph.GetInitializedTensor(inputName, tensor))
+                        {
+                            requiredInitializerMap.insert(inputName);
+                        }
+                        else
+                        {
+                            dynamicCpuInputMap.insert(inputName);
+                        }
                     }
 
-                    requiredInitializerMap.insert(inputName);
+                    std::optional<uint32_t> requiredInputCount = internalRegInfo->graphNodeFactoryRegistration->requiredInputCount;
+                    if (requiredInputCount == std::nullopt || *requiredInputCount == node.InputDefs().size())
+                    {
+                        *isDmlGraphNode = true;
+                        graphNodeProperty.first->second.internalRegInfo = internalRegInfo;
+                    }
                 }
-
-                std::optional<uint32_t> requiredInputCount = internalRegInfo->graphNodeFactoryRegistration->requiredInputCount;
-                if (requiredCpuInputsConstant &&
-                    TryGetStaticInputShapes( node, graphNodeProperty.first->second.inputShapes) &&
-                    !ContainsEmptyDimensions(graphNodeProperty.first->second.inputShapes, internalRegInfo->requiredConstantCpuInputs) &&
-                    TryGetStaticOutputShapes(node, graphNodeProperty.first->second.outputShapes) &&
-                    !ContainsEmptyDimensions(graphNodeProperty.first->second.outputShapes, internalRegInfo->requiredConstantCpuInputs) &&
-                    (requiredInputCount == std::nullopt || *requiredInputCount == node.InputDefs().size()))
+                else
                 {
-                    *isDmlGraphNode = true;
-                    graphNodeProperty.first->second.internalRegInfo = internalRegInfo;
+                    bool requiredCpuInputsConstant = true;
+                    for (uint32_t inputIndex : internalRegInfo->requiredConstantCpuInputs)
+                    {
+                        if (inputIndex >= node.InputDefs().size() || !node.InputDefs()[inputIndex]->Exists())
+                        {
+                            continue;
+                        }
+
+                        const onnx::TensorProto* tensor = nullptr;
+                        const std::string& inputName = node.InputDefs()[inputIndex]->Name();
+
+                        if (!graph.GetInitializedTensor(inputName, tensor))
+                        {
+                            requiredCpuInputsConstant = false;
+                            break;
+                        }
+
+                        requiredInitializerMap.insert(inputName);
+                    }
+
+                    std::optional<uint32_t> requiredInputCount = internalRegInfo->graphNodeFactoryRegistration->requiredInputCount;
+                    if (requiredCpuInputsConstant &&
+                        TryGetStaticInputShapes( node, graphNodeProperty.first->second.inputShapes) &&
+                        !ContainsEmptyDimensions(graphNodeProperty.first->second.inputShapes, internalRegInfo->requiredConstantCpuInputs) &&
+                        TryGetStaticOutputShapes(node, graphNodeProperty.first->second.outputShapes) &&
+                        !ContainsEmptyDimensions(graphNodeProperty.first->second.outputShapes, internalRegInfo->requiredConstantCpuInputs) &&
+                        (requiredInputCount == std::nullopt || *requiredInputCount == node.InputDefs().size()))
+                    {
+                        *isDmlGraphNode = true;
+                        graphNodeProperty.first->second.internalRegInfo = internalRegInfo;
+                    }
                 }
             }
         }
@@ -379,8 +413,10 @@ namespace Dml
         uint32_t supportedDeviceDataTypeMask, // Each bit corresponds to each DML_TENSOR_DATA_TYPE.
         std::unordered_map<const onnxruntime::Node*, GraphNodeProperties>& graphNodePropertyMap,
         std::unordered_set<std::string>& requiredInitializerMap,
+        std::unordered_set<std::string>& dynamicCpuInputMap,
         gsl::span<const onnxruntime::NodeIndex> additionalSplittingNodes,
-        const std::unordered_map<std::string, const onnxruntime::NodeArg*>& implicitInputs)
+        const std::unordered_map<std::string, const onnxruntime::NodeArg*>& implicitInputs,
+        bool allowDmlGraphDynamicShapes)
     {
         // Nodes are uniquely identified by the name of their first output argument
         std::vector<std::unique_ptr<GraphPartition>> partitions;
@@ -443,6 +479,8 @@ namespace Dml
                     &nodeNameToPartitionMap,
                     graphNodePropertyMap,
                     requiredInitializerMap,
+                    dynamicCpuInputMap,
+                    allowDmlGraphDynamicShapes,
                     /*out*/ &isDmlGraphNode
                 );
             }
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphPartitioner.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphPartitioner.h
index 37d577f647fb5..3bddb5ae16086 100644
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphPartitioner.h
+++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphPartitioner.h
@@ -50,6 +50,8 @@ namespace Dml
         uint32_t supportedDeviceDataTypeMask, // Each bit corresponds to each DML_TENSOR_DATA_TYPE.
         std::unordered_map<const onnxruntime::Node*, GraphNodeProperties>& graphNodePropertyMap,
         std::unordered_set<std::string>& requiredInitializerMap,
+        std::unordered_set<std::string>& dynamicCpuInputMap,
         gsl::span<const onnxruntime::NodeIndex> additionalSplittingNodes,
-        const std::unordered_map<std::string, const onnxruntime::NodeArg*>& implicitInputs);
+        const std::unordered_map<std::string, const onnxruntime::NodeArg*>& implicitInputs,
+        bool allowDmlGraphDynamicShapes);
 } // namespace Dml
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/IExecutionProvider.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/IExecutionProvider.h
index d7a0a607cdec9..a8a6d6745e908 100644
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/IExecutionProvider.h
+++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/IExecutionProvider.h
@@ -2,8 +2,15 @@
 // Licensed under the MIT License.
 
 #pragma once
+
+#include <d3d12.h>
+
 #include "core/providers/dml/DmlExecutionProvider/inc/DmlExecutionProvider.h"
 
+interface IDMLCompiledOperator;
+struct DML_BUFFER_BINDING;
+struct DML_BINDING_DESC;
+
 namespace Dml
 {
     struct Binding
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.cpp
index 6cd10e14e08d2..4deec620fe5fb 100644
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.cpp
+++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.cpp
@@ -1356,13 +1356,14 @@ namespace Windows::AI::MachineLearning::Adapter
         const onnxruntime::OpNodeProtoHelper<onnxruntime::ProtoHelperNodeContext>* protoHelper,
         const void* executionHandle,
         bool isInternalOperator,
+        const EdgeShapes* inputShapesOverrides,
         const EdgeShapes* inferredOutputShapes,
         const AttributeMap* defaultAttributes,
         DmlGraphNodeCreateInfo* graphNodeCreateInfo,
         gsl::span<const uint32_t> requiredConstantCpuInputs,
         MLOperatorTensorGetter& constantInputGetter
         )
-    :   OpNodeInfoWrapper(protoHelper, nullptr, defaultAttributes, requiredConstantCpuInputs, constantInputGetter, nullptr),
+    :   OpNodeInfoWrapper(protoHelper, inputShapesOverrides, defaultAttributes, requiredConstantCpuInputs, constantInputGetter, nullptr),
         m_inferredOutputShapes(inferredOutputShapes),
         m_internalOperator(isInternalOperator),
         m_graphNodeCreateInfo(graphNodeCreateInfo)
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.h
index a7f8bebb2de78..913997ff4ad49 100644
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.h
+++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.h
@@ -4,6 +4,7 @@
 #pragma once
 #include "core/providers/dml/DmlExecutionProvider/inc/IWinmlExecutionProvider.h"
 #include "core/providers/dml/OperatorAuthorHelper/MLOperatorAuthorHelper.h"
+#include "core/providers/dml/DmlExecutionProvider/src/DmlEdgeShapes.h"
 #include "core/framework/op_kernel.h"
 #include "core/framework/customregistry.h"
 #include "core/framework/tensorprotoutils.h"
@@ -93,42 +94,6 @@ struct AttributeValue
 
 using AttributeMap = std::map<std::string, AttributeValue>;
 
-// Encapsulation of shapes across different edges of an operator.    Non-tensor
-// edges and unused edges have an empty array of dimensions.
-class EdgeShapes
-{
-public:
-    EdgeShapes() = default;
-
-    EdgeShapes(size_t count) : m_shapes(count) {}
-
-    const std::vector<uint32_t>& GetShape(size_t edgeIndex) const
-    {
-        return m_shapes[edgeIndex];
-    }
-
-    std::vector<uint32_t>& GetMutableShape(size_t edgeIndex)
-    {
-        return m_shapes[edgeIndex];
-    }
-
-    size_t EdgeCount() const { return m_shapes.size(); }
-
-    void Reset(size_t edge_count)
-    {
-        m_shapes.clear();
-        m_shapes.resize(edge_count);
-    }
-
-    bool operator!=(const EdgeShapes& other) const noexcept
-    {
-        return (m_shapes != other.m_shapes);
-    }
-
- private:
-    std::vector<std::vector<uint32_t>> m_shapes;
-};
-
 // Base class for ABI objects which may be "Closed", at which point calls will predictably
 // fail or return a dummy value.  This is used for transient ABI context objects which
 // are passed to methods on kernel or inferencers, and which wrap Lotus objects whose lifetimes
@@ -434,6 +399,7 @@ class DmlGraphOpKernelInfoWrapper : public OpNodeInfoWrapper<
         const onnxruntime::OpNodeProtoHelper<onnxruntime::ProtoHelperNodeContext> * protoHelper,
         const void* executionHandle,
         bool isInternalOperator,
+        const EdgeShapes* inputShapesOverrides,
         const EdgeShapes* inferredOutputShapes,
         const AttributeMap* defaultAttributes,
         DmlGraphNodeCreateInfo* graphNodeCreateInfo,
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlGridSample.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlGridSample.h
index c63863853fb4e..4bbc8a4b718da 100644
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlGridSample.h
+++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlGridSample.h
@@ -51,16 +51,6 @@ namespace GridSample_float_float
     #include "GeneratedShaders/grid_sample_float_float.h"
 }
 
-namespace GridSample_double_float
-{
-    #include "GeneratedShaders/grid_sample_double_float.h"
-}
-
-namespace GridSample_bool_float
-{
-    #include "GeneratedShaders/grid_sample_bool_float.h"
-}
-
 namespace GridSample_uint16_fp16
 {
     #include "GeneratedShaders/grid_sample_uint16_fp16.h"
@@ -101,66 +91,6 @@ namespace GridSample_float_fp16
     #include "GeneratedShaders/grid_sample_float_fp16.h"
 }
 
-namespace GridSample_double_fp16
-{
-    #include "GeneratedShaders/grid_sample_double_fp16.h"
-}
-
-namespace GridSample_bool_fp16
-{
-    #include "GeneratedShaders/grid_sample_bool_fp16.h"
-}
-
-namespace GridSample_uint16_double
-{
-    #include "GeneratedShaders/grid_sample_uint16_double.h"
-}
-
-namespace GridSample_uint_double
-{
-    #include "GeneratedShaders/grid_sample_uint_double.h"
-}
-
-namespace GridSample_uint64_double
-{
-    #include "GeneratedShaders/grid_sample_uint64_double.h"
-}
-
-namespace GridSample_int16_double
-{
-    #include "GeneratedShaders/grid_sample_int16_double.h"
-}
-
-namespace GridSample_int_double
-{
-    #include "GeneratedShaders/grid_sample_int_double.h"
-}
-
-namespace GridSample_int64_double
-{
-    #include "GeneratedShaders/grid_sample_int64_double.h"
-}
-
-namespace GridSample_fp16_double
-{
-    #include "GeneratedShaders/grid_sample_fp16_double.h"
-}
-
-namespace GridSample_float_double
-{
-    #include "GeneratedShaders/grid_sample_float_double.h"
-}
-
-namespace GridSample_double_double
-{
-    #include "GeneratedShaders/grid_sample_double_double.h"
-}
-
-namespace GridSample_bool_double
-{
-    #include "GeneratedShaders/grid_sample_bool_double.h"
-}
-
 
 #include <wrl/client.h>
 #include <wrl/implements.h>
@@ -471,14 +401,6 @@ class DmlGridSampleOperator : public WRL::Base<IMLOperatorKernel>
                 computePsoDesc.CS = CD3DX12_SHADER_BYTECODE(GridSample_float_float::g_GridSample, sizeof(GridSample_float_float::g_GridSample));
                 break;
 
-                case MLOperatorTensorDataType::Double:
-                computePsoDesc.CS = CD3DX12_SHADER_BYTECODE(GridSample_double_float::g_GridSample, sizeof(GridSample_double_float::g_GridSample));
-                break;
-
-                case MLOperatorTensorDataType::Bool:
-                computePsoDesc.CS = CD3DX12_SHADER_BYTECODE(GridSample_bool_float::g_GridSample, sizeof(GridSample_bool_float::g_GridSample));
-                break;
-
                 default:
                 ORT_THROW_HR(E_INVALIDARG);
                 }
@@ -520,63 +442,6 @@ class DmlGridSampleOperator : public WRL::Base<IMLOperatorKernel>
                 computePsoDesc.CS = CD3DX12_SHADER_BYTECODE(GridSample_float_fp16::g_GridSample, sizeof(GridSample_float_fp16::g_GridSample));
                 break;
 
-                case MLOperatorTensorDataType::Double:
-                computePsoDesc.CS = CD3DX12_SHADER_BYTECODE(GridSample_double_fp16::g_GridSample, sizeof(GridSample_double_fp16::g_GridSample));
-                break;
-
-                case MLOperatorTensorDataType::Bool:
-                computePsoDesc.CS = CD3DX12_SHADER_BYTECODE(GridSample_bool_fp16::g_GridSample, sizeof(GridSample_bool_fp16::g_GridSample));
-                break;
-
-                default:
-                ORT_THROW_HR(E_INVALIDARG);
-                }
-                break;
-            }
-            case MLOperatorTensorDataType::Double:
-            {
-                switch (inputDataType)
-                {
-                case MLOperatorTensorDataType::UInt16:
-                computePsoDesc.CS = CD3DX12_SHADER_BYTECODE(GridSample_uint16_double::g_GridSample, sizeof(GridSample_uint16_double::g_GridSample));
-                break;
-
-                case MLOperatorTensorDataType::UInt32:
-                computePsoDesc.CS = CD3DX12_SHADER_BYTECODE(GridSample_uint_double::g_GridSample, sizeof(GridSample_uint_double::g_GridSample));
-                break;
-
-                case MLOperatorTensorDataType::UInt64:
-                computePsoDesc.CS = CD3DX12_SHADER_BYTECODE(GridSample_uint64_double::g_GridSample, sizeof(GridSample_uint64_double::g_GridSample));
-                break;
-
-                case MLOperatorTensorDataType::Int16:
-                computePsoDesc.CS = CD3DX12_SHADER_BYTECODE(GridSample_int16_double::g_GridSample, sizeof(GridSample_int16_double::g_GridSample));
-                break;
-
-                case MLOperatorTensorDataType::Int32:
-                computePsoDesc.CS = CD3DX12_SHADER_BYTECODE(GridSample_int_double::g_GridSample, sizeof(GridSample_int_double::g_GridSample));
-                break;
-
-                case MLOperatorTensorDataType::Int64:
-                computePsoDesc.CS = CD3DX12_SHADER_BYTECODE(GridSample_int64_double::g_GridSample, sizeof(GridSample_int64_double::g_GridSample));
-                break;
-
-                case MLOperatorTensorDataType::Float16:
-                computePsoDesc.CS = CD3DX12_SHADER_BYTECODE(GridSample_fp16_double::g_GridSample, sizeof(GridSample_fp16_double::g_GridSample));
-                break;
-
-                case MLOperatorTensorDataType::Float:
-                computePsoDesc.CS = CD3DX12_SHADER_BYTECODE(GridSample_float_double::g_GridSample, sizeof(GridSample_float_double::g_GridSample));
-                break;
-
-                case MLOperatorTensorDataType::Double:
-                computePsoDesc.CS = CD3DX12_SHADER_BYTECODE(GridSample_double_double::g_GridSample, sizeof(GridSample_double_double::g_GridSample));
-                break;
-
-                case MLOperatorTensorDataType::Bool:
-                computePsoDesc.CS = CD3DX12_SHADER_BYTECODE(GridSample_bool_double::g_GridSample, sizeof(GridSample_bool_double::g_GridSample));
-                break;
-
                 default:
                 ORT_THROW_HR(E_INVALIDARG);
                 }
@@ -901,6 +766,7 @@ class DmlGridSampleOperatorFactory : public WRL::Base<IMLOperatorKernelFactory>
         kernelDescription.executionType = MLOperatorExecutionType::D3D12;
 
         // T1: tensor(float16), tensor(float), tensor(double), tensor(bfloat16)
+        // tensor(double) is not supported for GPU
         MLOperatorEdgeTypeConstrant t1Constraint;
         t1Constraint.typeLabel = "T1";
         std::vector<MLOperatorEdgeDescription> t1AllowedEdges
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorBatchNormalization.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorBatchNormalization.cpp
index 60b235880e23f..9f9cfad670919 100644
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorBatchNormalization.cpp
+++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorBatchNormalization.cpp
@@ -143,7 +143,8 @@ class DmlOperatorBatchNormalization15 : public DmlOperator, BatchNormalizationHe
         );
 
         DML_EXECUTION_FLAGS executionFlags = GetExecutionFlags();
-        m_compiledOperator.Attach(graph.Compile(executionFlags, { batchNormalization }).Detach());
+        std::array<dml::Expression, 1> outputs = { batchNormalization };
+        m_compiledOperator.Attach(graph.Compile(executionFlags, outputs).Detach());
     }
 
     void Compute(const MLOperatorKernelContext& kernelContext) override
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorMultiHeadAttention.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorMultiHeadAttention.cpp
index 9c1a7baeaa8df..03500d0ee86a9 100644
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorMultiHeadAttention.cpp
+++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorMultiHeadAttention.cpp
@@ -205,12 +205,34 @@ class DmlOperatorMultiHeadAttention : public DmlOperator
             else
             {
                 const auto keyPaddingMaskTensorShape = m_inputTensorDescs[dmlMaskIndex].GetSizes();
-                ML_CHECK_VALID_ARGUMENT(keyPaddingMaskTensorShape.size() == 2);
+                size_t maskDimCount = keyPaddingMaskTensorShape.size();
+                ML_CHECK_VALID_ARGUMENT(maskDimCount >= 2 || maskDimCount <= 4);
                 ML_CHECK_VALID_ARGUMENT(keyPaddingMaskTensorShape[0] == batchSize);
-                ML_CHECK_VALID_ARGUMENT(keyPaddingMaskTensorShape[1] == kvSequenceLength);
 
-                const uint32_t actualShape[4] = {batchSize, 1, 1, kvSequenceLength};
-                const uint32_t desiredShape[4] = {batchSize, numHeads, sequenceLength, kvSequenceLength};
+                std::array<uint32_t, 4> actualShape{};
+                std::array<uint32_t, 4> desiredShape{};
+
+                if (maskDimCount == 2)
+                {
+                    ML_CHECK_VALID_ARGUMENT(keyPaddingMaskTensorShape[1] == kvSequenceLength);
+                    actualShape = {batchSize, 1, 1, kvSequenceLength};
+                    desiredShape = {batchSize, numHeads, sequenceLength, kvSequenceLength};
+                }
+                else if (maskDimCount == 3)
+                {
+                    ML_CHECK_VALID_ARGUMENT(keyPaddingMaskTensorShape[1] == sequenceLength);
+                    ML_CHECK_VALID_ARGUMENT(keyPaddingMaskTensorShape[2] == totalSequenceLength);
+                    actualShape = {batchSize, 1, sequenceLength, totalSequenceLength};
+                    desiredShape = {batchSize, numHeads, sequenceLength, totalSequenceLength};
+                }
+                else if (maskDimCount == 4)
+                {
+                    ML_CHECK_VALID_ARGUMENT(keyPaddingMaskTensorShape[1] == numHeads);
+                    ML_CHECK_VALID_ARGUMENT(keyPaddingMaskTensorShape[2] == sequenceLength);
+                    ML_CHECK_VALID_ARGUMENT(keyPaddingMaskTensorShape[3] == totalSequenceLength);
+                    actualShape = {batchSize, numHeads, sequenceLength, totalSequenceLength};
+                    desiredShape = {batchSize, numHeads, sequenceLength, totalSequenceLength};
+                }
 
                 m_inputTensorDescs[dmlMaskIndex] = TensorDesc::ConstructBroadcastedTensorDesc(
                     m_inputTensorDescs[dmlMaskIndex].GetMlOperatorDataType(),
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorPooling.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorPooling.cpp
index 4f8b5a1bc7fac..e8d5b2746aa13 100644
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorPooling.cpp
+++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorPooling.cpp
@@ -84,7 +84,7 @@ class DmlOperatorPooling : public DmlOperator, public PoolingHelperBase
             poolingDesc.EndPadding = m_kernel.endPadding;
 
             DML_OPERATOR_DESC opDesc = {};
-            opDesc.Type = ApiTraits::OperatorDescTraits<std::remove_reference<decltype(poolingDesc)>::type>::Type;
+            opDesc.Type = ApiTraits::OperatorDescTraits<typename std::remove_reference<decltype(poolingDesc)>::type>::Type;
             opDesc.Desc = &poolingDesc;
             SetDmlOperatorDesc(opDesc, kernelInfo);
         };
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorRotaryEmbedding.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorRotaryEmbedding.cpp
new file mode 100644
index 0000000000000..30c339b845b36
--- /dev/null
+++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorRotaryEmbedding.cpp
@@ -0,0 +1,436 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "precomp.h"
+
+// This operator is easier to understand by looking at a python implementation of the non-interleaved version:
+//
+// def rotate_half(x):
+//     """Rotates half the hidden dims of the input."""
+//     half_dim = x.shape[-1] // 2
+//     x1 = x[..., :half_dim]
+//     x2 = x[..., half_dim:]
+//     return np.concatenate((-x2, x1), dim=-1)
+//
+//
+// def apply_rope(x, cos, sin, position_ids):
+//     cos = cos[position_ids].unsqueeze(1)  # [bs, 1, seq_len, dim]
+//     sin = sin[position_ids].unsqueeze(1)  # [bs, 1, seq_len, dim]
+//     x_embed = (x * cos) + (rotate_half(x) * sin)
+//     return x_embed
+//
+// For the non-interleaved version, we multiply the cos cache by the non-rotated input tensor while we multiply the sin cache
+// by the rotated input tensor. Rotating the tensor means slicing it in half on the head dimension and swapping the 2 halves.
+//
+// The interleaved version is very similar but instead of swapping 2 halves, we swap every pair of adjacent elements and we swap
+// the sign of every adjacent element.
+
+namespace Dml
+{
+class DmlOperatorRotaryEmbedding : public DmlOperator
+{
+public:
+    DmlOperatorRotaryEmbedding(const MLOperatorKernelCreationContext& kernelInfo) : DmlOperator(kernelInfo)
+    {
+        enum InputIndex : uint32_t
+        {
+            inputDataIndex,
+            positionIdsIndex,
+            cosCacheIndex,
+            sinCacheIndex,
+        };
+
+        ML_CHECK_VALID_ARGUMENT(kernelInfo.GetInputCount() == 4);
+        ML_CHECK_VALID_ARGUMENT(kernelInfo.GetOutputCount() == 1);
+
+        // When positionIds is a scalar, it represents the start offset for each sequence
+        const bool positionIdsIsOffset = kernelInfo.GetInputTensorDimensionCount(positionIdsIndex) == 1;
+
+        Initialize(kernelInfo);
+
+        ML_CHECK_VALID_ARGUMENT(m_inputTensorDescs[inputDataIndex].GetDimensionCount() == 4);
+        ML_CHECK_VALID_ARGUMENT(m_inputTensorDescs[positionIdsIndex].GetDimensionCount() == 4);
+        ML_CHECK_VALID_ARGUMENT(m_inputTensorDescs[cosCacheIndex].GetDimensionCount() == 4);
+        ML_CHECK_VALID_ARGUMENT(m_inputTensorDescs[sinCacheIndex].GetDimensionCount() == 4);
+
+        ML_CHECK_VALID_ARGUMENT(m_outputTensorDescs[0].GetDimensionCount() == 4);
+
+        ML_CHECK_VALID_ARGUMENT(m_inputTensorDescs[cosCacheIndex].GetSizes() == m_inputTensorDescs[sinCacheIndex].GetSizes());
+        const uint32_t headSize = m_inputTensorDescs[cosCacheIndex].GetSizes().back() * 2;
+
+        // The last dimension of the data is the hidden size, so it must be divisible by the head size
+        ML_CHECK_VALID_ARGUMENT(m_inputTensorDescs[inputDataIndex].GetSizes().back() % headSize == 0);
+
+        // We resize the data to be of shape [batchSize, sequenceLength, numHeads, headSize]
+        const auto inputDataSizes = m_inputTensorDescs[inputDataIndex].GetSizes();
+        const uint32_t batchSize = inputDataSizes[1];
+        const uint32_t sequenceLength = inputDataSizes[2];
+        const uint32_t numHeads = inputDataSizes[3] / headSize;
+
+        const auto cosCacheSizes = m_inputTensorDescs[cosCacheIndex].GetSizes();
+        const uint32_t maxSequenceLength = cosCacheSizes[cosCacheSizes.size() - 2];
+
+        if (sequenceLength > maxSequenceLength)
+        {
+            ORT_NOT_IMPLEMENTED("Updating cos_cache and sin_cache in RotaryEmbedding is not currently supported");
+        }
+
+        const bool interleaved = gsl::narrow_cast<bool>(kernelInfo.GetOptionalAttribute<int64_t>(AttrName::Interleaved, 0));
+
+        std::vector<DML_TENSOR_DESC> inputDescs = GetDmlInputDescs();
+        const MLOperatorTensorDataType dataType = kernelInfo.GetInputEdgeDescription(inputDataIndex).tensorDataType;
+
+        // Splitting the hiddenSize into numHeads and headSize dimensions makes it easier for DML to handle
+        const std::array<uint32_t, 4> inputOutputShape = {batchSize, sequenceLength, numHeads, headSize};
+        TensorDesc inputOutputTensorDesc = TensorDesc::ConstructDefaultTensorDesc(dataType, inputOutputShape);
+        const DML_TENSOR_DESC inputOutputDmlTensorDesc = inputOutputTensorDesc.GetDmlDesc();
+
+        // Copy the input to preserve its real input shape in the graph without reshaping it. This will disappear during DML's graph compilation phase.
+        DML_SCALE_BIAS scaleBias = {1.0f, 0.0f};
+
+        DML_ELEMENT_WISE_IDENTITY_OPERATOR_DESC copyInputDesc{};
+        copyInputDesc.InputTensor = &inputOutputDmlTensorDesc;
+        copyInputDesc.OutputTensor = &inputOutputDmlTensorDesc;
+        copyInputDesc.ScaleBias = &scaleBias;
+        const DML_OPERATOR_DESC copyInputDmlDesc = {DML_OPERATOR_ELEMENT_WISE_IDENTITY, &copyInputDesc};
+
+        // Split the input data into 2 equal parts
+        const std::vector<uint32_t> inputDataTensorShape = interleaved
+            ? std::vector<uint32_t>({batchSize, sequenceLength, numHeads, headSize / 2, 2})
+            : std::vector<uint32_t>({batchSize, sequenceLength, numHeads, 2, headSize / 2});
+
+        const std::vector<uint32_t> splitInputDataTensorShape = interleaved
+            ? std::vector<uint32_t>({batchSize, sequenceLength, numHeads, headSize / 2, 1})
+            : std::vector<uint32_t>({batchSize, sequenceLength, numHeads, 1, headSize / 2});
+
+        TensorDesc inputDataTensorDesc = TensorDesc::ConstructDefaultTensorDesc(dataType, inputDataTensorShape);
+        const DML_TENSOR_DESC inputDataDmlTensorDesc = inputDataTensorDesc.GetDmlDesc();
+
+        TensorDesc splitInputDataTensorDesc = TensorDesc::ConstructDefaultTensorDesc(dataType, splitInputDataTensorShape);
+        const std::array<DML_TENSOR_DESC, 2> splitInputDataDmlTensorDescs = {splitInputDataTensorDesc.GetDmlDesc(), splitInputDataTensorDesc.GetDmlDesc()};
+
+        DML_SPLIT_OPERATOR_DESC splitInputDesc{};
+        splitInputDesc.InputTensor = &inputDataDmlTensorDesc;
+        splitInputDesc.OutputTensors = splitInputDataDmlTensorDescs.data();
+        splitInputDesc.OutputCount = gsl::narrow_cast<uint32_t>(splitInputDataDmlTensorDescs.size());
+        splitInputDesc.Axis = interleaved
+            ? gsl::narrow_cast<uint32_t>(splitInputDataTensorShape.size()) - 1
+            : gsl::narrow_cast<uint32_t>(splitInputDataTensorShape.size()) - 2;
+
+        const DML_OPERATOR_DESC splitInputDmlDesc = {DML_OPERATOR_SPLIT, &splitInputDesc};
+
+        // Swap the 2 halves and join them together
+        DML_JOIN_OPERATOR_DESC joinInputDesc{};
+        joinInputDesc.InputTensors = splitInputDataDmlTensorDescs.data();
+        joinInputDesc.OutputTensor = &inputDataDmlTensorDesc;
+        joinInputDesc.Axis = splitInputDesc.Axis;
+        joinInputDesc.InputCount = gsl::narrow_cast<uint32_t>(splitInputDataDmlTensorDescs.size());
+        const DML_OPERATOR_DESC joinInputDmlDesc = {DML_OPERATOR_JOIN, &joinInputDesc};
+
+        // We generate a sequence from 0 to sequenceLength and add the offset to it
+        const std::array<uint32_t, 4> positionIdsRangeShape = {1, 1, 1, sequenceLength};
+        auto positionIdsDataType = kernelInfo.GetInputEdgeDescription(positionIdsIndex).tensorDataType;
+        TensorDesc positionIdsRangeTensorDesc = TensorDesc::ConstructDefaultTensorDesc(positionIdsDataType, positionIdsRangeShape);
+        const DML_TENSOR_DESC positionIdsRangeDmlTensorDesc = positionIdsRangeTensorDesc.GetDmlDesc();
+
+        const std::array<uint32_t, 4> broadcastedPositionIdsRangeShape = {1, 1, batchSize, sequenceLength};
+        TensorDesc broadcastedPositionIdsRangeTensorDesc = TensorDesc::ConstructBroadcastedTensorDesc(positionIdsDataType, broadcastedPositionIdsRangeShape, positionIdsRangeShape);
+        const DML_TENSOR_DESC broadcastedPositionIdsRangeDmlTensorDesc = broadcastedPositionIdsRangeTensorDesc.GetDmlDesc();
+
+        const std::array<uint32_t, 4> broadcastedOffsetShape = {1, 1, batchSize, sequenceLength};
+        TensorDesc broadcastedOffsetTensorDesc = TensorDesc::ConstructBroadcastedTensorDesc(positionIdsDataType, broadcastedOffsetShape, m_inputTensorDescs[positionIdsIndex].GetSizes());
+        const DML_TENSOR_DESC broadcastedOffsetDmlTensorDesc = broadcastedOffsetTensorDesc.GetDmlDesc();
+
+        TensorDesc offsetPositionIdsTensorDesc = TensorDesc::ConstructDefaultTensorDesc(positionIdsDataType, broadcastedOffsetShape);
+        const DML_TENSOR_DESC offsetPositionIdsRangeDmlTensorDesc = offsetPositionIdsTensorDesc.GetDmlDesc();
+
+        DML_FILL_VALUE_SEQUENCE_OPERATOR_DESC positionIdsRange{};
+        DML_ELEMENT_WISE_ADD_OPERATOR_DESC positionIdsAddOffset{};
+        if (positionIdsIsOffset)
+        {
+            ML_CHECK_VALID_ARGUMENT(positionIdsDataType == MLOperatorTensorDataType::Int64);
+            positionIdsRange.ValueDataType = DML_TENSOR_DATA_TYPE_INT64;
+            positionIdsRange.ValueDelta.Int64 = 1;
+            positionIdsRange.OutputTensor = &positionIdsRangeDmlTensorDesc;
+
+            positionIdsAddOffset.ATensor = &broadcastedPositionIdsRangeDmlTensorDesc;
+            positionIdsAddOffset.BTensor = &broadcastedOffsetDmlTensorDesc;
+            positionIdsAddOffset.OutputTensor = &offsetPositionIdsRangeDmlTensorDesc;
+        }
+        const DML_OPERATOR_DESC positionIdsRangeDmlDesc = {DML_OPERATOR_FILL_VALUE_SEQUENCE, &positionIdsRange};
+        const DML_OPERATOR_DESC positionIdsAddOffsetDmlDesc = {DML_OPERATOR_ELEMENT_WISE_ADD, &positionIdsAddOffset};
+
+        // Gather the cos/sin values based on the position ids
+        const std::array<uint32_t, 4> gatheredCosSinShape = {1, batchSize, sequenceLength, headSize / 2};
+        TensorDesc gatheredCosSinTensorDesc = TensorDesc::ConstructDefaultTensorDesc(dataType, gatheredCosSinShape);
+        const DML_TENSOR_DESC gatheredCosSinDmlTensorDesc = gatheredCosSinTensorDesc.GetDmlDesc();
+
+        DML_GATHER_OPERATOR_DESC gatherCosSinDesc{};
+        gatherCosSinDesc.InputTensor = &inputDescs[cosCacheIndex];
+        gatherCosSinDesc.IndicesTensor = positionIdsIsOffset ? &offsetPositionIdsRangeDmlTensorDesc : &inputDescs[positionIdsIndex];
+        gatherCosSinDesc.OutputTensor = &gatheredCosSinDmlTensorDesc;
+        gatherCosSinDesc.Axis = 2;
+        gatherCosSinDesc.IndexDimensions = 2;
+        const DML_OPERATOR_DESC gatherCosSinDmlDesc {DML_OPERATOR_GATHER, &gatherCosSinDesc};
+
+        // After gathering cos/sin, reshape and broadcast them to match the number of heads of the input data
+        const std::vector<uint32_t> reshapedCosSinShape = interleaved
+            ? std::vector<uint32_t>({batchSize, sequenceLength, 1, headSize / 2, 1})
+            : std::vector<uint32_t>({batchSize, sequenceLength, 1, 1, headSize / 2});
+        TensorDesc broadcastedCosSinTensorDesc = TensorDesc::ConstructBroadcastedTensorDesc(dataType, inputDataTensorShape, reshapedCosSinShape);
+        const DML_TENSOR_DESC broadcastedCosSinDmlTensorDesc = broadcastedCosSinTensorDesc.GetDmlDesc();
+
+        // Create a vector that contains the sign values {-1, 1}
+        const std::array<uint32_t, 1> signTensorShape = {2};
+        TensorDesc signTensorDesc = TensorDesc::ConstructDefaultTensorDesc(dataType, signTensorShape);
+        const DML_TENSOR_DESC signDmlTensorDesc = signTensorDesc.GetDmlDesc();
+
+        DML_FILL_VALUE_SEQUENCE_OPERATOR_DESC signRange{};
+        signRange.OutputTensor = &signDmlTensorDesc;
+        if (dataType == MLOperatorTensorDataType::Float16)
+        {
+            const auto valueStart = static_cast<MLFloat16>(-1.0f);
+            const auto valueDelta = static_cast<MLFloat16>(2.0f);
+            memcpy(signRange.ValueStart.Bytes, reinterpret_cast<const BYTE*>(&valueStart), sizeof(valueStart));
+            memcpy(signRange.ValueDelta.Bytes, reinterpret_cast<const BYTE*>(&valueDelta), sizeof(valueDelta));
+            signRange.ValueDataType = DML_TENSOR_DATA_TYPE_FLOAT16;
+        }
+        else
+        {
+            ML_CHECK_VALID_ARGUMENT(dataType == MLOperatorTensorDataType::Float);
+            signRange.ValueStart.Float32 = -1.0f;
+            signRange.ValueDelta.Float32 = 2.0f;
+            signRange.ValueDataType = DML_TENSOR_DATA_TYPE_FLOAT32;
+        }
+        const DML_OPERATOR_DESC signRangeDmlDesc = {DML_OPERATOR_FILL_VALUE_SEQUENCE, &signRange};
+
+        // Multiply the broadcasted sign values with the rotated input
+        const std::vector<uint32_t> reshapedSignShape = interleaved
+            ? std::vector<uint32_t>({1, 1, 1, 1, 2})
+            : std::vector<uint32_t>({1, 1, 1, 2, 1});
+        TensorDesc broadcastedSignCosSinTensorDesc = TensorDesc::ConstructBroadcastedTensorDesc(dataType, inputDataTensorShape, reshapedSignShape);
+        const DML_TENSOR_DESC broadcastedSignDmlTensorDesc = broadcastedSignCosSinTensorDesc.GetDmlDesc();
+
+        DML_ELEMENT_WISE_MULTIPLY_OPERATOR_DESC mulSignDesc{};
+        mulSignDesc.ATensor = &inputDataDmlTensorDesc;
+        mulSignDesc.BTensor = &broadcastedSignDmlTensorDesc;
+        mulSignDesc.OutputTensor = &inputDataDmlTensorDesc;
+        const DML_OPERATOR_DESC mulSignDmlDesc = {DML_OPERATOR_ELEMENT_WISE_MULTIPLY, &mulSignDesc};
+
+        // Multiply the non-rotated data with the cos and the rotated data with the sin
+        DML_ELEMENT_WISE_MULTIPLY_OPERATOR_DESC mulCosSinDesc{};
+        mulCosSinDesc.ATensor = &inputDataDmlTensorDesc;
+        mulCosSinDesc.BTensor = &broadcastedCosSinDmlTensorDesc;
+        mulCosSinDesc.OutputTensor = &inputDataDmlTensorDesc;
+        const DML_OPERATOR_DESC mulCosSinDmlDesc = {DML_OPERATOR_ELEMENT_WISE_MULTIPLY, &mulCosSinDesc};
+
+        // Add the multiplied cos and sin values together
+        DML_ELEMENT_WISE_ADD_OPERATOR_DESC addDesc{};
+        addDesc.ATensor = &inputOutputDmlTensorDesc;
+        addDesc.BTensor = &inputOutputDmlTensorDesc;
+        addDesc.OutputTensor = &inputOutputDmlTensorDesc;
+        const DML_OPERATOR_DESC addDmlDesc = {DML_OPERATOR_ELEMENT_WISE_ADD, &addDesc};
+
+        // Construct the graph
+        std::vector<DML_INPUT_GRAPH_EDGE_DESC> inputEdges;
+        std::vector<DML_INTERMEDIATE_GRAPH_EDGE_DESC> intermediateEdges;
+        std::vector<DML_OUTPUT_GRAPH_EDGE_DESC> outputEdges;
+
+        std::vector<const DML_OPERATOR_DESC*> opDescs = {
+            &copyInputDmlDesc, // Copy the input data to preseve the real input shape
+            &splitInputDmlDesc, // Split the input data
+            &gatherCosSinDmlDesc, // Gather cos
+            &gatherCosSinDmlDesc, // Gather sin
+            &signRangeDmlDesc, // Generate the signs
+
+            &joinInputDmlDesc, // Join the split data
+            &mulCosSinDmlDesc, // Multiply cos with the non-rotated data
+            &mulCosSinDmlDesc, // Multiply sin with the rotated data
+            &mulSignDmlDesc, // Multiply the sign with the rotated data
+            &addDmlDesc, // Add the rotated cos and non-rotated sin parts together
+        };
+
+        enum NodeIndex : uint32_t
+        {
+            copyInputOpIndex,
+            splitInputOpIndex,
+            gatherCosOpIndex,
+            gatherSinOpIndex,
+            signRangeOpIndex,
+
+            joinInputOpIndex,
+            mulCosOpIndex,
+            mulSinOpIndex,
+            mulSignOpIndex,
+            addOpIndex,
+
+            // The following indices are optional
+            positionIdsRangeOpIndex,
+            positionIdsAddOffsetOpIndex,
+        };
+
+        if (positionIdsIsOffset)
+        {
+            opDescs.push_back(&positionIdsRangeDmlDesc);
+            opDescs.push_back(&positionIdsAddOffsetDmlDesc);
+
+            DML_INPUT_GRAPH_EDGE_DESC positionIdsToAddOffsetEdge = {};
+            positionIdsToAddOffsetEdge.GraphInputIndex = positionIdsIndex;
+            positionIdsToAddOffsetEdge.ToNodeIndex = positionIdsAddOffsetOpIndex;
+            positionIdsToAddOffsetEdge.ToNodeInputIndex = 1;
+            inputEdges.push_back(positionIdsToAddOffsetEdge);
+
+            DML_INTERMEDIATE_GRAPH_EDGE_DESC positionIdsOffsetToAddOffsetEdge = {};
+            positionIdsOffsetToAddOffsetEdge.FromNodeIndex = positionIdsRangeOpIndex;
+            positionIdsOffsetToAddOffsetEdge.FromNodeOutputIndex = 0;
+            positionIdsOffsetToAddOffsetEdge.ToNodeIndex = positionIdsAddOffsetOpIndex;
+            positionIdsOffsetToAddOffsetEdge.ToNodeInputIndex = 0;
+            intermediateEdges.push_back(positionIdsOffsetToAddOffsetEdge);
+
+            DML_INTERMEDIATE_GRAPH_EDGE_DESC positionIdsAddOffsetToGatherCosEdge = {};
+            positionIdsAddOffsetToGatherCosEdge.FromNodeIndex = positionIdsAddOffsetOpIndex;
+            positionIdsAddOffsetToGatherCosEdge.FromNodeOutputIndex = 0;
+            positionIdsAddOffsetToGatherCosEdge.ToNodeIndex = gatherCosOpIndex;
+            positionIdsAddOffsetToGatherCosEdge.ToNodeInputIndex = 1;
+            intermediateEdges.push_back(positionIdsAddOffsetToGatherCosEdge);
+
+            DML_INTERMEDIATE_GRAPH_EDGE_DESC positionIdsAddOffsetToGatherSinEdge = {};
+            positionIdsAddOffsetToGatherSinEdge.FromNodeIndex = positionIdsAddOffsetOpIndex;
+            positionIdsAddOffsetToGatherSinEdge.FromNodeOutputIndex = 0;
+            positionIdsAddOffsetToGatherSinEdge.ToNodeIndex = gatherSinOpIndex;
+            positionIdsAddOffsetToGatherSinEdge.ToNodeInputIndex = 1;
+            intermediateEdges.push_back(positionIdsAddOffsetToGatherSinEdge);
+        }
+        else
+        {
+            DML_INPUT_GRAPH_EDGE_DESC positionIdsToGatherCosEdge = {};
+            positionIdsToGatherCosEdge.GraphInputIndex = positionIdsIndex;
+            positionIdsToGatherCosEdge.ToNodeIndex = gatherCosOpIndex;
+            positionIdsToGatherCosEdge.ToNodeInputIndex = 1;
+            inputEdges.push_back(positionIdsToGatherCosEdge);
+
+            DML_INPUT_GRAPH_EDGE_DESC positionIdsToGatherSinEdge = {};
+            positionIdsToGatherSinEdge.GraphInputIndex = positionIdsIndex;
+            positionIdsToGatherSinEdge.ToNodeIndex = gatherSinOpIndex;
+            positionIdsToGatherSinEdge.ToNodeInputIndex = 1;
+            inputEdges.push_back(positionIdsToGatherSinEdge);
+        }
+
+        DML_INPUT_GRAPH_EDGE_DESC inputToCopyInputEdge = {};
+        inputToCopyInputEdge.GraphInputIndex = inputDataIndex;
+        inputToCopyInputEdge.ToNodeIndex = copyInputOpIndex;
+        inputToCopyInputEdge.ToNodeInputIndex = 0;
+        inputEdges.push_back(inputToCopyInputEdge);
+
+        DML_INPUT_GRAPH_EDGE_DESC cosToGatherEdge = {};
+        cosToGatherEdge.GraphInputIndex = cosCacheIndex;
+        cosToGatherEdge.ToNodeIndex = gatherCosOpIndex;
+        cosToGatherEdge.ToNodeInputIndex = 0;
+        inputEdges.push_back(cosToGatherEdge);
+
+        DML_INPUT_GRAPH_EDGE_DESC sinToGatherEdge = {};
+        sinToGatherEdge.GraphInputIndex = sinCacheIndex;
+        sinToGatherEdge.ToNodeIndex = gatherSinOpIndex;
+        sinToGatherEdge.ToNodeInputIndex = 0;
+        inputEdges.push_back(sinToGatherEdge);
+
+        DML_INTERMEDIATE_GRAPH_EDGE_DESC inputToSplitEdge = {};
+        inputToSplitEdge.FromNodeIndex = copyInputOpIndex;
+        inputToSplitEdge.FromNodeOutputIndex = 0;
+        inputToSplitEdge.ToNodeIndex = splitInputOpIndex;
+        inputToSplitEdge.ToNodeInputIndex = 0;
+        intermediateEdges.push_back(inputToSplitEdge);
+
+        DML_INTERMEDIATE_GRAPH_EDGE_DESC nonRotatedDataToMulEdge = {};
+        nonRotatedDataToMulEdge.FromNodeIndex = copyInputOpIndex;
+        nonRotatedDataToMulEdge.FromNodeOutputIndex = 0;
+        nonRotatedDataToMulEdge.ToNodeIndex = mulCosOpIndex;
+        nonRotatedDataToMulEdge.ToNodeInputIndex = 0;
+        intermediateEdges.push_back(nonRotatedDataToMulEdge);
+
+        DML_INTERMEDIATE_GRAPH_EDGE_DESC secondHalfDataToJoinEdge = {};
+        secondHalfDataToJoinEdge.FromNodeIndex = splitInputOpIndex;
+        secondHalfDataToJoinEdge.FromNodeOutputIndex = 1;
+        secondHalfDataToJoinEdge.ToNodeIndex = joinInputOpIndex;
+        secondHalfDataToJoinEdge.ToNodeInputIndex = 0;
+        intermediateEdges.push_back(secondHalfDataToJoinEdge);
+
+        DML_INTERMEDIATE_GRAPH_EDGE_DESC firstHalfDataToJoinEdge = {};
+        firstHalfDataToJoinEdge.FromNodeIndex = splitInputOpIndex;
+        firstHalfDataToJoinEdge.FromNodeOutputIndex = 0;
+        firstHalfDataToJoinEdge.ToNodeIndex = joinInputOpIndex;
+        firstHalfDataToJoinEdge.ToNodeInputIndex = 1;
+        intermediateEdges.push_back(firstHalfDataToJoinEdge);
+
+        DML_INTERMEDIATE_GRAPH_EDGE_DESC cosToMulEdge = {};
+        cosToMulEdge.FromNodeIndex = gatherCosOpIndex;
+        cosToMulEdge.FromNodeOutputIndex = 0;
+        cosToMulEdge.ToNodeIndex = mulCosOpIndex;
+        cosToMulEdge.ToNodeInputIndex = 1;
+        intermediateEdges.push_back(cosToMulEdge);
+
+        DML_INTERMEDIATE_GRAPH_EDGE_DESC rotatedDataToMulEdge = {};
+        rotatedDataToMulEdge.FromNodeIndex = joinInputOpIndex;
+        rotatedDataToMulEdge.FromNodeOutputIndex = 0;
+        rotatedDataToMulEdge.ToNodeIndex = mulSinOpIndex;
+        rotatedDataToMulEdge.ToNodeInputIndex = 0;
+        intermediateEdges.push_back(rotatedDataToMulEdge);
+
+        DML_INTERMEDIATE_GRAPH_EDGE_DESC sinToMulEdge = {};
+        sinToMulEdge.FromNodeIndex = gatherSinOpIndex;
+        sinToMulEdge.FromNodeOutputIndex = 0;
+        sinToMulEdge.ToNodeIndex = mulSinOpIndex;
+        sinToMulEdge.ToNodeInputIndex = 1;
+        intermediateEdges.push_back(sinToMulEdge);
+
+        DML_INTERMEDIATE_GRAPH_EDGE_DESC rotatedSinToMulEdge = {};
+        rotatedSinToMulEdge.FromNodeIndex = mulSinOpIndex;
+        rotatedSinToMulEdge.FromNodeOutputIndex = 0;
+        rotatedSinToMulEdge.ToNodeIndex = mulSignOpIndex;
+        rotatedSinToMulEdge.ToNodeInputIndex = 0;
+        intermediateEdges.push_back(rotatedSinToMulEdge);
+
+        DML_INTERMEDIATE_GRAPH_EDGE_DESC signToMulEdge = {};
+        signToMulEdge.FromNodeIndex = signRangeOpIndex;
+        signToMulEdge.FromNodeOutputIndex = 0;
+        signToMulEdge.ToNodeIndex = mulSignOpIndex;
+        signToMulEdge.ToNodeInputIndex = 1;
+        intermediateEdges.push_back(signToMulEdge);
+
+        DML_INTERMEDIATE_GRAPH_EDGE_DESC nonRotatedCosToAddEdge = {};
+        nonRotatedCosToAddEdge.FromNodeIndex = mulCosOpIndex;
+        nonRotatedCosToAddEdge.FromNodeOutputIndex = 0;
+        nonRotatedCosToAddEdge.ToNodeIndex = addOpIndex;
+        nonRotatedCosToAddEdge.ToNodeInputIndex = 0;
+        intermediateEdges.push_back(nonRotatedCosToAddEdge);
+
+        DML_INTERMEDIATE_GRAPH_EDGE_DESC rotatedSinToAddEdge = {};
+        rotatedSinToAddEdge.FromNodeIndex = mulSignOpIndex;
+        rotatedSinToAddEdge.FromNodeOutputIndex = 0;
+        rotatedSinToAddEdge.ToNodeIndex = addOpIndex;
+        rotatedSinToAddEdge.ToNodeInputIndex = 1;
+        intermediateEdges.push_back(rotatedSinToAddEdge);
+
+        DML_OUTPUT_GRAPH_EDGE_DESC addToOutputEdge = {};
+        addToOutputEdge.FromNodeIndex = addOpIndex;
+        addToOutputEdge.FromNodeOutputIndex = 0;
+        addToOutputEdge.GraphOutputIndex = 0;
+        outputEdges.push_back(addToOutputEdge);
+
+        MLOperatorGraphDesc operatorGraphDesc = {};
+        operatorGraphDesc.inputEdgeCount = gsl::narrow_cast<uint32_t>(inputEdges.size());
+        operatorGraphDesc.inputEdges = inputEdges.data();
+        operatorGraphDesc.intermediateEdgeCount = gsl::narrow_cast<uint32_t>(intermediateEdges.size());
+        operatorGraphDesc.intermediateEdges = intermediateEdges.data();
+        operatorGraphDesc.outputEdgeCount = gsl::narrow_cast<uint32_t>(outputEdges.size());
+        operatorGraphDesc.outputEdges = outputEdges.data();
+        operatorGraphDesc.nodeCount = gsl::narrow_cast<uint32_t>(opDescs.size());
+        operatorGraphDesc.nodesAsOpDesc = opDescs.data();
+
+        SetDmlOperatorGraphDesc(std::move(operatorGraphDesc), kernelInfo);
+    }
+};
+
+DML_OP_DEFINE_CREATION_FUNCTION(RotaryEmbedding, DmlOperatorRotaryEmbedding);
+
+} // namespace Dml
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/GenerateShaders.bat b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/GenerateShaders.bat
index fb087bd800ff0..c5580ee103595 100644
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/GenerateShaders.bat
+++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/GenerateShaders.bat
@@ -16,8 +16,6 @@ if "%1" == "DEBUG" (
     dxc.exe ..\Shaders\grid_sample.hlsl -E GridSample -T cs_6_2 -DTBUFFER1=int64_t -DTBUFFER2=float -enable-16bit-types -Zi -Od -Qembed_debug -Fh grid_sample_int64_float.h
     dxc.exe ..\Shaders\grid_sample.hlsl -E GridSample -T cs_6_2 -DTBUFFER1=float16_t -DTBUFFER2=float -enable-16bit-types -Zi -Od -Qembed_debug -Fh grid_sample_fp16_float.h
     fxc.exe ..\Shaders\grid_sample.hlsl -E GridSample -T cs_5_0 /DTBUFFER1=float /DTBUFFER2=float /Zi /Od /Fh grid_sample_float_float.h
-    fxc.exe ..\Shaders\grid_sample.hlsl -E GridSample -T cs_5_0 /DTBUFFER1=double /DTBUFFER2=float /Zi /Od /Fh grid_sample_double_float.h
-    fxc.exe ..\Shaders\grid_sample.hlsl -E GridSample -T cs_5_0 /DTBUFFER1=bool /DTBUFFER2=float /Zi /Od /Fh grid_sample_bool_float.h
 
     dxc.exe ..\Shaders\grid_sample.hlsl -E GridSample -T cs_6_2 -DTBUFFER1=uint16_t -DTBUFFER2=float16_t -enable-16bit-types -Zi -Od -Qembed_debug -Fh grid_sample_uint16_fp16.h
     dxc.exe ..\Shaders\grid_sample.hlsl -E GridSample -T cs_6_2 -DTBUFFER1=uint -DTBUFFER2=float16_t -enable-16bit-types -Zi -Od -Qembed_debug -Fh grid_sample_uint_fp16.h
@@ -27,20 +25,6 @@ if "%1" == "DEBUG" (
     dxc.exe ..\Shaders\grid_sample.hlsl -E GridSample -T cs_6_2 -DTBUFFER1=int64_t -DTBUFFER2=float16_t -enable-16bit-types -Zi -Od -Qembed_debug -Fh grid_sample_int64_fp16.h
     dxc.exe ..\Shaders\grid_sample.hlsl -E GridSample -T cs_6_2 -DTBUFFER1=float16_t -DTBUFFER2=float16_t -enable-16bit-types -Zi -Od -Qembed_debug -Fh grid_sample_fp16_fp16.h
     dxc.exe ..\Shaders\grid_sample.hlsl -E GridSample -T cs_6_2 -DTBUFFER1=float -DTBUFFER2=float16_t -enable-16bit-types -Zi -Od -Qembed_debug -Fh grid_sample_float_fp16.h
-    dxc.exe ..\Shaders\grid_sample.hlsl -E GridSample -T cs_6_2 -DTBUFFER1=double -DTBUFFER2=float16_t -enable-16bit-types -Zi -Od -Qembed_debug -Fh grid_sample_double_fp16.h
-    dxc.exe ..\Shaders\grid_sample.hlsl -E GridSample -T cs_6_2 -DTBUFFER1=bool -DTBUFFER2=float16_t -enable-16bit-types -Zi -Od -Qembed_debug -Fh grid_sample_bool_fp16.h
-
-    dxc.exe ..\Shaders\grid_sample.hlsl -E GridSample -T cs_6_2 -DTBUFFER1=uint16_t -DTBUFFER2=double -enable-16bit-types -Zi -Od -Qembed_debug -Fh grid_sample_uint16_double.h
-    dxc.exe ..\Shaders\grid_sample.hlsl -E GridSample -T cs_6_2 -DTBUFFER1=uint -DTBUFFER2=double -enable-16bit-types -Zi -Od -Qembed_debug -Fh grid_sample_uint_double.h
-    dxc.exe ..\Shaders\grid_sample.hlsl -E GridSample -T cs_6_2 -DTBUFFER1=uint64_t -DTBUFFER2=double -enable-16bit-types -Zi -Od -Qembed_debug -Fh grid_sample_uint64_double.h
-    dxc.exe ..\Shaders\grid_sample.hlsl -E GridSample -T cs_6_2 -DTBUFFER1=int16_t -DTBUFFER2=double -enable-16bit-types -Zi -Od -Qembed_debug -Fh grid_sample_int16_double.h
-    dxc.exe ..\Shaders\grid_sample.hlsl -E GridSample -T cs_6_2 -DTBUFFER1=int -DTBUFFER2=double -enable-16bit-types -Zi -Od -Qembed_debug -Fh grid_sample_int_double.h
-    dxc.exe ..\Shaders\grid_sample.hlsl -E GridSample -T cs_6_2 -DTBUFFER1=int64_t -DTBUFFER2=double -enable-16bit-types -Zi -Od -Qembed_debug -Fh grid_sample_int64_double.h
-    dxc.exe ..\Shaders\grid_sample.hlsl -E GridSample -T cs_6_2 -DTBUFFER1=float16_t -DTBUFFER2=double -enable-16bit-types -Zi -Od -Qembed_debug -Fh grid_sample_fp16_double.h
-    dxc.exe ..\Shaders\grid_sample.hlsl -E GridSample -T cs_6_2 -DTBUFFER1=float -DTBUFFER2=double -enable-16bit-types -Zi -Od -Qembed_debug -Fh grid_sample_float_double.h
-    dxc.exe ..\Shaders\grid_sample.hlsl -E GridSample -T cs_6_2 -DTBUFFER1=double -DTBUFFER2=double -enable-16bit-types -Zi -Od -Qembed_debug -Fh grid_sample_double_double.h
-    dxc.exe ..\Shaders\grid_sample.hlsl -E GridSample -T cs_6_2 -DTBUFFER1=bool -DTBUFFER2=double -enable-16bit-types -Zi -Od -Qembed_debug -Fh grid_sample_bool_double.h
-
 ) else (
     fxc.exe ..\Shaders\stockham.hlsl -E DFT -T cs_5_0 /DTBUFFER=float /O3 /Qstrip_reflect /Qstrip_debug /Qstrip_rootsignature /Qstrip_priv /Fh stockham.h
     dxc.exe ..\Shaders\stockham.hlsl -E DFT -T cs_6_2 -DTBUFFER=float16_t -enable-16bit-types -O3 -Qstrip_reflect -Qstrip_debug -Qstrip_rootsignature -Fh stockham_fp16.h
@@ -56,8 +40,6 @@ if "%1" == "DEBUG" (
     dxc.exe ..\Shaders\grid_sample.hlsl -E GridSample -T cs_6_2 -DTBUFFER1=int64_t -DTBUFFER2=float -enable-16bit-types -O3 -Qstrip_reflect -Qstrip_debug -Qstrip_rootsignature -Fh grid_sample_int64_float.h
     dxc.exe ..\Shaders\grid_sample.hlsl -E GridSample -T cs_6_2 -DTBUFFER1=float16_t -DTBUFFER2=float -enable-16bit-types -O3 -Qstrip_reflect -Qstrip_debug -Qstrip_rootsignature -Fh grid_sample_fp16_float.h
     fxc.exe ..\Shaders\grid_sample.hlsl -E GridSample -T cs_5_0 /DTBUFFER1=float /DTBUFFER2=float /O3 /Qstrip_reflect /Qstrip_debug /Qstrip_rootsignature /Qstrip_priv /Fh grid_sample_float_float.h
-    fxc.exe ..\Shaders\grid_sample.hlsl -E GridSample -T cs_5_0 /DTBUFFER1=double /DTBUFFER2=float /O3 /Qstrip_reflect /Qstrip_debug /Qstrip_rootsignature /Qstrip_priv /Fh grid_sample_double_float.h
-    fxc.exe ..\Shaders\grid_sample.hlsl -E GridSample -T cs_5_0 /DTBUFFER1=bool /DTBUFFER2=float /O3 /Qstrip_reflect /Qstrip_debug /Qstrip_rootsignature /Qstrip_priv /Fh grid_sample_bool_float.h
 
     dxc.exe ..\Shaders\grid_sample.hlsl -E GridSample -T cs_6_2 -DTBUFFER1=uint16_t -DTBUFFER2=float16_t -enable-16bit-types -O3 -Qstrip_reflect -Qstrip_debug -Qstrip_rootsignature -Fh grid_sample_uint16_fp16.h
     dxc.exe ..\Shaders\grid_sample.hlsl -E GridSample -T cs_6_2 -DTBUFFER1=uint -DTBUFFER2=float16_t -enable-16bit-types -O3 -Qstrip_reflect -Qstrip_debug -Qstrip_rootsignature -Fh grid_sample_uint_fp16.h
@@ -67,18 +49,5 @@ if "%1" == "DEBUG" (
     dxc.exe ..\Shaders\grid_sample.hlsl -E GridSample -T cs_6_2 -DTBUFFER1=int64_t -DTBUFFER2=float16_t -enable-16bit-types -O3 -Qstrip_reflect -Qstrip_debug -Qstrip_rootsignature -Fh grid_sample_int64_fp16.h
     dxc.exe ..\Shaders\grid_sample.hlsl -E GridSample -T cs_6_2 -DTBUFFER1=float16_t -DTBUFFER2=float16_t -enable-16bit-types -O3 -Qstrip_reflect -Qstrip_debug -Qstrip_rootsignature -Fh grid_sample_fp16_fp16.h
     dxc.exe ..\Shaders\grid_sample.hlsl -E GridSample -T cs_6_2 -DTBUFFER1=float -DTBUFFER2=float16_t -enable-16bit-types -O3 -Qstrip_reflect -Qstrip_debug -Qstrip_rootsignature -Fh grid_sample_float_fp16.h
-    dxc.exe ..\Shaders\grid_sample.hlsl -E GridSample -T cs_6_2 -DTBUFFER1=double -DTBUFFER2=float16_t -enable-16bit-types -O3 -Qstrip_reflect -Qstrip_debug -Qstrip_rootsignature -Fh grid_sample_double_fp16.h
-    dxc.exe ..\Shaders\grid_sample.hlsl -E GridSample -T cs_6_2 -DTBUFFER1=bool -DTBUFFER2=float16_t -enable-16bit-types -O3 -Qstrip_reflect -Qstrip_debug -Qstrip_rootsignature -Fh grid_sample_bool_fp16.h
-
-    dxc.exe ..\Shaders\grid_sample.hlsl -E GridSample -T cs_6_2 -DTBUFFER1=uint16_t -DTBUFFER2=double -enable-16bit-types -O3 -Qstrip_reflect -Qstrip_debug -Qstrip_rootsignature -Fh grid_sample_uint16_double.h
-    dxc.exe ..\Shaders\grid_sample.hlsl -E GridSample -T cs_6_2 -DTBUFFER1=uint -DTBUFFER2=double -enable-16bit-types -O3 -Qstrip_reflect -Qstrip_debug -Qstrip_rootsignature -Fh grid_sample_uint_double.h
-    dxc.exe ..\Shaders\grid_sample.hlsl -E GridSample -T cs_6_2 -DTBUFFER1=uint64_t -DTBUFFER2=double -enable-16bit-types -O3 -Qstrip_reflect -Qstrip_debug -Qstrip_rootsignature -Fh grid_sample_uint64_double.h
-    dxc.exe ..\Shaders\grid_sample.hlsl -E GridSample -T cs_6_2 -DTBUFFER1=int16_t -DTBUFFER2=double -enable-16bit-types -O3 -Qstrip_reflect -Qstrip_debug -Qstrip_rootsignature -Fh grid_sample_int16_double.h
-    dxc.exe ..\Shaders\grid_sample.hlsl -E GridSample -T cs_6_2 -DTBUFFER1=int -DTBUFFER2=double -enable-16bit-types -O3 -Qstrip_reflect -Qstrip_debug -Qstrip_rootsignature -Fh grid_sample_int_double.h
-    dxc.exe ..\Shaders\grid_sample.hlsl -E GridSample -T cs_6_2 -DTBUFFER1=int64_t -DTBUFFER2=double -enable-16bit-types -O3 -Qstrip_reflect -Qstrip_debug -Qstrip_rootsignature -Fh grid_sample_int64_double.h
-    dxc.exe ..\Shaders\grid_sample.hlsl -E GridSample -T cs_6_2 -DTBUFFER1=float16_t -DTBUFFER2=double -enable-16bit-types -O3 -Qstrip_reflect -Qstrip_debug -Qstrip_rootsignature -Fh grid_sample_fp16_double.h
-    dxc.exe ..\Shaders\grid_sample.hlsl -E GridSample -T cs_6_2 -DTBUFFER1=float -DTBUFFER2=double -enable-16bit-types -O3 -Qstrip_reflect -Qstrip_debug -Qstrip_rootsignature -Fh grid_sample_float_double.h
-    dxc.exe ..\Shaders\grid_sample.hlsl -E GridSample -T cs_6_2 -DTBUFFER1=double -DTBUFFER2=double -enable-16bit-types -O3 -Qstrip_reflect -Qstrip_debug -Qstrip_rootsignature -Fh grid_sample_double_double.h
-    dxc.exe ..\Shaders\grid_sample.hlsl -E GridSample -T cs_6_2 -DTBUFFER1=bool -DTBUFFER2=double -enable-16bit-types -O3 -Qstrip_reflect -Qstrip_debug -Qstrip_rootsignature -Fh grid_sample_bool_double.h
 
 )
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_bool_double.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_bool_double.h
deleted file mode 100644
index 6d83865ecb7a2..0000000000000
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_bool_double.h
+++ /dev/null
@@ -1,6398 +0,0 @@
-#if 0
-;
-; Note: shader requires additional functionality:
-;       Double-precision floating point
-;
-;
-; Input signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; no parameters
-;
-; Output signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; no parameters
-; shader hash: eff86a5dd3f8ca652b3700c52570e535
-;
-; Pipeline Runtime Information: 
-;
-;
-;
-; Buffer Definitions:
-;
-; cbuffer 
-; {
-;
-;   [116 x i8] (type annotation not present)
-;
-; }
-;
-; Resource bind info for 
-; {
-;
-;   [4 x i8] (type annotation not present)
-;
-; }
-;
-; Resource bind info for 
-; {
-;
-;   [8 x i8] (type annotation not present)
-;
-; }
-;
-; Resource bind info for 
-; {
-;
-;   [4 x i8] (type annotation not present)
-;
-; }
-;
-;
-; Resource Bindings:
-;
-; Name                                 Type  Format         Dim      ID      HLSL Bind  Count
-; ------------------------------ ---------- ------- ----------- ------- -------------- ------
-;                                   cbuffer      NA          NA     CB0            cb0     1
-;                                       UAV  struct         r/w      U0             u0     1
-;                                       UAV  struct         r/w      U1             u1     1
-;                                       UAV  struct         r/w      U2             u2     1
-;
-target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
-target triple = "dxil-ms-dx"
-
-%dx.types.Handle = type { i8* }
-%dx.types.CBufRet.i32 = type { i32, i32, i32, i32 }
-%dx.types.ResRet.i32 = type { i32, i32, i32, i32, i32 }
-%"class.RWStructuredBuffer<bool>" = type { i32 }
-%"class.RWStructuredBuffer<double>" = type { double }
-%Constants = type { i32, i32, i32, i32, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32 }
-
-define void @GridSample() {
-  %1 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 2, i32 2, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %2 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 1, i32 1, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %3 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %4 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %5 = call i32 @dx.op.threadId.i32(i32 93, i32 0)  ; ThreadId(component)
-  %6 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
-  %7 = extractvalue %dx.types.CBufRet.i32 %6, 0
-  %8 = add i32 %7, %5
-  %9 = extractvalue %dx.types.CBufRet.i32 %6, 1
-  %10 = icmp ult i32 %8, %9
-  br i1 %10, label %11, label %3389
-
-; <label>:11                                      ; preds = %0
-  %12 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
-  %13 = extractvalue %dx.types.CBufRet.i32 %12, 3
-  %14 = uitofp i32 %13 to float
-  %15 = extractvalue %dx.types.CBufRet.i32 %12, 2
-  %16 = uitofp i32 %15 to float
-  %17 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 7)  ; CBufferLoadLegacy(handle,regIndex)
-  %18 = extractvalue %dx.types.CBufRet.i32 %17, 0
-  %19 = icmp eq i32 %18, 0
-  %20 = select i1 %19, float -5.000000e-01, float 0.000000e+00
-  %21 = select i1 %19, float -5.000000e-01, float -1.000000e+00
-  %22 = fadd float %14, %21
-  %23 = select i1 %19, float -5.000000e-01, float -1.000000e+00
-  %24 = fadd float %16, %23
-  %25 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
-  %26 = extractvalue %dx.types.CBufRet.i32 %25, 1
-  %27 = extractvalue %dx.types.CBufRet.i32 %25, 2
-  %28 = extractvalue %dx.types.CBufRet.i32 %25, 3
-  %29 = mul i32 %28, %27
-  %30 = mul i32 %27, %26
-  %31 = mul i32 %30, %28
-  %32 = udiv i32 %8, %31
-  %33 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 6)  ; CBufferLoadLegacy(handle,regIndex)
-  %34 = extractvalue %dx.types.CBufRet.i32 %33, 0
-  %35 = mul i32 %34, %32
-  %36 = sub i32 %8, %35
-  %37 = udiv i32 %36, %29
-  %38 = extractvalue %dx.types.CBufRet.i32 %33, 1
-  %39 = mul i32 %38, %37
-  %40 = sub i32 %36, %39
-  %41 = udiv i32 %40, %28
-  %42 = extractvalue %dx.types.CBufRet.i32 %33, 2
-  %43 = mul i32 %42, %41
-  %44 = sub i32 %40, %43
-  %45 = uitofp i32 %32 to float
-  %46 = uitofp i32 %41 to float
-  %47 = uitofp i32 %44 to float
-  %48 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
-  %49 = extractvalue %dx.types.CBufRet.i32 %48, 0
-  %50 = extractvalue %dx.types.CBufRet.i32 %48, 1
-  %51 = extractvalue %dx.types.CBufRet.i32 %48, 2
-  %52 = extractvalue %dx.types.CBufRet.i32 %48, 3
-  %53 = uitofp i32 %49 to float
-  %54 = uitofp i32 %50 to float
-  %55 = uitofp i32 %51 to float
-  %56 = uitofp i32 %52 to float
-  %57 = call float @dx.op.dot4.f32(i32 56, float %45, float %46, float %47, float 0.000000e+00, float %53, float %54, float %55, float %56)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %58 = fadd fast float %56, %57
-  %59 = fptoui float %57 to i32
-  %60 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %2, i32 %59, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %61 = extractvalue %dx.types.ResRet.i32 %60, 0
-  %62 = extractvalue %dx.types.ResRet.i32 %60, 1
-  %63 = call double @dx.op.makeDouble.f64(i32 101, i32 %61, i32 %62)  ; MakeDouble(lo,hi)
-  %64 = fptrunc double %63 to float
-  %65 = fptoui float %58 to i32
-  %66 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %2, i32 %65, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %67 = extractvalue %dx.types.ResRet.i32 %66, 0
-  %68 = extractvalue %dx.types.ResRet.i32 %66, 1
-  %69 = call double @dx.op.makeDouble.f64(i32 101, i32 %67, i32 %68)  ; MakeDouble(lo,hi)
-  %70 = fptrunc double %69 to float
-  %71 = icmp eq i32 %18, 1
-  %72 = fadd fast float %64, 1.000000e+00
-  %73 = fadd fast float %70, 1.000000e+00
-  br i1 %71, label %74, label %81
-
-; <label>:74                                      ; preds = %11
-  %75 = fmul fast float %72, 5.000000e-01
-  %76 = fmul fast float %73, 5.000000e-01
-  %77 = fadd fast float %14, -1.000000e+00
-  %78 = fadd fast float %16, -1.000000e+00
-  %79 = fmul fast float %75, %77
-  %80 = fmul fast float %76, %78
-  br label %88
-
-; <label>:81                                      ; preds = %11
-  %82 = fmul fast float %14, %72
-  %83 = fmul fast float %73, %16
-  %84 = fadd fast float %82, -1.000000e+00
-  %85 = fadd fast float %83, -1.000000e+00
-  %86 = fmul fast float %84, 5.000000e-01
-  %87 = fmul fast float %85, 5.000000e-01
-  br label %88
-
-; <label>:88                                      ; preds = %81, %74
-  %89 = phi float [ %79, %74 ], [ %86, %81 ]
-  %90 = phi float [ %80, %74 ], [ %87, %81 ]
-  %91 = extractvalue %dx.types.CBufRet.i32 %6, 2
-  %92 = icmp eq i32 %91, 1
-  br i1 %92, label %93, label %96
-
-; <label>:93                                      ; preds = %88
-  %94 = call float @dx.op.unary.f32(i32 26, float %89)  ; Round_ne(value)
-  %95 = call float @dx.op.unary.f32(i32 26, float %90)  ; Round_ne(value)
-  br label %96
-
-; <label>:96                                      ; preds = %93, %88
-  %97 = phi float [ %94, %93 ], [ %89, %88 ]
-  %98 = phi float [ %95, %93 ], [ %90, %88 ]
-  %99 = fcmp fast olt float %97, %20
-  %100 = fcmp fast ogt float %97, %22
-  %101 = or i1 %99, %100
-  %102 = fcmp fast olt float %98, %20
-  %103 = or i1 %101, %102
-  %104 = fcmp fast ogt float %98, %24
-  %105 = or i1 %104, %103
-  br i1 %105, label %106, label %179
-
-; <label>:106                                     ; preds = %96
-  %107 = extractvalue %dx.types.CBufRet.i32 %6, 3
-  %108 = icmp eq i32 %107, 1
-  br i1 %108, label %109, label %118
-
-; <label>:109                                     ; preds = %106
-  %110 = add i32 %13, -1
-  %111 = uitofp i32 %110 to float
-  %112 = call float @dx.op.binary.f32(i32 35, float %97, float 0.000000e+00)  ; FMax(a,b)
-  %113 = call float @dx.op.binary.f32(i32 36, float %112, float %111)  ; FMin(a,b)
-  %114 = add i32 %15, -1
-  %115 = uitofp i32 %114 to float
-  %116 = call float @dx.op.binary.f32(i32 35, float %98, float 0.000000e+00)  ; FMax(a,b)
-  %117 = call float @dx.op.binary.f32(i32 36, float %116, float %115)  ; FMin(a,b)
-  br label %179
-
-; <label>:118                                     ; preds = %106
-  %119 = icmp eq i32 %107, 2
-  br i1 %119, label %120, label %179
-
-; <label>:120                                     ; preds = %118
-  %121 = fsub fast float %22, %20
-  br i1 %99, label %122, label %135
-
-; <label>:122                                     ; preds = %120
-  %123 = fsub fast float %20, %97
-  %124 = fdiv fast float %123, %121
-  %125 = fptoui float %124 to i32
-  %126 = uitofp i32 %125 to float
-  %127 = fmul fast float %126, %121
-  %128 = fsub fast float %123, %127
-  %129 = and i32 %125, 1
-  %130 = icmp eq i32 %129, 0
-  br i1 %130, label %131, label %133
-
-; <label>:131                                     ; preds = %122
-  %132 = fadd fast float %128, %20
-  br label %149
-
-; <label>:133                                     ; preds = %122
-  %134 = fsub fast float %22, %128
-  br label %149
-
-; <label>:135                                     ; preds = %120
-  br i1 %100, label %136, label %149
-
-; <label>:136                                     ; preds = %135
-  %137 = fsub fast float %97, %22
-  %138 = fdiv fast float %137, %121
-  %139 = fptoui float %138 to i32
-  %140 = uitofp i32 %139 to float
-  %141 = fmul fast float %140, %121
-  %142 = fsub fast float %137, %141
-  %143 = and i32 %139, 1
-  %144 = icmp eq i32 %143, 0
-  br i1 %144, label %145, label %147
-
-; <label>:145                                     ; preds = %136
-  %146 = fsub fast float %22, %142
-  br label %149
-
-; <label>:147                                     ; preds = %136
-  %148 = fadd fast float %142, %20
-  br label %149
-
-; <label>:149                                     ; preds = %147, %145, %135, %133, %131
-  %150 = phi float [ %132, %131 ], [ %134, %133 ], [ %146, %145 ], [ %148, %147 ], [ %97, %135 ]
-  %151 = fsub fast float %24, %20
-  br i1 %102, label %152, label %165
-
-; <label>:152                                     ; preds = %149
-  %153 = fsub fast float %20, %98
-  %154 = fdiv fast float %153, %151
-  %155 = fptoui float %154 to i32
-  %156 = uitofp i32 %155 to float
-  %157 = fmul fast float %156, %151
-  %158 = fsub fast float %153, %157
-  %159 = and i32 %155, 1
-  %160 = icmp eq i32 %159, 0
-  br i1 %160, label %161, label %163
-
-; <label>:161                                     ; preds = %152
-  %162 = fadd fast float %158, %20
-  br label %179
-
-; <label>:163                                     ; preds = %152
-  %164 = fsub fast float %24, %158
-  br label %179
-
-; <label>:165                                     ; preds = %149
-  br i1 %104, label %166, label %179
-
-; <label>:166                                     ; preds = %165
-  %167 = fsub fast float %98, %24
-  %168 = fdiv fast float %167, %151
-  %169 = fptoui float %168 to i32
-  %170 = uitofp i32 %169 to float
-  %171 = fmul fast float %170, %151
-  %172 = fsub fast float %167, %171
-  %173 = and i32 %169, 1
-  %174 = icmp eq i32 %173, 0
-  br i1 %174, label %175, label %177
-
-; <label>:175                                     ; preds = %166
-  %176 = fsub fast float %24, %172
-  br label %179
-
-; <label>:177                                     ; preds = %166
-  %178 = fadd fast float %172, %20
-  br label %179
-
-; <label>:179                                     ; preds = %177, %175, %165, %163, %161, %118, %109, %96
-  %180 = phi float [ %113, %109 ], [ %97, %118 ], [ %97, %96 ], [ %150, %177 ], [ %150, %175 ], [ %150, %165 ], [ %150, %163 ], [ %150, %161 ]
-  %181 = phi float [ %117, %109 ], [ %98, %118 ], [ %98, %96 ], [ %178, %177 ], [ %176, %175 ], [ %98, %165 ], [ %164, %163 ], [ %162, %161 ]
-  %182 = uitofp i32 %37 to float
-  br i1 %92, label %183, label %336
-
-; <label>:183                                     ; preds = %179
-  %184 = extractvalue %dx.types.CBufRet.i32 %6, 3
-  %185 = icmp eq i32 %184, 0
-  br i1 %185, label %186, label %212
-
-; <label>:186                                     ; preds = %183
-  %187 = fcmp fast oge float %180, 0.000000e+00
-  %188 = fptoui float %180 to i32
-  %189 = icmp ult i32 %188, %13
-  %190 = and i1 %187, %189
-  %191 = fcmp fast oge float %181, 0.000000e+00
-  %192 = and i1 %191, %190
-  %193 = fptoui float %181 to i32
-  %194 = icmp ult i32 %193, %15
-  %195 = and i1 %194, %192
-  br i1 %195, label %196, label %332
-
-; <label>:196                                     ; preds = %186
-  %197 = fptoui float %45 to i32
-  %198 = fptoui float %182 to i32
-  %199 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %200 = extractvalue %dx.types.CBufRet.i32 %199, 0
-  %201 = extractvalue %dx.types.CBufRet.i32 %199, 1
-  %202 = extractvalue %dx.types.CBufRet.i32 %199, 2
-  %203 = extractvalue %dx.types.CBufRet.i32 %199, 3
-  %204 = mul i32 %200, %197
-  %205 = call i32 @dx.op.tertiary.i32(i32 48, i32 %198, i32 %201, i32 %204)  ; IMad(a,b,c)
-  %206 = call i32 @dx.op.tertiary.i32(i32 48, i32 %193, i32 %202, i32 %205)  ; IMad(a,b,c)
-  %207 = call i32 @dx.op.tertiary.i32(i32 48, i32 %188, i32 %203, i32 %206)  ; IMad(a,b,c)
-  %208 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %207, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %209 = extractvalue %dx.types.ResRet.i32 %208, 0
-  %210 = icmp ne i32 %209, 0
-  %211 = uitofp i1 %210 to float
-  br label %332
-
-; <label>:212                                     ; preds = %183
-  %213 = icmp eq i32 %184, 1
-  br i1 %213, label %214, label %244
-
-; <label>:214                                     ; preds = %212
-  %215 = add i32 %13, -1
-  %216 = uitofp i32 %215 to float
-  %217 = call float @dx.op.binary.f32(i32 35, float %180, float 0.000000e+00)  ; FMax(a,b)
-  %218 = call float @dx.op.binary.f32(i32 36, float %217, float %216)  ; FMin(a,b)
-  %219 = fptoui float %218 to i32
-  %220 = add i32 %15, -1
-  %221 = uitofp i32 %220 to float
-  %222 = call float @dx.op.binary.f32(i32 35, float %181, float 0.000000e+00)  ; FMax(a,b)
-  %223 = call float @dx.op.binary.f32(i32 36, float %222, float %221)  ; FMin(a,b)
-  %224 = fptoui float %223 to i32
-  %225 = uitofp i32 %224 to float
-  %226 = uitofp i32 %219 to float
-  %227 = fptoui float %45 to i32
-  %228 = fptoui float %182 to i32
-  %229 = fptoui float %225 to i32
-  %230 = fptoui float %226 to i32
-  %231 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %232 = extractvalue %dx.types.CBufRet.i32 %231, 0
-  %233 = extractvalue %dx.types.CBufRet.i32 %231, 1
-  %234 = extractvalue %dx.types.CBufRet.i32 %231, 2
-  %235 = extractvalue %dx.types.CBufRet.i32 %231, 3
-  %236 = mul i32 %232, %227
-  %237 = call i32 @dx.op.tertiary.i32(i32 48, i32 %228, i32 %233, i32 %236)  ; IMad(a,b,c)
-  %238 = call i32 @dx.op.tertiary.i32(i32 48, i32 %229, i32 %234, i32 %237)  ; IMad(a,b,c)
-  %239 = call i32 @dx.op.tertiary.i32(i32 48, i32 %230, i32 %235, i32 %238)  ; IMad(a,b,c)
-  %240 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %239, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %241 = extractvalue %dx.types.ResRet.i32 %240, 0
-  %242 = icmp ne i32 %241, 0
-  %243 = uitofp i1 %242 to float
-  br label %332
-
-; <label>:244                                     ; preds = %212
-  %245 = icmp eq i32 %184, 2
-  br i1 %245, label %246, label %332
-
-; <label>:246                                     ; preds = %244
-  %247 = fsub fast float %22, %20
-  %248 = fcmp fast olt float %180, %20
-  br i1 %248, label %249, label %262
-
-; <label>:249                                     ; preds = %246
-  %250 = fsub fast float %20, %180
-  %251 = fdiv fast float %250, %247
-  %252 = fptoui float %251 to i32
-  %253 = uitofp i32 %252 to float
-  %254 = fmul fast float %253, %247
-  %255 = fsub fast float %250, %254
-  %256 = and i32 %252, 1
-  %257 = icmp eq i32 %256, 0
-  br i1 %257, label %258, label %260
-
-; <label>:258                                     ; preds = %249
-  %259 = fadd fast float %255, %20
-  br label %277
-
-; <label>:260                                     ; preds = %249
-  %261 = fsub fast float %22, %255
-  br label %277
-
-; <label>:262                                     ; preds = %246
-  %263 = fcmp fast ogt float %180, %22
-  br i1 %263, label %264, label %277
-
-; <label>:264                                     ; preds = %262
-  %265 = fsub fast float %180, %22
-  %266 = fdiv fast float %265, %247
-  %267 = fptoui float %266 to i32
-  %268 = uitofp i32 %267 to float
-  %269 = fmul fast float %268, %247
-  %270 = fsub fast float %265, %269
-  %271 = and i32 %267, 1
-  %272 = icmp eq i32 %271, 0
-  br i1 %272, label %273, label %275
-
-; <label>:273                                     ; preds = %264
-  %274 = fsub fast float %22, %270
-  br label %277
-
-; <label>:275                                     ; preds = %264
-  %276 = fadd fast float %270, %20
-  br label %277
-
-; <label>:277                                     ; preds = %275, %273, %262, %260, %258
-  %278 = phi float [ %259, %258 ], [ %261, %260 ], [ %274, %273 ], [ %276, %275 ], [ %180, %262 ]
-  %279 = fptoui float %278 to i32
-  %280 = fsub fast float %24, %20
-  %281 = fcmp fast olt float %181, %20
-  br i1 %281, label %282, label %295
-
-; <label>:282                                     ; preds = %277
-  %283 = fsub fast float %20, %181
-  %284 = fdiv fast float %283, %280
-  %285 = fptoui float %284 to i32
-  %286 = uitofp i32 %285 to float
-  %287 = fmul fast float %286, %280
-  %288 = fsub fast float %283, %287
-  %289 = and i32 %285, 1
-  %290 = icmp eq i32 %289, 0
-  br i1 %290, label %291, label %293
-
-; <label>:291                                     ; preds = %282
-  %292 = fadd fast float %288, %20
-  br label %310
-
-; <label>:293                                     ; preds = %282
-  %294 = fsub fast float %24, %288
-  br label %310
-
-; <label>:295                                     ; preds = %277
-  %296 = fcmp fast ogt float %181, %24
-  br i1 %296, label %297, label %310
-
-; <label>:297                                     ; preds = %295
-  %298 = fsub fast float %181, %24
-  %299 = fdiv fast float %298, %280
-  %300 = fptoui float %299 to i32
-  %301 = uitofp i32 %300 to float
-  %302 = fmul fast float %301, %280
-  %303 = fsub fast float %298, %302
-  %304 = and i32 %300, 1
-  %305 = icmp eq i32 %304, 0
-  br i1 %305, label %306, label %308
-
-; <label>:306                                     ; preds = %297
-  %307 = fsub fast float %24, %303
-  br label %310
-
-; <label>:308                                     ; preds = %297
-  %309 = fadd fast float %303, %20
-  br label %310
-
-; <label>:310                                     ; preds = %308, %306, %295, %293, %291
-  %311 = phi float [ %292, %291 ], [ %294, %293 ], [ %307, %306 ], [ %309, %308 ], [ %181, %295 ]
-  %312 = fptoui float %311 to i32
-  %313 = uitofp i32 %312 to float
-  %314 = uitofp i32 %279 to float
-  %315 = fptoui float %45 to i32
-  %316 = fptoui float %182 to i32
-  %317 = fptoui float %313 to i32
-  %318 = fptoui float %314 to i32
-  %319 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %320 = extractvalue %dx.types.CBufRet.i32 %319, 0
-  %321 = extractvalue %dx.types.CBufRet.i32 %319, 1
-  %322 = extractvalue %dx.types.CBufRet.i32 %319, 2
-  %323 = extractvalue %dx.types.CBufRet.i32 %319, 3
-  %324 = mul i32 %320, %315
-  %325 = call i32 @dx.op.tertiary.i32(i32 48, i32 %316, i32 %321, i32 %324)  ; IMad(a,b,c)
-  %326 = call i32 @dx.op.tertiary.i32(i32 48, i32 %317, i32 %322, i32 %325)  ; IMad(a,b,c)
-  %327 = call i32 @dx.op.tertiary.i32(i32 48, i32 %318, i32 %323, i32 %326)  ; IMad(a,b,c)
-  %328 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %327, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %329 = extractvalue %dx.types.ResRet.i32 %328, 0
-  %330 = icmp ne i32 %329, 0
-  %331 = uitofp i1 %330 to float
-  br label %332
-
-; <label>:332                                     ; preds = %310, %244, %214, %196, %186
-  %333 = phi float [ %211, %196 ], [ 0.000000e+00, %186 ], [ %243, %214 ], [ %331, %310 ], [ 0.000000e+00, %244 ]
-  %334 = fcmp fast une float %333, 0.000000e+00
-  %335 = zext i1 %334 to i32
-  call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %1, i32 %8, i32 0, i32 %335, i32 undef, i32 undef, i32 undef, i8 1, i32 4)  ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment)
-  br label %3389
-
-; <label>:336                                     ; preds = %179
-  %337 = icmp eq i32 %91, 0
-  br i1 %337, label %338, label %950
-
-; <label>:338                                     ; preds = %336
-  %339 = call float @dx.op.unary.f32(i32 27, float %180)  ; Round_ni(value)
-  %340 = call float @dx.op.unary.f32(i32 27, float %181)  ; Round_ni(value)
-  %341 = fadd fast float %339, 1.000000e+00
-  %342 = fadd fast float %340, 1.000000e+00
-  %343 = extractvalue %dx.types.CBufRet.i32 %6, 3
-  %344 = icmp eq i32 %343, 0
-  br i1 %344, label %345, label %371
-
-; <label>:345                                     ; preds = %338
-  %346 = fcmp fast oge float %339, 0.000000e+00
-  %347 = fptoui float %339 to i32
-  %348 = icmp ult i32 %347, %13
-  %349 = and i1 %346, %348
-  %350 = fcmp fast oge float %340, 0.000000e+00
-  %351 = and i1 %350, %349
-  %352 = fptoui float %340 to i32
-  %353 = icmp ult i32 %352, %15
-  %354 = and i1 %353, %351
-  br i1 %354, label %355, label %491
-
-; <label>:355                                     ; preds = %345
-  %356 = fptoui float %45 to i32
-  %357 = fptoui float %182 to i32
-  %358 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %359 = extractvalue %dx.types.CBufRet.i32 %358, 0
-  %360 = extractvalue %dx.types.CBufRet.i32 %358, 1
-  %361 = extractvalue %dx.types.CBufRet.i32 %358, 2
-  %362 = extractvalue %dx.types.CBufRet.i32 %358, 3
-  %363 = mul i32 %359, %356
-  %364 = call i32 @dx.op.tertiary.i32(i32 48, i32 %357, i32 %360, i32 %363)  ; IMad(a,b,c)
-  %365 = call i32 @dx.op.tertiary.i32(i32 48, i32 %352, i32 %361, i32 %364)  ; IMad(a,b,c)
-  %366 = call i32 @dx.op.tertiary.i32(i32 48, i32 %347, i32 %362, i32 %365)  ; IMad(a,b,c)
-  %367 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %366, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %368 = extractvalue %dx.types.ResRet.i32 %367, 0
-  %369 = icmp ne i32 %368, 0
-  %370 = uitofp i1 %369 to float
-  br label %491
-
-; <label>:371                                     ; preds = %338
-  %372 = icmp eq i32 %343, 1
-  br i1 %372, label %373, label %403
-
-; <label>:373                                     ; preds = %371
-  %374 = add i32 %13, -1
-  %375 = uitofp i32 %374 to float
-  %376 = call float @dx.op.binary.f32(i32 35, float %339, float 0.000000e+00)  ; FMax(a,b)
-  %377 = call float @dx.op.binary.f32(i32 36, float %376, float %375)  ; FMin(a,b)
-  %378 = fptoui float %377 to i32
-  %379 = add i32 %15, -1
-  %380 = uitofp i32 %379 to float
-  %381 = call float @dx.op.binary.f32(i32 35, float %340, float 0.000000e+00)  ; FMax(a,b)
-  %382 = call float @dx.op.binary.f32(i32 36, float %381, float %380)  ; FMin(a,b)
-  %383 = fptoui float %382 to i32
-  %384 = uitofp i32 %383 to float
-  %385 = uitofp i32 %378 to float
-  %386 = fptoui float %45 to i32
-  %387 = fptoui float %182 to i32
-  %388 = fptoui float %384 to i32
-  %389 = fptoui float %385 to i32
-  %390 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %391 = extractvalue %dx.types.CBufRet.i32 %390, 0
-  %392 = extractvalue %dx.types.CBufRet.i32 %390, 1
-  %393 = extractvalue %dx.types.CBufRet.i32 %390, 2
-  %394 = extractvalue %dx.types.CBufRet.i32 %390, 3
-  %395 = mul i32 %391, %386
-  %396 = call i32 @dx.op.tertiary.i32(i32 48, i32 %387, i32 %392, i32 %395)  ; IMad(a,b,c)
-  %397 = call i32 @dx.op.tertiary.i32(i32 48, i32 %388, i32 %393, i32 %396)  ; IMad(a,b,c)
-  %398 = call i32 @dx.op.tertiary.i32(i32 48, i32 %389, i32 %394, i32 %397)  ; IMad(a,b,c)
-  %399 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %398, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %400 = extractvalue %dx.types.ResRet.i32 %399, 0
-  %401 = icmp ne i32 %400, 0
-  %402 = uitofp i1 %401 to float
-  br label %491
-
-; <label>:403                                     ; preds = %371
-  %404 = icmp eq i32 %343, 2
-  br i1 %404, label %405, label %491
-
-; <label>:405                                     ; preds = %403
-  %406 = fsub fast float %22, %20
-  %407 = fcmp fast olt float %339, %20
-  br i1 %407, label %408, label %421
-
-; <label>:408                                     ; preds = %405
-  %409 = fsub fast float %20, %339
-  %410 = fdiv fast float %409, %406
-  %411 = fptoui float %410 to i32
-  %412 = uitofp i32 %411 to float
-  %413 = fmul fast float %412, %406
-  %414 = fsub fast float %409, %413
-  %415 = and i32 %411, 1
-  %416 = icmp eq i32 %415, 0
-  br i1 %416, label %417, label %419
-
-; <label>:417                                     ; preds = %408
-  %418 = fadd fast float %414, %20
-  br label %436
-
-; <label>:419                                     ; preds = %408
-  %420 = fsub fast float %22, %414
-  br label %436
-
-; <label>:421                                     ; preds = %405
-  %422 = fcmp fast ogt float %339, %22
-  br i1 %422, label %423, label %436
-
-; <label>:423                                     ; preds = %421
-  %424 = fsub fast float %339, %22
-  %425 = fdiv fast float %424, %406
-  %426 = fptoui float %425 to i32
-  %427 = uitofp i32 %426 to float
-  %428 = fmul fast float %427, %406
-  %429 = fsub fast float %424, %428
-  %430 = and i32 %426, 1
-  %431 = icmp eq i32 %430, 0
-  br i1 %431, label %432, label %434
-
-; <label>:432                                     ; preds = %423
-  %433 = fsub fast float %22, %429
-  br label %436
-
-; <label>:434                                     ; preds = %423
-  %435 = fadd fast float %429, %20
-  br label %436
-
-; <label>:436                                     ; preds = %434, %432, %421, %419, %417
-  %437 = phi float [ %418, %417 ], [ %420, %419 ], [ %433, %432 ], [ %435, %434 ], [ %339, %421 ]
-  %438 = fptoui float %437 to i32
-  %439 = fsub fast float %24, %20
-  %440 = fcmp fast olt float %340, %20
-  br i1 %440, label %441, label %454
-
-; <label>:441                                     ; preds = %436
-  %442 = fsub fast float %20, %340
-  %443 = fdiv fast float %442, %439
-  %444 = fptoui float %443 to i32
-  %445 = uitofp i32 %444 to float
-  %446 = fmul fast float %445, %439
-  %447 = fsub fast float %442, %446
-  %448 = and i32 %444, 1
-  %449 = icmp eq i32 %448, 0
-  br i1 %449, label %450, label %452
-
-; <label>:450                                     ; preds = %441
-  %451 = fadd fast float %447, %20
-  br label %469
-
-; <label>:452                                     ; preds = %441
-  %453 = fsub fast float %24, %447
-  br label %469
-
-; <label>:454                                     ; preds = %436
-  %455 = fcmp fast ogt float %340, %24
-  br i1 %455, label %456, label %469
-
-; <label>:456                                     ; preds = %454
-  %457 = fsub fast float %340, %24
-  %458 = fdiv fast float %457, %439
-  %459 = fptoui float %458 to i32
-  %460 = uitofp i32 %459 to float
-  %461 = fmul fast float %460, %439
-  %462 = fsub fast float %457, %461
-  %463 = and i32 %459, 1
-  %464 = icmp eq i32 %463, 0
-  br i1 %464, label %465, label %467
-
-; <label>:465                                     ; preds = %456
-  %466 = fsub fast float %24, %462
-  br label %469
-
-; <label>:467                                     ; preds = %456
-  %468 = fadd fast float %462, %20
-  br label %469
-
-; <label>:469                                     ; preds = %467, %465, %454, %452, %450
-  %470 = phi float [ %451, %450 ], [ %453, %452 ], [ %466, %465 ], [ %468, %467 ], [ %340, %454 ]
-  %471 = fptoui float %470 to i32
-  %472 = uitofp i32 %471 to float
-  %473 = uitofp i32 %438 to float
-  %474 = fptoui float %45 to i32
-  %475 = fptoui float %182 to i32
-  %476 = fptoui float %472 to i32
-  %477 = fptoui float %473 to i32
-  %478 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %479 = extractvalue %dx.types.CBufRet.i32 %478, 0
-  %480 = extractvalue %dx.types.CBufRet.i32 %478, 1
-  %481 = extractvalue %dx.types.CBufRet.i32 %478, 2
-  %482 = extractvalue %dx.types.CBufRet.i32 %478, 3
-  %483 = mul i32 %479, %474
-  %484 = call i32 @dx.op.tertiary.i32(i32 48, i32 %475, i32 %480, i32 %483)  ; IMad(a,b,c)
-  %485 = call i32 @dx.op.tertiary.i32(i32 48, i32 %476, i32 %481, i32 %484)  ; IMad(a,b,c)
-  %486 = call i32 @dx.op.tertiary.i32(i32 48, i32 %477, i32 %482, i32 %485)  ; IMad(a,b,c)
-  %487 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %486, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %488 = extractvalue %dx.types.ResRet.i32 %487, 0
-  %489 = icmp ne i32 %488, 0
-  %490 = uitofp i1 %489 to float
-  br label %491
-
-; <label>:491                                     ; preds = %469, %403, %373, %355, %345
-  %492 = phi float [ %370, %355 ], [ 0.000000e+00, %345 ], [ %402, %373 ], [ %490, %469 ], [ 0.000000e+00, %403 ]
-  br i1 %344, label %493, label %519
-
-; <label>:493                                     ; preds = %491
-  %494 = fcmp fast oge float %341, 0.000000e+00
-  %495 = fptoui float %341 to i32
-  %496 = icmp ult i32 %495, %13
-  %497 = and i1 %494, %496
-  %498 = fcmp fast oge float %340, 0.000000e+00
-  %499 = and i1 %498, %497
-  %500 = fptoui float %340 to i32
-  %501 = icmp ult i32 %500, %15
-  %502 = and i1 %501, %499
-  br i1 %502, label %503, label %639
-
-; <label>:503                                     ; preds = %493
-  %504 = fptoui float %45 to i32
-  %505 = fptoui float %182 to i32
-  %506 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %507 = extractvalue %dx.types.CBufRet.i32 %506, 0
-  %508 = extractvalue %dx.types.CBufRet.i32 %506, 1
-  %509 = extractvalue %dx.types.CBufRet.i32 %506, 2
-  %510 = extractvalue %dx.types.CBufRet.i32 %506, 3
-  %511 = mul i32 %507, %504
-  %512 = call i32 @dx.op.tertiary.i32(i32 48, i32 %505, i32 %508, i32 %511)  ; IMad(a,b,c)
-  %513 = call i32 @dx.op.tertiary.i32(i32 48, i32 %500, i32 %509, i32 %512)  ; IMad(a,b,c)
-  %514 = call i32 @dx.op.tertiary.i32(i32 48, i32 %495, i32 %510, i32 %513)  ; IMad(a,b,c)
-  %515 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %514, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %516 = extractvalue %dx.types.ResRet.i32 %515, 0
-  %517 = icmp ne i32 %516, 0
-  %518 = uitofp i1 %517 to float
-  br label %639
-
-; <label>:519                                     ; preds = %491
-  %520 = icmp eq i32 %343, 1
-  br i1 %520, label %521, label %551
-
-; <label>:521                                     ; preds = %519
-  %522 = add i32 %13, -1
-  %523 = uitofp i32 %522 to float
-  %524 = call float @dx.op.binary.f32(i32 35, float %341, float 0.000000e+00)  ; FMax(a,b)
-  %525 = call float @dx.op.binary.f32(i32 36, float %524, float %523)  ; FMin(a,b)
-  %526 = fptoui float %525 to i32
-  %527 = add i32 %15, -1
-  %528 = uitofp i32 %527 to float
-  %529 = call float @dx.op.binary.f32(i32 35, float %340, float 0.000000e+00)  ; FMax(a,b)
-  %530 = call float @dx.op.binary.f32(i32 36, float %529, float %528)  ; FMin(a,b)
-  %531 = fptoui float %530 to i32
-  %532 = uitofp i32 %531 to float
-  %533 = uitofp i32 %526 to float
-  %534 = fptoui float %45 to i32
-  %535 = fptoui float %182 to i32
-  %536 = fptoui float %532 to i32
-  %537 = fptoui float %533 to i32
-  %538 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %539 = extractvalue %dx.types.CBufRet.i32 %538, 0
-  %540 = extractvalue %dx.types.CBufRet.i32 %538, 1
-  %541 = extractvalue %dx.types.CBufRet.i32 %538, 2
-  %542 = extractvalue %dx.types.CBufRet.i32 %538, 3
-  %543 = mul i32 %539, %534
-  %544 = call i32 @dx.op.tertiary.i32(i32 48, i32 %535, i32 %540, i32 %543)  ; IMad(a,b,c)
-  %545 = call i32 @dx.op.tertiary.i32(i32 48, i32 %536, i32 %541, i32 %544)  ; IMad(a,b,c)
-  %546 = call i32 @dx.op.tertiary.i32(i32 48, i32 %537, i32 %542, i32 %545)  ; IMad(a,b,c)
-  %547 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %546, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %548 = extractvalue %dx.types.ResRet.i32 %547, 0
-  %549 = icmp ne i32 %548, 0
-  %550 = uitofp i1 %549 to float
-  br label %639
-
-; <label>:551                                     ; preds = %519
-  %552 = icmp eq i32 %343, 2
-  br i1 %552, label %553, label %639
-
-; <label>:553                                     ; preds = %551
-  %554 = fsub fast float %22, %20
-  %555 = fcmp fast olt float %341, %20
-  br i1 %555, label %556, label %569
-
-; <label>:556                                     ; preds = %553
-  %557 = fsub fast float %20, %341
-  %558 = fdiv fast float %557, %554
-  %559 = fptoui float %558 to i32
-  %560 = uitofp i32 %559 to float
-  %561 = fmul fast float %560, %554
-  %562 = fsub fast float %557, %561
-  %563 = and i32 %559, 1
-  %564 = icmp eq i32 %563, 0
-  br i1 %564, label %565, label %567
-
-; <label>:565                                     ; preds = %556
-  %566 = fadd fast float %562, %20
-  br label %584
-
-; <label>:567                                     ; preds = %556
-  %568 = fsub fast float %22, %562
-  br label %584
-
-; <label>:569                                     ; preds = %553
-  %570 = fcmp fast ogt float %341, %22
-  br i1 %570, label %571, label %584
-
-; <label>:571                                     ; preds = %569
-  %572 = fsub fast float %341, %22
-  %573 = fdiv fast float %572, %554
-  %574 = fptoui float %573 to i32
-  %575 = uitofp i32 %574 to float
-  %576 = fmul fast float %575, %554
-  %577 = fsub fast float %572, %576
-  %578 = and i32 %574, 1
-  %579 = icmp eq i32 %578, 0
-  br i1 %579, label %580, label %582
-
-; <label>:580                                     ; preds = %571
-  %581 = fsub fast float %22, %577
-  br label %584
-
-; <label>:582                                     ; preds = %571
-  %583 = fadd fast float %577, %20
-  br label %584
-
-; <label>:584                                     ; preds = %582, %580, %569, %567, %565
-  %585 = phi float [ %566, %565 ], [ %568, %567 ], [ %581, %580 ], [ %583, %582 ], [ %341, %569 ]
-  %586 = fptoui float %585 to i32
-  %587 = fsub fast float %24, %20
-  %588 = fcmp fast olt float %340, %20
-  br i1 %588, label %589, label %602
-
-; <label>:589                                     ; preds = %584
-  %590 = fsub fast float %20, %340
-  %591 = fdiv fast float %590, %587
-  %592 = fptoui float %591 to i32
-  %593 = uitofp i32 %592 to float
-  %594 = fmul fast float %593, %587
-  %595 = fsub fast float %590, %594
-  %596 = and i32 %592, 1
-  %597 = icmp eq i32 %596, 0
-  br i1 %597, label %598, label %600
-
-; <label>:598                                     ; preds = %589
-  %599 = fadd fast float %595, %20
-  br label %617
-
-; <label>:600                                     ; preds = %589
-  %601 = fsub fast float %24, %595
-  br label %617
-
-; <label>:602                                     ; preds = %584
-  %603 = fcmp fast ogt float %340, %24
-  br i1 %603, label %604, label %617
-
-; <label>:604                                     ; preds = %602
-  %605 = fsub fast float %340, %24
-  %606 = fdiv fast float %605, %587
-  %607 = fptoui float %606 to i32
-  %608 = uitofp i32 %607 to float
-  %609 = fmul fast float %608, %587
-  %610 = fsub fast float %605, %609
-  %611 = and i32 %607, 1
-  %612 = icmp eq i32 %611, 0
-  br i1 %612, label %613, label %615
-
-; <label>:613                                     ; preds = %604
-  %614 = fsub fast float %24, %610
-  br label %617
-
-; <label>:615                                     ; preds = %604
-  %616 = fadd fast float %610, %20
-  br label %617
-
-; <label>:617                                     ; preds = %615, %613, %602, %600, %598
-  %618 = phi float [ %599, %598 ], [ %601, %600 ], [ %614, %613 ], [ %616, %615 ], [ %340, %602 ]
-  %619 = fptoui float %618 to i32
-  %620 = uitofp i32 %619 to float
-  %621 = uitofp i32 %586 to float
-  %622 = fptoui float %45 to i32
-  %623 = fptoui float %182 to i32
-  %624 = fptoui float %620 to i32
-  %625 = fptoui float %621 to i32
-  %626 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %627 = extractvalue %dx.types.CBufRet.i32 %626, 0
-  %628 = extractvalue %dx.types.CBufRet.i32 %626, 1
-  %629 = extractvalue %dx.types.CBufRet.i32 %626, 2
-  %630 = extractvalue %dx.types.CBufRet.i32 %626, 3
-  %631 = mul i32 %627, %622
-  %632 = call i32 @dx.op.tertiary.i32(i32 48, i32 %623, i32 %628, i32 %631)  ; IMad(a,b,c)
-  %633 = call i32 @dx.op.tertiary.i32(i32 48, i32 %624, i32 %629, i32 %632)  ; IMad(a,b,c)
-  %634 = call i32 @dx.op.tertiary.i32(i32 48, i32 %625, i32 %630, i32 %633)  ; IMad(a,b,c)
-  %635 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %634, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %636 = extractvalue %dx.types.ResRet.i32 %635, 0
-  %637 = icmp ne i32 %636, 0
-  %638 = uitofp i1 %637 to float
-  br label %639
-
-; <label>:639                                     ; preds = %617, %551, %521, %503, %493
-  %640 = phi float [ %518, %503 ], [ 0.000000e+00, %493 ], [ %550, %521 ], [ %638, %617 ], [ 0.000000e+00, %551 ]
-  br i1 %344, label %641, label %667
-
-; <label>:641                                     ; preds = %639
-  %642 = fcmp fast oge float %339, 0.000000e+00
-  %643 = fptoui float %339 to i32
-  %644 = icmp ult i32 %643, %13
-  %645 = and i1 %642, %644
-  %646 = fcmp fast oge float %342, 0.000000e+00
-  %647 = and i1 %646, %645
-  %648 = fptoui float %342 to i32
-  %649 = icmp ult i32 %648, %15
-  %650 = and i1 %649, %647
-  br i1 %650, label %651, label %787
-
-; <label>:651                                     ; preds = %641
-  %652 = fptoui float %45 to i32
-  %653 = fptoui float %182 to i32
-  %654 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %655 = extractvalue %dx.types.CBufRet.i32 %654, 0
-  %656 = extractvalue %dx.types.CBufRet.i32 %654, 1
-  %657 = extractvalue %dx.types.CBufRet.i32 %654, 2
-  %658 = extractvalue %dx.types.CBufRet.i32 %654, 3
-  %659 = mul i32 %655, %652
-  %660 = call i32 @dx.op.tertiary.i32(i32 48, i32 %653, i32 %656, i32 %659)  ; IMad(a,b,c)
-  %661 = call i32 @dx.op.tertiary.i32(i32 48, i32 %648, i32 %657, i32 %660)  ; IMad(a,b,c)
-  %662 = call i32 @dx.op.tertiary.i32(i32 48, i32 %643, i32 %658, i32 %661)  ; IMad(a,b,c)
-  %663 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %662, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %664 = extractvalue %dx.types.ResRet.i32 %663, 0
-  %665 = icmp ne i32 %664, 0
-  %666 = uitofp i1 %665 to float
-  br label %787
-
-; <label>:667                                     ; preds = %639
-  %668 = icmp eq i32 %343, 1
-  br i1 %668, label %669, label %699
-
-; <label>:669                                     ; preds = %667
-  %670 = add i32 %13, -1
-  %671 = uitofp i32 %670 to float
-  %672 = call float @dx.op.binary.f32(i32 35, float %339, float 0.000000e+00)  ; FMax(a,b)
-  %673 = call float @dx.op.binary.f32(i32 36, float %672, float %671)  ; FMin(a,b)
-  %674 = fptoui float %673 to i32
-  %675 = add i32 %15, -1
-  %676 = uitofp i32 %675 to float
-  %677 = call float @dx.op.binary.f32(i32 35, float %342, float 0.000000e+00)  ; FMax(a,b)
-  %678 = call float @dx.op.binary.f32(i32 36, float %677, float %676)  ; FMin(a,b)
-  %679 = fptoui float %678 to i32
-  %680 = uitofp i32 %679 to float
-  %681 = uitofp i32 %674 to float
-  %682 = fptoui float %45 to i32
-  %683 = fptoui float %182 to i32
-  %684 = fptoui float %680 to i32
-  %685 = fptoui float %681 to i32
-  %686 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %687 = extractvalue %dx.types.CBufRet.i32 %686, 0
-  %688 = extractvalue %dx.types.CBufRet.i32 %686, 1
-  %689 = extractvalue %dx.types.CBufRet.i32 %686, 2
-  %690 = extractvalue %dx.types.CBufRet.i32 %686, 3
-  %691 = mul i32 %687, %682
-  %692 = call i32 @dx.op.tertiary.i32(i32 48, i32 %683, i32 %688, i32 %691)  ; IMad(a,b,c)
-  %693 = call i32 @dx.op.tertiary.i32(i32 48, i32 %684, i32 %689, i32 %692)  ; IMad(a,b,c)
-  %694 = call i32 @dx.op.tertiary.i32(i32 48, i32 %685, i32 %690, i32 %693)  ; IMad(a,b,c)
-  %695 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %694, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %696 = extractvalue %dx.types.ResRet.i32 %695, 0
-  %697 = icmp ne i32 %696, 0
-  %698 = uitofp i1 %697 to float
-  br label %787
-
-; <label>:699                                     ; preds = %667
-  %700 = icmp eq i32 %343, 2
-  br i1 %700, label %701, label %787
-
-; <label>:701                                     ; preds = %699
-  %702 = fsub fast float %22, %20
-  %703 = fcmp fast olt float %339, %20
-  br i1 %703, label %704, label %717
-
-; <label>:704                                     ; preds = %701
-  %705 = fsub fast float %20, %339
-  %706 = fdiv fast float %705, %702
-  %707 = fptoui float %706 to i32
-  %708 = uitofp i32 %707 to float
-  %709 = fmul fast float %708, %702
-  %710 = fsub fast float %705, %709
-  %711 = and i32 %707, 1
-  %712 = icmp eq i32 %711, 0
-  br i1 %712, label %713, label %715
-
-; <label>:713                                     ; preds = %704
-  %714 = fadd fast float %710, %20
-  br label %732
-
-; <label>:715                                     ; preds = %704
-  %716 = fsub fast float %22, %710
-  br label %732
-
-; <label>:717                                     ; preds = %701
-  %718 = fcmp fast ogt float %339, %22
-  br i1 %718, label %719, label %732
-
-; <label>:719                                     ; preds = %717
-  %720 = fsub fast float %339, %22
-  %721 = fdiv fast float %720, %702
-  %722 = fptoui float %721 to i32
-  %723 = uitofp i32 %722 to float
-  %724 = fmul fast float %723, %702
-  %725 = fsub fast float %720, %724
-  %726 = and i32 %722, 1
-  %727 = icmp eq i32 %726, 0
-  br i1 %727, label %728, label %730
-
-; <label>:728                                     ; preds = %719
-  %729 = fsub fast float %22, %725
-  br label %732
-
-; <label>:730                                     ; preds = %719
-  %731 = fadd fast float %725, %20
-  br label %732
-
-; <label>:732                                     ; preds = %730, %728, %717, %715, %713
-  %733 = phi float [ %714, %713 ], [ %716, %715 ], [ %729, %728 ], [ %731, %730 ], [ %339, %717 ]
-  %734 = fptoui float %733 to i32
-  %735 = fsub fast float %24, %20
-  %736 = fcmp fast olt float %342, %20
-  br i1 %736, label %737, label %750
-
-; <label>:737                                     ; preds = %732
-  %738 = fsub fast float %20, %342
-  %739 = fdiv fast float %738, %735
-  %740 = fptoui float %739 to i32
-  %741 = uitofp i32 %740 to float
-  %742 = fmul fast float %741, %735
-  %743 = fsub fast float %738, %742
-  %744 = and i32 %740, 1
-  %745 = icmp eq i32 %744, 0
-  br i1 %745, label %746, label %748
-
-; <label>:746                                     ; preds = %737
-  %747 = fadd fast float %743, %20
-  br label %765
-
-; <label>:748                                     ; preds = %737
-  %749 = fsub fast float %24, %743
-  br label %765
-
-; <label>:750                                     ; preds = %732
-  %751 = fcmp fast ogt float %342, %24
-  br i1 %751, label %752, label %765
-
-; <label>:752                                     ; preds = %750
-  %753 = fsub fast float %342, %24
-  %754 = fdiv fast float %753, %735
-  %755 = fptoui float %754 to i32
-  %756 = uitofp i32 %755 to float
-  %757 = fmul fast float %756, %735
-  %758 = fsub fast float %753, %757
-  %759 = and i32 %755, 1
-  %760 = icmp eq i32 %759, 0
-  br i1 %760, label %761, label %763
-
-; <label>:761                                     ; preds = %752
-  %762 = fsub fast float %24, %758
-  br label %765
-
-; <label>:763                                     ; preds = %752
-  %764 = fadd fast float %758, %20
-  br label %765
-
-; <label>:765                                     ; preds = %763, %761, %750, %748, %746
-  %766 = phi float [ %747, %746 ], [ %749, %748 ], [ %762, %761 ], [ %764, %763 ], [ %342, %750 ]
-  %767 = fptoui float %766 to i32
-  %768 = uitofp i32 %767 to float
-  %769 = uitofp i32 %734 to float
-  %770 = fptoui float %45 to i32
-  %771 = fptoui float %182 to i32
-  %772 = fptoui float %768 to i32
-  %773 = fptoui float %769 to i32
-  %774 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %775 = extractvalue %dx.types.CBufRet.i32 %774, 0
-  %776 = extractvalue %dx.types.CBufRet.i32 %774, 1
-  %777 = extractvalue %dx.types.CBufRet.i32 %774, 2
-  %778 = extractvalue %dx.types.CBufRet.i32 %774, 3
-  %779 = mul i32 %775, %770
-  %780 = call i32 @dx.op.tertiary.i32(i32 48, i32 %771, i32 %776, i32 %779)  ; IMad(a,b,c)
-  %781 = call i32 @dx.op.tertiary.i32(i32 48, i32 %772, i32 %777, i32 %780)  ; IMad(a,b,c)
-  %782 = call i32 @dx.op.tertiary.i32(i32 48, i32 %773, i32 %778, i32 %781)  ; IMad(a,b,c)
-  %783 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %782, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %784 = extractvalue %dx.types.ResRet.i32 %783, 0
-  %785 = icmp ne i32 %784, 0
-  %786 = uitofp i1 %785 to float
-  br label %787
-
-; <label>:787                                     ; preds = %765, %699, %669, %651, %641
-  %788 = phi float [ %666, %651 ], [ 0.000000e+00, %641 ], [ %698, %669 ], [ %786, %765 ], [ 0.000000e+00, %699 ]
-  br i1 %344, label %789, label %815
-
-; <label>:789                                     ; preds = %787
-  %790 = fcmp fast oge float %341, 0.000000e+00
-  %791 = fptoui float %341 to i32
-  %792 = icmp ult i32 %791, %13
-  %793 = and i1 %790, %792
-  %794 = fcmp fast oge float %342, 0.000000e+00
-  %795 = and i1 %794, %793
-  %796 = fptoui float %342 to i32
-  %797 = icmp ult i32 %796, %15
-  %798 = and i1 %797, %795
-  br i1 %798, label %799, label %935
-
-; <label>:799                                     ; preds = %789
-  %800 = fptoui float %45 to i32
-  %801 = fptoui float %182 to i32
-  %802 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %803 = extractvalue %dx.types.CBufRet.i32 %802, 0
-  %804 = extractvalue %dx.types.CBufRet.i32 %802, 1
-  %805 = extractvalue %dx.types.CBufRet.i32 %802, 2
-  %806 = extractvalue %dx.types.CBufRet.i32 %802, 3
-  %807 = mul i32 %803, %800
-  %808 = call i32 @dx.op.tertiary.i32(i32 48, i32 %801, i32 %804, i32 %807)  ; IMad(a,b,c)
-  %809 = call i32 @dx.op.tertiary.i32(i32 48, i32 %796, i32 %805, i32 %808)  ; IMad(a,b,c)
-  %810 = call i32 @dx.op.tertiary.i32(i32 48, i32 %791, i32 %806, i32 %809)  ; IMad(a,b,c)
-  %811 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %810, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %812 = extractvalue %dx.types.ResRet.i32 %811, 0
-  %813 = icmp ne i32 %812, 0
-  %814 = uitofp i1 %813 to float
-  br label %935
-
-; <label>:815                                     ; preds = %787
-  %816 = icmp eq i32 %343, 1
-  br i1 %816, label %817, label %847
-
-; <label>:817                                     ; preds = %815
-  %818 = add i32 %13, -1
-  %819 = uitofp i32 %818 to float
-  %820 = call float @dx.op.binary.f32(i32 35, float %341, float 0.000000e+00)  ; FMax(a,b)
-  %821 = call float @dx.op.binary.f32(i32 36, float %820, float %819)  ; FMin(a,b)
-  %822 = fptoui float %821 to i32
-  %823 = add i32 %15, -1
-  %824 = uitofp i32 %823 to float
-  %825 = call float @dx.op.binary.f32(i32 35, float %342, float 0.000000e+00)  ; FMax(a,b)
-  %826 = call float @dx.op.binary.f32(i32 36, float %825, float %824)  ; FMin(a,b)
-  %827 = fptoui float %826 to i32
-  %828 = uitofp i32 %827 to float
-  %829 = uitofp i32 %822 to float
-  %830 = fptoui float %45 to i32
-  %831 = fptoui float %182 to i32
-  %832 = fptoui float %828 to i32
-  %833 = fptoui float %829 to i32
-  %834 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %835 = extractvalue %dx.types.CBufRet.i32 %834, 0
-  %836 = extractvalue %dx.types.CBufRet.i32 %834, 1
-  %837 = extractvalue %dx.types.CBufRet.i32 %834, 2
-  %838 = extractvalue %dx.types.CBufRet.i32 %834, 3
-  %839 = mul i32 %835, %830
-  %840 = call i32 @dx.op.tertiary.i32(i32 48, i32 %831, i32 %836, i32 %839)  ; IMad(a,b,c)
-  %841 = call i32 @dx.op.tertiary.i32(i32 48, i32 %832, i32 %837, i32 %840)  ; IMad(a,b,c)
-  %842 = call i32 @dx.op.tertiary.i32(i32 48, i32 %833, i32 %838, i32 %841)  ; IMad(a,b,c)
-  %843 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %842, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %844 = extractvalue %dx.types.ResRet.i32 %843, 0
-  %845 = icmp ne i32 %844, 0
-  %846 = uitofp i1 %845 to float
-  br label %935
-
-; <label>:847                                     ; preds = %815
-  %848 = icmp eq i32 %343, 2
-  br i1 %848, label %849, label %935
-
-; <label>:849                                     ; preds = %847
-  %850 = fsub fast float %22, %20
-  %851 = fcmp fast olt float %341, %20
-  br i1 %851, label %852, label %865
-
-; <label>:852                                     ; preds = %849
-  %853 = fsub fast float %20, %341
-  %854 = fdiv fast float %853, %850
-  %855 = fptoui float %854 to i32
-  %856 = uitofp i32 %855 to float
-  %857 = fmul fast float %856, %850
-  %858 = fsub fast float %853, %857
-  %859 = and i32 %855, 1
-  %860 = icmp eq i32 %859, 0
-  br i1 %860, label %861, label %863
-
-; <label>:861                                     ; preds = %852
-  %862 = fadd fast float %858, %20
-  br label %880
-
-; <label>:863                                     ; preds = %852
-  %864 = fsub fast float %22, %858
-  br label %880
-
-; <label>:865                                     ; preds = %849
-  %866 = fcmp fast ogt float %341, %22
-  br i1 %866, label %867, label %880
-
-; <label>:867                                     ; preds = %865
-  %868 = fsub fast float %341, %22
-  %869 = fdiv fast float %868, %850
-  %870 = fptoui float %869 to i32
-  %871 = uitofp i32 %870 to float
-  %872 = fmul fast float %871, %850
-  %873 = fsub fast float %868, %872
-  %874 = and i32 %870, 1
-  %875 = icmp eq i32 %874, 0
-  br i1 %875, label %876, label %878
-
-; <label>:876                                     ; preds = %867
-  %877 = fsub fast float %22, %873
-  br label %880
-
-; <label>:878                                     ; preds = %867
-  %879 = fadd fast float %873, %20
-  br label %880
-
-; <label>:880                                     ; preds = %878, %876, %865, %863, %861
-  %881 = phi float [ %862, %861 ], [ %864, %863 ], [ %877, %876 ], [ %879, %878 ], [ %341, %865 ]
-  %882 = fptoui float %881 to i32
-  %883 = fsub fast float %24, %20
-  %884 = fcmp fast olt float %342, %20
-  br i1 %884, label %885, label %898
-
-; <label>:885                                     ; preds = %880
-  %886 = fsub fast float %20, %342
-  %887 = fdiv fast float %886, %883
-  %888 = fptoui float %887 to i32
-  %889 = uitofp i32 %888 to float
-  %890 = fmul fast float %889, %883
-  %891 = fsub fast float %886, %890
-  %892 = and i32 %888, 1
-  %893 = icmp eq i32 %892, 0
-  br i1 %893, label %894, label %896
-
-; <label>:894                                     ; preds = %885
-  %895 = fadd fast float %891, %20
-  br label %913
-
-; <label>:896                                     ; preds = %885
-  %897 = fsub fast float %24, %891
-  br label %913
-
-; <label>:898                                     ; preds = %880
-  %899 = fcmp fast ogt float %342, %24
-  br i1 %899, label %900, label %913
-
-; <label>:900                                     ; preds = %898
-  %901 = fsub fast float %342, %24
-  %902 = fdiv fast float %901, %883
-  %903 = fptoui float %902 to i32
-  %904 = uitofp i32 %903 to float
-  %905 = fmul fast float %904, %883
-  %906 = fsub fast float %901, %905
-  %907 = and i32 %903, 1
-  %908 = icmp eq i32 %907, 0
-  br i1 %908, label %909, label %911
-
-; <label>:909                                     ; preds = %900
-  %910 = fsub fast float %24, %906
-  br label %913
-
-; <label>:911                                     ; preds = %900
-  %912 = fadd fast float %906, %20
-  br label %913
-
-; <label>:913                                     ; preds = %911, %909, %898, %896, %894
-  %914 = phi float [ %895, %894 ], [ %897, %896 ], [ %910, %909 ], [ %912, %911 ], [ %342, %898 ]
-  %915 = fptoui float %914 to i32
-  %916 = uitofp i32 %915 to float
-  %917 = uitofp i32 %882 to float
-  %918 = fptoui float %45 to i32
-  %919 = fptoui float %182 to i32
-  %920 = fptoui float %916 to i32
-  %921 = fptoui float %917 to i32
-  %922 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %923 = extractvalue %dx.types.CBufRet.i32 %922, 0
-  %924 = extractvalue %dx.types.CBufRet.i32 %922, 1
-  %925 = extractvalue %dx.types.CBufRet.i32 %922, 2
-  %926 = extractvalue %dx.types.CBufRet.i32 %922, 3
-  %927 = mul i32 %923, %918
-  %928 = call i32 @dx.op.tertiary.i32(i32 48, i32 %919, i32 %924, i32 %927)  ; IMad(a,b,c)
-  %929 = call i32 @dx.op.tertiary.i32(i32 48, i32 %920, i32 %925, i32 %928)  ; IMad(a,b,c)
-  %930 = call i32 @dx.op.tertiary.i32(i32 48, i32 %921, i32 %926, i32 %929)  ; IMad(a,b,c)
-  %931 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %930, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %932 = extractvalue %dx.types.ResRet.i32 %931, 0
-  %933 = icmp ne i32 %932, 0
-  %934 = uitofp i1 %933 to float
-  br label %935
-
-; <label>:935                                     ; preds = %913, %847, %817, %799, %789
-  %936 = phi float [ %814, %799 ], [ 0.000000e+00, %789 ], [ %846, %817 ], [ %934, %913 ], [ 0.000000e+00, %847 ]
-  %937 = call float @dx.op.unary.f32(i32 22, float %180)  ; Frc(value)
-  %938 = fsub fast float %640, %492
-  %939 = fmul fast float %937, %938
-  %940 = fadd fast float %939, %492
-  %941 = fsub fast float %936, %788
-  %942 = fmul fast float %937, %941
-  %943 = fadd fast float %942, %788
-  %944 = call float @dx.op.unary.f32(i32 22, float %181)  ; Frc(value)
-  %945 = fsub fast float %943, %940
-  %946 = fmul fast float %945, %944
-  %947 = fadd fast float %946, %940
-  %948 = fcmp fast une float %947, 0.000000e+00
-  %949 = zext i1 %948 to i32
-  call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %1, i32 %8, i32 0, i32 %949, i32 undef, i32 undef, i32 undef, i8 1, i32 4)  ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment)
-  br label %3389
-
-; <label>:950                                     ; preds = %336
-  %951 = icmp eq i32 %91, 2
-  br i1 %951, label %952, label %3389
-
-; <label>:952                                     ; preds = %950
-  %953 = call float @dx.op.unary.f32(i32 27, float %180)  ; Round_ni(value)
-  %954 = fadd fast float %953, -1.000000e+00
-  %955 = call float @dx.op.unary.f32(i32 27, float %181)  ; Round_ni(value)
-  %956 = fadd fast float %955, -1.000000e+00
-  %957 = extractvalue %dx.types.CBufRet.i32 %6, 3
-  %958 = icmp eq i32 %957, 0
-  br i1 %958, label %959, label %985
-
-; <label>:959                                     ; preds = %952
-  %960 = fcmp fast oge float %954, 0.000000e+00
-  %961 = fptoui float %954 to i32
-  %962 = icmp ult i32 %961, %13
-  %963 = and i1 %960, %962
-  %964 = fcmp fast oge float %956, 0.000000e+00
-  %965 = and i1 %964, %963
-  %966 = fptoui float %956 to i32
-  %967 = icmp ult i32 %966, %15
-  %968 = and i1 %967, %965
-  br i1 %968, label %969, label %1105
-
-; <label>:969                                     ; preds = %959
-  %970 = fptoui float %45 to i32
-  %971 = fptoui float %182 to i32
-  %972 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %973 = extractvalue %dx.types.CBufRet.i32 %972, 0
-  %974 = extractvalue %dx.types.CBufRet.i32 %972, 1
-  %975 = extractvalue %dx.types.CBufRet.i32 %972, 2
-  %976 = extractvalue %dx.types.CBufRet.i32 %972, 3
-  %977 = mul i32 %973, %970
-  %978 = call i32 @dx.op.tertiary.i32(i32 48, i32 %971, i32 %974, i32 %977)  ; IMad(a,b,c)
-  %979 = call i32 @dx.op.tertiary.i32(i32 48, i32 %966, i32 %975, i32 %978)  ; IMad(a,b,c)
-  %980 = call i32 @dx.op.tertiary.i32(i32 48, i32 %961, i32 %976, i32 %979)  ; IMad(a,b,c)
-  %981 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %980, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %982 = extractvalue %dx.types.ResRet.i32 %981, 0
-  %983 = icmp ne i32 %982, 0
-  %984 = uitofp i1 %983 to float
-  br label %1105
-
-; <label>:985                                     ; preds = %952
-  %986 = icmp eq i32 %957, 1
-  br i1 %986, label %987, label %1017
-
-; <label>:987                                     ; preds = %985
-  %988 = add i32 %13, -1
-  %989 = uitofp i32 %988 to float
-  %990 = call float @dx.op.binary.f32(i32 35, float %954, float 0.000000e+00)  ; FMax(a,b)
-  %991 = call float @dx.op.binary.f32(i32 36, float %990, float %989)  ; FMin(a,b)
-  %992 = fptoui float %991 to i32
-  %993 = add i32 %15, -1
-  %994 = uitofp i32 %993 to float
-  %995 = call float @dx.op.binary.f32(i32 35, float %956, float 0.000000e+00)  ; FMax(a,b)
-  %996 = call float @dx.op.binary.f32(i32 36, float %995, float %994)  ; FMin(a,b)
-  %997 = fptoui float %996 to i32
-  %998 = uitofp i32 %997 to float
-  %999 = uitofp i32 %992 to float
-  %1000 = fptoui float %45 to i32
-  %1001 = fptoui float %182 to i32
-  %1002 = fptoui float %998 to i32
-  %1003 = fptoui float %999 to i32
-  %1004 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1005 = extractvalue %dx.types.CBufRet.i32 %1004, 0
-  %1006 = extractvalue %dx.types.CBufRet.i32 %1004, 1
-  %1007 = extractvalue %dx.types.CBufRet.i32 %1004, 2
-  %1008 = extractvalue %dx.types.CBufRet.i32 %1004, 3
-  %1009 = mul i32 %1005, %1000
-  %1010 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1001, i32 %1006, i32 %1009)  ; IMad(a,b,c)
-  %1011 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1002, i32 %1007, i32 %1010)  ; IMad(a,b,c)
-  %1012 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1003, i32 %1008, i32 %1011)  ; IMad(a,b,c)
-  %1013 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1012, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1014 = extractvalue %dx.types.ResRet.i32 %1013, 0
-  %1015 = icmp ne i32 %1014, 0
-  %1016 = uitofp i1 %1015 to float
-  br label %1105
-
-; <label>:1017                                    ; preds = %985
-  %1018 = icmp eq i32 %957, 2
-  br i1 %1018, label %1019, label %1105
-
-; <label>:1019                                    ; preds = %1017
-  %1020 = fsub fast float %22, %20
-  %1021 = fcmp fast olt float %954, %20
-  br i1 %1021, label %1022, label %1035
-
-; <label>:1022                                    ; preds = %1019
-  %1023 = fsub fast float %20, %954
-  %1024 = fdiv fast float %1023, %1020
-  %1025 = fptoui float %1024 to i32
-  %1026 = uitofp i32 %1025 to float
-  %1027 = fmul fast float %1026, %1020
-  %1028 = fsub fast float %1023, %1027
-  %1029 = and i32 %1025, 1
-  %1030 = icmp eq i32 %1029, 0
-  br i1 %1030, label %1031, label %1033
-
-; <label>:1031                                    ; preds = %1022
-  %1032 = fadd fast float %1028, %20
-  br label %1050
-
-; <label>:1033                                    ; preds = %1022
-  %1034 = fsub fast float %22, %1028
-  br label %1050
-
-; <label>:1035                                    ; preds = %1019
-  %1036 = fcmp fast ogt float %954, %22
-  br i1 %1036, label %1037, label %1050
-
-; <label>:1037                                    ; preds = %1035
-  %1038 = fsub fast float %954, %22
-  %1039 = fdiv fast float %1038, %1020
-  %1040 = fptoui float %1039 to i32
-  %1041 = uitofp i32 %1040 to float
-  %1042 = fmul fast float %1041, %1020
-  %1043 = fsub fast float %1038, %1042
-  %1044 = and i32 %1040, 1
-  %1045 = icmp eq i32 %1044, 0
-  br i1 %1045, label %1046, label %1048
-
-; <label>:1046                                    ; preds = %1037
-  %1047 = fsub fast float %22, %1043
-  br label %1050
-
-; <label>:1048                                    ; preds = %1037
-  %1049 = fadd fast float %1043, %20
-  br label %1050
-
-; <label>:1050                                    ; preds = %1048, %1046, %1035, %1033, %1031
-  %1051 = phi float [ %1032, %1031 ], [ %1034, %1033 ], [ %1047, %1046 ], [ %1049, %1048 ], [ %954, %1035 ]
-  %1052 = fptoui float %1051 to i32
-  %1053 = fsub fast float %24, %20
-  %1054 = fcmp fast olt float %956, %20
-  br i1 %1054, label %1055, label %1068
-
-; <label>:1055                                    ; preds = %1050
-  %1056 = fsub fast float %20, %956
-  %1057 = fdiv fast float %1056, %1053
-  %1058 = fptoui float %1057 to i32
-  %1059 = uitofp i32 %1058 to float
-  %1060 = fmul fast float %1059, %1053
-  %1061 = fsub fast float %1056, %1060
-  %1062 = and i32 %1058, 1
-  %1063 = icmp eq i32 %1062, 0
-  br i1 %1063, label %1064, label %1066
-
-; <label>:1064                                    ; preds = %1055
-  %1065 = fadd fast float %1061, %20
-  br label %1083
-
-; <label>:1066                                    ; preds = %1055
-  %1067 = fsub fast float %24, %1061
-  br label %1083
-
-; <label>:1068                                    ; preds = %1050
-  %1069 = fcmp fast ogt float %956, %24
-  br i1 %1069, label %1070, label %1083
-
-; <label>:1070                                    ; preds = %1068
-  %1071 = fsub fast float %956, %24
-  %1072 = fdiv fast float %1071, %1053
-  %1073 = fptoui float %1072 to i32
-  %1074 = uitofp i32 %1073 to float
-  %1075 = fmul fast float %1074, %1053
-  %1076 = fsub fast float %1071, %1075
-  %1077 = and i32 %1073, 1
-  %1078 = icmp eq i32 %1077, 0
-  br i1 %1078, label %1079, label %1081
-
-; <label>:1079                                    ; preds = %1070
-  %1080 = fsub fast float %24, %1076
-  br label %1083
-
-; <label>:1081                                    ; preds = %1070
-  %1082 = fadd fast float %1076, %20
-  br label %1083
-
-; <label>:1083                                    ; preds = %1081, %1079, %1068, %1066, %1064
-  %1084 = phi float [ %1065, %1064 ], [ %1067, %1066 ], [ %1080, %1079 ], [ %1082, %1081 ], [ %956, %1068 ]
-  %1085 = fptoui float %1084 to i32
-  %1086 = uitofp i32 %1085 to float
-  %1087 = uitofp i32 %1052 to float
-  %1088 = fptoui float %45 to i32
-  %1089 = fptoui float %182 to i32
-  %1090 = fptoui float %1086 to i32
-  %1091 = fptoui float %1087 to i32
-  %1092 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1093 = extractvalue %dx.types.CBufRet.i32 %1092, 0
-  %1094 = extractvalue %dx.types.CBufRet.i32 %1092, 1
-  %1095 = extractvalue %dx.types.CBufRet.i32 %1092, 2
-  %1096 = extractvalue %dx.types.CBufRet.i32 %1092, 3
-  %1097 = mul i32 %1093, %1088
-  %1098 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1089, i32 %1094, i32 %1097)  ; IMad(a,b,c)
-  %1099 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1090, i32 %1095, i32 %1098)  ; IMad(a,b,c)
-  %1100 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1091, i32 %1096, i32 %1099)  ; IMad(a,b,c)
-  %1101 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1100, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1102 = extractvalue %dx.types.ResRet.i32 %1101, 0
-  %1103 = icmp ne i32 %1102, 0
-  %1104 = uitofp i1 %1103 to float
-  br label %1105
-
-; <label>:1105                                    ; preds = %1083, %1017, %987, %969, %959
-  %1106 = phi float [ %984, %969 ], [ 0.000000e+00, %959 ], [ %1016, %987 ], [ %1104, %1083 ], [ 0.000000e+00, %1017 ]
-  br i1 %958, label %1107, label %1133
-
-; <label>:1107                                    ; preds = %1105
-  %1108 = fcmp fast oge float %953, 0.000000e+00
-  %1109 = fptoui float %953 to i32
-  %1110 = icmp ult i32 %1109, %13
-  %1111 = and i1 %1108, %1110
-  %1112 = fcmp fast oge float %956, 0.000000e+00
-  %1113 = and i1 %1112, %1111
-  %1114 = fptoui float %956 to i32
-  %1115 = icmp ult i32 %1114, %15
-  %1116 = and i1 %1115, %1113
-  br i1 %1116, label %1117, label %1253
-
-; <label>:1117                                    ; preds = %1107
-  %1118 = fptoui float %45 to i32
-  %1119 = fptoui float %182 to i32
-  %1120 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1121 = extractvalue %dx.types.CBufRet.i32 %1120, 0
-  %1122 = extractvalue %dx.types.CBufRet.i32 %1120, 1
-  %1123 = extractvalue %dx.types.CBufRet.i32 %1120, 2
-  %1124 = extractvalue %dx.types.CBufRet.i32 %1120, 3
-  %1125 = mul i32 %1121, %1118
-  %1126 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1119, i32 %1122, i32 %1125)  ; IMad(a,b,c)
-  %1127 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1114, i32 %1123, i32 %1126)  ; IMad(a,b,c)
-  %1128 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1109, i32 %1124, i32 %1127)  ; IMad(a,b,c)
-  %1129 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1128, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1130 = extractvalue %dx.types.ResRet.i32 %1129, 0
-  %1131 = icmp ne i32 %1130, 0
-  %1132 = uitofp i1 %1131 to float
-  br label %1253
-
-; <label>:1133                                    ; preds = %1105
-  %1134 = icmp eq i32 %957, 1
-  br i1 %1134, label %1135, label %1165
-
-; <label>:1135                                    ; preds = %1133
-  %1136 = add i32 %13, -1
-  %1137 = uitofp i32 %1136 to float
-  %1138 = call float @dx.op.binary.f32(i32 35, float %953, float 0.000000e+00)  ; FMax(a,b)
-  %1139 = call float @dx.op.binary.f32(i32 36, float %1138, float %1137)  ; FMin(a,b)
-  %1140 = fptoui float %1139 to i32
-  %1141 = add i32 %15, -1
-  %1142 = uitofp i32 %1141 to float
-  %1143 = call float @dx.op.binary.f32(i32 35, float %956, float 0.000000e+00)  ; FMax(a,b)
-  %1144 = call float @dx.op.binary.f32(i32 36, float %1143, float %1142)  ; FMin(a,b)
-  %1145 = fptoui float %1144 to i32
-  %1146 = uitofp i32 %1145 to float
-  %1147 = uitofp i32 %1140 to float
-  %1148 = fptoui float %45 to i32
-  %1149 = fptoui float %182 to i32
-  %1150 = fptoui float %1146 to i32
-  %1151 = fptoui float %1147 to i32
-  %1152 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1153 = extractvalue %dx.types.CBufRet.i32 %1152, 0
-  %1154 = extractvalue %dx.types.CBufRet.i32 %1152, 1
-  %1155 = extractvalue %dx.types.CBufRet.i32 %1152, 2
-  %1156 = extractvalue %dx.types.CBufRet.i32 %1152, 3
-  %1157 = mul i32 %1153, %1148
-  %1158 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1149, i32 %1154, i32 %1157)  ; IMad(a,b,c)
-  %1159 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1150, i32 %1155, i32 %1158)  ; IMad(a,b,c)
-  %1160 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1151, i32 %1156, i32 %1159)  ; IMad(a,b,c)
-  %1161 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1160, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1162 = extractvalue %dx.types.ResRet.i32 %1161, 0
-  %1163 = icmp ne i32 %1162, 0
-  %1164 = uitofp i1 %1163 to float
-  br label %1253
-
-; <label>:1165                                    ; preds = %1133
-  %1166 = icmp eq i32 %957, 2
-  br i1 %1166, label %1167, label %1253
-
-; <label>:1167                                    ; preds = %1165
-  %1168 = fsub fast float %22, %20
-  %1169 = fcmp fast olt float %953, %20
-  br i1 %1169, label %1170, label %1183
-
-; <label>:1170                                    ; preds = %1167
-  %1171 = fsub fast float %20, %953
-  %1172 = fdiv fast float %1171, %1168
-  %1173 = fptoui float %1172 to i32
-  %1174 = uitofp i32 %1173 to float
-  %1175 = fmul fast float %1174, %1168
-  %1176 = fsub fast float %1171, %1175
-  %1177 = and i32 %1173, 1
-  %1178 = icmp eq i32 %1177, 0
-  br i1 %1178, label %1179, label %1181
-
-; <label>:1179                                    ; preds = %1170
-  %1180 = fadd fast float %1176, %20
-  br label %1198
-
-; <label>:1181                                    ; preds = %1170
-  %1182 = fsub fast float %22, %1176
-  br label %1198
-
-; <label>:1183                                    ; preds = %1167
-  %1184 = fcmp fast ogt float %953, %22
-  br i1 %1184, label %1185, label %1198
-
-; <label>:1185                                    ; preds = %1183
-  %1186 = fsub fast float %953, %22
-  %1187 = fdiv fast float %1186, %1168
-  %1188 = fptoui float %1187 to i32
-  %1189 = uitofp i32 %1188 to float
-  %1190 = fmul fast float %1189, %1168
-  %1191 = fsub fast float %1186, %1190
-  %1192 = and i32 %1188, 1
-  %1193 = icmp eq i32 %1192, 0
-  br i1 %1193, label %1194, label %1196
-
-; <label>:1194                                    ; preds = %1185
-  %1195 = fsub fast float %22, %1191
-  br label %1198
-
-; <label>:1196                                    ; preds = %1185
-  %1197 = fadd fast float %1191, %20
-  br label %1198
-
-; <label>:1198                                    ; preds = %1196, %1194, %1183, %1181, %1179
-  %1199 = phi float [ %1180, %1179 ], [ %1182, %1181 ], [ %1195, %1194 ], [ %1197, %1196 ], [ %953, %1183 ]
-  %1200 = fptoui float %1199 to i32
-  %1201 = fsub fast float %24, %20
-  %1202 = fcmp fast olt float %956, %20
-  br i1 %1202, label %1203, label %1216
-
-; <label>:1203                                    ; preds = %1198
-  %1204 = fsub fast float %20, %956
-  %1205 = fdiv fast float %1204, %1201
-  %1206 = fptoui float %1205 to i32
-  %1207 = uitofp i32 %1206 to float
-  %1208 = fmul fast float %1207, %1201
-  %1209 = fsub fast float %1204, %1208
-  %1210 = and i32 %1206, 1
-  %1211 = icmp eq i32 %1210, 0
-  br i1 %1211, label %1212, label %1214
-
-; <label>:1212                                    ; preds = %1203
-  %1213 = fadd fast float %1209, %20
-  br label %1231
-
-; <label>:1214                                    ; preds = %1203
-  %1215 = fsub fast float %24, %1209
-  br label %1231
-
-; <label>:1216                                    ; preds = %1198
-  %1217 = fcmp fast ogt float %956, %24
-  br i1 %1217, label %1218, label %1231
-
-; <label>:1218                                    ; preds = %1216
-  %1219 = fsub fast float %956, %24
-  %1220 = fdiv fast float %1219, %1201
-  %1221 = fptoui float %1220 to i32
-  %1222 = uitofp i32 %1221 to float
-  %1223 = fmul fast float %1222, %1201
-  %1224 = fsub fast float %1219, %1223
-  %1225 = and i32 %1221, 1
-  %1226 = icmp eq i32 %1225, 0
-  br i1 %1226, label %1227, label %1229
-
-; <label>:1227                                    ; preds = %1218
-  %1228 = fsub fast float %24, %1224
-  br label %1231
-
-; <label>:1229                                    ; preds = %1218
-  %1230 = fadd fast float %1224, %20
-  br label %1231
-
-; <label>:1231                                    ; preds = %1229, %1227, %1216, %1214, %1212
-  %1232 = phi float [ %1213, %1212 ], [ %1215, %1214 ], [ %1228, %1227 ], [ %1230, %1229 ], [ %956, %1216 ]
-  %1233 = fptoui float %1232 to i32
-  %1234 = uitofp i32 %1233 to float
-  %1235 = uitofp i32 %1200 to float
-  %1236 = fptoui float %45 to i32
-  %1237 = fptoui float %182 to i32
-  %1238 = fptoui float %1234 to i32
-  %1239 = fptoui float %1235 to i32
-  %1240 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1241 = extractvalue %dx.types.CBufRet.i32 %1240, 0
-  %1242 = extractvalue %dx.types.CBufRet.i32 %1240, 1
-  %1243 = extractvalue %dx.types.CBufRet.i32 %1240, 2
-  %1244 = extractvalue %dx.types.CBufRet.i32 %1240, 3
-  %1245 = mul i32 %1241, %1236
-  %1246 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1237, i32 %1242, i32 %1245)  ; IMad(a,b,c)
-  %1247 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1238, i32 %1243, i32 %1246)  ; IMad(a,b,c)
-  %1248 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1239, i32 %1244, i32 %1247)  ; IMad(a,b,c)
-  %1249 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1248, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1250 = extractvalue %dx.types.ResRet.i32 %1249, 0
-  %1251 = icmp ne i32 %1250, 0
-  %1252 = uitofp i1 %1251 to float
-  br label %1253
-
-; <label>:1253                                    ; preds = %1231, %1165, %1135, %1117, %1107
-  %1254 = phi float [ %1132, %1117 ], [ 0.000000e+00, %1107 ], [ %1164, %1135 ], [ %1252, %1231 ], [ 0.000000e+00, %1165 ]
-  %1255 = fadd fast float %953, 1.000000e+00
-  br i1 %958, label %1256, label %1282
-
-; <label>:1256                                    ; preds = %1253
-  %1257 = fcmp fast oge float %1255, 0.000000e+00
-  %1258 = fptoui float %1255 to i32
-  %1259 = icmp ult i32 %1258, %13
-  %1260 = and i1 %1257, %1259
-  %1261 = fcmp fast oge float %956, 0.000000e+00
-  %1262 = and i1 %1261, %1260
-  %1263 = fptoui float %956 to i32
-  %1264 = icmp ult i32 %1263, %15
-  %1265 = and i1 %1264, %1262
-  br i1 %1265, label %1266, label %1402
-
-; <label>:1266                                    ; preds = %1256
-  %1267 = fptoui float %45 to i32
-  %1268 = fptoui float %182 to i32
-  %1269 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1270 = extractvalue %dx.types.CBufRet.i32 %1269, 0
-  %1271 = extractvalue %dx.types.CBufRet.i32 %1269, 1
-  %1272 = extractvalue %dx.types.CBufRet.i32 %1269, 2
-  %1273 = extractvalue %dx.types.CBufRet.i32 %1269, 3
-  %1274 = mul i32 %1270, %1267
-  %1275 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1268, i32 %1271, i32 %1274)  ; IMad(a,b,c)
-  %1276 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1263, i32 %1272, i32 %1275)  ; IMad(a,b,c)
-  %1277 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1258, i32 %1273, i32 %1276)  ; IMad(a,b,c)
-  %1278 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1277, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1279 = extractvalue %dx.types.ResRet.i32 %1278, 0
-  %1280 = icmp ne i32 %1279, 0
-  %1281 = uitofp i1 %1280 to float
-  br label %1402
-
-; <label>:1282                                    ; preds = %1253
-  %1283 = icmp eq i32 %957, 1
-  br i1 %1283, label %1284, label %1314
-
-; <label>:1284                                    ; preds = %1282
-  %1285 = add i32 %13, -1
-  %1286 = uitofp i32 %1285 to float
-  %1287 = call float @dx.op.binary.f32(i32 35, float %1255, float 0.000000e+00)  ; FMax(a,b)
-  %1288 = call float @dx.op.binary.f32(i32 36, float %1287, float %1286)  ; FMin(a,b)
-  %1289 = fptoui float %1288 to i32
-  %1290 = add i32 %15, -1
-  %1291 = uitofp i32 %1290 to float
-  %1292 = call float @dx.op.binary.f32(i32 35, float %956, float 0.000000e+00)  ; FMax(a,b)
-  %1293 = call float @dx.op.binary.f32(i32 36, float %1292, float %1291)  ; FMin(a,b)
-  %1294 = fptoui float %1293 to i32
-  %1295 = uitofp i32 %1294 to float
-  %1296 = uitofp i32 %1289 to float
-  %1297 = fptoui float %45 to i32
-  %1298 = fptoui float %182 to i32
-  %1299 = fptoui float %1295 to i32
-  %1300 = fptoui float %1296 to i32
-  %1301 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1302 = extractvalue %dx.types.CBufRet.i32 %1301, 0
-  %1303 = extractvalue %dx.types.CBufRet.i32 %1301, 1
-  %1304 = extractvalue %dx.types.CBufRet.i32 %1301, 2
-  %1305 = extractvalue %dx.types.CBufRet.i32 %1301, 3
-  %1306 = mul i32 %1302, %1297
-  %1307 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1298, i32 %1303, i32 %1306)  ; IMad(a,b,c)
-  %1308 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1299, i32 %1304, i32 %1307)  ; IMad(a,b,c)
-  %1309 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1300, i32 %1305, i32 %1308)  ; IMad(a,b,c)
-  %1310 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1309, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1311 = extractvalue %dx.types.ResRet.i32 %1310, 0
-  %1312 = icmp ne i32 %1311, 0
-  %1313 = uitofp i1 %1312 to float
-  br label %1402
-
-; <label>:1314                                    ; preds = %1282
-  %1315 = icmp eq i32 %957, 2
-  br i1 %1315, label %1316, label %1402
-
-; <label>:1316                                    ; preds = %1314
-  %1317 = fsub fast float %22, %20
-  %1318 = fcmp fast olt float %1255, %20
-  br i1 %1318, label %1319, label %1332
-
-; <label>:1319                                    ; preds = %1316
-  %1320 = fsub fast float %20, %1255
-  %1321 = fdiv fast float %1320, %1317
-  %1322 = fptoui float %1321 to i32
-  %1323 = uitofp i32 %1322 to float
-  %1324 = fmul fast float %1323, %1317
-  %1325 = fsub fast float %1320, %1324
-  %1326 = and i32 %1322, 1
-  %1327 = icmp eq i32 %1326, 0
-  br i1 %1327, label %1328, label %1330
-
-; <label>:1328                                    ; preds = %1319
-  %1329 = fadd fast float %1325, %20
-  br label %1347
-
-; <label>:1330                                    ; preds = %1319
-  %1331 = fsub fast float %22, %1325
-  br label %1347
-
-; <label>:1332                                    ; preds = %1316
-  %1333 = fcmp fast ogt float %1255, %22
-  br i1 %1333, label %1334, label %1347
-
-; <label>:1334                                    ; preds = %1332
-  %1335 = fsub fast float %1255, %22
-  %1336 = fdiv fast float %1335, %1317
-  %1337 = fptoui float %1336 to i32
-  %1338 = uitofp i32 %1337 to float
-  %1339 = fmul fast float %1338, %1317
-  %1340 = fsub fast float %1335, %1339
-  %1341 = and i32 %1337, 1
-  %1342 = icmp eq i32 %1341, 0
-  br i1 %1342, label %1343, label %1345
-
-; <label>:1343                                    ; preds = %1334
-  %1344 = fsub fast float %22, %1340
-  br label %1347
-
-; <label>:1345                                    ; preds = %1334
-  %1346 = fadd fast float %1340, %20
-  br label %1347
-
-; <label>:1347                                    ; preds = %1345, %1343, %1332, %1330, %1328
-  %1348 = phi float [ %1329, %1328 ], [ %1331, %1330 ], [ %1344, %1343 ], [ %1346, %1345 ], [ %1255, %1332 ]
-  %1349 = fptoui float %1348 to i32
-  %1350 = fsub fast float %24, %20
-  %1351 = fcmp fast olt float %956, %20
-  br i1 %1351, label %1352, label %1365
-
-; <label>:1352                                    ; preds = %1347
-  %1353 = fsub fast float %20, %956
-  %1354 = fdiv fast float %1353, %1350
-  %1355 = fptoui float %1354 to i32
-  %1356 = uitofp i32 %1355 to float
-  %1357 = fmul fast float %1356, %1350
-  %1358 = fsub fast float %1353, %1357
-  %1359 = and i32 %1355, 1
-  %1360 = icmp eq i32 %1359, 0
-  br i1 %1360, label %1361, label %1363
-
-; <label>:1361                                    ; preds = %1352
-  %1362 = fadd fast float %1358, %20
-  br label %1380
-
-; <label>:1363                                    ; preds = %1352
-  %1364 = fsub fast float %24, %1358
-  br label %1380
-
-; <label>:1365                                    ; preds = %1347
-  %1366 = fcmp fast ogt float %956, %24
-  br i1 %1366, label %1367, label %1380
-
-; <label>:1367                                    ; preds = %1365
-  %1368 = fsub fast float %956, %24
-  %1369 = fdiv fast float %1368, %1350
-  %1370 = fptoui float %1369 to i32
-  %1371 = uitofp i32 %1370 to float
-  %1372 = fmul fast float %1371, %1350
-  %1373 = fsub fast float %1368, %1372
-  %1374 = and i32 %1370, 1
-  %1375 = icmp eq i32 %1374, 0
-  br i1 %1375, label %1376, label %1378
-
-; <label>:1376                                    ; preds = %1367
-  %1377 = fsub fast float %24, %1373
-  br label %1380
-
-; <label>:1378                                    ; preds = %1367
-  %1379 = fadd fast float %1373, %20
-  br label %1380
-
-; <label>:1380                                    ; preds = %1378, %1376, %1365, %1363, %1361
-  %1381 = phi float [ %1362, %1361 ], [ %1364, %1363 ], [ %1377, %1376 ], [ %1379, %1378 ], [ %956, %1365 ]
-  %1382 = fptoui float %1381 to i32
-  %1383 = uitofp i32 %1382 to float
-  %1384 = uitofp i32 %1349 to float
-  %1385 = fptoui float %45 to i32
-  %1386 = fptoui float %182 to i32
-  %1387 = fptoui float %1383 to i32
-  %1388 = fptoui float %1384 to i32
-  %1389 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1390 = extractvalue %dx.types.CBufRet.i32 %1389, 0
-  %1391 = extractvalue %dx.types.CBufRet.i32 %1389, 1
-  %1392 = extractvalue %dx.types.CBufRet.i32 %1389, 2
-  %1393 = extractvalue %dx.types.CBufRet.i32 %1389, 3
-  %1394 = mul i32 %1390, %1385
-  %1395 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1386, i32 %1391, i32 %1394)  ; IMad(a,b,c)
-  %1396 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1387, i32 %1392, i32 %1395)  ; IMad(a,b,c)
-  %1397 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1388, i32 %1393, i32 %1396)  ; IMad(a,b,c)
-  %1398 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1397, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1399 = extractvalue %dx.types.ResRet.i32 %1398, 0
-  %1400 = icmp ne i32 %1399, 0
-  %1401 = uitofp i1 %1400 to float
-  br label %1402
-
-; <label>:1402                                    ; preds = %1380, %1314, %1284, %1266, %1256
-  %1403 = phi float [ %1281, %1266 ], [ 0.000000e+00, %1256 ], [ %1313, %1284 ], [ %1401, %1380 ], [ 0.000000e+00, %1314 ]
-  %1404 = fadd fast float %953, 2.000000e+00
-  br i1 %958, label %1405, label %1431
-
-; <label>:1405                                    ; preds = %1402
-  %1406 = fcmp fast oge float %1404, 0.000000e+00
-  %1407 = fptoui float %1404 to i32
-  %1408 = icmp ult i32 %1407, %13
-  %1409 = and i1 %1406, %1408
-  %1410 = fcmp fast oge float %956, 0.000000e+00
-  %1411 = and i1 %1410, %1409
-  %1412 = fptoui float %956 to i32
-  %1413 = icmp ult i32 %1412, %15
-  %1414 = and i1 %1413, %1411
-  br i1 %1414, label %1415, label %1551
-
-; <label>:1415                                    ; preds = %1405
-  %1416 = fptoui float %45 to i32
-  %1417 = fptoui float %182 to i32
-  %1418 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1419 = extractvalue %dx.types.CBufRet.i32 %1418, 0
-  %1420 = extractvalue %dx.types.CBufRet.i32 %1418, 1
-  %1421 = extractvalue %dx.types.CBufRet.i32 %1418, 2
-  %1422 = extractvalue %dx.types.CBufRet.i32 %1418, 3
-  %1423 = mul i32 %1419, %1416
-  %1424 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1417, i32 %1420, i32 %1423)  ; IMad(a,b,c)
-  %1425 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1412, i32 %1421, i32 %1424)  ; IMad(a,b,c)
-  %1426 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1407, i32 %1422, i32 %1425)  ; IMad(a,b,c)
-  %1427 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1426, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1428 = extractvalue %dx.types.ResRet.i32 %1427, 0
-  %1429 = icmp ne i32 %1428, 0
-  %1430 = uitofp i1 %1429 to float
-  br label %1551
-
-; <label>:1431                                    ; preds = %1402
-  %1432 = icmp eq i32 %957, 1
-  br i1 %1432, label %1433, label %1463
-
-; <label>:1433                                    ; preds = %1431
-  %1434 = add i32 %13, -1
-  %1435 = uitofp i32 %1434 to float
-  %1436 = call float @dx.op.binary.f32(i32 35, float %1404, float 0.000000e+00)  ; FMax(a,b)
-  %1437 = call float @dx.op.binary.f32(i32 36, float %1436, float %1435)  ; FMin(a,b)
-  %1438 = fptoui float %1437 to i32
-  %1439 = add i32 %15, -1
-  %1440 = uitofp i32 %1439 to float
-  %1441 = call float @dx.op.binary.f32(i32 35, float %956, float 0.000000e+00)  ; FMax(a,b)
-  %1442 = call float @dx.op.binary.f32(i32 36, float %1441, float %1440)  ; FMin(a,b)
-  %1443 = fptoui float %1442 to i32
-  %1444 = uitofp i32 %1443 to float
-  %1445 = uitofp i32 %1438 to float
-  %1446 = fptoui float %45 to i32
-  %1447 = fptoui float %182 to i32
-  %1448 = fptoui float %1444 to i32
-  %1449 = fptoui float %1445 to i32
-  %1450 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1451 = extractvalue %dx.types.CBufRet.i32 %1450, 0
-  %1452 = extractvalue %dx.types.CBufRet.i32 %1450, 1
-  %1453 = extractvalue %dx.types.CBufRet.i32 %1450, 2
-  %1454 = extractvalue %dx.types.CBufRet.i32 %1450, 3
-  %1455 = mul i32 %1451, %1446
-  %1456 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1447, i32 %1452, i32 %1455)  ; IMad(a,b,c)
-  %1457 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1448, i32 %1453, i32 %1456)  ; IMad(a,b,c)
-  %1458 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1449, i32 %1454, i32 %1457)  ; IMad(a,b,c)
-  %1459 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1458, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1460 = extractvalue %dx.types.ResRet.i32 %1459, 0
-  %1461 = icmp ne i32 %1460, 0
-  %1462 = uitofp i1 %1461 to float
-  br label %1551
-
-; <label>:1463                                    ; preds = %1431
-  %1464 = icmp eq i32 %957, 2
-  br i1 %1464, label %1465, label %1551
-
-; <label>:1465                                    ; preds = %1463
-  %1466 = fsub fast float %22, %20
-  %1467 = fcmp fast olt float %1404, %20
-  br i1 %1467, label %1468, label %1481
-
-; <label>:1468                                    ; preds = %1465
-  %1469 = fsub fast float %20, %1404
-  %1470 = fdiv fast float %1469, %1466
-  %1471 = fptoui float %1470 to i32
-  %1472 = uitofp i32 %1471 to float
-  %1473 = fmul fast float %1472, %1466
-  %1474 = fsub fast float %1469, %1473
-  %1475 = and i32 %1471, 1
-  %1476 = icmp eq i32 %1475, 0
-  br i1 %1476, label %1477, label %1479
-
-; <label>:1477                                    ; preds = %1468
-  %1478 = fadd fast float %1474, %20
-  br label %1496
-
-; <label>:1479                                    ; preds = %1468
-  %1480 = fsub fast float %22, %1474
-  br label %1496
-
-; <label>:1481                                    ; preds = %1465
-  %1482 = fcmp fast ogt float %1404, %22
-  br i1 %1482, label %1483, label %1496
-
-; <label>:1483                                    ; preds = %1481
-  %1484 = fsub fast float %1404, %22
-  %1485 = fdiv fast float %1484, %1466
-  %1486 = fptoui float %1485 to i32
-  %1487 = uitofp i32 %1486 to float
-  %1488 = fmul fast float %1487, %1466
-  %1489 = fsub fast float %1484, %1488
-  %1490 = and i32 %1486, 1
-  %1491 = icmp eq i32 %1490, 0
-  br i1 %1491, label %1492, label %1494
-
-; <label>:1492                                    ; preds = %1483
-  %1493 = fsub fast float %22, %1489
-  br label %1496
-
-; <label>:1494                                    ; preds = %1483
-  %1495 = fadd fast float %1489, %20
-  br label %1496
-
-; <label>:1496                                    ; preds = %1494, %1492, %1481, %1479, %1477
-  %1497 = phi float [ %1478, %1477 ], [ %1480, %1479 ], [ %1493, %1492 ], [ %1495, %1494 ], [ %1404, %1481 ]
-  %1498 = fptoui float %1497 to i32
-  %1499 = fsub fast float %24, %20
-  %1500 = fcmp fast olt float %956, %20
-  br i1 %1500, label %1501, label %1514
-
-; <label>:1501                                    ; preds = %1496
-  %1502 = fsub fast float %20, %956
-  %1503 = fdiv fast float %1502, %1499
-  %1504 = fptoui float %1503 to i32
-  %1505 = uitofp i32 %1504 to float
-  %1506 = fmul fast float %1505, %1499
-  %1507 = fsub fast float %1502, %1506
-  %1508 = and i32 %1504, 1
-  %1509 = icmp eq i32 %1508, 0
-  br i1 %1509, label %1510, label %1512
-
-; <label>:1510                                    ; preds = %1501
-  %1511 = fadd fast float %1507, %20
-  br label %1529
-
-; <label>:1512                                    ; preds = %1501
-  %1513 = fsub fast float %24, %1507
-  br label %1529
-
-; <label>:1514                                    ; preds = %1496
-  %1515 = fcmp fast ogt float %956, %24
-  br i1 %1515, label %1516, label %1529
-
-; <label>:1516                                    ; preds = %1514
-  %1517 = fsub fast float %956, %24
-  %1518 = fdiv fast float %1517, %1499
-  %1519 = fptoui float %1518 to i32
-  %1520 = uitofp i32 %1519 to float
-  %1521 = fmul fast float %1520, %1499
-  %1522 = fsub fast float %1517, %1521
-  %1523 = and i32 %1519, 1
-  %1524 = icmp eq i32 %1523, 0
-  br i1 %1524, label %1525, label %1527
-
-; <label>:1525                                    ; preds = %1516
-  %1526 = fsub fast float %24, %1522
-  br label %1529
-
-; <label>:1527                                    ; preds = %1516
-  %1528 = fadd fast float %1522, %20
-  br label %1529
-
-; <label>:1529                                    ; preds = %1527, %1525, %1514, %1512, %1510
-  %1530 = phi float [ %1511, %1510 ], [ %1513, %1512 ], [ %1526, %1525 ], [ %1528, %1527 ], [ %956, %1514 ]
-  %1531 = fptoui float %1530 to i32
-  %1532 = uitofp i32 %1531 to float
-  %1533 = uitofp i32 %1498 to float
-  %1534 = fptoui float %45 to i32
-  %1535 = fptoui float %182 to i32
-  %1536 = fptoui float %1532 to i32
-  %1537 = fptoui float %1533 to i32
-  %1538 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1539 = extractvalue %dx.types.CBufRet.i32 %1538, 0
-  %1540 = extractvalue %dx.types.CBufRet.i32 %1538, 1
-  %1541 = extractvalue %dx.types.CBufRet.i32 %1538, 2
-  %1542 = extractvalue %dx.types.CBufRet.i32 %1538, 3
-  %1543 = mul i32 %1539, %1534
-  %1544 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1535, i32 %1540, i32 %1543)  ; IMad(a,b,c)
-  %1545 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1536, i32 %1541, i32 %1544)  ; IMad(a,b,c)
-  %1546 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1537, i32 %1542, i32 %1545)  ; IMad(a,b,c)
-  %1547 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1546, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1548 = extractvalue %dx.types.ResRet.i32 %1547, 0
-  %1549 = icmp ne i32 %1548, 0
-  %1550 = uitofp i1 %1549 to float
-  br label %1551
-
-; <label>:1551                                    ; preds = %1529, %1463, %1433, %1415, %1405
-  %1552 = phi float [ %1430, %1415 ], [ 0.000000e+00, %1405 ], [ %1462, %1433 ], [ %1550, %1529 ], [ 0.000000e+00, %1463 ]
-  br i1 %958, label %1553, label %1579
-
-; <label>:1553                                    ; preds = %1551
-  %1554 = fcmp fast oge float %954, 0.000000e+00
-  %1555 = fptoui float %954 to i32
-  %1556 = icmp ult i32 %1555, %13
-  %1557 = and i1 %1554, %1556
-  %1558 = fcmp fast oge float %955, 0.000000e+00
-  %1559 = and i1 %1558, %1557
-  %1560 = fptoui float %955 to i32
-  %1561 = icmp ult i32 %1560, %15
-  %1562 = and i1 %1561, %1559
-  br i1 %1562, label %1563, label %1699
-
-; <label>:1563                                    ; preds = %1553
-  %1564 = fptoui float %45 to i32
-  %1565 = fptoui float %182 to i32
-  %1566 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1567 = extractvalue %dx.types.CBufRet.i32 %1566, 0
-  %1568 = extractvalue %dx.types.CBufRet.i32 %1566, 1
-  %1569 = extractvalue %dx.types.CBufRet.i32 %1566, 2
-  %1570 = extractvalue %dx.types.CBufRet.i32 %1566, 3
-  %1571 = mul i32 %1567, %1564
-  %1572 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1565, i32 %1568, i32 %1571)  ; IMad(a,b,c)
-  %1573 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1560, i32 %1569, i32 %1572)  ; IMad(a,b,c)
-  %1574 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1555, i32 %1570, i32 %1573)  ; IMad(a,b,c)
-  %1575 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1574, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1576 = extractvalue %dx.types.ResRet.i32 %1575, 0
-  %1577 = icmp ne i32 %1576, 0
-  %1578 = uitofp i1 %1577 to float
-  br label %1699
-
-; <label>:1579                                    ; preds = %1551
-  %1580 = icmp eq i32 %957, 1
-  br i1 %1580, label %1581, label %1611
-
-; <label>:1581                                    ; preds = %1579
-  %1582 = add i32 %13, -1
-  %1583 = uitofp i32 %1582 to float
-  %1584 = call float @dx.op.binary.f32(i32 35, float %954, float 0.000000e+00)  ; FMax(a,b)
-  %1585 = call float @dx.op.binary.f32(i32 36, float %1584, float %1583)  ; FMin(a,b)
-  %1586 = fptoui float %1585 to i32
-  %1587 = add i32 %15, -1
-  %1588 = uitofp i32 %1587 to float
-  %1589 = call float @dx.op.binary.f32(i32 35, float %955, float 0.000000e+00)  ; FMax(a,b)
-  %1590 = call float @dx.op.binary.f32(i32 36, float %1589, float %1588)  ; FMin(a,b)
-  %1591 = fptoui float %1590 to i32
-  %1592 = uitofp i32 %1591 to float
-  %1593 = uitofp i32 %1586 to float
-  %1594 = fptoui float %45 to i32
-  %1595 = fptoui float %182 to i32
-  %1596 = fptoui float %1592 to i32
-  %1597 = fptoui float %1593 to i32
-  %1598 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1599 = extractvalue %dx.types.CBufRet.i32 %1598, 0
-  %1600 = extractvalue %dx.types.CBufRet.i32 %1598, 1
-  %1601 = extractvalue %dx.types.CBufRet.i32 %1598, 2
-  %1602 = extractvalue %dx.types.CBufRet.i32 %1598, 3
-  %1603 = mul i32 %1599, %1594
-  %1604 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1595, i32 %1600, i32 %1603)  ; IMad(a,b,c)
-  %1605 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1596, i32 %1601, i32 %1604)  ; IMad(a,b,c)
-  %1606 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1597, i32 %1602, i32 %1605)  ; IMad(a,b,c)
-  %1607 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1606, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1608 = extractvalue %dx.types.ResRet.i32 %1607, 0
-  %1609 = icmp ne i32 %1608, 0
-  %1610 = uitofp i1 %1609 to float
-  br label %1699
-
-; <label>:1611                                    ; preds = %1579
-  %1612 = icmp eq i32 %957, 2
-  br i1 %1612, label %1613, label %1699
-
-; <label>:1613                                    ; preds = %1611
-  %1614 = fsub fast float %22, %20
-  %1615 = fcmp fast olt float %954, %20
-  br i1 %1615, label %1616, label %1629
-
-; <label>:1616                                    ; preds = %1613
-  %1617 = fsub fast float %20, %954
-  %1618 = fdiv fast float %1617, %1614
-  %1619 = fptoui float %1618 to i32
-  %1620 = uitofp i32 %1619 to float
-  %1621 = fmul fast float %1620, %1614
-  %1622 = fsub fast float %1617, %1621
-  %1623 = and i32 %1619, 1
-  %1624 = icmp eq i32 %1623, 0
-  br i1 %1624, label %1625, label %1627
-
-; <label>:1625                                    ; preds = %1616
-  %1626 = fadd fast float %1622, %20
-  br label %1644
-
-; <label>:1627                                    ; preds = %1616
-  %1628 = fsub fast float %22, %1622
-  br label %1644
-
-; <label>:1629                                    ; preds = %1613
-  %1630 = fcmp fast ogt float %954, %22
-  br i1 %1630, label %1631, label %1644
-
-; <label>:1631                                    ; preds = %1629
-  %1632 = fsub fast float %954, %22
-  %1633 = fdiv fast float %1632, %1614
-  %1634 = fptoui float %1633 to i32
-  %1635 = uitofp i32 %1634 to float
-  %1636 = fmul fast float %1635, %1614
-  %1637 = fsub fast float %1632, %1636
-  %1638 = and i32 %1634, 1
-  %1639 = icmp eq i32 %1638, 0
-  br i1 %1639, label %1640, label %1642
-
-; <label>:1640                                    ; preds = %1631
-  %1641 = fsub fast float %22, %1637
-  br label %1644
-
-; <label>:1642                                    ; preds = %1631
-  %1643 = fadd fast float %1637, %20
-  br label %1644
-
-; <label>:1644                                    ; preds = %1642, %1640, %1629, %1627, %1625
-  %1645 = phi float [ %1626, %1625 ], [ %1628, %1627 ], [ %1641, %1640 ], [ %1643, %1642 ], [ %954, %1629 ]
-  %1646 = fptoui float %1645 to i32
-  %1647 = fsub fast float %24, %20
-  %1648 = fcmp fast olt float %955, %20
-  br i1 %1648, label %1649, label %1662
-
-; <label>:1649                                    ; preds = %1644
-  %1650 = fsub fast float %20, %955
-  %1651 = fdiv fast float %1650, %1647
-  %1652 = fptoui float %1651 to i32
-  %1653 = uitofp i32 %1652 to float
-  %1654 = fmul fast float %1653, %1647
-  %1655 = fsub fast float %1650, %1654
-  %1656 = and i32 %1652, 1
-  %1657 = icmp eq i32 %1656, 0
-  br i1 %1657, label %1658, label %1660
-
-; <label>:1658                                    ; preds = %1649
-  %1659 = fadd fast float %1655, %20
-  br label %1677
-
-; <label>:1660                                    ; preds = %1649
-  %1661 = fsub fast float %24, %1655
-  br label %1677
-
-; <label>:1662                                    ; preds = %1644
-  %1663 = fcmp fast ogt float %955, %24
-  br i1 %1663, label %1664, label %1677
-
-; <label>:1664                                    ; preds = %1662
-  %1665 = fsub fast float %955, %24
-  %1666 = fdiv fast float %1665, %1647
-  %1667 = fptoui float %1666 to i32
-  %1668 = uitofp i32 %1667 to float
-  %1669 = fmul fast float %1668, %1647
-  %1670 = fsub fast float %1665, %1669
-  %1671 = and i32 %1667, 1
-  %1672 = icmp eq i32 %1671, 0
-  br i1 %1672, label %1673, label %1675
-
-; <label>:1673                                    ; preds = %1664
-  %1674 = fsub fast float %24, %1670
-  br label %1677
-
-; <label>:1675                                    ; preds = %1664
-  %1676 = fadd fast float %1670, %20
-  br label %1677
-
-; <label>:1677                                    ; preds = %1675, %1673, %1662, %1660, %1658
-  %1678 = phi float [ %1659, %1658 ], [ %1661, %1660 ], [ %1674, %1673 ], [ %1676, %1675 ], [ %955, %1662 ]
-  %1679 = fptoui float %1678 to i32
-  %1680 = uitofp i32 %1679 to float
-  %1681 = uitofp i32 %1646 to float
-  %1682 = fptoui float %45 to i32
-  %1683 = fptoui float %182 to i32
-  %1684 = fptoui float %1680 to i32
-  %1685 = fptoui float %1681 to i32
-  %1686 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1687 = extractvalue %dx.types.CBufRet.i32 %1686, 0
-  %1688 = extractvalue %dx.types.CBufRet.i32 %1686, 1
-  %1689 = extractvalue %dx.types.CBufRet.i32 %1686, 2
-  %1690 = extractvalue %dx.types.CBufRet.i32 %1686, 3
-  %1691 = mul i32 %1687, %1682
-  %1692 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1683, i32 %1688, i32 %1691)  ; IMad(a,b,c)
-  %1693 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1684, i32 %1689, i32 %1692)  ; IMad(a,b,c)
-  %1694 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1685, i32 %1690, i32 %1693)  ; IMad(a,b,c)
-  %1695 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1694, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1696 = extractvalue %dx.types.ResRet.i32 %1695, 0
-  %1697 = icmp ne i32 %1696, 0
-  %1698 = uitofp i1 %1697 to float
-  br label %1699
-
-; <label>:1699                                    ; preds = %1677, %1611, %1581, %1563, %1553
-  %1700 = phi float [ %1578, %1563 ], [ 0.000000e+00, %1553 ], [ %1610, %1581 ], [ %1698, %1677 ], [ 0.000000e+00, %1611 ]
-  br i1 %958, label %1701, label %1727
-
-; <label>:1701                                    ; preds = %1699
-  %1702 = fcmp fast oge float %953, 0.000000e+00
-  %1703 = fptoui float %953 to i32
-  %1704 = icmp ult i32 %1703, %13
-  %1705 = and i1 %1702, %1704
-  %1706 = fcmp fast oge float %955, 0.000000e+00
-  %1707 = and i1 %1706, %1705
-  %1708 = fptoui float %955 to i32
-  %1709 = icmp ult i32 %1708, %15
-  %1710 = and i1 %1709, %1707
-  br i1 %1710, label %1711, label %1847
-
-; <label>:1711                                    ; preds = %1701
-  %1712 = fptoui float %45 to i32
-  %1713 = fptoui float %182 to i32
-  %1714 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1715 = extractvalue %dx.types.CBufRet.i32 %1714, 0
-  %1716 = extractvalue %dx.types.CBufRet.i32 %1714, 1
-  %1717 = extractvalue %dx.types.CBufRet.i32 %1714, 2
-  %1718 = extractvalue %dx.types.CBufRet.i32 %1714, 3
-  %1719 = mul i32 %1715, %1712
-  %1720 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1713, i32 %1716, i32 %1719)  ; IMad(a,b,c)
-  %1721 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1708, i32 %1717, i32 %1720)  ; IMad(a,b,c)
-  %1722 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1703, i32 %1718, i32 %1721)  ; IMad(a,b,c)
-  %1723 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1722, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1724 = extractvalue %dx.types.ResRet.i32 %1723, 0
-  %1725 = icmp ne i32 %1724, 0
-  %1726 = uitofp i1 %1725 to float
-  br label %1847
-
-; <label>:1727                                    ; preds = %1699
-  %1728 = icmp eq i32 %957, 1
-  br i1 %1728, label %1729, label %1759
-
-; <label>:1729                                    ; preds = %1727
-  %1730 = add i32 %13, -1
-  %1731 = uitofp i32 %1730 to float
-  %1732 = call float @dx.op.binary.f32(i32 35, float %953, float 0.000000e+00)  ; FMax(a,b)
-  %1733 = call float @dx.op.binary.f32(i32 36, float %1732, float %1731)  ; FMin(a,b)
-  %1734 = fptoui float %1733 to i32
-  %1735 = add i32 %15, -1
-  %1736 = uitofp i32 %1735 to float
-  %1737 = call float @dx.op.binary.f32(i32 35, float %955, float 0.000000e+00)  ; FMax(a,b)
-  %1738 = call float @dx.op.binary.f32(i32 36, float %1737, float %1736)  ; FMin(a,b)
-  %1739 = fptoui float %1738 to i32
-  %1740 = uitofp i32 %1739 to float
-  %1741 = uitofp i32 %1734 to float
-  %1742 = fptoui float %45 to i32
-  %1743 = fptoui float %182 to i32
-  %1744 = fptoui float %1740 to i32
-  %1745 = fptoui float %1741 to i32
-  %1746 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1747 = extractvalue %dx.types.CBufRet.i32 %1746, 0
-  %1748 = extractvalue %dx.types.CBufRet.i32 %1746, 1
-  %1749 = extractvalue %dx.types.CBufRet.i32 %1746, 2
-  %1750 = extractvalue %dx.types.CBufRet.i32 %1746, 3
-  %1751 = mul i32 %1747, %1742
-  %1752 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1743, i32 %1748, i32 %1751)  ; IMad(a,b,c)
-  %1753 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1744, i32 %1749, i32 %1752)  ; IMad(a,b,c)
-  %1754 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1745, i32 %1750, i32 %1753)  ; IMad(a,b,c)
-  %1755 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1754, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1756 = extractvalue %dx.types.ResRet.i32 %1755, 0
-  %1757 = icmp ne i32 %1756, 0
-  %1758 = uitofp i1 %1757 to float
-  br label %1847
-
-; <label>:1759                                    ; preds = %1727
-  %1760 = icmp eq i32 %957, 2
-  br i1 %1760, label %1761, label %1847
-
-; <label>:1761                                    ; preds = %1759
-  %1762 = fsub fast float %22, %20
-  %1763 = fcmp fast olt float %953, %20
-  br i1 %1763, label %1764, label %1777
-
-; <label>:1764                                    ; preds = %1761
-  %1765 = fsub fast float %20, %953
-  %1766 = fdiv fast float %1765, %1762
-  %1767 = fptoui float %1766 to i32
-  %1768 = uitofp i32 %1767 to float
-  %1769 = fmul fast float %1768, %1762
-  %1770 = fsub fast float %1765, %1769
-  %1771 = and i32 %1767, 1
-  %1772 = icmp eq i32 %1771, 0
-  br i1 %1772, label %1773, label %1775
-
-; <label>:1773                                    ; preds = %1764
-  %1774 = fadd fast float %1770, %20
-  br label %1792
-
-; <label>:1775                                    ; preds = %1764
-  %1776 = fsub fast float %22, %1770
-  br label %1792
-
-; <label>:1777                                    ; preds = %1761
-  %1778 = fcmp fast ogt float %953, %22
-  br i1 %1778, label %1779, label %1792
-
-; <label>:1779                                    ; preds = %1777
-  %1780 = fsub fast float %953, %22
-  %1781 = fdiv fast float %1780, %1762
-  %1782 = fptoui float %1781 to i32
-  %1783 = uitofp i32 %1782 to float
-  %1784 = fmul fast float %1783, %1762
-  %1785 = fsub fast float %1780, %1784
-  %1786 = and i32 %1782, 1
-  %1787 = icmp eq i32 %1786, 0
-  br i1 %1787, label %1788, label %1790
-
-; <label>:1788                                    ; preds = %1779
-  %1789 = fsub fast float %22, %1785
-  br label %1792
-
-; <label>:1790                                    ; preds = %1779
-  %1791 = fadd fast float %1785, %20
-  br label %1792
-
-; <label>:1792                                    ; preds = %1790, %1788, %1777, %1775, %1773
-  %1793 = phi float [ %1774, %1773 ], [ %1776, %1775 ], [ %1789, %1788 ], [ %1791, %1790 ], [ %953, %1777 ]
-  %1794 = fptoui float %1793 to i32
-  %1795 = fsub fast float %24, %20
-  %1796 = fcmp fast olt float %955, %20
-  br i1 %1796, label %1797, label %1810
-
-; <label>:1797                                    ; preds = %1792
-  %1798 = fsub fast float %20, %955
-  %1799 = fdiv fast float %1798, %1795
-  %1800 = fptoui float %1799 to i32
-  %1801 = uitofp i32 %1800 to float
-  %1802 = fmul fast float %1801, %1795
-  %1803 = fsub fast float %1798, %1802
-  %1804 = and i32 %1800, 1
-  %1805 = icmp eq i32 %1804, 0
-  br i1 %1805, label %1806, label %1808
-
-; <label>:1806                                    ; preds = %1797
-  %1807 = fadd fast float %1803, %20
-  br label %1825
-
-; <label>:1808                                    ; preds = %1797
-  %1809 = fsub fast float %24, %1803
-  br label %1825
-
-; <label>:1810                                    ; preds = %1792
-  %1811 = fcmp fast ogt float %955, %24
-  br i1 %1811, label %1812, label %1825
-
-; <label>:1812                                    ; preds = %1810
-  %1813 = fsub fast float %955, %24
-  %1814 = fdiv fast float %1813, %1795
-  %1815 = fptoui float %1814 to i32
-  %1816 = uitofp i32 %1815 to float
-  %1817 = fmul fast float %1816, %1795
-  %1818 = fsub fast float %1813, %1817
-  %1819 = and i32 %1815, 1
-  %1820 = icmp eq i32 %1819, 0
-  br i1 %1820, label %1821, label %1823
-
-; <label>:1821                                    ; preds = %1812
-  %1822 = fsub fast float %24, %1818
-  br label %1825
-
-; <label>:1823                                    ; preds = %1812
-  %1824 = fadd fast float %1818, %20
-  br label %1825
-
-; <label>:1825                                    ; preds = %1823, %1821, %1810, %1808, %1806
-  %1826 = phi float [ %1807, %1806 ], [ %1809, %1808 ], [ %1822, %1821 ], [ %1824, %1823 ], [ %955, %1810 ]
-  %1827 = fptoui float %1826 to i32
-  %1828 = uitofp i32 %1827 to float
-  %1829 = uitofp i32 %1794 to float
-  %1830 = fptoui float %45 to i32
-  %1831 = fptoui float %182 to i32
-  %1832 = fptoui float %1828 to i32
-  %1833 = fptoui float %1829 to i32
-  %1834 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1835 = extractvalue %dx.types.CBufRet.i32 %1834, 0
-  %1836 = extractvalue %dx.types.CBufRet.i32 %1834, 1
-  %1837 = extractvalue %dx.types.CBufRet.i32 %1834, 2
-  %1838 = extractvalue %dx.types.CBufRet.i32 %1834, 3
-  %1839 = mul i32 %1835, %1830
-  %1840 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1831, i32 %1836, i32 %1839)  ; IMad(a,b,c)
-  %1841 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1832, i32 %1837, i32 %1840)  ; IMad(a,b,c)
-  %1842 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1833, i32 %1838, i32 %1841)  ; IMad(a,b,c)
-  %1843 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1842, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1844 = extractvalue %dx.types.ResRet.i32 %1843, 0
-  %1845 = icmp ne i32 %1844, 0
-  %1846 = uitofp i1 %1845 to float
-  br label %1847
-
-; <label>:1847                                    ; preds = %1825, %1759, %1729, %1711, %1701
-  %1848 = phi float [ %1726, %1711 ], [ 0.000000e+00, %1701 ], [ %1758, %1729 ], [ %1846, %1825 ], [ 0.000000e+00, %1759 ]
-  br i1 %958, label %1849, label %1875
-
-; <label>:1849                                    ; preds = %1847
-  %1850 = fcmp fast oge float %1255, 0.000000e+00
-  %1851 = fptoui float %1255 to i32
-  %1852 = icmp ult i32 %1851, %13
-  %1853 = and i1 %1850, %1852
-  %1854 = fcmp fast oge float %955, 0.000000e+00
-  %1855 = and i1 %1854, %1853
-  %1856 = fptoui float %955 to i32
-  %1857 = icmp ult i32 %1856, %15
-  %1858 = and i1 %1857, %1855
-  br i1 %1858, label %1859, label %1995
-
-; <label>:1859                                    ; preds = %1849
-  %1860 = fptoui float %45 to i32
-  %1861 = fptoui float %182 to i32
-  %1862 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1863 = extractvalue %dx.types.CBufRet.i32 %1862, 0
-  %1864 = extractvalue %dx.types.CBufRet.i32 %1862, 1
-  %1865 = extractvalue %dx.types.CBufRet.i32 %1862, 2
-  %1866 = extractvalue %dx.types.CBufRet.i32 %1862, 3
-  %1867 = mul i32 %1863, %1860
-  %1868 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1861, i32 %1864, i32 %1867)  ; IMad(a,b,c)
-  %1869 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1856, i32 %1865, i32 %1868)  ; IMad(a,b,c)
-  %1870 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1851, i32 %1866, i32 %1869)  ; IMad(a,b,c)
-  %1871 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1870, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1872 = extractvalue %dx.types.ResRet.i32 %1871, 0
-  %1873 = icmp ne i32 %1872, 0
-  %1874 = uitofp i1 %1873 to float
-  br label %1995
-
-; <label>:1875                                    ; preds = %1847
-  %1876 = icmp eq i32 %957, 1
-  br i1 %1876, label %1877, label %1907
-
-; <label>:1877                                    ; preds = %1875
-  %1878 = add i32 %13, -1
-  %1879 = uitofp i32 %1878 to float
-  %1880 = call float @dx.op.binary.f32(i32 35, float %1255, float 0.000000e+00)  ; FMax(a,b)
-  %1881 = call float @dx.op.binary.f32(i32 36, float %1880, float %1879)  ; FMin(a,b)
-  %1882 = fptoui float %1881 to i32
-  %1883 = add i32 %15, -1
-  %1884 = uitofp i32 %1883 to float
-  %1885 = call float @dx.op.binary.f32(i32 35, float %955, float 0.000000e+00)  ; FMax(a,b)
-  %1886 = call float @dx.op.binary.f32(i32 36, float %1885, float %1884)  ; FMin(a,b)
-  %1887 = fptoui float %1886 to i32
-  %1888 = uitofp i32 %1887 to float
-  %1889 = uitofp i32 %1882 to float
-  %1890 = fptoui float %45 to i32
-  %1891 = fptoui float %182 to i32
-  %1892 = fptoui float %1888 to i32
-  %1893 = fptoui float %1889 to i32
-  %1894 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1895 = extractvalue %dx.types.CBufRet.i32 %1894, 0
-  %1896 = extractvalue %dx.types.CBufRet.i32 %1894, 1
-  %1897 = extractvalue %dx.types.CBufRet.i32 %1894, 2
-  %1898 = extractvalue %dx.types.CBufRet.i32 %1894, 3
-  %1899 = mul i32 %1895, %1890
-  %1900 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1891, i32 %1896, i32 %1899)  ; IMad(a,b,c)
-  %1901 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1892, i32 %1897, i32 %1900)  ; IMad(a,b,c)
-  %1902 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1893, i32 %1898, i32 %1901)  ; IMad(a,b,c)
-  %1903 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1902, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1904 = extractvalue %dx.types.ResRet.i32 %1903, 0
-  %1905 = icmp ne i32 %1904, 0
-  %1906 = uitofp i1 %1905 to float
-  br label %1995
-
-; <label>:1907                                    ; preds = %1875
-  %1908 = icmp eq i32 %957, 2
-  br i1 %1908, label %1909, label %1995
-
-; <label>:1909                                    ; preds = %1907
-  %1910 = fsub fast float %22, %20
-  %1911 = fcmp fast olt float %1255, %20
-  br i1 %1911, label %1912, label %1925
-
-; <label>:1912                                    ; preds = %1909
-  %1913 = fsub fast float %20, %1255
-  %1914 = fdiv fast float %1913, %1910
-  %1915 = fptoui float %1914 to i32
-  %1916 = uitofp i32 %1915 to float
-  %1917 = fmul fast float %1916, %1910
-  %1918 = fsub fast float %1913, %1917
-  %1919 = and i32 %1915, 1
-  %1920 = icmp eq i32 %1919, 0
-  br i1 %1920, label %1921, label %1923
-
-; <label>:1921                                    ; preds = %1912
-  %1922 = fadd fast float %1918, %20
-  br label %1940
-
-; <label>:1923                                    ; preds = %1912
-  %1924 = fsub fast float %22, %1918
-  br label %1940
-
-; <label>:1925                                    ; preds = %1909
-  %1926 = fcmp fast ogt float %1255, %22
-  br i1 %1926, label %1927, label %1940
-
-; <label>:1927                                    ; preds = %1925
-  %1928 = fsub fast float %1255, %22
-  %1929 = fdiv fast float %1928, %1910
-  %1930 = fptoui float %1929 to i32
-  %1931 = uitofp i32 %1930 to float
-  %1932 = fmul fast float %1931, %1910
-  %1933 = fsub fast float %1928, %1932
-  %1934 = and i32 %1930, 1
-  %1935 = icmp eq i32 %1934, 0
-  br i1 %1935, label %1936, label %1938
-
-; <label>:1936                                    ; preds = %1927
-  %1937 = fsub fast float %22, %1933
-  br label %1940
-
-; <label>:1938                                    ; preds = %1927
-  %1939 = fadd fast float %1933, %20
-  br label %1940
-
-; <label>:1940                                    ; preds = %1938, %1936, %1925, %1923, %1921
-  %1941 = phi float [ %1922, %1921 ], [ %1924, %1923 ], [ %1937, %1936 ], [ %1939, %1938 ], [ %1255, %1925 ]
-  %1942 = fptoui float %1941 to i32
-  %1943 = fsub fast float %24, %20
-  %1944 = fcmp fast olt float %955, %20
-  br i1 %1944, label %1945, label %1958
-
-; <label>:1945                                    ; preds = %1940
-  %1946 = fsub fast float %20, %955
-  %1947 = fdiv fast float %1946, %1943
-  %1948 = fptoui float %1947 to i32
-  %1949 = uitofp i32 %1948 to float
-  %1950 = fmul fast float %1949, %1943
-  %1951 = fsub fast float %1946, %1950
-  %1952 = and i32 %1948, 1
-  %1953 = icmp eq i32 %1952, 0
-  br i1 %1953, label %1954, label %1956
-
-; <label>:1954                                    ; preds = %1945
-  %1955 = fadd fast float %1951, %20
-  br label %1973
-
-; <label>:1956                                    ; preds = %1945
-  %1957 = fsub fast float %24, %1951
-  br label %1973
-
-; <label>:1958                                    ; preds = %1940
-  %1959 = fcmp fast ogt float %955, %24
-  br i1 %1959, label %1960, label %1973
-
-; <label>:1960                                    ; preds = %1958
-  %1961 = fsub fast float %955, %24
-  %1962 = fdiv fast float %1961, %1943
-  %1963 = fptoui float %1962 to i32
-  %1964 = uitofp i32 %1963 to float
-  %1965 = fmul fast float %1964, %1943
-  %1966 = fsub fast float %1961, %1965
-  %1967 = and i32 %1963, 1
-  %1968 = icmp eq i32 %1967, 0
-  br i1 %1968, label %1969, label %1971
-
-; <label>:1969                                    ; preds = %1960
-  %1970 = fsub fast float %24, %1966
-  br label %1973
-
-; <label>:1971                                    ; preds = %1960
-  %1972 = fadd fast float %1966, %20
-  br label %1973
-
-; <label>:1973                                    ; preds = %1971, %1969, %1958, %1956, %1954
-  %1974 = phi float [ %1955, %1954 ], [ %1957, %1956 ], [ %1970, %1969 ], [ %1972, %1971 ], [ %955, %1958 ]
-  %1975 = fptoui float %1974 to i32
-  %1976 = uitofp i32 %1975 to float
-  %1977 = uitofp i32 %1942 to float
-  %1978 = fptoui float %45 to i32
-  %1979 = fptoui float %182 to i32
-  %1980 = fptoui float %1976 to i32
-  %1981 = fptoui float %1977 to i32
-  %1982 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1983 = extractvalue %dx.types.CBufRet.i32 %1982, 0
-  %1984 = extractvalue %dx.types.CBufRet.i32 %1982, 1
-  %1985 = extractvalue %dx.types.CBufRet.i32 %1982, 2
-  %1986 = extractvalue %dx.types.CBufRet.i32 %1982, 3
-  %1987 = mul i32 %1983, %1978
-  %1988 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1979, i32 %1984, i32 %1987)  ; IMad(a,b,c)
-  %1989 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1980, i32 %1985, i32 %1988)  ; IMad(a,b,c)
-  %1990 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1981, i32 %1986, i32 %1989)  ; IMad(a,b,c)
-  %1991 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1990, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1992 = extractvalue %dx.types.ResRet.i32 %1991, 0
-  %1993 = icmp ne i32 %1992, 0
-  %1994 = uitofp i1 %1993 to float
-  br label %1995
-
-; <label>:1995                                    ; preds = %1973, %1907, %1877, %1859, %1849
-  %1996 = phi float [ %1874, %1859 ], [ 0.000000e+00, %1849 ], [ %1906, %1877 ], [ %1994, %1973 ], [ 0.000000e+00, %1907 ]
-  br i1 %958, label %1997, label %2023
-
-; <label>:1997                                    ; preds = %1995
-  %1998 = fcmp fast oge float %1404, 0.000000e+00
-  %1999 = fptoui float %1404 to i32
-  %2000 = icmp ult i32 %1999, %13
-  %2001 = and i1 %1998, %2000
-  %2002 = fcmp fast oge float %955, 0.000000e+00
-  %2003 = and i1 %2002, %2001
-  %2004 = fptoui float %955 to i32
-  %2005 = icmp ult i32 %2004, %15
-  %2006 = and i1 %2005, %2003
-  br i1 %2006, label %2007, label %2143
-
-; <label>:2007                                    ; preds = %1997
-  %2008 = fptoui float %45 to i32
-  %2009 = fptoui float %182 to i32
-  %2010 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2011 = extractvalue %dx.types.CBufRet.i32 %2010, 0
-  %2012 = extractvalue %dx.types.CBufRet.i32 %2010, 1
-  %2013 = extractvalue %dx.types.CBufRet.i32 %2010, 2
-  %2014 = extractvalue %dx.types.CBufRet.i32 %2010, 3
-  %2015 = mul i32 %2011, %2008
-  %2016 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2009, i32 %2012, i32 %2015)  ; IMad(a,b,c)
-  %2017 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2004, i32 %2013, i32 %2016)  ; IMad(a,b,c)
-  %2018 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1999, i32 %2014, i32 %2017)  ; IMad(a,b,c)
-  %2019 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2018, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2020 = extractvalue %dx.types.ResRet.i32 %2019, 0
-  %2021 = icmp ne i32 %2020, 0
-  %2022 = uitofp i1 %2021 to float
-  br label %2143
-
-; <label>:2023                                    ; preds = %1995
-  %2024 = icmp eq i32 %957, 1
-  br i1 %2024, label %2025, label %2055
-
-; <label>:2025                                    ; preds = %2023
-  %2026 = add i32 %13, -1
-  %2027 = uitofp i32 %2026 to float
-  %2028 = call float @dx.op.binary.f32(i32 35, float %1404, float 0.000000e+00)  ; FMax(a,b)
-  %2029 = call float @dx.op.binary.f32(i32 36, float %2028, float %2027)  ; FMin(a,b)
-  %2030 = fptoui float %2029 to i32
-  %2031 = add i32 %15, -1
-  %2032 = uitofp i32 %2031 to float
-  %2033 = call float @dx.op.binary.f32(i32 35, float %955, float 0.000000e+00)  ; FMax(a,b)
-  %2034 = call float @dx.op.binary.f32(i32 36, float %2033, float %2032)  ; FMin(a,b)
-  %2035 = fptoui float %2034 to i32
-  %2036 = uitofp i32 %2035 to float
-  %2037 = uitofp i32 %2030 to float
-  %2038 = fptoui float %45 to i32
-  %2039 = fptoui float %182 to i32
-  %2040 = fptoui float %2036 to i32
-  %2041 = fptoui float %2037 to i32
-  %2042 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2043 = extractvalue %dx.types.CBufRet.i32 %2042, 0
-  %2044 = extractvalue %dx.types.CBufRet.i32 %2042, 1
-  %2045 = extractvalue %dx.types.CBufRet.i32 %2042, 2
-  %2046 = extractvalue %dx.types.CBufRet.i32 %2042, 3
-  %2047 = mul i32 %2043, %2038
-  %2048 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2039, i32 %2044, i32 %2047)  ; IMad(a,b,c)
-  %2049 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2040, i32 %2045, i32 %2048)  ; IMad(a,b,c)
-  %2050 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2041, i32 %2046, i32 %2049)  ; IMad(a,b,c)
-  %2051 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2050, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2052 = extractvalue %dx.types.ResRet.i32 %2051, 0
-  %2053 = icmp ne i32 %2052, 0
-  %2054 = uitofp i1 %2053 to float
-  br label %2143
-
-; <label>:2055                                    ; preds = %2023
-  %2056 = icmp eq i32 %957, 2
-  br i1 %2056, label %2057, label %2143
-
-; <label>:2057                                    ; preds = %2055
-  %2058 = fsub fast float %22, %20
-  %2059 = fcmp fast olt float %1404, %20
-  br i1 %2059, label %2060, label %2073
-
-; <label>:2060                                    ; preds = %2057
-  %2061 = fsub fast float %20, %1404
-  %2062 = fdiv fast float %2061, %2058
-  %2063 = fptoui float %2062 to i32
-  %2064 = uitofp i32 %2063 to float
-  %2065 = fmul fast float %2064, %2058
-  %2066 = fsub fast float %2061, %2065
-  %2067 = and i32 %2063, 1
-  %2068 = icmp eq i32 %2067, 0
-  br i1 %2068, label %2069, label %2071
-
-; <label>:2069                                    ; preds = %2060
-  %2070 = fadd fast float %2066, %20
-  br label %2088
-
-; <label>:2071                                    ; preds = %2060
-  %2072 = fsub fast float %22, %2066
-  br label %2088
-
-; <label>:2073                                    ; preds = %2057
-  %2074 = fcmp fast ogt float %1404, %22
-  br i1 %2074, label %2075, label %2088
-
-; <label>:2075                                    ; preds = %2073
-  %2076 = fsub fast float %1404, %22
-  %2077 = fdiv fast float %2076, %2058
-  %2078 = fptoui float %2077 to i32
-  %2079 = uitofp i32 %2078 to float
-  %2080 = fmul fast float %2079, %2058
-  %2081 = fsub fast float %2076, %2080
-  %2082 = and i32 %2078, 1
-  %2083 = icmp eq i32 %2082, 0
-  br i1 %2083, label %2084, label %2086
-
-; <label>:2084                                    ; preds = %2075
-  %2085 = fsub fast float %22, %2081
-  br label %2088
-
-; <label>:2086                                    ; preds = %2075
-  %2087 = fadd fast float %2081, %20
-  br label %2088
-
-; <label>:2088                                    ; preds = %2086, %2084, %2073, %2071, %2069
-  %2089 = phi float [ %2070, %2069 ], [ %2072, %2071 ], [ %2085, %2084 ], [ %2087, %2086 ], [ %1404, %2073 ]
-  %2090 = fptoui float %2089 to i32
-  %2091 = fsub fast float %24, %20
-  %2092 = fcmp fast olt float %955, %20
-  br i1 %2092, label %2093, label %2106
-
-; <label>:2093                                    ; preds = %2088
-  %2094 = fsub fast float %20, %955
-  %2095 = fdiv fast float %2094, %2091
-  %2096 = fptoui float %2095 to i32
-  %2097 = uitofp i32 %2096 to float
-  %2098 = fmul fast float %2097, %2091
-  %2099 = fsub fast float %2094, %2098
-  %2100 = and i32 %2096, 1
-  %2101 = icmp eq i32 %2100, 0
-  br i1 %2101, label %2102, label %2104
-
-; <label>:2102                                    ; preds = %2093
-  %2103 = fadd fast float %2099, %20
-  br label %2121
-
-; <label>:2104                                    ; preds = %2093
-  %2105 = fsub fast float %24, %2099
-  br label %2121
-
-; <label>:2106                                    ; preds = %2088
-  %2107 = fcmp fast ogt float %955, %24
-  br i1 %2107, label %2108, label %2121
-
-; <label>:2108                                    ; preds = %2106
-  %2109 = fsub fast float %955, %24
-  %2110 = fdiv fast float %2109, %2091
-  %2111 = fptoui float %2110 to i32
-  %2112 = uitofp i32 %2111 to float
-  %2113 = fmul fast float %2112, %2091
-  %2114 = fsub fast float %2109, %2113
-  %2115 = and i32 %2111, 1
-  %2116 = icmp eq i32 %2115, 0
-  br i1 %2116, label %2117, label %2119
-
-; <label>:2117                                    ; preds = %2108
-  %2118 = fsub fast float %24, %2114
-  br label %2121
-
-; <label>:2119                                    ; preds = %2108
-  %2120 = fadd fast float %2114, %20
-  br label %2121
-
-; <label>:2121                                    ; preds = %2119, %2117, %2106, %2104, %2102
-  %2122 = phi float [ %2103, %2102 ], [ %2105, %2104 ], [ %2118, %2117 ], [ %2120, %2119 ], [ %955, %2106 ]
-  %2123 = fptoui float %2122 to i32
-  %2124 = uitofp i32 %2123 to float
-  %2125 = uitofp i32 %2090 to float
-  %2126 = fptoui float %45 to i32
-  %2127 = fptoui float %182 to i32
-  %2128 = fptoui float %2124 to i32
-  %2129 = fptoui float %2125 to i32
-  %2130 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2131 = extractvalue %dx.types.CBufRet.i32 %2130, 0
-  %2132 = extractvalue %dx.types.CBufRet.i32 %2130, 1
-  %2133 = extractvalue %dx.types.CBufRet.i32 %2130, 2
-  %2134 = extractvalue %dx.types.CBufRet.i32 %2130, 3
-  %2135 = mul i32 %2131, %2126
-  %2136 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2127, i32 %2132, i32 %2135)  ; IMad(a,b,c)
-  %2137 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2128, i32 %2133, i32 %2136)  ; IMad(a,b,c)
-  %2138 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2129, i32 %2134, i32 %2137)  ; IMad(a,b,c)
-  %2139 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2138, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2140 = extractvalue %dx.types.ResRet.i32 %2139, 0
-  %2141 = icmp ne i32 %2140, 0
-  %2142 = uitofp i1 %2141 to float
-  br label %2143
-
-; <label>:2143                                    ; preds = %2121, %2055, %2025, %2007, %1997
-  %2144 = phi float [ %2022, %2007 ], [ 0.000000e+00, %1997 ], [ %2054, %2025 ], [ %2142, %2121 ], [ 0.000000e+00, %2055 ]
-  %2145 = fadd fast float %955, 1.000000e+00
-  br i1 %958, label %2146, label %2172
-
-; <label>:2146                                    ; preds = %2143
-  %2147 = fcmp fast oge float %954, 0.000000e+00
-  %2148 = fptoui float %954 to i32
-  %2149 = icmp ult i32 %2148, %13
-  %2150 = and i1 %2147, %2149
-  %2151 = fcmp fast oge float %2145, 0.000000e+00
-  %2152 = and i1 %2151, %2150
-  %2153 = fptoui float %2145 to i32
-  %2154 = icmp ult i32 %2153, %15
-  %2155 = and i1 %2154, %2152
-  br i1 %2155, label %2156, label %2292
-
-; <label>:2156                                    ; preds = %2146
-  %2157 = fptoui float %45 to i32
-  %2158 = fptoui float %182 to i32
-  %2159 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2160 = extractvalue %dx.types.CBufRet.i32 %2159, 0
-  %2161 = extractvalue %dx.types.CBufRet.i32 %2159, 1
-  %2162 = extractvalue %dx.types.CBufRet.i32 %2159, 2
-  %2163 = extractvalue %dx.types.CBufRet.i32 %2159, 3
-  %2164 = mul i32 %2160, %2157
-  %2165 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2158, i32 %2161, i32 %2164)  ; IMad(a,b,c)
-  %2166 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2153, i32 %2162, i32 %2165)  ; IMad(a,b,c)
-  %2167 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2148, i32 %2163, i32 %2166)  ; IMad(a,b,c)
-  %2168 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2167, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2169 = extractvalue %dx.types.ResRet.i32 %2168, 0
-  %2170 = icmp ne i32 %2169, 0
-  %2171 = uitofp i1 %2170 to float
-  br label %2292
-
-; <label>:2172                                    ; preds = %2143
-  %2173 = icmp eq i32 %957, 1
-  br i1 %2173, label %2174, label %2204
-
-; <label>:2174                                    ; preds = %2172
-  %2175 = add i32 %13, -1
-  %2176 = uitofp i32 %2175 to float
-  %2177 = call float @dx.op.binary.f32(i32 35, float %954, float 0.000000e+00)  ; FMax(a,b)
-  %2178 = call float @dx.op.binary.f32(i32 36, float %2177, float %2176)  ; FMin(a,b)
-  %2179 = fptoui float %2178 to i32
-  %2180 = add i32 %15, -1
-  %2181 = uitofp i32 %2180 to float
-  %2182 = call float @dx.op.binary.f32(i32 35, float %2145, float 0.000000e+00)  ; FMax(a,b)
-  %2183 = call float @dx.op.binary.f32(i32 36, float %2182, float %2181)  ; FMin(a,b)
-  %2184 = fptoui float %2183 to i32
-  %2185 = uitofp i32 %2184 to float
-  %2186 = uitofp i32 %2179 to float
-  %2187 = fptoui float %45 to i32
-  %2188 = fptoui float %182 to i32
-  %2189 = fptoui float %2185 to i32
-  %2190 = fptoui float %2186 to i32
-  %2191 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2192 = extractvalue %dx.types.CBufRet.i32 %2191, 0
-  %2193 = extractvalue %dx.types.CBufRet.i32 %2191, 1
-  %2194 = extractvalue %dx.types.CBufRet.i32 %2191, 2
-  %2195 = extractvalue %dx.types.CBufRet.i32 %2191, 3
-  %2196 = mul i32 %2192, %2187
-  %2197 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2188, i32 %2193, i32 %2196)  ; IMad(a,b,c)
-  %2198 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2189, i32 %2194, i32 %2197)  ; IMad(a,b,c)
-  %2199 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2190, i32 %2195, i32 %2198)  ; IMad(a,b,c)
-  %2200 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2199, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2201 = extractvalue %dx.types.ResRet.i32 %2200, 0
-  %2202 = icmp ne i32 %2201, 0
-  %2203 = uitofp i1 %2202 to float
-  br label %2292
-
-; <label>:2204                                    ; preds = %2172
-  %2205 = icmp eq i32 %957, 2
-  br i1 %2205, label %2206, label %2292
-
-; <label>:2206                                    ; preds = %2204
-  %2207 = fsub fast float %22, %20
-  %2208 = fcmp fast olt float %954, %20
-  br i1 %2208, label %2209, label %2222
-
-; <label>:2209                                    ; preds = %2206
-  %2210 = fsub fast float %20, %954
-  %2211 = fdiv fast float %2210, %2207
-  %2212 = fptoui float %2211 to i32
-  %2213 = uitofp i32 %2212 to float
-  %2214 = fmul fast float %2213, %2207
-  %2215 = fsub fast float %2210, %2214
-  %2216 = and i32 %2212, 1
-  %2217 = icmp eq i32 %2216, 0
-  br i1 %2217, label %2218, label %2220
-
-; <label>:2218                                    ; preds = %2209
-  %2219 = fadd fast float %2215, %20
-  br label %2237
-
-; <label>:2220                                    ; preds = %2209
-  %2221 = fsub fast float %22, %2215
-  br label %2237
-
-; <label>:2222                                    ; preds = %2206
-  %2223 = fcmp fast ogt float %954, %22
-  br i1 %2223, label %2224, label %2237
-
-; <label>:2224                                    ; preds = %2222
-  %2225 = fsub fast float %954, %22
-  %2226 = fdiv fast float %2225, %2207
-  %2227 = fptoui float %2226 to i32
-  %2228 = uitofp i32 %2227 to float
-  %2229 = fmul fast float %2228, %2207
-  %2230 = fsub fast float %2225, %2229
-  %2231 = and i32 %2227, 1
-  %2232 = icmp eq i32 %2231, 0
-  br i1 %2232, label %2233, label %2235
-
-; <label>:2233                                    ; preds = %2224
-  %2234 = fsub fast float %22, %2230
-  br label %2237
-
-; <label>:2235                                    ; preds = %2224
-  %2236 = fadd fast float %2230, %20
-  br label %2237
-
-; <label>:2237                                    ; preds = %2235, %2233, %2222, %2220, %2218
-  %2238 = phi float [ %2219, %2218 ], [ %2221, %2220 ], [ %2234, %2233 ], [ %2236, %2235 ], [ %954, %2222 ]
-  %2239 = fptoui float %2238 to i32
-  %2240 = fsub fast float %24, %20
-  %2241 = fcmp fast olt float %2145, %20
-  br i1 %2241, label %2242, label %2255
-
-; <label>:2242                                    ; preds = %2237
-  %2243 = fsub fast float %20, %2145
-  %2244 = fdiv fast float %2243, %2240
-  %2245 = fptoui float %2244 to i32
-  %2246 = uitofp i32 %2245 to float
-  %2247 = fmul fast float %2246, %2240
-  %2248 = fsub fast float %2243, %2247
-  %2249 = and i32 %2245, 1
-  %2250 = icmp eq i32 %2249, 0
-  br i1 %2250, label %2251, label %2253
-
-; <label>:2251                                    ; preds = %2242
-  %2252 = fadd fast float %2248, %20
-  br label %2270
-
-; <label>:2253                                    ; preds = %2242
-  %2254 = fsub fast float %24, %2248
-  br label %2270
-
-; <label>:2255                                    ; preds = %2237
-  %2256 = fcmp fast ogt float %2145, %24
-  br i1 %2256, label %2257, label %2270
-
-; <label>:2257                                    ; preds = %2255
-  %2258 = fsub fast float %2145, %24
-  %2259 = fdiv fast float %2258, %2240
-  %2260 = fptoui float %2259 to i32
-  %2261 = uitofp i32 %2260 to float
-  %2262 = fmul fast float %2261, %2240
-  %2263 = fsub fast float %2258, %2262
-  %2264 = and i32 %2260, 1
-  %2265 = icmp eq i32 %2264, 0
-  br i1 %2265, label %2266, label %2268
-
-; <label>:2266                                    ; preds = %2257
-  %2267 = fsub fast float %24, %2263
-  br label %2270
-
-; <label>:2268                                    ; preds = %2257
-  %2269 = fadd fast float %2263, %20
-  br label %2270
-
-; <label>:2270                                    ; preds = %2268, %2266, %2255, %2253, %2251
-  %2271 = phi float [ %2252, %2251 ], [ %2254, %2253 ], [ %2267, %2266 ], [ %2269, %2268 ], [ %2145, %2255 ]
-  %2272 = fptoui float %2271 to i32
-  %2273 = uitofp i32 %2272 to float
-  %2274 = uitofp i32 %2239 to float
-  %2275 = fptoui float %45 to i32
-  %2276 = fptoui float %182 to i32
-  %2277 = fptoui float %2273 to i32
-  %2278 = fptoui float %2274 to i32
-  %2279 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2280 = extractvalue %dx.types.CBufRet.i32 %2279, 0
-  %2281 = extractvalue %dx.types.CBufRet.i32 %2279, 1
-  %2282 = extractvalue %dx.types.CBufRet.i32 %2279, 2
-  %2283 = extractvalue %dx.types.CBufRet.i32 %2279, 3
-  %2284 = mul i32 %2280, %2275
-  %2285 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2276, i32 %2281, i32 %2284)  ; IMad(a,b,c)
-  %2286 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2277, i32 %2282, i32 %2285)  ; IMad(a,b,c)
-  %2287 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2278, i32 %2283, i32 %2286)  ; IMad(a,b,c)
-  %2288 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2287, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2289 = extractvalue %dx.types.ResRet.i32 %2288, 0
-  %2290 = icmp ne i32 %2289, 0
-  %2291 = uitofp i1 %2290 to float
-  br label %2292
-
-; <label>:2292                                    ; preds = %2270, %2204, %2174, %2156, %2146
-  %2293 = phi float [ %2171, %2156 ], [ 0.000000e+00, %2146 ], [ %2203, %2174 ], [ %2291, %2270 ], [ 0.000000e+00, %2204 ]
-  br i1 %958, label %2294, label %2320
-
-; <label>:2294                                    ; preds = %2292
-  %2295 = fcmp fast oge float %953, 0.000000e+00
-  %2296 = fptoui float %953 to i32
-  %2297 = icmp ult i32 %2296, %13
-  %2298 = and i1 %2295, %2297
-  %2299 = fcmp fast oge float %2145, 0.000000e+00
-  %2300 = and i1 %2299, %2298
-  %2301 = fptoui float %2145 to i32
-  %2302 = icmp ult i32 %2301, %15
-  %2303 = and i1 %2302, %2300
-  br i1 %2303, label %2304, label %2440
-
-; <label>:2304                                    ; preds = %2294
-  %2305 = fptoui float %45 to i32
-  %2306 = fptoui float %182 to i32
-  %2307 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2308 = extractvalue %dx.types.CBufRet.i32 %2307, 0
-  %2309 = extractvalue %dx.types.CBufRet.i32 %2307, 1
-  %2310 = extractvalue %dx.types.CBufRet.i32 %2307, 2
-  %2311 = extractvalue %dx.types.CBufRet.i32 %2307, 3
-  %2312 = mul i32 %2308, %2305
-  %2313 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2306, i32 %2309, i32 %2312)  ; IMad(a,b,c)
-  %2314 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2301, i32 %2310, i32 %2313)  ; IMad(a,b,c)
-  %2315 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2296, i32 %2311, i32 %2314)  ; IMad(a,b,c)
-  %2316 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2315, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2317 = extractvalue %dx.types.ResRet.i32 %2316, 0
-  %2318 = icmp ne i32 %2317, 0
-  %2319 = uitofp i1 %2318 to float
-  br label %2440
-
-; <label>:2320                                    ; preds = %2292
-  %2321 = icmp eq i32 %957, 1
-  br i1 %2321, label %2322, label %2352
-
-; <label>:2322                                    ; preds = %2320
-  %2323 = add i32 %13, -1
-  %2324 = uitofp i32 %2323 to float
-  %2325 = call float @dx.op.binary.f32(i32 35, float %953, float 0.000000e+00)  ; FMax(a,b)
-  %2326 = call float @dx.op.binary.f32(i32 36, float %2325, float %2324)  ; FMin(a,b)
-  %2327 = fptoui float %2326 to i32
-  %2328 = add i32 %15, -1
-  %2329 = uitofp i32 %2328 to float
-  %2330 = call float @dx.op.binary.f32(i32 35, float %2145, float 0.000000e+00)  ; FMax(a,b)
-  %2331 = call float @dx.op.binary.f32(i32 36, float %2330, float %2329)  ; FMin(a,b)
-  %2332 = fptoui float %2331 to i32
-  %2333 = uitofp i32 %2332 to float
-  %2334 = uitofp i32 %2327 to float
-  %2335 = fptoui float %45 to i32
-  %2336 = fptoui float %182 to i32
-  %2337 = fptoui float %2333 to i32
-  %2338 = fptoui float %2334 to i32
-  %2339 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2340 = extractvalue %dx.types.CBufRet.i32 %2339, 0
-  %2341 = extractvalue %dx.types.CBufRet.i32 %2339, 1
-  %2342 = extractvalue %dx.types.CBufRet.i32 %2339, 2
-  %2343 = extractvalue %dx.types.CBufRet.i32 %2339, 3
-  %2344 = mul i32 %2340, %2335
-  %2345 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2336, i32 %2341, i32 %2344)  ; IMad(a,b,c)
-  %2346 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2337, i32 %2342, i32 %2345)  ; IMad(a,b,c)
-  %2347 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2338, i32 %2343, i32 %2346)  ; IMad(a,b,c)
-  %2348 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2347, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2349 = extractvalue %dx.types.ResRet.i32 %2348, 0
-  %2350 = icmp ne i32 %2349, 0
-  %2351 = uitofp i1 %2350 to float
-  br label %2440
-
-; <label>:2352                                    ; preds = %2320
-  %2353 = icmp eq i32 %957, 2
-  br i1 %2353, label %2354, label %2440
-
-; <label>:2354                                    ; preds = %2352
-  %2355 = fsub fast float %22, %20
-  %2356 = fcmp fast olt float %953, %20
-  br i1 %2356, label %2357, label %2370
-
-; <label>:2357                                    ; preds = %2354
-  %2358 = fsub fast float %20, %953
-  %2359 = fdiv fast float %2358, %2355
-  %2360 = fptoui float %2359 to i32
-  %2361 = uitofp i32 %2360 to float
-  %2362 = fmul fast float %2361, %2355
-  %2363 = fsub fast float %2358, %2362
-  %2364 = and i32 %2360, 1
-  %2365 = icmp eq i32 %2364, 0
-  br i1 %2365, label %2366, label %2368
-
-; <label>:2366                                    ; preds = %2357
-  %2367 = fadd fast float %2363, %20
-  br label %2385
-
-; <label>:2368                                    ; preds = %2357
-  %2369 = fsub fast float %22, %2363
-  br label %2385
-
-; <label>:2370                                    ; preds = %2354
-  %2371 = fcmp fast ogt float %953, %22
-  br i1 %2371, label %2372, label %2385
-
-; <label>:2372                                    ; preds = %2370
-  %2373 = fsub fast float %953, %22
-  %2374 = fdiv fast float %2373, %2355
-  %2375 = fptoui float %2374 to i32
-  %2376 = uitofp i32 %2375 to float
-  %2377 = fmul fast float %2376, %2355
-  %2378 = fsub fast float %2373, %2377
-  %2379 = and i32 %2375, 1
-  %2380 = icmp eq i32 %2379, 0
-  br i1 %2380, label %2381, label %2383
-
-; <label>:2381                                    ; preds = %2372
-  %2382 = fsub fast float %22, %2378
-  br label %2385
-
-; <label>:2383                                    ; preds = %2372
-  %2384 = fadd fast float %2378, %20
-  br label %2385
-
-; <label>:2385                                    ; preds = %2383, %2381, %2370, %2368, %2366
-  %2386 = phi float [ %2367, %2366 ], [ %2369, %2368 ], [ %2382, %2381 ], [ %2384, %2383 ], [ %953, %2370 ]
-  %2387 = fptoui float %2386 to i32
-  %2388 = fsub fast float %24, %20
-  %2389 = fcmp fast olt float %2145, %20
-  br i1 %2389, label %2390, label %2403
-
-; <label>:2390                                    ; preds = %2385
-  %2391 = fsub fast float %20, %2145
-  %2392 = fdiv fast float %2391, %2388
-  %2393 = fptoui float %2392 to i32
-  %2394 = uitofp i32 %2393 to float
-  %2395 = fmul fast float %2394, %2388
-  %2396 = fsub fast float %2391, %2395
-  %2397 = and i32 %2393, 1
-  %2398 = icmp eq i32 %2397, 0
-  br i1 %2398, label %2399, label %2401
-
-; <label>:2399                                    ; preds = %2390
-  %2400 = fadd fast float %2396, %20
-  br label %2418
-
-; <label>:2401                                    ; preds = %2390
-  %2402 = fsub fast float %24, %2396
-  br label %2418
-
-; <label>:2403                                    ; preds = %2385
-  %2404 = fcmp fast ogt float %2145, %24
-  br i1 %2404, label %2405, label %2418
-
-; <label>:2405                                    ; preds = %2403
-  %2406 = fsub fast float %2145, %24
-  %2407 = fdiv fast float %2406, %2388
-  %2408 = fptoui float %2407 to i32
-  %2409 = uitofp i32 %2408 to float
-  %2410 = fmul fast float %2409, %2388
-  %2411 = fsub fast float %2406, %2410
-  %2412 = and i32 %2408, 1
-  %2413 = icmp eq i32 %2412, 0
-  br i1 %2413, label %2414, label %2416
-
-; <label>:2414                                    ; preds = %2405
-  %2415 = fsub fast float %24, %2411
-  br label %2418
-
-; <label>:2416                                    ; preds = %2405
-  %2417 = fadd fast float %2411, %20
-  br label %2418
-
-; <label>:2418                                    ; preds = %2416, %2414, %2403, %2401, %2399
-  %2419 = phi float [ %2400, %2399 ], [ %2402, %2401 ], [ %2415, %2414 ], [ %2417, %2416 ], [ %2145, %2403 ]
-  %2420 = fptoui float %2419 to i32
-  %2421 = uitofp i32 %2420 to float
-  %2422 = uitofp i32 %2387 to float
-  %2423 = fptoui float %45 to i32
-  %2424 = fptoui float %182 to i32
-  %2425 = fptoui float %2421 to i32
-  %2426 = fptoui float %2422 to i32
-  %2427 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2428 = extractvalue %dx.types.CBufRet.i32 %2427, 0
-  %2429 = extractvalue %dx.types.CBufRet.i32 %2427, 1
-  %2430 = extractvalue %dx.types.CBufRet.i32 %2427, 2
-  %2431 = extractvalue %dx.types.CBufRet.i32 %2427, 3
-  %2432 = mul i32 %2428, %2423
-  %2433 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2424, i32 %2429, i32 %2432)  ; IMad(a,b,c)
-  %2434 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2425, i32 %2430, i32 %2433)  ; IMad(a,b,c)
-  %2435 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2426, i32 %2431, i32 %2434)  ; IMad(a,b,c)
-  %2436 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2435, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2437 = extractvalue %dx.types.ResRet.i32 %2436, 0
-  %2438 = icmp ne i32 %2437, 0
-  %2439 = uitofp i1 %2438 to float
-  br label %2440
-
-; <label>:2440                                    ; preds = %2418, %2352, %2322, %2304, %2294
-  %2441 = phi float [ %2319, %2304 ], [ 0.000000e+00, %2294 ], [ %2351, %2322 ], [ %2439, %2418 ], [ 0.000000e+00, %2352 ]
-  br i1 %958, label %2442, label %2468
-
-; <label>:2442                                    ; preds = %2440
-  %2443 = fcmp fast oge float %1255, 0.000000e+00
-  %2444 = fptoui float %1255 to i32
-  %2445 = icmp ult i32 %2444, %13
-  %2446 = and i1 %2443, %2445
-  %2447 = fcmp fast oge float %2145, 0.000000e+00
-  %2448 = and i1 %2447, %2446
-  %2449 = fptoui float %2145 to i32
-  %2450 = icmp ult i32 %2449, %15
-  %2451 = and i1 %2450, %2448
-  br i1 %2451, label %2452, label %2588
-
-; <label>:2452                                    ; preds = %2442
-  %2453 = fptoui float %45 to i32
-  %2454 = fptoui float %182 to i32
-  %2455 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2456 = extractvalue %dx.types.CBufRet.i32 %2455, 0
-  %2457 = extractvalue %dx.types.CBufRet.i32 %2455, 1
-  %2458 = extractvalue %dx.types.CBufRet.i32 %2455, 2
-  %2459 = extractvalue %dx.types.CBufRet.i32 %2455, 3
-  %2460 = mul i32 %2456, %2453
-  %2461 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2454, i32 %2457, i32 %2460)  ; IMad(a,b,c)
-  %2462 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2449, i32 %2458, i32 %2461)  ; IMad(a,b,c)
-  %2463 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2444, i32 %2459, i32 %2462)  ; IMad(a,b,c)
-  %2464 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2463, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2465 = extractvalue %dx.types.ResRet.i32 %2464, 0
-  %2466 = icmp ne i32 %2465, 0
-  %2467 = uitofp i1 %2466 to float
-  br label %2588
-
-; <label>:2468                                    ; preds = %2440
-  %2469 = icmp eq i32 %957, 1
-  br i1 %2469, label %2470, label %2500
-
-; <label>:2470                                    ; preds = %2468
-  %2471 = add i32 %13, -1
-  %2472 = uitofp i32 %2471 to float
-  %2473 = call float @dx.op.binary.f32(i32 35, float %1255, float 0.000000e+00)  ; FMax(a,b)
-  %2474 = call float @dx.op.binary.f32(i32 36, float %2473, float %2472)  ; FMin(a,b)
-  %2475 = fptoui float %2474 to i32
-  %2476 = add i32 %15, -1
-  %2477 = uitofp i32 %2476 to float
-  %2478 = call float @dx.op.binary.f32(i32 35, float %2145, float 0.000000e+00)  ; FMax(a,b)
-  %2479 = call float @dx.op.binary.f32(i32 36, float %2478, float %2477)  ; FMin(a,b)
-  %2480 = fptoui float %2479 to i32
-  %2481 = uitofp i32 %2480 to float
-  %2482 = uitofp i32 %2475 to float
-  %2483 = fptoui float %45 to i32
-  %2484 = fptoui float %182 to i32
-  %2485 = fptoui float %2481 to i32
-  %2486 = fptoui float %2482 to i32
-  %2487 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2488 = extractvalue %dx.types.CBufRet.i32 %2487, 0
-  %2489 = extractvalue %dx.types.CBufRet.i32 %2487, 1
-  %2490 = extractvalue %dx.types.CBufRet.i32 %2487, 2
-  %2491 = extractvalue %dx.types.CBufRet.i32 %2487, 3
-  %2492 = mul i32 %2488, %2483
-  %2493 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2484, i32 %2489, i32 %2492)  ; IMad(a,b,c)
-  %2494 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2485, i32 %2490, i32 %2493)  ; IMad(a,b,c)
-  %2495 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2486, i32 %2491, i32 %2494)  ; IMad(a,b,c)
-  %2496 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2495, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2497 = extractvalue %dx.types.ResRet.i32 %2496, 0
-  %2498 = icmp ne i32 %2497, 0
-  %2499 = uitofp i1 %2498 to float
-  br label %2588
-
-; <label>:2500                                    ; preds = %2468
-  %2501 = icmp eq i32 %957, 2
-  br i1 %2501, label %2502, label %2588
-
-; <label>:2502                                    ; preds = %2500
-  %2503 = fsub fast float %22, %20
-  %2504 = fcmp fast olt float %1255, %20
-  br i1 %2504, label %2505, label %2518
-
-; <label>:2505                                    ; preds = %2502
-  %2506 = fsub fast float %20, %1255
-  %2507 = fdiv fast float %2506, %2503
-  %2508 = fptoui float %2507 to i32
-  %2509 = uitofp i32 %2508 to float
-  %2510 = fmul fast float %2509, %2503
-  %2511 = fsub fast float %2506, %2510
-  %2512 = and i32 %2508, 1
-  %2513 = icmp eq i32 %2512, 0
-  br i1 %2513, label %2514, label %2516
-
-; <label>:2514                                    ; preds = %2505
-  %2515 = fadd fast float %2511, %20
-  br label %2533
-
-; <label>:2516                                    ; preds = %2505
-  %2517 = fsub fast float %22, %2511
-  br label %2533
-
-; <label>:2518                                    ; preds = %2502
-  %2519 = fcmp fast ogt float %1255, %22
-  br i1 %2519, label %2520, label %2533
-
-; <label>:2520                                    ; preds = %2518
-  %2521 = fsub fast float %1255, %22
-  %2522 = fdiv fast float %2521, %2503
-  %2523 = fptoui float %2522 to i32
-  %2524 = uitofp i32 %2523 to float
-  %2525 = fmul fast float %2524, %2503
-  %2526 = fsub fast float %2521, %2525
-  %2527 = and i32 %2523, 1
-  %2528 = icmp eq i32 %2527, 0
-  br i1 %2528, label %2529, label %2531
-
-; <label>:2529                                    ; preds = %2520
-  %2530 = fsub fast float %22, %2526
-  br label %2533
-
-; <label>:2531                                    ; preds = %2520
-  %2532 = fadd fast float %2526, %20
-  br label %2533
-
-; <label>:2533                                    ; preds = %2531, %2529, %2518, %2516, %2514
-  %2534 = phi float [ %2515, %2514 ], [ %2517, %2516 ], [ %2530, %2529 ], [ %2532, %2531 ], [ %1255, %2518 ]
-  %2535 = fptoui float %2534 to i32
-  %2536 = fsub fast float %24, %20
-  %2537 = fcmp fast olt float %2145, %20
-  br i1 %2537, label %2538, label %2551
-
-; <label>:2538                                    ; preds = %2533
-  %2539 = fsub fast float %20, %2145
-  %2540 = fdiv fast float %2539, %2536
-  %2541 = fptoui float %2540 to i32
-  %2542 = uitofp i32 %2541 to float
-  %2543 = fmul fast float %2542, %2536
-  %2544 = fsub fast float %2539, %2543
-  %2545 = and i32 %2541, 1
-  %2546 = icmp eq i32 %2545, 0
-  br i1 %2546, label %2547, label %2549
-
-; <label>:2547                                    ; preds = %2538
-  %2548 = fadd fast float %2544, %20
-  br label %2566
-
-; <label>:2549                                    ; preds = %2538
-  %2550 = fsub fast float %24, %2544
-  br label %2566
-
-; <label>:2551                                    ; preds = %2533
-  %2552 = fcmp fast ogt float %2145, %24
-  br i1 %2552, label %2553, label %2566
-
-; <label>:2553                                    ; preds = %2551
-  %2554 = fsub fast float %2145, %24
-  %2555 = fdiv fast float %2554, %2536
-  %2556 = fptoui float %2555 to i32
-  %2557 = uitofp i32 %2556 to float
-  %2558 = fmul fast float %2557, %2536
-  %2559 = fsub fast float %2554, %2558
-  %2560 = and i32 %2556, 1
-  %2561 = icmp eq i32 %2560, 0
-  br i1 %2561, label %2562, label %2564
-
-; <label>:2562                                    ; preds = %2553
-  %2563 = fsub fast float %24, %2559
-  br label %2566
-
-; <label>:2564                                    ; preds = %2553
-  %2565 = fadd fast float %2559, %20
-  br label %2566
-
-; <label>:2566                                    ; preds = %2564, %2562, %2551, %2549, %2547
-  %2567 = phi float [ %2548, %2547 ], [ %2550, %2549 ], [ %2563, %2562 ], [ %2565, %2564 ], [ %2145, %2551 ]
-  %2568 = fptoui float %2567 to i32
-  %2569 = uitofp i32 %2568 to float
-  %2570 = uitofp i32 %2535 to float
-  %2571 = fptoui float %45 to i32
-  %2572 = fptoui float %182 to i32
-  %2573 = fptoui float %2569 to i32
-  %2574 = fptoui float %2570 to i32
-  %2575 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2576 = extractvalue %dx.types.CBufRet.i32 %2575, 0
-  %2577 = extractvalue %dx.types.CBufRet.i32 %2575, 1
-  %2578 = extractvalue %dx.types.CBufRet.i32 %2575, 2
-  %2579 = extractvalue %dx.types.CBufRet.i32 %2575, 3
-  %2580 = mul i32 %2576, %2571
-  %2581 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2572, i32 %2577, i32 %2580)  ; IMad(a,b,c)
-  %2582 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2573, i32 %2578, i32 %2581)  ; IMad(a,b,c)
-  %2583 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2574, i32 %2579, i32 %2582)  ; IMad(a,b,c)
-  %2584 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2583, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2585 = extractvalue %dx.types.ResRet.i32 %2584, 0
-  %2586 = icmp ne i32 %2585, 0
-  %2587 = uitofp i1 %2586 to float
-  br label %2588
-
-; <label>:2588                                    ; preds = %2566, %2500, %2470, %2452, %2442
-  %2589 = phi float [ %2467, %2452 ], [ 0.000000e+00, %2442 ], [ %2499, %2470 ], [ %2587, %2566 ], [ 0.000000e+00, %2500 ]
-  br i1 %958, label %2590, label %2616
-
-; <label>:2590                                    ; preds = %2588
-  %2591 = fcmp fast oge float %1404, 0.000000e+00
-  %2592 = fptoui float %1404 to i32
-  %2593 = icmp ult i32 %2592, %13
-  %2594 = and i1 %2591, %2593
-  %2595 = fcmp fast oge float %2145, 0.000000e+00
-  %2596 = and i1 %2595, %2594
-  %2597 = fptoui float %2145 to i32
-  %2598 = icmp ult i32 %2597, %15
-  %2599 = and i1 %2598, %2596
-  br i1 %2599, label %2600, label %2736
-
-; <label>:2600                                    ; preds = %2590
-  %2601 = fptoui float %45 to i32
-  %2602 = fptoui float %182 to i32
-  %2603 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2604 = extractvalue %dx.types.CBufRet.i32 %2603, 0
-  %2605 = extractvalue %dx.types.CBufRet.i32 %2603, 1
-  %2606 = extractvalue %dx.types.CBufRet.i32 %2603, 2
-  %2607 = extractvalue %dx.types.CBufRet.i32 %2603, 3
-  %2608 = mul i32 %2604, %2601
-  %2609 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2602, i32 %2605, i32 %2608)  ; IMad(a,b,c)
-  %2610 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2597, i32 %2606, i32 %2609)  ; IMad(a,b,c)
-  %2611 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2592, i32 %2607, i32 %2610)  ; IMad(a,b,c)
-  %2612 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2611, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2613 = extractvalue %dx.types.ResRet.i32 %2612, 0
-  %2614 = icmp ne i32 %2613, 0
-  %2615 = uitofp i1 %2614 to float
-  br label %2736
-
-; <label>:2616                                    ; preds = %2588
-  %2617 = icmp eq i32 %957, 1
-  br i1 %2617, label %2618, label %2648
-
-; <label>:2618                                    ; preds = %2616
-  %2619 = add i32 %13, -1
-  %2620 = uitofp i32 %2619 to float
-  %2621 = call float @dx.op.binary.f32(i32 35, float %1404, float 0.000000e+00)  ; FMax(a,b)
-  %2622 = call float @dx.op.binary.f32(i32 36, float %2621, float %2620)  ; FMin(a,b)
-  %2623 = fptoui float %2622 to i32
-  %2624 = add i32 %15, -1
-  %2625 = uitofp i32 %2624 to float
-  %2626 = call float @dx.op.binary.f32(i32 35, float %2145, float 0.000000e+00)  ; FMax(a,b)
-  %2627 = call float @dx.op.binary.f32(i32 36, float %2626, float %2625)  ; FMin(a,b)
-  %2628 = fptoui float %2627 to i32
-  %2629 = uitofp i32 %2628 to float
-  %2630 = uitofp i32 %2623 to float
-  %2631 = fptoui float %45 to i32
-  %2632 = fptoui float %182 to i32
-  %2633 = fptoui float %2629 to i32
-  %2634 = fptoui float %2630 to i32
-  %2635 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2636 = extractvalue %dx.types.CBufRet.i32 %2635, 0
-  %2637 = extractvalue %dx.types.CBufRet.i32 %2635, 1
-  %2638 = extractvalue %dx.types.CBufRet.i32 %2635, 2
-  %2639 = extractvalue %dx.types.CBufRet.i32 %2635, 3
-  %2640 = mul i32 %2636, %2631
-  %2641 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2632, i32 %2637, i32 %2640)  ; IMad(a,b,c)
-  %2642 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2633, i32 %2638, i32 %2641)  ; IMad(a,b,c)
-  %2643 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2634, i32 %2639, i32 %2642)  ; IMad(a,b,c)
-  %2644 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2643, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2645 = extractvalue %dx.types.ResRet.i32 %2644, 0
-  %2646 = icmp ne i32 %2645, 0
-  %2647 = uitofp i1 %2646 to float
-  br label %2736
-
-; <label>:2648                                    ; preds = %2616
-  %2649 = icmp eq i32 %957, 2
-  br i1 %2649, label %2650, label %2736
-
-; <label>:2650                                    ; preds = %2648
-  %2651 = fsub fast float %22, %20
-  %2652 = fcmp fast olt float %1404, %20
-  br i1 %2652, label %2653, label %2666
-
-; <label>:2653                                    ; preds = %2650
-  %2654 = fsub fast float %20, %1404
-  %2655 = fdiv fast float %2654, %2651
-  %2656 = fptoui float %2655 to i32
-  %2657 = uitofp i32 %2656 to float
-  %2658 = fmul fast float %2657, %2651
-  %2659 = fsub fast float %2654, %2658
-  %2660 = and i32 %2656, 1
-  %2661 = icmp eq i32 %2660, 0
-  br i1 %2661, label %2662, label %2664
-
-; <label>:2662                                    ; preds = %2653
-  %2663 = fadd fast float %2659, %20
-  br label %2681
-
-; <label>:2664                                    ; preds = %2653
-  %2665 = fsub fast float %22, %2659
-  br label %2681
-
-; <label>:2666                                    ; preds = %2650
-  %2667 = fcmp fast ogt float %1404, %22
-  br i1 %2667, label %2668, label %2681
-
-; <label>:2668                                    ; preds = %2666
-  %2669 = fsub fast float %1404, %22
-  %2670 = fdiv fast float %2669, %2651
-  %2671 = fptoui float %2670 to i32
-  %2672 = uitofp i32 %2671 to float
-  %2673 = fmul fast float %2672, %2651
-  %2674 = fsub fast float %2669, %2673
-  %2675 = and i32 %2671, 1
-  %2676 = icmp eq i32 %2675, 0
-  br i1 %2676, label %2677, label %2679
-
-; <label>:2677                                    ; preds = %2668
-  %2678 = fsub fast float %22, %2674
-  br label %2681
-
-; <label>:2679                                    ; preds = %2668
-  %2680 = fadd fast float %2674, %20
-  br label %2681
-
-; <label>:2681                                    ; preds = %2679, %2677, %2666, %2664, %2662
-  %2682 = phi float [ %2663, %2662 ], [ %2665, %2664 ], [ %2678, %2677 ], [ %2680, %2679 ], [ %1404, %2666 ]
-  %2683 = fptoui float %2682 to i32
-  %2684 = fsub fast float %24, %20
-  %2685 = fcmp fast olt float %2145, %20
-  br i1 %2685, label %2686, label %2699
-
-; <label>:2686                                    ; preds = %2681
-  %2687 = fsub fast float %20, %2145
-  %2688 = fdiv fast float %2687, %2684
-  %2689 = fptoui float %2688 to i32
-  %2690 = uitofp i32 %2689 to float
-  %2691 = fmul fast float %2690, %2684
-  %2692 = fsub fast float %2687, %2691
-  %2693 = and i32 %2689, 1
-  %2694 = icmp eq i32 %2693, 0
-  br i1 %2694, label %2695, label %2697
-
-; <label>:2695                                    ; preds = %2686
-  %2696 = fadd fast float %2692, %20
-  br label %2714
-
-; <label>:2697                                    ; preds = %2686
-  %2698 = fsub fast float %24, %2692
-  br label %2714
-
-; <label>:2699                                    ; preds = %2681
-  %2700 = fcmp fast ogt float %2145, %24
-  br i1 %2700, label %2701, label %2714
-
-; <label>:2701                                    ; preds = %2699
-  %2702 = fsub fast float %2145, %24
-  %2703 = fdiv fast float %2702, %2684
-  %2704 = fptoui float %2703 to i32
-  %2705 = uitofp i32 %2704 to float
-  %2706 = fmul fast float %2705, %2684
-  %2707 = fsub fast float %2702, %2706
-  %2708 = and i32 %2704, 1
-  %2709 = icmp eq i32 %2708, 0
-  br i1 %2709, label %2710, label %2712
-
-; <label>:2710                                    ; preds = %2701
-  %2711 = fsub fast float %24, %2707
-  br label %2714
-
-; <label>:2712                                    ; preds = %2701
-  %2713 = fadd fast float %2707, %20
-  br label %2714
-
-; <label>:2714                                    ; preds = %2712, %2710, %2699, %2697, %2695
-  %2715 = phi float [ %2696, %2695 ], [ %2698, %2697 ], [ %2711, %2710 ], [ %2713, %2712 ], [ %2145, %2699 ]
-  %2716 = fptoui float %2715 to i32
-  %2717 = uitofp i32 %2716 to float
-  %2718 = uitofp i32 %2683 to float
-  %2719 = fptoui float %45 to i32
-  %2720 = fptoui float %182 to i32
-  %2721 = fptoui float %2717 to i32
-  %2722 = fptoui float %2718 to i32
-  %2723 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2724 = extractvalue %dx.types.CBufRet.i32 %2723, 0
-  %2725 = extractvalue %dx.types.CBufRet.i32 %2723, 1
-  %2726 = extractvalue %dx.types.CBufRet.i32 %2723, 2
-  %2727 = extractvalue %dx.types.CBufRet.i32 %2723, 3
-  %2728 = mul i32 %2724, %2719
-  %2729 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2720, i32 %2725, i32 %2728)  ; IMad(a,b,c)
-  %2730 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2721, i32 %2726, i32 %2729)  ; IMad(a,b,c)
-  %2731 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2722, i32 %2727, i32 %2730)  ; IMad(a,b,c)
-  %2732 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2731, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2733 = extractvalue %dx.types.ResRet.i32 %2732, 0
-  %2734 = icmp ne i32 %2733, 0
-  %2735 = uitofp i1 %2734 to float
-  br label %2736
-
-; <label>:2736                                    ; preds = %2714, %2648, %2618, %2600, %2590
-  %2737 = phi float [ %2615, %2600 ], [ 0.000000e+00, %2590 ], [ %2647, %2618 ], [ %2735, %2714 ], [ 0.000000e+00, %2648 ]
-  %2738 = fadd fast float %955, 2.000000e+00
-  br i1 %958, label %2739, label %2765
-
-; <label>:2739                                    ; preds = %2736
-  %2740 = fcmp fast oge float %954, 0.000000e+00
-  %2741 = fptoui float %954 to i32
-  %2742 = icmp ult i32 %2741, %13
-  %2743 = and i1 %2740, %2742
-  %2744 = fcmp fast oge float %2738, 0.000000e+00
-  %2745 = and i1 %2744, %2743
-  %2746 = fptoui float %2738 to i32
-  %2747 = icmp ult i32 %2746, %15
-  %2748 = and i1 %2747, %2745
-  br i1 %2748, label %2749, label %2885
-
-; <label>:2749                                    ; preds = %2739
-  %2750 = fptoui float %45 to i32
-  %2751 = fptoui float %182 to i32
-  %2752 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2753 = extractvalue %dx.types.CBufRet.i32 %2752, 0
-  %2754 = extractvalue %dx.types.CBufRet.i32 %2752, 1
-  %2755 = extractvalue %dx.types.CBufRet.i32 %2752, 2
-  %2756 = extractvalue %dx.types.CBufRet.i32 %2752, 3
-  %2757 = mul i32 %2753, %2750
-  %2758 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2751, i32 %2754, i32 %2757)  ; IMad(a,b,c)
-  %2759 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2746, i32 %2755, i32 %2758)  ; IMad(a,b,c)
-  %2760 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2741, i32 %2756, i32 %2759)  ; IMad(a,b,c)
-  %2761 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2760, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2762 = extractvalue %dx.types.ResRet.i32 %2761, 0
-  %2763 = icmp ne i32 %2762, 0
-  %2764 = uitofp i1 %2763 to float
-  br label %2885
-
-; <label>:2765                                    ; preds = %2736
-  %2766 = icmp eq i32 %957, 1
-  br i1 %2766, label %2767, label %2797
-
-; <label>:2767                                    ; preds = %2765
-  %2768 = add i32 %13, -1
-  %2769 = uitofp i32 %2768 to float
-  %2770 = call float @dx.op.binary.f32(i32 35, float %954, float 0.000000e+00)  ; FMax(a,b)
-  %2771 = call float @dx.op.binary.f32(i32 36, float %2770, float %2769)  ; FMin(a,b)
-  %2772 = fptoui float %2771 to i32
-  %2773 = add i32 %15, -1
-  %2774 = uitofp i32 %2773 to float
-  %2775 = call float @dx.op.binary.f32(i32 35, float %2738, float 0.000000e+00)  ; FMax(a,b)
-  %2776 = call float @dx.op.binary.f32(i32 36, float %2775, float %2774)  ; FMin(a,b)
-  %2777 = fptoui float %2776 to i32
-  %2778 = uitofp i32 %2777 to float
-  %2779 = uitofp i32 %2772 to float
-  %2780 = fptoui float %45 to i32
-  %2781 = fptoui float %182 to i32
-  %2782 = fptoui float %2778 to i32
-  %2783 = fptoui float %2779 to i32
-  %2784 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2785 = extractvalue %dx.types.CBufRet.i32 %2784, 0
-  %2786 = extractvalue %dx.types.CBufRet.i32 %2784, 1
-  %2787 = extractvalue %dx.types.CBufRet.i32 %2784, 2
-  %2788 = extractvalue %dx.types.CBufRet.i32 %2784, 3
-  %2789 = mul i32 %2785, %2780
-  %2790 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2781, i32 %2786, i32 %2789)  ; IMad(a,b,c)
-  %2791 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2782, i32 %2787, i32 %2790)  ; IMad(a,b,c)
-  %2792 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2783, i32 %2788, i32 %2791)  ; IMad(a,b,c)
-  %2793 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2792, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2794 = extractvalue %dx.types.ResRet.i32 %2793, 0
-  %2795 = icmp ne i32 %2794, 0
-  %2796 = uitofp i1 %2795 to float
-  br label %2885
-
-; <label>:2797                                    ; preds = %2765
-  %2798 = icmp eq i32 %957, 2
-  br i1 %2798, label %2799, label %2885
-
-; <label>:2799                                    ; preds = %2797
-  %2800 = fsub fast float %22, %20
-  %2801 = fcmp fast olt float %954, %20
-  br i1 %2801, label %2802, label %2815
-
-; <label>:2802                                    ; preds = %2799
-  %2803 = fsub fast float %20, %954
-  %2804 = fdiv fast float %2803, %2800
-  %2805 = fptoui float %2804 to i32
-  %2806 = uitofp i32 %2805 to float
-  %2807 = fmul fast float %2806, %2800
-  %2808 = fsub fast float %2803, %2807
-  %2809 = and i32 %2805, 1
-  %2810 = icmp eq i32 %2809, 0
-  br i1 %2810, label %2811, label %2813
-
-; <label>:2811                                    ; preds = %2802
-  %2812 = fadd fast float %2808, %20
-  br label %2830
-
-; <label>:2813                                    ; preds = %2802
-  %2814 = fsub fast float %22, %2808
-  br label %2830
-
-; <label>:2815                                    ; preds = %2799
-  %2816 = fcmp fast ogt float %954, %22
-  br i1 %2816, label %2817, label %2830
-
-; <label>:2817                                    ; preds = %2815
-  %2818 = fsub fast float %954, %22
-  %2819 = fdiv fast float %2818, %2800
-  %2820 = fptoui float %2819 to i32
-  %2821 = uitofp i32 %2820 to float
-  %2822 = fmul fast float %2821, %2800
-  %2823 = fsub fast float %2818, %2822
-  %2824 = and i32 %2820, 1
-  %2825 = icmp eq i32 %2824, 0
-  br i1 %2825, label %2826, label %2828
-
-; <label>:2826                                    ; preds = %2817
-  %2827 = fsub fast float %22, %2823
-  br label %2830
-
-; <label>:2828                                    ; preds = %2817
-  %2829 = fadd fast float %2823, %20
-  br label %2830
-
-; <label>:2830                                    ; preds = %2828, %2826, %2815, %2813, %2811
-  %2831 = phi float [ %2812, %2811 ], [ %2814, %2813 ], [ %2827, %2826 ], [ %2829, %2828 ], [ %954, %2815 ]
-  %2832 = fptoui float %2831 to i32
-  %2833 = fsub fast float %24, %20
-  %2834 = fcmp fast olt float %2738, %20
-  br i1 %2834, label %2835, label %2848
-
-; <label>:2835                                    ; preds = %2830
-  %2836 = fsub fast float %20, %2738
-  %2837 = fdiv fast float %2836, %2833
-  %2838 = fptoui float %2837 to i32
-  %2839 = uitofp i32 %2838 to float
-  %2840 = fmul fast float %2839, %2833
-  %2841 = fsub fast float %2836, %2840
-  %2842 = and i32 %2838, 1
-  %2843 = icmp eq i32 %2842, 0
-  br i1 %2843, label %2844, label %2846
-
-; <label>:2844                                    ; preds = %2835
-  %2845 = fadd fast float %2841, %20
-  br label %2863
-
-; <label>:2846                                    ; preds = %2835
-  %2847 = fsub fast float %24, %2841
-  br label %2863
-
-; <label>:2848                                    ; preds = %2830
-  %2849 = fcmp fast ogt float %2738, %24
-  br i1 %2849, label %2850, label %2863
-
-; <label>:2850                                    ; preds = %2848
-  %2851 = fsub fast float %2738, %24
-  %2852 = fdiv fast float %2851, %2833
-  %2853 = fptoui float %2852 to i32
-  %2854 = uitofp i32 %2853 to float
-  %2855 = fmul fast float %2854, %2833
-  %2856 = fsub fast float %2851, %2855
-  %2857 = and i32 %2853, 1
-  %2858 = icmp eq i32 %2857, 0
-  br i1 %2858, label %2859, label %2861
-
-; <label>:2859                                    ; preds = %2850
-  %2860 = fsub fast float %24, %2856
-  br label %2863
-
-; <label>:2861                                    ; preds = %2850
-  %2862 = fadd fast float %2856, %20
-  br label %2863
-
-; <label>:2863                                    ; preds = %2861, %2859, %2848, %2846, %2844
-  %2864 = phi float [ %2845, %2844 ], [ %2847, %2846 ], [ %2860, %2859 ], [ %2862, %2861 ], [ %2738, %2848 ]
-  %2865 = fptoui float %2864 to i32
-  %2866 = uitofp i32 %2865 to float
-  %2867 = uitofp i32 %2832 to float
-  %2868 = fptoui float %45 to i32
-  %2869 = fptoui float %182 to i32
-  %2870 = fptoui float %2866 to i32
-  %2871 = fptoui float %2867 to i32
-  %2872 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2873 = extractvalue %dx.types.CBufRet.i32 %2872, 0
-  %2874 = extractvalue %dx.types.CBufRet.i32 %2872, 1
-  %2875 = extractvalue %dx.types.CBufRet.i32 %2872, 2
-  %2876 = extractvalue %dx.types.CBufRet.i32 %2872, 3
-  %2877 = mul i32 %2873, %2868
-  %2878 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2869, i32 %2874, i32 %2877)  ; IMad(a,b,c)
-  %2879 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2870, i32 %2875, i32 %2878)  ; IMad(a,b,c)
-  %2880 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2871, i32 %2876, i32 %2879)  ; IMad(a,b,c)
-  %2881 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2880, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2882 = extractvalue %dx.types.ResRet.i32 %2881, 0
-  %2883 = icmp ne i32 %2882, 0
-  %2884 = uitofp i1 %2883 to float
-  br label %2885
-
-; <label>:2885                                    ; preds = %2863, %2797, %2767, %2749, %2739
-  %2886 = phi float [ %2764, %2749 ], [ 0.000000e+00, %2739 ], [ %2796, %2767 ], [ %2884, %2863 ], [ 0.000000e+00, %2797 ]
-  br i1 %958, label %2887, label %2913
-
-; <label>:2887                                    ; preds = %2885
-  %2888 = fcmp fast oge float %953, 0.000000e+00
-  %2889 = fptoui float %953 to i32
-  %2890 = icmp ult i32 %2889, %13
-  %2891 = and i1 %2888, %2890
-  %2892 = fcmp fast oge float %2738, 0.000000e+00
-  %2893 = and i1 %2892, %2891
-  %2894 = fptoui float %2738 to i32
-  %2895 = icmp ult i32 %2894, %15
-  %2896 = and i1 %2895, %2893
-  br i1 %2896, label %2897, label %3033
-
-; <label>:2897                                    ; preds = %2887
-  %2898 = fptoui float %45 to i32
-  %2899 = fptoui float %182 to i32
-  %2900 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2901 = extractvalue %dx.types.CBufRet.i32 %2900, 0
-  %2902 = extractvalue %dx.types.CBufRet.i32 %2900, 1
-  %2903 = extractvalue %dx.types.CBufRet.i32 %2900, 2
-  %2904 = extractvalue %dx.types.CBufRet.i32 %2900, 3
-  %2905 = mul i32 %2901, %2898
-  %2906 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2899, i32 %2902, i32 %2905)  ; IMad(a,b,c)
-  %2907 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2894, i32 %2903, i32 %2906)  ; IMad(a,b,c)
-  %2908 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2889, i32 %2904, i32 %2907)  ; IMad(a,b,c)
-  %2909 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2908, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2910 = extractvalue %dx.types.ResRet.i32 %2909, 0
-  %2911 = icmp ne i32 %2910, 0
-  %2912 = uitofp i1 %2911 to float
-  br label %3033
-
-; <label>:2913                                    ; preds = %2885
-  %2914 = icmp eq i32 %957, 1
-  br i1 %2914, label %2915, label %2945
-
-; <label>:2915                                    ; preds = %2913
-  %2916 = add i32 %13, -1
-  %2917 = uitofp i32 %2916 to float
-  %2918 = call float @dx.op.binary.f32(i32 35, float %953, float 0.000000e+00)  ; FMax(a,b)
-  %2919 = call float @dx.op.binary.f32(i32 36, float %2918, float %2917)  ; FMin(a,b)
-  %2920 = fptoui float %2919 to i32
-  %2921 = add i32 %15, -1
-  %2922 = uitofp i32 %2921 to float
-  %2923 = call float @dx.op.binary.f32(i32 35, float %2738, float 0.000000e+00)  ; FMax(a,b)
-  %2924 = call float @dx.op.binary.f32(i32 36, float %2923, float %2922)  ; FMin(a,b)
-  %2925 = fptoui float %2924 to i32
-  %2926 = uitofp i32 %2925 to float
-  %2927 = uitofp i32 %2920 to float
-  %2928 = fptoui float %45 to i32
-  %2929 = fptoui float %182 to i32
-  %2930 = fptoui float %2926 to i32
-  %2931 = fptoui float %2927 to i32
-  %2932 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2933 = extractvalue %dx.types.CBufRet.i32 %2932, 0
-  %2934 = extractvalue %dx.types.CBufRet.i32 %2932, 1
-  %2935 = extractvalue %dx.types.CBufRet.i32 %2932, 2
-  %2936 = extractvalue %dx.types.CBufRet.i32 %2932, 3
-  %2937 = mul i32 %2933, %2928
-  %2938 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2929, i32 %2934, i32 %2937)  ; IMad(a,b,c)
-  %2939 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2930, i32 %2935, i32 %2938)  ; IMad(a,b,c)
-  %2940 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2931, i32 %2936, i32 %2939)  ; IMad(a,b,c)
-  %2941 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2940, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2942 = extractvalue %dx.types.ResRet.i32 %2941, 0
-  %2943 = icmp ne i32 %2942, 0
-  %2944 = uitofp i1 %2943 to float
-  br label %3033
-
-; <label>:2945                                    ; preds = %2913
-  %2946 = icmp eq i32 %957, 2
-  br i1 %2946, label %2947, label %3033
-
-; <label>:2947                                    ; preds = %2945
-  %2948 = fsub fast float %22, %20
-  %2949 = fcmp fast olt float %953, %20
-  br i1 %2949, label %2950, label %2963
-
-; <label>:2950                                    ; preds = %2947
-  %2951 = fsub fast float %20, %953
-  %2952 = fdiv fast float %2951, %2948
-  %2953 = fptoui float %2952 to i32
-  %2954 = uitofp i32 %2953 to float
-  %2955 = fmul fast float %2954, %2948
-  %2956 = fsub fast float %2951, %2955
-  %2957 = and i32 %2953, 1
-  %2958 = icmp eq i32 %2957, 0
-  br i1 %2958, label %2959, label %2961
-
-; <label>:2959                                    ; preds = %2950
-  %2960 = fadd fast float %2956, %20
-  br label %2978
-
-; <label>:2961                                    ; preds = %2950
-  %2962 = fsub fast float %22, %2956
-  br label %2978
-
-; <label>:2963                                    ; preds = %2947
-  %2964 = fcmp fast ogt float %953, %22
-  br i1 %2964, label %2965, label %2978
-
-; <label>:2965                                    ; preds = %2963
-  %2966 = fsub fast float %953, %22
-  %2967 = fdiv fast float %2966, %2948
-  %2968 = fptoui float %2967 to i32
-  %2969 = uitofp i32 %2968 to float
-  %2970 = fmul fast float %2969, %2948
-  %2971 = fsub fast float %2966, %2970
-  %2972 = and i32 %2968, 1
-  %2973 = icmp eq i32 %2972, 0
-  br i1 %2973, label %2974, label %2976
-
-; <label>:2974                                    ; preds = %2965
-  %2975 = fsub fast float %22, %2971
-  br label %2978
-
-; <label>:2976                                    ; preds = %2965
-  %2977 = fadd fast float %2971, %20
-  br label %2978
-
-; <label>:2978                                    ; preds = %2976, %2974, %2963, %2961, %2959
-  %2979 = phi float [ %2960, %2959 ], [ %2962, %2961 ], [ %2975, %2974 ], [ %2977, %2976 ], [ %953, %2963 ]
-  %2980 = fptoui float %2979 to i32
-  %2981 = fsub fast float %24, %20
-  %2982 = fcmp fast olt float %2738, %20
-  br i1 %2982, label %2983, label %2996
-
-; <label>:2983                                    ; preds = %2978
-  %2984 = fsub fast float %20, %2738
-  %2985 = fdiv fast float %2984, %2981
-  %2986 = fptoui float %2985 to i32
-  %2987 = uitofp i32 %2986 to float
-  %2988 = fmul fast float %2987, %2981
-  %2989 = fsub fast float %2984, %2988
-  %2990 = and i32 %2986, 1
-  %2991 = icmp eq i32 %2990, 0
-  br i1 %2991, label %2992, label %2994
-
-; <label>:2992                                    ; preds = %2983
-  %2993 = fadd fast float %2989, %20
-  br label %3011
-
-; <label>:2994                                    ; preds = %2983
-  %2995 = fsub fast float %24, %2989
-  br label %3011
-
-; <label>:2996                                    ; preds = %2978
-  %2997 = fcmp fast ogt float %2738, %24
-  br i1 %2997, label %2998, label %3011
-
-; <label>:2998                                    ; preds = %2996
-  %2999 = fsub fast float %2738, %24
-  %3000 = fdiv fast float %2999, %2981
-  %3001 = fptoui float %3000 to i32
-  %3002 = uitofp i32 %3001 to float
-  %3003 = fmul fast float %3002, %2981
-  %3004 = fsub fast float %2999, %3003
-  %3005 = and i32 %3001, 1
-  %3006 = icmp eq i32 %3005, 0
-  br i1 %3006, label %3007, label %3009
-
-; <label>:3007                                    ; preds = %2998
-  %3008 = fsub fast float %24, %3004
-  br label %3011
-
-; <label>:3009                                    ; preds = %2998
-  %3010 = fadd fast float %3004, %20
-  br label %3011
-
-; <label>:3011                                    ; preds = %3009, %3007, %2996, %2994, %2992
-  %3012 = phi float [ %2993, %2992 ], [ %2995, %2994 ], [ %3008, %3007 ], [ %3010, %3009 ], [ %2738, %2996 ]
-  %3013 = fptoui float %3012 to i32
-  %3014 = uitofp i32 %3013 to float
-  %3015 = uitofp i32 %2980 to float
-  %3016 = fptoui float %45 to i32
-  %3017 = fptoui float %182 to i32
-  %3018 = fptoui float %3014 to i32
-  %3019 = fptoui float %3015 to i32
-  %3020 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3021 = extractvalue %dx.types.CBufRet.i32 %3020, 0
-  %3022 = extractvalue %dx.types.CBufRet.i32 %3020, 1
-  %3023 = extractvalue %dx.types.CBufRet.i32 %3020, 2
-  %3024 = extractvalue %dx.types.CBufRet.i32 %3020, 3
-  %3025 = mul i32 %3021, %3016
-  %3026 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3017, i32 %3022, i32 %3025)  ; IMad(a,b,c)
-  %3027 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3018, i32 %3023, i32 %3026)  ; IMad(a,b,c)
-  %3028 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3019, i32 %3024, i32 %3027)  ; IMad(a,b,c)
-  %3029 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3028, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3030 = extractvalue %dx.types.ResRet.i32 %3029, 0
-  %3031 = icmp ne i32 %3030, 0
-  %3032 = uitofp i1 %3031 to float
-  br label %3033
-
-; <label>:3033                                    ; preds = %3011, %2945, %2915, %2897, %2887
-  %3034 = phi float [ %2912, %2897 ], [ 0.000000e+00, %2887 ], [ %2944, %2915 ], [ %3032, %3011 ], [ 0.000000e+00, %2945 ]
-  br i1 %958, label %3035, label %3061
-
-; <label>:3035                                    ; preds = %3033
-  %3036 = fcmp fast oge float %1255, 0.000000e+00
-  %3037 = fptoui float %1255 to i32
-  %3038 = icmp ult i32 %3037, %13
-  %3039 = and i1 %3036, %3038
-  %3040 = fcmp fast oge float %2738, 0.000000e+00
-  %3041 = and i1 %3040, %3039
-  %3042 = fptoui float %2738 to i32
-  %3043 = icmp ult i32 %3042, %15
-  %3044 = and i1 %3043, %3041
-  br i1 %3044, label %3045, label %3181
-
-; <label>:3045                                    ; preds = %3035
-  %3046 = fptoui float %45 to i32
-  %3047 = fptoui float %182 to i32
-  %3048 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3049 = extractvalue %dx.types.CBufRet.i32 %3048, 0
-  %3050 = extractvalue %dx.types.CBufRet.i32 %3048, 1
-  %3051 = extractvalue %dx.types.CBufRet.i32 %3048, 2
-  %3052 = extractvalue %dx.types.CBufRet.i32 %3048, 3
-  %3053 = mul i32 %3049, %3046
-  %3054 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3047, i32 %3050, i32 %3053)  ; IMad(a,b,c)
-  %3055 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3042, i32 %3051, i32 %3054)  ; IMad(a,b,c)
-  %3056 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3037, i32 %3052, i32 %3055)  ; IMad(a,b,c)
-  %3057 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3056, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3058 = extractvalue %dx.types.ResRet.i32 %3057, 0
-  %3059 = icmp ne i32 %3058, 0
-  %3060 = uitofp i1 %3059 to float
-  br label %3181
-
-; <label>:3061                                    ; preds = %3033
-  %3062 = icmp eq i32 %957, 1
-  br i1 %3062, label %3063, label %3093
-
-; <label>:3063                                    ; preds = %3061
-  %3064 = add i32 %13, -1
-  %3065 = uitofp i32 %3064 to float
-  %3066 = call float @dx.op.binary.f32(i32 35, float %1255, float 0.000000e+00)  ; FMax(a,b)
-  %3067 = call float @dx.op.binary.f32(i32 36, float %3066, float %3065)  ; FMin(a,b)
-  %3068 = fptoui float %3067 to i32
-  %3069 = add i32 %15, -1
-  %3070 = uitofp i32 %3069 to float
-  %3071 = call float @dx.op.binary.f32(i32 35, float %2738, float 0.000000e+00)  ; FMax(a,b)
-  %3072 = call float @dx.op.binary.f32(i32 36, float %3071, float %3070)  ; FMin(a,b)
-  %3073 = fptoui float %3072 to i32
-  %3074 = uitofp i32 %3073 to float
-  %3075 = uitofp i32 %3068 to float
-  %3076 = fptoui float %45 to i32
-  %3077 = fptoui float %182 to i32
-  %3078 = fptoui float %3074 to i32
-  %3079 = fptoui float %3075 to i32
-  %3080 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3081 = extractvalue %dx.types.CBufRet.i32 %3080, 0
-  %3082 = extractvalue %dx.types.CBufRet.i32 %3080, 1
-  %3083 = extractvalue %dx.types.CBufRet.i32 %3080, 2
-  %3084 = extractvalue %dx.types.CBufRet.i32 %3080, 3
-  %3085 = mul i32 %3081, %3076
-  %3086 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3077, i32 %3082, i32 %3085)  ; IMad(a,b,c)
-  %3087 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3078, i32 %3083, i32 %3086)  ; IMad(a,b,c)
-  %3088 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3079, i32 %3084, i32 %3087)  ; IMad(a,b,c)
-  %3089 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3088, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3090 = extractvalue %dx.types.ResRet.i32 %3089, 0
-  %3091 = icmp ne i32 %3090, 0
-  %3092 = uitofp i1 %3091 to float
-  br label %3181
-
-; <label>:3093                                    ; preds = %3061
-  %3094 = icmp eq i32 %957, 2
-  br i1 %3094, label %3095, label %3181
-
-; <label>:3095                                    ; preds = %3093
-  %3096 = fsub fast float %22, %20
-  %3097 = fcmp fast olt float %1255, %20
-  br i1 %3097, label %3098, label %3111
-
-; <label>:3098                                    ; preds = %3095
-  %3099 = fsub fast float %20, %1255
-  %3100 = fdiv fast float %3099, %3096
-  %3101 = fptoui float %3100 to i32
-  %3102 = uitofp i32 %3101 to float
-  %3103 = fmul fast float %3102, %3096
-  %3104 = fsub fast float %3099, %3103
-  %3105 = and i32 %3101, 1
-  %3106 = icmp eq i32 %3105, 0
-  br i1 %3106, label %3107, label %3109
-
-; <label>:3107                                    ; preds = %3098
-  %3108 = fadd fast float %3104, %20
-  br label %3126
-
-; <label>:3109                                    ; preds = %3098
-  %3110 = fsub fast float %22, %3104
-  br label %3126
-
-; <label>:3111                                    ; preds = %3095
-  %3112 = fcmp fast ogt float %1255, %22
-  br i1 %3112, label %3113, label %3126
-
-; <label>:3113                                    ; preds = %3111
-  %3114 = fsub fast float %1255, %22
-  %3115 = fdiv fast float %3114, %3096
-  %3116 = fptoui float %3115 to i32
-  %3117 = uitofp i32 %3116 to float
-  %3118 = fmul fast float %3117, %3096
-  %3119 = fsub fast float %3114, %3118
-  %3120 = and i32 %3116, 1
-  %3121 = icmp eq i32 %3120, 0
-  br i1 %3121, label %3122, label %3124
-
-; <label>:3122                                    ; preds = %3113
-  %3123 = fsub fast float %22, %3119
-  br label %3126
-
-; <label>:3124                                    ; preds = %3113
-  %3125 = fadd fast float %3119, %20
-  br label %3126
-
-; <label>:3126                                    ; preds = %3124, %3122, %3111, %3109, %3107
-  %3127 = phi float [ %3108, %3107 ], [ %3110, %3109 ], [ %3123, %3122 ], [ %3125, %3124 ], [ %1255, %3111 ]
-  %3128 = fptoui float %3127 to i32
-  %3129 = fsub fast float %24, %20
-  %3130 = fcmp fast olt float %2738, %20
-  br i1 %3130, label %3131, label %3144
-
-; <label>:3131                                    ; preds = %3126
-  %3132 = fsub fast float %20, %2738
-  %3133 = fdiv fast float %3132, %3129
-  %3134 = fptoui float %3133 to i32
-  %3135 = uitofp i32 %3134 to float
-  %3136 = fmul fast float %3135, %3129
-  %3137 = fsub fast float %3132, %3136
-  %3138 = and i32 %3134, 1
-  %3139 = icmp eq i32 %3138, 0
-  br i1 %3139, label %3140, label %3142
-
-; <label>:3140                                    ; preds = %3131
-  %3141 = fadd fast float %3137, %20
-  br label %3159
-
-; <label>:3142                                    ; preds = %3131
-  %3143 = fsub fast float %24, %3137
-  br label %3159
-
-; <label>:3144                                    ; preds = %3126
-  %3145 = fcmp fast ogt float %2738, %24
-  br i1 %3145, label %3146, label %3159
-
-; <label>:3146                                    ; preds = %3144
-  %3147 = fsub fast float %2738, %24
-  %3148 = fdiv fast float %3147, %3129
-  %3149 = fptoui float %3148 to i32
-  %3150 = uitofp i32 %3149 to float
-  %3151 = fmul fast float %3150, %3129
-  %3152 = fsub fast float %3147, %3151
-  %3153 = and i32 %3149, 1
-  %3154 = icmp eq i32 %3153, 0
-  br i1 %3154, label %3155, label %3157
-
-; <label>:3155                                    ; preds = %3146
-  %3156 = fsub fast float %24, %3152
-  br label %3159
-
-; <label>:3157                                    ; preds = %3146
-  %3158 = fadd fast float %3152, %20
-  br label %3159
-
-; <label>:3159                                    ; preds = %3157, %3155, %3144, %3142, %3140
-  %3160 = phi float [ %3141, %3140 ], [ %3143, %3142 ], [ %3156, %3155 ], [ %3158, %3157 ], [ %2738, %3144 ]
-  %3161 = fptoui float %3160 to i32
-  %3162 = uitofp i32 %3161 to float
-  %3163 = uitofp i32 %3128 to float
-  %3164 = fptoui float %45 to i32
-  %3165 = fptoui float %182 to i32
-  %3166 = fptoui float %3162 to i32
-  %3167 = fptoui float %3163 to i32
-  %3168 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3169 = extractvalue %dx.types.CBufRet.i32 %3168, 0
-  %3170 = extractvalue %dx.types.CBufRet.i32 %3168, 1
-  %3171 = extractvalue %dx.types.CBufRet.i32 %3168, 2
-  %3172 = extractvalue %dx.types.CBufRet.i32 %3168, 3
-  %3173 = mul i32 %3169, %3164
-  %3174 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3165, i32 %3170, i32 %3173)  ; IMad(a,b,c)
-  %3175 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3166, i32 %3171, i32 %3174)  ; IMad(a,b,c)
-  %3176 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3167, i32 %3172, i32 %3175)  ; IMad(a,b,c)
-  %3177 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3176, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3178 = extractvalue %dx.types.ResRet.i32 %3177, 0
-  %3179 = icmp ne i32 %3178, 0
-  %3180 = uitofp i1 %3179 to float
-  br label %3181
-
-; <label>:3181                                    ; preds = %3159, %3093, %3063, %3045, %3035
-  %3182 = phi float [ %3060, %3045 ], [ 0.000000e+00, %3035 ], [ %3092, %3063 ], [ %3180, %3159 ], [ 0.000000e+00, %3093 ]
-  br i1 %958, label %3183, label %3209
-
-; <label>:3183                                    ; preds = %3181
-  %3184 = fcmp fast oge float %1404, 0.000000e+00
-  %3185 = fptoui float %1404 to i32
-  %3186 = icmp ult i32 %3185, %13
-  %3187 = and i1 %3184, %3186
-  %3188 = fcmp fast oge float %2738, 0.000000e+00
-  %3189 = and i1 %3188, %3187
-  %3190 = fptoui float %2738 to i32
-  %3191 = icmp ult i32 %3190, %15
-  %3192 = and i1 %3191, %3189
-  br i1 %3192, label %3193, label %3329
-
-; <label>:3193                                    ; preds = %3183
-  %3194 = fptoui float %45 to i32
-  %3195 = fptoui float %182 to i32
-  %3196 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3197 = extractvalue %dx.types.CBufRet.i32 %3196, 0
-  %3198 = extractvalue %dx.types.CBufRet.i32 %3196, 1
-  %3199 = extractvalue %dx.types.CBufRet.i32 %3196, 2
-  %3200 = extractvalue %dx.types.CBufRet.i32 %3196, 3
-  %3201 = mul i32 %3197, %3194
-  %3202 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3195, i32 %3198, i32 %3201)  ; IMad(a,b,c)
-  %3203 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3190, i32 %3199, i32 %3202)  ; IMad(a,b,c)
-  %3204 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3185, i32 %3200, i32 %3203)  ; IMad(a,b,c)
-  %3205 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3204, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3206 = extractvalue %dx.types.ResRet.i32 %3205, 0
-  %3207 = icmp ne i32 %3206, 0
-  %3208 = uitofp i1 %3207 to float
-  br label %3329
-
-; <label>:3209                                    ; preds = %3181
-  %3210 = icmp eq i32 %957, 1
-  br i1 %3210, label %3211, label %3241
-
-; <label>:3211                                    ; preds = %3209
-  %3212 = add i32 %13, -1
-  %3213 = uitofp i32 %3212 to float
-  %3214 = call float @dx.op.binary.f32(i32 35, float %1404, float 0.000000e+00)  ; FMax(a,b)
-  %3215 = call float @dx.op.binary.f32(i32 36, float %3214, float %3213)  ; FMin(a,b)
-  %3216 = fptoui float %3215 to i32
-  %3217 = add i32 %15, -1
-  %3218 = uitofp i32 %3217 to float
-  %3219 = call float @dx.op.binary.f32(i32 35, float %2738, float 0.000000e+00)  ; FMax(a,b)
-  %3220 = call float @dx.op.binary.f32(i32 36, float %3219, float %3218)  ; FMin(a,b)
-  %3221 = fptoui float %3220 to i32
-  %3222 = uitofp i32 %3221 to float
-  %3223 = uitofp i32 %3216 to float
-  %3224 = fptoui float %45 to i32
-  %3225 = fptoui float %182 to i32
-  %3226 = fptoui float %3222 to i32
-  %3227 = fptoui float %3223 to i32
-  %3228 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3229 = extractvalue %dx.types.CBufRet.i32 %3228, 0
-  %3230 = extractvalue %dx.types.CBufRet.i32 %3228, 1
-  %3231 = extractvalue %dx.types.CBufRet.i32 %3228, 2
-  %3232 = extractvalue %dx.types.CBufRet.i32 %3228, 3
-  %3233 = mul i32 %3229, %3224
-  %3234 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3225, i32 %3230, i32 %3233)  ; IMad(a,b,c)
-  %3235 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3226, i32 %3231, i32 %3234)  ; IMad(a,b,c)
-  %3236 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3227, i32 %3232, i32 %3235)  ; IMad(a,b,c)
-  %3237 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3236, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3238 = extractvalue %dx.types.ResRet.i32 %3237, 0
-  %3239 = icmp ne i32 %3238, 0
-  %3240 = uitofp i1 %3239 to float
-  br label %3329
-
-; <label>:3241                                    ; preds = %3209
-  %3242 = icmp eq i32 %957, 2
-  br i1 %3242, label %3243, label %3329
-
-; <label>:3243                                    ; preds = %3241
-  %3244 = fsub fast float %22, %20
-  %3245 = fcmp fast olt float %1404, %20
-  br i1 %3245, label %3246, label %3259
-
-; <label>:3246                                    ; preds = %3243
-  %3247 = fsub fast float %20, %1404
-  %3248 = fdiv fast float %3247, %3244
-  %3249 = fptoui float %3248 to i32
-  %3250 = uitofp i32 %3249 to float
-  %3251 = fmul fast float %3250, %3244
-  %3252 = fsub fast float %3247, %3251
-  %3253 = and i32 %3249, 1
-  %3254 = icmp eq i32 %3253, 0
-  br i1 %3254, label %3255, label %3257
-
-; <label>:3255                                    ; preds = %3246
-  %3256 = fadd fast float %3252, %20
-  br label %3274
-
-; <label>:3257                                    ; preds = %3246
-  %3258 = fsub fast float %22, %3252
-  br label %3274
-
-; <label>:3259                                    ; preds = %3243
-  %3260 = fcmp fast ogt float %1404, %22
-  br i1 %3260, label %3261, label %3274
-
-; <label>:3261                                    ; preds = %3259
-  %3262 = fsub fast float %1404, %22
-  %3263 = fdiv fast float %3262, %3244
-  %3264 = fptoui float %3263 to i32
-  %3265 = uitofp i32 %3264 to float
-  %3266 = fmul fast float %3265, %3244
-  %3267 = fsub fast float %3262, %3266
-  %3268 = and i32 %3264, 1
-  %3269 = icmp eq i32 %3268, 0
-  br i1 %3269, label %3270, label %3272
-
-; <label>:3270                                    ; preds = %3261
-  %3271 = fsub fast float %22, %3267
-  br label %3274
-
-; <label>:3272                                    ; preds = %3261
-  %3273 = fadd fast float %3267, %20
-  br label %3274
-
-; <label>:3274                                    ; preds = %3272, %3270, %3259, %3257, %3255
-  %3275 = phi float [ %3256, %3255 ], [ %3258, %3257 ], [ %3271, %3270 ], [ %3273, %3272 ], [ %1404, %3259 ]
-  %3276 = fptoui float %3275 to i32
-  %3277 = fsub fast float %24, %20
-  %3278 = fcmp fast olt float %2738, %20
-  br i1 %3278, label %3279, label %3292
-
-; <label>:3279                                    ; preds = %3274
-  %3280 = fsub fast float %20, %2738
-  %3281 = fdiv fast float %3280, %3277
-  %3282 = fptoui float %3281 to i32
-  %3283 = uitofp i32 %3282 to float
-  %3284 = fmul fast float %3283, %3277
-  %3285 = fsub fast float %3280, %3284
-  %3286 = and i32 %3282, 1
-  %3287 = icmp eq i32 %3286, 0
-  br i1 %3287, label %3288, label %3290
-
-; <label>:3288                                    ; preds = %3279
-  %3289 = fadd fast float %3285, %20
-  br label %3307
-
-; <label>:3290                                    ; preds = %3279
-  %3291 = fsub fast float %24, %3285
-  br label %3307
-
-; <label>:3292                                    ; preds = %3274
-  %3293 = fcmp fast ogt float %2738, %24
-  br i1 %3293, label %3294, label %3307
-
-; <label>:3294                                    ; preds = %3292
-  %3295 = fsub fast float %2738, %24
-  %3296 = fdiv fast float %3295, %3277
-  %3297 = fptoui float %3296 to i32
-  %3298 = uitofp i32 %3297 to float
-  %3299 = fmul fast float %3298, %3277
-  %3300 = fsub fast float %3295, %3299
-  %3301 = and i32 %3297, 1
-  %3302 = icmp eq i32 %3301, 0
-  br i1 %3302, label %3303, label %3305
-
-; <label>:3303                                    ; preds = %3294
-  %3304 = fsub fast float %24, %3300
-  br label %3307
-
-; <label>:3305                                    ; preds = %3294
-  %3306 = fadd fast float %3300, %20
-  br label %3307
-
-; <label>:3307                                    ; preds = %3305, %3303, %3292, %3290, %3288
-  %3308 = phi float [ %3289, %3288 ], [ %3291, %3290 ], [ %3304, %3303 ], [ %3306, %3305 ], [ %2738, %3292 ]
-  %3309 = fptoui float %3308 to i32
-  %3310 = uitofp i32 %3309 to float
-  %3311 = uitofp i32 %3276 to float
-  %3312 = fptoui float %45 to i32
-  %3313 = fptoui float %182 to i32
-  %3314 = fptoui float %3310 to i32
-  %3315 = fptoui float %3311 to i32
-  %3316 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3317 = extractvalue %dx.types.CBufRet.i32 %3316, 0
-  %3318 = extractvalue %dx.types.CBufRet.i32 %3316, 1
-  %3319 = extractvalue %dx.types.CBufRet.i32 %3316, 2
-  %3320 = extractvalue %dx.types.CBufRet.i32 %3316, 3
-  %3321 = mul i32 %3317, %3312
-  %3322 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3313, i32 %3318, i32 %3321)  ; IMad(a,b,c)
-  %3323 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3314, i32 %3319, i32 %3322)  ; IMad(a,b,c)
-  %3324 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3315, i32 %3320, i32 %3323)  ; IMad(a,b,c)
-  %3325 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3324, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3326 = extractvalue %dx.types.ResRet.i32 %3325, 0
-  %3327 = icmp ne i32 %3326, 0
-  %3328 = uitofp i1 %3327 to float
-  br label %3329
-
-; <label>:3329                                    ; preds = %3307, %3241, %3211, %3193, %3183
-  %3330 = phi float [ %3208, %3193 ], [ 0.000000e+00, %3183 ], [ %3240, %3211 ], [ %3328, %3307 ], [ 0.000000e+00, %3241 ]
-  %3331 = call float @dx.op.unary.f32(i32 22, float %180)  ; Frc(value)
-  %3332 = call float @dx.op.unary.f32(i32 22, float %181)  ; Frc(value)
-  %3333 = fmul fast float %3332, %3332
-  %3334 = fmul fast float %3333, %3332
-  %3335 = fmul fast float %1106, -7.500000e-01
-  %3336 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2293, float %3335)  ; FMad(a,b,c)
-  %3337 = fmul fast float %1106, 1.500000e+00
-  %3338 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %1700, float %3337)  ; FMad(a,b,c)
-  %3339 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %2293, float %3338)  ; FMad(a,b,c)
-  %3340 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %2886, float %3339)  ; FMad(a,b,c)
-  %3341 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %1700, float %3335)  ; FMad(a,b,c)
-  %3342 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %2293, float %3341)  ; FMad(a,b,c)
-  %3343 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2886, float %3342)  ; FMad(a,b,c)
-  %3344 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3332, float %3333, float %3334, float %1700, float %3336, float %3340, float %3343)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3345 = fmul fast float %1254, -7.500000e-01
-  %3346 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2441, float %3345)  ; FMad(a,b,c)
-  %3347 = fmul fast float %1254, 1.500000e+00
-  %3348 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %1848, float %3347)  ; FMad(a,b,c)
-  %3349 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %2441, float %3348)  ; FMad(a,b,c)
-  %3350 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %3034, float %3349)  ; FMad(a,b,c)
-  %3351 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %1848, float %3345)  ; FMad(a,b,c)
-  %3352 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %2441, float %3351)  ; FMad(a,b,c)
-  %3353 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3034, float %3352)  ; FMad(a,b,c)
-  %3354 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3332, float %3333, float %3334, float %1848, float %3346, float %3350, float %3353)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3355 = fmul fast float %1403, -7.500000e-01
-  %3356 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2589, float %3355)  ; FMad(a,b,c)
-  %3357 = fmul fast float %1403, 1.500000e+00
-  %3358 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %1996, float %3357)  ; FMad(a,b,c)
-  %3359 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %2589, float %3358)  ; FMad(a,b,c)
-  %3360 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %3182, float %3359)  ; FMad(a,b,c)
-  %3361 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %1996, float %3355)  ; FMad(a,b,c)
-  %3362 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %2589, float %3361)  ; FMad(a,b,c)
-  %3363 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3182, float %3362)  ; FMad(a,b,c)
-  %3364 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3332, float %3333, float %3334, float %1996, float %3356, float %3360, float %3363)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3365 = fmul fast float %1552, -7.500000e-01
-  %3366 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2737, float %3365)  ; FMad(a,b,c)
-  %3367 = fmul fast float %1552, 1.500000e+00
-  %3368 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %2144, float %3367)  ; FMad(a,b,c)
-  %3369 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %2737, float %3368)  ; FMad(a,b,c)
-  %3370 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %3330, float %3369)  ; FMad(a,b,c)
-  %3371 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %2144, float %3365)  ; FMad(a,b,c)
-  %3372 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %2737, float %3371)  ; FMad(a,b,c)
-  %3373 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3330, float %3372)  ; FMad(a,b,c)
-  %3374 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3332, float %3333, float %3334, float %2144, float %3366, float %3370, float %3373)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3375 = fmul fast float %3331, %3331
-  %3376 = fmul fast float %3375, %3331
-  %3377 = fmul fast float %3344, -7.500000e-01
-  %3378 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3364, float %3377)  ; FMad(a,b,c)
-  %3379 = fmul fast float %3344, 1.500000e+00
-  %3380 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %3354, float %3379)  ; FMad(a,b,c)
-  %3381 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %3364, float %3380)  ; FMad(a,b,c)
-  %3382 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %3374, float %3381)  ; FMad(a,b,c)
-  %3383 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %3354, float %3377)  ; FMad(a,b,c)
-  %3384 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %3364, float %3383)  ; FMad(a,b,c)
-  %3385 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3374, float %3384)  ; FMad(a,b,c)
-  %3386 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3331, float %3375, float %3376, float %3354, float %3378, float %3382, float %3385)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3387 = fcmp fast une float %3386, 0.000000e+00
-  %3388 = zext i1 %3387 to i32
-  call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %1, i32 %8, i32 0, i32 %3388, i32 undef, i32 undef, i32 undef, i8 1, i32 4)  ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment)
-  br label %3389
-
-; <label>:3389                                    ; preds = %3329, %950, %935, %332, %0
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.threadId.i32(i32, i32) #0
-
-; Function Attrs: nounwind readonly
-declare %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32, %dx.types.Handle, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.rawBufferStore.i32(i32, %dx.types.Handle, i32, i32, i32, i32, i32, i32, i8, i32) #2
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.unary.f32(i32, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.tertiary.f32(i32, float, float, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.binary.f32(i32, float, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.dot4.f32(i32, float, float, float, float, float, float, float, float) #0
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.tertiary.i32(i32, i32, i32, i32) #0
-
-; Function Attrs: nounwind readonly
-declare %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32, %dx.types.Handle, i32) #1
-
-; Function Attrs: nounwind readonly
-declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #1
-
-; Function Attrs: nounwind readnone
-declare double @dx.op.makeDouble.f64(i32, i32, i32) #0
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind readonly }
-attributes #2 = { nounwind }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.valver = !{!2}
-!dx.shaderModel = !{!3}
-!dx.resources = !{!4}
-!dx.entryPoints = !{!13}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 1, i32 2}
-!2 = !{i32 1, i32 6}
-!3 = !{!"cs", i32 6, i32 2}
-!4 = !{null, !5, !11, null}
-!5 = !{!6, !8, !10}
-!6 = !{i32 0, %"class.RWStructuredBuffer<bool>"* undef, !"", i32 0, i32 0, i32 1, i32 12, i1 false, i1 false, i1 false, !7}
-!7 = !{i32 1, i32 4}
-!8 = !{i32 1, %"class.RWStructuredBuffer<double>"* undef, !"", i32 0, i32 1, i32 1, i32 12, i1 false, i1 false, i1 false, !9}
-!9 = !{i32 1, i32 8}
-!10 = !{i32 2, %"class.RWStructuredBuffer<bool>"* undef, !"", i32 0, i32 2, i32 1, i32 12, i1 false, i1 false, i1 false, !7}
-!11 = !{!12}
-!12 = !{i32 0, %Constants* undef, !"", i32 0, i32 0, i32 1, i32 116, null}
-!13 = !{void ()* @GridSample, !"GridSample", null, !4, !14}
-!14 = !{i32 0, i64 8388628, i32 4, !15}
-!15 = !{i32 64, i32 1, i32 1}
-
-#endif
-
-const unsigned char g_GridSample[] = {
-  0x44, 0x58, 0x42, 0x43, 0x66, 0xee, 0xd0, 0x3d, 0x80, 0xbc, 0x4b, 0x94,
-  0x7a, 0x0f, 0xfd, 0x88, 0x0b, 0xae, 0xe1, 0xec, 0x01, 0x00, 0x00, 0x00,
-  0xf8, 0x55, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00,
-  0x48, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00,
-  0x18, 0x01, 0x00, 0x00, 0x34, 0x01, 0x00, 0x00, 0x53, 0x46, 0x49, 0x30,
-  0x08, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x49, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x08, 0x00, 0x00, 0x00, 0x4f, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x50, 0x53, 0x56, 0x30,
-  0xa8, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x05, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00,
-  0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
-  0x18, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x48, 0x41, 0x53, 0x48, 0x14, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0xef, 0xf8, 0x6a, 0x5d, 0xd3, 0xf8, 0xca, 0x65,
-  0x2b, 0x37, 0x00, 0xc5, 0x25, 0x70, 0xe5, 0x35, 0x44, 0x58, 0x49, 0x4c,
-  0xbc, 0x54, 0x00, 0x00, 0x62, 0x00, 0x05, 0x00, 0x2f, 0x15, 0x00, 0x00,
-  0x44, 0x58, 0x49, 0x4c, 0x02, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
-  0xa4, 0x54, 0x00, 0x00, 0x42, 0x43, 0xc0, 0xde, 0x21, 0x0c, 0x00, 0x00,
-  0x26, 0x15, 0x00, 0x00, 0x0b, 0x82, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00,
-  0x13, 0x00, 0x00, 0x00, 0x07, 0x81, 0x23, 0x91, 0x41, 0xc8, 0x04, 0x49,
-  0x06, 0x10, 0x32, 0x39, 0x92, 0x01, 0x84, 0x0c, 0x25, 0x05, 0x08, 0x19,
-  0x1e, 0x04, 0x8b, 0x62, 0x80, 0x18, 0x45, 0x02, 0x42, 0x92, 0x0b, 0x42,
-  0xc4, 0x10, 0x32, 0x14, 0x38, 0x08, 0x18, 0x4b, 0x0a, 0x32, 0x62, 0x88,
-  0x48, 0x90, 0x14, 0x20, 0x43, 0x46, 0x88, 0xa5, 0x00, 0x19, 0x32, 0x42,
-  0xe4, 0x48, 0x0e, 0x90, 0x11, 0x23, 0xc4, 0x50, 0x41, 0x51, 0x81, 0x8c,
-  0xe1, 0x83, 0xe5, 0x8a, 0x04, 0x31, 0x46, 0x06, 0x51, 0x18, 0x00, 0x00,
-  0x08, 0x00, 0x00, 0x00, 0x1b, 0x8c, 0xe0, 0xff, 0xff, 0xff, 0xff, 0x07,
-  0x40, 0x02, 0xa8, 0x0d, 0x86, 0xf0, 0xff, 0xff, 0xff, 0xff, 0x03, 0x20,
-  0x01, 0xd5, 0x06, 0x62, 0xf8, 0xff, 0xff, 0xff, 0xff, 0x01, 0x90, 0x00,
-  0x49, 0x18, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x13, 0x82, 0x60, 0x42,
-  0x20, 0x4c, 0x08, 0x06, 0x00, 0x00, 0x00, 0x00, 0x89, 0x20, 0x00, 0x00,
-  0x50, 0x00, 0x00, 0x00, 0x32, 0x22, 0x88, 0x09, 0x20, 0x64, 0x85, 0x04,
-  0x13, 0x23, 0xa4, 0x84, 0x04, 0x13, 0x23, 0xe3, 0x84, 0xa1, 0x90, 0x14,
-  0x12, 0x4c, 0x8c, 0x8c, 0x0b, 0x84, 0xc4, 0x4c, 0x10, 0xb0, 0xc1, 0x08,
-  0x40, 0x09, 0x00, 0x0a, 0xe6, 0x08, 0xc0, 0xa0, 0x0c, 0xc3, 0x30, 0x10,
-  0x71, 0xd3, 0x70, 0xf9, 0x13, 0xf6, 0x10, 0x92, 0xbf, 0x12, 0xd2, 0x4a,
-  0x4c, 0x3e, 0x72, 0xdb, 0xa8, 0x18, 0x86, 0x61, 0x18, 0xe6, 0x08, 0x10,
-  0x3a, 0xee, 0x19, 0x2e, 0x7f, 0xc2, 0x1e, 0x42, 0xf2, 0x43, 0xa0, 0x19,
-  0x16, 0x02, 0x05, 0x48, 0x39, 0x8c, 0x21, 0x19, 0x86, 0x63, 0x20, 0xa5,
-  0x2c, 0xc0, 0x90, 0x0c, 0xc3, 0x30, 0x0c, 0xc3, 0x31, 0x10, 0x33, 0x03,
-  0x50, 0x06, 0x67, 0x70, 0xe8, 0x29, 0x85, 0x33, 0x38, 0x8e, 0x43, 0x51,
-  0x21, 0x9c, 0xc1, 0x71, 0x68, 0x2a, 0x8a, 0x33, 0x38, 0x8e, 0xe3, 0x38,
-  0x8e, 0xe3, 0x50, 0x55, 0x8a, 0x61, 0x18, 0x86, 0x81, 0xae, 0xa3, 0x86,
-  0xcb, 0x9f, 0xb0, 0x87, 0x90, 0x7c, 0x6e, 0xa3, 0x8a, 0x95, 0x98, 0x7c,
-  0xe4, 0xb6, 0x11, 0x31, 0x0c, 0xc3, 0x50, 0x88, 0x6c, 0x48, 0x06, 0xd2,
-  0xe6, 0x08, 0x82, 0x62, 0x24, 0xc3, 0x31, 0x0c, 0x1c, 0x75, 0x43, 0x00,
-  0x85, 0xf8, 0x86, 0x61, 0x20, 0x70, 0x20, 0x60, 0x26, 0x6f, 0x1c, 0xd8,
-  0x21, 0x1c, 0xe6, 0x61, 0x1e, 0xdc, 0x40, 0x16, 0x6e, 0x61, 0x16, 0xe8,
-  0x41, 0x1e, 0xea, 0x61, 0x1c, 0xe8, 0xa1, 0x1e, 0xe4, 0xa1, 0x1c, 0xc8,
-  0x41, 0x14, 0xea, 0xc1, 0x1c, 0xcc, 0xa1, 0x1c, 0xe4, 0x81, 0x0f, 0xc4,
-  0xe1, 0x1d, 0xde, 0x81, 0x1d, 0xfc, 0x00, 0x05, 0x03, 0x8d, 0x33, 0x81,
-  0xc1, 0x38, 0xb0, 0x43, 0x38, 0xcc, 0xc3, 0x3c, 0xb8, 0x81, 0x2c, 0xdc,
-  0xc2, 0x2c, 0xd0, 0x83, 0x3c, 0xd4, 0xc3, 0x38, 0xd0, 0x43, 0x3d, 0xc8,
-  0x43, 0x39, 0x90, 0x83, 0x28, 0xd4, 0x83, 0x39, 0x98, 0x43, 0x39, 0xc8,
-  0x03, 0x1f, 0x90, 0xc3, 0x3b, 0xd4, 0x83, 0x38, 0xb0, 0x43, 0x39, 0xf8,
-  0x01, 0x0a, 0x3e, 0x2a, 0x87, 0x11, 0x88, 0xe1, 0x12, 0xce, 0x69, 0xa4,
-  0x09, 0x68, 0x26, 0x09, 0x2d, 0xc3, 0x30, 0x0c, 0xe7, 0x79, 0x9e, 0xe7,
-  0x39, 0x10, 0x3a, 0x47, 0x00, 0x0a, 0x53, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x13, 0x14, 0x72, 0xc0, 0x87, 0x74, 0x60, 0x87, 0x36, 0x68, 0x87, 0x79,
-  0x68, 0x03, 0x72, 0xc0, 0x87, 0x0d, 0xae, 0x50, 0x0e, 0x6d, 0xd0, 0x0e,
-  0x7a, 0x50, 0x0e, 0x6d, 0x00, 0x0f, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07,
-  0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0xa0, 0x07, 0x73, 0x20, 0x07,
-  0x6d, 0x90, 0x0e, 0x78, 0xa0, 0x07, 0x78, 0xd0, 0x06, 0xe9, 0x10, 0x07,
-  0x76, 0xa0, 0x07, 0x71, 0x60, 0x07, 0x6d, 0x90, 0x0e, 0x73, 0x20, 0x07,
-  0x7a, 0x30, 0x07, 0x72, 0xd0, 0x06, 0xe9, 0x60, 0x07, 0x74, 0xa0, 0x07,
-  0x76, 0x40, 0x07, 0x6d, 0x60, 0x0e, 0x71, 0x60, 0x07, 0x7a, 0x10, 0x07,
-  0x76, 0xd0, 0x06, 0xe6, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07,
-  0x6d, 0x60, 0x0e, 0x76, 0x40, 0x07, 0x7a, 0x60, 0x07, 0x74, 0xd0, 0x06,
-  0xee, 0x80, 0x07, 0x7a, 0x10, 0x07, 0x76, 0xa0, 0x07, 0x73, 0x20, 0x07,
-  0x7a, 0x60, 0x07, 0x74, 0x30, 0xe4, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0xc8, 0x43, 0x00, 0x01, 0x10, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x90, 0x47, 0x01, 0x02, 0x40,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x21, 0x0f, 0x03, 0x04,
-  0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x43, 0x9e, 0x07,
-  0x08, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0x3c,
-  0x11, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c,
-  0x79, 0x26, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x18, 0xf2, 0x54, 0x40, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x30, 0xe4, 0xb9, 0x80, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x60, 0xc8, 0xa3, 0x01, 0x01, 0x20, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0xc0, 0x90, 0xa7, 0x03, 0x02, 0x40, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x80, 0x21, 0x0f, 0x18, 0x00, 0x01, 0x10, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x16, 0x08, 0x00, 0x00, 0x00,
-  0x0d, 0x00, 0x00, 0x00, 0x32, 0x1e, 0x98, 0x14, 0x19, 0x11, 0x4c, 0x90,
-  0x8c, 0x09, 0x26, 0x47, 0xc6, 0x04, 0x43, 0x1a, 0x4a, 0xa0, 0x08, 0x8a,
-  0x61, 0x04, 0xa0, 0x30, 0x0a, 0xa1, 0x20, 0x0a, 0x3d, 0xa0, 0x00, 0x03,
-  0x88, 0x1b, 0x01, 0x20, 0xb5, 0x50, 0x01, 0x01, 0x11, 0xc8, 0x9c, 0x01,
-  0xa0, 0x74, 0x06, 0x80, 0xc8, 0x19, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00,
-  0x4b, 0x00, 0x00, 0x00, 0x1a, 0x03, 0x4c, 0x90, 0x46, 0x02, 0x13, 0x44,
-  0x35, 0x18, 0x63, 0x0b, 0x73, 0x3b, 0x03, 0xb1, 0x2b, 0x93, 0x9b, 0x4b,
-  0x7b, 0x73, 0x03, 0x99, 0x71, 0xb9, 0x01, 0x41, 0xa1, 0x0b, 0x3b, 0x9b,
-  0x7b, 0x91, 0x2a, 0x62, 0x2a, 0x0a, 0x9a, 0x2a, 0xfa, 0x9a, 0xb9, 0x81,
-  0x79, 0x31, 0x4b, 0x73, 0x0b, 0x63, 0x4b, 0xd9, 0x10, 0x04, 0x13, 0x84,
-  0x81, 0x99, 0x20, 0x0c, 0xcd, 0x06, 0x61, 0x20, 0x26, 0x08, 0x83, 0xb3,
-  0x41, 0x18, 0x0c, 0x0a, 0x63, 0x73, 0x1b, 0x06, 0xc4, 0x20, 0x26, 0x08,
-  0xc3, 0x33, 0x41, 0x20, 0x83, 0x8c, 0xc0, 0x04, 0x61, 0x80, 0x26, 0x08,
-  0x5c, 0x35, 0x41, 0x18, 0xa2, 0x0d, 0xc2, 0xf0, 0x6c, 0x58, 0x94, 0x85,
-  0x51, 0x94, 0xa1, 0x71, 0x1c, 0x07, 0x9a, 0x20, 0x98, 0xc1, 0x35, 0x41,
-  0x18, 0xa4, 0x0d, 0xc2, 0x30, 0x6d, 0x58, 0x06, 0x89, 0x51, 0x86, 0xa1,
-  0x71, 0x1c, 0x87, 0xda, 0xb0, 0x10, 0x0b, 0xa3, 0x10, 0x43, 0xe3, 0x38,
-  0x0e, 0xb4, 0x61, 0x88, 0x2a, 0x6b, 0x82, 0x90, 0x06, 0xd8, 0x04, 0x61,
-  0x98, 0x36, 0x20, 0x0a, 0xc6, 0x28, 0xca, 0x90, 0x01, 0x1b, 0x02, 0x6d,
-  0x03, 0x01, 0x5c, 0x1b, 0x30, 0x41, 0x10, 0x00, 0x2a, 0x47, 0x72, 0x69,
-  0x64, 0x53, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x13, 0x04, 0x35, 0xb0, 0x26,
-  0x08, 0x03, 0xb5, 0x61, 0x00, 0x83, 0x61, 0xd8, 0x40, 0x28, 0xdf, 0x13,
-  0x06, 0x1b, 0x8a, 0xce, 0x03, 0x38, 0x31, 0xa8, 0xc2, 0xc6, 0x66, 0xd7,
-  0xe6, 0x92, 0x46, 0x56, 0xe6, 0x46, 0x37, 0x25, 0x08, 0xaa, 0x90, 0xe1,
-  0xb9, 0xd8, 0x95, 0xc9, 0xcd, 0xa5, 0xbd, 0xb9, 0x4d, 0x09, 0x88, 0x26,
-  0x64, 0x78, 0x2e, 0x76, 0x61, 0x6c, 0x76, 0x65, 0x72, 0x53, 0x02, 0xa3,
-  0x0e, 0x19, 0x9e, 0xcb, 0x1c, 0x5a, 0x18, 0x59, 0x99, 0x5c, 0xd3, 0x1b,
-  0x59, 0x19, 0xdb, 0x94, 0x00, 0x29, 0x43, 0x86, 0xe7, 0x22, 0x57, 0x36,
-  0xf7, 0x56, 0x27, 0x37, 0x56, 0x36, 0x37, 0x25, 0xd8, 0xea, 0x90, 0xe1,
-  0xb9, 0x94, 0xb9, 0xd1, 0xc9, 0xe5, 0x41, 0xbd, 0xa5, 0xb9, 0xd1, 0xcd,
-  0x4d, 0x09, 0xc4, 0x00, 0x79, 0x18, 0x00, 0x00, 0x51, 0x00, 0x00, 0x00,
-  0x33, 0x08, 0x80, 0x1c, 0xc4, 0xe1, 0x1c, 0x66, 0x14, 0x01, 0x3d, 0x88,
-  0x43, 0x38, 0x84, 0xc3, 0x8c, 0x42, 0x80, 0x07, 0x79, 0x78, 0x07, 0x73,
-  0x98, 0x71, 0x0c, 0xe6, 0x00, 0x0f, 0xed, 0x10, 0x0e, 0xf4, 0x80, 0x0e,
-  0x33, 0x0c, 0x42, 0x1e, 0xc2, 0xc1, 0x1d, 0xce, 0xa1, 0x1c, 0x66, 0x30,
-  0x05, 0x3d, 0x88, 0x43, 0x38, 0x84, 0x83, 0x1b, 0xcc, 0x03, 0x3d, 0xc8,
-  0x43, 0x3d, 0x8c, 0x03, 0x3d, 0xcc, 0x78, 0x8c, 0x74, 0x70, 0x07, 0x7b,
-  0x08, 0x07, 0x79, 0x48, 0x87, 0x70, 0x70, 0x07, 0x7a, 0x70, 0x03, 0x76,
-  0x78, 0x87, 0x70, 0x20, 0x87, 0x19, 0xcc, 0x11, 0x0e, 0xec, 0x90, 0x0e,
-  0xe1, 0x30, 0x0f, 0x6e, 0x30, 0x0f, 0xe3, 0xf0, 0x0e, 0xf0, 0x50, 0x0e,
-  0x33, 0x10, 0xc4, 0x1d, 0xde, 0x21, 0x1c, 0xd8, 0x21, 0x1d, 0xc2, 0x61,
-  0x1e, 0x66, 0x30, 0x89, 0x3b, 0xbc, 0x83, 0x3b, 0xd0, 0x43, 0x39, 0xb4,
-  0x03, 0x3c, 0xbc, 0x83, 0x3c, 0x84, 0x03, 0x3b, 0xcc, 0xf0, 0x14, 0x76,
-  0x60, 0x07, 0x7b, 0x68, 0x07, 0x37, 0x68, 0x87, 0x72, 0x68, 0x07, 0x37,
-  0x80, 0x87, 0x70, 0x90, 0x87, 0x70, 0x60, 0x07, 0x76, 0x28, 0x07, 0x76,
-  0xf8, 0x05, 0x76, 0x78, 0x87, 0x77, 0x80, 0x87, 0x5f, 0x08, 0x87, 0x71,
-  0x18, 0x87, 0x72, 0x98, 0x87, 0x79, 0x98, 0x81, 0x2c, 0xee, 0xf0, 0x0e,
-  0xee, 0xe0, 0x0e, 0xf5, 0xc0, 0x0e, 0xec, 0x30, 0x03, 0x62, 0xc8, 0xa1,
-  0x1c, 0xe4, 0xa1, 0x1c, 0xcc, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xdc, 0x61,
-  0x1c, 0xca, 0x21, 0x1c, 0xc4, 0x81, 0x1d, 0xca, 0x61, 0x06, 0xd6, 0x90,
-  0x43, 0x39, 0xc8, 0x43, 0x39, 0x98, 0x43, 0x39, 0xc8, 0x43, 0x39, 0xb8,
-  0xc3, 0x38, 0x94, 0x43, 0x38, 0x88, 0x03, 0x3b, 0x94, 0xc3, 0x2f, 0xbc,
-  0x83, 0x3c, 0xfc, 0x82, 0x3b, 0xd4, 0x03, 0x3b, 0xb0, 0xc3, 0x0c, 0xc4,
-  0x21, 0x07, 0x7c, 0x70, 0x03, 0x7a, 0x28, 0x87, 0x76, 0x80, 0x87, 0x19,
-  0xd1, 0x43, 0x0e, 0xf8, 0xe0, 0x06, 0xe4, 0x20, 0x0e, 0xe7, 0xe0, 0x06,
-  0xf6, 0x10, 0x0e, 0xf2, 0xc0, 0x0e, 0xe1, 0x90, 0x0f, 0xef, 0x50, 0x0f,
-  0xf4, 0x30, 0x83, 0x81, 0xc8, 0x01, 0x1f, 0xdc, 0x40, 0x1c, 0xe4, 0xa1,
-  0x1c, 0xc2, 0x61, 0x1d, 0xdc, 0x40, 0x1c, 0xe4, 0x01, 0x00, 0x00, 0x00,
-  0x71, 0x20, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x06, 0xa0, 0x80, 0x11,
-  0x32, 0xb0, 0x00, 0xf3, 0x2c, 0x84, 0x19, 0x40, 0xc3, 0xe5, 0x3b, 0x8f,
-  0x1f, 0x20, 0x0d, 0x10, 0x61, 0x7e, 0x71, 0xdb, 0x96, 0xb0, 0x0d, 0x97,
-  0xef, 0x3c, 0xbe, 0x10, 0x50, 0x45, 0x41, 0x44, 0xa5, 0x03, 0x0c, 0x25,
-  0x61, 0x00, 0x02, 0xe6, 0x23, 0xb7, 0x6d, 0x0a, 0xd2, 0x70, 0xf9, 0xce,
-  0xe3, 0x0b, 0x11, 0x01, 0x4c, 0x44, 0x08, 0x34, 0xc3, 0x42, 0xd8, 0x81,
-  0x33, 0x5c, 0xbe, 0xf3, 0xf8, 0x83, 0x33, 0xe1, 0x7e, 0x71, 0xdb, 0xb6,
-  0x40, 0x0d, 0x97, 0xef, 0x3c, 0x3e, 0x03, 0x28, 0x44, 0xe7, 0x50, 0xc1,
-  0x42, 0xf8, 0x85, 0x8e, 0x9b, 0xc0, 0x35, 0x5c, 0xbe, 0xf3, 0xf8, 0x11,
-  0x60, 0x6d, 0x54, 0x51, 0x10, 0x51, 0xe9, 0x00, 0x83, 0x8f, 0xdc, 0xb6,
-  0x0d, 0x60, 0xc3, 0xe5, 0x3b, 0x8f, 0x1f, 0x01, 0xd6, 0x46, 0x15, 0x05,
-  0x11, 0xb1, 0x93, 0x13, 0x11, 0x3e, 0x72, 0xdb, 0x56, 0x20, 0x0d, 0x97,
-  0xef, 0x3c, 0xfe, 0x44, 0x44, 0x13, 0x02, 0x44, 0x98, 0x5f, 0xdc, 0xb6,
-  0x21, 0x48, 0xc3, 0xe5, 0x3b, 0x8f, 0x3f, 0x11, 0xd1, 0x84, 0x00, 0x11,
-  0xe6, 0x23, 0xb7, 0x6d, 0x01, 0xd2, 0x70, 0xf9, 0xce, 0xe3, 0x4f, 0x47,
-  0x44, 0x00, 0x83, 0x38, 0xf8, 0xc8, 0x6d, 0x1b, 0xc1, 0x33, 0x5c, 0xbe,
-  0xf3, 0xf8, 0x54, 0x03, 0x44, 0x98, 0x5f, 0xdc, 0x36, 0x00, 0x00, 0x00,
-  0x61, 0x20, 0x00, 0x00, 0x80, 0x13, 0x00, 0x00, 0x13, 0x04, 0x24, 0x14,
-  0x0b, 0x04, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x34, 0x14, 0x58, 0xd9,
-  0x95, 0xa5, 0x40, 0x0d, 0x94, 0x51, 0x21, 0x15, 0xd7, 0x0c, 0x40, 0xc1,
-  0x95, 0x5c, 0xd9, 0x14, 0x4b, 0x61, 0x0a, 0x94, 0x72, 0x40, 0xd1, 0x94,
-  0x6e, 0x40, 0x39, 0x94, 0x02, 0x1d, 0x25, 0x50, 0x06, 0x45, 0x40, 0xce,
-  0x08, 0xc0, 0x18, 0x01, 0x08, 0x82, 0x20, 0xfe, 0x8d, 0x11, 0x80, 0x20,
-  0x08, 0xd2, 0xbf, 0x30, 0x46, 0x00, 0x82, 0x20, 0x48, 0x7f, 0x63, 0x04,
-  0x20, 0x08, 0x82, 0xfc, 0x37, 0x46, 0x00, 0x82, 0x20, 0x88, 0xff, 0xc2,
-  0x18, 0x01, 0x08, 0x82, 0x60, 0x08, 0x0e, 0x63, 0x04, 0x20, 0x08, 0x82,
-  0xfa, 0x37, 0x46, 0x00, 0x82, 0x20, 0xa8, 0xff, 0xc2, 0x18, 0x01, 0x08,
-  0x82, 0x20, 0xfc, 0x8d, 0x11, 0x80, 0x20, 0x08, 0xc2, 0xbf, 0x30, 0x46,
-  0x00, 0x82, 0x20, 0x08, 0x82, 0x01, 0x00, 0x00, 0x23, 0x06, 0x09, 0x00,
-  0x82, 0x60, 0xd0, 0xc5, 0x01, 0xf6, 0xb8, 0x81, 0x1b, 0x98, 0xc1, 0x88,
-  0x41, 0x02, 0x80, 0x20, 0x18, 0x74, 0x72, 0x90, 0x41, 0x70, 0x00, 0x07,
-  0x67, 0x30, 0x62, 0x90, 0x00, 0x20, 0x08, 0x06, 0xdd, 0x1c, 0x68, 0x91,
-  0x1b, 0xb8, 0x01, 0x1a, 0x8c, 0x18, 0x24, 0x00, 0x08, 0x82, 0x41, 0x47,
-  0x07, 0x1b, 0xf4, 0x06, 0x6f, 0x90, 0x06, 0x23, 0x06, 0x06, 0x00, 0x82,
-  0x60, 0x40, 0xf8, 0x81, 0x05, 0x07, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60,
-  0xa0, 0xdd, 0x41, 0x19, 0x08, 0x71, 0x30, 0x9a, 0x10, 0x00, 0x15, 0x0c,
-  0x30, 0x9a, 0x30, 0x04, 0xc3, 0x0d, 0x42, 0x40, 0x06, 0xb3, 0x0c, 0xc1,
-  0x08, 0x05, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xa0, 0xf1, 0x81, 0x1a,
-  0x1c, 0x79, 0x30, 0x9a, 0x10, 0x0c, 0x17, 0x38, 0x35, 0x9a, 0x30, 0x08,
-  0x17, 0x38, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x5a, 0x28, 0xbc,
-  0x01, 0x03, 0x06, 0xa3, 0x09, 0x01, 0x30, 0xdc, 0x10, 0xf4, 0x01, 0x18,
-  0x4c, 0x37, 0x50, 0x5e, 0x30, 0xdd, 0x50, 0x69, 0x42, 0x21, 0x01, 0x4c,
-  0x37, 0x5c, 0x1c, 0x51, 0x48, 0x00, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60,
-  0xa0, 0xa5, 0xc2, 0x1d, 0x50, 0x67, 0x30, 0x9a, 0x10, 0x04, 0xa3, 0x09,
-  0x82, 0x30, 0x9a, 0x30, 0x0c, 0x15, 0x08, 0x52, 0x03, 0x21, 0x15, 0x0c,
-  0x52, 0x57, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xa0, 0xc5, 0xc2,
-  0x1f, 0x70, 0xac, 0x30, 0x9a, 0x10, 0x00, 0x15, 0x0c, 0x52, 0x5b, 0x10,
-  0x15, 0x20, 0x33, 0x9a, 0x50, 0x04, 0x15, 0x08, 0x52, 0x44, 0x10, 0x15,
-  0x34, 0x33, 0x9a, 0x90, 0x08, 0x15, 0x08, 0x52, 0x44, 0x10, 0xd7, 0x38,
-  0x75, 0x85, 0x53, 0x37, 0x38, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06,
-  0x1a, 0x38, 0xb8, 0xc2, 0x1a, 0xe0, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26,
-  0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0xc3, 0x11, 0x4e, 0x1d,
-  0xe1, 0xd4, 0x11, 0x4e, 0x1d, 0xe1, 0xd4, 0x88, 0x41, 0x03, 0x80, 0x20,
-  0x18, 0x54, 0xeb, 0x00, 0x0b, 0xcc, 0xa2, 0x8c, 0x02, 0x31, 0x08, 0x81,
-  0x09, 0x01, 0x7c, 0x4e, 0x18, 0x66, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c,
-  0x94, 0x79, 0xc8, 0x85, 0x3c, 0x08, 0xcc, 0x01, 0x15, 0xc6, 0x61, 0x34,
-  0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c,
-  0xc0, 0xa0, 0x1d, 0x62, 0x41, 0x08, 0x2e, 0x70, 0xee, 0x8e, 0x61, 0x46,
-  0x0c, 0x14, 0x00, 0x04, 0xc1, 0x40, 0xc9, 0x87, 0x5f, 0xf8, 0x83, 0x80,
-  0x1d, 0x5c, 0x21, 0x1d, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46,
-  0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x00, 0x0c, 0xe6, 0xe1, 0x16, 0x84, 0xe0,
-  0x02, 0xe7, 0x86, 0x1b, 0xea, 0x80, 0x1e, 0xc0, 0xc0, 0x90, 0x58, 0x80,
-  0x8f, 0x0d, 0xb2, 0x00, 0x9f, 0x59, 0x06, 0x61, 0x18, 0x4c, 0x58, 0x05,
-  0xf9, 0x98, 0xc0, 0x0a, 0xf2, 0x31, 0x3f, 0x88, 0x05, 0xf8, 0x58, 0x1f,
-  0xc8, 0x02, 0x7c, 0x8c, 0x10, 0xe4, 0x63, 0x84, 0x20, 0x9f, 0x59, 0x02,
-  0xc2, 0x44, 0x01, 0x91, 0x8f, 0x21, 0xa1, 0x20, 0x1f, 0x13, 0x6e, 0x01,
-  0x3e, 0x26, 0xe0, 0x02, 0x7c, 0x4c, 0xa8, 0x05, 0xf9, 0x98, 0x60, 0x0b,
-  0xf2, 0x99, 0x25, 0x20, 0x06, 0x2a, 0x1c, 0x48, 0x20, 0x86, 0x81, 0x0a,
-  0x07, 0x12, 0x88, 0x61, 0x34, 0x21, 0x16, 0x84, 0xe1, 0x86, 0xc0, 0x24,
-  0xc0, 0x60, 0x96, 0xa1, 0x30, 0x82, 0x11, 0x03, 0x03, 0x00, 0x41, 0x30,
-  0x78, 0x5e, 0x62, 0x1d, 0x88, 0x11, 0x03, 0x03, 0x00, 0x41, 0x30, 0x78,
-  0x60, 0x82, 0x1d, 0x88, 0x59, 0x02, 0x63, 0xa0, 0xc2, 0x21, 0x0a, 0x86,
-  0x18, 0xa8, 0x70, 0x88, 0x82, 0x21, 0x86, 0x23, 0x04, 0x55, 0x20, 0xbe,
-  0xe1, 0x88, 0x21, 0x15, 0x84, 0xaf, 0x84, 0x60, 0x87, 0x23, 0x88, 0x56,
-  0x20, 0xbe, 0x12, 0x82, 0x1d, 0x8e, 0x30, 0x56, 0x41, 0xf8, 0x2a, 0x10,
-  0x76, 0x96, 0xe1, 0xd0, 0x82, 0xd1, 0x04, 0x5f, 0x18, 0x86, 0x1b, 0x82,
-  0x99, 0x00, 0x83, 0x59, 0x06, 0x24, 0x09, 0x4a, 0x17, 0x46, 0x02, 0x2e,
-  0x70, 0x6a, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x26, 0x9e, 0x20, 0x89,
-  0x66, 0x1e, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0xea, 0x09, 0x92,
-  0x08, 0x84, 0xe2, 0x85, 0x93, 0x80, 0x0b, 0x9c, 0x1a, 0x31, 0x38, 0x00,
-  0x10, 0x04, 0x83, 0x09, 0x2c, 0x50, 0x02, 0xba, 0x87, 0x11, 0x83, 0x03,
-  0x00, 0x41, 0x30, 0x98, 0xc2, 0x02, 0x25, 0x02, 0x61, 0x96, 0x40, 0x1b,
-  0x6e, 0x50, 0x76, 0x02, 0x0c, 0x66, 0x19, 0x14, 0x2d, 0x30, 0x5d, 0xe0,
-  0x85, 0xf8, 0xcc, 0x32, 0x2c, 0xce, 0x64, 0xbd, 0x50, 0xc5, 0xc7, 0x02,
-  0x81, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x53, 0x16, 0x14, 0xf2, 0xb1, 0x22,
-  0x88, 0x4f, 0x11, 0x64, 0xa1, 0xc3, 0x0d, 0x81, 0x58, 0x80, 0xc1, 0x2c,
-  0x03, 0xd3, 0x04, 0x36, 0x94, 0x03, 0x7c, 0x66, 0x09, 0x24, 0x23, 0x07,
-  0x22, 0x3e, 0xb3, 0x04, 0xd2, 0x2c, 0xc3, 0x23, 0x71, 0xf6, 0x95, 0x43,
-  0x7c, 0x2c, 0x60, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x38, 0x65, 0xc1, 0x23,
-  0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xe1, 0x16, 0x3a, 0xdc, 0x10, 0xb0, 0x05,
-  0x18, 0xcc, 0x32, 0x40, 0x51, 0x60, 0xed, 0x30, 0xc4, 0x67, 0x96, 0x40,
-  0x32, 0x02, 0x1e, 0xe0, 0x33, 0x4b, 0x20, 0x0d, 0xb4, 0x38, 0x18, 0x63,
-  0x35, 0x04, 0x24, 0x44, 0xb2, 0xe0, 0x98, 0x3b, 0xc8, 0x43, 0x7c, 0x66,
-  0x19, 0x26, 0xcb, 0x0c, 0x6c, 0x1e, 0xd4, 0x20, 0x3e, 0x16, 0x08, 0xf4,
-  0xb9, 0x60, 0x98, 0x0b, 0x9c, 0xb2, 0xa0, 0x90, 0x8f, 0x15, 0x41, 0x7c,
-  0x8a, 0xd0, 0x0b, 0x1d, 0x6e, 0x08, 0xf0, 0x02, 0x0c, 0x66, 0x19, 0xa8,
-  0x2a, 0xb0, 0x61, 0x1f, 0xe0, 0x33, 0x4b, 0xa0, 0x19, 0x3e, 0x10, 0xf1,
-  0x99, 0x25, 0xd0, 0x66, 0x19, 0x2e, 0xcd, 0x0d, 0x8c, 0x0e, 0xf2, 0x21,
-  0x3e, 0x16, 0x30, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9c, 0xb2, 0xe0, 0x91,
-  0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x20, 0x0d, 0x1d, 0x6e, 0x08, 0x44, 0x03,
-  0x0c, 0x66, 0x19, 0xb0, 0x2c, 0xb0, 0x90, 0x18, 0xe2, 0x33, 0x4b, 0xa0,
-  0x19, 0x61, 0x12, 0xf0, 0x99, 0x25, 0xd0, 0x06, 0x8a, 0x1c, 0x71, 0x40,
-  0xfc, 0x21, 0xf1, 0x07, 0x83, 0x0d, 0x32, 0x36, 0xc0, 0xd8, 0xc0, 0x62,
-  0x83, 0x8a, 0x0d, 0xa8, 0x81, 0x22, 0x87, 0x17, 0x10, 0x7f, 0x48, 0xfc,
-  0xc1, 0x20, 0x32, 0x03, 0xf3, 0x07, 0x0b, 0xab, 0x34, 0xea, 0xf0, 0xc1,
-  0xa9, 0x59, 0x86, 0x6d, 0x0e, 0x4a, 0x61, 0x34, 0xe1, 0x26, 0x86, 0xe1,
-  0x86, 0x20, 0x35, 0xc0, 0x60, 0x96, 0x81, 0xf3, 0x82, 0xe1, 0x88, 0x42,
-  0x2d, 0x86, 0xef, 0x8c, 0x61, 0x86, 0x1b, 0x82, 0x9a, 0x20, 0x83, 0x1a,
-  0x02, 0x1d, 0x8e, 0x40, 0xdc, 0x62, 0xf8, 0x2a, 0x10, 0xf4, 0x94, 0x61,
-  0x86, 0x1b, 0x02, 0x9c, 0x20, 0x83, 0x0a, 0x06, 0x9d, 0x65, 0xe8, 0xe4,
-  0x20, 0x38, 0x7e, 0x18, 0xe6, 0x9a, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04,
-  0xc1, 0x40, 0xdb, 0x8d, 0xd4, 0x30, 0x8b, 0xdb, 0x18, 0x4d, 0x08, 0x80,
-  0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0xe2,
-  0x90, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xb8, 0xc4, 0x03, 0x36, 0x0e,
-  0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x6b, 0x3c, 0x62, 0x83,
-  0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xb8, 0xc8, 0x43, 0x36,
-  0x24, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x03, 0x65, 0x3d, 0x62,
-  0x03, 0x2e, 0x02, 0xdf, 0x08, 0x0d, 0xde, 0x18, 0x4d, 0x08, 0x80, 0xe1,
-  0x86, 0x00, 0x3c, 0xc2, 0xe0, 0x02, 0xa7, 0x66, 0x09, 0xe4, 0x60, 0xb8,
-  0x41, 0x2b, 0x0f, 0x30, 0x98, 0x65, 0xf8, 0xc0, 0x20, 0x28, 0xb6, 0xa8,
-  0x0d, 0xb8, 0xc0, 0xa9, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x98, 0xdc,
-  0xc3, 0x36, 0xc0, 0xa0, 0x34, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60,
-  0x7a, 0x0f, 0xdb, 0x08, 0x84, 0x0b, 0x86, 0xa9, 0xb7, 0xd0, 0x0d, 0xb8,
-  0xc0, 0xa9, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x98, 0xe6, 0x63, 0x37,
-  0xc8, 0x40, 0x35, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0xa2, 0x8f,
-  0xdd, 0x08, 0x84, 0x0b, 0x86, 0xb9, 0xc0, 0xa9, 0x3b, 0x9c, 0x3a, 0x9c,
-  0x18, 0xe6, 0xd2, 0x60, 0x98, 0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83,
-  0x03, 0x00, 0x41, 0x30, 0xd0, 0xf2, 0xe3, 0x3c, 0x48, 0xa3, 0x3e, 0x46,
-  0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81,
-  0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x2e, 0x10,
-  0x71, 0x8f, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x0a,
-  0x91, 0xf7, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x2e,
-  0x11, 0x81, 0x8f, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x40,
-  0x49, 0x91, 0xf7, 0x70, 0x8d, 0x80, 0x3f, 0x7e, 0x43, 0x3f, 0x46, 0x13,
-  0x02, 0x60, 0xb8, 0x21, 0xf0, 0x8f, 0x30, 0xb8, 0xc0, 0xa9, 0x59, 0x02,
-  0x39, 0x18, 0x6e, 0xc0, 0x03, 0x11, 0x01, 0x83, 0x59, 0x86, 0x30, 0x90,
-  0x83, 0xc0, 0x42, 0x63, 0x34, 0xe2, 0x33, 0x1c, 0xd1, 0x07, 0xa4, 0x41,
-  0x7c, 0xb3, 0x0c, 0x62, 0x50, 0x06, 0x81, 0x95, 0x86, 0x1f, 0xc4, 0xc7,
-  0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x53, 0x16, 0x18, 0xf2, 0xb1,
-  0x22, 0x88, 0x4f, 0x11, 0x2c, 0xa2, 0xc3, 0x0d, 0x81, 0x8a, 0x80, 0xc1,
-  0x2c, 0xc3, 0x18, 0x90, 0x41, 0x60, 0x43, 0x6b, 0xc0, 0x67, 0x96, 0x20,
-  0x0d, 0x8c, 0x35, 0x88, 0xf8, 0xcc, 0x12, 0xa4, 0xc1, 0x70, 0x04, 0x2a,
-  0xb4, 0x86, 0xf0, 0xcd, 0x32, 0x98, 0x41, 0x1a, 0x04, 0x96, 0x0a, 0xae,
-  0x11, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4e, 0x59, 0x10,
-  0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xdc, 0x88, 0x0e, 0x37, 0x04, 0x35,
-  0x02, 0x06, 0xb3, 0x0c, 0x67, 0x80, 0x06, 0x81, 0xd9, 0xc6, 0x10, 0x9f,
-  0x59, 0x82, 0x34, 0x30, 0x22, 0x37, 0xe0, 0x33, 0x4b, 0x90, 0x06, 0x03,
-  0x2d, 0x8e, 0x36, 0x06, 0x18, 0x19, 0x10, 0x67, 0x20, 0xa0, 0x81, 0x59,
-  0x94, 0xc1, 0x05, 0xc3, 0x18, 0x6e, 0xf0, 0x46, 0x7c, 0x86, 0x23, 0x6a,
-  0xa1, 0x37, 0x88, 0x6f, 0x96, 0x41, 0x0d, 0xda, 0x20, 0x30, 0xdf, 0xb0,
-  0x85, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x70, 0xca, 0x02,
-  0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0xa2, 0x4c, 0x74, 0xb8, 0x21, 0x18,
-  0x13, 0x30, 0x98, 0x65, 0x58, 0x03, 0x36, 0x08, 0x6c, 0x30, 0x0f, 0xf8,
-  0xcc, 0x12, 0xc4, 0x81, 0x8d, 0x07, 0x11, 0x9f, 0x59, 0x82, 0x38, 0x18,
-  0x8e, 0x00, 0x07, 0xf2, 0x10, 0xbe, 0x59, 0x06, 0x37, 0x88, 0x83, 0xc0,
-  0xc2, 0xa1, 0x3c, 0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xc0,
-  0x29, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0x38, 0xd1, 0xe1,
-  0x86, 0xc0, 0x4d, 0xc0, 0x60, 0x96, 0xe1, 0x0d, 0xe0, 0x20, 0xb0, 0xf6,
-  0x18, 0xe2, 0x33, 0x4b, 0x10, 0x07, 0x46, 0xc8, 0x07, 0x7c, 0x66, 0x09,
-  0xe2, 0x60, 0xa0, 0xc5, 0xd1, 0xd6, 0x00, 0x63, 0x03, 0xe2, 0x0d, 0x04,
-  0x38, 0xb0, 0x8d, 0x36, 0xb8, 0x60, 0x98, 0x0b, 0x9c, 0xba, 0xcd, 0xa9,
-  0xf3, 0x8d, 0x61, 0xee, 0x1d, 0x86, 0x39, 0x62, 0x98, 0x23, 0x86, 0x19,
-  0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0xed, 0x4f, 0xda, 0x44, 0x45, 0xf6,
-  0x64, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46,
-  0x13, 0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0,
-  0x32, 0x15, 0x3a, 0x49, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c,
-  0xae, 0x53, 0xa9, 0x93, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1,
-  0xe0, 0x42, 0x15, 0x3b, 0x49, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10,
-  0x0c, 0x94, 0x57, 0xa9, 0x13, 0x1a, 0x09, 0x44, 0xa5, 0x4c, 0x40, 0x65,
-  0x34, 0x21, 0x00, 0x86, 0x1b, 0x02, 0x52, 0x09, 0x83, 0x0b, 0x9c, 0x9a,
-  0x25, 0x90, 0x83, 0x81, 0x16, 0x47, 0x35, 0x3a, 0x56, 0xe2, 0x5c, 0xe2,
-  0x13, 0xe2, 0x80, 0x95, 0xc0, 0x60, 0x38, 0x22, 0x38, 0x13, 0xe7, 0xbb,
-  0x60, 0x88, 0x11, 0x03, 0x07, 0x00, 0x41, 0x30, 0x60, 0x6a, 0x45, 0x4e,
-  0x7a, 0xc4, 0x46, 0x52, 0x25, 0xb8, 0x93, 0x3b, 0xb9, 0x13, 0x36, 0x39,
-  0x95, 0x59, 0x82, 0x11, 0x1a, 0x6e, 0x38, 0x8d, 0x54, 0x01, 0x83, 0x59,
-  0x06, 0x3a, 0x88, 0x89, 0x60, 0xc4, 0xc0, 0x00, 0x40, 0x10, 0x0c, 0x9e,
-  0x5a, 0xa9, 0x93, 0x92, 0x18, 0x31, 0x30, 0x00, 0x10, 0x04, 0x83, 0xc7,
-  0x56, 0xec, 0xa4, 0x24, 0x4c, 0x58, 0x13, 0xf8, 0x98, 0xc0, 0x26, 0xf0,
-  0x19, 0x4d, 0xe8, 0x91, 0x61, 0xb8, 0x21, 0x78, 0x15, 0x30, 0x98, 0x65,
-  0xa8, 0x83, 0x3b, 0x08, 0x86, 0x23, 0x0c, 0x38, 0x19, 0xbe, 0x3b, 0x86,
-  0x19, 0x6e, 0x08, 0x76, 0x84, 0x0c, 0x6a, 0x08, 0x74, 0x38, 0x22, 0xa1,
-  0x93, 0xe1, 0xab, 0x40, 0xd0, 0x5b, 0x86, 0x19, 0x6e, 0x08, 0x7c, 0x84,
-  0x0c, 0x2a, 0x18, 0x74, 0x96, 0xc1, 0x0e, 0x56, 0x21, 0x38, 0x11, 0x19,
-  0xe6, 0x66, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xd0, 0xc2,
-  0xe5, 0x55, 0xd8, 0xa4, 0x57, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08,
-  0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x38, 0x64, 0xc4, 0x00,
-  0x01, 0x40, 0x10, 0x0c, 0x2e, 0x74, 0xb1, 0x95, 0x83, 0x08, 0x46, 0x0c,
-  0x10, 0x00, 0x04, 0xc1, 0xe0, 0x4a, 0x97, 0x5b, 0x61, 0x88, 0x60, 0xc4,
-  0x00, 0x01, 0x40, 0x10, 0x0c, 0x2e, 0x75, 0xc1, 0x15, 0x89, 0x08, 0x46,
-  0x0c, 0x14, 0x00, 0x04, 0xc1, 0x40, 0x89, 0x97, 0x5b, 0xb1, 0x93, 0x80,
-  0x5c, 0x4e, 0x45, 0x5c, 0x46, 0x13, 0x02, 0x60, 0xb8, 0x21, 0x30, 0x97,
-  0x30, 0xb8, 0xc0, 0xa9, 0x59, 0x82, 0x55, 0x18, 0x6e, 0xd0, 0xd6, 0x05,
-  0x0c, 0x66, 0x19, 0xf0, 0x20, 0x0f, 0x82, 0x92, 0x93, 0x5d, 0x81, 0x0b,
-  0x9c, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x89, 0x5e, 0x78, 0x25,
-  0x0c, 0x56, 0x65, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xa6, 0x7a, 0xe1,
-  0x95, 0x40, 0xb8, 0x60, 0x98, 0xaa, 0x13, 0x70, 0x81, 0x0b, 0x9c, 0x1a,
-  0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x29, 0x5f, 0xc2, 0xa5, 0x0c, 0x60,
-  0x65, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x26, 0x7d, 0x09, 0x97, 0x40,
-  0xb8, 0x60, 0x98, 0x0b, 0x9c, 0xba, 0xc3, 0xa9, 0xf3, 0x91, 0x61, 0xee,
-  0x2d, 0x86, 0x39, 0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10,
-  0x04, 0x03, 0xed, 0x5f, 0xda, 0x45, 0x55, 0xf6, 0x65, 0x34, 0x21, 0x00,
-  0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88,
-  0x44, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x32, 0x19, 0x7a, 0x49,
-  0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xae, 0x93, 0xa9, 0x97,
-  0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x42, 0x19, 0x7b,
-  0x49, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x94, 0x97, 0xa9,
-  0x17, 0x5a, 0x09, 0x44, 0xa6, 0x5c, 0x40, 0x66, 0x34, 0x21, 0x00, 0x86,
-  0x1b, 0x02, 0x92, 0x09, 0x83, 0x0b, 0x9c, 0x9a, 0x25, 0x58, 0x85, 0xe1,
-  0x06, 0x3c, 0x40, 0x19, 0x30, 0x98, 0x65, 0xd0, 0x83, 0x55, 0x08, 0xec,
-  0x54, 0x52, 0x25, 0x3e, 0xc3, 0x11, 0x7e, 0xa0, 0x2a, 0xc4, 0x37, 0xcb,
-  0xb0, 0x07, 0x7e, 0x10, 0xd8, 0xaa, 0xfc, 0x41, 0x7c, 0x2c, 0x18, 0xe8,
-  0x73, 0xc1, 0x30, 0x17, 0x38, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8,
-  0x14, 0x21, 0x33, 0x3a, 0xdc, 0x10, 0xc0, 0x0c, 0x18, 0xcc, 0x32, 0xf0,
-  0x41, 0x1f, 0x04, 0x36, 0xcc, 0x0a, 0x7c, 0x66, 0x09, 0x44, 0xc1, 0x64,
-  0x85, 0x88, 0xcf, 0x2c, 0x81, 0x28, 0x0c, 0x47, 0xa4, 0xc2, 0xac, 0x08,
-  0xdf, 0x2c, 0xc3, 0x1f, 0x88, 0x42, 0x60, 0xaa, 0x40, 0x2b, 0xf1, 0xb1,
-  0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0xe0, 0x94, 0x05, 0x91, 0x7c, 0xac,
-  0x08, 0xe2, 0x53, 0x44, 0xcf, 0xe8, 0x70, 0x43, 0xb0, 0x33, 0x60, 0x30,
-  0xcb, 0x00, 0x0a, 0xa1, 0x10, 0x18, 0xaf, 0x0c, 0xf1, 0x99, 0x25, 0x10,
-  0x05, 0x23, 0x7e, 0x05, 0x3e, 0xb3, 0x04, 0xa2, 0x30, 0xd0, 0xe2, 0x68,
-  0x7c, 0x80, 0xf5, 0x01, 0x01, 0x0a, 0x42, 0x28, 0xa0, 0x85, 0x1f, 0x5c,
-  0x30, 0x8c, 0xf9, 0x8a, 0xb8, 0xc4, 0x67, 0x38, 0xc2, 0x16, 0xc6, 0x85,
-  0xf8, 0x66, 0x19, 0x46, 0xc1, 0x14, 0x02, 0x23, 0x97, 0x5b, 0x88, 0x8f,
-  0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa7, 0x2c, 0x30, 0xe4, 0x63,
-  0x45, 0x10, 0x9f, 0x22, 0xd6, 0x46, 0x87, 0x1b, 0x82, 0xb4, 0x01, 0x83,
-  0x59, 0x06, 0x52, 0x28, 0x85, 0xc0, 0x06, 0x76, 0x81, 0xcf, 0x2c, 0x81,
-  0x2a, 0x58, 0xba, 0x10, 0xf1, 0x99, 0x25, 0x50, 0x85, 0xe1, 0x88, 0x70,
-  0x50, 0x17, 0xe1, 0x9b, 0x65, 0x38, 0x05, 0x55, 0x08, 0x4c, 0x1c, 0xd6,
-  0x25, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9c, 0xb2, 0x20,
-  0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xb0, 0x1b, 0x1d, 0x6e, 0x08, 0xe8,
-  0x06, 0x0c, 0x66, 0x19, 0x50, 0x21, 0x15, 0x02, 0x9b, 0x97, 0x21, 0x3e,
-  0xb3, 0x04, 0xaa, 0x60, 0x04, 0xbe, 0xc0, 0x67, 0x96, 0x40, 0x15, 0x06,
-  0x5a, 0x1c, 0x8d, 0x14, 0xb0, 0x52, 0x20, 0x50, 0x41, 0x48, 0x05, 0xdc,
-  0x30, 0x85, 0x0b, 0x86, 0xb9, 0xc0, 0xa9, 0xdb, 0x9c, 0x3a, 0x72, 0x19,
-  0xe6, 0xea, 0x63, 0x98, 0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03,
-  0x00, 0x41, 0x30, 0xd0, 0x4a, 0x67, 0x6e, 0x60, 0x26, 0x74, 0x46, 0x13,
-  0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18,
-  0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x2e, 0xd6, 0xd1,
-  0x9b, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x6a, 0x9d,
-  0xbd, 0x49, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x2e, 0xd7,
-  0xe1, 0x9b, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x40, 0xa9,
-  0x9d, 0xbd, 0xd1, 0x99, 0x00, 0x75, 0xd6, 0xc6, 0x74, 0x46, 0x13, 0x02,
-  0x60, 0xb8, 0x21, 0x50, 0x9d, 0x30, 0xb8, 0xc0, 0xa9, 0x59, 0x82, 0x55,
-  0x18, 0x68, 0x71, 0x54, 0xc3, 0x0e, 0x70, 0xad, 0x0e, 0x5c, 0x02, 0x0f,
-  0x04, 0x55, 0xc0, 0xb5, 0x3c, 0x98, 0x65, 0x60, 0x05, 0x57, 0xf0, 0x87,
-  0xe1, 0x88, 0x90, 0x68, 0x9b, 0xe1, 0x3b, 0x91, 0x18, 0x66, 0xb8, 0x21,
-  0xc0, 0x19, 0x32, 0xa8, 0x21, 0xd0, 0xe1, 0x08, 0x93, 0x88, 0x9b, 0xe1,
-  0xab, 0x40, 0xd0, 0x43, 0x89, 0x61, 0x86, 0x1b, 0x82, 0x9d, 0x21, 0x83,
-  0x0a, 0x06, 0x9d, 0x65, 0x68, 0x05, 0x71, 0x08, 0xee, 0x5f, 0x86, 0x39,
-  0x18, 0x19, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x34, 0xdf, 0x61,
-  0x9d, 0xb4, 0xd1, 0x9d, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1,
-  0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x0e, 0x19, 0x31, 0x40, 0x00,
-  0x10, 0x04, 0x83, 0xab, 0x7c, 0x66, 0xe7, 0x20, 0x82, 0x11, 0x03, 0x04,
-  0x00, 0x41, 0x30, 0xb8, 0xcc, 0x87, 0x76, 0x18, 0x22, 0x18, 0x31, 0x40,
-  0x00, 0x10, 0x04, 0x83, 0xeb, 0x7c, 0x6a, 0x47, 0x22, 0x82, 0x11, 0x03,
-  0x05, 0x00, 0x41, 0x30, 0x50, 0xdc, 0x87, 0x76, 0xe6, 0x26, 0x08, 0x1f,
-  0xd2, 0xf9, 0x9d, 0xd1, 0x84, 0x00, 0x18, 0x6e, 0x08, 0xc6, 0x27, 0x0c,
-  0x2e, 0x70, 0x6a, 0x96, 0x40, 0x1c, 0x86, 0x1b, 0x6e, 0x02, 0x7d, 0xc0,
-  0x60, 0x96, 0xe1, 0x15, 0x60, 0x21, 0xa8, 0xb7, 0xc1, 0x1d, 0xb8, 0xc0,
-  0xa9, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x98, 0xe2, 0x27, 0x77, 0x78,
-  0x02, 0x75, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0x92, 0x9f, 0xdc,
-  0x09, 0x84, 0x0b, 0x86, 0x29, 0xb9, 0xe9, 0x1d, 0xb8, 0xc0, 0xa9, 0x11,
-  0x83, 0x03, 0x00, 0x41, 0x30, 0x98, 0xec, 0xc7, 0x77, 0xc4, 0xa2, 0x75,
-  0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0xba, 0x1f, 0xdf, 0x09, 0x84,
-  0x0b, 0x86, 0xb9, 0xc0, 0xa9, 0x3b, 0x9c, 0xba, 0x9d, 0x19, 0xe6, 0xd8,
-  0x64, 0x98, 0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41,
-  0x30, 0xd0, 0xf8, 0x47, 0x7d, 0x4e, 0x07, 0x7f, 0x46, 0x13, 0x02, 0x60,
-  0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48,
-  0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xae, 0x11, 0x8a, 0x9f, 0x84,
-  0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x22, 0x21, 0xf9, 0x49,
-  0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xae, 0x12, 0x9a, 0x9f,
-  0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x40, 0x61, 0x21, 0xf9,
-  0x89, 0x9d, 0xe0, 0x7f, 0xc4, 0xa7, 0x7f, 0x46, 0x13, 0x02, 0x60, 0xb8,
-  0x21, 0x08, 0xa1, 0x30, 0xb8, 0xc0, 0xa9, 0x59, 0x02, 0x71, 0x18, 0x6e,
-  0xa8, 0x8b, 0x12, 0x02, 0x83, 0x59, 0x86, 0x58, 0x10, 0x87, 0xc0, 0x48,
-  0xc7, 0x74, 0xe2, 0x33, 0x1c, 0x91, 0x17, 0xa7, 0x43, 0x7c, 0xb3, 0x0c,
-  0xb2, 0x50, 0x0b, 0x81, 0xa1, 0x8e, 0x5e, 0xc4, 0xc7, 0x82, 0x81, 0x3e,
-  0x17, 0x0c, 0x73, 0x81, 0x53, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f,
-  0x11, 0x2f, 0xa4, 0xc3, 0x0d, 0x41, 0x0b, 0x81, 0xc1, 0x2c, 0xc3, 0x2c,
-  0xd0, 0x42, 0x60, 0x03, 0xec, 0xc0, 0x67, 0x96, 0x20, 0x17, 0xec, 0x75,
-  0x88, 0xf8, 0xcc, 0x12, 0xe4, 0xc2, 0x70, 0x04, 0x69, 0xc0, 0x8e, 0xf0,
-  0xcd, 0x32, 0xd8, 0x42, 0x2e, 0x04, 0x56, 0x1a, 0xb1, 0x13, 0x1f, 0x0b,
-  0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4e, 0x59, 0x10, 0xc9, 0xc7, 0x8a,
-  0x20, 0x3e, 0x45, 0xe8, 0x90, 0x0e, 0x37, 0x04, 0x38, 0x04, 0x06, 0xb3,
-  0x0c, 0xb7, 0x80, 0x0b, 0x81, 0xe5, 0xce, 0x10, 0x9f, 0x59, 0x82, 0x5c,
-  0x30, 0x82, 0x77, 0xe0, 0x33, 0x4b, 0x90, 0x0b, 0x03, 0x2d, 0x8e, 0x36,
-  0x0b, 0x18, 0x2d, 0x10, 0xb7, 0x20, 0xe0, 0x82, 0xcf, 0xd4, 0xc2, 0x05,
-  0xc3, 0xd8, 0xee, 0xfc, 0x4e, 0x7c, 0x86, 0x23, 0x66, 0x03, 0x7c, 0x88,
-  0x6f, 0x96, 0x41, 0x17, 0x7a, 0x21, 0xb0, 0xf0, 0xa1, 0x8d, 0xf8, 0x58,
-  0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x70, 0xca, 0x02, 0x43, 0x3e, 0x56,
-  0x04, 0xf1, 0x29, 0x02, 0x8d, 0x74, 0xb8, 0x21, 0x30, 0x23, 0x30, 0x98,
-  0x65, 0xd8, 0x05, 0x5e, 0x08, 0x6c, 0x48, 0x1f, 0xf8, 0xcc, 0x12, 0x84,
-  0x83, 0x99, 0x0f, 0x11, 0x9f, 0x59, 0x82, 0x70, 0x18, 0x8e, 0xf0, 0x8d,
-  0xf3, 0x11, 0xbe, 0x59, 0x06, 0x5f, 0x08, 0x87, 0xc0, 0x7e, 0x03, 0x7d,
-  0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x29, 0x0b, 0x22,
-  0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0x39, 0xd2, 0xe1, 0x86, 0x20, 0x8e,
-  0xc0, 0x60, 0x96, 0xe1, 0x17, 0xc0, 0x21, 0x30, 0xf8, 0x19, 0xe2, 0x33,
-  0x4b, 0x10, 0x0e, 0x46, 0xd4, 0x0f, 0x7c, 0x66, 0x09, 0xc2, 0x61, 0xa0,
-  0xc5, 0xd1, 0x76, 0x01, 0xe3, 0x05, 0xe2, 0x17, 0x04, 0x70, 0x90, 0x9d,
-  0x5e, 0xb8, 0x60, 0x98, 0x0b, 0x9c, 0xba, 0xcd, 0xa9, 0x0b, 0x9f, 0x61,
-  0x4e, 0x5e, 0x86, 0x39, 0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00,
-  0x10, 0x04, 0x03, 0x4d, 0x94, 0xe0, 0xa8, 0x85, 0xfc, 0x68, 0x34, 0x21,
-  0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1,
-  0x88, 0x44, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x4a, 0xa5, 0x3b,
-  0x4a, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x2e, 0x55, 0xc2,
-  0xa3, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x5a, 0xa5,
-  0x3c, 0x4a, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x14, 0x59,
-  0xc2, 0xa3, 0x1b, 0x0a, 0x4a, 0x09, 0x8d, 0x46, 0x69, 0x34, 0x21, 0x00,
-  0x86, 0x1b, 0x82, 0x53, 0x0a, 0x83, 0x0b, 0x9c, 0x9a, 0x25, 0x10, 0x87,
-  0x81, 0x16, 0x47, 0x35, 0x5a, 0x41, 0x16, 0x03, 0x56, 0x70, 0x89, 0x57,
-  0x10, 0xc2, 0x41, 0x16, 0x03, 0x58, 0x98, 0x65, 0x18, 0x87, 0x72, 0xd8,
-  0x8f, 0xe1, 0x08, 0x10, 0x51, 0xa3, 0xe1, 0xbb, 0x10, 0x19, 0x66, 0xb8,
-  0x21, 0xa8, 0x21, 0x32, 0xa8, 0x21, 0xd0, 0xe1, 0x88, 0x10, 0x71, 0xa3,
-  0xe1, 0xab, 0x40, 0xd0, 0x1b, 0x91, 0x61, 0x86, 0x1b, 0x02, 0x1c, 0x22,
-  0x83, 0x0a, 0x06, 0x9d, 0x65, 0x20, 0x87, 0x7c, 0x08, 0x8e, 0x7f, 0x86,
-  0xb9, 0x96, 0x19, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xb4, 0x5d,
-  0x4a, 0x25, 0x33, 0xba, 0xa5, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82,
-  0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x0e, 0x19, 0x31, 0x40,
-  0x00, 0x10, 0x04, 0x83, 0x4b, 0x9c, 0x60, 0xe9, 0x20, 0x82, 0x11, 0x03,
-  0x04, 0x00, 0x41, 0x30, 0xb8, 0xc6, 0x29, 0x96, 0x18, 0x22, 0x18, 0x31,
-  0x40, 0x00, 0x10, 0x04, 0x83, 0x8b, 0x9c, 0x64, 0x49, 0x22, 0x82, 0x11,
-  0x03, 0x05, 0x00, 0x41, 0x30, 0x50, 0xd6, 0x29, 0x96, 0xe0, 0x28, 0xf0,
-  0xa5, 0x50, 0xe2, 0xa5, 0xd1, 0x84, 0x00, 0x18, 0x6e, 0x08, 0xc0, 0x29,
-  0x0c, 0x2e, 0x70, 0x6a, 0x96, 0x20, 0x1f, 0x86, 0x1b, 0x68, 0xa4, 0x9c,
-  0xc0, 0x60, 0x96, 0xc1, 0x1c, 0xce, 0x21, 0x28, 0x36, 0xaa, 0x25, 0xb8,
-  0xc0, 0xa9, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x98, 0xdc, 0xc9, 0x96,
-  0x76, 0xa4, 0x94, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0x7a, 0x27,
-  0x5b, 0x0a, 0x84, 0x0b, 0x86, 0xa9, 0x37, 0xd2, 0x25, 0xb8, 0xc0, 0xa9,
-  0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x98, 0xe6, 0x69, 0x97, 0x7a, 0x44,
-  0x95, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0xa2, 0xa7, 0x5d, 0x0a,
-  0x84, 0x0b, 0x86, 0xb9, 0xc0, 0xa9, 0x3b, 0x9c, 0x3a, 0x1c, 0x1a, 0xe6,
-  0xd2, 0x66, 0x98, 0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00,
-  0x41, 0x30, 0xd0, 0xf2, 0xe9, 0x9c, 0x48, 0xa9, 0x9e, 0x46, 0x13, 0x02,
-  0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a,
-  0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x2e, 0x90, 0x72, 0xa7,
-  0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x0a, 0xa9, 0x77,
-  0x4a, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x2e, 0x91, 0x82,
-  0xa7, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x40, 0x49, 0xa9,
-  0x77, 0x72, 0xa5, 0x80, 0x9f, 0x7e, 0x49, 0x9f, 0x46, 0x13, 0x02, 0x60,
-  0xb8, 0x21, 0xf0, 0xa7, 0x30, 0xb8, 0xc0, 0xa9, 0x59, 0x82, 0x7c, 0x18,
-  0x6e, 0x90, 0x13, 0x91, 0x02, 0x83, 0x59, 0x06, 0x74, 0xc8, 0x87, 0xc0,
-  0x42, 0x69, 0x94, 0xe2, 0x33, 0x1c, 0x81, 0x27, 0xa4, 0x44, 0x7c, 0xb3,
-  0x0c, 0xe9, 0xc0, 0x0e, 0x81, 0x95, 0x52, 0x9e, 0xc4, 0xc7, 0x82, 0x81,
-  0x3e, 0x17, 0x0c, 0x73, 0x81, 0x53, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88,
-  0x4f, 0x11, 0x2c, 0xa5, 0xc3, 0x0d, 0x81, 0x4a, 0x81, 0xc1, 0x2c, 0x83,
-  0x3a, 0xac, 0x43, 0x60, 0x43, 0x2b, 0xc1, 0x67, 0x96, 0x00, 0x1e, 0x8c,
-  0x95, 0x88, 0xf8, 0xcc, 0x12, 0xc0, 0xc3, 0x70, 0xc4, 0xa8, 0xb4, 0x92,
-  0xf0, 0xcd, 0x32, 0xb4, 0x03, 0x3c, 0x04, 0x46, 0x2a, 0xae, 0x14, 0x1f,
-  0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4e, 0x59, 0x10, 0xc9, 0xc7,
-  0x8a, 0x20, 0x3e, 0x45, 0xdc, 0x94, 0x0e, 0x37, 0x04, 0x35, 0x05, 0x06,
-  0xb3, 0x0c, 0xee, 0xf0, 0x0e, 0x81, 0xd9, 0xd2, 0x10, 0x9f, 0x59, 0x02,
-  0x78, 0x30, 0x22, 0x97, 0xe0, 0x33, 0x4b, 0x00, 0x0f, 0x03, 0x2d, 0x8e,
-  0xa6, 0x0e, 0xd8, 0x3a, 0x10, 0xee, 0x20, 0xbc, 0x03, 0x4f, 0xb1, 0xc3,
-  0x05, 0xc3, 0x18, 0x2e, 0xf1, 0x52, 0x7c, 0x86, 0x23, 0x5c, 0xa5, 0x97,
-  0x88, 0x6f, 0x96, 0x21, 0x1e, 0xe8, 0x21, 0x30, 0x5f, 0x7a, 0x95, 0xf8,
-  0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x70, 0xca, 0x02, 0x43, 0x3e,
-  0x56, 0x04, 0xf1, 0x29, 0xa2, 0xac, 0x74, 0xb8, 0x21, 0x18, 0x2b, 0x30,
-  0x98, 0x65, 0x90, 0x87, 0x79, 0x08, 0x6c, 0x30, 0x27, 0xf8, 0xcc, 0x12,
-  0xe0, 0x83, 0x8d, 0x13, 0x11, 0x9f, 0x59, 0x02, 0x7c, 0x18, 0x8e, 0xc8,
-  0x15, 0x72, 0x12, 0xbe, 0x59, 0x86, 0x7a, 0xc0, 0x87, 0xc0, 0x74, 0xa5,
-  0x9c, 0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x29, 0x0b,
-  0x22, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0xb8, 0xd2, 0xe1, 0x86, 0xc0,
-  0xad, 0xc0, 0x60, 0x96, 0xc1, 0x1e, 0xee, 0x21, 0xb0, 0x76, 0x1a, 0xe2,
-  0x33, 0x4b, 0x80, 0x0f, 0x46, 0xc8, 0x13, 0x7c, 0x66, 0x09, 0xf0, 0x61,
-  0xa0, 0xc5, 0xd1, 0xe4, 0x01, 0x9b, 0x07, 0xc2, 0x1e, 0x84, 0x7b, 0x40,
-  0x2d, 0x7a, 0xb8, 0x60, 0x98, 0x0b, 0x9c, 0xba, 0xcd, 0xa9, 0xf3, 0xa5,
-  0x61, 0xee, 0x7d, 0x86, 0x39, 0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38,
-  0x00, 0x10, 0x04, 0x03, 0xed, 0xaf, 0xda, 0x4a, 0xa5, 0xf6, 0x6a, 0x34,
-  0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88,
-  0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x32, 0x2d,
-  0xba, 0x4a, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xae, 0xd3,
-  0xaa, 0xab, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x42,
-  0x2d, 0xbb, 0x4a, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x94,
-  0xd7, 0xaa, 0x2b, 0x9a, 0x0a, 0x44, 0xab, 0xac, 0x40, 0x6b, 0x34, 0x21,
-  0x00, 0x86, 0x1b, 0x02, 0xd2, 0x0a, 0x83, 0x0b, 0x9c, 0x9a, 0x25, 0xc8,
-  0x87, 0x81, 0x16, 0x47, 0x35, 0xc8, 0x81, 0x55, 0x83, 0x71, 0x70, 0x09,
-  0x73, 0x10, 0xf0, 0x81, 0x55, 0x83, 0x73, 0x98, 0x65, 0xd0, 0x07, 0x7e,
-  0xc0, 0x97, 0xe1, 0x88, 0x7d, 0x39, 0xab, 0xe1, 0x3b, 0x7e, 0x19, 0x66,
-  0xb8, 0x21, 0x90, 0x29, 0x32, 0xa8, 0x21, 0xd0, 0xe1, 0x08, 0x7f, 0x59,
-  0xab, 0xe1, 0xab, 0x40, 0xd0, 0x03, 0x99, 0x61, 0x86, 0x1b, 0x82, 0x9a,
-  0x22, 0x83, 0x0a, 0x06, 0x9d, 0x65, 0xd8, 0x07, 0x98, 0x08, 0x2e, 0x9f,
-  0x86, 0x39, 0x15, 0x1a, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x34,
-  0xdc, 0x32, 0xad, 0xb1, 0xa2, 0xad, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10,
-  0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x0e, 0x19, 0x31,
-  0x40, 0x00, 0x10, 0x04, 0x83, 0xeb, 0xb7, 0x5a, 0xeb, 0x20, 0x82, 0x11,
-  0x03, 0x04, 0x00, 0x41, 0x30, 0xb8, 0xc0, 0xcb, 0xb5, 0x18, 0x22, 0x18,
-  0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x2b, 0xbc, 0x5e, 0x4b, 0x22, 0x82,
-  0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x50, 0xd0, 0xcb, 0xb5, 0xda, 0x2a,
-  0xd8, 0x2d, 0xbf, 0xca, 0xad, 0xd1, 0x84, 0x00, 0x18, 0x6e, 0x08, 0x7a,
-  0x2b, 0x0c, 0x2e, 0x70, 0x6a, 0x96, 0x00, 0x26, 0x86, 0x1b, 0x62, 0x46,
-  0xbc, 0xc0, 0x60, 0x96, 0xa1, 0x1f, 0xfc, 0x21, 0xa8, 0xb4, 0x92, 0x2d,
-  0xb8, 0xc0, 0xa9, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x98, 0xd6, 0x6b,
-  0xb6, 0x6c, 0x46, 0xb4, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0x62,
-  0xaf, 0xd9, 0x0a, 0x84, 0x0b, 0x86, 0x29, 0xb6, 0xba, 0x2d, 0xb8, 0xc0,
-  0xa9, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x98, 0xe0, 0x0b, 0xb7, 0x74,
-  0xe6, 0xb4, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0x8a, 0x2f, 0xdc,
-  0x0a, 0x84, 0x0b, 0x86, 0xb9, 0xc0, 0xa9, 0x3b, 0x9c, 0xba, 0x9a, 0x1a,
-  0xe6, 0xcc, 0x68, 0x98, 0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03,
-  0x00, 0x41, 0x30, 0xd0, 0xec, 0x8b, 0xbc, 0x42, 0x4b, 0xbe, 0x46, 0x13,
-  0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18,
-  0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xae, 0xfe, 0x5a,
-  0xaf, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0xf2, 0x2f,
-  0xf6, 0x4a, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xae, 0xff,
-  0x6a, 0xaf, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x40, 0x31,
-  0x31, 0xf6, 0x5a, 0xad, 0x20, 0xbf, 0x78, 0xeb, 0xbe, 0x46, 0x13, 0x02,
-  0x60, 0xb8, 0x21, 0xd8, 0xaf, 0x30, 0xb8, 0xc0, 0xa9, 0x59, 0x02, 0x98,
-  0x18, 0x6e, 0x78, 0x9b, 0xff, 0x02, 0x83, 0x59, 0x86, 0x7f, 0x80, 0x89,
-  0xc0, 0xfc, 0x0a, 0xb4, 0xe2, 0x33, 0x1c, 0x31, 0x37, 0xa1, 0x45, 0x7c,
-  0xb3, 0x0c, 0x20, 0x31, 0x12, 0x81, 0x89, 0x16, 0xdd, 0xc4, 0xc7, 0x82,
-  0x81, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x53, 0x16, 0x18, 0xf2, 0xb1, 0x22,
-  0x88, 0x4f, 0x11, 0x29, 0xa6, 0xc3, 0x0d, 0xc1, 0x89, 0x81, 0xc1, 0x2c,
-  0x43, 0x48, 0x88, 0x44, 0x60, 0x83, 0x6a, 0xc1, 0x67, 0x96, 0xe0, 0x24,
-  0x2c, 0xb5, 0x88, 0xf8, 0xcc, 0x12, 0x9c, 0xc4, 0x70, 0x84, 0xdf, 0xa8,
-  0x96, 0xf0, 0xcd, 0x32, 0x90, 0xc4, 0x49, 0x04, 0xf6, 0x37, 0xab, 0x15,
-  0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4e, 0x59, 0x10, 0xc9,
-  0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xd0, 0x98, 0x0e, 0x37, 0x04, 0x32, 0x06,
-  0x06, 0xb3, 0x0c, 0x25, 0x61, 0x12, 0x81, 0xcd, 0xd6, 0x10, 0x9f, 0x59,
-  0x82, 0x93, 0x30, 0xc2, 0xb6, 0xe0, 0x33, 0x4b, 0x70, 0x12, 0x03, 0x2d,
-  0x8e, 0x16, 0x12, 0x98, 0x48, 0x10, 0x25, 0x21, 0x98, 0x84, 0xcc, 0x8d,
-  0xc4, 0x05, 0xc3, 0x58, 0x6d, 0xe5, 0x56, 0x7c, 0x86, 0x23, 0x56, 0x47,
-  0xb7, 0x88, 0x6f, 0x96, 0x01, 0x25, 0x56, 0x22, 0xb0, 0xdd, 0x62, 0x9d,
-  0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x70, 0xca, 0x02, 0x43,
-  0x3e, 0x56, 0x04, 0xf1, 0x29, 0x42, 0xcc, 0x74, 0xb8, 0x21, 0x00, 0x33,
-  0x30, 0x98, 0x65, 0x48, 0x09, 0x95, 0x08, 0x6c, 0x18, 0x2f, 0xf8, 0xcc,
-  0x12, 0xbc, 0x84, 0x81, 0x17, 0x11, 0x9f, 0x59, 0x82, 0x97, 0x18, 0x8e,
-  0xb0, 0x9d, 0xf0, 0x12, 0xbe, 0x59, 0x06, 0x96, 0x78, 0x89, 0xc0, 0x6e,
-  0x47, 0xbc, 0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x29,
-  0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0x36, 0xd3, 0xe1, 0x86,
-  0x60, 0xcd, 0xc0, 0x60, 0x96, 0xa1, 0x25, 0x5c, 0x22, 0x30, 0xf5, 0x1a,
-  0xe2, 0x33, 0x4b, 0xf0, 0x12, 0x46, 0xbc, 0x17, 0x7c, 0x66, 0x09, 0x5e,
-  0x62, 0xa0, 0xc5, 0xd1, 0x52, 0x02, 0x53, 0x09, 0xa2, 0x25, 0x04, 0x97,
-  0x10, 0xbd, 0x95, 0xb8, 0x60, 0x98, 0x0b, 0x9c, 0xba, 0xcd, 0xa9, 0xdb,
-  0xad, 0x61, 0x8e, 0x9d, 0x86, 0x39, 0x62, 0x98, 0x23, 0x86, 0x19, 0x31,
-  0x38, 0x00, 0x10, 0x04, 0x03, 0x8d, 0xcf, 0xd4, 0xec, 0xc4, 0xf0, 0x6c,
-  0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13,
-  0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x1a,
-  0xb5, 0x38, 0x4b, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x2e,
-  0x52, 0x93, 0xb3, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0,
-  0x2a, 0xb5, 0x39, 0x4b, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c,
-  0x14, 0x56, 0x93, 0xb3, 0x18, 0x0b, 0xfe, 0x4c, 0xcc, 0xfa, 0x6c, 0x34,
-  0x21, 0x00, 0x86, 0x1b, 0x82, 0x50, 0x0b, 0x83, 0x0b, 0x9c, 0x9a, 0x25,
-  0x80, 0x89, 0x81, 0x16, 0x47, 0x35, 0xf6, 0xc1, 0x94, 0x03, 0x7d, 0x70,
-  0x89, 0x7e, 0x10, 0x5e, 0xc2, 0x94, 0x03, 0x7f, 0x18, 0x31, 0x30, 0x00,
-  0x10, 0x04, 0x83, 0xe7, 0xd5, 0xdc, 0xec, 0x9f, 0x8c, 0x3f, 0xf2, 0x25,
-  0x3e, 0x26, 0x04, 0xf2, 0xb1, 0x60, 0x5f, 0xe0, 0x63, 0x85, 0x48, 0xc4,
-  0xc7, 0x8a, 0x40, 0x3e, 0x16, 0x90, 0x04, 0x7c, 0x46, 0x0c, 0x0c, 0x00,
-  0x04, 0xc1, 0xe0, 0xb1, 0xb5, 0x3a, 0x2b, 0x29, 0x13, 0x8a, 0xf8, 0x58,
-  0x20, 0xc8, 0xc7, 0x82, 0x03, 0x3e, 0xc3, 0x11, 0xc1, 0x9b, 0x39, 0xdf,
-  0x05, 0x43, 0x8c, 0x18, 0x38, 0x00, 0x08, 0x82, 0x01, 0xd3, 0x6b, 0x7a,
-  0x56, 0x66, 0x3e, 0x16, 0x6b, 0xc1, 0x9f, 0xfd, 0xd9, 0x9f, 0xd1, 0xd9,
-  0xab, 0xcd, 0x12, 0x8c, 0xd0, 0x70, 0xc3, 0x6b, 0xcd, 0x1a, 0x18, 0xcc,
-  0x32, 0xc8, 0xc4, 0x08, 0x05, 0x23, 0x06, 0x06, 0x00, 0x82, 0x60, 0xf0,
-  0xf4, 0x5a, 0x9f, 0xb5, 0x94, 0x05, 0x6e, 0x06, 0x9f, 0x11, 0x03, 0x03,
-  0x00, 0x41, 0x30, 0x78, 0x7e, 0xed, 0xcf, 0x5c, 0xca, 0x02, 0x38, 0x83,
-  0xcf, 0x68, 0x42, 0x99, 0x0d, 0xc3, 0x0d, 0xc1, 0xad, 0x81, 0xc1, 0x2c,
-  0xc3, 0x4c, 0xd4, 0x44, 0x30, 0x1c, 0x51, 0xe0, 0xd9, 0xf0, 0x9d, 0x31,
-  0xcc, 0x70, 0x43, 0x30, 0x66, 0x64, 0x50, 0x43, 0xa0, 0xc3, 0x11, 0x07,
-  0x9f, 0x0d, 0x5f, 0x05, 0x82, 0x5e, 0x32, 0xcc, 0x70, 0x43, 0x60, 0x66,
-  0x64, 0x50, 0xc1, 0xa0, 0xb3, 0x0c, 0x34, 0x91, 0x16, 0xc1, 0xa9, 0xd8,
-  0x30, 0xb7, 0x53, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x81, 0x96,
-  0x6e, 0xb7, 0x46, 0x67, 0xe5, 0x36, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42,
-  0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0xc4, 0x21, 0x23, 0x06,
-  0x08, 0x00, 0x82, 0x60, 0x70, 0xc1, 0x9b, 0xaf, 0x1d, 0x44, 0x30, 0x62,
-  0x80, 0x00, 0x20, 0x08, 0x06, 0x57, 0xbc, 0xfd, 0x1a, 0x43, 0x04, 0x23,
-  0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0xc9, 0x1b, 0xb8, 0x49, 0x44, 0x30,
-  0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x4a, 0xbe, 0xfd, 0x9a, 0x9f, 0x05,
-  0xec, 0xf6, 0x6a, 0xea, 0x36, 0x9a, 0x10, 0x00, 0xc3, 0x0d, 0x81, 0xbb,
-  0x85, 0xc1, 0x05, 0x4e, 0xcd, 0x12, 0xa4, 0xc5, 0x70, 0x83, 0x36, 0x6f,
-  0x60, 0x30, 0xcb, 0x60, 0x13, 0x37, 0x11, 0x94, 0x9e, 0x8d, 0x1b, 0x5c,
-  0xe0, 0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x4c, 0xfc, 0x46, 0x6e,
-  0x60, 0x30, 0x6b, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x30, 0xf5, 0x1b,
-  0xb9, 0x05, 0xc2, 0x05, 0xc3, 0x54, 0x9f, 0xa1, 0x1b, 0x5c, 0xe0, 0xd4,
-  0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x4c, 0x21, 0x97, 0x6e, 0x63, 0x80,
-  0x6b, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x30, 0x89, 0x5c, 0xba, 0x05,
-  0xc2, 0x05, 0xc3, 0x5c, 0xe0, 0xd4, 0x1d, 0x4e, 0x9d, 0x99, 0x0d, 0x73,
-  0x77, 0x35, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80,
-  0x20, 0x18, 0x68, 0x27, 0x57, 0x6f, 0xb2, 0x36, 0x72, 0xa3, 0x09, 0x01,
-  0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45,
-  0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x97, 0xcb, 0xf1, 0x5b,
-  0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0xbd, 0x5c, 0xbf,
-  0x25, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x17, 0xcc, 0xf9,
-  0x5b, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xa0, 0xdc, 0x5c,
-  0xbf, 0xf1, 0x5a, 0xa0, 0x72, 0xed, 0x86, 0x72, 0xa3, 0x09, 0x01, 0x30,
-  0xdc, 0x10, 0xb0, 0x5c, 0x18, 0x5c, 0xe0, 0xd4, 0x2c, 0x41, 0x5a, 0x0c,
-  0x37, 0xe0, 0x01, 0xcc, 0x81, 0xc1, 0x2c, 0x03, 0x4e, 0xa4, 0x45, 0x60,
-  0xaf, 0x16, 0x6b, 0xf1, 0x19, 0x8e, 0xe8, 0x03, 0x59, 0x23, 0xbe, 0x59,
-  0x86, 0x9c, 0xe0, 0x89, 0xc0, 0x66, 0xcd, 0x0f, 0xe2, 0x63, 0xc1, 0x40,
-  0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x29, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4,
-  0xa7, 0x08, 0x9d, 0xd3, 0xe1, 0x86, 0x00, 0xe7, 0xc0, 0x60, 0x96, 0x41,
-  0x27, 0x76, 0x22, 0xb0, 0x61, 0xd7, 0xe0, 0x33, 0x4b, 0x00, 0x16, 0xa6,
-  0x6b, 0x44, 0x7c, 0x66, 0x09, 0xc0, 0x62, 0x38, 0x02, 0x15, 0x76, 0x4d,
-  0xf8, 0x66, 0x19, 0x7a, 0x02, 0x2c, 0x02, 0x4b, 0x05, 0x5e, 0x8b, 0x8f,
-  0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa7, 0x2c, 0x88, 0xe4, 0x63,
-  0x45, 0x10, 0x9f, 0x22, 0xca, 0x4e, 0x87, 0x1b, 0x82, 0xb1, 0x03, 0x83,
-  0x59, 0x06, 0x9f, 0xf8, 0x89, 0xc0, 0xc8, 0x6d, 0x88, 0xcf, 0x2c, 0x01,
-  0x58, 0x18, 0x71, 0x6e, 0xf0, 0x99, 0x25, 0x00, 0x8b, 0x81, 0x16, 0x47,
-  0xd3, 0x09, 0x6c, 0x27, 0x08, 0x9f, 0x10, 0x7e, 0xc2, 0x2c, 0x78, 0xe2,
-  0x82, 0x61, 0xcc, 0xdc, 0xd4, 0x2d, 0x3e, 0xc3, 0x11, 0xb4, 0xb0, 0x6e,
-  0xc4, 0x37, 0xcb, 0x10, 0x16, 0x64, 0x11, 0x18, 0xbb, 0xd5, 0x42, 0x7c,
-  0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x38, 0x65, 0x81, 0x21, 0x1f,
-  0x2b, 0x82, 0xf8, 0x14, 0x31, 0x77, 0x3a, 0xdc, 0x10, 0xc4, 0x1d, 0x18,
-  0xcc, 0x32, 0x88, 0xc5, 0x58, 0x04, 0x36, 0xd0, 0x1b, 0x7c, 0x66, 0x09,
-  0xd0, 0xc2, 0xe2, 0x8d, 0x88, 0xcf, 0x2c, 0x01, 0x5a, 0x0c, 0x47, 0xfc,
-  0x82, 0xbc, 0x09, 0xdf, 0x2c, 0x43, 0x59, 0xa0, 0x45, 0x60, 0xe0, 0x30,
-  0x6f, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0xe0, 0x94, 0x05,
-  0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x84, 0xdf, 0xe9, 0x70, 0x43, 0xc0,
-  0x77, 0x60, 0x30, 0xcb, 0x60, 0x16, 0x67, 0x11, 0xd8, 0xbe, 0x0d, 0xf1,
-  0x99, 0x25, 0x40, 0x0b, 0x23, 0x40, 0x0e, 0x3e, 0xb3, 0x04, 0x68, 0x31,
-  0xd0, 0xe2, 0x68, 0x62, 0x81, 0x8d, 0x05, 0x61, 0x16, 0xc2, 0x59, 0xd0,
-  0x06, 0x59, 0x5c, 0x30, 0xcc, 0x05, 0x4e, 0xdd, 0xe6, 0xd4, 0xb1, 0xdb,
-  0x30, 0xd7, 0x5f, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c,
-  0x00, 0x08, 0x82, 0x81, 0xd6, 0x7a, 0x7b, 0x87, 0x73, 0xa9, 0x37, 0x9a,
-  0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4,
-  0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0xd1, 0x9e,
-  0xe8, 0x25, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x57, 0xed,
-  0x8d, 0x5e, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0xd9,
-  0x1e, 0xe9, 0x25, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x4a,
-  0xef, 0x8d, 0x9e, 0xd8, 0x05, 0xb0, 0x37, 0x77, 0xae, 0x37, 0x9a, 0x10,
-  0x00, 0xc3, 0x0d, 0x81, 0xec, 0x85, 0xc1, 0x05, 0x4e, 0xcd, 0x12, 0xa4,
-  0xc5, 0x40, 0x8b, 0xa3, 0x1a, 0x34, 0x81, 0xee, 0xc1, 0x4c, 0xb8, 0x84,
-  0x4d, 0x08, 0x68, 0x81, 0xee, 0xc1, 0x4d, 0xcc, 0x32, 0xa8, 0x05, 0x5b,
-  0xf8, 0xc3, 0x70, 0xc4, 0x48, 0xd4, 0xdd, 0xf0, 0x1d, 0x49, 0x0c, 0x33,
-  0xdc, 0x10, 0x80, 0x1d, 0x19, 0xd4, 0x10, 0xe8, 0x70, 0x04, 0x49, 0xe4,
-  0xdd, 0xf0, 0x55, 0x20, 0xe8, 0x99, 0xc4, 0x30, 0xc3, 0x0d, 0xc1, 0xd8,
-  0x91, 0x41, 0x05, 0x83, 0xce, 0x32, 0xac, 0x05, 0x68, 0x04, 0x77, 0x72,
-  0xc3, 0x1c, 0x8e, 0x0d, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x9a,
-  0xf9, 0xd1, 0x5e, 0xdc, 0x89, 0xdf, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08,
-  0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x87, 0x8c, 0x18,
-  0x20, 0x00, 0x08, 0x82, 0xc1, 0xd5, 0x7e, 0xbb, 0x77, 0x10, 0xc1, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0xee, 0xc7, 0x7b, 0x0c, 0x11, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xf5, 0x7e, 0xbd, 0x27, 0x11, 0xc1,
-  0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x28, 0xf6, 0xc7, 0x7b, 0x7b, 0x17,
-  0xa4, 0x1f, 0xeb, 0x9d, 0xdf, 0x68, 0x42, 0x00, 0x0c, 0x37, 0x04, 0xeb,
-  0x17, 0x06, 0x17, 0x38, 0x35, 0x4b, 0x00, 0x1a, 0xc3, 0x0d, 0x37, 0x01,
-  0x7f, 0x60, 0x30, 0xcb, 0xd0, 0x16, 0x6e, 0x11, 0xd4, 0xdd, 0x81, 0x1f,
-  0x5c, 0xe0, 0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x4c, 0xf9, 0x17,
-  0x7e, 0x3e, 0x01, 0x7b, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x30, 0xe9,
-  0x5f, 0xf8, 0x05, 0xc2, 0x05, 0xc3, 0x94, 0xde, 0x95, 0x1f, 0x5c, 0xe0,
-  0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x4c, 0xfe, 0x67, 0x7e, 0x60,
-  0x51, 0x7b, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x30, 0xfd, 0x9f, 0xf9,
-  0x05, 0xc2, 0x05, 0xc3, 0x5c, 0xe0, 0xd4, 0x1d, 0x4e, 0xdd, 0xd8, 0x0d,
-  0x73, 0x74, 0x36, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01,
-  0x80, 0x20, 0x18, 0x68, 0x24, 0x18, 0xc8, 0xdf, 0xeb, 0x81, 0x60, 0x30,
-  0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09,
-  0xc4, 0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0xad,
-  0x60, 0x90, 0x7f, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1,
-  0xc5, 0x82, 0x81, 0xfe, 0x25, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08,
-  0x06, 0x57, 0x0b, 0x06, 0xfb, 0x97, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80,
-  0x20, 0x18, 0x28, 0x34, 0x18, 0xe8, 0x5f, 0xee, 0x05, 0x27, 0x18, 0xa8,
-  0x5f, 0x09, 0x06, 0xa3, 0x09, 0x01, 0x30, 0xdc, 0x10, 0xa4, 0x60, 0x10,
-  0x06, 0x17, 0x38, 0x35, 0x4b, 0x00, 0x1a, 0xc3, 0x0d, 0x75, 0xd1, 0x82,
-  0x01, 0x18, 0xcc, 0x32, 0xbc, 0x05, 0x68, 0x04, 0xc6, 0x7a, 0xae, 0x17,
-  0x9f, 0xe1, 0x88, 0xbd, 0x78, 0x3d, 0xe2, 0x9b, 0x65, 0x80, 0x8b, 0xb9,
-  0x08, 0x0c, 0xf6, 0xf8, 0x22, 0x3e, 0x16, 0x0c, 0xf4, 0xb9, 0x60, 0x98,
-  0x0b, 0x9c, 0xb2, 0xc0, 0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xb8, 0xc1,
-  0x40, 0x87, 0x1b, 0x82, 0x1a, 0x0c, 0xc0, 0x60, 0x96, 0x21, 0x2e, 0xe4,
-  0x22, 0xb0, 0x01, 0xf7, 0xe0, 0x33, 0x4b, 0x70, 0x17, 0x76, 0x7b, 0x44,
-  0x7c, 0x66, 0x09, 0xee, 0x62, 0x38, 0xc2, 0x34, 0x70, 0x4f, 0xf8, 0x66,
-  0x19, 0xe8, 0xe2, 0x2e, 0x02, 0x3b, 0x8d, 0xdc, 0x8b, 0x8f, 0x05, 0x0e,
-  0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa7, 0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10,
-  0x9f, 0x22, 0xc4, 0x30, 0xd0, 0xe1, 0x86, 0x00, 0x0c, 0x03, 0x30, 0x98,
-  0x65, 0xa8, 0x0b, 0xbb, 0x08, 0x2c, 0xfc, 0x86, 0xf8, 0xcc, 0x12, 0xdc,
-  0x85, 0x11, 0xe4, 0x07, 0x9f, 0x59, 0x82, 0xbb, 0x18, 0x68, 0x71, 0xb4,
-  0xb8, 0xc0, 0xe4, 0x82, 0xa8, 0x0b, 0xc1, 0x2e, 0xc4, 0x66, 0x2e, 0x2e,
-  0x18, 0xc6, 0xc6, 0xef, 0xfc, 0xe2, 0x33, 0x1c, 0x11, 0x1b, 0xe8, 0x47,
-  0x7c, 0xb3, 0x0c, 0x78, 0xb1, 0x17, 0x81, 0xa5, 0x9f, 0x6c, 0xc4, 0xc7,
-  0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x53, 0x16, 0x18, 0xf2, 0xb1,
-  0x22, 0x88, 0x4f, 0x11, 0x70, 0x18, 0xe8, 0x70, 0x43, 0xe0, 0x86, 0x01,
-  0x18, 0xcc, 0x32, 0xe4, 0x85, 0x5e, 0x04, 0x36, 0xc4, 0x1f, 0x7c, 0x66,
-  0x09, 0xfe, 0xc2, 0xdc, 0x8f, 0x88, 0xcf, 0x2c, 0xc1, 0x5f, 0x0c, 0x47,
-  0xf0, 0xc6, 0xfb, 0x09, 0xdf, 0x2c, 0x03, 0x5f, 0xfc, 0x45, 0x60, 0xbd,
-  0x01, 0x7f, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0xe0, 0x94,
-  0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0xc4, 0x1e, 0x06, 0x3a, 0xdc,
-  0x10, 0xe4, 0x61, 0x00, 0x06, 0xb3, 0x0c, 0x7d, 0xe1, 0x17, 0x81, 0xe1,
-  0xdf, 0x10, 0x9f, 0x59, 0x82, 0xbf, 0x30, 0xa2, 0xff, 0xe0, 0x33, 0x4b,
-  0xf0, 0x17, 0x03, 0x2d, 0x8e, 0x96, 0x17, 0x98, 0x5e, 0x10, 0x7d, 0x21,
-  0xf8, 0x85, 0xeb, 0xec, 0xc5, 0x05, 0xc3, 0x5c, 0xe0, 0xd4, 0x6d, 0x4e,
-  0x5d, 0xfa, 0x0d, 0x73, 0xfa, 0x36, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc,
-  0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x68, 0xaa, 0x18, 0xe0, 0x61, 0x50,
-  0x83, 0x81, 0x29, 0x06, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3,
-  0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00,
-  0x20, 0x08, 0x06, 0x57, 0x2c, 0x06, 0x7f, 0x18, 0x24, 0x44, 0x30, 0x62,
-  0x80, 0x00, 0x20, 0x08, 0x06, 0x97, 0x2c, 0x06, 0xa0, 0x18, 0x24, 0x44,
-  0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xd7, 0x2c, 0x06, 0xa1, 0x18,
-  0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x8a, 0x2e, 0x06,
-  0xa0, 0x18, 0xfc, 0x60, 0x10, 0xb4, 0x62, 0x00, 0x87, 0xc1, 0x2a, 0x06,
-  0xa3, 0x09, 0x01, 0x30, 0xdc, 0x10, 0xbc, 0x62, 0x10, 0x06, 0x17, 0x38,
-  0x35, 0x4b, 0x00, 0x1a, 0x03, 0x2d, 0x8e, 0x6a, 0xac, 0x85, 0x38, 0x0a,
-  0x6a, 0xe1, 0x12, 0x6d, 0x21, 0xfc, 0x85, 0x38, 0x0a, 0x6e, 0x61, 0x20,
-  0x12, 0x87, 0x01, 0x7c, 0x66, 0x19, 0x42, 0x63, 0x34, 0xf8, 0x63, 0x38,
-  0x22, 0x98, 0xc3, 0x60, 0xf8, 0x4e, 0x18, 0x66, 0xb8, 0x21, 0xf0, 0xc1,
-  0x80, 0x0c, 0x6a, 0x08, 0x74, 0x38, 0x42, 0x44, 0xee, 0x30, 0x18, 0xbe,
-  0x0a, 0x04, 0x3d, 0x12, 0x19, 0x66, 0xb8, 0x21, 0x08, 0xc3, 0x80, 0x0c,
-  0x2a, 0x18, 0x74, 0x96, 0x41, 0x34, 0x6e, 0x23, 0xb8, 0x12, 0x0c, 0x86,
-  0x39, 0x9b, 0x1b, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x34, 0x72,
-  0x0c, 0x64, 0x31, 0x78, 0xc3, 0x00, 0x1c, 0x83, 0xd1, 0x84, 0x00, 0x18,
-  0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x0e,
-  0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x6b, 0x1d, 0x83, 0x5c, 0x0c,
-  0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x8b, 0x1d, 0x03,
-  0x5d, 0x0c, 0x18, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0xab,
-  0x1d, 0x83, 0x5d, 0x0c, 0x24, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04,
-  0x03, 0x85, 0x1e, 0x03, 0x5d, 0x0c, 0xf2, 0x30, 0x08, 0xce, 0x31, 0x50,
-  0xc5, 0xa0, 0x1c, 0x83, 0xd1, 0x84, 0x00, 0x18, 0x6e, 0x08, 0xd2, 0x31,
-  0x08, 0x83, 0x0b, 0x9c, 0x9a, 0x25, 0xb8, 0x8d, 0xe1, 0x86, 0x1a, 0x71,
-  0xc7, 0x00, 0x0c, 0x66, 0x19, 0x48, 0xa3, 0x34, 0x82, 0xaa, 0xc3, 0xc0,
-  0x17, 0x03, 0xb8, 0xc0, 0xa9, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x98,
-  0xee, 0x31, 0xf8, 0xc5, 0x80, 0x73, 0xc5, 0x60, 0xc4, 0xe0, 0x00, 0x40,
-  0x10, 0x0c, 0x26, 0x7c, 0x0c, 0x7e, 0x31, 0x08, 0x84, 0x0b, 0x86, 0x29,
-  0x3c, 0x0c, 0xc6, 0x31, 0x80, 0x0b, 0x9c, 0x1a, 0x31, 0x38, 0x00, 0x10,
-  0x04, 0x83, 0x89, 0x1f, 0x03, 0x72, 0x0c, 0x7c, 0x64, 0x16, 0x83, 0x11,
-  0x83, 0x03, 0x00, 0x41, 0x30, 0x98, 0xfa, 0x31, 0x20, 0xc7, 0x20, 0x10,
-  0x2e, 0x18, 0xe6, 0x02, 0xa7, 0xee, 0x70, 0xea, 0xc2, 0x30, 0x18, 0xe6,
-  0xe4, 0x6e, 0x98, 0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00,
-  0x41, 0x30, 0xd0, 0x44, 0x32, 0x80, 0xc7, 0xa0, 0x15, 0x03, 0x7f, 0x0c,
-  0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34,
-  0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xae,
-  0x94, 0x0c, 0xee, 0x31, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10,
-  0x0c, 0x2e, 0x95, 0x0c, 0xf0, 0x31, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01,
-  0x40, 0x10, 0x0c, 0xae, 0x95, 0x0c, 0xf2, 0x31, 0x48, 0x88, 0x60, 0xc4,
-  0x40, 0x01, 0x40, 0x10, 0x0c, 0x14, 0x99, 0x0c, 0xf0, 0x31, 0xb8, 0xc5,
-  0x20, 0x28, 0xc9, 0x00, 0x1d, 0x83, 0x91, 0x0c, 0x46, 0x13, 0x02, 0x60,
-  0xb8, 0x21, 0x38, 0xc9, 0x20, 0x0c, 0x2e, 0x70, 0x6a, 0x96, 0xe0, 0x36,
-  0x86, 0x1b, 0xe6, 0x64, 0x25, 0x03, 0x30, 0x98, 0x65, 0x30, 0x8d, 0xdb,
-  0x08, 0x4c, 0x15, 0x03, 0x56, 0x0c, 0xe2, 0x33, 0x1c, 0x91, 0x07, 0xad,
-  0x18, 0x10, 0xdf, 0x2c, 0xc3, 0x69, 0xa8, 0x46, 0x60, 0xae, 0x18, 0xe8,
-  0x41, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x38, 0x65, 0x81,
-  0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x51, 0x93, 0x81, 0x0e, 0x37, 0x04,
-  0x33, 0x19, 0x80, 0xc1, 0x2c, 0x03, 0x6a, 0xa4, 0x46, 0x60, 0x83, 0x2d,
-  0x06, 0xf0, 0x99, 0x25, 0x70, 0x0d, 0xab, 0xc5, 0x80, 0x88, 0xcf, 0x2c,
-  0x81, 0x6b, 0x0c, 0x47, 0x90, 0x82, 0x2d, 0x06, 0xc2, 0x37, 0xcb, 0xb0,
-  0x1a, 0xae, 0x11, 0x58, 0x29, 0xdc, 0x62, 0x10, 0x1f, 0x0b, 0x1c, 0xfa,
-  0x5c, 0x30, 0xcc, 0x05, 0x4e, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e,
-  0x45, 0x80, 0x65, 0xa0, 0xc3, 0x0d, 0x81, 0x4f, 0x06, 0x60, 0x30, 0xcb,
-  0xc0, 0x1a, 0xad, 0x11, 0xd8, 0x2f, 0x06, 0x43, 0x7c, 0x66, 0x09, 0x5c,
-  0xc3, 0x08, 0x71, 0x0c, 0xe0, 0x33, 0x4b, 0xe0, 0x1a, 0x03, 0x2d, 0x8e,
-  0x86, 0x1a, 0x58, 0x6a, 0x10, 0xac, 0x21, 0xb4, 0x86, 0x4f, 0xa8, 0xc6,
-  0x05, 0xc3, 0x58, 0x38, 0x06, 0xe5, 0x18, 0xc4, 0x67, 0x38, 0xe2, 0x55,
-  0xcc, 0x31, 0x20, 0xbe, 0x59, 0x86, 0xd7, 0x90, 0x8d, 0xc0, 0xce, 0x31,
-  0x80, 0x95, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x70, 0xca,
-  0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0xc2, 0x2d, 0x03, 0x1d, 0x6e,
-  0x08, 0xd8, 0x32, 0x00, 0x83, 0x59, 0x06, 0xd8, 0x88, 0x8d, 0xc0, 0x86,
-  0x77, 0x0c, 0xe0, 0x33, 0x4b, 0x60, 0x1b, 0xc6, 0x8e, 0x01, 0x11, 0x9f,
-  0x59, 0x02, 0xdb, 0x18, 0x8e, 0xd0, 0x95, 0x76, 0x0c, 0x84, 0x6f, 0x96,
-  0x61, 0x36, 0x6c, 0x23, 0xb0, 0x5d, 0x71, 0xc7, 0x20, 0x3e, 0x16, 0x38,
-  0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9c, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41,
-  0x7c, 0x8a, 0xc8, 0xcb, 0x40, 0x87, 0x1b, 0x82, 0xbb, 0x0c, 0xc0, 0x60,
-  0x96, 0x81, 0x36, 0x6a, 0x23, 0x30, 0x7b, 0x0c, 0x86, 0xf8, 0xcc, 0x12,
-  0xd8, 0x86, 0x11, 0xfb, 0x18, 0xc0, 0x67, 0x96, 0xc0, 0x36, 0x06, 0x5a,
-  0x1c, 0x0d, 0x36, 0xb0, 0xd8, 0x20, 0x68, 0x43, 0xa8, 0x0d, 0xd5, 0x92,
-  0x8d, 0x0b, 0x86, 0xb9, 0xc0, 0xa9, 0xdb, 0x9c, 0xba, 0x73, 0x0c, 0x86,
-  0x39, 0xfc, 0x1b, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00,
-  0x40, 0x10, 0x0c, 0x34, 0xd4, 0x0c, 0xec, 0x32, 0x98, 0xc9, 0x80, 0x34,
-  0x83, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18,
-  0x4d, 0x20, 0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83,
-  0xeb, 0x35, 0x83, 0xbe, 0x0c, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10,
-  0x04, 0x83, 0x0b, 0x36, 0x03, 0xbf, 0x0c, 0x12, 0x22, 0x18, 0x31, 0x40,
-  0x00, 0x10, 0x04, 0x83, 0x2b, 0x36, 0x83, 0xbf, 0x0c, 0x12, 0x22, 0x18,
-  0x31, 0x50, 0x00, 0x10, 0x04, 0x03, 0x05, 0x37, 0x03, 0xbf, 0x0c, 0x7a,
-  0x32, 0x08, 0x56, 0x33, 0x70, 0xcb, 0x20, 0x35, 0x83, 0xd1, 0x84, 0x00,
-  0x18, 0x6e, 0x08, 0x5a, 0x33, 0x08, 0x83, 0x0b, 0x9c, 0x9a, 0x25, 0xb8,
-  0x8d, 0x81, 0x16, 0x47, 0x35, 0x44, 0xc3, 0x57, 0x85, 0xd0, 0x70, 0x09,
-  0xd2, 0x10, 0x6c, 0xc3, 0x57, 0x85, 0xd2, 0x30, 0x7f, 0x29, 0xcb, 0x00,
-  0x3e, 0xb3, 0x0c, 0xb8, 0xa1, 0x1b, 0xfa, 0x32, 0x1c, 0x11, 0xc4, 0x65,
-  0x30, 0x7c, 0x27, 0x0c, 0x33, 0xdc, 0x10, 0xf0, 0x64, 0x40, 0x06, 0x35,
-  0x04, 0x3a, 0x1c, 0x01, 0x32, 0x75, 0x19, 0x0c, 0x5f, 0x05, 0x82, 0x9e,
-  0xc8, 0x0c, 0x33, 0xdc, 0x10, 0xfc, 0x64, 0x40, 0x06, 0x15, 0x0c, 0x3a,
-  0xcb, 0x90, 0x1b, 0xee, 0x11, 0xdc, 0x48, 0x06, 0xc3, 0x1c, 0x0d, 0x06,
-  0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x81, 0x26, 0x9e, 0x01, 0x6c,
-  0x06, 0x6d, 0x19, 0xf8, 0x66, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42,
-  0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0xc4, 0x21, 0x23, 0x06,
-  0x08, 0x00, 0x82, 0x60, 0x70, 0xa5, 0x67, 0x70, 0x9b, 0xc1, 0x41, 0x04,
-  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0xa9, 0x67, 0x80, 0x9b, 0x01,
-  0x43, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0xad, 0x67, 0x90,
-  0x9b, 0x81, 0x44, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xa0, 0xc8,
-  0x67, 0x80, 0x9b, 0xc1, 0x5d, 0x06, 0x41, 0x79, 0x06, 0xa8, 0x19, 0x8c,
-  0x67, 0x30, 0x9a, 0x10, 0x00, 0xc3, 0x0d, 0xc1, 0x79, 0x06, 0x61, 0x70,
-  0x81, 0x53, 0xb3, 0x04, 0xee, 0x31, 0xdc, 0x30, 0x33, 0xec, 0x19, 0x80,
-  0xc1, 0x2c, 0xc3, 0x6e, 0xf0, 0x46, 0x50, 0x73, 0x19, 0xf0, 0x66, 0x00,
-  0x17, 0x38, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x53, 0x7d, 0x06,
-  0xbd, 0x19, 0x70, 0xac, 0x19, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1,
-  0x64, 0x9f, 0x41, 0x6f, 0x06, 0x81, 0x70, 0xc1, 0x30, 0x65, 0x97, 0x41,
-  0x78, 0x06, 0x70, 0x81, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x30,
-  0xe9, 0x67, 0x20, 0x9e, 0x01, 0xcf, 0xc4, 0x66, 0x30, 0x62, 0x70, 0x00,
-  0x20, 0x08, 0x06, 0xd3, 0x7e, 0x06, 0xe2, 0x19, 0x04, 0xc2, 0x05, 0xc3,
-  0x5c, 0xe0, 0xd4, 0x1d, 0x4e, 0xdd, 0x4f, 0x06, 0xc3, 0x1c, 0x1c, 0x06,
-  0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82,
-  0x81, 0x06, 0xa2, 0x81, 0x7b, 0x06, 0xab, 0x19, 0xf0, 0x67, 0x30, 0x9a,
-  0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4,
-  0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0x9d, 0x68,
-  0x50, 0x9f, 0x41, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70,
-  0xa1, 0x68, 0x60, 0x9f, 0x41, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82,
-  0x60, 0x70, 0xa5, 0x68, 0x70, 0x9f, 0x41, 0x42, 0x04, 0x23, 0x06, 0x0a,
-  0x00, 0x82, 0x60, 0xa0, 0xc0, 0x68, 0x60, 0x9f, 0x41, 0x6d, 0x06, 0xc1,
-  0x88, 0x06, 0xe6, 0x19, 0x84, 0x68, 0x30, 0x9a, 0x10, 0x00, 0xc3, 0x0d,
-  0x41, 0x89, 0x06, 0x61, 0x70, 0x81, 0x53, 0xb3, 0x04, 0xee, 0x31, 0xdc,
-  0x10, 0x37, 0x29, 0x1a, 0x80, 0xc1, 0x2c, 0x43, 0x6f, 0xb8, 0x47, 0x60,
-  0xa8, 0x19, 0xa8, 0x66, 0x10, 0x9f, 0xe1, 0x88, 0x3c, 0x58, 0xcd, 0x80,
-  0xf8, 0x66, 0x19, 0x7c, 0x23, 0x3c, 0x02, 0x63, 0xcd, 0x40, 0x0f, 0xe2,
-  0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x29, 0x0b, 0x0c, 0xf9,
-  0x58, 0x11, 0xc4, 0xa7, 0x88, 0x19, 0x0d, 0x74, 0xb8, 0x21, 0x88, 0xd1,
-  0x00, 0x0c, 0x66, 0x19, 0x7e, 0x03, 0x3c, 0x02, 0x1b, 0x68, 0x33, 0x80,
-  0xcf, 0x2c, 0x41, 0x79, 0xd8, 0x6c, 0x06, 0x44, 0x7c, 0x66, 0x09, 0xca,
-  0x63, 0x38, 0x82, 0x14, 0x68, 0x33, 0x10, 0xbe, 0x59, 0x06, 0xf1, 0x28,
-  0x8f, 0xc0, 0x4a, 0xa1, 0x36, 0x83, 0xf8, 0x58, 0xe0, 0xd0, 0xe7, 0x82,
-  0x61, 0x2e, 0x70, 0xca, 0x82, 0x48, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0xc2,
-  0x47, 0x03, 0x1d, 0x6e, 0x08, 0x78, 0x34, 0x00, 0x83, 0x59, 0x86, 0xf1,
-  0x20, 0x8f, 0xc0, 0x7a, 0x33, 0x18, 0xe2, 0x33, 0x4b, 0x50, 0x1e, 0x46,
-  0x80, 0x67, 0x00, 0x9f, 0x59, 0x82, 0xf2, 0x18, 0x68, 0x71, 0xb4, 0xdf,
-  0xc0, 0xc0, 0x83, 0x18, 0x0f, 0x81, 0x3c, 0x7c, 0x22, 0x3c, 0x2e, 0x18,
-  0xc6, 0x7e, 0x33, 0x18, 0xcf, 0x20, 0x3e, 0xc3, 0x11, 0xad, 0x43, 0x9e,
-  0x01, 0xf1, 0xcd, 0x32, 0x98, 0x47, 0x7a, 0x04, 0x56, 0x9e, 0x81, 0xeb,
-  0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x53, 0x16, 0x18,
-  0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x6c, 0x1a, 0xe8, 0x70, 0x43, 0xa0,
-  0xa6, 0x01, 0x18, 0xcc, 0x32, 0x9c, 0x07, 0x7a, 0x04, 0x36, 0xb4, 0x67,
-  0x00, 0x9f, 0x59, 0x82, 0xf6, 0x30, 0xf5, 0x0c, 0x88, 0xf8, 0xcc, 0x12,
-  0xb4, 0xc7, 0x70, 0x04, 0xee, 0xac, 0x67, 0x20, 0x7c, 0xb3, 0x0c, 0xea,
-  0xd1, 0x1e, 0x81, 0xe5, 0x0e, 0x7b, 0x06, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf,
-  0x05, 0xc3, 0x5c, 0xe0, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53,
-  0xc4, 0x9d, 0x06, 0x3a, 0xdc, 0x10, 0xd4, 0x69, 0x00, 0x06, 0xb3, 0x0c,
-  0xeb, 0xc1, 0x1e, 0x81, 0xd1, 0x67, 0x30, 0xc4, 0x67, 0x96, 0xa0, 0x3d,
-  0x8c, 0xc8, 0xcf, 0x00, 0x3e, 0xb3, 0x04, 0xed, 0x31, 0xd0, 0xe2, 0x68,
-  0xe7, 0x81, 0xa1, 0x07, 0xb1, 0x1e, 0x02, 0x7b, 0x98, 0x5e, 0x7a, 0x5c,
-  0x30, 0xcc, 0x05, 0x4e, 0xdd, 0xe6, 0xd4, 0x95, 0x67, 0x30, 0xcc, 0xd9,
-  0x63, 0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80,
-  0x20, 0x18, 0x68, 0xa6, 0x1a, 0xd0, 0x69, 0x10, 0xa3, 0x81, 0xa8, 0x06,
-  0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a,
-  0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x57,
-  0xab, 0x06, 0x7b, 0x1a, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08,
-  0x06, 0x97, 0xab, 0x06, 0x7c, 0x1a, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00,
-  0x20, 0x08, 0x06, 0xd7, 0xab, 0x06, 0x7d, 0x1a, 0x24, 0x44, 0x30, 0x62,
-  0xa0, 0x00, 0x20, 0x08, 0x06, 0x8a, 0xad, 0x06, 0x7c, 0x1a, 0xec, 0x68,
-  0x10, 0xa4, 0x6a, 0xc0, 0xa6, 0xc1, 0xa9, 0x06, 0xa3, 0x09, 0x01, 0x30,
-  0xdc, 0x10, 0xac, 0x6a, 0x10, 0x06, 0x17, 0x38, 0x35, 0x4b, 0xe0, 0x1e,
-  0x03, 0x2d, 0x8e, 0x6a, 0xe4, 0x86, 0x2e, 0x0b, 0xb8, 0xe1, 0x12, 0xbb,
-  0x21, 0xb4, 0x87, 0x2e, 0x0b, 0xbc, 0x31, 0xcb, 0xf0, 0x1e, 0xf1, 0x71,
-  0x3f, 0xc3, 0x11, 0xfb, 0xe3, 0xa6, 0xc1, 0xf0, 0x1d, 0xff, 0x0c, 0x33,
-  0xdc, 0x10, 0xe4, 0x68, 0x40, 0x06, 0x35, 0x04, 0x3a, 0x1c, 0xe1, 0x3f,
-  0x72, 0x1a, 0x0c, 0x5f, 0x05, 0x82, 0x1e, 0x08, 0x0d, 0x33, 0xdc, 0x10,
-  0xf0, 0x68, 0x40, 0x06, 0x15, 0x0c, 0x3a, 0xcb, 0x00, 0x1f, 0x25, 0x12,
-  0x1c, 0x88, 0x06, 0xc3, 0x5c, 0x4c, 0x06, 0xc3, 0x8c, 0x18, 0x1c, 0x00,
-  0x08, 0x82, 0x81, 0xf6, 0xab, 0x41, 0xab, 0x06, 0x6a, 0x1a, 0xec, 0x6a,
-  0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3,
-  0x09, 0xc4, 0x50, 0xc4, 0x21, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70,
-  0x99, 0x6b, 0x40, 0xab, 0xc1, 0x41, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82,
-  0x60, 0x70, 0x9d, 0x6b, 0x50, 0xab, 0x01, 0x43, 0x04, 0x23, 0x06, 0x08,
-  0x00, 0x82, 0x60, 0x70, 0xa1, 0x6b, 0x60, 0xab, 0x81, 0x44, 0x04, 0x23,
-  0x06, 0x0a, 0x00, 0x82, 0x60, 0xa0, 0xbc, 0x6b, 0x50, 0xab, 0x01, 0x9d,
-  0x06, 0x81, 0xb8, 0x06, 0xa5, 0x1a, 0x80, 0x6b, 0x30, 0x9a, 0x10, 0x00,
-  0xc3, 0x0d, 0x01, 0xb9, 0x06, 0x61, 0x70, 0x81, 0x53, 0xb3, 0x04, 0x25,
-  0x32, 0xdc, 0x00, 0x43, 0xe9, 0x1a, 0x80, 0xc1, 0x2c, 0x83, 0x7c, 0xcc,
-  0x47, 0x50, 0x70, 0x1a, 0xe4, 0x6a, 0x00, 0x17, 0x38, 0x35, 0x62, 0x70,
-  0x00, 0x20, 0x08, 0x06, 0x93, 0xbc, 0x06, 0xba, 0x1a, 0xd8, 0x50, 0xaa,
-  0x06, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x30, 0xcd, 0x6b, 0xa0, 0xab,
-  0x41, 0x20, 0x5c, 0x30, 0x4c, 0xcd, 0x69, 0xe0, 0xab, 0x01, 0x5c, 0xe0,
-  0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x4c, 0xf7, 0x1a, 0xfc, 0x6a,
-  0xa0, 0x43, 0xae, 0x1a, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x84,
-  0xaf, 0xc1, 0xaf, 0x06, 0x81, 0x70, 0xc1, 0x30, 0x17, 0x38, 0x75, 0x87,
-  0x53, 0xc7, 0xa3, 0xc1, 0x30, 0xd7, 0x96, 0xc1, 0x30, 0x47, 0x0c, 0x73,
-  0xc4, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xa0, 0xf5, 0x6b, 0xb0,
-  0xae, 0x01, 0xaa, 0x06, 0xf9, 0x1a, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82,
-  0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x91, 0xc8, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0x24, 0x1b, 0xc8, 0x6b, 0x90, 0x10,
-  0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0x25, 0x1b, 0xcc, 0x6b,
-  0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0x26, 0x1b,
-  0xd0, 0x6b, 0x90, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x28,
-  0x2d, 0x1b, 0xcc, 0x6b, 0x20, 0xab, 0x41, 0x00, 0xb2, 0xc1, 0xb8, 0x06,
-  0xfe, 0x1a, 0x8c, 0x26, 0x04, 0xc0, 0x70, 0x43, 0x20, 0xb2, 0x41, 0x18,
-  0x5c, 0xe0, 0xd4, 0x2c, 0x41, 0x89, 0x0c, 0x37, 0xb8, 0x91, 0xc9, 0x06,
-  0x60, 0x30, 0xcb, 0x40, 0x1f, 0x25, 0x12, 0x58, 0xa9, 0x06, 0xa7, 0x1a,
-  0xc4, 0x67, 0x38, 0x62, 0x8e, 0x50, 0x35, 0x20, 0xbe, 0x59, 0x86, 0xfa,
-  0xc0, 0x8f, 0xc0, 0x52, 0x35, 0xa0, 0xa3, 0xf8, 0x58, 0x30, 0xd0, 0xe7,
-  0x82, 0x61, 0x2e, 0x70, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29,
-  0x02, 0x66, 0x03, 0x1d, 0x6e, 0x08, 0x5c, 0x36, 0x00, 0x83, 0x59, 0x06,
-  0xfb, 0xb8, 0x8f, 0xc0, 0x86, 0x58, 0x0d, 0xe0, 0x33, 0x4b, 0xc0, 0x1f,
-  0x06, 0xab, 0x01, 0x11, 0x9f, 0x59, 0x02, 0xfe, 0x18, 0x8e, 0xf0, 0xa3,
-  0x58, 0x0d, 0x84, 0x6f, 0x96, 0x21, 0x3f, 0xf8, 0x23, 0xb0, 0x3f, 0x92,
-  0xd5, 0x20, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9c, 0xb2,
-  0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xd8, 0xd9, 0x40, 0x87, 0x1b,
-  0x82, 0x9c, 0x0d, 0xc0, 0x60, 0x96, 0x41, 0x3f, 0xf6, 0x23, 0x30, 0x5d,
-  0x0d, 0x86, 0xf8, 0xcc, 0x12, 0xf0, 0x87, 0x11, 0xbd, 0x1a, 0xc0, 0x67,
-  0x96, 0x80, 0x3f, 0x06, 0x5a, 0x1c, 0xcd, 0x3e, 0xb0, 0xfb, 0x20, 0xf4,
-  0x43, 0xd8, 0x0f, 0x99, 0x0c, 0xf0, 0xe3, 0x82, 0x61, 0x8c, 0x57, 0x03,
-  0x70, 0x0d, 0xe2, 0x33, 0x1c, 0xb1, 0x4a, 0xe1, 0x1a, 0x10, 0xdf, 0x2c,
-  0x43, 0x7f, 0x80, 0x48, 0x60, 0xe2, 0x1a, 0xb0, 0x52, 0x7c, 0x2c, 0x18,
-  0xe8, 0x73, 0xc1, 0x30, 0x17, 0x38, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82,
-  0xf8, 0x14, 0x91, 0xb6, 0x81, 0x0e, 0x37, 0x04, 0x67, 0x1b, 0x80, 0xc1,
-  0x2c, 0x83, 0x7f, 0xfc, 0x47, 0x60, 0x83, 0xba, 0x06, 0xf0, 0x99, 0x25,
-  0x20, 0x11, 0x3b, 0xd7, 0x80, 0x88, 0xcf, 0x2c, 0x01, 0x89, 0x0c, 0x47,
-  0xd8, 0x12, 0xba, 0x06, 0xc2, 0x37, 0xcb, 0x10, 0x22, 0x24, 0x12, 0xd8,
-  0x2d, 0xa5, 0x6b, 0x10, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05,
-  0x4e, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xd0, 0x6d, 0xa0,
-  0xc3, 0x0d, 0x81, 0xdc, 0x06, 0x60, 0x30, 0xcb, 0x20, 0x22, 0x23, 0x12,
-  0x58, 0xbc, 0x06, 0x43, 0x7c, 0x66, 0x09, 0x48, 0xc4, 0x08, 0x7b, 0x0d,
-  0xe0, 0x33, 0x4b, 0x40, 0x22, 0x03, 0x2d, 0x8e, 0xe6, 0x1f, 0xd8, 0x7f,
-  0x10, 0x22, 0x22, 0x8c, 0x88, 0x68, 0x06, 0x20, 0x72, 0xc1, 0x30, 0x17,
-  0x38, 0x75, 0x9b, 0x53, 0x27, 0xae, 0xc1, 0x30, 0x37, 0x9f, 0xc1, 0x30,
-  0x47, 0x0c, 0x73, 0xc4, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xa0,
-  0x8d, 0x6e, 0x10, 0xb7, 0x81, 0xcb, 0x06, 0x7f, 0x1b, 0x8c, 0x26, 0x04,
-  0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14,
-  0x91, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0xaa, 0x1b, 0xe0,
-  0x6d, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0xab,
-  0x1b, 0xe4, 0x6d, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
-  0x5c, 0xac, 0x1b, 0xe8, 0x6d, 0x90, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80,
-  0x20, 0x18, 0x28, 0xb3, 0x1b, 0xe4, 0x6d, 0x80, 0xb3, 0x41, 0x60, 0xba,
-  0x41, 0xda, 0x06, 0xa4, 0x1b, 0x8c, 0x26, 0x04, 0xc0, 0x70, 0x43, 0x80,
-  0xba, 0x41, 0x18, 0x5c, 0xe0, 0xd4, 0x2c, 0x41, 0x89, 0x0c, 0xb4, 0x38,
-  0xaa, 0x01, 0x1f, 0xb4, 0x2e, 0xbc, 0x87, 0x4b, 0xc8, 0x87, 0x40, 0x22,
-  0xb4, 0x2e, 0xcc, 0xc7, 0x2c, 0x83, 0x89, 0xa0, 0x08, 0x3d, 0x0d, 0x47,
-  0xe4, 0xd3, 0xda, 0x06, 0xc3, 0x77, 0xfa, 0x34, 0xcc, 0x70, 0x43, 0x60,
-  0xb3, 0x01, 0x19, 0xd4, 0x10, 0xe8, 0x70, 0xc4, 0x3e, 0xbd, 0x6d, 0x30,
-  0x7c, 0x15, 0x08, 0x7a, 0xfd, 0x34, 0xcc, 0x70, 0x43, 0x90, 0xb3, 0x01,
-  0x19, 0x54, 0x30, 0xe8, 0x2c, 0xc3, 0x89, 0xf0, 0x48, 0x70, 0xfd, 0x1a,
-  0x0c, 0x73, 0x2e, 0x1a, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06,
-  0x1a, 0xef, 0x06, 0xaa, 0x1b, 0x9c, 0x6d, 0x80, 0xbb, 0xc1, 0x68, 0x42,
-  0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43,
-  0x11, 0x87, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x35, 0xbe, 0x41,
-  0xec, 0x06, 0x07, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x45,
-  0xbe, 0x81, 0xec, 0x06, 0x0c, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
-  0xc1, 0x55, 0xbe, 0xc1, 0xec, 0x06, 0x12, 0x11, 0x8c, 0x18, 0x28, 0x00,
-  0x08, 0x82, 0x81, 0xc2, 0xbe, 0x81, 0xec, 0x06, 0x71, 0x1b, 0x04, 0xbf,
-  0x1b, 0x88, 0x6e, 0xd0, 0xbb, 0xc1, 0x68, 0x42, 0x00, 0x0c, 0x37, 0x04,
-  0xe1, 0x1b, 0x84, 0xc1, 0x05, 0x4e, 0xcd, 0x12, 0xf0, 0xc8, 0x70, 0x43,
-  0x4b, 0x99, 0x6f, 0x00, 0x06, 0xb3, 0x0c, 0x29, 0xa2, 0x22, 0x41, 0xb5,
-  0x6d, 0x60, 0xbb, 0x01, 0x5c, 0xe0, 0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20,
-  0x18, 0x4c, 0xef, 0x1b, 0xdc, 0x6e, 0x40, 0x53, 0xa6, 0x1b, 0x8c, 0x18,
-  0x1c, 0x00, 0x08, 0x82, 0xc1, 0x04, 0xbf, 0xc1, 0xed, 0x06, 0x81, 0x70,
-  0xc1, 0x30, 0x05, 0xb7, 0xc1, 0xee, 0x06, 0x70, 0x81, 0x53, 0x23, 0x06,
-  0x07, 0x00, 0x82, 0x60, 0x30, 0xd1, 0x6f, 0xc0, 0xbb, 0xc1, 0x4d, 0xad,
-  0x6e, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x53, 0xfd, 0x06, 0xbc,
-  0x1b, 0x04, 0xc2, 0x05, 0xc3, 0x5c, 0xe0, 0xd4, 0x1d, 0x4e, 0x5d, 0xce,
-  0x06, 0xc3, 0x9c, 0x9a, 0x06, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c,
-  0x18, 0x1c, 0x00, 0x08, 0x82, 0x81, 0xa6, 0xbf, 0x01, 0xfa, 0x06, 0xa5,
-  0x1b, 0xd8, 0x6f, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a,
-  0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00,
-  0x82, 0x60, 0x70, 0x85, 0x70, 0xf0, 0xbe, 0x41, 0x42, 0x04, 0x23, 0x06,
-  0x08, 0x00, 0x82, 0x60, 0x70, 0x89, 0x70, 0x00, 0xbf, 0x41, 0x42, 0x04,
-  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0x8d, 0x70, 0x10, 0xbf, 0x41,
-  0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xa0, 0xa8, 0x70, 0x00,
-  0xbf, 0xc1, 0xeb, 0x06, 0x41, 0xff, 0x06, 0xe0, 0x1b, 0xec, 0x6f, 0x30,
-  0x9a, 0x10, 0x00, 0xc3, 0x0d, 0xc1, 0xff, 0x06, 0x61, 0x70, 0x81, 0x53,
-  0xb3, 0x04, 0x3c, 0x32, 0xdc, 0xb0, 0x56, 0x23, 0x1c, 0x80, 0xc1, 0x2c,
-  0xc3, 0x8a, 0xf0, 0x48, 0x60, 0xa2, 0x1b, 0x90, 0x6e, 0x10, 0x9f, 0xe1,
-  0x88, 0xb8, 0x2a, 0xdd, 0x80, 0xf8, 0x66, 0x19, 0x58, 0xe4, 0x45, 0x02,
-  0x33, 0xdd, 0x40, 0xae, 0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9,
-  0xc0, 0x29, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0x16, 0x0e,
-  0x74, 0xb8, 0x21, 0x58, 0xe1, 0x00, 0x0c, 0x66, 0x19, 0x5a, 0xc4, 0x45,
-  0x02, 0x1b, 0x5c, 0x37, 0x80, 0xcf, 0x2c, 0xc1, 0x8c, 0x58, 0xeb, 0x06,
-  0x44, 0x7c, 0x66, 0x09, 0x66, 0x64, 0x38, 0x82, 0xaf, 0x5c, 0x37, 0x10,
-  0xbe, 0x59, 0x06, 0x18, 0x99, 0x91, 0xc0, 0xfa, 0xea, 0x75, 0x83, 0xf8,
-  0x58, 0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x70, 0xca, 0x82, 0x48, 0x3e,
-  0x56, 0x04, 0xf1, 0x29, 0x02, 0x87, 0x03, 0x1d, 0x6e, 0x08, 0x6c, 0x38,
-  0x00, 0x83, 0x59, 0x86, 0x18, 0x91, 0x91, 0xc0, 0x6e, 0x37, 0x18, 0xe2,
-  0x33, 0x4b, 0x30, 0x23, 0x46, 0xe8, 0x6e, 0x00, 0x9f, 0x59, 0x82, 0x19,
-  0x19, 0x68, 0x71, 0xb4, 0x16, 0xc1, 0x5c, 0x84, 0x88, 0x11, 0x41, 0x46,
-  0x5c, 0x36, 0x78, 0x91, 0x0b, 0x86, 0xb1, 0xdc, 0x0d, 0x7a, 0x37, 0x88,
-  0xcf, 0x70, 0x04, 0x6a, 0xf9, 0x6e, 0x40, 0x7c, 0xb3, 0x0c, 0x34, 0x72,
-  0x23, 0x81, 0xfd, 0x6e, 0x90, 0x5a, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05,
-  0xc3, 0x5c, 0xe0, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x84,
-  0x19, 0x07, 0x3a, 0xdc, 0x10, 0x90, 0x71, 0x00, 0x06, 0xb3, 0x0c, 0x35,
-  0x62, 0x23, 0x81, 0x0d, 0xe7, 0x1b, 0xc0, 0x67, 0x96, 0x60, 0x47, 0x8c,
-  0x7c, 0x03, 0x22, 0x3e, 0xb3, 0x04, 0x3b, 0x32, 0x1c, 0x31, 0x5b, 0xe5,
-  0x1b, 0x08, 0xdf, 0x2c, 0x03, 0x8e, 0xec, 0x48, 0x60, 0xb4, 0x65, 0xbe,
-  0x41, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x38, 0x65, 0x41,
-  0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x11, 0xc7, 0x81, 0x0e, 0x37, 0x04,
-  0x6f, 0x1c, 0x80, 0xc1, 0x2c, 0x43, 0x8e, 0xe8, 0x48, 0x60, 0xee, 0x1b,
-  0x0c, 0xf1, 0x99, 0x25, 0xd8, 0x11, 0x23, 0xe6, 0x37, 0x80, 0xcf, 0x2c,
-  0xc1, 0x8e, 0x0c, 0xb4, 0x38, 0x5a, 0x8d, 0x60, 0x36, 0x42, 0xe4, 0x88,
-  0xa0, 0x23, 0x7c, 0x1b, 0xdc, 0xc8, 0x05, 0xc3, 0x5c, 0xe0, 0xd4, 0x6d,
-  0x4e, 0xdd, 0xef, 0x06, 0xc3, 0x1c, 0xbc, 0x06, 0xc3, 0x1c, 0x31, 0xcc,
-  0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x81, 0x06, 0xca, 0x81,
-  0x1b, 0x07, 0x2b, 0x1c, 0xf0, 0x71, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09,
-  0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23,
-  0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0x9d, 0x72, 0x50, 0xc7, 0x41, 0x42,
-  0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0xa1, 0x72, 0x60, 0xc7,
-  0x41, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0xa5, 0x72,
-  0x70, 0xc7, 0x41, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xa0,
-  0xc0, 0x72, 0x60, 0xc7, 0x41, 0x0d, 0x07, 0xc1, 0x28, 0x07, 0x66, 0x1c,
-  0x84, 0x72, 0x30, 0x9a, 0x10, 0x00, 0xc3, 0x0d, 0x41, 0x29, 0x07, 0x61,
-  0x70, 0x81, 0x53, 0xb3, 0x04, 0x3c, 0x32, 0xd0, 0xe2, 0xa8, 0xc6, 0x89,
-  0xb8, 0xe2, 0x60, 0x22, 0x2e, 0x91, 0x22, 0xc2, 0x8e, 0xb8, 0xe2, 0xa0,
-  0x22, 0xb3, 0x0c, 0x3d, 0xf2, 0x23, 0xf1, 0x35, 0x1c, 0x61, 0x3f, 0x68,
-  0x1c, 0x0c, 0xdf, 0xdd, 0xcf, 0x30, 0xc3, 0x0d, 0xc1, 0x0c, 0x07, 0x64,
-  0x50, 0x43, 0xa0, 0xc3, 0x11, 0xf8, 0xc5, 0xc6, 0xc1, 0xf0, 0x55, 0x20,
-  0xe8, 0xe9, 0xd7, 0x30, 0xc3, 0x0d, 0x81, 0x0d, 0x07, 0x64, 0x50, 0xc1,
-  0xa0, 0xb3, 0x0c, 0x3e, 0x32, 0x27, 0xc1, 0xe9, 0x6f, 0x30, 0xcc, 0xad,
-  0x6c, 0x30, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x68, 0xb9, 0x1c,
-  0x9c, 0x72, 0x40, 0xc6, 0x41, 0x2d, 0x07, 0xa3, 0x09, 0x01, 0x30, 0x9a,
-  0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x1c, 0x32,
-  0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x17, 0x38, 0x07, 0xae, 0x1c, 0x1c,
-  0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x57, 0x38, 0x07, 0xaf,
-  0x1c, 0x30, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x97, 0x38,
-  0x07, 0xb0, 0x1c, 0x48, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06,
-  0x4a, 0x3a, 0x07, 0xaf, 0x1c, 0xb8, 0x71, 0x10, 0xf0, 0x72, 0xf0, 0xc7,
-  0x81, 0x2e, 0x07, 0xa3, 0x09, 0x01, 0x30, 0xdc, 0x10, 0xf8, 0x72, 0x10,
-  0x06, 0x17, 0x38, 0x35, 0x4b, 0x30, 0x27, 0xc3, 0x0d, 0x2a, 0x36, 0xce,
-  0x01, 0x18, 0xcc, 0x32, 0x80, 0x49, 0x98, 0x04, 0xa5, 0xc6, 0xc1, 0x2c,
-  0x07, 0x70, 0x81, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x30, 0xb1,
-  0x73, 0x40, 0xcb, 0x41, 0x0c, 0x8d, 0x72, 0x30, 0x62, 0x70, 0x00, 0x20,
-  0x08, 0x06, 0x53, 0x3b, 0x07, 0xb4, 0x1c, 0x04, 0xc2, 0x05, 0xc3, 0x54,
-  0x1b, 0x07, 0xb8, 0x1c, 0xc0, 0x05, 0x4e, 0x8d, 0x18, 0x1c, 0x00, 0x08,
-  0x82, 0xc1, 0x14, 0xcf, 0x41, 0x2e, 0x07, 0x34, 0x86, 0xca, 0xc1, 0x88,
-  0xc1, 0x01, 0x80, 0x20, 0x18, 0x4c, 0xf2, 0x1c, 0xe4, 0x72, 0x10, 0x08,
-  0x17, 0x0c, 0x73, 0x81, 0x53, 0x77, 0x38, 0x75, 0x36, 0x1c, 0x0c, 0x73,
-  0x67, 0x1b, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00,
-  0x20, 0x08, 0x06, 0xda, 0x3d, 0x07, 0xe5, 0x1c, 0x88, 0x72, 0x30, 0xcf,
-  0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c,
-  0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1,
-  0xe5, 0xcf, 0x01, 0x3b, 0x07, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08,
-  0x82, 0xc1, 0xf5, 0xcf, 0x41, 0x3b, 0x07, 0x09, 0x11, 0x8c, 0x18, 0x20,
-  0x00, 0x08, 0x82, 0xc1, 0x05, 0xd2, 0x81, 0x3b, 0x07, 0x09, 0x11, 0x8c,
-  0x18, 0x28, 0x00, 0x08, 0x82, 0x81, 0x72, 0xd2, 0x41, 0x3b, 0x07, 0xac,
-  0x1c, 0x04, 0xfa, 0x1c, 0xf4, 0x72, 0x80, 0xcf, 0xc1, 0x68, 0x42, 0x00,
-  0x0c, 0x37, 0x04, 0xfc, 0x1c, 0x84, 0xc1, 0x05, 0x4e, 0xcd, 0x12, 0xcc,
-  0xc9, 0x70, 0x03, 0x9a, 0x81, 0x74, 0x00, 0x06, 0xb3, 0x0c, 0x62, 0x32,
-  0x27, 0x81, 0xfd, 0x71, 0x10, 0xca, 0x41, 0x7c, 0x86, 0x23, 0xdc, 0x48,
-  0x94, 0x03, 0xe2, 0x9b, 0x65, 0x18, 0x13, 0x33, 0x09, 0x6c, 0x94, 0x83,
-  0x37, 0x8a, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa7, 0x2c,
-  0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0x54, 0x3a, 0xd0, 0xe1, 0x86,
-  0x00, 0xa5, 0x03, 0x30, 0x98, 0x65, 0x20, 0x93, 0x32, 0x09, 0x6c, 0x58,
-  0xe5, 0x00, 0x3e, 0xb3, 0x04, 0x6a, 0x62, 0xaa, 0x1c, 0x10, 0xf1, 0x99,
-  0x25, 0x50, 0x93, 0xe1, 0x88, 0x3c, 0x5a, 0xe5, 0x40, 0xf8, 0x66, 0x19,
-  0xce, 0x44, 0x4d, 0x02, 0xd3, 0x23, 0x56, 0x0e, 0xe2, 0x63, 0x81, 0x43,
-  0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x29, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4,
-  0xa7, 0x88, 0x9a, 0x0e, 0x74, 0xb8, 0x21, 0x98, 0xe9, 0x00, 0x0c, 0x66,
-  0x19, 0xd0, 0x24, 0x4d, 0x02, 0xa3, 0xe5, 0x60, 0x88, 0xcf, 0x2c, 0x81,
-  0x9a, 0x18, 0x71, 0xcb, 0x01, 0x7c, 0x66, 0x09, 0xd4, 0x64, 0xa0, 0xc5,
-  0xd1, 0xc8, 0x04, 0x2b, 0x13, 0x02, 0x4d, 0x84, 0x34, 0x41, 0xc9, 0xc0,
-  0x4c, 0x2e, 0x18, 0xc6, 0x6c, 0x39, 0xd0, 0xe5, 0x20, 0x3e, 0xc3, 0x11,
-  0xa5, 0xb6, 0xcb, 0x01, 0xf1, 0xcd, 0x32, 0xac, 0x89, 0x9b, 0x04, 0xc6,
-  0xcb, 0x81, 0xa9, 0xc5, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0x81,
-  0x53, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x63, 0x1d, 0xe8,
-  0x70, 0x43, 0x10, 0xd6, 0x01, 0x18, 0xcc, 0x32, 0xb0, 0x49, 0x9b, 0x04,
-  0x36, 0x90, 0x73, 0x00, 0x9f, 0x59, 0x02, 0x39, 0xb1, 0x70, 0x0e, 0x88,
-  0xf8, 0xcc, 0x12, 0xc8, 0xc9, 0x70, 0x04, 0xac, 0x89, 0x73, 0x20, 0x7c,
-  0xb3, 0x0c, 0x6f, 0x22, 0x27, 0x81, 0xc5, 0xda, 0x38, 0x07, 0xf1, 0xb1,
-  0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0xe0, 0x94, 0x05, 0x91, 0x7c, 0xac,
-  0x08, 0xe2, 0x53, 0x84, 0x5b, 0x07, 0x3a, 0xdc, 0x10, 0xb0, 0x75, 0x00,
-  0x06, 0xb3, 0x0c, 0x70, 0x12, 0x27, 0x81, 0xad, 0x73, 0x30, 0xc4, 0x67,
-  0x96, 0x40, 0x4e, 0x8c, 0x80, 0xe7, 0x00, 0x3e, 0xb3, 0x04, 0x72, 0x32,
-  0xd0, 0xe2, 0x68, 0x6c, 0x82, 0xb5, 0x09, 0x01, 0x27, 0x42, 0x9c, 0xd8,
-  0x75, 0xe0, 0x26, 0x17, 0x0c, 0x73, 0x81, 0x53, 0xb7, 0x39, 0x75, 0xbc,
-  0x1c, 0x0c, 0x73, 0xed, 0x1b, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33,
-  0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x5a, 0x5f, 0x07, 0x6b, 0x1d, 0xa0,
-  0x74, 0x90, 0xd7, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68,
-  0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00,
-  0x08, 0x82, 0xc1, 0x45, 0xda, 0x81, 0x5c, 0x07, 0x09, 0x11, 0x8c, 0x18,
-  0x20, 0x00, 0x08, 0x82, 0xc1, 0x55, 0xda, 0xc1, 0x5c, 0x07, 0x09, 0x11,
-  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x65, 0xda, 0x01, 0x5d, 0x07,
-  0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0x81, 0xd2, 0xda, 0xc1,
-  0x5c, 0x07, 0x32, 0x1d, 0x04, 0xa0, 0x1d, 0x8c, 0x75, 0xe0, 0xd7, 0xc1,
-  0x68, 0x42, 0x00, 0x0c, 0x37, 0x04, 0xa2, 0x1d, 0x84, 0xc1, 0x05, 0x4e,
-  0xcd, 0x12, 0xcc, 0xc9, 0x40, 0x8b, 0xa3, 0x1a, 0x3e, 0x82, 0xaa, 0x43,
-  0x8f, 0xb8, 0x04, 0x98, 0x08, 0x72, 0x82, 0xaa, 0x43, 0x98, 0xcc, 0x32,
-  0xd0, 0x89, 0x9d, 0xb8, 0xdb, 0x70, 0x44, 0xfd, 0x94, 0x75, 0x30, 0x7c,
-  0x67, 0x3f, 0xc3, 0x0c, 0x37, 0x04, 0x30, 0x1d, 0x90, 0x41, 0x0d, 0x81,
-  0x0e, 0x47, 0xd4, 0x5b, 0x5a, 0x07, 0xc3, 0x57, 0x81, 0xa0, 0x77, 0x6f,
-  0xc3, 0x0c, 0x37, 0x04, 0x33, 0x1d, 0x90, 0x41, 0x05, 0x83, 0xce, 0x32,
-  0xd4, 0x89, 0xaa, 0x04, 0x77, 0xcf, 0xc1, 0x30, 0x87, 0xc2, 0xc1, 0x30,
-  0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xa0, 0xd9, 0x76, 0x40, 0xda, 0x41,
-  0x58, 0x07, 0xb2, 0x1d, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c,
-  0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x71, 0xc8, 0x88, 0x01, 0x02,
-  0x80, 0x20, 0x18, 0x5c, 0xbd, 0x1d, 0xac, 0x76, 0x70, 0x10, 0xc1, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0xbe, 0x1d, 0xb0, 0x76, 0xc0, 0x10,
-  0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0xbf, 0x1d, 0xb4, 0x76,
-  0x20, 0x11, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x28, 0xe6, 0x1d,
-  0xb0, 0x76, 0xb0, 0xd6, 0x41, 0x90, 0xdb, 0x01, 0x5f, 0x07, 0xb7, 0x1d,
-  0x8c, 0x26, 0x04, 0xc0, 0x70, 0x43, 0xb0, 0xdb, 0x41, 0x18, 0x5c, 0xe0,
-  0xd4, 0x2c, 0x81, 0xaa, 0x0c, 0x37, 0x9c, 0x1c, 0x78, 0x07, 0x60, 0x30,
-  0xcb, 0x70, 0x27, 0x78, 0x12, 0xd4, 0x59, 0x07, 0xb0, 0x1d, 0xc0, 0x05,
-  0x4e, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x94, 0xde, 0x41, 0x6c,
-  0x07, 0x30, 0x04, 0xda, 0xc1, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x4c,
-  0xea, 0x1d, 0xc4, 0x76, 0x10, 0x08, 0x17, 0x0c, 0x53, 0x6a, 0x1d, 0xd4,
-  0x76, 0x00, 0x17, 0x38, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x93,
-  0x7b, 0x07, 0xb6, 0x1d, 0xc4, 0x5c, 0x69, 0x07, 0x23, 0x06, 0x07, 0x00,
-  0x82, 0x60, 0x30, 0xbd, 0x77, 0x60, 0xdb, 0x41, 0x20, 0x5c, 0x30, 0xcc,
-  0x05, 0x4e, 0xdd, 0xe1, 0xd4, 0xcd, 0x74, 0x30, 0xcc, 0x91, 0x71, 0x30,
-  0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18,
-  0x68, 0xf4, 0x1d, 0x88, 0x77, 0xf0, 0xd7, 0x01, 0x7c, 0x07, 0xa3, 0x09,
-  0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c,
-  0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xd7, 0x7e, 0x07,
-  0xe9, 0x1d, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x17,
-  0x7f, 0x07, 0xea, 0x1d, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08,
-  0x06, 0x57, 0x7f, 0x07, 0xeb, 0x1d, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00,
-  0x20, 0x08, 0x06, 0x0a, 0x89, 0x07, 0xea, 0x1d, 0xa4, 0x76, 0x10, 0xdc,
-  0x77, 0xa0, 0xdb, 0x41, 0x7d, 0x07, 0xa3, 0x09, 0x01, 0x30, 0xdc, 0x10,
-  0xe4, 0x77, 0x10, 0x06, 0x17, 0x38, 0x35, 0x4b, 0xa0, 0x2a, 0xc3, 0x0d,
-  0x65, 0xd7, 0xdf, 0x01, 0x18, 0xcc, 0x32, 0xe4, 0x89, 0xaa, 0x04, 0xc6,
-  0xd7, 0x81, 0x5f, 0x07, 0xf1, 0x19, 0x8e, 0x68, 0xa3, 0xbf, 0x0e, 0x88,
-  0x6f, 0x96, 0x41, 0x4f, 0xfa, 0x24, 0x30, 0xd0, 0x0e, 0xdc, 0x28, 0x3e,
-  0x16, 0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9c, 0xb2, 0xc0, 0x90, 0x8f,
-  0x15, 0x41, 0x7c, 0x8a, 0x38, 0xf1, 0x40, 0x87, 0x1b, 0x82, 0x12, 0x0f,
-  0xc0, 0x60, 0x96, 0x61, 0x4f, 0xf8, 0x24, 0xb0, 0x01, 0xb5, 0x03, 0xf8,
-  0xcc, 0x12, 0x84, 0x8a, 0x9d, 0x76, 0x40, 0xc4, 0x67, 0x96, 0x20, 0x54,
-  0x86, 0x23, 0xf0, 0x08, 0xb5, 0x03, 0xe1, 0x9b, 0x65, 0xf0, 0x93, 0x50,
-  0x09, 0x2c, 0x8f, 0x52, 0x3b, 0x88, 0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18,
-  0xe6, 0x02, 0xa7, 0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0x64,
-  0x3c, 0xd0, 0xe1, 0x86, 0x00, 0xc6, 0x03, 0x30, 0x98, 0x65, 0xf8, 0x13,
-  0x50, 0x09, 0x2c, 0xb6, 0x83, 0x21, 0x3e, 0xb3, 0x04, 0xa1, 0x62, 0x04,
-  0x6d, 0x07, 0xf0, 0x99, 0x25, 0x08, 0x95, 0x81, 0x16, 0x47, 0xdb, 0x13,
-  0x8c, 0x4f, 0x88, 0x3f, 0x11, 0x40, 0xc5, 0x24, 0x83, 0x3e, 0xb9, 0x60,
-  0x18, 0x9b, 0xed, 0xe0, 0xb6, 0x83, 0xf8, 0x0c, 0x47, 0x88, 0x1e, 0x6e,
-  0x07, 0xc4, 0x37, 0xcb, 0x20, 0x2a, 0xa5, 0x12, 0x58, 0x6e, 0x07, 0xa3,
-  0x17, 0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4e, 0x59, 0x60,
-  0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0x80, 0x79, 0xa0, 0xc3, 0x0d, 0x81,
-  0x8f, 0x07, 0x60, 0x30, 0xcb, 0x30, 0x2a, 0xa4, 0x12, 0xd8, 0x10, 0xde,
-  0x01, 0x7c, 0x66, 0x09, 0x52, 0xc5, 0x7c, 0x3b, 0x20, 0xe2, 0x33, 0x4b,
-  0x90, 0x2a, 0xc3, 0x11, 0xad, 0xf7, 0xdb, 0x81, 0xf0, 0xcd, 0x32, 0x98,
-  0x4a, 0xaa, 0x04, 0xe6, 0x7a, 0xe0, 0x1d, 0xc4, 0xc7, 0x02, 0x87, 0x3e,
-  0x17, 0x0c, 0x73, 0x81, 0x53, 0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f,
-  0x11, 0x6b, 0x1e, 0xe8, 0x70, 0x43, 0x90, 0xe6, 0x01, 0x18, 0xcc, 0x32,
-  0x9c, 0x0a, 0xaa, 0x04, 0x86, 0xde, 0xc1, 0x10, 0x9f, 0x59, 0x82, 0x54,
-  0x31, 0xa2, 0xbd, 0x03, 0xf8, 0xcc, 0x12, 0xa4, 0xca, 0x40, 0x8b, 0xa3,
-  0x8d, 0x0a, 0x46, 0x2a, 0xc4, 0xa9, 0x08, 0xa8, 0x02, 0xf7, 0x41, 0xa9,
-  0x5c, 0x30, 0xcc, 0x05, 0x4e, 0xdd, 0xe6, 0xd4, 0xe5, 0x76, 0x30, 0xcc,
-  0xa9, 0x73, 0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01,
-  0x80, 0x20, 0x18, 0x68, 0x7a, 0x1e, 0xa0, 0x79, 0x50, 0xe2, 0x81, 0x9d,
-  0x07, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30,
-  0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06,
-  0x57, 0xa8, 0x07, 0x6f, 0x1e, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20,
-  0x08, 0x06, 0x97, 0xa8, 0x07, 0x70, 0x1e, 0x24, 0x44, 0x30, 0x62, 0x80,
-  0x00, 0x20, 0x08, 0x06, 0xd7, 0xa8, 0x07, 0x71, 0x1e, 0x24, 0x44, 0x30,
-  0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x8a, 0xaa, 0x07, 0x70, 0x1e, 0xbc,
-  0x78, 0x10, 0xf4, 0x79, 0x00, 0xe6, 0xc1, 0x9e, 0x07, 0xa3, 0x09, 0x01,
-  0x30, 0xdc, 0x10, 0xfc, 0x79, 0x10, 0x06, 0x17, 0x38, 0x35, 0x4b, 0xa0,
-  0x2a, 0x03, 0x2d, 0x8e, 0x6a, 0xd4, 0x89, 0x28, 0x0f, 0x74, 0xe2, 0x12,
-  0x77, 0x22, 0xa4, 0x8a, 0x28, 0x0f, 0x78, 0x62, 0xee, 0x17, 0xe6, 0x01,
-  0x7c, 0x66, 0x19, 0x56, 0xa5, 0x55, 0xd8, 0x6f, 0x38, 0x02, 0xfe, 0xc6,
-  0x3c, 0x18, 0xbe, 0x8b, 0xbf, 0x61, 0x86, 0x1b, 0x02, 0x17, 0x0f, 0xc8,
-  0xa0, 0x86, 0x40, 0x87, 0x23, 0x8a, 0x33, 0x0f, 0x86, 0xaf, 0x02, 0x41,
-  0xef, 0x18, 0x66, 0xb8, 0x21, 0x88, 0xf1, 0x80, 0x0c, 0x2a, 0x18, 0x74,
-  0x96, 0x81, 0x55, 0xc2, 0x25, 0xb8, 0xfa, 0x0e, 0x86, 0x39, 0x93, 0x0e,
-  0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x8d, 0xd6, 0x03, 0x51,
-  0x0f, 0x7e, 0x3c, 0x80, 0xf5, 0x60, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84,
-  0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x43, 0x46, 0x0c,
-  0x10, 0x00, 0x04, 0xc1, 0xe0, 0xda, 0xf5, 0x20, 0xd5, 0x83, 0x83, 0x08,
-  0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0xe2, 0xf5, 0x40, 0xd5, 0x03,
-  0x86, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0xea, 0xf5, 0x60,
-  0xd5, 0x03, 0x89, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x40, 0x21,
-  0xf7, 0x40, 0xd5, 0x83, 0x34, 0x0f, 0x82, 0x5b, 0x0f, 0xf4, 0x3c, 0xa8,
-  0xf5, 0x60, 0x34, 0x21, 0x00, 0x86, 0x1b, 0x82, 0x5c, 0x0f, 0xc2, 0xe0,
-  0x02, 0xa7, 0x66, 0x09, 0xc2, 0x65, 0xb8, 0xa1, 0x04, 0x03, 0x5f, 0x0f,
-  0xc0, 0x60, 0x96, 0xc1, 0x55, 0x5e, 0x25, 0xa8, 0x32, 0x0f, 0x5c, 0x3d,
-  0x80, 0x0b, 0x9c, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0xe9, 0xdc,
-  0x83, 0x57, 0x0f, 0x56, 0x30, 0xf0, 0xf3, 0x60, 0xc4, 0xe0, 0x00, 0x40,
-  0x10, 0x0c, 0x26, 0x74, 0x0f, 0x5e, 0x3d, 0x08, 0x84, 0x0b, 0x86, 0x29,
-  0x34, 0x0f, 0x66, 0x3d, 0x80, 0x0b, 0x9c, 0x1a, 0x31, 0x38, 0x00, 0x10,
-  0x04, 0x83, 0x89, 0xdd, 0x03, 0x5a, 0x0f, 0xc2, 0x60, 0xd4, 0x83, 0x11,
-  0x83, 0x03, 0x00, 0x41, 0x30, 0x98, 0xda, 0x3d, 0xa0, 0xf5, 0x20, 0x10,
-  0x2e, 0x18, 0xe6, 0x02, 0xa7, 0xee, 0x70, 0xea, 0x62, 0x3c, 0x18, 0xe6,
-  0xc4, 0x3a, 0x18, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00,
-  0x40, 0x10, 0x0c, 0x34, 0x79, 0x0f, 0xc0, 0x3d, 0xe8, 0xf3, 0xc0, 0xdd,
-  0x83, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18,
-  0x4d, 0x20, 0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83,
-  0x2b, 0xdf, 0x83, 0x73, 0x0f, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10,
-  0x04, 0x83, 0x4b, 0xdf, 0x03, 0x74, 0x0f, 0x12, 0x22, 0x18, 0x31, 0x40,
-  0x00, 0x10, 0x04, 0x83, 0x6b, 0xdf, 0x83, 0x74, 0x0f, 0x12, 0x22, 0x18,
-  0x31, 0x50, 0x00, 0x10, 0x04, 0x03, 0x45, 0xe4, 0x03, 0x74, 0x0f, 0x4e,
-  0x3d, 0x08, 0xea, 0x3d, 0xc0, 0xf5, 0x60, 0xde, 0x83, 0xd1, 0x84, 0x00,
-  0x18, 0x6e, 0x08, 0xee, 0x3d, 0x08, 0x83, 0x0b, 0x9c, 0x9a, 0x25, 0x08,
-  0x97, 0xe1, 0x86, 0x31, 0x0c, 0xf6, 0x3d, 0x00, 0x83, 0x59, 0x06, 0x58,
-  0x09, 0x97, 0xc0, 0xf4, 0x3c, 0xe0, 0xf3, 0x20, 0x3e, 0xc3, 0x11, 0x68,
-  0x18, 0xf4, 0x79, 0x40, 0x7c, 0xb3, 0x0c, 0xb1, 0x42, 0x2b, 0x81, 0xf9,
-  0x79, 0x90, 0x86, 0x41, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17,
-  0x38, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x51, 0xf2, 0x81,
-  0x0e, 0x37, 0x04, 0x23, 0x1f, 0x80, 0xc1, 0x2c, 0x83, 0xac, 0xcc, 0x4a,
-  0x60, 0x83, 0xa9, 0x07, 0xf0, 0x99, 0x25, 0xc0, 0x15, 0x2b, 0xf5, 0x80,
-  0x88, 0xcf, 0x2c, 0x01, 0xae, 0x0c, 0x47, 0xcc, 0x61, 0x60, 0xea, 0x81,
-  0xf0, 0xcd, 0x32, 0xd4, 0x0a, 0xae, 0x04, 0x46, 0x87, 0xc1, 0xa9, 0x07,
-  0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0xe0, 0x94, 0x05, 0x91,
-  0x7c, 0xac, 0x08, 0xe2, 0x53, 0x04, 0xcc, 0x07, 0x3a, 0xdc, 0x10, 0xb8,
-  0x7c, 0x00, 0x06, 0xb3, 0x0c, 0xb6, 0x72, 0x2b, 0x81, 0xbd, 0x7a, 0x30,
-  0xc4, 0x67, 0x96, 0x00, 0x57, 0x8c, 0x90, 0xf5, 0x00, 0x3e, 0xb3, 0x04,
-  0xb8, 0x32, 0xd0, 0xe2, 0x68, 0xb2, 0x82, 0xcd, 0x0a, 0x61, 0x2b, 0xc2,
-  0xad, 0xf0, 0xa3, 0x40, 0x2b, 0x17, 0x0c, 0x63, 0xb1, 0x1e, 0xd4, 0x7a,
-  0x10, 0x9f, 0xe1, 0x08, 0x59, 0xb0, 0xf5, 0x80, 0xf8, 0x66, 0x19, 0x72,
-  0x85, 0x57, 0x02, 0xbb, 0xf5, 0x60, 0x16, 0xe2, 0x63, 0xc1, 0x40, 0x9f,
-  0x0b, 0x86, 0xb9, 0xc0, 0x29, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7,
-  0x08, 0x9f, 0x0f, 0x74, 0xb8, 0x21, 0xe0, 0xf9, 0x00, 0x0c, 0x66, 0x19,
-  0x74, 0x65, 0x57, 0x02, 0x1b, 0x7e, 0x3d, 0x80, 0xcf, 0x2c, 0x01, 0xb8,
-  0x18, 0xaf, 0x07, 0x44, 0x7c, 0x66, 0x09, 0xc0, 0x65, 0x38, 0xa2, 0x17,
-  0x7a, 0x3d, 0x10, 0xbe, 0x59, 0x86, 0x5e, 0x01, 0x97, 0xc0, 0x7c, 0xc1,
-  0xd7, 0x83, 0xf8, 0x58, 0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x70, 0xca,
-  0x82, 0x48, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x22, 0xed, 0x03, 0x1d, 0x6e,
-  0x08, 0xce, 0x3e, 0x00, 0x83, 0x59, 0x06, 0x5f, 0xf9, 0x95, 0xc0, 0xcc,
-  0x3d, 0x18, 0xe2, 0x33, 0x4b, 0x00, 0x2e, 0x46, 0xac, 0x7b, 0x00, 0x9f,
-  0x59, 0x02, 0x70, 0x19, 0x68, 0x71, 0x34, 0x5d, 0xc1, 0x76, 0x85, 0xf0,
-  0x15, 0xe1, 0x57, 0x60, 0x83, 0x57, 0x2e, 0x18, 0xe6, 0x02, 0xa7, 0x6e,
-  0x73, 0xea, 0x6e, 0x3d, 0x18, 0xe6, 0xd0, 0x3b, 0x18, 0xe6, 0x88, 0x61,
-  0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x34, 0xbc, 0x0f,
-  0xcc, 0x3e, 0x18, 0xf9, 0x80, 0xee, 0x83, 0xd1, 0x84, 0x00, 0x18, 0x4d,
-  0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x12, 0x19,
-  0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0xeb, 0xef, 0x83, 0xb6, 0x0f, 0x12,
-  0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x0b, 0xf4, 0x03, 0xb7,
-  0x0f, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x2b, 0xf4,
-  0x83, 0xb7, 0x0f, 0x12, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x03,
-  0x05, 0xf5, 0x03, 0xb7, 0x0f, 0x5a, 0x3e, 0x08, 0xf6, 0x3e, 0xf0, 0xf9,
-  0x20, 0xef, 0x83, 0xd1, 0x84, 0x00, 0x18, 0x6e, 0x08, 0xfa, 0x3e, 0x08,
-  0x83, 0x0b, 0x9c, 0x9a, 0x25, 0x08, 0x97, 0x81, 0x16, 0x47, 0x35, 0x58,
-  0xc5, 0xcf, 0x87, 0x55, 0x71, 0x09, 0x57, 0x11, 0xc0, 0xc5, 0xcf, 0x87,
-  0x57, 0x99, 0x65, 0x10, 0x17, 0x72, 0x49, 0xc7, 0x60, 0x38, 0xc2, 0x1d,
-  0x03, 0xb0, 0x0f, 0x86, 0xef, 0xde, 0x31, 0x18, 0x66, 0xb8, 0x21, 0x58,
-  0xf9, 0x80, 0x0c, 0x6a, 0x08, 0x74, 0x38, 0x42, 0x24, 0xc8, 0x3e, 0x18,
-  0xbe, 0x0a, 0x04, 0x3d, 0x92, 0x18, 0x66, 0xb8, 0x21, 0x70, 0xf9, 0x80,
-  0x0c, 0x2a, 0x18, 0x74, 0x96, 0x61, 0x5c, 0xf0, 0x25, 0x38, 0x79, 0x0f,
-  0x86, 0xb9, 0x11, 0x0f, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03,
-  0x2d, 0xf6, 0x83, 0xbf, 0x0f, 0x78, 0x3e, 0x68, 0xfd, 0x60, 0x34, 0x21,
-  0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1,
-  0x88, 0x43, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0xc2, 0xfd, 0xc0,
-  0xf4, 0x83, 0x83, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0xca,
-  0xfd, 0xe0, 0xf4, 0x03, 0x86, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1,
-  0xe0, 0xd2, 0xfd, 0x00, 0xf5, 0x03, 0x89, 0x08, 0x46, 0x0c, 0x14, 0x00,
-  0x04, 0xc1, 0x40, 0x09, 0xff, 0xe0, 0xf4, 0x03, 0xb3, 0x0f, 0x02, 0xda,
-  0x0f, 0xee, 0x3e, 0x90, 0xfd, 0x60, 0x34, 0x21, 0x00, 0x86, 0x1b, 0x02,
-  0xdb, 0x0f, 0xc2, 0xe0, 0x02, 0xa7, 0x66, 0x09, 0xf0, 0x65, 0xb8, 0x41,
-  0x24, 0x83, 0xdd, 0x0f, 0xc0, 0x60, 0x96, 0xa1, 0x5c, 0xcc, 0x25, 0x28,
-  0xb1, 0x0f, 0x56, 0x3f, 0x80, 0x0b, 0x9c, 0x1a, 0x31, 0x38, 0x00, 0x10,
-  0x04, 0x83, 0x89, 0xfc, 0x03, 0xd6, 0x0f, 0x52, 0x32, 0xd8, 0xfb, 0x60,
-  0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xa6, 0xf2, 0x0f, 0x58, 0x3f, 0x08,
-  0x84, 0x0b, 0x86, 0xa9, 0xb2, 0x0f, 0x60, 0x3f, 0x80, 0x0b, 0x9c, 0x1a,
-  0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x29, 0xfd, 0x83, 0xd8, 0x0f, 0x7c,
-  0x02, 0xf4, 0x83, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x98, 0xd4, 0x3f,
-  0x88, 0xfd, 0x20, 0x10, 0x2e, 0x18, 0xe6, 0x02, 0xa7, 0xee, 0x70, 0xea,
-  0x5c, 0x3e, 0x18, 0xe6, 0x7e, 0x3c, 0x18, 0xe6, 0x88, 0x61, 0x8e, 0x18,
-  0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xb4, 0xf7, 0x0f, 0x7a, 0x3f,
-  0xd0, 0xfb, 0x60, 0xfd, 0x83, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82,
-  0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x12, 0x19, 0x31, 0x40,
-  0x00, 0x10, 0x04, 0x83, 0xcb, 0xfe, 0x03, 0xf2, 0x0f, 0x12, 0x22, 0x18,
-  0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0xeb, 0xfe, 0x83, 0xf2, 0x0f, 0x12,
-  0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x0b, 0xff, 0x03, 0xf3,
-  0x0f, 0x12, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x03, 0xe5, 0xff,
-  0x83, 0xf2, 0x0f, 0x48, 0x3f, 0x08, 0xe4, 0x3f, 0xa8, 0xfd, 0x00, 0xfe,
-  0x83, 0xd1, 0x84, 0x00, 0x18, 0x6e, 0x08, 0xe8, 0x3f, 0x08, 0x83, 0x0b,
-  0x9c, 0x9a, 0x25, 0xc0, 0x97, 0xe1, 0x06, 0xb0, 0x0c, 0xf0, 0x3f, 0x00,
-  0x83, 0x59, 0x86, 0x73, 0xc1, 0x97, 0xc0, 0xee, 0x3e, 0xc8, 0xfb, 0x20,
-  0x3e, 0xc3, 0x11, 0x66, 0x19, 0xe8, 0x7d, 0x40, 0x7c, 0xb3, 0x0c, 0xe8,
-  0xb2, 0x2e, 0x81, 0xed, 0x7d, 0x70, 0x96, 0x41, 0x7c, 0x2c, 0x18, 0xe8,
-  0x73, 0xc1, 0x30, 0x17, 0x38, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8,
-  0x14, 0x21, 0x82, 0x82, 0x0e, 0x37, 0x04, 0x20, 0x28, 0x80, 0xc1, 0x2c,
-  0x43, 0xba, 0xa8, 0x4b, 0x60, 0xc3, 0xe8, 0x07, 0xf0, 0x99, 0x25, 0x78,
-  0x17, 0x13, 0xfd, 0x80, 0x88, 0xcf, 0x2c, 0xc1, 0xbb, 0x0c, 0x47, 0xc4,
-  0x65, 0x30, 0xfa, 0x81, 0xf0, 0xcd, 0x32, 0xb0, 0xcb, 0xbb, 0x04, 0x26,
-  0x97, 0x01, 0xe9, 0x07, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c,
-  0xe0, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x44, 0x0b, 0x0a,
-  0x3a, 0xdc, 0x10, 0xac, 0xa0, 0x00, 0x06, 0xb3, 0x0c, 0xed, 0xe2, 0x2e,
-  0x81, 0xb1, 0x7e, 0x30, 0xc4, 0x67, 0x96, 0xe0, 0x5d, 0x8c, 0x78, 0xfd,
-  0x00, 0x3e, 0xb3, 0x04, 0xef, 0x32, 0xd0, 0xe2, 0x68, 0xe9, 0x82, 0xa9,
-  0x0b, 0xd1, 0x2e, 0x82, 0xbb, 0xe0, 0xab, 0xb0, 0x2e, 0x17, 0x0c, 0x63,
-  0xae, 0x1f, 0xc8, 0x7e, 0x10, 0x9f, 0xe1, 0x88, 0xd7, 0x98, 0xfd, 0x80,
-  0xf8, 0x66, 0x19, 0xe0, 0x65, 0x5e, 0x02, 0xa3, 0xfd, 0x00, 0x36, 0xe2,
-  0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x29, 0x0b, 0x0c, 0xf9,
-  0x58, 0x11, 0xc4, 0xa7, 0x88, 0x1d, 0x14, 0x74, 0xb8, 0x21, 0xc8, 0x41,
-  0x01, 0x0c, 0x66, 0x19, 0xe2, 0x45, 0x5e, 0x02, 0x1b, 0x78, 0x3f, 0x80,
-  0xcf, 0x2c, 0xc1, 0xbd, 0x58, 0xee, 0x07, 0x44, 0x7c, 0x66, 0x09, 0xee,
-  0x65, 0x38, 0x42, 0x37, 0x74, 0x3f, 0x10, 0xbe, 0x59, 0x06, 0x7a, 0xb9,
-  0x97, 0xc0, 0x76, 0x63, 0xf7, 0x83, 0xf8, 0x58, 0xe0, 0xd0, 0xe7, 0x82,
-  0x61, 0x2e, 0x70, 0xca, 0x82, 0x48, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0xc2,
-  0x0c, 0x05, 0x1d, 0x6e, 0x08, 0xc8, 0x50, 0x00, 0x83, 0x59, 0x86, 0x7a,
-  0xb1, 0x97, 0xc0, 0xc6, 0x3f, 0x18, 0xe2, 0x33, 0x4b, 0x70, 0x2f, 0x46,
-  0xa0, 0x7f, 0x00, 0x9f, 0x59, 0x82, 0x7b, 0x19, 0x68, 0x71, 0xb4, 0x78,
-  0xc1, 0xe4, 0x85, 0xa8, 0x17, 0xc1, 0x5e, 0x54, 0x67, 0x5e, 0x2e, 0x18,
-  0xe6, 0x02, 0xa7, 0x6e, 0x73, 0xea, 0x68, 0x3f, 0x18, 0xe6, 0xca, 0x3d,
-  0x18, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10,
-  0x0c, 0xb4, 0x3a, 0x14, 0xc6, 0x50, 0x00, 0x41, 0x21, 0x0e, 0x85, 0xd1,
-  0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20,
-  0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x8b, 0x0f,
-  0x05, 0x35, 0x14, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83,
-  0xab, 0x0f, 0x85, 0x35, 0x14, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10,
-  0x04, 0x83, 0xcb, 0x0f, 0x05, 0x36, 0x14, 0x12, 0x22, 0x18, 0x31, 0x50,
-  0x00, 0x10, 0x04, 0x03, 0xa5, 0x14, 0x85, 0x35, 0x14, 0x54, 0x50, 0x08,
-  0xf0, 0x50, 0xd8, 0x41, 0xc1, 0x0e, 0x85, 0xd1, 0x84, 0x00, 0x18, 0x6e,
-  0x08, 0xf4, 0x50, 0x08, 0x83, 0x0b, 0x9c, 0x9a, 0x25, 0xc0, 0x97, 0x81,
-  0x16, 0x47, 0x35, 0xc6, 0x05, 0x0f, 0x09, 0x71, 0x71, 0x89, 0x72, 0x11,
-  0xee, 0x05, 0x0f, 0x09, 0x73, 0x99, 0x65, 0xc8, 0x97, 0x7d, 0x31, 0xcf,
-  0x60, 0x38, 0x62, 0xfd, 0x7a, 0x50, 0x18, 0xbe, 0x63, 0xbf, 0x61, 0x86,
-  0x1b, 0x02, 0x14, 0x14, 0xc8, 0xa0, 0x86, 0x40, 0x87, 0x23, 0xfe, 0x23,
-  0x0c, 0x85, 0xe1, 0xab, 0x40, 0xd0, 0x0b, 0x91, 0x61, 0x86, 0x1b, 0x82,
-  0x15, 0x14, 0xc8, 0xa0, 0x82, 0x41, 0x67, 0x19, 0xf4, 0xe5, 0x65, 0x82,
-  0x7b, 0xff, 0x60, 0x98, 0x03, 0xf9, 0x60, 0x98, 0x11, 0x83, 0x03, 0x00,
-  0x41, 0x30, 0xd0, 0x5c, 0x51, 0xe0, 0x43, 0x21, 0x07, 0x05, 0x55, 0x14,
-  0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34,
-  0x81, 0x18, 0x8a, 0x38, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xae,
-  0x5a, 0x14, 0x46, 0x51, 0x38, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10,
-  0x0c, 0x2e, 0x5b, 0x14, 0x48, 0x51, 0x60, 0x88, 0x60, 0xc4, 0x00, 0x01,
-  0x40, 0x10, 0x0c, 0xae, 0x5b, 0x14, 0x4a, 0x51, 0x90, 0x88, 0x60, 0xc4,
-  0x40, 0x01, 0x40, 0x10, 0x0c, 0x14, 0x5f, 0x14, 0x48, 0x51, 0x18, 0x43,
-  0x21, 0x88, 0x45, 0x81, 0x0e, 0x85, 0x57, 0x14, 0x46, 0x13, 0x02, 0x60,
-  0xb8, 0x21, 0x98, 0x45, 0x21, 0x0c, 0x2e, 0x70, 0x6a, 0x96, 0xe0, 0x65,
-  0x86, 0x1b, 0xfe, 0x33, 0xc0, 0x45, 0x01, 0x0c, 0x66, 0x19, 0xf8, 0xa5,
-  0x5f, 0x82, 0xfa, 0x41, 0x01, 0x15, 0x05, 0xb8, 0xc0, 0xa9, 0x11, 0x83,
-  0x03, 0x00, 0x41, 0x30, 0x98, 0xc2, 0x51, 0x48, 0x45, 0xc1, 0x04, 0x03,
-  0x3c, 0x14, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0x12, 0x47, 0x21,
-  0x15, 0x85, 0x40, 0xb8, 0x60, 0x98, 0x12, 0x43, 0xa1, 0x15, 0x05, 0xb8,
-  0xc0, 0xa9, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x98, 0xcc, 0x51, 0x70,
-  0x45, 0x61, 0x47, 0xfa, 0x50, 0x18, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83,
-  0xe9, 0x1c, 0x05, 0x57, 0x14, 0x02, 0xe1, 0x82, 0x61, 0x2e, 0x70, 0xea,
-  0x0e, 0xa7, 0x6e, 0x05, 0x85, 0x61, 0x8e, 0xe7, 0x83, 0x61, 0x8e, 0x18,
-  0xe6, 0x88, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x40, 0x63, 0x47,
-  0x41, 0x17, 0x85, 0x3b, 0x14, 0xd0, 0x51, 0x18, 0x4d, 0x08, 0x80, 0xd1,
-  0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0x22, 0x91,
-  0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xb8, 0xe6, 0x51, 0x08, 0x47, 0x21,
-  0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xb8, 0xe8, 0x51, 0x10,
-  0x47, 0x21, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xb8, 0xea,
-  0x51, 0x18, 0x47, 0x21, 0x21, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30,
-  0x50, 0xf8, 0x51, 0x10, 0x47, 0x21, 0x14, 0x85, 0xe0, 0x1d, 0x05, 0x59,
-  0x14, 0xda, 0x51, 0x18, 0x4d, 0x08, 0x80, 0xe1, 0x86, 0x20, 0x1e, 0x85,
-  0x30, 0xb8, 0xc0, 0xa9, 0x59, 0x82, 0x97, 0x19, 0x6e, 0xe8, 0xd1, 0xa0,
-  0x1e, 0x05, 0x30, 0x98, 0x65, 0xf0, 0x97, 0x97, 0x09, 0x8c, 0x0e, 0x05,
-  0x3b, 0x14, 0xe2, 0x33, 0x1c, 0x31, 0x86, 0xc1, 0x1d, 0x0a, 0xc4, 0x37,
-  0xcb, 0xf0, 0x2f, 0x22, 0x13, 0x18, 0x1e, 0x0a, 0x64, 0x18, 0xc4, 0xc7,
-  0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x53, 0x16, 0x18, 0xf2, 0xb1,
-  0x22, 0x88, 0x4f, 0x11, 0xff, 0x28, 0xe8, 0x70, 0x43, 0xd0, 0x8f, 0x02,
-  0x18, 0xcc, 0x32, 0x80, 0x4c, 0xc8, 0x04, 0x36, 0x80, 0xa2, 0x00, 0x9f,
-  0x59, 0x02, 0x93, 0xb1, 0x3f, 0x14, 0x88, 0xf8, 0xcc, 0x12, 0x98, 0xcc,
-  0x70, 0x84, 0x1b, 0x06, 0xa0, 0x28, 0x08, 0xdf, 0x2c, 0xc3, 0xc8, 0x98,
-  0x4c, 0x60, 0x6f, 0x18, 0x84, 0xa2, 0x10, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c,
-  0x30, 0xcc, 0x05, 0x4e, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45,
-  0xa8, 0xa4, 0xa0, 0xc3, 0x0d, 0x01, 0x4a, 0x0a, 0x60, 0x30, 0xcb, 0x40,
-  0x32, 0x25, 0x13, 0x58, 0x2a, 0x0a, 0x43, 0x7c, 0x66, 0x09, 0x4c, 0xc6,
-  0x08, 0x56, 0x14, 0xe0, 0x33, 0x4b, 0x60, 0x32, 0x03, 0x2d, 0x8e, 0x06,
-  0x32, 0x58, 0xc8, 0x10, 0x24, 0x23, 0x94, 0x8c, 0x3c, 0x0a, 0x22, 0x73,
-  0xc1, 0x30, 0xb6, 0x8a, 0xc2, 0x2b, 0x0a, 0xf1, 0x19, 0x8e, 0x60, 0x15,
-  0x58, 0x14, 0x88, 0x6f, 0x96, 0xe1, 0x64, 0x54, 0x26, 0xb0, 0x58, 0x14,
-  0x5a, 0x25, 0x3e, 0x16, 0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9c, 0xb2,
-  0xc0, 0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xc0, 0x49, 0x41, 0x87, 0x1b,
-  0x02, 0x9b, 0x14, 0xc0, 0x60, 0x96, 0x01, 0x65, 0x52, 0x26, 0xb0, 0x21,
-  0x17, 0x05, 0xf8, 0xcc, 0x12, 0xb8, 0x8c, 0xd9, 0xa2, 0x40, 0xc4, 0x67,
-  0x96, 0xc0, 0x65, 0x86, 0x23, 0x6e, 0xe5, 0x16, 0x05, 0xe1, 0x9b, 0x65,
-  0x58, 0x19, 0x97, 0x09, 0x0c, 0x57, 0x70, 0x51, 0x88, 0x8f, 0x05, 0x0e,
-  0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa7, 0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10,
-  0x9f, 0x22, 0xc6, 0x52, 0xd0, 0xe1, 0x86, 0x20, 0x2c, 0x05, 0x30, 0x98,
-  0x65, 0x60, 0x99, 0x96, 0x09, 0x0c, 0x1c, 0x85, 0x21, 0x3e, 0xb3, 0x04,
-  0x2e, 0x63, 0x44, 0x39, 0x0a, 0xf0, 0x99, 0x25, 0x70, 0x99, 0x81, 0x16,
-  0x47, 0x43, 0x19, 0x2c, 0x65, 0x08, 0x96, 0x11, 0x5a, 0x86, 0xb4, 0x54,
-  0xe6, 0x82, 0x61, 0x2e, 0x70, 0xea, 0x36, 0xa7, 0x2e, 0x16, 0x85, 0x61,
-  0x4e, 0xfc, 0x83, 0x61, 0x8e, 0x18, 0xe6, 0x88, 0x61, 0x46, 0x0c, 0x0e,
-  0x00, 0x04, 0xc1, 0x40, 0x93, 0x4b, 0x01, 0x2c, 0x85, 0x7e, 0x14, 0xdc,
-  0x52, 0x18, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84,
-  0xd1, 0x04, 0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30,
-  0xb8, 0xf2, 0x52, 0x38, 0x4b, 0x21, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00,
-  0x41, 0x30, 0xb8, 0xf4, 0x52, 0x40, 0x4b, 0x21, 0x21, 0x82, 0x11, 0x03,
-  0x04, 0x00, 0x41, 0x30, 0xb8, 0xf6, 0x52, 0x48, 0x4b, 0x21, 0x21, 0x82,
-  0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x50, 0x44, 0x53, 0x40, 0x4b, 0xe1,
-  0x24, 0x85, 0xa0, 0x2e, 0x05, 0x9c, 0x14, 0xe6, 0x52, 0x18, 0x4d, 0x08,
-  0x80, 0xe1, 0x86, 0xe0, 0x2e, 0x85, 0x30, 0xb8, 0xc0, 0xa9, 0x59, 0x82,
-  0x97, 0x19, 0x68, 0x71, 0x54, 0x43, 0x5f, 0xe4, 0x94, 0xc8, 0x17, 0x97,
-  0xe0, 0x17, 0xc1, 0x65, 0xe4, 0x94, 0xe8, 0x97, 0x59, 0x06, 0x98, 0x91,
-  0x99, 0x71, 0x0d, 0x86, 0x23, 0xd4, 0x4f, 0x27, 0x85, 0xe1, 0xbb, 0xf5,
-  0x1b, 0x66, 0xb8, 0x21, 0x28, 0x49, 0x81, 0x0c, 0x6a, 0x08, 0x74, 0x38,
-  0x82, 0x5f, 0x7c, 0x52, 0x18, 0xbe, 0x0a, 0x04, 0x3d, 0x7f, 0x19, 0x66,
-  0xb8, 0x21, 0x40, 0x49, 0x81, 0x0c, 0x2a, 0x18, 0x74, 0x96, 0x21, 0x66,
-  0xcc, 0x26, 0x38, 0x76, 0x14, 0x86, 0xb9, 0xfe, 0x0f, 0x86, 0x19, 0x31,
-  0x38, 0x00, 0x10, 0x04, 0x03, 0x6d, 0x35, 0x85, 0xbc, 0x14, 0x6c, 0x52,
-  0x38, 0x4d, 0x61, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61,
-  0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x43, 0x46, 0x0c, 0x10, 0x00, 0x04,
-  0xc1, 0xe0, 0x92, 0x4d, 0x01, 0x34, 0x85, 0x83, 0x08, 0x46, 0x0c, 0x10,
-  0x00, 0x04, 0xc1, 0xe0, 0x9a, 0x4d, 0x21, 0x34, 0x05, 0x86, 0x08, 0x46,
-  0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0xa2, 0x4d, 0x41, 0x34, 0x05, 0x89,
-  0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x40, 0xd9, 0x4d, 0x21, 0x34,
-  0x05, 0xb0, 0x14, 0x02, 0xd7, 0x14, 0xe2, 0x52, 0x60, 0x4d, 0x61, 0x34,
-  0x21, 0x00, 0x86, 0x1b, 0x02, 0xd8, 0x14, 0xc2, 0xe0, 0x02, 0xa7, 0x66,
-  0x09, 0xcc, 0x66, 0xb8, 0x81, 0x5f, 0x83, 0xda, 0x14, 0xc0, 0x60, 0x96,
-  0x61, 0x66, 0x68, 0x26, 0x28, 0x9e, 0x14, 0x4a, 0x53, 0x80, 0x0b, 0x9c,
-  0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0xc9, 0x37, 0x05, 0xd3, 0x14,
-  0x4a, 0x30, 0xa8, 0x4b, 0x61, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xa6,
-  0xdf, 0x14, 0x4c, 0x53, 0x08, 0x84, 0x0b, 0x86, 0xa9, 0x9f, 0x14, 0x54,
-  0x53, 0x80, 0x0b, 0x9c, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x69,
-  0x3c, 0x85, 0xd5, 0x14, 0x70, 0x46, 0x2f, 0x85, 0x11, 0x83, 0x03, 0x00,
-  0x41, 0x30, 0x98, 0xc8, 0x53, 0x58, 0x4d, 0x21, 0x10, 0x2e, 0x18, 0xe6,
-  0x02, 0xa7, 0xee, 0x70, 0xea, 0x50, 0x52, 0x18, 0xe6, 0x72, 0x50, 0x18,
-  0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c,
-  0xb4, 0xf4, 0x14, 0x6e, 0x53, 0xa0, 0x4b, 0xa1, 0x3c, 0x85, 0xd1, 0x84,
-  0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86,
-  0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x0b, 0x3e, 0x05,
-  0xdf, 0x14, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x2b,
-  0x3e, 0x85, 0xdf, 0x14, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04,
-  0x83, 0x4b, 0x3e, 0x05, 0xf0, 0x14, 0x12, 0x22, 0x18, 0x31, 0x50, 0x00,
-  0x10, 0x04, 0x03, 0x25, 0x3f, 0x85, 0xdf, 0x14, 0xfc, 0x52, 0x08, 0xd8,
-  0x53, 0x78, 0x4d, 0x41, 0x3d, 0x85, 0xd1, 0x84, 0x00, 0x18, 0x6e, 0x08,
-  0xdc, 0x53, 0x08, 0x83, 0x0b, 0x9c, 0x9a, 0x25, 0x30, 0x9b, 0xe1, 0x06,
-  0x9d, 0x0d, 0xe4, 0x53, 0x00, 0x83, 0x59, 0x86, 0x9a, 0x31, 0x9b, 0xc0,
-  0xe2, 0x52, 0x98, 0x4b, 0x21, 0x3e, 0xc3, 0x11, 0x62, 0x18, 0xd0, 0xa5,
-  0x40, 0x7c, 0xb3, 0x0c, 0x36, 0x93, 0x33, 0x81, 0xd5, 0xa5, 0x30, 0x86,
-  0x41, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x38, 0x65, 0x81,
-  0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xc1, 0x9f, 0x82, 0x0e, 0x37, 0x04,
-  0xfa, 0x29, 0x80, 0xc1, 0x2c, 0xc3, 0xcd, 0xe0, 0x4c, 0x60, 0x43, 0x5f,
-  0x0a, 0xf0, 0x99, 0x25, 0xe8, 0x19, 0xe3, 0x4b, 0x81, 0x88, 0xcf, 0x2c,
-  0x41, 0xcf, 0x0c, 0x47, 0xb4, 0x61, 0xd0, 0x97, 0x82, 0xf0, 0xcd, 0x32,
-  0xe8, 0x4c, 0xcf, 0x04, 0xe6, 0x86, 0x81, 0x5f, 0x0a, 0xf1, 0xb1, 0xc0,
-  0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0xe0, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08,
-  0xe2, 0x53, 0xc4, 0x89, 0x0a, 0x3a, 0xdc, 0x10, 0x94, 0xa8, 0x00, 0x06,
-  0xb3, 0x0c, 0x3b, 0xc3, 0x33, 0x81, 0x99, 0xa6, 0x30, 0xc4, 0x67, 0x96,
-  0xa0, 0x67, 0x8c, 0x48, 0x4d, 0x01, 0x3e, 0xb3, 0x04, 0x3d, 0x33, 0xd0,
-  0xe2, 0x68, 0x37, 0x83, 0xe1, 0x0c, 0xb1, 0x33, 0x02, 0xcf, 0xc0, 0xa3,
-  0x90, 0x33, 0x17, 0x0c, 0x63, 0xa8, 0x29, 0xb0, 0xa6, 0x10, 0x9f, 0xe1,
-  0x88, 0xd4, 0x69, 0x4d, 0x81, 0xf8, 0x66, 0x19, 0x7c, 0x26, 0x6c, 0x02,
-  0x73, 0x4d, 0x41, 0x75, 0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9,
-  0xc0, 0x29, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0x1a, 0x15,
-  0x74, 0xb8, 0x21, 0x98, 0x51, 0x01, 0x0c, 0x66, 0x19, 0x7e, 0x06, 0x6c,
-  0x02, 0x1b, 0x6c, 0x53, 0x80, 0xcf, 0x2c, 0x41, 0xd9, 0xd8, 0x6c, 0x0a,
-  0x44, 0x7c, 0x66, 0x09, 0xca, 0x66, 0x38, 0x82, 0x76, 0x68, 0x53, 0x10,
-  0xbe, 0x59, 0x06, 0xb1, 0x29, 0x9b, 0xc0, 0x6a, 0xa7, 0x36, 0x85, 0xf8,
-  0x58, 0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x70, 0xca, 0x82, 0x48, 0x3e,
-  0x56, 0x04, 0xf1, 0x29, 0x02, 0x4c, 0x05, 0x1d, 0x6e, 0x08, 0x7c, 0x54,
-  0x00, 0x83, 0x59, 0x86, 0xb1, 0x21, 0x9b, 0xc0, 0x7a, 0x53, 0x18, 0xe2,
-  0x33, 0x4b, 0x50, 0x36, 0x46, 0x88, 0xa7, 0x00, 0x9f, 0x59, 0x82, 0xb2,
-  0x19, 0x68, 0x71, 0xb4, 0x9f, 0xc1, 0xc0, 0x86, 0x18, 0x1b, 0x81, 0x6c,
-  0xfc, 0x2e, 0x6c, 0x2e, 0x18, 0xe6, 0x02, 0xa7, 0x6e, 0x73, 0xea, 0x5c,
-  0x53, 0x18, 0xe6, 0x7e, 0x51, 0x18, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66,
-  0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xb4, 0x37, 0x15, 0x7a, 0x54, 0xd0,
-  0x4f, 0x61, 0x4d, 0x85, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1,
-  0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00,
-  0x10, 0x04, 0x83, 0xcb, 0x4e, 0x05, 0x32, 0x15, 0x12, 0x22, 0x18, 0x31,
-  0x40, 0x00, 0x10, 0x04, 0x83, 0xeb, 0x4e, 0x85, 0x32, 0x15, 0x12, 0x22,
-  0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x0b, 0x4f, 0x05, 0x33, 0x15,
-  0x12, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x03, 0xe5, 0x4f, 0x85,
-  0x32, 0x15, 0x48, 0x54, 0x08, 0xe4, 0x54, 0xa8, 0x51, 0x01, 0x4e, 0x85,
-  0xd1, 0x84, 0x00, 0x18, 0x6e, 0x08, 0xe8, 0x54, 0x08, 0x83, 0x0b, 0x9c,
-  0x9a, 0x25, 0x30, 0x9b, 0x81, 0x16, 0x47, 0x35, 0x62, 0x86, 0x8d, 0x09,
-  0x98, 0x71, 0x89, 0x99, 0x11, 0xca, 0x86, 0x8d, 0x09, 0x9a, 0xb1, 0xf1,
-  0x0d, 0x58, 0x54, 0x80, 0xcf, 0x2c, 0xc3, 0xd9, 0xa4, 0x4d, 0xf8, 0x06,
-  0xc3, 0x11, 0xe5, 0x1b, 0xe0, 0xa8, 0x30, 0x7c, 0x67, 0xbe, 0xc1, 0x30,
-  0xc3, 0x0d, 0xc1, 0x88, 0x0a, 0x64, 0x50, 0x43, 0xa0, 0xc3, 0x11, 0x05,
-  0x8f, 0x0a, 0xc3, 0x57, 0x81, 0xa0, 0x77, 0x0c, 0x33, 0xdc, 0x10, 0x98,
-  0xa8, 0x40, 0x06, 0x15, 0x0c, 0x3a, 0xcb, 0x80, 0x36, 0x7d, 0x13, 0x9c,
-  0x7a, 0x0a, 0xc3, 0xdc, 0x3e, 0x0a, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08,
-  0x82, 0x81, 0x96, 0xaa, 0xc2, 0x9d, 0x0a, 0x34, 0x2a, 0x94, 0xaa, 0x30,
-  0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09,
-  0xc4, 0x50, 0xc4, 0x21, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0xc1,
-  0xaa, 0xe0, 0xa7, 0xc2, 0x41, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60,
-  0x70, 0xc5, 0xaa, 0xf0, 0xa7, 0x02, 0x43, 0x04, 0x23, 0x06, 0x08, 0x00,
-  0x82, 0x60, 0x70, 0xc9, 0xaa, 0x00, 0xaa, 0x82, 0x44, 0x04, 0x23, 0x06,
-  0x0a, 0x00, 0x82, 0x60, 0xa0, 0xe4, 0xaa, 0xf0, 0xa7, 0x82, 0x8f, 0x0a,
-  0x01, 0xab, 0x0a, 0x6f, 0x2a, 0xa8, 0xaa, 0x30, 0x9a, 0x10, 0x00, 0xc3,
-  0x0d, 0x81, 0xab, 0x0a, 0x61, 0x70, 0x81, 0x53, 0xb3, 0x04, 0x7d, 0x33,
-  0xdc, 0xa0, 0xbf, 0xc1, 0xac, 0x0a, 0x60, 0x30, 0xcb, 0xa0, 0x36, 0x6b,
-  0x13, 0x94, 0x8e, 0x0a, 0xa3, 0x2a, 0xc0, 0x05, 0x4e, 0x8d, 0x18, 0x1c,
-  0x00, 0x08, 0x82, 0xc1, 0xc4, 0xab, 0x02, 0xa9, 0x0a, 0x20, 0x1c, 0xcc,
-  0xa9, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x53, 0xaf, 0x0a, 0xa4,
-  0x2a, 0x04, 0xc2, 0x05, 0xc3, 0x54, 0x8f, 0x0a, 0xa8, 0x2a, 0xc0, 0x05,
-  0x4e, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x14, 0xae, 0x42, 0xaa,
-  0x0a, 0x61, 0x80, 0xa7, 0xc2, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x4c,
-  0xe2, 0x2a, 0xa4, 0xaa, 0x10, 0x08, 0x17, 0x0c, 0x73, 0x81, 0x53, 0x77,
-  0x38, 0x75, 0x26, 0x2a, 0x0c, 0x73, 0x37, 0x29, 0x0c, 0x73, 0xc4, 0x30,
-  0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0xda, 0xb9, 0x0a,
-  0xb5, 0x2a, 0xc8, 0xa9, 0x30, 0xae, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26,
-  0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xe5, 0xae, 0x02, 0xaf, 0x0a, 0x09,
-  0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xf5, 0xae, 0x42, 0xaf,
-  0x0a, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x05, 0xaf,
-  0x82, 0xaf, 0x0a, 0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0x81,
-  0x72, 0xaf, 0x42, 0xaf, 0x0a, 0x7c, 0x2a, 0x04, 0xea, 0x2a, 0xb4, 0xaa,
-  0x80, 0xae, 0xc2, 0x68, 0x42, 0x00, 0x0c, 0x37, 0x04, 0xec, 0x2a, 0x84,
-  0xc1, 0x05, 0x4e, 0xcd, 0x12, 0xf4, 0xcd, 0x70, 0x03, 0x0e, 0x07, 0xf0,
-  0x2a, 0x80, 0xc1, 0x2c, 0x03, 0xdb, 0xf4, 0x4d, 0x60, 0x6f, 0x2a, 0xc4,
-  0xa9, 0x10, 0x9f, 0xe1, 0x88, 0x1e, 0x0e, 0xe4, 0x54, 0x20, 0xbe, 0x59,
-  0x86, 0xb6, 0x81, 0x9b, 0xc0, 0xe6, 0x54, 0xf0, 0xe1, 0x20, 0x3e, 0x16,
-  0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9c, 0xb2, 0xc0, 0x90, 0x8f, 0x15,
-  0x41, 0x7c, 0x8a, 0xd0, 0x57, 0x41, 0x87, 0x1b, 0x02, 0x7c, 0x15, 0xc0,
-  0x60, 0x96, 0xc1, 0x6d, 0xde, 0x26, 0xb0, 0x61, 0x4f, 0x05, 0xf8, 0xcc,
-  0x12, 0xd0, 0x8d, 0xe9, 0xa9, 0x40, 0xc4, 0x67, 0x96, 0x80, 0x6e, 0x86,
-  0x23, 0xd0, 0x38, 0xd8, 0x53, 0x41, 0xf8, 0x66, 0x19, 0xe2, 0x86, 0x6e,
-  0x02, 0x4b, 0xe3, 0x80, 0x4f, 0x85, 0xf8, 0x58, 0xe0, 0xd0, 0xe7, 0x82,
-  0x61, 0x2e, 0x70, 0xca, 0x82, 0x48, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0xa2,
-  0x64, 0x05, 0x1d, 0x6e, 0x08, 0x46, 0x56, 0x00, 0x83, 0x59, 0x06, 0xb9,
-  0x99, 0x9b, 0xc0, 0x48, 0x55, 0x18, 0xe2, 0x33, 0x4b, 0x40, 0x37, 0x46,
-  0x9c, 0xaa, 0x00, 0x9f, 0x59, 0x02, 0xba, 0x19, 0x68, 0x71, 0x34, 0xb7,
-  0xc1, 0xde, 0x86, 0x90, 0x1b, 0x61, 0x6e, 0xcc, 0x71, 0x80, 0x9b, 0x0b,
-  0x86, 0x31, 0x53, 0x15, 0x54, 0x55, 0x88, 0xcf, 0x70, 0x84, 0x2c, 0xac,
-  0xaa, 0x40, 0x7c, 0xb3, 0x0c, 0x75, 0x83, 0x37, 0x81, 0xb1, 0xaa, 0x30,
-  0x0b, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0xe0, 0x94, 0x05,
-  0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0xc4, 0xcc, 0x0a, 0x3a, 0xdc, 0x10,
-  0xc4, 0xac, 0x00, 0x06, 0xb3, 0x0c, 0x76, 0x73, 0x37, 0x81, 0x0d, 0xb4,
-  0x2a, 0xc0, 0x67, 0x96, 0x80, 0x6f, 0x2c, 0x56, 0x05, 0x22, 0x3e, 0xb3,
-  0x04, 0x7c, 0x33, 0x1c, 0xd1, 0x0b, 0xb2, 0x2a, 0x08, 0xdf, 0x2c, 0x43,
-  0xde, 0xf0, 0x4d, 0x60, 0xbe, 0x30, 0xab, 0x42, 0x7c, 0x2c, 0x70, 0xe8,
-  0x73, 0xc1, 0x30, 0x17, 0x38, 0x65, 0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8,
-  0x14, 0xe1, 0xb3, 0x82, 0x0e, 0x37, 0x04, 0x3c, 0x2b, 0x80, 0xc1, 0x2c,
-  0x83, 0xde, 0xec, 0x4d, 0x60, 0xbb, 0x2a, 0x0c, 0xf1, 0x99, 0x25, 0xe0,
-  0x1b, 0x23, 0xc0, 0x55, 0x80, 0xcf, 0x2c, 0x01, 0xdf, 0x0c, 0xb4, 0x38,
-  0x9a, 0xdd, 0x60, 0x77, 0x43, 0xe8, 0x8d, 0xb0, 0x37, 0xb0, 0x81, 0x37,
-  0x17, 0x0c, 0x73, 0x81, 0x53, 0xb7, 0x39, 0x75, 0xac, 0x2a, 0x0c, 0x73,
-  0xbd, 0x29, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00,
-  0x20, 0x08, 0x06, 0x5a, 0xdb, 0x0a, 0x3b, 0x2b, 0xe0, 0xab, 0x90, 0xb6,
-  0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c,
-  0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1,
-  0x45, 0xb7, 0x82, 0xd8, 0x0a, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08,
-  0x82, 0xc1, 0x55, 0xb7, 0xc2, 0xd8, 0x0a, 0x09, 0x11, 0x8c, 0x18, 0x20,
-  0x00, 0x08, 0x82, 0xc1, 0x65, 0xb7, 0x02, 0xd9, 0x0a, 0x09, 0x11, 0x8c,
-  0x18, 0x28, 0x00, 0x08, 0x82, 0x81, 0xd2, 0xb7, 0xc2, 0xd8, 0x0a, 0x22,
-  0x2b, 0x04, 0x70, 0x2b, 0xcc, 0xac, 0xe0, 0xb6, 0xc2, 0x68, 0x42, 0x00,
-  0x0c, 0x37, 0x04, 0x72, 0x2b, 0x84, 0xc1, 0x05, 0x4e, 0xcd, 0x12, 0xf4,
-  0xcd, 0x40, 0x8b, 0xa3, 0x1a, 0x68, 0x83, 0xe6, 0xc4, 0xd9, 0xb8, 0x84,
-  0xda, 0x08, 0x7c, 0x83, 0xe6, 0xc4, 0xda, 0xcc, 0x32, 0xf8, 0x0d, 0xe8,
-  0xf8, 0x72, 0x30, 0x1c, 0x31, 0xce, 0x41, 0xcd, 0x0a, 0xc3, 0x77, 0xe4,
-  0x1c, 0x0c, 0x33, 0xdc, 0x10, 0x80, 0xac, 0x40, 0x06, 0x35, 0x04, 0x3a,
-  0x1c, 0x21, 0x12, 0x39, 0x2b, 0x0c, 0x5f, 0x05, 0x82, 0x1e, 0x49, 0x0c,
-  0x33, 0xdc, 0x10, 0x8c, 0xac, 0x40, 0x06, 0x15, 0x0c, 0x3a, 0xcb, 0xf0,
-  0x37, 0xb4, 0x13, 0xdc, 0xb9, 0x0a, 0xc3, 0x1c, 0x7e, 0x0a, 0xc3, 0x8c,
-  0x18, 0x1c, 0x00, 0x08, 0x82, 0x81, 0x66, 0xba, 0x02, 0xdd, 0x0a, 0x31,
-  0x2b, 0x88, 0xae, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a,
-  0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0xc4, 0x21, 0x23, 0x06, 0x08, 0x00,
-  0x82, 0x60, 0x70, 0xb5, 0xae, 0xb0, 0xb7, 0xc2, 0x41, 0x04, 0x23, 0x06,
-  0x08, 0x00, 0x82, 0x60, 0x70, 0xb9, 0xae, 0xc0, 0xb7, 0x02, 0x43, 0x04,
-  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0xbd, 0xae, 0xd0, 0xb7, 0x82,
-  0x44, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xa0, 0xd8, 0xae, 0xc0,
-  0xb7, 0xc2, 0xce, 0x0a, 0x41, 0xea, 0x0a, 0x6c, 0x2b, 0x9c, 0xae, 0x30,
-  0x9a, 0x10, 0x00, 0xc3, 0x0d, 0xc1, 0xea, 0x0a, 0x61, 0x70, 0x81, 0x53,
-  0xb3, 0x04, 0xb4, 0x33, 0xdc, 0x70, 0xcf, 0x01, 0xec, 0x0a, 0x60, 0x30,
-  0xcb, 0x10, 0x3a, 0xa2, 0x13, 0xd4, 0xcd, 0x0a, 0xa0, 0x2b, 0xc0, 0x05,
-  0x4e, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x94, 0xbb, 0x42, 0xe8,
-  0x0a, 0xfe, 0x1c, 0xc0, 0xad, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06,
-  0x93, 0xee, 0x0a, 0xa1, 0x2b, 0x04, 0xc2, 0x05, 0xc3, 0x94, 0xce, 0x0a,
-  0xa5, 0x2b, 0xc0, 0x05, 0x4e, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1,
-  0xe4, 0xbb, 0x82, 0xe9, 0x0a, 0x3e, 0x51, 0xb7, 0xc2, 0x88, 0xc1, 0x01,
-  0x80, 0x20, 0x18, 0x4c, 0xbf, 0x2b, 0x98, 0xae, 0x10, 0x08, 0x17, 0x0c,
-  0x73, 0x81, 0x53, 0x77, 0x38, 0x75, 0x23, 0x2b, 0x0c, 0x73, 0x34, 0x2a,
-  0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08,
-  0x06, 0x1a, 0xf9, 0x0a, 0xb2, 0x2b, 0xbc, 0xad, 0x00, 0xbe, 0xc2, 0x68,
-  0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10,
-  0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xb5, 0xbe,
-  0x42, 0xee, 0x0a, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1,
-  0xc5, 0xbe, 0x82, 0xee, 0x0a, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08,
-  0x82, 0xc1, 0xd5, 0xbe, 0xc2, 0xee, 0x0a, 0x09, 0x11, 0x8c, 0x18, 0x28,
-  0x00, 0x08, 0x82, 0x81, 0x42, 0xbf, 0x82, 0xee, 0x0a, 0x79, 0x2b, 0x04,
-  0xe7, 0x2b, 0xa8, 0xae, 0x50, 0xbe, 0xc2, 0x68, 0x42, 0x00, 0x0c, 0x37,
-  0x04, 0xe9, 0x2b, 0x84, 0xc1, 0x05, 0x4e, 0xcd, 0x12, 0xd0, 0xce, 0x70,
-  0x43, 0x4d, 0x07, 0xed, 0x2b, 0x80, 0xc1, 0x2c, 0xc3, 0xe8, 0xd0, 0x4e,
-  0x60, 0x6c, 0x2b, 0xb8, 0xad, 0x10, 0x9f, 0xe1, 0x88, 0x9d, 0x0e, 0xde,
-  0x56, 0x20, 0xbe, 0x59, 0x06, 0xd2, 0x39, 0x9d, 0xc0, 0xe0, 0x56, 0xe0,
-  0xe9, 0x20, 0x3e, 0x16, 0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9c, 0xb2,
-  0xc0, 0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xb8, 0x5f, 0x41, 0x87, 0x1b,
-  0x82, 0xfa, 0x15, 0xc0, 0x60, 0x96, 0xa1, 0x74, 0x4c, 0x27, 0xb0, 0x01,
-  0x6f, 0x05, 0xf8, 0xcc, 0x12, 0xac, 0x8e, 0xdd, 0xad, 0x40, 0xc4, 0x67,
-  0x96, 0x60, 0x75, 0x86, 0x23, 0xcc, 0x3a, 0xc0, 0x5b, 0x41, 0xf8, 0x66,
-  0x19, 0x50, 0x67, 0x75, 0x02, 0x3b, 0xeb, 0x20, 0x6f, 0x85, 0xf8, 0x58,
-  0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x70, 0xca, 0x82, 0x48, 0x3e, 0x56,
-  0x04, 0xf1, 0x29, 0x42, 0x84, 0x05, 0x1d, 0x6e, 0x08, 0x40, 0x58, 0x00,
-  0x83, 0x59, 0x86, 0xd4, 0x51, 0x9d, 0xc0, 0x42, 0x57, 0x18, 0xe2, 0x33,
-  0x4b, 0xb0, 0x3a, 0x46, 0x90, 0xae, 0x00, 0x9f, 0x59, 0x82, 0xd5, 0x19,
-  0x68, 0x71, 0xb4, 0xd2, 0xc1, 0x4c, 0x87, 0x48, 0x1d, 0x41, 0x75, 0xc4,
-  0x75, 0x38, 0x9d, 0x0b, 0x86, 0xb1, 0xd1, 0x15, 0x4e, 0x57, 0x88, 0xcf,
-  0x70, 0xc4, 0x6b, 0xa0, 0xae, 0x40, 0x7c, 0xb3, 0x0c, 0xac, 0xf3, 0x3a,
-  0x81, 0xa5, 0xae, 0x00, 0x1b, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3,
-  0x5c, 0xe0, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x04, 0x0c,
-  0x0b, 0x3a, 0xdc, 0x10, 0xb8, 0xb0, 0x00, 0x06, 0xb3, 0x0c, 0xad, 0xe3,
-  0x3a, 0x81, 0x0d, 0xb1, 0x2b, 0xc0, 0x67, 0x96, 0x60, 0x76, 0xcc, 0x75,
-  0x05, 0x22, 0x3e, 0xb3, 0x04, 0xb3, 0x33, 0x1c, 0xa1, 0x1b, 0xaf, 0x2b,
-  0x08, 0xdf, 0x2c, 0x03, 0xec, 0xcc, 0x4e, 0x60, 0xbb, 0x01, 0xbb, 0x42,
-  0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x38, 0x65, 0x41, 0x24,
-  0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xb1, 0xc3, 0x82, 0x0e, 0x37, 0x04, 0x39,
-  0x2c, 0x80, 0xc1, 0x2c, 0x43, 0xec, 0xc8, 0x4e, 0x60, 0xb8, 0x2b, 0x0c,
-  0xf1, 0x99, 0x25, 0x98, 0x1d, 0x23, 0x7a, 0x57, 0x80, 0xcf, 0x2c, 0xc1,
-  0xec, 0x0c, 0xb4, 0x38, 0x5a, 0xeb, 0x60, 0xae, 0x43, 0xc4, 0x8e, 0x20,
-  0x3b, 0xaa, 0xf3, 0x3a, 0x17, 0x0c, 0x73, 0x81, 0x53, 0xb7, 0x39, 0x75,
-  0xa9, 0x2b, 0x0c, 0x73, 0xba, 0x2a, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c,
-  0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x9a, 0x1a, 0x0b, 0x38, 0x2c,
-  0xd4, 0xaf, 0x60, 0xc6, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1,
-  0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20,
-  0x00, 0x08, 0x82, 0xc1, 0x15, 0xc7, 0xc2, 0x0f, 0x0b, 0x09, 0x11, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x25, 0xc7, 0x02, 0x18, 0x0b, 0x09,
-  0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x35, 0xc7, 0x42, 0x18,
-  0x0b, 0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0x81, 0xa2, 0xc7,
-  0x02, 0x18, 0x0b, 0xff, 0x2b, 0x04, 0x6d, 0x2c, 0xc0, 0xb0, 0xb0, 0xc6,
-  0xc2, 0x68, 0x42, 0x00, 0x0c, 0x37, 0x04, 0x6f, 0x2c, 0x84, 0xc1, 0x05,
-  0x4e, 0xcd, 0x12, 0xd0, 0xce, 0x40, 0x8b, 0xa3, 0x1a, 0x7f, 0x23, 0x86,
-  0x85, 0xdf, 0xb8, 0x44, 0xe8, 0x08, 0xb3, 0x23, 0x86, 0x85, 0xe8, 0xcc,
-  0x32, 0xd4, 0xce, 0xed, 0xec, 0x76, 0x30, 0x1c, 0x01, 0xbe, 0x81, 0x0c,
-  0x0b, 0xc3, 0x77, 0xe1, 0x1b, 0x0c, 0x33, 0xdc, 0x10, 0xf4, 0xaf, 0x40,
-  0x06, 0x35, 0x04, 0x3a, 0x1c, 0xf1, 0x1f, 0x36, 0x2c, 0x0c, 0x5f, 0x05,
-  0x82, 0x5e, 0x88, 0x0c, 0x33, 0xdc, 0x10, 0x80, 0xb0, 0x40, 0x06, 0x15,
-  0x0c, 0x3a, 0xcb, 0x60, 0x3b, 0xeb, 0x13, 0x1c, 0xf9, 0x0a, 0xc3, 0x5c,
-  0xbd, 0x0a, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x81, 0x36, 0xca,
-  0x42, 0x1c, 0x0b, 0x2e, 0x2c, 0xfc, 0xb1, 0x30, 0x9a, 0x10, 0x00, 0xa3,
-  0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0xc4, 0x21,
-  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0xa9, 0xb2, 0x80, 0xc7, 0xc2,
-  0x41, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0xad, 0xb2, 0x90,
-  0xc7, 0x02, 0x43, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0xb1,
-  0xb2, 0xa0, 0xc7, 0x82, 0x44, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60,
-  0xa0, 0xcc, 0xb2, 0x90, 0xc7, 0x02, 0x0e, 0x0b, 0x81, 0x29, 0x0b, 0x69,
-  0x2c, 0x90, 0xb2, 0x30, 0x9a, 0x10, 0x00, 0xc3, 0x0d, 0x01, 0x2a, 0x0b,
-  0x61, 0x70, 0x81, 0x53, 0xb3, 0x04, 0xeb, 0x33, 0xdc, 0x40, 0xdf, 0x41,
-  0x2b, 0x0b, 0x60, 0x30, 0xcb, 0x80, 0x3b, 0xb9, 0x13, 0x14, 0x0d, 0x0b,
-  0x7d, 0x2c, 0xc0, 0x05, 0x4e, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1,
-  0x64, 0xcb, 0x82, 0x1f, 0x0b, 0xfb, 0x1b, 0xb4, 0xb1, 0x30, 0x62, 0x70,
-  0x00, 0x20, 0x08, 0x06, 0xd3, 0x2d, 0x0b, 0x7e, 0x2c, 0x04, 0xc2, 0x05,
-  0xc3, 0xd4, 0x0d, 0x0b, 0xa2, 0x2c, 0xc0, 0x05, 0x4e, 0x8d, 0x18, 0x1c,
-  0x00, 0x08, 0x82, 0xc1, 0xb4, 0xcb, 0xc2, 0x28, 0x0b, 0x3b, 0x22, 0xc7,
-  0xc2, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x4c, 0xbc, 0x2c, 0x8c, 0xb2,
-  0x10, 0x08, 0x17, 0x0c, 0x73, 0x81, 0x53, 0x77, 0x38, 0x75, 0x20, 0x2c,
-  0x0c, 0x73, 0x31, 0x2b, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62,
-  0x70, 0x00, 0x20, 0x08, 0x06, 0x5a, 0x38, 0x0b, 0xaf, 0x2c, 0xb0, 0xb1,
-  0xd0, 0xcb, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2,
-  0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08,
-  0x82, 0xc1, 0x85, 0xce, 0x82, 0x2d, 0x0b, 0x09, 0x11, 0x8c, 0x18, 0x20,
-  0x00, 0x08, 0x82, 0xc1, 0x95, 0xce, 0xc2, 0x2d, 0x0b, 0x09, 0x11, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xa5, 0xce, 0x02, 0x2e, 0x0b, 0x09,
-  0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0x81, 0x12, 0xcf, 0xc2, 0x2d,
-  0x0b, 0x76, 0x2c, 0x04, 0xe4, 0x2c, 0x9c, 0xb2, 0x20, 0xce, 0xc2, 0x68,
-  0x42, 0x00, 0x0c, 0x37, 0x04, 0xe6, 0x2c, 0x84, 0xc1, 0x05, 0x4e, 0xcd,
-  0x12, 0xac, 0xcf, 0x70, 0x83, 0x8c, 0x07, 0xea, 0x2c, 0x80, 0xc1, 0x2c,
-  0x83, 0xee, 0xac, 0x4f, 0x60, 0x69, 0x2c, 0xac, 0xb1, 0x10, 0x9f, 0xe1,
-  0x08, 0x1c, 0x0e, 0xd8, 0x58, 0x20, 0xbe, 0x59, 0x86, 0xdd, 0xf1, 0x9d,
-  0xc0, 0xda, 0x58, 0xc8, 0xe1, 0x20, 0x3e, 0x16, 0x0c, 0xf4, 0xb9, 0x60,
-  0x98, 0x0b, 0x9c, 0xb2, 0xc0, 0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xa0,
-  0x67, 0x41, 0x87, 0x1b, 0x02, 0x79, 0x16, 0xc0, 0x60, 0x96, 0x81, 0x77,
-  0x7a, 0x27, 0xb0, 0xa1, 0x8e, 0x05, 0xf8, 0xcc, 0x12, 0x88, 0x8f, 0xd1,
-  0xb1, 0x40, 0xc4, 0x67, 0x96, 0x40, 0x7c, 0x86, 0x23, 0xc6, 0x38, 0xa8,
-  0x63, 0x41, 0xf8, 0x66, 0x19, 0x7e, 0x47, 0x7c, 0x02, 0x23, 0xe3, 0xc0,
-  0x8e, 0x85, 0xf8, 0x58, 0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x70, 0xca,
-  0x82, 0x48, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0xe2, 0x9f, 0x05, 0x1d, 0x6e,
-  0x08, 0xfa, 0x59, 0x00, 0x83, 0x59, 0x06, 0xf0, 0x09, 0x9f, 0xc0, 0xfc,
-  0x58, 0x18, 0xe2, 0x33, 0x4b, 0x20, 0x3e, 0x46, 0x84, 0xb2, 0x00, 0x9f,
-  0x59, 0x02, 0xf1, 0x19, 0x68, 0x71, 0x34, 0xde, 0xc1, 0x7a, 0x87, 0x00,
-  0x1f, 0x21, 0x7c, 0x78, 0x71, 0xf0, 0x9d, 0x0b, 0x86, 0x31, 0x50, 0x16,
-  0x48, 0x59, 0x88, 0xcf, 0x70, 0x04, 0xab, 0x94, 0xb2, 0x40, 0x7c, 0xb3,
-  0x0c, 0xe3, 0x63, 0x3e, 0x81, 0x99, 0xb2, 0xd0, 0x2a, 0xf1, 0xb1, 0x60,
-  0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0xe0, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08,
-  0xe2, 0x53, 0x44, 0x4b, 0x0b, 0x3a, 0xdc, 0x10, 0xac, 0xb4, 0x00, 0x06,
-  0xb3, 0x0c, 0xe4, 0x53, 0x3e, 0x81, 0x0d, 0xae, 0x2c, 0xc0, 0x67, 0x96,
-  0x40, 0x7d, 0x6c, 0x95, 0x05, 0x22, 0x3e, 0xb3, 0x04, 0xea, 0x33, 0x1c,
-  0x71, 0x2b, 0xac, 0x2c, 0x08, 0xdf, 0x2c, 0xc3, 0xf9, 0xa8, 0x4f, 0x60,
-  0xb8, 0xd2, 0xca, 0x42, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17,
-  0x38, 0x65, 0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x81, 0xd3, 0x82,
-  0x0e, 0x37, 0x04, 0x36, 0x2d, 0x80, 0xc1, 0x2c, 0x03, 0xfa, 0xa4, 0x4f,
-  0x60, 0xb5, 0x2c, 0x0c, 0xf1, 0x99, 0x25, 0x50, 0x1f, 0x23, 0x74, 0x59,
-  0x80, 0xcf, 0x2c, 0x81, 0xfa, 0x0c, 0xb4, 0x38, 0x1a, 0xf9, 0x60, 0xe5,
-  0x43, 0xa0, 0x8f, 0x90, 0x3e, 0xa4, 0x65, 0x3e, 0x17, 0x0c, 0x73, 0x81,
-  0x53, 0xb7, 0x39, 0x75, 0xa6, 0x2c, 0x0c, 0x73, 0xb7, 0x2b, 0x0c, 0x73,
-  0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0xda,
-  0x59, 0x0b, 0x35, 0x2d, 0xc8, 0xb3, 0x30, 0xd6, 0xc2, 0x68, 0x42, 0x00,
-  0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11,
-  0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xe5, 0xd6, 0x02, 0x4f,
-  0x0b, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xf5, 0xd6,
-  0x42, 0x4f, 0x0b, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1,
-  0x05, 0xd7, 0x82, 0x4f, 0x0b, 0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08,
-  0x82, 0x81, 0x72, 0xd7, 0x42, 0x4f, 0x0b, 0xfc, 0x2c, 0x04, 0x6a, 0x2d,
-  0xb4, 0xb4, 0x80, 0xd6, 0xc2, 0x68, 0x42, 0x00, 0x0c, 0x37, 0x04, 0x6c,
-  0x2d, 0x84, 0xc1, 0x05, 0x4e, 0xcd, 0x12, 0xac, 0xcf, 0x40, 0x8b, 0xa3,
-  0x1a, 0xb6, 0xc3, 0xa3, 0x45, 0xed, 0xb8, 0x04, 0xee, 0x08, 0xea, 0xc3,
-  0xa3, 0x45, 0xee, 0xcc, 0x32, 0xb0, 0x8f, 0xfb, 0xe0, 0x7a, 0x30, 0x1c,
-  0xf1, 0xbb, 0xc1, 0x4b, 0x0b, 0xc3, 0x77, 0xe0, 0x1b, 0x0c, 0x33, 0xdc,
-  0x10, 0xe8, 0xb3, 0x40, 0x06, 0x35, 0x04, 0x3a, 0x1c, 0xc1, 0x2f, 0x33,
-  0x2d, 0x0c, 0x5f, 0x05, 0x82, 0x9e, 0xbf, 0x0c, 0x33, 0xdc, 0x10, 0xf4,
-  0xb3, 0x40, 0x06, 0x15, 0x0c, 0x3a, 0xcb, 0xd0, 0x3e, 0x22, 0x14, 0x5c,
-  0x38, 0x0b, 0xc3, 0x9c, 0xfc, 0x0a, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08,
-  0x82, 0x81, 0x06, 0xda, 0x82, 0x5b, 0x0b, 0x2b, 0x2d, 0xf0, 0xb5, 0x30,
-  0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09,
-  0xc4, 0x50, 0xc4, 0x21, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0x9d,
-  0xb6, 0x50, 0xd7, 0xc2, 0x41, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60,
-  0x70, 0xa1, 0xb6, 0x60, 0xd7, 0x02, 0x43, 0x04, 0x23, 0x06, 0x08, 0x00,
-  0x82, 0x60, 0x70, 0xa5, 0xb6, 0x70, 0xd7, 0x82, 0x44, 0x04, 0x23, 0x06,
-  0x0a, 0x00, 0x82, 0x60, 0xa0, 0xc0, 0xb6, 0x60, 0xd7, 0x42, 0x4d, 0x0b,
-  0xc1, 0x68, 0x0b, 0x66, 0x2d, 0x84, 0xb6, 0x30, 0x9a, 0x10, 0x00, 0xc3,
-  0x0d, 0x41, 0x69, 0x0b, 0x61, 0x70, 0x81, 0x53, 0xb3, 0x04, 0x22, 0x34,
-  0xdc, 0x10, 0xef, 0x81, 0x6a, 0x0b, 0x60, 0x30, 0xcb, 0xf0, 0x3e, 0xf0,
-  0x13, 0x54, 0x4c, 0x0b, 0x7a, 0x2d, 0xc0, 0x05, 0x4e, 0x8d, 0x18, 0x1c,
-  0x00, 0x08, 0x82, 0xc1, 0x34, 0xdb, 0xc2, 0x5e, 0x0b, 0xfa, 0x1b, 0xa8,
-  0xb5, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x13, 0x6d, 0x0b, 0x7b,
-  0x2d, 0x04, 0xc2, 0x05, 0xc3, 0x14, 0x4d, 0x0b, 0x7f, 0x2d, 0xc0, 0x05,
-  0x4e, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x84, 0xdb, 0x02, 0x68,
-  0x0b, 0x38, 0xf3, 0xd6, 0xc2, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x4c,
-  0xb9, 0x2d, 0x80, 0xb6, 0x10, 0x08, 0x17, 0x0c, 0x73, 0x81, 0x53, 0x77,
-  0x38, 0x75, 0xfd, 0x2c, 0x0c, 0x73, 0x2e, 0x2c, 0x0c, 0x73, 0xc4, 0x30,
-  0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x9a, 0x6f, 0x0b,
-  0xac, 0x2d, 0xa4, 0xb5, 0xa0, 0xdb, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26,
-  0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x55, 0xde, 0xc2, 0x6c, 0x0b, 0x09,
-  0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x65, 0xde, 0x02, 0x6d,
-  0x0b, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x75, 0xde,
-  0x42, 0x6d, 0x0b, 0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0x81,
-  0xe2, 0xde, 0x02, 0x6d, 0x0b, 0x73, 0x2d, 0x04, 0xe1, 0x2d, 0x90, 0xb6,
-  0xf0, 0xdb, 0xc2, 0x68, 0x42, 0x00, 0x0c, 0x37, 0x04, 0xe3, 0x2d, 0x84,
-  0xc1, 0x05, 0x4e, 0xcd, 0x12, 0x88, 0xd0, 0x70, 0xc3, 0xcb, 0x07, 0xe7,
-  0x2d, 0x80, 0xc1, 0x2c, 0x43, 0xfc, 0x88, 0x50, 0x60, 0x66, 0x2d, 0xa0,
-  0xb5, 0x10, 0x9f, 0xe1, 0x88, 0x1b, 0x0e, 0xd2, 0x5a, 0x20, 0xbe, 0x59,
-  0x06, 0xf9, 0xa9, 0x9f, 0xc0, 0xd4, 0x5a, 0xc0, 0xe1, 0x20, 0x3e, 0x16,
-  0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9c, 0xb2, 0xc0, 0x90, 0x8f, 0x15,
-  0x41, 0x7c, 0x8a, 0x88, 0x6f, 0x41, 0x87, 0x1b, 0x82, 0xf7, 0x16, 0xc0,
-  0x60, 0x96, 0x61, 0x7e, 0xe8, 0x27, 0xb0, 0x41, 0xae, 0x05, 0xf8, 0xcc,
-  0x12, 0xe4, 0x8f, 0xc5, 0xb5, 0x40, 0xc4, 0x67, 0x96, 0x20, 0x7f, 0x86,
-  0x23, 0xc4, 0x38, 0x90, 0x6b, 0x41, 0xf8, 0x66, 0x19, 0xec, 0x27, 0x7f,
-  0x02, 0x1b, 0xe3, 0x60, 0xae, 0x85, 0xf8, 0x58, 0xe0, 0xd0, 0xe7, 0x82,
-  0x61, 0x2e, 0x70, 0xca, 0x82, 0x48, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x82,
-  0xbf, 0x05, 0x1d, 0x6e, 0x08, 0xf4, 0x5b, 0x00, 0x83, 0x59, 0x86, 0xfb,
-  0xc1, 0x9f, 0xc0, 0xf6, 0x5a, 0x18, 0xe2, 0x33, 0x4b, 0x90, 0x3f, 0x46,
-  0xf8, 0xb5, 0x00, 0x9f, 0x59, 0x82, 0xfc, 0x19, 0x68, 0x71, 0xb4, 0xf9,
-  0xc1, 0xe8, 0x87, 0xb8, 0x1f, 0x01, 0x7f, 0x74, 0x71, 0xa8, 0x9f, 0x0b,
-  0x86, 0xb1, 0xbe, 0x16, 0x42, 0x5b, 0x88, 0xcf, 0x70, 0x44, 0xea, 0x88,
-  0xb6, 0x40, 0x7c, 0xb3, 0x0c, 0xfa, 0xd3, 0x3f, 0x81, 0x8d, 0xb6, 0xa0,
-  0x3a, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0xe0, 0x94, 0x05,
-  0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x84, 0x8a, 0x0b, 0x3a, 0xdc, 0x10,
-  0xa0, 0xb8, 0x00, 0x06, 0xb3, 0x0c, 0xfb, 0xc3, 0x3f, 0x81, 0x0d, 0xab,
-  0x2d, 0xc0, 0x67, 0x96, 0x20, 0x84, 0x0c, 0xb5, 0x05, 0x22, 0x3e, 0xb3,
-  0x04, 0x21, 0x34, 0x1c, 0x41, 0x3b, 0xa9, 0x2d, 0x08, 0xdf, 0x2c, 0x83,
-  0xff, 0x84, 0x50, 0x60, 0xb5, 0xa3, 0xda, 0x42, 0x7c, 0x2c, 0x70, 0xe8,
-  0x73, 0xc1, 0x30, 0x17, 0x38, 0x65, 0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8,
-  0x14, 0x51, 0xe3, 0x82, 0x0e, 0x37, 0x04, 0x33, 0x2e, 0x80, 0xc1, 0x2c,
-  0xc3, 0xff, 0x80, 0x50, 0x60, 0xb2, 0x2d, 0x0c, 0xf1, 0x99, 0x25, 0x08,
-  0x21, 0x23, 0x6e, 0x5b, 0x80, 0xcf, 0x2c, 0x41, 0x08, 0x0d, 0xb4, 0x38,
-  0xda, 0xfe, 0x60, 0xfc, 0x43, 0xfc, 0x8f, 0x00, 0x42, 0x7e, 0xd7, 0x3f,
-  0x17, 0x0c, 0x73, 0x81, 0x53, 0xb7, 0x39, 0x75, 0xa3, 0x2d, 0x0c, 0x73,
-  0xb4, 0x2c, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00,
-  0x20, 0x08, 0x06, 0x1a, 0x99, 0x0b, 0x32, 0x2e, 0xbc, 0xb7, 0x00, 0xe6,
-  0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c,
-  0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1,
-  0xb5, 0xe6, 0x42, 0x8e, 0x0b, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08,
-  0x82, 0xc1, 0xc5, 0xe6, 0x82, 0x8e, 0x0b, 0x09, 0x11, 0x8c, 0x18, 0x20,
-  0x00, 0x08, 0x82, 0xc1, 0xd5, 0xe6, 0xc2, 0x8e, 0x0b, 0x09, 0x11, 0x8c,
-  0x18, 0x28, 0x00, 0x08, 0x82, 0x81, 0x42, 0xe7, 0x82, 0x8e, 0x0b, 0xf9,
-  0x2d, 0x04, 0x67, 0x2e, 0xa8, 0xb8, 0x50, 0xe6, 0xc2, 0x68, 0x42, 0x00,
-  0x0c, 0x37, 0x04, 0x69, 0x2e, 0x84, 0xc1, 0x05, 0x4e, 0xcd, 0x12, 0x88,
-  0xd0, 0x40, 0x8b, 0xa3, 0x1a, 0xed, 0x63, 0xc3, 0x05, 0xfb, 0xb8, 0xc4,
-  0xfb, 0x08, 0x21, 0x64, 0xc3, 0x05, 0xfc, 0x8c, 0x18, 0x18, 0x00, 0x08,
-  0x82, 0xc1, 0x73, 0xe7, 0x82, 0x8d, 0x0b, 0xe7, 0x2c, 0x8c, 0x18, 0x18,
-  0x00, 0x08, 0x82, 0xc1, 0x83, 0xe7, 0xc2, 0x8d, 0x0b, 0xe7, 0x2c, 0x58,
-  0x10, 0xc8, 0xc7, 0x02, 0x41, 0x3e, 0xc6, 0xeb, 0x81, 0x8b, 0x0b, 0xf2,
-  0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x68, 0xcf, 0x05, 0x30, 0x17,
-  0x5c, 0x5c, 0x90, 0xb7, 0xc0, 0x7c, 0x3d, 0x70, 0x71, 0x41, 0x3e, 0x23,
-  0x06, 0x08, 0x00, 0x82, 0x60, 0x10, 0xf5, 0xb9, 0x20, 0xe6, 0x42, 0x8b,
-  0x0b, 0xe9, 0x1a, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x10, 0xf9,
-  0xb9, 0x30, 0xe6, 0x02, 0x8c, 0x0b, 0xf5, 0x16, 0x8c, 0x18, 0x20, 0x00,
-  0x08, 0x82, 0x41, 0xf4, 0xe7, 0x02, 0x99, 0x0b, 0x33, 0x2e, 0x84, 0x4c,
-  0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x11, 0xa8, 0x0b, 0x65, 0x2e,
-  0xbc, 0xb8, 0xc0, 0xae, 0x81, 0x31, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06,
-  0x51, 0xa8, 0x0b, 0x66, 0x2e, 0xbc, 0xb8, 0x80, 0x6f, 0xc1, 0x88, 0x01,
-  0x02, 0x80, 0x20, 0x18, 0x44, 0xa2, 0x2e, 0x9c, 0xb9, 0x50, 0xe3, 0x02,
-  0xc9, 0x04, 0x23, 0x06, 0x0d, 0x00, 0x82, 0x60, 0x50, 0x85, 0xba, 0x60,
-  0xe6, 0x02, 0x8e, 0x0b, 0xcc, 0xa2, 0xbc, 0x6b, 0x80, 0x10, 0x81, 0xa5,
-  0x77, 0x80, 0xe3, 0x82, 0x7c, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x20,
-  0x2a, 0x75, 0x41, 0xcd, 0x05, 0x1c, 0x17, 0xfe, 0x2b, 0xb0, 0xf5, 0x0e,
-  0x70, 0x5c, 0x90, 0xcf, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x44, 0xa7,
-  0x2e, 0xb0, 0xb9, 0x70, 0xe3, 0x82, 0x7d, 0x06, 0xc1, 0x88, 0x01, 0x02,
-  0x80, 0x20, 0x18, 0x44, 0xa8, 0x2e, 0xb4, 0xb9, 0xa0, 0xe3, 0x82, 0x88,
-  0x05, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x10, 0xa5, 0xba, 0xe0, 0xe6,
-  0x42, 0x8f, 0x0b, 0x2e, 0x12, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x41,
-  0xa4, 0xea, 0xc2, 0x9b, 0x0b, 0x39, 0x2e, 0xe4, 0x67, 0x60, 0x8c, 0x18,
-  0x20, 0x00, 0x08, 0x82, 0x41, 0xb4, 0xea, 0x02, 0x9c, 0x0b, 0x39, 0x2e,
-  0x94, 0x58, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x11, 0xab, 0x0b,
-  0x71, 0x2e, 0xfc, 0xb8, 0x10, 0x23, 0xc1, 0x88, 0x41, 0x03, 0x80, 0x20,
-  0x18, 0x54, 0xab, 0x2e, 0xc0, 0xb9, 0x20, 0xe6, 0x82, 0x55, 0x51, 0xfc,
-  0x19, 0x20, 0x44, 0x60, 0xf5, 0x1c, 0x88, 0xb9, 0x20, 0x9f, 0x11, 0x03,
-  0x04, 0x00, 0x41, 0x30, 0x88, 0x5e, 0x5d, 0xa0, 0x73, 0x41, 0xcc, 0x05,
-  0x96, 0x0a, 0xec, 0x9e, 0x03, 0x31, 0x17, 0xe4, 0x33, 0x62, 0x80, 0x00,
-  0x20, 0x08, 0x06, 0x51, 0xac, 0x0b, 0x76, 0x2e, 0x84, 0xb9, 0x30, 0x92,
-  0x41, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x91, 0xac, 0x0b, 0x77,
-  0x2e, 0x90, 0xb9, 0xf0, 0x52, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
-  0x44, 0xb3, 0x2e, 0xe0, 0xb9, 0x70, 0xe6, 0xc2, 0x4e, 0x04, 0x23, 0x06,
-  0x08, 0x00, 0x82, 0x60, 0x10, 0xd1, 0xba, 0x90, 0xe7, 0xc2, 0x98, 0x0b,
-  0x26, 0x19, 0x18, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x10, 0xd5, 0xba,
-  0xa0, 0xe7, 0xc2, 0x98, 0x0b, 0x32, 0x15, 0x8c, 0x18, 0x20, 0x00, 0x08,
-  0x82, 0x41, 0x64, 0xeb, 0xc2, 0x9e, 0x0b, 0x69, 0x2e, 0xf8, 0x44, 0x30,
-  0x62, 0xd0, 0x00, 0x20, 0x08, 0x06, 0x55, 0xad, 0x0b, 0x7a, 0x2e, 0xb0,
-  0xb9, 0x00, 0x06, 0x9f, 0x97, 0x92, 0x01, 0x42, 0x04, 0x16, 0xc2, 0x01,
-  0x9b, 0x0b, 0xf2, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x28, 0xd7,
-  0x05, 0x3f, 0x17, 0xd8, 0x5c, 0xc8, 0xa1, 0xc0, 0x46, 0x38, 0x60, 0x73,
-  0x41, 0x3e, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x10, 0xed, 0xba, 0x00,
-  0xea, 0xc2, 0x9a, 0x0b, 0x30, 0x18, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82,
-  0x60, 0x10, 0xf1, 0xba, 0x10, 0xea, 0x82, 0x9b, 0x0b, 0x3c, 0x14, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0x41, 0xd4, 0xeb, 0x82, 0xa8, 0x0b, 0x71,
-  0x2e, 0xa0, 0x41, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x91, 0xaf,
-  0x0b, 0xa3, 0x2e, 0xb4, 0xb9, 0x30, 0x83, 0x81, 0x31, 0x62, 0x80, 0x00,
-  0x20, 0x08, 0x06, 0xd1, 0xaf, 0x0b, 0xa4, 0x2e, 0xb4, 0xb9, 0xf0, 0x43,
-  0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x44, 0xe0, 0x2e, 0x94, 0xba,
-  0x30, 0xe7, 0xc2, 0x1a, 0x04, 0x23, 0x06, 0x0d, 0x00, 0x82, 0x60, 0x50,
-  0xfd, 0xba, 0x40, 0xea, 0x82, 0x9d, 0x0b, 0x6a, 0x90, 0x06, 0x68, 0x60,
-  0x83, 0x01, 0x42, 0x04, 0xc6, 0x06, 0x6c, 0x20, 0x1f, 0x0b, 0xda, 0x40,
-  0x3e, 0x16, 0x06, 0x78, 0x2e, 0xc8, 0x67, 0xc4, 0x00, 0x01, 0x40, 0x10,
-  0x0c, 0xa2, 0x72, 0x17, 0x54, 0x5d, 0xc0, 0x73, 0xc1, 0x09, 0x6c, 0x0c,
-  0xf0, 0x5c, 0x90, 0xcf, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x44, 0xe7,
-  0x2e, 0xb0, 0xba, 0x70, 0xe7, 0x82, 0x16, 0x8c, 0x18, 0x20, 0x00, 0x08,
-  0x82, 0x41, 0x84, 0xee, 0x42, 0xab, 0x0b, 0x7a, 0x2e, 0x44, 0xc1, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x44, 0xe9, 0x2e, 0xb8, 0xba, 0xd0, 0xe7,
-  0x02, 0x12, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x41, 0xa4, 0xee, 0xc2,
-  0xab, 0x0b, 0x79, 0x2e, 0x74, 0xc6, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
-  0x44, 0xeb, 0x2e, 0xc0, 0xba, 0x90, 0xe7, 0x02, 0x15, 0x8c, 0x18, 0x20,
-  0x00, 0x08, 0x82, 0x41, 0xc4, 0xee, 0x42, 0xac, 0x0b, 0x7f, 0x2e, 0x2c,
-  0xc1, 0x88, 0x41, 0x03, 0x80, 0x20, 0x18, 0x54, 0xeb, 0x2e, 0xc0, 0xba,
-  0x20, 0xea, 0xc2, 0x1d, 0x2c, 0x0a, 0x18, 0x20, 0x44, 0x30, 0x1c, 0x11,
-  0x90, 0xba, 0xe0, 0x7c, 0x17, 0x0c, 0x31, 0x62, 0xe0, 0x00, 0x20, 0x08,
-  0x06, 0x8c, 0xbc, 0x0b, 0xaf, 0x2e, 0xe8, 0xb9, 0x30, 0xe7, 0x82, 0xb9,
-  0x0b, 0x01, 0xad, 0x0b, 0xb4, 0x2e, 0xd0, 0xba, 0x90, 0xea, 0x02, 0xb9,
-  0x0b, 0xb3, 0x04, 0x23, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-};
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_bool_float.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_bool_float.h
deleted file mode 100644
index 1c13b659dc2e6..0000000000000
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_bool_float.h
+++ /dev/null
@@ -1,11305 +0,0 @@
-#if 0
-//
-// Generated by Microsoft (R) D3D Shader Disassembler
-//
-//
-// Input signature:
-//
-// Name                 Index   Mask Register SysValue  Format   Used
-// -------------------- ----- ------ -------- -------- ------- ------
-// no Input
-//
-// Output signature:
-//
-// Name                 Index   Mask Register SysValue  Format   Used
-// -------------------- ----- ------ -------- -------- ------- ------
-// no Output
-cs_5_0
-dcl_globalFlags refactoringAllowed
-dcl_constantbuffer CB0[8], immediateIndexed
-dcl_uav_structured u0, 4
-dcl_uav_structured u1, 4
-dcl_uav_structured u2, 4
-dcl_input vThreadID.x
-dcl_temps 12
-dcl_thread_group 64, 1, 1
-iadd r0.x, vThreadID.x, cb0[0].x
-ult r0.y, r0.x, cb0[0].y
-if_nz r0.y
-  utof r1.xyzw, cb0[1].wzwz
-  add r2.xyzw, r1.xxyy, l(-1.000000, -0.500000, -0.500000, -1.000000)
-  mov r3.x, l(0)
-  mov r3.yz, r2.xxwx
-  mov r2.x, l(-0.500000)
-  movc r0.yzw, cb0[7].xxxx, r3.xxyz, r2.xxyz
-  imul null, r1.x, cb0[5].z, cb0[5].w
-  imul null, r1.y, r1.x, cb0[5].y
-  udiv r1.y, null, r0.x, r1.y
-  imad r2.x, -r1.y, cb0[6].x, r0.x
-  udiv r1.x, null, r2.x, r1.x
-  imad r2.x, -r1.x, cb0[6].y, r2.x
-  udiv r2.y, null, r2.x, cb0[5].w
-  imad r2.x, -r2.y, cb0[6].z, r2.x
-  utof r3.x, r1.y
-  utof r3.yz, r2.yyxy
-  utof r2.xyzw, cb0[4].xyzw
-  dp3 r2.x, r3.xyzx, r2.xyzx
-  add r2.y, r2.w, r2.x
-  ftou r2.x, r2.x
-  ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r3.x, r2.x, l(0), u1.xxxx
-  ftou r2.x, r2.y
-  ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r3.y, r2.x, l(0), u1.xxxx
-  ieq r2.x, cb0[7].x, l(1)
-  add r2.yz, r3.xxyx, l(0.000000, 1.000000, 1.000000, 0.000000)
-  mul r3.xy, r2.yzyy, l(0.500000, 0.500000, 0.000000, 0.000000)
-  add r3.zw, r1.zzzw, l(0.000000, 0.000000, -1.000000, -1.000000)
-  mul r3.xy, r3.zwzz, r3.xyxx
-  mad r1.zw, r2.yyyz, r1.zzzw, l(0.000000, 0.000000, -1.000000, -1.000000)
-  mul r1.zw, r1.zzzw, l(0.000000, 0.000000, 0.500000, 0.500000)
-  movc r1.zw, r2.xxxx, r3.xxxy, r1.zzzw
-  ieq r2.x, cb0[0].z, l(1)
-  round_ne r2.yz, r1.zzwz
-  movc r1.zw, r2.xxxx, r2.yyyz, r1.zzzw
-  lt r2.yz, r1.zzwz, r0.yyyy
-  lt r3.xy, r0.zwzz, r1.zwzz
-  or r2.w, r2.y, r3.x
-  or r2.w, r2.z, r2.w
-  or r2.w, r3.y, r2.w
-  if_nz r2.w
-    ieq r2.w, cb0[0].w, l(1)
-    if_nz r2.w
-      iadd r3.zw, cb0[1].wwwz, l(0, 0, -1, -1)
-      utof r3.zw, r3.zzzw
-      max r4.xy, r1.zwzz, l(0.000000, 0.000000, 0.000000, 0.000000)
-      min r1.zw, r3.zzzw, r4.xxxy
-    else 
-      ieq r2.w, cb0[0].w, l(2)
-      if_nz r2.w
-        movc r2.w, cb0[7].x, l(-0.000000), l(0.500000)
-        add r3.zw, r0.zzzw, r2.wwww
-        add r4.xy, r0.yyyy, -r1.zwzz
-        div r4.zw, r4.xxxy, r3.zzzw
-        ftou r4.zw, r4.zzzw
-        utof r5.xy, r4.zwzz
-        mad r4.xy, -r5.xyxx, r3.zwzz, r4.xyxx
-        and r4.zw, r4.zzzw, l(0, 0, 1, 1)
-        add r5.xy, r0.yyyy, r4.xyxx
-        add r4.xy, r0.zwzz, -r4.xyxx
-        movc r4.xy, r4.zwzz, r4.xyxx, r5.xyxx
-        add r4.zw, -r0.zzzw, r1.zzzw
-        div r5.xy, r4.zwzz, r3.zwzz
-        ftou r5.xy, r5.xyxx
-        utof r5.zw, r5.xxxy
-        mad r3.zw, -r5.zzzw, r3.zzzw, r4.zzzw
-        and r4.zw, r5.xxxy, l(0, 0, 1, 1)
-        add r5.xy, r0.zwzz, -r3.zwzz
-        add r3.zw, r0.yyyy, r3.zzzw
-        movc r3.zw, r4.zzzw, r3.zzzw, r5.xxxy
-        movc r3.xy, r3.xyxx, r3.zwzz, r1.zwzz
-        movc r1.zw, r2.yyyz, r4.xxxy, r3.xxxy
-      endif 
-    endif 
-  endif 
-  utof r3.xy, r1.yxyy
-  if_nz r2.x
-    if_z cb0[0].w
-      ge r1.xy, r1.zwzz, l(0.000000, 0.000000, 0.000000, 0.000000)
-      ftou r2.xyzw, r1.zwwz
-      ult r2.xy, r2.xyxx, cb0[1].wzww
-      and r1.x, r1.x, r2.x
-      and r1.x, r1.y, r1.x
-      and r1.x, r2.y, r1.x
-      ftou r2.xy, r3.xyxx
-      imul null, r2.xy, r2.xyxx, cb0[2].xyxx
-      iadd r1.y, r2.y, r2.x
-      imad r1.y, r2.z, cb0[2].z, r1.y
-      imad r1.y, r2.w, cb0[2].w, r1.y
-      ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r1.y, r1.y, l(0), u0.xxxx
-      and r1.x, r1.x, r1.y
-      and r1.x, r1.x, l(0x3f800000)
-    else 
-      ieq r1.y, cb0[0].w, l(1)
-      if_nz r1.y
-        iadd r2.xy, cb0[1].zwzz, l(-1, -1, 0, 0)
-        utof r2.xy, r2.xyxx
-        max r2.zw, r1.wwwz, l(0.000000, 0.000000, 0.000000, 0.000000)
-        min r2.xy, r2.xyxx, r2.zwzz
-        ftou r2.xy, r2.xyxx
-        ftou r2.zw, r3.xxxy
-        imul null, r2.zw, r2.zzzw, cb0[2].xxxy
-        iadd r1.y, r2.w, r2.z
-        imad r1.y, r2.x, cb0[2].z, r1.y
-        imad r1.y, r2.y, cb0[2].w, r1.y
-        ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r1.y, r1.y, l(0), u0.xxxx
-        and r1.x, r1.y, l(0x3f800000)
-      else 
-        ieq r1.y, cb0[0].w, l(2)
-        if_nz r1.y
-          movc r1.y, cb0[7].x, l(-0.000000), l(0.500000)
-          add r2.xy, r0.wzww, r1.yyyy
-          lt r2.zw, r1.wwwz, r0.yyyy
-          add r3.zw, r0.yyyy, -r1.wwwz
-          div r4.xy, r3.zwzz, r2.xyxx
-          ftou r4.xy, r4.xyxx
-          utof r4.zw, r4.xxxy
-          mad r3.zw, -r4.zzzw, r2.xxxy, r3.zzzw
-          and r4.xy, r4.xyxx, l(1, 1, 0, 0)
-          add r4.zw, r0.yyyy, r3.zzzw
-          add r3.zw, r0.wwwz, -r3.zzzw
-          movc r3.zw, r4.xxxy, r3.zzzw, r4.zzzw
-          lt r4.xy, r0.wzww, r1.wzww
-          add r4.zw, -r0.wwwz, r1.wwwz
-          div r5.xy, r4.zwzz, r2.xyxx
-          ftou r5.xy, r5.xyxx
-          utof r5.zw, r5.xxxy
-          mad r2.xy, -r5.zwzz, r2.xyxx, r4.zwzz
-          and r4.zw, r5.xxxy, l(0, 0, 1, 1)
-          add r5.xy, r0.wzww, -r2.xyxx
-          add r2.xy, r0.yyyy, r2.xyxx
-          movc r2.xy, r4.zwzz, r2.xyxx, r5.xyxx
-          movc r2.xy, r4.xyxx, r2.xyxx, r1.wzww
-          movc r2.xy, r2.zwzz, r3.zwzz, r2.xyxx
-          ftou r2.xy, r2.xyxx
-          ftou r2.zw, r3.xxxy
-          imul null, r2.zw, r2.zzzw, cb0[2].xxxy
-          iadd r1.y, r2.w, r2.z
-          imad r1.y, r2.x, cb0[2].z, r1.y
-          imad r1.y, r2.y, cb0[2].w, r1.y
-          ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r1.y, r1.y, l(0), u0.xxxx
-          and r1.x, r1.y, l(0x3f800000)
-        else 
-          mov r1.x, l(0)
-        endif 
-      endif 
-    endif 
-    ne r1.x, l(0.000000, 0.000000, 0.000000, 0.000000), r1.x
-    store_structured u2.x, r0.x, l(0), r1.x
-  else 
-    if_z cb0[0].z
-      round_ni r1.xy, r1.wzww
-      add r2.xy, r1.xyxx, l(1.000000, 1.000000, 0.000000, 0.000000)
-      if_z cb0[0].w
-        ge r2.zw, r1.yyyx, l(0.000000, 0.000000, 0.000000, 0.000000)
-        ftou r3.zw, r1.yyyx
-        ult r3.zw, r3.zzzw, cb0[1].wwwz
-        and r2.z, r2.z, r3.z
-        and r3.z, r2.w, r2.z
-        and r3.z, r3.w, r3.z
-        ftou r4.xy, r3.xyxx
-        ftou r4.zw, r1.xxxy
-        imul null, r4.xy, r4.xyxx, cb0[2].xyxx
-        iadd r4.x, r4.y, r4.x
-        imad r4.x, r4.z, cb0[2].z, r4.x
-        imad r4.x, r4.w, cb0[2].w, r4.x
-        ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r4.x, r4.x, l(0), u0.xxxx
-        and r3.z, r3.z, r4.x
-        and r3.z, r3.z, l(0x3f800000)
-        ge r4.xy, r2.yxyy, l(0.000000, 0.000000, 0.000000, 0.000000)
-        ftou r4.zw, r2.yyyx
-        ult r4.zw, r4.zzzw, cb0[1].wwwz
-        and r4.x, r4.z, r4.x
-        and r2.w, r2.w, r4.x
-        and r2.w, r3.w, r2.w
-        ftou r5.xy, r3.xyxx
-        ftou r3.w, r1.x
-        ftou r4.z, r2.y
-        imul null, r5.xy, r5.xyxx, cb0[2].xyxx
-        iadd r5.x, r5.y, r5.x
-        imad r3.w, r3.w, cb0[2].z, r5.x
-        imad r3.w, r4.z, cb0[2].w, r3.w
-        ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r3.w, r3.w, l(0), u0.xxxx
-        and r2.w, r2.w, r3.w
-        and r2.w, r2.w, l(0x3f800000)
-        and r2.z, r2.z, r4.y
-        and r2.z, r4.w, r2.z
-        ftou r5.xy, r3.xyxx
-        ftou r3.w, r2.x
-        ftou r4.z, r1.y
-        imul null, r5.xy, r5.xyxx, cb0[2].xyxx
-        iadd r5.x, r5.y, r5.x
-        imad r3.w, r3.w, cb0[2].z, r5.x
-        imad r3.w, r4.z, cb0[2].w, r3.w
-        ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r3.w, r3.w, l(0), u0.xxxx
-        and r2.z, r2.z, r3.w
-        and r2.z, r2.z, l(0x3f800000)
-        and r3.w, r4.y, r4.x
-        and r3.w, r4.w, r3.w
-        ftou r4.xy, r3.xyxx
-        ftou r4.zw, r2.xxxy
-        imul null, r4.xy, r4.xyxx, cb0[2].xyxx
-        iadd r4.x, r4.y, r4.x
-        imad r4.x, r4.z, cb0[2].z, r4.x
-        imad r4.x, r4.w, cb0[2].w, r4.x
-        ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r4.x, r4.x, l(0), u0.xxxx
-        and r3.w, r3.w, r4.x
-        and r3.w, r3.w, l(0x3f800000)
-      else 
-        ieq r4.x, cb0[0].w, l(1)
-        if_nz r4.x
-          iadd r4.yz, cb0[1].zzwz, l(0, -1, -1, 0)
-          utof r4.yz, r4.yyzy
-          max r5.xy, r1.xyxx, l(0.000000, 0.000000, 0.000000, 0.000000)
-          min r4.yz, r4.yyzy, r5.xxyx
-          ftou r4.yz, r4.yyzy
-          ftou r5.xy, r3.xyxx
-          imul null, r5.xy, r5.xyxx, cb0[2].xyxx
-          iadd r4.w, r5.y, r5.x
-          imad r4.y, r4.y, cb0[2].z, r4.w
-          imad r4.y, r4.z, cb0[2].w, r4.y
-          ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r4.y, r4.y, l(0), u0.xxxx
-          and r3.z, r4.y, l(0x3f800000)
-        else 
-          ieq r4.y, cb0[0].w, l(2)
-          if_nz r4.y
-            movc r4.y, cb0[7].x, l(-0.000000), l(0.500000)
-            add r4.yz, r0.wwzw, r4.yyyy
-            lt r5.xy, r1.xyxx, r0.yyyy
-            add r5.zw, r0.yyyy, -r1.xxxy
-            div r6.xy, r5.zwzz, r4.yzyy
-            ftou r6.xy, r6.xyxx
-            utof r6.zw, r6.xxxy
-            mad r5.zw, -r6.zzzw, r4.yyyz, r5.zzzw
-            and r6.xy, r6.xyxx, l(1, 1, 0, 0)
-            add r6.zw, r0.yyyy, r5.zzzw
-            add r5.zw, r0.wwwz, -r5.zzzw
-            movc r5.zw, r6.xxxy, r5.zzzw, r6.zzzw
-            lt r6.xy, r0.wzww, r1.xyxx
-            add r6.zw, -r0.wwwz, r1.xxxy
-            div r7.xy, r6.zwzz, r4.yzyy
-            ftou r7.xy, r7.xyxx
-            utof r7.zw, r7.xxxy
-            mad r4.yz, -r7.zzwz, r4.yyzy, r6.zzwz
-            and r6.zw, r7.xxxy, l(0, 0, 1, 1)
-            add r7.xy, r0.wzww, -r4.yzyy
-            add r4.yz, r0.yyyy, r4.yyzy
-            movc r4.yz, r6.zzwz, r4.yyzy, r7.xxyx
-            movc r4.yz, r6.xxyx, r4.yyzy, r1.xxyx
-            movc r4.yz, r5.xxyx, r5.zzwz, r4.yyzy
-            ftou r4.yz, r4.yyzy
-            ftou r5.xy, r3.xyxx
-            imul null, r5.xy, r5.xyxx, cb0[2].xyxx
-            iadd r4.w, r5.y, r5.x
-            imad r4.y, r4.y, cb0[2].z, r4.w
-            imad r4.y, r4.z, cb0[2].w, r4.y
-            ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r4.y, r4.y, l(0), u0.xxxx
-            and r3.z, r4.y, l(0x3f800000)
-          else 
-            mov r3.z, l(0)
-          endif 
-        endif 
-        if_nz r4.x
-          iadd r4.yz, cb0[1].wwzw, l(0, -1, -1, 0)
-          utof r4.yz, r4.yyzy
-          max r4.w, r2.y, l(0.000000)
-          min r4.y, r4.y, r4.w
-          max r4.w, r1.x, l(0.000000)
-          min r4.z, r4.z, r4.w
-          ftou r4.yz, r4.yyzy
-          utof r4.zw, r4.zzzy
-          ftou r5.xy, r3.xyxx
-          ftou r4.yz, r4.zzwz
-          imul null, r5.xy, r5.xyxx, cb0[2].xyxx
-          iadd r4.w, r5.y, r5.x
-          imad r4.y, r4.y, cb0[2].z, r4.w
-          imad r4.y, r4.z, cb0[2].w, r4.y
-          ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r4.y, r4.y, l(0), u0.xxxx
-          and r2.w, r4.y, l(0x3f800000)
-        else 
-          ieq r4.y, cb0[0].w, l(2)
-          if_nz r4.y
-            movc r4.y, cb0[7].x, l(-0.000000), l(0.500000)
-            add r4.yz, r0.zzwz, r4.yyyy
-            lt r4.w, r2.y, r0.y
-            add r5.x, r0.y, -r2.y
-            div r5.y, r5.x, r4.y
-            ftou r5.y, r5.y
-            utof r5.z, r5.y
-            mad r5.x, -r5.z, r4.y, r5.x
-            and r5.y, r5.y, l(1)
-            add r5.z, r0.y, r5.x
-            add r5.x, r0.z, -r5.x
-            movc r5.x, r5.y, r5.x, r5.z
-            lt r5.y, r0.z, r2.y
-            add r5.z, -r0.z, r2.y
-            div r5.w, r5.z, r4.y
-            ftou r5.w, r5.w
-            utof r6.x, r5.w
-            mad r4.y, -r6.x, r4.y, r5.z
-            and r5.z, r5.w, l(1)
-            add r5.w, r0.z, -r4.y
-            add r4.y, r0.y, r4.y
-            movc r4.y, r5.z, r4.y, r5.w
-            movc r4.y, r5.y, r4.y, r2.y
-            movc r4.y, r4.w, r5.x, r4.y
-            ftou r4.y, r4.y
-            lt r4.w, r1.x, r0.y
-            add r5.x, r0.y, -r1.x
-            div r5.y, r5.x, r4.z
-            ftou r5.y, r5.y
-            utof r5.z, r5.y
-            mad r5.x, -r5.z, r4.z, r5.x
-            and r5.y, r5.y, l(1)
-            add r5.z, r0.y, r5.x
-            add r5.x, r0.w, -r5.x
-            movc r5.x, r5.y, r5.x, r5.z
-            lt r5.y, r0.w, r1.x
-            add r5.z, -r0.w, r1.x
-            div r5.w, r5.z, r4.z
-            ftou r5.w, r5.w
-            utof r6.x, r5.w
-            mad r4.z, -r6.x, r4.z, r5.z
-            and r5.z, r5.w, l(1)
-            add r5.w, r0.w, -r4.z
-            add r4.z, r0.y, r4.z
-            movc r4.z, r5.z, r4.z, r5.w
-            movc r1.x, r5.y, r4.z, r1.x
-            movc r1.x, r4.w, r5.x, r1.x
-            ftou r1.x, r1.x
-            utof r4.z, r1.x
-            utof r4.w, r4.y
-            ftou r5.xy, r3.xyxx
-            ftou r4.yz, r4.zzwz
-            imul null, r5.xy, r5.xyxx, cb0[2].xyxx
-            iadd r1.x, r5.y, r5.x
-            imad r1.x, r4.y, cb0[2].z, r1.x
-            imad r1.x, r4.z, cb0[2].w, r1.x
-            ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r1.x, r1.x, l(0), u0.xxxx
-            and r2.w, r1.x, l(0x3f800000)
-          else 
-            mov r2.w, l(0)
-          endif 
-        endif 
-        if_nz r4.x
-          iadd r4.yz, cb0[1].wwzw, l(0, -1, -1, 0)
-          utof r4.yz, r4.yyzy
-          max r1.x, r1.y, l(0.000000)
-          min r1.x, r4.y, r1.x
-          ftou r1.x, r1.x
-          max r4.y, r2.x, l(0.000000)
-          min r4.y, r4.z, r4.y
-          ftou r4.y, r4.y
-          utof r4.z, r4.y
-          utof r4.w, r1.x
-          ftou r5.xy, r3.xyxx
-          ftou r4.yz, r4.zzwz
-          imul null, r5.xy, r5.xyxx, cb0[2].xyxx
-          iadd r1.x, r5.y, r5.x
-          imad r1.x, r4.y, cb0[2].z, r1.x
-          imad r1.x, r4.z, cb0[2].w, r1.x
-          ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r1.x, r1.x, l(0), u0.xxxx
-          and r2.z, r1.x, l(0x3f800000)
-        else 
-          ieq r1.x, cb0[0].w, l(2)
-          if_nz r1.x
-            movc r1.x, cb0[7].x, l(-0.000000), l(0.500000)
-            add r4.yz, r0.zzwz, r1.xxxx
-            lt r1.x, r1.y, r0.y
-            add r4.w, r0.y, -r1.y
-            div r5.x, r4.w, r4.y
-            ftou r5.x, r5.x
-            utof r5.y, r5.x
-            mad r4.w, -r5.y, r4.y, r4.w
-            and r5.x, r5.x, l(1)
-            add r5.y, r0.y, r4.w
-            add r4.w, r0.z, -r4.w
-            movc r4.w, r5.x, r4.w, r5.y
-            lt r5.x, r0.z, r1.y
-            add r5.y, -r0.z, r1.y
-            div r5.z, r5.y, r4.y
-            ftou r5.z, r5.z
-            utof r5.w, r5.z
-            mad r4.y, -r5.w, r4.y, r5.y
-            and r5.y, r5.z, l(1)
-            add r5.z, r0.z, -r4.y
-            add r4.y, r0.y, r4.y
-            movc r4.y, r5.y, r4.y, r5.z
-            movc r1.y, r5.x, r4.y, r1.y
-            movc r1.x, r1.x, r4.w, r1.y
-            lt r1.y, r2.x, r0.y
-            add r4.y, r0.y, -r2.x
-            div r4.w, r4.y, r4.z
-            ftou r4.w, r4.w
-            utof r5.x, r4.w
-            mad r4.y, -r5.x, r4.z, r4.y
-            and r4.w, r4.w, l(1)
-            add r5.x, r0.y, r4.y
-            add r4.y, r0.w, -r4.y
-            movc r4.y, r4.w, r4.y, r5.x
-            lt r4.w, r0.w, r2.x
-            add r5.x, -r0.w, r2.x
-            div r5.y, r5.x, r4.z
-            ftou r5.y, r5.y
-            utof r5.z, r5.y
-            mad r4.z, -r5.z, r4.z, r5.x
-            and r5.x, r5.y, l(1)
-            add r5.y, r0.w, -r4.z
-            add r4.z, r0.y, r4.z
-            movc r4.z, r5.x, r4.z, r5.y
-            movc r4.z, r4.w, r4.z, r2.x
-            movc r1.y, r1.y, r4.y, r4.z
-            ftou r1.xy, r1.xyxx
-            utof r4.zw, r1.yyyx
-            ftou r1.xy, r3.xyxx
-            ftou r4.yz, r4.zzwz
-            imul null, r1.xy, r1.xyxx, cb0[2].xyxx
-            iadd r1.x, r1.y, r1.x
-            imad r1.x, r4.y, cb0[2].z, r1.x
-            imad r1.x, r4.z, cb0[2].w, r1.x
-            ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r1.x, r1.x, l(0), u0.xxxx
-            and r2.z, r1.x, l(0x3f800000)
-          else 
-            mov r2.z, l(0)
-          endif 
-        endif 
-        if_nz r4.x
-          iadd r1.xy, cb0[1].zwzz, l(-1, -1, 0, 0)
-          utof r1.xy, r1.xyxx
-          max r4.xy, r2.xyxx, l(0.000000, 0.000000, 0.000000, 0.000000)
-          min r1.xy, r1.xyxx, r4.xyxx
-          ftou r1.xy, r1.xyxx
-          ftou r4.xy, r3.xyxx
-          imul null, r4.xy, r4.xyxx, cb0[2].xyxx
-          iadd r4.x, r4.y, r4.x
-          imad r1.x, r1.x, cb0[2].z, r4.x
-          imad r1.x, r1.y, cb0[2].w, r1.x
-          ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r1.x, r1.x, l(0), u0.xxxx
-          and r3.w, r1.x, l(0x3f800000)
-        else 
-          ieq r1.x, cb0[0].w, l(2)
-          if_nz r1.x
-            movc r1.x, cb0[7].x, l(-0.000000), l(0.500000)
-            add r1.xy, r0.wzww, r1.xxxx
-            lt r4.xy, r2.xyxx, r0.yyyy
-            add r4.zw, r0.yyyy, -r2.xxxy
-            div r5.xy, r4.zwzz, r1.xyxx
-            ftou r5.xy, r5.xyxx
-            utof r5.zw, r5.xxxy
-            mad r4.zw, -r5.zzzw, r1.xxxy, r4.zzzw
-            and r5.xy, r5.xyxx, l(1, 1, 0, 0)
-            add r5.zw, r0.yyyy, r4.zzzw
-            add r4.zw, r0.wwwz, -r4.zzzw
-            movc r4.zw, r5.xxxy, r4.zzzw, r5.zzzw
-            lt r5.xy, r0.wzww, r2.xyxx
-            add r5.zw, -r0.wwwz, r2.xxxy
-            div r6.xy, r5.zwzz, r1.xyxx
-            ftou r6.xy, r6.xyxx
-            utof r6.zw, r6.xxxy
-            mad r1.xy, -r6.zwzz, r1.xyxx, r5.zwzz
-            and r5.zw, r6.xxxy, l(0, 0, 1, 1)
-            add r6.xy, r0.wzww, -r1.xyxx
-            add r1.xy, r0.yyyy, r1.xyxx
-            movc r1.xy, r5.zwzz, r1.xyxx, r6.xyxx
-            movc r1.xy, r5.xyxx, r1.xyxx, r2.xyxx
-            movc r1.xy, r4.xyxx, r4.zwzz, r1.xyxx
-            ftou r1.xy, r1.xyxx
-            ftou r2.xy, r3.xyxx
-            imul null, r2.xy, r2.xyxx, cb0[2].xyxx
-            iadd r2.x, r2.y, r2.x
-            imad r1.x, r1.x, cb0[2].z, r2.x
-            imad r1.x, r1.y, cb0[2].w, r1.x
-            ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r1.x, r1.x, l(0), u0.xxxx
-            and r3.w, r1.x, l(0x3f800000)
-          else 
-            mov r3.w, l(0)
-          endif 
-        endif 
-      endif 
-      frc r1.xy, r1.zwzz
-      add r2.x, r2.w, -r3.z
-      mad r2.x, r1.x, r2.x, r3.z
-      add r2.y, -r2.z, r3.w
-      mad r1.x, r1.x, r2.y, r2.z
-      add r1.x, -r2.x, r1.x
-      mad r1.x, r1.y, r1.x, r2.x
-      ne r1.x, l(0.000000, 0.000000, 0.000000, 0.000000), r1.x
-      store_structured u2.x, r0.x, l(0), r1.x
-    else 
-      ieq r1.x, cb0[0].z, l(2)
-      if_nz r1.x
-        round_ni r1.xy, r1.wzww
-        add r2.xyzw, r1.yxyy, l(2.000000, -1.000000, -1.000000, 1.000000)
-        if_z cb0[0].w
-          ge r3.zw, r2.zzzy, l(0.000000, 0.000000, 0.000000, 0.000000)
-          ftou r4.xy, r2.zyzz
-          ult r4.xy, r4.xyxx, cb0[1].wzww
-          and r3.z, r3.z, r4.x
-          and r3.z, r3.w, r3.z
-          and r3.z, r4.y, r3.z
-          ftou r4.xz, r3.xxyx
-          ftou r5.xy, r2.yzyy
-          imul null, r4.xz, r4.xxzx, cb0[2].xxyx
-          iadd r4.x, r4.z, r4.x
-          imad r4.x, r5.x, cb0[2].z, r4.x
-          imad r4.x, r5.y, cb0[2].w, r4.x
-          ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r4.x, r4.x, l(0), u0.xxxx
-          and r3.z, r3.z, r4.x
-          and r5.x, r3.z, l(0x3f800000)
-          ge r3.z, r1.y, l(0.000000)
-          ftou r4.xz, r1.yyyy
-          ult r4.x, r4.x, cb0[1].w
-          and r3.z, r3.z, r4.x
-          and r3.z, r3.w, r3.z
-          and r3.z, r4.y, r3.z
-          ftou r4.xy, r3.xyxx
-          ftou r3.w, r2.y
-          imul null, r4.xy, r4.xyxx, cb0[2].xyxx
-          iadd r4.x, r4.y, r4.x
-          imad r3.w, r3.w, cb0[2].z, r4.x
-          imad r3.w, r4.z, cb0[2].w, r3.w
-          ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r3.w, r3.w, l(0), u0.xxxx
-          and r3.z, r3.z, r3.w
-          and r4.x, r3.z, l(0x3f800000)
-        else 
-          ieq r3.z, cb0[0].w, l(1)
-          if_nz r3.z
-            iadd r6.xy, cb0[1].zwzz, l(-1, -1, 0, 0)
-            utof r6.xy, r6.xyxx
-            max r6.zw, r2.yyyz, l(0.000000, 0.000000, 0.000000, 0.000000)
-            min r6.xy, r6.xyxx, r6.zwzz
-            ftou r6.xy, r6.xyxx
-            ftou r6.zw, r3.xxxy
-            imul null, r6.zw, r6.zzzw, cb0[2].xxxy
-            iadd r3.w, r6.w, r6.z
-            imad r3.w, r6.x, cb0[2].z, r3.w
-            imad r3.w, r6.y, cb0[2].w, r3.w
-            ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r3.w, r3.w, l(0), u0.xxxx
-            and r5.x, r3.w, l(0x3f800000)
-          else 
-            ieq r3.w, cb0[0].w, l(2)
-            if_nz r3.w
-              movc r3.w, cb0[7].x, l(-0.000000), l(0.500000)
-              add r6.xy, r0.wzww, r3.wwww
-              lt r6.zw, r2.yyyz, r0.yyyy
-              add r7.xy, r0.yyyy, -r2.yzyy
-              div r7.zw, r7.xxxy, r6.xxxy
-              ftou r7.zw, r7.zzzw
-              utof r8.xy, r7.zwzz
-              mad r7.xy, -r8.xyxx, r6.xyxx, r7.xyxx
-              and r7.zw, r7.zzzw, l(0, 0, 1, 1)
-              add r8.xy, r0.yyyy, r7.xyxx
-              add r7.xy, r0.wzww, -r7.xyxx
-              movc r7.xy, r7.zwzz, r7.xyxx, r8.xyxx
-              lt r7.zw, r0.wwwz, r2.yyyz
-              add r8.xy, -r0.wzww, r2.yzyy
-              div r8.zw, r8.xxxy, r6.xxxy
-              ftou r8.zw, r8.zzzw
-              utof r9.xy, r8.zwzz
-              mad r6.xy, -r9.xyxx, r6.xyxx, r8.xyxx
-              and r8.xy, r8.zwzz, l(1, 1, 0, 0)
-              add r8.zw, r0.wwwz, -r6.xxxy
-              add r6.xy, r0.yyyy, r6.xyxx
-              movc r6.xy, r8.xyxx, r6.xyxx, r8.zwzz
-              movc r6.xy, r7.zwzz, r6.xyxx, r2.yzyy
-              movc r6.xy, r6.zwzz, r7.xyxx, r6.xyxx
-              ftou r6.xy, r6.xyxx
-              ftou r6.zw, r3.xxxy
-              imul null, r6.zw, r6.zzzw, cb0[2].xxxy
-              iadd r3.w, r6.w, r6.z
-              imad r3.w, r6.x, cb0[2].z, r3.w
-              imad r3.w, r6.y, cb0[2].w, r3.w
-              ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r3.w, r3.w, l(0), u0.xxxx
-              and r5.x, r3.w, l(0x3f800000)
-            else 
-              mov r5.x, l(0)
-            endif 
-          endif 
-          if_nz r3.z
-            iadd r3.zw, cb0[1].wwwz, l(0, 0, -1, -1)
-            utof r3.zw, r3.zzzw
-            max r6.x, r1.y, l(0.000000)
-            min r3.z, r3.z, r6.x
-            max r6.x, r2.y, l(0.000000)
-            min r3.w, r3.w, r6.x
-            ftou r3.zw, r3.zzzw
-            utof r6.zw, r3.wwwz
-            ftou r3.zw, r3.xxxy
-            ftou r6.xy, r6.zwzz
-            imul null, r3.zw, r3.zzzw, cb0[2].xxxy
-            iadd r3.z, r3.w, r3.z
-            imad r3.z, r6.x, cb0[2].z, r3.z
-            imad r3.z, r6.y, cb0[2].w, r3.z
-            ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r3.z, r3.z, l(0), u0.xxxx
-            and r4.x, r3.z, l(0x3f800000)
-          else 
-            ieq r3.z, cb0[0].w, l(2)
-            if_nz r3.z
-              movc r3.z, cb0[7].x, l(-0.000000), l(0.500000)
-              add r3.zw, r0.zzzw, r3.zzzz
-              lt r6.x, r1.y, r0.y
-              add r6.y, r0.y, -r1.y
-              div r6.z, r6.y, r3.z
-              ftou r6.z, r6.z
-              utof r6.w, r6.z
-              mad r6.y, -r6.w, r3.z, r6.y
-              and r6.z, r6.z, l(1)
-              add r6.w, r0.y, r6.y
-              add r6.y, r0.z, -r6.y
-              movc r6.y, r6.z, r6.y, r6.w
-              lt r6.z, r0.z, r1.y
-              add r6.w, -r0.z, r1.y
-              div r7.x, r6.w, r3.z
-              ftou r7.x, r7.x
-              utof r7.y, r7.x
-              mad r3.z, -r7.y, r3.z, r6.w
-              and r6.w, r7.x, l(1)
-              add r7.x, r0.z, -r3.z
-              add r3.z, r0.y, r3.z
-              movc r3.z, r6.w, r3.z, r7.x
-              movc r3.z, r6.z, r3.z, r1.y
-              movc r3.z, r6.x, r6.y, r3.z
-              lt r6.x, r2.y, r0.y
-              add r6.y, r0.y, -r2.y
-              div r6.z, r6.y, r3.w
-              ftou r6.z, r6.z
-              utof r6.w, r6.z
-              mad r6.y, -r6.w, r3.w, r6.y
-              and r6.z, r6.z, l(1)
-              add r6.w, r0.y, r6.y
-              add r6.y, r0.w, -r6.y
-              movc r6.y, r6.z, r6.y, r6.w
-              lt r6.z, r0.w, r2.y
-              add r6.w, -r0.w, r2.y
-              div r7.x, r6.w, r3.w
-              ftou r7.x, r7.x
-              utof r7.y, r7.x
-              mad r3.w, -r7.y, r3.w, r6.w
-              and r6.w, r7.x, l(1)
-              add r7.x, r0.w, -r3.w
-              add r3.w, r0.y, r3.w
-              movc r3.w, r6.w, r3.w, r7.x
-              movc r3.w, r6.z, r3.w, r2.y
-              movc r3.w, r6.x, r6.y, r3.w
-              ftou r3.zw, r3.zzzw
-              utof r6.zw, r3.wwwz
-              ftou r3.zw, r3.xxxy
-              ftou r6.xy, r6.zwzz
-              imul null, r3.zw, r3.zzzw, cb0[2].xxxy
-              iadd r3.z, r3.w, r3.z
-              imad r3.z, r6.x, cb0[2].z, r3.z
-              imad r3.z, r6.y, cb0[2].w, r3.z
-              ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r3.z, r3.z, l(0), u0.xxxx
-              and r4.x, r3.z, l(0x3f800000)
-            else 
-              mov r4.x, l(0)
-            endif 
-          endif 
-        endif 
-        if_z cb0[0].w
-          ge r3.zw, r2.wwwy, l(0.000000, 0.000000, 0.000000, 0.000000)
-          ftou r6.xyzw, r2.wyyw
-          ult r6.xy, r6.xyxx, cb0[1].wzww
-          and r3.z, r3.z, r6.x
-          and r3.z, r3.w, r3.z
-          and r3.z, r6.y, r3.z
-          ftou r6.xy, r3.xyxx
-          imul null, r6.xy, r6.xyxx, cb0[2].xyxx
-          iadd r3.w, r6.y, r6.x
-          imad r3.w, r6.z, cb0[2].z, r3.w
-          imad r3.w, r6.w, cb0[2].w, r3.w
-          ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r3.w, r3.w, l(0), u0.xxxx
-          and r3.z, r3.z, r3.w
-          and r6.x, r3.z, l(0x3f800000)
-        else 
-          ieq r3.z, cb0[0].w, l(1)
-          if_nz r3.z
-            iadd r3.zw, cb0[1].zzzw, l(0, 0, -1, -1)
-            utof r3.zw, r3.zzzw
-            max r7.xy, r2.ywyy, l(0.000000, 0.000000, 0.000000, 0.000000)
-            min r3.zw, r3.zzzw, r7.xxxy
-            ftou r3.zw, r3.zzzw
-            ftou r7.xy, r3.xyxx
-            imul null, r7.xy, r7.xyxx, cb0[2].xyxx
-            iadd r7.x, r7.y, r7.x
-            imad r3.z, r3.z, cb0[2].z, r7.x
-            imad r3.z, r3.w, cb0[2].w, r3.z
-            ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r3.z, r3.z, l(0), u0.xxxx
-            and r6.x, r3.z, l(0x3f800000)
-          else 
-            ieq r3.z, cb0[0].w, l(2)
-            if_nz r3.z
-              movc r3.z, cb0[7].x, l(-0.000000), l(0.500000)
-              add r3.zw, r0.wwwz, r3.zzzz
-              lt r7.xy, r2.ywyy, r0.yyyy
-              add r7.zw, r0.yyyy, -r2.yyyw
-              div r8.xy, r7.zwzz, r3.zwzz
-              ftou r8.xy, r8.xyxx
-              utof r8.zw, r8.xxxy
-              mad r7.zw, -r8.zzzw, r3.zzzw, r7.zzzw
-              and r8.xy, r8.xyxx, l(1, 1, 0, 0)
-              add r8.zw, r0.yyyy, r7.zzzw
-              add r7.zw, r0.wwwz, -r7.zzzw
-              movc r7.zw, r8.xxxy, r7.zzzw, r8.zzzw
-              lt r8.xy, r0.wzww, r2.ywyy
-              add r8.zw, -r0.wwwz, r2.yyyw
-              div r9.xy, r8.zwzz, r3.zwzz
-              ftou r9.xy, r9.xyxx
-              utof r9.zw, r9.xxxy
-              mad r3.zw, -r9.zzzw, r3.zzzw, r8.zzzw
-              and r8.zw, r9.xxxy, l(0, 0, 1, 1)
-              add r9.xy, r0.wzww, -r3.zwzz
-              add r3.zw, r0.yyyy, r3.zzzw
-              movc r3.zw, r8.zzzw, r3.zzzw, r9.xxxy
-              movc r3.zw, r8.xxxy, r3.zzzw, r2.yyyw
-              movc r3.zw, r7.xxxy, r7.zzzw, r3.zzzw
-              ftou r3.zw, r3.zzzw
-              ftou r7.xy, r3.xyxx
-              imul null, r7.xy, r7.xyxx, cb0[2].xyxx
-              iadd r7.x, r7.y, r7.x
-              imad r3.z, r3.z, cb0[2].z, r7.x
-              imad r3.z, r3.w, cb0[2].w, r3.z
-              ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r3.z, r3.z, l(0), u0.xxxx
-              and r6.x, r3.z, l(0x3f800000)
-            else 
-              mov r6.x, l(0)
-            endif 
-          endif 
-        endif 
-        if_z cb0[0].w
-          ge r7.xyzw, r2.xyzw, l(0.000000, 0.000000, 0.000000, 0.000000)
-          ftou r8.xyzw, r2.xyzw
-          ult r8.xyzw, r8.xyzw, cb0[1].wzww
-          and r7.xzw, r7.zzwx, r8.zzwx
-          and r3.z, r7.y, r7.w
-          and r3.z, r8.y, r3.z
-          ftou r8.xy, r3.xyxx
-          ftou r8.zw, r2.yyyx
-          imul null, r8.xy, r8.xyxx, cb0[2].xyxx
-          iadd r3.w, r8.y, r8.x
-          imad r3.w, r8.z, cb0[2].z, r3.w
-          imad r3.w, r8.w, cb0[2].w, r3.w
-          ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r3.w, r3.w, l(0), u0.xxxx
-          and r3.z, r3.z, r3.w
-          and r8.x, r3.z, l(0x3f800000)
-          ge r3.zw, r1.xxxy, l(0.000000, 0.000000, 0.000000, 0.000000)
-          and r7.xyz, r3.zzzz, r7.xzwx
-          ftou r9.xy, r1.xyxx
-          ult r9.xy, r9.xyxx, cb0[1].zwzz
-          and r7.xyz, r7.xyzx, r9.xxxx
-          ftou r9.zw, r3.xxxy
-          ftou r7.w, r1.x
-          ftou r10.x, r2.z
-          imul null, r9.zw, r9.zzzw, cb0[2].xxxy
-          iadd r9.z, r9.w, r9.z
-          imad r7.w, r7.w, cb0[2].z, r9.z
-          imad r7.w, r10.x, cb0[2].w, r7.w
-          ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r7.w, r7.w, l(0), u0.xxxx
-          and r7.x, r7.x, r7.w
-          and r5.z, r7.x, l(0x3f800000)
-          and r3.w, r3.w, r9.y
-          and r3.z, r3.z, r3.w
-          and r3.z, r9.x, r3.z
-          ftou r7.xw, r3.xxxy
-          ftou r9.xy, r1.xyxx
-          imul null, r7.xw, r7.xxxw, cb0[2].xxxy
-          iadd r3.w, r7.w, r7.x
-          imad r3.w, r9.x, cb0[2].z, r3.w
-          imad r3.w, r9.y, cb0[2].w, r3.w
-          ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r3.w, r3.w, l(0), u0.xxxx
-          and r3.z, r3.z, r3.w
-          and r4.z, r3.z, l(0x3f800000)
-          ftou r3.zw, r3.xxxy
-          ftou r7.x, r1.x
-          ftou r7.w, r2.w
-          imul null, r3.zw, r3.zzzw, cb0[2].xxxy
-          iadd r3.z, r3.w, r3.z
-          imad r3.z, r7.x, cb0[2].z, r3.z
-          imad r3.z, r7.w, cb0[2].w, r3.z
-          ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r3.z, r3.z, l(0), u0.xxxx
-          and r3.z, r7.y, r3.z
-          and r6.z, r3.z, l(0x3f800000)
-          ftou r3.zw, r3.xxxy
-          ftou r7.x, r1.x
-          ftou r7.y, r2.x
-          imul null, r3.zw, r3.zzzw, cb0[2].xxxy
-          iadd r3.z, r3.w, r3.z
-          imad r3.z, r7.x, cb0[2].z, r3.z
-          imad r3.z, r7.y, cb0[2].w, r3.z
-          ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r3.z, r3.z, l(0), u0.xxxx
-          and r3.z, r7.z, r3.z
-          and r8.z, r3.z, l(0x3f800000)
-        else 
-          ieq r3.z, cb0[0].w, l(1)
-          if_nz r3.z
-            iadd r7.xy, cb0[1].zwzz, l(-1, -1, 0, 0)
-            utof r7.xy, r7.xyxx
-            max r7.zw, r2.yyyx, l(0.000000, 0.000000, 0.000000, 0.000000)
-            min r7.xy, r7.xyxx, r7.zwzz
-            ftou r7.xy, r7.xyxx
-            ftou r7.zw, r3.xxxy
-            imul null, r7.zw, r7.zzzw, cb0[2].xxxy
-            iadd r3.w, r7.w, r7.z
-            imad r3.w, r7.x, cb0[2].z, r3.w
-            imad r3.w, r7.y, cb0[2].w, r3.w
-            ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r3.w, r3.w, l(0), u0.xxxx
-            and r8.x, r3.w, l(0x3f800000)
-          else 
-            ieq r3.w, cb0[0].w, l(2)
-            if_nz r3.w
-              movc r3.w, cb0[7].x, l(-0.000000), l(0.500000)
-              add r7.xy, r0.wzww, r3.wwww
-              lt r7.zw, r2.yyyx, r0.yyyy
-              add r9.xy, r0.yyyy, -r2.yxyy
-              div r9.zw, r9.xxxy, r7.xxxy
-              ftou r9.zw, r9.zzzw
-              utof r10.xy, r9.zwzz
-              mad r9.xy, -r10.xyxx, r7.xyxx, r9.xyxx
-              and r9.zw, r9.zzzw, l(0, 0, 1, 1)
-              add r10.xy, r0.yyyy, r9.xyxx
-              add r9.xy, r0.wzww, -r9.xyxx
-              movc r9.xy, r9.zwzz, r9.xyxx, r10.xyxx
-              lt r9.zw, r0.wwwz, r2.yyyx
-              add r10.xy, -r0.wzww, r2.yxyy
-              div r10.zw, r10.xxxy, r7.xxxy
-              ftou r10.zw, r10.zzzw
-              utof r11.xy, r10.zwzz
-              mad r7.xy, -r11.xyxx, r7.xyxx, r10.xyxx
-              and r10.xy, r10.zwzz, l(1, 1, 0, 0)
-              add r10.zw, r0.wwwz, -r7.xxxy
-              add r7.xy, r0.yyyy, r7.xyxx
-              movc r7.xy, r10.xyxx, r7.xyxx, r10.zwzz
-              movc r7.xy, r9.zwzz, r7.xyxx, r2.yxyy
-              movc r7.xy, r7.zwzz, r9.xyxx, r7.xyxx
-              ftou r7.xy, r7.xyxx
-              ftou r7.zw, r3.xxxy
-              imul null, r7.zw, r7.zzzw, cb0[2].xxxy
-              iadd r2.y, r7.w, r7.z
-              imad r2.y, r7.x, cb0[2].z, r2.y
-              imad r2.y, r7.y, cb0[2].w, r2.y
-              ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r2.y, r2.y, l(0), u0.xxxx
-              and r8.x, r2.y, l(0x3f800000)
-            else 
-              mov r8.x, l(0)
-            endif 
-          endif 
-          if_nz r3.z
-            iadd r7.xy, cb0[1].wzww, l(-1, -1, 0, 0)
-            utof r7.xy, r7.xyxx
-            max r2.y, r2.z, l(0.000000)
-            min r2.y, r7.x, r2.y
-            ftou r2.y, r2.y
-            max r3.w, r1.x, l(0.000000)
-            min r3.w, r7.y, r3.w
-            ftou r3.w, r3.w
-            utof r7.z, r3.w
-            utof r7.w, r2.y
-            ftou r7.xy, r3.xyxx
-            ftou r7.zw, r7.zzzw
-            imul null, r7.xy, r7.xyxx, cb0[2].xyxx
-            iadd r2.y, r7.y, r7.x
-            imad r2.y, r7.z, cb0[2].z, r2.y
-            imad r2.y, r7.w, cb0[2].w, r2.y
-            ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r2.y, r2.y, l(0), u0.xxxx
-            and r5.z, r2.y, l(0x3f800000)
-          else 
-            ieq r2.y, cb0[0].w, l(2)
-            if_nz r2.y
-              movc r2.y, cb0[7].x, l(-0.000000), l(0.500000)
-              add r7.xy, r0.zwzz, r2.yyyy
-              lt r2.y, r2.z, r0.y
-              add r3.w, r0.y, -r2.z
-              div r7.z, r3.w, r7.x
-              ftou r7.z, r7.z
-              utof r7.w, r7.z
-              mad r3.w, -r7.w, r7.x, r3.w
-              and r7.z, r7.z, l(1)
-              add r7.w, r0.y, r3.w
-              add r3.w, r0.z, -r3.w
-              movc r3.w, r7.z, r3.w, r7.w
-              lt r7.z, r0.z, r2.z
-              add r7.w, -r0.z, r2.z
-              div r9.x, r7.w, r7.x
-              ftou r9.x, r9.x
-              utof r9.y, r9.x
-              mad r7.x, -r9.y, r7.x, r7.w
-              and r7.w, r9.x, l(1)
-              add r9.x, r0.z, -r7.x
-              add r7.x, r0.y, r7.x
-              movc r7.x, r7.w, r7.x, r9.x
-              movc r7.x, r7.z, r7.x, r2.z
-              movc r2.y, r2.y, r3.w, r7.x
-              ftou r2.y, r2.y
-              lt r3.w, r1.x, r0.y
-              add r7.x, r0.y, -r1.x
-              div r7.z, r7.x, r7.y
-              ftou r7.z, r7.z
-              utof r7.w, r7.z
-              mad r7.x, -r7.w, r7.y, r7.x
-              and r7.z, r7.z, l(1)
-              add r7.w, r0.y, r7.x
-              add r7.x, r0.w, -r7.x
-              movc r7.x, r7.z, r7.x, r7.w
-              lt r7.z, r0.w, r1.x
-              add r7.w, -r0.w, r1.x
-              div r9.x, r7.w, r7.y
-              ftou r9.x, r9.x
-              utof r9.y, r9.x
-              mad r7.y, -r9.y, r7.y, r7.w
-              and r7.w, r9.x, l(1)
-              add r9.x, r0.w, -r7.y
-              add r7.y, r0.y, r7.y
-              movc r7.y, r7.w, r7.y, r9.x
-              movc r7.y, r7.z, r7.y, r1.x
-              movc r3.w, r3.w, r7.x, r7.y
-              ftou r3.w, r3.w
-              utof r7.z, r3.w
-              utof r7.w, r2.y
-              ftou r7.xy, r3.xyxx
-              ftou r7.zw, r7.zzzw
-              imul null, r7.xy, r7.xyxx, cb0[2].xyxx
-              iadd r2.y, r7.y, r7.x
-              imad r2.y, r7.z, cb0[2].z, r2.y
-              imad r2.y, r7.w, cb0[2].w, r2.y
-              ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r2.y, r2.y, l(0), u0.xxxx
-              and r5.z, r2.y, l(0x3f800000)
-            else 
-              mov r5.z, l(0)
-            endif 
-          endif 
-          if_nz r3.z
-            iadd r7.xy, cb0[1].zwzz, l(-1, -1, 0, 0)
-            utof r7.xy, r7.xyxx
-            max r7.zw, r1.xxxy, l(0.000000, 0.000000, 0.000000, 0.000000)
-            min r7.xy, r7.xyxx, r7.zwzz
-            ftou r7.xy, r7.xyxx
-            ftou r7.zw, r3.xxxy
-            imul null, r7.zw, r7.zzzw, cb0[2].xxxy
-            iadd r2.y, r7.w, r7.z
-            imad r2.y, r7.x, cb0[2].z, r2.y
-            imad r2.y, r7.y, cb0[2].w, r2.y
-            ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r2.y, r2.y, l(0), u0.xxxx
-            and r4.z, r2.y, l(0x3f800000)
-          else 
-            ieq r2.y, cb0[0].w, l(2)
-            if_nz r2.y
-              movc r2.y, cb0[7].x, l(-0.000000), l(0.500000)
-              add r7.xy, r0.wzww, r2.yyyy
-              lt r7.zw, r1.xxxy, r0.yyyy
-              add r9.xy, r0.yyyy, -r1.xyxx
-              div r9.zw, r9.xxxy, r7.xxxy
-              ftou r9.zw, r9.zzzw
-              utof r10.xy, r9.zwzz
-              mad r9.xy, -r10.xyxx, r7.xyxx, r9.xyxx
-              and r9.zw, r9.zzzw, l(0, 0, 1, 1)
-              add r10.xy, r0.yyyy, r9.xyxx
-              add r9.xy, r0.wzww, -r9.xyxx
-              movc r9.xy, r9.zwzz, r9.xyxx, r10.xyxx
-              lt r9.zw, r0.wwwz, r1.xxxy
-              add r10.xy, -r0.wzww, r1.xyxx
-              div r10.zw, r10.xxxy, r7.xxxy
-              ftou r10.zw, r10.zzzw
-              utof r11.xy, r10.zwzz
-              mad r7.xy, -r11.xyxx, r7.xyxx, r10.xyxx
-              and r10.xy, r10.zwzz, l(1, 1, 0, 0)
-              add r10.zw, r0.wwwz, -r7.xxxy
-              add r7.xy, r0.yyyy, r7.xyxx
-              movc r7.xy, r10.xyxx, r7.xyxx, r10.zwzz
-              movc r7.xy, r9.zwzz, r7.xyxx, r1.xyxx
-              movc r7.xy, r7.zwzz, r9.xyxx, r7.xyxx
-              ftou r7.xy, r7.xyxx
-              ftou r7.zw, r3.xxxy
-              imul null, r7.zw, r7.zzzw, cb0[2].xxxy
-              iadd r2.y, r7.w, r7.z
-              imad r2.y, r7.x, cb0[2].z, r2.y
-              imad r2.y, r7.y, cb0[2].w, r2.y
-              ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r2.y, r2.y, l(0), u0.xxxx
-              and r4.z, r2.y, l(0x3f800000)
-            else 
-              mov r4.z, l(0)
-            endif 
-          endif 
-          if_nz r3.z
-            iadd r7.xy, cb0[1].wzww, l(-1, -1, 0, 0)
-            utof r7.xy, r7.xyxx
-            max r2.y, r2.w, l(0.000000)
-            min r2.y, r7.x, r2.y
-            ftou r2.y, r2.y
-            max r3.w, r1.x, l(0.000000)
-            min r3.w, r7.y, r3.w
-            ftou r3.w, r3.w
-            utof r7.z, r3.w
-            utof r7.w, r2.y
-            ftou r7.xy, r3.xyxx
-            ftou r7.zw, r7.zzzw
-            imul null, r7.xy, r7.xyxx, cb0[2].xyxx
-            iadd r2.y, r7.y, r7.x
-            imad r2.y, r7.z, cb0[2].z, r2.y
-            imad r2.y, r7.w, cb0[2].w, r2.y
-            ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r2.y, r2.y, l(0), u0.xxxx
-            and r6.z, r2.y, l(0x3f800000)
-          else 
-            ieq r2.y, cb0[0].w, l(2)
-            if_nz r2.y
-              movc r2.y, cb0[7].x, l(-0.000000), l(0.500000)
-              add r7.xy, r0.zwzz, r2.yyyy
-              lt r2.y, r2.w, r0.y
-              add r3.w, r0.y, -r2.w
-              div r7.z, r3.w, r7.x
-              ftou r7.z, r7.z
-              utof r7.w, r7.z
-              mad r3.w, -r7.w, r7.x, r3.w
-              and r7.z, r7.z, l(1)
-              add r7.w, r0.y, r3.w
-              add r3.w, r0.z, -r3.w
-              movc r3.w, r7.z, r3.w, r7.w
-              lt r7.z, r0.z, r2.w
-              add r7.w, -r0.z, r2.w
-              div r9.x, r7.w, r7.x
-              ftou r9.x, r9.x
-              utof r9.y, r9.x
-              mad r7.x, -r9.y, r7.x, r7.w
-              and r7.w, r9.x, l(1)
-              add r9.x, r0.z, -r7.x
-              add r7.x, r0.y, r7.x
-              movc r7.x, r7.w, r7.x, r9.x
-              movc r7.x, r7.z, r7.x, r2.w
-              movc r2.y, r2.y, r3.w, r7.x
-              ftou r2.y, r2.y
-              lt r3.w, r1.x, r0.y
-              add r7.x, r0.y, -r1.x
-              div r7.z, r7.x, r7.y
-              ftou r7.z, r7.z
-              utof r7.w, r7.z
-              mad r7.x, -r7.w, r7.y, r7.x
-              and r7.z, r7.z, l(1)
-              add r7.w, r0.y, r7.x
-              add r7.x, r0.w, -r7.x
-              movc r7.x, r7.z, r7.x, r7.w
-              lt r7.z, r0.w, r1.x
-              add r7.w, -r0.w, r1.x
-              div r9.x, r7.w, r7.y
-              ftou r9.x, r9.x
-              utof r9.y, r9.x
-              mad r7.y, -r9.y, r7.y, r7.w
-              and r7.w, r9.x, l(1)
-              add r9.x, r0.w, -r7.y
-              add r7.y, r0.y, r7.y
-              movc r7.y, r7.w, r7.y, r9.x
-              movc r7.y, r7.z, r7.y, r1.x
-              movc r3.w, r3.w, r7.x, r7.y
-              ftou r3.w, r3.w
-              utof r7.z, r3.w
-              utof r7.w, r2.y
-              ftou r7.xy, r3.xyxx
-              ftou r7.zw, r7.zzzw
-              imul null, r7.xy, r7.xyxx, cb0[2].xyxx
-              iadd r2.y, r7.y, r7.x
-              imad r2.y, r7.z, cb0[2].z, r2.y
-              imad r2.y, r7.w, cb0[2].w, r2.y
-              ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r2.y, r2.y, l(0), u0.xxxx
-              and r6.z, r2.y, l(0x3f800000)
-            else 
-              mov r6.z, l(0)
-            endif 
-          endif 
-          if_nz r3.z
-            iadd r3.zw, cb0[1].wwwz, l(0, 0, -1, -1)
-            utof r3.zw, r3.zzzw
-            max r2.y, r2.x, l(0.000000)
-            min r2.y, r3.z, r2.y
-            ftou r2.y, r2.y
-            max r3.z, r1.x, l(0.000000)
-            min r3.z, r3.w, r3.z
-            ftou r3.z, r3.z
-            utof r3.z, r3.z
-            utof r3.w, r2.y
-            ftou r7.xy, r3.xyxx
-            ftou r3.zw, r3.zzzw
-            imul null, r7.xy, r7.xyxx, cb0[2].xyxx
-            iadd r2.y, r7.y, r7.x
-            imad r2.y, r3.z, cb0[2].z, r2.y
-            imad r2.y, r3.w, cb0[2].w, r2.y
-            ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r2.y, r2.y, l(0), u0.xxxx
-            and r8.z, r2.y, l(0x3f800000)
-          else 
-            ieq r2.y, cb0[0].w, l(2)
-            if_nz r2.y
-              movc r2.y, cb0[7].x, l(-0.000000), l(0.500000)
-              add r3.zw, r0.zzzw, r2.yyyy
-              lt r2.y, r2.x, r0.y
-              add r7.x, r0.y, -r2.x
-              div r7.y, r7.x, r3.z
-              ftou r7.y, r7.y
-              utof r7.z, r7.y
-              mad r7.x, -r7.z, r3.z, r7.x
-              and r7.y, r7.y, l(1)
-              add r7.z, r0.y, r7.x
-              add r7.x, r0.z, -r7.x
-              movc r7.x, r7.y, r7.x, r7.z
-              lt r7.y, r0.z, r2.x
-              add r7.z, -r0.z, r2.x
-              div r7.w, r7.z, r3.z
-              ftou r7.w, r7.w
-              utof r9.x, r7.w
-              mad r3.z, -r9.x, r3.z, r7.z
-              and r7.z, r7.w, l(1)
-              add r7.w, r0.z, -r3.z
-              add r3.z, r0.y, r3.z
-              movc r3.z, r7.z, r3.z, r7.w
-              movc r3.z, r7.y, r3.z, r2.x
-              movc r2.y, r2.y, r7.x, r3.z
-              ftou r2.y, r2.y
-              lt r3.z, r1.x, r0.y
-              add r7.x, r0.y, -r1.x
-              div r7.y, r7.x, r3.w
-              ftou r7.y, r7.y
-              utof r7.z, r7.y
-              mad r7.x, -r7.z, r3.w, r7.x
-              and r7.y, r7.y, l(1)
-              add r7.z, r0.y, r7.x
-              add r7.x, r0.w, -r7.x
-              movc r7.x, r7.y, r7.x, r7.z
-              lt r7.y, r0.w, r1.x
-              add r7.z, -r0.w, r1.x
-              div r7.w, r7.z, r3.w
-              ftou r7.w, r7.w
-              utof r9.x, r7.w
-              mad r3.w, -r9.x, r3.w, r7.z
-              and r7.z, r7.w, l(1)
-              add r7.w, r0.w, -r3.w
-              add r3.w, r0.y, r3.w
-              movc r3.w, r7.z, r3.w, r7.w
-              movc r3.w, r7.y, r3.w, r1.x
-              movc r3.z, r3.z, r7.x, r3.w
-              ftou r3.z, r3.z
-              utof r3.z, r3.z
-              utof r3.w, r2.y
-              ftou r7.xy, r3.xyxx
-              ftou r3.zw, r3.zzzw
-              imul null, r7.xy, r7.xyxx, cb0[2].xyxx
-              iadd r2.y, r7.y, r7.x
-              imad r2.y, r3.z, cb0[2].z, r2.y
-              imad r2.y, r3.w, cb0[2].w, r2.y
-              ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r2.y, r2.y, l(0), u0.xxxx
-              and r8.z, r2.y, l(0x3f800000)
-            else 
-              mov r8.z, l(0)
-            endif 
-          endif 
-        endif 
-        add r3.zw, r1.xxxx, l(0.000000, 0.000000, 1.000000, 2.000000)
-        if_z cb0[0].w
-          ge r7.xyz, r2.zwxz, l(0.000000, 0.000000, 0.000000, 0.000000)
-          ftou r9.xyz, r2.zwxz
-          ult r9.xyz, r9.xyzx, cb0[1].wwww
-          and r7.xyz, r7.xyzx, r9.xyzx
-          ge r1.x, r3.z, l(0.000000)
-          and r7.xyz, r1.xxxx, r7.xyzx
-          ftou r2.y, r3.z
-          ult r2.y, r2.y, cb0[1].z
-          and r7.xyz, r2.yyyy, r7.xyzx
-          ftou r9.xy, r3.xyxx
-          ftou r7.w, r3.z
-          ftou r9.z, r2.z
-          imul null, r9.xy, r9.xyxx, cb0[2].xyxx
-          iadd r9.x, r9.y, r9.x
-          imad r7.w, r7.w, cb0[2].z, r9.x
-          imad r7.w, r9.z, cb0[2].w, r7.w
-          ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r7.w, r7.w, l(0), u0.xxxx
-          and r7.x, r7.x, r7.w
-          and r5.y, r7.x, l(0x3f800000)
-          ge r7.x, r1.y, l(0.000000)
-          ftou r7.w, r1.y
-          ult r7.w, r7.w, cb0[1].w
-          and r7.x, r7.w, r7.x
-          and r1.x, r1.x, r7.x
-          and r1.x, r2.y, r1.x
-          ftou r7.xw, r3.xxxy
-          ftou r2.y, r3.z
-          ftou r9.x, r1.y
-          imul null, r7.xw, r7.xxxw, cb0[2].xxxy
-          iadd r7.x, r7.w, r7.x
-          imad r2.y, r2.y, cb0[2].z, r7.x
-          imad r2.y, r9.x, cb0[2].w, r2.y
-          ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r2.y, r2.y, l(0), u0.xxxx
-          and r1.x, r1.x, r2.y
-          and r4.y, r1.x, l(0x3f800000)
-          ftou r7.xw, r3.xxxy
-          ftou r1.x, r3.z
-          ftou r2.y, r2.w
-          imul null, r7.xw, r7.xxxw, cb0[2].xxxy
-          iadd r7.x, r7.w, r7.x
-          imad r1.x, r1.x, cb0[2].z, r7.x
-          imad r1.x, r2.y, cb0[2].w, r1.x
-          ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r1.x, r1.x, l(0), u0.xxxx
-          and r1.x, r7.y, r1.x
-          and r6.y, r1.x, l(0x3f800000)
-          ftou r7.xy, r3.xyxx
-          ftou r1.x, r3.z
-          ftou r2.y, r2.x
-          imul null, r7.xy, r7.xyxx, cb0[2].xyxx
-          iadd r7.x, r7.y, r7.x
-          imad r1.x, r1.x, cb0[2].z, r7.x
-          imad r1.x, r2.y, cb0[2].w, r1.x
-          ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r1.x, r1.x, l(0), u0.xxxx
-          and r1.x, r7.z, r1.x
-          and r8.y, r1.x, l(0x3f800000)
-        else 
-          ieq r1.x, cb0[0].w, l(1)
-          if_nz r1.x
-            iadd r7.xy, cb0[1].wzww, l(-1, -1, 0, 0)
-            utof r7.xy, r7.xyxx
-            max r2.y, r2.z, l(0.000000)
-            min r2.y, r7.x, r2.y
-            ftou r2.y, r2.y
-            max r7.x, r3.z, l(0.000000)
-            min r7.x, r7.y, r7.x
-            ftou r7.x, r7.x
-            utof r7.z, r7.x
-            utof r7.w, r2.y
-            ftou r7.xy, r3.xyxx
-            ftou r7.zw, r7.zzzw
-            imul null, r7.xy, r7.xyxx, cb0[2].xyxx
-            iadd r2.y, r7.y, r7.x
-            imad r2.y, r7.z, cb0[2].z, r2.y
-            imad r2.y, r7.w, cb0[2].w, r2.y
-            ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r2.y, r2.y, l(0), u0.xxxx
-            and r5.y, r2.y, l(0x3f800000)
-          else 
-            ieq r2.y, cb0[0].w, l(2)
-            if_nz r2.y
-              movc r2.y, cb0[7].x, l(-0.000000), l(0.500000)
-              add r7.xy, r0.zwzz, r2.yyyy
-              lt r2.y, r2.z, r0.y
-              add r7.z, r0.y, -r2.z
-              div r7.w, r7.z, r7.x
-              ftou r7.w, r7.w
-              utof r9.x, r7.w
-              mad r7.z, -r9.x, r7.x, r7.z
-              and r7.w, r7.w, l(1)
-              add r9.x, r0.y, r7.z
-              add r7.z, r0.z, -r7.z
-              movc r7.z, r7.w, r7.z, r9.x
-              lt r7.w, r0.z, r2.z
-              add r9.x, -r0.z, r2.z
-              div r9.y, r9.x, r7.x
-              ftou r9.y, r9.y
-              utof r9.z, r9.y
-              mad r7.x, -r9.z, r7.x, r9.x
-              and r9.x, r9.y, l(1)
-              add r9.y, r0.z, -r7.x
-              add r7.x, r0.y, r7.x
-              movc r7.x, r9.x, r7.x, r9.y
-              movc r7.x, r7.w, r7.x, r2.z
-              movc r2.y, r2.y, r7.z, r7.x
-              ftou r2.y, r2.y
-              lt r7.x, r3.z, r0.y
-              add r7.z, r0.y, -r3.z
-              div r7.w, r7.z, r7.y
-              ftou r7.w, r7.w
-              utof r9.x, r7.w
-              mad r7.z, -r9.x, r7.y, r7.z
-              and r7.w, r7.w, l(1)
-              add r9.x, r0.y, r7.z
-              add r7.z, r0.w, -r7.z
-              movc r7.z, r7.w, r7.z, r9.x
-              lt r7.w, r0.w, r3.z
-              add r9.x, -r0.w, r3.z
-              div r9.y, r9.x, r7.y
-              ftou r9.y, r9.y
-              utof r9.z, r9.y
-              mad r7.y, -r9.z, r7.y, r9.x
-              and r9.x, r9.y, l(1)
-              add r9.y, r0.w, -r7.y
-              add r7.y, r0.y, r7.y
-              movc r7.y, r9.x, r7.y, r9.y
-              movc r7.y, r7.w, r7.y, r3.z
-              movc r7.x, r7.x, r7.z, r7.y
-              ftou r7.x, r7.x
-              utof r7.z, r7.x
-              utof r7.w, r2.y
-              ftou r7.xy, r3.xyxx
-              ftou r7.zw, r7.zzzw
-              imul null, r7.xy, r7.xyxx, cb0[2].xyxx
-              iadd r2.y, r7.y, r7.x
-              imad r2.y, r7.z, cb0[2].z, r2.y
-              imad r2.y, r7.w, cb0[2].w, r2.y
-              ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r2.y, r2.y, l(0), u0.xxxx
-              and r5.y, r2.y, l(0x3f800000)
-            else 
-              mov r5.y, l(0)
-            endif 
-          endif 
-          if_nz r1.x
-            iadd r7.xy, cb0[1].wzww, l(-1, -1, 0, 0)
-            utof r7.xy, r7.xyxx
-            max r2.y, r1.y, l(0.000000)
-            min r2.y, r7.x, r2.y
-            ftou r2.y, r2.y
-            max r7.x, r3.z, l(0.000000)
-            min r7.x, r7.y, r7.x
-            ftou r7.x, r7.x
-            utof r7.z, r7.x
-            utof r7.w, r2.y
-            ftou r7.xy, r3.xyxx
-            ftou r7.zw, r7.zzzw
-            imul null, r7.xy, r7.xyxx, cb0[2].xyxx
-            iadd r2.y, r7.y, r7.x
-            imad r2.y, r7.z, cb0[2].z, r2.y
-            imad r2.y, r7.w, cb0[2].w, r2.y
-            ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r2.y, r2.y, l(0), u0.xxxx
-            and r4.y, r2.y, l(0x3f800000)
-          else 
-            ieq r2.y, cb0[0].w, l(2)
-            if_nz r2.y
-              movc r2.y, cb0[7].x, l(-0.000000), l(0.500000)
-              add r7.xy, r0.zwzz, r2.yyyy
-              lt r2.y, r1.y, r0.y
-              add r7.z, r0.y, -r1.y
-              div r7.w, r7.z, r7.x
-              ftou r7.w, r7.w
-              utof r9.x, r7.w
-              mad r7.z, -r9.x, r7.x, r7.z
-              and r7.w, r7.w, l(1)
-              add r9.x, r0.y, r7.z
-              add r7.z, r0.z, -r7.z
-              movc r7.z, r7.w, r7.z, r9.x
-              lt r7.w, r0.z, r1.y
-              add r9.x, -r0.z, r1.y
-              div r9.y, r9.x, r7.x
-              ftou r9.y, r9.y
-              utof r9.z, r9.y
-              mad r7.x, -r9.z, r7.x, r9.x
-              and r9.x, r9.y, l(1)
-              add r9.y, r0.z, -r7.x
-              add r7.x, r0.y, r7.x
-              movc r7.x, r9.x, r7.x, r9.y
-              movc r7.x, r7.w, r7.x, r1.y
-              movc r2.y, r2.y, r7.z, r7.x
-              ftou r2.y, r2.y
-              lt r7.x, r3.z, r0.y
-              add r7.z, r0.y, -r3.z
-              div r7.w, r7.z, r7.y
-              ftou r7.w, r7.w
-              utof r9.x, r7.w
-              mad r7.z, -r9.x, r7.y, r7.z
-              and r7.w, r7.w, l(1)
-              add r9.x, r0.y, r7.z
-              add r7.z, r0.w, -r7.z
-              movc r7.z, r7.w, r7.z, r9.x
-              lt r7.w, r0.w, r3.z
-              add r9.x, -r0.w, r3.z
-              div r9.y, r9.x, r7.y
-              ftou r9.y, r9.y
-              utof r9.z, r9.y
-              mad r7.y, -r9.z, r7.y, r9.x
-              and r9.x, r9.y, l(1)
-              add r9.y, r0.w, -r7.y
-              add r7.y, r0.y, r7.y
-              movc r7.y, r9.x, r7.y, r9.y
-              movc r7.y, r7.w, r7.y, r3.z
-              movc r7.x, r7.x, r7.z, r7.y
-              ftou r7.x, r7.x
-              utof r7.z, r7.x
-              utof r7.w, r2.y
-              ftou r7.xy, r3.xyxx
-              ftou r7.zw, r7.zzzw
-              imul null, r7.xy, r7.xyxx, cb0[2].xyxx
-              iadd r2.y, r7.y, r7.x
-              imad r2.y, r7.z, cb0[2].z, r2.y
-              imad r2.y, r7.w, cb0[2].w, r2.y
-              ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r2.y, r2.y, l(0), u0.xxxx
-              and r4.y, r2.y, l(0x3f800000)
-            else 
-              mov r4.y, l(0)
-            endif 
-          endif 
-          if_nz r1.x
-            iadd r7.xy, cb0[1].wzww, l(-1, -1, 0, 0)
-            utof r7.xy, r7.xyxx
-            max r2.y, r2.w, l(0.000000)
-            min r2.y, r7.x, r2.y
-            ftou r2.y, r2.y
-            max r7.x, r3.z, l(0.000000)
-            min r7.x, r7.y, r7.x
-            ftou r7.x, r7.x
-            utof r7.z, r7.x
-            utof r7.w, r2.y
-            ftou r7.xy, r3.xyxx
-            ftou r7.zw, r7.zzzw
-            imul null, r7.xy, r7.xyxx, cb0[2].xyxx
-            iadd r2.y, r7.y, r7.x
-            imad r2.y, r7.z, cb0[2].z, r2.y
-            imad r2.y, r7.w, cb0[2].w, r2.y
-            ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r2.y, r2.y, l(0), u0.xxxx
-            and r6.y, r2.y, l(0x3f800000)
-          else 
-            ieq r2.y, cb0[0].w, l(2)
-            if_nz r2.y
-              movc r2.y, cb0[7].x, l(-0.000000), l(0.500000)
-              add r7.xy, r0.zwzz, r2.yyyy
-              lt r2.y, r2.w, r0.y
-              add r7.z, r0.y, -r2.w
-              div r7.w, r7.z, r7.x
-              ftou r7.w, r7.w
-              utof r9.x, r7.w
-              mad r7.z, -r9.x, r7.x, r7.z
-              and r7.w, r7.w, l(1)
-              add r9.x, r0.y, r7.z
-              add r7.z, r0.z, -r7.z
-              movc r7.z, r7.w, r7.z, r9.x
-              lt r7.w, r0.z, r2.w
-              add r9.x, -r0.z, r2.w
-              div r9.y, r9.x, r7.x
-              ftou r9.y, r9.y
-              utof r9.z, r9.y
-              mad r7.x, -r9.z, r7.x, r9.x
-              and r9.x, r9.y, l(1)
-              add r9.y, r0.z, -r7.x
-              add r7.x, r0.y, r7.x
-              movc r7.x, r9.x, r7.x, r9.y
-              movc r7.x, r7.w, r7.x, r2.w
-              movc r2.y, r2.y, r7.z, r7.x
-              ftou r2.y, r2.y
-              lt r7.x, r3.z, r0.y
-              add r7.z, r0.y, -r3.z
-              div r7.w, r7.z, r7.y
-              ftou r7.w, r7.w
-              utof r9.x, r7.w
-              mad r7.z, -r9.x, r7.y, r7.z
-              and r7.w, r7.w, l(1)
-              add r9.x, r0.y, r7.z
-              add r7.z, r0.w, -r7.z
-              movc r7.z, r7.w, r7.z, r9.x
-              lt r7.w, r0.w, r3.z
-              add r9.x, -r0.w, r3.z
-              div r9.y, r9.x, r7.y
-              ftou r9.y, r9.y
-              utof r9.z, r9.y
-              mad r7.y, -r9.z, r7.y, r9.x
-              and r9.x, r9.y, l(1)
-              add r9.y, r0.w, -r7.y
-              add r7.y, r0.y, r7.y
-              movc r7.y, r9.x, r7.y, r9.y
-              movc r7.y, r7.w, r7.y, r3.z
-              movc r7.x, r7.x, r7.z, r7.y
-              ftou r7.x, r7.x
-              utof r7.z, r7.x
-              utof r7.w, r2.y
-              ftou r7.xy, r3.xyxx
-              ftou r7.zw, r7.zzzw
-              imul null, r7.xy, r7.xyxx, cb0[2].xyxx
-              iadd r2.y, r7.y, r7.x
-              imad r2.y, r7.z, cb0[2].z, r2.y
-              imad r2.y, r7.w, cb0[2].w, r2.y
-              ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r2.y, r2.y, l(0), u0.xxxx
-              and r6.y, r2.y, l(0x3f800000)
-            else 
-              mov r6.y, l(0)
-            endif 
-          endif 
-          if_nz r1.x
-            iadd r7.xy, cb0[1].wzww, l(-1, -1, 0, 0)
-            utof r7.xy, r7.xyxx
-            max r1.x, r2.x, l(0.000000)
-            min r1.x, r7.x, r1.x
-            ftou r1.x, r1.x
-            max r2.y, r3.z, l(0.000000)
-            min r2.y, r7.y, r2.y
-            ftou r2.y, r2.y
-            utof r7.z, r2.y
-            utof r7.w, r1.x
-            ftou r7.xy, r3.xyxx
-            ftou r7.zw, r7.zzzw
-            imul null, r7.xy, r7.xyxx, cb0[2].xyxx
-            iadd r1.x, r7.y, r7.x
-            imad r1.x, r7.z, cb0[2].z, r1.x
-            imad r1.x, r7.w, cb0[2].w, r1.x
-            ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r1.x, r1.x, l(0), u0.xxxx
-            and r8.y, r1.x, l(0x3f800000)
-          else 
-            ieq r1.x, cb0[0].w, l(2)
-            if_nz r1.x
-              movc r1.x, cb0[7].x, l(-0.000000), l(0.500000)
-              add r7.xy, r0.zwzz, r1.xxxx
-              lt r1.x, r2.x, r0.y
-              add r2.y, r0.y, -r2.x
-              div r7.z, r2.y, r7.x
-              ftou r7.z, r7.z
-              utof r7.w, r7.z
-              mad r2.y, -r7.w, r7.x, r2.y
-              and r7.z, r7.z, l(1)
-              add r7.w, r0.y, r2.y
-              add r2.y, r0.z, -r2.y
-              movc r2.y, r7.z, r2.y, r7.w
-              lt r7.z, r0.z, r2.x
-              add r7.w, -r0.z, r2.x
-              div r9.x, r7.w, r7.x
-              ftou r9.x, r9.x
-              utof r9.y, r9.x
-              mad r7.x, -r9.y, r7.x, r7.w
-              and r7.w, r9.x, l(1)
-              add r9.x, r0.z, -r7.x
-              add r7.x, r0.y, r7.x
-              movc r7.x, r7.w, r7.x, r9.x
-              movc r7.x, r7.z, r7.x, r2.x
-              movc r1.x, r1.x, r2.y, r7.x
-              ftou r1.x, r1.x
-              lt r2.y, r3.z, r0.y
-              add r7.x, r0.y, -r3.z
-              div r7.z, r7.x, r7.y
-              ftou r7.z, r7.z
-              utof r7.w, r7.z
-              mad r7.x, -r7.w, r7.y, r7.x
-              and r7.z, r7.z, l(1)
-              add r7.w, r0.y, r7.x
-              add r7.x, r0.w, -r7.x
-              movc r7.x, r7.z, r7.x, r7.w
-              lt r7.z, r0.w, r3.z
-              add r7.w, -r0.w, r3.z
-              div r9.x, r7.w, r7.y
-              ftou r9.x, r9.x
-              utof r9.y, r9.x
-              mad r7.y, -r9.y, r7.y, r7.w
-              and r7.w, r9.x, l(1)
-              add r9.x, r0.w, -r7.y
-              add r7.y, r0.y, r7.y
-              movc r7.y, r7.w, r7.y, r9.x
-              movc r3.z, r7.z, r7.y, r3.z
-              movc r2.y, r2.y, r7.x, r3.z
-              ftou r2.y, r2.y
-              utof r7.z, r2.y
-              utof r7.w, r1.x
-              ftou r7.xy, r3.xyxx
-              ftou r7.zw, r7.zzzw
-              imul null, r7.xy, r7.xyxx, cb0[2].xyxx
-              iadd r1.x, r7.y, r7.x
-              imad r1.x, r7.z, cb0[2].z, r1.x
-              imad r1.x, r7.w, cb0[2].w, r1.x
-              ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r1.x, r1.x, l(0), u0.xxxx
-              and r8.y, r1.x, l(0x3f800000)
-            else 
-              mov r8.y, l(0)
-            endif 
-          endif 
-        endif 
-        if_z cb0[0].w
-          ge r7.xyz, r2.zwxz, l(0.000000, 0.000000, 0.000000, 0.000000)
-          ftou r9.xyz, r2.zwxz
-          ult r9.xyz, r9.xyzx, cb0[1].wwww
-          and r7.xyz, r7.xyzx, r9.xyzx
-          ge r1.x, r3.w, l(0.000000)
-          and r7.xyz, r1.xxxx, r7.xyzx
-          ftou r2.y, r3.w
-          ult r2.y, r2.y, cb0[1].z
-          and r7.xyz, r2.yyyy, r7.xyzx
-          ftou r9.xy, r3.xyxx
-          ftou r3.z, r3.w
-          ftou r7.w, r2.z
-          imul null, r9.xy, r9.xyxx, cb0[2].xyxx
-          iadd r9.x, r9.y, r9.x
-          imad r3.z, r3.z, cb0[2].z, r9.x
-          imad r3.z, r7.w, cb0[2].w, r3.z
-          ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r3.z, r3.z, l(0), u0.xxxx
-          and r3.z, r7.x, r3.z
-          and r5.w, r3.z, l(0x3f800000)
-          ge r3.z, r1.y, l(0.000000)
-          ftou r7.x, r1.y
-          ult r7.x, r7.x, cb0[1].w
-          and r3.z, r3.z, r7.x
-          and r1.x, r1.x, r3.z
-          and r1.x, r2.y, r1.x
-          ftou r7.xw, r3.xxxy
-          ftou r2.y, r3.w
-          ftou r3.z, r1.y
-          imul null, r7.xw, r7.xxxw, cb0[2].xxxy
-          iadd r7.x, r7.w, r7.x
-          imad r2.y, r2.y, cb0[2].z, r7.x
-          imad r2.y, r3.z, cb0[2].w, r2.y
-          ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r2.y, r2.y, l(0), u0.xxxx
-          and r1.x, r1.x, r2.y
-          and r4.w, r1.x, l(0x3f800000)
-          ftou r7.xw, r3.xxxy
-          ftou r1.x, r3.w
-          ftou r2.y, r2.w
-          imul null, r7.xw, r7.xxxw, cb0[2].xxxy
-          iadd r3.z, r7.w, r7.x
-          imad r1.x, r1.x, cb0[2].z, r3.z
-          imad r1.x, r2.y, cb0[2].w, r1.x
-          ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r1.x, r1.x, l(0), u0.xxxx
-          and r1.x, r7.y, r1.x
-          and r6.w, r1.x, l(0x3f800000)
-          ftou r7.xy, r3.xyxx
-          ftou r1.x, r3.w
-          ftou r2.y, r2.x
-          imul null, r7.xy, r7.xyxx, cb0[2].xyxx
-          iadd r3.z, r7.y, r7.x
-          imad r1.x, r1.x, cb0[2].z, r3.z
-          imad r1.x, r2.y, cb0[2].w, r1.x
-          ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r1.x, r1.x, l(0), u0.xxxx
-          and r1.x, r7.z, r1.x
-          and r8.w, r1.x, l(0x3f800000)
-        else 
-          ieq r1.x, cb0[0].w, l(1)
-          if_nz r1.x
-            iadd r7.xy, cb0[1].wzww, l(-1, -1, 0, 0)
-            utof r7.xy, r7.xyxx
-            max r2.y, r2.z, l(0.000000)
-            min r2.y, r7.x, r2.y
-            ftou r2.y, r2.y
-            max r3.z, r3.w, l(0.000000)
-            min r3.z, r7.y, r3.z
-            ftou r3.z, r3.z
-            utof r7.z, r3.z
-            utof r7.w, r2.y
-            ftou r7.xy, r3.xyxx
-            ftou r7.zw, r7.zzzw
-            imul null, r7.xy, r7.xyxx, cb0[2].xyxx
-            iadd r2.y, r7.y, r7.x
-            imad r2.y, r7.z, cb0[2].z, r2.y
-            imad r2.y, r7.w, cb0[2].w, r2.y
-            ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r2.y, r2.y, l(0), u0.xxxx
-            and r5.w, r2.y, l(0x3f800000)
-          else 
-            ieq r2.y, cb0[0].w, l(2)
-            if_nz r2.y
-              movc r2.y, cb0[7].x, l(-0.000000), l(0.500000)
-              add r7.xy, r0.zwzz, r2.yyyy
-              lt r2.y, r2.z, r0.y
-              add r3.z, r0.y, -r2.z
-              div r7.z, r3.z, r7.x
-              ftou r7.z, r7.z
-              utof r7.w, r7.z
-              mad r3.z, -r7.w, r7.x, r3.z
-              and r7.z, r7.z, l(1)
-              add r7.w, r0.y, r3.z
-              add r3.z, r0.z, -r3.z
-              movc r3.z, r7.z, r3.z, r7.w
-              lt r7.z, r0.z, r2.z
-              add r7.w, -r0.z, r2.z
-              div r9.x, r7.w, r7.x
-              ftou r9.x, r9.x
-              utof r9.y, r9.x
-              mad r7.x, -r9.y, r7.x, r7.w
-              and r7.w, r9.x, l(1)
-              add r9.x, r0.z, -r7.x
-              add r7.x, r0.y, r7.x
-              movc r7.x, r7.w, r7.x, r9.x
-              movc r2.z, r7.z, r7.x, r2.z
-              movc r2.y, r2.y, r3.z, r2.z
-              lt r2.z, r3.w, r0.y
-              add r3.z, r0.y, -r3.w
-              div r7.x, r3.z, r7.y
-              ftou r7.x, r7.x
-              utof r7.z, r7.x
-              mad r3.z, -r7.z, r7.y, r3.z
-              and r7.x, r7.x, l(1)
-              add r7.z, r0.y, r3.z
-              add r3.z, r0.w, -r3.z
-              movc r3.z, r7.x, r3.z, r7.z
-              lt r7.x, r0.w, r3.w
-              add r7.z, -r0.w, r3.w
-              div r7.w, r7.z, r7.y
-              ftou r7.w, r7.w
-              utof r9.x, r7.w
-              mad r7.y, -r9.x, r7.y, r7.z
-              and r7.z, r7.w, l(1)
-              add r7.w, r0.w, -r7.y
-              add r7.y, r0.y, r7.y
-              movc r7.y, r7.z, r7.y, r7.w
-              movc r7.x, r7.x, r7.y, r3.w
-              movc r2.z, r2.z, r3.z, r7.x
-              ftou r2.yz, r2.yyzy
-              utof r7.zw, r2.zzzy
-              ftou r2.yz, r3.xxyx
-              ftou r7.xy, r7.zwzz
-              imul null, r2.yz, r2.yyzy, cb0[2].xxyx
-              iadd r2.y, r2.z, r2.y
-              imad r2.y, r7.x, cb0[2].z, r2.y
-              imad r2.y, r7.y, cb0[2].w, r2.y
-              ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r2.y, r2.y, l(0), u0.xxxx
-              and r5.w, r2.y, l(0x3f800000)
-            else 
-              mov r5.w, l(0)
-            endif 
-          endif 
-          if_nz r1.x
-            iadd r2.yz, cb0[1].wwzw, l(0, -1, -1, 0)
-            utof r2.yz, r2.yyzy
-            max r3.z, r1.y, l(0.000000)
-            min r2.y, r2.y, r3.z
-            max r3.z, r3.w, l(0.000000)
-            min r2.z, r2.z, r3.z
-            ftou r2.yz, r2.yyzy
-            utof r7.zw, r2.zzzy
-            ftou r2.yz, r3.xxyx
-            ftou r7.xy, r7.zwzz
-            imul null, r2.yz, r2.yyzy, cb0[2].xxyx
-            iadd r2.y, r2.z, r2.y
-            imad r2.y, r7.x, cb0[2].z, r2.y
-            imad r2.y, r7.y, cb0[2].w, r2.y
-            ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r2.y, r2.y, l(0), u0.xxxx
-            and r4.w, r2.y, l(0x3f800000)
-          else 
-            ieq r2.y, cb0[0].w, l(2)
-            if_nz r2.y
-              movc r2.y, cb0[7].x, l(-0.000000), l(0.500000)
-              add r2.yz, r0.zzwz, r2.yyyy
-              lt r3.z, r1.y, r0.y
-              add r7.x, r0.y, -r1.y
-              div r7.y, r7.x, r2.y
-              ftou r7.y, r7.y
-              utof r7.z, r7.y
-              mad r7.x, -r7.z, r2.y, r7.x
-              and r7.y, r7.y, l(1)
-              add r7.z, r0.y, r7.x
-              add r7.x, r0.z, -r7.x
-              movc r7.x, r7.y, r7.x, r7.z
-              lt r7.y, r0.z, r1.y
-              add r7.z, -r0.z, r1.y
-              div r7.w, r7.z, r2.y
-              ftou r7.w, r7.w
-              utof r9.x, r7.w
-              mad r2.y, -r9.x, r2.y, r7.z
-              and r7.z, r7.w, l(1)
-              add r7.w, r0.z, -r2.y
-              add r2.y, r0.y, r2.y
-              movc r2.y, r7.z, r2.y, r7.w
-              movc r1.y, r7.y, r2.y, r1.y
-              movc r1.y, r3.z, r7.x, r1.y
-              ftou r1.y, r1.y
-              lt r2.y, r3.w, r0.y
-              add r3.z, r0.y, -r3.w
-              div r7.x, r3.z, r2.z
-              ftou r7.x, r7.x
-              utof r7.y, r7.x
-              mad r3.z, -r7.y, r2.z, r3.z
-              and r7.x, r7.x, l(1)
-              add r7.y, r0.y, r3.z
-              add r3.z, r0.w, -r3.z
-              movc r3.z, r7.x, r3.z, r7.y
-              lt r7.x, r0.w, r3.w
-              add r7.y, -r0.w, r3.w
-              div r7.z, r7.y, r2.z
-              ftou r7.z, r7.z
-              utof r7.w, r7.z
-              mad r2.z, -r7.w, r2.z, r7.y
-              and r7.y, r7.z, l(1)
-              add r7.z, r0.w, -r2.z
-              add r2.z, r0.y, r2.z
-              movc r2.z, r7.y, r2.z, r7.z
-              movc r2.z, r7.x, r2.z, r3.w
-              movc r2.y, r2.y, r3.z, r2.z
-              ftou r2.y, r2.y
-              utof r7.z, r2.y
-              utof r7.w, r1.y
-              ftou r2.yz, r3.xxyx
-              ftou r7.xy, r7.zwzz
-              imul null, r2.yz, r2.yyzy, cb0[2].xxyx
-              iadd r1.y, r2.z, r2.y
-              imad r1.y, r7.x, cb0[2].z, r1.y
-              imad r1.y, r7.y, cb0[2].w, r1.y
-              ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r1.y, r1.y, l(0), u0.xxxx
-              and r4.w, r1.y, l(0x3f800000)
-            else 
-              mov r4.w, l(0)
-            endif 
-          endif 
-          if_nz r1.x
-            iadd r2.yz, cb0[1].wwzw, l(0, -1, -1, 0)
-            utof r2.yz, r2.yyzy
-            max r1.y, r2.w, l(0.000000)
-            min r1.y, r2.y, r1.y
-            ftou r1.y, r1.y
-            max r2.y, r3.w, l(0.000000)
-            min r2.y, r2.z, r2.y
-            ftou r2.y, r2.y
-            utof r7.z, r2.y
-            utof r7.w, r1.y
-            ftou r2.yz, r3.xxyx
-            ftou r7.xy, r7.zwzz
-            imul null, r2.yz, r2.yyzy, cb0[2].xxyx
-            iadd r1.y, r2.z, r2.y
-            imad r1.y, r7.x, cb0[2].z, r1.y
-            imad r1.y, r7.y, cb0[2].w, r1.y
-            ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r1.y, r1.y, l(0), u0.xxxx
-            and r6.w, r1.y, l(0x3f800000)
-          else 
-            ieq r1.y, cb0[0].w, l(2)
-            if_nz r1.y
-              movc r1.y, cb0[7].x, l(-0.000000), l(0.500000)
-              add r2.yz, r0.zzwz, r1.yyyy
-              lt r1.y, r2.w, r0.y
-              add r3.z, r0.y, -r2.w
-              div r7.x, r3.z, r2.y
-              ftou r7.x, r7.x
-              utof r7.y, r7.x
-              mad r3.z, -r7.y, r2.y, r3.z
-              and r7.x, r7.x, l(1)
-              add r7.y, r0.y, r3.z
-              add r3.z, r0.z, -r3.z
-              movc r3.z, r7.x, r3.z, r7.y
-              lt r7.x, r0.z, r2.w
-              add r7.y, -r0.z, r2.w
-              div r7.z, r7.y, r2.y
-              ftou r7.z, r7.z
-              utof r7.w, r7.z
-              mad r2.y, -r7.w, r2.y, r7.y
-              and r7.y, r7.z, l(1)
-              add r7.z, r0.z, -r2.y
-              add r2.y, r0.y, r2.y
-              movc r2.y, r7.y, r2.y, r7.z
-              movc r2.y, r7.x, r2.y, r2.w
-              movc r1.y, r1.y, r3.z, r2.y
-              ftou r1.y, r1.y
-              lt r2.y, r3.w, r0.y
-              add r2.w, r0.y, -r3.w
-              div r3.z, r2.w, r2.z
-              ftou r3.z, r3.z
-              utof r7.x, r3.z
-              mad r2.w, -r7.x, r2.z, r2.w
-              and r3.z, r3.z, l(1)
-              add r7.x, r0.y, r2.w
-              add r2.w, r0.w, -r2.w
-              movc r2.w, r3.z, r2.w, r7.x
-              lt r3.z, r0.w, r3.w
-              add r7.x, -r0.w, r3.w
-              div r7.y, r7.x, r2.z
-              ftou r7.y, r7.y
-              utof r7.z, r7.y
-              mad r2.z, -r7.z, r2.z, r7.x
-              and r7.x, r7.y, l(1)
-              add r7.y, r0.w, -r2.z
-              add r2.z, r0.y, r2.z
-              movc r2.z, r7.x, r2.z, r7.y
-              movc r2.z, r3.z, r2.z, r3.w
-              movc r2.y, r2.y, r2.w, r2.z
-              ftou r2.y, r2.y
-              utof r2.z, r2.y
-              utof r2.w, r1.y
-              ftou r7.xy, r3.xyxx
-              ftou r2.yz, r2.zzwz
-              imul null, r7.xy, r7.xyxx, cb0[2].xyxx
-              iadd r1.y, r7.y, r7.x
-              imad r1.y, r2.y, cb0[2].z, r1.y
-              imad r1.y, r2.z, cb0[2].w, r1.y
-              ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r1.y, r1.y, l(0), u0.xxxx
-              and r6.w, r1.y, l(0x3f800000)
-            else 
-              mov r6.w, l(0)
-            endif 
-          endif 
-          if_nz r1.x
-            iadd r1.xy, cb0[1].wzww, l(-1, -1, 0, 0)
-            utof r1.xy, r1.xyxx
-            max r2.y, r2.x, l(0.000000)
-            min r1.x, r1.x, r2.y
-            max r2.y, r3.w, l(0.000000)
-            min r1.y, r1.y, r2.y
-            ftou r1.xy, r1.xyxx
-            utof r2.zw, r1.yyyx
-            ftou r1.xy, r3.xyxx
-            ftou r2.yz, r2.zzwz
-            imul null, r1.xy, r1.xyxx, cb0[2].xyxx
-            iadd r1.x, r1.y, r1.x
-            imad r1.x, r2.y, cb0[2].z, r1.x
-            imad r1.x, r2.z, cb0[2].w, r1.x
-            ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r1.x, r1.x, l(0), u0.xxxx
-            and r8.w, r1.x, l(0x3f800000)
-          else 
-            ieq r1.x, cb0[0].w, l(2)
-            if_nz r1.x
-              movc r1.x, cb0[7].x, l(-0.000000), l(0.500000)
-              add r1.xy, r0.zwzz, r1.xxxx
-              lt r2.y, r2.x, r0.y
-              add r2.z, r0.y, -r2.x
-              div r2.w, r2.z, r1.x
-              ftou r2.w, r2.w
-              utof r3.z, r2.w
-              mad r2.z, -r3.z, r1.x, r2.z
-              and r2.w, r2.w, l(1)
-              add r3.z, r0.y, r2.z
-              add r2.z, r0.z, -r2.z
-              movc r2.z, r2.w, r2.z, r3.z
-              lt r2.w, r0.z, r2.x
-              add r3.z, -r0.z, r2.x
-              div r7.x, r3.z, r1.x
-              ftou r7.x, r7.x
-              utof r7.y, r7.x
-              mad r1.x, -r7.y, r1.x, r3.z
-              and r3.z, r7.x, l(1)
-              add r0.z, r0.z, -r1.x
-              add r1.x, r0.y, r1.x
-              movc r0.z, r3.z, r1.x, r0.z
-              movc r0.z, r2.w, r0.z, r2.x
-              movc r0.z, r2.y, r2.z, r0.z
-              lt r1.x, r3.w, r0.y
-              add r2.x, r0.y, -r3.w
-              div r2.y, r2.x, r1.y
-              ftou r2.y, r2.y
-              utof r2.z, r2.y
-              mad r2.x, -r2.z, r1.y, r2.x
-              and r2.y, r2.y, l(1)
-              add r2.z, r0.y, r2.x
-              add r2.x, r0.w, -r2.x
-              movc r2.x, r2.y, r2.x, r2.z
-              lt r2.y, r0.w, r3.w
-              add r2.z, -r0.w, r3.w
-              div r2.w, r2.z, r1.y
-              ftou r2.w, r2.w
-              utof r3.z, r2.w
-              mad r1.y, -r3.z, r1.y, r2.z
-              and r2.z, r2.w, l(1)
-              add r0.w, r0.w, -r1.y
-              add r0.y, r0.y, r1.y
-              movc r0.y, r2.z, r0.y, r0.w
-              movc r0.y, r2.y, r0.y, r3.w
-              movc r0.y, r1.x, r2.x, r0.y
-              ftou r0.yz, r0.yyzy
-              utof r2.zw, r0.yyyz
-              ftou r0.yz, r3.xxyx
-              ftou r1.xy, r2.zwzz
-              imul null, r0.yz, r0.yyzy, cb0[2].xxyx
-              iadd r0.y, r0.z, r0.y
-              imad r0.y, r1.x, cb0[2].z, r0.y
-              imad r0.y, r1.y, cb0[2].w, r0.y
-              ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r0.y, r0.y, l(0), u0.xxxx
-              and r8.w, r0.y, l(0x3f800000)
-            else 
-              mov r8.w, l(0)
-            endif 
-          endif 
-        endif 
-        frc r0.yz, r1.wwzw
-        mul r1.xy, r0.yzyy, r0.yzyy
-        mul r2.xw, r0.yyyz, r1.xxxy
-        dp2 r3.y, l(-0.750000, 0.750000, 0.000000, 0.000000), r5.xyxx
-        dp4 r3.z, l(1.500000, 1.500000, -2.250000, -0.750000), r5.xyzw
-        dp4 r3.w, l(-0.750000, -1.250000, 1.250000, 0.750000), r5.xyzw
-        mov r7.x, l(1.000000)
-        mov r7.y, r0.y
-        mov r7.z, r1.x
-        mov r7.w, r2.x
-        mov r3.x, r5.z
-        dp4 r3.x, r7.xyzw, r3.xyzw
-        dp2 r5.y, l(-0.750000, 0.750000, 0.000000, 0.000000), r4.xyxx
-        dp4 r5.z, l(1.500000, 1.500000, -2.250000, -0.750000), r4.xyzw
-        dp4 r5.w, l(-0.750000, -1.250000, 1.250000, 0.750000), r4.xyzw
-        mov r5.x, r4.z
-        dp4 r3.y, r7.xyzw, r5.xyzw
-        dp2 r4.y, l(-0.750000, 0.750000, 0.000000, 0.000000), r6.xyxx
-        dp4 r4.z, l(1.500000, 1.500000, -2.250000, -0.750000), r6.xyzw
-        dp4 r4.w, l(-0.750000, -1.250000, 1.250000, 0.750000), r6.xyzw
-        mov r4.x, r6.z
-        dp4 r3.z, r7.xyzw, r4.xyzw
-        dp2 r4.y, l(-0.750000, 0.750000, 0.000000, 0.000000), r8.xyxx
-        dp4 r4.z, l(1.500000, 1.500000, -2.250000, -0.750000), r8.xyzw
-        dp4 r4.w, l(-0.750000, -1.250000, 1.250000, 0.750000), r8.xyzw
-        mov r4.x, r8.z
-        dp4 r3.w, r7.xyzw, r4.xyzw
-        dp2 r4.y, l(-0.750000, 0.750000, 0.000000, 0.000000), r3.xzxx
-        dp4 r4.z, l(1.500000, -2.250000, 1.500000, -0.750000), r3.xyzw
-        dp4 r4.w, l(-0.750000, 1.250000, -1.250000, 0.750000), r3.xyzw
-        mov r2.x, l(1.000000)
-        mov r2.y, r0.z
-        mov r2.z, r1.y
-        mov r4.x, r3.y
-        dp4 r0.y, r2.xyzw, r4.xyzw
-        ne r0.y, l(0.000000, 0.000000, 0.000000, 0.000000), r0.y
-        store_structured u2.x, r0.x, l(0), r0.y
-      endif 
-    endif 
-  endif 
-endif 
-ret 
-// Approximately 0 instruction slots used
-#endif
-
-const BYTE g_GridSample[] =
-{
-     68,  88,  66,  67, 100,   8, 
-    243, 250,  58, 235,  43,  73, 
-    237, 134, 187,  13,  90, 216, 
-    193,  79,   1,   0,   0,   0, 
-    220, 217,   0,   0,   3,   0, 
-      0,   0,  44,   0,   0,   0, 
-     60,   0,   0,   0,  76,   0, 
-      0,   0,  73,  83,  71,  78, 
-      8,   0,   0,   0,   0,   0, 
-      0,   0,   8,   0,   0,   0, 
-     79,  83,  71,  78,   8,   0, 
-      0,   0,   0,   0,   0,   0, 
-      8,   0,   0,   0,  83,  72, 
-     69,  88, 136, 217,   0,   0, 
-     80,   0,   5,   0,  98,  54, 
-      0,   0, 106,   8,   0,   1, 
-     89,   0,   0,   4,  70, 142, 
-     32,   0,   0,   0,   0,   0, 
-      8,   0,   0,   0, 158,   0, 
-      0,   4,   0, 224,  17,   0, 
-      0,   0,   0,   0,   4,   0, 
-      0,   0, 158,   0,   0,   4, 
-      0, 224,  17,   0,   1,   0, 
-      0,   0,   4,   0,   0,   0, 
-    158,   0,   0,   4,   0, 224, 
-     17,   0,   2,   0,   0,   0, 
-      4,   0,   0,   0,  95,   0, 
-      0,   2,  18,   0,   2,   0, 
-    104,   0,   0,   2,  12,   0, 
-      0,   0, 155,   0,   0,   4, 
-     64,   0,   0,   0,   1,   0, 
-      0,   0,   1,   0,   0,   0, 
-     30,   0,   0,   7,  18,   0, 
-     16,   0,   0,   0,   0,   0, 
-     10,   0,   2,   0,  10, 128, 
-     32,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,  79,   0, 
-      0,   8,  34,   0,  16,   0, 
-      0,   0,   0,   0,  10,   0, 
-     16,   0,   0,   0,   0,   0, 
-     26, 128,  32,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-     31,   0,   4,   3,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-     86,   0,   0,   6, 242,   0, 
-     16,   0,   1,   0,   0,   0, 
-    182, 139,  32,   0,   0,   0, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,  10, 242,   0, 
-     16,   0,   2,   0,   0,   0, 
-      6,   5,  16,   0,   1,   0, 
-      0,   0,   2,  64,   0,   0, 
-      0,   0, 128, 191,   0,   0, 
-      0, 191,   0,   0,   0, 191, 
-      0,   0, 128, 191,  54,   0, 
-      0,   5,  18,   0,  16,   0, 
-      3,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     54,   0,   0,   5,  98,   0, 
-     16,   0,   3,   0,   0,   0, 
-      6,   3,  16,   0,   2,   0, 
-      0,   0,  54,   0,   0,   5, 
-     18,   0,  16,   0,   2,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0, 191,  55,   0, 
-      0,  10, 226,   0,  16,   0, 
-      0,   0,   0,   0,   6, 128, 
-     32,   0,   0,   0,   0,   0, 
-      7,   0,   0,   0,   6,   9, 
-     16,   0,   3,   0,   0,   0, 
-      6,   9,  16,   0,   2,   0, 
-      0,   0,  38,   0,   0,  10, 
-      0, 208,   0,   0,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     42, 128,  32,   0,   0,   0, 
-      0,   0,   5,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   5,   0,   0,   0, 
-     38,   0,   0,   9,   0, 208, 
-      0,   0,  34,   0,  16,   0, 
-      1,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     26, 128,  32,   0,   0,   0, 
-      0,   0,   5,   0,   0,   0, 
-     78,   0,   0,   8,  34,   0, 
-     16,   0,   1,   0,   0,   0, 
-      0, 208,   0,   0,  10,   0, 
-     16,   0,   0,   0,   0,   0, 
-     26,   0,  16,   0,   1,   0, 
-      0,   0,  35,   0,   0,  11, 
-     18,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16, 128, 
-     65,   0,   0,   0,   1,   0, 
-      0,   0,  10, 128,  32,   0, 
-      0,   0,   0,   0,   6,   0, 
-      0,   0,  10,   0,  16,   0, 
-      0,   0,   0,   0,  78,   0, 
-      0,   8,  18,   0,  16,   0, 
-      1,   0,   0,   0,   0, 208, 
-      0,   0,  10,   0,  16,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     35,   0,   0,  11,  18,   0, 
-     16,   0,   2,   0,   0,   0, 
-     10,   0,  16, 128,  65,   0, 
-      0,   0,   1,   0,   0,   0, 
-     26, 128,  32,   0,   0,   0, 
-      0,   0,   6,   0,   0,   0, 
-     10,   0,  16,   0,   2,   0, 
-      0,   0,  78,   0,   0,   9, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,   0, 208,   0,   0, 
-     10,   0,  16,   0,   2,   0, 
-      0,   0,  58, 128,  32,   0, 
-      0,   0,   0,   0,   5,   0, 
-      0,   0,  35,   0,   0,  11, 
-     18,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16, 128, 
-     65,   0,   0,   0,   2,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   6,   0, 
-      0,   0,  10,   0,  16,   0, 
-      2,   0,   0,   0,  86,   0, 
-      0,   5,  18,   0,  16,   0, 
-      3,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-     86,   0,   0,   5,  98,   0, 
-     16,   0,   3,   0,   0,   0, 
-     86,   4,  16,   0,   2,   0, 
-      0,   0,  86,   0,   0,   6, 
-    242,   0,  16,   0,   2,   0, 
-      0,   0,  70, 142,  32,   0, 
-      0,   0,   0,   0,   4,   0, 
-      0,   0,  16,   0,   0,   7, 
-     18,   0,  16,   0,   2,   0, 
-      0,   0,  70,   2,  16,   0, 
-      3,   0,   0,   0,  70,   2, 
-     16,   0,   2,   0,   0,   0, 
-      0,   0,   0,   7,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58,   0,  16,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      2,   0,   0,   0,  28,   0, 
-      0,   5,  18,   0,  16,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16,   0,   2,   0,   0,   0, 
-    167,   0,   0, 139,   2,  35, 
-      0, 128, 131, 153,  25,   0, 
-     18,   0,  16,   0,   3,   0, 
-      0,   0,  10,   0,  16,   0, 
-      2,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-      6, 224,  17,   0,   1,   0, 
-      0,   0,  28,   0,   0,   5, 
-     18,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0, 167,   0, 
-      0, 139,   2,  35,   0, 128, 
-    131, 153,  25,   0,  34,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10,   0,  16,   0,   2,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,   6, 224, 
-     17,   0,   1,   0,   0,   0, 
-     32,   0,   0,   8,  18,   0, 
-     16,   0,   2,   0,   0,   0, 
-     10, 128,  32,   0,   0,   0, 
-      0,   0,   7,   0,   0,   0, 
-      1,  64,   0,   0,   1,   0, 
-      0,   0,   0,   0,   0,  10, 
-     98,   0,  16,   0,   2,   0, 
-      0,   0,   6,   1,  16,   0, 
-      3,   0,   0,   0,   2,  64, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0, 128,  63,   0,   0, 
-    128,  63,   0,   0,   0,   0, 
-     56,   0,   0,  10,  50,   0, 
-     16,   0,   3,   0,   0,   0, 
-    150,   5,  16,   0,   2,   0, 
-      0,   0,   2,  64,   0,   0, 
-      0,   0,   0,  63,   0,   0, 
-      0,  63,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,  10, 194,   0,  16,   0, 
-      3,   0,   0,   0, 166,  14, 
-     16,   0,   1,   0,   0,   0, 
-      2,  64,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0, 128, 191,   0,   0, 
-    128, 191,  56,   0,   0,   7, 
-     50,   0,  16,   0,   3,   0, 
-      0,   0, 230,  10,  16,   0, 
-      3,   0,   0,   0,  70,   0, 
-     16,   0,   3,   0,   0,   0, 
-     50,   0,   0,  12, 194,   0, 
-     16,   0,   1,   0,   0,   0, 
-     86,   9,  16,   0,   2,   0, 
-      0,   0, 166,  14,  16,   0, 
-      1,   0,   0,   0,   2,  64, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-    128, 191,   0,   0, 128, 191, 
-     56,   0,   0,  10, 194,   0, 
-     16,   0,   1,   0,   0,   0, 
-    166,  14,  16,   0,   1,   0, 
-      0,   0,   2,  64,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,  63, 
-      0,   0,   0,  63,  55,   0, 
-      0,   9, 194,   0,  16,   0, 
-      1,   0,   0,   0,   6,   0, 
-     16,   0,   2,   0,   0,   0, 
-      6,   4,  16,   0,   3,   0, 
-      0,   0, 166,  14,  16,   0, 
-      1,   0,   0,   0,  32,   0, 
-      0,   8,  18,   0,  16,   0, 
-      2,   0,   0,   0,  42, 128, 
-     32,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-     64,   0,   0,   5,  98,   0, 
-     16,   0,   2,   0,   0,   0, 
-    166,  11,  16,   0,   1,   0, 
-      0,   0,  55,   0,   0,   9, 
-    194,   0,  16,   0,   1,   0, 
-      0,   0,   6,   0,  16,   0, 
-      2,   0,   0,   0,  86,   9, 
-     16,   0,   2,   0,   0,   0, 
-    166,  14,  16,   0,   1,   0, 
-      0,   0,  49,   0,   0,   7, 
-     98,   0,  16,   0,   2,   0, 
-      0,   0, 166,  11,  16,   0, 
-      1,   0,   0,   0,  86,   5, 
-     16,   0,   0,   0,   0,   0, 
-     49,   0,   0,   7,  50,   0, 
-     16,   0,   3,   0,   0,   0, 
-    230,  10,  16,   0,   0,   0, 
-      0,   0, 230,  10,  16,   0, 
-      1,   0,   0,   0,  60,   0, 
-      0,   7, 130,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   3,   0, 
-      0,   0,  60,   0,   0,   7, 
-    130,   0,  16,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      2,   0,   0,   0,  58,   0, 
-     16,   0,   2,   0,   0,   0, 
-     60,   0,   0,   7, 130,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   3,   0, 
-      0,   0,  58,   0,  16,   0, 
-      2,   0,   0,   0,  31,   0, 
-      4,   3,  58,   0,  16,   0, 
-      2,   0,   0,   0,  32,   0, 
-      0,   8, 130,   0,  16,   0, 
-      2,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-     31,   0,   4,   3,  58,   0, 
-     16,   0,   2,   0,   0,   0, 
-     30,   0,   0,  11, 194,   0, 
-     16,   0,   3,   0,   0,   0, 
-    246, 139,  32,   0,   0,   0, 
-      0,   0,   1,   0,   0,   0, 
-      2,  64,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-    255, 255, 255, 255, 255, 255, 
-    255, 255,  86,   0,   0,   5, 
-    194,   0,  16,   0,   3,   0, 
-      0,   0, 166,  14,  16,   0, 
-      3,   0,   0,   0,  52,   0, 
-      0,  10,  50,   0,  16,   0, 
-      4,   0,   0,   0, 230,  10, 
-     16,   0,   1,   0,   0,   0, 
-      2,  64,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,  51,   0,   0,   7, 
-    194,   0,  16,   0,   1,   0, 
-      0,   0, 166,  14,  16,   0, 
-      3,   0,   0,   0,   6,   4, 
-     16,   0,   4,   0,   0,   0, 
-     18,   0,   0,   1,  32,   0, 
-      0,   8, 130,   0,  16,   0, 
-      2,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   1,  64, 
-      0,   0,   2,   0,   0,   0, 
-     31,   0,   4,   3,  58,   0, 
-     16,   0,   2,   0,   0,   0, 
-     55,   0,   0,  10, 130,   0, 
-     16,   0,   2,   0,   0,   0, 
-     10, 128,  32,   0,   0,   0, 
-      0,   0,   7,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0, 128,   1,  64,   0,   0, 
-      0,   0,   0,  63,   0,   0, 
-      0,   7, 194,   0,  16,   0, 
-      3,   0,   0,   0, 166,  14, 
-     16,   0,   0,   0,   0,   0, 
-    246,  15,  16,   0,   2,   0, 
-      0,   0,   0,   0,   0,   8, 
-     50,   0,  16,   0,   4,   0, 
-      0,   0,  86,   5,  16,   0, 
-      0,   0,   0,   0, 230,  10, 
-     16, 128,  65,   0,   0,   0, 
-      1,   0,   0,   0,  14,   0, 
-      0,   7, 194,   0,  16,   0, 
-      4,   0,   0,   0,   6,   4, 
-     16,   0,   4,   0,   0,   0, 
-    166,  14,  16,   0,   3,   0, 
-      0,   0,  28,   0,   0,   5, 
-    194,   0,  16,   0,   4,   0, 
-      0,   0, 166,  14,  16,   0, 
-      4,   0,   0,   0,  86,   0, 
-      0,   5,  50,   0,  16,   0, 
-      5,   0,   0,   0, 230,  10, 
-     16,   0,   4,   0,   0,   0, 
-     50,   0,   0,  10,  50,   0, 
-     16,   0,   4,   0,   0,   0, 
-     70,   0,  16, 128,  65,   0, 
-      0,   0,   5,   0,   0,   0, 
-    230,  10,  16,   0,   3,   0, 
-      0,   0,  70,   0,  16,   0, 
-      4,   0,   0,   0,   1,   0, 
-      0,  10, 194,   0,  16,   0, 
-      4,   0,   0,   0, 166,  14, 
-     16,   0,   4,   0,   0,   0, 
-      2,  64,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      1,   0,   0,   0,   1,   0, 
-      0,   0,   0,   0,   0,   7, 
-     50,   0,  16,   0,   5,   0, 
-      0,   0,  86,   5,  16,   0, 
-      0,   0,   0,   0,  70,   0, 
-     16,   0,   4,   0,   0,   0, 
-      0,   0,   0,   8,  50,   0, 
-     16,   0,   4,   0,   0,   0, 
-    230,  10,  16,   0,   0,   0, 
-      0,   0,  70,   0,  16, 128, 
-     65,   0,   0,   0,   4,   0, 
-      0,   0,  55,   0,   0,   9, 
-     50,   0,  16,   0,   4,   0, 
-      0,   0, 230,  10,  16,   0, 
-      4,   0,   0,   0,  70,   0, 
-     16,   0,   4,   0,   0,   0, 
-     70,   0,  16,   0,   5,   0, 
-      0,   0,   0,   0,   0,   8, 
-    194,   0,  16,   0,   4,   0, 
-      0,   0, 166,  14,  16, 128, 
-     65,   0,   0,   0,   0,   0, 
-      0,   0, 166,  14,  16,   0, 
-      1,   0,   0,   0,  14,   0, 
-      0,   7,  50,   0,  16,   0, 
-      5,   0,   0,   0, 230,  10, 
-     16,   0,   4,   0,   0,   0, 
-    230,  10,  16,   0,   3,   0, 
-      0,   0,  28,   0,   0,   5, 
-     50,   0,  16,   0,   5,   0, 
-      0,   0,  70,   0,  16,   0, 
-      5,   0,   0,   0,  86,   0, 
-      0,   5, 194,   0,  16,   0, 
-      5,   0,   0,   0,   6,   4, 
-     16,   0,   5,   0,   0,   0, 
-     50,   0,   0,  10, 194,   0, 
-     16,   0,   3,   0,   0,   0, 
-    166,  14,  16, 128,  65,   0, 
-      0,   0,   5,   0,   0,   0, 
-    166,  14,  16,   0,   3,   0, 
-      0,   0, 166,  14,  16,   0, 
-      4,   0,   0,   0,   1,   0, 
-      0,  10, 194,   0,  16,   0, 
-      4,   0,   0,   0,   6,   4, 
-     16,   0,   5,   0,   0,   0, 
-      2,  64,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      1,   0,   0,   0,   1,   0, 
-      0,   0,   0,   0,   0,   8, 
-     50,   0,  16,   0,   5,   0, 
-      0,   0, 230,  10,  16,   0, 
-      0,   0,   0,   0, 230,  10, 
-     16, 128,  65,   0,   0,   0, 
-      3,   0,   0,   0,   0,   0, 
-      0,   7, 194,   0,  16,   0, 
-      3,   0,   0,   0,  86,   5, 
-     16,   0,   0,   0,   0,   0, 
-    166,  14,  16,   0,   3,   0, 
-      0,   0,  55,   0,   0,   9, 
-    194,   0,  16,   0,   3,   0, 
-      0,   0, 166,  14,  16,   0, 
-      4,   0,   0,   0, 166,  14, 
-     16,   0,   3,   0,   0,   0, 
-      6,   4,  16,   0,   5,   0, 
-      0,   0,  55,   0,   0,   9, 
-     50,   0,  16,   0,   3,   0, 
-      0,   0,  70,   0,  16,   0, 
-      3,   0,   0,   0, 230,  10, 
-     16,   0,   3,   0,   0,   0, 
-    230,  10,  16,   0,   1,   0, 
-      0,   0,  55,   0,   0,   9, 
-    194,   0,  16,   0,   1,   0, 
-      0,   0,  86,   9,  16,   0, 
-      2,   0,   0,   0,   6,   4, 
-     16,   0,   4,   0,   0,   0, 
-      6,   4,  16,   0,   3,   0, 
-      0,   0,  21,   0,   0,   1, 
-     21,   0,   0,   1,  21,   0, 
-      0,   1,  86,   0,   0,   5, 
-     50,   0,  16,   0,   3,   0, 
-      0,   0,  22,   5,  16,   0, 
-      1,   0,   0,   0,  31,   0, 
-      4,   3,  10,   0,  16,   0, 
-      2,   0,   0,   0,  31,   0, 
-      0,   4,  58, 128,  32,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,  29,   0,   0,  10, 
-     50,   0,  16,   0,   1,   0, 
-      0,   0, 230,  10,  16,   0, 
-      1,   0,   0,   0,   2,  64, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-     28,   0,   0,   5, 242,   0, 
-     16,   0,   2,   0,   0,   0, 
-    230,  11,  16,   0,   1,   0, 
-      0,   0,  79,   0,   0,   8, 
-     50,   0,  16,   0,   2,   0, 
-      0,   0,  70,   0,  16,   0, 
-      2,   0,   0,   0, 182, 143, 
-     32,   0,   0,   0,   0,   0, 
-      1,   0,   0,   0,   1,   0, 
-      0,   7,  18,   0,  16,   0, 
-      1,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     10,   0,  16,   0,   2,   0, 
-      0,   0,   1,   0,   0,   7, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  26,   0,  16,   0, 
-      1,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-      1,   0,   0,   7,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  28,   0, 
-      0,   5,  50,   0,  16,   0, 
-      2,   0,   0,   0,  70,   0, 
-     16,   0,   3,   0,   0,   0, 
-     38,   0,   0,   9,   0, 208, 
-      0,   0,  50,   0,  16,   0, 
-      2,   0,   0,   0,  70,   0, 
-     16,   0,   2,   0,   0,   0, 
-     70, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     30,   0,   0,   7,  34,   0, 
-     16,   0,   1,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      2,   0,   0,   0,  35,   0, 
-      0,  10,  34,   0,  16,   0, 
-      1,   0,   0,   0,  42,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   1,   0, 
-      0,   0,  35,   0,   0,  10, 
-     34,   0,  16,   0,   1,   0, 
-      0,   0,  58,   0,  16,   0, 
-      2,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-    167,   0,   0, 139,   2,  35, 
-      0, 128, 131, 153,  25,   0, 
-     34,   0,  16,   0,   1,   0, 
-      0,   0,  26,   0,  16,   0, 
-      1,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-      6, 224,  17,   0,   0,   0, 
-      0,   0,   1,   0,   0,   7, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-      1,   0,   0,   7,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0, 128,  63,  18,   0, 
-      0,   1,  32,   0,   0,   8, 
-     34,   0,  16,   0,   1,   0, 
-      0,   0,  58, 128,  32,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   1,  64,   0,   0, 
-      1,   0,   0,   0,  31,   0, 
-      4,   3,  26,   0,  16,   0, 
-      1,   0,   0,   0,  30,   0, 
-      0,  11,  50,   0,  16,   0, 
-      2,   0,   0,   0, 230, 138, 
-     32,   0,   0,   0,   0,   0, 
-      1,   0,   0,   0,   2,  64, 
-      0,   0, 255, 255, 255, 255, 
-    255, 255, 255, 255,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-     86,   0,   0,   5,  50,   0, 
-     16,   0,   2,   0,   0,   0, 
-     70,   0,  16,   0,   2,   0, 
-      0,   0,  52,   0,   0,  10, 
-    194,   0,  16,   0,   2,   0, 
-      0,   0, 246,  11,  16,   0, 
-      1,   0,   0,   0,   2,  64, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-     51,   0,   0,   7,  50,   0, 
-     16,   0,   2,   0,   0,   0, 
-     70,   0,  16,   0,   2,   0, 
-      0,   0, 230,  10,  16,   0, 
-      2,   0,   0,   0,  28,   0, 
-      0,   5,  50,   0,  16,   0, 
-      2,   0,   0,   0,  70,   0, 
-     16,   0,   2,   0,   0,   0, 
-     28,   0,   0,   5, 194,   0, 
-     16,   0,   2,   0,   0,   0, 
-      6,   4,  16,   0,   3,   0, 
-      0,   0,  38,   0,   0,   9, 
-      0, 208,   0,   0, 194,   0, 
-     16,   0,   2,   0,   0,   0, 
-    166,  14,  16,   0,   2,   0, 
-      0,   0,   6, 132,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  30,   0,   0,   7, 
-     34,   0,  16,   0,   1,   0, 
-      0,   0,  58,   0,  16,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16,   0,   2,   0,   0,   0, 
-     35,   0,   0,  10,  34,   0, 
-     16,   0,   1,   0,   0,   0, 
-     10,   0,  16,   0,   2,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      1,   0,   0,   0,  35,   0, 
-      0,  10,  34,   0,  16,   0, 
-      1,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   1,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  35,   0, 128, 131, 153, 
-     25,   0,  34,   0,  16,   0, 
-      1,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,   6, 224,  17,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   7,  18,   0,  16,   0, 
-      1,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-    128,  63,  18,   0,   0,   1, 
-     32,   0,   0,   8,  34,   0, 
-     16,   0,   1,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      1,  64,   0,   0,   2,   0, 
-      0,   0,  31,   0,   4,   3, 
-     26,   0,  16,   0,   1,   0, 
-      0,   0,  55,   0,   0,  10, 
-     34,   0,  16,   0,   1,   0, 
-      0,   0,  10, 128,  32,   0, 
-      0,   0,   0,   0,   7,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0, 128,   1,  64, 
-      0,   0,   0,   0,   0,  63, 
-      0,   0,   0,   7,  50,   0, 
-     16,   0,   2,   0,   0,   0, 
-    182,  15,  16,   0,   0,   0, 
-      0,   0,  86,   5,  16,   0, 
-      1,   0,   0,   0,  49,   0, 
-      0,   7, 194,   0,  16,   0, 
-      2,   0,   0,   0, 246,  11, 
-     16,   0,   1,   0,   0,   0, 
-     86,   5,  16,   0,   0,   0, 
-      0,   0,   0,   0,   0,   8, 
-    194,   0,  16,   0,   3,   0, 
-      0,   0,  86,   5,  16,   0, 
-      0,   0,   0,   0, 246,  11, 
-     16, 128,  65,   0,   0,   0, 
-      1,   0,   0,   0,  14,   0, 
-      0,   7,  50,   0,  16,   0, 
-      4,   0,   0,   0, 230,  10, 
-     16,   0,   3,   0,   0,   0, 
-     70,   0,  16,   0,   2,   0, 
-      0,   0,  28,   0,   0,   5, 
-     50,   0,  16,   0,   4,   0, 
-      0,   0,  70,   0,  16,   0, 
-      4,   0,   0,   0,  86,   0, 
-      0,   5, 194,   0,  16,   0, 
-      4,   0,   0,   0,   6,   4, 
-     16,   0,   4,   0,   0,   0, 
-     50,   0,   0,  10, 194,   0, 
-     16,   0,   3,   0,   0,   0, 
-    166,  14,  16, 128,  65,   0, 
-      0,   0,   4,   0,   0,   0, 
-      6,   4,  16,   0,   2,   0, 
-      0,   0, 166,  14,  16,   0, 
-      3,   0,   0,   0,   1,   0, 
-      0,  10,  50,   0,  16,   0, 
-      4,   0,   0,   0,  70,   0, 
-     16,   0,   4,   0,   0,   0, 
-      2,  64,   0,   0,   1,   0, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   7, 
-    194,   0,  16,   0,   4,   0, 
-      0,   0,  86,   5,  16,   0, 
-      0,   0,   0,   0, 166,  14, 
-     16,   0,   3,   0,   0,   0, 
-      0,   0,   0,   8, 194,   0, 
-     16,   0,   3,   0,   0,   0, 
-    246,  11,  16,   0,   0,   0, 
-      0,   0, 166,  14,  16, 128, 
-     65,   0,   0,   0,   3,   0, 
-      0,   0,  55,   0,   0,   9, 
-    194,   0,  16,   0,   3,   0, 
-      0,   0,   6,   4,  16,   0, 
-      4,   0,   0,   0, 166,  14, 
-     16,   0,   3,   0,   0,   0, 
-    166,  14,  16,   0,   4,   0, 
-      0,   0,  49,   0,   0,   7, 
-     50,   0,  16,   0,   4,   0, 
-      0,   0, 182,  15,  16,   0, 
-      0,   0,   0,   0, 182,  15, 
-     16,   0,   1,   0,   0,   0, 
-      0,   0,   0,   8, 194,   0, 
-     16,   0,   4,   0,   0,   0, 
-    246,  11,  16, 128,  65,   0, 
-      0,   0,   0,   0,   0,   0, 
-    246,  11,  16,   0,   1,   0, 
-      0,   0,  14,   0,   0,   7, 
-     50,   0,  16,   0,   5,   0, 
-      0,   0, 230,  10,  16,   0, 
-      4,   0,   0,   0,  70,   0, 
-     16,   0,   2,   0,   0,   0, 
-     28,   0,   0,   5,  50,   0, 
-     16,   0,   5,   0,   0,   0, 
-     70,   0,  16,   0,   5,   0, 
-      0,   0,  86,   0,   0,   5, 
-    194,   0,  16,   0,   5,   0, 
-      0,   0,   6,   4,  16,   0, 
-      5,   0,   0,   0,  50,   0, 
-      0,  10,  50,   0,  16,   0, 
-      2,   0,   0,   0, 230,  10, 
-     16, 128,  65,   0,   0,   0, 
-      5,   0,   0,   0,  70,   0, 
-     16,   0,   2,   0,   0,   0, 
-    230,  10,  16,   0,   4,   0, 
-      0,   0,   1,   0,   0,  10, 
-    194,   0,  16,   0,   4,   0, 
-      0,   0,   6,   4,  16,   0, 
-      5,   0,   0,   0,   2,  64, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   8,  50,   0, 
-     16,   0,   5,   0,   0,   0, 
-    182,  15,  16,   0,   0,   0, 
-      0,   0,  70,   0,  16, 128, 
-     65,   0,   0,   0,   2,   0, 
-      0,   0,   0,   0,   0,   7, 
-     50,   0,  16,   0,   2,   0, 
-      0,   0,  86,   5,  16,   0, 
-      0,   0,   0,   0,  70,   0, 
-     16,   0,   2,   0,   0,   0, 
-     55,   0,   0,   9,  50,   0, 
-     16,   0,   2,   0,   0,   0, 
-    230,  10,  16,   0,   4,   0, 
-      0,   0,  70,   0,  16,   0, 
-      2,   0,   0,   0,  70,   0, 
-     16,   0,   5,   0,   0,   0, 
-     55,   0,   0,   9,  50,   0, 
-     16,   0,   2,   0,   0,   0, 
-     70,   0,  16,   0,   4,   0, 
-      0,   0,  70,   0,  16,   0, 
-      2,   0,   0,   0, 182,  15, 
-     16,   0,   1,   0,   0,   0, 
-     55,   0,   0,   9,  50,   0, 
-     16,   0,   2,   0,   0,   0, 
-    230,  10,  16,   0,   2,   0, 
-      0,   0, 230,  10,  16,   0, 
-      3,   0,   0,   0,  70,   0, 
-     16,   0,   2,   0,   0,   0, 
-     28,   0,   0,   5,  50,   0, 
-     16,   0,   2,   0,   0,   0, 
-     70,   0,  16,   0,   2,   0, 
-      0,   0,  28,   0,   0,   5, 
-    194,   0,  16,   0,   2,   0, 
-      0,   0,   6,   4,  16,   0, 
-      3,   0,   0,   0,  38,   0, 
-      0,   9,   0, 208,   0,   0, 
-    194,   0,  16,   0,   2,   0, 
-      0,   0, 166,  14,  16,   0, 
-      2,   0,   0,   0,   6, 132, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  30,   0, 
-      0,   7,  34,   0,  16,   0, 
-      1,   0,   0,   0,  58,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   2,   0, 
-      0,   0,  35,   0,   0,  10, 
-     34,   0,  16,   0,   1,   0, 
-      0,   0,  10,   0,  16,   0, 
-      2,   0,   0,   0,  42, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-     35,   0,   0,  10,  34,   0, 
-     16,   0,   1,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  58, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      1,   0,   0,   0, 167,   0, 
-      0, 139,   2,  35,   0, 128, 
-    131, 153,  25,   0,  34,   0, 
-     16,   0,   1,   0,   0,   0, 
-     26,   0,  16,   0,   1,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,   6, 224, 
-     17,   0,   0,   0,   0,   0, 
-      1,   0,   0,   7,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     26,   0,  16,   0,   1,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0, 128,  63,  18,   0, 
-      0,   1,  54,   0,   0,   5, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,  21,   0, 
-      0,   1,  21,   0,   0,   1, 
-     21,   0,   0,   1,  57,   0, 
-      0,  10,  18,   0,  16,   0, 
-      1,   0,   0,   0,   2,  64, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0, 168,   0,   0,   9, 
-     18, 224,  17,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      0,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  18,   0,   0,   1, 
-     31,   0,   0,   4,  42, 128, 
-     32,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,  65,   0, 
-      0,   5,  50,   0,  16,   0, 
-      1,   0,   0,   0, 182,  15, 
-     16,   0,   1,   0,   0,   0, 
-      0,   0,   0,  10,  50,   0, 
-     16,   0,   2,   0,   0,   0, 
-     70,   0,  16,   0,   1,   0, 
-      0,   0,   2,  64,   0,   0, 
-      0,   0, 128,  63,   0,   0, 
-    128,  63,   0,   0,   0,   0, 
-      0,   0,   0,   0,  31,   0, 
-      0,   4,  58, 128,  32,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,  29,   0,   0,  10, 
-    194,   0,  16,   0,   2,   0, 
-      0,   0,  86,   1,  16,   0, 
-      1,   0,   0,   0,   2,  64, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-     28,   0,   0,   5, 194,   0, 
-     16,   0,   3,   0,   0,   0, 
-     86,   1,  16,   0,   1,   0, 
-      0,   0,  79,   0,   0,   8, 
-    194,   0,  16,   0,   3,   0, 
-      0,   0, 166,  14,  16,   0, 
-      3,   0,   0,   0, 246, 139, 
-     32,   0,   0,   0,   0,   0, 
-      1,   0,   0,   0,   1,   0, 
-      0,   7,  66,   0,  16,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,   1,   0,   0,   7, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  58,   0,  16,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16,   0,   2,   0,   0,   0, 
-      1,   0,   0,   7,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5,  50,   0,  16,   0, 
-      4,   0,   0,   0,  70,   0, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5, 194,   0, 
-     16,   0,   4,   0,   0,   0, 
-      6,   4,  16,   0,   1,   0, 
-      0,   0,  38,   0,   0,   9, 
-      0, 208,   0,   0,  50,   0, 
-     16,   0,   4,   0,   0,   0, 
-     70,   0,  16,   0,   4,   0, 
-      0,   0,  70, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  30,   0,   0,   7, 
-     18,   0,  16,   0,   4,   0, 
-      0,   0,  26,   0,  16,   0, 
-      4,   0,   0,   0,  10,   0, 
-     16,   0,   4,   0,   0,   0, 
-     35,   0,   0,  10,  18,   0, 
-     16,   0,   4,   0,   0,   0, 
-     42,   0,  16,   0,   4,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      4,   0,   0,   0,  35,   0, 
-      0,  10,  18,   0,  16,   0, 
-      4,   0,   0,   0,  58,   0, 
-     16,   0,   4,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   4,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  35,   0, 128, 131, 153, 
-     25,   0,  18,   0,  16,   0, 
-      4,   0,   0,   0,  10,   0, 
-     16,   0,   4,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,   6, 224,  17,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   7,  66,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10,   0,  16,   0,   4,   0, 
-      0,   0,   1,   0,   0,   7, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0, 128,  63, 
-     29,   0,   0,  10,  50,   0, 
-     16,   0,   4,   0,   0,   0, 
-     22,   5,  16,   0,   2,   0, 
-      0,   0,   2,  64,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,  28,   0, 
-      0,   5, 194,   0,  16,   0, 
-      4,   0,   0,   0,  86,   1, 
-     16,   0,   2,   0,   0,   0, 
-     79,   0,   0,   8, 194,   0, 
-     16,   0,   4,   0,   0,   0, 
-    166,  14,  16,   0,   4,   0, 
-      0,   0, 246, 139,  32,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   0,   1,   0,   0,   7, 
-     18,   0,  16,   0,   4,   0, 
-      0,   0,  42,   0,  16,   0, 
-      4,   0,   0,   0,  10,   0, 
-     16,   0,   4,   0,   0,   0, 
-      1,   0,   0,   7, 130,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58,   0,  16,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      4,   0,   0,   0,   1,   0, 
-      0,   7, 130,   0,  16,   0, 
-      2,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16,   0,   2,   0, 
-      0,   0,  28,   0,   0,   5, 
-     50,   0,  16,   0,   5,   0, 
-      0,   0,  70,   0,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5, 130,   0,  16,   0, 
-      3,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     28,   0,   0,   5,  66,   0, 
-     16,   0,   4,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  38,   0,   0,   9, 
-      0, 208,   0,   0,  50,   0, 
-     16,   0,   5,   0,   0,   0, 
-     70,   0,  16,   0,   5,   0, 
-      0,   0,  70, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  30,   0,   0,   7, 
-     18,   0,  16,   0,   5,   0, 
-      0,   0,  26,   0,  16,   0, 
-      5,   0,   0,   0,  10,   0, 
-     16,   0,   5,   0,   0,   0, 
-     35,   0,   0,  10, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      5,   0,   0,   0,  35,   0, 
-      0,  10, 130,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   4,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  35,   0, 128, 131, 153, 
-     25,   0, 130,   0,  16,   0, 
-      3,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,   6, 224,  17,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   7, 130,   0,  16,   0, 
-      2,   0,   0,   0,  58,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,   1,   0,   0,   7, 
-    130,   0,  16,   0,   2,   0, 
-      0,   0,  58,   0,  16,   0, 
-      2,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0, 128,  63, 
-      1,   0,   0,   7,  66,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      4,   0,   0,   0,   1,   0, 
-      0,   7,  66,   0,  16,   0, 
-      2,   0,   0,   0,  58,   0, 
-     16,   0,   4,   0,   0,   0, 
-     42,   0,  16,   0,   2,   0, 
-      0,   0,  28,   0,   0,   5, 
-     50,   0,  16,   0,   5,   0, 
-      0,   0,  70,   0,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5, 130,   0,  16,   0, 
-      3,   0,   0,   0,  10,   0, 
-     16,   0,   2,   0,   0,   0, 
-     28,   0,   0,   5,  66,   0, 
-     16,   0,   4,   0,   0,   0, 
-     26,   0,  16,   0,   1,   0, 
-      0,   0,  38,   0,   0,   9, 
-      0, 208,   0,   0,  50,   0, 
-     16,   0,   5,   0,   0,   0, 
-     70,   0,  16,   0,   5,   0, 
-      0,   0,  70, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  30,   0,   0,   7, 
-     18,   0,  16,   0,   5,   0, 
-      0,   0,  26,   0,  16,   0, 
-      5,   0,   0,   0,  10,   0, 
-     16,   0,   5,   0,   0,   0, 
-     35,   0,   0,  10, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      5,   0,   0,   0,  35,   0, 
-      0,  10, 130,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   4,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  35,   0, 128, 131, 153, 
-     25,   0, 130,   0,  16,   0, 
-      3,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,   6, 224,  17,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   7,  66,   0,  16,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,   1,   0,   0,   7, 
-     66,   0,  16,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      2,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0, 128,  63, 
-      1,   0,   0,   7, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     26,   0,  16,   0,   4,   0, 
-      0,   0,  10,   0,  16,   0, 
-      4,   0,   0,   0,   1,   0, 
-      0,   7, 130,   0,  16,   0, 
-      3,   0,   0,   0,  58,   0, 
-     16,   0,   4,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  28,   0,   0,   5, 
-     50,   0,  16,   0,   4,   0, 
-      0,   0,  70,   0,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5, 194,   0,  16,   0, 
-      4,   0,   0,   0,   6,   4, 
-     16,   0,   2,   0,   0,   0, 
-     38,   0,   0,   9,   0, 208, 
-      0,   0,  50,   0,  16,   0, 
-      4,   0,   0,   0,  70,   0, 
-     16,   0,   4,   0,   0,   0, 
-     70, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     30,   0,   0,   7,  18,   0, 
-     16,   0,   4,   0,   0,   0, 
-     26,   0,  16,   0,   4,   0, 
-      0,   0,  10,   0,  16,   0, 
-      4,   0,   0,   0,  35,   0, 
-      0,  10,  18,   0,  16,   0, 
-      4,   0,   0,   0,  42,   0, 
-     16,   0,   4,   0,   0,   0, 
-     42, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   4,   0, 
-      0,   0,  35,   0,   0,  10, 
-     18,   0,  16,   0,   4,   0, 
-      0,   0,  58,   0,  16,   0, 
-      4,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16,   0,   4,   0,   0,   0, 
-    167,   0,   0, 139,   2,  35, 
-      0, 128, 131, 153,  25,   0, 
-     18,   0,  16,   0,   4,   0, 
-      0,   0,  10,   0,  16,   0, 
-      4,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-      6, 224,  17,   0,   0,   0, 
-      0,   0,   1,   0,   0,   7, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,  10,   0, 
-     16,   0,   4,   0,   0,   0, 
-      1,   0,   0,   7, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0, 128,  63,  18,   0, 
-      0,   1,  32,   0,   0,   8, 
-     18,   0,  16,   0,   4,   0, 
-      0,   0,  58, 128,  32,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   1,  64,   0,   0, 
-      1,   0,   0,   0,  31,   0, 
-      4,   3,  10,   0,  16,   0, 
-      4,   0,   0,   0,  30,   0, 
-      0,  11,  98,   0,  16,   0, 
-      4,   0,   0,   0, 166, 139, 
-     32,   0,   0,   0,   0,   0, 
-      1,   0,   0,   0,   2,  64, 
-      0,   0,   0,   0,   0,   0, 
-    255, 255, 255, 255, 255, 255, 
-    255, 255,   0,   0,   0,   0, 
-     86,   0,   0,   5,  98,   0, 
-     16,   0,   4,   0,   0,   0, 
-     86,   6,  16,   0,   4,   0, 
-      0,   0,  52,   0,   0,  10, 
-     50,   0,  16,   0,   5,   0, 
-      0,   0,  70,   0,  16,   0, 
-      1,   0,   0,   0,   2,  64, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-     51,   0,   0,   7,  98,   0, 
-     16,   0,   4,   0,   0,   0, 
-     86,   6,  16,   0,   4,   0, 
-      0,   0,   6,   1,  16,   0, 
-      5,   0,   0,   0,  28,   0, 
-      0,   5,  98,   0,  16,   0, 
-      4,   0,   0,   0,  86,   6, 
-     16,   0,   4,   0,   0,   0, 
-     28,   0,   0,   5,  50,   0, 
-     16,   0,   5,   0,   0,   0, 
-     70,   0,  16,   0,   3,   0, 
-      0,   0,  38,   0,   0,   9, 
-      0, 208,   0,   0,  50,   0, 
-     16,   0,   5,   0,   0,   0, 
-     70,   0,  16,   0,   5,   0, 
-      0,   0,  70, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  30,   0,   0,   7, 
-    130,   0,  16,   0,   4,   0, 
-      0,   0,  26,   0,  16,   0, 
-      5,   0,   0,   0,  10,   0, 
-     16,   0,   5,   0,   0,   0, 
-     35,   0,   0,  10,  34,   0, 
-     16,   0,   4,   0,   0,   0, 
-     26,   0,  16,   0,   4,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  58,   0,  16,   0, 
-      4,   0,   0,   0,  35,   0, 
-      0,  10,  34,   0,  16,   0, 
-      4,   0,   0,   0,  42,   0, 
-     16,   0,   4,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   4,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  35,   0, 128, 131, 153, 
-     25,   0,  34,   0,  16,   0, 
-      4,   0,   0,   0,  26,   0, 
-     16,   0,   4,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,   6, 224,  17,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   7,  66,   0,  16,   0, 
-      3,   0,   0,   0,  26,   0, 
-     16,   0,   4,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-    128,  63,  18,   0,   0,   1, 
-     32,   0,   0,   8,  34,   0, 
-     16,   0,   4,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      1,  64,   0,   0,   2,   0, 
-      0,   0,  31,   0,   4,   3, 
-     26,   0,  16,   0,   4,   0, 
-      0,   0,  55,   0,   0,  10, 
-     34,   0,  16,   0,   4,   0, 
-      0,   0,  10, 128,  32,   0, 
-      0,   0,   0,   0,   7,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0, 128,   1,  64, 
-      0,   0,   0,   0,   0,  63, 
-      0,   0,   0,   7,  98,   0, 
-     16,   0,   4,   0,   0,   0, 
-    246,  14,  16,   0,   0,   0, 
-      0,   0,  86,   5,  16,   0, 
-      4,   0,   0,   0,  49,   0, 
-      0,   7,  50,   0,  16,   0, 
-      5,   0,   0,   0,  70,   0, 
-     16,   0,   1,   0,   0,   0, 
-     86,   5,  16,   0,   0,   0, 
-      0,   0,   0,   0,   0,   8, 
-    194,   0,  16,   0,   5,   0, 
-      0,   0,  86,   5,  16,   0, 
-      0,   0,   0,   0,   6,   4, 
-     16, 128,  65,   0,   0,   0, 
-      1,   0,   0,   0,  14,   0, 
-      0,   7,  50,   0,  16,   0, 
-      6,   0,   0,   0, 230,  10, 
-     16,   0,   5,   0,   0,   0, 
-    150,   5,  16,   0,   4,   0, 
-      0,   0,  28,   0,   0,   5, 
-     50,   0,  16,   0,   6,   0, 
-      0,   0,  70,   0,  16,   0, 
-      6,   0,   0,   0,  86,   0, 
-      0,   5, 194,   0,  16,   0, 
-      6,   0,   0,   0,   6,   4, 
-     16,   0,   6,   0,   0,   0, 
-     50,   0,   0,  10, 194,   0, 
-     16,   0,   5,   0,   0,   0, 
-    166,  14,  16, 128,  65,   0, 
-      0,   0,   6,   0,   0,   0, 
-     86,   9,  16,   0,   4,   0, 
-      0,   0, 166,  14,  16,   0, 
-      5,   0,   0,   0,   1,   0, 
-      0,  10,  50,   0,  16,   0, 
-      6,   0,   0,   0,  70,   0, 
-     16,   0,   6,   0,   0,   0, 
-      2,  64,   0,   0,   1,   0, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   7, 
-    194,   0,  16,   0,   6,   0, 
-      0,   0,  86,   5,  16,   0, 
-      0,   0,   0,   0, 166,  14, 
-     16,   0,   5,   0,   0,   0, 
-      0,   0,   0,   8, 194,   0, 
-     16,   0,   5,   0,   0,   0, 
-    246,  11,  16,   0,   0,   0, 
-      0,   0, 166,  14,  16, 128, 
-     65,   0,   0,   0,   5,   0, 
-      0,   0,  55,   0,   0,   9, 
-    194,   0,  16,   0,   5,   0, 
-      0,   0,   6,   4,  16,   0, 
-      6,   0,   0,   0, 166,  14, 
-     16,   0,   5,   0,   0,   0, 
-    166,  14,  16,   0,   6,   0, 
-      0,   0,  49,   0,   0,   7, 
-     50,   0,  16,   0,   6,   0, 
-      0,   0, 182,  15,  16,   0, 
-      0,   0,   0,   0,  70,   0, 
-     16,   0,   1,   0,   0,   0, 
-      0,   0,   0,   8, 194,   0, 
-     16,   0,   6,   0,   0,   0, 
-    246,  11,  16, 128,  65,   0, 
-      0,   0,   0,   0,   0,   0, 
-      6,   4,  16,   0,   1,   0, 
-      0,   0,  14,   0,   0,   7, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0, 230,  10,  16,   0, 
-      6,   0,   0,   0, 150,   5, 
-     16,   0,   4,   0,   0,   0, 
-     28,   0,   0,   5,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,   7,   0, 
-      0,   0,  86,   0,   0,   5, 
-    194,   0,  16,   0,   7,   0, 
-      0,   0,   6,   4,  16,   0, 
-      7,   0,   0,   0,  50,   0, 
-      0,  10,  98,   0,  16,   0, 
-      4,   0,   0,   0, 166,  11, 
-     16, 128,  65,   0,   0,   0, 
-      7,   0,   0,   0,  86,   6, 
-     16,   0,   4,   0,   0,   0, 
-    166,  11,  16,   0,   6,   0, 
-      0,   0,   1,   0,   0,  10, 
-    194,   0,  16,   0,   6,   0, 
-      0,   0,   6,   4,  16,   0, 
-      7,   0,   0,   0,   2,  64, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   8,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-    182,  15,  16,   0,   0,   0, 
-      0,   0, 150,   5,  16, 128, 
-     65,   0,   0,   0,   4,   0, 
-      0,   0,   0,   0,   0,   7, 
-     98,   0,  16,   0,   4,   0, 
-      0,   0,  86,   5,  16,   0, 
-      0,   0,   0,   0,  86,   6, 
-     16,   0,   4,   0,   0,   0, 
-     55,   0,   0,   9,  98,   0, 
-     16,   0,   4,   0,   0,   0, 
-    166,  11,  16,   0,   6,   0, 
-      0,   0,  86,   6,  16,   0, 
-      4,   0,   0,   0,   6,   1, 
-     16,   0,   7,   0,   0,   0, 
-     55,   0,   0,   9,  98,   0, 
-     16,   0,   4,   0,   0,   0, 
-      6,   1,  16,   0,   6,   0, 
-      0,   0,  86,   6,  16,   0, 
-      4,   0,   0,   0,   6,   1, 
-     16,   0,   1,   0,   0,   0, 
-     55,   0,   0,   9,  98,   0, 
-     16,   0,   4,   0,   0,   0, 
-      6,   1,  16,   0,   5,   0, 
-      0,   0, 166,  11,  16,   0, 
-      5,   0,   0,   0,  86,   6, 
-     16,   0,   4,   0,   0,   0, 
-     28,   0,   0,   5,  98,   0, 
-     16,   0,   4,   0,   0,   0, 
-     86,   6,  16,   0,   4,   0, 
-      0,   0,  28,   0,   0,   5, 
-     50,   0,  16,   0,   5,   0, 
-      0,   0,  70,   0,  16,   0, 
-      3,   0,   0,   0,  38,   0, 
-      0,   9,   0, 208,   0,   0, 
-     50,   0,  16,   0,   5,   0, 
-      0,   0,  70,   0,  16,   0, 
-      5,   0,   0,   0,  70, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  30,   0, 
-      0,   7, 130,   0,  16,   0, 
-      4,   0,   0,   0,  26,   0, 
-     16,   0,   5,   0,   0,   0, 
-     10,   0,  16,   0,   5,   0, 
-      0,   0,  35,   0,   0,  10, 
-     34,   0,  16,   0,   4,   0, 
-      0,   0,  26,   0,  16,   0, 
-      4,   0,   0,   0,  42, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  58,   0, 
-     16,   0,   4,   0,   0,   0, 
-     35,   0,   0,  10,  34,   0, 
-     16,   0,   4,   0,   0,   0, 
-     42,   0,  16,   0,   4,   0, 
-      0,   0,  58, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      4,   0,   0,   0, 167,   0, 
-      0, 139,   2,  35,   0, 128, 
-    131, 153,  25,   0,  34,   0, 
-     16,   0,   4,   0,   0,   0, 
-     26,   0,  16,   0,   4,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,   6, 224, 
-     17,   0,   0,   0,   0,   0, 
-      1,   0,   0,   7,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     26,   0,  16,   0,   4,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0, 128,  63,  18,   0, 
-      0,   1,  54,   0,   0,   5, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,  21,   0, 
-      0,   1,  21,   0,   0,   1, 
-     31,   0,   4,   3,  10,   0, 
-     16,   0,   4,   0,   0,   0, 
-     30,   0,   0,  11,  98,   0, 
-     16,   0,   4,   0,   0,   0, 
-    246, 142,  32,   0,   0,   0, 
-      0,   0,   1,   0,   0,   0, 
-      2,  64,   0,   0,   0,   0, 
-      0,   0, 255, 255, 255, 255, 
-    255, 255, 255, 255,   0,   0, 
-      0,   0,  86,   0,   0,   5, 
-     98,   0,  16,   0,   4,   0, 
-      0,   0,  86,   6,  16,   0, 
-      4,   0,   0,   0,  52,   0, 
-      0,   7, 130,   0,  16,   0, 
-      4,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  51,   0,   0,   7, 
-     34,   0,  16,   0,   4,   0, 
-      0,   0,  26,   0,  16,   0, 
-      4,   0,   0,   0,  58,   0, 
-     16,   0,   4,   0,   0,   0, 
-     52,   0,   0,   7, 130,   0, 
-     16,   0,   4,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,  51,   0, 
-      0,   7,  66,   0,  16,   0, 
-      4,   0,   0,   0,  42,   0, 
-     16,   0,   4,   0,   0,   0, 
-     58,   0,  16,   0,   4,   0, 
-      0,   0,  28,   0,   0,   5, 
-     98,   0,  16,   0,   4,   0, 
-      0,   0,  86,   6,  16,   0, 
-      4,   0,   0,   0,  86,   0, 
-      0,   5, 194,   0,  16,   0, 
-      4,   0,   0,   0, 166,   6, 
-     16,   0,   4,   0,   0,   0, 
-     28,   0,   0,   5,  50,   0, 
-     16,   0,   5,   0,   0,   0, 
-     70,   0,  16,   0,   3,   0, 
-      0,   0,  28,   0,   0,   5, 
-     98,   0,  16,   0,   4,   0, 
-      0,   0, 166,  11,  16,   0, 
-      4,   0,   0,   0,  38,   0, 
-      0,   9,   0, 208,   0,   0, 
-     50,   0,  16,   0,   5,   0, 
-      0,   0,  70,   0,  16,   0, 
-      5,   0,   0,   0,  70, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  30,   0, 
-      0,   7, 130,   0,  16,   0, 
-      4,   0,   0,   0,  26,   0, 
-     16,   0,   5,   0,   0,   0, 
-     10,   0,  16,   0,   5,   0, 
-      0,   0,  35,   0,   0,  10, 
-     34,   0,  16,   0,   4,   0, 
-      0,   0,  26,   0,  16,   0, 
-      4,   0,   0,   0,  42, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  58,   0, 
-     16,   0,   4,   0,   0,   0, 
-     35,   0,   0,  10,  34,   0, 
-     16,   0,   4,   0,   0,   0, 
-     42,   0,  16,   0,   4,   0, 
-      0,   0,  58, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      4,   0,   0,   0, 167,   0, 
-      0, 139,   2,  35,   0, 128, 
-    131, 153,  25,   0,  34,   0, 
-     16,   0,   4,   0,   0,   0, 
-     26,   0,  16,   0,   4,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,   6, 224, 
-     17,   0,   0,   0,   0,   0, 
-      1,   0,   0,   7, 130,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   4,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0, 128,  63,  18,   0, 
-      0,   1,  32,   0,   0,   8, 
-     34,   0,  16,   0,   4,   0, 
-      0,   0,  58, 128,  32,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   1,  64,   0,   0, 
-      2,   0,   0,   0,  31,   0, 
-      4,   3,  26,   0,  16,   0, 
-      4,   0,   0,   0,  55,   0, 
-      0,  10,  34,   0,  16,   0, 
-      4,   0,   0,   0,  10, 128, 
-     32,   0,   0,   0,   0,   0, 
-      7,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0, 128, 
-      1,  64,   0,   0,   0,   0, 
-      0,  63,   0,   0,   0,   7, 
-     98,   0,  16,   0,   4,   0, 
-      0,   0, 166,  11,  16,   0, 
-      0,   0,   0,   0,  86,   5, 
-     16,   0,   4,   0,   0,   0, 
-     49,   0,   0,   7, 130,   0, 
-     16,   0,   4,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   8,  18,   0,  16,   0, 
-      5,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-     26,   0,  16, 128,  65,   0, 
-      0,   0,   2,   0,   0,   0, 
-     14,   0,   0,   7,  34,   0, 
-     16,   0,   5,   0,   0,   0, 
-     10,   0,  16,   0,   5,   0, 
-      0,   0,  26,   0,  16,   0, 
-      4,   0,   0,   0,  28,   0, 
-      0,   5,  34,   0,  16,   0, 
-      5,   0,   0,   0,  26,   0, 
-     16,   0,   5,   0,   0,   0, 
-     86,   0,   0,   5,  66,   0, 
-     16,   0,   5,   0,   0,   0, 
-     26,   0,  16,   0,   5,   0, 
-      0,   0,  50,   0,   0,  10, 
-     18,   0,  16,   0,   5,   0, 
-      0,   0,  42,   0,  16, 128, 
-     65,   0,   0,   0,   5,   0, 
-      0,   0,  26,   0,  16,   0, 
-      4,   0,   0,   0,  10,   0, 
-     16,   0,   5,   0,   0,   0, 
-      1,   0,   0,   7,  34,   0, 
-     16,   0,   5,   0,   0,   0, 
-     26,   0,  16,   0,   5,   0, 
-      0,   0,   1,  64,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   7,  66,   0,  16,   0, 
-      5,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-     10,   0,  16,   0,   5,   0, 
-      0,   0,   0,   0,   0,   8, 
-     18,   0,  16,   0,   5,   0, 
-      0,   0,  42,   0,  16,   0, 
-      0,   0,   0,   0,  10,   0, 
-     16, 128,  65,   0,   0,   0, 
-      5,   0,   0,   0,  55,   0, 
-      0,   9,  18,   0,  16,   0, 
-      5,   0,   0,   0,  26,   0, 
-     16,   0,   5,   0,   0,   0, 
-     10,   0,  16,   0,   5,   0, 
-      0,   0,  42,   0,  16,   0, 
-      5,   0,   0,   0,  49,   0, 
-      0,   7,  34,   0,  16,   0, 
-      5,   0,   0,   0,  42,   0, 
-     16,   0,   0,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,   0,   0,   0,   8, 
-     66,   0,  16,   0,   5,   0, 
-      0,   0,  42,   0,  16, 128, 
-     65,   0,   0,   0,   0,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  14,   0, 
-      0,   7, 130,   0,  16,   0, 
-      5,   0,   0,   0,  42,   0, 
-     16,   0,   5,   0,   0,   0, 
-     26,   0,  16,   0,   4,   0, 
-      0,   0,  28,   0,   0,   5, 
-    130,   0,  16,   0,   5,   0, 
-      0,   0,  58,   0,  16,   0, 
-      5,   0,   0,   0,  86,   0, 
-      0,   5,  18,   0,  16,   0, 
-      6,   0,   0,   0,  58,   0, 
-     16,   0,   5,   0,   0,   0, 
-     50,   0,   0,  10,  34,   0, 
-     16,   0,   4,   0,   0,   0, 
-     10,   0,  16, 128,  65,   0, 
-      0,   0,   6,   0,   0,   0, 
-     26,   0,  16,   0,   4,   0, 
-      0,   0,  42,   0,  16,   0, 
-      5,   0,   0,   0,   1,   0, 
-      0,   7,  66,   0,  16,   0, 
-      5,   0,   0,   0,  58,   0, 
-     16,   0,   5,   0,   0,   0, 
-      1,  64,   0,   0,   1,   0, 
-      0,   0,   0,   0,   0,   8, 
-    130,   0,  16,   0,   5,   0, 
-      0,   0,  42,   0,  16,   0, 
-      0,   0,   0,   0,  26,   0, 
-     16, 128,  65,   0,   0,   0, 
-      4,   0,   0,   0,   0,   0, 
-      0,   7,  34,   0,  16,   0, 
-      4,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-     26,   0,  16,   0,   4,   0, 
-      0,   0,  55,   0,   0,   9, 
-     34,   0,  16,   0,   4,   0, 
-      0,   0,  42,   0,  16,   0, 
-      5,   0,   0,   0,  26,   0, 
-     16,   0,   4,   0,   0,   0, 
-     58,   0,  16,   0,   5,   0, 
-      0,   0,  55,   0,   0,   9, 
-     34,   0,  16,   0,   4,   0, 
-      0,   0,  26,   0,  16,   0, 
-      5,   0,   0,   0,  26,   0, 
-     16,   0,   4,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  55,   0,   0,   9, 
-     34,   0,  16,   0,   4,   0, 
-      0,   0,  58,   0,  16,   0, 
-      4,   0,   0,   0,  10,   0, 
-     16,   0,   5,   0,   0,   0, 
-     26,   0,  16,   0,   4,   0, 
-      0,   0,  28,   0,   0,   5, 
-     34,   0,  16,   0,   4,   0, 
-      0,   0,  26,   0,  16,   0, 
-      4,   0,   0,   0,  49,   0, 
-      0,   7, 130,   0,  16,   0, 
-      4,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,   0,   0,   0,   8, 
-     18,   0,  16,   0,   5,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  10,   0, 
-     16, 128,  65,   0,   0,   0, 
-      1,   0,   0,   0,  14,   0, 
-      0,   7,  34,   0,  16,   0, 
-      5,   0,   0,   0,  10,   0, 
-     16,   0,   5,   0,   0,   0, 
-     42,   0,  16,   0,   4,   0, 
-      0,   0,  28,   0,   0,   5, 
-     34,   0,  16,   0,   5,   0, 
-      0,   0,  26,   0,  16,   0, 
-      5,   0,   0,   0,  86,   0, 
-      0,   5,  66,   0,  16,   0, 
-      5,   0,   0,   0,  26,   0, 
-     16,   0,   5,   0,   0,   0, 
-     50,   0,   0,  10,  18,   0, 
-     16,   0,   5,   0,   0,   0, 
-     42,   0,  16, 128,  65,   0, 
-      0,   0,   5,   0,   0,   0, 
-     42,   0,  16,   0,   4,   0, 
-      0,   0,  10,   0,  16,   0, 
-      5,   0,   0,   0,   1,   0, 
-      0,   7,  34,   0,  16,   0, 
-      5,   0,   0,   0,  26,   0, 
-     16,   0,   5,   0,   0,   0, 
-      1,  64,   0,   0,   1,   0, 
-      0,   0,   0,   0,   0,   7, 
-     66,   0,  16,   0,   5,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  10,   0, 
-     16,   0,   5,   0,   0,   0, 
-      0,   0,   0,   8,  18,   0, 
-     16,   0,   5,   0,   0,   0, 
-     58,   0,  16,   0,   0,   0, 
-      0,   0,  10,   0,  16, 128, 
-     65,   0,   0,   0,   5,   0, 
-      0,   0,  55,   0,   0,   9, 
-     18,   0,  16,   0,   5,   0, 
-      0,   0,  26,   0,  16,   0, 
-      5,   0,   0,   0,  10,   0, 
-     16,   0,   5,   0,   0,   0, 
-     42,   0,  16,   0,   5,   0, 
-      0,   0,  49,   0,   0,   7, 
-     34,   0,  16,   0,   5,   0, 
-      0,   0,  58,   0,  16,   0, 
-      0,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-      0,   0,   0,   8,  66,   0, 
-     16,   0,   5,   0,   0,   0, 
-     58,   0,  16, 128,  65,   0, 
-      0,   0,   0,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  14,   0,   0,   7, 
-    130,   0,  16,   0,   5,   0, 
-      0,   0,  42,   0,  16,   0, 
-      5,   0,   0,   0,  42,   0, 
-     16,   0,   4,   0,   0,   0, 
-     28,   0,   0,   5, 130,   0, 
-     16,   0,   5,   0,   0,   0, 
-     58,   0,  16,   0,   5,   0, 
-      0,   0,  86,   0,   0,   5, 
-     18,   0,  16,   0,   6,   0, 
-      0,   0,  58,   0,  16,   0, 
-      5,   0,   0,   0,  50,   0, 
-      0,  10,  66,   0,  16,   0, 
-      4,   0,   0,   0,  10,   0, 
-     16, 128,  65,   0,   0,   0, 
-      6,   0,   0,   0,  42,   0, 
-     16,   0,   4,   0,   0,   0, 
-     42,   0,  16,   0,   5,   0, 
-      0,   0,   1,   0,   0,   7, 
-     66,   0,  16,   0,   5,   0, 
-      0,   0,  58,   0,  16,   0, 
-      5,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   8, 130,   0, 
-     16,   0,   5,   0,   0,   0, 
-     58,   0,  16,   0,   0,   0, 
-      0,   0,  42,   0,  16, 128, 
-     65,   0,   0,   0,   4,   0, 
-      0,   0,   0,   0,   0,   7, 
-     66,   0,  16,   0,   4,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  42,   0, 
-     16,   0,   4,   0,   0,   0, 
-     55,   0,   0,   9,  66,   0, 
-     16,   0,   4,   0,   0,   0, 
-     42,   0,  16,   0,   5,   0, 
-      0,   0,  42,   0,  16,   0, 
-      4,   0,   0,   0,  58,   0, 
-     16,   0,   5,   0,   0,   0, 
-     55,   0,   0,   9,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     26,   0,  16,   0,   5,   0, 
-      0,   0,  42,   0,  16,   0, 
-      4,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     55,   0,   0,   9,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     58,   0,  16,   0,   4,   0, 
-      0,   0,  10,   0,  16,   0, 
-      5,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     28,   0,   0,   5,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  86,   0,   0,   5, 
-     66,   0,  16,   0,   4,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  86,   0, 
-      0,   5, 130,   0,  16,   0, 
-      4,   0,   0,   0,  26,   0, 
-     16,   0,   4,   0,   0,   0, 
-     28,   0,   0,   5,  50,   0, 
-     16,   0,   5,   0,   0,   0, 
-     70,   0,  16,   0,   3,   0, 
-      0,   0,  28,   0,   0,   5, 
-     98,   0,  16,   0,   4,   0, 
-      0,   0, 166,  11,  16,   0, 
-      4,   0,   0,   0,  38,   0, 
-      0,   9,   0, 208,   0,   0, 
-     50,   0,  16,   0,   5,   0, 
-      0,   0,  70,   0,  16,   0, 
-      5,   0,   0,   0,  70, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  30,   0, 
-      0,   7,  18,   0,  16,   0, 
-      1,   0,   0,   0,  26,   0, 
-     16,   0,   5,   0,   0,   0, 
-     10,   0,  16,   0,   5,   0, 
-      0,   0,  35,   0,   0,  10, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  26,   0,  16,   0, 
-      4,   0,   0,   0,  42, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     35,   0,   0,  10,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     42,   0,  16,   0,   4,   0, 
-      0,   0,  58, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0, 167,   0, 
-      0, 139,   2,  35,   0, 128, 
-    131, 153,  25,   0,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,   6, 224, 
-     17,   0,   0,   0,   0,   0, 
-      1,   0,   0,   7, 130,   0, 
-     16,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0, 128,  63,  18,   0, 
-      0,   1,  54,   0,   0,   5, 
-    130,   0,  16,   0,   2,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,  21,   0, 
-      0,   1,  21,   0,   0,   1, 
-     31,   0,   4,   3,  10,   0, 
-     16,   0,   4,   0,   0,   0, 
-     30,   0,   0,  11,  98,   0, 
-     16,   0,   4,   0,   0,   0, 
-    246, 142,  32,   0,   0,   0, 
-      0,   0,   1,   0,   0,   0, 
-      2,  64,   0,   0,   0,   0, 
-      0,   0, 255, 255, 255, 255, 
-    255, 255, 255, 255,   0,   0, 
-      0,   0,  86,   0,   0,   5, 
-     98,   0,  16,   0,   4,   0, 
-      0,   0,  86,   6,  16,   0, 
-      4,   0,   0,   0,  52,   0, 
-      0,   7,  18,   0,  16,   0, 
-      1,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  51,   0,   0,   7, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  26,   0,  16,   0, 
-      4,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     28,   0,   0,   5,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  52,   0,   0,   7, 
-     34,   0,  16,   0,   4,   0, 
-      0,   0,  10,   0,  16,   0, 
-      2,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     51,   0,   0,   7,  34,   0, 
-     16,   0,   4,   0,   0,   0, 
-     42,   0,  16,   0,   4,   0, 
-      0,   0,  26,   0,  16,   0, 
-      4,   0,   0,   0,  28,   0, 
-      0,   5,  34,   0,  16,   0, 
-      4,   0,   0,   0,  26,   0, 
-     16,   0,   4,   0,   0,   0, 
-     86,   0,   0,   5,  66,   0, 
-     16,   0,   4,   0,   0,   0, 
-     26,   0,  16,   0,   4,   0, 
-      0,   0,  86,   0,   0,   5, 
-    130,   0,  16,   0,   4,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  28,   0, 
-      0,   5,  50,   0,  16,   0, 
-      5,   0,   0,   0,  70,   0, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5,  98,   0, 
-     16,   0,   4,   0,   0,   0, 
-    166,  11,  16,   0,   4,   0, 
-      0,   0,  38,   0,   0,   9, 
-      0, 208,   0,   0,  50,   0, 
-     16,   0,   5,   0,   0,   0, 
-     70,   0,  16,   0,   5,   0, 
-      0,   0,  70, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  30,   0,   0,   7, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  26,   0,  16,   0, 
-      5,   0,   0,   0,  10,   0, 
-     16,   0,   5,   0,   0,   0, 
-     35,   0,   0,  10,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     26,   0,  16,   0,   4,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  35,   0, 
-      0,  10,  18,   0,  16,   0, 
-      1,   0,   0,   0,  42,   0, 
-     16,   0,   4,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  35,   0, 128, 131, 153, 
-     25,   0,  18,   0,  16,   0, 
-      1,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,   6, 224,  17,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   7,  66,   0,  16,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-    128,  63,  18,   0,   0,   1, 
-     32,   0,   0,   8,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      1,  64,   0,   0,   2,   0, 
-      0,   0,  31,   0,   4,   3, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  55,   0,   0,  10, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  10, 128,  32,   0, 
-      0,   0,   0,   0,   7,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0, 128,   1,  64, 
-      0,   0,   0,   0,   0,  63, 
-      0,   0,   0,   7,  98,   0, 
-     16,   0,   4,   0,   0,   0, 
-    166,  11,  16,   0,   0,   0, 
-      0,   0,   6,   0,  16,   0, 
-      1,   0,   0,   0,  49,   0, 
-      0,   7,  18,   0,  16,   0, 
-      1,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,   0,   0,   0,   8, 
-    130,   0,  16,   0,   4,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  26,   0, 
-     16, 128,  65,   0,   0,   0, 
-      1,   0,   0,   0,  14,   0, 
-      0,   7,  18,   0,  16,   0, 
-      5,   0,   0,   0,  58,   0, 
-     16,   0,   4,   0,   0,   0, 
-     26,   0,  16,   0,   4,   0, 
-      0,   0,  28,   0,   0,   5, 
-     18,   0,  16,   0,   5,   0, 
-      0,   0,  10,   0,  16,   0, 
-      5,   0,   0,   0,  86,   0, 
-      0,   5,  34,   0,  16,   0, 
-      5,   0,   0,   0,  10,   0, 
-     16,   0,   5,   0,   0,   0, 
-     50,   0,   0,  10, 130,   0, 
-     16,   0,   4,   0,   0,   0, 
-     26,   0,  16, 128,  65,   0, 
-      0,   0,   5,   0,   0,   0, 
-     26,   0,  16,   0,   4,   0, 
-      0,   0,  58,   0,  16,   0, 
-      4,   0,   0,   0,   1,   0, 
-      0,   7,  18,   0,  16,   0, 
-      5,   0,   0,   0,  10,   0, 
-     16,   0,   5,   0,   0,   0, 
-      1,  64,   0,   0,   1,   0, 
-      0,   0,   0,   0,   0,   7, 
-     34,   0,  16,   0,   5,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  58,   0, 
-     16,   0,   4,   0,   0,   0, 
-      0,   0,   0,   8, 130,   0, 
-     16,   0,   4,   0,   0,   0, 
-     42,   0,  16,   0,   0,   0, 
-      0,   0,  58,   0,  16, 128, 
-     65,   0,   0,   0,   4,   0, 
-      0,   0,  55,   0,   0,   9, 
-    130,   0,  16,   0,   4,   0, 
-      0,   0,  10,   0,  16,   0, 
-      5,   0,   0,   0,  58,   0, 
-     16,   0,   4,   0,   0,   0, 
-     26,   0,  16,   0,   5,   0, 
-      0,   0,  49,   0,   0,   7, 
-     18,   0,  16,   0,   5,   0, 
-      0,   0,  42,   0,  16,   0, 
-      0,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-      0,   0,   0,   8,  34,   0, 
-     16,   0,   5,   0,   0,   0, 
-     42,   0,  16, 128,  65,   0, 
-      0,   0,   0,   0,   0,   0, 
-     26,   0,  16,   0,   1,   0, 
-      0,   0,  14,   0,   0,   7, 
-     66,   0,  16,   0,   5,   0, 
-      0,   0,  26,   0,  16,   0, 
-      5,   0,   0,   0,  26,   0, 
-     16,   0,   4,   0,   0,   0, 
-     28,   0,   0,   5,  66,   0, 
-     16,   0,   5,   0,   0,   0, 
-     42,   0,  16,   0,   5,   0, 
-      0,   0,  86,   0,   0,   5, 
-    130,   0,  16,   0,   5,   0, 
-      0,   0,  42,   0,  16,   0, 
-      5,   0,   0,   0,  50,   0, 
-      0,  10,  34,   0,  16,   0, 
-      4,   0,   0,   0,  58,   0, 
-     16, 128,  65,   0,   0,   0, 
-      5,   0,   0,   0,  26,   0, 
-     16,   0,   4,   0,   0,   0, 
-     26,   0,  16,   0,   5,   0, 
-      0,   0,   1,   0,   0,   7, 
-     34,   0,  16,   0,   5,   0, 
-      0,   0,  42,   0,  16,   0, 
-      5,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   8,  66,   0, 
-     16,   0,   5,   0,   0,   0, 
-     42,   0,  16,   0,   0,   0, 
-      0,   0,  26,   0,  16, 128, 
-     65,   0,   0,   0,   4,   0, 
-      0,   0,   0,   0,   0,   7, 
-     34,   0,  16,   0,   4,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  26,   0, 
-     16,   0,   4,   0,   0,   0, 
-     55,   0,   0,   9,  34,   0, 
-     16,   0,   4,   0,   0,   0, 
-     26,   0,  16,   0,   5,   0, 
-      0,   0,  26,   0,  16,   0, 
-      4,   0,   0,   0,  42,   0, 
-     16,   0,   5,   0,   0,   0, 
-     55,   0,   0,   9,  34,   0, 
-     16,   0,   1,   0,   0,   0, 
-     10,   0,  16,   0,   5,   0, 
-      0,   0,  26,   0,  16,   0, 
-      4,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-     55,   0,   0,   9,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  58,   0,  16,   0, 
-      4,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-     49,   0,   0,   7,  34,   0, 
-     16,   0,   1,   0,   0,   0, 
-     10,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   8,  34,   0,  16,   0, 
-      4,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-     10,   0,  16, 128,  65,   0, 
-      0,   0,   2,   0,   0,   0, 
-     14,   0,   0,   7, 130,   0, 
-     16,   0,   4,   0,   0,   0, 
-     26,   0,  16,   0,   4,   0, 
-      0,   0,  42,   0,  16,   0, 
-      4,   0,   0,   0,  28,   0, 
-      0,   5, 130,   0,  16,   0, 
-      4,   0,   0,   0,  58,   0, 
-     16,   0,   4,   0,   0,   0, 
-     86,   0,   0,   5,  18,   0, 
-     16,   0,   5,   0,   0,   0, 
-     58,   0,  16,   0,   4,   0, 
-      0,   0,  50,   0,   0,  10, 
-     34,   0,  16,   0,   4,   0, 
-      0,   0,  10,   0,  16, 128, 
-     65,   0,   0,   0,   5,   0, 
-      0,   0,  42,   0,  16,   0, 
-      4,   0,   0,   0,  26,   0, 
-     16,   0,   4,   0,   0,   0, 
-      1,   0,   0,   7, 130,   0, 
-     16,   0,   4,   0,   0,   0, 
-     58,   0,  16,   0,   4,   0, 
-      0,   0,   1,  64,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   7,  18,   0,  16,   0, 
-      5,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-     26,   0,  16,   0,   4,   0, 
-      0,   0,   0,   0,   0,   8, 
-     34,   0,  16,   0,   4,   0, 
-      0,   0,  58,   0,  16,   0, 
-      0,   0,   0,   0,  26,   0, 
-     16, 128,  65,   0,   0,   0, 
-      4,   0,   0,   0,  55,   0, 
-      0,   9,  34,   0,  16,   0, 
-      4,   0,   0,   0,  58,   0, 
-     16,   0,   4,   0,   0,   0, 
-     26,   0,  16,   0,   4,   0, 
-      0,   0,  10,   0,  16,   0, 
-      5,   0,   0,   0,  49,   0, 
-      0,   7, 130,   0,  16,   0, 
-      4,   0,   0,   0,  58,   0, 
-     16,   0,   0,   0,   0,   0, 
-     10,   0,  16,   0,   2,   0, 
-      0,   0,   0,   0,   0,   8, 
-     18,   0,  16,   0,   5,   0, 
-      0,   0,  58,   0,  16, 128, 
-     65,   0,   0,   0,   0,   0, 
-      0,   0,  10,   0,  16,   0, 
-      2,   0,   0,   0,  14,   0, 
-      0,   7,  34,   0,  16,   0, 
-      5,   0,   0,   0,  10,   0, 
-     16,   0,   5,   0,   0,   0, 
-     42,   0,  16,   0,   4,   0, 
-      0,   0,  28,   0,   0,   5, 
-     34,   0,  16,   0,   5,   0, 
-      0,   0,  26,   0,  16,   0, 
-      5,   0,   0,   0,  86,   0, 
-      0,   5,  66,   0,  16,   0, 
-      5,   0,   0,   0,  26,   0, 
-     16,   0,   5,   0,   0,   0, 
-     50,   0,   0,  10,  66,   0, 
-     16,   0,   4,   0,   0,   0, 
-     42,   0,  16, 128,  65,   0, 
-      0,   0,   5,   0,   0,   0, 
-     42,   0,  16,   0,   4,   0, 
-      0,   0,  10,   0,  16,   0, 
-      5,   0,   0,   0,   1,   0, 
-      0,   7,  18,   0,  16,   0, 
-      5,   0,   0,   0,  26,   0, 
-     16,   0,   5,   0,   0,   0, 
-      1,  64,   0,   0,   1,   0, 
-      0,   0,   0,   0,   0,   8, 
-     34,   0,  16,   0,   5,   0, 
-      0,   0,  58,   0,  16,   0, 
-      0,   0,   0,   0,  42,   0, 
-     16, 128,  65,   0,   0,   0, 
-      4,   0,   0,   0,   0,   0, 
-      0,   7,  66,   0,  16,   0, 
-      4,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-     42,   0,  16,   0,   4,   0, 
-      0,   0,  55,   0,   0,   9, 
-     66,   0,  16,   0,   4,   0, 
-      0,   0,  10,   0,  16,   0, 
-      5,   0,   0,   0,  42,   0, 
-     16,   0,   4,   0,   0,   0, 
-     26,   0,  16,   0,   5,   0, 
-      0,   0,  55,   0,   0,   9, 
-     66,   0,  16,   0,   4,   0, 
-      0,   0,  58,   0,  16,   0, 
-      4,   0,   0,   0,  42,   0, 
-     16,   0,   4,   0,   0,   0, 
-     10,   0,  16,   0,   2,   0, 
-      0,   0,  55,   0,   0,   9, 
-     34,   0,  16,   0,   1,   0, 
-      0,   0,  26,   0,  16,   0, 
-      1,   0,   0,   0,  26,   0, 
-     16,   0,   4,   0,   0,   0, 
-     42,   0,  16,   0,   4,   0, 
-      0,   0,  28,   0,   0,   5, 
-     50,   0,  16,   0,   1,   0, 
-      0,   0,  70,   0,  16,   0, 
-      1,   0,   0,   0,  86,   0, 
-      0,   5, 194,   0,  16,   0, 
-      4,   0,   0,   0,  86,   1, 
-     16,   0,   1,   0,   0,   0, 
-     28,   0,   0,   5,  50,   0, 
-     16,   0,   1,   0,   0,   0, 
-     70,   0,  16,   0,   3,   0, 
-      0,   0,  28,   0,   0,   5, 
-     98,   0,  16,   0,   4,   0, 
-      0,   0, 166,  11,  16,   0, 
-      4,   0,   0,   0,  38,   0, 
-      0,   9,   0, 208,   0,   0, 
-     50,   0,  16,   0,   1,   0, 
-      0,   0,  70,   0,  16,   0, 
-      1,   0,   0,   0,  70, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  30,   0, 
-      0,   7,  18,   0,  16,   0, 
-      1,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  35,   0,   0,  10, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  26,   0,  16,   0, 
-      4,   0,   0,   0,  42, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     35,   0,   0,  10,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     42,   0,  16,   0,   4,   0, 
-      0,   0,  58, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0, 167,   0, 
-      0, 139,   2,  35,   0, 128, 
-    131, 153,  25,   0,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,   6, 224, 
-     17,   0,   0,   0,   0,   0, 
-      1,   0,   0,   7,  66,   0, 
-     16,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0, 128,  63,  18,   0, 
-      0,   1,  54,   0,   0,   5, 
-     66,   0,  16,   0,   2,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,  21,   0, 
-      0,   1,  21,   0,   0,   1, 
-     31,   0,   4,   3,  10,   0, 
-     16,   0,   4,   0,   0,   0, 
-     30,   0,   0,  11,  50,   0, 
-     16,   0,   1,   0,   0,   0, 
-    230, 138,  32,   0,   0,   0, 
-      0,   0,   1,   0,   0,   0, 
-      2,  64,   0,   0, 255, 255, 
-    255, 255, 255, 255, 255, 255, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,  86,   0,   0,   5, 
-     50,   0,  16,   0,   1,   0, 
-      0,   0,  70,   0,  16,   0, 
-      1,   0,   0,   0,  52,   0, 
-      0,  10,  50,   0,  16,   0, 
-      4,   0,   0,   0,  70,   0, 
-     16,   0,   2,   0,   0,   0, 
-      2,  64,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,  51,   0,   0,   7, 
-     50,   0,  16,   0,   1,   0, 
-      0,   0,  70,   0,  16,   0, 
-      1,   0,   0,   0,  70,   0, 
-     16,   0,   4,   0,   0,   0, 
-     28,   0,   0,   5,  50,   0, 
-     16,   0,   1,   0,   0,   0, 
-     70,   0,  16,   0,   1,   0, 
-      0,   0,  28,   0,   0,   5, 
-     50,   0,  16,   0,   4,   0, 
-      0,   0,  70,   0,  16,   0, 
-      3,   0,   0,   0,  38,   0, 
-      0,   9,   0, 208,   0,   0, 
-     50,   0,  16,   0,   4,   0, 
-      0,   0,  70,   0,  16,   0, 
-      4,   0,   0,   0,  70, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  30,   0, 
-      0,   7,  18,   0,  16,   0, 
-      4,   0,   0,   0,  26,   0, 
-     16,   0,   4,   0,   0,   0, 
-     10,   0,  16,   0,   4,   0, 
-      0,   0,  35,   0,   0,  10, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  42, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16,   0,   4,   0,   0,   0, 
-     35,   0,   0,  10,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     26,   0,  16,   0,   1,   0, 
-      0,   0,  58, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0, 167,   0, 
-      0, 139,   2,  35,   0, 128, 
-    131, 153,  25,   0,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,   6, 224, 
-     17,   0,   0,   0,   0,   0, 
-      1,   0,   0,   7, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0, 128,  63,  18,   0, 
-      0,   1,  32,   0,   0,   8, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  58, 128,  32,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   1,  64,   0,   0, 
-      2,   0,   0,   0,  31,   0, 
-      4,   3,  10,   0,  16,   0, 
-      1,   0,   0,   0,  55,   0, 
-      0,  10,  18,   0,  16,   0, 
-      1,   0,   0,   0,  10, 128, 
-     32,   0,   0,   0,   0,   0, 
-      7,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0, 128, 
-      1,  64,   0,   0,   0,   0, 
-      0,  63,   0,   0,   0,   7, 
-     50,   0,  16,   0,   1,   0, 
-      0,   0, 182,  15,  16,   0, 
-      0,   0,   0,   0,   6,   0, 
-     16,   0,   1,   0,   0,   0, 
-     49,   0,   0,   7,  50,   0, 
-     16,   0,   4,   0,   0,   0, 
-     70,   0,  16,   0,   2,   0, 
-      0,   0,  86,   5,  16,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   8, 194,   0,  16,   0, 
-      4,   0,   0,   0,  86,   5, 
-     16,   0,   0,   0,   0,   0, 
-      6,   4,  16, 128,  65,   0, 
-      0,   0,   2,   0,   0,   0, 
-     14,   0,   0,   7,  50,   0, 
-     16,   0,   5,   0,   0,   0, 
-    230,  10,  16,   0,   4,   0, 
-      0,   0,  70,   0,  16,   0, 
-      1,   0,   0,   0,  28,   0, 
-      0,   5,  50,   0,  16,   0, 
-      5,   0,   0,   0,  70,   0, 
-     16,   0,   5,   0,   0,   0, 
-     86,   0,   0,   5, 194,   0, 
-     16,   0,   5,   0,   0,   0, 
-      6,   4,  16,   0,   5,   0, 
-      0,   0,  50,   0,   0,  10, 
-    194,   0,  16,   0,   4,   0, 
-      0,   0, 166,  14,  16, 128, 
-     65,   0,   0,   0,   5,   0, 
-      0,   0,   6,   4,  16,   0, 
-      1,   0,   0,   0, 166,  14, 
-     16,   0,   4,   0,   0,   0, 
-      1,   0,   0,  10,  50,   0, 
-     16,   0,   5,   0,   0,   0, 
-     70,   0,  16,   0,   5,   0, 
-      0,   0,   2,  64,   0,   0, 
-      1,   0,   0,   0,   1,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   7, 194,   0,  16,   0, 
-      5,   0,   0,   0,  86,   5, 
-     16,   0,   0,   0,   0,   0, 
-    166,  14,  16,   0,   4,   0, 
-      0,   0,   0,   0,   0,   8, 
-    194,   0,  16,   0,   4,   0, 
-      0,   0, 246,  11,  16,   0, 
-      0,   0,   0,   0, 166,  14, 
-     16, 128,  65,   0,   0,   0, 
-      4,   0,   0,   0,  55,   0, 
-      0,   9, 194,   0,  16,   0, 
-      4,   0,   0,   0,   6,   4, 
-     16,   0,   5,   0,   0,   0, 
-    166,  14,  16,   0,   4,   0, 
-      0,   0, 166,  14,  16,   0, 
-      5,   0,   0,   0,  49,   0, 
-      0,   7,  50,   0,  16,   0, 
-      5,   0,   0,   0, 182,  15, 
-     16,   0,   0,   0,   0,   0, 
-     70,   0,  16,   0,   2,   0, 
-      0,   0,   0,   0,   0,   8, 
-    194,   0,  16,   0,   5,   0, 
-      0,   0, 246,  11,  16, 128, 
-     65,   0,   0,   0,   0,   0, 
-      0,   0,   6,   4,  16,   0, 
-      2,   0,   0,   0,  14,   0, 
-      0,   7,  50,   0,  16,   0, 
-      6,   0,   0,   0, 230,  10, 
-     16,   0,   5,   0,   0,   0, 
-     70,   0,  16,   0,   1,   0, 
-      0,   0,  28,   0,   0,   5, 
-     50,   0,  16,   0,   6,   0, 
-      0,   0,  70,   0,  16,   0, 
-      6,   0,   0,   0,  86,   0, 
-      0,   5, 194,   0,  16,   0, 
-      6,   0,   0,   0,   6,   4, 
-     16,   0,   6,   0,   0,   0, 
-     50,   0,   0,  10,  50,   0, 
-     16,   0,   1,   0,   0,   0, 
-    230,  10,  16, 128,  65,   0, 
-      0,   0,   6,   0,   0,   0, 
-     70,   0,  16,   0,   1,   0, 
-      0,   0, 230,  10,  16,   0, 
-      5,   0,   0,   0,   1,   0, 
-      0,  10, 194,   0,  16,   0, 
-      5,   0,   0,   0,   6,   4, 
-     16,   0,   6,   0,   0,   0, 
-      2,  64,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      1,   0,   0,   0,   1,   0, 
-      0,   0,   0,   0,   0,   8, 
-     50,   0,  16,   0,   6,   0, 
-      0,   0, 182,  15,  16,   0, 
-      0,   0,   0,   0,  70,   0, 
-     16, 128,  65,   0,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   7,  50,   0,  16,   0, 
-      1,   0,   0,   0,  86,   5, 
-     16,   0,   0,   0,   0,   0, 
-     70,   0,  16,   0,   1,   0, 
-      0,   0,  55,   0,   0,   9, 
-     50,   0,  16,   0,   1,   0, 
-      0,   0, 230,  10,  16,   0, 
-      5,   0,   0,   0,  70,   0, 
-     16,   0,   1,   0,   0,   0, 
-     70,   0,  16,   0,   6,   0, 
-      0,   0,  55,   0,   0,   9, 
-     50,   0,  16,   0,   1,   0, 
-      0,   0,  70,   0,  16,   0, 
-      5,   0,   0,   0,  70,   0, 
-     16,   0,   1,   0,   0,   0, 
-     70,   0,  16,   0,   2,   0, 
-      0,   0,  55,   0,   0,   9, 
-     50,   0,  16,   0,   1,   0, 
-      0,   0,  70,   0,  16,   0, 
-      4,   0,   0,   0, 230,  10, 
-     16,   0,   4,   0,   0,   0, 
-     70,   0,  16,   0,   1,   0, 
-      0,   0,  28,   0,   0,   5, 
-     50,   0,  16,   0,   1,   0, 
-      0,   0,  70,   0,  16,   0, 
-      1,   0,   0,   0,  28,   0, 
-      0,   5,  50,   0,  16,   0, 
-      2,   0,   0,   0,  70,   0, 
-     16,   0,   3,   0,   0,   0, 
-     38,   0,   0,   9,   0, 208, 
-      0,   0,  50,   0,  16,   0, 
-      2,   0,   0,   0,  70,   0, 
-     16,   0,   2,   0,   0,   0, 
-     70, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     30,   0,   0,   7,  18,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      2,   0,   0,   0,  35,   0, 
-      0,  10,  18,   0,  16,   0, 
-      1,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     42, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   2,   0, 
-      0,   0,  35,   0,   0,  10, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  26,   0,  16,   0, 
-      1,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-    167,   0,   0, 139,   2,  35, 
-      0, 128, 131, 153,  25,   0, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-      6, 224,  17,   0,   0,   0, 
-      0,   0,   1,   0,   0,   7, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0, 128,  63, 
-     18,   0,   0,   1,  54,   0, 
-      0,   5, 130,   0,  16,   0, 
-      3,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     21,   0,   0,   1,  21,   0, 
-      0,   1,  21,   0,   0,   1, 
-     26,   0,   0,   5,  50,   0, 
-     16,   0,   1,   0,   0,   0, 
-    230,  10,  16,   0,   1,   0, 
-      0,   0,   0,   0,   0,   8, 
-     18,   0,  16,   0,   2,   0, 
-      0,   0,  58,   0,  16,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16, 128,  65,   0,   0,   0, 
-      3,   0,   0,   0,  50,   0, 
-      0,   9,  18,   0,  16,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     10,   0,  16,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,   0,   0, 
-      0,   8,  34,   0,  16,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16, 128,  65,   0,   0,   0, 
-      2,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     50,   0,   0,   9,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16,   0,   2,   0,   0,   0, 
-      0,   0,   0,   8,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     10,   0,  16, 128,  65,   0, 
-      0,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  50,   0,   0,   9, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  26,   0,  16,   0, 
-      1,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     10,   0,  16,   0,   2,   0, 
-      0,   0,  57,   0,   0,  10, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,   2,  64,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-    168,   0,   0,   9,  18, 224, 
-     17,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   0,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     18,   0,   0,   1,  32,   0, 
-      0,   8,  18,   0,  16,   0, 
-      1,   0,   0,   0,  42, 128, 
-     32,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   1,  64, 
-      0,   0,   2,   0,   0,   0, 
-     31,   0,   4,   3,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     65,   0,   0,   5,  50,   0, 
-     16,   0,   1,   0,   0,   0, 
-    182,  15,  16,   0,   1,   0, 
-      0,   0,   0,   0,   0,  10, 
-    242,   0,  16,   0,   2,   0, 
-      0,   0,  22,   5,  16,   0, 
-      1,   0,   0,   0,   2,  64, 
-      0,   0,   0,   0,   0,  64, 
-      0,   0, 128, 191,   0,   0, 
-    128, 191,   0,   0, 128,  63, 
-     31,   0,   0,   4,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,  29,   0, 
-      0,  10, 194,   0,  16,   0, 
-      3,   0,   0,   0, 166,   6, 
-     16,   0,   2,   0,   0,   0, 
-      2,  64,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,  28,   0,   0,   5, 
-     50,   0,  16,   0,   4,   0, 
-      0,   0, 102,  10,  16,   0, 
-      2,   0,   0,   0,  79,   0, 
-      0,   8,  50,   0,  16,   0, 
-      4,   0,   0,   0,  70,   0, 
-     16,   0,   4,   0,   0,   0, 
-    182, 143,  32,   0,   0,   0, 
-      0,   0,   1,   0,   0,   0, 
-      1,   0,   0,   7,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  10,   0,  16,   0, 
-      4,   0,   0,   0,   1,   0, 
-      0,   7,  66,   0,  16,   0, 
-      3,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,   1,   0,   0,   7, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  26,   0,  16,   0, 
-      4,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5,  82,   0, 
-     16,   0,   4,   0,   0,   0, 
-      6,   1,  16,   0,   3,   0, 
-      0,   0,  28,   0,   0,   5, 
-     50,   0,  16,   0,   5,   0, 
-      0,   0, 150,   5,  16,   0, 
-      2,   0,   0,   0,  38,   0, 
-      0,   9,   0, 208,   0,   0, 
-     82,   0,  16,   0,   4,   0, 
-      0,   0,   6,   2,  16,   0, 
-      4,   0,   0,   0,   6, 129, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  30,   0, 
-      0,   7,  18,   0,  16,   0, 
-      4,   0,   0,   0,  42,   0, 
-     16,   0,   4,   0,   0,   0, 
-     10,   0,  16,   0,   4,   0, 
-      0,   0,  35,   0,   0,  10, 
-     18,   0,  16,   0,   4,   0, 
-      0,   0,  10,   0,  16,   0, 
-      5,   0,   0,   0,  42, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16,   0,   4,   0,   0,   0, 
-     35,   0,   0,  10,  18,   0, 
-     16,   0,   4,   0,   0,   0, 
-     26,   0,  16,   0,   5,   0, 
-      0,   0,  58, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      4,   0,   0,   0, 167,   0, 
-      0, 139,   2,  35,   0, 128, 
-    131, 153,  25,   0,  18,   0, 
-     16,   0,   4,   0,   0,   0, 
-     10,   0,  16,   0,   4,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,   6, 224, 
-     17,   0,   0,   0,   0,   0, 
-      1,   0,   0,   7,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  10,   0,  16,   0, 
-      4,   0,   0,   0,   1,   0, 
-      0,   7,  18,   0,  16,   0, 
-      5,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-    128,  63,  29,   0,   0,   7, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  26,   0,  16,   0, 
-      1,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     28,   0,   0,   5,  82,   0, 
-     16,   0,   4,   0,   0,   0, 
-     86,   5,  16,   0,   1,   0, 
-      0,   0,  79,   0,   0,   8, 
-     18,   0,  16,   0,   4,   0, 
-      0,   0,  10,   0,  16,   0, 
-      4,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      1,   0,   0,   0,   1,   0, 
-      0,   7,  66,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10,   0,  16,   0,   4,   0, 
-      0,   0,   1,   0,   0,   7, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-      1,   0,   0,   7,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     26,   0,  16,   0,   4,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5,  50,   0,  16,   0, 
-      4,   0,   0,   0,  70,   0, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  38,   0,   0,   9, 
-      0, 208,   0,   0,  50,   0, 
-     16,   0,   4,   0,   0,   0, 
-     70,   0,  16,   0,   4,   0, 
-      0,   0,  70, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  30,   0,   0,   7, 
-     18,   0,  16,   0,   4,   0, 
-      0,   0,  26,   0,  16,   0, 
-      4,   0,   0,   0,  10,   0, 
-     16,   0,   4,   0,   0,   0, 
-     35,   0,   0,  10, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      4,   0,   0,   0,  35,   0, 
-      0,  10, 130,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   4,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  35,   0, 128, 131, 153, 
-     25,   0, 130,   0,  16,   0, 
-      3,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,   6, 224,  17,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   7,  66,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,   1,   0,   0,   7, 
-     18,   0,  16,   0,   4,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0, 128,  63, 
-     18,   0,   0,   1,  32,   0, 
-      0,   8,  66,   0,  16,   0, 
-      3,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-     31,   0,   4,   3,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     30,   0,   0,  11,  50,   0, 
-     16,   0,   6,   0,   0,   0, 
-    230, 138,  32,   0,   0,   0, 
-      0,   0,   1,   0,   0,   0, 
-      2,  64,   0,   0, 255, 255, 
-    255, 255, 255, 255, 255, 255, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,  86,   0,   0,   5, 
-     50,   0,  16,   0,   6,   0, 
-      0,   0,  70,   0,  16,   0, 
-      6,   0,   0,   0,  52,   0, 
-      0,  10, 194,   0,  16,   0, 
-      6,   0,   0,   0,  86,   9, 
-     16,   0,   2,   0,   0,   0, 
-      2,  64,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,  51,   0,   0,   7, 
-     50,   0,  16,   0,   6,   0, 
-      0,   0,  70,   0,  16,   0, 
-      6,   0,   0,   0, 230,  10, 
-     16,   0,   6,   0,   0,   0, 
-     28,   0,   0,   5,  50,   0, 
-     16,   0,   6,   0,   0,   0, 
-     70,   0,  16,   0,   6,   0, 
-      0,   0,  28,   0,   0,   5, 
-    194,   0,  16,   0,   6,   0, 
-      0,   0,   6,   4,  16,   0, 
-      3,   0,   0,   0,  38,   0, 
-      0,   9,   0, 208,   0,   0, 
-    194,   0,  16,   0,   6,   0, 
-      0,   0, 166,  14,  16,   0, 
-      6,   0,   0,   0,   6, 132, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  30,   0, 
-      0,   7, 130,   0,  16,   0, 
-      3,   0,   0,   0,  58,   0, 
-     16,   0,   6,   0,   0,   0, 
-     42,   0,  16,   0,   6,   0, 
-      0,   0,  35,   0,   0,  10, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0,  10,   0,  16,   0, 
-      6,   0,   0,   0,  42, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     35,   0,   0,  10, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     26,   0,  16,   0,   6,   0, 
-      0,   0,  58, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0, 167,   0, 
-      0, 139,   2,  35,   0, 128, 
-    131, 153,  25,   0, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,   6, 224, 
-     17,   0,   0,   0,   0,   0, 
-      1,   0,   0,   7,  18,   0, 
-     16,   0,   5,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0, 128,  63,  18,   0, 
-      0,   1,  32,   0,   0,   8, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0,  58, 128,  32,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   1,  64,   0,   0, 
-      2,   0,   0,   0,  31,   0, 
-      4,   3,  58,   0,  16,   0, 
-      3,   0,   0,   0,  55,   0, 
-      0,  10, 130,   0,  16,   0, 
-      3,   0,   0,   0,  10, 128, 
-     32,   0,   0,   0,   0,   0, 
-      7,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0, 128, 
-      1,  64,   0,   0,   0,   0, 
-      0,  63,   0,   0,   0,   7, 
-     50,   0,  16,   0,   6,   0, 
-      0,   0, 182,  15,  16,   0, 
-      0,   0,   0,   0, 246,  15, 
-     16,   0,   3,   0,   0,   0, 
-     49,   0,   0,   7, 194,   0, 
-     16,   0,   6,   0,   0,   0, 
-     86,   9,  16,   0,   2,   0, 
-      0,   0,  86,   5,  16,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   8,  50,   0,  16,   0, 
-      7,   0,   0,   0,  86,   5, 
-     16,   0,   0,   0,   0,   0, 
-    150,   5,  16, 128,  65,   0, 
-      0,   0,   2,   0,   0,   0, 
-     14,   0,   0,   7, 194,   0, 
-     16,   0,   7,   0,   0,   0, 
-      6,   4,  16,   0,   7,   0, 
-      0,   0,   6,   4,  16,   0, 
-      6,   0,   0,   0,  28,   0, 
-      0,   5, 194,   0,  16,   0, 
-      7,   0,   0,   0, 166,  14, 
-     16,   0,   7,   0,   0,   0, 
-     86,   0,   0,   5,  50,   0, 
-     16,   0,   8,   0,   0,   0, 
-    230,  10,  16,   0,   7,   0, 
-      0,   0,  50,   0,   0,  10, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0,  70,   0,  16, 128, 
-     65,   0,   0,   0,   8,   0, 
-      0,   0,  70,   0,  16,   0, 
-      6,   0,   0,   0,  70,   0, 
-     16,   0,   7,   0,   0,   0, 
-      1,   0,   0,  10, 194,   0, 
-     16,   0,   7,   0,   0,   0, 
-    166,  14,  16,   0,   7,   0, 
-      0,   0,   2,  64,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   1,   0,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   7,  50,   0,  16,   0, 
-      8,   0,   0,   0,  86,   5, 
-     16,   0,   0,   0,   0,   0, 
-     70,   0,  16,   0,   7,   0, 
-      0,   0,   0,   0,   0,   8, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0, 182,  15,  16,   0, 
-      0,   0,   0,   0,  70,   0, 
-     16, 128,  65,   0,   0,   0, 
-      7,   0,   0,   0,  55,   0, 
-      0,   9,  50,   0,  16,   0, 
-      7,   0,   0,   0, 230,  10, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,   7,   0, 
-      0,   0,  70,   0,  16,   0, 
-      8,   0,   0,   0,  49,   0, 
-      0,   7, 194,   0,  16,   0, 
-      7,   0,   0,   0, 246,  11, 
-     16,   0,   0,   0,   0,   0, 
-     86,   9,  16,   0,   2,   0, 
-      0,   0,   0,   0,   0,   8, 
-     50,   0,  16,   0,   8,   0, 
-      0,   0, 182,  15,  16, 128, 
-     65,   0,   0,   0,   0,   0, 
-      0,   0, 150,   5,  16,   0, 
-      2,   0,   0,   0,  14,   0, 
-      0,   7, 194,   0,  16,   0, 
-      8,   0,   0,   0,   6,   4, 
-     16,   0,   8,   0,   0,   0, 
-      6,   4,  16,   0,   6,   0, 
-      0,   0,  28,   0,   0,   5, 
-    194,   0,  16,   0,   8,   0, 
-      0,   0, 166,  14,  16,   0, 
-      8,   0,   0,   0,  86,   0, 
-      0,   5,  50,   0,  16,   0, 
-      9,   0,   0,   0, 230,  10, 
-     16,   0,   8,   0,   0,   0, 
-     50,   0,   0,  10,  50,   0, 
-     16,   0,   6,   0,   0,   0, 
-     70,   0,  16, 128,  65,   0, 
-      0,   0,   9,   0,   0,   0, 
-     70,   0,  16,   0,   6,   0, 
-      0,   0,  70,   0,  16,   0, 
-      8,   0,   0,   0,   1,   0, 
-      0,  10,  50,   0,  16,   0, 
-      8,   0,   0,   0, 230,  10, 
-     16,   0,   8,   0,   0,   0, 
-      2,  64,   0,   0,   1,   0, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   8, 
-    194,   0,  16,   0,   8,   0, 
-      0,   0, 246,  11,  16,   0, 
-      0,   0,   0,   0,   6,   4, 
-     16, 128,  65,   0,   0,   0, 
-      6,   0,   0,   0,   0,   0, 
-      0,   7,  50,   0,  16,   0, 
-      6,   0,   0,   0,  86,   5, 
-     16,   0,   0,   0,   0,   0, 
-     70,   0,  16,   0,   6,   0, 
-      0,   0,  55,   0,   0,   9, 
-     50,   0,  16,   0,   6,   0, 
-      0,   0,  70,   0,  16,   0, 
-      8,   0,   0,   0,  70,   0, 
-     16,   0,   6,   0,   0,   0, 
-    230,  10,  16,   0,   8,   0, 
-      0,   0,  55,   0,   0,   9, 
-     50,   0,  16,   0,   6,   0, 
-      0,   0, 230,  10,  16,   0, 
-      7,   0,   0,   0,  70,   0, 
-     16,   0,   6,   0,   0,   0, 
-    150,   5,  16,   0,   2,   0, 
-      0,   0,  55,   0,   0,   9, 
-     50,   0,  16,   0,   6,   0, 
-      0,   0, 230,  10,  16,   0, 
-      6,   0,   0,   0,  70,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,   6,   0, 
-      0,   0,  28,   0,   0,   5, 
-     50,   0,  16,   0,   6,   0, 
-      0,   0,  70,   0,  16,   0, 
-      6,   0,   0,   0,  28,   0, 
-      0,   5, 194,   0,  16,   0, 
-      6,   0,   0,   0,   6,   4, 
-     16,   0,   3,   0,   0,   0, 
-     38,   0,   0,   9,   0, 208, 
-      0,   0, 194,   0,  16,   0, 
-      6,   0,   0,   0, 166,  14, 
-     16,   0,   6,   0,   0,   0, 
-      6, 132,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     30,   0,   0,   7, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16,   0,   6,   0, 
-      0,   0,  42,   0,  16,   0, 
-      6,   0,   0,   0,  35,   0, 
-      0,  10, 130,   0,  16,   0, 
-      3,   0,   0,   0,  10,   0, 
-     16,   0,   6,   0,   0,   0, 
-     42, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  35,   0,   0,  10, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0,  26,   0,  16,   0, 
-      6,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-    167,   0,   0, 139,   2,  35, 
-      0, 128, 131, 153,  25,   0, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-      6, 224,  17,   0,   0,   0, 
-      0,   0,   1,   0,   0,   7, 
-     18,   0,  16,   0,   5,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0, 128,  63, 
-     18,   0,   0,   1,  54,   0, 
-      0,   5,  18,   0,  16,   0, 
-      5,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     21,   0,   0,   1,  21,   0, 
-      0,   1,  31,   0,   4,   3, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  30,   0,   0,  11, 
-    194,   0,  16,   0,   3,   0, 
-      0,   0, 246, 139,  32,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   0,   2,  64,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0, 255, 255, 255, 255, 
-    255, 255, 255, 255,  86,   0, 
-      0,   5, 194,   0,  16,   0, 
-      3,   0,   0,   0, 166,  14, 
-     16,   0,   3,   0,   0,   0, 
-     52,   0,   0,   7,  18,   0, 
-     16,   0,   6,   0,   0,   0, 
-     26,   0,  16,   0,   1,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,  51,   0, 
-      0,   7,  66,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10,   0,  16,   0,   6,   0, 
-      0,   0,  52,   0,   0,   7, 
-     18,   0,  16,   0,   6,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     51,   0,   0,   7, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  10,   0,  16,   0, 
-      6,   0,   0,   0,  28,   0, 
-      0,   5, 194,   0,  16,   0, 
-      3,   0,   0,   0, 166,  14, 
-     16,   0,   3,   0,   0,   0, 
-     86,   0,   0,   5, 194,   0, 
-     16,   0,   6,   0,   0,   0, 
-    246,  11,  16,   0,   3,   0, 
-      0,   0,  28,   0,   0,   5, 
-    194,   0,  16,   0,   3,   0, 
-      0,   0,   6,   4,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5,  50,   0,  16,   0, 
-      6,   0,   0,   0, 230,  10, 
-     16,   0,   6,   0,   0,   0, 
-     38,   0,   0,   9,   0, 208, 
-      0,   0, 194,   0,  16,   0, 
-      3,   0,   0,   0, 166,  14, 
-     16,   0,   3,   0,   0,   0, 
-      6, 132,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     30,   0,   0,   7,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  35,   0, 
-      0,  10,  66,   0,  16,   0, 
-      3,   0,   0,   0,  10,   0, 
-     16,   0,   6,   0,   0,   0, 
-     42, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  35,   0,   0,  10, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  26,   0,  16,   0, 
-      6,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-    167,   0,   0, 139,   2,  35, 
-      0, 128, 131, 153,  25,   0, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-      6, 224,  17,   0,   0,   0, 
-      0,   0,   1,   0,   0,   7, 
-     18,   0,  16,   0,   4,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0, 128,  63, 
-     18,   0,   0,   1,  32,   0, 
-      0,   8,  66,   0,  16,   0, 
-      3,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   1,  64, 
-      0,   0,   2,   0,   0,   0, 
-     31,   0,   4,   3,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     55,   0,   0,  10,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10, 128,  32,   0,   0,   0, 
-      0,   0,   7,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0, 128,   1,  64,   0,   0, 
-      0,   0,   0,  63,   0,   0, 
-      0,   7, 194,   0,  16,   0, 
-      3,   0,   0,   0, 166,  14, 
-     16,   0,   0,   0,   0,   0, 
-    166,  10,  16,   0,   3,   0, 
-      0,   0,  49,   0,   0,   7, 
-     18,   0,  16,   0,   6,   0, 
-      0,   0,  26,   0,  16,   0, 
-      1,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-      0,   0,   0,   8,  34,   0, 
-     16,   0,   6,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  26,   0,  16, 128, 
-     65,   0,   0,   0,   1,   0, 
-      0,   0,  14,   0,   0,   7, 
-     66,   0,  16,   0,   6,   0, 
-      0,   0,  26,   0,  16,   0, 
-      6,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5,  66,   0, 
-     16,   0,   6,   0,   0,   0, 
-     42,   0,  16,   0,   6,   0, 
-      0,   0,  86,   0,   0,   5, 
-    130,   0,  16,   0,   6,   0, 
-      0,   0,  42,   0,  16,   0, 
-      6,   0,   0,   0,  50,   0, 
-      0,  10,  34,   0,  16,   0, 
-      6,   0,   0,   0,  58,   0, 
-     16, 128,  65,   0,   0,   0, 
-      6,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     26,   0,  16,   0,   6,   0, 
-      0,   0,   1,   0,   0,   7, 
-     66,   0,  16,   0,   6,   0, 
-      0,   0,  42,   0,  16,   0, 
-      6,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   7, 130,   0, 
-     16,   0,   6,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  26,   0,  16,   0, 
-      6,   0,   0,   0,   0,   0, 
-      0,   8,  34,   0,  16,   0, 
-      6,   0,   0,   0,  42,   0, 
-     16,   0,   0,   0,   0,   0, 
-     26,   0,  16, 128,  65,   0, 
-      0,   0,   6,   0,   0,   0, 
-     55,   0,   0,   9,  34,   0, 
-     16,   0,   6,   0,   0,   0, 
-     42,   0,  16,   0,   6,   0, 
-      0,   0,  26,   0,  16,   0, 
-      6,   0,   0,   0,  58,   0, 
-     16,   0,   6,   0,   0,   0, 
-     49,   0,   0,   7,  66,   0, 
-     16,   0,   6,   0,   0,   0, 
-     42,   0,  16,   0,   0,   0, 
-      0,   0,  26,   0,  16,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   8, 130,   0,  16,   0, 
-      6,   0,   0,   0,  42,   0, 
-     16, 128,  65,   0,   0,   0, 
-      0,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-     14,   0,   0,   7,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   6,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5,  18,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     86,   0,   0,   5,  34,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  50,   0,   0,  10, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  26,   0,  16, 128, 
-     65,   0,   0,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  58,   0, 
-     16,   0,   6,   0,   0,   0, 
-      1,   0,   0,   7, 130,   0, 
-     16,   0,   6,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,   1,  64,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   8,  18,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   0,   0,   0,   0, 
-     42,   0,  16, 128,  65,   0, 
-      0,   0,   3,   0,   0,   0, 
-      0,   0,   0,   7,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  55,   0, 
-      0,   9,  66,   0,  16,   0, 
-      3,   0,   0,   0,  58,   0, 
-     16,   0,   6,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  55,   0, 
-      0,   9,  66,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   6,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  26,   0,  16,   0, 
-      1,   0,   0,   0,  55,   0, 
-      0,   9,  66,   0,  16,   0, 
-      3,   0,   0,   0,  10,   0, 
-     16,   0,   6,   0,   0,   0, 
-     26,   0,  16,   0,   6,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  49,   0, 
-      0,   7,  18,   0,  16,   0, 
-      6,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,   0,   0,   0,   8, 
-     34,   0,  16,   0,   6,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  26,   0, 
-     16, 128,  65,   0,   0,   0, 
-      2,   0,   0,   0,  14,   0, 
-      0,   7,  66,   0,  16,   0, 
-      6,   0,   0,   0,  26,   0, 
-     16,   0,   6,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  28,   0,   0,   5, 
-     66,   0,  16,   0,   6,   0, 
-      0,   0,  42,   0,  16,   0, 
-      6,   0,   0,   0,  86,   0, 
-      0,   5, 130,   0,  16,   0, 
-      6,   0,   0,   0,  42,   0, 
-     16,   0,   6,   0,   0,   0, 
-     50,   0,   0,  10,  34,   0, 
-     16,   0,   6,   0,   0,   0, 
-     58,   0,  16, 128,  65,   0, 
-      0,   0,   6,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  26,   0,  16,   0, 
-      6,   0,   0,   0,   1,   0, 
-      0,   7,  66,   0,  16,   0, 
-      6,   0,   0,   0,  42,   0, 
-     16,   0,   6,   0,   0,   0, 
-      1,  64,   0,   0,   1,   0, 
-      0,   0,   0,   0,   0,   7, 
-    130,   0,  16,   0,   6,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  26,   0, 
-     16,   0,   6,   0,   0,   0, 
-      0,   0,   0,   8,  34,   0, 
-     16,   0,   6,   0,   0,   0, 
-     58,   0,  16,   0,   0,   0, 
-      0,   0,  26,   0,  16, 128, 
-     65,   0,   0,   0,   6,   0, 
-      0,   0,  55,   0,   0,   9, 
-     34,   0,  16,   0,   6,   0, 
-      0,   0,  42,   0,  16,   0, 
-      6,   0,   0,   0,  26,   0, 
-     16,   0,   6,   0,   0,   0, 
-     58,   0,  16,   0,   6,   0, 
-      0,   0,  49,   0,   0,   7, 
-     66,   0,  16,   0,   6,   0, 
-      0,   0,  58,   0,  16,   0, 
-      0,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-      0,   0,   0,   8, 130,   0, 
-     16,   0,   6,   0,   0,   0, 
-     58,   0,  16, 128,  65,   0, 
-      0,   0,   0,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  14,   0,   0,   7, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16,   0, 
-      6,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  86,   0,   0,   5, 
-     34,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  50,   0, 
-      0,  10, 130,   0,  16,   0, 
-      3,   0,   0,   0,  26,   0, 
-     16, 128,  65,   0,   0,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16,   0,   6,   0, 
-      0,   0,   1,   0,   0,   7, 
-    130,   0,  16,   0,   6,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   8,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   0,   0, 
-      0,   0,  58,   0,  16, 128, 
-     65,   0,   0,   0,   3,   0, 
-      0,   0,   0,   0,   0,   7, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     55,   0,   0,   9, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16,   0,   6,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     55,   0,   0,   9, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     42,   0,  16,   0,   6,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     55,   0,   0,   9, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10,   0,  16,   0,   6,   0, 
-      0,   0,  26,   0,  16,   0, 
-      6,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5, 194,   0, 
-     16,   0,   3,   0,   0,   0, 
-    166,  14,  16,   0,   3,   0, 
-      0,   0,  86,   0,   0,   5, 
-    194,   0,  16,   0,   6,   0, 
-      0,   0, 246,  11,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5, 194,   0,  16,   0, 
-      3,   0,   0,   0,   6,   4, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5,  50,   0, 
-     16,   0,   6,   0,   0,   0, 
-    230,  10,  16,   0,   6,   0, 
-      0,   0,  38,   0,   0,   9, 
-      0, 208,   0,   0, 194,   0, 
-     16,   0,   3,   0,   0,   0, 
-    166,  14,  16,   0,   3,   0, 
-      0,   0,   6, 132,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  30,   0,   0,   7, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     35,   0,   0,  10,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10,   0,  16,   0,   6,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  35,   0, 
-      0,  10,  66,   0,  16,   0, 
-      3,   0,   0,   0,  26,   0, 
-     16,   0,   6,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  35,   0, 128, 131, 153, 
-     25,   0,  66,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,   6, 224,  17,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   7,  18,   0,  16,   0, 
-      4,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-    128,  63,  18,   0,   0,   1, 
-     54,   0,   0,   5,  18,   0, 
-     16,   0,   4,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  21,   0,   0,   1, 
-     21,   0,   0,   1,  21,   0, 
-      0,   1,  31,   0,   0,   4, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-     29,   0,   0,  10, 194,   0, 
-     16,   0,   3,   0,   0,   0, 
-    246,   7,  16,   0,   2,   0, 
-      0,   0,   2,  64,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,  28,   0, 
-      0,   5, 242,   0,  16,   0, 
-      6,   0,   0,   0, 118,  13, 
-     16,   0,   2,   0,   0,   0, 
-     79,   0,   0,   8,  50,   0, 
-     16,   0,   6,   0,   0,   0, 
-     70,   0,  16,   0,   6,   0, 
-      0,   0, 182, 143,  32,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   0,   1,   0,   0,   7, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  10,   0, 
-     16,   0,   6,   0,   0,   0, 
-      1,   0,   0,   7,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,   1,   0, 
-      0,   7,  66,   0,  16,   0, 
-      3,   0,   0,   0,  26,   0, 
-     16,   0,   6,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  28,   0,   0,   5, 
-     50,   0,  16,   0,   6,   0, 
-      0,   0,  70,   0,  16,   0, 
-      3,   0,   0,   0,  38,   0, 
-      0,   9,   0, 208,   0,   0, 
-     50,   0,  16,   0,   6,   0, 
-      0,   0,  70,   0,  16,   0, 
-      6,   0,   0,   0,  70, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  30,   0, 
-      0,   7, 130,   0,  16,   0, 
-      3,   0,   0,   0,  26,   0, 
-     16,   0,   6,   0,   0,   0, 
-     10,   0,  16,   0,   6,   0, 
-      0,   0,  35,   0,   0,  10, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0,  42,   0,  16,   0, 
-      6,   0,   0,   0,  42, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     35,   0,   0,  10, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16,   0,   6,   0, 
-      0,   0,  58, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0, 167,   0, 
-      0, 139,   2,  35,   0, 128, 
-    131, 153,  25,   0, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,   6, 224, 
-     17,   0,   0,   0,   0,   0, 
-      1,   0,   0,   7,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,   1,   0, 
-      0,   7,  18,   0,  16,   0, 
-      6,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-    128,  63,  18,   0,   0,   1, 
-     32,   0,   0,   8,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      1,  64,   0,   0,   1,   0, 
-      0,   0,  31,   0,   4,   3, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  30,   0,   0,  11, 
-    194,   0,  16,   0,   3,   0, 
-      0,   0, 166, 142,  32,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   0,   2,  64,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0, 255, 255, 255, 255, 
-    255, 255, 255, 255,  86,   0, 
-      0,   5, 194,   0,  16,   0, 
-      3,   0,   0,   0, 166,  14, 
-     16,   0,   3,   0,   0,   0, 
-     52,   0,   0,  10,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-    214,   5,  16,   0,   2,   0, 
-      0,   0,   2,  64,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,  51,   0, 
-      0,   7, 194,   0,  16,   0, 
-      3,   0,   0,   0, 166,  14, 
-     16,   0,   3,   0,   0,   0, 
-      6,   4,  16,   0,   7,   0, 
-      0,   0,  28,   0,   0,   5, 
-    194,   0,  16,   0,   3,   0, 
-      0,   0, 166,  14,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5,  50,   0,  16,   0, 
-      7,   0,   0,   0,  70,   0, 
-     16,   0,   3,   0,   0,   0, 
-     38,   0,   0,   9,   0, 208, 
-      0,   0,  50,   0,  16,   0, 
-      7,   0,   0,   0,  70,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     30,   0,   0,   7,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  35,   0, 
-      0,  10,  66,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     42, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  35,   0,   0,  10, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-    167,   0,   0, 139,   2,  35, 
-      0, 128, 131, 153,  25,   0, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-      6, 224,  17,   0,   0,   0, 
-      0,   0,   1,   0,   0,   7, 
-     18,   0,  16,   0,   6,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0, 128,  63, 
-     18,   0,   0,   1,  32,   0, 
-      0,   8,  66,   0,  16,   0, 
-      3,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   1,  64, 
-      0,   0,   2,   0,   0,   0, 
-     31,   0,   4,   3,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     55,   0,   0,  10,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10, 128,  32,   0,   0,   0, 
-      0,   0,   7,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0, 128,   1,  64,   0,   0, 
-      0,   0,   0,  63,   0,   0, 
-      0,   7, 194,   0,  16,   0, 
-      3,   0,   0,   0, 246,  11, 
-     16,   0,   0,   0,   0,   0, 
-    166,  10,  16,   0,   3,   0, 
-      0,   0,  49,   0,   0,   7, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0, 214,   5,  16,   0, 
-      2,   0,   0,   0,  86,   5, 
-     16,   0,   0,   0,   0,   0, 
-      0,   0,   0,   8, 194,   0, 
-     16,   0,   7,   0,   0,   0, 
-     86,   5,  16,   0,   0,   0, 
-      0,   0,  86,  13,  16, 128, 
-     65,   0,   0,   0,   2,   0, 
-      0,   0,  14,   0,   0,   7, 
-     50,   0,  16,   0,   8,   0, 
-      0,   0, 230,  10,  16,   0, 
-      7,   0,   0,   0, 230,  10, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5,  50,   0, 
-     16,   0,   8,   0,   0,   0, 
-     70,   0,  16,   0,   8,   0, 
-      0,   0,  86,   0,   0,   5, 
-    194,   0,  16,   0,   8,   0, 
-      0,   0,   6,   4,  16,   0, 
-      8,   0,   0,   0,  50,   0, 
-      0,  10, 194,   0,  16,   0, 
-      7,   0,   0,   0, 166,  14, 
-     16, 128,  65,   0,   0,   0, 
-      8,   0,   0,   0, 166,  14, 
-     16,   0,   3,   0,   0,   0, 
-    166,  14,  16,   0,   7,   0, 
-      0,   0,   1,   0,   0,  10, 
-     50,   0,  16,   0,   8,   0, 
-      0,   0,  70,   0,  16,   0, 
-      8,   0,   0,   0,   2,  64, 
-      0,   0,   1,   0,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   7, 194,   0, 
-     16,   0,   8,   0,   0,   0, 
-     86,   5,  16,   0,   0,   0, 
-      0,   0, 166,  14,  16,   0, 
-      7,   0,   0,   0,   0,   0, 
-      0,   8, 194,   0,  16,   0, 
-      7,   0,   0,   0, 246,  11, 
-     16,   0,   0,   0,   0,   0, 
-    166,  14,  16, 128,  65,   0, 
-      0,   0,   7,   0,   0,   0, 
-     55,   0,   0,   9, 194,   0, 
-     16,   0,   7,   0,   0,   0, 
-      6,   4,  16,   0,   8,   0, 
-      0,   0, 166,  14,  16,   0, 
-      7,   0,   0,   0, 166,  14, 
-     16,   0,   8,   0,   0,   0, 
-     49,   0,   0,   7,  50,   0, 
-     16,   0,   8,   0,   0,   0, 
-    182,  15,  16,   0,   0,   0, 
-      0,   0, 214,   5,  16,   0, 
-      2,   0,   0,   0,   0,   0, 
-      0,   8, 194,   0,  16,   0, 
-      8,   0,   0,   0, 246,  11, 
-     16, 128,  65,   0,   0,   0, 
-      0,   0,   0,   0,  86,  13, 
-     16,   0,   2,   0,   0,   0, 
-     14,   0,   0,   7,  50,   0, 
-     16,   0,   9,   0,   0,   0, 
-    230,  10,  16,   0,   8,   0, 
-      0,   0, 230,  10,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5,  50,   0,  16,   0, 
-      9,   0,   0,   0,  70,   0, 
-     16,   0,   9,   0,   0,   0, 
-     86,   0,   0,   5, 194,   0, 
-     16,   0,   9,   0,   0,   0, 
-      6,   4,  16,   0,   9,   0, 
-      0,   0,  50,   0,   0,  10, 
-    194,   0,  16,   0,   3,   0, 
-      0,   0, 166,  14,  16, 128, 
-     65,   0,   0,   0,   9,   0, 
-      0,   0, 166,  14,  16,   0, 
-      3,   0,   0,   0, 166,  14, 
-     16,   0,   8,   0,   0,   0, 
-      1,   0,   0,  10, 194,   0, 
-     16,   0,   8,   0,   0,   0, 
-      6,   4,  16,   0,   9,   0, 
-      0,   0,   2,  64,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   1,   0,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   8,  50,   0,  16,   0, 
-      9,   0,   0,   0, 182,  15, 
-     16,   0,   0,   0,   0,   0, 
-    230,  10,  16, 128,  65,   0, 
-      0,   0,   3,   0,   0,   0, 
-      0,   0,   0,   7, 194,   0, 
-     16,   0,   3,   0,   0,   0, 
-     86,   5,  16,   0,   0,   0, 
-      0,   0, 166,  14,  16,   0, 
-      3,   0,   0,   0,  55,   0, 
-      0,   9, 194,   0,  16,   0, 
-      3,   0,   0,   0, 166,  14, 
-     16,   0,   8,   0,   0,   0, 
-    166,  14,  16,   0,   3,   0, 
-      0,   0,   6,   4,  16,   0, 
-      9,   0,   0,   0,  55,   0, 
-      0,   9, 194,   0,  16,   0, 
-      3,   0,   0,   0,   6,   4, 
-     16,   0,   8,   0,   0,   0, 
-    166,  14,  16,   0,   3,   0, 
-      0,   0,  86,  13,  16,   0, 
-      2,   0,   0,   0,  55,   0, 
-      0,   9, 194,   0,  16,   0, 
-      3,   0,   0,   0,   6,   4, 
-     16,   0,   7,   0,   0,   0, 
-    166,  14,  16,   0,   7,   0, 
-      0,   0, 166,  14,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5, 194,   0,  16,   0, 
-      3,   0,   0,   0, 166,  14, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,   3,   0, 
-      0,   0,  38,   0,   0,   9, 
-      0, 208,   0,   0,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,   7,   0, 
-      0,   0,  70, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  30,   0,   0,   7, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     35,   0,   0,  10,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  35,   0, 
-      0,  10,  66,   0,  16,   0, 
-      3,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  35,   0, 128, 131, 153, 
-     25,   0,  66,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,   6, 224,  17,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   7,  18,   0,  16,   0, 
-      6,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-    128,  63,  18,   0,   0,   1, 
-     54,   0,   0,   5,  18,   0, 
-     16,   0,   6,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  21,   0,   0,   1, 
-     21,   0,   0,   1,  21,   0, 
-      0,   1,  31,   0,   0,   4, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-     29,   0,   0,  10, 242,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,  14,  16,   0,   2,   0, 
-      0,   0,   2,  64,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,  28,   0, 
-      0,   5, 242,   0,  16,   0, 
-      8,   0,   0,   0,  70,  14, 
-     16,   0,   2,   0,   0,   0, 
-     79,   0,   0,   8, 242,   0, 
-     16,   0,   8,   0,   0,   0, 
-     70,  14,  16,   0,   8,   0, 
-      0,   0, 182, 143,  32,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   0,   1,   0,   0,   7, 
-    210,   0,  16,   0,   7,   0, 
-      0,   0, 166,   3,  16,   0, 
-      7,   0,   0,   0, 166,   3, 
-     16,   0,   8,   0,   0,   0, 
-      1,   0,   0,   7,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,   1,   0, 
-      0,   7,  66,   0,  16,   0, 
-      3,   0,   0,   0,  26,   0, 
-     16,   0,   8,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  28,   0,   0,   5, 
-     50,   0,  16,   0,   8,   0, 
-      0,   0,  70,   0,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5, 194,   0,  16,   0, 
-      8,   0,   0,   0,  86,   1, 
-     16,   0,   2,   0,   0,   0, 
-     38,   0,   0,   9,   0, 208, 
-      0,   0,  50,   0,  16,   0, 
-      8,   0,   0,   0,  70,   0, 
-     16,   0,   8,   0,   0,   0, 
-     70, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     30,   0,   0,   7, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     26,   0,  16,   0,   8,   0, 
-      0,   0,  10,   0,  16,   0, 
-      8,   0,   0,   0,  35,   0, 
-      0,  10, 130,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   8,   0,   0,   0, 
-     42, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  35,   0,   0,  10, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0,  58,   0,  16,   0, 
-      8,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-    167,   0,   0, 139,   2,  35, 
-      0, 128, 131, 153,  25,   0, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-      6, 224,  17,   0,   0,   0, 
-      0,   0,   1,   0,   0,   7, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-      1,   0,   0,   7,  18,   0, 
-     16,   0,   8,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0, 128,  63,  29,   0, 
-      0,  10, 194,   0,  16,   0, 
-      3,   0,   0,   0,   6,   4, 
-     16,   0,   1,   0,   0,   0, 
-      2,  64,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   1,   0,   0,   7, 
-    114,   0,  16,   0,   7,   0, 
-      0,   0, 166,  10,  16,   0, 
-      3,   0,   0,   0, 134,   3, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5,  50,   0, 
-     16,   0,   9,   0,   0,   0, 
-     70,   0,  16,   0,   1,   0, 
-      0,   0,  79,   0,   0,   8, 
-     50,   0,  16,   0,   9,   0, 
-      0,   0,  70,   0,  16,   0, 
-      9,   0,   0,   0, 230, 138, 
-     32,   0,   0,   0,   0,   0, 
-      1,   0,   0,   0,   1,   0, 
-      0,   7, 114,   0,  16,   0, 
-      7,   0,   0,   0,  70,   2, 
-     16,   0,   7,   0,   0,   0, 
-      6,   0,  16,   0,   9,   0, 
-      0,   0,  28,   0,   0,   5, 
-    194,   0,  16,   0,   9,   0, 
-      0,   0,   6,   4,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5, 130,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     28,   0,   0,   5,  18,   0, 
-     16,   0,  10,   0,   0,   0, 
-     42,   0,  16,   0,   2,   0, 
-      0,   0,  38,   0,   0,   9, 
-      0, 208,   0,   0, 194,   0, 
-     16,   0,   9,   0,   0,   0, 
-    166,  14,  16,   0,   9,   0, 
-      0,   0,   6, 132,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  30,   0,   0,   7, 
-     66,   0,  16,   0,   9,   0, 
-      0,   0,  58,   0,  16,   0, 
-      9,   0,   0,   0,  42,   0, 
-     16,   0,   9,   0,   0,   0, 
-     35,   0,   0,  10, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      9,   0,   0,   0,  35,   0, 
-      0,  10, 130,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,  10,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  35,   0, 128, 131, 153, 
-     25,   0, 130,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,   6, 224,  17,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   7,  18,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,   1,   0,   0,   7, 
-     66,   0,  16,   0,   5,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0, 128,  63, 
-      1,   0,   0,   7, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  26,   0,  16,   0, 
-      9,   0,   0,   0,   1,   0, 
-      0,   7,  66,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,   1,   0,   0,   7, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  10,   0,  16,   0, 
-      9,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5, 146,   0, 
-     16,   0,   7,   0,   0,   0, 
-      6,   4,  16,   0,   3,   0, 
-      0,   0,  28,   0,   0,   5, 
-     50,   0,  16,   0,   9,   0, 
-      0,   0,  70,   0,  16,   0, 
-      1,   0,   0,   0,  38,   0, 
-      0,   9,   0, 208,   0,   0, 
-    146,   0,  16,   0,   7,   0, 
-      0,   0,   6,  12,  16,   0, 
-      7,   0,   0,   0,   6, 132, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  30,   0, 
-      0,   7, 130,   0,  16,   0, 
-      3,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  35,   0,   0,  10, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0,  10,   0,  16,   0, 
-      9,   0,   0,   0,  42, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     35,   0,   0,  10, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     26,   0,  16,   0,   9,   0, 
-      0,   0,  58, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0, 167,   0, 
-      0, 139,   2,  35,   0, 128, 
-    131, 153,  25,   0, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,   6, 224, 
-     17,   0,   0,   0,   0,   0, 
-      1,   0,   0,   7,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,   1,   0, 
-      0,   7,  66,   0,  16,   0, 
-      4,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-    128,  63,  28,   0,   0,   5, 
-    194,   0,  16,   0,   3,   0, 
-      0,   0,   6,   4,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5,  18,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     28,   0,   0,   5, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   2,   0, 
-      0,   0,  38,   0,   0,   9, 
-      0, 208,   0,   0, 194,   0, 
-     16,   0,   3,   0,   0,   0, 
-    166,  14,  16,   0,   3,   0, 
-      0,   0,   6, 132,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  30,   0,   0,   7, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     35,   0,   0,  10,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  35,   0, 
-      0,  10,  66,   0,  16,   0, 
-      3,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  35,   0, 128, 131, 153, 
-     25,   0,  66,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,   6, 224,  17,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   7,  66,   0,  16,   0, 
-      3,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,   1,   0,   0,   7, 
-     66,   0,  16,   0,   6,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0, 128,  63, 
-     28,   0,   0,   5, 194,   0, 
-     16,   0,   3,   0,   0,   0, 
-      6,   4,  16,   0,   3,   0, 
-      0,   0,  28,   0,   0,   5, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  28,   0, 
-      0,   5,  34,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   2,   0,   0,   0, 
-     38,   0,   0,   9,   0, 208, 
-      0,   0, 194,   0,  16,   0, 
-      3,   0,   0,   0, 166,  14, 
-     16,   0,   3,   0,   0,   0, 
-      6, 132,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     30,   0,   0,   7,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  35,   0, 
-      0,  10,  66,   0,  16,   0, 
-      3,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  35,   0,   0,  10, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-    167,   0,   0, 139,   2,  35, 
-      0, 128, 131, 153,  25,   0, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-      6, 224,  17,   0,   0,   0, 
-      0,   0,   1,   0,   0,   7, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-      1,   0,   0,   7,  66,   0, 
-     16,   0,   8,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0, 128,  63,  18,   0, 
-      0,   1,  32,   0,   0,   8, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  58, 128,  32,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   1,  64,   0,   0, 
-      1,   0,   0,   0,  31,   0, 
-      4,   3,  42,   0,  16,   0, 
-      3,   0,   0,   0,  30,   0, 
-      0,  11,  50,   0,  16,   0, 
-      7,   0,   0,   0, 230, 138, 
-     32,   0,   0,   0,   0,   0, 
-      1,   0,   0,   0,   2,  64, 
-      0,   0, 255, 255, 255, 255, 
-    255, 255, 255, 255,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-     86,   0,   0,   5,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,   7,   0, 
-      0,   0,  52,   0,   0,  10, 
-    194,   0,  16,   0,   7,   0, 
-      0,   0,  86,   1,  16,   0, 
-      2,   0,   0,   0,   2,  64, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-     51,   0,   0,   7,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,   7,   0, 
-      0,   0, 230,  10,  16,   0, 
-      7,   0,   0,   0,  28,   0, 
-      0,   5,  50,   0,  16,   0, 
-      7,   0,   0,   0,  70,   0, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5, 194,   0, 
-     16,   0,   7,   0,   0,   0, 
-      6,   4,  16,   0,   3,   0, 
-      0,   0,  38,   0,   0,   9, 
-      0, 208,   0,   0, 194,   0, 
-     16,   0,   7,   0,   0,   0, 
-    166,  14,  16,   0,   7,   0, 
-      0,   0,   6, 132,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  30,   0,   0,   7, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-     35,   0,   0,  10, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,  35,   0, 
-      0,  10, 130,   0,  16,   0, 
-      3,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  35,   0, 128, 131, 153, 
-     25,   0, 130,   0,  16,   0, 
-      3,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,   6, 224,  17,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   7,  18,   0,  16,   0, 
-      8,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-    128,  63,  18,   0,   0,   1, 
-     32,   0,   0,   8, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      1,  64,   0,   0,   2,   0, 
-      0,   0,  31,   0,   4,   3, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  55,   0,   0,  10, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0,  10, 128,  32,   0, 
-      0,   0,   0,   0,   7,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0, 128,   1,  64, 
-      0,   0,   0,   0,   0,  63, 
-      0,   0,   0,   7,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-    182,  15,  16,   0,   0,   0, 
-      0,   0, 246,  15,  16,   0, 
-      3,   0,   0,   0,  49,   0, 
-      0,   7, 194,   0,  16,   0, 
-      7,   0,   0,   0,  86,   1, 
-     16,   0,   2,   0,   0,   0, 
-     86,   5,  16,   0,   0,   0, 
-      0,   0,   0,   0,   0,   8, 
-     50,   0,  16,   0,   9,   0, 
-      0,   0,  86,   5,  16,   0, 
-      0,   0,   0,   0,  22,   5, 
-     16, 128,  65,   0,   0,   0, 
-      2,   0,   0,   0,  14,   0, 
-      0,   7, 194,   0,  16,   0, 
-      9,   0,   0,   0,   6,   4, 
-     16,   0,   9,   0,   0,   0, 
-      6,   4,  16,   0,   7,   0, 
-      0,   0,  28,   0,   0,   5, 
-    194,   0,  16,   0,   9,   0, 
-      0,   0, 166,  14,  16,   0, 
-      9,   0,   0,   0,  86,   0, 
-      0,   5,  50,   0,  16,   0, 
-     10,   0,   0,   0, 230,  10, 
-     16,   0,   9,   0,   0,   0, 
-     50,   0,   0,  10,  50,   0, 
-     16,   0,   9,   0,   0,   0, 
-     70,   0,  16, 128,  65,   0, 
-      0,   0,  10,   0,   0,   0, 
-     70,   0,  16,   0,   7,   0, 
-      0,   0,  70,   0,  16,   0, 
-      9,   0,   0,   0,   1,   0, 
-      0,  10, 194,   0,  16,   0, 
-      9,   0,   0,   0, 166,  14, 
-     16,   0,   9,   0,   0,   0, 
-      2,  64,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      1,   0,   0,   0,   1,   0, 
-      0,   0,   0,   0,   0,   7, 
-     50,   0,  16,   0,  10,   0, 
-      0,   0,  86,   5,  16,   0, 
-      0,   0,   0,   0,  70,   0, 
-     16,   0,   9,   0,   0,   0, 
-      0,   0,   0,   8,  50,   0, 
-     16,   0,   9,   0,   0,   0, 
-    182,  15,  16,   0,   0,   0, 
-      0,   0,  70,   0,  16, 128, 
-     65,   0,   0,   0,   9,   0, 
-      0,   0,  55,   0,   0,   9, 
-     50,   0,  16,   0,   9,   0, 
-      0,   0, 230,  10,  16,   0, 
-      9,   0,   0,   0,  70,   0, 
-     16,   0,   9,   0,   0,   0, 
-     70,   0,  16,   0,  10,   0, 
-      0,   0,  49,   0,   0,   7, 
-    194,   0,  16,   0,   9,   0, 
-      0,   0, 246,  11,  16,   0, 
-      0,   0,   0,   0,  86,   1, 
-     16,   0,   2,   0,   0,   0, 
-      0,   0,   0,   8,  50,   0, 
-     16,   0,  10,   0,   0,   0, 
-    182,  15,  16, 128,  65,   0, 
-      0,   0,   0,   0,   0,   0, 
-     22,   5,  16,   0,   2,   0, 
-      0,   0,  14,   0,   0,   7, 
-    194,   0,  16,   0,  10,   0, 
-      0,   0,   6,   4,  16,   0, 
-     10,   0,   0,   0,   6,   4, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5, 194,   0, 
-     16,   0,  10,   0,   0,   0, 
-    166,  14,  16,   0,  10,   0, 
-      0,   0,  86,   0,   0,   5, 
-     50,   0,  16,   0,  11,   0, 
-      0,   0, 230,  10,  16,   0, 
-     10,   0,   0,   0,  50,   0, 
-      0,  10,  50,   0,  16,   0, 
-      7,   0,   0,   0,  70,   0, 
-     16, 128,  65,   0,   0,   0, 
-     11,   0,   0,   0,  70,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,  10,   0, 
-      0,   0,   1,   0,   0,  10, 
-     50,   0,  16,   0,  10,   0, 
-      0,   0, 230,  10,  16,   0, 
-     10,   0,   0,   0,   2,  64, 
-      0,   0,   1,   0,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   8, 194,   0, 
-     16,   0,  10,   0,   0,   0, 
-    246,  11,  16,   0,   0,   0, 
-      0,   0,   6,   4,  16, 128, 
-     65,   0,   0,   0,   7,   0, 
-      0,   0,   0,   0,   0,   7, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0,  86,   5,  16,   0, 
-      0,   0,   0,   0,  70,   0, 
-     16,   0,   7,   0,   0,   0, 
-     55,   0,   0,   9,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,  10,   0, 
-      0,   0,  70,   0,  16,   0, 
-      7,   0,   0,   0, 230,  10, 
-     16,   0,  10,   0,   0,   0, 
-     55,   0,   0,   9,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-    230,  10,  16,   0,   9,   0, 
-      0,   0,  70,   0,  16,   0, 
-      7,   0,   0,   0,  22,   5, 
-     16,   0,   2,   0,   0,   0, 
-     55,   0,   0,   9,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-    230,  10,  16,   0,   7,   0, 
-      0,   0,  70,   0,  16,   0, 
-      9,   0,   0,   0,  70,   0, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,   7,   0, 
-      0,   0,  28,   0,   0,   5, 
-    194,   0,  16,   0,   7,   0, 
-      0,   0,   6,   4,  16,   0, 
-      3,   0,   0,   0,  38,   0, 
-      0,   9,   0, 208,   0,   0, 
-    194,   0,  16,   0,   7,   0, 
-      0,   0, 166,  14,  16,   0, 
-      7,   0,   0,   0,   6, 132, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  30,   0, 
-      0,   7,  34,   0,  16,   0, 
-      2,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  35,   0,   0,  10, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  42, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     35,   0,   0,  10,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  58, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0, 167,   0, 
-      0, 139,   2,  35,   0, 128, 
-    131, 153,  25,   0,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,   6, 224, 
-     17,   0,   0,   0,   0,   0, 
-      1,   0,   0,   7,  18,   0, 
-     16,   0,   8,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0, 128,  63,  18,   0, 
-      0,   1,  54,   0,   0,   5, 
-     18,   0,  16,   0,   8,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,  21,   0, 
-      0,   1,  21,   0,   0,   1, 
-     31,   0,   4,   3,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     30,   0,   0,  11,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-    182, 143,  32,   0,   0,   0, 
-      0,   0,   1,   0,   0,   0, 
-      2,  64,   0,   0, 255, 255, 
-    255, 255, 255, 255, 255, 255, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,  86,   0,   0,   5, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0,  70,   0,  16,   0, 
-      7,   0,   0,   0,  52,   0, 
-      0,   7,  34,   0,  16,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16,   0,   2,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  51,   0,   0,   7, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     28,   0,   0,   5,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  52,   0,   0,   7, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     51,   0,   0,   7, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5, 130,   0,  16,   0, 
-      3,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     86,   0,   0,   5,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  86,   0,   0,   5, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  28,   0, 
-      0,   5,  50,   0,  16,   0, 
-      7,   0,   0,   0,  70,   0, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5, 194,   0, 
-     16,   0,   7,   0,   0,   0, 
-    166,  14,  16,   0,   7,   0, 
-      0,   0,  38,   0,   0,   9, 
-      0, 208,   0,   0,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,   7,   0, 
-      0,   0,  70, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  30,   0,   0,   7, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     35,   0,   0,  10,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  35,   0, 
-      0,  10,  34,   0,  16,   0, 
-      2,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  35,   0, 128, 131, 153, 
-     25,   0,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,   6, 224,  17,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   7,  66,   0,  16,   0, 
-      5,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-    128,  63,  18,   0,   0,   1, 
-     32,   0,   0,   8,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      1,  64,   0,   0,   2,   0, 
-      0,   0,  31,   0,   4,   3, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  55,   0,   0,  10, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  10, 128,  32,   0, 
-      0,   0,   0,   0,   7,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0, 128,   1,  64, 
-      0,   0,   0,   0,   0,  63, 
-      0,   0,   0,   7,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-    230,  10,  16,   0,   0,   0, 
-      0,   0,  86,   5,  16,   0, 
-      2,   0,   0,   0,  49,   0, 
-      0,   7,  34,   0,  16,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,   0,   0,   0,   8, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  42,   0, 
-     16, 128,  65,   0,   0,   0, 
-      2,   0,   0,   0,  14,   0, 
-      0,   7,  66,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  28,   0,   0,   5, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  86,   0, 
-      0,   5, 130,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-     50,   0,   0,  10, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16, 128,  65,   0, 
-      0,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,   1,   0, 
-      0,   7,  66,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-      1,  64,   0,   0,   1,   0, 
-      0,   0,   0,   0,   0,   7, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-      0,   0,   0,   8, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     42,   0,  16,   0,   0,   0, 
-      0,   0,  58,   0,  16, 128, 
-     65,   0,   0,   0,   3,   0, 
-      0,   0,  55,   0,   0,   9, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  49,   0,   0,   7, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      0,   0,   0,   0,  42,   0, 
-     16,   0,   2,   0,   0,   0, 
-      0,   0,   0,   8, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16, 128,  65,   0, 
-      0,   0,   0,   0,   0,   0, 
-     42,   0,  16,   0,   2,   0, 
-      0,   0,  14,   0,   0,   7, 
-     18,   0,  16,   0,   9,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5,  18,   0, 
-     16,   0,   9,   0,   0,   0, 
-     10,   0,  16,   0,   9,   0, 
-      0,   0,  86,   0,   0,   5, 
-     34,   0,  16,   0,   9,   0, 
-      0,   0,  10,   0,  16,   0, 
-      9,   0,   0,   0,  50,   0, 
-      0,  10,  18,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16, 128,  65,   0,   0,   0, 
-      9,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,   1,   0,   0,   7, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      9,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   8,  18,   0, 
-     16,   0,   9,   0,   0,   0, 
-     42,   0,  16,   0,   0,   0, 
-      0,   0,  10,   0,  16, 128, 
-     65,   0,   0,   0,   7,   0, 
-      0,   0,   0,   0,   0,   7, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     55,   0,   0,   9,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   9,   0,   0,   0, 
-     55,   0,   0,   9,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   2,   0,   0,   0, 
-     55,   0,   0,   9,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  49,   0,   0,   7, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-      0,   0,   0,   8,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  10,   0,  16, 128, 
-     65,   0,   0,   0,   1,   0, 
-      0,   0,  14,   0,   0,   7, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  86,   0,   0,   5, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  50,   0, 
-      0,  10,  18,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16, 128,  65,   0,   0,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,   1,   0,   0,   7, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   7, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,   0,   0, 
-      0,   8,  18,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   0,   0,   0,   0, 
-     10,   0,  16, 128,  65,   0, 
-      0,   0,   7,   0,   0,   0, 
-     55,   0,   0,   9,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     49,   0,   0,   7,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   0,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   8, 130,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16, 128,  65,   0,   0,   0, 
-      0,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     14,   0,   0,   7,  18,   0, 
-     16,   0,   9,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  28,   0, 
-      0,   5,  18,   0,  16,   0, 
-      9,   0,   0,   0,  10,   0, 
-     16,   0,   9,   0,   0,   0, 
-     86,   0,   0,   5,  34,   0, 
-     16,   0,   9,   0,   0,   0, 
-     10,   0,  16,   0,   9,   0, 
-      0,   0,  50,   0,   0,  10, 
-     34,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16, 128, 
-     65,   0,   0,   0,   9,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-      1,   0,   0,   7, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   9,   0, 
-      0,   0,   1,  64,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   8,  18,   0,  16,   0, 
-      9,   0,   0,   0,  58,   0, 
-     16,   0,   0,   0,   0,   0, 
-     26,   0,  16, 128,  65,   0, 
-      0,   0,   7,   0,   0,   0, 
-      0,   0,   0,   7,  34,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  55,   0, 
-      0,   9,  34,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      9,   0,   0,   0,  55,   0, 
-      0,   9,  34,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  55,   0, 
-      0,   9, 130,   0,  16,   0, 
-      3,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  28,   0, 
-      0,   5, 130,   0,  16,   0, 
-      3,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     86,   0,   0,   5,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  86,   0,   0,   5, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  28,   0, 
-      0,   5,  50,   0,  16,   0, 
-      7,   0,   0,   0,  70,   0, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5, 194,   0, 
-     16,   0,   7,   0,   0,   0, 
-    166,  14,  16,   0,   7,   0, 
-      0,   0,  38,   0,   0,   9, 
-      0, 208,   0,   0,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,   7,   0, 
-      0,   0,  70, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  30,   0,   0,   7, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     35,   0,   0,  10,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  35,   0, 
-      0,  10,  34,   0,  16,   0, 
-      2,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  35,   0, 128, 131, 153, 
-     25,   0,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,   6, 224,  17,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   7,  66,   0,  16,   0, 
-      5,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-    128,  63,  18,   0,   0,   1, 
-     54,   0,   0,   5,  66,   0, 
-     16,   0,   5,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  21,   0,   0,   1, 
-     21,   0,   0,   1,  31,   0, 
-      4,   3,  42,   0,  16,   0, 
-      3,   0,   0,   0,  30,   0, 
-      0,  11,  50,   0,  16,   0, 
-      7,   0,   0,   0, 230, 138, 
-     32,   0,   0,   0,   0,   0, 
-      1,   0,   0,   0,   2,  64, 
-      0,   0, 255, 255, 255, 255, 
-    255, 255, 255, 255,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-     86,   0,   0,   5,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,   7,   0, 
-      0,   0,  52,   0,   0,  10, 
-    194,   0,  16,   0,   7,   0, 
-      0,   0,   6,   4,  16,   0, 
-      1,   0,   0,   0,   2,  64, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-     51,   0,   0,   7,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,   7,   0, 
-      0,   0, 230,  10,  16,   0, 
-      7,   0,   0,   0,  28,   0, 
-      0,   5,  50,   0,  16,   0, 
-      7,   0,   0,   0,  70,   0, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5, 194,   0, 
-     16,   0,   7,   0,   0,   0, 
-      6,   4,  16,   0,   3,   0, 
-      0,   0,  38,   0,   0,   9, 
-      0, 208,   0,   0, 194,   0, 
-     16,   0,   7,   0,   0,   0, 
-    166,  14,  16,   0,   7,   0, 
-      0,   0,   6, 132,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  30,   0,   0,   7, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-     35,   0,   0,  10,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  35,   0, 
-      0,  10,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  35,   0, 128, 131, 153, 
-     25,   0,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,   6, 224,  17,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   7,  66,   0,  16,   0, 
-      4,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-    128,  63,  18,   0,   0,   1, 
-     32,   0,   0,   8,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      1,  64,   0,   0,   2,   0, 
-      0,   0,  31,   0,   4,   3, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  55,   0,   0,  10, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  10, 128,  32,   0, 
-      0,   0,   0,   0,   7,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0, 128,   1,  64, 
-      0,   0,   0,   0,   0,  63, 
-      0,   0,   0,   7,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-    182,  15,  16,   0,   0,   0, 
-      0,   0,  86,   5,  16,   0, 
-      2,   0,   0,   0,  49,   0, 
-      0,   7, 194,   0,  16,   0, 
-      7,   0,   0,   0,   6,   4, 
-     16,   0,   1,   0,   0,   0, 
-     86,   5,  16,   0,   0,   0, 
-      0,   0,   0,   0,   0,   8, 
-     50,   0,  16,   0,   9,   0, 
-      0,   0,  86,   5,  16,   0, 
-      0,   0,   0,   0,  70,   0, 
-     16, 128,  65,   0,   0,   0, 
-      1,   0,   0,   0,  14,   0, 
-      0,   7, 194,   0,  16,   0, 
-      9,   0,   0,   0,   6,   4, 
-     16,   0,   9,   0,   0,   0, 
-      6,   4,  16,   0,   7,   0, 
-      0,   0,  28,   0,   0,   5, 
-    194,   0,  16,   0,   9,   0, 
-      0,   0, 166,  14,  16,   0, 
-      9,   0,   0,   0,  86,   0, 
-      0,   5,  50,   0,  16,   0, 
-     10,   0,   0,   0, 230,  10, 
-     16,   0,   9,   0,   0,   0, 
-     50,   0,   0,  10,  50,   0, 
-     16,   0,   9,   0,   0,   0, 
-     70,   0,  16, 128,  65,   0, 
-      0,   0,  10,   0,   0,   0, 
-     70,   0,  16,   0,   7,   0, 
-      0,   0,  70,   0,  16,   0, 
-      9,   0,   0,   0,   1,   0, 
-      0,  10, 194,   0,  16,   0, 
-      9,   0,   0,   0, 166,  14, 
-     16,   0,   9,   0,   0,   0, 
-      2,  64,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      1,   0,   0,   0,   1,   0, 
-      0,   0,   0,   0,   0,   7, 
-     50,   0,  16,   0,  10,   0, 
-      0,   0,  86,   5,  16,   0, 
-      0,   0,   0,   0,  70,   0, 
-     16,   0,   9,   0,   0,   0, 
-      0,   0,   0,   8,  50,   0, 
-     16,   0,   9,   0,   0,   0, 
-    182,  15,  16,   0,   0,   0, 
-      0,   0,  70,   0,  16, 128, 
-     65,   0,   0,   0,   9,   0, 
-      0,   0,  55,   0,   0,   9, 
-     50,   0,  16,   0,   9,   0, 
-      0,   0, 230,  10,  16,   0, 
-      9,   0,   0,   0,  70,   0, 
-     16,   0,   9,   0,   0,   0, 
-     70,   0,  16,   0,  10,   0, 
-      0,   0,  49,   0,   0,   7, 
-    194,   0,  16,   0,   9,   0, 
-      0,   0, 246,  11,  16,   0, 
-      0,   0,   0,   0,   6,   4, 
-     16,   0,   1,   0,   0,   0, 
-      0,   0,   0,   8,  50,   0, 
-     16,   0,  10,   0,   0,   0, 
-    182,  15,  16, 128,  65,   0, 
-      0,   0,   0,   0,   0,   0, 
-     70,   0,  16,   0,   1,   0, 
-      0,   0,  14,   0,   0,   7, 
-    194,   0,  16,   0,  10,   0, 
-      0,   0,   6,   4,  16,   0, 
-     10,   0,   0,   0,   6,   4, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5, 194,   0, 
-     16,   0,  10,   0,   0,   0, 
-    166,  14,  16,   0,  10,   0, 
-      0,   0,  86,   0,   0,   5, 
-     50,   0,  16,   0,  11,   0, 
-      0,   0, 230,  10,  16,   0, 
-     10,   0,   0,   0,  50,   0, 
-      0,  10,  50,   0,  16,   0, 
-      7,   0,   0,   0,  70,   0, 
-     16, 128,  65,   0,   0,   0, 
-     11,   0,   0,   0,  70,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,  10,   0, 
-      0,   0,   1,   0,   0,  10, 
-     50,   0,  16,   0,  10,   0, 
-      0,   0, 230,  10,  16,   0, 
-     10,   0,   0,   0,   2,  64, 
-      0,   0,   1,   0,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   8, 194,   0, 
-     16,   0,  10,   0,   0,   0, 
-    246,  11,  16,   0,   0,   0, 
-      0,   0,   6,   4,  16, 128, 
-     65,   0,   0,   0,   7,   0, 
-      0,   0,   0,   0,   0,   7, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0,  86,   5,  16,   0, 
-      0,   0,   0,   0,  70,   0, 
-     16,   0,   7,   0,   0,   0, 
-     55,   0,   0,   9,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,  10,   0, 
-      0,   0,  70,   0,  16,   0, 
-      7,   0,   0,   0, 230,  10, 
-     16,   0,  10,   0,   0,   0, 
-     55,   0,   0,   9,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-    230,  10,  16,   0,   9,   0, 
-      0,   0,  70,   0,  16,   0, 
-      7,   0,   0,   0,  70,   0, 
-     16,   0,   1,   0,   0,   0, 
-     55,   0,   0,   9,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-    230,  10,  16,   0,   7,   0, 
-      0,   0,  70,   0,  16,   0, 
-      9,   0,   0,   0,  70,   0, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,   7,   0, 
-      0,   0,  28,   0,   0,   5, 
-    194,   0,  16,   0,   7,   0, 
-      0,   0,   6,   4,  16,   0, 
-      3,   0,   0,   0,  38,   0, 
-      0,   9,   0, 208,   0,   0, 
-    194,   0,  16,   0,   7,   0, 
-      0,   0, 166,  14,  16,   0, 
-      7,   0,   0,   0,   6, 132, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  30,   0, 
-      0,   7,  34,   0,  16,   0, 
-      2,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  35,   0,   0,  10, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  42, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     35,   0,   0,  10,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  58, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0, 167,   0, 
-      0, 139,   2,  35,   0, 128, 
-    131, 153,  25,   0,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,   6, 224, 
-     17,   0,   0,   0,   0,   0, 
-      1,   0,   0,   7,  66,   0, 
-     16,   0,   4,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0, 128,  63,  18,   0, 
-      0,   1,  54,   0,   0,   5, 
-     66,   0,  16,   0,   4,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,  21,   0, 
-      0,   1,  21,   0,   0,   1, 
-     31,   0,   4,   3,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     30,   0,   0,  11,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-    182, 143,  32,   0,   0,   0, 
-      0,   0,   1,   0,   0,   0, 
-      2,  64,   0,   0, 255, 255, 
-    255, 255, 255, 255, 255, 255, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,  86,   0,   0,   5, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0,  70,   0,  16,   0, 
-      7,   0,   0,   0,  52,   0, 
-      0,   7,  34,   0,  16,   0, 
-      2,   0,   0,   0,  58,   0, 
-     16,   0,   2,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  51,   0,   0,   7, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     28,   0,   0,   5,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  52,   0,   0,   7, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     51,   0,   0,   7, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5, 130,   0,  16,   0, 
-      3,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     86,   0,   0,   5,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  86,   0,   0,   5, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  28,   0, 
-      0,   5,  50,   0,  16,   0, 
-      7,   0,   0,   0,  70,   0, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5, 194,   0, 
-     16,   0,   7,   0,   0,   0, 
-    166,  14,  16,   0,   7,   0, 
-      0,   0,  38,   0,   0,   9, 
-      0, 208,   0,   0,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,   7,   0, 
-      0,   0,  70, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  30,   0,   0,   7, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     35,   0,   0,  10,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  35,   0, 
-      0,  10,  34,   0,  16,   0, 
-      2,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  35,   0, 128, 131, 153, 
-     25,   0,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,   6, 224,  17,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   7,  66,   0,  16,   0, 
-      6,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-    128,  63,  18,   0,   0,   1, 
-     32,   0,   0,   8,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      1,  64,   0,   0,   2,   0, 
-      0,   0,  31,   0,   4,   3, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  55,   0,   0,  10, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  10, 128,  32,   0, 
-      0,   0,   0,   0,   7,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0, 128,   1,  64, 
-      0,   0,   0,   0,   0,  63, 
-      0,   0,   0,   7,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-    230,  10,  16,   0,   0,   0, 
-      0,   0,  86,   5,  16,   0, 
-      2,   0,   0,   0,  49,   0, 
-      0,   7,  34,   0,  16,   0, 
-      2,   0,   0,   0,  58,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,   0,   0,   0,   8, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  58,   0, 
-     16, 128,  65,   0,   0,   0, 
-      2,   0,   0,   0,  14,   0, 
-      0,   7,  66,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  28,   0,   0,   5, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  86,   0, 
-      0,   5, 130,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-     50,   0,   0,  10, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16, 128,  65,   0, 
-      0,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,   1,   0, 
-      0,   7,  66,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-      1,  64,   0,   0,   1,   0, 
-      0,   0,   0,   0,   0,   7, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-      0,   0,   0,   8, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     42,   0,  16,   0,   0,   0, 
-      0,   0,  58,   0,  16, 128, 
-     65,   0,   0,   0,   3,   0, 
-      0,   0,  55,   0,   0,   9, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  49,   0,   0,   7, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      0,   0,   0,   0,  58,   0, 
-     16,   0,   2,   0,   0,   0, 
-      0,   0,   0,   8, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16, 128,  65,   0, 
-      0,   0,   0,   0,   0,   0, 
-     58,   0,  16,   0,   2,   0, 
-      0,   0,  14,   0,   0,   7, 
-     18,   0,  16,   0,   9,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5,  18,   0, 
-     16,   0,   9,   0,   0,   0, 
-     10,   0,  16,   0,   9,   0, 
-      0,   0,  86,   0,   0,   5, 
-     34,   0,  16,   0,   9,   0, 
-      0,   0,  10,   0,  16,   0, 
-      9,   0,   0,   0,  50,   0, 
-      0,  10,  18,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16, 128,  65,   0,   0,   0, 
-      9,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,   1,   0,   0,   7, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      9,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   8,  18,   0, 
-     16,   0,   9,   0,   0,   0, 
-     42,   0,  16,   0,   0,   0, 
-      0,   0,  10,   0,  16, 128, 
-     65,   0,   0,   0,   7,   0, 
-      0,   0,   0,   0,   0,   7, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     55,   0,   0,   9,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   9,   0,   0,   0, 
-     55,   0,   0,   9,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   2,   0,   0,   0, 
-     55,   0,   0,   9,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  49,   0,   0,   7, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-      0,   0,   0,   8,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  10,   0,  16, 128, 
-     65,   0,   0,   0,   1,   0, 
-      0,   0,  14,   0,   0,   7, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  86,   0,   0,   5, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  50,   0, 
-      0,  10,  18,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16, 128,  65,   0,   0,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,   1,   0,   0,   7, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   7, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,   0,   0, 
-      0,   8,  18,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   0,   0,   0,   0, 
-     10,   0,  16, 128,  65,   0, 
-      0,   0,   7,   0,   0,   0, 
-     55,   0,   0,   9,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     49,   0,   0,   7,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   0,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   8, 130,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16, 128,  65,   0,   0,   0, 
-      0,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     14,   0,   0,   7,  18,   0, 
-     16,   0,   9,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  28,   0, 
-      0,   5,  18,   0,  16,   0, 
-      9,   0,   0,   0,  10,   0, 
-     16,   0,   9,   0,   0,   0, 
-     86,   0,   0,   5,  34,   0, 
-     16,   0,   9,   0,   0,   0, 
-     10,   0,  16,   0,   9,   0, 
-      0,   0,  50,   0,   0,  10, 
-     34,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16, 128, 
-     65,   0,   0,   0,   9,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-      1,   0,   0,   7, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   9,   0, 
-      0,   0,   1,  64,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   8,  18,   0,  16,   0, 
-      9,   0,   0,   0,  58,   0, 
-     16,   0,   0,   0,   0,   0, 
-     26,   0,  16, 128,  65,   0, 
-      0,   0,   7,   0,   0,   0, 
-      0,   0,   0,   7,  34,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  55,   0, 
-      0,   9,  34,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      9,   0,   0,   0,  55,   0, 
-      0,   9,  34,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  55,   0, 
-      0,   9, 130,   0,  16,   0, 
-      3,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  28,   0, 
-      0,   5, 130,   0,  16,   0, 
-      3,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     86,   0,   0,   5,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  86,   0,   0,   5, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  28,   0, 
-      0,   5,  50,   0,  16,   0, 
-      7,   0,   0,   0,  70,   0, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5, 194,   0, 
-     16,   0,   7,   0,   0,   0, 
-    166,  14,  16,   0,   7,   0, 
-      0,   0,  38,   0,   0,   9, 
-      0, 208,   0,   0,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,   7,   0, 
-      0,   0,  70, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  30,   0,   0,   7, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     35,   0,   0,  10,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  35,   0, 
-      0,  10,  34,   0,  16,   0, 
-      2,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  35,   0, 128, 131, 153, 
-     25,   0,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,   6, 224,  17,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   7,  66,   0,  16,   0, 
-      6,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-    128,  63,  18,   0,   0,   1, 
-     54,   0,   0,   5,  66,   0, 
-     16,   0,   6,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  21,   0,   0,   1, 
-     21,   0,   0,   1,  31,   0, 
-      4,   3,  42,   0,  16,   0, 
-      3,   0,   0,   0,  30,   0, 
-      0,  11, 194,   0,  16,   0, 
-      3,   0,   0,   0, 246, 139, 
-     32,   0,   0,   0,   0,   0, 
-      1,   0,   0,   0,   2,  64, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0, 255, 255, 
-    255, 255, 255, 255, 255, 255, 
-     86,   0,   0,   5, 194,   0, 
-     16,   0,   3,   0,   0,   0, 
-    166,  14,  16,   0,   3,   0, 
-      0,   0,  52,   0,   0,   7, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      2,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     51,   0,   0,   7,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  28,   0, 
-      0,   5,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     52,   0,   0,   7,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,  51,   0, 
-      0,   7,  66,   0,  16,   0, 
-      3,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  28,   0,   0,   5, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  86,   0, 
-      0,   5,  66,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     86,   0,   0,   5, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  28,   0,   0,   5, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0,  70,   0,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5, 194,   0,  16,   0, 
-      3,   0,   0,   0, 166,  14, 
-     16,   0,   3,   0,   0,   0, 
-     38,   0,   0,   9,   0, 208, 
-      0,   0,  50,   0,  16,   0, 
-      7,   0,   0,   0,  70,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     30,   0,   0,   7,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  35,   0, 
-      0,  10,  34,   0,  16,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     42, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  35,   0,   0,  10, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-    167,   0,   0, 139,   2,  35, 
-      0, 128, 131, 153,  25,   0, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-      6, 224,  17,   0,   0,   0, 
-      0,   0,   1,   0,   0,   7, 
-     66,   0,  16,   0,   8,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0, 128,  63, 
-     18,   0,   0,   1,  32,   0, 
-      0,   8,  34,   0,  16,   0, 
-      2,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   1,  64, 
-      0,   0,   2,   0,   0,   0, 
-     31,   0,   4,   3,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     55,   0,   0,  10,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     10, 128,  32,   0,   0,   0, 
-      0,   0,   7,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0, 128,   1,  64,   0,   0, 
-      0,   0,   0,  63,   0,   0, 
-      0,   7, 194,   0,  16,   0, 
-      3,   0,   0,   0, 166,  14, 
-     16,   0,   0,   0,   0,   0, 
-     86,   5,  16,   0,   2,   0, 
-      0,   0,  49,   0,   0,   7, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-      0,   0,   0,   8,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  10,   0,  16, 128, 
-     65,   0,   0,   0,   2,   0, 
-      0,   0,  14,   0,   0,   7, 
-     34,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5,  34,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  86,   0,   0,   5, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  50,   0, 
-      0,  10,  18,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16, 128,  65,   0,   0,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,   1,   0,   0,   7, 
-     34,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   7,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,   0,   0, 
-      0,   8,  18,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   0,   0,   0,   0, 
-     10,   0,  16, 128,  65,   0, 
-      0,   0,   7,   0,   0,   0, 
-     55,   0,   0,   9,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-     49,   0,   0,   7,  34,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   0,   0, 
-      0,   0,  10,   0,  16,   0, 
-      2,   0,   0,   0,   0,   0, 
-      0,   8,  66,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16, 128,  65,   0,   0,   0, 
-      0,   0,   0,   0,  10,   0, 
-     16,   0,   2,   0,   0,   0, 
-     14,   0,   0,   7, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5, 130,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     86,   0,   0,   5,  18,   0, 
-     16,   0,   9,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  50,   0,   0,  10, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  10,   0,  16, 128, 
-     65,   0,   0,   0,   9,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-      1,   0,   0,   7,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,   1,  64,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   8, 130,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   0,   0,   0,   0, 
-     42,   0,  16, 128,  65,   0, 
-      0,   0,   3,   0,   0,   0, 
-      0,   0,   0,   7,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  55,   0, 
-      0,   9,  66,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,  55,   0, 
-      0,   9,  66,   0,  16,   0, 
-      3,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  10,   0,  16,   0, 
-      2,   0,   0,   0,  55,   0, 
-      0,   9,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     49,   0,   0,   7,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   8,  18,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-     10,   0,  16, 128,  65,   0, 
-      0,   0,   1,   0,   0,   0, 
-     14,   0,   0,   7,  34,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5,  34,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     86,   0,   0,   5,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  50,   0,   0,  10, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16, 128, 
-     65,   0,   0,   0,   7,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-      1,   0,   0,   7,  34,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,   1,  64,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   7,  66,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,   0,   0,   0,   8, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16,   0, 
-      0,   0,   0,   0,  10,   0, 
-     16, 128,  65,   0,   0,   0, 
-      7,   0,   0,   0,  55,   0, 
-      0,   9,  18,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  49,   0, 
-      0,   7,  34,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   0,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,   0,   0,   0,   8, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16, 128, 
-     65,   0,   0,   0,   0,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  14,   0, 
-      0,   7, 130,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  28,   0,   0,   5, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,  86,   0, 
-      0,   5,  18,   0,  16,   0, 
-      9,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     50,   0,   0,  10, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10,   0,  16, 128,  65,   0, 
-      0,   0,   9,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,   1,   0, 
-      0,   7,  66,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-      1,  64,   0,   0,   1,   0, 
-      0,   0,   0,   0,   0,   8, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16,   0, 
-      0,   0,   0,   0,  58,   0, 
-     16, 128,  65,   0,   0,   0, 
-      3,   0,   0,   0,   0,   0, 
-      0,   7, 130,   0,  16,   0, 
-      3,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  55,   0,   0,   9, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  55,   0,   0,   9, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  55,   0,   0,   9, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  28,   0,   0,   5, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  86,   0, 
-      0,   5,  66,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     86,   0,   0,   5, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  28,   0,   0,   5, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0,  70,   0,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5, 194,   0,  16,   0, 
-      3,   0,   0,   0, 166,  14, 
-     16,   0,   3,   0,   0,   0, 
-     38,   0,   0,   9,   0, 208, 
-      0,   0,  50,   0,  16,   0, 
-      7,   0,   0,   0,  70,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     30,   0,   0,   7,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  35,   0, 
-      0,  10,  34,   0,  16,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     42, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  35,   0,   0,  10, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-    167,   0,   0, 139,   2,  35, 
-      0, 128, 131, 153,  25,   0, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-      6, 224,  17,   0,   0,   0, 
-      0,   0,   1,   0,   0,   7, 
-     66,   0,  16,   0,   8,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0, 128,  63, 
-     18,   0,   0,   1,  54,   0, 
-      0,   5,  66,   0,  16,   0, 
-      8,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     21,   0,   0,   1,  21,   0, 
-      0,   1,  21,   0,   0,   1, 
-      0,   0,   0,  10, 194,   0, 
-     16,   0,   3,   0,   0,   0, 
-      6,   0,  16,   0,   1,   0, 
-      0,   0,   2,  64,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0, 128,  63, 
-      0,   0,   0,  64,  31,   0, 
-      0,   4,  58, 128,  32,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,  29,   0,   0,  10, 
-    114,   0,  16,   0,   7,   0, 
-      0,   0, 230,   8,  16,   0, 
-      2,   0,   0,   0,   2,  64, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-     28,   0,   0,   5, 114,   0, 
-     16,   0,   9,   0,   0,   0, 
-    230,   8,  16,   0,   2,   0, 
-      0,   0,  79,   0,   0,   8, 
-    114,   0,  16,   0,   9,   0, 
-      0,   0,  70,   2,  16,   0, 
-      9,   0,   0,   0, 246, 143, 
-     32,   0,   0,   0,   0,   0, 
-      1,   0,   0,   0,   1,   0, 
-      0,   7, 114,   0,  16,   0, 
-      7,   0,   0,   0,  70,   2, 
-     16,   0,   7,   0,   0,   0, 
-     70,   2,  16,   0,   9,   0, 
-      0,   0,  29,   0,   0,   7, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-      1,   0,   0,   7, 114,   0, 
-     16,   0,   7,   0,   0,   0, 
-      6,   0,  16,   0,   1,   0, 
-      0,   0,  70,   2,  16,   0, 
-      7,   0,   0,   0,  28,   0, 
-      0,   5,  34,   0,  16,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     79,   0,   0,   8,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   0,   1,   0,   0,   7, 
-    114,   0,  16,   0,   7,   0, 
-      0,   0,  86,   5,  16,   0, 
-      2,   0,   0,   0,  70,   2, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5,  50,   0, 
-     16,   0,   9,   0,   0,   0, 
-     70,   0,  16,   0,   3,   0, 
-      0,   0,  28,   0,   0,   5, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5,  66,   0,  16,   0, 
-      9,   0,   0,   0,  42,   0, 
-     16,   0,   2,   0,   0,   0, 
-     38,   0,   0,   9,   0, 208, 
-      0,   0,  50,   0,  16,   0, 
-      9,   0,   0,   0,  70,   0, 
-     16,   0,   9,   0,   0,   0, 
-     70, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     30,   0,   0,   7,  18,   0, 
-     16,   0,   9,   0,   0,   0, 
-     26,   0,  16,   0,   9,   0, 
-      0,   0,  10,   0,  16,   0, 
-      9,   0,   0,   0,  35,   0, 
-      0,  10, 130,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   9,   0, 
-      0,   0,  35,   0,   0,  10, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      9,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-    167,   0,   0, 139,   2,  35, 
-      0, 128, 131, 153,  25,   0, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-      6, 224,  17,   0,   0,   0, 
-      0,   0,   1,   0,   0,   7, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-      1,   0,   0,   7,  34,   0, 
-     16,   0,   5,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0, 128,  63,  29,   0, 
-      0,   7,  18,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  28,   0,   0,   5, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      1,   0,   0,   0,  79,   0, 
-      0,   8, 130,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   1,   0,   0,   0, 
-      1,   0,   0,   7,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,   1,   0, 
-      0,   7,  18,   0,  16,   0, 
-      1,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,   1,   0,   0,   7, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     28,   0,   0,   5, 146,   0, 
-     16,   0,   7,   0,   0,   0, 
-      6,   4,  16,   0,   3,   0, 
-      0,   0,  28,   0,   0,   5, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5,  18,   0,  16,   0, 
-      9,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-     38,   0,   0,   9,   0, 208, 
-      0,   0, 146,   0,  16,   0, 
-      7,   0,   0,   0,   6,  12, 
-     16,   0,   7,   0,   0,   0, 
-      6, 132,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     30,   0,   0,   7,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  35,   0, 
-      0,  10,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  35,   0,   0,  10, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      9,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-    167,   0,   0, 139,   2,  35, 
-      0, 128, 131, 153,  25,   0, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-      6, 224,  17,   0,   0,   0, 
-      0,   0,   1,   0,   0,   7, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-      1,   0,   0,   7,  34,   0, 
-     16,   0,   4,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0, 128,  63,  28,   0, 
-      0,   5, 146,   0,  16,   0, 
-      7,   0,   0,   0,   6,   4, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  28,   0,   0,   5, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  58,   0,  16,   0, 
-      2,   0,   0,   0,  38,   0, 
-      0,   9,   0, 208,   0,   0, 
-    146,   0,  16,   0,   7,   0, 
-      0,   0,   6,  12,  16,   0, 
-      7,   0,   0,   0,   6, 132, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  30,   0, 
-      0,   7,  18,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  35,   0,   0,  10, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  42, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     35,   0,   0,  10,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  58, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0, 167,   0, 
-      0, 139,   2,  35,   0, 128, 
-    131, 153,  25,   0,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,   6, 224, 
-     17,   0,   0,   0,   0,   0, 
-      1,   0,   0,   7,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,   1,   0, 
-      0,   7,  34,   0,  16,   0, 
-      6,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-    128,  63,  28,   0,   0,   5, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0,  70,   0,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5,  18,   0,  16,   0, 
-      1,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   2,   0, 
-      0,   0,  38,   0,   0,   9, 
-      0, 208,   0,   0,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,   7,   0, 
-      0,   0,  70, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  30,   0,   0,   7, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     35,   0,   0,  10,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  35,   0, 
-      0,  10,  18,   0,  16,   0, 
-      1,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  35,   0, 128, 131, 153, 
-     25,   0,  18,   0,  16,   0, 
-      1,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,   6, 224,  17,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   7,  18,   0,  16,   0, 
-      1,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,   1,   0,   0,   7, 
-     34,   0,  16,   0,   8,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0, 128,  63, 
-     18,   0,   0,   1,  32,   0, 
-      0,   8,  18,   0,  16,   0, 
-      1,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-     31,   0,   4,   3,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     30,   0,   0,  11,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-    182, 143,  32,   0,   0,   0, 
-      0,   0,   1,   0,   0,   0, 
-      2,  64,   0,   0, 255, 255, 
-    255, 255, 255, 255, 255, 255, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,  86,   0,   0,   5, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0,  70,   0,  16,   0, 
-      7,   0,   0,   0,  52,   0, 
-      0,   7,  34,   0,  16,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16,   0,   2,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  51,   0,   0,   7, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     28,   0,   0,   5,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  52,   0,   0,   7, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     51,   0,   0,   7,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  28,   0, 
-      0,   5,  18,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     86,   0,   0,   5,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  86,   0,   0,   5, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  28,   0, 
-      0,   5,  50,   0,  16,   0, 
-      7,   0,   0,   0,  70,   0, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5, 194,   0, 
-     16,   0,   7,   0,   0,   0, 
-    166,  14,  16,   0,   7,   0, 
-      0,   0,  38,   0,   0,   9, 
-      0, 208,   0,   0,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,   7,   0, 
-      0,   0,  70, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  30,   0,   0,   7, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     35,   0,   0,  10,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  35,   0, 
-      0,  10,  34,   0,  16,   0, 
-      2,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  35,   0, 128, 131, 153, 
-     25,   0,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,   6, 224,  17,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   7,  34,   0,  16,   0, 
-      5,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-    128,  63,  18,   0,   0,   1, 
-     32,   0,   0,   8,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      1,  64,   0,   0,   2,   0, 
-      0,   0,  31,   0,   4,   3, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  55,   0,   0,  10, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  10, 128,  32,   0, 
-      0,   0,   0,   0,   7,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0, 128,   1,  64, 
-      0,   0,   0,   0,   0,  63, 
-      0,   0,   0,   7,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-    230,  10,  16,   0,   0,   0, 
-      0,   0,  86,   5,  16,   0, 
-      2,   0,   0,   0,  49,   0, 
-      0,   7,  34,   0,  16,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,   0,   0,   0,   8, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  42,   0, 
-     16, 128,  65,   0,   0,   0, 
-      2,   0,   0,   0,  14,   0, 
-      0,   7, 130,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  28,   0,   0,   5, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,  86,   0, 
-      0,   5,  18,   0,  16,   0, 
-      9,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     50,   0,   0,  10,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16, 128,  65,   0, 
-      0,   0,   9,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,   1,   0, 
-      0,   7, 130,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-      1,  64,   0,   0,   1,   0, 
-      0,   0,   0,   0,   0,   7, 
-     18,   0,  16,   0,   9,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-      0,   0,   0,   8,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   0,   0, 
-      0,   0,  42,   0,  16, 128, 
-     65,   0,   0,   0,   7,   0, 
-      0,   0,  55,   0,   0,   9, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   9,   0, 
-      0,   0,  49,   0,   0,   7, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      0,   0,   0,   0,  42,   0, 
-     16,   0,   2,   0,   0,   0, 
-      0,   0,   0,   8,  18,   0, 
-     16,   0,   9,   0,   0,   0, 
-     42,   0,  16, 128,  65,   0, 
-      0,   0,   0,   0,   0,   0, 
-     42,   0,  16,   0,   2,   0, 
-      0,   0,  14,   0,   0,   7, 
-     34,   0,  16,   0,   9,   0, 
-      0,   0,  10,   0,  16,   0, 
-      9,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5,  34,   0, 
-     16,   0,   9,   0,   0,   0, 
-     26,   0,  16,   0,   9,   0, 
-      0,   0,  86,   0,   0,   5, 
-     66,   0,  16,   0,   9,   0, 
-      0,   0,  26,   0,  16,   0, 
-      9,   0,   0,   0,  50,   0, 
-      0,  10,  18,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16, 128,  65,   0,   0,   0, 
-      9,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   9,   0, 
-      0,   0,   1,   0,   0,   7, 
-     18,   0,  16,   0,   9,   0, 
-      0,   0,  26,   0,  16,   0, 
-      9,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   8,  34,   0, 
-     16,   0,   9,   0,   0,   0, 
-     42,   0,  16,   0,   0,   0, 
-      0,   0,  10,   0,  16, 128, 
-     65,   0,   0,   0,   7,   0, 
-      0,   0,   0,   0,   0,   7, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     55,   0,   0,   9,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   9,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   9,   0,   0,   0, 
-     55,   0,   0,   9,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   2,   0,   0,   0, 
-     55,   0,   0,   9,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  49,   0,   0,   7, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-      0,   0,   0,   8,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  42,   0,  16, 128, 
-     65,   0,   0,   0,   3,   0, 
-      0,   0,  14,   0,   0,   7, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  86,   0,   0,   5, 
-     18,   0,  16,   0,   9,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,  50,   0, 
-      0,  10,  66,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16, 128,  65,   0,   0,   0, 
-      9,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,   1,   0,   0,   7, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   7,  18,   0, 
-     16,   0,   9,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,   0,   0, 
-      0,   8,  66,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   0,   0,   0,   0, 
-     42,   0,  16, 128,  65,   0, 
-      0,   0,   7,   0,   0,   0, 
-     55,   0,   0,   9,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   9,   0,   0,   0, 
-     49,   0,   0,   7, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   0,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,   0,   0, 
-      0,   8,  18,   0,  16,   0, 
-      9,   0,   0,   0,  58,   0, 
-     16, 128,  65,   0,   0,   0, 
-      0,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     14,   0,   0,   7,  34,   0, 
-     16,   0,   9,   0,   0,   0, 
-     10,   0,  16,   0,   9,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  28,   0, 
-      0,   5,  34,   0,  16,   0, 
-      9,   0,   0,   0,  26,   0, 
-     16,   0,   9,   0,   0,   0, 
-     86,   0,   0,   5,  66,   0, 
-     16,   0,   9,   0,   0,   0, 
-     26,   0,  16,   0,   9,   0, 
-      0,   0,  50,   0,   0,  10, 
-     34,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16, 128, 
-     65,   0,   0,   0,   9,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   9,   0,   0,   0, 
-      1,   0,   0,   7,  18,   0, 
-     16,   0,   9,   0,   0,   0, 
-     26,   0,  16,   0,   9,   0, 
-      0,   0,   1,  64,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   8,  34,   0,  16,   0, 
-      9,   0,   0,   0,  58,   0, 
-     16,   0,   0,   0,   0,   0, 
-     26,   0,  16, 128,  65,   0, 
-      0,   0,   7,   0,   0,   0, 
-      0,   0,   0,   7,  34,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  55,   0, 
-      0,   9,  34,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   9,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      9,   0,   0,   0,  55,   0, 
-      0,   9,  34,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  55,   0, 
-      0,   9,  18,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  28,   0, 
-      0,   5,  18,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     86,   0,   0,   5,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  86,   0,   0,   5, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  28,   0, 
-      0,   5,  50,   0,  16,   0, 
-      7,   0,   0,   0,  70,   0, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5, 194,   0, 
-     16,   0,   7,   0,   0,   0, 
-    166,  14,  16,   0,   7,   0, 
-      0,   0,  38,   0,   0,   9, 
-      0, 208,   0,   0,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,   7,   0, 
-      0,   0,  70, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  30,   0,   0,   7, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     35,   0,   0,  10,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  35,   0, 
-      0,  10,  34,   0,  16,   0, 
-      2,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  35,   0, 128, 131, 153, 
-     25,   0,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,   6, 224,  17,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   7,  34,   0,  16,   0, 
-      5,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-    128,  63,  18,   0,   0,   1, 
-     54,   0,   0,   5,  34,   0, 
-     16,   0,   5,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  21,   0,   0,   1, 
-     21,   0,   0,   1,  31,   0, 
-      4,   3,  10,   0,  16,   0, 
-      1,   0,   0,   0,  30,   0, 
-      0,  11,  50,   0,  16,   0, 
-      7,   0,   0,   0, 182, 143, 
-     32,   0,   0,   0,   0,   0, 
-      1,   0,   0,   0,   2,  64, 
-      0,   0, 255, 255, 255, 255, 
-    255, 255, 255, 255,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-     86,   0,   0,   5,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,   7,   0, 
-      0,   0,  52,   0,   0,   7, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      1,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     51,   0,   0,   7,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  28,   0, 
-      0,   5,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     52,   0,   0,   7,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,  51,   0, 
-      0,   7,  18,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  28,   0,   0,   5, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  86,   0, 
-      0,   5,  66,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     86,   0,   0,   5, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  28,   0,   0,   5, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0,  70,   0,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5, 194,   0,  16,   0, 
-      7,   0,   0,   0, 166,  14, 
-     16,   0,   7,   0,   0,   0, 
-     38,   0,   0,   9,   0, 208, 
-      0,   0,  50,   0,  16,   0, 
-      7,   0,   0,   0,  70,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     30,   0,   0,   7,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  35,   0, 
-      0,  10,  34,   0,  16,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  35,   0,   0,  10, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-    167,   0,   0, 139,   2,  35, 
-      0, 128, 131, 153,  25,   0, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-      6, 224,  17,   0,   0,   0, 
-      0,   0,   1,   0,   0,   7, 
-     34,   0,  16,   0,   4,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0, 128,  63, 
-     18,   0,   0,   1,  32,   0, 
-      0,   8,  34,   0,  16,   0, 
-      2,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   1,  64, 
-      0,   0,   2,   0,   0,   0, 
-     31,   0,   4,   3,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     55,   0,   0,  10,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     10, 128,  32,   0,   0,   0, 
-      0,   0,   7,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0, 128,   1,  64,   0,   0, 
-      0,   0,   0,  63,   0,   0, 
-      0,   7,  50,   0,  16,   0, 
-      7,   0,   0,   0, 230,  10, 
-     16,   0,   0,   0,   0,   0, 
-     86,   5,  16,   0,   2,   0, 
-      0,   0,  49,   0,   0,   7, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      1,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-      0,   0,   0,   8,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  26,   0,  16, 128, 
-     65,   0,   0,   0,   1,   0, 
-      0,   0,  14,   0,   0,   7, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  86,   0,   0,   5, 
-     18,   0,  16,   0,   9,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,  50,   0, 
-      0,  10,  66,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16, 128,  65,   0,   0,   0, 
-      9,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,   1,   0,   0,   7, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   7,  18,   0, 
-     16,   0,   9,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,   0,   0, 
-      0,   8,  66,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   0,   0,   0,   0, 
-     42,   0,  16, 128,  65,   0, 
-      0,   0,   7,   0,   0,   0, 
-     55,   0,   0,   9,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   9,   0,   0,   0, 
-     49,   0,   0,   7, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   0,   0, 
-      0,   0,  26,   0,  16,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   8,  18,   0,  16,   0, 
-      9,   0,   0,   0,  42,   0, 
-     16, 128,  65,   0,   0,   0, 
-      0,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-     14,   0,   0,   7,  34,   0, 
-     16,   0,   9,   0,   0,   0, 
-     10,   0,  16,   0,   9,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  28,   0, 
-      0,   5,  34,   0,  16,   0, 
-      9,   0,   0,   0,  26,   0, 
-     16,   0,   9,   0,   0,   0, 
-     86,   0,   0,   5,  66,   0, 
-     16,   0,   9,   0,   0,   0, 
-     26,   0,  16,   0,   9,   0, 
-      0,   0,  50,   0,   0,  10, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16, 128, 
-     65,   0,   0,   0,   9,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   9,   0,   0,   0, 
-      1,   0,   0,   7,  18,   0, 
-     16,   0,   9,   0,   0,   0, 
-     26,   0,  16,   0,   9,   0, 
-      0,   0,   1,  64,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   8,  34,   0,  16,   0, 
-      9,   0,   0,   0,  42,   0, 
-     16,   0,   0,   0,   0,   0, 
-     10,   0,  16, 128,  65,   0, 
-      0,   0,   7,   0,   0,   0, 
-      0,   0,   0,   7,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  55,   0, 
-      0,   9,  18,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   9,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      9,   0,   0,   0,  55,   0, 
-      0,   9,  18,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      1,   0,   0,   0,  55,   0, 
-      0,   9,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  28,   0, 
-      0,   5,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     49,   0,   0,   7,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   8,  66,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-     42,   0,  16, 128,  65,   0, 
-      0,   0,   3,   0,   0,   0, 
-     14,   0,   0,   7, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  28,   0, 
-      0,   5, 130,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     86,   0,   0,   5,  18,   0, 
-     16,   0,   9,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  50,   0,   0,  10, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16, 128, 
-     65,   0,   0,   0,   9,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-      1,   0,   0,   7, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,   1,  64,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   7,  18,   0,  16,   0, 
-      9,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,   0,   0,   0,   8, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16,   0, 
-      0,   0,   0,   0,  42,   0, 
-     16, 128,  65,   0,   0,   0, 
-      7,   0,   0,   0,  55,   0, 
-      0,   9,  66,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      9,   0,   0,   0,  49,   0, 
-      0,   7, 130,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   0,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,   0,   0,   0,   8, 
-     18,   0,  16,   0,   9,   0, 
-      0,   0,  58,   0,  16, 128, 
-     65,   0,   0,   0,   0,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  14,   0, 
-      0,   7,  34,   0,  16,   0, 
-      9,   0,   0,   0,  10,   0, 
-     16,   0,   9,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  28,   0,   0,   5, 
-     34,   0,  16,   0,   9,   0, 
-      0,   0,  26,   0,  16,   0, 
-      9,   0,   0,   0,  86,   0, 
-      0,   5,  66,   0,  16,   0, 
-      9,   0,   0,   0,  26,   0, 
-     16,   0,   9,   0,   0,   0, 
-     50,   0,   0,  10,  34,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16, 128,  65,   0, 
-      0,   0,   9,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      9,   0,   0,   0,   1,   0, 
-      0,   7,  18,   0,  16,   0, 
-      9,   0,   0,   0,  26,   0, 
-     16,   0,   9,   0,   0,   0, 
-      1,  64,   0,   0,   1,   0, 
-      0,   0,   0,   0,   0,   8, 
-     34,   0,  16,   0,   9,   0, 
-      0,   0,  58,   0,  16,   0, 
-      0,   0,   0,   0,  26,   0, 
-     16, 128,  65,   0,   0,   0, 
-      7,   0,   0,   0,   0,   0, 
-      0,   7,  34,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  55,   0,   0,   9, 
-     34,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      9,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   9,   0, 
-      0,   0,  55,   0,   0,   9, 
-     34,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  55,   0,   0,   9, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  28,   0,   0,   5, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  86,   0, 
-      0,   5,  66,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     86,   0,   0,   5, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  28,   0,   0,   5, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0,  70,   0,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5, 194,   0,  16,   0, 
-      7,   0,   0,   0, 166,  14, 
-     16,   0,   7,   0,   0,   0, 
-     38,   0,   0,   9,   0, 208, 
-      0,   0,  50,   0,  16,   0, 
-      7,   0,   0,   0,  70,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     30,   0,   0,   7,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  35,   0, 
-      0,  10,  34,   0,  16,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  35,   0,   0,  10, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-    167,   0,   0, 139,   2,  35, 
-      0, 128, 131, 153,  25,   0, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-      6, 224,  17,   0,   0,   0, 
-      0,   0,   1,   0,   0,   7, 
-     34,   0,  16,   0,   4,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0, 128,  63, 
-     18,   0,   0,   1,  54,   0, 
-      0,   5,  34,   0,  16,   0, 
-      4,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     21,   0,   0,   1,  21,   0, 
-      0,   1,  31,   0,   4,   3, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  30,   0,   0,  11, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0, 182, 143,  32,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   0,   2,  64,   0,   0, 
-    255, 255, 255, 255, 255, 255, 
-    255, 255,   0,   0,   0,   0, 
-      0,   0,   0,   0,  86,   0, 
-      0,   5,  50,   0,  16,   0, 
-      7,   0,   0,   0,  70,   0, 
-     16,   0,   7,   0,   0,   0, 
-     52,   0,   0,   7,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58,   0,  16,   0,   2,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,  51,   0, 
-      0,   7,  34,   0,  16,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  28,   0,   0,   5, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  52,   0, 
-      0,   7,  18,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  51,   0,   0,   7, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  86,   0,   0,   5, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  86,   0, 
-      0,   5, 130,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     28,   0,   0,   5,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,   3,   0, 
-      0,   0,  28,   0,   0,   5, 
-    194,   0,  16,   0,   7,   0, 
-      0,   0, 166,  14,  16,   0, 
-      7,   0,   0,   0,  38,   0, 
-      0,   9,   0, 208,   0,   0, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0,  70,   0,  16,   0, 
-      7,   0,   0,   0,  70, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  30,   0, 
-      0,   7,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  35,   0,   0,  10, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  42, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     35,   0,   0,  10,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  58, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0, 167,   0, 
-      0, 139,   2,  35,   0, 128, 
-    131, 153,  25,   0,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,   6, 224, 
-     17,   0,   0,   0,   0,   0, 
-      1,   0,   0,   7,  34,   0, 
-     16,   0,   6,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0, 128,  63,  18,   0, 
-      0,   1,  32,   0,   0,   8, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  58, 128,  32,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   1,  64,   0,   0, 
-      2,   0,   0,   0,  31,   0, 
-      4,   3,  26,   0,  16,   0, 
-      2,   0,   0,   0,  55,   0, 
-      0,  10,  34,   0,  16,   0, 
-      2,   0,   0,   0,  10, 128, 
-     32,   0,   0,   0,   0,   0, 
-      7,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0, 128, 
-      1,  64,   0,   0,   0,   0, 
-      0,  63,   0,   0,   0,   7, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0, 230,  10,  16,   0, 
-      0,   0,   0,   0,  86,   5, 
-     16,   0,   2,   0,   0,   0, 
-     49,   0,   0,   7,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   8,  66,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-     58,   0,  16, 128,  65,   0, 
-      0,   0,   2,   0,   0,   0, 
-     14,   0,   0,   7, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  28,   0, 
-      0,   5, 130,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     86,   0,   0,   5,  18,   0, 
-     16,   0,   9,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  50,   0,   0,  10, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16, 128, 
-     65,   0,   0,   0,   9,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-      1,   0,   0,   7, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,   1,  64,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   7,  18,   0,  16,   0, 
-      9,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,   0,   0,   0,   8, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      0,   0,   0,   0,  42,   0, 
-     16, 128,  65,   0,   0,   0, 
-      7,   0,   0,   0,  55,   0, 
-      0,   9,  66,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      9,   0,   0,   0,  49,   0, 
-      0,   7, 130,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   0,   0,   0,   0, 
-     58,   0,  16,   0,   2,   0, 
-      0,   0,   0,   0,   0,   8, 
-     18,   0,  16,   0,   9,   0, 
-      0,   0,  42,   0,  16, 128, 
-     65,   0,   0,   0,   0,   0, 
-      0,   0,  58,   0,  16,   0, 
-      2,   0,   0,   0,  14,   0, 
-      0,   7,  34,   0,  16,   0, 
-      9,   0,   0,   0,  10,   0, 
-     16,   0,   9,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  28,   0,   0,   5, 
-     34,   0,  16,   0,   9,   0, 
-      0,   0,  26,   0,  16,   0, 
-      9,   0,   0,   0,  86,   0, 
-      0,   5,  66,   0,  16,   0, 
-      9,   0,   0,   0,  26,   0, 
-     16,   0,   9,   0,   0,   0, 
-     50,   0,   0,  10,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16, 128,  65,   0, 
-      0,   0,   9,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      9,   0,   0,   0,   1,   0, 
-      0,   7,  18,   0,  16,   0, 
-      9,   0,   0,   0,  26,   0, 
-     16,   0,   9,   0,   0,   0, 
-      1,  64,   0,   0,   1,   0, 
-      0,   0,   0,   0,   0,   8, 
-     34,   0,  16,   0,   9,   0, 
-      0,   0,  42,   0,  16,   0, 
-      0,   0,   0,   0,  10,   0, 
-     16, 128,  65,   0,   0,   0, 
-      7,   0,   0,   0,   0,   0, 
-      0,   7,  18,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  55,   0,   0,   9, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      9,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   9,   0, 
-      0,   0,  55,   0,   0,   9, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   2,   0, 
-      0,   0,  55,   0,   0,   9, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  28,   0,   0,   5, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  49,   0, 
-      0,   7,  18,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,   0,   0,   0,   8, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  42,   0, 
-     16, 128,  65,   0,   0,   0, 
-      3,   0,   0,   0,  14,   0, 
-      0,   7, 130,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  28,   0,   0,   5, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,  86,   0, 
-      0,   5,  18,   0,  16,   0, 
-      9,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     50,   0,   0,  10,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16, 128,  65,   0, 
-      0,   0,   9,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,   1,   0, 
-      0,   7, 130,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-      1,  64,   0,   0,   1,   0, 
-      0,   0,   0,   0,   0,   7, 
-     18,   0,  16,   0,   9,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-      0,   0,   0,   8,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   0,   0, 
-      0,   0,  42,   0,  16, 128, 
-     65,   0,   0,   0,   7,   0, 
-      0,   0,  55,   0,   0,   9, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   9,   0, 
-      0,   0,  49,   0,   0,   7, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16,   0, 
-      0,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-      0,   0,   0,   8,  18,   0, 
-     16,   0,   9,   0,   0,   0, 
-     58,   0,  16, 128,  65,   0, 
-      0,   0,   0,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  14,   0,   0,   7, 
-     34,   0,  16,   0,   9,   0, 
-      0,   0,  10,   0,  16,   0, 
-      9,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5,  34,   0, 
-     16,   0,   9,   0,   0,   0, 
-     26,   0,  16,   0,   9,   0, 
-      0,   0,  86,   0,   0,   5, 
-     66,   0,  16,   0,   9,   0, 
-      0,   0,  26,   0,  16,   0, 
-      9,   0,   0,   0,  50,   0, 
-      0,  10,  34,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16, 128,  65,   0,   0,   0, 
-      9,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   9,   0, 
-      0,   0,   1,   0,   0,   7, 
-     18,   0,  16,   0,   9,   0, 
-      0,   0,  26,   0,  16,   0, 
-      9,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   8,  34,   0, 
-     16,   0,   9,   0,   0,   0, 
-     58,   0,  16,   0,   0,   0, 
-      0,   0,  26,   0,  16, 128, 
-     65,   0,   0,   0,   7,   0, 
-      0,   0,   0,   0,   0,   7, 
-     34,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     55,   0,   0,   9,  34,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   9,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   9,   0,   0,   0, 
-     55,   0,   0,   9,  34,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     55,   0,   0,   9,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  86,   0,   0,   5, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  86,   0, 
-      0,   5, 130,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     28,   0,   0,   5,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,   3,   0, 
-      0,   0,  28,   0,   0,   5, 
-    194,   0,  16,   0,   7,   0, 
-      0,   0, 166,  14,  16,   0, 
-      7,   0,   0,   0,  38,   0, 
-      0,   9,   0, 208,   0,   0, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0,  70,   0,  16,   0, 
-      7,   0,   0,   0,  70, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  30,   0, 
-      0,   7,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  35,   0,   0,  10, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  42, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     35,   0,   0,  10,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  58, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0, 167,   0, 
-      0, 139,   2,  35,   0, 128, 
-    131, 153,  25,   0,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,   6, 224, 
-     17,   0,   0,   0,   0,   0, 
-      1,   0,   0,   7,  34,   0, 
-     16,   0,   6,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0, 128,  63,  18,   0, 
-      0,   1,  54,   0,   0,   5, 
-     34,   0,  16,   0,   6,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,  21,   0, 
-      0,   1,  21,   0,   0,   1, 
-     31,   0,   4,   3,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     30,   0,   0,  11,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-    182, 143,  32,   0,   0,   0, 
-      0,   0,   1,   0,   0,   0, 
-      2,  64,   0,   0, 255, 255, 
-    255, 255, 255, 255, 255, 255, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,  86,   0,   0,   5, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0,  70,   0,  16,   0, 
-      7,   0,   0,   0,  52,   0, 
-      0,   7,  18,   0,  16,   0, 
-      1,   0,   0,   0,  10,   0, 
-     16,   0,   2,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  51,   0,   0,   7, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     28,   0,   0,   5,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  52,   0,   0,   7, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     51,   0,   0,   7,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  28,   0, 
-      0,   5,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     86,   0,   0,   5,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  86,   0,   0,   5, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  28,   0, 
-      0,   5,  50,   0,  16,   0, 
-      7,   0,   0,   0,  70,   0, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5, 194,   0, 
-     16,   0,   7,   0,   0,   0, 
-    166,  14,  16,   0,   7,   0, 
-      0,   0,  38,   0,   0,   9, 
-      0, 208,   0,   0,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,   7,   0, 
-      0,   0,  70, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  30,   0,   0,   7, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     35,   0,   0,  10,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  35,   0, 
-      0,  10,  18,   0,  16,   0, 
-      1,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  35,   0, 128, 131, 153, 
-     25,   0,  18,   0,  16,   0, 
-      1,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,   6, 224,  17,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   7,  34,   0,  16,   0, 
-      8,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-    128,  63,  18,   0,   0,   1, 
-     32,   0,   0,   8,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      1,  64,   0,   0,   2,   0, 
-      0,   0,  31,   0,   4,   3, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  55,   0,   0,  10, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  10, 128,  32,   0, 
-      0,   0,   0,   0,   7,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0, 128,   1,  64, 
-      0,   0,   0,   0,   0,  63, 
-      0,   0,   0,   7,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-    230,  10,  16,   0,   0,   0, 
-      0,   0,   6,   0,  16,   0, 
-      1,   0,   0,   0,  49,   0, 
-      0,   7,  18,   0,  16,   0, 
-      1,   0,   0,   0,  10,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,   0,   0,   0,   8, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  10,   0, 
-     16, 128,  65,   0,   0,   0, 
-      2,   0,   0,   0,  14,   0, 
-      0,   7,  66,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  28,   0,   0,   5, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  86,   0, 
-      0,   5, 130,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-     50,   0,   0,  10,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58,   0,  16, 128,  65,   0, 
-      0,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,   1,   0, 
-      0,   7,  66,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-      1,  64,   0,   0,   1,   0, 
-      0,   0,   0,   0,   0,   7, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-      0,   0,   0,   8,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   0,   0, 
-      0,   0,  26,   0,  16, 128, 
-     65,   0,   0,   0,   2,   0, 
-      0,   0,  55,   0,   0,   9, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  49,   0,   0,   7, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      0,   0,   0,   0,  10,   0, 
-     16,   0,   2,   0,   0,   0, 
-      0,   0,   0,   8, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16, 128,  65,   0, 
-      0,   0,   0,   0,   0,   0, 
-     10,   0,  16,   0,   2,   0, 
-      0,   0,  14,   0,   0,   7, 
-     18,   0,  16,   0,   9,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5,  18,   0, 
-     16,   0,   9,   0,   0,   0, 
-     10,   0,  16,   0,   9,   0, 
-      0,   0,  86,   0,   0,   5, 
-     34,   0,  16,   0,   9,   0, 
-      0,   0,  10,   0,  16,   0, 
-      9,   0,   0,   0,  50,   0, 
-      0,  10,  18,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16, 128,  65,   0,   0,   0, 
-      9,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,   1,   0,   0,   7, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      9,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   8,  18,   0, 
-     16,   0,   9,   0,   0,   0, 
-     42,   0,  16,   0,   0,   0, 
-      0,   0,  10,   0,  16, 128, 
-     65,   0,   0,   0,   7,   0, 
-      0,   0,   0,   0,   0,   7, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     55,   0,   0,   9,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   9,   0,   0,   0, 
-     55,   0,   0,   9,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   2,   0,   0,   0, 
-     55,   0,   0,   9,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  49,   0,   0,   7, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-      0,   0,   0,   8,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  42,   0,  16, 128, 
-     65,   0,   0,   0,   3,   0, 
-      0,   0,  14,   0,   0,   7, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  86,   0,   0,   5, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  50,   0, 
-      0,  10,  18,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16, 128,  65,   0,   0,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,   1,   0,   0,   7, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   7, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,   0,   0, 
-      0,   8,  18,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   0,   0,   0,   0, 
-     10,   0,  16, 128,  65,   0, 
-      0,   0,   7,   0,   0,   0, 
-     55,   0,   0,   9,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     49,   0,   0,   7,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   0,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,   0,   0, 
-      0,   8, 130,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16, 128,  65,   0,   0,   0, 
-      0,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     14,   0,   0,   7,  18,   0, 
-     16,   0,   9,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  28,   0, 
-      0,   5,  18,   0,  16,   0, 
-      9,   0,   0,   0,  10,   0, 
-     16,   0,   9,   0,   0,   0, 
-     86,   0,   0,   5,  34,   0, 
-     16,   0,   9,   0,   0,   0, 
-     10,   0,  16,   0,   9,   0, 
-      0,   0,  50,   0,   0,  10, 
-     34,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16, 128, 
-     65,   0,   0,   0,   9,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-      1,   0,   0,   7, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   9,   0, 
-      0,   0,   1,  64,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   8,  18,   0,  16,   0, 
-      9,   0,   0,   0,  58,   0, 
-     16,   0,   0,   0,   0,   0, 
-     26,   0,  16, 128,  65,   0, 
-      0,   0,   7,   0,   0,   0, 
-      0,   0,   0,   7,  34,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  55,   0, 
-      0,   9,  34,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      9,   0,   0,   0,  55,   0, 
-      0,   9,  66,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  55,   0, 
-      0,   9,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     86,   0,   0,   5,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  86,   0,   0,   5, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  28,   0, 
-      0,   5,  50,   0,  16,   0, 
-      7,   0,   0,   0,  70,   0, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5, 194,   0, 
-     16,   0,   7,   0,   0,   0, 
-    166,  14,  16,   0,   7,   0, 
-      0,   0,  38,   0,   0,   9, 
-      0, 208,   0,   0,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,   7,   0, 
-      0,   0,  70, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  30,   0,   0,   7, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     35,   0,   0,  10,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  35,   0, 
-      0,  10,  18,   0,  16,   0, 
-      1,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  35,   0, 128, 131, 153, 
-     25,   0,  18,   0,  16,   0, 
-      1,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,   6, 224,  17,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   7,  34,   0,  16,   0, 
-      8,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-    128,  63,  18,   0,   0,   1, 
-     54,   0,   0,   5,  34,   0, 
-     16,   0,   8,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  21,   0,   0,   1, 
-     21,   0,   0,   1,  21,   0, 
-      0,   1,  31,   0,   0,   4, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-     29,   0,   0,  10, 114,   0, 
-     16,   0,   7,   0,   0,   0, 
-    230,   8,  16,   0,   2,   0, 
-      0,   0,   2,  64,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,  28,   0, 
-      0,   5, 114,   0,  16,   0, 
-      9,   0,   0,   0, 230,   8, 
-     16,   0,   2,   0,   0,   0, 
-     79,   0,   0,   8, 114,   0, 
-     16,   0,   9,   0,   0,   0, 
-     70,   2,  16,   0,   9,   0, 
-      0,   0, 246, 143,  32,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   0,   1,   0,   0,   7, 
-    114,   0,  16,   0,   7,   0, 
-      0,   0,  70,   2,  16,   0, 
-      7,   0,   0,   0,  70,   2, 
-     16,   0,   9,   0,   0,   0, 
-     29,   0,   0,   7,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   7, 114,   0,  16,   0, 
-      7,   0,   0,   0,   6,   0, 
-     16,   0,   1,   0,   0,   0, 
-     70,   2,  16,   0,   7,   0, 
-      0,   0,  28,   0,   0,   5, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,  79,   0, 
-      0,   8,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42, 128,  32,   0,   0,   0, 
-      0,   0,   1,   0,   0,   0, 
-      1,   0,   0,   7, 114,   0, 
-     16,   0,   7,   0,   0,   0, 
-     86,   5,  16,   0,   2,   0, 
-      0,   0,  70,   2,  16,   0, 
-      7,   0,   0,   0,  28,   0, 
-      0,   5,  50,   0,  16,   0, 
-      9,   0,   0,   0,  70,   0, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  28,   0,   0,   5, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      2,   0,   0,   0,  38,   0, 
-      0,   9,   0, 208,   0,   0, 
-     50,   0,  16,   0,   9,   0, 
-      0,   0,  70,   0,  16,   0, 
-      9,   0,   0,   0,  70, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  30,   0, 
-      0,   7,  18,   0,  16,   0, 
-      9,   0,   0,   0,  26,   0, 
-     16,   0,   9,   0,   0,   0, 
-     10,   0,  16,   0,   9,   0, 
-      0,   0,  35,   0,   0,  10, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  42, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16,   0,   9,   0,   0,   0, 
-     35,   0,   0,  10,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  58, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0, 167,   0, 
-      0, 139,   2,  35,   0, 128, 
-    131, 153,  25,   0,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,   6, 224, 
-     17,   0,   0,   0,   0,   0, 
-      1,   0,   0,   7,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,   1,   0, 
-      0,   7, 130,   0,  16,   0, 
-      5,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-    128,  63,  29,   0,   0,   7, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  26,   0,  16,   0, 
-      1,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     28,   0,   0,   5,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   1,   0, 
-      0,   0,  79,   0,   0,   8, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      1,   0,   0,   0,   1,   0, 
-      0,   7,  66,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,   1,   0,   0,   7, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-      1,   0,   0,   7,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  28,   0, 
-      0,   5, 146,   0,  16,   0, 
-      7,   0,   0,   0,   6,   4, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  28,   0,   0,   5, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  26,   0,  16,   0, 
-      1,   0,   0,   0,  38,   0, 
-      0,   9,   0, 208,   0,   0, 
-    146,   0,  16,   0,   7,   0, 
-      0,   0,   6,  12,  16,   0, 
-      7,   0,   0,   0,   6, 132, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  30,   0, 
-      0,   7,  18,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  35,   0,   0,  10, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  42, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     35,   0,   0,  10,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  58, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0, 167,   0, 
-      0, 139,   2,  35,   0, 128, 
-    131, 153,  25,   0,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,   6, 224, 
-     17,   0,   0,   0,   0,   0, 
-      1,   0,   0,   7,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,   1,   0, 
-      0,   7, 130,   0,  16,   0, 
-      4,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-    128,  63,  28,   0,   0,   5, 
-    146,   0,  16,   0,   7,   0, 
-      0,   0,   6,   4,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5,  18,   0,  16,   0, 
-      1,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58,   0,  16,   0,   2,   0, 
-      0,   0,  38,   0,   0,   9, 
-      0, 208,   0,   0, 146,   0, 
-     16,   0,   7,   0,   0,   0, 
-      6,  12,  16,   0,   7,   0, 
-      0,   0,   6, 132,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  30,   0,   0,   7, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     35,   0,   0,  10,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  35,   0, 
-      0,  10,  18,   0,  16,   0, 
-      1,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  35,   0, 128, 131, 153, 
-     25,   0,  18,   0,  16,   0, 
-      1,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,   6, 224,  17,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   7,  18,   0,  16,   0, 
-      1,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,   1,   0,   0,   7, 
-    130,   0,  16,   0,   6,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0, 128,  63, 
-     28,   0,   0,   5,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,   3,   0, 
-      0,   0,  28,   0,   0,   5, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5,  34,   0,  16,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16,   0,   2,   0,   0,   0, 
-     38,   0,   0,   9,   0, 208, 
-      0,   0,  50,   0,  16,   0, 
-      7,   0,   0,   0,  70,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     30,   0,   0,   7,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  35,   0, 
-      0,  10,  18,   0,  16,   0, 
-      1,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     42, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  35,   0,   0,  10, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-    167,   0,   0, 139,   2,  35, 
-      0, 128, 131, 153,  25,   0, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-      6, 224,  17,   0,   0,   0, 
-      0,   0,   1,   0,   0,   7, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-      1,   0,   0,   7, 130,   0, 
-     16,   0,   8,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0, 128,  63,  18,   0, 
-      0,   1,  32,   0,   0,   8, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  58, 128,  32,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   1,  64,   0,   0, 
-      1,   0,   0,   0,  31,   0, 
-      4,   3,  10,   0,  16,   0, 
-      1,   0,   0,   0,  30,   0, 
-      0,  11,  50,   0,  16,   0, 
-      7,   0,   0,   0, 182, 143, 
-     32,   0,   0,   0,   0,   0, 
-      1,   0,   0,   0,   2,  64, 
-      0,   0, 255, 255, 255, 255, 
-    255, 255, 255, 255,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-     86,   0,   0,   5,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,   7,   0, 
-      0,   0,  52,   0,   0,   7, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      2,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     51,   0,   0,   7,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  28,   0, 
-      0,   5,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     52,   0,   0,   7,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,  51,   0, 
-      0,   7,  66,   0,  16,   0, 
-      3,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  28,   0,   0,   5, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  86,   0, 
-      0,   5,  66,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     86,   0,   0,   5, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  28,   0,   0,   5, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0,  70,   0,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5, 194,   0,  16,   0, 
-      7,   0,   0,   0, 166,  14, 
-     16,   0,   7,   0,   0,   0, 
-     38,   0,   0,   9,   0, 208, 
-      0,   0,  50,   0,  16,   0, 
-      7,   0,   0,   0,  70,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     30,   0,   0,   7,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  35,   0, 
-      0,  10,  34,   0,  16,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  35,   0,   0,  10, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-    167,   0,   0, 139,   2,  35, 
-      0, 128, 131, 153,  25,   0, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-      6, 224,  17,   0,   0,   0, 
-      0,   0,   1,   0,   0,   7, 
-    130,   0,  16,   0,   5,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0, 128,  63, 
-     18,   0,   0,   1,  32,   0, 
-      0,   8,  34,   0,  16,   0, 
-      2,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   1,  64, 
-      0,   0,   2,   0,   0,   0, 
-     31,   0,   4,   3,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     55,   0,   0,  10,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     10, 128,  32,   0,   0,   0, 
-      0,   0,   7,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0, 128,   1,  64,   0,   0, 
-      0,   0,   0,  63,   0,   0, 
-      0,   7,  50,   0,  16,   0, 
-      7,   0,   0,   0, 230,  10, 
-     16,   0,   0,   0,   0,   0, 
-     86,   5,  16,   0,   2,   0, 
-      0,   0,  49,   0,   0,   7, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-      0,   0,   0,   8,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  42,   0,  16, 128, 
-     65,   0,   0,   0,   2,   0, 
-      0,   0,  14,   0,   0,   7, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  86,   0,   0,   5, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  50,   0, 
-      0,  10,  66,   0,  16,   0, 
-      3,   0,   0,   0,  58,   0, 
-     16, 128,  65,   0,   0,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,   1,   0,   0,   7, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   7, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,   0,   0, 
-      0,   8,  66,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   0,   0,   0,   0, 
-     42,   0,  16, 128,  65,   0, 
-      0,   0,   3,   0,   0,   0, 
-     55,   0,   0,   9,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     49,   0,   0,   7,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   0,   0, 
-      0,   0,  42,   0,  16,   0, 
-      2,   0,   0,   0,   0,   0, 
-      0,   8, 130,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16, 128,  65,   0,   0,   0, 
-      0,   0,   0,   0,  42,   0, 
-     16,   0,   2,   0,   0,   0, 
-     14,   0,   0,   7,  18,   0, 
-     16,   0,   9,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  28,   0, 
-      0,   5,  18,   0,  16,   0, 
-      9,   0,   0,   0,  10,   0, 
-     16,   0,   9,   0,   0,   0, 
-     86,   0,   0,   5,  34,   0, 
-     16,   0,   9,   0,   0,   0, 
-     10,   0,  16,   0,   9,   0, 
-      0,   0,  50,   0,   0,  10, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16, 128, 
-     65,   0,   0,   0,   9,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-      1,   0,   0,   7, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   9,   0, 
-      0,   0,   1,  64,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   8,  18,   0,  16,   0, 
-      9,   0,   0,   0,  42,   0, 
-     16,   0,   0,   0,   0,   0, 
-     10,   0,  16, 128,  65,   0, 
-      0,   0,   7,   0,   0,   0, 
-      0,   0,   0,   7,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  55,   0, 
-      0,   9,  18,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      9,   0,   0,   0,  55,   0, 
-      0,   9,  66,   0,  16,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      2,   0,   0,   0,  55,   0, 
-      0,   9,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  42,   0,  16,   0, 
-      2,   0,   0,   0,  49,   0, 
-      0,   7,  66,   0,  16,   0, 
-      2,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,   0,   0,   0,   8, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  58,   0, 
-     16, 128,  65,   0,   0,   0, 
-      3,   0,   0,   0,  14,   0, 
-      0,   7,  18,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  28,   0,   0,   5, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  86,   0, 
-      0,   5,  66,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     50,   0,   0,  10,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     42,   0,  16, 128,  65,   0, 
-      0,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,   1,   0, 
-      0,   7,  18,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-      1,  64,   0,   0,   1,   0, 
-      0,   0,   0,   0,   0,   7, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-      0,   0,   0,   8,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16,   0,   0,   0, 
-      0,   0,  42,   0,  16, 128, 
-     65,   0,   0,   0,   3,   0, 
-      0,   0,  55,   0,   0,   9, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  49,   0,   0,   7, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16,   0, 
-      0,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-      0,   0,   0,   8,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16, 128,  65,   0, 
-      0,   0,   0,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  14,   0,   0,   7, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  86,   0,   0,   5, 
-     18,   0,  16,   0,   9,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,  50,   0, 
-      0,  10,  34,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16, 128,  65,   0,   0,   0, 
-      9,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,   1,   0,   0,   7, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   8, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   0,   0, 
-      0,   0,  26,   0,  16, 128, 
-     65,   0,   0,   0,   7,   0, 
-      0,   0,   0,   0,   0,   7, 
-     34,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     55,   0,   0,   9,  34,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     55,   0,   0,   9,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     55,   0,   0,   9,  66,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5,  98,   0, 
-     16,   0,   2,   0,   0,   0, 
-     86,   6,  16,   0,   2,   0, 
-      0,   0,  86,   0,   0,   5, 
-    194,   0,  16,   0,   7,   0, 
-      0,   0, 166,   6,  16,   0, 
-      2,   0,   0,   0,  28,   0, 
-      0,   5,  98,   0,  16,   0, 
-      2,   0,   0,   0,   6,   1, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-    230,  10,  16,   0,   7,   0, 
-      0,   0,  38,   0,   0,   9, 
-      0, 208,   0,   0,  98,   0, 
-     16,   0,   2,   0,   0,   0, 
-     86,   6,  16,   0,   2,   0, 
-      0,   0,   6, 129,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  30,   0,   0,   7, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     35,   0,   0,  10,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  35,   0, 
-      0,  10,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  35,   0, 128, 131, 153, 
-     25,   0,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,   6, 224,  17,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   7, 130,   0,  16,   0, 
-      5,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-    128,  63,  18,   0,   0,   1, 
-     54,   0,   0,   5, 130,   0, 
-     16,   0,   5,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  21,   0,   0,   1, 
-     21,   0,   0,   1,  31,   0, 
-      4,   3,  10,   0,  16,   0, 
-      1,   0,   0,   0,  30,   0, 
-      0,  11,  98,   0,  16,   0, 
-      2,   0,   0,   0, 246, 142, 
-     32,   0,   0,   0,   0,   0, 
-      1,   0,   0,   0,   2,  64, 
-      0,   0,   0,   0,   0,   0, 
-    255, 255, 255, 255, 255, 255, 
-    255, 255,   0,   0,   0,   0, 
-     86,   0,   0,   5,  98,   0, 
-     16,   0,   2,   0,   0,   0, 
-     86,   6,  16,   0,   2,   0, 
-      0,   0,  52,   0,   0,   7, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  26,   0,  16,   0, 
-      1,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     51,   0,   0,   7,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  52,   0, 
-      0,   7,  66,   0,  16,   0, 
-      3,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  51,   0,   0,   7, 
-     66,   0,  16,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5,  98,   0, 
-     16,   0,   2,   0,   0,   0, 
-     86,   6,  16,   0,   2,   0, 
-      0,   0,  86,   0,   0,   5, 
-    194,   0,  16,   0,   7,   0, 
-      0,   0, 166,   6,  16,   0, 
-      2,   0,   0,   0,  28,   0, 
-      0,   5,  98,   0,  16,   0, 
-      2,   0,   0,   0,   6,   1, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-    230,  10,  16,   0,   7,   0, 
-      0,   0,  38,   0,   0,   9, 
-      0, 208,   0,   0,  98,   0, 
-     16,   0,   2,   0,   0,   0, 
-     86,   6,  16,   0,   2,   0, 
-      0,   0,   6, 129,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  30,   0,   0,   7, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     35,   0,   0,  10,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  35,   0, 
-      0,  10,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  35,   0, 128, 131, 153, 
-     25,   0,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,   6, 224,  17,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   7, 130,   0,  16,   0, 
-      4,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-    128,  63,  18,   0,   0,   1, 
-     32,   0,   0,   8,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      1,  64,   0,   0,   2,   0, 
-      0,   0,  31,   0,   4,   3, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  55,   0,   0,  10, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  10, 128,  32,   0, 
-      0,   0,   0,   0,   7,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0, 128,   1,  64, 
-      0,   0,   0,   0,   0,  63, 
-      0,   0,   0,   7,  98,   0, 
-     16,   0,   2,   0,   0,   0, 
-    166,  11,  16,   0,   0,   0, 
-      0,   0,  86,   5,  16,   0, 
-      2,   0,   0,   0,  49,   0, 
-      0,   7,  66,   0,  16,   0, 
-      3,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,   0,   0,   0,   8, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  26,   0, 
-     16, 128,  65,   0,   0,   0, 
-      1,   0,   0,   0,  14,   0, 
-      0,   7,  34,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  28,   0,   0,   5, 
-     34,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  86,   0, 
-      0,   5,  66,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     50,   0,   0,  10,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16, 128,  65,   0, 
-      0,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,   1,   0, 
-      0,   7,  34,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-      1,  64,   0,   0,   1,   0, 
-      0,   0,   0,   0,   0,   7, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-      0,   0,   0,   8,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   0,   0, 
-      0,   0,  10,   0,  16, 128, 
-     65,   0,   0,   0,   7,   0, 
-      0,   0,  55,   0,   0,   9, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  49,   0,   0,   7, 
-     34,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      0,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-      0,   0,   0,   8,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16, 128,  65,   0, 
-      0,   0,   0,   0,   0,   0, 
-     26,   0,  16,   0,   1,   0, 
-      0,   0,  14,   0,   0,   7, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     28,   0,   0,   5, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  86,   0,   0,   5, 
-     18,   0,  16,   0,   9,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,  50,   0, 
-      0,  10,  34,   0,  16,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16, 128,  65,   0,   0,   0, 
-      9,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,   1,   0,   0,   7, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   8, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   0,   0, 
-      0,   0,  26,   0,  16, 128, 
-     65,   0,   0,   0,   2,   0, 
-      0,   0,   0,   0,   0,   7, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     55,   0,   0,   9,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     55,   0,   0,   9,  34,   0, 
-     16,   0,   1,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-     55,   0,   0,   9,  34,   0, 
-     16,   0,   1,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-     28,   0,   0,   5,  34,   0, 
-     16,   0,   1,   0,   0,   0, 
-     26,   0,  16,   0,   1,   0, 
-      0,   0,  49,   0,   0,   7, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-      0,   0,   0,   8,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  58,   0,  16, 128, 
-     65,   0,   0,   0,   3,   0, 
-      0,   0,  14,   0,   0,   7, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   2,   0,   0,   0, 
-     28,   0,   0,   5,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  86,   0,   0,   5, 
-     34,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  50,   0, 
-      0,  10,  66,   0,  16,   0, 
-      3,   0,   0,   0,  26,   0, 
-     16, 128,  65,   0,   0,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,   1,   0,   0,   7, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   7,  34,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,   0,   0, 
-      0,   8,  66,   0,  16,   0, 
-      3,   0,   0,   0,  58,   0, 
-     16,   0,   0,   0,   0,   0, 
-     42,   0,  16, 128,  65,   0, 
-      0,   0,   3,   0,   0,   0, 
-     55,   0,   0,   9,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     49,   0,   0,   7,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   0,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,   0,   0, 
-      0,   8,  34,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16, 128,  65,   0,   0,   0, 
-      0,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     14,   0,   0,   7,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      2,   0,   0,   0,  28,   0, 
-      0,   5,  66,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-     86,   0,   0,   5, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  50,   0,   0,  10, 
-     66,   0,  16,   0,   2,   0, 
-      0,   0,  58,   0,  16, 128, 
-     65,   0,   0,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-      1,   0,   0,   7,  34,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,   1,  64,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   8,  66,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   0,   0,   0,   0, 
-     42,   0,  16, 128,  65,   0, 
-      0,   0,   2,   0,   0,   0, 
-      0,   0,   0,   7,  66,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  42,   0,  16,   0, 
-      2,   0,   0,   0,  55,   0, 
-      0,   9,  66,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  55,   0, 
-      0,   9,  66,   0,  16,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   2,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,  55,   0, 
-      0,   9,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  42,   0,  16,   0, 
-      2,   0,   0,   0,  28,   0, 
-      0,   5,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     86,   0,   0,   5,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  86,   0,   0,   5, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      1,   0,   0,   0,  28,   0, 
-      0,   5,  98,   0,  16,   0, 
-      2,   0,   0,   0,   6,   1, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-    230,  10,  16,   0,   7,   0, 
-      0,   0,  38,   0,   0,   9, 
-      0, 208,   0,   0,  98,   0, 
-     16,   0,   2,   0,   0,   0, 
-     86,   6,  16,   0,   2,   0, 
-      0,   0,   6, 129,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  30,   0,   0,   7, 
-     34,   0,  16,   0,   1,   0, 
-      0,   0,  42,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     35,   0,   0,  10,  34,   0, 
-     16,   0,   1,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      1,   0,   0,   0,  35,   0, 
-      0,  10,  34,   0,  16,   0, 
-      1,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   1,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  35,   0, 128, 131, 153, 
-     25,   0,  34,   0,  16,   0, 
-      1,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,   6, 224,  17,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   7, 130,   0,  16,   0, 
-      4,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-    128,  63,  18,   0,   0,   1, 
-     54,   0,   0,   5, 130,   0, 
-     16,   0,   4,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  21,   0,   0,   1, 
-     21,   0,   0,   1,  31,   0, 
-      4,   3,  10,   0,  16,   0, 
-      1,   0,   0,   0,  30,   0, 
-      0,  11,  98,   0,  16,   0, 
-      2,   0,   0,   0, 246, 142, 
-     32,   0,   0,   0,   0,   0, 
-      1,   0,   0,   0,   2,  64, 
-      0,   0,   0,   0,   0,   0, 
-    255, 255, 255, 255, 255, 255, 
-    255, 255,   0,   0,   0,   0, 
-     86,   0,   0,   5,  98,   0, 
-     16,   0,   2,   0,   0,   0, 
-     86,   6,  16,   0,   2,   0, 
-      0,   0,  52,   0,   0,   7, 
-     34,   0,  16,   0,   1,   0, 
-      0,   0,  58,   0,  16,   0, 
-      2,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     51,   0,   0,   7,  34,   0, 
-     16,   0,   1,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      1,   0,   0,   0,  28,   0, 
-      0,   5,  34,   0,  16,   0, 
-      1,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-     52,   0,   0,   7,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,  51,   0, 
-      0,   7,  34,   0,  16,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  28,   0,   0,   5, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  86,   0, 
-      0,   5,  66,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     86,   0,   0,   5, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   1,   0, 
-      0,   0,  28,   0,   0,   5, 
-     98,   0,  16,   0,   2,   0, 
-      0,   0,   6,   1,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5,  50,   0,  16,   0, 
-      7,   0,   0,   0, 230,  10, 
-     16,   0,   7,   0,   0,   0, 
-     38,   0,   0,   9,   0, 208, 
-      0,   0,  98,   0,  16,   0, 
-      2,   0,   0,   0,  86,   6, 
-     16,   0,   2,   0,   0,   0, 
-      6, 129,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     30,   0,   0,   7,  34,   0, 
-     16,   0,   1,   0,   0,   0, 
-     42,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  35,   0, 
-      0,  10,  34,   0,  16,   0, 
-      1,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   1,   0, 
-      0,   0,  35,   0,   0,  10, 
-     34,   0,  16,   0,   1,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-    167,   0,   0, 139,   2,  35, 
-      0, 128, 131, 153,  25,   0, 
-     34,   0,  16,   0,   1,   0, 
-      0,   0,  26,   0,  16,   0, 
-      1,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-      6, 224,  17,   0,   0,   0, 
-      0,   0,   1,   0,   0,   7, 
-    130,   0,  16,   0,   6,   0, 
-      0,   0,  26,   0,  16,   0, 
-      1,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0, 128,  63, 
-     18,   0,   0,   1,  32,   0, 
-      0,   8,  34,   0,  16,   0, 
-      1,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   1,  64, 
-      0,   0,   2,   0,   0,   0, 
-     31,   0,   4,   3,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-     55,   0,   0,  10,  34,   0, 
-     16,   0,   1,   0,   0,   0, 
-     10, 128,  32,   0,   0,   0, 
-      0,   0,   7,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0, 128,   1,  64,   0,   0, 
-      0,   0,   0,  63,   0,   0, 
-      0,   7,  98,   0,  16,   0, 
-      2,   0,   0,   0, 166,  11, 
-     16,   0,   0,   0,   0,   0, 
-     86,   5,  16,   0,   1,   0, 
-      0,   0,  49,   0,   0,   7, 
-     34,   0,  16,   0,   1,   0, 
-      0,   0,  58,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-      0,   0,   0,   8,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  58,   0,  16, 128, 
-     65,   0,   0,   0,   2,   0, 
-      0,   0,  14,   0,   0,   7, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     28,   0,   0,   5,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  86,   0,   0,   5, 
-     34,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  50,   0, 
-      0,  10,  66,   0,  16,   0, 
-      3,   0,   0,   0,  26,   0, 
-     16, 128,  65,   0,   0,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,   1,   0,   0,   7, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   7,  34,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,   0,   0, 
-      0,   8,  66,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   0,   0,   0,   0, 
-     42,   0,  16, 128,  65,   0, 
-      0,   0,   3,   0,   0,   0, 
-     55,   0,   0,   9,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     49,   0,   0,   7,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   0,   0, 
-      0,   0,  58,   0,  16,   0, 
-      2,   0,   0,   0,   0,   0, 
-      0,   8,  34,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16, 128,  65,   0,   0,   0, 
-      0,   0,   0,   0,  58,   0, 
-     16,   0,   2,   0,   0,   0, 
-     14,   0,   0,   7,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  28,   0, 
-      0,   5,  66,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-     86,   0,   0,   5, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  50,   0,   0,  10, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  58,   0,  16, 128, 
-     65,   0,   0,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-      1,   0,   0,   7,  34,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,   1,  64,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   8,  66,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   0,   0,   0,   0, 
-     26,   0,  16, 128,  65,   0, 
-      0,   0,   2,   0,   0,   0, 
-      0,   0,   0,   7,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  55,   0, 
-      0,   9,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  55,   0, 
-      0,   9,  34,   0,  16,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  58,   0,  16,   0, 
-      2,   0,   0,   0,  55,   0, 
-      0,   9,  34,   0,  16,   0, 
-      1,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  28,   0, 
-      0,   5,  34,   0,  16,   0, 
-      1,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-     49,   0,   0,   7,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   8, 130,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-     58,   0,  16, 128,  65,   0, 
-      0,   0,   3,   0,   0,   0, 
-     14,   0,   0,   7,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      2,   0,   0,   0,  28,   0, 
-      0,   5,  66,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     86,   0,   0,   5,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  50,   0,   0,  10, 
-    130,   0,  16,   0,   2,   0, 
-      0,   0,  10,   0,  16, 128, 
-     65,   0,   0,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      2,   0,   0,   0,  58,   0, 
-     16,   0,   2,   0,   0,   0, 
-      1,   0,   0,   7,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,   1,  64,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   7,  18,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-     58,   0,  16,   0,   2,   0, 
-      0,   0,   0,   0,   0,   8, 
-    130,   0,  16,   0,   2,   0, 
-      0,   0,  58,   0,  16,   0, 
-      0,   0,   0,   0,  58,   0, 
-     16, 128,  65,   0,   0,   0, 
-      2,   0,   0,   0,  55,   0, 
-      0,   9, 130,   0,  16,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  49,   0, 
-      0,   7,  66,   0,  16,   0, 
-      3,   0,   0,   0,  58,   0, 
-     16,   0,   0,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,   0,   0,   0,   8, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16, 128, 
-     65,   0,   0,   0,   0,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,  14,   0, 
-      0,   7,  34,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   2,   0, 
-      0,   0,  28,   0,   0,   5, 
-     34,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  86,   0, 
-      0,   5,  66,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     50,   0,   0,  10,  66,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16, 128,  65,   0, 
-      0,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,   1,   0, 
-      0,   7,  18,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-      1,  64,   0,   0,   1,   0, 
-      0,   0,   0,   0,   0,   8, 
-     34,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16,   0, 
-      0,   0,   0,   0,  42,   0, 
-     16, 128,  65,   0,   0,   0, 
-      2,   0,   0,   0,   0,   0, 
-      0,   7,  66,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-     42,   0,  16,   0,   2,   0, 
-      0,   0,  55,   0,   0,   9, 
-     66,   0,  16,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  55,   0,   0,   9, 
-     66,   0,  16,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  55,   0,   0,   9, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  58,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   2,   0, 
-      0,   0,  28,   0,   0,   5, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  86,   0, 
-      0,   5,  66,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     86,   0,   0,   5, 130,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   1,   0, 
-      0,   0,  28,   0,   0,   5, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0,  70,   0,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5,  98,   0,  16,   0, 
-      2,   0,   0,   0, 166,  11, 
-     16,   0,   2,   0,   0,   0, 
-     38,   0,   0,   9,   0, 208, 
-      0,   0,  50,   0,  16,   0, 
-      7,   0,   0,   0,  70,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     30,   0,   0,   7,  34,   0, 
-     16,   0,   1,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  35,   0, 
-      0,  10,  34,   0,  16,   0, 
-      1,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   1,   0, 
-      0,   0,  35,   0,   0,  10, 
-     34,   0,  16,   0,   1,   0, 
-      0,   0,  42,   0,  16,   0, 
-      2,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-    167,   0,   0, 139,   2,  35, 
-      0, 128, 131, 153,  25,   0, 
-     34,   0,  16,   0,   1,   0, 
-      0,   0,  26,   0,  16,   0, 
-      1,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-      6, 224,  17,   0,   0,   0, 
-      0,   0,   1,   0,   0,   7, 
-    130,   0,  16,   0,   6,   0, 
-      0,   0,  26,   0,  16,   0, 
-      1,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0, 128,  63, 
-     18,   0,   0,   1,  54,   0, 
-      0,   5, 130,   0,  16,   0, 
-      6,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     21,   0,   0,   1,  21,   0, 
-      0,   1,  31,   0,   4,   3, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  30,   0,   0,  11, 
-     50,   0,  16,   0,   1,   0, 
-      0,   0, 182, 143,  32,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   0,   2,  64,   0,   0, 
-    255, 255, 255, 255, 255, 255, 
-    255, 255,   0,   0,   0,   0, 
-      0,   0,   0,   0,  86,   0, 
-      0,   5,  50,   0,  16,   0, 
-      1,   0,   0,   0,  70,   0, 
-     16,   0,   1,   0,   0,   0, 
-     52,   0,   0,   7,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   2,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,  51,   0, 
-      0,   7,  18,   0,  16,   0, 
-      1,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  52,   0,   0,   7, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     51,   0,   0,   7,  34,   0, 
-     16,   0,   1,   0,   0,   0, 
-     26,   0,  16,   0,   1,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  28,   0, 
-      0,   5,  50,   0,  16,   0, 
-      1,   0,   0,   0,  70,   0, 
-     16,   0,   1,   0,   0,   0, 
-     86,   0,   0,   5, 194,   0, 
-     16,   0,   2,   0,   0,   0, 
-     86,   1,  16,   0,   1,   0, 
-      0,   0,  28,   0,   0,   5, 
-     50,   0,  16,   0,   1,   0, 
-      0,   0,  70,   0,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5,  98,   0,  16,   0, 
-      2,   0,   0,   0, 166,  11, 
-     16,   0,   2,   0,   0,   0, 
-     38,   0,   0,   9,   0, 208, 
-      0,   0,  50,   0,  16,   0, 
-      1,   0,   0,   0,  70,   0, 
-     16,   0,   1,   0,   0,   0, 
-     70, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     30,   0,   0,   7,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     26,   0,  16,   0,   1,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  35,   0, 
-      0,  10,  18,   0,  16,   0, 
-      1,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  35,   0,   0,  10, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  42,   0,  16,   0, 
-      2,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-    167,   0,   0, 139,   2,  35, 
-      0, 128, 131, 153,  25,   0, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-      6, 224,  17,   0,   0,   0, 
-      0,   0,   1,   0,   0,   7, 
-    130,   0,  16,   0,   8,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0, 128,  63, 
-     18,   0,   0,   1,  32,   0, 
-      0,   8,  18,   0,  16,   0, 
-      1,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   1,  64, 
-      0,   0,   2,   0,   0,   0, 
-     31,   0,   4,   3,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     55,   0,   0,  10,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     10, 128,  32,   0,   0,   0, 
-      0,   0,   7,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0, 128,   1,  64,   0,   0, 
-      0,   0,   0,  63,   0,   0, 
-      0,   7,  50,   0,  16,   0, 
-      1,   0,   0,   0, 230,  10, 
-     16,   0,   0,   0,   0,   0, 
-      6,   0,  16,   0,   1,   0, 
-      0,   0,  49,   0,   0,   7, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-      0,   0,   0,   8,  66,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  10,   0,  16, 128, 
-     65,   0,   0,   0,   2,   0, 
-      0,   0,  14,   0,   0,   7, 
-    130,   0,  16,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     28,   0,   0,   5, 130,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58,   0,  16,   0,   2,   0, 
-      0,   0,  86,   0,   0,   5, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  58,   0,  16,   0, 
-      2,   0,   0,   0,  50,   0, 
-      0,  10,  66,   0,  16,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16, 128,  65,   0,   0,   0, 
-      3,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     42,   0,  16,   0,   2,   0, 
-      0,   0,   1,   0,   0,   7, 
-    130,   0,  16,   0,   2,   0, 
-      0,   0,  58,   0,  16,   0, 
-      2,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   7,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  42,   0,  16,   0, 
-      2,   0,   0,   0,   0,   0, 
-      0,   8,  66,   0,  16,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16,   0,   0,   0,   0,   0, 
-     42,   0,  16, 128,  65,   0, 
-      0,   0,   2,   0,   0,   0, 
-     55,   0,   0,   9,  66,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58,   0,  16,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     49,   0,   0,   7, 130,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   0,   0, 
-      0,   0,  10,   0,  16,   0, 
-      2,   0,   0,   0,   0,   0, 
-      0,   8,  66,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16, 128,  65,   0,   0,   0, 
-      0,   0,   0,   0,  10,   0, 
-     16,   0,   2,   0,   0,   0, 
-     14,   0,   0,   7,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  28,   0, 
-      0,   5,  18,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     86,   0,   0,   5,  34,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  50,   0,   0,  10, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  26,   0,  16, 128, 
-     65,   0,   0,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-      1,   0,   0,   7,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,   1,  64,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   8,  66,   0,  16,   0, 
-      0,   0,   0,   0,  42,   0, 
-     16,   0,   0,   0,   0,   0, 
-     10,   0,  16, 128,  65,   0, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   7,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  55,   0, 
-      0,   9,  66,   0,  16,   0, 
-      0,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  42,   0,  16,   0, 
-      0,   0,   0,   0,  55,   0, 
-      0,   9,  66,   0,  16,   0, 
-      0,   0,   0,   0,  58,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   0,   0, 
-      0,   0,  10,   0,  16,   0, 
-      2,   0,   0,   0,  55,   0, 
-      0,   9,  66,   0,  16,   0, 
-      0,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      0,   0,   0,   0,  49,   0, 
-      0,   7,  18,   0,  16,   0, 
-      1,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,   0,   0,   0,   8, 
-     18,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  58,   0, 
-     16, 128,  65,   0,   0,   0, 
-      3,   0,   0,   0,  14,   0, 
-      0,   7,  34,   0,  16,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   1,   0, 
-      0,   0,  28,   0,   0,   5, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  86,   0, 
-      0,   5,  66,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     50,   0,   0,  10,  18,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16, 128,  65,   0, 
-      0,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   1,   0, 
-      0,   0,  10,   0,  16,   0, 
-      2,   0,   0,   0,   1,   0, 
-      0,   7,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-      1,  64,   0,   0,   1,   0, 
-      0,   0,   0,   0,   0,   7, 
-     66,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  10,   0, 
-     16,   0,   2,   0,   0,   0, 
-      0,   0,   0,   8,  18,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58,   0,  16,   0,   0,   0, 
-      0,   0,  10,   0,  16, 128, 
-     65,   0,   0,   0,   2,   0, 
-      0,   0,  55,   0,   0,   9, 
-     18,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   2,   0, 
-      0,   0,  49,   0,   0,   7, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  58,   0,  16,   0, 
-      0,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-      0,   0,   0,   8,  66,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58,   0,  16, 128,  65,   0, 
-      0,   0,   0,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  14,   0,   0,   7, 
-    130,   0,  16,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-     28,   0,   0,   5, 130,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58,   0,  16,   0,   2,   0, 
-      0,   0,  86,   0,   0,   5, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  58,   0,  16,   0, 
-      2,   0,   0,   0,  50,   0, 
-      0,  10,  34,   0,  16,   0, 
-      1,   0,   0,   0,  42,   0, 
-     16, 128,  65,   0,   0,   0, 
-      3,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-     42,   0,  16,   0,   2,   0, 
-      0,   0,   1,   0,   0,   7, 
-     66,   0,  16,   0,   2,   0, 
-      0,   0,  58,   0,  16,   0, 
-      2,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   8, 130,   0, 
-     16,   0,   0,   0,   0,   0, 
-     58,   0,  16,   0,   0,   0, 
-      0,   0,  26,   0,  16, 128, 
-     65,   0,   0,   0,   1,   0, 
-      0,   0,   0,   0,   0,   7, 
-     34,   0,  16,   0,   0,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-     55,   0,   0,   9,  34,   0, 
-     16,   0,   0,   0,   0,   0, 
-     42,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  58,   0, 
-     16,   0,   0,   0,   0,   0, 
-     55,   0,   0,   9,  34,   0, 
-     16,   0,   0,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     55,   0,   0,   9,  34,   0, 
-     16,   0,   0,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  10,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-     28,   0,   0,   5,  98,   0, 
-     16,   0,   0,   0,   0,   0, 
-     86,   6,  16,   0,   0,   0, 
-      0,   0,  86,   0,   0,   5, 
-    194,   0,  16,   0,   2,   0, 
-      0,   0,  86,   9,  16,   0, 
-      0,   0,   0,   0,  28,   0, 
-      0,   5,  98,   0,  16,   0, 
-      0,   0,   0,   0,   6,   1, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5,  50,   0, 
-     16,   0,   1,   0,   0,   0, 
-    230,  10,  16,   0,   2,   0, 
-      0,   0,  38,   0,   0,   9, 
-      0, 208,   0,   0,  98,   0, 
-     16,   0,   0,   0,   0,   0, 
-     86,   6,  16,   0,   0,   0, 
-      0,   0,   6, 129,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  30,   0,   0,   7, 
-     34,   0,  16,   0,   0,   0, 
-      0,   0,  42,   0,  16,   0, 
-      0,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-     35,   0,   0,  10,  34,   0, 
-     16,   0,   0,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  35,   0, 
-      0,  10,  34,   0,  16,   0, 
-      0,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  35,   0, 128, 131, 153, 
-     25,   0,  34,   0,  16,   0, 
-      0,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,   6, 224,  17,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   7, 130,   0,  16,   0, 
-      8,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-    128,  63,  18,   0,   0,   1, 
-     54,   0,   0,   5, 130,   0, 
-     16,   0,   8,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  21,   0,   0,   1, 
-     21,   0,   0,   1,  21,   0, 
-      0,   1,  26,   0,   0,   5, 
-     98,   0,  16,   0,   0,   0, 
-      0,   0, 246,  14,  16,   0, 
-      1,   0,   0,   0,  56,   0, 
-      0,   7,  50,   0,  16,   0, 
-      1,   0,   0,   0, 150,   5, 
-     16,   0,   0,   0,   0,   0, 
-    150,   5,  16,   0,   0,   0, 
-      0,   0,  56,   0,   0,   7, 
-    146,   0,  16,   0,   2,   0, 
-      0,   0,  86,   9,  16,   0, 
-      0,   0,   0,   0,   6,   4, 
-     16,   0,   1,   0,   0,   0, 
-     15,   0,   0,  10,  34,   0, 
-     16,   0,   3,   0,   0,   0, 
-      2,  64,   0,   0,   0,   0, 
-     64, 191,   0,   0,  64,  63, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,  70,   0,  16,   0, 
-      5,   0,   0,   0,  17,   0, 
-      0,  10,  66,   0,  16,   0, 
-      3,   0,   0,   0,   2,  64, 
-      0,   0,   0,   0, 192,  63, 
-      0,   0, 192,  63,   0,   0, 
-     16, 192,   0,   0,  64, 191, 
-     70,  14,  16,   0,   5,   0, 
-      0,   0,  17,   0,   0,  10, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0,   2,  64,   0,   0, 
-      0,   0,  64, 191,   0,   0, 
-    160, 191,   0,   0, 160,  63, 
-      0,   0,  64,  63,  70,  14, 
-     16,   0,   5,   0,   0,   0, 
-     54,   0,   0,   5,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-    128,  63,  54,   0,   0,   5, 
-     34,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  54,   0, 
-      0,   5,  66,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     54,   0,   0,   5, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   2,   0, 
-      0,   0,  54,   0,   0,   5, 
-     18,   0,  16,   0,   3,   0, 
-      0,   0,  42,   0,  16,   0, 
-      5,   0,   0,   0,  17,   0, 
-      0,   7,  18,   0,  16,   0, 
-      3,   0,   0,   0,  70,  14, 
-     16,   0,   7,   0,   0,   0, 
-     70,  14,  16,   0,   3,   0, 
-      0,   0,  15,   0,   0,  10, 
-     34,   0,  16,   0,   5,   0, 
-      0,   0,   2,  64,   0,   0, 
-      0,   0,  64, 191,   0,   0, 
-     64,  63,   0,   0,   0,   0, 
-      0,   0,   0,   0,  70,   0, 
-     16,   0,   4,   0,   0,   0, 
-     17,   0,   0,  10,  66,   0, 
-     16,   0,   5,   0,   0,   0, 
-      2,  64,   0,   0,   0,   0, 
-    192,  63,   0,   0, 192,  63, 
-      0,   0,  16, 192,   0,   0, 
-     64, 191,  70,  14,  16,   0, 
-      4,   0,   0,   0,  17,   0, 
-      0,  10, 130,   0,  16,   0, 
-      5,   0,   0,   0,   2,  64, 
-      0,   0,   0,   0,  64, 191, 
-      0,   0, 160, 191,   0,   0, 
-    160,  63,   0,   0,  64,  63, 
-     70,  14,  16,   0,   4,   0, 
-      0,   0,  54,   0,   0,   5, 
-     18,   0,  16,   0,   5,   0, 
-      0,   0,  42,   0,  16,   0, 
-      4,   0,   0,   0,  17,   0, 
-      0,   7,  34,   0,  16,   0, 
-      3,   0,   0,   0,  70,  14, 
-     16,   0,   7,   0,   0,   0, 
-     70,  14,  16,   0,   5,   0, 
-      0,   0,  15,   0,   0,  10, 
-     34,   0,  16,   0,   4,   0, 
-      0,   0,   2,  64,   0,   0, 
-      0,   0,  64, 191,   0,   0, 
-     64,  63,   0,   0,   0,   0, 
-      0,   0,   0,   0,  70,   0, 
-     16,   0,   6,   0,   0,   0, 
-     17,   0,   0,  10,  66,   0, 
-     16,   0,   4,   0,   0,   0, 
-      2,  64,   0,   0,   0,   0, 
-    192,  63,   0,   0, 192,  63, 
-      0,   0,  16, 192,   0,   0, 
-     64, 191,  70,  14,  16,   0, 
-      6,   0,   0,   0,  17,   0, 
-      0,  10, 130,   0,  16,   0, 
-      4,   0,   0,   0,   2,  64, 
-      0,   0,   0,   0,  64, 191, 
-      0,   0, 160, 191,   0,   0, 
-    160,  63,   0,   0,  64,  63, 
-     70,  14,  16,   0,   6,   0, 
-      0,   0,  54,   0,   0,   5, 
-     18,   0,  16,   0,   4,   0, 
-      0,   0,  42,   0,  16,   0, 
-      6,   0,   0,   0,  17,   0, 
-      0,   7,  66,   0,  16,   0, 
-      3,   0,   0,   0,  70,  14, 
-     16,   0,   7,   0,   0,   0, 
-     70,  14,  16,   0,   4,   0, 
-      0,   0,  15,   0,   0,  10, 
-     34,   0,  16,   0,   4,   0, 
-      0,   0,   2,  64,   0,   0, 
-      0,   0,  64, 191,   0,   0, 
-     64,  63,   0,   0,   0,   0, 
-      0,   0,   0,   0,  70,   0, 
-     16,   0,   8,   0,   0,   0, 
-     17,   0,   0,  10,  66,   0, 
-     16,   0,   4,   0,   0,   0, 
-      2,  64,   0,   0,   0,   0, 
-    192,  63,   0,   0, 192,  63, 
-      0,   0,  16, 192,   0,   0, 
-     64, 191,  70,  14,  16,   0, 
-      8,   0,   0,   0,  17,   0, 
-      0,  10, 130,   0,  16,   0, 
-      4,   0,   0,   0,   2,  64, 
-      0,   0,   0,   0,  64, 191, 
-      0,   0, 160, 191,   0,   0, 
-    160,  63,   0,   0,  64,  63, 
-     70,  14,  16,   0,   8,   0, 
-      0,   0,  54,   0,   0,   5, 
-     18,   0,  16,   0,   4,   0, 
-      0,   0,  42,   0,  16,   0, 
-      8,   0,   0,   0,  17,   0, 
-      0,   7, 130,   0,  16,   0, 
-      3,   0,   0,   0,  70,  14, 
-     16,   0,   7,   0,   0,   0, 
-     70,  14,  16,   0,   4,   0, 
-      0,   0,  15,   0,   0,  10, 
-     34,   0,  16,   0,   4,   0, 
-      0,   0,   2,  64,   0,   0, 
-      0,   0,  64, 191,   0,   0, 
-     64,  63,   0,   0,   0,   0, 
-      0,   0,   0,   0, 134,   0, 
-     16,   0,   3,   0,   0,   0, 
-     17,   0,   0,  10,  66,   0, 
-     16,   0,   4,   0,   0,   0, 
-      2,  64,   0,   0,   0,   0, 
-    192,  63,   0,   0,  16, 192, 
-      0,   0, 192,  63,   0,   0, 
-     64, 191,  70,  14,  16,   0, 
-      3,   0,   0,   0,  17,   0, 
-      0,  10, 130,   0,  16,   0, 
-      4,   0,   0,   0,   2,  64, 
-      0,   0,   0,   0,  64, 191, 
-      0,   0, 160,  63,   0,   0, 
-    160, 191,   0,   0,  64,  63, 
-     70,  14,  16,   0,   3,   0, 
-      0,   0,  54,   0,   0,   5, 
-     18,   0,  16,   0,   2,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0, 128,  63,  54,   0, 
-      0,   5,  34,   0,  16,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16,   0,   0,   0,   0,   0, 
-     54,   0,   0,   5,  66,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   1,   0, 
-      0,   0,  54,   0,   0,   5, 
-     18,   0,  16,   0,   4,   0, 
-      0,   0,  26,   0,  16,   0, 
-      3,   0,   0,   0,  17,   0, 
-      0,   7,  34,   0,  16,   0, 
-      0,   0,   0,   0,  70,  14, 
-     16,   0,   2,   0,   0,   0, 
-     70,  14,  16,   0,   4,   0, 
-      0,   0,  57,   0,   0,  10, 
-     34,   0,  16,   0,   0,   0, 
-      0,   0,   2,  64,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-    168,   0,   0,   9,  18, 224, 
-     17,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   0,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-     21,   0,   0,   1,  21,   0, 
-      0,   1,  21,   0,   0,   1, 
-     21,   0,   0,   1,  62,   0, 
-      0,   1
-};
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_bool_fp16.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_bool_fp16.h
deleted file mode 100644
index 60abe560e1582..0000000000000
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_bool_fp16.h
+++ /dev/null
@@ -1,6392 +0,0 @@
-#if 0
-;
-; Note: shader requires additional functionality:
-;       Use native low precision
-;
-;
-; Input signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; no parameters
-;
-; Output signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; no parameters
-; shader hash: f70161fb922351a3e83a11275e73dc46
-;
-; Pipeline Runtime Information: 
-;
-;
-;
-; Buffer Definitions:
-;
-; cbuffer 
-; {
-;
-;   [116 x i8] (type annotation not present)
-;
-; }
-;
-; Resource bind info for 
-; {
-;
-;   [4 x i8] (type annotation not present)
-;
-; }
-;
-; Resource bind info for 
-; {
-;
-;   [2 x i8] (type annotation not present)
-;
-; }
-;
-; Resource bind info for 
-; {
-;
-;   [4 x i8] (type annotation not present)
-;
-; }
-;
-;
-; Resource Bindings:
-;
-; Name                                 Type  Format         Dim      ID      HLSL Bind  Count
-; ------------------------------ ---------- ------- ----------- ------- -------------- ------
-;                                   cbuffer      NA          NA     CB0            cb0     1
-;                                       UAV  struct         r/w      U0             u0     1
-;                                       UAV  struct         r/w      U1             u1     1
-;                                       UAV  struct         r/w      U2             u2     1
-;
-target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
-target triple = "dxil-ms-dx"
-
-%dx.types.Handle = type { i8* }
-%dx.types.CBufRet.i32 = type { i32, i32, i32, i32 }
-%dx.types.ResRet.f16 = type { half, half, half, half, i32 }
-%dx.types.ResRet.i32 = type { i32, i32, i32, i32, i32 }
-%"class.RWStructuredBuffer<bool>" = type { i32 }
-%"class.RWStructuredBuffer<half>" = type { half }
-%Constants = type { i32, i32, i32, i32, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32 }
-
-define void @GridSample() {
-  %1 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 2, i32 2, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %2 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 1, i32 1, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %3 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %4 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %5 = call i32 @dx.op.threadId.i32(i32 93, i32 0)  ; ThreadId(component)
-  %6 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
-  %7 = extractvalue %dx.types.CBufRet.i32 %6, 0
-  %8 = add i32 %7, %5
-  %9 = extractvalue %dx.types.CBufRet.i32 %6, 1
-  %10 = icmp ult i32 %8, %9
-  br i1 %10, label %11, label %3385
-
-; <label>:11                                      ; preds = %0
-  %12 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
-  %13 = extractvalue %dx.types.CBufRet.i32 %12, 3
-  %14 = uitofp i32 %13 to float
-  %15 = extractvalue %dx.types.CBufRet.i32 %12, 2
-  %16 = uitofp i32 %15 to float
-  %17 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 7)  ; CBufferLoadLegacy(handle,regIndex)
-  %18 = extractvalue %dx.types.CBufRet.i32 %17, 0
-  %19 = icmp eq i32 %18, 0
-  %20 = select i1 %19, float -5.000000e-01, float 0.000000e+00
-  %21 = select i1 %19, float -5.000000e-01, float -1.000000e+00
-  %22 = fadd float %14, %21
-  %23 = select i1 %19, float -5.000000e-01, float -1.000000e+00
-  %24 = fadd float %16, %23
-  %25 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
-  %26 = extractvalue %dx.types.CBufRet.i32 %25, 1
-  %27 = extractvalue %dx.types.CBufRet.i32 %25, 2
-  %28 = extractvalue %dx.types.CBufRet.i32 %25, 3
-  %29 = mul i32 %28, %27
-  %30 = mul i32 %27, %26
-  %31 = mul i32 %30, %28
-  %32 = udiv i32 %8, %31
-  %33 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 6)  ; CBufferLoadLegacy(handle,regIndex)
-  %34 = extractvalue %dx.types.CBufRet.i32 %33, 0
-  %35 = mul i32 %34, %32
-  %36 = sub i32 %8, %35
-  %37 = udiv i32 %36, %29
-  %38 = extractvalue %dx.types.CBufRet.i32 %33, 1
-  %39 = mul i32 %38, %37
-  %40 = sub i32 %36, %39
-  %41 = udiv i32 %40, %28
-  %42 = extractvalue %dx.types.CBufRet.i32 %33, 2
-  %43 = mul i32 %42, %41
-  %44 = sub i32 %40, %43
-  %45 = uitofp i32 %32 to float
-  %46 = uitofp i32 %41 to float
-  %47 = uitofp i32 %44 to float
-  %48 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
-  %49 = extractvalue %dx.types.CBufRet.i32 %48, 0
-  %50 = extractvalue %dx.types.CBufRet.i32 %48, 1
-  %51 = extractvalue %dx.types.CBufRet.i32 %48, 2
-  %52 = extractvalue %dx.types.CBufRet.i32 %48, 3
-  %53 = uitofp i32 %49 to float
-  %54 = uitofp i32 %50 to float
-  %55 = uitofp i32 %51 to float
-  %56 = uitofp i32 %52 to float
-  %57 = call float @dx.op.dot4.f32(i32 56, float %45, float %46, float %47, float 0.000000e+00, float %53, float %54, float %55, float %56)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %58 = fadd fast float %56, %57
-  %59 = fptoui float %57 to i32
-  %60 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %2, i32 %59, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %61 = extractvalue %dx.types.ResRet.f16 %60, 0
-  %62 = fpext half %61 to float
-  %63 = fptoui float %58 to i32
-  %64 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %2, i32 %63, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %65 = extractvalue %dx.types.ResRet.f16 %64, 0
-  %66 = fpext half %65 to float
-  %67 = icmp eq i32 %18, 1
-  %68 = fadd fast float %62, 1.000000e+00
-  %69 = fadd fast float %66, 1.000000e+00
-  br i1 %67, label %70, label %77
-
-; <label>:70                                      ; preds = %11
-  %71 = fmul fast float %68, 5.000000e-01
-  %72 = fmul fast float %69, 5.000000e-01
-  %73 = fadd fast float %14, -1.000000e+00
-  %74 = fadd fast float %16, -1.000000e+00
-  %75 = fmul fast float %71, %73
-  %76 = fmul fast float %72, %74
-  br label %84
-
-; <label>:77                                      ; preds = %11
-  %78 = fmul fast float %14, %68
-  %79 = fmul fast float %69, %16
-  %80 = fadd fast float %78, -1.000000e+00
-  %81 = fadd fast float %79, -1.000000e+00
-  %82 = fmul fast float %80, 5.000000e-01
-  %83 = fmul fast float %81, 5.000000e-01
-  br label %84
-
-; <label>:84                                      ; preds = %77, %70
-  %85 = phi float [ %75, %70 ], [ %82, %77 ]
-  %86 = phi float [ %76, %70 ], [ %83, %77 ]
-  %87 = extractvalue %dx.types.CBufRet.i32 %6, 2
-  %88 = icmp eq i32 %87, 1
-  br i1 %88, label %89, label %92
-
-; <label>:89                                      ; preds = %84
-  %90 = call float @dx.op.unary.f32(i32 26, float %85)  ; Round_ne(value)
-  %91 = call float @dx.op.unary.f32(i32 26, float %86)  ; Round_ne(value)
-  br label %92
-
-; <label>:92                                      ; preds = %89, %84
-  %93 = phi float [ %90, %89 ], [ %85, %84 ]
-  %94 = phi float [ %91, %89 ], [ %86, %84 ]
-  %95 = fcmp fast olt float %93, %20
-  %96 = fcmp fast ogt float %93, %22
-  %97 = or i1 %95, %96
-  %98 = fcmp fast olt float %94, %20
-  %99 = or i1 %97, %98
-  %100 = fcmp fast ogt float %94, %24
-  %101 = or i1 %100, %99
-  br i1 %101, label %102, label %175
-
-; <label>:102                                     ; preds = %92
-  %103 = extractvalue %dx.types.CBufRet.i32 %6, 3
-  %104 = icmp eq i32 %103, 1
-  br i1 %104, label %105, label %114
-
-; <label>:105                                     ; preds = %102
-  %106 = add i32 %13, -1
-  %107 = uitofp i32 %106 to float
-  %108 = call float @dx.op.binary.f32(i32 35, float %93, float 0.000000e+00)  ; FMax(a,b)
-  %109 = call float @dx.op.binary.f32(i32 36, float %108, float %107)  ; FMin(a,b)
-  %110 = add i32 %15, -1
-  %111 = uitofp i32 %110 to float
-  %112 = call float @dx.op.binary.f32(i32 35, float %94, float 0.000000e+00)  ; FMax(a,b)
-  %113 = call float @dx.op.binary.f32(i32 36, float %112, float %111)  ; FMin(a,b)
-  br label %175
-
-; <label>:114                                     ; preds = %102
-  %115 = icmp eq i32 %103, 2
-  br i1 %115, label %116, label %175
-
-; <label>:116                                     ; preds = %114
-  %117 = fsub fast float %22, %20
-  br i1 %95, label %118, label %131
-
-; <label>:118                                     ; preds = %116
-  %119 = fsub fast float %20, %93
-  %120 = fdiv fast float %119, %117
-  %121 = fptoui float %120 to i32
-  %122 = uitofp i32 %121 to float
-  %123 = fmul fast float %122, %117
-  %124 = fsub fast float %119, %123
-  %125 = and i32 %121, 1
-  %126 = icmp eq i32 %125, 0
-  br i1 %126, label %127, label %129
-
-; <label>:127                                     ; preds = %118
-  %128 = fadd fast float %124, %20
-  br label %145
-
-; <label>:129                                     ; preds = %118
-  %130 = fsub fast float %22, %124
-  br label %145
-
-; <label>:131                                     ; preds = %116
-  br i1 %96, label %132, label %145
-
-; <label>:132                                     ; preds = %131
-  %133 = fsub fast float %93, %22
-  %134 = fdiv fast float %133, %117
-  %135 = fptoui float %134 to i32
-  %136 = uitofp i32 %135 to float
-  %137 = fmul fast float %136, %117
-  %138 = fsub fast float %133, %137
-  %139 = and i32 %135, 1
-  %140 = icmp eq i32 %139, 0
-  br i1 %140, label %141, label %143
-
-; <label>:141                                     ; preds = %132
-  %142 = fsub fast float %22, %138
-  br label %145
-
-; <label>:143                                     ; preds = %132
-  %144 = fadd fast float %138, %20
-  br label %145
-
-; <label>:145                                     ; preds = %143, %141, %131, %129, %127
-  %146 = phi float [ %128, %127 ], [ %130, %129 ], [ %142, %141 ], [ %144, %143 ], [ %93, %131 ]
-  %147 = fsub fast float %24, %20
-  br i1 %98, label %148, label %161
-
-; <label>:148                                     ; preds = %145
-  %149 = fsub fast float %20, %94
-  %150 = fdiv fast float %149, %147
-  %151 = fptoui float %150 to i32
-  %152 = uitofp i32 %151 to float
-  %153 = fmul fast float %152, %147
-  %154 = fsub fast float %149, %153
-  %155 = and i32 %151, 1
-  %156 = icmp eq i32 %155, 0
-  br i1 %156, label %157, label %159
-
-; <label>:157                                     ; preds = %148
-  %158 = fadd fast float %154, %20
-  br label %175
-
-; <label>:159                                     ; preds = %148
-  %160 = fsub fast float %24, %154
-  br label %175
-
-; <label>:161                                     ; preds = %145
-  br i1 %100, label %162, label %175
-
-; <label>:162                                     ; preds = %161
-  %163 = fsub fast float %94, %24
-  %164 = fdiv fast float %163, %147
-  %165 = fptoui float %164 to i32
-  %166 = uitofp i32 %165 to float
-  %167 = fmul fast float %166, %147
-  %168 = fsub fast float %163, %167
-  %169 = and i32 %165, 1
-  %170 = icmp eq i32 %169, 0
-  br i1 %170, label %171, label %173
-
-; <label>:171                                     ; preds = %162
-  %172 = fsub fast float %24, %168
-  br label %175
-
-; <label>:173                                     ; preds = %162
-  %174 = fadd fast float %168, %20
-  br label %175
-
-; <label>:175                                     ; preds = %173, %171, %161, %159, %157, %114, %105, %92
-  %176 = phi float [ %109, %105 ], [ %93, %114 ], [ %93, %92 ], [ %146, %173 ], [ %146, %171 ], [ %146, %161 ], [ %146, %159 ], [ %146, %157 ]
-  %177 = phi float [ %113, %105 ], [ %94, %114 ], [ %94, %92 ], [ %174, %173 ], [ %172, %171 ], [ %94, %161 ], [ %160, %159 ], [ %158, %157 ]
-  %178 = uitofp i32 %37 to float
-  br i1 %88, label %179, label %332
-
-; <label>:179                                     ; preds = %175
-  %180 = extractvalue %dx.types.CBufRet.i32 %6, 3
-  %181 = icmp eq i32 %180, 0
-  br i1 %181, label %182, label %208
-
-; <label>:182                                     ; preds = %179
-  %183 = fcmp fast oge float %176, 0.000000e+00
-  %184 = fptoui float %176 to i32
-  %185 = icmp ult i32 %184, %13
-  %186 = and i1 %183, %185
-  %187 = fcmp fast oge float %177, 0.000000e+00
-  %188 = and i1 %187, %186
-  %189 = fptoui float %177 to i32
-  %190 = icmp ult i32 %189, %15
-  %191 = and i1 %190, %188
-  br i1 %191, label %192, label %328
-
-; <label>:192                                     ; preds = %182
-  %193 = fptoui float %45 to i32
-  %194 = fptoui float %178 to i32
-  %195 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %196 = extractvalue %dx.types.CBufRet.i32 %195, 0
-  %197 = extractvalue %dx.types.CBufRet.i32 %195, 1
-  %198 = extractvalue %dx.types.CBufRet.i32 %195, 2
-  %199 = extractvalue %dx.types.CBufRet.i32 %195, 3
-  %200 = mul i32 %196, %193
-  %201 = call i32 @dx.op.tertiary.i32(i32 48, i32 %194, i32 %197, i32 %200)  ; IMad(a,b,c)
-  %202 = call i32 @dx.op.tertiary.i32(i32 48, i32 %189, i32 %198, i32 %201)  ; IMad(a,b,c)
-  %203 = call i32 @dx.op.tertiary.i32(i32 48, i32 %184, i32 %199, i32 %202)  ; IMad(a,b,c)
-  %204 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %203, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %205 = extractvalue %dx.types.ResRet.i32 %204, 0
-  %206 = icmp ne i32 %205, 0
-  %207 = uitofp i1 %206 to float
-  br label %328
-
-; <label>:208                                     ; preds = %179
-  %209 = icmp eq i32 %180, 1
-  br i1 %209, label %210, label %240
-
-; <label>:210                                     ; preds = %208
-  %211 = add i32 %13, -1
-  %212 = uitofp i32 %211 to float
-  %213 = call float @dx.op.binary.f32(i32 35, float %176, float 0.000000e+00)  ; FMax(a,b)
-  %214 = call float @dx.op.binary.f32(i32 36, float %213, float %212)  ; FMin(a,b)
-  %215 = fptoui float %214 to i32
-  %216 = add i32 %15, -1
-  %217 = uitofp i32 %216 to float
-  %218 = call float @dx.op.binary.f32(i32 35, float %177, float 0.000000e+00)  ; FMax(a,b)
-  %219 = call float @dx.op.binary.f32(i32 36, float %218, float %217)  ; FMin(a,b)
-  %220 = fptoui float %219 to i32
-  %221 = uitofp i32 %220 to float
-  %222 = uitofp i32 %215 to float
-  %223 = fptoui float %45 to i32
-  %224 = fptoui float %178 to i32
-  %225 = fptoui float %221 to i32
-  %226 = fptoui float %222 to i32
-  %227 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %228 = extractvalue %dx.types.CBufRet.i32 %227, 0
-  %229 = extractvalue %dx.types.CBufRet.i32 %227, 1
-  %230 = extractvalue %dx.types.CBufRet.i32 %227, 2
-  %231 = extractvalue %dx.types.CBufRet.i32 %227, 3
-  %232 = mul i32 %228, %223
-  %233 = call i32 @dx.op.tertiary.i32(i32 48, i32 %224, i32 %229, i32 %232)  ; IMad(a,b,c)
-  %234 = call i32 @dx.op.tertiary.i32(i32 48, i32 %225, i32 %230, i32 %233)  ; IMad(a,b,c)
-  %235 = call i32 @dx.op.tertiary.i32(i32 48, i32 %226, i32 %231, i32 %234)  ; IMad(a,b,c)
-  %236 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %235, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %237 = extractvalue %dx.types.ResRet.i32 %236, 0
-  %238 = icmp ne i32 %237, 0
-  %239 = uitofp i1 %238 to float
-  br label %328
-
-; <label>:240                                     ; preds = %208
-  %241 = icmp eq i32 %180, 2
-  br i1 %241, label %242, label %328
-
-; <label>:242                                     ; preds = %240
-  %243 = fsub fast float %22, %20
-  %244 = fcmp fast olt float %176, %20
-  br i1 %244, label %245, label %258
-
-; <label>:245                                     ; preds = %242
-  %246 = fsub fast float %20, %176
-  %247 = fdiv fast float %246, %243
-  %248 = fptoui float %247 to i32
-  %249 = uitofp i32 %248 to float
-  %250 = fmul fast float %249, %243
-  %251 = fsub fast float %246, %250
-  %252 = and i32 %248, 1
-  %253 = icmp eq i32 %252, 0
-  br i1 %253, label %254, label %256
-
-; <label>:254                                     ; preds = %245
-  %255 = fadd fast float %251, %20
-  br label %273
-
-; <label>:256                                     ; preds = %245
-  %257 = fsub fast float %22, %251
-  br label %273
-
-; <label>:258                                     ; preds = %242
-  %259 = fcmp fast ogt float %176, %22
-  br i1 %259, label %260, label %273
-
-; <label>:260                                     ; preds = %258
-  %261 = fsub fast float %176, %22
-  %262 = fdiv fast float %261, %243
-  %263 = fptoui float %262 to i32
-  %264 = uitofp i32 %263 to float
-  %265 = fmul fast float %264, %243
-  %266 = fsub fast float %261, %265
-  %267 = and i32 %263, 1
-  %268 = icmp eq i32 %267, 0
-  br i1 %268, label %269, label %271
-
-; <label>:269                                     ; preds = %260
-  %270 = fsub fast float %22, %266
-  br label %273
-
-; <label>:271                                     ; preds = %260
-  %272 = fadd fast float %266, %20
-  br label %273
-
-; <label>:273                                     ; preds = %271, %269, %258, %256, %254
-  %274 = phi float [ %255, %254 ], [ %257, %256 ], [ %270, %269 ], [ %272, %271 ], [ %176, %258 ]
-  %275 = fptoui float %274 to i32
-  %276 = fsub fast float %24, %20
-  %277 = fcmp fast olt float %177, %20
-  br i1 %277, label %278, label %291
-
-; <label>:278                                     ; preds = %273
-  %279 = fsub fast float %20, %177
-  %280 = fdiv fast float %279, %276
-  %281 = fptoui float %280 to i32
-  %282 = uitofp i32 %281 to float
-  %283 = fmul fast float %282, %276
-  %284 = fsub fast float %279, %283
-  %285 = and i32 %281, 1
-  %286 = icmp eq i32 %285, 0
-  br i1 %286, label %287, label %289
-
-; <label>:287                                     ; preds = %278
-  %288 = fadd fast float %284, %20
-  br label %306
-
-; <label>:289                                     ; preds = %278
-  %290 = fsub fast float %24, %284
-  br label %306
-
-; <label>:291                                     ; preds = %273
-  %292 = fcmp fast ogt float %177, %24
-  br i1 %292, label %293, label %306
-
-; <label>:293                                     ; preds = %291
-  %294 = fsub fast float %177, %24
-  %295 = fdiv fast float %294, %276
-  %296 = fptoui float %295 to i32
-  %297 = uitofp i32 %296 to float
-  %298 = fmul fast float %297, %276
-  %299 = fsub fast float %294, %298
-  %300 = and i32 %296, 1
-  %301 = icmp eq i32 %300, 0
-  br i1 %301, label %302, label %304
-
-; <label>:302                                     ; preds = %293
-  %303 = fsub fast float %24, %299
-  br label %306
-
-; <label>:304                                     ; preds = %293
-  %305 = fadd fast float %299, %20
-  br label %306
-
-; <label>:306                                     ; preds = %304, %302, %291, %289, %287
-  %307 = phi float [ %288, %287 ], [ %290, %289 ], [ %303, %302 ], [ %305, %304 ], [ %177, %291 ]
-  %308 = fptoui float %307 to i32
-  %309 = uitofp i32 %308 to float
-  %310 = uitofp i32 %275 to float
-  %311 = fptoui float %45 to i32
-  %312 = fptoui float %178 to i32
-  %313 = fptoui float %309 to i32
-  %314 = fptoui float %310 to i32
-  %315 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %316 = extractvalue %dx.types.CBufRet.i32 %315, 0
-  %317 = extractvalue %dx.types.CBufRet.i32 %315, 1
-  %318 = extractvalue %dx.types.CBufRet.i32 %315, 2
-  %319 = extractvalue %dx.types.CBufRet.i32 %315, 3
-  %320 = mul i32 %316, %311
-  %321 = call i32 @dx.op.tertiary.i32(i32 48, i32 %312, i32 %317, i32 %320)  ; IMad(a,b,c)
-  %322 = call i32 @dx.op.tertiary.i32(i32 48, i32 %313, i32 %318, i32 %321)  ; IMad(a,b,c)
-  %323 = call i32 @dx.op.tertiary.i32(i32 48, i32 %314, i32 %319, i32 %322)  ; IMad(a,b,c)
-  %324 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %323, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %325 = extractvalue %dx.types.ResRet.i32 %324, 0
-  %326 = icmp ne i32 %325, 0
-  %327 = uitofp i1 %326 to float
-  br label %328
-
-; <label>:328                                     ; preds = %306, %240, %210, %192, %182
-  %329 = phi float [ %207, %192 ], [ 0.000000e+00, %182 ], [ %239, %210 ], [ %327, %306 ], [ 0.000000e+00, %240 ]
-  %330 = fcmp fast une float %329, 0.000000e+00
-  %331 = zext i1 %330 to i32
-  call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %1, i32 %8, i32 0, i32 %331, i32 undef, i32 undef, i32 undef, i8 1, i32 4)  ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment)
-  br label %3385
-
-; <label>:332                                     ; preds = %175
-  %333 = icmp eq i32 %87, 0
-  br i1 %333, label %334, label %946
-
-; <label>:334                                     ; preds = %332
-  %335 = call float @dx.op.unary.f32(i32 27, float %176)  ; Round_ni(value)
-  %336 = call float @dx.op.unary.f32(i32 27, float %177)  ; Round_ni(value)
-  %337 = fadd fast float %335, 1.000000e+00
-  %338 = fadd fast float %336, 1.000000e+00
-  %339 = extractvalue %dx.types.CBufRet.i32 %6, 3
-  %340 = icmp eq i32 %339, 0
-  br i1 %340, label %341, label %367
-
-; <label>:341                                     ; preds = %334
-  %342 = fcmp fast oge float %335, 0.000000e+00
-  %343 = fptoui float %335 to i32
-  %344 = icmp ult i32 %343, %13
-  %345 = and i1 %342, %344
-  %346 = fcmp fast oge float %336, 0.000000e+00
-  %347 = and i1 %346, %345
-  %348 = fptoui float %336 to i32
-  %349 = icmp ult i32 %348, %15
-  %350 = and i1 %349, %347
-  br i1 %350, label %351, label %487
-
-; <label>:351                                     ; preds = %341
-  %352 = fptoui float %45 to i32
-  %353 = fptoui float %178 to i32
-  %354 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %355 = extractvalue %dx.types.CBufRet.i32 %354, 0
-  %356 = extractvalue %dx.types.CBufRet.i32 %354, 1
-  %357 = extractvalue %dx.types.CBufRet.i32 %354, 2
-  %358 = extractvalue %dx.types.CBufRet.i32 %354, 3
-  %359 = mul i32 %355, %352
-  %360 = call i32 @dx.op.tertiary.i32(i32 48, i32 %353, i32 %356, i32 %359)  ; IMad(a,b,c)
-  %361 = call i32 @dx.op.tertiary.i32(i32 48, i32 %348, i32 %357, i32 %360)  ; IMad(a,b,c)
-  %362 = call i32 @dx.op.tertiary.i32(i32 48, i32 %343, i32 %358, i32 %361)  ; IMad(a,b,c)
-  %363 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %362, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %364 = extractvalue %dx.types.ResRet.i32 %363, 0
-  %365 = icmp ne i32 %364, 0
-  %366 = uitofp i1 %365 to float
-  br label %487
-
-; <label>:367                                     ; preds = %334
-  %368 = icmp eq i32 %339, 1
-  br i1 %368, label %369, label %399
-
-; <label>:369                                     ; preds = %367
-  %370 = add i32 %13, -1
-  %371 = uitofp i32 %370 to float
-  %372 = call float @dx.op.binary.f32(i32 35, float %335, float 0.000000e+00)  ; FMax(a,b)
-  %373 = call float @dx.op.binary.f32(i32 36, float %372, float %371)  ; FMin(a,b)
-  %374 = fptoui float %373 to i32
-  %375 = add i32 %15, -1
-  %376 = uitofp i32 %375 to float
-  %377 = call float @dx.op.binary.f32(i32 35, float %336, float 0.000000e+00)  ; FMax(a,b)
-  %378 = call float @dx.op.binary.f32(i32 36, float %377, float %376)  ; FMin(a,b)
-  %379 = fptoui float %378 to i32
-  %380 = uitofp i32 %379 to float
-  %381 = uitofp i32 %374 to float
-  %382 = fptoui float %45 to i32
-  %383 = fptoui float %178 to i32
-  %384 = fptoui float %380 to i32
-  %385 = fptoui float %381 to i32
-  %386 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %387 = extractvalue %dx.types.CBufRet.i32 %386, 0
-  %388 = extractvalue %dx.types.CBufRet.i32 %386, 1
-  %389 = extractvalue %dx.types.CBufRet.i32 %386, 2
-  %390 = extractvalue %dx.types.CBufRet.i32 %386, 3
-  %391 = mul i32 %387, %382
-  %392 = call i32 @dx.op.tertiary.i32(i32 48, i32 %383, i32 %388, i32 %391)  ; IMad(a,b,c)
-  %393 = call i32 @dx.op.tertiary.i32(i32 48, i32 %384, i32 %389, i32 %392)  ; IMad(a,b,c)
-  %394 = call i32 @dx.op.tertiary.i32(i32 48, i32 %385, i32 %390, i32 %393)  ; IMad(a,b,c)
-  %395 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %394, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %396 = extractvalue %dx.types.ResRet.i32 %395, 0
-  %397 = icmp ne i32 %396, 0
-  %398 = uitofp i1 %397 to float
-  br label %487
-
-; <label>:399                                     ; preds = %367
-  %400 = icmp eq i32 %339, 2
-  br i1 %400, label %401, label %487
-
-; <label>:401                                     ; preds = %399
-  %402 = fsub fast float %22, %20
-  %403 = fcmp fast olt float %335, %20
-  br i1 %403, label %404, label %417
-
-; <label>:404                                     ; preds = %401
-  %405 = fsub fast float %20, %335
-  %406 = fdiv fast float %405, %402
-  %407 = fptoui float %406 to i32
-  %408 = uitofp i32 %407 to float
-  %409 = fmul fast float %408, %402
-  %410 = fsub fast float %405, %409
-  %411 = and i32 %407, 1
-  %412 = icmp eq i32 %411, 0
-  br i1 %412, label %413, label %415
-
-; <label>:413                                     ; preds = %404
-  %414 = fadd fast float %410, %20
-  br label %432
-
-; <label>:415                                     ; preds = %404
-  %416 = fsub fast float %22, %410
-  br label %432
-
-; <label>:417                                     ; preds = %401
-  %418 = fcmp fast ogt float %335, %22
-  br i1 %418, label %419, label %432
-
-; <label>:419                                     ; preds = %417
-  %420 = fsub fast float %335, %22
-  %421 = fdiv fast float %420, %402
-  %422 = fptoui float %421 to i32
-  %423 = uitofp i32 %422 to float
-  %424 = fmul fast float %423, %402
-  %425 = fsub fast float %420, %424
-  %426 = and i32 %422, 1
-  %427 = icmp eq i32 %426, 0
-  br i1 %427, label %428, label %430
-
-; <label>:428                                     ; preds = %419
-  %429 = fsub fast float %22, %425
-  br label %432
-
-; <label>:430                                     ; preds = %419
-  %431 = fadd fast float %425, %20
-  br label %432
-
-; <label>:432                                     ; preds = %430, %428, %417, %415, %413
-  %433 = phi float [ %414, %413 ], [ %416, %415 ], [ %429, %428 ], [ %431, %430 ], [ %335, %417 ]
-  %434 = fptoui float %433 to i32
-  %435 = fsub fast float %24, %20
-  %436 = fcmp fast olt float %336, %20
-  br i1 %436, label %437, label %450
-
-; <label>:437                                     ; preds = %432
-  %438 = fsub fast float %20, %336
-  %439 = fdiv fast float %438, %435
-  %440 = fptoui float %439 to i32
-  %441 = uitofp i32 %440 to float
-  %442 = fmul fast float %441, %435
-  %443 = fsub fast float %438, %442
-  %444 = and i32 %440, 1
-  %445 = icmp eq i32 %444, 0
-  br i1 %445, label %446, label %448
-
-; <label>:446                                     ; preds = %437
-  %447 = fadd fast float %443, %20
-  br label %465
-
-; <label>:448                                     ; preds = %437
-  %449 = fsub fast float %24, %443
-  br label %465
-
-; <label>:450                                     ; preds = %432
-  %451 = fcmp fast ogt float %336, %24
-  br i1 %451, label %452, label %465
-
-; <label>:452                                     ; preds = %450
-  %453 = fsub fast float %336, %24
-  %454 = fdiv fast float %453, %435
-  %455 = fptoui float %454 to i32
-  %456 = uitofp i32 %455 to float
-  %457 = fmul fast float %456, %435
-  %458 = fsub fast float %453, %457
-  %459 = and i32 %455, 1
-  %460 = icmp eq i32 %459, 0
-  br i1 %460, label %461, label %463
-
-; <label>:461                                     ; preds = %452
-  %462 = fsub fast float %24, %458
-  br label %465
-
-; <label>:463                                     ; preds = %452
-  %464 = fadd fast float %458, %20
-  br label %465
-
-; <label>:465                                     ; preds = %463, %461, %450, %448, %446
-  %466 = phi float [ %447, %446 ], [ %449, %448 ], [ %462, %461 ], [ %464, %463 ], [ %336, %450 ]
-  %467 = fptoui float %466 to i32
-  %468 = uitofp i32 %467 to float
-  %469 = uitofp i32 %434 to float
-  %470 = fptoui float %45 to i32
-  %471 = fptoui float %178 to i32
-  %472 = fptoui float %468 to i32
-  %473 = fptoui float %469 to i32
-  %474 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %475 = extractvalue %dx.types.CBufRet.i32 %474, 0
-  %476 = extractvalue %dx.types.CBufRet.i32 %474, 1
-  %477 = extractvalue %dx.types.CBufRet.i32 %474, 2
-  %478 = extractvalue %dx.types.CBufRet.i32 %474, 3
-  %479 = mul i32 %475, %470
-  %480 = call i32 @dx.op.tertiary.i32(i32 48, i32 %471, i32 %476, i32 %479)  ; IMad(a,b,c)
-  %481 = call i32 @dx.op.tertiary.i32(i32 48, i32 %472, i32 %477, i32 %480)  ; IMad(a,b,c)
-  %482 = call i32 @dx.op.tertiary.i32(i32 48, i32 %473, i32 %478, i32 %481)  ; IMad(a,b,c)
-  %483 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %482, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %484 = extractvalue %dx.types.ResRet.i32 %483, 0
-  %485 = icmp ne i32 %484, 0
-  %486 = uitofp i1 %485 to float
-  br label %487
-
-; <label>:487                                     ; preds = %465, %399, %369, %351, %341
-  %488 = phi float [ %366, %351 ], [ 0.000000e+00, %341 ], [ %398, %369 ], [ %486, %465 ], [ 0.000000e+00, %399 ]
-  br i1 %340, label %489, label %515
-
-; <label>:489                                     ; preds = %487
-  %490 = fcmp fast oge float %337, 0.000000e+00
-  %491 = fptoui float %337 to i32
-  %492 = icmp ult i32 %491, %13
-  %493 = and i1 %490, %492
-  %494 = fcmp fast oge float %336, 0.000000e+00
-  %495 = and i1 %494, %493
-  %496 = fptoui float %336 to i32
-  %497 = icmp ult i32 %496, %15
-  %498 = and i1 %497, %495
-  br i1 %498, label %499, label %635
-
-; <label>:499                                     ; preds = %489
-  %500 = fptoui float %45 to i32
-  %501 = fptoui float %178 to i32
-  %502 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %503 = extractvalue %dx.types.CBufRet.i32 %502, 0
-  %504 = extractvalue %dx.types.CBufRet.i32 %502, 1
-  %505 = extractvalue %dx.types.CBufRet.i32 %502, 2
-  %506 = extractvalue %dx.types.CBufRet.i32 %502, 3
-  %507 = mul i32 %503, %500
-  %508 = call i32 @dx.op.tertiary.i32(i32 48, i32 %501, i32 %504, i32 %507)  ; IMad(a,b,c)
-  %509 = call i32 @dx.op.tertiary.i32(i32 48, i32 %496, i32 %505, i32 %508)  ; IMad(a,b,c)
-  %510 = call i32 @dx.op.tertiary.i32(i32 48, i32 %491, i32 %506, i32 %509)  ; IMad(a,b,c)
-  %511 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %510, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %512 = extractvalue %dx.types.ResRet.i32 %511, 0
-  %513 = icmp ne i32 %512, 0
-  %514 = uitofp i1 %513 to float
-  br label %635
-
-; <label>:515                                     ; preds = %487
-  %516 = icmp eq i32 %339, 1
-  br i1 %516, label %517, label %547
-
-; <label>:517                                     ; preds = %515
-  %518 = add i32 %13, -1
-  %519 = uitofp i32 %518 to float
-  %520 = call float @dx.op.binary.f32(i32 35, float %337, float 0.000000e+00)  ; FMax(a,b)
-  %521 = call float @dx.op.binary.f32(i32 36, float %520, float %519)  ; FMin(a,b)
-  %522 = fptoui float %521 to i32
-  %523 = add i32 %15, -1
-  %524 = uitofp i32 %523 to float
-  %525 = call float @dx.op.binary.f32(i32 35, float %336, float 0.000000e+00)  ; FMax(a,b)
-  %526 = call float @dx.op.binary.f32(i32 36, float %525, float %524)  ; FMin(a,b)
-  %527 = fptoui float %526 to i32
-  %528 = uitofp i32 %527 to float
-  %529 = uitofp i32 %522 to float
-  %530 = fptoui float %45 to i32
-  %531 = fptoui float %178 to i32
-  %532 = fptoui float %528 to i32
-  %533 = fptoui float %529 to i32
-  %534 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %535 = extractvalue %dx.types.CBufRet.i32 %534, 0
-  %536 = extractvalue %dx.types.CBufRet.i32 %534, 1
-  %537 = extractvalue %dx.types.CBufRet.i32 %534, 2
-  %538 = extractvalue %dx.types.CBufRet.i32 %534, 3
-  %539 = mul i32 %535, %530
-  %540 = call i32 @dx.op.tertiary.i32(i32 48, i32 %531, i32 %536, i32 %539)  ; IMad(a,b,c)
-  %541 = call i32 @dx.op.tertiary.i32(i32 48, i32 %532, i32 %537, i32 %540)  ; IMad(a,b,c)
-  %542 = call i32 @dx.op.tertiary.i32(i32 48, i32 %533, i32 %538, i32 %541)  ; IMad(a,b,c)
-  %543 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %542, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %544 = extractvalue %dx.types.ResRet.i32 %543, 0
-  %545 = icmp ne i32 %544, 0
-  %546 = uitofp i1 %545 to float
-  br label %635
-
-; <label>:547                                     ; preds = %515
-  %548 = icmp eq i32 %339, 2
-  br i1 %548, label %549, label %635
-
-; <label>:549                                     ; preds = %547
-  %550 = fsub fast float %22, %20
-  %551 = fcmp fast olt float %337, %20
-  br i1 %551, label %552, label %565
-
-; <label>:552                                     ; preds = %549
-  %553 = fsub fast float %20, %337
-  %554 = fdiv fast float %553, %550
-  %555 = fptoui float %554 to i32
-  %556 = uitofp i32 %555 to float
-  %557 = fmul fast float %556, %550
-  %558 = fsub fast float %553, %557
-  %559 = and i32 %555, 1
-  %560 = icmp eq i32 %559, 0
-  br i1 %560, label %561, label %563
-
-; <label>:561                                     ; preds = %552
-  %562 = fadd fast float %558, %20
-  br label %580
-
-; <label>:563                                     ; preds = %552
-  %564 = fsub fast float %22, %558
-  br label %580
-
-; <label>:565                                     ; preds = %549
-  %566 = fcmp fast ogt float %337, %22
-  br i1 %566, label %567, label %580
-
-; <label>:567                                     ; preds = %565
-  %568 = fsub fast float %337, %22
-  %569 = fdiv fast float %568, %550
-  %570 = fptoui float %569 to i32
-  %571 = uitofp i32 %570 to float
-  %572 = fmul fast float %571, %550
-  %573 = fsub fast float %568, %572
-  %574 = and i32 %570, 1
-  %575 = icmp eq i32 %574, 0
-  br i1 %575, label %576, label %578
-
-; <label>:576                                     ; preds = %567
-  %577 = fsub fast float %22, %573
-  br label %580
-
-; <label>:578                                     ; preds = %567
-  %579 = fadd fast float %573, %20
-  br label %580
-
-; <label>:580                                     ; preds = %578, %576, %565, %563, %561
-  %581 = phi float [ %562, %561 ], [ %564, %563 ], [ %577, %576 ], [ %579, %578 ], [ %337, %565 ]
-  %582 = fptoui float %581 to i32
-  %583 = fsub fast float %24, %20
-  %584 = fcmp fast olt float %336, %20
-  br i1 %584, label %585, label %598
-
-; <label>:585                                     ; preds = %580
-  %586 = fsub fast float %20, %336
-  %587 = fdiv fast float %586, %583
-  %588 = fptoui float %587 to i32
-  %589 = uitofp i32 %588 to float
-  %590 = fmul fast float %589, %583
-  %591 = fsub fast float %586, %590
-  %592 = and i32 %588, 1
-  %593 = icmp eq i32 %592, 0
-  br i1 %593, label %594, label %596
-
-; <label>:594                                     ; preds = %585
-  %595 = fadd fast float %591, %20
-  br label %613
-
-; <label>:596                                     ; preds = %585
-  %597 = fsub fast float %24, %591
-  br label %613
-
-; <label>:598                                     ; preds = %580
-  %599 = fcmp fast ogt float %336, %24
-  br i1 %599, label %600, label %613
-
-; <label>:600                                     ; preds = %598
-  %601 = fsub fast float %336, %24
-  %602 = fdiv fast float %601, %583
-  %603 = fptoui float %602 to i32
-  %604 = uitofp i32 %603 to float
-  %605 = fmul fast float %604, %583
-  %606 = fsub fast float %601, %605
-  %607 = and i32 %603, 1
-  %608 = icmp eq i32 %607, 0
-  br i1 %608, label %609, label %611
-
-; <label>:609                                     ; preds = %600
-  %610 = fsub fast float %24, %606
-  br label %613
-
-; <label>:611                                     ; preds = %600
-  %612 = fadd fast float %606, %20
-  br label %613
-
-; <label>:613                                     ; preds = %611, %609, %598, %596, %594
-  %614 = phi float [ %595, %594 ], [ %597, %596 ], [ %610, %609 ], [ %612, %611 ], [ %336, %598 ]
-  %615 = fptoui float %614 to i32
-  %616 = uitofp i32 %615 to float
-  %617 = uitofp i32 %582 to float
-  %618 = fptoui float %45 to i32
-  %619 = fptoui float %178 to i32
-  %620 = fptoui float %616 to i32
-  %621 = fptoui float %617 to i32
-  %622 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %623 = extractvalue %dx.types.CBufRet.i32 %622, 0
-  %624 = extractvalue %dx.types.CBufRet.i32 %622, 1
-  %625 = extractvalue %dx.types.CBufRet.i32 %622, 2
-  %626 = extractvalue %dx.types.CBufRet.i32 %622, 3
-  %627 = mul i32 %623, %618
-  %628 = call i32 @dx.op.tertiary.i32(i32 48, i32 %619, i32 %624, i32 %627)  ; IMad(a,b,c)
-  %629 = call i32 @dx.op.tertiary.i32(i32 48, i32 %620, i32 %625, i32 %628)  ; IMad(a,b,c)
-  %630 = call i32 @dx.op.tertiary.i32(i32 48, i32 %621, i32 %626, i32 %629)  ; IMad(a,b,c)
-  %631 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %630, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %632 = extractvalue %dx.types.ResRet.i32 %631, 0
-  %633 = icmp ne i32 %632, 0
-  %634 = uitofp i1 %633 to float
-  br label %635
-
-; <label>:635                                     ; preds = %613, %547, %517, %499, %489
-  %636 = phi float [ %514, %499 ], [ 0.000000e+00, %489 ], [ %546, %517 ], [ %634, %613 ], [ 0.000000e+00, %547 ]
-  br i1 %340, label %637, label %663
-
-; <label>:637                                     ; preds = %635
-  %638 = fcmp fast oge float %335, 0.000000e+00
-  %639 = fptoui float %335 to i32
-  %640 = icmp ult i32 %639, %13
-  %641 = and i1 %638, %640
-  %642 = fcmp fast oge float %338, 0.000000e+00
-  %643 = and i1 %642, %641
-  %644 = fptoui float %338 to i32
-  %645 = icmp ult i32 %644, %15
-  %646 = and i1 %645, %643
-  br i1 %646, label %647, label %783
-
-; <label>:647                                     ; preds = %637
-  %648 = fptoui float %45 to i32
-  %649 = fptoui float %178 to i32
-  %650 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %651 = extractvalue %dx.types.CBufRet.i32 %650, 0
-  %652 = extractvalue %dx.types.CBufRet.i32 %650, 1
-  %653 = extractvalue %dx.types.CBufRet.i32 %650, 2
-  %654 = extractvalue %dx.types.CBufRet.i32 %650, 3
-  %655 = mul i32 %651, %648
-  %656 = call i32 @dx.op.tertiary.i32(i32 48, i32 %649, i32 %652, i32 %655)  ; IMad(a,b,c)
-  %657 = call i32 @dx.op.tertiary.i32(i32 48, i32 %644, i32 %653, i32 %656)  ; IMad(a,b,c)
-  %658 = call i32 @dx.op.tertiary.i32(i32 48, i32 %639, i32 %654, i32 %657)  ; IMad(a,b,c)
-  %659 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %658, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %660 = extractvalue %dx.types.ResRet.i32 %659, 0
-  %661 = icmp ne i32 %660, 0
-  %662 = uitofp i1 %661 to float
-  br label %783
-
-; <label>:663                                     ; preds = %635
-  %664 = icmp eq i32 %339, 1
-  br i1 %664, label %665, label %695
-
-; <label>:665                                     ; preds = %663
-  %666 = add i32 %13, -1
-  %667 = uitofp i32 %666 to float
-  %668 = call float @dx.op.binary.f32(i32 35, float %335, float 0.000000e+00)  ; FMax(a,b)
-  %669 = call float @dx.op.binary.f32(i32 36, float %668, float %667)  ; FMin(a,b)
-  %670 = fptoui float %669 to i32
-  %671 = add i32 %15, -1
-  %672 = uitofp i32 %671 to float
-  %673 = call float @dx.op.binary.f32(i32 35, float %338, float 0.000000e+00)  ; FMax(a,b)
-  %674 = call float @dx.op.binary.f32(i32 36, float %673, float %672)  ; FMin(a,b)
-  %675 = fptoui float %674 to i32
-  %676 = uitofp i32 %675 to float
-  %677 = uitofp i32 %670 to float
-  %678 = fptoui float %45 to i32
-  %679 = fptoui float %178 to i32
-  %680 = fptoui float %676 to i32
-  %681 = fptoui float %677 to i32
-  %682 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %683 = extractvalue %dx.types.CBufRet.i32 %682, 0
-  %684 = extractvalue %dx.types.CBufRet.i32 %682, 1
-  %685 = extractvalue %dx.types.CBufRet.i32 %682, 2
-  %686 = extractvalue %dx.types.CBufRet.i32 %682, 3
-  %687 = mul i32 %683, %678
-  %688 = call i32 @dx.op.tertiary.i32(i32 48, i32 %679, i32 %684, i32 %687)  ; IMad(a,b,c)
-  %689 = call i32 @dx.op.tertiary.i32(i32 48, i32 %680, i32 %685, i32 %688)  ; IMad(a,b,c)
-  %690 = call i32 @dx.op.tertiary.i32(i32 48, i32 %681, i32 %686, i32 %689)  ; IMad(a,b,c)
-  %691 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %690, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %692 = extractvalue %dx.types.ResRet.i32 %691, 0
-  %693 = icmp ne i32 %692, 0
-  %694 = uitofp i1 %693 to float
-  br label %783
-
-; <label>:695                                     ; preds = %663
-  %696 = icmp eq i32 %339, 2
-  br i1 %696, label %697, label %783
-
-; <label>:697                                     ; preds = %695
-  %698 = fsub fast float %22, %20
-  %699 = fcmp fast olt float %335, %20
-  br i1 %699, label %700, label %713
-
-; <label>:700                                     ; preds = %697
-  %701 = fsub fast float %20, %335
-  %702 = fdiv fast float %701, %698
-  %703 = fptoui float %702 to i32
-  %704 = uitofp i32 %703 to float
-  %705 = fmul fast float %704, %698
-  %706 = fsub fast float %701, %705
-  %707 = and i32 %703, 1
-  %708 = icmp eq i32 %707, 0
-  br i1 %708, label %709, label %711
-
-; <label>:709                                     ; preds = %700
-  %710 = fadd fast float %706, %20
-  br label %728
-
-; <label>:711                                     ; preds = %700
-  %712 = fsub fast float %22, %706
-  br label %728
-
-; <label>:713                                     ; preds = %697
-  %714 = fcmp fast ogt float %335, %22
-  br i1 %714, label %715, label %728
-
-; <label>:715                                     ; preds = %713
-  %716 = fsub fast float %335, %22
-  %717 = fdiv fast float %716, %698
-  %718 = fptoui float %717 to i32
-  %719 = uitofp i32 %718 to float
-  %720 = fmul fast float %719, %698
-  %721 = fsub fast float %716, %720
-  %722 = and i32 %718, 1
-  %723 = icmp eq i32 %722, 0
-  br i1 %723, label %724, label %726
-
-; <label>:724                                     ; preds = %715
-  %725 = fsub fast float %22, %721
-  br label %728
-
-; <label>:726                                     ; preds = %715
-  %727 = fadd fast float %721, %20
-  br label %728
-
-; <label>:728                                     ; preds = %726, %724, %713, %711, %709
-  %729 = phi float [ %710, %709 ], [ %712, %711 ], [ %725, %724 ], [ %727, %726 ], [ %335, %713 ]
-  %730 = fptoui float %729 to i32
-  %731 = fsub fast float %24, %20
-  %732 = fcmp fast olt float %338, %20
-  br i1 %732, label %733, label %746
-
-; <label>:733                                     ; preds = %728
-  %734 = fsub fast float %20, %338
-  %735 = fdiv fast float %734, %731
-  %736 = fptoui float %735 to i32
-  %737 = uitofp i32 %736 to float
-  %738 = fmul fast float %737, %731
-  %739 = fsub fast float %734, %738
-  %740 = and i32 %736, 1
-  %741 = icmp eq i32 %740, 0
-  br i1 %741, label %742, label %744
-
-; <label>:742                                     ; preds = %733
-  %743 = fadd fast float %739, %20
-  br label %761
-
-; <label>:744                                     ; preds = %733
-  %745 = fsub fast float %24, %739
-  br label %761
-
-; <label>:746                                     ; preds = %728
-  %747 = fcmp fast ogt float %338, %24
-  br i1 %747, label %748, label %761
-
-; <label>:748                                     ; preds = %746
-  %749 = fsub fast float %338, %24
-  %750 = fdiv fast float %749, %731
-  %751 = fptoui float %750 to i32
-  %752 = uitofp i32 %751 to float
-  %753 = fmul fast float %752, %731
-  %754 = fsub fast float %749, %753
-  %755 = and i32 %751, 1
-  %756 = icmp eq i32 %755, 0
-  br i1 %756, label %757, label %759
-
-; <label>:757                                     ; preds = %748
-  %758 = fsub fast float %24, %754
-  br label %761
-
-; <label>:759                                     ; preds = %748
-  %760 = fadd fast float %754, %20
-  br label %761
-
-; <label>:761                                     ; preds = %759, %757, %746, %744, %742
-  %762 = phi float [ %743, %742 ], [ %745, %744 ], [ %758, %757 ], [ %760, %759 ], [ %338, %746 ]
-  %763 = fptoui float %762 to i32
-  %764 = uitofp i32 %763 to float
-  %765 = uitofp i32 %730 to float
-  %766 = fptoui float %45 to i32
-  %767 = fptoui float %178 to i32
-  %768 = fptoui float %764 to i32
-  %769 = fptoui float %765 to i32
-  %770 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %771 = extractvalue %dx.types.CBufRet.i32 %770, 0
-  %772 = extractvalue %dx.types.CBufRet.i32 %770, 1
-  %773 = extractvalue %dx.types.CBufRet.i32 %770, 2
-  %774 = extractvalue %dx.types.CBufRet.i32 %770, 3
-  %775 = mul i32 %771, %766
-  %776 = call i32 @dx.op.tertiary.i32(i32 48, i32 %767, i32 %772, i32 %775)  ; IMad(a,b,c)
-  %777 = call i32 @dx.op.tertiary.i32(i32 48, i32 %768, i32 %773, i32 %776)  ; IMad(a,b,c)
-  %778 = call i32 @dx.op.tertiary.i32(i32 48, i32 %769, i32 %774, i32 %777)  ; IMad(a,b,c)
-  %779 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %778, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %780 = extractvalue %dx.types.ResRet.i32 %779, 0
-  %781 = icmp ne i32 %780, 0
-  %782 = uitofp i1 %781 to float
-  br label %783
-
-; <label>:783                                     ; preds = %761, %695, %665, %647, %637
-  %784 = phi float [ %662, %647 ], [ 0.000000e+00, %637 ], [ %694, %665 ], [ %782, %761 ], [ 0.000000e+00, %695 ]
-  br i1 %340, label %785, label %811
-
-; <label>:785                                     ; preds = %783
-  %786 = fcmp fast oge float %337, 0.000000e+00
-  %787 = fptoui float %337 to i32
-  %788 = icmp ult i32 %787, %13
-  %789 = and i1 %786, %788
-  %790 = fcmp fast oge float %338, 0.000000e+00
-  %791 = and i1 %790, %789
-  %792 = fptoui float %338 to i32
-  %793 = icmp ult i32 %792, %15
-  %794 = and i1 %793, %791
-  br i1 %794, label %795, label %931
-
-; <label>:795                                     ; preds = %785
-  %796 = fptoui float %45 to i32
-  %797 = fptoui float %178 to i32
-  %798 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %799 = extractvalue %dx.types.CBufRet.i32 %798, 0
-  %800 = extractvalue %dx.types.CBufRet.i32 %798, 1
-  %801 = extractvalue %dx.types.CBufRet.i32 %798, 2
-  %802 = extractvalue %dx.types.CBufRet.i32 %798, 3
-  %803 = mul i32 %799, %796
-  %804 = call i32 @dx.op.tertiary.i32(i32 48, i32 %797, i32 %800, i32 %803)  ; IMad(a,b,c)
-  %805 = call i32 @dx.op.tertiary.i32(i32 48, i32 %792, i32 %801, i32 %804)  ; IMad(a,b,c)
-  %806 = call i32 @dx.op.tertiary.i32(i32 48, i32 %787, i32 %802, i32 %805)  ; IMad(a,b,c)
-  %807 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %806, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %808 = extractvalue %dx.types.ResRet.i32 %807, 0
-  %809 = icmp ne i32 %808, 0
-  %810 = uitofp i1 %809 to float
-  br label %931
-
-; <label>:811                                     ; preds = %783
-  %812 = icmp eq i32 %339, 1
-  br i1 %812, label %813, label %843
-
-; <label>:813                                     ; preds = %811
-  %814 = add i32 %13, -1
-  %815 = uitofp i32 %814 to float
-  %816 = call float @dx.op.binary.f32(i32 35, float %337, float 0.000000e+00)  ; FMax(a,b)
-  %817 = call float @dx.op.binary.f32(i32 36, float %816, float %815)  ; FMin(a,b)
-  %818 = fptoui float %817 to i32
-  %819 = add i32 %15, -1
-  %820 = uitofp i32 %819 to float
-  %821 = call float @dx.op.binary.f32(i32 35, float %338, float 0.000000e+00)  ; FMax(a,b)
-  %822 = call float @dx.op.binary.f32(i32 36, float %821, float %820)  ; FMin(a,b)
-  %823 = fptoui float %822 to i32
-  %824 = uitofp i32 %823 to float
-  %825 = uitofp i32 %818 to float
-  %826 = fptoui float %45 to i32
-  %827 = fptoui float %178 to i32
-  %828 = fptoui float %824 to i32
-  %829 = fptoui float %825 to i32
-  %830 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %831 = extractvalue %dx.types.CBufRet.i32 %830, 0
-  %832 = extractvalue %dx.types.CBufRet.i32 %830, 1
-  %833 = extractvalue %dx.types.CBufRet.i32 %830, 2
-  %834 = extractvalue %dx.types.CBufRet.i32 %830, 3
-  %835 = mul i32 %831, %826
-  %836 = call i32 @dx.op.tertiary.i32(i32 48, i32 %827, i32 %832, i32 %835)  ; IMad(a,b,c)
-  %837 = call i32 @dx.op.tertiary.i32(i32 48, i32 %828, i32 %833, i32 %836)  ; IMad(a,b,c)
-  %838 = call i32 @dx.op.tertiary.i32(i32 48, i32 %829, i32 %834, i32 %837)  ; IMad(a,b,c)
-  %839 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %838, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %840 = extractvalue %dx.types.ResRet.i32 %839, 0
-  %841 = icmp ne i32 %840, 0
-  %842 = uitofp i1 %841 to float
-  br label %931
-
-; <label>:843                                     ; preds = %811
-  %844 = icmp eq i32 %339, 2
-  br i1 %844, label %845, label %931
-
-; <label>:845                                     ; preds = %843
-  %846 = fsub fast float %22, %20
-  %847 = fcmp fast olt float %337, %20
-  br i1 %847, label %848, label %861
-
-; <label>:848                                     ; preds = %845
-  %849 = fsub fast float %20, %337
-  %850 = fdiv fast float %849, %846
-  %851 = fptoui float %850 to i32
-  %852 = uitofp i32 %851 to float
-  %853 = fmul fast float %852, %846
-  %854 = fsub fast float %849, %853
-  %855 = and i32 %851, 1
-  %856 = icmp eq i32 %855, 0
-  br i1 %856, label %857, label %859
-
-; <label>:857                                     ; preds = %848
-  %858 = fadd fast float %854, %20
-  br label %876
-
-; <label>:859                                     ; preds = %848
-  %860 = fsub fast float %22, %854
-  br label %876
-
-; <label>:861                                     ; preds = %845
-  %862 = fcmp fast ogt float %337, %22
-  br i1 %862, label %863, label %876
-
-; <label>:863                                     ; preds = %861
-  %864 = fsub fast float %337, %22
-  %865 = fdiv fast float %864, %846
-  %866 = fptoui float %865 to i32
-  %867 = uitofp i32 %866 to float
-  %868 = fmul fast float %867, %846
-  %869 = fsub fast float %864, %868
-  %870 = and i32 %866, 1
-  %871 = icmp eq i32 %870, 0
-  br i1 %871, label %872, label %874
-
-; <label>:872                                     ; preds = %863
-  %873 = fsub fast float %22, %869
-  br label %876
-
-; <label>:874                                     ; preds = %863
-  %875 = fadd fast float %869, %20
-  br label %876
-
-; <label>:876                                     ; preds = %874, %872, %861, %859, %857
-  %877 = phi float [ %858, %857 ], [ %860, %859 ], [ %873, %872 ], [ %875, %874 ], [ %337, %861 ]
-  %878 = fptoui float %877 to i32
-  %879 = fsub fast float %24, %20
-  %880 = fcmp fast olt float %338, %20
-  br i1 %880, label %881, label %894
-
-; <label>:881                                     ; preds = %876
-  %882 = fsub fast float %20, %338
-  %883 = fdiv fast float %882, %879
-  %884 = fptoui float %883 to i32
-  %885 = uitofp i32 %884 to float
-  %886 = fmul fast float %885, %879
-  %887 = fsub fast float %882, %886
-  %888 = and i32 %884, 1
-  %889 = icmp eq i32 %888, 0
-  br i1 %889, label %890, label %892
-
-; <label>:890                                     ; preds = %881
-  %891 = fadd fast float %887, %20
-  br label %909
-
-; <label>:892                                     ; preds = %881
-  %893 = fsub fast float %24, %887
-  br label %909
-
-; <label>:894                                     ; preds = %876
-  %895 = fcmp fast ogt float %338, %24
-  br i1 %895, label %896, label %909
-
-; <label>:896                                     ; preds = %894
-  %897 = fsub fast float %338, %24
-  %898 = fdiv fast float %897, %879
-  %899 = fptoui float %898 to i32
-  %900 = uitofp i32 %899 to float
-  %901 = fmul fast float %900, %879
-  %902 = fsub fast float %897, %901
-  %903 = and i32 %899, 1
-  %904 = icmp eq i32 %903, 0
-  br i1 %904, label %905, label %907
-
-; <label>:905                                     ; preds = %896
-  %906 = fsub fast float %24, %902
-  br label %909
-
-; <label>:907                                     ; preds = %896
-  %908 = fadd fast float %902, %20
-  br label %909
-
-; <label>:909                                     ; preds = %907, %905, %894, %892, %890
-  %910 = phi float [ %891, %890 ], [ %893, %892 ], [ %906, %905 ], [ %908, %907 ], [ %338, %894 ]
-  %911 = fptoui float %910 to i32
-  %912 = uitofp i32 %911 to float
-  %913 = uitofp i32 %878 to float
-  %914 = fptoui float %45 to i32
-  %915 = fptoui float %178 to i32
-  %916 = fptoui float %912 to i32
-  %917 = fptoui float %913 to i32
-  %918 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %919 = extractvalue %dx.types.CBufRet.i32 %918, 0
-  %920 = extractvalue %dx.types.CBufRet.i32 %918, 1
-  %921 = extractvalue %dx.types.CBufRet.i32 %918, 2
-  %922 = extractvalue %dx.types.CBufRet.i32 %918, 3
-  %923 = mul i32 %919, %914
-  %924 = call i32 @dx.op.tertiary.i32(i32 48, i32 %915, i32 %920, i32 %923)  ; IMad(a,b,c)
-  %925 = call i32 @dx.op.tertiary.i32(i32 48, i32 %916, i32 %921, i32 %924)  ; IMad(a,b,c)
-  %926 = call i32 @dx.op.tertiary.i32(i32 48, i32 %917, i32 %922, i32 %925)  ; IMad(a,b,c)
-  %927 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %926, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %928 = extractvalue %dx.types.ResRet.i32 %927, 0
-  %929 = icmp ne i32 %928, 0
-  %930 = uitofp i1 %929 to float
-  br label %931
-
-; <label>:931                                     ; preds = %909, %843, %813, %795, %785
-  %932 = phi float [ %810, %795 ], [ 0.000000e+00, %785 ], [ %842, %813 ], [ %930, %909 ], [ 0.000000e+00, %843 ]
-  %933 = call float @dx.op.unary.f32(i32 22, float %176)  ; Frc(value)
-  %934 = fsub fast float %636, %488
-  %935 = fmul fast float %933, %934
-  %936 = fadd fast float %935, %488
-  %937 = fsub fast float %932, %784
-  %938 = fmul fast float %933, %937
-  %939 = fadd fast float %938, %784
-  %940 = call float @dx.op.unary.f32(i32 22, float %177)  ; Frc(value)
-  %941 = fsub fast float %939, %936
-  %942 = fmul fast float %941, %940
-  %943 = fadd fast float %942, %936
-  %944 = fcmp fast une float %943, 0.000000e+00
-  %945 = zext i1 %944 to i32
-  call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %1, i32 %8, i32 0, i32 %945, i32 undef, i32 undef, i32 undef, i8 1, i32 4)  ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment)
-  br label %3385
-
-; <label>:946                                     ; preds = %332
-  %947 = icmp eq i32 %87, 2
-  br i1 %947, label %948, label %3385
-
-; <label>:948                                     ; preds = %946
-  %949 = call float @dx.op.unary.f32(i32 27, float %176)  ; Round_ni(value)
-  %950 = fadd fast float %949, -1.000000e+00
-  %951 = call float @dx.op.unary.f32(i32 27, float %177)  ; Round_ni(value)
-  %952 = fadd fast float %951, -1.000000e+00
-  %953 = extractvalue %dx.types.CBufRet.i32 %6, 3
-  %954 = icmp eq i32 %953, 0
-  br i1 %954, label %955, label %981
-
-; <label>:955                                     ; preds = %948
-  %956 = fcmp fast oge float %950, 0.000000e+00
-  %957 = fptoui float %950 to i32
-  %958 = icmp ult i32 %957, %13
-  %959 = and i1 %956, %958
-  %960 = fcmp fast oge float %952, 0.000000e+00
-  %961 = and i1 %960, %959
-  %962 = fptoui float %952 to i32
-  %963 = icmp ult i32 %962, %15
-  %964 = and i1 %963, %961
-  br i1 %964, label %965, label %1101
-
-; <label>:965                                     ; preds = %955
-  %966 = fptoui float %45 to i32
-  %967 = fptoui float %178 to i32
-  %968 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %969 = extractvalue %dx.types.CBufRet.i32 %968, 0
-  %970 = extractvalue %dx.types.CBufRet.i32 %968, 1
-  %971 = extractvalue %dx.types.CBufRet.i32 %968, 2
-  %972 = extractvalue %dx.types.CBufRet.i32 %968, 3
-  %973 = mul i32 %969, %966
-  %974 = call i32 @dx.op.tertiary.i32(i32 48, i32 %967, i32 %970, i32 %973)  ; IMad(a,b,c)
-  %975 = call i32 @dx.op.tertiary.i32(i32 48, i32 %962, i32 %971, i32 %974)  ; IMad(a,b,c)
-  %976 = call i32 @dx.op.tertiary.i32(i32 48, i32 %957, i32 %972, i32 %975)  ; IMad(a,b,c)
-  %977 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %976, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %978 = extractvalue %dx.types.ResRet.i32 %977, 0
-  %979 = icmp ne i32 %978, 0
-  %980 = uitofp i1 %979 to float
-  br label %1101
-
-; <label>:981                                     ; preds = %948
-  %982 = icmp eq i32 %953, 1
-  br i1 %982, label %983, label %1013
-
-; <label>:983                                     ; preds = %981
-  %984 = add i32 %13, -1
-  %985 = uitofp i32 %984 to float
-  %986 = call float @dx.op.binary.f32(i32 35, float %950, float 0.000000e+00)  ; FMax(a,b)
-  %987 = call float @dx.op.binary.f32(i32 36, float %986, float %985)  ; FMin(a,b)
-  %988 = fptoui float %987 to i32
-  %989 = add i32 %15, -1
-  %990 = uitofp i32 %989 to float
-  %991 = call float @dx.op.binary.f32(i32 35, float %952, float 0.000000e+00)  ; FMax(a,b)
-  %992 = call float @dx.op.binary.f32(i32 36, float %991, float %990)  ; FMin(a,b)
-  %993 = fptoui float %992 to i32
-  %994 = uitofp i32 %993 to float
-  %995 = uitofp i32 %988 to float
-  %996 = fptoui float %45 to i32
-  %997 = fptoui float %178 to i32
-  %998 = fptoui float %994 to i32
-  %999 = fptoui float %995 to i32
-  %1000 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1001 = extractvalue %dx.types.CBufRet.i32 %1000, 0
-  %1002 = extractvalue %dx.types.CBufRet.i32 %1000, 1
-  %1003 = extractvalue %dx.types.CBufRet.i32 %1000, 2
-  %1004 = extractvalue %dx.types.CBufRet.i32 %1000, 3
-  %1005 = mul i32 %1001, %996
-  %1006 = call i32 @dx.op.tertiary.i32(i32 48, i32 %997, i32 %1002, i32 %1005)  ; IMad(a,b,c)
-  %1007 = call i32 @dx.op.tertiary.i32(i32 48, i32 %998, i32 %1003, i32 %1006)  ; IMad(a,b,c)
-  %1008 = call i32 @dx.op.tertiary.i32(i32 48, i32 %999, i32 %1004, i32 %1007)  ; IMad(a,b,c)
-  %1009 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1008, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1010 = extractvalue %dx.types.ResRet.i32 %1009, 0
-  %1011 = icmp ne i32 %1010, 0
-  %1012 = uitofp i1 %1011 to float
-  br label %1101
-
-; <label>:1013                                    ; preds = %981
-  %1014 = icmp eq i32 %953, 2
-  br i1 %1014, label %1015, label %1101
-
-; <label>:1015                                    ; preds = %1013
-  %1016 = fsub fast float %22, %20
-  %1017 = fcmp fast olt float %950, %20
-  br i1 %1017, label %1018, label %1031
-
-; <label>:1018                                    ; preds = %1015
-  %1019 = fsub fast float %20, %950
-  %1020 = fdiv fast float %1019, %1016
-  %1021 = fptoui float %1020 to i32
-  %1022 = uitofp i32 %1021 to float
-  %1023 = fmul fast float %1022, %1016
-  %1024 = fsub fast float %1019, %1023
-  %1025 = and i32 %1021, 1
-  %1026 = icmp eq i32 %1025, 0
-  br i1 %1026, label %1027, label %1029
-
-; <label>:1027                                    ; preds = %1018
-  %1028 = fadd fast float %1024, %20
-  br label %1046
-
-; <label>:1029                                    ; preds = %1018
-  %1030 = fsub fast float %22, %1024
-  br label %1046
-
-; <label>:1031                                    ; preds = %1015
-  %1032 = fcmp fast ogt float %950, %22
-  br i1 %1032, label %1033, label %1046
-
-; <label>:1033                                    ; preds = %1031
-  %1034 = fsub fast float %950, %22
-  %1035 = fdiv fast float %1034, %1016
-  %1036 = fptoui float %1035 to i32
-  %1037 = uitofp i32 %1036 to float
-  %1038 = fmul fast float %1037, %1016
-  %1039 = fsub fast float %1034, %1038
-  %1040 = and i32 %1036, 1
-  %1041 = icmp eq i32 %1040, 0
-  br i1 %1041, label %1042, label %1044
-
-; <label>:1042                                    ; preds = %1033
-  %1043 = fsub fast float %22, %1039
-  br label %1046
-
-; <label>:1044                                    ; preds = %1033
-  %1045 = fadd fast float %1039, %20
-  br label %1046
-
-; <label>:1046                                    ; preds = %1044, %1042, %1031, %1029, %1027
-  %1047 = phi float [ %1028, %1027 ], [ %1030, %1029 ], [ %1043, %1042 ], [ %1045, %1044 ], [ %950, %1031 ]
-  %1048 = fptoui float %1047 to i32
-  %1049 = fsub fast float %24, %20
-  %1050 = fcmp fast olt float %952, %20
-  br i1 %1050, label %1051, label %1064
-
-; <label>:1051                                    ; preds = %1046
-  %1052 = fsub fast float %20, %952
-  %1053 = fdiv fast float %1052, %1049
-  %1054 = fptoui float %1053 to i32
-  %1055 = uitofp i32 %1054 to float
-  %1056 = fmul fast float %1055, %1049
-  %1057 = fsub fast float %1052, %1056
-  %1058 = and i32 %1054, 1
-  %1059 = icmp eq i32 %1058, 0
-  br i1 %1059, label %1060, label %1062
-
-; <label>:1060                                    ; preds = %1051
-  %1061 = fadd fast float %1057, %20
-  br label %1079
-
-; <label>:1062                                    ; preds = %1051
-  %1063 = fsub fast float %24, %1057
-  br label %1079
-
-; <label>:1064                                    ; preds = %1046
-  %1065 = fcmp fast ogt float %952, %24
-  br i1 %1065, label %1066, label %1079
-
-; <label>:1066                                    ; preds = %1064
-  %1067 = fsub fast float %952, %24
-  %1068 = fdiv fast float %1067, %1049
-  %1069 = fptoui float %1068 to i32
-  %1070 = uitofp i32 %1069 to float
-  %1071 = fmul fast float %1070, %1049
-  %1072 = fsub fast float %1067, %1071
-  %1073 = and i32 %1069, 1
-  %1074 = icmp eq i32 %1073, 0
-  br i1 %1074, label %1075, label %1077
-
-; <label>:1075                                    ; preds = %1066
-  %1076 = fsub fast float %24, %1072
-  br label %1079
-
-; <label>:1077                                    ; preds = %1066
-  %1078 = fadd fast float %1072, %20
-  br label %1079
-
-; <label>:1079                                    ; preds = %1077, %1075, %1064, %1062, %1060
-  %1080 = phi float [ %1061, %1060 ], [ %1063, %1062 ], [ %1076, %1075 ], [ %1078, %1077 ], [ %952, %1064 ]
-  %1081 = fptoui float %1080 to i32
-  %1082 = uitofp i32 %1081 to float
-  %1083 = uitofp i32 %1048 to float
-  %1084 = fptoui float %45 to i32
-  %1085 = fptoui float %178 to i32
-  %1086 = fptoui float %1082 to i32
-  %1087 = fptoui float %1083 to i32
-  %1088 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1089 = extractvalue %dx.types.CBufRet.i32 %1088, 0
-  %1090 = extractvalue %dx.types.CBufRet.i32 %1088, 1
-  %1091 = extractvalue %dx.types.CBufRet.i32 %1088, 2
-  %1092 = extractvalue %dx.types.CBufRet.i32 %1088, 3
-  %1093 = mul i32 %1089, %1084
-  %1094 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1085, i32 %1090, i32 %1093)  ; IMad(a,b,c)
-  %1095 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1086, i32 %1091, i32 %1094)  ; IMad(a,b,c)
-  %1096 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1087, i32 %1092, i32 %1095)  ; IMad(a,b,c)
-  %1097 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1096, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1098 = extractvalue %dx.types.ResRet.i32 %1097, 0
-  %1099 = icmp ne i32 %1098, 0
-  %1100 = uitofp i1 %1099 to float
-  br label %1101
-
-; <label>:1101                                    ; preds = %1079, %1013, %983, %965, %955
-  %1102 = phi float [ %980, %965 ], [ 0.000000e+00, %955 ], [ %1012, %983 ], [ %1100, %1079 ], [ 0.000000e+00, %1013 ]
-  br i1 %954, label %1103, label %1129
-
-; <label>:1103                                    ; preds = %1101
-  %1104 = fcmp fast oge float %949, 0.000000e+00
-  %1105 = fptoui float %949 to i32
-  %1106 = icmp ult i32 %1105, %13
-  %1107 = and i1 %1104, %1106
-  %1108 = fcmp fast oge float %952, 0.000000e+00
-  %1109 = and i1 %1108, %1107
-  %1110 = fptoui float %952 to i32
-  %1111 = icmp ult i32 %1110, %15
-  %1112 = and i1 %1111, %1109
-  br i1 %1112, label %1113, label %1249
-
-; <label>:1113                                    ; preds = %1103
-  %1114 = fptoui float %45 to i32
-  %1115 = fptoui float %178 to i32
-  %1116 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1117 = extractvalue %dx.types.CBufRet.i32 %1116, 0
-  %1118 = extractvalue %dx.types.CBufRet.i32 %1116, 1
-  %1119 = extractvalue %dx.types.CBufRet.i32 %1116, 2
-  %1120 = extractvalue %dx.types.CBufRet.i32 %1116, 3
-  %1121 = mul i32 %1117, %1114
-  %1122 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1115, i32 %1118, i32 %1121)  ; IMad(a,b,c)
-  %1123 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1110, i32 %1119, i32 %1122)  ; IMad(a,b,c)
-  %1124 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1105, i32 %1120, i32 %1123)  ; IMad(a,b,c)
-  %1125 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1124, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1126 = extractvalue %dx.types.ResRet.i32 %1125, 0
-  %1127 = icmp ne i32 %1126, 0
-  %1128 = uitofp i1 %1127 to float
-  br label %1249
-
-; <label>:1129                                    ; preds = %1101
-  %1130 = icmp eq i32 %953, 1
-  br i1 %1130, label %1131, label %1161
-
-; <label>:1131                                    ; preds = %1129
-  %1132 = add i32 %13, -1
-  %1133 = uitofp i32 %1132 to float
-  %1134 = call float @dx.op.binary.f32(i32 35, float %949, float 0.000000e+00)  ; FMax(a,b)
-  %1135 = call float @dx.op.binary.f32(i32 36, float %1134, float %1133)  ; FMin(a,b)
-  %1136 = fptoui float %1135 to i32
-  %1137 = add i32 %15, -1
-  %1138 = uitofp i32 %1137 to float
-  %1139 = call float @dx.op.binary.f32(i32 35, float %952, float 0.000000e+00)  ; FMax(a,b)
-  %1140 = call float @dx.op.binary.f32(i32 36, float %1139, float %1138)  ; FMin(a,b)
-  %1141 = fptoui float %1140 to i32
-  %1142 = uitofp i32 %1141 to float
-  %1143 = uitofp i32 %1136 to float
-  %1144 = fptoui float %45 to i32
-  %1145 = fptoui float %178 to i32
-  %1146 = fptoui float %1142 to i32
-  %1147 = fptoui float %1143 to i32
-  %1148 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1149 = extractvalue %dx.types.CBufRet.i32 %1148, 0
-  %1150 = extractvalue %dx.types.CBufRet.i32 %1148, 1
-  %1151 = extractvalue %dx.types.CBufRet.i32 %1148, 2
-  %1152 = extractvalue %dx.types.CBufRet.i32 %1148, 3
-  %1153 = mul i32 %1149, %1144
-  %1154 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1145, i32 %1150, i32 %1153)  ; IMad(a,b,c)
-  %1155 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1146, i32 %1151, i32 %1154)  ; IMad(a,b,c)
-  %1156 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1147, i32 %1152, i32 %1155)  ; IMad(a,b,c)
-  %1157 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1156, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1158 = extractvalue %dx.types.ResRet.i32 %1157, 0
-  %1159 = icmp ne i32 %1158, 0
-  %1160 = uitofp i1 %1159 to float
-  br label %1249
-
-; <label>:1161                                    ; preds = %1129
-  %1162 = icmp eq i32 %953, 2
-  br i1 %1162, label %1163, label %1249
-
-; <label>:1163                                    ; preds = %1161
-  %1164 = fsub fast float %22, %20
-  %1165 = fcmp fast olt float %949, %20
-  br i1 %1165, label %1166, label %1179
-
-; <label>:1166                                    ; preds = %1163
-  %1167 = fsub fast float %20, %949
-  %1168 = fdiv fast float %1167, %1164
-  %1169 = fptoui float %1168 to i32
-  %1170 = uitofp i32 %1169 to float
-  %1171 = fmul fast float %1170, %1164
-  %1172 = fsub fast float %1167, %1171
-  %1173 = and i32 %1169, 1
-  %1174 = icmp eq i32 %1173, 0
-  br i1 %1174, label %1175, label %1177
-
-; <label>:1175                                    ; preds = %1166
-  %1176 = fadd fast float %1172, %20
-  br label %1194
-
-; <label>:1177                                    ; preds = %1166
-  %1178 = fsub fast float %22, %1172
-  br label %1194
-
-; <label>:1179                                    ; preds = %1163
-  %1180 = fcmp fast ogt float %949, %22
-  br i1 %1180, label %1181, label %1194
-
-; <label>:1181                                    ; preds = %1179
-  %1182 = fsub fast float %949, %22
-  %1183 = fdiv fast float %1182, %1164
-  %1184 = fptoui float %1183 to i32
-  %1185 = uitofp i32 %1184 to float
-  %1186 = fmul fast float %1185, %1164
-  %1187 = fsub fast float %1182, %1186
-  %1188 = and i32 %1184, 1
-  %1189 = icmp eq i32 %1188, 0
-  br i1 %1189, label %1190, label %1192
-
-; <label>:1190                                    ; preds = %1181
-  %1191 = fsub fast float %22, %1187
-  br label %1194
-
-; <label>:1192                                    ; preds = %1181
-  %1193 = fadd fast float %1187, %20
-  br label %1194
-
-; <label>:1194                                    ; preds = %1192, %1190, %1179, %1177, %1175
-  %1195 = phi float [ %1176, %1175 ], [ %1178, %1177 ], [ %1191, %1190 ], [ %1193, %1192 ], [ %949, %1179 ]
-  %1196 = fptoui float %1195 to i32
-  %1197 = fsub fast float %24, %20
-  %1198 = fcmp fast olt float %952, %20
-  br i1 %1198, label %1199, label %1212
-
-; <label>:1199                                    ; preds = %1194
-  %1200 = fsub fast float %20, %952
-  %1201 = fdiv fast float %1200, %1197
-  %1202 = fptoui float %1201 to i32
-  %1203 = uitofp i32 %1202 to float
-  %1204 = fmul fast float %1203, %1197
-  %1205 = fsub fast float %1200, %1204
-  %1206 = and i32 %1202, 1
-  %1207 = icmp eq i32 %1206, 0
-  br i1 %1207, label %1208, label %1210
-
-; <label>:1208                                    ; preds = %1199
-  %1209 = fadd fast float %1205, %20
-  br label %1227
-
-; <label>:1210                                    ; preds = %1199
-  %1211 = fsub fast float %24, %1205
-  br label %1227
-
-; <label>:1212                                    ; preds = %1194
-  %1213 = fcmp fast ogt float %952, %24
-  br i1 %1213, label %1214, label %1227
-
-; <label>:1214                                    ; preds = %1212
-  %1215 = fsub fast float %952, %24
-  %1216 = fdiv fast float %1215, %1197
-  %1217 = fptoui float %1216 to i32
-  %1218 = uitofp i32 %1217 to float
-  %1219 = fmul fast float %1218, %1197
-  %1220 = fsub fast float %1215, %1219
-  %1221 = and i32 %1217, 1
-  %1222 = icmp eq i32 %1221, 0
-  br i1 %1222, label %1223, label %1225
-
-; <label>:1223                                    ; preds = %1214
-  %1224 = fsub fast float %24, %1220
-  br label %1227
-
-; <label>:1225                                    ; preds = %1214
-  %1226 = fadd fast float %1220, %20
-  br label %1227
-
-; <label>:1227                                    ; preds = %1225, %1223, %1212, %1210, %1208
-  %1228 = phi float [ %1209, %1208 ], [ %1211, %1210 ], [ %1224, %1223 ], [ %1226, %1225 ], [ %952, %1212 ]
-  %1229 = fptoui float %1228 to i32
-  %1230 = uitofp i32 %1229 to float
-  %1231 = uitofp i32 %1196 to float
-  %1232 = fptoui float %45 to i32
-  %1233 = fptoui float %178 to i32
-  %1234 = fptoui float %1230 to i32
-  %1235 = fptoui float %1231 to i32
-  %1236 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1237 = extractvalue %dx.types.CBufRet.i32 %1236, 0
-  %1238 = extractvalue %dx.types.CBufRet.i32 %1236, 1
-  %1239 = extractvalue %dx.types.CBufRet.i32 %1236, 2
-  %1240 = extractvalue %dx.types.CBufRet.i32 %1236, 3
-  %1241 = mul i32 %1237, %1232
-  %1242 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1233, i32 %1238, i32 %1241)  ; IMad(a,b,c)
-  %1243 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1234, i32 %1239, i32 %1242)  ; IMad(a,b,c)
-  %1244 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1235, i32 %1240, i32 %1243)  ; IMad(a,b,c)
-  %1245 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1244, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1246 = extractvalue %dx.types.ResRet.i32 %1245, 0
-  %1247 = icmp ne i32 %1246, 0
-  %1248 = uitofp i1 %1247 to float
-  br label %1249
-
-; <label>:1249                                    ; preds = %1227, %1161, %1131, %1113, %1103
-  %1250 = phi float [ %1128, %1113 ], [ 0.000000e+00, %1103 ], [ %1160, %1131 ], [ %1248, %1227 ], [ 0.000000e+00, %1161 ]
-  %1251 = fadd fast float %949, 1.000000e+00
-  br i1 %954, label %1252, label %1278
-
-; <label>:1252                                    ; preds = %1249
-  %1253 = fcmp fast oge float %1251, 0.000000e+00
-  %1254 = fptoui float %1251 to i32
-  %1255 = icmp ult i32 %1254, %13
-  %1256 = and i1 %1253, %1255
-  %1257 = fcmp fast oge float %952, 0.000000e+00
-  %1258 = and i1 %1257, %1256
-  %1259 = fptoui float %952 to i32
-  %1260 = icmp ult i32 %1259, %15
-  %1261 = and i1 %1260, %1258
-  br i1 %1261, label %1262, label %1398
-
-; <label>:1262                                    ; preds = %1252
-  %1263 = fptoui float %45 to i32
-  %1264 = fptoui float %178 to i32
-  %1265 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1266 = extractvalue %dx.types.CBufRet.i32 %1265, 0
-  %1267 = extractvalue %dx.types.CBufRet.i32 %1265, 1
-  %1268 = extractvalue %dx.types.CBufRet.i32 %1265, 2
-  %1269 = extractvalue %dx.types.CBufRet.i32 %1265, 3
-  %1270 = mul i32 %1266, %1263
-  %1271 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1264, i32 %1267, i32 %1270)  ; IMad(a,b,c)
-  %1272 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1259, i32 %1268, i32 %1271)  ; IMad(a,b,c)
-  %1273 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1254, i32 %1269, i32 %1272)  ; IMad(a,b,c)
-  %1274 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1273, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1275 = extractvalue %dx.types.ResRet.i32 %1274, 0
-  %1276 = icmp ne i32 %1275, 0
-  %1277 = uitofp i1 %1276 to float
-  br label %1398
-
-; <label>:1278                                    ; preds = %1249
-  %1279 = icmp eq i32 %953, 1
-  br i1 %1279, label %1280, label %1310
-
-; <label>:1280                                    ; preds = %1278
-  %1281 = add i32 %13, -1
-  %1282 = uitofp i32 %1281 to float
-  %1283 = call float @dx.op.binary.f32(i32 35, float %1251, float 0.000000e+00)  ; FMax(a,b)
-  %1284 = call float @dx.op.binary.f32(i32 36, float %1283, float %1282)  ; FMin(a,b)
-  %1285 = fptoui float %1284 to i32
-  %1286 = add i32 %15, -1
-  %1287 = uitofp i32 %1286 to float
-  %1288 = call float @dx.op.binary.f32(i32 35, float %952, float 0.000000e+00)  ; FMax(a,b)
-  %1289 = call float @dx.op.binary.f32(i32 36, float %1288, float %1287)  ; FMin(a,b)
-  %1290 = fptoui float %1289 to i32
-  %1291 = uitofp i32 %1290 to float
-  %1292 = uitofp i32 %1285 to float
-  %1293 = fptoui float %45 to i32
-  %1294 = fptoui float %178 to i32
-  %1295 = fptoui float %1291 to i32
-  %1296 = fptoui float %1292 to i32
-  %1297 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1298 = extractvalue %dx.types.CBufRet.i32 %1297, 0
-  %1299 = extractvalue %dx.types.CBufRet.i32 %1297, 1
-  %1300 = extractvalue %dx.types.CBufRet.i32 %1297, 2
-  %1301 = extractvalue %dx.types.CBufRet.i32 %1297, 3
-  %1302 = mul i32 %1298, %1293
-  %1303 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1294, i32 %1299, i32 %1302)  ; IMad(a,b,c)
-  %1304 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1295, i32 %1300, i32 %1303)  ; IMad(a,b,c)
-  %1305 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1296, i32 %1301, i32 %1304)  ; IMad(a,b,c)
-  %1306 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1305, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1307 = extractvalue %dx.types.ResRet.i32 %1306, 0
-  %1308 = icmp ne i32 %1307, 0
-  %1309 = uitofp i1 %1308 to float
-  br label %1398
-
-; <label>:1310                                    ; preds = %1278
-  %1311 = icmp eq i32 %953, 2
-  br i1 %1311, label %1312, label %1398
-
-; <label>:1312                                    ; preds = %1310
-  %1313 = fsub fast float %22, %20
-  %1314 = fcmp fast olt float %1251, %20
-  br i1 %1314, label %1315, label %1328
-
-; <label>:1315                                    ; preds = %1312
-  %1316 = fsub fast float %20, %1251
-  %1317 = fdiv fast float %1316, %1313
-  %1318 = fptoui float %1317 to i32
-  %1319 = uitofp i32 %1318 to float
-  %1320 = fmul fast float %1319, %1313
-  %1321 = fsub fast float %1316, %1320
-  %1322 = and i32 %1318, 1
-  %1323 = icmp eq i32 %1322, 0
-  br i1 %1323, label %1324, label %1326
-
-; <label>:1324                                    ; preds = %1315
-  %1325 = fadd fast float %1321, %20
-  br label %1343
-
-; <label>:1326                                    ; preds = %1315
-  %1327 = fsub fast float %22, %1321
-  br label %1343
-
-; <label>:1328                                    ; preds = %1312
-  %1329 = fcmp fast ogt float %1251, %22
-  br i1 %1329, label %1330, label %1343
-
-; <label>:1330                                    ; preds = %1328
-  %1331 = fsub fast float %1251, %22
-  %1332 = fdiv fast float %1331, %1313
-  %1333 = fptoui float %1332 to i32
-  %1334 = uitofp i32 %1333 to float
-  %1335 = fmul fast float %1334, %1313
-  %1336 = fsub fast float %1331, %1335
-  %1337 = and i32 %1333, 1
-  %1338 = icmp eq i32 %1337, 0
-  br i1 %1338, label %1339, label %1341
-
-; <label>:1339                                    ; preds = %1330
-  %1340 = fsub fast float %22, %1336
-  br label %1343
-
-; <label>:1341                                    ; preds = %1330
-  %1342 = fadd fast float %1336, %20
-  br label %1343
-
-; <label>:1343                                    ; preds = %1341, %1339, %1328, %1326, %1324
-  %1344 = phi float [ %1325, %1324 ], [ %1327, %1326 ], [ %1340, %1339 ], [ %1342, %1341 ], [ %1251, %1328 ]
-  %1345 = fptoui float %1344 to i32
-  %1346 = fsub fast float %24, %20
-  %1347 = fcmp fast olt float %952, %20
-  br i1 %1347, label %1348, label %1361
-
-; <label>:1348                                    ; preds = %1343
-  %1349 = fsub fast float %20, %952
-  %1350 = fdiv fast float %1349, %1346
-  %1351 = fptoui float %1350 to i32
-  %1352 = uitofp i32 %1351 to float
-  %1353 = fmul fast float %1352, %1346
-  %1354 = fsub fast float %1349, %1353
-  %1355 = and i32 %1351, 1
-  %1356 = icmp eq i32 %1355, 0
-  br i1 %1356, label %1357, label %1359
-
-; <label>:1357                                    ; preds = %1348
-  %1358 = fadd fast float %1354, %20
-  br label %1376
-
-; <label>:1359                                    ; preds = %1348
-  %1360 = fsub fast float %24, %1354
-  br label %1376
-
-; <label>:1361                                    ; preds = %1343
-  %1362 = fcmp fast ogt float %952, %24
-  br i1 %1362, label %1363, label %1376
-
-; <label>:1363                                    ; preds = %1361
-  %1364 = fsub fast float %952, %24
-  %1365 = fdiv fast float %1364, %1346
-  %1366 = fptoui float %1365 to i32
-  %1367 = uitofp i32 %1366 to float
-  %1368 = fmul fast float %1367, %1346
-  %1369 = fsub fast float %1364, %1368
-  %1370 = and i32 %1366, 1
-  %1371 = icmp eq i32 %1370, 0
-  br i1 %1371, label %1372, label %1374
-
-; <label>:1372                                    ; preds = %1363
-  %1373 = fsub fast float %24, %1369
-  br label %1376
-
-; <label>:1374                                    ; preds = %1363
-  %1375 = fadd fast float %1369, %20
-  br label %1376
-
-; <label>:1376                                    ; preds = %1374, %1372, %1361, %1359, %1357
-  %1377 = phi float [ %1358, %1357 ], [ %1360, %1359 ], [ %1373, %1372 ], [ %1375, %1374 ], [ %952, %1361 ]
-  %1378 = fptoui float %1377 to i32
-  %1379 = uitofp i32 %1378 to float
-  %1380 = uitofp i32 %1345 to float
-  %1381 = fptoui float %45 to i32
-  %1382 = fptoui float %178 to i32
-  %1383 = fptoui float %1379 to i32
-  %1384 = fptoui float %1380 to i32
-  %1385 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1386 = extractvalue %dx.types.CBufRet.i32 %1385, 0
-  %1387 = extractvalue %dx.types.CBufRet.i32 %1385, 1
-  %1388 = extractvalue %dx.types.CBufRet.i32 %1385, 2
-  %1389 = extractvalue %dx.types.CBufRet.i32 %1385, 3
-  %1390 = mul i32 %1386, %1381
-  %1391 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1382, i32 %1387, i32 %1390)  ; IMad(a,b,c)
-  %1392 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1383, i32 %1388, i32 %1391)  ; IMad(a,b,c)
-  %1393 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1384, i32 %1389, i32 %1392)  ; IMad(a,b,c)
-  %1394 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1393, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1395 = extractvalue %dx.types.ResRet.i32 %1394, 0
-  %1396 = icmp ne i32 %1395, 0
-  %1397 = uitofp i1 %1396 to float
-  br label %1398
-
-; <label>:1398                                    ; preds = %1376, %1310, %1280, %1262, %1252
-  %1399 = phi float [ %1277, %1262 ], [ 0.000000e+00, %1252 ], [ %1309, %1280 ], [ %1397, %1376 ], [ 0.000000e+00, %1310 ]
-  %1400 = fadd fast float %949, 2.000000e+00
-  br i1 %954, label %1401, label %1427
-
-; <label>:1401                                    ; preds = %1398
-  %1402 = fcmp fast oge float %1400, 0.000000e+00
-  %1403 = fptoui float %1400 to i32
-  %1404 = icmp ult i32 %1403, %13
-  %1405 = and i1 %1402, %1404
-  %1406 = fcmp fast oge float %952, 0.000000e+00
-  %1407 = and i1 %1406, %1405
-  %1408 = fptoui float %952 to i32
-  %1409 = icmp ult i32 %1408, %15
-  %1410 = and i1 %1409, %1407
-  br i1 %1410, label %1411, label %1547
-
-; <label>:1411                                    ; preds = %1401
-  %1412 = fptoui float %45 to i32
-  %1413 = fptoui float %178 to i32
-  %1414 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1415 = extractvalue %dx.types.CBufRet.i32 %1414, 0
-  %1416 = extractvalue %dx.types.CBufRet.i32 %1414, 1
-  %1417 = extractvalue %dx.types.CBufRet.i32 %1414, 2
-  %1418 = extractvalue %dx.types.CBufRet.i32 %1414, 3
-  %1419 = mul i32 %1415, %1412
-  %1420 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1413, i32 %1416, i32 %1419)  ; IMad(a,b,c)
-  %1421 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1408, i32 %1417, i32 %1420)  ; IMad(a,b,c)
-  %1422 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1403, i32 %1418, i32 %1421)  ; IMad(a,b,c)
-  %1423 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1422, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1424 = extractvalue %dx.types.ResRet.i32 %1423, 0
-  %1425 = icmp ne i32 %1424, 0
-  %1426 = uitofp i1 %1425 to float
-  br label %1547
-
-; <label>:1427                                    ; preds = %1398
-  %1428 = icmp eq i32 %953, 1
-  br i1 %1428, label %1429, label %1459
-
-; <label>:1429                                    ; preds = %1427
-  %1430 = add i32 %13, -1
-  %1431 = uitofp i32 %1430 to float
-  %1432 = call float @dx.op.binary.f32(i32 35, float %1400, float 0.000000e+00)  ; FMax(a,b)
-  %1433 = call float @dx.op.binary.f32(i32 36, float %1432, float %1431)  ; FMin(a,b)
-  %1434 = fptoui float %1433 to i32
-  %1435 = add i32 %15, -1
-  %1436 = uitofp i32 %1435 to float
-  %1437 = call float @dx.op.binary.f32(i32 35, float %952, float 0.000000e+00)  ; FMax(a,b)
-  %1438 = call float @dx.op.binary.f32(i32 36, float %1437, float %1436)  ; FMin(a,b)
-  %1439 = fptoui float %1438 to i32
-  %1440 = uitofp i32 %1439 to float
-  %1441 = uitofp i32 %1434 to float
-  %1442 = fptoui float %45 to i32
-  %1443 = fptoui float %178 to i32
-  %1444 = fptoui float %1440 to i32
-  %1445 = fptoui float %1441 to i32
-  %1446 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1447 = extractvalue %dx.types.CBufRet.i32 %1446, 0
-  %1448 = extractvalue %dx.types.CBufRet.i32 %1446, 1
-  %1449 = extractvalue %dx.types.CBufRet.i32 %1446, 2
-  %1450 = extractvalue %dx.types.CBufRet.i32 %1446, 3
-  %1451 = mul i32 %1447, %1442
-  %1452 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1443, i32 %1448, i32 %1451)  ; IMad(a,b,c)
-  %1453 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1444, i32 %1449, i32 %1452)  ; IMad(a,b,c)
-  %1454 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1445, i32 %1450, i32 %1453)  ; IMad(a,b,c)
-  %1455 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1454, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1456 = extractvalue %dx.types.ResRet.i32 %1455, 0
-  %1457 = icmp ne i32 %1456, 0
-  %1458 = uitofp i1 %1457 to float
-  br label %1547
-
-; <label>:1459                                    ; preds = %1427
-  %1460 = icmp eq i32 %953, 2
-  br i1 %1460, label %1461, label %1547
-
-; <label>:1461                                    ; preds = %1459
-  %1462 = fsub fast float %22, %20
-  %1463 = fcmp fast olt float %1400, %20
-  br i1 %1463, label %1464, label %1477
-
-; <label>:1464                                    ; preds = %1461
-  %1465 = fsub fast float %20, %1400
-  %1466 = fdiv fast float %1465, %1462
-  %1467 = fptoui float %1466 to i32
-  %1468 = uitofp i32 %1467 to float
-  %1469 = fmul fast float %1468, %1462
-  %1470 = fsub fast float %1465, %1469
-  %1471 = and i32 %1467, 1
-  %1472 = icmp eq i32 %1471, 0
-  br i1 %1472, label %1473, label %1475
-
-; <label>:1473                                    ; preds = %1464
-  %1474 = fadd fast float %1470, %20
-  br label %1492
-
-; <label>:1475                                    ; preds = %1464
-  %1476 = fsub fast float %22, %1470
-  br label %1492
-
-; <label>:1477                                    ; preds = %1461
-  %1478 = fcmp fast ogt float %1400, %22
-  br i1 %1478, label %1479, label %1492
-
-; <label>:1479                                    ; preds = %1477
-  %1480 = fsub fast float %1400, %22
-  %1481 = fdiv fast float %1480, %1462
-  %1482 = fptoui float %1481 to i32
-  %1483 = uitofp i32 %1482 to float
-  %1484 = fmul fast float %1483, %1462
-  %1485 = fsub fast float %1480, %1484
-  %1486 = and i32 %1482, 1
-  %1487 = icmp eq i32 %1486, 0
-  br i1 %1487, label %1488, label %1490
-
-; <label>:1488                                    ; preds = %1479
-  %1489 = fsub fast float %22, %1485
-  br label %1492
-
-; <label>:1490                                    ; preds = %1479
-  %1491 = fadd fast float %1485, %20
-  br label %1492
-
-; <label>:1492                                    ; preds = %1490, %1488, %1477, %1475, %1473
-  %1493 = phi float [ %1474, %1473 ], [ %1476, %1475 ], [ %1489, %1488 ], [ %1491, %1490 ], [ %1400, %1477 ]
-  %1494 = fptoui float %1493 to i32
-  %1495 = fsub fast float %24, %20
-  %1496 = fcmp fast olt float %952, %20
-  br i1 %1496, label %1497, label %1510
-
-; <label>:1497                                    ; preds = %1492
-  %1498 = fsub fast float %20, %952
-  %1499 = fdiv fast float %1498, %1495
-  %1500 = fptoui float %1499 to i32
-  %1501 = uitofp i32 %1500 to float
-  %1502 = fmul fast float %1501, %1495
-  %1503 = fsub fast float %1498, %1502
-  %1504 = and i32 %1500, 1
-  %1505 = icmp eq i32 %1504, 0
-  br i1 %1505, label %1506, label %1508
-
-; <label>:1506                                    ; preds = %1497
-  %1507 = fadd fast float %1503, %20
-  br label %1525
-
-; <label>:1508                                    ; preds = %1497
-  %1509 = fsub fast float %24, %1503
-  br label %1525
-
-; <label>:1510                                    ; preds = %1492
-  %1511 = fcmp fast ogt float %952, %24
-  br i1 %1511, label %1512, label %1525
-
-; <label>:1512                                    ; preds = %1510
-  %1513 = fsub fast float %952, %24
-  %1514 = fdiv fast float %1513, %1495
-  %1515 = fptoui float %1514 to i32
-  %1516 = uitofp i32 %1515 to float
-  %1517 = fmul fast float %1516, %1495
-  %1518 = fsub fast float %1513, %1517
-  %1519 = and i32 %1515, 1
-  %1520 = icmp eq i32 %1519, 0
-  br i1 %1520, label %1521, label %1523
-
-; <label>:1521                                    ; preds = %1512
-  %1522 = fsub fast float %24, %1518
-  br label %1525
-
-; <label>:1523                                    ; preds = %1512
-  %1524 = fadd fast float %1518, %20
-  br label %1525
-
-; <label>:1525                                    ; preds = %1523, %1521, %1510, %1508, %1506
-  %1526 = phi float [ %1507, %1506 ], [ %1509, %1508 ], [ %1522, %1521 ], [ %1524, %1523 ], [ %952, %1510 ]
-  %1527 = fptoui float %1526 to i32
-  %1528 = uitofp i32 %1527 to float
-  %1529 = uitofp i32 %1494 to float
-  %1530 = fptoui float %45 to i32
-  %1531 = fptoui float %178 to i32
-  %1532 = fptoui float %1528 to i32
-  %1533 = fptoui float %1529 to i32
-  %1534 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1535 = extractvalue %dx.types.CBufRet.i32 %1534, 0
-  %1536 = extractvalue %dx.types.CBufRet.i32 %1534, 1
-  %1537 = extractvalue %dx.types.CBufRet.i32 %1534, 2
-  %1538 = extractvalue %dx.types.CBufRet.i32 %1534, 3
-  %1539 = mul i32 %1535, %1530
-  %1540 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1531, i32 %1536, i32 %1539)  ; IMad(a,b,c)
-  %1541 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1532, i32 %1537, i32 %1540)  ; IMad(a,b,c)
-  %1542 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1533, i32 %1538, i32 %1541)  ; IMad(a,b,c)
-  %1543 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1542, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1544 = extractvalue %dx.types.ResRet.i32 %1543, 0
-  %1545 = icmp ne i32 %1544, 0
-  %1546 = uitofp i1 %1545 to float
-  br label %1547
-
-; <label>:1547                                    ; preds = %1525, %1459, %1429, %1411, %1401
-  %1548 = phi float [ %1426, %1411 ], [ 0.000000e+00, %1401 ], [ %1458, %1429 ], [ %1546, %1525 ], [ 0.000000e+00, %1459 ]
-  br i1 %954, label %1549, label %1575
-
-; <label>:1549                                    ; preds = %1547
-  %1550 = fcmp fast oge float %950, 0.000000e+00
-  %1551 = fptoui float %950 to i32
-  %1552 = icmp ult i32 %1551, %13
-  %1553 = and i1 %1550, %1552
-  %1554 = fcmp fast oge float %951, 0.000000e+00
-  %1555 = and i1 %1554, %1553
-  %1556 = fptoui float %951 to i32
-  %1557 = icmp ult i32 %1556, %15
-  %1558 = and i1 %1557, %1555
-  br i1 %1558, label %1559, label %1695
-
-; <label>:1559                                    ; preds = %1549
-  %1560 = fptoui float %45 to i32
-  %1561 = fptoui float %178 to i32
-  %1562 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1563 = extractvalue %dx.types.CBufRet.i32 %1562, 0
-  %1564 = extractvalue %dx.types.CBufRet.i32 %1562, 1
-  %1565 = extractvalue %dx.types.CBufRet.i32 %1562, 2
-  %1566 = extractvalue %dx.types.CBufRet.i32 %1562, 3
-  %1567 = mul i32 %1563, %1560
-  %1568 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1561, i32 %1564, i32 %1567)  ; IMad(a,b,c)
-  %1569 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1556, i32 %1565, i32 %1568)  ; IMad(a,b,c)
-  %1570 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1551, i32 %1566, i32 %1569)  ; IMad(a,b,c)
-  %1571 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1570, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1572 = extractvalue %dx.types.ResRet.i32 %1571, 0
-  %1573 = icmp ne i32 %1572, 0
-  %1574 = uitofp i1 %1573 to float
-  br label %1695
-
-; <label>:1575                                    ; preds = %1547
-  %1576 = icmp eq i32 %953, 1
-  br i1 %1576, label %1577, label %1607
-
-; <label>:1577                                    ; preds = %1575
-  %1578 = add i32 %13, -1
-  %1579 = uitofp i32 %1578 to float
-  %1580 = call float @dx.op.binary.f32(i32 35, float %950, float 0.000000e+00)  ; FMax(a,b)
-  %1581 = call float @dx.op.binary.f32(i32 36, float %1580, float %1579)  ; FMin(a,b)
-  %1582 = fptoui float %1581 to i32
-  %1583 = add i32 %15, -1
-  %1584 = uitofp i32 %1583 to float
-  %1585 = call float @dx.op.binary.f32(i32 35, float %951, float 0.000000e+00)  ; FMax(a,b)
-  %1586 = call float @dx.op.binary.f32(i32 36, float %1585, float %1584)  ; FMin(a,b)
-  %1587 = fptoui float %1586 to i32
-  %1588 = uitofp i32 %1587 to float
-  %1589 = uitofp i32 %1582 to float
-  %1590 = fptoui float %45 to i32
-  %1591 = fptoui float %178 to i32
-  %1592 = fptoui float %1588 to i32
-  %1593 = fptoui float %1589 to i32
-  %1594 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1595 = extractvalue %dx.types.CBufRet.i32 %1594, 0
-  %1596 = extractvalue %dx.types.CBufRet.i32 %1594, 1
-  %1597 = extractvalue %dx.types.CBufRet.i32 %1594, 2
-  %1598 = extractvalue %dx.types.CBufRet.i32 %1594, 3
-  %1599 = mul i32 %1595, %1590
-  %1600 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1591, i32 %1596, i32 %1599)  ; IMad(a,b,c)
-  %1601 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1592, i32 %1597, i32 %1600)  ; IMad(a,b,c)
-  %1602 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1593, i32 %1598, i32 %1601)  ; IMad(a,b,c)
-  %1603 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1602, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1604 = extractvalue %dx.types.ResRet.i32 %1603, 0
-  %1605 = icmp ne i32 %1604, 0
-  %1606 = uitofp i1 %1605 to float
-  br label %1695
-
-; <label>:1607                                    ; preds = %1575
-  %1608 = icmp eq i32 %953, 2
-  br i1 %1608, label %1609, label %1695
-
-; <label>:1609                                    ; preds = %1607
-  %1610 = fsub fast float %22, %20
-  %1611 = fcmp fast olt float %950, %20
-  br i1 %1611, label %1612, label %1625
-
-; <label>:1612                                    ; preds = %1609
-  %1613 = fsub fast float %20, %950
-  %1614 = fdiv fast float %1613, %1610
-  %1615 = fptoui float %1614 to i32
-  %1616 = uitofp i32 %1615 to float
-  %1617 = fmul fast float %1616, %1610
-  %1618 = fsub fast float %1613, %1617
-  %1619 = and i32 %1615, 1
-  %1620 = icmp eq i32 %1619, 0
-  br i1 %1620, label %1621, label %1623
-
-; <label>:1621                                    ; preds = %1612
-  %1622 = fadd fast float %1618, %20
-  br label %1640
-
-; <label>:1623                                    ; preds = %1612
-  %1624 = fsub fast float %22, %1618
-  br label %1640
-
-; <label>:1625                                    ; preds = %1609
-  %1626 = fcmp fast ogt float %950, %22
-  br i1 %1626, label %1627, label %1640
-
-; <label>:1627                                    ; preds = %1625
-  %1628 = fsub fast float %950, %22
-  %1629 = fdiv fast float %1628, %1610
-  %1630 = fptoui float %1629 to i32
-  %1631 = uitofp i32 %1630 to float
-  %1632 = fmul fast float %1631, %1610
-  %1633 = fsub fast float %1628, %1632
-  %1634 = and i32 %1630, 1
-  %1635 = icmp eq i32 %1634, 0
-  br i1 %1635, label %1636, label %1638
-
-; <label>:1636                                    ; preds = %1627
-  %1637 = fsub fast float %22, %1633
-  br label %1640
-
-; <label>:1638                                    ; preds = %1627
-  %1639 = fadd fast float %1633, %20
-  br label %1640
-
-; <label>:1640                                    ; preds = %1638, %1636, %1625, %1623, %1621
-  %1641 = phi float [ %1622, %1621 ], [ %1624, %1623 ], [ %1637, %1636 ], [ %1639, %1638 ], [ %950, %1625 ]
-  %1642 = fptoui float %1641 to i32
-  %1643 = fsub fast float %24, %20
-  %1644 = fcmp fast olt float %951, %20
-  br i1 %1644, label %1645, label %1658
-
-; <label>:1645                                    ; preds = %1640
-  %1646 = fsub fast float %20, %951
-  %1647 = fdiv fast float %1646, %1643
-  %1648 = fptoui float %1647 to i32
-  %1649 = uitofp i32 %1648 to float
-  %1650 = fmul fast float %1649, %1643
-  %1651 = fsub fast float %1646, %1650
-  %1652 = and i32 %1648, 1
-  %1653 = icmp eq i32 %1652, 0
-  br i1 %1653, label %1654, label %1656
-
-; <label>:1654                                    ; preds = %1645
-  %1655 = fadd fast float %1651, %20
-  br label %1673
-
-; <label>:1656                                    ; preds = %1645
-  %1657 = fsub fast float %24, %1651
-  br label %1673
-
-; <label>:1658                                    ; preds = %1640
-  %1659 = fcmp fast ogt float %951, %24
-  br i1 %1659, label %1660, label %1673
-
-; <label>:1660                                    ; preds = %1658
-  %1661 = fsub fast float %951, %24
-  %1662 = fdiv fast float %1661, %1643
-  %1663 = fptoui float %1662 to i32
-  %1664 = uitofp i32 %1663 to float
-  %1665 = fmul fast float %1664, %1643
-  %1666 = fsub fast float %1661, %1665
-  %1667 = and i32 %1663, 1
-  %1668 = icmp eq i32 %1667, 0
-  br i1 %1668, label %1669, label %1671
-
-; <label>:1669                                    ; preds = %1660
-  %1670 = fsub fast float %24, %1666
-  br label %1673
-
-; <label>:1671                                    ; preds = %1660
-  %1672 = fadd fast float %1666, %20
-  br label %1673
-
-; <label>:1673                                    ; preds = %1671, %1669, %1658, %1656, %1654
-  %1674 = phi float [ %1655, %1654 ], [ %1657, %1656 ], [ %1670, %1669 ], [ %1672, %1671 ], [ %951, %1658 ]
-  %1675 = fptoui float %1674 to i32
-  %1676 = uitofp i32 %1675 to float
-  %1677 = uitofp i32 %1642 to float
-  %1678 = fptoui float %45 to i32
-  %1679 = fptoui float %178 to i32
-  %1680 = fptoui float %1676 to i32
-  %1681 = fptoui float %1677 to i32
-  %1682 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1683 = extractvalue %dx.types.CBufRet.i32 %1682, 0
-  %1684 = extractvalue %dx.types.CBufRet.i32 %1682, 1
-  %1685 = extractvalue %dx.types.CBufRet.i32 %1682, 2
-  %1686 = extractvalue %dx.types.CBufRet.i32 %1682, 3
-  %1687 = mul i32 %1683, %1678
-  %1688 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1679, i32 %1684, i32 %1687)  ; IMad(a,b,c)
-  %1689 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1680, i32 %1685, i32 %1688)  ; IMad(a,b,c)
-  %1690 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1681, i32 %1686, i32 %1689)  ; IMad(a,b,c)
-  %1691 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1690, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1692 = extractvalue %dx.types.ResRet.i32 %1691, 0
-  %1693 = icmp ne i32 %1692, 0
-  %1694 = uitofp i1 %1693 to float
-  br label %1695
-
-; <label>:1695                                    ; preds = %1673, %1607, %1577, %1559, %1549
-  %1696 = phi float [ %1574, %1559 ], [ 0.000000e+00, %1549 ], [ %1606, %1577 ], [ %1694, %1673 ], [ 0.000000e+00, %1607 ]
-  br i1 %954, label %1697, label %1723
-
-; <label>:1697                                    ; preds = %1695
-  %1698 = fcmp fast oge float %949, 0.000000e+00
-  %1699 = fptoui float %949 to i32
-  %1700 = icmp ult i32 %1699, %13
-  %1701 = and i1 %1698, %1700
-  %1702 = fcmp fast oge float %951, 0.000000e+00
-  %1703 = and i1 %1702, %1701
-  %1704 = fptoui float %951 to i32
-  %1705 = icmp ult i32 %1704, %15
-  %1706 = and i1 %1705, %1703
-  br i1 %1706, label %1707, label %1843
-
-; <label>:1707                                    ; preds = %1697
-  %1708 = fptoui float %45 to i32
-  %1709 = fptoui float %178 to i32
-  %1710 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1711 = extractvalue %dx.types.CBufRet.i32 %1710, 0
-  %1712 = extractvalue %dx.types.CBufRet.i32 %1710, 1
-  %1713 = extractvalue %dx.types.CBufRet.i32 %1710, 2
-  %1714 = extractvalue %dx.types.CBufRet.i32 %1710, 3
-  %1715 = mul i32 %1711, %1708
-  %1716 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1709, i32 %1712, i32 %1715)  ; IMad(a,b,c)
-  %1717 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1704, i32 %1713, i32 %1716)  ; IMad(a,b,c)
-  %1718 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1699, i32 %1714, i32 %1717)  ; IMad(a,b,c)
-  %1719 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1718, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1720 = extractvalue %dx.types.ResRet.i32 %1719, 0
-  %1721 = icmp ne i32 %1720, 0
-  %1722 = uitofp i1 %1721 to float
-  br label %1843
-
-; <label>:1723                                    ; preds = %1695
-  %1724 = icmp eq i32 %953, 1
-  br i1 %1724, label %1725, label %1755
-
-; <label>:1725                                    ; preds = %1723
-  %1726 = add i32 %13, -1
-  %1727 = uitofp i32 %1726 to float
-  %1728 = call float @dx.op.binary.f32(i32 35, float %949, float 0.000000e+00)  ; FMax(a,b)
-  %1729 = call float @dx.op.binary.f32(i32 36, float %1728, float %1727)  ; FMin(a,b)
-  %1730 = fptoui float %1729 to i32
-  %1731 = add i32 %15, -1
-  %1732 = uitofp i32 %1731 to float
-  %1733 = call float @dx.op.binary.f32(i32 35, float %951, float 0.000000e+00)  ; FMax(a,b)
-  %1734 = call float @dx.op.binary.f32(i32 36, float %1733, float %1732)  ; FMin(a,b)
-  %1735 = fptoui float %1734 to i32
-  %1736 = uitofp i32 %1735 to float
-  %1737 = uitofp i32 %1730 to float
-  %1738 = fptoui float %45 to i32
-  %1739 = fptoui float %178 to i32
-  %1740 = fptoui float %1736 to i32
-  %1741 = fptoui float %1737 to i32
-  %1742 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1743 = extractvalue %dx.types.CBufRet.i32 %1742, 0
-  %1744 = extractvalue %dx.types.CBufRet.i32 %1742, 1
-  %1745 = extractvalue %dx.types.CBufRet.i32 %1742, 2
-  %1746 = extractvalue %dx.types.CBufRet.i32 %1742, 3
-  %1747 = mul i32 %1743, %1738
-  %1748 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1739, i32 %1744, i32 %1747)  ; IMad(a,b,c)
-  %1749 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1740, i32 %1745, i32 %1748)  ; IMad(a,b,c)
-  %1750 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1741, i32 %1746, i32 %1749)  ; IMad(a,b,c)
-  %1751 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1750, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1752 = extractvalue %dx.types.ResRet.i32 %1751, 0
-  %1753 = icmp ne i32 %1752, 0
-  %1754 = uitofp i1 %1753 to float
-  br label %1843
-
-; <label>:1755                                    ; preds = %1723
-  %1756 = icmp eq i32 %953, 2
-  br i1 %1756, label %1757, label %1843
-
-; <label>:1757                                    ; preds = %1755
-  %1758 = fsub fast float %22, %20
-  %1759 = fcmp fast olt float %949, %20
-  br i1 %1759, label %1760, label %1773
-
-; <label>:1760                                    ; preds = %1757
-  %1761 = fsub fast float %20, %949
-  %1762 = fdiv fast float %1761, %1758
-  %1763 = fptoui float %1762 to i32
-  %1764 = uitofp i32 %1763 to float
-  %1765 = fmul fast float %1764, %1758
-  %1766 = fsub fast float %1761, %1765
-  %1767 = and i32 %1763, 1
-  %1768 = icmp eq i32 %1767, 0
-  br i1 %1768, label %1769, label %1771
-
-; <label>:1769                                    ; preds = %1760
-  %1770 = fadd fast float %1766, %20
-  br label %1788
-
-; <label>:1771                                    ; preds = %1760
-  %1772 = fsub fast float %22, %1766
-  br label %1788
-
-; <label>:1773                                    ; preds = %1757
-  %1774 = fcmp fast ogt float %949, %22
-  br i1 %1774, label %1775, label %1788
-
-; <label>:1775                                    ; preds = %1773
-  %1776 = fsub fast float %949, %22
-  %1777 = fdiv fast float %1776, %1758
-  %1778 = fptoui float %1777 to i32
-  %1779 = uitofp i32 %1778 to float
-  %1780 = fmul fast float %1779, %1758
-  %1781 = fsub fast float %1776, %1780
-  %1782 = and i32 %1778, 1
-  %1783 = icmp eq i32 %1782, 0
-  br i1 %1783, label %1784, label %1786
-
-; <label>:1784                                    ; preds = %1775
-  %1785 = fsub fast float %22, %1781
-  br label %1788
-
-; <label>:1786                                    ; preds = %1775
-  %1787 = fadd fast float %1781, %20
-  br label %1788
-
-; <label>:1788                                    ; preds = %1786, %1784, %1773, %1771, %1769
-  %1789 = phi float [ %1770, %1769 ], [ %1772, %1771 ], [ %1785, %1784 ], [ %1787, %1786 ], [ %949, %1773 ]
-  %1790 = fptoui float %1789 to i32
-  %1791 = fsub fast float %24, %20
-  %1792 = fcmp fast olt float %951, %20
-  br i1 %1792, label %1793, label %1806
-
-; <label>:1793                                    ; preds = %1788
-  %1794 = fsub fast float %20, %951
-  %1795 = fdiv fast float %1794, %1791
-  %1796 = fptoui float %1795 to i32
-  %1797 = uitofp i32 %1796 to float
-  %1798 = fmul fast float %1797, %1791
-  %1799 = fsub fast float %1794, %1798
-  %1800 = and i32 %1796, 1
-  %1801 = icmp eq i32 %1800, 0
-  br i1 %1801, label %1802, label %1804
-
-; <label>:1802                                    ; preds = %1793
-  %1803 = fadd fast float %1799, %20
-  br label %1821
-
-; <label>:1804                                    ; preds = %1793
-  %1805 = fsub fast float %24, %1799
-  br label %1821
-
-; <label>:1806                                    ; preds = %1788
-  %1807 = fcmp fast ogt float %951, %24
-  br i1 %1807, label %1808, label %1821
-
-; <label>:1808                                    ; preds = %1806
-  %1809 = fsub fast float %951, %24
-  %1810 = fdiv fast float %1809, %1791
-  %1811 = fptoui float %1810 to i32
-  %1812 = uitofp i32 %1811 to float
-  %1813 = fmul fast float %1812, %1791
-  %1814 = fsub fast float %1809, %1813
-  %1815 = and i32 %1811, 1
-  %1816 = icmp eq i32 %1815, 0
-  br i1 %1816, label %1817, label %1819
-
-; <label>:1817                                    ; preds = %1808
-  %1818 = fsub fast float %24, %1814
-  br label %1821
-
-; <label>:1819                                    ; preds = %1808
-  %1820 = fadd fast float %1814, %20
-  br label %1821
-
-; <label>:1821                                    ; preds = %1819, %1817, %1806, %1804, %1802
-  %1822 = phi float [ %1803, %1802 ], [ %1805, %1804 ], [ %1818, %1817 ], [ %1820, %1819 ], [ %951, %1806 ]
-  %1823 = fptoui float %1822 to i32
-  %1824 = uitofp i32 %1823 to float
-  %1825 = uitofp i32 %1790 to float
-  %1826 = fptoui float %45 to i32
-  %1827 = fptoui float %178 to i32
-  %1828 = fptoui float %1824 to i32
-  %1829 = fptoui float %1825 to i32
-  %1830 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1831 = extractvalue %dx.types.CBufRet.i32 %1830, 0
-  %1832 = extractvalue %dx.types.CBufRet.i32 %1830, 1
-  %1833 = extractvalue %dx.types.CBufRet.i32 %1830, 2
-  %1834 = extractvalue %dx.types.CBufRet.i32 %1830, 3
-  %1835 = mul i32 %1831, %1826
-  %1836 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1827, i32 %1832, i32 %1835)  ; IMad(a,b,c)
-  %1837 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1828, i32 %1833, i32 %1836)  ; IMad(a,b,c)
-  %1838 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1829, i32 %1834, i32 %1837)  ; IMad(a,b,c)
-  %1839 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1838, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1840 = extractvalue %dx.types.ResRet.i32 %1839, 0
-  %1841 = icmp ne i32 %1840, 0
-  %1842 = uitofp i1 %1841 to float
-  br label %1843
-
-; <label>:1843                                    ; preds = %1821, %1755, %1725, %1707, %1697
-  %1844 = phi float [ %1722, %1707 ], [ 0.000000e+00, %1697 ], [ %1754, %1725 ], [ %1842, %1821 ], [ 0.000000e+00, %1755 ]
-  br i1 %954, label %1845, label %1871
-
-; <label>:1845                                    ; preds = %1843
-  %1846 = fcmp fast oge float %1251, 0.000000e+00
-  %1847 = fptoui float %1251 to i32
-  %1848 = icmp ult i32 %1847, %13
-  %1849 = and i1 %1846, %1848
-  %1850 = fcmp fast oge float %951, 0.000000e+00
-  %1851 = and i1 %1850, %1849
-  %1852 = fptoui float %951 to i32
-  %1853 = icmp ult i32 %1852, %15
-  %1854 = and i1 %1853, %1851
-  br i1 %1854, label %1855, label %1991
-
-; <label>:1855                                    ; preds = %1845
-  %1856 = fptoui float %45 to i32
-  %1857 = fptoui float %178 to i32
-  %1858 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1859 = extractvalue %dx.types.CBufRet.i32 %1858, 0
-  %1860 = extractvalue %dx.types.CBufRet.i32 %1858, 1
-  %1861 = extractvalue %dx.types.CBufRet.i32 %1858, 2
-  %1862 = extractvalue %dx.types.CBufRet.i32 %1858, 3
-  %1863 = mul i32 %1859, %1856
-  %1864 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1857, i32 %1860, i32 %1863)  ; IMad(a,b,c)
-  %1865 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1852, i32 %1861, i32 %1864)  ; IMad(a,b,c)
-  %1866 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1847, i32 %1862, i32 %1865)  ; IMad(a,b,c)
-  %1867 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1866, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1868 = extractvalue %dx.types.ResRet.i32 %1867, 0
-  %1869 = icmp ne i32 %1868, 0
-  %1870 = uitofp i1 %1869 to float
-  br label %1991
-
-; <label>:1871                                    ; preds = %1843
-  %1872 = icmp eq i32 %953, 1
-  br i1 %1872, label %1873, label %1903
-
-; <label>:1873                                    ; preds = %1871
-  %1874 = add i32 %13, -1
-  %1875 = uitofp i32 %1874 to float
-  %1876 = call float @dx.op.binary.f32(i32 35, float %1251, float 0.000000e+00)  ; FMax(a,b)
-  %1877 = call float @dx.op.binary.f32(i32 36, float %1876, float %1875)  ; FMin(a,b)
-  %1878 = fptoui float %1877 to i32
-  %1879 = add i32 %15, -1
-  %1880 = uitofp i32 %1879 to float
-  %1881 = call float @dx.op.binary.f32(i32 35, float %951, float 0.000000e+00)  ; FMax(a,b)
-  %1882 = call float @dx.op.binary.f32(i32 36, float %1881, float %1880)  ; FMin(a,b)
-  %1883 = fptoui float %1882 to i32
-  %1884 = uitofp i32 %1883 to float
-  %1885 = uitofp i32 %1878 to float
-  %1886 = fptoui float %45 to i32
-  %1887 = fptoui float %178 to i32
-  %1888 = fptoui float %1884 to i32
-  %1889 = fptoui float %1885 to i32
-  %1890 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1891 = extractvalue %dx.types.CBufRet.i32 %1890, 0
-  %1892 = extractvalue %dx.types.CBufRet.i32 %1890, 1
-  %1893 = extractvalue %dx.types.CBufRet.i32 %1890, 2
-  %1894 = extractvalue %dx.types.CBufRet.i32 %1890, 3
-  %1895 = mul i32 %1891, %1886
-  %1896 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1887, i32 %1892, i32 %1895)  ; IMad(a,b,c)
-  %1897 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1888, i32 %1893, i32 %1896)  ; IMad(a,b,c)
-  %1898 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1889, i32 %1894, i32 %1897)  ; IMad(a,b,c)
-  %1899 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1898, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1900 = extractvalue %dx.types.ResRet.i32 %1899, 0
-  %1901 = icmp ne i32 %1900, 0
-  %1902 = uitofp i1 %1901 to float
-  br label %1991
-
-; <label>:1903                                    ; preds = %1871
-  %1904 = icmp eq i32 %953, 2
-  br i1 %1904, label %1905, label %1991
-
-; <label>:1905                                    ; preds = %1903
-  %1906 = fsub fast float %22, %20
-  %1907 = fcmp fast olt float %1251, %20
-  br i1 %1907, label %1908, label %1921
-
-; <label>:1908                                    ; preds = %1905
-  %1909 = fsub fast float %20, %1251
-  %1910 = fdiv fast float %1909, %1906
-  %1911 = fptoui float %1910 to i32
-  %1912 = uitofp i32 %1911 to float
-  %1913 = fmul fast float %1912, %1906
-  %1914 = fsub fast float %1909, %1913
-  %1915 = and i32 %1911, 1
-  %1916 = icmp eq i32 %1915, 0
-  br i1 %1916, label %1917, label %1919
-
-; <label>:1917                                    ; preds = %1908
-  %1918 = fadd fast float %1914, %20
-  br label %1936
-
-; <label>:1919                                    ; preds = %1908
-  %1920 = fsub fast float %22, %1914
-  br label %1936
-
-; <label>:1921                                    ; preds = %1905
-  %1922 = fcmp fast ogt float %1251, %22
-  br i1 %1922, label %1923, label %1936
-
-; <label>:1923                                    ; preds = %1921
-  %1924 = fsub fast float %1251, %22
-  %1925 = fdiv fast float %1924, %1906
-  %1926 = fptoui float %1925 to i32
-  %1927 = uitofp i32 %1926 to float
-  %1928 = fmul fast float %1927, %1906
-  %1929 = fsub fast float %1924, %1928
-  %1930 = and i32 %1926, 1
-  %1931 = icmp eq i32 %1930, 0
-  br i1 %1931, label %1932, label %1934
-
-; <label>:1932                                    ; preds = %1923
-  %1933 = fsub fast float %22, %1929
-  br label %1936
-
-; <label>:1934                                    ; preds = %1923
-  %1935 = fadd fast float %1929, %20
-  br label %1936
-
-; <label>:1936                                    ; preds = %1934, %1932, %1921, %1919, %1917
-  %1937 = phi float [ %1918, %1917 ], [ %1920, %1919 ], [ %1933, %1932 ], [ %1935, %1934 ], [ %1251, %1921 ]
-  %1938 = fptoui float %1937 to i32
-  %1939 = fsub fast float %24, %20
-  %1940 = fcmp fast olt float %951, %20
-  br i1 %1940, label %1941, label %1954
-
-; <label>:1941                                    ; preds = %1936
-  %1942 = fsub fast float %20, %951
-  %1943 = fdiv fast float %1942, %1939
-  %1944 = fptoui float %1943 to i32
-  %1945 = uitofp i32 %1944 to float
-  %1946 = fmul fast float %1945, %1939
-  %1947 = fsub fast float %1942, %1946
-  %1948 = and i32 %1944, 1
-  %1949 = icmp eq i32 %1948, 0
-  br i1 %1949, label %1950, label %1952
-
-; <label>:1950                                    ; preds = %1941
-  %1951 = fadd fast float %1947, %20
-  br label %1969
-
-; <label>:1952                                    ; preds = %1941
-  %1953 = fsub fast float %24, %1947
-  br label %1969
-
-; <label>:1954                                    ; preds = %1936
-  %1955 = fcmp fast ogt float %951, %24
-  br i1 %1955, label %1956, label %1969
-
-; <label>:1956                                    ; preds = %1954
-  %1957 = fsub fast float %951, %24
-  %1958 = fdiv fast float %1957, %1939
-  %1959 = fptoui float %1958 to i32
-  %1960 = uitofp i32 %1959 to float
-  %1961 = fmul fast float %1960, %1939
-  %1962 = fsub fast float %1957, %1961
-  %1963 = and i32 %1959, 1
-  %1964 = icmp eq i32 %1963, 0
-  br i1 %1964, label %1965, label %1967
-
-; <label>:1965                                    ; preds = %1956
-  %1966 = fsub fast float %24, %1962
-  br label %1969
-
-; <label>:1967                                    ; preds = %1956
-  %1968 = fadd fast float %1962, %20
-  br label %1969
-
-; <label>:1969                                    ; preds = %1967, %1965, %1954, %1952, %1950
-  %1970 = phi float [ %1951, %1950 ], [ %1953, %1952 ], [ %1966, %1965 ], [ %1968, %1967 ], [ %951, %1954 ]
-  %1971 = fptoui float %1970 to i32
-  %1972 = uitofp i32 %1971 to float
-  %1973 = uitofp i32 %1938 to float
-  %1974 = fptoui float %45 to i32
-  %1975 = fptoui float %178 to i32
-  %1976 = fptoui float %1972 to i32
-  %1977 = fptoui float %1973 to i32
-  %1978 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1979 = extractvalue %dx.types.CBufRet.i32 %1978, 0
-  %1980 = extractvalue %dx.types.CBufRet.i32 %1978, 1
-  %1981 = extractvalue %dx.types.CBufRet.i32 %1978, 2
-  %1982 = extractvalue %dx.types.CBufRet.i32 %1978, 3
-  %1983 = mul i32 %1979, %1974
-  %1984 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1975, i32 %1980, i32 %1983)  ; IMad(a,b,c)
-  %1985 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1976, i32 %1981, i32 %1984)  ; IMad(a,b,c)
-  %1986 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1977, i32 %1982, i32 %1985)  ; IMad(a,b,c)
-  %1987 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1986, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1988 = extractvalue %dx.types.ResRet.i32 %1987, 0
-  %1989 = icmp ne i32 %1988, 0
-  %1990 = uitofp i1 %1989 to float
-  br label %1991
-
-; <label>:1991                                    ; preds = %1969, %1903, %1873, %1855, %1845
-  %1992 = phi float [ %1870, %1855 ], [ 0.000000e+00, %1845 ], [ %1902, %1873 ], [ %1990, %1969 ], [ 0.000000e+00, %1903 ]
-  br i1 %954, label %1993, label %2019
-
-; <label>:1993                                    ; preds = %1991
-  %1994 = fcmp fast oge float %1400, 0.000000e+00
-  %1995 = fptoui float %1400 to i32
-  %1996 = icmp ult i32 %1995, %13
-  %1997 = and i1 %1994, %1996
-  %1998 = fcmp fast oge float %951, 0.000000e+00
-  %1999 = and i1 %1998, %1997
-  %2000 = fptoui float %951 to i32
-  %2001 = icmp ult i32 %2000, %15
-  %2002 = and i1 %2001, %1999
-  br i1 %2002, label %2003, label %2139
-
-; <label>:2003                                    ; preds = %1993
-  %2004 = fptoui float %45 to i32
-  %2005 = fptoui float %178 to i32
-  %2006 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2007 = extractvalue %dx.types.CBufRet.i32 %2006, 0
-  %2008 = extractvalue %dx.types.CBufRet.i32 %2006, 1
-  %2009 = extractvalue %dx.types.CBufRet.i32 %2006, 2
-  %2010 = extractvalue %dx.types.CBufRet.i32 %2006, 3
-  %2011 = mul i32 %2007, %2004
-  %2012 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2005, i32 %2008, i32 %2011)  ; IMad(a,b,c)
-  %2013 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2000, i32 %2009, i32 %2012)  ; IMad(a,b,c)
-  %2014 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1995, i32 %2010, i32 %2013)  ; IMad(a,b,c)
-  %2015 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2014, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2016 = extractvalue %dx.types.ResRet.i32 %2015, 0
-  %2017 = icmp ne i32 %2016, 0
-  %2018 = uitofp i1 %2017 to float
-  br label %2139
-
-; <label>:2019                                    ; preds = %1991
-  %2020 = icmp eq i32 %953, 1
-  br i1 %2020, label %2021, label %2051
-
-; <label>:2021                                    ; preds = %2019
-  %2022 = add i32 %13, -1
-  %2023 = uitofp i32 %2022 to float
-  %2024 = call float @dx.op.binary.f32(i32 35, float %1400, float 0.000000e+00)  ; FMax(a,b)
-  %2025 = call float @dx.op.binary.f32(i32 36, float %2024, float %2023)  ; FMin(a,b)
-  %2026 = fptoui float %2025 to i32
-  %2027 = add i32 %15, -1
-  %2028 = uitofp i32 %2027 to float
-  %2029 = call float @dx.op.binary.f32(i32 35, float %951, float 0.000000e+00)  ; FMax(a,b)
-  %2030 = call float @dx.op.binary.f32(i32 36, float %2029, float %2028)  ; FMin(a,b)
-  %2031 = fptoui float %2030 to i32
-  %2032 = uitofp i32 %2031 to float
-  %2033 = uitofp i32 %2026 to float
-  %2034 = fptoui float %45 to i32
-  %2035 = fptoui float %178 to i32
-  %2036 = fptoui float %2032 to i32
-  %2037 = fptoui float %2033 to i32
-  %2038 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2039 = extractvalue %dx.types.CBufRet.i32 %2038, 0
-  %2040 = extractvalue %dx.types.CBufRet.i32 %2038, 1
-  %2041 = extractvalue %dx.types.CBufRet.i32 %2038, 2
-  %2042 = extractvalue %dx.types.CBufRet.i32 %2038, 3
-  %2043 = mul i32 %2039, %2034
-  %2044 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2035, i32 %2040, i32 %2043)  ; IMad(a,b,c)
-  %2045 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2036, i32 %2041, i32 %2044)  ; IMad(a,b,c)
-  %2046 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2037, i32 %2042, i32 %2045)  ; IMad(a,b,c)
-  %2047 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2046, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2048 = extractvalue %dx.types.ResRet.i32 %2047, 0
-  %2049 = icmp ne i32 %2048, 0
-  %2050 = uitofp i1 %2049 to float
-  br label %2139
-
-; <label>:2051                                    ; preds = %2019
-  %2052 = icmp eq i32 %953, 2
-  br i1 %2052, label %2053, label %2139
-
-; <label>:2053                                    ; preds = %2051
-  %2054 = fsub fast float %22, %20
-  %2055 = fcmp fast olt float %1400, %20
-  br i1 %2055, label %2056, label %2069
-
-; <label>:2056                                    ; preds = %2053
-  %2057 = fsub fast float %20, %1400
-  %2058 = fdiv fast float %2057, %2054
-  %2059 = fptoui float %2058 to i32
-  %2060 = uitofp i32 %2059 to float
-  %2061 = fmul fast float %2060, %2054
-  %2062 = fsub fast float %2057, %2061
-  %2063 = and i32 %2059, 1
-  %2064 = icmp eq i32 %2063, 0
-  br i1 %2064, label %2065, label %2067
-
-; <label>:2065                                    ; preds = %2056
-  %2066 = fadd fast float %2062, %20
-  br label %2084
-
-; <label>:2067                                    ; preds = %2056
-  %2068 = fsub fast float %22, %2062
-  br label %2084
-
-; <label>:2069                                    ; preds = %2053
-  %2070 = fcmp fast ogt float %1400, %22
-  br i1 %2070, label %2071, label %2084
-
-; <label>:2071                                    ; preds = %2069
-  %2072 = fsub fast float %1400, %22
-  %2073 = fdiv fast float %2072, %2054
-  %2074 = fptoui float %2073 to i32
-  %2075 = uitofp i32 %2074 to float
-  %2076 = fmul fast float %2075, %2054
-  %2077 = fsub fast float %2072, %2076
-  %2078 = and i32 %2074, 1
-  %2079 = icmp eq i32 %2078, 0
-  br i1 %2079, label %2080, label %2082
-
-; <label>:2080                                    ; preds = %2071
-  %2081 = fsub fast float %22, %2077
-  br label %2084
-
-; <label>:2082                                    ; preds = %2071
-  %2083 = fadd fast float %2077, %20
-  br label %2084
-
-; <label>:2084                                    ; preds = %2082, %2080, %2069, %2067, %2065
-  %2085 = phi float [ %2066, %2065 ], [ %2068, %2067 ], [ %2081, %2080 ], [ %2083, %2082 ], [ %1400, %2069 ]
-  %2086 = fptoui float %2085 to i32
-  %2087 = fsub fast float %24, %20
-  %2088 = fcmp fast olt float %951, %20
-  br i1 %2088, label %2089, label %2102
-
-; <label>:2089                                    ; preds = %2084
-  %2090 = fsub fast float %20, %951
-  %2091 = fdiv fast float %2090, %2087
-  %2092 = fptoui float %2091 to i32
-  %2093 = uitofp i32 %2092 to float
-  %2094 = fmul fast float %2093, %2087
-  %2095 = fsub fast float %2090, %2094
-  %2096 = and i32 %2092, 1
-  %2097 = icmp eq i32 %2096, 0
-  br i1 %2097, label %2098, label %2100
-
-; <label>:2098                                    ; preds = %2089
-  %2099 = fadd fast float %2095, %20
-  br label %2117
-
-; <label>:2100                                    ; preds = %2089
-  %2101 = fsub fast float %24, %2095
-  br label %2117
-
-; <label>:2102                                    ; preds = %2084
-  %2103 = fcmp fast ogt float %951, %24
-  br i1 %2103, label %2104, label %2117
-
-; <label>:2104                                    ; preds = %2102
-  %2105 = fsub fast float %951, %24
-  %2106 = fdiv fast float %2105, %2087
-  %2107 = fptoui float %2106 to i32
-  %2108 = uitofp i32 %2107 to float
-  %2109 = fmul fast float %2108, %2087
-  %2110 = fsub fast float %2105, %2109
-  %2111 = and i32 %2107, 1
-  %2112 = icmp eq i32 %2111, 0
-  br i1 %2112, label %2113, label %2115
-
-; <label>:2113                                    ; preds = %2104
-  %2114 = fsub fast float %24, %2110
-  br label %2117
-
-; <label>:2115                                    ; preds = %2104
-  %2116 = fadd fast float %2110, %20
-  br label %2117
-
-; <label>:2117                                    ; preds = %2115, %2113, %2102, %2100, %2098
-  %2118 = phi float [ %2099, %2098 ], [ %2101, %2100 ], [ %2114, %2113 ], [ %2116, %2115 ], [ %951, %2102 ]
-  %2119 = fptoui float %2118 to i32
-  %2120 = uitofp i32 %2119 to float
-  %2121 = uitofp i32 %2086 to float
-  %2122 = fptoui float %45 to i32
-  %2123 = fptoui float %178 to i32
-  %2124 = fptoui float %2120 to i32
-  %2125 = fptoui float %2121 to i32
-  %2126 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2127 = extractvalue %dx.types.CBufRet.i32 %2126, 0
-  %2128 = extractvalue %dx.types.CBufRet.i32 %2126, 1
-  %2129 = extractvalue %dx.types.CBufRet.i32 %2126, 2
-  %2130 = extractvalue %dx.types.CBufRet.i32 %2126, 3
-  %2131 = mul i32 %2127, %2122
-  %2132 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2123, i32 %2128, i32 %2131)  ; IMad(a,b,c)
-  %2133 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2124, i32 %2129, i32 %2132)  ; IMad(a,b,c)
-  %2134 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2125, i32 %2130, i32 %2133)  ; IMad(a,b,c)
-  %2135 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2134, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2136 = extractvalue %dx.types.ResRet.i32 %2135, 0
-  %2137 = icmp ne i32 %2136, 0
-  %2138 = uitofp i1 %2137 to float
-  br label %2139
-
-; <label>:2139                                    ; preds = %2117, %2051, %2021, %2003, %1993
-  %2140 = phi float [ %2018, %2003 ], [ 0.000000e+00, %1993 ], [ %2050, %2021 ], [ %2138, %2117 ], [ 0.000000e+00, %2051 ]
-  %2141 = fadd fast float %951, 1.000000e+00
-  br i1 %954, label %2142, label %2168
-
-; <label>:2142                                    ; preds = %2139
-  %2143 = fcmp fast oge float %950, 0.000000e+00
-  %2144 = fptoui float %950 to i32
-  %2145 = icmp ult i32 %2144, %13
-  %2146 = and i1 %2143, %2145
-  %2147 = fcmp fast oge float %2141, 0.000000e+00
-  %2148 = and i1 %2147, %2146
-  %2149 = fptoui float %2141 to i32
-  %2150 = icmp ult i32 %2149, %15
-  %2151 = and i1 %2150, %2148
-  br i1 %2151, label %2152, label %2288
-
-; <label>:2152                                    ; preds = %2142
-  %2153 = fptoui float %45 to i32
-  %2154 = fptoui float %178 to i32
-  %2155 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2156 = extractvalue %dx.types.CBufRet.i32 %2155, 0
-  %2157 = extractvalue %dx.types.CBufRet.i32 %2155, 1
-  %2158 = extractvalue %dx.types.CBufRet.i32 %2155, 2
-  %2159 = extractvalue %dx.types.CBufRet.i32 %2155, 3
-  %2160 = mul i32 %2156, %2153
-  %2161 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2154, i32 %2157, i32 %2160)  ; IMad(a,b,c)
-  %2162 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2149, i32 %2158, i32 %2161)  ; IMad(a,b,c)
-  %2163 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2144, i32 %2159, i32 %2162)  ; IMad(a,b,c)
-  %2164 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2163, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2165 = extractvalue %dx.types.ResRet.i32 %2164, 0
-  %2166 = icmp ne i32 %2165, 0
-  %2167 = uitofp i1 %2166 to float
-  br label %2288
-
-; <label>:2168                                    ; preds = %2139
-  %2169 = icmp eq i32 %953, 1
-  br i1 %2169, label %2170, label %2200
-
-; <label>:2170                                    ; preds = %2168
-  %2171 = add i32 %13, -1
-  %2172 = uitofp i32 %2171 to float
-  %2173 = call float @dx.op.binary.f32(i32 35, float %950, float 0.000000e+00)  ; FMax(a,b)
-  %2174 = call float @dx.op.binary.f32(i32 36, float %2173, float %2172)  ; FMin(a,b)
-  %2175 = fptoui float %2174 to i32
-  %2176 = add i32 %15, -1
-  %2177 = uitofp i32 %2176 to float
-  %2178 = call float @dx.op.binary.f32(i32 35, float %2141, float 0.000000e+00)  ; FMax(a,b)
-  %2179 = call float @dx.op.binary.f32(i32 36, float %2178, float %2177)  ; FMin(a,b)
-  %2180 = fptoui float %2179 to i32
-  %2181 = uitofp i32 %2180 to float
-  %2182 = uitofp i32 %2175 to float
-  %2183 = fptoui float %45 to i32
-  %2184 = fptoui float %178 to i32
-  %2185 = fptoui float %2181 to i32
-  %2186 = fptoui float %2182 to i32
-  %2187 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2188 = extractvalue %dx.types.CBufRet.i32 %2187, 0
-  %2189 = extractvalue %dx.types.CBufRet.i32 %2187, 1
-  %2190 = extractvalue %dx.types.CBufRet.i32 %2187, 2
-  %2191 = extractvalue %dx.types.CBufRet.i32 %2187, 3
-  %2192 = mul i32 %2188, %2183
-  %2193 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2184, i32 %2189, i32 %2192)  ; IMad(a,b,c)
-  %2194 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2185, i32 %2190, i32 %2193)  ; IMad(a,b,c)
-  %2195 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2186, i32 %2191, i32 %2194)  ; IMad(a,b,c)
-  %2196 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2195, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2197 = extractvalue %dx.types.ResRet.i32 %2196, 0
-  %2198 = icmp ne i32 %2197, 0
-  %2199 = uitofp i1 %2198 to float
-  br label %2288
-
-; <label>:2200                                    ; preds = %2168
-  %2201 = icmp eq i32 %953, 2
-  br i1 %2201, label %2202, label %2288
-
-; <label>:2202                                    ; preds = %2200
-  %2203 = fsub fast float %22, %20
-  %2204 = fcmp fast olt float %950, %20
-  br i1 %2204, label %2205, label %2218
-
-; <label>:2205                                    ; preds = %2202
-  %2206 = fsub fast float %20, %950
-  %2207 = fdiv fast float %2206, %2203
-  %2208 = fptoui float %2207 to i32
-  %2209 = uitofp i32 %2208 to float
-  %2210 = fmul fast float %2209, %2203
-  %2211 = fsub fast float %2206, %2210
-  %2212 = and i32 %2208, 1
-  %2213 = icmp eq i32 %2212, 0
-  br i1 %2213, label %2214, label %2216
-
-; <label>:2214                                    ; preds = %2205
-  %2215 = fadd fast float %2211, %20
-  br label %2233
-
-; <label>:2216                                    ; preds = %2205
-  %2217 = fsub fast float %22, %2211
-  br label %2233
-
-; <label>:2218                                    ; preds = %2202
-  %2219 = fcmp fast ogt float %950, %22
-  br i1 %2219, label %2220, label %2233
-
-; <label>:2220                                    ; preds = %2218
-  %2221 = fsub fast float %950, %22
-  %2222 = fdiv fast float %2221, %2203
-  %2223 = fptoui float %2222 to i32
-  %2224 = uitofp i32 %2223 to float
-  %2225 = fmul fast float %2224, %2203
-  %2226 = fsub fast float %2221, %2225
-  %2227 = and i32 %2223, 1
-  %2228 = icmp eq i32 %2227, 0
-  br i1 %2228, label %2229, label %2231
-
-; <label>:2229                                    ; preds = %2220
-  %2230 = fsub fast float %22, %2226
-  br label %2233
-
-; <label>:2231                                    ; preds = %2220
-  %2232 = fadd fast float %2226, %20
-  br label %2233
-
-; <label>:2233                                    ; preds = %2231, %2229, %2218, %2216, %2214
-  %2234 = phi float [ %2215, %2214 ], [ %2217, %2216 ], [ %2230, %2229 ], [ %2232, %2231 ], [ %950, %2218 ]
-  %2235 = fptoui float %2234 to i32
-  %2236 = fsub fast float %24, %20
-  %2237 = fcmp fast olt float %2141, %20
-  br i1 %2237, label %2238, label %2251
-
-; <label>:2238                                    ; preds = %2233
-  %2239 = fsub fast float %20, %2141
-  %2240 = fdiv fast float %2239, %2236
-  %2241 = fptoui float %2240 to i32
-  %2242 = uitofp i32 %2241 to float
-  %2243 = fmul fast float %2242, %2236
-  %2244 = fsub fast float %2239, %2243
-  %2245 = and i32 %2241, 1
-  %2246 = icmp eq i32 %2245, 0
-  br i1 %2246, label %2247, label %2249
-
-; <label>:2247                                    ; preds = %2238
-  %2248 = fadd fast float %2244, %20
-  br label %2266
-
-; <label>:2249                                    ; preds = %2238
-  %2250 = fsub fast float %24, %2244
-  br label %2266
-
-; <label>:2251                                    ; preds = %2233
-  %2252 = fcmp fast ogt float %2141, %24
-  br i1 %2252, label %2253, label %2266
-
-; <label>:2253                                    ; preds = %2251
-  %2254 = fsub fast float %2141, %24
-  %2255 = fdiv fast float %2254, %2236
-  %2256 = fptoui float %2255 to i32
-  %2257 = uitofp i32 %2256 to float
-  %2258 = fmul fast float %2257, %2236
-  %2259 = fsub fast float %2254, %2258
-  %2260 = and i32 %2256, 1
-  %2261 = icmp eq i32 %2260, 0
-  br i1 %2261, label %2262, label %2264
-
-; <label>:2262                                    ; preds = %2253
-  %2263 = fsub fast float %24, %2259
-  br label %2266
-
-; <label>:2264                                    ; preds = %2253
-  %2265 = fadd fast float %2259, %20
-  br label %2266
-
-; <label>:2266                                    ; preds = %2264, %2262, %2251, %2249, %2247
-  %2267 = phi float [ %2248, %2247 ], [ %2250, %2249 ], [ %2263, %2262 ], [ %2265, %2264 ], [ %2141, %2251 ]
-  %2268 = fptoui float %2267 to i32
-  %2269 = uitofp i32 %2268 to float
-  %2270 = uitofp i32 %2235 to float
-  %2271 = fptoui float %45 to i32
-  %2272 = fptoui float %178 to i32
-  %2273 = fptoui float %2269 to i32
-  %2274 = fptoui float %2270 to i32
-  %2275 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2276 = extractvalue %dx.types.CBufRet.i32 %2275, 0
-  %2277 = extractvalue %dx.types.CBufRet.i32 %2275, 1
-  %2278 = extractvalue %dx.types.CBufRet.i32 %2275, 2
-  %2279 = extractvalue %dx.types.CBufRet.i32 %2275, 3
-  %2280 = mul i32 %2276, %2271
-  %2281 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2272, i32 %2277, i32 %2280)  ; IMad(a,b,c)
-  %2282 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2273, i32 %2278, i32 %2281)  ; IMad(a,b,c)
-  %2283 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2274, i32 %2279, i32 %2282)  ; IMad(a,b,c)
-  %2284 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2283, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2285 = extractvalue %dx.types.ResRet.i32 %2284, 0
-  %2286 = icmp ne i32 %2285, 0
-  %2287 = uitofp i1 %2286 to float
-  br label %2288
-
-; <label>:2288                                    ; preds = %2266, %2200, %2170, %2152, %2142
-  %2289 = phi float [ %2167, %2152 ], [ 0.000000e+00, %2142 ], [ %2199, %2170 ], [ %2287, %2266 ], [ 0.000000e+00, %2200 ]
-  br i1 %954, label %2290, label %2316
-
-; <label>:2290                                    ; preds = %2288
-  %2291 = fcmp fast oge float %949, 0.000000e+00
-  %2292 = fptoui float %949 to i32
-  %2293 = icmp ult i32 %2292, %13
-  %2294 = and i1 %2291, %2293
-  %2295 = fcmp fast oge float %2141, 0.000000e+00
-  %2296 = and i1 %2295, %2294
-  %2297 = fptoui float %2141 to i32
-  %2298 = icmp ult i32 %2297, %15
-  %2299 = and i1 %2298, %2296
-  br i1 %2299, label %2300, label %2436
-
-; <label>:2300                                    ; preds = %2290
-  %2301 = fptoui float %45 to i32
-  %2302 = fptoui float %178 to i32
-  %2303 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2304 = extractvalue %dx.types.CBufRet.i32 %2303, 0
-  %2305 = extractvalue %dx.types.CBufRet.i32 %2303, 1
-  %2306 = extractvalue %dx.types.CBufRet.i32 %2303, 2
-  %2307 = extractvalue %dx.types.CBufRet.i32 %2303, 3
-  %2308 = mul i32 %2304, %2301
-  %2309 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2302, i32 %2305, i32 %2308)  ; IMad(a,b,c)
-  %2310 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2297, i32 %2306, i32 %2309)  ; IMad(a,b,c)
-  %2311 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2292, i32 %2307, i32 %2310)  ; IMad(a,b,c)
-  %2312 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2311, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2313 = extractvalue %dx.types.ResRet.i32 %2312, 0
-  %2314 = icmp ne i32 %2313, 0
-  %2315 = uitofp i1 %2314 to float
-  br label %2436
-
-; <label>:2316                                    ; preds = %2288
-  %2317 = icmp eq i32 %953, 1
-  br i1 %2317, label %2318, label %2348
-
-; <label>:2318                                    ; preds = %2316
-  %2319 = add i32 %13, -1
-  %2320 = uitofp i32 %2319 to float
-  %2321 = call float @dx.op.binary.f32(i32 35, float %949, float 0.000000e+00)  ; FMax(a,b)
-  %2322 = call float @dx.op.binary.f32(i32 36, float %2321, float %2320)  ; FMin(a,b)
-  %2323 = fptoui float %2322 to i32
-  %2324 = add i32 %15, -1
-  %2325 = uitofp i32 %2324 to float
-  %2326 = call float @dx.op.binary.f32(i32 35, float %2141, float 0.000000e+00)  ; FMax(a,b)
-  %2327 = call float @dx.op.binary.f32(i32 36, float %2326, float %2325)  ; FMin(a,b)
-  %2328 = fptoui float %2327 to i32
-  %2329 = uitofp i32 %2328 to float
-  %2330 = uitofp i32 %2323 to float
-  %2331 = fptoui float %45 to i32
-  %2332 = fptoui float %178 to i32
-  %2333 = fptoui float %2329 to i32
-  %2334 = fptoui float %2330 to i32
-  %2335 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2336 = extractvalue %dx.types.CBufRet.i32 %2335, 0
-  %2337 = extractvalue %dx.types.CBufRet.i32 %2335, 1
-  %2338 = extractvalue %dx.types.CBufRet.i32 %2335, 2
-  %2339 = extractvalue %dx.types.CBufRet.i32 %2335, 3
-  %2340 = mul i32 %2336, %2331
-  %2341 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2332, i32 %2337, i32 %2340)  ; IMad(a,b,c)
-  %2342 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2333, i32 %2338, i32 %2341)  ; IMad(a,b,c)
-  %2343 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2334, i32 %2339, i32 %2342)  ; IMad(a,b,c)
-  %2344 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2343, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2345 = extractvalue %dx.types.ResRet.i32 %2344, 0
-  %2346 = icmp ne i32 %2345, 0
-  %2347 = uitofp i1 %2346 to float
-  br label %2436
-
-; <label>:2348                                    ; preds = %2316
-  %2349 = icmp eq i32 %953, 2
-  br i1 %2349, label %2350, label %2436
-
-; <label>:2350                                    ; preds = %2348
-  %2351 = fsub fast float %22, %20
-  %2352 = fcmp fast olt float %949, %20
-  br i1 %2352, label %2353, label %2366
-
-; <label>:2353                                    ; preds = %2350
-  %2354 = fsub fast float %20, %949
-  %2355 = fdiv fast float %2354, %2351
-  %2356 = fptoui float %2355 to i32
-  %2357 = uitofp i32 %2356 to float
-  %2358 = fmul fast float %2357, %2351
-  %2359 = fsub fast float %2354, %2358
-  %2360 = and i32 %2356, 1
-  %2361 = icmp eq i32 %2360, 0
-  br i1 %2361, label %2362, label %2364
-
-; <label>:2362                                    ; preds = %2353
-  %2363 = fadd fast float %2359, %20
-  br label %2381
-
-; <label>:2364                                    ; preds = %2353
-  %2365 = fsub fast float %22, %2359
-  br label %2381
-
-; <label>:2366                                    ; preds = %2350
-  %2367 = fcmp fast ogt float %949, %22
-  br i1 %2367, label %2368, label %2381
-
-; <label>:2368                                    ; preds = %2366
-  %2369 = fsub fast float %949, %22
-  %2370 = fdiv fast float %2369, %2351
-  %2371 = fptoui float %2370 to i32
-  %2372 = uitofp i32 %2371 to float
-  %2373 = fmul fast float %2372, %2351
-  %2374 = fsub fast float %2369, %2373
-  %2375 = and i32 %2371, 1
-  %2376 = icmp eq i32 %2375, 0
-  br i1 %2376, label %2377, label %2379
-
-; <label>:2377                                    ; preds = %2368
-  %2378 = fsub fast float %22, %2374
-  br label %2381
-
-; <label>:2379                                    ; preds = %2368
-  %2380 = fadd fast float %2374, %20
-  br label %2381
-
-; <label>:2381                                    ; preds = %2379, %2377, %2366, %2364, %2362
-  %2382 = phi float [ %2363, %2362 ], [ %2365, %2364 ], [ %2378, %2377 ], [ %2380, %2379 ], [ %949, %2366 ]
-  %2383 = fptoui float %2382 to i32
-  %2384 = fsub fast float %24, %20
-  %2385 = fcmp fast olt float %2141, %20
-  br i1 %2385, label %2386, label %2399
-
-; <label>:2386                                    ; preds = %2381
-  %2387 = fsub fast float %20, %2141
-  %2388 = fdiv fast float %2387, %2384
-  %2389 = fptoui float %2388 to i32
-  %2390 = uitofp i32 %2389 to float
-  %2391 = fmul fast float %2390, %2384
-  %2392 = fsub fast float %2387, %2391
-  %2393 = and i32 %2389, 1
-  %2394 = icmp eq i32 %2393, 0
-  br i1 %2394, label %2395, label %2397
-
-; <label>:2395                                    ; preds = %2386
-  %2396 = fadd fast float %2392, %20
-  br label %2414
-
-; <label>:2397                                    ; preds = %2386
-  %2398 = fsub fast float %24, %2392
-  br label %2414
-
-; <label>:2399                                    ; preds = %2381
-  %2400 = fcmp fast ogt float %2141, %24
-  br i1 %2400, label %2401, label %2414
-
-; <label>:2401                                    ; preds = %2399
-  %2402 = fsub fast float %2141, %24
-  %2403 = fdiv fast float %2402, %2384
-  %2404 = fptoui float %2403 to i32
-  %2405 = uitofp i32 %2404 to float
-  %2406 = fmul fast float %2405, %2384
-  %2407 = fsub fast float %2402, %2406
-  %2408 = and i32 %2404, 1
-  %2409 = icmp eq i32 %2408, 0
-  br i1 %2409, label %2410, label %2412
-
-; <label>:2410                                    ; preds = %2401
-  %2411 = fsub fast float %24, %2407
-  br label %2414
-
-; <label>:2412                                    ; preds = %2401
-  %2413 = fadd fast float %2407, %20
-  br label %2414
-
-; <label>:2414                                    ; preds = %2412, %2410, %2399, %2397, %2395
-  %2415 = phi float [ %2396, %2395 ], [ %2398, %2397 ], [ %2411, %2410 ], [ %2413, %2412 ], [ %2141, %2399 ]
-  %2416 = fptoui float %2415 to i32
-  %2417 = uitofp i32 %2416 to float
-  %2418 = uitofp i32 %2383 to float
-  %2419 = fptoui float %45 to i32
-  %2420 = fptoui float %178 to i32
-  %2421 = fptoui float %2417 to i32
-  %2422 = fptoui float %2418 to i32
-  %2423 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2424 = extractvalue %dx.types.CBufRet.i32 %2423, 0
-  %2425 = extractvalue %dx.types.CBufRet.i32 %2423, 1
-  %2426 = extractvalue %dx.types.CBufRet.i32 %2423, 2
-  %2427 = extractvalue %dx.types.CBufRet.i32 %2423, 3
-  %2428 = mul i32 %2424, %2419
-  %2429 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2420, i32 %2425, i32 %2428)  ; IMad(a,b,c)
-  %2430 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2421, i32 %2426, i32 %2429)  ; IMad(a,b,c)
-  %2431 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2422, i32 %2427, i32 %2430)  ; IMad(a,b,c)
-  %2432 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2431, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2433 = extractvalue %dx.types.ResRet.i32 %2432, 0
-  %2434 = icmp ne i32 %2433, 0
-  %2435 = uitofp i1 %2434 to float
-  br label %2436
-
-; <label>:2436                                    ; preds = %2414, %2348, %2318, %2300, %2290
-  %2437 = phi float [ %2315, %2300 ], [ 0.000000e+00, %2290 ], [ %2347, %2318 ], [ %2435, %2414 ], [ 0.000000e+00, %2348 ]
-  br i1 %954, label %2438, label %2464
-
-; <label>:2438                                    ; preds = %2436
-  %2439 = fcmp fast oge float %1251, 0.000000e+00
-  %2440 = fptoui float %1251 to i32
-  %2441 = icmp ult i32 %2440, %13
-  %2442 = and i1 %2439, %2441
-  %2443 = fcmp fast oge float %2141, 0.000000e+00
-  %2444 = and i1 %2443, %2442
-  %2445 = fptoui float %2141 to i32
-  %2446 = icmp ult i32 %2445, %15
-  %2447 = and i1 %2446, %2444
-  br i1 %2447, label %2448, label %2584
-
-; <label>:2448                                    ; preds = %2438
-  %2449 = fptoui float %45 to i32
-  %2450 = fptoui float %178 to i32
-  %2451 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2452 = extractvalue %dx.types.CBufRet.i32 %2451, 0
-  %2453 = extractvalue %dx.types.CBufRet.i32 %2451, 1
-  %2454 = extractvalue %dx.types.CBufRet.i32 %2451, 2
-  %2455 = extractvalue %dx.types.CBufRet.i32 %2451, 3
-  %2456 = mul i32 %2452, %2449
-  %2457 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2450, i32 %2453, i32 %2456)  ; IMad(a,b,c)
-  %2458 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2445, i32 %2454, i32 %2457)  ; IMad(a,b,c)
-  %2459 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2440, i32 %2455, i32 %2458)  ; IMad(a,b,c)
-  %2460 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2459, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2461 = extractvalue %dx.types.ResRet.i32 %2460, 0
-  %2462 = icmp ne i32 %2461, 0
-  %2463 = uitofp i1 %2462 to float
-  br label %2584
-
-; <label>:2464                                    ; preds = %2436
-  %2465 = icmp eq i32 %953, 1
-  br i1 %2465, label %2466, label %2496
-
-; <label>:2466                                    ; preds = %2464
-  %2467 = add i32 %13, -1
-  %2468 = uitofp i32 %2467 to float
-  %2469 = call float @dx.op.binary.f32(i32 35, float %1251, float 0.000000e+00)  ; FMax(a,b)
-  %2470 = call float @dx.op.binary.f32(i32 36, float %2469, float %2468)  ; FMin(a,b)
-  %2471 = fptoui float %2470 to i32
-  %2472 = add i32 %15, -1
-  %2473 = uitofp i32 %2472 to float
-  %2474 = call float @dx.op.binary.f32(i32 35, float %2141, float 0.000000e+00)  ; FMax(a,b)
-  %2475 = call float @dx.op.binary.f32(i32 36, float %2474, float %2473)  ; FMin(a,b)
-  %2476 = fptoui float %2475 to i32
-  %2477 = uitofp i32 %2476 to float
-  %2478 = uitofp i32 %2471 to float
-  %2479 = fptoui float %45 to i32
-  %2480 = fptoui float %178 to i32
-  %2481 = fptoui float %2477 to i32
-  %2482 = fptoui float %2478 to i32
-  %2483 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2484 = extractvalue %dx.types.CBufRet.i32 %2483, 0
-  %2485 = extractvalue %dx.types.CBufRet.i32 %2483, 1
-  %2486 = extractvalue %dx.types.CBufRet.i32 %2483, 2
-  %2487 = extractvalue %dx.types.CBufRet.i32 %2483, 3
-  %2488 = mul i32 %2484, %2479
-  %2489 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2480, i32 %2485, i32 %2488)  ; IMad(a,b,c)
-  %2490 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2481, i32 %2486, i32 %2489)  ; IMad(a,b,c)
-  %2491 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2482, i32 %2487, i32 %2490)  ; IMad(a,b,c)
-  %2492 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2491, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2493 = extractvalue %dx.types.ResRet.i32 %2492, 0
-  %2494 = icmp ne i32 %2493, 0
-  %2495 = uitofp i1 %2494 to float
-  br label %2584
-
-; <label>:2496                                    ; preds = %2464
-  %2497 = icmp eq i32 %953, 2
-  br i1 %2497, label %2498, label %2584
-
-; <label>:2498                                    ; preds = %2496
-  %2499 = fsub fast float %22, %20
-  %2500 = fcmp fast olt float %1251, %20
-  br i1 %2500, label %2501, label %2514
-
-; <label>:2501                                    ; preds = %2498
-  %2502 = fsub fast float %20, %1251
-  %2503 = fdiv fast float %2502, %2499
-  %2504 = fptoui float %2503 to i32
-  %2505 = uitofp i32 %2504 to float
-  %2506 = fmul fast float %2505, %2499
-  %2507 = fsub fast float %2502, %2506
-  %2508 = and i32 %2504, 1
-  %2509 = icmp eq i32 %2508, 0
-  br i1 %2509, label %2510, label %2512
-
-; <label>:2510                                    ; preds = %2501
-  %2511 = fadd fast float %2507, %20
-  br label %2529
-
-; <label>:2512                                    ; preds = %2501
-  %2513 = fsub fast float %22, %2507
-  br label %2529
-
-; <label>:2514                                    ; preds = %2498
-  %2515 = fcmp fast ogt float %1251, %22
-  br i1 %2515, label %2516, label %2529
-
-; <label>:2516                                    ; preds = %2514
-  %2517 = fsub fast float %1251, %22
-  %2518 = fdiv fast float %2517, %2499
-  %2519 = fptoui float %2518 to i32
-  %2520 = uitofp i32 %2519 to float
-  %2521 = fmul fast float %2520, %2499
-  %2522 = fsub fast float %2517, %2521
-  %2523 = and i32 %2519, 1
-  %2524 = icmp eq i32 %2523, 0
-  br i1 %2524, label %2525, label %2527
-
-; <label>:2525                                    ; preds = %2516
-  %2526 = fsub fast float %22, %2522
-  br label %2529
-
-; <label>:2527                                    ; preds = %2516
-  %2528 = fadd fast float %2522, %20
-  br label %2529
-
-; <label>:2529                                    ; preds = %2527, %2525, %2514, %2512, %2510
-  %2530 = phi float [ %2511, %2510 ], [ %2513, %2512 ], [ %2526, %2525 ], [ %2528, %2527 ], [ %1251, %2514 ]
-  %2531 = fptoui float %2530 to i32
-  %2532 = fsub fast float %24, %20
-  %2533 = fcmp fast olt float %2141, %20
-  br i1 %2533, label %2534, label %2547
-
-; <label>:2534                                    ; preds = %2529
-  %2535 = fsub fast float %20, %2141
-  %2536 = fdiv fast float %2535, %2532
-  %2537 = fptoui float %2536 to i32
-  %2538 = uitofp i32 %2537 to float
-  %2539 = fmul fast float %2538, %2532
-  %2540 = fsub fast float %2535, %2539
-  %2541 = and i32 %2537, 1
-  %2542 = icmp eq i32 %2541, 0
-  br i1 %2542, label %2543, label %2545
-
-; <label>:2543                                    ; preds = %2534
-  %2544 = fadd fast float %2540, %20
-  br label %2562
-
-; <label>:2545                                    ; preds = %2534
-  %2546 = fsub fast float %24, %2540
-  br label %2562
-
-; <label>:2547                                    ; preds = %2529
-  %2548 = fcmp fast ogt float %2141, %24
-  br i1 %2548, label %2549, label %2562
-
-; <label>:2549                                    ; preds = %2547
-  %2550 = fsub fast float %2141, %24
-  %2551 = fdiv fast float %2550, %2532
-  %2552 = fptoui float %2551 to i32
-  %2553 = uitofp i32 %2552 to float
-  %2554 = fmul fast float %2553, %2532
-  %2555 = fsub fast float %2550, %2554
-  %2556 = and i32 %2552, 1
-  %2557 = icmp eq i32 %2556, 0
-  br i1 %2557, label %2558, label %2560
-
-; <label>:2558                                    ; preds = %2549
-  %2559 = fsub fast float %24, %2555
-  br label %2562
-
-; <label>:2560                                    ; preds = %2549
-  %2561 = fadd fast float %2555, %20
-  br label %2562
-
-; <label>:2562                                    ; preds = %2560, %2558, %2547, %2545, %2543
-  %2563 = phi float [ %2544, %2543 ], [ %2546, %2545 ], [ %2559, %2558 ], [ %2561, %2560 ], [ %2141, %2547 ]
-  %2564 = fptoui float %2563 to i32
-  %2565 = uitofp i32 %2564 to float
-  %2566 = uitofp i32 %2531 to float
-  %2567 = fptoui float %45 to i32
-  %2568 = fptoui float %178 to i32
-  %2569 = fptoui float %2565 to i32
-  %2570 = fptoui float %2566 to i32
-  %2571 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2572 = extractvalue %dx.types.CBufRet.i32 %2571, 0
-  %2573 = extractvalue %dx.types.CBufRet.i32 %2571, 1
-  %2574 = extractvalue %dx.types.CBufRet.i32 %2571, 2
-  %2575 = extractvalue %dx.types.CBufRet.i32 %2571, 3
-  %2576 = mul i32 %2572, %2567
-  %2577 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2568, i32 %2573, i32 %2576)  ; IMad(a,b,c)
-  %2578 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2569, i32 %2574, i32 %2577)  ; IMad(a,b,c)
-  %2579 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2570, i32 %2575, i32 %2578)  ; IMad(a,b,c)
-  %2580 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2579, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2581 = extractvalue %dx.types.ResRet.i32 %2580, 0
-  %2582 = icmp ne i32 %2581, 0
-  %2583 = uitofp i1 %2582 to float
-  br label %2584
-
-; <label>:2584                                    ; preds = %2562, %2496, %2466, %2448, %2438
-  %2585 = phi float [ %2463, %2448 ], [ 0.000000e+00, %2438 ], [ %2495, %2466 ], [ %2583, %2562 ], [ 0.000000e+00, %2496 ]
-  br i1 %954, label %2586, label %2612
-
-; <label>:2586                                    ; preds = %2584
-  %2587 = fcmp fast oge float %1400, 0.000000e+00
-  %2588 = fptoui float %1400 to i32
-  %2589 = icmp ult i32 %2588, %13
-  %2590 = and i1 %2587, %2589
-  %2591 = fcmp fast oge float %2141, 0.000000e+00
-  %2592 = and i1 %2591, %2590
-  %2593 = fptoui float %2141 to i32
-  %2594 = icmp ult i32 %2593, %15
-  %2595 = and i1 %2594, %2592
-  br i1 %2595, label %2596, label %2732
-
-; <label>:2596                                    ; preds = %2586
-  %2597 = fptoui float %45 to i32
-  %2598 = fptoui float %178 to i32
-  %2599 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2600 = extractvalue %dx.types.CBufRet.i32 %2599, 0
-  %2601 = extractvalue %dx.types.CBufRet.i32 %2599, 1
-  %2602 = extractvalue %dx.types.CBufRet.i32 %2599, 2
-  %2603 = extractvalue %dx.types.CBufRet.i32 %2599, 3
-  %2604 = mul i32 %2600, %2597
-  %2605 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2598, i32 %2601, i32 %2604)  ; IMad(a,b,c)
-  %2606 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2593, i32 %2602, i32 %2605)  ; IMad(a,b,c)
-  %2607 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2588, i32 %2603, i32 %2606)  ; IMad(a,b,c)
-  %2608 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2607, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2609 = extractvalue %dx.types.ResRet.i32 %2608, 0
-  %2610 = icmp ne i32 %2609, 0
-  %2611 = uitofp i1 %2610 to float
-  br label %2732
-
-; <label>:2612                                    ; preds = %2584
-  %2613 = icmp eq i32 %953, 1
-  br i1 %2613, label %2614, label %2644
-
-; <label>:2614                                    ; preds = %2612
-  %2615 = add i32 %13, -1
-  %2616 = uitofp i32 %2615 to float
-  %2617 = call float @dx.op.binary.f32(i32 35, float %1400, float 0.000000e+00)  ; FMax(a,b)
-  %2618 = call float @dx.op.binary.f32(i32 36, float %2617, float %2616)  ; FMin(a,b)
-  %2619 = fptoui float %2618 to i32
-  %2620 = add i32 %15, -1
-  %2621 = uitofp i32 %2620 to float
-  %2622 = call float @dx.op.binary.f32(i32 35, float %2141, float 0.000000e+00)  ; FMax(a,b)
-  %2623 = call float @dx.op.binary.f32(i32 36, float %2622, float %2621)  ; FMin(a,b)
-  %2624 = fptoui float %2623 to i32
-  %2625 = uitofp i32 %2624 to float
-  %2626 = uitofp i32 %2619 to float
-  %2627 = fptoui float %45 to i32
-  %2628 = fptoui float %178 to i32
-  %2629 = fptoui float %2625 to i32
-  %2630 = fptoui float %2626 to i32
-  %2631 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2632 = extractvalue %dx.types.CBufRet.i32 %2631, 0
-  %2633 = extractvalue %dx.types.CBufRet.i32 %2631, 1
-  %2634 = extractvalue %dx.types.CBufRet.i32 %2631, 2
-  %2635 = extractvalue %dx.types.CBufRet.i32 %2631, 3
-  %2636 = mul i32 %2632, %2627
-  %2637 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2628, i32 %2633, i32 %2636)  ; IMad(a,b,c)
-  %2638 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2629, i32 %2634, i32 %2637)  ; IMad(a,b,c)
-  %2639 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2630, i32 %2635, i32 %2638)  ; IMad(a,b,c)
-  %2640 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2639, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2641 = extractvalue %dx.types.ResRet.i32 %2640, 0
-  %2642 = icmp ne i32 %2641, 0
-  %2643 = uitofp i1 %2642 to float
-  br label %2732
-
-; <label>:2644                                    ; preds = %2612
-  %2645 = icmp eq i32 %953, 2
-  br i1 %2645, label %2646, label %2732
-
-; <label>:2646                                    ; preds = %2644
-  %2647 = fsub fast float %22, %20
-  %2648 = fcmp fast olt float %1400, %20
-  br i1 %2648, label %2649, label %2662
-
-; <label>:2649                                    ; preds = %2646
-  %2650 = fsub fast float %20, %1400
-  %2651 = fdiv fast float %2650, %2647
-  %2652 = fptoui float %2651 to i32
-  %2653 = uitofp i32 %2652 to float
-  %2654 = fmul fast float %2653, %2647
-  %2655 = fsub fast float %2650, %2654
-  %2656 = and i32 %2652, 1
-  %2657 = icmp eq i32 %2656, 0
-  br i1 %2657, label %2658, label %2660
-
-; <label>:2658                                    ; preds = %2649
-  %2659 = fadd fast float %2655, %20
-  br label %2677
-
-; <label>:2660                                    ; preds = %2649
-  %2661 = fsub fast float %22, %2655
-  br label %2677
-
-; <label>:2662                                    ; preds = %2646
-  %2663 = fcmp fast ogt float %1400, %22
-  br i1 %2663, label %2664, label %2677
-
-; <label>:2664                                    ; preds = %2662
-  %2665 = fsub fast float %1400, %22
-  %2666 = fdiv fast float %2665, %2647
-  %2667 = fptoui float %2666 to i32
-  %2668 = uitofp i32 %2667 to float
-  %2669 = fmul fast float %2668, %2647
-  %2670 = fsub fast float %2665, %2669
-  %2671 = and i32 %2667, 1
-  %2672 = icmp eq i32 %2671, 0
-  br i1 %2672, label %2673, label %2675
-
-; <label>:2673                                    ; preds = %2664
-  %2674 = fsub fast float %22, %2670
-  br label %2677
-
-; <label>:2675                                    ; preds = %2664
-  %2676 = fadd fast float %2670, %20
-  br label %2677
-
-; <label>:2677                                    ; preds = %2675, %2673, %2662, %2660, %2658
-  %2678 = phi float [ %2659, %2658 ], [ %2661, %2660 ], [ %2674, %2673 ], [ %2676, %2675 ], [ %1400, %2662 ]
-  %2679 = fptoui float %2678 to i32
-  %2680 = fsub fast float %24, %20
-  %2681 = fcmp fast olt float %2141, %20
-  br i1 %2681, label %2682, label %2695
-
-; <label>:2682                                    ; preds = %2677
-  %2683 = fsub fast float %20, %2141
-  %2684 = fdiv fast float %2683, %2680
-  %2685 = fptoui float %2684 to i32
-  %2686 = uitofp i32 %2685 to float
-  %2687 = fmul fast float %2686, %2680
-  %2688 = fsub fast float %2683, %2687
-  %2689 = and i32 %2685, 1
-  %2690 = icmp eq i32 %2689, 0
-  br i1 %2690, label %2691, label %2693
-
-; <label>:2691                                    ; preds = %2682
-  %2692 = fadd fast float %2688, %20
-  br label %2710
-
-; <label>:2693                                    ; preds = %2682
-  %2694 = fsub fast float %24, %2688
-  br label %2710
-
-; <label>:2695                                    ; preds = %2677
-  %2696 = fcmp fast ogt float %2141, %24
-  br i1 %2696, label %2697, label %2710
-
-; <label>:2697                                    ; preds = %2695
-  %2698 = fsub fast float %2141, %24
-  %2699 = fdiv fast float %2698, %2680
-  %2700 = fptoui float %2699 to i32
-  %2701 = uitofp i32 %2700 to float
-  %2702 = fmul fast float %2701, %2680
-  %2703 = fsub fast float %2698, %2702
-  %2704 = and i32 %2700, 1
-  %2705 = icmp eq i32 %2704, 0
-  br i1 %2705, label %2706, label %2708
-
-; <label>:2706                                    ; preds = %2697
-  %2707 = fsub fast float %24, %2703
-  br label %2710
-
-; <label>:2708                                    ; preds = %2697
-  %2709 = fadd fast float %2703, %20
-  br label %2710
-
-; <label>:2710                                    ; preds = %2708, %2706, %2695, %2693, %2691
-  %2711 = phi float [ %2692, %2691 ], [ %2694, %2693 ], [ %2707, %2706 ], [ %2709, %2708 ], [ %2141, %2695 ]
-  %2712 = fptoui float %2711 to i32
-  %2713 = uitofp i32 %2712 to float
-  %2714 = uitofp i32 %2679 to float
-  %2715 = fptoui float %45 to i32
-  %2716 = fptoui float %178 to i32
-  %2717 = fptoui float %2713 to i32
-  %2718 = fptoui float %2714 to i32
-  %2719 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2720 = extractvalue %dx.types.CBufRet.i32 %2719, 0
-  %2721 = extractvalue %dx.types.CBufRet.i32 %2719, 1
-  %2722 = extractvalue %dx.types.CBufRet.i32 %2719, 2
-  %2723 = extractvalue %dx.types.CBufRet.i32 %2719, 3
-  %2724 = mul i32 %2720, %2715
-  %2725 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2716, i32 %2721, i32 %2724)  ; IMad(a,b,c)
-  %2726 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2717, i32 %2722, i32 %2725)  ; IMad(a,b,c)
-  %2727 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2718, i32 %2723, i32 %2726)  ; IMad(a,b,c)
-  %2728 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2727, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2729 = extractvalue %dx.types.ResRet.i32 %2728, 0
-  %2730 = icmp ne i32 %2729, 0
-  %2731 = uitofp i1 %2730 to float
-  br label %2732
-
-; <label>:2732                                    ; preds = %2710, %2644, %2614, %2596, %2586
-  %2733 = phi float [ %2611, %2596 ], [ 0.000000e+00, %2586 ], [ %2643, %2614 ], [ %2731, %2710 ], [ 0.000000e+00, %2644 ]
-  %2734 = fadd fast float %951, 2.000000e+00
-  br i1 %954, label %2735, label %2761
-
-; <label>:2735                                    ; preds = %2732
-  %2736 = fcmp fast oge float %950, 0.000000e+00
-  %2737 = fptoui float %950 to i32
-  %2738 = icmp ult i32 %2737, %13
-  %2739 = and i1 %2736, %2738
-  %2740 = fcmp fast oge float %2734, 0.000000e+00
-  %2741 = and i1 %2740, %2739
-  %2742 = fptoui float %2734 to i32
-  %2743 = icmp ult i32 %2742, %15
-  %2744 = and i1 %2743, %2741
-  br i1 %2744, label %2745, label %2881
-
-; <label>:2745                                    ; preds = %2735
-  %2746 = fptoui float %45 to i32
-  %2747 = fptoui float %178 to i32
-  %2748 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2749 = extractvalue %dx.types.CBufRet.i32 %2748, 0
-  %2750 = extractvalue %dx.types.CBufRet.i32 %2748, 1
-  %2751 = extractvalue %dx.types.CBufRet.i32 %2748, 2
-  %2752 = extractvalue %dx.types.CBufRet.i32 %2748, 3
-  %2753 = mul i32 %2749, %2746
-  %2754 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2747, i32 %2750, i32 %2753)  ; IMad(a,b,c)
-  %2755 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2742, i32 %2751, i32 %2754)  ; IMad(a,b,c)
-  %2756 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2737, i32 %2752, i32 %2755)  ; IMad(a,b,c)
-  %2757 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2756, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2758 = extractvalue %dx.types.ResRet.i32 %2757, 0
-  %2759 = icmp ne i32 %2758, 0
-  %2760 = uitofp i1 %2759 to float
-  br label %2881
-
-; <label>:2761                                    ; preds = %2732
-  %2762 = icmp eq i32 %953, 1
-  br i1 %2762, label %2763, label %2793
-
-; <label>:2763                                    ; preds = %2761
-  %2764 = add i32 %13, -1
-  %2765 = uitofp i32 %2764 to float
-  %2766 = call float @dx.op.binary.f32(i32 35, float %950, float 0.000000e+00)  ; FMax(a,b)
-  %2767 = call float @dx.op.binary.f32(i32 36, float %2766, float %2765)  ; FMin(a,b)
-  %2768 = fptoui float %2767 to i32
-  %2769 = add i32 %15, -1
-  %2770 = uitofp i32 %2769 to float
-  %2771 = call float @dx.op.binary.f32(i32 35, float %2734, float 0.000000e+00)  ; FMax(a,b)
-  %2772 = call float @dx.op.binary.f32(i32 36, float %2771, float %2770)  ; FMin(a,b)
-  %2773 = fptoui float %2772 to i32
-  %2774 = uitofp i32 %2773 to float
-  %2775 = uitofp i32 %2768 to float
-  %2776 = fptoui float %45 to i32
-  %2777 = fptoui float %178 to i32
-  %2778 = fptoui float %2774 to i32
-  %2779 = fptoui float %2775 to i32
-  %2780 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2781 = extractvalue %dx.types.CBufRet.i32 %2780, 0
-  %2782 = extractvalue %dx.types.CBufRet.i32 %2780, 1
-  %2783 = extractvalue %dx.types.CBufRet.i32 %2780, 2
-  %2784 = extractvalue %dx.types.CBufRet.i32 %2780, 3
-  %2785 = mul i32 %2781, %2776
-  %2786 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2777, i32 %2782, i32 %2785)  ; IMad(a,b,c)
-  %2787 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2778, i32 %2783, i32 %2786)  ; IMad(a,b,c)
-  %2788 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2779, i32 %2784, i32 %2787)  ; IMad(a,b,c)
-  %2789 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2788, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2790 = extractvalue %dx.types.ResRet.i32 %2789, 0
-  %2791 = icmp ne i32 %2790, 0
-  %2792 = uitofp i1 %2791 to float
-  br label %2881
-
-; <label>:2793                                    ; preds = %2761
-  %2794 = icmp eq i32 %953, 2
-  br i1 %2794, label %2795, label %2881
-
-; <label>:2795                                    ; preds = %2793
-  %2796 = fsub fast float %22, %20
-  %2797 = fcmp fast olt float %950, %20
-  br i1 %2797, label %2798, label %2811
-
-; <label>:2798                                    ; preds = %2795
-  %2799 = fsub fast float %20, %950
-  %2800 = fdiv fast float %2799, %2796
-  %2801 = fptoui float %2800 to i32
-  %2802 = uitofp i32 %2801 to float
-  %2803 = fmul fast float %2802, %2796
-  %2804 = fsub fast float %2799, %2803
-  %2805 = and i32 %2801, 1
-  %2806 = icmp eq i32 %2805, 0
-  br i1 %2806, label %2807, label %2809
-
-; <label>:2807                                    ; preds = %2798
-  %2808 = fadd fast float %2804, %20
-  br label %2826
-
-; <label>:2809                                    ; preds = %2798
-  %2810 = fsub fast float %22, %2804
-  br label %2826
-
-; <label>:2811                                    ; preds = %2795
-  %2812 = fcmp fast ogt float %950, %22
-  br i1 %2812, label %2813, label %2826
-
-; <label>:2813                                    ; preds = %2811
-  %2814 = fsub fast float %950, %22
-  %2815 = fdiv fast float %2814, %2796
-  %2816 = fptoui float %2815 to i32
-  %2817 = uitofp i32 %2816 to float
-  %2818 = fmul fast float %2817, %2796
-  %2819 = fsub fast float %2814, %2818
-  %2820 = and i32 %2816, 1
-  %2821 = icmp eq i32 %2820, 0
-  br i1 %2821, label %2822, label %2824
-
-; <label>:2822                                    ; preds = %2813
-  %2823 = fsub fast float %22, %2819
-  br label %2826
-
-; <label>:2824                                    ; preds = %2813
-  %2825 = fadd fast float %2819, %20
-  br label %2826
-
-; <label>:2826                                    ; preds = %2824, %2822, %2811, %2809, %2807
-  %2827 = phi float [ %2808, %2807 ], [ %2810, %2809 ], [ %2823, %2822 ], [ %2825, %2824 ], [ %950, %2811 ]
-  %2828 = fptoui float %2827 to i32
-  %2829 = fsub fast float %24, %20
-  %2830 = fcmp fast olt float %2734, %20
-  br i1 %2830, label %2831, label %2844
-
-; <label>:2831                                    ; preds = %2826
-  %2832 = fsub fast float %20, %2734
-  %2833 = fdiv fast float %2832, %2829
-  %2834 = fptoui float %2833 to i32
-  %2835 = uitofp i32 %2834 to float
-  %2836 = fmul fast float %2835, %2829
-  %2837 = fsub fast float %2832, %2836
-  %2838 = and i32 %2834, 1
-  %2839 = icmp eq i32 %2838, 0
-  br i1 %2839, label %2840, label %2842
-
-; <label>:2840                                    ; preds = %2831
-  %2841 = fadd fast float %2837, %20
-  br label %2859
-
-; <label>:2842                                    ; preds = %2831
-  %2843 = fsub fast float %24, %2837
-  br label %2859
-
-; <label>:2844                                    ; preds = %2826
-  %2845 = fcmp fast ogt float %2734, %24
-  br i1 %2845, label %2846, label %2859
-
-; <label>:2846                                    ; preds = %2844
-  %2847 = fsub fast float %2734, %24
-  %2848 = fdiv fast float %2847, %2829
-  %2849 = fptoui float %2848 to i32
-  %2850 = uitofp i32 %2849 to float
-  %2851 = fmul fast float %2850, %2829
-  %2852 = fsub fast float %2847, %2851
-  %2853 = and i32 %2849, 1
-  %2854 = icmp eq i32 %2853, 0
-  br i1 %2854, label %2855, label %2857
-
-; <label>:2855                                    ; preds = %2846
-  %2856 = fsub fast float %24, %2852
-  br label %2859
-
-; <label>:2857                                    ; preds = %2846
-  %2858 = fadd fast float %2852, %20
-  br label %2859
-
-; <label>:2859                                    ; preds = %2857, %2855, %2844, %2842, %2840
-  %2860 = phi float [ %2841, %2840 ], [ %2843, %2842 ], [ %2856, %2855 ], [ %2858, %2857 ], [ %2734, %2844 ]
-  %2861 = fptoui float %2860 to i32
-  %2862 = uitofp i32 %2861 to float
-  %2863 = uitofp i32 %2828 to float
-  %2864 = fptoui float %45 to i32
-  %2865 = fptoui float %178 to i32
-  %2866 = fptoui float %2862 to i32
-  %2867 = fptoui float %2863 to i32
-  %2868 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2869 = extractvalue %dx.types.CBufRet.i32 %2868, 0
-  %2870 = extractvalue %dx.types.CBufRet.i32 %2868, 1
-  %2871 = extractvalue %dx.types.CBufRet.i32 %2868, 2
-  %2872 = extractvalue %dx.types.CBufRet.i32 %2868, 3
-  %2873 = mul i32 %2869, %2864
-  %2874 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2865, i32 %2870, i32 %2873)  ; IMad(a,b,c)
-  %2875 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2866, i32 %2871, i32 %2874)  ; IMad(a,b,c)
-  %2876 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2867, i32 %2872, i32 %2875)  ; IMad(a,b,c)
-  %2877 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2876, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2878 = extractvalue %dx.types.ResRet.i32 %2877, 0
-  %2879 = icmp ne i32 %2878, 0
-  %2880 = uitofp i1 %2879 to float
-  br label %2881
-
-; <label>:2881                                    ; preds = %2859, %2793, %2763, %2745, %2735
-  %2882 = phi float [ %2760, %2745 ], [ 0.000000e+00, %2735 ], [ %2792, %2763 ], [ %2880, %2859 ], [ 0.000000e+00, %2793 ]
-  br i1 %954, label %2883, label %2909
-
-; <label>:2883                                    ; preds = %2881
-  %2884 = fcmp fast oge float %949, 0.000000e+00
-  %2885 = fptoui float %949 to i32
-  %2886 = icmp ult i32 %2885, %13
-  %2887 = and i1 %2884, %2886
-  %2888 = fcmp fast oge float %2734, 0.000000e+00
-  %2889 = and i1 %2888, %2887
-  %2890 = fptoui float %2734 to i32
-  %2891 = icmp ult i32 %2890, %15
-  %2892 = and i1 %2891, %2889
-  br i1 %2892, label %2893, label %3029
-
-; <label>:2893                                    ; preds = %2883
-  %2894 = fptoui float %45 to i32
-  %2895 = fptoui float %178 to i32
-  %2896 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2897 = extractvalue %dx.types.CBufRet.i32 %2896, 0
-  %2898 = extractvalue %dx.types.CBufRet.i32 %2896, 1
-  %2899 = extractvalue %dx.types.CBufRet.i32 %2896, 2
-  %2900 = extractvalue %dx.types.CBufRet.i32 %2896, 3
-  %2901 = mul i32 %2897, %2894
-  %2902 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2895, i32 %2898, i32 %2901)  ; IMad(a,b,c)
-  %2903 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2890, i32 %2899, i32 %2902)  ; IMad(a,b,c)
-  %2904 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2885, i32 %2900, i32 %2903)  ; IMad(a,b,c)
-  %2905 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2904, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2906 = extractvalue %dx.types.ResRet.i32 %2905, 0
-  %2907 = icmp ne i32 %2906, 0
-  %2908 = uitofp i1 %2907 to float
-  br label %3029
-
-; <label>:2909                                    ; preds = %2881
-  %2910 = icmp eq i32 %953, 1
-  br i1 %2910, label %2911, label %2941
-
-; <label>:2911                                    ; preds = %2909
-  %2912 = add i32 %13, -1
-  %2913 = uitofp i32 %2912 to float
-  %2914 = call float @dx.op.binary.f32(i32 35, float %949, float 0.000000e+00)  ; FMax(a,b)
-  %2915 = call float @dx.op.binary.f32(i32 36, float %2914, float %2913)  ; FMin(a,b)
-  %2916 = fptoui float %2915 to i32
-  %2917 = add i32 %15, -1
-  %2918 = uitofp i32 %2917 to float
-  %2919 = call float @dx.op.binary.f32(i32 35, float %2734, float 0.000000e+00)  ; FMax(a,b)
-  %2920 = call float @dx.op.binary.f32(i32 36, float %2919, float %2918)  ; FMin(a,b)
-  %2921 = fptoui float %2920 to i32
-  %2922 = uitofp i32 %2921 to float
-  %2923 = uitofp i32 %2916 to float
-  %2924 = fptoui float %45 to i32
-  %2925 = fptoui float %178 to i32
-  %2926 = fptoui float %2922 to i32
-  %2927 = fptoui float %2923 to i32
-  %2928 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2929 = extractvalue %dx.types.CBufRet.i32 %2928, 0
-  %2930 = extractvalue %dx.types.CBufRet.i32 %2928, 1
-  %2931 = extractvalue %dx.types.CBufRet.i32 %2928, 2
-  %2932 = extractvalue %dx.types.CBufRet.i32 %2928, 3
-  %2933 = mul i32 %2929, %2924
-  %2934 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2925, i32 %2930, i32 %2933)  ; IMad(a,b,c)
-  %2935 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2926, i32 %2931, i32 %2934)  ; IMad(a,b,c)
-  %2936 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2927, i32 %2932, i32 %2935)  ; IMad(a,b,c)
-  %2937 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2936, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2938 = extractvalue %dx.types.ResRet.i32 %2937, 0
-  %2939 = icmp ne i32 %2938, 0
-  %2940 = uitofp i1 %2939 to float
-  br label %3029
-
-; <label>:2941                                    ; preds = %2909
-  %2942 = icmp eq i32 %953, 2
-  br i1 %2942, label %2943, label %3029
-
-; <label>:2943                                    ; preds = %2941
-  %2944 = fsub fast float %22, %20
-  %2945 = fcmp fast olt float %949, %20
-  br i1 %2945, label %2946, label %2959
-
-; <label>:2946                                    ; preds = %2943
-  %2947 = fsub fast float %20, %949
-  %2948 = fdiv fast float %2947, %2944
-  %2949 = fptoui float %2948 to i32
-  %2950 = uitofp i32 %2949 to float
-  %2951 = fmul fast float %2950, %2944
-  %2952 = fsub fast float %2947, %2951
-  %2953 = and i32 %2949, 1
-  %2954 = icmp eq i32 %2953, 0
-  br i1 %2954, label %2955, label %2957
-
-; <label>:2955                                    ; preds = %2946
-  %2956 = fadd fast float %2952, %20
-  br label %2974
-
-; <label>:2957                                    ; preds = %2946
-  %2958 = fsub fast float %22, %2952
-  br label %2974
-
-; <label>:2959                                    ; preds = %2943
-  %2960 = fcmp fast ogt float %949, %22
-  br i1 %2960, label %2961, label %2974
-
-; <label>:2961                                    ; preds = %2959
-  %2962 = fsub fast float %949, %22
-  %2963 = fdiv fast float %2962, %2944
-  %2964 = fptoui float %2963 to i32
-  %2965 = uitofp i32 %2964 to float
-  %2966 = fmul fast float %2965, %2944
-  %2967 = fsub fast float %2962, %2966
-  %2968 = and i32 %2964, 1
-  %2969 = icmp eq i32 %2968, 0
-  br i1 %2969, label %2970, label %2972
-
-; <label>:2970                                    ; preds = %2961
-  %2971 = fsub fast float %22, %2967
-  br label %2974
-
-; <label>:2972                                    ; preds = %2961
-  %2973 = fadd fast float %2967, %20
-  br label %2974
-
-; <label>:2974                                    ; preds = %2972, %2970, %2959, %2957, %2955
-  %2975 = phi float [ %2956, %2955 ], [ %2958, %2957 ], [ %2971, %2970 ], [ %2973, %2972 ], [ %949, %2959 ]
-  %2976 = fptoui float %2975 to i32
-  %2977 = fsub fast float %24, %20
-  %2978 = fcmp fast olt float %2734, %20
-  br i1 %2978, label %2979, label %2992
-
-; <label>:2979                                    ; preds = %2974
-  %2980 = fsub fast float %20, %2734
-  %2981 = fdiv fast float %2980, %2977
-  %2982 = fptoui float %2981 to i32
-  %2983 = uitofp i32 %2982 to float
-  %2984 = fmul fast float %2983, %2977
-  %2985 = fsub fast float %2980, %2984
-  %2986 = and i32 %2982, 1
-  %2987 = icmp eq i32 %2986, 0
-  br i1 %2987, label %2988, label %2990
-
-; <label>:2988                                    ; preds = %2979
-  %2989 = fadd fast float %2985, %20
-  br label %3007
-
-; <label>:2990                                    ; preds = %2979
-  %2991 = fsub fast float %24, %2985
-  br label %3007
-
-; <label>:2992                                    ; preds = %2974
-  %2993 = fcmp fast ogt float %2734, %24
-  br i1 %2993, label %2994, label %3007
-
-; <label>:2994                                    ; preds = %2992
-  %2995 = fsub fast float %2734, %24
-  %2996 = fdiv fast float %2995, %2977
-  %2997 = fptoui float %2996 to i32
-  %2998 = uitofp i32 %2997 to float
-  %2999 = fmul fast float %2998, %2977
-  %3000 = fsub fast float %2995, %2999
-  %3001 = and i32 %2997, 1
-  %3002 = icmp eq i32 %3001, 0
-  br i1 %3002, label %3003, label %3005
-
-; <label>:3003                                    ; preds = %2994
-  %3004 = fsub fast float %24, %3000
-  br label %3007
-
-; <label>:3005                                    ; preds = %2994
-  %3006 = fadd fast float %3000, %20
-  br label %3007
-
-; <label>:3007                                    ; preds = %3005, %3003, %2992, %2990, %2988
-  %3008 = phi float [ %2989, %2988 ], [ %2991, %2990 ], [ %3004, %3003 ], [ %3006, %3005 ], [ %2734, %2992 ]
-  %3009 = fptoui float %3008 to i32
-  %3010 = uitofp i32 %3009 to float
-  %3011 = uitofp i32 %2976 to float
-  %3012 = fptoui float %45 to i32
-  %3013 = fptoui float %178 to i32
-  %3014 = fptoui float %3010 to i32
-  %3015 = fptoui float %3011 to i32
-  %3016 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3017 = extractvalue %dx.types.CBufRet.i32 %3016, 0
-  %3018 = extractvalue %dx.types.CBufRet.i32 %3016, 1
-  %3019 = extractvalue %dx.types.CBufRet.i32 %3016, 2
-  %3020 = extractvalue %dx.types.CBufRet.i32 %3016, 3
-  %3021 = mul i32 %3017, %3012
-  %3022 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3013, i32 %3018, i32 %3021)  ; IMad(a,b,c)
-  %3023 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3014, i32 %3019, i32 %3022)  ; IMad(a,b,c)
-  %3024 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3015, i32 %3020, i32 %3023)  ; IMad(a,b,c)
-  %3025 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3024, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3026 = extractvalue %dx.types.ResRet.i32 %3025, 0
-  %3027 = icmp ne i32 %3026, 0
-  %3028 = uitofp i1 %3027 to float
-  br label %3029
-
-; <label>:3029                                    ; preds = %3007, %2941, %2911, %2893, %2883
-  %3030 = phi float [ %2908, %2893 ], [ 0.000000e+00, %2883 ], [ %2940, %2911 ], [ %3028, %3007 ], [ 0.000000e+00, %2941 ]
-  br i1 %954, label %3031, label %3057
-
-; <label>:3031                                    ; preds = %3029
-  %3032 = fcmp fast oge float %1251, 0.000000e+00
-  %3033 = fptoui float %1251 to i32
-  %3034 = icmp ult i32 %3033, %13
-  %3035 = and i1 %3032, %3034
-  %3036 = fcmp fast oge float %2734, 0.000000e+00
-  %3037 = and i1 %3036, %3035
-  %3038 = fptoui float %2734 to i32
-  %3039 = icmp ult i32 %3038, %15
-  %3040 = and i1 %3039, %3037
-  br i1 %3040, label %3041, label %3177
-
-; <label>:3041                                    ; preds = %3031
-  %3042 = fptoui float %45 to i32
-  %3043 = fptoui float %178 to i32
-  %3044 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3045 = extractvalue %dx.types.CBufRet.i32 %3044, 0
-  %3046 = extractvalue %dx.types.CBufRet.i32 %3044, 1
-  %3047 = extractvalue %dx.types.CBufRet.i32 %3044, 2
-  %3048 = extractvalue %dx.types.CBufRet.i32 %3044, 3
-  %3049 = mul i32 %3045, %3042
-  %3050 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3043, i32 %3046, i32 %3049)  ; IMad(a,b,c)
-  %3051 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3038, i32 %3047, i32 %3050)  ; IMad(a,b,c)
-  %3052 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3033, i32 %3048, i32 %3051)  ; IMad(a,b,c)
-  %3053 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3052, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3054 = extractvalue %dx.types.ResRet.i32 %3053, 0
-  %3055 = icmp ne i32 %3054, 0
-  %3056 = uitofp i1 %3055 to float
-  br label %3177
-
-; <label>:3057                                    ; preds = %3029
-  %3058 = icmp eq i32 %953, 1
-  br i1 %3058, label %3059, label %3089
-
-; <label>:3059                                    ; preds = %3057
-  %3060 = add i32 %13, -1
-  %3061 = uitofp i32 %3060 to float
-  %3062 = call float @dx.op.binary.f32(i32 35, float %1251, float 0.000000e+00)  ; FMax(a,b)
-  %3063 = call float @dx.op.binary.f32(i32 36, float %3062, float %3061)  ; FMin(a,b)
-  %3064 = fptoui float %3063 to i32
-  %3065 = add i32 %15, -1
-  %3066 = uitofp i32 %3065 to float
-  %3067 = call float @dx.op.binary.f32(i32 35, float %2734, float 0.000000e+00)  ; FMax(a,b)
-  %3068 = call float @dx.op.binary.f32(i32 36, float %3067, float %3066)  ; FMin(a,b)
-  %3069 = fptoui float %3068 to i32
-  %3070 = uitofp i32 %3069 to float
-  %3071 = uitofp i32 %3064 to float
-  %3072 = fptoui float %45 to i32
-  %3073 = fptoui float %178 to i32
-  %3074 = fptoui float %3070 to i32
-  %3075 = fptoui float %3071 to i32
-  %3076 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3077 = extractvalue %dx.types.CBufRet.i32 %3076, 0
-  %3078 = extractvalue %dx.types.CBufRet.i32 %3076, 1
-  %3079 = extractvalue %dx.types.CBufRet.i32 %3076, 2
-  %3080 = extractvalue %dx.types.CBufRet.i32 %3076, 3
-  %3081 = mul i32 %3077, %3072
-  %3082 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3073, i32 %3078, i32 %3081)  ; IMad(a,b,c)
-  %3083 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3074, i32 %3079, i32 %3082)  ; IMad(a,b,c)
-  %3084 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3075, i32 %3080, i32 %3083)  ; IMad(a,b,c)
-  %3085 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3084, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3086 = extractvalue %dx.types.ResRet.i32 %3085, 0
-  %3087 = icmp ne i32 %3086, 0
-  %3088 = uitofp i1 %3087 to float
-  br label %3177
-
-; <label>:3089                                    ; preds = %3057
-  %3090 = icmp eq i32 %953, 2
-  br i1 %3090, label %3091, label %3177
-
-; <label>:3091                                    ; preds = %3089
-  %3092 = fsub fast float %22, %20
-  %3093 = fcmp fast olt float %1251, %20
-  br i1 %3093, label %3094, label %3107
-
-; <label>:3094                                    ; preds = %3091
-  %3095 = fsub fast float %20, %1251
-  %3096 = fdiv fast float %3095, %3092
-  %3097 = fptoui float %3096 to i32
-  %3098 = uitofp i32 %3097 to float
-  %3099 = fmul fast float %3098, %3092
-  %3100 = fsub fast float %3095, %3099
-  %3101 = and i32 %3097, 1
-  %3102 = icmp eq i32 %3101, 0
-  br i1 %3102, label %3103, label %3105
-
-; <label>:3103                                    ; preds = %3094
-  %3104 = fadd fast float %3100, %20
-  br label %3122
-
-; <label>:3105                                    ; preds = %3094
-  %3106 = fsub fast float %22, %3100
-  br label %3122
-
-; <label>:3107                                    ; preds = %3091
-  %3108 = fcmp fast ogt float %1251, %22
-  br i1 %3108, label %3109, label %3122
-
-; <label>:3109                                    ; preds = %3107
-  %3110 = fsub fast float %1251, %22
-  %3111 = fdiv fast float %3110, %3092
-  %3112 = fptoui float %3111 to i32
-  %3113 = uitofp i32 %3112 to float
-  %3114 = fmul fast float %3113, %3092
-  %3115 = fsub fast float %3110, %3114
-  %3116 = and i32 %3112, 1
-  %3117 = icmp eq i32 %3116, 0
-  br i1 %3117, label %3118, label %3120
-
-; <label>:3118                                    ; preds = %3109
-  %3119 = fsub fast float %22, %3115
-  br label %3122
-
-; <label>:3120                                    ; preds = %3109
-  %3121 = fadd fast float %3115, %20
-  br label %3122
-
-; <label>:3122                                    ; preds = %3120, %3118, %3107, %3105, %3103
-  %3123 = phi float [ %3104, %3103 ], [ %3106, %3105 ], [ %3119, %3118 ], [ %3121, %3120 ], [ %1251, %3107 ]
-  %3124 = fptoui float %3123 to i32
-  %3125 = fsub fast float %24, %20
-  %3126 = fcmp fast olt float %2734, %20
-  br i1 %3126, label %3127, label %3140
-
-; <label>:3127                                    ; preds = %3122
-  %3128 = fsub fast float %20, %2734
-  %3129 = fdiv fast float %3128, %3125
-  %3130 = fptoui float %3129 to i32
-  %3131 = uitofp i32 %3130 to float
-  %3132 = fmul fast float %3131, %3125
-  %3133 = fsub fast float %3128, %3132
-  %3134 = and i32 %3130, 1
-  %3135 = icmp eq i32 %3134, 0
-  br i1 %3135, label %3136, label %3138
-
-; <label>:3136                                    ; preds = %3127
-  %3137 = fadd fast float %3133, %20
-  br label %3155
-
-; <label>:3138                                    ; preds = %3127
-  %3139 = fsub fast float %24, %3133
-  br label %3155
-
-; <label>:3140                                    ; preds = %3122
-  %3141 = fcmp fast ogt float %2734, %24
-  br i1 %3141, label %3142, label %3155
-
-; <label>:3142                                    ; preds = %3140
-  %3143 = fsub fast float %2734, %24
-  %3144 = fdiv fast float %3143, %3125
-  %3145 = fptoui float %3144 to i32
-  %3146 = uitofp i32 %3145 to float
-  %3147 = fmul fast float %3146, %3125
-  %3148 = fsub fast float %3143, %3147
-  %3149 = and i32 %3145, 1
-  %3150 = icmp eq i32 %3149, 0
-  br i1 %3150, label %3151, label %3153
-
-; <label>:3151                                    ; preds = %3142
-  %3152 = fsub fast float %24, %3148
-  br label %3155
-
-; <label>:3153                                    ; preds = %3142
-  %3154 = fadd fast float %3148, %20
-  br label %3155
-
-; <label>:3155                                    ; preds = %3153, %3151, %3140, %3138, %3136
-  %3156 = phi float [ %3137, %3136 ], [ %3139, %3138 ], [ %3152, %3151 ], [ %3154, %3153 ], [ %2734, %3140 ]
-  %3157 = fptoui float %3156 to i32
-  %3158 = uitofp i32 %3157 to float
-  %3159 = uitofp i32 %3124 to float
-  %3160 = fptoui float %45 to i32
-  %3161 = fptoui float %178 to i32
-  %3162 = fptoui float %3158 to i32
-  %3163 = fptoui float %3159 to i32
-  %3164 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3165 = extractvalue %dx.types.CBufRet.i32 %3164, 0
-  %3166 = extractvalue %dx.types.CBufRet.i32 %3164, 1
-  %3167 = extractvalue %dx.types.CBufRet.i32 %3164, 2
-  %3168 = extractvalue %dx.types.CBufRet.i32 %3164, 3
-  %3169 = mul i32 %3165, %3160
-  %3170 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3161, i32 %3166, i32 %3169)  ; IMad(a,b,c)
-  %3171 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3162, i32 %3167, i32 %3170)  ; IMad(a,b,c)
-  %3172 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3163, i32 %3168, i32 %3171)  ; IMad(a,b,c)
-  %3173 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3172, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3174 = extractvalue %dx.types.ResRet.i32 %3173, 0
-  %3175 = icmp ne i32 %3174, 0
-  %3176 = uitofp i1 %3175 to float
-  br label %3177
-
-; <label>:3177                                    ; preds = %3155, %3089, %3059, %3041, %3031
-  %3178 = phi float [ %3056, %3041 ], [ 0.000000e+00, %3031 ], [ %3088, %3059 ], [ %3176, %3155 ], [ 0.000000e+00, %3089 ]
-  br i1 %954, label %3179, label %3205
-
-; <label>:3179                                    ; preds = %3177
-  %3180 = fcmp fast oge float %1400, 0.000000e+00
-  %3181 = fptoui float %1400 to i32
-  %3182 = icmp ult i32 %3181, %13
-  %3183 = and i1 %3180, %3182
-  %3184 = fcmp fast oge float %2734, 0.000000e+00
-  %3185 = and i1 %3184, %3183
-  %3186 = fptoui float %2734 to i32
-  %3187 = icmp ult i32 %3186, %15
-  %3188 = and i1 %3187, %3185
-  br i1 %3188, label %3189, label %3325
-
-; <label>:3189                                    ; preds = %3179
-  %3190 = fptoui float %45 to i32
-  %3191 = fptoui float %178 to i32
-  %3192 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3193 = extractvalue %dx.types.CBufRet.i32 %3192, 0
-  %3194 = extractvalue %dx.types.CBufRet.i32 %3192, 1
-  %3195 = extractvalue %dx.types.CBufRet.i32 %3192, 2
-  %3196 = extractvalue %dx.types.CBufRet.i32 %3192, 3
-  %3197 = mul i32 %3193, %3190
-  %3198 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3191, i32 %3194, i32 %3197)  ; IMad(a,b,c)
-  %3199 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3186, i32 %3195, i32 %3198)  ; IMad(a,b,c)
-  %3200 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3181, i32 %3196, i32 %3199)  ; IMad(a,b,c)
-  %3201 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3200, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3202 = extractvalue %dx.types.ResRet.i32 %3201, 0
-  %3203 = icmp ne i32 %3202, 0
-  %3204 = uitofp i1 %3203 to float
-  br label %3325
-
-; <label>:3205                                    ; preds = %3177
-  %3206 = icmp eq i32 %953, 1
-  br i1 %3206, label %3207, label %3237
-
-; <label>:3207                                    ; preds = %3205
-  %3208 = add i32 %13, -1
-  %3209 = uitofp i32 %3208 to float
-  %3210 = call float @dx.op.binary.f32(i32 35, float %1400, float 0.000000e+00)  ; FMax(a,b)
-  %3211 = call float @dx.op.binary.f32(i32 36, float %3210, float %3209)  ; FMin(a,b)
-  %3212 = fptoui float %3211 to i32
-  %3213 = add i32 %15, -1
-  %3214 = uitofp i32 %3213 to float
-  %3215 = call float @dx.op.binary.f32(i32 35, float %2734, float 0.000000e+00)  ; FMax(a,b)
-  %3216 = call float @dx.op.binary.f32(i32 36, float %3215, float %3214)  ; FMin(a,b)
-  %3217 = fptoui float %3216 to i32
-  %3218 = uitofp i32 %3217 to float
-  %3219 = uitofp i32 %3212 to float
-  %3220 = fptoui float %45 to i32
-  %3221 = fptoui float %178 to i32
-  %3222 = fptoui float %3218 to i32
-  %3223 = fptoui float %3219 to i32
-  %3224 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3225 = extractvalue %dx.types.CBufRet.i32 %3224, 0
-  %3226 = extractvalue %dx.types.CBufRet.i32 %3224, 1
-  %3227 = extractvalue %dx.types.CBufRet.i32 %3224, 2
-  %3228 = extractvalue %dx.types.CBufRet.i32 %3224, 3
-  %3229 = mul i32 %3225, %3220
-  %3230 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3221, i32 %3226, i32 %3229)  ; IMad(a,b,c)
-  %3231 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3222, i32 %3227, i32 %3230)  ; IMad(a,b,c)
-  %3232 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3223, i32 %3228, i32 %3231)  ; IMad(a,b,c)
-  %3233 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3232, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3234 = extractvalue %dx.types.ResRet.i32 %3233, 0
-  %3235 = icmp ne i32 %3234, 0
-  %3236 = uitofp i1 %3235 to float
-  br label %3325
-
-; <label>:3237                                    ; preds = %3205
-  %3238 = icmp eq i32 %953, 2
-  br i1 %3238, label %3239, label %3325
-
-; <label>:3239                                    ; preds = %3237
-  %3240 = fsub fast float %22, %20
-  %3241 = fcmp fast olt float %1400, %20
-  br i1 %3241, label %3242, label %3255
-
-; <label>:3242                                    ; preds = %3239
-  %3243 = fsub fast float %20, %1400
-  %3244 = fdiv fast float %3243, %3240
-  %3245 = fptoui float %3244 to i32
-  %3246 = uitofp i32 %3245 to float
-  %3247 = fmul fast float %3246, %3240
-  %3248 = fsub fast float %3243, %3247
-  %3249 = and i32 %3245, 1
-  %3250 = icmp eq i32 %3249, 0
-  br i1 %3250, label %3251, label %3253
-
-; <label>:3251                                    ; preds = %3242
-  %3252 = fadd fast float %3248, %20
-  br label %3270
-
-; <label>:3253                                    ; preds = %3242
-  %3254 = fsub fast float %22, %3248
-  br label %3270
-
-; <label>:3255                                    ; preds = %3239
-  %3256 = fcmp fast ogt float %1400, %22
-  br i1 %3256, label %3257, label %3270
-
-; <label>:3257                                    ; preds = %3255
-  %3258 = fsub fast float %1400, %22
-  %3259 = fdiv fast float %3258, %3240
-  %3260 = fptoui float %3259 to i32
-  %3261 = uitofp i32 %3260 to float
-  %3262 = fmul fast float %3261, %3240
-  %3263 = fsub fast float %3258, %3262
-  %3264 = and i32 %3260, 1
-  %3265 = icmp eq i32 %3264, 0
-  br i1 %3265, label %3266, label %3268
-
-; <label>:3266                                    ; preds = %3257
-  %3267 = fsub fast float %22, %3263
-  br label %3270
-
-; <label>:3268                                    ; preds = %3257
-  %3269 = fadd fast float %3263, %20
-  br label %3270
-
-; <label>:3270                                    ; preds = %3268, %3266, %3255, %3253, %3251
-  %3271 = phi float [ %3252, %3251 ], [ %3254, %3253 ], [ %3267, %3266 ], [ %3269, %3268 ], [ %1400, %3255 ]
-  %3272 = fptoui float %3271 to i32
-  %3273 = fsub fast float %24, %20
-  %3274 = fcmp fast olt float %2734, %20
-  br i1 %3274, label %3275, label %3288
-
-; <label>:3275                                    ; preds = %3270
-  %3276 = fsub fast float %20, %2734
-  %3277 = fdiv fast float %3276, %3273
-  %3278 = fptoui float %3277 to i32
-  %3279 = uitofp i32 %3278 to float
-  %3280 = fmul fast float %3279, %3273
-  %3281 = fsub fast float %3276, %3280
-  %3282 = and i32 %3278, 1
-  %3283 = icmp eq i32 %3282, 0
-  br i1 %3283, label %3284, label %3286
-
-; <label>:3284                                    ; preds = %3275
-  %3285 = fadd fast float %3281, %20
-  br label %3303
-
-; <label>:3286                                    ; preds = %3275
-  %3287 = fsub fast float %24, %3281
-  br label %3303
-
-; <label>:3288                                    ; preds = %3270
-  %3289 = fcmp fast ogt float %2734, %24
-  br i1 %3289, label %3290, label %3303
-
-; <label>:3290                                    ; preds = %3288
-  %3291 = fsub fast float %2734, %24
-  %3292 = fdiv fast float %3291, %3273
-  %3293 = fptoui float %3292 to i32
-  %3294 = uitofp i32 %3293 to float
-  %3295 = fmul fast float %3294, %3273
-  %3296 = fsub fast float %3291, %3295
-  %3297 = and i32 %3293, 1
-  %3298 = icmp eq i32 %3297, 0
-  br i1 %3298, label %3299, label %3301
-
-; <label>:3299                                    ; preds = %3290
-  %3300 = fsub fast float %24, %3296
-  br label %3303
-
-; <label>:3301                                    ; preds = %3290
-  %3302 = fadd fast float %3296, %20
-  br label %3303
-
-; <label>:3303                                    ; preds = %3301, %3299, %3288, %3286, %3284
-  %3304 = phi float [ %3285, %3284 ], [ %3287, %3286 ], [ %3300, %3299 ], [ %3302, %3301 ], [ %2734, %3288 ]
-  %3305 = fptoui float %3304 to i32
-  %3306 = uitofp i32 %3305 to float
-  %3307 = uitofp i32 %3272 to float
-  %3308 = fptoui float %45 to i32
-  %3309 = fptoui float %178 to i32
-  %3310 = fptoui float %3306 to i32
-  %3311 = fptoui float %3307 to i32
-  %3312 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3313 = extractvalue %dx.types.CBufRet.i32 %3312, 0
-  %3314 = extractvalue %dx.types.CBufRet.i32 %3312, 1
-  %3315 = extractvalue %dx.types.CBufRet.i32 %3312, 2
-  %3316 = extractvalue %dx.types.CBufRet.i32 %3312, 3
-  %3317 = mul i32 %3313, %3308
-  %3318 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3309, i32 %3314, i32 %3317)  ; IMad(a,b,c)
-  %3319 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3310, i32 %3315, i32 %3318)  ; IMad(a,b,c)
-  %3320 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3311, i32 %3316, i32 %3319)  ; IMad(a,b,c)
-  %3321 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3320, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3322 = extractvalue %dx.types.ResRet.i32 %3321, 0
-  %3323 = icmp ne i32 %3322, 0
-  %3324 = uitofp i1 %3323 to float
-  br label %3325
-
-; <label>:3325                                    ; preds = %3303, %3237, %3207, %3189, %3179
-  %3326 = phi float [ %3204, %3189 ], [ 0.000000e+00, %3179 ], [ %3236, %3207 ], [ %3324, %3303 ], [ 0.000000e+00, %3237 ]
-  %3327 = call float @dx.op.unary.f32(i32 22, float %176)  ; Frc(value)
-  %3328 = call float @dx.op.unary.f32(i32 22, float %177)  ; Frc(value)
-  %3329 = fmul fast float %3328, %3328
-  %3330 = fmul fast float %3329, %3328
-  %3331 = fmul fast float %1102, -7.500000e-01
-  %3332 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2289, float %3331)  ; FMad(a,b,c)
-  %3333 = fmul fast float %1102, 1.500000e+00
-  %3334 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %1696, float %3333)  ; FMad(a,b,c)
-  %3335 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %2289, float %3334)  ; FMad(a,b,c)
-  %3336 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %2882, float %3335)  ; FMad(a,b,c)
-  %3337 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %1696, float %3331)  ; FMad(a,b,c)
-  %3338 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %2289, float %3337)  ; FMad(a,b,c)
-  %3339 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2882, float %3338)  ; FMad(a,b,c)
-  %3340 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3328, float %3329, float %3330, float %1696, float %3332, float %3336, float %3339)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3341 = fmul fast float %1250, -7.500000e-01
-  %3342 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2437, float %3341)  ; FMad(a,b,c)
-  %3343 = fmul fast float %1250, 1.500000e+00
-  %3344 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %1844, float %3343)  ; FMad(a,b,c)
-  %3345 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %2437, float %3344)  ; FMad(a,b,c)
-  %3346 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %3030, float %3345)  ; FMad(a,b,c)
-  %3347 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %1844, float %3341)  ; FMad(a,b,c)
-  %3348 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %2437, float %3347)  ; FMad(a,b,c)
-  %3349 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3030, float %3348)  ; FMad(a,b,c)
-  %3350 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3328, float %3329, float %3330, float %1844, float %3342, float %3346, float %3349)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3351 = fmul fast float %1399, -7.500000e-01
-  %3352 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2585, float %3351)  ; FMad(a,b,c)
-  %3353 = fmul fast float %1399, 1.500000e+00
-  %3354 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %1992, float %3353)  ; FMad(a,b,c)
-  %3355 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %2585, float %3354)  ; FMad(a,b,c)
-  %3356 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %3178, float %3355)  ; FMad(a,b,c)
-  %3357 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %1992, float %3351)  ; FMad(a,b,c)
-  %3358 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %2585, float %3357)  ; FMad(a,b,c)
-  %3359 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3178, float %3358)  ; FMad(a,b,c)
-  %3360 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3328, float %3329, float %3330, float %1992, float %3352, float %3356, float %3359)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3361 = fmul fast float %1548, -7.500000e-01
-  %3362 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2733, float %3361)  ; FMad(a,b,c)
-  %3363 = fmul fast float %1548, 1.500000e+00
-  %3364 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %2140, float %3363)  ; FMad(a,b,c)
-  %3365 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %2733, float %3364)  ; FMad(a,b,c)
-  %3366 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %3326, float %3365)  ; FMad(a,b,c)
-  %3367 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %2140, float %3361)  ; FMad(a,b,c)
-  %3368 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %2733, float %3367)  ; FMad(a,b,c)
-  %3369 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3326, float %3368)  ; FMad(a,b,c)
-  %3370 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3328, float %3329, float %3330, float %2140, float %3362, float %3366, float %3369)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3371 = fmul fast float %3327, %3327
-  %3372 = fmul fast float %3371, %3327
-  %3373 = fmul fast float %3340, -7.500000e-01
-  %3374 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3360, float %3373)  ; FMad(a,b,c)
-  %3375 = fmul fast float %3340, 1.500000e+00
-  %3376 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %3350, float %3375)  ; FMad(a,b,c)
-  %3377 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %3360, float %3376)  ; FMad(a,b,c)
-  %3378 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %3370, float %3377)  ; FMad(a,b,c)
-  %3379 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %3350, float %3373)  ; FMad(a,b,c)
-  %3380 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %3360, float %3379)  ; FMad(a,b,c)
-  %3381 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3370, float %3380)  ; FMad(a,b,c)
-  %3382 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3327, float %3371, float %3372, float %3350, float %3374, float %3378, float %3381)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3383 = fcmp fast une float %3382, 0.000000e+00
-  %3384 = zext i1 %3383 to i32
-  call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %1, i32 %8, i32 0, i32 %3384, i32 undef, i32 undef, i32 undef, i8 1, i32 4)  ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment)
-  br label %3385
-
-; <label>:3385                                    ; preds = %3325, %946, %931, %328, %0
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.threadId.i32(i32, i32) #0
-
-; Function Attrs: nounwind readonly
-declare %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32, %dx.types.Handle, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.rawBufferStore.i32(i32, %dx.types.Handle, i32, i32, i32, i32, i32, i32, i8, i32) #2
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.unary.f32(i32, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.tertiary.f32(i32, float, float, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.binary.f32(i32, float, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.dot4.f32(i32, float, float, float, float, float, float, float, float) #0
-
-; Function Attrs: nounwind readonly
-declare %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32, %dx.types.Handle, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.tertiary.i32(i32, i32, i32, i32) #0
-
-; Function Attrs: nounwind readonly
-declare %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32, %dx.types.Handle, i32) #1
-
-; Function Attrs: nounwind readonly
-declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #1
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind readonly }
-attributes #2 = { nounwind }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.valver = !{!2}
-!dx.shaderModel = !{!3}
-!dx.resources = !{!4}
-!dx.entryPoints = !{!12}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 1, i32 2}
-!2 = !{i32 1, i32 6}
-!3 = !{!"cs", i32 6, i32 2}
-!4 = !{null, !5, !10, null}
-!5 = !{!6, !8, !9}
-!6 = !{i32 0, %"class.RWStructuredBuffer<bool>"* undef, !"", i32 0, i32 0, i32 1, i32 12, i1 false, i1 false, i1 false, !7}
-!7 = !{i32 1, i32 4}
-!8 = !{i32 1, %"class.RWStructuredBuffer<half>"* undef, !"", i32 0, i32 1, i32 1, i32 12, i1 false, i1 false, i1 false, !1}
-!9 = !{i32 2, %"class.RWStructuredBuffer<bool>"* undef, !"", i32 0, i32 2, i32 1, i32 12, i1 false, i1 false, i1 false, !7}
-!10 = !{!11}
-!11 = !{i32 0, %Constants* undef, !"", i32 0, i32 0, i32 1, i32 116, null}
-!12 = !{void ()* @GridSample, !"GridSample", null, !4, !13}
-!13 = !{i32 0, i64 8388656, i32 4, !14}
-!14 = !{i32 64, i32 1, i32 1}
-
-#endif
-
-const unsigned char g_GridSample[] = {
-  0x44, 0x58, 0x42, 0x43, 0x42, 0x05, 0xca, 0xb6, 0xde, 0x58, 0x0e, 0x11,
-  0x5b, 0x42, 0xeb, 0x78, 0xc1, 0xb1, 0x2b, 0xba, 0x01, 0x00, 0x00, 0x00,
-  0xe0, 0x55, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00,
-  0x48, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00,
-  0x18, 0x01, 0x00, 0x00, 0x34, 0x01, 0x00, 0x00, 0x53, 0x46, 0x49, 0x30,
-  0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x49, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x08, 0x00, 0x00, 0x00, 0x4f, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x50, 0x53, 0x56, 0x30,
-  0xa8, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x05, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00,
-  0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
-  0x18, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x48, 0x41, 0x53, 0x48, 0x14, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0xf7, 0x01, 0x61, 0xfb, 0x92, 0x23, 0x51, 0xa3,
-  0xe8, 0x3a, 0x11, 0x27, 0x5e, 0x73, 0xdc, 0x46, 0x44, 0x58, 0x49, 0x4c,
-  0xa4, 0x54, 0x00, 0x00, 0x62, 0x00, 0x05, 0x00, 0x29, 0x15, 0x00, 0x00,
-  0x44, 0x58, 0x49, 0x4c, 0x02, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
-  0x8c, 0x54, 0x00, 0x00, 0x42, 0x43, 0xc0, 0xde, 0x21, 0x0c, 0x00, 0x00,
-  0x20, 0x15, 0x00, 0x00, 0x0b, 0x82, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00,
-  0x13, 0x00, 0x00, 0x00, 0x07, 0x81, 0x23, 0x91, 0x41, 0xc8, 0x04, 0x49,
-  0x06, 0x10, 0x32, 0x39, 0x92, 0x01, 0x84, 0x0c, 0x25, 0x05, 0x08, 0x19,
-  0x1e, 0x04, 0x8b, 0x62, 0x80, 0x18, 0x45, 0x02, 0x42, 0x92, 0x0b, 0x42,
-  0xc4, 0x10, 0x32, 0x14, 0x38, 0x08, 0x18, 0x4b, 0x0a, 0x32, 0x62, 0x88,
-  0x48, 0x90, 0x14, 0x20, 0x43, 0x46, 0x88, 0xa5, 0x00, 0x19, 0x32, 0x42,
-  0xe4, 0x48, 0x0e, 0x90, 0x11, 0x23, 0xc4, 0x50, 0x41, 0x51, 0x81, 0x8c,
-  0xe1, 0x83, 0xe5, 0x8a, 0x04, 0x31, 0x46, 0x06, 0x51, 0x18, 0x00, 0x00,
-  0x08, 0x00, 0x00, 0x00, 0x1b, 0x8c, 0xe0, 0xff, 0xff, 0xff, 0xff, 0x07,
-  0x40, 0x02, 0xa8, 0x0d, 0x86, 0xf0, 0xff, 0xff, 0xff, 0xff, 0x03, 0x20,
-  0x01, 0xd5, 0x06, 0x62, 0xf8, 0xff, 0xff, 0xff, 0xff, 0x01, 0x90, 0x00,
-  0x49, 0x18, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x13, 0x82, 0x60, 0x42,
-  0x20, 0x4c, 0x08, 0x06, 0x00, 0x00, 0x00, 0x00, 0x89, 0x20, 0x00, 0x00,
-  0x54, 0x00, 0x00, 0x00, 0x32, 0x22, 0x88, 0x09, 0x20, 0x64, 0x85, 0x04,
-  0x13, 0x23, 0xa4, 0x84, 0x04, 0x13, 0x23, 0xe3, 0x84, 0xa1, 0x90, 0x14,
-  0x12, 0x4c, 0x8c, 0x8c, 0x0b, 0x84, 0xc4, 0x4c, 0x10, 0xb4, 0xc1, 0x08,
-  0x40, 0x09, 0x00, 0x0a, 0xe6, 0x08, 0xc0, 0xa0, 0x0c, 0xc3, 0x30, 0x10,
-  0x71, 0xd3, 0x70, 0xf9, 0x13, 0xf6, 0x10, 0x92, 0xbf, 0x12, 0xd2, 0x4a,
-  0x4c, 0x3e, 0x72, 0xdb, 0xa8, 0x18, 0x86, 0x61, 0x18, 0xe6, 0x08, 0x10,
-  0x3a, 0xee, 0x19, 0x2e, 0x7f, 0xc2, 0x1e, 0x42, 0xf2, 0x43, 0xa0, 0x19,
-  0x16, 0x02, 0x05, 0x48, 0x39, 0x8c, 0x21, 0x19, 0x86, 0x63, 0x20, 0xa5,
-  0x2c, 0xc0, 0x90, 0x0c, 0xc3, 0x30, 0x0c, 0xc3, 0x31, 0x10, 0x33, 0x03,
-  0x50, 0x06, 0x67, 0x70, 0xe8, 0x29, 0x85, 0x33, 0x38, 0x8e, 0x43, 0x51,
-  0x21, 0x9c, 0xc1, 0x71, 0x68, 0x2a, 0x8a, 0x33, 0x38, 0x8e, 0xe3, 0x38,
-  0x8e, 0xe3, 0x50, 0x35, 0x0a, 0x70, 0xd3, 0x70, 0xf9, 0x13, 0xf6, 0x10,
-  0x92, 0xbf, 0x12, 0xd2, 0x4a, 0x4c, 0x7e, 0x51, 0xeb, 0xa8, 0xb8, 0xae,
-  0xeb, 0x1a, 0xca, 0x81, 0x0d, 0xc9, 0x30, 0x1c, 0x03, 0x65, 0xa5, 0x18,
-  0x86, 0x61, 0x18, 0x68, 0x3b, 0x6a, 0xb8, 0xfc, 0x09, 0x7b, 0x08, 0xc9,
-  0xe7, 0x36, 0xaa, 0x58, 0x89, 0xc9, 0x47, 0x6e, 0x1b, 0x11, 0xc3, 0x30,
-  0x0c, 0x85, 0xe8, 0x86, 0x64, 0x20, 0x6f, 0x8e, 0x20, 0x28, 0x46, 0x32,
-  0x1c, 0xc3, 0x00, 0x52, 0x38, 0x10, 0x30, 0x93, 0x37, 0x0e, 0xec, 0x10,
-  0x0e, 0xf3, 0x30, 0x0f, 0x6e, 0x20, 0x0b, 0xb7, 0x30, 0x0b, 0xf4, 0x20,
-  0x0f, 0xf5, 0x30, 0x0e, 0xf4, 0x50, 0x0f, 0xf2, 0x50, 0x0e, 0xe4, 0x20,
-  0x0a, 0xf5, 0x60, 0x0e, 0xe6, 0x50, 0x0e, 0xf2, 0xc0, 0x07, 0xe2, 0xf0,
-  0x0e, 0xef, 0xc0, 0x0e, 0x7e, 0x80, 0x82, 0x81, 0xc8, 0x99, 0xbc, 0x71,
-  0x60, 0x87, 0x70, 0x98, 0x87, 0x79, 0x70, 0x03, 0x59, 0xb8, 0x85, 0x59,
-  0xa0, 0x07, 0x79, 0xa8, 0x87, 0x71, 0xa0, 0x87, 0x7a, 0x90, 0x87, 0x72,
-  0x20, 0x07, 0x51, 0xa8, 0x07, 0x73, 0x30, 0x87, 0x72, 0x90, 0x07, 0x3e,
-  0x40, 0x87, 0x70, 0x60, 0x07, 0x73, 0xf0, 0x03, 0x14, 0x5c, 0x64, 0x0e,
-  0x23, 0x10, 0xc3, 0x25, 0x9c, 0xd3, 0x48, 0x13, 0xd0, 0x4c, 0x12, 0x5a,
-  0x86, 0x61, 0x18, 0x50, 0x14, 0x45, 0x51, 0x74, 0xa0, 0x74, 0x8e, 0x00,
-  0x14, 0xa6, 0x00, 0x00, 0x13, 0x14, 0x72, 0xc0, 0x87, 0x74, 0x60, 0x87,
-  0x36, 0x68, 0x87, 0x79, 0x68, 0x03, 0x72, 0xc0, 0x87, 0x0d, 0xae, 0x50,
-  0x0e, 0x6d, 0xd0, 0x0e, 0x7a, 0x50, 0x0e, 0x6d, 0x00, 0x0f, 0x7a, 0x30,
-  0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0xa0,
-  0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x78, 0xa0, 0x07, 0x78, 0xd0,
-  0x06, 0xe9, 0x10, 0x07, 0x76, 0xa0, 0x07, 0x71, 0x60, 0x07, 0x6d, 0x90,
-  0x0e, 0x73, 0x20, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xd0, 0x06, 0xe9, 0x60,
-  0x07, 0x74, 0xa0, 0x07, 0x76, 0x40, 0x07, 0x6d, 0x60, 0x0e, 0x71, 0x60,
-  0x07, 0x7a, 0x10, 0x07, 0x76, 0xd0, 0x06, 0xe6, 0x30, 0x07, 0x72, 0xa0,
-  0x07, 0x73, 0x20, 0x07, 0x6d, 0x60, 0x0e, 0x76, 0x40, 0x07, 0x7a, 0x60,
-  0x07, 0x74, 0xd0, 0x06, 0xee, 0x80, 0x07, 0x7a, 0x10, 0x07, 0x76, 0xa0,
-  0x07, 0x73, 0x20, 0x07, 0x7a, 0x60, 0x07, 0x74, 0x30, 0xe4, 0x09, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0xc8, 0x43,
-  0x00, 0x01, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x90,
-  0x47, 0x01, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80,
-  0x21, 0x0f, 0x03, 0x04, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x43, 0x9e, 0x07, 0x08, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x86, 0x3c, 0x11, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x0c, 0x79, 0x26, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x18, 0xf2, 0x54, 0x40, 0x00, 0x04, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0xe4, 0xc9, 0x80, 0x00, 0x10, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0xc8, 0xb3, 0x01, 0x01, 0x10,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x90, 0xc7, 0x03, 0x02,
-  0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x21, 0x4f, 0x18,
-  0x00, 0x01, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x16,
-  0x08, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x32, 0x1e, 0x98, 0x14,
-  0x19, 0x11, 0x4c, 0x90, 0x8c, 0x09, 0x26, 0x47, 0xc6, 0x04, 0x43, 0x1a,
-  0x4a, 0xa0, 0x08, 0x8a, 0x61, 0x04, 0xa0, 0x30, 0x0a, 0xa1, 0xd0, 0x03,
-  0x0a, 0x30, 0x80, 0xc0, 0x11, 0x00, 0x5a, 0x0b, 0x1c, 0x10, 0x10, 0x81,
-  0xce, 0x19, 0x00, 0x52, 0x67, 0x00, 0xa8, 0x9c, 0x01, 0x00, 0x00, 0x00,
-  0x79, 0x18, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00, 0x1a, 0x03, 0x4c, 0x90,
-  0x46, 0x02, 0x13, 0x44, 0x35, 0x18, 0x63, 0x0b, 0x73, 0x3b, 0x03, 0xb1,
-  0x2b, 0x93, 0x9b, 0x4b, 0x7b, 0x73, 0x03, 0x99, 0x71, 0xb9, 0x01, 0x41,
-  0xa1, 0x0b, 0x3b, 0x9b, 0x7b, 0x91, 0x2a, 0x62, 0x2a, 0x0a, 0x9a, 0x2a,
-  0xfa, 0x9a, 0xb9, 0x81, 0x79, 0x31, 0x4b, 0x73, 0x0b, 0x63, 0x4b, 0xd9,
-  0x10, 0x04, 0x13, 0x84, 0x81, 0x99, 0x20, 0x0c, 0xcd, 0x06, 0x61, 0x20,
-  0x26, 0x08, 0x83, 0xb3, 0x41, 0x18, 0x0c, 0x0a, 0x63, 0x73, 0x1b, 0x06,
-  0xc4, 0x20, 0x26, 0x08, 0xc3, 0x33, 0x41, 0x28, 0x03, 0x8c, 0xc0, 0x04,
-  0x61, 0x80, 0x26, 0x08, 0x60, 0x40, 0x4d, 0x10, 0x86, 0x68, 0x83, 0x30,
-  0x3c, 0x1b, 0x16, 0x65, 0x61, 0x14, 0x65, 0x68, 0x1c, 0xc7, 0x81, 0x26,
-  0x08, 0x67, 0x60, 0x6d, 0x58, 0x06, 0x89, 0x51, 0x86, 0xa1, 0x71, 0x1c,
-  0xa7, 0xd8, 0xb0, 0x10, 0x0b, 0xa3, 0x10, 0x43, 0xe3, 0x38, 0x0e, 0xb4,
-  0x61, 0x88, 0x26, 0x6a, 0x82, 0xa0, 0x06, 0xd7, 0x04, 0x61, 0x90, 0x36,
-  0x20, 0x8a, 0xc5, 0x28, 0xca, 0x70, 0x01, 0x1b, 0x02, 0x6c, 0x03, 0x01,
-  0x54, 0x19, 0x30, 0x41, 0x10, 0x00, 0x2a, 0x47, 0x72, 0x69, 0x64, 0x53,
-  0x61, 0x6d, 0x70, 0x6c, 0x65, 0x13, 0x84, 0x35, 0xa8, 0x26, 0x08, 0xc3,
-  0xb4, 0x61, 0xf0, 0x86, 0x61, 0x03, 0xa1, 0x74, 0xcf, 0xb7, 0xa1, 0xd8,
-  0x38, 0x40, 0x03, 0x83, 0x2a, 0x6c, 0x6c, 0x76, 0x6d, 0x2e, 0x69, 0x64,
-  0x65, 0x6e, 0x74, 0x53, 0x82, 0xa0, 0x0a, 0x19, 0x9e, 0x8b, 0x5d, 0x99,
-  0xdc, 0x5c, 0xda, 0x9b, 0xdb, 0x94, 0x80, 0x68, 0x42, 0x86, 0xe7, 0x62,
-  0x17, 0xc6, 0x66, 0x57, 0x26, 0x37, 0x25, 0x30, 0xea, 0x90, 0xe1, 0xb9,
-  0xcc, 0xa1, 0x85, 0x91, 0x95, 0xc9, 0x35, 0xbd, 0x91, 0x95, 0xb1, 0x4d,
-  0x09, 0x90, 0x32, 0x64, 0x78, 0x2e, 0x72, 0x65, 0x73, 0x6f, 0x75, 0x72,
-  0x63, 0x65, 0x73, 0x53, 0x82, 0xac, 0x0e, 0x19, 0x9e, 0x4b, 0x99, 0x1b,
-  0x9d, 0x5c, 0x1e, 0xd4, 0x5b, 0x9a, 0x1b, 0xdd, 0xdc, 0x94, 0x00, 0x0c,
-  0x00, 0x00, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00, 0x51, 0x00, 0x00, 0x00,
-  0x33, 0x08, 0x80, 0x1c, 0xc4, 0xe1, 0x1c, 0x66, 0x14, 0x01, 0x3d, 0x88,
-  0x43, 0x38, 0x84, 0xc3, 0x8c, 0x42, 0x80, 0x07, 0x79, 0x78, 0x07, 0x73,
-  0x98, 0x71, 0x0c, 0xe6, 0x00, 0x0f, 0xed, 0x10, 0x0e, 0xf4, 0x80, 0x0e,
-  0x33, 0x0c, 0x42, 0x1e, 0xc2, 0xc1, 0x1d, 0xce, 0xa1, 0x1c, 0x66, 0x30,
-  0x05, 0x3d, 0x88, 0x43, 0x38, 0x84, 0x83, 0x1b, 0xcc, 0x03, 0x3d, 0xc8,
-  0x43, 0x3d, 0x8c, 0x03, 0x3d, 0xcc, 0x78, 0x8c, 0x74, 0x70, 0x07, 0x7b,
-  0x08, 0x07, 0x79, 0x48, 0x87, 0x70, 0x70, 0x07, 0x7a, 0x70, 0x03, 0x76,
-  0x78, 0x87, 0x70, 0x20, 0x87, 0x19, 0xcc, 0x11, 0x0e, 0xec, 0x90, 0x0e,
-  0xe1, 0x30, 0x0f, 0x6e, 0x30, 0x0f, 0xe3, 0xf0, 0x0e, 0xf0, 0x50, 0x0e,
-  0x33, 0x10, 0xc4, 0x1d, 0xde, 0x21, 0x1c, 0xd8, 0x21, 0x1d, 0xc2, 0x61,
-  0x1e, 0x66, 0x30, 0x89, 0x3b, 0xbc, 0x83, 0x3b, 0xd0, 0x43, 0x39, 0xb4,
-  0x03, 0x3c, 0xbc, 0x83, 0x3c, 0x84, 0x03, 0x3b, 0xcc, 0xf0, 0x14, 0x76,
-  0x60, 0x07, 0x7b, 0x68, 0x07, 0x37, 0x68, 0x87, 0x72, 0x68, 0x07, 0x37,
-  0x80, 0x87, 0x70, 0x90, 0x87, 0x70, 0x60, 0x07, 0x76, 0x28, 0x07, 0x76,
-  0xf8, 0x05, 0x76, 0x78, 0x87, 0x77, 0x80, 0x87, 0x5f, 0x08, 0x87, 0x71,
-  0x18, 0x87, 0x72, 0x98, 0x87, 0x79, 0x98, 0x81, 0x2c, 0xee, 0xf0, 0x0e,
-  0xee, 0xe0, 0x0e, 0xf5, 0xc0, 0x0e, 0xec, 0x30, 0x03, 0x62, 0xc8, 0xa1,
-  0x1c, 0xe4, 0xa1, 0x1c, 0xcc, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xdc, 0x61,
-  0x1c, 0xca, 0x21, 0x1c, 0xc4, 0x81, 0x1d, 0xca, 0x61, 0x06, 0xd6, 0x90,
-  0x43, 0x39, 0xc8, 0x43, 0x39, 0x98, 0x43, 0x39, 0xc8, 0x43, 0x39, 0xb8,
-  0xc3, 0x38, 0x94, 0x43, 0x38, 0x88, 0x03, 0x3b, 0x94, 0xc3, 0x2f, 0xbc,
-  0x83, 0x3c, 0xfc, 0x82, 0x3b, 0xd4, 0x03, 0x3b, 0xb0, 0xc3, 0x0c, 0xc4,
-  0x21, 0x07, 0x7c, 0x70, 0x03, 0x7a, 0x28, 0x87, 0x76, 0x80, 0x87, 0x19,
-  0xd1, 0x43, 0x0e, 0xf8, 0xe0, 0x06, 0xe4, 0x20, 0x0e, 0xe7, 0xe0, 0x06,
-  0xf6, 0x10, 0x0e, 0xf2, 0xc0, 0x0e, 0xe1, 0x90, 0x0f, 0xef, 0x50, 0x0f,
-  0xf4, 0x30, 0x83, 0x81, 0xc8, 0x01, 0x1f, 0xdc, 0x40, 0x1c, 0xe4, 0xa1,
-  0x1c, 0xc2, 0x61, 0x1d, 0xdc, 0x40, 0x1c, 0xe4, 0x01, 0x00, 0x00, 0x00,
-  0x71, 0x20, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x06, 0xa0, 0x80, 0x11,
-  0x32, 0xb0, 0x00, 0xf3, 0x2c, 0x84, 0x19, 0x40, 0xc3, 0xe5, 0x3b, 0x8f,
-  0x1f, 0x20, 0x0d, 0x10, 0x61, 0x7e, 0x71, 0xdb, 0xa6, 0xb0, 0x0d, 0x97,
-  0xef, 0x3c, 0xbe, 0x10, 0x50, 0x45, 0x41, 0x44, 0xa5, 0x03, 0x0c, 0x25,
-  0x61, 0x00, 0x02, 0xe6, 0x23, 0xb7, 0x6d, 0x0b, 0xd2, 0x70, 0xf9, 0xce,
-  0xe3, 0x0b, 0x11, 0x01, 0x4c, 0x44, 0x08, 0x34, 0xc3, 0x42, 0xd8, 0x81,
-  0x33, 0x5c, 0xbe, 0xf3, 0xf8, 0x83, 0x33, 0xe1, 0x7e, 0x71, 0xdb, 0x86,
-  0x70, 0x0d, 0x97, 0xef, 0x3c, 0x7e, 0x04, 0x58, 0x1b, 0x55, 0x14, 0x44,
-  0x54, 0x3a, 0xc0, 0xe0, 0x17, 0xb5, 0x6e, 0x02, 0xd7, 0x70, 0xf9, 0xce,
-  0xe3, 0x47, 0x80, 0xb5, 0x51, 0x45, 0x41, 0x44, 0xa5, 0x03, 0x0c, 0x3e,
-  0x72, 0xdb, 0x36, 0x80, 0x0d, 0x97, 0xef, 0x3c, 0x7e, 0x04, 0x58, 0x1b,
-  0x55, 0x14, 0x44, 0xc4, 0x4e, 0x4e, 0x44, 0xf8, 0xc8, 0x6d, 0x5b, 0x81,
-  0x34, 0x5c, 0xbe, 0xf3, 0xf8, 0x13, 0x11, 0x4d, 0x08, 0x10, 0x61, 0x7e,
-  0x71, 0xdb, 0x96, 0x20, 0x0d, 0x97, 0xef, 0x3c, 0xfe, 0x44, 0x44, 0x13,
-  0x02, 0x44, 0x98, 0x8f, 0xdc, 0xb6, 0x05, 0x48, 0xc3, 0xe5, 0x3b, 0x8f,
-  0x3f, 0x1d, 0x11, 0x01, 0x0c, 0xe2, 0xe0, 0x23, 0xb7, 0x6d, 0x04, 0xcf,
-  0x70, 0xf9, 0xce, 0xe3, 0x53, 0x0d, 0x10, 0x61, 0x7e, 0x71, 0xdb, 0x00,
-  0x61, 0x20, 0x00, 0x00, 0x77, 0x13, 0x00, 0x00, 0x13, 0x04, 0x24, 0x14,
-  0x0b, 0x04, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, 0x34, 0x14, 0x58, 0xd9,
-  0x95, 0xa5, 0x40, 0x0d, 0x94, 0x51, 0x21, 0x15, 0xd7, 0x0c, 0x40, 0xc1,
-  0x95, 0x5c, 0xd9, 0x14, 0x4b, 0x61, 0x0a, 0x14, 0x4d, 0xe9, 0x06, 0x94,
-  0x43, 0x29, 0xd0, 0x51, 0x02, 0x45, 0x40, 0xce, 0x08, 0xc0, 0x18, 0x01,
-  0x08, 0x82, 0x20, 0xfe, 0x8d, 0x11, 0x80, 0x20, 0x08, 0xd2, 0xbf, 0x30,
-  0x46, 0x00, 0x82, 0x20, 0x48, 0x7f, 0x63, 0x04, 0x20, 0x08, 0x82, 0xfc,
-  0x37, 0x46, 0x00, 0x82, 0x20, 0x88, 0xff, 0xc2, 0x18, 0x01, 0x08, 0x82,
-  0x60, 0x08, 0x0e, 0x63, 0x04, 0x20, 0x08, 0x82, 0xfa, 0x37, 0x46, 0x00,
-  0x82, 0x20, 0xa8, 0xff, 0xc2, 0x18, 0x01, 0x08, 0x82, 0x20, 0xfc, 0x8d,
-  0x11, 0x80, 0x20, 0x08, 0xc2, 0xbf, 0x30, 0x46, 0x00, 0x82, 0x20, 0x08,
-  0x82, 0x01, 0x00, 0x00, 0x23, 0x06, 0x09, 0x00, 0x82, 0x60, 0x10, 0x06,
-  0x6d, 0x60, 0x39, 0x6b, 0xb0, 0x06, 0x64, 0x30, 0x62, 0x90, 0x00, 0x20,
-  0x08, 0x06, 0x61, 0xe0, 0x06, 0xd7, 0xd3, 0x06, 0x6d, 0x50, 0x06, 0x23,
-  0x06, 0x09, 0x00, 0x82, 0x60, 0x10, 0x06, 0x6f, 0x80, 0x41, 0x6b, 0xb0,
-  0x06, 0x66, 0x30, 0x62, 0x90, 0x00, 0x20, 0x08, 0x06, 0x61, 0x00, 0x07,
-  0x19, 0xc4, 0x06, 0x6c, 0x70, 0x06, 0x23, 0x06, 0x06, 0x00, 0x82, 0x60,
-  0x40, 0xec, 0x41, 0xd5, 0x06, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xe0,
-  0xcd, 0xc1, 0x18, 0x08, 0x6e, 0x30, 0x9a, 0x10, 0x00, 0x15, 0x0c, 0x30,
-  0x9a, 0x30, 0x04, 0xc3, 0x0d, 0x42, 0x40, 0x06, 0xb3, 0x0c, 0xc1, 0x08,
-  0x05, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xe0, 0xe1, 0x01, 0x1a, 0x1c,
-  0x76, 0x30, 0x9a, 0x10, 0x0c, 0x17, 0x38, 0x35, 0x9a, 0x30, 0x08, 0x17,
-  0x38, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x5e, 0x1f, 0xb4, 0x01,
-  0xf3, 0x8d, 0x26, 0x04, 0xc0, 0x70, 0x43, 0xa0, 0x07, 0x60, 0x30, 0xdd,
-  0x40, 0x79, 0xc1, 0x74, 0x43, 0xa5, 0x09, 0x85, 0x04, 0x30, 0xdd, 0x70,
-  0x71, 0x44, 0x21, 0x01, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x81, 0x57,
-  0x0a, 0x75, 0x40, 0x99, 0xc1, 0x68, 0x42, 0x10, 0x8c, 0x26, 0x08, 0xc2,
-  0x68, 0xc2, 0x30, 0x54, 0x20, 0x48, 0x0d, 0x84, 0x54, 0x30, 0x48, 0x5d,
-  0xc1, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x81, 0xd7, 0x0a, 0x7d, 0xc0,
-  0xa5, 0xc2, 0x68, 0x42, 0x00, 0x54, 0x30, 0x48, 0x6d, 0x41, 0x54, 0x80,
-  0xcc, 0x68, 0x42, 0x11, 0x54, 0x20, 0x48, 0x11, 0x41, 0x54, 0xd0, 0xcc,
-  0x68, 0x42, 0x22, 0x54, 0x20, 0x48, 0x11, 0x41, 0x5c, 0xe3, 0xd4, 0x15,
-  0x4e, 0xdd, 0xe0, 0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x78, 0xbc,
-  0xc0, 0x0a, 0x6b, 0x50, 0x0b, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04,
-  0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x47, 0x38, 0x75, 0x84, 0x53,
-  0x47, 0x38, 0x75, 0x84, 0x53, 0x23, 0x06, 0x0d, 0x00, 0x82, 0x60, 0x50,
-  0xa1, 0x83, 0x2b, 0x30, 0x8b, 0x32, 0x0a, 0xc4, 0x20, 0x04, 0x26, 0x04,
-  0xf0, 0x39, 0x61, 0x98, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0xc8, 0xd4,
-  0xe1, 0x16, 0xf2, 0x20, 0x18, 0x07, 0x54, 0x28, 0x87, 0xd1, 0x84, 0x00,
-  0xb8, 0xc0, 0xc1, 0x2b, 0x86, 0x19, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83,
-  0xcc, 0x1d, 0x76, 0xa1, 0x0f, 0x82, 0x73, 0x60, 0x85, 0x74, 0x18, 0x4d,
-  0x08, 0x80, 0x0b, 0x1c, 0x1c, 0x6e, 0x88, 0x83, 0x76, 0x00, 0x03, 0x33,
-  0x5a, 0x01, 0x3e, 0x36, 0xb8, 0x02, 0x7c, 0x66, 0x19, 0x84, 0x61, 0x30,
-  0xe1, 0x14, 0xe4, 0x63, 0x02, 0x2a, 0xc8, 0xc7, 0xf4, 0xa0, 0x15, 0xe0,
-  0x63, 0x79, 0xe0, 0x0a, 0xf0, 0x31, 0x42, 0x90, 0x8f, 0x11, 0x82, 0x7c,
-  0x66, 0x09, 0x08, 0xf3, 0x03, 0x44, 0x3e, 0x86, 0xf4, 0x81, 0x7c, 0x4c,
-  0x98, 0x05, 0xf8, 0x98, 0x40, 0x0b, 0xf0, 0x31, 0x21, 0x16, 0xe4, 0x63,
-  0x82, 0x2c, 0xc8, 0x67, 0x96, 0x80, 0x18, 0xa8, 0x70, 0x20, 0x81, 0x18,
-  0x06, 0x2a, 0x1c, 0x48, 0x20, 0x86, 0xd1, 0x84, 0x56, 0x10, 0x86, 0x1b,
-  0x82, 0x7f, 0x00, 0x83, 0x59, 0x86, 0xc2, 0x08, 0x46, 0x0c, 0x0c, 0x00,
-  0x04, 0xc1, 0xe0, 0x41, 0x09, 0x73, 0x20, 0x46, 0x0c, 0x0c, 0x00, 0x04,
-  0xc1, 0xe0, 0x49, 0x89, 0x73, 0x20, 0x66, 0x09, 0x8c, 0x81, 0x0a, 0x87,
-  0x28, 0x18, 0x62, 0xa0, 0xc2, 0x21, 0x0a, 0x86, 0x18, 0x8e, 0x10, 0x4c,
-  0x81, 0xf8, 0x86, 0x23, 0x86, 0x52, 0x10, 0xbe, 0x12, 0x82, 0x1d, 0x8e,
-  0x20, 0x52, 0x81, 0xf8, 0x4a, 0x08, 0x76, 0x38, 0xc2, 0x38, 0x05, 0xe1,
-  0xab, 0x40, 0xd8, 0x59, 0x86, 0x43, 0x0b, 0x46, 0x13, 0x74, 0x61, 0x18,
-  0x6e, 0x08, 0x58, 0x02, 0x0c, 0x66, 0x19, 0x90, 0x24, 0x28, 0x5b, 0xe8,
-  0x07, 0xb8, 0xc0, 0xa9, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x98, 0x6a,
-  0xc2, 0x1f, 0x9a, 0x77, 0x18, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0xc9,
-  0x26, 0xfc, 0x21, 0x10, 0x0a, 0x17, 0x42, 0x02, 0x2e, 0x70, 0x6a, 0xc4,
-  0xe0, 0x00, 0x40, 0x10, 0x0c, 0xa6, 0x9c, 0x10, 0x09, 0x68, 0x1e, 0x46,
-  0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0xd2, 0x09, 0x91, 0x08, 0x84, 0x59,
-  0x02, 0x6d, 0xb8, 0x41, 0xa1, 0x09, 0x30, 0x98, 0x65, 0x50, 0xb4, 0xc0,
-  0x6c, 0x01, 0x17, 0xe2, 0x33, 0xcb, 0xb0, 0x38, 0x93, 0xe5, 0x42, 0x15,
-  0x1f, 0x0b, 0x04, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4e, 0x59, 0x50, 0xc8,
-  0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xf4, 0x84, 0x0e, 0x37, 0x04, 0x3b, 0x01,
-  0x06, 0xb3, 0x0c, 0x4c, 0x13, 0xd8, 0x10, 0x0e, 0xf0, 0x99, 0x25, 0x90,
-  0x0c, 0x1c, 0x88, 0xf8, 0xcc, 0x12, 0x48, 0xb3, 0x0c, 0x8f, 0xc4, 0xd9,
-  0x17, 0x0e, 0xf1, 0xb1, 0x80, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0xe0, 0x94,
-  0x05, 0x8f, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0xc4, 0x59, 0xe8, 0x70, 0x43,
-  0x50, 0x16, 0x60, 0x30, 0xcb, 0x00, 0x45, 0x81, 0xa5, 0xc3, 0x10, 0x9f,
-  0x59, 0x02, 0xc9, 0x08, 0x76, 0x80, 0xcf, 0x2c, 0x81, 0x34, 0xd0, 0xe2,
-  0x60, 0x8c, 0xd5, 0x10, 0x90, 0x10, 0xc9, 0x82, 0x63, 0xea, 0xe0, 0x0e,
-  0xf1, 0x99, 0x65, 0x98, 0x2c, 0x33, 0xb0, 0x77, 0x50, 0x83, 0xf8, 0x58,
-  0x20, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x70, 0xca, 0x82, 0x42, 0x3e, 0x56,
-  0x04, 0xf1, 0x29, 0x62, 0x2e, 0x74, 0xb8, 0x21, 0x88, 0x0b, 0x30, 0x98,
-  0x65, 0xa0, 0xaa, 0xc0, 0x86, 0x7b, 0x80, 0xcf, 0x2c, 0x81, 0x66, 0xf4,
-  0x40, 0xc4, 0x67, 0x96, 0x40, 0x9b, 0x65, 0xb8, 0x34, 0x37, 0x30, 0x3a,
-  0xa8, 0x87, 0xf8, 0x58, 0xc0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x70, 0xca,
-  0x82, 0x47, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0xa2, 0x2f, 0x74, 0xb8, 0x21,
-  0xd8, 0x0b, 0x30, 0x98, 0x65, 0xc0, 0xb2, 0xc0, 0xfa, 0x61, 0x88, 0xcf,
-  0x2c, 0x81, 0x66, 0x84, 0x48, 0xc0, 0x67, 0x96, 0x40, 0x1b, 0x28, 0x72,
-  0xc4, 0x01, 0xf1, 0x87, 0xc4, 0x1f, 0x0c, 0x36, 0xc8, 0xd8, 0x00, 0x63,
-  0x03, 0x8b, 0x0d, 0x2a, 0x36, 0xa0, 0x06, 0x8a, 0x1c, 0x5e, 0x40, 0xfc,
-  0x21, 0xf1, 0x07, 0x83, 0xc8, 0x0c, 0xcc, 0x1f, 0x2c, 0xac, 0xd2, 0xa8,
-  0xa3, 0x07, 0xa7, 0x66, 0x19, 0xb6, 0x39, 0x28, 0x85, 0xd1, 0x84, 0x99,
-  0x18, 0x86, 0x1b, 0x02, 0xd1, 0x00, 0x83, 0x59, 0x06, 0xce, 0x0b, 0x86,
-  0x23, 0x0a, 0xb3, 0x18, 0xbe, 0x33, 0x86, 0x19, 0x6e, 0x08, 0x62, 0x82,
-  0x0c, 0x6a, 0x08, 0x74, 0x38, 0x02, 0x51, 0x8b, 0xe1, 0xab, 0x40, 0xd0,
-  0x53, 0x86, 0x19, 0x6e, 0x08, 0x68, 0x82, 0x0c, 0x2a, 0x18, 0x74, 0x96,
-  0xa1, 0x93, 0x83, 0xe0, 0xf0, 0x61, 0x98, 0x6b, 0x86, 0x19, 0x31, 0x38,
-  0x00, 0x10, 0x04, 0x03, 0x6f, 0x36, 0x46, 0x43, 0x2c, 0x60, 0x63, 0x34,
-  0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88,
-  0xa1, 0x88, 0x43, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0xd3, 0x0d,
-  0xd5, 0x38, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb6, 0xdd,
-  0x58, 0x0d, 0x86, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0xe3,
-  0x0d, 0xd6, 0x90, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x14,
-  0xf2, 0x58, 0x0d, 0xb6, 0x08, 0x6e, 0x83, 0x2f, 0x6a, 0x63, 0x34, 0x21,
-  0x00, 0x86, 0x1b, 0x82, 0xdc, 0x08, 0x83, 0x0b, 0x9c, 0x9a, 0x25, 0x90,
-  0x83, 0xe1, 0x06, 0xcd, 0x37, 0xc0, 0x60, 0x96, 0xe1, 0x03, 0x83, 0xa0,
-  0xd0, 0xe2, 0x35, 0xe0, 0x02, 0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1,
-  0x60, 0x3a, 0x0f, 0xd8, 0x00, 0x83, 0xd0, 0x18, 0x31, 0x38, 0x00, 0x10,
-  0x04, 0x83, 0x09, 0x3d, 0x60, 0x23, 0x10, 0x2e, 0x18, 0xa6, 0xd6, 0x82,
-  0x36, 0xe0, 0x02, 0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0x62,
-  0x8f, 0xda, 0x20, 0x03, 0xd3, 0x18, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83,
-  0xa9, 0x3d, 0x6a, 0x23, 0x10, 0x2e, 0x18, 0xe6, 0x02, 0xa7, 0xee, 0x70,
-  0xea, 0x68, 0x62, 0x98, 0x4b, 0x83, 0x61, 0x8e, 0x18, 0xe6, 0x88, 0x61,
-  0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0xc0, 0x8b, 0x8f, 0xf0, 0x00, 0x0d,
-  0xf7, 0x18, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84,
-  0xd1, 0x04, 0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30,
-  0xd8, 0xf0, 0x03, 0x3d, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04,
-  0x83, 0x2d, 0x3f, 0xd2, 0x23, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41,
-  0x30, 0xd8, 0xf4, 0x43, 0x3d, 0x12, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10,
-  0x04, 0x03, 0x45, 0x44, 0xd2, 0x43, 0x35, 0x82, 0xfa, 0xd0, 0x8d, 0xf9,
-  0x18, 0x4d, 0x08, 0x80, 0xe1, 0x86, 0xe0, 0x3e, 0xc2, 0xe0, 0x02, 0xa7,
-  0x66, 0x09, 0xe4, 0x60, 0xb8, 0x01, 0x0f, 0xf6, 0x03, 0x0c, 0x66, 0x19,
-  0xc2, 0x40, 0x0e, 0x02, 0xeb, 0x8b, 0xbf, 0x88, 0xcf, 0x70, 0x44, 0x1f,
-  0x80, 0x06, 0xf1, 0xcd, 0x32, 0x88, 0x41, 0x19, 0x04, 0x16, 0x1a, 0x7e,
-  0x10, 0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4e, 0x59, 0x60,
-  0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0x94, 0x88, 0x0e, 0x37, 0x04, 0x23,
-  0x02, 0x06, 0xb3, 0x0c, 0x63, 0x40, 0x06, 0x81, 0x0d, 0xa9, 0x01, 0x9f,
-  0x59, 0x82, 0x34, 0x30, 0xd4, 0x20, 0xe2, 0x33, 0x4b, 0x90, 0x06, 0xc3,
-  0x11, 0xa8, 0x90, 0x1a, 0xc2, 0x37, 0xcb, 0x60, 0x06, 0x69, 0x10, 0x58,
-  0x2a, 0xa8, 0x46, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x38,
-  0x65, 0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x01, 0x23, 0x3a, 0xdc,
-  0x10, 0xb8, 0x08, 0x18, 0xcc, 0x32, 0x9c, 0x01, 0x1a, 0x04, 0x26, 0x1b,
-  0x43, 0x7c, 0x66, 0x09, 0xd2, 0xc0, 0x88, 0xda, 0x80, 0xcf, 0x2c, 0x41,
-  0x1a, 0x0c, 0xb4, 0x38, 0xda, 0x18, 0x60, 0x64, 0x40, 0x9c, 0x81, 0x80,
-  0x06, 0x66, 0x51, 0x06, 0x17, 0x0c, 0x63, 0xb4, 0x81, 0x1b, 0xf1, 0x19,
-  0x8e, 0xa8, 0x85, 0xdc, 0x20, 0xbe, 0x59, 0x06, 0x35, 0x68, 0x83, 0xc0,
-  0x74, 0xc3, 0x16, 0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xc0,
-  0x29, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0x1f, 0xd1, 0xe1,
-  0x86, 0x80, 0x47, 0xc0, 0x60, 0x96, 0x61, 0x0d, 0xd8, 0x20, 0xb0, 0x41,
-  0x3c, 0xe0, 0x33, 0x4b, 0x10, 0x07, 0xf6, 0x1b, 0x44, 0x7c, 0x66, 0x09,
-  0xe2, 0x60, 0x38, 0x02, 0x1c, 0xc0, 0x43, 0xf8, 0x66, 0x19, 0xdc, 0x20,
-  0x0e, 0x02, 0x0b, 0x87, 0xf0, 0x88, 0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18,
-  0xe6, 0x02, 0xa7, 0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xd2,
-  0x44, 0x87, 0x1b, 0x82, 0x33, 0x01, 0x83, 0x59, 0x86, 0x37, 0x80, 0x83,
-  0xc0, 0xd2, 0x63, 0x88, 0xcf, 0x2c, 0x41, 0x1c, 0x18, 0xe1, 0x1e, 0xf0,
-  0x99, 0x25, 0x88, 0x83, 0x81, 0x16, 0x47, 0x5b, 0x03, 0x8c, 0x0d, 0x88,
-  0x37, 0x10, 0xe0, 0xc0, 0x36, 0xda, 0xe0, 0x82, 0x61, 0x2e, 0x70, 0xea,
-  0x36, 0xa7, 0x4e, 0x37, 0x86, 0xb9, 0x77, 0x18, 0xe6, 0x88, 0x61, 0x8e,
-  0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xbc, 0x3b, 0x39, 0x13,
-  0x13, 0xa1, 0x93, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84,
-  0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10,
-  0x04, 0x83, 0xcd, 0x4f, 0xdc, 0x24, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00,
-  0x41, 0x30, 0xd8, 0xfe, 0xe4, 0x4d, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00,
-  0x10, 0x04, 0x83, 0x0d, 0x54, 0xe0, 0x24, 0x21, 0x82, 0x11, 0x03, 0x05,
-  0x00, 0x41, 0x30, 0x50, 0x50, 0xe5, 0x4d, 0x60, 0x24, 0xd8, 0x13, 0x30,
-  0xc9, 0x93, 0xd1, 0x84, 0x00, 0x18, 0x6e, 0x08, 0xfa, 0x24, 0x0c, 0x2e,
-  0x70, 0x6a, 0x96, 0x40, 0x0e, 0x06, 0x5a, 0x1c, 0xd5, 0xe8, 0x48, 0x89,
-  0x73, 0x89, 0x4f, 0x88, 0x03, 0x52, 0x02, 0x83, 0xe1, 0x88, 0x60, 0x4c,
-  0x9c, 0xef, 0x82, 0x21, 0x46, 0x0c, 0x1c, 0x00, 0x04, 0xc1, 0x80, 0x71,
-  0x15, 0x36, 0xc9, 0x11, 0x19, 0x11, 0x95, 0x20, 0x4e, 0xe2, 0x24, 0x4e,
-  0xce, 0x04, 0x54, 0x66, 0x09, 0x46, 0x68, 0xb8, 0xe1, 0x34, 0x44, 0x05,
-  0x0c, 0x66, 0x19, 0xe8, 0x20, 0x26, 0x82, 0x11, 0x03, 0x03, 0x00, 0x41,
-  0x30, 0x78, 0x5c, 0xe5, 0x4d, 0x4a, 0x62, 0xc4, 0xc0, 0x00, 0x40, 0x10,
-  0x0c, 0x9e, 0x57, 0x81, 0x93, 0x92, 0x30, 0xe1, 0x4c, 0xe0, 0x63, 0x02,
-  0x9a, 0xc0, 0x67, 0x34, 0x21, 0x47, 0x86, 0xe1, 0x86, 0x00, 0x55, 0xc0,
-  0x60, 0x96, 0xa1, 0x0e, 0xee, 0x20, 0x18, 0x8e, 0x30, 0xd8, 0x64, 0xf8,
-  0xee, 0x18, 0x66, 0xb8, 0x21, 0xb8, 0x11, 0x32, 0xa8, 0x21, 0xd0, 0xe1,
-  0x88, 0x04, 0x4e, 0x86, 0xaf, 0x02, 0x41, 0x6f, 0x19, 0x66, 0xb8, 0x21,
-  0xd0, 0x11, 0x32, 0xa8, 0x60, 0xd0, 0x59, 0x06, 0x3b, 0x58, 0x85, 0xe0,
-  0xfc, 0x63, 0x98, 0x9b, 0x89, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1,
-  0xc0, 0xcb, 0x95, 0x54, 0x41, 0x13, 0x5b, 0x19, 0x4d, 0x08, 0x80, 0xd1,
-  0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0xe2, 0x90,
-  0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xd8, 0xc0, 0x05, 0x56, 0x0e, 0x22,
-  0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x2d, 0x5c, 0x62, 0x85, 0x21,
-  0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xd8, 0xc4, 0x45, 0x56, 0x24,
-  0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x03, 0x45, 0x5d, 0x62, 0x45,
-  0x4e, 0x82, 0x5e, 0x11, 0x95, 0x5d, 0x19, 0x4d, 0x08, 0x80, 0xe1, 0x86,
-  0xe0, 0x57, 0xc2, 0xe0, 0x02, 0xa7, 0x66, 0x09, 0x56, 0x61, 0xb8, 0x41,
-  0x23, 0x17, 0x30, 0x98, 0x65, 0xc0, 0x83, 0x3c, 0x08, 0xca, 0x4d, 0x6a,
-  0x05, 0x2e, 0x70, 0x6a, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xa6, 0x76,
-  0xb1, 0x95, 0x30, 0x38, 0x95, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x98,
-  0xdc, 0xc5, 0x56, 0x02, 0xe1, 0x82, 0x61, 0x2a, 0x4e, 0x74, 0x05, 0x2e,
-  0x70, 0x6a, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x26, 0x79, 0xd9, 0x95,
-  0x32, 0x60, 0x95, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x98, 0xe6, 0x65,
-  0x57, 0x02, 0xe1, 0x82, 0x61, 0x2e, 0x70, 0xea, 0x0e, 0xa7, 0x4e, 0x47,
-  0x86, 0xb9, 0xb7, 0x18, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0,
-  0x00, 0x40, 0x10, 0x0c, 0xbc, 0x7b, 0x39, 0x17, 0x53, 0xa1, 0x97, 0xd1,
-  0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20,
-  0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0xcd, 0x5f,
-  0xdc, 0x25, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xd8, 0xfe,
-  0xe5, 0x5d, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x0d,
-  0x64, 0xe0, 0x25, 0x21, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x50,
-  0x50, 0xe6, 0x5d, 0x60, 0x25, 0xd8, 0x17, 0x70, 0xc9, 0x97, 0xd1, 0x84,
-  0x00, 0x18, 0x6e, 0x08, 0xfa, 0x25, 0x0c, 0x2e, 0x70, 0x6a, 0x96, 0x60,
-  0x15, 0x86, 0x1b, 0xf0, 0x20, 0x64, 0xc0, 0x60, 0x96, 0x41, 0x0f, 0x56,
-  0x21, 0xb0, 0x51, 0x29, 0x95, 0xf8, 0x0c, 0x47, 0xf8, 0x81, 0xa9, 0x10,
-  0xdf, 0x2c, 0xc3, 0x1e, 0xf8, 0x41, 0x60, 0xa7, 0xf2, 0x07, 0xf1, 0xb1,
-  0x60, 0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0xe0, 0x94, 0x05, 0x86, 0x7c, 0xac,
-  0x08, 0xe2, 0x53, 0xc4, 0xca, 0xe8, 0x70, 0x43, 0x90, 0x32, 0x60, 0x30,
-  0xcb, 0xc0, 0x07, 0x7d, 0x10, 0xd8, 0xf0, 0x2a, 0xf0, 0x99, 0x25, 0x10,
-  0x05, 0x73, 0x15, 0x22, 0x3e, 0xb3, 0x04, 0xa2, 0x30, 0x1c, 0x91, 0x0a,
-  0xaf, 0x22, 0x7c, 0xb3, 0x0c, 0x7f, 0x20, 0x0a, 0x81, 0xa9, 0x02, 0xac,
-  0xc4, 0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x53, 0x16, 0x44,
-  0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x36, 0xa3, 0xc3, 0x0d, 0x01, 0xcd,
-  0x80, 0xc1, 0x2c, 0x03, 0x28, 0x84, 0x42, 0x60, 0xb8, 0x32, 0xc4, 0x67,
-  0x96, 0x40, 0x14, 0x8c, 0xd8, 0x15, 0xf8, 0xcc, 0x12, 0x88, 0xc2, 0x40,
-  0x8b, 0xa3, 0xf1, 0x01, 0xd6, 0x07, 0x04, 0x28, 0x08, 0xa1, 0x80, 0x16,
-  0x7e, 0x70, 0xc1, 0x30, 0xa6, 0x2b, 0xbe, 0x12, 0x9f, 0xe1, 0x08, 0x5b,
-  0xf8, 0x15, 0xe2, 0x9b, 0x65, 0x18, 0x05, 0x53, 0x08, 0x0c, 0x5c, 0x6e,
-  0x21, 0x3e, 0x16, 0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9c, 0xb2, 0xc0,
-  0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x20, 0x1b, 0x1d, 0x6e, 0x08, 0xc4,
-  0x06, 0x0c, 0x66, 0x19, 0x48, 0xa1, 0x14, 0x02, 0x1b, 0xd0, 0x05, 0x3e,
-  0xb3, 0x04, 0xaa, 0x60, 0xe5, 0x42, 0xc4, 0x67, 0x96, 0x40, 0x15, 0x86,
-  0x23, 0xc2, 0xc1, 0x5c, 0x84, 0x6f, 0x96, 0xe1, 0x14, 0x54, 0x21, 0x30,
-  0x71, 0x38, 0x97, 0xf8, 0x58, 0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x70,
-  0xca, 0x82, 0x48, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0xe2, 0x6d, 0x74, 0xb8,
-  0x21, 0x68, 0x1b, 0x30, 0x98, 0x65, 0x40, 0x85, 0x54, 0x08, 0xec, 0x5d,
-  0x86, 0xf8, 0xcc, 0x12, 0xa8, 0x82, 0x11, 0xf4, 0x02, 0x9f, 0x59, 0x02,
-  0x55, 0x18, 0x68, 0x71, 0x34, 0x52, 0xc0, 0x4a, 0x81, 0x40, 0x05, 0x21,
-  0x15, 0x70, 0xc3, 0x14, 0x2e, 0x18, 0xe6, 0x02, 0xa7, 0x6e, 0x73, 0xea,
-  0xc0, 0x65, 0x98, 0xab, 0x8f, 0x61, 0x8e, 0x18, 0xe6, 0x88, 0x61, 0x46,
-  0x0c, 0x0e, 0x00, 0x04, 0xc1, 0xc0, 0xeb, 0x9b, 0xb6, 0x61, 0x19, 0xbd,
-  0x19, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1,
-  0x04, 0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xd8,
-  0x48, 0x87, 0x6e, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83,
-  0xad, 0x74, 0xea, 0x26, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30,
-  0xd8, 0x4c, 0xc7, 0x6e, 0x12, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04,
-  0x03, 0xc5, 0x75, 0xea, 0xc6, 0x66, 0x82, 0xd0, 0x31, 0x9b, 0xbf, 0x19,
-  0x4d, 0x08, 0x80, 0xe1, 0x86, 0x60, 0x74, 0xc2, 0xe0, 0x02, 0xa7, 0x66,
-  0x09, 0x56, 0x61, 0xa0, 0xc5, 0x51, 0x0d, 0x3b, 0x80, 0xb5, 0x3a, 0x70,
-  0x09, 0x3c, 0x10, 0x54, 0x01, 0xd6, 0xf2, 0x60, 0x96, 0x81, 0x15, 0x5c,
-  0xc1, 0x1f, 0x86, 0x23, 0x42, 0x22, 0x6d, 0x86, 0xef, 0x44, 0x62, 0x98,
-  0xe1, 0x86, 0x80, 0x66, 0xc8, 0xa0, 0x86, 0x40, 0x87, 0x23, 0x4c, 0xa2,
-  0x6d, 0x86, 0xaf, 0x02, 0x41, 0x0f, 0x25, 0x86, 0x19, 0x6e, 0x08, 0x6e,
-  0x86, 0x0c, 0x2a, 0x18, 0x74, 0x96, 0xa1, 0x15, 0xc4, 0x21, 0xb8, 0x7d,
-  0x19, 0xe6, 0x60, 0x64, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xf0,
-  0x6c, 0xc7, 0x74, 0xca, 0x66, 0x76, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41,
-  0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x38, 0x64, 0xc4,
-  0x00, 0x01, 0x40, 0x10, 0x0c, 0xb6, 0xde, 0x69, 0x9d, 0x83, 0x08, 0x46,
-  0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0xf3, 0x1d, 0xd7, 0x61, 0x88, 0x60,
-  0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb6, 0xdf, 0x79, 0x1d, 0x89, 0x08,
-  0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x40, 0x39, 0x1f, 0xd7, 0x79, 0x9b,
-  0x40, 0x77, 0xfe, 0x06, 0x77, 0x46, 0x13, 0x02, 0x60, 0xb8, 0x21, 0xe0,
-  0x9d, 0x30, 0xb8, 0xc0, 0xa9, 0x59, 0x02, 0x71, 0x18, 0x6e, 0xb8, 0x89,
-  0xf0, 0x01, 0x83, 0x59, 0x86, 0x57, 0x80, 0x85, 0xa0, 0xd6, 0x46, 0x76,
-  0xe0, 0x02, 0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0x52, 0x9f,
-  0xd9, 0xe1, 0x09, 0xd2, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x69,
-  0x7d, 0x66, 0x27, 0x10, 0x2e, 0x18, 0xa6, 0xdc, 0xe6, 0x76, 0xe0, 0x02,
-  0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0x7a, 0x1f, 0xdc, 0x11,
-  0x8b, 0xd4, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x09, 0x7e, 0x70,
-  0x27, 0x10, 0x2e, 0x18, 0xe6, 0x02, 0xa7, 0xee, 0x70, 0xea, 0x6e, 0x66,
-  0x98, 0x63, 0x93, 0x61, 0x8e, 0x18, 0xe6, 0x88, 0x61, 0x46, 0x0c, 0x0e,
-  0x00, 0x04, 0xc1, 0xc0, 0xa3, 0x1f, 0xf2, 0x19, 0x9d, 0xf8, 0x19, 0x4d,
-  0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62,
-  0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xd8, 0xf6, 0x67,
-  0x7d, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x8d, 0x7f,
-  0xd8, 0x27, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xd8, 0xfa,
-  0xa7, 0x7d, 0x12, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x03, 0xa5,
-  0x84, 0xd8, 0xa7, 0x75, 0x02, 0xfc, 0xe9, 0x1d, 0xfb, 0x19, 0x4d, 0x08,
-  0x80, 0xe1, 0x86, 0x40, 0x7f, 0xc2, 0xe0, 0x02, 0xa7, 0x66, 0x09, 0xc4,
-  0x61, 0xb8, 0xa1, 0x2e, 0xfc, 0x07, 0x0c, 0x66, 0x19, 0x62, 0x41, 0x1c,
-  0x02, 0x03, 0x1d, 0xd1, 0x89, 0xcf, 0x70, 0x44, 0x5e, 0x8c, 0x0e, 0xf1,
-  0xcd, 0x32, 0xc8, 0x42, 0x2d, 0x04, 0x46, 0x3a, 0x7a, 0x11, 0x1f, 0x0b,
-  0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4e, 0x59, 0x60, 0xc8, 0xc7, 0x8a,
-  0x20, 0x3e, 0x45, 0xa0, 0x90, 0x0e, 0x37, 0x04, 0x26, 0x04, 0x06, 0xb3,
-  0x0c, 0xb3, 0x40, 0x0b, 0x81, 0x0d, 0xac, 0x03, 0x9f, 0x59, 0x82, 0x5c,
-  0xb0, 0xd5, 0x21, 0xe2, 0x33, 0x4b, 0x90, 0x0b, 0xc3, 0x11, 0xa4, 0xc1,
-  0x3a, 0xc2, 0x37, 0xcb, 0x60, 0x0b, 0xb9, 0x10, 0x58, 0x69, 0xb4, 0x4e,
-  0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x38, 0x65, 0x41, 0x24,
-  0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x31, 0x43, 0x3a, 0xdc, 0x10, 0xc4, 0x10,
-  0x18, 0xcc, 0x32, 0xdc, 0x02, 0x2e, 0x04, 0x56, 0x3b, 0x43, 0x7c, 0x66,
-  0x09, 0x72, 0xc1, 0x08, 0xdc, 0x81, 0xcf, 0x2c, 0x41, 0x2e, 0x0c, 0xb4,
-  0x38, 0xda, 0x2c, 0x60, 0xb4, 0x40, 0xdc, 0x82, 0x80, 0x0b, 0x3e, 0x53,
-  0x0b, 0x17, 0x0c, 0x63, 0xb7, 0xb3, 0x3b, 0xf1, 0x19, 0x8e, 0x98, 0x0d,
-  0xde, 0x21, 0xbe, 0x59, 0x06, 0x5d, 0xe8, 0x85, 0xc0, 0x7a, 0x87, 0x36,
-  0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x29, 0x0b, 0x0c,
-  0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0x30, 0xd2, 0xe1, 0x86, 0xe0, 0x87,
-  0xc0, 0x60, 0x96, 0x61, 0x17, 0x78, 0x21, 0xb0, 0xa1, 0x7c, 0xe0, 0x33,
-  0x4b, 0x10, 0x0e, 0x26, 0x3e, 0x44, 0x7c, 0x66, 0x09, 0xc2, 0x61, 0x38,
-  0xc2, 0x37, 0xc6, 0x47, 0xf8, 0x66, 0x19, 0x7c, 0x21, 0x1c, 0x02, 0xfb,
-  0x0d, 0xf2, 0x89, 0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa7,
-  0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xd8, 0x48, 0x87, 0x1b,
-  0x02, 0x35, 0x02, 0x83, 0x59, 0x86, 0x5f, 0x00, 0x87, 0xc0, 0xd8, 0x67,
-  0x88, 0xcf, 0x2c, 0x41, 0x38, 0x18, 0x11, 0x3f, 0xf0, 0x99, 0x25, 0x08,
-  0x87, 0x81, 0x16, 0x47, 0xdb, 0x05, 0x8c, 0x17, 0x88, 0x5f, 0x10, 0xc0,
-  0x41, 0x76, 0x7a, 0xe1, 0x82, 0x61, 0x2e, 0x70, 0xea, 0x36, 0xa7, 0xae,
-  0x77, 0x86, 0x39, 0x79, 0x19, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4,
-  0xe0, 0x00, 0x40, 0x10, 0x0c, 0x3c, 0x3d, 0x52, 0xa3, 0x14, 0xba, 0xa3,
-  0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d,
-  0x20, 0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x2d,
-  0x94, 0xe2, 0x28, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xd8,
-  0x44, 0x49, 0x8e, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83,
-  0x6d, 0x94, 0xe6, 0x28, 0x21, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30,
-  0x50, 0x56, 0x49, 0x8e, 0x66, 0x28, 0xf0, 0xa3, 0x31, 0xe2, 0xa3, 0xd1,
-  0x84, 0x00, 0x18, 0x6e, 0x08, 0x40, 0x29, 0x0c, 0x2e, 0x70, 0x6a, 0x96,
-  0x40, 0x1c, 0x06, 0x5a, 0x1c, 0xd5, 0x68, 0x05, 0x55, 0x0c, 0x58, 0xc1,
-  0x25, 0x5e, 0x41, 0x08, 0x07, 0x55, 0x0c, 0x60, 0x61, 0x96, 0x61, 0x1c,
-  0xca, 0x61, 0x3f, 0x86, 0x23, 0x40, 0xc4, 0x8c, 0x86, 0xef, 0x42, 0x64,
-  0x98, 0xe1, 0x86, 0x20, 0x86, 0xc8, 0xa0, 0x86, 0x40, 0x87, 0x23, 0x42,
-  0x44, 0x8d, 0x86, 0xaf, 0x02, 0x41, 0x6f, 0x44, 0x86, 0x19, 0x6e, 0x08,
-  0x68, 0x88, 0x0c, 0x2a, 0x18, 0x74, 0x96, 0x81, 0x1c, 0xf2, 0x21, 0x38,
-  0xfc, 0x19, 0xe6, 0x5a, 0x66, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30,
-  0xf0, 0x66, 0x69, 0x94, 0xc4, 0x08, 0x96, 0x46, 0x13, 0x02, 0x60, 0x34,
-  0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x38, 0x64,
-  0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x36, 0x5d, 0x52, 0xa5, 0x83, 0x08,
-  0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0xdb, 0xa5, 0x55, 0x62, 0x88,
-  0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x36, 0x5e, 0x62, 0x25, 0x89,
-  0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x40, 0x21, 0xa7, 0x55, 0x62,
-  0xa3, 0xe0, 0x96, 0xf8, 0xa8, 0x96, 0x46, 0x13, 0x02, 0x60, 0xb8, 0x21,
-  0xc8, 0xa5, 0x30, 0xb8, 0xc0, 0xa9, 0x59, 0x82, 0x7c, 0x18, 0x6e, 0xa0,
-  0x11, 0x5f, 0x02, 0x83, 0x59, 0x06, 0x73, 0x38, 0x87, 0xa0, 0xd0, 0xe8,
-  0x95, 0xe0, 0x02, 0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0x3a,
-  0x27, 0x58, 0xda, 0x91, 0x50, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83,
-  0x09, 0x9d, 0x60, 0x29, 0x10, 0x2e, 0x18, 0xa6, 0xd6, 0x88, 0x96, 0xe0,
-  0x02, 0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0x62, 0xa7, 0x5a,
-  0xea, 0x11, 0x53, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0xa9, 0x9d,
-  0x6a, 0x29, 0x10, 0x2e, 0x18, 0xe6, 0x02, 0xa7, 0xee, 0x70, 0xea, 0x68,
-  0x68, 0x98, 0x4b, 0x9b, 0x61, 0x8e, 0x18, 0xe6, 0x88, 0x61, 0x46, 0x0c,
-  0x0e, 0x00, 0x04, 0xc1, 0xc0, 0x8b, 0xa7, 0x70, 0x02, 0x25, 0x77, 0x1a,
-  0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04,
-  0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xd8, 0xf0,
-  0x09, 0x9d, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x2d,
-  0x9f, 0xd2, 0x29, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xd8,
-  0xf4, 0x49, 0x9d, 0x12, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x03,
-  0x45, 0xa4, 0xd2, 0x49, 0x95, 0x82, 0x7a, 0xd2, 0xa5, 0x79, 0x1a, 0x4d,
-  0x08, 0x80, 0xe1, 0x86, 0xe0, 0x9e, 0xc2, 0xe0, 0x02, 0xa7, 0x66, 0x09,
-  0xf2, 0x61, 0xb8, 0x41, 0x4e, 0xf6, 0x09, 0x0c, 0x66, 0x19, 0xd0, 0x21,
-  0x1f, 0x02, 0xeb, 0xa3, 0x3f, 0x8a, 0xcf, 0x70, 0x04, 0x9e, 0x80, 0x12,
-  0xf1, 0xcd, 0x32, 0xa4, 0x03, 0x3b, 0x04, 0x16, 0x4a, 0x79, 0x12, 0x1f,
-  0x0b, 0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4e, 0x59, 0x60, 0xc8, 0xc7,
-  0x8a, 0x20, 0x3e, 0x45, 0x94, 0x94, 0x0e, 0x37, 0x04, 0x23, 0x05, 0x06,
-  0xb3, 0x0c, 0xea, 0xb0, 0x0e, 0x81, 0x0d, 0xa9, 0x04, 0x9f, 0x59, 0x02,
-  0x78, 0x30, 0x54, 0x22, 0xe2, 0x33, 0x4b, 0x00, 0x0f, 0xc3, 0x11, 0xa3,
-  0x92, 0x4a, 0xc2, 0x37, 0xcb, 0xd0, 0x0e, 0xf0, 0x10, 0x18, 0xa9, 0xa8,
-  0x52, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x38, 0x65, 0x41,
-  0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x01, 0x53, 0x3a, 0xdc, 0x10, 0xb8,
-  0x14, 0x18, 0xcc, 0x32, 0xb8, 0xc3, 0x3b, 0x04, 0x26, 0x4b, 0x43, 0x7c,
-  0x66, 0x09, 0xe0, 0xc1, 0x88, 0x5a, 0x82, 0xcf, 0x2c, 0x01, 0x3c, 0x0c,
-  0xb4, 0x38, 0x9a, 0x3a, 0x60, 0xeb, 0x40, 0xb8, 0x83, 0xf0, 0x0e, 0x3c,
-  0xc5, 0x0e, 0x17, 0x0c, 0x63, 0xb4, 0x84, 0x4b, 0xf1, 0x19, 0x8e, 0x70,
-  0x95, 0x5c, 0x22, 0xbe, 0x59, 0x86, 0x78, 0xa0, 0x87, 0xc0, 0x74, 0xe9,
-  0x55, 0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x29, 0x0b,
-  0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0x9f, 0xd2, 0xe1, 0x86, 0x80,
-  0xa7, 0xc0, 0x60, 0x96, 0x41, 0x1e, 0xe6, 0x21, 0xb0, 0x41, 0x9c, 0xe0,
-  0x33, 0x4b, 0x80, 0x0f, 0xf6, 0x4b, 0x44, 0x7c, 0x66, 0x09, 0xf0, 0x61,
-  0x38, 0x22, 0x57, 0xc0, 0x49, 0xf8, 0x66, 0x19, 0xea, 0x01, 0x1f, 0x02,
-  0xd3, 0x95, 0x70, 0x8a, 0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x02,
-  0xa7, 0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xd2, 0x4a, 0x87,
-  0x1b, 0x82, 0xb3, 0x02, 0x83, 0x59, 0x06, 0x7b, 0xb8, 0x87, 0xc0, 0xd2,
-  0x69, 0x88, 0xcf, 0x2c, 0x01, 0x3e, 0x18, 0xe1, 0x4e, 0xf0, 0x99, 0x25,
-  0xc0, 0x87, 0x81, 0x16, 0x47, 0x93, 0x07, 0x6c, 0x1e, 0x08, 0x7b, 0x10,
-  0xee, 0x01, 0xb5, 0xe8, 0xe1, 0x82, 0x61, 0x2e, 0x70, 0xea, 0x36, 0xa7,
-  0x4e, 0x97, 0x86, 0xb9, 0xf7, 0x19, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66,
-  0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xbc, 0xbb, 0x3a, 0x2b, 0x93, 0xa2,
-  0xab, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18,
-  0x4d, 0x20, 0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83,
-  0xcd, 0xaf, 0xdc, 0x2a, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30,
-  0xd8, 0xfe, 0xea, 0xad, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04,
-  0x83, 0x0d, 0xb4, 0xe0, 0x2a, 0x21, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41,
-  0x30, 0x50, 0x50, 0xeb, 0xad, 0x60, 0x2a, 0xd8, 0x2b, 0xb0, 0xca, 0xab,
-  0xd1, 0x84, 0x00, 0x18, 0x6e, 0x08, 0xfa, 0x2a, 0x0c, 0x2e, 0x70, 0x6a,
-  0x96, 0x20, 0x1f, 0x06, 0x5a, 0x1c, 0xd5, 0x20, 0x07, 0x52, 0x0d, 0xc6,
-  0xc1, 0x25, 0xcc, 0x41, 0xc0, 0x07, 0x52, 0x0d, 0xce, 0x61, 0x96, 0x41,
-  0x1f, 0xf8, 0x01, 0x5f, 0x86, 0x23, 0xf6, 0x65, 0xac, 0x86, 0xef, 0xf8,
-  0x65, 0x98, 0xe1, 0x86, 0xc0, 0xa5, 0xc8, 0xa0, 0x86, 0x40, 0x87, 0x23,
-  0xfc, 0xe5, 0xac, 0x86, 0xaf, 0x02, 0x41, 0x0f, 0x64, 0x86, 0x19, 0x6e,
-  0x08, 0x62, 0x8a, 0x0c, 0x2a, 0x18, 0x74, 0x96, 0x61, 0x1f, 0x60, 0x22,
-  0xb8, 0x7a, 0x1a, 0xe6, 0x54, 0x68, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41,
-  0x30, 0xf0, 0x60, 0x0b, 0xb4, 0x7e, 0xaa, 0xb5, 0x46, 0x13, 0x02, 0x60,
-  0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x38,
-  0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb6, 0xdb, 0x3a, 0xad, 0x83,
-  0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0xc3, 0x2d, 0xd4, 0x62,
-  0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb6, 0xdc, 0x4a, 0x2d,
-  0x89, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x40, 0x09, 0x2f, 0xd4,
-  0x4a, 0xab, 0x80, 0xb6, 0xf2, 0x4a, 0xb6, 0x46, 0x13, 0x02, 0x60, 0xb8,
-  0x21, 0xb0, 0xad, 0x30, 0xb8, 0xc0, 0xa9, 0x59, 0x02, 0x98, 0x18, 0x6e,
-  0x88, 0x99, 0xdd, 0x02, 0x83, 0x59, 0x86, 0x7e, 0xf0, 0x87, 0xa0, 0xca,
-  0x8a, 0xb5, 0xe0, 0x02, 0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60,
-  0x22, 0xaf, 0xd6, 0xb2, 0x19, 0xbf, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04,
-  0x83, 0xa9, 0xbc, 0x5a, 0x2b, 0x10, 0x2e, 0x18, 0xa6, 0xd0, 0x2a, 0xb6,
-  0xe0, 0x02, 0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0x4a, 0x2f,
-  0xd9, 0xd2, 0x99, 0xd1, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x49,
-  0xbd, 0x64, 0x2b, 0x10, 0x2e, 0x18, 0xe6, 0x02, 0xa7, 0xee, 0x70, 0xea,
-  0x62, 0x6a, 0x98, 0x33, 0xa3, 0x61, 0x8e, 0x18, 0xe6, 0x88, 0x61, 0x46,
-  0x0c, 0x0e, 0x00, 0x04, 0xc1, 0xc0, 0x73, 0x2f, 0xdf, 0xea, 0xab, 0xf5,
-  0x1a, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1,
-  0x04, 0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xd8,
-  0xea, 0xab, 0xbc, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83,
-  0xcd, 0xbe, 0xcc, 0x2b, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30,
-  0xd8, 0xee, 0xeb, 0xbc, 0x12, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04,
-  0x03, 0xe5, 0xbf, 0xcc, 0xeb, 0xb4, 0x02, 0xf9, 0xba, 0x2d, 0xf8, 0x1a,
-  0x4d, 0x08, 0x80, 0xe1, 0x86, 0x80, 0xbe, 0xc2, 0xe0, 0x02, 0xa7, 0x66,
-  0x09, 0x60, 0x62, 0xb8, 0xe1, 0x6d, 0xf0, 0x0b, 0x0c, 0x66, 0x19, 0xfe,
-  0x01, 0x26, 0x02, 0xd3, 0x2b, 0xbe, 0x8a, 0xcf, 0x70, 0xc4, 0xdc, 0xf4,
-  0x15, 0xf1, 0xcd, 0x32, 0x80, 0xc4, 0x48, 0x04, 0xe6, 0x57, 0x74, 0x13,
-  0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4e, 0x59, 0x60, 0xc8,
-  0xc7, 0x8a, 0x20, 0x3e, 0x45, 0x88, 0x98, 0x0e, 0x37, 0x04, 0x20, 0x06,
-  0x06, 0xb3, 0x0c, 0x21, 0x21, 0x12, 0x81, 0x0d, 0xa6, 0x05, 0x9f, 0x59,
-  0x82, 0x93, 0xb0, 0xd2, 0x22, 0xe2, 0x33, 0x4b, 0x70, 0x12, 0xc3, 0x11,
-  0x7e, 0x63, 0x5a, 0xc2, 0x37, 0xcb, 0x40, 0x12, 0x27, 0x11, 0xd8, 0xdf,
-  0x9c, 0x56, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x38, 0x65,
-  0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xd1, 0x62, 0x3a, 0xdc, 0x10,
-  0xac, 0x18, 0x18, 0xcc, 0x32, 0x94, 0x84, 0x49, 0x04, 0xf6, 0x5a, 0x43,
-  0x7c, 0x66, 0x09, 0x4e, 0xc2, 0x08, 0xd9, 0x82, 0xcf, 0x2c, 0xc1, 0x49,
-  0x0c, 0xb4, 0x38, 0x5a, 0x48, 0x60, 0x22, 0x41, 0x94, 0x84, 0x60, 0x12,
-  0x32, 0x37, 0x12, 0x17, 0x0c, 0x63, 0xb1, 0x55, 0x5b, 0xf1, 0x19, 0x8e,
-  0x58, 0x1d, 0xdb, 0x22, 0xbe, 0x59, 0x06, 0x94, 0x58, 0x89, 0xc0, 0x6e,
-  0x8b, 0x75, 0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x29,
-  0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0x1d, 0xd3, 0xe1, 0x86,
-  0x20, 0xc7, 0xc0, 0x60, 0x96, 0x21, 0x25, 0x54, 0x22, 0xb0, 0xe1, 0xb7,
-  0xe0, 0x33, 0x4b, 0xf0, 0x12, 0xc6, 0x5b, 0x44, 0x7c, 0x66, 0x09, 0x5e,
-  0x62, 0x38, 0xc2, 0x76, 0x7a, 0x4b, 0xf8, 0x66, 0x19, 0x58, 0xe2, 0x25,
-  0x02, 0xbb, 0x1d, 0xdf, 0x8a, 0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6,
-  0x02, 0xa7, 0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xcc, 0x4c,
-  0x87, 0x1b, 0x02, 0x32, 0x03, 0x83, 0x59, 0x86, 0x96, 0x70, 0x89, 0xc0,
-  0xcc, 0x6b, 0x88, 0xcf, 0x2c, 0xc1, 0x4b, 0x18, 0xb1, 0x5e, 0xf0, 0x99,
-  0x25, 0x78, 0x89, 0x81, 0x16, 0x47, 0x4b, 0x09, 0x4c, 0x25, 0x88, 0x96,
-  0x10, 0x5c, 0x42, 0xf4, 0x56, 0xe2, 0x82, 0x61, 0x2e, 0x70, 0xea, 0x36,
-  0xa7, 0xee, 0xb6, 0x86, 0x39, 0x76, 0x1a, 0xe6, 0x88, 0x61, 0x8e, 0x18,
-  0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x3c, 0x3a, 0x23, 0xb3, 0x11,
-  0x8b, 0xb3, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41,
-  0x18, 0x4d, 0x20, 0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04,
-  0x83, 0x6d, 0xcf, 0xd6, 0x2c, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41,
-  0x30, 0xd8, 0xf8, 0x8c, 0xcd, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10,
-  0x04, 0x83, 0xad, 0xcf, 0xda, 0x2c, 0x21, 0x82, 0x11, 0x03, 0x05, 0x00,
-  0x41, 0x30, 0x50, 0x4a, 0x8d, 0xcd, 0x5a, 0x2c, 0xc0, 0xb3, 0x1e, 0xb3,
-  0xb3, 0xd1, 0x84, 0x00, 0x18, 0x6e, 0x08, 0xf4, 0x2c, 0x0c, 0x2e, 0x70,
-  0x6a, 0x96, 0x00, 0x26, 0x06, 0x5a, 0x1c, 0xd5, 0xd8, 0x07, 0x3f, 0x0e,
-  0xf4, 0xc1, 0x25, 0xfa, 0x41, 0x78, 0x09, 0x3f, 0x0e, 0xfc, 0x61, 0xc4,
-  0xc0, 0x00, 0x40, 0x10, 0x0c, 0x1e, 0x54, 0x43, 0xb3, 0x7f, 0x32, 0xfe,
-  0xc8, 0x97, 0xf8, 0x98, 0x10, 0xc8, 0xc7, 0x82, 0x7d, 0x81, 0x8f, 0x15,
-  0x22, 0x11, 0x1f, 0x2b, 0x02, 0xf9, 0x58, 0x40, 0x12, 0xf0, 0x19, 0x31,
-  0x30, 0x00, 0x10, 0x04, 0x83, 0xe7, 0xd5, 0xde, 0xac, 0xa4, 0x4c, 0x28,
-  0xe2, 0x63, 0x81, 0x20, 0x1f, 0x0b, 0x0e, 0xf8, 0x0c, 0x47, 0x04, 0x6b,
-  0xe6, 0x7c, 0x17, 0x0c, 0x31, 0x62, 0xe0, 0x00, 0x20, 0x08, 0x06, 0x8c,
-  0xad, 0xd1, 0x59, 0x98, 0xe9, 0x98, 0xaa, 0x05, 0x79, 0x96, 0x67, 0x79,
-  0xf6, 0x66, 0xa8, 0x36, 0x4b, 0x30, 0x42, 0xc3, 0x0d, 0xaf, 0xc5, 0x6a,
-  0x60, 0x30, 0xcb, 0x20, 0x13, 0x23, 0x14, 0x8c, 0x18, 0x18, 0x00, 0x08,
-  0x82, 0xc1, 0x63, 0x6b, 0x77, 0xd6, 0x52, 0x16, 0xa8, 0x19, 0x7c, 0x46,
-  0x0c, 0x0c, 0x00, 0x04, 0xc1, 0xe0, 0xc1, 0xb5, 0x3c, 0x73, 0x29, 0x0b,
-  0xd8, 0x0c, 0x3e, 0xa3, 0x09, 0x61, 0x36, 0x0c, 0x37, 0x04, 0xb0, 0x06,
-  0x06, 0xb3, 0x0c, 0x33, 0x51, 0x13, 0xc1, 0x70, 0x44, 0x41, 0x67, 0xc3,
-  0x77, 0xc6, 0x30, 0xc3, 0x0d, 0xc1, 0x8f, 0x91, 0x41, 0x0d, 0x81, 0x0e,
-  0x47, 0x1c, 0x78, 0x36, 0x7c, 0x15, 0x08, 0x7a, 0xc9, 0x30, 0xc3, 0x0d,
-  0x81, 0x98, 0x91, 0x41, 0x05, 0x83, 0xce, 0x32, 0xd0, 0x44, 0x5a, 0x04,
-  0x67, 0x62, 0xc3, 0xdc, 0x4e, 0x0d, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08,
-  0x06, 0x5e, 0xb8, 0xc5, 0x1a, 0x9c, 0xf9, 0xda, 0x68, 0x42, 0x00, 0x8c,
-  0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x87,
-  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x86, 0x6e, 0xb8, 0x76, 0x10,
-  0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0xe9, 0x96, 0x6b, 0x0c,
-  0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xa6, 0x6e, 0xba, 0x26,
-  0x11, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x28, 0xf2, 0x96, 0x6b,
-  0x7a, 0x16, 0x94, 0x9b, 0xaa, 0x8d, 0xdb, 0x68, 0x42, 0x00, 0x0c, 0x37,
-  0x04, 0xe7, 0x16, 0x06, 0x17, 0x38, 0x35, 0x4b, 0x90, 0x16, 0xc3, 0x0d,
-  0x1a, 0xbb, 0x81, 0xc1, 0x2c, 0x83, 0x4d, 0xdc, 0x44, 0x50, 0x76, 0xd6,
-  0x6b, 0x70, 0x81, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x30, 0xd5,
-  0x9b, 0xaf, 0x81, 0xc1, 0xab, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1,
-  0x64, 0x6f, 0xbe, 0x16, 0x08, 0x17, 0x0c, 0x53, 0x79, 0x26, 0x6e, 0x70,
-  0x81, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x30, 0xe9, 0xdb, 0xb8,
-  0x8d, 0x01, 0xad, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0xb4, 0x6f,
-  0xe3, 0x16, 0x08, 0x17, 0x0c, 0x73, 0x81, 0x53, 0x77, 0x38, 0x75, 0x62,
-  0x36, 0xcc, 0xdd, 0xd5, 0x30, 0x47, 0x0c, 0x73, 0xc4, 0x30, 0x23, 0x06,
-  0x07, 0x00, 0x82, 0x60, 0xe0, 0xfd, 0xdb, 0xbb, 0xb9, 0x1a, 0xbf, 0x8d,
-  0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02,
-  0x31, 0x14, 0x91, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0x26,
-  0x67, 0x6f, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x76,
-  0x72, 0xf7, 0x96, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c,
-  0x28, 0x87, 0x6f, 0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0x81,
-  0x02, 0x73, 0xf7, 0x86, 0x6b, 0xc1, 0xc8, 0xa1, 0x5b, 0xc8, 0x8d, 0x26,
-  0x04, 0xc0, 0x70, 0x43, 0x50, 0x72, 0x61, 0x70, 0x81, 0x53, 0xb3, 0x04,
-  0x69, 0x31, 0xdc, 0x80, 0x07, 0x29, 0x07, 0x06, 0xb3, 0x0c, 0x38, 0x91,
-  0x16, 0x81, 0xad, 0x5a, 0xab, 0xc5, 0x67, 0x38, 0xa2, 0x0f, 0x5c, 0x8d,
-  0xf8, 0x66, 0x19, 0x72, 0x82, 0x27, 0x02, 0x7b, 0x35, 0x3f, 0x88, 0x8f,
-  0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa7, 0x2c, 0x30, 0xe4, 0x63,
-  0x45, 0x10, 0x9f, 0x22, 0x66, 0x4e, 0x87, 0x1b, 0x82, 0x98, 0x03, 0x83,
-  0x59, 0x06, 0x9d, 0xd8, 0x89, 0xc0, 0x86, 0x5b, 0x83, 0xcf, 0x2c, 0x01,
-  0x58, 0x98, 0xad, 0x11, 0xf1, 0x99, 0x25, 0x00, 0x8b, 0xe1, 0x08, 0x54,
-  0xb8, 0x35, 0xe1, 0x9b, 0x65, 0xe8, 0x09, 0xb0, 0x08, 0x2c, 0x15, 0x70,
-  0x2d, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9c, 0xb2, 0x20,
-  0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xf0, 0x39, 0x1d, 0x6e, 0x08, 0x78,
-  0x0e, 0x0c, 0x66, 0x19, 0x7c, 0xe2, 0x27, 0x02, 0x03, 0xb7, 0x21, 0x3e,
-  0xb3, 0x04, 0x60, 0x61, 0xc4, 0xb8, 0xc1, 0x67, 0x96, 0x00, 0x2c, 0x06,
-  0x5a, 0x1c, 0x4d, 0x27, 0xb0, 0x9d, 0x20, 0x7c, 0x42, 0xf8, 0x09, 0xb3,
-  0xe0, 0x89, 0x0b, 0x86, 0x31, 0x71, 0x33, 0xb7, 0xf8, 0x0c, 0x47, 0xd0,
-  0xc2, 0xb9, 0x11, 0xdf, 0x2c, 0x43, 0x58, 0x90, 0x45, 0x60, 0xe8, 0x56,
-  0x0b, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0xe0, 0x94, 0x05,
-  0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x04, 0xdb, 0xe9, 0x70, 0x43, 0xa0,
-  0x76, 0x60, 0x30, 0xcb, 0x20, 0x16, 0x63, 0x11, 0xd8, 0x00, 0x6f, 0xf0,
-  0x99, 0x25, 0x40, 0x0b, 0x6b, 0x37, 0x22, 0x3e, 0xb3, 0x04, 0x68, 0x31,
-  0x1c, 0xf1, 0x0b, 0xee, 0x26, 0x7c, 0xb3, 0x0c, 0x65, 0x81, 0x16, 0x81,
-  0x81, 0xc3, 0xbb, 0xc5, 0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73, 0x81,
-  0x53, 0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x77, 0xa7, 0xc3,
-  0x0d, 0x41, 0xdd, 0x81, 0xc1, 0x2c, 0x83, 0x59, 0x9c, 0x45, 0x60, 0xf7,
-  0x36, 0xc4, 0x67, 0x96, 0x00, 0x2d, 0x8c, 0xe0, 0x37, 0xf8, 0xcc, 0x12,
-  0xa0, 0xc5, 0x40, 0x8b, 0xa3, 0x89, 0x05, 0x36, 0x16, 0x84, 0x59, 0x08,
-  0x67, 0x41, 0x1b, 0x64, 0x71, 0xc1, 0x30, 0x17, 0x38, 0x75, 0x9b, 0x53,
-  0x87, 0x6e, 0xc3, 0x5c, 0x7f, 0x0d, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33,
-  0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x5e, 0xe9, 0xd5, 0x1d, 0xcd, 0x89,
-  0xde, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c,
-  0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1,
-  0xc6, 0x7a, 0x7c, 0x97, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
-  0x6c, 0xad, 0xd7, 0x77, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
-  0xc1, 0xe6, 0x7a, 0x7e, 0x97, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20,
-  0x18, 0x28, 0xb6, 0xd7, 0x77, 0x3e, 0x17, 0xa4, 0x9e, 0xdb, 0x9d, 0xde,
-  0x68, 0x42, 0x00, 0x0c, 0x37, 0x04, 0xab, 0x17, 0x06, 0x17, 0x38, 0x35,
-  0x4b, 0x90, 0x16, 0x03, 0x2d, 0x8e, 0x6a, 0xd0, 0x04, 0xb8, 0x07, 0x33,
-  0xe1, 0x12, 0x36, 0x21, 0xa0, 0x05, 0xb8, 0x07, 0x37, 0x31, 0xcb, 0xa0,
-  0x16, 0x6c, 0xe1, 0x0f, 0xc3, 0x11, 0x23, 0x11, 0x77, 0xc3, 0x77, 0x24,
-  0x31, 0xcc, 0x70, 0x43, 0xc0, 0x73, 0x64, 0x50, 0x43, 0xa0, 0xc3, 0x11,
-  0x24, 0x51, 0x77, 0xc3, 0x57, 0x81, 0xa0, 0x67, 0x12, 0xc3, 0x0c, 0x37,
-  0x04, 0x3f, 0x47, 0x06, 0x15, 0x0c, 0x3a, 0xcb, 0xb0, 0x16, 0xa0, 0x11,
-  0xdc, 0xc8, 0x0d, 0x73, 0x38, 0x36, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20,
-  0x18, 0x78, 0xbe, 0xe7, 0x7a, 0x6d, 0xb7, 0x7b, 0xa3, 0x09, 0x01, 0x30,
-  0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x1c,
-  0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x5b, 0xf9, 0xd5, 0xde, 0x41,
-  0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0x99, 0x9f, 0xed, 0x31,
-  0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xdb, 0xf9, 0xdd, 0x9e,
-  0x44, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xa0, 0xbc, 0x9f, 0xed,
-  0xdd, 0x5d, 0x20, 0x7e, 0xa7, 0x07, 0x7e, 0xa3, 0x09, 0x01, 0x30, 0xdc,
-  0x10, 0x90, 0x5f, 0x18, 0x5c, 0xe0, 0xd4, 0x2c, 0x01, 0x68, 0x0c, 0x37,
-  0xdc, 0x44, 0xfa, 0x81, 0xc1, 0x2c, 0x43, 0x5b, 0xb8, 0x45, 0x50, 0x73,
-  0xa7, 0x7b, 0x70, 0x81, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x30,
-  0xc9, 0xdf, 0xee, 0xf9, 0x04, 0xeb, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82,
-  0xc1, 0x34, 0x7f, 0xbb, 0x17, 0x08, 0x17, 0x0c, 0x53, 0x76, 0xf7, 0x7b,
-  0x70, 0x81, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x30, 0xdd, 0x1f,
-  0xf8, 0x81, 0x45, 0xec, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x84,
-  0x7f, 0xe0, 0x17, 0x08, 0x17, 0x0c, 0x73, 0x81, 0x53, 0x77, 0x38, 0x75,
-  0x3f, 0x37, 0xcc, 0xd1, 0xd9, 0x30, 0x47, 0x0c, 0x73, 0xc4, 0x30, 0x23,
-  0x06, 0x07, 0x00, 0x82, 0x60, 0xe0, 0xf1, 0x1f, 0xfb, 0xad, 0x5e, 0xfe,
-  0x8d, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68,
-  0x02, 0x31, 0x14, 0x91, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c,
-  0x23, 0x18, 0xcc, 0x5f, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60,
-  0xb0, 0x91, 0x60, 0x40, 0x7f, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08,
-  0x82, 0xc1, 0x56, 0x82, 0x41, 0xfd, 0x25, 0x44, 0x30, 0x62, 0xa0, 0x00,
-  0x20, 0x08, 0x06, 0x4a, 0x0b, 0x06, 0xf4, 0x57, 0x7b, 0x01, 0x08, 0x06,
-  0xe5, 0xe7, 0x7f, 0xa3, 0x09, 0x01, 0x30, 0xdc, 0x10, 0x88, 0x60, 0x10,
-  0x06, 0x17, 0x38, 0x35, 0x4b, 0x00, 0x1a, 0xc3, 0x0d, 0x75, 0x61, 0x82,
-  0x01, 0x18, 0xcc, 0x32, 0xbc, 0x05, 0x68, 0x04, 0x86, 0x7a, 0xaa, 0x17,
-  0x9f, 0xe1, 0x88, 0xbd, 0x58, 0x3d, 0xe2, 0x9b, 0x65, 0x80, 0x8b, 0xb9,
-  0x08, 0x8c, 0xf5, 0xf8, 0x22, 0x3e, 0x16, 0x0c, 0xf4, 0xb9, 0x60, 0x98,
-  0x0b, 0x9c, 0xb2, 0xc0, 0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x80, 0xc1,
-  0x40, 0x87, 0x1b, 0x02, 0x17, 0x0c, 0xc0, 0x60, 0x96, 0x21, 0x2e, 0xe4,
-  0x22, 0xb0, 0x81, 0xf6, 0xe0, 0x33, 0x4b, 0x70, 0x17, 0x36, 0x7b, 0x44,
-  0x7c, 0x66, 0x09, 0xee, 0x62, 0x38, 0xc2, 0x34, 0x68, 0x4f, 0xf8, 0x66,
-  0x19, 0xe8, 0xe2, 0x2e, 0x02, 0x3b, 0x8d, 0xda, 0x8b, 0x8f, 0x05, 0x0e,
-  0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa7, 0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10,
-  0x9f, 0x22, 0x76, 0x30, 0xd0, 0xe1, 0x86, 0x20, 0x07, 0x03, 0x30, 0x98,
-  0x65, 0xa8, 0x0b, 0xbb, 0x08, 0xac, 0xf7, 0x86, 0xf8, 0xcc, 0x12, 0xdc,
-  0x85, 0x11, 0xe0, 0x07, 0x9f, 0x59, 0x82, 0xbb, 0x18, 0x68, 0x71, 0xb4,
-  0xb8, 0xc0, 0xe4, 0x82, 0xa8, 0x0b, 0xc1, 0x2e, 0xc4, 0x66, 0x2e, 0x2e,
-  0x18, 0xc6, 0x7e, 0x6f, 0xfc, 0xe2, 0x33, 0x1c, 0x11, 0x1b, 0xe4, 0x47,
-  0x7c, 0xb3, 0x0c, 0x78, 0xb1, 0x17, 0x81, 0x95, 0x9f, 0x6c, 0xc4, 0xc7,
-  0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x53, 0x16, 0x18, 0xf2, 0xb1,
-  0x22, 0x88, 0x4f, 0x11, 0x69, 0x18, 0xe8, 0x70, 0x43, 0x70, 0x86, 0x01,
-  0x18, 0xcc, 0x32, 0xe4, 0x85, 0x5e, 0x04, 0x36, 0xb4, 0x1f, 0x7c, 0x66,
-  0x09, 0xfe, 0xc2, 0xd4, 0x8f, 0x88, 0xcf, 0x2c, 0xc1, 0x5f, 0x0c, 0x47,
-  0xf0, 0xc6, 0xfa, 0x09, 0xdf, 0x2c, 0x03, 0x5f, 0xfc, 0x45, 0x60, 0xbd,
-  0xc1, 0x7e, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0xe0, 0x94,
-  0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x04, 0x1d, 0x06, 0x3a, 0xdc,
-  0x10, 0xc8, 0x61, 0x00, 0x06, 0xb3, 0x0c, 0x7d, 0xe1, 0x17, 0x81, 0xd1,
-  0xdf, 0x10, 0x9f, 0x59, 0x82, 0xbf, 0x30, 0x22, 0xff, 0xe0, 0x33, 0x4b,
-  0xf0, 0x17, 0x03, 0x2d, 0x8e, 0x96, 0x17, 0x98, 0x5e, 0x10, 0x7d, 0x21,
-  0xf8, 0x85, 0xeb, 0xec, 0xc5, 0x05, 0xc3, 0x5c, 0xe0, 0xd4, 0x6d, 0x4e,
-  0x5d, 0xf9, 0x0d, 0x73, 0xfa, 0x36, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc,
-  0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x78, 0xa2, 0x18, 0xc8, 0x61, 0x10,
-  0x83, 0xc1, 0x1f, 0x06, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3,
-  0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00,
-  0x20, 0x08, 0x06, 0x5b, 0x2a, 0x06, 0x79, 0x18, 0x24, 0x44, 0x30, 0x62,
-  0x80, 0x00, 0x20, 0x08, 0x06, 0x9b, 0x2a, 0x06, 0x7a, 0x18, 0x24, 0x44,
-  0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xdb, 0x2a, 0x06, 0x7b, 0x18,
-  0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0xca, 0x2c, 0x06,
-  0x7a, 0x18, 0xec, 0x60, 0x10, 0x98, 0x62, 0xb0, 0x86, 0x01, 0x29, 0x06,
-  0xa3, 0x09, 0x01, 0x30, 0xdc, 0x10, 0xa0, 0x62, 0x10, 0x06, 0x17, 0x38,
-  0x35, 0x4b, 0x00, 0x1a, 0x03, 0x2d, 0x8e, 0x6a, 0xac, 0x85, 0x2e, 0x0a,
-  0x6a, 0xe1, 0x12, 0x6d, 0x21, 0xfc, 0x85, 0x2e, 0x0a, 0x6e, 0x61, 0x20,
-  0xd2, 0x86, 0x01, 0x7c, 0x66, 0x19, 0x42, 0x63, 0x34, 0xf8, 0x63, 0x38,
-  0x22, 0x78, 0xc3, 0x60, 0xf8, 0x4e, 0x18, 0x66, 0xb8, 0x21, 0xd0, 0xc1,
-  0x80, 0x0c, 0x6a, 0x08, 0x74, 0x38, 0x42, 0x44, 0xe6, 0x30, 0x18, 0xbe,
-  0x0a, 0x04, 0x3d, 0x12, 0x19, 0x66, 0xb8, 0x21, 0xe8, 0xc1, 0x80, 0x0c,
-  0x2a, 0x18, 0x74, 0x96, 0x41, 0x34, 0x6e, 0x23, 0xb8, 0x10, 0x0c, 0x86,
-  0x39, 0x9b, 0x1b, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x3c, 0x5e,
-  0x0c, 0x58, 0x31, 0x58, 0xc3, 0x20, 0x17, 0x83, 0xd1, 0x84, 0x00, 0x18,
-  0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x0e,
-  0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x6d, 0x1c, 0x83, 0x59, 0x0c,
-  0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x8d, 0x1c, 0x03,
-  0x5a, 0x0c, 0x18, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0xad,
-  0x1c, 0x83, 0x5a, 0x0c, 0x24, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04,
-  0x03, 0xa5, 0x1d, 0x03, 0x5a, 0x0c, 0xea, 0x30, 0x08, 0xc0, 0x31, 0x28,
-  0xc5, 0xc0, 0x17, 0x83, 0xd1, 0x84, 0x00, 0x18, 0x6e, 0x08, 0xc4, 0x31,
-  0x08, 0x83, 0x0b, 0x9c, 0x9a, 0x25, 0xb8, 0x8d, 0xe1, 0x86, 0x1a, 0x39,
-  0xc7, 0x00, 0x0c, 0x66, 0x19, 0x48, 0xa3, 0x34, 0x82, 0x8a, 0xc3, 0x00,
-  0x17, 0x03, 0xb8, 0xc0, 0xa9, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x98,
-  0xe0, 0x31, 0xc8, 0xc5, 0x80, 0x53, 0xc5, 0x60, 0xc4, 0xe0, 0x00, 0x40,
-  0x10, 0x0c, 0xa6, 0x78, 0x0c, 0x72, 0x31, 0x08, 0x84, 0x0b, 0x86, 0x29,
-  0x3a, 0x0c, 0x7a, 0x31, 0x80, 0x0b, 0x9c, 0x1a, 0x31, 0x38, 0x00, 0x10,
-  0x04, 0x83, 0xa9, 0x1e, 0x03, 0x5f, 0x0c, 0x7c, 0xe4, 0x15, 0x83, 0x11,
-  0x83, 0x03, 0x00, 0x41, 0x30, 0x98, 0xec, 0x31, 0xf0, 0xc5, 0x20, 0x10,
-  0x2e, 0x18, 0xe6, 0x02, 0xa7, 0xee, 0x70, 0xea, 0x7a, 0x30, 0x18, 0xe6,
-  0xe4, 0x6e, 0x98, 0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00,
-  0x41, 0x30, 0xf0, 0xf4, 0x31, 0x50, 0xc7, 0x20, 0x15, 0x83, 0x7b, 0x0c,
-  0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34,
-  0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb6,
-  0x90, 0x0c, 0xe2, 0x31, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10,
-  0x0c, 0x36, 0x91, 0x0c, 0xe4, 0x31, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01,
-  0x40, 0x10, 0x0c, 0xb6, 0x91, 0x0c, 0xe6, 0x31, 0x48, 0x88, 0x60, 0xc4,
-  0x40, 0x01, 0x40, 0x10, 0x0c, 0x94, 0x95, 0x0c, 0xe4, 0x31, 0x98, 0xc5,
-  0x20, 0xf0, 0xc7, 0x60, 0x1c, 0x03, 0x7e, 0x0c, 0x46, 0x13, 0x02, 0x60,
-  0xb8, 0x21, 0x00, 0xc9, 0x20, 0x0c, 0x2e, 0x70, 0x6a, 0x96, 0xe0, 0x36,
-  0x86, 0x1b, 0xe6, 0x84, 0x24, 0x03, 0x30, 0x98, 0x65, 0x30, 0x8d, 0xdb,
-  0x08, 0xcc, 0x14, 0x03, 0x54, 0x0c, 0xe2, 0x33, 0x1c, 0x91, 0x07, 0xa9,
-  0x18, 0x10, 0xdf, 0x2c, 0xc3, 0x69, 0xa8, 0x46, 0x60, 0xaa, 0x18, 0xe8,
-  0x41, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x38, 0x65, 0x81,
-  0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xe1, 0x92, 0x81, 0x0e, 0x37, 0x04,
-  0x2c, 0x19, 0x80, 0xc1, 0x2c, 0x03, 0x6a, 0xa4, 0x46, 0x60, 0x83, 0x2c,
-  0x06, 0xf0, 0x99, 0x25, 0x70, 0x0d, 0x8b, 0xc5, 0x80, 0x88, 0xcf, 0x2c,
-  0x81, 0x6b, 0x0c, 0x47, 0x90, 0x82, 0x2c, 0x06, 0xc2, 0x37, 0xcb, 0xb0,
-  0x1a, 0xae, 0x11, 0x58, 0x29, 0xcc, 0x62, 0x10, 0x1f, 0x0b, 0x1c, 0xfa,
-  0x5c, 0x30, 0xcc, 0x05, 0x4e, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e,
-  0x45, 0xe4, 0x64, 0xa0, 0xc3, 0x0d, 0xc1, 0x4d, 0x06, 0x60, 0x30, 0xcb,
-  0xc0, 0x1a, 0xad, 0x11, 0xd8, 0x2e, 0x06, 0x43, 0x7c, 0x66, 0x09, 0x5c,
-  0xc3, 0x08, 0x5f, 0x0c, 0xe0, 0x33, 0x4b, 0xe0, 0x1a, 0x03, 0x2d, 0x8e,
-  0x86, 0x1a, 0x58, 0x6a, 0x10, 0xac, 0x21, 0xb4, 0x86, 0x4f, 0xa8, 0xc6,
-  0x05, 0xc3, 0x58, 0x2f, 0x06, 0xe1, 0x18, 0xc4, 0x67, 0x38, 0xe2, 0x55,
-  0xc4, 0x31, 0x20, 0xbe, 0x59, 0x86, 0xd7, 0x90, 0x8d, 0xc0, 0xc6, 0x31,
-  0x80, 0x95, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x70, 0xca,
-  0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0xe2, 0x2c, 0x03, 0x1d, 0x6e,
-  0x08, 0xca, 0x32, 0x00, 0x83, 0x59, 0x06, 0xd8, 0x88, 0x8d, 0xc0, 0x86,
-  0x75, 0x0c, 0xe0, 0x33, 0x4b, 0x60, 0x1b, 0x86, 0x8e, 0x01, 0x11, 0x9f,
-  0x59, 0x02, 0xdb, 0x18, 0x8e, 0xd0, 0x95, 0x74, 0x0c, 0x84, 0x6f, 0x96,
-  0x61, 0x36, 0x6c, 0x23, 0xb0, 0x5d, 0x51, 0xc7, 0x20, 0x3e, 0x16, 0x38,
-  0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9c, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41,
-  0x7c, 0x8a, 0x90, 0xcb, 0x40, 0x87, 0x1b, 0x02, 0xb8, 0x0c, 0xc0, 0x60,
-  0x96, 0x81, 0x36, 0x6a, 0x23, 0x30, 0x79, 0x0c, 0x86, 0xf8, 0xcc, 0x12,
-  0xd8, 0x86, 0x11, 0xf7, 0x18, 0xc0, 0x67, 0x96, 0xc0, 0x36, 0x06, 0x5a,
-  0x1c, 0x0d, 0x36, 0xb0, 0xd8, 0x20, 0x68, 0x43, 0xa8, 0x0d, 0xd5, 0x92,
-  0x8d, 0x0b, 0x86, 0xb9, 0xc0, 0xa9, 0xdb, 0x9c, 0xba, 0x71, 0x0c, 0x86,
-  0x39, 0xfc, 0x1b, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00,
-  0x40, 0x10, 0x0c, 0x3c, 0xd0, 0x0c, 0xe0, 0x32, 0x78, 0xc9, 0xa0, 0x2f,
-  0x83, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18,
-  0x4d, 0x20, 0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83,
-  0xed, 0x34, 0x83, 0xbb, 0x0c, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10,
-  0x04, 0x83, 0x0d, 0x35, 0x03, 0xbc, 0x0c, 0x12, 0x22, 0x18, 0x31, 0x40,
-  0x00, 0x10, 0x04, 0x83, 0x2d, 0x35, 0x83, 0xbc, 0x0c, 0x12, 0x22, 0x18,
-  0x31, 0x50, 0x00, 0x10, 0x04, 0x03, 0x25, 0x36, 0x03, 0xbc, 0x0c, 0x72,
-  0x32, 0x08, 0x48, 0x33, 0x48, 0xcb, 0x40, 0x34, 0x83, 0xd1, 0x84, 0x00,
-  0x18, 0x6e, 0x08, 0x4c, 0x33, 0x08, 0x83, 0x0b, 0x9c, 0x9a, 0x25, 0xb8,
-  0x8d, 0x81, 0x16, 0x47, 0x35, 0x44, 0xc3, 0x56, 0x85, 0xd0, 0x70, 0x09,
-  0xd2, 0x10, 0x6c, 0xc3, 0x56, 0x85, 0xd2, 0x30, 0x7f, 0x09, 0xcb, 0x00,
-  0x3e, 0xb3, 0x0c, 0xb8, 0xa1, 0x1b, 0xfa, 0x32, 0x1c, 0x11, 0xb4, 0x65,
-  0x30, 0x7c, 0x27, 0x0c, 0x33, 0xdc, 0x10, 0xe0, 0x64, 0x40, 0x06, 0x35,
-  0x04, 0x3a, 0x1c, 0x01, 0x32, 0x71, 0x19, 0x0c, 0x5f, 0x05, 0x82, 0x9e,
-  0xc8, 0x0c, 0x33, 0xdc, 0x10, 0xec, 0x64, 0x40, 0x06, 0x15, 0x0c, 0x3a,
-  0xcb, 0x90, 0x1b, 0xee, 0x11, 0xdc, 0x3f, 0x06, 0xc3, 0x1c, 0x0d, 0x06,
-  0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x81, 0xa7, 0x9b, 0x81, 0x6a,
-  0x06, 0x69, 0x19, 0xdc, 0x66, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42,
-  0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0xc4, 0x21, 0x23, 0x06,
-  0x08, 0x00, 0x82, 0x60, 0xb0, 0x85, 0x67, 0x10, 0x9b, 0xc1, 0x41, 0x04,
-  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0x89, 0x67, 0x20, 0x9b, 0x01,
-  0x43, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0x8d, 0x67, 0x30,
-  0x9b, 0x81, 0x44, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xa0, 0xac,
-  0x67, 0x20, 0x9b, 0xc1, 0x5c, 0x06, 0x81, 0x6f, 0x06, 0xa3, 0x19, 0xf0,
-  0x66, 0x30, 0x9a, 0x10, 0x00, 0xc3, 0x0d, 0x01, 0x78, 0x06, 0x61, 0x70,
-  0x81, 0x53, 0xb3, 0x04, 0xee, 0x31, 0xdc, 0x30, 0x33, 0xe5, 0x19, 0x80,
-  0xc1, 0x2c, 0xc3, 0x6e, 0xf0, 0x46, 0x50, 0x6f, 0x19, 0xd8, 0x66, 0x00,
-  0x17, 0x38, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x93, 0x7b, 0x06,
-  0xb7, 0x19, 0x70, 0xa8, 0x19, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1,
-  0xf4, 0x9e, 0xc1, 0x6d, 0x06, 0x81, 0x70, 0xc1, 0x30, 0x25, 0x97, 0xc1,
-  0x6e, 0x06, 0x70, 0x81, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x30,
-  0xcd, 0x67, 0xc0, 0x9b, 0x01, 0xcf, 0xb4, 0x66, 0x30, 0x62, 0x70, 0x00,
-  0x20, 0x08, 0x06, 0x13, 0x7d, 0x06, 0xbc, 0x19, 0x04, 0xc2, 0x05, 0xc3,
-  0x5c, 0xe0, 0xd4, 0x1d, 0x4e, 0xdd, 0x4e, 0x06, 0xc3, 0x1c, 0x1c, 0x06,
-  0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82,
-  0x81, 0x87, 0x9f, 0x01, 0x7a, 0x06, 0xa7, 0x19, 0xd4, 0x67, 0x30, 0x9a,
-  0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4,
-  0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0xfd, 0x67,
-  0xf0, 0x9e, 0x41, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0,
-  0x81, 0x68, 0x00, 0x9f, 0x41, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82,
-  0x60, 0xb0, 0x85, 0x68, 0x10, 0x9f, 0x41, 0x42, 0x04, 0x23, 0x06, 0x0a,
-  0x00, 0x82, 0x60, 0xa0, 0xa4, 0x68, 0x00, 0x9f, 0x41, 0x6c, 0x06, 0x01,
-  0x7f, 0x06, 0xe1, 0x19, 0xe8, 0x67, 0x30, 0x9a, 0x10, 0x00, 0xc3, 0x0d,
-  0x81, 0x7f, 0x06, 0x61, 0x70, 0x81, 0x53, 0xb3, 0x04, 0xee, 0x31, 0xdc,
-  0x10, 0x37, 0x22, 0x1a, 0x80, 0xc1, 0x2c, 0x43, 0x6f, 0xb8, 0x47, 0x60,
-  0xa4, 0x19, 0x98, 0x66, 0x10, 0x9f, 0xe1, 0x88, 0x3c, 0x38, 0xcd, 0x80,
-  0xf8, 0x66, 0x19, 0x7c, 0x23, 0x3c, 0x02, 0x43, 0xcd, 0x40, 0x0f, 0xe2,
-  0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x29, 0x0b, 0x0c, 0xf9,
-  0x58, 0x11, 0xc4, 0xa7, 0x08, 0x16, 0x0d, 0x74, 0xb8, 0x21, 0x50, 0xd1,
-  0x00, 0x0c, 0x66, 0x19, 0x7e, 0x03, 0x3c, 0x02, 0x1b, 0x60, 0x33, 0x80,
-  0xcf, 0x2c, 0x41, 0x79, 0xd8, 0x6b, 0x06, 0x44, 0x7c, 0x66, 0x09, 0xca,
-  0x63, 0x38, 0x82, 0x14, 0x60, 0x33, 0x10, 0xbe, 0x59, 0x06, 0xf1, 0x28,
-  0x8f, 0xc0, 0x4a, 0x21, 0x36, 0x83, 0xf8, 0x58, 0xe0, 0xd0, 0xe7, 0x82,
-  0x61, 0x2e, 0x70, 0xca, 0x82, 0x48, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0xe2,
-  0x46, 0x03, 0x1d, 0x6e, 0x08, 0x6a, 0x34, 0x00, 0x83, 0x59, 0x86, 0xf1,
-  0x20, 0x8f, 0xc0, 0x72, 0x33, 0x18, 0xe2, 0x33, 0x4b, 0x50, 0x1e, 0x46,
-  0xf0, 0x66, 0x00, 0x9f, 0x59, 0x82, 0xf2, 0x18, 0x68, 0x71, 0xb4, 0xdf,
-  0xc0, 0xc0, 0x83, 0x18, 0x0f, 0x81, 0x3c, 0x7c, 0x22, 0x3c, 0x2e, 0x18,
-  0xc6, 0x76, 0x33, 0xf8, 0xcd, 0x20, 0x3e, 0xc3, 0x11, 0xad, 0x03, 0x9e,
-  0x01, 0xf1, 0xcd, 0x32, 0x98, 0x47, 0x7a, 0x04, 0x16, 0x9e, 0x81, 0xeb,
-  0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x53, 0x16, 0x18,
-  0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x65, 0x1a, 0xe8, 0x70, 0x43, 0x30,
-  0xa6, 0x01, 0x18, 0xcc, 0x32, 0x9c, 0x07, 0x7a, 0x04, 0x36, 0xa4, 0x67,
-  0x00, 0x9f, 0x59, 0x82, 0xf6, 0x30, 0xf3, 0x0c, 0x88, 0xf8, 0xcc, 0x12,
-  0xb4, 0xc7, 0x70, 0x04, 0xee, 0x9c, 0x67, 0x20, 0x7c, 0xb3, 0x0c, 0xea,
-  0xd1, 0x1e, 0x81, 0xe5, 0x0e, 0x7a, 0x06, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf,
-  0x05, 0xc3, 0x5c, 0xe0, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53,
-  0x04, 0x9c, 0x06, 0x3a, 0xdc, 0x10, 0xb8, 0x69, 0x00, 0x06, 0xb3, 0x0c,
-  0xeb, 0xc1, 0x1e, 0x81, 0xc1, 0x67, 0x30, 0xc4, 0x67, 0x96, 0xa0, 0x3d,
-  0x8c, 0xa8, 0xcf, 0x00, 0x3e, 0xb3, 0x04, 0xed, 0x31, 0xd0, 0xe2, 0x68,
-  0xe7, 0x81, 0xa1, 0x07, 0xb1, 0x1e, 0x02, 0x7b, 0x98, 0x5e, 0x7a, 0x5c,
-  0x30, 0xcc, 0x05, 0x4e, 0xdd, 0xe6, 0xd4, 0x85, 0x67, 0x30, 0xcc, 0xd9,
-  0x63, 0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80,
-  0x20, 0x18, 0x78, 0x7e, 0x1a, 0xb8, 0x69, 0xd0, 0xa2, 0xc1, 0x9e, 0x06,
-  0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a,
-  0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x5b,
-  0xa9, 0x06, 0x75, 0x1a, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08,
-  0x06, 0x9b, 0xa9, 0x06, 0x76, 0x1a, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00,
-  0x20, 0x08, 0x06, 0xdb, 0xa9, 0x06, 0x77, 0x1a, 0x24, 0x44, 0x30, 0x62,
-  0xa0, 0x00, 0x20, 0x08, 0x06, 0xca, 0xab, 0x06, 0x76, 0x1a, 0xdc, 0x68,
-  0x10, 0x88, 0x6a, 0x70, 0xa6, 0x01, 0xa8, 0x06, 0xa3, 0x09, 0x01, 0x30,
-  0xdc, 0x10, 0x90, 0x6a, 0x10, 0x06, 0x17, 0x38, 0x35, 0x4b, 0xe0, 0x1e,
-  0x03, 0x2d, 0x8e, 0x6a, 0xe4, 0x86, 0x2c, 0x0b, 0xb8, 0xe1, 0x12, 0xbb,
-  0x21, 0xb4, 0x87, 0x2c, 0x0b, 0xbc, 0x31, 0xcb, 0xf0, 0x1e, 0xf1, 0x71,
-  0x3f, 0xc3, 0x11, 0xfb, 0xa3, 0xa6, 0xc1, 0xf0, 0x1d, 0xff, 0x0c, 0x33,
-  0xdc, 0x10, 0xd4, 0x68, 0x40, 0x06, 0x35, 0x04, 0x3a, 0x1c, 0xe1, 0x3f,
-  0x6e, 0x1a, 0x0c, 0x5f, 0x05, 0x82, 0x1e, 0x08, 0x0d, 0x33, 0xdc, 0x10,
-  0xe0, 0x68, 0x40, 0x06, 0x15, 0x0c, 0x3a, 0xcb, 0x00, 0x1f, 0x25, 0x12,
-  0x1c, 0x7f, 0x06, 0xc3, 0x5c, 0x4c, 0x06, 0xc3, 0x8c, 0x18, 0x1c, 0x00,
-  0x08, 0x82, 0x81, 0x77, 0xab, 0xc1, 0xa9, 0x06, 0x66, 0x1a, 0xd0, 0x6a,
-  0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3,
-  0x09, 0xc4, 0x50, 0xc4, 0x21, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0,
-  0xf9, 0x6a, 0xe0, 0xaa, 0xc1, 0x41, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82,
-  0x60, 0xb0, 0xfd, 0x6a, 0xf0, 0xaa, 0x01, 0x43, 0x04, 0x23, 0x06, 0x08,
-  0x00, 0x82, 0x60, 0xb0, 0x81, 0x6b, 0x00, 0xab, 0x81, 0x44, 0x04, 0x23,
-  0x06, 0x0a, 0x00, 0x82, 0x60, 0xa0, 0xa0, 0x6b, 0xf0, 0xaa, 0x01, 0x9c,
-  0x06, 0xc1, 0xae, 0x06, 0xa0, 0x1a, 0xe4, 0x6a, 0x30, 0x9a, 0x10, 0x00,
-  0xc3, 0x0d, 0x41, 0xaf, 0x06, 0x61, 0x70, 0x81, 0x53, 0xb3, 0x04, 0x25,
-  0x32, 0xdc, 0x00, 0x43, 0xe2, 0x1a, 0x80, 0xc1, 0x2c, 0x83, 0x7c, 0xcc,
-  0x47, 0x50, 0x6c, 0x1a, 0xcc, 0x6a, 0x00, 0x17, 0x38, 0x35, 0x62, 0x70,
-  0x00, 0x20, 0x08, 0x06, 0xd3, 0xba, 0x06, 0xb4, 0x1a, 0xd8, 0x50, 0xa9,
-  0x06, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x30, 0xb1, 0x6b, 0x40, 0xab,
-  0x41, 0x20, 0x5c, 0x30, 0x4c, 0xbd, 0x69, 0x80, 0xab, 0x01, 0x5c, 0xe0,
-  0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x4c, 0xf0, 0x1a, 0xe4, 0x6a,
-  0xa0, 0x43, 0xaa, 0x1a, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x14,
-  0xaf, 0x41, 0xae, 0x06, 0x81, 0x70, 0xc1, 0x30, 0x17, 0x38, 0x75, 0x87,
-  0x53, 0x87, 0xa3, 0xc1, 0x30, 0xd7, 0x96, 0xc1, 0x30, 0x47, 0x0c, 0x73,
-  0xc4, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xe0, 0xd5, 0x6b, 0x50,
-  0xae, 0x01, 0xa9, 0x06, 0xf2, 0x1a, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82,
-  0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x91, 0xc8, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0xfc, 0x1a, 0xb0, 0x6b, 0x90, 0x10,
-  0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0xfd, 0x1a, 0xb4, 0x6b,
-  0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0xfe, 0x1a,
-  0xb8, 0x6b, 0x90, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x28,
-  0x26, 0x1b, 0xb4, 0x6b, 0xe0, 0xaa, 0x41, 0x90, 0xaf, 0x81, 0xaf, 0x06,
-  0xf7, 0x1a, 0x8c, 0x26, 0x04, 0xc0, 0x70, 0x43, 0xb0, 0xaf, 0x41, 0x18,
-  0x5c, 0xe0, 0xd4, 0x2c, 0x41, 0x89, 0x0c, 0x37, 0xb8, 0xd1, 0xbf, 0x06,
-  0x60, 0x30, 0xcb, 0x40, 0x1f, 0x25, 0x12, 0x58, 0xa8, 0x06, 0xa3, 0x1a,
-  0xc4, 0x67, 0x38, 0x62, 0x8e, 0x48, 0x35, 0x20, 0xbe, 0x59, 0x86, 0xfa,
-  0xc0, 0x8f, 0xc0, 0x4a, 0x35, 0xa0, 0xa3, 0xf8, 0x58, 0x30, 0xd0, 0xe7,
-  0x82, 0x61, 0x2e, 0x70, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29,
-  0x22, 0x65, 0x03, 0x1d, 0x6e, 0x08, 0x4e, 0x36, 0x00, 0x83, 0x59, 0x06,
-  0xfb, 0xb8, 0x8f, 0xc0, 0x86, 0x56, 0x0d, 0xe0, 0x33, 0x4b, 0xc0, 0x1f,
-  0xc6, 0xaa, 0x01, 0x11, 0x9f, 0x59, 0x02, 0xfe, 0x18, 0x8e, 0xf0, 0xa3,
-  0x56, 0x0d, 0x84, 0x6f, 0x96, 0x21, 0x3f, 0xf8, 0x23, 0xb0, 0x3f, 0x72,
-  0xd5, 0x20, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9c, 0xb2,
-  0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xa0, 0xd9, 0x40, 0x87, 0x1b,
-  0x02, 0x99, 0x0d, 0xc0, 0x60, 0x96, 0x41, 0x3f, 0xf6, 0x23, 0x30, 0x5b,
-  0x0d, 0x86, 0xf8, 0xcc, 0x12, 0xf0, 0x87, 0x11, 0xb9, 0x1a, 0xc0, 0x67,
-  0x96, 0x80, 0x3f, 0x06, 0x5a, 0x1c, 0xcd, 0x3e, 0xb0, 0xfb, 0x20, 0xf4,
-  0x43, 0xd8, 0x0f, 0x99, 0x0c, 0xf0, 0xe3, 0x82, 0x61, 0x0c, 0x57, 0x03,
-  0x5e, 0x0d, 0xe2, 0x33, 0x1c, 0xb1, 0x4a, 0xbd, 0x1a, 0x10, 0xdf, 0x2c,
-  0x43, 0x7f, 0x80, 0x48, 0x60, 0xbe, 0x1a, 0xb0, 0x52, 0x7c, 0x2c, 0x18,
-  0xe8, 0x73, 0xc1, 0x30, 0x17, 0x38, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82,
-  0xf8, 0x14, 0x21, 0xb6, 0x81, 0x0e, 0x37, 0x04, 0x60, 0x1b, 0x80, 0xc1,
-  0x2c, 0x83, 0x7f, 0xfc, 0x47, 0x60, 0x83, 0xb9, 0x06, 0xf0, 0x99, 0x25,
-  0x20, 0x11, 0x1b, 0xd7, 0x80, 0x88, 0xcf, 0x2c, 0x01, 0x89, 0x0c, 0x47,
-  0xd8, 0x12, 0xb9, 0x06, 0xc2, 0x37, 0xcb, 0x10, 0x22, 0x24, 0x12, 0xd8,
-  0x2d, 0x95, 0x6b, 0x10, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05,
-  0x4e, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xb4, 0x6d, 0xa0,
-  0xc3, 0x0d, 0xc1, 0xda, 0x06, 0x60, 0x30, 0xcb, 0x20, 0x22, 0x23, 0x12,
-  0x58, 0xbb, 0x06, 0x43, 0x7c, 0x66, 0x09, 0x48, 0xc4, 0x08, 0x79, 0x0d,
-  0xe0, 0x33, 0x4b, 0x40, 0x22, 0x03, 0x2d, 0x8e, 0xe6, 0x1f, 0xd8, 0x7f,
-  0x10, 0x22, 0x22, 0x8c, 0x88, 0x68, 0x06, 0x20, 0x72, 0xc1, 0x30, 0x17,
-  0x38, 0x75, 0x9b, 0x53, 0xe7, 0xab, 0xc1, 0x30, 0x37, 0x9f, 0xc1, 0x30,
-  0x47, 0x0c, 0x73, 0xc4, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xe0,
-  0xed, 0x6d, 0xb0, 0xb6, 0x81, 0xca, 0x06, 0x78, 0x1b, 0x8c, 0x26, 0x04,
-  0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14,
-  0x91, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0xa2, 0x1b, 0xc8,
-  0x6d, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0xa3,
-  0x1b, 0xcc, 0x6d, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
-  0x6c, 0xa4, 0x1b, 0xd0, 0x6d, 0x90, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80,
-  0x20, 0x18, 0x28, 0xac, 0x1b, 0xcc, 0x6d, 0x40, 0xb3, 0x41, 0xf0, 0xb7,
-  0x01, 0xd9, 0x06, 0x7d, 0x1b, 0x8c, 0x26, 0x04, 0xc0, 0x70, 0x43, 0x10,
-  0xba, 0x41, 0x18, 0x5c, 0xe0, 0xd4, 0x2c, 0x41, 0x89, 0x0c, 0xb4, 0x38,
-  0xaa, 0x01, 0x1f, 0xac, 0x2e, 0xbc, 0x87, 0x4b, 0xc8, 0x87, 0x40, 0x22,
-  0xac, 0x2e, 0xcc, 0xc7, 0x2c, 0x83, 0x89, 0xa0, 0x08, 0x3d, 0x0d, 0x47,
-  0xe4, 0xd3, 0xd9, 0x06, 0xc3, 0x77, 0xfa, 0x34, 0xcc, 0x70, 0x43, 0x20,
-  0xb3, 0x01, 0x19, 0xd4, 0x10, 0xe8, 0x70, 0xc4, 0x3e, 0xad, 0x6d, 0x30,
-  0x7c, 0x15, 0x08, 0x7a, 0xfd, 0x34, 0xcc, 0x70, 0x43, 0x50, 0xb3, 0x01,
-  0x19, 0x54, 0x30, 0xe8, 0x2c, 0xc3, 0x89, 0xf0, 0x48, 0x70, 0xf9, 0x1a,
-  0x0c, 0x73, 0x2e, 0x1a, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06,
-  0x1e, 0xed, 0x06, 0xa4, 0x1b, 0x8c, 0x6d, 0x10, 0xbb, 0xc1, 0x68, 0x42,
-  0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43,
-  0x11, 0x87, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xb6, 0xbb, 0xc1,
-  0xea, 0x06, 0x07, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xc6,
-  0xbb, 0x01, 0xeb, 0x06, 0x0c, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
-  0xc1, 0xd6, 0xbb, 0x41, 0xeb, 0x06, 0x12, 0x11, 0x8c, 0x18, 0x28, 0x00,
-  0x08, 0x82, 0x81, 0x52, 0xbe, 0x01, 0xeb, 0x06, 0x6d, 0x1b, 0x04, 0xb8,
-  0x1b, 0xf4, 0x6d, 0x60, 0xbb, 0xc1, 0x68, 0x42, 0x00, 0x0c, 0x37, 0x04,
-  0xba, 0x1b, 0x84, 0xc1, 0x05, 0x4e, 0xcd, 0x12, 0xf0, 0xc8, 0x70, 0x43,
-  0x4b, 0xfd, 0x6e, 0x00, 0x06, 0xb3, 0x0c, 0x29, 0xa2, 0x22, 0x41, 0xa5,
-  0x6d, 0x00, 0xbb, 0x01, 0x5c, 0xe0, 0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20,
-  0x18, 0x4c, 0xe8, 0x1b, 0xc4, 0x6e, 0x40, 0x53, 0xa2, 0x1b, 0x8c, 0x18,
-  0x1c, 0x00, 0x08, 0x82, 0xc1, 0x94, 0xbe, 0x41, 0xec, 0x06, 0x81, 0x70,
-  0xc1, 0x30, 0xc5, 0xb6, 0x41, 0xed, 0x06, 0x70, 0x81, 0x53, 0x23, 0x06,
-  0x07, 0x00, 0x82, 0x60, 0x30, 0xb5, 0x6f, 0x60, 0xbb, 0xc1, 0x4d, 0x9d,
-  0x6e, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x93, 0xfb, 0x06, 0xb6,
-  0x1b, 0x04, 0xc2, 0x05, 0xc3, 0x5c, 0xe0, 0xd4, 0x1d, 0x4e, 0x5d, 0xcd,
-  0x06, 0xc3, 0x9c, 0x9a, 0x06, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c,
-  0x18, 0x1c, 0x00, 0x08, 0x82, 0x81, 0x27, 0xbf, 0x81, 0xf8, 0x06, 0xa1,
-  0x1b, 0xbc, 0x6f, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a,
-  0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00,
-  0x82, 0x60, 0xb0, 0xe5, 0x6f, 0x90, 0xbe, 0x41, 0x42, 0x04, 0x23, 0x06,
-  0x08, 0x00, 0x82, 0x60, 0xb0, 0xe9, 0x6f, 0xa0, 0xbe, 0x41, 0x42, 0x04,
-  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0xed, 0x6f, 0xb0, 0xbe, 0x41,
-  0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xa0, 0x8c, 0x70, 0xa0,
-  0xbe, 0xc1, 0xea, 0x06, 0x81, 0xfd, 0x06, 0xbb, 0x1b, 0xd0, 0x6f, 0x30,
-  0x9a, 0x10, 0x00, 0xc3, 0x0d, 0x01, 0xfe, 0x06, 0x61, 0x70, 0x81, 0x53,
-  0xb3, 0x04, 0x3c, 0x32, 0xdc, 0xb0, 0x56, 0xfc, 0x1b, 0x80, 0xc1, 0x2c,
-  0xc3, 0x8a, 0xf0, 0x48, 0x60, 0x7e, 0x1b, 0x80, 0x6e, 0x10, 0x9f, 0xe1,
-  0x88, 0xb8, 0x0a, 0xdd, 0x80, 0xf8, 0x66, 0x19, 0x58, 0xe4, 0x45, 0x02,
-  0x13, 0xdd, 0x40, 0xae, 0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9,
-  0xc0, 0x29, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0x13, 0x0e,
-  0x74, 0xb8, 0x21, 0x20, 0xe1, 0x00, 0x0c, 0x66, 0x19, 0x5a, 0xc4, 0x45,
-  0x02, 0x1b, 0x54, 0x37, 0x80, 0xcf, 0x2c, 0xc1, 0x8c, 0x58, 0xea, 0x06,
-  0x44, 0x7c, 0x66, 0x09, 0x66, 0x64, 0x38, 0x82, 0xaf, 0x54, 0x37, 0x10,
-  0xbe, 0x59, 0x06, 0x18, 0x99, 0x91, 0xc0, 0xfa, 0x6a, 0x75, 0x83, 0xf8,
-  0x58, 0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x70, 0xca, 0x82, 0x48, 0x3e,
-  0x56, 0x04, 0xf1, 0x29, 0x22, 0x86, 0x03, 0x1d, 0x6e, 0x08, 0x5e, 0x38,
-  0x00, 0x83, 0x59, 0x86, 0x18, 0x91, 0x91, 0xc0, 0x66, 0x37, 0x18, 0xe2,
-  0x33, 0x4b, 0x30, 0x23, 0x46, 0xd8, 0x6e, 0x00, 0x9f, 0x59, 0x82, 0x19,
-  0x19, 0x68, 0x71, 0xb4, 0x16, 0xc1, 0x5c, 0x84, 0x88, 0x11, 0x41, 0x46,
-  0x5c, 0x36, 0x78, 0x91, 0x0b, 0x86, 0xb1, 0xda, 0x0d, 0x72, 0x37, 0x88,
-  0xcf, 0x70, 0x04, 0x6a, 0xe9, 0x6e, 0x40, 0x7c, 0xb3, 0x0c, 0x34, 0x72,
-  0x23, 0x81, 0xed, 0x6e, 0x90, 0x5a, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05,
-  0xc3, 0x5c, 0xe0, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0xc4,
-  0x0f, 0x07, 0x3a, 0xdc, 0x10, 0xf4, 0x70, 0x00, 0x06, 0xb3, 0x0c, 0x35,
-  0x62, 0x23, 0x81, 0x0d, 0xe3, 0x1b, 0xc0, 0x67, 0x96, 0x60, 0x47, 0x0c,
-  0x7c, 0x03, 0x22, 0x3e, 0xb3, 0x04, 0x3b, 0x32, 0x1c, 0x31, 0x5b, 0xe1,
-  0x1b, 0x08, 0xdf, 0x2c, 0x03, 0x8e, 0xec, 0x48, 0x60, 0xb4, 0x25, 0xbe,
-  0x41, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x38, 0x65, 0x41,
-  0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xa1, 0xc6, 0x81, 0x0e, 0x37, 0x04,
-  0x68, 0x1c, 0x80, 0xc1, 0x2c, 0x43, 0x8e, 0xe8, 0x48, 0x60, 0xea, 0x1b,
-  0x0c, 0xf1, 0x99, 0x25, 0xd8, 0x11, 0x23, 0xde, 0x37, 0x80, 0xcf, 0x2c,
-  0xc1, 0x8e, 0x0c, 0xb4, 0x38, 0x5a, 0x8d, 0x60, 0x36, 0x42, 0xe4, 0x88,
-  0xa0, 0x23, 0x7c, 0x1b, 0xdc, 0xc8, 0x05, 0xc3, 0x5c, 0xe0, 0xd4, 0x6d,
-  0x4e, 0xdd, 0xee, 0x06, 0xc3, 0x1c, 0xbc, 0x06, 0xc3, 0x1c, 0x31, 0xcc,
-  0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x81, 0x87, 0xc7, 0x01,
-  0x1a, 0x07, 0x27, 0x1c, 0xd4, 0x71, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09,
-  0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23,
-  0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0xfd, 0x71, 0xf0, 0xc6, 0x41, 0x42,
-  0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0x81, 0x72, 0x00, 0xc7,
-  0x41, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0x85, 0x72,
-  0x10, 0xc7, 0x41, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xa0,
-  0xa4, 0x72, 0x00, 0xc7, 0x41, 0x0c, 0x07, 0x01, 0x1f, 0x07, 0x61, 0x1c,
-  0xe8, 0x71, 0x30, 0x9a, 0x10, 0x00, 0xc3, 0x0d, 0x81, 0x1f, 0x07, 0x61,
-  0x70, 0x81, 0x53, 0xb3, 0x04, 0x3c, 0x32, 0xd0, 0xe2, 0xa8, 0xc6, 0x89,
-  0x98, 0xe2, 0x60, 0x22, 0x2e, 0x91, 0x22, 0xc2, 0x8e, 0x98, 0xe2, 0xa0,
-  0x22, 0xb3, 0x0c, 0x3d, 0xf2, 0x23, 0xf1, 0x35, 0x1c, 0x61, 0x3f, 0x64,
-  0x1c, 0x0c, 0xdf, 0xdd, 0xcf, 0x30, 0xc3, 0x0d, 0xc1, 0x0b, 0x07, 0x64,
-  0x50, 0x43, 0xa0, 0xc3, 0x11, 0xf8, 0x85, 0xc6, 0xc1, 0xf0, 0x55, 0x20,
-  0xe8, 0xe9, 0xd7, 0x30, 0xc3, 0x0d, 0x81, 0x0c, 0x07, 0x64, 0x50, 0xc1,
-  0xa0, 0xb3, 0x0c, 0x3e, 0x32, 0x27, 0xc1, 0xd9, 0x6f, 0x30, 0xcc, 0xad,
-  0x6c, 0x30, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x78, 0xb1, 0x1c,
-  0x84, 0x72, 0x00, 0xc6, 0x81, 0x2b, 0x07, 0xa3, 0x09, 0x01, 0x30, 0x9a,
-  0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x1c, 0x32,
-  0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x1b, 0x2e, 0x07, 0xa8, 0x1c, 0x1c,
-  0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x5b, 0x2e, 0x07, 0xa9,
-  0x1c, 0x30, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x9b, 0x2e,
-  0x07, 0xaa, 0x1c, 0x48, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06,
-  0x8a, 0x38, 0x07, 0xa9, 0x1c, 0xa8, 0x71, 0x10, 0xd4, 0x72, 0xa0, 0xc7,
-  0xc1, 0x2c, 0x07, 0xa3, 0x09, 0x01, 0x30, 0xdc, 0x10, 0xdc, 0x72, 0x10,
-  0x06, 0x17, 0x38, 0x35, 0x4b, 0x30, 0x27, 0xc3, 0x0d, 0x2a, 0xc6, 0xcb,
-  0x01, 0x18, 0xcc, 0x32, 0x80, 0x49, 0x98, 0x04, 0x65, 0xc6, 0x41, 0x2b,
-  0x07, 0x70, 0x81, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x30, 0x95,
-  0x73, 0xe0, 0xca, 0x41, 0x0c, 0xfd, 0x71, 0x30, 0x62, 0x70, 0x00, 0x20,
-  0x08, 0x06, 0x93, 0x39, 0x07, 0xae, 0x1c, 0x04, 0xc2, 0x05, 0xc3, 0x54,
-  0x1a, 0x07, 0xb2, 0x1c, 0xc0, 0x05, 0x4e, 0x8d, 0x18, 0x1c, 0x00, 0x08,
-  0x82, 0xc1, 0xa4, 0xce, 0xc1, 0x2c, 0x07, 0x34, 0x46, 0xca, 0xc1, 0x88,
-  0xc1, 0x01, 0x80, 0x20, 0x18, 0x4c, 0xeb, 0x1c, 0xcc, 0x72, 0x10, 0x08,
-  0x17, 0x0c, 0x73, 0x81, 0x53, 0x77, 0x38, 0x75, 0x32, 0x1c, 0x0c, 0x73,
-  0x67, 0x1b, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00,
-  0x20, 0x08, 0x06, 0xde, 0x3b, 0x07, 0xbf, 0x1c, 0xf8, 0x71, 0xc0, 0xce,
-  0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c,
-  0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1,
-  0x66, 0xcf, 0x81, 0x39, 0x07, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08,
-  0x82, 0xc1, 0x76, 0xcf, 0xc1, 0x39, 0x07, 0x09, 0x11, 0x8c, 0x18, 0x20,
-  0x00, 0x08, 0x82, 0xc1, 0x86, 0xcf, 0x01, 0x3a, 0x07, 0x09, 0x11, 0x8c,
-  0x18, 0x28, 0x00, 0x08, 0x82, 0x81, 0x02, 0xd2, 0xc1, 0x39, 0x07, 0xa8,
-  0x1c, 0x04, 0xf3, 0x1c, 0xe0, 0x72, 0x10, 0xcf, 0xc1, 0x68, 0x42, 0x00,
-  0x0c, 0x37, 0x04, 0xf5, 0x1c, 0x84, 0xc1, 0x05, 0x4e, 0xcd, 0x12, 0xcc,
-  0xc9, 0x70, 0x03, 0x9a, 0xe5, 0x73, 0x00, 0x06, 0xb3, 0x0c, 0x62, 0x32,
-  0x27, 0x81, 0xed, 0x71, 0xd0, 0xc7, 0x41, 0x7c, 0x86, 0x23, 0xdc, 0xc8,
-  0x8f, 0x03, 0xe2, 0x9b, 0x65, 0x18, 0x13, 0x33, 0x09, 0xec, 0x8f, 0x83,
-  0x37, 0x8a, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa7, 0x2c,
-  0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0x46, 0x3a, 0xd0, 0xe1, 0x86,
-  0x20, 0xa4, 0x03, 0x30, 0x98, 0x65, 0x20, 0x93, 0x32, 0x09, 0x6c, 0x38,
-  0xe5, 0x00, 0x3e, 0xb3, 0x04, 0x6a, 0x62, 0xa6, 0x1c, 0x10, 0xf1, 0x99,
-  0x25, 0x50, 0x93, 0xe1, 0x88, 0x3c, 0x3a, 0xe5, 0x40, 0xf8, 0x66, 0x19,
-  0xce, 0x44, 0x4d, 0x02, 0xd3, 0x23, 0x54, 0x0e, 0xe2, 0x63, 0x81, 0x43,
-  0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x29, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4,
-  0xa7, 0x08, 0x97, 0x0e, 0x74, 0xb8, 0x21, 0x60, 0xe9, 0x00, 0x0c, 0x66,
-  0x19, 0xd0, 0x24, 0x4d, 0x02, 0x83, 0xe5, 0x60, 0x88, 0xcf, 0x2c, 0x81,
-  0x9a, 0x18, 0x31, 0xcb, 0x01, 0x7c, 0x66, 0x09, 0xd4, 0x64, 0xa0, 0xc5,
-  0xd1, 0xc8, 0x04, 0x2b, 0x13, 0x02, 0x4d, 0x84, 0x34, 0x41, 0xc9, 0xc0,
-  0x4c, 0x2e, 0x18, 0xc6, 0x64, 0x39, 0xb0, 0xe5, 0x20, 0x3e, 0xc3, 0x11,
-  0xa5, 0x76, 0xcb, 0x01, 0xf1, 0xcd, 0x32, 0xac, 0x89, 0x9b, 0x04, 0x86,
-  0xcb, 0x81, 0xa9, 0xc5, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0x81,
-  0x53, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x3c, 0x1d, 0xe8,
-  0x70, 0x43, 0xa0, 0xd3, 0x01, 0x18, 0xcc, 0x32, 0xb0, 0x49, 0x9b, 0x04,
-  0x36, 0x80, 0x73, 0x00, 0x9f, 0x59, 0x02, 0x39, 0xb1, 0x5e, 0x0e, 0x88,
-  0xf8, 0xcc, 0x12, 0xc8, 0xc9, 0x70, 0x04, 0xac, 0xf9, 0x72, 0x20, 0x7c,
-  0xb3, 0x0c, 0x6f, 0x22, 0x27, 0x81, 0xc5, 0xda, 0x2f, 0x07, 0xf1, 0xb1,
-  0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0xe0, 0x94, 0x05, 0x91, 0x7c, 0xac,
-  0x08, 0xe2, 0x53, 0xc4, 0x59, 0x07, 0x3a, 0xdc, 0x10, 0x94, 0x75, 0x00,
-  0x06, 0xb3, 0x0c, 0x70, 0x12, 0x27, 0x81, 0x9d, 0x73, 0x30, 0xc4, 0x67,
-  0x96, 0x40, 0x4e, 0x8c, 0x60, 0xe7, 0x00, 0x3e, 0xb3, 0x04, 0x72, 0x32,
-  0xd0, 0xe2, 0x68, 0x6c, 0x82, 0xb5, 0x09, 0x01, 0x27, 0x42, 0x9c, 0xd8,
-  0x75, 0xe0, 0x26, 0x17, 0x0c, 0x73, 0x81, 0x53, 0xb7, 0x39, 0x75, 0xb8,
-  0x1c, 0x0c, 0x73, 0xed, 0x1b, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33,
-  0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x5e, 0x5d, 0x07, 0x65, 0x1d, 0x90,
-  0x74, 0x20, 0xd7, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68,
-  0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00,
-  0x08, 0x82, 0xc1, 0xc6, 0xd7, 0x01, 0x5b, 0x07, 0x09, 0x11, 0x8c, 0x18,
-  0x20, 0x00, 0x08, 0x82, 0xc1, 0xd6, 0xd7, 0x41, 0x5b, 0x07, 0x09, 0x11,
-  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xe6, 0xd7, 0x81, 0x5b, 0x07,
-  0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0x81, 0x62, 0xda, 0x41,
-  0x5b, 0x07, 0x2e, 0x1d, 0x04, 0x79, 0x1d, 0xf8, 0x74, 0x70, 0xd7, 0xc1,
-  0x68, 0x42, 0x00, 0x0c, 0x37, 0x04, 0x7b, 0x1d, 0x84, 0xc1, 0x05, 0x4e,
-  0xcd, 0x12, 0xcc, 0xc9, 0x40, 0x8b, 0xa3, 0x1a, 0x3e, 0x02, 0xaa, 0x43,
-  0x8f, 0xb8, 0x04, 0x98, 0x08, 0x72, 0x02, 0xaa, 0x43, 0x98, 0xcc, 0x32,
-  0xd0, 0x89, 0x9d, 0xb8, 0xdb, 0x70, 0x44, 0xfd, 0x84, 0x75, 0x30, 0x7c,
-  0x67, 0x3f, 0xc3, 0x0c, 0x37, 0x04, 0x2c, 0x1d, 0x90, 0x41, 0x0d, 0x81,
-  0x0e, 0x47, 0xd4, 0x5b, 0x59, 0x07, 0xc3, 0x57, 0x81, 0xa0, 0x77, 0x6f,
-  0xc3, 0x0c, 0x37, 0x04, 0x2f, 0x1d, 0x90, 0x41, 0x05, 0x83, 0xce, 0x32,
-  0xd4, 0x89, 0xaa, 0x04, 0x37, 0xcf, 0xc1, 0x30, 0x87, 0xc2, 0xc1, 0x30,
-  0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xe0, 0xb9, 0x76, 0xe0, 0xd7, 0x41,
-  0x4f, 0x07, 0xab, 0x1d, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c,
-  0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x71, 0xc8, 0x88, 0x01, 0x02,
-  0x80, 0x20, 0x18, 0x6c, 0xb5, 0x1d, 0x94, 0x76, 0x70, 0x10, 0xc1, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0xb6, 0x1d, 0x98, 0x76, 0xc0, 0x10,
-  0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0xb7, 0x1d, 0x9c, 0x76,
-  0x20, 0x11, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x28, 0xbf, 0x1d,
-  0x98, 0x76, 0x70, 0xd6, 0x41, 0x20, 0xdb, 0xc1, 0x5d, 0x07, 0xb0, 0x1d,
-  0x8c, 0x26, 0x04, 0xc0, 0x70, 0x43, 0x40, 0xdb, 0x41, 0x18, 0x5c, 0xe0,
-  0xd4, 0x2c, 0x81, 0xaa, 0x0c, 0x37, 0x9c, 0x5c, 0x6e, 0x07, 0x60, 0x30,
-  0xcb, 0x70, 0x27, 0x78, 0x12, 0xd4, 0x58, 0x07, 0xaa, 0x1d, 0xc0, 0x05,
-  0x4e, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x24, 0xde, 0xc1, 0x6a,
-  0x07, 0x30, 0xc4, 0xd7, 0xc1, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x4c,
-  0xe3, 0x1d, 0xac, 0x76, 0x10, 0x08, 0x17, 0x0c, 0x53, 0x66, 0x1d, 0xbc,
-  0x76, 0x00, 0x17, 0x38, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0xd3,
-  0x79, 0x07, 0xb0, 0x1d, 0xc4, 0x5c, 0x68, 0x07, 0x23, 0x06, 0x07, 0x00,
-  0x82, 0x60, 0x30, 0xa1, 0x77, 0x00, 0xdb, 0x41, 0x20, 0x5c, 0x30, 0xcc,
-  0x05, 0x4e, 0xdd, 0xe1, 0xd4, 0xbd, 0x74, 0x30, 0xcc, 0x91, 0x71, 0x30,
-  0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18,
-  0x78, 0xec, 0x1d, 0xf0, 0x76, 0xb0, 0xd7, 0x41, 0x7a, 0x07, 0xa3, 0x09,
-  0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c,
-  0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xdb, 0x7c, 0x07,
-  0xe3, 0x1d, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x1b,
-  0x7d, 0x07, 0xe4, 0x1d, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08,
-  0x06, 0x5b, 0x7d, 0x07, 0xe5, 0x1d, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00,
-  0x20, 0x08, 0x06, 0x4a, 0x7f, 0x07, 0xe4, 0x1d, 0x94, 0x76, 0x10, 0xc0,
-  0x77, 0x50, 0xdb, 0x81, 0x7b, 0x07, 0xa3, 0x09, 0x01, 0x30, 0xdc, 0x10,
-  0xc8, 0x77, 0x10, 0x06, 0x17, 0x38, 0x35, 0x4b, 0xa0, 0x2a, 0xc3, 0x0d,
-  0x65, 0x67, 0xdf, 0x01, 0x18, 0xcc, 0x32, 0xe4, 0x89, 0xaa, 0x04, 0x86,
-  0xd7, 0x81, 0x5e, 0x07, 0xf1, 0x19, 0x8e, 0x68, 0xa3, 0xbd, 0x0e, 0x88,
-  0x6f, 0x96, 0x41, 0x4f, 0xfa, 0x24, 0x30, 0xbe, 0x0e, 0xdc, 0x28, 0x3e,
-  0x16, 0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9c, 0xb2, 0xc0, 0x90, 0x8f,
-  0x15, 0x41, 0x7c, 0x8a, 0x00, 0xf1, 0x40, 0x87, 0x1b, 0x02, 0xff, 0x0e,
-  0xc0, 0x60, 0x96, 0x61, 0x4f, 0xf8, 0x24, 0xb0, 0x81, 0xb4, 0x03, 0xf8,
-  0xcc, 0x12, 0x84, 0x8a, 0x8d, 0x76, 0x40, 0xc4, 0x67, 0x96, 0x20, 0x54,
-  0x86, 0x23, 0xf0, 0x88, 0xb4, 0x03, 0xe1, 0x9b, 0x65, 0xf0, 0x93, 0x50,
-  0x09, 0x2c, 0x8f, 0x4a, 0x3b, 0x88, 0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18,
-  0xe6, 0x02, 0xa7, 0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0x56,
-  0x3c, 0xd0, 0xe1, 0x86, 0x20, 0xc5, 0x03, 0x30, 0x98, 0x65, 0xf8, 0x13,
-  0x50, 0x09, 0xac, 0xb5, 0x83, 0x21, 0x3e, 0xb3, 0x04, 0xa1, 0x62, 0x04,
-  0x6c, 0x07, 0xf0, 0x99, 0x25, 0x08, 0x95, 0x81, 0x16, 0x47, 0xdb, 0x13,
-  0x8c, 0x4f, 0x88, 0x3f, 0x11, 0x40, 0xc5, 0x24, 0x83, 0x3e, 0xb9, 0x60,
-  0x18, 0x7b, 0xed, 0x60, 0xb6, 0x83, 0xf8, 0x0c, 0x47, 0x88, 0x1e, 0x6d,
-  0x07, 0xc4, 0x37, 0xcb, 0x20, 0x2a, 0xa5, 0x12, 0x58, 0x6d, 0x07, 0xa3,
-  0x17, 0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4e, 0x59, 0x60,
-  0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xe4, 0x78, 0xa0, 0xc3, 0x0d, 0xc1,
-  0x8d, 0x07, 0x60, 0x30, 0xcb, 0x30, 0x2a, 0xa4, 0x12, 0xd8, 0xd0, 0xdb,
-  0x01, 0x7c, 0x66, 0x09, 0x52, 0xc5, 0x74, 0x3b, 0x20, 0xe2, 0x33, 0x4b,
-  0x90, 0x2a, 0xc3, 0x11, 0xad, 0xb7, 0xdb, 0x81, 0xf0, 0xcd, 0x32, 0x98,
-  0x4a, 0xaa, 0x04, 0xe6, 0x7a, 0xbc, 0x1d, 0xc4, 0xc7, 0x02, 0x87, 0x3e,
-  0x17, 0x0c, 0x73, 0x81, 0x53, 0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f,
-  0x11, 0x64, 0x1e, 0xe8, 0x70, 0x43, 0x20, 0xe6, 0x01, 0x18, 0xcc, 0x32,
-  0x9c, 0x0a, 0xaa, 0x04, 0x46, 0xde, 0xc1, 0x10, 0x9f, 0x59, 0x82, 0x54,
-  0x31, 0x22, 0xbd, 0x03, 0xf8, 0xcc, 0x12, 0xa4, 0xca, 0x40, 0x8b, 0xa3,
-  0x8d, 0x0a, 0x46, 0x2a, 0xc4, 0xa9, 0x08, 0xa8, 0x02, 0xf7, 0x41, 0xa9,
-  0x5c, 0x30, 0xcc, 0x05, 0x4e, 0xdd, 0xe6, 0xd4, 0xd5, 0x76, 0x30, 0xcc,
-  0xa9, 0x73, 0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01,
-  0x80, 0x20, 0x18, 0x78, 0x72, 0x1e, 0x88, 0x79, 0x10, 0xe2, 0xc1, 0x9b,
-  0x07, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30,
-  0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06,
-  0x5b, 0x9e, 0x07, 0x69, 0x1e, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20,
-  0x08, 0x06, 0x9b, 0x9e, 0x07, 0x6a, 0x1e, 0x24, 0x44, 0x30, 0x62, 0x80,
-  0x00, 0x20, 0x08, 0x06, 0xdb, 0x9e, 0x07, 0x6b, 0x1e, 0x24, 0x44, 0x30,
-  0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0xca, 0xa8, 0x07, 0x6a, 0x1e, 0xac,
-  0x78, 0x10, 0xd8, 0x79, 0xb0, 0xe3, 0x01, 0x9d, 0x07, 0xa3, 0x09, 0x01,
-  0x30, 0xdc, 0x10, 0xe0, 0x79, 0x10, 0x06, 0x17, 0x38, 0x35, 0x4b, 0xa0,
-  0x2a, 0x03, 0x2d, 0x8e, 0x6a, 0xd4, 0x89, 0x1e, 0x0f, 0x74, 0xe2, 0x12,
-  0x77, 0x22, 0xa4, 0x8a, 0x1e, 0x0f, 0x78, 0x62, 0xee, 0xd7, 0xe3, 0x01,
-  0x7c, 0x66, 0x19, 0x56, 0xa5, 0x55, 0xd8, 0x6f, 0x38, 0x02, 0xfe, 0x7e,
-  0x3c, 0x18, 0xbe, 0x8b, 0xbf, 0x61, 0x86, 0x1b, 0x02, 0x15, 0x0f, 0xc8,
-  0xa0, 0x86, 0x40, 0x87, 0x23, 0x8a, 0x31, 0x0f, 0x86, 0xaf, 0x02, 0x41,
-  0xef, 0x18, 0x66, 0xb8, 0x21, 0x68, 0xf1, 0x80, 0x0c, 0x2a, 0x18, 0x74,
-  0x96, 0x81, 0x55, 0xc2, 0x25, 0xb8, 0xf8, 0x0e, 0x86, 0x39, 0x93, 0x0e,
-  0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x8f, 0xd5, 0x03, 0x3e,
-  0x0f, 0x76, 0x3c, 0x48, 0xf5, 0x60, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84,
-  0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x43, 0x46, 0x0c,
-  0x10, 0x00, 0x04, 0xc1, 0x60, 0x9b, 0xf5, 0x60, 0xd4, 0x83, 0x83, 0x08,
-  0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0xa3, 0xf5, 0x80, 0xd4, 0x03,
-  0x86, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0xab, 0xf5, 0xa0,
-  0xd4, 0x03, 0x89, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x40, 0xe9,
-  0xf5, 0x80, 0xd4, 0x83, 0x32, 0x0f, 0x02, 0x58, 0x0f, 0xea, 0x3c, 0x70,
-  0xf5, 0x60, 0x34, 0x21, 0x00, 0x86, 0x1b, 0x02, 0x59, 0x0f, 0xc2, 0xe0,
-  0x02, 0xa7, 0x66, 0x09, 0xc2, 0x65, 0xb8, 0xa1, 0x04, 0x83, 0x5b, 0x0f,
-  0xc0, 0x60, 0x96, 0xc1, 0x55, 0x5e, 0x25, 0xa8, 0x30, 0x0f, 0x50, 0x3d,
-  0x80, 0x0b, 0x9c, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x09, 0xdc,
-  0x83, 0x54, 0x0f, 0x56, 0x30, 0xd0, 0xf3, 0x60, 0xc4, 0xe0, 0x00, 0x40,
-  0x10, 0x0c, 0xa6, 0x70, 0x0f, 0x52, 0x3d, 0x08, 0x84, 0x0b, 0x86, 0x29,
-  0x32, 0x0f, 0x5a, 0x3d, 0x80, 0x0b, 0x9c, 0x1a, 0x31, 0x38, 0x00, 0x10,
-  0x04, 0x83, 0xa9, 0xdc, 0x03, 0x57, 0x0f, 0xc2, 0xe0, 0xcf, 0x83, 0x11,
-  0x83, 0x03, 0x00, 0x41, 0x30, 0x98, 0xcc, 0x3d, 0x70, 0xf5, 0x20, 0x10,
-  0x2e, 0x18, 0xe6, 0x02, 0xa7, 0xee, 0x70, 0xea, 0x5a, 0x3c, 0x18, 0xe6,
-  0xc4, 0x3a, 0x18, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00,
-  0x40, 0x10, 0x0c, 0x3c, 0x75, 0x0f, 0x74, 0x3d, 0xc8, 0xf3, 0xe0, 0xdc,
-  0x83, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18,
-  0x4d, 0x20, 0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83,
-  0x2d, 0xde, 0x83, 0x70, 0x0f, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10,
-  0x04, 0x83, 0x4d, 0xde, 0x03, 0x71, 0x0f, 0x12, 0x22, 0x18, 0x31, 0x40,
-  0x00, 0x10, 0x04, 0x83, 0x6d, 0xde, 0x83, 0x71, 0x0f, 0x12, 0x22, 0x18,
-  0x31, 0x50, 0x00, 0x10, 0x04, 0x03, 0x65, 0xdf, 0x03, 0x71, 0x0f, 0x46,
-  0x3d, 0x08, 0xdc, 0x3d, 0x98, 0xf5, 0x80, 0xdd, 0x83, 0xd1, 0x84, 0x00,
-  0x18, 0x6e, 0x08, 0xe0, 0x3d, 0x08, 0x83, 0x0b, 0x9c, 0x9a, 0x25, 0x08,
-  0x97, 0xe1, 0x86, 0x31, 0x0c, 0xe8, 0x3d, 0x00, 0x83, 0x59, 0x06, 0x58,
-  0x09, 0x97, 0xc0, 0xec, 0x3c, 0xc0, 0xf3, 0x20, 0x3e, 0xc3, 0x11, 0x68,
-  0x18, 0xe4, 0x79, 0x40, 0x7c, 0xb3, 0x0c, 0xb1, 0x42, 0x2b, 0x81, 0xe9,
-  0x79, 0x90, 0x86, 0x41, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17,
-  0x38, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xe1, 0xef, 0x81,
-  0x0e, 0x37, 0x04, 0xfc, 0x1e, 0x80, 0xc1, 0x2c, 0x83, 0xac, 0xcc, 0x4a,
-  0x60, 0x83, 0xa8, 0x07, 0xf0, 0x99, 0x25, 0xc0, 0x15, 0x0b, 0xf5, 0x80,
-  0x88, 0xcf, 0x2c, 0x01, 0xae, 0x0c, 0x47, 0xcc, 0x61, 0x20, 0xea, 0x81,
-  0xf0, 0xcd, 0x32, 0xd4, 0x0a, 0xae, 0x04, 0x46, 0x87, 0xc1, 0xa8, 0x07,
-  0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0xe0, 0x94, 0x05, 0x91,
-  0x7c, 0xac, 0x08, 0xe2, 0x53, 0x44, 0xca, 0x07, 0x3a, 0xdc, 0x10, 0x9c,
-  0x7c, 0x00, 0x06, 0xb3, 0x0c, 0xb6, 0x72, 0x2b, 0x81, 0xad, 0x7a, 0x30,
-  0xc4, 0x67, 0x96, 0x00, 0x57, 0x8c, 0x70, 0xf5, 0x00, 0x3e, 0xb3, 0x04,
-  0xb8, 0x32, 0xd0, 0xe2, 0x68, 0xb2, 0x82, 0xcd, 0x0a, 0x61, 0x2b, 0xc2,
-  0xad, 0xf0, 0xa3, 0x40, 0x2b, 0x17, 0x0c, 0x63, 0xad, 0x1e, 0xc4, 0x7a,
-  0x10, 0x9f, 0xe1, 0x08, 0x59, 0x90, 0xf5, 0x80, 0xf8, 0x66, 0x19, 0x72,
-  0x85, 0x57, 0x02, 0x9b, 0xf5, 0x60, 0x16, 0xe2, 0x63, 0xc1, 0x40, 0x9f,
-  0x0b, 0x86, 0xb9, 0xc0, 0x29, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7,
-  0x88, 0x9b, 0x0f, 0x74, 0xb8, 0x21, 0xa8, 0xf9, 0x00, 0x0c, 0x66, 0x19,
-  0x74, 0x65, 0x57, 0x02, 0x1b, 0x76, 0x3d, 0x80, 0xcf, 0x2c, 0x01, 0xb8,
-  0x18, 0xae, 0x07, 0x44, 0x7c, 0x66, 0x09, 0xc0, 0x65, 0x38, 0xa2, 0x17,
-  0x72, 0x3d, 0x10, 0xbe, 0x59, 0x86, 0x5e, 0x01, 0x97, 0xc0, 0x7c, 0x41,
-  0xd7, 0x83, 0xf8, 0x58, 0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x70, 0xca,
-  0x82, 0x48, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x42, 0xec, 0x03, 0x1d, 0x6e,
-  0x08, 0xc0, 0x3e, 0x00, 0x83, 0x59, 0x06, 0x5f, 0xf9, 0x95, 0xc0, 0xc4,
-  0x3d, 0x18, 0xe2, 0x33, 0x4b, 0x00, 0x2e, 0x46, 0x9c, 0x7b, 0x00, 0x9f,
-  0x59, 0x02, 0x70, 0x19, 0x68, 0x71, 0x34, 0x5d, 0xc1, 0x76, 0x85, 0xf0,
-  0x15, 0xe1, 0x57, 0x60, 0x83, 0x57, 0x2e, 0x18, 0xe6, 0x02, 0xa7, 0x6e,
-  0x73, 0xea, 0x66, 0x3d, 0x18, 0xe6, 0xd0, 0x3b, 0x18, 0xe6, 0x88, 0x61,
-  0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x3c, 0xb8, 0x0f,
-  0xc0, 0x3e, 0xf8, 0xf7, 0xa0, 0xed, 0x83, 0xd1, 0x84, 0x00, 0x18, 0x4d,
-  0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x12, 0x19,
-  0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0xed, 0xee, 0x83, 0xb3, 0x0f, 0x12,
-  0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x0d, 0xef, 0x03, 0xb4,
-  0x0f, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x2d, 0xef,
-  0x83, 0xb4, 0x0f, 0x12, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x03,
-  0x25, 0xf4, 0x03, 0xb4, 0x0f, 0x52, 0x3e, 0x08, 0xe8, 0x3e, 0xc8, 0xf9,
-  0x40, 0xee, 0x83, 0xd1, 0x84, 0x00, 0x18, 0x6e, 0x08, 0xec, 0x3e, 0x08,
-  0x83, 0x0b, 0x9c, 0x9a, 0x25, 0x08, 0x97, 0x81, 0x16, 0x47, 0x35, 0x58,
-  0xc5, 0xce, 0x87, 0x55, 0x71, 0x09, 0x57, 0x11, 0xc0, 0xc5, 0xce, 0x87,
-  0x57, 0x99, 0x65, 0x10, 0x17, 0x72, 0x49, 0xc7, 0x60, 0x38, 0xc2, 0x1d,
-  0x03, 0x9e, 0x0f, 0x86, 0xef, 0xde, 0x31, 0x18, 0x66, 0xb8, 0x21, 0x38,
-  0xf9, 0x80, 0x0c, 0x6a, 0x08, 0x74, 0x38, 0x42, 0x24, 0xc0, 0x3e, 0x18,
-  0xbe, 0x0a, 0x04, 0x3d, 0x92, 0x18, 0x66, 0xb8, 0x21, 0x50, 0xf9, 0x80,
-  0x0c, 0x2a, 0x18, 0x74, 0x96, 0x61, 0x5c, 0xf0, 0x25, 0x38, 0x77, 0x0f,
-  0x86, 0xb9, 0x11, 0x0f, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03,
-  0x2f, 0xf5, 0x83, 0xbc, 0x0f, 0x70, 0x3e, 0x30, 0xfd, 0x60, 0x34, 0x21,
-  0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1,
-  0x88, 0x43, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0x83, 0xfd, 0x00,
-  0xf4, 0x83, 0x83, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0x8b,
-  0xfd, 0x20, 0xf4, 0x03, 0x86, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1,
-  0x60, 0x93, 0xfd, 0x40, 0xf4, 0x03, 0x89, 0x08, 0x46, 0x0c, 0x14, 0x00,
-  0x04, 0xc1, 0x40, 0xd1, 0xfd, 0x20, 0xf4, 0x03, 0xb1, 0x0f, 0x82, 0xd6,
-  0x0f, 0xe4, 0x3e, 0x58, 0xfd, 0x60, 0x34, 0x21, 0x00, 0x86, 0x1b, 0x82,
-  0xd7, 0x0f, 0xc2, 0xe0, 0x02, 0xa7, 0x66, 0x09, 0xf0, 0x65, 0xb8, 0x41,
-  0x24, 0x03, 0xda, 0x0f, 0xc0, 0x60, 0x96, 0xa1, 0x5c, 0xcc, 0x25, 0x28,
-  0x9f, 0x0f, 0x4a, 0x3f, 0x80, 0x0b, 0x9c, 0x1a, 0x31, 0x38, 0x00, 0x10,
-  0x04, 0x83, 0xa9, 0xf7, 0x03, 0xd3, 0x0f, 0x52, 0x32, 0xb8, 0xfb, 0x60,
-  0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x26, 0xdf, 0x0f, 0x4c, 0x3f, 0x08,
-  0x84, 0x0b, 0x86, 0xa9, 0xb0, 0x0f, 0x54, 0x3f, 0x80, 0x0b, 0x9c, 0x1a,
-  0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x49, 0xfc, 0x83, 0xd5, 0x0f, 0x7c,
-  0x82, 0xef, 0x83, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x98, 0xc6, 0x3f,
-  0x58, 0xfd, 0x20, 0x10, 0x2e, 0x18, 0xe6, 0x02, 0xa7, 0xee, 0x70, 0xea,
-  0x54, 0x3e, 0x18, 0xe6, 0x7e, 0x3c, 0x18, 0xe6, 0x88, 0x61, 0x8e, 0x18,
-  0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xbc, 0xf3, 0x0f, 0x6e, 0x3f,
-  0xb0, 0xfb, 0x80, 0xfc, 0x83, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82,
-  0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x12, 0x19, 0x31, 0x40,
-  0x00, 0x10, 0x04, 0x83, 0xcd, 0xfd, 0x03, 0xdf, 0x0f, 0x12, 0x22, 0x18,
-  0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0xed, 0xfd, 0x83, 0xdf, 0x0f, 0x12,
-  0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x0d, 0xfe, 0x03, 0xf0,
-  0x0f, 0x12, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x03, 0x05, 0xff,
-  0x83, 0xdf, 0x0f, 0x40, 0x3f, 0x08, 0xd6, 0x3f, 0x80, 0xfd, 0x20, 0xfd,
-  0x83, 0xd1, 0x84, 0x00, 0x18, 0x6e, 0x08, 0xda, 0x3f, 0x08, 0x83, 0x0b,
-  0x9c, 0x9a, 0x25, 0xc0, 0x97, 0xe1, 0x06, 0xb0, 0x0c, 0xe2, 0x3f, 0x00,
-  0x83, 0x59, 0x86, 0x73, 0xc1, 0x97, 0xc0, 0xe6, 0x3e, 0xa8, 0xfb, 0x20,
-  0x3e, 0xc3, 0x11, 0x66, 0x19, 0xd8, 0x7d, 0x40, 0x7c, 0xb3, 0x0c, 0xe8,
-  0xb2, 0x2e, 0x81, 0xdd, 0x7d, 0x70, 0x96, 0x41, 0x7c, 0x2c, 0x18, 0xe8,
-  0x73, 0xc1, 0x30, 0x17, 0x38, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8,
-  0x14, 0xb1, 0xff, 0x81, 0x0e, 0x37, 0x04, 0xf9, 0x1f, 0x80, 0xc1, 0x2c,
-  0x43, 0xba, 0xa8, 0x4b, 0x60, 0xc3, 0xdf, 0x07, 0xf0, 0x99, 0x25, 0x78,
-  0x17, 0xf3, 0xfb, 0x80, 0x88, 0xcf, 0x2c, 0xc1, 0xbb, 0x0c, 0x47, 0xc4,
-  0x65, 0xf0, 0xf7, 0x81, 0xf0, 0xcd, 0x32, 0xb0, 0xcb, 0xbb, 0x04, 0x26,
-  0x97, 0x01, 0xe8, 0x07, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c,
-  0xe0, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x84, 0x09, 0x0a,
-  0x3a, 0xdc, 0x10, 0x90, 0xa0, 0x00, 0x06, 0xb3, 0x0c, 0xed, 0xe2, 0x2e,
-  0x81, 0xa1, 0x7e, 0x30, 0xc4, 0x67, 0x96, 0xe0, 0x5d, 0x8c, 0x58, 0xfd,
-  0x00, 0x3e, 0xb3, 0x04, 0xef, 0x32, 0xd0, 0xe2, 0x68, 0xe9, 0x82, 0xa9,
-  0x0b, 0xd1, 0x2e, 0x82, 0xbb, 0xe0, 0xab, 0xb0, 0x2e, 0x17, 0x0c, 0x63,
-  0xaa, 0x1f, 0xb8, 0x7e, 0x10, 0x9f, 0xe1, 0x88, 0xd7, 0x78, 0xfd, 0x80,
-  0xf8, 0x66, 0x19, 0xe0, 0x65, 0x5e, 0x02, 0x83, 0xfd, 0x00, 0x36, 0xe2,
-  0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x29, 0x0b, 0x0c, 0xf9,
-  0x58, 0x11, 0xc4, 0xa7, 0x08, 0x1a, 0x14, 0x74, 0xb8, 0x21, 0x90, 0x41,
-  0x01, 0x0c, 0x66, 0x19, 0xe2, 0x45, 0x5e, 0x02, 0x1b, 0x70, 0x3f, 0x80,
-  0xcf, 0x2c, 0xc1, 0xbd, 0x58, 0xed, 0x07, 0x44, 0x7c, 0x66, 0x09, 0xee,
-  0x65, 0x38, 0x42, 0x37, 0x6c, 0x3f, 0x10, 0xbe, 0x59, 0x06, 0x7a, 0xb9,
-  0x97, 0xc0, 0x76, 0xe3, 0xf6, 0x83, 0xf8, 0x58, 0xe0, 0xd0, 0xe7, 0x82,
-  0x61, 0x2e, 0x70, 0xca, 0x82, 0x48, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0xe2,
-  0x07, 0x05, 0x1d, 0x6e, 0x08, 0x7a, 0x50, 0x00, 0x83, 0x59, 0x86, 0x7a,
-  0xb1, 0x97, 0xc0, 0x7e, 0x3f, 0x18, 0xe2, 0x33, 0x4b, 0x70, 0x2f, 0x46,
-  0x90, 0x7f, 0x00, 0x9f, 0x59, 0x82, 0x7b, 0x19, 0x68, 0x71, 0xb4, 0x78,
-  0xc1, 0xe4, 0x85, 0xa8, 0x17, 0xc1, 0x5e, 0x54, 0x67, 0x5e, 0x2e, 0x18,
-  0xe6, 0x02, 0xa7, 0x6e, 0x73, 0xea, 0x60, 0x3f, 0x18, 0xe6, 0xca, 0x3d,
-  0x18, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10,
-  0x0c, 0xbc, 0x36, 0x14, 0x7a, 0x50, 0xe0, 0xff, 0x40, 0x0d, 0x85, 0xd1,
-  0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20,
-  0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x8d, 0x0e,
-  0x05, 0x32, 0x14, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83,
-  0xad, 0x0e, 0x85, 0x32, 0x14, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10,
-  0x04, 0x83, 0xcd, 0x0e, 0x05, 0x33, 0x14, 0x12, 0x22, 0x18, 0x31, 0x50,
-  0x00, 0x10, 0x04, 0x03, 0xc5, 0x0f, 0x85, 0x32, 0x14, 0x4c, 0x50, 0x08,
-  0xe2, 0x50, 0xb0, 0x41, 0xe1, 0x0d, 0x85, 0xd1, 0x84, 0x00, 0x18, 0x6e,
-  0x08, 0xe6, 0x50, 0x08, 0x83, 0x0b, 0x9c, 0x9a, 0x25, 0xc0, 0x97, 0x81,
-  0x16, 0x47, 0x35, 0xc6, 0x05, 0x0e, 0x09, 0x71, 0x71, 0x89, 0x72, 0x11,
-  0xee, 0x05, 0x0e, 0x09, 0x73, 0x99, 0x65, 0xc8, 0x97, 0x7d, 0x31, 0xcf,
-  0x60, 0x38, 0x62, 0xfd, 0x72, 0x50, 0x18, 0xbe, 0x63, 0xbf, 0x61, 0x86,
-  0x1b, 0x02, 0x12, 0x14, 0xc8, 0xa0, 0x86, 0x40, 0x87, 0x23, 0xfe, 0xa3,
-  0x07, 0x85, 0xe1, 0xab, 0x40, 0xd0, 0x0b, 0x91, 0x61, 0x86, 0x1b, 0x82,
-  0x13, 0x14, 0xc8, 0xa0, 0x82, 0x41, 0x67, 0x19, 0xf4, 0xe5, 0x65, 0x82,
-  0x5b, 0xff, 0x60, 0x98, 0x03, 0xf9, 0x60, 0x98, 0x11, 0x83, 0x03, 0x00,
-  0x41, 0x30, 0xf0, 0x4c, 0x51, 0xb0, 0x43, 0xa1, 0x06, 0x85, 0x51, 0x14,
-  0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34,
-  0x81, 0x18, 0x8a, 0x38, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb6,
-  0x56, 0x14, 0xfa, 0x50, 0x38, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10,
-  0x0c, 0x36, 0x57, 0x14, 0xfc, 0x50, 0x60, 0x88, 0x60, 0xc4, 0x00, 0x01,
-  0x40, 0x10, 0x0c, 0xb6, 0x57, 0x14, 0xfe, 0x50, 0x90, 0x88, 0x60, 0xc4,
-  0x40, 0x01, 0x40, 0x10, 0x0c, 0x94, 0x5b, 0x14, 0xfc, 0x50, 0xf8, 0x41,
-  0x21, 0x50, 0x45, 0xe1, 0x0d, 0x05, 0x54, 0x14, 0x46, 0x13, 0x02, 0x60,
-  0xb8, 0x21, 0x60, 0x45, 0x21, 0x0c, 0x2e, 0x70, 0x6a, 0x96, 0xe0, 0x65,
-  0x86, 0x1b, 0xfe, 0x33, 0x88, 0x45, 0x01, 0x0c, 0x66, 0x19, 0xf8, 0xa5,
-  0x5f, 0x82, 0xda, 0x41, 0x41, 0x14, 0x05, 0xb8, 0xc0, 0xa9, 0x11, 0x83,
-  0x03, 0x00, 0x41, 0x30, 0x98, 0x74, 0x51, 0x18, 0x45, 0xc1, 0x04, 0x03,
-  0x3a, 0x14, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0xda, 0x45, 0x61,
-  0x14, 0x85, 0x40, 0xb8, 0x60, 0x98, 0xf2, 0x41, 0xe1, 0x14, 0x05, 0xb8,
-  0xc0, 0xa9, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x98, 0x7e, 0x51, 0x40,
-  0x45, 0x61, 0x47, 0xf2, 0x50, 0x18, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83,
-  0x09, 0x1c, 0x05, 0x54, 0x14, 0x02, 0xe1, 0x82, 0x61, 0x2e, 0x70, 0xea,
-  0x0e, 0xa7, 0xee, 0x04, 0x85, 0x61, 0x8e, 0xe7, 0x83, 0x61, 0x8e, 0x18,
-  0xe6, 0x88, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0xc0, 0x23, 0x47,
-  0x81, 0x16, 0x85, 0x39, 0x14, 0xc2, 0x51, 0x18, 0x4d, 0x08, 0x80, 0xd1,
-  0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0x22, 0x91,
-  0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xd8, 0xd6, 0x51, 0xd8, 0x45, 0x21,
-  0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xd8, 0xd8, 0x51, 0xe0,
-  0x45, 0x21, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xd8, 0xda,
-  0x51, 0xe8, 0x45, 0x21, 0x21, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30,
-  0x50, 0xea, 0x51, 0xe0, 0x45, 0xa1, 0x0f, 0x85, 0x00, 0x1d, 0x85, 0x56,
-  0x14, 0xcc, 0x51, 0x18, 0x4d, 0x08, 0x80, 0xe1, 0x86, 0x40, 0x1d, 0x85,
-  0x30, 0xb8, 0xc0, 0xa9, 0x59, 0x82, 0x97, 0x19, 0x6e, 0xe8, 0xd1, 0xc0,
-  0x1d, 0x05, 0x30, 0x98, 0x65, 0xf0, 0x97, 0x97, 0x09, 0x0c, 0x0e, 0x05,
-  0x39, 0x14, 0xe2, 0x33, 0x1c, 0x31, 0x86, 0xc1, 0x1c, 0x0a, 0xc4, 0x37,
-  0xcb, 0xf0, 0x2f, 0x22, 0x13, 0x18, 0x1d, 0x0a, 0x64, 0x18, 0xc4, 0xc7,
-  0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x53, 0x16, 0x18, 0xf2, 0xb1,
-  0x22, 0x88, 0x4f, 0x11, 0xf8, 0x28, 0xe8, 0x70, 0x43, 0x60, 0x8f, 0x02,
-  0x18, 0xcc, 0x32, 0x80, 0x4c, 0xc8, 0x04, 0x36, 0xf0, 0xa1, 0x00, 0x9f,
-  0x59, 0x02, 0x93, 0xb1, 0x3d, 0x14, 0x88, 0xf8, 0xcc, 0x12, 0x98, 0xcc,
-  0x70, 0x84, 0x1b, 0x06, 0x7c, 0x28, 0x08, 0xdf, 0x2c, 0xc3, 0xc8, 0x98,
-  0x4c, 0x60, 0x6f, 0x18, 0xf4, 0xa1, 0x10, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c,
-  0x30, 0xcc, 0x05, 0x4e, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45,
-  0x8c, 0xa4, 0xa0, 0xc3, 0x0d, 0x41, 0x48, 0x0a, 0x60, 0x30, 0xcb, 0x40,
-  0x32, 0x25, 0x13, 0x58, 0x29, 0x0a, 0x43, 0x7c, 0x66, 0x09, 0x4c, 0xc6,
-  0x08, 0x54, 0x14, 0xe0, 0x33, 0x4b, 0x60, 0x32, 0x03, 0x2d, 0x8e, 0x06,
-  0x32, 0x58, 0xc8, 0x10, 0x24, 0x23, 0x94, 0x8c, 0x3c, 0x0a, 0x22, 0x73,
-  0xc1, 0x30, 0x76, 0x8a, 0xc2, 0x2a, 0x0a, 0xf1, 0x19, 0x8e, 0x60, 0x15,
-  0x56, 0x14, 0x88, 0x6f, 0x96, 0xe1, 0x64, 0x54, 0x26, 0xb0, 0x56, 0x14,
-  0x5a, 0x25, 0x3e, 0x16, 0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9c, 0xb2,
-  0xc0, 0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x88, 0x49, 0x41, 0x87, 0x1b,
-  0x82, 0x97, 0x14, 0xc0, 0x60, 0x96, 0x01, 0x65, 0x52, 0x26, 0xb0, 0xa1,
-  0x16, 0x05, 0xf8, 0xcc, 0x12, 0xb8, 0x8c, 0xc9, 0xa2, 0x40, 0xc4, 0x67,
-  0x96, 0xc0, 0x65, 0x86, 0x23, 0x6e, 0x65, 0x16, 0x05, 0xe1, 0x9b, 0x65,
-  0x58, 0x19, 0x97, 0x09, 0x0c, 0x57, 0x68, 0x51, 0x88, 0x8f, 0x05, 0x0e,
-  0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa7, 0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10,
-  0x9f, 0x22, 0x78, 0x52, 0xd0, 0xe1, 0x86, 0x40, 0x27, 0x05, 0x30, 0x98,
-  0x65, 0x60, 0x99, 0x96, 0x09, 0x8c, 0x17, 0x85, 0x21, 0x3e, 0xb3, 0x04,
-  0x2e, 0x63, 0x44, 0x38, 0x0a, 0xf0, 0x99, 0x25, 0x70, 0x99, 0x81, 0x16,
-  0x47, 0x43, 0x19, 0x2c, 0x65, 0x08, 0x96, 0x11, 0x5a, 0x86, 0xb4, 0x54,
-  0xe6, 0x82, 0x61, 0x2e, 0x70, 0xea, 0x36, 0xa7, 0xae, 0x15, 0x85, 0x61,
-  0x4e, 0xfc, 0x83, 0x61, 0x8e, 0x18, 0xe6, 0x88, 0x61, 0x46, 0x0c, 0x0e,
-  0x00, 0x04, 0xc1, 0xc0, 0x53, 0x4b, 0x41, 0x27, 0x85, 0x7c, 0x14, 0xce,
-  0x52, 0x18, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84,
-  0xd1, 0x04, 0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30,
-  0xd8, 0xe2, 0x52, 0x08, 0x4b, 0x21, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00,
-  0x41, 0x30, 0xd8, 0xe4, 0x52, 0x10, 0x4b, 0x21, 0x21, 0x82, 0x11, 0x03,
-  0x04, 0x00, 0x41, 0x30, 0xd8, 0xe6, 0x52, 0x18, 0x4b, 0x21, 0x21, 0x82,
-  0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x50, 0xf6, 0x52, 0x10, 0x4b, 0x61,
-  0x24, 0x85, 0xc0, 0x2d, 0x85, 0x99, 0x14, 0xd8, 0x52, 0x18, 0x4d, 0x08,
-  0x80, 0xe1, 0x86, 0x00, 0x2e, 0x85, 0x30, 0xb8, 0xc0, 0xa9, 0x59, 0x82,
-  0x97, 0x19, 0x68, 0x71, 0x54, 0x43, 0x5f, 0xd4, 0x94, 0xc8, 0x17, 0x97,
-  0xe0, 0x17, 0xc1, 0x65, 0xd4, 0x94, 0xe8, 0x97, 0x59, 0x06, 0x98, 0x91,
-  0x99, 0x71, 0x0d, 0x86, 0x23, 0xd4, 0xcf, 0x26, 0x85, 0xe1, 0xbb, 0xf5,
-  0x1b, 0x66, 0xb8, 0x21, 0x08, 0x49, 0x81, 0x0c, 0x6a, 0x08, 0x74, 0x38,
-  0x82, 0x5f, 0x74, 0x52, 0x18, 0xbe, 0x0a, 0x04, 0x3d, 0x7f, 0x19, 0x66,
-  0xb8, 0x21, 0x20, 0x49, 0x81, 0x0c, 0x2a, 0x18, 0x74, 0x96, 0x21, 0x66,
-  0xcc, 0x26, 0x38, 0x74, 0x14, 0x86, 0xb9, 0xfe, 0x0f, 0x86, 0x19, 0x31,
-  0x38, 0x00, 0x10, 0x04, 0x03, 0x6f, 0x34, 0x85, 0xb9, 0x14, 0x64, 0x52,
-  0x00, 0x4d, 0x61, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61,
-  0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x43, 0x46, 0x0c, 0x10, 0x00, 0x04,
-  0xc1, 0x60, 0x53, 0x4d, 0x41, 0x2f, 0x85, 0x83, 0x08, 0x46, 0x0c, 0x10,
-  0x00, 0x04, 0xc1, 0x60, 0x5b, 0x4d, 0x61, 0x2f, 0x05, 0x86, 0x08, 0x46,
-  0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0x63, 0x4d, 0x81, 0x2f, 0x05, 0x89,
-  0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x40, 0xa1, 0x4d, 0x61, 0x2f,
-  0x05, 0x9e, 0x14, 0x82, 0xd3, 0x14, 0xd8, 0x52, 0x28, 0x4d, 0x61, 0x34,
-  0x21, 0x00, 0x86, 0x1b, 0x82, 0xd4, 0x14, 0xc2, 0xe0, 0x02, 0xa7, 0x66,
-  0x09, 0xcc, 0x66, 0xb8, 0x81, 0x5f, 0x03, 0xd7, 0x14, 0xc0, 0x60, 0x96,
-  0x61, 0x66, 0x68, 0x26, 0x28, 0x9c, 0x14, 0xfe, 0x52, 0x80, 0x0b, 0x9c,
-  0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0xe9, 0x36, 0x05, 0xd0, 0x14,
-  0x4a, 0x30, 0x88, 0x4b, 0x61, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x26,
-  0xdc, 0x14, 0x40, 0x53, 0x08, 0x84, 0x0b, 0x86, 0xa9, 0x9d, 0x14, 0x48,
-  0x53, 0x80, 0x0b, 0x9c, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x89,
-  0x37, 0x85, 0xd2, 0x14, 0x70, 0xc6, 0x2e, 0x85, 0x11, 0x83, 0x03, 0x00,
-  0x41, 0x30, 0x98, 0x7a, 0x53, 0x28, 0x4d, 0x21, 0x10, 0x2e, 0x18, 0xe6,
-  0x02, 0xa7, 0xee, 0x70, 0xea, 0x48, 0x52, 0x18, 0xe6, 0x72, 0x50, 0x18,
-  0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c,
-  0xbc, 0xf0, 0x14, 0x62, 0x53, 0x80, 0x4b, 0xc1, 0x37, 0x85, 0xd1, 0x84,
-  0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86,
-  0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x0d, 0x3d, 0x05,
-  0xdc, 0x14, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x2d,
-  0x3d, 0x85, 0xdc, 0x14, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04,
-  0x83, 0x4d, 0x3d, 0x05, 0xdd, 0x14, 0x12, 0x22, 0x18, 0x31, 0x50, 0x00,
-  0x10, 0x04, 0x03, 0x45, 0x3e, 0x85, 0xdc, 0x14, 0xf4, 0x52, 0x08, 0xca,
-  0x53, 0x50, 0x4d, 0x61, 0x3c, 0x85, 0xd1, 0x84, 0x00, 0x18, 0x6e, 0x08,
-  0xce, 0x53, 0x08, 0x83, 0x0b, 0x9c, 0x9a, 0x25, 0x30, 0x9b, 0xe1, 0x06,
-  0x9d, 0x0d, 0xd6, 0x53, 0x00, 0x83, 0x59, 0x86, 0x9a, 0x31, 0x9b, 0xc0,
-  0xda, 0x52, 0x78, 0x4b, 0x21, 0x3e, 0xc3, 0x11, 0x62, 0x18, 0xc0, 0xa5,
-  0x40, 0x7c, 0xb3, 0x0c, 0x36, 0x93, 0x33, 0x81, 0xc5, 0xa5, 0x30, 0x86,
-  0x41, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x38, 0x65, 0x81,
-  0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x51, 0x9f, 0x82, 0x0e, 0x37, 0x04,
-  0xf3, 0x29, 0x80, 0xc1, 0x2c, 0xc3, 0xcd, 0xe0, 0x4c, 0x60, 0x43, 0x5e,
-  0x0a, 0xf0, 0x99, 0x25, 0xe8, 0x19, 0xc3, 0x4b, 0x81, 0x88, 0xcf, 0x2c,
-  0x41, 0xcf, 0x0c, 0x47, 0xb4, 0x61, 0x90, 0x97, 0x82, 0xf0, 0xcd, 0x32,
-  0xe8, 0x4c, 0xcf, 0x04, 0xe6, 0x86, 0x81, 0x5e, 0x0a, 0xf1, 0xb1, 0xc0,
-  0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0xe0, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08,
-  0xe2, 0x53, 0x04, 0x88, 0x0a, 0x3a, 0xdc, 0x10, 0xf8, 0xa7, 0x00, 0x06,
-  0xb3, 0x0c, 0x3b, 0xc3, 0x33, 0x81, 0x89, 0xa6, 0x30, 0xc4, 0x67, 0x96,
-  0xa0, 0x67, 0x8c, 0x28, 0x4d, 0x01, 0x3e, 0xb3, 0x04, 0x3d, 0x33, 0xd0,
-  0xe2, 0x68, 0x37, 0x83, 0xe1, 0x0c, 0xb1, 0x33, 0x02, 0xcf, 0xc0, 0xa3,
-  0x90, 0x33, 0x17, 0x0c, 0x63, 0xa4, 0x29, 0xa0, 0xa6, 0x10, 0x9f, 0xe1,
-  0x88, 0xd4, 0x49, 0x4d, 0x81, 0xf8, 0x66, 0x19, 0x7c, 0x26, 0x6c, 0x02,
-  0x53, 0x4d, 0x41, 0x75, 0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9,
-  0xc0, 0x29, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0x17, 0x15,
-  0x74, 0xb8, 0x21, 0x60, 0x51, 0x01, 0x0c, 0x66, 0x19, 0x7e, 0x06, 0x6c,
-  0x02, 0x1b, 0x64, 0x53, 0x80, 0xcf, 0x2c, 0x41, 0xd9, 0xd8, 0x6b, 0x0a,
-  0x44, 0x7c, 0x66, 0x09, 0xca, 0x66, 0x38, 0x82, 0x76, 0x60, 0x53, 0x10,
-  0xbe, 0x59, 0x06, 0xb1, 0x29, 0x9b, 0xc0, 0x6a, 0x27, 0x36, 0x85, 0xf8,
-  0x58, 0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x70, 0xca, 0x82, 0x48, 0x3e,
-  0x56, 0x04, 0xf1, 0x29, 0x22, 0x47, 0x05, 0x1d, 0x6e, 0x08, 0x6e, 0x54,
-  0x00, 0x83, 0x59, 0x86, 0xb1, 0x21, 0x9b, 0xc0, 0x72, 0x53, 0x18, 0xe2,
-  0x33, 0x4b, 0x50, 0x36, 0x46, 0xf8, 0xa6, 0x00, 0x9f, 0x59, 0x82, 0xb2,
-  0x19, 0x68, 0x71, 0xb4, 0x9f, 0xc1, 0xc0, 0x86, 0x18, 0x1b, 0x81, 0x6c,
-  0xfc, 0x2e, 0x6c, 0x2e, 0x18, 0xe6, 0x02, 0xa7, 0x6e, 0x73, 0xea, 0x54,
-  0x53, 0x18, 0xe6, 0x7e, 0x51, 0x18, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66,
-  0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xbc, 0x33, 0x15, 0x6e, 0x54, 0xb0,
-  0x4f, 0x81, 0x4c, 0x85, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1,
-  0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00,
-  0x10, 0x04, 0x83, 0xcd, 0x4d, 0x05, 0x1f, 0x15, 0x12, 0x22, 0x18, 0x31,
-  0x40, 0x00, 0x10, 0x04, 0x83, 0xed, 0x4d, 0x85, 0x1f, 0x15, 0x12, 0x22,
-  0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x0d, 0x4e, 0x05, 0x30, 0x15,
-  0x12, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x03, 0x05, 0x4f, 0x85,
-  0x1f, 0x15, 0x40, 0x54, 0x08, 0xd6, 0x54, 0x80, 0x51, 0x21, 0x4d, 0x85,
-  0xd1, 0x84, 0x00, 0x18, 0x6e, 0x08, 0xda, 0x54, 0x08, 0x83, 0x0b, 0x9c,
-  0x9a, 0x25, 0x30, 0x9b, 0x81, 0x16, 0x47, 0x35, 0x62, 0x86, 0x8c, 0x09,
-  0x98, 0x71, 0x89, 0x99, 0x11, 0xca, 0x86, 0x8c, 0x09, 0x9a, 0xb1, 0xf1,
-  0x0d, 0x50, 0x54, 0x80, 0xcf, 0x2c, 0xc3, 0xd9, 0xa4, 0x4d, 0xf8, 0x06,
-  0xc3, 0x11, 0xe5, 0x1b, 0xd0, 0xa8, 0x30, 0x7c, 0x67, 0xbe, 0xc1, 0x30,
-  0xc3, 0x0d, 0xc1, 0x7f, 0x0a, 0x64, 0x50, 0x43, 0xa0, 0xc3, 0x11, 0x05,
-  0x8e, 0x0a, 0xc3, 0x57, 0x81, 0xa0, 0x77, 0x0c, 0x33, 0xdc, 0x10, 0x88,
-  0xa8, 0x40, 0x06, 0x15, 0x0c, 0x3a, 0xcb, 0x80, 0x36, 0x7d, 0x13, 0x9c,
-  0x79, 0x0a, 0xc3, 0xdc, 0x3e, 0x0a, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08,
-  0x82, 0x81, 0x17, 0xaa, 0x42, 0x9c, 0x0a, 0x30, 0x2a, 0xf8, 0xa9, 0x30,
-  0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09,
-  0xc4, 0x50, 0xc4, 0x21, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0xa1,
-  0xaa, 0x80, 0xa7, 0xc2, 0x41, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60,
-  0xb0, 0xa5, 0xaa, 0x90, 0xa7, 0x02, 0x43, 0x04, 0x23, 0x06, 0x08, 0x00,
-  0x82, 0x60, 0xb0, 0xa9, 0xaa, 0xa0, 0xa7, 0x82, 0x44, 0x04, 0x23, 0x06,
-  0x0a, 0x00, 0x82, 0x60, 0xa0, 0xc8, 0xaa, 0x90, 0xa7, 0x82, 0x8e, 0x0a,
-  0x41, 0xa9, 0x0a, 0x6a, 0x2a, 0x8c, 0xaa, 0x30, 0x9a, 0x10, 0x00, 0xc3,
-  0x0d, 0xc1, 0xa9, 0x0a, 0x61, 0x70, 0x81, 0x53, 0xb3, 0x04, 0x7d, 0x33,
-  0xdc, 0xa0, 0xbf, 0x01, 0xab, 0x0a, 0x60, 0x30, 0xcb, 0xa0, 0x36, 0x6b,
-  0x13, 0x94, 0x8d, 0x0a, 0x7d, 0x2a, 0xc0, 0x05, 0x4e, 0x8d, 0x18, 0x1c,
-  0x00, 0x08, 0x82, 0xc1, 0x54, 0xab, 0x82, 0x9f, 0x0a, 0x20, 0x1c, 0xbc,
-  0xa9, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x93, 0xad, 0x0a, 0x7e,
-  0x2a, 0x04, 0xc2, 0x05, 0xc3, 0x54, 0x8e, 0x0a, 0xa2, 0x2a, 0xc0, 0x05,
-  0x4e, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0xa4, 0xab, 0xc2, 0xa8,
-  0x0a, 0x61, 0x40, 0xa7, 0xc2, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x4c,
-  0xbb, 0x2a, 0x8c, 0xaa, 0x10, 0x08, 0x17, 0x0c, 0x73, 0x81, 0x53, 0x77,
-  0x38, 0x75, 0x22, 0x2a, 0x0c, 0x73, 0x37, 0x29, 0x0c, 0x73, 0xc4, 0x30,
-  0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0xde, 0xaf, 0x0a,
-  0xaf, 0x2a, 0xb8, 0xa9, 0xc0, 0xab, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26,
-  0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x66, 0xae, 0x82, 0xad, 0x0a, 0x09,
-  0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x76, 0xae, 0xc2, 0xad,
-  0x0a, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x86, 0xae,
-  0x02, 0xae, 0x0a, 0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0x81,
-  0x02, 0xaf, 0xc2, 0xad, 0x0a, 0x78, 0x2a, 0x04, 0xe3, 0x2a, 0xa0, 0xaa,
-  0x10, 0xae, 0xc2, 0x68, 0x42, 0x00, 0x0c, 0x37, 0x04, 0xe5, 0x2a, 0x84,
-  0xc1, 0x05, 0x4e, 0xcd, 0x12, 0xf4, 0xcd, 0x70, 0x03, 0x0e, 0x07, 0xe9,
-  0x2a, 0x80, 0xc1, 0x2c, 0x03, 0xdb, 0xf4, 0x4d, 0x60, 0x6b, 0x2a, 0xb4,
-  0xa9, 0x10, 0x9f, 0xe1, 0x88, 0x1e, 0x0e, 0xdc, 0x54, 0x20, 0xbe, 0x59,
-  0x86, 0xb6, 0x81, 0x9b, 0xc0, 0xde, 0x54, 0xf0, 0xe1, 0x20, 0x3e, 0x16,
-  0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9c, 0xb2, 0xc0, 0x90, 0x8f, 0x15,
-  0x41, 0x7c, 0x8a, 0x98, 0x57, 0x41, 0x87, 0x1b, 0x82, 0x78, 0x15, 0xc0,
-  0x60, 0x96, 0xc1, 0x6d, 0xde, 0x26, 0xb0, 0xe1, 0x4e, 0x05, 0xf8, 0xcc,
-  0x12, 0xd0, 0x8d, 0xd9, 0xa9, 0x40, 0xc4, 0x67, 0x96, 0x80, 0x6e, 0x86,
-  0x23, 0xd0, 0x38, 0xb8, 0x53, 0x41, 0xf8, 0x66, 0x19, 0xe2, 0x86, 0x6e,
-  0x02, 0x4b, 0xe3, 0x00, 0x4f, 0x85, 0xf8, 0x58, 0xe0, 0xd0, 0xe7, 0x82,
-  0x61, 0x2e, 0x70, 0xca, 0x82, 0x48, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0xc2,
-  0x5f, 0x05, 0x1d, 0x6e, 0x08, 0xf8, 0x55, 0x00, 0x83, 0x59, 0x06, 0xb9,
-  0x99, 0x9b, 0xc0, 0x40, 0x55, 0x18, 0xe2, 0x33, 0x4b, 0x40, 0x37, 0x46,
-  0x8c, 0xaa, 0x00, 0x9f, 0x59, 0x02, 0xba, 0x19, 0x68, 0x71, 0x34, 0xb7,
-  0xc1, 0xde, 0x86, 0x90, 0x1b, 0x61, 0x6e, 0xcc, 0x71, 0x80, 0x9b, 0x0b,
-  0x86, 0x31, 0x51, 0x15, 0x4c, 0x55, 0x88, 0xcf, 0x70, 0x84, 0x2c, 0x9c,
-  0xaa, 0x40, 0x7c, 0xb3, 0x0c, 0x75, 0x83, 0x37, 0x81, 0xa1, 0xaa, 0x30,
-  0x0b, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0xe0, 0x94, 0x05,
-  0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x04, 0xcb, 0x0a, 0x3a, 0xdc, 0x10,
-  0xa8, 0xac, 0x00, 0x06, 0xb3, 0x0c, 0x76, 0x73, 0x37, 0x81, 0x0d, 0xb0,
-  0x2a, 0xc0, 0x67, 0x96, 0x80, 0x6f, 0xac, 0x55, 0x05, 0x22, 0x3e, 0xb3,
-  0x04, 0x7c, 0x33, 0x1c, 0xd1, 0x0b, 0xae, 0x2a, 0x08, 0xdf, 0x2c, 0x43,
-  0xde, 0xf0, 0x4d, 0x60, 0xbe, 0xf0, 0xaa, 0x42, 0x7c, 0x2c, 0x70, 0xe8,
-  0x73, 0xc1, 0x30, 0x17, 0x38, 0x65, 0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8,
-  0x14, 0x71, 0xb3, 0x82, 0x0e, 0x37, 0x04, 0x35, 0x2b, 0x80, 0xc1, 0x2c,
-  0x83, 0xde, 0xec, 0x4d, 0x60, 0xb7, 0x2a, 0x0c, 0xf1, 0x99, 0x25, 0xe0,
-  0x1b, 0x23, 0x78, 0x55, 0x80, 0xcf, 0x2c, 0x01, 0xdf, 0x0c, 0xb4, 0x38,
-  0x9a, 0xdd, 0x60, 0x77, 0x43, 0xe8, 0x8d, 0xb0, 0x37, 0xb0, 0x81, 0x37,
-  0x17, 0x0c, 0x73, 0x81, 0x53, 0xb7, 0x39, 0x75, 0xa8, 0x2a, 0x0c, 0x73,
-  0xbd, 0x29, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00,
-  0x20, 0x08, 0x06, 0x5e, 0xd9, 0x0a, 0x35, 0x2b, 0xd0, 0xab, 0x20, 0xb6,
-  0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c,
-  0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1,
-  0xc6, 0xb6, 0x02, 0xcf, 0x0a, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08,
-  0x82, 0xc1, 0xd6, 0xb6, 0x42, 0xcf, 0x0a, 0x09, 0x11, 0x8c, 0x18, 0x20,
-  0x00, 0x08, 0x82, 0xc1, 0xe6, 0xb6, 0x82, 0xcf, 0x0a, 0x09, 0x11, 0x8c,
-  0x18, 0x28, 0x00, 0x08, 0x82, 0x81, 0x62, 0xb7, 0x42, 0xcf, 0x0a, 0xfe,
-  0x2a, 0x04, 0x69, 0x2b, 0xb8, 0xac, 0x70, 0xb6, 0xc2, 0x68, 0x42, 0x00,
-  0x0c, 0x37, 0x04, 0x6b, 0x2b, 0x84, 0xc1, 0x05, 0x4e, 0xcd, 0x12, 0xf4,
-  0xcd, 0x40, 0x8b, 0xa3, 0x1a, 0x68, 0x03, 0xe6, 0xc4, 0xd9, 0xb8, 0x84,
-  0xda, 0x08, 0x7c, 0x03, 0xe6, 0xc4, 0xda, 0xcc, 0x32, 0xf8, 0x0d, 0xe8,
-  0xf8, 0x72, 0x30, 0x1c, 0x31, 0xce, 0x41, 0xcc, 0x0a, 0xc3, 0x77, 0xe4,
-  0x1c, 0x0c, 0x33, 0xdc, 0x10, 0xf0, 0xab, 0x40, 0x06, 0x35, 0x04, 0x3a,
-  0x1c, 0x21, 0x12, 0x35, 0x2b, 0x0c, 0x5f, 0x05, 0x82, 0x1e, 0x49, 0x0c,
-  0x33, 0xdc, 0x10, 0xfc, 0xab, 0x40, 0x06, 0x15, 0x0c, 0x3a, 0xcb, 0xf0,
-  0x37, 0xb4, 0x13, 0xdc, 0xb8, 0x0a, 0xc3, 0x1c, 0x7e, 0x0a, 0xc3, 0x8c,
-  0x18, 0x1c, 0x00, 0x08, 0x82, 0x81, 0xe7, 0xb7, 0x82, 0xdb, 0x0a, 0x2d,
-  0x2b, 0xec, 0xad, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a,
-  0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0xc4, 0x21, 0x23, 0x06, 0x08, 0x00,
-  0x82, 0x60, 0xb0, 0x95, 0xae, 0x50, 0xb7, 0xc2, 0x41, 0x04, 0x23, 0x06,
-  0x08, 0x00, 0x82, 0x60, 0xb0, 0x99, 0xae, 0x60, 0xb7, 0x02, 0x43, 0x04,
-  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0x9d, 0xae, 0x70, 0xb7, 0x82,
-  0x44, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xa0, 0xbc, 0xae, 0x60,
-  0xb7, 0xc2, 0xcd, 0x0a, 0x81, 0xe8, 0x0a, 0x67, 0x2b, 0x80, 0xae, 0x30,
-  0x9a, 0x10, 0x00, 0xc3, 0x0d, 0x01, 0xe9, 0x0a, 0x61, 0x70, 0x81, 0x53,
-  0xb3, 0x04, 0xb4, 0x33, 0xdc, 0x70, 0xcf, 0x41, 0xea, 0x0a, 0x60, 0x30,
-  0xcb, 0x10, 0x3a, 0xa2, 0x13, 0xd4, 0xcc, 0x0a, 0x7a, 0x2b, 0xc0, 0x05,
-  0x4e, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x24, 0xbb, 0xc2, 0xde,
-  0x0a, 0xfe, 0x1c, 0xb0, 0xad, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06,
-  0xd3, 0xec, 0x0a, 0x7b, 0x2b, 0x04, 0xc2, 0x05, 0xc3, 0x94, 0xcd, 0x0a,
-  0x7f, 0x2b, 0xc0, 0x05, 0x4e, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1,
-  0x74, 0xbb, 0x02, 0xe8, 0x0a, 0x3e, 0x11, 0xb7, 0xc2, 0x88, 0xc1, 0x01,
-  0x80, 0x20, 0x18, 0x4c, 0xb8, 0x2b, 0x80, 0xae, 0x10, 0x08, 0x17, 0x0c,
-  0x73, 0x81, 0x53, 0x77, 0x38, 0x75, 0xff, 0x2a, 0x0c, 0x73, 0x34, 0x2a,
-  0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08,
-  0x06, 0x1e, 0xef, 0x0a, 0xac, 0x2b, 0xac, 0xad, 0x90, 0xbb, 0xc2, 0x68,
-  0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10,
-  0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x36, 0xbe,
-  0xc2, 0xec, 0x0a, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1,
-  0x46, 0xbe, 0x02, 0xed, 0x0a, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08,
-  0x82, 0xc1, 0x56, 0xbe, 0x42, 0xed, 0x0a, 0x09, 0x11, 0x8c, 0x18, 0x28,
-  0x00, 0x08, 0x82, 0x81, 0xd2, 0xbe, 0x02, 0xed, 0x0a, 0x75, 0x2b, 0x04,
-  0xe0, 0x2b, 0x94, 0xae, 0xe0, 0xbb, 0xc2, 0x68, 0x42, 0x00, 0x0c, 0x37,
-  0x04, 0xe2, 0x2b, 0x84, 0xc1, 0x05, 0x4e, 0xcd, 0x12, 0xd0, 0xce, 0x70,
-  0x43, 0x4d, 0x07, 0xe6, 0x2b, 0x80, 0xc1, 0x2c, 0xc3, 0xe8, 0xd0, 0x4e,
-  0x60, 0x68, 0x2b, 0xa8, 0xad, 0x10, 0x9f, 0xe1, 0x88, 0x9d, 0x0e, 0xd6,
-  0x56, 0x20, 0xbe, 0x59, 0x06, 0xd2, 0x39, 0x9d, 0xc0, 0xd8, 0x56, 0xe0,
-  0xe9, 0x20, 0x3e, 0x16, 0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9c, 0xb2,
-  0xc0, 0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x80, 0x5f, 0x41, 0x87, 0x1b,
-  0x02, 0xf7, 0x15, 0xc0, 0x60, 0x96, 0xa1, 0x74, 0x4c, 0x27, 0xb0, 0x81,
-  0x6e, 0x05, 0xf8, 0xcc, 0x12, 0xac, 0x8e, 0xcd, 0xad, 0x40, 0xc4, 0x67,
-  0x96, 0x60, 0x75, 0x86, 0x23, 0xcc, 0x3a, 0xa0, 0x5b, 0x41, 0xf8, 0x66,
-  0x19, 0x50, 0x67, 0x75, 0x02, 0x3b, 0xeb, 0xa0, 0x6e, 0x85, 0xf8, 0x58,
-  0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x70, 0xca, 0x82, 0x48, 0x3e, 0x56,
-  0x04, 0xf1, 0x29, 0x62, 0x7f, 0x05, 0x1d, 0x6e, 0x08, 0xf2, 0x57, 0x00,
-  0x83, 0x59, 0x86, 0xd4, 0x51, 0x9d, 0xc0, 0xfa, 0x56, 0x18, 0xe2, 0x33,
-  0x4b, 0xb0, 0x3a, 0x46, 0x80, 0xae, 0x00, 0x9f, 0x59, 0x82, 0xd5, 0x19,
-  0x68, 0x71, 0xb4, 0xd2, 0xc1, 0x4c, 0x87, 0x48, 0x1d, 0x41, 0x75, 0xc4,
-  0x75, 0x38, 0x9d, 0x0b, 0x86, 0xb1, 0xbf, 0x15, 0x46, 0x57, 0x88, 0xcf,
-  0x70, 0xc4, 0x6b, 0x90, 0xae, 0x40, 0x7c, 0xb3, 0x0c, 0xac, 0xf3, 0x3a,
-  0x81, 0x95, 0xae, 0x00, 0x1b, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3,
-  0x5c, 0xe0, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x44, 0x0a,
-  0x0b, 0x3a, 0xdc, 0x10, 0x9c, 0xb0, 0x00, 0x06, 0xb3, 0x0c, 0xad, 0xe3,
-  0x3a, 0x81, 0x0d, 0xad, 0x2b, 0xc0, 0x67, 0x96, 0x60, 0x76, 0x4c, 0x75,
-  0x05, 0x22, 0x3e, 0xb3, 0x04, 0xb3, 0x33, 0x1c, 0xa1, 0x1b, 0xab, 0x2b,
-  0x08, 0xdf, 0x2c, 0x03, 0xec, 0xcc, 0x4e, 0x60, 0xbb, 0xc1, 0xba, 0x42,
-  0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x38, 0x65, 0x41, 0x24,
-  0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x41, 0xc3, 0x82, 0x0e, 0x37, 0x04, 0x32,
-  0x2c, 0x80, 0xc1, 0x2c, 0x43, 0xec, 0xc8, 0x4e, 0x60, 0xb4, 0x2b, 0x0c,
-  0xf1, 0x99, 0x25, 0x98, 0x1d, 0x23, 0x72, 0x57, 0x80, 0xcf, 0x2c, 0xc1,
-  0xec, 0x0c, 0xb4, 0x38, 0x5a, 0xeb, 0x60, 0xae, 0x43, 0xc4, 0x8e, 0x20,
-  0x3b, 0xaa, 0xf3, 0x3a, 0x17, 0x0c, 0x73, 0x81, 0x53, 0xb7, 0x39, 0x75,
-  0xa5, 0x2b, 0x0c, 0x73, 0xba, 0x2a, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c,
-  0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x9e, 0x18, 0x0b, 0x32, 0x2c,
-  0xc4, 0xaf, 0xf0, 0xc3, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1,
-  0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20,
-  0x00, 0x08, 0x82, 0xc1, 0x96, 0xc6, 0x42, 0x0e, 0x0b, 0x09, 0x11, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xa6, 0xc6, 0x82, 0x0e, 0x0b, 0x09,
-  0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xb6, 0xc6, 0xc2, 0x0e,
-  0x0b, 0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0x81, 0x32, 0xc7,
-  0x82, 0x0e, 0x0b, 0xfb, 0x2b, 0x04, 0x66, 0x2c, 0xac, 0xb0, 0x40, 0xc6,
-  0xc2, 0x68, 0x42, 0x00, 0x0c, 0x37, 0x04, 0x68, 0x2c, 0x84, 0xc1, 0x05,
-  0x4e, 0xcd, 0x12, 0xd0, 0xce, 0x40, 0x8b, 0xa3, 0x1a, 0x7f, 0xa3, 0x83,
-  0x85, 0xdf, 0xb8, 0x44, 0xe8, 0x08, 0xb3, 0xa3, 0x83, 0x85, 0xe8, 0xcc,
-  0x32, 0xd4, 0xce, 0xed, 0xec, 0x76, 0x30, 0x1c, 0x01, 0xbe, 0x81, 0x0b,
-  0x0b, 0xc3, 0x77, 0xe1, 0x1b, 0x0c, 0x33, 0xdc, 0x10, 0xe4, 0xaf, 0x40,
-  0x06, 0x35, 0x04, 0x3a, 0x1c, 0xf1, 0x1f, 0x32, 0x2c, 0x0c, 0x5f, 0x05,
-  0x82, 0x5e, 0x88, 0x0c, 0x33, 0xdc, 0x10, 0xf0, 0xaf, 0x40, 0x06, 0x15,
-  0x0c, 0x3a, 0xcb, 0x60, 0x3b, 0xeb, 0x13, 0x1c, 0xf8, 0x0a, 0xc3, 0x5c,
-  0xbd, 0x0a, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x81, 0xb7, 0xc7,
-  0xc2, 0x1a, 0x0b, 0x2a, 0x2c, 0xe0, 0xb1, 0x30, 0x9a, 0x10, 0x00, 0xa3,
-  0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0xc4, 0x21,
-  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0x89, 0xb2, 0x20, 0xc7, 0xc2,
-  0x41, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0x8d, 0xb2, 0x30,
-  0xc7, 0x02, 0x43, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0x91,
-  0xb2, 0x40, 0xc7, 0x82, 0x44, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60,
-  0xa0, 0xb0, 0xb2, 0x30, 0xc7, 0x02, 0x0d, 0x0b, 0xc1, 0x1f, 0x0b, 0x64,
-  0x2c, 0xf4, 0xb1, 0x30, 0x9a, 0x10, 0x00, 0xc3, 0x0d, 0x41, 0x28, 0x0b,
-  0x61, 0x70, 0x81, 0x53, 0xb3, 0x04, 0xeb, 0x33, 0xdc, 0x40, 0xdf, 0x81,
-  0x29, 0x0b, 0x60, 0x30, 0xcb, 0x80, 0x3b, 0xb9, 0x13, 0x14, 0x0c, 0x0b,
-  0x77, 0x2c, 0xc0, 0x05, 0x4e, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1,
-  0xf4, 0xca, 0x02, 0x1e, 0x0b, 0xfb, 0x1b, 0xa4, 0xb1, 0x30, 0x62, 0x70,
-  0x00, 0x20, 0x08, 0x06, 0x13, 0x2c, 0x0b, 0x78, 0x2c, 0x04, 0xc2, 0x05,
-  0xc3, 0xd4, 0x0c, 0x0b, 0x7c, 0x2c, 0xc0, 0x05, 0x4e, 0x8d, 0x18, 0x1c,
-  0x00, 0x08, 0x82, 0xc1, 0x44, 0xcb, 0x42, 0x1f, 0x0b, 0x3b, 0xe2, 0xc6,
-  0xc2, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x4c, 0xb5, 0x2c, 0xf4, 0xb1,
-  0x10, 0x08, 0x17, 0x0c, 0x73, 0x81, 0x53, 0x77, 0x38, 0x75, 0xfc, 0x2b,
-  0x0c, 0x73, 0x31, 0x2b, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62,
-  0x70, 0x00, 0x20, 0x08, 0x06, 0x5e, 0x2e, 0x0b, 0xa9, 0x2c, 0xa0, 0xb1,
-  0x60, 0xcb, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2,
-  0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08,
-  0x82, 0xc1, 0x06, 0xce, 0x02, 0x2c, 0x0b, 0x09, 0x11, 0x8c, 0x18, 0x20,
-  0x00, 0x08, 0x82, 0xc1, 0x16, 0xce, 0x42, 0x2c, 0x0b, 0x09, 0x11, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x26, 0xce, 0x82, 0x2c, 0x0b, 0x09,
-  0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0x81, 0xa2, 0xce, 0x42, 0x2c,
-  0x0b, 0x72, 0x2c, 0x04, 0xbd, 0x2c, 0x88, 0xb2, 0xb0, 0xcb, 0xc2, 0x68,
-  0x42, 0x00, 0x0c, 0x37, 0x04, 0xbf, 0x2c, 0x84, 0xc1, 0x05, 0x4e, 0xcd,
-  0x12, 0xac, 0xcf, 0x70, 0x83, 0x8c, 0x07, 0xe3, 0x2c, 0x80, 0xc1, 0x2c,
-  0x83, 0xee, 0xac, 0x4f, 0x60, 0x65, 0x2c, 0x9c, 0xb1, 0x10, 0x9f, 0xe1,
-  0x08, 0x1c, 0x0e, 0xd0, 0x58, 0x20, 0xbe, 0x59, 0x86, 0xdd, 0xf1, 0x9d,
-  0xc0, 0xd2, 0x58, 0xc8, 0xe1, 0x20, 0x3e, 0x16, 0x0c, 0xf4, 0xb9, 0x60,
-  0x98, 0x0b, 0x9c, 0xb2, 0xc0, 0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x68,
-  0x67, 0x41, 0x87, 0x1b, 0x82, 0x75, 0x16, 0xc0, 0x60, 0x96, 0x81, 0x77,
-  0x7a, 0x27, 0xb0, 0x21, 0x8e, 0x05, 0xf8, 0xcc, 0x12, 0x88, 0x8f, 0xc1,
-  0xb1, 0x40, 0xc4, 0x67, 0x96, 0x40, 0x7c, 0x86, 0x23, 0xc6, 0x38, 0x88,
-  0x63, 0x41, 0xf8, 0x66, 0x19, 0x7e, 0x47, 0x7c, 0x02, 0x23, 0xe3, 0x40,
-  0x8e, 0x85, 0xf8, 0x58, 0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x70, 0xca,
-  0x82, 0x48, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x02, 0x9f, 0x05, 0x1d, 0x6e,
-  0x08, 0xec, 0x59, 0x00, 0x83, 0x59, 0x06, 0xf0, 0x09, 0x9f, 0xc0, 0xf4,
-  0x58, 0x18, 0xe2, 0x33, 0x4b, 0x20, 0x3e, 0x46, 0xf4, 0xb1, 0x00, 0x9f,
-  0x59, 0x02, 0xf1, 0x19, 0x68, 0x71, 0x34, 0xde, 0xc1, 0x7a, 0x87, 0x00,
-  0x1f, 0x21, 0x7c, 0x78, 0x71, 0xf0, 0x9d, 0x0b, 0x86, 0x31, 0x3e, 0x16,
-  0x40, 0x59, 0x88, 0xcf, 0x70, 0x04, 0xab, 0x84, 0xb2, 0x40, 0x7c, 0xb3,
-  0x0c, 0xe3, 0x63, 0x3e, 0x81, 0x89, 0xb2, 0xd0, 0x2a, 0xf1, 0xb1, 0x60,
-  0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0xe0, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08,
-  0xe2, 0x53, 0x84, 0x49, 0x0b, 0x3a, 0xdc, 0x10, 0x90, 0xb4, 0x00, 0x06,
-  0xb3, 0x0c, 0xe4, 0x53, 0x3e, 0x81, 0x0d, 0xaa, 0x2c, 0xc0, 0x67, 0x96,
-  0x40, 0x7d, 0xec, 0x94, 0x05, 0x22, 0x3e, 0xb3, 0x04, 0xea, 0x33, 0x1c,
-  0x71, 0x2b, 0xa8, 0x2c, 0x08, 0xdf, 0x2c, 0xc3, 0xf9, 0xa8, 0x4f, 0x60,
-  0xb8, 0x92, 0xca, 0x42, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17,
-  0x38, 0x65, 0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x11, 0xd3, 0x82,
-  0x0e, 0x37, 0x04, 0x2f, 0x2d, 0x80, 0xc1, 0x2c, 0x03, 0xfa, 0xa4, 0x4f,
-  0x60, 0xb1, 0x2c, 0x0c, 0xf1, 0x99, 0x25, 0x50, 0x1f, 0x23, 0x6c, 0x59,
-  0x80, 0xcf, 0x2c, 0x81, 0xfa, 0x0c, 0xb4, 0x38, 0x1a, 0xf9, 0x60, 0xe5,
-  0x43, 0xa0, 0x8f, 0x90, 0x3e, 0xa4, 0x65, 0x3e, 0x17, 0x0c, 0x73, 0x81,
-  0x53, 0xb7, 0x39, 0x75, 0xa2, 0x2c, 0x0c, 0x73, 0xb7, 0x2b, 0x0c, 0x73,
-  0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0xde,
-  0x4f, 0x0b, 0x2f, 0x2d, 0xb8, 0xb3, 0xc0, 0xd3, 0xc2, 0x68, 0x42, 0x00,
-  0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11,
-  0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x66, 0xd6, 0x82, 0x4d,
-  0x0b, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x76, 0xd6,
-  0xc2, 0x4d, 0x0b, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1,
-  0x86, 0xd6, 0x02, 0x4e, 0x0b, 0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08,
-  0x82, 0x81, 0x02, 0xd7, 0xc2, 0x4d, 0x0b, 0xf8, 0x2c, 0x04, 0x63, 0x2d,
-  0xa0, 0xb4, 0x10, 0xd6, 0xc2, 0x68, 0x42, 0x00, 0x0c, 0x37, 0x04, 0x65,
-  0x2d, 0x84, 0xc1, 0x05, 0x4e, 0xcd, 0x12, 0xac, 0xcf, 0x40, 0x8b, 0xa3,
-  0x1a, 0xb6, 0x43, 0xa3, 0x45, 0xed, 0xb8, 0x04, 0xee, 0x08, 0xea, 0x43,
-  0xa3, 0x45, 0xee, 0xcc, 0x32, 0xb0, 0x8f, 0xfb, 0xe0, 0x7a, 0x30, 0x1c,
-  0xf1, 0xbb, 0xc1, 0x4a, 0x0b, 0xc3, 0x77, 0xe0, 0x1b, 0x0c, 0x33, 0xdc,
-  0x10, 0xd8, 0xb3, 0x40, 0x06, 0x35, 0x04, 0x3a, 0x1c, 0xc1, 0x2f, 0x2f,
-  0x2d, 0x0c, 0x5f, 0x05, 0x82, 0x9e, 0xbf, 0x0c, 0x33, 0xdc, 0x10, 0xe4,
-  0xb3, 0x40, 0x06, 0x15, 0x0c, 0x3a, 0xcb, 0xd0, 0x3e, 0x22, 0x14, 0x5c,
-  0x2f, 0x0b, 0xc3, 0x9c, 0xfc, 0x0a, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08,
-  0x82, 0x81, 0x87, 0xd7, 0x02, 0x5a, 0x0b, 0x27, 0x2d, 0xd4, 0xb5, 0x30,
-  0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09,
-  0xc4, 0x50, 0xc4, 0x21, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0xfd,
-  0xb5, 0xf0, 0xd6, 0xc2, 0x41, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60,
-  0xb0, 0x81, 0xb6, 0x00, 0xd7, 0x02, 0x43, 0x04, 0x23, 0x06, 0x08, 0x00,
-  0x82, 0x60, 0xb0, 0x85, 0xb6, 0x10, 0xd7, 0x82, 0x44, 0x04, 0x23, 0x06,
-  0x0a, 0x00, 0x82, 0x60, 0xa0, 0xa4, 0xb6, 0x00, 0xd7, 0x42, 0x4c, 0x0b,
-  0x01, 0x5f, 0x0b, 0x61, 0x2d, 0xe8, 0xb5, 0x30, 0x9a, 0x10, 0x00, 0xc3,
-  0x0d, 0x81, 0x5f, 0x0b, 0x61, 0x70, 0x81, 0x53, 0xb3, 0x04, 0x22, 0x34,
-  0xdc, 0x10, 0xef, 0xc1, 0x68, 0x0b, 0x60, 0x30, 0xcb, 0xf0, 0x3e, 0xf0,
-  0x13, 0x54, 0x4b, 0x0b, 0x74, 0x2d, 0xc0, 0x05, 0x4e, 0x8d, 0x18, 0x1c,
-  0x00, 0x08, 0x82, 0xc1, 0xc4, 0xda, 0x42, 0x5d, 0x0b, 0xfa, 0x1b, 0x98,
-  0xb5, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x53, 0x6b, 0x0b, 0x75,
-  0x2d, 0x04, 0xc2, 0x05, 0xc3, 0x14, 0x4c, 0x0b, 0x79, 0x2d, 0xc0, 0x05,
-  0x4e, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x14, 0xdb, 0x82, 0x5e,
-  0x0b, 0x38, 0xb3, 0xd6, 0xc2, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x4c,
-  0xb2, 0x2d, 0xe8, 0xb5, 0x10, 0x08, 0x17, 0x0c, 0x73, 0x81, 0x53, 0x77,
-  0x38, 0x75, 0xf9, 0x2c, 0x0c, 0x73, 0x2e, 0x2c, 0x0c, 0x73, 0xc4, 0x30,
-  0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x9e, 0x6d, 0x0b,
-  0xa6, 0x2d, 0x94, 0xb5, 0x30, 0xdb, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26,
-  0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xd6, 0xdb, 0x42, 0x6b, 0x0b, 0x09,
-  0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xe6, 0xdb, 0x82, 0x6b,
-  0x0b, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xf6, 0xdb,
-  0xc2, 0x6b, 0x0b, 0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0x81,
-  0x72, 0xde, 0x82, 0x6b, 0x0b, 0x6f, 0x2d, 0x04, 0xba, 0x2d, 0xfc, 0xb5,
-  0x80, 0xdb, 0xc2, 0x68, 0x42, 0x00, 0x0c, 0x37, 0x04, 0xbc, 0x2d, 0x84,
-  0xc1, 0x05, 0x4e, 0xcd, 0x12, 0x88, 0xd0, 0x70, 0xc3, 0xcb, 0x07, 0xe0,
-  0x2d, 0x80, 0xc1, 0x2c, 0x43, 0xfc, 0x88, 0x50, 0x60, 0x62, 0x2d, 0x90,
-  0xb5, 0x10, 0x9f, 0xe1, 0x88, 0x1b, 0x0e, 0xca, 0x5a, 0x20, 0xbe, 0x59,
-  0x06, 0xf9, 0xa9, 0x9f, 0xc0, 0xcc, 0x5a, 0xc0, 0xe1, 0x20, 0x3e, 0x16,
-  0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9c, 0xb2, 0xc0, 0x90, 0x8f, 0x15,
-  0x41, 0x7c, 0x8a, 0x50, 0x6f, 0x41, 0x87, 0x1b, 0x02, 0xf4, 0x16, 0xc0,
-  0x60, 0x96, 0x61, 0x7e, 0xe8, 0x27, 0xb0, 0xc1, 0xad, 0x05, 0xf8, 0xcc,
-  0x12, 0xe4, 0x8f, 0xb5, 0xb5, 0x40, 0xc4, 0x67, 0x96, 0x20, 0x7f, 0x86,
-  0x23, 0xc4, 0x38, 0x70, 0x6b, 0x41, 0xf8, 0x66, 0x19, 0xec, 0x27, 0x7f,
-  0x02, 0x1b, 0xe3, 0xe0, 0xad, 0x85, 0xf8, 0x58, 0xe0, 0xd0, 0xe7, 0x82,
-  0x61, 0x2e, 0x70, 0xca, 0x82, 0x48, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0xa2,
-  0xbe, 0x05, 0x1d, 0x6e, 0x08, 0xe6, 0x5b, 0x00, 0x83, 0x59, 0x86, 0xfb,
-  0xc1, 0x9f, 0xc0, 0xee, 0x5a, 0x18, 0xe2, 0x33, 0x4b, 0x90, 0x3f, 0x46,
-  0xe8, 0xb5, 0x00, 0x9f, 0x59, 0x82, 0xfc, 0x19, 0x68, 0x71, 0xb4, 0xf9,
-  0xc1, 0xe8, 0x87, 0xb8, 0x1f, 0x01, 0x7f, 0x74, 0x71, 0xa8, 0x9f, 0x0b,
-  0x86, 0xb1, 0xbc, 0x16, 0xfa, 0x5a, 0x88, 0xcf, 0x70, 0x44, 0xea, 0xf8,
-  0xb5, 0x40, 0x7c, 0xb3, 0x0c, 0xfa, 0xd3, 0x3f, 0x81, 0xfd, 0xb5, 0xa0,
-  0x3a, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0xe0, 0x94, 0x05,
-  0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0xc4, 0x88, 0x0b, 0x3a, 0xdc, 0x10,
-  0x84, 0xb8, 0x00, 0x06, 0xb3, 0x0c, 0xfb, 0xc3, 0x3f, 0x81, 0x0d, 0xa7,
-  0x2d, 0xc0, 0x67, 0x96, 0x20, 0x84, 0x8c, 0xb4, 0x05, 0x22, 0x3e, 0xb3,
-  0x04, 0x21, 0x34, 0x1c, 0x41, 0x3b, 0xa5, 0x2d, 0x08, 0xdf, 0x2c, 0x83,
-  0xff, 0x84, 0x50, 0x60, 0xb5, 0x63, 0xda, 0x42, 0x7c, 0x2c, 0x70, 0xe8,
-  0x73, 0xc1, 0x30, 0x17, 0x38, 0x65, 0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8,
-  0x14, 0xe1, 0xe2, 0x82, 0x0e, 0x37, 0x04, 0x2c, 0x2e, 0x80, 0xc1, 0x2c,
-  0xc3, 0xff, 0x80, 0x50, 0x60, 0xae, 0x2d, 0x0c, 0xf1, 0x99, 0x25, 0x08,
-  0x21, 0x23, 0x66, 0x5b, 0x80, 0xcf, 0x2c, 0x41, 0x08, 0x0d, 0xb4, 0x38,
-  0xda, 0xfe, 0x60, 0xfc, 0x43, 0xfc, 0x8f, 0x00, 0x42, 0x7e, 0xd7, 0x3f,
-  0x17, 0x0c, 0x73, 0x81, 0x53, 0xb7, 0x39, 0x75, 0x7f, 0x2d, 0x0c, 0x73,
-  0xb4, 0x2c, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00,
-  0x20, 0x08, 0x06, 0x1e, 0x8f, 0x0b, 0x2c, 0x2e, 0xac, 0xb7, 0x90, 0xe3,
-  0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c,
-  0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1,
-  0x36, 0xe6, 0xc2, 0x8c, 0x0b, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08,
-  0x82, 0xc1, 0x46, 0xe6, 0x02, 0x8d, 0x0b, 0x09, 0x11, 0x8c, 0x18, 0x20,
-  0x00, 0x08, 0x82, 0xc1, 0x56, 0xe6, 0x42, 0x8d, 0x0b, 0x09, 0x11, 0x8c,
-  0x18, 0x28, 0x00, 0x08, 0x82, 0x81, 0xd2, 0xe6, 0x02, 0x8d, 0x0b, 0xf5,
-  0x2d, 0x04, 0x60, 0x2e, 0x94, 0xb8, 0xe0, 0xe3, 0xc2, 0x68, 0x42, 0x00,
-  0x0c, 0x37, 0x04, 0x62, 0x2e, 0x84, 0xc1, 0x05, 0x4e, 0xcd, 0x12, 0x88,
-  0xd0, 0x40, 0x8b, 0xa3, 0x1a, 0xed, 0xe3, 0xc2, 0x05, 0xfb, 0xb8, 0xc4,
-  0xfb, 0x08, 0x21, 0xe4, 0xc2, 0x05, 0xfc, 0x8c, 0x18, 0x18, 0x00, 0x08,
-  0x82, 0xc1, 0x03, 0xe7, 0x02, 0x8c, 0x0b, 0xe7, 0x2c, 0x8c, 0x18, 0x18,
-  0x00, 0x08, 0x82, 0xc1, 0x13, 0xe7, 0x42, 0x8c, 0x0b, 0xe7, 0x2c, 0x58,
-  0x10, 0xc8, 0xc7, 0x02, 0x41, 0x3e, 0xc6, 0xeb, 0x81, 0x8a, 0x0b, 0xf2,
-  0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x88, 0xce, 0x05, 0x1d, 0x17,
-  0x54, 0x5c, 0x90, 0xb7, 0xc0, 0x7c, 0x3d, 0x50, 0x71, 0x41, 0x3e, 0x23,
-  0x06, 0x08, 0x00, 0x82, 0x60, 0x10, 0xd9, 0xb9, 0xc0, 0xe3, 0x42, 0x8a,
-  0x0b, 0xe9, 0x1a, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x10, 0xdd,
-  0xb9, 0xd0, 0xe3, 0x02, 0x8b, 0x0b, 0xf5, 0x16, 0x8c, 0x18, 0x20, 0x00,
-  0x08, 0x82, 0x41, 0x84, 0xe7, 0x82, 0x8f, 0x0b, 0x2f, 0x2e, 0x84, 0x4c,
-  0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x51, 0x9e, 0x0b, 0x3f, 0x2e,
-  0xac, 0xb8, 0xc0, 0xae, 0x81, 0x31, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06,
-  0x91, 0x9e, 0x0b, 0x60, 0x2e, 0xac, 0xb8, 0x80, 0x6f, 0xc1, 0x88, 0x01,
-  0x02, 0x80, 0x20, 0x18, 0x44, 0x7b, 0x2e, 0x84, 0xb9, 0x10, 0xe3, 0x02,
-  0xc9, 0x04, 0x23, 0x06, 0x0d, 0x00, 0x82, 0x60, 0x50, 0xe9, 0xb9, 0x00,
-  0xe6, 0x02, 0x8d, 0x0b, 0xcc, 0xa2, 0xbc, 0x6b, 0x80, 0x10, 0x81, 0xa5,
-  0x77, 0x40, 0xe3, 0x82, 0x7c, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x20,
-  0xf2, 0x73, 0x81, 0xcc, 0x05, 0x1a, 0x17, 0xfe, 0x2b, 0xb0, 0xf5, 0x0e,
-  0x68, 0x5c, 0x90, 0xcf, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x44, 0xa0,
-  0x2e, 0x98, 0xb9, 0x30, 0xe3, 0x82, 0x7d, 0x06, 0xc1, 0x88, 0x01, 0x02,
-  0x80, 0x20, 0x18, 0x44, 0xa1, 0x2e, 0x9c, 0xb9, 0x60, 0xe3, 0x82, 0x88,
-  0x05, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x10, 0x89, 0xba, 0x80, 0xe6,
-  0x42, 0x8e, 0x0b, 0x2e, 0x12, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x41,
-  0x34, 0xea, 0x42, 0x9a, 0x0b, 0x35, 0x2e, 0xe4, 0x67, 0x60, 0x8c, 0x18,
-  0x20, 0x00, 0x08, 0x82, 0x41, 0x44, 0xea, 0x82, 0x9a, 0x0b, 0x35, 0x2e,
-  0x94, 0x58, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x51, 0xa9, 0x0b,
-  0x6b, 0x2e, 0xec, 0xb8, 0x10, 0x23, 0xc1, 0x88, 0x41, 0x03, 0x80, 0x20,
-  0x18, 0x54, 0xa4, 0x2e, 0xa8, 0xb9, 0xe0, 0xe3, 0x82, 0x55, 0x51, 0xfc,
-  0x19, 0x20, 0x44, 0x60, 0xf5, 0x1c, 0xf8, 0xb8, 0x20, 0x9f, 0x11, 0x03,
-  0x04, 0x00, 0x41, 0x30, 0x88, 0x50, 0x5d, 0x70, 0x73, 0xc1, 0xc7, 0x05,
-  0x96, 0x0a, 0xec, 0x9e, 0x03, 0x1f, 0x17, 0xe4, 0x33, 0x62, 0x80, 0x00,
-  0x20, 0x08, 0x06, 0x91, 0xaa, 0x0b, 0x70, 0x2e, 0xf4, 0xb8, 0x30, 0x92,
-  0x41, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xd1, 0xaa, 0x0b, 0x71,
-  0x2e, 0x80, 0xb9, 0xf0, 0x52, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
-  0x44, 0xac, 0x2e, 0xc8, 0xb9, 0x30, 0xe6, 0xc2, 0x4e, 0x04, 0x23, 0x06,
-  0x08, 0x00, 0x82, 0x60, 0x10, 0xb5, 0xba, 0x30, 0xe7, 0xc2, 0x8f, 0x0b,
-  0x26, 0x19, 0x18, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x10, 0xb9, 0xba,
-  0x40, 0xe7, 0xc2, 0x8f, 0x0b, 0x32, 0x15, 0x8c, 0x18, 0x20, 0x00, 0x08,
-  0x82, 0x41, 0xf4, 0xea, 0x42, 0x9d, 0x0b, 0x65, 0x2e, 0xf8, 0x44, 0x30,
-  0x62, 0xd0, 0x00, 0x20, 0x08, 0x06, 0x95, 0xab, 0x0b, 0x74, 0x2e, 0xa0,
-  0xb9, 0x00, 0x06, 0x9f, 0x97, 0x92, 0x01, 0x42, 0x04, 0x16, 0xc2, 0x01,
-  0x9a, 0x0b, 0xf2, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x48, 0xd6,
-  0x05, 0x3c, 0x17, 0xd0, 0x5c, 0xc8, 0xa1, 0xc0, 0x46, 0x38, 0x40, 0x73,
-  0x41, 0x3e, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x10, 0xd1, 0xba, 0xa0,
-  0xe7, 0xc2, 0x99, 0x0b, 0x30, 0x18, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82,
-  0x60, 0x10, 0xd5, 0xba, 0xb0, 0xe7, 0x82, 0x9a, 0x0b, 0x3c, 0x14, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0x41, 0x64, 0xeb, 0x02, 0x9f, 0x0b, 0x6d,
-  0x2e, 0xa0, 0x41, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xd1, 0xad,
-  0x0b, 0x7d, 0x2e, 0xa4, 0xb9, 0x30, 0x83, 0x81, 0x31, 0x62, 0x80, 0x00,
-  0x20, 0x08, 0x06, 0x11, 0xae, 0x0b, 0x7e, 0x2e, 0xa4, 0xb9, 0xf0, 0x43,
-  0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x44, 0xb9, 0x2e, 0xfc, 0xb9,
-  0xf0, 0xe6, 0xc2, 0x1a, 0x04, 0x23, 0x06, 0x0d, 0x00, 0x82, 0x60, 0x50,
-  0xe1, 0xba, 0xe0, 0xe7, 0x82, 0x9c, 0x0b, 0x6a, 0x90, 0x06, 0x68, 0x60,
-  0x83, 0x01, 0x42, 0x04, 0xc6, 0x06, 0x6c, 0x20, 0x1f, 0x0b, 0xda, 0x40,
-  0x3e, 0x16, 0x06, 0x74, 0x2e, 0xc8, 0x67, 0xc4, 0x00, 0x01, 0x40, 0x10,
-  0x0c, 0x22, 0x5f, 0x17, 0x48, 0x5d, 0xa0, 0x73, 0xc1, 0x09, 0x6c, 0x0c,
-  0xe8, 0x5c, 0x90, 0xcf, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x44, 0xe0,
-  0x2e, 0x98, 0xba, 0x30, 0xe7, 0x82, 0x16, 0x8c, 0x18, 0x20, 0x00, 0x08,
-  0x82, 0x41, 0x14, 0xee, 0xc2, 0xa9, 0x0b, 0x76, 0x2e, 0x44, 0xc1, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x44, 0xe2, 0x2e, 0xa0, 0xba, 0x90, 0xe7,
-  0x02, 0x12, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x41, 0x34, 0xee, 0x42,
-  0xaa, 0x0b, 0x75, 0x2e, 0x74, 0xc6, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
-  0x44, 0xe4, 0x2e, 0xa8, 0xba, 0x50, 0xe7, 0x02, 0x15, 0x8c, 0x18, 0x20,
-  0x00, 0x08, 0x82, 0x41, 0x54, 0xee, 0xc2, 0xaa, 0x0b, 0x7b, 0x2e, 0x2c,
-  0xc1, 0x88, 0x41, 0x03, 0x80, 0x20, 0x18, 0x54, 0xe4, 0x2e, 0xa8, 0xba,
-  0xe0, 0xe7, 0xc2, 0x1d, 0x2c, 0x0a, 0x18, 0x20, 0x44, 0x30, 0x1c, 0x11,
-  0x80, 0xba, 0xe0, 0x7c, 0x17, 0x0c, 0x31, 0x62, 0xe0, 0x00, 0x20, 0x08,
-  0x06, 0xcc, 0xba, 0x0b, 0xa9, 0x2e, 0xd8, 0xb9, 0xf0, 0xe6, 0xc2, 0xaf,
-  0x0b, 0x81, 0xab, 0x0b, 0xae, 0x2e, 0xb8, 0xba, 0x40, 0xea, 0x42, 0xaf,
-  0x0b, 0xb3, 0x04, 0x23, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-};
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_double_double.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_double_double.h
deleted file mode 100644
index beb1821c4a1ae..0000000000000
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_double_double.h
+++ /dev/null
@@ -1,6527 +0,0 @@
-#if 0
-;
-; Note: shader requires additional functionality:
-;       Double-precision floating point
-;
-;
-; Input signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; no parameters
-;
-; Output signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; no parameters
-; shader hash: cf61c7e13d4b0dbcf3b2945d9cda94ee
-;
-; Pipeline Runtime Information: 
-;
-;
-;
-; Buffer Definitions:
-;
-; cbuffer 
-; {
-;
-;   [116 x i8] (type annotation not present)
-;
-; }
-;
-; Resource bind info for 
-; {
-;
-;   [8 x i8] (type annotation not present)
-;
-; }
-;
-; Resource bind info for 
-; {
-;
-;   [8 x i8] (type annotation not present)
-;
-; }
-;
-; Resource bind info for 
-; {
-;
-;   [8 x i8] (type annotation not present)
-;
-; }
-;
-;
-; Resource Bindings:
-;
-; Name                                 Type  Format         Dim      ID      HLSL Bind  Count
-; ------------------------------ ---------- ------- ----------- ------- -------------- ------
-;                                   cbuffer      NA          NA     CB0            cb0     1
-;                                       UAV  struct         r/w      U0             u0     1
-;                                       UAV  struct         r/w      U1             u1     1
-;                                       UAV  struct         r/w      U2             u2     1
-;
-target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
-target triple = "dxil-ms-dx"
-
-%dx.types.Handle = type { i8* }
-%dx.types.CBufRet.i32 = type { i32, i32, i32, i32 }
-%dx.types.ResRet.i32 = type { i32, i32, i32, i32, i32 }
-%dx.types.splitdouble = type { i32, i32 }
-%"class.RWStructuredBuffer<double>" = type { double }
-%Constants = type { i32, i32, i32, i32, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32 }
-
-define void @GridSample() {
-  %1 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 2, i32 2, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %2 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 1, i32 1, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %3 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %4 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %5 = call i32 @dx.op.threadId.i32(i32 93, i32 0)  ; ThreadId(component)
-  %6 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
-  %7 = extractvalue %dx.types.CBufRet.i32 %6, 0
-  %8 = add i32 %7, %5
-  %9 = extractvalue %dx.types.CBufRet.i32 %6, 1
-  %10 = icmp ult i32 %8, %9
-  br i1 %10, label %11, label %3458
-
-; <label>:11                                      ; preds = %0
-  %12 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
-  %13 = extractvalue %dx.types.CBufRet.i32 %12, 3
-  %14 = uitofp i32 %13 to float
-  %15 = extractvalue %dx.types.CBufRet.i32 %12, 2
-  %16 = uitofp i32 %15 to float
-  %17 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 7)  ; CBufferLoadLegacy(handle,regIndex)
-  %18 = extractvalue %dx.types.CBufRet.i32 %17, 0
-  %19 = icmp eq i32 %18, 0
-  %20 = select i1 %19, float -5.000000e-01, float 0.000000e+00
-  %21 = select i1 %19, float -5.000000e-01, float -1.000000e+00
-  %22 = fadd float %14, %21
-  %23 = select i1 %19, float -5.000000e-01, float -1.000000e+00
-  %24 = fadd float %16, %23
-  %25 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
-  %26 = extractvalue %dx.types.CBufRet.i32 %25, 1
-  %27 = extractvalue %dx.types.CBufRet.i32 %25, 2
-  %28 = extractvalue %dx.types.CBufRet.i32 %25, 3
-  %29 = mul i32 %28, %27
-  %30 = mul i32 %27, %26
-  %31 = mul i32 %30, %28
-  %32 = udiv i32 %8, %31
-  %33 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 6)  ; CBufferLoadLegacy(handle,regIndex)
-  %34 = extractvalue %dx.types.CBufRet.i32 %33, 0
-  %35 = mul i32 %34, %32
-  %36 = sub i32 %8, %35
-  %37 = udiv i32 %36, %29
-  %38 = extractvalue %dx.types.CBufRet.i32 %33, 1
-  %39 = mul i32 %38, %37
-  %40 = sub i32 %36, %39
-  %41 = udiv i32 %40, %28
-  %42 = extractvalue %dx.types.CBufRet.i32 %33, 2
-  %43 = mul i32 %42, %41
-  %44 = sub i32 %40, %43
-  %45 = uitofp i32 %32 to float
-  %46 = uitofp i32 %41 to float
-  %47 = uitofp i32 %44 to float
-  %48 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
-  %49 = extractvalue %dx.types.CBufRet.i32 %48, 0
-  %50 = extractvalue %dx.types.CBufRet.i32 %48, 1
-  %51 = extractvalue %dx.types.CBufRet.i32 %48, 2
-  %52 = extractvalue %dx.types.CBufRet.i32 %48, 3
-  %53 = uitofp i32 %49 to float
-  %54 = uitofp i32 %50 to float
-  %55 = uitofp i32 %51 to float
-  %56 = uitofp i32 %52 to float
-  %57 = call float @dx.op.dot4.f32(i32 56, float %45, float %46, float %47, float 0.000000e+00, float %53, float %54, float %55, float %56)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %58 = fadd fast float %56, %57
-  %59 = fptoui float %57 to i32
-  %60 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %2, i32 %59, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %61 = extractvalue %dx.types.ResRet.i32 %60, 0
-  %62 = extractvalue %dx.types.ResRet.i32 %60, 1
-  %63 = call double @dx.op.makeDouble.f64(i32 101, i32 %61, i32 %62)  ; MakeDouble(lo,hi)
-  %64 = fptrunc double %63 to float
-  %65 = fptoui float %58 to i32
-  %66 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %2, i32 %65, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %67 = extractvalue %dx.types.ResRet.i32 %66, 0
-  %68 = extractvalue %dx.types.ResRet.i32 %66, 1
-  %69 = call double @dx.op.makeDouble.f64(i32 101, i32 %67, i32 %68)  ; MakeDouble(lo,hi)
-  %70 = fptrunc double %69 to float
-  %71 = icmp eq i32 %18, 1
-  %72 = fadd fast float %64, 1.000000e+00
-  %73 = fadd fast float %70, 1.000000e+00
-  br i1 %71, label %74, label %81
-
-; <label>:74                                      ; preds = %11
-  %75 = fmul fast float %72, 5.000000e-01
-  %76 = fmul fast float %73, 5.000000e-01
-  %77 = fadd fast float %14, -1.000000e+00
-  %78 = fadd fast float %16, -1.000000e+00
-  %79 = fmul fast float %75, %77
-  %80 = fmul fast float %76, %78
-  br label %88
-
-; <label>:81                                      ; preds = %11
-  %82 = fmul fast float %14, %72
-  %83 = fmul fast float %73, %16
-  %84 = fadd fast float %82, -1.000000e+00
-  %85 = fadd fast float %83, -1.000000e+00
-  %86 = fmul fast float %84, 5.000000e-01
-  %87 = fmul fast float %85, 5.000000e-01
-  br label %88
-
-; <label>:88                                      ; preds = %81, %74
-  %89 = phi float [ %79, %74 ], [ %86, %81 ]
-  %90 = phi float [ %80, %74 ], [ %87, %81 ]
-  %91 = extractvalue %dx.types.CBufRet.i32 %6, 2
-  %92 = icmp eq i32 %91, 1
-  br i1 %92, label %93, label %96
-
-; <label>:93                                      ; preds = %88
-  %94 = call float @dx.op.unary.f32(i32 26, float %89)  ; Round_ne(value)
-  %95 = call float @dx.op.unary.f32(i32 26, float %90)  ; Round_ne(value)
-  br label %96
-
-; <label>:96                                      ; preds = %93, %88
-  %97 = phi float [ %94, %93 ], [ %89, %88 ]
-  %98 = phi float [ %95, %93 ], [ %90, %88 ]
-  %99 = fcmp fast olt float %97, %20
-  %100 = fcmp fast ogt float %97, %22
-  %101 = or i1 %99, %100
-  %102 = fcmp fast olt float %98, %20
-  %103 = or i1 %101, %102
-  %104 = fcmp fast ogt float %98, %24
-  %105 = or i1 %104, %103
-  br i1 %105, label %106, label %179
-
-; <label>:106                                     ; preds = %96
-  %107 = extractvalue %dx.types.CBufRet.i32 %6, 3
-  %108 = icmp eq i32 %107, 1
-  br i1 %108, label %109, label %118
-
-; <label>:109                                     ; preds = %106
-  %110 = add i32 %13, -1
-  %111 = uitofp i32 %110 to float
-  %112 = call float @dx.op.binary.f32(i32 35, float %97, float 0.000000e+00)  ; FMax(a,b)
-  %113 = call float @dx.op.binary.f32(i32 36, float %112, float %111)  ; FMin(a,b)
-  %114 = add i32 %15, -1
-  %115 = uitofp i32 %114 to float
-  %116 = call float @dx.op.binary.f32(i32 35, float %98, float 0.000000e+00)  ; FMax(a,b)
-  %117 = call float @dx.op.binary.f32(i32 36, float %116, float %115)  ; FMin(a,b)
-  br label %179
-
-; <label>:118                                     ; preds = %106
-  %119 = icmp eq i32 %107, 2
-  br i1 %119, label %120, label %179
-
-; <label>:120                                     ; preds = %118
-  %121 = fsub fast float %22, %20
-  br i1 %99, label %122, label %135
-
-; <label>:122                                     ; preds = %120
-  %123 = fsub fast float %20, %97
-  %124 = fdiv fast float %123, %121
-  %125 = fptoui float %124 to i32
-  %126 = uitofp i32 %125 to float
-  %127 = fmul fast float %126, %121
-  %128 = fsub fast float %123, %127
-  %129 = and i32 %125, 1
-  %130 = icmp eq i32 %129, 0
-  br i1 %130, label %131, label %133
-
-; <label>:131                                     ; preds = %122
-  %132 = fadd fast float %128, %20
-  br label %149
-
-; <label>:133                                     ; preds = %122
-  %134 = fsub fast float %22, %128
-  br label %149
-
-; <label>:135                                     ; preds = %120
-  br i1 %100, label %136, label %149
-
-; <label>:136                                     ; preds = %135
-  %137 = fsub fast float %97, %22
-  %138 = fdiv fast float %137, %121
-  %139 = fptoui float %138 to i32
-  %140 = uitofp i32 %139 to float
-  %141 = fmul fast float %140, %121
-  %142 = fsub fast float %137, %141
-  %143 = and i32 %139, 1
-  %144 = icmp eq i32 %143, 0
-  br i1 %144, label %145, label %147
-
-; <label>:145                                     ; preds = %136
-  %146 = fsub fast float %22, %142
-  br label %149
-
-; <label>:147                                     ; preds = %136
-  %148 = fadd fast float %142, %20
-  br label %149
-
-; <label>:149                                     ; preds = %147, %145, %135, %133, %131
-  %150 = phi float [ %132, %131 ], [ %134, %133 ], [ %146, %145 ], [ %148, %147 ], [ %97, %135 ]
-  %151 = fsub fast float %24, %20
-  br i1 %102, label %152, label %165
-
-; <label>:152                                     ; preds = %149
-  %153 = fsub fast float %20, %98
-  %154 = fdiv fast float %153, %151
-  %155 = fptoui float %154 to i32
-  %156 = uitofp i32 %155 to float
-  %157 = fmul fast float %156, %151
-  %158 = fsub fast float %153, %157
-  %159 = and i32 %155, 1
-  %160 = icmp eq i32 %159, 0
-  br i1 %160, label %161, label %163
-
-; <label>:161                                     ; preds = %152
-  %162 = fadd fast float %158, %20
-  br label %179
-
-; <label>:163                                     ; preds = %152
-  %164 = fsub fast float %24, %158
-  br label %179
-
-; <label>:165                                     ; preds = %149
-  br i1 %104, label %166, label %179
-
-; <label>:166                                     ; preds = %165
-  %167 = fsub fast float %98, %24
-  %168 = fdiv fast float %167, %151
-  %169 = fptoui float %168 to i32
-  %170 = uitofp i32 %169 to float
-  %171 = fmul fast float %170, %151
-  %172 = fsub fast float %167, %171
-  %173 = and i32 %169, 1
-  %174 = icmp eq i32 %173, 0
-  br i1 %174, label %175, label %177
-
-; <label>:175                                     ; preds = %166
-  %176 = fsub fast float %24, %172
-  br label %179
-
-; <label>:177                                     ; preds = %166
-  %178 = fadd fast float %172, %20
-  br label %179
-
-; <label>:179                                     ; preds = %177, %175, %165, %163, %161, %118, %109, %96
-  %180 = phi float [ %113, %109 ], [ %97, %118 ], [ %97, %96 ], [ %150, %177 ], [ %150, %175 ], [ %150, %165 ], [ %150, %163 ], [ %150, %161 ]
-  %181 = phi float [ %117, %109 ], [ %98, %118 ], [ %98, %96 ], [ %178, %177 ], [ %176, %175 ], [ %98, %165 ], [ %164, %163 ], [ %162, %161 ]
-  %182 = uitofp i32 %37 to float
-  br i1 %92, label %183, label %341
-
-; <label>:183                                     ; preds = %179
-  %184 = extractvalue %dx.types.CBufRet.i32 %6, 3
-  %185 = icmp eq i32 %184, 0
-  br i1 %185, label %186, label %213
-
-; <label>:186                                     ; preds = %183
-  %187 = fcmp fast oge float %180, 0.000000e+00
-  %188 = fptoui float %180 to i32
-  %189 = icmp ult i32 %188, %13
-  %190 = and i1 %187, %189
-  %191 = fcmp fast oge float %181, 0.000000e+00
-  %192 = and i1 %191, %190
-  %193 = fptoui float %181 to i32
-  %194 = icmp ult i32 %193, %15
-  %195 = and i1 %194, %192
-  br i1 %195, label %196, label %335
-
-; <label>:196                                     ; preds = %186
-  %197 = fptoui float %45 to i32
-  %198 = fptoui float %182 to i32
-  %199 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %200 = extractvalue %dx.types.CBufRet.i32 %199, 0
-  %201 = extractvalue %dx.types.CBufRet.i32 %199, 1
-  %202 = extractvalue %dx.types.CBufRet.i32 %199, 2
-  %203 = extractvalue %dx.types.CBufRet.i32 %199, 3
-  %204 = mul i32 %200, %197
-  %205 = call i32 @dx.op.tertiary.i32(i32 48, i32 %198, i32 %201, i32 %204)  ; IMad(a,b,c)
-  %206 = call i32 @dx.op.tertiary.i32(i32 48, i32 %193, i32 %202, i32 %205)  ; IMad(a,b,c)
-  %207 = call i32 @dx.op.tertiary.i32(i32 48, i32 %188, i32 %203, i32 %206)  ; IMad(a,b,c)
-  %208 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %207, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %209 = extractvalue %dx.types.ResRet.i32 %208, 0
-  %210 = extractvalue %dx.types.ResRet.i32 %208, 1
-  %211 = call double @dx.op.makeDouble.f64(i32 101, i32 %209, i32 %210)  ; MakeDouble(lo,hi)
-  %212 = fptrunc double %211 to float
-  br label %335
-
-; <label>:213                                     ; preds = %183
-  %214 = icmp eq i32 %184, 1
-  br i1 %214, label %215, label %246
-
-; <label>:215                                     ; preds = %213
-  %216 = add i32 %13, -1
-  %217 = uitofp i32 %216 to float
-  %218 = call float @dx.op.binary.f32(i32 35, float %180, float 0.000000e+00)  ; FMax(a,b)
-  %219 = call float @dx.op.binary.f32(i32 36, float %218, float %217)  ; FMin(a,b)
-  %220 = fptoui float %219 to i32
-  %221 = add i32 %15, -1
-  %222 = uitofp i32 %221 to float
-  %223 = call float @dx.op.binary.f32(i32 35, float %181, float 0.000000e+00)  ; FMax(a,b)
-  %224 = call float @dx.op.binary.f32(i32 36, float %223, float %222)  ; FMin(a,b)
-  %225 = fptoui float %224 to i32
-  %226 = uitofp i32 %225 to float
-  %227 = uitofp i32 %220 to float
-  %228 = fptoui float %45 to i32
-  %229 = fptoui float %182 to i32
-  %230 = fptoui float %226 to i32
-  %231 = fptoui float %227 to i32
-  %232 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %233 = extractvalue %dx.types.CBufRet.i32 %232, 0
-  %234 = extractvalue %dx.types.CBufRet.i32 %232, 1
-  %235 = extractvalue %dx.types.CBufRet.i32 %232, 2
-  %236 = extractvalue %dx.types.CBufRet.i32 %232, 3
-  %237 = mul i32 %233, %228
-  %238 = call i32 @dx.op.tertiary.i32(i32 48, i32 %229, i32 %234, i32 %237)  ; IMad(a,b,c)
-  %239 = call i32 @dx.op.tertiary.i32(i32 48, i32 %230, i32 %235, i32 %238)  ; IMad(a,b,c)
-  %240 = call i32 @dx.op.tertiary.i32(i32 48, i32 %231, i32 %236, i32 %239)  ; IMad(a,b,c)
-  %241 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %240, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %242 = extractvalue %dx.types.ResRet.i32 %241, 0
-  %243 = extractvalue %dx.types.ResRet.i32 %241, 1
-  %244 = call double @dx.op.makeDouble.f64(i32 101, i32 %242, i32 %243)  ; MakeDouble(lo,hi)
-  %245 = fptrunc double %244 to float
-  br label %335
-
-; <label>:246                                     ; preds = %213
-  %247 = icmp eq i32 %184, 2
-  br i1 %247, label %248, label %335
-
-; <label>:248                                     ; preds = %246
-  %249 = fsub fast float %22, %20
-  %250 = fcmp fast olt float %180, %20
-  br i1 %250, label %251, label %264
-
-; <label>:251                                     ; preds = %248
-  %252 = fsub fast float %20, %180
-  %253 = fdiv fast float %252, %249
-  %254 = fptoui float %253 to i32
-  %255 = uitofp i32 %254 to float
-  %256 = fmul fast float %255, %249
-  %257 = fsub fast float %252, %256
-  %258 = and i32 %254, 1
-  %259 = icmp eq i32 %258, 0
-  br i1 %259, label %260, label %262
-
-; <label>:260                                     ; preds = %251
-  %261 = fadd fast float %257, %20
-  br label %279
-
-; <label>:262                                     ; preds = %251
-  %263 = fsub fast float %22, %257
-  br label %279
-
-; <label>:264                                     ; preds = %248
-  %265 = fcmp fast ogt float %180, %22
-  br i1 %265, label %266, label %279
-
-; <label>:266                                     ; preds = %264
-  %267 = fsub fast float %180, %22
-  %268 = fdiv fast float %267, %249
-  %269 = fptoui float %268 to i32
-  %270 = uitofp i32 %269 to float
-  %271 = fmul fast float %270, %249
-  %272 = fsub fast float %267, %271
-  %273 = and i32 %269, 1
-  %274 = icmp eq i32 %273, 0
-  br i1 %274, label %275, label %277
-
-; <label>:275                                     ; preds = %266
-  %276 = fsub fast float %22, %272
-  br label %279
-
-; <label>:277                                     ; preds = %266
-  %278 = fadd fast float %272, %20
-  br label %279
-
-; <label>:279                                     ; preds = %277, %275, %264, %262, %260
-  %280 = phi float [ %261, %260 ], [ %263, %262 ], [ %276, %275 ], [ %278, %277 ], [ %180, %264 ]
-  %281 = fptoui float %280 to i32
-  %282 = fsub fast float %24, %20
-  %283 = fcmp fast olt float %181, %20
-  br i1 %283, label %284, label %297
-
-; <label>:284                                     ; preds = %279
-  %285 = fsub fast float %20, %181
-  %286 = fdiv fast float %285, %282
-  %287 = fptoui float %286 to i32
-  %288 = uitofp i32 %287 to float
-  %289 = fmul fast float %288, %282
-  %290 = fsub fast float %285, %289
-  %291 = and i32 %287, 1
-  %292 = icmp eq i32 %291, 0
-  br i1 %292, label %293, label %295
-
-; <label>:293                                     ; preds = %284
-  %294 = fadd fast float %290, %20
-  br label %312
-
-; <label>:295                                     ; preds = %284
-  %296 = fsub fast float %24, %290
-  br label %312
-
-; <label>:297                                     ; preds = %279
-  %298 = fcmp fast ogt float %181, %24
-  br i1 %298, label %299, label %312
-
-; <label>:299                                     ; preds = %297
-  %300 = fsub fast float %181, %24
-  %301 = fdiv fast float %300, %282
-  %302 = fptoui float %301 to i32
-  %303 = uitofp i32 %302 to float
-  %304 = fmul fast float %303, %282
-  %305 = fsub fast float %300, %304
-  %306 = and i32 %302, 1
-  %307 = icmp eq i32 %306, 0
-  br i1 %307, label %308, label %310
-
-; <label>:308                                     ; preds = %299
-  %309 = fsub fast float %24, %305
-  br label %312
-
-; <label>:310                                     ; preds = %299
-  %311 = fadd fast float %305, %20
-  br label %312
-
-; <label>:312                                     ; preds = %310, %308, %297, %295, %293
-  %313 = phi float [ %294, %293 ], [ %296, %295 ], [ %309, %308 ], [ %311, %310 ], [ %181, %297 ]
-  %314 = fptoui float %313 to i32
-  %315 = uitofp i32 %314 to float
-  %316 = uitofp i32 %281 to float
-  %317 = fptoui float %45 to i32
-  %318 = fptoui float %182 to i32
-  %319 = fptoui float %315 to i32
-  %320 = fptoui float %316 to i32
-  %321 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %322 = extractvalue %dx.types.CBufRet.i32 %321, 0
-  %323 = extractvalue %dx.types.CBufRet.i32 %321, 1
-  %324 = extractvalue %dx.types.CBufRet.i32 %321, 2
-  %325 = extractvalue %dx.types.CBufRet.i32 %321, 3
-  %326 = mul i32 %322, %317
-  %327 = call i32 @dx.op.tertiary.i32(i32 48, i32 %318, i32 %323, i32 %326)  ; IMad(a,b,c)
-  %328 = call i32 @dx.op.tertiary.i32(i32 48, i32 %319, i32 %324, i32 %327)  ; IMad(a,b,c)
-  %329 = call i32 @dx.op.tertiary.i32(i32 48, i32 %320, i32 %325, i32 %328)  ; IMad(a,b,c)
-  %330 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %329, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %331 = extractvalue %dx.types.ResRet.i32 %330, 0
-  %332 = extractvalue %dx.types.ResRet.i32 %330, 1
-  %333 = call double @dx.op.makeDouble.f64(i32 101, i32 %331, i32 %332)  ; MakeDouble(lo,hi)
-  %334 = fptrunc double %333 to float
-  br label %335
-
-; <label>:335                                     ; preds = %312, %246, %215, %196, %186
-  %336 = phi float [ %212, %196 ], [ 0.000000e+00, %186 ], [ %245, %215 ], [ %334, %312 ], [ 0.000000e+00, %246 ]
-  %337 = fpext float %336 to double
-  %338 = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double %337)  ; SplitDouble(value)
-  %339 = extractvalue %dx.types.splitdouble %338, 0
-  %340 = extractvalue %dx.types.splitdouble %338, 1
-  call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %1, i32 %8, i32 0, i32 %339, i32 %340, i32 undef, i32 undef, i8 3, i32 8)  ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment)
-  br label %3458
-
-; <label>:341                                     ; preds = %179
-  %342 = icmp eq i32 %91, 0
-  br i1 %342, label %343, label %969
-
-; <label>:343                                     ; preds = %341
-  %344 = call float @dx.op.unary.f32(i32 27, float %180)  ; Round_ni(value)
-  %345 = call float @dx.op.unary.f32(i32 27, float %181)  ; Round_ni(value)
-  %346 = fadd fast float %344, 1.000000e+00
-  %347 = fadd fast float %345, 1.000000e+00
-  %348 = extractvalue %dx.types.CBufRet.i32 %6, 3
-  %349 = icmp eq i32 %348, 0
-  br i1 %349, label %350, label %377
-
-; <label>:350                                     ; preds = %343
-  %351 = fcmp fast oge float %344, 0.000000e+00
-  %352 = fptoui float %344 to i32
-  %353 = icmp ult i32 %352, %13
-  %354 = and i1 %351, %353
-  %355 = fcmp fast oge float %345, 0.000000e+00
-  %356 = and i1 %355, %354
-  %357 = fptoui float %345 to i32
-  %358 = icmp ult i32 %357, %15
-  %359 = and i1 %358, %356
-  br i1 %359, label %360, label %499
-
-; <label>:360                                     ; preds = %350
-  %361 = fptoui float %45 to i32
-  %362 = fptoui float %182 to i32
-  %363 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %364 = extractvalue %dx.types.CBufRet.i32 %363, 0
-  %365 = extractvalue %dx.types.CBufRet.i32 %363, 1
-  %366 = extractvalue %dx.types.CBufRet.i32 %363, 2
-  %367 = extractvalue %dx.types.CBufRet.i32 %363, 3
-  %368 = mul i32 %364, %361
-  %369 = call i32 @dx.op.tertiary.i32(i32 48, i32 %362, i32 %365, i32 %368)  ; IMad(a,b,c)
-  %370 = call i32 @dx.op.tertiary.i32(i32 48, i32 %357, i32 %366, i32 %369)  ; IMad(a,b,c)
-  %371 = call i32 @dx.op.tertiary.i32(i32 48, i32 %352, i32 %367, i32 %370)  ; IMad(a,b,c)
-  %372 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %371, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %373 = extractvalue %dx.types.ResRet.i32 %372, 0
-  %374 = extractvalue %dx.types.ResRet.i32 %372, 1
-  %375 = call double @dx.op.makeDouble.f64(i32 101, i32 %373, i32 %374)  ; MakeDouble(lo,hi)
-  %376 = fptrunc double %375 to float
-  br label %499
-
-; <label>:377                                     ; preds = %343
-  %378 = icmp eq i32 %348, 1
-  br i1 %378, label %379, label %410
-
-; <label>:379                                     ; preds = %377
-  %380 = add i32 %13, -1
-  %381 = uitofp i32 %380 to float
-  %382 = call float @dx.op.binary.f32(i32 35, float %344, float 0.000000e+00)  ; FMax(a,b)
-  %383 = call float @dx.op.binary.f32(i32 36, float %382, float %381)  ; FMin(a,b)
-  %384 = fptoui float %383 to i32
-  %385 = add i32 %15, -1
-  %386 = uitofp i32 %385 to float
-  %387 = call float @dx.op.binary.f32(i32 35, float %345, float 0.000000e+00)  ; FMax(a,b)
-  %388 = call float @dx.op.binary.f32(i32 36, float %387, float %386)  ; FMin(a,b)
-  %389 = fptoui float %388 to i32
-  %390 = uitofp i32 %389 to float
-  %391 = uitofp i32 %384 to float
-  %392 = fptoui float %45 to i32
-  %393 = fptoui float %182 to i32
-  %394 = fptoui float %390 to i32
-  %395 = fptoui float %391 to i32
-  %396 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %397 = extractvalue %dx.types.CBufRet.i32 %396, 0
-  %398 = extractvalue %dx.types.CBufRet.i32 %396, 1
-  %399 = extractvalue %dx.types.CBufRet.i32 %396, 2
-  %400 = extractvalue %dx.types.CBufRet.i32 %396, 3
-  %401 = mul i32 %397, %392
-  %402 = call i32 @dx.op.tertiary.i32(i32 48, i32 %393, i32 %398, i32 %401)  ; IMad(a,b,c)
-  %403 = call i32 @dx.op.tertiary.i32(i32 48, i32 %394, i32 %399, i32 %402)  ; IMad(a,b,c)
-  %404 = call i32 @dx.op.tertiary.i32(i32 48, i32 %395, i32 %400, i32 %403)  ; IMad(a,b,c)
-  %405 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %404, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %406 = extractvalue %dx.types.ResRet.i32 %405, 0
-  %407 = extractvalue %dx.types.ResRet.i32 %405, 1
-  %408 = call double @dx.op.makeDouble.f64(i32 101, i32 %406, i32 %407)  ; MakeDouble(lo,hi)
-  %409 = fptrunc double %408 to float
-  br label %499
-
-; <label>:410                                     ; preds = %377
-  %411 = icmp eq i32 %348, 2
-  br i1 %411, label %412, label %499
-
-; <label>:412                                     ; preds = %410
-  %413 = fsub fast float %22, %20
-  %414 = fcmp fast olt float %344, %20
-  br i1 %414, label %415, label %428
-
-; <label>:415                                     ; preds = %412
-  %416 = fsub fast float %20, %344
-  %417 = fdiv fast float %416, %413
-  %418 = fptoui float %417 to i32
-  %419 = uitofp i32 %418 to float
-  %420 = fmul fast float %419, %413
-  %421 = fsub fast float %416, %420
-  %422 = and i32 %418, 1
-  %423 = icmp eq i32 %422, 0
-  br i1 %423, label %424, label %426
-
-; <label>:424                                     ; preds = %415
-  %425 = fadd fast float %421, %20
-  br label %443
-
-; <label>:426                                     ; preds = %415
-  %427 = fsub fast float %22, %421
-  br label %443
-
-; <label>:428                                     ; preds = %412
-  %429 = fcmp fast ogt float %344, %22
-  br i1 %429, label %430, label %443
-
-; <label>:430                                     ; preds = %428
-  %431 = fsub fast float %344, %22
-  %432 = fdiv fast float %431, %413
-  %433 = fptoui float %432 to i32
-  %434 = uitofp i32 %433 to float
-  %435 = fmul fast float %434, %413
-  %436 = fsub fast float %431, %435
-  %437 = and i32 %433, 1
-  %438 = icmp eq i32 %437, 0
-  br i1 %438, label %439, label %441
-
-; <label>:439                                     ; preds = %430
-  %440 = fsub fast float %22, %436
-  br label %443
-
-; <label>:441                                     ; preds = %430
-  %442 = fadd fast float %436, %20
-  br label %443
-
-; <label>:443                                     ; preds = %441, %439, %428, %426, %424
-  %444 = phi float [ %425, %424 ], [ %427, %426 ], [ %440, %439 ], [ %442, %441 ], [ %344, %428 ]
-  %445 = fptoui float %444 to i32
-  %446 = fsub fast float %24, %20
-  %447 = fcmp fast olt float %345, %20
-  br i1 %447, label %448, label %461
-
-; <label>:448                                     ; preds = %443
-  %449 = fsub fast float %20, %345
-  %450 = fdiv fast float %449, %446
-  %451 = fptoui float %450 to i32
-  %452 = uitofp i32 %451 to float
-  %453 = fmul fast float %452, %446
-  %454 = fsub fast float %449, %453
-  %455 = and i32 %451, 1
-  %456 = icmp eq i32 %455, 0
-  br i1 %456, label %457, label %459
-
-; <label>:457                                     ; preds = %448
-  %458 = fadd fast float %454, %20
-  br label %476
-
-; <label>:459                                     ; preds = %448
-  %460 = fsub fast float %24, %454
-  br label %476
-
-; <label>:461                                     ; preds = %443
-  %462 = fcmp fast ogt float %345, %24
-  br i1 %462, label %463, label %476
-
-; <label>:463                                     ; preds = %461
-  %464 = fsub fast float %345, %24
-  %465 = fdiv fast float %464, %446
-  %466 = fptoui float %465 to i32
-  %467 = uitofp i32 %466 to float
-  %468 = fmul fast float %467, %446
-  %469 = fsub fast float %464, %468
-  %470 = and i32 %466, 1
-  %471 = icmp eq i32 %470, 0
-  br i1 %471, label %472, label %474
-
-; <label>:472                                     ; preds = %463
-  %473 = fsub fast float %24, %469
-  br label %476
-
-; <label>:474                                     ; preds = %463
-  %475 = fadd fast float %469, %20
-  br label %476
-
-; <label>:476                                     ; preds = %474, %472, %461, %459, %457
-  %477 = phi float [ %458, %457 ], [ %460, %459 ], [ %473, %472 ], [ %475, %474 ], [ %345, %461 ]
-  %478 = fptoui float %477 to i32
-  %479 = uitofp i32 %478 to float
-  %480 = uitofp i32 %445 to float
-  %481 = fptoui float %45 to i32
-  %482 = fptoui float %182 to i32
-  %483 = fptoui float %479 to i32
-  %484 = fptoui float %480 to i32
-  %485 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %486 = extractvalue %dx.types.CBufRet.i32 %485, 0
-  %487 = extractvalue %dx.types.CBufRet.i32 %485, 1
-  %488 = extractvalue %dx.types.CBufRet.i32 %485, 2
-  %489 = extractvalue %dx.types.CBufRet.i32 %485, 3
-  %490 = mul i32 %486, %481
-  %491 = call i32 @dx.op.tertiary.i32(i32 48, i32 %482, i32 %487, i32 %490)  ; IMad(a,b,c)
-  %492 = call i32 @dx.op.tertiary.i32(i32 48, i32 %483, i32 %488, i32 %491)  ; IMad(a,b,c)
-  %493 = call i32 @dx.op.tertiary.i32(i32 48, i32 %484, i32 %489, i32 %492)  ; IMad(a,b,c)
-  %494 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %493, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %495 = extractvalue %dx.types.ResRet.i32 %494, 0
-  %496 = extractvalue %dx.types.ResRet.i32 %494, 1
-  %497 = call double @dx.op.makeDouble.f64(i32 101, i32 %495, i32 %496)  ; MakeDouble(lo,hi)
-  %498 = fptrunc double %497 to float
-  br label %499
-
-; <label>:499                                     ; preds = %476, %410, %379, %360, %350
-  %500 = phi float [ %376, %360 ], [ 0.000000e+00, %350 ], [ %409, %379 ], [ %498, %476 ], [ 0.000000e+00, %410 ]
-  br i1 %349, label %501, label %528
-
-; <label>:501                                     ; preds = %499
-  %502 = fcmp fast oge float %346, 0.000000e+00
-  %503 = fptoui float %346 to i32
-  %504 = icmp ult i32 %503, %13
-  %505 = and i1 %502, %504
-  %506 = fcmp fast oge float %345, 0.000000e+00
-  %507 = and i1 %506, %505
-  %508 = fptoui float %345 to i32
-  %509 = icmp ult i32 %508, %15
-  %510 = and i1 %509, %507
-  br i1 %510, label %511, label %650
-
-; <label>:511                                     ; preds = %501
-  %512 = fptoui float %45 to i32
-  %513 = fptoui float %182 to i32
-  %514 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %515 = extractvalue %dx.types.CBufRet.i32 %514, 0
-  %516 = extractvalue %dx.types.CBufRet.i32 %514, 1
-  %517 = extractvalue %dx.types.CBufRet.i32 %514, 2
-  %518 = extractvalue %dx.types.CBufRet.i32 %514, 3
-  %519 = mul i32 %515, %512
-  %520 = call i32 @dx.op.tertiary.i32(i32 48, i32 %513, i32 %516, i32 %519)  ; IMad(a,b,c)
-  %521 = call i32 @dx.op.tertiary.i32(i32 48, i32 %508, i32 %517, i32 %520)  ; IMad(a,b,c)
-  %522 = call i32 @dx.op.tertiary.i32(i32 48, i32 %503, i32 %518, i32 %521)  ; IMad(a,b,c)
-  %523 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %522, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %524 = extractvalue %dx.types.ResRet.i32 %523, 0
-  %525 = extractvalue %dx.types.ResRet.i32 %523, 1
-  %526 = call double @dx.op.makeDouble.f64(i32 101, i32 %524, i32 %525)  ; MakeDouble(lo,hi)
-  %527 = fptrunc double %526 to float
-  br label %650
-
-; <label>:528                                     ; preds = %499
-  %529 = icmp eq i32 %348, 1
-  br i1 %529, label %530, label %561
-
-; <label>:530                                     ; preds = %528
-  %531 = add i32 %13, -1
-  %532 = uitofp i32 %531 to float
-  %533 = call float @dx.op.binary.f32(i32 35, float %346, float 0.000000e+00)  ; FMax(a,b)
-  %534 = call float @dx.op.binary.f32(i32 36, float %533, float %532)  ; FMin(a,b)
-  %535 = fptoui float %534 to i32
-  %536 = add i32 %15, -1
-  %537 = uitofp i32 %536 to float
-  %538 = call float @dx.op.binary.f32(i32 35, float %345, float 0.000000e+00)  ; FMax(a,b)
-  %539 = call float @dx.op.binary.f32(i32 36, float %538, float %537)  ; FMin(a,b)
-  %540 = fptoui float %539 to i32
-  %541 = uitofp i32 %540 to float
-  %542 = uitofp i32 %535 to float
-  %543 = fptoui float %45 to i32
-  %544 = fptoui float %182 to i32
-  %545 = fptoui float %541 to i32
-  %546 = fptoui float %542 to i32
-  %547 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %548 = extractvalue %dx.types.CBufRet.i32 %547, 0
-  %549 = extractvalue %dx.types.CBufRet.i32 %547, 1
-  %550 = extractvalue %dx.types.CBufRet.i32 %547, 2
-  %551 = extractvalue %dx.types.CBufRet.i32 %547, 3
-  %552 = mul i32 %548, %543
-  %553 = call i32 @dx.op.tertiary.i32(i32 48, i32 %544, i32 %549, i32 %552)  ; IMad(a,b,c)
-  %554 = call i32 @dx.op.tertiary.i32(i32 48, i32 %545, i32 %550, i32 %553)  ; IMad(a,b,c)
-  %555 = call i32 @dx.op.tertiary.i32(i32 48, i32 %546, i32 %551, i32 %554)  ; IMad(a,b,c)
-  %556 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %555, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %557 = extractvalue %dx.types.ResRet.i32 %556, 0
-  %558 = extractvalue %dx.types.ResRet.i32 %556, 1
-  %559 = call double @dx.op.makeDouble.f64(i32 101, i32 %557, i32 %558)  ; MakeDouble(lo,hi)
-  %560 = fptrunc double %559 to float
-  br label %650
-
-; <label>:561                                     ; preds = %528
-  %562 = icmp eq i32 %348, 2
-  br i1 %562, label %563, label %650
-
-; <label>:563                                     ; preds = %561
-  %564 = fsub fast float %22, %20
-  %565 = fcmp fast olt float %346, %20
-  br i1 %565, label %566, label %579
-
-; <label>:566                                     ; preds = %563
-  %567 = fsub fast float %20, %346
-  %568 = fdiv fast float %567, %564
-  %569 = fptoui float %568 to i32
-  %570 = uitofp i32 %569 to float
-  %571 = fmul fast float %570, %564
-  %572 = fsub fast float %567, %571
-  %573 = and i32 %569, 1
-  %574 = icmp eq i32 %573, 0
-  br i1 %574, label %575, label %577
-
-; <label>:575                                     ; preds = %566
-  %576 = fadd fast float %572, %20
-  br label %594
-
-; <label>:577                                     ; preds = %566
-  %578 = fsub fast float %22, %572
-  br label %594
-
-; <label>:579                                     ; preds = %563
-  %580 = fcmp fast ogt float %346, %22
-  br i1 %580, label %581, label %594
-
-; <label>:581                                     ; preds = %579
-  %582 = fsub fast float %346, %22
-  %583 = fdiv fast float %582, %564
-  %584 = fptoui float %583 to i32
-  %585 = uitofp i32 %584 to float
-  %586 = fmul fast float %585, %564
-  %587 = fsub fast float %582, %586
-  %588 = and i32 %584, 1
-  %589 = icmp eq i32 %588, 0
-  br i1 %589, label %590, label %592
-
-; <label>:590                                     ; preds = %581
-  %591 = fsub fast float %22, %587
-  br label %594
-
-; <label>:592                                     ; preds = %581
-  %593 = fadd fast float %587, %20
-  br label %594
-
-; <label>:594                                     ; preds = %592, %590, %579, %577, %575
-  %595 = phi float [ %576, %575 ], [ %578, %577 ], [ %591, %590 ], [ %593, %592 ], [ %346, %579 ]
-  %596 = fptoui float %595 to i32
-  %597 = fsub fast float %24, %20
-  %598 = fcmp fast olt float %345, %20
-  br i1 %598, label %599, label %612
-
-; <label>:599                                     ; preds = %594
-  %600 = fsub fast float %20, %345
-  %601 = fdiv fast float %600, %597
-  %602 = fptoui float %601 to i32
-  %603 = uitofp i32 %602 to float
-  %604 = fmul fast float %603, %597
-  %605 = fsub fast float %600, %604
-  %606 = and i32 %602, 1
-  %607 = icmp eq i32 %606, 0
-  br i1 %607, label %608, label %610
-
-; <label>:608                                     ; preds = %599
-  %609 = fadd fast float %605, %20
-  br label %627
-
-; <label>:610                                     ; preds = %599
-  %611 = fsub fast float %24, %605
-  br label %627
-
-; <label>:612                                     ; preds = %594
-  %613 = fcmp fast ogt float %345, %24
-  br i1 %613, label %614, label %627
-
-; <label>:614                                     ; preds = %612
-  %615 = fsub fast float %345, %24
-  %616 = fdiv fast float %615, %597
-  %617 = fptoui float %616 to i32
-  %618 = uitofp i32 %617 to float
-  %619 = fmul fast float %618, %597
-  %620 = fsub fast float %615, %619
-  %621 = and i32 %617, 1
-  %622 = icmp eq i32 %621, 0
-  br i1 %622, label %623, label %625
-
-; <label>:623                                     ; preds = %614
-  %624 = fsub fast float %24, %620
-  br label %627
-
-; <label>:625                                     ; preds = %614
-  %626 = fadd fast float %620, %20
-  br label %627
-
-; <label>:627                                     ; preds = %625, %623, %612, %610, %608
-  %628 = phi float [ %609, %608 ], [ %611, %610 ], [ %624, %623 ], [ %626, %625 ], [ %345, %612 ]
-  %629 = fptoui float %628 to i32
-  %630 = uitofp i32 %629 to float
-  %631 = uitofp i32 %596 to float
-  %632 = fptoui float %45 to i32
-  %633 = fptoui float %182 to i32
-  %634 = fptoui float %630 to i32
-  %635 = fptoui float %631 to i32
-  %636 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %637 = extractvalue %dx.types.CBufRet.i32 %636, 0
-  %638 = extractvalue %dx.types.CBufRet.i32 %636, 1
-  %639 = extractvalue %dx.types.CBufRet.i32 %636, 2
-  %640 = extractvalue %dx.types.CBufRet.i32 %636, 3
-  %641 = mul i32 %637, %632
-  %642 = call i32 @dx.op.tertiary.i32(i32 48, i32 %633, i32 %638, i32 %641)  ; IMad(a,b,c)
-  %643 = call i32 @dx.op.tertiary.i32(i32 48, i32 %634, i32 %639, i32 %642)  ; IMad(a,b,c)
-  %644 = call i32 @dx.op.tertiary.i32(i32 48, i32 %635, i32 %640, i32 %643)  ; IMad(a,b,c)
-  %645 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %644, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %646 = extractvalue %dx.types.ResRet.i32 %645, 0
-  %647 = extractvalue %dx.types.ResRet.i32 %645, 1
-  %648 = call double @dx.op.makeDouble.f64(i32 101, i32 %646, i32 %647)  ; MakeDouble(lo,hi)
-  %649 = fptrunc double %648 to float
-  br label %650
-
-; <label>:650                                     ; preds = %627, %561, %530, %511, %501
-  %651 = phi float [ %527, %511 ], [ 0.000000e+00, %501 ], [ %560, %530 ], [ %649, %627 ], [ 0.000000e+00, %561 ]
-  br i1 %349, label %652, label %679
-
-; <label>:652                                     ; preds = %650
-  %653 = fcmp fast oge float %344, 0.000000e+00
-  %654 = fptoui float %344 to i32
-  %655 = icmp ult i32 %654, %13
-  %656 = and i1 %653, %655
-  %657 = fcmp fast oge float %347, 0.000000e+00
-  %658 = and i1 %657, %656
-  %659 = fptoui float %347 to i32
-  %660 = icmp ult i32 %659, %15
-  %661 = and i1 %660, %658
-  br i1 %661, label %662, label %801
-
-; <label>:662                                     ; preds = %652
-  %663 = fptoui float %45 to i32
-  %664 = fptoui float %182 to i32
-  %665 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %666 = extractvalue %dx.types.CBufRet.i32 %665, 0
-  %667 = extractvalue %dx.types.CBufRet.i32 %665, 1
-  %668 = extractvalue %dx.types.CBufRet.i32 %665, 2
-  %669 = extractvalue %dx.types.CBufRet.i32 %665, 3
-  %670 = mul i32 %666, %663
-  %671 = call i32 @dx.op.tertiary.i32(i32 48, i32 %664, i32 %667, i32 %670)  ; IMad(a,b,c)
-  %672 = call i32 @dx.op.tertiary.i32(i32 48, i32 %659, i32 %668, i32 %671)  ; IMad(a,b,c)
-  %673 = call i32 @dx.op.tertiary.i32(i32 48, i32 %654, i32 %669, i32 %672)  ; IMad(a,b,c)
-  %674 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %673, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %675 = extractvalue %dx.types.ResRet.i32 %674, 0
-  %676 = extractvalue %dx.types.ResRet.i32 %674, 1
-  %677 = call double @dx.op.makeDouble.f64(i32 101, i32 %675, i32 %676)  ; MakeDouble(lo,hi)
-  %678 = fptrunc double %677 to float
-  br label %801
-
-; <label>:679                                     ; preds = %650
-  %680 = icmp eq i32 %348, 1
-  br i1 %680, label %681, label %712
-
-; <label>:681                                     ; preds = %679
-  %682 = add i32 %13, -1
-  %683 = uitofp i32 %682 to float
-  %684 = call float @dx.op.binary.f32(i32 35, float %344, float 0.000000e+00)  ; FMax(a,b)
-  %685 = call float @dx.op.binary.f32(i32 36, float %684, float %683)  ; FMin(a,b)
-  %686 = fptoui float %685 to i32
-  %687 = add i32 %15, -1
-  %688 = uitofp i32 %687 to float
-  %689 = call float @dx.op.binary.f32(i32 35, float %347, float 0.000000e+00)  ; FMax(a,b)
-  %690 = call float @dx.op.binary.f32(i32 36, float %689, float %688)  ; FMin(a,b)
-  %691 = fptoui float %690 to i32
-  %692 = uitofp i32 %691 to float
-  %693 = uitofp i32 %686 to float
-  %694 = fptoui float %45 to i32
-  %695 = fptoui float %182 to i32
-  %696 = fptoui float %692 to i32
-  %697 = fptoui float %693 to i32
-  %698 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %699 = extractvalue %dx.types.CBufRet.i32 %698, 0
-  %700 = extractvalue %dx.types.CBufRet.i32 %698, 1
-  %701 = extractvalue %dx.types.CBufRet.i32 %698, 2
-  %702 = extractvalue %dx.types.CBufRet.i32 %698, 3
-  %703 = mul i32 %699, %694
-  %704 = call i32 @dx.op.tertiary.i32(i32 48, i32 %695, i32 %700, i32 %703)  ; IMad(a,b,c)
-  %705 = call i32 @dx.op.tertiary.i32(i32 48, i32 %696, i32 %701, i32 %704)  ; IMad(a,b,c)
-  %706 = call i32 @dx.op.tertiary.i32(i32 48, i32 %697, i32 %702, i32 %705)  ; IMad(a,b,c)
-  %707 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %706, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %708 = extractvalue %dx.types.ResRet.i32 %707, 0
-  %709 = extractvalue %dx.types.ResRet.i32 %707, 1
-  %710 = call double @dx.op.makeDouble.f64(i32 101, i32 %708, i32 %709)  ; MakeDouble(lo,hi)
-  %711 = fptrunc double %710 to float
-  br label %801
-
-; <label>:712                                     ; preds = %679
-  %713 = icmp eq i32 %348, 2
-  br i1 %713, label %714, label %801
-
-; <label>:714                                     ; preds = %712
-  %715 = fsub fast float %22, %20
-  %716 = fcmp fast olt float %344, %20
-  br i1 %716, label %717, label %730
-
-; <label>:717                                     ; preds = %714
-  %718 = fsub fast float %20, %344
-  %719 = fdiv fast float %718, %715
-  %720 = fptoui float %719 to i32
-  %721 = uitofp i32 %720 to float
-  %722 = fmul fast float %721, %715
-  %723 = fsub fast float %718, %722
-  %724 = and i32 %720, 1
-  %725 = icmp eq i32 %724, 0
-  br i1 %725, label %726, label %728
-
-; <label>:726                                     ; preds = %717
-  %727 = fadd fast float %723, %20
-  br label %745
-
-; <label>:728                                     ; preds = %717
-  %729 = fsub fast float %22, %723
-  br label %745
-
-; <label>:730                                     ; preds = %714
-  %731 = fcmp fast ogt float %344, %22
-  br i1 %731, label %732, label %745
-
-; <label>:732                                     ; preds = %730
-  %733 = fsub fast float %344, %22
-  %734 = fdiv fast float %733, %715
-  %735 = fptoui float %734 to i32
-  %736 = uitofp i32 %735 to float
-  %737 = fmul fast float %736, %715
-  %738 = fsub fast float %733, %737
-  %739 = and i32 %735, 1
-  %740 = icmp eq i32 %739, 0
-  br i1 %740, label %741, label %743
-
-; <label>:741                                     ; preds = %732
-  %742 = fsub fast float %22, %738
-  br label %745
-
-; <label>:743                                     ; preds = %732
-  %744 = fadd fast float %738, %20
-  br label %745
-
-; <label>:745                                     ; preds = %743, %741, %730, %728, %726
-  %746 = phi float [ %727, %726 ], [ %729, %728 ], [ %742, %741 ], [ %744, %743 ], [ %344, %730 ]
-  %747 = fptoui float %746 to i32
-  %748 = fsub fast float %24, %20
-  %749 = fcmp fast olt float %347, %20
-  br i1 %749, label %750, label %763
-
-; <label>:750                                     ; preds = %745
-  %751 = fsub fast float %20, %347
-  %752 = fdiv fast float %751, %748
-  %753 = fptoui float %752 to i32
-  %754 = uitofp i32 %753 to float
-  %755 = fmul fast float %754, %748
-  %756 = fsub fast float %751, %755
-  %757 = and i32 %753, 1
-  %758 = icmp eq i32 %757, 0
-  br i1 %758, label %759, label %761
-
-; <label>:759                                     ; preds = %750
-  %760 = fadd fast float %756, %20
-  br label %778
-
-; <label>:761                                     ; preds = %750
-  %762 = fsub fast float %24, %756
-  br label %778
-
-; <label>:763                                     ; preds = %745
-  %764 = fcmp fast ogt float %347, %24
-  br i1 %764, label %765, label %778
-
-; <label>:765                                     ; preds = %763
-  %766 = fsub fast float %347, %24
-  %767 = fdiv fast float %766, %748
-  %768 = fptoui float %767 to i32
-  %769 = uitofp i32 %768 to float
-  %770 = fmul fast float %769, %748
-  %771 = fsub fast float %766, %770
-  %772 = and i32 %768, 1
-  %773 = icmp eq i32 %772, 0
-  br i1 %773, label %774, label %776
-
-; <label>:774                                     ; preds = %765
-  %775 = fsub fast float %24, %771
-  br label %778
-
-; <label>:776                                     ; preds = %765
-  %777 = fadd fast float %771, %20
-  br label %778
-
-; <label>:778                                     ; preds = %776, %774, %763, %761, %759
-  %779 = phi float [ %760, %759 ], [ %762, %761 ], [ %775, %774 ], [ %777, %776 ], [ %347, %763 ]
-  %780 = fptoui float %779 to i32
-  %781 = uitofp i32 %780 to float
-  %782 = uitofp i32 %747 to float
-  %783 = fptoui float %45 to i32
-  %784 = fptoui float %182 to i32
-  %785 = fptoui float %781 to i32
-  %786 = fptoui float %782 to i32
-  %787 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %788 = extractvalue %dx.types.CBufRet.i32 %787, 0
-  %789 = extractvalue %dx.types.CBufRet.i32 %787, 1
-  %790 = extractvalue %dx.types.CBufRet.i32 %787, 2
-  %791 = extractvalue %dx.types.CBufRet.i32 %787, 3
-  %792 = mul i32 %788, %783
-  %793 = call i32 @dx.op.tertiary.i32(i32 48, i32 %784, i32 %789, i32 %792)  ; IMad(a,b,c)
-  %794 = call i32 @dx.op.tertiary.i32(i32 48, i32 %785, i32 %790, i32 %793)  ; IMad(a,b,c)
-  %795 = call i32 @dx.op.tertiary.i32(i32 48, i32 %786, i32 %791, i32 %794)  ; IMad(a,b,c)
-  %796 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %795, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %797 = extractvalue %dx.types.ResRet.i32 %796, 0
-  %798 = extractvalue %dx.types.ResRet.i32 %796, 1
-  %799 = call double @dx.op.makeDouble.f64(i32 101, i32 %797, i32 %798)  ; MakeDouble(lo,hi)
-  %800 = fptrunc double %799 to float
-  br label %801
-
-; <label>:801                                     ; preds = %778, %712, %681, %662, %652
-  %802 = phi float [ %678, %662 ], [ 0.000000e+00, %652 ], [ %711, %681 ], [ %800, %778 ], [ 0.000000e+00, %712 ]
-  br i1 %349, label %803, label %830
-
-; <label>:803                                     ; preds = %801
-  %804 = fcmp fast oge float %346, 0.000000e+00
-  %805 = fptoui float %346 to i32
-  %806 = icmp ult i32 %805, %13
-  %807 = and i1 %804, %806
-  %808 = fcmp fast oge float %347, 0.000000e+00
-  %809 = and i1 %808, %807
-  %810 = fptoui float %347 to i32
-  %811 = icmp ult i32 %810, %15
-  %812 = and i1 %811, %809
-  br i1 %812, label %813, label %952
-
-; <label>:813                                     ; preds = %803
-  %814 = fptoui float %45 to i32
-  %815 = fptoui float %182 to i32
-  %816 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %817 = extractvalue %dx.types.CBufRet.i32 %816, 0
-  %818 = extractvalue %dx.types.CBufRet.i32 %816, 1
-  %819 = extractvalue %dx.types.CBufRet.i32 %816, 2
-  %820 = extractvalue %dx.types.CBufRet.i32 %816, 3
-  %821 = mul i32 %817, %814
-  %822 = call i32 @dx.op.tertiary.i32(i32 48, i32 %815, i32 %818, i32 %821)  ; IMad(a,b,c)
-  %823 = call i32 @dx.op.tertiary.i32(i32 48, i32 %810, i32 %819, i32 %822)  ; IMad(a,b,c)
-  %824 = call i32 @dx.op.tertiary.i32(i32 48, i32 %805, i32 %820, i32 %823)  ; IMad(a,b,c)
-  %825 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %824, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %826 = extractvalue %dx.types.ResRet.i32 %825, 0
-  %827 = extractvalue %dx.types.ResRet.i32 %825, 1
-  %828 = call double @dx.op.makeDouble.f64(i32 101, i32 %826, i32 %827)  ; MakeDouble(lo,hi)
-  %829 = fptrunc double %828 to float
-  br label %952
-
-; <label>:830                                     ; preds = %801
-  %831 = icmp eq i32 %348, 1
-  br i1 %831, label %832, label %863
-
-; <label>:832                                     ; preds = %830
-  %833 = add i32 %13, -1
-  %834 = uitofp i32 %833 to float
-  %835 = call float @dx.op.binary.f32(i32 35, float %346, float 0.000000e+00)  ; FMax(a,b)
-  %836 = call float @dx.op.binary.f32(i32 36, float %835, float %834)  ; FMin(a,b)
-  %837 = fptoui float %836 to i32
-  %838 = add i32 %15, -1
-  %839 = uitofp i32 %838 to float
-  %840 = call float @dx.op.binary.f32(i32 35, float %347, float 0.000000e+00)  ; FMax(a,b)
-  %841 = call float @dx.op.binary.f32(i32 36, float %840, float %839)  ; FMin(a,b)
-  %842 = fptoui float %841 to i32
-  %843 = uitofp i32 %842 to float
-  %844 = uitofp i32 %837 to float
-  %845 = fptoui float %45 to i32
-  %846 = fptoui float %182 to i32
-  %847 = fptoui float %843 to i32
-  %848 = fptoui float %844 to i32
-  %849 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %850 = extractvalue %dx.types.CBufRet.i32 %849, 0
-  %851 = extractvalue %dx.types.CBufRet.i32 %849, 1
-  %852 = extractvalue %dx.types.CBufRet.i32 %849, 2
-  %853 = extractvalue %dx.types.CBufRet.i32 %849, 3
-  %854 = mul i32 %850, %845
-  %855 = call i32 @dx.op.tertiary.i32(i32 48, i32 %846, i32 %851, i32 %854)  ; IMad(a,b,c)
-  %856 = call i32 @dx.op.tertiary.i32(i32 48, i32 %847, i32 %852, i32 %855)  ; IMad(a,b,c)
-  %857 = call i32 @dx.op.tertiary.i32(i32 48, i32 %848, i32 %853, i32 %856)  ; IMad(a,b,c)
-  %858 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %857, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %859 = extractvalue %dx.types.ResRet.i32 %858, 0
-  %860 = extractvalue %dx.types.ResRet.i32 %858, 1
-  %861 = call double @dx.op.makeDouble.f64(i32 101, i32 %859, i32 %860)  ; MakeDouble(lo,hi)
-  %862 = fptrunc double %861 to float
-  br label %952
-
-; <label>:863                                     ; preds = %830
-  %864 = icmp eq i32 %348, 2
-  br i1 %864, label %865, label %952
-
-; <label>:865                                     ; preds = %863
-  %866 = fsub fast float %22, %20
-  %867 = fcmp fast olt float %346, %20
-  br i1 %867, label %868, label %881
-
-; <label>:868                                     ; preds = %865
-  %869 = fsub fast float %20, %346
-  %870 = fdiv fast float %869, %866
-  %871 = fptoui float %870 to i32
-  %872 = uitofp i32 %871 to float
-  %873 = fmul fast float %872, %866
-  %874 = fsub fast float %869, %873
-  %875 = and i32 %871, 1
-  %876 = icmp eq i32 %875, 0
-  br i1 %876, label %877, label %879
-
-; <label>:877                                     ; preds = %868
-  %878 = fadd fast float %874, %20
-  br label %896
-
-; <label>:879                                     ; preds = %868
-  %880 = fsub fast float %22, %874
-  br label %896
-
-; <label>:881                                     ; preds = %865
-  %882 = fcmp fast ogt float %346, %22
-  br i1 %882, label %883, label %896
-
-; <label>:883                                     ; preds = %881
-  %884 = fsub fast float %346, %22
-  %885 = fdiv fast float %884, %866
-  %886 = fptoui float %885 to i32
-  %887 = uitofp i32 %886 to float
-  %888 = fmul fast float %887, %866
-  %889 = fsub fast float %884, %888
-  %890 = and i32 %886, 1
-  %891 = icmp eq i32 %890, 0
-  br i1 %891, label %892, label %894
-
-; <label>:892                                     ; preds = %883
-  %893 = fsub fast float %22, %889
-  br label %896
-
-; <label>:894                                     ; preds = %883
-  %895 = fadd fast float %889, %20
-  br label %896
-
-; <label>:896                                     ; preds = %894, %892, %881, %879, %877
-  %897 = phi float [ %878, %877 ], [ %880, %879 ], [ %893, %892 ], [ %895, %894 ], [ %346, %881 ]
-  %898 = fptoui float %897 to i32
-  %899 = fsub fast float %24, %20
-  %900 = fcmp fast olt float %347, %20
-  br i1 %900, label %901, label %914
-
-; <label>:901                                     ; preds = %896
-  %902 = fsub fast float %20, %347
-  %903 = fdiv fast float %902, %899
-  %904 = fptoui float %903 to i32
-  %905 = uitofp i32 %904 to float
-  %906 = fmul fast float %905, %899
-  %907 = fsub fast float %902, %906
-  %908 = and i32 %904, 1
-  %909 = icmp eq i32 %908, 0
-  br i1 %909, label %910, label %912
-
-; <label>:910                                     ; preds = %901
-  %911 = fadd fast float %907, %20
-  br label %929
-
-; <label>:912                                     ; preds = %901
-  %913 = fsub fast float %24, %907
-  br label %929
-
-; <label>:914                                     ; preds = %896
-  %915 = fcmp fast ogt float %347, %24
-  br i1 %915, label %916, label %929
-
-; <label>:916                                     ; preds = %914
-  %917 = fsub fast float %347, %24
-  %918 = fdiv fast float %917, %899
-  %919 = fptoui float %918 to i32
-  %920 = uitofp i32 %919 to float
-  %921 = fmul fast float %920, %899
-  %922 = fsub fast float %917, %921
-  %923 = and i32 %919, 1
-  %924 = icmp eq i32 %923, 0
-  br i1 %924, label %925, label %927
-
-; <label>:925                                     ; preds = %916
-  %926 = fsub fast float %24, %922
-  br label %929
-
-; <label>:927                                     ; preds = %916
-  %928 = fadd fast float %922, %20
-  br label %929
-
-; <label>:929                                     ; preds = %927, %925, %914, %912, %910
-  %930 = phi float [ %911, %910 ], [ %913, %912 ], [ %926, %925 ], [ %928, %927 ], [ %347, %914 ]
-  %931 = fptoui float %930 to i32
-  %932 = uitofp i32 %931 to float
-  %933 = uitofp i32 %898 to float
-  %934 = fptoui float %45 to i32
-  %935 = fptoui float %182 to i32
-  %936 = fptoui float %932 to i32
-  %937 = fptoui float %933 to i32
-  %938 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %939 = extractvalue %dx.types.CBufRet.i32 %938, 0
-  %940 = extractvalue %dx.types.CBufRet.i32 %938, 1
-  %941 = extractvalue %dx.types.CBufRet.i32 %938, 2
-  %942 = extractvalue %dx.types.CBufRet.i32 %938, 3
-  %943 = mul i32 %939, %934
-  %944 = call i32 @dx.op.tertiary.i32(i32 48, i32 %935, i32 %940, i32 %943)  ; IMad(a,b,c)
-  %945 = call i32 @dx.op.tertiary.i32(i32 48, i32 %936, i32 %941, i32 %944)  ; IMad(a,b,c)
-  %946 = call i32 @dx.op.tertiary.i32(i32 48, i32 %937, i32 %942, i32 %945)  ; IMad(a,b,c)
-  %947 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %946, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %948 = extractvalue %dx.types.ResRet.i32 %947, 0
-  %949 = extractvalue %dx.types.ResRet.i32 %947, 1
-  %950 = call double @dx.op.makeDouble.f64(i32 101, i32 %948, i32 %949)  ; MakeDouble(lo,hi)
-  %951 = fptrunc double %950 to float
-  br label %952
-
-; <label>:952                                     ; preds = %929, %863, %832, %813, %803
-  %953 = phi float [ %829, %813 ], [ 0.000000e+00, %803 ], [ %862, %832 ], [ %951, %929 ], [ 0.000000e+00, %863 ]
-  %954 = call float @dx.op.unary.f32(i32 22, float %180)  ; Frc(value)
-  %955 = fsub fast float %651, %500
-  %956 = fmul fast float %954, %955
-  %957 = fadd fast float %956, %500
-  %958 = fsub fast float %953, %802
-  %959 = fmul fast float %954, %958
-  %960 = fadd fast float %959, %802
-  %961 = call float @dx.op.unary.f32(i32 22, float %181)  ; Frc(value)
-  %962 = fsub fast float %960, %957
-  %963 = fmul fast float %962, %961
-  %964 = fadd fast float %963, %957
-  %965 = fpext float %964 to double
-  %966 = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double %965)  ; SplitDouble(value)
-  %967 = extractvalue %dx.types.splitdouble %966, 0
-  %968 = extractvalue %dx.types.splitdouble %966, 1
-  call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %1, i32 %8, i32 0, i32 %967, i32 %968, i32 undef, i32 undef, i8 3, i32 8)  ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment)
-  br label %3458
-
-; <label>:969                                     ; preds = %341
-  %970 = icmp eq i32 %91, 2
-  br i1 %970, label %971, label %3458
-
-; <label>:971                                     ; preds = %969
-  %972 = call float @dx.op.unary.f32(i32 27, float %180)  ; Round_ni(value)
-  %973 = fadd fast float %972, -1.000000e+00
-  %974 = call float @dx.op.unary.f32(i32 27, float %181)  ; Round_ni(value)
-  %975 = fadd fast float %974, -1.000000e+00
-  %976 = extractvalue %dx.types.CBufRet.i32 %6, 3
-  %977 = icmp eq i32 %976, 0
-  br i1 %977, label %978, label %1005
-
-; <label>:978                                     ; preds = %971
-  %979 = fcmp fast oge float %973, 0.000000e+00
-  %980 = fptoui float %973 to i32
-  %981 = icmp ult i32 %980, %13
-  %982 = and i1 %979, %981
-  %983 = fcmp fast oge float %975, 0.000000e+00
-  %984 = and i1 %983, %982
-  %985 = fptoui float %975 to i32
-  %986 = icmp ult i32 %985, %15
-  %987 = and i1 %986, %984
-  br i1 %987, label %988, label %1127
-
-; <label>:988                                     ; preds = %978
-  %989 = fptoui float %45 to i32
-  %990 = fptoui float %182 to i32
-  %991 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %992 = extractvalue %dx.types.CBufRet.i32 %991, 0
-  %993 = extractvalue %dx.types.CBufRet.i32 %991, 1
-  %994 = extractvalue %dx.types.CBufRet.i32 %991, 2
-  %995 = extractvalue %dx.types.CBufRet.i32 %991, 3
-  %996 = mul i32 %992, %989
-  %997 = call i32 @dx.op.tertiary.i32(i32 48, i32 %990, i32 %993, i32 %996)  ; IMad(a,b,c)
-  %998 = call i32 @dx.op.tertiary.i32(i32 48, i32 %985, i32 %994, i32 %997)  ; IMad(a,b,c)
-  %999 = call i32 @dx.op.tertiary.i32(i32 48, i32 %980, i32 %995, i32 %998)  ; IMad(a,b,c)
-  %1000 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %999, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1001 = extractvalue %dx.types.ResRet.i32 %1000, 0
-  %1002 = extractvalue %dx.types.ResRet.i32 %1000, 1
-  %1003 = call double @dx.op.makeDouble.f64(i32 101, i32 %1001, i32 %1002)  ; MakeDouble(lo,hi)
-  %1004 = fptrunc double %1003 to float
-  br label %1127
-
-; <label>:1005                                    ; preds = %971
-  %1006 = icmp eq i32 %976, 1
-  br i1 %1006, label %1007, label %1038
-
-; <label>:1007                                    ; preds = %1005
-  %1008 = add i32 %13, -1
-  %1009 = uitofp i32 %1008 to float
-  %1010 = call float @dx.op.binary.f32(i32 35, float %973, float 0.000000e+00)  ; FMax(a,b)
-  %1011 = call float @dx.op.binary.f32(i32 36, float %1010, float %1009)  ; FMin(a,b)
-  %1012 = fptoui float %1011 to i32
-  %1013 = add i32 %15, -1
-  %1014 = uitofp i32 %1013 to float
-  %1015 = call float @dx.op.binary.f32(i32 35, float %975, float 0.000000e+00)  ; FMax(a,b)
-  %1016 = call float @dx.op.binary.f32(i32 36, float %1015, float %1014)  ; FMin(a,b)
-  %1017 = fptoui float %1016 to i32
-  %1018 = uitofp i32 %1017 to float
-  %1019 = uitofp i32 %1012 to float
-  %1020 = fptoui float %45 to i32
-  %1021 = fptoui float %182 to i32
-  %1022 = fptoui float %1018 to i32
-  %1023 = fptoui float %1019 to i32
-  %1024 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1025 = extractvalue %dx.types.CBufRet.i32 %1024, 0
-  %1026 = extractvalue %dx.types.CBufRet.i32 %1024, 1
-  %1027 = extractvalue %dx.types.CBufRet.i32 %1024, 2
-  %1028 = extractvalue %dx.types.CBufRet.i32 %1024, 3
-  %1029 = mul i32 %1025, %1020
-  %1030 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1021, i32 %1026, i32 %1029)  ; IMad(a,b,c)
-  %1031 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1022, i32 %1027, i32 %1030)  ; IMad(a,b,c)
-  %1032 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1023, i32 %1028, i32 %1031)  ; IMad(a,b,c)
-  %1033 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1032, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1034 = extractvalue %dx.types.ResRet.i32 %1033, 0
-  %1035 = extractvalue %dx.types.ResRet.i32 %1033, 1
-  %1036 = call double @dx.op.makeDouble.f64(i32 101, i32 %1034, i32 %1035)  ; MakeDouble(lo,hi)
-  %1037 = fptrunc double %1036 to float
-  br label %1127
-
-; <label>:1038                                    ; preds = %1005
-  %1039 = icmp eq i32 %976, 2
-  br i1 %1039, label %1040, label %1127
-
-; <label>:1040                                    ; preds = %1038
-  %1041 = fsub fast float %22, %20
-  %1042 = fcmp fast olt float %973, %20
-  br i1 %1042, label %1043, label %1056
-
-; <label>:1043                                    ; preds = %1040
-  %1044 = fsub fast float %20, %973
-  %1045 = fdiv fast float %1044, %1041
-  %1046 = fptoui float %1045 to i32
-  %1047 = uitofp i32 %1046 to float
-  %1048 = fmul fast float %1047, %1041
-  %1049 = fsub fast float %1044, %1048
-  %1050 = and i32 %1046, 1
-  %1051 = icmp eq i32 %1050, 0
-  br i1 %1051, label %1052, label %1054
-
-; <label>:1052                                    ; preds = %1043
-  %1053 = fadd fast float %1049, %20
-  br label %1071
-
-; <label>:1054                                    ; preds = %1043
-  %1055 = fsub fast float %22, %1049
-  br label %1071
-
-; <label>:1056                                    ; preds = %1040
-  %1057 = fcmp fast ogt float %973, %22
-  br i1 %1057, label %1058, label %1071
-
-; <label>:1058                                    ; preds = %1056
-  %1059 = fsub fast float %973, %22
-  %1060 = fdiv fast float %1059, %1041
-  %1061 = fptoui float %1060 to i32
-  %1062 = uitofp i32 %1061 to float
-  %1063 = fmul fast float %1062, %1041
-  %1064 = fsub fast float %1059, %1063
-  %1065 = and i32 %1061, 1
-  %1066 = icmp eq i32 %1065, 0
-  br i1 %1066, label %1067, label %1069
-
-; <label>:1067                                    ; preds = %1058
-  %1068 = fsub fast float %22, %1064
-  br label %1071
-
-; <label>:1069                                    ; preds = %1058
-  %1070 = fadd fast float %1064, %20
-  br label %1071
-
-; <label>:1071                                    ; preds = %1069, %1067, %1056, %1054, %1052
-  %1072 = phi float [ %1053, %1052 ], [ %1055, %1054 ], [ %1068, %1067 ], [ %1070, %1069 ], [ %973, %1056 ]
-  %1073 = fptoui float %1072 to i32
-  %1074 = fsub fast float %24, %20
-  %1075 = fcmp fast olt float %975, %20
-  br i1 %1075, label %1076, label %1089
-
-; <label>:1076                                    ; preds = %1071
-  %1077 = fsub fast float %20, %975
-  %1078 = fdiv fast float %1077, %1074
-  %1079 = fptoui float %1078 to i32
-  %1080 = uitofp i32 %1079 to float
-  %1081 = fmul fast float %1080, %1074
-  %1082 = fsub fast float %1077, %1081
-  %1083 = and i32 %1079, 1
-  %1084 = icmp eq i32 %1083, 0
-  br i1 %1084, label %1085, label %1087
-
-; <label>:1085                                    ; preds = %1076
-  %1086 = fadd fast float %1082, %20
-  br label %1104
-
-; <label>:1087                                    ; preds = %1076
-  %1088 = fsub fast float %24, %1082
-  br label %1104
-
-; <label>:1089                                    ; preds = %1071
-  %1090 = fcmp fast ogt float %975, %24
-  br i1 %1090, label %1091, label %1104
-
-; <label>:1091                                    ; preds = %1089
-  %1092 = fsub fast float %975, %24
-  %1093 = fdiv fast float %1092, %1074
-  %1094 = fptoui float %1093 to i32
-  %1095 = uitofp i32 %1094 to float
-  %1096 = fmul fast float %1095, %1074
-  %1097 = fsub fast float %1092, %1096
-  %1098 = and i32 %1094, 1
-  %1099 = icmp eq i32 %1098, 0
-  br i1 %1099, label %1100, label %1102
-
-; <label>:1100                                    ; preds = %1091
-  %1101 = fsub fast float %24, %1097
-  br label %1104
-
-; <label>:1102                                    ; preds = %1091
-  %1103 = fadd fast float %1097, %20
-  br label %1104
-
-; <label>:1104                                    ; preds = %1102, %1100, %1089, %1087, %1085
-  %1105 = phi float [ %1086, %1085 ], [ %1088, %1087 ], [ %1101, %1100 ], [ %1103, %1102 ], [ %975, %1089 ]
-  %1106 = fptoui float %1105 to i32
-  %1107 = uitofp i32 %1106 to float
-  %1108 = uitofp i32 %1073 to float
-  %1109 = fptoui float %45 to i32
-  %1110 = fptoui float %182 to i32
-  %1111 = fptoui float %1107 to i32
-  %1112 = fptoui float %1108 to i32
-  %1113 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1114 = extractvalue %dx.types.CBufRet.i32 %1113, 0
-  %1115 = extractvalue %dx.types.CBufRet.i32 %1113, 1
-  %1116 = extractvalue %dx.types.CBufRet.i32 %1113, 2
-  %1117 = extractvalue %dx.types.CBufRet.i32 %1113, 3
-  %1118 = mul i32 %1114, %1109
-  %1119 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1110, i32 %1115, i32 %1118)  ; IMad(a,b,c)
-  %1120 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1111, i32 %1116, i32 %1119)  ; IMad(a,b,c)
-  %1121 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1112, i32 %1117, i32 %1120)  ; IMad(a,b,c)
-  %1122 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1121, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1123 = extractvalue %dx.types.ResRet.i32 %1122, 0
-  %1124 = extractvalue %dx.types.ResRet.i32 %1122, 1
-  %1125 = call double @dx.op.makeDouble.f64(i32 101, i32 %1123, i32 %1124)  ; MakeDouble(lo,hi)
-  %1126 = fptrunc double %1125 to float
-  br label %1127
-
-; <label>:1127                                    ; preds = %1104, %1038, %1007, %988, %978
-  %1128 = phi float [ %1004, %988 ], [ 0.000000e+00, %978 ], [ %1037, %1007 ], [ %1126, %1104 ], [ 0.000000e+00, %1038 ]
-  br i1 %977, label %1129, label %1156
-
-; <label>:1129                                    ; preds = %1127
-  %1130 = fcmp fast oge float %972, 0.000000e+00
-  %1131 = fptoui float %972 to i32
-  %1132 = icmp ult i32 %1131, %13
-  %1133 = and i1 %1130, %1132
-  %1134 = fcmp fast oge float %975, 0.000000e+00
-  %1135 = and i1 %1134, %1133
-  %1136 = fptoui float %975 to i32
-  %1137 = icmp ult i32 %1136, %15
-  %1138 = and i1 %1137, %1135
-  br i1 %1138, label %1139, label %1278
-
-; <label>:1139                                    ; preds = %1129
-  %1140 = fptoui float %45 to i32
-  %1141 = fptoui float %182 to i32
-  %1142 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1143 = extractvalue %dx.types.CBufRet.i32 %1142, 0
-  %1144 = extractvalue %dx.types.CBufRet.i32 %1142, 1
-  %1145 = extractvalue %dx.types.CBufRet.i32 %1142, 2
-  %1146 = extractvalue %dx.types.CBufRet.i32 %1142, 3
-  %1147 = mul i32 %1143, %1140
-  %1148 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1141, i32 %1144, i32 %1147)  ; IMad(a,b,c)
-  %1149 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1136, i32 %1145, i32 %1148)  ; IMad(a,b,c)
-  %1150 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1131, i32 %1146, i32 %1149)  ; IMad(a,b,c)
-  %1151 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1150, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1152 = extractvalue %dx.types.ResRet.i32 %1151, 0
-  %1153 = extractvalue %dx.types.ResRet.i32 %1151, 1
-  %1154 = call double @dx.op.makeDouble.f64(i32 101, i32 %1152, i32 %1153)  ; MakeDouble(lo,hi)
-  %1155 = fptrunc double %1154 to float
-  br label %1278
-
-; <label>:1156                                    ; preds = %1127
-  %1157 = icmp eq i32 %976, 1
-  br i1 %1157, label %1158, label %1189
-
-; <label>:1158                                    ; preds = %1156
-  %1159 = add i32 %13, -1
-  %1160 = uitofp i32 %1159 to float
-  %1161 = call float @dx.op.binary.f32(i32 35, float %972, float 0.000000e+00)  ; FMax(a,b)
-  %1162 = call float @dx.op.binary.f32(i32 36, float %1161, float %1160)  ; FMin(a,b)
-  %1163 = fptoui float %1162 to i32
-  %1164 = add i32 %15, -1
-  %1165 = uitofp i32 %1164 to float
-  %1166 = call float @dx.op.binary.f32(i32 35, float %975, float 0.000000e+00)  ; FMax(a,b)
-  %1167 = call float @dx.op.binary.f32(i32 36, float %1166, float %1165)  ; FMin(a,b)
-  %1168 = fptoui float %1167 to i32
-  %1169 = uitofp i32 %1168 to float
-  %1170 = uitofp i32 %1163 to float
-  %1171 = fptoui float %45 to i32
-  %1172 = fptoui float %182 to i32
-  %1173 = fptoui float %1169 to i32
-  %1174 = fptoui float %1170 to i32
-  %1175 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1176 = extractvalue %dx.types.CBufRet.i32 %1175, 0
-  %1177 = extractvalue %dx.types.CBufRet.i32 %1175, 1
-  %1178 = extractvalue %dx.types.CBufRet.i32 %1175, 2
-  %1179 = extractvalue %dx.types.CBufRet.i32 %1175, 3
-  %1180 = mul i32 %1176, %1171
-  %1181 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1172, i32 %1177, i32 %1180)  ; IMad(a,b,c)
-  %1182 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1173, i32 %1178, i32 %1181)  ; IMad(a,b,c)
-  %1183 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1174, i32 %1179, i32 %1182)  ; IMad(a,b,c)
-  %1184 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1183, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1185 = extractvalue %dx.types.ResRet.i32 %1184, 0
-  %1186 = extractvalue %dx.types.ResRet.i32 %1184, 1
-  %1187 = call double @dx.op.makeDouble.f64(i32 101, i32 %1185, i32 %1186)  ; MakeDouble(lo,hi)
-  %1188 = fptrunc double %1187 to float
-  br label %1278
-
-; <label>:1189                                    ; preds = %1156
-  %1190 = icmp eq i32 %976, 2
-  br i1 %1190, label %1191, label %1278
-
-; <label>:1191                                    ; preds = %1189
-  %1192 = fsub fast float %22, %20
-  %1193 = fcmp fast olt float %972, %20
-  br i1 %1193, label %1194, label %1207
-
-; <label>:1194                                    ; preds = %1191
-  %1195 = fsub fast float %20, %972
-  %1196 = fdiv fast float %1195, %1192
-  %1197 = fptoui float %1196 to i32
-  %1198 = uitofp i32 %1197 to float
-  %1199 = fmul fast float %1198, %1192
-  %1200 = fsub fast float %1195, %1199
-  %1201 = and i32 %1197, 1
-  %1202 = icmp eq i32 %1201, 0
-  br i1 %1202, label %1203, label %1205
-
-; <label>:1203                                    ; preds = %1194
-  %1204 = fadd fast float %1200, %20
-  br label %1222
-
-; <label>:1205                                    ; preds = %1194
-  %1206 = fsub fast float %22, %1200
-  br label %1222
-
-; <label>:1207                                    ; preds = %1191
-  %1208 = fcmp fast ogt float %972, %22
-  br i1 %1208, label %1209, label %1222
-
-; <label>:1209                                    ; preds = %1207
-  %1210 = fsub fast float %972, %22
-  %1211 = fdiv fast float %1210, %1192
-  %1212 = fptoui float %1211 to i32
-  %1213 = uitofp i32 %1212 to float
-  %1214 = fmul fast float %1213, %1192
-  %1215 = fsub fast float %1210, %1214
-  %1216 = and i32 %1212, 1
-  %1217 = icmp eq i32 %1216, 0
-  br i1 %1217, label %1218, label %1220
-
-; <label>:1218                                    ; preds = %1209
-  %1219 = fsub fast float %22, %1215
-  br label %1222
-
-; <label>:1220                                    ; preds = %1209
-  %1221 = fadd fast float %1215, %20
-  br label %1222
-
-; <label>:1222                                    ; preds = %1220, %1218, %1207, %1205, %1203
-  %1223 = phi float [ %1204, %1203 ], [ %1206, %1205 ], [ %1219, %1218 ], [ %1221, %1220 ], [ %972, %1207 ]
-  %1224 = fptoui float %1223 to i32
-  %1225 = fsub fast float %24, %20
-  %1226 = fcmp fast olt float %975, %20
-  br i1 %1226, label %1227, label %1240
-
-; <label>:1227                                    ; preds = %1222
-  %1228 = fsub fast float %20, %975
-  %1229 = fdiv fast float %1228, %1225
-  %1230 = fptoui float %1229 to i32
-  %1231 = uitofp i32 %1230 to float
-  %1232 = fmul fast float %1231, %1225
-  %1233 = fsub fast float %1228, %1232
-  %1234 = and i32 %1230, 1
-  %1235 = icmp eq i32 %1234, 0
-  br i1 %1235, label %1236, label %1238
-
-; <label>:1236                                    ; preds = %1227
-  %1237 = fadd fast float %1233, %20
-  br label %1255
-
-; <label>:1238                                    ; preds = %1227
-  %1239 = fsub fast float %24, %1233
-  br label %1255
-
-; <label>:1240                                    ; preds = %1222
-  %1241 = fcmp fast ogt float %975, %24
-  br i1 %1241, label %1242, label %1255
-
-; <label>:1242                                    ; preds = %1240
-  %1243 = fsub fast float %975, %24
-  %1244 = fdiv fast float %1243, %1225
-  %1245 = fptoui float %1244 to i32
-  %1246 = uitofp i32 %1245 to float
-  %1247 = fmul fast float %1246, %1225
-  %1248 = fsub fast float %1243, %1247
-  %1249 = and i32 %1245, 1
-  %1250 = icmp eq i32 %1249, 0
-  br i1 %1250, label %1251, label %1253
-
-; <label>:1251                                    ; preds = %1242
-  %1252 = fsub fast float %24, %1248
-  br label %1255
-
-; <label>:1253                                    ; preds = %1242
-  %1254 = fadd fast float %1248, %20
-  br label %1255
-
-; <label>:1255                                    ; preds = %1253, %1251, %1240, %1238, %1236
-  %1256 = phi float [ %1237, %1236 ], [ %1239, %1238 ], [ %1252, %1251 ], [ %1254, %1253 ], [ %975, %1240 ]
-  %1257 = fptoui float %1256 to i32
-  %1258 = uitofp i32 %1257 to float
-  %1259 = uitofp i32 %1224 to float
-  %1260 = fptoui float %45 to i32
-  %1261 = fptoui float %182 to i32
-  %1262 = fptoui float %1258 to i32
-  %1263 = fptoui float %1259 to i32
-  %1264 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1265 = extractvalue %dx.types.CBufRet.i32 %1264, 0
-  %1266 = extractvalue %dx.types.CBufRet.i32 %1264, 1
-  %1267 = extractvalue %dx.types.CBufRet.i32 %1264, 2
-  %1268 = extractvalue %dx.types.CBufRet.i32 %1264, 3
-  %1269 = mul i32 %1265, %1260
-  %1270 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1261, i32 %1266, i32 %1269)  ; IMad(a,b,c)
-  %1271 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1262, i32 %1267, i32 %1270)  ; IMad(a,b,c)
-  %1272 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1263, i32 %1268, i32 %1271)  ; IMad(a,b,c)
-  %1273 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1272, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1274 = extractvalue %dx.types.ResRet.i32 %1273, 0
-  %1275 = extractvalue %dx.types.ResRet.i32 %1273, 1
-  %1276 = call double @dx.op.makeDouble.f64(i32 101, i32 %1274, i32 %1275)  ; MakeDouble(lo,hi)
-  %1277 = fptrunc double %1276 to float
-  br label %1278
-
-; <label>:1278                                    ; preds = %1255, %1189, %1158, %1139, %1129
-  %1279 = phi float [ %1155, %1139 ], [ 0.000000e+00, %1129 ], [ %1188, %1158 ], [ %1277, %1255 ], [ 0.000000e+00, %1189 ]
-  %1280 = fadd fast float %972, 1.000000e+00
-  br i1 %977, label %1281, label %1308
-
-; <label>:1281                                    ; preds = %1278
-  %1282 = fcmp fast oge float %1280, 0.000000e+00
-  %1283 = fptoui float %1280 to i32
-  %1284 = icmp ult i32 %1283, %13
-  %1285 = and i1 %1282, %1284
-  %1286 = fcmp fast oge float %975, 0.000000e+00
-  %1287 = and i1 %1286, %1285
-  %1288 = fptoui float %975 to i32
-  %1289 = icmp ult i32 %1288, %15
-  %1290 = and i1 %1289, %1287
-  br i1 %1290, label %1291, label %1430
-
-; <label>:1291                                    ; preds = %1281
-  %1292 = fptoui float %45 to i32
-  %1293 = fptoui float %182 to i32
-  %1294 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1295 = extractvalue %dx.types.CBufRet.i32 %1294, 0
-  %1296 = extractvalue %dx.types.CBufRet.i32 %1294, 1
-  %1297 = extractvalue %dx.types.CBufRet.i32 %1294, 2
-  %1298 = extractvalue %dx.types.CBufRet.i32 %1294, 3
-  %1299 = mul i32 %1295, %1292
-  %1300 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1293, i32 %1296, i32 %1299)  ; IMad(a,b,c)
-  %1301 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1288, i32 %1297, i32 %1300)  ; IMad(a,b,c)
-  %1302 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1283, i32 %1298, i32 %1301)  ; IMad(a,b,c)
-  %1303 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1302, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1304 = extractvalue %dx.types.ResRet.i32 %1303, 0
-  %1305 = extractvalue %dx.types.ResRet.i32 %1303, 1
-  %1306 = call double @dx.op.makeDouble.f64(i32 101, i32 %1304, i32 %1305)  ; MakeDouble(lo,hi)
-  %1307 = fptrunc double %1306 to float
-  br label %1430
-
-; <label>:1308                                    ; preds = %1278
-  %1309 = icmp eq i32 %976, 1
-  br i1 %1309, label %1310, label %1341
-
-; <label>:1310                                    ; preds = %1308
-  %1311 = add i32 %13, -1
-  %1312 = uitofp i32 %1311 to float
-  %1313 = call float @dx.op.binary.f32(i32 35, float %1280, float 0.000000e+00)  ; FMax(a,b)
-  %1314 = call float @dx.op.binary.f32(i32 36, float %1313, float %1312)  ; FMin(a,b)
-  %1315 = fptoui float %1314 to i32
-  %1316 = add i32 %15, -1
-  %1317 = uitofp i32 %1316 to float
-  %1318 = call float @dx.op.binary.f32(i32 35, float %975, float 0.000000e+00)  ; FMax(a,b)
-  %1319 = call float @dx.op.binary.f32(i32 36, float %1318, float %1317)  ; FMin(a,b)
-  %1320 = fptoui float %1319 to i32
-  %1321 = uitofp i32 %1320 to float
-  %1322 = uitofp i32 %1315 to float
-  %1323 = fptoui float %45 to i32
-  %1324 = fptoui float %182 to i32
-  %1325 = fptoui float %1321 to i32
-  %1326 = fptoui float %1322 to i32
-  %1327 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1328 = extractvalue %dx.types.CBufRet.i32 %1327, 0
-  %1329 = extractvalue %dx.types.CBufRet.i32 %1327, 1
-  %1330 = extractvalue %dx.types.CBufRet.i32 %1327, 2
-  %1331 = extractvalue %dx.types.CBufRet.i32 %1327, 3
-  %1332 = mul i32 %1328, %1323
-  %1333 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1324, i32 %1329, i32 %1332)  ; IMad(a,b,c)
-  %1334 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1325, i32 %1330, i32 %1333)  ; IMad(a,b,c)
-  %1335 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1326, i32 %1331, i32 %1334)  ; IMad(a,b,c)
-  %1336 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1335, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1337 = extractvalue %dx.types.ResRet.i32 %1336, 0
-  %1338 = extractvalue %dx.types.ResRet.i32 %1336, 1
-  %1339 = call double @dx.op.makeDouble.f64(i32 101, i32 %1337, i32 %1338)  ; MakeDouble(lo,hi)
-  %1340 = fptrunc double %1339 to float
-  br label %1430
-
-; <label>:1341                                    ; preds = %1308
-  %1342 = icmp eq i32 %976, 2
-  br i1 %1342, label %1343, label %1430
-
-; <label>:1343                                    ; preds = %1341
-  %1344 = fsub fast float %22, %20
-  %1345 = fcmp fast olt float %1280, %20
-  br i1 %1345, label %1346, label %1359
-
-; <label>:1346                                    ; preds = %1343
-  %1347 = fsub fast float %20, %1280
-  %1348 = fdiv fast float %1347, %1344
-  %1349 = fptoui float %1348 to i32
-  %1350 = uitofp i32 %1349 to float
-  %1351 = fmul fast float %1350, %1344
-  %1352 = fsub fast float %1347, %1351
-  %1353 = and i32 %1349, 1
-  %1354 = icmp eq i32 %1353, 0
-  br i1 %1354, label %1355, label %1357
-
-; <label>:1355                                    ; preds = %1346
-  %1356 = fadd fast float %1352, %20
-  br label %1374
-
-; <label>:1357                                    ; preds = %1346
-  %1358 = fsub fast float %22, %1352
-  br label %1374
-
-; <label>:1359                                    ; preds = %1343
-  %1360 = fcmp fast ogt float %1280, %22
-  br i1 %1360, label %1361, label %1374
-
-; <label>:1361                                    ; preds = %1359
-  %1362 = fsub fast float %1280, %22
-  %1363 = fdiv fast float %1362, %1344
-  %1364 = fptoui float %1363 to i32
-  %1365 = uitofp i32 %1364 to float
-  %1366 = fmul fast float %1365, %1344
-  %1367 = fsub fast float %1362, %1366
-  %1368 = and i32 %1364, 1
-  %1369 = icmp eq i32 %1368, 0
-  br i1 %1369, label %1370, label %1372
-
-; <label>:1370                                    ; preds = %1361
-  %1371 = fsub fast float %22, %1367
-  br label %1374
-
-; <label>:1372                                    ; preds = %1361
-  %1373 = fadd fast float %1367, %20
-  br label %1374
-
-; <label>:1374                                    ; preds = %1372, %1370, %1359, %1357, %1355
-  %1375 = phi float [ %1356, %1355 ], [ %1358, %1357 ], [ %1371, %1370 ], [ %1373, %1372 ], [ %1280, %1359 ]
-  %1376 = fptoui float %1375 to i32
-  %1377 = fsub fast float %24, %20
-  %1378 = fcmp fast olt float %975, %20
-  br i1 %1378, label %1379, label %1392
-
-; <label>:1379                                    ; preds = %1374
-  %1380 = fsub fast float %20, %975
-  %1381 = fdiv fast float %1380, %1377
-  %1382 = fptoui float %1381 to i32
-  %1383 = uitofp i32 %1382 to float
-  %1384 = fmul fast float %1383, %1377
-  %1385 = fsub fast float %1380, %1384
-  %1386 = and i32 %1382, 1
-  %1387 = icmp eq i32 %1386, 0
-  br i1 %1387, label %1388, label %1390
-
-; <label>:1388                                    ; preds = %1379
-  %1389 = fadd fast float %1385, %20
-  br label %1407
-
-; <label>:1390                                    ; preds = %1379
-  %1391 = fsub fast float %24, %1385
-  br label %1407
-
-; <label>:1392                                    ; preds = %1374
-  %1393 = fcmp fast ogt float %975, %24
-  br i1 %1393, label %1394, label %1407
-
-; <label>:1394                                    ; preds = %1392
-  %1395 = fsub fast float %975, %24
-  %1396 = fdiv fast float %1395, %1377
-  %1397 = fptoui float %1396 to i32
-  %1398 = uitofp i32 %1397 to float
-  %1399 = fmul fast float %1398, %1377
-  %1400 = fsub fast float %1395, %1399
-  %1401 = and i32 %1397, 1
-  %1402 = icmp eq i32 %1401, 0
-  br i1 %1402, label %1403, label %1405
-
-; <label>:1403                                    ; preds = %1394
-  %1404 = fsub fast float %24, %1400
-  br label %1407
-
-; <label>:1405                                    ; preds = %1394
-  %1406 = fadd fast float %1400, %20
-  br label %1407
-
-; <label>:1407                                    ; preds = %1405, %1403, %1392, %1390, %1388
-  %1408 = phi float [ %1389, %1388 ], [ %1391, %1390 ], [ %1404, %1403 ], [ %1406, %1405 ], [ %975, %1392 ]
-  %1409 = fptoui float %1408 to i32
-  %1410 = uitofp i32 %1409 to float
-  %1411 = uitofp i32 %1376 to float
-  %1412 = fptoui float %45 to i32
-  %1413 = fptoui float %182 to i32
-  %1414 = fptoui float %1410 to i32
-  %1415 = fptoui float %1411 to i32
-  %1416 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1417 = extractvalue %dx.types.CBufRet.i32 %1416, 0
-  %1418 = extractvalue %dx.types.CBufRet.i32 %1416, 1
-  %1419 = extractvalue %dx.types.CBufRet.i32 %1416, 2
-  %1420 = extractvalue %dx.types.CBufRet.i32 %1416, 3
-  %1421 = mul i32 %1417, %1412
-  %1422 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1413, i32 %1418, i32 %1421)  ; IMad(a,b,c)
-  %1423 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1414, i32 %1419, i32 %1422)  ; IMad(a,b,c)
-  %1424 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1415, i32 %1420, i32 %1423)  ; IMad(a,b,c)
-  %1425 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1424, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1426 = extractvalue %dx.types.ResRet.i32 %1425, 0
-  %1427 = extractvalue %dx.types.ResRet.i32 %1425, 1
-  %1428 = call double @dx.op.makeDouble.f64(i32 101, i32 %1426, i32 %1427)  ; MakeDouble(lo,hi)
-  %1429 = fptrunc double %1428 to float
-  br label %1430
-
-; <label>:1430                                    ; preds = %1407, %1341, %1310, %1291, %1281
-  %1431 = phi float [ %1307, %1291 ], [ 0.000000e+00, %1281 ], [ %1340, %1310 ], [ %1429, %1407 ], [ 0.000000e+00, %1341 ]
-  %1432 = fadd fast float %972, 2.000000e+00
-  br i1 %977, label %1433, label %1460
-
-; <label>:1433                                    ; preds = %1430
-  %1434 = fcmp fast oge float %1432, 0.000000e+00
-  %1435 = fptoui float %1432 to i32
-  %1436 = icmp ult i32 %1435, %13
-  %1437 = and i1 %1434, %1436
-  %1438 = fcmp fast oge float %975, 0.000000e+00
-  %1439 = and i1 %1438, %1437
-  %1440 = fptoui float %975 to i32
-  %1441 = icmp ult i32 %1440, %15
-  %1442 = and i1 %1441, %1439
-  br i1 %1442, label %1443, label %1582
-
-; <label>:1443                                    ; preds = %1433
-  %1444 = fptoui float %45 to i32
-  %1445 = fptoui float %182 to i32
-  %1446 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1447 = extractvalue %dx.types.CBufRet.i32 %1446, 0
-  %1448 = extractvalue %dx.types.CBufRet.i32 %1446, 1
-  %1449 = extractvalue %dx.types.CBufRet.i32 %1446, 2
-  %1450 = extractvalue %dx.types.CBufRet.i32 %1446, 3
-  %1451 = mul i32 %1447, %1444
-  %1452 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1445, i32 %1448, i32 %1451)  ; IMad(a,b,c)
-  %1453 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1440, i32 %1449, i32 %1452)  ; IMad(a,b,c)
-  %1454 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1435, i32 %1450, i32 %1453)  ; IMad(a,b,c)
-  %1455 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1454, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1456 = extractvalue %dx.types.ResRet.i32 %1455, 0
-  %1457 = extractvalue %dx.types.ResRet.i32 %1455, 1
-  %1458 = call double @dx.op.makeDouble.f64(i32 101, i32 %1456, i32 %1457)  ; MakeDouble(lo,hi)
-  %1459 = fptrunc double %1458 to float
-  br label %1582
-
-; <label>:1460                                    ; preds = %1430
-  %1461 = icmp eq i32 %976, 1
-  br i1 %1461, label %1462, label %1493
-
-; <label>:1462                                    ; preds = %1460
-  %1463 = add i32 %13, -1
-  %1464 = uitofp i32 %1463 to float
-  %1465 = call float @dx.op.binary.f32(i32 35, float %1432, float 0.000000e+00)  ; FMax(a,b)
-  %1466 = call float @dx.op.binary.f32(i32 36, float %1465, float %1464)  ; FMin(a,b)
-  %1467 = fptoui float %1466 to i32
-  %1468 = add i32 %15, -1
-  %1469 = uitofp i32 %1468 to float
-  %1470 = call float @dx.op.binary.f32(i32 35, float %975, float 0.000000e+00)  ; FMax(a,b)
-  %1471 = call float @dx.op.binary.f32(i32 36, float %1470, float %1469)  ; FMin(a,b)
-  %1472 = fptoui float %1471 to i32
-  %1473 = uitofp i32 %1472 to float
-  %1474 = uitofp i32 %1467 to float
-  %1475 = fptoui float %45 to i32
-  %1476 = fptoui float %182 to i32
-  %1477 = fptoui float %1473 to i32
-  %1478 = fptoui float %1474 to i32
-  %1479 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1480 = extractvalue %dx.types.CBufRet.i32 %1479, 0
-  %1481 = extractvalue %dx.types.CBufRet.i32 %1479, 1
-  %1482 = extractvalue %dx.types.CBufRet.i32 %1479, 2
-  %1483 = extractvalue %dx.types.CBufRet.i32 %1479, 3
-  %1484 = mul i32 %1480, %1475
-  %1485 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1476, i32 %1481, i32 %1484)  ; IMad(a,b,c)
-  %1486 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1477, i32 %1482, i32 %1485)  ; IMad(a,b,c)
-  %1487 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1478, i32 %1483, i32 %1486)  ; IMad(a,b,c)
-  %1488 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1487, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1489 = extractvalue %dx.types.ResRet.i32 %1488, 0
-  %1490 = extractvalue %dx.types.ResRet.i32 %1488, 1
-  %1491 = call double @dx.op.makeDouble.f64(i32 101, i32 %1489, i32 %1490)  ; MakeDouble(lo,hi)
-  %1492 = fptrunc double %1491 to float
-  br label %1582
-
-; <label>:1493                                    ; preds = %1460
-  %1494 = icmp eq i32 %976, 2
-  br i1 %1494, label %1495, label %1582
-
-; <label>:1495                                    ; preds = %1493
-  %1496 = fsub fast float %22, %20
-  %1497 = fcmp fast olt float %1432, %20
-  br i1 %1497, label %1498, label %1511
-
-; <label>:1498                                    ; preds = %1495
-  %1499 = fsub fast float %20, %1432
-  %1500 = fdiv fast float %1499, %1496
-  %1501 = fptoui float %1500 to i32
-  %1502 = uitofp i32 %1501 to float
-  %1503 = fmul fast float %1502, %1496
-  %1504 = fsub fast float %1499, %1503
-  %1505 = and i32 %1501, 1
-  %1506 = icmp eq i32 %1505, 0
-  br i1 %1506, label %1507, label %1509
-
-; <label>:1507                                    ; preds = %1498
-  %1508 = fadd fast float %1504, %20
-  br label %1526
-
-; <label>:1509                                    ; preds = %1498
-  %1510 = fsub fast float %22, %1504
-  br label %1526
-
-; <label>:1511                                    ; preds = %1495
-  %1512 = fcmp fast ogt float %1432, %22
-  br i1 %1512, label %1513, label %1526
-
-; <label>:1513                                    ; preds = %1511
-  %1514 = fsub fast float %1432, %22
-  %1515 = fdiv fast float %1514, %1496
-  %1516 = fptoui float %1515 to i32
-  %1517 = uitofp i32 %1516 to float
-  %1518 = fmul fast float %1517, %1496
-  %1519 = fsub fast float %1514, %1518
-  %1520 = and i32 %1516, 1
-  %1521 = icmp eq i32 %1520, 0
-  br i1 %1521, label %1522, label %1524
-
-; <label>:1522                                    ; preds = %1513
-  %1523 = fsub fast float %22, %1519
-  br label %1526
-
-; <label>:1524                                    ; preds = %1513
-  %1525 = fadd fast float %1519, %20
-  br label %1526
-
-; <label>:1526                                    ; preds = %1524, %1522, %1511, %1509, %1507
-  %1527 = phi float [ %1508, %1507 ], [ %1510, %1509 ], [ %1523, %1522 ], [ %1525, %1524 ], [ %1432, %1511 ]
-  %1528 = fptoui float %1527 to i32
-  %1529 = fsub fast float %24, %20
-  %1530 = fcmp fast olt float %975, %20
-  br i1 %1530, label %1531, label %1544
-
-; <label>:1531                                    ; preds = %1526
-  %1532 = fsub fast float %20, %975
-  %1533 = fdiv fast float %1532, %1529
-  %1534 = fptoui float %1533 to i32
-  %1535 = uitofp i32 %1534 to float
-  %1536 = fmul fast float %1535, %1529
-  %1537 = fsub fast float %1532, %1536
-  %1538 = and i32 %1534, 1
-  %1539 = icmp eq i32 %1538, 0
-  br i1 %1539, label %1540, label %1542
-
-; <label>:1540                                    ; preds = %1531
-  %1541 = fadd fast float %1537, %20
-  br label %1559
-
-; <label>:1542                                    ; preds = %1531
-  %1543 = fsub fast float %24, %1537
-  br label %1559
-
-; <label>:1544                                    ; preds = %1526
-  %1545 = fcmp fast ogt float %975, %24
-  br i1 %1545, label %1546, label %1559
-
-; <label>:1546                                    ; preds = %1544
-  %1547 = fsub fast float %975, %24
-  %1548 = fdiv fast float %1547, %1529
-  %1549 = fptoui float %1548 to i32
-  %1550 = uitofp i32 %1549 to float
-  %1551 = fmul fast float %1550, %1529
-  %1552 = fsub fast float %1547, %1551
-  %1553 = and i32 %1549, 1
-  %1554 = icmp eq i32 %1553, 0
-  br i1 %1554, label %1555, label %1557
-
-; <label>:1555                                    ; preds = %1546
-  %1556 = fsub fast float %24, %1552
-  br label %1559
-
-; <label>:1557                                    ; preds = %1546
-  %1558 = fadd fast float %1552, %20
-  br label %1559
-
-; <label>:1559                                    ; preds = %1557, %1555, %1544, %1542, %1540
-  %1560 = phi float [ %1541, %1540 ], [ %1543, %1542 ], [ %1556, %1555 ], [ %1558, %1557 ], [ %975, %1544 ]
-  %1561 = fptoui float %1560 to i32
-  %1562 = uitofp i32 %1561 to float
-  %1563 = uitofp i32 %1528 to float
-  %1564 = fptoui float %45 to i32
-  %1565 = fptoui float %182 to i32
-  %1566 = fptoui float %1562 to i32
-  %1567 = fptoui float %1563 to i32
-  %1568 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1569 = extractvalue %dx.types.CBufRet.i32 %1568, 0
-  %1570 = extractvalue %dx.types.CBufRet.i32 %1568, 1
-  %1571 = extractvalue %dx.types.CBufRet.i32 %1568, 2
-  %1572 = extractvalue %dx.types.CBufRet.i32 %1568, 3
-  %1573 = mul i32 %1569, %1564
-  %1574 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1565, i32 %1570, i32 %1573)  ; IMad(a,b,c)
-  %1575 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1566, i32 %1571, i32 %1574)  ; IMad(a,b,c)
-  %1576 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1567, i32 %1572, i32 %1575)  ; IMad(a,b,c)
-  %1577 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1576, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1578 = extractvalue %dx.types.ResRet.i32 %1577, 0
-  %1579 = extractvalue %dx.types.ResRet.i32 %1577, 1
-  %1580 = call double @dx.op.makeDouble.f64(i32 101, i32 %1578, i32 %1579)  ; MakeDouble(lo,hi)
-  %1581 = fptrunc double %1580 to float
-  br label %1582
-
-; <label>:1582                                    ; preds = %1559, %1493, %1462, %1443, %1433
-  %1583 = phi float [ %1459, %1443 ], [ 0.000000e+00, %1433 ], [ %1492, %1462 ], [ %1581, %1559 ], [ 0.000000e+00, %1493 ]
-  br i1 %977, label %1584, label %1611
-
-; <label>:1584                                    ; preds = %1582
-  %1585 = fcmp fast oge float %973, 0.000000e+00
-  %1586 = fptoui float %973 to i32
-  %1587 = icmp ult i32 %1586, %13
-  %1588 = and i1 %1585, %1587
-  %1589 = fcmp fast oge float %974, 0.000000e+00
-  %1590 = and i1 %1589, %1588
-  %1591 = fptoui float %974 to i32
-  %1592 = icmp ult i32 %1591, %15
-  %1593 = and i1 %1592, %1590
-  br i1 %1593, label %1594, label %1733
-
-; <label>:1594                                    ; preds = %1584
-  %1595 = fptoui float %45 to i32
-  %1596 = fptoui float %182 to i32
-  %1597 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1598 = extractvalue %dx.types.CBufRet.i32 %1597, 0
-  %1599 = extractvalue %dx.types.CBufRet.i32 %1597, 1
-  %1600 = extractvalue %dx.types.CBufRet.i32 %1597, 2
-  %1601 = extractvalue %dx.types.CBufRet.i32 %1597, 3
-  %1602 = mul i32 %1598, %1595
-  %1603 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1596, i32 %1599, i32 %1602)  ; IMad(a,b,c)
-  %1604 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1591, i32 %1600, i32 %1603)  ; IMad(a,b,c)
-  %1605 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1586, i32 %1601, i32 %1604)  ; IMad(a,b,c)
-  %1606 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1605, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1607 = extractvalue %dx.types.ResRet.i32 %1606, 0
-  %1608 = extractvalue %dx.types.ResRet.i32 %1606, 1
-  %1609 = call double @dx.op.makeDouble.f64(i32 101, i32 %1607, i32 %1608)  ; MakeDouble(lo,hi)
-  %1610 = fptrunc double %1609 to float
-  br label %1733
-
-; <label>:1611                                    ; preds = %1582
-  %1612 = icmp eq i32 %976, 1
-  br i1 %1612, label %1613, label %1644
-
-; <label>:1613                                    ; preds = %1611
-  %1614 = add i32 %13, -1
-  %1615 = uitofp i32 %1614 to float
-  %1616 = call float @dx.op.binary.f32(i32 35, float %973, float 0.000000e+00)  ; FMax(a,b)
-  %1617 = call float @dx.op.binary.f32(i32 36, float %1616, float %1615)  ; FMin(a,b)
-  %1618 = fptoui float %1617 to i32
-  %1619 = add i32 %15, -1
-  %1620 = uitofp i32 %1619 to float
-  %1621 = call float @dx.op.binary.f32(i32 35, float %974, float 0.000000e+00)  ; FMax(a,b)
-  %1622 = call float @dx.op.binary.f32(i32 36, float %1621, float %1620)  ; FMin(a,b)
-  %1623 = fptoui float %1622 to i32
-  %1624 = uitofp i32 %1623 to float
-  %1625 = uitofp i32 %1618 to float
-  %1626 = fptoui float %45 to i32
-  %1627 = fptoui float %182 to i32
-  %1628 = fptoui float %1624 to i32
-  %1629 = fptoui float %1625 to i32
-  %1630 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1631 = extractvalue %dx.types.CBufRet.i32 %1630, 0
-  %1632 = extractvalue %dx.types.CBufRet.i32 %1630, 1
-  %1633 = extractvalue %dx.types.CBufRet.i32 %1630, 2
-  %1634 = extractvalue %dx.types.CBufRet.i32 %1630, 3
-  %1635 = mul i32 %1631, %1626
-  %1636 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1627, i32 %1632, i32 %1635)  ; IMad(a,b,c)
-  %1637 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1628, i32 %1633, i32 %1636)  ; IMad(a,b,c)
-  %1638 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1629, i32 %1634, i32 %1637)  ; IMad(a,b,c)
-  %1639 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1638, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1640 = extractvalue %dx.types.ResRet.i32 %1639, 0
-  %1641 = extractvalue %dx.types.ResRet.i32 %1639, 1
-  %1642 = call double @dx.op.makeDouble.f64(i32 101, i32 %1640, i32 %1641)  ; MakeDouble(lo,hi)
-  %1643 = fptrunc double %1642 to float
-  br label %1733
-
-; <label>:1644                                    ; preds = %1611
-  %1645 = icmp eq i32 %976, 2
-  br i1 %1645, label %1646, label %1733
-
-; <label>:1646                                    ; preds = %1644
-  %1647 = fsub fast float %22, %20
-  %1648 = fcmp fast olt float %973, %20
-  br i1 %1648, label %1649, label %1662
-
-; <label>:1649                                    ; preds = %1646
-  %1650 = fsub fast float %20, %973
-  %1651 = fdiv fast float %1650, %1647
-  %1652 = fptoui float %1651 to i32
-  %1653 = uitofp i32 %1652 to float
-  %1654 = fmul fast float %1653, %1647
-  %1655 = fsub fast float %1650, %1654
-  %1656 = and i32 %1652, 1
-  %1657 = icmp eq i32 %1656, 0
-  br i1 %1657, label %1658, label %1660
-
-; <label>:1658                                    ; preds = %1649
-  %1659 = fadd fast float %1655, %20
-  br label %1677
-
-; <label>:1660                                    ; preds = %1649
-  %1661 = fsub fast float %22, %1655
-  br label %1677
-
-; <label>:1662                                    ; preds = %1646
-  %1663 = fcmp fast ogt float %973, %22
-  br i1 %1663, label %1664, label %1677
-
-; <label>:1664                                    ; preds = %1662
-  %1665 = fsub fast float %973, %22
-  %1666 = fdiv fast float %1665, %1647
-  %1667 = fptoui float %1666 to i32
-  %1668 = uitofp i32 %1667 to float
-  %1669 = fmul fast float %1668, %1647
-  %1670 = fsub fast float %1665, %1669
-  %1671 = and i32 %1667, 1
-  %1672 = icmp eq i32 %1671, 0
-  br i1 %1672, label %1673, label %1675
-
-; <label>:1673                                    ; preds = %1664
-  %1674 = fsub fast float %22, %1670
-  br label %1677
-
-; <label>:1675                                    ; preds = %1664
-  %1676 = fadd fast float %1670, %20
-  br label %1677
-
-; <label>:1677                                    ; preds = %1675, %1673, %1662, %1660, %1658
-  %1678 = phi float [ %1659, %1658 ], [ %1661, %1660 ], [ %1674, %1673 ], [ %1676, %1675 ], [ %973, %1662 ]
-  %1679 = fptoui float %1678 to i32
-  %1680 = fsub fast float %24, %20
-  %1681 = fcmp fast olt float %974, %20
-  br i1 %1681, label %1682, label %1695
-
-; <label>:1682                                    ; preds = %1677
-  %1683 = fsub fast float %20, %974
-  %1684 = fdiv fast float %1683, %1680
-  %1685 = fptoui float %1684 to i32
-  %1686 = uitofp i32 %1685 to float
-  %1687 = fmul fast float %1686, %1680
-  %1688 = fsub fast float %1683, %1687
-  %1689 = and i32 %1685, 1
-  %1690 = icmp eq i32 %1689, 0
-  br i1 %1690, label %1691, label %1693
-
-; <label>:1691                                    ; preds = %1682
-  %1692 = fadd fast float %1688, %20
-  br label %1710
-
-; <label>:1693                                    ; preds = %1682
-  %1694 = fsub fast float %24, %1688
-  br label %1710
-
-; <label>:1695                                    ; preds = %1677
-  %1696 = fcmp fast ogt float %974, %24
-  br i1 %1696, label %1697, label %1710
-
-; <label>:1697                                    ; preds = %1695
-  %1698 = fsub fast float %974, %24
-  %1699 = fdiv fast float %1698, %1680
-  %1700 = fptoui float %1699 to i32
-  %1701 = uitofp i32 %1700 to float
-  %1702 = fmul fast float %1701, %1680
-  %1703 = fsub fast float %1698, %1702
-  %1704 = and i32 %1700, 1
-  %1705 = icmp eq i32 %1704, 0
-  br i1 %1705, label %1706, label %1708
-
-; <label>:1706                                    ; preds = %1697
-  %1707 = fsub fast float %24, %1703
-  br label %1710
-
-; <label>:1708                                    ; preds = %1697
-  %1709 = fadd fast float %1703, %20
-  br label %1710
-
-; <label>:1710                                    ; preds = %1708, %1706, %1695, %1693, %1691
-  %1711 = phi float [ %1692, %1691 ], [ %1694, %1693 ], [ %1707, %1706 ], [ %1709, %1708 ], [ %974, %1695 ]
-  %1712 = fptoui float %1711 to i32
-  %1713 = uitofp i32 %1712 to float
-  %1714 = uitofp i32 %1679 to float
-  %1715 = fptoui float %45 to i32
-  %1716 = fptoui float %182 to i32
-  %1717 = fptoui float %1713 to i32
-  %1718 = fptoui float %1714 to i32
-  %1719 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1720 = extractvalue %dx.types.CBufRet.i32 %1719, 0
-  %1721 = extractvalue %dx.types.CBufRet.i32 %1719, 1
-  %1722 = extractvalue %dx.types.CBufRet.i32 %1719, 2
-  %1723 = extractvalue %dx.types.CBufRet.i32 %1719, 3
-  %1724 = mul i32 %1720, %1715
-  %1725 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1716, i32 %1721, i32 %1724)  ; IMad(a,b,c)
-  %1726 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1717, i32 %1722, i32 %1725)  ; IMad(a,b,c)
-  %1727 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1718, i32 %1723, i32 %1726)  ; IMad(a,b,c)
-  %1728 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1727, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1729 = extractvalue %dx.types.ResRet.i32 %1728, 0
-  %1730 = extractvalue %dx.types.ResRet.i32 %1728, 1
-  %1731 = call double @dx.op.makeDouble.f64(i32 101, i32 %1729, i32 %1730)  ; MakeDouble(lo,hi)
-  %1732 = fptrunc double %1731 to float
-  br label %1733
-
-; <label>:1733                                    ; preds = %1710, %1644, %1613, %1594, %1584
-  %1734 = phi float [ %1610, %1594 ], [ 0.000000e+00, %1584 ], [ %1643, %1613 ], [ %1732, %1710 ], [ 0.000000e+00, %1644 ]
-  br i1 %977, label %1735, label %1762
-
-; <label>:1735                                    ; preds = %1733
-  %1736 = fcmp fast oge float %972, 0.000000e+00
-  %1737 = fptoui float %972 to i32
-  %1738 = icmp ult i32 %1737, %13
-  %1739 = and i1 %1736, %1738
-  %1740 = fcmp fast oge float %974, 0.000000e+00
-  %1741 = and i1 %1740, %1739
-  %1742 = fptoui float %974 to i32
-  %1743 = icmp ult i32 %1742, %15
-  %1744 = and i1 %1743, %1741
-  br i1 %1744, label %1745, label %1884
-
-; <label>:1745                                    ; preds = %1735
-  %1746 = fptoui float %45 to i32
-  %1747 = fptoui float %182 to i32
-  %1748 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1749 = extractvalue %dx.types.CBufRet.i32 %1748, 0
-  %1750 = extractvalue %dx.types.CBufRet.i32 %1748, 1
-  %1751 = extractvalue %dx.types.CBufRet.i32 %1748, 2
-  %1752 = extractvalue %dx.types.CBufRet.i32 %1748, 3
-  %1753 = mul i32 %1749, %1746
-  %1754 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1747, i32 %1750, i32 %1753)  ; IMad(a,b,c)
-  %1755 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1742, i32 %1751, i32 %1754)  ; IMad(a,b,c)
-  %1756 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1737, i32 %1752, i32 %1755)  ; IMad(a,b,c)
-  %1757 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1756, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1758 = extractvalue %dx.types.ResRet.i32 %1757, 0
-  %1759 = extractvalue %dx.types.ResRet.i32 %1757, 1
-  %1760 = call double @dx.op.makeDouble.f64(i32 101, i32 %1758, i32 %1759)  ; MakeDouble(lo,hi)
-  %1761 = fptrunc double %1760 to float
-  br label %1884
-
-; <label>:1762                                    ; preds = %1733
-  %1763 = icmp eq i32 %976, 1
-  br i1 %1763, label %1764, label %1795
-
-; <label>:1764                                    ; preds = %1762
-  %1765 = add i32 %13, -1
-  %1766 = uitofp i32 %1765 to float
-  %1767 = call float @dx.op.binary.f32(i32 35, float %972, float 0.000000e+00)  ; FMax(a,b)
-  %1768 = call float @dx.op.binary.f32(i32 36, float %1767, float %1766)  ; FMin(a,b)
-  %1769 = fptoui float %1768 to i32
-  %1770 = add i32 %15, -1
-  %1771 = uitofp i32 %1770 to float
-  %1772 = call float @dx.op.binary.f32(i32 35, float %974, float 0.000000e+00)  ; FMax(a,b)
-  %1773 = call float @dx.op.binary.f32(i32 36, float %1772, float %1771)  ; FMin(a,b)
-  %1774 = fptoui float %1773 to i32
-  %1775 = uitofp i32 %1774 to float
-  %1776 = uitofp i32 %1769 to float
-  %1777 = fptoui float %45 to i32
-  %1778 = fptoui float %182 to i32
-  %1779 = fptoui float %1775 to i32
-  %1780 = fptoui float %1776 to i32
-  %1781 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1782 = extractvalue %dx.types.CBufRet.i32 %1781, 0
-  %1783 = extractvalue %dx.types.CBufRet.i32 %1781, 1
-  %1784 = extractvalue %dx.types.CBufRet.i32 %1781, 2
-  %1785 = extractvalue %dx.types.CBufRet.i32 %1781, 3
-  %1786 = mul i32 %1782, %1777
-  %1787 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1778, i32 %1783, i32 %1786)  ; IMad(a,b,c)
-  %1788 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1779, i32 %1784, i32 %1787)  ; IMad(a,b,c)
-  %1789 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1780, i32 %1785, i32 %1788)  ; IMad(a,b,c)
-  %1790 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1789, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1791 = extractvalue %dx.types.ResRet.i32 %1790, 0
-  %1792 = extractvalue %dx.types.ResRet.i32 %1790, 1
-  %1793 = call double @dx.op.makeDouble.f64(i32 101, i32 %1791, i32 %1792)  ; MakeDouble(lo,hi)
-  %1794 = fptrunc double %1793 to float
-  br label %1884
-
-; <label>:1795                                    ; preds = %1762
-  %1796 = icmp eq i32 %976, 2
-  br i1 %1796, label %1797, label %1884
-
-; <label>:1797                                    ; preds = %1795
-  %1798 = fsub fast float %22, %20
-  %1799 = fcmp fast olt float %972, %20
-  br i1 %1799, label %1800, label %1813
-
-; <label>:1800                                    ; preds = %1797
-  %1801 = fsub fast float %20, %972
-  %1802 = fdiv fast float %1801, %1798
-  %1803 = fptoui float %1802 to i32
-  %1804 = uitofp i32 %1803 to float
-  %1805 = fmul fast float %1804, %1798
-  %1806 = fsub fast float %1801, %1805
-  %1807 = and i32 %1803, 1
-  %1808 = icmp eq i32 %1807, 0
-  br i1 %1808, label %1809, label %1811
-
-; <label>:1809                                    ; preds = %1800
-  %1810 = fadd fast float %1806, %20
-  br label %1828
-
-; <label>:1811                                    ; preds = %1800
-  %1812 = fsub fast float %22, %1806
-  br label %1828
-
-; <label>:1813                                    ; preds = %1797
-  %1814 = fcmp fast ogt float %972, %22
-  br i1 %1814, label %1815, label %1828
-
-; <label>:1815                                    ; preds = %1813
-  %1816 = fsub fast float %972, %22
-  %1817 = fdiv fast float %1816, %1798
-  %1818 = fptoui float %1817 to i32
-  %1819 = uitofp i32 %1818 to float
-  %1820 = fmul fast float %1819, %1798
-  %1821 = fsub fast float %1816, %1820
-  %1822 = and i32 %1818, 1
-  %1823 = icmp eq i32 %1822, 0
-  br i1 %1823, label %1824, label %1826
-
-; <label>:1824                                    ; preds = %1815
-  %1825 = fsub fast float %22, %1821
-  br label %1828
-
-; <label>:1826                                    ; preds = %1815
-  %1827 = fadd fast float %1821, %20
-  br label %1828
-
-; <label>:1828                                    ; preds = %1826, %1824, %1813, %1811, %1809
-  %1829 = phi float [ %1810, %1809 ], [ %1812, %1811 ], [ %1825, %1824 ], [ %1827, %1826 ], [ %972, %1813 ]
-  %1830 = fptoui float %1829 to i32
-  %1831 = fsub fast float %24, %20
-  %1832 = fcmp fast olt float %974, %20
-  br i1 %1832, label %1833, label %1846
-
-; <label>:1833                                    ; preds = %1828
-  %1834 = fsub fast float %20, %974
-  %1835 = fdiv fast float %1834, %1831
-  %1836 = fptoui float %1835 to i32
-  %1837 = uitofp i32 %1836 to float
-  %1838 = fmul fast float %1837, %1831
-  %1839 = fsub fast float %1834, %1838
-  %1840 = and i32 %1836, 1
-  %1841 = icmp eq i32 %1840, 0
-  br i1 %1841, label %1842, label %1844
-
-; <label>:1842                                    ; preds = %1833
-  %1843 = fadd fast float %1839, %20
-  br label %1861
-
-; <label>:1844                                    ; preds = %1833
-  %1845 = fsub fast float %24, %1839
-  br label %1861
-
-; <label>:1846                                    ; preds = %1828
-  %1847 = fcmp fast ogt float %974, %24
-  br i1 %1847, label %1848, label %1861
-
-; <label>:1848                                    ; preds = %1846
-  %1849 = fsub fast float %974, %24
-  %1850 = fdiv fast float %1849, %1831
-  %1851 = fptoui float %1850 to i32
-  %1852 = uitofp i32 %1851 to float
-  %1853 = fmul fast float %1852, %1831
-  %1854 = fsub fast float %1849, %1853
-  %1855 = and i32 %1851, 1
-  %1856 = icmp eq i32 %1855, 0
-  br i1 %1856, label %1857, label %1859
-
-; <label>:1857                                    ; preds = %1848
-  %1858 = fsub fast float %24, %1854
-  br label %1861
-
-; <label>:1859                                    ; preds = %1848
-  %1860 = fadd fast float %1854, %20
-  br label %1861
-
-; <label>:1861                                    ; preds = %1859, %1857, %1846, %1844, %1842
-  %1862 = phi float [ %1843, %1842 ], [ %1845, %1844 ], [ %1858, %1857 ], [ %1860, %1859 ], [ %974, %1846 ]
-  %1863 = fptoui float %1862 to i32
-  %1864 = uitofp i32 %1863 to float
-  %1865 = uitofp i32 %1830 to float
-  %1866 = fptoui float %45 to i32
-  %1867 = fptoui float %182 to i32
-  %1868 = fptoui float %1864 to i32
-  %1869 = fptoui float %1865 to i32
-  %1870 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1871 = extractvalue %dx.types.CBufRet.i32 %1870, 0
-  %1872 = extractvalue %dx.types.CBufRet.i32 %1870, 1
-  %1873 = extractvalue %dx.types.CBufRet.i32 %1870, 2
-  %1874 = extractvalue %dx.types.CBufRet.i32 %1870, 3
-  %1875 = mul i32 %1871, %1866
-  %1876 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1867, i32 %1872, i32 %1875)  ; IMad(a,b,c)
-  %1877 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1868, i32 %1873, i32 %1876)  ; IMad(a,b,c)
-  %1878 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1869, i32 %1874, i32 %1877)  ; IMad(a,b,c)
-  %1879 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1878, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1880 = extractvalue %dx.types.ResRet.i32 %1879, 0
-  %1881 = extractvalue %dx.types.ResRet.i32 %1879, 1
-  %1882 = call double @dx.op.makeDouble.f64(i32 101, i32 %1880, i32 %1881)  ; MakeDouble(lo,hi)
-  %1883 = fptrunc double %1882 to float
-  br label %1884
-
-; <label>:1884                                    ; preds = %1861, %1795, %1764, %1745, %1735
-  %1885 = phi float [ %1761, %1745 ], [ 0.000000e+00, %1735 ], [ %1794, %1764 ], [ %1883, %1861 ], [ 0.000000e+00, %1795 ]
-  br i1 %977, label %1886, label %1913
-
-; <label>:1886                                    ; preds = %1884
-  %1887 = fcmp fast oge float %1280, 0.000000e+00
-  %1888 = fptoui float %1280 to i32
-  %1889 = icmp ult i32 %1888, %13
-  %1890 = and i1 %1887, %1889
-  %1891 = fcmp fast oge float %974, 0.000000e+00
-  %1892 = and i1 %1891, %1890
-  %1893 = fptoui float %974 to i32
-  %1894 = icmp ult i32 %1893, %15
-  %1895 = and i1 %1894, %1892
-  br i1 %1895, label %1896, label %2035
-
-; <label>:1896                                    ; preds = %1886
-  %1897 = fptoui float %45 to i32
-  %1898 = fptoui float %182 to i32
-  %1899 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1900 = extractvalue %dx.types.CBufRet.i32 %1899, 0
-  %1901 = extractvalue %dx.types.CBufRet.i32 %1899, 1
-  %1902 = extractvalue %dx.types.CBufRet.i32 %1899, 2
-  %1903 = extractvalue %dx.types.CBufRet.i32 %1899, 3
-  %1904 = mul i32 %1900, %1897
-  %1905 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1898, i32 %1901, i32 %1904)  ; IMad(a,b,c)
-  %1906 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1893, i32 %1902, i32 %1905)  ; IMad(a,b,c)
-  %1907 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1888, i32 %1903, i32 %1906)  ; IMad(a,b,c)
-  %1908 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1907, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1909 = extractvalue %dx.types.ResRet.i32 %1908, 0
-  %1910 = extractvalue %dx.types.ResRet.i32 %1908, 1
-  %1911 = call double @dx.op.makeDouble.f64(i32 101, i32 %1909, i32 %1910)  ; MakeDouble(lo,hi)
-  %1912 = fptrunc double %1911 to float
-  br label %2035
-
-; <label>:1913                                    ; preds = %1884
-  %1914 = icmp eq i32 %976, 1
-  br i1 %1914, label %1915, label %1946
-
-; <label>:1915                                    ; preds = %1913
-  %1916 = add i32 %13, -1
-  %1917 = uitofp i32 %1916 to float
-  %1918 = call float @dx.op.binary.f32(i32 35, float %1280, float 0.000000e+00)  ; FMax(a,b)
-  %1919 = call float @dx.op.binary.f32(i32 36, float %1918, float %1917)  ; FMin(a,b)
-  %1920 = fptoui float %1919 to i32
-  %1921 = add i32 %15, -1
-  %1922 = uitofp i32 %1921 to float
-  %1923 = call float @dx.op.binary.f32(i32 35, float %974, float 0.000000e+00)  ; FMax(a,b)
-  %1924 = call float @dx.op.binary.f32(i32 36, float %1923, float %1922)  ; FMin(a,b)
-  %1925 = fptoui float %1924 to i32
-  %1926 = uitofp i32 %1925 to float
-  %1927 = uitofp i32 %1920 to float
-  %1928 = fptoui float %45 to i32
-  %1929 = fptoui float %182 to i32
-  %1930 = fptoui float %1926 to i32
-  %1931 = fptoui float %1927 to i32
-  %1932 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1933 = extractvalue %dx.types.CBufRet.i32 %1932, 0
-  %1934 = extractvalue %dx.types.CBufRet.i32 %1932, 1
-  %1935 = extractvalue %dx.types.CBufRet.i32 %1932, 2
-  %1936 = extractvalue %dx.types.CBufRet.i32 %1932, 3
-  %1937 = mul i32 %1933, %1928
-  %1938 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1929, i32 %1934, i32 %1937)  ; IMad(a,b,c)
-  %1939 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1930, i32 %1935, i32 %1938)  ; IMad(a,b,c)
-  %1940 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1931, i32 %1936, i32 %1939)  ; IMad(a,b,c)
-  %1941 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1940, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1942 = extractvalue %dx.types.ResRet.i32 %1941, 0
-  %1943 = extractvalue %dx.types.ResRet.i32 %1941, 1
-  %1944 = call double @dx.op.makeDouble.f64(i32 101, i32 %1942, i32 %1943)  ; MakeDouble(lo,hi)
-  %1945 = fptrunc double %1944 to float
-  br label %2035
-
-; <label>:1946                                    ; preds = %1913
-  %1947 = icmp eq i32 %976, 2
-  br i1 %1947, label %1948, label %2035
-
-; <label>:1948                                    ; preds = %1946
-  %1949 = fsub fast float %22, %20
-  %1950 = fcmp fast olt float %1280, %20
-  br i1 %1950, label %1951, label %1964
-
-; <label>:1951                                    ; preds = %1948
-  %1952 = fsub fast float %20, %1280
-  %1953 = fdiv fast float %1952, %1949
-  %1954 = fptoui float %1953 to i32
-  %1955 = uitofp i32 %1954 to float
-  %1956 = fmul fast float %1955, %1949
-  %1957 = fsub fast float %1952, %1956
-  %1958 = and i32 %1954, 1
-  %1959 = icmp eq i32 %1958, 0
-  br i1 %1959, label %1960, label %1962
-
-; <label>:1960                                    ; preds = %1951
-  %1961 = fadd fast float %1957, %20
-  br label %1979
-
-; <label>:1962                                    ; preds = %1951
-  %1963 = fsub fast float %22, %1957
-  br label %1979
-
-; <label>:1964                                    ; preds = %1948
-  %1965 = fcmp fast ogt float %1280, %22
-  br i1 %1965, label %1966, label %1979
-
-; <label>:1966                                    ; preds = %1964
-  %1967 = fsub fast float %1280, %22
-  %1968 = fdiv fast float %1967, %1949
-  %1969 = fptoui float %1968 to i32
-  %1970 = uitofp i32 %1969 to float
-  %1971 = fmul fast float %1970, %1949
-  %1972 = fsub fast float %1967, %1971
-  %1973 = and i32 %1969, 1
-  %1974 = icmp eq i32 %1973, 0
-  br i1 %1974, label %1975, label %1977
-
-; <label>:1975                                    ; preds = %1966
-  %1976 = fsub fast float %22, %1972
-  br label %1979
-
-; <label>:1977                                    ; preds = %1966
-  %1978 = fadd fast float %1972, %20
-  br label %1979
-
-; <label>:1979                                    ; preds = %1977, %1975, %1964, %1962, %1960
-  %1980 = phi float [ %1961, %1960 ], [ %1963, %1962 ], [ %1976, %1975 ], [ %1978, %1977 ], [ %1280, %1964 ]
-  %1981 = fptoui float %1980 to i32
-  %1982 = fsub fast float %24, %20
-  %1983 = fcmp fast olt float %974, %20
-  br i1 %1983, label %1984, label %1997
-
-; <label>:1984                                    ; preds = %1979
-  %1985 = fsub fast float %20, %974
-  %1986 = fdiv fast float %1985, %1982
-  %1987 = fptoui float %1986 to i32
-  %1988 = uitofp i32 %1987 to float
-  %1989 = fmul fast float %1988, %1982
-  %1990 = fsub fast float %1985, %1989
-  %1991 = and i32 %1987, 1
-  %1992 = icmp eq i32 %1991, 0
-  br i1 %1992, label %1993, label %1995
-
-; <label>:1993                                    ; preds = %1984
-  %1994 = fadd fast float %1990, %20
-  br label %2012
-
-; <label>:1995                                    ; preds = %1984
-  %1996 = fsub fast float %24, %1990
-  br label %2012
-
-; <label>:1997                                    ; preds = %1979
-  %1998 = fcmp fast ogt float %974, %24
-  br i1 %1998, label %1999, label %2012
-
-; <label>:1999                                    ; preds = %1997
-  %2000 = fsub fast float %974, %24
-  %2001 = fdiv fast float %2000, %1982
-  %2002 = fptoui float %2001 to i32
-  %2003 = uitofp i32 %2002 to float
-  %2004 = fmul fast float %2003, %1982
-  %2005 = fsub fast float %2000, %2004
-  %2006 = and i32 %2002, 1
-  %2007 = icmp eq i32 %2006, 0
-  br i1 %2007, label %2008, label %2010
-
-; <label>:2008                                    ; preds = %1999
-  %2009 = fsub fast float %24, %2005
-  br label %2012
-
-; <label>:2010                                    ; preds = %1999
-  %2011 = fadd fast float %2005, %20
-  br label %2012
-
-; <label>:2012                                    ; preds = %2010, %2008, %1997, %1995, %1993
-  %2013 = phi float [ %1994, %1993 ], [ %1996, %1995 ], [ %2009, %2008 ], [ %2011, %2010 ], [ %974, %1997 ]
-  %2014 = fptoui float %2013 to i32
-  %2015 = uitofp i32 %2014 to float
-  %2016 = uitofp i32 %1981 to float
-  %2017 = fptoui float %45 to i32
-  %2018 = fptoui float %182 to i32
-  %2019 = fptoui float %2015 to i32
-  %2020 = fptoui float %2016 to i32
-  %2021 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2022 = extractvalue %dx.types.CBufRet.i32 %2021, 0
-  %2023 = extractvalue %dx.types.CBufRet.i32 %2021, 1
-  %2024 = extractvalue %dx.types.CBufRet.i32 %2021, 2
-  %2025 = extractvalue %dx.types.CBufRet.i32 %2021, 3
-  %2026 = mul i32 %2022, %2017
-  %2027 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2018, i32 %2023, i32 %2026)  ; IMad(a,b,c)
-  %2028 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2019, i32 %2024, i32 %2027)  ; IMad(a,b,c)
-  %2029 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2020, i32 %2025, i32 %2028)  ; IMad(a,b,c)
-  %2030 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2029, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2031 = extractvalue %dx.types.ResRet.i32 %2030, 0
-  %2032 = extractvalue %dx.types.ResRet.i32 %2030, 1
-  %2033 = call double @dx.op.makeDouble.f64(i32 101, i32 %2031, i32 %2032)  ; MakeDouble(lo,hi)
-  %2034 = fptrunc double %2033 to float
-  br label %2035
-
-; <label>:2035                                    ; preds = %2012, %1946, %1915, %1896, %1886
-  %2036 = phi float [ %1912, %1896 ], [ 0.000000e+00, %1886 ], [ %1945, %1915 ], [ %2034, %2012 ], [ 0.000000e+00, %1946 ]
-  br i1 %977, label %2037, label %2064
-
-; <label>:2037                                    ; preds = %2035
-  %2038 = fcmp fast oge float %1432, 0.000000e+00
-  %2039 = fptoui float %1432 to i32
-  %2040 = icmp ult i32 %2039, %13
-  %2041 = and i1 %2038, %2040
-  %2042 = fcmp fast oge float %974, 0.000000e+00
-  %2043 = and i1 %2042, %2041
-  %2044 = fptoui float %974 to i32
-  %2045 = icmp ult i32 %2044, %15
-  %2046 = and i1 %2045, %2043
-  br i1 %2046, label %2047, label %2186
-
-; <label>:2047                                    ; preds = %2037
-  %2048 = fptoui float %45 to i32
-  %2049 = fptoui float %182 to i32
-  %2050 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2051 = extractvalue %dx.types.CBufRet.i32 %2050, 0
-  %2052 = extractvalue %dx.types.CBufRet.i32 %2050, 1
-  %2053 = extractvalue %dx.types.CBufRet.i32 %2050, 2
-  %2054 = extractvalue %dx.types.CBufRet.i32 %2050, 3
-  %2055 = mul i32 %2051, %2048
-  %2056 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2049, i32 %2052, i32 %2055)  ; IMad(a,b,c)
-  %2057 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2044, i32 %2053, i32 %2056)  ; IMad(a,b,c)
-  %2058 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2039, i32 %2054, i32 %2057)  ; IMad(a,b,c)
-  %2059 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2058, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2060 = extractvalue %dx.types.ResRet.i32 %2059, 0
-  %2061 = extractvalue %dx.types.ResRet.i32 %2059, 1
-  %2062 = call double @dx.op.makeDouble.f64(i32 101, i32 %2060, i32 %2061)  ; MakeDouble(lo,hi)
-  %2063 = fptrunc double %2062 to float
-  br label %2186
-
-; <label>:2064                                    ; preds = %2035
-  %2065 = icmp eq i32 %976, 1
-  br i1 %2065, label %2066, label %2097
-
-; <label>:2066                                    ; preds = %2064
-  %2067 = add i32 %13, -1
-  %2068 = uitofp i32 %2067 to float
-  %2069 = call float @dx.op.binary.f32(i32 35, float %1432, float 0.000000e+00)  ; FMax(a,b)
-  %2070 = call float @dx.op.binary.f32(i32 36, float %2069, float %2068)  ; FMin(a,b)
-  %2071 = fptoui float %2070 to i32
-  %2072 = add i32 %15, -1
-  %2073 = uitofp i32 %2072 to float
-  %2074 = call float @dx.op.binary.f32(i32 35, float %974, float 0.000000e+00)  ; FMax(a,b)
-  %2075 = call float @dx.op.binary.f32(i32 36, float %2074, float %2073)  ; FMin(a,b)
-  %2076 = fptoui float %2075 to i32
-  %2077 = uitofp i32 %2076 to float
-  %2078 = uitofp i32 %2071 to float
-  %2079 = fptoui float %45 to i32
-  %2080 = fptoui float %182 to i32
-  %2081 = fptoui float %2077 to i32
-  %2082 = fptoui float %2078 to i32
-  %2083 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2084 = extractvalue %dx.types.CBufRet.i32 %2083, 0
-  %2085 = extractvalue %dx.types.CBufRet.i32 %2083, 1
-  %2086 = extractvalue %dx.types.CBufRet.i32 %2083, 2
-  %2087 = extractvalue %dx.types.CBufRet.i32 %2083, 3
-  %2088 = mul i32 %2084, %2079
-  %2089 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2080, i32 %2085, i32 %2088)  ; IMad(a,b,c)
-  %2090 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2081, i32 %2086, i32 %2089)  ; IMad(a,b,c)
-  %2091 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2082, i32 %2087, i32 %2090)  ; IMad(a,b,c)
-  %2092 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2091, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2093 = extractvalue %dx.types.ResRet.i32 %2092, 0
-  %2094 = extractvalue %dx.types.ResRet.i32 %2092, 1
-  %2095 = call double @dx.op.makeDouble.f64(i32 101, i32 %2093, i32 %2094)  ; MakeDouble(lo,hi)
-  %2096 = fptrunc double %2095 to float
-  br label %2186
-
-; <label>:2097                                    ; preds = %2064
-  %2098 = icmp eq i32 %976, 2
-  br i1 %2098, label %2099, label %2186
-
-; <label>:2099                                    ; preds = %2097
-  %2100 = fsub fast float %22, %20
-  %2101 = fcmp fast olt float %1432, %20
-  br i1 %2101, label %2102, label %2115
-
-; <label>:2102                                    ; preds = %2099
-  %2103 = fsub fast float %20, %1432
-  %2104 = fdiv fast float %2103, %2100
-  %2105 = fptoui float %2104 to i32
-  %2106 = uitofp i32 %2105 to float
-  %2107 = fmul fast float %2106, %2100
-  %2108 = fsub fast float %2103, %2107
-  %2109 = and i32 %2105, 1
-  %2110 = icmp eq i32 %2109, 0
-  br i1 %2110, label %2111, label %2113
-
-; <label>:2111                                    ; preds = %2102
-  %2112 = fadd fast float %2108, %20
-  br label %2130
-
-; <label>:2113                                    ; preds = %2102
-  %2114 = fsub fast float %22, %2108
-  br label %2130
-
-; <label>:2115                                    ; preds = %2099
-  %2116 = fcmp fast ogt float %1432, %22
-  br i1 %2116, label %2117, label %2130
-
-; <label>:2117                                    ; preds = %2115
-  %2118 = fsub fast float %1432, %22
-  %2119 = fdiv fast float %2118, %2100
-  %2120 = fptoui float %2119 to i32
-  %2121 = uitofp i32 %2120 to float
-  %2122 = fmul fast float %2121, %2100
-  %2123 = fsub fast float %2118, %2122
-  %2124 = and i32 %2120, 1
-  %2125 = icmp eq i32 %2124, 0
-  br i1 %2125, label %2126, label %2128
-
-; <label>:2126                                    ; preds = %2117
-  %2127 = fsub fast float %22, %2123
-  br label %2130
-
-; <label>:2128                                    ; preds = %2117
-  %2129 = fadd fast float %2123, %20
-  br label %2130
-
-; <label>:2130                                    ; preds = %2128, %2126, %2115, %2113, %2111
-  %2131 = phi float [ %2112, %2111 ], [ %2114, %2113 ], [ %2127, %2126 ], [ %2129, %2128 ], [ %1432, %2115 ]
-  %2132 = fptoui float %2131 to i32
-  %2133 = fsub fast float %24, %20
-  %2134 = fcmp fast olt float %974, %20
-  br i1 %2134, label %2135, label %2148
-
-; <label>:2135                                    ; preds = %2130
-  %2136 = fsub fast float %20, %974
-  %2137 = fdiv fast float %2136, %2133
-  %2138 = fptoui float %2137 to i32
-  %2139 = uitofp i32 %2138 to float
-  %2140 = fmul fast float %2139, %2133
-  %2141 = fsub fast float %2136, %2140
-  %2142 = and i32 %2138, 1
-  %2143 = icmp eq i32 %2142, 0
-  br i1 %2143, label %2144, label %2146
-
-; <label>:2144                                    ; preds = %2135
-  %2145 = fadd fast float %2141, %20
-  br label %2163
-
-; <label>:2146                                    ; preds = %2135
-  %2147 = fsub fast float %24, %2141
-  br label %2163
-
-; <label>:2148                                    ; preds = %2130
-  %2149 = fcmp fast ogt float %974, %24
-  br i1 %2149, label %2150, label %2163
-
-; <label>:2150                                    ; preds = %2148
-  %2151 = fsub fast float %974, %24
-  %2152 = fdiv fast float %2151, %2133
-  %2153 = fptoui float %2152 to i32
-  %2154 = uitofp i32 %2153 to float
-  %2155 = fmul fast float %2154, %2133
-  %2156 = fsub fast float %2151, %2155
-  %2157 = and i32 %2153, 1
-  %2158 = icmp eq i32 %2157, 0
-  br i1 %2158, label %2159, label %2161
-
-; <label>:2159                                    ; preds = %2150
-  %2160 = fsub fast float %24, %2156
-  br label %2163
-
-; <label>:2161                                    ; preds = %2150
-  %2162 = fadd fast float %2156, %20
-  br label %2163
-
-; <label>:2163                                    ; preds = %2161, %2159, %2148, %2146, %2144
-  %2164 = phi float [ %2145, %2144 ], [ %2147, %2146 ], [ %2160, %2159 ], [ %2162, %2161 ], [ %974, %2148 ]
-  %2165 = fptoui float %2164 to i32
-  %2166 = uitofp i32 %2165 to float
-  %2167 = uitofp i32 %2132 to float
-  %2168 = fptoui float %45 to i32
-  %2169 = fptoui float %182 to i32
-  %2170 = fptoui float %2166 to i32
-  %2171 = fptoui float %2167 to i32
-  %2172 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2173 = extractvalue %dx.types.CBufRet.i32 %2172, 0
-  %2174 = extractvalue %dx.types.CBufRet.i32 %2172, 1
-  %2175 = extractvalue %dx.types.CBufRet.i32 %2172, 2
-  %2176 = extractvalue %dx.types.CBufRet.i32 %2172, 3
-  %2177 = mul i32 %2173, %2168
-  %2178 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2169, i32 %2174, i32 %2177)  ; IMad(a,b,c)
-  %2179 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2170, i32 %2175, i32 %2178)  ; IMad(a,b,c)
-  %2180 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2171, i32 %2176, i32 %2179)  ; IMad(a,b,c)
-  %2181 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2180, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2182 = extractvalue %dx.types.ResRet.i32 %2181, 0
-  %2183 = extractvalue %dx.types.ResRet.i32 %2181, 1
-  %2184 = call double @dx.op.makeDouble.f64(i32 101, i32 %2182, i32 %2183)  ; MakeDouble(lo,hi)
-  %2185 = fptrunc double %2184 to float
-  br label %2186
-
-; <label>:2186                                    ; preds = %2163, %2097, %2066, %2047, %2037
-  %2187 = phi float [ %2063, %2047 ], [ 0.000000e+00, %2037 ], [ %2096, %2066 ], [ %2185, %2163 ], [ 0.000000e+00, %2097 ]
-  %2188 = fadd fast float %974, 1.000000e+00
-  br i1 %977, label %2189, label %2216
-
-; <label>:2189                                    ; preds = %2186
-  %2190 = fcmp fast oge float %973, 0.000000e+00
-  %2191 = fptoui float %973 to i32
-  %2192 = icmp ult i32 %2191, %13
-  %2193 = and i1 %2190, %2192
-  %2194 = fcmp fast oge float %2188, 0.000000e+00
-  %2195 = and i1 %2194, %2193
-  %2196 = fptoui float %2188 to i32
-  %2197 = icmp ult i32 %2196, %15
-  %2198 = and i1 %2197, %2195
-  br i1 %2198, label %2199, label %2338
-
-; <label>:2199                                    ; preds = %2189
-  %2200 = fptoui float %45 to i32
-  %2201 = fptoui float %182 to i32
-  %2202 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2203 = extractvalue %dx.types.CBufRet.i32 %2202, 0
-  %2204 = extractvalue %dx.types.CBufRet.i32 %2202, 1
-  %2205 = extractvalue %dx.types.CBufRet.i32 %2202, 2
-  %2206 = extractvalue %dx.types.CBufRet.i32 %2202, 3
-  %2207 = mul i32 %2203, %2200
-  %2208 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2201, i32 %2204, i32 %2207)  ; IMad(a,b,c)
-  %2209 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2196, i32 %2205, i32 %2208)  ; IMad(a,b,c)
-  %2210 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2191, i32 %2206, i32 %2209)  ; IMad(a,b,c)
-  %2211 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2210, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2212 = extractvalue %dx.types.ResRet.i32 %2211, 0
-  %2213 = extractvalue %dx.types.ResRet.i32 %2211, 1
-  %2214 = call double @dx.op.makeDouble.f64(i32 101, i32 %2212, i32 %2213)  ; MakeDouble(lo,hi)
-  %2215 = fptrunc double %2214 to float
-  br label %2338
-
-; <label>:2216                                    ; preds = %2186
-  %2217 = icmp eq i32 %976, 1
-  br i1 %2217, label %2218, label %2249
-
-; <label>:2218                                    ; preds = %2216
-  %2219 = add i32 %13, -1
-  %2220 = uitofp i32 %2219 to float
-  %2221 = call float @dx.op.binary.f32(i32 35, float %973, float 0.000000e+00)  ; FMax(a,b)
-  %2222 = call float @dx.op.binary.f32(i32 36, float %2221, float %2220)  ; FMin(a,b)
-  %2223 = fptoui float %2222 to i32
-  %2224 = add i32 %15, -1
-  %2225 = uitofp i32 %2224 to float
-  %2226 = call float @dx.op.binary.f32(i32 35, float %2188, float 0.000000e+00)  ; FMax(a,b)
-  %2227 = call float @dx.op.binary.f32(i32 36, float %2226, float %2225)  ; FMin(a,b)
-  %2228 = fptoui float %2227 to i32
-  %2229 = uitofp i32 %2228 to float
-  %2230 = uitofp i32 %2223 to float
-  %2231 = fptoui float %45 to i32
-  %2232 = fptoui float %182 to i32
-  %2233 = fptoui float %2229 to i32
-  %2234 = fptoui float %2230 to i32
-  %2235 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2236 = extractvalue %dx.types.CBufRet.i32 %2235, 0
-  %2237 = extractvalue %dx.types.CBufRet.i32 %2235, 1
-  %2238 = extractvalue %dx.types.CBufRet.i32 %2235, 2
-  %2239 = extractvalue %dx.types.CBufRet.i32 %2235, 3
-  %2240 = mul i32 %2236, %2231
-  %2241 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2232, i32 %2237, i32 %2240)  ; IMad(a,b,c)
-  %2242 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2233, i32 %2238, i32 %2241)  ; IMad(a,b,c)
-  %2243 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2234, i32 %2239, i32 %2242)  ; IMad(a,b,c)
-  %2244 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2243, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2245 = extractvalue %dx.types.ResRet.i32 %2244, 0
-  %2246 = extractvalue %dx.types.ResRet.i32 %2244, 1
-  %2247 = call double @dx.op.makeDouble.f64(i32 101, i32 %2245, i32 %2246)  ; MakeDouble(lo,hi)
-  %2248 = fptrunc double %2247 to float
-  br label %2338
-
-; <label>:2249                                    ; preds = %2216
-  %2250 = icmp eq i32 %976, 2
-  br i1 %2250, label %2251, label %2338
-
-; <label>:2251                                    ; preds = %2249
-  %2252 = fsub fast float %22, %20
-  %2253 = fcmp fast olt float %973, %20
-  br i1 %2253, label %2254, label %2267
-
-; <label>:2254                                    ; preds = %2251
-  %2255 = fsub fast float %20, %973
-  %2256 = fdiv fast float %2255, %2252
-  %2257 = fptoui float %2256 to i32
-  %2258 = uitofp i32 %2257 to float
-  %2259 = fmul fast float %2258, %2252
-  %2260 = fsub fast float %2255, %2259
-  %2261 = and i32 %2257, 1
-  %2262 = icmp eq i32 %2261, 0
-  br i1 %2262, label %2263, label %2265
-
-; <label>:2263                                    ; preds = %2254
-  %2264 = fadd fast float %2260, %20
-  br label %2282
-
-; <label>:2265                                    ; preds = %2254
-  %2266 = fsub fast float %22, %2260
-  br label %2282
-
-; <label>:2267                                    ; preds = %2251
-  %2268 = fcmp fast ogt float %973, %22
-  br i1 %2268, label %2269, label %2282
-
-; <label>:2269                                    ; preds = %2267
-  %2270 = fsub fast float %973, %22
-  %2271 = fdiv fast float %2270, %2252
-  %2272 = fptoui float %2271 to i32
-  %2273 = uitofp i32 %2272 to float
-  %2274 = fmul fast float %2273, %2252
-  %2275 = fsub fast float %2270, %2274
-  %2276 = and i32 %2272, 1
-  %2277 = icmp eq i32 %2276, 0
-  br i1 %2277, label %2278, label %2280
-
-; <label>:2278                                    ; preds = %2269
-  %2279 = fsub fast float %22, %2275
-  br label %2282
-
-; <label>:2280                                    ; preds = %2269
-  %2281 = fadd fast float %2275, %20
-  br label %2282
-
-; <label>:2282                                    ; preds = %2280, %2278, %2267, %2265, %2263
-  %2283 = phi float [ %2264, %2263 ], [ %2266, %2265 ], [ %2279, %2278 ], [ %2281, %2280 ], [ %973, %2267 ]
-  %2284 = fptoui float %2283 to i32
-  %2285 = fsub fast float %24, %20
-  %2286 = fcmp fast olt float %2188, %20
-  br i1 %2286, label %2287, label %2300
-
-; <label>:2287                                    ; preds = %2282
-  %2288 = fsub fast float %20, %2188
-  %2289 = fdiv fast float %2288, %2285
-  %2290 = fptoui float %2289 to i32
-  %2291 = uitofp i32 %2290 to float
-  %2292 = fmul fast float %2291, %2285
-  %2293 = fsub fast float %2288, %2292
-  %2294 = and i32 %2290, 1
-  %2295 = icmp eq i32 %2294, 0
-  br i1 %2295, label %2296, label %2298
-
-; <label>:2296                                    ; preds = %2287
-  %2297 = fadd fast float %2293, %20
-  br label %2315
-
-; <label>:2298                                    ; preds = %2287
-  %2299 = fsub fast float %24, %2293
-  br label %2315
-
-; <label>:2300                                    ; preds = %2282
-  %2301 = fcmp fast ogt float %2188, %24
-  br i1 %2301, label %2302, label %2315
-
-; <label>:2302                                    ; preds = %2300
-  %2303 = fsub fast float %2188, %24
-  %2304 = fdiv fast float %2303, %2285
-  %2305 = fptoui float %2304 to i32
-  %2306 = uitofp i32 %2305 to float
-  %2307 = fmul fast float %2306, %2285
-  %2308 = fsub fast float %2303, %2307
-  %2309 = and i32 %2305, 1
-  %2310 = icmp eq i32 %2309, 0
-  br i1 %2310, label %2311, label %2313
-
-; <label>:2311                                    ; preds = %2302
-  %2312 = fsub fast float %24, %2308
-  br label %2315
-
-; <label>:2313                                    ; preds = %2302
-  %2314 = fadd fast float %2308, %20
-  br label %2315
-
-; <label>:2315                                    ; preds = %2313, %2311, %2300, %2298, %2296
-  %2316 = phi float [ %2297, %2296 ], [ %2299, %2298 ], [ %2312, %2311 ], [ %2314, %2313 ], [ %2188, %2300 ]
-  %2317 = fptoui float %2316 to i32
-  %2318 = uitofp i32 %2317 to float
-  %2319 = uitofp i32 %2284 to float
-  %2320 = fptoui float %45 to i32
-  %2321 = fptoui float %182 to i32
-  %2322 = fptoui float %2318 to i32
-  %2323 = fptoui float %2319 to i32
-  %2324 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2325 = extractvalue %dx.types.CBufRet.i32 %2324, 0
-  %2326 = extractvalue %dx.types.CBufRet.i32 %2324, 1
-  %2327 = extractvalue %dx.types.CBufRet.i32 %2324, 2
-  %2328 = extractvalue %dx.types.CBufRet.i32 %2324, 3
-  %2329 = mul i32 %2325, %2320
-  %2330 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2321, i32 %2326, i32 %2329)  ; IMad(a,b,c)
-  %2331 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2322, i32 %2327, i32 %2330)  ; IMad(a,b,c)
-  %2332 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2323, i32 %2328, i32 %2331)  ; IMad(a,b,c)
-  %2333 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2332, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2334 = extractvalue %dx.types.ResRet.i32 %2333, 0
-  %2335 = extractvalue %dx.types.ResRet.i32 %2333, 1
-  %2336 = call double @dx.op.makeDouble.f64(i32 101, i32 %2334, i32 %2335)  ; MakeDouble(lo,hi)
-  %2337 = fptrunc double %2336 to float
-  br label %2338
-
-; <label>:2338                                    ; preds = %2315, %2249, %2218, %2199, %2189
-  %2339 = phi float [ %2215, %2199 ], [ 0.000000e+00, %2189 ], [ %2248, %2218 ], [ %2337, %2315 ], [ 0.000000e+00, %2249 ]
-  br i1 %977, label %2340, label %2367
-
-; <label>:2340                                    ; preds = %2338
-  %2341 = fcmp fast oge float %972, 0.000000e+00
-  %2342 = fptoui float %972 to i32
-  %2343 = icmp ult i32 %2342, %13
-  %2344 = and i1 %2341, %2343
-  %2345 = fcmp fast oge float %2188, 0.000000e+00
-  %2346 = and i1 %2345, %2344
-  %2347 = fptoui float %2188 to i32
-  %2348 = icmp ult i32 %2347, %15
-  %2349 = and i1 %2348, %2346
-  br i1 %2349, label %2350, label %2489
-
-; <label>:2350                                    ; preds = %2340
-  %2351 = fptoui float %45 to i32
-  %2352 = fptoui float %182 to i32
-  %2353 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2354 = extractvalue %dx.types.CBufRet.i32 %2353, 0
-  %2355 = extractvalue %dx.types.CBufRet.i32 %2353, 1
-  %2356 = extractvalue %dx.types.CBufRet.i32 %2353, 2
-  %2357 = extractvalue %dx.types.CBufRet.i32 %2353, 3
-  %2358 = mul i32 %2354, %2351
-  %2359 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2352, i32 %2355, i32 %2358)  ; IMad(a,b,c)
-  %2360 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2347, i32 %2356, i32 %2359)  ; IMad(a,b,c)
-  %2361 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2342, i32 %2357, i32 %2360)  ; IMad(a,b,c)
-  %2362 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2361, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2363 = extractvalue %dx.types.ResRet.i32 %2362, 0
-  %2364 = extractvalue %dx.types.ResRet.i32 %2362, 1
-  %2365 = call double @dx.op.makeDouble.f64(i32 101, i32 %2363, i32 %2364)  ; MakeDouble(lo,hi)
-  %2366 = fptrunc double %2365 to float
-  br label %2489
-
-; <label>:2367                                    ; preds = %2338
-  %2368 = icmp eq i32 %976, 1
-  br i1 %2368, label %2369, label %2400
-
-; <label>:2369                                    ; preds = %2367
-  %2370 = add i32 %13, -1
-  %2371 = uitofp i32 %2370 to float
-  %2372 = call float @dx.op.binary.f32(i32 35, float %972, float 0.000000e+00)  ; FMax(a,b)
-  %2373 = call float @dx.op.binary.f32(i32 36, float %2372, float %2371)  ; FMin(a,b)
-  %2374 = fptoui float %2373 to i32
-  %2375 = add i32 %15, -1
-  %2376 = uitofp i32 %2375 to float
-  %2377 = call float @dx.op.binary.f32(i32 35, float %2188, float 0.000000e+00)  ; FMax(a,b)
-  %2378 = call float @dx.op.binary.f32(i32 36, float %2377, float %2376)  ; FMin(a,b)
-  %2379 = fptoui float %2378 to i32
-  %2380 = uitofp i32 %2379 to float
-  %2381 = uitofp i32 %2374 to float
-  %2382 = fptoui float %45 to i32
-  %2383 = fptoui float %182 to i32
-  %2384 = fptoui float %2380 to i32
-  %2385 = fptoui float %2381 to i32
-  %2386 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2387 = extractvalue %dx.types.CBufRet.i32 %2386, 0
-  %2388 = extractvalue %dx.types.CBufRet.i32 %2386, 1
-  %2389 = extractvalue %dx.types.CBufRet.i32 %2386, 2
-  %2390 = extractvalue %dx.types.CBufRet.i32 %2386, 3
-  %2391 = mul i32 %2387, %2382
-  %2392 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2383, i32 %2388, i32 %2391)  ; IMad(a,b,c)
-  %2393 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2384, i32 %2389, i32 %2392)  ; IMad(a,b,c)
-  %2394 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2385, i32 %2390, i32 %2393)  ; IMad(a,b,c)
-  %2395 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2394, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2396 = extractvalue %dx.types.ResRet.i32 %2395, 0
-  %2397 = extractvalue %dx.types.ResRet.i32 %2395, 1
-  %2398 = call double @dx.op.makeDouble.f64(i32 101, i32 %2396, i32 %2397)  ; MakeDouble(lo,hi)
-  %2399 = fptrunc double %2398 to float
-  br label %2489
-
-; <label>:2400                                    ; preds = %2367
-  %2401 = icmp eq i32 %976, 2
-  br i1 %2401, label %2402, label %2489
-
-; <label>:2402                                    ; preds = %2400
-  %2403 = fsub fast float %22, %20
-  %2404 = fcmp fast olt float %972, %20
-  br i1 %2404, label %2405, label %2418
-
-; <label>:2405                                    ; preds = %2402
-  %2406 = fsub fast float %20, %972
-  %2407 = fdiv fast float %2406, %2403
-  %2408 = fptoui float %2407 to i32
-  %2409 = uitofp i32 %2408 to float
-  %2410 = fmul fast float %2409, %2403
-  %2411 = fsub fast float %2406, %2410
-  %2412 = and i32 %2408, 1
-  %2413 = icmp eq i32 %2412, 0
-  br i1 %2413, label %2414, label %2416
-
-; <label>:2414                                    ; preds = %2405
-  %2415 = fadd fast float %2411, %20
-  br label %2433
-
-; <label>:2416                                    ; preds = %2405
-  %2417 = fsub fast float %22, %2411
-  br label %2433
-
-; <label>:2418                                    ; preds = %2402
-  %2419 = fcmp fast ogt float %972, %22
-  br i1 %2419, label %2420, label %2433
-
-; <label>:2420                                    ; preds = %2418
-  %2421 = fsub fast float %972, %22
-  %2422 = fdiv fast float %2421, %2403
-  %2423 = fptoui float %2422 to i32
-  %2424 = uitofp i32 %2423 to float
-  %2425 = fmul fast float %2424, %2403
-  %2426 = fsub fast float %2421, %2425
-  %2427 = and i32 %2423, 1
-  %2428 = icmp eq i32 %2427, 0
-  br i1 %2428, label %2429, label %2431
-
-; <label>:2429                                    ; preds = %2420
-  %2430 = fsub fast float %22, %2426
-  br label %2433
-
-; <label>:2431                                    ; preds = %2420
-  %2432 = fadd fast float %2426, %20
-  br label %2433
-
-; <label>:2433                                    ; preds = %2431, %2429, %2418, %2416, %2414
-  %2434 = phi float [ %2415, %2414 ], [ %2417, %2416 ], [ %2430, %2429 ], [ %2432, %2431 ], [ %972, %2418 ]
-  %2435 = fptoui float %2434 to i32
-  %2436 = fsub fast float %24, %20
-  %2437 = fcmp fast olt float %2188, %20
-  br i1 %2437, label %2438, label %2451
-
-; <label>:2438                                    ; preds = %2433
-  %2439 = fsub fast float %20, %2188
-  %2440 = fdiv fast float %2439, %2436
-  %2441 = fptoui float %2440 to i32
-  %2442 = uitofp i32 %2441 to float
-  %2443 = fmul fast float %2442, %2436
-  %2444 = fsub fast float %2439, %2443
-  %2445 = and i32 %2441, 1
-  %2446 = icmp eq i32 %2445, 0
-  br i1 %2446, label %2447, label %2449
-
-; <label>:2447                                    ; preds = %2438
-  %2448 = fadd fast float %2444, %20
-  br label %2466
-
-; <label>:2449                                    ; preds = %2438
-  %2450 = fsub fast float %24, %2444
-  br label %2466
-
-; <label>:2451                                    ; preds = %2433
-  %2452 = fcmp fast ogt float %2188, %24
-  br i1 %2452, label %2453, label %2466
-
-; <label>:2453                                    ; preds = %2451
-  %2454 = fsub fast float %2188, %24
-  %2455 = fdiv fast float %2454, %2436
-  %2456 = fptoui float %2455 to i32
-  %2457 = uitofp i32 %2456 to float
-  %2458 = fmul fast float %2457, %2436
-  %2459 = fsub fast float %2454, %2458
-  %2460 = and i32 %2456, 1
-  %2461 = icmp eq i32 %2460, 0
-  br i1 %2461, label %2462, label %2464
-
-; <label>:2462                                    ; preds = %2453
-  %2463 = fsub fast float %24, %2459
-  br label %2466
-
-; <label>:2464                                    ; preds = %2453
-  %2465 = fadd fast float %2459, %20
-  br label %2466
-
-; <label>:2466                                    ; preds = %2464, %2462, %2451, %2449, %2447
-  %2467 = phi float [ %2448, %2447 ], [ %2450, %2449 ], [ %2463, %2462 ], [ %2465, %2464 ], [ %2188, %2451 ]
-  %2468 = fptoui float %2467 to i32
-  %2469 = uitofp i32 %2468 to float
-  %2470 = uitofp i32 %2435 to float
-  %2471 = fptoui float %45 to i32
-  %2472 = fptoui float %182 to i32
-  %2473 = fptoui float %2469 to i32
-  %2474 = fptoui float %2470 to i32
-  %2475 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2476 = extractvalue %dx.types.CBufRet.i32 %2475, 0
-  %2477 = extractvalue %dx.types.CBufRet.i32 %2475, 1
-  %2478 = extractvalue %dx.types.CBufRet.i32 %2475, 2
-  %2479 = extractvalue %dx.types.CBufRet.i32 %2475, 3
-  %2480 = mul i32 %2476, %2471
-  %2481 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2472, i32 %2477, i32 %2480)  ; IMad(a,b,c)
-  %2482 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2473, i32 %2478, i32 %2481)  ; IMad(a,b,c)
-  %2483 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2474, i32 %2479, i32 %2482)  ; IMad(a,b,c)
-  %2484 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2483, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2485 = extractvalue %dx.types.ResRet.i32 %2484, 0
-  %2486 = extractvalue %dx.types.ResRet.i32 %2484, 1
-  %2487 = call double @dx.op.makeDouble.f64(i32 101, i32 %2485, i32 %2486)  ; MakeDouble(lo,hi)
-  %2488 = fptrunc double %2487 to float
-  br label %2489
-
-; <label>:2489                                    ; preds = %2466, %2400, %2369, %2350, %2340
-  %2490 = phi float [ %2366, %2350 ], [ 0.000000e+00, %2340 ], [ %2399, %2369 ], [ %2488, %2466 ], [ 0.000000e+00, %2400 ]
-  br i1 %977, label %2491, label %2518
-
-; <label>:2491                                    ; preds = %2489
-  %2492 = fcmp fast oge float %1280, 0.000000e+00
-  %2493 = fptoui float %1280 to i32
-  %2494 = icmp ult i32 %2493, %13
-  %2495 = and i1 %2492, %2494
-  %2496 = fcmp fast oge float %2188, 0.000000e+00
-  %2497 = and i1 %2496, %2495
-  %2498 = fptoui float %2188 to i32
-  %2499 = icmp ult i32 %2498, %15
-  %2500 = and i1 %2499, %2497
-  br i1 %2500, label %2501, label %2640
-
-; <label>:2501                                    ; preds = %2491
-  %2502 = fptoui float %45 to i32
-  %2503 = fptoui float %182 to i32
-  %2504 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2505 = extractvalue %dx.types.CBufRet.i32 %2504, 0
-  %2506 = extractvalue %dx.types.CBufRet.i32 %2504, 1
-  %2507 = extractvalue %dx.types.CBufRet.i32 %2504, 2
-  %2508 = extractvalue %dx.types.CBufRet.i32 %2504, 3
-  %2509 = mul i32 %2505, %2502
-  %2510 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2503, i32 %2506, i32 %2509)  ; IMad(a,b,c)
-  %2511 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2498, i32 %2507, i32 %2510)  ; IMad(a,b,c)
-  %2512 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2493, i32 %2508, i32 %2511)  ; IMad(a,b,c)
-  %2513 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2512, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2514 = extractvalue %dx.types.ResRet.i32 %2513, 0
-  %2515 = extractvalue %dx.types.ResRet.i32 %2513, 1
-  %2516 = call double @dx.op.makeDouble.f64(i32 101, i32 %2514, i32 %2515)  ; MakeDouble(lo,hi)
-  %2517 = fptrunc double %2516 to float
-  br label %2640
-
-; <label>:2518                                    ; preds = %2489
-  %2519 = icmp eq i32 %976, 1
-  br i1 %2519, label %2520, label %2551
-
-; <label>:2520                                    ; preds = %2518
-  %2521 = add i32 %13, -1
-  %2522 = uitofp i32 %2521 to float
-  %2523 = call float @dx.op.binary.f32(i32 35, float %1280, float 0.000000e+00)  ; FMax(a,b)
-  %2524 = call float @dx.op.binary.f32(i32 36, float %2523, float %2522)  ; FMin(a,b)
-  %2525 = fptoui float %2524 to i32
-  %2526 = add i32 %15, -1
-  %2527 = uitofp i32 %2526 to float
-  %2528 = call float @dx.op.binary.f32(i32 35, float %2188, float 0.000000e+00)  ; FMax(a,b)
-  %2529 = call float @dx.op.binary.f32(i32 36, float %2528, float %2527)  ; FMin(a,b)
-  %2530 = fptoui float %2529 to i32
-  %2531 = uitofp i32 %2530 to float
-  %2532 = uitofp i32 %2525 to float
-  %2533 = fptoui float %45 to i32
-  %2534 = fptoui float %182 to i32
-  %2535 = fptoui float %2531 to i32
-  %2536 = fptoui float %2532 to i32
-  %2537 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2538 = extractvalue %dx.types.CBufRet.i32 %2537, 0
-  %2539 = extractvalue %dx.types.CBufRet.i32 %2537, 1
-  %2540 = extractvalue %dx.types.CBufRet.i32 %2537, 2
-  %2541 = extractvalue %dx.types.CBufRet.i32 %2537, 3
-  %2542 = mul i32 %2538, %2533
-  %2543 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2534, i32 %2539, i32 %2542)  ; IMad(a,b,c)
-  %2544 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2535, i32 %2540, i32 %2543)  ; IMad(a,b,c)
-  %2545 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2536, i32 %2541, i32 %2544)  ; IMad(a,b,c)
-  %2546 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2545, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2547 = extractvalue %dx.types.ResRet.i32 %2546, 0
-  %2548 = extractvalue %dx.types.ResRet.i32 %2546, 1
-  %2549 = call double @dx.op.makeDouble.f64(i32 101, i32 %2547, i32 %2548)  ; MakeDouble(lo,hi)
-  %2550 = fptrunc double %2549 to float
-  br label %2640
-
-; <label>:2551                                    ; preds = %2518
-  %2552 = icmp eq i32 %976, 2
-  br i1 %2552, label %2553, label %2640
-
-; <label>:2553                                    ; preds = %2551
-  %2554 = fsub fast float %22, %20
-  %2555 = fcmp fast olt float %1280, %20
-  br i1 %2555, label %2556, label %2569
-
-; <label>:2556                                    ; preds = %2553
-  %2557 = fsub fast float %20, %1280
-  %2558 = fdiv fast float %2557, %2554
-  %2559 = fptoui float %2558 to i32
-  %2560 = uitofp i32 %2559 to float
-  %2561 = fmul fast float %2560, %2554
-  %2562 = fsub fast float %2557, %2561
-  %2563 = and i32 %2559, 1
-  %2564 = icmp eq i32 %2563, 0
-  br i1 %2564, label %2565, label %2567
-
-; <label>:2565                                    ; preds = %2556
-  %2566 = fadd fast float %2562, %20
-  br label %2584
-
-; <label>:2567                                    ; preds = %2556
-  %2568 = fsub fast float %22, %2562
-  br label %2584
-
-; <label>:2569                                    ; preds = %2553
-  %2570 = fcmp fast ogt float %1280, %22
-  br i1 %2570, label %2571, label %2584
-
-; <label>:2571                                    ; preds = %2569
-  %2572 = fsub fast float %1280, %22
-  %2573 = fdiv fast float %2572, %2554
-  %2574 = fptoui float %2573 to i32
-  %2575 = uitofp i32 %2574 to float
-  %2576 = fmul fast float %2575, %2554
-  %2577 = fsub fast float %2572, %2576
-  %2578 = and i32 %2574, 1
-  %2579 = icmp eq i32 %2578, 0
-  br i1 %2579, label %2580, label %2582
-
-; <label>:2580                                    ; preds = %2571
-  %2581 = fsub fast float %22, %2577
-  br label %2584
-
-; <label>:2582                                    ; preds = %2571
-  %2583 = fadd fast float %2577, %20
-  br label %2584
-
-; <label>:2584                                    ; preds = %2582, %2580, %2569, %2567, %2565
-  %2585 = phi float [ %2566, %2565 ], [ %2568, %2567 ], [ %2581, %2580 ], [ %2583, %2582 ], [ %1280, %2569 ]
-  %2586 = fptoui float %2585 to i32
-  %2587 = fsub fast float %24, %20
-  %2588 = fcmp fast olt float %2188, %20
-  br i1 %2588, label %2589, label %2602
-
-; <label>:2589                                    ; preds = %2584
-  %2590 = fsub fast float %20, %2188
-  %2591 = fdiv fast float %2590, %2587
-  %2592 = fptoui float %2591 to i32
-  %2593 = uitofp i32 %2592 to float
-  %2594 = fmul fast float %2593, %2587
-  %2595 = fsub fast float %2590, %2594
-  %2596 = and i32 %2592, 1
-  %2597 = icmp eq i32 %2596, 0
-  br i1 %2597, label %2598, label %2600
-
-; <label>:2598                                    ; preds = %2589
-  %2599 = fadd fast float %2595, %20
-  br label %2617
-
-; <label>:2600                                    ; preds = %2589
-  %2601 = fsub fast float %24, %2595
-  br label %2617
-
-; <label>:2602                                    ; preds = %2584
-  %2603 = fcmp fast ogt float %2188, %24
-  br i1 %2603, label %2604, label %2617
-
-; <label>:2604                                    ; preds = %2602
-  %2605 = fsub fast float %2188, %24
-  %2606 = fdiv fast float %2605, %2587
-  %2607 = fptoui float %2606 to i32
-  %2608 = uitofp i32 %2607 to float
-  %2609 = fmul fast float %2608, %2587
-  %2610 = fsub fast float %2605, %2609
-  %2611 = and i32 %2607, 1
-  %2612 = icmp eq i32 %2611, 0
-  br i1 %2612, label %2613, label %2615
-
-; <label>:2613                                    ; preds = %2604
-  %2614 = fsub fast float %24, %2610
-  br label %2617
-
-; <label>:2615                                    ; preds = %2604
-  %2616 = fadd fast float %2610, %20
-  br label %2617
-
-; <label>:2617                                    ; preds = %2615, %2613, %2602, %2600, %2598
-  %2618 = phi float [ %2599, %2598 ], [ %2601, %2600 ], [ %2614, %2613 ], [ %2616, %2615 ], [ %2188, %2602 ]
-  %2619 = fptoui float %2618 to i32
-  %2620 = uitofp i32 %2619 to float
-  %2621 = uitofp i32 %2586 to float
-  %2622 = fptoui float %45 to i32
-  %2623 = fptoui float %182 to i32
-  %2624 = fptoui float %2620 to i32
-  %2625 = fptoui float %2621 to i32
-  %2626 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2627 = extractvalue %dx.types.CBufRet.i32 %2626, 0
-  %2628 = extractvalue %dx.types.CBufRet.i32 %2626, 1
-  %2629 = extractvalue %dx.types.CBufRet.i32 %2626, 2
-  %2630 = extractvalue %dx.types.CBufRet.i32 %2626, 3
-  %2631 = mul i32 %2627, %2622
-  %2632 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2623, i32 %2628, i32 %2631)  ; IMad(a,b,c)
-  %2633 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2624, i32 %2629, i32 %2632)  ; IMad(a,b,c)
-  %2634 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2625, i32 %2630, i32 %2633)  ; IMad(a,b,c)
-  %2635 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2634, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2636 = extractvalue %dx.types.ResRet.i32 %2635, 0
-  %2637 = extractvalue %dx.types.ResRet.i32 %2635, 1
-  %2638 = call double @dx.op.makeDouble.f64(i32 101, i32 %2636, i32 %2637)  ; MakeDouble(lo,hi)
-  %2639 = fptrunc double %2638 to float
-  br label %2640
-
-; <label>:2640                                    ; preds = %2617, %2551, %2520, %2501, %2491
-  %2641 = phi float [ %2517, %2501 ], [ 0.000000e+00, %2491 ], [ %2550, %2520 ], [ %2639, %2617 ], [ 0.000000e+00, %2551 ]
-  br i1 %977, label %2642, label %2669
-
-; <label>:2642                                    ; preds = %2640
-  %2643 = fcmp fast oge float %1432, 0.000000e+00
-  %2644 = fptoui float %1432 to i32
-  %2645 = icmp ult i32 %2644, %13
-  %2646 = and i1 %2643, %2645
-  %2647 = fcmp fast oge float %2188, 0.000000e+00
-  %2648 = and i1 %2647, %2646
-  %2649 = fptoui float %2188 to i32
-  %2650 = icmp ult i32 %2649, %15
-  %2651 = and i1 %2650, %2648
-  br i1 %2651, label %2652, label %2791
-
-; <label>:2652                                    ; preds = %2642
-  %2653 = fptoui float %45 to i32
-  %2654 = fptoui float %182 to i32
-  %2655 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2656 = extractvalue %dx.types.CBufRet.i32 %2655, 0
-  %2657 = extractvalue %dx.types.CBufRet.i32 %2655, 1
-  %2658 = extractvalue %dx.types.CBufRet.i32 %2655, 2
-  %2659 = extractvalue %dx.types.CBufRet.i32 %2655, 3
-  %2660 = mul i32 %2656, %2653
-  %2661 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2654, i32 %2657, i32 %2660)  ; IMad(a,b,c)
-  %2662 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2649, i32 %2658, i32 %2661)  ; IMad(a,b,c)
-  %2663 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2644, i32 %2659, i32 %2662)  ; IMad(a,b,c)
-  %2664 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2663, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2665 = extractvalue %dx.types.ResRet.i32 %2664, 0
-  %2666 = extractvalue %dx.types.ResRet.i32 %2664, 1
-  %2667 = call double @dx.op.makeDouble.f64(i32 101, i32 %2665, i32 %2666)  ; MakeDouble(lo,hi)
-  %2668 = fptrunc double %2667 to float
-  br label %2791
-
-; <label>:2669                                    ; preds = %2640
-  %2670 = icmp eq i32 %976, 1
-  br i1 %2670, label %2671, label %2702
-
-; <label>:2671                                    ; preds = %2669
-  %2672 = add i32 %13, -1
-  %2673 = uitofp i32 %2672 to float
-  %2674 = call float @dx.op.binary.f32(i32 35, float %1432, float 0.000000e+00)  ; FMax(a,b)
-  %2675 = call float @dx.op.binary.f32(i32 36, float %2674, float %2673)  ; FMin(a,b)
-  %2676 = fptoui float %2675 to i32
-  %2677 = add i32 %15, -1
-  %2678 = uitofp i32 %2677 to float
-  %2679 = call float @dx.op.binary.f32(i32 35, float %2188, float 0.000000e+00)  ; FMax(a,b)
-  %2680 = call float @dx.op.binary.f32(i32 36, float %2679, float %2678)  ; FMin(a,b)
-  %2681 = fptoui float %2680 to i32
-  %2682 = uitofp i32 %2681 to float
-  %2683 = uitofp i32 %2676 to float
-  %2684 = fptoui float %45 to i32
-  %2685 = fptoui float %182 to i32
-  %2686 = fptoui float %2682 to i32
-  %2687 = fptoui float %2683 to i32
-  %2688 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2689 = extractvalue %dx.types.CBufRet.i32 %2688, 0
-  %2690 = extractvalue %dx.types.CBufRet.i32 %2688, 1
-  %2691 = extractvalue %dx.types.CBufRet.i32 %2688, 2
-  %2692 = extractvalue %dx.types.CBufRet.i32 %2688, 3
-  %2693 = mul i32 %2689, %2684
-  %2694 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2685, i32 %2690, i32 %2693)  ; IMad(a,b,c)
-  %2695 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2686, i32 %2691, i32 %2694)  ; IMad(a,b,c)
-  %2696 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2687, i32 %2692, i32 %2695)  ; IMad(a,b,c)
-  %2697 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2696, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2698 = extractvalue %dx.types.ResRet.i32 %2697, 0
-  %2699 = extractvalue %dx.types.ResRet.i32 %2697, 1
-  %2700 = call double @dx.op.makeDouble.f64(i32 101, i32 %2698, i32 %2699)  ; MakeDouble(lo,hi)
-  %2701 = fptrunc double %2700 to float
-  br label %2791
-
-; <label>:2702                                    ; preds = %2669
-  %2703 = icmp eq i32 %976, 2
-  br i1 %2703, label %2704, label %2791
-
-; <label>:2704                                    ; preds = %2702
-  %2705 = fsub fast float %22, %20
-  %2706 = fcmp fast olt float %1432, %20
-  br i1 %2706, label %2707, label %2720
-
-; <label>:2707                                    ; preds = %2704
-  %2708 = fsub fast float %20, %1432
-  %2709 = fdiv fast float %2708, %2705
-  %2710 = fptoui float %2709 to i32
-  %2711 = uitofp i32 %2710 to float
-  %2712 = fmul fast float %2711, %2705
-  %2713 = fsub fast float %2708, %2712
-  %2714 = and i32 %2710, 1
-  %2715 = icmp eq i32 %2714, 0
-  br i1 %2715, label %2716, label %2718
-
-; <label>:2716                                    ; preds = %2707
-  %2717 = fadd fast float %2713, %20
-  br label %2735
-
-; <label>:2718                                    ; preds = %2707
-  %2719 = fsub fast float %22, %2713
-  br label %2735
-
-; <label>:2720                                    ; preds = %2704
-  %2721 = fcmp fast ogt float %1432, %22
-  br i1 %2721, label %2722, label %2735
-
-; <label>:2722                                    ; preds = %2720
-  %2723 = fsub fast float %1432, %22
-  %2724 = fdiv fast float %2723, %2705
-  %2725 = fptoui float %2724 to i32
-  %2726 = uitofp i32 %2725 to float
-  %2727 = fmul fast float %2726, %2705
-  %2728 = fsub fast float %2723, %2727
-  %2729 = and i32 %2725, 1
-  %2730 = icmp eq i32 %2729, 0
-  br i1 %2730, label %2731, label %2733
-
-; <label>:2731                                    ; preds = %2722
-  %2732 = fsub fast float %22, %2728
-  br label %2735
-
-; <label>:2733                                    ; preds = %2722
-  %2734 = fadd fast float %2728, %20
-  br label %2735
-
-; <label>:2735                                    ; preds = %2733, %2731, %2720, %2718, %2716
-  %2736 = phi float [ %2717, %2716 ], [ %2719, %2718 ], [ %2732, %2731 ], [ %2734, %2733 ], [ %1432, %2720 ]
-  %2737 = fptoui float %2736 to i32
-  %2738 = fsub fast float %24, %20
-  %2739 = fcmp fast olt float %2188, %20
-  br i1 %2739, label %2740, label %2753
-
-; <label>:2740                                    ; preds = %2735
-  %2741 = fsub fast float %20, %2188
-  %2742 = fdiv fast float %2741, %2738
-  %2743 = fptoui float %2742 to i32
-  %2744 = uitofp i32 %2743 to float
-  %2745 = fmul fast float %2744, %2738
-  %2746 = fsub fast float %2741, %2745
-  %2747 = and i32 %2743, 1
-  %2748 = icmp eq i32 %2747, 0
-  br i1 %2748, label %2749, label %2751
-
-; <label>:2749                                    ; preds = %2740
-  %2750 = fadd fast float %2746, %20
-  br label %2768
-
-; <label>:2751                                    ; preds = %2740
-  %2752 = fsub fast float %24, %2746
-  br label %2768
-
-; <label>:2753                                    ; preds = %2735
-  %2754 = fcmp fast ogt float %2188, %24
-  br i1 %2754, label %2755, label %2768
-
-; <label>:2755                                    ; preds = %2753
-  %2756 = fsub fast float %2188, %24
-  %2757 = fdiv fast float %2756, %2738
-  %2758 = fptoui float %2757 to i32
-  %2759 = uitofp i32 %2758 to float
-  %2760 = fmul fast float %2759, %2738
-  %2761 = fsub fast float %2756, %2760
-  %2762 = and i32 %2758, 1
-  %2763 = icmp eq i32 %2762, 0
-  br i1 %2763, label %2764, label %2766
-
-; <label>:2764                                    ; preds = %2755
-  %2765 = fsub fast float %24, %2761
-  br label %2768
-
-; <label>:2766                                    ; preds = %2755
-  %2767 = fadd fast float %2761, %20
-  br label %2768
-
-; <label>:2768                                    ; preds = %2766, %2764, %2753, %2751, %2749
-  %2769 = phi float [ %2750, %2749 ], [ %2752, %2751 ], [ %2765, %2764 ], [ %2767, %2766 ], [ %2188, %2753 ]
-  %2770 = fptoui float %2769 to i32
-  %2771 = uitofp i32 %2770 to float
-  %2772 = uitofp i32 %2737 to float
-  %2773 = fptoui float %45 to i32
-  %2774 = fptoui float %182 to i32
-  %2775 = fptoui float %2771 to i32
-  %2776 = fptoui float %2772 to i32
-  %2777 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2778 = extractvalue %dx.types.CBufRet.i32 %2777, 0
-  %2779 = extractvalue %dx.types.CBufRet.i32 %2777, 1
-  %2780 = extractvalue %dx.types.CBufRet.i32 %2777, 2
-  %2781 = extractvalue %dx.types.CBufRet.i32 %2777, 3
-  %2782 = mul i32 %2778, %2773
-  %2783 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2774, i32 %2779, i32 %2782)  ; IMad(a,b,c)
-  %2784 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2775, i32 %2780, i32 %2783)  ; IMad(a,b,c)
-  %2785 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2776, i32 %2781, i32 %2784)  ; IMad(a,b,c)
-  %2786 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2785, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2787 = extractvalue %dx.types.ResRet.i32 %2786, 0
-  %2788 = extractvalue %dx.types.ResRet.i32 %2786, 1
-  %2789 = call double @dx.op.makeDouble.f64(i32 101, i32 %2787, i32 %2788)  ; MakeDouble(lo,hi)
-  %2790 = fptrunc double %2789 to float
-  br label %2791
-
-; <label>:2791                                    ; preds = %2768, %2702, %2671, %2652, %2642
-  %2792 = phi float [ %2668, %2652 ], [ 0.000000e+00, %2642 ], [ %2701, %2671 ], [ %2790, %2768 ], [ 0.000000e+00, %2702 ]
-  %2793 = fadd fast float %974, 2.000000e+00
-  br i1 %977, label %2794, label %2821
-
-; <label>:2794                                    ; preds = %2791
-  %2795 = fcmp fast oge float %973, 0.000000e+00
-  %2796 = fptoui float %973 to i32
-  %2797 = icmp ult i32 %2796, %13
-  %2798 = and i1 %2795, %2797
-  %2799 = fcmp fast oge float %2793, 0.000000e+00
-  %2800 = and i1 %2799, %2798
-  %2801 = fptoui float %2793 to i32
-  %2802 = icmp ult i32 %2801, %15
-  %2803 = and i1 %2802, %2800
-  br i1 %2803, label %2804, label %2943
-
-; <label>:2804                                    ; preds = %2794
-  %2805 = fptoui float %45 to i32
-  %2806 = fptoui float %182 to i32
-  %2807 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2808 = extractvalue %dx.types.CBufRet.i32 %2807, 0
-  %2809 = extractvalue %dx.types.CBufRet.i32 %2807, 1
-  %2810 = extractvalue %dx.types.CBufRet.i32 %2807, 2
-  %2811 = extractvalue %dx.types.CBufRet.i32 %2807, 3
-  %2812 = mul i32 %2808, %2805
-  %2813 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2806, i32 %2809, i32 %2812)  ; IMad(a,b,c)
-  %2814 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2801, i32 %2810, i32 %2813)  ; IMad(a,b,c)
-  %2815 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2796, i32 %2811, i32 %2814)  ; IMad(a,b,c)
-  %2816 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2815, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2817 = extractvalue %dx.types.ResRet.i32 %2816, 0
-  %2818 = extractvalue %dx.types.ResRet.i32 %2816, 1
-  %2819 = call double @dx.op.makeDouble.f64(i32 101, i32 %2817, i32 %2818)  ; MakeDouble(lo,hi)
-  %2820 = fptrunc double %2819 to float
-  br label %2943
-
-; <label>:2821                                    ; preds = %2791
-  %2822 = icmp eq i32 %976, 1
-  br i1 %2822, label %2823, label %2854
-
-; <label>:2823                                    ; preds = %2821
-  %2824 = add i32 %13, -1
-  %2825 = uitofp i32 %2824 to float
-  %2826 = call float @dx.op.binary.f32(i32 35, float %973, float 0.000000e+00)  ; FMax(a,b)
-  %2827 = call float @dx.op.binary.f32(i32 36, float %2826, float %2825)  ; FMin(a,b)
-  %2828 = fptoui float %2827 to i32
-  %2829 = add i32 %15, -1
-  %2830 = uitofp i32 %2829 to float
-  %2831 = call float @dx.op.binary.f32(i32 35, float %2793, float 0.000000e+00)  ; FMax(a,b)
-  %2832 = call float @dx.op.binary.f32(i32 36, float %2831, float %2830)  ; FMin(a,b)
-  %2833 = fptoui float %2832 to i32
-  %2834 = uitofp i32 %2833 to float
-  %2835 = uitofp i32 %2828 to float
-  %2836 = fptoui float %45 to i32
-  %2837 = fptoui float %182 to i32
-  %2838 = fptoui float %2834 to i32
-  %2839 = fptoui float %2835 to i32
-  %2840 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2841 = extractvalue %dx.types.CBufRet.i32 %2840, 0
-  %2842 = extractvalue %dx.types.CBufRet.i32 %2840, 1
-  %2843 = extractvalue %dx.types.CBufRet.i32 %2840, 2
-  %2844 = extractvalue %dx.types.CBufRet.i32 %2840, 3
-  %2845 = mul i32 %2841, %2836
-  %2846 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2837, i32 %2842, i32 %2845)  ; IMad(a,b,c)
-  %2847 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2838, i32 %2843, i32 %2846)  ; IMad(a,b,c)
-  %2848 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2839, i32 %2844, i32 %2847)  ; IMad(a,b,c)
-  %2849 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2848, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2850 = extractvalue %dx.types.ResRet.i32 %2849, 0
-  %2851 = extractvalue %dx.types.ResRet.i32 %2849, 1
-  %2852 = call double @dx.op.makeDouble.f64(i32 101, i32 %2850, i32 %2851)  ; MakeDouble(lo,hi)
-  %2853 = fptrunc double %2852 to float
-  br label %2943
-
-; <label>:2854                                    ; preds = %2821
-  %2855 = icmp eq i32 %976, 2
-  br i1 %2855, label %2856, label %2943
-
-; <label>:2856                                    ; preds = %2854
-  %2857 = fsub fast float %22, %20
-  %2858 = fcmp fast olt float %973, %20
-  br i1 %2858, label %2859, label %2872
-
-; <label>:2859                                    ; preds = %2856
-  %2860 = fsub fast float %20, %973
-  %2861 = fdiv fast float %2860, %2857
-  %2862 = fptoui float %2861 to i32
-  %2863 = uitofp i32 %2862 to float
-  %2864 = fmul fast float %2863, %2857
-  %2865 = fsub fast float %2860, %2864
-  %2866 = and i32 %2862, 1
-  %2867 = icmp eq i32 %2866, 0
-  br i1 %2867, label %2868, label %2870
-
-; <label>:2868                                    ; preds = %2859
-  %2869 = fadd fast float %2865, %20
-  br label %2887
-
-; <label>:2870                                    ; preds = %2859
-  %2871 = fsub fast float %22, %2865
-  br label %2887
-
-; <label>:2872                                    ; preds = %2856
-  %2873 = fcmp fast ogt float %973, %22
-  br i1 %2873, label %2874, label %2887
-
-; <label>:2874                                    ; preds = %2872
-  %2875 = fsub fast float %973, %22
-  %2876 = fdiv fast float %2875, %2857
-  %2877 = fptoui float %2876 to i32
-  %2878 = uitofp i32 %2877 to float
-  %2879 = fmul fast float %2878, %2857
-  %2880 = fsub fast float %2875, %2879
-  %2881 = and i32 %2877, 1
-  %2882 = icmp eq i32 %2881, 0
-  br i1 %2882, label %2883, label %2885
-
-; <label>:2883                                    ; preds = %2874
-  %2884 = fsub fast float %22, %2880
-  br label %2887
-
-; <label>:2885                                    ; preds = %2874
-  %2886 = fadd fast float %2880, %20
-  br label %2887
-
-; <label>:2887                                    ; preds = %2885, %2883, %2872, %2870, %2868
-  %2888 = phi float [ %2869, %2868 ], [ %2871, %2870 ], [ %2884, %2883 ], [ %2886, %2885 ], [ %973, %2872 ]
-  %2889 = fptoui float %2888 to i32
-  %2890 = fsub fast float %24, %20
-  %2891 = fcmp fast olt float %2793, %20
-  br i1 %2891, label %2892, label %2905
-
-; <label>:2892                                    ; preds = %2887
-  %2893 = fsub fast float %20, %2793
-  %2894 = fdiv fast float %2893, %2890
-  %2895 = fptoui float %2894 to i32
-  %2896 = uitofp i32 %2895 to float
-  %2897 = fmul fast float %2896, %2890
-  %2898 = fsub fast float %2893, %2897
-  %2899 = and i32 %2895, 1
-  %2900 = icmp eq i32 %2899, 0
-  br i1 %2900, label %2901, label %2903
-
-; <label>:2901                                    ; preds = %2892
-  %2902 = fadd fast float %2898, %20
-  br label %2920
-
-; <label>:2903                                    ; preds = %2892
-  %2904 = fsub fast float %24, %2898
-  br label %2920
-
-; <label>:2905                                    ; preds = %2887
-  %2906 = fcmp fast ogt float %2793, %24
-  br i1 %2906, label %2907, label %2920
-
-; <label>:2907                                    ; preds = %2905
-  %2908 = fsub fast float %2793, %24
-  %2909 = fdiv fast float %2908, %2890
-  %2910 = fptoui float %2909 to i32
-  %2911 = uitofp i32 %2910 to float
-  %2912 = fmul fast float %2911, %2890
-  %2913 = fsub fast float %2908, %2912
-  %2914 = and i32 %2910, 1
-  %2915 = icmp eq i32 %2914, 0
-  br i1 %2915, label %2916, label %2918
-
-; <label>:2916                                    ; preds = %2907
-  %2917 = fsub fast float %24, %2913
-  br label %2920
-
-; <label>:2918                                    ; preds = %2907
-  %2919 = fadd fast float %2913, %20
-  br label %2920
-
-; <label>:2920                                    ; preds = %2918, %2916, %2905, %2903, %2901
-  %2921 = phi float [ %2902, %2901 ], [ %2904, %2903 ], [ %2917, %2916 ], [ %2919, %2918 ], [ %2793, %2905 ]
-  %2922 = fptoui float %2921 to i32
-  %2923 = uitofp i32 %2922 to float
-  %2924 = uitofp i32 %2889 to float
-  %2925 = fptoui float %45 to i32
-  %2926 = fptoui float %182 to i32
-  %2927 = fptoui float %2923 to i32
-  %2928 = fptoui float %2924 to i32
-  %2929 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2930 = extractvalue %dx.types.CBufRet.i32 %2929, 0
-  %2931 = extractvalue %dx.types.CBufRet.i32 %2929, 1
-  %2932 = extractvalue %dx.types.CBufRet.i32 %2929, 2
-  %2933 = extractvalue %dx.types.CBufRet.i32 %2929, 3
-  %2934 = mul i32 %2930, %2925
-  %2935 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2926, i32 %2931, i32 %2934)  ; IMad(a,b,c)
-  %2936 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2927, i32 %2932, i32 %2935)  ; IMad(a,b,c)
-  %2937 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2928, i32 %2933, i32 %2936)  ; IMad(a,b,c)
-  %2938 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2937, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2939 = extractvalue %dx.types.ResRet.i32 %2938, 0
-  %2940 = extractvalue %dx.types.ResRet.i32 %2938, 1
-  %2941 = call double @dx.op.makeDouble.f64(i32 101, i32 %2939, i32 %2940)  ; MakeDouble(lo,hi)
-  %2942 = fptrunc double %2941 to float
-  br label %2943
-
-; <label>:2943                                    ; preds = %2920, %2854, %2823, %2804, %2794
-  %2944 = phi float [ %2820, %2804 ], [ 0.000000e+00, %2794 ], [ %2853, %2823 ], [ %2942, %2920 ], [ 0.000000e+00, %2854 ]
-  br i1 %977, label %2945, label %2972
-
-; <label>:2945                                    ; preds = %2943
-  %2946 = fcmp fast oge float %972, 0.000000e+00
-  %2947 = fptoui float %972 to i32
-  %2948 = icmp ult i32 %2947, %13
-  %2949 = and i1 %2946, %2948
-  %2950 = fcmp fast oge float %2793, 0.000000e+00
-  %2951 = and i1 %2950, %2949
-  %2952 = fptoui float %2793 to i32
-  %2953 = icmp ult i32 %2952, %15
-  %2954 = and i1 %2953, %2951
-  br i1 %2954, label %2955, label %3094
-
-; <label>:2955                                    ; preds = %2945
-  %2956 = fptoui float %45 to i32
-  %2957 = fptoui float %182 to i32
-  %2958 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2959 = extractvalue %dx.types.CBufRet.i32 %2958, 0
-  %2960 = extractvalue %dx.types.CBufRet.i32 %2958, 1
-  %2961 = extractvalue %dx.types.CBufRet.i32 %2958, 2
-  %2962 = extractvalue %dx.types.CBufRet.i32 %2958, 3
-  %2963 = mul i32 %2959, %2956
-  %2964 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2957, i32 %2960, i32 %2963)  ; IMad(a,b,c)
-  %2965 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2952, i32 %2961, i32 %2964)  ; IMad(a,b,c)
-  %2966 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2947, i32 %2962, i32 %2965)  ; IMad(a,b,c)
-  %2967 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2966, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2968 = extractvalue %dx.types.ResRet.i32 %2967, 0
-  %2969 = extractvalue %dx.types.ResRet.i32 %2967, 1
-  %2970 = call double @dx.op.makeDouble.f64(i32 101, i32 %2968, i32 %2969)  ; MakeDouble(lo,hi)
-  %2971 = fptrunc double %2970 to float
-  br label %3094
-
-; <label>:2972                                    ; preds = %2943
-  %2973 = icmp eq i32 %976, 1
-  br i1 %2973, label %2974, label %3005
-
-; <label>:2974                                    ; preds = %2972
-  %2975 = add i32 %13, -1
-  %2976 = uitofp i32 %2975 to float
-  %2977 = call float @dx.op.binary.f32(i32 35, float %972, float 0.000000e+00)  ; FMax(a,b)
-  %2978 = call float @dx.op.binary.f32(i32 36, float %2977, float %2976)  ; FMin(a,b)
-  %2979 = fptoui float %2978 to i32
-  %2980 = add i32 %15, -1
-  %2981 = uitofp i32 %2980 to float
-  %2982 = call float @dx.op.binary.f32(i32 35, float %2793, float 0.000000e+00)  ; FMax(a,b)
-  %2983 = call float @dx.op.binary.f32(i32 36, float %2982, float %2981)  ; FMin(a,b)
-  %2984 = fptoui float %2983 to i32
-  %2985 = uitofp i32 %2984 to float
-  %2986 = uitofp i32 %2979 to float
-  %2987 = fptoui float %45 to i32
-  %2988 = fptoui float %182 to i32
-  %2989 = fptoui float %2985 to i32
-  %2990 = fptoui float %2986 to i32
-  %2991 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2992 = extractvalue %dx.types.CBufRet.i32 %2991, 0
-  %2993 = extractvalue %dx.types.CBufRet.i32 %2991, 1
-  %2994 = extractvalue %dx.types.CBufRet.i32 %2991, 2
-  %2995 = extractvalue %dx.types.CBufRet.i32 %2991, 3
-  %2996 = mul i32 %2992, %2987
-  %2997 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2988, i32 %2993, i32 %2996)  ; IMad(a,b,c)
-  %2998 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2989, i32 %2994, i32 %2997)  ; IMad(a,b,c)
-  %2999 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2990, i32 %2995, i32 %2998)  ; IMad(a,b,c)
-  %3000 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2999, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3001 = extractvalue %dx.types.ResRet.i32 %3000, 0
-  %3002 = extractvalue %dx.types.ResRet.i32 %3000, 1
-  %3003 = call double @dx.op.makeDouble.f64(i32 101, i32 %3001, i32 %3002)  ; MakeDouble(lo,hi)
-  %3004 = fptrunc double %3003 to float
-  br label %3094
-
-; <label>:3005                                    ; preds = %2972
-  %3006 = icmp eq i32 %976, 2
-  br i1 %3006, label %3007, label %3094
-
-; <label>:3007                                    ; preds = %3005
-  %3008 = fsub fast float %22, %20
-  %3009 = fcmp fast olt float %972, %20
-  br i1 %3009, label %3010, label %3023
-
-; <label>:3010                                    ; preds = %3007
-  %3011 = fsub fast float %20, %972
-  %3012 = fdiv fast float %3011, %3008
-  %3013 = fptoui float %3012 to i32
-  %3014 = uitofp i32 %3013 to float
-  %3015 = fmul fast float %3014, %3008
-  %3016 = fsub fast float %3011, %3015
-  %3017 = and i32 %3013, 1
-  %3018 = icmp eq i32 %3017, 0
-  br i1 %3018, label %3019, label %3021
-
-; <label>:3019                                    ; preds = %3010
-  %3020 = fadd fast float %3016, %20
-  br label %3038
-
-; <label>:3021                                    ; preds = %3010
-  %3022 = fsub fast float %22, %3016
-  br label %3038
-
-; <label>:3023                                    ; preds = %3007
-  %3024 = fcmp fast ogt float %972, %22
-  br i1 %3024, label %3025, label %3038
-
-; <label>:3025                                    ; preds = %3023
-  %3026 = fsub fast float %972, %22
-  %3027 = fdiv fast float %3026, %3008
-  %3028 = fptoui float %3027 to i32
-  %3029 = uitofp i32 %3028 to float
-  %3030 = fmul fast float %3029, %3008
-  %3031 = fsub fast float %3026, %3030
-  %3032 = and i32 %3028, 1
-  %3033 = icmp eq i32 %3032, 0
-  br i1 %3033, label %3034, label %3036
-
-; <label>:3034                                    ; preds = %3025
-  %3035 = fsub fast float %22, %3031
-  br label %3038
-
-; <label>:3036                                    ; preds = %3025
-  %3037 = fadd fast float %3031, %20
-  br label %3038
-
-; <label>:3038                                    ; preds = %3036, %3034, %3023, %3021, %3019
-  %3039 = phi float [ %3020, %3019 ], [ %3022, %3021 ], [ %3035, %3034 ], [ %3037, %3036 ], [ %972, %3023 ]
-  %3040 = fptoui float %3039 to i32
-  %3041 = fsub fast float %24, %20
-  %3042 = fcmp fast olt float %2793, %20
-  br i1 %3042, label %3043, label %3056
-
-; <label>:3043                                    ; preds = %3038
-  %3044 = fsub fast float %20, %2793
-  %3045 = fdiv fast float %3044, %3041
-  %3046 = fptoui float %3045 to i32
-  %3047 = uitofp i32 %3046 to float
-  %3048 = fmul fast float %3047, %3041
-  %3049 = fsub fast float %3044, %3048
-  %3050 = and i32 %3046, 1
-  %3051 = icmp eq i32 %3050, 0
-  br i1 %3051, label %3052, label %3054
-
-; <label>:3052                                    ; preds = %3043
-  %3053 = fadd fast float %3049, %20
-  br label %3071
-
-; <label>:3054                                    ; preds = %3043
-  %3055 = fsub fast float %24, %3049
-  br label %3071
-
-; <label>:3056                                    ; preds = %3038
-  %3057 = fcmp fast ogt float %2793, %24
-  br i1 %3057, label %3058, label %3071
-
-; <label>:3058                                    ; preds = %3056
-  %3059 = fsub fast float %2793, %24
-  %3060 = fdiv fast float %3059, %3041
-  %3061 = fptoui float %3060 to i32
-  %3062 = uitofp i32 %3061 to float
-  %3063 = fmul fast float %3062, %3041
-  %3064 = fsub fast float %3059, %3063
-  %3065 = and i32 %3061, 1
-  %3066 = icmp eq i32 %3065, 0
-  br i1 %3066, label %3067, label %3069
-
-; <label>:3067                                    ; preds = %3058
-  %3068 = fsub fast float %24, %3064
-  br label %3071
-
-; <label>:3069                                    ; preds = %3058
-  %3070 = fadd fast float %3064, %20
-  br label %3071
-
-; <label>:3071                                    ; preds = %3069, %3067, %3056, %3054, %3052
-  %3072 = phi float [ %3053, %3052 ], [ %3055, %3054 ], [ %3068, %3067 ], [ %3070, %3069 ], [ %2793, %3056 ]
-  %3073 = fptoui float %3072 to i32
-  %3074 = uitofp i32 %3073 to float
-  %3075 = uitofp i32 %3040 to float
-  %3076 = fptoui float %45 to i32
-  %3077 = fptoui float %182 to i32
-  %3078 = fptoui float %3074 to i32
-  %3079 = fptoui float %3075 to i32
-  %3080 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3081 = extractvalue %dx.types.CBufRet.i32 %3080, 0
-  %3082 = extractvalue %dx.types.CBufRet.i32 %3080, 1
-  %3083 = extractvalue %dx.types.CBufRet.i32 %3080, 2
-  %3084 = extractvalue %dx.types.CBufRet.i32 %3080, 3
-  %3085 = mul i32 %3081, %3076
-  %3086 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3077, i32 %3082, i32 %3085)  ; IMad(a,b,c)
-  %3087 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3078, i32 %3083, i32 %3086)  ; IMad(a,b,c)
-  %3088 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3079, i32 %3084, i32 %3087)  ; IMad(a,b,c)
-  %3089 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3088, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3090 = extractvalue %dx.types.ResRet.i32 %3089, 0
-  %3091 = extractvalue %dx.types.ResRet.i32 %3089, 1
-  %3092 = call double @dx.op.makeDouble.f64(i32 101, i32 %3090, i32 %3091)  ; MakeDouble(lo,hi)
-  %3093 = fptrunc double %3092 to float
-  br label %3094
-
-; <label>:3094                                    ; preds = %3071, %3005, %2974, %2955, %2945
-  %3095 = phi float [ %2971, %2955 ], [ 0.000000e+00, %2945 ], [ %3004, %2974 ], [ %3093, %3071 ], [ 0.000000e+00, %3005 ]
-  br i1 %977, label %3096, label %3123
-
-; <label>:3096                                    ; preds = %3094
-  %3097 = fcmp fast oge float %1280, 0.000000e+00
-  %3098 = fptoui float %1280 to i32
-  %3099 = icmp ult i32 %3098, %13
-  %3100 = and i1 %3097, %3099
-  %3101 = fcmp fast oge float %2793, 0.000000e+00
-  %3102 = and i1 %3101, %3100
-  %3103 = fptoui float %2793 to i32
-  %3104 = icmp ult i32 %3103, %15
-  %3105 = and i1 %3104, %3102
-  br i1 %3105, label %3106, label %3245
-
-; <label>:3106                                    ; preds = %3096
-  %3107 = fptoui float %45 to i32
-  %3108 = fptoui float %182 to i32
-  %3109 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3110 = extractvalue %dx.types.CBufRet.i32 %3109, 0
-  %3111 = extractvalue %dx.types.CBufRet.i32 %3109, 1
-  %3112 = extractvalue %dx.types.CBufRet.i32 %3109, 2
-  %3113 = extractvalue %dx.types.CBufRet.i32 %3109, 3
-  %3114 = mul i32 %3110, %3107
-  %3115 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3108, i32 %3111, i32 %3114)  ; IMad(a,b,c)
-  %3116 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3103, i32 %3112, i32 %3115)  ; IMad(a,b,c)
-  %3117 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3098, i32 %3113, i32 %3116)  ; IMad(a,b,c)
-  %3118 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3117, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3119 = extractvalue %dx.types.ResRet.i32 %3118, 0
-  %3120 = extractvalue %dx.types.ResRet.i32 %3118, 1
-  %3121 = call double @dx.op.makeDouble.f64(i32 101, i32 %3119, i32 %3120)  ; MakeDouble(lo,hi)
-  %3122 = fptrunc double %3121 to float
-  br label %3245
-
-; <label>:3123                                    ; preds = %3094
-  %3124 = icmp eq i32 %976, 1
-  br i1 %3124, label %3125, label %3156
-
-; <label>:3125                                    ; preds = %3123
-  %3126 = add i32 %13, -1
-  %3127 = uitofp i32 %3126 to float
-  %3128 = call float @dx.op.binary.f32(i32 35, float %1280, float 0.000000e+00)  ; FMax(a,b)
-  %3129 = call float @dx.op.binary.f32(i32 36, float %3128, float %3127)  ; FMin(a,b)
-  %3130 = fptoui float %3129 to i32
-  %3131 = add i32 %15, -1
-  %3132 = uitofp i32 %3131 to float
-  %3133 = call float @dx.op.binary.f32(i32 35, float %2793, float 0.000000e+00)  ; FMax(a,b)
-  %3134 = call float @dx.op.binary.f32(i32 36, float %3133, float %3132)  ; FMin(a,b)
-  %3135 = fptoui float %3134 to i32
-  %3136 = uitofp i32 %3135 to float
-  %3137 = uitofp i32 %3130 to float
-  %3138 = fptoui float %45 to i32
-  %3139 = fptoui float %182 to i32
-  %3140 = fptoui float %3136 to i32
-  %3141 = fptoui float %3137 to i32
-  %3142 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3143 = extractvalue %dx.types.CBufRet.i32 %3142, 0
-  %3144 = extractvalue %dx.types.CBufRet.i32 %3142, 1
-  %3145 = extractvalue %dx.types.CBufRet.i32 %3142, 2
-  %3146 = extractvalue %dx.types.CBufRet.i32 %3142, 3
-  %3147 = mul i32 %3143, %3138
-  %3148 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3139, i32 %3144, i32 %3147)  ; IMad(a,b,c)
-  %3149 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3140, i32 %3145, i32 %3148)  ; IMad(a,b,c)
-  %3150 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3141, i32 %3146, i32 %3149)  ; IMad(a,b,c)
-  %3151 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3150, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3152 = extractvalue %dx.types.ResRet.i32 %3151, 0
-  %3153 = extractvalue %dx.types.ResRet.i32 %3151, 1
-  %3154 = call double @dx.op.makeDouble.f64(i32 101, i32 %3152, i32 %3153)  ; MakeDouble(lo,hi)
-  %3155 = fptrunc double %3154 to float
-  br label %3245
-
-; <label>:3156                                    ; preds = %3123
-  %3157 = icmp eq i32 %976, 2
-  br i1 %3157, label %3158, label %3245
-
-; <label>:3158                                    ; preds = %3156
-  %3159 = fsub fast float %22, %20
-  %3160 = fcmp fast olt float %1280, %20
-  br i1 %3160, label %3161, label %3174
-
-; <label>:3161                                    ; preds = %3158
-  %3162 = fsub fast float %20, %1280
-  %3163 = fdiv fast float %3162, %3159
-  %3164 = fptoui float %3163 to i32
-  %3165 = uitofp i32 %3164 to float
-  %3166 = fmul fast float %3165, %3159
-  %3167 = fsub fast float %3162, %3166
-  %3168 = and i32 %3164, 1
-  %3169 = icmp eq i32 %3168, 0
-  br i1 %3169, label %3170, label %3172
-
-; <label>:3170                                    ; preds = %3161
-  %3171 = fadd fast float %3167, %20
-  br label %3189
-
-; <label>:3172                                    ; preds = %3161
-  %3173 = fsub fast float %22, %3167
-  br label %3189
-
-; <label>:3174                                    ; preds = %3158
-  %3175 = fcmp fast ogt float %1280, %22
-  br i1 %3175, label %3176, label %3189
-
-; <label>:3176                                    ; preds = %3174
-  %3177 = fsub fast float %1280, %22
-  %3178 = fdiv fast float %3177, %3159
-  %3179 = fptoui float %3178 to i32
-  %3180 = uitofp i32 %3179 to float
-  %3181 = fmul fast float %3180, %3159
-  %3182 = fsub fast float %3177, %3181
-  %3183 = and i32 %3179, 1
-  %3184 = icmp eq i32 %3183, 0
-  br i1 %3184, label %3185, label %3187
-
-; <label>:3185                                    ; preds = %3176
-  %3186 = fsub fast float %22, %3182
-  br label %3189
-
-; <label>:3187                                    ; preds = %3176
-  %3188 = fadd fast float %3182, %20
-  br label %3189
-
-; <label>:3189                                    ; preds = %3187, %3185, %3174, %3172, %3170
-  %3190 = phi float [ %3171, %3170 ], [ %3173, %3172 ], [ %3186, %3185 ], [ %3188, %3187 ], [ %1280, %3174 ]
-  %3191 = fptoui float %3190 to i32
-  %3192 = fsub fast float %24, %20
-  %3193 = fcmp fast olt float %2793, %20
-  br i1 %3193, label %3194, label %3207
-
-; <label>:3194                                    ; preds = %3189
-  %3195 = fsub fast float %20, %2793
-  %3196 = fdiv fast float %3195, %3192
-  %3197 = fptoui float %3196 to i32
-  %3198 = uitofp i32 %3197 to float
-  %3199 = fmul fast float %3198, %3192
-  %3200 = fsub fast float %3195, %3199
-  %3201 = and i32 %3197, 1
-  %3202 = icmp eq i32 %3201, 0
-  br i1 %3202, label %3203, label %3205
-
-; <label>:3203                                    ; preds = %3194
-  %3204 = fadd fast float %3200, %20
-  br label %3222
-
-; <label>:3205                                    ; preds = %3194
-  %3206 = fsub fast float %24, %3200
-  br label %3222
-
-; <label>:3207                                    ; preds = %3189
-  %3208 = fcmp fast ogt float %2793, %24
-  br i1 %3208, label %3209, label %3222
-
-; <label>:3209                                    ; preds = %3207
-  %3210 = fsub fast float %2793, %24
-  %3211 = fdiv fast float %3210, %3192
-  %3212 = fptoui float %3211 to i32
-  %3213 = uitofp i32 %3212 to float
-  %3214 = fmul fast float %3213, %3192
-  %3215 = fsub fast float %3210, %3214
-  %3216 = and i32 %3212, 1
-  %3217 = icmp eq i32 %3216, 0
-  br i1 %3217, label %3218, label %3220
-
-; <label>:3218                                    ; preds = %3209
-  %3219 = fsub fast float %24, %3215
-  br label %3222
-
-; <label>:3220                                    ; preds = %3209
-  %3221 = fadd fast float %3215, %20
-  br label %3222
-
-; <label>:3222                                    ; preds = %3220, %3218, %3207, %3205, %3203
-  %3223 = phi float [ %3204, %3203 ], [ %3206, %3205 ], [ %3219, %3218 ], [ %3221, %3220 ], [ %2793, %3207 ]
-  %3224 = fptoui float %3223 to i32
-  %3225 = uitofp i32 %3224 to float
-  %3226 = uitofp i32 %3191 to float
-  %3227 = fptoui float %45 to i32
-  %3228 = fptoui float %182 to i32
-  %3229 = fptoui float %3225 to i32
-  %3230 = fptoui float %3226 to i32
-  %3231 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3232 = extractvalue %dx.types.CBufRet.i32 %3231, 0
-  %3233 = extractvalue %dx.types.CBufRet.i32 %3231, 1
-  %3234 = extractvalue %dx.types.CBufRet.i32 %3231, 2
-  %3235 = extractvalue %dx.types.CBufRet.i32 %3231, 3
-  %3236 = mul i32 %3232, %3227
-  %3237 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3228, i32 %3233, i32 %3236)  ; IMad(a,b,c)
-  %3238 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3229, i32 %3234, i32 %3237)  ; IMad(a,b,c)
-  %3239 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3230, i32 %3235, i32 %3238)  ; IMad(a,b,c)
-  %3240 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3239, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3241 = extractvalue %dx.types.ResRet.i32 %3240, 0
-  %3242 = extractvalue %dx.types.ResRet.i32 %3240, 1
-  %3243 = call double @dx.op.makeDouble.f64(i32 101, i32 %3241, i32 %3242)  ; MakeDouble(lo,hi)
-  %3244 = fptrunc double %3243 to float
-  br label %3245
-
-; <label>:3245                                    ; preds = %3222, %3156, %3125, %3106, %3096
-  %3246 = phi float [ %3122, %3106 ], [ 0.000000e+00, %3096 ], [ %3155, %3125 ], [ %3244, %3222 ], [ 0.000000e+00, %3156 ]
-  br i1 %977, label %3247, label %3274
-
-; <label>:3247                                    ; preds = %3245
-  %3248 = fcmp fast oge float %1432, 0.000000e+00
-  %3249 = fptoui float %1432 to i32
-  %3250 = icmp ult i32 %3249, %13
-  %3251 = and i1 %3248, %3250
-  %3252 = fcmp fast oge float %2793, 0.000000e+00
-  %3253 = and i1 %3252, %3251
-  %3254 = fptoui float %2793 to i32
-  %3255 = icmp ult i32 %3254, %15
-  %3256 = and i1 %3255, %3253
-  br i1 %3256, label %3257, label %3396
-
-; <label>:3257                                    ; preds = %3247
-  %3258 = fptoui float %45 to i32
-  %3259 = fptoui float %182 to i32
-  %3260 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3261 = extractvalue %dx.types.CBufRet.i32 %3260, 0
-  %3262 = extractvalue %dx.types.CBufRet.i32 %3260, 1
-  %3263 = extractvalue %dx.types.CBufRet.i32 %3260, 2
-  %3264 = extractvalue %dx.types.CBufRet.i32 %3260, 3
-  %3265 = mul i32 %3261, %3258
-  %3266 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3259, i32 %3262, i32 %3265)  ; IMad(a,b,c)
-  %3267 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3254, i32 %3263, i32 %3266)  ; IMad(a,b,c)
-  %3268 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3249, i32 %3264, i32 %3267)  ; IMad(a,b,c)
-  %3269 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3268, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3270 = extractvalue %dx.types.ResRet.i32 %3269, 0
-  %3271 = extractvalue %dx.types.ResRet.i32 %3269, 1
-  %3272 = call double @dx.op.makeDouble.f64(i32 101, i32 %3270, i32 %3271)  ; MakeDouble(lo,hi)
-  %3273 = fptrunc double %3272 to float
-  br label %3396
-
-; <label>:3274                                    ; preds = %3245
-  %3275 = icmp eq i32 %976, 1
-  br i1 %3275, label %3276, label %3307
-
-; <label>:3276                                    ; preds = %3274
-  %3277 = add i32 %13, -1
-  %3278 = uitofp i32 %3277 to float
-  %3279 = call float @dx.op.binary.f32(i32 35, float %1432, float 0.000000e+00)  ; FMax(a,b)
-  %3280 = call float @dx.op.binary.f32(i32 36, float %3279, float %3278)  ; FMin(a,b)
-  %3281 = fptoui float %3280 to i32
-  %3282 = add i32 %15, -1
-  %3283 = uitofp i32 %3282 to float
-  %3284 = call float @dx.op.binary.f32(i32 35, float %2793, float 0.000000e+00)  ; FMax(a,b)
-  %3285 = call float @dx.op.binary.f32(i32 36, float %3284, float %3283)  ; FMin(a,b)
-  %3286 = fptoui float %3285 to i32
-  %3287 = uitofp i32 %3286 to float
-  %3288 = uitofp i32 %3281 to float
-  %3289 = fptoui float %45 to i32
-  %3290 = fptoui float %182 to i32
-  %3291 = fptoui float %3287 to i32
-  %3292 = fptoui float %3288 to i32
-  %3293 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3294 = extractvalue %dx.types.CBufRet.i32 %3293, 0
-  %3295 = extractvalue %dx.types.CBufRet.i32 %3293, 1
-  %3296 = extractvalue %dx.types.CBufRet.i32 %3293, 2
-  %3297 = extractvalue %dx.types.CBufRet.i32 %3293, 3
-  %3298 = mul i32 %3294, %3289
-  %3299 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3290, i32 %3295, i32 %3298)  ; IMad(a,b,c)
-  %3300 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3291, i32 %3296, i32 %3299)  ; IMad(a,b,c)
-  %3301 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3292, i32 %3297, i32 %3300)  ; IMad(a,b,c)
-  %3302 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3301, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3303 = extractvalue %dx.types.ResRet.i32 %3302, 0
-  %3304 = extractvalue %dx.types.ResRet.i32 %3302, 1
-  %3305 = call double @dx.op.makeDouble.f64(i32 101, i32 %3303, i32 %3304)  ; MakeDouble(lo,hi)
-  %3306 = fptrunc double %3305 to float
-  br label %3396
-
-; <label>:3307                                    ; preds = %3274
-  %3308 = icmp eq i32 %976, 2
-  br i1 %3308, label %3309, label %3396
-
-; <label>:3309                                    ; preds = %3307
-  %3310 = fsub fast float %22, %20
-  %3311 = fcmp fast olt float %1432, %20
-  br i1 %3311, label %3312, label %3325
-
-; <label>:3312                                    ; preds = %3309
-  %3313 = fsub fast float %20, %1432
-  %3314 = fdiv fast float %3313, %3310
-  %3315 = fptoui float %3314 to i32
-  %3316 = uitofp i32 %3315 to float
-  %3317 = fmul fast float %3316, %3310
-  %3318 = fsub fast float %3313, %3317
-  %3319 = and i32 %3315, 1
-  %3320 = icmp eq i32 %3319, 0
-  br i1 %3320, label %3321, label %3323
-
-; <label>:3321                                    ; preds = %3312
-  %3322 = fadd fast float %3318, %20
-  br label %3340
-
-; <label>:3323                                    ; preds = %3312
-  %3324 = fsub fast float %22, %3318
-  br label %3340
-
-; <label>:3325                                    ; preds = %3309
-  %3326 = fcmp fast ogt float %1432, %22
-  br i1 %3326, label %3327, label %3340
-
-; <label>:3327                                    ; preds = %3325
-  %3328 = fsub fast float %1432, %22
-  %3329 = fdiv fast float %3328, %3310
-  %3330 = fptoui float %3329 to i32
-  %3331 = uitofp i32 %3330 to float
-  %3332 = fmul fast float %3331, %3310
-  %3333 = fsub fast float %3328, %3332
-  %3334 = and i32 %3330, 1
-  %3335 = icmp eq i32 %3334, 0
-  br i1 %3335, label %3336, label %3338
-
-; <label>:3336                                    ; preds = %3327
-  %3337 = fsub fast float %22, %3333
-  br label %3340
-
-; <label>:3338                                    ; preds = %3327
-  %3339 = fadd fast float %3333, %20
-  br label %3340
-
-; <label>:3340                                    ; preds = %3338, %3336, %3325, %3323, %3321
-  %3341 = phi float [ %3322, %3321 ], [ %3324, %3323 ], [ %3337, %3336 ], [ %3339, %3338 ], [ %1432, %3325 ]
-  %3342 = fptoui float %3341 to i32
-  %3343 = fsub fast float %24, %20
-  %3344 = fcmp fast olt float %2793, %20
-  br i1 %3344, label %3345, label %3358
-
-; <label>:3345                                    ; preds = %3340
-  %3346 = fsub fast float %20, %2793
-  %3347 = fdiv fast float %3346, %3343
-  %3348 = fptoui float %3347 to i32
-  %3349 = uitofp i32 %3348 to float
-  %3350 = fmul fast float %3349, %3343
-  %3351 = fsub fast float %3346, %3350
-  %3352 = and i32 %3348, 1
-  %3353 = icmp eq i32 %3352, 0
-  br i1 %3353, label %3354, label %3356
-
-; <label>:3354                                    ; preds = %3345
-  %3355 = fadd fast float %3351, %20
-  br label %3373
-
-; <label>:3356                                    ; preds = %3345
-  %3357 = fsub fast float %24, %3351
-  br label %3373
-
-; <label>:3358                                    ; preds = %3340
-  %3359 = fcmp fast ogt float %2793, %24
-  br i1 %3359, label %3360, label %3373
-
-; <label>:3360                                    ; preds = %3358
-  %3361 = fsub fast float %2793, %24
-  %3362 = fdiv fast float %3361, %3343
-  %3363 = fptoui float %3362 to i32
-  %3364 = uitofp i32 %3363 to float
-  %3365 = fmul fast float %3364, %3343
-  %3366 = fsub fast float %3361, %3365
-  %3367 = and i32 %3363, 1
-  %3368 = icmp eq i32 %3367, 0
-  br i1 %3368, label %3369, label %3371
-
-; <label>:3369                                    ; preds = %3360
-  %3370 = fsub fast float %24, %3366
-  br label %3373
-
-; <label>:3371                                    ; preds = %3360
-  %3372 = fadd fast float %3366, %20
-  br label %3373
-
-; <label>:3373                                    ; preds = %3371, %3369, %3358, %3356, %3354
-  %3374 = phi float [ %3355, %3354 ], [ %3357, %3356 ], [ %3370, %3369 ], [ %3372, %3371 ], [ %2793, %3358 ]
-  %3375 = fptoui float %3374 to i32
-  %3376 = uitofp i32 %3375 to float
-  %3377 = uitofp i32 %3342 to float
-  %3378 = fptoui float %45 to i32
-  %3379 = fptoui float %182 to i32
-  %3380 = fptoui float %3376 to i32
-  %3381 = fptoui float %3377 to i32
-  %3382 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3383 = extractvalue %dx.types.CBufRet.i32 %3382, 0
-  %3384 = extractvalue %dx.types.CBufRet.i32 %3382, 1
-  %3385 = extractvalue %dx.types.CBufRet.i32 %3382, 2
-  %3386 = extractvalue %dx.types.CBufRet.i32 %3382, 3
-  %3387 = mul i32 %3383, %3378
-  %3388 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3379, i32 %3384, i32 %3387)  ; IMad(a,b,c)
-  %3389 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3380, i32 %3385, i32 %3388)  ; IMad(a,b,c)
-  %3390 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3381, i32 %3386, i32 %3389)  ; IMad(a,b,c)
-  %3391 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3390, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3392 = extractvalue %dx.types.ResRet.i32 %3391, 0
-  %3393 = extractvalue %dx.types.ResRet.i32 %3391, 1
-  %3394 = call double @dx.op.makeDouble.f64(i32 101, i32 %3392, i32 %3393)  ; MakeDouble(lo,hi)
-  %3395 = fptrunc double %3394 to float
-  br label %3396
-
-; <label>:3396                                    ; preds = %3373, %3307, %3276, %3257, %3247
-  %3397 = phi float [ %3273, %3257 ], [ 0.000000e+00, %3247 ], [ %3306, %3276 ], [ %3395, %3373 ], [ 0.000000e+00, %3307 ]
-  %3398 = call float @dx.op.unary.f32(i32 22, float %180)  ; Frc(value)
-  %3399 = call float @dx.op.unary.f32(i32 22, float %181)  ; Frc(value)
-  %3400 = fmul fast float %3399, %3399
-  %3401 = fmul fast float %3400, %3399
-  %3402 = fmul fast float %1128, -7.500000e-01
-  %3403 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2339, float %3402)  ; FMad(a,b,c)
-  %3404 = fmul fast float %1128, 1.500000e+00
-  %3405 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %1734, float %3404)  ; FMad(a,b,c)
-  %3406 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %2339, float %3405)  ; FMad(a,b,c)
-  %3407 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %2944, float %3406)  ; FMad(a,b,c)
-  %3408 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %1734, float %3402)  ; FMad(a,b,c)
-  %3409 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %2339, float %3408)  ; FMad(a,b,c)
-  %3410 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2944, float %3409)  ; FMad(a,b,c)
-  %3411 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3399, float %3400, float %3401, float %1734, float %3403, float %3407, float %3410)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3412 = fmul fast float %1279, -7.500000e-01
-  %3413 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2490, float %3412)  ; FMad(a,b,c)
-  %3414 = fmul fast float %1279, 1.500000e+00
-  %3415 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %1885, float %3414)  ; FMad(a,b,c)
-  %3416 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %2490, float %3415)  ; FMad(a,b,c)
-  %3417 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %3095, float %3416)  ; FMad(a,b,c)
-  %3418 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %1885, float %3412)  ; FMad(a,b,c)
-  %3419 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %2490, float %3418)  ; FMad(a,b,c)
-  %3420 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3095, float %3419)  ; FMad(a,b,c)
-  %3421 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3399, float %3400, float %3401, float %1885, float %3413, float %3417, float %3420)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3422 = fmul fast float %1431, -7.500000e-01
-  %3423 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2641, float %3422)  ; FMad(a,b,c)
-  %3424 = fmul fast float %1431, 1.500000e+00
-  %3425 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %2036, float %3424)  ; FMad(a,b,c)
-  %3426 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %2641, float %3425)  ; FMad(a,b,c)
-  %3427 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %3246, float %3426)  ; FMad(a,b,c)
-  %3428 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %2036, float %3422)  ; FMad(a,b,c)
-  %3429 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %2641, float %3428)  ; FMad(a,b,c)
-  %3430 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3246, float %3429)  ; FMad(a,b,c)
-  %3431 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3399, float %3400, float %3401, float %2036, float %3423, float %3427, float %3430)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3432 = fmul fast float %1583, -7.500000e-01
-  %3433 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2792, float %3432)  ; FMad(a,b,c)
-  %3434 = fmul fast float %1583, 1.500000e+00
-  %3435 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %2187, float %3434)  ; FMad(a,b,c)
-  %3436 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %2792, float %3435)  ; FMad(a,b,c)
-  %3437 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %3397, float %3436)  ; FMad(a,b,c)
-  %3438 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %2187, float %3432)  ; FMad(a,b,c)
-  %3439 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %2792, float %3438)  ; FMad(a,b,c)
-  %3440 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3397, float %3439)  ; FMad(a,b,c)
-  %3441 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3399, float %3400, float %3401, float %2187, float %3433, float %3437, float %3440)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3442 = fmul fast float %3398, %3398
-  %3443 = fmul fast float %3442, %3398
-  %3444 = fmul fast float %3411, -7.500000e-01
-  %3445 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3431, float %3444)  ; FMad(a,b,c)
-  %3446 = fmul fast float %3411, 1.500000e+00
-  %3447 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %3421, float %3446)  ; FMad(a,b,c)
-  %3448 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %3431, float %3447)  ; FMad(a,b,c)
-  %3449 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %3441, float %3448)  ; FMad(a,b,c)
-  %3450 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %3421, float %3444)  ; FMad(a,b,c)
-  %3451 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %3431, float %3450)  ; FMad(a,b,c)
-  %3452 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3441, float %3451)  ; FMad(a,b,c)
-  %3453 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3398, float %3442, float %3443, float %3421, float %3445, float %3449, float %3452)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3454 = fpext float %3453 to double
-  %3455 = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double %3454)  ; SplitDouble(value)
-  %3456 = extractvalue %dx.types.splitdouble %3455, 0
-  %3457 = extractvalue %dx.types.splitdouble %3455, 1
-  call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %1, i32 %8, i32 0, i32 %3456, i32 %3457, i32 undef, i32 undef, i8 3, i32 8)  ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment)
-  br label %3458
-
-; <label>:3458                                    ; preds = %3396, %969, %952, %335, %0
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.threadId.i32(i32, i32) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.unary.f32(i32, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.tertiary.f32(i32, float, float, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.binary.f32(i32, float, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.dot4.f32(i32, float, float, float, float, float, float, float, float) #0
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.tertiary.i32(i32, i32, i32, i32) #0
-
-; Function Attrs: nounwind readonly
-declare %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32, %dx.types.Handle, i32) #1
-
-; Function Attrs: nounwind readonly
-declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #1
-
-; Function Attrs: nounwind readonly
-declare %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32, %dx.types.Handle, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind readnone
-declare double @dx.op.makeDouble.f64(i32, i32, i32) #0
-
-; Function Attrs: nounwind
-declare void @dx.op.rawBufferStore.i32(i32, %dx.types.Handle, i32, i32, i32, i32, i32, i32, i8, i32) #2
-
-; Function Attrs: nounwind readnone
-declare %dx.types.splitdouble @dx.op.splitDouble.f64(i32, double) #0
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind readonly }
-attributes #2 = { nounwind }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.valver = !{!2}
-!dx.shaderModel = !{!3}
-!dx.resources = !{!4}
-!dx.entryPoints = !{!12}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 1, i32 2}
-!2 = !{i32 1, i32 6}
-!3 = !{!"cs", i32 6, i32 2}
-!4 = !{null, !5, !10, null}
-!5 = !{!6, !8, !9}
-!6 = !{i32 0, %"class.RWStructuredBuffer<double>"* undef, !"", i32 0, i32 0, i32 1, i32 12, i1 false, i1 false, i1 false, !7}
-!7 = !{i32 1, i32 8}
-!8 = !{i32 1, %"class.RWStructuredBuffer<double>"* undef, !"", i32 0, i32 1, i32 1, i32 12, i1 false, i1 false, i1 false, !7}
-!9 = !{i32 2, %"class.RWStructuredBuffer<double>"* undef, !"", i32 0, i32 2, i32 1, i32 12, i1 false, i1 false, i1 false, !7}
-!10 = !{!11}
-!11 = !{i32 0, %Constants* undef, !"", i32 0, i32 0, i32 1, i32 116, null}
-!12 = !{void ()* @GridSample, !"GridSample", null, !4, !13}
-!13 = !{i32 0, i64 8388628, i32 4, !14}
-!14 = !{i32 64, i32 1, i32 1}
-
-#endif
-
-const unsigned char g_GridSample[] = {
-  0x44, 0x58, 0x42, 0x43, 0x26, 0x90, 0x49, 0x30, 0x2a, 0x6b, 0x47, 0x89,
-  0xe8, 0xb7, 0x72, 0x10, 0xd5, 0xbc, 0x17, 0x60, 0x01, 0x00, 0x00, 0x00,
-  0xa8, 0x58, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00,
-  0x48, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00,
-  0x18, 0x01, 0x00, 0x00, 0x34, 0x01, 0x00, 0x00, 0x53, 0x46, 0x49, 0x30,
-  0x08, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x49, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x08, 0x00, 0x00, 0x00, 0x4f, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x50, 0x53, 0x56, 0x30,
-  0xa8, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x05, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00,
-  0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
-  0x18, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x48, 0x41, 0x53, 0x48, 0x14, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0xcf, 0x61, 0xc7, 0xe1, 0x3d, 0x4b, 0x0d, 0xbc,
-  0xf3, 0xb2, 0x94, 0x5d, 0x9c, 0xda, 0x94, 0xee, 0x44, 0x58, 0x49, 0x4c,
-  0x6c, 0x57, 0x00, 0x00, 0x62, 0x00, 0x05, 0x00, 0xdb, 0x15, 0x00, 0x00,
-  0x44, 0x58, 0x49, 0x4c, 0x02, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
-  0x54, 0x57, 0x00, 0x00, 0x42, 0x43, 0xc0, 0xde, 0x21, 0x0c, 0x00, 0x00,
-  0xd2, 0x15, 0x00, 0x00, 0x0b, 0x82, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00,
-  0x13, 0x00, 0x00, 0x00, 0x07, 0x81, 0x23, 0x91, 0x41, 0xc8, 0x04, 0x49,
-  0x06, 0x10, 0x32, 0x39, 0x92, 0x01, 0x84, 0x0c, 0x25, 0x05, 0x08, 0x19,
-  0x1e, 0x04, 0x8b, 0x62, 0x80, 0x18, 0x45, 0x02, 0x42, 0x92, 0x0b, 0x42,
-  0xc4, 0x10, 0x32, 0x14, 0x38, 0x08, 0x18, 0x4b, 0x0a, 0x32, 0x62, 0x88,
-  0x48, 0x90, 0x14, 0x20, 0x43, 0x46, 0x88, 0xa5, 0x00, 0x19, 0x32, 0x42,
-  0xe4, 0x48, 0x0e, 0x90, 0x11, 0x23, 0xc4, 0x50, 0x41, 0x51, 0x81, 0x8c,
-  0xe1, 0x83, 0xe5, 0x8a, 0x04, 0x31, 0x46, 0x06, 0x51, 0x18, 0x00, 0x00,
-  0x08, 0x00, 0x00, 0x00, 0x1b, 0x8c, 0xe0, 0xff, 0xff, 0xff, 0xff, 0x07,
-  0x40, 0x02, 0xa8, 0x0d, 0x86, 0xf0, 0xff, 0xff, 0xff, 0xff, 0x03, 0x20,
-  0x01, 0xd5, 0x06, 0x62, 0xf8, 0xff, 0xff, 0xff, 0xff, 0x01, 0x90, 0x00,
-  0x49, 0x18, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x13, 0x82, 0x60, 0x42,
-  0x20, 0x4c, 0x08, 0x06, 0x00, 0x00, 0x00, 0x00, 0x89, 0x20, 0x00, 0x00,
-  0x49, 0x00, 0x00, 0x00, 0x32, 0x22, 0x88, 0x09, 0x20, 0x64, 0x85, 0x04,
-  0x13, 0x23, 0xa4, 0x84, 0x04, 0x13, 0x23, 0xe3, 0x84, 0xa1, 0x90, 0x14,
-  0x12, 0x4c, 0x8c, 0x8c, 0x0b, 0x84, 0xc4, 0x4c, 0x10, 0xb4, 0xc1, 0x08,
-  0x40, 0x09, 0x00, 0x0a, 0xe6, 0x08, 0xc0, 0xa0, 0x0c, 0xc3, 0x30, 0x10,
-  0x31, 0x03, 0x50, 0x06, 0x63, 0x30, 0xe8, 0x28, 0x85, 0x31, 0x18, 0x86,
-  0x41, 0x49, 0x21, 0x8c, 0xc1, 0x30, 0x68, 0x29, 0x8a, 0x31, 0x18, 0x86,
-  0x61, 0x18, 0x86, 0x61, 0x50, 0x53, 0x8a, 0x61, 0x18, 0x86, 0x81, 0x9e,
-  0xa3, 0x86, 0xcb, 0x9f, 0xb0, 0x87, 0x90, 0x7c, 0x6e, 0xa3, 0x8a, 0x95,
-  0x98, 0x7c, 0xe4, 0xb6, 0x11, 0x31, 0x0c, 0xc3, 0x30, 0x47, 0x80, 0x90,
-  0x74, 0xcf, 0x70, 0xf9, 0x13, 0xf6, 0x10, 0x92, 0x1f, 0x02, 0xcd, 0xb0,
-  0x10, 0x28, 0x98, 0x0a, 0x11, 0x0d, 0xd4, 0x40, 0xd5, 0x1c, 0x41, 0x50,
-  0x0c, 0x6a, 0x90, 0x86, 0xe1, 0x22, 0xec, 0xa6, 0xe1, 0xf2, 0x27, 0xec,
-  0x21, 0x24, 0x7f, 0x25, 0xa4, 0x95, 0x98, 0x7c, 0xe4, 0xb6, 0x51, 0x31,
-  0x0c, 0xc3, 0x30, 0x94, 0x43, 0x1b, 0xa8, 0x61, 0x90, 0x06, 0xda, 0x86,
-  0x00, 0x0a, 0xd1, 0x0d, 0xc3, 0x40, 0x5e, 0x59, 0x80, 0x81, 0x1a, 0x86,
-  0x61, 0x18, 0x06, 0x69, 0x20, 0xf0, 0xa8, 0xe1, 0xf2, 0x27, 0xec, 0x21,
-  0x24, 0x5f, 0x7a, 0x16, 0x64, 0x1a, 0x1c, 0x2a, 0x58, 0x08, 0x24, 0x0c,
-  0x43, 0x19, 0xc4, 0xa1, 0xa3, 0x71, 0x20, 0x60, 0x26, 0x30, 0x18, 0x07,
-  0x76, 0x08, 0x87, 0x79, 0x98, 0x07, 0x37, 0x90, 0x85, 0x5b, 0x98, 0x05,
-  0x7a, 0x90, 0x87, 0x7a, 0x18, 0x07, 0x7a, 0xa8, 0x07, 0x79, 0x28, 0x07,
-  0x72, 0x10, 0x85, 0x7a, 0x30, 0x07, 0x73, 0x28, 0x07, 0x79, 0xe0, 0x03,
-  0x72, 0x78, 0x87, 0x7a, 0x10, 0x07, 0x76, 0x28, 0x07, 0x3f, 0x40, 0x41,
-  0x47, 0xe6, 0x30, 0x02, 0x31, 0x5c, 0xc2, 0x39, 0x8d, 0x34, 0x01, 0xcd,
-  0x24, 0xa1, 0x65, 0x18, 0x86, 0x01, 0x45, 0x51, 0x14, 0x45, 0x07, 0x4a,
-  0xe7, 0x08, 0x40, 0x61, 0x0a, 0x00, 0x00, 0x00, 0x13, 0x14, 0x72, 0xc0,
-  0x87, 0x74, 0x60, 0x87, 0x36, 0x68, 0x87, 0x79, 0x68, 0x03, 0x72, 0xc0,
-  0x87, 0x0d, 0xae, 0x50, 0x0e, 0x6d, 0xd0, 0x0e, 0x7a, 0x50, 0x0e, 0x6d,
-  0x00, 0x0f, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d,
-  0x90, 0x0e, 0x71, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x78,
-  0xa0, 0x07, 0x78, 0xd0, 0x06, 0xe9, 0x10, 0x07, 0x76, 0xa0, 0x07, 0x71,
-  0x60, 0x07, 0x6d, 0x90, 0x0e, 0x73, 0x20, 0x07, 0x7a, 0x30, 0x07, 0x72,
-  0xd0, 0x06, 0xe9, 0x60, 0x07, 0x74, 0xa0, 0x07, 0x76, 0x40, 0x07, 0x6d,
-  0x60, 0x0e, 0x71, 0x60, 0x07, 0x7a, 0x10, 0x07, 0x76, 0xd0, 0x06, 0xe6,
-  0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x60, 0x0e, 0x76,
-  0x40, 0x07, 0x7a, 0x60, 0x07, 0x74, 0xd0, 0x06, 0xee, 0x80, 0x07, 0x7a,
-  0x10, 0x07, 0x76, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x7a, 0x60, 0x07, 0x74,
-  0x30, 0xe4, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x60, 0xc8, 0x43, 0x00, 0x01, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0xc0, 0x90, 0xe7, 0x00, 0x02, 0x20, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x80, 0x21, 0x4f, 0x02, 0x04, 0x40, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x43, 0x9e, 0x05, 0x08, 0x80, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0x3c, 0x0d, 0x10, 0x00, 0x01,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x79, 0x1e, 0x20, 0x00,
-  0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0xf2, 0x54, 0x40,
-  0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0xe4, 0xc1,
-  0x80, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0xc8,
-  0xb3, 0x01, 0x01, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0,
-  0x90, 0xc7, 0x03, 0x02, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x80, 0x21, 0x0f, 0x18, 0x00, 0x01, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0xc0, 0x90, 0x67, 0x0c, 0x80, 0x00, 0x08, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x20, 0x0b, 0x04, 0x00, 0x0d, 0x00, 0x00, 0x00,
-  0x32, 0x1e, 0x98, 0x14, 0x19, 0x11, 0x4c, 0x90, 0x8c, 0x09, 0x26, 0x47,
-  0xc6, 0x04, 0x43, 0x1a, 0x4a, 0xa0, 0x08, 0x8a, 0x61, 0x04, 0xa0, 0x30,
-  0x0a, 0xa2, 0xd0, 0x03, 0x0a, 0xa1, 0x00, 0x03, 0xe8, 0x1a, 0x01, 0xa0,
-  0xb5, 0x50, 0x01, 0x01, 0x11, 0x48, 0x9d, 0x01, 0xa0, 0x73, 0x06, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00,
-  0x1a, 0x03, 0x4c, 0x90, 0x46, 0x02, 0x13, 0x44, 0x35, 0x18, 0x63, 0x0b,
-  0x73, 0x3b, 0x03, 0xb1, 0x2b, 0x93, 0x9b, 0x4b, 0x7b, 0x73, 0x03, 0x99,
-  0x71, 0xb9, 0x01, 0x41, 0xa1, 0x0b, 0x3b, 0x9b, 0x7b, 0x91, 0x2a, 0x62,
-  0x2a, 0x0a, 0x9a, 0x2a, 0xfa, 0x9a, 0xb9, 0x81, 0x79, 0x31, 0x4b, 0x73,
-  0x0b, 0x63, 0x4b, 0xd9, 0x10, 0x04, 0x13, 0x84, 0xa1, 0x99, 0x20, 0x0c,
-  0xce, 0x06, 0x61, 0x20, 0x26, 0x08, 0xc3, 0xb3, 0x41, 0x18, 0x0c, 0x0a,
-  0x63, 0x73, 0x1b, 0x06, 0xc4, 0x20, 0x26, 0x08, 0x03, 0x34, 0x41, 0x38,
-  0x83, 0x8c, 0xc0, 0x04, 0x61, 0x88, 0x26, 0x08, 0x97, 0x35, 0x41, 0x18,
-  0xa4, 0x0d, 0xc2, 0xf0, 0x6c, 0x58, 0x94, 0x85, 0x51, 0x94, 0xa1, 0x71,
-  0x1c, 0x07, 0xda, 0xb0, 0x0c, 0x0b, 0xa3, 0x0c, 0x43, 0xe3, 0x38, 0x0e,
-  0xb4, 0x61, 0x21, 0x16, 0x46, 0x21, 0x86, 0xc6, 0x71, 0x1c, 0x68, 0xc3,
-  0x10, 0x49, 0xd3, 0x04, 0x41, 0x0d, 0xb0, 0x09, 0xc2, 0x30, 0x6d, 0x40,
-  0x94, 0x8a, 0x51, 0x94, 0xc1, 0x02, 0x36, 0x04, 0xd7, 0x06, 0x02, 0xa0,
-  0x30, 0x60, 0x82, 0x20, 0x00, 0x54, 0x8e, 0xe4, 0xd2, 0xc8, 0xa6, 0xc2,
-  0xda, 0xe0, 0xd8, 0xca, 0x26, 0x08, 0x6b, 0x70, 0x4d, 0x10, 0x06, 0x6a,
-  0x82, 0x30, 0x54, 0x1b, 0x06, 0x6f, 0x18, 0x36, 0x10, 0x0a, 0xd7, 0x7d,
-  0x1b, 0x0a, 0x6d, 0x03, 0x32, 0x30, 0xa8, 0xc2, 0xc6, 0x66, 0xd7, 0xe6,
-  0x92, 0x46, 0x56, 0xe6, 0x46, 0x37, 0x25, 0x08, 0xaa, 0x90, 0xe1, 0xb9,
-  0xd8, 0x95, 0xc9, 0xcd, 0xa5, 0xbd, 0xb9, 0x4d, 0x09, 0x88, 0x26, 0x64,
-  0x78, 0x2e, 0x76, 0x61, 0x6c, 0x76, 0x65, 0x72, 0x53, 0x02, 0xa3, 0x0e,
-  0x19, 0x9e, 0xcb, 0x1c, 0x5a, 0x18, 0x59, 0x99, 0x5c, 0xd3, 0x1b, 0x59,
-  0x19, 0xdb, 0x94, 0x00, 0x29, 0x43, 0x86, 0xe7, 0x22, 0x57, 0x36, 0xf7,
-  0x56, 0x27, 0x37, 0x56, 0x36, 0x37, 0x25, 0xc0, 0xea, 0x90, 0xe1, 0xb9,
-  0x94, 0xb9, 0xd1, 0xc9, 0xe5, 0x41, 0xbd, 0xa5, 0xb9, 0xd1, 0xcd, 0x4d,
-  0x09, 0xc0, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00, 0x51, 0x00, 0x00, 0x00,
-  0x33, 0x08, 0x80, 0x1c, 0xc4, 0xe1, 0x1c, 0x66, 0x14, 0x01, 0x3d, 0x88,
-  0x43, 0x38, 0x84, 0xc3, 0x8c, 0x42, 0x80, 0x07, 0x79, 0x78, 0x07, 0x73,
-  0x98, 0x71, 0x0c, 0xe6, 0x00, 0x0f, 0xed, 0x10, 0x0e, 0xf4, 0x80, 0x0e,
-  0x33, 0x0c, 0x42, 0x1e, 0xc2, 0xc1, 0x1d, 0xce, 0xa1, 0x1c, 0x66, 0x30,
-  0x05, 0x3d, 0x88, 0x43, 0x38, 0x84, 0x83, 0x1b, 0xcc, 0x03, 0x3d, 0xc8,
-  0x43, 0x3d, 0x8c, 0x03, 0x3d, 0xcc, 0x78, 0x8c, 0x74, 0x70, 0x07, 0x7b,
-  0x08, 0x07, 0x79, 0x48, 0x87, 0x70, 0x70, 0x07, 0x7a, 0x70, 0x03, 0x76,
-  0x78, 0x87, 0x70, 0x20, 0x87, 0x19, 0xcc, 0x11, 0x0e, 0xec, 0x90, 0x0e,
-  0xe1, 0x30, 0x0f, 0x6e, 0x30, 0x0f, 0xe3, 0xf0, 0x0e, 0xf0, 0x50, 0x0e,
-  0x33, 0x10, 0xc4, 0x1d, 0xde, 0x21, 0x1c, 0xd8, 0x21, 0x1d, 0xc2, 0x61,
-  0x1e, 0x66, 0x30, 0x89, 0x3b, 0xbc, 0x83, 0x3b, 0xd0, 0x43, 0x39, 0xb4,
-  0x03, 0x3c, 0xbc, 0x83, 0x3c, 0x84, 0x03, 0x3b, 0xcc, 0xf0, 0x14, 0x76,
-  0x60, 0x07, 0x7b, 0x68, 0x07, 0x37, 0x68, 0x87, 0x72, 0x68, 0x07, 0x37,
-  0x80, 0x87, 0x70, 0x90, 0x87, 0x70, 0x60, 0x07, 0x76, 0x28, 0x07, 0x76,
-  0xf8, 0x05, 0x76, 0x78, 0x87, 0x77, 0x80, 0x87, 0x5f, 0x08, 0x87, 0x71,
-  0x18, 0x87, 0x72, 0x98, 0x87, 0x79, 0x98, 0x81, 0x2c, 0xee, 0xf0, 0x0e,
-  0xee, 0xe0, 0x0e, 0xf5, 0xc0, 0x0e, 0xec, 0x30, 0x03, 0x62, 0xc8, 0xa1,
-  0x1c, 0xe4, 0xa1, 0x1c, 0xcc, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xdc, 0x61,
-  0x1c, 0xca, 0x21, 0x1c, 0xc4, 0x81, 0x1d, 0xca, 0x61, 0x06, 0xd6, 0x90,
-  0x43, 0x39, 0xc8, 0x43, 0x39, 0x98, 0x43, 0x39, 0xc8, 0x43, 0x39, 0xb8,
-  0xc3, 0x38, 0x94, 0x43, 0x38, 0x88, 0x03, 0x3b, 0x94, 0xc3, 0x2f, 0xbc,
-  0x83, 0x3c, 0xfc, 0x82, 0x3b, 0xd4, 0x03, 0x3b, 0xb0, 0xc3, 0x0c, 0xc4,
-  0x21, 0x07, 0x7c, 0x70, 0x03, 0x7a, 0x28, 0x87, 0x76, 0x80, 0x87, 0x19,
-  0xd1, 0x43, 0x0e, 0xf8, 0xe0, 0x06, 0xe4, 0x20, 0x0e, 0xe7, 0xe0, 0x06,
-  0xf6, 0x10, 0x0e, 0xf2, 0xc0, 0x0e, 0xe1, 0x90, 0x0f, 0xef, 0x50, 0x0f,
-  0xf4, 0x30, 0x83, 0x81, 0xc8, 0x01, 0x1f, 0xdc, 0x40, 0x1c, 0xe4, 0xa1,
-  0x1c, 0xc2, 0x61, 0x1d, 0xdc, 0x40, 0x1c, 0xe4, 0x01, 0x00, 0x00, 0x00,
-  0x71, 0x20, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x06, 0xa0, 0x80, 0x11,
-  0x32, 0xb0, 0x00, 0xf3, 0x2c, 0x84, 0x11, 0x40, 0xc3, 0xe5, 0x3b, 0x8f,
-  0x1f, 0x20, 0x0d, 0x10, 0x61, 0x7e, 0x71, 0xdb, 0x76, 0xb0, 0x0d, 0x97,
-  0xef, 0x3c, 0xbe, 0x10, 0x50, 0x45, 0x41, 0x44, 0xa5, 0x03, 0x0c, 0x25,
-  0x61, 0x00, 0x02, 0xe6, 0x23, 0xb7, 0x6d, 0x08, 0xd2, 0x70, 0xf9, 0xce,
-  0xe3, 0x0b, 0x11, 0x01, 0x4c, 0x44, 0x08, 0x34, 0xc3, 0x42, 0x58, 0x81,
-  0x33, 0x5c, 0xbe, 0xf3, 0xf8, 0x83, 0x33, 0xe1, 0x7e, 0x71, 0xdb, 0xa6,
-  0x40, 0x0d, 0x97, 0xef, 0x3c, 0x3e, 0x03, 0x28, 0x44, 0xe7, 0x50, 0xc1,
-  0x42, 0xf8, 0x85, 0x8e, 0x5b, 0xc2, 0x35, 0x5c, 0xbe, 0xf3, 0xf8, 0x11,
-  0x60, 0x6d, 0x54, 0x51, 0x10, 0x51, 0xe9, 0x00, 0x83, 0x8f, 0xdc, 0xb6,
-  0x2d, 0x60, 0xc3, 0xe5, 0x3b, 0x8f, 0x1f, 0x01, 0xd6, 0x46, 0x15, 0x05,
-  0x11, 0xb1, 0x93, 0x13, 0x11, 0x3e, 0x72, 0xdb, 0xc6, 0x50, 0x0d, 0x97,
-  0xef, 0x3c, 0xbe, 0xf4, 0x2c, 0xc8, 0xd4, 0x39, 0x54, 0xb0, 0x10, 0x7e,
-  0xa1, 0xe3, 0x36, 0x20, 0x0d, 0x97, 0xef, 0x3c, 0xfe, 0x44, 0x44, 0x13,
-  0x02, 0x44, 0x98, 0x5f, 0xdc, 0xb6, 0x19, 0x48, 0xc3, 0xe5, 0x3b, 0x8f,
-  0x3f, 0x11, 0xd1, 0x84, 0x00, 0x11, 0xe6, 0x23, 0xb7, 0x6d, 0x01, 0xd2,
-  0x70, 0xf9, 0xce, 0xe3, 0x4f, 0x47, 0x44, 0x00, 0x83, 0x38, 0xf8, 0xc8,
-  0x6d, 0x9b, 0xc0, 0x33, 0x5c, 0xbe, 0xf3, 0xf8, 0x54, 0x03, 0x44, 0x98,
-  0x5f, 0xdc, 0x36, 0x00, 0x61, 0x20, 0x00, 0x00, 0x2e, 0x14, 0x00, 0x00,
-  0x13, 0x04, 0x24, 0x14, 0x0b, 0x04, 0x00, 0x00, 0x1f, 0x00, 0x00, 0x00,
-  0x34, 0x14, 0x58, 0xd9, 0x95, 0xa5, 0x40, 0x29, 0x07, 0xd4, 0x40, 0x19,
-  0x15, 0x52, 0x71, 0x15, 0xdc, 0x0c, 0x40, 0xc9, 0x95, 0x4d, 0xb1, 0x14,
-  0x73, 0x40, 0x61, 0x0a, 0x14, 0x4d, 0xe9, 0x06, 0x94, 0x43, 0x29, 0x90,
-  0x54, 0x04, 0x25, 0x50, 0x06, 0x64, 0x8c, 0x11, 0x80, 0x20, 0x08, 0xd2,
-  0xdf, 0x18, 0x01, 0x08, 0x82, 0x20, 0xff, 0x8d, 0x11, 0x80, 0x20, 0x08,
-  0xe2, 0xbf, 0x30, 0x46, 0x00, 0x82, 0x20, 0x18, 0x82, 0xc3, 0x18, 0x01,
-  0x08, 0x82, 0xa0, 0xfe, 0x8d, 0x11, 0x80, 0x20, 0x08, 0xea, 0xbf, 0x30,
-  0x46, 0x00, 0x82, 0x20, 0x08, 0x7f, 0x63, 0x04, 0x20, 0x08, 0x82, 0xf0,
-  0x2f, 0x8c, 0x11, 0x80, 0x20, 0x08, 0x82, 0x60, 0x30, 0x46, 0x00, 0x82,
-  0x20, 0x48, 0xff, 0xc2, 0x18, 0x01, 0x08, 0x82, 0x20, 0xfe, 0x8d, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x23, 0x06, 0x09, 0x00, 0x82, 0x60, 0x80, 0xd1,
-  0x01, 0xe6, 0xb8, 0x81, 0x1b, 0x98, 0xc1, 0x88, 0x41, 0x02, 0x80, 0x20,
-  0x18, 0x60, 0x75, 0x90, 0x3d, 0x70, 0x00, 0x07, 0x67, 0x30, 0x62, 0x90,
-  0x00, 0x20, 0x08, 0x06, 0x98, 0x1d, 0x68, 0x90, 0x1b, 0xb8, 0x01, 0x1a,
-  0x8c, 0x18, 0x24, 0x00, 0x08, 0x82, 0x01, 0x76, 0x07, 0x9b, 0xf4, 0x06,
-  0x6f, 0x90, 0x06, 0x23, 0x06, 0x06, 0x00, 0x82, 0x60, 0x40, 0xfc, 0x81,
-  0x05, 0x07, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x50, 0xe9, 0x81, 0x19,
-  0x08, 0x71, 0x30, 0x9a, 0x10, 0x00, 0x15, 0x0c, 0x30, 0x9a, 0x30, 0x04,
-  0xc3, 0x0d, 0x42, 0x40, 0x06, 0xb3, 0x0c, 0xc1, 0x08, 0x05, 0x23, 0x06,
-  0x07, 0x00, 0x82, 0x60, 0x50, 0xfd, 0xc1, 0x1a, 0x1c, 0x79, 0x30, 0x9a,
-  0x10, 0x0c, 0x17, 0x18, 0x35, 0x9a, 0x30, 0x08, 0x17, 0x18, 0x35, 0x62,
-  0x70, 0x00, 0x20, 0x08, 0x06, 0x15, 0x29, 0xc0, 0x01, 0x03, 0x06, 0xa3,
-  0x09, 0x01, 0x30, 0xdc, 0x10, 0xf4, 0x01, 0x18, 0x4c, 0x37, 0x5c, 0x53,
-  0x30, 0xdd, 0x80, 0x75, 0x42, 0x21, 0x01, 0x4c, 0x37, 0x68, 0x1f, 0x51,
-  0x48, 0x00, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x50, 0xb1, 0x02, 0x1e,
-  0x50, 0x67, 0x30, 0x9a, 0x10, 0x04, 0xa3, 0x09, 0x82, 0x30, 0x9a, 0x30,
-  0x0c, 0x15, 0x08, 0x52, 0x03, 0x21, 0x15, 0x0c, 0x52, 0x57, 0x30, 0x23,
-  0x06, 0x07, 0x00, 0x82, 0x60, 0x50, 0xd1, 0x02, 0x28, 0x70, 0xac, 0x30,
-  0x9a, 0x10, 0x00, 0x15, 0x0c, 0x52, 0x5b, 0x10, 0x15, 0x20, 0x33, 0x9a,
-  0x50, 0x04, 0x15, 0x08, 0x52, 0x44, 0x10, 0x15, 0x34, 0x33, 0x9a, 0x90,
-  0x08, 0x15, 0x08, 0x52, 0x44, 0x10, 0xd7, 0x18, 0x75, 0x85, 0x51, 0x37,
-  0x18, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0xd5, 0x38, 0xbc, 0xc2,
-  0x1a, 0xd8, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2,
-  0x20, 0x8c, 0x26, 0x10, 0xc3, 0x11, 0x46, 0x1d, 0x61, 0xd4, 0x11, 0x46,
-  0x1d, 0x61, 0xd4, 0x88, 0x41, 0x03, 0x80, 0x20, 0x18, 0x34, 0xee, 0x10,
-  0x0b, 0xcc, 0xa2, 0xe0, 0x01, 0x31, 0x08, 0x81, 0x09, 0x01, 0x7c, 0x4e,
-  0x18, 0x66, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0xb6, 0x76, 0xd0, 0x85,
-  0x3c, 0x08, 0xcc, 0xe1, 0x14, 0xc8, 0x61, 0x34, 0x21, 0x00, 0x46, 0x13,
-  0x84, 0x60, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xbc, 0x77, 0xe0, 0x05,
-  0x21, 0xb8, 0xc0, 0xb8, 0x3b, 0x86, 0x19, 0x31, 0x50, 0x00, 0x10, 0x04,
-  0x83, 0x6d, 0x1e, 0xc0, 0xe1, 0x0f, 0x02, 0x76, 0x68, 0x05, 0x75, 0x18,
-  0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x31, 0x38, 0x00, 0x10, 0x04,
-  0x03, 0xaf, 0x1e, 0xc4, 0x41, 0x08, 0x2e, 0x30, 0x6e, 0xb8, 0xa1, 0x0e,
-  0xe8, 0x01, 0x0c, 0x0c, 0x41, 0x05, 0xf8, 0xd8, 0x90, 0x0a, 0xf0, 0x99,
-  0x65, 0x10, 0x86, 0xc1, 0x04, 0x57, 0x90, 0x8f, 0x09, 0xaf, 0x20, 0x1f,
-  0xf3, 0x03, 0x5a, 0x80, 0x8f, 0xf5, 0x41, 0x2d, 0xc0, 0xc7, 0x08, 0x41,
-  0x3e, 0x46, 0x08, 0xf2, 0x99, 0x25, 0x20, 0x4c, 0x14, 0x10, 0xf9, 0x18,
-  0x12, 0x0a, 0xf2, 0x31, 0x41, 0x17, 0xe0, 0x63, 0xc2, 0x2e, 0xc0, 0xc7,
-  0x04, 0x5c, 0x90, 0x8f, 0x09, 0xb9, 0x20, 0x9f, 0x59, 0x02, 0x62, 0xa0,
-  0xc2, 0x80, 0x04, 0x62, 0x18, 0xa8, 0x30, 0x20, 0x81, 0x18, 0x46, 0x13,
-  0x62, 0x41, 0x18, 0x6e, 0x08, 0x4c, 0x02, 0x0c, 0x66, 0x19, 0x0a, 0x23,
-  0x18, 0x31, 0x30, 0x00, 0x10, 0x04, 0x83, 0x43, 0x26, 0xd6, 0x81, 0x18,
-  0x31, 0x30, 0x00, 0x10, 0x04, 0x83, 0x63, 0x26, 0xd8, 0x81, 0x98, 0x25,
-  0x30, 0x06, 0x2a, 0x0c, 0xa2, 0x60, 0x88, 0x81, 0x0a, 0x83, 0x28, 0x18,
-  0x62, 0x38, 0x42, 0x50, 0x05, 0xe2, 0x1b, 0x8e, 0x18, 0x52, 0x41, 0xf8,
-  0x4a, 0x08, 0x76, 0x38, 0x82, 0x68, 0x05, 0xe2, 0x2b, 0x21, 0xd8, 0xe1,
-  0x08, 0x63, 0x15, 0x84, 0xaf, 0x02, 0x61, 0x67, 0x19, 0x0e, 0x2d, 0x18,
-  0x4d, 0xf0, 0x85, 0x61, 0xb8, 0x21, 0x98, 0x09, 0x30, 0x98, 0x65, 0x40,
-  0x92, 0xa0, 0x74, 0x61, 0x24, 0xe0, 0x02, 0xa3, 0x46, 0x0c, 0x0e, 0x00,
-  0x04, 0xc1, 0x60, 0xf9, 0x09, 0x92, 0x68, 0xd0, 0x61, 0xc4, 0xe0, 0x00,
-  0x40, 0x10, 0x0c, 0x16, 0xb0, 0x20, 0x89, 0x40, 0x28, 0x5e, 0x38, 0x09,
-  0xb8, 0xc0, 0xa8, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x58, 0xc6, 0x02,
-  0x25, 0x20, 0x76, 0x18, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x85, 0x2c,
-  0x50, 0x22, 0x10, 0x66, 0x09, 0xb4, 0xe1, 0x06, 0x65, 0x27, 0xc0, 0x60,
-  0x96, 0x41, 0xd1, 0x02, 0xd3, 0x05, 0x5e, 0x88, 0xcf, 0x2c, 0xc3, 0xe2,
-  0x4c, 0xd6, 0x0b, 0x55, 0x7c, 0x2c, 0x10, 0xe8, 0x73, 0xc1, 0x30, 0x17,
-  0x18, 0x65, 0x41, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x41, 0x16, 0x3a,
-  0xdc, 0x10, 0x88, 0x05, 0x18, 0xcc, 0x32, 0x30, 0x4d, 0x60, 0x43, 0x39,
-  0xc0, 0x67, 0x96, 0x40, 0x32, 0x72, 0x20, 0xe2, 0x33, 0x4b, 0x20, 0xcd,
-  0x32, 0x3c, 0x12, 0x67, 0x5f, 0x39, 0xc4, 0xc7, 0x02, 0x86, 0x3e, 0x17,
-  0x0c, 0x73, 0x81, 0x51, 0x16, 0x3c, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11,
-  0x6e, 0xa1, 0xc3, 0x0d, 0x01, 0x5b, 0x80, 0xc1, 0x2c, 0x03, 0x14, 0x05,
-  0xd6, 0x0e, 0x43, 0x7c, 0x66, 0x09, 0x24, 0x23, 0xe0, 0x01, 0x3e, 0xb3,
-  0x04, 0xd2, 0x40, 0x8b, 0x81, 0x31, 0x56, 0x43, 0x40, 0x42, 0x24, 0x0b,
-  0x8e, 0xb9, 0x83, 0x3c, 0xc4, 0x67, 0x96, 0x61, 0xb2, 0xcc, 0xc0, 0xe6,
-  0x41, 0x0d, 0xe2, 0x63, 0x81, 0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x28,
-  0x0b, 0x0a, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0xbd, 0xd0, 0xe1, 0x86,
-  0x00, 0x2f, 0xc0, 0x60, 0x96, 0x81, 0xaa, 0x02, 0x1b, 0xf6, 0x01, 0x3e,
-  0xb3, 0x04, 0x9a, 0xe1, 0x03, 0x11, 0x9f, 0x59, 0x02, 0x6d, 0x96, 0xe1,
-  0xd2, 0xdc, 0xc0, 0xe8, 0x20, 0x1f, 0xe2, 0x63, 0x01, 0x43, 0x9f, 0x0b,
-  0x86, 0xb9, 0xc0, 0x28, 0x0b, 0x1e, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08,
-  0xd2, 0xd0, 0xe1, 0x86, 0x40, 0x34, 0xc0, 0x60, 0x96, 0x01, 0xcb, 0x02,
-  0x0b, 0x89, 0x21, 0x3e, 0xb3, 0x04, 0x9a, 0x11, 0x26, 0x01, 0x9f, 0x59,
-  0x02, 0x6d, 0xa0, 0xc8, 0x10, 0x07, 0xc4, 0x1f, 0x12, 0x7f, 0x30, 0xd8,
-  0x20, 0x63, 0x03, 0x8c, 0x0d, 0x2c, 0x36, 0xa8, 0xd8, 0x80, 0x1a, 0x28,
-  0x32, 0x78, 0x01, 0xf1, 0x87, 0xc4, 0x1f, 0x0c, 0x22, 0x33, 0x30, 0x7f,
-  0xb0, 0xb0, 0x4a, 0xa3, 0x0e, 0x1f, 0x8c, 0x9a, 0x65, 0xd8, 0xe6, 0xa0,
-  0x14, 0x46, 0x13, 0x6e, 0x62, 0x18, 0x6e, 0x08, 0x52, 0x03, 0x0c, 0x66,
-  0x19, 0x38, 0x2f, 0x18, 0x8e, 0x28, 0x7e, 0x62, 0xf8, 0xce, 0x18, 0x66,
-  0xb8, 0x21, 0xa8, 0x09, 0x32, 0xa8, 0x21, 0xd0, 0xe1, 0x08, 0x64, 0x2c,
-  0x86, 0xaf, 0x02, 0x41, 0x4f, 0x19, 0x66, 0xb8, 0x21, 0xc0, 0x09, 0x32,
-  0xa8, 0x60, 0xd0, 0x59, 0x86, 0x4e, 0x0e, 0x82, 0xe3, 0x87, 0x61, 0xae,
-  0x19, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x2a, 0xdf, 0x50, 0x0d,
-  0xb3, 0xb8, 0x8d, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84,
-  0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x0e, 0x19, 0x31, 0x40, 0x00, 0x10,
-  0x04, 0x83, 0xa7, 0x3c, 0x62, 0xe3, 0x20, 0x82, 0x11, 0x03, 0x04, 0x00,
-  0x41, 0x30, 0x78, 0xcc, 0x43, 0x36, 0x18, 0x22, 0x18, 0x31, 0x40, 0x00,
-  0x10, 0x04, 0x83, 0xe7, 0x3c, 0x66, 0x43, 0x22, 0x82, 0x11, 0x03, 0x05,
-  0x00, 0x41, 0x30, 0xd8, 0xca, 0x43, 0x36, 0xe0, 0x22, 0xf0, 0x8d, 0xbf,
-  0xe0, 0x8d, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0x11, 0x83, 0x03,
-  0x00, 0x41, 0x30, 0xf0, 0xce, 0x83, 0x36, 0x84, 0xe0, 0x02, 0xe3, 0x66,
-  0x09, 0xe4, 0x60, 0xb8, 0x61, 0x33, 0x0f, 0x30, 0x98, 0x65, 0xf8, 0xc0,
-  0x20, 0xa8, 0xb6, 0xb0, 0x0d, 0xb8, 0xc0, 0xa8, 0x11, 0x83, 0x03, 0x00,
-  0x41, 0x30, 0x58, 0xe4, 0xe3, 0x36, 0xc2, 0x60, 0x2f, 0x46, 0x0c, 0x0e,
-  0x00, 0x04, 0xc1, 0x60, 0x99, 0x8f, 0xdb, 0x08, 0x84, 0x0b, 0x86, 0x29,
-  0xb8, 0xd8, 0x0d, 0xb8, 0xc0, 0xa8, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30,
-  0x58, 0xee, 0x83, 0x37, 0xca, 0x00, 0x34, 0x46, 0x0c, 0x0e, 0x00, 0x04,
-  0xc1, 0x60, 0xc1, 0x0f, 0xde, 0x08, 0x84, 0x0b, 0x86, 0xb9, 0xc0, 0xa8,
-  0x3b, 0x8c, 0xba, 0x9c, 0x18, 0xe6, 0xd4, 0x60, 0x98, 0x23, 0x86, 0x39,
-  0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa8, 0xfa, 0x23, 0x3d,
-  0x4a, 0xc3, 0x3e, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13,
-  0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01, 0x40,
-  0x10, 0x0c, 0x1e, 0x12, 0x81, 0x8f, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00,
-  0x04, 0xc1, 0xe0, 0x29, 0x91, 0xf8, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01,
-  0x40, 0x10, 0x0c, 0x1e, 0x13, 0x91, 0x8f, 0x84, 0x08, 0x46, 0x0c, 0x14,
-  0x00, 0x04, 0xc1, 0x60, 0x23, 0x91, 0xf8, 0x78, 0x8d, 0xa0, 0x3f, 0x7c,
-  0x63, 0x3f, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x0c, 0x0e,
-  0x00, 0x04, 0xc1, 0xc0, 0x33, 0x91, 0xf9, 0x10, 0x82, 0x0b, 0x8c, 0x9b,
-  0x25, 0x90, 0x83, 0xe1, 0x06, 0x3d, 0x20, 0x11, 0x30, 0x98, 0x65, 0x08,
-  0x03, 0x39, 0x08, 0x6c, 0x34, 0x4a, 0x23, 0x3e, 0xc3, 0x11, 0x7f, 0x60,
-  0x1a, 0xc4, 0x37, 0xcb, 0x20, 0x06, 0x65, 0x10, 0xd8, 0x69, 0x80, 0x42,
-  0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x18, 0x65, 0x81, 0x21,
-  0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xe1, 0x22, 0x3a, 0xdc, 0x10, 0xb0, 0x08,
-  0x18, 0xcc, 0x32, 0x8c, 0x01, 0x19, 0x04, 0x36, 0xbc, 0x06, 0x7c, 0x66,
-  0x09, 0xd2, 0xc0, 0x5c, 0x83, 0x88, 0xcf, 0x2c, 0x41, 0x1a, 0x0c, 0x47,
-  0xa8, 0xc2, 0x6b, 0x08, 0xdf, 0x2c, 0x83, 0x19, 0xa4, 0x41, 0x60, 0xab,
-  0x00, 0x1b, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0x60, 0x94,
-  0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x44, 0x8e, 0xe8, 0x70, 0x43,
-  0x70, 0x23, 0x60, 0x30, 0xcb, 0x70, 0x06, 0x68, 0x10, 0x18, 0x6e, 0x0c,
-  0xf1, 0x99, 0x25, 0x48, 0x03, 0x23, 0x76, 0x03, 0x3e, 0xb3, 0x04, 0x69,
-  0x30, 0xd0, 0x62, 0x68, 0x63, 0x80, 0x91, 0x01, 0x71, 0x06, 0x02, 0x1a,
-  0xa8, 0x45, 0x19, 0x5c, 0x30, 0x8c, 0xe9, 0x86, 0x6f, 0xc4, 0x67, 0x38,
-  0xe2, 0x16, 0x7e, 0x83, 0xf8, 0x66, 0x19, 0xd4, 0xa0, 0x0d, 0x02, 0x03,
-  0x0f, 0x5c, 0x88, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa3,
-  0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xce, 0x44, 0x87, 0x1b,
-  0x82, 0x32, 0x01, 0x83, 0x59, 0x86, 0x35, 0x60, 0x83, 0xc0, 0x06, 0xf4,
-  0x80, 0xcf, 0x2c, 0x41, 0x1c, 0x58, 0x79, 0x10, 0xf1, 0x99, 0x25, 0x88,
-  0x83, 0xe1, 0x08, 0x71, 0x30, 0x0f, 0xe1, 0x9b, 0x65, 0x70, 0x83, 0x38,
-  0x08, 0x6c, 0x1c, 0xce, 0x23, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98,
-  0x0b, 0x8c, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x90, 0x13,
-  0x1d, 0x6e, 0x08, 0xe0, 0x04, 0x0c, 0x66, 0x19, 0xde, 0x00, 0x0e, 0x02,
-  0x7b, 0x8f, 0x21, 0x3e, 0xb3, 0x04, 0x71, 0x60, 0x04, 0x7d, 0xc0, 0x67,
-  0x96, 0x20, 0x0e, 0x06, 0x5a, 0x0c, 0x6d, 0x0d, 0x30, 0x36, 0x20, 0xde,
-  0x40, 0x80, 0x03, 0xdd, 0x68, 0x83, 0x0b, 0x86, 0xb9, 0xc0, 0xa8, 0xdb,
-  0x8c, 0x3a, 0xf0, 0x18, 0xe6, 0xe2, 0x61, 0x98, 0x23, 0x86, 0x39, 0x62,
-  0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa8, 0x48, 0x05, 0x4e, 0x58,
-  0xa4, 0x4f, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06,
-  0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10,
-  0x0c, 0x9e, 0x55, 0xb9, 0x93, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04,
-  0xc1, 0xe0, 0x61, 0x15, 0x3c, 0x49, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40,
-  0x10, 0x0c, 0x9e, 0x56, 0xc9, 0x93, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00,
-  0x04, 0xc1, 0x60, 0x5b, 0x15, 0x3c, 0xb1, 0x91, 0x80, 0x54, 0xca, 0x44,
-  0x54, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x0c, 0x0e, 0x00,
-  0x04, 0xc1, 0xc0, 0x6b, 0x15, 0x3d, 0x11, 0x82, 0x0b, 0x8c, 0x9b, 0x25,
-  0x90, 0x83, 0x81, 0x16, 0xc3, 0x35, 0x3a, 0x3e, 0xe2, 0x60, 0xe2, 0x13,
-  0xe2, 0x80, 0x8f, 0xc0, 0xe0, 0x82, 0x0e, 0x47, 0x0c, 0x0c, 0x00, 0x04,
-  0xc1, 0x60, 0x0c, 0x5e, 0x85, 0x4e, 0x82, 0xd1, 0x84, 0x00, 0x18, 0x4d,
-  0x10, 0x82, 0x11, 0x03, 0x07, 0x00, 0x41, 0x30, 0x00, 0x83, 0x59, 0xb1,
-  0x13, 0x31, 0xd9, 0x11, 0x57, 0x11, 0x82, 0x3d, 0xd9, 0x93, 0x37, 0x61,
-  0x95, 0x59, 0x82, 0x11, 0x1a, 0x6e, 0x60, 0x0d, 0x57, 0x01, 0x83, 0x59,
-  0x06, 0x3a, 0x88, 0x89, 0x60, 0xc4, 0xc0, 0x00, 0x40, 0x10, 0x0c, 0x8e,
-  0x5e, 0xd1, 0x13, 0x95, 0x18, 0x31, 0x30, 0x00, 0x10, 0x04, 0x83, 0xc3,
-  0x57, 0xf6, 0x44, 0x25, 0x4c, 0x38, 0x13, 0xf8, 0x98, 0x80, 0x26, 0xf0,
-  0x19, 0x4d, 0x10, 0x93, 0x61, 0xb8, 0x21, 0xa0, 0x15, 0x30, 0x98, 0x65,
-  0xa8, 0x83, 0x3b, 0x08, 0x86, 0x23, 0x0c, 0x35, 0x19, 0xbe, 0x3b, 0x86,
-  0x19, 0x6e, 0x08, 0xc0, 0x84, 0x0c, 0x6a, 0x08, 0x74, 0x38, 0x22, 0x71,
-  0x93, 0xe1, 0xab, 0x40, 0xd0, 0x5b, 0x86, 0x19, 0x6e, 0x08, 0xc6, 0x84,
-  0x0c, 0x2a, 0x18, 0x74, 0x96, 0xc1, 0x0e, 0x56, 0x21, 0xb8, 0x13, 0x19,
-  0xe6, 0x70, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa8, 0xd2,
-  0xa5, 0x56, 0xe2, 0x44, 0x5c, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08,
-  0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x38, 0x64, 0xc4, 0x00,
-  0x01, 0x40, 0x10, 0x0c, 0x1e, 0x78, 0xe1, 0x95, 0x83, 0x08, 0x46, 0x0c,
-  0x10, 0x00, 0x04, 0xc1, 0xe0, 0x89, 0x97, 0x5e, 0x61, 0x88, 0x60, 0xc4,
-  0x00, 0x01, 0x40, 0x10, 0x0c, 0x1e, 0x79, 0xf1, 0x15, 0x89, 0x08, 0x46,
-  0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0x83, 0x97, 0x5e, 0xd9, 0x93, 0x20,
-  0x5d, 0x54, 0xe5, 0x5c, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46,
-  0x0c, 0x0e, 0x00, 0x04, 0xc1, 0xc0, 0x93, 0x97, 0x5f, 0x11, 0x82, 0x0b,
-  0x8c, 0x9b, 0x25, 0x58, 0x85, 0xe1, 0x86, 0x2d, 0x5e, 0xc0, 0x60, 0x96,
-  0x01, 0x0f, 0xf2, 0x20, 0x28, 0x3c, 0x09, 0x17, 0xb8, 0xc0, 0xa8, 0x11,
-  0x83, 0x03, 0x00, 0x41, 0x30, 0x58, 0xfa, 0x45, 0x5c, 0xc4, 0xc0, 0x54,
-  0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0xf1, 0x17, 0x71, 0x09, 0x84,
-  0x0b, 0x86, 0xa9, 0x3d, 0x31, 0x17, 0xb8, 0xc0, 0xa8, 0x11, 0x83, 0x03,
-  0x00, 0x41, 0x30, 0x58, 0x44, 0xe6, 0x5c, 0xcc, 0x60, 0x55, 0x46, 0x0c,
-  0x0e, 0x00, 0x04, 0xc1, 0x60, 0x19, 0x99, 0x73, 0x09, 0x84, 0x0b, 0x86,
-  0xb9, 0xc0, 0xa8, 0x3b, 0x8c, 0x3a, 0x32, 0x19, 0xe6, 0xea, 0x62, 0x98,
-  0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa8,
-  0x50, 0x86, 0x5e, 0x60, 0x25, 0x64, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41,
-  0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4,
-  0x00, 0x01, 0x40, 0x10, 0x0c, 0x9e, 0x97, 0xd9, 0x97, 0x84, 0x08, 0x46,
-  0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x81, 0x19, 0x7e, 0x49, 0x88, 0x60,
-  0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x9e, 0x98, 0xe9, 0x97, 0x84, 0x08,
-  0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0x7b, 0x19, 0x7e, 0xd1, 0x95,
-  0x00, 0x65, 0xd2, 0xc5, 0x64, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08,
-  0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0xc0, 0x8b, 0x19, 0x7f, 0x11, 0x82,
-  0x0b, 0x8c, 0x9b, 0x25, 0x58, 0x85, 0xe1, 0x06, 0x3d, 0x78, 0x19, 0x30,
-  0x98, 0x65, 0xd0, 0x83, 0x55, 0x08, 0xcc, 0x55, 0x60, 0x25, 0x3e, 0xc3,
-  0x11, 0xa0, 0x10, 0x2b, 0xc4, 0x37, 0xcb, 0xb0, 0x07, 0x7e, 0x10, 0x98,
-  0xac, 0x84, 0x42, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x18,
-  0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x91, 0x33, 0x3a, 0xdc,
-  0x10, 0xdc, 0x0c, 0x18, 0xcc, 0x32, 0xf0, 0x41, 0x1f, 0x04, 0x36, 0xe8,
-  0x0a, 0x7c, 0x66, 0x09, 0x44, 0xc1, 0x72, 0x85, 0x88, 0xcf, 0x2c, 0x81,
-  0x28, 0x0c, 0x47, 0xac, 0x82, 0xae, 0x08, 0xdf, 0x2c, 0xc3, 0x1f, 0x88,
-  0x42, 0x60, 0xac, 0xb0, 0x2b, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3,
-  0x5c, 0x60, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x04, 0xd9,
-  0xe8, 0x70, 0x43, 0x20, 0x36, 0x60, 0x30, 0xcb, 0x00, 0x0a, 0xa1, 0x10,
-  0xd8, 0xb8, 0x0c, 0xf1, 0x99, 0x25, 0x10, 0x05, 0x23, 0xcc, 0x05, 0x3e,
-  0xb3, 0x04, 0xa2, 0x30, 0xd0, 0x62, 0x68, 0x7c, 0x80, 0xf5, 0x01, 0x01,
-  0x0a, 0x42, 0x28, 0xb0, 0x85, 0x1f, 0x5c, 0x30, 0x8c, 0x95, 0x4b, 0xba,
-  0xc4, 0x67, 0x38, 0x02, 0x17, 0xd4, 0x85, 0xf8, 0x66, 0x19, 0x46, 0xc1,
-  0x14, 0x02, 0x5b, 0x97, 0x5c, 0x88, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18,
-  0xe6, 0x02, 0xa3, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xe4,
-  0x46, 0x87, 0x1b, 0x02, 0xb8, 0x01, 0x83, 0x59, 0x06, 0x52, 0x28, 0x85,
-  0xc0, 0x86, 0x79, 0x81, 0xcf, 0x2c, 0x81, 0x2a, 0x18, 0xbc, 0x10, 0xf1,
-  0x99, 0x25, 0x50, 0x85, 0xe1, 0x88, 0x71, 0x88, 0x17, 0xe1, 0x9b, 0x65,
-  0x38, 0x05, 0x55, 0x08, 0x8c, 0x1c, 0xe4, 0x25, 0x3e, 0x16, 0x38, 0xf4,
-  0xb9, 0x60, 0x98, 0x0b, 0x8c, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c,
-  0x8a, 0xe8, 0x1b, 0x1d, 0x6e, 0x08, 0xf6, 0x06, 0x0c, 0x66, 0x19, 0x50,
-  0x21, 0x15, 0x02, 0xd3, 0x97, 0x21, 0x3e, 0xb3, 0x04, 0xaa, 0x60, 0xc4,
-  0xbf, 0xc0, 0x67, 0x96, 0x40, 0x15, 0x06, 0x5a, 0x0c, 0x8d, 0x14, 0xb0,
-  0x52, 0x20, 0x50, 0x41, 0x48, 0x05, 0xde, 0x30, 0x85, 0x0b, 0x86, 0xb9,
-  0xc0, 0xa8, 0xdb, 0x8c, 0xba, 0x75, 0x19, 0xe6, 0xf8, 0x63, 0x98, 0x23,
-  0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa8, 0x5e,
-  0x67, 0x6f, 0x6e, 0x06, 0x75, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08,
-  0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00,
-  0x01, 0x40, 0x10, 0x0c, 0x1e, 0xdb, 0x11, 0x9d, 0x84, 0x08, 0x46, 0x0c,
-  0x10, 0x00, 0x04, 0xc1, 0xe0, 0xb9, 0x9d, 0xd1, 0x49, 0x88, 0x60, 0xc4,
-  0x00, 0x01, 0x40, 0x10, 0x0c, 0x1e, 0xdc, 0x21, 0x9d, 0x84, 0x08, 0x46,
-  0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0xb3, 0x9d, 0xd1, 0x09, 0x9b, 0xe0,
-  0x75, 0xe0, 0xa6, 0x75, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46,
-  0x0c, 0x0e, 0x00, 0x04, 0xc1, 0xc0, 0xc3, 0x9d, 0xd2, 0x11, 0x82, 0x0b,
-  0x8c, 0x9b, 0x25, 0x58, 0x85, 0x81, 0x16, 0xc3, 0x35, 0xec, 0x40, 0xd6,
-  0xea, 0x00, 0x26, 0xf0, 0x40, 0x50, 0x05, 0x59, 0xcb, 0x83, 0x59, 0x06,
-  0x56, 0x70, 0x85, 0x90, 0x18, 0x8e, 0x20, 0x09, 0xb5, 0x19, 0xbe, 0x2b,
-  0x89, 0x61, 0x86, 0x1b, 0x02, 0xb0, 0x21, 0x83, 0x1a, 0x02, 0x1d, 0x8e,
-  0x48, 0x09, 0xb7, 0x19, 0xbe, 0x0a, 0x04, 0xbd, 0x95, 0x18, 0x66, 0xb8,
-  0x21, 0x18, 0x1b, 0x32, 0xa8, 0x60, 0xd0, 0x59, 0x86, 0x56, 0x10, 0x87,
-  0xe0, 0x4e, 0x66, 0x98, 0xc3, 0x91, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04,
-  0xc1, 0xa0, 0x4a, 0x9f, 0xda, 0x89, 0x1b, 0xf1, 0x19, 0x4d, 0x08, 0x80,
-  0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0xe2,
-  0x90, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0x78, 0xe0, 0x87, 0x77, 0x0e,
-  0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x27, 0x7e, 0x7a, 0x87,
-  0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0x78, 0xe4, 0xc7, 0x77,
-  0x24, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x0d, 0x7e, 0x7a,
-  0x67, 0x6f, 0x82, 0xf4, 0x51, 0x9d, 0xf3, 0x19, 0x4d, 0x08, 0x80, 0xd1,
-  0x04, 0x21, 0x18, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x4f, 0x7e, 0x7e,
-  0x47, 0x08, 0x2e, 0x30, 0x6e, 0x96, 0x40, 0x1c, 0x86, 0x1b, 0x76, 0x22,
-  0x7e, 0xc0, 0x60, 0x96, 0xe1, 0x15, 0x60, 0x21, 0x28, 0xbc, 0x09, 0x1f,
-  0xb8, 0xc0, 0xa8, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x58, 0xfa, 0x47,
-  0x7c, 0xc0, 0xc2, 0x74, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0xf1,
-  0x1f, 0xf1, 0x09, 0x84, 0x0b, 0x86, 0xa9, 0xbd, 0x31, 0x1f, 0xb8, 0xc0,
-  0xa8, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x58, 0x44, 0xe8, 0x7c, 0xcc,
-  0x62, 0x75, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0x19, 0xa1, 0xf3,
-  0x09, 0x84, 0x0b, 0x86, 0xb9, 0xc0, 0xa8, 0x3b, 0x8c, 0x3a, 0xb2, 0x19,
-  0xe6, 0xea, 0x64, 0x98, 0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03,
-  0x00, 0x41, 0x30, 0xa8, 0x50, 0x88, 0x7e, 0x60, 0x27, 0x84, 0x46, 0x13,
-  0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18,
-  0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x9e, 0x17, 0xda,
-  0x9f, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x81, 0x21,
-  0xfe, 0x49, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x9e, 0x18,
-  0xea, 0x9f, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0x7b,
-  0x21, 0xfe, 0xd1, 0x9d, 0x00, 0x85, 0xd2, 0xc7, 0x84, 0x46, 0x13, 0x02,
-  0x60, 0x34, 0x41, 0x08, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0xc0, 0x8b,
-  0x21, 0xff, 0x11, 0x82, 0x0b, 0x8c, 0x9b, 0x25, 0x10, 0x87, 0xe1, 0x06,
-  0xbd, 0x78, 0x21, 0x30, 0x98, 0x65, 0x88, 0x05, 0x71, 0x08, 0xcc, 0x75,
-  0x60, 0x27, 0x3e, 0xc3, 0x11, 0x7e, 0x11, 0x3b, 0xc4, 0x37, 0xcb, 0x20,
-  0x0b, 0xb5, 0x10, 0x98, 0xec, 0xfc, 0x45, 0x7c, 0x2c, 0x18, 0xe8, 0x73,
-  0xc1, 0x30, 0x17, 0x18, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14,
-  0x91, 0x43, 0x3a, 0xdc, 0x10, 0xdc, 0x10, 0x18, 0xcc, 0x32, 0xcc, 0x02,
-  0x2d, 0x04, 0x36, 0xe8, 0x0e, 0x7c, 0x66, 0x09, 0x72, 0xc1, 0x72, 0x87,
-  0x88, 0xcf, 0x2c, 0x41, 0x2e, 0x0c, 0x47, 0xa4, 0x86, 0xee, 0x08, 0xdf,
-  0x2c, 0x83, 0x2d, 0xe4, 0x42, 0x60, 0xaa, 0xb1, 0x3b, 0xf1, 0xb1, 0xc0,
-  0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0x60, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08,
-  0xe2, 0x53, 0x04, 0x19, 0xe9, 0x70, 0x43, 0x20, 0x46, 0x60, 0x30, 0xcb,
-  0x70, 0x0b, 0xb8, 0x10, 0xd8, 0xf8, 0x0c, 0xf1, 0x99, 0x25, 0xc8, 0x05,
-  0x23, 0xcc, 0x07, 0x3e, 0xb3, 0x04, 0xb9, 0x30, 0xd0, 0x62, 0x68, 0xb3,
-  0x80, 0xd1, 0x02, 0x71, 0x0b, 0x02, 0x2e, 0xa0, 0x4d, 0x2d, 0x5c, 0x30,
-  0x8c, 0x95, 0x4f, 0xfa, 0xc4, 0x67, 0x38, 0x02, 0x37, 0xd4, 0x87, 0xf8,
-  0x66, 0x19, 0x74, 0xa1, 0x17, 0x02, 0x5b, 0x9f, 0xdc, 0x88, 0x8f, 0x05,
-  0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa3, 0x2c, 0x30, 0xe4, 0x63, 0x45,
-  0x10, 0x9f, 0x22, 0xe4, 0x48, 0x87, 0x1b, 0x02, 0x38, 0x02, 0x83, 0x59,
-  0x86, 0x5d, 0xe0, 0x85, 0xc0, 0x86, 0xf9, 0x81, 0xcf, 0x2c, 0x41, 0x38,
-  0x18, 0xfc, 0x10, 0xf1, 0x99, 0x25, 0x08, 0x87, 0xe1, 0x88, 0xf1, 0x88,
-  0x1f, 0xe1, 0x9b, 0x65, 0xf0, 0x85, 0x70, 0x08, 0x8c, 0x3c, 0xe4, 0x27,
-  0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x8c, 0xb2, 0x20, 0x92,
-  0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xe8, 0x23, 0x1d, 0x6e, 0x08, 0xf6, 0x08,
-  0x0c, 0x66, 0x19, 0x7e, 0x01, 0x1c, 0x02, 0xd3, 0x9f, 0x21, 0x3e, 0xb3,
-  0x04, 0xe1, 0x60, 0xc4, 0xff, 0xc0, 0x67, 0x96, 0x20, 0x1c, 0x06, 0x5a,
-  0x0c, 0x6d, 0x17, 0x30, 0x5e, 0x20, 0x7e, 0x41, 0x00, 0x07, 0xde, 0xe9,
-  0x85, 0x0b, 0x86, 0xb9, 0xc0, 0xa8, 0xdb, 0x8c, 0xba, 0xf5, 0x19, 0xe6,
-  0xf8, 0x65, 0x98, 0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00,
-  0x41, 0x30, 0xa8, 0x5e, 0x69, 0x8f, 0x6e, 0x08, 0x95, 0x46, 0x13, 0x02,
-  0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a,
-  0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x1e, 0x5b, 0x12, 0xa5,
-  0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0xb9, 0xa5, 0x51,
-  0x4a, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x1e, 0x5c, 0x22,
-  0xa5, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0xb3, 0xa5,
-  0x51, 0x0a, 0xa3, 0xe0, 0x95, 0xe0, 0xa8, 0x95, 0x46, 0x13, 0x02, 0x60,
-  0x34, 0x41, 0x08, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0xc0, 0xc3, 0xa5,
-  0x52, 0x12, 0x82, 0x0b, 0x8c, 0x9b, 0x25, 0x10, 0x87, 0x81, 0x16, 0xc3,
-  0x35, 0x5a, 0x41, 0x16, 0x03, 0x56, 0x80, 0x89, 0x57, 0x10, 0xc2, 0x41,
-  0x16, 0x03, 0x58, 0x98, 0x65, 0x18, 0x87, 0x72, 0x08, 0x91, 0xe1, 0x08,
-  0x13, 0x51, 0xa3, 0xe1, 0xbb, 0x13, 0x19, 0x66, 0xb8, 0x21, 0x00, 0x23,
-  0x32, 0xa8, 0x21, 0xd0, 0xe1, 0x88, 0x13, 0x71, 0xa3, 0xe1, 0xab, 0x40,
-  0xd0, 0x4b, 0x91, 0x61, 0x86, 0x1b, 0x82, 0x31, 0x22, 0x83, 0x0a, 0x06,
-  0x9d, 0x65, 0x20, 0x87, 0x7c, 0x08, 0xee, 0x84, 0x86, 0x39, 0x9c, 0x19,
-  0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xaa, 0x74, 0xaa, 0xa5, 0x38,
-  0x12, 0xa7, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41,
-  0x18, 0x4d, 0x20, 0x86, 0x22, 0x0e, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04,
-  0x83, 0x07, 0x9e, 0x78, 0xe9, 0x20, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41,
-  0x30, 0x78, 0xe2, 0xa9, 0x97, 0x18, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10,
-  0x04, 0x83, 0x47, 0x9e, 0x7c, 0x49, 0x22, 0x82, 0x11, 0x03, 0x05, 0x00,
-  0x41, 0x30, 0xd8, 0xe0, 0xa9, 0x97, 0xf6, 0x28, 0x48, 0x27, 0x55, 0x3a,
-  0xa7, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0x11, 0x83, 0x03, 0x00,
-  0x41, 0x30, 0xf0, 0xe4, 0xe9, 0x97, 0x84, 0xe0, 0x02, 0xe3, 0x66, 0x09,
-  0xf2, 0x61, 0xb8, 0x61, 0x47, 0xe2, 0x09, 0x0c, 0x66, 0x19, 0xcc, 0xe1,
-  0x1c, 0x82, 0xc2, 0xa3, 0x70, 0x82, 0x0b, 0x8c, 0x1a, 0x31, 0x38, 0x00,
-  0x10, 0x04, 0x83, 0xa5, 0x9f, 0xc4, 0x49, 0x4c, 0x4c, 0x69, 0xc4, 0xe0,
-  0x00, 0x40, 0x10, 0x0c, 0x16, 0x7f, 0x12, 0xa7, 0x40, 0xb8, 0x60, 0x98,
-  0xda, 0x23, 0x73, 0x82, 0x0b, 0x8c, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04,
-  0x83, 0x45, 0xa4, 0xce, 0x89, 0x4c, 0x56, 0x69, 0xc4, 0xe0, 0x00, 0x40,
-  0x10, 0x0c, 0x96, 0x91, 0x3a, 0xa7, 0x40, 0xb8, 0x60, 0x98, 0x0b, 0x8c,
-  0xba, 0xc3, 0xa8, 0x23, 0xa3, 0x61, 0xae, 0x6e, 0x86, 0x39, 0x62, 0x98,
-  0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x0a, 0xa5, 0xe8,
-  0x09, 0x96, 0x42, 0x6a, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34,
-  0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10, 0x00,
-  0x04, 0xc1, 0xe0, 0x79, 0xa9, 0x7d, 0x4a, 0x88, 0x60, 0xc4, 0x00, 0x01,
-  0x40, 0x10, 0x0c, 0x1e, 0x98, 0xe2, 0xa7, 0x84, 0x08, 0x46, 0x0c, 0x10,
-  0x00, 0x04, 0xc1, 0xe0, 0x89, 0xa9, 0x7e, 0x4a, 0x88, 0x60, 0xc4, 0x40,
-  0x01, 0x40, 0x10, 0x0c, 0xb6, 0x97, 0xe2, 0x27, 0x5d, 0x0a, 0x50, 0x2a,
-  0x9d, 0x4c, 0x6a, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0xc4, 0xe0,
-  0x00, 0x40, 0x10, 0x0c, 0xbc, 0x98, 0xf2, 0x27, 0x21, 0xb8, 0xc0, 0xb8,
-  0x59, 0x82, 0x7c, 0x18, 0x6e, 0xd0, 0x93, 0x97, 0x02, 0x83, 0x59, 0x06,
-  0x74, 0xc8, 0x87, 0xc0, 0x5c, 0x09, 0x96, 0xe2, 0x33, 0x1c, 0x01, 0x2a,
-  0xb1, 0x44, 0x7c, 0xb3, 0x0c, 0xe9, 0xc0, 0x0e, 0x81, 0xc9, 0x52, 0xa8,
-  0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x51, 0x16, 0x18,
-  0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x39, 0xa5, 0xc3, 0x0d, 0xc1, 0x4d,
-  0x81, 0xc1, 0x2c, 0x83, 0x3a, 0xac, 0x43, 0x60, 0x83, 0x2e, 0xc1, 0x67,
-  0x96, 0x00, 0x1e, 0x2c, 0x97, 0x88, 0xf8, 0xcc, 0x12, 0xc0, 0xc3, 0x70,
-  0xc4, 0xaa, 0xe8, 0x92, 0xf0, 0xcd, 0x32, 0xb4, 0x03, 0x3c, 0x04, 0xc6,
-  0x2a, 0xbb, 0x14, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x46,
-  0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0x90, 0x95, 0x0e, 0x37,
-  0x04, 0x62, 0x05, 0x06, 0xb3, 0x0c, 0xee, 0xf0, 0x0e, 0x81, 0x8d, 0xd3,
-  0x10, 0x9f, 0x59, 0x02, 0x78, 0x30, 0xc2, 0x9c, 0xe0, 0x33, 0x4b, 0x00,
-  0x0f, 0x03, 0x2d, 0x86, 0xa6, 0x0e, 0xd8, 0x3a, 0x10, 0xee, 0x20, 0xbc,
-  0x03, 0x5b, 0xb1, 0xc3, 0x05, 0xc3, 0x58, 0x39, 0xa5, 0x53, 0x7c, 0x86,
-  0x23, 0x6c, 0x45, 0x9d, 0x88, 0x6f, 0x96, 0x21, 0x1e, 0xe8, 0x21, 0xb0,
-  0x75, 0xba, 0x95, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x30,
-  0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x42, 0xae, 0x74, 0xb8,
-  0x21, 0x80, 0x2b, 0x30, 0x98, 0x65, 0x90, 0x87, 0x79, 0x08, 0x6c, 0x98,
-  0x27, 0xf8, 0xcc, 0x12, 0xe0, 0x83, 0xc1, 0x13, 0x11, 0x9f, 0x59, 0x02,
-  0x7c, 0x18, 0x8e, 0x08, 0x97, 0x78, 0x12, 0xbe, 0x59, 0x86, 0x7a, 0xc0,
-  0x87, 0xc0, 0xc4, 0x45, 0x9e, 0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86,
-  0xb9, 0xc0, 0x28, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0xbe,
-  0xd2, 0xe1, 0x86, 0x60, 0xaf, 0xc0, 0x60, 0x96, 0xc1, 0x1e, 0xee, 0x21,
-  0x30, 0x7d, 0x1a, 0xe2, 0x33, 0x4b, 0x80, 0x0f, 0x46, 0xfc, 0x13, 0x7c,
-  0x66, 0x09, 0xf0, 0x61, 0xa0, 0xc5, 0xd0, 0xe4, 0x01, 0x9b, 0x07, 0xc2,
-  0x1e, 0x84, 0x7b, 0xc0, 0x2d, 0x7a, 0xb8, 0x60, 0x98, 0x0b, 0x8c, 0xba,
-  0xcd, 0xa8, 0x5b, 0xa7, 0x61, 0x8e, 0x7f, 0x86, 0x39, 0x62, 0x98, 0x23,
-  0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0xea, 0xb5, 0xf6, 0xea,
-  0xa6, 0x50, 0x6b, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61,
-  0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10, 0x00, 0x04,
-  0xc1, 0xe0, 0xb1, 0x2d, 0xd1, 0x4a, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40,
-  0x10, 0x0c, 0x9e, 0xdb, 0x1a, 0xad, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00,
-  0x04, 0xc1, 0xe0, 0xc1, 0x2d, 0xd2, 0x4a, 0x88, 0x60, 0xc4, 0x40, 0x01,
-  0x40, 0x10, 0x0c, 0x36, 0xdb, 0x1a, 0xad, 0xb0, 0x0a, 0x5e, 0x0b, 0xae,
-  0x5a, 0x6b, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0xc4, 0xe0, 0x00,
-  0x40, 0x10, 0x0c, 0x3c, 0xdc, 0x2a, 0x2d, 0x21, 0xb8, 0xc0, 0xb8, 0x59,
-  0x82, 0x7c, 0x18, 0x68, 0x31, 0x5c, 0x83, 0x1c, 0x64, 0x35, 0x18, 0x07,
-  0x98, 0x30, 0x07, 0x01, 0x1f, 0x64, 0x35, 0x38, 0x87, 0x59, 0x06, 0x7d,
-  0xe0, 0x87, 0x90, 0x19, 0x8e, 0x20, 0x19, 0xb5, 0x1a, 0xbe, 0x2b, 0x99,
-  0x61, 0x86, 0x1b, 0x02, 0xb0, 0x22, 0x83, 0x1a, 0x02, 0x1d, 0x8e, 0x38,
-  0x19, 0xb7, 0x1a, 0xbe, 0x0a, 0x04, 0xbd, 0x94, 0x19, 0x66, 0xb8, 0x21,
-  0x18, 0x2b, 0x32, 0xa8, 0x60, 0xd0, 0x59, 0x86, 0x7d, 0x80, 0x89, 0xe0,
-  0x4e, 0x6a, 0x98, 0xc3, 0xa1, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1,
-  0xa0, 0x4a, 0xaf, 0xda, 0x8a, 0x2b, 0xf1, 0x1a, 0x4d, 0x08, 0x80, 0xd1,
-  0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0xe2, 0x90,
-  0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0x78, 0xe0, 0x8b, 0xb7, 0x0e, 0x22,
-  0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x27, 0xbe, 0x7a, 0x8b, 0x21,
-  0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0x78, 0xe4, 0xcb, 0xb7, 0x24,
-  0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x0d, 0xbe, 0x7a, 0x6b,
-  0xaf, 0x82, 0xf4, 0x52, 0xad, 0xf3, 0x1a, 0x4d, 0x08, 0x80, 0xd1, 0x04,
-  0x21, 0x18, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x4f, 0xbe, 0x7e, 0x4b,
-  0x08, 0x2e, 0x30, 0x6e, 0x96, 0x00, 0x26, 0x86, 0x1b, 0x76, 0x26, 0xbe,
-  0xc0, 0x60, 0x96, 0xa1, 0x1f, 0xfc, 0x21, 0x28, 0xbc, 0x0a, 0x2f, 0xb8,
-  0xc0, 0xa8, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x58, 0xfa, 0x4b, 0xbc,
-  0xc0, 0xc6, 0xb4, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0xf1, 0x2f,
-  0xf1, 0x0a, 0x84, 0x0b, 0x86, 0xa9, 0xbd, 0x32, 0x2f, 0xb8, 0xc0, 0xa8,
-  0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x58, 0x44, 0xec, 0xbc, 0xc8, 0x66,
-  0xb5, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0x19, 0xb1, 0xf3, 0x0a,
-  0x84, 0x0b, 0x86, 0xb9, 0xc0, 0xa8, 0x3b, 0x8c, 0x3a, 0xb2, 0x1a, 0xe6,
-  0xea, 0x68, 0x98, 0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00,
-  0x41, 0x30, 0xa8, 0x50, 0x8c, 0xbe, 0x60, 0x2b, 0xc4, 0x46, 0x13, 0x02,
-  0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a,
-  0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x9e, 0x17, 0xdb, 0xaf,
-  0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x81, 0x31, 0xfe,
-  0x4a, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x9e, 0x18, 0xeb,
-  0xaf, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0x7b, 0x31,
-  0xfe, 0xd2, 0xad, 0x00, 0xc5, 0xd2, 0xcb, 0xc4, 0x46, 0x13, 0x02, 0x60,
-  0x34, 0x41, 0x08, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0xc0, 0x8b, 0x31,
-  0xff, 0x12, 0x82, 0x0b, 0x8c, 0x9b, 0x25, 0x80, 0x89, 0xe1, 0x06, 0xbd,
-  0x79, 0x31, 0x30, 0x98, 0x65, 0xf8, 0x07, 0x98, 0x08, 0xcc, 0xb5, 0x60,
-  0x2b, 0x3e, 0xc3, 0x11, 0x7e, 0x13, 0x5b, 0xc4, 0x37, 0xcb, 0x00, 0x12,
-  0x23, 0x11, 0x98, 0x6c, 0xfd, 0x4d, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1,
-  0x30, 0x17, 0x18, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x91,
-  0x63, 0x3a, 0xdc, 0x10, 0xdc, 0x18, 0x18, 0xcc, 0x32, 0x84, 0x84, 0x48,
-  0x04, 0x36, 0xe8, 0x16, 0x7c, 0x66, 0x09, 0x4e, 0xc2, 0x72, 0x8b, 0x88,
-  0xcf, 0x2c, 0xc1, 0x49, 0x0c, 0x47, 0xa4, 0x8e, 0x6e, 0x09, 0xdf, 0x2c,
-  0x03, 0x49, 0x9c, 0x44, 0x60, 0xaa, 0xb3, 0x5b, 0xf1, 0xb1, 0xc0, 0xa1,
-  0xcf, 0x05, 0xc3, 0x5c, 0x60, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2,
-  0x53, 0x04, 0x99, 0xe9, 0x70, 0x43, 0x20, 0x66, 0x60, 0x30, 0xcb, 0x50,
-  0x12, 0x26, 0x11, 0xd8, 0x78, 0x0d, 0xf1, 0x99, 0x25, 0x38, 0x09, 0x23,
-  0xcc, 0x0b, 0x3e, 0xb3, 0x04, 0x27, 0x31, 0xd0, 0x62, 0x68, 0x21, 0x81,
-  0x89, 0x04, 0x51, 0x12, 0x82, 0x49, 0xa0, 0xdd, 0x48, 0x5c, 0x30, 0x8c,
-  0x95, 0x57, 0x7a, 0xc5, 0x67, 0x38, 0xc2, 0x76, 0xd4, 0x8b, 0xf8, 0x66,
-  0x19, 0x50, 0x62, 0x25, 0x02, 0x5b, 0xaf, 0xdb, 0x89, 0x8f, 0x05, 0x03,
-  0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa3, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10,
-  0x9f, 0x22, 0xe4, 0x4c, 0x87, 0x1b, 0x02, 0x38, 0x03, 0x83, 0x59, 0x86,
-  0x94, 0x50, 0x89, 0xc0, 0x86, 0xf9, 0x82, 0xcf, 0x2c, 0xc1, 0x4b, 0x18,
-  0x7c, 0x11, 0xf1, 0x99, 0x25, 0x78, 0x89, 0xe1, 0x88, 0xf0, 0x89, 0x2f,
-  0xe1, 0x9b, 0x65, 0x60, 0x89, 0x97, 0x08, 0x4c, 0x7c, 0xe4, 0x2b, 0x3e,
-  0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x8c, 0xb2, 0x20, 0x92, 0x8f,
-  0x15, 0x41, 0x7c, 0x8a, 0xe8, 0x33, 0x1d, 0x6e, 0x08, 0xf6, 0x0c, 0x0c,
-  0x66, 0x19, 0x5a, 0xc2, 0x25, 0x02, 0xd3, 0xaf, 0x21, 0x3e, 0xb3, 0x04,
-  0x2f, 0x61, 0xc4, 0x7f, 0xc1, 0x67, 0x96, 0xe0, 0x25, 0x06, 0x5a, 0x0c,
-  0x2d, 0x25, 0x30, 0x95, 0x20, 0x5a, 0x42, 0x70, 0x09, 0xdc, 0x5b, 0x89,
-  0x0b, 0x86, 0xb9, 0xc0, 0xa8, 0xdb, 0x8c, 0xba, 0xf5, 0x1a, 0xe6, 0xf8,
-  0x69, 0x98, 0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41,
-  0x30, 0xa8, 0x5e, 0x6d, 0xcf, 0x6e, 0x0c, 0xd5, 0x46, 0x13, 0x02, 0x60,
-  0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48,
-  0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x1e, 0x5b, 0x13, 0xb5, 0x84,
-  0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0xb9, 0xb5, 0x51, 0x4b,
-  0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x1e, 0x5c, 0x23, 0xb5,
-  0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0xb3, 0xb5, 0x51,
-  0x0b, 0xb3, 0xe0, 0xd5, 0xe0, 0xac, 0xd5, 0x46, 0x13, 0x02, 0x60, 0x34,
-  0x41, 0x08, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0xc0, 0xc3, 0xb5, 0x52,
-  0x13, 0x82, 0x0b, 0x8c, 0x9b, 0x25, 0x80, 0x89, 0x81, 0x16, 0xc3, 0x35,
-  0xf6, 0x41, 0x96, 0x03, 0x7d, 0x80, 0x89, 0x7e, 0x10, 0x5e, 0x42, 0x96,
-  0x03, 0x7f, 0x18, 0x31, 0x30, 0x00, 0x10, 0x04, 0x83, 0x63, 0xdc, 0xfe,
-  0x0c, 0xa6, 0x4c, 0x44, 0x44, 0x26, 0x3e, 0x26, 0x04, 0xf2, 0xb1, 0x80,
-  0x64, 0xe0, 0x63, 0x45, 0x49, 0xc4, 0xc7, 0x8a, 0x40, 0x3e, 0x16, 0x9c,
-  0x04, 0x7c, 0x46, 0x0c, 0x0c, 0x00, 0x04, 0xc1, 0xe0, 0x50, 0x37, 0x53,
-  0xb3, 0x29, 0x13, 0x8a, 0xf8, 0x58, 0x20, 0xc8, 0xc7, 0x82, 0x03, 0x3e,
-  0x17, 0x74, 0x38, 0x62, 0x60, 0x00, 0x20, 0x08, 0x06, 0x63, 0x50, 0x6e,
-  0xaa, 0x16, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x18, 0x38,
-  0x00, 0x08, 0x82, 0x01, 0x18, 0xa4, 0x1b, 0xab, 0xe1, 0x59, 0x9c, 0x91,
-  0x9b, 0x10, 0xc4, 0x5a, 0xac, 0x95, 0x9a, 0xb8, 0xcd, 0x12, 0x8c, 0xd0,
-  0x70, 0x83, 0x78, 0x99, 0x1b, 0x18, 0xcc, 0x32, 0xc8, 0xc4, 0x08, 0x05,
-  0x23, 0x06, 0x06, 0x00, 0x82, 0x60, 0x70, 0xcc, 0x1b, 0xac, 0x81, 0x95,
-  0x05, 0xa4, 0x06, 0x9f, 0x11, 0x03, 0x03, 0x00, 0x41, 0x30, 0x38, 0xea,
-  0x4d, 0xd6, 0xc2, 0xca, 0x02, 0x53, 0x83, 0xcf, 0x68, 0x02, 0x9e, 0x0d,
-  0xc3, 0x0d, 0x81, 0xba, 0x81, 0xc1, 0x2c, 0xc3, 0x4c, 0xd4, 0x44, 0x30,
-  0x1c, 0x51, 0x80, 0xda, 0xf0, 0x9d, 0x31, 0xcc, 0x70, 0x43, 0x60, 0x67,
-  0x64, 0x50, 0x43, 0xa0, 0xc3, 0x11, 0x07, 0xa9, 0x0d, 0x5f, 0x05, 0x82,
-  0x5e, 0x32, 0xcc, 0x70, 0x43, 0x90, 0x67, 0x64, 0x50, 0xc1, 0xa0, 0xb3,
-  0x0c, 0x34, 0x91, 0x16, 0xc1, 0xf5, 0xd8, 0x30, 0xe7, 0x56, 0xc3, 0x8c,
-  0x18, 0x1c, 0x00, 0x08, 0x82, 0x41, 0xf5, 0x6f, 0xeb, 0x76, 0x6a, 0xf8,
-  0x36, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3,
-  0x09, 0xc4, 0x50, 0xc4, 0x21, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xf0,
-  0x98, 0x9c, 0xbc, 0x1d, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06,
-  0xcf, 0xc9, 0xcd, 0x1b, 0x43, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60,
-  0xf0, 0xa0, 0x1c, 0xbd, 0x49, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08,
-  0x06, 0x9b, 0xc9, 0xcd, 0x5b, 0xac, 0x05, 0xff, 0x06, 0x6e, 0xfd, 0x36,
-  0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08,
-  0x06, 0x1e, 0xca, 0xd5, 0x9b, 0x10, 0x5c, 0x60, 0xdc, 0x2c, 0x41, 0x5a,
-  0x0c, 0x37, 0x6c, 0x27, 0x07, 0x06, 0xb3, 0x0c, 0x36, 0x71, 0x13, 0x41,
-  0xb9, 0xda, 0xbd, 0xc1, 0x05, 0x46, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82,
-  0xc1, 0x32, 0x73, 0xf8, 0x16, 0x06, 0xbc, 0x36, 0x62, 0x70, 0x00, 0x20,
-  0x08, 0x06, 0x0b, 0xcd, 0xe1, 0x5b, 0x20, 0x5c, 0x30, 0x4c, 0xc5, 0x1a,
-  0xbf, 0xc1, 0x05, 0x46, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x82,
-  0x73, 0xfd, 0x46, 0x06, 0xe1, 0x36, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06,
-  0x4b, 0xce, 0xf5, 0x5b, 0x20, 0x5c, 0x30, 0xcc, 0x05, 0x46, 0xdd, 0x61,
-  0xd4, 0xe9, 0xd9, 0x30, 0xb7, 0x5a, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3,
-  0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x41, 0xe5, 0x73, 0x2a, 0x67, 0x6e,
-  0x37, 0x37, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08,
-  0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60,
-  0xf0, 0x94, 0x5d, 0xcc, 0x25, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08,
-  0x06, 0x8f, 0xd9, 0xc9, 0x5c, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82,
-  0x60, 0xf0, 0x9c, 0xdd, 0xcc, 0x25, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20,
-  0x08, 0x06, 0x5b, 0xd9, 0xc9, 0x1c, 0xbc, 0x05, 0x3e, 0xf7, 0x6f, 0x3c,
-  0x37, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x62, 0x70, 0x00, 0x20,
-  0x08, 0x06, 0xde, 0xd9, 0xd1, 0x9c, 0x10, 0x5c, 0x60, 0xdc, 0x2c, 0x41,
-  0x5a, 0x0c, 0x37, 0xe8, 0x41, 0xd9, 0x81, 0xc1, 0x2c, 0x03, 0x4e, 0xa4,
-  0x45, 0x60, 0xe4, 0x66, 0x6e, 0xf1, 0x19, 0x8e, 0xf8, 0x83, 0x73, 0x23,
-  0xbe, 0x59, 0x86, 0x9c, 0xe0, 0x89, 0xc0, 0xd0, 0x0d, 0x14, 0xe2, 0x63,
-  0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x28, 0x0b, 0x0c, 0xf9, 0x58,
-  0x11, 0xc4, 0xa7, 0x88, 0xb7, 0xd3, 0xe1, 0x86, 0xa0, 0xed, 0xc0, 0x60,
-  0x96, 0x41, 0x27, 0x76, 0x22, 0xb0, 0x01, 0xde, 0xe0, 0x33, 0x4b, 0x00,
-  0x16, 0xf6, 0x6e, 0x44, 0x7c, 0x66, 0x09, 0xc0, 0x62, 0x38, 0x42, 0x15,
-  0xe0, 0x4d, 0xf8, 0x66, 0x19, 0x7a, 0x02, 0x2c, 0x02, 0x5b, 0x85, 0x78,
-  0x8b, 0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa3, 0x2c, 0x88,
-  0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xf4, 0x4e, 0x87, 0x1b, 0x02, 0xbc,
-  0x03, 0x83, 0x59, 0x06, 0x9f, 0xf8, 0x89, 0xc0, 0xf2, 0x6d, 0x88, 0xcf,
-  0x2c, 0x01, 0x58, 0x18, 0xc1, 0x6f, 0xf0, 0x99, 0x25, 0x00, 0x8b, 0x81,
-  0x16, 0x43, 0xd3, 0x09, 0x6c, 0x27, 0x08, 0x9f, 0x10, 0x7e, 0x42, 0x2d,
-  0x78, 0xe2, 0x82, 0x61, 0x6c, 0xdf, 0xfe, 0x2d, 0x3e, 0xc3, 0x11, 0xb6,
-  0x00, 0x72, 0xc4, 0x37, 0xcb, 0x10, 0x16, 0x64, 0x11, 0x58, 0xc8, 0xdd,
-  0x42, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x18, 0x65, 0x81,
-  0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x81, 0x7a, 0x3a, 0xdc, 0x10, 0x98,
-  0x1e, 0x18, 0xcc, 0x32, 0x88, 0xc5, 0x58, 0x04, 0x36, 0xa4, 0x1c, 0x7c,
-  0x66, 0x09, 0xd0, 0xc2, 0x4c, 0x8e, 0x88, 0xcf, 0x2c, 0x01, 0x5a, 0x0c,
-  0x47, 0x84, 0xc3, 0xc9, 0x09, 0xdf, 0x2c, 0x43, 0x59, 0xa0, 0x45, 0x60,
-  0xe2, 0x80, 0x72, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0x60,
-  0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0xc4, 0xec, 0xe9, 0x70,
-  0x43, 0x10, 0x7b, 0x60, 0x30, 0xcb, 0x60, 0x16, 0x67, 0x11, 0x18, 0xcc,
-  0x0d, 0xf1, 0x99, 0x25, 0x40, 0x0b, 0x23, 0x6a, 0x0e, 0x3e, 0xb3, 0x04,
-  0x68, 0x31, 0xd0, 0x62, 0x68, 0x62, 0x81, 0x8d, 0x05, 0x61, 0x16, 0xc2,
-  0x59, 0xe0, 0x06, 0x59, 0x5c, 0x30, 0xcc, 0x05, 0x46, 0xdd, 0x66, 0xd4,
-  0x85, 0xdc, 0x30, 0x27, 0x63, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c,
-  0x18, 0x1c, 0x00, 0x08, 0x82, 0x41, 0x55, 0x7e, 0xb1, 0xd7, 0x76, 0xbe,
-  0x37, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3,
-  0x09, 0xc4, 0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xf0,
-  0xb0, 0x1f, 0xee, 0x25, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06,
-  0x4f, 0xfb, 0xe5, 0x5e, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60,
-  0xf0, 0xb8, 0x9f, 0xee, 0x25, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08,
-  0x06, 0x1b, 0xfb, 0xe5, 0xde, 0xdd, 0x05, 0xe5, 0x67, 0x7a, 0xe3, 0x37,
-  0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08,
-  0x06, 0x9e, 0xfb, 0xed, 0x9e, 0x10, 0x5c, 0x60, 0xdc, 0x2c, 0x41, 0x5a,
-  0x0c, 0xb4, 0x18, 0xae, 0x41, 0x13, 0xfe, 0x1e, 0xcc, 0x04, 0x4c, 0xd8,
-  0x84, 0x80, 0x16, 0xfe, 0x1e, 0xdc, 0xc4, 0x2c, 0x83, 0x5a, 0xb0, 0x45,
-  0x48, 0x0c, 0x47, 0x98, 0x04, 0xe8, 0x0d, 0xdf, 0x9d, 0xc4, 0x30, 0xc3,
-  0x0d, 0x81, 0xdd, 0x91, 0x41, 0x0d, 0x81, 0x0e, 0x47, 0x9c, 0x04, 0xe9,
-  0x0d, 0x5f, 0x05, 0x82, 0x5e, 0x4a, 0x0c, 0x33, 0xdc, 0x10, 0xe4, 0x1d,
-  0x19, 0x54, 0x30, 0xe8, 0x2c, 0xc3, 0x5a, 0x80, 0x46, 0x70, 0x3d, 0x37,
-  0xcc, 0xb9, 0xd9, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x50, 0xfd,
-  0xdf, 0xfa, 0x9d, 0x1e, 0xfe, 0x8d, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10,
-  0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x71, 0xc8, 0x88, 0x01,
-  0x02, 0x80, 0x20, 0x18, 0x3c, 0x26, 0x18, 0xc8, 0xdf, 0x41, 0x04, 0x23,
-  0x06, 0x08, 0x00, 0x82, 0x60, 0xf0, 0x9c, 0x60, 0x30, 0x7f, 0x0c, 0x11,
-  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x83, 0x82, 0x01, 0xfd, 0x49,
-  0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x9b, 0x09, 0x06, 0xf3,
-  0x17, 0x7b, 0xc1, 0xff, 0x81, 0x5f, 0xff, 0x8d, 0x26, 0x04, 0xc0, 0x68,
-  0x82, 0x10, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x81, 0x87, 0x82, 0x41,
-  0xfd, 0x09, 0xc1, 0x05, 0xc6, 0xcd, 0x12, 0x80, 0xc6, 0x70, 0xc3, 0x4e,
-  0x9c, 0x60, 0x00, 0x06, 0xb3, 0x0c, 0x6d, 0xe1, 0x16, 0x41, 0xb9, 0xde,
-  0xfd, 0xc1, 0x05, 0x46, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x32,
-  0x83, 0x01, 0xfe, 0x89, 0x05, 0xef, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82,
-  0xc1, 0x42, 0x83, 0x01, 0xfe, 0x05, 0xc2, 0x05, 0xc3, 0x54, 0xec, 0xf1,
-  0x1f, 0x5c, 0x60, 0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x2c, 0x38,
-  0x18, 0xf4, 0x1f, 0x59, 0x84, 0xdf, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18,
-  0x2c, 0x39, 0x18, 0xf4, 0x5f, 0x20, 0x5c, 0x30, 0xcc, 0x05, 0x46, 0xdd,
-  0x61, 0xd4, 0xe9, 0xdd, 0x30, 0xb7, 0x6a, 0xc3, 0x1c, 0x31, 0xcc, 0x11,
-  0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x41, 0xe5, 0x83, 0x81, 0x0a,
-  0x06, 0xe6, 0x77, 0x83, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1,
-  0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20,
-  0x00, 0x08, 0x82, 0xc1, 0x53, 0x86, 0x41, 0x0c, 0x06, 0x09, 0x11, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x63, 0x86, 0x81, 0x0c, 0x06, 0x09,
-  0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x73, 0x86, 0xc1, 0x0c,
-  0x06, 0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0x56, 0x86,
-  0x81, 0x0c, 0x06, 0xf0, 0x17, 0xf8, 0x60, 0xf0, 0x7f, 0x3c, 0x18, 0x8c,
-  0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82,
-  0x81, 0x77, 0x86, 0x01, 0x0d, 0x06, 0x42, 0x70, 0x81, 0x71, 0xb3, 0x04,
-  0xa0, 0x31, 0xdc, 0xa0, 0x17, 0x65, 0x18, 0x80, 0xc1, 0x2c, 0xc3, 0x5b,
-  0x80, 0x46, 0x60, 0xe4, 0x67, 0x7e, 0xf1, 0x19, 0x8e, 0x00, 0x8d, 0xf3,
-  0x23, 0xbe, 0x59, 0x06, 0xb8, 0x98, 0x8b, 0xc0, 0xd0, 0x2f, 0x34, 0xe2,
-  0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x28, 0x0b, 0x0c, 0xf9,
-  0x58, 0x11, 0xc4, 0xa7, 0x88, 0x37, 0x0c, 0x74, 0xb8, 0x21, 0x68, 0xc3,
-  0x00, 0x0c, 0x66, 0x19, 0xe2, 0x42, 0x2e, 0x02, 0x1b, 0xe0, 0x0f, 0x3e,
-  0xb3, 0x04, 0x77, 0x61, 0xef, 0x47, 0xc4, 0x67, 0x96, 0xe0, 0x2e, 0x86,
-  0x23, 0x56, 0x03, 0xfe, 0x84, 0x6f, 0x96, 0x81, 0x2e, 0xee, 0x22, 0x30,
-  0xd6, 0x88, 0xbf, 0xf8, 0x58, 0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x30,
-  0xca, 0x82, 0x48, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x42, 0x0f, 0x03, 0x1d,
-  0x6e, 0x08, 0xf0, 0x30, 0x00, 0x83, 0x59, 0x86, 0xba, 0xb0, 0x8b, 0xc0,
-  0xf2, 0x6f, 0x88, 0xcf, 0x2c, 0xc1, 0x5d, 0x18, 0xc1, 0x7f, 0xf0, 0x99,
-  0x25, 0xb8, 0x8b, 0x81, 0x16, 0x43, 0x8b, 0x0b, 0x4c, 0x2e, 0x88, 0xba,
-  0x10, 0xec, 0x82, 0x6d, 0xe6, 0xe2, 0x82, 0x61, 0x6c, 0xff, 0xfe, 0x2f,
-  0x3e, 0xc3, 0x11, 0xb6, 0x01, 0x82, 0x01, 0xf1, 0xcd, 0x32, 0xe0, 0xc5,
-  0x5e, 0x04, 0x16, 0x82, 0xc1, 0x6d, 0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17,
-  0x0c, 0x73, 0x81, 0x51, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11,
-  0xa8, 0x18, 0xe8, 0x70, 0x43, 0x60, 0x8a, 0x01, 0x18, 0xcc, 0x32, 0xe4,
-  0x85, 0x5e, 0x04, 0x36, 0xa4, 0x60, 0x00, 0x9f, 0x59, 0x82, 0xbf, 0x30,
-  0x13, 0x0c, 0x88, 0xf8, 0xcc, 0x12, 0xfc, 0xc5, 0x70, 0x44, 0x78, 0x9c,
-  0x60, 0x20, 0x7c, 0xb3, 0x0c, 0x7c, 0xf1, 0x17, 0x81, 0x89, 0x07, 0x0a,
-  0x06, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0x60, 0x94, 0x05,
-  0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0xc4, 0x2c, 0x06, 0x3a, 0xdc, 0x10,
-  0xc4, 0x62, 0x00, 0x06, 0xb3, 0x0c, 0x7d, 0xe1, 0x17, 0x81, 0xc1, 0x60,
-  0x30, 0xc4, 0x67, 0x96, 0xe0, 0x2f, 0x8c, 0xa8, 0xc1, 0x00, 0x3e, 0xb3,
-  0x04, 0x7f, 0x31, 0xd0, 0x62, 0x68, 0x79, 0x81, 0xe9, 0x05, 0xd1, 0x17,
-  0x82, 0x5f, 0xe0, 0xce, 0x5e, 0x5c, 0x30, 0xcc, 0x05, 0x46, 0xdd, 0x66,
-  0xd4, 0x85, 0x60, 0x30, 0xcc, 0xc9, 0xdc, 0x30, 0x47, 0x0c, 0x73, 0xc4,
-  0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x50, 0x95, 0x63, 0x10, 0x8b,
-  0x41, 0x1b, 0x06, 0xbe, 0x18, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10,
-  0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x91, 0xc8, 0x88, 0x01,
-  0x02, 0x80, 0x20, 0x18, 0x3c, 0xec, 0x18, 0xe0, 0x62, 0x90, 0x10, 0xc1,
-  0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x3c, 0xed, 0x18, 0xe4, 0x62, 0x90,
-  0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x3c, 0xee, 0x18, 0xe8,
-  0x62, 0x90, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x6c, 0xec,
-  0x18, 0xe4, 0x62, 0x70, 0x87, 0x41, 0x50, 0x8e, 0x81, 0x29, 0x06, 0xe3,
-  0x18, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x18, 0x1c, 0x00,
-  0x08, 0x82, 0x81, 0xe7, 0x8e, 0xc1, 0x2e, 0x06, 0x42, 0x70, 0x81, 0x71,
-  0xb3, 0x04, 0xa0, 0x31, 0xd0, 0x62, 0xb8, 0xc6, 0x5a, 0xf8, 0xa3, 0xa0,
-  0x16, 0x30, 0xd1, 0x16, 0xc2, 0x5f, 0xf8, 0xa3, 0xe0, 0x16, 0x66, 0x22,
-  0xa1, 0x18, 0xc0, 0x67, 0x96, 0x21, 0x34, 0x46, 0x43, 0x44, 0x86, 0x23,
-  0x82, 0x50, 0x0c, 0x86, 0xef, 0x84, 0x61, 0x86, 0x1b, 0x82, 0x3b, 0x0c,
-  0xc8, 0xa0, 0x86, 0x40, 0x87, 0x23, 0x50, 0xa4, 0x14, 0x83, 0xe1, 0xab,
-  0x40, 0xd0, 0x53, 0x91, 0x61, 0x86, 0x1b, 0x02, 0x3d, 0x0c, 0xc8, 0xa0,
-  0x82, 0x41, 0x67, 0x19, 0x44, 0xe3, 0x36, 0x82, 0xf3, 0xc1, 0x60, 0x98,
-  0x7b, 0xbb, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0xa0, 0x02, 0xc9,
-  0x80, 0x1d, 0x03, 0x54, 0x0c, 0xf2, 0x31, 0x18, 0x4d, 0x08, 0x80, 0xd1,
-  0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0xe2, 0x90,
-  0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0x78, 0x4e, 0x32, 0x98, 0xc7, 0xe0,
-  0x20, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0x78, 0x50, 0x32, 0xa0,
-  0xc7, 0x80, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0x78, 0x52,
-  0x32, 0xa8, 0xc7, 0x40, 0x22, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30,
-  0xd8, 0x4e, 0x32, 0xa0, 0xc7, 0x40, 0x16, 0x83, 0x00, 0x24, 0x83, 0x70,
-  0x0c, 0xfc, 0x31, 0x18, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x31,
-  0x38, 0x00, 0x10, 0x04, 0x03, 0x2f, 0x25, 0x03, 0x7b, 0x0c, 0x84, 0xe0,
-  0x02, 0xe3, 0x66, 0x09, 0x6e, 0x63, 0xb8, 0x81, 0x47, 0x50, 0x32, 0x00,
-  0x83, 0x59, 0x06, 0xd2, 0x28, 0x8d, 0xa0, 0x5e, 0x31, 0xc0, 0xc7, 0x00,
-  0x2e, 0x30, 0x6a, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x16, 0x9a, 0x0c,
-  0xf2, 0x31, 0xe8, 0x7a, 0x31, 0x18, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83,
-  0xa5, 0x26, 0x83, 0x7c, 0x0c, 0x02, 0xe1, 0x82, 0x61, 0x4a, 0x16, 0x83,
-  0x7e, 0x0c, 0xe0, 0x02, 0xa3, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60,
-  0xc9, 0xc9, 0xc0, 0x1f, 0x83, 0x32, 0x11, 0xc7, 0x60, 0xc4, 0xe0, 0x00,
-  0x40, 0x10, 0x0c, 0x16, 0x9d, 0x0c, 0xfc, 0x31, 0x08, 0x84, 0x0b, 0x86,
-  0xb9, 0xc0, 0xa8, 0x3b, 0x8c, 0xba, 0x3d, 0x0c, 0x86, 0x39, 0xd6, 0x1b,
-  0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c,
-  0xaa, 0x9f, 0x0c, 0x56, 0x32, 0x38, 0xc7, 0x00, 0x27, 0x83, 0xd1, 0x84,
-  0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86,
-  0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0xc7, 0x2c, 0x03,
-  0x99, 0x0c, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0xe7,
-  0x2c, 0x83, 0x99, 0x0c, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04,
-  0x83, 0x07, 0x2d, 0x03, 0x9a, 0x0c, 0x12, 0x22, 0x18, 0x31, 0x50, 0x00,
-  0x10, 0x04, 0x83, 0xcd, 0x2c, 0x83, 0x99, 0x0c, 0xe2, 0x31, 0x08, 0x7e,
-  0x32, 0x00, 0xc9, 0xa0, 0x27, 0x83, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10,
-  0x82, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xf0, 0xd0, 0x32, 0xa8, 0xc9,
-  0x40, 0x08, 0x2e, 0x30, 0x6e, 0x96, 0xe0, 0x36, 0x86, 0x1b, 0xf6, 0xc4,
-  0x2c, 0x03, 0x30, 0x98, 0x65, 0x30, 0x8d, 0xdb, 0x08, 0xac, 0x1c, 0x83,
-  0x73, 0x0c, 0xe2, 0x33, 0x1c, 0xb1, 0x07, 0xe8, 0x18, 0x10, 0xdf, 0x2c,
-  0xc3, 0x69, 0xa8, 0x46, 0x60, 0xe9, 0x18, 0xf0, 0x41, 0x7c, 0x2c, 0x18,
-  0xe8, 0x73, 0xc1, 0x30, 0x17, 0x18, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82,
-  0xf8, 0x14, 0x01, 0x97, 0x81, 0x0e, 0x37, 0x04, 0x6e, 0x19, 0x80, 0xc1,
-  0x2c, 0x03, 0x6a, 0xa4, 0x46, 0x60, 0x43, 0x3c, 0x06, 0xf0, 0x99, 0x25,
-  0x70, 0x0d, 0x83, 0xc7, 0x80, 0x88, 0xcf, 0x2c, 0x81, 0x6b, 0x0c, 0x47,
-  0x98, 0x42, 0x3c, 0x06, 0xc2, 0x37, 0xcb, 0xb0, 0x1a, 0xae, 0x11, 0xd8,
-  0x29, 0xc8, 0x63, 0x10, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05,
-  0x46, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xec, 0x65, 0xa0,
-  0xc3, 0x0d, 0x41, 0x5e, 0x06, 0x60, 0x30, 0xcb, 0xc0, 0x1a, 0xad, 0x11,
-  0x98, 0x3e, 0x06, 0x43, 0x7c, 0x66, 0x09, 0x5c, 0xc3, 0x88, 0x7e, 0x0c,
-  0xe0, 0x33, 0x4b, 0xe0, 0x1a, 0x03, 0x2d, 0x86, 0x86, 0x1a, 0x58, 0x6a,
-  0x10, 0xac, 0x21, 0xb4, 0x86, 0x58, 0xa8, 0xc6, 0x05, 0xc3, 0x18, 0x3f,
-  0x06, 0x20, 0x19, 0xc4, 0x67, 0x38, 0xe2, 0x56, 0x42, 0x32, 0x20, 0xbe,
-  0x59, 0x86, 0xd7, 0x90, 0x8d, 0xc0, 0x44, 0x32, 0xc0, 0x95, 0xf8, 0x58,
-  0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x30, 0xca, 0x02, 0x43, 0x3e, 0x56,
-  0x04, 0xf1, 0x29, 0x22, 0x35, 0x03, 0x1d, 0x6e, 0x08, 0x4e, 0x33, 0x00,
-  0x83, 0x59, 0x06, 0xd8, 0x88, 0x8d, 0xc0, 0x06, 0x95, 0x0c, 0xe0, 0x33,
-  0x4b, 0x60, 0x1b, 0x76, 0x92, 0x01, 0x11, 0x9f, 0x59, 0x02, 0xdb, 0x18,
-  0x8e, 0x10, 0x17, 0x94, 0x0c, 0x84, 0x6f, 0x96, 0x61, 0x36, 0x6c, 0x23,
-  0xb0, 0x71, 0x49, 0xc9, 0x20, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98,
-  0x0b, 0x8c, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xa0, 0xcd,
-  0x40, 0x87, 0x1b, 0x02, 0xd9, 0x0c, 0xc0, 0x60, 0x96, 0x81, 0x36, 0x6a,
-  0x23, 0xb0, 0x98, 0x0c, 0x86, 0xf8, 0xcc, 0x12, 0xd8, 0x86, 0x11, 0x36,
-  0x19, 0xc0, 0x67, 0x96, 0xc0, 0x36, 0x06, 0x5a, 0x0c, 0x0d, 0x36, 0xb0,
-  0xd8, 0x20, 0x68, 0x43, 0xa8, 0x0d, 0xdd, 0x92, 0x8d, 0x0b, 0x86, 0xb9,
-  0xc0, 0xa8, 0xdb, 0x8c, 0x3a, 0x91, 0x0c, 0x86, 0xb9, 0x19, 0x0c, 0x86,
-  0x39, 0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83,
-  0xca, 0x3c, 0x03, 0xd9, 0x0c, 0xdc, 0x32, 0xf8, 0xcd, 0x60, 0x34, 0x21,
-  0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1,
-  0x88, 0x44, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x69, 0xcf, 0x20,
-  0x37, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x71,
-  0xcf, 0x40, 0x37, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1,
-  0xe0, 0x79, 0xcf, 0x60, 0x37, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00,
-  0x04, 0xc1, 0x60, 0x6b, 0xcf, 0x40, 0x37, 0x03, 0xbc, 0x0c, 0x02, 0xf3,
-  0x0c, 0x4e, 0x33, 0x20, 0xcf, 0x60, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84,
-  0x60, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xbc, 0xf7, 0x0c, 0x78, 0x33,
-  0x10, 0x82, 0x0b, 0x8c, 0x9b, 0x25, 0xb8, 0x8d, 0x81, 0x16, 0xc3, 0x35,
-  0x44, 0x03, 0x64, 0x85, 0xd0, 0x80, 0x09, 0xd2, 0x10, 0x6c, 0x03, 0x64,
-  0x85, 0xd2, 0xb0, 0x93, 0x21, 0xcd, 0x00, 0x3e, 0xb3, 0x0c, 0xb8, 0xa1,
-  0x1b, 0x23, 0x33, 0x1c, 0x11, 0x88, 0x66, 0x30, 0x7c, 0x27, 0x0c, 0x33,
-  0xdc, 0x10, 0xe0, 0x65, 0x40, 0x06, 0x35, 0x04, 0x3a, 0x1c, 0x91, 0x32,
-  0xa6, 0x19, 0x0c, 0x5f, 0x05, 0x82, 0xde, 0xca, 0x0c, 0x33, 0xdc, 0x10,
-  0xec, 0x65, 0x40, 0x06, 0x15, 0x0c, 0x3a, 0xcb, 0x90, 0x1b, 0xee, 0x11,
-  0xdc, 0x4f, 0x06, 0xc3, 0x1c, 0x1c, 0x06, 0xc3, 0x8c, 0x18, 0x1c, 0x00,
-  0x08, 0x82, 0x41, 0x15, 0xa2, 0x41, 0x7b, 0x06, 0xa9, 0x19, 0xe8, 0x67,
-  0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3,
-  0x09, 0xc4, 0x50, 0xc4, 0x21, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xf0,
-  0xa0, 0x68, 0x40, 0x9f, 0xc1, 0x41, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82,
-  0x60, 0xf0, 0xa4, 0x68, 0x50, 0x9f, 0x01, 0x43, 0x04, 0x23, 0x06, 0x08,
-  0x00, 0x82, 0x60, 0xf0, 0xa8, 0x68, 0x60, 0x9f, 0x81, 0x44, 0x04, 0x23,
-  0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0xa1, 0x68, 0x50, 0x9f, 0xc1, 0x6c,
-  0x06, 0x41, 0x88, 0x06, 0xe2, 0x19, 0xfc, 0x67, 0x30, 0x9a, 0x10, 0x00,
-  0xa3, 0x09, 0x42, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x9e, 0x8a,
-  0x06, 0xf7, 0x19, 0x08, 0xc1, 0x05, 0xc6, 0xcd, 0x12, 0xb8, 0xc7, 0x70,
-  0x43, 0xcf, 0xa4, 0x68, 0x00, 0x06, 0xb3, 0x0c, 0xbb, 0xc1, 0x1b, 0x41,
-  0xc1, 0x66, 0x90, 0x9f, 0x01, 0x5c, 0x60, 0xd4, 0x88, 0xc1, 0x01, 0x80,
-  0x20, 0x18, 0x2c, 0x35, 0x1a, 0xe8, 0x67, 0xd0, 0xf9, 0x66, 0x30, 0x62,
-  0x70, 0x00, 0x20, 0x08, 0x06, 0x8b, 0x8d, 0x06, 0xfa, 0x19, 0x04, 0xc2,
-  0x05, 0xc3, 0xd4, 0x6c, 0x06, 0xfe, 0x19, 0xc0, 0x05, 0x46, 0x8d, 0x18,
-  0x1c, 0x00, 0x08, 0x82, 0xc1, 0xa2, 0xa3, 0xc1, 0x7f, 0x06, 0x66, 0x33,
-  0x9e, 0xc1, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x2c, 0x3b, 0x1a, 0xfc,
-  0x67, 0x10, 0x08, 0x17, 0x0c, 0x73, 0x81, 0x51, 0x77, 0x18, 0x75, 0x7c,
-  0x19, 0x0c, 0x73, 0xad, 0x18, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33,
-  0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x15, 0x98, 0x06, 0x2c, 0x1a, 0xa0,
-  0x67, 0x90, 0xa3, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68,
-  0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00,
-  0x08, 0x82, 0xc1, 0x73, 0xa6, 0xc1, 0x8c, 0x06, 0x09, 0x11, 0x8c, 0x18,
-  0x20, 0x00, 0x08, 0x82, 0xc1, 0x83, 0xa6, 0x01, 0x8d, 0x06, 0x09, 0x11,
-  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x93, 0xa6, 0x41, 0x8d, 0x06,
-  0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0x76, 0xa6, 0x01,
-  0x8d, 0x06, 0xf2, 0x19, 0x04, 0x60, 0x1a, 0x84, 0x68, 0xe0, 0xa3, 0xc1,
-  0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x88, 0xc1, 0x01, 0x80, 0x20,
-  0x18, 0x78, 0x69, 0x1a, 0xd8, 0x68, 0x20, 0x04, 0x17, 0x18, 0x37, 0x4b,
-  0xe0, 0x1e, 0xc3, 0x0d, 0x7c, 0x73, 0xa6, 0x01, 0x18, 0xcc, 0x32, 0xf4,
-  0x86, 0x7b, 0x04, 0x66, 0x9e, 0x01, 0x7a, 0x06, 0xf1, 0x19, 0x8e, 0xd8,
-  0x83, 0xf4, 0x0c, 0x88, 0x6f, 0x96, 0xc1, 0x37, 0xc2, 0x23, 0x30, 0xf5,
-  0x0c, 0xf8, 0x20, 0x3e, 0x16, 0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x8c,
-  0xb2, 0xc0, 0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x88, 0xd3, 0x40, 0x87,
-  0x1b, 0x82, 0x37, 0x0d, 0xc0, 0x60, 0x96, 0xe1, 0x37, 0xc0, 0x23, 0xb0,
-  0x41, 0x3e, 0x03, 0xf8, 0xcc, 0x12, 0x94, 0x87, 0xc5, 0x67, 0x40, 0xc4,
-  0x67, 0x96, 0xa0, 0x3c, 0x86, 0x23, 0x4c, 0x41, 0x3e, 0x03, 0xe1, 0x9b,
-  0x65, 0x10, 0x8f, 0xf2, 0x08, 0xec, 0x14, 0xe6, 0x33, 0x88, 0x8f, 0x05,
-  0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa3, 0x2c, 0x88, 0xe4, 0x63, 0x45,
-  0x10, 0x9f, 0x22, 0xf8, 0x34, 0xd0, 0xe1, 0x86, 0x40, 0x4f, 0x03, 0x30,
-  0x98, 0x65, 0x18, 0x0f, 0xf2, 0x08, 0x6c, 0x3f, 0x83, 0x21, 0x3e, 0xb3,
-  0x04, 0xe5, 0x61, 0x84, 0x7f, 0x06, 0xf0, 0x99, 0x25, 0x28, 0x8f, 0x81,
-  0x16, 0x43, 0xfb, 0x0d, 0x0c, 0x3c, 0x88, 0xf1, 0x10, 0xc8, 0x43, 0x2c,
-  0xc2, 0xe3, 0x82, 0x61, 0xac, 0x3f, 0x83, 0x10, 0x0d, 0xe2, 0x33, 0x1c,
-  0x81, 0x3b, 0x22, 0x1a, 0x10, 0xdf, 0x2c, 0x83, 0x79, 0xa4, 0x47, 0x60,
-  0x23, 0x1a, 0xe4, 0x4e, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17,
-  0x18, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xa1, 0xaa, 0x81,
-  0x0e, 0x37, 0x04, 0xa8, 0x1a, 0x80, 0xc1, 0x2c, 0xc3, 0x79, 0xa0, 0x47,
-  0x60, 0xc3, 0x8a, 0x06, 0xf0, 0x99, 0x25, 0x68, 0x0f, 0x43, 0xd1, 0x80,
-  0x88, 0xcf, 0x2c, 0x41, 0x7b, 0x0c, 0x47, 0x8c, 0x4f, 0x8a, 0x06, 0xc2,
-  0x37, 0xcb, 0xa0, 0x1e, 0xed, 0x11, 0x18, 0xf9, 0xa8, 0x68, 0x10, 0x1f,
-  0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x46, 0x59, 0x10, 0xc9, 0xc7,
-  0x8a, 0x20, 0x3e, 0x45, 0xd4, 0x6a, 0xa0, 0xc3, 0x0d, 0xc1, 0xac, 0x06,
-  0x60, 0x30, 0xcb, 0xb0, 0x1e, 0xec, 0x11, 0x98, 0x8c, 0x06, 0x43, 0x7c,
-  0x66, 0x09, 0xda, 0xc3, 0x88, 0x1b, 0x0d, 0xe0, 0x33, 0x4b, 0xd0, 0x1e,
-  0x03, 0x2d, 0x86, 0x76, 0x1e, 0x18, 0x7a, 0x10, 0xeb, 0x21, 0xb0, 0x07,
-  0xef, 0xa5, 0xc7, 0x05, 0xc3, 0x5c, 0x60, 0xd4, 0x6d, 0x46, 0xdd, 0x88,
-  0x06, 0xc3, 0x1c, 0x4d, 0x06, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c,
-  0x18, 0x1c, 0x00, 0x08, 0x82, 0x41, 0x75, 0xae, 0xc1, 0xac, 0x06, 0x6f,
-  0x1a, 0x80, 0x6b, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a,
-  0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00,
-  0x82, 0x60, 0xf0, 0xb8, 0x6b, 0xa0, 0xab, 0x41, 0x42, 0x04, 0x23, 0x06,
-  0x08, 0x00, 0x82, 0x60, 0xf0, 0xbc, 0x6b, 0xb0, 0xab, 0x41, 0x42, 0x04,
-  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xf0, 0xc0, 0x6b, 0xc0, 0xab, 0x41,
-  0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0xb9, 0x6b, 0xb0,
-  0xab, 0x41, 0x9e, 0x06, 0xc1, 0xb9, 0x06, 0xa8, 0x1a, 0x94, 0x6b, 0x30,
-  0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08,
-  0x06, 0x1e, 0xbc, 0x06, 0xbd, 0x1a, 0x08, 0xc1, 0x05, 0xc6, 0xcd, 0x12,
-  0xb8, 0xc7, 0x40, 0x8b, 0xe1, 0x1a, 0xb9, 0x21, 0xd2, 0x02, 0x6e, 0xc0,
-  0xc4, 0x6e, 0x08, 0xed, 0x21, 0xd2, 0x02, 0x6f, 0xcc, 0x32, 0xbc, 0x47,
-  0x7c, 0x8c, 0xd0, 0x70, 0xc4, 0x09, 0x89, 0x6a, 0x30, 0x7c, 0x87, 0x42,
-  0xc3, 0x0c, 0x37, 0x04, 0x78, 0x1a, 0x90, 0x41, 0x0d, 0x81, 0x0e, 0x47,
-  0xa8, 0x90, 0xa9, 0x06, 0xc3, 0x57, 0x81, 0xa0, 0xc7, 0x42, 0xc3, 0x0c,
-  0x37, 0x04, 0x7b, 0x1a, 0x90, 0x41, 0x05, 0x83, 0xce, 0x32, 0xc0, 0x47,
-  0x89, 0x04, 0xf7, 0xa3, 0xc1, 0x30, 0x07, 0x97, 0xc1, 0x30, 0x23, 0x06,
-  0x07, 0x00, 0x82, 0x60, 0x50, 0x85, 0x6c, 0xd0, 0xae, 0x41, 0xaa, 0x06,
-  0xfa, 0x1a, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c,
-  0xc2, 0x68, 0x02, 0x31, 0x14, 0x71, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20,
-  0x18, 0x3c, 0x28, 0x1b, 0xd0, 0x6b, 0x70, 0x10, 0xc1, 0x88, 0x01, 0x02,
-  0x80, 0x20, 0x18, 0x3c, 0x29, 0x1b, 0xd4, 0x6b, 0xc0, 0x10, 0xc1, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x3c, 0x2a, 0x1b, 0xd8, 0x6b, 0x20, 0x11,
-  0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x6c, 0x28, 0x1b, 0xd4, 0x6b,
-  0x30, 0xab, 0x41, 0x10, 0xb2, 0x81, 0xb8, 0x06, 0xff, 0x1a, 0x8c, 0x26,
-  0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x81,
-  0xa7, 0xb2, 0xc1, 0xbd, 0x06, 0x42, 0x70, 0x81, 0x71, 0xb3, 0x04, 0x25,
-  0x32, 0xdc, 0xd0, 0x43, 0x29, 0x1b, 0x80, 0xc1, 0x2c, 0x83, 0x7c, 0xcc,
-  0x47, 0x50, 0xb0, 0x1a, 0xe4, 0x6b, 0x00, 0x17, 0x18, 0x35, 0x62, 0x70,
-  0x00, 0x20, 0x08, 0x06, 0x4b, 0xcd, 0x06, 0xfa, 0x1a, 0x8c, 0x91, 0xaf,
-  0x06, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xb0, 0xd8, 0x6c, 0xa0, 0xaf,
-  0x41, 0x20, 0x5c, 0x30, 0x4c, 0xcd, 0x6a, 0xe0, 0xaf, 0x01, 0x5c, 0x60,
-  0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x2c, 0x3a, 0x1b, 0xfc, 0x6b,
-  0x70, 0x46, 0xe3, 0x1a, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0xb2,
-  0xb3, 0xc1, 0xbf, 0x06, 0x81, 0x70, 0xc1, 0x30, 0x17, 0x18, 0x75, 0x87,
-  0x51, 0xc7, 0xa7, 0xc1, 0x30, 0xd7, 0x9a, 0xc1, 0x30, 0x47, 0x0c, 0x73,
-  0xc4, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x50, 0x81, 0x6d, 0xc0,
-  0xb2, 0x01, 0xba, 0x06, 0x39, 0x1b, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82,
-  0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x91, 0xc8, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x3c, 0x67, 0x1b, 0xcc, 0x6c, 0x90, 0x10,
-  0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x3c, 0x68, 0x1b, 0xd0, 0x6c,
-  0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x3c, 0x69, 0x1b,
-  0xd4, 0x6c, 0x90, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x6c,
-  0x67, 0x1b, 0xd0, 0x6c, 0x20, 0xaf, 0x41, 0x00, 0xb6, 0x41, 0xc8, 0x06,
-  0x3e, 0x1b, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x18, 0x1c,
-  0x00, 0x08, 0x82, 0x81, 0x97, 0xb6, 0x81, 0xcd, 0x06, 0x42, 0x70, 0x81,
-  0x71, 0xb3, 0x04, 0x25, 0x32, 0xdc, 0xc0, 0x47, 0x67, 0x1b, 0x80, 0xc1,
-  0x2c, 0x03, 0x7d, 0x94, 0x48, 0x60, 0xe6, 0x1a, 0xa0, 0x6b, 0x10, 0x9f,
-  0xe1, 0x88, 0x50, 0x4a, 0xd7, 0x80, 0xf8, 0x66, 0x19, 0xea, 0x03, 0x3f,
-  0x02, 0x53, 0xd7, 0x40, 0x94, 0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86,
-  0xb9, 0xc0, 0x28, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0xb8,
-  0x0d, 0x74, 0xb8, 0x21, 0x78, 0xdb, 0x00, 0x0c, 0x66, 0x19, 0xec, 0xe3,
-  0x3e, 0x02, 0x1b, 0xe4, 0x35, 0x80, 0xcf, 0x2c, 0x01, 0x7f, 0x58, 0xbc,
-  0x06, 0x44, 0x7c, 0x66, 0x09, 0xf8, 0x63, 0x38, 0x82, 0x95, 0xe4, 0x35,
-  0x10, 0xbe, 0x59, 0x86, 0xfc, 0xe0, 0x8f, 0xc0, 0x5a, 0x69, 0x5e, 0x83,
-  0xf8, 0x58, 0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x30, 0xca, 0x82, 0x48,
-  0x3e, 0x56, 0x04, 0xf1, 0x29, 0x82, 0x6f, 0x03, 0x1d, 0x6e, 0x08, 0xf4,
-  0x36, 0x00, 0x83, 0x59, 0x06, 0xfd, 0xd8, 0x8f, 0xc0, 0xf6, 0x35, 0x18,
-  0xe2, 0x33, 0x4b, 0xc0, 0x1f, 0x46, 0xf8, 0x6b, 0x00, 0x9f, 0x59, 0x02,
-  0xfe, 0x18, 0x68, 0x31, 0x34, 0xfb, 0xc0, 0xee, 0x83, 0xd0, 0x0f, 0x61,
-  0x3f, 0xdc, 0x32, 0xc0, 0x8f, 0x0b, 0x86, 0xb1, 0x7e, 0x0d, 0x42, 0x36,
-  0x88, 0xcf, 0x70, 0x44, 0x2e, 0x89, 0x6c, 0x40, 0x7c, 0xb3, 0x0c, 0xfd,
-  0x01, 0x22, 0x81, 0x8d, 0x6c, 0xa0, 0x4b, 0xf1, 0xb1, 0x60, 0xa0, 0xcf,
-  0x05, 0xc3, 0x5c, 0x60, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53,
-  0x84, 0xea, 0x06, 0x3a, 0xdc, 0x10, 0xa0, 0x6e, 0x00, 0x06, 0xb3, 0x0c,
-  0xfe, 0xf1, 0x1f, 0x81, 0x0d, 0x2b, 0x1b, 0xc0, 0x67, 0x96, 0x80, 0x44,
-  0x0c, 0x65, 0x03, 0x22, 0x3e, 0xb3, 0x04, 0x24, 0x32, 0x1c, 0x41, 0x4e,
-  0x29, 0x1b, 0x08, 0xdf, 0x2c, 0x43, 0x88, 0x90, 0x48, 0x60, 0xe5, 0xa4,
-  0xb2, 0x41, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x18, 0x65,
-  0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x51, 0xbb, 0x81, 0x0e, 0x37,
-  0x04, 0xb3, 0x1b, 0x80, 0xc1, 0x2c, 0x83, 0x88, 0x8c, 0x48, 0x60, 0x32,
-  0x1b, 0x0c, 0xf1, 0x99, 0x25, 0x20, 0x11, 0x23, 0x6e, 0x36, 0x80, 0xcf,
-  0x2c, 0x01, 0x89, 0x0c, 0xb4, 0x18, 0x9a, 0x7f, 0x60, 0xff, 0x41, 0x88,
-  0x88, 0x30, 0x22, 0xbe, 0x19, 0x80, 0xc8, 0x05, 0xc3, 0x5c, 0x60, 0xd4,
-  0x6d, 0x46, 0xdd, 0xc8, 0x06, 0xc3, 0x1c, 0x8d, 0x06, 0xc3, 0x1c, 0x31,
-  0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x41, 0x75, 0xbe,
-  0xc1, 0xec, 0x06, 0x6f, 0x1b, 0x80, 0x6f, 0x30, 0x9a, 0x10, 0x00, 0xa3,
-  0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22,
-  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xf0, 0xb8, 0x6f, 0xa0, 0xbb, 0x41,
-  0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xf0, 0xbc, 0x6f, 0xb0,
-  0xbb, 0x41, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xf0, 0xc0,
-  0x6f, 0xc0, 0xbb, 0x41, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60,
-  0xb0, 0xb9, 0x6f, 0xb0, 0xbb, 0x41, 0xde, 0x06, 0xc1, 0xf9, 0x06, 0xa8,
-  0x1b, 0x94, 0x6f, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x62,
-  0x70, 0x00, 0x20, 0x08, 0x06, 0x1e, 0xfc, 0x06, 0xbd, 0x1b, 0x08, 0xc1,
-  0x05, 0xc6, 0xcd, 0x12, 0x94, 0xc8, 0x40, 0x8b, 0xe1, 0x1a, 0xf0, 0x21,
-  0xf2, 0xc2, 0x7b, 0xc0, 0x84, 0x7c, 0x08, 0x24, 0x22, 0xf2, 0xc2, 0x7c,
-  0xcc, 0x32, 0x98, 0x08, 0x8a, 0x8c, 0xd4, 0x70, 0x04, 0x4a, 0x89, 0x6e,
-  0x30, 0x7c, 0x97, 0x52, 0xc3, 0x0c, 0x37, 0x04, 0x78, 0x1b, 0x90, 0x41,
-  0x0d, 0x81, 0x0e, 0x47, 0xa8, 0x94, 0xe9, 0x06, 0xc3, 0x57, 0x81, 0xa0,
-  0xc7, 0x52, 0xc3, 0x0c, 0x37, 0x04, 0x7b, 0x1b, 0x90, 0x41, 0x05, 0x83,
-  0xce, 0x32, 0x9c, 0x08, 0x8f, 0x04, 0xf7, 0xb3, 0xc1, 0x30, 0x07, 0xa7,
-  0xc1, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x50, 0x85, 0x70, 0xd0,
-  0xbe, 0x41, 0xea, 0x06, 0xfa, 0x1b, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82,
-  0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x71, 0xc8, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x3c, 0x28, 0x1c, 0xd0, 0x6f, 0x70, 0x10,
-  0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x3c, 0x29, 0x1c, 0xd4, 0x6f,
-  0xc0, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x3c, 0x2a, 0x1c,
-  0xd8, 0x6f, 0x20, 0x11, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x6c,
-  0x28, 0x1c, 0xd4, 0x6f, 0x30, 0xbb, 0x41, 0x10, 0xc2, 0x81, 0xf8, 0x06,
-  0xff, 0x1b, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x18, 0x1c,
-  0x00, 0x08, 0x82, 0x81, 0xa7, 0xc2, 0xc1, 0xfd, 0x06, 0x42, 0x70, 0x81,
-  0x71, 0xb3, 0x04, 0x3c, 0x32, 0xdc, 0xd0, 0x53, 0x29, 0x1c, 0x80, 0xc1,
-  0x2c, 0x43, 0x8a, 0xa8, 0x48, 0x50, 0xb0, 0x1b, 0xe4, 0x6f, 0x00, 0x17,
-  0x18, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x4b, 0x0d, 0x07, 0xfa,
-  0x1b, 0x90, 0x95, 0xef, 0x06, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xb0,
-  0xd8, 0x70, 0xa0, 0xbf, 0x41, 0x20, 0x5c, 0x30, 0x4c, 0xcd, 0x6e, 0xe0,
-  0xbf, 0x01, 0x5c, 0x60, 0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x2c,
-  0x3a, 0x1c, 0xfc, 0x6f, 0x70, 0x56, 0xe3, 0x1b, 0x8c, 0x18, 0x1c, 0x00,
-  0x08, 0x82, 0xc1, 0xb2, 0xc3, 0xc1, 0xff, 0x06, 0x81, 0x70, 0xc1, 0x30,
-  0x17, 0x18, 0x75, 0x87, 0x51, 0xc7, 0xb7, 0xc1, 0x30, 0xd7, 0xaa, 0xc1,
-  0x30, 0x47, 0x0c, 0x73, 0xc4, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60,
-  0x50, 0x81, 0x71, 0xc0, 0xc2, 0x01, 0xfa, 0x06, 0x39, 0x1c, 0x8c, 0x26,
-  0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31,
-  0x14, 0x91, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x3c, 0x67, 0x1c,
-  0xcc, 0x70, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x3c,
-  0x68, 0x1c, 0xd0, 0x70, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20,
-  0x18, 0x3c, 0x69, 0x1c, 0xd4, 0x70, 0x90, 0x10, 0xc1, 0x88, 0x81, 0x02,
-  0x80, 0x20, 0x18, 0x6c, 0x67, 0x1c, 0xd0, 0x70, 0x20, 0xbf, 0x41, 0x00,
-  0xc6, 0x41, 0x08, 0x07, 0x3e, 0x1c, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82,
-  0x10, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x81, 0x97, 0xc6, 0x81, 0x0d,
-  0x07, 0x42, 0x70, 0x81, 0x71, 0xb3, 0x04, 0x3c, 0x32, 0xdc, 0xc0, 0x57,
-  0x67, 0x1c, 0x80, 0xc1, 0x2c, 0xc3, 0x8a, 0xf0, 0x48, 0x60, 0xe6, 0x1b,
-  0xa0, 0x6f, 0x10, 0x9f, 0xe1, 0x08, 0xd1, 0x4a, 0xdf, 0x80, 0xf8, 0x66,
-  0x19, 0x58, 0xe4, 0x45, 0x02, 0x53, 0xdf, 0x60, 0xb4, 0xe2, 0x63, 0xc1,
-  0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x28, 0x0b, 0x0c, 0xf9, 0x58, 0x11,
-  0xc4, 0xa7, 0x88, 0x38, 0x0e, 0x74, 0xb8, 0x21, 0x78, 0xe3, 0x00, 0x0c,
-  0x66, 0x19, 0x5a, 0xc4, 0x45, 0x02, 0x1b, 0xe4, 0x37, 0x80, 0xcf, 0x2c,
-  0xc1, 0x8c, 0x58, 0xfc, 0x06, 0x44, 0x7c, 0x66, 0x09, 0x66, 0x64, 0x38,
-  0xa2, 0xb5, 0xe4, 0x37, 0x10, 0xbe, 0x59, 0x06, 0x18, 0x99, 0x91, 0xc0,
-  0x5c, 0x6b, 0x7e, 0x83, 0xf8, 0x58, 0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e,
-  0x30, 0xca, 0x82, 0x48, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x82, 0x8f, 0x03,
-  0x1d, 0x6e, 0x08, 0xf4, 0x38, 0x00, 0x83, 0x59, 0x86, 0x18, 0x91, 0x91,
-  0xc0, 0xf6, 0x37, 0x18, 0xe2, 0x33, 0x4b, 0x30, 0x23, 0x46, 0xf8, 0x6f,
-  0x00, 0x9f, 0x59, 0x82, 0x19, 0x19, 0x68, 0x31, 0xb4, 0x16, 0xc1, 0x5c,
-  0x84, 0x88, 0x11, 0x41, 0x46, 0xe0, 0x36, 0x78, 0x91, 0x0b, 0x86, 0xb1,
-  0xfe, 0x0d, 0x42, 0x38, 0x88, 0xcf, 0x70, 0x44, 0x6e, 0x89, 0x70, 0x40,
-  0x7c, 0xb3, 0x0c, 0x34, 0x72, 0x23, 0x81, 0x8d, 0x70, 0xa0, 0x5b, 0xf1,
-  0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0x60, 0x94, 0x05, 0x86, 0x7c,
-  0xac, 0x08, 0xe2, 0x53, 0x84, 0x2a, 0x07, 0x3a, 0xdc, 0x10, 0xa0, 0x72,
-  0x00, 0x06, 0xb3, 0x0c, 0x35, 0x62, 0x23, 0x81, 0x0d, 0x2b, 0x1c, 0xc0,
-  0x67, 0x96, 0x60, 0x47, 0x0c, 0x85, 0x03, 0x22, 0x3e, 0xb3, 0x04, 0x3b,
-  0x32, 0x1c, 0x41, 0x5e, 0x29, 0x1c, 0x08, 0xdf, 0x2c, 0x03, 0x8e, 0xec,
-  0x48, 0x60, 0xe5, 0xa5, 0xc2, 0x41, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1,
-  0x30, 0x17, 0x18, 0x65, 0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x51,
-  0xcb, 0x81, 0x0e, 0x37, 0x04, 0xb3, 0x1c, 0x80, 0xc1, 0x2c, 0x43, 0x8e,
-  0xe8, 0x48, 0x60, 0x32, 0x1c, 0x0c, 0xf1, 0x99, 0x25, 0xd8, 0x11, 0x23,
-  0x6e, 0x38, 0x80, 0xcf, 0x2c, 0xc1, 0x8e, 0x0c, 0xb4, 0x18, 0x5a, 0x8d,
-  0x60, 0x36, 0x42, 0xe4, 0x88, 0xa0, 0x23, 0xbe, 0x1b, 0xdc, 0xc8, 0x05,
-  0xc3, 0x5c, 0x60, 0xd4, 0x6d, 0x46, 0xdd, 0x08, 0x07, 0xc3, 0x1c, 0xcd,
-  0x06, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08,
-  0x82, 0x41, 0x75, 0xce, 0xc1, 0x2c, 0x07, 0x6f, 0x1c, 0x80, 0x73, 0x30,
-  0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09,
-  0xc4, 0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xf0, 0xb8,
-  0x73, 0xa0, 0xcb, 0x41, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60,
-  0xf0, 0xbc, 0x73, 0xb0, 0xcb, 0x41, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00,
-  0x82, 0x60, 0xf0, 0xc0, 0x73, 0xc0, 0xcb, 0x41, 0x42, 0x04, 0x23, 0x06,
-  0x0a, 0x00, 0x82, 0x60, 0xb0, 0xb9, 0x73, 0xb0, 0xcb, 0x41, 0x1e, 0x07,
-  0xc1, 0x39, 0x07, 0xa8, 0x1c, 0x94, 0x73, 0x30, 0x9a, 0x10, 0x00, 0xa3,
-  0x09, 0x42, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x1e, 0x3c, 0x07,
-  0xbd, 0x1c, 0x08, 0xc1, 0x05, 0xc6, 0xcd, 0x12, 0xf0, 0xc8, 0x40, 0x8b,
-  0xe1, 0x1a, 0x27, 0x22, 0x92, 0x83, 0x89, 0xc0, 0x44, 0x8a, 0x08, 0x3b,
-  0x22, 0x92, 0x83, 0x8a, 0xcc, 0x32, 0xf4, 0xc8, 0x8f, 0x8c, 0xd8, 0x70,
-  0x84, 0x08, 0x89, 0x72, 0x30, 0x7c, 0x37, 0x42, 0xc3, 0x0c, 0x37, 0x04,
-  0x78, 0x1c, 0x90, 0x41, 0x0d, 0x81, 0x0e, 0x47, 0xa8, 0x98, 0x29, 0x07,
-  0xc3, 0x57, 0x81, 0xa0, 0xc7, 0x62, 0xc3, 0x0c, 0x37, 0x04, 0x7b, 0x1c,
-  0x90, 0x41, 0x05, 0x83, 0xce, 0x32, 0xf8, 0xc8, 0x9c, 0x04, 0xf7, 0xc3,
-  0xc1, 0x30, 0x07, 0xb7, 0xc1, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60,
-  0x50, 0x85, 0x74, 0xd0, 0xce, 0x41, 0x2a, 0x07, 0xfa, 0x1c, 0x8c, 0x26,
-  0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31,
-  0x14, 0x71, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x3c, 0x28, 0x1d,
-  0xd0, 0x73, 0x70, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x3c,
-  0x29, 0x1d, 0xd4, 0x73, 0xc0, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20,
-  0x18, 0x3c, 0x2a, 0x1d, 0xd8, 0x73, 0x20, 0x11, 0xc1, 0x88, 0x81, 0x02,
-  0x80, 0x20, 0x18, 0x6c, 0x28, 0x1d, 0xd4, 0x73, 0x30, 0xcb, 0x41, 0x10,
-  0xd2, 0x81, 0x38, 0x07, 0xff, 0x1c, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82,
-  0x10, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x81, 0xa7, 0xd2, 0xc1, 0x3d,
-  0x07, 0x42, 0x70, 0x81, 0x71, 0xb3, 0x04, 0x73, 0x32, 0xdc, 0xd0, 0x63,
-  0x29, 0x1d, 0x80, 0xc1, 0x2c, 0x03, 0x98, 0x84, 0x49, 0x50, 0xb0, 0x1c,
-  0xe4, 0x73, 0x00, 0x17, 0x18, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06,
-  0x4b, 0x4d, 0x07, 0xfa, 0x1c, 0xf8, 0x90, 0x2f, 0x07, 0x23, 0x06, 0x07,
-  0x00, 0x82, 0x60, 0xb0, 0xd8, 0x74, 0xa0, 0xcf, 0x41, 0x20, 0x5c, 0x30,
-  0x4c, 0xcd, 0x72, 0xe0, 0xcf, 0x01, 0x5c, 0x60, 0xd4, 0x88, 0xc1, 0x01,
-  0x80, 0x20, 0x18, 0x2c, 0x3a, 0x1d, 0xfc, 0x73, 0x70, 0x66, 0xe3, 0x1c,
-  0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0xb2, 0xd3, 0xc1, 0x3f, 0x07,
-  0x81, 0x70, 0xc1, 0x30, 0x17, 0x18, 0x75, 0x87, 0x51, 0xc7, 0xc7, 0xc1,
-  0x30, 0xd7, 0xba, 0xc1, 0x30, 0x47, 0x0c, 0x73, 0xc4, 0x30, 0x23, 0x06,
-  0x07, 0x00, 0x82, 0x60, 0x50, 0x81, 0x75, 0xc0, 0xd2, 0x01, 0x3a, 0x07,
-  0x39, 0x1d, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c,
-  0xc2, 0x68, 0x02, 0x31, 0x14, 0x91, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20,
-  0x18, 0x3c, 0x67, 0x1d, 0xcc, 0x74, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02,
-  0x80, 0x20, 0x18, 0x3c, 0x68, 0x1d, 0xd0, 0x74, 0x90, 0x10, 0xc1, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x3c, 0x69, 0x1d, 0xd4, 0x74, 0x90, 0x10,
-  0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x6c, 0x67, 0x1d, 0xd0, 0x74,
-  0x20, 0xcf, 0x41, 0x00, 0xd6, 0x41, 0x48, 0x07, 0x3e, 0x1d, 0x8c, 0x26,
-  0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x81,
-  0x97, 0xd6, 0x81, 0x4d, 0x07, 0x42, 0x70, 0x81, 0x71, 0xb3, 0x04, 0x73,
-  0x32, 0xdc, 0xc0, 0x67, 0x67, 0x1d, 0x80, 0xc1, 0x2c, 0x83, 0x98, 0xcc,
-  0x49, 0x60, 0xe6, 0x1c, 0xa0, 0x73, 0x10, 0x9f, 0xe1, 0x08, 0x3e, 0x4a,
-  0xe7, 0x80, 0xf8, 0x66, 0x19, 0xc6, 0xc4, 0x4c, 0x02, 0x53, 0xe7, 0xa0,
-  0x8f, 0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x28, 0x0b,
-  0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0xb8, 0x0e, 0x74, 0xb8, 0x21,
-  0x78, 0xeb, 0x00, 0x0c, 0x66, 0x19, 0xc8, 0xa4, 0x4c, 0x02, 0x1b, 0xe4,
-  0x39, 0x80, 0xcf, 0x2c, 0x81, 0x9a, 0x58, 0x3c, 0x07, 0x44, 0x7c, 0x66,
-  0x09, 0xd4, 0x64, 0x38, 0xe2, 0x94, 0xe4, 0x39, 0x10, 0xbe, 0x59, 0x86,
-  0x33, 0x51, 0x93, 0xc0, 0x50, 0x69, 0x9e, 0x83, 0xf8, 0x58, 0xe0, 0xd0,
-  0xe7, 0x82, 0x61, 0x2e, 0x30, 0xca, 0x82, 0x48, 0x3e, 0x56, 0x04, 0xf1,
-  0x29, 0x82, 0xaf, 0x03, 0x1d, 0x6e, 0x08, 0xf4, 0x3a, 0x00, 0x83, 0x59,
-  0x06, 0x34, 0x49, 0x93, 0xc0, 0xf6, 0x39, 0x18, 0xe2, 0x33, 0x4b, 0xa0,
-  0x26, 0x46, 0xf8, 0x73, 0x00, 0x9f, 0x59, 0x02, 0x35, 0x19, 0x68, 0x31,
-  0x34, 0x32, 0xc1, 0xca, 0x84, 0x40, 0x13, 0x21, 0x4d, 0xc8, 0x32, 0x30,
-  0x93, 0x0b, 0x86, 0xb1, 0x7e, 0x0e, 0x42, 0x3a, 0x88, 0xcf, 0x70, 0x44,
-  0xae, 0x89, 0x74, 0x40, 0x7c, 0xb3, 0x0c, 0x6b, 0xe2, 0x26, 0x81, 0x8d,
-  0x74, 0xa0, 0x6b, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0x60,
-  0x94, 0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x84, 0x6a, 0x07, 0x3a,
-  0xdc, 0x10, 0xa0, 0x76, 0x00, 0x06, 0xb3, 0x0c, 0x6c, 0xd2, 0x26, 0x81,
-  0x0d, 0x2b, 0x1d, 0xc0, 0x67, 0x96, 0x40, 0x4e, 0x0c, 0xa5, 0x03, 0x22,
-  0x3e, 0xb3, 0x04, 0x72, 0x32, 0x1c, 0x41, 0x6e, 0x29, 0x1d, 0x08, 0xdf,
-  0x2c, 0xc3, 0x9b, 0xc8, 0x49, 0x60, 0xe5, 0xa6, 0xd2, 0x41, 0x7c, 0x2c,
-  0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x18, 0x65, 0x41, 0x24, 0x1f, 0x2b,
-  0x82, 0xf8, 0x14, 0x51, 0xdb, 0x81, 0x0e, 0x37, 0x04, 0xb3, 0x1d, 0x80,
-  0xc1, 0x2c, 0x03, 0x9c, 0xc4, 0x49, 0x60, 0x32, 0x1d, 0x0c, 0xf1, 0x99,
-  0x25, 0x90, 0x13, 0x23, 0x6e, 0x3a, 0x80, 0xcf, 0x2c, 0x81, 0x9c, 0x0c,
-  0xb4, 0x18, 0x1a, 0x9b, 0x60, 0x6d, 0x42, 0xc0, 0x89, 0x10, 0x27, 0xbe,
-  0x1d, 0xb8, 0xc9, 0x05, 0xc3, 0x5c, 0x60, 0xd4, 0x6d, 0x46, 0xdd, 0x48,
-  0x07, 0xc3, 0x1c, 0x0d, 0x07, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c,
-  0x18, 0x1c, 0x00, 0x08, 0x82, 0x41, 0x75, 0xde, 0xc1, 0x6c, 0x07, 0x6f,
-  0x1d, 0x80, 0x77, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a,
-  0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00,
-  0x82, 0x60, 0xf0, 0xb8, 0x77, 0xa0, 0xdb, 0x41, 0x42, 0x04, 0x23, 0x06,
-  0x08, 0x00, 0x82, 0x60, 0xf0, 0xbc, 0x77, 0xb0, 0xdb, 0x41, 0x42, 0x04,
-  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xf0, 0xc0, 0x77, 0xc0, 0xdb, 0x41,
-  0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0xb9, 0x77, 0xb0,
-  0xdb, 0x41, 0x5e, 0x07, 0xc1, 0x79, 0x07, 0xa8, 0x1d, 0x94, 0x77, 0x30,
-  0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08,
-  0x06, 0x1e, 0x7c, 0x07, 0xbd, 0x1d, 0x08, 0xc1, 0x05, 0xc6, 0xcd, 0x12,
-  0xcc, 0xc9, 0x40, 0x8b, 0xe1, 0x1a, 0x3e, 0x22, 0xb2, 0x43, 0x8f, 0xc0,
-  0x04, 0x98, 0x08, 0x72, 0x22, 0xb2, 0x43, 0x98, 0xcc, 0x32, 0xd0, 0x89,
-  0x9d, 0x8c, 0xdc, 0x70, 0x44, 0x08, 0x89, 0x76, 0x30, 0x7c, 0x27, 0x42,
-  0xc3, 0x0c, 0x37, 0x04, 0x78, 0x1d, 0x90, 0x41, 0x0d, 0x81, 0x0e, 0x47,
-  0xa8, 0x9c, 0x69, 0x07, 0xc3, 0x57, 0x81, 0xa0, 0xc7, 0x72, 0xc3, 0x0c,
-  0x37, 0x04, 0x7b, 0x1d, 0x90, 0x41, 0x05, 0x83, 0xce, 0x32, 0xd4, 0x89,
-  0xaa, 0x04, 0xf7, 0xd3, 0xc1, 0x30, 0x07, 0xc7, 0xc1, 0x30, 0x23, 0x06,
-  0x07, 0x00, 0x82, 0x60, 0x50, 0x85, 0x78, 0xd0, 0xde, 0x41, 0x6a, 0x07,
-  0xfa, 0x1d, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c,
-  0xc2, 0x68, 0x02, 0x31, 0x14, 0x71, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20,
-  0x18, 0x3c, 0x28, 0x1e, 0xd0, 0x77, 0x70, 0x10, 0xc1, 0x88, 0x01, 0x02,
-  0x80, 0x20, 0x18, 0x3c, 0x29, 0x1e, 0xd4, 0x77, 0xc0, 0x10, 0xc1, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x3c, 0x2a, 0x1e, 0xd8, 0x77, 0x20, 0x11,
-  0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x6c, 0x28, 0x1e, 0xd4, 0x77,
-  0x30, 0xdb, 0x41, 0x10, 0xe2, 0x81, 0x78, 0x07, 0xff, 0x1d, 0x8c, 0x26,
-  0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x81,
-  0xa7, 0xe2, 0xc1, 0x7d, 0x07, 0x42, 0x70, 0x81, 0x71, 0xb3, 0x04, 0xaa,
-  0x32, 0xdc, 0xd0, 0x73, 0x29, 0x1e, 0x80, 0xc1, 0x2c, 0xc3, 0x9d, 0xe0,
-  0x49, 0x50, 0xb0, 0x1d, 0xe4, 0x77, 0x00, 0x17, 0x18, 0x35, 0x62, 0x70,
-  0x00, 0x20, 0x08, 0x06, 0x4b, 0x8d, 0x07, 0xfa, 0x1d, 0xf4, 0x90, 0x6f,
-  0x07, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xb0, 0xd8, 0x78, 0xa0, 0xdf,
-  0x41, 0x20, 0x5c, 0x30, 0x4c, 0xcd, 0x76, 0xe0, 0xdf, 0x01, 0x5c, 0x60,
-  0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x2c, 0x3a, 0x1e, 0xfc, 0x77,
-  0x70, 0x76, 0xe3, 0x1d, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0xb2,
-  0xe3, 0xc1, 0x7f, 0x07, 0x81, 0x70, 0xc1, 0x30, 0x17, 0x18, 0x75, 0x87,
-  0x51, 0xc7, 0xd7, 0xc1, 0x30, 0xd7, 0xca, 0xc1, 0x30, 0x47, 0x0c, 0x73,
-  0xc4, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x50, 0x81, 0x79, 0xc0,
-  0xe2, 0x01, 0x7a, 0x07, 0x39, 0x1e, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82,
-  0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x91, 0xc8, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x3c, 0x67, 0x1e, 0xcc, 0x78, 0x90, 0x10,
-  0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x3c, 0x68, 0x1e, 0xd0, 0x78,
-  0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x3c, 0x69, 0x1e,
-  0xd4, 0x78, 0x90, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x6c,
-  0x67, 0x1e, 0xd0, 0x78, 0x20, 0xdf, 0x41, 0x00, 0xe6, 0x41, 0x88, 0x07,
-  0x3e, 0x1e, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x18, 0x1c,
-  0x00, 0x08, 0x82, 0x81, 0x97, 0xe6, 0x81, 0x8d, 0x07, 0x42, 0x70, 0x81,
-  0x71, 0xb3, 0x04, 0xaa, 0x32, 0xdc, 0xc0, 0x77, 0x67, 0x1e, 0x80, 0xc1,
-  0x2c, 0x43, 0x9e, 0xa8, 0x4a, 0x60, 0xe6, 0x1d, 0xa0, 0x77, 0x10, 0x9f,
-  0xe1, 0x88, 0x3d, 0x4a, 0xef, 0x80, 0xf8, 0x66, 0x19, 0xf4, 0xa4, 0x4f,
-  0x02, 0x53, 0xef, 0x80, 0x8f, 0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86,
-  0xb9, 0xc0, 0x28, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0x38,
-  0x0f, 0x74, 0xb8, 0x21, 0x78, 0xf3, 0x00, 0x0c, 0x66, 0x19, 0xf6, 0x84,
-  0x4f, 0x02, 0x1b, 0xe4, 0x3b, 0x80, 0xcf, 0x2c, 0x41, 0xa8, 0x58, 0x7c,
-  0x07, 0x44, 0x7c, 0x66, 0x09, 0x42, 0x65, 0x38, 0xc2, 0x94, 0xe4, 0x3b,
-  0x10, 0xbe, 0x59, 0x06, 0x3f, 0x09, 0x95, 0xc0, 0x4e, 0x69, 0xbe, 0x83,
-  0xf8, 0x58, 0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x30, 0xca, 0x82, 0x48,
-  0x3e, 0x56, 0x04, 0xf1, 0x29, 0x82, 0xcf, 0x03, 0x1d, 0x6e, 0x08, 0xf4,
-  0x3c, 0x00, 0x83, 0x59, 0x86, 0x3f, 0x01, 0x95, 0xc0, 0xf6, 0x3b, 0x18,
-  0xe2, 0x33, 0x4b, 0x10, 0x2a, 0x46, 0xf8, 0x77, 0x00, 0x9f, 0x59, 0x82,
-  0x50, 0x19, 0x68, 0x31, 0xb4, 0x3d, 0xc1, 0xf8, 0x84, 0xf8, 0x13, 0x01,
-  0x54, 0xc4, 0x32, 0xe8, 0x93, 0x0b, 0x86, 0xb1, 0xfe, 0x0e, 0x42, 0x3c,
-  0x88, 0xcf, 0x70, 0x44, 0xee, 0x89, 0x78, 0x40, 0x7c, 0xb3, 0x0c, 0xa2,
-  0x52, 0x2a, 0x81, 0x8d, 0x78, 0xa0, 0x7b, 0xf1, 0xb1, 0x60, 0xa0, 0xcf,
-  0x05, 0xc3, 0x5c, 0x60, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53,
-  0x84, 0xaa, 0x07, 0x3a, 0xdc, 0x10, 0xa0, 0x7a, 0x00, 0x06, 0xb3, 0x0c,
-  0xa3, 0x42, 0x2a, 0x81, 0x0d, 0x2b, 0x1e, 0xc0, 0x67, 0x96, 0x20, 0x55,
-  0x0c, 0xc5, 0x03, 0x22, 0x3e, 0xb3, 0x04, 0xa9, 0x32, 0x1c, 0x41, 0x7e,
-  0x29, 0x1e, 0x08, 0xdf, 0x2c, 0x83, 0xa9, 0xa4, 0x4a, 0x60, 0xe5, 0xa7,
-  0xe2, 0x41, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x18, 0x65,
-  0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x51, 0xeb, 0x81, 0x0e, 0x37,
-  0x04, 0xb3, 0x1e, 0x80, 0xc1, 0x2c, 0xc3, 0xa9, 0xa0, 0x4a, 0x60, 0x32,
-  0x1e, 0x0c, 0xf1, 0x99, 0x25, 0x48, 0x15, 0x23, 0x6e, 0x3c, 0x80, 0xcf,
-  0x2c, 0x41, 0xaa, 0x0c, 0xb4, 0x18, 0xda, 0xa8, 0x60, 0xa4, 0x42, 0x9c,
-  0x8a, 0x80, 0x2a, 0xbe, 0x1f, 0x94, 0xca, 0x05, 0xc3, 0x5c, 0x60, 0xd4,
-  0x6d, 0x46, 0xdd, 0x88, 0x07, 0xc3, 0x1c, 0x4d, 0x07, 0xc3, 0x1c, 0x31,
-  0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x41, 0x75, 0xee,
-  0xc1, 0xac, 0x07, 0x6f, 0x1e, 0x80, 0x7b, 0x30, 0x9a, 0x10, 0x00, 0xa3,
-  0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22,
-  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xf0, 0xb8, 0x7b, 0xa0, 0xeb, 0x41,
-  0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xf0, 0xbc, 0x7b, 0xb0,
-  0xeb, 0x41, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xf0, 0xc0,
-  0x7b, 0xc0, 0xeb, 0x41, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60,
-  0xb0, 0xb9, 0x7b, 0xb0, 0xeb, 0x41, 0x9e, 0x07, 0xc1, 0xb9, 0x07, 0xa8,
-  0x1e, 0x94, 0x7b, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x62,
-  0x70, 0x00, 0x20, 0x08, 0x06, 0x1e, 0xbc, 0x07, 0xbd, 0x1e, 0x08, 0xc1,
-  0x05, 0xc6, 0xcd, 0x12, 0xa8, 0xca, 0x40, 0x8b, 0xe1, 0x1a, 0x75, 0x22,
-  0xd2, 0x03, 0x9d, 0xc0, 0xc4, 0x9d, 0x08, 0xa9, 0x22, 0xd2, 0x03, 0x9e,
-  0x98, 0x09, 0x06, 0xa3, 0x1e, 0xc0, 0x67, 0x96, 0x61, 0x55, 0x5a, 0x85,
-  0x04, 0x83, 0xe1, 0x08, 0x14, 0x0c, 0x46, 0x3d, 0x18, 0xbe, 0x4b, 0xc1,
-  0x60, 0x98, 0xe1, 0x86, 0x20, 0xcf, 0x03, 0x32, 0xa8, 0x21, 0xd0, 0xe1,
-  0x88, 0xe2, 0xd4, 0x83, 0xe1, 0xab, 0x40, 0xd0, 0x3b, 0x86, 0x19, 0x6e,
-  0x08, 0xf8, 0x3c, 0x20, 0x83, 0x0a, 0x06, 0x9d, 0x65, 0x60, 0x95, 0x70,
-  0x09, 0x0e, 0xcc, 0x83, 0x61, 0x2e, 0xae, 0x83, 0x61, 0x46, 0x0c, 0x0e,
-  0x00, 0x04, 0xc1, 0xa0, 0x12, 0xf9, 0xc0, 0xdd, 0x03, 0x55, 0x0f, 0xf6,
-  0x3d, 0x18, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84,
-  0xd1, 0x04, 0x62, 0x28, 0xe2, 0x90, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30,
-  0x78, 0x52, 0x3e, 0xa8, 0xf7, 0xe0, 0x20, 0x82, 0x11, 0x03, 0x04, 0x00,
-  0x41, 0x30, 0x78, 0x54, 0x3e, 0xb0, 0xf7, 0x80, 0x21, 0x82, 0x11, 0x03,
-  0x04, 0x00, 0x41, 0x30, 0x78, 0x56, 0x3e, 0xb8, 0xf7, 0x40, 0x22, 0x82,
-  0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0xd8, 0x52, 0x3e, 0xb0, 0xf7, 0x80,
-  0xd6, 0x83, 0x40, 0xe4, 0x83, 0x71, 0x0f, 0x40, 0x3e, 0x18, 0x4d, 0x08,
-  0x80, 0xd1, 0x04, 0x21, 0x18, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x6f,
-  0xe5, 0x03, 0x7c, 0x0f, 0x84, 0xe0, 0x02, 0xe3, 0x66, 0x09, 0xc2, 0x65,
-  0xb8, 0xc1, 0x07, 0x03, 0x95, 0x0f, 0xc0, 0x60, 0x96, 0xc1, 0x55, 0x5e,
-  0x25, 0xa8, 0x58, 0x0f, 0xf4, 0x3d, 0x80, 0x0b, 0x8c, 0x1a, 0x31, 0x38,
-  0x00, 0x10, 0x04, 0x83, 0xc5, 0xe6, 0x83, 0x7d, 0x0f, 0xc8, 0x30, 0xf8,
-  0xf5, 0x60, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x96, 0x9b, 0x0f, 0xf6,
-  0x3d, 0x08, 0x84, 0x0b, 0x86, 0x29, 0x5a, 0x0f, 0xfe, 0x3d, 0x80, 0x0b,
-  0x8c, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x65, 0xe7, 0x03, 0x90,
-  0x0f, 0xc4, 0x80, 0xdc, 0x83, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x58,
-  0x78, 0x3e, 0x00, 0xf9, 0x20, 0x10, 0x2e, 0x18, 0xe6, 0x02, 0xa3, 0xee,
-  0x30, 0xea, 0xfa, 0x3c, 0x18, 0xe6, 0x5c, 0x3b, 0x18, 0xe6, 0x88, 0x61,
-  0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xaa, 0xb0, 0x0f,
-  0x5a, 0x3e, 0x48, 0xf7, 0x40, 0xe7, 0x83, 0xd1, 0x84, 0x00, 0x18, 0x4d,
-  0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x12, 0x19,
-  0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x07, 0xed, 0x03, 0x9a, 0x0f, 0x12,
-  0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x27, 0xed, 0x83, 0x9a,
-  0x0f, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x47, 0xed,
-  0x03, 0x9b, 0x0f, 0x12, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83,
-  0x0d, 0xed, 0x83, 0x9a, 0x0f, 0xe6, 0x3d, 0x08, 0xc2, 0x3e, 0x10, 0xf9,
-  0xe0, 0xe7, 0x83, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0x11, 0x83,
-  0x03, 0x00, 0x41, 0x30, 0xf0, 0xd4, 0x3e, 0xb8, 0xf9, 0x40, 0x08, 0x2e,
-  0x30, 0x6e, 0x96, 0x20, 0x5c, 0x86, 0x1b, 0xfa, 0x30, 0x40, 0xfb, 0x00,
-  0x0c, 0x66, 0x19, 0x60, 0x25, 0x5c, 0x02, 0x3b, 0xf7, 0x20, 0xdd, 0x83,
-  0xf8, 0x0c, 0x47, 0x88, 0x62, 0xa0, 0xee, 0x01, 0xf1, 0xcd, 0x32, 0xc4,
-  0x0a, 0xad, 0x04, 0xb6, 0xee, 0xc1, 0x28, 0x06, 0xf1, 0xb1, 0x60, 0xa0,
-  0xcf, 0x05, 0xc3, 0x5c, 0x60, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2,
-  0x53, 0x84, 0xdc, 0x07, 0x3a, 0xdc, 0x10, 0xc0, 0x7d, 0x00, 0x06, 0xb3,
-  0x0c, 0xb2, 0x32, 0x2b, 0x81, 0x0d, 0xf3, 0x1e, 0xc0, 0x67, 0x96, 0x00,
-  0x57, 0x4c, 0xde, 0x03, 0x22, 0x3e, 0xb3, 0x04, 0xb8, 0x32, 0x1c, 0xd1,
-  0x8a, 0xc1, 0xbc, 0x07, 0xc2, 0x37, 0xcb, 0x50, 0x2b, 0xb8, 0x12, 0x98,
-  0x2b, 0x06, 0xf4, 0x1e, 0xc4, 0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73,
-  0x81, 0x51, 0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x7d, 0x1f,
-  0xe8, 0x70, 0x43, 0xb0, 0xf7, 0x01, 0x18, 0xcc, 0x32, 0xd8, 0xca, 0xad,
-  0x04, 0xc6, 0xef, 0xc1, 0x10, 0x9f, 0x59, 0x02, 0x5c, 0x31, 0xe2, 0xdf,
-  0x03, 0xf8, 0xcc, 0x12, 0xe0, 0xca, 0x40, 0x8b, 0xa1, 0xc9, 0x0a, 0x36,
-  0x2b, 0x84, 0xad, 0x08, 0xb7, 0x02, 0x97, 0x02, 0xad, 0x5c, 0x30, 0x8c,
-  0xf9, 0x7b, 0x20, 0xf2, 0x41, 0x7c, 0x86, 0x23, 0x68, 0x61, 0xe4, 0x03,
-  0xe2, 0x9b, 0x65, 0xc8, 0x15, 0x5e, 0x09, 0x8c, 0xe4, 0x83, 0x5a, 0x88,
-  0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa3, 0x2c, 0x30, 0xe4,
-  0x63, 0x45, 0x10, 0x9f, 0x22, 0x56, 0x3f, 0xd0, 0xe1, 0x86, 0x20, 0xf5,
-  0x03, 0x30, 0x98, 0x65, 0xd0, 0x95, 0x5d, 0x09, 0x6c, 0x60, 0xf9, 0x00,
-  0x3e, 0xb3, 0x04, 0xe0, 0x62, 0x29, 0x1f, 0x10, 0xf1, 0x99, 0x25, 0x00,
-  0x97, 0xe1, 0x88, 0x5f, 0x50, 0xf9, 0x40, 0xf8, 0x66, 0x19, 0x7a, 0x05,
-  0x5c, 0x02, 0x03, 0x87, 0x95, 0x0f, 0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b,
-  0x86, 0xb9, 0xc0, 0x28, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08,
-  0xdb, 0x0f, 0x74, 0xb8, 0x21, 0xa0, 0xfd, 0x00, 0x0c, 0x66, 0x19, 0x7c,
-  0xe5, 0x57, 0x02, 0x9b, 0xf9, 0x60, 0x88, 0xcf, 0x2c, 0x01, 0xb8, 0x18,
-  0x81, 0xf3, 0x01, 0x7c, 0x66, 0x09, 0xc0, 0x65, 0xa0, 0xc5, 0xd0, 0x74,
-  0x05, 0xdb, 0x15, 0xc2, 0x57, 0x84, 0x5f, 0xa1, 0x0d, 0x5e, 0xb9, 0x60,
-  0x98, 0x0b, 0x8c, 0xba, 0xcd, 0xa8, 0x23, 0xf9, 0x60, 0x98, 0xab, 0xf1,
-  0x60, 0x98, 0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41,
-  0x30, 0xa8, 0xd0, 0x3f, 0xa0, 0xfd, 0x00, 0xee, 0x83, 0xf0, 0x0f, 0x46,
-  0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81,
-  0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x9e, 0xf7,
-  0x0f, 0x76, 0x3f, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c,
-  0x1e, 0xf8, 0x0f, 0x78, 0x3f, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40,
-  0x10, 0x0c, 0x9e, 0xf8, 0x0f, 0x7a, 0x3f, 0x48, 0x88, 0x60, 0xc4, 0x40,
-  0x01, 0x40, 0x10, 0x0c, 0xb6, 0xf7, 0x0f, 0x78, 0x3f, 0xd0, 0xfb, 0x20,
-  0x40, 0xff, 0x20, 0xf5, 0x03, 0xf3, 0x0f, 0x46, 0x13, 0x02, 0x60, 0x34,
-  0x41, 0x08, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0xc0, 0x8b, 0xff, 0xc0,
-  0xf7, 0x03, 0x21, 0xb8, 0xc0, 0xb8, 0x59, 0x82, 0x70, 0x19, 0x68, 0x31,
-  0x5c, 0x83, 0x55, 0x48, 0x7e, 0x58, 0x15, 0x98, 0x70, 0x15, 0x01, 0x5c,
-  0x48, 0x7e, 0x78, 0x95, 0x59, 0x06, 0x71, 0x21, 0x17, 0x92, 0x0c, 0x86,
-  0x23, 0x52, 0x32, 0x18, 0xfd, 0x60, 0xf8, 0x4e, 0x25, 0x83, 0x61, 0x86,
-  0x1b, 0x82, 0xbc, 0x0f, 0xc8, 0xa0, 0x86, 0x40, 0x87, 0x23, 0x4a, 0xe2,
-  0xf4, 0x83, 0xe1, 0xab, 0x40, 0xd0, 0x3b, 0x89, 0x61, 0x86, 0x1b, 0x02,
-  0xbe, 0x0f, 0xc8, 0xa0, 0x82, 0x41, 0x67, 0x19, 0xc6, 0x05, 0x5f, 0x82,
-  0x03, 0xfb, 0x60, 0x98, 0x8b, 0xf3, 0x60, 0x98, 0x11, 0x83, 0x03, 0x00,
-  0x41, 0x30, 0xa8, 0x44, 0x50, 0x70, 0xff, 0x40, 0xf5, 0x83, 0xfd, 0x0f,
-  0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34,
-  0x81, 0x18, 0x8a, 0x38, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x9e,
-  0x14, 0x14, 0xea, 0x3f, 0x38, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10,
-  0x0c, 0x1e, 0x15, 0x14, 0xec, 0x3f, 0x60, 0x88, 0x60, 0xc4, 0x00, 0x01,
-  0x40, 0x10, 0x0c, 0x9e, 0x15, 0x14, 0xee, 0x3f, 0x90, 0x88, 0x60, 0xc4,
-  0x40, 0x01, 0x40, 0x10, 0x0c, 0xb6, 0x14, 0x14, 0xec, 0x3f, 0xa0, 0xfd,
-  0x20, 0x10, 0x41, 0x61, 0xfc, 0x03, 0x10, 0x14, 0x46, 0x13, 0x02, 0x60,
-  0x34, 0x41, 0x08, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0xc0, 0x5b, 0x41,
-  0x01, 0xff, 0x03, 0x21, 0xb8, 0xc0, 0xb8, 0x59, 0x02, 0x7c, 0x19, 0x6e,
-  0xf0, 0xc9, 0x40, 0x05, 0x05, 0x30, 0x98, 0x65, 0x28, 0x17, 0x73, 0x09,
-  0x2a, 0xf6, 0x03, 0xfd, 0x0f, 0xe0, 0x02, 0xa3, 0x46, 0x0c, 0x0e, 0x00,
-  0x04, 0xc1, 0x60, 0xb1, 0x41, 0x61, 0xff, 0x83, 0xb2, 0x0c, 0x7e, 0x3f,
-  0x18, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0xe5, 0x06, 0x85, 0xfd, 0x0f,
-  0x02, 0xe1, 0x82, 0x61, 0x8a, 0xf6, 0x83, 0xff, 0x0f, 0xe0, 0x02, 0xa3,
-  0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0xd9, 0x41, 0x01, 0x04, 0x05,
-  0xb1, 0x20, 0xff, 0x60, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x16, 0x1e,
-  0x14, 0x40, 0x50, 0x08, 0x84, 0x0b, 0x86, 0xb9, 0xc0, 0xa8, 0x3b, 0x8c,
-  0xba, 0xbe, 0x0f, 0x86, 0x39, 0x57, 0x0f, 0x86, 0x39, 0x62, 0x98, 0x23,
-  0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x2a, 0x0c, 0x85, 0x16,
-  0x14, 0xd2, 0x3f, 0xd0, 0x41, 0x61, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84,
-  0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c,
-  0x10, 0x00, 0x04, 0xc1, 0xe0, 0x41, 0x43, 0x81, 0x06, 0x85, 0x84, 0x08,
-  0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x49, 0x43, 0xa1, 0x06, 0x85,
-  0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x51, 0x43, 0xc1,
-  0x06, 0x85, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0x43,
-  0x43, 0xa1, 0x06, 0x85, 0xf9, 0x0f, 0x82, 0x30, 0x14, 0x44, 0x50, 0xf8,
-  0x41, 0x61, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0xc4, 0xe0, 0x00,
-  0x40, 0x10, 0x0c, 0x3c, 0x35, 0x14, 0x6e, 0x50, 0x10, 0x82, 0x0b, 0x8c,
-  0x9b, 0x25, 0xc0, 0x97, 0xe1, 0x86, 0xbe, 0x0c, 0xd0, 0x50, 0x00, 0x83,
-  0x59, 0x86, 0x73, 0xc1, 0x97, 0xc0, 0xce, 0x3f, 0x48, 0xff, 0x20, 0x3e,
-  0xc3, 0x11, 0xa3, 0x19, 0xa8, 0x7f, 0x40, 0x7c, 0xb3, 0x0c, 0xe8, 0xb2,
-  0x2e, 0x81, 0xad, 0x7f, 0x40, 0x9a, 0x41, 0x7c, 0x2c, 0x18, 0xe8, 0x73,
-  0xc1, 0x30, 0x17, 0x18, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14,
-  0x21, 0x87, 0x82, 0x0e, 0x37, 0x04, 0x70, 0x28, 0x80, 0xc1, 0x2c, 0x43,
-  0xba, 0xa8, 0x4b, 0x60, 0xc3, 0xfc, 0x07, 0xf0, 0x99, 0x25, 0x78, 0x17,
-  0x93, 0xff, 0x80, 0x88, 0xcf, 0x2c, 0xc1, 0xbb, 0x0c, 0x47, 0xb8, 0x66,
-  0x30, 0xff, 0x81, 0xf0, 0xcd, 0x32, 0xb0, 0xcb, 0xbb, 0x04, 0xf6, 0x9a,
-  0x01, 0xfd, 0x07, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0x60,
-  0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x44, 0x1f, 0x0a, 0x3a,
-  0xdc, 0x10, 0xec, 0xa1, 0x00, 0x06, 0xb3, 0x0c, 0xed, 0xe2, 0x2e, 0x81,
-  0xf1, 0x7f, 0x30, 0xc4, 0x67, 0x96, 0xe0, 0x5d, 0x8c, 0xf8, 0xff, 0x00,
-  0x3e, 0xb3, 0x04, 0xef, 0x32, 0xd0, 0x62, 0x68, 0xe9, 0x82, 0xa9, 0x0b,
-  0xd1, 0x2e, 0x82, 0xbb, 0xc8, 0xad, 0xb0, 0x2e, 0x17, 0x0c, 0x63, 0xfe,
-  0x1f, 0x88, 0xa0, 0x10, 0x9f, 0xe1, 0x08, 0xda, 0x18, 0x41, 0x81, 0xf8,
-  0x66, 0x19, 0xe0, 0x65, 0x5e, 0x02, 0x23, 0x41, 0xa1, 0x36, 0xe2, 0x63,
-  0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x28, 0x0b, 0x0c, 0xf9, 0x58,
-  0x11, 0xc4, 0xa7, 0x88, 0x55, 0x14, 0x74, 0xb8, 0x21, 0x48, 0x45, 0x01,
-  0x0c, 0x66, 0x19, 0xe2, 0x45, 0x5e, 0x02, 0x1b, 0x58, 0x50, 0x80, 0xcf,
-  0x2c, 0xc1, 0xbd, 0x58, 0x0a, 0x0a, 0x44, 0x7c, 0x66, 0x09, 0xee, 0x65,
-  0x38, 0xe2, 0x37, 0x54, 0x50, 0x10, 0xbe, 0x59, 0x06, 0x7a, 0xb9, 0x97,
-  0xc0, 0xc0, 0x63, 0x05, 0x85, 0xf8, 0x58, 0xe0, 0xd0, 0xe7, 0x82, 0x61,
-  0x2e, 0x30, 0xca, 0x82, 0x48, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0xc2, 0x16,
-  0x05, 0x1d, 0x6e, 0x08, 0x68, 0x51, 0x00, 0x83, 0x59, 0x86, 0x7a, 0xb1,
-  0x97, 0xc0, 0x66, 0x50, 0x18, 0xe2, 0x33, 0x4b, 0x70, 0x2f, 0x46, 0xe0,
-  0xa0, 0x00, 0x9f, 0x59, 0x82, 0x7b, 0x19, 0x68, 0x31, 0xb4, 0x78, 0xc1,
-  0xe4, 0x85, 0xa8, 0x17, 0xc1, 0x5e, 0x68, 0x67, 0x5e, 0x2e, 0x18, 0xe6,
-  0x02, 0xa3, 0x6e, 0x33, 0xea, 0x48, 0x50, 0x18, 0xe6, 0x6a, 0x3e, 0x18,
-  0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c,
-  0x2a, 0x74, 0x14, 0x68, 0x51, 0x80, 0x43, 0x21, 0x1c, 0x85, 0xd1, 0x84,
-  0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86,
-  0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0xe7, 0x1d, 0x85,
-  0x5d, 0x14, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x07,
-  0x1e, 0x05, 0x5e, 0x14, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04,
-  0x83, 0x27, 0x1e, 0x85, 0x5e, 0x14, 0x12, 0x22, 0x18, 0x31, 0x50, 0x00,
-  0x10, 0x04, 0x83, 0xed, 0x1d, 0x05, 0x5e, 0x14, 0xf4, 0x50, 0x08, 0xd0,
-  0x51, 0x48, 0x45, 0xc1, 0x1c, 0x85, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10,
-  0x82, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xf0, 0xe2, 0x51, 0xf0, 0x45,
-  0x41, 0x08, 0x2e, 0x30, 0x6e, 0x96, 0x00, 0x5f, 0x06, 0x5a, 0x0c, 0xd7,
-  0x18, 0x17, 0x92, 0x24, 0xc4, 0x05, 0x26, 0xca, 0x45, 0xb8, 0x17, 0x92,
-  0x24, 0xcc, 0x65, 0x96, 0x21, 0x5f, 0xf6, 0x85, 0x44, 0x83, 0xe1, 0x88,
-  0x11, 0x0c, 0x46, 0x51, 0x18, 0xbe, 0x23, 0xc1, 0x60, 0x98, 0xe1, 0x86,
-  0x20, 0x0f, 0x05, 0x32, 0xa8, 0x21, 0xd0, 0xe1, 0x88, 0x12, 0x39, 0x45,
-  0x61, 0xf8, 0x2a, 0x10, 0xf4, 0x4e, 0x64, 0x98, 0xe1, 0x86, 0x80, 0x0f,
-  0x05, 0x32, 0xa8, 0x60, 0xd0, 0x59, 0x06, 0x7d, 0x79, 0x99, 0xe0, 0xc0,
-  0x50, 0x18, 0xe6, 0xe2, 0x3e, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10,
-  0x0c, 0x2a, 0x91, 0x14, 0xdc, 0x51, 0x50, 0x45, 0x61, 0x1f, 0x85, 0xd1,
-  0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20,
-  0x86, 0x22, 0x0e, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x27, 0x25,
-  0x85, 0x7a, 0x14, 0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83,
-  0x47, 0x25, 0x05, 0x7b, 0x14, 0x18, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10,
-  0x04, 0x83, 0x67, 0x25, 0x85, 0x7b, 0x14, 0x24, 0x22, 0x18, 0x31, 0x50,
-  0x00, 0x10, 0x04, 0x83, 0x2d, 0x25, 0x05, 0x7b, 0x14, 0x68, 0x51, 0x08,
-  0x44, 0x52, 0x18, 0x47, 0x01, 0x24, 0x85, 0xd1, 0x84, 0x00, 0x18, 0x4d,
-  0x10, 0x82, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xf0, 0x56, 0x52, 0xc0,
-  0x47, 0x41, 0x08, 0x2e, 0x30, 0x6e, 0x96, 0xe0, 0x65, 0x86, 0x1b, 0x7c,
-  0x34, 0x50, 0x49, 0x01, 0x0c, 0x66, 0x19, 0xf8, 0xa5, 0x5f, 0x82, 0x8a,
-  0x45, 0x41, 0x1f, 0x05, 0xb8, 0xc0, 0xa8, 0x11, 0x83, 0x03, 0x00, 0x41,
-  0x30, 0x58, 0x6c, 0x52, 0xd8, 0x47, 0xe1, 0x07, 0x83, 0x5f, 0x14, 0x46,
-  0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0xb9, 0x49, 0x61, 0x1f, 0x85, 0x40,
-  0xb8, 0x60, 0x98, 0xa2, 0x45, 0xe1, 0x1f, 0x05, 0xb8, 0xc0, 0xa8, 0x11,
-  0x83, 0x03, 0x00, 0x41, 0x30, 0x58, 0x76, 0x52, 0x00, 0x49, 0x41, 0x4c,
-  0xc8, 0x51, 0x18, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x85, 0x27, 0x05,
-  0x90, 0x14, 0x02, 0xe1, 0x82, 0x61, 0x2e, 0x30, 0xea, 0x0e, 0xa3, 0xae,
-  0x0f, 0x85, 0x61, 0xce, 0xf5, 0x83, 0x61, 0x8e, 0x18, 0xe6, 0x88, 0x61,
-  0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0xa0, 0x0a, 0x4b, 0xa1, 0x25, 0x85,
-  0x74, 0x14, 0x74, 0x52, 0x18, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18,
-  0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04,
-  0x00, 0x41, 0x30, 0x78, 0xd0, 0x52, 0xa0, 0x49, 0x21, 0x21, 0x82, 0x11,
-  0x03, 0x04, 0x00, 0x41, 0x30, 0x78, 0xd2, 0x52, 0xa8, 0x49, 0x21, 0x21,
-  0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0x78, 0xd4, 0x52, 0xb0, 0x49,
-  0x21, 0x21, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0xd8, 0xd0, 0x52,
-  0xa8, 0x49, 0x61, 0x1e, 0x85, 0x20, 0x2c, 0x05, 0x91, 0x14, 0x7e, 0x52,
-  0x18, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x31, 0x38, 0x00, 0x10,
-  0x04, 0x03, 0x4f, 0x2d, 0x85, 0x9b, 0x14, 0x84, 0xe0, 0x02, 0xe3, 0x66,
-  0x09, 0x5e, 0x66, 0xb8, 0xa1, 0x4f, 0x03, 0xb4, 0x14, 0xc0, 0x60, 0x96,
-  0xc1, 0x5f, 0x5e, 0x26, 0xb0, 0x73, 0x14, 0xd2, 0x51, 0x88, 0xcf, 0x70,
-  0x44, 0x1f, 0x06, 0xea, 0x28, 0x10, 0xdf, 0x2c, 0xc3, 0xbf, 0x88, 0x4c,
-  0x60, 0xeb, 0x28, 0xf8, 0x61, 0x10, 0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30,
-  0xcc, 0x05, 0x46, 0x59, 0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xc8,
-  0xa5, 0xa0, 0xc3, 0x0d, 0x01, 0x5c, 0x0a, 0x60, 0x30, 0xcb, 0x00, 0x32,
-  0x21, 0x13, 0xd8, 0x30, 0x8f, 0x02, 0x7c, 0x66, 0x09, 0x4c, 0xc6, 0xe4,
-  0x51, 0x20, 0xe2, 0x33, 0x4b, 0x60, 0x32, 0xc3, 0x11, 0xa8, 0x18, 0xcc,
-  0xa3, 0x20, 0x7c, 0xb3, 0x0c, 0x23, 0x63, 0x32, 0x81, 0xa5, 0x62, 0x40,
-  0x8f, 0x42, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x18, 0x65,
-  0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xd1, 0x97, 0x82, 0x0e, 0x37,
-  0x04, 0x7b, 0x29, 0x80, 0xc1, 0x2c, 0x03, 0xc9, 0x94, 0x4c, 0x60, 0xfc,
-  0x28, 0x0c, 0xf1, 0x99, 0x25, 0x30, 0x19, 0x23, 0xfe, 0x51, 0x80, 0xcf,
-  0x2c, 0x81, 0xc9, 0x0c, 0xb4, 0x18, 0x1a, 0xc8, 0x60, 0x21, 0x43, 0x90,
-  0x8c, 0x50, 0x32, 0x66, 0x29, 0x88, 0xcc, 0x05, 0xc3, 0x98, 0x3f, 0x0a,
-  0x22, 0x29, 0xc4, 0x67, 0x38, 0x82, 0x56, 0x46, 0x52, 0x20, 0xbe, 0x59,
-  0x86, 0x93, 0x51, 0x99, 0xc0, 0x48, 0x52, 0xa8, 0x95, 0xf8, 0x58, 0x30,
-  0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x30, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04,
-  0xf1, 0x29, 0x62, 0x35, 0x05, 0x1d, 0x6e, 0x08, 0x52, 0x53, 0x00, 0x83,
-  0x59, 0x06, 0x94, 0x49, 0x99, 0xc0, 0x06, 0x96, 0x14, 0xe0, 0x33, 0x4b,
-  0xe0, 0x32, 0x96, 0x92, 0x02, 0x11, 0x9f, 0x59, 0x02, 0x97, 0x19, 0x8e,
-  0xf8, 0x15, 0x95, 0x14, 0x84, 0x6f, 0x96, 0x61, 0x65, 0x5c, 0x26, 0x30,
-  0x70, 0x59, 0x49, 0x21, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b,
-  0x8c, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xb0, 0x4d, 0x41,
-  0x87, 0x1b, 0x02, 0xda, 0x14, 0xc0, 0x60, 0x96, 0x81, 0x65, 0x5a, 0x26,
-  0xb0, 0x99, 0x14, 0x86, 0xf8, 0xcc, 0x12, 0xb8, 0x8c, 0x11, 0x38, 0x29,
-  0xc0, 0x67, 0x96, 0xc0, 0x65, 0x06, 0x5a, 0x0c, 0x0d, 0x65, 0xb0, 0x94,
-  0x21, 0x58, 0x46, 0x68, 0x19, 0xda, 0x52, 0x99, 0x0b, 0x86, 0xb9, 0xc0,
-  0xa8, 0xdb, 0x8c, 0x3a, 0x92, 0x14, 0x86, 0xb9, 0x1a, 0x14, 0x86, 0x39,
-  0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x0a,
-  0x3d, 0x05, 0xda, 0x14, 0xe0, 0x52, 0x08, 0x4f, 0x61, 0x34, 0x21, 0x00,
-  0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88,
-  0x44, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x79, 0x4f, 0x61, 0x37,
-  0x85, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x81, 0x4f,
-  0x81, 0x37, 0x85, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0,
-  0x89, 0x4f, 0xa1, 0x37, 0x85, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04,
-  0xc1, 0x60, 0x7b, 0x4f, 0x81, 0x37, 0x05, 0xbd, 0x14, 0x02, 0xf4, 0x14,
-  0x52, 0x53, 0x30, 0x4f, 0x61, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60,
-  0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xbc, 0xf8, 0x14, 0x7c, 0x53, 0x10,
-  0x82, 0x0b, 0x8c, 0x9b, 0x25, 0x78, 0x99, 0x81, 0x16, 0xc3, 0x35, 0xf4,
-  0x85, 0x64, 0x89, 0x7c, 0x81, 0x09, 0x7e, 0x11, 0x5c, 0x86, 0x64, 0x89,
-  0x7e, 0x99, 0x65, 0x80, 0x19, 0x99, 0x21, 0xd9, 0x60, 0x38, 0x42, 0x04,
-  0x83, 0xd1, 0x14, 0x86, 0xef, 0x46, 0x30, 0x18, 0x66, 0xb8, 0x21, 0xc8,
-  0x4b, 0x81, 0x0c, 0x6a, 0x08, 0x74, 0x38, 0xa2, 0x64, 0x4e, 0x53, 0x18,
-  0xbe, 0x0a, 0x04, 0xbd, 0x93, 0x19, 0x66, 0xb8, 0x21, 0xe0, 0x4b, 0x81,
-  0x0c, 0x2a, 0x18, 0x74, 0x96, 0x21, 0x66, 0xcc, 0x26, 0x38, 0xb0, 0x14,
-  0x86, 0xb9, 0x38, 0x14, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83,
-  0x4a, 0x44, 0x05, 0xf7, 0x14, 0x54, 0x53, 0xd8, 0x4f, 0x61, 0x34, 0x21,
-  0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1,
-  0x88, 0x43, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x49, 0x51, 0xa1,
-  0x3e, 0x85, 0x83, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x51,
-  0x51, 0xc1, 0x3e, 0x05, 0x86, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1,
-  0xe0, 0x59, 0x51, 0xe1, 0x3e, 0x05, 0x89, 0x08, 0x46, 0x0c, 0x14, 0x00,
-  0x04, 0xc1, 0x60, 0x4b, 0x51, 0xc1, 0x3e, 0x05, 0xda, 0x14, 0x02, 0x11,
-  0x15, 0xc6, 0x53, 0x00, 0x51, 0x61, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84,
-  0x60, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xbc, 0x15, 0x15, 0xf0, 0x53,
-  0x10, 0x82, 0x0b, 0x8c, 0x9b, 0x25, 0x30, 0x9b, 0xe1, 0x06, 0x9f, 0x0d,
-  0x54, 0x54, 0x00, 0x83, 0x59, 0x86, 0x99, 0xa1, 0x99, 0xa0, 0x62, 0x53,
-  0xd0, 0x4f, 0x01, 0x2e, 0x30, 0x6a, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c,
-  0x16, 0x1b, 0x15, 0xf6, 0x53, 0xf0, 0xc1, 0xe0, 0x37, 0x85, 0x11, 0x83,
-  0x03, 0x00, 0x41, 0x30, 0x58, 0x6e, 0x54, 0xd8, 0x4f, 0x21, 0x10, 0x2e,
-  0x18, 0xa6, 0x68, 0x53, 0xf8, 0x4f, 0x01, 0x2e, 0x30, 0x6a, 0xc4, 0xe0,
-  0x00, 0x40, 0x10, 0x0c, 0x96, 0x1d, 0x15, 0x40, 0x54, 0x10, 0x1b, 0xf2,
-  0x14, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0xe1, 0x51, 0x01, 0x44,
-  0x85, 0x40, 0xb8, 0x60, 0x98, 0x0b, 0x8c, 0xba, 0xc3, 0xa8, 0xeb, 0x4b,
-  0x61, 0x98, 0x73, 0x45, 0x61, 0x98, 0x23, 0x86, 0x39, 0x62, 0x98, 0x11,
-  0x83, 0x03, 0x00, 0x41, 0x30, 0xa8, 0xc2, 0x54, 0x68, 0x51, 0x21, 0x3d,
-  0x05, 0x1d, 0x15, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13,
-  0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01, 0x40,
-  0x10, 0x0c, 0x1e, 0x34, 0x15, 0x68, 0x54, 0x48, 0x88, 0x60, 0xc4, 0x00,
-  0x01, 0x40, 0x10, 0x0c, 0x9e, 0x34, 0x15, 0x6a, 0x54, 0x48, 0x88, 0x60,
-  0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x1e, 0x35, 0x15, 0x6c, 0x54, 0x48,
-  0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x36, 0x34, 0x15, 0x6a,
-  0x54, 0x98, 0x4f, 0x21, 0x08, 0x53, 0x41, 0x44, 0x85, 0x1f, 0x15, 0x46,
-  0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1,
-  0xc0, 0x53, 0x53, 0xe1, 0x46, 0x05, 0x21, 0xb8, 0xc0, 0xb8, 0x59, 0x02,
-  0xb3, 0x19, 0x6e, 0xe8, 0xdb, 0x00, 0x4d, 0x05, 0x30, 0x98, 0x65, 0xa8,
-  0x19, 0xb3, 0x09, 0xec, 0x3c, 0x85, 0xf4, 0x14, 0xe2, 0x33, 0x1c, 0xc1,
-  0x87, 0x81, 0x7a, 0x0a, 0xc4, 0x37, 0xcb, 0x60, 0x33, 0x39, 0x13, 0xd8,
-  0x7a, 0x0a, 0x7d, 0x18, 0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73,
-  0x81, 0x51, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x72, 0x2a,
-  0xe8, 0x70, 0x43, 0x00, 0xa7, 0x02, 0x18, 0xcc, 0x32, 0xdc, 0x0c, 0xce,
-  0x04, 0x36, 0xcc, 0xa7, 0x00, 0x9f, 0x59, 0x82, 0x9e, 0x31, 0xf9, 0x14,
-  0x88, 0xf8, 0xcc, 0x12, 0xf4, 0xcc, 0x70, 0xc4, 0x29, 0x06, 0xf3, 0x29,
-  0x08, 0xdf, 0x2c, 0x83, 0xce, 0xf4, 0x4c, 0x60, 0xa8, 0x18, 0xd0, 0xa7,
-  0x10, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x46, 0x59, 0x10,
-  0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xf4, 0xa9, 0xa0, 0xc3, 0x0d, 0xc1,
-  0x9e, 0x0a, 0x60, 0x30, 0xcb, 0xb0, 0x33, 0x3c, 0x13, 0x18, 0x7f, 0x0a,
-  0x43, 0x7c, 0x66, 0x09, 0x7a, 0xc6, 0x88, 0xff, 0x14, 0xe0, 0x33, 0x4b,
-  0xd0, 0x33, 0x03, 0x2d, 0x86, 0x76, 0x33, 0x18, 0xce, 0x10, 0x3b, 0x23,
-  0xf0, 0x0c, 0x59, 0x0a, 0x39, 0x73, 0xc1, 0x30, 0xe6, 0x9f, 0x82, 0x88,
-  0x0a, 0xf1, 0x19, 0x8e, 0xa0, 0x9d, 0x11, 0x15, 0x88, 0x6f, 0x96, 0xc1,
-  0x67, 0xc2, 0x26, 0x30, 0x12, 0x15, 0x6a, 0x27, 0x3e, 0x16, 0x0c, 0xf4,
-  0xb9, 0x60, 0x98, 0x0b, 0x8c, 0xb2, 0xc0, 0x90, 0x8f, 0x15, 0x41, 0x7c,
-  0x8a, 0x58, 0x55, 0x41, 0x87, 0x1b, 0x82, 0x54, 0x15, 0xc0, 0x60, 0x96,
-  0xe1, 0x67, 0xc0, 0x26, 0xb0, 0x81, 0x45, 0x05, 0xf8, 0xcc, 0x12, 0x94,
-  0x8d, 0xa5, 0xa8, 0x40, 0xc4, 0x67, 0x96, 0xa0, 0x6c, 0x86, 0x23, 0x7e,
-  0x47, 0x45, 0x05, 0xe1, 0x9b, 0x65, 0x10, 0x9b, 0xb2, 0x09, 0x0c, 0x7c,
-  0x56, 0x54, 0x88, 0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa3,
-  0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0x6c, 0x55, 0xd0, 0xe1,
-  0x86, 0x80, 0x56, 0x05, 0x30, 0x98, 0x65, 0x18, 0x1b, 0xb2, 0x09, 0x6c,
-  0x46, 0x85, 0x21, 0x3e, 0xb3, 0x04, 0x65, 0x63, 0x04, 0x8e, 0x0a, 0xf0,
-  0x99, 0x25, 0x28, 0x9b, 0x81, 0x16, 0x43, 0xfb, 0x19, 0x0c, 0x6c, 0x88,
-  0xb1, 0x11, 0xc8, 0x86, 0xf6, 0xc2, 0xe6, 0x82, 0x61, 0x2e, 0x30, 0xea,
-  0x36, 0xa3, 0x8e, 0x44, 0x85, 0x61, 0xae, 0x26, 0x85, 0x61, 0x8e, 0x18,
-  0xe6, 0x88, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0xa0, 0x42, 0x57,
-  0x81, 0x56, 0x05, 0x38, 0x15, 0xc2, 0x55, 0x18, 0x4d, 0x08, 0x80, 0xd1,
-  0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0x22, 0x91,
-  0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0x78, 0xde, 0x55, 0xd8, 0x55, 0x21,
-  0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0x78, 0xe0, 0x55, 0xe0,
-  0x55, 0x21, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0x78, 0xe2,
-  0x55, 0xe8, 0x55, 0x21, 0x21, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30,
-  0xd8, 0xde, 0x55, 0xe0, 0x55, 0x41, 0x4f, 0x85, 0x00, 0x5d, 0x85, 0x54,
-  0x15, 0xcc, 0x55, 0x18, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x31,
-  0x38, 0x00, 0x10, 0x04, 0x03, 0x2f, 0x5e, 0x05, 0x5f, 0x15, 0x84, 0xe0,
-  0x02, 0xe3, 0x66, 0x09, 0xcc, 0x66, 0xa0, 0xc5, 0x70, 0x8d, 0x98, 0x21,
-  0x69, 0x02, 0x66, 0x60, 0x62, 0x66, 0x84, 0xb2, 0x21, 0x69, 0x82, 0x66,
-  0xec, 0x84, 0x03, 0x53, 0x15, 0xe0, 0x33, 0xcb, 0x70, 0x36, 0x69, 0x53,
-  0xc2, 0xc1, 0x70, 0x44, 0x0a, 0x07, 0xa4, 0x2a, 0x0c, 0xdf, 0xa9, 0x70,
-  0x30, 0xcc, 0x70, 0x43, 0xa0, 0xa7, 0x02, 0x19, 0xd4, 0x10, 0xe8, 0x70,
-  0x44, 0x81, 0xaa, 0xc2, 0xf0, 0x55, 0x20, 0xe8, 0x1d, 0xc3, 0x0c, 0x37,
-  0x04, 0x7d, 0x2a, 0x90, 0x41, 0x05, 0x83, 0xce, 0x32, 0xa0, 0x4d, 0xdf,
-  0x04, 0x17, 0xa6, 0xc2, 0x30, 0x27, 0x97, 0xc2, 0x30, 0x23, 0x06, 0x07,
-  0x00, 0x82, 0x60, 0x50, 0x8d, 0xac, 0xf0, 0xae, 0xc2, 0xaa, 0x0a, 0xfc,
-  0x2a, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2,
-  0x68, 0x02, 0x31, 0x14, 0x71, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
-  0x3c, 0x2a, 0x2b, 0xd8, 0xab, 0x70, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80,
-  0x20, 0x18, 0x3c, 0x2b, 0x2b, 0xdc, 0xab, 0xc0, 0x10, 0xc1, 0x88, 0x01,
-  0x02, 0x80, 0x20, 0x18, 0x3c, 0x2c, 0x2b, 0xe0, 0xab, 0x20, 0x11, 0xc1,
-  0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x6c, 0x2a, 0x2b, 0xdc, 0xab, 0x50,
-  0xab, 0x42, 0x30, 0xb2, 0x02, 0xb9, 0x0a, 0x21, 0x2b, 0x8c, 0x26, 0x04,
-  0xc0, 0x68, 0x82, 0x10, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x81, 0xc7,
-  0xb2, 0x42, 0xbe, 0x0a, 0x42, 0x70, 0x81, 0x71, 0xb3, 0x04, 0x7d, 0x33,
-  0xdc, 0xf0, 0xc3, 0xc1, 0xca, 0x0a, 0x60, 0x30, 0xcb, 0xa0, 0x36, 0x6b,
-  0x13, 0x94, 0xac, 0x0a, 0xfb, 0x2a, 0xc0, 0x05, 0x46, 0x8d, 0x18, 0x1c,
-  0x00, 0x08, 0x82, 0xc1, 0x72, 0xb3, 0x02, 0xbf, 0x0a, 0x65, 0x1c, 0x80,
-  0xab, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x0b, 0xce, 0x0a, 0xfc,
-  0x2a, 0x04, 0xc2, 0x05, 0xc3, 0x54, 0xad, 0x0a, 0x20, 0x2b, 0xc0, 0x05,
-  0x46, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0xc2, 0xb3, 0x42, 0xc8,
-  0x0a, 0x62, 0x50, 0xae, 0xc2, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x2c,
-  0x3d, 0x2b, 0x84, 0xac, 0x10, 0x08, 0x17, 0x0c, 0x73, 0x81, 0x51, 0x77,
-  0x18, 0x75, 0x7e, 0x2a, 0x0c, 0x73, 0xaf, 0x29, 0x0c, 0x73, 0xc4, 0x30,
-  0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x95, 0xd8, 0x0a,
-  0x2e, 0x2b, 0xa8, 0xab, 0xb0, 0xb3, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26,
-  0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x93, 0xb6, 0x42, 0xcd, 0x0a, 0x09,
-  0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xa3, 0xb6, 0x82, 0xcd,
-  0x0a, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xb3, 0xb6,
-  0xc2, 0xcd, 0x0a, 0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1,
-  0x96, 0xb6, 0x82, 0xcd, 0x0a, 0xf4, 0x2a, 0x04, 0x62, 0x2b, 0x8c, 0xac,
-  0x00, 0xb6, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x88, 0xc1,
-  0x01, 0x80, 0x20, 0x18, 0x78, 0x6b, 0x2b, 0xe0, 0xac, 0x20, 0x04, 0x17,
-  0x18, 0x37, 0x4b, 0xd0, 0x37, 0xc3, 0x0d, 0x7e, 0x1c, 0xa4, 0xad, 0x00,
-  0x06, 0xb3, 0x0c, 0x6c, 0xd3, 0x37, 0x81, 0xa1, 0xab, 0xa0, 0xae, 0x42,
-  0x7c, 0x86, 0x23, 0x46, 0x39, 0x58, 0x57, 0x81, 0xf8, 0x66, 0x19, 0xda,
-  0x06, 0x6e, 0x02, 0x63, 0x57, 0x81, 0x94, 0x83, 0xf8, 0x58, 0x30, 0xd0,
-  0xe7, 0x82, 0x61, 0x2e, 0x30, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1,
-  0x29, 0x62, 0x6e, 0x05, 0x1d, 0x6e, 0x08, 0xe2, 0x56, 0x00, 0x83, 0x59,
-  0x06, 0xb7, 0x79, 0x9b, 0xc0, 0x06, 0x7a, 0x15, 0xe0, 0x33, 0x4b, 0x40,
-  0x37, 0x36, 0xaf, 0x02, 0x11, 0x9f, 0x59, 0x02, 0xba, 0x19, 0x8e, 0x70,
-  0xe5, 0x80, 0x5e, 0x05, 0xe1, 0x9b, 0x65, 0x88, 0x1b, 0xba, 0x09, 0xec,
-  0x95, 0x83, 0x7a, 0x15, 0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9,
-  0xc0, 0x28, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0xbf, 0x15,
-  0x74, 0xb8, 0x21, 0xe0, 0x5b, 0x01, 0x0c, 0x66, 0x19, 0xe4, 0x66, 0x6e,
-  0x02, 0xeb, 0x57, 0x61, 0x88, 0xcf, 0x2c, 0x01, 0xdd, 0x18, 0x01, 0xb2,
-  0x02, 0x7c, 0x66, 0x09, 0xe8, 0x66, 0xa0, 0xc5, 0xd0, 0xdc, 0x06, 0x7b,
-  0x1b, 0x42, 0x6e, 0x84, 0xb9, 0x91, 0xcb, 0x01, 0x6e, 0x2e, 0x18, 0xc6,
-  0xfe, 0x55, 0x18, 0x59, 0x21, 0x3e, 0xc3, 0x11, 0xb4, 0x40, 0xb2, 0x02,
-  0xf1, 0xcd, 0x32, 0xd4, 0x0d, 0xde, 0x04, 0x56, 0xb2, 0x42, 0x2d, 0xc4,
-  0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x51, 0x16, 0x18, 0xf2,
-  0xb1, 0x22, 0x88, 0x4f, 0x11, 0xac, 0x2b, 0xe8, 0x70, 0x43, 0xa0, 0xba,
-  0x02, 0x18, 0xcc, 0x32, 0xd8, 0xcd, 0xdd, 0x04, 0x36, 0xb4, 0xac, 0x00,
-  0x9f, 0x59, 0x02, 0xbe, 0x31, 0x95, 0x15, 0x88, 0xf8, 0xcc, 0x12, 0xf0,
-  0xcd, 0x70, 0xc4, 0x2f, 0xac, 0xac, 0x20, 0x7c, 0xb3, 0x0c, 0x79, 0xc3,
-  0x37, 0x81, 0x81, 0x03, 0xcb, 0x0a, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05,
-  0xc3, 0x5c, 0x60, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0xc4,
-  0xed, 0x0a, 0x3a, 0xdc, 0x10, 0xd4, 0xae, 0x00, 0x06, 0xb3, 0x0c, 0x7a,
-  0xb3, 0x37, 0x81, 0xd1, 0xac, 0x30, 0xc4, 0x67, 0x96, 0x80, 0x6f, 0x8c,
-  0xc8, 0x59, 0x01, 0x3e, 0xb3, 0x04, 0x7c, 0x33, 0xd0, 0x62, 0x68, 0x76,
-  0x83, 0xdd, 0x0d, 0xa1, 0x37, 0xc2, 0xde, 0xd0, 0x06, 0xde, 0x5c, 0x30,
-  0xcc, 0x05, 0x46, 0xdd, 0x66, 0xd4, 0x95, 0xac, 0x30, 0xcc, 0xd9, 0xa8,
-  0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20,
-  0x18, 0x54, 0xe9, 0x2b, 0xd4, 0xae, 0x10, 0xb7, 0x82, 0xf8, 0x0a, 0xa3,
-  0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40,
-  0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x0f, 0xfc,
-  0x0a, 0xbc, 0x2b, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06,
-  0x4f, 0xfc, 0x0a, 0xbd, 0x2b, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20,
-  0x08, 0x06, 0x8f, 0xfc, 0x0a, 0xbe, 0x2b, 0x24, 0x44, 0x30, 0x62, 0xa0,
-  0x00, 0x20, 0x08, 0x06, 0x1b, 0xfc, 0x0a, 0xbd, 0x2b, 0xec, 0xad, 0x10,
-  0xa4, 0xaf, 0xa0, 0xba, 0xc2, 0xf9, 0x0a, 0xa3, 0x09, 0x01, 0x30, 0x9a,
-  0x20, 0x04, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xe0, 0xc9, 0xaf, 0xf0,
-  0xbb, 0x82, 0x10, 0x5c, 0x60, 0xdc, 0x2c, 0x41, 0xdf, 0x0c, 0xb4, 0x18,
-  0xae, 0x81, 0x36, 0x26, 0x4f, 0x9c, 0x0d, 0x4c, 0xa8, 0x8d, 0xc0, 0x37,
-  0x26, 0x4f, 0xac, 0xcd, 0x2c, 0x83, 0xdf, 0x80, 0x4e, 0x49, 0x07, 0xc3,
-  0x11, 0x2a, 0x1d, 0x90, 0xae, 0x30, 0x7c, 0xb7, 0xd2, 0xc1, 0x30, 0xc3,
-  0x0d, 0x81, 0xde, 0x0a, 0x64, 0x50, 0x43, 0xa0, 0xc3, 0x11, 0x25, 0x81,
-  0xba, 0xc2, 0xf0, 0x55, 0x20, 0xe8, 0x9d, 0xc4, 0x30, 0xc3, 0x0d, 0x41,
-  0xdf, 0x0a, 0x64, 0x50, 0xc1, 0xa0, 0xb3, 0x0c, 0x7f, 0x43, 0x3b, 0xc1,
-  0x85, 0xad, 0x30, 0xcc, 0xc9, 0xa9, 0x30, 0xcc, 0x88, 0xc1, 0x01, 0x80,
-  0x20, 0x18, 0x54, 0x23, 0x2c, 0xbc, 0xaf, 0xb0, 0xba, 0x02, 0xff, 0x0a,
-  0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a,
-  0x40, 0x0c, 0x45, 0x1c, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x8f,
-  0x0a, 0x0b, 0xf6, 0x2b, 0x1c, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08,
-  0x06, 0xcf, 0x0a, 0x0b, 0xf7, 0x2b, 0x30, 0x44, 0x30, 0x62, 0x80, 0x00,
-  0x20, 0x08, 0x06, 0x0f, 0x0b, 0x0b, 0xf8, 0x2b, 0x48, 0x44, 0x30, 0x62,
-  0xa0, 0x00, 0x20, 0x08, 0x06, 0x9b, 0x0a, 0x0b, 0xf7, 0x2b, 0xd4, 0xae,
-  0x10, 0x8c, 0xb0, 0x40, 0xbe, 0x42, 0x08, 0x0b, 0xa3, 0x09, 0x01, 0x30,
-  0x9a, 0x20, 0x04, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xe0, 0xb1, 0xb0,
-  0x90, 0xbf, 0x82, 0x10, 0x5c, 0x60, 0xdc, 0x2c, 0x01, 0xed, 0x0c, 0x37,
-  0xfc, 0x74, 0xb0, 0xc2, 0x02, 0x18, 0xcc, 0x32, 0x84, 0x8e, 0xe8, 0x04,
-  0x25, 0xbb, 0xc2, 0xfe, 0x0a, 0x70, 0x81, 0x51, 0x23, 0x06, 0x07, 0x00,
-  0x82, 0x60, 0xb0, 0xdc, 0xb0, 0xc0, 0xbf, 0x82, 0x59, 0x07, 0xe0, 0x2b,
-  0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x82, 0xc3, 0x02, 0xff, 0x0a,
-  0x81, 0x70, 0xc1, 0x30, 0x55, 0xbb, 0x02, 0x08, 0x0b, 0x70, 0x81, 0x51,
-  0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xb0, 0xf0, 0xb0, 0x10, 0xc2, 0x82,
-  0x58, 0x94, 0xaf, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x4b, 0x0f,
-  0x0b, 0x21, 0x2c, 0x04, 0xc2, 0x05, 0xc3, 0x5c, 0x60, 0xd4, 0x1d, 0x46,
-  0x9d, 0xdf, 0x0a, 0xc3, 0xdc, 0xab, 0x0a, 0xc3, 0x1c, 0x31, 0xcc, 0x11,
-  0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x41, 0x25, 0xc6, 0x82, 0x0b,
-  0x0b, 0xea, 0x2b, 0xec, 0xb0, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42,
-  0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23, 0x06,
-  0x08, 0x00, 0x82, 0x60, 0xf0, 0xa4, 0xb1, 0x50, 0xc3, 0x42, 0x42, 0x04,
-  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xf0, 0xa8, 0xb1, 0x60, 0xc3, 0x42,
-  0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xf0, 0xac, 0xb1, 0x70,
-  0xc3, 0x42, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0xa5,
-  0xb1, 0x60, 0xc3, 0x02, 0xfd, 0x0a, 0x81, 0x18, 0x0b, 0x23, 0x2c, 0x80,
-  0xb1, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x62, 0x70, 0x00,
-  0x20, 0x08, 0x06, 0xde, 0x1a, 0x0b, 0x38, 0x2c, 0x08, 0xc1, 0x05, 0xc6,
-  0xcd, 0x12, 0xd0, 0xce, 0x70, 0x83, 0x5f, 0x07, 0x69, 0x2c, 0x80, 0xc1,
-  0x2c, 0xc3, 0xe8, 0xd0, 0x4e, 0x60, 0xe8, 0x2b, 0xa8, 0xaf, 0x10, 0x9f,
-  0xe1, 0x08, 0xd2, 0x0e, 0xd6, 0x57, 0x20, 0xbe, 0x59, 0x06, 0xd2, 0x39,
-  0x9d, 0xc0, 0xd8, 0x57, 0x28, 0xed, 0x20, 0x3e, 0x16, 0x0c, 0xf4, 0xb9,
-  0x60, 0x98, 0x0b, 0x8c, 0xb2, 0xc0, 0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a,
-  0x98, 0x63, 0x41, 0x87, 0x1b, 0x82, 0x38, 0x16, 0xc0, 0x60, 0x96, 0xa1,
-  0x74, 0x4c, 0x27, 0xb0, 0x81, 0x7e, 0x05, 0xf8, 0xcc, 0x12, 0xac, 0x8e,
-  0xcd, 0xaf, 0x40, 0xc4, 0x67, 0x96, 0x60, 0x75, 0x86, 0x23, 0x5e, 0x3b,
-  0xa0, 0x5f, 0x41, 0xf8, 0x66, 0x19, 0x50, 0x67, 0x75, 0x02, 0x83, 0xed,
-  0xa0, 0x7e, 0x85, 0xf8, 0x58, 0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x30,
-  0xca, 0x82, 0x48, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0xc2, 0x8f, 0x05, 0x1d,
-  0x6e, 0x08, 0xf8, 0x58, 0x00, 0x83, 0x59, 0x86, 0xd4, 0x51, 0x9d, 0xc0,
-  0xfa, 0x57, 0x18, 0xe2, 0x33, 0x4b, 0xb0, 0x3a, 0x46, 0x80, 0xb0, 0x00,
-  0x9f, 0x59, 0x82, 0xd5, 0x19, 0x68, 0x31, 0xb4, 0xd2, 0xc1, 0x4c, 0x87,
-  0x48, 0x1d, 0x41, 0x75, 0xe8, 0x76, 0x38, 0x9d, 0x0b, 0x86, 0xb1, 0xff,
-  0x15, 0x46, 0x58, 0x88, 0xcf, 0x70, 0x04, 0x6d, 0x90, 0xb0, 0x40, 0x7c,
-  0xb3, 0x0c, 0xac, 0xf3, 0x3a, 0x81, 0x95, 0xb0, 0x50, 0x1b, 0xf1, 0xb1,
-  0x60, 0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0x60, 0x94, 0x05, 0x86, 0x7c, 0xac,
-  0x08, 0xe2, 0x53, 0x04, 0x2b, 0x0b, 0x3a, 0xdc, 0x10, 0xa8, 0xb2, 0x00,
-  0x06, 0xb3, 0x0c, 0xad, 0xe3, 0x3a, 0x81, 0x0d, 0x2d, 0x2c, 0xc0, 0x67,
-  0x96, 0x60, 0x76, 0x4c, 0x85, 0x05, 0x22, 0x3e, 0xb3, 0x04, 0xb3, 0x33,
-  0x1c, 0xf1, 0x1b, 0x2b, 0x2c, 0x08, 0xdf, 0x2c, 0x03, 0xec, 0xcc, 0x4e,
-  0x60, 0xe0, 0xc1, 0xc2, 0x42, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30,
-  0x17, 0x18, 0x65, 0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x71, 0xcb,
-  0x82, 0x0e, 0x37, 0x04, 0xb5, 0x2c, 0x80, 0xc1, 0x2c, 0x43, 0xec, 0xc8,
-  0x4e, 0x60, 0x34, 0x2c, 0x0c, 0xf1, 0x99, 0x25, 0x98, 0x1d, 0x23, 0x72,
-  0x58, 0x80, 0xcf, 0x2c, 0xc1, 0xec, 0x0c, 0xb4, 0x18, 0x5a, 0xeb, 0x60,
-  0xae, 0x43, 0xc4, 0x8e, 0x20, 0x3b, 0xb4, 0xf3, 0x3a, 0x17, 0x0c, 0x73,
-  0x81, 0x51, 0xb7, 0x19, 0x75, 0x25, 0x2c, 0x0c, 0x73, 0x36, 0x2b, 0x0c,
-  0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06,
-  0x55, 0x3a, 0x0b, 0xb5, 0x2c, 0xc4, 0xb1, 0x20, 0xce, 0xc2, 0x68, 0x42,
-  0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43,
-  0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x03, 0xcf, 0x02,
-  0x2f, 0x0b, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x13,
-  0xcf, 0x42, 0x2f, 0x0b, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
-  0xc1, 0x23, 0xcf, 0x82, 0x2f, 0x0b, 0x09, 0x11, 0x8c, 0x18, 0x28, 0x00,
-  0x08, 0x82, 0xc1, 0x06, 0xcf, 0x42, 0x2f, 0x0b, 0x7b, 0x2c, 0x04, 0xe9,
-  0x2c, 0xa8, 0xb2, 0x70, 0xce, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08,
-  0xc1, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x78, 0xf2, 0x2c, 0xfc, 0xb2,
-  0x20, 0x04, 0x17, 0x18, 0x37, 0x4b, 0x40, 0x3b, 0x03, 0x2d, 0x86, 0x6b,
-  0xfc, 0x8d, 0x49, 0x16, 0x7e, 0x03, 0x13, 0xa1, 0x23, 0xcc, 0x8e, 0x49,
-  0x16, 0xa2, 0x33, 0xcb, 0x50, 0x3b, 0xb7, 0x53, 0xe2, 0xc1, 0x70, 0x04,
-  0x09, 0x07, 0xa4, 0x2c, 0x0c, 0xdf, 0x95, 0x70, 0x30, 0xcc, 0x70, 0x43,
-  0xa0, 0xc7, 0x02, 0x19, 0xd4, 0x10, 0xe8, 0x70, 0x44, 0x89, 0xa0, 0xb2,
-  0x30, 0x7c, 0x15, 0x08, 0x7a, 0x27, 0x32, 0xcc, 0x70, 0x43, 0xd0, 0xc7,
-  0x02, 0x19, 0x54, 0x30, 0xe8, 0x2c, 0x83, 0xed, 0xac, 0x4f, 0x70, 0x61,
-  0x2c, 0x0c, 0x73, 0x72, 0x2b, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08,
-  0x06, 0xd5, 0x48, 0x0b, 0xef, 0x2c, 0xac, 0xb2, 0xc0, 0xcf, 0xc2, 0x68,
-  0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10,
-  0x43, 0x11, 0x87, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xa3, 0xd2,
-  0x82, 0x3d, 0x0b, 0x07, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1,
-  0xb3, 0xd2, 0xc2, 0x3d, 0x0b, 0x0c, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08,
-  0x82, 0xc1, 0xc3, 0xd2, 0x02, 0x3e, 0x0b, 0x12, 0x11, 0x8c, 0x18, 0x28,
-  0x00, 0x08, 0x82, 0xc1, 0xa6, 0xd2, 0xc2, 0x3d, 0x0b, 0xb5, 0x2c, 0x04,
-  0x23, 0x2d, 0x90, 0xb3, 0x10, 0xd2, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26,
-  0x08, 0xc1, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x78, 0x2c, 0x2d, 0xe4,
-  0xb3, 0x20, 0x04, 0x17, 0x18, 0x37, 0x4b, 0xb0, 0x3e, 0xc3, 0x0d, 0x3f,
-  0x1e, 0xac, 0xb4, 0x00, 0x06, 0xb3, 0x0c, 0xb8, 0x93, 0x3b, 0x41, 0xc9,
-  0xb2, 0xb0, 0xcf, 0x02, 0x5c, 0x60, 0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20,
-  0x18, 0x2c, 0x37, 0x2d, 0xf0, 0xb3, 0x00, 0xc6, 0x01, 0x38, 0x0b, 0x23,
-  0x06, 0x07, 0x00, 0x82, 0x60, 0xb0, 0xe0, 0xb4, 0xc0, 0xcf, 0x42, 0x20,
-  0x5c, 0x30, 0x4c, 0xd5, 0xb2, 0x00, 0xd2, 0x02, 0x5c, 0x60, 0xd4, 0x88,
-  0xc1, 0x01, 0x80, 0x20, 0x18, 0x2c, 0x3c, 0x2d, 0x84, 0xb4, 0x20, 0x26,
-  0xe5, 0x2c, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0xd2, 0xd3, 0x42,
-  0x48, 0x0b, 0x81, 0x70, 0xc1, 0x30, 0x17, 0x18, 0x75, 0x87, 0x51, 0xe7,
-  0xc7, 0xc2, 0x30, 0xf7, 0xba, 0xc2, 0x30, 0x47, 0x0c, 0x73, 0xc4, 0x30,
-  0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x50, 0x89, 0xb5, 0xe0, 0xd2, 0x82,
-  0x3a, 0x0b, 0x3b, 0x2d, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c,
-  0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x91, 0xc8, 0x88, 0x01, 0x02,
-  0x80, 0x20, 0x18, 0x3c, 0x69, 0x2d, 0xd4, 0xb4, 0x90, 0x10, 0xc1, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x3c, 0x6a, 0x2d, 0xd8, 0xb4, 0x90, 0x10,
-  0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x3c, 0x6b, 0x2d, 0xdc, 0xb4,
-  0x90, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x6c, 0x69, 0x2d,
-  0xd8, 0xb4, 0x40, 0xcf, 0x42, 0x20, 0xd6, 0xc2, 0x48, 0x0b, 0x60, 0x2d,
-  0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x18, 0x1c, 0x00, 0x08,
-  0x82, 0x81, 0xb7, 0xd6, 0x02, 0x4e, 0x0b, 0x42, 0x70, 0x81, 0x71, 0xb3,
-  0x04, 0xeb, 0x33, 0xdc, 0xe0, 0xe7, 0x41, 0x5a, 0x0b, 0x60, 0x30, 0xcb,
-  0xa0, 0x3b, 0xeb, 0x13, 0x18, 0x3a, 0x0b, 0xea, 0x2c, 0xc4, 0x67, 0x38,
-  0xc2, 0x8f, 0x83, 0x75, 0x16, 0x88, 0x6f, 0x96, 0x61, 0x77, 0x7c, 0x27,
-  0x30, 0x76, 0x16, 0xfe, 0x38, 0x88, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18,
-  0xe6, 0x02, 0xa3, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xe6,
-  0x5a, 0xd0, 0xe1, 0x86, 0x20, 0xae, 0x05, 0x30, 0x98, 0x65, 0xe0, 0x9d,
-  0xde, 0x09, 0x6c, 0xa0, 0x67, 0x01, 0x3e, 0xb3, 0x04, 0xe2, 0x63, 0xf3,
-  0x2c, 0x10, 0xf1, 0x99, 0x25, 0x10, 0x9f, 0xe1, 0x88, 0x54, 0x0e, 0xe8,
-  0x59, 0x10, 0xbe, 0x59, 0x86, 0xdf, 0x11, 0x9f, 0xc0, 0x54, 0x39, 0xa8,
-  0x67, 0x21, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x8c, 0xb2,
-  0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xf0, 0x6b, 0x41, 0x87, 0x1b,
-  0x02, 0xbe, 0x16, 0xc0, 0x60, 0x96, 0x01, 0x7c, 0xc2, 0x27, 0xb0, 0x7e,
-  0x16, 0x86, 0xf8, 0xcc, 0x12, 0x88, 0x8f, 0x11, 0x20, 0x2d, 0xc0, 0x67,
-  0x96, 0x40, 0x7c, 0x06, 0x5a, 0x0c, 0x8d, 0x77, 0xb0, 0xde, 0x21, 0xc0,
-  0x47, 0x08, 0x1f, 0xb4, 0x1c, 0x7c, 0xe7, 0x82, 0x61, 0xec, 0x9f, 0x85,
-  0x91, 0x16, 0xe2, 0x33, 0x1c, 0x41, 0x2b, 0x24, 0x2d, 0x10, 0xdf, 0x2c,
-  0xc3, 0xf8, 0x98, 0x4f, 0x60, 0x25, 0x2d, 0xd4, 0x4a, 0x7c, 0x2c, 0x18,
-  0xe8, 0x73, 0xc1, 0x30, 0x17, 0x18, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82,
-  0xf8, 0x14, 0xc1, 0xda, 0x82, 0x0e, 0x37, 0x04, 0xaa, 0x2d, 0x80, 0xc1,
-  0x2c, 0x03, 0xf9, 0x94, 0x4f, 0x60, 0x43, 0x4b, 0x0b, 0xf0, 0x99, 0x25,
-  0x50, 0x1f, 0x53, 0x69, 0x81, 0x88, 0xcf, 0x2c, 0x81, 0xfa, 0x0c, 0x47,
-  0xfc, 0xca, 0x4a, 0x0b, 0xc2, 0x37, 0xcb, 0x70, 0x3e, 0xea, 0x13, 0x18,
-  0xb8, 0xb0, 0xb4, 0x10, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05,
-  0x46, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xdc, 0xb6, 0xa0,
-  0xc3, 0x0d, 0x41, 0x6d, 0x0b, 0x60, 0x30, 0xcb, 0x80, 0x3e, 0xe9, 0x13,
-  0x18, 0x4d, 0x0b, 0x43, 0x7c, 0x66, 0x09, 0xd4, 0xc7, 0x88, 0x9c, 0x16,
-  0xe0, 0x33, 0x4b, 0xa0, 0x3e, 0x03, 0x2d, 0x86, 0x46, 0x3e, 0x58, 0xf9,
-  0x10, 0xe8, 0x23, 0xa4, 0x0f, 0x6d, 0x99, 0xcf, 0x05, 0xc3, 0x5c, 0x60,
-  0xd4, 0x6d, 0x46, 0x5d, 0x49, 0x0b, 0xc3, 0x9c, 0x0d, 0x0b, 0xc3, 0x1c,
-  0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x41, 0x95,
-  0xde, 0x42, 0x6d, 0x0b, 0x71, 0x2d, 0x88, 0xb7, 0x30, 0x9a, 0x10, 0x00,
-  0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44,
-  0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xf0, 0xc0, 0xb7, 0xc0, 0xdb,
-  0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xf0, 0xc4, 0xb7,
-  0xd0, 0xdb, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xf0,
-  0xc8, 0xb7, 0xe0, 0xdb, 0x42, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82,
-  0x60, 0xb0, 0xc1, 0xb7, 0xd0, 0xdb, 0xc2, 0x5e, 0x0b, 0x41, 0x7a, 0x0b,
-  0xaa, 0x2d, 0x9c, 0xb7, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30,
-  0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x9e, 0x7c, 0x0b, 0xbf, 0x2d, 0x08,
-  0xc1, 0x05, 0xc6, 0xcd, 0x12, 0xac, 0xcf, 0x40, 0x8b, 0xe1, 0x1a, 0xb6,
-  0x63, 0xb2, 0x45, 0xed, 0xc0, 0x04, 0xee, 0x08, 0xea, 0x63, 0xb2, 0x45,
-  0xee, 0xcc, 0x32, 0xb0, 0x8f, 0xfb, 0x94, 0x7c, 0x30, 0x1c, 0x31, 0xc2,
-  0x01, 0x69, 0x0b, 0xc3, 0x77, 0x24, 0x1c, 0x0c, 0x33, 0xdc, 0x10, 0xe8,
-  0xb5, 0x40, 0x06, 0x35, 0x04, 0x3a, 0x1c, 0x51, 0x32, 0xa8, 0x2d, 0x0c,
-  0x5f, 0x05, 0x82, 0xde, 0xc9, 0x0c, 0x33, 0xdc, 0x10, 0xf4, 0xb5, 0x40,
-  0x06, 0x15, 0x0c, 0x3a, 0xcb, 0xd0, 0x3e, 0x22, 0x14, 0x5c, 0x58, 0x0b,
-  0xc3, 0x9c, 0x1c, 0x0b, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x41,
-  0x35, 0xe2, 0xc2, 0x7b, 0x0b, 0xab, 0x2d, 0xf0, 0xb7, 0x30, 0x9a, 0x10,
-  0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50,
-  0xc4, 0x21, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xf0, 0xa8, 0xb8, 0x60,
-  0xdf, 0xc2, 0x41, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xf0, 0xac,
-  0xb8, 0x70, 0xdf, 0x02, 0x43, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60,
-  0xf0, 0xb0, 0xb8, 0x80, 0xdf, 0x82, 0x44, 0x04, 0x23, 0x06, 0x0a, 0x00,
-  0x82, 0x60, 0xb0, 0xa9, 0xb8, 0x70, 0xdf, 0x42, 0x6d, 0x0b, 0xc1, 0x88,
-  0x0b, 0xe4, 0x2d, 0x84, 0xb8, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42,
-  0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x1e, 0x8b, 0x0b, 0xf9, 0x2d,
-  0x08, 0xc1, 0x05, 0xc6, 0xcd, 0x12, 0x88, 0xd0, 0x70, 0xc3, 0xcf, 0x07,
-  0x2b, 0x2e, 0x80, 0xc1, 0x2c, 0xc3, 0xfb, 0xc0, 0x4f, 0x50, 0xb2, 0x2d,
-  0xec, 0xb7, 0x00, 0x17, 0x18, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06,
-  0xcb, 0x8d, 0x0b, 0xfc, 0x2d, 0xfc, 0x70, 0x00, 0xde, 0xc2, 0x88, 0xc1,
-  0x01, 0x80, 0x20, 0x18, 0x2c, 0x38, 0x2e, 0xf0, 0xb7, 0x10, 0x08, 0x17,
-  0x0c, 0x53, 0xb5, 0x2d, 0x80, 0xb8, 0x00, 0x17, 0x18, 0x35, 0x62, 0x70,
-  0x00, 0x20, 0x08, 0x06, 0x0b, 0x8f, 0x0b, 0x21, 0x2e, 0x88, 0x4d, 0x79,
-  0x0b, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xb0, 0xf4, 0xb8, 0x10, 0xe2,
-  0x42, 0x20, 0x5c, 0x30, 0xcc, 0x05, 0x46, 0xdd, 0x61, 0xd4, 0xf9, 0xb5,
-  0x30, 0xcc, 0xbd, 0xb2, 0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88,
-  0xc1, 0x01, 0x80, 0x20, 0x18, 0x54, 0x62, 0x2e, 0xb8, 0xb8, 0xa0, 0xde,
-  0xc2, 0x8e, 0x0b, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09,
-  0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20,
-  0x08, 0x06, 0x4f, 0x9a, 0x0b, 0x35, 0x2e, 0x24, 0x44, 0x30, 0x62, 0x80,
-  0x00, 0x20, 0x08, 0x06, 0x8f, 0x9a, 0x0b, 0x36, 0x2e, 0x24, 0x44, 0x30,
-  0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xcf, 0x9a, 0x0b, 0x37, 0x2e, 0x24,
-  0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x5b, 0x9a, 0x0b, 0x36,
-  0x2e, 0xd0, 0xb7, 0x10, 0x88, 0xb9, 0x30, 0xe2, 0x02, 0x98, 0x0b, 0xa3,
-  0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60,
-  0xe0, 0xad, 0xb9, 0x80, 0xe3, 0x82, 0x10, 0x5c, 0x60, 0xdc, 0x2c, 0x81,
-  0x08, 0x0d, 0x37, 0xf8, 0x7d, 0x90, 0xe6, 0x02, 0x18, 0xcc, 0x32, 0xc4,
-  0x8f, 0x08, 0x05, 0x86, 0xde, 0x82, 0x7a, 0x0b, 0xf1, 0x19, 0x8e, 0xe8,
-  0xe3, 0x60, 0xbd, 0x05, 0xe2, 0x9b, 0x65, 0x90, 0x9f, 0xfa, 0x09, 0x8c,
-  0xbd, 0x05, 0x3f, 0x0e, 0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9,
-  0xc0, 0x28, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0x39, 0x17,
-  0x74, 0xb8, 0x21, 0x88, 0x73, 0x01, 0x0c, 0x66, 0x19, 0xe6, 0x87, 0x7e,
-  0x02, 0x1b, 0xe8, 0x5b, 0x80, 0xcf, 0x2c, 0x41, 0xfe, 0xd8, 0x7c, 0x0b,
-  0x44, 0x7c, 0x66, 0x09, 0xf2, 0x67, 0x38, 0x02, 0x95, 0x03, 0xfa, 0x16,
-  0x84, 0x6f, 0x96, 0xc1, 0x7e, 0xf2, 0x27, 0xb0, 0x54, 0x0e, 0xea, 0x5b,
-  0x88, 0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa3, 0x2c, 0x88,
-  0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xfc, 0x5c, 0xd0, 0xe1, 0x86, 0x80,
-  0xcf, 0x05, 0x30, 0x98, 0x65, 0xb8, 0x1f, 0xfc, 0x09, 0xac, 0xbf, 0x85,
-  0x21, 0x3e, 0xb3, 0x04, 0xf9, 0x63, 0x04, 0x88, 0x0b, 0xf0, 0x99, 0x25,
-  0xc8, 0x9f, 0x81, 0x16, 0x43, 0x9b, 0x1f, 0x8c, 0x7e, 0x88, 0xfb, 0x11,
-  0xf0, 0xc7, 0x2c, 0x87, 0xfa, 0xb9, 0x60, 0x18, 0xfb, 0x6f, 0x61, 0xc4,
-  0x85, 0xf8, 0x0c, 0x47, 0xd0, 0x0e, 0x89, 0x0b, 0xc4, 0x37, 0xcb, 0xa0,
-  0x3f, 0xfd, 0x13, 0x58, 0x89, 0x0b, 0xb5, 0x13, 0x1f, 0x0b, 0x06, 0xfa,
-  0x5c, 0x30, 0xcc, 0x05, 0x46, 0x59, 0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e,
-  0x45, 0xb0, 0xba, 0xa0, 0xc3, 0x0d, 0x81, 0xaa, 0x0b, 0x60, 0x30, 0xcb,
-  0xb0, 0x3f, 0xfc, 0x13, 0xd8, 0xd0, 0xe2, 0x02, 0x7c, 0x66, 0x09, 0x42,
-  0xc8, 0x54, 0x5c, 0x20, 0xe2, 0x33, 0x4b, 0x10, 0x42, 0xc3, 0x11, 0xbf,
-  0xb3, 0xe2, 0x82, 0xf0, 0xcd, 0x32, 0xf8, 0x4f, 0x08, 0x05, 0x06, 0x3e,
-  0x2c, 0x2e, 0xc4, 0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x51,
-  0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0xb7, 0x2e, 0xe8, 0x70,
-  0x43, 0x50, 0xeb, 0x02, 0x18, 0xcc, 0x32, 0xfc, 0x0f, 0x08, 0x05, 0x46,
-  0xe3, 0xc2, 0x10, 0x9f, 0x59, 0x82, 0x10, 0x32, 0x22, 0xc7, 0x05, 0xf8,
-  0xcc, 0x12, 0x84, 0xd0, 0x40, 0x8b, 0xa1, 0xed, 0x0f, 0xc6, 0x3f, 0xc4,
-  0xff, 0x08, 0x20, 0x44, 0x7b, 0xfd, 0x73, 0xc1, 0x30, 0x17, 0x18, 0x75,
-  0x9b, 0x51, 0x57, 0xe2, 0xc2, 0x30, 0x67, 0xd3, 0xc2, 0x30, 0x47, 0x0c,
-  0x73, 0xc4, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x50, 0xa5, 0xbb,
-  0x50, 0xeb, 0x42, 0x9c, 0x0b, 0xe2, 0x2e, 0x8c, 0x26, 0x04, 0xc0, 0x68,
-  0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x91, 0xc8,
-  0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x3c, 0xf0, 0x2e, 0xf0, 0xba, 0x90,
-  0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x3c, 0xf1, 0x2e, 0xf4,
-  0xba, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x3c, 0xf2,
-  0x2e, 0xf8, 0xba, 0x90, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18,
-  0x6c, 0xf0, 0x2e, 0xf4, 0xba, 0xb0, 0xe7, 0x42, 0x90, 0xee, 0x82, 0xaa,
-  0x0b, 0xe7, 0x2e, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x18,
-  0x1c, 0x00, 0x08, 0x82, 0x81, 0x27, 0xef, 0xc2, 0xaf, 0x0b, 0x42, 0x70,
-  0x81, 0x71, 0xb3, 0x04, 0x22, 0x34, 0xd0, 0x62, 0xb8, 0x46, 0xfb, 0x98,
-  0x74, 0xc1, 0x3e, 0x30, 0xf1, 0x3e, 0x42, 0x08, 0x99, 0x74, 0x01, 0x3f,
-  0x23, 0x06, 0x06, 0x00, 0x82, 0x60, 0x70, 0xf4, 0xbb, 0x90, 0xeb, 0x82,
-  0x5a, 0x0b, 0x23, 0x06, 0x06, 0x00, 0x82, 0x60, 0x70, 0xf8, 0xbb, 0xa0,
-  0xeb, 0x82, 0x5a, 0x0b, 0x16, 0x04, 0xf2, 0xb1, 0x40, 0x90, 0x8f, 0xa5,
-  0x7c, 0xa0, 0xea, 0x82, 0x7c, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x20,
-  0x09, 0x79, 0x61, 0xdc, 0x05, 0x5a, 0x17, 0x4e, 0x2e, 0xb0, 0x95, 0x0f,
-  0x68, 0x5d, 0x90, 0xcf, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x24, 0x23,
-  0x2f, 0x94, 0xbb, 0x30, 0xeb, 0x82, 0xca, 0x06, 0xc1, 0x88, 0x01, 0x02,
-  0x80, 0x20, 0x18, 0x24, 0x24, 0x2f, 0x98, 0xbb, 0x60, 0xeb, 0x82, 0xca,
-  0x05, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x90, 0x94, 0xbc, 0x70, 0xee,
-  0xc2, 0xab, 0x0b, 0x2a, 0x13, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x41,
-  0x62, 0xf2, 0x02, 0xba, 0x0b, 0xb5, 0x2e, 0xb4, 0x6c, 0x60, 0x8c, 0x18,
-  0x20, 0x00, 0x08, 0x82, 0x41, 0x72, 0xf2, 0x42, 0xba, 0x0b, 0xb5, 0x2e,
-  0xb4, 0x5c, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x09, 0xca, 0x0b,
-  0xea, 0x2e, 0xec, 0xba, 0xd0, 0x32, 0xc1, 0x88, 0x41, 0x03, 0x80, 0x20,
-  0x18, 0x34, 0x27, 0x2f, 0xa8, 0xbb, 0x20, 0xeb, 0x02, 0xb3, 0x28, 0x30,
-  0x1b, 0x20, 0x44, 0x60, 0x33, 0x1e, 0xd0, 0xba, 0x20, 0x9f, 0x11, 0x03,
-  0x04, 0x00, 0x41, 0x30, 0x48, 0x56, 0x5e, 0x68, 0x77, 0xc1, 0xd7, 0x85,
-  0x18, 0x0b, 0xac, 0xc6, 0x03, 0x5f, 0x17, 0xe4, 0x33, 0x62, 0x80, 0x00,
-  0x20, 0x08, 0x06, 0x49, 0xcb, 0x0b, 0xef, 0x2e, 0xf4, 0xba, 0x40, 0xa3,
-  0x41, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x89, 0xcb, 0x0b, 0xf0,
-  0x2e, 0x80, 0xbb, 0x40, 0x63, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
-  0x24, 0x2f, 0x2f, 0xc4, 0xbb, 0x90, 0xeb, 0x02, 0x8d, 0x04, 0x23, 0x06,
-  0x08, 0x00, 0x82, 0x60, 0x90, 0xc0, 0xbc, 0x20, 0xef, 0xc2, 0xaf, 0x0b,
-  0x37, 0x1a, 0x18, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x90, 0xc4, 0xbc,
-  0x30, 0xef, 0xc2, 0xaf, 0x0b, 0x37, 0x16, 0x8c, 0x18, 0x20, 0x00, 0x08,
-  0x82, 0x41, 0x22, 0xf3, 0x02, 0xbd, 0x0b, 0xe5, 0x2e, 0xdc, 0x48, 0x30,
-  0x62, 0xd0, 0x00, 0x20, 0x08, 0x06, 0x4d, 0xcc, 0x0b, 0xf4, 0x2e, 0xf0,
-  0xba, 0x60, 0x55, 0x94, 0x8e, 0x06, 0x08, 0x11, 0x18, 0x4f, 0x07, 0xbe,
-  0x2e, 0xc8, 0x67, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x92, 0x9a, 0x17,
-  0xee, 0x5d, 0x40, 0x77, 0x61, 0xa7, 0x02, 0xf3, 0xe9, 0x00, 0xdd, 0x05,
-  0xf9, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x41, 0x72, 0xf3, 0x42, 0xbe,
-  0x0b, 0xe7, 0x2e, 0xf8, 0x64, 0x10, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
-  0x41, 0x82, 0xf3, 0x82, 0xbe, 0x0b, 0xea, 0x2e, 0xf8, 0x54, 0x30, 0x62,
-  0x80, 0x00, 0x20, 0x08, 0x06, 0x49, 0xce, 0x0b, 0xfb, 0x2e, 0x8c, 0xbb,
-  0xe0, 0x13, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x24, 0x3a, 0x2f,
-  0xf0, 0xbb, 0x90, 0xee, 0x42, 0x58, 0x06, 0xc6, 0x88, 0x01, 0x02, 0x80,
-  0x20, 0x18, 0x24, 0x3b, 0x2f, 0xf4, 0xbb, 0x90, 0xee, 0x42, 0x58, 0x05,
-  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x90, 0xf0, 0xbc, 0xe0, 0xef, 0xc2,
-  0xbb, 0x0b, 0x61, 0x11, 0x8c, 0x18, 0x34, 0x00, 0x08, 0x82, 0x41, 0xb3,
-  0xf3, 0x82, 0xbf, 0x0b, 0xe6, 0x2e, 0x80, 0xc1, 0xe7, 0x91, 0x65, 0x80,
-  0x10, 0x81, 0x95, 0x71, 0x80, 0xee, 0x82, 0x7c, 0x46, 0x0c, 0x10, 0x00,
-  0x04, 0xc1, 0x20, 0xf9, 0x79, 0x21, 0xe4, 0x05, 0x79, 0x17, 0xca, 0x28,
-  0xb0, 0x33, 0x0e, 0xe4, 0x5d, 0x90, 0xcf, 0x88, 0x01, 0x02, 0x80, 0x20,
-  0x18, 0x24, 0x61, 0x2f, 0x8c, 0xbc, 0x10, 0xef, 0x02, 0x1a, 0x06, 0xc1,
-  0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x24, 0x62, 0x2f, 0x90, 0xbc, 0x40,
-  0xef, 0x02, 0x1a, 0x05, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x90, 0x8c,
-  0xbd, 0x50, 0xf2, 0x42, 0xbb, 0x0b, 0x68, 0x10, 0x8c, 0x18, 0x20, 0x00,
-  0x08, 0x82, 0x41, 0x42, 0xf6, 0x82, 0xc9, 0x0b, 0xf3, 0x2e, 0xac, 0x61,
-  0x60, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x41, 0x52, 0xf6, 0xc2, 0xc9,
-  0x0b, 0xf3, 0x2e, 0xac, 0x51, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06,
-  0x89, 0xd9, 0x0b, 0x28, 0x2f, 0xe4, 0xbb, 0xb0, 0x06, 0xc1, 0x88, 0x41,
-  0x03, 0x80, 0x20, 0x18, 0x34, 0x65, 0x2f, 0xa0, 0xbc, 0x00, 0xef, 0x82,
-  0x1a, 0xa4, 0x01, 0x1a, 0xb8, 0x61, 0x80, 0x10, 0x81, 0xb1, 0x01, 0x1b,
-  0xc8, 0xc7, 0x82, 0x36, 0x90, 0x8f, 0x85, 0x01, 0xbd, 0x0b, 0xf2, 0x19,
-  0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x64, 0xed, 0x85, 0x96, 0x17, 0xfc,
-  0x5d, 0x70, 0x02, 0x1b, 0x03, 0x7f, 0x17, 0xe4, 0x33, 0x62, 0x80, 0x00,
-  0x20, 0x08, 0x06, 0x49, 0xdb, 0x0b, 0x2f, 0x2f, 0xf4, 0xbb, 0xa0, 0x05,
-  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x90, 0xb8, 0xbd, 0x00, 0xf3, 0x02,
-  0xc8, 0x0b, 0x51, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xc9, 0xdb,
-  0x0b, 0x31, 0x2f, 0xe4, 0xbb, 0x80, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82,
-  0x60, 0x90, 0xc0, 0xbd, 0x20, 0xf3, 0xc2, 0xbf, 0x0b, 0x9d, 0x31, 0x62,
-  0x80, 0x00, 0x20, 0x08, 0x06, 0x49, 0xdc, 0x0b, 0x33, 0x2f, 0xfc, 0xbb,
-  0x40, 0x05, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x90, 0xc8, 0xbd, 0x40,
-  0xf3, 0x42, 0xc9, 0x0b, 0x4b, 0x30, 0x62, 0xd0, 0x00, 0x20, 0x08, 0x06,
-  0x4d, 0xdc, 0x0b, 0x34, 0x2f, 0xf0, 0xbb, 0x70, 0x07, 0x8b, 0x02, 0x06,
-  0x08, 0x11, 0x5c, 0xd0, 0xe1, 0x88, 0x81, 0x01, 0x80, 0x20, 0x18, 0x8c,
-  0x01, 0xdb, 0x0b, 0x31, 0x2f, 0x04, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20,
-  0x04, 0x23, 0x06, 0x0e, 0x00, 0x82, 0x60, 0x00, 0x06, 0x70, 0x2f, 0xcc,
-  0xbc, 0xf0, 0xef, 0x02, 0xbe, 0x0b, 0x6b, 0x2f, 0x08, 0x01, 0xce, 0x0b,
-  0x38, 0x2f, 0xb0, 0xbc, 0x90, 0xf6, 0xc2, 0x2c, 0xc1, 0x08, 0x21, 0x00,
-  0x00, 0x00, 0x00, 0x00
-};
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_double_float.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_double_float.h
deleted file mode 100644
index 477ac87c1e159..0000000000000
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_double_float.h
+++ /dev/null
@@ -1,11495 +0,0 @@
-#if 0
-//
-// Generated by Microsoft (R) D3D Shader Disassembler
-//
-//
-// Note: shader requires additional functionality:
-//       Double-precision floating point
-//
-//
-// Input signature:
-//
-// Name                 Index   Mask Register SysValue  Format   Used
-// -------------------- ----- ------ -------- -------- ------- ------
-// no Input
-//
-// Output signature:
-//
-// Name                 Index   Mask Register SysValue  Format   Used
-// -------------------- ----- ------ -------- -------- ------- ------
-// no Output
-cs_5_0
-dcl_globalFlags refactoringAllowed | enableDoublePrecisionFloatOps
-dcl_constantbuffer CB0[8], immediateIndexed
-dcl_uav_structured u0, 8
-dcl_uav_structured u1, 4
-dcl_uav_structured u2, 8
-dcl_input vThreadID.x
-dcl_temps 12
-dcl_thread_group 64, 1, 1
-iadd r0.x, vThreadID.x, cb0[0].x
-ult r0.y, r0.x, cb0[0].y
-if_nz r0.y
-  utof r1.xyzw, cb0[1].wzwz
-  add r2.xyzw, r1.xxyy, l(-1.000000, -0.500000, -0.500000, -1.000000)
-  mov r3.x, l(0)
-  mov r3.yz, r2.xxwx
-  mov r2.x, l(-0.500000)
-  movc r0.yzw, cb0[7].xxxx, r3.xxyz, r2.xxyz
-  imul null, r1.x, cb0[5].z, cb0[5].w
-  imul null, r1.y, r1.x, cb0[5].y
-  udiv r1.y, null, r0.x, r1.y
-  imad r2.x, -r1.y, cb0[6].x, r0.x
-  udiv r1.x, null, r2.x, r1.x
-  imad r2.x, -r1.x, cb0[6].y, r2.x
-  udiv r2.y, null, r2.x, cb0[5].w
-  imad r2.x, -r2.y, cb0[6].z, r2.x
-  utof r3.x, r1.y
-  utof r3.yz, r2.yyxy
-  utof r2.xyzw, cb0[4].xyzw
-  dp3 r2.x, r3.xyzx, r2.xyzx
-  add r2.y, r2.w, r2.x
-  ftou r2.x, r2.x
-  ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r3.x, r2.x, l(0), u1.xxxx
-  ftou r2.x, r2.y
-  ld_structured_indexable(structured_buffer, stride=4)(mixed,mixed,mixed,mixed) r3.y, r2.x, l(0), u1.xxxx
-  ieq r2.x, cb0[7].x, l(1)
-  add r2.yz, r3.xxyx, l(0.000000, 1.000000, 1.000000, 0.000000)
-  mul r3.xy, r2.yzyy, l(0.500000, 0.500000, 0.000000, 0.000000)
-  add r3.zw, r1.zzzw, l(0.000000, 0.000000, -1.000000, -1.000000)
-  mul r3.xy, r3.zwzz, r3.xyxx
-  mad r1.zw, r2.yyyz, r1.zzzw, l(0.000000, 0.000000, -1.000000, -1.000000)
-  mul r1.zw, r1.zzzw, l(0.000000, 0.000000, 0.500000, 0.500000)
-  movc r1.zw, r2.xxxx, r3.xxxy, r1.zzzw
-  ieq r2.x, cb0[0].z, l(1)
-  round_ne r2.yz, r1.zzwz
-  movc r1.zw, r2.xxxx, r2.yyyz, r1.zzzw
-  lt r2.yz, r1.zzwz, r0.yyyy
-  lt r3.xy, r0.zwzz, r1.zwzz
-  or r2.w, r2.y, r3.x
-  or r2.w, r2.z, r2.w
-  or r2.w, r3.y, r2.w
-  if_nz r2.w
-    ieq r2.w, cb0[0].w, l(1)
-    if_nz r2.w
-      iadd r3.zw, cb0[1].wwwz, l(0, 0, -1, -1)
-      utof r3.zw, r3.zzzw
-      max r4.xy, r1.zwzz, l(0.000000, 0.000000, 0.000000, 0.000000)
-      min r1.zw, r3.zzzw, r4.xxxy
-    else 
-      ieq r2.w, cb0[0].w, l(2)
-      if_nz r2.w
-        movc r2.w, cb0[7].x, l(-0.000000), l(0.500000)
-        add r3.zw, r0.zzzw, r2.wwww
-        add r4.xy, r0.yyyy, -r1.zwzz
-        div r4.zw, r4.xxxy, r3.zzzw
-        ftou r4.zw, r4.zzzw
-        utof r5.xy, r4.zwzz
-        mad r4.xy, -r5.xyxx, r3.zwzz, r4.xyxx
-        and r4.zw, r4.zzzw, l(0, 0, 1, 1)
-        add r5.xy, r0.yyyy, r4.xyxx
-        add r4.xy, r0.zwzz, -r4.xyxx
-        movc r4.xy, r4.zwzz, r4.xyxx, r5.xyxx
-        add r4.zw, -r0.zzzw, r1.zzzw
-        div r5.xy, r4.zwzz, r3.zwzz
-        ftou r5.xy, r5.xyxx
-        utof r5.zw, r5.xxxy
-        mad r3.zw, -r5.zzzw, r3.zzzw, r4.zzzw
-        and r4.zw, r5.xxxy, l(0, 0, 1, 1)
-        add r5.xy, r0.zwzz, -r3.zwzz
-        add r3.zw, r0.yyyy, r3.zzzw
-        movc r3.zw, r4.zzzw, r3.zzzw, r5.xxxy
-        movc r3.xy, r3.xyxx, r3.zwzz, r1.zwzz
-        movc r1.zw, r2.yyyz, r4.xxxy, r3.xxxy
-      endif 
-    endif 
-  endif 
-  utof r3.xy, r1.yxyy
-  if_nz r2.x
-    if_z cb0[0].w
-      ge r1.xy, r1.zwzz, l(0.000000, 0.000000, 0.000000, 0.000000)
-      ftou r2.xyzw, r1.zwwz
-      ult r2.xy, r2.xyxx, cb0[1].wzww
-      and r1.x, r1.x, r2.x
-      and r1.x, r1.y, r1.x
-      and r1.x, r2.y, r1.x
-      ftou r2.xy, r3.xyxx
-      imul null, r2.xy, r2.xyxx, cb0[2].xyxx
-      iadd r1.y, r2.y, r2.x
-      imad r1.y, r2.z, cb0[2].z, r1.y
-      imad r1.y, r2.w, cb0[2].w, r1.y
-      ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r2.xy, r1.y, l(0), u0.xyxx
-      mov r2.zw, r2.xxxy
-      dtof r1.y, r2.zwzw
-      and r1.x, r1.y, r1.x
-    else 
-      ieq r1.y, cb0[0].w, l(1)
-      if_nz r1.y
-        iadd r2.xy, cb0[1].zwzz, l(-1, -1, 0, 0)
-        utof r2.xy, r2.xyxx
-        max r2.zw, r1.wwwz, l(0.000000, 0.000000, 0.000000, 0.000000)
-        min r2.xy, r2.xyxx, r2.zwzz
-        ftou r2.xy, r2.xyxx
-        ftou r2.zw, r3.xxxy
-        imul null, r2.zw, r2.zzzw, cb0[2].xxxy
-        iadd r1.y, r2.w, r2.z
-        imad r1.y, r2.x, cb0[2].z, r1.y
-        imad r1.y, r2.y, cb0[2].w, r1.y
-        ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r2.xy, r1.y, l(0), u0.xyxx
-        mov r2.zw, r2.xxxy
-        dtof r1.x, r2.zwzw
-      else 
-        ieq r1.y, cb0[0].w, l(2)
-        if_nz r1.y
-          movc r1.y, cb0[7].x, l(-0.000000), l(0.500000)
-          add r2.xy, r0.wzww, r1.yyyy
-          lt r2.zw, r1.wwwz, r0.yyyy
-          add r3.zw, r0.yyyy, -r1.wwwz
-          div r4.xy, r3.zwzz, r2.xyxx
-          ftou r4.xy, r4.xyxx
-          utof r4.zw, r4.xxxy
-          mad r3.zw, -r4.zzzw, r2.xxxy, r3.zzzw
-          and r4.xy, r4.xyxx, l(1, 1, 0, 0)
-          add r4.zw, r0.yyyy, r3.zzzw
-          add r3.zw, r0.wwwz, -r3.zzzw
-          movc r3.zw, r4.xxxy, r3.zzzw, r4.zzzw
-          lt r4.xy, r0.wzww, r1.wzww
-          add r4.zw, -r0.wwwz, r1.wwwz
-          div r5.xy, r4.zwzz, r2.xyxx
-          ftou r5.xy, r5.xyxx
-          utof r5.zw, r5.xxxy
-          mad r2.xy, -r5.zwzz, r2.xyxx, r4.zwzz
-          and r4.zw, r5.xxxy, l(0, 0, 1, 1)
-          add r5.xy, r0.wzww, -r2.xyxx
-          add r2.xy, r0.yyyy, r2.xyxx
-          movc r2.xy, r4.zwzz, r2.xyxx, r5.xyxx
-          movc r2.xy, r4.xyxx, r2.xyxx, r1.wzww
-          movc r2.xy, r2.zwzz, r3.zwzz, r2.xyxx
-          ftou r2.xy, r2.xyxx
-          ftou r2.zw, r3.xxxy
-          imul null, r2.zw, r2.zzzw, cb0[2].xxxy
-          iadd r1.y, r2.w, r2.z
-          imad r1.y, r2.x, cb0[2].z, r1.y
-          imad r1.y, r2.y, cb0[2].w, r1.y
-          ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r2.xy, r1.y, l(0), u0.xyxx
-          mov r2.zw, r2.xxxy
-          dtof r1.x, r2.zwzw
-        else 
-          mov r1.x, l(0)
-        endif 
-      endif 
-    endif 
-    ftod r1.xy, r1.x
-    store_structured u2.xy, r0.x, l(0), r1.xyxx
-  else 
-    if_z cb0[0].z
-      round_ni r1.xy, r1.wzww
-      add r2.xy, r1.xyxx, l(1.000000, 1.000000, 0.000000, 0.000000)
-      if_z cb0[0].w
-        ge r2.zw, r1.yyyx, l(0.000000, 0.000000, 0.000000, 0.000000)
-        ftou r3.zw, r1.yyyx
-        ult r3.zw, r3.zzzw, cb0[1].wwwz
-        and r2.z, r2.z, r3.z
-        and r3.z, r2.w, r2.z
-        and r3.z, r3.w, r3.z
-        ftou r4.xy, r3.xyxx
-        ftou r4.zw, r1.xxxy
-        imul null, r4.xy, r4.xyxx, cb0[2].xyxx
-        iadd r4.x, r4.y, r4.x
-        imad r4.x, r4.z, cb0[2].z, r4.x
-        imad r4.x, r4.w, cb0[2].w, r4.x
-        ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r4.xy, r4.x, l(0), u0.xyxx
-        mov r4.zw, r4.xxxy
-        dtof r4.x, r4.zwzw
-        and r3.z, r3.z, r4.x
-        ge r4.xy, r2.yxyy, l(0.000000, 0.000000, 0.000000, 0.000000)
-        ftou r4.zw, r2.yyyx
-        ult r4.zw, r4.zzzw, cb0[1].wwwz
-        and r4.x, r4.z, r4.x
-        and r2.w, r2.w, r4.x
-        and r2.w, r3.w, r2.w
-        ftou r5.xy, r3.xyxx
-        ftou r3.w, r1.x
-        ftou r4.z, r2.y
-        imul null, r5.xy, r5.xyxx, cb0[2].xyxx
-        iadd r5.x, r5.y, r5.x
-        imad r3.w, r3.w, cb0[2].z, r5.x
-        imad r3.w, r4.z, cb0[2].w, r3.w
-        ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r5.xy, r3.w, l(0), u0.xyxx
-        mov r5.zw, r5.xxxy
-        dtof r3.w, r5.zwzw
-        and r2.w, r2.w, r3.w
-        and r2.z, r2.z, r4.y
-        and r2.z, r4.w, r2.z
-        ftou r5.xy, r3.xyxx
-        ftou r3.w, r2.x
-        ftou r4.z, r1.y
-        imul null, r5.xy, r5.xyxx, cb0[2].xyxx
-        iadd r5.x, r5.y, r5.x
-        imad r3.w, r3.w, cb0[2].z, r5.x
-        imad r3.w, r4.z, cb0[2].w, r3.w
-        ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r5.xy, r3.w, l(0), u0.xyxx
-        mov r5.zw, r5.xxxy
-        dtof r3.w, r5.zwzw
-        and r2.z, r2.z, r3.w
-        and r3.w, r4.y, r4.x
-        and r3.w, r4.w, r3.w
-        ftou r4.xy, r3.xyxx
-        ftou r4.zw, r2.xxxy
-        imul null, r4.xy, r4.xyxx, cb0[2].xyxx
-        iadd r4.x, r4.y, r4.x
-        imad r4.x, r4.z, cb0[2].z, r4.x
-        imad r4.x, r4.w, cb0[2].w, r4.x
-        ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r4.xy, r4.x, l(0), u0.xyxx
-        mov r4.zw, r4.xxxy
-        dtof r4.x, r4.zwzw
-        and r3.w, r3.w, r4.x
-      else 
-        ieq r4.x, cb0[0].w, l(1)
-        if_nz r4.x
-          iadd r4.yz, cb0[1].zzwz, l(0, -1, -1, 0)
-          utof r4.yz, r4.yyzy
-          max r5.xy, r1.xyxx, l(0.000000, 0.000000, 0.000000, 0.000000)
-          min r4.yz, r4.yyzy, r5.xxyx
-          ftou r4.yz, r4.yyzy
-          ftou r5.xy, r3.xyxx
-          imul null, r5.xy, r5.xyxx, cb0[2].xyxx
-          iadd r4.w, r5.y, r5.x
-          imad r4.y, r4.y, cb0[2].z, r4.w
-          imad r4.y, r4.z, cb0[2].w, r4.y
-          ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r4.yz, r4.y, l(0), u0.xxyx
-          mov r5.xy, r4.yzyy
-          dtof r3.z, r5.xyxy
-        else 
-          ieq r4.y, cb0[0].w, l(2)
-          if_nz r4.y
-            movc r4.y, cb0[7].x, l(-0.000000), l(0.500000)
-            add r4.yz, r0.wwzw, r4.yyyy
-            lt r5.xy, r1.xyxx, r0.yyyy
-            add r5.zw, r0.yyyy, -r1.xxxy
-            div r6.xy, r5.zwzz, r4.yzyy
-            ftou r6.xy, r6.xyxx
-            utof r6.zw, r6.xxxy
-            mad r5.zw, -r6.zzzw, r4.yyyz, r5.zzzw
-            and r6.xy, r6.xyxx, l(1, 1, 0, 0)
-            add r6.zw, r0.yyyy, r5.zzzw
-            add r5.zw, r0.wwwz, -r5.zzzw
-            movc r5.zw, r6.xxxy, r5.zzzw, r6.zzzw
-            lt r6.xy, r0.wzww, r1.xyxx
-            add r6.zw, -r0.wwwz, r1.xxxy
-            div r7.xy, r6.zwzz, r4.yzyy
-            ftou r7.xy, r7.xyxx
-            utof r7.zw, r7.xxxy
-            mad r4.yz, -r7.zzwz, r4.yyzy, r6.zzwz
-            and r6.zw, r7.xxxy, l(0, 0, 1, 1)
-            add r7.xy, r0.wzww, -r4.yzyy
-            add r4.yz, r0.yyyy, r4.yyzy
-            movc r4.yz, r6.zzwz, r4.yyzy, r7.xxyx
-            movc r4.yz, r6.xxyx, r4.yyzy, r1.xxyx
-            movc r4.yz, r5.xxyx, r5.zzwz, r4.yyzy
-            ftou r4.yz, r4.yyzy
-            ftou r5.xy, r3.xyxx
-            imul null, r5.xy, r5.xyxx, cb0[2].xyxx
-            iadd r4.w, r5.y, r5.x
-            imad r4.y, r4.y, cb0[2].z, r4.w
-            imad r4.y, r4.z, cb0[2].w, r4.y
-            ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r4.yz, r4.y, l(0), u0.xxyx
-            mov r5.xy, r4.yzyy
-            dtof r3.z, r5.xyxy
-          else 
-            mov r3.z, l(0)
-          endif 
-        endif 
-        if_nz r4.x
-          iadd r4.yz, cb0[1].wwzw, l(0, -1, -1, 0)
-          utof r4.yz, r4.yyzy
-          max r4.w, r2.y, l(0.000000)
-          min r4.y, r4.y, r4.w
-          max r4.w, r1.x, l(0.000000)
-          min r4.z, r4.z, r4.w
-          ftou r4.yz, r4.yyzy
-          utof r4.zw, r4.zzzy
-          ftou r5.xy, r3.xyxx
-          ftou r4.yz, r4.zzwz
-          imul null, r5.xy, r5.xyxx, cb0[2].xyxx
-          iadd r4.w, r5.y, r5.x
-          imad r4.y, r4.y, cb0[2].z, r4.w
-          imad r4.y, r4.z, cb0[2].w, r4.y
-          ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r4.yz, r4.y, l(0), u0.xxyx
-          mov r5.xy, r4.yzyy
-          dtof r2.w, r5.xyxy
-        else 
-          ieq r4.y, cb0[0].w, l(2)
-          if_nz r4.y
-            movc r4.y, cb0[7].x, l(-0.000000), l(0.500000)
-            add r4.yz, r0.zzwz, r4.yyyy
-            lt r4.w, r2.y, r0.y
-            add r5.x, r0.y, -r2.y
-            div r5.y, r5.x, r4.y
-            ftou r5.y, r5.y
-            utof r5.z, r5.y
-            mad r5.x, -r5.z, r4.y, r5.x
-            and r5.y, r5.y, l(1)
-            add r5.z, r0.y, r5.x
-            add r5.x, r0.z, -r5.x
-            movc r5.x, r5.y, r5.x, r5.z
-            lt r5.y, r0.z, r2.y
-            add r5.z, -r0.z, r2.y
-            div r5.w, r5.z, r4.y
-            ftou r5.w, r5.w
-            utof r6.x, r5.w
-            mad r4.y, -r6.x, r4.y, r5.z
-            and r5.z, r5.w, l(1)
-            add r5.w, r0.z, -r4.y
-            add r4.y, r0.y, r4.y
-            movc r4.y, r5.z, r4.y, r5.w
-            movc r4.y, r5.y, r4.y, r2.y
-            movc r4.y, r4.w, r5.x, r4.y
-            ftou r4.y, r4.y
-            lt r4.w, r1.x, r0.y
-            add r5.x, r0.y, -r1.x
-            div r5.y, r5.x, r4.z
-            ftou r5.y, r5.y
-            utof r5.z, r5.y
-            mad r5.x, -r5.z, r4.z, r5.x
-            and r5.y, r5.y, l(1)
-            add r5.z, r0.y, r5.x
-            add r5.x, r0.w, -r5.x
-            movc r5.x, r5.y, r5.x, r5.z
-            lt r5.y, r0.w, r1.x
-            add r5.z, -r0.w, r1.x
-            div r5.w, r5.z, r4.z
-            ftou r5.w, r5.w
-            utof r6.x, r5.w
-            mad r4.z, -r6.x, r4.z, r5.z
-            and r5.z, r5.w, l(1)
-            add r5.w, r0.w, -r4.z
-            add r4.z, r0.y, r4.z
-            movc r4.z, r5.z, r4.z, r5.w
-            movc r1.x, r5.y, r4.z, r1.x
-            movc r1.x, r4.w, r5.x, r1.x
-            ftou r1.x, r1.x
-            utof r4.z, r1.x
-            utof r4.w, r4.y
-            ftou r5.xy, r3.xyxx
-            ftou r4.yz, r4.zzwz
-            imul null, r5.xy, r5.xyxx, cb0[2].xyxx
-            iadd r1.x, r5.y, r5.x
-            imad r1.x, r4.y, cb0[2].z, r1.x
-            imad r1.x, r4.z, cb0[2].w, r1.x
-            ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r4.yz, r1.x, l(0), u0.xxyx
-            mov r5.xy, r4.yzyy
-            dtof r2.w, r5.xyxy
-          else 
-            mov r2.w, l(0)
-          endif 
-        endif 
-        if_nz r4.x
-          iadd r4.yz, cb0[1].wwzw, l(0, -1, -1, 0)
-          utof r4.yz, r4.yyzy
-          max r1.x, r1.y, l(0.000000)
-          min r1.x, r4.y, r1.x
-          ftou r1.x, r1.x
-          max r4.y, r2.x, l(0.000000)
-          min r4.y, r4.z, r4.y
-          ftou r4.y, r4.y
-          utof r4.z, r4.y
-          utof r4.w, r1.x
-          ftou r5.xy, r3.xyxx
-          ftou r4.yz, r4.zzwz
-          imul null, r5.xy, r5.xyxx, cb0[2].xyxx
-          iadd r1.x, r5.y, r5.x
-          imad r1.x, r4.y, cb0[2].z, r1.x
-          imad r1.x, r4.z, cb0[2].w, r1.x
-          ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r4.yz, r1.x, l(0), u0.xxyx
-          mov r5.xy, r4.yzyy
-          dtof r2.z, r5.xyxy
-        else 
-          ieq r1.x, cb0[0].w, l(2)
-          if_nz r1.x
-            movc r1.x, cb0[7].x, l(-0.000000), l(0.500000)
-            add r4.yz, r0.zzwz, r1.xxxx
-            lt r1.x, r1.y, r0.y
-            add r4.w, r0.y, -r1.y
-            div r5.x, r4.w, r4.y
-            ftou r5.x, r5.x
-            utof r5.y, r5.x
-            mad r4.w, -r5.y, r4.y, r4.w
-            and r5.x, r5.x, l(1)
-            add r5.y, r0.y, r4.w
-            add r4.w, r0.z, -r4.w
-            movc r4.w, r5.x, r4.w, r5.y
-            lt r5.x, r0.z, r1.y
-            add r5.y, -r0.z, r1.y
-            div r5.z, r5.y, r4.y
-            ftou r5.z, r5.z
-            utof r5.w, r5.z
-            mad r4.y, -r5.w, r4.y, r5.y
-            and r5.y, r5.z, l(1)
-            add r5.z, r0.z, -r4.y
-            add r4.y, r0.y, r4.y
-            movc r4.y, r5.y, r4.y, r5.z
-            movc r1.y, r5.x, r4.y, r1.y
-            movc r1.x, r1.x, r4.w, r1.y
-            lt r1.y, r2.x, r0.y
-            add r4.y, r0.y, -r2.x
-            div r4.w, r4.y, r4.z
-            ftou r4.w, r4.w
-            utof r5.x, r4.w
-            mad r4.y, -r5.x, r4.z, r4.y
-            and r4.w, r4.w, l(1)
-            add r5.x, r0.y, r4.y
-            add r4.y, r0.w, -r4.y
-            movc r4.y, r4.w, r4.y, r5.x
-            lt r4.w, r0.w, r2.x
-            add r5.x, -r0.w, r2.x
-            div r5.y, r5.x, r4.z
-            ftou r5.y, r5.y
-            utof r5.z, r5.y
-            mad r4.z, -r5.z, r4.z, r5.x
-            and r5.x, r5.y, l(1)
-            add r5.y, r0.w, -r4.z
-            add r4.z, r0.y, r4.z
-            movc r4.z, r5.x, r4.z, r5.y
-            movc r4.z, r4.w, r4.z, r2.x
-            movc r1.y, r1.y, r4.y, r4.z
-            ftou r1.xy, r1.xyxx
-            utof r4.zw, r1.yyyx
-            ftou r1.xy, r3.xyxx
-            ftou r4.yz, r4.zzwz
-            imul null, r1.xy, r1.xyxx, cb0[2].xyxx
-            iadd r1.x, r1.y, r1.x
-            imad r1.x, r4.y, cb0[2].z, r1.x
-            imad r1.x, r4.z, cb0[2].w, r1.x
-            ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r1.xy, r1.x, l(0), u0.xyxx
-            mov r4.zw, r1.xxxy
-            dtof r2.z, r4.zwzw
-          else 
-            mov r2.z, l(0)
-          endif 
-        endif 
-        if_nz r4.x
-          iadd r1.xy, cb0[1].zwzz, l(-1, -1, 0, 0)
-          utof r1.xy, r1.xyxx
-          max r4.xy, r2.xyxx, l(0.000000, 0.000000, 0.000000, 0.000000)
-          min r1.xy, r1.xyxx, r4.xyxx
-          ftou r1.xy, r1.xyxx
-          ftou r4.xy, r3.xyxx
-          imul null, r4.xy, r4.xyxx, cb0[2].xyxx
-          iadd r4.x, r4.y, r4.x
-          imad r1.x, r1.x, cb0[2].z, r4.x
-          imad r1.x, r1.y, cb0[2].w, r1.x
-          ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r1.xy, r1.x, l(0), u0.xyxx
-          mov r4.xy, r1.xyxx
-          dtof r3.w, r4.xyxy
-        else 
-          ieq r1.x, cb0[0].w, l(2)
-          if_nz r1.x
-            movc r1.x, cb0[7].x, l(-0.000000), l(0.500000)
-            add r1.xy, r0.wzww, r1.xxxx
-            lt r4.xy, r2.xyxx, r0.yyyy
-            add r4.zw, r0.yyyy, -r2.xxxy
-            div r5.xy, r4.zwzz, r1.xyxx
-            ftou r5.xy, r5.xyxx
-            utof r5.zw, r5.xxxy
-            mad r4.zw, -r5.zzzw, r1.xxxy, r4.zzzw
-            and r5.xy, r5.xyxx, l(1, 1, 0, 0)
-            add r5.zw, r0.yyyy, r4.zzzw
-            add r4.zw, r0.wwwz, -r4.zzzw
-            movc r4.zw, r5.xxxy, r4.zzzw, r5.zzzw
-            lt r5.xy, r0.wzww, r2.xyxx
-            add r5.zw, -r0.wwwz, r2.xxxy
-            div r6.xy, r5.zwzz, r1.xyxx
-            ftou r6.xy, r6.xyxx
-            utof r6.zw, r6.xxxy
-            mad r1.xy, -r6.zwzz, r1.xyxx, r5.zwzz
-            and r5.zw, r6.xxxy, l(0, 0, 1, 1)
-            add r6.xy, r0.wzww, -r1.xyxx
-            add r1.xy, r0.yyyy, r1.xyxx
-            movc r1.xy, r5.zwzz, r1.xyxx, r6.xyxx
-            movc r1.xy, r5.xyxx, r1.xyxx, r2.xyxx
-            movc r1.xy, r4.xyxx, r4.zwzz, r1.xyxx
-            ftou r1.xy, r1.xyxx
-            ftou r2.xy, r3.xyxx
-            imul null, r2.xy, r2.xyxx, cb0[2].xyxx
-            iadd r2.x, r2.y, r2.x
-            imad r1.x, r1.x, cb0[2].z, r2.x
-            imad r1.x, r1.y, cb0[2].w, r1.x
-            ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r1.xy, r1.x, l(0), u0.xyxx
-            mov r2.xy, r1.xyxx
-            dtof r3.w, r2.xyxy
-          else 
-            mov r3.w, l(0)
-          endif 
-        endif 
-      endif 
-      frc r1.xy, r1.zwzz
-      add r2.x, r2.w, -r3.z
-      mad r2.x, r1.x, r2.x, r3.z
-      add r2.y, -r2.z, r3.w
-      mad r1.x, r1.x, r2.y, r2.z
-      add r1.x, -r2.x, r1.x
-      mad r1.x, r1.y, r1.x, r2.x
-      ftod r1.xy, r1.x
-      store_structured u2.xy, r0.x, l(0), r1.xyxx
-    else 
-      ieq r1.x, cb0[0].z, l(2)
-      if_nz r1.x
-        round_ni r1.xy, r1.wzww
-        add r2.xyzw, r1.yxyy, l(2.000000, -1.000000, -1.000000, 1.000000)
-        if_z cb0[0].w
-          ge r3.zw, r2.zzzy, l(0.000000, 0.000000, 0.000000, 0.000000)
-          ftou r4.xy, r2.zyzz
-          ult r4.xy, r4.xyxx, cb0[1].wzww
-          and r3.z, r3.z, r4.x
-          and r3.z, r3.w, r3.z
-          and r3.z, r4.y, r3.z
-          ftou r4.xz, r3.xxyx
-          ftou r5.xy, r2.yzyy
-          imul null, r4.xz, r4.xxzx, cb0[2].xxyx
-          iadd r4.x, r4.z, r4.x
-          imad r4.x, r5.x, cb0[2].z, r4.x
-          imad r4.x, r5.y, cb0[2].w, r4.x
-          ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r4.xz, r4.x, l(0), u0.xxyx
-          mov r5.xy, r4.xzxx
-          dtof r4.x, r5.xyxy
-          and r5.x, r3.z, r4.x
-          ge r3.z, r1.y, l(0.000000)
-          ftou r4.xz, r1.yyyy
-          ult r4.x, r4.x, cb0[1].w
-          and r3.z, r3.z, r4.x
-          and r3.z, r3.w, r3.z
-          and r3.z, r4.y, r3.z
-          ftou r4.xy, r3.xyxx
-          ftou r3.w, r2.y
-          imul null, r4.xy, r4.xyxx, cb0[2].xyxx
-          iadd r4.x, r4.y, r4.x
-          imad r3.w, r3.w, cb0[2].z, r4.x
-          imad r3.w, r4.z, cb0[2].w, r3.w
-          ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r4.xy, r3.w, l(0), u0.xyxx
-          mov r4.zw, r4.xxxy
-          dtof r3.w, r4.zwzw
-          and r4.x, r3.w, r3.z
-        else 
-          ieq r3.z, cb0[0].w, l(1)
-          if_nz r3.z
-            iadd r6.xy, cb0[1].zwzz, l(-1, -1, 0, 0)
-            utof r6.xy, r6.xyxx
-            max r6.zw, r2.yyyz, l(0.000000, 0.000000, 0.000000, 0.000000)
-            min r6.xy, r6.xyxx, r6.zwzz
-            ftou r6.xy, r6.xyxx
-            ftou r6.zw, r3.xxxy
-            imul null, r6.zw, r6.zzzw, cb0[2].xxxy
-            iadd r3.w, r6.w, r6.z
-            imad r3.w, r6.x, cb0[2].z, r3.w
-            imad r3.w, r6.y, cb0[2].w, r3.w
-            ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r6.xy, r3.w, l(0), u0.xyxx
-            mov r6.zw, r6.xxxy
-            dtof r5.x, r6.zwzw
-          else 
-            ieq r3.w, cb0[0].w, l(2)
-            if_nz r3.w
-              movc r3.w, cb0[7].x, l(-0.000000), l(0.500000)
-              add r6.xy, r0.wzww, r3.wwww
-              lt r6.zw, r2.yyyz, r0.yyyy
-              add r7.xy, r0.yyyy, -r2.yzyy
-              div r7.zw, r7.xxxy, r6.xxxy
-              ftou r7.zw, r7.zzzw
-              utof r8.xy, r7.zwzz
-              mad r7.xy, -r8.xyxx, r6.xyxx, r7.xyxx
-              and r7.zw, r7.zzzw, l(0, 0, 1, 1)
-              add r8.xy, r0.yyyy, r7.xyxx
-              add r7.xy, r0.wzww, -r7.xyxx
-              movc r7.xy, r7.zwzz, r7.xyxx, r8.xyxx
-              lt r7.zw, r0.wwwz, r2.yyyz
-              add r8.xy, -r0.wzww, r2.yzyy
-              div r8.zw, r8.xxxy, r6.xxxy
-              ftou r8.zw, r8.zzzw
-              utof r9.xy, r8.zwzz
-              mad r6.xy, -r9.xyxx, r6.xyxx, r8.xyxx
-              and r8.xy, r8.zwzz, l(1, 1, 0, 0)
-              add r8.zw, r0.wwwz, -r6.xxxy
-              add r6.xy, r0.yyyy, r6.xyxx
-              movc r6.xy, r8.xyxx, r6.xyxx, r8.zwzz
-              movc r6.xy, r7.zwzz, r6.xyxx, r2.yzyy
-              movc r6.xy, r6.zwzz, r7.xyxx, r6.xyxx
-              ftou r6.xy, r6.xyxx
-              ftou r6.zw, r3.xxxy
-              imul null, r6.zw, r6.zzzw, cb0[2].xxxy
-              iadd r3.w, r6.w, r6.z
-              imad r3.w, r6.x, cb0[2].z, r3.w
-              imad r3.w, r6.y, cb0[2].w, r3.w
-              ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r6.xy, r3.w, l(0), u0.xyxx
-              mov r6.zw, r6.xxxy
-              dtof r5.x, r6.zwzw
-            else 
-              mov r5.x, l(0)
-            endif 
-          endif 
-          if_nz r3.z
-            iadd r3.zw, cb0[1].wwwz, l(0, 0, -1, -1)
-            utof r3.zw, r3.zzzw
-            max r6.x, r1.y, l(0.000000)
-            min r3.z, r3.z, r6.x
-            max r6.x, r2.y, l(0.000000)
-            min r3.w, r3.w, r6.x
-            ftou r3.zw, r3.zzzw
-            utof r6.zw, r3.wwwz
-            ftou r3.zw, r3.xxxy
-            ftou r6.xy, r6.zwzz
-            imul null, r3.zw, r3.zzzw, cb0[2].xxxy
-            iadd r3.z, r3.w, r3.z
-            imad r3.z, r6.x, cb0[2].z, r3.z
-            imad r3.z, r6.y, cb0[2].w, r3.z
-            ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r3.zw, r3.z, l(0), u0.xxxy
-            mov r6.xy, r3.zwzz
-            dtof r4.x, r6.xyxy
-          else 
-            ieq r3.z, cb0[0].w, l(2)
-            if_nz r3.z
-              movc r3.z, cb0[7].x, l(-0.000000), l(0.500000)
-              add r3.zw, r0.zzzw, r3.zzzz
-              lt r6.x, r1.y, r0.y
-              add r6.y, r0.y, -r1.y
-              div r6.z, r6.y, r3.z
-              ftou r6.z, r6.z
-              utof r6.w, r6.z
-              mad r6.y, -r6.w, r3.z, r6.y
-              and r6.z, r6.z, l(1)
-              add r6.w, r0.y, r6.y
-              add r6.y, r0.z, -r6.y
-              movc r6.y, r6.z, r6.y, r6.w
-              lt r6.z, r0.z, r1.y
-              add r6.w, -r0.z, r1.y
-              div r7.x, r6.w, r3.z
-              ftou r7.x, r7.x
-              utof r7.y, r7.x
-              mad r3.z, -r7.y, r3.z, r6.w
-              and r6.w, r7.x, l(1)
-              add r7.x, r0.z, -r3.z
-              add r3.z, r0.y, r3.z
-              movc r3.z, r6.w, r3.z, r7.x
-              movc r3.z, r6.z, r3.z, r1.y
-              movc r3.z, r6.x, r6.y, r3.z
-              lt r6.x, r2.y, r0.y
-              add r6.y, r0.y, -r2.y
-              div r6.z, r6.y, r3.w
-              ftou r6.z, r6.z
-              utof r6.w, r6.z
-              mad r6.y, -r6.w, r3.w, r6.y
-              and r6.z, r6.z, l(1)
-              add r6.w, r0.y, r6.y
-              add r6.y, r0.w, -r6.y
-              movc r6.y, r6.z, r6.y, r6.w
-              lt r6.z, r0.w, r2.y
-              add r6.w, -r0.w, r2.y
-              div r7.x, r6.w, r3.w
-              ftou r7.x, r7.x
-              utof r7.y, r7.x
-              mad r3.w, -r7.y, r3.w, r6.w
-              and r6.w, r7.x, l(1)
-              add r7.x, r0.w, -r3.w
-              add r3.w, r0.y, r3.w
-              movc r3.w, r6.w, r3.w, r7.x
-              movc r3.w, r6.z, r3.w, r2.y
-              movc r3.w, r6.x, r6.y, r3.w
-              ftou r3.zw, r3.zzzw
-              utof r6.zw, r3.wwwz
-              ftou r3.zw, r3.xxxy
-              ftou r6.xy, r6.zwzz
-              imul null, r3.zw, r3.zzzw, cb0[2].xxxy
-              iadd r3.z, r3.w, r3.z
-              imad r3.z, r6.x, cb0[2].z, r3.z
-              imad r3.z, r6.y, cb0[2].w, r3.z
-              ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r3.zw, r3.z, l(0), u0.xxxy
-              mov r6.xy, r3.zwzz
-              dtof r4.x, r6.xyxy
-            else 
-              mov r4.x, l(0)
-            endif 
-          endif 
-        endif 
-        if_z cb0[0].w
-          ge r3.zw, r2.wwwy, l(0.000000, 0.000000, 0.000000, 0.000000)
-          ftou r6.xyzw, r2.wyyw
-          ult r6.xy, r6.xyxx, cb0[1].wzww
-          and r3.z, r3.z, r6.x
-          and r3.z, r3.w, r3.z
-          and r3.z, r6.y, r3.z
-          ftou r6.xy, r3.xyxx
-          imul null, r6.xy, r6.xyxx, cb0[2].xyxx
-          iadd r3.w, r6.y, r6.x
-          imad r3.w, r6.z, cb0[2].z, r3.w
-          imad r3.w, r6.w, cb0[2].w, r3.w
-          ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r6.xy, r3.w, l(0), u0.xyxx
-          mov r6.zw, r6.xxxy
-          dtof r3.w, r6.zwzw
-          and r6.x, r3.w, r3.z
-        else 
-          ieq r3.z, cb0[0].w, l(1)
-          if_nz r3.z
-            iadd r3.zw, cb0[1].zzzw, l(0, 0, -1, -1)
-            utof r3.zw, r3.zzzw
-            max r7.xy, r2.ywyy, l(0.000000, 0.000000, 0.000000, 0.000000)
-            min r3.zw, r3.zzzw, r7.xxxy
-            ftou r3.zw, r3.zzzw
-            ftou r7.xy, r3.xyxx
-            imul null, r7.xy, r7.xyxx, cb0[2].xyxx
-            iadd r7.x, r7.y, r7.x
-            imad r3.z, r3.z, cb0[2].z, r7.x
-            imad r3.z, r3.w, cb0[2].w, r3.z
-            ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r3.zw, r3.z, l(0), u0.xxxy
-            mov r7.xy, r3.zwzz
-            dtof r6.x, r7.xyxy
-          else 
-            ieq r3.z, cb0[0].w, l(2)
-            if_nz r3.z
-              movc r3.z, cb0[7].x, l(-0.000000), l(0.500000)
-              add r3.zw, r0.wwwz, r3.zzzz
-              lt r7.xy, r2.ywyy, r0.yyyy
-              add r7.zw, r0.yyyy, -r2.yyyw
-              div r8.xy, r7.zwzz, r3.zwzz
-              ftou r8.xy, r8.xyxx
-              utof r8.zw, r8.xxxy
-              mad r7.zw, -r8.zzzw, r3.zzzw, r7.zzzw
-              and r8.xy, r8.xyxx, l(1, 1, 0, 0)
-              add r8.zw, r0.yyyy, r7.zzzw
-              add r7.zw, r0.wwwz, -r7.zzzw
-              movc r7.zw, r8.xxxy, r7.zzzw, r8.zzzw
-              lt r8.xy, r0.wzww, r2.ywyy
-              add r8.zw, -r0.wwwz, r2.yyyw
-              div r9.xy, r8.zwzz, r3.zwzz
-              ftou r9.xy, r9.xyxx
-              utof r9.zw, r9.xxxy
-              mad r3.zw, -r9.zzzw, r3.zzzw, r8.zzzw
-              and r8.zw, r9.xxxy, l(0, 0, 1, 1)
-              add r9.xy, r0.wzww, -r3.zwzz
-              add r3.zw, r0.yyyy, r3.zzzw
-              movc r3.zw, r8.zzzw, r3.zzzw, r9.xxxy
-              movc r3.zw, r8.xxxy, r3.zzzw, r2.yyyw
-              movc r3.zw, r7.xxxy, r7.zzzw, r3.zzzw
-              ftou r3.zw, r3.zzzw
-              ftou r7.xy, r3.xyxx
-              imul null, r7.xy, r7.xyxx, cb0[2].xyxx
-              iadd r7.x, r7.y, r7.x
-              imad r3.z, r3.z, cb0[2].z, r7.x
-              imad r3.z, r3.w, cb0[2].w, r3.z
-              ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r3.zw, r3.z, l(0), u0.xxxy
-              mov r7.xy, r3.zwzz
-              dtof r6.x, r7.xyxy
-            else 
-              mov r6.x, l(0)
-            endif 
-          endif 
-        endif 
-        if_z cb0[0].w
-          ge r7.xyzw, r2.xyzw, l(0.000000, 0.000000, 0.000000, 0.000000)
-          ftou r8.xyzw, r2.xyzw
-          ult r8.xyzw, r8.xyzw, cb0[1].wzww
-          and r7.xzw, r7.zzwx, r8.zzwx
-          and r3.z, r7.y, r7.w
-          and r3.z, r8.y, r3.z
-          ftou r8.xy, r3.xyxx
-          ftou r8.zw, r2.yyyx
-          imul null, r8.xy, r8.xyxx, cb0[2].xyxx
-          iadd r3.w, r8.y, r8.x
-          imad r3.w, r8.z, cb0[2].z, r3.w
-          imad r3.w, r8.w, cb0[2].w, r3.w
-          ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r8.xy, r3.w, l(0), u0.xyxx
-          mov r8.zw, r8.xxxy
-          dtof r3.w, r8.zwzw
-          and r8.x, r3.w, r3.z
-          ge r3.zw, r1.xxxy, l(0.000000, 0.000000, 0.000000, 0.000000)
-          and r7.xyz, r3.zzzz, r7.xzwx
-          ftou r9.xy, r1.xyxx
-          ult r9.xy, r9.xyxx, cb0[1].zwzz
-          and r7.xyz, r7.xyzx, r9.xxxx
-          ftou r9.zw, r3.xxxy
-          ftou r7.w, r1.x
-          ftou r10.x, r2.z
-          imul null, r9.zw, r9.zzzw, cb0[2].xxxy
-          iadd r9.z, r9.w, r9.z
-          imad r7.w, r7.w, cb0[2].z, r9.z
-          imad r7.w, r10.x, cb0[2].w, r7.w
-          ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r9.zw, r7.w, l(0), u0.xxxy
-          mov r10.xy, r9.zwzz
-          dtof r7.w, r10.xyxy
-          and r5.z, r7.w, r7.x
-          and r3.w, r3.w, r9.y
-          and r3.z, r3.z, r3.w
-          and r3.z, r9.x, r3.z
-          ftou r7.xw, r3.xxxy
-          ftou r9.xy, r1.xyxx
-          imul null, r7.xw, r7.xxxw, cb0[2].xxxy
-          iadd r3.w, r7.w, r7.x
-          imad r3.w, r9.x, cb0[2].z, r3.w
-          imad r3.w, r9.y, cb0[2].w, r3.w
-          ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r7.xw, r3.w, l(0), u0.xxxy
-          mov r9.xy, r7.xwxx
-          dtof r3.w, r9.xyxy
-          and r4.z, r3.w, r3.z
-          ftou r3.zw, r3.xxxy
-          ftou r7.x, r1.x
-          ftou r7.w, r2.w
-          imul null, r3.zw, r3.zzzw, cb0[2].xxxy
-          iadd r3.z, r3.w, r3.z
-          imad r3.z, r7.x, cb0[2].z, r3.z
-          imad r3.z, r7.w, cb0[2].w, r3.z
-          ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r3.zw, r3.z, l(0), u0.xxxy
-          mov r9.xy, r3.zwzz
-          dtof r3.z, r9.xyxy
-          and r6.z, r3.z, r7.y
-          ftou r3.zw, r3.xxxy
-          ftou r7.x, r1.x
-          ftou r7.y, r2.x
-          imul null, r3.zw, r3.zzzw, cb0[2].xxxy
-          iadd r3.z, r3.w, r3.z
-          imad r3.z, r7.x, cb0[2].z, r3.z
-          imad r3.z, r7.y, cb0[2].w, r3.z
-          ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r3.zw, r3.z, l(0), u0.xxxy
-          mov r7.xy, r3.zwzz
-          dtof r3.z, r7.xyxy
-          and r8.z, r3.z, r7.z
-        else 
-          ieq r3.z, cb0[0].w, l(1)
-          if_nz r3.z
-            iadd r7.xy, cb0[1].zwzz, l(-1, -1, 0, 0)
-            utof r7.xy, r7.xyxx
-            max r7.zw, r2.yyyx, l(0.000000, 0.000000, 0.000000, 0.000000)
-            min r7.xy, r7.xyxx, r7.zwzz
-            ftou r7.xy, r7.xyxx
-            ftou r7.zw, r3.xxxy
-            imul null, r7.zw, r7.zzzw, cb0[2].xxxy
-            iadd r3.w, r7.w, r7.z
-            imad r3.w, r7.x, cb0[2].z, r3.w
-            imad r3.w, r7.y, cb0[2].w, r3.w
-            ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r7.xy, r3.w, l(0), u0.xyxx
-            mov r7.zw, r7.xxxy
-            dtof r8.x, r7.zwzw
-          else 
-            ieq r3.w, cb0[0].w, l(2)
-            if_nz r3.w
-              movc r3.w, cb0[7].x, l(-0.000000), l(0.500000)
-              add r7.xy, r0.wzww, r3.wwww
-              lt r7.zw, r2.yyyx, r0.yyyy
-              add r9.xy, r0.yyyy, -r2.yxyy
-              div r9.zw, r9.xxxy, r7.xxxy
-              ftou r9.zw, r9.zzzw
-              utof r10.xy, r9.zwzz
-              mad r9.xy, -r10.xyxx, r7.xyxx, r9.xyxx
-              and r9.zw, r9.zzzw, l(0, 0, 1, 1)
-              add r10.xy, r0.yyyy, r9.xyxx
-              add r9.xy, r0.wzww, -r9.xyxx
-              movc r9.xy, r9.zwzz, r9.xyxx, r10.xyxx
-              lt r9.zw, r0.wwwz, r2.yyyx
-              add r10.xy, -r0.wzww, r2.yxyy
-              div r10.zw, r10.xxxy, r7.xxxy
-              ftou r10.zw, r10.zzzw
-              utof r11.xy, r10.zwzz
-              mad r7.xy, -r11.xyxx, r7.xyxx, r10.xyxx
-              and r10.xy, r10.zwzz, l(1, 1, 0, 0)
-              add r10.zw, r0.wwwz, -r7.xxxy
-              add r7.xy, r0.yyyy, r7.xyxx
-              movc r7.xy, r10.xyxx, r7.xyxx, r10.zwzz
-              movc r7.xy, r9.zwzz, r7.xyxx, r2.yxyy
-              movc r7.xy, r7.zwzz, r9.xyxx, r7.xyxx
-              ftou r7.xy, r7.xyxx
-              ftou r7.zw, r3.xxxy
-              imul null, r7.zw, r7.zzzw, cb0[2].xxxy
-              iadd r2.y, r7.w, r7.z
-              imad r2.y, r7.x, cb0[2].z, r2.y
-              imad r2.y, r7.y, cb0[2].w, r2.y
-              ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r7.xy, r2.y, l(0), u0.xyxx
-              mov r7.zw, r7.xxxy
-              dtof r8.x, r7.zwzw
-            else 
-              mov r8.x, l(0)
-            endif 
-          endif 
-          if_nz r3.z
-            iadd r7.xy, cb0[1].wzww, l(-1, -1, 0, 0)
-            utof r7.xy, r7.xyxx
-            max r2.y, r2.z, l(0.000000)
-            min r2.y, r7.x, r2.y
-            ftou r2.y, r2.y
-            max r3.w, r1.x, l(0.000000)
-            min r3.w, r7.y, r3.w
-            ftou r3.w, r3.w
-            utof r7.z, r3.w
-            utof r7.w, r2.y
-            ftou r7.xy, r3.xyxx
-            ftou r7.zw, r7.zzzw
-            imul null, r7.xy, r7.xyxx, cb0[2].xyxx
-            iadd r2.y, r7.y, r7.x
-            imad r2.y, r7.z, cb0[2].z, r2.y
-            imad r2.y, r7.w, cb0[2].w, r2.y
-            ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r7.xy, r2.y, l(0), u0.xyxx
-            mov r7.zw, r7.xxxy
-            dtof r5.z, r7.zwzw
-          else 
-            ieq r2.y, cb0[0].w, l(2)
-            if_nz r2.y
-              movc r2.y, cb0[7].x, l(-0.000000), l(0.500000)
-              add r7.xy, r0.zwzz, r2.yyyy
-              lt r2.y, r2.z, r0.y
-              add r3.w, r0.y, -r2.z
-              div r7.z, r3.w, r7.x
-              ftou r7.z, r7.z
-              utof r7.w, r7.z
-              mad r3.w, -r7.w, r7.x, r3.w
-              and r7.z, r7.z, l(1)
-              add r7.w, r0.y, r3.w
-              add r3.w, r0.z, -r3.w
-              movc r3.w, r7.z, r3.w, r7.w
-              lt r7.z, r0.z, r2.z
-              add r7.w, -r0.z, r2.z
-              div r9.x, r7.w, r7.x
-              ftou r9.x, r9.x
-              utof r9.y, r9.x
-              mad r7.x, -r9.y, r7.x, r7.w
-              and r7.w, r9.x, l(1)
-              add r9.x, r0.z, -r7.x
-              add r7.x, r0.y, r7.x
-              movc r7.x, r7.w, r7.x, r9.x
-              movc r7.x, r7.z, r7.x, r2.z
-              movc r2.y, r2.y, r3.w, r7.x
-              ftou r2.y, r2.y
-              lt r3.w, r1.x, r0.y
-              add r7.x, r0.y, -r1.x
-              div r7.z, r7.x, r7.y
-              ftou r7.z, r7.z
-              utof r7.w, r7.z
-              mad r7.x, -r7.w, r7.y, r7.x
-              and r7.z, r7.z, l(1)
-              add r7.w, r0.y, r7.x
-              add r7.x, r0.w, -r7.x
-              movc r7.x, r7.z, r7.x, r7.w
-              lt r7.z, r0.w, r1.x
-              add r7.w, -r0.w, r1.x
-              div r9.x, r7.w, r7.y
-              ftou r9.x, r9.x
-              utof r9.y, r9.x
-              mad r7.y, -r9.y, r7.y, r7.w
-              and r7.w, r9.x, l(1)
-              add r9.x, r0.w, -r7.y
-              add r7.y, r0.y, r7.y
-              movc r7.y, r7.w, r7.y, r9.x
-              movc r7.y, r7.z, r7.y, r1.x
-              movc r3.w, r3.w, r7.x, r7.y
-              ftou r3.w, r3.w
-              utof r7.z, r3.w
-              utof r7.w, r2.y
-              ftou r7.xy, r3.xyxx
-              ftou r7.zw, r7.zzzw
-              imul null, r7.xy, r7.xyxx, cb0[2].xyxx
-              iadd r2.y, r7.y, r7.x
-              imad r2.y, r7.z, cb0[2].z, r2.y
-              imad r2.y, r7.w, cb0[2].w, r2.y
-              ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r7.xy, r2.y, l(0), u0.xyxx
-              mov r7.zw, r7.xxxy
-              dtof r5.z, r7.zwzw
-            else 
-              mov r5.z, l(0)
-            endif 
-          endif 
-          if_nz r3.z
-            iadd r7.xy, cb0[1].zwzz, l(-1, -1, 0, 0)
-            utof r7.xy, r7.xyxx
-            max r7.zw, r1.xxxy, l(0.000000, 0.000000, 0.000000, 0.000000)
-            min r7.xy, r7.xyxx, r7.zwzz
-            ftou r7.xy, r7.xyxx
-            ftou r7.zw, r3.xxxy
-            imul null, r7.zw, r7.zzzw, cb0[2].xxxy
-            iadd r2.y, r7.w, r7.z
-            imad r2.y, r7.x, cb0[2].z, r2.y
-            imad r2.y, r7.y, cb0[2].w, r2.y
-            ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r7.xy, r2.y, l(0), u0.xyxx
-            mov r7.zw, r7.xxxy
-            dtof r4.z, r7.zwzw
-          else 
-            ieq r2.y, cb0[0].w, l(2)
-            if_nz r2.y
-              movc r2.y, cb0[7].x, l(-0.000000), l(0.500000)
-              add r7.xy, r0.wzww, r2.yyyy
-              lt r7.zw, r1.xxxy, r0.yyyy
-              add r9.xy, r0.yyyy, -r1.xyxx
-              div r9.zw, r9.xxxy, r7.xxxy
-              ftou r9.zw, r9.zzzw
-              utof r10.xy, r9.zwzz
-              mad r9.xy, -r10.xyxx, r7.xyxx, r9.xyxx
-              and r9.zw, r9.zzzw, l(0, 0, 1, 1)
-              add r10.xy, r0.yyyy, r9.xyxx
-              add r9.xy, r0.wzww, -r9.xyxx
-              movc r9.xy, r9.zwzz, r9.xyxx, r10.xyxx
-              lt r9.zw, r0.wwwz, r1.xxxy
-              add r10.xy, -r0.wzww, r1.xyxx
-              div r10.zw, r10.xxxy, r7.xxxy
-              ftou r10.zw, r10.zzzw
-              utof r11.xy, r10.zwzz
-              mad r7.xy, -r11.xyxx, r7.xyxx, r10.xyxx
-              and r10.xy, r10.zwzz, l(1, 1, 0, 0)
-              add r10.zw, r0.wwwz, -r7.xxxy
-              add r7.xy, r0.yyyy, r7.xyxx
-              movc r7.xy, r10.xyxx, r7.xyxx, r10.zwzz
-              movc r7.xy, r9.zwzz, r7.xyxx, r1.xyxx
-              movc r7.xy, r7.zwzz, r9.xyxx, r7.xyxx
-              ftou r7.xy, r7.xyxx
-              ftou r7.zw, r3.xxxy
-              imul null, r7.zw, r7.zzzw, cb0[2].xxxy
-              iadd r2.y, r7.w, r7.z
-              imad r2.y, r7.x, cb0[2].z, r2.y
-              imad r2.y, r7.y, cb0[2].w, r2.y
-              ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r7.xy, r2.y, l(0), u0.xyxx
-              mov r7.zw, r7.xxxy
-              dtof r4.z, r7.zwzw
-            else 
-              mov r4.z, l(0)
-            endif 
-          endif 
-          if_nz r3.z
-            iadd r7.xy, cb0[1].wzww, l(-1, -1, 0, 0)
-            utof r7.xy, r7.xyxx
-            max r2.y, r2.w, l(0.000000)
-            min r2.y, r7.x, r2.y
-            ftou r2.y, r2.y
-            max r3.w, r1.x, l(0.000000)
-            min r3.w, r7.y, r3.w
-            ftou r3.w, r3.w
-            utof r7.z, r3.w
-            utof r7.w, r2.y
-            ftou r7.xy, r3.xyxx
-            ftou r7.zw, r7.zzzw
-            imul null, r7.xy, r7.xyxx, cb0[2].xyxx
-            iadd r2.y, r7.y, r7.x
-            imad r2.y, r7.z, cb0[2].z, r2.y
-            imad r2.y, r7.w, cb0[2].w, r2.y
-            ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r7.xy, r2.y, l(0), u0.xyxx
-            mov r7.zw, r7.xxxy
-            dtof r6.z, r7.zwzw
-          else 
-            ieq r2.y, cb0[0].w, l(2)
-            if_nz r2.y
-              movc r2.y, cb0[7].x, l(-0.000000), l(0.500000)
-              add r7.xy, r0.zwzz, r2.yyyy
-              lt r2.y, r2.w, r0.y
-              add r3.w, r0.y, -r2.w
-              div r7.z, r3.w, r7.x
-              ftou r7.z, r7.z
-              utof r7.w, r7.z
-              mad r3.w, -r7.w, r7.x, r3.w
-              and r7.z, r7.z, l(1)
-              add r7.w, r0.y, r3.w
-              add r3.w, r0.z, -r3.w
-              movc r3.w, r7.z, r3.w, r7.w
-              lt r7.z, r0.z, r2.w
-              add r7.w, -r0.z, r2.w
-              div r9.x, r7.w, r7.x
-              ftou r9.x, r9.x
-              utof r9.y, r9.x
-              mad r7.x, -r9.y, r7.x, r7.w
-              and r7.w, r9.x, l(1)
-              add r9.x, r0.z, -r7.x
-              add r7.x, r0.y, r7.x
-              movc r7.x, r7.w, r7.x, r9.x
-              movc r7.x, r7.z, r7.x, r2.w
-              movc r2.y, r2.y, r3.w, r7.x
-              ftou r2.y, r2.y
-              lt r3.w, r1.x, r0.y
-              add r7.x, r0.y, -r1.x
-              div r7.z, r7.x, r7.y
-              ftou r7.z, r7.z
-              utof r7.w, r7.z
-              mad r7.x, -r7.w, r7.y, r7.x
-              and r7.z, r7.z, l(1)
-              add r7.w, r0.y, r7.x
-              add r7.x, r0.w, -r7.x
-              movc r7.x, r7.z, r7.x, r7.w
-              lt r7.z, r0.w, r1.x
-              add r7.w, -r0.w, r1.x
-              div r9.x, r7.w, r7.y
-              ftou r9.x, r9.x
-              utof r9.y, r9.x
-              mad r7.y, -r9.y, r7.y, r7.w
-              and r7.w, r9.x, l(1)
-              add r9.x, r0.w, -r7.y
-              add r7.y, r0.y, r7.y
-              movc r7.y, r7.w, r7.y, r9.x
-              movc r7.y, r7.z, r7.y, r1.x
-              movc r3.w, r3.w, r7.x, r7.y
-              ftou r3.w, r3.w
-              utof r7.z, r3.w
-              utof r7.w, r2.y
-              ftou r7.xy, r3.xyxx
-              ftou r7.zw, r7.zzzw
-              imul null, r7.xy, r7.xyxx, cb0[2].xyxx
-              iadd r2.y, r7.y, r7.x
-              imad r2.y, r7.z, cb0[2].z, r2.y
-              imad r2.y, r7.w, cb0[2].w, r2.y
-              ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r7.xy, r2.y, l(0), u0.xyxx
-              mov r7.zw, r7.xxxy
-              dtof r6.z, r7.zwzw
-            else 
-              mov r6.z, l(0)
-            endif 
-          endif 
-          if_nz r3.z
-            iadd r3.zw, cb0[1].wwwz, l(0, 0, -1, -1)
-            utof r3.zw, r3.zzzw
-            max r2.y, r2.x, l(0.000000)
-            min r2.y, r3.z, r2.y
-            ftou r2.y, r2.y
-            max r3.z, r1.x, l(0.000000)
-            min r3.z, r3.w, r3.z
-            ftou r3.z, r3.z
-            utof r3.z, r3.z
-            utof r3.w, r2.y
-            ftou r7.xy, r3.xyxx
-            ftou r3.zw, r3.zzzw
-            imul null, r7.xy, r7.xyxx, cb0[2].xyxx
-            iadd r2.y, r7.y, r7.x
-            imad r2.y, r3.z, cb0[2].z, r2.y
-            imad r2.y, r3.w, cb0[2].w, r2.y
-            ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r3.zw, r2.y, l(0), u0.xxxy
-            mov r7.xy, r3.zwzz
-            dtof r8.z, r7.xyxy
-          else 
-            ieq r2.y, cb0[0].w, l(2)
-            if_nz r2.y
-              movc r2.y, cb0[7].x, l(-0.000000), l(0.500000)
-              add r3.zw, r0.zzzw, r2.yyyy
-              lt r2.y, r2.x, r0.y
-              add r7.x, r0.y, -r2.x
-              div r7.y, r7.x, r3.z
-              ftou r7.y, r7.y
-              utof r7.z, r7.y
-              mad r7.x, -r7.z, r3.z, r7.x
-              and r7.y, r7.y, l(1)
-              add r7.z, r0.y, r7.x
-              add r7.x, r0.z, -r7.x
-              movc r7.x, r7.y, r7.x, r7.z
-              lt r7.y, r0.z, r2.x
-              add r7.z, -r0.z, r2.x
-              div r7.w, r7.z, r3.z
-              ftou r7.w, r7.w
-              utof r9.x, r7.w
-              mad r3.z, -r9.x, r3.z, r7.z
-              and r7.z, r7.w, l(1)
-              add r7.w, r0.z, -r3.z
-              add r3.z, r0.y, r3.z
-              movc r3.z, r7.z, r3.z, r7.w
-              movc r3.z, r7.y, r3.z, r2.x
-              movc r2.y, r2.y, r7.x, r3.z
-              ftou r2.y, r2.y
-              lt r3.z, r1.x, r0.y
-              add r7.x, r0.y, -r1.x
-              div r7.y, r7.x, r3.w
-              ftou r7.y, r7.y
-              utof r7.z, r7.y
-              mad r7.x, -r7.z, r3.w, r7.x
-              and r7.y, r7.y, l(1)
-              add r7.z, r0.y, r7.x
-              add r7.x, r0.w, -r7.x
-              movc r7.x, r7.y, r7.x, r7.z
-              lt r7.y, r0.w, r1.x
-              add r7.z, -r0.w, r1.x
-              div r7.w, r7.z, r3.w
-              ftou r7.w, r7.w
-              utof r9.x, r7.w
-              mad r3.w, -r9.x, r3.w, r7.z
-              and r7.z, r7.w, l(1)
-              add r7.w, r0.w, -r3.w
-              add r3.w, r0.y, r3.w
-              movc r3.w, r7.z, r3.w, r7.w
-              movc r3.w, r7.y, r3.w, r1.x
-              movc r3.z, r3.z, r7.x, r3.w
-              ftou r3.z, r3.z
-              utof r3.z, r3.z
-              utof r3.w, r2.y
-              ftou r7.xy, r3.xyxx
-              ftou r3.zw, r3.zzzw
-              imul null, r7.xy, r7.xyxx, cb0[2].xyxx
-              iadd r2.y, r7.y, r7.x
-              imad r2.y, r3.z, cb0[2].z, r2.y
-              imad r2.y, r3.w, cb0[2].w, r2.y
-              ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r3.zw, r2.y, l(0), u0.xxxy
-              mov r7.xy, r3.zwzz
-              dtof r8.z, r7.xyxy
-            else 
-              mov r8.z, l(0)
-            endif 
-          endif 
-        endif 
-        add r3.zw, r1.xxxx, l(0.000000, 0.000000, 1.000000, 2.000000)
-        if_z cb0[0].w
-          ge r7.xyz, r2.zwxz, l(0.000000, 0.000000, 0.000000, 0.000000)
-          ftou r9.xyz, r2.zwxz
-          ult r9.xyz, r9.xyzx, cb0[1].wwww
-          and r7.xyz, r7.xyzx, r9.xyzx
-          ge r1.x, r3.z, l(0.000000)
-          and r7.xyz, r1.xxxx, r7.xyzx
-          ftou r2.y, r3.z
-          ult r2.y, r2.y, cb0[1].z
-          and r7.xyz, r2.yyyy, r7.xyzx
-          ftou r9.xy, r3.xyxx
-          ftou r7.w, r3.z
-          ftou r9.z, r2.z
-          imul null, r9.xy, r9.xyxx, cb0[2].xyxx
-          iadd r9.x, r9.y, r9.x
-          imad r7.w, r7.w, cb0[2].z, r9.x
-          imad r7.w, r9.z, cb0[2].w, r7.w
-          ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r9.xy, r7.w, l(0), u0.xyxx
-          mov r9.zw, r9.xxxy
-          dtof r7.w, r9.zwzw
-          and r5.y, r7.w, r7.x
-          ge r7.x, r1.y, l(0.000000)
-          ftou r7.w, r1.y
-          ult r7.w, r7.w, cb0[1].w
-          and r7.x, r7.w, r7.x
-          and r1.x, r1.x, r7.x
-          and r1.x, r2.y, r1.x
-          ftou r7.xw, r3.xxxy
-          ftou r2.y, r3.z
-          ftou r9.x, r1.y
-          imul null, r7.xw, r7.xxxw, cb0[2].xxxy
-          iadd r7.x, r7.w, r7.x
-          imad r2.y, r2.y, cb0[2].z, r7.x
-          imad r2.y, r9.x, cb0[2].w, r2.y
-          ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r7.xw, r2.y, l(0), u0.xxxy
-          mov r9.xy, r7.xwxx
-          dtof r2.y, r9.xyxy
-          and r4.y, r1.x, r2.y
-          ftou r7.xw, r3.xxxy
-          ftou r1.x, r3.z
-          ftou r2.y, r2.w
-          imul null, r7.xw, r7.xxxw, cb0[2].xxxy
-          iadd r7.x, r7.w, r7.x
-          imad r1.x, r1.x, cb0[2].z, r7.x
-          imad r1.x, r2.y, cb0[2].w, r1.x
-          ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r7.xw, r1.x, l(0), u0.xxxy
-          mov r9.xy, r7.xwxx
-          dtof r1.x, r9.xyxy
-          and r6.y, r1.x, r7.y
-          ftou r7.xy, r3.xyxx
-          ftou r1.x, r3.z
-          ftou r2.y, r2.x
-          imul null, r7.xy, r7.xyxx, cb0[2].xyxx
-          iadd r7.x, r7.y, r7.x
-          imad r1.x, r1.x, cb0[2].z, r7.x
-          imad r1.x, r2.y, cb0[2].w, r1.x
-          ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r7.xy, r1.x, l(0), u0.xyxx
-          mov r9.xy, r7.xyxx
-          dtof r1.x, r9.xyxy
-          and r8.y, r1.x, r7.z
-        else 
-          ieq r1.x, cb0[0].w, l(1)
-          if_nz r1.x
-            iadd r7.xy, cb0[1].wzww, l(-1, -1, 0, 0)
-            utof r7.xy, r7.xyxx
-            max r2.y, r2.z, l(0.000000)
-            min r2.y, r7.x, r2.y
-            ftou r2.y, r2.y
-            max r7.x, r3.z, l(0.000000)
-            min r7.x, r7.y, r7.x
-            ftou r7.x, r7.x
-            utof r7.z, r7.x
-            utof r7.w, r2.y
-            ftou r7.xy, r3.xyxx
-            ftou r7.zw, r7.zzzw
-            imul null, r7.xy, r7.xyxx, cb0[2].xyxx
-            iadd r2.y, r7.y, r7.x
-            imad r2.y, r7.z, cb0[2].z, r2.y
-            imad r2.y, r7.w, cb0[2].w, r2.y
-            ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r7.xy, r2.y, l(0), u0.xyxx
-            mov r7.zw, r7.xxxy
-            dtof r5.y, r7.zwzw
-          else 
-            ieq r2.y, cb0[0].w, l(2)
-            if_nz r2.y
-              movc r2.y, cb0[7].x, l(-0.000000), l(0.500000)
-              add r7.xy, r0.zwzz, r2.yyyy
-              lt r2.y, r2.z, r0.y
-              add r7.z, r0.y, -r2.z
-              div r7.w, r7.z, r7.x
-              ftou r7.w, r7.w
-              utof r9.x, r7.w
-              mad r7.z, -r9.x, r7.x, r7.z
-              and r7.w, r7.w, l(1)
-              add r9.x, r0.y, r7.z
-              add r7.z, r0.z, -r7.z
-              movc r7.z, r7.w, r7.z, r9.x
-              lt r7.w, r0.z, r2.z
-              add r9.x, -r0.z, r2.z
-              div r9.y, r9.x, r7.x
-              ftou r9.y, r9.y
-              utof r9.z, r9.y
-              mad r7.x, -r9.z, r7.x, r9.x
-              and r9.x, r9.y, l(1)
-              add r9.y, r0.z, -r7.x
-              add r7.x, r0.y, r7.x
-              movc r7.x, r9.x, r7.x, r9.y
-              movc r7.x, r7.w, r7.x, r2.z
-              movc r2.y, r2.y, r7.z, r7.x
-              ftou r2.y, r2.y
-              lt r7.x, r3.z, r0.y
-              add r7.z, r0.y, -r3.z
-              div r7.w, r7.z, r7.y
-              ftou r7.w, r7.w
-              utof r9.x, r7.w
-              mad r7.z, -r9.x, r7.y, r7.z
-              and r7.w, r7.w, l(1)
-              add r9.x, r0.y, r7.z
-              add r7.z, r0.w, -r7.z
-              movc r7.z, r7.w, r7.z, r9.x
-              lt r7.w, r0.w, r3.z
-              add r9.x, -r0.w, r3.z
-              div r9.y, r9.x, r7.y
-              ftou r9.y, r9.y
-              utof r9.z, r9.y
-              mad r7.y, -r9.z, r7.y, r9.x
-              and r9.x, r9.y, l(1)
-              add r9.y, r0.w, -r7.y
-              add r7.y, r0.y, r7.y
-              movc r7.y, r9.x, r7.y, r9.y
-              movc r7.y, r7.w, r7.y, r3.z
-              movc r7.x, r7.x, r7.z, r7.y
-              ftou r7.x, r7.x
-              utof r7.z, r7.x
-              utof r7.w, r2.y
-              ftou r7.xy, r3.xyxx
-              ftou r7.zw, r7.zzzw
-              imul null, r7.xy, r7.xyxx, cb0[2].xyxx
-              iadd r2.y, r7.y, r7.x
-              imad r2.y, r7.z, cb0[2].z, r2.y
-              imad r2.y, r7.w, cb0[2].w, r2.y
-              ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r7.xy, r2.y, l(0), u0.xyxx
-              mov r7.zw, r7.xxxy
-              dtof r5.y, r7.zwzw
-            else 
-              mov r5.y, l(0)
-            endif 
-          endif 
-          if_nz r1.x
-            iadd r7.xy, cb0[1].wzww, l(-1, -1, 0, 0)
-            utof r7.xy, r7.xyxx
-            max r2.y, r1.y, l(0.000000)
-            min r2.y, r7.x, r2.y
-            ftou r2.y, r2.y
-            max r7.x, r3.z, l(0.000000)
-            min r7.x, r7.y, r7.x
-            ftou r7.x, r7.x
-            utof r7.z, r7.x
-            utof r7.w, r2.y
-            ftou r7.xy, r3.xyxx
-            ftou r7.zw, r7.zzzw
-            imul null, r7.xy, r7.xyxx, cb0[2].xyxx
-            iadd r2.y, r7.y, r7.x
-            imad r2.y, r7.z, cb0[2].z, r2.y
-            imad r2.y, r7.w, cb0[2].w, r2.y
-            ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r7.xy, r2.y, l(0), u0.xyxx
-            mov r7.zw, r7.xxxy
-            dtof r4.y, r7.zwzw
-          else 
-            ieq r2.y, cb0[0].w, l(2)
-            if_nz r2.y
-              movc r2.y, cb0[7].x, l(-0.000000), l(0.500000)
-              add r7.xy, r0.zwzz, r2.yyyy
-              lt r2.y, r1.y, r0.y
-              add r7.z, r0.y, -r1.y
-              div r7.w, r7.z, r7.x
-              ftou r7.w, r7.w
-              utof r9.x, r7.w
-              mad r7.z, -r9.x, r7.x, r7.z
-              and r7.w, r7.w, l(1)
-              add r9.x, r0.y, r7.z
-              add r7.z, r0.z, -r7.z
-              movc r7.z, r7.w, r7.z, r9.x
-              lt r7.w, r0.z, r1.y
-              add r9.x, -r0.z, r1.y
-              div r9.y, r9.x, r7.x
-              ftou r9.y, r9.y
-              utof r9.z, r9.y
-              mad r7.x, -r9.z, r7.x, r9.x
-              and r9.x, r9.y, l(1)
-              add r9.y, r0.z, -r7.x
-              add r7.x, r0.y, r7.x
-              movc r7.x, r9.x, r7.x, r9.y
-              movc r7.x, r7.w, r7.x, r1.y
-              movc r2.y, r2.y, r7.z, r7.x
-              ftou r2.y, r2.y
-              lt r7.x, r3.z, r0.y
-              add r7.z, r0.y, -r3.z
-              div r7.w, r7.z, r7.y
-              ftou r7.w, r7.w
-              utof r9.x, r7.w
-              mad r7.z, -r9.x, r7.y, r7.z
-              and r7.w, r7.w, l(1)
-              add r9.x, r0.y, r7.z
-              add r7.z, r0.w, -r7.z
-              movc r7.z, r7.w, r7.z, r9.x
-              lt r7.w, r0.w, r3.z
-              add r9.x, -r0.w, r3.z
-              div r9.y, r9.x, r7.y
-              ftou r9.y, r9.y
-              utof r9.z, r9.y
-              mad r7.y, -r9.z, r7.y, r9.x
-              and r9.x, r9.y, l(1)
-              add r9.y, r0.w, -r7.y
-              add r7.y, r0.y, r7.y
-              movc r7.y, r9.x, r7.y, r9.y
-              movc r7.y, r7.w, r7.y, r3.z
-              movc r7.x, r7.x, r7.z, r7.y
-              ftou r7.x, r7.x
-              utof r7.z, r7.x
-              utof r7.w, r2.y
-              ftou r7.xy, r3.xyxx
-              ftou r7.zw, r7.zzzw
-              imul null, r7.xy, r7.xyxx, cb0[2].xyxx
-              iadd r2.y, r7.y, r7.x
-              imad r2.y, r7.z, cb0[2].z, r2.y
-              imad r2.y, r7.w, cb0[2].w, r2.y
-              ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r7.xy, r2.y, l(0), u0.xyxx
-              mov r7.zw, r7.xxxy
-              dtof r4.y, r7.zwzw
-            else 
-              mov r4.y, l(0)
-            endif 
-          endif 
-          if_nz r1.x
-            iadd r7.xy, cb0[1].wzww, l(-1, -1, 0, 0)
-            utof r7.xy, r7.xyxx
-            max r2.y, r2.w, l(0.000000)
-            min r2.y, r7.x, r2.y
-            ftou r2.y, r2.y
-            max r7.x, r3.z, l(0.000000)
-            min r7.x, r7.y, r7.x
-            ftou r7.x, r7.x
-            utof r7.z, r7.x
-            utof r7.w, r2.y
-            ftou r7.xy, r3.xyxx
-            ftou r7.zw, r7.zzzw
-            imul null, r7.xy, r7.xyxx, cb0[2].xyxx
-            iadd r2.y, r7.y, r7.x
-            imad r2.y, r7.z, cb0[2].z, r2.y
-            imad r2.y, r7.w, cb0[2].w, r2.y
-            ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r7.xy, r2.y, l(0), u0.xyxx
-            mov r7.zw, r7.xxxy
-            dtof r6.y, r7.zwzw
-          else 
-            ieq r2.y, cb0[0].w, l(2)
-            if_nz r2.y
-              movc r2.y, cb0[7].x, l(-0.000000), l(0.500000)
-              add r7.xy, r0.zwzz, r2.yyyy
-              lt r2.y, r2.w, r0.y
-              add r7.z, r0.y, -r2.w
-              div r7.w, r7.z, r7.x
-              ftou r7.w, r7.w
-              utof r9.x, r7.w
-              mad r7.z, -r9.x, r7.x, r7.z
-              and r7.w, r7.w, l(1)
-              add r9.x, r0.y, r7.z
-              add r7.z, r0.z, -r7.z
-              movc r7.z, r7.w, r7.z, r9.x
-              lt r7.w, r0.z, r2.w
-              add r9.x, -r0.z, r2.w
-              div r9.y, r9.x, r7.x
-              ftou r9.y, r9.y
-              utof r9.z, r9.y
-              mad r7.x, -r9.z, r7.x, r9.x
-              and r9.x, r9.y, l(1)
-              add r9.y, r0.z, -r7.x
-              add r7.x, r0.y, r7.x
-              movc r7.x, r9.x, r7.x, r9.y
-              movc r7.x, r7.w, r7.x, r2.w
-              movc r2.y, r2.y, r7.z, r7.x
-              ftou r2.y, r2.y
-              lt r7.x, r3.z, r0.y
-              add r7.z, r0.y, -r3.z
-              div r7.w, r7.z, r7.y
-              ftou r7.w, r7.w
-              utof r9.x, r7.w
-              mad r7.z, -r9.x, r7.y, r7.z
-              and r7.w, r7.w, l(1)
-              add r9.x, r0.y, r7.z
-              add r7.z, r0.w, -r7.z
-              movc r7.z, r7.w, r7.z, r9.x
-              lt r7.w, r0.w, r3.z
-              add r9.x, -r0.w, r3.z
-              div r9.y, r9.x, r7.y
-              ftou r9.y, r9.y
-              utof r9.z, r9.y
-              mad r7.y, -r9.z, r7.y, r9.x
-              and r9.x, r9.y, l(1)
-              add r9.y, r0.w, -r7.y
-              add r7.y, r0.y, r7.y
-              movc r7.y, r9.x, r7.y, r9.y
-              movc r7.y, r7.w, r7.y, r3.z
-              movc r7.x, r7.x, r7.z, r7.y
-              ftou r7.x, r7.x
-              utof r7.z, r7.x
-              utof r7.w, r2.y
-              ftou r7.xy, r3.xyxx
-              ftou r7.zw, r7.zzzw
-              imul null, r7.xy, r7.xyxx, cb0[2].xyxx
-              iadd r2.y, r7.y, r7.x
-              imad r2.y, r7.z, cb0[2].z, r2.y
-              imad r2.y, r7.w, cb0[2].w, r2.y
-              ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r7.xy, r2.y, l(0), u0.xyxx
-              mov r7.zw, r7.xxxy
-              dtof r6.y, r7.zwzw
-            else 
-              mov r6.y, l(0)
-            endif 
-          endif 
-          if_nz r1.x
-            iadd r7.xy, cb0[1].wzww, l(-1, -1, 0, 0)
-            utof r7.xy, r7.xyxx
-            max r1.x, r2.x, l(0.000000)
-            min r1.x, r7.x, r1.x
-            ftou r1.x, r1.x
-            max r2.y, r3.z, l(0.000000)
-            min r2.y, r7.y, r2.y
-            ftou r2.y, r2.y
-            utof r7.z, r2.y
-            utof r7.w, r1.x
-            ftou r7.xy, r3.xyxx
-            ftou r7.zw, r7.zzzw
-            imul null, r7.xy, r7.xyxx, cb0[2].xyxx
-            iadd r1.x, r7.y, r7.x
-            imad r1.x, r7.z, cb0[2].z, r1.x
-            imad r1.x, r7.w, cb0[2].w, r1.x
-            ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r7.xy, r1.x, l(0), u0.xyxx
-            mov r7.zw, r7.xxxy
-            dtof r8.y, r7.zwzw
-          else 
-            ieq r1.x, cb0[0].w, l(2)
-            if_nz r1.x
-              movc r1.x, cb0[7].x, l(-0.000000), l(0.500000)
-              add r7.xy, r0.zwzz, r1.xxxx
-              lt r1.x, r2.x, r0.y
-              add r2.y, r0.y, -r2.x
-              div r7.z, r2.y, r7.x
-              ftou r7.z, r7.z
-              utof r7.w, r7.z
-              mad r2.y, -r7.w, r7.x, r2.y
-              and r7.z, r7.z, l(1)
-              add r7.w, r0.y, r2.y
-              add r2.y, r0.z, -r2.y
-              movc r2.y, r7.z, r2.y, r7.w
-              lt r7.z, r0.z, r2.x
-              add r7.w, -r0.z, r2.x
-              div r9.x, r7.w, r7.x
-              ftou r9.x, r9.x
-              utof r9.y, r9.x
-              mad r7.x, -r9.y, r7.x, r7.w
-              and r7.w, r9.x, l(1)
-              add r9.x, r0.z, -r7.x
-              add r7.x, r0.y, r7.x
-              movc r7.x, r7.w, r7.x, r9.x
-              movc r7.x, r7.z, r7.x, r2.x
-              movc r1.x, r1.x, r2.y, r7.x
-              ftou r1.x, r1.x
-              lt r2.y, r3.z, r0.y
-              add r7.x, r0.y, -r3.z
-              div r7.z, r7.x, r7.y
-              ftou r7.z, r7.z
-              utof r7.w, r7.z
-              mad r7.x, -r7.w, r7.y, r7.x
-              and r7.z, r7.z, l(1)
-              add r7.w, r0.y, r7.x
-              add r7.x, r0.w, -r7.x
-              movc r7.x, r7.z, r7.x, r7.w
-              lt r7.z, r0.w, r3.z
-              add r7.w, -r0.w, r3.z
-              div r9.x, r7.w, r7.y
-              ftou r9.x, r9.x
-              utof r9.y, r9.x
-              mad r7.y, -r9.y, r7.y, r7.w
-              and r7.w, r9.x, l(1)
-              add r9.x, r0.w, -r7.y
-              add r7.y, r0.y, r7.y
-              movc r7.y, r7.w, r7.y, r9.x
-              movc r3.z, r7.z, r7.y, r3.z
-              movc r2.y, r2.y, r7.x, r3.z
-              ftou r2.y, r2.y
-              utof r7.z, r2.y
-              utof r7.w, r1.x
-              ftou r7.xy, r3.xyxx
-              ftou r7.zw, r7.zzzw
-              imul null, r7.xy, r7.xyxx, cb0[2].xyxx
-              iadd r1.x, r7.y, r7.x
-              imad r1.x, r7.z, cb0[2].z, r1.x
-              imad r1.x, r7.w, cb0[2].w, r1.x
-              ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r7.xy, r1.x, l(0), u0.xyxx
-              mov r7.zw, r7.xxxy
-              dtof r8.y, r7.zwzw
-            else 
-              mov r8.y, l(0)
-            endif 
-          endif 
-        endif 
-        if_z cb0[0].w
-          ge r7.xyz, r2.zwxz, l(0.000000, 0.000000, 0.000000, 0.000000)
-          ftou r9.xyz, r2.zwxz
-          ult r9.xyz, r9.xyzx, cb0[1].wwww
-          and r7.xyz, r7.xyzx, r9.xyzx
-          ge r1.x, r3.w, l(0.000000)
-          and r7.xyz, r1.xxxx, r7.xyzx
-          ftou r2.y, r3.w
-          ult r2.y, r2.y, cb0[1].z
-          and r7.xyz, r2.yyyy, r7.xyzx
-          ftou r9.xy, r3.xyxx
-          ftou r3.z, r3.w
-          ftou r7.w, r2.z
-          imul null, r9.xy, r9.xyxx, cb0[2].xyxx
-          iadd r9.x, r9.y, r9.x
-          imad r3.z, r3.z, cb0[2].z, r9.x
-          imad r3.z, r7.w, cb0[2].w, r3.z
-          ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r9.xy, r3.z, l(0), u0.xyxx
-          mov r9.zw, r9.xxxy
-          dtof r3.z, r9.zwzw
-          and r5.w, r3.z, r7.x
-          ge r3.z, r1.y, l(0.000000)
-          ftou r7.x, r1.y
-          ult r7.x, r7.x, cb0[1].w
-          and r3.z, r3.z, r7.x
-          and r1.x, r1.x, r3.z
-          and r1.x, r2.y, r1.x
-          ftou r7.xw, r3.xxxy
-          ftou r2.y, r3.w
-          ftou r3.z, r1.y
-          imul null, r7.xw, r7.xxxw, cb0[2].xxxy
-          iadd r7.x, r7.w, r7.x
-          imad r2.y, r2.y, cb0[2].z, r7.x
-          imad r2.y, r3.z, cb0[2].w, r2.y
-          ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r7.xw, r2.y, l(0), u0.xxxy
-          mov r9.xy, r7.xwxx
-          dtof r2.y, r9.xyxy
-          and r4.w, r1.x, r2.y
-          ftou r7.xw, r3.xxxy
-          ftou r1.x, r3.w
-          ftou r2.y, r2.w
-          imul null, r7.xw, r7.xxxw, cb0[2].xxxy
-          iadd r3.z, r7.w, r7.x
-          imad r1.x, r1.x, cb0[2].z, r3.z
-          imad r1.x, r2.y, cb0[2].w, r1.x
-          ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r7.xw, r1.x, l(0), u0.xxxy
-          mov r9.xy, r7.xwxx
-          dtof r1.x, r9.xyxy
-          and r6.w, r1.x, r7.y
-          ftou r7.xy, r3.xyxx
-          ftou r1.x, r3.w
-          ftou r2.y, r2.x
-          imul null, r7.xy, r7.xyxx, cb0[2].xyxx
-          iadd r3.z, r7.y, r7.x
-          imad r1.x, r1.x, cb0[2].z, r3.z
-          imad r1.x, r2.y, cb0[2].w, r1.x
-          ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r7.xy, r1.x, l(0), u0.xyxx
-          mov r9.xy, r7.xyxx
-          dtof r1.x, r9.xyxy
-          and r8.w, r1.x, r7.z
-        else 
-          ieq r1.x, cb0[0].w, l(1)
-          if_nz r1.x
-            iadd r7.xy, cb0[1].wzww, l(-1, -1, 0, 0)
-            utof r7.xy, r7.xyxx
-            max r2.y, r2.z, l(0.000000)
-            min r2.y, r7.x, r2.y
-            ftou r2.y, r2.y
-            max r3.z, r3.w, l(0.000000)
-            min r3.z, r7.y, r3.z
-            ftou r3.z, r3.z
-            utof r7.z, r3.z
-            utof r7.w, r2.y
-            ftou r7.xy, r3.xyxx
-            ftou r7.zw, r7.zzzw
-            imul null, r7.xy, r7.xyxx, cb0[2].xyxx
-            iadd r2.y, r7.y, r7.x
-            imad r2.y, r7.z, cb0[2].z, r2.y
-            imad r2.y, r7.w, cb0[2].w, r2.y
-            ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r7.xy, r2.y, l(0), u0.xyxx
-            mov r7.zw, r7.xxxy
-            dtof r5.w, r7.zwzw
-          else 
-            ieq r2.y, cb0[0].w, l(2)
-            if_nz r2.y
-              movc r2.y, cb0[7].x, l(-0.000000), l(0.500000)
-              add r7.xy, r0.zwzz, r2.yyyy
-              lt r2.y, r2.z, r0.y
-              add r3.z, r0.y, -r2.z
-              div r7.z, r3.z, r7.x
-              ftou r7.z, r7.z
-              utof r7.w, r7.z
-              mad r3.z, -r7.w, r7.x, r3.z
-              and r7.z, r7.z, l(1)
-              add r7.w, r0.y, r3.z
-              add r3.z, r0.z, -r3.z
-              movc r3.z, r7.z, r3.z, r7.w
-              lt r7.z, r0.z, r2.z
-              add r7.w, -r0.z, r2.z
-              div r9.x, r7.w, r7.x
-              ftou r9.x, r9.x
-              utof r9.y, r9.x
-              mad r7.x, -r9.y, r7.x, r7.w
-              and r7.w, r9.x, l(1)
-              add r9.x, r0.z, -r7.x
-              add r7.x, r0.y, r7.x
-              movc r7.x, r7.w, r7.x, r9.x
-              movc r2.z, r7.z, r7.x, r2.z
-              movc r2.y, r2.y, r3.z, r2.z
-              lt r2.z, r3.w, r0.y
-              add r3.z, r0.y, -r3.w
-              div r7.x, r3.z, r7.y
-              ftou r7.x, r7.x
-              utof r7.z, r7.x
-              mad r3.z, -r7.z, r7.y, r3.z
-              and r7.x, r7.x, l(1)
-              add r7.z, r0.y, r3.z
-              add r3.z, r0.w, -r3.z
-              movc r3.z, r7.x, r3.z, r7.z
-              lt r7.x, r0.w, r3.w
-              add r7.z, -r0.w, r3.w
-              div r7.w, r7.z, r7.y
-              ftou r7.w, r7.w
-              utof r9.x, r7.w
-              mad r7.y, -r9.x, r7.y, r7.z
-              and r7.z, r7.w, l(1)
-              add r7.w, r0.w, -r7.y
-              add r7.y, r0.y, r7.y
-              movc r7.y, r7.z, r7.y, r7.w
-              movc r7.x, r7.x, r7.y, r3.w
-              movc r2.z, r2.z, r3.z, r7.x
-              ftou r2.yz, r2.yyzy
-              utof r7.zw, r2.zzzy
-              ftou r2.yz, r3.xxyx
-              ftou r7.xy, r7.zwzz
-              imul null, r2.yz, r2.yyzy, cb0[2].xxyx
-              iadd r2.y, r2.z, r2.y
-              imad r2.y, r7.x, cb0[2].z, r2.y
-              imad r2.y, r7.y, cb0[2].w, r2.y
-              ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r2.yz, r2.y, l(0), u0.xxyx
-              mov r7.xy, r2.yzyy
-              dtof r5.w, r7.xyxy
-            else 
-              mov r5.w, l(0)
-            endif 
-          endif 
-          if_nz r1.x
-            iadd r2.yz, cb0[1].wwzw, l(0, -1, -1, 0)
-            utof r2.yz, r2.yyzy
-            max r3.z, r1.y, l(0.000000)
-            min r2.y, r2.y, r3.z
-            max r3.z, r3.w, l(0.000000)
-            min r2.z, r2.z, r3.z
-            ftou r2.yz, r2.yyzy
-            utof r7.zw, r2.zzzy
-            ftou r2.yz, r3.xxyx
-            ftou r7.xy, r7.zwzz
-            imul null, r2.yz, r2.yyzy, cb0[2].xxyx
-            iadd r2.y, r2.z, r2.y
-            imad r2.y, r7.x, cb0[2].z, r2.y
-            imad r2.y, r7.y, cb0[2].w, r2.y
-            ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r2.yz, r2.y, l(0), u0.xxyx
-            mov r7.xy, r2.yzyy
-            dtof r4.w, r7.xyxy
-          else 
-            ieq r2.y, cb0[0].w, l(2)
-            if_nz r2.y
-              movc r2.y, cb0[7].x, l(-0.000000), l(0.500000)
-              add r2.yz, r0.zzwz, r2.yyyy
-              lt r3.z, r1.y, r0.y
-              add r7.x, r0.y, -r1.y
-              div r7.y, r7.x, r2.y
-              ftou r7.y, r7.y
-              utof r7.z, r7.y
-              mad r7.x, -r7.z, r2.y, r7.x
-              and r7.y, r7.y, l(1)
-              add r7.z, r0.y, r7.x
-              add r7.x, r0.z, -r7.x
-              movc r7.x, r7.y, r7.x, r7.z
-              lt r7.y, r0.z, r1.y
-              add r7.z, -r0.z, r1.y
-              div r7.w, r7.z, r2.y
-              ftou r7.w, r7.w
-              utof r9.x, r7.w
-              mad r2.y, -r9.x, r2.y, r7.z
-              and r7.z, r7.w, l(1)
-              add r7.w, r0.z, -r2.y
-              add r2.y, r0.y, r2.y
-              movc r2.y, r7.z, r2.y, r7.w
-              movc r1.y, r7.y, r2.y, r1.y
-              movc r1.y, r3.z, r7.x, r1.y
-              ftou r1.y, r1.y
-              lt r2.y, r3.w, r0.y
-              add r3.z, r0.y, -r3.w
-              div r7.x, r3.z, r2.z
-              ftou r7.x, r7.x
-              utof r7.y, r7.x
-              mad r3.z, -r7.y, r2.z, r3.z
-              and r7.x, r7.x, l(1)
-              add r7.y, r0.y, r3.z
-              add r3.z, r0.w, -r3.z
-              movc r3.z, r7.x, r3.z, r7.y
-              lt r7.x, r0.w, r3.w
-              add r7.y, -r0.w, r3.w
-              div r7.z, r7.y, r2.z
-              ftou r7.z, r7.z
-              utof r7.w, r7.z
-              mad r2.z, -r7.w, r2.z, r7.y
-              and r7.y, r7.z, l(1)
-              add r7.z, r0.w, -r2.z
-              add r2.z, r0.y, r2.z
-              movc r2.z, r7.y, r2.z, r7.z
-              movc r2.z, r7.x, r2.z, r3.w
-              movc r2.y, r2.y, r3.z, r2.z
-              ftou r2.y, r2.y
-              utof r7.z, r2.y
-              utof r7.w, r1.y
-              ftou r2.yz, r3.xxyx
-              ftou r7.xy, r7.zwzz
-              imul null, r2.yz, r2.yyzy, cb0[2].xxyx
-              iadd r1.y, r2.z, r2.y
-              imad r1.y, r7.x, cb0[2].z, r1.y
-              imad r1.y, r7.y, cb0[2].w, r1.y
-              ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r2.yz, r1.y, l(0), u0.xxyx
-              mov r7.xy, r2.yzyy
-              dtof r4.w, r7.xyxy
-            else 
-              mov r4.w, l(0)
-            endif 
-          endif 
-          if_nz r1.x
-            iadd r2.yz, cb0[1].wwzw, l(0, -1, -1, 0)
-            utof r2.yz, r2.yyzy
-            max r1.y, r2.w, l(0.000000)
-            min r1.y, r2.y, r1.y
-            ftou r1.y, r1.y
-            max r2.y, r3.w, l(0.000000)
-            min r2.y, r2.z, r2.y
-            ftou r2.y, r2.y
-            utof r7.z, r2.y
-            utof r7.w, r1.y
-            ftou r2.yz, r3.xxyx
-            ftou r7.xy, r7.zwzz
-            imul null, r2.yz, r2.yyzy, cb0[2].xxyx
-            iadd r1.y, r2.z, r2.y
-            imad r1.y, r7.x, cb0[2].z, r1.y
-            imad r1.y, r7.y, cb0[2].w, r1.y
-            ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r2.yz, r1.y, l(0), u0.xxyx
-            mov r7.xy, r2.yzyy
-            dtof r6.w, r7.xyxy
-          else 
-            ieq r1.y, cb0[0].w, l(2)
-            if_nz r1.y
-              movc r1.y, cb0[7].x, l(-0.000000), l(0.500000)
-              add r2.yz, r0.zzwz, r1.yyyy
-              lt r1.y, r2.w, r0.y
-              add r3.z, r0.y, -r2.w
-              div r7.x, r3.z, r2.y
-              ftou r7.x, r7.x
-              utof r7.y, r7.x
-              mad r3.z, -r7.y, r2.y, r3.z
-              and r7.x, r7.x, l(1)
-              add r7.y, r0.y, r3.z
-              add r3.z, r0.z, -r3.z
-              movc r3.z, r7.x, r3.z, r7.y
-              lt r7.x, r0.z, r2.w
-              add r7.y, -r0.z, r2.w
-              div r7.z, r7.y, r2.y
-              ftou r7.z, r7.z
-              utof r7.w, r7.z
-              mad r2.y, -r7.w, r2.y, r7.y
-              and r7.y, r7.z, l(1)
-              add r7.z, r0.z, -r2.y
-              add r2.y, r0.y, r2.y
-              movc r2.y, r7.y, r2.y, r7.z
-              movc r2.y, r7.x, r2.y, r2.w
-              movc r1.y, r1.y, r3.z, r2.y
-              ftou r1.y, r1.y
-              lt r2.y, r3.w, r0.y
-              add r2.w, r0.y, -r3.w
-              div r3.z, r2.w, r2.z
-              ftou r3.z, r3.z
-              utof r7.x, r3.z
-              mad r2.w, -r7.x, r2.z, r2.w
-              and r3.z, r3.z, l(1)
-              add r7.x, r0.y, r2.w
-              add r2.w, r0.w, -r2.w
-              movc r2.w, r3.z, r2.w, r7.x
-              lt r3.z, r0.w, r3.w
-              add r7.x, -r0.w, r3.w
-              div r7.y, r7.x, r2.z
-              ftou r7.y, r7.y
-              utof r7.z, r7.y
-              mad r2.z, -r7.z, r2.z, r7.x
-              and r7.x, r7.y, l(1)
-              add r7.y, r0.w, -r2.z
-              add r2.z, r0.y, r2.z
-              movc r2.z, r7.x, r2.z, r7.y
-              movc r2.z, r3.z, r2.z, r3.w
-              movc r2.y, r2.y, r2.w, r2.z
-              ftou r2.y, r2.y
-              utof r2.z, r2.y
-              utof r2.w, r1.y
-              ftou r7.xy, r3.xyxx
-              ftou r2.yz, r2.zzwz
-              imul null, r7.xy, r7.xyxx, cb0[2].xyxx
-              iadd r1.y, r7.y, r7.x
-              imad r1.y, r2.y, cb0[2].z, r1.y
-              imad r1.y, r2.z, cb0[2].w, r1.y
-              ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r2.yz, r1.y, l(0), u0.xxyx
-              mov r7.xy, r2.yzyy
-              dtof r6.w, r7.xyxy
-            else 
-              mov r6.w, l(0)
-            endif 
-          endif 
-          if_nz r1.x
-            iadd r1.xy, cb0[1].wzww, l(-1, -1, 0, 0)
-            utof r1.xy, r1.xyxx
-            max r2.y, r2.x, l(0.000000)
-            min r1.x, r1.x, r2.y
-            max r2.y, r3.w, l(0.000000)
-            min r1.y, r1.y, r2.y
-            ftou r1.xy, r1.xyxx
-            utof r2.zw, r1.yyyx
-            ftou r1.xy, r3.xyxx
-            ftou r2.yz, r2.zzwz
-            imul null, r1.xy, r1.xyxx, cb0[2].xyxx
-            iadd r1.x, r1.y, r1.x
-            imad r1.x, r2.y, cb0[2].z, r1.x
-            imad r1.x, r2.z, cb0[2].w, r1.x
-            ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r1.xy, r1.x, l(0), u0.xyxx
-            mov r2.zw, r1.xxxy
-            dtof r8.w, r2.zwzw
-          else 
-            ieq r1.x, cb0[0].w, l(2)
-            if_nz r1.x
-              movc r1.x, cb0[7].x, l(-0.000000), l(0.500000)
-              add r1.xy, r0.zwzz, r1.xxxx
-              lt r2.y, r2.x, r0.y
-              add r2.z, r0.y, -r2.x
-              div r2.w, r2.z, r1.x
-              ftou r2.w, r2.w
-              utof r3.z, r2.w
-              mad r2.z, -r3.z, r1.x, r2.z
-              and r2.w, r2.w, l(1)
-              add r3.z, r0.y, r2.z
-              add r2.z, r0.z, -r2.z
-              movc r2.z, r2.w, r2.z, r3.z
-              lt r2.w, r0.z, r2.x
-              add r3.z, -r0.z, r2.x
-              div r7.x, r3.z, r1.x
-              ftou r7.x, r7.x
-              utof r7.y, r7.x
-              mad r1.x, -r7.y, r1.x, r3.z
-              and r3.z, r7.x, l(1)
-              add r0.z, r0.z, -r1.x
-              add r1.x, r0.y, r1.x
-              movc r0.z, r3.z, r1.x, r0.z
-              movc r0.z, r2.w, r0.z, r2.x
-              movc r0.z, r2.y, r2.z, r0.z
-              lt r1.x, r3.w, r0.y
-              add r2.x, r0.y, -r3.w
-              div r2.y, r2.x, r1.y
-              ftou r2.y, r2.y
-              utof r2.z, r2.y
-              mad r2.x, -r2.z, r1.y, r2.x
-              and r2.y, r2.y, l(1)
-              add r2.z, r0.y, r2.x
-              add r2.x, r0.w, -r2.x
-              movc r2.x, r2.y, r2.x, r2.z
-              lt r2.y, r0.w, r3.w
-              add r2.z, -r0.w, r3.w
-              div r2.w, r2.z, r1.y
-              ftou r2.w, r2.w
-              utof r3.z, r2.w
-              mad r1.y, -r3.z, r1.y, r2.z
-              and r2.z, r2.w, l(1)
-              add r0.w, r0.w, -r1.y
-              add r0.y, r0.y, r1.y
-              movc r0.y, r2.z, r0.y, r0.w
-              movc r0.y, r2.y, r0.y, r3.w
-              movc r0.y, r1.x, r2.x, r0.y
-              ftou r0.yz, r0.yyzy
-              utof r2.zw, r0.yyyz
-              ftou r0.yz, r3.xxyx
-              ftou r1.xy, r2.zwzz
-              imul null, r0.yz, r0.yyzy, cb0[2].xxyx
-              iadd r0.y, r0.z, r0.y
-              imad r0.y, r1.x, cb0[2].z, r0.y
-              imad r0.y, r1.y, cb0[2].w, r0.y
-              ld_structured_indexable(structured_buffer, stride=8)(mixed,mixed,mixed,mixed) r0.yz, r0.y, l(0), u0.xxyx
-              mov r1.xy, r0.yzyy
-              dtof r8.w, r1.xyxy
-            else 
-              mov r8.w, l(0)
-            endif 
-          endif 
-        endif 
-        frc r0.yz, r1.wwzw
-        mul r1.xy, r0.yzyy, r0.yzyy
-        mul r2.xw, r0.yyyz, r1.xxxy
-        dp2 r3.y, l(-0.750000, 0.750000, 0.000000, 0.000000), r5.xyxx
-        dp4 r3.z, l(1.500000, 1.500000, -2.250000, -0.750000), r5.xyzw
-        dp4 r3.w, l(-0.750000, -1.250000, 1.250000, 0.750000), r5.xyzw
-        mov r7.x, l(1.000000)
-        mov r7.y, r0.y
-        mov r7.z, r1.x
-        mov r7.w, r2.x
-        mov r3.x, r5.z
-        dp4 r3.x, r7.xyzw, r3.xyzw
-        dp2 r5.y, l(-0.750000, 0.750000, 0.000000, 0.000000), r4.xyxx
-        dp4 r5.z, l(1.500000, 1.500000, -2.250000, -0.750000), r4.xyzw
-        dp4 r5.w, l(-0.750000, -1.250000, 1.250000, 0.750000), r4.xyzw
-        mov r5.x, r4.z
-        dp4 r3.y, r7.xyzw, r5.xyzw
-        dp2 r4.y, l(-0.750000, 0.750000, 0.000000, 0.000000), r6.xyxx
-        dp4 r4.z, l(1.500000, 1.500000, -2.250000, -0.750000), r6.xyzw
-        dp4 r4.w, l(-0.750000, -1.250000, 1.250000, 0.750000), r6.xyzw
-        mov r4.x, r6.z
-        dp4 r3.z, r7.xyzw, r4.xyzw
-        dp2 r4.y, l(-0.750000, 0.750000, 0.000000, 0.000000), r8.xyxx
-        dp4 r4.z, l(1.500000, 1.500000, -2.250000, -0.750000), r8.xyzw
-        dp4 r4.w, l(-0.750000, -1.250000, 1.250000, 0.750000), r8.xyzw
-        mov r4.x, r8.z
-        dp4 r3.w, r7.xyzw, r4.xyzw
-        dp2 r4.y, l(-0.750000, 0.750000, 0.000000, 0.000000), r3.xzxx
-        dp4 r4.z, l(1.500000, -2.250000, 1.500000, -0.750000), r3.xyzw
-        dp4 r4.w, l(-0.750000, 1.250000, -1.250000, 0.750000), r3.xyzw
-        mov r2.x, l(1.000000)
-        mov r2.y, r0.z
-        mov r2.z, r1.y
-        mov r4.x, r3.y
-        dp4 r0.y, r2.xyzw, r4.xyzw
-        ftod r0.zw, r0.y
-        mov r0.yz, r0.zzwz
-        store_structured u2.xy, r0.x, l(0), r0.yzyy
-      endif 
-    endif 
-  endif 
-endif 
-ret 
-// Approximately 0 instruction slots used
-#endif
-
-const BYTE g_GridSample[] =
-{
-     68,  88,  66,  67,  12, 208, 
-     79, 147,  61,  28, 165, 133, 
-    230, 240, 184,  25, 102, 189, 
-    139, 130,   1,   0,   0,   0, 
-    188, 220,   0,   0,   4,   0, 
-      0,   0,  48,   0,   0,   0, 
-     64,   0,   0,   0,  80,   0, 
-      0,   0, 172, 220,   0,   0, 
-     73,  83,  71,  78,   8,   0, 
-      0,   0,   0,   0,   0,   0, 
-      8,   0,   0,   0,  79,  83, 
-     71,  78,   8,   0,   0,   0, 
-      0,   0,   0,   0,   8,   0, 
-      0,   0,  83,  72,  69,  88, 
-     84, 220,   0,   0,  80,   0, 
-      5,   0,  21,  55,   0,   0, 
-    106,  24,   0,   1,  89,   0, 
-      0,   4,  70, 142,  32,   0, 
-      0,   0,   0,   0,   8,   0, 
-      0,   0, 158,   0,   0,   4, 
-      0, 224,  17,   0,   0,   0, 
-      0,   0,   8,   0,   0,   0, 
-    158,   0,   0,   4,   0, 224, 
-     17,   0,   1,   0,   0,   0, 
-      4,   0,   0,   0, 158,   0, 
-      0,   4,   0, 224,  17,   0, 
-      2,   0,   0,   0,   8,   0, 
-      0,   0,  95,   0,   0,   2, 
-     18,   0,   2,   0, 104,   0, 
-      0,   2,  12,   0,   0,   0, 
-    155,   0,   0,   4,  64,   0, 
-      0,   0,   1,   0,   0,   0, 
-      1,   0,   0,   0,  30,   0, 
-      0,   7,  18,   0,  16,   0, 
-      0,   0,   0,   0,  10,   0, 
-      2,   0,  10, 128,  32,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,  79,   0,   0,   8, 
-     34,   0,  16,   0,   0,   0, 
-      0,   0,  10,   0,  16,   0, 
-      0,   0,   0,   0,  26, 128, 
-     32,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,  31,   0, 
-      4,   3,  26,   0,  16,   0, 
-      0,   0,   0,   0,  86,   0, 
-      0,   6, 242,   0,  16,   0, 
-      1,   0,   0,   0, 182, 139, 
-     32,   0,   0,   0,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,  10, 242,   0,  16,   0, 
-      2,   0,   0,   0,   6,   5, 
-     16,   0,   1,   0,   0,   0, 
-      2,  64,   0,   0,   0,   0, 
-    128, 191,   0,   0,   0, 191, 
-      0,   0,   0, 191,   0,   0, 
-    128, 191,  54,   0,   0,   5, 
-     18,   0,  16,   0,   3,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,  54,   0, 
-      0,   5,  98,   0,  16,   0, 
-      3,   0,   0,   0,   6,   3, 
-     16,   0,   2,   0,   0,   0, 
-     54,   0,   0,   5,  18,   0, 
-     16,   0,   2,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0, 191,  55,   0,   0,  10, 
-    226,   0,  16,   0,   0,   0, 
-      0,   0,   6, 128,  32,   0, 
-      0,   0,   0,   0,   7,   0, 
-      0,   0,   6,   9,  16,   0, 
-      3,   0,   0,   0,   6,   9, 
-     16,   0,   2,   0,   0,   0, 
-     38,   0,   0,  10,   0, 208, 
-      0,   0,  18,   0,  16,   0, 
-      1,   0,   0,   0,  42, 128, 
-     32,   0,   0,   0,   0,   0, 
-      5,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      5,   0,   0,   0,  38,   0, 
-      0,   9,   0, 208,   0,   0, 
-     34,   0,  16,   0,   1,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  26, 128, 
-     32,   0,   0,   0,   0,   0, 
-      5,   0,   0,   0,  78,   0, 
-      0,   8,  34,   0,  16,   0, 
-      1,   0,   0,   0,   0, 208, 
-      0,   0,  10,   0,  16,   0, 
-      0,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-     35,   0,   0,  11,  18,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16, 128,  65,   0, 
-      0,   0,   1,   0,   0,   0, 
-     10, 128,  32,   0,   0,   0, 
-      0,   0,   6,   0,   0,   0, 
-     10,   0,  16,   0,   0,   0, 
-      0,   0,  78,   0,   0,   8, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,   0, 208,   0,   0, 
-     10,   0,  16,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  35,   0, 
-      0,  11,  18,   0,  16,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16, 128,  65,   0,   0,   0, 
-      1,   0,   0,   0,  26, 128, 
-     32,   0,   0,   0,   0,   0, 
-      6,   0,   0,   0,  10,   0, 
-     16,   0,   2,   0,   0,   0, 
-     78,   0,   0,   9,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-      0, 208,   0,   0,  10,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   5,   0,   0,   0, 
-     35,   0,   0,  11,  18,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16, 128,  65,   0, 
-      0,   0,   2,   0,   0,   0, 
-     42, 128,  32,   0,   0,   0, 
-      0,   0,   6,   0,   0,   0, 
-     10,   0,  16,   0,   2,   0, 
-      0,   0,  86,   0,   0,   5, 
-     18,   0,  16,   0,   3,   0, 
-      0,   0,  26,   0,  16,   0, 
-      1,   0,   0,   0,  86,   0, 
-      0,   5,  98,   0,  16,   0, 
-      3,   0,   0,   0,  86,   4, 
-     16,   0,   2,   0,   0,   0, 
-     86,   0,   0,   6, 242,   0, 
-     16,   0,   2,   0,   0,   0, 
-     70, 142,  32,   0,   0,   0, 
-      0,   0,   4,   0,   0,   0, 
-     16,   0,   0,   7,  18,   0, 
-     16,   0,   2,   0,   0,   0, 
-     70,   2,  16,   0,   3,   0, 
-      0,   0,  70,   2,  16,   0, 
-      2,   0,   0,   0,   0,   0, 
-      0,   7,  34,   0,  16,   0, 
-      2,   0,   0,   0,  58,   0, 
-     16,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   2,   0, 
-      0,   0,  28,   0,   0,   5, 
-     18,   0,  16,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      2,   0,   0,   0, 167,   0, 
-      0, 139,   2,  35,   0, 128, 
-    131, 153,  25,   0,  18,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10,   0,  16,   0,   2,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,   6, 224, 
-     17,   0,   1,   0,   0,   0, 
-     28,   0,   0,   5,  18,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  35,   0, 128, 131, 153, 
-     25,   0,  34,   0,  16,   0, 
-      3,   0,   0,   0,  10,   0, 
-     16,   0,   2,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,   6, 224,  17,   0, 
-      1,   0,   0,   0,  32,   0, 
-      0,   8,  18,   0,  16,   0, 
-      2,   0,   0,   0,  10, 128, 
-     32,   0,   0,   0,   0,   0, 
-      7,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,  10,  98,   0, 
-     16,   0,   2,   0,   0,   0, 
-      6,   1,  16,   0,   3,   0, 
-      0,   0,   2,  64,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-    128,  63,   0,   0, 128,  63, 
-      0,   0,   0,   0,  56,   0, 
-      0,  10,  50,   0,  16,   0, 
-      3,   0,   0,   0, 150,   5, 
-     16,   0,   2,   0,   0,   0, 
-      2,  64,   0,   0,   0,   0, 
-      0,  63,   0,   0,   0,  63, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,  10, 
-    194,   0,  16,   0,   3,   0, 
-      0,   0, 166,  14,  16,   0, 
-      1,   0,   0,   0,   2,  64, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-    128, 191,   0,   0, 128, 191, 
-     56,   0,   0,   7,  50,   0, 
-     16,   0,   3,   0,   0,   0, 
-    230,  10,  16,   0,   3,   0, 
-      0,   0,  70,   0,  16,   0, 
-      3,   0,   0,   0,  50,   0, 
-      0,  12, 194,   0,  16,   0, 
-      1,   0,   0,   0,  86,   9, 
-     16,   0,   2,   0,   0,   0, 
-    166,  14,  16,   0,   1,   0, 
-      0,   0,   2,  64,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0, 128, 191, 
-      0,   0, 128, 191,  56,   0, 
-      0,  10, 194,   0,  16,   0, 
-      1,   0,   0,   0, 166,  14, 
-     16,   0,   1,   0,   0,   0, 
-      2,  64,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,  63,   0,   0, 
-      0,  63,  55,   0,   0,   9, 
-    194,   0,  16,   0,   1,   0, 
-      0,   0,   6,   0,  16,   0, 
-      2,   0,   0,   0,   6,   4, 
-     16,   0,   3,   0,   0,   0, 
-    166,  14,  16,   0,   1,   0, 
-      0,   0,  32,   0,   0,   8, 
-     18,   0,  16,   0,   2,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   1,  64,   0,   0, 
-      1,   0,   0,   0,  64,   0, 
-      0,   5,  98,   0,  16,   0, 
-      2,   0,   0,   0, 166,  11, 
-     16,   0,   1,   0,   0,   0, 
-     55,   0,   0,   9, 194,   0, 
-     16,   0,   1,   0,   0,   0, 
-      6,   0,  16,   0,   2,   0, 
-      0,   0,  86,   9,  16,   0, 
-      2,   0,   0,   0, 166,  14, 
-     16,   0,   1,   0,   0,   0, 
-     49,   0,   0,   7,  98,   0, 
-     16,   0,   2,   0,   0,   0, 
-    166,  11,  16,   0,   1,   0, 
-      0,   0,  86,   5,  16,   0, 
-      0,   0,   0,   0,  49,   0, 
-      0,   7,  50,   0,  16,   0, 
-      3,   0,   0,   0, 230,  10, 
-     16,   0,   0,   0,   0,   0, 
-    230,  10,  16,   0,   1,   0, 
-      0,   0,  60,   0,   0,   7, 
-    130,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16,   0,   3,   0,   0,   0, 
-     60,   0,   0,   7, 130,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   2,   0, 
-      0,   0,  58,   0,  16,   0, 
-      2,   0,   0,   0,  60,   0, 
-      0,   7, 130,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16,   0,   2,   0, 
-      0,   0,  31,   0,   4,   3, 
-     58,   0,  16,   0,   2,   0, 
-      0,   0,  32,   0,   0,   8, 
-    130,   0,  16,   0,   2,   0, 
-      0,   0,  58, 128,  32,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   1,  64,   0,   0, 
-      1,   0,   0,   0,  31,   0, 
-      4,   3,  58,   0,  16,   0, 
-      2,   0,   0,   0,  30,   0, 
-      0,  11, 194,   0,  16,   0, 
-      3,   0,   0,   0, 246, 139, 
-     32,   0,   0,   0,   0,   0, 
-      1,   0,   0,   0,   2,  64, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0, 255, 255, 
-    255, 255, 255, 255, 255, 255, 
-     86,   0,   0,   5, 194,   0, 
-     16,   0,   3,   0,   0,   0, 
-    166,  14,  16,   0,   3,   0, 
-      0,   0,  52,   0,   0,  10, 
-     50,   0,  16,   0,   4,   0, 
-      0,   0, 230,  10,  16,   0, 
-      1,   0,   0,   0,   2,  64, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-     51,   0,   0,   7, 194,   0, 
-     16,   0,   1,   0,   0,   0, 
-    166,  14,  16,   0,   3,   0, 
-      0,   0,   6,   4,  16,   0, 
-      4,   0,   0,   0,  18,   0, 
-      0,   1,  32,   0,   0,   8, 
-    130,   0,  16,   0,   2,   0, 
-      0,   0,  58, 128,  32,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   1,  64,   0,   0, 
-      2,   0,   0,   0,  31,   0, 
-      4,   3,  58,   0,  16,   0, 
-      2,   0,   0,   0,  55,   0, 
-      0,  10, 130,   0,  16,   0, 
-      2,   0,   0,   0,  10, 128, 
-     32,   0,   0,   0,   0,   0, 
-      7,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0, 128, 
-      1,  64,   0,   0,   0,   0, 
-      0,  63,   0,   0,   0,   7, 
-    194,   0,  16,   0,   3,   0, 
-      0,   0, 166,  14,  16,   0, 
-      0,   0,   0,   0, 246,  15, 
-     16,   0,   2,   0,   0,   0, 
-      0,   0,   0,   8,  50,   0, 
-     16,   0,   4,   0,   0,   0, 
-     86,   5,  16,   0,   0,   0, 
-      0,   0, 230,  10,  16, 128, 
-     65,   0,   0,   0,   1,   0, 
-      0,   0,  14,   0,   0,   7, 
-    194,   0,  16,   0,   4,   0, 
-      0,   0,   6,   4,  16,   0, 
-      4,   0,   0,   0, 166,  14, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5, 194,   0, 
-     16,   0,   4,   0,   0,   0, 
-    166,  14,  16,   0,   4,   0, 
-      0,   0,  86,   0,   0,   5, 
-     50,   0,  16,   0,   5,   0, 
-      0,   0, 230,  10,  16,   0, 
-      4,   0,   0,   0,  50,   0, 
-      0,  10,  50,   0,  16,   0, 
-      4,   0,   0,   0,  70,   0, 
-     16, 128,  65,   0,   0,   0, 
-      5,   0,   0,   0, 230,  10, 
-     16,   0,   3,   0,   0,   0, 
-     70,   0,  16,   0,   4,   0, 
-      0,   0,   1,   0,   0,  10, 
-    194,   0,  16,   0,   4,   0, 
-      0,   0, 166,  14,  16,   0, 
-      4,   0,   0,   0,   2,  64, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   7,  50,   0, 
-     16,   0,   5,   0,   0,   0, 
-     86,   5,  16,   0,   0,   0, 
-      0,   0,  70,   0,  16,   0, 
-      4,   0,   0,   0,   0,   0, 
-      0,   8,  50,   0,  16,   0, 
-      4,   0,   0,   0, 230,  10, 
-     16,   0,   0,   0,   0,   0, 
-     70,   0,  16, 128,  65,   0, 
-      0,   0,   4,   0,   0,   0, 
-     55,   0,   0,   9,  50,   0, 
-     16,   0,   4,   0,   0,   0, 
-    230,  10,  16,   0,   4,   0, 
-      0,   0,  70,   0,  16,   0, 
-      4,   0,   0,   0,  70,   0, 
-     16,   0,   5,   0,   0,   0, 
-      0,   0,   0,   8, 194,   0, 
-     16,   0,   4,   0,   0,   0, 
-    166,  14,  16, 128,  65,   0, 
-      0,   0,   0,   0,   0,   0, 
-    166,  14,  16,   0,   1,   0, 
-      0,   0,  14,   0,   0,   7, 
-     50,   0,  16,   0,   5,   0, 
-      0,   0, 230,  10,  16,   0, 
-      4,   0,   0,   0, 230,  10, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5,  50,   0, 
-     16,   0,   5,   0,   0,   0, 
-     70,   0,  16,   0,   5,   0, 
-      0,   0,  86,   0,   0,   5, 
-    194,   0,  16,   0,   5,   0, 
-      0,   0,   6,   4,  16,   0, 
-      5,   0,   0,   0,  50,   0, 
-      0,  10, 194,   0,  16,   0, 
-      3,   0,   0,   0, 166,  14, 
-     16, 128,  65,   0,   0,   0, 
-      5,   0,   0,   0, 166,  14, 
-     16,   0,   3,   0,   0,   0, 
-    166,  14,  16,   0,   4,   0, 
-      0,   0,   1,   0,   0,  10, 
-    194,   0,  16,   0,   4,   0, 
-      0,   0,   6,   4,  16,   0, 
-      5,   0,   0,   0,   2,  64, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   8,  50,   0, 
-     16,   0,   5,   0,   0,   0, 
-    230,  10,  16,   0,   0,   0, 
-      0,   0, 230,  10,  16, 128, 
-     65,   0,   0,   0,   3,   0, 
-      0,   0,   0,   0,   0,   7, 
-    194,   0,  16,   0,   3,   0, 
-      0,   0,  86,   5,  16,   0, 
-      0,   0,   0,   0, 166,  14, 
-     16,   0,   3,   0,   0,   0, 
-     55,   0,   0,   9, 194,   0, 
-     16,   0,   3,   0,   0,   0, 
-    166,  14,  16,   0,   4,   0, 
-      0,   0, 166,  14,  16,   0, 
-      3,   0,   0,   0,   6,   4, 
-     16,   0,   5,   0,   0,   0, 
-     55,   0,   0,   9,  50,   0, 
-     16,   0,   3,   0,   0,   0, 
-     70,   0,  16,   0,   3,   0, 
-      0,   0, 230,  10,  16,   0, 
-      3,   0,   0,   0, 230,  10, 
-     16,   0,   1,   0,   0,   0, 
-     55,   0,   0,   9, 194,   0, 
-     16,   0,   1,   0,   0,   0, 
-     86,   9,  16,   0,   2,   0, 
-      0,   0,   6,   4,  16,   0, 
-      4,   0,   0,   0,   6,   4, 
-     16,   0,   3,   0,   0,   0, 
-     21,   0,   0,   1,  21,   0, 
-      0,   1,  21,   0,   0,   1, 
-     86,   0,   0,   5,  50,   0, 
-     16,   0,   3,   0,   0,   0, 
-     22,   5,  16,   0,   1,   0, 
-      0,   0,  31,   0,   4,   3, 
-     10,   0,  16,   0,   2,   0, 
-      0,   0,  31,   0,   0,   4, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-     29,   0,   0,  10,  50,   0, 
-     16,   0,   1,   0,   0,   0, 
-    230,  10,  16,   0,   1,   0, 
-      0,   0,   2,  64,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,  28,   0, 
-      0,   5, 242,   0,  16,   0, 
-      2,   0,   0,   0, 230,  11, 
-     16,   0,   1,   0,   0,   0, 
-     79,   0,   0,   8,  50,   0, 
-     16,   0,   2,   0,   0,   0, 
-     70,   0,  16,   0,   2,   0, 
-      0,   0, 182, 143,  32,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   0,   1,   0,   0,   7, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  10,   0, 
-     16,   0,   2,   0,   0,   0, 
-      1,   0,   0,   7,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     26,   0,  16,   0,   1,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,   1,   0, 
-      0,   7,  18,   0,  16,   0, 
-      1,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  28,   0,   0,   5, 
-     50,   0,  16,   0,   2,   0, 
-      0,   0,  70,   0,  16,   0, 
-      3,   0,   0,   0,  38,   0, 
-      0,   9,   0, 208,   0,   0, 
-     50,   0,  16,   0,   2,   0, 
-      0,   0,  70,   0,  16,   0, 
-      2,   0,   0,   0,  70, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  30,   0, 
-      0,   7,  34,   0,  16,   0, 
-      1,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   2,   0, 
-      0,   0,  35,   0,   0,  10, 
-     34,   0,  16,   0,   1,   0, 
-      0,   0,  42,   0,  16,   0, 
-      2,   0,   0,   0,  42, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-     35,   0,   0,  10,  34,   0, 
-     16,   0,   1,   0,   0,   0, 
-     58,   0,  16,   0,   2,   0, 
-      0,   0,  58, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      1,   0,   0,   0, 167,   0, 
-      0, 139,   2,  67,   0, 128, 
-    131, 153,  25,   0,  50,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   1,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,  70, 224, 
-     17,   0,   0,   0,   0,   0, 
-     54,   0,   0,   5, 194,   0, 
-     16,   0,   2,   0,   0,   0, 
-      6,   4,  16,   0,   2,   0, 
-      0,   0, 201,   0,   0,   5, 
-     34,   0,  16,   0,   1,   0, 
-      0,   0, 230,  14,  16,   0, 
-      2,   0,   0,   0,   1,   0, 
-      0,   7,  18,   0,  16,   0, 
-      1,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  18,   0,   0,   1, 
-     32,   0,   0,   8,  34,   0, 
-     16,   0,   1,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      1,  64,   0,   0,   1,   0, 
-      0,   0,  31,   0,   4,   3, 
-     26,   0,  16,   0,   1,   0, 
-      0,   0,  30,   0,   0,  11, 
-     50,   0,  16,   0,   2,   0, 
-      0,   0, 230, 138,  32,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   0,   2,  64,   0,   0, 
-    255, 255, 255, 255, 255, 255, 
-    255, 255,   0,   0,   0,   0, 
-      0,   0,   0,   0,  86,   0, 
-      0,   5,  50,   0,  16,   0, 
-      2,   0,   0,   0,  70,   0, 
-     16,   0,   2,   0,   0,   0, 
-     52,   0,   0,  10, 194,   0, 
-     16,   0,   2,   0,   0,   0, 
-    246,  11,  16,   0,   1,   0, 
-      0,   0,   2,  64,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,  51,   0, 
-      0,   7,  50,   0,  16,   0, 
-      2,   0,   0,   0,  70,   0, 
-     16,   0,   2,   0,   0,   0, 
-    230,  10,  16,   0,   2,   0, 
-      0,   0,  28,   0,   0,   5, 
-     50,   0,  16,   0,   2,   0, 
-      0,   0,  70,   0,  16,   0, 
-      2,   0,   0,   0,  28,   0, 
-      0,   5, 194,   0,  16,   0, 
-      2,   0,   0,   0,   6,   4, 
-     16,   0,   3,   0,   0,   0, 
-     38,   0,   0,   9,   0, 208, 
-      0,   0, 194,   0,  16,   0, 
-      2,   0,   0,   0, 166,  14, 
-     16,   0,   2,   0,   0,   0, 
-      6, 132,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     30,   0,   0,   7,  34,   0, 
-     16,   0,   1,   0,   0,   0, 
-     58,   0,  16,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      2,   0,   0,   0,  35,   0, 
-      0,  10,  34,   0,  16,   0, 
-      1,   0,   0,   0,  10,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   1,   0, 
-      0,   0,  35,   0,   0,  10, 
-     34,   0,  16,   0,   1,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-    167,   0,   0, 139,   2,  67, 
-      0, 128, 131, 153,  25,   0, 
-     50,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      1,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     70, 224,  17,   0,   0,   0, 
-      0,   0,  54,   0,   0,   5, 
-    194,   0,  16,   0,   2,   0, 
-      0,   0,   6,   4,  16,   0, 
-      2,   0,   0,   0, 201,   0, 
-      0,   5,  18,   0,  16,   0, 
-      1,   0,   0,   0, 230,  14, 
-     16,   0,   2,   0,   0,   0, 
-     18,   0,   0,   1,  32,   0, 
-      0,   8,  34,   0,  16,   0, 
-      1,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   1,  64, 
-      0,   0,   2,   0,   0,   0, 
-     31,   0,   4,   3,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-     55,   0,   0,  10,  34,   0, 
-     16,   0,   1,   0,   0,   0, 
-     10, 128,  32,   0,   0,   0, 
-      0,   0,   7,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0, 128,   1,  64,   0,   0, 
-      0,   0,   0,  63,   0,   0, 
-      0,   7,  50,   0,  16,   0, 
-      2,   0,   0,   0, 182,  15, 
-     16,   0,   0,   0,   0,   0, 
-     86,   5,  16,   0,   1,   0, 
-      0,   0,  49,   0,   0,   7, 
-    194,   0,  16,   0,   2,   0, 
-      0,   0, 246,  11,  16,   0, 
-      1,   0,   0,   0,  86,   5, 
-     16,   0,   0,   0,   0,   0, 
-      0,   0,   0,   8, 194,   0, 
-     16,   0,   3,   0,   0,   0, 
-     86,   5,  16,   0,   0,   0, 
-      0,   0, 246,  11,  16, 128, 
-     65,   0,   0,   0,   1,   0, 
-      0,   0,  14,   0,   0,   7, 
-     50,   0,  16,   0,   4,   0, 
-      0,   0, 230,  10,  16,   0, 
-      3,   0,   0,   0,  70,   0, 
-     16,   0,   2,   0,   0,   0, 
-     28,   0,   0,   5,  50,   0, 
-     16,   0,   4,   0,   0,   0, 
-     70,   0,  16,   0,   4,   0, 
-      0,   0,  86,   0,   0,   5, 
-    194,   0,  16,   0,   4,   0, 
-      0,   0,   6,   4,  16,   0, 
-      4,   0,   0,   0,  50,   0, 
-      0,  10, 194,   0,  16,   0, 
-      3,   0,   0,   0, 166,  14, 
-     16, 128,  65,   0,   0,   0, 
-      4,   0,   0,   0,   6,   4, 
-     16,   0,   2,   0,   0,   0, 
-    166,  14,  16,   0,   3,   0, 
-      0,   0,   1,   0,   0,  10, 
-     50,   0,  16,   0,   4,   0, 
-      0,   0,  70,   0,  16,   0, 
-      4,   0,   0,   0,   2,  64, 
-      0,   0,   1,   0,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   7, 194,   0, 
-     16,   0,   4,   0,   0,   0, 
-     86,   5,  16,   0,   0,   0, 
-      0,   0, 166,  14,  16,   0, 
-      3,   0,   0,   0,   0,   0, 
-      0,   8, 194,   0,  16,   0, 
-      3,   0,   0,   0, 246,  11, 
-     16,   0,   0,   0,   0,   0, 
-    166,  14,  16, 128,  65,   0, 
-      0,   0,   3,   0,   0,   0, 
-     55,   0,   0,   9, 194,   0, 
-     16,   0,   3,   0,   0,   0, 
-      6,   4,  16,   0,   4,   0, 
-      0,   0, 166,  14,  16,   0, 
-      3,   0,   0,   0, 166,  14, 
-     16,   0,   4,   0,   0,   0, 
-     49,   0,   0,   7,  50,   0, 
-     16,   0,   4,   0,   0,   0, 
-    182,  15,  16,   0,   0,   0, 
-      0,   0, 182,  15,  16,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   8, 194,   0,  16,   0, 
-      4,   0,   0,   0, 246,  11, 
-     16, 128,  65,   0,   0,   0, 
-      0,   0,   0,   0, 246,  11, 
-     16,   0,   1,   0,   0,   0, 
-     14,   0,   0,   7,  50,   0, 
-     16,   0,   5,   0,   0,   0, 
-    230,  10,  16,   0,   4,   0, 
-      0,   0,  70,   0,  16,   0, 
-      2,   0,   0,   0,  28,   0, 
-      0,   5,  50,   0,  16,   0, 
-      5,   0,   0,   0,  70,   0, 
-     16,   0,   5,   0,   0,   0, 
-     86,   0,   0,   5, 194,   0, 
-     16,   0,   5,   0,   0,   0, 
-      6,   4,  16,   0,   5,   0, 
-      0,   0,  50,   0,   0,  10, 
-     50,   0,  16,   0,   2,   0, 
-      0,   0, 230,  10,  16, 128, 
-     65,   0,   0,   0,   5,   0, 
-      0,   0,  70,   0,  16,   0, 
-      2,   0,   0,   0, 230,  10, 
-     16,   0,   4,   0,   0,   0, 
-      1,   0,   0,  10, 194,   0, 
-     16,   0,   4,   0,   0,   0, 
-      6,   4,  16,   0,   5,   0, 
-      0,   0,   2,  64,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   1,   0,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   8,  50,   0,  16,   0, 
-      5,   0,   0,   0, 182,  15, 
-     16,   0,   0,   0,   0,   0, 
-     70,   0,  16, 128,  65,   0, 
-      0,   0,   2,   0,   0,   0, 
-      0,   0,   0,   7,  50,   0, 
-     16,   0,   2,   0,   0,   0, 
-     86,   5,  16,   0,   0,   0, 
-      0,   0,  70,   0,  16,   0, 
-      2,   0,   0,   0,  55,   0, 
-      0,   9,  50,   0,  16,   0, 
-      2,   0,   0,   0, 230,  10, 
-     16,   0,   4,   0,   0,   0, 
-     70,   0,  16,   0,   2,   0, 
-      0,   0,  70,   0,  16,   0, 
-      5,   0,   0,   0,  55,   0, 
-      0,   9,  50,   0,  16,   0, 
-      2,   0,   0,   0,  70,   0, 
-     16,   0,   4,   0,   0,   0, 
-     70,   0,  16,   0,   2,   0, 
-      0,   0, 182,  15,  16,   0, 
-      1,   0,   0,   0,  55,   0, 
-      0,   9,  50,   0,  16,   0, 
-      2,   0,   0,   0, 230,  10, 
-     16,   0,   2,   0,   0,   0, 
-    230,  10,  16,   0,   3,   0, 
-      0,   0,  70,   0,  16,   0, 
-      2,   0,   0,   0,  28,   0, 
-      0,   5,  50,   0,  16,   0, 
-      2,   0,   0,   0,  70,   0, 
-     16,   0,   2,   0,   0,   0, 
-     28,   0,   0,   5, 194,   0, 
-     16,   0,   2,   0,   0,   0, 
-      6,   4,  16,   0,   3,   0, 
-      0,   0,  38,   0,   0,   9, 
-      0, 208,   0,   0, 194,   0, 
-     16,   0,   2,   0,   0,   0, 
-    166,  14,  16,   0,   2,   0, 
-      0,   0,   6, 132,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  30,   0,   0,   7, 
-     34,   0,  16,   0,   1,   0, 
-      0,   0,  58,   0,  16,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16,   0,   2,   0,   0,   0, 
-     35,   0,   0,  10,  34,   0, 
-     16,   0,   1,   0,   0,   0, 
-     10,   0,  16,   0,   2,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      1,   0,   0,   0,  35,   0, 
-      0,  10,  34,   0,  16,   0, 
-      1,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   1,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  67,   0, 128, 131, 153, 
-     25,   0,  50,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  70, 224,  17,   0, 
-      0,   0,   0,   0,  54,   0, 
-      0,   5, 194,   0,  16,   0, 
-      2,   0,   0,   0,   6,   4, 
-     16,   0,   2,   0,   0,   0, 
-    201,   0,   0,   5,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-    230,  14,  16,   0,   2,   0, 
-      0,   0,  18,   0,   0,   1, 
-     54,   0,   0,   5,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  21,   0,   0,   1, 
-     21,   0,   0,   1,  21,   0, 
-      0,   1, 202,   0,   0,   5, 
-     50,   0,  16,   0,   1,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0, 168,   0, 
-      0,   9,  50, 224,  17,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16,   0,   0,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  70,   0,  16,   0, 
-      1,   0,   0,   0,  18,   0, 
-      0,   1,  31,   0,   0,   4, 
-     42, 128,  32,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-     65,   0,   0,   5,  50,   0, 
-     16,   0,   1,   0,   0,   0, 
-    182,  15,  16,   0,   1,   0, 
-      0,   0,   0,   0,   0,  10, 
-     50,   0,  16,   0,   2,   0, 
-      0,   0,  70,   0,  16,   0, 
-      1,   0,   0,   0,   2,  64, 
-      0,   0,   0,   0, 128,  63, 
-      0,   0, 128,  63,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-     31,   0,   0,   4,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,  29,   0, 
-      0,  10, 194,   0,  16,   0, 
-      2,   0,   0,   0,  86,   1, 
-     16,   0,   1,   0,   0,   0, 
-      2,  64,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,  28,   0,   0,   5, 
-    194,   0,  16,   0,   3,   0, 
-      0,   0,  86,   1,  16,   0, 
-      1,   0,   0,   0,  79,   0, 
-      0,   8, 194,   0,  16,   0, 
-      3,   0,   0,   0, 166,  14, 
-     16,   0,   3,   0,   0,   0, 
-    246, 139,  32,   0,   0,   0, 
-      0,   0,   1,   0,   0,   0, 
-      1,   0,   0,   7,  66,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,   1,   0, 
-      0,   7,  66,   0,  16,   0, 
-      3,   0,   0,   0,  58,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   2,   0, 
-      0,   0,   1,   0,   0,   7, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5,  50,   0, 
-     16,   0,   4,   0,   0,   0, 
-     70,   0,  16,   0,   3,   0, 
-      0,   0,  28,   0,   0,   5, 
-    194,   0,  16,   0,   4,   0, 
-      0,   0,   6,   4,  16,   0, 
-      1,   0,   0,   0,  38,   0, 
-      0,   9,   0, 208,   0,   0, 
-     50,   0,  16,   0,   4,   0, 
-      0,   0,  70,   0,  16,   0, 
-      4,   0,   0,   0,  70, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  30,   0, 
-      0,   7,  18,   0,  16,   0, 
-      4,   0,   0,   0,  26,   0, 
-     16,   0,   4,   0,   0,   0, 
-     10,   0,  16,   0,   4,   0, 
-      0,   0,  35,   0,   0,  10, 
-     18,   0,  16,   0,   4,   0, 
-      0,   0,  42,   0,  16,   0, 
-      4,   0,   0,   0,  42, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16,   0,   4,   0,   0,   0, 
-     35,   0,   0,  10,  18,   0, 
-     16,   0,   4,   0,   0,   0, 
-     58,   0,  16,   0,   4,   0, 
-      0,   0,  58, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      4,   0,   0,   0, 167,   0, 
-      0, 139,   2,  67,   0, 128, 
-    131, 153,  25,   0,  50,   0, 
-     16,   0,   4,   0,   0,   0, 
-     10,   0,  16,   0,   4,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,  70, 224, 
-     17,   0,   0,   0,   0,   0, 
-     54,   0,   0,   5, 194,   0, 
-     16,   0,   4,   0,   0,   0, 
-      6,   4,  16,   0,   4,   0, 
-      0,   0, 201,   0,   0,   5, 
-     18,   0,  16,   0,   4,   0, 
-      0,   0, 230,  14,  16,   0, 
-      4,   0,   0,   0,   1,   0, 
-      0,   7,  66,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10,   0,  16,   0,   4,   0, 
-      0,   0,  29,   0,   0,  10, 
-     50,   0,  16,   0,   4,   0, 
-      0,   0,  22,   5,  16,   0, 
-      2,   0,   0,   0,   2,  64, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-     28,   0,   0,   5, 194,   0, 
-     16,   0,   4,   0,   0,   0, 
-     86,   1,  16,   0,   2,   0, 
-      0,   0,  79,   0,   0,   8, 
-    194,   0,  16,   0,   4,   0, 
-      0,   0, 166,  14,  16,   0, 
-      4,   0,   0,   0, 246, 139, 
-     32,   0,   0,   0,   0,   0, 
-      1,   0,   0,   0,   1,   0, 
-      0,   7,  18,   0,  16,   0, 
-      4,   0,   0,   0,  42,   0, 
-     16,   0,   4,   0,   0,   0, 
-     10,   0,  16,   0,   4,   0, 
-      0,   0,   1,   0,   0,   7, 
-    130,   0,  16,   0,   2,   0, 
-      0,   0,  58,   0,  16,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16,   0,   4,   0,   0,   0, 
-      1,   0,   0,   7, 130,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  58,   0,  16,   0, 
-      2,   0,   0,   0,  28,   0, 
-      0,   5,  50,   0,  16,   0, 
-      5,   0,   0,   0,  70,   0, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  28,   0,   0,   5, 
-     66,   0,  16,   0,   4,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  38,   0, 
-      0,   9,   0, 208,   0,   0, 
-     50,   0,  16,   0,   5,   0, 
-      0,   0,  70,   0,  16,   0, 
-      5,   0,   0,   0,  70, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  30,   0, 
-      0,   7,  18,   0,  16,   0, 
-      5,   0,   0,   0,  26,   0, 
-     16,   0,   5,   0,   0,   0, 
-     10,   0,  16,   0,   5,   0, 
-      0,   0,  35,   0,   0,  10, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,  42, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16,   0,   5,   0,   0,   0, 
-     35,   0,   0,  10, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     42,   0,  16,   0,   4,   0, 
-      0,   0,  58, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0, 167,   0, 
-      0, 139,   2,  67,   0, 128, 
-    131, 153,  25,   0,  50,   0, 
-     16,   0,   5,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,  70, 224, 
-     17,   0,   0,   0,   0,   0, 
-     54,   0,   0,   5, 194,   0, 
-     16,   0,   5,   0,   0,   0, 
-      6,   4,  16,   0,   5,   0, 
-      0,   0, 201,   0,   0,   5, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0, 230,  14,  16,   0, 
-      5,   0,   0,   0,   1,   0, 
-      0,   7, 130,   0,  16,   0, 
-      2,   0,   0,   0,  58,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,   1,   0,   0,   7, 
-     66,   0,  16,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   4,   0,   0,   0, 
-      1,   0,   0,   7,  66,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58,   0,  16,   0,   4,   0, 
-      0,   0,  42,   0,  16,   0, 
-      2,   0,   0,   0,  28,   0, 
-      0,   5,  50,   0,  16,   0, 
-      5,   0,   0,   0,  70,   0, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10,   0,  16,   0,   2,   0, 
-      0,   0,  28,   0,   0,   5, 
-     66,   0,  16,   0,   4,   0, 
-      0,   0,  26,   0,  16,   0, 
-      1,   0,   0,   0,  38,   0, 
-      0,   9,   0, 208,   0,   0, 
-     50,   0,  16,   0,   5,   0, 
-      0,   0,  70,   0,  16,   0, 
-      5,   0,   0,   0,  70, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  30,   0, 
-      0,   7,  18,   0,  16,   0, 
-      5,   0,   0,   0,  26,   0, 
-     16,   0,   5,   0,   0,   0, 
-     10,   0,  16,   0,   5,   0, 
-      0,   0,  35,   0,   0,  10, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,  42, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16,   0,   5,   0,   0,   0, 
-     35,   0,   0,  10, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     42,   0,  16,   0,   4,   0, 
-      0,   0,  58, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0, 167,   0, 
-      0, 139,   2,  67,   0, 128, 
-    131, 153,  25,   0,  50,   0, 
-     16,   0,   5,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,  70, 224, 
-     17,   0,   0,   0,   0,   0, 
-     54,   0,   0,   5, 194,   0, 
-     16,   0,   5,   0,   0,   0, 
-      6,   4,  16,   0,   5,   0, 
-      0,   0, 201,   0,   0,   5, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0, 230,  14,  16,   0, 
-      5,   0,   0,   0,   1,   0, 
-      0,   7,  66,   0,  16,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,   1,   0,   0,   7, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0,  26,   0,  16,   0, 
-      4,   0,   0,   0,  10,   0, 
-     16,   0,   4,   0,   0,   0, 
-      1,   0,   0,   7, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16,   0,   4,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5,  50,   0,  16,   0, 
-      4,   0,   0,   0,  70,   0, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5, 194,   0, 
-     16,   0,   4,   0,   0,   0, 
-      6,   4,  16,   0,   2,   0, 
-      0,   0,  38,   0,   0,   9, 
-      0, 208,   0,   0,  50,   0, 
-     16,   0,   4,   0,   0,   0, 
-     70,   0,  16,   0,   4,   0, 
-      0,   0,  70, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  30,   0,   0,   7, 
-     18,   0,  16,   0,   4,   0, 
-      0,   0,  26,   0,  16,   0, 
-      4,   0,   0,   0,  10,   0, 
-     16,   0,   4,   0,   0,   0, 
-     35,   0,   0,  10,  18,   0, 
-     16,   0,   4,   0,   0,   0, 
-     42,   0,  16,   0,   4,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      4,   0,   0,   0,  35,   0, 
-      0,  10,  18,   0,  16,   0, 
-      4,   0,   0,   0,  58,   0, 
-     16,   0,   4,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   4,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  67,   0, 128, 131, 153, 
-     25,   0,  50,   0,  16,   0, 
-      4,   0,   0,   0,  10,   0, 
-     16,   0,   4,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  70, 224,  17,   0, 
-      0,   0,   0,   0,  54,   0, 
-      0,   5, 194,   0,  16,   0, 
-      4,   0,   0,   0,   6,   4, 
-     16,   0,   4,   0,   0,   0, 
-    201,   0,   0,   5,  18,   0, 
-     16,   0,   4,   0,   0,   0, 
-    230,  14,  16,   0,   4,   0, 
-      0,   0,   1,   0,   0,   7, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,  10,   0, 
-     16,   0,   4,   0,   0,   0, 
-     18,   0,   0,   1,  32,   0, 
-      0,   8,  18,   0,  16,   0, 
-      4,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-     31,   0,   4,   3,  10,   0, 
-     16,   0,   4,   0,   0,   0, 
-     30,   0,   0,  11,  98,   0, 
-     16,   0,   4,   0,   0,   0, 
-    166, 139,  32,   0,   0,   0, 
-      0,   0,   1,   0,   0,   0, 
-      2,  64,   0,   0,   0,   0, 
-      0,   0, 255, 255, 255, 255, 
-    255, 255, 255, 255,   0,   0, 
-      0,   0,  86,   0,   0,   5, 
-     98,   0,  16,   0,   4,   0, 
-      0,   0,  86,   6,  16,   0, 
-      4,   0,   0,   0,  52,   0, 
-      0,  10,  50,   0,  16,   0, 
-      5,   0,   0,   0,  70,   0, 
-     16,   0,   1,   0,   0,   0, 
-      2,  64,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,  51,   0,   0,   7, 
-     98,   0,  16,   0,   4,   0, 
-      0,   0,  86,   6,  16,   0, 
-      4,   0,   0,   0,   6,   1, 
-     16,   0,   5,   0,   0,   0, 
-     28,   0,   0,   5,  98,   0, 
-     16,   0,   4,   0,   0,   0, 
-     86,   6,  16,   0,   4,   0, 
-      0,   0,  28,   0,   0,   5, 
-     50,   0,  16,   0,   5,   0, 
-      0,   0,  70,   0,  16,   0, 
-      3,   0,   0,   0,  38,   0, 
-      0,   9,   0, 208,   0,   0, 
-     50,   0,  16,   0,   5,   0, 
-      0,   0,  70,   0,  16,   0, 
-      5,   0,   0,   0,  70, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  30,   0, 
-      0,   7, 130,   0,  16,   0, 
-      4,   0,   0,   0,  26,   0, 
-     16,   0,   5,   0,   0,   0, 
-     10,   0,  16,   0,   5,   0, 
-      0,   0,  35,   0,   0,  10, 
-     34,   0,  16,   0,   4,   0, 
-      0,   0,  26,   0,  16,   0, 
-      4,   0,   0,   0,  42, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  58,   0, 
-     16,   0,   4,   0,   0,   0, 
-     35,   0,   0,  10,  34,   0, 
-     16,   0,   4,   0,   0,   0, 
-     42,   0,  16,   0,   4,   0, 
-      0,   0,  58, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      4,   0,   0,   0, 167,   0, 
-      0, 139,   2,  67,   0, 128, 
-    131, 153,  25,   0,  98,   0, 
-     16,   0,   4,   0,   0,   0, 
-     26,   0,  16,   0,   4,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,   6, 225, 
-     17,   0,   0,   0,   0,   0, 
-     54,   0,   0,   5,  50,   0, 
-     16,   0,   5,   0,   0,   0, 
-    150,   5,  16,   0,   4,   0, 
-      0,   0, 201,   0,   0,   5, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  70,   4,  16,   0, 
-      5,   0,   0,   0,  18,   0, 
-      0,   1,  32,   0,   0,   8, 
-     34,   0,  16,   0,   4,   0, 
-      0,   0,  58, 128,  32,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   1,  64,   0,   0, 
-      2,   0,   0,   0,  31,   0, 
-      4,   3,  26,   0,  16,   0, 
-      4,   0,   0,   0,  55,   0, 
-      0,  10,  34,   0,  16,   0, 
-      4,   0,   0,   0,  10, 128, 
-     32,   0,   0,   0,   0,   0, 
-      7,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0, 128, 
-      1,  64,   0,   0,   0,   0, 
-      0,  63,   0,   0,   0,   7, 
-     98,   0,  16,   0,   4,   0, 
-      0,   0, 246,  14,  16,   0, 
-      0,   0,   0,   0,  86,   5, 
-     16,   0,   4,   0,   0,   0, 
-     49,   0,   0,   7,  50,   0, 
-     16,   0,   5,   0,   0,   0, 
-     70,   0,  16,   0,   1,   0, 
-      0,   0,  86,   5,  16,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   8, 194,   0,  16,   0, 
-      5,   0,   0,   0,  86,   5, 
-     16,   0,   0,   0,   0,   0, 
-      6,   4,  16, 128,  65,   0, 
-      0,   0,   1,   0,   0,   0, 
-     14,   0,   0,   7,  50,   0, 
-     16,   0,   6,   0,   0,   0, 
-    230,  10,  16,   0,   5,   0, 
-      0,   0, 150,   5,  16,   0, 
-      4,   0,   0,   0,  28,   0, 
-      0,   5,  50,   0,  16,   0, 
-      6,   0,   0,   0,  70,   0, 
-     16,   0,   6,   0,   0,   0, 
-     86,   0,   0,   5, 194,   0, 
-     16,   0,   6,   0,   0,   0, 
-      6,   4,  16,   0,   6,   0, 
-      0,   0,  50,   0,   0,  10, 
-    194,   0,  16,   0,   5,   0, 
-      0,   0, 166,  14,  16, 128, 
-     65,   0,   0,   0,   6,   0, 
-      0,   0,  86,   9,  16,   0, 
-      4,   0,   0,   0, 166,  14, 
-     16,   0,   5,   0,   0,   0, 
-      1,   0,   0,  10,  50,   0, 
-     16,   0,   6,   0,   0,   0, 
-     70,   0,  16,   0,   6,   0, 
-      0,   0,   2,  64,   0,   0, 
-      1,   0,   0,   0,   1,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   7, 194,   0,  16,   0, 
-      6,   0,   0,   0,  86,   5, 
-     16,   0,   0,   0,   0,   0, 
-    166,  14,  16,   0,   5,   0, 
-      0,   0,   0,   0,   0,   8, 
-    194,   0,  16,   0,   5,   0, 
-      0,   0, 246,  11,  16,   0, 
-      0,   0,   0,   0, 166,  14, 
-     16, 128,  65,   0,   0,   0, 
-      5,   0,   0,   0,  55,   0, 
-      0,   9, 194,   0,  16,   0, 
-      5,   0,   0,   0,   6,   4, 
-     16,   0,   6,   0,   0,   0, 
-    166,  14,  16,   0,   5,   0, 
-      0,   0, 166,  14,  16,   0, 
-      6,   0,   0,   0,  49,   0, 
-      0,   7,  50,   0,  16,   0, 
-      6,   0,   0,   0, 182,  15, 
-     16,   0,   0,   0,   0,   0, 
-     70,   0,  16,   0,   1,   0, 
-      0,   0,   0,   0,   0,   8, 
-    194,   0,  16,   0,   6,   0, 
-      0,   0, 246,  11,  16, 128, 
-     65,   0,   0,   0,   0,   0, 
-      0,   0,   6,   4,  16,   0, 
-      1,   0,   0,   0,  14,   0, 
-      0,   7,  50,   0,  16,   0, 
-      7,   0,   0,   0, 230,  10, 
-     16,   0,   6,   0,   0,   0, 
-    150,   5,  16,   0,   4,   0, 
-      0,   0,  28,   0,   0,   5, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0,  70,   0,  16,   0, 
-      7,   0,   0,   0,  86,   0, 
-      0,   5, 194,   0,  16,   0, 
-      7,   0,   0,   0,   6,   4, 
-     16,   0,   7,   0,   0,   0, 
-     50,   0,   0,  10,  98,   0, 
-     16,   0,   4,   0,   0,   0, 
-    166,  11,  16, 128,  65,   0, 
-      0,   0,   7,   0,   0,   0, 
-     86,   6,  16,   0,   4,   0, 
-      0,   0, 166,  11,  16,   0, 
-      6,   0,   0,   0,   1,   0, 
-      0,  10, 194,   0,  16,   0, 
-      6,   0,   0,   0,   6,   4, 
-     16,   0,   7,   0,   0,   0, 
-      2,  64,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      1,   0,   0,   0,   1,   0, 
-      0,   0,   0,   0,   0,   8, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0, 182,  15,  16,   0, 
-      0,   0,   0,   0, 150,   5, 
-     16, 128,  65,   0,   0,   0, 
-      4,   0,   0,   0,   0,   0, 
-      0,   7,  98,   0,  16,   0, 
-      4,   0,   0,   0,  86,   5, 
-     16,   0,   0,   0,   0,   0, 
-     86,   6,  16,   0,   4,   0, 
-      0,   0,  55,   0,   0,   9, 
-     98,   0,  16,   0,   4,   0, 
-      0,   0, 166,  11,  16,   0, 
-      6,   0,   0,   0,  86,   6, 
-     16,   0,   4,   0,   0,   0, 
-      6,   1,  16,   0,   7,   0, 
-      0,   0,  55,   0,   0,   9, 
-     98,   0,  16,   0,   4,   0, 
-      0,   0,   6,   1,  16,   0, 
-      6,   0,   0,   0,  86,   6, 
-     16,   0,   4,   0,   0,   0, 
-      6,   1,  16,   0,   1,   0, 
-      0,   0,  55,   0,   0,   9, 
-     98,   0,  16,   0,   4,   0, 
-      0,   0,   6,   1,  16,   0, 
-      5,   0,   0,   0, 166,  11, 
-     16,   0,   5,   0,   0,   0, 
-     86,   6,  16,   0,   4,   0, 
-      0,   0,  28,   0,   0,   5, 
-     98,   0,  16,   0,   4,   0, 
-      0,   0,  86,   6,  16,   0, 
-      4,   0,   0,   0,  28,   0, 
-      0,   5,  50,   0,  16,   0, 
-      5,   0,   0,   0,  70,   0, 
-     16,   0,   3,   0,   0,   0, 
-     38,   0,   0,   9,   0, 208, 
-      0,   0,  50,   0,  16,   0, 
-      5,   0,   0,   0,  70,   0, 
-     16,   0,   5,   0,   0,   0, 
-     70, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     30,   0,   0,   7, 130,   0, 
-     16,   0,   4,   0,   0,   0, 
-     26,   0,  16,   0,   5,   0, 
-      0,   0,  10,   0,  16,   0, 
-      5,   0,   0,   0,  35,   0, 
-      0,  10,  34,   0,  16,   0, 
-      4,   0,   0,   0,  26,   0, 
-     16,   0,   4,   0,   0,   0, 
-     42, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     58,   0,  16,   0,   4,   0, 
-      0,   0,  35,   0,   0,  10, 
-     34,   0,  16,   0,   4,   0, 
-      0,   0,  42,   0,  16,   0, 
-      4,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   4,   0,   0,   0, 
-    167,   0,   0, 139,   2,  67, 
-      0, 128, 131, 153,  25,   0, 
-     98,   0,  16,   0,   4,   0, 
-      0,   0,  26,   0,  16,   0, 
-      4,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-      6, 225,  17,   0,   0,   0, 
-      0,   0,  54,   0,   0,   5, 
-     50,   0,  16,   0,   5,   0, 
-      0,   0, 150,   5,  16,   0, 
-      4,   0,   0,   0, 201,   0, 
-      0,   5,  66,   0,  16,   0, 
-      3,   0,   0,   0,  70,   4, 
-     16,   0,   5,   0,   0,   0, 
-     18,   0,   0,   1,  54,   0, 
-      0,   5,  66,   0,  16,   0, 
-      3,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     21,   0,   0,   1,  21,   0, 
-      0,   1,  31,   0,   4,   3, 
-     10,   0,  16,   0,   4,   0, 
-      0,   0,  30,   0,   0,  11, 
-     98,   0,  16,   0,   4,   0, 
-      0,   0, 246, 142,  32,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   0,   2,  64,   0,   0, 
-      0,   0,   0,   0, 255, 255, 
-    255, 255, 255, 255, 255, 255, 
-      0,   0,   0,   0,  86,   0, 
-      0,   5,  98,   0,  16,   0, 
-      4,   0,   0,   0,  86,   6, 
-     16,   0,   4,   0,   0,   0, 
-     52,   0,   0,   7, 130,   0, 
-     16,   0,   4,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,  51,   0, 
-      0,   7,  34,   0,  16,   0, 
-      4,   0,   0,   0,  26,   0, 
-     16,   0,   4,   0,   0,   0, 
-     58,   0,  16,   0,   4,   0, 
-      0,   0,  52,   0,   0,   7, 
-    130,   0,  16,   0,   4,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     51,   0,   0,   7,  66,   0, 
-     16,   0,   4,   0,   0,   0, 
-     42,   0,  16,   0,   4,   0, 
-      0,   0,  58,   0,  16,   0, 
-      4,   0,   0,   0,  28,   0, 
-      0,   5,  98,   0,  16,   0, 
-      4,   0,   0,   0,  86,   6, 
-     16,   0,   4,   0,   0,   0, 
-     86,   0,   0,   5, 194,   0, 
-     16,   0,   4,   0,   0,   0, 
-    166,   6,  16,   0,   4,   0, 
-      0,   0,  28,   0,   0,   5, 
-     50,   0,  16,   0,   5,   0, 
-      0,   0,  70,   0,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5,  98,   0,  16,   0, 
-      4,   0,   0,   0, 166,  11, 
-     16,   0,   4,   0,   0,   0, 
-     38,   0,   0,   9,   0, 208, 
-      0,   0,  50,   0,  16,   0, 
-      5,   0,   0,   0,  70,   0, 
-     16,   0,   5,   0,   0,   0, 
-     70, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     30,   0,   0,   7, 130,   0, 
-     16,   0,   4,   0,   0,   0, 
-     26,   0,  16,   0,   5,   0, 
-      0,   0,  10,   0,  16,   0, 
-      5,   0,   0,   0,  35,   0, 
-      0,  10,  34,   0,  16,   0, 
-      4,   0,   0,   0,  26,   0, 
-     16,   0,   4,   0,   0,   0, 
-     42, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     58,   0,  16,   0,   4,   0, 
-      0,   0,  35,   0,   0,  10, 
-     34,   0,  16,   0,   4,   0, 
-      0,   0,  42,   0,  16,   0, 
-      4,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   4,   0,   0,   0, 
-    167,   0,   0, 139,   2,  67, 
-      0, 128, 131, 153,  25,   0, 
-     98,   0,  16,   0,   4,   0, 
-      0,   0,  26,   0,  16,   0, 
-      4,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-      6, 225,  17,   0,   0,   0, 
-      0,   0,  54,   0,   0,   5, 
-     50,   0,  16,   0,   5,   0, 
-      0,   0, 150,   5,  16,   0, 
-      4,   0,   0,   0, 201,   0, 
-      0,   5, 130,   0,  16,   0, 
-      2,   0,   0,   0,  70,   4, 
-     16,   0,   5,   0,   0,   0, 
-     18,   0,   0,   1,  32,   0, 
-      0,   8,  34,   0,  16,   0, 
-      4,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   1,  64, 
-      0,   0,   2,   0,   0,   0, 
-     31,   0,   4,   3,  26,   0, 
-     16,   0,   4,   0,   0,   0, 
-     55,   0,   0,  10,  34,   0, 
-     16,   0,   4,   0,   0,   0, 
-     10, 128,  32,   0,   0,   0, 
-      0,   0,   7,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0, 128,   1,  64,   0,   0, 
-      0,   0,   0,  63,   0,   0, 
-      0,   7,  98,   0,  16,   0, 
-      4,   0,   0,   0, 166,  11, 
-     16,   0,   0,   0,   0,   0, 
-     86,   5,  16,   0,   4,   0, 
-      0,   0,  49,   0,   0,   7, 
-    130,   0,  16,   0,   4,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-      0,   0,   0,   8,  18,   0, 
-     16,   0,   5,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  26,   0,  16, 128, 
-     65,   0,   0,   0,   2,   0, 
-      0,   0,  14,   0,   0,   7, 
-     34,   0,  16,   0,   5,   0, 
-      0,   0,  10,   0,  16,   0, 
-      5,   0,   0,   0,  26,   0, 
-     16,   0,   4,   0,   0,   0, 
-     28,   0,   0,   5,  34,   0, 
-     16,   0,   5,   0,   0,   0, 
-     26,   0,  16,   0,   5,   0, 
-      0,   0,  86,   0,   0,   5, 
-     66,   0,  16,   0,   5,   0, 
-      0,   0,  26,   0,  16,   0, 
-      5,   0,   0,   0,  50,   0, 
-      0,  10,  18,   0,  16,   0, 
-      5,   0,   0,   0,  42,   0, 
-     16, 128,  65,   0,   0,   0, 
-      5,   0,   0,   0,  26,   0, 
-     16,   0,   4,   0,   0,   0, 
-     10,   0,  16,   0,   5,   0, 
-      0,   0,   1,   0,   0,   7, 
-     34,   0,  16,   0,   5,   0, 
-      0,   0,  26,   0,  16,   0, 
-      5,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   7,  66,   0, 
-     16,   0,   5,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  10,   0,  16,   0, 
-      5,   0,   0,   0,   0,   0, 
-      0,   8,  18,   0,  16,   0, 
-      5,   0,   0,   0,  42,   0, 
-     16,   0,   0,   0,   0,   0, 
-     10,   0,  16, 128,  65,   0, 
-      0,   0,   5,   0,   0,   0, 
-     55,   0,   0,   9,  18,   0, 
-     16,   0,   5,   0,   0,   0, 
-     26,   0,  16,   0,   5,   0, 
-      0,   0,  10,   0,  16,   0, 
-      5,   0,   0,   0,  42,   0, 
-     16,   0,   5,   0,   0,   0, 
-     49,   0,   0,   7,  34,   0, 
-     16,   0,   5,   0,   0,   0, 
-     42,   0,  16,   0,   0,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,   0,   0, 
-      0,   8,  66,   0,  16,   0, 
-      5,   0,   0,   0,  42,   0, 
-     16, 128,  65,   0,   0,   0, 
-      0,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     14,   0,   0,   7, 130,   0, 
-     16,   0,   5,   0,   0,   0, 
-     42,   0,  16,   0,   5,   0, 
-      0,   0,  26,   0,  16,   0, 
-      4,   0,   0,   0,  28,   0, 
-      0,   5, 130,   0,  16,   0, 
-      5,   0,   0,   0,  58,   0, 
-     16,   0,   5,   0,   0,   0, 
-     86,   0,   0,   5,  18,   0, 
-     16,   0,   6,   0,   0,   0, 
-     58,   0,  16,   0,   5,   0, 
-      0,   0,  50,   0,   0,  10, 
-     34,   0,  16,   0,   4,   0, 
-      0,   0,  10,   0,  16, 128, 
-     65,   0,   0,   0,   6,   0, 
-      0,   0,  26,   0,  16,   0, 
-      4,   0,   0,   0,  42,   0, 
-     16,   0,   5,   0,   0,   0, 
-      1,   0,   0,   7,  66,   0, 
-     16,   0,   5,   0,   0,   0, 
-     58,   0,  16,   0,   5,   0, 
-      0,   0,   1,  64,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   8, 130,   0,  16,   0, 
-      5,   0,   0,   0,  42,   0, 
-     16,   0,   0,   0,   0,   0, 
-     26,   0,  16, 128,  65,   0, 
-      0,   0,   4,   0,   0,   0, 
-      0,   0,   0,   7,  34,   0, 
-     16,   0,   4,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  26,   0,  16,   0, 
-      4,   0,   0,   0,  55,   0, 
-      0,   9,  34,   0,  16,   0, 
-      4,   0,   0,   0,  42,   0, 
-     16,   0,   5,   0,   0,   0, 
-     26,   0,  16,   0,   4,   0, 
-      0,   0,  58,   0,  16,   0, 
-      5,   0,   0,   0,  55,   0, 
-      0,   9,  34,   0,  16,   0, 
-      4,   0,   0,   0,  26,   0, 
-     16,   0,   5,   0,   0,   0, 
-     26,   0,  16,   0,   4,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  55,   0, 
-      0,   9,  34,   0,  16,   0, 
-      4,   0,   0,   0,  58,   0, 
-     16,   0,   4,   0,   0,   0, 
-     10,   0,  16,   0,   5,   0, 
-      0,   0,  26,   0,  16,   0, 
-      4,   0,   0,   0,  28,   0, 
-      0,   5,  34,   0,  16,   0, 
-      4,   0,   0,   0,  26,   0, 
-     16,   0,   4,   0,   0,   0, 
-     49,   0,   0,   7, 130,   0, 
-     16,   0,   4,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   8,  18,   0,  16,   0, 
-      5,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-     10,   0,  16, 128,  65,   0, 
-      0,   0,   1,   0,   0,   0, 
-     14,   0,   0,   7,  34,   0, 
-     16,   0,   5,   0,   0,   0, 
-     10,   0,  16,   0,   5,   0, 
-      0,   0,  42,   0,  16,   0, 
-      4,   0,   0,   0,  28,   0, 
-      0,   5,  34,   0,  16,   0, 
-      5,   0,   0,   0,  26,   0, 
-     16,   0,   5,   0,   0,   0, 
-     86,   0,   0,   5,  66,   0, 
-     16,   0,   5,   0,   0,   0, 
-     26,   0,  16,   0,   5,   0, 
-      0,   0,  50,   0,   0,  10, 
-     18,   0,  16,   0,   5,   0, 
-      0,   0,  42,   0,  16, 128, 
-     65,   0,   0,   0,   5,   0, 
-      0,   0,  42,   0,  16,   0, 
-      4,   0,   0,   0,  10,   0, 
-     16,   0,   5,   0,   0,   0, 
-      1,   0,   0,   7,  34,   0, 
-     16,   0,   5,   0,   0,   0, 
-     26,   0,  16,   0,   5,   0, 
-      0,   0,   1,  64,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   7,  66,   0,  16,   0, 
-      5,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-     10,   0,  16,   0,   5,   0, 
-      0,   0,   0,   0,   0,   8, 
-     18,   0,  16,   0,   5,   0, 
-      0,   0,  58,   0,  16,   0, 
-      0,   0,   0,   0,  10,   0, 
-     16, 128,  65,   0,   0,   0, 
-      5,   0,   0,   0,  55,   0, 
-      0,   9,  18,   0,  16,   0, 
-      5,   0,   0,   0,  26,   0, 
-     16,   0,   5,   0,   0,   0, 
-     10,   0,  16,   0,   5,   0, 
-      0,   0,  42,   0,  16,   0, 
-      5,   0,   0,   0,  49,   0, 
-      0,   7,  34,   0,  16,   0, 
-      5,   0,   0,   0,  58,   0, 
-     16,   0,   0,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,   0,   0,   0,   8, 
-     66,   0,  16,   0,   5,   0, 
-      0,   0,  58,   0,  16, 128, 
-     65,   0,   0,   0,   0,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  14,   0, 
-      0,   7, 130,   0,  16,   0, 
-      5,   0,   0,   0,  42,   0, 
-     16,   0,   5,   0,   0,   0, 
-     42,   0,  16,   0,   4,   0, 
-      0,   0,  28,   0,   0,   5, 
-    130,   0,  16,   0,   5,   0, 
-      0,   0,  58,   0,  16,   0, 
-      5,   0,   0,   0,  86,   0, 
-      0,   5,  18,   0,  16,   0, 
-      6,   0,   0,   0,  58,   0, 
-     16,   0,   5,   0,   0,   0, 
-     50,   0,   0,  10,  66,   0, 
-     16,   0,   4,   0,   0,   0, 
-     10,   0,  16, 128,  65,   0, 
-      0,   0,   6,   0,   0,   0, 
-     42,   0,  16,   0,   4,   0, 
-      0,   0,  42,   0,  16,   0, 
-      5,   0,   0,   0,   1,   0, 
-      0,   7,  66,   0,  16,   0, 
-      5,   0,   0,   0,  58,   0, 
-     16,   0,   5,   0,   0,   0, 
-      1,  64,   0,   0,   1,   0, 
-      0,   0,   0,   0,   0,   8, 
-    130,   0,  16,   0,   5,   0, 
-      0,   0,  58,   0,  16,   0, 
-      0,   0,   0,   0,  42,   0, 
-     16, 128,  65,   0,   0,   0, 
-      4,   0,   0,   0,   0,   0, 
-      0,   7,  66,   0,  16,   0, 
-      4,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-     42,   0,  16,   0,   4,   0, 
-      0,   0,  55,   0,   0,   9, 
-     66,   0,  16,   0,   4,   0, 
-      0,   0,  42,   0,  16,   0, 
-      5,   0,   0,   0,  42,   0, 
-     16,   0,   4,   0,   0,   0, 
-     58,   0,  16,   0,   5,   0, 
-      0,   0,  55,   0,   0,   9, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  26,   0,  16,   0, 
-      5,   0,   0,   0,  42,   0, 
-     16,   0,   4,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  55,   0,   0,   9, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  58,   0,  16,   0, 
-      4,   0,   0,   0,  10,   0, 
-     16,   0,   5,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  28,   0,   0,   5, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  86,   0, 
-      0,   5,  66,   0,  16,   0, 
-      4,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     86,   0,   0,   5, 130,   0, 
-     16,   0,   4,   0,   0,   0, 
-     26,   0,  16,   0,   4,   0, 
-      0,   0,  28,   0,   0,   5, 
-     50,   0,  16,   0,   5,   0, 
-      0,   0,  70,   0,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5,  98,   0,  16,   0, 
-      4,   0,   0,   0, 166,  11, 
-     16,   0,   4,   0,   0,   0, 
-     38,   0,   0,   9,   0, 208, 
-      0,   0,  50,   0,  16,   0, 
-      5,   0,   0,   0,  70,   0, 
-     16,   0,   5,   0,   0,   0, 
-     70, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     30,   0,   0,   7,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     26,   0,  16,   0,   5,   0, 
-      0,   0,  10,   0,  16,   0, 
-      5,   0,   0,   0,  35,   0, 
-      0,  10,  18,   0,  16,   0, 
-      1,   0,   0,   0,  26,   0, 
-     16,   0,   4,   0,   0,   0, 
-     42, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  35,   0,   0,  10, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  42,   0,  16,   0, 
-      4,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-    167,   0,   0, 139,   2,  67, 
-      0, 128, 131, 153,  25,   0, 
-     98,   0,  16,   0,   4,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-      6, 225,  17,   0,   0,   0, 
-      0,   0,  54,   0,   0,   5, 
-     50,   0,  16,   0,   5,   0, 
-      0,   0, 150,   5,  16,   0, 
-      4,   0,   0,   0, 201,   0, 
-      0,   5, 130,   0,  16,   0, 
-      2,   0,   0,   0,  70,   4, 
-     16,   0,   5,   0,   0,   0, 
-     18,   0,   0,   1,  54,   0, 
-      0,   5, 130,   0,  16,   0, 
-      2,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     21,   0,   0,   1,  21,   0, 
-      0,   1,  31,   0,   4,   3, 
-     10,   0,  16,   0,   4,   0, 
-      0,   0,  30,   0,   0,  11, 
-     98,   0,  16,   0,   4,   0, 
-      0,   0, 246, 142,  32,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   0,   2,  64,   0,   0, 
-      0,   0,   0,   0, 255, 255, 
-    255, 255, 255, 255, 255, 255, 
-      0,   0,   0,   0,  86,   0, 
-      0,   5,  98,   0,  16,   0, 
-      4,   0,   0,   0,  86,   6, 
-     16,   0,   4,   0,   0,   0, 
-     52,   0,   0,   7,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     26,   0,  16,   0,   1,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,  51,   0, 
-      0,   7,  18,   0,  16,   0, 
-      1,   0,   0,   0,  26,   0, 
-     16,   0,   4,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  28,   0,   0,   5, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  52,   0, 
-      0,   7,  34,   0,  16,   0, 
-      4,   0,   0,   0,  10,   0, 
-     16,   0,   2,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  51,   0,   0,   7, 
-     34,   0,  16,   0,   4,   0, 
-      0,   0,  42,   0,  16,   0, 
-      4,   0,   0,   0,  26,   0, 
-     16,   0,   4,   0,   0,   0, 
-     28,   0,   0,   5,  34,   0, 
-     16,   0,   4,   0,   0,   0, 
-     26,   0,  16,   0,   4,   0, 
-      0,   0,  86,   0,   0,   5, 
-     66,   0,  16,   0,   4,   0, 
-      0,   0,  26,   0,  16,   0, 
-      4,   0,   0,   0,  86,   0, 
-      0,   5, 130,   0,  16,   0, 
-      4,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     28,   0,   0,   5,  50,   0, 
-     16,   0,   5,   0,   0,   0, 
-     70,   0,  16,   0,   3,   0, 
-      0,   0,  28,   0,   0,   5, 
-     98,   0,  16,   0,   4,   0, 
-      0,   0, 166,  11,  16,   0, 
-      4,   0,   0,   0,  38,   0, 
-      0,   9,   0, 208,   0,   0, 
-     50,   0,  16,   0,   5,   0, 
-      0,   0,  70,   0,  16,   0, 
-      5,   0,   0,   0,  70, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  30,   0, 
-      0,   7,  18,   0,  16,   0, 
-      1,   0,   0,   0,  26,   0, 
-     16,   0,   5,   0,   0,   0, 
-     10,   0,  16,   0,   5,   0, 
-      0,   0,  35,   0,   0,  10, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  26,   0,  16,   0, 
-      4,   0,   0,   0,  42, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     35,   0,   0,  10,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     42,   0,  16,   0,   4,   0, 
-      0,   0,  58, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0, 167,   0, 
-      0, 139,   2,  67,   0, 128, 
-    131, 153,  25,   0,  98,   0, 
-     16,   0,   4,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,   6, 225, 
-     17,   0,   0,   0,   0,   0, 
-     54,   0,   0,   5,  50,   0, 
-     16,   0,   5,   0,   0,   0, 
-    150,   5,  16,   0,   4,   0, 
-      0,   0, 201,   0,   0,   5, 
-     66,   0,  16,   0,   2,   0, 
-      0,   0,  70,   4,  16,   0, 
-      5,   0,   0,   0,  18,   0, 
-      0,   1,  32,   0,   0,   8, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  58, 128,  32,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   1,  64,   0,   0, 
-      2,   0,   0,   0,  31,   0, 
-      4,   3,  10,   0,  16,   0, 
-      1,   0,   0,   0,  55,   0, 
-      0,  10,  18,   0,  16,   0, 
-      1,   0,   0,   0,  10, 128, 
-     32,   0,   0,   0,   0,   0, 
-      7,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0, 128, 
-      1,  64,   0,   0,   0,   0, 
-      0,  63,   0,   0,   0,   7, 
-     98,   0,  16,   0,   4,   0, 
-      0,   0, 166,  11,  16,   0, 
-      0,   0,   0,   0,   6,   0, 
-     16,   0,   1,   0,   0,   0, 
-     49,   0,   0,   7,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     26,   0,  16,   0,   1,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   8, 130,   0,  16,   0, 
-      4,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-     26,   0,  16, 128,  65,   0, 
-      0,   0,   1,   0,   0,   0, 
-     14,   0,   0,   7,  18,   0, 
-     16,   0,   5,   0,   0,   0, 
-     58,   0,  16,   0,   4,   0, 
-      0,   0,  26,   0,  16,   0, 
-      4,   0,   0,   0,  28,   0, 
-      0,   5,  18,   0,  16,   0, 
-      5,   0,   0,   0,  10,   0, 
-     16,   0,   5,   0,   0,   0, 
-     86,   0,   0,   5,  34,   0, 
-     16,   0,   5,   0,   0,   0, 
-     10,   0,  16,   0,   5,   0, 
-      0,   0,  50,   0,   0,  10, 
-    130,   0,  16,   0,   4,   0, 
-      0,   0,  26,   0,  16, 128, 
-     65,   0,   0,   0,   5,   0, 
-      0,   0,  26,   0,  16,   0, 
-      4,   0,   0,   0,  58,   0, 
-     16,   0,   4,   0,   0,   0, 
-      1,   0,   0,   7,  18,   0, 
-     16,   0,   5,   0,   0,   0, 
-     10,   0,  16,   0,   5,   0, 
-      0,   0,   1,  64,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   7,  34,   0,  16,   0, 
-      5,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-     58,   0,  16,   0,   4,   0, 
-      0,   0,   0,   0,   0,   8, 
-    130,   0,  16,   0,   4,   0, 
-      0,   0,  42,   0,  16,   0, 
-      0,   0,   0,   0,  58,   0, 
-     16, 128,  65,   0,   0,   0, 
-      4,   0,   0,   0,  55,   0, 
-      0,   9, 130,   0,  16,   0, 
-      4,   0,   0,   0,  10,   0, 
-     16,   0,   5,   0,   0,   0, 
-     58,   0,  16,   0,   4,   0, 
-      0,   0,  26,   0,  16,   0, 
-      5,   0,   0,   0,  49,   0, 
-      0,   7,  18,   0,  16,   0, 
-      5,   0,   0,   0,  42,   0, 
-     16,   0,   0,   0,   0,   0, 
-     26,   0,  16,   0,   1,   0, 
-      0,   0,   0,   0,   0,   8, 
-     34,   0,  16,   0,   5,   0, 
-      0,   0,  42,   0,  16, 128, 
-     65,   0,   0,   0,   0,   0, 
-      0,   0,  26,   0,  16,   0, 
-      1,   0,   0,   0,  14,   0, 
-      0,   7,  66,   0,  16,   0, 
-      5,   0,   0,   0,  26,   0, 
-     16,   0,   5,   0,   0,   0, 
-     26,   0,  16,   0,   4,   0, 
-      0,   0,  28,   0,   0,   5, 
-     66,   0,  16,   0,   5,   0, 
-      0,   0,  42,   0,  16,   0, 
-      5,   0,   0,   0,  86,   0, 
-      0,   5, 130,   0,  16,   0, 
-      5,   0,   0,   0,  42,   0, 
-     16,   0,   5,   0,   0,   0, 
-     50,   0,   0,  10,  34,   0, 
-     16,   0,   4,   0,   0,   0, 
-     58,   0,  16, 128,  65,   0, 
-      0,   0,   5,   0,   0,   0, 
-     26,   0,  16,   0,   4,   0, 
-      0,   0,  26,   0,  16,   0, 
-      5,   0,   0,   0,   1,   0, 
-      0,   7,  34,   0,  16,   0, 
-      5,   0,   0,   0,  42,   0, 
-     16,   0,   5,   0,   0,   0, 
-      1,  64,   0,   0,   1,   0, 
-      0,   0,   0,   0,   0,   8, 
-     66,   0,  16,   0,   5,   0, 
-      0,   0,  42,   0,  16,   0, 
-      0,   0,   0,   0,  26,   0, 
-     16, 128,  65,   0,   0,   0, 
-      4,   0,   0,   0,   0,   0, 
-      0,   7,  34,   0,  16,   0, 
-      4,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-     26,   0,  16,   0,   4,   0, 
-      0,   0,  55,   0,   0,   9, 
-     34,   0,  16,   0,   4,   0, 
-      0,   0,  26,   0,  16,   0, 
-      5,   0,   0,   0,  26,   0, 
-     16,   0,   4,   0,   0,   0, 
-     42,   0,  16,   0,   5,   0, 
-      0,   0,  55,   0,   0,   9, 
-     34,   0,  16,   0,   1,   0, 
-      0,   0,  10,   0,  16,   0, 
-      5,   0,   0,   0,  26,   0, 
-     16,   0,   4,   0,   0,   0, 
-     26,   0,  16,   0,   1,   0, 
-      0,   0,  55,   0,   0,   9, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  58,   0, 
-     16,   0,   4,   0,   0,   0, 
-     26,   0,  16,   0,   1,   0, 
-      0,   0,  49,   0,   0,   7, 
-     34,   0,  16,   0,   1,   0, 
-      0,   0,  10,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-      0,   0,   0,   8,  34,   0, 
-     16,   0,   4,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  10,   0,  16, 128, 
-     65,   0,   0,   0,   2,   0, 
-      0,   0,  14,   0,   0,   7, 
-    130,   0,  16,   0,   4,   0, 
-      0,   0,  26,   0,  16,   0, 
-      4,   0,   0,   0,  42,   0, 
-     16,   0,   4,   0,   0,   0, 
-     28,   0,   0,   5, 130,   0, 
-     16,   0,   4,   0,   0,   0, 
-     58,   0,  16,   0,   4,   0, 
-      0,   0,  86,   0,   0,   5, 
-     18,   0,  16,   0,   5,   0, 
-      0,   0,  58,   0,  16,   0, 
-      4,   0,   0,   0,  50,   0, 
-      0,  10,  34,   0,  16,   0, 
-      4,   0,   0,   0,  10,   0, 
-     16, 128,  65,   0,   0,   0, 
-      5,   0,   0,   0,  42,   0, 
-     16,   0,   4,   0,   0,   0, 
-     26,   0,  16,   0,   4,   0, 
-      0,   0,   1,   0,   0,   7, 
-    130,   0,  16,   0,   4,   0, 
-      0,   0,  58,   0,  16,   0, 
-      4,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   7,  18,   0, 
-     16,   0,   5,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  26,   0,  16,   0, 
-      4,   0,   0,   0,   0,   0, 
-      0,   8,  34,   0,  16,   0, 
-      4,   0,   0,   0,  58,   0, 
-     16,   0,   0,   0,   0,   0, 
-     26,   0,  16, 128,  65,   0, 
-      0,   0,   4,   0,   0,   0, 
-     55,   0,   0,   9,  34,   0, 
-     16,   0,   4,   0,   0,   0, 
-     58,   0,  16,   0,   4,   0, 
-      0,   0,  26,   0,  16,   0, 
-      4,   0,   0,   0,  10,   0, 
-     16,   0,   5,   0,   0,   0, 
-     49,   0,   0,   7, 130,   0, 
-     16,   0,   4,   0,   0,   0, 
-     58,   0,  16,   0,   0,   0, 
-      0,   0,  10,   0,  16,   0, 
-      2,   0,   0,   0,   0,   0, 
-      0,   8,  18,   0,  16,   0, 
-      5,   0,   0,   0,  58,   0, 
-     16, 128,  65,   0,   0,   0, 
-      0,   0,   0,   0,  10,   0, 
-     16,   0,   2,   0,   0,   0, 
-     14,   0,   0,   7,  34,   0, 
-     16,   0,   5,   0,   0,   0, 
-     10,   0,  16,   0,   5,   0, 
-      0,   0,  42,   0,  16,   0, 
-      4,   0,   0,   0,  28,   0, 
-      0,   5,  34,   0,  16,   0, 
-      5,   0,   0,   0,  26,   0, 
-     16,   0,   5,   0,   0,   0, 
-     86,   0,   0,   5,  66,   0, 
-     16,   0,   5,   0,   0,   0, 
-     26,   0,  16,   0,   5,   0, 
-      0,   0,  50,   0,   0,  10, 
-     66,   0,  16,   0,   4,   0, 
-      0,   0,  42,   0,  16, 128, 
-     65,   0,   0,   0,   5,   0, 
-      0,   0,  42,   0,  16,   0, 
-      4,   0,   0,   0,  10,   0, 
-     16,   0,   5,   0,   0,   0, 
-      1,   0,   0,   7,  18,   0, 
-     16,   0,   5,   0,   0,   0, 
-     26,   0,  16,   0,   5,   0, 
-      0,   0,   1,  64,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   8,  34,   0,  16,   0, 
-      5,   0,   0,   0,  58,   0, 
-     16,   0,   0,   0,   0,   0, 
-     42,   0,  16, 128,  65,   0, 
-      0,   0,   4,   0,   0,   0, 
-      0,   0,   0,   7,  66,   0, 
-     16,   0,   4,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  42,   0,  16,   0, 
-      4,   0,   0,   0,  55,   0, 
-      0,   9,  66,   0,  16,   0, 
-      4,   0,   0,   0,  10,   0, 
-     16,   0,   5,   0,   0,   0, 
-     42,   0,  16,   0,   4,   0, 
-      0,   0,  26,   0,  16,   0, 
-      5,   0,   0,   0,  55,   0, 
-      0,   9,  66,   0,  16,   0, 
-      4,   0,   0,   0,  58,   0, 
-     16,   0,   4,   0,   0,   0, 
-     42,   0,  16,   0,   4,   0, 
-      0,   0,  10,   0,  16,   0, 
-      2,   0,   0,   0,  55,   0, 
-      0,   9,  34,   0,  16,   0, 
-      1,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-     26,   0,  16,   0,   4,   0, 
-      0,   0,  42,   0,  16,   0, 
-      4,   0,   0,   0,  28,   0, 
-      0,   5,  50,   0,  16,   0, 
-      1,   0,   0,   0,  70,   0, 
-     16,   0,   1,   0,   0,   0, 
-     86,   0,   0,   5, 194,   0, 
-     16,   0,   4,   0,   0,   0, 
-     86,   1,  16,   0,   1,   0, 
-      0,   0,  28,   0,   0,   5, 
-     50,   0,  16,   0,   1,   0, 
-      0,   0,  70,   0,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5,  98,   0,  16,   0, 
-      4,   0,   0,   0, 166,  11, 
-     16,   0,   4,   0,   0,   0, 
-     38,   0,   0,   9,   0, 208, 
-      0,   0,  50,   0,  16,   0, 
-      1,   0,   0,   0,  70,   0, 
-     16,   0,   1,   0,   0,   0, 
-     70, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     30,   0,   0,   7,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     26,   0,  16,   0,   1,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  35,   0, 
-      0,  10,  18,   0,  16,   0, 
-      1,   0,   0,   0,  26,   0, 
-     16,   0,   4,   0,   0,   0, 
-     42, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  35,   0,   0,  10, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  42,   0,  16,   0, 
-      4,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-    167,   0,   0, 139,   2,  67, 
-      0, 128, 131, 153,  25,   0, 
-     50,   0,  16,   0,   1,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     70, 224,  17,   0,   0,   0, 
-      0,   0,  54,   0,   0,   5, 
-    194,   0,  16,   0,   4,   0, 
-      0,   0,   6,   4,  16,   0, 
-      1,   0,   0,   0, 201,   0, 
-      0,   5,  66,   0,  16,   0, 
-      2,   0,   0,   0, 230,  14, 
-     16,   0,   4,   0,   0,   0, 
-     18,   0,   0,   1,  54,   0, 
-      0,   5,  66,   0,  16,   0, 
-      2,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     21,   0,   0,   1,  21,   0, 
-      0,   1,  31,   0,   4,   3, 
-     10,   0,  16,   0,   4,   0, 
-      0,   0,  30,   0,   0,  11, 
-     50,   0,  16,   0,   1,   0, 
-      0,   0, 230, 138,  32,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   0,   2,  64,   0,   0, 
-    255, 255, 255, 255, 255, 255, 
-    255, 255,   0,   0,   0,   0, 
-      0,   0,   0,   0,  86,   0, 
-      0,   5,  50,   0,  16,   0, 
-      1,   0,   0,   0,  70,   0, 
-     16,   0,   1,   0,   0,   0, 
-     52,   0,   0,  10,  50,   0, 
-     16,   0,   4,   0,   0,   0, 
-     70,   0,  16,   0,   2,   0, 
-      0,   0,   2,  64,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,  51,   0, 
-      0,   7,  50,   0,  16,   0, 
-      1,   0,   0,   0,  70,   0, 
-     16,   0,   1,   0,   0,   0, 
-     70,   0,  16,   0,   4,   0, 
-      0,   0,  28,   0,   0,   5, 
-     50,   0,  16,   0,   1,   0, 
-      0,   0,  70,   0,  16,   0, 
-      1,   0,   0,   0,  28,   0, 
-      0,   5,  50,   0,  16,   0, 
-      4,   0,   0,   0,  70,   0, 
-     16,   0,   3,   0,   0,   0, 
-     38,   0,   0,   9,   0, 208, 
-      0,   0,  50,   0,  16,   0, 
-      4,   0,   0,   0,  70,   0, 
-     16,   0,   4,   0,   0,   0, 
-     70, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     30,   0,   0,   7,  18,   0, 
-     16,   0,   4,   0,   0,   0, 
-     26,   0,  16,   0,   4,   0, 
-      0,   0,  10,   0,  16,   0, 
-      4,   0,   0,   0,  35,   0, 
-      0,  10,  18,   0,  16,   0, 
-      1,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     42, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   4,   0, 
-      0,   0,  35,   0,   0,  10, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  26,   0,  16,   0, 
-      1,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-    167,   0,   0, 139,   2,  67, 
-      0, 128, 131, 153,  25,   0, 
-     50,   0,  16,   0,   1,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     70, 224,  17,   0,   0,   0, 
-      0,   0,  54,   0,   0,   5, 
-     50,   0,  16,   0,   4,   0, 
-      0,   0,  70,   0,  16,   0, 
-      1,   0,   0,   0, 201,   0, 
-      0,   5, 130,   0,  16,   0, 
-      3,   0,   0,   0,  70,   4, 
-     16,   0,   4,   0,   0,   0, 
-     18,   0,   0,   1,  32,   0, 
-      0,   8,  18,   0,  16,   0, 
-      1,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   1,  64, 
-      0,   0,   2,   0,   0,   0, 
-     31,   0,   4,   3,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     55,   0,   0,  10,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     10, 128,  32,   0,   0,   0, 
-      0,   0,   7,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0, 128,   1,  64,   0,   0, 
-      0,   0,   0,  63,   0,   0, 
-      0,   7,  50,   0,  16,   0, 
-      1,   0,   0,   0, 182,  15, 
-     16,   0,   0,   0,   0,   0, 
-      6,   0,  16,   0,   1,   0, 
-      0,   0,  49,   0,   0,   7, 
-     50,   0,  16,   0,   4,   0, 
-      0,   0,  70,   0,  16,   0, 
-      2,   0,   0,   0,  86,   5, 
-     16,   0,   0,   0,   0,   0, 
-      0,   0,   0,   8, 194,   0, 
-     16,   0,   4,   0,   0,   0, 
-     86,   5,  16,   0,   0,   0, 
-      0,   0,   6,   4,  16, 128, 
-     65,   0,   0,   0,   2,   0, 
-      0,   0,  14,   0,   0,   7, 
-     50,   0,  16,   0,   5,   0, 
-      0,   0, 230,  10,  16,   0, 
-      4,   0,   0,   0,  70,   0, 
-     16,   0,   1,   0,   0,   0, 
-     28,   0,   0,   5,  50,   0, 
-     16,   0,   5,   0,   0,   0, 
-     70,   0,  16,   0,   5,   0, 
-      0,   0,  86,   0,   0,   5, 
-    194,   0,  16,   0,   5,   0, 
-      0,   0,   6,   4,  16,   0, 
-      5,   0,   0,   0,  50,   0, 
-      0,  10, 194,   0,  16,   0, 
-      4,   0,   0,   0, 166,  14, 
-     16, 128,  65,   0,   0,   0, 
-      5,   0,   0,   0,   6,   4, 
-     16,   0,   1,   0,   0,   0, 
-    166,  14,  16,   0,   4,   0, 
-      0,   0,   1,   0,   0,  10, 
-     50,   0,  16,   0,   5,   0, 
-      0,   0,  70,   0,  16,   0, 
-      5,   0,   0,   0,   2,  64, 
-      0,   0,   1,   0,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   7, 194,   0, 
-     16,   0,   5,   0,   0,   0, 
-     86,   5,  16,   0,   0,   0, 
-      0,   0, 166,  14,  16,   0, 
-      4,   0,   0,   0,   0,   0, 
-      0,   8, 194,   0,  16,   0, 
-      4,   0,   0,   0, 246,  11, 
-     16,   0,   0,   0,   0,   0, 
-    166,  14,  16, 128,  65,   0, 
-      0,   0,   4,   0,   0,   0, 
-     55,   0,   0,   9, 194,   0, 
-     16,   0,   4,   0,   0,   0, 
-      6,   4,  16,   0,   5,   0, 
-      0,   0, 166,  14,  16,   0, 
-      4,   0,   0,   0, 166,  14, 
-     16,   0,   5,   0,   0,   0, 
-     49,   0,   0,   7,  50,   0, 
-     16,   0,   5,   0,   0,   0, 
-    182,  15,  16,   0,   0,   0, 
-      0,   0,  70,   0,  16,   0, 
-      2,   0,   0,   0,   0,   0, 
-      0,   8, 194,   0,  16,   0, 
-      5,   0,   0,   0, 246,  11, 
-     16, 128,  65,   0,   0,   0, 
-      0,   0,   0,   0,   6,   4, 
-     16,   0,   2,   0,   0,   0, 
-     14,   0,   0,   7,  50,   0, 
-     16,   0,   6,   0,   0,   0, 
-    230,  10,  16,   0,   5,   0, 
-      0,   0,  70,   0,  16,   0, 
-      1,   0,   0,   0,  28,   0, 
-      0,   5,  50,   0,  16,   0, 
-      6,   0,   0,   0,  70,   0, 
-     16,   0,   6,   0,   0,   0, 
-     86,   0,   0,   5, 194,   0, 
-     16,   0,   6,   0,   0,   0, 
-      6,   4,  16,   0,   6,   0, 
-      0,   0,  50,   0,   0,  10, 
-     50,   0,  16,   0,   1,   0, 
-      0,   0, 230,  10,  16, 128, 
-     65,   0,   0,   0,   6,   0, 
-      0,   0,  70,   0,  16,   0, 
-      1,   0,   0,   0, 230,  10, 
-     16,   0,   5,   0,   0,   0, 
-      1,   0,   0,  10, 194,   0, 
-     16,   0,   5,   0,   0,   0, 
-      6,   4,  16,   0,   6,   0, 
-      0,   0,   2,  64,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   1,   0,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   8,  50,   0,  16,   0, 
-      6,   0,   0,   0, 182,  15, 
-     16,   0,   0,   0,   0,   0, 
-     70,   0,  16, 128,  65,   0, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   7,  50,   0, 
-     16,   0,   1,   0,   0,   0, 
-     86,   5,  16,   0,   0,   0, 
-      0,   0,  70,   0,  16,   0, 
-      1,   0,   0,   0,  55,   0, 
-      0,   9,  50,   0,  16,   0, 
-      1,   0,   0,   0, 230,  10, 
-     16,   0,   5,   0,   0,   0, 
-     70,   0,  16,   0,   1,   0, 
-      0,   0,  70,   0,  16,   0, 
-      6,   0,   0,   0,  55,   0, 
-      0,   9,  50,   0,  16,   0, 
-      1,   0,   0,   0,  70,   0, 
-     16,   0,   5,   0,   0,   0, 
-     70,   0,  16,   0,   1,   0, 
-      0,   0,  70,   0,  16,   0, 
-      2,   0,   0,   0,  55,   0, 
-      0,   9,  50,   0,  16,   0, 
-      1,   0,   0,   0,  70,   0, 
-     16,   0,   4,   0,   0,   0, 
-    230,  10,  16,   0,   4,   0, 
-      0,   0,  70,   0,  16,   0, 
-      1,   0,   0,   0,  28,   0, 
-      0,   5,  50,   0,  16,   0, 
-      1,   0,   0,   0,  70,   0, 
-     16,   0,   1,   0,   0,   0, 
-     28,   0,   0,   5,  50,   0, 
-     16,   0,   2,   0,   0,   0, 
-     70,   0,  16,   0,   3,   0, 
-      0,   0,  38,   0,   0,   9, 
-      0, 208,   0,   0,  50,   0, 
-     16,   0,   2,   0,   0,   0, 
-     70,   0,  16,   0,   2,   0, 
-      0,   0,  70, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  30,   0,   0,   7, 
-     18,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16,   0,   2,   0,   0,   0, 
-     35,   0,   0,  10,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      2,   0,   0,   0,  35,   0, 
-      0,  10,  18,   0,  16,   0, 
-      1,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  67,   0, 128, 131, 153, 
-     25,   0,  50,   0,  16,   0, 
-      1,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  70, 224,  17,   0, 
-      0,   0,   0,   0,  54,   0, 
-      0,   5,  50,   0,  16,   0, 
-      2,   0,   0,   0,  70,   0, 
-     16,   0,   1,   0,   0,   0, 
-    201,   0,   0,   5, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     70,   4,  16,   0,   2,   0, 
-      0,   0,  18,   0,   0,   1, 
-     54,   0,   0,   5, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  21,   0,   0,   1, 
-     21,   0,   0,   1,  21,   0, 
-      0,   1,  26,   0,   0,   5, 
-     50,   0,  16,   0,   1,   0, 
-      0,   0, 230,  10,  16,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   8,  18,   0,  16,   0, 
-      2,   0,   0,   0,  58,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16, 128,  65,   0, 
-      0,   0,   3,   0,   0,   0, 
-     50,   0,   0,   9,  18,   0, 
-     16,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  10,   0,  16,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-      0,   0,   0,   8,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16, 128,  65,   0, 
-      0,   0,   2,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  50,   0,   0,   9, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   2,   0, 
-      0,   0,   0,   0,   0,   8, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  10,   0,  16, 128, 
-     65,   0,   0,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  50,   0, 
-      0,   9,  18,   0,  16,   0, 
-      1,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  10,   0,  16,   0, 
-      2,   0,   0,   0, 202,   0, 
-      0,   5,  50,   0,  16,   0, 
-      1,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-    168,   0,   0,   9,  50, 224, 
-     17,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   0,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,  70,   0, 
-     16,   0,   1,   0,   0,   0, 
-     18,   0,   0,   1,  32,   0, 
-      0,   8,  18,   0,  16,   0, 
-      1,   0,   0,   0,  42, 128, 
-     32,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   1,  64, 
-      0,   0,   2,   0,   0,   0, 
-     31,   0,   4,   3,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     65,   0,   0,   5,  50,   0, 
-     16,   0,   1,   0,   0,   0, 
-    182,  15,  16,   0,   1,   0, 
-      0,   0,   0,   0,   0,  10, 
-    242,   0,  16,   0,   2,   0, 
-      0,   0,  22,   5,  16,   0, 
-      1,   0,   0,   0,   2,  64, 
-      0,   0,   0,   0,   0,  64, 
-      0,   0, 128, 191,   0,   0, 
-    128, 191,   0,   0, 128,  63, 
-     31,   0,   0,   4,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,  29,   0, 
-      0,  10, 194,   0,  16,   0, 
-      3,   0,   0,   0, 166,   6, 
-     16,   0,   2,   0,   0,   0, 
-      2,  64,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,  28,   0,   0,   5, 
-     50,   0,  16,   0,   4,   0, 
-      0,   0, 102,  10,  16,   0, 
-      2,   0,   0,   0,  79,   0, 
-      0,   8,  50,   0,  16,   0, 
-      4,   0,   0,   0,  70,   0, 
-     16,   0,   4,   0,   0,   0, 
-    182, 143,  32,   0,   0,   0, 
-      0,   0,   1,   0,   0,   0, 
-      1,   0,   0,   7,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  10,   0,  16,   0, 
-      4,   0,   0,   0,   1,   0, 
-      0,   7,  66,   0,  16,   0, 
-      3,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,   1,   0,   0,   7, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  26,   0,  16,   0, 
-      4,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5,  82,   0, 
-     16,   0,   4,   0,   0,   0, 
-      6,   1,  16,   0,   3,   0, 
-      0,   0,  28,   0,   0,   5, 
-     50,   0,  16,   0,   5,   0, 
-      0,   0, 150,   5,  16,   0, 
-      2,   0,   0,   0,  38,   0, 
-      0,   9,   0, 208,   0,   0, 
-     82,   0,  16,   0,   4,   0, 
-      0,   0,   6,   2,  16,   0, 
-      4,   0,   0,   0,   6, 129, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  30,   0, 
-      0,   7,  18,   0,  16,   0, 
-      4,   0,   0,   0,  42,   0, 
-     16,   0,   4,   0,   0,   0, 
-     10,   0,  16,   0,   4,   0, 
-      0,   0,  35,   0,   0,  10, 
-     18,   0,  16,   0,   4,   0, 
-      0,   0,  10,   0,  16,   0, 
-      5,   0,   0,   0,  42, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16,   0,   4,   0,   0,   0, 
-     35,   0,   0,  10,  18,   0, 
-     16,   0,   4,   0,   0,   0, 
-     26,   0,  16,   0,   5,   0, 
-      0,   0,  58, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      4,   0,   0,   0, 167,   0, 
-      0, 139,   2,  67,   0, 128, 
-    131, 153,  25,   0,  82,   0, 
-     16,   0,   4,   0,   0,   0, 
-     10,   0,  16,   0,   4,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,   6, 225, 
-     17,   0,   0,   0,   0,   0, 
-     54,   0,   0,   5,  50,   0, 
-     16,   0,   5,   0,   0,   0, 
-    134,   0,  16,   0,   4,   0, 
-      0,   0, 201,   0,   0,   5, 
-     18,   0,  16,   0,   4,   0, 
-      0,   0,  70,   4,  16,   0, 
-      5,   0,   0,   0,   1,   0, 
-      0,   7,  18,   0,  16,   0, 
-      5,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10,   0,  16,   0,   4,   0, 
-      0,   0,  29,   0,   0,   7, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  26,   0,  16,   0, 
-      1,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     28,   0,   0,   5,  82,   0, 
-     16,   0,   4,   0,   0,   0, 
-     86,   5,  16,   0,   1,   0, 
-      0,   0,  79,   0,   0,   8, 
-     18,   0,  16,   0,   4,   0, 
-      0,   0,  10,   0,  16,   0, 
-      4,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      1,   0,   0,   0,   1,   0, 
-      0,   7,  66,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10,   0,  16,   0,   4,   0, 
-      0,   0,   1,   0,   0,   7, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-      1,   0,   0,   7,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     26,   0,  16,   0,   4,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5,  50,   0,  16,   0, 
-      4,   0,   0,   0,  70,   0, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  38,   0,   0,   9, 
-      0, 208,   0,   0,  50,   0, 
-     16,   0,   4,   0,   0,   0, 
-     70,   0,  16,   0,   4,   0, 
-      0,   0,  70, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  30,   0,   0,   7, 
-     18,   0,  16,   0,   4,   0, 
-      0,   0,  26,   0,  16,   0, 
-      4,   0,   0,   0,  10,   0, 
-     16,   0,   4,   0,   0,   0, 
-     35,   0,   0,  10, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      4,   0,   0,   0,  35,   0, 
-      0,  10, 130,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   4,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  67,   0, 128, 131, 153, 
-     25,   0,  50,   0,  16,   0, 
-      4,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  70, 224,  17,   0, 
-      0,   0,   0,   0,  54,   0, 
-      0,   5, 194,   0,  16,   0, 
-      4,   0,   0,   0,   6,   4, 
-     16,   0,   4,   0,   0,   0, 
-    201,   0,   0,   5, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-    230,  14,  16,   0,   4,   0, 
-      0,   0,   1,   0,   0,   7, 
-     18,   0,  16,   0,   4,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     18,   0,   0,   1,  32,   0, 
-      0,   8,  66,   0,  16,   0, 
-      3,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-     31,   0,   4,   3,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     30,   0,   0,  11,  50,   0, 
-     16,   0,   6,   0,   0,   0, 
-    230, 138,  32,   0,   0,   0, 
-      0,   0,   1,   0,   0,   0, 
-      2,  64,   0,   0, 255, 255, 
-    255, 255, 255, 255, 255, 255, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,  86,   0,   0,   5, 
-     50,   0,  16,   0,   6,   0, 
-      0,   0,  70,   0,  16,   0, 
-      6,   0,   0,   0,  52,   0, 
-      0,  10, 194,   0,  16,   0, 
-      6,   0,   0,   0,  86,   9, 
-     16,   0,   2,   0,   0,   0, 
-      2,  64,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,  51,   0,   0,   7, 
-     50,   0,  16,   0,   6,   0, 
-      0,   0,  70,   0,  16,   0, 
-      6,   0,   0,   0, 230,  10, 
-     16,   0,   6,   0,   0,   0, 
-     28,   0,   0,   5,  50,   0, 
-     16,   0,   6,   0,   0,   0, 
-     70,   0,  16,   0,   6,   0, 
-      0,   0,  28,   0,   0,   5, 
-    194,   0,  16,   0,   6,   0, 
-      0,   0,   6,   4,  16,   0, 
-      3,   0,   0,   0,  38,   0, 
-      0,   9,   0, 208,   0,   0, 
-    194,   0,  16,   0,   6,   0, 
-      0,   0, 166,  14,  16,   0, 
-      6,   0,   0,   0,   6, 132, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  30,   0, 
-      0,   7, 130,   0,  16,   0, 
-      3,   0,   0,   0,  58,   0, 
-     16,   0,   6,   0,   0,   0, 
-     42,   0,  16,   0,   6,   0, 
-      0,   0,  35,   0,   0,  10, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0,  10,   0,  16,   0, 
-      6,   0,   0,   0,  42, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     35,   0,   0,  10, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     26,   0,  16,   0,   6,   0, 
-      0,   0,  58, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0, 167,   0, 
-      0, 139,   2,  67,   0, 128, 
-    131, 153,  25,   0,  50,   0, 
-     16,   0,   6,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,  70, 224, 
-     17,   0,   0,   0,   0,   0, 
-     54,   0,   0,   5, 194,   0, 
-     16,   0,   6,   0,   0,   0, 
-      6,   4,  16,   0,   6,   0, 
-      0,   0, 201,   0,   0,   5, 
-     18,   0,  16,   0,   5,   0, 
-      0,   0, 230,  14,  16,   0, 
-      6,   0,   0,   0,  18,   0, 
-      0,   1,  32,   0,   0,   8, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0,  58, 128,  32,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   1,  64,   0,   0, 
-      2,   0,   0,   0,  31,   0, 
-      4,   3,  58,   0,  16,   0, 
-      3,   0,   0,   0,  55,   0, 
-      0,  10, 130,   0,  16,   0, 
-      3,   0,   0,   0,  10, 128, 
-     32,   0,   0,   0,   0,   0, 
-      7,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0, 128, 
-      1,  64,   0,   0,   0,   0, 
-      0,  63,   0,   0,   0,   7, 
-     50,   0,  16,   0,   6,   0, 
-      0,   0, 182,  15,  16,   0, 
-      0,   0,   0,   0, 246,  15, 
-     16,   0,   3,   0,   0,   0, 
-     49,   0,   0,   7, 194,   0, 
-     16,   0,   6,   0,   0,   0, 
-     86,   9,  16,   0,   2,   0, 
-      0,   0,  86,   5,  16,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   8,  50,   0,  16,   0, 
-      7,   0,   0,   0,  86,   5, 
-     16,   0,   0,   0,   0,   0, 
-    150,   5,  16, 128,  65,   0, 
-      0,   0,   2,   0,   0,   0, 
-     14,   0,   0,   7, 194,   0, 
-     16,   0,   7,   0,   0,   0, 
-      6,   4,  16,   0,   7,   0, 
-      0,   0,   6,   4,  16,   0, 
-      6,   0,   0,   0,  28,   0, 
-      0,   5, 194,   0,  16,   0, 
-      7,   0,   0,   0, 166,  14, 
-     16,   0,   7,   0,   0,   0, 
-     86,   0,   0,   5,  50,   0, 
-     16,   0,   8,   0,   0,   0, 
-    230,  10,  16,   0,   7,   0, 
-      0,   0,  50,   0,   0,  10, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0,  70,   0,  16, 128, 
-     65,   0,   0,   0,   8,   0, 
-      0,   0,  70,   0,  16,   0, 
-      6,   0,   0,   0,  70,   0, 
-     16,   0,   7,   0,   0,   0, 
-      1,   0,   0,  10, 194,   0, 
-     16,   0,   7,   0,   0,   0, 
-    166,  14,  16,   0,   7,   0, 
-      0,   0,   2,  64,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   1,   0,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   7,  50,   0,  16,   0, 
-      8,   0,   0,   0,  86,   5, 
-     16,   0,   0,   0,   0,   0, 
-     70,   0,  16,   0,   7,   0, 
-      0,   0,   0,   0,   0,   8, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0, 182,  15,  16,   0, 
-      0,   0,   0,   0,  70,   0, 
-     16, 128,  65,   0,   0,   0, 
-      7,   0,   0,   0,  55,   0, 
-      0,   9,  50,   0,  16,   0, 
-      7,   0,   0,   0, 230,  10, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,   7,   0, 
-      0,   0,  70,   0,  16,   0, 
-      8,   0,   0,   0,  49,   0, 
-      0,   7, 194,   0,  16,   0, 
-      7,   0,   0,   0, 246,  11, 
-     16,   0,   0,   0,   0,   0, 
-     86,   9,  16,   0,   2,   0, 
-      0,   0,   0,   0,   0,   8, 
-     50,   0,  16,   0,   8,   0, 
-      0,   0, 182,  15,  16, 128, 
-     65,   0,   0,   0,   0,   0, 
-      0,   0, 150,   5,  16,   0, 
-      2,   0,   0,   0,  14,   0, 
-      0,   7, 194,   0,  16,   0, 
-      8,   0,   0,   0,   6,   4, 
-     16,   0,   8,   0,   0,   0, 
-      6,   4,  16,   0,   6,   0, 
-      0,   0,  28,   0,   0,   5, 
-    194,   0,  16,   0,   8,   0, 
-      0,   0, 166,  14,  16,   0, 
-      8,   0,   0,   0,  86,   0, 
-      0,   5,  50,   0,  16,   0, 
-      9,   0,   0,   0, 230,  10, 
-     16,   0,   8,   0,   0,   0, 
-     50,   0,   0,  10,  50,   0, 
-     16,   0,   6,   0,   0,   0, 
-     70,   0,  16, 128,  65,   0, 
-      0,   0,   9,   0,   0,   0, 
-     70,   0,  16,   0,   6,   0, 
-      0,   0,  70,   0,  16,   0, 
-      8,   0,   0,   0,   1,   0, 
-      0,  10,  50,   0,  16,   0, 
-      8,   0,   0,   0, 230,  10, 
-     16,   0,   8,   0,   0,   0, 
-      2,  64,   0,   0,   1,   0, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   8, 
-    194,   0,  16,   0,   8,   0, 
-      0,   0, 246,  11,  16,   0, 
-      0,   0,   0,   0,   6,   4, 
-     16, 128,  65,   0,   0,   0, 
-      6,   0,   0,   0,   0,   0, 
-      0,   7,  50,   0,  16,   0, 
-      6,   0,   0,   0,  86,   5, 
-     16,   0,   0,   0,   0,   0, 
-     70,   0,  16,   0,   6,   0, 
-      0,   0,  55,   0,   0,   9, 
-     50,   0,  16,   0,   6,   0, 
-      0,   0,  70,   0,  16,   0, 
-      8,   0,   0,   0,  70,   0, 
-     16,   0,   6,   0,   0,   0, 
-    230,  10,  16,   0,   8,   0, 
-      0,   0,  55,   0,   0,   9, 
-     50,   0,  16,   0,   6,   0, 
-      0,   0, 230,  10,  16,   0, 
-      7,   0,   0,   0,  70,   0, 
-     16,   0,   6,   0,   0,   0, 
-    150,   5,  16,   0,   2,   0, 
-      0,   0,  55,   0,   0,   9, 
-     50,   0,  16,   0,   6,   0, 
-      0,   0, 230,  10,  16,   0, 
-      6,   0,   0,   0,  70,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,   6,   0, 
-      0,   0,  28,   0,   0,   5, 
-     50,   0,  16,   0,   6,   0, 
-      0,   0,  70,   0,  16,   0, 
-      6,   0,   0,   0,  28,   0, 
-      0,   5, 194,   0,  16,   0, 
-      6,   0,   0,   0,   6,   4, 
-     16,   0,   3,   0,   0,   0, 
-     38,   0,   0,   9,   0, 208, 
-      0,   0, 194,   0,  16,   0, 
-      6,   0,   0,   0, 166,  14, 
-     16,   0,   6,   0,   0,   0, 
-      6, 132,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     30,   0,   0,   7, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16,   0,   6,   0, 
-      0,   0,  42,   0,  16,   0, 
-      6,   0,   0,   0,  35,   0, 
-      0,  10, 130,   0,  16,   0, 
-      3,   0,   0,   0,  10,   0, 
-     16,   0,   6,   0,   0,   0, 
-     42, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  35,   0,   0,  10, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0,  26,   0,  16,   0, 
-      6,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-    167,   0,   0, 139,   2,  67, 
-      0, 128, 131, 153,  25,   0, 
-     50,   0,  16,   0,   6,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     70, 224,  17,   0,   0,   0, 
-      0,   0,  54,   0,   0,   5, 
-    194,   0,  16,   0,   6,   0, 
-      0,   0,   6,   4,  16,   0, 
-      6,   0,   0,   0, 201,   0, 
-      0,   5,  18,   0,  16,   0, 
-      5,   0,   0,   0, 230,  14, 
-     16,   0,   6,   0,   0,   0, 
-     18,   0,   0,   1,  54,   0, 
-      0,   5,  18,   0,  16,   0, 
-      5,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     21,   0,   0,   1,  21,   0, 
-      0,   1,  31,   0,   4,   3, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  30,   0,   0,  11, 
-    194,   0,  16,   0,   3,   0, 
-      0,   0, 246, 139,  32,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   0,   2,  64,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0, 255, 255, 255, 255, 
-    255, 255, 255, 255,  86,   0, 
-      0,   5, 194,   0,  16,   0, 
-      3,   0,   0,   0, 166,  14, 
-     16,   0,   3,   0,   0,   0, 
-     52,   0,   0,   7,  18,   0, 
-     16,   0,   6,   0,   0,   0, 
-     26,   0,  16,   0,   1,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,  51,   0, 
-      0,   7,  66,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10,   0,  16,   0,   6,   0, 
-      0,   0,  52,   0,   0,   7, 
-     18,   0,  16,   0,   6,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     51,   0,   0,   7, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  10,   0,  16,   0, 
-      6,   0,   0,   0,  28,   0, 
-      0,   5, 194,   0,  16,   0, 
-      3,   0,   0,   0, 166,  14, 
-     16,   0,   3,   0,   0,   0, 
-     86,   0,   0,   5, 194,   0, 
-     16,   0,   6,   0,   0,   0, 
-    246,  11,  16,   0,   3,   0, 
-      0,   0,  28,   0,   0,   5, 
-    194,   0,  16,   0,   3,   0, 
-      0,   0,   6,   4,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5,  50,   0,  16,   0, 
-      6,   0,   0,   0, 230,  10, 
-     16,   0,   6,   0,   0,   0, 
-     38,   0,   0,   9,   0, 208, 
-      0,   0, 194,   0,  16,   0, 
-      3,   0,   0,   0, 166,  14, 
-     16,   0,   3,   0,   0,   0, 
-      6, 132,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     30,   0,   0,   7,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  35,   0, 
-      0,  10,  66,   0,  16,   0, 
-      3,   0,   0,   0,  10,   0, 
-     16,   0,   6,   0,   0,   0, 
-     42, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  35,   0,   0,  10, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  26,   0,  16,   0, 
-      6,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-    167,   0,   0, 139,   2,  67, 
-      0, 128, 131, 153,  25,   0, 
-    194,   0,  16,   0,   3,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-      6, 228,  17,   0,   0,   0, 
-      0,   0,  54,   0,   0,   5, 
-     50,   0,  16,   0,   6,   0, 
-      0,   0, 230,  10,  16,   0, 
-      3,   0,   0,   0, 201,   0, 
-      0,   5,  18,   0,  16,   0, 
-      4,   0,   0,   0,  70,   4, 
-     16,   0,   6,   0,   0,   0, 
-     18,   0,   0,   1,  32,   0, 
-      0,   8,  66,   0,  16,   0, 
-      3,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   1,  64, 
-      0,   0,   2,   0,   0,   0, 
-     31,   0,   4,   3,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     55,   0,   0,  10,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10, 128,  32,   0,   0,   0, 
-      0,   0,   7,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0, 128,   1,  64,   0,   0, 
-      0,   0,   0,  63,   0,   0, 
-      0,   7, 194,   0,  16,   0, 
-      3,   0,   0,   0, 166,  14, 
-     16,   0,   0,   0,   0,   0, 
-    166,  10,  16,   0,   3,   0, 
-      0,   0,  49,   0,   0,   7, 
-     18,   0,  16,   0,   6,   0, 
-      0,   0,  26,   0,  16,   0, 
-      1,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-      0,   0,   0,   8,  34,   0, 
-     16,   0,   6,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  26,   0,  16, 128, 
-     65,   0,   0,   0,   1,   0, 
-      0,   0,  14,   0,   0,   7, 
-     66,   0,  16,   0,   6,   0, 
-      0,   0,  26,   0,  16,   0, 
-      6,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5,  66,   0, 
-     16,   0,   6,   0,   0,   0, 
-     42,   0,  16,   0,   6,   0, 
-      0,   0,  86,   0,   0,   5, 
-    130,   0,  16,   0,   6,   0, 
-      0,   0,  42,   0,  16,   0, 
-      6,   0,   0,   0,  50,   0, 
-      0,  10,  34,   0,  16,   0, 
-      6,   0,   0,   0,  58,   0, 
-     16, 128,  65,   0,   0,   0, 
-      6,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     26,   0,  16,   0,   6,   0, 
-      0,   0,   1,   0,   0,   7, 
-     66,   0,  16,   0,   6,   0, 
-      0,   0,  42,   0,  16,   0, 
-      6,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   7, 130,   0, 
-     16,   0,   6,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  26,   0,  16,   0, 
-      6,   0,   0,   0,   0,   0, 
-      0,   8,  34,   0,  16,   0, 
-      6,   0,   0,   0,  42,   0, 
-     16,   0,   0,   0,   0,   0, 
-     26,   0,  16, 128,  65,   0, 
-      0,   0,   6,   0,   0,   0, 
-     55,   0,   0,   9,  34,   0, 
-     16,   0,   6,   0,   0,   0, 
-     42,   0,  16,   0,   6,   0, 
-      0,   0,  26,   0,  16,   0, 
-      6,   0,   0,   0,  58,   0, 
-     16,   0,   6,   0,   0,   0, 
-     49,   0,   0,   7,  66,   0, 
-     16,   0,   6,   0,   0,   0, 
-     42,   0,  16,   0,   0,   0, 
-      0,   0,  26,   0,  16,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   8, 130,   0,  16,   0, 
-      6,   0,   0,   0,  42,   0, 
-     16, 128,  65,   0,   0,   0, 
-      0,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-     14,   0,   0,   7,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   6,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5,  18,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     86,   0,   0,   5,  34,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  50,   0,   0,  10, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  26,   0,  16, 128, 
-     65,   0,   0,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  58,   0, 
-     16,   0,   6,   0,   0,   0, 
-      1,   0,   0,   7, 130,   0, 
-     16,   0,   6,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,   1,  64,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   8,  18,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   0,   0,   0,   0, 
-     42,   0,  16, 128,  65,   0, 
-      0,   0,   3,   0,   0,   0, 
-      0,   0,   0,   7,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  55,   0, 
-      0,   9,  66,   0,  16,   0, 
-      3,   0,   0,   0,  58,   0, 
-     16,   0,   6,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  55,   0, 
-      0,   9,  66,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   6,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  26,   0,  16,   0, 
-      1,   0,   0,   0,  55,   0, 
-      0,   9,  66,   0,  16,   0, 
-      3,   0,   0,   0,  10,   0, 
-     16,   0,   6,   0,   0,   0, 
-     26,   0,  16,   0,   6,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  49,   0, 
-      0,   7,  18,   0,  16,   0, 
-      6,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,   0,   0,   0,   8, 
-     34,   0,  16,   0,   6,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  26,   0, 
-     16, 128,  65,   0,   0,   0, 
-      2,   0,   0,   0,  14,   0, 
-      0,   7,  66,   0,  16,   0, 
-      6,   0,   0,   0,  26,   0, 
-     16,   0,   6,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  28,   0,   0,   5, 
-     66,   0,  16,   0,   6,   0, 
-      0,   0,  42,   0,  16,   0, 
-      6,   0,   0,   0,  86,   0, 
-      0,   5, 130,   0,  16,   0, 
-      6,   0,   0,   0,  42,   0, 
-     16,   0,   6,   0,   0,   0, 
-     50,   0,   0,  10,  34,   0, 
-     16,   0,   6,   0,   0,   0, 
-     58,   0,  16, 128,  65,   0, 
-      0,   0,   6,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  26,   0,  16,   0, 
-      6,   0,   0,   0,   1,   0, 
-      0,   7,  66,   0,  16,   0, 
-      6,   0,   0,   0,  42,   0, 
-     16,   0,   6,   0,   0,   0, 
-      1,  64,   0,   0,   1,   0, 
-      0,   0,   0,   0,   0,   7, 
-    130,   0,  16,   0,   6,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  26,   0, 
-     16,   0,   6,   0,   0,   0, 
-      0,   0,   0,   8,  34,   0, 
-     16,   0,   6,   0,   0,   0, 
-     58,   0,  16,   0,   0,   0, 
-      0,   0,  26,   0,  16, 128, 
-     65,   0,   0,   0,   6,   0, 
-      0,   0,  55,   0,   0,   9, 
-     34,   0,  16,   0,   6,   0, 
-      0,   0,  42,   0,  16,   0, 
-      6,   0,   0,   0,  26,   0, 
-     16,   0,   6,   0,   0,   0, 
-     58,   0,  16,   0,   6,   0, 
-      0,   0,  49,   0,   0,   7, 
-     66,   0,  16,   0,   6,   0, 
-      0,   0,  58,   0,  16,   0, 
-      0,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-      0,   0,   0,   8, 130,   0, 
-     16,   0,   6,   0,   0,   0, 
-     58,   0,  16, 128,  65,   0, 
-      0,   0,   0,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  14,   0,   0,   7, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16,   0, 
-      6,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  86,   0,   0,   5, 
-     34,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  50,   0, 
-      0,  10, 130,   0,  16,   0, 
-      3,   0,   0,   0,  26,   0, 
-     16, 128,  65,   0,   0,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16,   0,   6,   0, 
-      0,   0,   1,   0,   0,   7, 
-    130,   0,  16,   0,   6,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   8,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   0,   0, 
-      0,   0,  58,   0,  16, 128, 
-     65,   0,   0,   0,   3,   0, 
-      0,   0,   0,   0,   0,   7, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     55,   0,   0,   9, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16,   0,   6,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     55,   0,   0,   9, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     42,   0,  16,   0,   6,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     55,   0,   0,   9, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10,   0,  16,   0,   6,   0, 
-      0,   0,  26,   0,  16,   0, 
-      6,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5, 194,   0, 
-     16,   0,   3,   0,   0,   0, 
-    166,  14,  16,   0,   3,   0, 
-      0,   0,  86,   0,   0,   5, 
-    194,   0,  16,   0,   6,   0, 
-      0,   0, 246,  11,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5, 194,   0,  16,   0, 
-      3,   0,   0,   0,   6,   4, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5,  50,   0, 
-     16,   0,   6,   0,   0,   0, 
-    230,  10,  16,   0,   6,   0, 
-      0,   0,  38,   0,   0,   9, 
-      0, 208,   0,   0, 194,   0, 
-     16,   0,   3,   0,   0,   0, 
-    166,  14,  16,   0,   3,   0, 
-      0,   0,   6, 132,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  30,   0,   0,   7, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     35,   0,   0,  10,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10,   0,  16,   0,   6,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  35,   0, 
-      0,  10,  66,   0,  16,   0, 
-      3,   0,   0,   0,  26,   0, 
-     16,   0,   6,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  67,   0, 128, 131, 153, 
-     25,   0, 194,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,   6, 228,  17,   0, 
-      0,   0,   0,   0,  54,   0, 
-      0,   5,  50,   0,  16,   0, 
-      6,   0,   0,   0, 230,  10, 
-     16,   0,   3,   0,   0,   0, 
-    201,   0,   0,   5,  18,   0, 
-     16,   0,   4,   0,   0,   0, 
-     70,   4,  16,   0,   6,   0, 
-      0,   0,  18,   0,   0,   1, 
-     54,   0,   0,   5,  18,   0, 
-     16,   0,   4,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  21,   0,   0,   1, 
-     21,   0,   0,   1,  21,   0, 
-      0,   1,  31,   0,   0,   4, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-     29,   0,   0,  10, 194,   0, 
-     16,   0,   3,   0,   0,   0, 
-    246,   7,  16,   0,   2,   0, 
-      0,   0,   2,  64,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,  28,   0, 
-      0,   5, 242,   0,  16,   0, 
-      6,   0,   0,   0, 118,  13, 
-     16,   0,   2,   0,   0,   0, 
-     79,   0,   0,   8,  50,   0, 
-     16,   0,   6,   0,   0,   0, 
-     70,   0,  16,   0,   6,   0, 
-      0,   0, 182, 143,  32,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   0,   1,   0,   0,   7, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  10,   0, 
-     16,   0,   6,   0,   0,   0, 
-      1,   0,   0,   7,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,   1,   0, 
-      0,   7,  66,   0,  16,   0, 
-      3,   0,   0,   0,  26,   0, 
-     16,   0,   6,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  28,   0,   0,   5, 
-     50,   0,  16,   0,   6,   0, 
-      0,   0,  70,   0,  16,   0, 
-      3,   0,   0,   0,  38,   0, 
-      0,   9,   0, 208,   0,   0, 
-     50,   0,  16,   0,   6,   0, 
-      0,   0,  70,   0,  16,   0, 
-      6,   0,   0,   0,  70, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  30,   0, 
-      0,   7, 130,   0,  16,   0, 
-      3,   0,   0,   0,  26,   0, 
-     16,   0,   6,   0,   0,   0, 
-     10,   0,  16,   0,   6,   0, 
-      0,   0,  35,   0,   0,  10, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0,  42,   0,  16,   0, 
-      6,   0,   0,   0,  42, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     35,   0,   0,  10, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16,   0,   6,   0, 
-      0,   0,  58, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0, 167,   0, 
-      0, 139,   2,  67,   0, 128, 
-    131, 153,  25,   0,  50,   0, 
-     16,   0,   6,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,  70, 224, 
-     17,   0,   0,   0,   0,   0, 
-     54,   0,   0,   5, 194,   0, 
-     16,   0,   6,   0,   0,   0, 
-      6,   4,  16,   0,   6,   0, 
-      0,   0, 201,   0,   0,   5, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0, 230,  14,  16,   0, 
-      6,   0,   0,   0,   1,   0, 
-      0,   7,  18,   0,  16,   0, 
-      6,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  18,   0,   0,   1, 
-     32,   0,   0,   8,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      1,  64,   0,   0,   1,   0, 
-      0,   0,  31,   0,   4,   3, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  30,   0,   0,  11, 
-    194,   0,  16,   0,   3,   0, 
-      0,   0, 166, 142,  32,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   0,   2,  64,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0, 255, 255, 255, 255, 
-    255, 255, 255, 255,  86,   0, 
-      0,   5, 194,   0,  16,   0, 
-      3,   0,   0,   0, 166,  14, 
-     16,   0,   3,   0,   0,   0, 
-     52,   0,   0,  10,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-    214,   5,  16,   0,   2,   0, 
-      0,   0,   2,  64,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,  51,   0, 
-      0,   7, 194,   0,  16,   0, 
-      3,   0,   0,   0, 166,  14, 
-     16,   0,   3,   0,   0,   0, 
-      6,   4,  16,   0,   7,   0, 
-      0,   0,  28,   0,   0,   5, 
-    194,   0,  16,   0,   3,   0, 
-      0,   0, 166,  14,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5,  50,   0,  16,   0, 
-      7,   0,   0,   0,  70,   0, 
-     16,   0,   3,   0,   0,   0, 
-     38,   0,   0,   9,   0, 208, 
-      0,   0,  50,   0,  16,   0, 
-      7,   0,   0,   0,  70,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     30,   0,   0,   7,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  35,   0, 
-      0,  10,  66,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     42, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  35,   0,   0,  10, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-    167,   0,   0, 139,   2,  67, 
-      0, 128, 131, 153,  25,   0, 
-    194,   0,  16,   0,   3,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-      6, 228,  17,   0,   0,   0, 
-      0,   0,  54,   0,   0,   5, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0, 230,  10,  16,   0, 
-      3,   0,   0,   0, 201,   0, 
-      0,   5,  18,   0,  16,   0, 
-      6,   0,   0,   0,  70,   4, 
-     16,   0,   7,   0,   0,   0, 
-     18,   0,   0,   1,  32,   0, 
-      0,   8,  66,   0,  16,   0, 
-      3,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   1,  64, 
-      0,   0,   2,   0,   0,   0, 
-     31,   0,   4,   3,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     55,   0,   0,  10,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10, 128,  32,   0,   0,   0, 
-      0,   0,   7,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0, 128,   1,  64,   0,   0, 
-      0,   0,   0,  63,   0,   0, 
-      0,   7, 194,   0,  16,   0, 
-      3,   0,   0,   0, 246,  11, 
-     16,   0,   0,   0,   0,   0, 
-    166,  10,  16,   0,   3,   0, 
-      0,   0,  49,   0,   0,   7, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0, 214,   5,  16,   0, 
-      2,   0,   0,   0,  86,   5, 
-     16,   0,   0,   0,   0,   0, 
-      0,   0,   0,   8, 194,   0, 
-     16,   0,   7,   0,   0,   0, 
-     86,   5,  16,   0,   0,   0, 
-      0,   0,  86,  13,  16, 128, 
-     65,   0,   0,   0,   2,   0, 
-      0,   0,  14,   0,   0,   7, 
-     50,   0,  16,   0,   8,   0, 
-      0,   0, 230,  10,  16,   0, 
-      7,   0,   0,   0, 230,  10, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5,  50,   0, 
-     16,   0,   8,   0,   0,   0, 
-     70,   0,  16,   0,   8,   0, 
-      0,   0,  86,   0,   0,   5, 
-    194,   0,  16,   0,   8,   0, 
-      0,   0,   6,   4,  16,   0, 
-      8,   0,   0,   0,  50,   0, 
-      0,  10, 194,   0,  16,   0, 
-      7,   0,   0,   0, 166,  14, 
-     16, 128,  65,   0,   0,   0, 
-      8,   0,   0,   0, 166,  14, 
-     16,   0,   3,   0,   0,   0, 
-    166,  14,  16,   0,   7,   0, 
-      0,   0,   1,   0,   0,  10, 
-     50,   0,  16,   0,   8,   0, 
-      0,   0,  70,   0,  16,   0, 
-      8,   0,   0,   0,   2,  64, 
-      0,   0,   1,   0,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   7, 194,   0, 
-     16,   0,   8,   0,   0,   0, 
-     86,   5,  16,   0,   0,   0, 
-      0,   0, 166,  14,  16,   0, 
-      7,   0,   0,   0,   0,   0, 
-      0,   8, 194,   0,  16,   0, 
-      7,   0,   0,   0, 246,  11, 
-     16,   0,   0,   0,   0,   0, 
-    166,  14,  16, 128,  65,   0, 
-      0,   0,   7,   0,   0,   0, 
-     55,   0,   0,   9, 194,   0, 
-     16,   0,   7,   0,   0,   0, 
-      6,   4,  16,   0,   8,   0, 
-      0,   0, 166,  14,  16,   0, 
-      7,   0,   0,   0, 166,  14, 
-     16,   0,   8,   0,   0,   0, 
-     49,   0,   0,   7,  50,   0, 
-     16,   0,   8,   0,   0,   0, 
-    182,  15,  16,   0,   0,   0, 
-      0,   0, 214,   5,  16,   0, 
-      2,   0,   0,   0,   0,   0, 
-      0,   8, 194,   0,  16,   0, 
-      8,   0,   0,   0, 246,  11, 
-     16, 128,  65,   0,   0,   0, 
-      0,   0,   0,   0,  86,  13, 
-     16,   0,   2,   0,   0,   0, 
-     14,   0,   0,   7,  50,   0, 
-     16,   0,   9,   0,   0,   0, 
-    230,  10,  16,   0,   8,   0, 
-      0,   0, 230,  10,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5,  50,   0,  16,   0, 
-      9,   0,   0,   0,  70,   0, 
-     16,   0,   9,   0,   0,   0, 
-     86,   0,   0,   5, 194,   0, 
-     16,   0,   9,   0,   0,   0, 
-      6,   4,  16,   0,   9,   0, 
-      0,   0,  50,   0,   0,  10, 
-    194,   0,  16,   0,   3,   0, 
-      0,   0, 166,  14,  16, 128, 
-     65,   0,   0,   0,   9,   0, 
-      0,   0, 166,  14,  16,   0, 
-      3,   0,   0,   0, 166,  14, 
-     16,   0,   8,   0,   0,   0, 
-      1,   0,   0,  10, 194,   0, 
-     16,   0,   8,   0,   0,   0, 
-      6,   4,  16,   0,   9,   0, 
-      0,   0,   2,  64,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   1,   0,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   8,  50,   0,  16,   0, 
-      9,   0,   0,   0, 182,  15, 
-     16,   0,   0,   0,   0,   0, 
-    230,  10,  16, 128,  65,   0, 
-      0,   0,   3,   0,   0,   0, 
-      0,   0,   0,   7, 194,   0, 
-     16,   0,   3,   0,   0,   0, 
-     86,   5,  16,   0,   0,   0, 
-      0,   0, 166,  14,  16,   0, 
-      3,   0,   0,   0,  55,   0, 
-      0,   9, 194,   0,  16,   0, 
-      3,   0,   0,   0, 166,  14, 
-     16,   0,   8,   0,   0,   0, 
-    166,  14,  16,   0,   3,   0, 
-      0,   0,   6,   4,  16,   0, 
-      9,   0,   0,   0,  55,   0, 
-      0,   9, 194,   0,  16,   0, 
-      3,   0,   0,   0,   6,   4, 
-     16,   0,   8,   0,   0,   0, 
-    166,  14,  16,   0,   3,   0, 
-      0,   0,  86,  13,  16,   0, 
-      2,   0,   0,   0,  55,   0, 
-      0,   9, 194,   0,  16,   0, 
-      3,   0,   0,   0,   6,   4, 
-     16,   0,   7,   0,   0,   0, 
-    166,  14,  16,   0,   7,   0, 
-      0,   0, 166,  14,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5, 194,   0,  16,   0, 
-      3,   0,   0,   0, 166,  14, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,   3,   0, 
-      0,   0,  38,   0,   0,   9, 
-      0, 208,   0,   0,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,   7,   0, 
-      0,   0,  70, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  30,   0,   0,   7, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     35,   0,   0,  10,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  35,   0, 
-      0,  10,  66,   0,  16,   0, 
-      3,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  67,   0, 128, 131, 153, 
-     25,   0, 194,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,   6, 228,  17,   0, 
-      0,   0,   0,   0,  54,   0, 
-      0,   5,  50,   0,  16,   0, 
-      7,   0,   0,   0, 230,  10, 
-     16,   0,   3,   0,   0,   0, 
-    201,   0,   0,   5,  18,   0, 
-     16,   0,   6,   0,   0,   0, 
-     70,   4,  16,   0,   7,   0, 
-      0,   0,  18,   0,   0,   1, 
-     54,   0,   0,   5,  18,   0, 
-     16,   0,   6,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  21,   0,   0,   1, 
-     21,   0,   0,   1,  21,   0, 
-      0,   1,  31,   0,   0,   4, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-     29,   0,   0,  10, 242,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,  14,  16,   0,   2,   0, 
-      0,   0,   2,  64,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,  28,   0, 
-      0,   5, 242,   0,  16,   0, 
-      8,   0,   0,   0,  70,  14, 
-     16,   0,   2,   0,   0,   0, 
-     79,   0,   0,   8, 242,   0, 
-     16,   0,   8,   0,   0,   0, 
-     70,  14,  16,   0,   8,   0, 
-      0,   0, 182, 143,  32,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   0,   1,   0,   0,   7, 
-    210,   0,  16,   0,   7,   0, 
-      0,   0, 166,   3,  16,   0, 
-      7,   0,   0,   0, 166,   3, 
-     16,   0,   8,   0,   0,   0, 
-      1,   0,   0,   7,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,   1,   0, 
-      0,   7,  66,   0,  16,   0, 
-      3,   0,   0,   0,  26,   0, 
-     16,   0,   8,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  28,   0,   0,   5, 
-     50,   0,  16,   0,   8,   0, 
-      0,   0,  70,   0,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5, 194,   0,  16,   0, 
-      8,   0,   0,   0,  86,   1, 
-     16,   0,   2,   0,   0,   0, 
-     38,   0,   0,   9,   0, 208, 
-      0,   0,  50,   0,  16,   0, 
-      8,   0,   0,   0,  70,   0, 
-     16,   0,   8,   0,   0,   0, 
-     70, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     30,   0,   0,   7, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     26,   0,  16,   0,   8,   0, 
-      0,   0,  10,   0,  16,   0, 
-      8,   0,   0,   0,  35,   0, 
-      0,  10, 130,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   8,   0,   0,   0, 
-     42, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  35,   0,   0,  10, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0,  58,   0,  16,   0, 
-      8,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-    167,   0,   0, 139,   2,  67, 
-      0, 128, 131, 153,  25,   0, 
-     50,   0,  16,   0,   8,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     70, 224,  17,   0,   0,   0, 
-      0,   0,  54,   0,   0,   5, 
-    194,   0,  16,   0,   8,   0, 
-      0,   0,   6,   4,  16,   0, 
-      8,   0,   0,   0, 201,   0, 
-      0,   5, 130,   0,  16,   0, 
-      3,   0,   0,   0, 230,  14, 
-     16,   0,   8,   0,   0,   0, 
-      1,   0,   0,   7,  18,   0, 
-     16,   0,   8,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  29,   0, 
-      0,  10, 194,   0,  16,   0, 
-      3,   0,   0,   0,   6,   4, 
-     16,   0,   1,   0,   0,   0, 
-      2,  64,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   1,   0,   0,   7, 
-    114,   0,  16,   0,   7,   0, 
-      0,   0, 166,  10,  16,   0, 
-      3,   0,   0,   0, 134,   3, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5,  50,   0, 
-     16,   0,   9,   0,   0,   0, 
-     70,   0,  16,   0,   1,   0, 
-      0,   0,  79,   0,   0,   8, 
-     50,   0,  16,   0,   9,   0, 
-      0,   0,  70,   0,  16,   0, 
-      9,   0,   0,   0, 230, 138, 
-     32,   0,   0,   0,   0,   0, 
-      1,   0,   0,   0,   1,   0, 
-      0,   7, 114,   0,  16,   0, 
-      7,   0,   0,   0,  70,   2, 
-     16,   0,   7,   0,   0,   0, 
-      6,   0,  16,   0,   9,   0, 
-      0,   0,  28,   0,   0,   5, 
-    194,   0,  16,   0,   9,   0, 
-      0,   0,   6,   4,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5, 130,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     28,   0,   0,   5,  18,   0, 
-     16,   0,  10,   0,   0,   0, 
-     42,   0,  16,   0,   2,   0, 
-      0,   0,  38,   0,   0,   9, 
-      0, 208,   0,   0, 194,   0, 
-     16,   0,   9,   0,   0,   0, 
-    166,  14,  16,   0,   9,   0, 
-      0,   0,   6, 132,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  30,   0,   0,   7, 
-     66,   0,  16,   0,   9,   0, 
-      0,   0,  58,   0,  16,   0, 
-      9,   0,   0,   0,  42,   0, 
-     16,   0,   9,   0,   0,   0, 
-     35,   0,   0,  10, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      9,   0,   0,   0,  35,   0, 
-      0,  10, 130,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,  10,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  67,   0, 128, 131, 153, 
-     25,   0, 194,   0,  16,   0, 
-      9,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,   6, 228,  17,   0, 
-      0,   0,   0,   0,  54,   0, 
-      0,   5,  50,   0,  16,   0, 
-     10,   0,   0,   0, 230,  10, 
-     16,   0,   9,   0,   0,   0, 
-    201,   0,   0,   5, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   4,  16,   0,  10,   0, 
-      0,   0,   1,   0,   0,   7, 
-     66,   0,  16,   0,   5,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-      1,   0,   0,   7, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  26,   0,  16,   0, 
-      9,   0,   0,   0,   1,   0, 
-      0,   7,  66,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,   1,   0,   0,   7, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  10,   0,  16,   0, 
-      9,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5, 146,   0, 
-     16,   0,   7,   0,   0,   0, 
-      6,   4,  16,   0,   3,   0, 
-      0,   0,  28,   0,   0,   5, 
-     50,   0,  16,   0,   9,   0, 
-      0,   0,  70,   0,  16,   0, 
-      1,   0,   0,   0,  38,   0, 
-      0,   9,   0, 208,   0,   0, 
-    146,   0,  16,   0,   7,   0, 
-      0,   0,   6,  12,  16,   0, 
-      7,   0,   0,   0,   6, 132, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  30,   0, 
-      0,   7, 130,   0,  16,   0, 
-      3,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  35,   0,   0,  10, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0,  10,   0,  16,   0, 
-      9,   0,   0,   0,  42, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     35,   0,   0,  10, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     26,   0,  16,   0,   9,   0, 
-      0,   0,  58, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0, 167,   0, 
-      0, 139,   2,  67,   0, 128, 
-    131, 153,  25,   0, 146,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,   6, 228, 
-     17,   0,   0,   0,   0,   0, 
-     54,   0,   0,   5,  50,   0, 
-     16,   0,   9,   0,   0,   0, 
-    198,   0,  16,   0,   7,   0, 
-      0,   0, 201,   0,   0,   5, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0,  70,   4,  16,   0, 
-      9,   0,   0,   0,   1,   0, 
-      0,   7,  66,   0,  16,   0, 
-      4,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  28,   0,   0,   5, 
-    194,   0,  16,   0,   3,   0, 
-      0,   0,   6,   4,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5,  18,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     28,   0,   0,   5, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   2,   0, 
-      0,   0,  38,   0,   0,   9, 
-      0, 208,   0,   0, 194,   0, 
-     16,   0,   3,   0,   0,   0, 
-    166,  14,  16,   0,   3,   0, 
-      0,   0,   6, 132,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  30,   0,   0,   7, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     35,   0,   0,  10,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  35,   0, 
-      0,  10,  66,   0,  16,   0, 
-      3,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  67,   0, 128, 131, 153, 
-     25,   0, 194,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,   6, 228,  17,   0, 
-      0,   0,   0,   0,  54,   0, 
-      0,   5,  50,   0,  16,   0, 
-      9,   0,   0,   0, 230,  10, 
-     16,   0,   3,   0,   0,   0, 
-    201,   0,   0,   5,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     70,   4,  16,   0,   9,   0, 
-      0,   0,   1,   0,   0,   7, 
-     66,   0,  16,   0,   6,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5, 194,   0, 
-     16,   0,   3,   0,   0,   0, 
-      6,   4,  16,   0,   3,   0, 
-      0,   0,  28,   0,   0,   5, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  28,   0, 
-      0,   5,  34,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   2,   0,   0,   0, 
-     38,   0,   0,   9,   0, 208, 
-      0,   0, 194,   0,  16,   0, 
-      3,   0,   0,   0, 166,  14, 
-     16,   0,   3,   0,   0,   0, 
-      6, 132,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     30,   0,   0,   7,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  35,   0, 
-      0,  10,  66,   0,  16,   0, 
-      3,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  35,   0,   0,  10, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-    167,   0,   0, 139,   2,  67, 
-      0, 128, 131, 153,  25,   0, 
-    194,   0,  16,   0,   3,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-      6, 228,  17,   0,   0,   0, 
-      0,   0,  54,   0,   0,   5, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0, 230,  10,  16,   0, 
-      3,   0,   0,   0, 201,   0, 
-      0,   5,  66,   0,  16,   0, 
-      3,   0,   0,   0,  70,   4, 
-     16,   0,   7,   0,   0,   0, 
-      1,   0,   0,   7,  66,   0, 
-     16,   0,   8,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  18,   0, 
-      0,   1,  32,   0,   0,   8, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  58, 128,  32,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   1,  64,   0,   0, 
-      1,   0,   0,   0,  31,   0, 
-      4,   3,  42,   0,  16,   0, 
-      3,   0,   0,   0,  30,   0, 
-      0,  11,  50,   0,  16,   0, 
-      7,   0,   0,   0, 230, 138, 
-     32,   0,   0,   0,   0,   0, 
-      1,   0,   0,   0,   2,  64, 
-      0,   0, 255, 255, 255, 255, 
-    255, 255, 255, 255,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-     86,   0,   0,   5,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,   7,   0, 
-      0,   0,  52,   0,   0,  10, 
-    194,   0,  16,   0,   7,   0, 
-      0,   0,  86,   1,  16,   0, 
-      2,   0,   0,   0,   2,  64, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-     51,   0,   0,   7,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,   7,   0, 
-      0,   0, 230,  10,  16,   0, 
-      7,   0,   0,   0,  28,   0, 
-      0,   5,  50,   0,  16,   0, 
-      7,   0,   0,   0,  70,   0, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5, 194,   0, 
-     16,   0,   7,   0,   0,   0, 
-      6,   4,  16,   0,   3,   0, 
-      0,   0,  38,   0,   0,   9, 
-      0, 208,   0,   0, 194,   0, 
-     16,   0,   7,   0,   0,   0, 
-    166,  14,  16,   0,   7,   0, 
-      0,   0,   6, 132,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  30,   0,   0,   7, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-     35,   0,   0,  10, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,  35,   0, 
-      0,  10, 130,   0,  16,   0, 
-      3,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  67,   0, 128, 131, 153, 
-     25,   0,  50,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  70, 224,  17,   0, 
-      0,   0,   0,   0,  54,   0, 
-      0,   5, 194,   0,  16,   0, 
-      7,   0,   0,   0,   6,   4, 
-     16,   0,   7,   0,   0,   0, 
-    201,   0,   0,   5,  18,   0, 
-     16,   0,   8,   0,   0,   0, 
-    230,  14,  16,   0,   7,   0, 
-      0,   0,  18,   0,   0,   1, 
-     32,   0,   0,   8, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      1,  64,   0,   0,   2,   0, 
-      0,   0,  31,   0,   4,   3, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  55,   0,   0,  10, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0,  10, 128,  32,   0, 
-      0,   0,   0,   0,   7,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0, 128,   1,  64, 
-      0,   0,   0,   0,   0,  63, 
-      0,   0,   0,   7,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-    182,  15,  16,   0,   0,   0, 
-      0,   0, 246,  15,  16,   0, 
-      3,   0,   0,   0,  49,   0, 
-      0,   7, 194,   0,  16,   0, 
-      7,   0,   0,   0,  86,   1, 
-     16,   0,   2,   0,   0,   0, 
-     86,   5,  16,   0,   0,   0, 
-      0,   0,   0,   0,   0,   8, 
-     50,   0,  16,   0,   9,   0, 
-      0,   0,  86,   5,  16,   0, 
-      0,   0,   0,   0,  22,   5, 
-     16, 128,  65,   0,   0,   0, 
-      2,   0,   0,   0,  14,   0, 
-      0,   7, 194,   0,  16,   0, 
-      9,   0,   0,   0,   6,   4, 
-     16,   0,   9,   0,   0,   0, 
-      6,   4,  16,   0,   7,   0, 
-      0,   0,  28,   0,   0,   5, 
-    194,   0,  16,   0,   9,   0, 
-      0,   0, 166,  14,  16,   0, 
-      9,   0,   0,   0,  86,   0, 
-      0,   5,  50,   0,  16,   0, 
-     10,   0,   0,   0, 230,  10, 
-     16,   0,   9,   0,   0,   0, 
-     50,   0,   0,  10,  50,   0, 
-     16,   0,   9,   0,   0,   0, 
-     70,   0,  16, 128,  65,   0, 
-      0,   0,  10,   0,   0,   0, 
-     70,   0,  16,   0,   7,   0, 
-      0,   0,  70,   0,  16,   0, 
-      9,   0,   0,   0,   1,   0, 
-      0,  10, 194,   0,  16,   0, 
-      9,   0,   0,   0, 166,  14, 
-     16,   0,   9,   0,   0,   0, 
-      2,  64,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      1,   0,   0,   0,   1,   0, 
-      0,   0,   0,   0,   0,   7, 
-     50,   0,  16,   0,  10,   0, 
-      0,   0,  86,   5,  16,   0, 
-      0,   0,   0,   0,  70,   0, 
-     16,   0,   9,   0,   0,   0, 
-      0,   0,   0,   8,  50,   0, 
-     16,   0,   9,   0,   0,   0, 
-    182,  15,  16,   0,   0,   0, 
-      0,   0,  70,   0,  16, 128, 
-     65,   0,   0,   0,   9,   0, 
-      0,   0,  55,   0,   0,   9, 
-     50,   0,  16,   0,   9,   0, 
-      0,   0, 230,  10,  16,   0, 
-      9,   0,   0,   0,  70,   0, 
-     16,   0,   9,   0,   0,   0, 
-     70,   0,  16,   0,  10,   0, 
-      0,   0,  49,   0,   0,   7, 
-    194,   0,  16,   0,   9,   0, 
-      0,   0, 246,  11,  16,   0, 
-      0,   0,   0,   0,  86,   1, 
-     16,   0,   2,   0,   0,   0, 
-      0,   0,   0,   8,  50,   0, 
-     16,   0,  10,   0,   0,   0, 
-    182,  15,  16, 128,  65,   0, 
-      0,   0,   0,   0,   0,   0, 
-     22,   5,  16,   0,   2,   0, 
-      0,   0,  14,   0,   0,   7, 
-    194,   0,  16,   0,  10,   0, 
-      0,   0,   6,   4,  16,   0, 
-     10,   0,   0,   0,   6,   4, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5, 194,   0, 
-     16,   0,  10,   0,   0,   0, 
-    166,  14,  16,   0,  10,   0, 
-      0,   0,  86,   0,   0,   5, 
-     50,   0,  16,   0,  11,   0, 
-      0,   0, 230,  10,  16,   0, 
-     10,   0,   0,   0,  50,   0, 
-      0,  10,  50,   0,  16,   0, 
-      7,   0,   0,   0,  70,   0, 
-     16, 128,  65,   0,   0,   0, 
-     11,   0,   0,   0,  70,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,  10,   0, 
-      0,   0,   1,   0,   0,  10, 
-     50,   0,  16,   0,  10,   0, 
-      0,   0, 230,  10,  16,   0, 
-     10,   0,   0,   0,   2,  64, 
-      0,   0,   1,   0,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   8, 194,   0, 
-     16,   0,  10,   0,   0,   0, 
-    246,  11,  16,   0,   0,   0, 
-      0,   0,   6,   4,  16, 128, 
-     65,   0,   0,   0,   7,   0, 
-      0,   0,   0,   0,   0,   7, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0,  86,   5,  16,   0, 
-      0,   0,   0,   0,  70,   0, 
-     16,   0,   7,   0,   0,   0, 
-     55,   0,   0,   9,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,  10,   0, 
-      0,   0,  70,   0,  16,   0, 
-      7,   0,   0,   0, 230,  10, 
-     16,   0,  10,   0,   0,   0, 
-     55,   0,   0,   9,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-    230,  10,  16,   0,   9,   0, 
-      0,   0,  70,   0,  16,   0, 
-      7,   0,   0,   0,  22,   5, 
-     16,   0,   2,   0,   0,   0, 
-     55,   0,   0,   9,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-    230,  10,  16,   0,   7,   0, 
-      0,   0,  70,   0,  16,   0, 
-      9,   0,   0,   0,  70,   0, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,   7,   0, 
-      0,   0,  28,   0,   0,   5, 
-    194,   0,  16,   0,   7,   0, 
-      0,   0,   6,   4,  16,   0, 
-      3,   0,   0,   0,  38,   0, 
-      0,   9,   0, 208,   0,   0, 
-    194,   0,  16,   0,   7,   0, 
-      0,   0, 166,  14,  16,   0, 
-      7,   0,   0,   0,   6, 132, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  30,   0, 
-      0,   7,  34,   0,  16,   0, 
-      2,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  35,   0,   0,  10, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  42, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     35,   0,   0,  10,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  58, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0, 167,   0, 
-      0, 139,   2,  67,   0, 128, 
-    131, 153,  25,   0,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,  70, 224, 
-     17,   0,   0,   0,   0,   0, 
-     54,   0,   0,   5, 194,   0, 
-     16,   0,   7,   0,   0,   0, 
-      6,   4,  16,   0,   7,   0, 
-      0,   0, 201,   0,   0,   5, 
-     18,   0,  16,   0,   8,   0, 
-      0,   0, 230,  14,  16,   0, 
-      7,   0,   0,   0,  18,   0, 
-      0,   1,  54,   0,   0,   5, 
-     18,   0,  16,   0,   8,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,  21,   0, 
-      0,   1,  21,   0,   0,   1, 
-     31,   0,   4,   3,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     30,   0,   0,  11,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-    182, 143,  32,   0,   0,   0, 
-      0,   0,   1,   0,   0,   0, 
-      2,  64,   0,   0, 255, 255, 
-    255, 255, 255, 255, 255, 255, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,  86,   0,   0,   5, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0,  70,   0,  16,   0, 
-      7,   0,   0,   0,  52,   0, 
-      0,   7,  34,   0,  16,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16,   0,   2,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  51,   0,   0,   7, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     28,   0,   0,   5,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  52,   0,   0,   7, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     51,   0,   0,   7, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5, 130,   0,  16,   0, 
-      3,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     86,   0,   0,   5,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  86,   0,   0,   5, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  28,   0, 
-      0,   5,  50,   0,  16,   0, 
-      7,   0,   0,   0,  70,   0, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5, 194,   0, 
-     16,   0,   7,   0,   0,   0, 
-    166,  14,  16,   0,   7,   0, 
-      0,   0,  38,   0,   0,   9, 
-      0, 208,   0,   0,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,   7,   0, 
-      0,   0,  70, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  30,   0,   0,   7, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     35,   0,   0,  10,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  35,   0, 
-      0,  10,  34,   0,  16,   0, 
-      2,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  67,   0, 128, 131, 153, 
-     25,   0,  50,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  70, 224,  17,   0, 
-      0,   0,   0,   0,  54,   0, 
-      0,   5, 194,   0,  16,   0, 
-      7,   0,   0,   0,   6,   4, 
-     16,   0,   7,   0,   0,   0, 
-    201,   0,   0,   5,  66,   0, 
-     16,   0,   5,   0,   0,   0, 
-    230,  14,  16,   0,   7,   0, 
-      0,   0,  18,   0,   0,   1, 
-     32,   0,   0,   8,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      1,  64,   0,   0,   2,   0, 
-      0,   0,  31,   0,   4,   3, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  55,   0,   0,  10, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  10, 128,  32,   0, 
-      0,   0,   0,   0,   7,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0, 128,   1,  64, 
-      0,   0,   0,   0,   0,  63, 
-      0,   0,   0,   7,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-    230,  10,  16,   0,   0,   0, 
-      0,   0,  86,   5,  16,   0, 
-      2,   0,   0,   0,  49,   0, 
-      0,   7,  34,   0,  16,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,   0,   0,   0,   8, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  42,   0, 
-     16, 128,  65,   0,   0,   0, 
-      2,   0,   0,   0,  14,   0, 
-      0,   7,  66,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  28,   0,   0,   5, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  86,   0, 
-      0,   5, 130,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-     50,   0,   0,  10, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16, 128,  65,   0, 
-      0,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,   1,   0, 
-      0,   7,  66,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-      1,  64,   0,   0,   1,   0, 
-      0,   0,   0,   0,   0,   7, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-      0,   0,   0,   8, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     42,   0,  16,   0,   0,   0, 
-      0,   0,  58,   0,  16, 128, 
-     65,   0,   0,   0,   3,   0, 
-      0,   0,  55,   0,   0,   9, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  49,   0,   0,   7, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      0,   0,   0,   0,  42,   0, 
-     16,   0,   2,   0,   0,   0, 
-      0,   0,   0,   8, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16, 128,  65,   0, 
-      0,   0,   0,   0,   0,   0, 
-     42,   0,  16,   0,   2,   0, 
-      0,   0,  14,   0,   0,   7, 
-     18,   0,  16,   0,   9,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5,  18,   0, 
-     16,   0,   9,   0,   0,   0, 
-     10,   0,  16,   0,   9,   0, 
-      0,   0,  86,   0,   0,   5, 
-     34,   0,  16,   0,   9,   0, 
-      0,   0,  10,   0,  16,   0, 
-      9,   0,   0,   0,  50,   0, 
-      0,  10,  18,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16, 128,  65,   0,   0,   0, 
-      9,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,   1,   0,   0,   7, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      9,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   8,  18,   0, 
-     16,   0,   9,   0,   0,   0, 
-     42,   0,  16,   0,   0,   0, 
-      0,   0,  10,   0,  16, 128, 
-     65,   0,   0,   0,   7,   0, 
-      0,   0,   0,   0,   0,   7, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     55,   0,   0,   9,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   9,   0,   0,   0, 
-     55,   0,   0,   9,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   2,   0,   0,   0, 
-     55,   0,   0,   9,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  49,   0,   0,   7, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-      0,   0,   0,   8,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  10,   0,  16, 128, 
-     65,   0,   0,   0,   1,   0, 
-      0,   0,  14,   0,   0,   7, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  86,   0,   0,   5, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  50,   0, 
-      0,  10,  18,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16, 128,  65,   0,   0,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,   1,   0,   0,   7, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   7, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,   0,   0, 
-      0,   8,  18,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   0,   0,   0,   0, 
-     10,   0,  16, 128,  65,   0, 
-      0,   0,   7,   0,   0,   0, 
-     55,   0,   0,   9,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     49,   0,   0,   7,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   0,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   8, 130,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16, 128,  65,   0,   0,   0, 
-      0,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     14,   0,   0,   7,  18,   0, 
-     16,   0,   9,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  28,   0, 
-      0,   5,  18,   0,  16,   0, 
-      9,   0,   0,   0,  10,   0, 
-     16,   0,   9,   0,   0,   0, 
-     86,   0,   0,   5,  34,   0, 
-     16,   0,   9,   0,   0,   0, 
-     10,   0,  16,   0,   9,   0, 
-      0,   0,  50,   0,   0,  10, 
-     34,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16, 128, 
-     65,   0,   0,   0,   9,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-      1,   0,   0,   7, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   9,   0, 
-      0,   0,   1,  64,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   8,  18,   0,  16,   0, 
-      9,   0,   0,   0,  58,   0, 
-     16,   0,   0,   0,   0,   0, 
-     26,   0,  16, 128,  65,   0, 
-      0,   0,   7,   0,   0,   0, 
-      0,   0,   0,   7,  34,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  55,   0, 
-      0,   9,  34,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      9,   0,   0,   0,  55,   0, 
-      0,   9,  34,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  55,   0, 
-      0,   9, 130,   0,  16,   0, 
-      3,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  28,   0, 
-      0,   5, 130,   0,  16,   0, 
-      3,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     86,   0,   0,   5,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  86,   0,   0,   5, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  28,   0, 
-      0,   5,  50,   0,  16,   0, 
-      7,   0,   0,   0,  70,   0, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5, 194,   0, 
-     16,   0,   7,   0,   0,   0, 
-    166,  14,  16,   0,   7,   0, 
-      0,   0,  38,   0,   0,   9, 
-      0, 208,   0,   0,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,   7,   0, 
-      0,   0,  70, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  30,   0,   0,   7, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     35,   0,   0,  10,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  35,   0, 
-      0,  10,  34,   0,  16,   0, 
-      2,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  67,   0, 128, 131, 153, 
-     25,   0,  50,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  70, 224,  17,   0, 
-      0,   0,   0,   0,  54,   0, 
-      0,   5, 194,   0,  16,   0, 
-      7,   0,   0,   0,   6,   4, 
-     16,   0,   7,   0,   0,   0, 
-    201,   0,   0,   5,  66,   0, 
-     16,   0,   5,   0,   0,   0, 
-    230,  14,  16,   0,   7,   0, 
-      0,   0,  18,   0,   0,   1, 
-     54,   0,   0,   5,  66,   0, 
-     16,   0,   5,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  21,   0,   0,   1, 
-     21,   0,   0,   1,  31,   0, 
-      4,   3,  42,   0,  16,   0, 
-      3,   0,   0,   0,  30,   0, 
-      0,  11,  50,   0,  16,   0, 
-      7,   0,   0,   0, 230, 138, 
-     32,   0,   0,   0,   0,   0, 
-      1,   0,   0,   0,   2,  64, 
-      0,   0, 255, 255, 255, 255, 
-    255, 255, 255, 255,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-     86,   0,   0,   5,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,   7,   0, 
-      0,   0,  52,   0,   0,  10, 
-    194,   0,  16,   0,   7,   0, 
-      0,   0,   6,   4,  16,   0, 
-      1,   0,   0,   0,   2,  64, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-     51,   0,   0,   7,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,   7,   0, 
-      0,   0, 230,  10,  16,   0, 
-      7,   0,   0,   0,  28,   0, 
-      0,   5,  50,   0,  16,   0, 
-      7,   0,   0,   0,  70,   0, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5, 194,   0, 
-     16,   0,   7,   0,   0,   0, 
-      6,   4,  16,   0,   3,   0, 
-      0,   0,  38,   0,   0,   9, 
-      0, 208,   0,   0, 194,   0, 
-     16,   0,   7,   0,   0,   0, 
-    166,  14,  16,   0,   7,   0, 
-      0,   0,   6, 132,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  30,   0,   0,   7, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-     35,   0,   0,  10,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  35,   0, 
-      0,  10,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  67,   0, 128, 131, 153, 
-     25,   0,  50,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  70, 224,  17,   0, 
-      0,   0,   0,   0,  54,   0, 
-      0,   5, 194,   0,  16,   0, 
-      7,   0,   0,   0,   6,   4, 
-     16,   0,   7,   0,   0,   0, 
-    201,   0,   0,   5,  66,   0, 
-     16,   0,   4,   0,   0,   0, 
-    230,  14,  16,   0,   7,   0, 
-      0,   0,  18,   0,   0,   1, 
-     32,   0,   0,   8,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      1,  64,   0,   0,   2,   0, 
-      0,   0,  31,   0,   4,   3, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  55,   0,   0,  10, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  10, 128,  32,   0, 
-      0,   0,   0,   0,   7,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0, 128,   1,  64, 
-      0,   0,   0,   0,   0,  63, 
-      0,   0,   0,   7,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-    182,  15,  16,   0,   0,   0, 
-      0,   0,  86,   5,  16,   0, 
-      2,   0,   0,   0,  49,   0, 
-      0,   7, 194,   0,  16,   0, 
-      7,   0,   0,   0,   6,   4, 
-     16,   0,   1,   0,   0,   0, 
-     86,   5,  16,   0,   0,   0, 
-      0,   0,   0,   0,   0,   8, 
-     50,   0,  16,   0,   9,   0, 
-      0,   0,  86,   5,  16,   0, 
-      0,   0,   0,   0,  70,   0, 
-     16, 128,  65,   0,   0,   0, 
-      1,   0,   0,   0,  14,   0, 
-      0,   7, 194,   0,  16,   0, 
-      9,   0,   0,   0,   6,   4, 
-     16,   0,   9,   0,   0,   0, 
-      6,   4,  16,   0,   7,   0, 
-      0,   0,  28,   0,   0,   5, 
-    194,   0,  16,   0,   9,   0, 
-      0,   0, 166,  14,  16,   0, 
-      9,   0,   0,   0,  86,   0, 
-      0,   5,  50,   0,  16,   0, 
-     10,   0,   0,   0, 230,  10, 
-     16,   0,   9,   0,   0,   0, 
-     50,   0,   0,  10,  50,   0, 
-     16,   0,   9,   0,   0,   0, 
-     70,   0,  16, 128,  65,   0, 
-      0,   0,  10,   0,   0,   0, 
-     70,   0,  16,   0,   7,   0, 
-      0,   0,  70,   0,  16,   0, 
-      9,   0,   0,   0,   1,   0, 
-      0,  10, 194,   0,  16,   0, 
-      9,   0,   0,   0, 166,  14, 
-     16,   0,   9,   0,   0,   0, 
-      2,  64,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      1,   0,   0,   0,   1,   0, 
-      0,   0,   0,   0,   0,   7, 
-     50,   0,  16,   0,  10,   0, 
-      0,   0,  86,   5,  16,   0, 
-      0,   0,   0,   0,  70,   0, 
-     16,   0,   9,   0,   0,   0, 
-      0,   0,   0,   8,  50,   0, 
-     16,   0,   9,   0,   0,   0, 
-    182,  15,  16,   0,   0,   0, 
-      0,   0,  70,   0,  16, 128, 
-     65,   0,   0,   0,   9,   0, 
-      0,   0,  55,   0,   0,   9, 
-     50,   0,  16,   0,   9,   0, 
-      0,   0, 230,  10,  16,   0, 
-      9,   0,   0,   0,  70,   0, 
-     16,   0,   9,   0,   0,   0, 
-     70,   0,  16,   0,  10,   0, 
-      0,   0,  49,   0,   0,   7, 
-    194,   0,  16,   0,   9,   0, 
-      0,   0, 246,  11,  16,   0, 
-      0,   0,   0,   0,   6,   4, 
-     16,   0,   1,   0,   0,   0, 
-      0,   0,   0,   8,  50,   0, 
-     16,   0,  10,   0,   0,   0, 
-    182,  15,  16, 128,  65,   0, 
-      0,   0,   0,   0,   0,   0, 
-     70,   0,  16,   0,   1,   0, 
-      0,   0,  14,   0,   0,   7, 
-    194,   0,  16,   0,  10,   0, 
-      0,   0,   6,   4,  16,   0, 
-     10,   0,   0,   0,   6,   4, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5, 194,   0, 
-     16,   0,  10,   0,   0,   0, 
-    166,  14,  16,   0,  10,   0, 
-      0,   0,  86,   0,   0,   5, 
-     50,   0,  16,   0,  11,   0, 
-      0,   0, 230,  10,  16,   0, 
-     10,   0,   0,   0,  50,   0, 
-      0,  10,  50,   0,  16,   0, 
-      7,   0,   0,   0,  70,   0, 
-     16, 128,  65,   0,   0,   0, 
-     11,   0,   0,   0,  70,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,  10,   0, 
-      0,   0,   1,   0,   0,  10, 
-     50,   0,  16,   0,  10,   0, 
-      0,   0, 230,  10,  16,   0, 
-     10,   0,   0,   0,   2,  64, 
-      0,   0,   1,   0,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   8, 194,   0, 
-     16,   0,  10,   0,   0,   0, 
-    246,  11,  16,   0,   0,   0, 
-      0,   0,   6,   4,  16, 128, 
-     65,   0,   0,   0,   7,   0, 
-      0,   0,   0,   0,   0,   7, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0,  86,   5,  16,   0, 
-      0,   0,   0,   0,  70,   0, 
-     16,   0,   7,   0,   0,   0, 
-     55,   0,   0,   9,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,  10,   0, 
-      0,   0,  70,   0,  16,   0, 
-      7,   0,   0,   0, 230,  10, 
-     16,   0,  10,   0,   0,   0, 
-     55,   0,   0,   9,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-    230,  10,  16,   0,   9,   0, 
-      0,   0,  70,   0,  16,   0, 
-      7,   0,   0,   0,  70,   0, 
-     16,   0,   1,   0,   0,   0, 
-     55,   0,   0,   9,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-    230,  10,  16,   0,   7,   0, 
-      0,   0,  70,   0,  16,   0, 
-      9,   0,   0,   0,  70,   0, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,   7,   0, 
-      0,   0,  28,   0,   0,   5, 
-    194,   0,  16,   0,   7,   0, 
-      0,   0,   6,   4,  16,   0, 
-      3,   0,   0,   0,  38,   0, 
-      0,   9,   0, 208,   0,   0, 
-    194,   0,  16,   0,   7,   0, 
-      0,   0, 166,  14,  16,   0, 
-      7,   0,   0,   0,   6, 132, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  30,   0, 
-      0,   7,  34,   0,  16,   0, 
-      2,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  35,   0,   0,  10, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  42, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     35,   0,   0,  10,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  58, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0, 167,   0, 
-      0, 139,   2,  67,   0, 128, 
-    131, 153,  25,   0,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,  70, 224, 
-     17,   0,   0,   0,   0,   0, 
-     54,   0,   0,   5, 194,   0, 
-     16,   0,   7,   0,   0,   0, 
-      6,   4,  16,   0,   7,   0, 
-      0,   0, 201,   0,   0,   5, 
-     66,   0,  16,   0,   4,   0, 
-      0,   0, 230,  14,  16,   0, 
-      7,   0,   0,   0,  18,   0, 
-      0,   1,  54,   0,   0,   5, 
-     66,   0,  16,   0,   4,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,  21,   0, 
-      0,   1,  21,   0,   0,   1, 
-     31,   0,   4,   3,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     30,   0,   0,  11,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-    182, 143,  32,   0,   0,   0, 
-      0,   0,   1,   0,   0,   0, 
-      2,  64,   0,   0, 255, 255, 
-    255, 255, 255, 255, 255, 255, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,  86,   0,   0,   5, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0,  70,   0,  16,   0, 
-      7,   0,   0,   0,  52,   0, 
-      0,   7,  34,   0,  16,   0, 
-      2,   0,   0,   0,  58,   0, 
-     16,   0,   2,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  51,   0,   0,   7, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     28,   0,   0,   5,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  52,   0,   0,   7, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     51,   0,   0,   7, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5, 130,   0,  16,   0, 
-      3,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     86,   0,   0,   5,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  86,   0,   0,   5, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  28,   0, 
-      0,   5,  50,   0,  16,   0, 
-      7,   0,   0,   0,  70,   0, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5, 194,   0, 
-     16,   0,   7,   0,   0,   0, 
-    166,  14,  16,   0,   7,   0, 
-      0,   0,  38,   0,   0,   9, 
-      0, 208,   0,   0,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,   7,   0, 
-      0,   0,  70, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  30,   0,   0,   7, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     35,   0,   0,  10,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  35,   0, 
-      0,  10,  34,   0,  16,   0, 
-      2,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  67,   0, 128, 131, 153, 
-     25,   0,  50,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  70, 224,  17,   0, 
-      0,   0,   0,   0,  54,   0, 
-      0,   5, 194,   0,  16,   0, 
-      7,   0,   0,   0,   6,   4, 
-     16,   0,   7,   0,   0,   0, 
-    201,   0,   0,   5,  66,   0, 
-     16,   0,   6,   0,   0,   0, 
-    230,  14,  16,   0,   7,   0, 
-      0,   0,  18,   0,   0,   1, 
-     32,   0,   0,   8,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      1,  64,   0,   0,   2,   0, 
-      0,   0,  31,   0,   4,   3, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  55,   0,   0,  10, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  10, 128,  32,   0, 
-      0,   0,   0,   0,   7,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0, 128,   1,  64, 
-      0,   0,   0,   0,   0,  63, 
-      0,   0,   0,   7,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-    230,  10,  16,   0,   0,   0, 
-      0,   0,  86,   5,  16,   0, 
-      2,   0,   0,   0,  49,   0, 
-      0,   7,  34,   0,  16,   0, 
-      2,   0,   0,   0,  58,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,   0,   0,   0,   8, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  58,   0, 
-     16, 128,  65,   0,   0,   0, 
-      2,   0,   0,   0,  14,   0, 
-      0,   7,  66,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  28,   0,   0,   5, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  86,   0, 
-      0,   5, 130,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-     50,   0,   0,  10, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16, 128,  65,   0, 
-      0,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,   1,   0, 
-      0,   7,  66,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-      1,  64,   0,   0,   1,   0, 
-      0,   0,   0,   0,   0,   7, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-      0,   0,   0,   8, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     42,   0,  16,   0,   0,   0, 
-      0,   0,  58,   0,  16, 128, 
-     65,   0,   0,   0,   3,   0, 
-      0,   0,  55,   0,   0,   9, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  49,   0,   0,   7, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      0,   0,   0,   0,  58,   0, 
-     16,   0,   2,   0,   0,   0, 
-      0,   0,   0,   8, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16, 128,  65,   0, 
-      0,   0,   0,   0,   0,   0, 
-     58,   0,  16,   0,   2,   0, 
-      0,   0,  14,   0,   0,   7, 
-     18,   0,  16,   0,   9,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5,  18,   0, 
-     16,   0,   9,   0,   0,   0, 
-     10,   0,  16,   0,   9,   0, 
-      0,   0,  86,   0,   0,   5, 
-     34,   0,  16,   0,   9,   0, 
-      0,   0,  10,   0,  16,   0, 
-      9,   0,   0,   0,  50,   0, 
-      0,  10,  18,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16, 128,  65,   0,   0,   0, 
-      9,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,   1,   0,   0,   7, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      9,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   8,  18,   0, 
-     16,   0,   9,   0,   0,   0, 
-     42,   0,  16,   0,   0,   0, 
-      0,   0,  10,   0,  16, 128, 
-     65,   0,   0,   0,   7,   0, 
-      0,   0,   0,   0,   0,   7, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     55,   0,   0,   9,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   9,   0,   0,   0, 
-     55,   0,   0,   9,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   2,   0,   0,   0, 
-     55,   0,   0,   9,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  49,   0,   0,   7, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-      0,   0,   0,   8,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  10,   0,  16, 128, 
-     65,   0,   0,   0,   1,   0, 
-      0,   0,  14,   0,   0,   7, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  86,   0,   0,   5, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  50,   0, 
-      0,  10,  18,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16, 128,  65,   0,   0,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,   1,   0,   0,   7, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   7, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,   0,   0, 
-      0,   8,  18,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   0,   0,   0,   0, 
-     10,   0,  16, 128,  65,   0, 
-      0,   0,   7,   0,   0,   0, 
-     55,   0,   0,   9,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     49,   0,   0,   7,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   0,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   8, 130,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16, 128,  65,   0,   0,   0, 
-      0,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     14,   0,   0,   7,  18,   0, 
-     16,   0,   9,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  28,   0, 
-      0,   5,  18,   0,  16,   0, 
-      9,   0,   0,   0,  10,   0, 
-     16,   0,   9,   0,   0,   0, 
-     86,   0,   0,   5,  34,   0, 
-     16,   0,   9,   0,   0,   0, 
-     10,   0,  16,   0,   9,   0, 
-      0,   0,  50,   0,   0,  10, 
-     34,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16, 128, 
-     65,   0,   0,   0,   9,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-      1,   0,   0,   7, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   9,   0, 
-      0,   0,   1,  64,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   8,  18,   0,  16,   0, 
-      9,   0,   0,   0,  58,   0, 
-     16,   0,   0,   0,   0,   0, 
-     26,   0,  16, 128,  65,   0, 
-      0,   0,   7,   0,   0,   0, 
-      0,   0,   0,   7,  34,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  55,   0, 
-      0,   9,  34,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      9,   0,   0,   0,  55,   0, 
-      0,   9,  34,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  55,   0, 
-      0,   9, 130,   0,  16,   0, 
-      3,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  28,   0, 
-      0,   5, 130,   0,  16,   0, 
-      3,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     86,   0,   0,   5,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  86,   0,   0,   5, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  28,   0, 
-      0,   5,  50,   0,  16,   0, 
-      7,   0,   0,   0,  70,   0, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5, 194,   0, 
-     16,   0,   7,   0,   0,   0, 
-    166,  14,  16,   0,   7,   0, 
-      0,   0,  38,   0,   0,   9, 
-      0, 208,   0,   0,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,   7,   0, 
-      0,   0,  70, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  30,   0,   0,   7, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     35,   0,   0,  10,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  35,   0, 
-      0,  10,  34,   0,  16,   0, 
-      2,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  67,   0, 128, 131, 153, 
-     25,   0,  50,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  70, 224,  17,   0, 
-      0,   0,   0,   0,  54,   0, 
-      0,   5, 194,   0,  16,   0, 
-      7,   0,   0,   0,   6,   4, 
-     16,   0,   7,   0,   0,   0, 
-    201,   0,   0,   5,  66,   0, 
-     16,   0,   6,   0,   0,   0, 
-    230,  14,  16,   0,   7,   0, 
-      0,   0,  18,   0,   0,   1, 
-     54,   0,   0,   5,  66,   0, 
-     16,   0,   6,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  21,   0,   0,   1, 
-     21,   0,   0,   1,  31,   0, 
-      4,   3,  42,   0,  16,   0, 
-      3,   0,   0,   0,  30,   0, 
-      0,  11, 194,   0,  16,   0, 
-      3,   0,   0,   0, 246, 139, 
-     32,   0,   0,   0,   0,   0, 
-      1,   0,   0,   0,   2,  64, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0, 255, 255, 
-    255, 255, 255, 255, 255, 255, 
-     86,   0,   0,   5, 194,   0, 
-     16,   0,   3,   0,   0,   0, 
-    166,  14,  16,   0,   3,   0, 
-      0,   0,  52,   0,   0,   7, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      2,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     51,   0,   0,   7,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  28,   0, 
-      0,   5,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     52,   0,   0,   7,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,  51,   0, 
-      0,   7,  66,   0,  16,   0, 
-      3,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  28,   0,   0,   5, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  86,   0, 
-      0,   5,  66,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     86,   0,   0,   5, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  28,   0,   0,   5, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0,  70,   0,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5, 194,   0,  16,   0, 
-      3,   0,   0,   0, 166,  14, 
-     16,   0,   3,   0,   0,   0, 
-     38,   0,   0,   9,   0, 208, 
-      0,   0,  50,   0,  16,   0, 
-      7,   0,   0,   0,  70,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     30,   0,   0,   7,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  35,   0, 
-      0,  10,  34,   0,  16,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     42, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  35,   0,   0,  10, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-    167,   0,   0, 139,   2,  67, 
-      0, 128, 131, 153,  25,   0, 
-    194,   0,  16,   0,   3,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-      6, 228,  17,   0,   0,   0, 
-      0,   0,  54,   0,   0,   5, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0, 230,  10,  16,   0, 
-      3,   0,   0,   0, 201,   0, 
-      0,   5,  66,   0,  16,   0, 
-      8,   0,   0,   0,  70,   4, 
-     16,   0,   7,   0,   0,   0, 
-     18,   0,   0,   1,  32,   0, 
-      0,   8,  34,   0,  16,   0, 
-      2,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   1,  64, 
-      0,   0,   2,   0,   0,   0, 
-     31,   0,   4,   3,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     55,   0,   0,  10,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     10, 128,  32,   0,   0,   0, 
-      0,   0,   7,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0, 128,   1,  64,   0,   0, 
-      0,   0,   0,  63,   0,   0, 
-      0,   7, 194,   0,  16,   0, 
-      3,   0,   0,   0, 166,  14, 
-     16,   0,   0,   0,   0,   0, 
-     86,   5,  16,   0,   2,   0, 
-      0,   0,  49,   0,   0,   7, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-      0,   0,   0,   8,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  10,   0,  16, 128, 
-     65,   0,   0,   0,   2,   0, 
-      0,   0,  14,   0,   0,   7, 
-     34,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5,  34,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  86,   0,   0,   5, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  50,   0, 
-      0,  10,  18,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16, 128,  65,   0,   0,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,   1,   0,   0,   7, 
-     34,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   7,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,   0,   0, 
-      0,   8,  18,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   0,   0,   0,   0, 
-     10,   0,  16, 128,  65,   0, 
-      0,   0,   7,   0,   0,   0, 
-     55,   0,   0,   9,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-     49,   0,   0,   7,  34,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   0,   0, 
-      0,   0,  10,   0,  16,   0, 
-      2,   0,   0,   0,   0,   0, 
-      0,   8,  66,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16, 128,  65,   0,   0,   0, 
-      0,   0,   0,   0,  10,   0, 
-     16,   0,   2,   0,   0,   0, 
-     14,   0,   0,   7, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5, 130,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     86,   0,   0,   5,  18,   0, 
-     16,   0,   9,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  50,   0,   0,  10, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  10,   0,  16, 128, 
-     65,   0,   0,   0,   9,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-      1,   0,   0,   7,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,   1,  64,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   8, 130,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   0,   0,   0,   0, 
-     42,   0,  16, 128,  65,   0, 
-      0,   0,   3,   0,   0,   0, 
-      0,   0,   0,   7,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  55,   0, 
-      0,   9,  66,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,  55,   0, 
-      0,   9,  66,   0,  16,   0, 
-      3,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  10,   0,  16,   0, 
-      2,   0,   0,   0,  55,   0, 
-      0,   9,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     49,   0,   0,   7,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   8,  18,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-     10,   0,  16, 128,  65,   0, 
-      0,   0,   1,   0,   0,   0, 
-     14,   0,   0,   7,  34,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5,  34,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     86,   0,   0,   5,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  50,   0,   0,  10, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16, 128, 
-     65,   0,   0,   0,   7,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-      1,   0,   0,   7,  34,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,   1,  64,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   7,  66,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,   0,   0,   0,   8, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16,   0, 
-      0,   0,   0,   0,  10,   0, 
-     16, 128,  65,   0,   0,   0, 
-      7,   0,   0,   0,  55,   0, 
-      0,   9,  18,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  49,   0, 
-      0,   7,  34,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   0,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,   0,   0,   0,   8, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16, 128, 
-     65,   0,   0,   0,   0,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  14,   0, 
-      0,   7, 130,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  28,   0,   0,   5, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,  86,   0, 
-      0,   5,  18,   0,  16,   0, 
-      9,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     50,   0,   0,  10, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10,   0,  16, 128,  65,   0, 
-      0,   0,   9,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,   1,   0, 
-      0,   7,  66,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-      1,  64,   0,   0,   1,   0, 
-      0,   0,   0,   0,   0,   8, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16,   0, 
-      0,   0,   0,   0,  58,   0, 
-     16, 128,  65,   0,   0,   0, 
-      3,   0,   0,   0,   0,   0, 
-      0,   7, 130,   0,  16,   0, 
-      3,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  55,   0,   0,   9, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  55,   0,   0,   9, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  55,   0,   0,   9, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  28,   0,   0,   5, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  86,   0, 
-      0,   5,  66,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     86,   0,   0,   5, 130,   0, 
-     16,   0,   3,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  28,   0,   0,   5, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0,  70,   0,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5, 194,   0,  16,   0, 
-      3,   0,   0,   0, 166,  14, 
-     16,   0,   3,   0,   0,   0, 
-     38,   0,   0,   9,   0, 208, 
-      0,   0,  50,   0,  16,   0, 
-      7,   0,   0,   0,  70,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     30,   0,   0,   7,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  35,   0, 
-      0,  10,  34,   0,  16,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     42, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  35,   0,   0,  10, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-    167,   0,   0, 139,   2,  67, 
-      0, 128, 131, 153,  25,   0, 
-    194,   0,  16,   0,   3,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-      6, 228,  17,   0,   0,   0, 
-      0,   0,  54,   0,   0,   5, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0, 230,  10,  16,   0, 
-      3,   0,   0,   0, 201,   0, 
-      0,   5,  66,   0,  16,   0, 
-      8,   0,   0,   0,  70,   4, 
-     16,   0,   7,   0,   0,   0, 
-     18,   0,   0,   1,  54,   0, 
-      0,   5,  66,   0,  16,   0, 
-      8,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     21,   0,   0,   1,  21,   0, 
-      0,   1,  21,   0,   0,   1, 
-      0,   0,   0,  10, 194,   0, 
-     16,   0,   3,   0,   0,   0, 
-      6,   0,  16,   0,   1,   0, 
-      0,   0,   2,  64,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0, 128,  63, 
-      0,   0,   0,  64,  31,   0, 
-      0,   4,  58, 128,  32,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,  29,   0,   0,  10, 
-    114,   0,  16,   0,   7,   0, 
-      0,   0, 230,   8,  16,   0, 
-      2,   0,   0,   0,   2,  64, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-     28,   0,   0,   5, 114,   0, 
-     16,   0,   9,   0,   0,   0, 
-    230,   8,  16,   0,   2,   0, 
-      0,   0,  79,   0,   0,   8, 
-    114,   0,  16,   0,   9,   0, 
-      0,   0,  70,   2,  16,   0, 
-      9,   0,   0,   0, 246, 143, 
-     32,   0,   0,   0,   0,   0, 
-      1,   0,   0,   0,   1,   0, 
-      0,   7, 114,   0,  16,   0, 
-      7,   0,   0,   0,  70,   2, 
-     16,   0,   7,   0,   0,   0, 
-     70,   2,  16,   0,   9,   0, 
-      0,   0,  29,   0,   0,   7, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-      1,   0,   0,   7, 114,   0, 
-     16,   0,   7,   0,   0,   0, 
-      6,   0,  16,   0,   1,   0, 
-      0,   0,  70,   2,  16,   0, 
-      7,   0,   0,   0,  28,   0, 
-      0,   5,  34,   0,  16,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     79,   0,   0,   8,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   0,   1,   0,   0,   7, 
-    114,   0,  16,   0,   7,   0, 
-      0,   0,  86,   5,  16,   0, 
-      2,   0,   0,   0,  70,   2, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5,  50,   0, 
-     16,   0,   9,   0,   0,   0, 
-     70,   0,  16,   0,   3,   0, 
-      0,   0,  28,   0,   0,   5, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5,  66,   0,  16,   0, 
-      9,   0,   0,   0,  42,   0, 
-     16,   0,   2,   0,   0,   0, 
-     38,   0,   0,   9,   0, 208, 
-      0,   0,  50,   0,  16,   0, 
-      9,   0,   0,   0,  70,   0, 
-     16,   0,   9,   0,   0,   0, 
-     70, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     30,   0,   0,   7,  18,   0, 
-     16,   0,   9,   0,   0,   0, 
-     26,   0,  16,   0,   9,   0, 
-      0,   0,  10,   0,  16,   0, 
-      9,   0,   0,   0,  35,   0, 
-      0,  10, 130,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   9,   0, 
-      0,   0,  35,   0,   0,  10, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      9,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-    167,   0,   0, 139,   2,  67, 
-      0, 128, 131, 153,  25,   0, 
-     50,   0,  16,   0,   9,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     70, 224,  17,   0,   0,   0, 
-      0,   0,  54,   0,   0,   5, 
-    194,   0,  16,   0,   9,   0, 
-      0,   0,   6,   4,  16,   0, 
-      9,   0,   0,   0, 201,   0, 
-      0,   5, 130,   0,  16,   0, 
-      7,   0,   0,   0, 230,  14, 
-     16,   0,   9,   0,   0,   0, 
-      1,   0,   0,   7,  34,   0, 
-     16,   0,   5,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  29,   0, 
-      0,   7,  18,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  28,   0,   0,   5, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      1,   0,   0,   0,  79,   0, 
-      0,   8, 130,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   1,   0,   0,   0, 
-      1,   0,   0,   7,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,   1,   0, 
-      0,   7,  18,   0,  16,   0, 
-      1,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,   1,   0,   0,   7, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     28,   0,   0,   5, 146,   0, 
-     16,   0,   7,   0,   0,   0, 
-      6,   4,  16,   0,   3,   0, 
-      0,   0,  28,   0,   0,   5, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5,  18,   0,  16,   0, 
-      9,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-     38,   0,   0,   9,   0, 208, 
-      0,   0, 146,   0,  16,   0, 
-      7,   0,   0,   0,   6,  12, 
-     16,   0,   7,   0,   0,   0, 
-      6, 132,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     30,   0,   0,   7,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  35,   0, 
-      0,  10,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  35,   0,   0,  10, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      9,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-    167,   0,   0, 139,   2,  67, 
-      0, 128, 131, 153,  25,   0, 
-    146,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-      6, 228,  17,   0,   0,   0, 
-      0,   0,  54,   0,   0,   5, 
-     50,   0,  16,   0,   9,   0, 
-      0,   0, 198,   0,  16,   0, 
-      7,   0,   0,   0, 201,   0, 
-      0,   5,  34,   0,  16,   0, 
-      2,   0,   0,   0,  70,   4, 
-     16,   0,   9,   0,   0,   0, 
-      1,   0,   0,   7,  34,   0, 
-     16,   0,   4,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  28,   0, 
-      0,   5, 146,   0,  16,   0, 
-      7,   0,   0,   0,   6,   4, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  28,   0,   0,   5, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  58,   0,  16,   0, 
-      2,   0,   0,   0,  38,   0, 
-      0,   9,   0, 208,   0,   0, 
-    146,   0,  16,   0,   7,   0, 
-      0,   0,   6,  12,  16,   0, 
-      7,   0,   0,   0,   6, 132, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  30,   0, 
-      0,   7,  18,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  35,   0,   0,  10, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  42, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     35,   0,   0,  10,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  58, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0, 167,   0, 
-      0, 139,   2,  67,   0, 128, 
-    131, 153,  25,   0, 146,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,   6, 228, 
-     17,   0,   0,   0,   0,   0, 
-     54,   0,   0,   5,  50,   0, 
-     16,   0,   9,   0,   0,   0, 
-    198,   0,  16,   0,   7,   0, 
-      0,   0, 201,   0,   0,   5, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  70,   4,  16,   0, 
-      9,   0,   0,   0,   1,   0, 
-      0,   7,  34,   0,  16,   0, 
-      6,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  28,   0,   0,   5, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0,  70,   0,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5,  18,   0,  16,   0, 
-      1,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   2,   0, 
-      0,   0,  38,   0,   0,   9, 
-      0, 208,   0,   0,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,   7,   0, 
-      0,   0,  70, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  30,   0,   0,   7, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     35,   0,   0,  10,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  35,   0, 
-      0,  10,  18,   0,  16,   0, 
-      1,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  67,   0, 128, 131, 153, 
-     25,   0,  50,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  70, 224,  17,   0, 
-      0,   0,   0,   0,  54,   0, 
-      0,   5,  50,   0,  16,   0, 
-      9,   0,   0,   0,  70,   0, 
-     16,   0,   7,   0,   0,   0, 
-    201,   0,   0,   5,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     70,   4,  16,   0,   9,   0, 
-      0,   0,   1,   0,   0,   7, 
-     34,   0,  16,   0,   8,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-     18,   0,   0,   1,  32,   0, 
-      0,   8,  18,   0,  16,   0, 
-      1,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-     31,   0,   4,   3,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     30,   0,   0,  11,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-    182, 143,  32,   0,   0,   0, 
-      0,   0,   1,   0,   0,   0, 
-      2,  64,   0,   0, 255, 255, 
-    255, 255, 255, 255, 255, 255, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,  86,   0,   0,   5, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0,  70,   0,  16,   0, 
-      7,   0,   0,   0,  52,   0, 
-      0,   7,  34,   0,  16,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16,   0,   2,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  51,   0,   0,   7, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     28,   0,   0,   5,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  52,   0,   0,   7, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     51,   0,   0,   7,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  28,   0, 
-      0,   5,  18,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     86,   0,   0,   5,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  86,   0,   0,   5, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  28,   0, 
-      0,   5,  50,   0,  16,   0, 
-      7,   0,   0,   0,  70,   0, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5, 194,   0, 
-     16,   0,   7,   0,   0,   0, 
-    166,  14,  16,   0,   7,   0, 
-      0,   0,  38,   0,   0,   9, 
-      0, 208,   0,   0,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,   7,   0, 
-      0,   0,  70, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  30,   0,   0,   7, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     35,   0,   0,  10,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  35,   0, 
-      0,  10,  34,   0,  16,   0, 
-      2,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  67,   0, 128, 131, 153, 
-     25,   0,  50,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  70, 224,  17,   0, 
-      0,   0,   0,   0,  54,   0, 
-      0,   5, 194,   0,  16,   0, 
-      7,   0,   0,   0,   6,   4, 
-     16,   0,   7,   0,   0,   0, 
-    201,   0,   0,   5,  34,   0, 
-     16,   0,   5,   0,   0,   0, 
-    230,  14,  16,   0,   7,   0, 
-      0,   0,  18,   0,   0,   1, 
-     32,   0,   0,   8,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      1,  64,   0,   0,   2,   0, 
-      0,   0,  31,   0,   4,   3, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  55,   0,   0,  10, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  10, 128,  32,   0, 
-      0,   0,   0,   0,   7,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0, 128,   1,  64, 
-      0,   0,   0,   0,   0,  63, 
-      0,   0,   0,   7,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-    230,  10,  16,   0,   0,   0, 
-      0,   0,  86,   5,  16,   0, 
-      2,   0,   0,   0,  49,   0, 
-      0,   7,  34,   0,  16,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,   0,   0,   0,   8, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  42,   0, 
-     16, 128,  65,   0,   0,   0, 
-      2,   0,   0,   0,  14,   0, 
-      0,   7, 130,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  28,   0,   0,   5, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,  86,   0, 
-      0,   5,  18,   0,  16,   0, 
-      9,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     50,   0,   0,  10,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16, 128,  65,   0, 
-      0,   0,   9,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,   1,   0, 
-      0,   7, 130,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-      1,  64,   0,   0,   1,   0, 
-      0,   0,   0,   0,   0,   7, 
-     18,   0,  16,   0,   9,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-      0,   0,   0,   8,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   0,   0, 
-      0,   0,  42,   0,  16, 128, 
-     65,   0,   0,   0,   7,   0, 
-      0,   0,  55,   0,   0,   9, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   9,   0, 
-      0,   0,  49,   0,   0,   7, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      0,   0,   0,   0,  42,   0, 
-     16,   0,   2,   0,   0,   0, 
-      0,   0,   0,   8,  18,   0, 
-     16,   0,   9,   0,   0,   0, 
-     42,   0,  16, 128,  65,   0, 
-      0,   0,   0,   0,   0,   0, 
-     42,   0,  16,   0,   2,   0, 
-      0,   0,  14,   0,   0,   7, 
-     34,   0,  16,   0,   9,   0, 
-      0,   0,  10,   0,  16,   0, 
-      9,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5,  34,   0, 
-     16,   0,   9,   0,   0,   0, 
-     26,   0,  16,   0,   9,   0, 
-      0,   0,  86,   0,   0,   5, 
-     66,   0,  16,   0,   9,   0, 
-      0,   0,  26,   0,  16,   0, 
-      9,   0,   0,   0,  50,   0, 
-      0,  10,  18,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16, 128,  65,   0,   0,   0, 
-      9,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   9,   0, 
-      0,   0,   1,   0,   0,   7, 
-     18,   0,  16,   0,   9,   0, 
-      0,   0,  26,   0,  16,   0, 
-      9,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   8,  34,   0, 
-     16,   0,   9,   0,   0,   0, 
-     42,   0,  16,   0,   0,   0, 
-      0,   0,  10,   0,  16, 128, 
-     65,   0,   0,   0,   7,   0, 
-      0,   0,   0,   0,   0,   7, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     55,   0,   0,   9,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   9,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   9,   0,   0,   0, 
-     55,   0,   0,   9,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   2,   0,   0,   0, 
-     55,   0,   0,   9,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  49,   0,   0,   7, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-      0,   0,   0,   8,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  42,   0,  16, 128, 
-     65,   0,   0,   0,   3,   0, 
-      0,   0,  14,   0,   0,   7, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  86,   0,   0,   5, 
-     18,   0,  16,   0,   9,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,  50,   0, 
-      0,  10,  66,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16, 128,  65,   0,   0,   0, 
-      9,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,   1,   0,   0,   7, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   7,  18,   0, 
-     16,   0,   9,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,   0,   0, 
-      0,   8,  66,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   0,   0,   0,   0, 
-     42,   0,  16, 128,  65,   0, 
-      0,   0,   7,   0,   0,   0, 
-     55,   0,   0,   9,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   9,   0,   0,   0, 
-     49,   0,   0,   7, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   0,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,   0,   0, 
-      0,   8,  18,   0,  16,   0, 
-      9,   0,   0,   0,  58,   0, 
-     16, 128,  65,   0,   0,   0, 
-      0,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     14,   0,   0,   7,  34,   0, 
-     16,   0,   9,   0,   0,   0, 
-     10,   0,  16,   0,   9,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  28,   0, 
-      0,   5,  34,   0,  16,   0, 
-      9,   0,   0,   0,  26,   0, 
-     16,   0,   9,   0,   0,   0, 
-     86,   0,   0,   5,  66,   0, 
-     16,   0,   9,   0,   0,   0, 
-     26,   0,  16,   0,   9,   0, 
-      0,   0,  50,   0,   0,  10, 
-     34,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16, 128, 
-     65,   0,   0,   0,   9,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   9,   0,   0,   0, 
-      1,   0,   0,   7,  18,   0, 
-     16,   0,   9,   0,   0,   0, 
-     26,   0,  16,   0,   9,   0, 
-      0,   0,   1,  64,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   8,  34,   0,  16,   0, 
-      9,   0,   0,   0,  58,   0, 
-     16,   0,   0,   0,   0,   0, 
-     26,   0,  16, 128,  65,   0, 
-      0,   0,   7,   0,   0,   0, 
-      0,   0,   0,   7,  34,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  55,   0, 
-      0,   9,  34,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   9,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      9,   0,   0,   0,  55,   0, 
-      0,   9,  34,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  55,   0, 
-      0,   9,  18,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  28,   0, 
-      0,   5,  18,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     86,   0,   0,   5,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  86,   0,   0,   5, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  28,   0, 
-      0,   5,  50,   0,  16,   0, 
-      7,   0,   0,   0,  70,   0, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5, 194,   0, 
-     16,   0,   7,   0,   0,   0, 
-    166,  14,  16,   0,   7,   0, 
-      0,   0,  38,   0,   0,   9, 
-      0, 208,   0,   0,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,   7,   0, 
-      0,   0,  70, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  30,   0,   0,   7, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     35,   0,   0,  10,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  35,   0, 
-      0,  10,  34,   0,  16,   0, 
-      2,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  67,   0, 128, 131, 153, 
-     25,   0,  50,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  70, 224,  17,   0, 
-      0,   0,   0,   0,  54,   0, 
-      0,   5, 194,   0,  16,   0, 
-      7,   0,   0,   0,   6,   4, 
-     16,   0,   7,   0,   0,   0, 
-    201,   0,   0,   5,  34,   0, 
-     16,   0,   5,   0,   0,   0, 
-    230,  14,  16,   0,   7,   0, 
-      0,   0,  18,   0,   0,   1, 
-     54,   0,   0,   5,  34,   0, 
-     16,   0,   5,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  21,   0,   0,   1, 
-     21,   0,   0,   1,  31,   0, 
-      4,   3,  10,   0,  16,   0, 
-      1,   0,   0,   0,  30,   0, 
-      0,  11,  50,   0,  16,   0, 
-      7,   0,   0,   0, 182, 143, 
-     32,   0,   0,   0,   0,   0, 
-      1,   0,   0,   0,   2,  64, 
-      0,   0, 255, 255, 255, 255, 
-    255, 255, 255, 255,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-     86,   0,   0,   5,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,   7,   0, 
-      0,   0,  52,   0,   0,   7, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      1,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     51,   0,   0,   7,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  28,   0, 
-      0,   5,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     52,   0,   0,   7,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,  51,   0, 
-      0,   7,  18,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  28,   0,   0,   5, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  86,   0, 
-      0,   5,  66,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     86,   0,   0,   5, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  28,   0,   0,   5, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0,  70,   0,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5, 194,   0,  16,   0, 
-      7,   0,   0,   0, 166,  14, 
-     16,   0,   7,   0,   0,   0, 
-     38,   0,   0,   9,   0, 208, 
-      0,   0,  50,   0,  16,   0, 
-      7,   0,   0,   0,  70,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     30,   0,   0,   7,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  35,   0, 
-      0,  10,  34,   0,  16,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  35,   0,   0,  10, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-    167,   0,   0, 139,   2,  67, 
-      0, 128, 131, 153,  25,   0, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     70, 224,  17,   0,   0,   0, 
-      0,   0,  54,   0,   0,   5, 
-    194,   0,  16,   0,   7,   0, 
-      0,   0,   6,   4,  16,   0, 
-      7,   0,   0,   0, 201,   0, 
-      0,   5,  34,   0,  16,   0, 
-      4,   0,   0,   0, 230,  14, 
-     16,   0,   7,   0,   0,   0, 
-     18,   0,   0,   1,  32,   0, 
-      0,   8,  34,   0,  16,   0, 
-      2,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   1,  64, 
-      0,   0,   2,   0,   0,   0, 
-     31,   0,   4,   3,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     55,   0,   0,  10,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     10, 128,  32,   0,   0,   0, 
-      0,   0,   7,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0, 128,   1,  64,   0,   0, 
-      0,   0,   0,  63,   0,   0, 
-      0,   7,  50,   0,  16,   0, 
-      7,   0,   0,   0, 230,  10, 
-     16,   0,   0,   0,   0,   0, 
-     86,   5,  16,   0,   2,   0, 
-      0,   0,  49,   0,   0,   7, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      1,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-      0,   0,   0,   8,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  26,   0,  16, 128, 
-     65,   0,   0,   0,   1,   0, 
-      0,   0,  14,   0,   0,   7, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  86,   0,   0,   5, 
-     18,   0,  16,   0,   9,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,  50,   0, 
-      0,  10,  66,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16, 128,  65,   0,   0,   0, 
-      9,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,   1,   0,   0,   7, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   7,  18,   0, 
-     16,   0,   9,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,   0,   0, 
-      0,   8,  66,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   0,   0,   0,   0, 
-     42,   0,  16, 128,  65,   0, 
-      0,   0,   7,   0,   0,   0, 
-     55,   0,   0,   9,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   9,   0,   0,   0, 
-     49,   0,   0,   7, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   0,   0, 
-      0,   0,  26,   0,  16,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   8,  18,   0,  16,   0, 
-      9,   0,   0,   0,  42,   0, 
-     16, 128,  65,   0,   0,   0, 
-      0,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-     14,   0,   0,   7,  34,   0, 
-     16,   0,   9,   0,   0,   0, 
-     10,   0,  16,   0,   9,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  28,   0, 
-      0,   5,  34,   0,  16,   0, 
-      9,   0,   0,   0,  26,   0, 
-     16,   0,   9,   0,   0,   0, 
-     86,   0,   0,   5,  66,   0, 
-     16,   0,   9,   0,   0,   0, 
-     26,   0,  16,   0,   9,   0, 
-      0,   0,  50,   0,   0,  10, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16, 128, 
-     65,   0,   0,   0,   9,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   9,   0,   0,   0, 
-      1,   0,   0,   7,  18,   0, 
-     16,   0,   9,   0,   0,   0, 
-     26,   0,  16,   0,   9,   0, 
-      0,   0,   1,  64,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   8,  34,   0,  16,   0, 
-      9,   0,   0,   0,  42,   0, 
-     16,   0,   0,   0,   0,   0, 
-     10,   0,  16, 128,  65,   0, 
-      0,   0,   7,   0,   0,   0, 
-      0,   0,   0,   7,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  55,   0, 
-      0,   9,  18,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   9,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      9,   0,   0,   0,  55,   0, 
-      0,   9,  18,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      1,   0,   0,   0,  55,   0, 
-      0,   9,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  28,   0, 
-      0,   5,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     49,   0,   0,   7,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   8,  66,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-     42,   0,  16, 128,  65,   0, 
-      0,   0,   3,   0,   0,   0, 
-     14,   0,   0,   7, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  28,   0, 
-      0,   5, 130,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     86,   0,   0,   5,  18,   0, 
-     16,   0,   9,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  50,   0,   0,  10, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16, 128, 
-     65,   0,   0,   0,   9,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-      1,   0,   0,   7, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,   1,  64,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   7,  18,   0,  16,   0, 
-      9,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,   0,   0,   0,   8, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16,   0, 
-      0,   0,   0,   0,  42,   0, 
-     16, 128,  65,   0,   0,   0, 
-      7,   0,   0,   0,  55,   0, 
-      0,   9,  66,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      9,   0,   0,   0,  49,   0, 
-      0,   7, 130,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   0,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,   0,   0,   0,   8, 
-     18,   0,  16,   0,   9,   0, 
-      0,   0,  58,   0,  16, 128, 
-     65,   0,   0,   0,   0,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  14,   0, 
-      0,   7,  34,   0,  16,   0, 
-      9,   0,   0,   0,  10,   0, 
-     16,   0,   9,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  28,   0,   0,   5, 
-     34,   0,  16,   0,   9,   0, 
-      0,   0,  26,   0,  16,   0, 
-      9,   0,   0,   0,  86,   0, 
-      0,   5,  66,   0,  16,   0, 
-      9,   0,   0,   0,  26,   0, 
-     16,   0,   9,   0,   0,   0, 
-     50,   0,   0,  10,  34,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16, 128,  65,   0, 
-      0,   0,   9,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      9,   0,   0,   0,   1,   0, 
-      0,   7,  18,   0,  16,   0, 
-      9,   0,   0,   0,  26,   0, 
-     16,   0,   9,   0,   0,   0, 
-      1,  64,   0,   0,   1,   0, 
-      0,   0,   0,   0,   0,   8, 
-     34,   0,  16,   0,   9,   0, 
-      0,   0,  58,   0,  16,   0, 
-      0,   0,   0,   0,  26,   0, 
-     16, 128,  65,   0,   0,   0, 
-      7,   0,   0,   0,   0,   0, 
-      0,   7,  34,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  55,   0,   0,   9, 
-     34,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      9,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   9,   0, 
-      0,   0,  55,   0,   0,   9, 
-     34,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  55,   0,   0,   9, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  28,   0,   0,   5, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  86,   0, 
-      0,   5,  66,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     86,   0,   0,   5, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  28,   0,   0,   5, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0,  70,   0,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5, 194,   0,  16,   0, 
-      7,   0,   0,   0, 166,  14, 
-     16,   0,   7,   0,   0,   0, 
-     38,   0,   0,   9,   0, 208, 
-      0,   0,  50,   0,  16,   0, 
-      7,   0,   0,   0,  70,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     30,   0,   0,   7,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  35,   0, 
-      0,  10,  34,   0,  16,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  35,   0,   0,  10, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-    167,   0,   0, 139,   2,  67, 
-      0, 128, 131, 153,  25,   0, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     70, 224,  17,   0,   0,   0, 
-      0,   0,  54,   0,   0,   5, 
-    194,   0,  16,   0,   7,   0, 
-      0,   0,   6,   4,  16,   0, 
-      7,   0,   0,   0, 201,   0, 
-      0,   5,  34,   0,  16,   0, 
-      4,   0,   0,   0, 230,  14, 
-     16,   0,   7,   0,   0,   0, 
-     18,   0,   0,   1,  54,   0, 
-      0,   5,  34,   0,  16,   0, 
-      4,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     21,   0,   0,   1,  21,   0, 
-      0,   1,  31,   0,   4,   3, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  30,   0,   0,  11, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0, 182, 143,  32,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   0,   2,  64,   0,   0, 
-    255, 255, 255, 255, 255, 255, 
-    255, 255,   0,   0,   0,   0, 
-      0,   0,   0,   0,  86,   0, 
-      0,   5,  50,   0,  16,   0, 
-      7,   0,   0,   0,  70,   0, 
-     16,   0,   7,   0,   0,   0, 
-     52,   0,   0,   7,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58,   0,  16,   0,   2,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,  51,   0, 
-      0,   7,  34,   0,  16,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  28,   0,   0,   5, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  52,   0, 
-      0,   7,  18,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  51,   0,   0,   7, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  86,   0,   0,   5, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  86,   0, 
-      0,   5, 130,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     28,   0,   0,   5,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,   3,   0, 
-      0,   0,  28,   0,   0,   5, 
-    194,   0,  16,   0,   7,   0, 
-      0,   0, 166,  14,  16,   0, 
-      7,   0,   0,   0,  38,   0, 
-      0,   9,   0, 208,   0,   0, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0,  70,   0,  16,   0, 
-      7,   0,   0,   0,  70, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  30,   0, 
-      0,   7,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  35,   0,   0,  10, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  42, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     35,   0,   0,  10,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  58, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0, 167,   0, 
-      0, 139,   2,  67,   0, 128, 
-    131, 153,  25,   0,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,  70, 224, 
-     17,   0,   0,   0,   0,   0, 
-     54,   0,   0,   5, 194,   0, 
-     16,   0,   7,   0,   0,   0, 
-      6,   4,  16,   0,   7,   0, 
-      0,   0, 201,   0,   0,   5, 
-     34,   0,  16,   0,   6,   0, 
-      0,   0, 230,  14,  16,   0, 
-      7,   0,   0,   0,  18,   0, 
-      0,   1,  32,   0,   0,   8, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  58, 128,  32,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   1,  64,   0,   0, 
-      2,   0,   0,   0,  31,   0, 
-      4,   3,  26,   0,  16,   0, 
-      2,   0,   0,   0,  55,   0, 
-      0,  10,  34,   0,  16,   0, 
-      2,   0,   0,   0,  10, 128, 
-     32,   0,   0,   0,   0,   0, 
-      7,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0, 128, 
-      1,  64,   0,   0,   0,   0, 
-      0,  63,   0,   0,   0,   7, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0, 230,  10,  16,   0, 
-      0,   0,   0,   0,  86,   5, 
-     16,   0,   2,   0,   0,   0, 
-     49,   0,   0,   7,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   8,  66,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-     58,   0,  16, 128,  65,   0, 
-      0,   0,   2,   0,   0,   0, 
-     14,   0,   0,   7, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  28,   0, 
-      0,   5, 130,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     86,   0,   0,   5,  18,   0, 
-     16,   0,   9,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  50,   0,   0,  10, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16, 128, 
-     65,   0,   0,   0,   9,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-      1,   0,   0,   7, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,   1,  64,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   7,  18,   0,  16,   0, 
-      9,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,   0,   0,   0,   8, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      0,   0,   0,   0,  42,   0, 
-     16, 128,  65,   0,   0,   0, 
-      7,   0,   0,   0,  55,   0, 
-      0,   9,  66,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      9,   0,   0,   0,  49,   0, 
-      0,   7, 130,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   0,   0,   0,   0, 
-     58,   0,  16,   0,   2,   0, 
-      0,   0,   0,   0,   0,   8, 
-     18,   0,  16,   0,   9,   0, 
-      0,   0,  42,   0,  16, 128, 
-     65,   0,   0,   0,   0,   0, 
-      0,   0,  58,   0,  16,   0, 
-      2,   0,   0,   0,  14,   0, 
-      0,   7,  34,   0,  16,   0, 
-      9,   0,   0,   0,  10,   0, 
-     16,   0,   9,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  28,   0,   0,   5, 
-     34,   0,  16,   0,   9,   0, 
-      0,   0,  26,   0,  16,   0, 
-      9,   0,   0,   0,  86,   0, 
-      0,   5,  66,   0,  16,   0, 
-      9,   0,   0,   0,  26,   0, 
-     16,   0,   9,   0,   0,   0, 
-     50,   0,   0,  10,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16, 128,  65,   0, 
-      0,   0,   9,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      9,   0,   0,   0,   1,   0, 
-      0,   7,  18,   0,  16,   0, 
-      9,   0,   0,   0,  26,   0, 
-     16,   0,   9,   0,   0,   0, 
-      1,  64,   0,   0,   1,   0, 
-      0,   0,   0,   0,   0,   8, 
-     34,   0,  16,   0,   9,   0, 
-      0,   0,  42,   0,  16,   0, 
-      0,   0,   0,   0,  10,   0, 
-     16, 128,  65,   0,   0,   0, 
-      7,   0,   0,   0,   0,   0, 
-      0,   7,  18,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  55,   0,   0,   9, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      9,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   9,   0, 
-      0,   0,  55,   0,   0,   9, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   2,   0, 
-      0,   0,  55,   0,   0,   9, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  28,   0,   0,   5, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  49,   0, 
-      0,   7,  18,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,   0,   0,   0,   8, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  42,   0, 
-     16, 128,  65,   0,   0,   0, 
-      3,   0,   0,   0,  14,   0, 
-      0,   7, 130,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  28,   0,   0,   5, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,  86,   0, 
-      0,   5,  18,   0,  16,   0, 
-      9,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     50,   0,   0,  10,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16, 128,  65,   0, 
-      0,   0,   9,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,   1,   0, 
-      0,   7, 130,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-      1,  64,   0,   0,   1,   0, 
-      0,   0,   0,   0,   0,   7, 
-     18,   0,  16,   0,   9,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-      0,   0,   0,   8,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   0,   0, 
-      0,   0,  42,   0,  16, 128, 
-     65,   0,   0,   0,   7,   0, 
-      0,   0,  55,   0,   0,   9, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   9,   0, 
-      0,   0,  49,   0,   0,   7, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16,   0, 
-      0,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-      0,   0,   0,   8,  18,   0, 
-     16,   0,   9,   0,   0,   0, 
-     58,   0,  16, 128,  65,   0, 
-      0,   0,   0,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  14,   0,   0,   7, 
-     34,   0,  16,   0,   9,   0, 
-      0,   0,  10,   0,  16,   0, 
-      9,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5,  34,   0, 
-     16,   0,   9,   0,   0,   0, 
-     26,   0,  16,   0,   9,   0, 
-      0,   0,  86,   0,   0,   5, 
-     66,   0,  16,   0,   9,   0, 
-      0,   0,  26,   0,  16,   0, 
-      9,   0,   0,   0,  50,   0, 
-      0,  10,  34,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16, 128,  65,   0,   0,   0, 
-      9,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   9,   0, 
-      0,   0,   1,   0,   0,   7, 
-     18,   0,  16,   0,   9,   0, 
-      0,   0,  26,   0,  16,   0, 
-      9,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   8,  34,   0, 
-     16,   0,   9,   0,   0,   0, 
-     58,   0,  16,   0,   0,   0, 
-      0,   0,  26,   0,  16, 128, 
-     65,   0,   0,   0,   7,   0, 
-      0,   0,   0,   0,   0,   7, 
-     34,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     55,   0,   0,   9,  34,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   9,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   9,   0,   0,   0, 
-     55,   0,   0,   9,  34,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     55,   0,   0,   9,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  86,   0,   0,   5, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  86,   0, 
-      0,   5, 130,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     28,   0,   0,   5,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,   3,   0, 
-      0,   0,  28,   0,   0,   5, 
-    194,   0,  16,   0,   7,   0, 
-      0,   0, 166,  14,  16,   0, 
-      7,   0,   0,   0,  38,   0, 
-      0,   9,   0, 208,   0,   0, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0,  70,   0,  16,   0, 
-      7,   0,   0,   0,  70, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  30,   0, 
-      0,   7,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  35,   0,   0,  10, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  42, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     35,   0,   0,  10,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  58, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0, 167,   0, 
-      0, 139,   2,  67,   0, 128, 
-    131, 153,  25,   0,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,  70, 224, 
-     17,   0,   0,   0,   0,   0, 
-     54,   0,   0,   5, 194,   0, 
-     16,   0,   7,   0,   0,   0, 
-      6,   4,  16,   0,   7,   0, 
-      0,   0, 201,   0,   0,   5, 
-     34,   0,  16,   0,   6,   0, 
-      0,   0, 230,  14,  16,   0, 
-      7,   0,   0,   0,  18,   0, 
-      0,   1,  54,   0,   0,   5, 
-     34,   0,  16,   0,   6,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,  21,   0, 
-      0,   1,  21,   0,   0,   1, 
-     31,   0,   4,   3,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     30,   0,   0,  11,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-    182, 143,  32,   0,   0,   0, 
-      0,   0,   1,   0,   0,   0, 
-      2,  64,   0,   0, 255, 255, 
-    255, 255, 255, 255, 255, 255, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,  86,   0,   0,   5, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0,  70,   0,  16,   0, 
-      7,   0,   0,   0,  52,   0, 
-      0,   7,  18,   0,  16,   0, 
-      1,   0,   0,   0,  10,   0, 
-     16,   0,   2,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  51,   0,   0,   7, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     28,   0,   0,   5,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  52,   0,   0,   7, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     51,   0,   0,   7,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  28,   0, 
-      0,   5,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     86,   0,   0,   5,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  86,   0,   0,   5, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  28,   0, 
-      0,   5,  50,   0,  16,   0, 
-      7,   0,   0,   0,  70,   0, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5, 194,   0, 
-     16,   0,   7,   0,   0,   0, 
-    166,  14,  16,   0,   7,   0, 
-      0,   0,  38,   0,   0,   9, 
-      0, 208,   0,   0,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,   7,   0, 
-      0,   0,  70, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  30,   0,   0,   7, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     35,   0,   0,  10,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  35,   0, 
-      0,  10,  18,   0,  16,   0, 
-      1,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  67,   0, 128, 131, 153, 
-     25,   0,  50,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  70, 224,  17,   0, 
-      0,   0,   0,   0,  54,   0, 
-      0,   5, 194,   0,  16,   0, 
-      7,   0,   0,   0,   6,   4, 
-     16,   0,   7,   0,   0,   0, 
-    201,   0,   0,   5,  34,   0, 
-     16,   0,   8,   0,   0,   0, 
-    230,  14,  16,   0,   7,   0, 
-      0,   0,  18,   0,   0,   1, 
-     32,   0,   0,   8,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      1,  64,   0,   0,   2,   0, 
-      0,   0,  31,   0,   4,   3, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  55,   0,   0,  10, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  10, 128,  32,   0, 
-      0,   0,   0,   0,   7,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0, 128,   1,  64, 
-      0,   0,   0,   0,   0,  63, 
-      0,   0,   0,   7,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-    230,  10,  16,   0,   0,   0, 
-      0,   0,   6,   0,  16,   0, 
-      1,   0,   0,   0,  49,   0, 
-      0,   7,  18,   0,  16,   0, 
-      1,   0,   0,   0,  10,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,   0,   0,   0,   8, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  10,   0, 
-     16, 128,  65,   0,   0,   0, 
-      2,   0,   0,   0,  14,   0, 
-      0,   7,  66,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  28,   0,   0,   5, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  86,   0, 
-      0,   5, 130,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-     50,   0,   0,  10,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58,   0,  16, 128,  65,   0, 
-      0,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,   1,   0, 
-      0,   7,  66,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-      1,  64,   0,   0,   1,   0, 
-      0,   0,   0,   0,   0,   7, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-      0,   0,   0,   8,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   0,   0, 
-      0,   0,  26,   0,  16, 128, 
-     65,   0,   0,   0,   2,   0, 
-      0,   0,  55,   0,   0,   9, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  49,   0,   0,   7, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      0,   0,   0,   0,  10,   0, 
-     16,   0,   2,   0,   0,   0, 
-      0,   0,   0,   8, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16, 128,  65,   0, 
-      0,   0,   0,   0,   0,   0, 
-     10,   0,  16,   0,   2,   0, 
-      0,   0,  14,   0,   0,   7, 
-     18,   0,  16,   0,   9,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5,  18,   0, 
-     16,   0,   9,   0,   0,   0, 
-     10,   0,  16,   0,   9,   0, 
-      0,   0,  86,   0,   0,   5, 
-     34,   0,  16,   0,   9,   0, 
-      0,   0,  10,   0,  16,   0, 
-      9,   0,   0,   0,  50,   0, 
-      0,  10,  18,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16, 128,  65,   0,   0,   0, 
-      9,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,   1,   0,   0,   7, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      9,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   8,  18,   0, 
-     16,   0,   9,   0,   0,   0, 
-     42,   0,  16,   0,   0,   0, 
-      0,   0,  10,   0,  16, 128, 
-     65,   0,   0,   0,   7,   0, 
-      0,   0,   0,   0,   0,   7, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     55,   0,   0,   9,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   9,   0,   0,   0, 
-     55,   0,   0,   9,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   2,   0,   0,   0, 
-     55,   0,   0,   9,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  49,   0,   0,   7, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-      0,   0,   0,   8,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  42,   0,  16, 128, 
-     65,   0,   0,   0,   3,   0, 
-      0,   0,  14,   0,   0,   7, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  86,   0,   0,   5, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  50,   0, 
-      0,  10,  18,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16, 128,  65,   0,   0,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,   1,   0,   0,   7, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   7, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,   0,   0, 
-      0,   8,  18,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   0,   0,   0,   0, 
-     10,   0,  16, 128,  65,   0, 
-      0,   0,   7,   0,   0,   0, 
-     55,   0,   0,   9,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     49,   0,   0,   7,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   0,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,   0,   0, 
-      0,   8, 130,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16, 128,  65,   0,   0,   0, 
-      0,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     14,   0,   0,   7,  18,   0, 
-     16,   0,   9,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  28,   0, 
-      0,   5,  18,   0,  16,   0, 
-      9,   0,   0,   0,  10,   0, 
-     16,   0,   9,   0,   0,   0, 
-     86,   0,   0,   5,  34,   0, 
-     16,   0,   9,   0,   0,   0, 
-     10,   0,  16,   0,   9,   0, 
-      0,   0,  50,   0,   0,  10, 
-     34,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16, 128, 
-     65,   0,   0,   0,   9,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-      1,   0,   0,   7, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   9,   0, 
-      0,   0,   1,  64,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   8,  18,   0,  16,   0, 
-      9,   0,   0,   0,  58,   0, 
-     16,   0,   0,   0,   0,   0, 
-     26,   0,  16, 128,  65,   0, 
-      0,   0,   7,   0,   0,   0, 
-      0,   0,   0,   7,  34,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  55,   0, 
-      0,   9,  34,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      9,   0,   0,   0,  55,   0, 
-      0,   9,  66,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  55,   0, 
-      0,   9,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     86,   0,   0,   5,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  86,   0,   0,   5, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  28,   0, 
-      0,   5,  50,   0,  16,   0, 
-      7,   0,   0,   0,  70,   0, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5, 194,   0, 
-     16,   0,   7,   0,   0,   0, 
-    166,  14,  16,   0,   7,   0, 
-      0,   0,  38,   0,   0,   9, 
-      0, 208,   0,   0,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,   7,   0, 
-      0,   0,  70, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  30,   0,   0,   7, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     35,   0,   0,  10,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  35,   0, 
-      0,  10,  18,   0,  16,   0, 
-      1,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  67,   0, 128, 131, 153, 
-     25,   0,  50,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  70, 224,  17,   0, 
-      0,   0,   0,   0,  54,   0, 
-      0,   5, 194,   0,  16,   0, 
-      7,   0,   0,   0,   6,   4, 
-     16,   0,   7,   0,   0,   0, 
-    201,   0,   0,   5,  34,   0, 
-     16,   0,   8,   0,   0,   0, 
-    230,  14,  16,   0,   7,   0, 
-      0,   0,  18,   0,   0,   1, 
-     54,   0,   0,   5,  34,   0, 
-     16,   0,   8,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  21,   0,   0,   1, 
-     21,   0,   0,   1,  21,   0, 
-      0,   1,  31,   0,   0,   4, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-     29,   0,   0,  10, 114,   0, 
-     16,   0,   7,   0,   0,   0, 
-    230,   8,  16,   0,   2,   0, 
-      0,   0,   2,  64,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,  28,   0, 
-      0,   5, 114,   0,  16,   0, 
-      9,   0,   0,   0, 230,   8, 
-     16,   0,   2,   0,   0,   0, 
-     79,   0,   0,   8, 114,   0, 
-     16,   0,   9,   0,   0,   0, 
-     70,   2,  16,   0,   9,   0, 
-      0,   0, 246, 143,  32,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   0,   1,   0,   0,   7, 
-    114,   0,  16,   0,   7,   0, 
-      0,   0,  70,   2,  16,   0, 
-      7,   0,   0,   0,  70,   2, 
-     16,   0,   9,   0,   0,   0, 
-     29,   0,   0,   7,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   7, 114,   0,  16,   0, 
-      7,   0,   0,   0,   6,   0, 
-     16,   0,   1,   0,   0,   0, 
-     70,   2,  16,   0,   7,   0, 
-      0,   0,  28,   0,   0,   5, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,  79,   0, 
-      0,   8,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42, 128,  32,   0,   0,   0, 
-      0,   0,   1,   0,   0,   0, 
-      1,   0,   0,   7, 114,   0, 
-     16,   0,   7,   0,   0,   0, 
-     86,   5,  16,   0,   2,   0, 
-      0,   0,  70,   2,  16,   0, 
-      7,   0,   0,   0,  28,   0, 
-      0,   5,  50,   0,  16,   0, 
-      9,   0,   0,   0,  70,   0, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  28,   0,   0,   5, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      2,   0,   0,   0,  38,   0, 
-      0,   9,   0, 208,   0,   0, 
-     50,   0,  16,   0,   9,   0, 
-      0,   0,  70,   0,  16,   0, 
-      9,   0,   0,   0,  70, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  30,   0, 
-      0,   7,  18,   0,  16,   0, 
-      9,   0,   0,   0,  26,   0, 
-     16,   0,   9,   0,   0,   0, 
-     10,   0,  16,   0,   9,   0, 
-      0,   0,  35,   0,   0,  10, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  42, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16,   0,   9,   0,   0,   0, 
-     35,   0,   0,  10,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  58, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0, 167,   0, 
-      0, 139,   2,  67,   0, 128, 
-    131, 153,  25,   0,  50,   0, 
-     16,   0,   9,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,  70, 224, 
-     17,   0,   0,   0,   0,   0, 
-     54,   0,   0,   5, 194,   0, 
-     16,   0,   9,   0,   0,   0, 
-      6,   4,  16,   0,   9,   0, 
-      0,   0, 201,   0,   0,   5, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0, 230,  14,  16,   0, 
-      9,   0,   0,   0,   1,   0, 
-      0,   7, 130,   0,  16,   0, 
-      5,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  29,   0,   0,   7, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  26,   0,  16,   0, 
-      1,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     28,   0,   0,   5,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   1,   0, 
-      0,   0,  79,   0,   0,   8, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      1,   0,   0,   0,   1,   0, 
-      0,   7,  66,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,   1,   0,   0,   7, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-      1,   0,   0,   7,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  28,   0, 
-      0,   5, 146,   0,  16,   0, 
-      7,   0,   0,   0,   6,   4, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  28,   0,   0,   5, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  26,   0,  16,   0, 
-      1,   0,   0,   0,  38,   0, 
-      0,   9,   0, 208,   0,   0, 
-    146,   0,  16,   0,   7,   0, 
-      0,   0,   6,  12,  16,   0, 
-      7,   0,   0,   0,   6, 132, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  30,   0, 
-      0,   7,  18,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  35,   0,   0,  10, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  42, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     35,   0,   0,  10,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  58, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0, 167,   0, 
-      0, 139,   2,  67,   0, 128, 
-    131, 153,  25,   0, 146,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,   6, 228, 
-     17,   0,   0,   0,   0,   0, 
-     54,   0,   0,   5,  50,   0, 
-     16,   0,   9,   0,   0,   0, 
-    198,   0,  16,   0,   7,   0, 
-      0,   0, 201,   0,   0,   5, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  70,   4,  16,   0, 
-      9,   0,   0,   0,   1,   0, 
-      0,   7, 130,   0,  16,   0, 
-      4,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  28,   0,   0,   5, 
-    146,   0,  16,   0,   7,   0, 
-      0,   0,   6,   4,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5,  18,   0,  16,   0, 
-      1,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58,   0,  16,   0,   2,   0, 
-      0,   0,  38,   0,   0,   9, 
-      0, 208,   0,   0, 146,   0, 
-     16,   0,   7,   0,   0,   0, 
-      6,  12,  16,   0,   7,   0, 
-      0,   0,   6, 132,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  30,   0,   0,   7, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     35,   0,   0,  10,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  35,   0, 
-      0,  10,  18,   0,  16,   0, 
-      1,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  67,   0, 128, 131, 153, 
-     25,   0, 146,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,   6, 228,  17,   0, 
-      0,   0,   0,   0,  54,   0, 
-      0,   5,  50,   0,  16,   0, 
-      9,   0,   0,   0, 198,   0, 
-     16,   0,   7,   0,   0,   0, 
-    201,   0,   0,   5,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     70,   4,  16,   0,   9,   0, 
-      0,   0,   1,   0,   0,   7, 
-    130,   0,  16,   0,   6,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,   3,   0, 
-      0,   0,  28,   0,   0,   5, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5,  34,   0,  16,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16,   0,   2,   0,   0,   0, 
-     38,   0,   0,   9,   0, 208, 
-      0,   0,  50,   0,  16,   0, 
-      7,   0,   0,   0,  70,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     30,   0,   0,   7,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  35,   0, 
-      0,  10,  18,   0,  16,   0, 
-      1,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     42, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  35,   0,   0,  10, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-    167,   0,   0, 139,   2,  67, 
-      0, 128, 131, 153,  25,   0, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     70, 224,  17,   0,   0,   0, 
-      0,   0,  54,   0,   0,   5, 
-     50,   0,  16,   0,   9,   0, 
-      0,   0,  70,   0,  16,   0, 
-      7,   0,   0,   0, 201,   0, 
-      0,   5,  18,   0,  16,   0, 
-      1,   0,   0,   0,  70,   4, 
-     16,   0,   9,   0,   0,   0, 
-      1,   0,   0,   7, 130,   0, 
-     16,   0,   8,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  18,   0, 
-      0,   1,  32,   0,   0,   8, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  58, 128,  32,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,   1,  64,   0,   0, 
-      1,   0,   0,   0,  31,   0, 
-      4,   3,  10,   0,  16,   0, 
-      1,   0,   0,   0,  30,   0, 
-      0,  11,  50,   0,  16,   0, 
-      7,   0,   0,   0, 182, 143, 
-     32,   0,   0,   0,   0,   0, 
-      1,   0,   0,   0,   2,  64, 
-      0,   0, 255, 255, 255, 255, 
-    255, 255, 255, 255,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-     86,   0,   0,   5,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70,   0,  16,   0,   7,   0, 
-      0,   0,  52,   0,   0,   7, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      2,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     51,   0,   0,   7,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  28,   0, 
-      0,   5,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     52,   0,   0,   7,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,  51,   0, 
-      0,   7,  66,   0,  16,   0, 
-      3,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  28,   0,   0,   5, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  86,   0, 
-      0,   5,  66,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     86,   0,   0,   5, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  28,   0,   0,   5, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0,  70,   0,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5, 194,   0,  16,   0, 
-      7,   0,   0,   0, 166,  14, 
-     16,   0,   7,   0,   0,   0, 
-     38,   0,   0,   9,   0, 208, 
-      0,   0,  50,   0,  16,   0, 
-      7,   0,   0,   0,  70,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     30,   0,   0,   7,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  35,   0, 
-      0,  10,  34,   0,  16,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  35,   0,   0,  10, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-    167,   0,   0, 139,   2,  67, 
-      0, 128, 131, 153,  25,   0, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     70, 224,  17,   0,   0,   0, 
-      0,   0,  54,   0,   0,   5, 
-    194,   0,  16,   0,   7,   0, 
-      0,   0,   6,   4,  16,   0, 
-      7,   0,   0,   0, 201,   0, 
-      0,   5, 130,   0,  16,   0, 
-      5,   0,   0,   0, 230,  14, 
-     16,   0,   7,   0,   0,   0, 
-     18,   0,   0,   1,  32,   0, 
-      0,   8,  34,   0,  16,   0, 
-      2,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   1,  64, 
-      0,   0,   2,   0,   0,   0, 
-     31,   0,   4,   3,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     55,   0,   0,  10,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     10, 128,  32,   0,   0,   0, 
-      0,   0,   7,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0, 128,   1,  64,   0,   0, 
-      0,   0,   0,  63,   0,   0, 
-      0,   7,  50,   0,  16,   0, 
-      7,   0,   0,   0, 230,  10, 
-     16,   0,   0,   0,   0,   0, 
-     86,   5,  16,   0,   2,   0, 
-      0,   0,  49,   0,   0,   7, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-      0,   0,   0,   8,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  42,   0,  16, 128, 
-     65,   0,   0,   0,   2,   0, 
-      0,   0,  14,   0,   0,   7, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  86,   0,   0,   5, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  50,   0, 
-      0,  10,  66,   0,  16,   0, 
-      3,   0,   0,   0,  58,   0, 
-     16, 128,  65,   0,   0,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,   1,   0,   0,   7, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   7, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,   0,   0, 
-      0,   8,  66,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   0,   0,   0,   0, 
-     42,   0,  16, 128,  65,   0, 
-      0,   0,   3,   0,   0,   0, 
-     55,   0,   0,   9,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     49,   0,   0,   7,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   0,   0, 
-      0,   0,  42,   0,  16,   0, 
-      2,   0,   0,   0,   0,   0, 
-      0,   8, 130,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16, 128,  65,   0,   0,   0, 
-      0,   0,   0,   0,  42,   0, 
-     16,   0,   2,   0,   0,   0, 
-     14,   0,   0,   7,  18,   0, 
-     16,   0,   9,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  28,   0, 
-      0,   5,  18,   0,  16,   0, 
-      9,   0,   0,   0,  10,   0, 
-     16,   0,   9,   0,   0,   0, 
-     86,   0,   0,   5,  34,   0, 
-     16,   0,   9,   0,   0,   0, 
-     10,   0,  16,   0,   9,   0, 
-      0,   0,  50,   0,   0,  10, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16, 128, 
-     65,   0,   0,   0,   9,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-      1,   0,   0,   7, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   9,   0, 
-      0,   0,   1,  64,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   8,  18,   0,  16,   0, 
-      9,   0,   0,   0,  42,   0, 
-     16,   0,   0,   0,   0,   0, 
-     10,   0,  16, 128,  65,   0, 
-      0,   0,   7,   0,   0,   0, 
-      0,   0,   0,   7,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  55,   0, 
-      0,   9,  18,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      9,   0,   0,   0,  55,   0, 
-      0,   9,  66,   0,  16,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      2,   0,   0,   0,  55,   0, 
-      0,   9,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  42,   0,  16,   0, 
-      2,   0,   0,   0,  49,   0, 
-      0,   7,  66,   0,  16,   0, 
-      2,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,   0,   0,   0,   8, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  58,   0, 
-     16, 128,  65,   0,   0,   0, 
-      3,   0,   0,   0,  14,   0, 
-      0,   7,  18,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  28,   0,   0,   5, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  86,   0, 
-      0,   5,  66,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     50,   0,   0,  10,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     42,   0,  16, 128,  65,   0, 
-      0,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,   1,   0, 
-      0,   7,  18,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-      1,  64,   0,   0,   1,   0, 
-      0,   0,   0,   0,   0,   7, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-      0,   0,   0,   8,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16,   0,   0,   0, 
-      0,   0,  42,   0,  16, 128, 
-     65,   0,   0,   0,   3,   0, 
-      0,   0,  55,   0,   0,   9, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  49,   0,   0,   7, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16,   0, 
-      0,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-      0,   0,   0,   8,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16, 128,  65,   0, 
-      0,   0,   0,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  14,   0,   0,   7, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  86,   0,   0,   5, 
-     18,   0,  16,   0,   9,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,  50,   0, 
-      0,  10,  34,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16, 128,  65,   0,   0,   0, 
-      9,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,   1,   0,   0,   7, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   8, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   0,   0, 
-      0,   0,  26,   0,  16, 128, 
-     65,   0,   0,   0,   7,   0, 
-      0,   0,   0,   0,   0,   7, 
-     34,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     55,   0,   0,   9,  34,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     55,   0,   0,   9,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     55,   0,   0,   9,  66,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     28,   0,   0,   5,  98,   0, 
-     16,   0,   2,   0,   0,   0, 
-     86,   6,  16,   0,   2,   0, 
-      0,   0,  86,   0,   0,   5, 
-    194,   0,  16,   0,   7,   0, 
-      0,   0, 166,   6,  16,   0, 
-      2,   0,   0,   0,  28,   0, 
-      0,   5,  98,   0,  16,   0, 
-      2,   0,   0,   0,   6,   1, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-    230,  10,  16,   0,   7,   0, 
-      0,   0,  38,   0,   0,   9, 
-      0, 208,   0,   0,  98,   0, 
-     16,   0,   2,   0,   0,   0, 
-     86,   6,  16,   0,   2,   0, 
-      0,   0,   6, 129,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  30,   0,   0,   7, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     35,   0,   0,  10,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  35,   0, 
-      0,  10,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  67,   0, 128, 131, 153, 
-     25,   0,  98,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,   6, 225,  17,   0, 
-      0,   0,   0,   0,  54,   0, 
-      0,   5,  50,   0,  16,   0, 
-      7,   0,   0,   0, 150,   5, 
-     16,   0,   2,   0,   0,   0, 
-    201,   0,   0,   5, 130,   0, 
-     16,   0,   5,   0,   0,   0, 
-     70,   4,  16,   0,   7,   0, 
-      0,   0,  18,   0,   0,   1, 
-     54,   0,   0,   5, 130,   0, 
-     16,   0,   5,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  21,   0,   0,   1, 
-     21,   0,   0,   1,  31,   0, 
-      4,   3,  10,   0,  16,   0, 
-      1,   0,   0,   0,  30,   0, 
-      0,  11,  98,   0,  16,   0, 
-      2,   0,   0,   0, 246, 142, 
-     32,   0,   0,   0,   0,   0, 
-      1,   0,   0,   0,   2,  64, 
-      0,   0,   0,   0,   0,   0, 
-    255, 255, 255, 255, 255, 255, 
-    255, 255,   0,   0,   0,   0, 
-     86,   0,   0,   5,  98,   0, 
-     16,   0,   2,   0,   0,   0, 
-     86,   6,  16,   0,   2,   0, 
-      0,   0,  52,   0,   0,   7, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  26,   0,  16,   0, 
-      1,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     51,   0,   0,   7,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  52,   0, 
-      0,   7,  66,   0,  16,   0, 
-      3,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  51,   0,   0,   7, 
-     66,   0,  16,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5,  98,   0, 
-     16,   0,   2,   0,   0,   0, 
-     86,   6,  16,   0,   2,   0, 
-      0,   0,  86,   0,   0,   5, 
-    194,   0,  16,   0,   7,   0, 
-      0,   0, 166,   6,  16,   0, 
-      2,   0,   0,   0,  28,   0, 
-      0,   5,  98,   0,  16,   0, 
-      2,   0,   0,   0,   6,   1, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-    230,  10,  16,   0,   7,   0, 
-      0,   0,  38,   0,   0,   9, 
-      0, 208,   0,   0,  98,   0, 
-     16,   0,   2,   0,   0,   0, 
-     86,   6,  16,   0,   2,   0, 
-      0,   0,   6, 129,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  30,   0,   0,   7, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     35,   0,   0,  10,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  35,   0, 
-      0,  10,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  67,   0, 128, 131, 153, 
-     25,   0,  98,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,   6, 225,  17,   0, 
-      0,   0,   0,   0,  54,   0, 
-      0,   5,  50,   0,  16,   0, 
-      7,   0,   0,   0, 150,   5, 
-     16,   0,   2,   0,   0,   0, 
-    201,   0,   0,   5, 130,   0, 
-     16,   0,   4,   0,   0,   0, 
-     70,   4,  16,   0,   7,   0, 
-      0,   0,  18,   0,   0,   1, 
-     32,   0,   0,   8,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   0,   0,   0,   0, 
-      1,  64,   0,   0,   2,   0, 
-      0,   0,  31,   0,   4,   3, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  55,   0,   0,  10, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  10, 128,  32,   0, 
-      0,   0,   0,   0,   7,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0, 128,   1,  64, 
-      0,   0,   0,   0,   0,  63, 
-      0,   0,   0,   7,  98,   0, 
-     16,   0,   2,   0,   0,   0, 
-    166,  11,  16,   0,   0,   0, 
-      0,   0,  86,   5,  16,   0, 
-      2,   0,   0,   0,  49,   0, 
-      0,   7,  66,   0,  16,   0, 
-      3,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,   0,   0,   0,   8, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  26,   0, 
-     16, 128,  65,   0,   0,   0, 
-      1,   0,   0,   0,  14,   0, 
-      0,   7,  34,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  28,   0,   0,   5, 
-     34,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  86,   0, 
-      0,   5,  66,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     50,   0,   0,  10,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16, 128,  65,   0, 
-      0,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,   1,   0, 
-      0,   7,  34,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-      1,  64,   0,   0,   1,   0, 
-      0,   0,   0,   0,   0,   7, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-      0,   0,   0,   8,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   0,   0, 
-      0,   0,  10,   0,  16, 128, 
-     65,   0,   0,   0,   7,   0, 
-      0,   0,  55,   0,   0,   9, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  49,   0,   0,   7, 
-     34,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      0,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-      0,   0,   0,   8,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16, 128,  65,   0, 
-      0,   0,   0,   0,   0,   0, 
-     26,   0,  16,   0,   1,   0, 
-      0,   0,  14,   0,   0,   7, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     28,   0,   0,   5, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   7,   0, 
-      0,   0,  86,   0,   0,   5, 
-     18,   0,  16,   0,   9,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,  50,   0, 
-      0,  10,  34,   0,  16,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16, 128,  65,   0,   0,   0, 
-      9,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,   1,   0,   0,   7, 
-     66,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16,   0, 
-      7,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   8, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   0,   0, 
-      0,   0,  26,   0,  16, 128, 
-     65,   0,   0,   0,   2,   0, 
-      0,   0,   0,   0,   0,   7, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     55,   0,   0,   9,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  58,   0, 
-     16,   0,   7,   0,   0,   0, 
-     55,   0,   0,   9,  34,   0, 
-     16,   0,   1,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-     55,   0,   0,   9,  34,   0, 
-     16,   0,   1,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-     28,   0,   0,   5,  34,   0, 
-     16,   0,   1,   0,   0,   0, 
-     26,   0,  16,   0,   1,   0, 
-      0,   0,  49,   0,   0,   7, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-      0,   0,   0,   8,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  58,   0,  16, 128, 
-     65,   0,   0,   0,   3,   0, 
-      0,   0,  14,   0,   0,   7, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   2,   0,   0,   0, 
-     28,   0,   0,   5,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  86,   0,   0,   5, 
-     34,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  50,   0, 
-      0,  10,  66,   0,  16,   0, 
-      3,   0,   0,   0,  26,   0, 
-     16, 128,  65,   0,   0,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,   1,   0,   0,   7, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   7,  34,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,   0,   0, 
-      0,   8,  66,   0,  16,   0, 
-      3,   0,   0,   0,  58,   0, 
-     16,   0,   0,   0,   0,   0, 
-     42,   0,  16, 128,  65,   0, 
-      0,   0,   3,   0,   0,   0, 
-     55,   0,   0,   9,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     49,   0,   0,   7,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58,   0,  16,   0,   0,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,   0,   0, 
-      0,   8,  34,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16, 128,  65,   0,   0,   0, 
-      0,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     14,   0,   0,   7,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      2,   0,   0,   0,  28,   0, 
-      0,   5,  66,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-     86,   0,   0,   5, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  50,   0,   0,  10, 
-     66,   0,  16,   0,   2,   0, 
-      0,   0,  58,   0,  16, 128, 
-     65,   0,   0,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-      1,   0,   0,   7,  34,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,   1,  64,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   8,  66,   0,  16,   0, 
-      7,   0,   0,   0,  58,   0, 
-     16,   0,   0,   0,   0,   0, 
-     42,   0,  16, 128,  65,   0, 
-      0,   0,   2,   0,   0,   0, 
-      0,   0,   0,   7,  66,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  42,   0,  16,   0, 
-      2,   0,   0,   0,  55,   0, 
-      0,   9,  66,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  55,   0, 
-      0,   9,  66,   0,  16,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   2,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,  55,   0, 
-      0,   9,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  42,   0,  16,   0, 
-      2,   0,   0,   0,  28,   0, 
-      0,   5,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     86,   0,   0,   5,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  86,   0,   0,   5, 
-    130,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      1,   0,   0,   0,  28,   0, 
-      0,   5,  98,   0,  16,   0, 
-      2,   0,   0,   0,   6,   1, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5,  50,   0, 
-     16,   0,   7,   0,   0,   0, 
-    230,  10,  16,   0,   7,   0, 
-      0,   0,  38,   0,   0,   9, 
-      0, 208,   0,   0,  98,   0, 
-     16,   0,   2,   0,   0,   0, 
-     86,   6,  16,   0,   2,   0, 
-      0,   0,   6, 129,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  30,   0,   0,   7, 
-     34,   0,  16,   0,   1,   0, 
-      0,   0,  42,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     35,   0,   0,  10,  34,   0, 
-     16,   0,   1,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      1,   0,   0,   0,  35,   0, 
-      0,  10,  34,   0,  16,   0, 
-      1,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   1,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  67,   0, 128, 131, 153, 
-     25,   0,  98,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,   6, 225,  17,   0, 
-      0,   0,   0,   0,  54,   0, 
-      0,   5,  50,   0,  16,   0, 
-      7,   0,   0,   0, 150,   5, 
-     16,   0,   2,   0,   0,   0, 
-    201,   0,   0,   5, 130,   0, 
-     16,   0,   4,   0,   0,   0, 
-     70,   4,  16,   0,   7,   0, 
-      0,   0,  18,   0,   0,   1, 
-     54,   0,   0,   5, 130,   0, 
-     16,   0,   4,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  21,   0,   0,   1, 
-     21,   0,   0,   1,  31,   0, 
-      4,   3,  10,   0,  16,   0, 
-      1,   0,   0,   0,  30,   0, 
-      0,  11,  98,   0,  16,   0, 
-      2,   0,   0,   0, 246, 142, 
-     32,   0,   0,   0,   0,   0, 
-      1,   0,   0,   0,   2,  64, 
-      0,   0,   0,   0,   0,   0, 
-    255, 255, 255, 255, 255, 255, 
-    255, 255,   0,   0,   0,   0, 
-     86,   0,   0,   5,  98,   0, 
-     16,   0,   2,   0,   0,   0, 
-     86,   6,  16,   0,   2,   0, 
-      0,   0,  52,   0,   0,   7, 
-     34,   0,  16,   0,   1,   0, 
-      0,   0,  58,   0,  16,   0, 
-      2,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     51,   0,   0,   7,  34,   0, 
-     16,   0,   1,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      1,   0,   0,   0,  28,   0, 
-      0,   5,  34,   0,  16,   0, 
-      1,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-     52,   0,   0,   7,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,  51,   0, 
-      0,   7,  34,   0,  16,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  28,   0,   0,   5, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  86,   0, 
-      0,   5,  66,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     86,   0,   0,   5, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   1,   0, 
-      0,   0,  28,   0,   0,   5, 
-     98,   0,  16,   0,   2,   0, 
-      0,   0,   6,   1,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5,  50,   0,  16,   0, 
-      7,   0,   0,   0, 230,  10, 
-     16,   0,   7,   0,   0,   0, 
-     38,   0,   0,   9,   0, 208, 
-      0,   0,  98,   0,  16,   0, 
-      2,   0,   0,   0,  86,   6, 
-     16,   0,   2,   0,   0,   0, 
-      6, 129,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     30,   0,   0,   7,  34,   0, 
-     16,   0,   1,   0,   0,   0, 
-     42,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  35,   0, 
-      0,  10,  34,   0,  16,   0, 
-      1,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   1,   0, 
-      0,   0,  35,   0,   0,  10, 
-     34,   0,  16,   0,   1,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-    167,   0,   0, 139,   2,  67, 
-      0, 128, 131, 153,  25,   0, 
-     98,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      1,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-      6, 225,  17,   0,   0,   0, 
-      0,   0,  54,   0,   0,   5, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0, 150,   5,  16,   0, 
-      2,   0,   0,   0, 201,   0, 
-      0,   5, 130,   0,  16,   0, 
-      6,   0,   0,   0,  70,   4, 
-     16,   0,   7,   0,   0,   0, 
-     18,   0,   0,   1,  32,   0, 
-      0,   8,  34,   0,  16,   0, 
-      1,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   1,  64, 
-      0,   0,   2,   0,   0,   0, 
-     31,   0,   4,   3,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-     55,   0,   0,  10,  34,   0, 
-     16,   0,   1,   0,   0,   0, 
-     10, 128,  32,   0,   0,   0, 
-      0,   0,   7,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0, 128,   1,  64,   0,   0, 
-      0,   0,   0,  63,   0,   0, 
-      0,   7,  98,   0,  16,   0, 
-      2,   0,   0,   0, 166,  11, 
-     16,   0,   0,   0,   0,   0, 
-     86,   5,  16,   0,   1,   0, 
-      0,   0,  49,   0,   0,   7, 
-     34,   0,  16,   0,   1,   0, 
-      0,   0,  58,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-      0,   0,   0,   8,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  58,   0,  16, 128, 
-     65,   0,   0,   0,   2,   0, 
-      0,   0,  14,   0,   0,   7, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     28,   0,   0,   5,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  86,   0,   0,   5, 
-     34,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  50,   0, 
-      0,  10,  66,   0,  16,   0, 
-      3,   0,   0,   0,  26,   0, 
-     16, 128,  65,   0,   0,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,   1,   0,   0,   7, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   7,  34,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,   0,   0, 
-      0,   8,  66,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   0,   0,   0,   0, 
-     42,   0,  16, 128,  65,   0, 
-      0,   0,   3,   0,   0,   0, 
-     55,   0,   0,   9,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     49,   0,   0,   7,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   0,   0, 
-      0,   0,  58,   0,  16,   0, 
-      2,   0,   0,   0,   0,   0, 
-      0,   8,  34,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16, 128,  65,   0,   0,   0, 
-      0,   0,   0,   0,  58,   0, 
-     16,   0,   2,   0,   0,   0, 
-     14,   0,   0,   7,  66,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  28,   0, 
-      0,   5,  66,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   7,   0,   0,   0, 
-     86,   0,   0,   5, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,  50,   0,   0,  10, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  58,   0,  16, 128, 
-     65,   0,   0,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-      1,   0,   0,   7,  34,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   7,   0, 
-      0,   0,   1,  64,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   8,  66,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   0,   0,   0,   0, 
-     26,   0,  16, 128,  65,   0, 
-      0,   0,   2,   0,   0,   0, 
-      0,   0,   0,   7,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  55,   0, 
-      0,   9,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      7,   0,   0,   0,  55,   0, 
-      0,   9,  34,   0,  16,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  58,   0,  16,   0, 
-      2,   0,   0,   0,  55,   0, 
-      0,   9,  34,   0,  16,   0, 
-      1,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  28,   0, 
-      0,   5,  34,   0,  16,   0, 
-      1,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-     49,   0,   0,   7,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,   0,   0, 
-      0,   8, 130,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-     58,   0,  16, 128,  65,   0, 
-      0,   0,   3,   0,   0,   0, 
-     14,   0,   0,   7,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      2,   0,   0,   0,  28,   0, 
-      0,   5,  66,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     86,   0,   0,   5,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  50,   0,   0,  10, 
-    130,   0,  16,   0,   2,   0, 
-      0,   0,  10,   0,  16, 128, 
-     65,   0,   0,   0,   7,   0, 
-      0,   0,  42,   0,  16,   0, 
-      2,   0,   0,   0,  58,   0, 
-     16,   0,   2,   0,   0,   0, 
-      1,   0,   0,   7,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,   1,  64,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   7,  18,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-     58,   0,  16,   0,   2,   0, 
-      0,   0,   0,   0,   0,   8, 
-    130,   0,  16,   0,   2,   0, 
-      0,   0,  58,   0,  16,   0, 
-      0,   0,   0,   0,  58,   0, 
-     16, 128,  65,   0,   0,   0, 
-      2,   0,   0,   0,  55,   0, 
-      0,   9, 130,   0,  16,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     58,   0,  16,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  49,   0, 
-      0,   7,  66,   0,  16,   0, 
-      3,   0,   0,   0,  58,   0, 
-     16,   0,   0,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,   0,   0,   0,   8, 
-     18,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16, 128, 
-     65,   0,   0,   0,   0,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,  14,   0, 
-      0,   7,  34,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   2,   0, 
-      0,   0,  28,   0,   0,   5, 
-     34,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      7,   0,   0,   0,  86,   0, 
-      0,   5,  66,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-     50,   0,   0,  10,  66,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16, 128,  65,   0, 
-      0,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,   1,   0, 
-      0,   7,  18,   0,  16,   0, 
-      7,   0,   0,   0,  26,   0, 
-     16,   0,   7,   0,   0,   0, 
-      1,  64,   0,   0,   1,   0, 
-      0,   0,   0,   0,   0,   8, 
-     34,   0,  16,   0,   7,   0, 
-      0,   0,  58,   0,  16,   0, 
-      0,   0,   0,   0,  42,   0, 
-     16, 128,  65,   0,   0,   0, 
-      2,   0,   0,   0,   0,   0, 
-      0,   7,  66,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-     42,   0,  16,   0,   2,   0, 
-      0,   0,  55,   0,   0,   9, 
-     66,   0,  16,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  42,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  55,   0,   0,   9, 
-     66,   0,  16,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  55,   0,   0,   9, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  58,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   2,   0, 
-      0,   0,  28,   0,   0,   5, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  86,   0, 
-      0,   5,  66,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     86,   0,   0,   5, 130,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   1,   0, 
-      0,   0,  28,   0,   0,   5, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0,  70,   0,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5,  98,   0,  16,   0, 
-      2,   0,   0,   0, 166,  11, 
-     16,   0,   2,   0,   0,   0, 
-     38,   0,   0,   9,   0, 208, 
-      0,   0,  50,   0,  16,   0, 
-      7,   0,   0,   0,  70,   0, 
-     16,   0,   7,   0,   0,   0, 
-     70, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     30,   0,   0,   7,  34,   0, 
-     16,   0,   1,   0,   0,   0, 
-     26,   0,  16,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      7,   0,   0,   0,  35,   0, 
-      0,  10,  34,   0,  16,   0, 
-      1,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   1,   0, 
-      0,   0,  35,   0,   0,  10, 
-     34,   0,  16,   0,   1,   0, 
-      0,   0,  42,   0,  16,   0, 
-      2,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-    167,   0,   0, 139,   2,  67, 
-      0, 128, 131, 153,  25,   0, 
-     98,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      1,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-      6, 225,  17,   0,   0,   0, 
-      0,   0,  54,   0,   0,   5, 
-     50,   0,  16,   0,   7,   0, 
-      0,   0, 150,   5,  16,   0, 
-      2,   0,   0,   0, 201,   0, 
-      0,   5, 130,   0,  16,   0, 
-      6,   0,   0,   0,  70,   4, 
-     16,   0,   7,   0,   0,   0, 
-     18,   0,   0,   1,  54,   0, 
-      0,   5, 130,   0,  16,   0, 
-      6,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     21,   0,   0,   1,  21,   0, 
-      0,   1,  31,   0,   4,   3, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  30,   0,   0,  11, 
-     50,   0,  16,   0,   1,   0, 
-      0,   0, 182, 143,  32,   0, 
-      0,   0,   0,   0,   1,   0, 
-      0,   0,   2,  64,   0,   0, 
-    255, 255, 255, 255, 255, 255, 
-    255, 255,   0,   0,   0,   0, 
-      0,   0,   0,   0,  86,   0, 
-      0,   5,  50,   0,  16,   0, 
-      1,   0,   0,   0,  70,   0, 
-     16,   0,   1,   0,   0,   0, 
-     52,   0,   0,   7,  34,   0, 
-     16,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   2,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0,  51,   0, 
-      0,   7,  18,   0,  16,   0, 
-      1,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  52,   0,   0,   7, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  58,   0,  16,   0, 
-      3,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     51,   0,   0,   7,  34,   0, 
-     16,   0,   1,   0,   0,   0, 
-     26,   0,  16,   0,   1,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  28,   0, 
-      0,   5,  50,   0,  16,   0, 
-      1,   0,   0,   0,  70,   0, 
-     16,   0,   1,   0,   0,   0, 
-     86,   0,   0,   5, 194,   0, 
-     16,   0,   2,   0,   0,   0, 
-     86,   1,  16,   0,   1,   0, 
-      0,   0,  28,   0,   0,   5, 
-     50,   0,  16,   0,   1,   0, 
-      0,   0,  70,   0,  16,   0, 
-      3,   0,   0,   0,  28,   0, 
-      0,   5,  98,   0,  16,   0, 
-      2,   0,   0,   0, 166,  11, 
-     16,   0,   2,   0,   0,   0, 
-     38,   0,   0,   9,   0, 208, 
-      0,   0,  50,   0,  16,   0, 
-      1,   0,   0,   0,  70,   0, 
-     16,   0,   1,   0,   0,   0, 
-     70, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     30,   0,   0,   7,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     26,   0,  16,   0,   1,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  35,   0, 
-      0,  10,  18,   0,  16,   0, 
-      1,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  35,   0,   0,  10, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  42,   0,  16,   0, 
-      2,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-    167,   0,   0, 139,   2,  67, 
-      0, 128, 131, 153,  25,   0, 
-     50,   0,  16,   0,   1,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,   1,  64, 
-      0,   0,   0,   0,   0,   0, 
-     70, 224,  17,   0,   0,   0, 
-      0,   0,  54,   0,   0,   5, 
-    194,   0,  16,   0,   2,   0, 
-      0,   0,   6,   4,  16,   0, 
-      1,   0,   0,   0, 201,   0, 
-      0,   5, 130,   0,  16,   0, 
-      8,   0,   0,   0, 230,  14, 
-     16,   0,   2,   0,   0,   0, 
-     18,   0,   0,   1,  32,   0, 
-      0,   8,  18,   0,  16,   0, 
-      1,   0,   0,   0,  58, 128, 
-     32,   0,   0,   0,   0,   0, 
-      0,   0,   0,   0,   1,  64, 
-      0,   0,   2,   0,   0,   0, 
-     31,   0,   4,   3,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     55,   0,   0,  10,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     10, 128,  32,   0,   0,   0, 
-      0,   0,   7,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0, 128,   1,  64,   0,   0, 
-      0,   0,   0,  63,   0,   0, 
-      0,   7,  50,   0,  16,   0, 
-      1,   0,   0,   0, 230,  10, 
-     16,   0,   0,   0,   0,   0, 
-      6,   0,  16,   0,   1,   0, 
-      0,   0,  49,   0,   0,   7, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  10,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-      0,   0,   0,   8,  66,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  10,   0,  16, 128, 
-     65,   0,   0,   0,   2,   0, 
-      0,   0,  14,   0,   0,   7, 
-    130,   0,  16,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     28,   0,   0,   5, 130,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58,   0,  16,   0,   2,   0, 
-      0,   0,  86,   0,   0,   5, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  58,   0,  16,   0, 
-      2,   0,   0,   0,  50,   0, 
-      0,  10,  66,   0,  16,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16, 128,  65,   0,   0,   0, 
-      3,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     42,   0,  16,   0,   2,   0, 
-      0,   0,   1,   0,   0,   7, 
-    130,   0,  16,   0,   2,   0, 
-      0,   0,  58,   0,  16,   0, 
-      2,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   7,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  42,   0,  16,   0, 
-      2,   0,   0,   0,   0,   0, 
-      0,   8,  66,   0,  16,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16,   0,   0,   0,   0,   0, 
-     42,   0,  16, 128,  65,   0, 
-      0,   0,   2,   0,   0,   0, 
-     55,   0,   0,   9,  66,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58,   0,  16,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     49,   0,   0,   7, 130,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   0,   0, 
-      0,   0,  10,   0,  16,   0, 
-      2,   0,   0,   0,   0,   0, 
-      0,   8,  66,   0,  16,   0, 
-      3,   0,   0,   0,  42,   0, 
-     16, 128,  65,   0,   0,   0, 
-      0,   0,   0,   0,  10,   0, 
-     16,   0,   2,   0,   0,   0, 
-     14,   0,   0,   7,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-     42,   0,  16,   0,   3,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  28,   0, 
-      0,   5,  18,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   7,   0,   0,   0, 
-     86,   0,   0,   5,  34,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,  50,   0,   0,  10, 
-     18,   0,  16,   0,   1,   0, 
-      0,   0,  26,   0,  16, 128, 
-     65,   0,   0,   0,   7,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-      1,   0,   0,   7,  66,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10,   0,  16,   0,   7,   0, 
-      0,   0,   1,  64,   0,   0, 
-      1,   0,   0,   0,   0,   0, 
-      0,   8,  66,   0,  16,   0, 
-      0,   0,   0,   0,  42,   0, 
-     16,   0,   0,   0,   0,   0, 
-     10,   0,  16, 128,  65,   0, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   7,  18,   0, 
-     16,   0,   1,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,  10,   0,  16,   0, 
-      1,   0,   0,   0,  55,   0, 
-      0,   9,  66,   0,  16,   0, 
-      0,   0,   0,   0,  42,   0, 
-     16,   0,   3,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  42,   0,  16,   0, 
-      0,   0,   0,   0,  55,   0, 
-      0,   9,  66,   0,  16,   0, 
-      0,   0,   0,   0,  58,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   0,   0, 
-      0,   0,  10,   0,  16,   0, 
-      2,   0,   0,   0,  55,   0, 
-      0,   9,  66,   0,  16,   0, 
-      0,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      0,   0,   0,   0,  49,   0, 
-      0,   7,  18,   0,  16,   0, 
-      1,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0,   0,   0,   0,   8, 
-     18,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  58,   0, 
-     16, 128,  65,   0,   0,   0, 
-      3,   0,   0,   0,  14,   0, 
-      0,   7,  34,   0,  16,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   1,   0, 
-      0,   0,  28,   0,   0,   5, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  86,   0, 
-      0,   5,  66,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-     50,   0,   0,  10,  18,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16, 128,  65,   0, 
-      0,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   1,   0, 
-      0,   0,  10,   0,  16,   0, 
-      2,   0,   0,   0,   1,   0, 
-      0,   7,  34,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   2,   0,   0,   0, 
-      1,  64,   0,   0,   1,   0, 
-      0,   0,   0,   0,   0,   7, 
-     66,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  10,   0, 
-     16,   0,   2,   0,   0,   0, 
-      0,   0,   0,   8,  18,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58,   0,  16,   0,   0,   0, 
-      0,   0,  10,   0,  16, 128, 
-     65,   0,   0,   0,   2,   0, 
-      0,   0,  55,   0,   0,   9, 
-     18,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      2,   0,   0,   0,  10,   0, 
-     16,   0,   2,   0,   0,   0, 
-     42,   0,  16,   0,   2,   0, 
-      0,   0,  49,   0,   0,   7, 
-     34,   0,  16,   0,   2,   0, 
-      0,   0,  58,   0,  16,   0, 
-      0,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-      0,   0,   0,   8,  66,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58,   0,  16, 128,  65,   0, 
-      0,   0,   0,   0,   0,   0, 
-     58,   0,  16,   0,   3,   0, 
-      0,   0,  14,   0,   0,   7, 
-    130,   0,  16,   0,   2,   0, 
-      0,   0,  42,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-     28,   0,   0,   5, 130,   0, 
-     16,   0,   2,   0,   0,   0, 
-     58,   0,  16,   0,   2,   0, 
-      0,   0,  86,   0,   0,   5, 
-     66,   0,  16,   0,   3,   0, 
-      0,   0,  58,   0,  16,   0, 
-      2,   0,   0,   0,  50,   0, 
-      0,  10,  34,   0,  16,   0, 
-      1,   0,   0,   0,  42,   0, 
-     16, 128,  65,   0,   0,   0, 
-      3,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-     42,   0,  16,   0,   2,   0, 
-      0,   0,   1,   0,   0,   7, 
-     66,   0,  16,   0,   2,   0, 
-      0,   0,  58,   0,  16,   0, 
-      2,   0,   0,   0,   1,  64, 
-      0,   0,   1,   0,   0,   0, 
-      0,   0,   0,   8, 130,   0, 
-     16,   0,   0,   0,   0,   0, 
-     58,   0,  16,   0,   0,   0, 
-      0,   0,  26,   0,  16, 128, 
-     65,   0,   0,   0,   1,   0, 
-      0,   0,   0,   0,   0,   7, 
-     34,   0,  16,   0,   0,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-     55,   0,   0,   9,  34,   0, 
-     16,   0,   0,   0,   0,   0, 
-     42,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  58,   0, 
-     16,   0,   0,   0,   0,   0, 
-     55,   0,   0,   9,  34,   0, 
-     16,   0,   0,   0,   0,   0, 
-     26,   0,  16,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  58,   0, 
-     16,   0,   3,   0,   0,   0, 
-     55,   0,   0,   9,  34,   0, 
-     16,   0,   0,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  10,   0,  16,   0, 
-      2,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-     28,   0,   0,   5,  98,   0, 
-     16,   0,   0,   0,   0,   0, 
-     86,   6,  16,   0,   0,   0, 
-      0,   0,  86,   0,   0,   5, 
-    194,   0,  16,   0,   2,   0, 
-      0,   0,  86,   9,  16,   0, 
-      0,   0,   0,   0,  28,   0, 
-      0,   5,  98,   0,  16,   0, 
-      0,   0,   0,   0,   6,   1, 
-     16,   0,   3,   0,   0,   0, 
-     28,   0,   0,   5,  50,   0, 
-     16,   0,   1,   0,   0,   0, 
-    230,  10,  16,   0,   2,   0, 
-      0,   0,  38,   0,   0,   9, 
-      0, 208,   0,   0,  98,   0, 
-     16,   0,   0,   0,   0,   0, 
-     86,   6,  16,   0,   0,   0, 
-      0,   0,   6, 129,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  30,   0,   0,   7, 
-     34,   0,  16,   0,   0,   0, 
-      0,   0,  42,   0,  16,   0, 
-      0,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-     35,   0,   0,  10,  34,   0, 
-     16,   0,   0,   0,   0,   0, 
-     10,   0,  16,   0,   1,   0, 
-      0,   0,  42, 128,  32,   0, 
-      0,   0,   0,   0,   2,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  35,   0, 
-      0,  10,  34,   0,  16,   0, 
-      0,   0,   0,   0,  26,   0, 
-     16,   0,   1,   0,   0,   0, 
-     58, 128,  32,   0,   0,   0, 
-      0,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   0,   0, 
-      0,   0, 167,   0,   0, 139, 
-      2,  67,   0, 128, 131, 153, 
-     25,   0,  98,   0,  16,   0, 
-      0,   0,   0,   0,  26,   0, 
-     16,   0,   0,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,   6, 225,  17,   0, 
-      0,   0,   0,   0,  54,   0, 
-      0,   5,  50,   0,  16,   0, 
-      1,   0,   0,   0, 150,   5, 
-     16,   0,   0,   0,   0,   0, 
-    201,   0,   0,   5, 130,   0, 
-     16,   0,   8,   0,   0,   0, 
-     70,   4,  16,   0,   1,   0, 
-      0,   0,  18,   0,   0,   1, 
-     54,   0,   0,   5, 130,   0, 
-     16,   0,   8,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-      0,   0,  21,   0,   0,   1, 
-     21,   0,   0,   1,  21,   0, 
-      0,   1,  26,   0,   0,   5, 
-     98,   0,  16,   0,   0,   0, 
-      0,   0, 246,  14,  16,   0, 
-      1,   0,   0,   0,  56,   0, 
-      0,   7,  50,   0,  16,   0, 
-      1,   0,   0,   0, 150,   5, 
-     16,   0,   0,   0,   0,   0, 
-    150,   5,  16,   0,   0,   0, 
-      0,   0,  56,   0,   0,   7, 
-    146,   0,  16,   0,   2,   0, 
-      0,   0,  86,   9,  16,   0, 
-      0,   0,   0,   0,   6,   4, 
-     16,   0,   1,   0,   0,   0, 
-     15,   0,   0,  10,  34,   0, 
-     16,   0,   3,   0,   0,   0, 
-      2,  64,   0,   0,   0,   0, 
-     64, 191,   0,   0,  64,  63, 
-      0,   0,   0,   0,   0,   0, 
-      0,   0,  70,   0,  16,   0, 
-      5,   0,   0,   0,  17,   0, 
-      0,  10,  66,   0,  16,   0, 
-      3,   0,   0,   0,   2,  64, 
-      0,   0,   0,   0, 192,  63, 
-      0,   0, 192,  63,   0,   0, 
-     16, 192,   0,   0,  64, 191, 
-     70,  14,  16,   0,   5,   0, 
-      0,   0,  17,   0,   0,  10, 
-    130,   0,  16,   0,   3,   0, 
-      0,   0,   2,  64,   0,   0, 
-      0,   0,  64, 191,   0,   0, 
-    160, 191,   0,   0, 160,  63, 
-      0,   0,  64,  63,  70,  14, 
-     16,   0,   5,   0,   0,   0, 
-     54,   0,   0,   5,  18,   0, 
-     16,   0,   7,   0,   0,   0, 
-      1,  64,   0,   0,   0,   0, 
-    128,  63,  54,   0,   0,   5, 
-     34,   0,  16,   0,   7,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  54,   0, 
-      0,   5,  66,   0,  16,   0, 
-      7,   0,   0,   0,  10,   0, 
-     16,   0,   1,   0,   0,   0, 
-     54,   0,   0,   5, 130,   0, 
-     16,   0,   7,   0,   0,   0, 
-     10,   0,  16,   0,   2,   0, 
-      0,   0,  54,   0,   0,   5, 
-     18,   0,  16,   0,   3,   0, 
-      0,   0,  42,   0,  16,   0, 
-      5,   0,   0,   0,  17,   0, 
-      0,   7,  18,   0,  16,   0, 
-      3,   0,   0,   0,  70,  14, 
-     16,   0,   7,   0,   0,   0, 
-     70,  14,  16,   0,   3,   0, 
-      0,   0,  15,   0,   0,  10, 
-     34,   0,  16,   0,   5,   0, 
-      0,   0,   2,  64,   0,   0, 
-      0,   0,  64, 191,   0,   0, 
-     64,  63,   0,   0,   0,   0, 
-      0,   0,   0,   0,  70,   0, 
-     16,   0,   4,   0,   0,   0, 
-     17,   0,   0,  10,  66,   0, 
-     16,   0,   5,   0,   0,   0, 
-      2,  64,   0,   0,   0,   0, 
-    192,  63,   0,   0, 192,  63, 
-      0,   0,  16, 192,   0,   0, 
-     64, 191,  70,  14,  16,   0, 
-      4,   0,   0,   0,  17,   0, 
-      0,  10, 130,   0,  16,   0, 
-      5,   0,   0,   0,   2,  64, 
-      0,   0,   0,   0,  64, 191, 
-      0,   0, 160, 191,   0,   0, 
-    160,  63,   0,   0,  64,  63, 
-     70,  14,  16,   0,   4,   0, 
-      0,   0,  54,   0,   0,   5, 
-     18,   0,  16,   0,   5,   0, 
-      0,   0,  42,   0,  16,   0, 
-      4,   0,   0,   0,  17,   0, 
-      0,   7,  34,   0,  16,   0, 
-      3,   0,   0,   0,  70,  14, 
-     16,   0,   7,   0,   0,   0, 
-     70,  14,  16,   0,   5,   0, 
-      0,   0,  15,   0,   0,  10, 
-     34,   0,  16,   0,   4,   0, 
-      0,   0,   2,  64,   0,   0, 
-      0,   0,  64, 191,   0,   0, 
-     64,  63,   0,   0,   0,   0, 
-      0,   0,   0,   0,  70,   0, 
-     16,   0,   6,   0,   0,   0, 
-     17,   0,   0,  10,  66,   0, 
-     16,   0,   4,   0,   0,   0, 
-      2,  64,   0,   0,   0,   0, 
-    192,  63,   0,   0, 192,  63, 
-      0,   0,  16, 192,   0,   0, 
-     64, 191,  70,  14,  16,   0, 
-      6,   0,   0,   0,  17,   0, 
-      0,  10, 130,   0,  16,   0, 
-      4,   0,   0,   0,   2,  64, 
-      0,   0,   0,   0,  64, 191, 
-      0,   0, 160, 191,   0,   0, 
-    160,  63,   0,   0,  64,  63, 
-     70,  14,  16,   0,   6,   0, 
-      0,   0,  54,   0,   0,   5, 
-     18,   0,  16,   0,   4,   0, 
-      0,   0,  42,   0,  16,   0, 
-      6,   0,   0,   0,  17,   0, 
-      0,   7,  66,   0,  16,   0, 
-      3,   0,   0,   0,  70,  14, 
-     16,   0,   7,   0,   0,   0, 
-     70,  14,  16,   0,   4,   0, 
-      0,   0,  15,   0,   0,  10, 
-     34,   0,  16,   0,   4,   0, 
-      0,   0,   2,  64,   0,   0, 
-      0,   0,  64, 191,   0,   0, 
-     64,  63,   0,   0,   0,   0, 
-      0,   0,   0,   0,  70,   0, 
-     16,   0,   8,   0,   0,   0, 
-     17,   0,   0,  10,  66,   0, 
-     16,   0,   4,   0,   0,   0, 
-      2,  64,   0,   0,   0,   0, 
-    192,  63,   0,   0, 192,  63, 
-      0,   0,  16, 192,   0,   0, 
-     64, 191,  70,  14,  16,   0, 
-      8,   0,   0,   0,  17,   0, 
-      0,  10, 130,   0,  16,   0, 
-      4,   0,   0,   0,   2,  64, 
-      0,   0,   0,   0,  64, 191, 
-      0,   0, 160, 191,   0,   0, 
-    160,  63,   0,   0,  64,  63, 
-     70,  14,  16,   0,   8,   0, 
-      0,   0,  54,   0,   0,   5, 
-     18,   0,  16,   0,   4,   0, 
-      0,   0,  42,   0,  16,   0, 
-      8,   0,   0,   0,  17,   0, 
-      0,   7, 130,   0,  16,   0, 
-      3,   0,   0,   0,  70,  14, 
-     16,   0,   7,   0,   0,   0, 
-     70,  14,  16,   0,   4,   0, 
-      0,   0,  15,   0,   0,  10, 
-     34,   0,  16,   0,   4,   0, 
-      0,   0,   2,  64,   0,   0, 
-      0,   0,  64, 191,   0,   0, 
-     64,  63,   0,   0,   0,   0, 
-      0,   0,   0,   0, 134,   0, 
-     16,   0,   3,   0,   0,   0, 
-     17,   0,   0,  10,  66,   0, 
-     16,   0,   4,   0,   0,   0, 
-      2,  64,   0,   0,   0,   0, 
-    192,  63,   0,   0,  16, 192, 
-      0,   0, 192,  63,   0,   0, 
-     64, 191,  70,  14,  16,   0, 
-      3,   0,   0,   0,  17,   0, 
-      0,  10, 130,   0,  16,   0, 
-      4,   0,   0,   0,   2,  64, 
-      0,   0,   0,   0,  64, 191, 
-      0,   0, 160,  63,   0,   0, 
-    160, 191,   0,   0,  64,  63, 
-     70,  14,  16,   0,   3,   0, 
-      0,   0,  54,   0,   0,   5, 
-     18,   0,  16,   0,   2,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0, 128,  63,  54,   0, 
-      0,   5,  34,   0,  16,   0, 
-      2,   0,   0,   0,  42,   0, 
-     16,   0,   0,   0,   0,   0, 
-     54,   0,   0,   5,  66,   0, 
-     16,   0,   2,   0,   0,   0, 
-     26,   0,  16,   0,   1,   0, 
-      0,   0,  54,   0,   0,   5, 
-     18,   0,  16,   0,   4,   0, 
-      0,   0,  26,   0,  16,   0, 
-      3,   0,   0,   0,  17,   0, 
-      0,   7,  34,   0,  16,   0, 
-      0,   0,   0,   0,  70,  14, 
-     16,   0,   2,   0,   0,   0, 
-     70,  14,  16,   0,   4,   0, 
-      0,   0, 202,   0,   0,   5, 
-    194,   0,  16,   0,   0,   0, 
-      0,   0,  26,   0,  16,   0, 
-      0,   0,   0,   0,  54,   0, 
-      0,   5,  98,   0,  16,   0, 
-      0,   0,   0,   0, 166,  11, 
-     16,   0,   0,   0,   0,   0, 
-    168,   0,   0,   9,  50, 224, 
-     17,   0,   2,   0,   0,   0, 
-     10,   0,  16,   0,   0,   0, 
-      0,   0,   1,  64,   0,   0, 
-      0,   0,   0,   0, 150,   5, 
-     16,   0,   0,   0,   0,   0, 
-     21,   0,   0,   1,  21,   0, 
-      0,   1,  21,   0,   0,   1, 
-     21,   0,   0,   1,  62,   0, 
-      0,   1,  83,  70,  73,  48, 
-      8,   0,   0,   0,   1,   0, 
-      0,   0,   0,   0,   0,   0
-};
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_double_fp16.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_double_fp16.h
deleted file mode 100644
index be1f6418dd73a..0000000000000
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_double_fp16.h
+++ /dev/null
@@ -1,6540 +0,0 @@
-#if 0
-;
-; Note: shader requires additional functionality:
-;       Double-precision floating point
-;       Use native low precision
-;
-;
-; Input signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; no parameters
-;
-; Output signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; no parameters
-; shader hash: c884868e50a93c2389e03b595bb88939
-;
-; Pipeline Runtime Information: 
-;
-;
-;
-; Buffer Definitions:
-;
-; cbuffer 
-; {
-;
-;   [116 x i8] (type annotation not present)
-;
-; }
-;
-; Resource bind info for 
-; {
-;
-;   [8 x i8] (type annotation not present)
-;
-; }
-;
-; Resource bind info for 
-; {
-;
-;   [2 x i8] (type annotation not present)
-;
-; }
-;
-; Resource bind info for 
-; {
-;
-;   [8 x i8] (type annotation not present)
-;
-; }
-;
-;
-; Resource Bindings:
-;
-; Name                                 Type  Format         Dim      ID      HLSL Bind  Count
-; ------------------------------ ---------- ------- ----------- ------- -------------- ------
-;                                   cbuffer      NA          NA     CB0            cb0     1
-;                                       UAV  struct         r/w      U0             u0     1
-;                                       UAV  struct         r/w      U1             u1     1
-;                                       UAV  struct         r/w      U2             u2     1
-;
-target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
-target triple = "dxil-ms-dx"
-
-%dx.types.Handle = type { i8* }
-%dx.types.CBufRet.i32 = type { i32, i32, i32, i32 }
-%dx.types.ResRet.f16 = type { half, half, half, half, i32 }
-%dx.types.ResRet.i32 = type { i32, i32, i32, i32, i32 }
-%dx.types.splitdouble = type { i32, i32 }
-%"class.RWStructuredBuffer<double>" = type { double }
-%"class.RWStructuredBuffer<half>" = type { half }
-%Constants = type { i32, i32, i32, i32, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32 }
-
-define void @GridSample() {
-  %1 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 2, i32 2, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %2 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 1, i32 1, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %3 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %4 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %5 = call i32 @dx.op.threadId.i32(i32 93, i32 0)  ; ThreadId(component)
-  %6 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
-  %7 = extractvalue %dx.types.CBufRet.i32 %6, 0
-  %8 = add i32 %7, %5
-  %9 = extractvalue %dx.types.CBufRet.i32 %6, 1
-  %10 = icmp ult i32 %8, %9
-  br i1 %10, label %11, label %3454
-
-; <label>:11                                      ; preds = %0
-  %12 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
-  %13 = extractvalue %dx.types.CBufRet.i32 %12, 3
-  %14 = uitofp i32 %13 to float
-  %15 = extractvalue %dx.types.CBufRet.i32 %12, 2
-  %16 = uitofp i32 %15 to float
-  %17 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 7)  ; CBufferLoadLegacy(handle,regIndex)
-  %18 = extractvalue %dx.types.CBufRet.i32 %17, 0
-  %19 = icmp eq i32 %18, 0
-  %20 = select i1 %19, float -5.000000e-01, float 0.000000e+00
-  %21 = select i1 %19, float -5.000000e-01, float -1.000000e+00
-  %22 = fadd float %14, %21
-  %23 = select i1 %19, float -5.000000e-01, float -1.000000e+00
-  %24 = fadd float %16, %23
-  %25 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
-  %26 = extractvalue %dx.types.CBufRet.i32 %25, 1
-  %27 = extractvalue %dx.types.CBufRet.i32 %25, 2
-  %28 = extractvalue %dx.types.CBufRet.i32 %25, 3
-  %29 = mul i32 %28, %27
-  %30 = mul i32 %27, %26
-  %31 = mul i32 %30, %28
-  %32 = udiv i32 %8, %31
-  %33 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 6)  ; CBufferLoadLegacy(handle,regIndex)
-  %34 = extractvalue %dx.types.CBufRet.i32 %33, 0
-  %35 = mul i32 %34, %32
-  %36 = sub i32 %8, %35
-  %37 = udiv i32 %36, %29
-  %38 = extractvalue %dx.types.CBufRet.i32 %33, 1
-  %39 = mul i32 %38, %37
-  %40 = sub i32 %36, %39
-  %41 = udiv i32 %40, %28
-  %42 = extractvalue %dx.types.CBufRet.i32 %33, 2
-  %43 = mul i32 %42, %41
-  %44 = sub i32 %40, %43
-  %45 = uitofp i32 %32 to float
-  %46 = uitofp i32 %41 to float
-  %47 = uitofp i32 %44 to float
-  %48 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
-  %49 = extractvalue %dx.types.CBufRet.i32 %48, 0
-  %50 = extractvalue %dx.types.CBufRet.i32 %48, 1
-  %51 = extractvalue %dx.types.CBufRet.i32 %48, 2
-  %52 = extractvalue %dx.types.CBufRet.i32 %48, 3
-  %53 = uitofp i32 %49 to float
-  %54 = uitofp i32 %50 to float
-  %55 = uitofp i32 %51 to float
-  %56 = uitofp i32 %52 to float
-  %57 = call float @dx.op.dot4.f32(i32 56, float %45, float %46, float %47, float 0.000000e+00, float %53, float %54, float %55, float %56)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %58 = fadd fast float %56, %57
-  %59 = fptoui float %57 to i32
-  %60 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %2, i32 %59, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %61 = extractvalue %dx.types.ResRet.f16 %60, 0
-  %62 = fpext half %61 to float
-  %63 = fptoui float %58 to i32
-  %64 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %2, i32 %63, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %65 = extractvalue %dx.types.ResRet.f16 %64, 0
-  %66 = fpext half %65 to float
-  %67 = icmp eq i32 %18, 1
-  %68 = fadd fast float %62, 1.000000e+00
-  %69 = fadd fast float %66, 1.000000e+00
-  br i1 %67, label %70, label %77
-
-; <label>:70                                      ; preds = %11
-  %71 = fmul fast float %68, 5.000000e-01
-  %72 = fmul fast float %69, 5.000000e-01
-  %73 = fadd fast float %14, -1.000000e+00
-  %74 = fadd fast float %16, -1.000000e+00
-  %75 = fmul fast float %71, %73
-  %76 = fmul fast float %72, %74
-  br label %84
-
-; <label>:77                                      ; preds = %11
-  %78 = fmul fast float %14, %68
-  %79 = fmul fast float %69, %16
-  %80 = fadd fast float %78, -1.000000e+00
-  %81 = fadd fast float %79, -1.000000e+00
-  %82 = fmul fast float %80, 5.000000e-01
-  %83 = fmul fast float %81, 5.000000e-01
-  br label %84
-
-; <label>:84                                      ; preds = %77, %70
-  %85 = phi float [ %75, %70 ], [ %82, %77 ]
-  %86 = phi float [ %76, %70 ], [ %83, %77 ]
-  %87 = extractvalue %dx.types.CBufRet.i32 %6, 2
-  %88 = icmp eq i32 %87, 1
-  br i1 %88, label %89, label %92
-
-; <label>:89                                      ; preds = %84
-  %90 = call float @dx.op.unary.f32(i32 26, float %85)  ; Round_ne(value)
-  %91 = call float @dx.op.unary.f32(i32 26, float %86)  ; Round_ne(value)
-  br label %92
-
-; <label>:92                                      ; preds = %89, %84
-  %93 = phi float [ %90, %89 ], [ %85, %84 ]
-  %94 = phi float [ %91, %89 ], [ %86, %84 ]
-  %95 = fcmp fast olt float %93, %20
-  %96 = fcmp fast ogt float %93, %22
-  %97 = or i1 %95, %96
-  %98 = fcmp fast olt float %94, %20
-  %99 = or i1 %97, %98
-  %100 = fcmp fast ogt float %94, %24
-  %101 = or i1 %100, %99
-  br i1 %101, label %102, label %175
-
-; <label>:102                                     ; preds = %92
-  %103 = extractvalue %dx.types.CBufRet.i32 %6, 3
-  %104 = icmp eq i32 %103, 1
-  br i1 %104, label %105, label %114
-
-; <label>:105                                     ; preds = %102
-  %106 = add i32 %13, -1
-  %107 = uitofp i32 %106 to float
-  %108 = call float @dx.op.binary.f32(i32 35, float %93, float 0.000000e+00)  ; FMax(a,b)
-  %109 = call float @dx.op.binary.f32(i32 36, float %108, float %107)  ; FMin(a,b)
-  %110 = add i32 %15, -1
-  %111 = uitofp i32 %110 to float
-  %112 = call float @dx.op.binary.f32(i32 35, float %94, float 0.000000e+00)  ; FMax(a,b)
-  %113 = call float @dx.op.binary.f32(i32 36, float %112, float %111)  ; FMin(a,b)
-  br label %175
-
-; <label>:114                                     ; preds = %102
-  %115 = icmp eq i32 %103, 2
-  br i1 %115, label %116, label %175
-
-; <label>:116                                     ; preds = %114
-  %117 = fsub fast float %22, %20
-  br i1 %95, label %118, label %131
-
-; <label>:118                                     ; preds = %116
-  %119 = fsub fast float %20, %93
-  %120 = fdiv fast float %119, %117
-  %121 = fptoui float %120 to i32
-  %122 = uitofp i32 %121 to float
-  %123 = fmul fast float %122, %117
-  %124 = fsub fast float %119, %123
-  %125 = and i32 %121, 1
-  %126 = icmp eq i32 %125, 0
-  br i1 %126, label %127, label %129
-
-; <label>:127                                     ; preds = %118
-  %128 = fadd fast float %124, %20
-  br label %145
-
-; <label>:129                                     ; preds = %118
-  %130 = fsub fast float %22, %124
-  br label %145
-
-; <label>:131                                     ; preds = %116
-  br i1 %96, label %132, label %145
-
-; <label>:132                                     ; preds = %131
-  %133 = fsub fast float %93, %22
-  %134 = fdiv fast float %133, %117
-  %135 = fptoui float %134 to i32
-  %136 = uitofp i32 %135 to float
-  %137 = fmul fast float %136, %117
-  %138 = fsub fast float %133, %137
-  %139 = and i32 %135, 1
-  %140 = icmp eq i32 %139, 0
-  br i1 %140, label %141, label %143
-
-; <label>:141                                     ; preds = %132
-  %142 = fsub fast float %22, %138
-  br label %145
-
-; <label>:143                                     ; preds = %132
-  %144 = fadd fast float %138, %20
-  br label %145
-
-; <label>:145                                     ; preds = %143, %141, %131, %129, %127
-  %146 = phi float [ %128, %127 ], [ %130, %129 ], [ %142, %141 ], [ %144, %143 ], [ %93, %131 ]
-  %147 = fsub fast float %24, %20
-  br i1 %98, label %148, label %161
-
-; <label>:148                                     ; preds = %145
-  %149 = fsub fast float %20, %94
-  %150 = fdiv fast float %149, %147
-  %151 = fptoui float %150 to i32
-  %152 = uitofp i32 %151 to float
-  %153 = fmul fast float %152, %147
-  %154 = fsub fast float %149, %153
-  %155 = and i32 %151, 1
-  %156 = icmp eq i32 %155, 0
-  br i1 %156, label %157, label %159
-
-; <label>:157                                     ; preds = %148
-  %158 = fadd fast float %154, %20
-  br label %175
-
-; <label>:159                                     ; preds = %148
-  %160 = fsub fast float %24, %154
-  br label %175
-
-; <label>:161                                     ; preds = %145
-  br i1 %100, label %162, label %175
-
-; <label>:162                                     ; preds = %161
-  %163 = fsub fast float %94, %24
-  %164 = fdiv fast float %163, %147
-  %165 = fptoui float %164 to i32
-  %166 = uitofp i32 %165 to float
-  %167 = fmul fast float %166, %147
-  %168 = fsub fast float %163, %167
-  %169 = and i32 %165, 1
-  %170 = icmp eq i32 %169, 0
-  br i1 %170, label %171, label %173
-
-; <label>:171                                     ; preds = %162
-  %172 = fsub fast float %24, %168
-  br label %175
-
-; <label>:173                                     ; preds = %162
-  %174 = fadd fast float %168, %20
-  br label %175
-
-; <label>:175                                     ; preds = %173, %171, %161, %159, %157, %114, %105, %92
-  %176 = phi float [ %109, %105 ], [ %93, %114 ], [ %93, %92 ], [ %146, %173 ], [ %146, %171 ], [ %146, %161 ], [ %146, %159 ], [ %146, %157 ]
-  %177 = phi float [ %113, %105 ], [ %94, %114 ], [ %94, %92 ], [ %174, %173 ], [ %172, %171 ], [ %94, %161 ], [ %160, %159 ], [ %158, %157 ]
-  %178 = uitofp i32 %37 to float
-  br i1 %88, label %179, label %337
-
-; <label>:179                                     ; preds = %175
-  %180 = extractvalue %dx.types.CBufRet.i32 %6, 3
-  %181 = icmp eq i32 %180, 0
-  br i1 %181, label %182, label %209
-
-; <label>:182                                     ; preds = %179
-  %183 = fcmp fast oge float %176, 0.000000e+00
-  %184 = fptoui float %176 to i32
-  %185 = icmp ult i32 %184, %13
-  %186 = and i1 %183, %185
-  %187 = fcmp fast oge float %177, 0.000000e+00
-  %188 = and i1 %187, %186
-  %189 = fptoui float %177 to i32
-  %190 = icmp ult i32 %189, %15
-  %191 = and i1 %190, %188
-  br i1 %191, label %192, label %331
-
-; <label>:192                                     ; preds = %182
-  %193 = fptoui float %45 to i32
-  %194 = fptoui float %178 to i32
-  %195 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %196 = extractvalue %dx.types.CBufRet.i32 %195, 0
-  %197 = extractvalue %dx.types.CBufRet.i32 %195, 1
-  %198 = extractvalue %dx.types.CBufRet.i32 %195, 2
-  %199 = extractvalue %dx.types.CBufRet.i32 %195, 3
-  %200 = mul i32 %196, %193
-  %201 = call i32 @dx.op.tertiary.i32(i32 48, i32 %194, i32 %197, i32 %200)  ; IMad(a,b,c)
-  %202 = call i32 @dx.op.tertiary.i32(i32 48, i32 %189, i32 %198, i32 %201)  ; IMad(a,b,c)
-  %203 = call i32 @dx.op.tertiary.i32(i32 48, i32 %184, i32 %199, i32 %202)  ; IMad(a,b,c)
-  %204 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %203, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %205 = extractvalue %dx.types.ResRet.i32 %204, 0
-  %206 = extractvalue %dx.types.ResRet.i32 %204, 1
-  %207 = call double @dx.op.makeDouble.f64(i32 101, i32 %205, i32 %206)  ; MakeDouble(lo,hi)
-  %208 = fptrunc double %207 to float
-  br label %331
-
-; <label>:209                                     ; preds = %179
-  %210 = icmp eq i32 %180, 1
-  br i1 %210, label %211, label %242
-
-; <label>:211                                     ; preds = %209
-  %212 = add i32 %13, -1
-  %213 = uitofp i32 %212 to float
-  %214 = call float @dx.op.binary.f32(i32 35, float %176, float 0.000000e+00)  ; FMax(a,b)
-  %215 = call float @dx.op.binary.f32(i32 36, float %214, float %213)  ; FMin(a,b)
-  %216 = fptoui float %215 to i32
-  %217 = add i32 %15, -1
-  %218 = uitofp i32 %217 to float
-  %219 = call float @dx.op.binary.f32(i32 35, float %177, float 0.000000e+00)  ; FMax(a,b)
-  %220 = call float @dx.op.binary.f32(i32 36, float %219, float %218)  ; FMin(a,b)
-  %221 = fptoui float %220 to i32
-  %222 = uitofp i32 %221 to float
-  %223 = uitofp i32 %216 to float
-  %224 = fptoui float %45 to i32
-  %225 = fptoui float %178 to i32
-  %226 = fptoui float %222 to i32
-  %227 = fptoui float %223 to i32
-  %228 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %229 = extractvalue %dx.types.CBufRet.i32 %228, 0
-  %230 = extractvalue %dx.types.CBufRet.i32 %228, 1
-  %231 = extractvalue %dx.types.CBufRet.i32 %228, 2
-  %232 = extractvalue %dx.types.CBufRet.i32 %228, 3
-  %233 = mul i32 %229, %224
-  %234 = call i32 @dx.op.tertiary.i32(i32 48, i32 %225, i32 %230, i32 %233)  ; IMad(a,b,c)
-  %235 = call i32 @dx.op.tertiary.i32(i32 48, i32 %226, i32 %231, i32 %234)  ; IMad(a,b,c)
-  %236 = call i32 @dx.op.tertiary.i32(i32 48, i32 %227, i32 %232, i32 %235)  ; IMad(a,b,c)
-  %237 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %236, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %238 = extractvalue %dx.types.ResRet.i32 %237, 0
-  %239 = extractvalue %dx.types.ResRet.i32 %237, 1
-  %240 = call double @dx.op.makeDouble.f64(i32 101, i32 %238, i32 %239)  ; MakeDouble(lo,hi)
-  %241 = fptrunc double %240 to float
-  br label %331
-
-; <label>:242                                     ; preds = %209
-  %243 = icmp eq i32 %180, 2
-  br i1 %243, label %244, label %331
-
-; <label>:244                                     ; preds = %242
-  %245 = fsub fast float %22, %20
-  %246 = fcmp fast olt float %176, %20
-  br i1 %246, label %247, label %260
-
-; <label>:247                                     ; preds = %244
-  %248 = fsub fast float %20, %176
-  %249 = fdiv fast float %248, %245
-  %250 = fptoui float %249 to i32
-  %251 = uitofp i32 %250 to float
-  %252 = fmul fast float %251, %245
-  %253 = fsub fast float %248, %252
-  %254 = and i32 %250, 1
-  %255 = icmp eq i32 %254, 0
-  br i1 %255, label %256, label %258
-
-; <label>:256                                     ; preds = %247
-  %257 = fadd fast float %253, %20
-  br label %275
-
-; <label>:258                                     ; preds = %247
-  %259 = fsub fast float %22, %253
-  br label %275
-
-; <label>:260                                     ; preds = %244
-  %261 = fcmp fast ogt float %176, %22
-  br i1 %261, label %262, label %275
-
-; <label>:262                                     ; preds = %260
-  %263 = fsub fast float %176, %22
-  %264 = fdiv fast float %263, %245
-  %265 = fptoui float %264 to i32
-  %266 = uitofp i32 %265 to float
-  %267 = fmul fast float %266, %245
-  %268 = fsub fast float %263, %267
-  %269 = and i32 %265, 1
-  %270 = icmp eq i32 %269, 0
-  br i1 %270, label %271, label %273
-
-; <label>:271                                     ; preds = %262
-  %272 = fsub fast float %22, %268
-  br label %275
-
-; <label>:273                                     ; preds = %262
-  %274 = fadd fast float %268, %20
-  br label %275
-
-; <label>:275                                     ; preds = %273, %271, %260, %258, %256
-  %276 = phi float [ %257, %256 ], [ %259, %258 ], [ %272, %271 ], [ %274, %273 ], [ %176, %260 ]
-  %277 = fptoui float %276 to i32
-  %278 = fsub fast float %24, %20
-  %279 = fcmp fast olt float %177, %20
-  br i1 %279, label %280, label %293
-
-; <label>:280                                     ; preds = %275
-  %281 = fsub fast float %20, %177
-  %282 = fdiv fast float %281, %278
-  %283 = fptoui float %282 to i32
-  %284 = uitofp i32 %283 to float
-  %285 = fmul fast float %284, %278
-  %286 = fsub fast float %281, %285
-  %287 = and i32 %283, 1
-  %288 = icmp eq i32 %287, 0
-  br i1 %288, label %289, label %291
-
-; <label>:289                                     ; preds = %280
-  %290 = fadd fast float %286, %20
-  br label %308
-
-; <label>:291                                     ; preds = %280
-  %292 = fsub fast float %24, %286
-  br label %308
-
-; <label>:293                                     ; preds = %275
-  %294 = fcmp fast ogt float %177, %24
-  br i1 %294, label %295, label %308
-
-; <label>:295                                     ; preds = %293
-  %296 = fsub fast float %177, %24
-  %297 = fdiv fast float %296, %278
-  %298 = fptoui float %297 to i32
-  %299 = uitofp i32 %298 to float
-  %300 = fmul fast float %299, %278
-  %301 = fsub fast float %296, %300
-  %302 = and i32 %298, 1
-  %303 = icmp eq i32 %302, 0
-  br i1 %303, label %304, label %306
-
-; <label>:304                                     ; preds = %295
-  %305 = fsub fast float %24, %301
-  br label %308
-
-; <label>:306                                     ; preds = %295
-  %307 = fadd fast float %301, %20
-  br label %308
-
-; <label>:308                                     ; preds = %306, %304, %293, %291, %289
-  %309 = phi float [ %290, %289 ], [ %292, %291 ], [ %305, %304 ], [ %307, %306 ], [ %177, %293 ]
-  %310 = fptoui float %309 to i32
-  %311 = uitofp i32 %310 to float
-  %312 = uitofp i32 %277 to float
-  %313 = fptoui float %45 to i32
-  %314 = fptoui float %178 to i32
-  %315 = fptoui float %311 to i32
-  %316 = fptoui float %312 to i32
-  %317 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %318 = extractvalue %dx.types.CBufRet.i32 %317, 0
-  %319 = extractvalue %dx.types.CBufRet.i32 %317, 1
-  %320 = extractvalue %dx.types.CBufRet.i32 %317, 2
-  %321 = extractvalue %dx.types.CBufRet.i32 %317, 3
-  %322 = mul i32 %318, %313
-  %323 = call i32 @dx.op.tertiary.i32(i32 48, i32 %314, i32 %319, i32 %322)  ; IMad(a,b,c)
-  %324 = call i32 @dx.op.tertiary.i32(i32 48, i32 %315, i32 %320, i32 %323)  ; IMad(a,b,c)
-  %325 = call i32 @dx.op.tertiary.i32(i32 48, i32 %316, i32 %321, i32 %324)  ; IMad(a,b,c)
-  %326 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %325, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %327 = extractvalue %dx.types.ResRet.i32 %326, 0
-  %328 = extractvalue %dx.types.ResRet.i32 %326, 1
-  %329 = call double @dx.op.makeDouble.f64(i32 101, i32 %327, i32 %328)  ; MakeDouble(lo,hi)
-  %330 = fptrunc double %329 to float
-  br label %331
-
-; <label>:331                                     ; preds = %308, %242, %211, %192, %182
-  %332 = phi float [ %208, %192 ], [ 0.000000e+00, %182 ], [ %241, %211 ], [ %330, %308 ], [ 0.000000e+00, %242 ]
-  %333 = fpext float %332 to double
-  %334 = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double %333)  ; SplitDouble(value)
-  %335 = extractvalue %dx.types.splitdouble %334, 0
-  %336 = extractvalue %dx.types.splitdouble %334, 1
-  call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %1, i32 %8, i32 0, i32 %335, i32 %336, i32 undef, i32 undef, i8 3, i32 8)  ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment)
-  br label %3454
-
-; <label>:337                                     ; preds = %175
-  %338 = icmp eq i32 %87, 0
-  br i1 %338, label %339, label %965
-
-; <label>:339                                     ; preds = %337
-  %340 = call float @dx.op.unary.f32(i32 27, float %176)  ; Round_ni(value)
-  %341 = call float @dx.op.unary.f32(i32 27, float %177)  ; Round_ni(value)
-  %342 = fadd fast float %340, 1.000000e+00
-  %343 = fadd fast float %341, 1.000000e+00
-  %344 = extractvalue %dx.types.CBufRet.i32 %6, 3
-  %345 = icmp eq i32 %344, 0
-  br i1 %345, label %346, label %373
-
-; <label>:346                                     ; preds = %339
-  %347 = fcmp fast oge float %340, 0.000000e+00
-  %348 = fptoui float %340 to i32
-  %349 = icmp ult i32 %348, %13
-  %350 = and i1 %347, %349
-  %351 = fcmp fast oge float %341, 0.000000e+00
-  %352 = and i1 %351, %350
-  %353 = fptoui float %341 to i32
-  %354 = icmp ult i32 %353, %15
-  %355 = and i1 %354, %352
-  br i1 %355, label %356, label %495
-
-; <label>:356                                     ; preds = %346
-  %357 = fptoui float %45 to i32
-  %358 = fptoui float %178 to i32
-  %359 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %360 = extractvalue %dx.types.CBufRet.i32 %359, 0
-  %361 = extractvalue %dx.types.CBufRet.i32 %359, 1
-  %362 = extractvalue %dx.types.CBufRet.i32 %359, 2
-  %363 = extractvalue %dx.types.CBufRet.i32 %359, 3
-  %364 = mul i32 %360, %357
-  %365 = call i32 @dx.op.tertiary.i32(i32 48, i32 %358, i32 %361, i32 %364)  ; IMad(a,b,c)
-  %366 = call i32 @dx.op.tertiary.i32(i32 48, i32 %353, i32 %362, i32 %365)  ; IMad(a,b,c)
-  %367 = call i32 @dx.op.tertiary.i32(i32 48, i32 %348, i32 %363, i32 %366)  ; IMad(a,b,c)
-  %368 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %367, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %369 = extractvalue %dx.types.ResRet.i32 %368, 0
-  %370 = extractvalue %dx.types.ResRet.i32 %368, 1
-  %371 = call double @dx.op.makeDouble.f64(i32 101, i32 %369, i32 %370)  ; MakeDouble(lo,hi)
-  %372 = fptrunc double %371 to float
-  br label %495
-
-; <label>:373                                     ; preds = %339
-  %374 = icmp eq i32 %344, 1
-  br i1 %374, label %375, label %406
-
-; <label>:375                                     ; preds = %373
-  %376 = add i32 %13, -1
-  %377 = uitofp i32 %376 to float
-  %378 = call float @dx.op.binary.f32(i32 35, float %340, float 0.000000e+00)  ; FMax(a,b)
-  %379 = call float @dx.op.binary.f32(i32 36, float %378, float %377)  ; FMin(a,b)
-  %380 = fptoui float %379 to i32
-  %381 = add i32 %15, -1
-  %382 = uitofp i32 %381 to float
-  %383 = call float @dx.op.binary.f32(i32 35, float %341, float 0.000000e+00)  ; FMax(a,b)
-  %384 = call float @dx.op.binary.f32(i32 36, float %383, float %382)  ; FMin(a,b)
-  %385 = fptoui float %384 to i32
-  %386 = uitofp i32 %385 to float
-  %387 = uitofp i32 %380 to float
-  %388 = fptoui float %45 to i32
-  %389 = fptoui float %178 to i32
-  %390 = fptoui float %386 to i32
-  %391 = fptoui float %387 to i32
-  %392 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %393 = extractvalue %dx.types.CBufRet.i32 %392, 0
-  %394 = extractvalue %dx.types.CBufRet.i32 %392, 1
-  %395 = extractvalue %dx.types.CBufRet.i32 %392, 2
-  %396 = extractvalue %dx.types.CBufRet.i32 %392, 3
-  %397 = mul i32 %393, %388
-  %398 = call i32 @dx.op.tertiary.i32(i32 48, i32 %389, i32 %394, i32 %397)  ; IMad(a,b,c)
-  %399 = call i32 @dx.op.tertiary.i32(i32 48, i32 %390, i32 %395, i32 %398)  ; IMad(a,b,c)
-  %400 = call i32 @dx.op.tertiary.i32(i32 48, i32 %391, i32 %396, i32 %399)  ; IMad(a,b,c)
-  %401 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %400, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %402 = extractvalue %dx.types.ResRet.i32 %401, 0
-  %403 = extractvalue %dx.types.ResRet.i32 %401, 1
-  %404 = call double @dx.op.makeDouble.f64(i32 101, i32 %402, i32 %403)  ; MakeDouble(lo,hi)
-  %405 = fptrunc double %404 to float
-  br label %495
-
-; <label>:406                                     ; preds = %373
-  %407 = icmp eq i32 %344, 2
-  br i1 %407, label %408, label %495
-
-; <label>:408                                     ; preds = %406
-  %409 = fsub fast float %22, %20
-  %410 = fcmp fast olt float %340, %20
-  br i1 %410, label %411, label %424
-
-; <label>:411                                     ; preds = %408
-  %412 = fsub fast float %20, %340
-  %413 = fdiv fast float %412, %409
-  %414 = fptoui float %413 to i32
-  %415 = uitofp i32 %414 to float
-  %416 = fmul fast float %415, %409
-  %417 = fsub fast float %412, %416
-  %418 = and i32 %414, 1
-  %419 = icmp eq i32 %418, 0
-  br i1 %419, label %420, label %422
-
-; <label>:420                                     ; preds = %411
-  %421 = fadd fast float %417, %20
-  br label %439
-
-; <label>:422                                     ; preds = %411
-  %423 = fsub fast float %22, %417
-  br label %439
-
-; <label>:424                                     ; preds = %408
-  %425 = fcmp fast ogt float %340, %22
-  br i1 %425, label %426, label %439
-
-; <label>:426                                     ; preds = %424
-  %427 = fsub fast float %340, %22
-  %428 = fdiv fast float %427, %409
-  %429 = fptoui float %428 to i32
-  %430 = uitofp i32 %429 to float
-  %431 = fmul fast float %430, %409
-  %432 = fsub fast float %427, %431
-  %433 = and i32 %429, 1
-  %434 = icmp eq i32 %433, 0
-  br i1 %434, label %435, label %437
-
-; <label>:435                                     ; preds = %426
-  %436 = fsub fast float %22, %432
-  br label %439
-
-; <label>:437                                     ; preds = %426
-  %438 = fadd fast float %432, %20
-  br label %439
-
-; <label>:439                                     ; preds = %437, %435, %424, %422, %420
-  %440 = phi float [ %421, %420 ], [ %423, %422 ], [ %436, %435 ], [ %438, %437 ], [ %340, %424 ]
-  %441 = fptoui float %440 to i32
-  %442 = fsub fast float %24, %20
-  %443 = fcmp fast olt float %341, %20
-  br i1 %443, label %444, label %457
-
-; <label>:444                                     ; preds = %439
-  %445 = fsub fast float %20, %341
-  %446 = fdiv fast float %445, %442
-  %447 = fptoui float %446 to i32
-  %448 = uitofp i32 %447 to float
-  %449 = fmul fast float %448, %442
-  %450 = fsub fast float %445, %449
-  %451 = and i32 %447, 1
-  %452 = icmp eq i32 %451, 0
-  br i1 %452, label %453, label %455
-
-; <label>:453                                     ; preds = %444
-  %454 = fadd fast float %450, %20
-  br label %472
-
-; <label>:455                                     ; preds = %444
-  %456 = fsub fast float %24, %450
-  br label %472
-
-; <label>:457                                     ; preds = %439
-  %458 = fcmp fast ogt float %341, %24
-  br i1 %458, label %459, label %472
-
-; <label>:459                                     ; preds = %457
-  %460 = fsub fast float %341, %24
-  %461 = fdiv fast float %460, %442
-  %462 = fptoui float %461 to i32
-  %463 = uitofp i32 %462 to float
-  %464 = fmul fast float %463, %442
-  %465 = fsub fast float %460, %464
-  %466 = and i32 %462, 1
-  %467 = icmp eq i32 %466, 0
-  br i1 %467, label %468, label %470
-
-; <label>:468                                     ; preds = %459
-  %469 = fsub fast float %24, %465
-  br label %472
-
-; <label>:470                                     ; preds = %459
-  %471 = fadd fast float %465, %20
-  br label %472
-
-; <label>:472                                     ; preds = %470, %468, %457, %455, %453
-  %473 = phi float [ %454, %453 ], [ %456, %455 ], [ %469, %468 ], [ %471, %470 ], [ %341, %457 ]
-  %474 = fptoui float %473 to i32
-  %475 = uitofp i32 %474 to float
-  %476 = uitofp i32 %441 to float
-  %477 = fptoui float %45 to i32
-  %478 = fptoui float %178 to i32
-  %479 = fptoui float %475 to i32
-  %480 = fptoui float %476 to i32
-  %481 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %482 = extractvalue %dx.types.CBufRet.i32 %481, 0
-  %483 = extractvalue %dx.types.CBufRet.i32 %481, 1
-  %484 = extractvalue %dx.types.CBufRet.i32 %481, 2
-  %485 = extractvalue %dx.types.CBufRet.i32 %481, 3
-  %486 = mul i32 %482, %477
-  %487 = call i32 @dx.op.tertiary.i32(i32 48, i32 %478, i32 %483, i32 %486)  ; IMad(a,b,c)
-  %488 = call i32 @dx.op.tertiary.i32(i32 48, i32 %479, i32 %484, i32 %487)  ; IMad(a,b,c)
-  %489 = call i32 @dx.op.tertiary.i32(i32 48, i32 %480, i32 %485, i32 %488)  ; IMad(a,b,c)
-  %490 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %489, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %491 = extractvalue %dx.types.ResRet.i32 %490, 0
-  %492 = extractvalue %dx.types.ResRet.i32 %490, 1
-  %493 = call double @dx.op.makeDouble.f64(i32 101, i32 %491, i32 %492)  ; MakeDouble(lo,hi)
-  %494 = fptrunc double %493 to float
-  br label %495
-
-; <label>:495                                     ; preds = %472, %406, %375, %356, %346
-  %496 = phi float [ %372, %356 ], [ 0.000000e+00, %346 ], [ %405, %375 ], [ %494, %472 ], [ 0.000000e+00, %406 ]
-  br i1 %345, label %497, label %524
-
-; <label>:497                                     ; preds = %495
-  %498 = fcmp fast oge float %342, 0.000000e+00
-  %499 = fptoui float %342 to i32
-  %500 = icmp ult i32 %499, %13
-  %501 = and i1 %498, %500
-  %502 = fcmp fast oge float %341, 0.000000e+00
-  %503 = and i1 %502, %501
-  %504 = fptoui float %341 to i32
-  %505 = icmp ult i32 %504, %15
-  %506 = and i1 %505, %503
-  br i1 %506, label %507, label %646
-
-; <label>:507                                     ; preds = %497
-  %508 = fptoui float %45 to i32
-  %509 = fptoui float %178 to i32
-  %510 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %511 = extractvalue %dx.types.CBufRet.i32 %510, 0
-  %512 = extractvalue %dx.types.CBufRet.i32 %510, 1
-  %513 = extractvalue %dx.types.CBufRet.i32 %510, 2
-  %514 = extractvalue %dx.types.CBufRet.i32 %510, 3
-  %515 = mul i32 %511, %508
-  %516 = call i32 @dx.op.tertiary.i32(i32 48, i32 %509, i32 %512, i32 %515)  ; IMad(a,b,c)
-  %517 = call i32 @dx.op.tertiary.i32(i32 48, i32 %504, i32 %513, i32 %516)  ; IMad(a,b,c)
-  %518 = call i32 @dx.op.tertiary.i32(i32 48, i32 %499, i32 %514, i32 %517)  ; IMad(a,b,c)
-  %519 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %518, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %520 = extractvalue %dx.types.ResRet.i32 %519, 0
-  %521 = extractvalue %dx.types.ResRet.i32 %519, 1
-  %522 = call double @dx.op.makeDouble.f64(i32 101, i32 %520, i32 %521)  ; MakeDouble(lo,hi)
-  %523 = fptrunc double %522 to float
-  br label %646
-
-; <label>:524                                     ; preds = %495
-  %525 = icmp eq i32 %344, 1
-  br i1 %525, label %526, label %557
-
-; <label>:526                                     ; preds = %524
-  %527 = add i32 %13, -1
-  %528 = uitofp i32 %527 to float
-  %529 = call float @dx.op.binary.f32(i32 35, float %342, float 0.000000e+00)  ; FMax(a,b)
-  %530 = call float @dx.op.binary.f32(i32 36, float %529, float %528)  ; FMin(a,b)
-  %531 = fptoui float %530 to i32
-  %532 = add i32 %15, -1
-  %533 = uitofp i32 %532 to float
-  %534 = call float @dx.op.binary.f32(i32 35, float %341, float 0.000000e+00)  ; FMax(a,b)
-  %535 = call float @dx.op.binary.f32(i32 36, float %534, float %533)  ; FMin(a,b)
-  %536 = fptoui float %535 to i32
-  %537 = uitofp i32 %536 to float
-  %538 = uitofp i32 %531 to float
-  %539 = fptoui float %45 to i32
-  %540 = fptoui float %178 to i32
-  %541 = fptoui float %537 to i32
-  %542 = fptoui float %538 to i32
-  %543 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %544 = extractvalue %dx.types.CBufRet.i32 %543, 0
-  %545 = extractvalue %dx.types.CBufRet.i32 %543, 1
-  %546 = extractvalue %dx.types.CBufRet.i32 %543, 2
-  %547 = extractvalue %dx.types.CBufRet.i32 %543, 3
-  %548 = mul i32 %544, %539
-  %549 = call i32 @dx.op.tertiary.i32(i32 48, i32 %540, i32 %545, i32 %548)  ; IMad(a,b,c)
-  %550 = call i32 @dx.op.tertiary.i32(i32 48, i32 %541, i32 %546, i32 %549)  ; IMad(a,b,c)
-  %551 = call i32 @dx.op.tertiary.i32(i32 48, i32 %542, i32 %547, i32 %550)  ; IMad(a,b,c)
-  %552 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %551, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %553 = extractvalue %dx.types.ResRet.i32 %552, 0
-  %554 = extractvalue %dx.types.ResRet.i32 %552, 1
-  %555 = call double @dx.op.makeDouble.f64(i32 101, i32 %553, i32 %554)  ; MakeDouble(lo,hi)
-  %556 = fptrunc double %555 to float
-  br label %646
-
-; <label>:557                                     ; preds = %524
-  %558 = icmp eq i32 %344, 2
-  br i1 %558, label %559, label %646
-
-; <label>:559                                     ; preds = %557
-  %560 = fsub fast float %22, %20
-  %561 = fcmp fast olt float %342, %20
-  br i1 %561, label %562, label %575
-
-; <label>:562                                     ; preds = %559
-  %563 = fsub fast float %20, %342
-  %564 = fdiv fast float %563, %560
-  %565 = fptoui float %564 to i32
-  %566 = uitofp i32 %565 to float
-  %567 = fmul fast float %566, %560
-  %568 = fsub fast float %563, %567
-  %569 = and i32 %565, 1
-  %570 = icmp eq i32 %569, 0
-  br i1 %570, label %571, label %573
-
-; <label>:571                                     ; preds = %562
-  %572 = fadd fast float %568, %20
-  br label %590
-
-; <label>:573                                     ; preds = %562
-  %574 = fsub fast float %22, %568
-  br label %590
-
-; <label>:575                                     ; preds = %559
-  %576 = fcmp fast ogt float %342, %22
-  br i1 %576, label %577, label %590
-
-; <label>:577                                     ; preds = %575
-  %578 = fsub fast float %342, %22
-  %579 = fdiv fast float %578, %560
-  %580 = fptoui float %579 to i32
-  %581 = uitofp i32 %580 to float
-  %582 = fmul fast float %581, %560
-  %583 = fsub fast float %578, %582
-  %584 = and i32 %580, 1
-  %585 = icmp eq i32 %584, 0
-  br i1 %585, label %586, label %588
-
-; <label>:586                                     ; preds = %577
-  %587 = fsub fast float %22, %583
-  br label %590
-
-; <label>:588                                     ; preds = %577
-  %589 = fadd fast float %583, %20
-  br label %590
-
-; <label>:590                                     ; preds = %588, %586, %575, %573, %571
-  %591 = phi float [ %572, %571 ], [ %574, %573 ], [ %587, %586 ], [ %589, %588 ], [ %342, %575 ]
-  %592 = fptoui float %591 to i32
-  %593 = fsub fast float %24, %20
-  %594 = fcmp fast olt float %341, %20
-  br i1 %594, label %595, label %608
-
-; <label>:595                                     ; preds = %590
-  %596 = fsub fast float %20, %341
-  %597 = fdiv fast float %596, %593
-  %598 = fptoui float %597 to i32
-  %599 = uitofp i32 %598 to float
-  %600 = fmul fast float %599, %593
-  %601 = fsub fast float %596, %600
-  %602 = and i32 %598, 1
-  %603 = icmp eq i32 %602, 0
-  br i1 %603, label %604, label %606
-
-; <label>:604                                     ; preds = %595
-  %605 = fadd fast float %601, %20
-  br label %623
-
-; <label>:606                                     ; preds = %595
-  %607 = fsub fast float %24, %601
-  br label %623
-
-; <label>:608                                     ; preds = %590
-  %609 = fcmp fast ogt float %341, %24
-  br i1 %609, label %610, label %623
-
-; <label>:610                                     ; preds = %608
-  %611 = fsub fast float %341, %24
-  %612 = fdiv fast float %611, %593
-  %613 = fptoui float %612 to i32
-  %614 = uitofp i32 %613 to float
-  %615 = fmul fast float %614, %593
-  %616 = fsub fast float %611, %615
-  %617 = and i32 %613, 1
-  %618 = icmp eq i32 %617, 0
-  br i1 %618, label %619, label %621
-
-; <label>:619                                     ; preds = %610
-  %620 = fsub fast float %24, %616
-  br label %623
-
-; <label>:621                                     ; preds = %610
-  %622 = fadd fast float %616, %20
-  br label %623
-
-; <label>:623                                     ; preds = %621, %619, %608, %606, %604
-  %624 = phi float [ %605, %604 ], [ %607, %606 ], [ %620, %619 ], [ %622, %621 ], [ %341, %608 ]
-  %625 = fptoui float %624 to i32
-  %626 = uitofp i32 %625 to float
-  %627 = uitofp i32 %592 to float
-  %628 = fptoui float %45 to i32
-  %629 = fptoui float %178 to i32
-  %630 = fptoui float %626 to i32
-  %631 = fptoui float %627 to i32
-  %632 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %633 = extractvalue %dx.types.CBufRet.i32 %632, 0
-  %634 = extractvalue %dx.types.CBufRet.i32 %632, 1
-  %635 = extractvalue %dx.types.CBufRet.i32 %632, 2
-  %636 = extractvalue %dx.types.CBufRet.i32 %632, 3
-  %637 = mul i32 %633, %628
-  %638 = call i32 @dx.op.tertiary.i32(i32 48, i32 %629, i32 %634, i32 %637)  ; IMad(a,b,c)
-  %639 = call i32 @dx.op.tertiary.i32(i32 48, i32 %630, i32 %635, i32 %638)  ; IMad(a,b,c)
-  %640 = call i32 @dx.op.tertiary.i32(i32 48, i32 %631, i32 %636, i32 %639)  ; IMad(a,b,c)
-  %641 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %640, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %642 = extractvalue %dx.types.ResRet.i32 %641, 0
-  %643 = extractvalue %dx.types.ResRet.i32 %641, 1
-  %644 = call double @dx.op.makeDouble.f64(i32 101, i32 %642, i32 %643)  ; MakeDouble(lo,hi)
-  %645 = fptrunc double %644 to float
-  br label %646
-
-; <label>:646                                     ; preds = %623, %557, %526, %507, %497
-  %647 = phi float [ %523, %507 ], [ 0.000000e+00, %497 ], [ %556, %526 ], [ %645, %623 ], [ 0.000000e+00, %557 ]
-  br i1 %345, label %648, label %675
-
-; <label>:648                                     ; preds = %646
-  %649 = fcmp fast oge float %340, 0.000000e+00
-  %650 = fptoui float %340 to i32
-  %651 = icmp ult i32 %650, %13
-  %652 = and i1 %649, %651
-  %653 = fcmp fast oge float %343, 0.000000e+00
-  %654 = and i1 %653, %652
-  %655 = fptoui float %343 to i32
-  %656 = icmp ult i32 %655, %15
-  %657 = and i1 %656, %654
-  br i1 %657, label %658, label %797
-
-; <label>:658                                     ; preds = %648
-  %659 = fptoui float %45 to i32
-  %660 = fptoui float %178 to i32
-  %661 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %662 = extractvalue %dx.types.CBufRet.i32 %661, 0
-  %663 = extractvalue %dx.types.CBufRet.i32 %661, 1
-  %664 = extractvalue %dx.types.CBufRet.i32 %661, 2
-  %665 = extractvalue %dx.types.CBufRet.i32 %661, 3
-  %666 = mul i32 %662, %659
-  %667 = call i32 @dx.op.tertiary.i32(i32 48, i32 %660, i32 %663, i32 %666)  ; IMad(a,b,c)
-  %668 = call i32 @dx.op.tertiary.i32(i32 48, i32 %655, i32 %664, i32 %667)  ; IMad(a,b,c)
-  %669 = call i32 @dx.op.tertiary.i32(i32 48, i32 %650, i32 %665, i32 %668)  ; IMad(a,b,c)
-  %670 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %669, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %671 = extractvalue %dx.types.ResRet.i32 %670, 0
-  %672 = extractvalue %dx.types.ResRet.i32 %670, 1
-  %673 = call double @dx.op.makeDouble.f64(i32 101, i32 %671, i32 %672)  ; MakeDouble(lo,hi)
-  %674 = fptrunc double %673 to float
-  br label %797
-
-; <label>:675                                     ; preds = %646
-  %676 = icmp eq i32 %344, 1
-  br i1 %676, label %677, label %708
-
-; <label>:677                                     ; preds = %675
-  %678 = add i32 %13, -1
-  %679 = uitofp i32 %678 to float
-  %680 = call float @dx.op.binary.f32(i32 35, float %340, float 0.000000e+00)  ; FMax(a,b)
-  %681 = call float @dx.op.binary.f32(i32 36, float %680, float %679)  ; FMin(a,b)
-  %682 = fptoui float %681 to i32
-  %683 = add i32 %15, -1
-  %684 = uitofp i32 %683 to float
-  %685 = call float @dx.op.binary.f32(i32 35, float %343, float 0.000000e+00)  ; FMax(a,b)
-  %686 = call float @dx.op.binary.f32(i32 36, float %685, float %684)  ; FMin(a,b)
-  %687 = fptoui float %686 to i32
-  %688 = uitofp i32 %687 to float
-  %689 = uitofp i32 %682 to float
-  %690 = fptoui float %45 to i32
-  %691 = fptoui float %178 to i32
-  %692 = fptoui float %688 to i32
-  %693 = fptoui float %689 to i32
-  %694 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %695 = extractvalue %dx.types.CBufRet.i32 %694, 0
-  %696 = extractvalue %dx.types.CBufRet.i32 %694, 1
-  %697 = extractvalue %dx.types.CBufRet.i32 %694, 2
-  %698 = extractvalue %dx.types.CBufRet.i32 %694, 3
-  %699 = mul i32 %695, %690
-  %700 = call i32 @dx.op.tertiary.i32(i32 48, i32 %691, i32 %696, i32 %699)  ; IMad(a,b,c)
-  %701 = call i32 @dx.op.tertiary.i32(i32 48, i32 %692, i32 %697, i32 %700)  ; IMad(a,b,c)
-  %702 = call i32 @dx.op.tertiary.i32(i32 48, i32 %693, i32 %698, i32 %701)  ; IMad(a,b,c)
-  %703 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %702, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %704 = extractvalue %dx.types.ResRet.i32 %703, 0
-  %705 = extractvalue %dx.types.ResRet.i32 %703, 1
-  %706 = call double @dx.op.makeDouble.f64(i32 101, i32 %704, i32 %705)  ; MakeDouble(lo,hi)
-  %707 = fptrunc double %706 to float
-  br label %797
-
-; <label>:708                                     ; preds = %675
-  %709 = icmp eq i32 %344, 2
-  br i1 %709, label %710, label %797
-
-; <label>:710                                     ; preds = %708
-  %711 = fsub fast float %22, %20
-  %712 = fcmp fast olt float %340, %20
-  br i1 %712, label %713, label %726
-
-; <label>:713                                     ; preds = %710
-  %714 = fsub fast float %20, %340
-  %715 = fdiv fast float %714, %711
-  %716 = fptoui float %715 to i32
-  %717 = uitofp i32 %716 to float
-  %718 = fmul fast float %717, %711
-  %719 = fsub fast float %714, %718
-  %720 = and i32 %716, 1
-  %721 = icmp eq i32 %720, 0
-  br i1 %721, label %722, label %724
-
-; <label>:722                                     ; preds = %713
-  %723 = fadd fast float %719, %20
-  br label %741
-
-; <label>:724                                     ; preds = %713
-  %725 = fsub fast float %22, %719
-  br label %741
-
-; <label>:726                                     ; preds = %710
-  %727 = fcmp fast ogt float %340, %22
-  br i1 %727, label %728, label %741
-
-; <label>:728                                     ; preds = %726
-  %729 = fsub fast float %340, %22
-  %730 = fdiv fast float %729, %711
-  %731 = fptoui float %730 to i32
-  %732 = uitofp i32 %731 to float
-  %733 = fmul fast float %732, %711
-  %734 = fsub fast float %729, %733
-  %735 = and i32 %731, 1
-  %736 = icmp eq i32 %735, 0
-  br i1 %736, label %737, label %739
-
-; <label>:737                                     ; preds = %728
-  %738 = fsub fast float %22, %734
-  br label %741
-
-; <label>:739                                     ; preds = %728
-  %740 = fadd fast float %734, %20
-  br label %741
-
-; <label>:741                                     ; preds = %739, %737, %726, %724, %722
-  %742 = phi float [ %723, %722 ], [ %725, %724 ], [ %738, %737 ], [ %740, %739 ], [ %340, %726 ]
-  %743 = fptoui float %742 to i32
-  %744 = fsub fast float %24, %20
-  %745 = fcmp fast olt float %343, %20
-  br i1 %745, label %746, label %759
-
-; <label>:746                                     ; preds = %741
-  %747 = fsub fast float %20, %343
-  %748 = fdiv fast float %747, %744
-  %749 = fptoui float %748 to i32
-  %750 = uitofp i32 %749 to float
-  %751 = fmul fast float %750, %744
-  %752 = fsub fast float %747, %751
-  %753 = and i32 %749, 1
-  %754 = icmp eq i32 %753, 0
-  br i1 %754, label %755, label %757
-
-; <label>:755                                     ; preds = %746
-  %756 = fadd fast float %752, %20
-  br label %774
-
-; <label>:757                                     ; preds = %746
-  %758 = fsub fast float %24, %752
-  br label %774
-
-; <label>:759                                     ; preds = %741
-  %760 = fcmp fast ogt float %343, %24
-  br i1 %760, label %761, label %774
-
-; <label>:761                                     ; preds = %759
-  %762 = fsub fast float %343, %24
-  %763 = fdiv fast float %762, %744
-  %764 = fptoui float %763 to i32
-  %765 = uitofp i32 %764 to float
-  %766 = fmul fast float %765, %744
-  %767 = fsub fast float %762, %766
-  %768 = and i32 %764, 1
-  %769 = icmp eq i32 %768, 0
-  br i1 %769, label %770, label %772
-
-; <label>:770                                     ; preds = %761
-  %771 = fsub fast float %24, %767
-  br label %774
-
-; <label>:772                                     ; preds = %761
-  %773 = fadd fast float %767, %20
-  br label %774
-
-; <label>:774                                     ; preds = %772, %770, %759, %757, %755
-  %775 = phi float [ %756, %755 ], [ %758, %757 ], [ %771, %770 ], [ %773, %772 ], [ %343, %759 ]
-  %776 = fptoui float %775 to i32
-  %777 = uitofp i32 %776 to float
-  %778 = uitofp i32 %743 to float
-  %779 = fptoui float %45 to i32
-  %780 = fptoui float %178 to i32
-  %781 = fptoui float %777 to i32
-  %782 = fptoui float %778 to i32
-  %783 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %784 = extractvalue %dx.types.CBufRet.i32 %783, 0
-  %785 = extractvalue %dx.types.CBufRet.i32 %783, 1
-  %786 = extractvalue %dx.types.CBufRet.i32 %783, 2
-  %787 = extractvalue %dx.types.CBufRet.i32 %783, 3
-  %788 = mul i32 %784, %779
-  %789 = call i32 @dx.op.tertiary.i32(i32 48, i32 %780, i32 %785, i32 %788)  ; IMad(a,b,c)
-  %790 = call i32 @dx.op.tertiary.i32(i32 48, i32 %781, i32 %786, i32 %789)  ; IMad(a,b,c)
-  %791 = call i32 @dx.op.tertiary.i32(i32 48, i32 %782, i32 %787, i32 %790)  ; IMad(a,b,c)
-  %792 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %791, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %793 = extractvalue %dx.types.ResRet.i32 %792, 0
-  %794 = extractvalue %dx.types.ResRet.i32 %792, 1
-  %795 = call double @dx.op.makeDouble.f64(i32 101, i32 %793, i32 %794)  ; MakeDouble(lo,hi)
-  %796 = fptrunc double %795 to float
-  br label %797
-
-; <label>:797                                     ; preds = %774, %708, %677, %658, %648
-  %798 = phi float [ %674, %658 ], [ 0.000000e+00, %648 ], [ %707, %677 ], [ %796, %774 ], [ 0.000000e+00, %708 ]
-  br i1 %345, label %799, label %826
-
-; <label>:799                                     ; preds = %797
-  %800 = fcmp fast oge float %342, 0.000000e+00
-  %801 = fptoui float %342 to i32
-  %802 = icmp ult i32 %801, %13
-  %803 = and i1 %800, %802
-  %804 = fcmp fast oge float %343, 0.000000e+00
-  %805 = and i1 %804, %803
-  %806 = fptoui float %343 to i32
-  %807 = icmp ult i32 %806, %15
-  %808 = and i1 %807, %805
-  br i1 %808, label %809, label %948
-
-; <label>:809                                     ; preds = %799
-  %810 = fptoui float %45 to i32
-  %811 = fptoui float %178 to i32
-  %812 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %813 = extractvalue %dx.types.CBufRet.i32 %812, 0
-  %814 = extractvalue %dx.types.CBufRet.i32 %812, 1
-  %815 = extractvalue %dx.types.CBufRet.i32 %812, 2
-  %816 = extractvalue %dx.types.CBufRet.i32 %812, 3
-  %817 = mul i32 %813, %810
-  %818 = call i32 @dx.op.tertiary.i32(i32 48, i32 %811, i32 %814, i32 %817)  ; IMad(a,b,c)
-  %819 = call i32 @dx.op.tertiary.i32(i32 48, i32 %806, i32 %815, i32 %818)  ; IMad(a,b,c)
-  %820 = call i32 @dx.op.tertiary.i32(i32 48, i32 %801, i32 %816, i32 %819)  ; IMad(a,b,c)
-  %821 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %820, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %822 = extractvalue %dx.types.ResRet.i32 %821, 0
-  %823 = extractvalue %dx.types.ResRet.i32 %821, 1
-  %824 = call double @dx.op.makeDouble.f64(i32 101, i32 %822, i32 %823)  ; MakeDouble(lo,hi)
-  %825 = fptrunc double %824 to float
-  br label %948
-
-; <label>:826                                     ; preds = %797
-  %827 = icmp eq i32 %344, 1
-  br i1 %827, label %828, label %859
-
-; <label>:828                                     ; preds = %826
-  %829 = add i32 %13, -1
-  %830 = uitofp i32 %829 to float
-  %831 = call float @dx.op.binary.f32(i32 35, float %342, float 0.000000e+00)  ; FMax(a,b)
-  %832 = call float @dx.op.binary.f32(i32 36, float %831, float %830)  ; FMin(a,b)
-  %833 = fptoui float %832 to i32
-  %834 = add i32 %15, -1
-  %835 = uitofp i32 %834 to float
-  %836 = call float @dx.op.binary.f32(i32 35, float %343, float 0.000000e+00)  ; FMax(a,b)
-  %837 = call float @dx.op.binary.f32(i32 36, float %836, float %835)  ; FMin(a,b)
-  %838 = fptoui float %837 to i32
-  %839 = uitofp i32 %838 to float
-  %840 = uitofp i32 %833 to float
-  %841 = fptoui float %45 to i32
-  %842 = fptoui float %178 to i32
-  %843 = fptoui float %839 to i32
-  %844 = fptoui float %840 to i32
-  %845 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %846 = extractvalue %dx.types.CBufRet.i32 %845, 0
-  %847 = extractvalue %dx.types.CBufRet.i32 %845, 1
-  %848 = extractvalue %dx.types.CBufRet.i32 %845, 2
-  %849 = extractvalue %dx.types.CBufRet.i32 %845, 3
-  %850 = mul i32 %846, %841
-  %851 = call i32 @dx.op.tertiary.i32(i32 48, i32 %842, i32 %847, i32 %850)  ; IMad(a,b,c)
-  %852 = call i32 @dx.op.tertiary.i32(i32 48, i32 %843, i32 %848, i32 %851)  ; IMad(a,b,c)
-  %853 = call i32 @dx.op.tertiary.i32(i32 48, i32 %844, i32 %849, i32 %852)  ; IMad(a,b,c)
-  %854 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %853, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %855 = extractvalue %dx.types.ResRet.i32 %854, 0
-  %856 = extractvalue %dx.types.ResRet.i32 %854, 1
-  %857 = call double @dx.op.makeDouble.f64(i32 101, i32 %855, i32 %856)  ; MakeDouble(lo,hi)
-  %858 = fptrunc double %857 to float
-  br label %948
-
-; <label>:859                                     ; preds = %826
-  %860 = icmp eq i32 %344, 2
-  br i1 %860, label %861, label %948
-
-; <label>:861                                     ; preds = %859
-  %862 = fsub fast float %22, %20
-  %863 = fcmp fast olt float %342, %20
-  br i1 %863, label %864, label %877
-
-; <label>:864                                     ; preds = %861
-  %865 = fsub fast float %20, %342
-  %866 = fdiv fast float %865, %862
-  %867 = fptoui float %866 to i32
-  %868 = uitofp i32 %867 to float
-  %869 = fmul fast float %868, %862
-  %870 = fsub fast float %865, %869
-  %871 = and i32 %867, 1
-  %872 = icmp eq i32 %871, 0
-  br i1 %872, label %873, label %875
-
-; <label>:873                                     ; preds = %864
-  %874 = fadd fast float %870, %20
-  br label %892
-
-; <label>:875                                     ; preds = %864
-  %876 = fsub fast float %22, %870
-  br label %892
-
-; <label>:877                                     ; preds = %861
-  %878 = fcmp fast ogt float %342, %22
-  br i1 %878, label %879, label %892
-
-; <label>:879                                     ; preds = %877
-  %880 = fsub fast float %342, %22
-  %881 = fdiv fast float %880, %862
-  %882 = fptoui float %881 to i32
-  %883 = uitofp i32 %882 to float
-  %884 = fmul fast float %883, %862
-  %885 = fsub fast float %880, %884
-  %886 = and i32 %882, 1
-  %887 = icmp eq i32 %886, 0
-  br i1 %887, label %888, label %890
-
-; <label>:888                                     ; preds = %879
-  %889 = fsub fast float %22, %885
-  br label %892
-
-; <label>:890                                     ; preds = %879
-  %891 = fadd fast float %885, %20
-  br label %892
-
-; <label>:892                                     ; preds = %890, %888, %877, %875, %873
-  %893 = phi float [ %874, %873 ], [ %876, %875 ], [ %889, %888 ], [ %891, %890 ], [ %342, %877 ]
-  %894 = fptoui float %893 to i32
-  %895 = fsub fast float %24, %20
-  %896 = fcmp fast olt float %343, %20
-  br i1 %896, label %897, label %910
-
-; <label>:897                                     ; preds = %892
-  %898 = fsub fast float %20, %343
-  %899 = fdiv fast float %898, %895
-  %900 = fptoui float %899 to i32
-  %901 = uitofp i32 %900 to float
-  %902 = fmul fast float %901, %895
-  %903 = fsub fast float %898, %902
-  %904 = and i32 %900, 1
-  %905 = icmp eq i32 %904, 0
-  br i1 %905, label %906, label %908
-
-; <label>:906                                     ; preds = %897
-  %907 = fadd fast float %903, %20
-  br label %925
-
-; <label>:908                                     ; preds = %897
-  %909 = fsub fast float %24, %903
-  br label %925
-
-; <label>:910                                     ; preds = %892
-  %911 = fcmp fast ogt float %343, %24
-  br i1 %911, label %912, label %925
-
-; <label>:912                                     ; preds = %910
-  %913 = fsub fast float %343, %24
-  %914 = fdiv fast float %913, %895
-  %915 = fptoui float %914 to i32
-  %916 = uitofp i32 %915 to float
-  %917 = fmul fast float %916, %895
-  %918 = fsub fast float %913, %917
-  %919 = and i32 %915, 1
-  %920 = icmp eq i32 %919, 0
-  br i1 %920, label %921, label %923
-
-; <label>:921                                     ; preds = %912
-  %922 = fsub fast float %24, %918
-  br label %925
-
-; <label>:923                                     ; preds = %912
-  %924 = fadd fast float %918, %20
-  br label %925
-
-; <label>:925                                     ; preds = %923, %921, %910, %908, %906
-  %926 = phi float [ %907, %906 ], [ %909, %908 ], [ %922, %921 ], [ %924, %923 ], [ %343, %910 ]
-  %927 = fptoui float %926 to i32
-  %928 = uitofp i32 %927 to float
-  %929 = uitofp i32 %894 to float
-  %930 = fptoui float %45 to i32
-  %931 = fptoui float %178 to i32
-  %932 = fptoui float %928 to i32
-  %933 = fptoui float %929 to i32
-  %934 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %935 = extractvalue %dx.types.CBufRet.i32 %934, 0
-  %936 = extractvalue %dx.types.CBufRet.i32 %934, 1
-  %937 = extractvalue %dx.types.CBufRet.i32 %934, 2
-  %938 = extractvalue %dx.types.CBufRet.i32 %934, 3
-  %939 = mul i32 %935, %930
-  %940 = call i32 @dx.op.tertiary.i32(i32 48, i32 %931, i32 %936, i32 %939)  ; IMad(a,b,c)
-  %941 = call i32 @dx.op.tertiary.i32(i32 48, i32 %932, i32 %937, i32 %940)  ; IMad(a,b,c)
-  %942 = call i32 @dx.op.tertiary.i32(i32 48, i32 %933, i32 %938, i32 %941)  ; IMad(a,b,c)
-  %943 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %942, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %944 = extractvalue %dx.types.ResRet.i32 %943, 0
-  %945 = extractvalue %dx.types.ResRet.i32 %943, 1
-  %946 = call double @dx.op.makeDouble.f64(i32 101, i32 %944, i32 %945)  ; MakeDouble(lo,hi)
-  %947 = fptrunc double %946 to float
-  br label %948
-
-; <label>:948                                     ; preds = %925, %859, %828, %809, %799
-  %949 = phi float [ %825, %809 ], [ 0.000000e+00, %799 ], [ %858, %828 ], [ %947, %925 ], [ 0.000000e+00, %859 ]
-  %950 = call float @dx.op.unary.f32(i32 22, float %176)  ; Frc(value)
-  %951 = fsub fast float %647, %496
-  %952 = fmul fast float %950, %951
-  %953 = fadd fast float %952, %496
-  %954 = fsub fast float %949, %798
-  %955 = fmul fast float %950, %954
-  %956 = fadd fast float %955, %798
-  %957 = call float @dx.op.unary.f32(i32 22, float %177)  ; Frc(value)
-  %958 = fsub fast float %956, %953
-  %959 = fmul fast float %958, %957
-  %960 = fadd fast float %959, %953
-  %961 = fpext float %960 to double
-  %962 = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double %961)  ; SplitDouble(value)
-  %963 = extractvalue %dx.types.splitdouble %962, 0
-  %964 = extractvalue %dx.types.splitdouble %962, 1
-  call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %1, i32 %8, i32 0, i32 %963, i32 %964, i32 undef, i32 undef, i8 3, i32 8)  ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment)
-  br label %3454
-
-; <label>:965                                     ; preds = %337
-  %966 = icmp eq i32 %87, 2
-  br i1 %966, label %967, label %3454
-
-; <label>:967                                     ; preds = %965
-  %968 = call float @dx.op.unary.f32(i32 27, float %176)  ; Round_ni(value)
-  %969 = fadd fast float %968, -1.000000e+00
-  %970 = call float @dx.op.unary.f32(i32 27, float %177)  ; Round_ni(value)
-  %971 = fadd fast float %970, -1.000000e+00
-  %972 = extractvalue %dx.types.CBufRet.i32 %6, 3
-  %973 = icmp eq i32 %972, 0
-  br i1 %973, label %974, label %1001
-
-; <label>:974                                     ; preds = %967
-  %975 = fcmp fast oge float %969, 0.000000e+00
-  %976 = fptoui float %969 to i32
-  %977 = icmp ult i32 %976, %13
-  %978 = and i1 %975, %977
-  %979 = fcmp fast oge float %971, 0.000000e+00
-  %980 = and i1 %979, %978
-  %981 = fptoui float %971 to i32
-  %982 = icmp ult i32 %981, %15
-  %983 = and i1 %982, %980
-  br i1 %983, label %984, label %1123
-
-; <label>:984                                     ; preds = %974
-  %985 = fptoui float %45 to i32
-  %986 = fptoui float %178 to i32
-  %987 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %988 = extractvalue %dx.types.CBufRet.i32 %987, 0
-  %989 = extractvalue %dx.types.CBufRet.i32 %987, 1
-  %990 = extractvalue %dx.types.CBufRet.i32 %987, 2
-  %991 = extractvalue %dx.types.CBufRet.i32 %987, 3
-  %992 = mul i32 %988, %985
-  %993 = call i32 @dx.op.tertiary.i32(i32 48, i32 %986, i32 %989, i32 %992)  ; IMad(a,b,c)
-  %994 = call i32 @dx.op.tertiary.i32(i32 48, i32 %981, i32 %990, i32 %993)  ; IMad(a,b,c)
-  %995 = call i32 @dx.op.tertiary.i32(i32 48, i32 %976, i32 %991, i32 %994)  ; IMad(a,b,c)
-  %996 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %995, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %997 = extractvalue %dx.types.ResRet.i32 %996, 0
-  %998 = extractvalue %dx.types.ResRet.i32 %996, 1
-  %999 = call double @dx.op.makeDouble.f64(i32 101, i32 %997, i32 %998)  ; MakeDouble(lo,hi)
-  %1000 = fptrunc double %999 to float
-  br label %1123
-
-; <label>:1001                                    ; preds = %967
-  %1002 = icmp eq i32 %972, 1
-  br i1 %1002, label %1003, label %1034
-
-; <label>:1003                                    ; preds = %1001
-  %1004 = add i32 %13, -1
-  %1005 = uitofp i32 %1004 to float
-  %1006 = call float @dx.op.binary.f32(i32 35, float %969, float 0.000000e+00)  ; FMax(a,b)
-  %1007 = call float @dx.op.binary.f32(i32 36, float %1006, float %1005)  ; FMin(a,b)
-  %1008 = fptoui float %1007 to i32
-  %1009 = add i32 %15, -1
-  %1010 = uitofp i32 %1009 to float
-  %1011 = call float @dx.op.binary.f32(i32 35, float %971, float 0.000000e+00)  ; FMax(a,b)
-  %1012 = call float @dx.op.binary.f32(i32 36, float %1011, float %1010)  ; FMin(a,b)
-  %1013 = fptoui float %1012 to i32
-  %1014 = uitofp i32 %1013 to float
-  %1015 = uitofp i32 %1008 to float
-  %1016 = fptoui float %45 to i32
-  %1017 = fptoui float %178 to i32
-  %1018 = fptoui float %1014 to i32
-  %1019 = fptoui float %1015 to i32
-  %1020 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1021 = extractvalue %dx.types.CBufRet.i32 %1020, 0
-  %1022 = extractvalue %dx.types.CBufRet.i32 %1020, 1
-  %1023 = extractvalue %dx.types.CBufRet.i32 %1020, 2
-  %1024 = extractvalue %dx.types.CBufRet.i32 %1020, 3
-  %1025 = mul i32 %1021, %1016
-  %1026 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1017, i32 %1022, i32 %1025)  ; IMad(a,b,c)
-  %1027 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1018, i32 %1023, i32 %1026)  ; IMad(a,b,c)
-  %1028 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1019, i32 %1024, i32 %1027)  ; IMad(a,b,c)
-  %1029 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1028, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1030 = extractvalue %dx.types.ResRet.i32 %1029, 0
-  %1031 = extractvalue %dx.types.ResRet.i32 %1029, 1
-  %1032 = call double @dx.op.makeDouble.f64(i32 101, i32 %1030, i32 %1031)  ; MakeDouble(lo,hi)
-  %1033 = fptrunc double %1032 to float
-  br label %1123
-
-; <label>:1034                                    ; preds = %1001
-  %1035 = icmp eq i32 %972, 2
-  br i1 %1035, label %1036, label %1123
-
-; <label>:1036                                    ; preds = %1034
-  %1037 = fsub fast float %22, %20
-  %1038 = fcmp fast olt float %969, %20
-  br i1 %1038, label %1039, label %1052
-
-; <label>:1039                                    ; preds = %1036
-  %1040 = fsub fast float %20, %969
-  %1041 = fdiv fast float %1040, %1037
-  %1042 = fptoui float %1041 to i32
-  %1043 = uitofp i32 %1042 to float
-  %1044 = fmul fast float %1043, %1037
-  %1045 = fsub fast float %1040, %1044
-  %1046 = and i32 %1042, 1
-  %1047 = icmp eq i32 %1046, 0
-  br i1 %1047, label %1048, label %1050
-
-; <label>:1048                                    ; preds = %1039
-  %1049 = fadd fast float %1045, %20
-  br label %1067
-
-; <label>:1050                                    ; preds = %1039
-  %1051 = fsub fast float %22, %1045
-  br label %1067
-
-; <label>:1052                                    ; preds = %1036
-  %1053 = fcmp fast ogt float %969, %22
-  br i1 %1053, label %1054, label %1067
-
-; <label>:1054                                    ; preds = %1052
-  %1055 = fsub fast float %969, %22
-  %1056 = fdiv fast float %1055, %1037
-  %1057 = fptoui float %1056 to i32
-  %1058 = uitofp i32 %1057 to float
-  %1059 = fmul fast float %1058, %1037
-  %1060 = fsub fast float %1055, %1059
-  %1061 = and i32 %1057, 1
-  %1062 = icmp eq i32 %1061, 0
-  br i1 %1062, label %1063, label %1065
-
-; <label>:1063                                    ; preds = %1054
-  %1064 = fsub fast float %22, %1060
-  br label %1067
-
-; <label>:1065                                    ; preds = %1054
-  %1066 = fadd fast float %1060, %20
-  br label %1067
-
-; <label>:1067                                    ; preds = %1065, %1063, %1052, %1050, %1048
-  %1068 = phi float [ %1049, %1048 ], [ %1051, %1050 ], [ %1064, %1063 ], [ %1066, %1065 ], [ %969, %1052 ]
-  %1069 = fptoui float %1068 to i32
-  %1070 = fsub fast float %24, %20
-  %1071 = fcmp fast olt float %971, %20
-  br i1 %1071, label %1072, label %1085
-
-; <label>:1072                                    ; preds = %1067
-  %1073 = fsub fast float %20, %971
-  %1074 = fdiv fast float %1073, %1070
-  %1075 = fptoui float %1074 to i32
-  %1076 = uitofp i32 %1075 to float
-  %1077 = fmul fast float %1076, %1070
-  %1078 = fsub fast float %1073, %1077
-  %1079 = and i32 %1075, 1
-  %1080 = icmp eq i32 %1079, 0
-  br i1 %1080, label %1081, label %1083
-
-; <label>:1081                                    ; preds = %1072
-  %1082 = fadd fast float %1078, %20
-  br label %1100
-
-; <label>:1083                                    ; preds = %1072
-  %1084 = fsub fast float %24, %1078
-  br label %1100
-
-; <label>:1085                                    ; preds = %1067
-  %1086 = fcmp fast ogt float %971, %24
-  br i1 %1086, label %1087, label %1100
-
-; <label>:1087                                    ; preds = %1085
-  %1088 = fsub fast float %971, %24
-  %1089 = fdiv fast float %1088, %1070
-  %1090 = fptoui float %1089 to i32
-  %1091 = uitofp i32 %1090 to float
-  %1092 = fmul fast float %1091, %1070
-  %1093 = fsub fast float %1088, %1092
-  %1094 = and i32 %1090, 1
-  %1095 = icmp eq i32 %1094, 0
-  br i1 %1095, label %1096, label %1098
-
-; <label>:1096                                    ; preds = %1087
-  %1097 = fsub fast float %24, %1093
-  br label %1100
-
-; <label>:1098                                    ; preds = %1087
-  %1099 = fadd fast float %1093, %20
-  br label %1100
-
-; <label>:1100                                    ; preds = %1098, %1096, %1085, %1083, %1081
-  %1101 = phi float [ %1082, %1081 ], [ %1084, %1083 ], [ %1097, %1096 ], [ %1099, %1098 ], [ %971, %1085 ]
-  %1102 = fptoui float %1101 to i32
-  %1103 = uitofp i32 %1102 to float
-  %1104 = uitofp i32 %1069 to float
-  %1105 = fptoui float %45 to i32
-  %1106 = fptoui float %178 to i32
-  %1107 = fptoui float %1103 to i32
-  %1108 = fptoui float %1104 to i32
-  %1109 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1110 = extractvalue %dx.types.CBufRet.i32 %1109, 0
-  %1111 = extractvalue %dx.types.CBufRet.i32 %1109, 1
-  %1112 = extractvalue %dx.types.CBufRet.i32 %1109, 2
-  %1113 = extractvalue %dx.types.CBufRet.i32 %1109, 3
-  %1114 = mul i32 %1110, %1105
-  %1115 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1106, i32 %1111, i32 %1114)  ; IMad(a,b,c)
-  %1116 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1107, i32 %1112, i32 %1115)  ; IMad(a,b,c)
-  %1117 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1108, i32 %1113, i32 %1116)  ; IMad(a,b,c)
-  %1118 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1117, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1119 = extractvalue %dx.types.ResRet.i32 %1118, 0
-  %1120 = extractvalue %dx.types.ResRet.i32 %1118, 1
-  %1121 = call double @dx.op.makeDouble.f64(i32 101, i32 %1119, i32 %1120)  ; MakeDouble(lo,hi)
-  %1122 = fptrunc double %1121 to float
-  br label %1123
-
-; <label>:1123                                    ; preds = %1100, %1034, %1003, %984, %974
-  %1124 = phi float [ %1000, %984 ], [ 0.000000e+00, %974 ], [ %1033, %1003 ], [ %1122, %1100 ], [ 0.000000e+00, %1034 ]
-  br i1 %973, label %1125, label %1152
-
-; <label>:1125                                    ; preds = %1123
-  %1126 = fcmp fast oge float %968, 0.000000e+00
-  %1127 = fptoui float %968 to i32
-  %1128 = icmp ult i32 %1127, %13
-  %1129 = and i1 %1126, %1128
-  %1130 = fcmp fast oge float %971, 0.000000e+00
-  %1131 = and i1 %1130, %1129
-  %1132 = fptoui float %971 to i32
-  %1133 = icmp ult i32 %1132, %15
-  %1134 = and i1 %1133, %1131
-  br i1 %1134, label %1135, label %1274
-
-; <label>:1135                                    ; preds = %1125
-  %1136 = fptoui float %45 to i32
-  %1137 = fptoui float %178 to i32
-  %1138 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1139 = extractvalue %dx.types.CBufRet.i32 %1138, 0
-  %1140 = extractvalue %dx.types.CBufRet.i32 %1138, 1
-  %1141 = extractvalue %dx.types.CBufRet.i32 %1138, 2
-  %1142 = extractvalue %dx.types.CBufRet.i32 %1138, 3
-  %1143 = mul i32 %1139, %1136
-  %1144 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1137, i32 %1140, i32 %1143)  ; IMad(a,b,c)
-  %1145 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1132, i32 %1141, i32 %1144)  ; IMad(a,b,c)
-  %1146 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1127, i32 %1142, i32 %1145)  ; IMad(a,b,c)
-  %1147 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1146, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1148 = extractvalue %dx.types.ResRet.i32 %1147, 0
-  %1149 = extractvalue %dx.types.ResRet.i32 %1147, 1
-  %1150 = call double @dx.op.makeDouble.f64(i32 101, i32 %1148, i32 %1149)  ; MakeDouble(lo,hi)
-  %1151 = fptrunc double %1150 to float
-  br label %1274
-
-; <label>:1152                                    ; preds = %1123
-  %1153 = icmp eq i32 %972, 1
-  br i1 %1153, label %1154, label %1185
-
-; <label>:1154                                    ; preds = %1152
-  %1155 = add i32 %13, -1
-  %1156 = uitofp i32 %1155 to float
-  %1157 = call float @dx.op.binary.f32(i32 35, float %968, float 0.000000e+00)  ; FMax(a,b)
-  %1158 = call float @dx.op.binary.f32(i32 36, float %1157, float %1156)  ; FMin(a,b)
-  %1159 = fptoui float %1158 to i32
-  %1160 = add i32 %15, -1
-  %1161 = uitofp i32 %1160 to float
-  %1162 = call float @dx.op.binary.f32(i32 35, float %971, float 0.000000e+00)  ; FMax(a,b)
-  %1163 = call float @dx.op.binary.f32(i32 36, float %1162, float %1161)  ; FMin(a,b)
-  %1164 = fptoui float %1163 to i32
-  %1165 = uitofp i32 %1164 to float
-  %1166 = uitofp i32 %1159 to float
-  %1167 = fptoui float %45 to i32
-  %1168 = fptoui float %178 to i32
-  %1169 = fptoui float %1165 to i32
-  %1170 = fptoui float %1166 to i32
-  %1171 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1172 = extractvalue %dx.types.CBufRet.i32 %1171, 0
-  %1173 = extractvalue %dx.types.CBufRet.i32 %1171, 1
-  %1174 = extractvalue %dx.types.CBufRet.i32 %1171, 2
-  %1175 = extractvalue %dx.types.CBufRet.i32 %1171, 3
-  %1176 = mul i32 %1172, %1167
-  %1177 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1168, i32 %1173, i32 %1176)  ; IMad(a,b,c)
-  %1178 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1169, i32 %1174, i32 %1177)  ; IMad(a,b,c)
-  %1179 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1170, i32 %1175, i32 %1178)  ; IMad(a,b,c)
-  %1180 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1179, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1181 = extractvalue %dx.types.ResRet.i32 %1180, 0
-  %1182 = extractvalue %dx.types.ResRet.i32 %1180, 1
-  %1183 = call double @dx.op.makeDouble.f64(i32 101, i32 %1181, i32 %1182)  ; MakeDouble(lo,hi)
-  %1184 = fptrunc double %1183 to float
-  br label %1274
-
-; <label>:1185                                    ; preds = %1152
-  %1186 = icmp eq i32 %972, 2
-  br i1 %1186, label %1187, label %1274
-
-; <label>:1187                                    ; preds = %1185
-  %1188 = fsub fast float %22, %20
-  %1189 = fcmp fast olt float %968, %20
-  br i1 %1189, label %1190, label %1203
-
-; <label>:1190                                    ; preds = %1187
-  %1191 = fsub fast float %20, %968
-  %1192 = fdiv fast float %1191, %1188
-  %1193 = fptoui float %1192 to i32
-  %1194 = uitofp i32 %1193 to float
-  %1195 = fmul fast float %1194, %1188
-  %1196 = fsub fast float %1191, %1195
-  %1197 = and i32 %1193, 1
-  %1198 = icmp eq i32 %1197, 0
-  br i1 %1198, label %1199, label %1201
-
-; <label>:1199                                    ; preds = %1190
-  %1200 = fadd fast float %1196, %20
-  br label %1218
-
-; <label>:1201                                    ; preds = %1190
-  %1202 = fsub fast float %22, %1196
-  br label %1218
-
-; <label>:1203                                    ; preds = %1187
-  %1204 = fcmp fast ogt float %968, %22
-  br i1 %1204, label %1205, label %1218
-
-; <label>:1205                                    ; preds = %1203
-  %1206 = fsub fast float %968, %22
-  %1207 = fdiv fast float %1206, %1188
-  %1208 = fptoui float %1207 to i32
-  %1209 = uitofp i32 %1208 to float
-  %1210 = fmul fast float %1209, %1188
-  %1211 = fsub fast float %1206, %1210
-  %1212 = and i32 %1208, 1
-  %1213 = icmp eq i32 %1212, 0
-  br i1 %1213, label %1214, label %1216
-
-; <label>:1214                                    ; preds = %1205
-  %1215 = fsub fast float %22, %1211
-  br label %1218
-
-; <label>:1216                                    ; preds = %1205
-  %1217 = fadd fast float %1211, %20
-  br label %1218
-
-; <label>:1218                                    ; preds = %1216, %1214, %1203, %1201, %1199
-  %1219 = phi float [ %1200, %1199 ], [ %1202, %1201 ], [ %1215, %1214 ], [ %1217, %1216 ], [ %968, %1203 ]
-  %1220 = fptoui float %1219 to i32
-  %1221 = fsub fast float %24, %20
-  %1222 = fcmp fast olt float %971, %20
-  br i1 %1222, label %1223, label %1236
-
-; <label>:1223                                    ; preds = %1218
-  %1224 = fsub fast float %20, %971
-  %1225 = fdiv fast float %1224, %1221
-  %1226 = fptoui float %1225 to i32
-  %1227 = uitofp i32 %1226 to float
-  %1228 = fmul fast float %1227, %1221
-  %1229 = fsub fast float %1224, %1228
-  %1230 = and i32 %1226, 1
-  %1231 = icmp eq i32 %1230, 0
-  br i1 %1231, label %1232, label %1234
-
-; <label>:1232                                    ; preds = %1223
-  %1233 = fadd fast float %1229, %20
-  br label %1251
-
-; <label>:1234                                    ; preds = %1223
-  %1235 = fsub fast float %24, %1229
-  br label %1251
-
-; <label>:1236                                    ; preds = %1218
-  %1237 = fcmp fast ogt float %971, %24
-  br i1 %1237, label %1238, label %1251
-
-; <label>:1238                                    ; preds = %1236
-  %1239 = fsub fast float %971, %24
-  %1240 = fdiv fast float %1239, %1221
-  %1241 = fptoui float %1240 to i32
-  %1242 = uitofp i32 %1241 to float
-  %1243 = fmul fast float %1242, %1221
-  %1244 = fsub fast float %1239, %1243
-  %1245 = and i32 %1241, 1
-  %1246 = icmp eq i32 %1245, 0
-  br i1 %1246, label %1247, label %1249
-
-; <label>:1247                                    ; preds = %1238
-  %1248 = fsub fast float %24, %1244
-  br label %1251
-
-; <label>:1249                                    ; preds = %1238
-  %1250 = fadd fast float %1244, %20
-  br label %1251
-
-; <label>:1251                                    ; preds = %1249, %1247, %1236, %1234, %1232
-  %1252 = phi float [ %1233, %1232 ], [ %1235, %1234 ], [ %1248, %1247 ], [ %1250, %1249 ], [ %971, %1236 ]
-  %1253 = fptoui float %1252 to i32
-  %1254 = uitofp i32 %1253 to float
-  %1255 = uitofp i32 %1220 to float
-  %1256 = fptoui float %45 to i32
-  %1257 = fptoui float %178 to i32
-  %1258 = fptoui float %1254 to i32
-  %1259 = fptoui float %1255 to i32
-  %1260 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1261 = extractvalue %dx.types.CBufRet.i32 %1260, 0
-  %1262 = extractvalue %dx.types.CBufRet.i32 %1260, 1
-  %1263 = extractvalue %dx.types.CBufRet.i32 %1260, 2
-  %1264 = extractvalue %dx.types.CBufRet.i32 %1260, 3
-  %1265 = mul i32 %1261, %1256
-  %1266 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1257, i32 %1262, i32 %1265)  ; IMad(a,b,c)
-  %1267 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1258, i32 %1263, i32 %1266)  ; IMad(a,b,c)
-  %1268 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1259, i32 %1264, i32 %1267)  ; IMad(a,b,c)
-  %1269 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1268, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1270 = extractvalue %dx.types.ResRet.i32 %1269, 0
-  %1271 = extractvalue %dx.types.ResRet.i32 %1269, 1
-  %1272 = call double @dx.op.makeDouble.f64(i32 101, i32 %1270, i32 %1271)  ; MakeDouble(lo,hi)
-  %1273 = fptrunc double %1272 to float
-  br label %1274
-
-; <label>:1274                                    ; preds = %1251, %1185, %1154, %1135, %1125
-  %1275 = phi float [ %1151, %1135 ], [ 0.000000e+00, %1125 ], [ %1184, %1154 ], [ %1273, %1251 ], [ 0.000000e+00, %1185 ]
-  %1276 = fadd fast float %968, 1.000000e+00
-  br i1 %973, label %1277, label %1304
-
-; <label>:1277                                    ; preds = %1274
-  %1278 = fcmp fast oge float %1276, 0.000000e+00
-  %1279 = fptoui float %1276 to i32
-  %1280 = icmp ult i32 %1279, %13
-  %1281 = and i1 %1278, %1280
-  %1282 = fcmp fast oge float %971, 0.000000e+00
-  %1283 = and i1 %1282, %1281
-  %1284 = fptoui float %971 to i32
-  %1285 = icmp ult i32 %1284, %15
-  %1286 = and i1 %1285, %1283
-  br i1 %1286, label %1287, label %1426
-
-; <label>:1287                                    ; preds = %1277
-  %1288 = fptoui float %45 to i32
-  %1289 = fptoui float %178 to i32
-  %1290 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1291 = extractvalue %dx.types.CBufRet.i32 %1290, 0
-  %1292 = extractvalue %dx.types.CBufRet.i32 %1290, 1
-  %1293 = extractvalue %dx.types.CBufRet.i32 %1290, 2
-  %1294 = extractvalue %dx.types.CBufRet.i32 %1290, 3
-  %1295 = mul i32 %1291, %1288
-  %1296 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1289, i32 %1292, i32 %1295)  ; IMad(a,b,c)
-  %1297 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1284, i32 %1293, i32 %1296)  ; IMad(a,b,c)
-  %1298 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1279, i32 %1294, i32 %1297)  ; IMad(a,b,c)
-  %1299 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1298, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1300 = extractvalue %dx.types.ResRet.i32 %1299, 0
-  %1301 = extractvalue %dx.types.ResRet.i32 %1299, 1
-  %1302 = call double @dx.op.makeDouble.f64(i32 101, i32 %1300, i32 %1301)  ; MakeDouble(lo,hi)
-  %1303 = fptrunc double %1302 to float
-  br label %1426
-
-; <label>:1304                                    ; preds = %1274
-  %1305 = icmp eq i32 %972, 1
-  br i1 %1305, label %1306, label %1337
-
-; <label>:1306                                    ; preds = %1304
-  %1307 = add i32 %13, -1
-  %1308 = uitofp i32 %1307 to float
-  %1309 = call float @dx.op.binary.f32(i32 35, float %1276, float 0.000000e+00)  ; FMax(a,b)
-  %1310 = call float @dx.op.binary.f32(i32 36, float %1309, float %1308)  ; FMin(a,b)
-  %1311 = fptoui float %1310 to i32
-  %1312 = add i32 %15, -1
-  %1313 = uitofp i32 %1312 to float
-  %1314 = call float @dx.op.binary.f32(i32 35, float %971, float 0.000000e+00)  ; FMax(a,b)
-  %1315 = call float @dx.op.binary.f32(i32 36, float %1314, float %1313)  ; FMin(a,b)
-  %1316 = fptoui float %1315 to i32
-  %1317 = uitofp i32 %1316 to float
-  %1318 = uitofp i32 %1311 to float
-  %1319 = fptoui float %45 to i32
-  %1320 = fptoui float %178 to i32
-  %1321 = fptoui float %1317 to i32
-  %1322 = fptoui float %1318 to i32
-  %1323 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1324 = extractvalue %dx.types.CBufRet.i32 %1323, 0
-  %1325 = extractvalue %dx.types.CBufRet.i32 %1323, 1
-  %1326 = extractvalue %dx.types.CBufRet.i32 %1323, 2
-  %1327 = extractvalue %dx.types.CBufRet.i32 %1323, 3
-  %1328 = mul i32 %1324, %1319
-  %1329 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1320, i32 %1325, i32 %1328)  ; IMad(a,b,c)
-  %1330 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1321, i32 %1326, i32 %1329)  ; IMad(a,b,c)
-  %1331 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1322, i32 %1327, i32 %1330)  ; IMad(a,b,c)
-  %1332 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1331, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1333 = extractvalue %dx.types.ResRet.i32 %1332, 0
-  %1334 = extractvalue %dx.types.ResRet.i32 %1332, 1
-  %1335 = call double @dx.op.makeDouble.f64(i32 101, i32 %1333, i32 %1334)  ; MakeDouble(lo,hi)
-  %1336 = fptrunc double %1335 to float
-  br label %1426
-
-; <label>:1337                                    ; preds = %1304
-  %1338 = icmp eq i32 %972, 2
-  br i1 %1338, label %1339, label %1426
-
-; <label>:1339                                    ; preds = %1337
-  %1340 = fsub fast float %22, %20
-  %1341 = fcmp fast olt float %1276, %20
-  br i1 %1341, label %1342, label %1355
-
-; <label>:1342                                    ; preds = %1339
-  %1343 = fsub fast float %20, %1276
-  %1344 = fdiv fast float %1343, %1340
-  %1345 = fptoui float %1344 to i32
-  %1346 = uitofp i32 %1345 to float
-  %1347 = fmul fast float %1346, %1340
-  %1348 = fsub fast float %1343, %1347
-  %1349 = and i32 %1345, 1
-  %1350 = icmp eq i32 %1349, 0
-  br i1 %1350, label %1351, label %1353
-
-; <label>:1351                                    ; preds = %1342
-  %1352 = fadd fast float %1348, %20
-  br label %1370
-
-; <label>:1353                                    ; preds = %1342
-  %1354 = fsub fast float %22, %1348
-  br label %1370
-
-; <label>:1355                                    ; preds = %1339
-  %1356 = fcmp fast ogt float %1276, %22
-  br i1 %1356, label %1357, label %1370
-
-; <label>:1357                                    ; preds = %1355
-  %1358 = fsub fast float %1276, %22
-  %1359 = fdiv fast float %1358, %1340
-  %1360 = fptoui float %1359 to i32
-  %1361 = uitofp i32 %1360 to float
-  %1362 = fmul fast float %1361, %1340
-  %1363 = fsub fast float %1358, %1362
-  %1364 = and i32 %1360, 1
-  %1365 = icmp eq i32 %1364, 0
-  br i1 %1365, label %1366, label %1368
-
-; <label>:1366                                    ; preds = %1357
-  %1367 = fsub fast float %22, %1363
-  br label %1370
-
-; <label>:1368                                    ; preds = %1357
-  %1369 = fadd fast float %1363, %20
-  br label %1370
-
-; <label>:1370                                    ; preds = %1368, %1366, %1355, %1353, %1351
-  %1371 = phi float [ %1352, %1351 ], [ %1354, %1353 ], [ %1367, %1366 ], [ %1369, %1368 ], [ %1276, %1355 ]
-  %1372 = fptoui float %1371 to i32
-  %1373 = fsub fast float %24, %20
-  %1374 = fcmp fast olt float %971, %20
-  br i1 %1374, label %1375, label %1388
-
-; <label>:1375                                    ; preds = %1370
-  %1376 = fsub fast float %20, %971
-  %1377 = fdiv fast float %1376, %1373
-  %1378 = fptoui float %1377 to i32
-  %1379 = uitofp i32 %1378 to float
-  %1380 = fmul fast float %1379, %1373
-  %1381 = fsub fast float %1376, %1380
-  %1382 = and i32 %1378, 1
-  %1383 = icmp eq i32 %1382, 0
-  br i1 %1383, label %1384, label %1386
-
-; <label>:1384                                    ; preds = %1375
-  %1385 = fadd fast float %1381, %20
-  br label %1403
-
-; <label>:1386                                    ; preds = %1375
-  %1387 = fsub fast float %24, %1381
-  br label %1403
-
-; <label>:1388                                    ; preds = %1370
-  %1389 = fcmp fast ogt float %971, %24
-  br i1 %1389, label %1390, label %1403
-
-; <label>:1390                                    ; preds = %1388
-  %1391 = fsub fast float %971, %24
-  %1392 = fdiv fast float %1391, %1373
-  %1393 = fptoui float %1392 to i32
-  %1394 = uitofp i32 %1393 to float
-  %1395 = fmul fast float %1394, %1373
-  %1396 = fsub fast float %1391, %1395
-  %1397 = and i32 %1393, 1
-  %1398 = icmp eq i32 %1397, 0
-  br i1 %1398, label %1399, label %1401
-
-; <label>:1399                                    ; preds = %1390
-  %1400 = fsub fast float %24, %1396
-  br label %1403
-
-; <label>:1401                                    ; preds = %1390
-  %1402 = fadd fast float %1396, %20
-  br label %1403
-
-; <label>:1403                                    ; preds = %1401, %1399, %1388, %1386, %1384
-  %1404 = phi float [ %1385, %1384 ], [ %1387, %1386 ], [ %1400, %1399 ], [ %1402, %1401 ], [ %971, %1388 ]
-  %1405 = fptoui float %1404 to i32
-  %1406 = uitofp i32 %1405 to float
-  %1407 = uitofp i32 %1372 to float
-  %1408 = fptoui float %45 to i32
-  %1409 = fptoui float %178 to i32
-  %1410 = fptoui float %1406 to i32
-  %1411 = fptoui float %1407 to i32
-  %1412 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1413 = extractvalue %dx.types.CBufRet.i32 %1412, 0
-  %1414 = extractvalue %dx.types.CBufRet.i32 %1412, 1
-  %1415 = extractvalue %dx.types.CBufRet.i32 %1412, 2
-  %1416 = extractvalue %dx.types.CBufRet.i32 %1412, 3
-  %1417 = mul i32 %1413, %1408
-  %1418 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1409, i32 %1414, i32 %1417)  ; IMad(a,b,c)
-  %1419 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1410, i32 %1415, i32 %1418)  ; IMad(a,b,c)
-  %1420 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1411, i32 %1416, i32 %1419)  ; IMad(a,b,c)
-  %1421 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1420, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1422 = extractvalue %dx.types.ResRet.i32 %1421, 0
-  %1423 = extractvalue %dx.types.ResRet.i32 %1421, 1
-  %1424 = call double @dx.op.makeDouble.f64(i32 101, i32 %1422, i32 %1423)  ; MakeDouble(lo,hi)
-  %1425 = fptrunc double %1424 to float
-  br label %1426
-
-; <label>:1426                                    ; preds = %1403, %1337, %1306, %1287, %1277
-  %1427 = phi float [ %1303, %1287 ], [ 0.000000e+00, %1277 ], [ %1336, %1306 ], [ %1425, %1403 ], [ 0.000000e+00, %1337 ]
-  %1428 = fadd fast float %968, 2.000000e+00
-  br i1 %973, label %1429, label %1456
-
-; <label>:1429                                    ; preds = %1426
-  %1430 = fcmp fast oge float %1428, 0.000000e+00
-  %1431 = fptoui float %1428 to i32
-  %1432 = icmp ult i32 %1431, %13
-  %1433 = and i1 %1430, %1432
-  %1434 = fcmp fast oge float %971, 0.000000e+00
-  %1435 = and i1 %1434, %1433
-  %1436 = fptoui float %971 to i32
-  %1437 = icmp ult i32 %1436, %15
-  %1438 = and i1 %1437, %1435
-  br i1 %1438, label %1439, label %1578
-
-; <label>:1439                                    ; preds = %1429
-  %1440 = fptoui float %45 to i32
-  %1441 = fptoui float %178 to i32
-  %1442 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1443 = extractvalue %dx.types.CBufRet.i32 %1442, 0
-  %1444 = extractvalue %dx.types.CBufRet.i32 %1442, 1
-  %1445 = extractvalue %dx.types.CBufRet.i32 %1442, 2
-  %1446 = extractvalue %dx.types.CBufRet.i32 %1442, 3
-  %1447 = mul i32 %1443, %1440
-  %1448 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1441, i32 %1444, i32 %1447)  ; IMad(a,b,c)
-  %1449 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1436, i32 %1445, i32 %1448)  ; IMad(a,b,c)
-  %1450 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1431, i32 %1446, i32 %1449)  ; IMad(a,b,c)
-  %1451 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1450, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1452 = extractvalue %dx.types.ResRet.i32 %1451, 0
-  %1453 = extractvalue %dx.types.ResRet.i32 %1451, 1
-  %1454 = call double @dx.op.makeDouble.f64(i32 101, i32 %1452, i32 %1453)  ; MakeDouble(lo,hi)
-  %1455 = fptrunc double %1454 to float
-  br label %1578
-
-; <label>:1456                                    ; preds = %1426
-  %1457 = icmp eq i32 %972, 1
-  br i1 %1457, label %1458, label %1489
-
-; <label>:1458                                    ; preds = %1456
-  %1459 = add i32 %13, -1
-  %1460 = uitofp i32 %1459 to float
-  %1461 = call float @dx.op.binary.f32(i32 35, float %1428, float 0.000000e+00)  ; FMax(a,b)
-  %1462 = call float @dx.op.binary.f32(i32 36, float %1461, float %1460)  ; FMin(a,b)
-  %1463 = fptoui float %1462 to i32
-  %1464 = add i32 %15, -1
-  %1465 = uitofp i32 %1464 to float
-  %1466 = call float @dx.op.binary.f32(i32 35, float %971, float 0.000000e+00)  ; FMax(a,b)
-  %1467 = call float @dx.op.binary.f32(i32 36, float %1466, float %1465)  ; FMin(a,b)
-  %1468 = fptoui float %1467 to i32
-  %1469 = uitofp i32 %1468 to float
-  %1470 = uitofp i32 %1463 to float
-  %1471 = fptoui float %45 to i32
-  %1472 = fptoui float %178 to i32
-  %1473 = fptoui float %1469 to i32
-  %1474 = fptoui float %1470 to i32
-  %1475 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1476 = extractvalue %dx.types.CBufRet.i32 %1475, 0
-  %1477 = extractvalue %dx.types.CBufRet.i32 %1475, 1
-  %1478 = extractvalue %dx.types.CBufRet.i32 %1475, 2
-  %1479 = extractvalue %dx.types.CBufRet.i32 %1475, 3
-  %1480 = mul i32 %1476, %1471
-  %1481 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1472, i32 %1477, i32 %1480)  ; IMad(a,b,c)
-  %1482 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1473, i32 %1478, i32 %1481)  ; IMad(a,b,c)
-  %1483 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1474, i32 %1479, i32 %1482)  ; IMad(a,b,c)
-  %1484 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1483, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1485 = extractvalue %dx.types.ResRet.i32 %1484, 0
-  %1486 = extractvalue %dx.types.ResRet.i32 %1484, 1
-  %1487 = call double @dx.op.makeDouble.f64(i32 101, i32 %1485, i32 %1486)  ; MakeDouble(lo,hi)
-  %1488 = fptrunc double %1487 to float
-  br label %1578
-
-; <label>:1489                                    ; preds = %1456
-  %1490 = icmp eq i32 %972, 2
-  br i1 %1490, label %1491, label %1578
-
-; <label>:1491                                    ; preds = %1489
-  %1492 = fsub fast float %22, %20
-  %1493 = fcmp fast olt float %1428, %20
-  br i1 %1493, label %1494, label %1507
-
-; <label>:1494                                    ; preds = %1491
-  %1495 = fsub fast float %20, %1428
-  %1496 = fdiv fast float %1495, %1492
-  %1497 = fptoui float %1496 to i32
-  %1498 = uitofp i32 %1497 to float
-  %1499 = fmul fast float %1498, %1492
-  %1500 = fsub fast float %1495, %1499
-  %1501 = and i32 %1497, 1
-  %1502 = icmp eq i32 %1501, 0
-  br i1 %1502, label %1503, label %1505
-
-; <label>:1503                                    ; preds = %1494
-  %1504 = fadd fast float %1500, %20
-  br label %1522
-
-; <label>:1505                                    ; preds = %1494
-  %1506 = fsub fast float %22, %1500
-  br label %1522
-
-; <label>:1507                                    ; preds = %1491
-  %1508 = fcmp fast ogt float %1428, %22
-  br i1 %1508, label %1509, label %1522
-
-; <label>:1509                                    ; preds = %1507
-  %1510 = fsub fast float %1428, %22
-  %1511 = fdiv fast float %1510, %1492
-  %1512 = fptoui float %1511 to i32
-  %1513 = uitofp i32 %1512 to float
-  %1514 = fmul fast float %1513, %1492
-  %1515 = fsub fast float %1510, %1514
-  %1516 = and i32 %1512, 1
-  %1517 = icmp eq i32 %1516, 0
-  br i1 %1517, label %1518, label %1520
-
-; <label>:1518                                    ; preds = %1509
-  %1519 = fsub fast float %22, %1515
-  br label %1522
-
-; <label>:1520                                    ; preds = %1509
-  %1521 = fadd fast float %1515, %20
-  br label %1522
-
-; <label>:1522                                    ; preds = %1520, %1518, %1507, %1505, %1503
-  %1523 = phi float [ %1504, %1503 ], [ %1506, %1505 ], [ %1519, %1518 ], [ %1521, %1520 ], [ %1428, %1507 ]
-  %1524 = fptoui float %1523 to i32
-  %1525 = fsub fast float %24, %20
-  %1526 = fcmp fast olt float %971, %20
-  br i1 %1526, label %1527, label %1540
-
-; <label>:1527                                    ; preds = %1522
-  %1528 = fsub fast float %20, %971
-  %1529 = fdiv fast float %1528, %1525
-  %1530 = fptoui float %1529 to i32
-  %1531 = uitofp i32 %1530 to float
-  %1532 = fmul fast float %1531, %1525
-  %1533 = fsub fast float %1528, %1532
-  %1534 = and i32 %1530, 1
-  %1535 = icmp eq i32 %1534, 0
-  br i1 %1535, label %1536, label %1538
-
-; <label>:1536                                    ; preds = %1527
-  %1537 = fadd fast float %1533, %20
-  br label %1555
-
-; <label>:1538                                    ; preds = %1527
-  %1539 = fsub fast float %24, %1533
-  br label %1555
-
-; <label>:1540                                    ; preds = %1522
-  %1541 = fcmp fast ogt float %971, %24
-  br i1 %1541, label %1542, label %1555
-
-; <label>:1542                                    ; preds = %1540
-  %1543 = fsub fast float %971, %24
-  %1544 = fdiv fast float %1543, %1525
-  %1545 = fptoui float %1544 to i32
-  %1546 = uitofp i32 %1545 to float
-  %1547 = fmul fast float %1546, %1525
-  %1548 = fsub fast float %1543, %1547
-  %1549 = and i32 %1545, 1
-  %1550 = icmp eq i32 %1549, 0
-  br i1 %1550, label %1551, label %1553
-
-; <label>:1551                                    ; preds = %1542
-  %1552 = fsub fast float %24, %1548
-  br label %1555
-
-; <label>:1553                                    ; preds = %1542
-  %1554 = fadd fast float %1548, %20
-  br label %1555
-
-; <label>:1555                                    ; preds = %1553, %1551, %1540, %1538, %1536
-  %1556 = phi float [ %1537, %1536 ], [ %1539, %1538 ], [ %1552, %1551 ], [ %1554, %1553 ], [ %971, %1540 ]
-  %1557 = fptoui float %1556 to i32
-  %1558 = uitofp i32 %1557 to float
-  %1559 = uitofp i32 %1524 to float
-  %1560 = fptoui float %45 to i32
-  %1561 = fptoui float %178 to i32
-  %1562 = fptoui float %1558 to i32
-  %1563 = fptoui float %1559 to i32
-  %1564 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1565 = extractvalue %dx.types.CBufRet.i32 %1564, 0
-  %1566 = extractvalue %dx.types.CBufRet.i32 %1564, 1
-  %1567 = extractvalue %dx.types.CBufRet.i32 %1564, 2
-  %1568 = extractvalue %dx.types.CBufRet.i32 %1564, 3
-  %1569 = mul i32 %1565, %1560
-  %1570 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1561, i32 %1566, i32 %1569)  ; IMad(a,b,c)
-  %1571 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1562, i32 %1567, i32 %1570)  ; IMad(a,b,c)
-  %1572 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1563, i32 %1568, i32 %1571)  ; IMad(a,b,c)
-  %1573 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1572, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1574 = extractvalue %dx.types.ResRet.i32 %1573, 0
-  %1575 = extractvalue %dx.types.ResRet.i32 %1573, 1
-  %1576 = call double @dx.op.makeDouble.f64(i32 101, i32 %1574, i32 %1575)  ; MakeDouble(lo,hi)
-  %1577 = fptrunc double %1576 to float
-  br label %1578
-
-; <label>:1578                                    ; preds = %1555, %1489, %1458, %1439, %1429
-  %1579 = phi float [ %1455, %1439 ], [ 0.000000e+00, %1429 ], [ %1488, %1458 ], [ %1577, %1555 ], [ 0.000000e+00, %1489 ]
-  br i1 %973, label %1580, label %1607
-
-; <label>:1580                                    ; preds = %1578
-  %1581 = fcmp fast oge float %969, 0.000000e+00
-  %1582 = fptoui float %969 to i32
-  %1583 = icmp ult i32 %1582, %13
-  %1584 = and i1 %1581, %1583
-  %1585 = fcmp fast oge float %970, 0.000000e+00
-  %1586 = and i1 %1585, %1584
-  %1587 = fptoui float %970 to i32
-  %1588 = icmp ult i32 %1587, %15
-  %1589 = and i1 %1588, %1586
-  br i1 %1589, label %1590, label %1729
-
-; <label>:1590                                    ; preds = %1580
-  %1591 = fptoui float %45 to i32
-  %1592 = fptoui float %178 to i32
-  %1593 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1594 = extractvalue %dx.types.CBufRet.i32 %1593, 0
-  %1595 = extractvalue %dx.types.CBufRet.i32 %1593, 1
-  %1596 = extractvalue %dx.types.CBufRet.i32 %1593, 2
-  %1597 = extractvalue %dx.types.CBufRet.i32 %1593, 3
-  %1598 = mul i32 %1594, %1591
-  %1599 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1592, i32 %1595, i32 %1598)  ; IMad(a,b,c)
-  %1600 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1587, i32 %1596, i32 %1599)  ; IMad(a,b,c)
-  %1601 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1582, i32 %1597, i32 %1600)  ; IMad(a,b,c)
-  %1602 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1601, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1603 = extractvalue %dx.types.ResRet.i32 %1602, 0
-  %1604 = extractvalue %dx.types.ResRet.i32 %1602, 1
-  %1605 = call double @dx.op.makeDouble.f64(i32 101, i32 %1603, i32 %1604)  ; MakeDouble(lo,hi)
-  %1606 = fptrunc double %1605 to float
-  br label %1729
-
-; <label>:1607                                    ; preds = %1578
-  %1608 = icmp eq i32 %972, 1
-  br i1 %1608, label %1609, label %1640
-
-; <label>:1609                                    ; preds = %1607
-  %1610 = add i32 %13, -1
-  %1611 = uitofp i32 %1610 to float
-  %1612 = call float @dx.op.binary.f32(i32 35, float %969, float 0.000000e+00)  ; FMax(a,b)
-  %1613 = call float @dx.op.binary.f32(i32 36, float %1612, float %1611)  ; FMin(a,b)
-  %1614 = fptoui float %1613 to i32
-  %1615 = add i32 %15, -1
-  %1616 = uitofp i32 %1615 to float
-  %1617 = call float @dx.op.binary.f32(i32 35, float %970, float 0.000000e+00)  ; FMax(a,b)
-  %1618 = call float @dx.op.binary.f32(i32 36, float %1617, float %1616)  ; FMin(a,b)
-  %1619 = fptoui float %1618 to i32
-  %1620 = uitofp i32 %1619 to float
-  %1621 = uitofp i32 %1614 to float
-  %1622 = fptoui float %45 to i32
-  %1623 = fptoui float %178 to i32
-  %1624 = fptoui float %1620 to i32
-  %1625 = fptoui float %1621 to i32
-  %1626 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1627 = extractvalue %dx.types.CBufRet.i32 %1626, 0
-  %1628 = extractvalue %dx.types.CBufRet.i32 %1626, 1
-  %1629 = extractvalue %dx.types.CBufRet.i32 %1626, 2
-  %1630 = extractvalue %dx.types.CBufRet.i32 %1626, 3
-  %1631 = mul i32 %1627, %1622
-  %1632 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1623, i32 %1628, i32 %1631)  ; IMad(a,b,c)
-  %1633 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1624, i32 %1629, i32 %1632)  ; IMad(a,b,c)
-  %1634 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1625, i32 %1630, i32 %1633)  ; IMad(a,b,c)
-  %1635 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1634, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1636 = extractvalue %dx.types.ResRet.i32 %1635, 0
-  %1637 = extractvalue %dx.types.ResRet.i32 %1635, 1
-  %1638 = call double @dx.op.makeDouble.f64(i32 101, i32 %1636, i32 %1637)  ; MakeDouble(lo,hi)
-  %1639 = fptrunc double %1638 to float
-  br label %1729
-
-; <label>:1640                                    ; preds = %1607
-  %1641 = icmp eq i32 %972, 2
-  br i1 %1641, label %1642, label %1729
-
-; <label>:1642                                    ; preds = %1640
-  %1643 = fsub fast float %22, %20
-  %1644 = fcmp fast olt float %969, %20
-  br i1 %1644, label %1645, label %1658
-
-; <label>:1645                                    ; preds = %1642
-  %1646 = fsub fast float %20, %969
-  %1647 = fdiv fast float %1646, %1643
-  %1648 = fptoui float %1647 to i32
-  %1649 = uitofp i32 %1648 to float
-  %1650 = fmul fast float %1649, %1643
-  %1651 = fsub fast float %1646, %1650
-  %1652 = and i32 %1648, 1
-  %1653 = icmp eq i32 %1652, 0
-  br i1 %1653, label %1654, label %1656
-
-; <label>:1654                                    ; preds = %1645
-  %1655 = fadd fast float %1651, %20
-  br label %1673
-
-; <label>:1656                                    ; preds = %1645
-  %1657 = fsub fast float %22, %1651
-  br label %1673
-
-; <label>:1658                                    ; preds = %1642
-  %1659 = fcmp fast ogt float %969, %22
-  br i1 %1659, label %1660, label %1673
-
-; <label>:1660                                    ; preds = %1658
-  %1661 = fsub fast float %969, %22
-  %1662 = fdiv fast float %1661, %1643
-  %1663 = fptoui float %1662 to i32
-  %1664 = uitofp i32 %1663 to float
-  %1665 = fmul fast float %1664, %1643
-  %1666 = fsub fast float %1661, %1665
-  %1667 = and i32 %1663, 1
-  %1668 = icmp eq i32 %1667, 0
-  br i1 %1668, label %1669, label %1671
-
-; <label>:1669                                    ; preds = %1660
-  %1670 = fsub fast float %22, %1666
-  br label %1673
-
-; <label>:1671                                    ; preds = %1660
-  %1672 = fadd fast float %1666, %20
-  br label %1673
-
-; <label>:1673                                    ; preds = %1671, %1669, %1658, %1656, %1654
-  %1674 = phi float [ %1655, %1654 ], [ %1657, %1656 ], [ %1670, %1669 ], [ %1672, %1671 ], [ %969, %1658 ]
-  %1675 = fptoui float %1674 to i32
-  %1676 = fsub fast float %24, %20
-  %1677 = fcmp fast olt float %970, %20
-  br i1 %1677, label %1678, label %1691
-
-; <label>:1678                                    ; preds = %1673
-  %1679 = fsub fast float %20, %970
-  %1680 = fdiv fast float %1679, %1676
-  %1681 = fptoui float %1680 to i32
-  %1682 = uitofp i32 %1681 to float
-  %1683 = fmul fast float %1682, %1676
-  %1684 = fsub fast float %1679, %1683
-  %1685 = and i32 %1681, 1
-  %1686 = icmp eq i32 %1685, 0
-  br i1 %1686, label %1687, label %1689
-
-; <label>:1687                                    ; preds = %1678
-  %1688 = fadd fast float %1684, %20
-  br label %1706
-
-; <label>:1689                                    ; preds = %1678
-  %1690 = fsub fast float %24, %1684
-  br label %1706
-
-; <label>:1691                                    ; preds = %1673
-  %1692 = fcmp fast ogt float %970, %24
-  br i1 %1692, label %1693, label %1706
-
-; <label>:1693                                    ; preds = %1691
-  %1694 = fsub fast float %970, %24
-  %1695 = fdiv fast float %1694, %1676
-  %1696 = fptoui float %1695 to i32
-  %1697 = uitofp i32 %1696 to float
-  %1698 = fmul fast float %1697, %1676
-  %1699 = fsub fast float %1694, %1698
-  %1700 = and i32 %1696, 1
-  %1701 = icmp eq i32 %1700, 0
-  br i1 %1701, label %1702, label %1704
-
-; <label>:1702                                    ; preds = %1693
-  %1703 = fsub fast float %24, %1699
-  br label %1706
-
-; <label>:1704                                    ; preds = %1693
-  %1705 = fadd fast float %1699, %20
-  br label %1706
-
-; <label>:1706                                    ; preds = %1704, %1702, %1691, %1689, %1687
-  %1707 = phi float [ %1688, %1687 ], [ %1690, %1689 ], [ %1703, %1702 ], [ %1705, %1704 ], [ %970, %1691 ]
-  %1708 = fptoui float %1707 to i32
-  %1709 = uitofp i32 %1708 to float
-  %1710 = uitofp i32 %1675 to float
-  %1711 = fptoui float %45 to i32
-  %1712 = fptoui float %178 to i32
-  %1713 = fptoui float %1709 to i32
-  %1714 = fptoui float %1710 to i32
-  %1715 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1716 = extractvalue %dx.types.CBufRet.i32 %1715, 0
-  %1717 = extractvalue %dx.types.CBufRet.i32 %1715, 1
-  %1718 = extractvalue %dx.types.CBufRet.i32 %1715, 2
-  %1719 = extractvalue %dx.types.CBufRet.i32 %1715, 3
-  %1720 = mul i32 %1716, %1711
-  %1721 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1712, i32 %1717, i32 %1720)  ; IMad(a,b,c)
-  %1722 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1713, i32 %1718, i32 %1721)  ; IMad(a,b,c)
-  %1723 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1714, i32 %1719, i32 %1722)  ; IMad(a,b,c)
-  %1724 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1723, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1725 = extractvalue %dx.types.ResRet.i32 %1724, 0
-  %1726 = extractvalue %dx.types.ResRet.i32 %1724, 1
-  %1727 = call double @dx.op.makeDouble.f64(i32 101, i32 %1725, i32 %1726)  ; MakeDouble(lo,hi)
-  %1728 = fptrunc double %1727 to float
-  br label %1729
-
-; <label>:1729                                    ; preds = %1706, %1640, %1609, %1590, %1580
-  %1730 = phi float [ %1606, %1590 ], [ 0.000000e+00, %1580 ], [ %1639, %1609 ], [ %1728, %1706 ], [ 0.000000e+00, %1640 ]
-  br i1 %973, label %1731, label %1758
-
-; <label>:1731                                    ; preds = %1729
-  %1732 = fcmp fast oge float %968, 0.000000e+00
-  %1733 = fptoui float %968 to i32
-  %1734 = icmp ult i32 %1733, %13
-  %1735 = and i1 %1732, %1734
-  %1736 = fcmp fast oge float %970, 0.000000e+00
-  %1737 = and i1 %1736, %1735
-  %1738 = fptoui float %970 to i32
-  %1739 = icmp ult i32 %1738, %15
-  %1740 = and i1 %1739, %1737
-  br i1 %1740, label %1741, label %1880
-
-; <label>:1741                                    ; preds = %1731
-  %1742 = fptoui float %45 to i32
-  %1743 = fptoui float %178 to i32
-  %1744 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1745 = extractvalue %dx.types.CBufRet.i32 %1744, 0
-  %1746 = extractvalue %dx.types.CBufRet.i32 %1744, 1
-  %1747 = extractvalue %dx.types.CBufRet.i32 %1744, 2
-  %1748 = extractvalue %dx.types.CBufRet.i32 %1744, 3
-  %1749 = mul i32 %1745, %1742
-  %1750 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1743, i32 %1746, i32 %1749)  ; IMad(a,b,c)
-  %1751 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1738, i32 %1747, i32 %1750)  ; IMad(a,b,c)
-  %1752 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1733, i32 %1748, i32 %1751)  ; IMad(a,b,c)
-  %1753 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1752, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1754 = extractvalue %dx.types.ResRet.i32 %1753, 0
-  %1755 = extractvalue %dx.types.ResRet.i32 %1753, 1
-  %1756 = call double @dx.op.makeDouble.f64(i32 101, i32 %1754, i32 %1755)  ; MakeDouble(lo,hi)
-  %1757 = fptrunc double %1756 to float
-  br label %1880
-
-; <label>:1758                                    ; preds = %1729
-  %1759 = icmp eq i32 %972, 1
-  br i1 %1759, label %1760, label %1791
-
-; <label>:1760                                    ; preds = %1758
-  %1761 = add i32 %13, -1
-  %1762 = uitofp i32 %1761 to float
-  %1763 = call float @dx.op.binary.f32(i32 35, float %968, float 0.000000e+00)  ; FMax(a,b)
-  %1764 = call float @dx.op.binary.f32(i32 36, float %1763, float %1762)  ; FMin(a,b)
-  %1765 = fptoui float %1764 to i32
-  %1766 = add i32 %15, -1
-  %1767 = uitofp i32 %1766 to float
-  %1768 = call float @dx.op.binary.f32(i32 35, float %970, float 0.000000e+00)  ; FMax(a,b)
-  %1769 = call float @dx.op.binary.f32(i32 36, float %1768, float %1767)  ; FMin(a,b)
-  %1770 = fptoui float %1769 to i32
-  %1771 = uitofp i32 %1770 to float
-  %1772 = uitofp i32 %1765 to float
-  %1773 = fptoui float %45 to i32
-  %1774 = fptoui float %178 to i32
-  %1775 = fptoui float %1771 to i32
-  %1776 = fptoui float %1772 to i32
-  %1777 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1778 = extractvalue %dx.types.CBufRet.i32 %1777, 0
-  %1779 = extractvalue %dx.types.CBufRet.i32 %1777, 1
-  %1780 = extractvalue %dx.types.CBufRet.i32 %1777, 2
-  %1781 = extractvalue %dx.types.CBufRet.i32 %1777, 3
-  %1782 = mul i32 %1778, %1773
-  %1783 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1774, i32 %1779, i32 %1782)  ; IMad(a,b,c)
-  %1784 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1775, i32 %1780, i32 %1783)  ; IMad(a,b,c)
-  %1785 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1776, i32 %1781, i32 %1784)  ; IMad(a,b,c)
-  %1786 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1785, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1787 = extractvalue %dx.types.ResRet.i32 %1786, 0
-  %1788 = extractvalue %dx.types.ResRet.i32 %1786, 1
-  %1789 = call double @dx.op.makeDouble.f64(i32 101, i32 %1787, i32 %1788)  ; MakeDouble(lo,hi)
-  %1790 = fptrunc double %1789 to float
-  br label %1880
-
-; <label>:1791                                    ; preds = %1758
-  %1792 = icmp eq i32 %972, 2
-  br i1 %1792, label %1793, label %1880
-
-; <label>:1793                                    ; preds = %1791
-  %1794 = fsub fast float %22, %20
-  %1795 = fcmp fast olt float %968, %20
-  br i1 %1795, label %1796, label %1809
-
-; <label>:1796                                    ; preds = %1793
-  %1797 = fsub fast float %20, %968
-  %1798 = fdiv fast float %1797, %1794
-  %1799 = fptoui float %1798 to i32
-  %1800 = uitofp i32 %1799 to float
-  %1801 = fmul fast float %1800, %1794
-  %1802 = fsub fast float %1797, %1801
-  %1803 = and i32 %1799, 1
-  %1804 = icmp eq i32 %1803, 0
-  br i1 %1804, label %1805, label %1807
-
-; <label>:1805                                    ; preds = %1796
-  %1806 = fadd fast float %1802, %20
-  br label %1824
-
-; <label>:1807                                    ; preds = %1796
-  %1808 = fsub fast float %22, %1802
-  br label %1824
-
-; <label>:1809                                    ; preds = %1793
-  %1810 = fcmp fast ogt float %968, %22
-  br i1 %1810, label %1811, label %1824
-
-; <label>:1811                                    ; preds = %1809
-  %1812 = fsub fast float %968, %22
-  %1813 = fdiv fast float %1812, %1794
-  %1814 = fptoui float %1813 to i32
-  %1815 = uitofp i32 %1814 to float
-  %1816 = fmul fast float %1815, %1794
-  %1817 = fsub fast float %1812, %1816
-  %1818 = and i32 %1814, 1
-  %1819 = icmp eq i32 %1818, 0
-  br i1 %1819, label %1820, label %1822
-
-; <label>:1820                                    ; preds = %1811
-  %1821 = fsub fast float %22, %1817
-  br label %1824
-
-; <label>:1822                                    ; preds = %1811
-  %1823 = fadd fast float %1817, %20
-  br label %1824
-
-; <label>:1824                                    ; preds = %1822, %1820, %1809, %1807, %1805
-  %1825 = phi float [ %1806, %1805 ], [ %1808, %1807 ], [ %1821, %1820 ], [ %1823, %1822 ], [ %968, %1809 ]
-  %1826 = fptoui float %1825 to i32
-  %1827 = fsub fast float %24, %20
-  %1828 = fcmp fast olt float %970, %20
-  br i1 %1828, label %1829, label %1842
-
-; <label>:1829                                    ; preds = %1824
-  %1830 = fsub fast float %20, %970
-  %1831 = fdiv fast float %1830, %1827
-  %1832 = fptoui float %1831 to i32
-  %1833 = uitofp i32 %1832 to float
-  %1834 = fmul fast float %1833, %1827
-  %1835 = fsub fast float %1830, %1834
-  %1836 = and i32 %1832, 1
-  %1837 = icmp eq i32 %1836, 0
-  br i1 %1837, label %1838, label %1840
-
-; <label>:1838                                    ; preds = %1829
-  %1839 = fadd fast float %1835, %20
-  br label %1857
-
-; <label>:1840                                    ; preds = %1829
-  %1841 = fsub fast float %24, %1835
-  br label %1857
-
-; <label>:1842                                    ; preds = %1824
-  %1843 = fcmp fast ogt float %970, %24
-  br i1 %1843, label %1844, label %1857
-
-; <label>:1844                                    ; preds = %1842
-  %1845 = fsub fast float %970, %24
-  %1846 = fdiv fast float %1845, %1827
-  %1847 = fptoui float %1846 to i32
-  %1848 = uitofp i32 %1847 to float
-  %1849 = fmul fast float %1848, %1827
-  %1850 = fsub fast float %1845, %1849
-  %1851 = and i32 %1847, 1
-  %1852 = icmp eq i32 %1851, 0
-  br i1 %1852, label %1853, label %1855
-
-; <label>:1853                                    ; preds = %1844
-  %1854 = fsub fast float %24, %1850
-  br label %1857
-
-; <label>:1855                                    ; preds = %1844
-  %1856 = fadd fast float %1850, %20
-  br label %1857
-
-; <label>:1857                                    ; preds = %1855, %1853, %1842, %1840, %1838
-  %1858 = phi float [ %1839, %1838 ], [ %1841, %1840 ], [ %1854, %1853 ], [ %1856, %1855 ], [ %970, %1842 ]
-  %1859 = fptoui float %1858 to i32
-  %1860 = uitofp i32 %1859 to float
-  %1861 = uitofp i32 %1826 to float
-  %1862 = fptoui float %45 to i32
-  %1863 = fptoui float %178 to i32
-  %1864 = fptoui float %1860 to i32
-  %1865 = fptoui float %1861 to i32
-  %1866 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1867 = extractvalue %dx.types.CBufRet.i32 %1866, 0
-  %1868 = extractvalue %dx.types.CBufRet.i32 %1866, 1
-  %1869 = extractvalue %dx.types.CBufRet.i32 %1866, 2
-  %1870 = extractvalue %dx.types.CBufRet.i32 %1866, 3
-  %1871 = mul i32 %1867, %1862
-  %1872 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1863, i32 %1868, i32 %1871)  ; IMad(a,b,c)
-  %1873 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1864, i32 %1869, i32 %1872)  ; IMad(a,b,c)
-  %1874 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1865, i32 %1870, i32 %1873)  ; IMad(a,b,c)
-  %1875 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1874, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1876 = extractvalue %dx.types.ResRet.i32 %1875, 0
-  %1877 = extractvalue %dx.types.ResRet.i32 %1875, 1
-  %1878 = call double @dx.op.makeDouble.f64(i32 101, i32 %1876, i32 %1877)  ; MakeDouble(lo,hi)
-  %1879 = fptrunc double %1878 to float
-  br label %1880
-
-; <label>:1880                                    ; preds = %1857, %1791, %1760, %1741, %1731
-  %1881 = phi float [ %1757, %1741 ], [ 0.000000e+00, %1731 ], [ %1790, %1760 ], [ %1879, %1857 ], [ 0.000000e+00, %1791 ]
-  br i1 %973, label %1882, label %1909
-
-; <label>:1882                                    ; preds = %1880
-  %1883 = fcmp fast oge float %1276, 0.000000e+00
-  %1884 = fptoui float %1276 to i32
-  %1885 = icmp ult i32 %1884, %13
-  %1886 = and i1 %1883, %1885
-  %1887 = fcmp fast oge float %970, 0.000000e+00
-  %1888 = and i1 %1887, %1886
-  %1889 = fptoui float %970 to i32
-  %1890 = icmp ult i32 %1889, %15
-  %1891 = and i1 %1890, %1888
-  br i1 %1891, label %1892, label %2031
-
-; <label>:1892                                    ; preds = %1882
-  %1893 = fptoui float %45 to i32
-  %1894 = fptoui float %178 to i32
-  %1895 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1896 = extractvalue %dx.types.CBufRet.i32 %1895, 0
-  %1897 = extractvalue %dx.types.CBufRet.i32 %1895, 1
-  %1898 = extractvalue %dx.types.CBufRet.i32 %1895, 2
-  %1899 = extractvalue %dx.types.CBufRet.i32 %1895, 3
-  %1900 = mul i32 %1896, %1893
-  %1901 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1894, i32 %1897, i32 %1900)  ; IMad(a,b,c)
-  %1902 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1889, i32 %1898, i32 %1901)  ; IMad(a,b,c)
-  %1903 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1884, i32 %1899, i32 %1902)  ; IMad(a,b,c)
-  %1904 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1903, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1905 = extractvalue %dx.types.ResRet.i32 %1904, 0
-  %1906 = extractvalue %dx.types.ResRet.i32 %1904, 1
-  %1907 = call double @dx.op.makeDouble.f64(i32 101, i32 %1905, i32 %1906)  ; MakeDouble(lo,hi)
-  %1908 = fptrunc double %1907 to float
-  br label %2031
-
-; <label>:1909                                    ; preds = %1880
-  %1910 = icmp eq i32 %972, 1
-  br i1 %1910, label %1911, label %1942
-
-; <label>:1911                                    ; preds = %1909
-  %1912 = add i32 %13, -1
-  %1913 = uitofp i32 %1912 to float
-  %1914 = call float @dx.op.binary.f32(i32 35, float %1276, float 0.000000e+00)  ; FMax(a,b)
-  %1915 = call float @dx.op.binary.f32(i32 36, float %1914, float %1913)  ; FMin(a,b)
-  %1916 = fptoui float %1915 to i32
-  %1917 = add i32 %15, -1
-  %1918 = uitofp i32 %1917 to float
-  %1919 = call float @dx.op.binary.f32(i32 35, float %970, float 0.000000e+00)  ; FMax(a,b)
-  %1920 = call float @dx.op.binary.f32(i32 36, float %1919, float %1918)  ; FMin(a,b)
-  %1921 = fptoui float %1920 to i32
-  %1922 = uitofp i32 %1921 to float
-  %1923 = uitofp i32 %1916 to float
-  %1924 = fptoui float %45 to i32
-  %1925 = fptoui float %178 to i32
-  %1926 = fptoui float %1922 to i32
-  %1927 = fptoui float %1923 to i32
-  %1928 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1929 = extractvalue %dx.types.CBufRet.i32 %1928, 0
-  %1930 = extractvalue %dx.types.CBufRet.i32 %1928, 1
-  %1931 = extractvalue %dx.types.CBufRet.i32 %1928, 2
-  %1932 = extractvalue %dx.types.CBufRet.i32 %1928, 3
-  %1933 = mul i32 %1929, %1924
-  %1934 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1925, i32 %1930, i32 %1933)  ; IMad(a,b,c)
-  %1935 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1926, i32 %1931, i32 %1934)  ; IMad(a,b,c)
-  %1936 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1927, i32 %1932, i32 %1935)  ; IMad(a,b,c)
-  %1937 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1936, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1938 = extractvalue %dx.types.ResRet.i32 %1937, 0
-  %1939 = extractvalue %dx.types.ResRet.i32 %1937, 1
-  %1940 = call double @dx.op.makeDouble.f64(i32 101, i32 %1938, i32 %1939)  ; MakeDouble(lo,hi)
-  %1941 = fptrunc double %1940 to float
-  br label %2031
-
-; <label>:1942                                    ; preds = %1909
-  %1943 = icmp eq i32 %972, 2
-  br i1 %1943, label %1944, label %2031
-
-; <label>:1944                                    ; preds = %1942
-  %1945 = fsub fast float %22, %20
-  %1946 = fcmp fast olt float %1276, %20
-  br i1 %1946, label %1947, label %1960
-
-; <label>:1947                                    ; preds = %1944
-  %1948 = fsub fast float %20, %1276
-  %1949 = fdiv fast float %1948, %1945
-  %1950 = fptoui float %1949 to i32
-  %1951 = uitofp i32 %1950 to float
-  %1952 = fmul fast float %1951, %1945
-  %1953 = fsub fast float %1948, %1952
-  %1954 = and i32 %1950, 1
-  %1955 = icmp eq i32 %1954, 0
-  br i1 %1955, label %1956, label %1958
-
-; <label>:1956                                    ; preds = %1947
-  %1957 = fadd fast float %1953, %20
-  br label %1975
-
-; <label>:1958                                    ; preds = %1947
-  %1959 = fsub fast float %22, %1953
-  br label %1975
-
-; <label>:1960                                    ; preds = %1944
-  %1961 = fcmp fast ogt float %1276, %22
-  br i1 %1961, label %1962, label %1975
-
-; <label>:1962                                    ; preds = %1960
-  %1963 = fsub fast float %1276, %22
-  %1964 = fdiv fast float %1963, %1945
-  %1965 = fptoui float %1964 to i32
-  %1966 = uitofp i32 %1965 to float
-  %1967 = fmul fast float %1966, %1945
-  %1968 = fsub fast float %1963, %1967
-  %1969 = and i32 %1965, 1
-  %1970 = icmp eq i32 %1969, 0
-  br i1 %1970, label %1971, label %1973
-
-; <label>:1971                                    ; preds = %1962
-  %1972 = fsub fast float %22, %1968
-  br label %1975
-
-; <label>:1973                                    ; preds = %1962
-  %1974 = fadd fast float %1968, %20
-  br label %1975
-
-; <label>:1975                                    ; preds = %1973, %1971, %1960, %1958, %1956
-  %1976 = phi float [ %1957, %1956 ], [ %1959, %1958 ], [ %1972, %1971 ], [ %1974, %1973 ], [ %1276, %1960 ]
-  %1977 = fptoui float %1976 to i32
-  %1978 = fsub fast float %24, %20
-  %1979 = fcmp fast olt float %970, %20
-  br i1 %1979, label %1980, label %1993
-
-; <label>:1980                                    ; preds = %1975
-  %1981 = fsub fast float %20, %970
-  %1982 = fdiv fast float %1981, %1978
-  %1983 = fptoui float %1982 to i32
-  %1984 = uitofp i32 %1983 to float
-  %1985 = fmul fast float %1984, %1978
-  %1986 = fsub fast float %1981, %1985
-  %1987 = and i32 %1983, 1
-  %1988 = icmp eq i32 %1987, 0
-  br i1 %1988, label %1989, label %1991
-
-; <label>:1989                                    ; preds = %1980
-  %1990 = fadd fast float %1986, %20
-  br label %2008
-
-; <label>:1991                                    ; preds = %1980
-  %1992 = fsub fast float %24, %1986
-  br label %2008
-
-; <label>:1993                                    ; preds = %1975
-  %1994 = fcmp fast ogt float %970, %24
-  br i1 %1994, label %1995, label %2008
-
-; <label>:1995                                    ; preds = %1993
-  %1996 = fsub fast float %970, %24
-  %1997 = fdiv fast float %1996, %1978
-  %1998 = fptoui float %1997 to i32
-  %1999 = uitofp i32 %1998 to float
-  %2000 = fmul fast float %1999, %1978
-  %2001 = fsub fast float %1996, %2000
-  %2002 = and i32 %1998, 1
-  %2003 = icmp eq i32 %2002, 0
-  br i1 %2003, label %2004, label %2006
-
-; <label>:2004                                    ; preds = %1995
-  %2005 = fsub fast float %24, %2001
-  br label %2008
-
-; <label>:2006                                    ; preds = %1995
-  %2007 = fadd fast float %2001, %20
-  br label %2008
-
-; <label>:2008                                    ; preds = %2006, %2004, %1993, %1991, %1989
-  %2009 = phi float [ %1990, %1989 ], [ %1992, %1991 ], [ %2005, %2004 ], [ %2007, %2006 ], [ %970, %1993 ]
-  %2010 = fptoui float %2009 to i32
-  %2011 = uitofp i32 %2010 to float
-  %2012 = uitofp i32 %1977 to float
-  %2013 = fptoui float %45 to i32
-  %2014 = fptoui float %178 to i32
-  %2015 = fptoui float %2011 to i32
-  %2016 = fptoui float %2012 to i32
-  %2017 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2018 = extractvalue %dx.types.CBufRet.i32 %2017, 0
-  %2019 = extractvalue %dx.types.CBufRet.i32 %2017, 1
-  %2020 = extractvalue %dx.types.CBufRet.i32 %2017, 2
-  %2021 = extractvalue %dx.types.CBufRet.i32 %2017, 3
-  %2022 = mul i32 %2018, %2013
-  %2023 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2014, i32 %2019, i32 %2022)  ; IMad(a,b,c)
-  %2024 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2015, i32 %2020, i32 %2023)  ; IMad(a,b,c)
-  %2025 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2016, i32 %2021, i32 %2024)  ; IMad(a,b,c)
-  %2026 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2025, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2027 = extractvalue %dx.types.ResRet.i32 %2026, 0
-  %2028 = extractvalue %dx.types.ResRet.i32 %2026, 1
-  %2029 = call double @dx.op.makeDouble.f64(i32 101, i32 %2027, i32 %2028)  ; MakeDouble(lo,hi)
-  %2030 = fptrunc double %2029 to float
-  br label %2031
-
-; <label>:2031                                    ; preds = %2008, %1942, %1911, %1892, %1882
-  %2032 = phi float [ %1908, %1892 ], [ 0.000000e+00, %1882 ], [ %1941, %1911 ], [ %2030, %2008 ], [ 0.000000e+00, %1942 ]
-  br i1 %973, label %2033, label %2060
-
-; <label>:2033                                    ; preds = %2031
-  %2034 = fcmp fast oge float %1428, 0.000000e+00
-  %2035 = fptoui float %1428 to i32
-  %2036 = icmp ult i32 %2035, %13
-  %2037 = and i1 %2034, %2036
-  %2038 = fcmp fast oge float %970, 0.000000e+00
-  %2039 = and i1 %2038, %2037
-  %2040 = fptoui float %970 to i32
-  %2041 = icmp ult i32 %2040, %15
-  %2042 = and i1 %2041, %2039
-  br i1 %2042, label %2043, label %2182
-
-; <label>:2043                                    ; preds = %2033
-  %2044 = fptoui float %45 to i32
-  %2045 = fptoui float %178 to i32
-  %2046 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2047 = extractvalue %dx.types.CBufRet.i32 %2046, 0
-  %2048 = extractvalue %dx.types.CBufRet.i32 %2046, 1
-  %2049 = extractvalue %dx.types.CBufRet.i32 %2046, 2
-  %2050 = extractvalue %dx.types.CBufRet.i32 %2046, 3
-  %2051 = mul i32 %2047, %2044
-  %2052 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2045, i32 %2048, i32 %2051)  ; IMad(a,b,c)
-  %2053 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2040, i32 %2049, i32 %2052)  ; IMad(a,b,c)
-  %2054 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2035, i32 %2050, i32 %2053)  ; IMad(a,b,c)
-  %2055 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2054, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2056 = extractvalue %dx.types.ResRet.i32 %2055, 0
-  %2057 = extractvalue %dx.types.ResRet.i32 %2055, 1
-  %2058 = call double @dx.op.makeDouble.f64(i32 101, i32 %2056, i32 %2057)  ; MakeDouble(lo,hi)
-  %2059 = fptrunc double %2058 to float
-  br label %2182
-
-; <label>:2060                                    ; preds = %2031
-  %2061 = icmp eq i32 %972, 1
-  br i1 %2061, label %2062, label %2093
-
-; <label>:2062                                    ; preds = %2060
-  %2063 = add i32 %13, -1
-  %2064 = uitofp i32 %2063 to float
-  %2065 = call float @dx.op.binary.f32(i32 35, float %1428, float 0.000000e+00)  ; FMax(a,b)
-  %2066 = call float @dx.op.binary.f32(i32 36, float %2065, float %2064)  ; FMin(a,b)
-  %2067 = fptoui float %2066 to i32
-  %2068 = add i32 %15, -1
-  %2069 = uitofp i32 %2068 to float
-  %2070 = call float @dx.op.binary.f32(i32 35, float %970, float 0.000000e+00)  ; FMax(a,b)
-  %2071 = call float @dx.op.binary.f32(i32 36, float %2070, float %2069)  ; FMin(a,b)
-  %2072 = fptoui float %2071 to i32
-  %2073 = uitofp i32 %2072 to float
-  %2074 = uitofp i32 %2067 to float
-  %2075 = fptoui float %45 to i32
-  %2076 = fptoui float %178 to i32
-  %2077 = fptoui float %2073 to i32
-  %2078 = fptoui float %2074 to i32
-  %2079 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2080 = extractvalue %dx.types.CBufRet.i32 %2079, 0
-  %2081 = extractvalue %dx.types.CBufRet.i32 %2079, 1
-  %2082 = extractvalue %dx.types.CBufRet.i32 %2079, 2
-  %2083 = extractvalue %dx.types.CBufRet.i32 %2079, 3
-  %2084 = mul i32 %2080, %2075
-  %2085 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2076, i32 %2081, i32 %2084)  ; IMad(a,b,c)
-  %2086 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2077, i32 %2082, i32 %2085)  ; IMad(a,b,c)
-  %2087 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2078, i32 %2083, i32 %2086)  ; IMad(a,b,c)
-  %2088 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2087, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2089 = extractvalue %dx.types.ResRet.i32 %2088, 0
-  %2090 = extractvalue %dx.types.ResRet.i32 %2088, 1
-  %2091 = call double @dx.op.makeDouble.f64(i32 101, i32 %2089, i32 %2090)  ; MakeDouble(lo,hi)
-  %2092 = fptrunc double %2091 to float
-  br label %2182
-
-; <label>:2093                                    ; preds = %2060
-  %2094 = icmp eq i32 %972, 2
-  br i1 %2094, label %2095, label %2182
-
-; <label>:2095                                    ; preds = %2093
-  %2096 = fsub fast float %22, %20
-  %2097 = fcmp fast olt float %1428, %20
-  br i1 %2097, label %2098, label %2111
-
-; <label>:2098                                    ; preds = %2095
-  %2099 = fsub fast float %20, %1428
-  %2100 = fdiv fast float %2099, %2096
-  %2101 = fptoui float %2100 to i32
-  %2102 = uitofp i32 %2101 to float
-  %2103 = fmul fast float %2102, %2096
-  %2104 = fsub fast float %2099, %2103
-  %2105 = and i32 %2101, 1
-  %2106 = icmp eq i32 %2105, 0
-  br i1 %2106, label %2107, label %2109
-
-; <label>:2107                                    ; preds = %2098
-  %2108 = fadd fast float %2104, %20
-  br label %2126
-
-; <label>:2109                                    ; preds = %2098
-  %2110 = fsub fast float %22, %2104
-  br label %2126
-
-; <label>:2111                                    ; preds = %2095
-  %2112 = fcmp fast ogt float %1428, %22
-  br i1 %2112, label %2113, label %2126
-
-; <label>:2113                                    ; preds = %2111
-  %2114 = fsub fast float %1428, %22
-  %2115 = fdiv fast float %2114, %2096
-  %2116 = fptoui float %2115 to i32
-  %2117 = uitofp i32 %2116 to float
-  %2118 = fmul fast float %2117, %2096
-  %2119 = fsub fast float %2114, %2118
-  %2120 = and i32 %2116, 1
-  %2121 = icmp eq i32 %2120, 0
-  br i1 %2121, label %2122, label %2124
-
-; <label>:2122                                    ; preds = %2113
-  %2123 = fsub fast float %22, %2119
-  br label %2126
-
-; <label>:2124                                    ; preds = %2113
-  %2125 = fadd fast float %2119, %20
-  br label %2126
-
-; <label>:2126                                    ; preds = %2124, %2122, %2111, %2109, %2107
-  %2127 = phi float [ %2108, %2107 ], [ %2110, %2109 ], [ %2123, %2122 ], [ %2125, %2124 ], [ %1428, %2111 ]
-  %2128 = fptoui float %2127 to i32
-  %2129 = fsub fast float %24, %20
-  %2130 = fcmp fast olt float %970, %20
-  br i1 %2130, label %2131, label %2144
-
-; <label>:2131                                    ; preds = %2126
-  %2132 = fsub fast float %20, %970
-  %2133 = fdiv fast float %2132, %2129
-  %2134 = fptoui float %2133 to i32
-  %2135 = uitofp i32 %2134 to float
-  %2136 = fmul fast float %2135, %2129
-  %2137 = fsub fast float %2132, %2136
-  %2138 = and i32 %2134, 1
-  %2139 = icmp eq i32 %2138, 0
-  br i1 %2139, label %2140, label %2142
-
-; <label>:2140                                    ; preds = %2131
-  %2141 = fadd fast float %2137, %20
-  br label %2159
-
-; <label>:2142                                    ; preds = %2131
-  %2143 = fsub fast float %24, %2137
-  br label %2159
-
-; <label>:2144                                    ; preds = %2126
-  %2145 = fcmp fast ogt float %970, %24
-  br i1 %2145, label %2146, label %2159
-
-; <label>:2146                                    ; preds = %2144
-  %2147 = fsub fast float %970, %24
-  %2148 = fdiv fast float %2147, %2129
-  %2149 = fptoui float %2148 to i32
-  %2150 = uitofp i32 %2149 to float
-  %2151 = fmul fast float %2150, %2129
-  %2152 = fsub fast float %2147, %2151
-  %2153 = and i32 %2149, 1
-  %2154 = icmp eq i32 %2153, 0
-  br i1 %2154, label %2155, label %2157
-
-; <label>:2155                                    ; preds = %2146
-  %2156 = fsub fast float %24, %2152
-  br label %2159
-
-; <label>:2157                                    ; preds = %2146
-  %2158 = fadd fast float %2152, %20
-  br label %2159
-
-; <label>:2159                                    ; preds = %2157, %2155, %2144, %2142, %2140
-  %2160 = phi float [ %2141, %2140 ], [ %2143, %2142 ], [ %2156, %2155 ], [ %2158, %2157 ], [ %970, %2144 ]
-  %2161 = fptoui float %2160 to i32
-  %2162 = uitofp i32 %2161 to float
-  %2163 = uitofp i32 %2128 to float
-  %2164 = fptoui float %45 to i32
-  %2165 = fptoui float %178 to i32
-  %2166 = fptoui float %2162 to i32
-  %2167 = fptoui float %2163 to i32
-  %2168 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2169 = extractvalue %dx.types.CBufRet.i32 %2168, 0
-  %2170 = extractvalue %dx.types.CBufRet.i32 %2168, 1
-  %2171 = extractvalue %dx.types.CBufRet.i32 %2168, 2
-  %2172 = extractvalue %dx.types.CBufRet.i32 %2168, 3
-  %2173 = mul i32 %2169, %2164
-  %2174 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2165, i32 %2170, i32 %2173)  ; IMad(a,b,c)
-  %2175 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2166, i32 %2171, i32 %2174)  ; IMad(a,b,c)
-  %2176 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2167, i32 %2172, i32 %2175)  ; IMad(a,b,c)
-  %2177 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2176, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2178 = extractvalue %dx.types.ResRet.i32 %2177, 0
-  %2179 = extractvalue %dx.types.ResRet.i32 %2177, 1
-  %2180 = call double @dx.op.makeDouble.f64(i32 101, i32 %2178, i32 %2179)  ; MakeDouble(lo,hi)
-  %2181 = fptrunc double %2180 to float
-  br label %2182
-
-; <label>:2182                                    ; preds = %2159, %2093, %2062, %2043, %2033
-  %2183 = phi float [ %2059, %2043 ], [ 0.000000e+00, %2033 ], [ %2092, %2062 ], [ %2181, %2159 ], [ 0.000000e+00, %2093 ]
-  %2184 = fadd fast float %970, 1.000000e+00
-  br i1 %973, label %2185, label %2212
-
-; <label>:2185                                    ; preds = %2182
-  %2186 = fcmp fast oge float %969, 0.000000e+00
-  %2187 = fptoui float %969 to i32
-  %2188 = icmp ult i32 %2187, %13
-  %2189 = and i1 %2186, %2188
-  %2190 = fcmp fast oge float %2184, 0.000000e+00
-  %2191 = and i1 %2190, %2189
-  %2192 = fptoui float %2184 to i32
-  %2193 = icmp ult i32 %2192, %15
-  %2194 = and i1 %2193, %2191
-  br i1 %2194, label %2195, label %2334
-
-; <label>:2195                                    ; preds = %2185
-  %2196 = fptoui float %45 to i32
-  %2197 = fptoui float %178 to i32
-  %2198 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2199 = extractvalue %dx.types.CBufRet.i32 %2198, 0
-  %2200 = extractvalue %dx.types.CBufRet.i32 %2198, 1
-  %2201 = extractvalue %dx.types.CBufRet.i32 %2198, 2
-  %2202 = extractvalue %dx.types.CBufRet.i32 %2198, 3
-  %2203 = mul i32 %2199, %2196
-  %2204 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2197, i32 %2200, i32 %2203)  ; IMad(a,b,c)
-  %2205 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2192, i32 %2201, i32 %2204)  ; IMad(a,b,c)
-  %2206 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2187, i32 %2202, i32 %2205)  ; IMad(a,b,c)
-  %2207 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2206, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2208 = extractvalue %dx.types.ResRet.i32 %2207, 0
-  %2209 = extractvalue %dx.types.ResRet.i32 %2207, 1
-  %2210 = call double @dx.op.makeDouble.f64(i32 101, i32 %2208, i32 %2209)  ; MakeDouble(lo,hi)
-  %2211 = fptrunc double %2210 to float
-  br label %2334
-
-; <label>:2212                                    ; preds = %2182
-  %2213 = icmp eq i32 %972, 1
-  br i1 %2213, label %2214, label %2245
-
-; <label>:2214                                    ; preds = %2212
-  %2215 = add i32 %13, -1
-  %2216 = uitofp i32 %2215 to float
-  %2217 = call float @dx.op.binary.f32(i32 35, float %969, float 0.000000e+00)  ; FMax(a,b)
-  %2218 = call float @dx.op.binary.f32(i32 36, float %2217, float %2216)  ; FMin(a,b)
-  %2219 = fptoui float %2218 to i32
-  %2220 = add i32 %15, -1
-  %2221 = uitofp i32 %2220 to float
-  %2222 = call float @dx.op.binary.f32(i32 35, float %2184, float 0.000000e+00)  ; FMax(a,b)
-  %2223 = call float @dx.op.binary.f32(i32 36, float %2222, float %2221)  ; FMin(a,b)
-  %2224 = fptoui float %2223 to i32
-  %2225 = uitofp i32 %2224 to float
-  %2226 = uitofp i32 %2219 to float
-  %2227 = fptoui float %45 to i32
-  %2228 = fptoui float %178 to i32
-  %2229 = fptoui float %2225 to i32
-  %2230 = fptoui float %2226 to i32
-  %2231 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2232 = extractvalue %dx.types.CBufRet.i32 %2231, 0
-  %2233 = extractvalue %dx.types.CBufRet.i32 %2231, 1
-  %2234 = extractvalue %dx.types.CBufRet.i32 %2231, 2
-  %2235 = extractvalue %dx.types.CBufRet.i32 %2231, 3
-  %2236 = mul i32 %2232, %2227
-  %2237 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2228, i32 %2233, i32 %2236)  ; IMad(a,b,c)
-  %2238 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2229, i32 %2234, i32 %2237)  ; IMad(a,b,c)
-  %2239 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2230, i32 %2235, i32 %2238)  ; IMad(a,b,c)
-  %2240 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2239, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2241 = extractvalue %dx.types.ResRet.i32 %2240, 0
-  %2242 = extractvalue %dx.types.ResRet.i32 %2240, 1
-  %2243 = call double @dx.op.makeDouble.f64(i32 101, i32 %2241, i32 %2242)  ; MakeDouble(lo,hi)
-  %2244 = fptrunc double %2243 to float
-  br label %2334
-
-; <label>:2245                                    ; preds = %2212
-  %2246 = icmp eq i32 %972, 2
-  br i1 %2246, label %2247, label %2334
-
-; <label>:2247                                    ; preds = %2245
-  %2248 = fsub fast float %22, %20
-  %2249 = fcmp fast olt float %969, %20
-  br i1 %2249, label %2250, label %2263
-
-; <label>:2250                                    ; preds = %2247
-  %2251 = fsub fast float %20, %969
-  %2252 = fdiv fast float %2251, %2248
-  %2253 = fptoui float %2252 to i32
-  %2254 = uitofp i32 %2253 to float
-  %2255 = fmul fast float %2254, %2248
-  %2256 = fsub fast float %2251, %2255
-  %2257 = and i32 %2253, 1
-  %2258 = icmp eq i32 %2257, 0
-  br i1 %2258, label %2259, label %2261
-
-; <label>:2259                                    ; preds = %2250
-  %2260 = fadd fast float %2256, %20
-  br label %2278
-
-; <label>:2261                                    ; preds = %2250
-  %2262 = fsub fast float %22, %2256
-  br label %2278
-
-; <label>:2263                                    ; preds = %2247
-  %2264 = fcmp fast ogt float %969, %22
-  br i1 %2264, label %2265, label %2278
-
-; <label>:2265                                    ; preds = %2263
-  %2266 = fsub fast float %969, %22
-  %2267 = fdiv fast float %2266, %2248
-  %2268 = fptoui float %2267 to i32
-  %2269 = uitofp i32 %2268 to float
-  %2270 = fmul fast float %2269, %2248
-  %2271 = fsub fast float %2266, %2270
-  %2272 = and i32 %2268, 1
-  %2273 = icmp eq i32 %2272, 0
-  br i1 %2273, label %2274, label %2276
-
-; <label>:2274                                    ; preds = %2265
-  %2275 = fsub fast float %22, %2271
-  br label %2278
-
-; <label>:2276                                    ; preds = %2265
-  %2277 = fadd fast float %2271, %20
-  br label %2278
-
-; <label>:2278                                    ; preds = %2276, %2274, %2263, %2261, %2259
-  %2279 = phi float [ %2260, %2259 ], [ %2262, %2261 ], [ %2275, %2274 ], [ %2277, %2276 ], [ %969, %2263 ]
-  %2280 = fptoui float %2279 to i32
-  %2281 = fsub fast float %24, %20
-  %2282 = fcmp fast olt float %2184, %20
-  br i1 %2282, label %2283, label %2296
-
-; <label>:2283                                    ; preds = %2278
-  %2284 = fsub fast float %20, %2184
-  %2285 = fdiv fast float %2284, %2281
-  %2286 = fptoui float %2285 to i32
-  %2287 = uitofp i32 %2286 to float
-  %2288 = fmul fast float %2287, %2281
-  %2289 = fsub fast float %2284, %2288
-  %2290 = and i32 %2286, 1
-  %2291 = icmp eq i32 %2290, 0
-  br i1 %2291, label %2292, label %2294
-
-; <label>:2292                                    ; preds = %2283
-  %2293 = fadd fast float %2289, %20
-  br label %2311
-
-; <label>:2294                                    ; preds = %2283
-  %2295 = fsub fast float %24, %2289
-  br label %2311
-
-; <label>:2296                                    ; preds = %2278
-  %2297 = fcmp fast ogt float %2184, %24
-  br i1 %2297, label %2298, label %2311
-
-; <label>:2298                                    ; preds = %2296
-  %2299 = fsub fast float %2184, %24
-  %2300 = fdiv fast float %2299, %2281
-  %2301 = fptoui float %2300 to i32
-  %2302 = uitofp i32 %2301 to float
-  %2303 = fmul fast float %2302, %2281
-  %2304 = fsub fast float %2299, %2303
-  %2305 = and i32 %2301, 1
-  %2306 = icmp eq i32 %2305, 0
-  br i1 %2306, label %2307, label %2309
-
-; <label>:2307                                    ; preds = %2298
-  %2308 = fsub fast float %24, %2304
-  br label %2311
-
-; <label>:2309                                    ; preds = %2298
-  %2310 = fadd fast float %2304, %20
-  br label %2311
-
-; <label>:2311                                    ; preds = %2309, %2307, %2296, %2294, %2292
-  %2312 = phi float [ %2293, %2292 ], [ %2295, %2294 ], [ %2308, %2307 ], [ %2310, %2309 ], [ %2184, %2296 ]
-  %2313 = fptoui float %2312 to i32
-  %2314 = uitofp i32 %2313 to float
-  %2315 = uitofp i32 %2280 to float
-  %2316 = fptoui float %45 to i32
-  %2317 = fptoui float %178 to i32
-  %2318 = fptoui float %2314 to i32
-  %2319 = fptoui float %2315 to i32
-  %2320 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2321 = extractvalue %dx.types.CBufRet.i32 %2320, 0
-  %2322 = extractvalue %dx.types.CBufRet.i32 %2320, 1
-  %2323 = extractvalue %dx.types.CBufRet.i32 %2320, 2
-  %2324 = extractvalue %dx.types.CBufRet.i32 %2320, 3
-  %2325 = mul i32 %2321, %2316
-  %2326 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2317, i32 %2322, i32 %2325)  ; IMad(a,b,c)
-  %2327 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2318, i32 %2323, i32 %2326)  ; IMad(a,b,c)
-  %2328 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2319, i32 %2324, i32 %2327)  ; IMad(a,b,c)
-  %2329 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2328, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2330 = extractvalue %dx.types.ResRet.i32 %2329, 0
-  %2331 = extractvalue %dx.types.ResRet.i32 %2329, 1
-  %2332 = call double @dx.op.makeDouble.f64(i32 101, i32 %2330, i32 %2331)  ; MakeDouble(lo,hi)
-  %2333 = fptrunc double %2332 to float
-  br label %2334
-
-; <label>:2334                                    ; preds = %2311, %2245, %2214, %2195, %2185
-  %2335 = phi float [ %2211, %2195 ], [ 0.000000e+00, %2185 ], [ %2244, %2214 ], [ %2333, %2311 ], [ 0.000000e+00, %2245 ]
-  br i1 %973, label %2336, label %2363
-
-; <label>:2336                                    ; preds = %2334
-  %2337 = fcmp fast oge float %968, 0.000000e+00
-  %2338 = fptoui float %968 to i32
-  %2339 = icmp ult i32 %2338, %13
-  %2340 = and i1 %2337, %2339
-  %2341 = fcmp fast oge float %2184, 0.000000e+00
-  %2342 = and i1 %2341, %2340
-  %2343 = fptoui float %2184 to i32
-  %2344 = icmp ult i32 %2343, %15
-  %2345 = and i1 %2344, %2342
-  br i1 %2345, label %2346, label %2485
-
-; <label>:2346                                    ; preds = %2336
-  %2347 = fptoui float %45 to i32
-  %2348 = fptoui float %178 to i32
-  %2349 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2350 = extractvalue %dx.types.CBufRet.i32 %2349, 0
-  %2351 = extractvalue %dx.types.CBufRet.i32 %2349, 1
-  %2352 = extractvalue %dx.types.CBufRet.i32 %2349, 2
-  %2353 = extractvalue %dx.types.CBufRet.i32 %2349, 3
-  %2354 = mul i32 %2350, %2347
-  %2355 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2348, i32 %2351, i32 %2354)  ; IMad(a,b,c)
-  %2356 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2343, i32 %2352, i32 %2355)  ; IMad(a,b,c)
-  %2357 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2338, i32 %2353, i32 %2356)  ; IMad(a,b,c)
-  %2358 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2357, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2359 = extractvalue %dx.types.ResRet.i32 %2358, 0
-  %2360 = extractvalue %dx.types.ResRet.i32 %2358, 1
-  %2361 = call double @dx.op.makeDouble.f64(i32 101, i32 %2359, i32 %2360)  ; MakeDouble(lo,hi)
-  %2362 = fptrunc double %2361 to float
-  br label %2485
-
-; <label>:2363                                    ; preds = %2334
-  %2364 = icmp eq i32 %972, 1
-  br i1 %2364, label %2365, label %2396
-
-; <label>:2365                                    ; preds = %2363
-  %2366 = add i32 %13, -1
-  %2367 = uitofp i32 %2366 to float
-  %2368 = call float @dx.op.binary.f32(i32 35, float %968, float 0.000000e+00)  ; FMax(a,b)
-  %2369 = call float @dx.op.binary.f32(i32 36, float %2368, float %2367)  ; FMin(a,b)
-  %2370 = fptoui float %2369 to i32
-  %2371 = add i32 %15, -1
-  %2372 = uitofp i32 %2371 to float
-  %2373 = call float @dx.op.binary.f32(i32 35, float %2184, float 0.000000e+00)  ; FMax(a,b)
-  %2374 = call float @dx.op.binary.f32(i32 36, float %2373, float %2372)  ; FMin(a,b)
-  %2375 = fptoui float %2374 to i32
-  %2376 = uitofp i32 %2375 to float
-  %2377 = uitofp i32 %2370 to float
-  %2378 = fptoui float %45 to i32
-  %2379 = fptoui float %178 to i32
-  %2380 = fptoui float %2376 to i32
-  %2381 = fptoui float %2377 to i32
-  %2382 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2383 = extractvalue %dx.types.CBufRet.i32 %2382, 0
-  %2384 = extractvalue %dx.types.CBufRet.i32 %2382, 1
-  %2385 = extractvalue %dx.types.CBufRet.i32 %2382, 2
-  %2386 = extractvalue %dx.types.CBufRet.i32 %2382, 3
-  %2387 = mul i32 %2383, %2378
-  %2388 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2379, i32 %2384, i32 %2387)  ; IMad(a,b,c)
-  %2389 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2380, i32 %2385, i32 %2388)  ; IMad(a,b,c)
-  %2390 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2381, i32 %2386, i32 %2389)  ; IMad(a,b,c)
-  %2391 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2390, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2392 = extractvalue %dx.types.ResRet.i32 %2391, 0
-  %2393 = extractvalue %dx.types.ResRet.i32 %2391, 1
-  %2394 = call double @dx.op.makeDouble.f64(i32 101, i32 %2392, i32 %2393)  ; MakeDouble(lo,hi)
-  %2395 = fptrunc double %2394 to float
-  br label %2485
-
-; <label>:2396                                    ; preds = %2363
-  %2397 = icmp eq i32 %972, 2
-  br i1 %2397, label %2398, label %2485
-
-; <label>:2398                                    ; preds = %2396
-  %2399 = fsub fast float %22, %20
-  %2400 = fcmp fast olt float %968, %20
-  br i1 %2400, label %2401, label %2414
-
-; <label>:2401                                    ; preds = %2398
-  %2402 = fsub fast float %20, %968
-  %2403 = fdiv fast float %2402, %2399
-  %2404 = fptoui float %2403 to i32
-  %2405 = uitofp i32 %2404 to float
-  %2406 = fmul fast float %2405, %2399
-  %2407 = fsub fast float %2402, %2406
-  %2408 = and i32 %2404, 1
-  %2409 = icmp eq i32 %2408, 0
-  br i1 %2409, label %2410, label %2412
-
-; <label>:2410                                    ; preds = %2401
-  %2411 = fadd fast float %2407, %20
-  br label %2429
-
-; <label>:2412                                    ; preds = %2401
-  %2413 = fsub fast float %22, %2407
-  br label %2429
-
-; <label>:2414                                    ; preds = %2398
-  %2415 = fcmp fast ogt float %968, %22
-  br i1 %2415, label %2416, label %2429
-
-; <label>:2416                                    ; preds = %2414
-  %2417 = fsub fast float %968, %22
-  %2418 = fdiv fast float %2417, %2399
-  %2419 = fptoui float %2418 to i32
-  %2420 = uitofp i32 %2419 to float
-  %2421 = fmul fast float %2420, %2399
-  %2422 = fsub fast float %2417, %2421
-  %2423 = and i32 %2419, 1
-  %2424 = icmp eq i32 %2423, 0
-  br i1 %2424, label %2425, label %2427
-
-; <label>:2425                                    ; preds = %2416
-  %2426 = fsub fast float %22, %2422
-  br label %2429
-
-; <label>:2427                                    ; preds = %2416
-  %2428 = fadd fast float %2422, %20
-  br label %2429
-
-; <label>:2429                                    ; preds = %2427, %2425, %2414, %2412, %2410
-  %2430 = phi float [ %2411, %2410 ], [ %2413, %2412 ], [ %2426, %2425 ], [ %2428, %2427 ], [ %968, %2414 ]
-  %2431 = fptoui float %2430 to i32
-  %2432 = fsub fast float %24, %20
-  %2433 = fcmp fast olt float %2184, %20
-  br i1 %2433, label %2434, label %2447
-
-; <label>:2434                                    ; preds = %2429
-  %2435 = fsub fast float %20, %2184
-  %2436 = fdiv fast float %2435, %2432
-  %2437 = fptoui float %2436 to i32
-  %2438 = uitofp i32 %2437 to float
-  %2439 = fmul fast float %2438, %2432
-  %2440 = fsub fast float %2435, %2439
-  %2441 = and i32 %2437, 1
-  %2442 = icmp eq i32 %2441, 0
-  br i1 %2442, label %2443, label %2445
-
-; <label>:2443                                    ; preds = %2434
-  %2444 = fadd fast float %2440, %20
-  br label %2462
-
-; <label>:2445                                    ; preds = %2434
-  %2446 = fsub fast float %24, %2440
-  br label %2462
-
-; <label>:2447                                    ; preds = %2429
-  %2448 = fcmp fast ogt float %2184, %24
-  br i1 %2448, label %2449, label %2462
-
-; <label>:2449                                    ; preds = %2447
-  %2450 = fsub fast float %2184, %24
-  %2451 = fdiv fast float %2450, %2432
-  %2452 = fptoui float %2451 to i32
-  %2453 = uitofp i32 %2452 to float
-  %2454 = fmul fast float %2453, %2432
-  %2455 = fsub fast float %2450, %2454
-  %2456 = and i32 %2452, 1
-  %2457 = icmp eq i32 %2456, 0
-  br i1 %2457, label %2458, label %2460
-
-; <label>:2458                                    ; preds = %2449
-  %2459 = fsub fast float %24, %2455
-  br label %2462
-
-; <label>:2460                                    ; preds = %2449
-  %2461 = fadd fast float %2455, %20
-  br label %2462
-
-; <label>:2462                                    ; preds = %2460, %2458, %2447, %2445, %2443
-  %2463 = phi float [ %2444, %2443 ], [ %2446, %2445 ], [ %2459, %2458 ], [ %2461, %2460 ], [ %2184, %2447 ]
-  %2464 = fptoui float %2463 to i32
-  %2465 = uitofp i32 %2464 to float
-  %2466 = uitofp i32 %2431 to float
-  %2467 = fptoui float %45 to i32
-  %2468 = fptoui float %178 to i32
-  %2469 = fptoui float %2465 to i32
-  %2470 = fptoui float %2466 to i32
-  %2471 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2472 = extractvalue %dx.types.CBufRet.i32 %2471, 0
-  %2473 = extractvalue %dx.types.CBufRet.i32 %2471, 1
-  %2474 = extractvalue %dx.types.CBufRet.i32 %2471, 2
-  %2475 = extractvalue %dx.types.CBufRet.i32 %2471, 3
-  %2476 = mul i32 %2472, %2467
-  %2477 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2468, i32 %2473, i32 %2476)  ; IMad(a,b,c)
-  %2478 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2469, i32 %2474, i32 %2477)  ; IMad(a,b,c)
-  %2479 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2470, i32 %2475, i32 %2478)  ; IMad(a,b,c)
-  %2480 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2479, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2481 = extractvalue %dx.types.ResRet.i32 %2480, 0
-  %2482 = extractvalue %dx.types.ResRet.i32 %2480, 1
-  %2483 = call double @dx.op.makeDouble.f64(i32 101, i32 %2481, i32 %2482)  ; MakeDouble(lo,hi)
-  %2484 = fptrunc double %2483 to float
-  br label %2485
-
-; <label>:2485                                    ; preds = %2462, %2396, %2365, %2346, %2336
-  %2486 = phi float [ %2362, %2346 ], [ 0.000000e+00, %2336 ], [ %2395, %2365 ], [ %2484, %2462 ], [ 0.000000e+00, %2396 ]
-  br i1 %973, label %2487, label %2514
-
-; <label>:2487                                    ; preds = %2485
-  %2488 = fcmp fast oge float %1276, 0.000000e+00
-  %2489 = fptoui float %1276 to i32
-  %2490 = icmp ult i32 %2489, %13
-  %2491 = and i1 %2488, %2490
-  %2492 = fcmp fast oge float %2184, 0.000000e+00
-  %2493 = and i1 %2492, %2491
-  %2494 = fptoui float %2184 to i32
-  %2495 = icmp ult i32 %2494, %15
-  %2496 = and i1 %2495, %2493
-  br i1 %2496, label %2497, label %2636
-
-; <label>:2497                                    ; preds = %2487
-  %2498 = fptoui float %45 to i32
-  %2499 = fptoui float %178 to i32
-  %2500 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2501 = extractvalue %dx.types.CBufRet.i32 %2500, 0
-  %2502 = extractvalue %dx.types.CBufRet.i32 %2500, 1
-  %2503 = extractvalue %dx.types.CBufRet.i32 %2500, 2
-  %2504 = extractvalue %dx.types.CBufRet.i32 %2500, 3
-  %2505 = mul i32 %2501, %2498
-  %2506 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2499, i32 %2502, i32 %2505)  ; IMad(a,b,c)
-  %2507 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2494, i32 %2503, i32 %2506)  ; IMad(a,b,c)
-  %2508 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2489, i32 %2504, i32 %2507)  ; IMad(a,b,c)
-  %2509 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2508, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2510 = extractvalue %dx.types.ResRet.i32 %2509, 0
-  %2511 = extractvalue %dx.types.ResRet.i32 %2509, 1
-  %2512 = call double @dx.op.makeDouble.f64(i32 101, i32 %2510, i32 %2511)  ; MakeDouble(lo,hi)
-  %2513 = fptrunc double %2512 to float
-  br label %2636
-
-; <label>:2514                                    ; preds = %2485
-  %2515 = icmp eq i32 %972, 1
-  br i1 %2515, label %2516, label %2547
-
-; <label>:2516                                    ; preds = %2514
-  %2517 = add i32 %13, -1
-  %2518 = uitofp i32 %2517 to float
-  %2519 = call float @dx.op.binary.f32(i32 35, float %1276, float 0.000000e+00)  ; FMax(a,b)
-  %2520 = call float @dx.op.binary.f32(i32 36, float %2519, float %2518)  ; FMin(a,b)
-  %2521 = fptoui float %2520 to i32
-  %2522 = add i32 %15, -1
-  %2523 = uitofp i32 %2522 to float
-  %2524 = call float @dx.op.binary.f32(i32 35, float %2184, float 0.000000e+00)  ; FMax(a,b)
-  %2525 = call float @dx.op.binary.f32(i32 36, float %2524, float %2523)  ; FMin(a,b)
-  %2526 = fptoui float %2525 to i32
-  %2527 = uitofp i32 %2526 to float
-  %2528 = uitofp i32 %2521 to float
-  %2529 = fptoui float %45 to i32
-  %2530 = fptoui float %178 to i32
-  %2531 = fptoui float %2527 to i32
-  %2532 = fptoui float %2528 to i32
-  %2533 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2534 = extractvalue %dx.types.CBufRet.i32 %2533, 0
-  %2535 = extractvalue %dx.types.CBufRet.i32 %2533, 1
-  %2536 = extractvalue %dx.types.CBufRet.i32 %2533, 2
-  %2537 = extractvalue %dx.types.CBufRet.i32 %2533, 3
-  %2538 = mul i32 %2534, %2529
-  %2539 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2530, i32 %2535, i32 %2538)  ; IMad(a,b,c)
-  %2540 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2531, i32 %2536, i32 %2539)  ; IMad(a,b,c)
-  %2541 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2532, i32 %2537, i32 %2540)  ; IMad(a,b,c)
-  %2542 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2541, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2543 = extractvalue %dx.types.ResRet.i32 %2542, 0
-  %2544 = extractvalue %dx.types.ResRet.i32 %2542, 1
-  %2545 = call double @dx.op.makeDouble.f64(i32 101, i32 %2543, i32 %2544)  ; MakeDouble(lo,hi)
-  %2546 = fptrunc double %2545 to float
-  br label %2636
-
-; <label>:2547                                    ; preds = %2514
-  %2548 = icmp eq i32 %972, 2
-  br i1 %2548, label %2549, label %2636
-
-; <label>:2549                                    ; preds = %2547
-  %2550 = fsub fast float %22, %20
-  %2551 = fcmp fast olt float %1276, %20
-  br i1 %2551, label %2552, label %2565
-
-; <label>:2552                                    ; preds = %2549
-  %2553 = fsub fast float %20, %1276
-  %2554 = fdiv fast float %2553, %2550
-  %2555 = fptoui float %2554 to i32
-  %2556 = uitofp i32 %2555 to float
-  %2557 = fmul fast float %2556, %2550
-  %2558 = fsub fast float %2553, %2557
-  %2559 = and i32 %2555, 1
-  %2560 = icmp eq i32 %2559, 0
-  br i1 %2560, label %2561, label %2563
-
-; <label>:2561                                    ; preds = %2552
-  %2562 = fadd fast float %2558, %20
-  br label %2580
-
-; <label>:2563                                    ; preds = %2552
-  %2564 = fsub fast float %22, %2558
-  br label %2580
-
-; <label>:2565                                    ; preds = %2549
-  %2566 = fcmp fast ogt float %1276, %22
-  br i1 %2566, label %2567, label %2580
-
-; <label>:2567                                    ; preds = %2565
-  %2568 = fsub fast float %1276, %22
-  %2569 = fdiv fast float %2568, %2550
-  %2570 = fptoui float %2569 to i32
-  %2571 = uitofp i32 %2570 to float
-  %2572 = fmul fast float %2571, %2550
-  %2573 = fsub fast float %2568, %2572
-  %2574 = and i32 %2570, 1
-  %2575 = icmp eq i32 %2574, 0
-  br i1 %2575, label %2576, label %2578
-
-; <label>:2576                                    ; preds = %2567
-  %2577 = fsub fast float %22, %2573
-  br label %2580
-
-; <label>:2578                                    ; preds = %2567
-  %2579 = fadd fast float %2573, %20
-  br label %2580
-
-; <label>:2580                                    ; preds = %2578, %2576, %2565, %2563, %2561
-  %2581 = phi float [ %2562, %2561 ], [ %2564, %2563 ], [ %2577, %2576 ], [ %2579, %2578 ], [ %1276, %2565 ]
-  %2582 = fptoui float %2581 to i32
-  %2583 = fsub fast float %24, %20
-  %2584 = fcmp fast olt float %2184, %20
-  br i1 %2584, label %2585, label %2598
-
-; <label>:2585                                    ; preds = %2580
-  %2586 = fsub fast float %20, %2184
-  %2587 = fdiv fast float %2586, %2583
-  %2588 = fptoui float %2587 to i32
-  %2589 = uitofp i32 %2588 to float
-  %2590 = fmul fast float %2589, %2583
-  %2591 = fsub fast float %2586, %2590
-  %2592 = and i32 %2588, 1
-  %2593 = icmp eq i32 %2592, 0
-  br i1 %2593, label %2594, label %2596
-
-; <label>:2594                                    ; preds = %2585
-  %2595 = fadd fast float %2591, %20
-  br label %2613
-
-; <label>:2596                                    ; preds = %2585
-  %2597 = fsub fast float %24, %2591
-  br label %2613
-
-; <label>:2598                                    ; preds = %2580
-  %2599 = fcmp fast ogt float %2184, %24
-  br i1 %2599, label %2600, label %2613
-
-; <label>:2600                                    ; preds = %2598
-  %2601 = fsub fast float %2184, %24
-  %2602 = fdiv fast float %2601, %2583
-  %2603 = fptoui float %2602 to i32
-  %2604 = uitofp i32 %2603 to float
-  %2605 = fmul fast float %2604, %2583
-  %2606 = fsub fast float %2601, %2605
-  %2607 = and i32 %2603, 1
-  %2608 = icmp eq i32 %2607, 0
-  br i1 %2608, label %2609, label %2611
-
-; <label>:2609                                    ; preds = %2600
-  %2610 = fsub fast float %24, %2606
-  br label %2613
-
-; <label>:2611                                    ; preds = %2600
-  %2612 = fadd fast float %2606, %20
-  br label %2613
-
-; <label>:2613                                    ; preds = %2611, %2609, %2598, %2596, %2594
-  %2614 = phi float [ %2595, %2594 ], [ %2597, %2596 ], [ %2610, %2609 ], [ %2612, %2611 ], [ %2184, %2598 ]
-  %2615 = fptoui float %2614 to i32
-  %2616 = uitofp i32 %2615 to float
-  %2617 = uitofp i32 %2582 to float
-  %2618 = fptoui float %45 to i32
-  %2619 = fptoui float %178 to i32
-  %2620 = fptoui float %2616 to i32
-  %2621 = fptoui float %2617 to i32
-  %2622 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2623 = extractvalue %dx.types.CBufRet.i32 %2622, 0
-  %2624 = extractvalue %dx.types.CBufRet.i32 %2622, 1
-  %2625 = extractvalue %dx.types.CBufRet.i32 %2622, 2
-  %2626 = extractvalue %dx.types.CBufRet.i32 %2622, 3
-  %2627 = mul i32 %2623, %2618
-  %2628 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2619, i32 %2624, i32 %2627)  ; IMad(a,b,c)
-  %2629 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2620, i32 %2625, i32 %2628)  ; IMad(a,b,c)
-  %2630 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2621, i32 %2626, i32 %2629)  ; IMad(a,b,c)
-  %2631 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2630, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2632 = extractvalue %dx.types.ResRet.i32 %2631, 0
-  %2633 = extractvalue %dx.types.ResRet.i32 %2631, 1
-  %2634 = call double @dx.op.makeDouble.f64(i32 101, i32 %2632, i32 %2633)  ; MakeDouble(lo,hi)
-  %2635 = fptrunc double %2634 to float
-  br label %2636
-
-; <label>:2636                                    ; preds = %2613, %2547, %2516, %2497, %2487
-  %2637 = phi float [ %2513, %2497 ], [ 0.000000e+00, %2487 ], [ %2546, %2516 ], [ %2635, %2613 ], [ 0.000000e+00, %2547 ]
-  br i1 %973, label %2638, label %2665
-
-; <label>:2638                                    ; preds = %2636
-  %2639 = fcmp fast oge float %1428, 0.000000e+00
-  %2640 = fptoui float %1428 to i32
-  %2641 = icmp ult i32 %2640, %13
-  %2642 = and i1 %2639, %2641
-  %2643 = fcmp fast oge float %2184, 0.000000e+00
-  %2644 = and i1 %2643, %2642
-  %2645 = fptoui float %2184 to i32
-  %2646 = icmp ult i32 %2645, %15
-  %2647 = and i1 %2646, %2644
-  br i1 %2647, label %2648, label %2787
-
-; <label>:2648                                    ; preds = %2638
-  %2649 = fptoui float %45 to i32
-  %2650 = fptoui float %178 to i32
-  %2651 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2652 = extractvalue %dx.types.CBufRet.i32 %2651, 0
-  %2653 = extractvalue %dx.types.CBufRet.i32 %2651, 1
-  %2654 = extractvalue %dx.types.CBufRet.i32 %2651, 2
-  %2655 = extractvalue %dx.types.CBufRet.i32 %2651, 3
-  %2656 = mul i32 %2652, %2649
-  %2657 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2650, i32 %2653, i32 %2656)  ; IMad(a,b,c)
-  %2658 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2645, i32 %2654, i32 %2657)  ; IMad(a,b,c)
-  %2659 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2640, i32 %2655, i32 %2658)  ; IMad(a,b,c)
-  %2660 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2659, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2661 = extractvalue %dx.types.ResRet.i32 %2660, 0
-  %2662 = extractvalue %dx.types.ResRet.i32 %2660, 1
-  %2663 = call double @dx.op.makeDouble.f64(i32 101, i32 %2661, i32 %2662)  ; MakeDouble(lo,hi)
-  %2664 = fptrunc double %2663 to float
-  br label %2787
-
-; <label>:2665                                    ; preds = %2636
-  %2666 = icmp eq i32 %972, 1
-  br i1 %2666, label %2667, label %2698
-
-; <label>:2667                                    ; preds = %2665
-  %2668 = add i32 %13, -1
-  %2669 = uitofp i32 %2668 to float
-  %2670 = call float @dx.op.binary.f32(i32 35, float %1428, float 0.000000e+00)  ; FMax(a,b)
-  %2671 = call float @dx.op.binary.f32(i32 36, float %2670, float %2669)  ; FMin(a,b)
-  %2672 = fptoui float %2671 to i32
-  %2673 = add i32 %15, -1
-  %2674 = uitofp i32 %2673 to float
-  %2675 = call float @dx.op.binary.f32(i32 35, float %2184, float 0.000000e+00)  ; FMax(a,b)
-  %2676 = call float @dx.op.binary.f32(i32 36, float %2675, float %2674)  ; FMin(a,b)
-  %2677 = fptoui float %2676 to i32
-  %2678 = uitofp i32 %2677 to float
-  %2679 = uitofp i32 %2672 to float
-  %2680 = fptoui float %45 to i32
-  %2681 = fptoui float %178 to i32
-  %2682 = fptoui float %2678 to i32
-  %2683 = fptoui float %2679 to i32
-  %2684 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2685 = extractvalue %dx.types.CBufRet.i32 %2684, 0
-  %2686 = extractvalue %dx.types.CBufRet.i32 %2684, 1
-  %2687 = extractvalue %dx.types.CBufRet.i32 %2684, 2
-  %2688 = extractvalue %dx.types.CBufRet.i32 %2684, 3
-  %2689 = mul i32 %2685, %2680
-  %2690 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2681, i32 %2686, i32 %2689)  ; IMad(a,b,c)
-  %2691 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2682, i32 %2687, i32 %2690)  ; IMad(a,b,c)
-  %2692 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2683, i32 %2688, i32 %2691)  ; IMad(a,b,c)
-  %2693 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2692, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2694 = extractvalue %dx.types.ResRet.i32 %2693, 0
-  %2695 = extractvalue %dx.types.ResRet.i32 %2693, 1
-  %2696 = call double @dx.op.makeDouble.f64(i32 101, i32 %2694, i32 %2695)  ; MakeDouble(lo,hi)
-  %2697 = fptrunc double %2696 to float
-  br label %2787
-
-; <label>:2698                                    ; preds = %2665
-  %2699 = icmp eq i32 %972, 2
-  br i1 %2699, label %2700, label %2787
-
-; <label>:2700                                    ; preds = %2698
-  %2701 = fsub fast float %22, %20
-  %2702 = fcmp fast olt float %1428, %20
-  br i1 %2702, label %2703, label %2716
-
-; <label>:2703                                    ; preds = %2700
-  %2704 = fsub fast float %20, %1428
-  %2705 = fdiv fast float %2704, %2701
-  %2706 = fptoui float %2705 to i32
-  %2707 = uitofp i32 %2706 to float
-  %2708 = fmul fast float %2707, %2701
-  %2709 = fsub fast float %2704, %2708
-  %2710 = and i32 %2706, 1
-  %2711 = icmp eq i32 %2710, 0
-  br i1 %2711, label %2712, label %2714
-
-; <label>:2712                                    ; preds = %2703
-  %2713 = fadd fast float %2709, %20
-  br label %2731
-
-; <label>:2714                                    ; preds = %2703
-  %2715 = fsub fast float %22, %2709
-  br label %2731
-
-; <label>:2716                                    ; preds = %2700
-  %2717 = fcmp fast ogt float %1428, %22
-  br i1 %2717, label %2718, label %2731
-
-; <label>:2718                                    ; preds = %2716
-  %2719 = fsub fast float %1428, %22
-  %2720 = fdiv fast float %2719, %2701
-  %2721 = fptoui float %2720 to i32
-  %2722 = uitofp i32 %2721 to float
-  %2723 = fmul fast float %2722, %2701
-  %2724 = fsub fast float %2719, %2723
-  %2725 = and i32 %2721, 1
-  %2726 = icmp eq i32 %2725, 0
-  br i1 %2726, label %2727, label %2729
-
-; <label>:2727                                    ; preds = %2718
-  %2728 = fsub fast float %22, %2724
-  br label %2731
-
-; <label>:2729                                    ; preds = %2718
-  %2730 = fadd fast float %2724, %20
-  br label %2731
-
-; <label>:2731                                    ; preds = %2729, %2727, %2716, %2714, %2712
-  %2732 = phi float [ %2713, %2712 ], [ %2715, %2714 ], [ %2728, %2727 ], [ %2730, %2729 ], [ %1428, %2716 ]
-  %2733 = fptoui float %2732 to i32
-  %2734 = fsub fast float %24, %20
-  %2735 = fcmp fast olt float %2184, %20
-  br i1 %2735, label %2736, label %2749
-
-; <label>:2736                                    ; preds = %2731
-  %2737 = fsub fast float %20, %2184
-  %2738 = fdiv fast float %2737, %2734
-  %2739 = fptoui float %2738 to i32
-  %2740 = uitofp i32 %2739 to float
-  %2741 = fmul fast float %2740, %2734
-  %2742 = fsub fast float %2737, %2741
-  %2743 = and i32 %2739, 1
-  %2744 = icmp eq i32 %2743, 0
-  br i1 %2744, label %2745, label %2747
-
-; <label>:2745                                    ; preds = %2736
-  %2746 = fadd fast float %2742, %20
-  br label %2764
-
-; <label>:2747                                    ; preds = %2736
-  %2748 = fsub fast float %24, %2742
-  br label %2764
-
-; <label>:2749                                    ; preds = %2731
-  %2750 = fcmp fast ogt float %2184, %24
-  br i1 %2750, label %2751, label %2764
-
-; <label>:2751                                    ; preds = %2749
-  %2752 = fsub fast float %2184, %24
-  %2753 = fdiv fast float %2752, %2734
-  %2754 = fptoui float %2753 to i32
-  %2755 = uitofp i32 %2754 to float
-  %2756 = fmul fast float %2755, %2734
-  %2757 = fsub fast float %2752, %2756
-  %2758 = and i32 %2754, 1
-  %2759 = icmp eq i32 %2758, 0
-  br i1 %2759, label %2760, label %2762
-
-; <label>:2760                                    ; preds = %2751
-  %2761 = fsub fast float %24, %2757
-  br label %2764
-
-; <label>:2762                                    ; preds = %2751
-  %2763 = fadd fast float %2757, %20
-  br label %2764
-
-; <label>:2764                                    ; preds = %2762, %2760, %2749, %2747, %2745
-  %2765 = phi float [ %2746, %2745 ], [ %2748, %2747 ], [ %2761, %2760 ], [ %2763, %2762 ], [ %2184, %2749 ]
-  %2766 = fptoui float %2765 to i32
-  %2767 = uitofp i32 %2766 to float
-  %2768 = uitofp i32 %2733 to float
-  %2769 = fptoui float %45 to i32
-  %2770 = fptoui float %178 to i32
-  %2771 = fptoui float %2767 to i32
-  %2772 = fptoui float %2768 to i32
-  %2773 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2774 = extractvalue %dx.types.CBufRet.i32 %2773, 0
-  %2775 = extractvalue %dx.types.CBufRet.i32 %2773, 1
-  %2776 = extractvalue %dx.types.CBufRet.i32 %2773, 2
-  %2777 = extractvalue %dx.types.CBufRet.i32 %2773, 3
-  %2778 = mul i32 %2774, %2769
-  %2779 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2770, i32 %2775, i32 %2778)  ; IMad(a,b,c)
-  %2780 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2771, i32 %2776, i32 %2779)  ; IMad(a,b,c)
-  %2781 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2772, i32 %2777, i32 %2780)  ; IMad(a,b,c)
-  %2782 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2781, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2783 = extractvalue %dx.types.ResRet.i32 %2782, 0
-  %2784 = extractvalue %dx.types.ResRet.i32 %2782, 1
-  %2785 = call double @dx.op.makeDouble.f64(i32 101, i32 %2783, i32 %2784)  ; MakeDouble(lo,hi)
-  %2786 = fptrunc double %2785 to float
-  br label %2787
-
-; <label>:2787                                    ; preds = %2764, %2698, %2667, %2648, %2638
-  %2788 = phi float [ %2664, %2648 ], [ 0.000000e+00, %2638 ], [ %2697, %2667 ], [ %2786, %2764 ], [ 0.000000e+00, %2698 ]
-  %2789 = fadd fast float %970, 2.000000e+00
-  br i1 %973, label %2790, label %2817
-
-; <label>:2790                                    ; preds = %2787
-  %2791 = fcmp fast oge float %969, 0.000000e+00
-  %2792 = fptoui float %969 to i32
-  %2793 = icmp ult i32 %2792, %13
-  %2794 = and i1 %2791, %2793
-  %2795 = fcmp fast oge float %2789, 0.000000e+00
-  %2796 = and i1 %2795, %2794
-  %2797 = fptoui float %2789 to i32
-  %2798 = icmp ult i32 %2797, %15
-  %2799 = and i1 %2798, %2796
-  br i1 %2799, label %2800, label %2939
-
-; <label>:2800                                    ; preds = %2790
-  %2801 = fptoui float %45 to i32
-  %2802 = fptoui float %178 to i32
-  %2803 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2804 = extractvalue %dx.types.CBufRet.i32 %2803, 0
-  %2805 = extractvalue %dx.types.CBufRet.i32 %2803, 1
-  %2806 = extractvalue %dx.types.CBufRet.i32 %2803, 2
-  %2807 = extractvalue %dx.types.CBufRet.i32 %2803, 3
-  %2808 = mul i32 %2804, %2801
-  %2809 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2802, i32 %2805, i32 %2808)  ; IMad(a,b,c)
-  %2810 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2797, i32 %2806, i32 %2809)  ; IMad(a,b,c)
-  %2811 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2792, i32 %2807, i32 %2810)  ; IMad(a,b,c)
-  %2812 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2811, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2813 = extractvalue %dx.types.ResRet.i32 %2812, 0
-  %2814 = extractvalue %dx.types.ResRet.i32 %2812, 1
-  %2815 = call double @dx.op.makeDouble.f64(i32 101, i32 %2813, i32 %2814)  ; MakeDouble(lo,hi)
-  %2816 = fptrunc double %2815 to float
-  br label %2939
-
-; <label>:2817                                    ; preds = %2787
-  %2818 = icmp eq i32 %972, 1
-  br i1 %2818, label %2819, label %2850
-
-; <label>:2819                                    ; preds = %2817
-  %2820 = add i32 %13, -1
-  %2821 = uitofp i32 %2820 to float
-  %2822 = call float @dx.op.binary.f32(i32 35, float %969, float 0.000000e+00)  ; FMax(a,b)
-  %2823 = call float @dx.op.binary.f32(i32 36, float %2822, float %2821)  ; FMin(a,b)
-  %2824 = fptoui float %2823 to i32
-  %2825 = add i32 %15, -1
-  %2826 = uitofp i32 %2825 to float
-  %2827 = call float @dx.op.binary.f32(i32 35, float %2789, float 0.000000e+00)  ; FMax(a,b)
-  %2828 = call float @dx.op.binary.f32(i32 36, float %2827, float %2826)  ; FMin(a,b)
-  %2829 = fptoui float %2828 to i32
-  %2830 = uitofp i32 %2829 to float
-  %2831 = uitofp i32 %2824 to float
-  %2832 = fptoui float %45 to i32
-  %2833 = fptoui float %178 to i32
-  %2834 = fptoui float %2830 to i32
-  %2835 = fptoui float %2831 to i32
-  %2836 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2837 = extractvalue %dx.types.CBufRet.i32 %2836, 0
-  %2838 = extractvalue %dx.types.CBufRet.i32 %2836, 1
-  %2839 = extractvalue %dx.types.CBufRet.i32 %2836, 2
-  %2840 = extractvalue %dx.types.CBufRet.i32 %2836, 3
-  %2841 = mul i32 %2837, %2832
-  %2842 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2833, i32 %2838, i32 %2841)  ; IMad(a,b,c)
-  %2843 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2834, i32 %2839, i32 %2842)  ; IMad(a,b,c)
-  %2844 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2835, i32 %2840, i32 %2843)  ; IMad(a,b,c)
-  %2845 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2844, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2846 = extractvalue %dx.types.ResRet.i32 %2845, 0
-  %2847 = extractvalue %dx.types.ResRet.i32 %2845, 1
-  %2848 = call double @dx.op.makeDouble.f64(i32 101, i32 %2846, i32 %2847)  ; MakeDouble(lo,hi)
-  %2849 = fptrunc double %2848 to float
-  br label %2939
-
-; <label>:2850                                    ; preds = %2817
-  %2851 = icmp eq i32 %972, 2
-  br i1 %2851, label %2852, label %2939
-
-; <label>:2852                                    ; preds = %2850
-  %2853 = fsub fast float %22, %20
-  %2854 = fcmp fast olt float %969, %20
-  br i1 %2854, label %2855, label %2868
-
-; <label>:2855                                    ; preds = %2852
-  %2856 = fsub fast float %20, %969
-  %2857 = fdiv fast float %2856, %2853
-  %2858 = fptoui float %2857 to i32
-  %2859 = uitofp i32 %2858 to float
-  %2860 = fmul fast float %2859, %2853
-  %2861 = fsub fast float %2856, %2860
-  %2862 = and i32 %2858, 1
-  %2863 = icmp eq i32 %2862, 0
-  br i1 %2863, label %2864, label %2866
-
-; <label>:2864                                    ; preds = %2855
-  %2865 = fadd fast float %2861, %20
-  br label %2883
-
-; <label>:2866                                    ; preds = %2855
-  %2867 = fsub fast float %22, %2861
-  br label %2883
-
-; <label>:2868                                    ; preds = %2852
-  %2869 = fcmp fast ogt float %969, %22
-  br i1 %2869, label %2870, label %2883
-
-; <label>:2870                                    ; preds = %2868
-  %2871 = fsub fast float %969, %22
-  %2872 = fdiv fast float %2871, %2853
-  %2873 = fptoui float %2872 to i32
-  %2874 = uitofp i32 %2873 to float
-  %2875 = fmul fast float %2874, %2853
-  %2876 = fsub fast float %2871, %2875
-  %2877 = and i32 %2873, 1
-  %2878 = icmp eq i32 %2877, 0
-  br i1 %2878, label %2879, label %2881
-
-; <label>:2879                                    ; preds = %2870
-  %2880 = fsub fast float %22, %2876
-  br label %2883
-
-; <label>:2881                                    ; preds = %2870
-  %2882 = fadd fast float %2876, %20
-  br label %2883
-
-; <label>:2883                                    ; preds = %2881, %2879, %2868, %2866, %2864
-  %2884 = phi float [ %2865, %2864 ], [ %2867, %2866 ], [ %2880, %2879 ], [ %2882, %2881 ], [ %969, %2868 ]
-  %2885 = fptoui float %2884 to i32
-  %2886 = fsub fast float %24, %20
-  %2887 = fcmp fast olt float %2789, %20
-  br i1 %2887, label %2888, label %2901
-
-; <label>:2888                                    ; preds = %2883
-  %2889 = fsub fast float %20, %2789
-  %2890 = fdiv fast float %2889, %2886
-  %2891 = fptoui float %2890 to i32
-  %2892 = uitofp i32 %2891 to float
-  %2893 = fmul fast float %2892, %2886
-  %2894 = fsub fast float %2889, %2893
-  %2895 = and i32 %2891, 1
-  %2896 = icmp eq i32 %2895, 0
-  br i1 %2896, label %2897, label %2899
-
-; <label>:2897                                    ; preds = %2888
-  %2898 = fadd fast float %2894, %20
-  br label %2916
-
-; <label>:2899                                    ; preds = %2888
-  %2900 = fsub fast float %24, %2894
-  br label %2916
-
-; <label>:2901                                    ; preds = %2883
-  %2902 = fcmp fast ogt float %2789, %24
-  br i1 %2902, label %2903, label %2916
-
-; <label>:2903                                    ; preds = %2901
-  %2904 = fsub fast float %2789, %24
-  %2905 = fdiv fast float %2904, %2886
-  %2906 = fptoui float %2905 to i32
-  %2907 = uitofp i32 %2906 to float
-  %2908 = fmul fast float %2907, %2886
-  %2909 = fsub fast float %2904, %2908
-  %2910 = and i32 %2906, 1
-  %2911 = icmp eq i32 %2910, 0
-  br i1 %2911, label %2912, label %2914
-
-; <label>:2912                                    ; preds = %2903
-  %2913 = fsub fast float %24, %2909
-  br label %2916
-
-; <label>:2914                                    ; preds = %2903
-  %2915 = fadd fast float %2909, %20
-  br label %2916
-
-; <label>:2916                                    ; preds = %2914, %2912, %2901, %2899, %2897
-  %2917 = phi float [ %2898, %2897 ], [ %2900, %2899 ], [ %2913, %2912 ], [ %2915, %2914 ], [ %2789, %2901 ]
-  %2918 = fptoui float %2917 to i32
-  %2919 = uitofp i32 %2918 to float
-  %2920 = uitofp i32 %2885 to float
-  %2921 = fptoui float %45 to i32
-  %2922 = fptoui float %178 to i32
-  %2923 = fptoui float %2919 to i32
-  %2924 = fptoui float %2920 to i32
-  %2925 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2926 = extractvalue %dx.types.CBufRet.i32 %2925, 0
-  %2927 = extractvalue %dx.types.CBufRet.i32 %2925, 1
-  %2928 = extractvalue %dx.types.CBufRet.i32 %2925, 2
-  %2929 = extractvalue %dx.types.CBufRet.i32 %2925, 3
-  %2930 = mul i32 %2926, %2921
-  %2931 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2922, i32 %2927, i32 %2930)  ; IMad(a,b,c)
-  %2932 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2923, i32 %2928, i32 %2931)  ; IMad(a,b,c)
-  %2933 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2924, i32 %2929, i32 %2932)  ; IMad(a,b,c)
-  %2934 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2933, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2935 = extractvalue %dx.types.ResRet.i32 %2934, 0
-  %2936 = extractvalue %dx.types.ResRet.i32 %2934, 1
-  %2937 = call double @dx.op.makeDouble.f64(i32 101, i32 %2935, i32 %2936)  ; MakeDouble(lo,hi)
-  %2938 = fptrunc double %2937 to float
-  br label %2939
-
-; <label>:2939                                    ; preds = %2916, %2850, %2819, %2800, %2790
-  %2940 = phi float [ %2816, %2800 ], [ 0.000000e+00, %2790 ], [ %2849, %2819 ], [ %2938, %2916 ], [ 0.000000e+00, %2850 ]
-  br i1 %973, label %2941, label %2968
-
-; <label>:2941                                    ; preds = %2939
-  %2942 = fcmp fast oge float %968, 0.000000e+00
-  %2943 = fptoui float %968 to i32
-  %2944 = icmp ult i32 %2943, %13
-  %2945 = and i1 %2942, %2944
-  %2946 = fcmp fast oge float %2789, 0.000000e+00
-  %2947 = and i1 %2946, %2945
-  %2948 = fptoui float %2789 to i32
-  %2949 = icmp ult i32 %2948, %15
-  %2950 = and i1 %2949, %2947
-  br i1 %2950, label %2951, label %3090
-
-; <label>:2951                                    ; preds = %2941
-  %2952 = fptoui float %45 to i32
-  %2953 = fptoui float %178 to i32
-  %2954 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2955 = extractvalue %dx.types.CBufRet.i32 %2954, 0
-  %2956 = extractvalue %dx.types.CBufRet.i32 %2954, 1
-  %2957 = extractvalue %dx.types.CBufRet.i32 %2954, 2
-  %2958 = extractvalue %dx.types.CBufRet.i32 %2954, 3
-  %2959 = mul i32 %2955, %2952
-  %2960 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2953, i32 %2956, i32 %2959)  ; IMad(a,b,c)
-  %2961 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2948, i32 %2957, i32 %2960)  ; IMad(a,b,c)
-  %2962 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2943, i32 %2958, i32 %2961)  ; IMad(a,b,c)
-  %2963 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2962, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2964 = extractvalue %dx.types.ResRet.i32 %2963, 0
-  %2965 = extractvalue %dx.types.ResRet.i32 %2963, 1
-  %2966 = call double @dx.op.makeDouble.f64(i32 101, i32 %2964, i32 %2965)  ; MakeDouble(lo,hi)
-  %2967 = fptrunc double %2966 to float
-  br label %3090
-
-; <label>:2968                                    ; preds = %2939
-  %2969 = icmp eq i32 %972, 1
-  br i1 %2969, label %2970, label %3001
-
-; <label>:2970                                    ; preds = %2968
-  %2971 = add i32 %13, -1
-  %2972 = uitofp i32 %2971 to float
-  %2973 = call float @dx.op.binary.f32(i32 35, float %968, float 0.000000e+00)  ; FMax(a,b)
-  %2974 = call float @dx.op.binary.f32(i32 36, float %2973, float %2972)  ; FMin(a,b)
-  %2975 = fptoui float %2974 to i32
-  %2976 = add i32 %15, -1
-  %2977 = uitofp i32 %2976 to float
-  %2978 = call float @dx.op.binary.f32(i32 35, float %2789, float 0.000000e+00)  ; FMax(a,b)
-  %2979 = call float @dx.op.binary.f32(i32 36, float %2978, float %2977)  ; FMin(a,b)
-  %2980 = fptoui float %2979 to i32
-  %2981 = uitofp i32 %2980 to float
-  %2982 = uitofp i32 %2975 to float
-  %2983 = fptoui float %45 to i32
-  %2984 = fptoui float %178 to i32
-  %2985 = fptoui float %2981 to i32
-  %2986 = fptoui float %2982 to i32
-  %2987 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2988 = extractvalue %dx.types.CBufRet.i32 %2987, 0
-  %2989 = extractvalue %dx.types.CBufRet.i32 %2987, 1
-  %2990 = extractvalue %dx.types.CBufRet.i32 %2987, 2
-  %2991 = extractvalue %dx.types.CBufRet.i32 %2987, 3
-  %2992 = mul i32 %2988, %2983
-  %2993 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2984, i32 %2989, i32 %2992)  ; IMad(a,b,c)
-  %2994 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2985, i32 %2990, i32 %2993)  ; IMad(a,b,c)
-  %2995 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2986, i32 %2991, i32 %2994)  ; IMad(a,b,c)
-  %2996 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2995, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2997 = extractvalue %dx.types.ResRet.i32 %2996, 0
-  %2998 = extractvalue %dx.types.ResRet.i32 %2996, 1
-  %2999 = call double @dx.op.makeDouble.f64(i32 101, i32 %2997, i32 %2998)  ; MakeDouble(lo,hi)
-  %3000 = fptrunc double %2999 to float
-  br label %3090
-
-; <label>:3001                                    ; preds = %2968
-  %3002 = icmp eq i32 %972, 2
-  br i1 %3002, label %3003, label %3090
-
-; <label>:3003                                    ; preds = %3001
-  %3004 = fsub fast float %22, %20
-  %3005 = fcmp fast olt float %968, %20
-  br i1 %3005, label %3006, label %3019
-
-; <label>:3006                                    ; preds = %3003
-  %3007 = fsub fast float %20, %968
-  %3008 = fdiv fast float %3007, %3004
-  %3009 = fptoui float %3008 to i32
-  %3010 = uitofp i32 %3009 to float
-  %3011 = fmul fast float %3010, %3004
-  %3012 = fsub fast float %3007, %3011
-  %3013 = and i32 %3009, 1
-  %3014 = icmp eq i32 %3013, 0
-  br i1 %3014, label %3015, label %3017
-
-; <label>:3015                                    ; preds = %3006
-  %3016 = fadd fast float %3012, %20
-  br label %3034
-
-; <label>:3017                                    ; preds = %3006
-  %3018 = fsub fast float %22, %3012
-  br label %3034
-
-; <label>:3019                                    ; preds = %3003
-  %3020 = fcmp fast ogt float %968, %22
-  br i1 %3020, label %3021, label %3034
-
-; <label>:3021                                    ; preds = %3019
-  %3022 = fsub fast float %968, %22
-  %3023 = fdiv fast float %3022, %3004
-  %3024 = fptoui float %3023 to i32
-  %3025 = uitofp i32 %3024 to float
-  %3026 = fmul fast float %3025, %3004
-  %3027 = fsub fast float %3022, %3026
-  %3028 = and i32 %3024, 1
-  %3029 = icmp eq i32 %3028, 0
-  br i1 %3029, label %3030, label %3032
-
-; <label>:3030                                    ; preds = %3021
-  %3031 = fsub fast float %22, %3027
-  br label %3034
-
-; <label>:3032                                    ; preds = %3021
-  %3033 = fadd fast float %3027, %20
-  br label %3034
-
-; <label>:3034                                    ; preds = %3032, %3030, %3019, %3017, %3015
-  %3035 = phi float [ %3016, %3015 ], [ %3018, %3017 ], [ %3031, %3030 ], [ %3033, %3032 ], [ %968, %3019 ]
-  %3036 = fptoui float %3035 to i32
-  %3037 = fsub fast float %24, %20
-  %3038 = fcmp fast olt float %2789, %20
-  br i1 %3038, label %3039, label %3052
-
-; <label>:3039                                    ; preds = %3034
-  %3040 = fsub fast float %20, %2789
-  %3041 = fdiv fast float %3040, %3037
-  %3042 = fptoui float %3041 to i32
-  %3043 = uitofp i32 %3042 to float
-  %3044 = fmul fast float %3043, %3037
-  %3045 = fsub fast float %3040, %3044
-  %3046 = and i32 %3042, 1
-  %3047 = icmp eq i32 %3046, 0
-  br i1 %3047, label %3048, label %3050
-
-; <label>:3048                                    ; preds = %3039
-  %3049 = fadd fast float %3045, %20
-  br label %3067
-
-; <label>:3050                                    ; preds = %3039
-  %3051 = fsub fast float %24, %3045
-  br label %3067
-
-; <label>:3052                                    ; preds = %3034
-  %3053 = fcmp fast ogt float %2789, %24
-  br i1 %3053, label %3054, label %3067
-
-; <label>:3054                                    ; preds = %3052
-  %3055 = fsub fast float %2789, %24
-  %3056 = fdiv fast float %3055, %3037
-  %3057 = fptoui float %3056 to i32
-  %3058 = uitofp i32 %3057 to float
-  %3059 = fmul fast float %3058, %3037
-  %3060 = fsub fast float %3055, %3059
-  %3061 = and i32 %3057, 1
-  %3062 = icmp eq i32 %3061, 0
-  br i1 %3062, label %3063, label %3065
-
-; <label>:3063                                    ; preds = %3054
-  %3064 = fsub fast float %24, %3060
-  br label %3067
-
-; <label>:3065                                    ; preds = %3054
-  %3066 = fadd fast float %3060, %20
-  br label %3067
-
-; <label>:3067                                    ; preds = %3065, %3063, %3052, %3050, %3048
-  %3068 = phi float [ %3049, %3048 ], [ %3051, %3050 ], [ %3064, %3063 ], [ %3066, %3065 ], [ %2789, %3052 ]
-  %3069 = fptoui float %3068 to i32
-  %3070 = uitofp i32 %3069 to float
-  %3071 = uitofp i32 %3036 to float
-  %3072 = fptoui float %45 to i32
-  %3073 = fptoui float %178 to i32
-  %3074 = fptoui float %3070 to i32
-  %3075 = fptoui float %3071 to i32
-  %3076 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3077 = extractvalue %dx.types.CBufRet.i32 %3076, 0
-  %3078 = extractvalue %dx.types.CBufRet.i32 %3076, 1
-  %3079 = extractvalue %dx.types.CBufRet.i32 %3076, 2
-  %3080 = extractvalue %dx.types.CBufRet.i32 %3076, 3
-  %3081 = mul i32 %3077, %3072
-  %3082 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3073, i32 %3078, i32 %3081)  ; IMad(a,b,c)
-  %3083 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3074, i32 %3079, i32 %3082)  ; IMad(a,b,c)
-  %3084 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3075, i32 %3080, i32 %3083)  ; IMad(a,b,c)
-  %3085 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3084, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3086 = extractvalue %dx.types.ResRet.i32 %3085, 0
-  %3087 = extractvalue %dx.types.ResRet.i32 %3085, 1
-  %3088 = call double @dx.op.makeDouble.f64(i32 101, i32 %3086, i32 %3087)  ; MakeDouble(lo,hi)
-  %3089 = fptrunc double %3088 to float
-  br label %3090
-
-; <label>:3090                                    ; preds = %3067, %3001, %2970, %2951, %2941
-  %3091 = phi float [ %2967, %2951 ], [ 0.000000e+00, %2941 ], [ %3000, %2970 ], [ %3089, %3067 ], [ 0.000000e+00, %3001 ]
-  br i1 %973, label %3092, label %3119
-
-; <label>:3092                                    ; preds = %3090
-  %3093 = fcmp fast oge float %1276, 0.000000e+00
-  %3094 = fptoui float %1276 to i32
-  %3095 = icmp ult i32 %3094, %13
-  %3096 = and i1 %3093, %3095
-  %3097 = fcmp fast oge float %2789, 0.000000e+00
-  %3098 = and i1 %3097, %3096
-  %3099 = fptoui float %2789 to i32
-  %3100 = icmp ult i32 %3099, %15
-  %3101 = and i1 %3100, %3098
-  br i1 %3101, label %3102, label %3241
-
-; <label>:3102                                    ; preds = %3092
-  %3103 = fptoui float %45 to i32
-  %3104 = fptoui float %178 to i32
-  %3105 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3106 = extractvalue %dx.types.CBufRet.i32 %3105, 0
-  %3107 = extractvalue %dx.types.CBufRet.i32 %3105, 1
-  %3108 = extractvalue %dx.types.CBufRet.i32 %3105, 2
-  %3109 = extractvalue %dx.types.CBufRet.i32 %3105, 3
-  %3110 = mul i32 %3106, %3103
-  %3111 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3104, i32 %3107, i32 %3110)  ; IMad(a,b,c)
-  %3112 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3099, i32 %3108, i32 %3111)  ; IMad(a,b,c)
-  %3113 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3094, i32 %3109, i32 %3112)  ; IMad(a,b,c)
-  %3114 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3113, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3115 = extractvalue %dx.types.ResRet.i32 %3114, 0
-  %3116 = extractvalue %dx.types.ResRet.i32 %3114, 1
-  %3117 = call double @dx.op.makeDouble.f64(i32 101, i32 %3115, i32 %3116)  ; MakeDouble(lo,hi)
-  %3118 = fptrunc double %3117 to float
-  br label %3241
-
-; <label>:3119                                    ; preds = %3090
-  %3120 = icmp eq i32 %972, 1
-  br i1 %3120, label %3121, label %3152
-
-; <label>:3121                                    ; preds = %3119
-  %3122 = add i32 %13, -1
-  %3123 = uitofp i32 %3122 to float
-  %3124 = call float @dx.op.binary.f32(i32 35, float %1276, float 0.000000e+00)  ; FMax(a,b)
-  %3125 = call float @dx.op.binary.f32(i32 36, float %3124, float %3123)  ; FMin(a,b)
-  %3126 = fptoui float %3125 to i32
-  %3127 = add i32 %15, -1
-  %3128 = uitofp i32 %3127 to float
-  %3129 = call float @dx.op.binary.f32(i32 35, float %2789, float 0.000000e+00)  ; FMax(a,b)
-  %3130 = call float @dx.op.binary.f32(i32 36, float %3129, float %3128)  ; FMin(a,b)
-  %3131 = fptoui float %3130 to i32
-  %3132 = uitofp i32 %3131 to float
-  %3133 = uitofp i32 %3126 to float
-  %3134 = fptoui float %45 to i32
-  %3135 = fptoui float %178 to i32
-  %3136 = fptoui float %3132 to i32
-  %3137 = fptoui float %3133 to i32
-  %3138 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3139 = extractvalue %dx.types.CBufRet.i32 %3138, 0
-  %3140 = extractvalue %dx.types.CBufRet.i32 %3138, 1
-  %3141 = extractvalue %dx.types.CBufRet.i32 %3138, 2
-  %3142 = extractvalue %dx.types.CBufRet.i32 %3138, 3
-  %3143 = mul i32 %3139, %3134
-  %3144 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3135, i32 %3140, i32 %3143)  ; IMad(a,b,c)
-  %3145 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3136, i32 %3141, i32 %3144)  ; IMad(a,b,c)
-  %3146 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3137, i32 %3142, i32 %3145)  ; IMad(a,b,c)
-  %3147 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3146, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3148 = extractvalue %dx.types.ResRet.i32 %3147, 0
-  %3149 = extractvalue %dx.types.ResRet.i32 %3147, 1
-  %3150 = call double @dx.op.makeDouble.f64(i32 101, i32 %3148, i32 %3149)  ; MakeDouble(lo,hi)
-  %3151 = fptrunc double %3150 to float
-  br label %3241
-
-; <label>:3152                                    ; preds = %3119
-  %3153 = icmp eq i32 %972, 2
-  br i1 %3153, label %3154, label %3241
-
-; <label>:3154                                    ; preds = %3152
-  %3155 = fsub fast float %22, %20
-  %3156 = fcmp fast olt float %1276, %20
-  br i1 %3156, label %3157, label %3170
-
-; <label>:3157                                    ; preds = %3154
-  %3158 = fsub fast float %20, %1276
-  %3159 = fdiv fast float %3158, %3155
-  %3160 = fptoui float %3159 to i32
-  %3161 = uitofp i32 %3160 to float
-  %3162 = fmul fast float %3161, %3155
-  %3163 = fsub fast float %3158, %3162
-  %3164 = and i32 %3160, 1
-  %3165 = icmp eq i32 %3164, 0
-  br i1 %3165, label %3166, label %3168
-
-; <label>:3166                                    ; preds = %3157
-  %3167 = fadd fast float %3163, %20
-  br label %3185
-
-; <label>:3168                                    ; preds = %3157
-  %3169 = fsub fast float %22, %3163
-  br label %3185
-
-; <label>:3170                                    ; preds = %3154
-  %3171 = fcmp fast ogt float %1276, %22
-  br i1 %3171, label %3172, label %3185
-
-; <label>:3172                                    ; preds = %3170
-  %3173 = fsub fast float %1276, %22
-  %3174 = fdiv fast float %3173, %3155
-  %3175 = fptoui float %3174 to i32
-  %3176 = uitofp i32 %3175 to float
-  %3177 = fmul fast float %3176, %3155
-  %3178 = fsub fast float %3173, %3177
-  %3179 = and i32 %3175, 1
-  %3180 = icmp eq i32 %3179, 0
-  br i1 %3180, label %3181, label %3183
-
-; <label>:3181                                    ; preds = %3172
-  %3182 = fsub fast float %22, %3178
-  br label %3185
-
-; <label>:3183                                    ; preds = %3172
-  %3184 = fadd fast float %3178, %20
-  br label %3185
-
-; <label>:3185                                    ; preds = %3183, %3181, %3170, %3168, %3166
-  %3186 = phi float [ %3167, %3166 ], [ %3169, %3168 ], [ %3182, %3181 ], [ %3184, %3183 ], [ %1276, %3170 ]
-  %3187 = fptoui float %3186 to i32
-  %3188 = fsub fast float %24, %20
-  %3189 = fcmp fast olt float %2789, %20
-  br i1 %3189, label %3190, label %3203
-
-; <label>:3190                                    ; preds = %3185
-  %3191 = fsub fast float %20, %2789
-  %3192 = fdiv fast float %3191, %3188
-  %3193 = fptoui float %3192 to i32
-  %3194 = uitofp i32 %3193 to float
-  %3195 = fmul fast float %3194, %3188
-  %3196 = fsub fast float %3191, %3195
-  %3197 = and i32 %3193, 1
-  %3198 = icmp eq i32 %3197, 0
-  br i1 %3198, label %3199, label %3201
-
-; <label>:3199                                    ; preds = %3190
-  %3200 = fadd fast float %3196, %20
-  br label %3218
-
-; <label>:3201                                    ; preds = %3190
-  %3202 = fsub fast float %24, %3196
-  br label %3218
-
-; <label>:3203                                    ; preds = %3185
-  %3204 = fcmp fast ogt float %2789, %24
-  br i1 %3204, label %3205, label %3218
-
-; <label>:3205                                    ; preds = %3203
-  %3206 = fsub fast float %2789, %24
-  %3207 = fdiv fast float %3206, %3188
-  %3208 = fptoui float %3207 to i32
-  %3209 = uitofp i32 %3208 to float
-  %3210 = fmul fast float %3209, %3188
-  %3211 = fsub fast float %3206, %3210
-  %3212 = and i32 %3208, 1
-  %3213 = icmp eq i32 %3212, 0
-  br i1 %3213, label %3214, label %3216
-
-; <label>:3214                                    ; preds = %3205
-  %3215 = fsub fast float %24, %3211
-  br label %3218
-
-; <label>:3216                                    ; preds = %3205
-  %3217 = fadd fast float %3211, %20
-  br label %3218
-
-; <label>:3218                                    ; preds = %3216, %3214, %3203, %3201, %3199
-  %3219 = phi float [ %3200, %3199 ], [ %3202, %3201 ], [ %3215, %3214 ], [ %3217, %3216 ], [ %2789, %3203 ]
-  %3220 = fptoui float %3219 to i32
-  %3221 = uitofp i32 %3220 to float
-  %3222 = uitofp i32 %3187 to float
-  %3223 = fptoui float %45 to i32
-  %3224 = fptoui float %178 to i32
-  %3225 = fptoui float %3221 to i32
-  %3226 = fptoui float %3222 to i32
-  %3227 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3228 = extractvalue %dx.types.CBufRet.i32 %3227, 0
-  %3229 = extractvalue %dx.types.CBufRet.i32 %3227, 1
-  %3230 = extractvalue %dx.types.CBufRet.i32 %3227, 2
-  %3231 = extractvalue %dx.types.CBufRet.i32 %3227, 3
-  %3232 = mul i32 %3228, %3223
-  %3233 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3224, i32 %3229, i32 %3232)  ; IMad(a,b,c)
-  %3234 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3225, i32 %3230, i32 %3233)  ; IMad(a,b,c)
-  %3235 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3226, i32 %3231, i32 %3234)  ; IMad(a,b,c)
-  %3236 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3235, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3237 = extractvalue %dx.types.ResRet.i32 %3236, 0
-  %3238 = extractvalue %dx.types.ResRet.i32 %3236, 1
-  %3239 = call double @dx.op.makeDouble.f64(i32 101, i32 %3237, i32 %3238)  ; MakeDouble(lo,hi)
-  %3240 = fptrunc double %3239 to float
-  br label %3241
-
-; <label>:3241                                    ; preds = %3218, %3152, %3121, %3102, %3092
-  %3242 = phi float [ %3118, %3102 ], [ 0.000000e+00, %3092 ], [ %3151, %3121 ], [ %3240, %3218 ], [ 0.000000e+00, %3152 ]
-  br i1 %973, label %3243, label %3270
-
-; <label>:3243                                    ; preds = %3241
-  %3244 = fcmp fast oge float %1428, 0.000000e+00
-  %3245 = fptoui float %1428 to i32
-  %3246 = icmp ult i32 %3245, %13
-  %3247 = and i1 %3244, %3246
-  %3248 = fcmp fast oge float %2789, 0.000000e+00
-  %3249 = and i1 %3248, %3247
-  %3250 = fptoui float %2789 to i32
-  %3251 = icmp ult i32 %3250, %15
-  %3252 = and i1 %3251, %3249
-  br i1 %3252, label %3253, label %3392
-
-; <label>:3253                                    ; preds = %3243
-  %3254 = fptoui float %45 to i32
-  %3255 = fptoui float %178 to i32
-  %3256 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3257 = extractvalue %dx.types.CBufRet.i32 %3256, 0
-  %3258 = extractvalue %dx.types.CBufRet.i32 %3256, 1
-  %3259 = extractvalue %dx.types.CBufRet.i32 %3256, 2
-  %3260 = extractvalue %dx.types.CBufRet.i32 %3256, 3
-  %3261 = mul i32 %3257, %3254
-  %3262 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3255, i32 %3258, i32 %3261)  ; IMad(a,b,c)
-  %3263 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3250, i32 %3259, i32 %3262)  ; IMad(a,b,c)
-  %3264 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3245, i32 %3260, i32 %3263)  ; IMad(a,b,c)
-  %3265 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3264, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3266 = extractvalue %dx.types.ResRet.i32 %3265, 0
-  %3267 = extractvalue %dx.types.ResRet.i32 %3265, 1
-  %3268 = call double @dx.op.makeDouble.f64(i32 101, i32 %3266, i32 %3267)  ; MakeDouble(lo,hi)
-  %3269 = fptrunc double %3268 to float
-  br label %3392
-
-; <label>:3270                                    ; preds = %3241
-  %3271 = icmp eq i32 %972, 1
-  br i1 %3271, label %3272, label %3303
-
-; <label>:3272                                    ; preds = %3270
-  %3273 = add i32 %13, -1
-  %3274 = uitofp i32 %3273 to float
-  %3275 = call float @dx.op.binary.f32(i32 35, float %1428, float 0.000000e+00)  ; FMax(a,b)
-  %3276 = call float @dx.op.binary.f32(i32 36, float %3275, float %3274)  ; FMin(a,b)
-  %3277 = fptoui float %3276 to i32
-  %3278 = add i32 %15, -1
-  %3279 = uitofp i32 %3278 to float
-  %3280 = call float @dx.op.binary.f32(i32 35, float %2789, float 0.000000e+00)  ; FMax(a,b)
-  %3281 = call float @dx.op.binary.f32(i32 36, float %3280, float %3279)  ; FMin(a,b)
-  %3282 = fptoui float %3281 to i32
-  %3283 = uitofp i32 %3282 to float
-  %3284 = uitofp i32 %3277 to float
-  %3285 = fptoui float %45 to i32
-  %3286 = fptoui float %178 to i32
-  %3287 = fptoui float %3283 to i32
-  %3288 = fptoui float %3284 to i32
-  %3289 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3290 = extractvalue %dx.types.CBufRet.i32 %3289, 0
-  %3291 = extractvalue %dx.types.CBufRet.i32 %3289, 1
-  %3292 = extractvalue %dx.types.CBufRet.i32 %3289, 2
-  %3293 = extractvalue %dx.types.CBufRet.i32 %3289, 3
-  %3294 = mul i32 %3290, %3285
-  %3295 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3286, i32 %3291, i32 %3294)  ; IMad(a,b,c)
-  %3296 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3287, i32 %3292, i32 %3295)  ; IMad(a,b,c)
-  %3297 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3288, i32 %3293, i32 %3296)  ; IMad(a,b,c)
-  %3298 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3297, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3299 = extractvalue %dx.types.ResRet.i32 %3298, 0
-  %3300 = extractvalue %dx.types.ResRet.i32 %3298, 1
-  %3301 = call double @dx.op.makeDouble.f64(i32 101, i32 %3299, i32 %3300)  ; MakeDouble(lo,hi)
-  %3302 = fptrunc double %3301 to float
-  br label %3392
-
-; <label>:3303                                    ; preds = %3270
-  %3304 = icmp eq i32 %972, 2
-  br i1 %3304, label %3305, label %3392
-
-; <label>:3305                                    ; preds = %3303
-  %3306 = fsub fast float %22, %20
-  %3307 = fcmp fast olt float %1428, %20
-  br i1 %3307, label %3308, label %3321
-
-; <label>:3308                                    ; preds = %3305
-  %3309 = fsub fast float %20, %1428
-  %3310 = fdiv fast float %3309, %3306
-  %3311 = fptoui float %3310 to i32
-  %3312 = uitofp i32 %3311 to float
-  %3313 = fmul fast float %3312, %3306
-  %3314 = fsub fast float %3309, %3313
-  %3315 = and i32 %3311, 1
-  %3316 = icmp eq i32 %3315, 0
-  br i1 %3316, label %3317, label %3319
-
-; <label>:3317                                    ; preds = %3308
-  %3318 = fadd fast float %3314, %20
-  br label %3336
-
-; <label>:3319                                    ; preds = %3308
-  %3320 = fsub fast float %22, %3314
-  br label %3336
-
-; <label>:3321                                    ; preds = %3305
-  %3322 = fcmp fast ogt float %1428, %22
-  br i1 %3322, label %3323, label %3336
-
-; <label>:3323                                    ; preds = %3321
-  %3324 = fsub fast float %1428, %22
-  %3325 = fdiv fast float %3324, %3306
-  %3326 = fptoui float %3325 to i32
-  %3327 = uitofp i32 %3326 to float
-  %3328 = fmul fast float %3327, %3306
-  %3329 = fsub fast float %3324, %3328
-  %3330 = and i32 %3326, 1
-  %3331 = icmp eq i32 %3330, 0
-  br i1 %3331, label %3332, label %3334
-
-; <label>:3332                                    ; preds = %3323
-  %3333 = fsub fast float %22, %3329
-  br label %3336
-
-; <label>:3334                                    ; preds = %3323
-  %3335 = fadd fast float %3329, %20
-  br label %3336
-
-; <label>:3336                                    ; preds = %3334, %3332, %3321, %3319, %3317
-  %3337 = phi float [ %3318, %3317 ], [ %3320, %3319 ], [ %3333, %3332 ], [ %3335, %3334 ], [ %1428, %3321 ]
-  %3338 = fptoui float %3337 to i32
-  %3339 = fsub fast float %24, %20
-  %3340 = fcmp fast olt float %2789, %20
-  br i1 %3340, label %3341, label %3354
-
-; <label>:3341                                    ; preds = %3336
-  %3342 = fsub fast float %20, %2789
-  %3343 = fdiv fast float %3342, %3339
-  %3344 = fptoui float %3343 to i32
-  %3345 = uitofp i32 %3344 to float
-  %3346 = fmul fast float %3345, %3339
-  %3347 = fsub fast float %3342, %3346
-  %3348 = and i32 %3344, 1
-  %3349 = icmp eq i32 %3348, 0
-  br i1 %3349, label %3350, label %3352
-
-; <label>:3350                                    ; preds = %3341
-  %3351 = fadd fast float %3347, %20
-  br label %3369
-
-; <label>:3352                                    ; preds = %3341
-  %3353 = fsub fast float %24, %3347
-  br label %3369
-
-; <label>:3354                                    ; preds = %3336
-  %3355 = fcmp fast ogt float %2789, %24
-  br i1 %3355, label %3356, label %3369
-
-; <label>:3356                                    ; preds = %3354
-  %3357 = fsub fast float %2789, %24
-  %3358 = fdiv fast float %3357, %3339
-  %3359 = fptoui float %3358 to i32
-  %3360 = uitofp i32 %3359 to float
-  %3361 = fmul fast float %3360, %3339
-  %3362 = fsub fast float %3357, %3361
-  %3363 = and i32 %3359, 1
-  %3364 = icmp eq i32 %3363, 0
-  br i1 %3364, label %3365, label %3367
-
-; <label>:3365                                    ; preds = %3356
-  %3366 = fsub fast float %24, %3362
-  br label %3369
-
-; <label>:3367                                    ; preds = %3356
-  %3368 = fadd fast float %3362, %20
-  br label %3369
-
-; <label>:3369                                    ; preds = %3367, %3365, %3354, %3352, %3350
-  %3370 = phi float [ %3351, %3350 ], [ %3353, %3352 ], [ %3366, %3365 ], [ %3368, %3367 ], [ %2789, %3354 ]
-  %3371 = fptoui float %3370 to i32
-  %3372 = uitofp i32 %3371 to float
-  %3373 = uitofp i32 %3338 to float
-  %3374 = fptoui float %45 to i32
-  %3375 = fptoui float %178 to i32
-  %3376 = fptoui float %3372 to i32
-  %3377 = fptoui float %3373 to i32
-  %3378 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3379 = extractvalue %dx.types.CBufRet.i32 %3378, 0
-  %3380 = extractvalue %dx.types.CBufRet.i32 %3378, 1
-  %3381 = extractvalue %dx.types.CBufRet.i32 %3378, 2
-  %3382 = extractvalue %dx.types.CBufRet.i32 %3378, 3
-  %3383 = mul i32 %3379, %3374
-  %3384 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3375, i32 %3380, i32 %3383)  ; IMad(a,b,c)
-  %3385 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3376, i32 %3381, i32 %3384)  ; IMad(a,b,c)
-  %3386 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3377, i32 %3382, i32 %3385)  ; IMad(a,b,c)
-  %3387 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3386, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3388 = extractvalue %dx.types.ResRet.i32 %3387, 0
-  %3389 = extractvalue %dx.types.ResRet.i32 %3387, 1
-  %3390 = call double @dx.op.makeDouble.f64(i32 101, i32 %3388, i32 %3389)  ; MakeDouble(lo,hi)
-  %3391 = fptrunc double %3390 to float
-  br label %3392
-
-; <label>:3392                                    ; preds = %3369, %3303, %3272, %3253, %3243
-  %3393 = phi float [ %3269, %3253 ], [ 0.000000e+00, %3243 ], [ %3302, %3272 ], [ %3391, %3369 ], [ 0.000000e+00, %3303 ]
-  %3394 = call float @dx.op.unary.f32(i32 22, float %176)  ; Frc(value)
-  %3395 = call float @dx.op.unary.f32(i32 22, float %177)  ; Frc(value)
-  %3396 = fmul fast float %3395, %3395
-  %3397 = fmul fast float %3396, %3395
-  %3398 = fmul fast float %1124, -7.500000e-01
-  %3399 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2335, float %3398)  ; FMad(a,b,c)
-  %3400 = fmul fast float %1124, 1.500000e+00
-  %3401 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %1730, float %3400)  ; FMad(a,b,c)
-  %3402 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %2335, float %3401)  ; FMad(a,b,c)
-  %3403 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %2940, float %3402)  ; FMad(a,b,c)
-  %3404 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %1730, float %3398)  ; FMad(a,b,c)
-  %3405 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %2335, float %3404)  ; FMad(a,b,c)
-  %3406 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2940, float %3405)  ; FMad(a,b,c)
-  %3407 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3395, float %3396, float %3397, float %1730, float %3399, float %3403, float %3406)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3408 = fmul fast float %1275, -7.500000e-01
-  %3409 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2486, float %3408)  ; FMad(a,b,c)
-  %3410 = fmul fast float %1275, 1.500000e+00
-  %3411 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %1881, float %3410)  ; FMad(a,b,c)
-  %3412 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %2486, float %3411)  ; FMad(a,b,c)
-  %3413 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %3091, float %3412)  ; FMad(a,b,c)
-  %3414 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %1881, float %3408)  ; FMad(a,b,c)
-  %3415 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %2486, float %3414)  ; FMad(a,b,c)
-  %3416 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3091, float %3415)  ; FMad(a,b,c)
-  %3417 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3395, float %3396, float %3397, float %1881, float %3409, float %3413, float %3416)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3418 = fmul fast float %1427, -7.500000e-01
-  %3419 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2637, float %3418)  ; FMad(a,b,c)
-  %3420 = fmul fast float %1427, 1.500000e+00
-  %3421 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %2032, float %3420)  ; FMad(a,b,c)
-  %3422 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %2637, float %3421)  ; FMad(a,b,c)
-  %3423 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %3242, float %3422)  ; FMad(a,b,c)
-  %3424 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %2032, float %3418)  ; FMad(a,b,c)
-  %3425 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %2637, float %3424)  ; FMad(a,b,c)
-  %3426 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3242, float %3425)  ; FMad(a,b,c)
-  %3427 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3395, float %3396, float %3397, float %2032, float %3419, float %3423, float %3426)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3428 = fmul fast float %1579, -7.500000e-01
-  %3429 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2788, float %3428)  ; FMad(a,b,c)
-  %3430 = fmul fast float %1579, 1.500000e+00
-  %3431 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %2183, float %3430)  ; FMad(a,b,c)
-  %3432 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %2788, float %3431)  ; FMad(a,b,c)
-  %3433 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %3393, float %3432)  ; FMad(a,b,c)
-  %3434 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %2183, float %3428)  ; FMad(a,b,c)
-  %3435 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %2788, float %3434)  ; FMad(a,b,c)
-  %3436 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3393, float %3435)  ; FMad(a,b,c)
-  %3437 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3395, float %3396, float %3397, float %2183, float %3429, float %3433, float %3436)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3438 = fmul fast float %3394, %3394
-  %3439 = fmul fast float %3438, %3394
-  %3440 = fmul fast float %3407, -7.500000e-01
-  %3441 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3427, float %3440)  ; FMad(a,b,c)
-  %3442 = fmul fast float %3407, 1.500000e+00
-  %3443 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %3417, float %3442)  ; FMad(a,b,c)
-  %3444 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %3427, float %3443)  ; FMad(a,b,c)
-  %3445 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %3437, float %3444)  ; FMad(a,b,c)
-  %3446 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %3417, float %3440)  ; FMad(a,b,c)
-  %3447 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %3427, float %3446)  ; FMad(a,b,c)
-  %3448 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3437, float %3447)  ; FMad(a,b,c)
-  %3449 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3394, float %3438, float %3439, float %3417, float %3441, float %3445, float %3448)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3450 = fpext float %3449 to double
-  %3451 = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double %3450)  ; SplitDouble(value)
-  %3452 = extractvalue %dx.types.splitdouble %3451, 0
-  %3453 = extractvalue %dx.types.splitdouble %3451, 1
-  call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %1, i32 %8, i32 0, i32 %3452, i32 %3453, i32 undef, i32 undef, i8 3, i32 8)  ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment)
-  br label %3454
-
-; <label>:3454                                    ; preds = %3392, %965, %948, %331, %0
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.threadId.i32(i32, i32) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.unary.f32(i32, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.tertiary.f32(i32, float, float, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.binary.f32(i32, float, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.dot4.f32(i32, float, float, float, float, float, float, float, float) #0
-
-; Function Attrs: nounwind readonly
-declare %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32, %dx.types.Handle, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.tertiary.i32(i32, i32, i32, i32) #0
-
-; Function Attrs: nounwind readonly
-declare %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32, %dx.types.Handle, i32) #1
-
-; Function Attrs: nounwind readonly
-declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #1
-
-; Function Attrs: nounwind readonly
-declare %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32, %dx.types.Handle, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind readnone
-declare double @dx.op.makeDouble.f64(i32, i32, i32) #0
-
-; Function Attrs: nounwind
-declare void @dx.op.rawBufferStore.i32(i32, %dx.types.Handle, i32, i32, i32, i32, i32, i32, i8, i32) #2
-
-; Function Attrs: nounwind readnone
-declare %dx.types.splitdouble @dx.op.splitDouble.f64(i32, double) #0
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind readonly }
-attributes #2 = { nounwind }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.valver = !{!2}
-!dx.shaderModel = !{!3}
-!dx.resources = !{!4}
-!dx.entryPoints = !{!12}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 1, i32 2}
-!2 = !{i32 1, i32 6}
-!3 = !{!"cs", i32 6, i32 2}
-!4 = !{null, !5, !10, null}
-!5 = !{!6, !8, !9}
-!6 = !{i32 0, %"class.RWStructuredBuffer<double>"* undef, !"", i32 0, i32 0, i32 1, i32 12, i1 false, i1 false, i1 false, !7}
-!7 = !{i32 1, i32 8}
-!8 = !{i32 1, %"class.RWStructuredBuffer<half>"* undef, !"", i32 0, i32 1, i32 1, i32 12, i1 false, i1 false, i1 false, !1}
-!9 = !{i32 2, %"class.RWStructuredBuffer<double>"* undef, !"", i32 0, i32 2, i32 1, i32 12, i1 false, i1 false, i1 false, !7}
-!10 = !{!11}
-!11 = !{i32 0, %Constants* undef, !"", i32 0, i32 0, i32 1, i32 116, null}
-!12 = !{void ()* @GridSample, !"GridSample", null, !4, !13}
-!13 = !{i32 0, i64 8388660, i32 4, !14}
-!14 = !{i32 64, i32 1, i32 1}
-
-#endif
-
-const unsigned char g_GridSample[] = {
-  0x44, 0x58, 0x42, 0x43, 0x63, 0x6a, 0x57, 0xa4, 0x27, 0x92, 0x85, 0x76,
-  0xa9, 0xce, 0x08, 0xff, 0x27, 0x96, 0x13, 0x79, 0x01, 0x00, 0x00, 0x00,
-  0x30, 0x59, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00,
-  0x48, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00,
-  0x18, 0x01, 0x00, 0x00, 0x34, 0x01, 0x00, 0x00, 0x53, 0x46, 0x49, 0x30,
-  0x08, 0x00, 0x00, 0x00, 0x01, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x49, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x08, 0x00, 0x00, 0x00, 0x4f, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x50, 0x53, 0x56, 0x30,
-  0xa8, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x05, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00,
-  0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
-  0x18, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x48, 0x41, 0x53, 0x48, 0x14, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0xc8, 0x84, 0x86, 0x8e, 0x50, 0xa9, 0x3c, 0x23,
-  0x89, 0xe0, 0x3b, 0x59, 0x5b, 0xb8, 0x89, 0x39, 0x44, 0x58, 0x49, 0x4c,
-  0xf4, 0x57, 0x00, 0x00, 0x62, 0x00, 0x05, 0x00, 0xfd, 0x15, 0x00, 0x00,
-  0x44, 0x58, 0x49, 0x4c, 0x02, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
-  0xdc, 0x57, 0x00, 0x00, 0x42, 0x43, 0xc0, 0xde, 0x21, 0x0c, 0x00, 0x00,
-  0xf4, 0x15, 0x00, 0x00, 0x0b, 0x82, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00,
-  0x13, 0x00, 0x00, 0x00, 0x07, 0x81, 0x23, 0x91, 0x41, 0xc8, 0x04, 0x49,
-  0x06, 0x10, 0x32, 0x39, 0x92, 0x01, 0x84, 0x0c, 0x25, 0x05, 0x08, 0x19,
-  0x1e, 0x04, 0x8b, 0x62, 0x80, 0x18, 0x45, 0x02, 0x42, 0x92, 0x0b, 0x42,
-  0xc4, 0x10, 0x32, 0x14, 0x38, 0x08, 0x18, 0x4b, 0x0a, 0x32, 0x62, 0x88,
-  0x48, 0x90, 0x14, 0x20, 0x43, 0x46, 0x88, 0xa5, 0x00, 0x19, 0x32, 0x42,
-  0xe4, 0x48, 0x0e, 0x90, 0x11, 0x23, 0xc4, 0x50, 0x41, 0x51, 0x81, 0x8c,
-  0xe1, 0x83, 0xe5, 0x8a, 0x04, 0x31, 0x46, 0x06, 0x51, 0x18, 0x00, 0x00,
-  0x08, 0x00, 0x00, 0x00, 0x1b, 0x8c, 0xe0, 0xff, 0xff, 0xff, 0xff, 0x07,
-  0x40, 0x02, 0xa8, 0x0d, 0x86, 0xf0, 0xff, 0xff, 0xff, 0xff, 0x03, 0x20,
-  0x01, 0xd5, 0x06, 0x62, 0xf8, 0xff, 0xff, 0xff, 0xff, 0x01, 0x90, 0x00,
-  0x49, 0x18, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x13, 0x82, 0x60, 0x42,
-  0x20, 0x4c, 0x08, 0x06, 0x00, 0x00, 0x00, 0x00, 0x89, 0x20, 0x00, 0x00,
-  0x5d, 0x00, 0x00, 0x00, 0x32, 0x22, 0x88, 0x09, 0x20, 0x64, 0x85, 0x04,
-  0x13, 0x23, 0xa4, 0x84, 0x04, 0x13, 0x23, 0xe3, 0x84, 0xa1, 0x90, 0x14,
-  0x12, 0x4c, 0x8c, 0x8c, 0x0b, 0x84, 0xc4, 0x4c, 0x10, 0xcc, 0xc1, 0x08,
-  0x40, 0x09, 0x00, 0x0a, 0xe6, 0x08, 0xc0, 0xa0, 0x0c, 0xc3, 0x30, 0x10,
-  0x31, 0x03, 0x50, 0x06, 0x63, 0x30, 0xe8, 0x28, 0x85, 0x31, 0x18, 0x86,
-  0x41, 0x49, 0x21, 0x8c, 0xc1, 0x30, 0x68, 0x29, 0x8a, 0x31, 0x18, 0x86,
-  0x61, 0x18, 0x86, 0x61, 0x50, 0x33, 0x0a, 0x70, 0xd3, 0x70, 0xf9, 0x13,
-  0xf6, 0x10, 0x92, 0xbf, 0x12, 0xd2, 0x4a, 0x4c, 0x7e, 0x51, 0xeb, 0xa8,
-  0x78, 0x9e, 0xe7, 0x19, 0xe6, 0x08, 0x10, 0x8a, 0xee, 0x19, 0x2e, 0x7f,
-  0xc2, 0x1e, 0x42, 0xf2, 0x43, 0xa0, 0x19, 0x16, 0x02, 0x05, 0x52, 0x39,
-  0xa0, 0x61, 0x1a, 0x86, 0x68, 0x20, 0xaa, 0x14, 0xc3, 0x30, 0x0c, 0x03,
-  0x59, 0x47, 0x0d, 0x97, 0x3f, 0x61, 0x0f, 0x21, 0xf9, 0xdc, 0x46, 0x15,
-  0x2b, 0x31, 0xf9, 0xc8, 0x6d, 0x23, 0x62, 0x18, 0x86, 0xa1, 0x10, 0xd8,
-  0x30, 0x0d, 0x94, 0xcd, 0x11, 0x04, 0xc5, 0x98, 0x86, 0x68, 0x18, 0x36,
-  0xe2, 0x6e, 0x1a, 0x2e, 0x7f, 0xc2, 0x1e, 0x42, 0xf2, 0x57, 0x42, 0x5a,
-  0x89, 0xc9, 0x47, 0x6e, 0x1b, 0x15, 0xc3, 0x30, 0x0c, 0x43, 0x39, 0xbc,
-  0x61, 0x1a, 0x86, 0x68, 0xa0, 0x6f, 0x08, 0xa0, 0x10, 0xe1, 0x30, 0x0c,
-  0x24, 0x96, 0x05, 0x18, 0xa6, 0x61, 0x18, 0x86, 0x61, 0x88, 0x06, 0x22,
-  0x8f, 0x1a, 0x2e, 0x7f, 0xc2, 0x1e, 0x42, 0xf2, 0xa5, 0x67, 0x41, 0xa6,
-  0xc1, 0xa1, 0x82, 0x85, 0x40, 0xc2, 0x30, 0x94, 0xc1, 0x1c, 0x42, 0x3a,
-  0x07, 0x02, 0x66, 0x02, 0x83, 0x71, 0x60, 0x87, 0x70, 0x98, 0x87, 0x79,
-  0x70, 0x03, 0x59, 0xb8, 0x85, 0x59, 0xa0, 0x07, 0x79, 0xa8, 0x87, 0x71,
-  0xa0, 0x87, 0x7a, 0x90, 0x87, 0x72, 0x20, 0x07, 0x51, 0xa8, 0x07, 0x73,
-  0x30, 0x87, 0x72, 0x90, 0x07, 0x3e, 0x20, 0x87, 0x77, 0xa8, 0x07, 0x71,
-  0x60, 0x87, 0x72, 0xf0, 0x03, 0x14, 0x84, 0xa4, 0xce, 0xe4, 0x8d, 0x03,
-  0x3b, 0x84, 0xc3, 0x3c, 0xcc, 0x83, 0x1b, 0xc8, 0xc2, 0x2d, 0xcc, 0x02,
-  0x3d, 0xc8, 0x43, 0x3d, 0x8c, 0x03, 0x3d, 0xd4, 0x83, 0x3c, 0x94, 0x03,
-  0x39, 0x88, 0x42, 0x3d, 0x98, 0x83, 0x39, 0x94, 0x83, 0x3c, 0xf0, 0x01,
-  0x3a, 0x84, 0x03, 0x3b, 0x98, 0x83, 0x1f, 0xa0, 0xe0, 0x21, 0x76, 0x18,
-  0x81, 0x18, 0x2e, 0xe1, 0x9c, 0x46, 0x9a, 0x80, 0x66, 0x92, 0xd0, 0x32,
-  0x0c, 0xc3, 0xe0, 0xba, 0xae, 0xeb, 0xba, 0x03, 0xbd, 0x73, 0x04, 0xa0,
-  0x30, 0x05, 0x00, 0x00, 0x13, 0x14, 0x72, 0xc0, 0x87, 0x74, 0x60, 0x87,
-  0x36, 0x68, 0x87, 0x79, 0x68, 0x03, 0x72, 0xc0, 0x87, 0x0d, 0xae, 0x50,
-  0x0e, 0x6d, 0xd0, 0x0e, 0x7a, 0x50, 0x0e, 0x6d, 0x00, 0x0f, 0x7a, 0x30,
-  0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0xa0,
-  0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x78, 0xa0, 0x07, 0x78, 0xd0,
-  0x06, 0xe9, 0x10, 0x07, 0x76, 0xa0, 0x07, 0x71, 0x60, 0x07, 0x6d, 0x90,
-  0x0e, 0x73, 0x20, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xd0, 0x06, 0xe9, 0x60,
-  0x07, 0x74, 0xa0, 0x07, 0x76, 0x40, 0x07, 0x6d, 0x60, 0x0e, 0x71, 0x60,
-  0x07, 0x7a, 0x10, 0x07, 0x76, 0xd0, 0x06, 0xe6, 0x30, 0x07, 0x72, 0xa0,
-  0x07, 0x73, 0x20, 0x07, 0x6d, 0x60, 0x0e, 0x76, 0x40, 0x07, 0x7a, 0x60,
-  0x07, 0x74, 0xd0, 0x06, 0xee, 0x80, 0x07, 0x7a, 0x10, 0x07, 0x76, 0xa0,
-  0x07, 0x73, 0x20, 0x07, 0x7a, 0x60, 0x07, 0x74, 0x30, 0xe4, 0x09, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0xc8, 0x43,
-  0x00, 0x01, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x90,
-  0xe7, 0x00, 0x02, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80,
-  0x21, 0x4f, 0x02, 0x04, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x43, 0x9e, 0x05, 0x08, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x86, 0x3c, 0x0d, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x0c, 0x79, 0x28, 0x20, 0x00, 0x04, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x18, 0xf2, 0x58, 0x40, 0x00, 0x04, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0xe4, 0xc9, 0x80, 0x00, 0x10, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0xc8, 0xc3, 0x01, 0x01, 0x20,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x90, 0xe7, 0x03, 0x02,
-  0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x21, 0x8f, 0x18,
-  0x00, 0x01, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x90,
-  0x87, 0x0c, 0x80, 0x00, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x60, 0xc8, 0x73, 0x06, 0x40, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x90, 0x05, 0x02, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00,
-  0x32, 0x1e, 0x98, 0x14, 0x19, 0x11, 0x4c, 0x90, 0x8c, 0x09, 0x26, 0x47,
-  0xc6, 0x04, 0x43, 0x1a, 0x4a, 0xa0, 0x08, 0x8a, 0x61, 0x04, 0xa0, 0x30,
-  0x0a, 0xa2, 0xd0, 0x03, 0x0a, 0xa1, 0x00, 0x03, 0x68, 0x1b, 0x01, 0xa0,
-  0xb8, 0xd0, 0x01, 0x01, 0x11, 0xa8, 0x9d, 0x01, 0x20, 0x78, 0x06, 0x80,
-  0xd6, 0x19, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00,
-  0x1a, 0x03, 0x4c, 0x90, 0x46, 0x02, 0x13, 0x44, 0x35, 0x18, 0x63, 0x0b,
-  0x73, 0x3b, 0x03, 0xb1, 0x2b, 0x93, 0x9b, 0x4b, 0x7b, 0x73, 0x03, 0x99,
-  0x71, 0xb9, 0x01, 0x41, 0xa1, 0x0b, 0x3b, 0x9b, 0x7b, 0x91, 0x2a, 0x62,
-  0x2a, 0x0a, 0x9a, 0x2a, 0xfa, 0x9a, 0xb9, 0x81, 0x79, 0x31, 0x4b, 0x73,
-  0x0b, 0x63, 0x4b, 0xd9, 0x10, 0x04, 0x13, 0x84, 0xc1, 0x99, 0x20, 0x0c,
-  0xcf, 0x06, 0x61, 0x20, 0x26, 0x08, 0x03, 0xb4, 0x41, 0x18, 0x0c, 0x0a,
-  0x63, 0x73, 0x1b, 0x06, 0xc4, 0x20, 0x26, 0x08, 0x43, 0x34, 0x41, 0x58,
-  0x83, 0x8d, 0xc0, 0x04, 0x61, 0x90, 0x26, 0x08, 0xdb, 0x35, 0x41, 0x18,
-  0xa6, 0x0d, 0xc2, 0xf0, 0x6c, 0x58, 0x94, 0x85, 0x51, 0x94, 0xa1, 0x71,
-  0x1c, 0x07, 0x9a, 0x20, 0xb4, 0x41, 0xb6, 0x61, 0x19, 0x24, 0x46, 0x19,
-  0x86, 0xc6, 0x71, 0x9c, 0x62, 0xc3, 0x42, 0x2c, 0x8c, 0x42, 0x0c, 0x8d,
-  0xe3, 0x38, 0xd0, 0x86, 0x21, 0x9a, 0xa8, 0x09, 0x02, 0x1c, 0x68, 0x13,
-  0x84, 0x81, 0xda, 0x80, 0x28, 0x16, 0xa3, 0x28, 0xc3, 0x05, 0x6c, 0x08,
-  0xb0, 0x0d, 0x04, 0x50, 0x65, 0xc0, 0x04, 0x41, 0x00, 0xa8, 0x1c, 0xc9,
-  0xa5, 0x91, 0x4d, 0x85, 0xb5, 0xc1, 0xb1, 0x95, 0x4d, 0x10, 0xe2, 0x00,
-  0x9b, 0x20, 0x0c, 0xd5, 0x04, 0x61, 0xb0, 0x36, 0x0c, 0xdf, 0x30, 0x6c,
-  0x20, 0x94, 0xce, 0x03, 0x83, 0x0d, 0xc5, 0xc6, 0x01, 0x5a, 0x18, 0x54,
-  0x61, 0x63, 0xb3, 0x6b, 0x73, 0x49, 0x23, 0x2b, 0x73, 0xa3, 0x9b, 0x12,
-  0x04, 0x55, 0xc8, 0xf0, 0x5c, 0xec, 0xca, 0xe4, 0xe6, 0xd2, 0xde, 0xdc,
-  0xa6, 0x04, 0x44, 0x13, 0x32, 0x3c, 0x17, 0xbb, 0x30, 0x36, 0xbb, 0x32,
-  0xb9, 0x29, 0x81, 0x51, 0x87, 0x0c, 0xcf, 0x65, 0x0e, 0x2d, 0x8c, 0xac,
-  0x4c, 0xae, 0xe9, 0x8d, 0xac, 0x8c, 0x6d, 0x4a, 0x80, 0x94, 0x21, 0xc3,
-  0x73, 0x91, 0x2b, 0x9b, 0x7b, 0xab, 0x93, 0x1b, 0x2b, 0x9b, 0x9b, 0x12,
-  0x64, 0x75, 0xc8, 0xf0, 0x5c, 0xca, 0xdc, 0xe8, 0xe4, 0xf2, 0xa0, 0xde,
-  0xd2, 0xdc, 0xe8, 0xe6, 0xa6, 0x04, 0x61, 0x00, 0x79, 0x18, 0x00, 0x00,
-  0x51, 0x00, 0x00, 0x00, 0x33, 0x08, 0x80, 0x1c, 0xc4, 0xe1, 0x1c, 0x66,
-  0x14, 0x01, 0x3d, 0x88, 0x43, 0x38, 0x84, 0xc3, 0x8c, 0x42, 0x80, 0x07,
-  0x79, 0x78, 0x07, 0x73, 0x98, 0x71, 0x0c, 0xe6, 0x00, 0x0f, 0xed, 0x10,
-  0x0e, 0xf4, 0x80, 0x0e, 0x33, 0x0c, 0x42, 0x1e, 0xc2, 0xc1, 0x1d, 0xce,
-  0xa1, 0x1c, 0x66, 0x30, 0x05, 0x3d, 0x88, 0x43, 0x38, 0x84, 0x83, 0x1b,
-  0xcc, 0x03, 0x3d, 0xc8, 0x43, 0x3d, 0x8c, 0x03, 0x3d, 0xcc, 0x78, 0x8c,
-  0x74, 0x70, 0x07, 0x7b, 0x08, 0x07, 0x79, 0x48, 0x87, 0x70, 0x70, 0x07,
-  0x7a, 0x70, 0x03, 0x76, 0x78, 0x87, 0x70, 0x20, 0x87, 0x19, 0xcc, 0x11,
-  0x0e, 0xec, 0x90, 0x0e, 0xe1, 0x30, 0x0f, 0x6e, 0x30, 0x0f, 0xe3, 0xf0,
-  0x0e, 0xf0, 0x50, 0x0e, 0x33, 0x10, 0xc4, 0x1d, 0xde, 0x21, 0x1c, 0xd8,
-  0x21, 0x1d, 0xc2, 0x61, 0x1e, 0x66, 0x30, 0x89, 0x3b, 0xbc, 0x83, 0x3b,
-  0xd0, 0x43, 0x39, 0xb4, 0x03, 0x3c, 0xbc, 0x83, 0x3c, 0x84, 0x03, 0x3b,
-  0xcc, 0xf0, 0x14, 0x76, 0x60, 0x07, 0x7b, 0x68, 0x07, 0x37, 0x68, 0x87,
-  0x72, 0x68, 0x07, 0x37, 0x80, 0x87, 0x70, 0x90, 0x87, 0x70, 0x60, 0x07,
-  0x76, 0x28, 0x07, 0x76, 0xf8, 0x05, 0x76, 0x78, 0x87, 0x77, 0x80, 0x87,
-  0x5f, 0x08, 0x87, 0x71, 0x18, 0x87, 0x72, 0x98, 0x87, 0x79, 0x98, 0x81,
-  0x2c, 0xee, 0xf0, 0x0e, 0xee, 0xe0, 0x0e, 0xf5, 0xc0, 0x0e, 0xec, 0x30,
-  0x03, 0x62, 0xc8, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xcc, 0xa1, 0x1c, 0xe4,
-  0xa1, 0x1c, 0xdc, 0x61, 0x1c, 0xca, 0x21, 0x1c, 0xc4, 0x81, 0x1d, 0xca,
-  0x61, 0x06, 0xd6, 0x90, 0x43, 0x39, 0xc8, 0x43, 0x39, 0x98, 0x43, 0x39,
-  0xc8, 0x43, 0x39, 0xb8, 0xc3, 0x38, 0x94, 0x43, 0x38, 0x88, 0x03, 0x3b,
-  0x94, 0xc3, 0x2f, 0xbc, 0x83, 0x3c, 0xfc, 0x82, 0x3b, 0xd4, 0x03, 0x3b,
-  0xb0, 0xc3, 0x0c, 0xc4, 0x21, 0x07, 0x7c, 0x70, 0x03, 0x7a, 0x28, 0x87,
-  0x76, 0x80, 0x87, 0x19, 0xd1, 0x43, 0x0e, 0xf8, 0xe0, 0x06, 0xe4, 0x20,
-  0x0e, 0xe7, 0xe0, 0x06, 0xf6, 0x10, 0x0e, 0xf2, 0xc0, 0x0e, 0xe1, 0x90,
-  0x0f, 0xef, 0x50, 0x0f, 0xf4, 0x30, 0x83, 0x81, 0xc8, 0x01, 0x1f, 0xdc,
-  0x40, 0x1c, 0xe4, 0xa1, 0x1c, 0xc2, 0x61, 0x1d, 0xdc, 0x40, 0x1c, 0xe4,
-  0x01, 0x00, 0x00, 0x00, 0x71, 0x20, 0x00, 0x00, 0x3a, 0x00, 0x00, 0x00,
-  0x06, 0xa0, 0x80, 0x11, 0x32, 0xb0, 0x00, 0xf3, 0x2c, 0x84, 0x11, 0x40,
-  0xc3, 0xe5, 0x3b, 0x8f, 0x1f, 0x20, 0x0d, 0x10, 0x61, 0x7e, 0x71, 0xdb,
-  0x86, 0xb0, 0x0d, 0x97, 0xef, 0x3c, 0xbe, 0x10, 0x50, 0x45, 0x41, 0x44,
-  0xa5, 0x03, 0x0c, 0x25, 0x61, 0x00, 0x02, 0xe6, 0x23, 0xb7, 0x6d, 0x09,
-  0xd2, 0x70, 0xf9, 0xce, 0xe3, 0x0b, 0x11, 0x01, 0x4c, 0x44, 0x08, 0x34,
-  0xc3, 0x42, 0x58, 0x81, 0x33, 0x5c, 0xbe, 0xf3, 0xf8, 0x83, 0x33, 0xe1,
-  0x7e, 0x71, 0xdb, 0xb6, 0x40, 0x0d, 0x97, 0xef, 0x3c, 0x3e, 0x03, 0x28,
-  0x44, 0xe7, 0x50, 0xc1, 0x42, 0xf8, 0x85, 0x8e, 0x9b, 0xc1, 0x35, 0x5c,
-  0xbe, 0xf3, 0xf8, 0x11, 0x60, 0x6d, 0x54, 0x51, 0x10, 0x51, 0xe9, 0x00,
-  0x83, 0x5f, 0xd4, 0xba, 0x29, 0x5c, 0xc3, 0xe5, 0x3b, 0x8f, 0x1f, 0x01,
-  0xd6, 0x46, 0x15, 0x05, 0x11, 0x95, 0x0e, 0x30, 0xf8, 0xc8, 0x6d, 0x1b,
-  0x03, 0x36, 0x5c, 0xbe, 0xf3, 0xf8, 0x11, 0x60, 0x6d, 0x54, 0x51, 0x10,
-  0x11, 0x3b, 0x39, 0x11, 0xe1, 0x23, 0xb7, 0x6d, 0x0d, 0xd5, 0x70, 0xf9,
-  0xce, 0xe3, 0x4b, 0xcf, 0x82, 0x4c, 0x9d, 0x43, 0x05, 0x0b, 0xe1, 0x17,
-  0x3a, 0x6e, 0x03, 0xd2, 0x70, 0xf9, 0xce, 0xe3, 0x4f, 0x44, 0x34, 0x21,
-  0x40, 0x84, 0xf9, 0xc5, 0x6d, 0xdb, 0x81, 0x34, 0x5c, 0xbe, 0xf3, 0xf8,
-  0x13, 0x11, 0x4d, 0x08, 0x10, 0x61, 0x3e, 0x72, 0xdb, 0x16, 0x20, 0x0d,
-  0x97, 0xef, 0x3c, 0xfe, 0x74, 0x44, 0x04, 0x30, 0x88, 0x83, 0x8f, 0xdc,
-  0xb6, 0x09, 0x3c, 0xc3, 0xe5, 0x3b, 0x8f, 0x4f, 0x35, 0x40, 0x84, 0xf9,
-  0xc5, 0x6d, 0x03, 0x00, 0x61, 0x20, 0x00, 0x00, 0x32, 0x14, 0x00, 0x00,
-  0x13, 0x04, 0x24, 0x14, 0x0b, 0x04, 0x00, 0x00, 0x1f, 0x00, 0x00, 0x00,
-  0x34, 0x14, 0x58, 0xd9, 0x95, 0xa5, 0x40, 0x29, 0x07, 0xd4, 0x40, 0x19,
-  0x15, 0x52, 0x71, 0x15, 0xdc, 0x0c, 0x40, 0xc9, 0x95, 0x4d, 0xb1, 0x14,
-  0x73, 0x40, 0x61, 0x0a, 0x14, 0x4d, 0xe9, 0x06, 0x94, 0x43, 0x29, 0x50,
-  0x54, 0x04, 0x25, 0x50, 0x06, 0x64, 0x8c, 0x11, 0x80, 0x20, 0x08, 0xd2,
-  0xdf, 0x18, 0x01, 0x08, 0x82, 0x20, 0xff, 0x8d, 0x11, 0x80, 0x20, 0x08,
-  0xe2, 0xbf, 0x30, 0x46, 0x00, 0x82, 0x20, 0x18, 0x82, 0xc3, 0x18, 0x01,
-  0x08, 0x82, 0xa0, 0xfe, 0x8d, 0x11, 0x80, 0x20, 0x08, 0xea, 0xbf, 0x30,
-  0x46, 0x00, 0x82, 0x20, 0x08, 0x7f, 0x63, 0x04, 0x20, 0x08, 0x82, 0xf0,
-  0x2f, 0x8c, 0x11, 0x80, 0x20, 0x08, 0x82, 0x60, 0x30, 0x46, 0x00, 0x82,
-  0x20, 0x48, 0xff, 0xc2, 0x18, 0x01, 0x08, 0x82, 0x20, 0xfe, 0x8d, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x23, 0x06, 0x09, 0x00, 0x82, 0x60, 0xc0, 0xd5,
-  0x01, 0xe6, 0xbc, 0xc1, 0x1b, 0x9c, 0xc1, 0x88, 0x41, 0x02, 0x80, 0x20,
-  0x18, 0x70, 0x76, 0x90, 0x3d, 0x71, 0x10, 0x07, 0x68, 0x30, 0x62, 0x90,
-  0x00, 0x20, 0x08, 0x06, 0xdc, 0x1d, 0x68, 0xd0, 0x1b, 0xbc, 0x41, 0x1a,
-  0x8c, 0x18, 0x24, 0x00, 0x08, 0x82, 0x01, 0x87, 0x07, 0x9b, 0x04, 0x07,
-  0x70, 0xa0, 0x06, 0x23, 0x06, 0x06, 0x00, 0x82, 0x60, 0x40, 0x84, 0x82,
-  0x15, 0x07, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x90, 0xed, 0x81, 0x19,
-  0x08, 0x72, 0x30, 0x9a, 0x10, 0x00, 0x15, 0x0c, 0x30, 0x9a, 0x30, 0x04,
-  0xc3, 0x0d, 0x42, 0x40, 0x06, 0xb3, 0x0c, 0xc1, 0x08, 0x05, 0x23, 0x06,
-  0x07, 0x00, 0x82, 0x60, 0x90, 0x81, 0xc2, 0x1a, 0x1c, 0x7a, 0x30, 0x9a,
-  0x10, 0x0c, 0x17, 0x18, 0x35, 0x9a, 0x30, 0x08, 0x17, 0x18, 0x35, 0x62,
-  0x70, 0x00, 0x20, 0x08, 0x06, 0x59, 0x29, 0xc0, 0x01, 0x03, 0x06, 0xa3,
-  0x09, 0x01, 0x30, 0xdc, 0x10, 0xf8, 0x01, 0x18, 0x4c, 0x37, 0x5c, 0x53,
-  0x30, 0xdd, 0x80, 0x75, 0x42, 0x21, 0x01, 0x4c, 0x37, 0x68, 0x1f, 0x51,
-  0x48, 0x00, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x90, 0xb5, 0x02, 0x1e,
-  0x50, 0x67, 0x30, 0x9a, 0x10, 0x04, 0xa3, 0x09, 0x82, 0x30, 0x9a, 0x30,
-  0x0c, 0x15, 0x08, 0x52, 0x03, 0x21, 0x15, 0x0c, 0x52, 0x57, 0x30, 0x23,
-  0x06, 0x07, 0x00, 0x82, 0x60, 0x90, 0xd5, 0x02, 0x28, 0x70, 0xad, 0x30,
-  0x9a, 0x10, 0x00, 0x15, 0x0c, 0x52, 0x5b, 0x10, 0x15, 0x20, 0x33, 0x9a,
-  0x50, 0x04, 0x15, 0x08, 0x52, 0x44, 0x10, 0x15, 0x34, 0x33, 0x9a, 0x90,
-  0x08, 0x15, 0x08, 0x52, 0x44, 0x10, 0xd7, 0x18, 0x75, 0x85, 0x51, 0x37,
-  0x18, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x19, 0x39, 0xbc, 0xc2,
-  0x1a, 0xdc, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2,
-  0x20, 0x8c, 0x26, 0x10, 0xc3, 0x11, 0x46, 0x1d, 0x61, 0xd4, 0x11, 0x46,
-  0x1d, 0x61, 0xd4, 0x88, 0x41, 0x03, 0x80, 0x20, 0x18, 0x34, 0xf0, 0x10,
-  0x0b, 0xcc, 0xa2, 0xe0, 0x01, 0x31, 0x08, 0x81, 0x09, 0x01, 0x7c, 0x4e,
-  0x18, 0x66, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x28, 0x79, 0xd0, 0x85,
-  0x3c, 0x08, 0xce, 0x01, 0x15, 0xd2, 0x61, 0x34, 0x21, 0x00, 0x2e, 0x30,
-  0xf0, 0x8a, 0x61, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x80, 0xb2, 0x07,
-  0x5f, 0xe8, 0x83, 0x60, 0x1d, 0x58, 0xa1, 0x1d, 0x46, 0x13, 0x02, 0xe0,
-  0x02, 0x03, 0x87, 0x1b, 0xe2, 0x20, 0x1e, 0xc0, 0xc0, 0x0c, 0x52, 0x80,
-  0x8f, 0x0d, 0xa5, 0x00, 0x9f, 0x59, 0x06, 0x61, 0x18, 0x4c, 0x50, 0x05,
-  0xf9, 0x98, 0xb0, 0x0a, 0xf2, 0x31, 0x3d, 0x80, 0x05, 0xf8, 0x58, 0x1e,
-  0xc4, 0x02, 0x7c, 0x8c, 0x10, 0xe4, 0x63, 0x84, 0x20, 0x9f, 0x59, 0x02,
-  0xc2, 0xfc, 0x00, 0x91, 0x8f, 0x21, 0x7d, 0x20, 0x1f, 0x13, 0x6c, 0x01,
-  0x3e, 0x26, 0xdc, 0x02, 0x7c, 0x4c, 0xa0, 0x05, 0xf9, 0x98, 0x50, 0x0b,
-  0xf2, 0x99, 0x25, 0x20, 0x06, 0x2a, 0x0c, 0x48, 0x20, 0x86, 0x81, 0x0a,
-  0x03, 0x12, 0x88, 0x61, 0x34, 0xa1, 0x15, 0x84, 0xe1, 0x86, 0x60, 0x24,
-  0xc0, 0x60, 0x96, 0xa1, 0x30, 0x82, 0x11, 0x03, 0x03, 0x00, 0x41, 0x30,
-  0x38, 0x60, 0xe2, 0x1c, 0x88, 0x11, 0x03, 0x03, 0x00, 0x41, 0x30, 0x38,
-  0x62, 0x02, 0x1d, 0x88, 0x59, 0x02, 0x63, 0xa0, 0xc2, 0x20, 0x0a, 0x86,
-  0x18, 0xa8, 0x30, 0x88, 0x82, 0x21, 0x86, 0x23, 0x04, 0x53, 0x20, 0xbe,
-  0xe1, 0x88, 0xa1, 0x14, 0x84, 0xaf, 0x84, 0x60, 0x87, 0x23, 0x88, 0x54,
-  0x20, 0xbe, 0x12, 0x82, 0x1d, 0x8e, 0x30, 0x4e, 0x41, 0xf8, 0x2a, 0x10,
-  0x76, 0x96, 0xe1, 0xd0, 0x82, 0xd1, 0x04, 0x5d, 0x18, 0x86, 0x1b, 0x02,
-  0x98, 0x00, 0x83, 0x59, 0x06, 0x24, 0x09, 0xca, 0x16, 0xfe, 0x01, 0x2e,
-  0x30, 0x6a, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x96, 0x9e, 0x00, 0x89,
-  0x86, 0x1c, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0xf1, 0x09, 0x90,
-  0x08, 0x84, 0xc2, 0x85, 0x91, 0x80, 0x0b, 0x8c, 0x1a, 0x31, 0x38, 0x00,
-  0x10, 0x04, 0x83, 0x25, 0x2c, 0x48, 0x02, 0x42, 0x87, 0x11, 0x83, 0x03,
-  0x00, 0x41, 0x30, 0x58, 0xc4, 0x82, 0x24, 0x02, 0x61, 0x96, 0x40, 0x1b,
-  0x6e, 0x50, 0x70, 0x02, 0x0c, 0x66, 0x19, 0x14, 0x2d, 0x30, 0x5b, 0xc0,
-  0x85, 0xf8, 0xcc, 0x32, 0x2c, 0xce, 0x64, 0xb9, 0x50, 0xc5, 0xc7, 0x02,
-  0x81, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x51, 0x16, 0x14, 0xf2, 0xb1, 0x22,
-  0x88, 0x4f, 0x11, 0x61, 0xa1, 0xc3, 0x0d, 0xc1, 0x4f, 0x80, 0xc1, 0x2c,
-  0x03, 0xd3, 0x04, 0x36, 0x84, 0x03, 0x7c, 0x66, 0x09, 0x24, 0x03, 0x07,
-  0x22, 0x3e, 0xb3, 0x04, 0xd2, 0x2c, 0xc3, 0x23, 0x71, 0xf6, 0x85, 0x43,
-  0x7c, 0x2c, 0x60, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x18, 0x65, 0xc1, 0x23,
-  0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xb1, 0x16, 0x3a, 0xdc, 0x10, 0xa4, 0x05,
-  0x18, 0xcc, 0x32, 0x40, 0x51, 0x60, 0xe9, 0x30, 0xc4, 0x67, 0x96, 0x40,
-  0x32, 0x82, 0x1d, 0xe0, 0x33, 0x4b, 0x20, 0x0d, 0xb4, 0x18, 0x18, 0x63,
-  0x35, 0x04, 0x24, 0x44, 0xb2, 0xe0, 0x98, 0x3a, 0xb8, 0x43, 0x7c, 0x66,
-  0x19, 0x26, 0xcb, 0x0c, 0xec, 0x1d, 0xd4, 0x20, 0x3e, 0x16, 0x08, 0xf4,
-  0xb9, 0x60, 0x98, 0x0b, 0x8c, 0xb2, 0xa0, 0x90, 0x8f, 0x15, 0x41, 0x7c,
-  0x8a, 0xb8, 0x0b, 0x1d, 0x6e, 0x08, 0xea, 0x02, 0x0c, 0x66, 0x19, 0xa8,
-  0x2a, 0xb0, 0xe1, 0x1e, 0xe0, 0x33, 0x4b, 0xa0, 0x19, 0x3d, 0x10, 0xf1,
-  0x99, 0x25, 0xd0, 0x66, 0x19, 0x2e, 0xcd, 0x0d, 0x8c, 0x0e, 0xea, 0x21,
-  0x3e, 0x16, 0x30, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x8c, 0xb2, 0xe0, 0x91,
-  0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x08, 0x0d, 0x1d, 0x6e, 0x08, 0xfe, 0x02,
-  0x0c, 0x66, 0x19, 0xb0, 0x2c, 0xb0, 0x7e, 0x18, 0xe2, 0x33, 0x4b, 0xa0,
-  0x19, 0x21, 0x12, 0xf0, 0x99, 0x25, 0xd0, 0x06, 0x8a, 0x0c, 0x71, 0x40,
-  0xfc, 0x21, 0xf1, 0x07, 0x83, 0x0d, 0x32, 0x36, 0xc0, 0xd8, 0xc0, 0x62,
-  0x83, 0x8a, 0x0d, 0xa8, 0x81, 0x22, 0x83, 0x17, 0x10, 0x7f, 0x48, 0xfc,
-  0xc1, 0x20, 0x32, 0x03, 0xf3, 0x07, 0x0b, 0xab, 0x34, 0xea, 0xe8, 0xc1,
-  0xa8, 0x59, 0x86, 0x6d, 0x0e, 0x4a, 0x61, 0x34, 0x61, 0x26, 0x86, 0xe1,
-  0x86, 0xc0, 0x34, 0xc0, 0x60, 0x96, 0x81, 0xf3, 0x82, 0xe1, 0x88, 0x62,
-  0x27, 0x86, 0xef, 0x8c, 0x61, 0x86, 0x1b, 0x82, 0x98, 0x20, 0x83, 0x1a,
-  0x02, 0x1d, 0x8e, 0x40, 0x7e, 0x62, 0xf8, 0x2a, 0x10, 0xf4, 0x94, 0x61,
-  0x86, 0x1b, 0x02, 0x9a, 0x20, 0x83, 0x0a, 0x06, 0x9d, 0x65, 0xe8, 0xe4,
-  0x20, 0x38, 0x7c, 0x18, 0xe6, 0x9a, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04,
-  0xc1, 0x20, 0xdb, 0x0d, 0xd3, 0x10, 0x0b, 0xda, 0x18, 0x4d, 0x08, 0x80,
-  0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0xe2,
-  0x90, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xb0, 0xc4, 0xa3, 0x35, 0x0e,
-  0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x6b, 0x3c, 0x5c, 0x83,
-  0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xb0, 0xc8, 0xe3, 0x35,
-  0x24, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x4f, 0x3c, 0x5c,
-  0x83, 0x2d, 0x82, 0xdd, 0xd8, 0x8b, 0xdc, 0x18, 0x4d, 0x08, 0x80, 0xd1,
-  0x04, 0x21, 0x18, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x31, 0x20, 0x0f,
-  0xd8, 0x10, 0x82, 0x0b, 0x8c, 0x9b, 0x25, 0x90, 0x83, 0xe1, 0x86, 0x6d,
-  0x3c, 0xc0, 0x60, 0x96, 0xe1, 0x03, 0x83, 0xa0, 0xd2, 0x42, 0x36, 0xe0,
-  0x02, 0xa3, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0x81, 0x8f, 0xd9,
-  0x08, 0x83, 0xbb, 0x18, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x25, 0x3e,
-  0x66, 0x23, 0x10, 0x2e, 0x18, 0xa6, 0xd8, 0xe2, 0x36, 0xe0, 0x02, 0xa3,
-  0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0xa9, 0x0f, 0xdc, 0x28, 0x03,
-  0xbe, 0x18, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0xc5, 0x3e, 0x70, 0x23,
-  0x10, 0x2e, 0x18, 0xe6, 0x02, 0xa3, 0xee, 0x30, 0xea, 0x6a, 0x62, 0x98,
-  0x53, 0x83, 0x61, 0x8e, 0x18, 0xe6, 0x88, 0x61, 0x46, 0x0c, 0x0e, 0x00,
-  0x04, 0xc1, 0x20, 0xd3, 0x8f, 0xf2, 0x08, 0x8d, 0xf9, 0x18, 0x4d, 0x08,
-  0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28,
-  0x22, 0x91, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xb0, 0x42, 0x84, 0x3d,
-  0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x4b, 0x44, 0xda,
-  0x23, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xb0, 0x46, 0xc4,
-  0x3d, 0x12, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x2f, 0x44,
-  0xda, 0x63, 0x35, 0x02, 0xfd, 0xd0, 0x0d, 0xfc, 0x18, 0x4d, 0x08, 0x80,
-  0xd1, 0x04, 0x21, 0x18, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x31, 0x18,
-  0x91, 0xf7, 0x10, 0x82, 0x0b, 0x8c, 0x9b, 0x25, 0x90, 0x83, 0xe1, 0x06,
-  0x3d, 0x08, 0x11, 0x30, 0x98, 0x65, 0x08, 0x03, 0x39, 0x08, 0xec, 0x2f,
-  0x42, 0x23, 0x3e, 0xc3, 0x11, 0x7f, 0x20, 0x1a, 0xc4, 0x37, 0xcb, 0x20,
-  0x06, 0x65, 0x10, 0xd8, 0x68, 0x80, 0x42, 0x7c, 0x2c, 0x18, 0xe8, 0x73,
-  0xc1, 0x30, 0x17, 0x18, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14,
-  0xb1, 0x22, 0x3a, 0xdc, 0x10, 0xa4, 0x08, 0x18, 0xcc, 0x32, 0x8c, 0x01,
-  0x19, 0x04, 0x36, 0xac, 0x06, 0x7c, 0x66, 0x09, 0xd2, 0xc0, 0x54, 0x83,
-  0x88, 0xcf, 0x2c, 0x41, 0x1a, 0x0c, 0x47, 0xa8, 0xc2, 0x6a, 0x08, 0xdf,
-  0x2c, 0x83, 0x19, 0xa4, 0x41, 0x60, 0xab, 0xc0, 0x1a, 0xf1, 0xb1, 0xc0,
-  0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0x60, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08,
-  0xe2, 0x53, 0x84, 0x8d, 0xe8, 0x70, 0x43, 0x40, 0x23, 0x60, 0x30, 0xcb,
-  0x70, 0x06, 0x68, 0x10, 0x18, 0x6d, 0x0c, 0xf1, 0x99, 0x25, 0x48, 0x03,
-  0x23, 0x6e, 0x03, 0x3e, 0xb3, 0x04, 0x69, 0x30, 0xd0, 0x62, 0x68, 0x63,
-  0x80, 0x91, 0x01, 0x71, 0x06, 0x02, 0x1a, 0xa8, 0x45, 0x19, 0x5c, 0x30,
-  0x8c, 0xd9, 0x86, 0x6e, 0xc4, 0x67, 0x38, 0xe2, 0x16, 0x76, 0x83, 0xf8,
-  0x66, 0x19, 0xd4, 0xa0, 0x0d, 0x02, 0xe3, 0x0d, 0x5c, 0x88, 0x8f, 0x05,
-  0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa3, 0x2c, 0x30, 0xe4, 0x63, 0x45,
-  0x10, 0x9f, 0x22, 0xc8, 0x44, 0x87, 0x1b, 0x02, 0x31, 0x01, 0x83, 0x59,
-  0x86, 0x35, 0x60, 0x83, 0xc0, 0x06, 0xf2, 0x80, 0xcf, 0x2c, 0x41, 0x1c,
-  0x58, 0x78, 0x10, 0xf1, 0x99, 0x25, 0x88, 0x83, 0xe1, 0x08, 0x71, 0x10,
-  0x0f, 0xe1, 0x9b, 0x65, 0x70, 0x83, 0x38, 0x08, 0x6c, 0x1c, 0xc6, 0x23,
-  0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x8c, 0xb2, 0x20, 0x92,
-  0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x78, 0x13, 0x1d, 0x6e, 0x08, 0xda, 0x04,
-  0x0c, 0x66, 0x19, 0xde, 0x00, 0x0e, 0x02, 0x5b, 0x8f, 0x21, 0x3e, 0xb3,
-  0x04, 0x71, 0x60, 0x04, 0x7c, 0xc0, 0x67, 0x96, 0x20, 0x0e, 0x06, 0x5a,
-  0x0c, 0x6d, 0x0d, 0x30, 0x36, 0x20, 0xde, 0x40, 0x80, 0x03, 0xdd, 0x68,
-  0x83, 0x0b, 0x86, 0xb9, 0xc0, 0xa8, 0xdb, 0x8c, 0x3a, 0xde, 0x18, 0xe6,
-  0xe2, 0x61, 0x98, 0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00,
-  0x41, 0x30, 0xc8, 0x42, 0x85, 0x4d, 0x50, 0x44, 0x4f, 0x46, 0x13, 0x02,
-  0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a,
-  0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x2c, 0x54, 0x99, 0x93,
-  0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xc0, 0x4a, 0x15, 0x3a,
-  0x49, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x2c, 0x55, 0xa9,
-  0x93, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0xe0, 0x43, 0x15,
-  0x3a, 0x91, 0x91, 0x20, 0x54, 0xc2, 0xe4, 0x4f, 0x46, 0x13, 0x02, 0x60,
-  0x34, 0x41, 0x08, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x40, 0x0c, 0x54,
-  0xc5, 0x4e, 0x84, 0xe0, 0x02, 0xe3, 0x66, 0x09, 0xe4, 0x60, 0xa0, 0xc5,
-  0x70, 0x8d, 0x8e, 0x8e, 0x38, 0x98, 0xf8, 0x84, 0x38, 0xa0, 0x23, 0x30,
-  0xb8, 0x20, 0xc4, 0x11, 0x03, 0x03, 0x00, 0x41, 0x30, 0x38, 0x03, 0x56,
-  0x81, 0x93, 0x60, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0xc4, 0xc0,
-  0x01, 0x40, 0x10, 0x0c, 0xc8, 0x00, 0x56, 0xe4, 0xc4, 0x47, 0x6e, 0x64,
-  0x55, 0x84, 0xe0, 0x4e, 0xee, 0x64, 0x4d, 0x52, 0x65, 0x96, 0x60, 0x84,
-  0x86, 0x1b, 0x58, 0x63, 0x55, 0xc0, 0x60, 0x96, 0x81, 0x0e, 0x62, 0x22,
-  0x18, 0x31, 0x30, 0x00, 0x10, 0x04, 0x83, 0x63, 0x57, 0xec, 0x44, 0x25,
-  0x46, 0x0c, 0x0c, 0x00, 0x04, 0xc1, 0xe0, 0xe0, 0x95, 0x3b, 0x51, 0x09,
-  0x13, 0xc6, 0x04, 0x3e, 0x26, 0x90, 0x09, 0x7c, 0x46, 0x13, 0x7c, 0x64,
-  0x18, 0x6e, 0x08, 0x62, 0x05, 0x0c, 0x66, 0x19, 0xea, 0xe0, 0x0e, 0x82,
-  0xe1, 0x08, 0xc3, 0x4c, 0x86, 0xef, 0x8e, 0x61, 0x86, 0x1b, 0x02, 0x1e,
-  0x21, 0x83, 0x1a, 0x02, 0x1d, 0x8e, 0x48, 0xd4, 0x64, 0xf8, 0x2a, 0x10,
-  0xf4, 0x96, 0x61, 0x86, 0x1b, 0x82, 0x1f, 0x21, 0x83, 0x0a, 0x06, 0x9d,
-  0x65, 0xb0, 0x83, 0x55, 0x08, 0x6e, 0x44, 0x86, 0x39, 0x9c, 0x18, 0x66,
-  0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x32, 0x73, 0x89, 0x95, 0x36, 0xf9,
-  0x95, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18,
-  0x4d, 0x20, 0x86, 0x22, 0x0e, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03,
-  0xab, 0x5d, 0x70, 0xe5, 0x20, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30,
-  0xb0, 0xdc, 0x25, 0x57, 0x18, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04,
-  0x03, 0xeb, 0x5d, 0x74, 0x45, 0x22, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41,
-  0x30, 0xf8, 0xda, 0x25, 0x57, 0xee, 0x24, 0x30, 0x17, 0x53, 0x21, 0x97,
-  0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0x11, 0x83, 0x03, 0x00, 0x41,
-  0x30, 0x10, 0x83, 0x77, 0xd9, 0x15, 0x21, 0xb8, 0xc0, 0xb8, 0x59, 0x82,
-  0x55, 0x18, 0x6e, 0xd8, 0xdc, 0x05, 0x0c, 0x66, 0x19, 0xf0, 0x20, 0x0f,
-  0x82, 0xa2, 0x93, 0x5e, 0x81, 0x0b, 0x8c, 0x1a, 0x31, 0x38, 0x00, 0x10,
-  0x04, 0x83, 0x65, 0x5f, 0x7c, 0x45, 0x0c, 0x44, 0x65, 0xc4, 0xe0, 0x00,
-  0x40, 0x10, 0x0c, 0x16, 0x7e, 0xf1, 0x95, 0x40, 0xb8, 0x60, 0x98, 0xba,
-  0x13, 0x71, 0x81, 0x0b, 0x8c, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83,
-  0x05, 0x64, 0xc6, 0xc5, 0x0c, 0x4e, 0x65, 0xc4, 0xe0, 0x00, 0x40, 0x10,
-  0x0c, 0x96, 0x90, 0x19, 0x97, 0x40, 0xb8, 0x60, 0x98, 0x0b, 0x8c, 0xba,
-  0xc3, 0xa8, 0x03, 0x93, 0x61, 0xae, 0x2e, 0x86, 0x39, 0x62, 0x98, 0x23,
-  0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0xac, 0x64, 0xe0, 0x85,
-  0x55, 0xfc, 0x65, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61,
-  0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10, 0x00, 0x04,
-  0xc1, 0xc0, 0x62, 0x99, 0x7b, 0x49, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40,
-  0x10, 0x0c, 0xac, 0x96, 0xc1, 0x97, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00,
-  0x04, 0xc1, 0xc0, 0x72, 0x99, 0x7c, 0x49, 0x88, 0x60, 0xc4, 0x40, 0x01,
-  0x40, 0x10, 0x0c, 0x3e, 0x96, 0xc1, 0x17, 0x5b, 0x09, 0x4a, 0xa6, 0x5c,
-  0x46, 0x66, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0xc4, 0xe0, 0x00,
-  0x40, 0x10, 0x0c, 0xc4, 0xc0, 0x65, 0xf4, 0x45, 0x08, 0x2e, 0x30, 0x6e,
-  0x96, 0x60, 0x15, 0x86, 0x1b, 0xf4, 0x80, 0x65, 0xc0, 0x60, 0x96, 0x41,
-  0x0f, 0x56, 0x21, 0x30, 0x55, 0x61, 0x95, 0xf8, 0x0c, 0x47, 0x80, 0x42,
-  0xab, 0x10, 0xdf, 0x2c, 0xc3, 0x1e, 0xf8, 0x41, 0x60, 0xae, 0x12, 0x0a,
-  0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0x60, 0x94, 0x05, 0x86,
-  0x7c, 0xac, 0x08, 0xe2, 0x53, 0x84, 0xcd, 0xe8, 0x70, 0x43, 0x40, 0x33,
-  0x60, 0x30, 0xcb, 0xc0, 0x07, 0x7d, 0x10, 0xd8, 0x60, 0x2b, 0xf0, 0x99,
-  0x25, 0x10, 0x05, 0xab, 0x15, 0x22, 0x3e, 0xb3, 0x04, 0xa2, 0x30, 0x1c,
-  0xb1, 0x0a, 0xb6, 0x22, 0x7c, 0xb3, 0x0c, 0x7f, 0x20, 0x0a, 0x81, 0xb1,
-  0xc2, 0xad, 0xc4, 0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x51,
-  0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x61, 0xa3, 0xc3, 0x0d,
-  0xc1, 0xcf, 0x80, 0xc1, 0x2c, 0x03, 0x28, 0x84, 0x42, 0x60, 0xbf, 0x32,
-  0xc4, 0x67, 0x96, 0x40, 0x14, 0x8c, 0x10, 0x17, 0xf8, 0xcc, 0x12, 0x88,
-  0xc2, 0x40, 0x8b, 0xa1, 0xf1, 0x01, 0xd6, 0x07, 0x04, 0x28, 0x08, 0xa1,
-  0xc0, 0x16, 0x7e, 0x70, 0xc1, 0x30, 0x16, 0x2e, 0xe5, 0x12, 0x9f, 0xe1,
-  0x08, 0x5c, 0x30, 0x17, 0xe2, 0x9b, 0x65, 0x18, 0x05, 0x53, 0x08, 0xec,
-  0x5c, 0x72, 0x21, 0x3e, 0x16, 0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x8c,
-  0xb2, 0xc0, 0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x78, 0x1b, 0x1d, 0x6e,
-  0x08, 0xda, 0x06, 0x0c, 0x66, 0x19, 0x48, 0xa1, 0x14, 0x02, 0x1b, 0xde,
-  0x05, 0x3e, 0xb3, 0x04, 0xaa, 0x60, 0xec, 0x42, 0xc4, 0x67, 0x96, 0x40,
-  0x15, 0x86, 0x23, 0xc6, 0xa1, 0x5d, 0x84, 0x6f, 0x96, 0xe1, 0x14, 0x54,
-  0x21, 0x30, 0x72, 0x70, 0x97, 0xf8, 0x58, 0xe0, 0xd0, 0xe7, 0x82, 0x61,
-  0x2e, 0x30, 0xca, 0x82, 0x48, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x42, 0x6f,
-  0x74, 0xb8, 0x21, 0xc0, 0x1b, 0x30, 0x98, 0x65, 0x40, 0x85, 0x54, 0x08,
-  0xcc, 0x5e, 0x86, 0xf8, 0xcc, 0x12, 0xa8, 0x82, 0x11, 0xfb, 0x02, 0x9f,
-  0x59, 0x02, 0x55, 0x18, 0x68, 0x31, 0x34, 0x52, 0xc0, 0x4a, 0x81, 0x40,
-  0x05, 0x21, 0x15, 0x78, 0xc3, 0x14, 0x2e, 0x18, 0xe6, 0x02, 0xa3, 0x6e,
-  0x33, 0xea, 0xce, 0x65, 0x98, 0xe3, 0x8f, 0x61, 0x8e, 0x18, 0xe6, 0x88,
-  0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x20, 0x63, 0x9d, 0xbb, 0x99,
-  0x99, 0xd2, 0x19, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18,
-  0x84, 0xd1, 0x04, 0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00, 0x41,
-  0x30, 0xb0, 0x66, 0xc7, 0x6f, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10,
-  0x04, 0x03, 0x8b, 0x76, 0xfe, 0x26, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00,
-  0x41, 0x30, 0xb0, 0x6a, 0x07, 0x74, 0x12, 0x22, 0x18, 0x31, 0x50, 0x00,
-  0x10, 0x04, 0x83, 0x6f, 0x76, 0xfe, 0xa6, 0x67, 0x02, 0xd6, 0x61, 0x1b,
-  0xd5, 0x19, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x31, 0x38, 0x00,
-  0x10, 0x04, 0x03, 0x31, 0xa8, 0x9d, 0xd0, 0x11, 0x82, 0x0b, 0x8c, 0x9b,
-  0x25, 0x58, 0x85, 0x81, 0x16, 0xc3, 0x35, 0xec, 0x40, 0xd5, 0xea, 0x00,
-  0x26, 0xf0, 0x40, 0x50, 0x05, 0x55, 0xcb, 0x83, 0x59, 0x06, 0x56, 0x70,
-  0x85, 0x90, 0x18, 0x8e, 0x20, 0x09, 0xb3, 0x19, 0xbe, 0x2b, 0x89, 0x61,
-  0x86, 0x1b, 0x02, 0x9e, 0x21, 0x83, 0x1a, 0x02, 0x1d, 0x8e, 0x48, 0x09,
-  0xb5, 0x19, 0xbe, 0x0a, 0x04, 0xbd, 0x95, 0x18, 0x66, 0xb8, 0x21, 0xf8,
-  0x19, 0x32, 0xa8, 0x60, 0xd0, 0x59, 0x86, 0x56, 0x10, 0x87, 0xe0, 0x46,
-  0x66, 0x98, 0xc3, 0x91, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x20,
-  0x33, 0x9f, 0xd8, 0x69, 0x9b, 0xdf, 0x19, 0x4d, 0x08, 0x80, 0xd1, 0x04,
-  0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0xe2, 0x90, 0x11,
-  0x03, 0x04, 0x00, 0x41, 0x30, 0xb0, 0xda, 0x07, 0x77, 0x0e, 0x22, 0x18,
-  0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xcb, 0x7d, 0x72, 0x87, 0x21, 0x82,
-  0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xb0, 0xde, 0x47, 0x77, 0x24, 0x22,
-  0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0xaf, 0x7d, 0x72, 0xe7, 0x6e,
-  0x02, 0xf3, 0x31, 0x1d, 0xf2, 0x19, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21,
-  0x18, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x31, 0x78, 0x9f, 0xdd, 0x11,
-  0x82, 0x0b, 0x8c, 0x9b, 0x25, 0x10, 0x87, 0xe1, 0x86, 0x9d, 0x70, 0x1f,
-  0x30, 0x98, 0x65, 0x78, 0x05, 0x58, 0x08, 0x8a, 0x6e, 0x7a, 0x07, 0x2e,
-  0x30, 0x6a, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x96, 0xfd, 0xf1, 0x1d,
-  0xb0, 0x10, 0x9d, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x58, 0xf8, 0xc7,
-  0x77, 0x02, 0xe1, 0x82, 0x61, 0xea, 0x6e, 0xc4, 0x07, 0x2e, 0x30, 0x6a,
-  0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x16, 0x10, 0x1a, 0x1f, 0xb3, 0x38,
-  0x9d, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x58, 0x42, 0x68, 0x7c, 0x02,
-  0xe1, 0x82, 0x61, 0x2e, 0x30, 0xea, 0x0e, 0xa3, 0x0e, 0x6c, 0x86, 0xb9,
-  0x3a, 0x19, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40,
-  0x10, 0x0c, 0xb2, 0x12, 0x82, 0x1f, 0xd6, 0xf1, 0x9f, 0xd1, 0x84, 0x00,
-  0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22,
-  0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x8b, 0x85, 0xee, 0x27,
-  0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xb0, 0x5a, 0x08, 0x7f,
-  0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xcb, 0x85, 0xf2,
-  0x27, 0x21, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0xf8, 0x58, 0x08,
-  0x7f, 0x6c, 0x27, 0x28, 0xa1, 0xf2, 0x19, 0xa1, 0xd1, 0x84, 0x00, 0x18,
-  0x4d, 0x10, 0x82, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x10, 0x03, 0x17,
-  0xd2, 0x1f, 0x21, 0xb8, 0xc0, 0xb8, 0x59, 0x02, 0x71, 0x18, 0x6e, 0xd0,
-  0x0b, 0x16, 0x02, 0x83, 0x59, 0x86, 0x58, 0x10, 0x87, 0xc0, 0x54, 0x87,
-  0x75, 0xe2, 0x33, 0x1c, 0xe1, 0x17, 0xad, 0x43, 0x7c, 0xb3, 0x0c, 0xb2,
-  0x50, 0x0b, 0x81, 0xb9, 0xce, 0x5f, 0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17,
-  0x0c, 0x73, 0x81, 0x51, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11,
-  0x36, 0xa4, 0xc3, 0x0d, 0x01, 0x0d, 0x81, 0xc1, 0x2c, 0xc3, 0x2c, 0xd0,
-  0x42, 0x60, 0x83, 0xed, 0xc0, 0x67, 0x96, 0x20, 0x17, 0xac, 0x76, 0x88,
-  0xf8, 0xcc, 0x12, 0xe4, 0xc2, 0x70, 0x44, 0x6a, 0xd8, 0x8e, 0xf0, 0xcd,
-  0x32, 0xd8, 0x42, 0x2e, 0x04, 0xa6, 0x1a, 0xb7, 0x13, 0x1f, 0x0b, 0x1c,
-  0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x46, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20,
-  0x3e, 0x45, 0x84, 0x91, 0x0e, 0x37, 0x04, 0x3f, 0x04, 0x06, 0xb3, 0x0c,
-  0xb7, 0x80, 0x0b, 0x81, 0xfd, 0xce, 0x10, 0x9f, 0x59, 0x82, 0x5c, 0x30,
-  0x42, 0x7c, 0xe0, 0x33, 0x4b, 0x90, 0x0b, 0x03, 0x2d, 0x86, 0x36, 0x0b,
-  0x18, 0x2d, 0x10, 0xb7, 0x20, 0xe0, 0x02, 0xda, 0xd4, 0xc2, 0x05, 0xc3,
-  0x58, 0xf8, 0x94, 0x4f, 0x7c, 0x86, 0x23, 0x70, 0xc3, 0x7c, 0x88, 0x6f,
-  0x96, 0x41, 0x17, 0x7a, 0x21, 0xb0, 0xf3, 0xc9, 0x8d, 0xf8, 0x58, 0x30,
-  0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x30, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04,
-  0xf1, 0x29, 0xe2, 0x8d, 0x74, 0xb8, 0x21, 0x68, 0x23, 0x30, 0x98, 0x65,
-  0xd8, 0x05, 0x5e, 0x08, 0x6c, 0x78, 0x1f, 0xf8, 0xcc, 0x12, 0x84, 0x83,
-  0xb1, 0x0f, 0x11, 0x9f, 0x59, 0x82, 0x70, 0x18, 0x8e, 0x18, 0x8f, 0xf6,
-  0x11, 0xbe, 0x59, 0x06, 0x5f, 0x08, 0x87, 0xc0, 0xc8, 0xc3, 0x7d, 0xe2,
-  0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x28, 0x0b, 0x22, 0xf9,
-  0x58, 0x11, 0xc4, 0xa7, 0x08, 0x3d, 0xd2, 0xe1, 0x86, 0x00, 0x8f, 0xc0,
-  0x60, 0x96, 0xe1, 0x17, 0xc0, 0x21, 0x30, 0xfb, 0x19, 0xe2, 0x33, 0x4b,
-  0x10, 0x0e, 0x46, 0xec, 0x0f, 0x7c, 0x66, 0x09, 0xc2, 0x61, 0xa0, 0xc5,
-  0xd0, 0x76, 0x01, 0xe3, 0x05, 0xe2, 0x17, 0x04, 0x70, 0xe0, 0x9d, 0x5e,
-  0xb8, 0x60, 0x98, 0x0b, 0x8c, 0xba, 0xcd, 0xa8, 0x3b, 0x9f, 0x61, 0x8e,
-  0x5f, 0x86, 0x39, 0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10,
-  0x04, 0x83, 0x8c, 0x95, 0xee, 0x68, 0x86, 0x4a, 0x69, 0x34, 0x21, 0x00,
-  0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88,
-  0x44, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xc0, 0x9a, 0x25, 0x3f, 0x4a,
-  0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x2c, 0x5a, 0xfa, 0xa3,
-  0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xc0, 0xaa, 0x25, 0x50,
-  0x4a, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0xbe, 0x59, 0xfa,
-  0xa3, 0x1e, 0x0a, 0x58, 0x89, 0x8d, 0x54, 0x69, 0x34, 0x21, 0x00, 0x46,
-  0x13, 0x84, 0x60, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xc4, 0xa0, 0x96,
-  0x42, 0x49, 0x08, 0x2e, 0x30, 0x6e, 0x96, 0x40, 0x1c, 0x06, 0x5a, 0x0c,
-  0xd7, 0x68, 0x05, 0x55, 0x0c, 0x58, 0x01, 0x26, 0x5e, 0x41, 0x08, 0x07,
-  0x55, 0x0c, 0x60, 0x61, 0x96, 0x61, 0x1c, 0xca, 0x21, 0x44, 0x86, 0x23,
-  0x4c, 0xc4, 0x8c, 0x86, 0xef, 0x4e, 0x64, 0x98, 0xe1, 0x86, 0x80, 0x87,
-  0xc8, 0xa0, 0x86, 0x40, 0x87, 0x23, 0x4e, 0x44, 0x8d, 0x86, 0xaf, 0x02,
-  0x41, 0x2f, 0x45, 0x86, 0x19, 0x6e, 0x08, 0x7e, 0x88, 0x0c, 0x2a, 0x18,
-  0x74, 0x96, 0x81, 0x1c, 0xf2, 0x21, 0xb8, 0x11, 0x1a, 0xe6, 0x70, 0x66,
-  0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xc8, 0xcc, 0x29, 0x96, 0xda,
-  0xe8, 0x97, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06,
-  0x61, 0x34, 0x81, 0x18, 0x8a, 0x38, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10,
-  0x0c, 0xac, 0x76, 0xc2, 0xa5, 0x83, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04,
-  0xc1, 0xc0, 0x72, 0xa7, 0x5c, 0x62, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40,
-  0x10, 0x0c, 0xac, 0x77, 0xd2, 0x25, 0x89, 0x08, 0x46, 0x0c, 0x14, 0x00,
-  0x04, 0xc1, 0xe0, 0x6b, 0xa7, 0x5c, 0xba, 0xa3, 0xc0, 0x9c, 0x4c, 0x89,
-  0x9c, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x0c, 0x0e, 0x00,
-  0x04, 0xc1, 0x40, 0x0c, 0xde, 0x69, 0x97, 0x84, 0xe0, 0x02, 0xe3, 0x66,
-  0x09, 0xf2, 0x61, 0xb8, 0x61, 0x47, 0xdc, 0x09, 0x0c, 0x66, 0x19, 0xcc,
-  0xe1, 0x1c, 0x82, 0xa2, 0xa3, 0x5e, 0x82, 0x0b, 0x8c, 0x1a, 0x31, 0x38,
-  0x00, 0x10, 0x04, 0x83, 0x65, 0x9f, 0x7c, 0x49, 0x4c, 0x44, 0x69, 0xc4,
-  0xe0, 0x00, 0x40, 0x10, 0x0c, 0x16, 0x7e, 0xf2, 0xa5, 0x40, 0xb8, 0x60,
-  0x98, 0xba, 0x23, 0x71, 0x82, 0x0b, 0x8c, 0x1a, 0x31, 0x38, 0x00, 0x10,
-  0x04, 0x83, 0x05, 0xa4, 0xc6, 0x89, 0x4c, 0x4e, 0x69, 0xc4, 0xe0, 0x00,
-  0x40, 0x10, 0x0c, 0x96, 0x90, 0x1a, 0xa7, 0x40, 0xb8, 0x60, 0x98, 0x0b,
-  0x8c, 0xba, 0xc3, 0xa8, 0x03, 0xa3, 0x61, 0xae, 0x6e, 0x86, 0x39, 0x62,
-  0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0xac, 0xa4,
-  0xe0, 0x89, 0x95, 0xfc, 0x69, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60,
-  0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10,
-  0x00, 0x04, 0xc1, 0xc0, 0x62, 0xa9, 0x7b, 0x4a, 0x88, 0x60, 0xc4, 0x00,
-  0x01, 0x40, 0x10, 0x0c, 0xac, 0x96, 0xc2, 0xa7, 0x84, 0x08, 0x46, 0x0c,
-  0x10, 0x00, 0x04, 0xc1, 0xc0, 0x72, 0xa9, 0x7c, 0x4a, 0x88, 0x60, 0xc4,
-  0x40, 0x01, 0x40, 0x10, 0x0c, 0x3e, 0x96, 0xc2, 0x27, 0x5b, 0x0a, 0x4a,
-  0xaa, 0x9c, 0x46, 0x6a, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0xc4,
-  0xe0, 0x00, 0x40, 0x10, 0x0c, 0xc4, 0xc0, 0xa5, 0xf4, 0x49, 0x08, 0x2e,
-  0x30, 0x6e, 0x96, 0x20, 0x1f, 0x86, 0x1b, 0xf4, 0x84, 0xa5, 0xc0, 0x60,
-  0x96, 0x01, 0x1d, 0xf2, 0x21, 0x30, 0x55, 0x62, 0xa5, 0xf8, 0x0c, 0x47,
-  0x80, 0x4a, 0x2b, 0x11, 0xdf, 0x2c, 0x43, 0x3a, 0xb0, 0x43, 0x60, 0xae,
-  0x14, 0x2a, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0x60, 0x94,
-  0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x84, 0x4d, 0xe9, 0x70, 0x43,
-  0x40, 0x53, 0x60, 0x30, 0xcb, 0xa0, 0x0e, 0xeb, 0x10, 0xd8, 0x60, 0x4b,
-  0xf0, 0x99, 0x25, 0x80, 0x07, 0xab, 0x25, 0x22, 0x3e, 0xb3, 0x04, 0xf0,
-  0x30, 0x1c, 0xb1, 0x2a, 0xb6, 0x24, 0x7c, 0xb3, 0x0c, 0xed, 0x00, 0x0f,
-  0x81, 0xb1, 0xca, 0x2d, 0xc5, 0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73,
-  0x81, 0x51, 0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x61, 0xa5,
-  0xc3, 0x0d, 0xc1, 0x4f, 0x81, 0xc1, 0x2c, 0x83, 0x3b, 0xbc, 0x43, 0x60,
-  0xbf, 0x34, 0xc4, 0x67, 0x96, 0x00, 0x1e, 0x8c, 0x10, 0x27, 0xf8, 0xcc,
-  0x12, 0xc0, 0xc3, 0x40, 0x8b, 0xa1, 0xa9, 0x03, 0xb6, 0x0e, 0x84, 0x3b,
-  0x08, 0xef, 0xc0, 0x56, 0xec, 0x70, 0xc1, 0x30, 0x16, 0x4e, 0xe5, 0x14,
-  0x9f, 0xe1, 0x08, 0x5b, 0x31, 0x27, 0xe2, 0x9b, 0x65, 0x88, 0x07, 0x7a,
-  0x08, 0xec, 0x9c, 0x6e, 0x25, 0x3e, 0x16, 0x0c, 0xf4, 0xb9, 0x60, 0x98,
-  0x0b, 0x8c, 0xb2, 0xc0, 0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x78, 0x2b,
-  0x1d, 0x6e, 0x08, 0xda, 0x0a, 0x0c, 0x66, 0x19, 0xe4, 0x61, 0x1e, 0x02,
-  0x1b, 0xde, 0x09, 0x3e, 0xb3, 0x04, 0xf8, 0x60, 0xec, 0x44, 0xc4, 0x67,
-  0x96, 0x00, 0x1f, 0x86, 0x23, 0xc2, 0xa5, 0x9d, 0x84, 0x6f, 0x96, 0xa1,
-  0x1e, 0xf0, 0x21, 0x30, 0x71, 0x71, 0xa7, 0xf8, 0x58, 0xe0, 0xd0, 0xe7,
-  0x82, 0x61, 0x2e, 0x30, 0xca, 0x82, 0x48, 0x3e, 0x56, 0x04, 0xf1, 0x29,
-  0x42, 0xaf, 0x74, 0xb8, 0x21, 0xc0, 0x2b, 0x30, 0x98, 0x65, 0xb0, 0x87,
-  0x7b, 0x08, 0xcc, 0x9e, 0x86, 0xf8, 0xcc, 0x12, 0xe0, 0x83, 0x11, 0xfb,
-  0x04, 0x9f, 0x59, 0x02, 0x7c, 0x18, 0x68, 0x31, 0x34, 0x79, 0xc0, 0xe6,
-  0x81, 0xb0, 0x07, 0xe1, 0x1e, 0x70, 0x8b, 0x1e, 0x2e, 0x18, 0xe6, 0x02,
-  0xa3, 0x6e, 0x33, 0xea, 0xce, 0x69, 0x98, 0xe3, 0x9f, 0x61, 0x8e, 0x18,
-  0xe6, 0x88, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x20, 0x63, 0xad,
-  0xbb, 0x9a, 0xa9, 0xd2, 0x1a, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18,
-  0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04,
-  0x00, 0x41, 0x30, 0xb0, 0x66, 0xcb, 0xaf, 0x12, 0x22, 0x18, 0x31, 0x40,
-  0x00, 0x10, 0x04, 0x03, 0x8b, 0xb6, 0xfe, 0x2a, 0x21, 0x82, 0x11, 0x03,
-  0x04, 0x00, 0x41, 0x30, 0xb0, 0x6a, 0x0b, 0xb4, 0x12, 0x22, 0x18, 0x31,
-  0x50, 0x00, 0x10, 0x04, 0x83, 0x6f, 0xb6, 0xfe, 0xaa, 0xa7, 0x02, 0xd6,
-  0x62, 0x2b, 0xd5, 0x1a, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x31,
-  0x38, 0x00, 0x10, 0x04, 0x03, 0x31, 0xa8, 0xad, 0xd0, 0x12, 0x82, 0x0b,
-  0x8c, 0x9b, 0x25, 0xc8, 0x87, 0x81, 0x16, 0xc3, 0x35, 0xc8, 0x41, 0x55,
-  0x83, 0x71, 0x80, 0x09, 0x73, 0x10, 0xf0, 0x41, 0x55, 0x83, 0x73, 0x98,
-  0x65, 0xd0, 0x07, 0x7e, 0x08, 0x99, 0xe1, 0x08, 0x92, 0x31, 0xab, 0xe1,
-  0xbb, 0x92, 0x19, 0x66, 0xb8, 0x21, 0xe0, 0x29, 0x32, 0xa8, 0x21, 0xd0,
-  0xe1, 0x88, 0x93, 0x51, 0xab, 0xe1, 0xab, 0x40, 0xd0, 0x4b, 0x99, 0x61,
-  0x86, 0x1b, 0x82, 0x9f, 0x22, 0x83, 0x0a, 0x06, 0x9d, 0x65, 0xd8, 0x07,
-  0x98, 0x08, 0x6e, 0xa4, 0x86, 0x39, 0x1c, 0x1a, 0x66, 0xc4, 0xe0, 0x00,
-  0x40, 0x10, 0x0c, 0x32, 0xf3, 0x8a, 0xad, 0xb6, 0xfa, 0xad, 0xd1, 0x84,
-  0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86,
-  0x22, 0x0e, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xab, 0xbd, 0x70,
-  0xeb, 0x20, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xb0, 0xdc, 0x2b,
-  0xb7, 0x18, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xeb, 0xbd,
-  0x74, 0x4b, 0x22, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0xf8, 0xda,
-  0x2b, 0xb7, 0xee, 0x2a, 0x30, 0x2f, 0xd3, 0x22, 0xaf, 0xd1, 0x84, 0x00,
-  0x18, 0x4d, 0x10, 0x82, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x10, 0x83,
-  0xf7, 0xda, 0x2d, 0x21, 0xb8, 0xc0, 0xb8, 0x59, 0x02, 0x98, 0x18, 0x6e,
-  0xd8, 0x19, 0xf7, 0x02, 0x83, 0x59, 0x86, 0x7e, 0xf0, 0x87, 0xa0, 0xe8,
-  0xaa, 0xb7, 0xe0, 0x02, 0xa3, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60,
-  0xd9, 0x2f, 0xdf, 0x02, 0x1b, 0xd1, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04,
-  0x83, 0x85, 0xbf, 0x7c, 0x2b, 0x10, 0x2e, 0x18, 0xa6, 0xee, 0x4a, 0xbc,
-  0xe0, 0x02, 0xa3, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0x01, 0xb1,
-  0xf1, 0x22, 0x9b, 0xd3, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x25,
-  0xc4, 0xc6, 0x2b, 0x10, 0x2e, 0x18, 0xe6, 0x02, 0xa3, 0xee, 0x30, 0xea,
-  0xc0, 0x6a, 0x98, 0xab, 0xa3, 0x61, 0x8e, 0x18, 0xe6, 0x88, 0x61, 0x46,
-  0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x20, 0x2b, 0x31, 0xf8, 0x62, 0x2d, 0xff,
-  0x1a, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1,
-  0x04, 0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xb0,
-  0x58, 0xec, 0xbe, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03,
-  0xab, 0xc5, 0xf0, 0x2b, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30,
-  0xb0, 0x5c, 0x2c, 0xbf, 0x12, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04,
-  0x83, 0x8f, 0xc5, 0xf0, 0xcb, 0xb6, 0x82, 0x12, 0x2b, 0xaf, 0x11, 0x1b,
-  0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x31, 0x38, 0x00, 0x10, 0x04,
-  0x03, 0x31, 0x70, 0x31, 0xfd, 0x12, 0x82, 0x0b, 0x8c, 0x9b, 0x25, 0x80,
-  0x89, 0xe1, 0x06, 0xbd, 0x61, 0x31, 0x30, 0x98, 0x65, 0xf8, 0x07, 0x98,
-  0x08, 0x4c, 0xb5, 0x58, 0x2b, 0x3e, 0xc3, 0x11, 0x7e, 0xd3, 0x5a, 0xc4,
-  0x37, 0xcb, 0x00, 0x12, 0x23, 0x11, 0x98, 0x6b, 0xfd, 0x4d, 0x7c, 0x2c,
-  0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x18, 0x65, 0x81, 0x21, 0x1f, 0x2b,
-  0x82, 0xf8, 0x14, 0x61, 0x63, 0x3a, 0xdc, 0x10, 0xd0, 0x18, 0x18, 0xcc,
-  0x32, 0x84, 0x84, 0x48, 0x04, 0x36, 0xd8, 0x16, 0x7c, 0x66, 0x09, 0x4e,
-  0xc2, 0x6a, 0x8b, 0x88, 0xcf, 0x2c, 0xc1, 0x49, 0x0c, 0x47, 0xa4, 0x8e,
-  0x6d, 0x09, 0xdf, 0x2c, 0x03, 0x49, 0x9c, 0x44, 0x60, 0xaa, 0x73, 0x5b,
-  0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0x60, 0x94, 0x05, 0x91,
-  0x7c, 0xac, 0x08, 0xe2, 0x53, 0x44, 0x98, 0xe9, 0x70, 0x43, 0xf0, 0x63,
-  0x60, 0x30, 0xcb, 0x50, 0x12, 0x26, 0x11, 0xd8, 0x6f, 0x0d, 0xf1, 0x99,
-  0x25, 0x38, 0x09, 0x23, 0xc4, 0x0b, 0x3e, 0xb3, 0x04, 0x27, 0x31, 0xd0,
-  0x62, 0x68, 0x21, 0x81, 0x89, 0x04, 0x51, 0x12, 0x82, 0x49, 0xa0, 0xdd,
-  0x48, 0x5c, 0x30, 0x8c, 0x85, 0x57, 0x79, 0xc5, 0x67, 0x38, 0xc2, 0x76,
-  0xcc, 0x8b, 0xf8, 0x66, 0x19, 0x50, 0x62, 0x25, 0x02, 0x3b, 0xaf, 0xdb,
-  0x89, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa3, 0x2c, 0x30,
-  0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xde, 0x4c, 0x87, 0x1b, 0x82, 0x36,
-  0x03, 0x83, 0x59, 0x86, 0x94, 0x50, 0x89, 0xc0, 0x86, 0xf7, 0x82, 0xcf,
-  0x2c, 0xc1, 0x4b, 0x18, 0x7b, 0x11, 0xf1, 0x99, 0x25, 0x78, 0x89, 0xe1,
-  0x88, 0xf0, 0x69, 0x2f, 0xe1, 0x9b, 0x65, 0x60, 0x89, 0x97, 0x08, 0x4c,
-  0x7c, 0xdc, 0x2b, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x8c,
-  0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xd0, 0x33, 0x1d, 0x6e,
-  0x08, 0xf0, 0x0c, 0x0c, 0x66, 0x19, 0x5a, 0xc2, 0x25, 0x02, 0xb3, 0xaf,
-  0x21, 0x3e, 0xb3, 0x04, 0x2f, 0x61, 0xc4, 0x7e, 0xc1, 0x67, 0x96, 0xe0,
-  0x25, 0x06, 0x5a, 0x0c, 0x2d, 0x25, 0x30, 0x95, 0x20, 0x5a, 0x42, 0x70,
-  0x09, 0xdc, 0x5b, 0x89, 0x0b, 0x86, 0xb9, 0xc0, 0xa8, 0xdb, 0x8c, 0xba,
-  0xf3, 0x1a, 0xe6, 0xf8, 0x69, 0x98, 0x23, 0x86, 0x39, 0x62, 0x98, 0x11,
-  0x83, 0x03, 0x00, 0x41, 0x30, 0xc8, 0x58, 0xed, 0xce, 0x66, 0xac, 0xd4,
-  0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34,
-  0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xac,
-  0x59, 0xf3, 0xb3, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xc0,
-  0xa2, 0xb5, 0x3f, 0x4b, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c,
-  0xac, 0x5a, 0x03, 0xb5, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1,
-  0xe0, 0x9b, 0xb5, 0x3f, 0xeb, 0xb1, 0x80, 0xd5, 0xd8, 0x4c, 0xd5, 0x46,
-  0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1,
-  0x40, 0x0c, 0x6a, 0x2d, 0xd4, 0x84, 0xe0, 0x02, 0xe3, 0x66, 0x09, 0x60,
-  0x62, 0xa0, 0xc5, 0x70, 0x8d, 0x7d, 0x50, 0xe5, 0x40, 0x1f, 0x60, 0xa2,
-  0x1f, 0x84, 0x97, 0x50, 0xe5, 0xc0, 0x1f, 0x46, 0x0c, 0x0c, 0x00, 0x04,
-  0xc1, 0xe0, 0x08, 0xb7, 0x3d, 0x83, 0x29, 0x13, 0x11, 0x91, 0x89, 0x8f,
-  0x09, 0x81, 0x7c, 0x2c, 0x20, 0x19, 0xf8, 0x58, 0x51, 0x12, 0xf1, 0xb1,
-  0x22, 0x90, 0x8f, 0x05, 0x27, 0x01, 0x9f, 0x11, 0x03, 0x03, 0x00, 0x41,
-  0x30, 0x38, 0xd0, 0x4d, 0xd4, 0x6c, 0xca, 0x84, 0x22, 0x3e, 0x16, 0x08,
-  0xf2, 0xb1, 0xe0, 0x80, 0xcf, 0x05, 0x21, 0x8e, 0x18, 0x18, 0x00, 0x08,
-  0x82, 0xc1, 0x19, 0x88, 0x9b, 0xa9, 0x05, 0xa3, 0x09, 0x01, 0x30, 0x9a,
-  0x20, 0x04, 0x23, 0x06, 0x0e, 0x00, 0x82, 0x60, 0x40, 0x06, 0xe6, 0x86,
-  0x6a, 0x74, 0xd6, 0x66, 0xe1, 0x26, 0x04, 0xad, 0xd6, 0x6a, 0xa1, 0xf6,
-  0x6b, 0xb3, 0x04, 0x23, 0x34, 0xdc, 0x20, 0x5e, 0xe3, 0x06, 0x06, 0xb3,
-  0x0c, 0x32, 0x31, 0x42, 0xc1, 0x88, 0x81, 0x01, 0x80, 0x20, 0x18, 0x1c,
-  0xf1, 0xc6, 0x6a, 0x60, 0x65, 0x01, 0xa8, 0xc1, 0x67, 0xc4, 0xc0, 0x00,
-  0x40, 0x10, 0x0c, 0x8e, 0x79, 0x73, 0xb5, 0xb0, 0xb2, 0x40, 0xd4, 0xe0,
-  0x33, 0x9a, 0x40, 0x67, 0xc3, 0x70, 0x43, 0x70, 0x6e, 0x60, 0x30, 0xcb,
-  0x30, 0x13, 0x35, 0x11, 0x0c, 0x47, 0x14, 0x7c, 0x36, 0x7c, 0x67, 0x0c,
-  0x33, 0xdc, 0x10, 0xc8, 0x19, 0x19, 0xd4, 0x10, 0xe8, 0x70, 0xc4, 0x01,
-  0x6a, 0xc3, 0x57, 0x81, 0xa0, 0x97, 0x0c, 0x33, 0xdc, 0x10, 0xd4, 0x19,
-  0x19, 0x54, 0x30, 0xe8, 0x2c, 0x03, 0x4d, 0xa4, 0x45, 0x70, 0x39, 0x36,
-  0xcc, 0xb9, 0xd5, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x90, 0xf1,
-  0xdb, 0xb9, 0x8d, 0x5a, 0xbd, 0x8d, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10,
-  0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x71, 0xc8, 0x88, 0x01,
-  0x02, 0x80, 0x20, 0x18, 0x58, 0x23, 0xe7, 0x6e, 0x07, 0x11, 0x8c, 0x18,
-  0x20, 0x00, 0x08, 0x82, 0x81, 0x45, 0x72, 0xef, 0xc6, 0x10, 0xc1, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x58, 0x25, 0x07, 0x6f, 0x12, 0x11, 0x8c,
-  0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0x37, 0x72, 0xef, 0xd6, 0x6a, 0x01,
-  0xbf, 0xf1, 0x9a, 0xbe, 0x8d, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c,
-  0x18, 0x1c, 0x00, 0x08, 0x82, 0x81, 0x18, 0x94, 0x5c, 0xbc, 0x09, 0xc1,
-  0x05, 0xc6, 0xcd, 0x12, 0xa4, 0xc5, 0x70, 0xc3, 0x46, 0x72, 0x60, 0x30,
-  0xcb, 0x60, 0x13, 0x37, 0x11, 0x94, 0xaa, 0xcd, 0x1b, 0x5c, 0x60, 0xd4,
-  0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x2c, 0x31, 0x47, 0x6f, 0x61, 0x80,
-  0x6b, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xb0, 0xc8, 0x1c, 0xbd, 0x05,
-  0xc2, 0x05, 0xc3, 0x54, 0xab, 0xe1, 0x1b, 0x5c, 0x60, 0xd4, 0x88, 0xc1,
-  0x01, 0x80, 0x20, 0x18, 0x2c, 0x36, 0x97, 0x6f, 0x64, 0xd0, 0x6b, 0x23,
-  0x06, 0x07, 0x00, 0x82, 0x60, 0xb0, 0xdc, 0x5c, 0xbe, 0x05, 0xc2, 0x05,
-  0xc3, 0x5c, 0x60, 0xd4, 0x1d, 0x46, 0x9d, 0x9d, 0x0d, 0x73, 0xab, 0x35,
-  0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18,
-  0x64, 0x3b, 0x67, 0x72, 0xe2, 0x46, 0x73, 0xa3, 0x09, 0x01, 0x30, 0x9a,
-  0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32,
-  0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x96, 0xd8, 0xb5, 0x5c, 0x42, 0x04,
-  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x60, 0x8d, 0x9d, 0xcb, 0x25, 0x44,
-  0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x16, 0xd9, 0xbd, 0x5c, 0x42,
-  0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xf0, 0x89, 0x9d, 0xcb, 0xb1,
-  0x5b, 0xb0, 0x73, 0xfb, 0x96, 0x73, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20,
-  0x04, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x20, 0x06, 0x64, 0x07, 0x73,
-  0x42, 0x70, 0x81, 0x71, 0xb3, 0x04, 0x69, 0x31, 0xdc, 0xa0, 0x07, 0x62,
-  0x07, 0x06, 0xb3, 0x0c, 0x38, 0x91, 0x16, 0x81, 0x81, 0x9b, 0xb8, 0xc5,
-  0x67, 0x38, 0xe2, 0x0f, 0xc6, 0x8d, 0xf8, 0x66, 0x19, 0x72, 0x82, 0x27,
-  0x02, 0x23, 0x37, 0x50, 0x88, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6,
-  0x02, 0xa3, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xd8, 0x4e,
-  0x87, 0x1b, 0x02, 0xb5, 0x03, 0x83, 0x59, 0x06, 0x9d, 0xd8, 0x89, 0xc0,
-  0x06, 0x76, 0x83, 0xcf, 0x2c, 0x01, 0x58, 0xd8, 0xba, 0x11, 0xf1, 0x99,
-  0x25, 0x00, 0x8b, 0xe1, 0x08, 0x55, 0x60, 0x37, 0xe1, 0x9b, 0x65, 0xe8,
-  0x09, 0xb0, 0x08, 0x6c, 0x15, 0xda, 0x2d, 0x3e, 0x16, 0x38, 0xf4, 0xb9,
-  0x60, 0x98, 0x0b, 0x8c, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a,
-  0xb8, 0x3b, 0x1d, 0x6e, 0x08, 0xea, 0x0e, 0x0c, 0x66, 0x19, 0x7c, 0xe2,
-  0x27, 0x02, 0xab, 0xb7, 0x21, 0x3e, 0xb3, 0x04, 0x60, 0x61, 0x04, 0xbe,
-  0xc1, 0x67, 0x96, 0x00, 0x2c, 0x06, 0x5a, 0x0c, 0x4d, 0x27, 0xb0, 0x9d,
-  0x20, 0x7c, 0x42, 0xf8, 0x09, 0xb5, 0xe0, 0x89, 0x0b, 0x86, 0xb1, 0x7b,
-  0xdb, 0xb7, 0xf8, 0x0c, 0x47, 0xd8, 0x02, 0xbf, 0x11, 0xdf, 0x2c, 0x43,
-  0x58, 0x90, 0x45, 0x60, 0xfd, 0x76, 0x0b, 0xf1, 0xb1, 0x60, 0xa0, 0xcf,
-  0x05, 0xc3, 0x5c, 0x60, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53,
-  0x44, 0xe9, 0xe9, 0x70, 0x43, 0x30, 0x7a, 0x60, 0x30, 0xcb, 0x20, 0x16,
-  0x63, 0x11, 0xd8, 0x50, 0x72, 0xf0, 0x99, 0x25, 0x40, 0x0b, 0x13, 0x39,
-  0x22, 0x3e, 0xb3, 0x04, 0x68, 0x31, 0x1c, 0x11, 0x0e, 0x23, 0x27, 0x7c,
-  0xb3, 0x0c, 0x65, 0x81, 0x16, 0x81, 0x89, 0x03, 0xc9, 0xc5, 0xc7, 0x02,
-  0x87, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x51, 0x16, 0x44, 0xf2, 0xb1, 0x22,
-  0x88, 0x4f, 0x11, 0xb0, 0xa7, 0xc3, 0x0d, 0x81, 0xeb, 0x81, 0xc1, 0x2c,
-  0x83, 0x59, 0x9c, 0x45, 0x60, 0x2c, 0x37, 0xc4, 0x67, 0x96, 0x00, 0x2d,
-  0x8c, 0x88, 0x39, 0xf8, 0xcc, 0x12, 0xa0, 0xc5, 0x40, 0x8b, 0xa1, 0x89,
-  0x05, 0x36, 0x16, 0x84, 0x59, 0x08, 0x67, 0x81, 0x1b, 0x64, 0x71, 0xc1,
-  0x30, 0x17, 0x18, 0x75, 0x9b, 0x51, 0xd7, 0x6f, 0xc3, 0x9c, 0x8c, 0x0d,
-  0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06,
-  0x99, 0xf8, 0xb5, 0x5e, 0xda, 0xed, 0xde, 0x68, 0x42, 0x00, 0x8c, 0x26,
-  0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x95, 0x7e, 0xb4, 0x97, 0x10, 0xc1,
-  0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x58, 0xea, 0x57, 0x7b, 0x09, 0x11,
-  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0xb5, 0x7e, 0xb6, 0x97, 0x10,
-  0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x7c, 0xe9, 0x57, 0x7b, 0x73,
-  0x17, 0x88, 0x9f, 0xe8, 0x81, 0xdf, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08,
-  0xc1, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x88, 0xc1, 0xfa, 0xdd, 0x9e,
-  0x10, 0x5c, 0x60, 0xdc, 0x2c, 0x41, 0x5a, 0x0c, 0xb4, 0x18, 0xae, 0x41,
-  0x13, 0xf6, 0x1e, 0xcc, 0x04, 0x4c, 0xd8, 0x84, 0x80, 0x16, 0xf6, 0x1e,
-  0xdc, 0xc4, 0x2c, 0x83, 0x5a, 0xb0, 0x45, 0x48, 0x0c, 0x47, 0x98, 0x04,
-  0xdf, 0x0d, 0xdf, 0x9d, 0xc4, 0x30, 0xc3, 0x0d, 0x81, 0xdc, 0x91, 0x41,
-  0x0d, 0x81, 0x0e, 0x47, 0x9c, 0x04, 0xe8, 0x0d, 0x5f, 0x05, 0x82, 0x5e,
-  0x4a, 0x0c, 0x33, 0xdc, 0x10, 0xd4, 0x1d, 0x19, 0x54, 0x30, 0xe8, 0x2c,
-  0xc3, 0x5a, 0x80, 0x46, 0x70, 0x39, 0x37, 0xcc, 0xb9, 0xd9, 0x30, 0x23,
-  0x06, 0x07, 0x00, 0x82, 0x60, 0x90, 0xf1, 0xdf, 0xf9, 0x8d, 0x5e, 0xfd,
-  0x8d, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68,
-  0x02, 0x31, 0x14, 0x71, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x58,
-  0x23, 0x18, 0xb8, 0xdf, 0x41, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60,
-  0x60, 0x91, 0x60, 0xf0, 0x7e, 0x0c, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08,
-  0x82, 0x81, 0x55, 0x82, 0x01, 0xfc, 0x49, 0x44, 0x30, 0x62, 0xa0, 0x00,
-  0x20, 0x08, 0x06, 0xdf, 0x08, 0x06, 0xef, 0xd7, 0x7a, 0x01, 0xff, 0xf1,
-  0x9e, 0xfe, 0x8d, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x18, 0x1c,
-  0x00, 0x08, 0x82, 0x81, 0x18, 0x94, 0x60, 0x10, 0x7f, 0x42, 0x70, 0x81,
-  0x71, 0xb3, 0x04, 0xa0, 0x31, 0xdc, 0xb0, 0x13, 0x24, 0x18, 0x80, 0xc1,
-  0x2c, 0x43, 0x5b, 0xb8, 0x45, 0x50, 0xaa, 0x37, 0x7f, 0x70, 0x81, 0x51,
-  0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xb0, 0xc4, 0x60, 0x40, 0x7f, 0x62,
-  0x81, 0x7b, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xb0, 0xc8, 0x60, 0x40,
-  0x7f, 0x81, 0x70, 0xc1, 0x30, 0xd5, 0x7a, 0xf8, 0x07, 0x17, 0x18, 0x35,
-  0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x8b, 0x0d, 0x06, 0xf9, 0x47, 0x16,
-  0xbd, 0x37, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0xcb, 0x0d, 0x06, 0xf9,
-  0x17, 0x08, 0x17, 0x0c, 0x73, 0x81, 0x51, 0x77, 0x18, 0x75, 0x76, 0x37,
-  0xcc, 0xad, 0xda, 0x30, 0x47, 0x0c, 0x73, 0xc4, 0x30, 0x23, 0x06, 0x07,
-  0x00, 0x82, 0x60, 0x90, 0xed, 0x60, 0x60, 0x82, 0x81, 0xf8, 0xd1, 0x60,
-  0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3,
-  0x09, 0xc4, 0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x60,
-  0x89, 0x61, 0xd0, 0x82, 0x41, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82,
-  0x60, 0x60, 0x8d, 0x61, 0xe0, 0x82, 0x41, 0x42, 0x04, 0x23, 0x06, 0x08,
-  0x00, 0x82, 0x60, 0x60, 0x91, 0x61, 0xf0, 0x82, 0x41, 0x42, 0x04, 0x23,
-  0x06, 0x0a, 0x00, 0x82, 0x60, 0xf0, 0x89, 0x61, 0xe0, 0x82, 0x01, 0xfb,
-  0x05, 0x3b, 0x18, 0xec, 0x5f, 0x0e, 0x06, 0xa3, 0x09, 0x01, 0x30, 0x9a,
-  0x20, 0x04, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x20, 0x06, 0x64, 0x18,
-  0xc0, 0x60, 0x20, 0x04, 0x17, 0x18, 0x37, 0x4b, 0x00, 0x1a, 0xc3, 0x0d,
-  0x7a, 0x21, 0x86, 0x01, 0x18, 0xcc, 0x32, 0xbc, 0x05, 0x68, 0x04, 0x06,
-  0x7e, 0xe2, 0x17, 0x9f, 0xe1, 0x08, 0xd0, 0x18, 0x3f, 0xe2, 0x9b, 0x65,
-  0x80, 0x8b, 0xb9, 0x08, 0x8c, 0xfc, 0x42, 0x23, 0x3e, 0x16, 0x0c, 0xf4,
-  0xb9, 0x60, 0x98, 0x0b, 0x8c, 0xb2, 0xc0, 0x90, 0x8f, 0x15, 0x41, 0x7c,
-  0x8a, 0x60, 0xc3, 0x40, 0x87, 0x1b, 0x02, 0x35, 0x0c, 0xc0, 0x60, 0x96,
-  0x21, 0x2e, 0xe4, 0x22, 0xb0, 0x81, 0xfd, 0xe0, 0x33, 0x4b, 0x70, 0x17,
-  0xb6, 0x7e, 0x44, 0x7c, 0x66, 0x09, 0xee, 0x62, 0x38, 0x62, 0x35, 0xd8,
-  0x4f, 0xf8, 0x66, 0x19, 0xe8, 0xe2, 0x2e, 0x02, 0x63, 0x8d, 0xf6, 0x8b,
-  0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa3, 0x2c, 0x88, 0xe4,
-  0x63, 0x45, 0x10, 0x9f, 0x22, 0xee, 0x30, 0xd0, 0xe1, 0x86, 0xa0, 0x0e,
-  0x03, 0x30, 0x98, 0x65, 0xa8, 0x0b, 0xbb, 0x08, 0xac, 0xfe, 0x86, 0xf8,
-  0xcc, 0x12, 0xdc, 0x85, 0x11, 0xf8, 0x07, 0x9f, 0x59, 0x82, 0xbb, 0x18,
-  0x68, 0x31, 0xb4, 0xb8, 0xc0, 0xe4, 0x82, 0xa8, 0x0b, 0xc1, 0x2e, 0xd8,
-  0x66, 0x2e, 0x2e, 0x18, 0xc6, 0xee, 0x6f, 0xff, 0xe2, 0x33, 0x1c, 0x61,
-  0x1b, 0xfc, 0x47, 0x7c, 0xb3, 0x0c, 0x78, 0xb1, 0x17, 0x81, 0xf5, 0xdf,
-  0x6d, 0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x51, 0x16,
-  0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0xa5, 0x18, 0xe8, 0x70, 0x43,
-  0x30, 0x8a, 0x01, 0x18, 0xcc, 0x32, 0xe4, 0x85, 0x5e, 0x04, 0x36, 0x94,
-  0x60, 0x00, 0x9f, 0x59, 0x82, 0xbf, 0x30, 0x11, 0x0c, 0x88, 0xf8, 0xcc,
-  0x12, 0xfc, 0xc5, 0x70, 0x44, 0x78, 0x8c, 0x60, 0x20, 0x7c, 0xb3, 0x0c,
-  0x7c, 0xf1, 0x17, 0x81, 0x89, 0x07, 0x09, 0x06, 0xf1, 0xb1, 0xc0, 0xa1,
-  0xcf, 0x05, 0xc3, 0x5c, 0x60, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2,
-  0x53, 0x04, 0x2c, 0x06, 0x3a, 0xdc, 0x10, 0xb8, 0x62, 0x00, 0x06, 0xb3,
-  0x0c, 0x7d, 0xe1, 0x17, 0x81, 0xb1, 0x60, 0x30, 0xc4, 0x67, 0x96, 0xe0,
-  0x2f, 0x8c, 0x88, 0xc1, 0x00, 0x3e, 0xb3, 0x04, 0x7f, 0x31, 0xd0, 0x62,
-  0x68, 0x79, 0x81, 0xe9, 0x05, 0xd1, 0x17, 0x82, 0x5f, 0xe0, 0xce, 0x5e,
-  0x5c, 0x30, 0xcc, 0x05, 0x46, 0xdd, 0x66, 0xd4, 0xf5, 0xdf, 0x30, 0x27,
-  0x73, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08,
-  0x82, 0x41, 0x26, 0x8e, 0x41, 0x2b, 0x06, 0x69, 0x18, 0xec, 0x62, 0x30,
-  0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09,
-  0xc4, 0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x60, 0xa5,
-  0x63, 0x40, 0x8b, 0x41, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60,
-  0x60, 0xa9, 0x63, 0x50, 0x8b, 0x41, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00,
-  0x82, 0x60, 0x60, 0xad, 0x63, 0x60, 0x8b, 0x41, 0x42, 0x04, 0x23, 0x06,
-  0x0a, 0x00, 0x82, 0x60, 0xf0, 0xa5, 0x63, 0x50, 0x8b, 0xc1, 0x1c, 0x06,
-  0x81, 0x38, 0x06, 0xa2, 0x18, 0x80, 0x63, 0x30, 0x9a, 0x10, 0x00, 0xa3,
-  0x09, 0x42, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x62, 0xb0, 0x8e,
-  0xc1, 0x2d, 0x06, 0x42, 0x70, 0x81, 0x71, 0xb3, 0x04, 0xa0, 0x31, 0xd0,
-  0x62, 0xb8, 0xc6, 0x5a, 0xd8, 0xa3, 0xa0, 0x16, 0x30, 0xd1, 0x16, 0xc2,
-  0x5f, 0xd8, 0xa3, 0xe0, 0x16, 0x66, 0x22, 0x7d, 0x18, 0xc0, 0x67, 0x96,
-  0x21, 0x34, 0x46, 0x43, 0x44, 0x86, 0x23, 0x82, 0x3e, 0x0c, 0x86, 0xef,
-  0x84, 0x61, 0x86, 0x1b, 0x82, 0x39, 0x0c, 0xc8, 0xa0, 0x86, 0x40, 0x87,
-  0x23, 0x50, 0x24, 0x14, 0x83, 0xe1, 0xab, 0x40, 0xd0, 0x53, 0x91, 0x61,
-  0x86, 0x1b, 0x02, 0x3b, 0x0c, 0xc8, 0xa0, 0x82, 0x41, 0x67, 0x19, 0x44,
-  0xe3, 0x36, 0x82, 0xd3, 0xc1, 0x60, 0x98, 0x7b, 0xbb, 0x61, 0x46, 0x0c,
-  0x0e, 0x00, 0x04, 0xc1, 0x20, 0xeb, 0xc7, 0x00, 0x1d, 0x03, 0x52, 0x0c,
-  0xec, 0x31, 0x18, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18,
-  0x84, 0xd1, 0x04, 0x62, 0x28, 0xe2, 0x90, 0x11, 0x03, 0x04, 0x00, 0x41,
-  0x30, 0xb0, 0x48, 0x32, 0x78, 0xc7, 0xe0, 0x20, 0x82, 0x11, 0x03, 0x04,
-  0x00, 0x41, 0x30, 0xb0, 0x4a, 0x32, 0x80, 0xc7, 0x80, 0x21, 0x82, 0x11,
-  0x03, 0x04, 0x00, 0x41, 0x30, 0xb0, 0x4c, 0x32, 0x88, 0xc7, 0x40, 0x22,
-  0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0xf8, 0x48, 0x32, 0x80, 0xc7,
-  0xc0, 0x15, 0x83, 0xa0, 0x1f, 0x83, 0x5e, 0x0c, 0xf6, 0x31, 0x18, 0x4d,
-  0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03,
-  0x31, 0x30, 0xc9, 0x40, 0x1e, 0x03, 0x21, 0xb8, 0xc0, 0xb8, 0x59, 0x82,
-  0xdb, 0x18, 0x6e, 0xe0, 0x91, 0x92, 0x0c, 0xc0, 0x60, 0x96, 0x81, 0x34,
-  0x4a, 0x23, 0xa8, 0x55, 0x0c, 0xe8, 0x31, 0x80, 0x0b, 0x8c, 0x1a, 0x31,
-  0x38, 0x00, 0x10, 0x04, 0x83, 0x45, 0x26, 0x83, 0x7a, 0x0c, 0xba, 0x5c,
-  0x0c, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0x99, 0xc9, 0xa0, 0x1e,
-  0x83, 0x40, 0xb8, 0x60, 0x98, 0x72, 0xc5, 0x20, 0x1f, 0x03, 0xb8, 0xc0,
-  0xa8, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x58, 0x6e, 0x32, 0xd0, 0xc7,
-  0xa0, 0x4c, 0x7c, 0x31, 0x18, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x05,
-  0x27, 0x03, 0x7d, 0x0c, 0x02, 0xe1, 0x82, 0x61, 0x2e, 0x30, 0xea, 0x0e,
-  0xa3, 0xee, 0x0e, 0x83, 0x61, 0x8e, 0xf5, 0x86, 0x39, 0x62, 0x98, 0x23,
-  0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x8c, 0x27, 0x83, 0x93,
-  0x0c, 0xc6, 0x31, 0xa8, 0xc9, 0x60, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84,
-  0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c,
-  0x10, 0x00, 0x04, 0xc1, 0xc0, 0x1a, 0xcb, 0xc0, 0x25, 0x83, 0x84, 0x08,
-  0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xc0, 0x22, 0xcb, 0xe0, 0x25, 0x83,
-  0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xc0, 0x2a, 0xcb, 0x00,
-  0x26, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0xe0, 0x1b,
-  0xcb, 0xe0, 0x25, 0x83, 0x76, 0x0c, 0x02, 0x9e, 0x0c, 0xf8, 0x31, 0xd0,
-  0xc9, 0x60, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0xc4, 0xe0, 0x00,
-  0x40, 0x10, 0x0c, 0xc4, 0xa0, 0x2c, 0x83, 0x98, 0x0c, 0x84, 0xe0, 0x02,
-  0xe3, 0x66, 0x09, 0x6e, 0x63, 0xb8, 0x61, 0x4f, 0xc6, 0x32, 0x00, 0x83,
-  0x59, 0x06, 0xd3, 0xb8, 0x8d, 0xc0, 0xc2, 0x31, 0x18, 0xc7, 0x20, 0x3e,
-  0xc3, 0x11, 0x7b, 0x40, 0x8e, 0x01, 0xf1, 0xcd, 0x32, 0x9c, 0x86, 0x6a,
-  0x04, 0x56, 0x8e, 0x01, 0x1f, 0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c,
-  0x73, 0x81, 0x51, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x6d,
-  0x19, 0xe8, 0x70, 0x43, 0xb0, 0x96, 0x01, 0x18, 0xcc, 0x32, 0xa0, 0x46,
-  0x6a, 0x04, 0x36, 0xb4, 0x63, 0x00, 0x9f, 0x59, 0x02, 0xd7, 0x30, 0x76,
-  0x0c, 0x88, 0xf8, 0xcc, 0x12, 0xb8, 0xc6, 0x70, 0x84, 0x29, 0xb4, 0x63,
-  0x20, 0x7c, 0xb3, 0x0c, 0xab, 0xe1, 0x1a, 0x81, 0x9d, 0x82, 0x3b, 0x06,
-  0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0x60, 0x94, 0x05, 0x91,
-  0x7c, 0xac, 0x08, 0xe2, 0x53, 0x04, 0x5e, 0x06, 0x3a, 0xdc, 0x10, 0xd8,
-  0x65, 0x00, 0x06, 0xb3, 0x0c, 0xac, 0xd1, 0x1a, 0x81, 0xd9, 0x63, 0x30,
-  0xc4, 0x67, 0x96, 0xc0, 0x35, 0x8c, 0xc8, 0xc7, 0x00, 0x3e, 0xb3, 0x04,
-  0xae, 0x31, 0xd0, 0x62, 0x68, 0xa8, 0x81, 0xa5, 0x06, 0xc1, 0x1a, 0x42,
-  0x6b, 0x88, 0x85, 0x6a, 0x5c, 0x30, 0x8c, 0xe1, 0x63, 0xc0, 0x8f, 0x41,
-  0x7c, 0x86, 0x23, 0x6e, 0xa5, 0x1f, 0x03, 0xe2, 0x9b, 0x65, 0x78, 0x0d,
-  0xd9, 0x08, 0xcc, 0x1f, 0x03, 0x5c, 0x89, 0x8f, 0x05, 0x03, 0x7d, 0x2e,
-  0x18, 0xe6, 0x02, 0xa3, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22,
-  0x4c, 0x33, 0xd0, 0xe1, 0x86, 0x80, 0x34, 0x03, 0x30, 0x98, 0x65, 0x80,
-  0x8d, 0xd8, 0x08, 0x6c, 0x30, 0xc9, 0x00, 0x3e, 0xb3, 0x04, 0xb6, 0x61,
-  0x23, 0x19, 0x10, 0xf1, 0x99, 0x25, 0xb0, 0x8d, 0xe1, 0x08, 0x71, 0x21,
-  0xc9, 0x40, 0xf8, 0x66, 0x19, 0x66, 0xc3, 0x36, 0x02, 0x1b, 0x97, 0x92,
-  0x0c, 0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x28, 0x0b,
-  0x22, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0xd8, 0x0c, 0x74, 0xb8, 0x21,
-  0x78, 0xcd, 0x00, 0x0c, 0x66, 0x19, 0x68, 0xa3, 0x36, 0x02, 0x6b, 0xc9,
-  0x60, 0x88, 0xcf, 0x2c, 0x81, 0x6d, 0x18, 0x21, 0x93, 0x01, 0x7c, 0x66,
-  0x09, 0x6c, 0x63, 0xa0, 0xc5, 0xd0, 0x60, 0x03, 0x8b, 0x0d, 0x82, 0x36,
-  0x84, 0xda, 0xd0, 0x2d, 0xd9, 0xb8, 0x60, 0x98, 0x0b, 0x8c, 0xba, 0xcd,
-  0xa8, 0xf3, 0xc7, 0x60, 0x98, 0x9b, 0xc1, 0x60, 0x98, 0x23, 0x86, 0x39,
-  0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xc8, 0xc6, 0x33, 0x70,
-  0xcd, 0x40, 0x2d, 0x03, 0xde, 0x0c, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41,
-  0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4,
-  0x00, 0x01, 0x40, 0x10, 0x0c, 0x2c, 0xf5, 0x0c, 0x6a, 0x33, 0x48, 0x88,
-  0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xac, 0xf5, 0x0c, 0x6c, 0x33,
-  0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x2c, 0xf6, 0x0c,
-  0x6e, 0x33, 0x48, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x3e,
-  0xf5, 0x0c, 0x6c, 0x33, 0xa0, 0xcb, 0x20, 0x18, 0xcf, 0x60, 0x34, 0x83,
-  0xf0, 0x0c, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x0c, 0x0e,
-  0x00, 0x04, 0xc1, 0x40, 0x0c, 0xd8, 0x33, 0xc0, 0xcd, 0x40, 0x08, 0x2e,
-  0x30, 0x6e, 0x96, 0xe0, 0x36, 0x06, 0x5a, 0x0c, 0xd7, 0x10, 0x0d, 0x7c,
-  0x15, 0x42, 0x03, 0x26, 0x48, 0x43, 0xb0, 0x0d, 0x7c, 0x15, 0x4a, 0xc3,
-  0x4e, 0x06, 0x34, 0x03, 0xf8, 0xcc, 0x32, 0xe0, 0x86, 0x6e, 0x8c, 0xcc,
-  0x70, 0x44, 0xe0, 0x97, 0xc1, 0xf0, 0x9d, 0x30, 0xcc, 0x70, 0x43, 0x40,
-  0x97, 0x01, 0x19, 0xd4, 0x10, 0xe8, 0x70, 0x44, 0xca, 0x88, 0x66, 0x30,
-  0x7c, 0x15, 0x08, 0x7a, 0x2b, 0x33, 0xcc, 0x70, 0x43, 0x70, 0x97, 0x01,
-  0x19, 0x54, 0x30, 0xe8, 0x2c, 0x43, 0x6e, 0xb8, 0x47, 0x70, 0x3b, 0x19,
-  0x0c, 0x73, 0x70, 0x18, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06,
-  0x99, 0x7f, 0x06, 0xe9, 0x19, 0x94, 0x66, 0x70, 0x9f, 0xc1, 0x68, 0x42,
-  0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43,
-  0x11, 0x87, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x55, 0xa2, 0x01,
-  0x7c, 0x06, 0x07, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x65,
-  0xa2, 0x41, 0x7c, 0x06, 0x0c, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
-  0x81, 0x75, 0xa2, 0x81, 0x7c, 0x06, 0x12, 0x11, 0x8c, 0x18, 0x28, 0x00,
-  0x08, 0x82, 0xc1, 0x57, 0xa2, 0x41, 0x7c, 0x06, 0xaf, 0x19, 0x04, 0xfe,
-  0x19, 0xf8, 0x66, 0xc0, 0x9f, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08,
-  0xc1, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x88, 0xc1, 0x89, 0x06, 0xf3,
-  0x19, 0x08, 0xc1, 0x05, 0xc6, 0xcd, 0x12, 0xb8, 0xc7, 0x70, 0x43, 0xcf,
-  0x98, 0x68, 0x00, 0x06, 0xb3, 0x0c, 0xbb, 0xc1, 0x1b, 0x41, 0xb1, 0x66,
-  0x50, 0x9f, 0x01, 0x5c, 0x60, 0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18,
-  0x2c, 0x33, 0x1a, 0xd8, 0x67, 0xd0, 0xe9, 0x66, 0x30, 0x62, 0x70, 0x00,
-  0x20, 0x08, 0x06, 0x0b, 0x8d, 0x06, 0xf6, 0x19, 0x04, 0xc2, 0x05, 0xc3,
-  0xd4, 0x6b, 0x06, 0xfa, 0x19, 0xc0, 0x05, 0x46, 0x8d, 0x18, 0x1c, 0x00,
-  0x08, 0x82, 0xc1, 0x82, 0xa3, 0xc1, 0x7e, 0x06, 0x66, 0xf3, 0x9b, 0xc1,
-  0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x2c, 0x39, 0x1a, 0xec, 0x67, 0x10,
-  0x08, 0x17, 0x0c, 0x73, 0x81, 0x51, 0x77, 0x18, 0x75, 0x78, 0x19, 0x0c,
-  0x73, 0xad, 0x18, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70,
-  0x00, 0x20, 0x08, 0x06, 0x59, 0x8f, 0x06, 0x28, 0x1a, 0x90, 0x67, 0x60,
-  0xa3, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20,
-  0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
-  0x81, 0x45, 0xa6, 0xc1, 0x8b, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00,
-  0x08, 0x82, 0x81, 0x55, 0xa6, 0x01, 0x8c, 0x06, 0x09, 0x11, 0x8c, 0x18,
-  0x20, 0x00, 0x08, 0x82, 0x81, 0x65, 0xa6, 0x41, 0x8c, 0x06, 0x09, 0x11,
-  0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0x47, 0xa6, 0x01, 0x8c, 0x06,
-  0xee, 0x19, 0x04, 0x3d, 0x1a, 0xf4, 0x67, 0xb0, 0xa3, 0xc1, 0x68, 0x42,
-  0x00, 0x8c, 0x26, 0x08, 0xc1, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x88,
-  0x81, 0x99, 0x06, 0x32, 0x1a, 0x08, 0xc1, 0x05, 0xc6, 0xcd, 0x12, 0xb8,
-  0xc7, 0x70, 0x03, 0xdf, 0x90, 0x69, 0x00, 0x06, 0xb3, 0x0c, 0xbd, 0xe1,
-  0x1e, 0x81, 0x89, 0x67, 0x40, 0x9e, 0x41, 0x7c, 0x86, 0x23, 0xf6, 0xa0,
-  0x3c, 0x03, 0xe2, 0x9b, 0x65, 0xf0, 0x8d, 0xf0, 0x08, 0xcc, 0x3c, 0x03,
-  0x3e, 0x88, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa3, 0x2c,
-  0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xdc, 0x34, 0xd0, 0xe1, 0x86,
-  0x80, 0x4d, 0x03, 0x30, 0x98, 0x65, 0xf8, 0x0d, 0xf0, 0x08, 0x6c, 0x70,
-  0xcf, 0x00, 0x3e, 0xb3, 0x04, 0xe5, 0x61, 0xed, 0x19, 0x10, 0xf1, 0x99,
-  0x25, 0x28, 0x8f, 0xe1, 0x08, 0x53, 0x70, 0xcf, 0x40, 0xf8, 0x66, 0x19,
-  0xc4, 0xa3, 0x3c, 0x02, 0x3b, 0x85, 0xf7, 0x0c, 0xe2, 0x63, 0x81, 0x43,
-  0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x28, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4,
-  0xa7, 0x88, 0x3c, 0x0d, 0x74, 0xb8, 0x21, 0xb8, 0xd3, 0x00, 0x0c, 0x66,
-  0x19, 0xc6, 0x83, 0x3c, 0x02, 0xbb, 0xcf, 0x60, 0x88, 0xcf, 0x2c, 0x41,
-  0x79, 0x18, 0xa1, 0x9f, 0x01, 0x7c, 0x66, 0x09, 0xca, 0x63, 0xa0, 0xc5,
-  0xd0, 0x7e, 0x03, 0x03, 0x0f, 0x62, 0x3c, 0x04, 0xf2, 0x10, 0x8b, 0xf0,
-  0xb8, 0x60, 0x18, 0xcb, 0xcf, 0xa0, 0x3f, 0x83, 0xf8, 0x0c, 0x47, 0xe0,
-  0x8e, 0x7f, 0x06, 0xc4, 0x37, 0xcb, 0x60, 0x1e, 0xe9, 0x11, 0xd8, 0x7f,
-  0x06, 0xb9, 0x13, 0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x46,
-  0x59, 0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0x9c, 0x6a, 0xa0, 0xc3,
-  0x0d, 0x41, 0xa9, 0x06, 0x60, 0x30, 0xcb, 0x70, 0x1e, 0xe8, 0x11, 0xd8,
-  0x70, 0xa2, 0x01, 0x7c, 0x66, 0x09, 0xda, 0xc3, 0x48, 0x34, 0x20, 0xe2,
-  0x33, 0x4b, 0xd0, 0x1e, 0xc3, 0x11, 0xe3, 0x53, 0xa2, 0x81, 0xf0, 0xcd,
-  0x32, 0xa8, 0x47, 0x7b, 0x04, 0x46, 0x3e, 0x26, 0x1a, 0xc4, 0xc7, 0x02,
-  0x87, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x51, 0x16, 0x44, 0xf2, 0xb1, 0x22,
-  0x88, 0x4f, 0x11, 0xb2, 0x1a, 0xe8, 0x70, 0x43, 0x00, 0xab, 0x01, 0x18,
-  0xcc, 0x32, 0xac, 0x07, 0x7b, 0x04, 0xe6, 0xa2, 0xc1, 0x10, 0x9f, 0x59,
-  0x82, 0xf6, 0x30, 0x62, 0x46, 0x03, 0xf8, 0xcc, 0x12, 0xb4, 0xc7, 0x40,
-  0x8b, 0xa1, 0x9d, 0x07, 0x86, 0x1e, 0xc4, 0x7a, 0x08, 0xec, 0xc1, 0x7b,
-  0xe9, 0x71, 0xc1, 0x30, 0x17, 0x18, 0x75, 0x9b, 0x51, 0xf7, 0x9f, 0xc1,
-  0x30, 0x47, 0x93, 0xc1, 0x30, 0x47, 0x0c, 0x73, 0xc4, 0x30, 0x23, 0x06,
-  0x07, 0x00, 0x82, 0x60, 0x90, 0x91, 0x6b, 0xf0, 0xaa, 0xc1, 0x9a, 0x06,
-  0xbd, 0x1a, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c,
-  0xc2, 0x68, 0x02, 0x31, 0x14, 0x91, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20,
-  0x18, 0x58, 0xeb, 0x1a, 0xd8, 0x6a, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02,
-  0x80, 0x20, 0x18, 0x58, 0xec, 0x1a, 0xdc, 0x6a, 0x90, 0x10, 0xc1, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x58, 0xed, 0x1a, 0xe0, 0x6a, 0x90, 0x10,
-  0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x7c, 0xeb, 0x1a, 0xdc, 0x6a,
-  0x50, 0xa7, 0x41, 0x40, 0xae, 0x01, 0xa9, 0x06, 0xe2, 0x1a, 0x8c, 0x26,
-  0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x81,
-  0x18, 0xb4, 0x6b, 0x90, 0xab, 0x81, 0x10, 0x5c, 0x60, 0xdc, 0x2c, 0x81,
-  0x7b, 0x0c, 0xb4, 0x18, 0xae, 0x91, 0x1b, 0xfa, 0x2c, 0xe0, 0x06, 0x4c,
-  0xec, 0x86, 0xd0, 0x1e, 0xfa, 0x2c, 0xf0, 0xc6, 0x2c, 0xc3, 0x7b, 0xc4,
-  0xc7, 0x08, 0x0d, 0x47, 0x9c, 0x90, 0x9f, 0x06, 0xc3, 0x77, 0x28, 0x34,
-  0xcc, 0x70, 0x43, 0x40, 0xa7, 0x01, 0x19, 0xd4, 0x10, 0xe8, 0x70, 0x84,
-  0x0a, 0x89, 0x6a, 0x30, 0x7c, 0x15, 0x08, 0x7a, 0x2c, 0x34, 0xcc, 0x70,
-  0x43, 0x70, 0xa7, 0x01, 0x19, 0x54, 0x30, 0xe8, 0x2c, 0x03, 0x7c, 0x94,
-  0x48, 0x70, 0x3b, 0x1a, 0x0c, 0x73, 0x70, 0x19, 0x0c, 0x33, 0x62, 0x70,
-  0x00, 0x20, 0x08, 0x06, 0x99, 0xbf, 0x06, 0xe9, 0x1a, 0x94, 0x6a, 0x70,
-  0xaf, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20,
-  0x8c, 0x26, 0x10, 0x43, 0x11, 0x87, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
-  0x81, 0x55, 0xb2, 0x01, 0xbc, 0x06, 0x07, 0x11, 0x8c, 0x18, 0x20, 0x00,
-  0x08, 0x82, 0x81, 0x65, 0xb2, 0x41, 0xbc, 0x06, 0x0c, 0x11, 0x8c, 0x18,
-  0x20, 0x00, 0x08, 0x82, 0x81, 0x75, 0xb2, 0x81, 0xbc, 0x06, 0x12, 0x11,
-  0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0x57, 0xb2, 0x41, 0xbc, 0x06,
-  0xaf, 0x1a, 0x04, 0xfe, 0x1a, 0xf8, 0x6a, 0xc0, 0xaf, 0xc1, 0x68, 0x42,
-  0x00, 0x8c, 0x26, 0x08, 0xc1, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x88,
-  0xc1, 0xc9, 0x06, 0xf3, 0x1a, 0x08, 0xc1, 0x05, 0xc6, 0xcd, 0x12, 0x94,
-  0xc8, 0x70, 0x43, 0x0f, 0x99, 0x6c, 0x00, 0x06, 0xb3, 0x0c, 0xf2, 0x31,
-  0x1f, 0x41, 0xb1, 0x6a, 0x50, 0xaf, 0x01, 0x5c, 0x60, 0xd4, 0x88, 0xc1,
-  0x01, 0x80, 0x20, 0x18, 0x2c, 0x33, 0x1b, 0xd8, 0x6b, 0x30, 0x46, 0xba,
-  0x1a, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x42, 0xb3, 0x81, 0xbd,
-  0x06, 0x81, 0x70, 0xc1, 0x30, 0xf5, 0xaa, 0x81, 0xbe, 0x06, 0x70, 0x81,
-  0x51, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xb0, 0xe0, 0x6c, 0xb0, 0xaf,
-  0xc1, 0x19, 0xfd, 0x6a, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x4b,
-  0xce, 0x06, 0xfb, 0x1a, 0x04, 0xc2, 0x05, 0xc3, 0x5c, 0x60, 0xd4, 0x1d,
-  0x46, 0x1d, 0x9e, 0x06, 0xc3, 0x5c, 0x6b, 0x06, 0xc3, 0x1c, 0x31, 0xcc,
-  0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x41, 0xd6, 0xb3, 0x01,
-  0xca, 0x06, 0xe4, 0x1a, 0xd8, 0x6c, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09,
-  0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23,
-  0x06, 0x08, 0x00, 0x82, 0x60, 0x60, 0x91, 0x6d, 0xf0, 0xb2, 0x41, 0x42,
-  0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x60, 0x95, 0x6d, 0x00, 0xb3,
-  0x41, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x60, 0x99, 0x6d,
-  0x10, 0xb3, 0x41, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xf0,
-  0x91, 0x6d, 0x00, 0xb3, 0x81, 0xbb, 0x06, 0x41, 0xcf, 0x06, 0xfd, 0x1a,
-  0xec, 0x6c, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x62, 0x70,
-  0x00, 0x20, 0x08, 0x06, 0x62, 0x60, 0xb6, 0x81, 0xcc, 0x06, 0x42, 0x70,
-  0x81, 0x71, 0xb3, 0x04, 0x25, 0x32, 0xdc, 0xc0, 0x47, 0x64, 0x1b, 0x80,
-  0xc1, 0x2c, 0x03, 0x7d, 0x94, 0x48, 0x60, 0xe2, 0x1a, 0x90, 0x6b, 0x10,
-  0x9f, 0xe1, 0x88, 0x50, 0x2a, 0xd7, 0x80, 0xf8, 0x66, 0x19, 0xea, 0x03,
-  0x3f, 0x02, 0x33, 0xd7, 0x40, 0x94, 0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b,
-  0x86, 0xb9, 0xc0, 0x28, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08,
-  0xb7, 0x0d, 0x74, 0xb8, 0x21, 0x60, 0xdb, 0x00, 0x0c, 0x66, 0x19, 0xec,
-  0xe3, 0x3e, 0x02, 0x1b, 0xdc, 0x35, 0x80, 0xcf, 0x2c, 0x01, 0x7f, 0x58,
-  0xbb, 0x06, 0x44, 0x7c, 0x66, 0x09, 0xf8, 0x63, 0x38, 0x82, 0x95, 0xdc,
-  0x35, 0x10, 0xbe, 0x59, 0x86, 0xfc, 0xe0, 0x8f, 0xc0, 0x5a, 0xe9, 0x5d,
-  0x83, 0xf8, 0x58, 0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x30, 0xca, 0x82,
-  0x48, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x22, 0x6f, 0x03, 0x1d, 0x6e, 0x08,
-  0xee, 0x36, 0x00, 0x83, 0x59, 0x06, 0xfd, 0xd8, 0x8f, 0xc0, 0xee, 0x35,
-  0x18, 0xe2, 0x33, 0x4b, 0xc0, 0x1f, 0x46, 0xe8, 0x6b, 0x00, 0x9f, 0x59,
-  0x02, 0xfe, 0x18, 0x68, 0x31, 0x34, 0xfb, 0xc0, 0xee, 0x83, 0xd0, 0x0f,
-  0x61, 0x3f, 0xdc, 0x32, 0xc0, 0x8f, 0x0b, 0x86, 0xb1, 0x7c, 0x0d, 0xfa,
-  0x35, 0x88, 0xcf, 0x70, 0x44, 0x2e, 0xf9, 0x6b, 0x40, 0x7c, 0xb3, 0x0c,
-  0xfd, 0x01, 0x22, 0x81, 0xfd, 0x6b, 0xa0, 0x4b, 0xf1, 0xb1, 0x60, 0xa0,
-  0xcf, 0x05, 0xc3, 0x5c, 0x60, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2,
-  0x53, 0xc4, 0xe9, 0x06, 0x3a, 0xdc, 0x10, 0x94, 0x6e, 0x00, 0x06, 0xb3,
-  0x0c, 0xfe, 0xf1, 0x1f, 0x81, 0x0d, 0x27, 0x1b, 0xc0, 0x67, 0x96, 0x80,
-  0x44, 0x8c, 0x64, 0x03, 0x22, 0x3e, 0xb3, 0x04, 0x24, 0x32, 0x1c, 0x41,
-  0x4e, 0x25, 0x1b, 0x08, 0xdf, 0x2c, 0x43, 0x88, 0x90, 0x48, 0x60, 0xe5,
-  0x64, 0xb2, 0x41, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x18,
-  0x65, 0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x21, 0xbb, 0x81, 0x0e,
-  0x37, 0x04, 0xb0, 0x1b, 0x80, 0xc1, 0x2c, 0x83, 0x88, 0x8c, 0x48, 0x60,
-  0x2e, 0x1b, 0x0c, 0xf1, 0x99, 0x25, 0x20, 0x11, 0x23, 0x66, 0x36, 0x80,
-  0xcf, 0x2c, 0x01, 0x89, 0x0c, 0xb4, 0x18, 0x9a, 0x7f, 0x60, 0xff, 0x41,
-  0x88, 0x88, 0x30, 0x22, 0xbe, 0x19, 0x80, 0xc8, 0x05, 0xc3, 0x5c, 0x60,
-  0xd4, 0x6d, 0x46, 0xdd, 0xbf, 0x06, 0xc3, 0x1c, 0x8d, 0x06, 0xc3, 0x1c,
-  0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x41, 0x46,
-  0xbe, 0xc1, 0xeb, 0x06, 0x6b, 0x1b, 0xf4, 0x6e, 0x30, 0x9a, 0x10, 0x00,
-  0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44,
-  0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x60, 0xad, 0x6f, 0x60, 0xbb,
-  0x41, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x60, 0xb1, 0x6f,
-  0x70, 0xbb, 0x41, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x60,
-  0xb5, 0x6f, 0x80, 0xbb, 0x41, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82,
-  0x60, 0xf0, 0xad, 0x6f, 0x70, 0xbb, 0x41, 0xdd, 0x06, 0x01, 0xf9, 0x06,
-  0xa4, 0x1b, 0x88, 0x6f, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30,
-  0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x62, 0xd0, 0xbe, 0x41, 0xee, 0x06,
-  0x42, 0x70, 0x81, 0x71, 0xb3, 0x04, 0x25, 0x32, 0xd0, 0x62, 0xb8, 0x06,
-  0x7c, 0xe8, 0xbb, 0xf0, 0x1e, 0x30, 0x21, 0x1f, 0x02, 0x89, 0xe8, 0xbb,
-  0x30, 0x1f, 0xb3, 0x0c, 0x26, 0x82, 0x22, 0x23, 0x35, 0x1c, 0x81, 0x52,
-  0x7e, 0x1b, 0x0c, 0xdf, 0xa5, 0xd4, 0x30, 0xc3, 0x0d, 0x01, 0xdd, 0x06,
-  0x64, 0x50, 0x43, 0xa0, 0xc3, 0x11, 0x2a, 0x25, 0xba, 0xc1, 0xf0, 0x55,
-  0x20, 0xe8, 0xb1, 0xd4, 0x30, 0xc3, 0x0d, 0xc1, 0xdd, 0x06, 0x64, 0x50,
-  0xc1, 0xa0, 0xb3, 0x0c, 0x27, 0xc2, 0x23, 0xc1, 0xed, 0x6c, 0x30, 0xcc,
-  0xc1, 0x69, 0x30, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x64, 0xfe,
-  0x1b, 0xa4, 0x6f, 0x50, 0xba, 0xc1, 0xfd, 0x06, 0xa3, 0x09, 0x01, 0x30,
-  0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x1c,
-  0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x56, 0x09, 0x07, 0xf0, 0x1b,
-  0x1c, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x96, 0x09, 0x07,
-  0xf1, 0x1b, 0x30, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xd6,
-  0x09, 0x07, 0xf2, 0x1b, 0x48, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08,
-  0x06, 0x5f, 0x09, 0x07, 0xf1, 0x1b, 0xbc, 0x6e, 0x10, 0xf8, 0x6f, 0xe0,
-  0xbb, 0x01, 0xff, 0x06, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0x23,
-  0x06, 0x07, 0x00, 0x82, 0x60, 0x20, 0x06, 0x27, 0x1c, 0xcc, 0x6f, 0x20,
-  0x04, 0x17, 0x18, 0x37, 0x4b, 0xc0, 0x23, 0xc3, 0x0d, 0x3d, 0x65, 0xc2,
-  0x01, 0x18, 0xcc, 0x32, 0xa4, 0x88, 0x8a, 0x04, 0xc5, 0xba, 0x41, 0xfd,
-  0x06, 0x70, 0x81, 0x51, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xb0, 0xcc,
-  0x70, 0x60, 0xbf, 0x01, 0x59, 0xe9, 0x6e, 0x30, 0x62, 0x70, 0x00, 0x20,
-  0x08, 0x06, 0x0b, 0x0d, 0x07, 0xf6, 0x1b, 0x04, 0xc2, 0x05, 0xc3, 0xd4,
-  0xeb, 0x06, 0xfa, 0x1b, 0xc0, 0x05, 0x46, 0x8d, 0x18, 0x1c, 0x00, 0x08,
-  0x82, 0xc1, 0x82, 0xc3, 0xc1, 0xfe, 0x06, 0x67, 0xf5, 0xbb, 0xc1, 0x88,
-  0xc1, 0x01, 0x80, 0x20, 0x18, 0x2c, 0x39, 0x1c, 0xec, 0x6f, 0x10, 0x08,
-  0x17, 0x0c, 0x73, 0x81, 0x51, 0x77, 0x18, 0x75, 0x78, 0x1b, 0x0c, 0x73,
-  0xad, 0x1a, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00,
-  0x20, 0x08, 0x06, 0x59, 0x0f, 0x07, 0x28, 0x1c, 0x90, 0x6f, 0x60, 0xc3,
-  0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c,
-  0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81,
-  0x45, 0xc6, 0xc1, 0x0b, 0x07, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08,
-  0x82, 0x81, 0x55, 0xc6, 0x01, 0x0c, 0x07, 0x09, 0x11, 0x8c, 0x18, 0x20,
-  0x00, 0x08, 0x82, 0x81, 0x65, 0xc6, 0x41, 0x0c, 0x07, 0x09, 0x11, 0x8c,
-  0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0x47, 0xc6, 0x01, 0x0c, 0x07, 0xee,
-  0x1b, 0x04, 0x3d, 0x1c, 0xf4, 0x6f, 0xb0, 0xc3, 0xc1, 0x68, 0x42, 0x00,
-  0x8c, 0x26, 0x08, 0xc1, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x88, 0x81,
-  0x19, 0x07, 0x32, 0x1c, 0x08, 0xc1, 0x05, 0xc6, 0xcd, 0x12, 0xf0, 0xc8,
-  0x70, 0x03, 0x5f, 0x91, 0x71, 0x00, 0x06, 0xb3, 0x0c, 0x2b, 0xc2, 0x23,
-  0x81, 0x89, 0x6f, 0x40, 0xbe, 0x41, 0x7c, 0x86, 0x23, 0x44, 0xab, 0x7c,
-  0x03, 0xe2, 0x9b, 0x65, 0x60, 0x91, 0x17, 0x09, 0xcc, 0x7c, 0x83, 0xd1,
-  0x8a, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa3, 0x2c, 0x30,
-  0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xdc, 0x38, 0xd0, 0xe1, 0x86, 0x80,
-  0x8d, 0x03, 0x30, 0x98, 0x65, 0x68, 0x11, 0x17, 0x09, 0x6c, 0x70, 0xdf,
-  0x00, 0x3e, 0xb3, 0x04, 0x33, 0x62, 0xed, 0x1b, 0x10, 0xf1, 0x99, 0x25,
-  0x98, 0x91, 0xe1, 0x88, 0xd6, 0x72, 0xdf, 0x40, 0xf8, 0x66, 0x19, 0x60,
-  0x64, 0x46, 0x02, 0x73, 0xad, 0xf7, 0x0d, 0xe2, 0x63, 0x81, 0x43, 0x9f,
-  0x0b, 0x86, 0xb9, 0xc0, 0x28, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4, 0xa7,
-  0x88, 0x3c, 0x0e, 0x74, 0xb8, 0x21, 0xb8, 0xe3, 0x00, 0x0c, 0x66, 0x19,
-  0x62, 0x44, 0x46, 0x02, 0xbb, 0xdf, 0x60, 0x88, 0xcf, 0x2c, 0xc1, 0x8c,
-  0x18, 0xa1, 0xbf, 0x01, 0x7c, 0x66, 0x09, 0x66, 0x64, 0xa0, 0xc5, 0xd0,
-  0x5a, 0x04, 0x73, 0x11, 0x22, 0x46, 0x04, 0x19, 0x81, 0xdb, 0xe0, 0x45,
-  0x2e, 0x18, 0xc6, 0xf2, 0x37, 0xe8, 0xdf, 0x20, 0x3e, 0xc3, 0x11, 0xb9,
-  0xe5, 0xbf, 0x01, 0xf1, 0xcd, 0x32, 0xd0, 0xc8, 0x8d, 0x04, 0xf6, 0xbf,
-  0x81, 0x6e, 0xc5, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x51,
-  0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0xa7, 0x1c, 0xe8, 0x70,
-  0x43, 0x50, 0xca, 0x01, 0x18, 0xcc, 0x32, 0xd4, 0x88, 0x8d, 0x04, 0x36,
-  0x9c, 0x70, 0x00, 0x9f, 0x59, 0x82, 0x1d, 0x31, 0x12, 0x0e, 0x88, 0xf8,
-  0xcc, 0x12, 0xec, 0xc8, 0x70, 0x04, 0x79, 0x95, 0x70, 0x20, 0x7c, 0xb3,
-  0x0c, 0x38, 0xb2, 0x23, 0x81, 0x95, 0x97, 0x09, 0x07, 0xf1, 0xb1, 0xc0,
-  0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0x60, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08,
-  0xe2, 0x53, 0x84, 0x2c, 0x07, 0x3a, 0xdc, 0x10, 0xc0, 0x72, 0x00, 0x06,
-  0xb3, 0x0c, 0x39, 0xa2, 0x23, 0x81, 0xb9, 0x70, 0x30, 0xc4, 0x67, 0x96,
-  0x60, 0x47, 0x8c, 0x98, 0xe1, 0x00, 0x3e, 0xb3, 0x04, 0x3b, 0x32, 0xd0,
-  0x62, 0x68, 0x35, 0x82, 0xd9, 0x08, 0x91, 0x23, 0x82, 0x8e, 0xf8, 0x6e,
-  0x70, 0x23, 0x17, 0x0c, 0x73, 0x81, 0x51, 0xb7, 0x19, 0x75, 0xff, 0x1b,
-  0x0c, 0x73, 0x34, 0x1b, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62,
-  0x70, 0x00, 0x20, 0x08, 0x06, 0x19, 0x39, 0x07, 0xaf, 0x1c, 0xac, 0x71,
-  0xd0, 0xcb, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2,
-  0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08,
-  0x82, 0x81, 0xb5, 0xce, 0x81, 0x2d, 0x07, 0x09, 0x11, 0x8c, 0x18, 0x20,
-  0x00, 0x08, 0x82, 0x81, 0xc5, 0xce, 0xc1, 0x2d, 0x07, 0x09, 0x11, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0xd5, 0xce, 0x01, 0x2e, 0x07, 0x09,
-  0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0xb7, 0xce, 0xc1, 0x2d,
-  0x07, 0x75, 0x1c, 0x04, 0xe4, 0x1c, 0x90, 0x72, 0x20, 0xce, 0xc1, 0x68,
-  0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18,
-  0x88, 0x41, 0x3b, 0x07, 0xb9, 0x1c, 0x08, 0xc1, 0x05, 0xc6, 0xcd, 0x12,
-  0xf0, 0xc8, 0x40, 0x8b, 0xe1, 0x1a, 0x27, 0xa2, 0x8f, 0x83, 0x89, 0xc0,
-  0x44, 0x8a, 0x08, 0x3b, 0xa2, 0x8f, 0x83, 0x8a, 0xcc, 0x32, 0xf4, 0xc8,
-  0x8f, 0x8c, 0xd8, 0x70, 0x84, 0x08, 0xf9, 0x71, 0x30, 0x7c, 0x37, 0x42,
-  0xc3, 0x0c, 0x37, 0x04, 0x74, 0x1c, 0x90, 0x41, 0x0d, 0x81, 0x0e, 0x47,
-  0xa8, 0x98, 0x28, 0x07, 0xc3, 0x57, 0x81, 0xa0, 0xc7, 0x62, 0xc3, 0x0c,
-  0x37, 0x04, 0x77, 0x1c, 0x90, 0x41, 0x05, 0x83, 0xce, 0x32, 0xf8, 0xc8,
-  0x9c, 0x04, 0xb7, 0xc3, 0xc1, 0x30, 0x07, 0xb7, 0xc1, 0x30, 0x23, 0x06,
-  0x07, 0x00, 0x82, 0x60, 0x90, 0xf9, 0x73, 0x90, 0xce, 0x41, 0x29, 0x07,
-  0xf7, 0x1c, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c,
-  0xc2, 0x68, 0x02, 0x31, 0x14, 0x71, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20,
-  0x18, 0x58, 0x25, 0x1d, 0xc0, 0x73, 0x70, 0x10, 0xc1, 0x88, 0x01, 0x02,
-  0x80, 0x20, 0x18, 0x58, 0x26, 0x1d, 0xc4, 0x73, 0xc0, 0x10, 0xc1, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x58, 0x27, 0x1d, 0xc8, 0x73, 0x20, 0x11,
-  0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x7c, 0x25, 0x1d, 0xc4, 0x73,
-  0xf0, 0xca, 0x41, 0xe0, 0xcf, 0x81, 0x2f, 0x07, 0xfc, 0x1c, 0x8c, 0x26,
-  0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x81,
-  0x18, 0x9c, 0x74, 0x30, 0xcf, 0x81, 0x10, 0x5c, 0x60, 0xdc, 0x2c, 0xc1,
-  0x9c, 0x0c, 0x37, 0xf4, 0x98, 0x49, 0x07, 0x60, 0x30, 0xcb, 0x00, 0x26,
-  0x61, 0x12, 0x14, 0x2b, 0x07, 0xf5, 0x1c, 0xc0, 0x05, 0x46, 0x8d, 0x18,
-  0x1c, 0x00, 0x08, 0x82, 0xc1, 0x32, 0xd3, 0x81, 0x3d, 0x07, 0x3e, 0xa4,
-  0xcb, 0xc1, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x2c, 0x34, 0x1d, 0xd8,
-  0x73, 0x10, 0x08, 0x17, 0x0c, 0x53, 0xaf, 0x1c, 0xe8, 0x73, 0x00, 0x17,
-  0x18, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x0b, 0x4e, 0x07, 0xfb,
-  0x1c, 0x9c, 0xd9, 0x2f, 0x07, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xb0,
-  0xe4, 0x74, 0xb0, 0xcf, 0x41, 0x20, 0x5c, 0x30, 0xcc, 0x05, 0x46, 0xdd,
-  0x61, 0xd4, 0xe1, 0x71, 0x30, 0xcc, 0xb5, 0x6e, 0x30, 0xcc, 0x11, 0xc3,
-  0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x64, 0x3d, 0x1d,
-  0xa0, 0x74, 0x40, 0xce, 0x81, 0x4d, 0x07, 0xa3, 0x09, 0x01, 0x30, 0x9a,
-  0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32,
-  0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x16, 0x59, 0x07, 0x2f, 0x1d, 0x24,
-  0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x56, 0x59, 0x07, 0x30,
-  0x1d, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x96, 0x59,
-  0x07, 0x31, 0x1d, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06,
-  0x1f, 0x59, 0x07, 0x30, 0x1d, 0xb8, 0x73, 0x10, 0xf4, 0x74, 0xd0, 0xcf,
-  0xc1, 0x4e, 0x07, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0x23, 0x06,
-  0x07, 0x00, 0x82, 0x60, 0x20, 0x06, 0x66, 0x1d, 0xc8, 0x74, 0x20, 0x04,
-  0x17, 0x18, 0x37, 0x4b, 0x30, 0x27, 0xc3, 0x0d, 0x7c, 0x46, 0xd6, 0x01,
-  0x18, 0xcc, 0x32, 0x88, 0xc9, 0x9c, 0x04, 0x26, 0xce, 0x01, 0x39, 0x07,
-  0xf1, 0x19, 0x8e, 0xe0, 0xa3, 0x72, 0x0e, 0x88, 0x6f, 0x96, 0x61, 0x4c,
-  0xcc, 0x24, 0x30, 0x73, 0x0e, 0xfa, 0x28, 0x3e, 0x16, 0x0c, 0xf4, 0xb9,
-  0x60, 0x98, 0x0b, 0x8c, 0xb2, 0xc0, 0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a,
-  0x70, 0xeb, 0x40, 0x87, 0x1b, 0x02, 0xb6, 0x0e, 0xc0, 0x60, 0x96, 0x81,
-  0x4c, 0xca, 0x24, 0xb0, 0xc1, 0x9d, 0x03, 0xf8, 0xcc, 0x12, 0xa8, 0x89,
-  0xb5, 0x73, 0x40, 0xc4, 0x67, 0x96, 0x40, 0x4d, 0x86, 0x23, 0x4e, 0xc9,
-  0x9d, 0x03, 0xe1, 0x9b, 0x65, 0x38, 0x13, 0x35, 0x09, 0x0c, 0x95, 0xde,
-  0x39, 0x88, 0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa3, 0x2c,
-  0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xf2, 0x3a, 0xd0, 0xe1, 0x86,
-  0xe0, 0xae, 0x03, 0x30, 0x98, 0x65, 0x40, 0x93, 0x34, 0x09, 0xec, 0x9e,
-  0x83, 0x21, 0x3e, 0xb3, 0x04, 0x6a, 0x62, 0x84, 0x3e, 0x07, 0xf0, 0x99,
-  0x25, 0x50, 0x93, 0x81, 0x16, 0x43, 0x23, 0x13, 0xac, 0x4c, 0x08, 0x34,
-  0x11, 0xd2, 0x84, 0x2c, 0x03, 0x33, 0xb9, 0x60, 0x18, 0xcb, 0xe7, 0xa0,
-  0x9f, 0x83, 0xf8, 0x0c, 0x47, 0xe4, 0x9a, 0x3f, 0x07, 0xc4, 0x37, 0xcb,
-  0xb0, 0x26, 0x6e, 0x12, 0xd8, 0x3f, 0x07, 0xba, 0x16, 0x1f, 0x0b, 0x06,
-  0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x46, 0x59, 0x60, 0xc8, 0xc7, 0x8a, 0x20,
-  0x3e, 0x45, 0x9c, 0x76, 0xa0, 0xc3, 0x0d, 0x41, 0x69, 0x07, 0x60, 0x30,
-  0xcb, 0xc0, 0x26, 0x6d, 0x12, 0xd8, 0x70, 0xd2, 0x01, 0x7c, 0x66, 0x09,
-  0xe4, 0xc4, 0x48, 0x3a, 0x20, 0xe2, 0x33, 0x4b, 0x20, 0x27, 0xc3, 0x11,
-  0xe4, 0x56, 0xd2, 0x81, 0xf0, 0xcd, 0x32, 0xbc, 0x89, 0x9c, 0x04, 0x56,
-  0x6e, 0x26, 0x1d, 0xc4, 0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73, 0x81,
-  0x51, 0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0xb2, 0x1d, 0xe8,
-  0x70, 0x43, 0x00, 0xdb, 0x01, 0x18, 0xcc, 0x32, 0xc0, 0x49, 0x9c, 0x04,
-  0xe6, 0xd2, 0xc1, 0x10, 0x9f, 0x59, 0x02, 0x39, 0x31, 0x62, 0xa6, 0x03,
-  0xf8, 0xcc, 0x12, 0xc8, 0xc9, 0x40, 0x8b, 0xa1, 0xb1, 0x09, 0xd6, 0x26,
-  0x04, 0x9c, 0x08, 0x71, 0xe2, 0xdb, 0x81, 0x9b, 0x5c, 0x30, 0xcc, 0x05,
-  0x46, 0xdd, 0x66, 0xd4, 0xfd, 0x73, 0x30, 0xcc, 0xd1, 0x70, 0x30, 0xcc,
-  0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x64,
-  0xe4, 0x1d, 0xbc, 0x76, 0xb0, 0xd6, 0x41, 0x6f, 0x07, 0xa3, 0x09, 0x01,
-  0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45,
-  0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xd6, 0x7a, 0x07, 0xb6,
-  0x1d, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x16, 0x7b,
-  0x07, 0xb7, 0x1d, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06,
-  0x56, 0x7b, 0x07, 0xb8, 0x1d, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20,
-  0x08, 0x06, 0xdf, 0x7a, 0x07, 0xb7, 0x1d, 0xd4, 0x75, 0x10, 0x90, 0x77,
-  0x40, 0xda, 0x81, 0x78, 0x07, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04,
-  0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x20, 0x06, 0xed, 0x1d, 0xe4, 0x76,
-  0x20, 0x04, 0x17, 0x18, 0x37, 0x4b, 0x30, 0x27, 0x03, 0x2d, 0x86, 0x6b,
-  0xf8, 0x88, 0xbe, 0x0e, 0x3d, 0x02, 0x13, 0x60, 0x22, 0xc8, 0x89, 0xbe,
-  0x0e, 0x61, 0x32, 0xcb, 0x40, 0x27, 0x76, 0x32, 0x72, 0xc3, 0x11, 0x21,
-  0xe4, 0xd7, 0xc1, 0xf0, 0x9d, 0x08, 0x0d, 0x33, 0xdc, 0x10, 0xd0, 0x75,
-  0x40, 0x06, 0x35, 0x04, 0x3a, 0x1c, 0xa1, 0x72, 0xa2, 0x1d, 0x0c, 0x5f,
-  0x05, 0x82, 0x1e, 0xcb, 0x0d, 0x33, 0xdc, 0x10, 0xdc, 0x75, 0x40, 0x06,
-  0x15, 0x0c, 0x3a, 0xcb, 0x50, 0x27, 0xaa, 0x12, 0xdc, 0x4e, 0x07, 0xc3,
-  0x1c, 0x1c, 0x07, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x41, 0xe6,
-  0xdf, 0x41, 0x7a, 0x07, 0xa5, 0x1d, 0xdc, 0x77, 0x30, 0x9a, 0x10, 0x00,
-  0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0xc4,
-  0x21, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x60, 0x95, 0x78, 0x00, 0xdf,
-  0xc1, 0x41, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x60, 0x99, 0x78,
-  0x10, 0xdf, 0x01, 0x43, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x60,
-  0x9d, 0x78, 0x20, 0xdf, 0x81, 0x44, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82,
-  0x60, 0xf0, 0x95, 0x78, 0x10, 0xdf, 0xc1, 0x6b, 0x07, 0x81, 0x7f, 0x07,
-  0xbe, 0x1d, 0xf0, 0x77, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30,
-  0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x62, 0x70, 0xe2, 0xc1, 0x7c, 0x07,
-  0x42, 0x70, 0x81, 0x71, 0xb3, 0x04, 0xaa, 0x32, 0xdc, 0xd0, 0x73, 0x26,
-  0x1e, 0x80, 0xc1, 0x2c, 0xc3, 0x9d, 0xe0, 0x49, 0x50, 0xac, 0x1d, 0xd4,
-  0x77, 0x00, 0x17, 0x18, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0xcb,
-  0x8c, 0x07, 0xf6, 0x1d, 0xf4, 0x90, 0x6e, 0x07, 0x23, 0x06, 0x07, 0x00,
-  0x82, 0x60, 0xb0, 0xd0, 0x78, 0x60, 0xdf, 0x41, 0x20, 0x5c, 0x30, 0x4c,
-  0xbd, 0x76, 0xa0, 0xdf, 0x01, 0x5c, 0x60, 0xd4, 0x88, 0xc1, 0x01, 0x80,
-  0x20, 0x18, 0x2c, 0x38, 0x1e, 0xec, 0x77, 0x70, 0x76, 0xbf, 0x1d, 0x8c,
-  0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x92, 0xe3, 0xc1, 0x7e, 0x07, 0x81,
-  0x70, 0xc1, 0x30, 0x17, 0x18, 0x75, 0x87, 0x51, 0x87, 0xd7, 0xc1, 0x30,
-  0xd7, 0xca, 0xc1, 0x30, 0x47, 0x0c, 0x73, 0xc4, 0x30, 0x23, 0x06, 0x07,
-  0x00, 0x82, 0x60, 0x90, 0xf5, 0x78, 0x80, 0xe2, 0x01, 0x79, 0x07, 0x36,
-  0x1e, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2,
-  0x68, 0x02, 0x31, 0x14, 0x91, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
-  0x58, 0x64, 0x1e, 0xbc, 0x78, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80,
-  0x20, 0x18, 0x58, 0x65, 0x1e, 0xc0, 0x78, 0x90, 0x10, 0xc1, 0x88, 0x01,
-  0x02, 0x80, 0x20, 0x18, 0x58, 0x66, 0x1e, 0xc4, 0x78, 0x90, 0x10, 0xc1,
-  0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x7c, 0x64, 0x1e, 0xc0, 0x78, 0xe0,
-  0xde, 0x41, 0xd0, 0xe3, 0x41, 0x7f, 0x07, 0x3b, 0x1e, 0x8c, 0x26, 0x04,
-  0xc0, 0x68, 0x82, 0x10, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x81, 0x18,
-  0x98, 0x79, 0x20, 0xe3, 0x81, 0x10, 0x5c, 0x60, 0xdc, 0x2c, 0x81, 0xaa,
-  0x0c, 0x37, 0xf0, 0x1d, 0x99, 0x07, 0x60, 0x30, 0xcb, 0x90, 0x27, 0xaa,
-  0x12, 0x98, 0x78, 0x07, 0xe4, 0x1d, 0xc4, 0x67, 0x38, 0x62, 0x8f, 0xca,
-  0x3b, 0x20, 0xbe, 0x59, 0x06, 0x3d, 0xe9, 0x93, 0xc0, 0xcc, 0x3b, 0xe0,
-  0xa3, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x30, 0xca, 0x02,
-  0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0xc2, 0xcd, 0x03, 0x1d, 0x6e, 0x08,
-  0xd8, 0x3c, 0x00, 0x83, 0x59, 0x86, 0x3d, 0xe1, 0x93, 0xc0, 0x06, 0xf7,
-  0x0e, 0xe0, 0x33, 0x4b, 0x10, 0x2a, 0xd6, 0xde, 0x01, 0x11, 0x9f, 0x59,
-  0x82, 0x50, 0x19, 0x8e, 0x30, 0x25, 0xf7, 0x0e, 0x84, 0x6f, 0x96, 0xc1,
-  0x4f, 0x42, 0x25, 0xb0, 0x53, 0x7a, 0xef, 0x20, 0x3e, 0x16, 0x38, 0xf4,
-  0xb9, 0x60, 0x98, 0x0b, 0x8c, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c,
-  0x8a, 0xc8, 0xf3, 0x40, 0x87, 0x1b, 0x82, 0x3b, 0x0f, 0xc0, 0x60, 0x96,
-  0xe1, 0x4f, 0x40, 0x25, 0xb0, 0xfb, 0x0e, 0x86, 0xf8, 0xcc, 0x12, 0x84,
-  0x8a, 0x11, 0xfa, 0x1d, 0xc0, 0x67, 0x96, 0x20, 0x54, 0x06, 0x5a, 0x0c,
-  0x6d, 0x4f, 0x30, 0x3e, 0x21, 0xfe, 0x44, 0x00, 0x15, 0xb1, 0x0c, 0xfa,
-  0xe4, 0x82, 0x61, 0x2c, 0xbf, 0x83, 0xfe, 0x0e, 0xe2, 0x33, 0x1c, 0x91,
-  0x7b, 0xfe, 0x1d, 0x10, 0xdf, 0x2c, 0x83, 0xa8, 0x94, 0x4a, 0x60, 0xff,
-  0x1d, 0xe8, 0x5e, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x18,
-  0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x71, 0xea, 0x81, 0x0e,
-  0x37, 0x04, 0xa5, 0x1e, 0x80, 0xc1, 0x2c, 0xc3, 0xa8, 0x90, 0x4a, 0x60,
-  0xc3, 0x89, 0x07, 0xf0, 0x99, 0x25, 0x48, 0x15, 0x23, 0xf1, 0x80, 0x88,
-  0xcf, 0x2c, 0x41, 0xaa, 0x0c, 0x47, 0x90, 0x5f, 0x89, 0x07, 0xc2, 0x37,
-  0xcb, 0x60, 0x2a, 0xa9, 0x12, 0x58, 0xf9, 0x99, 0x78, 0x10, 0x1f, 0x0b,
-  0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x46, 0x59, 0x10, 0xc9, 0xc7, 0x8a,
-  0x20, 0x3e, 0x45, 0xc8, 0x7a, 0xa0, 0xc3, 0x0d, 0x01, 0xac, 0x07, 0x60,
-  0x30, 0xcb, 0x70, 0x2a, 0xa8, 0x12, 0x98, 0x8b, 0x07, 0x43, 0x7c, 0x66,
-  0x09, 0x52, 0xc5, 0x88, 0x19, 0x0f, 0xe0, 0x33, 0x4b, 0x90, 0x2a, 0x03,
-  0x2d, 0x86, 0x36, 0x2a, 0x18, 0xa9, 0x10, 0xa7, 0x22, 0xa0, 0x8a, 0xef,
-  0x07, 0xa5, 0x72, 0xc1, 0x30, 0x17, 0x18, 0x75, 0x9b, 0x51, 0xf7, 0xdf,
-  0xc1, 0x30, 0x47, 0xd3, 0xc1, 0x30, 0x47, 0x0c, 0x73, 0xc4, 0x30, 0x23,
-  0x06, 0x07, 0x00, 0x82, 0x60, 0x90, 0x91, 0x7b, 0xf0, 0xea, 0xc1, 0x9a,
-  0x07, 0xbd, 0x1e, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26,
-  0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x91, 0xc8, 0x88, 0x01, 0x02, 0x80,
-  0x20, 0x18, 0x58, 0xeb, 0x1e, 0xd8, 0x7a, 0x90, 0x10, 0xc1, 0x88, 0x01,
-  0x02, 0x80, 0x20, 0x18, 0x58, 0xec, 0x1e, 0xdc, 0x7a, 0x90, 0x10, 0xc1,
-  0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x58, 0xed, 0x1e, 0xe0, 0x7a, 0x90,
-  0x10, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x7c, 0xeb, 0x1e, 0xdc,
-  0x7a, 0x50, 0xe7, 0x41, 0x40, 0xee, 0x01, 0xa9, 0x07, 0xe2, 0x1e, 0x8c,
-  0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82,
-  0x81, 0x18, 0xb4, 0x7b, 0x90, 0xeb, 0x81, 0x10, 0x5c, 0x60, 0xdc, 0x2c,
-  0x81, 0xaa, 0x0c, 0xb4, 0x18, 0xae, 0x51, 0x27, 0xfa, 0x3c, 0xd0, 0x09,
-  0x4c, 0xdc, 0x89, 0x90, 0x2a, 0xfa, 0x3c, 0xe0, 0x89, 0x99, 0x60, 0xf0,
-  0xe7, 0x01, 0x7c, 0x66, 0x19, 0x56, 0xa5, 0x55, 0x48, 0x30, 0x18, 0x8e,
-  0x40, 0xc1, 0xe0, 0xcf, 0x83, 0xe1, 0xbb, 0x14, 0x0c, 0x86, 0x19, 0x6e,
-  0x08, 0xea, 0x3c, 0x20, 0x83, 0x1a, 0x02, 0x1d, 0x8e, 0x28, 0x46, 0x3d,
-  0x18, 0xbe, 0x0a, 0x04, 0xbd, 0x63, 0x98, 0xe1, 0x86, 0x00, 0xcf, 0x03,
-  0x32, 0xa8, 0x60, 0xd0, 0x59, 0x06, 0x56, 0x09, 0x97, 0xe0, 0x78, 0x3c,
-  0x18, 0xe6, 0xe2, 0x3a, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c,
-  0xb2, 0x7f, 0x0f, 0xd4, 0x3d, 0x30, 0xf5, 0x00, 0xdf, 0x83, 0xd1, 0x84,
-  0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86,
-  0x22, 0x0e, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xcb, 0xe4, 0x83,
-  0x78, 0x0f, 0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xeb,
-  0xe4, 0x03, 0x79, 0x0f, 0x18, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04,
-  0x03, 0x0b, 0xe5, 0x83, 0x79, 0x0f, 0x24, 0x22, 0x18, 0x31, 0x50, 0x00,
-  0x10, 0x04, 0x83, 0xcf, 0xe4, 0x03, 0x79, 0x0f, 0x60, 0x3d, 0x08, 0xfe,
-  0x3d, 0xf8, 0xf5, 0xa0, 0xdf, 0x83, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10,
-  0x82, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x10, 0x03, 0x94, 0x0f, 0xe8,
-  0x3d, 0x10, 0x82, 0x0b, 0x8c, 0x9b, 0x25, 0x08, 0x97, 0xe1, 0x06, 0x1f,
-  0x0c, 0x4e, 0x3e, 0x00, 0x83, 0x59, 0x06, 0x57, 0x79, 0x95, 0xa0, 0x5a,
-  0x3d, 0xb0, 0xf7, 0x00, 0x2e, 0x30, 0x6a, 0xc4, 0xe0, 0x00, 0x40, 0x10,
-  0x0c, 0x16, 0x9a, 0x0f, 0xee, 0x3d, 0x20, 0xc3, 0x60, 0xd7, 0x83, 0x11,
-  0x83, 0x03, 0x00, 0x41, 0x30, 0x58, 0x6a, 0x3e, 0xb8, 0xf7, 0x20, 0x10,
-  0x2e, 0x18, 0xa6, 0x60, 0x3d, 0xd8, 0xf7, 0x00, 0x2e, 0x30, 0x6a, 0xc4,
-  0xe0, 0x00, 0x40, 0x10, 0x0c, 0x96, 0x9c, 0x0f, 0xf8, 0x3d, 0x10, 0x03,
-  0x70, 0x0f, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0xd1, 0xf9, 0x80,
-  0xdf, 0x83, 0x40, 0xb8, 0x60, 0x98, 0x0b, 0x8c, 0xba, 0xc3, 0xa8, 0xcb,
-  0xf3, 0x60, 0x98, 0x73, 0xed, 0x60, 0x98, 0x23, 0x86, 0x39, 0x62, 0x98,
-  0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xc8, 0x7c, 0x3e, 0x48, 0xf9, 0xa0,
-  0xdc, 0x83, 0x9b, 0x0f, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46,
-  0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01,
-  0x40, 0x10, 0x0c, 0xac, 0xb2, 0x0f, 0x60, 0x3e, 0x48, 0x88, 0x60, 0xc4,
-  0x00, 0x01, 0x40, 0x10, 0x0c, 0x2c, 0xb3, 0x0f, 0x62, 0x3e, 0x48, 0x88,
-  0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xac, 0xb3, 0x0f, 0x64, 0x3e,
-  0x48, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0xbe, 0xb2, 0x0f,
-  0x62, 0x3e, 0x78, 0xf7, 0x20, 0xf0, 0xf9, 0xc0, 0xdf, 0x03, 0x9e, 0x0f,
-  0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x0c, 0x0e, 0x00, 0x04,
-  0xc1, 0x40, 0x0c, 0xce, 0x3e, 0x98, 0xf9, 0x40, 0x08, 0x2e, 0x30, 0x6e,
-  0x96, 0x20, 0x5c, 0x86, 0x1b, 0xfa, 0x30, 0x28, 0xfb, 0x00, 0x0c, 0x66,
-  0x19, 0x60, 0x25, 0x5c, 0x02, 0x1b, 0xf7, 0xa0, 0xdc, 0x83, 0xf8, 0x0c,
-  0x47, 0x88, 0x62, 0x60, 0xee, 0x01, 0xf1, 0xcd, 0x32, 0xc4, 0x0a, 0xad,
-  0x04, 0x76, 0xee, 0xc1, 0x28, 0x06, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05,
-  0xc3, 0x5c, 0x60, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0xc4,
-  0xdb, 0x07, 0x3a, 0xdc, 0x10, 0xb4, 0x7d, 0x00, 0x06, 0xb3, 0x0c, 0xb2,
-  0x32, 0x2b, 0x81, 0x0d, 0xef, 0x1e, 0xc0, 0x67, 0x96, 0x00, 0x57, 0xcc,
-  0xdd, 0x03, 0x22, 0x3e, 0xb3, 0x04, 0xb8, 0x32, 0x1c, 0xd1, 0x8a, 0xc1,
-  0xbb, 0x07, 0xc2, 0x37, 0xcb, 0x50, 0x2b, 0xb8, 0x12, 0x98, 0x2b, 0x06,
-  0xf0, 0x1e, 0xc4, 0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x51,
-  0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x7a, 0x1f, 0xe8, 0x70,
-  0x43, 0x80, 0xf7, 0x01, 0x18, 0xcc, 0x32, 0xd8, 0xca, 0xad, 0x04, 0x86,
-  0xef, 0xc1, 0x10, 0x9f, 0x59, 0x02, 0x5c, 0x31, 0x62, 0xdf, 0x03, 0xf8,
-  0xcc, 0x12, 0xe0, 0xca, 0x40, 0x8b, 0xa1, 0xc9, 0x0a, 0x36, 0x2b, 0x84,
-  0xad, 0x08, 0xb7, 0x02, 0x97, 0x02, 0xad, 0x5c, 0x30, 0x8c, 0xe9, 0x7b,
-  0xe0, 0xef, 0x41, 0x7c, 0x86, 0x23, 0x68, 0xe1, 0xdf, 0x03, 0xe2, 0x9b,
-  0x65, 0xc8, 0x15, 0x5e, 0x09, 0x0c, 0xe4, 0x83, 0x5a, 0x88, 0x8f, 0x05,
-  0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa3, 0x2c, 0x30, 0xe4, 0x63, 0x45,
-  0x10, 0x9f, 0x22, 0x50, 0x3f, 0xd0, 0xe1, 0x86, 0xc0, 0xf4, 0x03, 0x30,
-  0x98, 0x65, 0xd0, 0x95, 0x5d, 0x09, 0x6c, 0x40, 0xf9, 0x00, 0x3e, 0xb3,
-  0x04, 0xe0, 0x62, 0x25, 0x1f, 0x10, 0xf1, 0x99, 0x25, 0x00, 0x97, 0xe1,
-  0x88, 0x5f, 0x30, 0xf9, 0x40, 0xf8, 0x66, 0x19, 0x7a, 0x05, 0x5c, 0x02,
-  0x03, 0x87, 0x93, 0x0f, 0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9,
-  0xc0, 0x28, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0xd9, 0x0f,
-  0x74, 0xb8, 0x21, 0x88, 0xfd, 0x00, 0x0c, 0x66, 0x19, 0x7c, 0xe5, 0x57,
-  0x02, 0x7b, 0xf9, 0x60, 0x88, 0xcf, 0x2c, 0x01, 0xb8, 0x18, 0x41, 0xf3,
-  0x01, 0x7c, 0x66, 0x09, 0xc0, 0x65, 0xa0, 0xc5, 0xd0, 0x74, 0x05, 0xdb,
-  0x15, 0xc2, 0x57, 0x84, 0x5f, 0xa1, 0x0d, 0x5e, 0xb9, 0x60, 0x98, 0x0b,
-  0x8c, 0xba, 0xcd, 0xa8, 0x03, 0xf9, 0x60, 0x98, 0xab, 0xf1, 0x60, 0x98,
-  0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xc8,
-  0xca, 0x3f, 0x80, 0xfd, 0x80, 0xed, 0x03, 0xdf, 0x0f, 0x46, 0x13, 0x02,
-  0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a,
-  0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x2c, 0xf6, 0x0f, 0x6e,
-  0x3f, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xac, 0xf6,
-  0x0f, 0x70, 0x3f, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c,
-  0x2c, 0xf7, 0x0f, 0x72, 0x3f, 0x48, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40,
-  0x10, 0x0c, 0x3e, 0xf6, 0x0f, 0x70, 0x3f, 0xb0, 0xfb, 0x20, 0x28, 0xff,
-  0xa0, 0xf4, 0x83, 0xf1, 0x0f, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08,
-  0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x40, 0x0c, 0xdc, 0x3f, 0xd0, 0xfd,
-  0x40, 0x08, 0x2e, 0x30, 0x6e, 0x96, 0x20, 0x5c, 0x06, 0x5a, 0x0c, 0xd7,
-  0x60, 0x15, 0x7e, 0x1f, 0x56, 0x05, 0x26, 0x5c, 0x45, 0x00, 0x17, 0x7e,
-  0x1f, 0x5e, 0x65, 0x96, 0x41, 0x5c, 0xc8, 0x85, 0x24, 0x83, 0xe1, 0x88,
-  0x94, 0x0c, 0xfe, 0x3e, 0x18, 0xbe, 0x53, 0xc9, 0x60, 0x98, 0xe1, 0x86,
-  0xa0, 0xee, 0x03, 0x32, 0xa8, 0x21, 0xd0, 0xe1, 0x88, 0x92, 0x18, 0xfd,
-  0x60, 0xf8, 0x2a, 0x10, 0xf4, 0x4e, 0x62, 0x98, 0xe1, 0x86, 0x00, 0xef,
-  0x03, 0x32, 0xa8, 0x60, 0xd0, 0x59, 0x86, 0x71, 0xc1, 0x97, 0xe0, 0x78,
-  0x3e, 0x18, 0xe6, 0xe2, 0x3c, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10,
-  0x0c, 0xb2, 0xff, 0x0f, 0xd4, 0x3f, 0x30, 0xfd, 0x00, 0xff, 0x83, 0xd1,
-  0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20,
-  0x86, 0x22, 0x0e, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xcb, 0x04,
-  0x85, 0xf8, 0x0f, 0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03,
-  0xeb, 0x04, 0x05, 0xf9, 0x0f, 0x18, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10,
-  0x04, 0x03, 0x0b, 0x05, 0x85, 0xf9, 0x0f, 0x24, 0x22, 0x18, 0x31, 0x50,
-  0x00, 0x10, 0x04, 0x83, 0xcf, 0x04, 0x05, 0xf9, 0x0f, 0x60, 0x3f, 0x08,
-  0xfe, 0x3f, 0xf8, 0xfd, 0xa0, 0xff, 0x83, 0xd1, 0x84, 0x00, 0x18, 0x4d,
-  0x10, 0x82, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x10, 0x03, 0x14, 0x14,
-  0xe8, 0x3f, 0x10, 0x82, 0x0b, 0x8c, 0x9b, 0x25, 0xc0, 0x97, 0xe1, 0x06,
-  0x9f, 0x0c, 0x4e, 0x50, 0x00, 0x83, 0x59, 0x86, 0x72, 0x31, 0x97, 0xa0,
-  0x5a, 0x3f, 0xb0, 0xff, 0x00, 0x2e, 0x30, 0x6a, 0xc4, 0xe0, 0x00, 0x40,
-  0x10, 0x0c, 0x16, 0x1a, 0x14, 0xee, 0x3f, 0x28, 0xcb, 0x60, 0xf7, 0x83,
-  0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x58, 0x6a, 0x50, 0xb8, 0xff, 0x20,
-  0x10, 0x2e, 0x18, 0xa6, 0x60, 0x3f, 0xd8, 0xff, 0x00, 0x2e, 0x30, 0x6a,
-  0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x96, 0x1c, 0x14, 0xf8, 0x3f, 0x10,
-  0x0b, 0xf0, 0x0f, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0xd1, 0x41,
-  0x81, 0xff, 0x83, 0x40, 0xb8, 0x60, 0x98, 0x0b, 0x8c, 0xba, 0xc3, 0xa8,
-  0xcb, 0xfb, 0x60, 0x98, 0x73, 0xf5, 0x60, 0x98, 0x23, 0x86, 0x39, 0x62,
-  0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xc8, 0x7c, 0x50, 0x48, 0x41,
-  0xa1, 0xfc, 0x83, 0x1b, 0x14, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08,
-  0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00,
-  0x01, 0x40, 0x10, 0x0c, 0xac, 0x32, 0x14, 0x60, 0x50, 0x48, 0x88, 0x60,
-  0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x2c, 0x33, 0x14, 0x62, 0x50, 0x48,
-  0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xac, 0x33, 0x14, 0x64,
-  0x50, 0x48, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0xbe, 0x32,
-  0x14, 0x62, 0x50, 0x78, 0xff, 0x20, 0xf0, 0x41, 0xc1, 0xff, 0x03, 0x1e,
-  0x14, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x0c, 0x0e, 0x00,
-  0x04, 0xc1, 0x40, 0x0c, 0xce, 0x50, 0x98, 0x41, 0x41, 0x08, 0x2e, 0x30,
-  0x6e, 0x96, 0x00, 0x5f, 0x86, 0x1b, 0xfa, 0x32, 0x28, 0x43, 0x01, 0x0c,
-  0x66, 0x19, 0xce, 0x05, 0x5f, 0x02, 0x1b, 0xff, 0xa0, 0xfc, 0x83, 0xf8,
-  0x0c, 0x47, 0x8c, 0x66, 0x60, 0xfe, 0x01, 0xf1, 0xcd, 0x32, 0xa0, 0xcb,
-  0xba, 0x04, 0x76, 0xfe, 0x01, 0x69, 0x06, 0xf1, 0xb1, 0x60, 0xa0, 0xcf,
-  0x05, 0xc3, 0x5c, 0x60, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53,
-  0xc4, 0x1b, 0x0a, 0x3a, 0xdc, 0x10, 0xb4, 0xa1, 0x00, 0x06, 0xb3, 0x0c,
-  0xe9, 0xa2, 0x2e, 0x81, 0x0d, 0xef, 0x1f, 0xc0, 0x67, 0x96, 0xe0, 0x5d,
-  0xcc, 0xfd, 0x03, 0x22, 0x3e, 0xb3, 0x04, 0xef, 0x32, 0x1c, 0xe1, 0x9a,
-  0xc1, 0xfb, 0x07, 0xc2, 0x37, 0xcb, 0xc0, 0x2e, 0xef, 0x12, 0xd8, 0x6b,
-  0x06, 0xf0, 0x1f, 0xc4, 0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73, 0x81,
-  0x51, 0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x7a, 0x28, 0xe8,
-  0x70, 0x43, 0x80, 0x87, 0x02, 0x18, 0xcc, 0x32, 0xb4, 0x8b, 0xbb, 0x04,
-  0x86, 0xff, 0xc1, 0x10, 0x9f, 0x59, 0x82, 0x77, 0x31, 0x62, 0xff, 0x03,
-  0xf8, 0xcc, 0x12, 0xbc, 0xcb, 0x40, 0x8b, 0xa1, 0xa5, 0x0b, 0xa6, 0x2e,
-  0x44, 0xbb, 0x08, 0xee, 0x22, 0xb7, 0xc2, 0xba, 0x5c, 0x30, 0x8c, 0xe9,
-  0x7f, 0xe0, 0xff, 0x41, 0x7c, 0x86, 0x23, 0x68, 0xe3, 0xff, 0x03, 0xe2,
-  0x9b, 0x65, 0x80, 0x97, 0x79, 0x09, 0x0c, 0x04, 0x85, 0xda, 0x88, 0x8f,
-  0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa3, 0x2c, 0x30, 0xe4, 0x63,
-  0x45, 0x10, 0x9f, 0x22, 0x50, 0x51, 0xd0, 0xe1, 0x86, 0xc0, 0x14, 0x05,
-  0x30, 0x98, 0x65, 0x88, 0x17, 0x79, 0x09, 0x6c, 0x40, 0x41, 0x01, 0x3e,
-  0xb3, 0x04, 0xf7, 0x62, 0x25, 0x28, 0x10, 0xf1, 0x99, 0x25, 0xb8, 0x97,
-  0xe1, 0x88, 0xdf, 0x30, 0x41, 0x41, 0xf8, 0x66, 0x19, 0xe8, 0xe5, 0x5e,
-  0x02, 0x03, 0x8f, 0x13, 0x14, 0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86,
-  0xb9, 0xc0, 0x28, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0x59,
-  0x14, 0x74, 0xb8, 0x21, 0x88, 0x45, 0x01, 0x0c, 0x66, 0x19, 0xea, 0xc5,
-  0x5e, 0x02, 0x7b, 0x41, 0x61, 0x88, 0xcf, 0x2c, 0xc1, 0xbd, 0x18, 0x41,
-  0x83, 0x02, 0x7c, 0x66, 0x09, 0xee, 0x65, 0xa0, 0xc5, 0xd0, 0xe2, 0x05,
-  0x93, 0x17, 0xa2, 0x5e, 0x04, 0x7b, 0xa1, 0x9d, 0x79, 0xb9, 0x60, 0x98,
-  0x0b, 0x8c, 0xba, 0xcd, 0xa8, 0x03, 0x41, 0x61, 0x98, 0xab, 0xf9, 0x60,
-  0x98, 0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30,
-  0xc8, 0xca, 0x51, 0x80, 0x45, 0x81, 0x0d, 0x05, 0x5f, 0x14, 0x46, 0x13,
-  0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18,
-  0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x2c, 0x76, 0x14,
-  0x6e, 0x51, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xac,
-  0x76, 0x14, 0x70, 0x51, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10,
-  0x0c, 0x2c, 0x77, 0x14, 0x72, 0x51, 0x48, 0x88, 0x60, 0xc4, 0x40, 0x01,
-  0x40, 0x10, 0x0c, 0x3e, 0x76, 0x14, 0x70, 0x51, 0xb0, 0x43, 0x21, 0x28,
-  0x47, 0xa1, 0x14, 0x85, 0x71, 0x14, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41,
-  0x08, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x40, 0x0c, 0xdc, 0x51, 0xd0,
-  0x45, 0x41, 0x08, 0x2e, 0x30, 0x6e, 0x96, 0x00, 0x5f, 0x06, 0x5a, 0x0c,
-  0xd7, 0x18, 0x17, 0x7e, 0x24, 0xc4, 0x05, 0x26, 0xca, 0x45, 0xb8, 0x17,
-  0x7e, 0x24, 0xcc, 0x65, 0x96, 0x21, 0x5f, 0xf6, 0x85, 0x44, 0x83, 0xe1,
-  0x88, 0x11, 0x0c, 0xfe, 0x50, 0x18, 0xbe, 0x23, 0xc1, 0x60, 0x98, 0xe1,
-  0x86, 0xa0, 0x0e, 0x05, 0x32, 0xa8, 0x21, 0xd0, 0xe1, 0x88, 0x12, 0x19,
-  0x45, 0x61, 0xf8, 0x2a, 0x10, 0xf4, 0x4e, 0x64, 0x98, 0xe1, 0x86, 0x00,
-  0x0f, 0x05, 0x32, 0xa8, 0x60, 0xd0, 0x59, 0x06, 0x7d, 0x79, 0x99, 0xe0,
-  0x78, 0x50, 0x18, 0xe6, 0xe2, 0x3e, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40,
-  0x10, 0x0c, 0xb2, 0x7f, 0x14, 0xd4, 0x51, 0x30, 0x45, 0x01, 0x1f, 0x85,
-  0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d,
-  0x20, 0x86, 0x22, 0x0e, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xcb,
-  0x24, 0x85, 0x78, 0x14, 0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04,
-  0x03, 0xeb, 0x24, 0x05, 0x79, 0x14, 0x18, 0x22, 0x18, 0x31, 0x40, 0x00,
-  0x10, 0x04, 0x03, 0x0b, 0x25, 0x85, 0x79, 0x14, 0x24, 0x22, 0x18, 0x31,
-  0x50, 0x00, 0x10, 0x04, 0x83, 0xcf, 0x24, 0x05, 0x79, 0x14, 0x60, 0x51,
-  0x08, 0xfe, 0x51, 0xf8, 0x45, 0xa1, 0x1f, 0x85, 0xd1, 0x84, 0x00, 0x18,
-  0x4d, 0x10, 0x82, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x10, 0x03, 0x94,
-  0x14, 0xe8, 0x51, 0x10, 0x82, 0x0b, 0x8c, 0x9b, 0x25, 0x78, 0x99, 0xe1,
-  0x06, 0x1f, 0x0d, 0x4e, 0x52, 0x00, 0x83, 0x59, 0x06, 0x7e, 0xe9, 0x97,
-  0xa0, 0x5a, 0x51, 0xb0, 0x47, 0x01, 0x2e, 0x30, 0x6a, 0xc4, 0xe0, 0x00,
-  0x40, 0x10, 0x0c, 0x16, 0x9a, 0x14, 0xee, 0x51, 0xf8, 0xc1, 0x60, 0x17,
-  0x85, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x58, 0x6a, 0x52, 0xb8, 0x47,
-  0x21, 0x10, 0x2e, 0x18, 0xa6, 0x60, 0x51, 0xd8, 0x47, 0x01, 0x2e, 0x30,
-  0x6a, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x96, 0x9c, 0x14, 0xf8, 0x51,
-  0x10, 0x13, 0x70, 0x14, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0xd1,
-  0x49, 0x81, 0x1f, 0x85, 0x40, 0xb8, 0x60, 0x98, 0x0b, 0x8c, 0xba, 0xc3,
-  0xa8, 0xcb, 0x43, 0x61, 0x98, 0x73, 0xfd, 0x60, 0x98, 0x23, 0x86, 0x39,
-  0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xc8, 0x7c, 0x52, 0x48,
-  0x49, 0xa1, 0x1c, 0x85, 0x9b, 0x14, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41,
-  0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4,
-  0x00, 0x01, 0x40, 0x10, 0x0c, 0xac, 0xb2, 0x14, 0x60, 0x52, 0x48, 0x88,
-  0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x2c, 0xb3, 0x14, 0x62, 0x52,
-  0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xac, 0xb3, 0x14,
-  0x64, 0x52, 0x48, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0xbe,
-  0xb2, 0x14, 0x62, 0x52, 0x78, 0x47, 0x21, 0xf0, 0x49, 0xc1, 0x1f, 0x05,
-  0x9e, 0x14, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x0c, 0x0e,
-  0x00, 0x04, 0xc1, 0x40, 0x0c, 0xce, 0x52, 0x98, 0x49, 0x41, 0x08, 0x2e,
-  0x30, 0x6e, 0x96, 0xe0, 0x65, 0x86, 0x1b, 0xfa, 0x34, 0x28, 0x4b, 0x01,
-  0x0c, 0x66, 0x19, 0xfc, 0xe5, 0x65, 0x02, 0x1b, 0x47, 0xa1, 0x1c, 0x85,
-  0xf8, 0x0c, 0x47, 0xf4, 0x61, 0x60, 0x8e, 0x02, 0xf1, 0xcd, 0x32, 0xfc,
-  0x8b, 0xc8, 0x04, 0x76, 0x8e, 0x82, 0x1f, 0x06, 0xf1, 0xb1, 0x60, 0xa0,
-  0xcf, 0x05, 0xc3, 0x5c, 0x60, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2,
-  0x53, 0xc4, 0x5b, 0x0a, 0x3a, 0xdc, 0x10, 0xb4, 0xa5, 0x00, 0x06, 0xb3,
-  0x0c, 0x20, 0x13, 0x32, 0x81, 0x0d, 0xef, 0x28, 0xc0, 0x67, 0x96, 0xc0,
-  0x64, 0xcc, 0x1d, 0x05, 0x22, 0x3e, 0xb3, 0x04, 0x26, 0x33, 0x1c, 0x81,
-  0x8a, 0xc1, 0x3b, 0x0a, 0xc2, 0x37, 0xcb, 0x30, 0x32, 0x26, 0x13, 0x58,
-  0x2a, 0x06, 0xf0, 0x28, 0xc4, 0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73,
-  0x81, 0x51, 0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x7a, 0x29,
-  0xe8, 0x70, 0x43, 0x80, 0x97, 0x02, 0x18, 0xcc, 0x32, 0x90, 0x4c, 0xc9,
-  0x04, 0x86, 0x8f, 0xc2, 0x10, 0x9f, 0x59, 0x02, 0x93, 0x31, 0x62, 0x1f,
-  0x05, 0xf8, 0xcc, 0x12, 0x98, 0xcc, 0x40, 0x8b, 0xa1, 0x81, 0x0c, 0x16,
-  0x32, 0x04, 0xc9, 0x08, 0x25, 0x63, 0x96, 0x82, 0xc8, 0x5c, 0x30, 0x8c,
-  0xe9, 0xa3, 0xe0, 0x8f, 0x42, 0x7c, 0x86, 0x23, 0x68, 0xe5, 0x1f, 0x05,
-  0xe2, 0x9b, 0x65, 0x38, 0x19, 0x95, 0x09, 0x0c, 0x24, 0x85, 0x5a, 0x89,
-  0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa3, 0x2c, 0x30, 0xe4,
-  0x63, 0x45, 0x10, 0x9f, 0x22, 0x50, 0x53, 0xd0, 0xe1, 0x86, 0xc0, 0x34,
-  0x05, 0x30, 0x98, 0x65, 0x40, 0x99, 0x94, 0x09, 0x6c, 0x40, 0x49, 0x01,
-  0x3e, 0xb3, 0x04, 0x2e, 0x63, 0x25, 0x29, 0x10, 0xf1, 0x99, 0x25, 0x70,
-  0x99, 0xe1, 0x88, 0x5f, 0x31, 0x49, 0x41, 0xf8, 0x66, 0x19, 0x56, 0xc6,
-  0x65, 0x02, 0x03, 0x97, 0x93, 0x14, 0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b,
-  0x86, 0xb9, 0xc0, 0x28, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88,
-  0xd9, 0x14, 0x74, 0xb8, 0x21, 0x88, 0x4d, 0x01, 0x0c, 0x66, 0x19, 0x58,
-  0xa6, 0x65, 0x02, 0x7b, 0x49, 0x61, 0x88, 0xcf, 0x2c, 0x81, 0xcb, 0x18,
-  0x41, 0x93, 0x02, 0x7c, 0x66, 0x09, 0x5c, 0x66, 0xa0, 0xc5, 0xd0, 0x50,
-  0x06, 0x4b, 0x19, 0x82, 0x65, 0x84, 0x96, 0xa1, 0x2d, 0x95, 0xb9, 0x60,
-  0x98, 0x0b, 0x8c, 0xba, 0xcd, 0xa8, 0x03, 0x49, 0x61, 0x98, 0xab, 0x41,
-  0x61, 0x98, 0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41,
-  0x30, 0xc8, 0xca, 0x53, 0x80, 0x4d, 0x81, 0x2d, 0x05, 0xdf, 0x14, 0x46,
-  0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81,
-  0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x2c, 0xf6,
-  0x14, 0x6e, 0x53, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c,
-  0xac, 0xf6, 0x14, 0x70, 0x53, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40,
-  0x10, 0x0c, 0x2c, 0xf7, 0x14, 0x72, 0x53, 0x48, 0x88, 0x60, 0xc4, 0x40,
-  0x01, 0x40, 0x10, 0x0c, 0x3e, 0xf6, 0x14, 0x70, 0x53, 0xb0, 0x4b, 0x21,
-  0x28, 0x4f, 0xa1, 0x34, 0x85, 0xf1, 0x14, 0x46, 0x13, 0x02, 0x60, 0x34,
-  0x41, 0x08, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x40, 0x0c, 0xdc, 0x53,
-  0xd0, 0x4d, 0x41, 0x08, 0x2e, 0x30, 0x6e, 0x96, 0xe0, 0x65, 0x06, 0x5a,
-  0x0c, 0xd7, 0xd0, 0x17, 0x7e, 0x25, 0xf2, 0x05, 0x26, 0xf8, 0x45, 0x70,
-  0x19, 0x7e, 0x25, 0xfa, 0x65, 0x96, 0x01, 0x66, 0x64, 0x86, 0x64, 0x83,
-  0xe1, 0x08, 0x11, 0x0c, 0xfe, 0x52, 0x18, 0xbe, 0x1b, 0xc1, 0x60, 0x98,
-  0xe1, 0x86, 0xa0, 0x2e, 0x05, 0x32, 0xa8, 0x21, 0xd0, 0xe1, 0x88, 0x92,
-  0x19, 0x4d, 0x61, 0xf8, 0x2a, 0x10, 0xf4, 0x4e, 0x66, 0x98, 0xe1, 0x86,
-  0x00, 0x2f, 0x05, 0x32, 0xa8, 0x60, 0xd0, 0x59, 0x86, 0x98, 0x31, 0x9b,
-  0xe0, 0x78, 0x52, 0x18, 0xe6, 0xe2, 0x50, 0x18, 0x66, 0xc4, 0xe0, 0x00,
-  0x40, 0x10, 0x0c, 0xb2, 0xff, 0x14, 0xd4, 0x53, 0x30, 0x4d, 0x01, 0x3f,
-  0x85, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18,
-  0x4d, 0x20, 0x86, 0x22, 0x0e, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03,
-  0xcb, 0x44, 0x85, 0xf8, 0x14, 0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10,
-  0x04, 0x03, 0xeb, 0x44, 0x05, 0xf9, 0x14, 0x18, 0x22, 0x18, 0x31, 0x40,
-  0x00, 0x10, 0x04, 0x03, 0x0b, 0x45, 0x85, 0xf9, 0x14, 0x24, 0x22, 0x18,
-  0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0xcf, 0x44, 0x05, 0xf9, 0x14, 0x60,
-  0x53, 0x08, 0xfe, 0x53, 0xf8, 0x4d, 0xa1, 0x3f, 0x85, 0xd1, 0x84, 0x00,
-  0x18, 0x4d, 0x10, 0x82, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x10, 0x03,
-  0x14, 0x15, 0xe8, 0x53, 0x10, 0x82, 0x0b, 0x8c, 0x9b, 0x25, 0x30, 0x9b,
-  0xe1, 0x06, 0x9f, 0x0d, 0x4e, 0x54, 0x00, 0x83, 0x59, 0x86, 0x99, 0xa1,
-  0x99, 0xa0, 0x5a, 0x53, 0xb0, 0x4f, 0x01, 0x2e, 0x30, 0x6a, 0xc4, 0xe0,
-  0x00, 0x40, 0x10, 0x0c, 0x16, 0x1a, 0x15, 0xee, 0x53, 0xf0, 0xc1, 0x60,
-  0x37, 0x85, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x58, 0x6a, 0x54, 0xb8,
-  0x4f, 0x21, 0x10, 0x2e, 0x18, 0xa6, 0x60, 0x53, 0xd8, 0x4f, 0x01, 0x2e,
-  0x30, 0x6a, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x96, 0x1c, 0x15, 0xf8,
-  0x53, 0x10, 0x1b, 0xf0, 0x14, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60,
-  0xd1, 0x51, 0x81, 0x3f, 0x85, 0x40, 0xb8, 0x60, 0x98, 0x0b, 0x8c, 0xba,
-  0xc3, 0xa8, 0xcb, 0x4b, 0x61, 0x98, 0x73, 0x45, 0x61, 0x98, 0x23, 0x86,
-  0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xc8, 0x7c, 0x54,
-  0x48, 0x51, 0xa1, 0x3c, 0x85, 0x1b, 0x15, 0x46, 0x13, 0x02, 0x60, 0x34,
-  0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64,
-  0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xac, 0x32, 0x15, 0x60, 0x54, 0x48,
-  0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x2c, 0x33, 0x15, 0x62,
-  0x54, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xac, 0x33,
-  0x15, 0x64, 0x54, 0x48, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c,
-  0xbe, 0x32, 0x15, 0x62, 0x54, 0x78, 0x4f, 0x21, 0xf0, 0x51, 0xc1, 0x3f,
-  0x05, 0x1e, 0x15, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x0c,
-  0x0e, 0x00, 0x04, 0xc1, 0x40, 0x0c, 0xce, 0x54, 0x98, 0x51, 0x41, 0x08,
-  0x2e, 0x30, 0x6e, 0x96, 0xc0, 0x6c, 0x86, 0x1b, 0xfa, 0x36, 0x28, 0x53,
-  0x01, 0x0c, 0x66, 0x19, 0x6a, 0xc6, 0x6c, 0x02, 0x1b, 0x4f, 0xa1, 0x3c,
-  0x85, 0xf8, 0x0c, 0x47, 0xf0, 0x61, 0x60, 0x9e, 0x02, 0xf1, 0xcd, 0x32,
-  0xd8, 0x4c, 0xce, 0x04, 0x76, 0x9e, 0x42, 0x1f, 0x06, 0xf1, 0xb1, 0x60,
-  0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0x60, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08,
-  0xe2, 0x53, 0xc4, 0x9b, 0x0a, 0x3a, 0xdc, 0x10, 0xb4, 0xa9, 0x00, 0x06,
-  0xb3, 0x0c, 0x37, 0x83, 0x33, 0x81, 0x0d, 0xef, 0x29, 0xc0, 0x67, 0x96,
-  0xa0, 0x67, 0xcc, 0x3d, 0x05, 0x22, 0x3e, 0xb3, 0x04, 0x3d, 0x33, 0x1c,
-  0x71, 0x8a, 0xc1, 0x7b, 0x0a, 0xc2, 0x37, 0xcb, 0xa0, 0x33, 0x3d, 0x13,
-  0x18, 0x2a, 0x06, 0xf0, 0x29, 0xc4, 0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c,
-  0x73, 0x81, 0x51, 0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x7a,
-  0x2a, 0xe8, 0x70, 0x43, 0x80, 0xa7, 0x02, 0x18, 0xcc, 0x32, 0xec, 0x0c,
-  0xcf, 0x04, 0x86, 0x9f, 0xc2, 0x10, 0x9f, 0x59, 0x82, 0x9e, 0x31, 0x62,
-  0x3f, 0x05, 0xf8, 0xcc, 0x12, 0xf4, 0xcc, 0x40, 0x8b, 0xa1, 0xdd, 0x0c,
-  0x86, 0x33, 0xc4, 0xce, 0x08, 0x3c, 0x43, 0x96, 0x42, 0xce, 0x5c, 0x30,
-  0x8c, 0xe9, 0xa7, 0xe0, 0x9f, 0x42, 0x7c, 0x86, 0x23, 0x68, 0xe7, 0x3f,
-  0x05, 0xe2, 0x9b, 0x65, 0xf0, 0x99, 0xb0, 0x09, 0x0c, 0x44, 0x85, 0xda,
-  0x89, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa3, 0x2c, 0x30,
-  0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0x50, 0x55, 0xd0, 0xe1, 0x86, 0xc0,
-  0x54, 0x05, 0x30, 0x98, 0x65, 0xf8, 0x19, 0xb0, 0x09, 0x6c, 0x40, 0x51,
-  0x01, 0x3e, 0xb3, 0x04, 0x65, 0x63, 0x25, 0x2a, 0x10, 0xf1, 0x99, 0x25,
-  0x28, 0x9b, 0xe1, 0x88, 0xdf, 0x31, 0x51, 0x41, 0xf8, 0x66, 0x19, 0xc4,
-  0xa6, 0x6c, 0x02, 0x03, 0x9f, 0x13, 0x15, 0xe2, 0x63, 0x81, 0x43, 0x9f,
-  0x0b, 0x86, 0xb9, 0xc0, 0x28, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4, 0xa7,
-  0x88, 0x59, 0x15, 0x74, 0xb8, 0x21, 0x88, 0x55, 0x01, 0x0c, 0x66, 0x19,
-  0xc6, 0x86, 0x6c, 0x02, 0x7b, 0x51, 0x61, 0x88, 0xcf, 0x2c, 0x41, 0xd9,
-  0x18, 0x41, 0xa3, 0x02, 0x7c, 0x66, 0x09, 0xca, 0x66, 0xa0, 0xc5, 0xd0,
-  0x7e, 0x06, 0x03, 0x1b, 0x62, 0x6c, 0x04, 0xb2, 0xa1, 0xbd, 0xb0, 0xb9,
-  0x60, 0x98, 0x0b, 0x8c, 0xba, 0xcd, 0xa8, 0x03, 0x51, 0x61, 0x98, 0xab,
-  0x49, 0x61, 0x98, 0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00,
-  0x41, 0x30, 0xc8, 0xca, 0x55, 0x80, 0x55, 0x81, 0x4d, 0x05, 0x5f, 0x15,
-  0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34,
-  0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x2c,
-  0x76, 0x15, 0x6e, 0x55, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10,
-  0x0c, 0xac, 0x76, 0x15, 0x70, 0x55, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01,
-  0x40, 0x10, 0x0c, 0x2c, 0x77, 0x15, 0x72, 0x55, 0x48, 0x88, 0x60, 0xc4,
-  0x40, 0x01, 0x40, 0x10, 0x0c, 0x3e, 0x76, 0x15, 0x70, 0x55, 0xb0, 0x53,
-  0x21, 0x28, 0x57, 0xa1, 0x54, 0x85, 0x71, 0x15, 0x46, 0x13, 0x02, 0x60,
-  0x34, 0x41, 0x08, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x40, 0x0c, 0xdc,
-  0x55, 0xd0, 0x55, 0x41, 0x08, 0x2e, 0x30, 0x6e, 0x96, 0xc0, 0x6c, 0x06,
-  0x5a, 0x0c, 0xd7, 0x88, 0x19, 0x7e, 0x26, 0x60, 0x06, 0x26, 0x66, 0x46,
-  0x28, 0x1b, 0x7e, 0x26, 0x68, 0xc6, 0x4e, 0x38, 0x10, 0x55, 0x01, 0x3e,
-  0xb3, 0x0c, 0x67, 0x93, 0x36, 0x25, 0x1c, 0x0c, 0x47, 0xa4, 0x70, 0x00,
-  0xaa, 0xc2, 0xf0, 0x9d, 0x0a, 0x07, 0xc3, 0x0c, 0x37, 0x04, 0x76, 0x2a,
-  0x90, 0x41, 0x0d, 0x81, 0x0e, 0x47, 0x14, 0xa4, 0x2a, 0x0c, 0x5f, 0x05,
-  0x82, 0xde, 0x31, 0xcc, 0x70, 0x43, 0x90, 0xa7, 0x02, 0x19, 0x54, 0x30,
-  0xe8, 0x2c, 0x03, 0xda, 0xf4, 0x4d, 0x70, 0x3d, 0x2a, 0x0c, 0x73, 0x72,
-  0x29, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x19, 0xc8, 0x0a,
-  0xeb, 0x2a, 0x9c, 0xaa, 0x90, 0xaf, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26,
-  0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x87, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x75, 0xb2, 0x82, 0xbc, 0x0a, 0x07,
-  0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x85, 0xb2, 0xc2, 0xbc,
-  0x0a, 0x0c, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x95, 0xb2,
-  0x02, 0xbd, 0x0a, 0x12, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1,
-  0x77, 0xb2, 0xc2, 0xbc, 0x0a, 0xb1, 0x2a, 0x04, 0x20, 0x2b, 0x80, 0xab,
-  0xe0, 0xaf, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x88, 0xc1,
-  0x01, 0x80, 0x20, 0x18, 0x88, 0x41, 0xca, 0x0a, 0xf5, 0x2a, 0x08, 0xc1,
-  0x05, 0xc6, 0xcd, 0x12, 0xf4, 0xcd, 0x70, 0xc3, 0x0f, 0x07, 0x28, 0x2b,
-  0x80, 0xc1, 0x2c, 0x83, 0xda, 0xac, 0x4d, 0x50, 0xae, 0x2a, 0xdc, 0xab,
-  0x00, 0x17, 0x18, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x4b, 0xcd,
-  0x0a, 0xf8, 0x2a, 0x94, 0x71, 0xc0, 0xab, 0xc2, 0x88, 0xc1, 0x01, 0x80,
-  0x20, 0x18, 0x2c, 0x36, 0x2b, 0xe0, 0xab, 0x10, 0x08, 0x17, 0x0c, 0x53,
-  0xb1, 0x2a, 0xf0, 0xab, 0x00, 0x17, 0x18, 0x35, 0x62, 0x70, 0x00, 0x20,
-  0x08, 0x06, 0x8b, 0xce, 0x0a, 0xfd, 0x2a, 0x88, 0x41, 0xb8, 0x0a, 0x23,
-  0x06, 0x07, 0x00, 0x82, 0x60, 0xb0, 0xec, 0xac, 0xd0, 0xaf, 0x42, 0x20,
-  0x5c, 0x30, 0xcc, 0x05, 0x46, 0xdd, 0x61, 0xd4, 0xe9, 0xa9, 0x30, 0xcc,
-  0xbd, 0xa6, 0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01,
-  0x80, 0x20, 0x18, 0x64, 0x3f, 0x2b, 0xa8, 0xac, 0x60, 0xae, 0x02, 0xce,
-  0x0a, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30,
-  0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06,
-  0x96, 0xd9, 0x0a, 0x31, 0x2b, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20,
-  0x08, 0x06, 0xd6, 0xd9, 0x0a, 0x32, 0x2b, 0x24, 0x44, 0x30, 0x62, 0x80,
-  0x00, 0x20, 0x08, 0x06, 0x16, 0xda, 0x0a, 0x33, 0x2b, 0x24, 0x44, 0x30,
-  0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x9f, 0xd9, 0x0a, 0x32, 0x2b, 0xc0,
-  0xab, 0x10, 0xfc, 0xac, 0xf0, 0xaf, 0x42, 0xcf, 0x0a, 0xa3, 0x09, 0x01,
-  0x30, 0x9a, 0x20, 0x04, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x20, 0x06,
-  0x68, 0x2b, 0xd0, 0xac, 0x20, 0x04, 0x17, 0x18, 0x37, 0x4b, 0xd0, 0x37,
-  0xc3, 0x0d, 0x7e, 0x1c, 0x98, 0xad, 0x00, 0x06, 0xb3, 0x0c, 0x6c, 0xd3,
-  0x37, 0x81, 0x91, 0xab, 0x60, 0xae, 0x42, 0x7c, 0x86, 0x23, 0x46, 0x39,
-  0x38, 0x57, 0x81, 0xf8, 0x66, 0x19, 0xda, 0x06, 0x6e, 0x02, 0x43, 0x57,
-  0x81, 0x94, 0x83, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x30,
-  0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x02, 0x6e, 0x05, 0x1d,
-  0x6e, 0x08, 0xdc, 0x56, 0x00, 0x83, 0x59, 0x06, 0xb7, 0x79, 0x9b, 0xc0,
-  0x06, 0x78, 0x15, 0xe0, 0x33, 0x4b, 0x40, 0x37, 0xf6, 0xae, 0x02, 0x11,
-  0x9f, 0x59, 0x02, 0xba, 0x19, 0x8e, 0x70, 0xe5, 0x00, 0x5e, 0x05, 0xe1,
-  0x9b, 0x65, 0x88, 0x1b, 0xba, 0x09, 0xec, 0x95, 0x83, 0x78, 0x15, 0xe2,
-  0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x28, 0x0b, 0x22, 0xf9,
-  0x58, 0x11, 0xc4, 0xa7, 0x88, 0xbd, 0x15, 0x74, 0xb8, 0x21, 0xc8, 0x5b,
-  0x01, 0x0c, 0x66, 0x19, 0xe4, 0x66, 0x6e, 0x02, 0xcb, 0x57, 0x61, 0x88,
-  0xcf, 0x2c, 0x01, 0xdd, 0x18, 0xc1, 0xaf, 0x02, 0x7c, 0x66, 0x09, 0xe8,
-  0x66, 0xa0, 0xc5, 0xd0, 0xdc, 0x06, 0x7b, 0x1b, 0x42, 0x6e, 0x84, 0xb9,
-  0x91, 0xcb, 0x01, 0x6e, 0x2e, 0x18, 0xc6, 0xf6, 0x55, 0xf8, 0x57, 0x21,
-  0x3e, 0xc3, 0x11, 0xb4, 0x00, 0xb2, 0x02, 0xf1, 0xcd, 0x32, 0xd4, 0x0d,
-  0xde, 0x04, 0x16, 0xb2, 0x42, 0x2d, 0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17,
-  0x0c, 0x73, 0x81, 0x51, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11,
-  0xa9, 0x2b, 0xe8, 0x70, 0x43, 0x70, 0xba, 0x02, 0x18, 0xcc, 0x32, 0xd8,
-  0xcd, 0xdd, 0x04, 0x36, 0xa4, 0xac, 0x00, 0x9f, 0x59, 0x02, 0xbe, 0x31,
-  0x93, 0x15, 0x88, 0xf8, 0xcc, 0x12, 0xf0, 0xcd, 0x70, 0xc4, 0x2f, 0x9c,
-  0xac, 0x20, 0x7c, 0xb3, 0x0c, 0x79, 0xc3, 0x37, 0x81, 0x81, 0x03, 0xca,
-  0x0a, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0x60, 0x94, 0x05,
-  0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x04, 0xed, 0x0a, 0x3a, 0xdc, 0x10,
-  0xc8, 0xae, 0x00, 0x06, 0xb3, 0x0c, 0x7a, 0xb3, 0x37, 0x81, 0xc1, 0xac,
-  0x30, 0xc4, 0x67, 0x96, 0x80, 0x6f, 0x8c, 0xa8, 0x59, 0x01, 0x3e, 0xb3,
-  0x04, 0x7c, 0x33, 0xd0, 0x62, 0x68, 0x76, 0x83, 0xdd, 0x0d, 0xa1, 0x37,
-  0xc2, 0xde, 0xd0, 0x06, 0xde, 0x5c, 0x30, 0xcc, 0x05, 0x46, 0xdd, 0x66,
-  0xd4, 0x85, 0xac, 0x30, 0xcc, 0xd9, 0xa8, 0x30, 0xcc, 0x11, 0xc3, 0x1c,
-  0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x64, 0xe6, 0x2b, 0xc4,
-  0xae, 0xd0, 0xb6, 0xc2, 0xef, 0x0a, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20,
-  0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62,
-  0x80, 0x00, 0x20, 0x08, 0x06, 0x56, 0xfb, 0x0a, 0xb8, 0x2b, 0x24, 0x44,
-  0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x96, 0xfb, 0x0a, 0xb9, 0x2b,
-  0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xd6, 0xfb, 0x0a,
-  0xba, 0x2b, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x5f,
-  0xfb, 0x0a, 0xb9, 0x2b, 0xdc, 0xad, 0x10, 0x98, 0xaf, 0x60, 0xba, 0x02,
-  0xf9, 0x0a, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0x23, 0x06, 0x07,
-  0x00, 0x82, 0x60, 0x20, 0x06, 0xef, 0x2b, 0xec, 0xae, 0x20, 0x04, 0x17,
-  0x18, 0x37, 0x4b, 0xd0, 0x37, 0x03, 0x2d, 0x86, 0x6b, 0xa0, 0x8d, 0xbf,
-  0x13, 0x67, 0x03, 0x13, 0x6a, 0x23, 0xf0, 0x8d, 0xbf, 0x13, 0x6b, 0x33,
-  0xcb, 0xe0, 0x37, 0xa0, 0x53, 0xd2, 0xc1, 0x70, 0x84, 0x4a, 0x07, 0xa0,
-  0x2b, 0x0c, 0xdf, 0xad, 0x74, 0x30, 0xcc, 0x70, 0x43, 0x60, 0xb7, 0x02,
-  0x19, 0xd4, 0x10, 0xe8, 0x70, 0x44, 0x49, 0x90, 0xae, 0x30, 0x7c, 0x15,
-  0x08, 0x7a, 0x27, 0x31, 0xcc, 0x70, 0x43, 0x90, 0xb7, 0x02, 0x19, 0x54,
-  0x30, 0xe8, 0x2c, 0xc3, 0xdf, 0xd0, 0x4e, 0x70, 0x3d, 0x2b, 0x0c, 0x73,
-  0x72, 0x2a, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x19, 0x08,
-  0x0b, 0xeb, 0x2b, 0x9c, 0xae, 0x90, 0xbf, 0xc2, 0x68, 0x42, 0x00, 0x8c,
-  0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x87,
-  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x75, 0xc2, 0x82, 0xfc, 0x0a,
-  0x07, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x85, 0xc2, 0xc2,
-  0xfc, 0x0a, 0x0c, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x95,
-  0xc2, 0x02, 0xfd, 0x0a, 0x12, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82,
-  0xc1, 0x77, 0xc2, 0xc2, 0xfc, 0x0a, 0xb1, 0x2b, 0x04, 0x20, 0x2c, 0x80,
-  0xaf, 0xe0, 0xbf, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x88,
-  0xc1, 0x01, 0x80, 0x20, 0x18, 0x88, 0x41, 0x0a, 0x0b, 0xf5, 0x2b, 0x08,
-  0xc1, 0x05, 0xc6, 0xcd, 0x12, 0xd0, 0xce, 0x70, 0xc3, 0x4f, 0x07, 0x28,
-  0x2c, 0x80, 0xc1, 0x2c, 0x43, 0xe8, 0x88, 0x4e, 0x50, 0xae, 0x2b, 0xdc,
-  0xaf, 0x00, 0x17, 0x18, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x4b,
-  0x0d, 0x0b, 0xf8, 0x2b, 0x98, 0x75, 0xc0, 0xbb, 0xc2, 0x88, 0xc1, 0x01,
-  0x80, 0x20, 0x18, 0x2c, 0x36, 0x2c, 0xe0, 0xaf, 0x10, 0x08, 0x17, 0x0c,
-  0x53, 0xb1, 0x2b, 0xf0, 0xaf, 0x00, 0x17, 0x18, 0x35, 0x62, 0x70, 0x00,
-  0x20, 0x08, 0x06, 0x8b, 0x0e, 0x0b, 0xfd, 0x2b, 0x88, 0x45, 0xf8, 0x0a,
-  0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xb0, 0xec, 0xb0, 0xd0, 0xbf, 0x42,
-  0x20, 0x5c, 0x30, 0xcc, 0x05, 0x46, 0xdd, 0x61, 0xd4, 0xe9, 0xad, 0x30,
-  0xcc, 0xbd, 0xaa, 0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1,
-  0x01, 0x80, 0x20, 0x18, 0x64, 0x3f, 0x2c, 0xa8, 0xb0, 0x60, 0xbe, 0x02,
-  0x0e, 0x0b, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83,
-  0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08,
-  0x06, 0x96, 0x19, 0x0b, 0x31, 0x2c, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00,
-  0x20, 0x08, 0x06, 0xd6, 0x19, 0x0b, 0x32, 0x2c, 0x24, 0x44, 0x30, 0x62,
-  0x80, 0x00, 0x20, 0x08, 0x06, 0x16, 0x1a, 0x0b, 0x33, 0x2c, 0x24, 0x44,
-  0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x9f, 0x19, 0x0b, 0x32, 0x2c,
-  0xc0, 0xaf, 0x10, 0xfc, 0xb0, 0xf0, 0xbf, 0x42, 0x0f, 0x0b, 0xa3, 0x09,
-  0x01, 0x30, 0x9a, 0x20, 0x04, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x20,
-  0x06, 0x68, 0x2c, 0xd0, 0xb0, 0x20, 0x04, 0x17, 0x18, 0x37, 0x4b, 0x40,
-  0x3b, 0xc3, 0x0d, 0x7e, 0x1d, 0x98, 0xb1, 0x00, 0x06, 0xb3, 0x0c, 0xa3,
-  0x43, 0x3b, 0x81, 0x91, 0xaf, 0x60, 0xbe, 0x42, 0x7c, 0x86, 0x23, 0x48,
-  0x3b, 0x38, 0x5f, 0x81, 0xf8, 0x66, 0x19, 0x48, 0xe7, 0x74, 0x02, 0x43,
-  0x5f, 0xa1, 0xb4, 0x83, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e,
-  0x30, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x02, 0x8e, 0x05,
-  0x1d, 0x6e, 0x08, 0xdc, 0x58, 0x00, 0x83, 0x59, 0x86, 0xd2, 0x31, 0x9d,
-  0xc0, 0x06, 0xf8, 0x15, 0xe0, 0x33, 0x4b, 0xb0, 0x3a, 0xf6, 0xbe, 0x02,
-  0x11, 0x9f, 0x59, 0x82, 0xd5, 0x19, 0x8e, 0x78, 0xed, 0x00, 0x7e, 0x05,
-  0xe1, 0x9b, 0x65, 0x40, 0x9d, 0xd5, 0x09, 0x0c, 0xb6, 0x83, 0xf8, 0x15,
-  0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x28, 0x0b, 0x22,
-  0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0x3d, 0x16, 0x74, 0xb8, 0x21, 0xc8,
-  0x63, 0x01, 0x0c, 0x66, 0x19, 0x52, 0x47, 0x75, 0x02, 0xcb, 0x5f, 0x61,
-  0x88, 0xcf, 0x2c, 0xc1, 0xea, 0x18, 0xc1, 0xbf, 0x02, 0x7c, 0x66, 0x09,
-  0x56, 0x67, 0xa0, 0xc5, 0xd0, 0x4a, 0x07, 0x33, 0x1d, 0x22, 0x75, 0x04,
-  0xd5, 0xa1, 0xdb, 0xe1, 0x74, 0x2e, 0x18, 0xc6, 0xf6, 0x57, 0xf8, 0x5f,
-  0x21, 0x3e, 0xc3, 0x11, 0xb4, 0x01, 0xc2, 0x02, 0xf1, 0xcd, 0x32, 0xb0,
-  0xce, 0xeb, 0x04, 0x16, 0xc2, 0x42, 0x6d, 0xc4, 0xc7, 0x82, 0x81, 0x3e,
-  0x17, 0x0c, 0x73, 0x81, 0x51, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f,
-  0x11, 0xa9, 0x2c, 0xe8, 0x70, 0x43, 0x70, 0xca, 0x02, 0x18, 0xcc, 0x32,
-  0xb4, 0x8e, 0xeb, 0x04, 0x36, 0xa4, 0xb0, 0x00, 0x9f, 0x59, 0x82, 0xd9,
-  0x31, 0x13, 0x16, 0x88, 0xf8, 0xcc, 0x12, 0xcc, 0xce, 0x70, 0xc4, 0x6f,
-  0x9c, 0xb0, 0x20, 0x7c, 0xb3, 0x0c, 0xb0, 0x33, 0x3b, 0x81, 0x81, 0x07,
-  0x0a, 0x0b, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0x60, 0x94,
-  0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x04, 0x2d, 0x0b, 0x3a, 0xdc,
-  0x10, 0xc8, 0xb2, 0x00, 0x06, 0xb3, 0x0c, 0xb1, 0x23, 0x3b, 0x81, 0xc1,
-  0xb0, 0x30, 0xc4, 0x67, 0x96, 0x60, 0x76, 0x8c, 0xa8, 0x61, 0x01, 0x3e,
-  0xb3, 0x04, 0xb3, 0x33, 0xd0, 0x62, 0x68, 0xad, 0x83, 0xb9, 0x0e, 0x11,
-  0x3b, 0x82, 0xec, 0xd0, 0xce, 0xeb, 0x5c, 0x30, 0xcc, 0x05, 0x46, 0xdd,
-  0x66, 0xd4, 0x85, 0xb0, 0x30, 0xcc, 0xd9, 0xac, 0x30, 0xcc, 0x11, 0xc3,
-  0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x64, 0xe6, 0x2c,
-  0xc4, 0xb2, 0xd0, 0xc6, 0xc2, 0x2f, 0x0b, 0xa3, 0x09, 0x01, 0x30, 0x9a,
-  0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32,
-  0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x56, 0x3b, 0x0b, 0xb8, 0x2c, 0x24,
-  0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x96, 0x3b, 0x0b, 0xb9,
-  0x2c, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xd6, 0x3b,
-  0x0b, 0xba, 0x2c, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06,
-  0x5f, 0x3b, 0x0b, 0xb9, 0x2c, 0xdc, 0xb1, 0x10, 0x98, 0xb3, 0x60, 0xca,
-  0x02, 0x39, 0x0b, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0x23, 0x06,
-  0x07, 0x00, 0x82, 0x60, 0x20, 0x06, 0xef, 0x2c, 0xec, 0xb2, 0x20, 0x04,
-  0x17, 0x18, 0x37, 0x4b, 0x40, 0x3b, 0x03, 0x2d, 0x86, 0x6b, 0xfc, 0x8d,
-  0x3f, 0x16, 0x7e, 0x03, 0x13, 0xa1, 0x23, 0xcc, 0x8e, 0x3f, 0x16, 0xa2,
-  0x33, 0xcb, 0x50, 0x3b, 0xb7, 0x53, 0xe2, 0xc1, 0x70, 0x04, 0x09, 0x07,
-  0xa0, 0x2c, 0x0c, 0xdf, 0x95, 0x70, 0x30, 0xcc, 0x70, 0x43, 0x60, 0xc7,
-  0x02, 0x19, 0xd4, 0x10, 0xe8, 0x70, 0x44, 0x89, 0x90, 0xb2, 0x30, 0x7c,
-  0x15, 0x08, 0x7a, 0x27, 0x32, 0xcc, 0x70, 0x43, 0x90, 0xc7, 0x02, 0x19,
-  0x54, 0x30, 0xe8, 0x2c, 0x83, 0xed, 0xac, 0x4f, 0x70, 0x3d, 0x2c, 0x0c,
-  0x73, 0x72, 0x2b, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x19,
-  0x48, 0x0b, 0xeb, 0x2c, 0x9c, 0xb2, 0x90, 0xcf, 0xc2, 0x68, 0x42, 0x00,
-  0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11,
-  0x87, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x75, 0xd2, 0x82, 0x3c,
-  0x0b, 0x07, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x85, 0xd2,
-  0xc2, 0x3c, 0x0b, 0x0c, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81,
-  0x95, 0xd2, 0x02, 0x3d, 0x0b, 0x12, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08,
-  0x82, 0xc1, 0x77, 0xd2, 0xc2, 0x3c, 0x0b, 0xb1, 0x2c, 0x04, 0x20, 0x2d,
-  0x80, 0xb3, 0xe0, 0xcf, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1,
-  0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x88, 0x41, 0x4a, 0x0b, 0xf5, 0x2c,
-  0x08, 0xc1, 0x05, 0xc6, 0xcd, 0x12, 0xac, 0xcf, 0x70, 0xc3, 0x8f, 0x07,
-  0x28, 0x2d, 0x80, 0xc1, 0x2c, 0x03, 0xee, 0xe4, 0x4e, 0x50, 0xae, 0x2c,
-  0xdc, 0xb3, 0x00, 0x17, 0x18, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06,
-  0x4b, 0x4d, 0x0b, 0xf8, 0x2c, 0x80, 0x71, 0xc0, 0xcb, 0xc2, 0x88, 0xc1,
-  0x01, 0x80, 0x20, 0x18, 0x2c, 0x36, 0x2d, 0xe0, 0xb3, 0x10, 0x08, 0x17,
-  0x0c, 0x53, 0xb1, 0x2c, 0xf0, 0xb3, 0x00, 0x17, 0x18, 0x35, 0x62, 0x70,
-  0x00, 0x20, 0x08, 0x06, 0x8b, 0x4e, 0x0b, 0xfd, 0x2c, 0x88, 0x49, 0x38,
-  0x0b, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xb0, 0xec, 0xb4, 0xd0, 0xcf,
-  0x42, 0x20, 0x5c, 0x30, 0xcc, 0x05, 0x46, 0xdd, 0x61, 0xd4, 0xe9, 0xb1,
-  0x30, 0xcc, 0xbd, 0xae, 0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88,
-  0xc1, 0x01, 0x80, 0x20, 0x18, 0x64, 0x3f, 0x2d, 0xa8, 0xb4, 0x60, 0xce,
-  0x02, 0x4e, 0x0b, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09,
-  0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20,
-  0x08, 0x06, 0x96, 0x59, 0x0b, 0x31, 0x2d, 0x24, 0x44, 0x30, 0x62, 0x80,
-  0x00, 0x20, 0x08, 0x06, 0xd6, 0x59, 0x0b, 0x32, 0x2d, 0x24, 0x44, 0x30,
-  0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x16, 0x5a, 0x0b, 0x33, 0x2d, 0x24,
-  0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x9f, 0x59, 0x0b, 0x32,
-  0x2d, 0xc0, 0xb3, 0x10, 0xfc, 0xb4, 0xf0, 0xcf, 0x42, 0x4f, 0x0b, 0xa3,
-  0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60,
-  0x20, 0x06, 0x68, 0x2d, 0xd0, 0xb4, 0x20, 0x04, 0x17, 0x18, 0x37, 0x4b,
-  0xb0, 0x3e, 0xc3, 0x0d, 0x7e, 0x1e, 0x98, 0xb5, 0x00, 0x06, 0xb3, 0x0c,
-  0xba, 0xb3, 0x3e, 0x81, 0x91, 0xb3, 0x60, 0xce, 0x42, 0x7c, 0x86, 0x23,
-  0xfc, 0x38, 0x38, 0x67, 0x81, 0xf8, 0x66, 0x19, 0x76, 0xc7, 0x77, 0x02,
-  0x43, 0x67, 0xe1, 0x8f, 0x83, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61,
-  0x2e, 0x30, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x02, 0xae,
-  0x05, 0x1d, 0x6e, 0x08, 0xdc, 0x5a, 0x00, 0x83, 0x59, 0x06, 0xde, 0xe9,
-  0x9d, 0xc0, 0x06, 0x78, 0x16, 0xe0, 0x33, 0x4b, 0x20, 0x3e, 0xf6, 0xce,
-  0x02, 0x11, 0x9f, 0x59, 0x02, 0xf1, 0x19, 0x8e, 0x48, 0xe5, 0x00, 0x9e,
-  0x05, 0xe1, 0x9b, 0x65, 0xf8, 0x1d, 0xf1, 0x09, 0x4c, 0x95, 0x83, 0x78,
-  0x16, 0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x28, 0x0b,
-  0x22, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0xbd, 0x16, 0x74, 0xb8, 0x21,
-  0xc8, 0x6b, 0x01, 0x0c, 0x66, 0x19, 0xc0, 0x27, 0x7c, 0x02, 0xcb, 0x67,
-  0x61, 0x88, 0xcf, 0x2c, 0x81, 0xf8, 0x18, 0xc1, 0xcf, 0x02, 0x7c, 0x66,
-  0x09, 0xc4, 0x67, 0xa0, 0xc5, 0xd0, 0x78, 0x07, 0xeb, 0x1d, 0x02, 0x7c,
-  0x84, 0xf0, 0x41, 0xcb, 0xc1, 0x77, 0x2e, 0x18, 0xc6, 0xf6, 0x59, 0xf8,
-  0x67, 0x21, 0x3e, 0xc3, 0x11, 0xb4, 0x02, 0xd2, 0x02, 0xf1, 0xcd, 0x32,
-  0x8c, 0x8f, 0xf9, 0x04, 0x16, 0xd2, 0x42, 0xad, 0xc4, 0xc7, 0x82, 0x81,
-  0x3e, 0x17, 0x0c, 0x73, 0x81, 0x51, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88,
-  0x4f, 0x11, 0xa9, 0x2d, 0xe8, 0x70, 0x43, 0x70, 0xda, 0x02, 0x18, 0xcc,
-  0x32, 0x90, 0x4f, 0xf9, 0x04, 0x36, 0xa4, 0xb4, 0x00, 0x9f, 0x59, 0x02,
-  0xf5, 0x31, 0x93, 0x16, 0x88, 0xf8, 0xcc, 0x12, 0xa8, 0xcf, 0x70, 0xc4,
-  0xaf, 0x9c, 0xb4, 0x20, 0x7c, 0xb3, 0x0c, 0xe7, 0xa3, 0x3e, 0x81, 0x81,
-  0x0b, 0x4a, 0x0b, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0x60,
-  0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x04, 0x6d, 0x0b, 0x3a,
-  0xdc, 0x10, 0xc8, 0xb6, 0x00, 0x06, 0xb3, 0x0c, 0xe8, 0x93, 0x3e, 0x81,
-  0xc1, 0xb4, 0x30, 0xc4, 0x67, 0x96, 0x40, 0x7d, 0x8c, 0xa8, 0x69, 0x01,
-  0x3e, 0xb3, 0x04, 0xea, 0x33, 0xd0, 0x62, 0x68, 0xe4, 0x83, 0x95, 0x0f,
-  0x81, 0x3e, 0x42, 0xfa, 0xd0, 0x96, 0xf9, 0x5c, 0x30, 0xcc, 0x05, 0x46,
-  0xdd, 0x66, 0xd4, 0x85, 0xb4, 0x30, 0xcc, 0xd9, 0xb0, 0x30, 0xcc, 0x11,
-  0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x64, 0xe6,
-  0x2d, 0xc4, 0xb6, 0xd0, 0xd6, 0xc2, 0x6f, 0x0b, 0xa3, 0x09, 0x01, 0x30,
-  0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24,
-  0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x56, 0x7b, 0x0b, 0xb8, 0x2d,
-  0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x96, 0x7b, 0x0b,
-  0xb9, 0x2d, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xd6,
-  0x7b, 0x0b, 0xba, 0x2d, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08,
-  0x06, 0x5f, 0x7b, 0x0b, 0xb9, 0x2d, 0xdc, 0xb5, 0x10, 0x98, 0xb7, 0x60,
-  0xda, 0x02, 0x79, 0x0b, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0x23,
-  0x06, 0x07, 0x00, 0x82, 0x60, 0x20, 0x06, 0xef, 0x2d, 0xec, 0xb6, 0x20,
-  0x04, 0x17, 0x18, 0x37, 0x4b, 0xb0, 0x3e, 0x03, 0x2d, 0x86, 0x6b, 0xd8,
-  0x8e, 0xbf, 0x16, 0xb5, 0x03, 0x13, 0xb8, 0x23, 0xa8, 0x8f, 0xbf, 0x16,
-  0xb9, 0x33, 0xcb, 0xc0, 0x3e, 0xee, 0x53, 0xf2, 0xc1, 0x70, 0xc4, 0x08,
-  0x07, 0xa0, 0x2d, 0x0c, 0xdf, 0x91, 0x70, 0x30, 0xcc, 0x70, 0x43, 0x60,
-  0xd7, 0x02, 0x19, 0xd4, 0x10, 0xe8, 0x70, 0x44, 0xc9, 0x90, 0xb6, 0x30,
-  0x7c, 0x15, 0x08, 0x7a, 0x27, 0x33, 0xcc, 0x70, 0x43, 0x90, 0xd7, 0x02,
-  0x19, 0x54, 0x30, 0xe8, 0x2c, 0x43, 0xfb, 0x88, 0x50, 0x70, 0x3d, 0x2d,
-  0x0c, 0x73, 0x72, 0x2c, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06,
-  0x19, 0x88, 0x0b, 0xeb, 0x2d, 0x9c, 0xb6, 0x90, 0xdf, 0xc2, 0x68, 0x42,
-  0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43,
-  0x11, 0x87, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x75, 0xe2, 0x82,
-  0x7c, 0x0b, 0x07, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x85,
-  0xe2, 0xc2, 0x7c, 0x0b, 0x0c, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
-  0x81, 0x95, 0xe2, 0x02, 0x7d, 0x0b, 0x12, 0x11, 0x8c, 0x18, 0x28, 0x00,
-  0x08, 0x82, 0xc1, 0x77, 0xe2, 0xc2, 0x7c, 0x0b, 0xb1, 0x2d, 0x04, 0x20,
-  0x2e, 0x80, 0xb7, 0xe0, 0xdf, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08,
-  0xc1, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x88, 0x41, 0x8a, 0x0b, 0xf5,
-  0x2d, 0x08, 0xc1, 0x05, 0xc6, 0xcd, 0x12, 0x88, 0xd0, 0x70, 0xc3, 0xcf,
-  0x07, 0x28, 0x2e, 0x80, 0xc1, 0x2c, 0xc3, 0xfb, 0xc0, 0x4f, 0x50, 0xae,
-  0x2d, 0xdc, 0xb7, 0x00, 0x17, 0x18, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08,
-  0x06, 0x4b, 0x8d, 0x0b, 0xf8, 0x2d, 0xfc, 0x70, 0xc0, 0xdb, 0xc2, 0x88,
-  0xc1, 0x01, 0x80, 0x20, 0x18, 0x2c, 0x36, 0x2e, 0xe0, 0xb7, 0x10, 0x08,
-  0x17, 0x0c, 0x53, 0xb1, 0x2d, 0xf0, 0xb7, 0x00, 0x17, 0x18, 0x35, 0x62,
-  0x70, 0x00, 0x20, 0x08, 0x06, 0x8b, 0x8e, 0x0b, 0xfd, 0x2d, 0x88, 0x4d,
-  0x78, 0x0b, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xb0, 0xec, 0xb8, 0xd0,
-  0xdf, 0x42, 0x20, 0x5c, 0x30, 0xcc, 0x05, 0x46, 0xdd, 0x61, 0xd4, 0xe9,
-  0xb5, 0x30, 0xcc, 0xbd, 0xb2, 0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc,
-  0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x64, 0x3f, 0x2e, 0xa8, 0xb8, 0x60,
-  0xde, 0x02, 0x8e, 0x0b, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3,
-  0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00,
-  0x20, 0x08, 0x06, 0x96, 0x99, 0x0b, 0x31, 0x2e, 0x24, 0x44, 0x30, 0x62,
-  0x80, 0x00, 0x20, 0x08, 0x06, 0xd6, 0x99, 0x0b, 0x32, 0x2e, 0x24, 0x44,
-  0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x16, 0x9a, 0x0b, 0x33, 0x2e,
-  0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x9f, 0x99, 0x0b,
-  0x32, 0x2e, 0xc0, 0xb7, 0x10, 0xfc, 0xb8, 0xf0, 0xdf, 0x42, 0x8f, 0x0b,
-  0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0x23, 0x06, 0x07, 0x00, 0x82,
-  0x60, 0x20, 0x06, 0x68, 0x2e, 0xd0, 0xb8, 0x20, 0x04, 0x17, 0x18, 0x37,
-  0x4b, 0x20, 0x42, 0xc3, 0x0d, 0x7e, 0x1f, 0x98, 0xb9, 0x00, 0x06, 0xb3,
-  0x0c, 0xf1, 0x23, 0x42, 0x81, 0x91, 0xb7, 0x60, 0xde, 0x42, 0x7c, 0x86,
-  0x23, 0xfa, 0x38, 0x38, 0x6f, 0x81, 0xf8, 0x66, 0x19, 0xe4, 0xa7, 0x7e,
-  0x02, 0x43, 0x6f, 0xc1, 0x8f, 0x83, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82,
-  0x61, 0x2e, 0x30, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x02,
-  0xce, 0x05, 0x1d, 0x6e, 0x08, 0xdc, 0x5c, 0x00, 0x83, 0x59, 0x86, 0xf9,
-  0xa1, 0x9f, 0xc0, 0x06, 0xf8, 0x16, 0xe0, 0x33, 0x4b, 0x90, 0x3f, 0xf6,
-  0xde, 0x02, 0x11, 0x9f, 0x59, 0x82, 0xfc, 0x19, 0x8e, 0x40, 0xe5, 0x00,
-  0xbe, 0x05, 0xe1, 0x9b, 0x65, 0xb0, 0x9f, 0xfc, 0x09, 0x2c, 0x95, 0x83,
-  0xf8, 0x16, 0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x28,
-  0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0x3d, 0x17, 0x74, 0xb8,
-  0x21, 0xc8, 0x73, 0x01, 0x0c, 0x66, 0x19, 0xee, 0x07, 0x7f, 0x02, 0xcb,
-  0x6f, 0x61, 0x88, 0xcf, 0x2c, 0x41, 0xfe, 0x18, 0xc1, 0xdf, 0x02, 0x7c,
-  0x66, 0x09, 0xf2, 0x67, 0xa0, 0xc5, 0xd0, 0xe6, 0x07, 0xa3, 0x1f, 0xe2,
-  0x7e, 0x04, 0xfc, 0x31, 0xcb, 0xa1, 0x7e, 0x2e, 0x18, 0xc6, 0xf6, 0x5b,
-  0xf8, 0x6f, 0x21, 0x3e, 0xc3, 0x11, 0xb4, 0x03, 0xe2, 0x02, 0xf1, 0xcd,
-  0x32, 0xe8, 0x4f, 0xff, 0x04, 0x16, 0xe2, 0x42, 0xed, 0xc4, 0xc7, 0x82,
-  0x81, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x51, 0x16, 0x18, 0xf2, 0xb1, 0x22,
-  0x88, 0x4f, 0x11, 0xa9, 0x2e, 0xe8, 0x70, 0x43, 0x70, 0xea, 0x02, 0x18,
-  0xcc, 0x32, 0xec, 0x0f, 0xff, 0x04, 0x36, 0xa4, 0xb8, 0x00, 0x9f, 0x59,
-  0x82, 0x10, 0x32, 0x13, 0x17, 0x88, 0xf8, 0xcc, 0x12, 0x84, 0xd0, 0x70,
-  0xc4, 0xef, 0x9c, 0xb8, 0x20, 0x7c, 0xb3, 0x0c, 0xfe, 0x13, 0x42, 0x81,
-  0x81, 0x0f, 0x8a, 0x0b, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c,
-  0x60, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x04, 0xad, 0x0b,
-  0x3a, 0xdc, 0x10, 0xc8, 0xba, 0x00, 0x06, 0xb3, 0x0c, 0xff, 0x03, 0x42,
-  0x81, 0xc1, 0xb8, 0x30, 0xc4, 0x67, 0x96, 0x20, 0x84, 0x8c, 0xa8, 0x71,
-  0x01, 0x3e, 0xb3, 0x04, 0x21, 0x34, 0xd0, 0x62, 0x68, 0xfb, 0x83, 0xf1,
-  0x0f, 0xf1, 0x3f, 0x02, 0x08, 0xd1, 0x5e, 0xff, 0x5c, 0x30, 0xcc, 0x05,
-  0x46, 0xdd, 0x66, 0xd4, 0x85, 0xb8, 0x30, 0xcc, 0xd9, 0xb4, 0x30, 0xcc,
-  0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x64,
-  0xe6, 0x2e, 0xc4, 0xba, 0xd0, 0xe6, 0xc2, 0xaf, 0x0b, 0xa3, 0x09, 0x01,
-  0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45,
-  0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x56, 0xbb, 0x0b, 0xb8,
-  0x2e, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x96, 0xbb,
-  0x0b, 0xb9, 0x2e, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06,
-  0xd6, 0xbb, 0x0b, 0xba, 0x2e, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20,
-  0x08, 0x06, 0x5f, 0xbb, 0x0b, 0xb9, 0x2e, 0xdc, 0xb9, 0x10, 0x98, 0xbb,
-  0x60, 0xea, 0x02, 0xb9, 0x0b, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04,
-  0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x20, 0x06, 0xef, 0x2e, 0xec, 0xba,
-  0x20, 0x04, 0x17, 0x18, 0x37, 0x4b, 0x20, 0x42, 0x03, 0x2d, 0x86, 0x6b,
-  0xb4, 0x8f, 0x3f, 0x17, 0xec, 0x03, 0x13, 0xef, 0x23, 0x84, 0x90, 0x3f,
-  0x17, 0xf0, 0x33, 0x62, 0x60, 0x00, 0x20, 0x08, 0x06, 0xc7, 0xbe, 0x0b,
-  0xb5, 0x2e, 0xa8, 0xb5, 0x30, 0x62, 0x60, 0x00, 0x20, 0x08, 0x06, 0x07,
-  0xbf, 0x0b, 0xb6, 0x2e, 0xa8, 0xb5, 0x60, 0x41, 0x20, 0x1f, 0x0b, 0x04,
-  0xf9, 0x58, 0xca, 0x07, 0xa6, 0x2e, 0xc8, 0x67, 0xc4, 0x00, 0x01, 0x40,
-  0x10, 0x0c, 0x92, 0x7f, 0x17, 0x7e, 0x5d, 0x80, 0x75, 0xe1, 0xe4, 0x02,
-  0x5b, 0xf9, 0x00, 0xd6, 0x05, 0xf9, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
-  0x41, 0x12, 0xf2, 0x42, 0xb8, 0x0b, 0xaf, 0x2e, 0xa8, 0x6c, 0x10, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0x41, 0x22, 0xf2, 0x82, 0xb8, 0x0b, 0xb2,
-  0x2e, 0xa8, 0x5c, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xc9, 0xc8,
-  0x0b, 0xe3, 0x2e, 0xac, 0xba, 0xa0, 0x32, 0xc1, 0x88, 0x01, 0x02, 0x80,
-  0x20, 0x18, 0x24, 0x24, 0x2f, 0x90, 0xbb, 0x10, 0xeb, 0x42, 0xcb, 0x06,
-  0xc6, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x24, 0x25, 0x2f, 0x94, 0xbb,
-  0x10, 0xeb, 0x42, 0xcb, 0x05, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x90,
-  0x98, 0xbc, 0x60, 0xee, 0xc2, 0xad, 0x0b, 0x2d, 0x13, 0x8c, 0x18, 0x34,
-  0x00, 0x08, 0x82, 0x41, 0x53, 0xf2, 0x82, 0xb9, 0x0b, 0xae, 0x2e, 0x30,
-  0x8b, 0x02, 0xb3, 0x01, 0x42, 0x04, 0x36, 0xe3, 0x01, 0xac, 0x0b, 0xf2,
-  0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x24, 0xe5, 0x85, 0x74, 0x17,
-  0x74, 0x5d, 0x88, 0xb1, 0xc0, 0x6a, 0x3c, 0xd0, 0x75, 0x41, 0x3e, 0x23,
-  0x06, 0x08, 0x00, 0x82, 0x60, 0x90, 0xac, 0xbc, 0xb0, 0xee, 0x42, 0xae,
-  0x0b, 0x34, 0x1a, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x90, 0xb0,
-  0xbc, 0xc0, 0xee, 0x02, 0xaf, 0x0b, 0x34, 0x16, 0x8c, 0x18, 0x20, 0x00,
-  0x08, 0x82, 0x41, 0xd2, 0xf2, 0x42, 0xbb, 0x0b, 0xb5, 0x2e, 0xd0, 0x48,
-  0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x89, 0xcb, 0x0b, 0xee, 0x2e,
-  0xec, 0xba, 0x70, 0xa3, 0x81, 0x31, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06,
-  0xc9, 0xcb, 0x0b, 0xef, 0x2e, 0xec, 0xba, 0x70, 0x63, 0xc1, 0x88, 0x01,
-  0x02, 0x80, 0x20, 0x18, 0x24, 0x30, 0x2f, 0xc0, 0xbb, 0x10, 0xee, 0xc2,
-  0x8d, 0x04, 0x23, 0x06, 0x0d, 0x00, 0x82, 0x60, 0xd0, 0xbc, 0xbc, 0x00,
-  0xef, 0x02, 0xae, 0x0b, 0x56, 0x45, 0xe9, 0x68, 0x80, 0x10, 0x81, 0xf1,
-  0x74, 0xa0, 0xeb, 0x82, 0x7c, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x20,
-  0x99, 0x79, 0x61, 0xde, 0x05, 0x72, 0x17, 0x76, 0x2a, 0x30, 0x9f, 0x0e,
-  0xc8, 0x5d, 0x90, 0xcf, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x24, 0x35,
-  0x2f, 0xd4, 0xbb, 0x30, 0xee, 0x82, 0x4f, 0x06, 0xc1, 0x88, 0x01, 0x02,
-  0x80, 0x20, 0x18, 0x24, 0x36, 0x2f, 0xd8, 0xbb, 0x60, 0xee, 0x82, 0x4f,
-  0x05, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x90, 0xdc, 0xbc, 0x70, 0xef,
-  0xc2, 0xaf, 0x0b, 0x3e, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x41,
-  0x82, 0xf3, 0x02, 0xbe, 0x0b, 0xe5, 0x2e, 0x84, 0x65, 0x60, 0x8c, 0x18,
-  0x20, 0x00, 0x08, 0x82, 0x41, 0x92, 0xf3, 0x42, 0xbe, 0x0b, 0xe5, 0x2e,
-  0x84, 0x55, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x89, 0xce, 0x0b,
-  0xfa, 0x2e, 0xac, 0xbb, 0x10, 0x16, 0xc1, 0x88, 0x41, 0x03, 0x80, 0x20,
-  0x18, 0x34, 0x39, 0x2f, 0xe8, 0xbb, 0x20, 0xee, 0x02, 0x18, 0x7c, 0x1e,
-  0x59, 0x06, 0x08, 0x11, 0x58, 0x19, 0x07, 0xe4, 0x2e, 0xc8, 0x67, 0xc4,
-  0x00, 0x01, 0x40, 0x10, 0x0c, 0x92, 0x9e, 0x17, 0xfa, 0x5d, 0x70, 0x77,
-  0xa1, 0x8c, 0x02, 0x3b, 0xe3, 0xc0, 0xdd, 0x05, 0xf9, 0x8c, 0x18, 0x20,
-  0x00, 0x08, 0x82, 0x41, 0xf2, 0xf3, 0xc2, 0xbf, 0x0b, 0xed, 0x2e, 0xa0,
-  0x61, 0x10, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x41, 0x02, 0xf6, 0x02,
-  0xc8, 0x0b, 0xf0, 0x2e, 0xa0, 0x51, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08,
-  0x06, 0x49, 0xd8, 0x0b, 0x21, 0x2f, 0xa4, 0xbb, 0x80, 0x06, 0xc1, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x24, 0x62, 0x2f, 0x88, 0xbc, 0xf0, 0xee,
-  0xc2, 0x1a, 0x06, 0xc6, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x24, 0x63,
-  0x2f, 0x8c, 0xbc, 0xf0, 0xee, 0xc2, 0x1a, 0x05, 0x23, 0x06, 0x08, 0x00,
-  0x82, 0x60, 0x90, 0x90, 0xbd, 0x40, 0xf2, 0x42, 0xbd, 0x0b, 0x6b, 0x10,
-  0x8c, 0x18, 0x34, 0x00, 0x08, 0x82, 0x41, 0x33, 0xf6, 0x02, 0xc9, 0x0b,
-  0xec, 0x2e, 0xa8, 0x41, 0x1a, 0xa0, 0x81, 0x1b, 0x06, 0x08, 0x11, 0x18,
-  0x1b, 0xb0, 0x81, 0x7c, 0x2c, 0x68, 0x03, 0xf9, 0x58, 0x18, 0xc0, 0xbb,
-  0x20, 0x9f, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0x48, 0xd2, 0x5e, 0x48,
-  0x79, 0x41, 0xdf, 0x05, 0x27, 0xb0, 0x31, 0xd0, 0x77, 0x41, 0x3e, 0x23,
-  0x06, 0x08, 0x00, 0x82, 0x60, 0x90, 0xac, 0xbd, 0xb0, 0xf2, 0x42, 0xbe,
-  0x0b, 0x5a, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x09, 0xdb, 0x0b,
-  0x2c, 0x2f, 0xf0, 0xbb, 0x10, 0x05, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60,
-  0x90, 0xb4, 0xbd, 0xd0, 0xf2, 0x42, 0xbd, 0x0b, 0x48, 0x30, 0x62, 0x80,
-  0x00, 0x20, 0x08, 0x06, 0x89, 0xdb, 0x0b, 0x2e, 0x2f, 0xec, 0xbb, 0xd0,
-  0x19, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x90, 0xbc, 0xbd, 0xf0, 0xf2,
-  0xc2, 0xbe, 0x0b, 0x54, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x09,
-  0xdc, 0x0b, 0x30, 0x2f, 0x84, 0xbc, 0xb0, 0x04, 0x23, 0x06, 0x0d, 0x00,
-  0x82, 0x60, 0xd0, 0xbc, 0xbd, 0x00, 0xf3, 0x02, 0xbe, 0x0b, 0x77, 0xb0,
-  0x28, 0x60, 0x80, 0x10, 0xc1, 0x05, 0x21, 0x8e, 0x18, 0x18, 0x00, 0x08,
-  0x82, 0xc1, 0x19, 0xa4, 0xbd, 0xd0, 0xf2, 0x42, 0x30, 0x9a, 0x10, 0x00,
-  0xa3, 0x09, 0x42, 0x30, 0x62, 0xe0, 0x00, 0x20, 0x08, 0x06, 0x64, 0xd0,
-  0xf6, 0xc2, 0xcb, 0x0b, 0xfb, 0x2e, 0xd0, 0xbb, 0x80, 0xf6, 0x82, 0x10,
-  0xd0, 0xbc, 0x40, 0xf3, 0x02, 0xca, 0x0b, 0x66, 0x2f, 0xcc, 0x12, 0x8c,
-  0x10, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-};
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_float_double.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_float_double.h
deleted file mode 100644
index 6ae8cd863dd22..0000000000000
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_float_double.h
+++ /dev/null
@@ -1,6224 +0,0 @@
-#if 0
-;
-; Note: shader requires additional functionality:
-;       Double-precision floating point
-;
-;
-; Input signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; no parameters
-;
-; Output signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; no parameters
-; shader hash: baaf129c1eacbecbb65916f57913e04f
-;
-; Pipeline Runtime Information: 
-;
-;
-;
-; Buffer Definitions:
-;
-; cbuffer 
-; {
-;
-;   [116 x i8] (type annotation not present)
-;
-; }
-;
-; Resource bind info for 
-; {
-;
-;   [4 x i8] (type annotation not present)
-;
-; }
-;
-; Resource bind info for 
-; {
-;
-;   [8 x i8] (type annotation not present)
-;
-; }
-;
-; Resource bind info for 
-; {
-;
-;   [4 x i8] (type annotation not present)
-;
-; }
-;
-;
-; Resource Bindings:
-;
-; Name                                 Type  Format         Dim      ID      HLSL Bind  Count
-; ------------------------------ ---------- ------- ----------- ------- -------------- ------
-;                                   cbuffer      NA          NA     CB0            cb0     1
-;                                       UAV  struct         r/w      U0             u0     1
-;                                       UAV  struct         r/w      U1             u1     1
-;                                       UAV  struct         r/w      U2             u2     1
-;
-target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
-target triple = "dxil-ms-dx"
-
-%dx.types.Handle = type { i8* }
-%dx.types.CBufRet.i32 = type { i32, i32, i32, i32 }
-%dx.types.ResRet.i32 = type { i32, i32, i32, i32, i32 }
-%dx.types.ResRet.f32 = type { float, float, float, float, i32 }
-%"class.RWStructuredBuffer<float>" = type { float }
-%"class.RWStructuredBuffer<double>" = type { double }
-%Constants = type { i32, i32, i32, i32, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32 }
-
-define void @GridSample() {
-  %1 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 2, i32 2, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %2 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 1, i32 1, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %3 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %4 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %5 = call i32 @dx.op.threadId.i32(i32 93, i32 0)  ; ThreadId(component)
-  %6 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
-  %7 = extractvalue %dx.types.CBufRet.i32 %6, 0
-  %8 = add i32 %7, %5
-  %9 = extractvalue %dx.types.CBufRet.i32 %6, 1
-  %10 = icmp ult i32 %8, %9
-  br i1 %10, label %11, label %3257
-
-; <label>:11                                      ; preds = %0
-  %12 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
-  %13 = extractvalue %dx.types.CBufRet.i32 %12, 3
-  %14 = uitofp i32 %13 to float
-  %15 = extractvalue %dx.types.CBufRet.i32 %12, 2
-  %16 = uitofp i32 %15 to float
-  %17 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 7)  ; CBufferLoadLegacy(handle,regIndex)
-  %18 = extractvalue %dx.types.CBufRet.i32 %17, 0
-  %19 = icmp eq i32 %18, 0
-  %20 = select i1 %19, float -5.000000e-01, float 0.000000e+00
-  %21 = select i1 %19, float -5.000000e-01, float -1.000000e+00
-  %22 = fadd float %14, %21
-  %23 = select i1 %19, float -5.000000e-01, float -1.000000e+00
-  %24 = fadd float %16, %23
-  %25 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
-  %26 = extractvalue %dx.types.CBufRet.i32 %25, 1
-  %27 = extractvalue %dx.types.CBufRet.i32 %25, 2
-  %28 = extractvalue %dx.types.CBufRet.i32 %25, 3
-  %29 = mul i32 %28, %27
-  %30 = mul i32 %27, %26
-  %31 = mul i32 %30, %28
-  %32 = udiv i32 %8, %31
-  %33 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 6)  ; CBufferLoadLegacy(handle,regIndex)
-  %34 = extractvalue %dx.types.CBufRet.i32 %33, 0
-  %35 = mul i32 %34, %32
-  %36 = sub i32 %8, %35
-  %37 = udiv i32 %36, %29
-  %38 = extractvalue %dx.types.CBufRet.i32 %33, 1
-  %39 = mul i32 %38, %37
-  %40 = sub i32 %36, %39
-  %41 = udiv i32 %40, %28
-  %42 = extractvalue %dx.types.CBufRet.i32 %33, 2
-  %43 = mul i32 %42, %41
-  %44 = sub i32 %40, %43
-  %45 = uitofp i32 %32 to float
-  %46 = uitofp i32 %41 to float
-  %47 = uitofp i32 %44 to float
-  %48 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
-  %49 = extractvalue %dx.types.CBufRet.i32 %48, 0
-  %50 = extractvalue %dx.types.CBufRet.i32 %48, 1
-  %51 = extractvalue %dx.types.CBufRet.i32 %48, 2
-  %52 = extractvalue %dx.types.CBufRet.i32 %48, 3
-  %53 = uitofp i32 %49 to float
-  %54 = uitofp i32 %50 to float
-  %55 = uitofp i32 %51 to float
-  %56 = uitofp i32 %52 to float
-  %57 = call float @dx.op.dot4.f32(i32 56, float %45, float %46, float %47, float 0.000000e+00, float %53, float %54, float %55, float %56)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %58 = fadd fast float %56, %57
-  %59 = fptoui float %57 to i32
-  %60 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %2, i32 %59, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %61 = extractvalue %dx.types.ResRet.i32 %60, 0
-  %62 = extractvalue %dx.types.ResRet.i32 %60, 1
-  %63 = call double @dx.op.makeDouble.f64(i32 101, i32 %61, i32 %62)  ; MakeDouble(lo,hi)
-  %64 = fptrunc double %63 to float
-  %65 = fptoui float %58 to i32
-  %66 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %2, i32 %65, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %67 = extractvalue %dx.types.ResRet.i32 %66, 0
-  %68 = extractvalue %dx.types.ResRet.i32 %66, 1
-  %69 = call double @dx.op.makeDouble.f64(i32 101, i32 %67, i32 %68)  ; MakeDouble(lo,hi)
-  %70 = fptrunc double %69 to float
-  %71 = icmp eq i32 %18, 1
-  %72 = fadd fast float %64, 1.000000e+00
-  %73 = fadd fast float %70, 1.000000e+00
-  br i1 %71, label %74, label %81
-
-; <label>:74                                      ; preds = %11
-  %75 = fmul fast float %72, 5.000000e-01
-  %76 = fmul fast float %73, 5.000000e-01
-  %77 = fadd fast float %14, -1.000000e+00
-  %78 = fadd fast float %16, -1.000000e+00
-  %79 = fmul fast float %75, %77
-  %80 = fmul fast float %76, %78
-  br label %88
-
-; <label>:81                                      ; preds = %11
-  %82 = fmul fast float %14, %72
-  %83 = fmul fast float %73, %16
-  %84 = fadd fast float %82, -1.000000e+00
-  %85 = fadd fast float %83, -1.000000e+00
-  %86 = fmul fast float %84, 5.000000e-01
-  %87 = fmul fast float %85, 5.000000e-01
-  br label %88
-
-; <label>:88                                      ; preds = %81, %74
-  %89 = phi float [ %79, %74 ], [ %86, %81 ]
-  %90 = phi float [ %80, %74 ], [ %87, %81 ]
-  %91 = extractvalue %dx.types.CBufRet.i32 %6, 2
-  %92 = icmp eq i32 %91, 1
-  br i1 %92, label %93, label %96
-
-; <label>:93                                      ; preds = %88
-  %94 = call float @dx.op.unary.f32(i32 26, float %89)  ; Round_ne(value)
-  %95 = call float @dx.op.unary.f32(i32 26, float %90)  ; Round_ne(value)
-  br label %96
-
-; <label>:96                                      ; preds = %93, %88
-  %97 = phi float [ %94, %93 ], [ %89, %88 ]
-  %98 = phi float [ %95, %93 ], [ %90, %88 ]
-  %99 = fcmp fast olt float %97, %20
-  %100 = fcmp fast ogt float %97, %22
-  %101 = or i1 %99, %100
-  %102 = fcmp fast olt float %98, %20
-  %103 = or i1 %101, %102
-  %104 = fcmp fast ogt float %98, %24
-  %105 = or i1 %104, %103
-  br i1 %105, label %106, label %179
-
-; <label>:106                                     ; preds = %96
-  %107 = extractvalue %dx.types.CBufRet.i32 %6, 3
-  %108 = icmp eq i32 %107, 1
-  br i1 %108, label %109, label %118
-
-; <label>:109                                     ; preds = %106
-  %110 = add i32 %13, -1
-  %111 = uitofp i32 %110 to float
-  %112 = call float @dx.op.binary.f32(i32 35, float %97, float 0.000000e+00)  ; FMax(a,b)
-  %113 = call float @dx.op.binary.f32(i32 36, float %112, float %111)  ; FMin(a,b)
-  %114 = add i32 %15, -1
-  %115 = uitofp i32 %114 to float
-  %116 = call float @dx.op.binary.f32(i32 35, float %98, float 0.000000e+00)  ; FMax(a,b)
-  %117 = call float @dx.op.binary.f32(i32 36, float %116, float %115)  ; FMin(a,b)
-  br label %179
-
-; <label>:118                                     ; preds = %106
-  %119 = icmp eq i32 %107, 2
-  br i1 %119, label %120, label %179
-
-; <label>:120                                     ; preds = %118
-  %121 = fsub fast float %22, %20
-  br i1 %99, label %122, label %135
-
-; <label>:122                                     ; preds = %120
-  %123 = fsub fast float %20, %97
-  %124 = fdiv fast float %123, %121
-  %125 = fptoui float %124 to i32
-  %126 = uitofp i32 %125 to float
-  %127 = fmul fast float %126, %121
-  %128 = fsub fast float %123, %127
-  %129 = and i32 %125, 1
-  %130 = icmp eq i32 %129, 0
-  br i1 %130, label %131, label %133
-
-; <label>:131                                     ; preds = %122
-  %132 = fadd fast float %128, %20
-  br label %149
-
-; <label>:133                                     ; preds = %122
-  %134 = fsub fast float %22, %128
-  br label %149
-
-; <label>:135                                     ; preds = %120
-  br i1 %100, label %136, label %149
-
-; <label>:136                                     ; preds = %135
-  %137 = fsub fast float %97, %22
-  %138 = fdiv fast float %137, %121
-  %139 = fptoui float %138 to i32
-  %140 = uitofp i32 %139 to float
-  %141 = fmul fast float %140, %121
-  %142 = fsub fast float %137, %141
-  %143 = and i32 %139, 1
-  %144 = icmp eq i32 %143, 0
-  br i1 %144, label %145, label %147
-
-; <label>:145                                     ; preds = %136
-  %146 = fsub fast float %22, %142
-  br label %149
-
-; <label>:147                                     ; preds = %136
-  %148 = fadd fast float %142, %20
-  br label %149
-
-; <label>:149                                     ; preds = %147, %145, %135, %133, %131
-  %150 = phi float [ %132, %131 ], [ %134, %133 ], [ %146, %145 ], [ %148, %147 ], [ %97, %135 ]
-  %151 = fsub fast float %24, %20
-  br i1 %102, label %152, label %165
-
-; <label>:152                                     ; preds = %149
-  %153 = fsub fast float %20, %98
-  %154 = fdiv fast float %153, %151
-  %155 = fptoui float %154 to i32
-  %156 = uitofp i32 %155 to float
-  %157 = fmul fast float %156, %151
-  %158 = fsub fast float %153, %157
-  %159 = and i32 %155, 1
-  %160 = icmp eq i32 %159, 0
-  br i1 %160, label %161, label %163
-
-; <label>:161                                     ; preds = %152
-  %162 = fadd fast float %158, %20
-  br label %179
-
-; <label>:163                                     ; preds = %152
-  %164 = fsub fast float %24, %158
-  br label %179
-
-; <label>:165                                     ; preds = %149
-  br i1 %104, label %166, label %179
-
-; <label>:166                                     ; preds = %165
-  %167 = fsub fast float %98, %24
-  %168 = fdiv fast float %167, %151
-  %169 = fptoui float %168 to i32
-  %170 = uitofp i32 %169 to float
-  %171 = fmul fast float %170, %151
-  %172 = fsub fast float %167, %171
-  %173 = and i32 %169, 1
-  %174 = icmp eq i32 %173, 0
-  br i1 %174, label %175, label %177
-
-; <label>:175                                     ; preds = %166
-  %176 = fsub fast float %24, %172
-  br label %179
-
-; <label>:177                                     ; preds = %166
-  %178 = fadd fast float %172, %20
-  br label %179
-
-; <label>:179                                     ; preds = %177, %175, %165, %163, %161, %118, %109, %96
-  %180 = phi float [ %113, %109 ], [ %97, %118 ], [ %97, %96 ], [ %150, %177 ], [ %150, %175 ], [ %150, %165 ], [ %150, %163 ], [ %150, %161 ]
-  %181 = phi float [ %117, %109 ], [ %98, %118 ], [ %98, %96 ], [ %178, %177 ], [ %176, %175 ], [ %98, %165 ], [ %164, %163 ], [ %162, %161 ]
-  %182 = uitofp i32 %37 to float
-  br i1 %92, label %183, label %328
-
-; <label>:183                                     ; preds = %179
-  %184 = extractvalue %dx.types.CBufRet.i32 %6, 3
-  %185 = icmp eq i32 %184, 0
-  br i1 %185, label %186, label %210
-
-; <label>:186                                     ; preds = %183
-  %187 = fcmp fast oge float %180, 0.000000e+00
-  %188 = fptoui float %180 to i32
-  %189 = icmp ult i32 %188, %13
-  %190 = and i1 %187, %189
-  %191 = fcmp fast oge float %181, 0.000000e+00
-  %192 = and i1 %191, %190
-  %193 = fptoui float %181 to i32
-  %194 = icmp ult i32 %193, %15
-  %195 = and i1 %194, %192
-  br i1 %195, label %196, label %326
-
-; <label>:196                                     ; preds = %186
-  %197 = fptoui float %45 to i32
-  %198 = fptoui float %182 to i32
-  %199 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %200 = extractvalue %dx.types.CBufRet.i32 %199, 0
-  %201 = extractvalue %dx.types.CBufRet.i32 %199, 1
-  %202 = extractvalue %dx.types.CBufRet.i32 %199, 2
-  %203 = extractvalue %dx.types.CBufRet.i32 %199, 3
-  %204 = mul i32 %200, %197
-  %205 = call i32 @dx.op.tertiary.i32(i32 48, i32 %198, i32 %201, i32 %204)  ; IMad(a,b,c)
-  %206 = call i32 @dx.op.tertiary.i32(i32 48, i32 %193, i32 %202, i32 %205)  ; IMad(a,b,c)
-  %207 = call i32 @dx.op.tertiary.i32(i32 48, i32 %188, i32 %203, i32 %206)  ; IMad(a,b,c)
-  %208 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %207, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %209 = extractvalue %dx.types.ResRet.f32 %208, 0
-  br label %326
-
-; <label>:210                                     ; preds = %183
-  %211 = icmp eq i32 %184, 1
-  br i1 %211, label %212, label %240
-
-; <label>:212                                     ; preds = %210
-  %213 = add i32 %13, -1
-  %214 = uitofp i32 %213 to float
-  %215 = call float @dx.op.binary.f32(i32 35, float %180, float 0.000000e+00)  ; FMax(a,b)
-  %216 = call float @dx.op.binary.f32(i32 36, float %215, float %214)  ; FMin(a,b)
-  %217 = fptoui float %216 to i32
-  %218 = add i32 %15, -1
-  %219 = uitofp i32 %218 to float
-  %220 = call float @dx.op.binary.f32(i32 35, float %181, float 0.000000e+00)  ; FMax(a,b)
-  %221 = call float @dx.op.binary.f32(i32 36, float %220, float %219)  ; FMin(a,b)
-  %222 = fptoui float %221 to i32
-  %223 = uitofp i32 %222 to float
-  %224 = uitofp i32 %217 to float
-  %225 = fptoui float %45 to i32
-  %226 = fptoui float %182 to i32
-  %227 = fptoui float %223 to i32
-  %228 = fptoui float %224 to i32
-  %229 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %230 = extractvalue %dx.types.CBufRet.i32 %229, 0
-  %231 = extractvalue %dx.types.CBufRet.i32 %229, 1
-  %232 = extractvalue %dx.types.CBufRet.i32 %229, 2
-  %233 = extractvalue %dx.types.CBufRet.i32 %229, 3
-  %234 = mul i32 %230, %225
-  %235 = call i32 @dx.op.tertiary.i32(i32 48, i32 %226, i32 %231, i32 %234)  ; IMad(a,b,c)
-  %236 = call i32 @dx.op.tertiary.i32(i32 48, i32 %227, i32 %232, i32 %235)  ; IMad(a,b,c)
-  %237 = call i32 @dx.op.tertiary.i32(i32 48, i32 %228, i32 %233, i32 %236)  ; IMad(a,b,c)
-  %238 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %237, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %239 = extractvalue %dx.types.ResRet.f32 %238, 0
-  br label %326
-
-; <label>:240                                     ; preds = %210
-  %241 = icmp eq i32 %184, 2
-  br i1 %241, label %242, label %326
-
-; <label>:242                                     ; preds = %240
-  %243 = fsub fast float %22, %20
-  %244 = fcmp fast olt float %180, %20
-  br i1 %244, label %245, label %258
-
-; <label>:245                                     ; preds = %242
-  %246 = fsub fast float %20, %180
-  %247 = fdiv fast float %246, %243
-  %248 = fptoui float %247 to i32
-  %249 = uitofp i32 %248 to float
-  %250 = fmul fast float %249, %243
-  %251 = fsub fast float %246, %250
-  %252 = and i32 %248, 1
-  %253 = icmp eq i32 %252, 0
-  br i1 %253, label %254, label %256
-
-; <label>:254                                     ; preds = %245
-  %255 = fadd fast float %251, %20
-  br label %273
-
-; <label>:256                                     ; preds = %245
-  %257 = fsub fast float %22, %251
-  br label %273
-
-; <label>:258                                     ; preds = %242
-  %259 = fcmp fast ogt float %180, %22
-  br i1 %259, label %260, label %273
-
-; <label>:260                                     ; preds = %258
-  %261 = fsub fast float %180, %22
-  %262 = fdiv fast float %261, %243
-  %263 = fptoui float %262 to i32
-  %264 = uitofp i32 %263 to float
-  %265 = fmul fast float %264, %243
-  %266 = fsub fast float %261, %265
-  %267 = and i32 %263, 1
-  %268 = icmp eq i32 %267, 0
-  br i1 %268, label %269, label %271
-
-; <label>:269                                     ; preds = %260
-  %270 = fsub fast float %22, %266
-  br label %273
-
-; <label>:271                                     ; preds = %260
-  %272 = fadd fast float %266, %20
-  br label %273
-
-; <label>:273                                     ; preds = %271, %269, %258, %256, %254
-  %274 = phi float [ %255, %254 ], [ %257, %256 ], [ %270, %269 ], [ %272, %271 ], [ %180, %258 ]
-  %275 = fptoui float %274 to i32
-  %276 = fsub fast float %24, %20
-  %277 = fcmp fast olt float %181, %20
-  br i1 %277, label %278, label %291
-
-; <label>:278                                     ; preds = %273
-  %279 = fsub fast float %20, %181
-  %280 = fdiv fast float %279, %276
-  %281 = fptoui float %280 to i32
-  %282 = uitofp i32 %281 to float
-  %283 = fmul fast float %282, %276
-  %284 = fsub fast float %279, %283
-  %285 = and i32 %281, 1
-  %286 = icmp eq i32 %285, 0
-  br i1 %286, label %287, label %289
-
-; <label>:287                                     ; preds = %278
-  %288 = fadd fast float %284, %20
-  br label %306
-
-; <label>:289                                     ; preds = %278
-  %290 = fsub fast float %24, %284
-  br label %306
-
-; <label>:291                                     ; preds = %273
-  %292 = fcmp fast ogt float %181, %24
-  br i1 %292, label %293, label %306
-
-; <label>:293                                     ; preds = %291
-  %294 = fsub fast float %181, %24
-  %295 = fdiv fast float %294, %276
-  %296 = fptoui float %295 to i32
-  %297 = uitofp i32 %296 to float
-  %298 = fmul fast float %297, %276
-  %299 = fsub fast float %294, %298
-  %300 = and i32 %296, 1
-  %301 = icmp eq i32 %300, 0
-  br i1 %301, label %302, label %304
-
-; <label>:302                                     ; preds = %293
-  %303 = fsub fast float %24, %299
-  br label %306
-
-; <label>:304                                     ; preds = %293
-  %305 = fadd fast float %299, %20
-  br label %306
-
-; <label>:306                                     ; preds = %304, %302, %291, %289, %287
-  %307 = phi float [ %288, %287 ], [ %290, %289 ], [ %303, %302 ], [ %305, %304 ], [ %181, %291 ]
-  %308 = fptoui float %307 to i32
-  %309 = uitofp i32 %308 to float
-  %310 = uitofp i32 %275 to float
-  %311 = fptoui float %45 to i32
-  %312 = fptoui float %182 to i32
-  %313 = fptoui float %309 to i32
-  %314 = fptoui float %310 to i32
-  %315 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %316 = extractvalue %dx.types.CBufRet.i32 %315, 0
-  %317 = extractvalue %dx.types.CBufRet.i32 %315, 1
-  %318 = extractvalue %dx.types.CBufRet.i32 %315, 2
-  %319 = extractvalue %dx.types.CBufRet.i32 %315, 3
-  %320 = mul i32 %316, %311
-  %321 = call i32 @dx.op.tertiary.i32(i32 48, i32 %312, i32 %317, i32 %320)  ; IMad(a,b,c)
-  %322 = call i32 @dx.op.tertiary.i32(i32 48, i32 %313, i32 %318, i32 %321)  ; IMad(a,b,c)
-  %323 = call i32 @dx.op.tertiary.i32(i32 48, i32 %314, i32 %319, i32 %322)  ; IMad(a,b,c)
-  %324 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %323, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %325 = extractvalue %dx.types.ResRet.f32 %324, 0
-  br label %326
-
-; <label>:326                                     ; preds = %306, %240, %212, %196, %186
-  %327 = phi float [ %209, %196 ], [ 0.000000e+00, %186 ], [ %239, %212 ], [ %325, %306 ], [ 0.000000e+00, %240 ]
-  call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %1, i32 %8, i32 0, float %327, float undef, float undef, float undef, i8 1, i32 4)  ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment)
-  br label %3257
-
-; <label>:328                                     ; preds = %179
-  %329 = icmp eq i32 %91, 0
-  br i1 %329, label %330, label %916
-
-; <label>:330                                     ; preds = %328
-  %331 = call float @dx.op.unary.f32(i32 27, float %180)  ; Round_ni(value)
-  %332 = call float @dx.op.unary.f32(i32 27, float %181)  ; Round_ni(value)
-  %333 = fadd fast float %331, 1.000000e+00
-  %334 = fadd fast float %332, 1.000000e+00
-  %335 = extractvalue %dx.types.CBufRet.i32 %6, 3
-  %336 = icmp eq i32 %335, 0
-  br i1 %336, label %337, label %361
-
-; <label>:337                                     ; preds = %330
-  %338 = fcmp fast oge float %331, 0.000000e+00
-  %339 = fptoui float %331 to i32
-  %340 = icmp ult i32 %339, %13
-  %341 = and i1 %338, %340
-  %342 = fcmp fast oge float %332, 0.000000e+00
-  %343 = and i1 %342, %341
-  %344 = fptoui float %332 to i32
-  %345 = icmp ult i32 %344, %15
-  %346 = and i1 %345, %343
-  br i1 %346, label %347, label %477
-
-; <label>:347                                     ; preds = %337
-  %348 = fptoui float %45 to i32
-  %349 = fptoui float %182 to i32
-  %350 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %351 = extractvalue %dx.types.CBufRet.i32 %350, 0
-  %352 = extractvalue %dx.types.CBufRet.i32 %350, 1
-  %353 = extractvalue %dx.types.CBufRet.i32 %350, 2
-  %354 = extractvalue %dx.types.CBufRet.i32 %350, 3
-  %355 = mul i32 %351, %348
-  %356 = call i32 @dx.op.tertiary.i32(i32 48, i32 %349, i32 %352, i32 %355)  ; IMad(a,b,c)
-  %357 = call i32 @dx.op.tertiary.i32(i32 48, i32 %344, i32 %353, i32 %356)  ; IMad(a,b,c)
-  %358 = call i32 @dx.op.tertiary.i32(i32 48, i32 %339, i32 %354, i32 %357)  ; IMad(a,b,c)
-  %359 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %358, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %360 = extractvalue %dx.types.ResRet.f32 %359, 0
-  br label %477
-
-; <label>:361                                     ; preds = %330
-  %362 = icmp eq i32 %335, 1
-  br i1 %362, label %363, label %391
-
-; <label>:363                                     ; preds = %361
-  %364 = add i32 %13, -1
-  %365 = uitofp i32 %364 to float
-  %366 = call float @dx.op.binary.f32(i32 35, float %331, float 0.000000e+00)  ; FMax(a,b)
-  %367 = call float @dx.op.binary.f32(i32 36, float %366, float %365)  ; FMin(a,b)
-  %368 = fptoui float %367 to i32
-  %369 = add i32 %15, -1
-  %370 = uitofp i32 %369 to float
-  %371 = call float @dx.op.binary.f32(i32 35, float %332, float 0.000000e+00)  ; FMax(a,b)
-  %372 = call float @dx.op.binary.f32(i32 36, float %371, float %370)  ; FMin(a,b)
-  %373 = fptoui float %372 to i32
-  %374 = uitofp i32 %373 to float
-  %375 = uitofp i32 %368 to float
-  %376 = fptoui float %45 to i32
-  %377 = fptoui float %182 to i32
-  %378 = fptoui float %374 to i32
-  %379 = fptoui float %375 to i32
-  %380 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %381 = extractvalue %dx.types.CBufRet.i32 %380, 0
-  %382 = extractvalue %dx.types.CBufRet.i32 %380, 1
-  %383 = extractvalue %dx.types.CBufRet.i32 %380, 2
-  %384 = extractvalue %dx.types.CBufRet.i32 %380, 3
-  %385 = mul i32 %381, %376
-  %386 = call i32 @dx.op.tertiary.i32(i32 48, i32 %377, i32 %382, i32 %385)  ; IMad(a,b,c)
-  %387 = call i32 @dx.op.tertiary.i32(i32 48, i32 %378, i32 %383, i32 %386)  ; IMad(a,b,c)
-  %388 = call i32 @dx.op.tertiary.i32(i32 48, i32 %379, i32 %384, i32 %387)  ; IMad(a,b,c)
-  %389 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %388, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %390 = extractvalue %dx.types.ResRet.f32 %389, 0
-  br label %477
-
-; <label>:391                                     ; preds = %361
-  %392 = icmp eq i32 %335, 2
-  br i1 %392, label %393, label %477
-
-; <label>:393                                     ; preds = %391
-  %394 = fsub fast float %22, %20
-  %395 = fcmp fast olt float %331, %20
-  br i1 %395, label %396, label %409
-
-; <label>:396                                     ; preds = %393
-  %397 = fsub fast float %20, %331
-  %398 = fdiv fast float %397, %394
-  %399 = fptoui float %398 to i32
-  %400 = uitofp i32 %399 to float
-  %401 = fmul fast float %400, %394
-  %402 = fsub fast float %397, %401
-  %403 = and i32 %399, 1
-  %404 = icmp eq i32 %403, 0
-  br i1 %404, label %405, label %407
-
-; <label>:405                                     ; preds = %396
-  %406 = fadd fast float %402, %20
-  br label %424
-
-; <label>:407                                     ; preds = %396
-  %408 = fsub fast float %22, %402
-  br label %424
-
-; <label>:409                                     ; preds = %393
-  %410 = fcmp fast ogt float %331, %22
-  br i1 %410, label %411, label %424
-
-; <label>:411                                     ; preds = %409
-  %412 = fsub fast float %331, %22
-  %413 = fdiv fast float %412, %394
-  %414 = fptoui float %413 to i32
-  %415 = uitofp i32 %414 to float
-  %416 = fmul fast float %415, %394
-  %417 = fsub fast float %412, %416
-  %418 = and i32 %414, 1
-  %419 = icmp eq i32 %418, 0
-  br i1 %419, label %420, label %422
-
-; <label>:420                                     ; preds = %411
-  %421 = fsub fast float %22, %417
-  br label %424
-
-; <label>:422                                     ; preds = %411
-  %423 = fadd fast float %417, %20
-  br label %424
-
-; <label>:424                                     ; preds = %422, %420, %409, %407, %405
-  %425 = phi float [ %406, %405 ], [ %408, %407 ], [ %421, %420 ], [ %423, %422 ], [ %331, %409 ]
-  %426 = fptoui float %425 to i32
-  %427 = fsub fast float %24, %20
-  %428 = fcmp fast olt float %332, %20
-  br i1 %428, label %429, label %442
-
-; <label>:429                                     ; preds = %424
-  %430 = fsub fast float %20, %332
-  %431 = fdiv fast float %430, %427
-  %432 = fptoui float %431 to i32
-  %433 = uitofp i32 %432 to float
-  %434 = fmul fast float %433, %427
-  %435 = fsub fast float %430, %434
-  %436 = and i32 %432, 1
-  %437 = icmp eq i32 %436, 0
-  br i1 %437, label %438, label %440
-
-; <label>:438                                     ; preds = %429
-  %439 = fadd fast float %435, %20
-  br label %457
-
-; <label>:440                                     ; preds = %429
-  %441 = fsub fast float %24, %435
-  br label %457
-
-; <label>:442                                     ; preds = %424
-  %443 = fcmp fast ogt float %332, %24
-  br i1 %443, label %444, label %457
-
-; <label>:444                                     ; preds = %442
-  %445 = fsub fast float %332, %24
-  %446 = fdiv fast float %445, %427
-  %447 = fptoui float %446 to i32
-  %448 = uitofp i32 %447 to float
-  %449 = fmul fast float %448, %427
-  %450 = fsub fast float %445, %449
-  %451 = and i32 %447, 1
-  %452 = icmp eq i32 %451, 0
-  br i1 %452, label %453, label %455
-
-; <label>:453                                     ; preds = %444
-  %454 = fsub fast float %24, %450
-  br label %457
-
-; <label>:455                                     ; preds = %444
-  %456 = fadd fast float %450, %20
-  br label %457
-
-; <label>:457                                     ; preds = %455, %453, %442, %440, %438
-  %458 = phi float [ %439, %438 ], [ %441, %440 ], [ %454, %453 ], [ %456, %455 ], [ %332, %442 ]
-  %459 = fptoui float %458 to i32
-  %460 = uitofp i32 %459 to float
-  %461 = uitofp i32 %426 to float
-  %462 = fptoui float %45 to i32
-  %463 = fptoui float %182 to i32
-  %464 = fptoui float %460 to i32
-  %465 = fptoui float %461 to i32
-  %466 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %467 = extractvalue %dx.types.CBufRet.i32 %466, 0
-  %468 = extractvalue %dx.types.CBufRet.i32 %466, 1
-  %469 = extractvalue %dx.types.CBufRet.i32 %466, 2
-  %470 = extractvalue %dx.types.CBufRet.i32 %466, 3
-  %471 = mul i32 %467, %462
-  %472 = call i32 @dx.op.tertiary.i32(i32 48, i32 %463, i32 %468, i32 %471)  ; IMad(a,b,c)
-  %473 = call i32 @dx.op.tertiary.i32(i32 48, i32 %464, i32 %469, i32 %472)  ; IMad(a,b,c)
-  %474 = call i32 @dx.op.tertiary.i32(i32 48, i32 %465, i32 %470, i32 %473)  ; IMad(a,b,c)
-  %475 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %474, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %476 = extractvalue %dx.types.ResRet.f32 %475, 0
-  br label %477
-
-; <label>:477                                     ; preds = %457, %391, %363, %347, %337
-  %478 = phi float [ %360, %347 ], [ 0.000000e+00, %337 ], [ %390, %363 ], [ %476, %457 ], [ 0.000000e+00, %391 ]
-  br i1 %336, label %479, label %503
-
-; <label>:479                                     ; preds = %477
-  %480 = fcmp fast oge float %333, 0.000000e+00
-  %481 = fptoui float %333 to i32
-  %482 = icmp ult i32 %481, %13
-  %483 = and i1 %480, %482
-  %484 = fcmp fast oge float %332, 0.000000e+00
-  %485 = and i1 %484, %483
-  %486 = fptoui float %332 to i32
-  %487 = icmp ult i32 %486, %15
-  %488 = and i1 %487, %485
-  br i1 %488, label %489, label %619
-
-; <label>:489                                     ; preds = %479
-  %490 = fptoui float %45 to i32
-  %491 = fptoui float %182 to i32
-  %492 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %493 = extractvalue %dx.types.CBufRet.i32 %492, 0
-  %494 = extractvalue %dx.types.CBufRet.i32 %492, 1
-  %495 = extractvalue %dx.types.CBufRet.i32 %492, 2
-  %496 = extractvalue %dx.types.CBufRet.i32 %492, 3
-  %497 = mul i32 %493, %490
-  %498 = call i32 @dx.op.tertiary.i32(i32 48, i32 %491, i32 %494, i32 %497)  ; IMad(a,b,c)
-  %499 = call i32 @dx.op.tertiary.i32(i32 48, i32 %486, i32 %495, i32 %498)  ; IMad(a,b,c)
-  %500 = call i32 @dx.op.tertiary.i32(i32 48, i32 %481, i32 %496, i32 %499)  ; IMad(a,b,c)
-  %501 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %500, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %502 = extractvalue %dx.types.ResRet.f32 %501, 0
-  br label %619
-
-; <label>:503                                     ; preds = %477
-  %504 = icmp eq i32 %335, 1
-  br i1 %504, label %505, label %533
-
-; <label>:505                                     ; preds = %503
-  %506 = add i32 %13, -1
-  %507 = uitofp i32 %506 to float
-  %508 = call float @dx.op.binary.f32(i32 35, float %333, float 0.000000e+00)  ; FMax(a,b)
-  %509 = call float @dx.op.binary.f32(i32 36, float %508, float %507)  ; FMin(a,b)
-  %510 = fptoui float %509 to i32
-  %511 = add i32 %15, -1
-  %512 = uitofp i32 %511 to float
-  %513 = call float @dx.op.binary.f32(i32 35, float %332, float 0.000000e+00)  ; FMax(a,b)
-  %514 = call float @dx.op.binary.f32(i32 36, float %513, float %512)  ; FMin(a,b)
-  %515 = fptoui float %514 to i32
-  %516 = uitofp i32 %515 to float
-  %517 = uitofp i32 %510 to float
-  %518 = fptoui float %45 to i32
-  %519 = fptoui float %182 to i32
-  %520 = fptoui float %516 to i32
-  %521 = fptoui float %517 to i32
-  %522 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %523 = extractvalue %dx.types.CBufRet.i32 %522, 0
-  %524 = extractvalue %dx.types.CBufRet.i32 %522, 1
-  %525 = extractvalue %dx.types.CBufRet.i32 %522, 2
-  %526 = extractvalue %dx.types.CBufRet.i32 %522, 3
-  %527 = mul i32 %523, %518
-  %528 = call i32 @dx.op.tertiary.i32(i32 48, i32 %519, i32 %524, i32 %527)  ; IMad(a,b,c)
-  %529 = call i32 @dx.op.tertiary.i32(i32 48, i32 %520, i32 %525, i32 %528)  ; IMad(a,b,c)
-  %530 = call i32 @dx.op.tertiary.i32(i32 48, i32 %521, i32 %526, i32 %529)  ; IMad(a,b,c)
-  %531 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %530, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %532 = extractvalue %dx.types.ResRet.f32 %531, 0
-  br label %619
-
-; <label>:533                                     ; preds = %503
-  %534 = icmp eq i32 %335, 2
-  br i1 %534, label %535, label %619
-
-; <label>:535                                     ; preds = %533
-  %536 = fsub fast float %22, %20
-  %537 = fcmp fast olt float %333, %20
-  br i1 %537, label %538, label %551
-
-; <label>:538                                     ; preds = %535
-  %539 = fsub fast float %20, %333
-  %540 = fdiv fast float %539, %536
-  %541 = fptoui float %540 to i32
-  %542 = uitofp i32 %541 to float
-  %543 = fmul fast float %542, %536
-  %544 = fsub fast float %539, %543
-  %545 = and i32 %541, 1
-  %546 = icmp eq i32 %545, 0
-  br i1 %546, label %547, label %549
-
-; <label>:547                                     ; preds = %538
-  %548 = fadd fast float %544, %20
-  br label %566
-
-; <label>:549                                     ; preds = %538
-  %550 = fsub fast float %22, %544
-  br label %566
-
-; <label>:551                                     ; preds = %535
-  %552 = fcmp fast ogt float %333, %22
-  br i1 %552, label %553, label %566
-
-; <label>:553                                     ; preds = %551
-  %554 = fsub fast float %333, %22
-  %555 = fdiv fast float %554, %536
-  %556 = fptoui float %555 to i32
-  %557 = uitofp i32 %556 to float
-  %558 = fmul fast float %557, %536
-  %559 = fsub fast float %554, %558
-  %560 = and i32 %556, 1
-  %561 = icmp eq i32 %560, 0
-  br i1 %561, label %562, label %564
-
-; <label>:562                                     ; preds = %553
-  %563 = fsub fast float %22, %559
-  br label %566
-
-; <label>:564                                     ; preds = %553
-  %565 = fadd fast float %559, %20
-  br label %566
-
-; <label>:566                                     ; preds = %564, %562, %551, %549, %547
-  %567 = phi float [ %548, %547 ], [ %550, %549 ], [ %563, %562 ], [ %565, %564 ], [ %333, %551 ]
-  %568 = fptoui float %567 to i32
-  %569 = fsub fast float %24, %20
-  %570 = fcmp fast olt float %332, %20
-  br i1 %570, label %571, label %584
-
-; <label>:571                                     ; preds = %566
-  %572 = fsub fast float %20, %332
-  %573 = fdiv fast float %572, %569
-  %574 = fptoui float %573 to i32
-  %575 = uitofp i32 %574 to float
-  %576 = fmul fast float %575, %569
-  %577 = fsub fast float %572, %576
-  %578 = and i32 %574, 1
-  %579 = icmp eq i32 %578, 0
-  br i1 %579, label %580, label %582
-
-; <label>:580                                     ; preds = %571
-  %581 = fadd fast float %577, %20
-  br label %599
-
-; <label>:582                                     ; preds = %571
-  %583 = fsub fast float %24, %577
-  br label %599
-
-; <label>:584                                     ; preds = %566
-  %585 = fcmp fast ogt float %332, %24
-  br i1 %585, label %586, label %599
-
-; <label>:586                                     ; preds = %584
-  %587 = fsub fast float %332, %24
-  %588 = fdiv fast float %587, %569
-  %589 = fptoui float %588 to i32
-  %590 = uitofp i32 %589 to float
-  %591 = fmul fast float %590, %569
-  %592 = fsub fast float %587, %591
-  %593 = and i32 %589, 1
-  %594 = icmp eq i32 %593, 0
-  br i1 %594, label %595, label %597
-
-; <label>:595                                     ; preds = %586
-  %596 = fsub fast float %24, %592
-  br label %599
-
-; <label>:597                                     ; preds = %586
-  %598 = fadd fast float %592, %20
-  br label %599
-
-; <label>:599                                     ; preds = %597, %595, %584, %582, %580
-  %600 = phi float [ %581, %580 ], [ %583, %582 ], [ %596, %595 ], [ %598, %597 ], [ %332, %584 ]
-  %601 = fptoui float %600 to i32
-  %602 = uitofp i32 %601 to float
-  %603 = uitofp i32 %568 to float
-  %604 = fptoui float %45 to i32
-  %605 = fptoui float %182 to i32
-  %606 = fptoui float %602 to i32
-  %607 = fptoui float %603 to i32
-  %608 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %609 = extractvalue %dx.types.CBufRet.i32 %608, 0
-  %610 = extractvalue %dx.types.CBufRet.i32 %608, 1
-  %611 = extractvalue %dx.types.CBufRet.i32 %608, 2
-  %612 = extractvalue %dx.types.CBufRet.i32 %608, 3
-  %613 = mul i32 %609, %604
-  %614 = call i32 @dx.op.tertiary.i32(i32 48, i32 %605, i32 %610, i32 %613)  ; IMad(a,b,c)
-  %615 = call i32 @dx.op.tertiary.i32(i32 48, i32 %606, i32 %611, i32 %614)  ; IMad(a,b,c)
-  %616 = call i32 @dx.op.tertiary.i32(i32 48, i32 %607, i32 %612, i32 %615)  ; IMad(a,b,c)
-  %617 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %616, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %618 = extractvalue %dx.types.ResRet.f32 %617, 0
-  br label %619
-
-; <label>:619                                     ; preds = %599, %533, %505, %489, %479
-  %620 = phi float [ %502, %489 ], [ 0.000000e+00, %479 ], [ %532, %505 ], [ %618, %599 ], [ 0.000000e+00, %533 ]
-  br i1 %336, label %621, label %645
-
-; <label>:621                                     ; preds = %619
-  %622 = fcmp fast oge float %331, 0.000000e+00
-  %623 = fptoui float %331 to i32
-  %624 = icmp ult i32 %623, %13
-  %625 = and i1 %622, %624
-  %626 = fcmp fast oge float %334, 0.000000e+00
-  %627 = and i1 %626, %625
-  %628 = fptoui float %334 to i32
-  %629 = icmp ult i32 %628, %15
-  %630 = and i1 %629, %627
-  br i1 %630, label %631, label %761
-
-; <label>:631                                     ; preds = %621
-  %632 = fptoui float %45 to i32
-  %633 = fptoui float %182 to i32
-  %634 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %635 = extractvalue %dx.types.CBufRet.i32 %634, 0
-  %636 = extractvalue %dx.types.CBufRet.i32 %634, 1
-  %637 = extractvalue %dx.types.CBufRet.i32 %634, 2
-  %638 = extractvalue %dx.types.CBufRet.i32 %634, 3
-  %639 = mul i32 %635, %632
-  %640 = call i32 @dx.op.tertiary.i32(i32 48, i32 %633, i32 %636, i32 %639)  ; IMad(a,b,c)
-  %641 = call i32 @dx.op.tertiary.i32(i32 48, i32 %628, i32 %637, i32 %640)  ; IMad(a,b,c)
-  %642 = call i32 @dx.op.tertiary.i32(i32 48, i32 %623, i32 %638, i32 %641)  ; IMad(a,b,c)
-  %643 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %642, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %644 = extractvalue %dx.types.ResRet.f32 %643, 0
-  br label %761
-
-; <label>:645                                     ; preds = %619
-  %646 = icmp eq i32 %335, 1
-  br i1 %646, label %647, label %675
-
-; <label>:647                                     ; preds = %645
-  %648 = add i32 %13, -1
-  %649 = uitofp i32 %648 to float
-  %650 = call float @dx.op.binary.f32(i32 35, float %331, float 0.000000e+00)  ; FMax(a,b)
-  %651 = call float @dx.op.binary.f32(i32 36, float %650, float %649)  ; FMin(a,b)
-  %652 = fptoui float %651 to i32
-  %653 = add i32 %15, -1
-  %654 = uitofp i32 %653 to float
-  %655 = call float @dx.op.binary.f32(i32 35, float %334, float 0.000000e+00)  ; FMax(a,b)
-  %656 = call float @dx.op.binary.f32(i32 36, float %655, float %654)  ; FMin(a,b)
-  %657 = fptoui float %656 to i32
-  %658 = uitofp i32 %657 to float
-  %659 = uitofp i32 %652 to float
-  %660 = fptoui float %45 to i32
-  %661 = fptoui float %182 to i32
-  %662 = fptoui float %658 to i32
-  %663 = fptoui float %659 to i32
-  %664 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %665 = extractvalue %dx.types.CBufRet.i32 %664, 0
-  %666 = extractvalue %dx.types.CBufRet.i32 %664, 1
-  %667 = extractvalue %dx.types.CBufRet.i32 %664, 2
-  %668 = extractvalue %dx.types.CBufRet.i32 %664, 3
-  %669 = mul i32 %665, %660
-  %670 = call i32 @dx.op.tertiary.i32(i32 48, i32 %661, i32 %666, i32 %669)  ; IMad(a,b,c)
-  %671 = call i32 @dx.op.tertiary.i32(i32 48, i32 %662, i32 %667, i32 %670)  ; IMad(a,b,c)
-  %672 = call i32 @dx.op.tertiary.i32(i32 48, i32 %663, i32 %668, i32 %671)  ; IMad(a,b,c)
-  %673 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %672, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %674 = extractvalue %dx.types.ResRet.f32 %673, 0
-  br label %761
-
-; <label>:675                                     ; preds = %645
-  %676 = icmp eq i32 %335, 2
-  br i1 %676, label %677, label %761
-
-; <label>:677                                     ; preds = %675
-  %678 = fsub fast float %22, %20
-  %679 = fcmp fast olt float %331, %20
-  br i1 %679, label %680, label %693
-
-; <label>:680                                     ; preds = %677
-  %681 = fsub fast float %20, %331
-  %682 = fdiv fast float %681, %678
-  %683 = fptoui float %682 to i32
-  %684 = uitofp i32 %683 to float
-  %685 = fmul fast float %684, %678
-  %686 = fsub fast float %681, %685
-  %687 = and i32 %683, 1
-  %688 = icmp eq i32 %687, 0
-  br i1 %688, label %689, label %691
-
-; <label>:689                                     ; preds = %680
-  %690 = fadd fast float %686, %20
-  br label %708
-
-; <label>:691                                     ; preds = %680
-  %692 = fsub fast float %22, %686
-  br label %708
-
-; <label>:693                                     ; preds = %677
-  %694 = fcmp fast ogt float %331, %22
-  br i1 %694, label %695, label %708
-
-; <label>:695                                     ; preds = %693
-  %696 = fsub fast float %331, %22
-  %697 = fdiv fast float %696, %678
-  %698 = fptoui float %697 to i32
-  %699 = uitofp i32 %698 to float
-  %700 = fmul fast float %699, %678
-  %701 = fsub fast float %696, %700
-  %702 = and i32 %698, 1
-  %703 = icmp eq i32 %702, 0
-  br i1 %703, label %704, label %706
-
-; <label>:704                                     ; preds = %695
-  %705 = fsub fast float %22, %701
-  br label %708
-
-; <label>:706                                     ; preds = %695
-  %707 = fadd fast float %701, %20
-  br label %708
-
-; <label>:708                                     ; preds = %706, %704, %693, %691, %689
-  %709 = phi float [ %690, %689 ], [ %692, %691 ], [ %705, %704 ], [ %707, %706 ], [ %331, %693 ]
-  %710 = fptoui float %709 to i32
-  %711 = fsub fast float %24, %20
-  %712 = fcmp fast olt float %334, %20
-  br i1 %712, label %713, label %726
-
-; <label>:713                                     ; preds = %708
-  %714 = fsub fast float %20, %334
-  %715 = fdiv fast float %714, %711
-  %716 = fptoui float %715 to i32
-  %717 = uitofp i32 %716 to float
-  %718 = fmul fast float %717, %711
-  %719 = fsub fast float %714, %718
-  %720 = and i32 %716, 1
-  %721 = icmp eq i32 %720, 0
-  br i1 %721, label %722, label %724
-
-; <label>:722                                     ; preds = %713
-  %723 = fadd fast float %719, %20
-  br label %741
-
-; <label>:724                                     ; preds = %713
-  %725 = fsub fast float %24, %719
-  br label %741
-
-; <label>:726                                     ; preds = %708
-  %727 = fcmp fast ogt float %334, %24
-  br i1 %727, label %728, label %741
-
-; <label>:728                                     ; preds = %726
-  %729 = fsub fast float %334, %24
-  %730 = fdiv fast float %729, %711
-  %731 = fptoui float %730 to i32
-  %732 = uitofp i32 %731 to float
-  %733 = fmul fast float %732, %711
-  %734 = fsub fast float %729, %733
-  %735 = and i32 %731, 1
-  %736 = icmp eq i32 %735, 0
-  br i1 %736, label %737, label %739
-
-; <label>:737                                     ; preds = %728
-  %738 = fsub fast float %24, %734
-  br label %741
-
-; <label>:739                                     ; preds = %728
-  %740 = fadd fast float %734, %20
-  br label %741
-
-; <label>:741                                     ; preds = %739, %737, %726, %724, %722
-  %742 = phi float [ %723, %722 ], [ %725, %724 ], [ %738, %737 ], [ %740, %739 ], [ %334, %726 ]
-  %743 = fptoui float %742 to i32
-  %744 = uitofp i32 %743 to float
-  %745 = uitofp i32 %710 to float
-  %746 = fptoui float %45 to i32
-  %747 = fptoui float %182 to i32
-  %748 = fptoui float %744 to i32
-  %749 = fptoui float %745 to i32
-  %750 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %751 = extractvalue %dx.types.CBufRet.i32 %750, 0
-  %752 = extractvalue %dx.types.CBufRet.i32 %750, 1
-  %753 = extractvalue %dx.types.CBufRet.i32 %750, 2
-  %754 = extractvalue %dx.types.CBufRet.i32 %750, 3
-  %755 = mul i32 %751, %746
-  %756 = call i32 @dx.op.tertiary.i32(i32 48, i32 %747, i32 %752, i32 %755)  ; IMad(a,b,c)
-  %757 = call i32 @dx.op.tertiary.i32(i32 48, i32 %748, i32 %753, i32 %756)  ; IMad(a,b,c)
-  %758 = call i32 @dx.op.tertiary.i32(i32 48, i32 %749, i32 %754, i32 %757)  ; IMad(a,b,c)
-  %759 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %758, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %760 = extractvalue %dx.types.ResRet.f32 %759, 0
-  br label %761
-
-; <label>:761                                     ; preds = %741, %675, %647, %631, %621
-  %762 = phi float [ %644, %631 ], [ 0.000000e+00, %621 ], [ %674, %647 ], [ %760, %741 ], [ 0.000000e+00, %675 ]
-  br i1 %336, label %763, label %787
-
-; <label>:763                                     ; preds = %761
-  %764 = fcmp fast oge float %333, 0.000000e+00
-  %765 = fptoui float %333 to i32
-  %766 = icmp ult i32 %765, %13
-  %767 = and i1 %764, %766
-  %768 = fcmp fast oge float %334, 0.000000e+00
-  %769 = and i1 %768, %767
-  %770 = fptoui float %334 to i32
-  %771 = icmp ult i32 %770, %15
-  %772 = and i1 %771, %769
-  br i1 %772, label %773, label %903
-
-; <label>:773                                     ; preds = %763
-  %774 = fptoui float %45 to i32
-  %775 = fptoui float %182 to i32
-  %776 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %777 = extractvalue %dx.types.CBufRet.i32 %776, 0
-  %778 = extractvalue %dx.types.CBufRet.i32 %776, 1
-  %779 = extractvalue %dx.types.CBufRet.i32 %776, 2
-  %780 = extractvalue %dx.types.CBufRet.i32 %776, 3
-  %781 = mul i32 %777, %774
-  %782 = call i32 @dx.op.tertiary.i32(i32 48, i32 %775, i32 %778, i32 %781)  ; IMad(a,b,c)
-  %783 = call i32 @dx.op.tertiary.i32(i32 48, i32 %770, i32 %779, i32 %782)  ; IMad(a,b,c)
-  %784 = call i32 @dx.op.tertiary.i32(i32 48, i32 %765, i32 %780, i32 %783)  ; IMad(a,b,c)
-  %785 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %784, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %786 = extractvalue %dx.types.ResRet.f32 %785, 0
-  br label %903
-
-; <label>:787                                     ; preds = %761
-  %788 = icmp eq i32 %335, 1
-  br i1 %788, label %789, label %817
-
-; <label>:789                                     ; preds = %787
-  %790 = add i32 %13, -1
-  %791 = uitofp i32 %790 to float
-  %792 = call float @dx.op.binary.f32(i32 35, float %333, float 0.000000e+00)  ; FMax(a,b)
-  %793 = call float @dx.op.binary.f32(i32 36, float %792, float %791)  ; FMin(a,b)
-  %794 = fptoui float %793 to i32
-  %795 = add i32 %15, -1
-  %796 = uitofp i32 %795 to float
-  %797 = call float @dx.op.binary.f32(i32 35, float %334, float 0.000000e+00)  ; FMax(a,b)
-  %798 = call float @dx.op.binary.f32(i32 36, float %797, float %796)  ; FMin(a,b)
-  %799 = fptoui float %798 to i32
-  %800 = uitofp i32 %799 to float
-  %801 = uitofp i32 %794 to float
-  %802 = fptoui float %45 to i32
-  %803 = fptoui float %182 to i32
-  %804 = fptoui float %800 to i32
-  %805 = fptoui float %801 to i32
-  %806 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %807 = extractvalue %dx.types.CBufRet.i32 %806, 0
-  %808 = extractvalue %dx.types.CBufRet.i32 %806, 1
-  %809 = extractvalue %dx.types.CBufRet.i32 %806, 2
-  %810 = extractvalue %dx.types.CBufRet.i32 %806, 3
-  %811 = mul i32 %807, %802
-  %812 = call i32 @dx.op.tertiary.i32(i32 48, i32 %803, i32 %808, i32 %811)  ; IMad(a,b,c)
-  %813 = call i32 @dx.op.tertiary.i32(i32 48, i32 %804, i32 %809, i32 %812)  ; IMad(a,b,c)
-  %814 = call i32 @dx.op.tertiary.i32(i32 48, i32 %805, i32 %810, i32 %813)  ; IMad(a,b,c)
-  %815 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %814, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %816 = extractvalue %dx.types.ResRet.f32 %815, 0
-  br label %903
-
-; <label>:817                                     ; preds = %787
-  %818 = icmp eq i32 %335, 2
-  br i1 %818, label %819, label %903
-
-; <label>:819                                     ; preds = %817
-  %820 = fsub fast float %22, %20
-  %821 = fcmp fast olt float %333, %20
-  br i1 %821, label %822, label %835
-
-; <label>:822                                     ; preds = %819
-  %823 = fsub fast float %20, %333
-  %824 = fdiv fast float %823, %820
-  %825 = fptoui float %824 to i32
-  %826 = uitofp i32 %825 to float
-  %827 = fmul fast float %826, %820
-  %828 = fsub fast float %823, %827
-  %829 = and i32 %825, 1
-  %830 = icmp eq i32 %829, 0
-  br i1 %830, label %831, label %833
-
-; <label>:831                                     ; preds = %822
-  %832 = fadd fast float %828, %20
-  br label %850
-
-; <label>:833                                     ; preds = %822
-  %834 = fsub fast float %22, %828
-  br label %850
-
-; <label>:835                                     ; preds = %819
-  %836 = fcmp fast ogt float %333, %22
-  br i1 %836, label %837, label %850
-
-; <label>:837                                     ; preds = %835
-  %838 = fsub fast float %333, %22
-  %839 = fdiv fast float %838, %820
-  %840 = fptoui float %839 to i32
-  %841 = uitofp i32 %840 to float
-  %842 = fmul fast float %841, %820
-  %843 = fsub fast float %838, %842
-  %844 = and i32 %840, 1
-  %845 = icmp eq i32 %844, 0
-  br i1 %845, label %846, label %848
-
-; <label>:846                                     ; preds = %837
-  %847 = fsub fast float %22, %843
-  br label %850
-
-; <label>:848                                     ; preds = %837
-  %849 = fadd fast float %843, %20
-  br label %850
-
-; <label>:850                                     ; preds = %848, %846, %835, %833, %831
-  %851 = phi float [ %832, %831 ], [ %834, %833 ], [ %847, %846 ], [ %849, %848 ], [ %333, %835 ]
-  %852 = fptoui float %851 to i32
-  %853 = fsub fast float %24, %20
-  %854 = fcmp fast olt float %334, %20
-  br i1 %854, label %855, label %868
-
-; <label>:855                                     ; preds = %850
-  %856 = fsub fast float %20, %334
-  %857 = fdiv fast float %856, %853
-  %858 = fptoui float %857 to i32
-  %859 = uitofp i32 %858 to float
-  %860 = fmul fast float %859, %853
-  %861 = fsub fast float %856, %860
-  %862 = and i32 %858, 1
-  %863 = icmp eq i32 %862, 0
-  br i1 %863, label %864, label %866
-
-; <label>:864                                     ; preds = %855
-  %865 = fadd fast float %861, %20
-  br label %883
-
-; <label>:866                                     ; preds = %855
-  %867 = fsub fast float %24, %861
-  br label %883
-
-; <label>:868                                     ; preds = %850
-  %869 = fcmp fast ogt float %334, %24
-  br i1 %869, label %870, label %883
-
-; <label>:870                                     ; preds = %868
-  %871 = fsub fast float %334, %24
-  %872 = fdiv fast float %871, %853
-  %873 = fptoui float %872 to i32
-  %874 = uitofp i32 %873 to float
-  %875 = fmul fast float %874, %853
-  %876 = fsub fast float %871, %875
-  %877 = and i32 %873, 1
-  %878 = icmp eq i32 %877, 0
-  br i1 %878, label %879, label %881
-
-; <label>:879                                     ; preds = %870
-  %880 = fsub fast float %24, %876
-  br label %883
-
-; <label>:881                                     ; preds = %870
-  %882 = fadd fast float %876, %20
-  br label %883
-
-; <label>:883                                     ; preds = %881, %879, %868, %866, %864
-  %884 = phi float [ %865, %864 ], [ %867, %866 ], [ %880, %879 ], [ %882, %881 ], [ %334, %868 ]
-  %885 = fptoui float %884 to i32
-  %886 = uitofp i32 %885 to float
-  %887 = uitofp i32 %852 to float
-  %888 = fptoui float %45 to i32
-  %889 = fptoui float %182 to i32
-  %890 = fptoui float %886 to i32
-  %891 = fptoui float %887 to i32
-  %892 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %893 = extractvalue %dx.types.CBufRet.i32 %892, 0
-  %894 = extractvalue %dx.types.CBufRet.i32 %892, 1
-  %895 = extractvalue %dx.types.CBufRet.i32 %892, 2
-  %896 = extractvalue %dx.types.CBufRet.i32 %892, 3
-  %897 = mul i32 %893, %888
-  %898 = call i32 @dx.op.tertiary.i32(i32 48, i32 %889, i32 %894, i32 %897)  ; IMad(a,b,c)
-  %899 = call i32 @dx.op.tertiary.i32(i32 48, i32 %890, i32 %895, i32 %898)  ; IMad(a,b,c)
-  %900 = call i32 @dx.op.tertiary.i32(i32 48, i32 %891, i32 %896, i32 %899)  ; IMad(a,b,c)
-  %901 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %900, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %902 = extractvalue %dx.types.ResRet.f32 %901, 0
-  br label %903
-
-; <label>:903                                     ; preds = %883, %817, %789, %773, %763
-  %904 = phi float [ %786, %773 ], [ 0.000000e+00, %763 ], [ %816, %789 ], [ %902, %883 ], [ 0.000000e+00, %817 ]
-  %905 = call float @dx.op.unary.f32(i32 22, float %180)  ; Frc(value)
-  %906 = fsub fast float %620, %478
-  %907 = fmul fast float %905, %906
-  %908 = fadd fast float %907, %478
-  %909 = fsub fast float %904, %762
-  %910 = fmul fast float %905, %909
-  %911 = fadd fast float %910, %762
-  %912 = call float @dx.op.unary.f32(i32 22, float %181)  ; Frc(value)
-  %913 = fsub fast float %911, %908
-  %914 = fmul fast float %913, %912
-  %915 = fadd fast float %914, %908
-  call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %1, i32 %8, i32 0, float %915, float undef, float undef, float undef, i8 1, i32 4)  ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment)
-  br label %3257
-
-; <label>:916                                     ; preds = %328
-  %917 = icmp eq i32 %91, 2
-  br i1 %917, label %918, label %3257
-
-; <label>:918                                     ; preds = %916
-  %919 = call float @dx.op.unary.f32(i32 27, float %180)  ; Round_ni(value)
-  %920 = fadd fast float %919, -1.000000e+00
-  %921 = call float @dx.op.unary.f32(i32 27, float %181)  ; Round_ni(value)
-  %922 = fadd fast float %921, -1.000000e+00
-  %923 = extractvalue %dx.types.CBufRet.i32 %6, 3
-  %924 = icmp eq i32 %923, 0
-  br i1 %924, label %925, label %949
-
-; <label>:925                                     ; preds = %918
-  %926 = fcmp fast oge float %920, 0.000000e+00
-  %927 = fptoui float %920 to i32
-  %928 = icmp ult i32 %927, %13
-  %929 = and i1 %926, %928
-  %930 = fcmp fast oge float %922, 0.000000e+00
-  %931 = and i1 %930, %929
-  %932 = fptoui float %922 to i32
-  %933 = icmp ult i32 %932, %15
-  %934 = and i1 %933, %931
-  br i1 %934, label %935, label %1065
-
-; <label>:935                                     ; preds = %925
-  %936 = fptoui float %45 to i32
-  %937 = fptoui float %182 to i32
-  %938 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %939 = extractvalue %dx.types.CBufRet.i32 %938, 0
-  %940 = extractvalue %dx.types.CBufRet.i32 %938, 1
-  %941 = extractvalue %dx.types.CBufRet.i32 %938, 2
-  %942 = extractvalue %dx.types.CBufRet.i32 %938, 3
-  %943 = mul i32 %939, %936
-  %944 = call i32 @dx.op.tertiary.i32(i32 48, i32 %937, i32 %940, i32 %943)  ; IMad(a,b,c)
-  %945 = call i32 @dx.op.tertiary.i32(i32 48, i32 %932, i32 %941, i32 %944)  ; IMad(a,b,c)
-  %946 = call i32 @dx.op.tertiary.i32(i32 48, i32 %927, i32 %942, i32 %945)  ; IMad(a,b,c)
-  %947 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %946, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %948 = extractvalue %dx.types.ResRet.f32 %947, 0
-  br label %1065
-
-; <label>:949                                     ; preds = %918
-  %950 = icmp eq i32 %923, 1
-  br i1 %950, label %951, label %979
-
-; <label>:951                                     ; preds = %949
-  %952 = add i32 %13, -1
-  %953 = uitofp i32 %952 to float
-  %954 = call float @dx.op.binary.f32(i32 35, float %920, float 0.000000e+00)  ; FMax(a,b)
-  %955 = call float @dx.op.binary.f32(i32 36, float %954, float %953)  ; FMin(a,b)
-  %956 = fptoui float %955 to i32
-  %957 = add i32 %15, -1
-  %958 = uitofp i32 %957 to float
-  %959 = call float @dx.op.binary.f32(i32 35, float %922, float 0.000000e+00)  ; FMax(a,b)
-  %960 = call float @dx.op.binary.f32(i32 36, float %959, float %958)  ; FMin(a,b)
-  %961 = fptoui float %960 to i32
-  %962 = uitofp i32 %961 to float
-  %963 = uitofp i32 %956 to float
-  %964 = fptoui float %45 to i32
-  %965 = fptoui float %182 to i32
-  %966 = fptoui float %962 to i32
-  %967 = fptoui float %963 to i32
-  %968 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %969 = extractvalue %dx.types.CBufRet.i32 %968, 0
-  %970 = extractvalue %dx.types.CBufRet.i32 %968, 1
-  %971 = extractvalue %dx.types.CBufRet.i32 %968, 2
-  %972 = extractvalue %dx.types.CBufRet.i32 %968, 3
-  %973 = mul i32 %969, %964
-  %974 = call i32 @dx.op.tertiary.i32(i32 48, i32 %965, i32 %970, i32 %973)  ; IMad(a,b,c)
-  %975 = call i32 @dx.op.tertiary.i32(i32 48, i32 %966, i32 %971, i32 %974)  ; IMad(a,b,c)
-  %976 = call i32 @dx.op.tertiary.i32(i32 48, i32 %967, i32 %972, i32 %975)  ; IMad(a,b,c)
-  %977 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %976, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %978 = extractvalue %dx.types.ResRet.f32 %977, 0
-  br label %1065
-
-; <label>:979                                     ; preds = %949
-  %980 = icmp eq i32 %923, 2
-  br i1 %980, label %981, label %1065
-
-; <label>:981                                     ; preds = %979
-  %982 = fsub fast float %22, %20
-  %983 = fcmp fast olt float %920, %20
-  br i1 %983, label %984, label %997
-
-; <label>:984                                     ; preds = %981
-  %985 = fsub fast float %20, %920
-  %986 = fdiv fast float %985, %982
-  %987 = fptoui float %986 to i32
-  %988 = uitofp i32 %987 to float
-  %989 = fmul fast float %988, %982
-  %990 = fsub fast float %985, %989
-  %991 = and i32 %987, 1
-  %992 = icmp eq i32 %991, 0
-  br i1 %992, label %993, label %995
-
-; <label>:993                                     ; preds = %984
-  %994 = fadd fast float %990, %20
-  br label %1012
-
-; <label>:995                                     ; preds = %984
-  %996 = fsub fast float %22, %990
-  br label %1012
-
-; <label>:997                                     ; preds = %981
-  %998 = fcmp fast ogt float %920, %22
-  br i1 %998, label %999, label %1012
-
-; <label>:999                                     ; preds = %997
-  %1000 = fsub fast float %920, %22
-  %1001 = fdiv fast float %1000, %982
-  %1002 = fptoui float %1001 to i32
-  %1003 = uitofp i32 %1002 to float
-  %1004 = fmul fast float %1003, %982
-  %1005 = fsub fast float %1000, %1004
-  %1006 = and i32 %1002, 1
-  %1007 = icmp eq i32 %1006, 0
-  br i1 %1007, label %1008, label %1010
-
-; <label>:1008                                    ; preds = %999
-  %1009 = fsub fast float %22, %1005
-  br label %1012
-
-; <label>:1010                                    ; preds = %999
-  %1011 = fadd fast float %1005, %20
-  br label %1012
-
-; <label>:1012                                    ; preds = %1010, %1008, %997, %995, %993
-  %1013 = phi float [ %994, %993 ], [ %996, %995 ], [ %1009, %1008 ], [ %1011, %1010 ], [ %920, %997 ]
-  %1014 = fptoui float %1013 to i32
-  %1015 = fsub fast float %24, %20
-  %1016 = fcmp fast olt float %922, %20
-  br i1 %1016, label %1017, label %1030
-
-; <label>:1017                                    ; preds = %1012
-  %1018 = fsub fast float %20, %922
-  %1019 = fdiv fast float %1018, %1015
-  %1020 = fptoui float %1019 to i32
-  %1021 = uitofp i32 %1020 to float
-  %1022 = fmul fast float %1021, %1015
-  %1023 = fsub fast float %1018, %1022
-  %1024 = and i32 %1020, 1
-  %1025 = icmp eq i32 %1024, 0
-  br i1 %1025, label %1026, label %1028
-
-; <label>:1026                                    ; preds = %1017
-  %1027 = fadd fast float %1023, %20
-  br label %1045
-
-; <label>:1028                                    ; preds = %1017
-  %1029 = fsub fast float %24, %1023
-  br label %1045
-
-; <label>:1030                                    ; preds = %1012
-  %1031 = fcmp fast ogt float %922, %24
-  br i1 %1031, label %1032, label %1045
-
-; <label>:1032                                    ; preds = %1030
-  %1033 = fsub fast float %922, %24
-  %1034 = fdiv fast float %1033, %1015
-  %1035 = fptoui float %1034 to i32
-  %1036 = uitofp i32 %1035 to float
-  %1037 = fmul fast float %1036, %1015
-  %1038 = fsub fast float %1033, %1037
-  %1039 = and i32 %1035, 1
-  %1040 = icmp eq i32 %1039, 0
-  br i1 %1040, label %1041, label %1043
-
-; <label>:1041                                    ; preds = %1032
-  %1042 = fsub fast float %24, %1038
-  br label %1045
-
-; <label>:1043                                    ; preds = %1032
-  %1044 = fadd fast float %1038, %20
-  br label %1045
-
-; <label>:1045                                    ; preds = %1043, %1041, %1030, %1028, %1026
-  %1046 = phi float [ %1027, %1026 ], [ %1029, %1028 ], [ %1042, %1041 ], [ %1044, %1043 ], [ %922, %1030 ]
-  %1047 = fptoui float %1046 to i32
-  %1048 = uitofp i32 %1047 to float
-  %1049 = uitofp i32 %1014 to float
-  %1050 = fptoui float %45 to i32
-  %1051 = fptoui float %182 to i32
-  %1052 = fptoui float %1048 to i32
-  %1053 = fptoui float %1049 to i32
-  %1054 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1055 = extractvalue %dx.types.CBufRet.i32 %1054, 0
-  %1056 = extractvalue %dx.types.CBufRet.i32 %1054, 1
-  %1057 = extractvalue %dx.types.CBufRet.i32 %1054, 2
-  %1058 = extractvalue %dx.types.CBufRet.i32 %1054, 3
-  %1059 = mul i32 %1055, %1050
-  %1060 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1051, i32 %1056, i32 %1059)  ; IMad(a,b,c)
-  %1061 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1052, i32 %1057, i32 %1060)  ; IMad(a,b,c)
-  %1062 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1053, i32 %1058, i32 %1061)  ; IMad(a,b,c)
-  %1063 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %1062, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1064 = extractvalue %dx.types.ResRet.f32 %1063, 0
-  br label %1065
-
-; <label>:1065                                    ; preds = %1045, %979, %951, %935, %925
-  %1066 = phi float [ %948, %935 ], [ 0.000000e+00, %925 ], [ %978, %951 ], [ %1064, %1045 ], [ 0.000000e+00, %979 ]
-  br i1 %924, label %1067, label %1091
-
-; <label>:1067                                    ; preds = %1065
-  %1068 = fcmp fast oge float %919, 0.000000e+00
-  %1069 = fptoui float %919 to i32
-  %1070 = icmp ult i32 %1069, %13
-  %1071 = and i1 %1068, %1070
-  %1072 = fcmp fast oge float %922, 0.000000e+00
-  %1073 = and i1 %1072, %1071
-  %1074 = fptoui float %922 to i32
-  %1075 = icmp ult i32 %1074, %15
-  %1076 = and i1 %1075, %1073
-  br i1 %1076, label %1077, label %1207
-
-; <label>:1077                                    ; preds = %1067
-  %1078 = fptoui float %45 to i32
-  %1079 = fptoui float %182 to i32
-  %1080 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1081 = extractvalue %dx.types.CBufRet.i32 %1080, 0
-  %1082 = extractvalue %dx.types.CBufRet.i32 %1080, 1
-  %1083 = extractvalue %dx.types.CBufRet.i32 %1080, 2
-  %1084 = extractvalue %dx.types.CBufRet.i32 %1080, 3
-  %1085 = mul i32 %1081, %1078
-  %1086 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1079, i32 %1082, i32 %1085)  ; IMad(a,b,c)
-  %1087 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1074, i32 %1083, i32 %1086)  ; IMad(a,b,c)
-  %1088 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1069, i32 %1084, i32 %1087)  ; IMad(a,b,c)
-  %1089 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %1088, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1090 = extractvalue %dx.types.ResRet.f32 %1089, 0
-  br label %1207
-
-; <label>:1091                                    ; preds = %1065
-  %1092 = icmp eq i32 %923, 1
-  br i1 %1092, label %1093, label %1121
-
-; <label>:1093                                    ; preds = %1091
-  %1094 = add i32 %13, -1
-  %1095 = uitofp i32 %1094 to float
-  %1096 = call float @dx.op.binary.f32(i32 35, float %919, float 0.000000e+00)  ; FMax(a,b)
-  %1097 = call float @dx.op.binary.f32(i32 36, float %1096, float %1095)  ; FMin(a,b)
-  %1098 = fptoui float %1097 to i32
-  %1099 = add i32 %15, -1
-  %1100 = uitofp i32 %1099 to float
-  %1101 = call float @dx.op.binary.f32(i32 35, float %922, float 0.000000e+00)  ; FMax(a,b)
-  %1102 = call float @dx.op.binary.f32(i32 36, float %1101, float %1100)  ; FMin(a,b)
-  %1103 = fptoui float %1102 to i32
-  %1104 = uitofp i32 %1103 to float
-  %1105 = uitofp i32 %1098 to float
-  %1106 = fptoui float %45 to i32
-  %1107 = fptoui float %182 to i32
-  %1108 = fptoui float %1104 to i32
-  %1109 = fptoui float %1105 to i32
-  %1110 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1111 = extractvalue %dx.types.CBufRet.i32 %1110, 0
-  %1112 = extractvalue %dx.types.CBufRet.i32 %1110, 1
-  %1113 = extractvalue %dx.types.CBufRet.i32 %1110, 2
-  %1114 = extractvalue %dx.types.CBufRet.i32 %1110, 3
-  %1115 = mul i32 %1111, %1106
-  %1116 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1107, i32 %1112, i32 %1115)  ; IMad(a,b,c)
-  %1117 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1108, i32 %1113, i32 %1116)  ; IMad(a,b,c)
-  %1118 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1109, i32 %1114, i32 %1117)  ; IMad(a,b,c)
-  %1119 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %1118, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1120 = extractvalue %dx.types.ResRet.f32 %1119, 0
-  br label %1207
-
-; <label>:1121                                    ; preds = %1091
-  %1122 = icmp eq i32 %923, 2
-  br i1 %1122, label %1123, label %1207
-
-; <label>:1123                                    ; preds = %1121
-  %1124 = fsub fast float %22, %20
-  %1125 = fcmp fast olt float %919, %20
-  br i1 %1125, label %1126, label %1139
-
-; <label>:1126                                    ; preds = %1123
-  %1127 = fsub fast float %20, %919
-  %1128 = fdiv fast float %1127, %1124
-  %1129 = fptoui float %1128 to i32
-  %1130 = uitofp i32 %1129 to float
-  %1131 = fmul fast float %1130, %1124
-  %1132 = fsub fast float %1127, %1131
-  %1133 = and i32 %1129, 1
-  %1134 = icmp eq i32 %1133, 0
-  br i1 %1134, label %1135, label %1137
-
-; <label>:1135                                    ; preds = %1126
-  %1136 = fadd fast float %1132, %20
-  br label %1154
-
-; <label>:1137                                    ; preds = %1126
-  %1138 = fsub fast float %22, %1132
-  br label %1154
-
-; <label>:1139                                    ; preds = %1123
-  %1140 = fcmp fast ogt float %919, %22
-  br i1 %1140, label %1141, label %1154
-
-; <label>:1141                                    ; preds = %1139
-  %1142 = fsub fast float %919, %22
-  %1143 = fdiv fast float %1142, %1124
-  %1144 = fptoui float %1143 to i32
-  %1145 = uitofp i32 %1144 to float
-  %1146 = fmul fast float %1145, %1124
-  %1147 = fsub fast float %1142, %1146
-  %1148 = and i32 %1144, 1
-  %1149 = icmp eq i32 %1148, 0
-  br i1 %1149, label %1150, label %1152
-
-; <label>:1150                                    ; preds = %1141
-  %1151 = fsub fast float %22, %1147
-  br label %1154
-
-; <label>:1152                                    ; preds = %1141
-  %1153 = fadd fast float %1147, %20
-  br label %1154
-
-; <label>:1154                                    ; preds = %1152, %1150, %1139, %1137, %1135
-  %1155 = phi float [ %1136, %1135 ], [ %1138, %1137 ], [ %1151, %1150 ], [ %1153, %1152 ], [ %919, %1139 ]
-  %1156 = fptoui float %1155 to i32
-  %1157 = fsub fast float %24, %20
-  %1158 = fcmp fast olt float %922, %20
-  br i1 %1158, label %1159, label %1172
-
-; <label>:1159                                    ; preds = %1154
-  %1160 = fsub fast float %20, %922
-  %1161 = fdiv fast float %1160, %1157
-  %1162 = fptoui float %1161 to i32
-  %1163 = uitofp i32 %1162 to float
-  %1164 = fmul fast float %1163, %1157
-  %1165 = fsub fast float %1160, %1164
-  %1166 = and i32 %1162, 1
-  %1167 = icmp eq i32 %1166, 0
-  br i1 %1167, label %1168, label %1170
-
-; <label>:1168                                    ; preds = %1159
-  %1169 = fadd fast float %1165, %20
-  br label %1187
-
-; <label>:1170                                    ; preds = %1159
-  %1171 = fsub fast float %24, %1165
-  br label %1187
-
-; <label>:1172                                    ; preds = %1154
-  %1173 = fcmp fast ogt float %922, %24
-  br i1 %1173, label %1174, label %1187
-
-; <label>:1174                                    ; preds = %1172
-  %1175 = fsub fast float %922, %24
-  %1176 = fdiv fast float %1175, %1157
-  %1177 = fptoui float %1176 to i32
-  %1178 = uitofp i32 %1177 to float
-  %1179 = fmul fast float %1178, %1157
-  %1180 = fsub fast float %1175, %1179
-  %1181 = and i32 %1177, 1
-  %1182 = icmp eq i32 %1181, 0
-  br i1 %1182, label %1183, label %1185
-
-; <label>:1183                                    ; preds = %1174
-  %1184 = fsub fast float %24, %1180
-  br label %1187
-
-; <label>:1185                                    ; preds = %1174
-  %1186 = fadd fast float %1180, %20
-  br label %1187
-
-; <label>:1187                                    ; preds = %1185, %1183, %1172, %1170, %1168
-  %1188 = phi float [ %1169, %1168 ], [ %1171, %1170 ], [ %1184, %1183 ], [ %1186, %1185 ], [ %922, %1172 ]
-  %1189 = fptoui float %1188 to i32
-  %1190 = uitofp i32 %1189 to float
-  %1191 = uitofp i32 %1156 to float
-  %1192 = fptoui float %45 to i32
-  %1193 = fptoui float %182 to i32
-  %1194 = fptoui float %1190 to i32
-  %1195 = fptoui float %1191 to i32
-  %1196 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1197 = extractvalue %dx.types.CBufRet.i32 %1196, 0
-  %1198 = extractvalue %dx.types.CBufRet.i32 %1196, 1
-  %1199 = extractvalue %dx.types.CBufRet.i32 %1196, 2
-  %1200 = extractvalue %dx.types.CBufRet.i32 %1196, 3
-  %1201 = mul i32 %1197, %1192
-  %1202 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1193, i32 %1198, i32 %1201)  ; IMad(a,b,c)
-  %1203 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1194, i32 %1199, i32 %1202)  ; IMad(a,b,c)
-  %1204 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1195, i32 %1200, i32 %1203)  ; IMad(a,b,c)
-  %1205 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %1204, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1206 = extractvalue %dx.types.ResRet.f32 %1205, 0
-  br label %1207
-
-; <label>:1207                                    ; preds = %1187, %1121, %1093, %1077, %1067
-  %1208 = phi float [ %1090, %1077 ], [ 0.000000e+00, %1067 ], [ %1120, %1093 ], [ %1206, %1187 ], [ 0.000000e+00, %1121 ]
-  %1209 = fadd fast float %919, 1.000000e+00
-  br i1 %924, label %1210, label %1234
-
-; <label>:1210                                    ; preds = %1207
-  %1211 = fcmp fast oge float %1209, 0.000000e+00
-  %1212 = fptoui float %1209 to i32
-  %1213 = icmp ult i32 %1212, %13
-  %1214 = and i1 %1211, %1213
-  %1215 = fcmp fast oge float %922, 0.000000e+00
-  %1216 = and i1 %1215, %1214
-  %1217 = fptoui float %922 to i32
-  %1218 = icmp ult i32 %1217, %15
-  %1219 = and i1 %1218, %1216
-  br i1 %1219, label %1220, label %1350
-
-; <label>:1220                                    ; preds = %1210
-  %1221 = fptoui float %45 to i32
-  %1222 = fptoui float %182 to i32
-  %1223 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1224 = extractvalue %dx.types.CBufRet.i32 %1223, 0
-  %1225 = extractvalue %dx.types.CBufRet.i32 %1223, 1
-  %1226 = extractvalue %dx.types.CBufRet.i32 %1223, 2
-  %1227 = extractvalue %dx.types.CBufRet.i32 %1223, 3
-  %1228 = mul i32 %1224, %1221
-  %1229 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1222, i32 %1225, i32 %1228)  ; IMad(a,b,c)
-  %1230 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1217, i32 %1226, i32 %1229)  ; IMad(a,b,c)
-  %1231 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1212, i32 %1227, i32 %1230)  ; IMad(a,b,c)
-  %1232 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %1231, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1233 = extractvalue %dx.types.ResRet.f32 %1232, 0
-  br label %1350
-
-; <label>:1234                                    ; preds = %1207
-  %1235 = icmp eq i32 %923, 1
-  br i1 %1235, label %1236, label %1264
-
-; <label>:1236                                    ; preds = %1234
-  %1237 = add i32 %13, -1
-  %1238 = uitofp i32 %1237 to float
-  %1239 = call float @dx.op.binary.f32(i32 35, float %1209, float 0.000000e+00)  ; FMax(a,b)
-  %1240 = call float @dx.op.binary.f32(i32 36, float %1239, float %1238)  ; FMin(a,b)
-  %1241 = fptoui float %1240 to i32
-  %1242 = add i32 %15, -1
-  %1243 = uitofp i32 %1242 to float
-  %1244 = call float @dx.op.binary.f32(i32 35, float %922, float 0.000000e+00)  ; FMax(a,b)
-  %1245 = call float @dx.op.binary.f32(i32 36, float %1244, float %1243)  ; FMin(a,b)
-  %1246 = fptoui float %1245 to i32
-  %1247 = uitofp i32 %1246 to float
-  %1248 = uitofp i32 %1241 to float
-  %1249 = fptoui float %45 to i32
-  %1250 = fptoui float %182 to i32
-  %1251 = fptoui float %1247 to i32
-  %1252 = fptoui float %1248 to i32
-  %1253 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1254 = extractvalue %dx.types.CBufRet.i32 %1253, 0
-  %1255 = extractvalue %dx.types.CBufRet.i32 %1253, 1
-  %1256 = extractvalue %dx.types.CBufRet.i32 %1253, 2
-  %1257 = extractvalue %dx.types.CBufRet.i32 %1253, 3
-  %1258 = mul i32 %1254, %1249
-  %1259 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1250, i32 %1255, i32 %1258)  ; IMad(a,b,c)
-  %1260 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1251, i32 %1256, i32 %1259)  ; IMad(a,b,c)
-  %1261 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1252, i32 %1257, i32 %1260)  ; IMad(a,b,c)
-  %1262 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %1261, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1263 = extractvalue %dx.types.ResRet.f32 %1262, 0
-  br label %1350
-
-; <label>:1264                                    ; preds = %1234
-  %1265 = icmp eq i32 %923, 2
-  br i1 %1265, label %1266, label %1350
-
-; <label>:1266                                    ; preds = %1264
-  %1267 = fsub fast float %22, %20
-  %1268 = fcmp fast olt float %1209, %20
-  br i1 %1268, label %1269, label %1282
-
-; <label>:1269                                    ; preds = %1266
-  %1270 = fsub fast float %20, %1209
-  %1271 = fdiv fast float %1270, %1267
-  %1272 = fptoui float %1271 to i32
-  %1273 = uitofp i32 %1272 to float
-  %1274 = fmul fast float %1273, %1267
-  %1275 = fsub fast float %1270, %1274
-  %1276 = and i32 %1272, 1
-  %1277 = icmp eq i32 %1276, 0
-  br i1 %1277, label %1278, label %1280
-
-; <label>:1278                                    ; preds = %1269
-  %1279 = fadd fast float %1275, %20
-  br label %1297
-
-; <label>:1280                                    ; preds = %1269
-  %1281 = fsub fast float %22, %1275
-  br label %1297
-
-; <label>:1282                                    ; preds = %1266
-  %1283 = fcmp fast ogt float %1209, %22
-  br i1 %1283, label %1284, label %1297
-
-; <label>:1284                                    ; preds = %1282
-  %1285 = fsub fast float %1209, %22
-  %1286 = fdiv fast float %1285, %1267
-  %1287 = fptoui float %1286 to i32
-  %1288 = uitofp i32 %1287 to float
-  %1289 = fmul fast float %1288, %1267
-  %1290 = fsub fast float %1285, %1289
-  %1291 = and i32 %1287, 1
-  %1292 = icmp eq i32 %1291, 0
-  br i1 %1292, label %1293, label %1295
-
-; <label>:1293                                    ; preds = %1284
-  %1294 = fsub fast float %22, %1290
-  br label %1297
-
-; <label>:1295                                    ; preds = %1284
-  %1296 = fadd fast float %1290, %20
-  br label %1297
-
-; <label>:1297                                    ; preds = %1295, %1293, %1282, %1280, %1278
-  %1298 = phi float [ %1279, %1278 ], [ %1281, %1280 ], [ %1294, %1293 ], [ %1296, %1295 ], [ %1209, %1282 ]
-  %1299 = fptoui float %1298 to i32
-  %1300 = fsub fast float %24, %20
-  %1301 = fcmp fast olt float %922, %20
-  br i1 %1301, label %1302, label %1315
-
-; <label>:1302                                    ; preds = %1297
-  %1303 = fsub fast float %20, %922
-  %1304 = fdiv fast float %1303, %1300
-  %1305 = fptoui float %1304 to i32
-  %1306 = uitofp i32 %1305 to float
-  %1307 = fmul fast float %1306, %1300
-  %1308 = fsub fast float %1303, %1307
-  %1309 = and i32 %1305, 1
-  %1310 = icmp eq i32 %1309, 0
-  br i1 %1310, label %1311, label %1313
-
-; <label>:1311                                    ; preds = %1302
-  %1312 = fadd fast float %1308, %20
-  br label %1330
-
-; <label>:1313                                    ; preds = %1302
-  %1314 = fsub fast float %24, %1308
-  br label %1330
-
-; <label>:1315                                    ; preds = %1297
-  %1316 = fcmp fast ogt float %922, %24
-  br i1 %1316, label %1317, label %1330
-
-; <label>:1317                                    ; preds = %1315
-  %1318 = fsub fast float %922, %24
-  %1319 = fdiv fast float %1318, %1300
-  %1320 = fptoui float %1319 to i32
-  %1321 = uitofp i32 %1320 to float
-  %1322 = fmul fast float %1321, %1300
-  %1323 = fsub fast float %1318, %1322
-  %1324 = and i32 %1320, 1
-  %1325 = icmp eq i32 %1324, 0
-  br i1 %1325, label %1326, label %1328
-
-; <label>:1326                                    ; preds = %1317
-  %1327 = fsub fast float %24, %1323
-  br label %1330
-
-; <label>:1328                                    ; preds = %1317
-  %1329 = fadd fast float %1323, %20
-  br label %1330
-
-; <label>:1330                                    ; preds = %1328, %1326, %1315, %1313, %1311
-  %1331 = phi float [ %1312, %1311 ], [ %1314, %1313 ], [ %1327, %1326 ], [ %1329, %1328 ], [ %922, %1315 ]
-  %1332 = fptoui float %1331 to i32
-  %1333 = uitofp i32 %1332 to float
-  %1334 = uitofp i32 %1299 to float
-  %1335 = fptoui float %45 to i32
-  %1336 = fptoui float %182 to i32
-  %1337 = fptoui float %1333 to i32
-  %1338 = fptoui float %1334 to i32
-  %1339 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1340 = extractvalue %dx.types.CBufRet.i32 %1339, 0
-  %1341 = extractvalue %dx.types.CBufRet.i32 %1339, 1
-  %1342 = extractvalue %dx.types.CBufRet.i32 %1339, 2
-  %1343 = extractvalue %dx.types.CBufRet.i32 %1339, 3
-  %1344 = mul i32 %1340, %1335
-  %1345 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1336, i32 %1341, i32 %1344)  ; IMad(a,b,c)
-  %1346 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1337, i32 %1342, i32 %1345)  ; IMad(a,b,c)
-  %1347 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1338, i32 %1343, i32 %1346)  ; IMad(a,b,c)
-  %1348 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %1347, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1349 = extractvalue %dx.types.ResRet.f32 %1348, 0
-  br label %1350
-
-; <label>:1350                                    ; preds = %1330, %1264, %1236, %1220, %1210
-  %1351 = phi float [ %1233, %1220 ], [ 0.000000e+00, %1210 ], [ %1263, %1236 ], [ %1349, %1330 ], [ 0.000000e+00, %1264 ]
-  %1352 = fadd fast float %919, 2.000000e+00
-  br i1 %924, label %1353, label %1377
-
-; <label>:1353                                    ; preds = %1350
-  %1354 = fcmp fast oge float %1352, 0.000000e+00
-  %1355 = fptoui float %1352 to i32
-  %1356 = icmp ult i32 %1355, %13
-  %1357 = and i1 %1354, %1356
-  %1358 = fcmp fast oge float %922, 0.000000e+00
-  %1359 = and i1 %1358, %1357
-  %1360 = fptoui float %922 to i32
-  %1361 = icmp ult i32 %1360, %15
-  %1362 = and i1 %1361, %1359
-  br i1 %1362, label %1363, label %1493
-
-; <label>:1363                                    ; preds = %1353
-  %1364 = fptoui float %45 to i32
-  %1365 = fptoui float %182 to i32
-  %1366 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1367 = extractvalue %dx.types.CBufRet.i32 %1366, 0
-  %1368 = extractvalue %dx.types.CBufRet.i32 %1366, 1
-  %1369 = extractvalue %dx.types.CBufRet.i32 %1366, 2
-  %1370 = extractvalue %dx.types.CBufRet.i32 %1366, 3
-  %1371 = mul i32 %1367, %1364
-  %1372 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1365, i32 %1368, i32 %1371)  ; IMad(a,b,c)
-  %1373 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1360, i32 %1369, i32 %1372)  ; IMad(a,b,c)
-  %1374 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1355, i32 %1370, i32 %1373)  ; IMad(a,b,c)
-  %1375 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %1374, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1376 = extractvalue %dx.types.ResRet.f32 %1375, 0
-  br label %1493
-
-; <label>:1377                                    ; preds = %1350
-  %1378 = icmp eq i32 %923, 1
-  br i1 %1378, label %1379, label %1407
-
-; <label>:1379                                    ; preds = %1377
-  %1380 = add i32 %13, -1
-  %1381 = uitofp i32 %1380 to float
-  %1382 = call float @dx.op.binary.f32(i32 35, float %1352, float 0.000000e+00)  ; FMax(a,b)
-  %1383 = call float @dx.op.binary.f32(i32 36, float %1382, float %1381)  ; FMin(a,b)
-  %1384 = fptoui float %1383 to i32
-  %1385 = add i32 %15, -1
-  %1386 = uitofp i32 %1385 to float
-  %1387 = call float @dx.op.binary.f32(i32 35, float %922, float 0.000000e+00)  ; FMax(a,b)
-  %1388 = call float @dx.op.binary.f32(i32 36, float %1387, float %1386)  ; FMin(a,b)
-  %1389 = fptoui float %1388 to i32
-  %1390 = uitofp i32 %1389 to float
-  %1391 = uitofp i32 %1384 to float
-  %1392 = fptoui float %45 to i32
-  %1393 = fptoui float %182 to i32
-  %1394 = fptoui float %1390 to i32
-  %1395 = fptoui float %1391 to i32
-  %1396 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1397 = extractvalue %dx.types.CBufRet.i32 %1396, 0
-  %1398 = extractvalue %dx.types.CBufRet.i32 %1396, 1
-  %1399 = extractvalue %dx.types.CBufRet.i32 %1396, 2
-  %1400 = extractvalue %dx.types.CBufRet.i32 %1396, 3
-  %1401 = mul i32 %1397, %1392
-  %1402 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1393, i32 %1398, i32 %1401)  ; IMad(a,b,c)
-  %1403 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1394, i32 %1399, i32 %1402)  ; IMad(a,b,c)
-  %1404 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1395, i32 %1400, i32 %1403)  ; IMad(a,b,c)
-  %1405 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %1404, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1406 = extractvalue %dx.types.ResRet.f32 %1405, 0
-  br label %1493
-
-; <label>:1407                                    ; preds = %1377
-  %1408 = icmp eq i32 %923, 2
-  br i1 %1408, label %1409, label %1493
-
-; <label>:1409                                    ; preds = %1407
-  %1410 = fsub fast float %22, %20
-  %1411 = fcmp fast olt float %1352, %20
-  br i1 %1411, label %1412, label %1425
-
-; <label>:1412                                    ; preds = %1409
-  %1413 = fsub fast float %20, %1352
-  %1414 = fdiv fast float %1413, %1410
-  %1415 = fptoui float %1414 to i32
-  %1416 = uitofp i32 %1415 to float
-  %1417 = fmul fast float %1416, %1410
-  %1418 = fsub fast float %1413, %1417
-  %1419 = and i32 %1415, 1
-  %1420 = icmp eq i32 %1419, 0
-  br i1 %1420, label %1421, label %1423
-
-; <label>:1421                                    ; preds = %1412
-  %1422 = fadd fast float %1418, %20
-  br label %1440
-
-; <label>:1423                                    ; preds = %1412
-  %1424 = fsub fast float %22, %1418
-  br label %1440
-
-; <label>:1425                                    ; preds = %1409
-  %1426 = fcmp fast ogt float %1352, %22
-  br i1 %1426, label %1427, label %1440
-
-; <label>:1427                                    ; preds = %1425
-  %1428 = fsub fast float %1352, %22
-  %1429 = fdiv fast float %1428, %1410
-  %1430 = fptoui float %1429 to i32
-  %1431 = uitofp i32 %1430 to float
-  %1432 = fmul fast float %1431, %1410
-  %1433 = fsub fast float %1428, %1432
-  %1434 = and i32 %1430, 1
-  %1435 = icmp eq i32 %1434, 0
-  br i1 %1435, label %1436, label %1438
-
-; <label>:1436                                    ; preds = %1427
-  %1437 = fsub fast float %22, %1433
-  br label %1440
-
-; <label>:1438                                    ; preds = %1427
-  %1439 = fadd fast float %1433, %20
-  br label %1440
-
-; <label>:1440                                    ; preds = %1438, %1436, %1425, %1423, %1421
-  %1441 = phi float [ %1422, %1421 ], [ %1424, %1423 ], [ %1437, %1436 ], [ %1439, %1438 ], [ %1352, %1425 ]
-  %1442 = fptoui float %1441 to i32
-  %1443 = fsub fast float %24, %20
-  %1444 = fcmp fast olt float %922, %20
-  br i1 %1444, label %1445, label %1458
-
-; <label>:1445                                    ; preds = %1440
-  %1446 = fsub fast float %20, %922
-  %1447 = fdiv fast float %1446, %1443
-  %1448 = fptoui float %1447 to i32
-  %1449 = uitofp i32 %1448 to float
-  %1450 = fmul fast float %1449, %1443
-  %1451 = fsub fast float %1446, %1450
-  %1452 = and i32 %1448, 1
-  %1453 = icmp eq i32 %1452, 0
-  br i1 %1453, label %1454, label %1456
-
-; <label>:1454                                    ; preds = %1445
-  %1455 = fadd fast float %1451, %20
-  br label %1473
-
-; <label>:1456                                    ; preds = %1445
-  %1457 = fsub fast float %24, %1451
-  br label %1473
-
-; <label>:1458                                    ; preds = %1440
-  %1459 = fcmp fast ogt float %922, %24
-  br i1 %1459, label %1460, label %1473
-
-; <label>:1460                                    ; preds = %1458
-  %1461 = fsub fast float %922, %24
-  %1462 = fdiv fast float %1461, %1443
-  %1463 = fptoui float %1462 to i32
-  %1464 = uitofp i32 %1463 to float
-  %1465 = fmul fast float %1464, %1443
-  %1466 = fsub fast float %1461, %1465
-  %1467 = and i32 %1463, 1
-  %1468 = icmp eq i32 %1467, 0
-  br i1 %1468, label %1469, label %1471
-
-; <label>:1469                                    ; preds = %1460
-  %1470 = fsub fast float %24, %1466
-  br label %1473
-
-; <label>:1471                                    ; preds = %1460
-  %1472 = fadd fast float %1466, %20
-  br label %1473
-
-; <label>:1473                                    ; preds = %1471, %1469, %1458, %1456, %1454
-  %1474 = phi float [ %1455, %1454 ], [ %1457, %1456 ], [ %1470, %1469 ], [ %1472, %1471 ], [ %922, %1458 ]
-  %1475 = fptoui float %1474 to i32
-  %1476 = uitofp i32 %1475 to float
-  %1477 = uitofp i32 %1442 to float
-  %1478 = fptoui float %45 to i32
-  %1479 = fptoui float %182 to i32
-  %1480 = fptoui float %1476 to i32
-  %1481 = fptoui float %1477 to i32
-  %1482 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1483 = extractvalue %dx.types.CBufRet.i32 %1482, 0
-  %1484 = extractvalue %dx.types.CBufRet.i32 %1482, 1
-  %1485 = extractvalue %dx.types.CBufRet.i32 %1482, 2
-  %1486 = extractvalue %dx.types.CBufRet.i32 %1482, 3
-  %1487 = mul i32 %1483, %1478
-  %1488 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1479, i32 %1484, i32 %1487)  ; IMad(a,b,c)
-  %1489 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1480, i32 %1485, i32 %1488)  ; IMad(a,b,c)
-  %1490 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1481, i32 %1486, i32 %1489)  ; IMad(a,b,c)
-  %1491 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %1490, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1492 = extractvalue %dx.types.ResRet.f32 %1491, 0
-  br label %1493
-
-; <label>:1493                                    ; preds = %1473, %1407, %1379, %1363, %1353
-  %1494 = phi float [ %1376, %1363 ], [ 0.000000e+00, %1353 ], [ %1406, %1379 ], [ %1492, %1473 ], [ 0.000000e+00, %1407 ]
-  br i1 %924, label %1495, label %1519
-
-; <label>:1495                                    ; preds = %1493
-  %1496 = fcmp fast oge float %920, 0.000000e+00
-  %1497 = fptoui float %920 to i32
-  %1498 = icmp ult i32 %1497, %13
-  %1499 = and i1 %1496, %1498
-  %1500 = fcmp fast oge float %921, 0.000000e+00
-  %1501 = and i1 %1500, %1499
-  %1502 = fptoui float %921 to i32
-  %1503 = icmp ult i32 %1502, %15
-  %1504 = and i1 %1503, %1501
-  br i1 %1504, label %1505, label %1635
-
-; <label>:1505                                    ; preds = %1495
-  %1506 = fptoui float %45 to i32
-  %1507 = fptoui float %182 to i32
-  %1508 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1509 = extractvalue %dx.types.CBufRet.i32 %1508, 0
-  %1510 = extractvalue %dx.types.CBufRet.i32 %1508, 1
-  %1511 = extractvalue %dx.types.CBufRet.i32 %1508, 2
-  %1512 = extractvalue %dx.types.CBufRet.i32 %1508, 3
-  %1513 = mul i32 %1509, %1506
-  %1514 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1507, i32 %1510, i32 %1513)  ; IMad(a,b,c)
-  %1515 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1502, i32 %1511, i32 %1514)  ; IMad(a,b,c)
-  %1516 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1497, i32 %1512, i32 %1515)  ; IMad(a,b,c)
-  %1517 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %1516, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1518 = extractvalue %dx.types.ResRet.f32 %1517, 0
-  br label %1635
-
-; <label>:1519                                    ; preds = %1493
-  %1520 = icmp eq i32 %923, 1
-  br i1 %1520, label %1521, label %1549
-
-; <label>:1521                                    ; preds = %1519
-  %1522 = add i32 %13, -1
-  %1523 = uitofp i32 %1522 to float
-  %1524 = call float @dx.op.binary.f32(i32 35, float %920, float 0.000000e+00)  ; FMax(a,b)
-  %1525 = call float @dx.op.binary.f32(i32 36, float %1524, float %1523)  ; FMin(a,b)
-  %1526 = fptoui float %1525 to i32
-  %1527 = add i32 %15, -1
-  %1528 = uitofp i32 %1527 to float
-  %1529 = call float @dx.op.binary.f32(i32 35, float %921, float 0.000000e+00)  ; FMax(a,b)
-  %1530 = call float @dx.op.binary.f32(i32 36, float %1529, float %1528)  ; FMin(a,b)
-  %1531 = fptoui float %1530 to i32
-  %1532 = uitofp i32 %1531 to float
-  %1533 = uitofp i32 %1526 to float
-  %1534 = fptoui float %45 to i32
-  %1535 = fptoui float %182 to i32
-  %1536 = fptoui float %1532 to i32
-  %1537 = fptoui float %1533 to i32
-  %1538 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1539 = extractvalue %dx.types.CBufRet.i32 %1538, 0
-  %1540 = extractvalue %dx.types.CBufRet.i32 %1538, 1
-  %1541 = extractvalue %dx.types.CBufRet.i32 %1538, 2
-  %1542 = extractvalue %dx.types.CBufRet.i32 %1538, 3
-  %1543 = mul i32 %1539, %1534
-  %1544 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1535, i32 %1540, i32 %1543)  ; IMad(a,b,c)
-  %1545 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1536, i32 %1541, i32 %1544)  ; IMad(a,b,c)
-  %1546 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1537, i32 %1542, i32 %1545)  ; IMad(a,b,c)
-  %1547 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %1546, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1548 = extractvalue %dx.types.ResRet.f32 %1547, 0
-  br label %1635
-
-; <label>:1549                                    ; preds = %1519
-  %1550 = icmp eq i32 %923, 2
-  br i1 %1550, label %1551, label %1635
-
-; <label>:1551                                    ; preds = %1549
-  %1552 = fsub fast float %22, %20
-  %1553 = fcmp fast olt float %920, %20
-  br i1 %1553, label %1554, label %1567
-
-; <label>:1554                                    ; preds = %1551
-  %1555 = fsub fast float %20, %920
-  %1556 = fdiv fast float %1555, %1552
-  %1557 = fptoui float %1556 to i32
-  %1558 = uitofp i32 %1557 to float
-  %1559 = fmul fast float %1558, %1552
-  %1560 = fsub fast float %1555, %1559
-  %1561 = and i32 %1557, 1
-  %1562 = icmp eq i32 %1561, 0
-  br i1 %1562, label %1563, label %1565
-
-; <label>:1563                                    ; preds = %1554
-  %1564 = fadd fast float %1560, %20
-  br label %1582
-
-; <label>:1565                                    ; preds = %1554
-  %1566 = fsub fast float %22, %1560
-  br label %1582
-
-; <label>:1567                                    ; preds = %1551
-  %1568 = fcmp fast ogt float %920, %22
-  br i1 %1568, label %1569, label %1582
-
-; <label>:1569                                    ; preds = %1567
-  %1570 = fsub fast float %920, %22
-  %1571 = fdiv fast float %1570, %1552
-  %1572 = fptoui float %1571 to i32
-  %1573 = uitofp i32 %1572 to float
-  %1574 = fmul fast float %1573, %1552
-  %1575 = fsub fast float %1570, %1574
-  %1576 = and i32 %1572, 1
-  %1577 = icmp eq i32 %1576, 0
-  br i1 %1577, label %1578, label %1580
-
-; <label>:1578                                    ; preds = %1569
-  %1579 = fsub fast float %22, %1575
-  br label %1582
-
-; <label>:1580                                    ; preds = %1569
-  %1581 = fadd fast float %1575, %20
-  br label %1582
-
-; <label>:1582                                    ; preds = %1580, %1578, %1567, %1565, %1563
-  %1583 = phi float [ %1564, %1563 ], [ %1566, %1565 ], [ %1579, %1578 ], [ %1581, %1580 ], [ %920, %1567 ]
-  %1584 = fptoui float %1583 to i32
-  %1585 = fsub fast float %24, %20
-  %1586 = fcmp fast olt float %921, %20
-  br i1 %1586, label %1587, label %1600
-
-; <label>:1587                                    ; preds = %1582
-  %1588 = fsub fast float %20, %921
-  %1589 = fdiv fast float %1588, %1585
-  %1590 = fptoui float %1589 to i32
-  %1591 = uitofp i32 %1590 to float
-  %1592 = fmul fast float %1591, %1585
-  %1593 = fsub fast float %1588, %1592
-  %1594 = and i32 %1590, 1
-  %1595 = icmp eq i32 %1594, 0
-  br i1 %1595, label %1596, label %1598
-
-; <label>:1596                                    ; preds = %1587
-  %1597 = fadd fast float %1593, %20
-  br label %1615
-
-; <label>:1598                                    ; preds = %1587
-  %1599 = fsub fast float %24, %1593
-  br label %1615
-
-; <label>:1600                                    ; preds = %1582
-  %1601 = fcmp fast ogt float %921, %24
-  br i1 %1601, label %1602, label %1615
-
-; <label>:1602                                    ; preds = %1600
-  %1603 = fsub fast float %921, %24
-  %1604 = fdiv fast float %1603, %1585
-  %1605 = fptoui float %1604 to i32
-  %1606 = uitofp i32 %1605 to float
-  %1607 = fmul fast float %1606, %1585
-  %1608 = fsub fast float %1603, %1607
-  %1609 = and i32 %1605, 1
-  %1610 = icmp eq i32 %1609, 0
-  br i1 %1610, label %1611, label %1613
-
-; <label>:1611                                    ; preds = %1602
-  %1612 = fsub fast float %24, %1608
-  br label %1615
-
-; <label>:1613                                    ; preds = %1602
-  %1614 = fadd fast float %1608, %20
-  br label %1615
-
-; <label>:1615                                    ; preds = %1613, %1611, %1600, %1598, %1596
-  %1616 = phi float [ %1597, %1596 ], [ %1599, %1598 ], [ %1612, %1611 ], [ %1614, %1613 ], [ %921, %1600 ]
-  %1617 = fptoui float %1616 to i32
-  %1618 = uitofp i32 %1617 to float
-  %1619 = uitofp i32 %1584 to float
-  %1620 = fptoui float %45 to i32
-  %1621 = fptoui float %182 to i32
-  %1622 = fptoui float %1618 to i32
-  %1623 = fptoui float %1619 to i32
-  %1624 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1625 = extractvalue %dx.types.CBufRet.i32 %1624, 0
-  %1626 = extractvalue %dx.types.CBufRet.i32 %1624, 1
-  %1627 = extractvalue %dx.types.CBufRet.i32 %1624, 2
-  %1628 = extractvalue %dx.types.CBufRet.i32 %1624, 3
-  %1629 = mul i32 %1625, %1620
-  %1630 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1621, i32 %1626, i32 %1629)  ; IMad(a,b,c)
-  %1631 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1622, i32 %1627, i32 %1630)  ; IMad(a,b,c)
-  %1632 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1623, i32 %1628, i32 %1631)  ; IMad(a,b,c)
-  %1633 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %1632, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1634 = extractvalue %dx.types.ResRet.f32 %1633, 0
-  br label %1635
-
-; <label>:1635                                    ; preds = %1615, %1549, %1521, %1505, %1495
-  %1636 = phi float [ %1518, %1505 ], [ 0.000000e+00, %1495 ], [ %1548, %1521 ], [ %1634, %1615 ], [ 0.000000e+00, %1549 ]
-  br i1 %924, label %1637, label %1661
-
-; <label>:1637                                    ; preds = %1635
-  %1638 = fcmp fast oge float %919, 0.000000e+00
-  %1639 = fptoui float %919 to i32
-  %1640 = icmp ult i32 %1639, %13
-  %1641 = and i1 %1638, %1640
-  %1642 = fcmp fast oge float %921, 0.000000e+00
-  %1643 = and i1 %1642, %1641
-  %1644 = fptoui float %921 to i32
-  %1645 = icmp ult i32 %1644, %15
-  %1646 = and i1 %1645, %1643
-  br i1 %1646, label %1647, label %1777
-
-; <label>:1647                                    ; preds = %1637
-  %1648 = fptoui float %45 to i32
-  %1649 = fptoui float %182 to i32
-  %1650 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1651 = extractvalue %dx.types.CBufRet.i32 %1650, 0
-  %1652 = extractvalue %dx.types.CBufRet.i32 %1650, 1
-  %1653 = extractvalue %dx.types.CBufRet.i32 %1650, 2
-  %1654 = extractvalue %dx.types.CBufRet.i32 %1650, 3
-  %1655 = mul i32 %1651, %1648
-  %1656 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1649, i32 %1652, i32 %1655)  ; IMad(a,b,c)
-  %1657 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1644, i32 %1653, i32 %1656)  ; IMad(a,b,c)
-  %1658 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1639, i32 %1654, i32 %1657)  ; IMad(a,b,c)
-  %1659 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %1658, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1660 = extractvalue %dx.types.ResRet.f32 %1659, 0
-  br label %1777
-
-; <label>:1661                                    ; preds = %1635
-  %1662 = icmp eq i32 %923, 1
-  br i1 %1662, label %1663, label %1691
-
-; <label>:1663                                    ; preds = %1661
-  %1664 = add i32 %13, -1
-  %1665 = uitofp i32 %1664 to float
-  %1666 = call float @dx.op.binary.f32(i32 35, float %919, float 0.000000e+00)  ; FMax(a,b)
-  %1667 = call float @dx.op.binary.f32(i32 36, float %1666, float %1665)  ; FMin(a,b)
-  %1668 = fptoui float %1667 to i32
-  %1669 = add i32 %15, -1
-  %1670 = uitofp i32 %1669 to float
-  %1671 = call float @dx.op.binary.f32(i32 35, float %921, float 0.000000e+00)  ; FMax(a,b)
-  %1672 = call float @dx.op.binary.f32(i32 36, float %1671, float %1670)  ; FMin(a,b)
-  %1673 = fptoui float %1672 to i32
-  %1674 = uitofp i32 %1673 to float
-  %1675 = uitofp i32 %1668 to float
-  %1676 = fptoui float %45 to i32
-  %1677 = fptoui float %182 to i32
-  %1678 = fptoui float %1674 to i32
-  %1679 = fptoui float %1675 to i32
-  %1680 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1681 = extractvalue %dx.types.CBufRet.i32 %1680, 0
-  %1682 = extractvalue %dx.types.CBufRet.i32 %1680, 1
-  %1683 = extractvalue %dx.types.CBufRet.i32 %1680, 2
-  %1684 = extractvalue %dx.types.CBufRet.i32 %1680, 3
-  %1685 = mul i32 %1681, %1676
-  %1686 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1677, i32 %1682, i32 %1685)  ; IMad(a,b,c)
-  %1687 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1678, i32 %1683, i32 %1686)  ; IMad(a,b,c)
-  %1688 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1679, i32 %1684, i32 %1687)  ; IMad(a,b,c)
-  %1689 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %1688, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1690 = extractvalue %dx.types.ResRet.f32 %1689, 0
-  br label %1777
-
-; <label>:1691                                    ; preds = %1661
-  %1692 = icmp eq i32 %923, 2
-  br i1 %1692, label %1693, label %1777
-
-; <label>:1693                                    ; preds = %1691
-  %1694 = fsub fast float %22, %20
-  %1695 = fcmp fast olt float %919, %20
-  br i1 %1695, label %1696, label %1709
-
-; <label>:1696                                    ; preds = %1693
-  %1697 = fsub fast float %20, %919
-  %1698 = fdiv fast float %1697, %1694
-  %1699 = fptoui float %1698 to i32
-  %1700 = uitofp i32 %1699 to float
-  %1701 = fmul fast float %1700, %1694
-  %1702 = fsub fast float %1697, %1701
-  %1703 = and i32 %1699, 1
-  %1704 = icmp eq i32 %1703, 0
-  br i1 %1704, label %1705, label %1707
-
-; <label>:1705                                    ; preds = %1696
-  %1706 = fadd fast float %1702, %20
-  br label %1724
-
-; <label>:1707                                    ; preds = %1696
-  %1708 = fsub fast float %22, %1702
-  br label %1724
-
-; <label>:1709                                    ; preds = %1693
-  %1710 = fcmp fast ogt float %919, %22
-  br i1 %1710, label %1711, label %1724
-
-; <label>:1711                                    ; preds = %1709
-  %1712 = fsub fast float %919, %22
-  %1713 = fdiv fast float %1712, %1694
-  %1714 = fptoui float %1713 to i32
-  %1715 = uitofp i32 %1714 to float
-  %1716 = fmul fast float %1715, %1694
-  %1717 = fsub fast float %1712, %1716
-  %1718 = and i32 %1714, 1
-  %1719 = icmp eq i32 %1718, 0
-  br i1 %1719, label %1720, label %1722
-
-; <label>:1720                                    ; preds = %1711
-  %1721 = fsub fast float %22, %1717
-  br label %1724
-
-; <label>:1722                                    ; preds = %1711
-  %1723 = fadd fast float %1717, %20
-  br label %1724
-
-; <label>:1724                                    ; preds = %1722, %1720, %1709, %1707, %1705
-  %1725 = phi float [ %1706, %1705 ], [ %1708, %1707 ], [ %1721, %1720 ], [ %1723, %1722 ], [ %919, %1709 ]
-  %1726 = fptoui float %1725 to i32
-  %1727 = fsub fast float %24, %20
-  %1728 = fcmp fast olt float %921, %20
-  br i1 %1728, label %1729, label %1742
-
-; <label>:1729                                    ; preds = %1724
-  %1730 = fsub fast float %20, %921
-  %1731 = fdiv fast float %1730, %1727
-  %1732 = fptoui float %1731 to i32
-  %1733 = uitofp i32 %1732 to float
-  %1734 = fmul fast float %1733, %1727
-  %1735 = fsub fast float %1730, %1734
-  %1736 = and i32 %1732, 1
-  %1737 = icmp eq i32 %1736, 0
-  br i1 %1737, label %1738, label %1740
-
-; <label>:1738                                    ; preds = %1729
-  %1739 = fadd fast float %1735, %20
-  br label %1757
-
-; <label>:1740                                    ; preds = %1729
-  %1741 = fsub fast float %24, %1735
-  br label %1757
-
-; <label>:1742                                    ; preds = %1724
-  %1743 = fcmp fast ogt float %921, %24
-  br i1 %1743, label %1744, label %1757
-
-; <label>:1744                                    ; preds = %1742
-  %1745 = fsub fast float %921, %24
-  %1746 = fdiv fast float %1745, %1727
-  %1747 = fptoui float %1746 to i32
-  %1748 = uitofp i32 %1747 to float
-  %1749 = fmul fast float %1748, %1727
-  %1750 = fsub fast float %1745, %1749
-  %1751 = and i32 %1747, 1
-  %1752 = icmp eq i32 %1751, 0
-  br i1 %1752, label %1753, label %1755
-
-; <label>:1753                                    ; preds = %1744
-  %1754 = fsub fast float %24, %1750
-  br label %1757
-
-; <label>:1755                                    ; preds = %1744
-  %1756 = fadd fast float %1750, %20
-  br label %1757
-
-; <label>:1757                                    ; preds = %1755, %1753, %1742, %1740, %1738
-  %1758 = phi float [ %1739, %1738 ], [ %1741, %1740 ], [ %1754, %1753 ], [ %1756, %1755 ], [ %921, %1742 ]
-  %1759 = fptoui float %1758 to i32
-  %1760 = uitofp i32 %1759 to float
-  %1761 = uitofp i32 %1726 to float
-  %1762 = fptoui float %45 to i32
-  %1763 = fptoui float %182 to i32
-  %1764 = fptoui float %1760 to i32
-  %1765 = fptoui float %1761 to i32
-  %1766 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1767 = extractvalue %dx.types.CBufRet.i32 %1766, 0
-  %1768 = extractvalue %dx.types.CBufRet.i32 %1766, 1
-  %1769 = extractvalue %dx.types.CBufRet.i32 %1766, 2
-  %1770 = extractvalue %dx.types.CBufRet.i32 %1766, 3
-  %1771 = mul i32 %1767, %1762
-  %1772 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1763, i32 %1768, i32 %1771)  ; IMad(a,b,c)
-  %1773 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1764, i32 %1769, i32 %1772)  ; IMad(a,b,c)
-  %1774 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1765, i32 %1770, i32 %1773)  ; IMad(a,b,c)
-  %1775 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %1774, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1776 = extractvalue %dx.types.ResRet.f32 %1775, 0
-  br label %1777
-
-; <label>:1777                                    ; preds = %1757, %1691, %1663, %1647, %1637
-  %1778 = phi float [ %1660, %1647 ], [ 0.000000e+00, %1637 ], [ %1690, %1663 ], [ %1776, %1757 ], [ 0.000000e+00, %1691 ]
-  br i1 %924, label %1779, label %1803
-
-; <label>:1779                                    ; preds = %1777
-  %1780 = fcmp fast oge float %1209, 0.000000e+00
-  %1781 = fptoui float %1209 to i32
-  %1782 = icmp ult i32 %1781, %13
-  %1783 = and i1 %1780, %1782
-  %1784 = fcmp fast oge float %921, 0.000000e+00
-  %1785 = and i1 %1784, %1783
-  %1786 = fptoui float %921 to i32
-  %1787 = icmp ult i32 %1786, %15
-  %1788 = and i1 %1787, %1785
-  br i1 %1788, label %1789, label %1919
-
-; <label>:1789                                    ; preds = %1779
-  %1790 = fptoui float %45 to i32
-  %1791 = fptoui float %182 to i32
-  %1792 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1793 = extractvalue %dx.types.CBufRet.i32 %1792, 0
-  %1794 = extractvalue %dx.types.CBufRet.i32 %1792, 1
-  %1795 = extractvalue %dx.types.CBufRet.i32 %1792, 2
-  %1796 = extractvalue %dx.types.CBufRet.i32 %1792, 3
-  %1797 = mul i32 %1793, %1790
-  %1798 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1791, i32 %1794, i32 %1797)  ; IMad(a,b,c)
-  %1799 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1786, i32 %1795, i32 %1798)  ; IMad(a,b,c)
-  %1800 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1781, i32 %1796, i32 %1799)  ; IMad(a,b,c)
-  %1801 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %1800, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1802 = extractvalue %dx.types.ResRet.f32 %1801, 0
-  br label %1919
-
-; <label>:1803                                    ; preds = %1777
-  %1804 = icmp eq i32 %923, 1
-  br i1 %1804, label %1805, label %1833
-
-; <label>:1805                                    ; preds = %1803
-  %1806 = add i32 %13, -1
-  %1807 = uitofp i32 %1806 to float
-  %1808 = call float @dx.op.binary.f32(i32 35, float %1209, float 0.000000e+00)  ; FMax(a,b)
-  %1809 = call float @dx.op.binary.f32(i32 36, float %1808, float %1807)  ; FMin(a,b)
-  %1810 = fptoui float %1809 to i32
-  %1811 = add i32 %15, -1
-  %1812 = uitofp i32 %1811 to float
-  %1813 = call float @dx.op.binary.f32(i32 35, float %921, float 0.000000e+00)  ; FMax(a,b)
-  %1814 = call float @dx.op.binary.f32(i32 36, float %1813, float %1812)  ; FMin(a,b)
-  %1815 = fptoui float %1814 to i32
-  %1816 = uitofp i32 %1815 to float
-  %1817 = uitofp i32 %1810 to float
-  %1818 = fptoui float %45 to i32
-  %1819 = fptoui float %182 to i32
-  %1820 = fptoui float %1816 to i32
-  %1821 = fptoui float %1817 to i32
-  %1822 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1823 = extractvalue %dx.types.CBufRet.i32 %1822, 0
-  %1824 = extractvalue %dx.types.CBufRet.i32 %1822, 1
-  %1825 = extractvalue %dx.types.CBufRet.i32 %1822, 2
-  %1826 = extractvalue %dx.types.CBufRet.i32 %1822, 3
-  %1827 = mul i32 %1823, %1818
-  %1828 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1819, i32 %1824, i32 %1827)  ; IMad(a,b,c)
-  %1829 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1820, i32 %1825, i32 %1828)  ; IMad(a,b,c)
-  %1830 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1821, i32 %1826, i32 %1829)  ; IMad(a,b,c)
-  %1831 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %1830, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1832 = extractvalue %dx.types.ResRet.f32 %1831, 0
-  br label %1919
-
-; <label>:1833                                    ; preds = %1803
-  %1834 = icmp eq i32 %923, 2
-  br i1 %1834, label %1835, label %1919
-
-; <label>:1835                                    ; preds = %1833
-  %1836 = fsub fast float %22, %20
-  %1837 = fcmp fast olt float %1209, %20
-  br i1 %1837, label %1838, label %1851
-
-; <label>:1838                                    ; preds = %1835
-  %1839 = fsub fast float %20, %1209
-  %1840 = fdiv fast float %1839, %1836
-  %1841 = fptoui float %1840 to i32
-  %1842 = uitofp i32 %1841 to float
-  %1843 = fmul fast float %1842, %1836
-  %1844 = fsub fast float %1839, %1843
-  %1845 = and i32 %1841, 1
-  %1846 = icmp eq i32 %1845, 0
-  br i1 %1846, label %1847, label %1849
-
-; <label>:1847                                    ; preds = %1838
-  %1848 = fadd fast float %1844, %20
-  br label %1866
-
-; <label>:1849                                    ; preds = %1838
-  %1850 = fsub fast float %22, %1844
-  br label %1866
-
-; <label>:1851                                    ; preds = %1835
-  %1852 = fcmp fast ogt float %1209, %22
-  br i1 %1852, label %1853, label %1866
-
-; <label>:1853                                    ; preds = %1851
-  %1854 = fsub fast float %1209, %22
-  %1855 = fdiv fast float %1854, %1836
-  %1856 = fptoui float %1855 to i32
-  %1857 = uitofp i32 %1856 to float
-  %1858 = fmul fast float %1857, %1836
-  %1859 = fsub fast float %1854, %1858
-  %1860 = and i32 %1856, 1
-  %1861 = icmp eq i32 %1860, 0
-  br i1 %1861, label %1862, label %1864
-
-; <label>:1862                                    ; preds = %1853
-  %1863 = fsub fast float %22, %1859
-  br label %1866
-
-; <label>:1864                                    ; preds = %1853
-  %1865 = fadd fast float %1859, %20
-  br label %1866
-
-; <label>:1866                                    ; preds = %1864, %1862, %1851, %1849, %1847
-  %1867 = phi float [ %1848, %1847 ], [ %1850, %1849 ], [ %1863, %1862 ], [ %1865, %1864 ], [ %1209, %1851 ]
-  %1868 = fptoui float %1867 to i32
-  %1869 = fsub fast float %24, %20
-  %1870 = fcmp fast olt float %921, %20
-  br i1 %1870, label %1871, label %1884
-
-; <label>:1871                                    ; preds = %1866
-  %1872 = fsub fast float %20, %921
-  %1873 = fdiv fast float %1872, %1869
-  %1874 = fptoui float %1873 to i32
-  %1875 = uitofp i32 %1874 to float
-  %1876 = fmul fast float %1875, %1869
-  %1877 = fsub fast float %1872, %1876
-  %1878 = and i32 %1874, 1
-  %1879 = icmp eq i32 %1878, 0
-  br i1 %1879, label %1880, label %1882
-
-; <label>:1880                                    ; preds = %1871
-  %1881 = fadd fast float %1877, %20
-  br label %1899
-
-; <label>:1882                                    ; preds = %1871
-  %1883 = fsub fast float %24, %1877
-  br label %1899
-
-; <label>:1884                                    ; preds = %1866
-  %1885 = fcmp fast ogt float %921, %24
-  br i1 %1885, label %1886, label %1899
-
-; <label>:1886                                    ; preds = %1884
-  %1887 = fsub fast float %921, %24
-  %1888 = fdiv fast float %1887, %1869
-  %1889 = fptoui float %1888 to i32
-  %1890 = uitofp i32 %1889 to float
-  %1891 = fmul fast float %1890, %1869
-  %1892 = fsub fast float %1887, %1891
-  %1893 = and i32 %1889, 1
-  %1894 = icmp eq i32 %1893, 0
-  br i1 %1894, label %1895, label %1897
-
-; <label>:1895                                    ; preds = %1886
-  %1896 = fsub fast float %24, %1892
-  br label %1899
-
-; <label>:1897                                    ; preds = %1886
-  %1898 = fadd fast float %1892, %20
-  br label %1899
-
-; <label>:1899                                    ; preds = %1897, %1895, %1884, %1882, %1880
-  %1900 = phi float [ %1881, %1880 ], [ %1883, %1882 ], [ %1896, %1895 ], [ %1898, %1897 ], [ %921, %1884 ]
-  %1901 = fptoui float %1900 to i32
-  %1902 = uitofp i32 %1901 to float
-  %1903 = uitofp i32 %1868 to float
-  %1904 = fptoui float %45 to i32
-  %1905 = fptoui float %182 to i32
-  %1906 = fptoui float %1902 to i32
-  %1907 = fptoui float %1903 to i32
-  %1908 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1909 = extractvalue %dx.types.CBufRet.i32 %1908, 0
-  %1910 = extractvalue %dx.types.CBufRet.i32 %1908, 1
-  %1911 = extractvalue %dx.types.CBufRet.i32 %1908, 2
-  %1912 = extractvalue %dx.types.CBufRet.i32 %1908, 3
-  %1913 = mul i32 %1909, %1904
-  %1914 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1905, i32 %1910, i32 %1913)  ; IMad(a,b,c)
-  %1915 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1906, i32 %1911, i32 %1914)  ; IMad(a,b,c)
-  %1916 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1907, i32 %1912, i32 %1915)  ; IMad(a,b,c)
-  %1917 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %1916, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1918 = extractvalue %dx.types.ResRet.f32 %1917, 0
-  br label %1919
-
-; <label>:1919                                    ; preds = %1899, %1833, %1805, %1789, %1779
-  %1920 = phi float [ %1802, %1789 ], [ 0.000000e+00, %1779 ], [ %1832, %1805 ], [ %1918, %1899 ], [ 0.000000e+00, %1833 ]
-  br i1 %924, label %1921, label %1945
-
-; <label>:1921                                    ; preds = %1919
-  %1922 = fcmp fast oge float %1352, 0.000000e+00
-  %1923 = fptoui float %1352 to i32
-  %1924 = icmp ult i32 %1923, %13
-  %1925 = and i1 %1922, %1924
-  %1926 = fcmp fast oge float %921, 0.000000e+00
-  %1927 = and i1 %1926, %1925
-  %1928 = fptoui float %921 to i32
-  %1929 = icmp ult i32 %1928, %15
-  %1930 = and i1 %1929, %1927
-  br i1 %1930, label %1931, label %2061
-
-; <label>:1931                                    ; preds = %1921
-  %1932 = fptoui float %45 to i32
-  %1933 = fptoui float %182 to i32
-  %1934 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1935 = extractvalue %dx.types.CBufRet.i32 %1934, 0
-  %1936 = extractvalue %dx.types.CBufRet.i32 %1934, 1
-  %1937 = extractvalue %dx.types.CBufRet.i32 %1934, 2
-  %1938 = extractvalue %dx.types.CBufRet.i32 %1934, 3
-  %1939 = mul i32 %1935, %1932
-  %1940 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1933, i32 %1936, i32 %1939)  ; IMad(a,b,c)
-  %1941 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1928, i32 %1937, i32 %1940)  ; IMad(a,b,c)
-  %1942 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1923, i32 %1938, i32 %1941)  ; IMad(a,b,c)
-  %1943 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %1942, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1944 = extractvalue %dx.types.ResRet.f32 %1943, 0
-  br label %2061
-
-; <label>:1945                                    ; preds = %1919
-  %1946 = icmp eq i32 %923, 1
-  br i1 %1946, label %1947, label %1975
-
-; <label>:1947                                    ; preds = %1945
-  %1948 = add i32 %13, -1
-  %1949 = uitofp i32 %1948 to float
-  %1950 = call float @dx.op.binary.f32(i32 35, float %1352, float 0.000000e+00)  ; FMax(a,b)
-  %1951 = call float @dx.op.binary.f32(i32 36, float %1950, float %1949)  ; FMin(a,b)
-  %1952 = fptoui float %1951 to i32
-  %1953 = add i32 %15, -1
-  %1954 = uitofp i32 %1953 to float
-  %1955 = call float @dx.op.binary.f32(i32 35, float %921, float 0.000000e+00)  ; FMax(a,b)
-  %1956 = call float @dx.op.binary.f32(i32 36, float %1955, float %1954)  ; FMin(a,b)
-  %1957 = fptoui float %1956 to i32
-  %1958 = uitofp i32 %1957 to float
-  %1959 = uitofp i32 %1952 to float
-  %1960 = fptoui float %45 to i32
-  %1961 = fptoui float %182 to i32
-  %1962 = fptoui float %1958 to i32
-  %1963 = fptoui float %1959 to i32
-  %1964 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1965 = extractvalue %dx.types.CBufRet.i32 %1964, 0
-  %1966 = extractvalue %dx.types.CBufRet.i32 %1964, 1
-  %1967 = extractvalue %dx.types.CBufRet.i32 %1964, 2
-  %1968 = extractvalue %dx.types.CBufRet.i32 %1964, 3
-  %1969 = mul i32 %1965, %1960
-  %1970 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1961, i32 %1966, i32 %1969)  ; IMad(a,b,c)
-  %1971 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1962, i32 %1967, i32 %1970)  ; IMad(a,b,c)
-  %1972 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1963, i32 %1968, i32 %1971)  ; IMad(a,b,c)
-  %1973 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %1972, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1974 = extractvalue %dx.types.ResRet.f32 %1973, 0
-  br label %2061
-
-; <label>:1975                                    ; preds = %1945
-  %1976 = icmp eq i32 %923, 2
-  br i1 %1976, label %1977, label %2061
-
-; <label>:1977                                    ; preds = %1975
-  %1978 = fsub fast float %22, %20
-  %1979 = fcmp fast olt float %1352, %20
-  br i1 %1979, label %1980, label %1993
-
-; <label>:1980                                    ; preds = %1977
-  %1981 = fsub fast float %20, %1352
-  %1982 = fdiv fast float %1981, %1978
-  %1983 = fptoui float %1982 to i32
-  %1984 = uitofp i32 %1983 to float
-  %1985 = fmul fast float %1984, %1978
-  %1986 = fsub fast float %1981, %1985
-  %1987 = and i32 %1983, 1
-  %1988 = icmp eq i32 %1987, 0
-  br i1 %1988, label %1989, label %1991
-
-; <label>:1989                                    ; preds = %1980
-  %1990 = fadd fast float %1986, %20
-  br label %2008
-
-; <label>:1991                                    ; preds = %1980
-  %1992 = fsub fast float %22, %1986
-  br label %2008
-
-; <label>:1993                                    ; preds = %1977
-  %1994 = fcmp fast ogt float %1352, %22
-  br i1 %1994, label %1995, label %2008
-
-; <label>:1995                                    ; preds = %1993
-  %1996 = fsub fast float %1352, %22
-  %1997 = fdiv fast float %1996, %1978
-  %1998 = fptoui float %1997 to i32
-  %1999 = uitofp i32 %1998 to float
-  %2000 = fmul fast float %1999, %1978
-  %2001 = fsub fast float %1996, %2000
-  %2002 = and i32 %1998, 1
-  %2003 = icmp eq i32 %2002, 0
-  br i1 %2003, label %2004, label %2006
-
-; <label>:2004                                    ; preds = %1995
-  %2005 = fsub fast float %22, %2001
-  br label %2008
-
-; <label>:2006                                    ; preds = %1995
-  %2007 = fadd fast float %2001, %20
-  br label %2008
-
-; <label>:2008                                    ; preds = %2006, %2004, %1993, %1991, %1989
-  %2009 = phi float [ %1990, %1989 ], [ %1992, %1991 ], [ %2005, %2004 ], [ %2007, %2006 ], [ %1352, %1993 ]
-  %2010 = fptoui float %2009 to i32
-  %2011 = fsub fast float %24, %20
-  %2012 = fcmp fast olt float %921, %20
-  br i1 %2012, label %2013, label %2026
-
-; <label>:2013                                    ; preds = %2008
-  %2014 = fsub fast float %20, %921
-  %2015 = fdiv fast float %2014, %2011
-  %2016 = fptoui float %2015 to i32
-  %2017 = uitofp i32 %2016 to float
-  %2018 = fmul fast float %2017, %2011
-  %2019 = fsub fast float %2014, %2018
-  %2020 = and i32 %2016, 1
-  %2021 = icmp eq i32 %2020, 0
-  br i1 %2021, label %2022, label %2024
-
-; <label>:2022                                    ; preds = %2013
-  %2023 = fadd fast float %2019, %20
-  br label %2041
-
-; <label>:2024                                    ; preds = %2013
-  %2025 = fsub fast float %24, %2019
-  br label %2041
-
-; <label>:2026                                    ; preds = %2008
-  %2027 = fcmp fast ogt float %921, %24
-  br i1 %2027, label %2028, label %2041
-
-; <label>:2028                                    ; preds = %2026
-  %2029 = fsub fast float %921, %24
-  %2030 = fdiv fast float %2029, %2011
-  %2031 = fptoui float %2030 to i32
-  %2032 = uitofp i32 %2031 to float
-  %2033 = fmul fast float %2032, %2011
-  %2034 = fsub fast float %2029, %2033
-  %2035 = and i32 %2031, 1
-  %2036 = icmp eq i32 %2035, 0
-  br i1 %2036, label %2037, label %2039
-
-; <label>:2037                                    ; preds = %2028
-  %2038 = fsub fast float %24, %2034
-  br label %2041
-
-; <label>:2039                                    ; preds = %2028
-  %2040 = fadd fast float %2034, %20
-  br label %2041
-
-; <label>:2041                                    ; preds = %2039, %2037, %2026, %2024, %2022
-  %2042 = phi float [ %2023, %2022 ], [ %2025, %2024 ], [ %2038, %2037 ], [ %2040, %2039 ], [ %921, %2026 ]
-  %2043 = fptoui float %2042 to i32
-  %2044 = uitofp i32 %2043 to float
-  %2045 = uitofp i32 %2010 to float
-  %2046 = fptoui float %45 to i32
-  %2047 = fptoui float %182 to i32
-  %2048 = fptoui float %2044 to i32
-  %2049 = fptoui float %2045 to i32
-  %2050 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2051 = extractvalue %dx.types.CBufRet.i32 %2050, 0
-  %2052 = extractvalue %dx.types.CBufRet.i32 %2050, 1
-  %2053 = extractvalue %dx.types.CBufRet.i32 %2050, 2
-  %2054 = extractvalue %dx.types.CBufRet.i32 %2050, 3
-  %2055 = mul i32 %2051, %2046
-  %2056 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2047, i32 %2052, i32 %2055)  ; IMad(a,b,c)
-  %2057 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2048, i32 %2053, i32 %2056)  ; IMad(a,b,c)
-  %2058 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2049, i32 %2054, i32 %2057)  ; IMad(a,b,c)
-  %2059 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %2058, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2060 = extractvalue %dx.types.ResRet.f32 %2059, 0
-  br label %2061
-
-; <label>:2061                                    ; preds = %2041, %1975, %1947, %1931, %1921
-  %2062 = phi float [ %1944, %1931 ], [ 0.000000e+00, %1921 ], [ %1974, %1947 ], [ %2060, %2041 ], [ 0.000000e+00, %1975 ]
-  %2063 = fadd fast float %921, 1.000000e+00
-  br i1 %924, label %2064, label %2088
-
-; <label>:2064                                    ; preds = %2061
-  %2065 = fcmp fast oge float %920, 0.000000e+00
-  %2066 = fptoui float %920 to i32
-  %2067 = icmp ult i32 %2066, %13
-  %2068 = and i1 %2065, %2067
-  %2069 = fcmp fast oge float %2063, 0.000000e+00
-  %2070 = and i1 %2069, %2068
-  %2071 = fptoui float %2063 to i32
-  %2072 = icmp ult i32 %2071, %15
-  %2073 = and i1 %2072, %2070
-  br i1 %2073, label %2074, label %2204
-
-; <label>:2074                                    ; preds = %2064
-  %2075 = fptoui float %45 to i32
-  %2076 = fptoui float %182 to i32
-  %2077 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2078 = extractvalue %dx.types.CBufRet.i32 %2077, 0
-  %2079 = extractvalue %dx.types.CBufRet.i32 %2077, 1
-  %2080 = extractvalue %dx.types.CBufRet.i32 %2077, 2
-  %2081 = extractvalue %dx.types.CBufRet.i32 %2077, 3
-  %2082 = mul i32 %2078, %2075
-  %2083 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2076, i32 %2079, i32 %2082)  ; IMad(a,b,c)
-  %2084 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2071, i32 %2080, i32 %2083)  ; IMad(a,b,c)
-  %2085 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2066, i32 %2081, i32 %2084)  ; IMad(a,b,c)
-  %2086 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %2085, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2087 = extractvalue %dx.types.ResRet.f32 %2086, 0
-  br label %2204
-
-; <label>:2088                                    ; preds = %2061
-  %2089 = icmp eq i32 %923, 1
-  br i1 %2089, label %2090, label %2118
-
-; <label>:2090                                    ; preds = %2088
-  %2091 = add i32 %13, -1
-  %2092 = uitofp i32 %2091 to float
-  %2093 = call float @dx.op.binary.f32(i32 35, float %920, float 0.000000e+00)  ; FMax(a,b)
-  %2094 = call float @dx.op.binary.f32(i32 36, float %2093, float %2092)  ; FMin(a,b)
-  %2095 = fptoui float %2094 to i32
-  %2096 = add i32 %15, -1
-  %2097 = uitofp i32 %2096 to float
-  %2098 = call float @dx.op.binary.f32(i32 35, float %2063, float 0.000000e+00)  ; FMax(a,b)
-  %2099 = call float @dx.op.binary.f32(i32 36, float %2098, float %2097)  ; FMin(a,b)
-  %2100 = fptoui float %2099 to i32
-  %2101 = uitofp i32 %2100 to float
-  %2102 = uitofp i32 %2095 to float
-  %2103 = fptoui float %45 to i32
-  %2104 = fptoui float %182 to i32
-  %2105 = fptoui float %2101 to i32
-  %2106 = fptoui float %2102 to i32
-  %2107 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2108 = extractvalue %dx.types.CBufRet.i32 %2107, 0
-  %2109 = extractvalue %dx.types.CBufRet.i32 %2107, 1
-  %2110 = extractvalue %dx.types.CBufRet.i32 %2107, 2
-  %2111 = extractvalue %dx.types.CBufRet.i32 %2107, 3
-  %2112 = mul i32 %2108, %2103
-  %2113 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2104, i32 %2109, i32 %2112)  ; IMad(a,b,c)
-  %2114 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2105, i32 %2110, i32 %2113)  ; IMad(a,b,c)
-  %2115 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2106, i32 %2111, i32 %2114)  ; IMad(a,b,c)
-  %2116 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %2115, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2117 = extractvalue %dx.types.ResRet.f32 %2116, 0
-  br label %2204
-
-; <label>:2118                                    ; preds = %2088
-  %2119 = icmp eq i32 %923, 2
-  br i1 %2119, label %2120, label %2204
-
-; <label>:2120                                    ; preds = %2118
-  %2121 = fsub fast float %22, %20
-  %2122 = fcmp fast olt float %920, %20
-  br i1 %2122, label %2123, label %2136
-
-; <label>:2123                                    ; preds = %2120
-  %2124 = fsub fast float %20, %920
-  %2125 = fdiv fast float %2124, %2121
-  %2126 = fptoui float %2125 to i32
-  %2127 = uitofp i32 %2126 to float
-  %2128 = fmul fast float %2127, %2121
-  %2129 = fsub fast float %2124, %2128
-  %2130 = and i32 %2126, 1
-  %2131 = icmp eq i32 %2130, 0
-  br i1 %2131, label %2132, label %2134
-
-; <label>:2132                                    ; preds = %2123
-  %2133 = fadd fast float %2129, %20
-  br label %2151
-
-; <label>:2134                                    ; preds = %2123
-  %2135 = fsub fast float %22, %2129
-  br label %2151
-
-; <label>:2136                                    ; preds = %2120
-  %2137 = fcmp fast ogt float %920, %22
-  br i1 %2137, label %2138, label %2151
-
-; <label>:2138                                    ; preds = %2136
-  %2139 = fsub fast float %920, %22
-  %2140 = fdiv fast float %2139, %2121
-  %2141 = fptoui float %2140 to i32
-  %2142 = uitofp i32 %2141 to float
-  %2143 = fmul fast float %2142, %2121
-  %2144 = fsub fast float %2139, %2143
-  %2145 = and i32 %2141, 1
-  %2146 = icmp eq i32 %2145, 0
-  br i1 %2146, label %2147, label %2149
-
-; <label>:2147                                    ; preds = %2138
-  %2148 = fsub fast float %22, %2144
-  br label %2151
-
-; <label>:2149                                    ; preds = %2138
-  %2150 = fadd fast float %2144, %20
-  br label %2151
-
-; <label>:2151                                    ; preds = %2149, %2147, %2136, %2134, %2132
-  %2152 = phi float [ %2133, %2132 ], [ %2135, %2134 ], [ %2148, %2147 ], [ %2150, %2149 ], [ %920, %2136 ]
-  %2153 = fptoui float %2152 to i32
-  %2154 = fsub fast float %24, %20
-  %2155 = fcmp fast olt float %2063, %20
-  br i1 %2155, label %2156, label %2169
-
-; <label>:2156                                    ; preds = %2151
-  %2157 = fsub fast float %20, %2063
-  %2158 = fdiv fast float %2157, %2154
-  %2159 = fptoui float %2158 to i32
-  %2160 = uitofp i32 %2159 to float
-  %2161 = fmul fast float %2160, %2154
-  %2162 = fsub fast float %2157, %2161
-  %2163 = and i32 %2159, 1
-  %2164 = icmp eq i32 %2163, 0
-  br i1 %2164, label %2165, label %2167
-
-; <label>:2165                                    ; preds = %2156
-  %2166 = fadd fast float %2162, %20
-  br label %2184
-
-; <label>:2167                                    ; preds = %2156
-  %2168 = fsub fast float %24, %2162
-  br label %2184
-
-; <label>:2169                                    ; preds = %2151
-  %2170 = fcmp fast ogt float %2063, %24
-  br i1 %2170, label %2171, label %2184
-
-; <label>:2171                                    ; preds = %2169
-  %2172 = fsub fast float %2063, %24
-  %2173 = fdiv fast float %2172, %2154
-  %2174 = fptoui float %2173 to i32
-  %2175 = uitofp i32 %2174 to float
-  %2176 = fmul fast float %2175, %2154
-  %2177 = fsub fast float %2172, %2176
-  %2178 = and i32 %2174, 1
-  %2179 = icmp eq i32 %2178, 0
-  br i1 %2179, label %2180, label %2182
-
-; <label>:2180                                    ; preds = %2171
-  %2181 = fsub fast float %24, %2177
-  br label %2184
-
-; <label>:2182                                    ; preds = %2171
-  %2183 = fadd fast float %2177, %20
-  br label %2184
-
-; <label>:2184                                    ; preds = %2182, %2180, %2169, %2167, %2165
-  %2185 = phi float [ %2166, %2165 ], [ %2168, %2167 ], [ %2181, %2180 ], [ %2183, %2182 ], [ %2063, %2169 ]
-  %2186 = fptoui float %2185 to i32
-  %2187 = uitofp i32 %2186 to float
-  %2188 = uitofp i32 %2153 to float
-  %2189 = fptoui float %45 to i32
-  %2190 = fptoui float %182 to i32
-  %2191 = fptoui float %2187 to i32
-  %2192 = fptoui float %2188 to i32
-  %2193 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2194 = extractvalue %dx.types.CBufRet.i32 %2193, 0
-  %2195 = extractvalue %dx.types.CBufRet.i32 %2193, 1
-  %2196 = extractvalue %dx.types.CBufRet.i32 %2193, 2
-  %2197 = extractvalue %dx.types.CBufRet.i32 %2193, 3
-  %2198 = mul i32 %2194, %2189
-  %2199 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2190, i32 %2195, i32 %2198)  ; IMad(a,b,c)
-  %2200 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2191, i32 %2196, i32 %2199)  ; IMad(a,b,c)
-  %2201 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2192, i32 %2197, i32 %2200)  ; IMad(a,b,c)
-  %2202 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %2201, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2203 = extractvalue %dx.types.ResRet.f32 %2202, 0
-  br label %2204
-
-; <label>:2204                                    ; preds = %2184, %2118, %2090, %2074, %2064
-  %2205 = phi float [ %2087, %2074 ], [ 0.000000e+00, %2064 ], [ %2117, %2090 ], [ %2203, %2184 ], [ 0.000000e+00, %2118 ]
-  br i1 %924, label %2206, label %2230
-
-; <label>:2206                                    ; preds = %2204
-  %2207 = fcmp fast oge float %919, 0.000000e+00
-  %2208 = fptoui float %919 to i32
-  %2209 = icmp ult i32 %2208, %13
-  %2210 = and i1 %2207, %2209
-  %2211 = fcmp fast oge float %2063, 0.000000e+00
-  %2212 = and i1 %2211, %2210
-  %2213 = fptoui float %2063 to i32
-  %2214 = icmp ult i32 %2213, %15
-  %2215 = and i1 %2214, %2212
-  br i1 %2215, label %2216, label %2346
-
-; <label>:2216                                    ; preds = %2206
-  %2217 = fptoui float %45 to i32
-  %2218 = fptoui float %182 to i32
-  %2219 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2220 = extractvalue %dx.types.CBufRet.i32 %2219, 0
-  %2221 = extractvalue %dx.types.CBufRet.i32 %2219, 1
-  %2222 = extractvalue %dx.types.CBufRet.i32 %2219, 2
-  %2223 = extractvalue %dx.types.CBufRet.i32 %2219, 3
-  %2224 = mul i32 %2220, %2217
-  %2225 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2218, i32 %2221, i32 %2224)  ; IMad(a,b,c)
-  %2226 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2213, i32 %2222, i32 %2225)  ; IMad(a,b,c)
-  %2227 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2208, i32 %2223, i32 %2226)  ; IMad(a,b,c)
-  %2228 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %2227, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2229 = extractvalue %dx.types.ResRet.f32 %2228, 0
-  br label %2346
-
-; <label>:2230                                    ; preds = %2204
-  %2231 = icmp eq i32 %923, 1
-  br i1 %2231, label %2232, label %2260
-
-; <label>:2232                                    ; preds = %2230
-  %2233 = add i32 %13, -1
-  %2234 = uitofp i32 %2233 to float
-  %2235 = call float @dx.op.binary.f32(i32 35, float %919, float 0.000000e+00)  ; FMax(a,b)
-  %2236 = call float @dx.op.binary.f32(i32 36, float %2235, float %2234)  ; FMin(a,b)
-  %2237 = fptoui float %2236 to i32
-  %2238 = add i32 %15, -1
-  %2239 = uitofp i32 %2238 to float
-  %2240 = call float @dx.op.binary.f32(i32 35, float %2063, float 0.000000e+00)  ; FMax(a,b)
-  %2241 = call float @dx.op.binary.f32(i32 36, float %2240, float %2239)  ; FMin(a,b)
-  %2242 = fptoui float %2241 to i32
-  %2243 = uitofp i32 %2242 to float
-  %2244 = uitofp i32 %2237 to float
-  %2245 = fptoui float %45 to i32
-  %2246 = fptoui float %182 to i32
-  %2247 = fptoui float %2243 to i32
-  %2248 = fptoui float %2244 to i32
-  %2249 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2250 = extractvalue %dx.types.CBufRet.i32 %2249, 0
-  %2251 = extractvalue %dx.types.CBufRet.i32 %2249, 1
-  %2252 = extractvalue %dx.types.CBufRet.i32 %2249, 2
-  %2253 = extractvalue %dx.types.CBufRet.i32 %2249, 3
-  %2254 = mul i32 %2250, %2245
-  %2255 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2246, i32 %2251, i32 %2254)  ; IMad(a,b,c)
-  %2256 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2247, i32 %2252, i32 %2255)  ; IMad(a,b,c)
-  %2257 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2248, i32 %2253, i32 %2256)  ; IMad(a,b,c)
-  %2258 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %2257, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2259 = extractvalue %dx.types.ResRet.f32 %2258, 0
-  br label %2346
-
-; <label>:2260                                    ; preds = %2230
-  %2261 = icmp eq i32 %923, 2
-  br i1 %2261, label %2262, label %2346
-
-; <label>:2262                                    ; preds = %2260
-  %2263 = fsub fast float %22, %20
-  %2264 = fcmp fast olt float %919, %20
-  br i1 %2264, label %2265, label %2278
-
-; <label>:2265                                    ; preds = %2262
-  %2266 = fsub fast float %20, %919
-  %2267 = fdiv fast float %2266, %2263
-  %2268 = fptoui float %2267 to i32
-  %2269 = uitofp i32 %2268 to float
-  %2270 = fmul fast float %2269, %2263
-  %2271 = fsub fast float %2266, %2270
-  %2272 = and i32 %2268, 1
-  %2273 = icmp eq i32 %2272, 0
-  br i1 %2273, label %2274, label %2276
-
-; <label>:2274                                    ; preds = %2265
-  %2275 = fadd fast float %2271, %20
-  br label %2293
-
-; <label>:2276                                    ; preds = %2265
-  %2277 = fsub fast float %22, %2271
-  br label %2293
-
-; <label>:2278                                    ; preds = %2262
-  %2279 = fcmp fast ogt float %919, %22
-  br i1 %2279, label %2280, label %2293
-
-; <label>:2280                                    ; preds = %2278
-  %2281 = fsub fast float %919, %22
-  %2282 = fdiv fast float %2281, %2263
-  %2283 = fptoui float %2282 to i32
-  %2284 = uitofp i32 %2283 to float
-  %2285 = fmul fast float %2284, %2263
-  %2286 = fsub fast float %2281, %2285
-  %2287 = and i32 %2283, 1
-  %2288 = icmp eq i32 %2287, 0
-  br i1 %2288, label %2289, label %2291
-
-; <label>:2289                                    ; preds = %2280
-  %2290 = fsub fast float %22, %2286
-  br label %2293
-
-; <label>:2291                                    ; preds = %2280
-  %2292 = fadd fast float %2286, %20
-  br label %2293
-
-; <label>:2293                                    ; preds = %2291, %2289, %2278, %2276, %2274
-  %2294 = phi float [ %2275, %2274 ], [ %2277, %2276 ], [ %2290, %2289 ], [ %2292, %2291 ], [ %919, %2278 ]
-  %2295 = fptoui float %2294 to i32
-  %2296 = fsub fast float %24, %20
-  %2297 = fcmp fast olt float %2063, %20
-  br i1 %2297, label %2298, label %2311
-
-; <label>:2298                                    ; preds = %2293
-  %2299 = fsub fast float %20, %2063
-  %2300 = fdiv fast float %2299, %2296
-  %2301 = fptoui float %2300 to i32
-  %2302 = uitofp i32 %2301 to float
-  %2303 = fmul fast float %2302, %2296
-  %2304 = fsub fast float %2299, %2303
-  %2305 = and i32 %2301, 1
-  %2306 = icmp eq i32 %2305, 0
-  br i1 %2306, label %2307, label %2309
-
-; <label>:2307                                    ; preds = %2298
-  %2308 = fadd fast float %2304, %20
-  br label %2326
-
-; <label>:2309                                    ; preds = %2298
-  %2310 = fsub fast float %24, %2304
-  br label %2326
-
-; <label>:2311                                    ; preds = %2293
-  %2312 = fcmp fast ogt float %2063, %24
-  br i1 %2312, label %2313, label %2326
-
-; <label>:2313                                    ; preds = %2311
-  %2314 = fsub fast float %2063, %24
-  %2315 = fdiv fast float %2314, %2296
-  %2316 = fptoui float %2315 to i32
-  %2317 = uitofp i32 %2316 to float
-  %2318 = fmul fast float %2317, %2296
-  %2319 = fsub fast float %2314, %2318
-  %2320 = and i32 %2316, 1
-  %2321 = icmp eq i32 %2320, 0
-  br i1 %2321, label %2322, label %2324
-
-; <label>:2322                                    ; preds = %2313
-  %2323 = fsub fast float %24, %2319
-  br label %2326
-
-; <label>:2324                                    ; preds = %2313
-  %2325 = fadd fast float %2319, %20
-  br label %2326
-
-; <label>:2326                                    ; preds = %2324, %2322, %2311, %2309, %2307
-  %2327 = phi float [ %2308, %2307 ], [ %2310, %2309 ], [ %2323, %2322 ], [ %2325, %2324 ], [ %2063, %2311 ]
-  %2328 = fptoui float %2327 to i32
-  %2329 = uitofp i32 %2328 to float
-  %2330 = uitofp i32 %2295 to float
-  %2331 = fptoui float %45 to i32
-  %2332 = fptoui float %182 to i32
-  %2333 = fptoui float %2329 to i32
-  %2334 = fptoui float %2330 to i32
-  %2335 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2336 = extractvalue %dx.types.CBufRet.i32 %2335, 0
-  %2337 = extractvalue %dx.types.CBufRet.i32 %2335, 1
-  %2338 = extractvalue %dx.types.CBufRet.i32 %2335, 2
-  %2339 = extractvalue %dx.types.CBufRet.i32 %2335, 3
-  %2340 = mul i32 %2336, %2331
-  %2341 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2332, i32 %2337, i32 %2340)  ; IMad(a,b,c)
-  %2342 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2333, i32 %2338, i32 %2341)  ; IMad(a,b,c)
-  %2343 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2334, i32 %2339, i32 %2342)  ; IMad(a,b,c)
-  %2344 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %2343, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2345 = extractvalue %dx.types.ResRet.f32 %2344, 0
-  br label %2346
-
-; <label>:2346                                    ; preds = %2326, %2260, %2232, %2216, %2206
-  %2347 = phi float [ %2229, %2216 ], [ 0.000000e+00, %2206 ], [ %2259, %2232 ], [ %2345, %2326 ], [ 0.000000e+00, %2260 ]
-  br i1 %924, label %2348, label %2372
-
-; <label>:2348                                    ; preds = %2346
-  %2349 = fcmp fast oge float %1209, 0.000000e+00
-  %2350 = fptoui float %1209 to i32
-  %2351 = icmp ult i32 %2350, %13
-  %2352 = and i1 %2349, %2351
-  %2353 = fcmp fast oge float %2063, 0.000000e+00
-  %2354 = and i1 %2353, %2352
-  %2355 = fptoui float %2063 to i32
-  %2356 = icmp ult i32 %2355, %15
-  %2357 = and i1 %2356, %2354
-  br i1 %2357, label %2358, label %2488
-
-; <label>:2358                                    ; preds = %2348
-  %2359 = fptoui float %45 to i32
-  %2360 = fptoui float %182 to i32
-  %2361 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2362 = extractvalue %dx.types.CBufRet.i32 %2361, 0
-  %2363 = extractvalue %dx.types.CBufRet.i32 %2361, 1
-  %2364 = extractvalue %dx.types.CBufRet.i32 %2361, 2
-  %2365 = extractvalue %dx.types.CBufRet.i32 %2361, 3
-  %2366 = mul i32 %2362, %2359
-  %2367 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2360, i32 %2363, i32 %2366)  ; IMad(a,b,c)
-  %2368 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2355, i32 %2364, i32 %2367)  ; IMad(a,b,c)
-  %2369 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2350, i32 %2365, i32 %2368)  ; IMad(a,b,c)
-  %2370 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %2369, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2371 = extractvalue %dx.types.ResRet.f32 %2370, 0
-  br label %2488
-
-; <label>:2372                                    ; preds = %2346
-  %2373 = icmp eq i32 %923, 1
-  br i1 %2373, label %2374, label %2402
-
-; <label>:2374                                    ; preds = %2372
-  %2375 = add i32 %13, -1
-  %2376 = uitofp i32 %2375 to float
-  %2377 = call float @dx.op.binary.f32(i32 35, float %1209, float 0.000000e+00)  ; FMax(a,b)
-  %2378 = call float @dx.op.binary.f32(i32 36, float %2377, float %2376)  ; FMin(a,b)
-  %2379 = fptoui float %2378 to i32
-  %2380 = add i32 %15, -1
-  %2381 = uitofp i32 %2380 to float
-  %2382 = call float @dx.op.binary.f32(i32 35, float %2063, float 0.000000e+00)  ; FMax(a,b)
-  %2383 = call float @dx.op.binary.f32(i32 36, float %2382, float %2381)  ; FMin(a,b)
-  %2384 = fptoui float %2383 to i32
-  %2385 = uitofp i32 %2384 to float
-  %2386 = uitofp i32 %2379 to float
-  %2387 = fptoui float %45 to i32
-  %2388 = fptoui float %182 to i32
-  %2389 = fptoui float %2385 to i32
-  %2390 = fptoui float %2386 to i32
-  %2391 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2392 = extractvalue %dx.types.CBufRet.i32 %2391, 0
-  %2393 = extractvalue %dx.types.CBufRet.i32 %2391, 1
-  %2394 = extractvalue %dx.types.CBufRet.i32 %2391, 2
-  %2395 = extractvalue %dx.types.CBufRet.i32 %2391, 3
-  %2396 = mul i32 %2392, %2387
-  %2397 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2388, i32 %2393, i32 %2396)  ; IMad(a,b,c)
-  %2398 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2389, i32 %2394, i32 %2397)  ; IMad(a,b,c)
-  %2399 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2390, i32 %2395, i32 %2398)  ; IMad(a,b,c)
-  %2400 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %2399, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2401 = extractvalue %dx.types.ResRet.f32 %2400, 0
-  br label %2488
-
-; <label>:2402                                    ; preds = %2372
-  %2403 = icmp eq i32 %923, 2
-  br i1 %2403, label %2404, label %2488
-
-; <label>:2404                                    ; preds = %2402
-  %2405 = fsub fast float %22, %20
-  %2406 = fcmp fast olt float %1209, %20
-  br i1 %2406, label %2407, label %2420
-
-; <label>:2407                                    ; preds = %2404
-  %2408 = fsub fast float %20, %1209
-  %2409 = fdiv fast float %2408, %2405
-  %2410 = fptoui float %2409 to i32
-  %2411 = uitofp i32 %2410 to float
-  %2412 = fmul fast float %2411, %2405
-  %2413 = fsub fast float %2408, %2412
-  %2414 = and i32 %2410, 1
-  %2415 = icmp eq i32 %2414, 0
-  br i1 %2415, label %2416, label %2418
-
-; <label>:2416                                    ; preds = %2407
-  %2417 = fadd fast float %2413, %20
-  br label %2435
-
-; <label>:2418                                    ; preds = %2407
-  %2419 = fsub fast float %22, %2413
-  br label %2435
-
-; <label>:2420                                    ; preds = %2404
-  %2421 = fcmp fast ogt float %1209, %22
-  br i1 %2421, label %2422, label %2435
-
-; <label>:2422                                    ; preds = %2420
-  %2423 = fsub fast float %1209, %22
-  %2424 = fdiv fast float %2423, %2405
-  %2425 = fptoui float %2424 to i32
-  %2426 = uitofp i32 %2425 to float
-  %2427 = fmul fast float %2426, %2405
-  %2428 = fsub fast float %2423, %2427
-  %2429 = and i32 %2425, 1
-  %2430 = icmp eq i32 %2429, 0
-  br i1 %2430, label %2431, label %2433
-
-; <label>:2431                                    ; preds = %2422
-  %2432 = fsub fast float %22, %2428
-  br label %2435
-
-; <label>:2433                                    ; preds = %2422
-  %2434 = fadd fast float %2428, %20
-  br label %2435
-
-; <label>:2435                                    ; preds = %2433, %2431, %2420, %2418, %2416
-  %2436 = phi float [ %2417, %2416 ], [ %2419, %2418 ], [ %2432, %2431 ], [ %2434, %2433 ], [ %1209, %2420 ]
-  %2437 = fptoui float %2436 to i32
-  %2438 = fsub fast float %24, %20
-  %2439 = fcmp fast olt float %2063, %20
-  br i1 %2439, label %2440, label %2453
-
-; <label>:2440                                    ; preds = %2435
-  %2441 = fsub fast float %20, %2063
-  %2442 = fdiv fast float %2441, %2438
-  %2443 = fptoui float %2442 to i32
-  %2444 = uitofp i32 %2443 to float
-  %2445 = fmul fast float %2444, %2438
-  %2446 = fsub fast float %2441, %2445
-  %2447 = and i32 %2443, 1
-  %2448 = icmp eq i32 %2447, 0
-  br i1 %2448, label %2449, label %2451
-
-; <label>:2449                                    ; preds = %2440
-  %2450 = fadd fast float %2446, %20
-  br label %2468
-
-; <label>:2451                                    ; preds = %2440
-  %2452 = fsub fast float %24, %2446
-  br label %2468
-
-; <label>:2453                                    ; preds = %2435
-  %2454 = fcmp fast ogt float %2063, %24
-  br i1 %2454, label %2455, label %2468
-
-; <label>:2455                                    ; preds = %2453
-  %2456 = fsub fast float %2063, %24
-  %2457 = fdiv fast float %2456, %2438
-  %2458 = fptoui float %2457 to i32
-  %2459 = uitofp i32 %2458 to float
-  %2460 = fmul fast float %2459, %2438
-  %2461 = fsub fast float %2456, %2460
-  %2462 = and i32 %2458, 1
-  %2463 = icmp eq i32 %2462, 0
-  br i1 %2463, label %2464, label %2466
-
-; <label>:2464                                    ; preds = %2455
-  %2465 = fsub fast float %24, %2461
-  br label %2468
-
-; <label>:2466                                    ; preds = %2455
-  %2467 = fadd fast float %2461, %20
-  br label %2468
-
-; <label>:2468                                    ; preds = %2466, %2464, %2453, %2451, %2449
-  %2469 = phi float [ %2450, %2449 ], [ %2452, %2451 ], [ %2465, %2464 ], [ %2467, %2466 ], [ %2063, %2453 ]
-  %2470 = fptoui float %2469 to i32
-  %2471 = uitofp i32 %2470 to float
-  %2472 = uitofp i32 %2437 to float
-  %2473 = fptoui float %45 to i32
-  %2474 = fptoui float %182 to i32
-  %2475 = fptoui float %2471 to i32
-  %2476 = fptoui float %2472 to i32
-  %2477 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2478 = extractvalue %dx.types.CBufRet.i32 %2477, 0
-  %2479 = extractvalue %dx.types.CBufRet.i32 %2477, 1
-  %2480 = extractvalue %dx.types.CBufRet.i32 %2477, 2
-  %2481 = extractvalue %dx.types.CBufRet.i32 %2477, 3
-  %2482 = mul i32 %2478, %2473
-  %2483 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2474, i32 %2479, i32 %2482)  ; IMad(a,b,c)
-  %2484 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2475, i32 %2480, i32 %2483)  ; IMad(a,b,c)
-  %2485 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2476, i32 %2481, i32 %2484)  ; IMad(a,b,c)
-  %2486 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %2485, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2487 = extractvalue %dx.types.ResRet.f32 %2486, 0
-  br label %2488
-
-; <label>:2488                                    ; preds = %2468, %2402, %2374, %2358, %2348
-  %2489 = phi float [ %2371, %2358 ], [ 0.000000e+00, %2348 ], [ %2401, %2374 ], [ %2487, %2468 ], [ 0.000000e+00, %2402 ]
-  br i1 %924, label %2490, label %2514
-
-; <label>:2490                                    ; preds = %2488
-  %2491 = fcmp fast oge float %1352, 0.000000e+00
-  %2492 = fptoui float %1352 to i32
-  %2493 = icmp ult i32 %2492, %13
-  %2494 = and i1 %2491, %2493
-  %2495 = fcmp fast oge float %2063, 0.000000e+00
-  %2496 = and i1 %2495, %2494
-  %2497 = fptoui float %2063 to i32
-  %2498 = icmp ult i32 %2497, %15
-  %2499 = and i1 %2498, %2496
-  br i1 %2499, label %2500, label %2630
-
-; <label>:2500                                    ; preds = %2490
-  %2501 = fptoui float %45 to i32
-  %2502 = fptoui float %182 to i32
-  %2503 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2504 = extractvalue %dx.types.CBufRet.i32 %2503, 0
-  %2505 = extractvalue %dx.types.CBufRet.i32 %2503, 1
-  %2506 = extractvalue %dx.types.CBufRet.i32 %2503, 2
-  %2507 = extractvalue %dx.types.CBufRet.i32 %2503, 3
-  %2508 = mul i32 %2504, %2501
-  %2509 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2502, i32 %2505, i32 %2508)  ; IMad(a,b,c)
-  %2510 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2497, i32 %2506, i32 %2509)  ; IMad(a,b,c)
-  %2511 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2492, i32 %2507, i32 %2510)  ; IMad(a,b,c)
-  %2512 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %2511, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2513 = extractvalue %dx.types.ResRet.f32 %2512, 0
-  br label %2630
-
-; <label>:2514                                    ; preds = %2488
-  %2515 = icmp eq i32 %923, 1
-  br i1 %2515, label %2516, label %2544
-
-; <label>:2516                                    ; preds = %2514
-  %2517 = add i32 %13, -1
-  %2518 = uitofp i32 %2517 to float
-  %2519 = call float @dx.op.binary.f32(i32 35, float %1352, float 0.000000e+00)  ; FMax(a,b)
-  %2520 = call float @dx.op.binary.f32(i32 36, float %2519, float %2518)  ; FMin(a,b)
-  %2521 = fptoui float %2520 to i32
-  %2522 = add i32 %15, -1
-  %2523 = uitofp i32 %2522 to float
-  %2524 = call float @dx.op.binary.f32(i32 35, float %2063, float 0.000000e+00)  ; FMax(a,b)
-  %2525 = call float @dx.op.binary.f32(i32 36, float %2524, float %2523)  ; FMin(a,b)
-  %2526 = fptoui float %2525 to i32
-  %2527 = uitofp i32 %2526 to float
-  %2528 = uitofp i32 %2521 to float
-  %2529 = fptoui float %45 to i32
-  %2530 = fptoui float %182 to i32
-  %2531 = fptoui float %2527 to i32
-  %2532 = fptoui float %2528 to i32
-  %2533 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2534 = extractvalue %dx.types.CBufRet.i32 %2533, 0
-  %2535 = extractvalue %dx.types.CBufRet.i32 %2533, 1
-  %2536 = extractvalue %dx.types.CBufRet.i32 %2533, 2
-  %2537 = extractvalue %dx.types.CBufRet.i32 %2533, 3
-  %2538 = mul i32 %2534, %2529
-  %2539 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2530, i32 %2535, i32 %2538)  ; IMad(a,b,c)
-  %2540 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2531, i32 %2536, i32 %2539)  ; IMad(a,b,c)
-  %2541 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2532, i32 %2537, i32 %2540)  ; IMad(a,b,c)
-  %2542 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %2541, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2543 = extractvalue %dx.types.ResRet.f32 %2542, 0
-  br label %2630
-
-; <label>:2544                                    ; preds = %2514
-  %2545 = icmp eq i32 %923, 2
-  br i1 %2545, label %2546, label %2630
-
-; <label>:2546                                    ; preds = %2544
-  %2547 = fsub fast float %22, %20
-  %2548 = fcmp fast olt float %1352, %20
-  br i1 %2548, label %2549, label %2562
-
-; <label>:2549                                    ; preds = %2546
-  %2550 = fsub fast float %20, %1352
-  %2551 = fdiv fast float %2550, %2547
-  %2552 = fptoui float %2551 to i32
-  %2553 = uitofp i32 %2552 to float
-  %2554 = fmul fast float %2553, %2547
-  %2555 = fsub fast float %2550, %2554
-  %2556 = and i32 %2552, 1
-  %2557 = icmp eq i32 %2556, 0
-  br i1 %2557, label %2558, label %2560
-
-; <label>:2558                                    ; preds = %2549
-  %2559 = fadd fast float %2555, %20
-  br label %2577
-
-; <label>:2560                                    ; preds = %2549
-  %2561 = fsub fast float %22, %2555
-  br label %2577
-
-; <label>:2562                                    ; preds = %2546
-  %2563 = fcmp fast ogt float %1352, %22
-  br i1 %2563, label %2564, label %2577
-
-; <label>:2564                                    ; preds = %2562
-  %2565 = fsub fast float %1352, %22
-  %2566 = fdiv fast float %2565, %2547
-  %2567 = fptoui float %2566 to i32
-  %2568 = uitofp i32 %2567 to float
-  %2569 = fmul fast float %2568, %2547
-  %2570 = fsub fast float %2565, %2569
-  %2571 = and i32 %2567, 1
-  %2572 = icmp eq i32 %2571, 0
-  br i1 %2572, label %2573, label %2575
-
-; <label>:2573                                    ; preds = %2564
-  %2574 = fsub fast float %22, %2570
-  br label %2577
-
-; <label>:2575                                    ; preds = %2564
-  %2576 = fadd fast float %2570, %20
-  br label %2577
-
-; <label>:2577                                    ; preds = %2575, %2573, %2562, %2560, %2558
-  %2578 = phi float [ %2559, %2558 ], [ %2561, %2560 ], [ %2574, %2573 ], [ %2576, %2575 ], [ %1352, %2562 ]
-  %2579 = fptoui float %2578 to i32
-  %2580 = fsub fast float %24, %20
-  %2581 = fcmp fast olt float %2063, %20
-  br i1 %2581, label %2582, label %2595
-
-; <label>:2582                                    ; preds = %2577
-  %2583 = fsub fast float %20, %2063
-  %2584 = fdiv fast float %2583, %2580
-  %2585 = fptoui float %2584 to i32
-  %2586 = uitofp i32 %2585 to float
-  %2587 = fmul fast float %2586, %2580
-  %2588 = fsub fast float %2583, %2587
-  %2589 = and i32 %2585, 1
-  %2590 = icmp eq i32 %2589, 0
-  br i1 %2590, label %2591, label %2593
-
-; <label>:2591                                    ; preds = %2582
-  %2592 = fadd fast float %2588, %20
-  br label %2610
-
-; <label>:2593                                    ; preds = %2582
-  %2594 = fsub fast float %24, %2588
-  br label %2610
-
-; <label>:2595                                    ; preds = %2577
-  %2596 = fcmp fast ogt float %2063, %24
-  br i1 %2596, label %2597, label %2610
-
-; <label>:2597                                    ; preds = %2595
-  %2598 = fsub fast float %2063, %24
-  %2599 = fdiv fast float %2598, %2580
-  %2600 = fptoui float %2599 to i32
-  %2601 = uitofp i32 %2600 to float
-  %2602 = fmul fast float %2601, %2580
-  %2603 = fsub fast float %2598, %2602
-  %2604 = and i32 %2600, 1
-  %2605 = icmp eq i32 %2604, 0
-  br i1 %2605, label %2606, label %2608
-
-; <label>:2606                                    ; preds = %2597
-  %2607 = fsub fast float %24, %2603
-  br label %2610
-
-; <label>:2608                                    ; preds = %2597
-  %2609 = fadd fast float %2603, %20
-  br label %2610
-
-; <label>:2610                                    ; preds = %2608, %2606, %2595, %2593, %2591
-  %2611 = phi float [ %2592, %2591 ], [ %2594, %2593 ], [ %2607, %2606 ], [ %2609, %2608 ], [ %2063, %2595 ]
-  %2612 = fptoui float %2611 to i32
-  %2613 = uitofp i32 %2612 to float
-  %2614 = uitofp i32 %2579 to float
-  %2615 = fptoui float %45 to i32
-  %2616 = fptoui float %182 to i32
-  %2617 = fptoui float %2613 to i32
-  %2618 = fptoui float %2614 to i32
-  %2619 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2620 = extractvalue %dx.types.CBufRet.i32 %2619, 0
-  %2621 = extractvalue %dx.types.CBufRet.i32 %2619, 1
-  %2622 = extractvalue %dx.types.CBufRet.i32 %2619, 2
-  %2623 = extractvalue %dx.types.CBufRet.i32 %2619, 3
-  %2624 = mul i32 %2620, %2615
-  %2625 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2616, i32 %2621, i32 %2624)  ; IMad(a,b,c)
-  %2626 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2617, i32 %2622, i32 %2625)  ; IMad(a,b,c)
-  %2627 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2618, i32 %2623, i32 %2626)  ; IMad(a,b,c)
-  %2628 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %2627, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2629 = extractvalue %dx.types.ResRet.f32 %2628, 0
-  br label %2630
-
-; <label>:2630                                    ; preds = %2610, %2544, %2516, %2500, %2490
-  %2631 = phi float [ %2513, %2500 ], [ 0.000000e+00, %2490 ], [ %2543, %2516 ], [ %2629, %2610 ], [ 0.000000e+00, %2544 ]
-  %2632 = fadd fast float %921, 2.000000e+00
-  br i1 %924, label %2633, label %2657
-
-; <label>:2633                                    ; preds = %2630
-  %2634 = fcmp fast oge float %920, 0.000000e+00
-  %2635 = fptoui float %920 to i32
-  %2636 = icmp ult i32 %2635, %13
-  %2637 = and i1 %2634, %2636
-  %2638 = fcmp fast oge float %2632, 0.000000e+00
-  %2639 = and i1 %2638, %2637
-  %2640 = fptoui float %2632 to i32
-  %2641 = icmp ult i32 %2640, %15
-  %2642 = and i1 %2641, %2639
-  br i1 %2642, label %2643, label %2773
-
-; <label>:2643                                    ; preds = %2633
-  %2644 = fptoui float %45 to i32
-  %2645 = fptoui float %182 to i32
-  %2646 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2647 = extractvalue %dx.types.CBufRet.i32 %2646, 0
-  %2648 = extractvalue %dx.types.CBufRet.i32 %2646, 1
-  %2649 = extractvalue %dx.types.CBufRet.i32 %2646, 2
-  %2650 = extractvalue %dx.types.CBufRet.i32 %2646, 3
-  %2651 = mul i32 %2647, %2644
-  %2652 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2645, i32 %2648, i32 %2651)  ; IMad(a,b,c)
-  %2653 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2640, i32 %2649, i32 %2652)  ; IMad(a,b,c)
-  %2654 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2635, i32 %2650, i32 %2653)  ; IMad(a,b,c)
-  %2655 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %2654, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2656 = extractvalue %dx.types.ResRet.f32 %2655, 0
-  br label %2773
-
-; <label>:2657                                    ; preds = %2630
-  %2658 = icmp eq i32 %923, 1
-  br i1 %2658, label %2659, label %2687
-
-; <label>:2659                                    ; preds = %2657
-  %2660 = add i32 %13, -1
-  %2661 = uitofp i32 %2660 to float
-  %2662 = call float @dx.op.binary.f32(i32 35, float %920, float 0.000000e+00)  ; FMax(a,b)
-  %2663 = call float @dx.op.binary.f32(i32 36, float %2662, float %2661)  ; FMin(a,b)
-  %2664 = fptoui float %2663 to i32
-  %2665 = add i32 %15, -1
-  %2666 = uitofp i32 %2665 to float
-  %2667 = call float @dx.op.binary.f32(i32 35, float %2632, float 0.000000e+00)  ; FMax(a,b)
-  %2668 = call float @dx.op.binary.f32(i32 36, float %2667, float %2666)  ; FMin(a,b)
-  %2669 = fptoui float %2668 to i32
-  %2670 = uitofp i32 %2669 to float
-  %2671 = uitofp i32 %2664 to float
-  %2672 = fptoui float %45 to i32
-  %2673 = fptoui float %182 to i32
-  %2674 = fptoui float %2670 to i32
-  %2675 = fptoui float %2671 to i32
-  %2676 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2677 = extractvalue %dx.types.CBufRet.i32 %2676, 0
-  %2678 = extractvalue %dx.types.CBufRet.i32 %2676, 1
-  %2679 = extractvalue %dx.types.CBufRet.i32 %2676, 2
-  %2680 = extractvalue %dx.types.CBufRet.i32 %2676, 3
-  %2681 = mul i32 %2677, %2672
-  %2682 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2673, i32 %2678, i32 %2681)  ; IMad(a,b,c)
-  %2683 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2674, i32 %2679, i32 %2682)  ; IMad(a,b,c)
-  %2684 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2675, i32 %2680, i32 %2683)  ; IMad(a,b,c)
-  %2685 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %2684, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2686 = extractvalue %dx.types.ResRet.f32 %2685, 0
-  br label %2773
-
-; <label>:2687                                    ; preds = %2657
-  %2688 = icmp eq i32 %923, 2
-  br i1 %2688, label %2689, label %2773
-
-; <label>:2689                                    ; preds = %2687
-  %2690 = fsub fast float %22, %20
-  %2691 = fcmp fast olt float %920, %20
-  br i1 %2691, label %2692, label %2705
-
-; <label>:2692                                    ; preds = %2689
-  %2693 = fsub fast float %20, %920
-  %2694 = fdiv fast float %2693, %2690
-  %2695 = fptoui float %2694 to i32
-  %2696 = uitofp i32 %2695 to float
-  %2697 = fmul fast float %2696, %2690
-  %2698 = fsub fast float %2693, %2697
-  %2699 = and i32 %2695, 1
-  %2700 = icmp eq i32 %2699, 0
-  br i1 %2700, label %2701, label %2703
-
-; <label>:2701                                    ; preds = %2692
-  %2702 = fadd fast float %2698, %20
-  br label %2720
-
-; <label>:2703                                    ; preds = %2692
-  %2704 = fsub fast float %22, %2698
-  br label %2720
-
-; <label>:2705                                    ; preds = %2689
-  %2706 = fcmp fast ogt float %920, %22
-  br i1 %2706, label %2707, label %2720
-
-; <label>:2707                                    ; preds = %2705
-  %2708 = fsub fast float %920, %22
-  %2709 = fdiv fast float %2708, %2690
-  %2710 = fptoui float %2709 to i32
-  %2711 = uitofp i32 %2710 to float
-  %2712 = fmul fast float %2711, %2690
-  %2713 = fsub fast float %2708, %2712
-  %2714 = and i32 %2710, 1
-  %2715 = icmp eq i32 %2714, 0
-  br i1 %2715, label %2716, label %2718
-
-; <label>:2716                                    ; preds = %2707
-  %2717 = fsub fast float %22, %2713
-  br label %2720
-
-; <label>:2718                                    ; preds = %2707
-  %2719 = fadd fast float %2713, %20
-  br label %2720
-
-; <label>:2720                                    ; preds = %2718, %2716, %2705, %2703, %2701
-  %2721 = phi float [ %2702, %2701 ], [ %2704, %2703 ], [ %2717, %2716 ], [ %2719, %2718 ], [ %920, %2705 ]
-  %2722 = fptoui float %2721 to i32
-  %2723 = fsub fast float %24, %20
-  %2724 = fcmp fast olt float %2632, %20
-  br i1 %2724, label %2725, label %2738
-
-; <label>:2725                                    ; preds = %2720
-  %2726 = fsub fast float %20, %2632
-  %2727 = fdiv fast float %2726, %2723
-  %2728 = fptoui float %2727 to i32
-  %2729 = uitofp i32 %2728 to float
-  %2730 = fmul fast float %2729, %2723
-  %2731 = fsub fast float %2726, %2730
-  %2732 = and i32 %2728, 1
-  %2733 = icmp eq i32 %2732, 0
-  br i1 %2733, label %2734, label %2736
-
-; <label>:2734                                    ; preds = %2725
-  %2735 = fadd fast float %2731, %20
-  br label %2753
-
-; <label>:2736                                    ; preds = %2725
-  %2737 = fsub fast float %24, %2731
-  br label %2753
-
-; <label>:2738                                    ; preds = %2720
-  %2739 = fcmp fast ogt float %2632, %24
-  br i1 %2739, label %2740, label %2753
-
-; <label>:2740                                    ; preds = %2738
-  %2741 = fsub fast float %2632, %24
-  %2742 = fdiv fast float %2741, %2723
-  %2743 = fptoui float %2742 to i32
-  %2744 = uitofp i32 %2743 to float
-  %2745 = fmul fast float %2744, %2723
-  %2746 = fsub fast float %2741, %2745
-  %2747 = and i32 %2743, 1
-  %2748 = icmp eq i32 %2747, 0
-  br i1 %2748, label %2749, label %2751
-
-; <label>:2749                                    ; preds = %2740
-  %2750 = fsub fast float %24, %2746
-  br label %2753
-
-; <label>:2751                                    ; preds = %2740
-  %2752 = fadd fast float %2746, %20
-  br label %2753
-
-; <label>:2753                                    ; preds = %2751, %2749, %2738, %2736, %2734
-  %2754 = phi float [ %2735, %2734 ], [ %2737, %2736 ], [ %2750, %2749 ], [ %2752, %2751 ], [ %2632, %2738 ]
-  %2755 = fptoui float %2754 to i32
-  %2756 = uitofp i32 %2755 to float
-  %2757 = uitofp i32 %2722 to float
-  %2758 = fptoui float %45 to i32
-  %2759 = fptoui float %182 to i32
-  %2760 = fptoui float %2756 to i32
-  %2761 = fptoui float %2757 to i32
-  %2762 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2763 = extractvalue %dx.types.CBufRet.i32 %2762, 0
-  %2764 = extractvalue %dx.types.CBufRet.i32 %2762, 1
-  %2765 = extractvalue %dx.types.CBufRet.i32 %2762, 2
-  %2766 = extractvalue %dx.types.CBufRet.i32 %2762, 3
-  %2767 = mul i32 %2763, %2758
-  %2768 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2759, i32 %2764, i32 %2767)  ; IMad(a,b,c)
-  %2769 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2760, i32 %2765, i32 %2768)  ; IMad(a,b,c)
-  %2770 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2761, i32 %2766, i32 %2769)  ; IMad(a,b,c)
-  %2771 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %2770, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2772 = extractvalue %dx.types.ResRet.f32 %2771, 0
-  br label %2773
-
-; <label>:2773                                    ; preds = %2753, %2687, %2659, %2643, %2633
-  %2774 = phi float [ %2656, %2643 ], [ 0.000000e+00, %2633 ], [ %2686, %2659 ], [ %2772, %2753 ], [ 0.000000e+00, %2687 ]
-  br i1 %924, label %2775, label %2799
-
-; <label>:2775                                    ; preds = %2773
-  %2776 = fcmp fast oge float %919, 0.000000e+00
-  %2777 = fptoui float %919 to i32
-  %2778 = icmp ult i32 %2777, %13
-  %2779 = and i1 %2776, %2778
-  %2780 = fcmp fast oge float %2632, 0.000000e+00
-  %2781 = and i1 %2780, %2779
-  %2782 = fptoui float %2632 to i32
-  %2783 = icmp ult i32 %2782, %15
-  %2784 = and i1 %2783, %2781
-  br i1 %2784, label %2785, label %2915
-
-; <label>:2785                                    ; preds = %2775
-  %2786 = fptoui float %45 to i32
-  %2787 = fptoui float %182 to i32
-  %2788 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2789 = extractvalue %dx.types.CBufRet.i32 %2788, 0
-  %2790 = extractvalue %dx.types.CBufRet.i32 %2788, 1
-  %2791 = extractvalue %dx.types.CBufRet.i32 %2788, 2
-  %2792 = extractvalue %dx.types.CBufRet.i32 %2788, 3
-  %2793 = mul i32 %2789, %2786
-  %2794 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2787, i32 %2790, i32 %2793)  ; IMad(a,b,c)
-  %2795 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2782, i32 %2791, i32 %2794)  ; IMad(a,b,c)
-  %2796 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2777, i32 %2792, i32 %2795)  ; IMad(a,b,c)
-  %2797 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %2796, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2798 = extractvalue %dx.types.ResRet.f32 %2797, 0
-  br label %2915
-
-; <label>:2799                                    ; preds = %2773
-  %2800 = icmp eq i32 %923, 1
-  br i1 %2800, label %2801, label %2829
-
-; <label>:2801                                    ; preds = %2799
-  %2802 = add i32 %13, -1
-  %2803 = uitofp i32 %2802 to float
-  %2804 = call float @dx.op.binary.f32(i32 35, float %919, float 0.000000e+00)  ; FMax(a,b)
-  %2805 = call float @dx.op.binary.f32(i32 36, float %2804, float %2803)  ; FMin(a,b)
-  %2806 = fptoui float %2805 to i32
-  %2807 = add i32 %15, -1
-  %2808 = uitofp i32 %2807 to float
-  %2809 = call float @dx.op.binary.f32(i32 35, float %2632, float 0.000000e+00)  ; FMax(a,b)
-  %2810 = call float @dx.op.binary.f32(i32 36, float %2809, float %2808)  ; FMin(a,b)
-  %2811 = fptoui float %2810 to i32
-  %2812 = uitofp i32 %2811 to float
-  %2813 = uitofp i32 %2806 to float
-  %2814 = fptoui float %45 to i32
-  %2815 = fptoui float %182 to i32
-  %2816 = fptoui float %2812 to i32
-  %2817 = fptoui float %2813 to i32
-  %2818 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2819 = extractvalue %dx.types.CBufRet.i32 %2818, 0
-  %2820 = extractvalue %dx.types.CBufRet.i32 %2818, 1
-  %2821 = extractvalue %dx.types.CBufRet.i32 %2818, 2
-  %2822 = extractvalue %dx.types.CBufRet.i32 %2818, 3
-  %2823 = mul i32 %2819, %2814
-  %2824 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2815, i32 %2820, i32 %2823)  ; IMad(a,b,c)
-  %2825 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2816, i32 %2821, i32 %2824)  ; IMad(a,b,c)
-  %2826 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2817, i32 %2822, i32 %2825)  ; IMad(a,b,c)
-  %2827 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %2826, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2828 = extractvalue %dx.types.ResRet.f32 %2827, 0
-  br label %2915
-
-; <label>:2829                                    ; preds = %2799
-  %2830 = icmp eq i32 %923, 2
-  br i1 %2830, label %2831, label %2915
-
-; <label>:2831                                    ; preds = %2829
-  %2832 = fsub fast float %22, %20
-  %2833 = fcmp fast olt float %919, %20
-  br i1 %2833, label %2834, label %2847
-
-; <label>:2834                                    ; preds = %2831
-  %2835 = fsub fast float %20, %919
-  %2836 = fdiv fast float %2835, %2832
-  %2837 = fptoui float %2836 to i32
-  %2838 = uitofp i32 %2837 to float
-  %2839 = fmul fast float %2838, %2832
-  %2840 = fsub fast float %2835, %2839
-  %2841 = and i32 %2837, 1
-  %2842 = icmp eq i32 %2841, 0
-  br i1 %2842, label %2843, label %2845
-
-; <label>:2843                                    ; preds = %2834
-  %2844 = fadd fast float %2840, %20
-  br label %2862
-
-; <label>:2845                                    ; preds = %2834
-  %2846 = fsub fast float %22, %2840
-  br label %2862
-
-; <label>:2847                                    ; preds = %2831
-  %2848 = fcmp fast ogt float %919, %22
-  br i1 %2848, label %2849, label %2862
-
-; <label>:2849                                    ; preds = %2847
-  %2850 = fsub fast float %919, %22
-  %2851 = fdiv fast float %2850, %2832
-  %2852 = fptoui float %2851 to i32
-  %2853 = uitofp i32 %2852 to float
-  %2854 = fmul fast float %2853, %2832
-  %2855 = fsub fast float %2850, %2854
-  %2856 = and i32 %2852, 1
-  %2857 = icmp eq i32 %2856, 0
-  br i1 %2857, label %2858, label %2860
-
-; <label>:2858                                    ; preds = %2849
-  %2859 = fsub fast float %22, %2855
-  br label %2862
-
-; <label>:2860                                    ; preds = %2849
-  %2861 = fadd fast float %2855, %20
-  br label %2862
-
-; <label>:2862                                    ; preds = %2860, %2858, %2847, %2845, %2843
-  %2863 = phi float [ %2844, %2843 ], [ %2846, %2845 ], [ %2859, %2858 ], [ %2861, %2860 ], [ %919, %2847 ]
-  %2864 = fptoui float %2863 to i32
-  %2865 = fsub fast float %24, %20
-  %2866 = fcmp fast olt float %2632, %20
-  br i1 %2866, label %2867, label %2880
-
-; <label>:2867                                    ; preds = %2862
-  %2868 = fsub fast float %20, %2632
-  %2869 = fdiv fast float %2868, %2865
-  %2870 = fptoui float %2869 to i32
-  %2871 = uitofp i32 %2870 to float
-  %2872 = fmul fast float %2871, %2865
-  %2873 = fsub fast float %2868, %2872
-  %2874 = and i32 %2870, 1
-  %2875 = icmp eq i32 %2874, 0
-  br i1 %2875, label %2876, label %2878
-
-; <label>:2876                                    ; preds = %2867
-  %2877 = fadd fast float %2873, %20
-  br label %2895
-
-; <label>:2878                                    ; preds = %2867
-  %2879 = fsub fast float %24, %2873
-  br label %2895
-
-; <label>:2880                                    ; preds = %2862
-  %2881 = fcmp fast ogt float %2632, %24
-  br i1 %2881, label %2882, label %2895
-
-; <label>:2882                                    ; preds = %2880
-  %2883 = fsub fast float %2632, %24
-  %2884 = fdiv fast float %2883, %2865
-  %2885 = fptoui float %2884 to i32
-  %2886 = uitofp i32 %2885 to float
-  %2887 = fmul fast float %2886, %2865
-  %2888 = fsub fast float %2883, %2887
-  %2889 = and i32 %2885, 1
-  %2890 = icmp eq i32 %2889, 0
-  br i1 %2890, label %2891, label %2893
-
-; <label>:2891                                    ; preds = %2882
-  %2892 = fsub fast float %24, %2888
-  br label %2895
-
-; <label>:2893                                    ; preds = %2882
-  %2894 = fadd fast float %2888, %20
-  br label %2895
-
-; <label>:2895                                    ; preds = %2893, %2891, %2880, %2878, %2876
-  %2896 = phi float [ %2877, %2876 ], [ %2879, %2878 ], [ %2892, %2891 ], [ %2894, %2893 ], [ %2632, %2880 ]
-  %2897 = fptoui float %2896 to i32
-  %2898 = uitofp i32 %2897 to float
-  %2899 = uitofp i32 %2864 to float
-  %2900 = fptoui float %45 to i32
-  %2901 = fptoui float %182 to i32
-  %2902 = fptoui float %2898 to i32
-  %2903 = fptoui float %2899 to i32
-  %2904 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2905 = extractvalue %dx.types.CBufRet.i32 %2904, 0
-  %2906 = extractvalue %dx.types.CBufRet.i32 %2904, 1
-  %2907 = extractvalue %dx.types.CBufRet.i32 %2904, 2
-  %2908 = extractvalue %dx.types.CBufRet.i32 %2904, 3
-  %2909 = mul i32 %2905, %2900
-  %2910 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2901, i32 %2906, i32 %2909)  ; IMad(a,b,c)
-  %2911 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2902, i32 %2907, i32 %2910)  ; IMad(a,b,c)
-  %2912 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2903, i32 %2908, i32 %2911)  ; IMad(a,b,c)
-  %2913 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %2912, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2914 = extractvalue %dx.types.ResRet.f32 %2913, 0
-  br label %2915
-
-; <label>:2915                                    ; preds = %2895, %2829, %2801, %2785, %2775
-  %2916 = phi float [ %2798, %2785 ], [ 0.000000e+00, %2775 ], [ %2828, %2801 ], [ %2914, %2895 ], [ 0.000000e+00, %2829 ]
-  br i1 %924, label %2917, label %2941
-
-; <label>:2917                                    ; preds = %2915
-  %2918 = fcmp fast oge float %1209, 0.000000e+00
-  %2919 = fptoui float %1209 to i32
-  %2920 = icmp ult i32 %2919, %13
-  %2921 = and i1 %2918, %2920
-  %2922 = fcmp fast oge float %2632, 0.000000e+00
-  %2923 = and i1 %2922, %2921
-  %2924 = fptoui float %2632 to i32
-  %2925 = icmp ult i32 %2924, %15
-  %2926 = and i1 %2925, %2923
-  br i1 %2926, label %2927, label %3057
-
-; <label>:2927                                    ; preds = %2917
-  %2928 = fptoui float %45 to i32
-  %2929 = fptoui float %182 to i32
-  %2930 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2931 = extractvalue %dx.types.CBufRet.i32 %2930, 0
-  %2932 = extractvalue %dx.types.CBufRet.i32 %2930, 1
-  %2933 = extractvalue %dx.types.CBufRet.i32 %2930, 2
-  %2934 = extractvalue %dx.types.CBufRet.i32 %2930, 3
-  %2935 = mul i32 %2931, %2928
-  %2936 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2929, i32 %2932, i32 %2935)  ; IMad(a,b,c)
-  %2937 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2924, i32 %2933, i32 %2936)  ; IMad(a,b,c)
-  %2938 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2919, i32 %2934, i32 %2937)  ; IMad(a,b,c)
-  %2939 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %2938, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2940 = extractvalue %dx.types.ResRet.f32 %2939, 0
-  br label %3057
-
-; <label>:2941                                    ; preds = %2915
-  %2942 = icmp eq i32 %923, 1
-  br i1 %2942, label %2943, label %2971
-
-; <label>:2943                                    ; preds = %2941
-  %2944 = add i32 %13, -1
-  %2945 = uitofp i32 %2944 to float
-  %2946 = call float @dx.op.binary.f32(i32 35, float %1209, float 0.000000e+00)  ; FMax(a,b)
-  %2947 = call float @dx.op.binary.f32(i32 36, float %2946, float %2945)  ; FMin(a,b)
-  %2948 = fptoui float %2947 to i32
-  %2949 = add i32 %15, -1
-  %2950 = uitofp i32 %2949 to float
-  %2951 = call float @dx.op.binary.f32(i32 35, float %2632, float 0.000000e+00)  ; FMax(a,b)
-  %2952 = call float @dx.op.binary.f32(i32 36, float %2951, float %2950)  ; FMin(a,b)
-  %2953 = fptoui float %2952 to i32
-  %2954 = uitofp i32 %2953 to float
-  %2955 = uitofp i32 %2948 to float
-  %2956 = fptoui float %45 to i32
-  %2957 = fptoui float %182 to i32
-  %2958 = fptoui float %2954 to i32
-  %2959 = fptoui float %2955 to i32
-  %2960 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2961 = extractvalue %dx.types.CBufRet.i32 %2960, 0
-  %2962 = extractvalue %dx.types.CBufRet.i32 %2960, 1
-  %2963 = extractvalue %dx.types.CBufRet.i32 %2960, 2
-  %2964 = extractvalue %dx.types.CBufRet.i32 %2960, 3
-  %2965 = mul i32 %2961, %2956
-  %2966 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2957, i32 %2962, i32 %2965)  ; IMad(a,b,c)
-  %2967 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2958, i32 %2963, i32 %2966)  ; IMad(a,b,c)
-  %2968 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2959, i32 %2964, i32 %2967)  ; IMad(a,b,c)
-  %2969 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %2968, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2970 = extractvalue %dx.types.ResRet.f32 %2969, 0
-  br label %3057
-
-; <label>:2971                                    ; preds = %2941
-  %2972 = icmp eq i32 %923, 2
-  br i1 %2972, label %2973, label %3057
-
-; <label>:2973                                    ; preds = %2971
-  %2974 = fsub fast float %22, %20
-  %2975 = fcmp fast olt float %1209, %20
-  br i1 %2975, label %2976, label %2989
-
-; <label>:2976                                    ; preds = %2973
-  %2977 = fsub fast float %20, %1209
-  %2978 = fdiv fast float %2977, %2974
-  %2979 = fptoui float %2978 to i32
-  %2980 = uitofp i32 %2979 to float
-  %2981 = fmul fast float %2980, %2974
-  %2982 = fsub fast float %2977, %2981
-  %2983 = and i32 %2979, 1
-  %2984 = icmp eq i32 %2983, 0
-  br i1 %2984, label %2985, label %2987
-
-; <label>:2985                                    ; preds = %2976
-  %2986 = fadd fast float %2982, %20
-  br label %3004
-
-; <label>:2987                                    ; preds = %2976
-  %2988 = fsub fast float %22, %2982
-  br label %3004
-
-; <label>:2989                                    ; preds = %2973
-  %2990 = fcmp fast ogt float %1209, %22
-  br i1 %2990, label %2991, label %3004
-
-; <label>:2991                                    ; preds = %2989
-  %2992 = fsub fast float %1209, %22
-  %2993 = fdiv fast float %2992, %2974
-  %2994 = fptoui float %2993 to i32
-  %2995 = uitofp i32 %2994 to float
-  %2996 = fmul fast float %2995, %2974
-  %2997 = fsub fast float %2992, %2996
-  %2998 = and i32 %2994, 1
-  %2999 = icmp eq i32 %2998, 0
-  br i1 %2999, label %3000, label %3002
-
-; <label>:3000                                    ; preds = %2991
-  %3001 = fsub fast float %22, %2997
-  br label %3004
-
-; <label>:3002                                    ; preds = %2991
-  %3003 = fadd fast float %2997, %20
-  br label %3004
-
-; <label>:3004                                    ; preds = %3002, %3000, %2989, %2987, %2985
-  %3005 = phi float [ %2986, %2985 ], [ %2988, %2987 ], [ %3001, %3000 ], [ %3003, %3002 ], [ %1209, %2989 ]
-  %3006 = fptoui float %3005 to i32
-  %3007 = fsub fast float %24, %20
-  %3008 = fcmp fast olt float %2632, %20
-  br i1 %3008, label %3009, label %3022
-
-; <label>:3009                                    ; preds = %3004
-  %3010 = fsub fast float %20, %2632
-  %3011 = fdiv fast float %3010, %3007
-  %3012 = fptoui float %3011 to i32
-  %3013 = uitofp i32 %3012 to float
-  %3014 = fmul fast float %3013, %3007
-  %3015 = fsub fast float %3010, %3014
-  %3016 = and i32 %3012, 1
-  %3017 = icmp eq i32 %3016, 0
-  br i1 %3017, label %3018, label %3020
-
-; <label>:3018                                    ; preds = %3009
-  %3019 = fadd fast float %3015, %20
-  br label %3037
-
-; <label>:3020                                    ; preds = %3009
-  %3021 = fsub fast float %24, %3015
-  br label %3037
-
-; <label>:3022                                    ; preds = %3004
-  %3023 = fcmp fast ogt float %2632, %24
-  br i1 %3023, label %3024, label %3037
-
-; <label>:3024                                    ; preds = %3022
-  %3025 = fsub fast float %2632, %24
-  %3026 = fdiv fast float %3025, %3007
-  %3027 = fptoui float %3026 to i32
-  %3028 = uitofp i32 %3027 to float
-  %3029 = fmul fast float %3028, %3007
-  %3030 = fsub fast float %3025, %3029
-  %3031 = and i32 %3027, 1
-  %3032 = icmp eq i32 %3031, 0
-  br i1 %3032, label %3033, label %3035
-
-; <label>:3033                                    ; preds = %3024
-  %3034 = fsub fast float %24, %3030
-  br label %3037
-
-; <label>:3035                                    ; preds = %3024
-  %3036 = fadd fast float %3030, %20
-  br label %3037
-
-; <label>:3037                                    ; preds = %3035, %3033, %3022, %3020, %3018
-  %3038 = phi float [ %3019, %3018 ], [ %3021, %3020 ], [ %3034, %3033 ], [ %3036, %3035 ], [ %2632, %3022 ]
-  %3039 = fptoui float %3038 to i32
-  %3040 = uitofp i32 %3039 to float
-  %3041 = uitofp i32 %3006 to float
-  %3042 = fptoui float %45 to i32
-  %3043 = fptoui float %182 to i32
-  %3044 = fptoui float %3040 to i32
-  %3045 = fptoui float %3041 to i32
-  %3046 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3047 = extractvalue %dx.types.CBufRet.i32 %3046, 0
-  %3048 = extractvalue %dx.types.CBufRet.i32 %3046, 1
-  %3049 = extractvalue %dx.types.CBufRet.i32 %3046, 2
-  %3050 = extractvalue %dx.types.CBufRet.i32 %3046, 3
-  %3051 = mul i32 %3047, %3042
-  %3052 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3043, i32 %3048, i32 %3051)  ; IMad(a,b,c)
-  %3053 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3044, i32 %3049, i32 %3052)  ; IMad(a,b,c)
-  %3054 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3045, i32 %3050, i32 %3053)  ; IMad(a,b,c)
-  %3055 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %3054, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3056 = extractvalue %dx.types.ResRet.f32 %3055, 0
-  br label %3057
-
-; <label>:3057                                    ; preds = %3037, %2971, %2943, %2927, %2917
-  %3058 = phi float [ %2940, %2927 ], [ 0.000000e+00, %2917 ], [ %2970, %2943 ], [ %3056, %3037 ], [ 0.000000e+00, %2971 ]
-  br i1 %924, label %3059, label %3083
-
-; <label>:3059                                    ; preds = %3057
-  %3060 = fcmp fast oge float %1352, 0.000000e+00
-  %3061 = fptoui float %1352 to i32
-  %3062 = icmp ult i32 %3061, %13
-  %3063 = and i1 %3060, %3062
-  %3064 = fcmp fast oge float %2632, 0.000000e+00
-  %3065 = and i1 %3064, %3063
-  %3066 = fptoui float %2632 to i32
-  %3067 = icmp ult i32 %3066, %15
-  %3068 = and i1 %3067, %3065
-  br i1 %3068, label %3069, label %3199
-
-; <label>:3069                                    ; preds = %3059
-  %3070 = fptoui float %45 to i32
-  %3071 = fptoui float %182 to i32
-  %3072 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3073 = extractvalue %dx.types.CBufRet.i32 %3072, 0
-  %3074 = extractvalue %dx.types.CBufRet.i32 %3072, 1
-  %3075 = extractvalue %dx.types.CBufRet.i32 %3072, 2
-  %3076 = extractvalue %dx.types.CBufRet.i32 %3072, 3
-  %3077 = mul i32 %3073, %3070
-  %3078 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3071, i32 %3074, i32 %3077)  ; IMad(a,b,c)
-  %3079 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3066, i32 %3075, i32 %3078)  ; IMad(a,b,c)
-  %3080 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3061, i32 %3076, i32 %3079)  ; IMad(a,b,c)
-  %3081 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %3080, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3082 = extractvalue %dx.types.ResRet.f32 %3081, 0
-  br label %3199
-
-; <label>:3083                                    ; preds = %3057
-  %3084 = icmp eq i32 %923, 1
-  br i1 %3084, label %3085, label %3113
-
-; <label>:3085                                    ; preds = %3083
-  %3086 = add i32 %13, -1
-  %3087 = uitofp i32 %3086 to float
-  %3088 = call float @dx.op.binary.f32(i32 35, float %1352, float 0.000000e+00)  ; FMax(a,b)
-  %3089 = call float @dx.op.binary.f32(i32 36, float %3088, float %3087)  ; FMin(a,b)
-  %3090 = fptoui float %3089 to i32
-  %3091 = add i32 %15, -1
-  %3092 = uitofp i32 %3091 to float
-  %3093 = call float @dx.op.binary.f32(i32 35, float %2632, float 0.000000e+00)  ; FMax(a,b)
-  %3094 = call float @dx.op.binary.f32(i32 36, float %3093, float %3092)  ; FMin(a,b)
-  %3095 = fptoui float %3094 to i32
-  %3096 = uitofp i32 %3095 to float
-  %3097 = uitofp i32 %3090 to float
-  %3098 = fptoui float %45 to i32
-  %3099 = fptoui float %182 to i32
-  %3100 = fptoui float %3096 to i32
-  %3101 = fptoui float %3097 to i32
-  %3102 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3103 = extractvalue %dx.types.CBufRet.i32 %3102, 0
-  %3104 = extractvalue %dx.types.CBufRet.i32 %3102, 1
-  %3105 = extractvalue %dx.types.CBufRet.i32 %3102, 2
-  %3106 = extractvalue %dx.types.CBufRet.i32 %3102, 3
-  %3107 = mul i32 %3103, %3098
-  %3108 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3099, i32 %3104, i32 %3107)  ; IMad(a,b,c)
-  %3109 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3100, i32 %3105, i32 %3108)  ; IMad(a,b,c)
-  %3110 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3101, i32 %3106, i32 %3109)  ; IMad(a,b,c)
-  %3111 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %3110, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3112 = extractvalue %dx.types.ResRet.f32 %3111, 0
-  br label %3199
-
-; <label>:3113                                    ; preds = %3083
-  %3114 = icmp eq i32 %923, 2
-  br i1 %3114, label %3115, label %3199
-
-; <label>:3115                                    ; preds = %3113
-  %3116 = fsub fast float %22, %20
-  %3117 = fcmp fast olt float %1352, %20
-  br i1 %3117, label %3118, label %3131
-
-; <label>:3118                                    ; preds = %3115
-  %3119 = fsub fast float %20, %1352
-  %3120 = fdiv fast float %3119, %3116
-  %3121 = fptoui float %3120 to i32
-  %3122 = uitofp i32 %3121 to float
-  %3123 = fmul fast float %3122, %3116
-  %3124 = fsub fast float %3119, %3123
-  %3125 = and i32 %3121, 1
-  %3126 = icmp eq i32 %3125, 0
-  br i1 %3126, label %3127, label %3129
-
-; <label>:3127                                    ; preds = %3118
-  %3128 = fadd fast float %3124, %20
-  br label %3146
-
-; <label>:3129                                    ; preds = %3118
-  %3130 = fsub fast float %22, %3124
-  br label %3146
-
-; <label>:3131                                    ; preds = %3115
-  %3132 = fcmp fast ogt float %1352, %22
-  br i1 %3132, label %3133, label %3146
-
-; <label>:3133                                    ; preds = %3131
-  %3134 = fsub fast float %1352, %22
-  %3135 = fdiv fast float %3134, %3116
-  %3136 = fptoui float %3135 to i32
-  %3137 = uitofp i32 %3136 to float
-  %3138 = fmul fast float %3137, %3116
-  %3139 = fsub fast float %3134, %3138
-  %3140 = and i32 %3136, 1
-  %3141 = icmp eq i32 %3140, 0
-  br i1 %3141, label %3142, label %3144
-
-; <label>:3142                                    ; preds = %3133
-  %3143 = fsub fast float %22, %3139
-  br label %3146
-
-; <label>:3144                                    ; preds = %3133
-  %3145 = fadd fast float %3139, %20
-  br label %3146
-
-; <label>:3146                                    ; preds = %3144, %3142, %3131, %3129, %3127
-  %3147 = phi float [ %3128, %3127 ], [ %3130, %3129 ], [ %3143, %3142 ], [ %3145, %3144 ], [ %1352, %3131 ]
-  %3148 = fptoui float %3147 to i32
-  %3149 = fsub fast float %24, %20
-  %3150 = fcmp fast olt float %2632, %20
-  br i1 %3150, label %3151, label %3164
-
-; <label>:3151                                    ; preds = %3146
-  %3152 = fsub fast float %20, %2632
-  %3153 = fdiv fast float %3152, %3149
-  %3154 = fptoui float %3153 to i32
-  %3155 = uitofp i32 %3154 to float
-  %3156 = fmul fast float %3155, %3149
-  %3157 = fsub fast float %3152, %3156
-  %3158 = and i32 %3154, 1
-  %3159 = icmp eq i32 %3158, 0
-  br i1 %3159, label %3160, label %3162
-
-; <label>:3160                                    ; preds = %3151
-  %3161 = fadd fast float %3157, %20
-  br label %3179
-
-; <label>:3162                                    ; preds = %3151
-  %3163 = fsub fast float %24, %3157
-  br label %3179
-
-; <label>:3164                                    ; preds = %3146
-  %3165 = fcmp fast ogt float %2632, %24
-  br i1 %3165, label %3166, label %3179
-
-; <label>:3166                                    ; preds = %3164
-  %3167 = fsub fast float %2632, %24
-  %3168 = fdiv fast float %3167, %3149
-  %3169 = fptoui float %3168 to i32
-  %3170 = uitofp i32 %3169 to float
-  %3171 = fmul fast float %3170, %3149
-  %3172 = fsub fast float %3167, %3171
-  %3173 = and i32 %3169, 1
-  %3174 = icmp eq i32 %3173, 0
-  br i1 %3174, label %3175, label %3177
-
-; <label>:3175                                    ; preds = %3166
-  %3176 = fsub fast float %24, %3172
-  br label %3179
-
-; <label>:3177                                    ; preds = %3166
-  %3178 = fadd fast float %3172, %20
-  br label %3179
-
-; <label>:3179                                    ; preds = %3177, %3175, %3164, %3162, %3160
-  %3180 = phi float [ %3161, %3160 ], [ %3163, %3162 ], [ %3176, %3175 ], [ %3178, %3177 ], [ %2632, %3164 ]
-  %3181 = fptoui float %3180 to i32
-  %3182 = uitofp i32 %3181 to float
-  %3183 = uitofp i32 %3148 to float
-  %3184 = fptoui float %45 to i32
-  %3185 = fptoui float %182 to i32
-  %3186 = fptoui float %3182 to i32
-  %3187 = fptoui float %3183 to i32
-  %3188 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3189 = extractvalue %dx.types.CBufRet.i32 %3188, 0
-  %3190 = extractvalue %dx.types.CBufRet.i32 %3188, 1
-  %3191 = extractvalue %dx.types.CBufRet.i32 %3188, 2
-  %3192 = extractvalue %dx.types.CBufRet.i32 %3188, 3
-  %3193 = mul i32 %3189, %3184
-  %3194 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3185, i32 %3190, i32 %3193)  ; IMad(a,b,c)
-  %3195 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3186, i32 %3191, i32 %3194)  ; IMad(a,b,c)
-  %3196 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3187, i32 %3192, i32 %3195)  ; IMad(a,b,c)
-  %3197 = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %3, i32 %3196, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3198 = extractvalue %dx.types.ResRet.f32 %3197, 0
-  br label %3199
-
-; <label>:3199                                    ; preds = %3179, %3113, %3085, %3069, %3059
-  %3200 = phi float [ %3082, %3069 ], [ 0.000000e+00, %3059 ], [ %3112, %3085 ], [ %3198, %3179 ], [ 0.000000e+00, %3113 ]
-  %3201 = call float @dx.op.unary.f32(i32 22, float %180)  ; Frc(value)
-  %3202 = call float @dx.op.unary.f32(i32 22, float %181)  ; Frc(value)
-  %3203 = fmul fast float %3202, %3202
-  %3204 = fmul fast float %3203, %3202
-  %3205 = fmul fast float %1066, -7.500000e-01
-  %3206 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2205, float %3205)  ; FMad(a,b,c)
-  %3207 = fmul fast float %1066, 1.500000e+00
-  %3208 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %1636, float %3207)  ; FMad(a,b,c)
-  %3209 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %2205, float %3208)  ; FMad(a,b,c)
-  %3210 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %2774, float %3209)  ; FMad(a,b,c)
-  %3211 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %1636, float %3205)  ; FMad(a,b,c)
-  %3212 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %2205, float %3211)  ; FMad(a,b,c)
-  %3213 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2774, float %3212)  ; FMad(a,b,c)
-  %3214 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3202, float %3203, float %3204, float %1636, float %3206, float %3210, float %3213)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3215 = fmul fast float %1208, -7.500000e-01
-  %3216 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2347, float %3215)  ; FMad(a,b,c)
-  %3217 = fmul fast float %1208, 1.500000e+00
-  %3218 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %1778, float %3217)  ; FMad(a,b,c)
-  %3219 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %2347, float %3218)  ; FMad(a,b,c)
-  %3220 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %2916, float %3219)  ; FMad(a,b,c)
-  %3221 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %1778, float %3215)  ; FMad(a,b,c)
-  %3222 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %2347, float %3221)  ; FMad(a,b,c)
-  %3223 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2916, float %3222)  ; FMad(a,b,c)
-  %3224 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3202, float %3203, float %3204, float %1778, float %3216, float %3220, float %3223)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3225 = fmul fast float %1351, -7.500000e-01
-  %3226 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2489, float %3225)  ; FMad(a,b,c)
-  %3227 = fmul fast float %1351, 1.500000e+00
-  %3228 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %1920, float %3227)  ; FMad(a,b,c)
-  %3229 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %2489, float %3228)  ; FMad(a,b,c)
-  %3230 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %3058, float %3229)  ; FMad(a,b,c)
-  %3231 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %1920, float %3225)  ; FMad(a,b,c)
-  %3232 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %2489, float %3231)  ; FMad(a,b,c)
-  %3233 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3058, float %3232)  ; FMad(a,b,c)
-  %3234 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3202, float %3203, float %3204, float %1920, float %3226, float %3230, float %3233)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3235 = fmul fast float %1494, -7.500000e-01
-  %3236 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2631, float %3235)  ; FMad(a,b,c)
-  %3237 = fmul fast float %1494, 1.500000e+00
-  %3238 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %2062, float %3237)  ; FMad(a,b,c)
-  %3239 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %2631, float %3238)  ; FMad(a,b,c)
-  %3240 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %3200, float %3239)  ; FMad(a,b,c)
-  %3241 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %2062, float %3235)  ; FMad(a,b,c)
-  %3242 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %2631, float %3241)  ; FMad(a,b,c)
-  %3243 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3200, float %3242)  ; FMad(a,b,c)
-  %3244 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3202, float %3203, float %3204, float %2062, float %3236, float %3240, float %3243)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3245 = fmul fast float %3201, %3201
-  %3246 = fmul fast float %3245, %3201
-  %3247 = fmul fast float %3214, -7.500000e-01
-  %3248 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3234, float %3247)  ; FMad(a,b,c)
-  %3249 = fmul fast float %3214, 1.500000e+00
-  %3250 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %3224, float %3249)  ; FMad(a,b,c)
-  %3251 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %3234, float %3250)  ; FMad(a,b,c)
-  %3252 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %3244, float %3251)  ; FMad(a,b,c)
-  %3253 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %3224, float %3247)  ; FMad(a,b,c)
-  %3254 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %3234, float %3253)  ; FMad(a,b,c)
-  %3255 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3244, float %3254)  ; FMad(a,b,c)
-  %3256 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3201, float %3245, float %3246, float %3224, float %3248, float %3252, float %3255)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %1, i32 %8, i32 0, float %3256, float undef, float undef, float undef, i8 1, i32 4)  ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment)
-  br label %3257
-
-; <label>:3257                                    ; preds = %3199, %916, %903, %326, %0
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.threadId.i32(i32, i32) #0
-
-; Function Attrs: nounwind readonly
-declare %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32, %dx.types.Handle, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.rawBufferStore.f32(i32, %dx.types.Handle, i32, i32, float, float, float, float, i8, i32) #2
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.unary.f32(i32, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.tertiary.f32(i32, float, float, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.binary.f32(i32, float, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.dot4.f32(i32, float, float, float, float, float, float, float, float) #0
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.tertiary.i32(i32, i32, i32, i32) #0
-
-; Function Attrs: nounwind readonly
-declare %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32, %dx.types.Handle, i32) #1
-
-; Function Attrs: nounwind readonly
-declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #1
-
-; Function Attrs: nounwind readonly
-declare %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32, %dx.types.Handle, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind readnone
-declare double @dx.op.makeDouble.f64(i32, i32, i32) #0
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind readonly }
-attributes #2 = { nounwind }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.valver = !{!2}
-!dx.shaderModel = !{!3}
-!dx.resources = !{!4}
-!dx.entryPoints = !{!13}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 1, i32 2}
-!2 = !{i32 1, i32 6}
-!3 = !{!"cs", i32 6, i32 2}
-!4 = !{null, !5, !11, null}
-!5 = !{!6, !8, !10}
-!6 = !{i32 0, %"class.RWStructuredBuffer<float>"* undef, !"", i32 0, i32 0, i32 1, i32 12, i1 false, i1 false, i1 false, !7}
-!7 = !{i32 1, i32 4}
-!8 = !{i32 1, %"class.RWStructuredBuffer<double>"* undef, !"", i32 0, i32 1, i32 1, i32 12, i1 false, i1 false, i1 false, !9}
-!9 = !{i32 1, i32 8}
-!10 = !{i32 2, %"class.RWStructuredBuffer<float>"* undef, !"", i32 0, i32 2, i32 1, i32 12, i1 false, i1 false, i1 false, !7}
-!11 = !{!12}
-!12 = !{i32 0, %Constants* undef, !"", i32 0, i32 0, i32 1, i32 116, null}
-!13 = !{void ()* @GridSample, !"GridSample", null, !4, !14}
-!14 = !{i32 0, i64 8388628, i32 4, !15}
-!15 = !{i32 64, i32 1, i32 1}
-
-#endif
-
-const unsigned char g_GridSample[] = {
-  0x44, 0x58, 0x42, 0x43, 0xf7, 0x27, 0x59, 0xbc, 0xb6, 0x5f, 0xdc, 0x3b,
-  0x28, 0xbf, 0x0e, 0x33, 0x49, 0xce, 0x2e, 0xab, 0x01, 0x00, 0x00, 0x00,
-  0xd0, 0x53, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00,
-  0x48, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00,
-  0x18, 0x01, 0x00, 0x00, 0x34, 0x01, 0x00, 0x00, 0x53, 0x46, 0x49, 0x30,
-  0x08, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x49, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x08, 0x00, 0x00, 0x00, 0x4f, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x50, 0x53, 0x56, 0x30,
-  0xa8, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x05, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00,
-  0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
-  0x18, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x48, 0x41, 0x53, 0x48, 0x14, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0xba, 0xaf, 0x12, 0x9c, 0x1e, 0xac, 0xbe, 0xcb,
-  0xb6, 0x59, 0x16, 0xf5, 0x79, 0x13, 0xe0, 0x4f, 0x44, 0x58, 0x49, 0x4c,
-  0x94, 0x52, 0x00, 0x00, 0x62, 0x00, 0x05, 0x00, 0xa5, 0x14, 0x00, 0x00,
-  0x44, 0x58, 0x49, 0x4c, 0x02, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
-  0x7c, 0x52, 0x00, 0x00, 0x42, 0x43, 0xc0, 0xde, 0x21, 0x0c, 0x00, 0x00,
-  0x9c, 0x14, 0x00, 0x00, 0x0b, 0x82, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00,
-  0x13, 0x00, 0x00, 0x00, 0x07, 0x81, 0x23, 0x91, 0x41, 0xc8, 0x04, 0x49,
-  0x06, 0x10, 0x32, 0x39, 0x92, 0x01, 0x84, 0x0c, 0x25, 0x05, 0x08, 0x19,
-  0x1e, 0x04, 0x8b, 0x62, 0x80, 0x18, 0x45, 0x02, 0x42, 0x92, 0x0b, 0x42,
-  0xc4, 0x10, 0x32, 0x14, 0x38, 0x08, 0x18, 0x4b, 0x0a, 0x32, 0x62, 0x88,
-  0x48, 0x90, 0x14, 0x20, 0x43, 0x46, 0x88, 0xa5, 0x00, 0x19, 0x32, 0x42,
-  0xe4, 0x48, 0x0e, 0x90, 0x11, 0x23, 0xc4, 0x50, 0x41, 0x51, 0x81, 0x8c,
-  0xe1, 0x83, 0xe5, 0x8a, 0x04, 0x31, 0x46, 0x06, 0x51, 0x18, 0x00, 0x00,
-  0x08, 0x00, 0x00, 0x00, 0x1b, 0x8c, 0xe0, 0xff, 0xff, 0xff, 0xff, 0x07,
-  0x40, 0x02, 0xa8, 0x0d, 0x86, 0xf0, 0xff, 0xff, 0xff, 0xff, 0x03, 0x20,
-  0x01, 0xd5, 0x06, 0x62, 0xf8, 0xff, 0xff, 0xff, 0xff, 0x01, 0x90, 0x00,
-  0x49, 0x18, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x13, 0x82, 0x60, 0x42,
-  0x20, 0x4c, 0x08, 0x06, 0x00, 0x00, 0x00, 0x00, 0x89, 0x20, 0x00, 0x00,
-  0x57, 0x00, 0x00, 0x00, 0x32, 0x22, 0x88, 0x09, 0x20, 0x64, 0x85, 0x04,
-  0x13, 0x23, 0xa4, 0x84, 0x04, 0x13, 0x23, 0xe3, 0x84, 0xa1, 0x90, 0x14,
-  0x12, 0x4c, 0x8c, 0x8c, 0x0b, 0x84, 0xc4, 0x4c, 0x10, 0xbc, 0xc1, 0x08,
-  0x40, 0x09, 0x00, 0x0a, 0xe6, 0x08, 0xc0, 0xa0, 0x0c, 0xc3, 0x30, 0x10,
-  0x31, 0x03, 0x70, 0xd3, 0x70, 0xf9, 0x13, 0xf6, 0x10, 0x92, 0xbf, 0x12,
-  0xd2, 0x4a, 0x4c, 0x7e, 0x71, 0xdb, 0xa8, 0x30, 0x0c, 0xc3, 0x18, 0xe6,
-  0x08, 0x10, 0x42, 0xee, 0x19, 0x2e, 0x7f, 0xc2, 0x1e, 0x42, 0xf2, 0x43,
-  0xa0, 0x19, 0x16, 0x02, 0x05, 0x49, 0x39, 0x8e, 0x41, 0x19, 0x06, 0x64,
-  0xa0, 0xa5, 0x2c, 0xc0, 0xa0, 0x0c, 0x83, 0x61, 0x18, 0x06, 0x32, 0x50,
-  0x53, 0x06, 0x63, 0x30, 0xe8, 0x29, 0x85, 0x31, 0x18, 0x86, 0x41, 0x51,
-  0x21, 0x8c, 0xc1, 0x30, 0x68, 0x2a, 0x8a, 0x31, 0x18, 0x86, 0x61, 0x18,
-  0x86, 0x61, 0x50, 0x55, 0x8a, 0x61, 0x18, 0x86, 0x81, 0xae, 0xa3, 0x86,
-  0xcb, 0x9f, 0xb0, 0x87, 0x90, 0x7c, 0x6e, 0xa3, 0x8a, 0x95, 0x98, 0x7c,
-  0xe4, 0xb6, 0x11, 0x31, 0x0c, 0xc3, 0x50, 0x88, 0x6c, 0x50, 0x06, 0xd2,
-  0xe6, 0x08, 0x82, 0x62, 0x28, 0x03, 0x32, 0x0c, 0x1c, 0x75, 0x37, 0x0d,
-  0x97, 0x3f, 0x61, 0x0f, 0x21, 0xf9, 0x2b, 0x21, 0xad, 0xc4, 0xe4, 0x23,
-  0xb7, 0x8d, 0x8a, 0x61, 0x18, 0x86, 0xa1, 0x1c, 0xdf, 0xa0, 0x0c, 0x03,
-  0x32, 0x10, 0x38, 0x04, 0x50, 0x08, 0x71, 0x18, 0x06, 0x1a, 0x07, 0x02,
-  0x66, 0xfa, 0xc6, 0x81, 0x1d, 0xc2, 0x61, 0x1e, 0xe6, 0xc1, 0x0d, 0x64,
-  0xe1, 0x16, 0x66, 0x81, 0x1e, 0xe4, 0xa1, 0x1e, 0xc6, 0x81, 0x1e, 0xea,
-  0x41, 0x1e, 0xca, 0x81, 0x1c, 0x44, 0xa1, 0x1e, 0xcc, 0xc1, 0x1c, 0xca,
-  0x41, 0x1e, 0xf8, 0xc0, 0x1c, 0xd8, 0xe1, 0x1d, 0xc2, 0x81, 0x1e, 0xfc,
-  0x00, 0x05, 0x06, 0x99, 0x33, 0x81, 0xc1, 0x38, 0xb0, 0x43, 0x38, 0xcc,
-  0xc3, 0x3c, 0xb8, 0x81, 0x2c, 0xdc, 0xc2, 0x2c, 0xd0, 0x83, 0x3c, 0xd4,
-  0xc3, 0x38, 0xd0, 0x43, 0x3d, 0xc8, 0x43, 0x39, 0x90, 0x83, 0x28, 0xd4,
-  0x83, 0x39, 0x98, 0x43, 0x39, 0xc8, 0x03, 0x1f, 0x90, 0xc3, 0x3b, 0xd4,
-  0x83, 0x38, 0xb0, 0x43, 0x39, 0xf8, 0x01, 0x0a, 0x44, 0x42, 0x87, 0x11,
-  0x88, 0xe1, 0x12, 0xce, 0x69, 0xa4, 0x09, 0x68, 0x26, 0x09, 0x2d, 0xc3,
-  0x30, 0x0c, 0xaa, 0xaa, 0xaa, 0xaa, 0x3a, 0xd0, 0x3a, 0x47, 0x00, 0x0a,
-  0x53, 0x00, 0x00, 0x00, 0x13, 0x14, 0x72, 0xc0, 0x87, 0x74, 0x60, 0x87,
-  0x36, 0x68, 0x87, 0x79, 0x68, 0x03, 0x72, 0xc0, 0x87, 0x0d, 0xae, 0x50,
-  0x0e, 0x6d, 0xd0, 0x0e, 0x7a, 0x50, 0x0e, 0x6d, 0x00, 0x0f, 0x7a, 0x30,
-  0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0xa0,
-  0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x78, 0xa0, 0x07, 0x78, 0xd0,
-  0x06, 0xe9, 0x10, 0x07, 0x76, 0xa0, 0x07, 0x71, 0x60, 0x07, 0x6d, 0x90,
-  0x0e, 0x73, 0x20, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xd0, 0x06, 0xe9, 0x60,
-  0x07, 0x74, 0xa0, 0x07, 0x76, 0x40, 0x07, 0x6d, 0x60, 0x0e, 0x71, 0x60,
-  0x07, 0x7a, 0x10, 0x07, 0x76, 0xd0, 0x06, 0xe6, 0x30, 0x07, 0x72, 0xa0,
-  0x07, 0x73, 0x20, 0x07, 0x6d, 0x60, 0x0e, 0x76, 0x40, 0x07, 0x7a, 0x60,
-  0x07, 0x74, 0xd0, 0x06, 0xee, 0x80, 0x07, 0x7a, 0x10, 0x07, 0x76, 0xa0,
-  0x07, 0x73, 0x20, 0x07, 0x7a, 0x60, 0x07, 0x74, 0x30, 0xe4, 0x09, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0xc8, 0x43,
-  0x00, 0x01, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x90,
-  0x67, 0x01, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80,
-  0x21, 0x4f, 0x03, 0x04, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x43, 0x9e, 0x07, 0x08, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x86, 0x3c, 0x11, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x0c, 0x79, 0x26, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x18, 0xf2, 0x54, 0x40, 0x00, 0x04, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0xe4, 0xb9, 0x80, 0x00, 0x08, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0xc8, 0xa3, 0x01, 0x01, 0x20,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x90, 0xa7, 0x03, 0x02,
-  0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x21, 0x0f, 0x18,
-  0x00, 0x01, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x90,
-  0x67, 0x0c, 0x80, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x20, 0x0b, 0x04, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x32, 0x1e, 0x98, 0x14,
-  0x19, 0x11, 0x4c, 0x90, 0x8c, 0x09, 0x26, 0x47, 0xc6, 0x04, 0x43, 0x1a,
-  0x4a, 0xa0, 0x08, 0x8a, 0x61, 0x04, 0xa0, 0x30, 0x0a, 0xa1, 0x20, 0x0a,
-  0x3d, 0xa0, 0x00, 0x03, 0x88, 0x1b, 0x01, 0xa0, 0xb6, 0x50, 0x01, 0x01,
-  0x11, 0x28, 0x9d, 0x01, 0x20, 0x76, 0x06, 0x80, 0xce, 0x19, 0x00, 0x00,
-  0x79, 0x18, 0x00, 0x00, 0x4b, 0x00, 0x00, 0x00, 0x1a, 0x03, 0x4c, 0x90,
-  0x46, 0x02, 0x13, 0x44, 0x35, 0x18, 0x63, 0x0b, 0x73, 0x3b, 0x03, 0xb1,
-  0x2b, 0x93, 0x9b, 0x4b, 0x7b, 0x73, 0x03, 0x99, 0x71, 0xb9, 0x01, 0x41,
-  0xa1, 0x0b, 0x3b, 0x9b, 0x7b, 0x91, 0x2a, 0x62, 0x2a, 0x0a, 0x9a, 0x2a,
-  0xfa, 0x9a, 0xb9, 0x81, 0x79, 0x31, 0x4b, 0x73, 0x0b, 0x63, 0x4b, 0xd9,
-  0x10, 0x04, 0x13, 0x84, 0xa1, 0x99, 0x20, 0x0c, 0xce, 0x06, 0x61, 0x20,
-  0x26, 0x08, 0xc3, 0xb3, 0x41, 0x18, 0x0c, 0x0a, 0x63, 0x73, 0x1b, 0x06,
-  0xc4, 0x20, 0x26, 0x08, 0x03, 0x34, 0x41, 0x38, 0x03, 0x8d, 0xc0, 0x04,
-  0x61, 0x88, 0x26, 0x08, 0x9c, 0x35, 0x41, 0x18, 0xa4, 0x0d, 0xc2, 0xf0,
-  0x6c, 0x58, 0x94, 0x85, 0x51, 0x94, 0xa1, 0x71, 0x1c, 0x07, 0x9a, 0x20,
-  0xa4, 0x01, 0x36, 0x41, 0x18, 0xa6, 0x0d, 0xc2, 0x30, 0x6d, 0x58, 0x06,
-  0x89, 0x51, 0x86, 0xa1, 0x71, 0x1c, 0x87, 0xda, 0xb0, 0x10, 0x0b, 0xa3,
-  0x10, 0x43, 0xe3, 0x38, 0x0e, 0xb4, 0x61, 0x88, 0x2a, 0x6b, 0x82, 0xc0,
-  0x06, 0xd9, 0x04, 0x61, 0xa0, 0x36, 0x20, 0x0a, 0xc6, 0x28, 0xca, 0x90,
-  0x01, 0x1b, 0x02, 0x6d, 0x03, 0x01, 0x5c, 0x1b, 0x30, 0x41, 0x10, 0x00,
-  0x2a, 0x47, 0x72, 0x69, 0x64, 0x53, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x13,
-  0x84, 0x36, 0xb8, 0x26, 0x08, 0x43, 0xb5, 0x61, 0x00, 0x83, 0x61, 0xd8,
-  0x40, 0x28, 0xdf, 0x13, 0x06, 0x1b, 0x8a, 0xce, 0x03, 0x38, 0x31, 0xa8,
-  0xc2, 0xc6, 0x66, 0xd7, 0xe6, 0x92, 0x46, 0x56, 0xe6, 0x46, 0x37, 0x25,
-  0x08, 0xaa, 0x90, 0xe1, 0xb9, 0xd8, 0x95, 0xc9, 0xcd, 0xa5, 0xbd, 0xb9,
-  0x4d, 0x09, 0x88, 0x26, 0x64, 0x78, 0x2e, 0x76, 0x61, 0x6c, 0x76, 0x65,
-  0x72, 0x53, 0x02, 0xa3, 0x0e, 0x19, 0x9e, 0xcb, 0x1c, 0x5a, 0x18, 0x59,
-  0x99, 0x5c, 0xd3, 0x1b, 0x59, 0x19, 0xdb, 0x94, 0x00, 0x29, 0x43, 0x86,
-  0xe7, 0x22, 0x57, 0x36, 0xf7, 0x56, 0x27, 0x37, 0x56, 0x36, 0x37, 0x25,
-  0xd8, 0xea, 0x90, 0xe1, 0xb9, 0x94, 0xb9, 0xd1, 0xc9, 0xe5, 0x41, 0xbd,
-  0xa5, 0xb9, 0xd1, 0xcd, 0x4d, 0x09, 0xc4, 0x00, 0x79, 0x18, 0x00, 0x00,
-  0x51, 0x00, 0x00, 0x00, 0x33, 0x08, 0x80, 0x1c, 0xc4, 0xe1, 0x1c, 0x66,
-  0x14, 0x01, 0x3d, 0x88, 0x43, 0x38, 0x84, 0xc3, 0x8c, 0x42, 0x80, 0x07,
-  0x79, 0x78, 0x07, 0x73, 0x98, 0x71, 0x0c, 0xe6, 0x00, 0x0f, 0xed, 0x10,
-  0x0e, 0xf4, 0x80, 0x0e, 0x33, 0x0c, 0x42, 0x1e, 0xc2, 0xc1, 0x1d, 0xce,
-  0xa1, 0x1c, 0x66, 0x30, 0x05, 0x3d, 0x88, 0x43, 0x38, 0x84, 0x83, 0x1b,
-  0xcc, 0x03, 0x3d, 0xc8, 0x43, 0x3d, 0x8c, 0x03, 0x3d, 0xcc, 0x78, 0x8c,
-  0x74, 0x70, 0x07, 0x7b, 0x08, 0x07, 0x79, 0x48, 0x87, 0x70, 0x70, 0x07,
-  0x7a, 0x70, 0x03, 0x76, 0x78, 0x87, 0x70, 0x20, 0x87, 0x19, 0xcc, 0x11,
-  0x0e, 0xec, 0x90, 0x0e, 0xe1, 0x30, 0x0f, 0x6e, 0x30, 0x0f, 0xe3, 0xf0,
-  0x0e, 0xf0, 0x50, 0x0e, 0x33, 0x10, 0xc4, 0x1d, 0xde, 0x21, 0x1c, 0xd8,
-  0x21, 0x1d, 0xc2, 0x61, 0x1e, 0x66, 0x30, 0x89, 0x3b, 0xbc, 0x83, 0x3b,
-  0xd0, 0x43, 0x39, 0xb4, 0x03, 0x3c, 0xbc, 0x83, 0x3c, 0x84, 0x03, 0x3b,
-  0xcc, 0xf0, 0x14, 0x76, 0x60, 0x07, 0x7b, 0x68, 0x07, 0x37, 0x68, 0x87,
-  0x72, 0x68, 0x07, 0x37, 0x80, 0x87, 0x70, 0x90, 0x87, 0x70, 0x60, 0x07,
-  0x76, 0x28, 0x07, 0x76, 0xf8, 0x05, 0x76, 0x78, 0x87, 0x77, 0x80, 0x87,
-  0x5f, 0x08, 0x87, 0x71, 0x18, 0x87, 0x72, 0x98, 0x87, 0x79, 0x98, 0x81,
-  0x2c, 0xee, 0xf0, 0x0e, 0xee, 0xe0, 0x0e, 0xf5, 0xc0, 0x0e, 0xec, 0x30,
-  0x03, 0x62, 0xc8, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xcc, 0xa1, 0x1c, 0xe4,
-  0xa1, 0x1c, 0xdc, 0x61, 0x1c, 0xca, 0x21, 0x1c, 0xc4, 0x81, 0x1d, 0xca,
-  0x61, 0x06, 0xd6, 0x90, 0x43, 0x39, 0xc8, 0x43, 0x39, 0x98, 0x43, 0x39,
-  0xc8, 0x43, 0x39, 0xb8, 0xc3, 0x38, 0x94, 0x43, 0x38, 0x88, 0x03, 0x3b,
-  0x94, 0xc3, 0x2f, 0xbc, 0x83, 0x3c, 0xfc, 0x82, 0x3b, 0xd4, 0x03, 0x3b,
-  0xb0, 0xc3, 0x0c, 0xc4, 0x21, 0x07, 0x7c, 0x70, 0x03, 0x7a, 0x28, 0x87,
-  0x76, 0x80, 0x87, 0x19, 0xd1, 0x43, 0x0e, 0xf8, 0xe0, 0x06, 0xe4, 0x20,
-  0x0e, 0xe7, 0xe0, 0x06, 0xf6, 0x10, 0x0e, 0xf2, 0xc0, 0x0e, 0xe1, 0x90,
-  0x0f, 0xef, 0x50, 0x0f, 0xf4, 0x30, 0x83, 0x81, 0xc8, 0x01, 0x1f, 0xdc,
-  0x40, 0x1c, 0xe4, 0xa1, 0x1c, 0xc2, 0x61, 0x1d, 0xdc, 0x40, 0x1c, 0xe4,
-  0x01, 0x00, 0x00, 0x00, 0x71, 0x20, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00,
-  0x06, 0xa0, 0x80, 0x11, 0x32, 0xb0, 0x00, 0xf3, 0x2c, 0x84, 0x19, 0x40,
-  0xc3, 0xe5, 0x3b, 0x8f, 0x1f, 0x20, 0x0d, 0x10, 0x61, 0x7e, 0x71, 0xdb,
-  0x96, 0xb0, 0x0d, 0x97, 0xef, 0x3c, 0xbe, 0x10, 0x50, 0x45, 0x41, 0x44,
-  0xa5, 0x03, 0x0c, 0x25, 0x61, 0x00, 0x02, 0xe6, 0x23, 0xb7, 0x6d, 0x0a,
-  0xd2, 0x70, 0xf9, 0xce, 0xe3, 0x0b, 0x11, 0x01, 0x4c, 0x44, 0x08, 0x34,
-  0xc3, 0x42, 0xd8, 0x81, 0x33, 0x5c, 0xbe, 0xf3, 0xf8, 0x83, 0x33, 0xe1,
-  0x7e, 0x71, 0xdb, 0xc6, 0x40, 0x0d, 0x97, 0xef, 0x3c, 0x3e, 0x03, 0x28,
-  0x44, 0xe7, 0x50, 0xc1, 0x42, 0xf8, 0x85, 0x8e, 0x9b, 0xc0, 0x35, 0x5c,
-  0xbe, 0xf3, 0xf8, 0x11, 0x60, 0x6d, 0x54, 0x51, 0x10, 0x51, 0xe9, 0x00,
-  0x83, 0x5f, 0xdc, 0xb6, 0x2d, 0x5c, 0xc3, 0xe5, 0x3b, 0x8f, 0x1f, 0x01,
-  0xd6, 0x46, 0x15, 0x05, 0x11, 0x95, 0x0e, 0x30, 0xf8, 0xc8, 0x6d, 0xdb,
-  0x00, 0x36, 0x5c, 0xbe, 0xf3, 0xf8, 0x11, 0x60, 0x6d, 0x54, 0x51, 0x10,
-  0x11, 0x3b, 0x39, 0x11, 0xe1, 0x17, 0xb7, 0x6d, 0x05, 0xd2, 0x70, 0xf9,
-  0xce, 0xe3, 0x4f, 0x44, 0x34, 0x21, 0x40, 0x84, 0xf9, 0xc5, 0x6d, 0x1b,
-  0x82, 0x34, 0x5c, 0xbe, 0xf3, 0xf8, 0x13, 0x11, 0x4d, 0x08, 0x10, 0x61,
-  0x3e, 0x72, 0xdb, 0x16, 0x20, 0x0d, 0x97, 0xef, 0x3c, 0xfe, 0x74, 0x44,
-  0x04, 0x30, 0x88, 0x83, 0x8f, 0xdc, 0xb6, 0x11, 0x3c, 0xc3, 0xe5, 0x3b,
-  0x8f, 0x4f, 0x35, 0x40, 0x84, 0xf9, 0xc5, 0x6d, 0x03, 0x00, 0x00, 0x00,
-  0x61, 0x20, 0x00, 0x00, 0xe7, 0x12, 0x00, 0x00, 0x13, 0x04, 0x24, 0x14,
-  0x0b, 0x04, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x34, 0x14, 0x58, 0xd9,
-  0x95, 0xa5, 0x40, 0x0d, 0x94, 0x51, 0x21, 0x15, 0x57, 0xc1, 0x95, 0x5c,
-  0xd9, 0x14, 0x4b, 0x61, 0x0a, 0x94, 0x72, 0x40, 0xd1, 0x94, 0x6e, 0x40,
-  0x39, 0x94, 0x02, 0x21, 0x45, 0x50, 0x06, 0x25, 0x40, 0xc6, 0x18, 0x01,
-  0x08, 0x82, 0x20, 0xfd, 0x8d, 0x11, 0x80, 0x20, 0x08, 0xf2, 0xdf, 0x0c,
-  0xc0, 0x18, 0x01, 0x08, 0x82, 0x20, 0xfe, 0x0b, 0x63, 0x04, 0x20, 0x08,
-  0x82, 0x21, 0x38, 0x8c, 0x11, 0x80, 0x20, 0x08, 0xea, 0xdf, 0x18, 0x01,
-  0x08, 0x82, 0xa0, 0xfe, 0x0b, 0x63, 0x04, 0x20, 0x08, 0x82, 0xf0, 0x37,
-  0x46, 0x00, 0x82, 0x20, 0x08, 0xff, 0xc2, 0x18, 0x01, 0x08, 0x82, 0x20,
-  0x08, 0x06, 0x63, 0x04, 0x20, 0x08, 0x82, 0xf4, 0x2f, 0x8c, 0x11, 0x80,
-  0x20, 0x08, 0xe2, 0xdf, 0x08, 0x00, 0x00, 0x00, 0x23, 0x06, 0x09, 0x00,
-  0x82, 0x60, 0xd0, 0xc9, 0x41, 0xe6, 0xb8, 0x81, 0x1b, 0x98, 0xc1, 0x88,
-  0x41, 0x02, 0x80, 0x20, 0x18, 0x74, 0x73, 0xa0, 0x3d, 0x70, 0x00, 0x07,
-  0x67, 0x30, 0x62, 0x90, 0x00, 0x20, 0x08, 0x06, 0x1d, 0x1d, 0x6c, 0x90,
-  0x1b, 0xb8, 0x01, 0x1a, 0x8c, 0x18, 0x24, 0x00, 0x08, 0x82, 0x41, 0x57,
-  0x07, 0xdc, 0xf4, 0x06, 0x6f, 0x90, 0x06, 0x23, 0x06, 0x06, 0x00, 0x82,
-  0x60, 0x40, 0xfc, 0xc1, 0x05, 0x07, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60,
-  0xa0, 0xe1, 0x41, 0x19, 0x08, 0x71, 0x30, 0x9a, 0x10, 0x00, 0x15, 0x0c,
-  0x30, 0x9a, 0x30, 0x04, 0xc3, 0x0d, 0x42, 0x40, 0x06, 0xb3, 0x0c, 0xc1,
-  0x08, 0x05, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xa0, 0xf5, 0x81, 0x1a,
-  0x1c, 0x79, 0x30, 0x9a, 0x10, 0x0c, 0x17, 0x18, 0x35, 0x9a, 0x30, 0x08,
-  0x17, 0x18, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x9a, 0x28, 0xbc,
-  0x01, 0x13, 0x06, 0xa3, 0x09, 0x01, 0x30, 0xdc, 0x10, 0xf4, 0x01, 0x18,
-  0x4c, 0x37, 0x5c, 0x53, 0x30, 0xdd, 0x80, 0x75, 0x42, 0x21, 0x01, 0x4c,
-  0x37, 0x68, 0x1f, 0x51, 0x48, 0x00, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60,
-  0xa0, 0xa9, 0xc2, 0x1d, 0x50, 0x68, 0x30, 0x9a, 0x10, 0x04, 0xa3, 0x09,
-  0x82, 0x30, 0x9a, 0x30, 0x0c, 0x15, 0x08, 0x52, 0x03, 0x21, 0x15, 0x0c,
-  0x52, 0x57, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xa0, 0xc9, 0xc2,
-  0x1f, 0x70, 0xac, 0x30, 0x9a, 0x10, 0x00, 0x15, 0x0c, 0x52, 0x5b, 0x10,
-  0x15, 0x20, 0x33, 0x9a, 0x50, 0x04, 0x15, 0x08, 0x52, 0x44, 0x10, 0x15,
-  0x34, 0x33, 0x9a, 0x90, 0x08, 0x15, 0x08, 0x52, 0x44, 0x10, 0xd7, 0x18,
-  0x75, 0x85, 0x51, 0x37, 0x18, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06,
-  0x5a, 0x38, 0xb8, 0xc2, 0x1a, 0xe0, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26,
-  0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0xc3, 0x11, 0x46, 0x1d,
-  0x61, 0xd4, 0x11, 0x46, 0x1d, 0x61, 0xd4, 0x88, 0x41, 0x03, 0x80, 0x20,
-  0x18, 0x54, 0xec, 0x10, 0x0b, 0xcc, 0xa2, 0xe0, 0x01, 0x31, 0x08, 0x81,
-  0x09, 0x01, 0x7c, 0x4e, 0x18, 0x66, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c,
-  0xc0, 0x60, 0x1d, 0x72, 0x21, 0x0f, 0x02, 0x73, 0x48, 0x85, 0x71, 0x18,
-  0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x31, 0x38, 0x00, 0x10, 0x04,
-  0x83, 0x31, 0x68, 0x07, 0x59, 0x10, 0x82, 0x0b, 0x8c, 0xbb, 0x63, 0x98,
-  0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x00, 0x83, 0x78, 0xf8, 0x85, 0x3f,
-  0x08, 0xd8, 0xe1, 0x15, 0xd2, 0x61, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84,
-  0x60, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xc6, 0x60, 0x1e, 0x70, 0x41,
-  0x08, 0x2e, 0x30, 0x6e, 0xb8, 0xa1, 0x0e, 0xe8, 0x01, 0x0c, 0x0c, 0x41,
-  0x05, 0xf8, 0xd8, 0x90, 0x0a, 0xf0, 0x99, 0x65, 0x10, 0x86, 0xc1, 0x04,
-  0x57, 0x90, 0x8f, 0x09, 0xaf, 0x20, 0x1f, 0xf3, 0x03, 0x5a, 0x80, 0x8f,
-  0xf5, 0x41, 0x2d, 0xc0, 0xc7, 0x08, 0x41, 0x3e, 0x46, 0x08, 0xf2, 0x99,
-  0x25, 0x20, 0x4c, 0x14, 0x10, 0xf9, 0x18, 0x12, 0x0a, 0xf2, 0x31, 0x41,
-  0x17, 0xe0, 0x63, 0xc2, 0x2e, 0xc0, 0xc7, 0x04, 0x5c, 0x90, 0x8f, 0x09,
-  0xb9, 0x20, 0x9f, 0x59, 0x02, 0x62, 0xa0, 0xc2, 0x80, 0x04, 0x62, 0x18,
-  0xa8, 0x30, 0x20, 0x81, 0x18, 0x46, 0x13, 0x62, 0x41, 0x18, 0x6e, 0x08,
-  0x4c, 0x02, 0x0c, 0x66, 0x19, 0x0a, 0x23, 0x18, 0x31, 0x30, 0x00, 0x10,
-  0x04, 0x83, 0x07, 0x26, 0xd8, 0x81, 0x18, 0x31, 0x30, 0x00, 0x10, 0x04,
-  0x83, 0x27, 0x26, 0xda, 0x81, 0x98, 0x25, 0x30, 0x06, 0x2a, 0x0c, 0xa2,
-  0x60, 0x88, 0x81, 0x0a, 0x83, 0x28, 0x18, 0x62, 0x38, 0x42, 0x50, 0x05,
-  0xe2, 0x1b, 0x8e, 0x18, 0x52, 0x41, 0xf8, 0x4a, 0x08, 0x76, 0x38, 0x82,
-  0x68, 0x05, 0xe2, 0x2b, 0x21, 0xd8, 0xe1, 0x08, 0x63, 0x15, 0x84, 0xaf,
-  0x02, 0x61, 0x67, 0x19, 0x0e, 0x2d, 0x18, 0x4d, 0xf0, 0x85, 0x61, 0xb8,
-  0x21, 0x98, 0x09, 0x30, 0x98, 0x65, 0x40, 0x92, 0xa0, 0x74, 0x61, 0x24,
-  0xe0, 0x02, 0xa3, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0xea, 0x09,
-  0x92, 0x68, 0xd0, 0x61, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x26, 0x9f,
-  0x20, 0x89, 0x40, 0x28, 0x5e, 0x38, 0x09, 0xb8, 0xc0, 0xa8, 0x11, 0x83,
-  0x03, 0x00, 0x41, 0x30, 0x98, 0xc2, 0x02, 0x25, 0x20, 0x76, 0x18, 0x31,
-  0x38, 0x00, 0x10, 0x04, 0x83, 0x49, 0x2c, 0x50, 0x22, 0x10, 0x66, 0x09,
-  0xb4, 0xe1, 0x06, 0x65, 0x27, 0xc0, 0x60, 0x96, 0x41, 0xd1, 0x02, 0xd3,
-  0x05, 0x5e, 0x88, 0xcf, 0x2c, 0xc3, 0xe2, 0x4c, 0xd6, 0x0b, 0x55, 0x7c,
-  0x2c, 0x10, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x18, 0x65, 0x41, 0x21, 0x1f,
-  0x2b, 0x82, 0xf8, 0x14, 0x41, 0x16, 0x3a, 0xdc, 0x10, 0x88, 0x05, 0x18,
-  0xcc, 0x32, 0x30, 0x4d, 0x60, 0x43, 0x39, 0xc0, 0x67, 0x96, 0x40, 0x32,
-  0x72, 0x20, 0xe2, 0x33, 0x4b, 0x20, 0xcd, 0x32, 0x3c, 0x12, 0x67, 0x5f,
-  0x39, 0xc4, 0xc7, 0x02, 0x86, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x51, 0x16,
-  0x3c, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x6e, 0xa1, 0xc3, 0x0d, 0x01,
-  0x5b, 0x80, 0xc1, 0x2c, 0x03, 0x14, 0x05, 0xd6, 0x0e, 0x43, 0x7c, 0x66,
-  0x09, 0x24, 0x23, 0xe0, 0x01, 0x3e, 0xb3, 0x04, 0xd2, 0x40, 0x8b, 0x81,
-  0x31, 0x56, 0x43, 0x40, 0x42, 0x24, 0x0b, 0x8e, 0xb9, 0x83, 0x3c, 0xc4,
-  0x67, 0x96, 0x61, 0xb2, 0xcc, 0xc0, 0xe6, 0x41, 0x0d, 0xe2, 0x63, 0x81,
-  0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x28, 0x0b, 0x0a, 0xf9, 0x58, 0x11,
-  0xc4, 0xa7, 0x08, 0xbd, 0xd0, 0xe1, 0x86, 0x00, 0x2f, 0xc0, 0x60, 0x96,
-  0x81, 0xaa, 0x02, 0x1b, 0xf6, 0x01, 0x3e, 0xb3, 0x04, 0x9a, 0xe1, 0x03,
-  0x11, 0x9f, 0x59, 0x02, 0x6d, 0x96, 0xe1, 0xd2, 0xdc, 0xc0, 0xe8, 0x20,
-  0x1f, 0xe2, 0x63, 0x01, 0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x28, 0x0b,
-  0x1e, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0xd2, 0xd0, 0xe1, 0x86, 0x40,
-  0x34, 0xc0, 0x60, 0x96, 0x01, 0xcb, 0x02, 0x0b, 0x89, 0x21, 0x3e, 0xb3,
-  0x04, 0x9a, 0x11, 0x26, 0x01, 0x9f, 0x59, 0x02, 0x6d, 0xa0, 0xc8, 0x10,
-  0x07, 0xc4, 0x1f, 0x12, 0x7f, 0x30, 0xd8, 0x20, 0x63, 0x03, 0x8c, 0x0d,
-  0x2c, 0x36, 0xa8, 0xd8, 0x80, 0x1a, 0x28, 0x32, 0x78, 0x01, 0xf1, 0x87,
-  0xc4, 0x1f, 0x0c, 0x22, 0x33, 0x30, 0x7f, 0xb0, 0xb0, 0x4a, 0xa3, 0x0e,
-  0x1f, 0x8c, 0x9a, 0x65, 0xd8, 0xe6, 0xa0, 0x14, 0x46, 0x13, 0x6e, 0x62,
-  0x18, 0x6e, 0x08, 0x52, 0x03, 0x0c, 0x66, 0x19, 0x38, 0x2f, 0x18, 0x8e,
-  0x28, 0x7e, 0x62, 0xf8, 0xce, 0x18, 0x66, 0xb8, 0x21, 0xa8, 0x09, 0x32,
-  0xa8, 0x21, 0xd0, 0xe1, 0x08, 0x64, 0x2c, 0x86, 0xaf, 0x02, 0x41, 0x4f,
-  0x19, 0x66, 0xb8, 0x21, 0xc0, 0x09, 0x32, 0xa8, 0x60, 0xd0, 0x59, 0x86,
-  0x4e, 0x0e, 0x82, 0xe3, 0x87, 0x61, 0xae, 0x19, 0x66, 0xc4, 0xe0, 0x00,
-  0x40, 0x10, 0x0c, 0x34, 0xde, 0x48, 0x0d, 0xb3, 0xb8, 0x8d, 0xd1, 0x84,
-  0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86,
-  0x22, 0x0e, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x6b, 0x3c, 0x60,
-  0xe3, 0x20, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xb8, 0xc8, 0x23,
-  0x36, 0x18, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0xab, 0x3c,
-  0x64, 0x43, 0x22, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x58, 0xd8,
-  0x23, 0x36, 0xe0, 0x22, 0xf0, 0x0d, 0xd0, 0xe0, 0x8d, 0xd1, 0x84, 0x00,
-  0x98, 0x25, 0x90, 0x83, 0xe1, 0x06, 0x6c, 0x3c, 0xc0, 0x60, 0x96, 0xe1,
-  0x03, 0x83, 0xa0, 0xd4, 0x62, 0x36, 0xe0, 0x02, 0xa3, 0x46, 0x0c, 0x0e,
-  0x00, 0x04, 0xc1, 0x60, 0x6a, 0x0f, 0xda, 0xf0, 0xf0, 0x62, 0xc4, 0xe0,
-  0x00, 0x40, 0x10, 0x0c, 0x26, 0xf7, 0xa0, 0x8d, 0x40, 0xb8, 0x60, 0x98,
-  0x6a, 0x0b, 0xdc, 0x80, 0x0b, 0x8c, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04,
-  0x83, 0x49, 0x3e, 0x72, 0x43, 0x0c, 0xfa, 0x62, 0xc4, 0xe0, 0x00, 0x40,
-  0x10, 0x0c, 0xa6, 0xf9, 0xc8, 0x8d, 0x40, 0xb8, 0x60, 0x98, 0x0b, 0x8c,
-  0xba, 0xc3, 0xa8, 0xb3, 0x89, 0x61, 0xee, 0x0c, 0x86, 0x39, 0x62, 0x98,
-  0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x0d, 0x3f, 0xca,
-  0x43, 0x34, 0xe6, 0x63, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34,
-  0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10, 0x00,
-  0x04, 0xc1, 0xe0, 0xfa, 0x0f, 0xf6, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01,
-  0x40, 0x10, 0x0c, 0x2e, 0x10, 0x69, 0x8f, 0x84, 0x08, 0x46, 0x0c, 0x10,
-  0x00, 0x04, 0xc1, 0xe0, 0x0a, 0x11, 0xf7, 0x48, 0x88, 0x60, 0xc4, 0x40,
-  0x01, 0x40, 0x10, 0x0c, 0x16, 0x14, 0x69, 0x0f, 0xd6, 0x08, 0xf4, 0x83,
-  0x37, 0xf0, 0x63, 0x34, 0x21, 0x00, 0x66, 0x09, 0xe4, 0x60, 0xb8, 0x81,
-  0x0e, 0xfc, 0x03, 0x0c, 0x66, 0x19, 0xc2, 0x40, 0x0e, 0x02, 0xeb, 0x8b,
-  0xbf, 0x88, 0xcf, 0x70, 0x44, 0x1e, 0x80, 0x06, 0xf1, 0xcd, 0x32, 0x88,
-  0x41, 0x19, 0x04, 0x16, 0x1a, 0x7a, 0x10, 0x1f, 0x0b, 0x06, 0xfa, 0x5c,
-  0x30, 0xcc, 0x05, 0x46, 0x59, 0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45,
-  0xa0, 0x88, 0x0e, 0x37, 0x04, 0x26, 0x02, 0x06, 0xb3, 0x0c, 0x63, 0x40,
-  0x06, 0x81, 0x0d, 0xa9, 0x01, 0x9f, 0x59, 0x82, 0x34, 0x30, 0xd4, 0x20,
-  0xe2, 0x33, 0x4b, 0x90, 0x06, 0xc3, 0x11, 0xa4, 0x90, 0x1a, 0xc2, 0x37,
-  0xcb, 0x60, 0x06, 0x69, 0x10, 0x58, 0x29, 0xa8, 0x46, 0x7c, 0x2c, 0x70,
-  0xe8, 0x73, 0xc1, 0x30, 0x17, 0x18, 0x65, 0x41, 0x24, 0x1f, 0x2b, 0x82,
-  0xf8, 0x14, 0x31, 0x23, 0x3a, 0xdc, 0x10, 0xc4, 0x08, 0x18, 0xcc, 0x32,
-  0x9c, 0x01, 0x1a, 0x04, 0x26, 0x1b, 0x43, 0x7c, 0x66, 0x09, 0xd2, 0xc0,
-  0x88, 0xda, 0x80, 0xcf, 0x2c, 0x41, 0x1a, 0x0c, 0xb4, 0x18, 0xda, 0x18,
-  0x60, 0x64, 0x40, 0x9c, 0x81, 0x80, 0x06, 0x3e, 0x51, 0x06, 0x17, 0x0c,
-  0x63, 0xb4, 0x81, 0x1b, 0xf1, 0x19, 0x8e, 0x88, 0x85, 0xdc, 0x20, 0xbe,
-  0x59, 0x06, 0x35, 0x68, 0x83, 0xc0, 0x74, 0x43, 0x16, 0xe2, 0x63, 0xc1,
-  0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x28, 0x0b, 0x0c, 0xf9, 0x58, 0x11,
-  0xc4, 0xa7, 0x88, 0x30, 0xd1, 0xe1, 0x86, 0xe0, 0x47, 0xc0, 0x60, 0x96,
-  0x61, 0x0d, 0xd8, 0x20, 0xb0, 0x41, 0x3c, 0xe0, 0x33, 0x4b, 0x10, 0x07,
-  0xf6, 0x1b, 0x44, 0x7c, 0x66, 0x09, 0xe2, 0x60, 0x38, 0x82, 0x17, 0xc0,
-  0x43, 0xf8, 0x66, 0x19, 0xdc, 0x20, 0x0e, 0x02, 0xeb, 0x85, 0xf0, 0x88,
-  0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa3, 0x2c, 0x88, 0xe4,
-  0x63, 0x45, 0x10, 0x9f, 0x22, 0xd8, 0x44, 0x87, 0x1b, 0x02, 0x35, 0x01,
-  0x83, 0x59, 0x86, 0x37, 0x80, 0x83, 0xc0, 0xd2, 0x63, 0x88, 0xcf, 0x2c,
-  0x41, 0x1c, 0x18, 0xe1, 0x1e, 0xf0, 0x99, 0x25, 0x88, 0x83, 0x81, 0x16,
-  0x43, 0x5b, 0x03, 0x8c, 0x0d, 0x88, 0x37, 0x10, 0xe0, 0xc0, 0x35, 0xda,
-  0xe0, 0x82, 0x61, 0x2e, 0x30, 0xea, 0x36, 0xa3, 0x4e, 0x37, 0x86, 0xb9,
-  0x75, 0x18, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40,
-  0x10, 0x0c, 0x34, 0x3e, 0x49, 0x13, 0x13, 0xb9, 0x93, 0xd1, 0x84, 0x00,
-  0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22,
-  0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x6b, 0x54, 0xe0, 0x24,
-  0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xb8, 0x48, 0x25, 0x4e,
-  0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0xab, 0x54, 0xe4,
-  0x24, 0x21, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x58, 0x58, 0x25,
-  0x4e, 0x60, 0x24, 0xf0, 0x13, 0x30, 0xe1, 0x93, 0xd1, 0x84, 0x00, 0x98,
-  0x25, 0x90, 0x83, 0x81, 0x16, 0x43, 0x34, 0x3a, 0x35, 0xe2, 0x54, 0xe2,
-  0x13, 0xe2, 0x40, 0x8d, 0xc0, 0x60, 0xc4, 0xc0, 0x01, 0x40, 0x10, 0x0c,
-  0x1a, 0x57, 0x59, 0x93, 0x1a, 0x71, 0x91, 0x50, 0x09, 0xc0, 0x04, 0x4c,
-  0xc0, 0x64, 0x4c, 0xfe, 0x64, 0x96, 0x60, 0x84, 0x86, 0x1b, 0xfe, 0x22,
-  0x54, 0xc0, 0x60, 0x96, 0x81, 0x0e, 0x62, 0x22, 0x18, 0x31, 0x30, 0x00,
-  0x10, 0x04, 0x83, 0xc7, 0x55, 0xdc, 0xa4, 0x1f, 0x46, 0x0c, 0x0c, 0x00,
-  0x04, 0xc1, 0xe0, 0x79, 0x95, 0x37, 0xe9, 0x07, 0x13, 0x74, 0x04, 0x3e,
-  0x26, 0xec, 0x08, 0x7c, 0x46, 0x13, 0x6a, 0x64, 0x18, 0x6e, 0x08, 0x4e,
-  0x05, 0x0c, 0x66, 0x19, 0xea, 0xe0, 0x0e, 0x82, 0xe1, 0x08, 0xa3, 0x47,
-  0x86, 0xef, 0x8e, 0x61, 0x86, 0x1b, 0x82, 0x19, 0x21, 0x83, 0x1a, 0x02,
-  0x1d, 0x8e, 0x48, 0xc2, 0x64, 0xf8, 0x2a, 0x10, 0xf4, 0x96, 0x61, 0x86,
-  0x1b, 0x02, 0x1b, 0x21, 0x83, 0x0a, 0x06, 0x9d, 0x65, 0xb0, 0x83, 0x55,
-  0x08, 0x4e, 0x3f, 0x86, 0xb9, 0x95, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40,
-  0x10, 0x0c, 0x34, 0x5d, 0x39, 0x15, 0x32, 0xa9, 0x95, 0xd1, 0x84, 0x00,
-  0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22,
-  0x0e, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x2b, 0x5c, 0x5c, 0xe5,
-  0x20, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xb8, 0xc4, 0xe5, 0x55,
-  0x18, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x6b, 0x5c, 0x60,
-  0x45, 0x22, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x58, 0xd4, 0xe5,
-  0x55, 0xdc, 0x24, 0xe0, 0x15, 0x3f, 0xd1, 0x95, 0xd1, 0x84, 0x00, 0x98,
-  0x25, 0x58, 0x85, 0xe1, 0x06, 0x2c, 0x5c, 0xc0, 0x60, 0x96, 0x01, 0x0f,
-  0xf2, 0x20, 0x28, 0x34, 0x89, 0x15, 0xb8, 0xc0, 0xa8, 0x11, 0x83, 0x03,
-  0x00, 0x41, 0x30, 0x98, 0xd6, 0x45, 0x56, 0x3e, 0x3b, 0x19, 0x31, 0x38,
-  0x00, 0x10, 0x04, 0x83, 0x89, 0x5d, 0x64, 0x25, 0x10, 0x2e, 0x18, 0xa6,
-  0xd6, 0xc4, 0x56, 0xe0, 0x02, 0xa3, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1,
-  0x60, 0x82, 0x97, 0x5b, 0x19, 0x83, 0x3d, 0x19, 0x31, 0x38, 0x00, 0x10,
-  0x04, 0x83, 0x29, 0x5e, 0x6e, 0x25, 0x10, 0x2e, 0x18, 0xe6, 0x02, 0xa3,
-  0xee, 0x30, 0xea, 0x68, 0x64, 0x98, 0x2b, 0x8b, 0x61, 0x8e, 0x18, 0xe6,
-  0x88, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x40, 0xb3, 0x97, 0x71,
-  0x01, 0x95, 0x78, 0x19, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d,
-  0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00,
-  0x41, 0x30, 0xb8, 0xfa, 0x45, 0x5d, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00,
-  0x10, 0x04, 0x83, 0xcb, 0x5f, 0xd6, 0x25, 0x21, 0x82, 0x11, 0x03, 0x04,
-  0x00, 0x41, 0x30, 0xb8, 0xfe, 0x85, 0x5d, 0x12, 0x22, 0x18, 0x31, 0x50,
-  0x00, 0x10, 0x04, 0x83, 0xc5, 0x64, 0xd6, 0x45, 0x55, 0x02, 0x7c, 0xd1,
-  0x15, 0x7b, 0x19, 0x4d, 0x08, 0x80, 0x59, 0x82, 0x55, 0x18, 0x6e, 0xa0,
-  0x03, 0x7e, 0x01, 0x83, 0x59, 0x06, 0x3d, 0x58, 0x85, 0xc0, 0xf6, 0xa4,
-  0x4f, 0xe2, 0x33, 0x1c, 0xa1, 0x07, 0x7e, 0x42, 0x7c, 0xb3, 0x0c, 0x7b,
-  0xe0, 0x07, 0x81, 0xfd, 0xc9, 0x1e, 0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17,
-  0x0c, 0x73, 0x81, 0x51, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11,
-  0x26, 0xa3, 0xc3, 0x0d, 0x01, 0xc9, 0x80, 0xc1, 0x2c, 0x03, 0x1f, 0xf4,
-  0x41, 0x60, 0xc3, 0xa9, 0xc0, 0x67, 0x96, 0x40, 0x14, 0xcc, 0x54, 0x88,
-  0xf8, 0xcc, 0x12, 0x88, 0xc2, 0x70, 0x44, 0x29, 0x9c, 0x8a, 0xf0, 0xcd,
-  0x32, 0xfc, 0x81, 0x28, 0x04, 0x66, 0x0a, 0xa8, 0x12, 0x1f, 0x0b, 0x1c,
-  0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x46, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20,
-  0x3e, 0x45, 0xc4, 0x8c, 0x0e, 0x37, 0x04, 0x2f, 0x03, 0x06, 0xb3, 0x0c,
-  0xa0, 0x10, 0x0a, 0x81, 0xc1, 0xca, 0x10, 0x9f, 0x59, 0x02, 0x51, 0x30,
-  0x62, 0x56, 0xe0, 0x33, 0x4b, 0x20, 0x0a, 0x03, 0x2d, 0x86, 0xc6, 0x07,
-  0x58, 0x1f, 0x10, 0xa0, 0x20, 0x84, 0x02, 0x58, 0xf8, 0xc1, 0x05, 0xc3,
-  0x98, 0xac, 0xd8, 0x4a, 0x7c, 0x86, 0x23, 0x64, 0xe1, 0x56, 0x88, 0x6f,
-  0x96, 0x61, 0x14, 0x4c, 0x21, 0x30, 0x5c, 0x99, 0x85, 0xf8, 0x58, 0x30,
-  0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x30, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04,
-  0xf1, 0x29, 0xe2, 0x67, 0x74, 0xb8, 0x21, 0xe8, 0x19, 0x30, 0x98, 0x65,
-  0x20, 0x85, 0x52, 0x08, 0x6c, 0x00, 0x17, 0xf8, 0xcc, 0x12, 0xa8, 0x82,
-  0xf5, 0x0a, 0x11, 0x9f, 0x59, 0x02, 0x55, 0x18, 0x8e, 0xe8, 0x05, 0x5f,
-  0x11, 0xbe, 0x59, 0x86, 0x53, 0x50, 0x85, 0xc0, 0x7c, 0xe1, 0x57, 0xe2,
-  0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x28, 0x0b, 0x22, 0xf9,
-  0x58, 0x11, 0xc4, 0xa7, 0x08, 0xb5, 0xd1, 0xe1, 0x86, 0x00, 0x6d, 0xc0,
-  0x60, 0x96, 0x01, 0x15, 0x52, 0x21, 0xb0, 0x73, 0x19, 0xe2, 0x33, 0x4b,
-  0xa0, 0x0a, 0x46, 0xb0, 0x0b, 0x7c, 0x66, 0x09, 0x54, 0x61, 0xa0, 0xc5,
-  0xd0, 0x48, 0x01, 0x2b, 0x05, 0x02, 0x15, 0x84, 0x54, 0x80, 0x0d, 0x53,
-  0xb8, 0x60, 0x98, 0x0b, 0x8c, 0xba, 0xcd, 0xa8, 0xc3, 0x95, 0x61, 0x2e,
-  0x3d, 0x86, 0x39, 0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10,
-  0x04, 0x03, 0x4d, 0x6f, 0xce, 0x86, 0x64, 0xea, 0x66, 0x34, 0x21, 0x00,
-  0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88,
-  0x44, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x0a, 0x1d, 0xb7, 0x49,
-  0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x2e, 0xd1, 0x79, 0x9b,
-  0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x1a, 0x1d, 0xb8,
-  0x49, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x16, 0xd5, 0x79,
-  0x1b, 0x97, 0x09, 0xf8, 0xc6, 0x67, 0xf4, 0x66, 0x34, 0x21, 0x00, 0x66,
-  0x09, 0x56, 0x61, 0xa0, 0xc5, 0x10, 0x0d, 0x3b, 0x30, 0xb3, 0x3a, 0x50,
-  0x09, 0x3c, 0x10, 0x54, 0xc1, 0xcc, 0xf2, 0x60, 0x96, 0x81, 0x15, 0x5c,
-  0x01, 0x1f, 0x86, 0x23, 0xf6, 0x81, 0x66, 0x86, 0xef, 0xf8, 0x61, 0x98,
-  0xe1, 0x86, 0x40, 0x65, 0xc8, 0xa0, 0x86, 0x40, 0x87, 0x23, 0x40, 0x02,
-  0x67, 0x86, 0xaf, 0x02, 0x41, 0x4f, 0x24, 0x86, 0x19, 0x6e, 0x08, 0x5a,
-  0x86, 0x0c, 0x2a, 0x18, 0x74, 0x96, 0xa1, 0x15, 0xc4, 0x21, 0xb8, 0x78,
-  0x19, 0xe6, 0x44, 0x64, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xd0,
-  0x62, 0xc7, 0x6f, 0x76, 0x86, 0x75, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41,
-  0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x38, 0x64, 0xc4,
-  0x00, 0x01, 0x40, 0x10, 0x0c, 0x2e, 0xdc, 0x29, 0x9d, 0x83, 0x08, 0x46,
-  0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0xca, 0x1d, 0xd3, 0x61, 0x88, 0x60,
-  0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x2e, 0xdd, 0x39, 0x1d, 0x89, 0x08,
-  0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0x09, 0x1f, 0xd3, 0x29, 0x9b,
-  0x60, 0x76, 0xea, 0x26, 0x76, 0x46, 0x13, 0x02, 0x60, 0x96, 0x40, 0x1c,
-  0x86, 0x1b, 0x5e, 0x02, 0x77, 0xc0, 0x60, 0x96, 0xe1, 0x15, 0x60, 0x21,
-  0xa8, 0x9f, 0x41, 0x1d, 0xb8, 0xc0, 0xa8, 0x11, 0x83, 0x03, 0x00, 0x41,
-  0x30, 0x98, 0xc4, 0x27, 0x75, 0x68, 0xa2, 0x6d, 0x46, 0x0c, 0x0e, 0x00,
-  0x04, 0xc1, 0x60, 0x1a, 0x9f, 0xd4, 0x09, 0x84, 0x0b, 0x86, 0x29, 0xb1,
-  0x69, 0x1d, 0xb8, 0xc0, 0xa8, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x98,
-  0xce, 0xc7, 0x75, 0x74, 0x42, 0x6e, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1,
-  0x60, 0x42, 0x1f, 0xd7, 0x09, 0x84, 0x0b, 0x86, 0xb9, 0xc0, 0xa8, 0x3b,
-  0x8c, 0xba, 0x95, 0x19, 0xe6, 0x78, 0x64, 0x98, 0x23, 0x86, 0x39, 0x62,
-  0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xd0, 0xda, 0x47, 0x77, 0xee,
-  0x06, 0x7d, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06,
-  0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10,
-  0x0c, 0x2e, 0xfa, 0x09, 0x9f, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04,
-  0xc1, 0xe0, 0xaa, 0x1f, 0xf1, 0x49, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40,
-  0x10, 0x0c, 0x2e, 0xfb, 0x19, 0x9f, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00,
-  0x04, 0xc1, 0x60, 0xe9, 0x1f, 0xf1, 0x09, 0x9d, 0xe0, 0x7d, 0x62, 0xa7,
-  0x7d, 0x46, 0x13, 0x02, 0x60, 0x96, 0x40, 0x1c, 0x86, 0x1b, 0xd6, 0x62,
-  0x7e, 0xc0, 0x60, 0x96, 0x21, 0x16, 0xc4, 0x21, 0x30, 0xb9, 0xa1, 0x9b,
-  0xf8, 0x0c, 0x47, 0xbc, 0x45, 0xdd, 0x10, 0xdf, 0x2c, 0x83, 0x2c, 0xd4,
-  0x42, 0x60, 0x76, 0x03, 0x17, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3,
-  0x5c, 0x60, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x44, 0xff,
-  0xe8, 0x70, 0x43, 0xb0, 0x3f, 0x60, 0x30, 0xcb, 0x30, 0x0b, 0xb4, 0x10,
-  0xd8, 0xe0, 0x37, 0xf0, 0x99, 0x25, 0xc8, 0x05, 0xeb, 0x1b, 0x22, 0x3e,
-  0xb3, 0x04, 0xb9, 0x30, 0x1c, 0xa1, 0x17, 0x7e, 0x23, 0x7c, 0xb3, 0x0c,
-  0xb6, 0x90, 0x0b, 0x81, 0xed, 0xc5, 0xdf, 0xc4, 0xc7, 0x02, 0x87, 0x3e,
-  0x17, 0x0c, 0x73, 0x81, 0x51, 0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f,
-  0x11, 0x28, 0xa4, 0xc3, 0x0d, 0x81, 0x09, 0x81, 0xc1, 0x2c, 0xc3, 0x2d,
-  0xe0, 0x42, 0x60, 0xa7, 0x33, 0xc4, 0x67, 0x96, 0x20, 0x17, 0x8c, 0x50,
-  0x1d, 0xf8, 0xcc, 0x12, 0xe4, 0xc2, 0x40, 0x8b, 0xa1, 0xcd, 0x02, 0x46,
-  0x0b, 0xc4, 0x2d, 0x08, 0xb8, 0xa0, 0x32, 0xb5, 0x70, 0xc1, 0x30, 0x96,
-  0x3a, 0xad, 0x13, 0x9f, 0xe1, 0x88, 0xd4, 0x70, 0x1d, 0xe2, 0x9b, 0x65,
-  0xd0, 0x85, 0x5e, 0x08, 0xec, 0x75, 0x54, 0x23, 0x3e, 0x16, 0x0c, 0xf4,
-  0xb9, 0x60, 0x98, 0x0b, 0x8c, 0xb2, 0xc0, 0x90, 0x8f, 0x15, 0x41, 0x7c,
-  0x8a, 0xb0, 0x21, 0x1d, 0x6e, 0x08, 0x68, 0x08, 0x0c, 0x66, 0x19, 0x76,
-  0x81, 0x17, 0x02, 0x1b, 0x6e, 0x07, 0x3e, 0xb3, 0x04, 0xe1, 0x60, 0xb4,
-  0x43, 0xc4, 0x67, 0x96, 0x20, 0x1c, 0x86, 0x23, 0x68, 0xa3, 0x76, 0x84,
-  0x6f, 0x96, 0xc1, 0x17, 0xc2, 0x21, 0xb0, 0xda, 0xb0, 0x9d, 0xf8, 0x58,
-  0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x30, 0xca, 0x82, 0x48, 0x3e, 0x56,
-  0x04, 0xf1, 0x29, 0x22, 0x8c, 0x74, 0xb8, 0x21, 0xf8, 0x21, 0x30, 0x98,
-  0x65, 0xf8, 0x05, 0x70, 0x08, 0xcc, 0x77, 0x86, 0xf8, 0xcc, 0x12, 0x84,
-  0x83, 0x11, 0xe3, 0x03, 0x9f, 0x59, 0x82, 0x70, 0x18, 0x68, 0x31, 0xb4,
-  0x5d, 0xc0, 0x78, 0x81, 0xf8, 0x05, 0x01, 0x1c, 0xfc, 0xa6, 0x17, 0x2e,
-  0x18, 0xe6, 0x02, 0xa3, 0x6e, 0x33, 0xea, 0x5e, 0x67, 0x98, 0x03, 0x97,
-  0x61, 0x8e, 0x18, 0xe6, 0x88, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1,
-  0x40, 0x8b, 0x23, 0x1f, 0xda, 0x1f, 0x36, 0x1a, 0x4d, 0x08, 0x80, 0xd1,
-  0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0x22, 0x91,
-  0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xb8, 0xf0, 0xa8, 0x8c, 0x12, 0x22,
-  0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x2b, 0x8f, 0xcc, 0x28, 0x21,
-  0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xb8, 0xf4, 0xe8, 0x8c, 0x12,
-  0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x25, 0x94, 0xcc, 0xa8,
-  0x84, 0x82, 0x39, 0xaa, 0xa1, 0x38, 0x1a, 0x4d, 0x08, 0x80, 0x59, 0x02,
-  0x71, 0x18, 0x68, 0x31, 0x44, 0xa3, 0x15, 0x68, 0x30, 0x60, 0x05, 0x95,
-  0x78, 0x05, 0x21, 0x1c, 0x68, 0x30, 0x80, 0x85, 0x59, 0x86, 0x71, 0x28,
-  0x87, 0xf7, 0x18, 0x8e, 0xa0, 0x8f, 0x15, 0x1a, 0xbe, 0xab, 0x8f, 0x61,
-  0x86, 0x1b, 0x82, 0x10, 0x22, 0x83, 0x1a, 0x02, 0x1d, 0x8e, 0xa8, 0x8f,
-  0x17, 0x1a, 0xbe, 0x0a, 0x04, 0xbd, 0xfb, 0x18, 0x66, 0xb8, 0x21, 0x20,
-  0x21, 0x32, 0xa8, 0x60, 0xd0, 0x59, 0x06, 0x72, 0xc8, 0x87, 0xe0, 0xd0,
-  0x67, 0x98, 0xcb, 0x97, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x40,
-  0x43, 0xa5, 0x3a, 0x92, 0xa1, 0x51, 0x1a, 0x4d, 0x08, 0x80, 0xd1, 0x04,
-  0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0xe2, 0x90, 0x11,
-  0x03, 0x04, 0x00, 0x41, 0x30, 0xb8, 0x5e, 0x89, 0x8f, 0x0e, 0x22, 0x18,
-  0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x0b, 0x96, 0xfa, 0x88, 0x21, 0x82,
-  0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xb8, 0x62, 0xc9, 0x8f, 0x24, 0x22,
-  0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x05, 0x97, 0xfa, 0x88, 0x87,
-  0x02, 0x55, 0x62, 0x23, 0x54, 0x1a, 0x4d, 0x08, 0x80, 0x59, 0x82, 0x7c,
-  0x18, 0x6e, 0x30, 0x91, 0x57, 0x02, 0x83, 0x59, 0x06, 0x73, 0x38, 0x87,
-  0xa0, 0x6c, 0xe8, 0x8f, 0xe0, 0x02, 0xa3, 0x46, 0x0c, 0x0e, 0x00, 0x04,
-  0xc1, 0x60, 0xca, 0x25, 0x50, 0x6a, 0x11, 0x32, 0x1a, 0x31, 0x38, 0x00,
-  0x10, 0x04, 0x83, 0x49, 0x97, 0x40, 0x29, 0x10, 0x2e, 0x18, 0xa6, 0x72,
-  0x88, 0x94, 0xe0, 0x02, 0xa3, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60,
-  0xf2, 0xa5, 0x52, 0x7a, 0x91, 0x34, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04,
-  0x83, 0xe9, 0x97, 0x4a, 0x29, 0x10, 0x2e, 0x18, 0xe6, 0x02, 0xa3, 0xee,
-  0x30, 0xea, 0x44, 0x68, 0x98, 0x9b, 0x99, 0x61, 0x8e, 0x18, 0xe6, 0x88,
-  0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x40, 0x23, 0xa7, 0x58, 0x72,
-  0xa3, 0x5f, 0x1a, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18,
-  0x84, 0xd1, 0x04, 0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00, 0x41,
-  0x30, 0xb8, 0xd6, 0x09, 0x97, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10,
-  0x04, 0x83, 0x8b, 0x9d, 0x72, 0x29, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00,
-  0x41, 0x30, 0xb8, 0xda, 0x49, 0x97, 0x12, 0x22, 0x18, 0x31, 0x50, 0x00,
-  0x10, 0x04, 0x83, 0x85, 0x9e, 0x72, 0x09, 0x8f, 0x02, 0x73, 0x42, 0x25,
-  0x72, 0x1a, 0x4d, 0x08, 0x80, 0x59, 0x82, 0x7c, 0x18, 0x6e, 0x10, 0x13,
-  0x75, 0x02, 0x83, 0x59, 0x06, 0x74, 0xc8, 0x87, 0xc0, 0xd2, 0x68, 0x8d,
-  0xe2, 0x33, 0x1c, 0x81, 0x26, 0x6c, 0x44, 0x7c, 0xb3, 0x0c, 0xe9, 0xc0,
-  0x0e, 0x81, 0xb5, 0x51, 0x9a, 0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c,
-  0x73, 0x81, 0x51, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0xf4,
-  0xa4, 0xc3, 0x0d, 0x81, 0x3c, 0x81, 0xc1, 0x2c, 0x83, 0x3a, 0xac, 0x43,
-  0x60, 0x43, 0x1d, 0xc1, 0x67, 0x96, 0x00, 0x1e, 0x8c, 0x8e, 0x88, 0xf8,
-  0xcc, 0x12, 0xc0, 0xc3, 0x70, 0xc4, 0x9c, 0xd4, 0x91, 0xf0, 0xcd, 0x32,
-  0xb4, 0x03, 0x3c, 0x04, 0x46, 0x27, 0x76, 0x14, 0x1f, 0x0b, 0x1c, 0xfa,
-  0x5c, 0x30, 0xcc, 0x05, 0x46, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e,
-  0x45, 0xfc, 0x93, 0x0e, 0x37, 0x04, 0xfd, 0x04, 0x06, 0xb3, 0x0c, 0xee,
-  0xf0, 0x0e, 0x81, 0xf9, 0xd1, 0x10, 0x9f, 0x59, 0x02, 0x78, 0x30, 0x22,
-  0x94, 0xe0, 0x33, 0x4b, 0x00, 0x0f, 0x03, 0x2d, 0x86, 0xa6, 0x0e, 0xd8,
-  0x3a, 0x10, 0xee, 0x20, 0xbc, 0x03, 0x3f, 0xb1, 0xc3, 0x05, 0xc3, 0x18,
-  0x28, 0x91, 0x52, 0x7c, 0x86, 0x23, 0xfc, 0xa4, 0x94, 0x88, 0x6f, 0x96,
-  0x21, 0x1e, 0xe8, 0x21, 0x30, 0x53, 0xfa, 0x93, 0xf8, 0x58, 0x30, 0xd0,
-  0xe7, 0x82, 0x61, 0x2e, 0x30, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1,
-  0x29, 0xa2, 0xa5, 0x74, 0xb8, 0x21, 0x58, 0x29, 0x30, 0x98, 0x65, 0x90,
-  0x87, 0x79, 0x08, 0x6c, 0x70, 0x25, 0xf8, 0xcc, 0x12, 0xe0, 0x83, 0xad,
-  0x12, 0x11, 0x9f, 0x59, 0x02, 0x7c, 0x18, 0x8e, 0x48, 0x15, 0x56, 0x12,
-  0xbe, 0x59, 0x86, 0x7a, 0xc0, 0x87, 0xc0, 0x54, 0xa5, 0x95, 0xe2, 0x63,
-  0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x28, 0x0b, 0x22, 0xf9, 0x58,
-  0x11, 0xc4, 0xa7, 0x08, 0x9c, 0xd2, 0xe1, 0x86, 0xc0, 0xa6, 0xc0, 0x60,
-  0x96, 0xc1, 0x1e, 0xee, 0x21, 0xb0, 0x5a, 0x1a, 0xe2, 0x33, 0x4b, 0x80,
-  0x0f, 0x46, 0xe8, 0x12, 0x7c, 0x66, 0x09, 0xf0, 0x61, 0xa0, 0xc5, 0xd0,
-  0xe4, 0x01, 0x9b, 0x07, 0xc2, 0x1e, 0x84, 0x7b, 0x40, 0x2b, 0x7a, 0xb8,
-  0x60, 0x98, 0x0b, 0x8c, 0xba, 0xcd, 0xa8, 0x33, 0xa5, 0x61, 0xee, 0x76,
-  0x86, 0x39, 0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04,
-  0x03, 0x0d, 0xad, 0x6a, 0x4a, 0x9e, 0xc6, 0x6a, 0x34, 0x21, 0x00, 0x46,
-  0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44,
-  0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x7a, 0x2b, 0x9e, 0x4a, 0x88,
-  0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x2e, 0xb8, 0xea, 0xa9, 0x84,
-  0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x8a, 0x2b, 0x9f, 0x4a,
-  0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x16, 0xbc, 0xea, 0x29,
-  0x7e, 0x0a, 0xd4, 0x8a, 0xa5, 0xd0, 0x6a, 0x34, 0x21, 0x00, 0x66, 0x09,
-  0xf2, 0x61, 0xa0, 0xc5, 0x10, 0x0d, 0x72, 0x10, 0xd1, 0x60, 0x1c, 0x54,
-  0xc2, 0x1c, 0x04, 0x7c, 0x10, 0xd1, 0xe0, 0x1c, 0x66, 0x19, 0xf4, 0x81,
-  0x1f, 0xcc, 0x65, 0x38, 0x22, 0x5d, 0x44, 0x6a, 0xf8, 0x4e, 0x5d, 0x86,
-  0x19, 0x6e, 0x08, 0xf0, 0x89, 0x0c, 0x6a, 0x08, 0x74, 0x38, 0x82, 0x5d,
-  0x4c, 0x6a, 0xf8, 0x2a, 0x10, 0xf4, 0xdc, 0x65, 0x98, 0xe1, 0x86, 0x60,
-  0x9f, 0xc8, 0xa0, 0x82, 0x41, 0x67, 0x19, 0xf6, 0x01, 0x26, 0x82, 0xfb,
-  0xa5, 0x61, 0x0e, 0x7e, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03,
-  0xed, 0xaf, 0xd8, 0x2a, 0xa5, 0xf4, 0x6a, 0x34, 0x21, 0x00, 0x46, 0x13,
-  0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x43, 0x46,
-  0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x32, 0xad, 0xb9, 0x3a, 0x88, 0x60,
-  0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xae, 0xd3, 0xa2, 0x2b, 0x86, 0x08,
-  0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x42, 0xad, 0xba, 0x92, 0x88,
-  0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x96, 0xd7, 0xa2, 0xab, 0x99,
-  0x0a, 0x42, 0x6b, 0xac, 0xfe, 0x6a, 0x34, 0x21, 0x00, 0x66, 0x09, 0x60,
-  0x62, 0xb8, 0xa1, 0x5f, 0x4c, 0x0b, 0x0c, 0x66, 0x19, 0xfa, 0xc1, 0x1f,
-  0x82, 0x6a, 0x29, 0xbb, 0x82, 0x0b, 0x8c, 0x1a, 0x31, 0x38, 0x00, 0x10,
-  0x04, 0x83, 0x09, 0xb6, 0xee, 0x4a, 0x64, 0x76, 0x6a, 0xc4, 0xe0, 0x00,
-  0x40, 0x10, 0x0c, 0xa6, 0xd8, 0xba, 0xab, 0x40, 0xb8, 0x60, 0x98, 0x82,
-  0xa9, 0xbd, 0x82, 0x0b, 0x8c, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83,
-  0xa9, 0xb6, 0xf8, 0xca, 0x64, 0xc0, 0x6a, 0xc4, 0xe0, 0x00, 0x40, 0x10,
-  0x0c, 0x26, 0xdb, 0xe2, 0xab, 0x40, 0xb8, 0x60, 0x98, 0x0b, 0x8c, 0xba,
-  0xc3, 0xa8, 0xcb, 0xa7, 0x61, 0x4e, 0x85, 0x86, 0x39, 0x62, 0x98, 0x23,
-  0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x6d, 0xb7, 0x50, 0xab,
-  0xac, 0x6c, 0x6b, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61,
-  0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10, 0x00, 0x04,
-  0xc1, 0xe0, 0x12, 0xaf, 0xd7, 0x4a, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40,
-  0x10, 0x0c, 0xae, 0xf1, 0x82, 0xad, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00,
-  0x04, 0xc1, 0xe0, 0x22, 0xaf, 0xd8, 0x4a, 0x88, 0x60, 0xc4, 0x40, 0x01,
-  0x40, 0x10, 0x0c, 0x96, 0xf5, 0x82, 0xad, 0xb7, 0x0a, 0x7a, 0xeb, 0xaf,
-  0x76, 0x6b, 0x34, 0x21, 0x00, 0x66, 0x09, 0x60, 0x62, 0xb8, 0x21, 0x67,
-  0xc2, 0x0b, 0x0c, 0x66, 0x19, 0xfe, 0x01, 0x26, 0x02, 0x03, 0x2b, 0xb1,
-  0x8a, 0xcf, 0x70, 0x44, 0xcf, 0x8c, 0x15, 0xf1, 0xcd, 0x32, 0x80, 0xc4,
-  0x48, 0x04, 0x46, 0x56, 0x3e, 0x13, 0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30,
-  0xcc, 0x05, 0x46, 0x59, 0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xac,
-  0x97, 0x0e, 0x37, 0x04, 0xe9, 0x05, 0x06, 0xb3, 0x0c, 0x21, 0x21, 0x12,
-  0x81, 0x0d, 0x6c, 0x05, 0x9f, 0x59, 0x82, 0x93, 0xb0, 0xb5, 0x22, 0xe2,
-  0x33, 0x4b, 0x70, 0x12, 0xc3, 0x11, 0x68, 0xc3, 0x56, 0xc2, 0x37, 0xcb,
-  0x40, 0x12, 0x27, 0x11, 0x58, 0xda, 0xb4, 0x55, 0x7c, 0x2c, 0x70, 0xe8,
-  0x73, 0xc1, 0x30, 0x17, 0x18, 0x65, 0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8,
-  0x14, 0x61, 0x5f, 0x3a, 0xdc, 0x10, 0xd0, 0x17, 0x18, 0xcc, 0x32, 0x94,
-  0x84, 0x49, 0x04, 0x56, 0x57, 0x43, 0x7c, 0x66, 0x09, 0x4e, 0xc2, 0x08,
-  0xbc, 0x82, 0xcf, 0x2c, 0xc1, 0x49, 0x0c, 0xb4, 0x18, 0x5a, 0x48, 0x60,
-  0x22, 0x41, 0x94, 0x84, 0x60, 0x12, 0xe6, 0x36, 0x12, 0x17, 0x0c, 0x63,
-  0x77, 0xb5, 0x57, 0xf1, 0x19, 0x8e, 0xa8, 0x1b, 0xbe, 0x22, 0xbe, 0x59,
-  0x06, 0x94, 0x58, 0x89, 0xc0, 0xfa, 0xca, 0x6e, 0xe2, 0x63, 0xc1, 0x40,
-  0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x28, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4,
-  0xa7, 0x08, 0x12, 0xd3, 0xe1, 0x86, 0x40, 0xc4, 0xc0, 0x60, 0x96, 0x21,
-  0x25, 0x54, 0x22, 0xb0, 0xa1, 0xb4, 0xe0, 0x33, 0x4b, 0xf0, 0x12, 0x26,
-  0x5a, 0x44, 0x7c, 0x66, 0x09, 0x5e, 0x62, 0x38, 0x02, 0x74, 0x46, 0x4b,
-  0xf8, 0x66, 0x19, 0x58, 0xe2, 0x25, 0x02, 0x0b, 0x1d, 0xd2, 0x8a, 0x8f,
-  0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa3, 0x2c, 0x88, 0xe4, 0x63,
-  0x45, 0x10, 0x9f, 0x22, 0x5e, 0x4c, 0x87, 0x1b, 0x82, 0x16, 0x03, 0x83,
-  0x59, 0x86, 0x96, 0x70, 0x89, 0xc0, 0x58, 0x6b, 0x88, 0xcf, 0x2c, 0xc1,
-  0x4b, 0x18, 0x11, 0x5b, 0xf0, 0x99, 0x25, 0x78, 0x89, 0x81, 0x16, 0x43,
-  0x4b, 0x09, 0x4c, 0x25, 0x88, 0x96, 0x10, 0x5c, 0xc2, 0xe6, 0x56, 0xe2,
-  0x82, 0x61, 0x2e, 0x30, 0xea, 0x36, 0xa3, 0xae, 0xaf, 0x86, 0x39, 0x57,
-  0x1a, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10,
-  0x0c, 0xb4, 0x1f, 0x63, 0xb1, 0xf4, 0xd2, 0xb1, 0xd1, 0x84, 0x00, 0x18,
-  0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x12,
-  0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0xcb, 0xcc, 0x66, 0x2c, 0x21,
-  0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xb8, 0xce, 0x8c, 0xc6, 0x12,
-  0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x0b, 0xcd, 0x6a, 0x2c,
-  0x21, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x58, 0xde, 0x8c, 0xc6,
-  0xe6, 0x2b, 0x08, 0xb3, 0x11, 0xfb, 0xb1, 0xd1, 0x84, 0x00, 0x98, 0x25,
-  0x80, 0x89, 0x81, 0x16, 0x43, 0x34, 0xf6, 0x01, 0x7e, 0x03, 0x7d, 0x50,
-  0x89, 0x7e, 0x10, 0x5e, 0x02, 0x7e, 0x03, 0x7f, 0x18, 0x31, 0x30, 0x00,
-  0x10, 0x04, 0x83, 0x07, 0xce, 0x5e, 0xec, 0x97, 0x0c, 0x3e, 0xce, 0x25,
-  0x3e, 0x26, 0x04, 0xf2, 0xb1, 0x20, 0x5d, 0xe0, 0x63, 0x05, 0x3f, 0xc4,
-  0xc7, 0x8a, 0x40, 0x3e, 0x16, 0xf8, 0x03, 0x7c, 0x46, 0x0c, 0x0c, 0x00,
-  0x04, 0xc1, 0xe0, 0xb9, 0x33, 0x1b, 0x2b, 0x27, 0x13, 0x8a, 0xf8, 0x58,
-  0x20, 0xc8, 0xc7, 0x82, 0x03, 0x3e, 0x23, 0x06, 0x0e, 0x00, 0x82, 0x60,
-  0xd0, 0xf0, 0x59, 0x8e, 0x8d, 0x18, 0x7f, 0xbd, 0x59, 0xe0, 0x62, 0x2e,
-  0xe6, 0x62, 0x31, 0xd6, 0x66, 0xb3, 0x04, 0x23, 0x34, 0xdc, 0xd0, 0x56,
-  0x71, 0x06, 0x06, 0xb3, 0x0c, 0x32, 0x31, 0x42, 0xc1, 0x88, 0x81, 0x01,
-  0x80, 0x20, 0x18, 0x3c, 0x7c, 0xc6, 0x63, 0xeb, 0x64, 0xc1, 0x8b, 0xc1,
-  0x67, 0xc4, 0xc0, 0x00, 0x40, 0x10, 0x0c, 0x1e, 0x3f, 0xf3, 0x31, 0x76,
-  0xb2, 0x20, 0xc6, 0xe0, 0x33, 0x9a, 0x30, 0x62, 0xc3, 0x70, 0x43, 0x50,
-  0x67, 0x60, 0x30, 0xcb, 0x30, 0x13, 0x35, 0x11, 0x0c, 0x47, 0x14, 0x2b,
-  0x36, 0x7c, 0x67, 0x0c, 0x33, 0xdc, 0x10, 0x84, 0x18, 0x19, 0xd4, 0x10,
-  0xe8, 0x70, 0xc4, 0xf1, 0x62, 0xc3, 0x57, 0x81, 0xa0, 0x97, 0x0c, 0x33,
-  0xdc, 0x10, 0x90, 0x18, 0x19, 0x54, 0x30, 0xe8, 0x2c, 0x03, 0x4d, 0xa4,
-  0x45, 0x70, 0xe8, 0x35, 0xcc, 0xe5, 0xd3, 0x30, 0x23, 0x06, 0x07, 0x00,
-  0x82, 0x60, 0xa0, 0xa1, 0x5a, 0x9d, 0xc9, 0xd8, 0xa8, 0x8d, 0x26, 0x04,
-  0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14,
-  0x71, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0xaf, 0xc6, 0x67,
-  0x07, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x05, 0x6b, 0x7d,
-  0xc6, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0xb1, 0xe6,
-  0x67, 0x12, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0x82, 0x6b,
-  0x7d, 0xc6, 0x63, 0x81, 0xaa, 0xb1, 0x19, 0xaa, 0x8d, 0x26, 0x04, 0xc0,
-  0x2c, 0x41, 0x5a, 0x0c, 0x37, 0x60, 0xaf, 0x06, 0x06, 0xb3, 0x0c, 0x36,
-  0x71, 0x13, 0x41, 0xd9, 0xd8, 0x9f, 0xc1, 0x05, 0x46, 0x8d, 0x18, 0x1c,
-  0x00, 0x08, 0x82, 0xc1, 0x94, 0x6b, 0xa0, 0xe6, 0x91, 0xd9, 0x88, 0xc1,
-  0x01, 0x80, 0x20, 0x18, 0x4c, 0xba, 0x06, 0x6a, 0x81, 0x70, 0xc1, 0x30,
-  0x95, 0x63, 0xa4, 0x06, 0x17, 0x18, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08,
-  0x06, 0x93, 0xaf, 0x95, 0x5a, 0x18, 0xa4, 0xd9, 0x88, 0xc1, 0x01, 0x80,
-  0x20, 0x18, 0x4c, 0xbf, 0x56, 0x6a, 0x81, 0x70, 0xc1, 0x30, 0x17, 0x18,
-  0x75, 0x87, 0x51, 0x27, 0x62, 0xc3, 0xdc, 0x4c, 0x0d, 0x73, 0xc4, 0x30,
-  0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x1a, 0xb9, 0xc5,
-  0x9a, 0x9b, 0xfd, 0xda, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68,
-  0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00,
-  0x08, 0x82, 0xc1, 0xb5, 0x6e, 0xb8, 0x96, 0x10, 0xc1, 0x88, 0x01, 0x02,
-  0x80, 0x20, 0x18, 0x5c, 0xec, 0x96, 0x6b, 0x09, 0x11, 0x8c, 0x18, 0x20,
-  0x00, 0x08, 0x82, 0xc1, 0xd5, 0x6e, 0xba, 0x96, 0x10, 0xc1, 0x88, 0x81,
-  0x02, 0x80, 0x20, 0x18, 0x2c, 0xf4, 0x96, 0x6b, 0x78, 0x16, 0x98, 0x1b,
-  0xaa, 0x91, 0xdb, 0x68, 0x42, 0x00, 0xcc, 0x12, 0xa4, 0xc5, 0x70, 0x03,
-  0x1d, 0xa8, 0x1b, 0x18, 0xcc, 0x32, 0xe0, 0x44, 0x5a, 0x04, 0x96, 0x66,
-  0x6b, 0x16, 0x9f, 0xe1, 0x88, 0x3c, 0x60, 0x33, 0xe2, 0x9b, 0x65, 0xc8,
-  0x09, 0x9e, 0x08, 0xac, 0xcd, 0xf4, 0x20, 0x3e, 0x16, 0x0c, 0xf4, 0xb9,
-  0x60, 0x98, 0x0b, 0x8c, 0xb2, 0xc0, 0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a,
-  0xa0, 0x37, 0x1d, 0x6e, 0x08, 0xe4, 0x0d, 0x0c, 0x66, 0x19, 0x74, 0x62,
-  0x27, 0x02, 0x1b, 0xea, 0x0c, 0x3e, 0xb3, 0x04, 0x60, 0x61, 0x74, 0x46,
-  0xc4, 0x67, 0x96, 0x00, 0x2c, 0x86, 0x23, 0x48, 0xa1, 0xce, 0x84, 0x6f,
-  0x96, 0xa1, 0x27, 0xc0, 0x22, 0xb0, 0x52, 0xb0, 0xb3, 0xf8, 0x58, 0xe0,
-  0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x30, 0xca, 0x82, 0x48, 0x3e, 0x56, 0x04,
-  0xf1, 0x29, 0xe2, 0xdf, 0x74, 0xb8, 0x21, 0xe8, 0x37, 0x30, 0x98, 0x65,
-  0xf0, 0x89, 0x9f, 0x08, 0xcc, 0xcf, 0x86, 0xf8, 0xcc, 0x12, 0x80, 0x85,
-  0x11, 0xa1, 0x06, 0x9f, 0x59, 0x02, 0xb0, 0x18, 0x68, 0x31, 0x34, 0x9d,
-  0xc0, 0x76, 0x82, 0xf0, 0x09, 0xe1, 0x27, 0x7c, 0x82, 0x27, 0x2e, 0x18,
-  0xc6, 0x40, 0x8d, 0xd4, 0xe2, 0x33, 0x1c, 0x01, 0x0b, 0xa5, 0x46, 0x7c,
-  0xb3, 0x0c, 0x61, 0x41, 0x16, 0x81, 0x99, 0x5a, 0x2c, 0xc4, 0xc7, 0x82,
-  0x81, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x51, 0x16, 0x18, 0xf2, 0xb1, 0x22,
-  0x88, 0x4f, 0x11, 0x2d, 0xa7, 0xc3, 0x0d, 0xc1, 0xca, 0x81, 0xc1, 0x2c,
-  0x83, 0x58, 0x8c, 0x45, 0x60, 0x83, 0xab, 0xc1, 0x67, 0x96, 0x00, 0x2d,
-  0x6c, 0xd5, 0x88, 0xf8, 0xcc, 0x12, 0xa0, 0xc5, 0x70, 0xc4, 0x2e, 0xb0,
-  0x9a, 0xf0, 0xcd, 0x32, 0x94, 0x05, 0x5a, 0x04, 0xc6, 0x0b, 0xad, 0x16,
-  0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x46, 0x59, 0x10, 0xc9,
-  0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xe0, 0x9c, 0x0e, 0x37, 0x04, 0x36, 0x07,
-  0x06, 0xb3, 0x0c, 0x66, 0x71, 0x16, 0x81, 0xd5, 0xda, 0x10, 0x9f, 0x59,
-  0x02, 0xb4, 0x30, 0x42, 0xd7, 0xe0, 0x33, 0x4b, 0x80, 0x16, 0x03, 0x2d,
-  0x86, 0x26, 0x16, 0xd8, 0x58, 0x10, 0x66, 0x21, 0x9c, 0x05, 0x6b, 0x90,
-  0xc5, 0x05, 0xc3, 0x5c, 0x60, 0xd4, 0x6d, 0x46, 0x9d, 0xa9, 0x0d, 0x73,
-  0xb7, 0x35, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80,
-  0x20, 0x18, 0x68, 0x68, 0x57, 0x73, 0xf2, 0x36, 0x76, 0xa3, 0x09, 0x01,
-  0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45,
-  0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xd7, 0xdb, 0xf1, 0x5c,
-  0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0xc1, 0x5d, 0xcf,
-  0x25, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x57, 0xdc, 0xf9,
-  0x5c, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0xe0, 0x5d,
-  0xcf, 0xf1, 0x5b, 0xa0, 0x76, 0x2c, 0x87, 0x76, 0xa3, 0x09, 0x01, 0x30,
-  0x4b, 0x90, 0x16, 0x03, 0x2d, 0x86, 0x68, 0xd0, 0x84, 0x88, 0x07, 0x33,
-  0xa1, 0x12, 0x36, 0x21, 0xa0, 0x85, 0x88, 0x07, 0x37, 0x31, 0xcb, 0xa0,
-  0x16, 0x6c, 0x81, 0x0f, 0xc3, 0x11, 0xfd, 0x20, 0x72, 0xc3, 0x77, 0xfe,
-  0x30, 0xcc, 0x70, 0x43, 0x80, 0x6f, 0x64, 0x50, 0x43, 0xa0, 0xc3, 0x11,
-  0xfe, 0x60, 0x72, 0xc3, 0x57, 0x81, 0xa0, 0x07, 0x12, 0xc3, 0x0c, 0x37,
-  0x04, 0xfb, 0x46, 0x06, 0x15, 0x0c, 0x3a, 0xcb, 0xb0, 0x16, 0xa0, 0x11,
-  0xdc, 0xaf, 0x0d, 0x73, 0xf0, 0x35, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20,
-  0x18, 0x68, 0x7f, 0xc7, 0x76, 0x29, 0xa7, 0x77, 0xa3, 0x09, 0x01, 0x30,
-  0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x1c,
-  0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x97, 0xe9, 0xcd, 0xdd, 0x41,
-  0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0x9d, 0x1e, 0xdd, 0x31,
-  0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x17, 0xea, 0xd5, 0x9d,
-  0x44, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0xbc, 0x1e, 0xdd,
-  0xcd, 0x5c, 0x10, 0x7a, 0x63, 0xf7, 0x77, 0xa3, 0x09, 0x01, 0x30, 0x4b,
-  0x00, 0x1a, 0xc3, 0x0d, 0x2f, 0x61, 0x7a, 0x60, 0x30, 0xcb, 0xd0, 0x16,
-  0x6e, 0x11, 0x54, 0xcb, 0xd9, 0x1d, 0x5c, 0x60, 0xd4, 0x88, 0xc1, 0x01,
-  0x80, 0x20, 0x18, 0x4c, 0xb0, 0x77, 0x77, 0x36, 0xb1, 0x73, 0x23, 0x06,
-  0x07, 0x00, 0x82, 0x60, 0x30, 0xc5, 0xde, 0xdd, 0x05, 0xc2, 0x05, 0xc3,
-  0x14, 0xcc, 0xed, 0x1d, 0x5c, 0x60, 0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20,
-  0x18, 0x4c, 0xb5, 0xc7, 0x77, 0x38, 0x01, 0x76, 0x23, 0x06, 0x07, 0x00,
-  0x82, 0x60, 0x30, 0xd9, 0x1e, 0xdf, 0x05, 0xc2, 0x05, 0xc3, 0x5c, 0x60,
-  0xd4, 0x1d, 0x46, 0x5d, 0xbe, 0x0d, 0x73, 0x2a, 0x36, 0xcc, 0x11, 0xc3,
-  0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x68, 0xbb, 0x87,
-  0x7a, 0x65, 0x67, 0x7b, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3,
-  0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00,
-  0x20, 0x08, 0x06, 0x97, 0xf8, 0xbd, 0x5e, 0x42, 0x04, 0x23, 0x06, 0x08,
-  0x00, 0x82, 0x60, 0x70, 0x8d, 0x1f, 0xec, 0x25, 0x44, 0x30, 0x62, 0x80,
-  0x00, 0x20, 0x08, 0x06, 0x17, 0xf9, 0xc5, 0x5e, 0x42, 0x04, 0x23, 0x06,
-  0x0a, 0x00, 0x82, 0x60, 0xb0, 0xac, 0x1f, 0xec, 0xbd, 0x5d, 0xd0, 0x7b,
-  0x7f, 0xb7, 0x7b, 0xa3, 0x09, 0x01, 0x30, 0x4b, 0x00, 0x1a, 0xc3, 0x0d,
-  0x6b, 0x11, 0x7e, 0x60, 0x30, 0xcb, 0xf0, 0x16, 0xa0, 0x11, 0x18, 0xd8,
-  0x89, 0x5d, 0x7c, 0x86, 0x23, 0xe2, 0x62, 0xec, 0x88, 0x6f, 0x96, 0x01,
-  0x2e, 0xe6, 0x22, 0x30, 0xb2, 0x93, 0x8b, 0xf8, 0x58, 0x30, 0xd0, 0xe7,
-  0x82, 0x61, 0x2e, 0x30, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29,
-  0x62, 0xfd, 0x74, 0xb8, 0x21, 0x48, 0x3f, 0x30, 0x98, 0x65, 0x88, 0x0b,
-  0xb9, 0x08, 0x6c, 0x60, 0x3b, 0xf8, 0xcc, 0x12, 0xdc, 0x85, 0xad, 0x1d,
-  0x11, 0x9f, 0x59, 0x82, 0xbb, 0x18, 0x8e, 0xe0, 0x0b, 0xb6, 0x13, 0xbe,
-  0x59, 0x06, 0xba, 0xb8, 0x8b, 0xc0, 0xfa, 0xa2, 0xed, 0xe2, 0x63, 0x81,
-  0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x28, 0x0b, 0x22, 0xf9, 0x58, 0x11,
-  0xc4, 0xa7, 0x08, 0xfb, 0xd3, 0xe1, 0x86, 0x80, 0xfe, 0xc0, 0x60, 0x96,
-  0xa1, 0x2e, 0xec, 0x22, 0xb0, 0xba, 0x1b, 0xe2, 0x33, 0x4b, 0x70, 0x17,
-  0x46, 0xe0, 0x1d, 0x7c, 0x66, 0x09, 0xee, 0x62, 0xa0, 0xc5, 0xd0, 0xe2,
-  0x02, 0x93, 0x0b, 0xa2, 0x2e, 0x04, 0xbb, 0x70, 0x99, 0xb9, 0xb8, 0x60,
-  0x18, 0xbb, 0xbb, 0xbd, 0x8b, 0xcf, 0x70, 0xc4, 0x69, 0xf0, 0x1d, 0xf1,
-  0xcd, 0x32, 0xe0, 0xc5, 0x5e, 0x04, 0xd6, 0x77, 0xa8, 0x11, 0x1f, 0x0b,
-  0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x46, 0x59, 0x60, 0xc8, 0xc7, 0x8a,
-  0x20, 0x3e, 0x45, 0x90, 0x60, 0xa0, 0xc3, 0x0d, 0x81, 0x08, 0x06, 0x60,
-  0x30, 0xcb, 0x90, 0x17, 0x7a, 0x11, 0xd8, 0x50, 0x7a, 0xf0, 0x99, 0x25,
-  0xf8, 0x0b, 0x13, 0x3d, 0x22, 0x3e, 0xb3, 0x04, 0x7f, 0x31, 0x1c, 0x21,
-  0x1b, 0xa3, 0x27, 0x7c, 0xb3, 0x0c, 0x7c, 0xf1, 0x17, 0x81, 0xcd, 0x06,
-  0xe9, 0xc5, 0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x51, 0x16,
-  0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x2f, 0x18, 0xe8, 0x70, 0x43,
-  0xd0, 0x82, 0x01, 0x18, 0xcc, 0x32, 0xf4, 0x85, 0x5f, 0x04, 0xc6, 0x7a,
-  0x43, 0x7c, 0x66, 0x09, 0xfe, 0xc2, 0x88, 0xd8, 0x83, 0xcf, 0x2c, 0xc1,
-  0x5f, 0x0c, 0xb4, 0x18, 0x5a, 0x5e, 0x60, 0x7a, 0x41, 0xf4, 0x85, 0xe0,
-  0x17, 0x7a, 0xb3, 0x17, 0x17, 0x0c, 0x73, 0x81, 0x51, 0xb7, 0x19, 0x75,
-  0x7d, 0x37, 0xcc, 0xb9, 0xda, 0x30, 0x47, 0x0c, 0x73, 0xc4, 0x30, 0x23,
-  0x06, 0x07, 0x00, 0x82, 0x60, 0xa0, 0xfd, 0x60, 0xc0, 0x82, 0x41, 0xfa,
-  0xe9, 0x60, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30,
-  0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00, 0x82,
-  0x60, 0x70, 0x99, 0x61, 0x30, 0x83, 0x41, 0x42, 0x04, 0x23, 0x06, 0x08,
-  0x00, 0x82, 0x60, 0x70, 0x9d, 0x61, 0x40, 0x83, 0x41, 0x42, 0x04, 0x23,
-  0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0xa1, 0x61, 0x50, 0x83, 0x41, 0x42,
-  0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0xbc, 0x61, 0x40, 0x83,
-  0xc1, 0xfc, 0x05, 0x61, 0x18, 0x8c, 0x60, 0xf0, 0x83, 0xc1, 0x68, 0x42,
-  0x00, 0xcc, 0x12, 0x80, 0xc6, 0x40, 0x8b, 0x21, 0x1a, 0x6b, 0x01, 0xff,
-  0x81, 0x5a, 0xa8, 0x44, 0x5b, 0x08, 0x7f, 0x01, 0xff, 0x81, 0x5b, 0x18,
-  0x7d, 0xe8, 0x1f, 0x7c, 0x66, 0x19, 0x42, 0x63, 0x34, 0xe0, 0x63, 0x38,
-  0x22, 0xd0, 0xbf, 0xe1, 0x3b, 0x61, 0x98, 0xe1, 0x86, 0x00, 0xfe, 0xc8,
-  0xa0, 0x86, 0x40, 0x87, 0x23, 0xec, 0xc3, 0xff, 0x86, 0xaf, 0x02, 0x41,
-  0x0f, 0x3f, 0x86, 0x19, 0x6e, 0x08, 0xe6, 0x8f, 0x0c, 0x2a, 0x18, 0x74,
-  0x96, 0x41, 0x34, 0x6e, 0x23, 0xb8, 0xdb, 0x1b, 0xe6, 0xd0, 0x6d, 0x98,
-  0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xd0, 0xee, 0x30, 0x20, 0xc3, 0x20,
-  0x04, 0x03, 0x39, 0x0c, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46,
-  0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x38, 0x64, 0xc4, 0x00, 0x01,
-  0x40, 0x10, 0x0c, 0x2e, 0x3f, 0x0c, 0xd6, 0x30, 0x38, 0x88, 0x60, 0xc4,
-  0x00, 0x01, 0x40, 0x10, 0x0c, 0xae, 0x3f, 0x0c, 0xd8, 0x30, 0x60, 0x88,
-  0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x2e, 0x50, 0x0c, 0xda, 0x30,
-  0x90, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x96, 0x53, 0x0c,
-  0xd8, 0x30, 0x58, 0xc1, 0x20, 0xc8, 0xc3, 0x60, 0x07, 0x83, 0x3b, 0x0c,
-  0x46, 0x13, 0x02, 0x60, 0x96, 0xe0, 0x36, 0x86, 0x1b, 0x4e, 0xc4, 0x0f,
-  0x03, 0x30, 0x98, 0x65, 0x20, 0x8d, 0xd2, 0x08, 0xaa, 0x04, 0x03, 0x37,
-  0x0c, 0xe0, 0x02, 0xa3, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0x42,
-  0xc5, 0xe0, 0x0d, 0x03, 0x6d, 0x06, 0x83, 0x11, 0x83, 0x03, 0x00, 0x41,
-  0x30, 0x98, 0x52, 0x31, 0x78, 0xc3, 0x20, 0x10, 0x2e, 0x18, 0xa6, 0x50,
-  0x30, 0x98, 0xc3, 0x00, 0x2e, 0x30, 0x6a, 0xc4, 0xe0, 0x00, 0x40, 0x10,
-  0x0c, 0xa6, 0x56, 0x0c, 0xe8, 0x30, 0x80, 0x11, 0x1c, 0x0c, 0x46, 0x0c,
-  0x0e, 0x00, 0x04, 0xc1, 0x60, 0x72, 0xc5, 0x80, 0x0e, 0x83, 0x40, 0xb8,
-  0x60, 0x98, 0x0b, 0x8c, 0xba, 0xc3, 0xa8, 0x8b, 0xbf, 0x61, 0x4e, 0xe4,
-  0x86, 0x39, 0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04,
-  0x03, 0x6d, 0x16, 0x03, 0x50, 0x0c, 0x7a, 0x30, 0x70, 0xc5, 0x60, 0x34,
-  0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88,
-  0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0xd2, 0xc5,
-  0xe0, 0x14, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0,
-  0xda, 0xc5, 0x00, 0x15, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04,
-  0xc1, 0xe0, 0xe2, 0xc5, 0x20, 0x15, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x14,
-  0x00, 0x04, 0xc1, 0x60, 0x19, 0xc7, 0x00, 0x15, 0x83, 0x33, 0x0c, 0x82,
-  0x5a, 0x0c, 0xee, 0x30, 0x98, 0xc5, 0x60, 0x34, 0x21, 0x00, 0x66, 0x09,
-  0x6e, 0x63, 0xb8, 0x61, 0x4c, 0x72, 0x31, 0x00, 0x83, 0x59, 0x06, 0xd3,
-  0xb8, 0x8d, 0xc0, 0x70, 0x30, 0xd0, 0xc1, 0x20, 0x3e, 0xc3, 0x11, 0x75,
-  0xb0, 0x83, 0x01, 0xf1, 0xcd, 0x32, 0x9c, 0x86, 0x6a, 0x04, 0xc6, 0x83,
-  0x81, 0x1d, 0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x51,
-  0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0xe3, 0x18, 0xe8, 0x70,
-  0x43, 0x10, 0x8e, 0x01, 0x18, 0xcc, 0x32, 0xa0, 0x46, 0x6a, 0x04, 0x36,
-  0x90, 0x61, 0x00, 0x9f, 0x59, 0x02, 0xd7, 0xb0, 0x31, 0x0c, 0x88, 0xf8,
-  0xcc, 0x12, 0xb8, 0xc6, 0x70, 0x04, 0x28, 0x90, 0x61, 0x20, 0x7c, 0xb3,
-  0x0c, 0xab, 0xe1, 0x1a, 0x81, 0x85, 0x42, 0x19, 0x06, 0xf1, 0xb1, 0xc0,
-  0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0x60, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08,
-  0xe2, 0x53, 0x84, 0x3b, 0x06, 0x3a, 0xdc, 0x10, 0xb0, 0x63, 0x00, 0x06,
-  0xb3, 0x0c, 0xac, 0xd1, 0x1a, 0x81, 0xb5, 0x61, 0x30, 0xc4, 0x67, 0x96,
-  0xc0, 0x35, 0x8c, 0x80, 0xc3, 0x00, 0x3e, 0xb3, 0x04, 0xae, 0x31, 0xd0,
-  0x62, 0x68, 0xa8, 0x81, 0xa5, 0x06, 0xc1, 0x1a, 0x42, 0x6b, 0xd8, 0x84,
-  0x6a, 0x5c, 0x30, 0x8c, 0xbd, 0x61, 0x30, 0x87, 0x41, 0x7c, 0x86, 0x23,
-  0xfe, 0x84, 0x0e, 0x03, 0xe2, 0x9b, 0x65, 0x78, 0x0d, 0xd9, 0x08, 0xac,
-  0x0e, 0x03, 0x50, 0x89, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x02,
-  0xa3, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xf8, 0x31, 0xd0,
-  0xe1, 0x86, 0x40, 0x1f, 0x03, 0x30, 0x98, 0x65, 0x80, 0x8d, 0xd8, 0x08,
-  0x6c, 0xe8, 0xc3, 0x00, 0x3e, 0xb3, 0x04, 0xb6, 0x61, 0x7a, 0x18, 0x10,
-  0xf1, 0x99, 0x25, 0xb0, 0x8d, 0xe1, 0x08, 0x55, 0xd9, 0xc3, 0x40, 0xf8,
-  0x66, 0x19, 0x66, 0xc3, 0x36, 0x02, 0x5b, 0x15, 0x3e, 0x0c, 0xe2, 0x63,
-  0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x28, 0x0b, 0x22, 0xf9, 0x58,
-  0x11, 0xc4, 0xa7, 0x88, 0x93, 0x0c, 0x74, 0xb8, 0x21, 0x28, 0xc9, 0x00,
-  0x0c, 0x66, 0x19, 0x68, 0xa3, 0x36, 0x02, 0x23, 0xc5, 0x60, 0x88, 0xcf,
-  0x2c, 0x81, 0x6d, 0x18, 0x91, 0x8a, 0x01, 0x7c, 0x66, 0x09, 0x6c, 0x63,
-  0xa0, 0xc5, 0xd0, 0x60, 0x03, 0x8b, 0x0d, 0x82, 0x36, 0x84, 0xda, 0x50,
-  0x2b, 0xd9, 0xb8, 0x60, 0x98, 0x0b, 0x8c, 0xba, 0xcd, 0xa8, 0xab, 0xc3,
-  0x60, 0x98, 0x33, 0xbd, 0x61, 0x8e, 0x18, 0xe6, 0x88, 0x61, 0x46, 0x0c,
-  0x0e, 0x00, 0x04, 0xc1, 0x40, 0xbb, 0xc9, 0x80, 0x24, 0x83, 0x70, 0x0c,
-  0x64, 0x32, 0x18, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18,
-  0x84, 0xd1, 0x04, 0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00, 0x41,
-  0x30, 0xb8, 0x7c, 0x32, 0x58, 0xc9, 0x20, 0x21, 0x82, 0x11, 0x03, 0x04,
-  0x00, 0x41, 0x30, 0xb8, 0x7e, 0x32, 0x60, 0xc9, 0x20, 0x21, 0x82, 0x11,
-  0x03, 0x04, 0x00, 0x41, 0x30, 0xb8, 0xc0, 0x32, 0x68, 0xc9, 0x20, 0x21,
-  0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x58, 0xce, 0x32, 0x60, 0xc9,
-  0x60, 0x1d, 0x83, 0x20, 0x27, 0x83, 0x7d, 0x0c, 0x6e, 0x32, 0x18, 0x4d,
-  0x08, 0x80, 0x59, 0x82, 0xdb, 0x18, 0x68, 0x31, 0x44, 0x43, 0x34, 0xc0,
-  0x53, 0x08, 0x0d, 0x95, 0x20, 0x0d, 0xc1, 0x36, 0xc0, 0x53, 0x28, 0x0d,
-  0x63, 0x17, 0x7a, 0x0c, 0xe0, 0x33, 0xcb, 0x80, 0x1b, 0xba, 0x81, 0x2e,
-  0xc3, 0x11, 0x81, 0x3c, 0x06, 0xc3, 0x77, 0xc2, 0x30, 0xc3, 0x0d, 0x01,
-  0x3a, 0x06, 0x64, 0x50, 0x43, 0xa0, 0xc3, 0x11, 0xee, 0x62, 0x8f, 0xc1,
-  0xf0, 0x55, 0x20, 0xe8, 0xc1, 0xcb, 0x30, 0xc3, 0x0d, 0xc1, 0x3a, 0x06,
-  0x64, 0x50, 0xc1, 0xa0, 0xb3, 0x0c, 0xb9, 0xe1, 0x1e, 0xc1, 0xbd, 0x62,
-  0x30, 0xcc, 0x81, 0xdf, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xa0,
-  0xbd, 0x65, 0xc0, 0x93, 0x41, 0x3e, 0x06, 0x6a, 0x19, 0x8c, 0x26, 0x04,
-  0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14,
-  0x71, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0x76, 0x19, 0x8c,
-  0x65, 0x70, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0x77,
-  0x19, 0x90, 0x65, 0xc0, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
-  0x5c, 0x78, 0x19, 0x94, 0x65, 0x20, 0x11, 0xc1, 0x88, 0x81, 0x02, 0x80,
-  0x20, 0x18, 0x2c, 0x7f, 0x19, 0x90, 0x65, 0x30, 0x92, 0x41, 0x10, 0x97,
-  0xc1, 0x4c, 0x06, 0x6f, 0x19, 0x8c, 0x26, 0x04, 0xc0, 0x2c, 0x81, 0x7b,
-  0x0c, 0x37, 0xfc, 0x8b, 0x5d, 0x06, 0x60, 0x30, 0xcb, 0xb0, 0x1b, 0xbc,
-  0x11, 0x54, 0x3f, 0x06, 0x66, 0x19, 0xc0, 0x05, 0x46, 0x8d, 0x18, 0x1c,
-  0x00, 0x08, 0x82, 0xc1, 0x04, 0x9a, 0xc1, 0x59, 0x06, 0xda, 0x4a, 0x06,
-  0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x30, 0x85, 0x66, 0x70, 0x96, 0x41,
-  0x20, 0x5c, 0x30, 0x4c, 0x81, 0x64, 0xb0, 0x96, 0x01, 0x5c, 0x60, 0xd4,
-  0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x4c, 0xa5, 0x19, 0xb0, 0x65, 0x80,
-  0x32, 0x30, 0x19, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x64, 0x9a,
-  0x01, 0x5b, 0x06, 0x81, 0x70, 0xc1, 0x30, 0x17, 0x18, 0x75, 0x87, 0x51,
-  0x97, 0x8e, 0xc1, 0x30, 0xa7, 0x7f, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3,
-  0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x81, 0xb6, 0x9a, 0x01, 0x5e, 0x06,
-  0x35, 0x19, 0x98, 0x66, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30,
-  0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23, 0x06, 0x08,
-  0x00, 0x82, 0x60, 0x70, 0xc9, 0x66, 0xf0, 0x97, 0x41, 0x42, 0x04, 0x23,
-  0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0xcd, 0x66, 0x00, 0x9a, 0x41, 0x42,
-  0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0xd1, 0x66, 0x10, 0x9a,
-  0x41, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0xec, 0x66,
-  0x00, 0x9a, 0xc1, 0x4f, 0x06, 0x41, 0x6b, 0x06, 0x6f, 0x19, 0xac, 0x66,
-  0x30, 0x9a, 0x10, 0x00, 0xb3, 0x04, 0xee, 0x31, 0xdc, 0xb0, 0x33, 0xb1,
-  0x19, 0x80, 0xc1, 0x2c, 0x43, 0x6f, 0xb8, 0x47, 0x60, 0x30, 0x19, 0xc8,
-  0x64, 0x10, 0x9f, 0xe1, 0x88, 0x3a, 0x98, 0xc9, 0x80, 0xf8, 0x66, 0x19,
-  0x7c, 0x23, 0x3c, 0x02, 0xa3, 0xc9, 0xc0, 0x0e, 0xe2, 0x63, 0xc1, 0x40,
-  0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x28, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4,
-  0xa7, 0x88, 0xdd, 0x0c, 0x74, 0xb8, 0x21, 0xc8, 0xcd, 0x00, 0x0c, 0x66,
-  0x19, 0x7e, 0x03, 0x3c, 0x02, 0x1b, 0x78, 0x32, 0x80, 0xcf, 0x2c, 0x41,
-  0x79, 0xd8, 0x4e, 0x06, 0x44, 0x7c, 0x66, 0x09, 0xca, 0x63, 0x38, 0x02,
-  0x14, 0x78, 0x32, 0x10, 0xbe, 0x59, 0x06, 0xf1, 0x28, 0x8f, 0xc0, 0x42,
-  0xa1, 0x27, 0x83, 0xf8, 0x58, 0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x30,
-  0xca, 0x82, 0x48, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0xc2, 0x3c, 0x03, 0x1d,
-  0x6e, 0x08, 0xc8, 0x33, 0x00, 0x83, 0x59, 0x86, 0xf1, 0x20, 0x8f, 0xc0,
-  0xca, 0x32, 0x18, 0xe2, 0x33, 0x4b, 0x50, 0x1e, 0x46, 0xa0, 0x65, 0x00,
-  0x9f, 0x59, 0x82, 0xf2, 0x18, 0x68, 0x31, 0xb4, 0xdf, 0xc0, 0xc0, 0x83,
-  0x18, 0x0f, 0x81, 0x3c, 0x6c, 0x22, 0x3c, 0x2e, 0x18, 0xc6, 0xce, 0x32,
-  0x58, 0xcb, 0x20, 0x3e, 0xc3, 0x11, 0x77, 0xc3, 0x96, 0x01, 0xf1, 0xcd,
-  0x32, 0x98, 0x47, 0x7a, 0x04, 0xd6, 0x96, 0x01, 0xde, 0xc4, 0xc7, 0x82,
-  0x81, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x51, 0x16, 0x18, 0xf2, 0xb1, 0x22,
-  0x88, 0x4f, 0x11, 0xf4, 0x19, 0xe8, 0x70, 0x43, 0x20, 0x9f, 0x01, 0x18,
-  0xcc, 0x32, 0x9c, 0x07, 0x7a, 0x04, 0x36, 0xd4, 0x65, 0x00, 0x9f, 0x59,
-  0x82, 0xf6, 0x30, 0xb9, 0x0c, 0x88, 0xf8, 0xcc, 0x12, 0xb4, 0xc7, 0x70,
-  0x84, 0xe8, 0xcc, 0x65, 0x20, 0x7c, 0xb3, 0x0c, 0xea, 0xd1, 0x1e, 0x81,
-  0x8d, 0x0e, 0x5d, 0x06, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c,
-  0x60, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0xc4, 0x7f, 0x06,
-  0x3a, 0xdc, 0x10, 0xf4, 0x67, 0x00, 0x06, 0xb3, 0x0c, 0xeb, 0xc1, 0x1e,
-  0x81, 0xf1, 0x65, 0x30, 0xc4, 0x67, 0x96, 0xa0, 0x3d, 0x8c, 0x08, 0xcd,
-  0x00, 0x3e, 0xb3, 0x04, 0xed, 0x31, 0xd0, 0x62, 0x68, 0xe7, 0x81, 0xa1,
-  0x07, 0xb1, 0x1e, 0x02, 0x7b, 0xe8, 0x5c, 0x7a, 0x5c, 0x30, 0xcc, 0x05,
-  0x46, 0xdd, 0x66, 0xd4, 0xb5, 0x65, 0x30, 0xcc, 0xf9, 0x61, 0x30, 0xcc,
-  0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x68,
-  0x2f, 0x1a, 0xf0, 0x67, 0x90, 0x9b, 0x81, 0x8a, 0x06, 0xa3, 0x09, 0x01,
-  0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45,
-  0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x97, 0x8d, 0x06, 0x23,
-  0x1a, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xd7, 0x8d,
-  0x06, 0x24, 0x1a, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06,
-  0x17, 0x8e, 0x06, 0x25, 0x1a, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20,
-  0x08, 0x06, 0xcb, 0x8f, 0x06, 0x24, 0x1a, 0x8c, 0x67, 0x10, 0xc4, 0x68,
-  0x30, 0x9f, 0xc1, 0x8b, 0x06, 0xa3, 0x09, 0x01, 0x30, 0x4b, 0xe0, 0x1e,
-  0x03, 0x2d, 0x86, 0x68, 0xe4, 0x06, 0xec, 0x0a, 0xb8, 0xa1, 0x12, 0xbb,
-  0x21, 0xb4, 0x07, 0xec, 0x0a, 0xbc, 0x31, 0xcb, 0xf0, 0x1e, 0xf1, 0xf1,
-  0x3b, 0xc3, 0x11, 0xe3, 0x93, 0x9e, 0xc1, 0xf0, 0x1d, 0xf9, 0x0c, 0x33,
-  0xdc, 0x10, 0xfc, 0x66, 0x40, 0x06, 0x35, 0x04, 0x3a, 0x1c, 0x61, 0x3e,
-  0xed, 0x19, 0x0c, 0x5f, 0x05, 0x82, 0x1e, 0xfa, 0x0c, 0x33, 0xdc, 0x10,
-  0x88, 0x67, 0x40, 0x06, 0x15, 0x0c, 0x3a, 0xcb, 0x00, 0x1f, 0x25, 0x12,
-  0x9c, 0x69, 0x06, 0xc3, 0xdc, 0x2d, 0x06, 0xc3, 0x8c, 0x18, 0x1c, 0x00,
-  0x08, 0x82, 0x81, 0x66, 0xa6, 0xc1, 0x8c, 0x06, 0xf0, 0x19, 0x84, 0x69,
-  0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3,
-  0x09, 0xc4, 0x50, 0xc4, 0x21, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70,
-  0xb5, 0x69, 0xa0, 0xa3, 0xc1, 0x41, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82,
-  0x60, 0x70, 0xb9, 0x69, 0xb0, 0xa3, 0x01, 0x43, 0x04, 0x23, 0x06, 0x08,
-  0x00, 0x82, 0x60, 0x70, 0xbd, 0x69, 0xc0, 0xa3, 0x81, 0x44, 0x04, 0x23,
-  0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0xd8, 0x69, 0xb0, 0xa3, 0x81, 0x7e,
-  0x06, 0x01, 0x9a, 0x06, 0x2a, 0x1a, 0x98, 0x69, 0x30, 0x9a, 0x10, 0x00,
-  0xb3, 0x04, 0x25, 0x32, 0xdc, 0x60, 0x3f, 0x6d, 0x1a, 0x80, 0xc1, 0x2c,
-  0x83, 0x7c, 0xcc, 0x47, 0x50, 0xf4, 0x19, 0xf4, 0x68, 0x00, 0x17, 0x18,
-  0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0xd3, 0x9d, 0x06, 0x3e, 0x1a,
-  0xf0, 0x8f, 0x88, 0x06, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x30, 0xe1,
-  0x69, 0xe0, 0xa3, 0x41, 0x20, 0x5c, 0x30, 0x4c, 0xdd, 0x67, 0x20, 0xa6,
-  0x01, 0x5c, 0x60, 0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x4c, 0x7c,
-  0x1a, 0x8c, 0x69, 0x00, 0x42, 0x27, 0x1a, 0x8c, 0x18, 0x1c, 0x00, 0x08,
-  0x82, 0xc1, 0xd4, 0xa7, 0xc1, 0x98, 0x06, 0x81, 0x70, 0xc1, 0x30, 0x17,
-  0x18, 0x75, 0x87, 0x51, 0x07, 0x9e, 0xc1, 0x30, 0x17, 0x8f, 0xc1, 0x30,
-  0x47, 0x0c, 0x73, 0xc4, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xa0,
-  0x89, 0x6a, 0xf0, 0xa6, 0x01, 0x8b, 0x06, 0x7d, 0x1a, 0x8c, 0x26, 0x04,
-  0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14,
-  0x91, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0xa9, 0x1a, 0xd8,
-  0x69, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0xaa,
-  0x1a, 0xdc, 0x69, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
-  0x5c, 0xab, 0x1a, 0xe0, 0x69, 0x90, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80,
-  0x20, 0x18, 0x2c, 0xb2, 0x1a, 0xdc, 0x69, 0x60, 0xa3, 0x41, 0x40, 0xaa,
-  0x81, 0x99, 0x06, 0xa2, 0x1a, 0x8c, 0x26, 0x04, 0xc0, 0x2c, 0x41, 0x89,
-  0x0c, 0x37, 0xc8, 0x10, 0xaa, 0x06, 0x60, 0x30, 0xcb, 0x40, 0x1f, 0x25,
-  0x12, 0xd8, 0x89, 0x06, 0x29, 0x1a, 0xc4, 0x67, 0x38, 0xe2, 0x86, 0x54,
-  0x34, 0x20, 0xbe, 0x59, 0x86, 0xfa, 0xc0, 0x8f, 0xc0, 0x56, 0x34, 0xc0,
-  0xa1, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x30, 0xca, 0x02,
-  0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x42, 0x56, 0x03, 0x1d, 0x6e, 0x08,
-  0x60, 0x35, 0x00, 0x83, 0x59, 0x06, 0xfb, 0xb8, 0x8f, 0xc0, 0x86, 0x19,
-  0x0d, 0xe0, 0x33, 0x4b, 0xc0, 0x1f, 0x26, 0xa3, 0x01, 0x11, 0x9f, 0x59,
-  0x02, 0xfe, 0x18, 0x8e, 0x10, 0xa3, 0x19, 0x0d, 0x84, 0x6f, 0x96, 0x21,
-  0x3f, 0xf8, 0x23, 0xb0, 0x31, 0xa2, 0xd1, 0x20, 0x3e, 0x16, 0x38, 0xf4,
-  0xb9, 0x60, 0x98, 0x0b, 0x8c, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c,
-  0x8a, 0xe8, 0xd5, 0x40, 0x87, 0x1b, 0x82, 0x5d, 0x0d, 0xc0, 0x60, 0x96,
-  0x41, 0x3f, 0xf6, 0x23, 0x30, 0x1e, 0x0d, 0x86, 0xf8, 0xcc, 0x12, 0xf0,
-  0x87, 0x11, 0x3f, 0x1a, 0xc0, 0x67, 0x96, 0x80, 0x3f, 0x06, 0x5a, 0x0c,
-  0xcd, 0x3e, 0xb0, 0xfb, 0x20, 0xf4, 0x43, 0xd8, 0x0f, 0x5d, 0x0c, 0xf0,
-  0xe3, 0x82, 0x61, 0xcc, 0x47, 0x03, 0x31, 0x0d, 0xe2, 0x33, 0x1c, 0xf1,
-  0x46, 0x63, 0x1a, 0x10, 0xdf, 0x2c, 0x43, 0x7f, 0x80, 0x48, 0x60, 0x64,
-  0x1a, 0xc0, 0x51, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x18,
-  0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xb1, 0xae, 0x81, 0x0e,
-  0x37, 0x04, 0xe9, 0x1a, 0x80, 0xc1, 0x2c, 0x83, 0x7f, 0xfc, 0x47, 0x60,
-  0x03, 0x9b, 0x06, 0xf0, 0x99, 0x25, 0x20, 0x11, 0x4b, 0xd3, 0x80, 0x88,
-  0xcf, 0x2c, 0x01, 0x89, 0x0c, 0x47, 0xe8, 0x91, 0x9a, 0x06, 0xc2, 0x37,
-  0xcb, 0x10, 0x22, 0x24, 0x12, 0xd8, 0x1e, 0xad, 0x69, 0x10, 0x1f, 0x0b,
-  0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x46, 0x59, 0x10, 0xc9, 0xc7, 0x8a,
-  0x20, 0x3e, 0x45, 0xd8, 0x6b, 0xa0, 0xc3, 0x0d, 0x01, 0xbd, 0x06, 0x60,
-  0x30, 0xcb, 0x20, 0x22, 0x23, 0x12, 0xd8, 0x9c, 0x06, 0x43, 0x7c, 0x66,
-  0x09, 0x48, 0xc4, 0x08, 0x3c, 0x0d, 0xe0, 0x33, 0x4b, 0x40, 0x22, 0x03,
-  0x2d, 0x86, 0xe6, 0x1f, 0xd8, 0x7f, 0x10, 0x22, 0x22, 0x8c, 0x88, 0x4a,
-  0x06, 0x20, 0x72, 0xc1, 0x30, 0x17, 0x18, 0x75, 0x9b, 0x51, 0x47, 0xa6,
-  0xc1, 0x30, 0x57, 0x97, 0xc1, 0x30, 0x47, 0x0c, 0x73, 0xc4, 0x30, 0x23,
-  0x06, 0x07, 0x00, 0x82, 0x60, 0xa0, 0x99, 0x6c, 0x30, 0xaf, 0x01, 0xac,
-  0x06, 0x21, 0x1b, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26,
-  0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x91, 0xc8, 0x88, 0x01, 0x02, 0x80,
-  0x20, 0x18, 0x5c, 0x2d, 0x1b, 0xe8, 0x6b, 0x90, 0x10, 0xc1, 0x88, 0x01,
-  0x02, 0x80, 0x20, 0x18, 0x5c, 0x2e, 0x1b, 0xec, 0x6b, 0x90, 0x10, 0xc1,
-  0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0x2f, 0x1b, 0xf0, 0x6b, 0x90,
-  0x10, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c, 0x36, 0x1b, 0xec,
-  0x6b, 0xa0, 0xab, 0x41, 0x80, 0xb2, 0x81, 0xba, 0x06, 0x26, 0x1b, 0x8c,
-  0x26, 0x04, 0xc0, 0x2c, 0x41, 0x89, 0x0c, 0xb4, 0x18, 0xa2, 0x01, 0x1f,
-  0x7e, 0x2d, 0xbc, 0x87, 0x4a, 0xc8, 0x87, 0x40, 0x22, 0x7e, 0x2d, 0xcc,
-  0xc7, 0x2c, 0x83, 0x89, 0xa0, 0x88, 0x2d, 0x0d, 0x47, 0xec, 0x12, 0xb8,
-  0x06, 0xc3, 0x77, 0xbc, 0x34, 0xcc, 0x70, 0x43, 0x60, 0xab, 0x01, 0x19,
-  0xd4, 0x10, 0xe8, 0x70, 0x44, 0x2f, 0x91, 0x6b, 0x30, 0x7c, 0x15, 0x08,
-  0x7a, 0xbf, 0x34, 0xcc, 0x70, 0x43, 0x90, 0xab, 0x01, 0x19, 0x54, 0x30,
-  0xe8, 0x2c, 0xc3, 0x89, 0xf0, 0x48, 0x70, 0x7d, 0x1a, 0x0c, 0x73, 0xae,
-  0x19, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x5a, 0xcf, 0x06,
-  0x2a, 0x1b, 0x9c, 0x6b, 0x80, 0xb3, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26,
-  0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x87, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x45, 0xb6, 0x41, 0xcc, 0x06, 0x07,
-  0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x55, 0xb6, 0x81, 0xcc,
-  0x06, 0x0c, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x65, 0xb6,
-  0xc1, 0xcc, 0x06, 0x12, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1,
-  0xd2, 0xb6, 0x81, 0xcc, 0x06, 0xf1, 0x1a, 0x04, 0x3f, 0x1b, 0x84, 0x6c,
-  0xd0, 0xb3, 0xc1, 0x68, 0x42, 0x00, 0xcc, 0x12, 0xf0, 0xc8, 0x70, 0x43,
-  0x3b, 0x91, 0x6d, 0x00, 0x06, 0xb3, 0x0c, 0x29, 0xa2, 0x22, 0x41, 0xad,
-  0x6b, 0x40, 0xb3, 0x01, 0x5c, 0x60, 0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20,
-  0x18, 0x4c, 0x6e, 0x1b, 0xd4, 0x6c, 0x40, 0x4f, 0xf9, 0x1a, 0x8c, 0x18,
-  0x1c, 0x00, 0x08, 0x82, 0xc1, 0xf4, 0xb6, 0x41, 0xcd, 0x06, 0x81, 0x70,
-  0xc1, 0x30, 0xe5, 0xae, 0x41, 0xce, 0x06, 0x70, 0x81, 0x51, 0x23, 0x06,
-  0x07, 0x00, 0x82, 0x60, 0x30, 0xcd, 0x6d, 0xa0, 0xb3, 0xc1, 0x3d, 0xf9,
-  0x6b, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x13, 0xdd, 0x06, 0x3a,
-  0x1b, 0x04, 0xc2, 0x05, 0xc3, 0x5c, 0x60, 0xd4, 0x1d, 0x46, 0xdd, 0xad,
-  0x06, 0xc3, 0x1c, 0x7a, 0x06, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c,
-  0x18, 0x1c, 0x00, 0x08, 0x82, 0x81, 0x96, 0xb7, 0x81, 0xd9, 0x06, 0x23,
-  0x1b, 0xd0, 0x6d, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a,
-  0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00,
-  0x82, 0x60, 0x70, 0x81, 0x6e, 0xd0, 0xb6, 0x41, 0x42, 0x04, 0x23, 0x06,
-  0x08, 0x00, 0x82, 0x60, 0x70, 0x85, 0x6e, 0xe0, 0xb6, 0x41, 0x42, 0x04,
-  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0x89, 0x6e, 0xf0, 0xb6, 0x41,
-  0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0xa4, 0x6e, 0xe0,
-  0xb6, 0x41, 0xcb, 0x06, 0xc1, 0xde, 0x06, 0x3d, 0x1b, 0xe4, 0x6d, 0x30,
-  0x9a, 0x10, 0x00, 0xb3, 0x04, 0x3c, 0x32, 0xdc, 0x90, 0x52, 0x7f, 0x1b,
-  0x80, 0xc1, 0x2c, 0xc3, 0x8a, 0xf0, 0x48, 0x60, 0xfe, 0x1a, 0x80, 0x6c,
-  0x10, 0x9f, 0xe1, 0x88, 0x97, 0x0a, 0xd9, 0x80, 0xf8, 0x66, 0x19, 0x58,
-  0xe4, 0x45, 0x02, 0x13, 0xd9, 0x00, 0xa6, 0xe2, 0x63, 0xc1, 0x40, 0x9f,
-  0x0b, 0x86, 0xb9, 0xc0, 0x28, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7,
-  0x88, 0xd4, 0x0d, 0x74, 0xb8, 0x21, 0x38, 0xdd, 0x00, 0x0c, 0x66, 0x19,
-  0x5a, 0xc4, 0x45, 0x02, 0x1b, 0x54, 0x36, 0x80, 0xcf, 0x2c, 0xc1, 0x8c,
-  0x58, 0xca, 0x06, 0x44, 0x7c, 0x66, 0x09, 0x66, 0x64, 0x38, 0x42, 0xa7,
-  0x54, 0x36, 0x10, 0xbe, 0x59, 0x06, 0x18, 0x99, 0x91, 0xc0, 0x76, 0x6a,
-  0x65, 0x83, 0xf8, 0x58, 0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x30, 0xca,
-  0x82, 0x48, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x82, 0x76, 0x03, 0x1d, 0x6e,
-  0x08, 0x64, 0x37, 0x00, 0x83, 0x59, 0x86, 0x18, 0x91, 0x91, 0xc0, 0x66,
-  0x36, 0x18, 0xe2, 0x33, 0x4b, 0x30, 0x23, 0x46, 0xd8, 0x6c, 0x00, 0x9f,
-  0x59, 0x82, 0x19, 0x19, 0x68, 0x31, 0xb4, 0x16, 0xc1, 0x5c, 0x84, 0x88,
-  0x11, 0x41, 0x46, 0x54, 0x35, 0x78, 0x91, 0x0b, 0x86, 0xb1, 0x9a, 0x0d,
-  0x72, 0x36, 0x88, 0xcf, 0x70, 0x84, 0x59, 0xe9, 0x6c, 0x40, 0x7c, 0xb3,
-  0x0c, 0x34, 0x72, 0x23, 0x81, 0xed, 0x6c, 0x70, 0x56, 0xf1, 0xb1, 0x60,
-  0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0x60, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08,
-  0xe2, 0x53, 0x84, 0xf8, 0x06, 0x3a, 0xdc, 0x10, 0x80, 0x6f, 0x00, 0x06,
-  0xb3, 0x0c, 0x35, 0x62, 0x23, 0x81, 0x0d, 0x63, 0x1b, 0xc0, 0x67, 0x96,
-  0x60, 0x47, 0x0c, 0x6c, 0x03, 0x22, 0x3e, 0xb3, 0x04, 0x3b, 0x32, 0x1c,
-  0x11, 0x57, 0x61, 0x1b, 0x08, 0xdf, 0x2c, 0x03, 0x8e, 0xec, 0x48, 0x60,
-  0x72, 0x25, 0xb6, 0x41, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17,
-  0x18, 0x65, 0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xd1, 0xbe, 0x81,
-  0x0e, 0x37, 0x04, 0xeb, 0x1b, 0x80, 0xc1, 0x2c, 0x43, 0x8e, 0xe8, 0x48,
-  0x60, 0x6a, 0x1b, 0x0c, 0xf1, 0x99, 0x25, 0xd8, 0x11, 0x23, 0xde, 0x36,
-  0x80, 0xcf, 0x2c, 0xc1, 0x8e, 0x0c, 0xb4, 0x18, 0x5a, 0x8d, 0x60, 0x36,
-  0x42, 0xe4, 0x88, 0xa0, 0x23, 0xf8, 0x1a, 0xdc, 0xc8, 0x05, 0xc3, 0x5c,
-  0x60, 0xd4, 0x6d, 0x46, 0xdd, 0xce, 0x06, 0xc3, 0x1c, 0x9b, 0x06, 0xc3,
-  0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x81,
-  0xd6, 0xbf, 0x81, 0xfa, 0x06, 0xa7, 0x1b, 0xe0, 0x6f, 0x30, 0x9a, 0x10,
-  0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50,
-  0x44, 0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0x91, 0x70, 0x10,
-  0xbf, 0x41, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0x95,
-  0x70, 0x20, 0xbf, 0x41, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60,
-  0x70, 0x99, 0x70, 0x30, 0xbf, 0x41, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00,
-  0x82, 0x60, 0xb0, 0xb4, 0x70, 0x20, 0xbf, 0x41, 0xec, 0x06, 0xc1, 0xff,
-  0x06, 0xe1, 0x1b, 0xf4, 0x6f, 0x30, 0x9a, 0x10, 0x00, 0xb3, 0x04, 0x3c,
-  0x32, 0xd0, 0x62, 0x88, 0xc6, 0x89, 0xb0, 0xbd, 0x60, 0x22, 0x2a, 0x91,
-  0x22, 0xc2, 0x8e, 0xb0, 0xbd, 0xa0, 0x22, 0xb3, 0x0c, 0x3d, 0xf2, 0x23,
-  0xad, 0x35, 0x1c, 0xe1, 0x3b, 0xb7, 0x1b, 0x0c, 0xdf, 0xfd, 0xce, 0x30,
-  0xc3, 0x0d, 0x41, 0xeb, 0x06, 0x64, 0x50, 0x43, 0xa0, 0xc3, 0x11, 0xb4,
-  0xb5, 0xbb, 0xc1, 0xf0, 0x55, 0x20, 0xe8, 0xd9, 0xd6, 0x30, 0xc3, 0x0d,
-  0x01, 0xec, 0x06, 0x64, 0x50, 0xc1, 0xa0, 0xb3, 0x0c, 0x3e, 0x32, 0x27,
-  0xc1, 0xd1, 0x6d, 0x30, 0xcc, 0x95, 0x6a, 0x30, 0xcc, 0x88, 0xc1, 0x01,
-  0x80, 0x20, 0x18, 0x68, 0x34, 0x1c, 0x84, 0x70, 0xe0, 0xbb, 0xc1, 0x0b,
-  0x07, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30,
-  0x9a, 0x40, 0x0c, 0x45, 0x1c, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06,
-  0xd7, 0x0e, 0x07, 0x28, 0x1c, 0x1c, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20,
-  0x08, 0x06, 0x17, 0x0f, 0x07, 0x29, 0x1c, 0x30, 0x44, 0x30, 0x62, 0x80,
-  0x00, 0x20, 0x08, 0x06, 0x57, 0x0f, 0x07, 0x2a, 0x1c, 0x48, 0x44, 0x30,
-  0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x0b, 0x19, 0x07, 0x29, 0x1c, 0xa0,
-  0x6f, 0x10, 0xd8, 0x70, 0x80, 0xbf, 0x01, 0x0d, 0x07, 0xa3, 0x09, 0x01,
-  0x30, 0x4b, 0x30, 0x27, 0xc3, 0x0d, 0xe4, 0xb5, 0xc3, 0x01, 0x18, 0xcc,
-  0x32, 0x80, 0x49, 0x98, 0x04, 0x25, 0xbe, 0xc1, 0x0a, 0x07, 0x70, 0x81,
-  0x51, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x30, 0x95, 0x71, 0xc0, 0xc2,
-  0xc1, 0xfd, 0xc0, 0x6f, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x93,
-  0x19, 0x07, 0x2c, 0x1c, 0x04, 0xc2, 0x05, 0xc3, 0x54, 0xf9, 0x06, 0x30,
-  0x1c, 0xc0, 0x05, 0x46, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0xa4,
-  0xc6, 0x41, 0x0c, 0x07, 0xee, 0x55, 0xbf, 0xc1, 0x88, 0xc1, 0x01, 0x80,
-  0x20, 0x18, 0x4c, 0x6b, 0x1c, 0xc4, 0x70, 0x10, 0x08, 0x17, 0x0c, 0x73,
-  0x81, 0x51, 0x77, 0x18, 0x75, 0xae, 0x1b, 0x0c, 0x73, 0xbf, 0x1a, 0x0c,
-  0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06,
-  0x1a, 0x1c, 0x07, 0x3d, 0x1c, 0xe8, 0x6f, 0xb0, 0xc6, 0xc1, 0x68, 0x42,
-  0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43,
-  0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x75, 0xc7, 0x01,
-  0x19, 0x07, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x85,
-  0xc7, 0x41, 0x19, 0x07, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
-  0xc1, 0x95, 0xc7, 0x81, 0x19, 0x07, 0x09, 0x11, 0x8c, 0x18, 0x28, 0x00,
-  0x08, 0x82, 0xc1, 0x02, 0xca, 0x41, 0x19, 0x07, 0x24, 0x1c, 0x04, 0x72,
-  0x1c, 0xd0, 0x70, 0x00, 0xc7, 0xc1, 0x68, 0x42, 0x00, 0xcc, 0x12, 0xcc,
-  0xc9, 0x70, 0x03, 0x88, 0xd9, 0x71, 0x00, 0x06, 0xb3, 0x0c, 0x62, 0x32,
-  0x27, 0x81, 0xd5, 0x6f, 0x70, 0xbf, 0x41, 0x7c, 0x86, 0x23, 0x64, 0x08,
-  0x7f, 0x03, 0xe2, 0x9b, 0x65, 0x18, 0x13, 0x33, 0x09, 0x2c, 0x7f, 0x83,
-  0x19, 0x8a, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa3, 0x2c,
-  0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0x40, 0x39, 0xd0, 0xe1, 0x86,
-  0xc0, 0x8f, 0x03, 0x30, 0x98, 0x65, 0x20, 0x93, 0x32, 0x09, 0x6c, 0x08,
-  0xe1, 0x00, 0x3e, 0xb3, 0x04, 0x6a, 0x62, 0x20, 0x1c, 0x10, 0xf1, 0x99,
-  0x25, 0x50, 0x93, 0xe1, 0x88, 0x1e, 0x0a, 0xe1, 0x40, 0xf8, 0x66, 0x19,
-  0xce, 0x44, 0x4d, 0x02, 0xf3, 0x21, 0x11, 0x0e, 0xe2, 0x63, 0x81, 0x43,
-  0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x28, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4,
-  0xa7, 0x88, 0x55, 0x0e, 0x74, 0xb8, 0x21, 0x48, 0xe5, 0x00, 0x0c, 0x66,
-  0x19, 0xd0, 0x24, 0x4d, 0x02, 0x53, 0xe1, 0x60, 0x88, 0xcf, 0x2c, 0x81,
-  0x9a, 0x18, 0xd1, 0xc2, 0x01, 0x7c, 0x66, 0x09, 0xd4, 0x64, 0xa0, 0xc5,
-  0xd0, 0xc8, 0x04, 0x2b, 0x13, 0x02, 0x4d, 0x84, 0x34, 0x81, 0xc5, 0xc0,
-  0x4c, 0x2e, 0x18, 0xc6, 0x58, 0x38, 0x80, 0xe1, 0x20, 0x3e, 0xc3, 0x11,
-  0x3d, 0x16, 0xc3, 0x01, 0xf1, 0xcd, 0x32, 0xac, 0x89, 0x9b, 0x04, 0x26,
-  0xc3, 0x81, 0x8f, 0xc5, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0x81,
-  0x51, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0xb9, 0x1c, 0xe8,
-  0x70, 0x43, 0x70, 0xcb, 0x01, 0x18, 0xcc, 0x32, 0xb0, 0x49, 0x9b, 0x04,
-  0x36, 0xe8, 0x70, 0x00, 0x9f, 0x59, 0x02, 0x39, 0xb1, 0x1b, 0x0e, 0x88,
-  0xf8, 0xcc, 0x12, 0xc8, 0xc9, 0x70, 0x04, 0x9a, 0xe1, 0x70, 0x20, 0x7c,
-  0xb3, 0x0c, 0x6f, 0x22, 0x27, 0x81, 0xa5, 0x59, 0x0e, 0x07, 0xf1, 0xb1,
-  0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0x60, 0x94, 0x05, 0x91, 0x7c, 0xac,
-  0x08, 0xe2, 0x53, 0x04, 0x39, 0x07, 0x3a, 0xdc, 0x10, 0x88, 0x73, 0x00,
-  0x06, 0xb3, 0x0c, 0x70, 0x12, 0x27, 0x81, 0x85, 0x71, 0x30, 0xc4, 0x67,
-  0x96, 0x40, 0x4e, 0x8c, 0x30, 0xe3, 0x00, 0x3e, 0xb3, 0x04, 0x72, 0x32,
-  0xd0, 0x62, 0x68, 0x6c, 0x82, 0xb5, 0x09, 0x01, 0x27, 0x42, 0x9c, 0x98,
-  0x73, 0xe0, 0x26, 0x17, 0x0c, 0x73, 0x81, 0x51, 0xb7, 0x19, 0x75, 0x32,
-  0x1c, 0x0c, 0x73, 0x63, 0x1b, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33,
-  0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x1a, 0x3d, 0x07, 0xe1, 0x1c, 0xf8,
-  0x71, 0xf0, 0xce, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68,
-  0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00,
-  0x08, 0x82, 0xc1, 0xb5, 0xcf, 0x01, 0x3a, 0x07, 0x09, 0x11, 0x8c, 0x18,
-  0x20, 0x00, 0x08, 0x82, 0xc1, 0xc5, 0xcf, 0x41, 0x3a, 0x07, 0x09, 0x11,
-  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xd5, 0xcf, 0x81, 0x3a, 0x07,
-  0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0x42, 0xd2, 0x41,
-  0x3a, 0x07, 0xa8, 0x1c, 0x04, 0xf6, 0x1c, 0xe0, 0x72, 0x40, 0xcf, 0xc1,
-  0x68, 0x42, 0x00, 0xcc, 0x12, 0xcc, 0xc9, 0x40, 0x8b, 0x21, 0x1a, 0x3e,
-  0xa2, 0x93, 0x43, 0x8f, 0xa8, 0x04, 0x98, 0x08, 0x72, 0xa2, 0x93, 0x43,
-  0x98, 0xcc, 0x32, 0xd0, 0x89, 0x9d, 0x90, 0xda, 0x70, 0x44, 0xef, 0xb8,
-  0x72, 0x30, 0x7c, 0xe7, 0x3b, 0xc3, 0x0c, 0x37, 0x04, 0xa4, 0x1c, 0x90,
-  0x41, 0x0d, 0x81, 0x0e, 0x47, 0xac, 0x9a, 0x2c, 0x07, 0xc3, 0x57, 0x81,
-  0xa0, 0xd7, 0x6a, 0xc3, 0x0c, 0x37, 0x04, 0xa7, 0x1c, 0x90, 0x41, 0x05,
-  0x83, 0xce, 0x32, 0xd4, 0x89, 0xaa, 0x04, 0xb7, 0xc6, 0xc1, 0x30, 0xc7,
-  0xb7, 0xc1, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xa0, 0xad, 0x74,
-  0x80, 0xcf, 0x41, 0x2d, 0x07, 0x26, 0x1d, 0x8c, 0x26, 0x04, 0xc0, 0x68,
-  0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x71, 0xc8,
-  0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0x32, 0x1d, 0xfc, 0x73, 0x70,
-  0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0x33, 0x1d, 0x80,
-  0x74, 0xc0, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0x34,
-  0x1d, 0x84, 0x74, 0x20, 0x11, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18,
-  0x2c, 0x3b, 0x1d, 0x80, 0x74, 0xf0, 0xcb, 0x41, 0xd0, 0xd2, 0xc1, 0x3b,
-  0x07, 0x2b, 0x1d, 0x8c, 0x26, 0x04, 0xc0, 0x2c, 0x81, 0xaa, 0x0c, 0x37,
-  0xec, 0x9a, 0x4c, 0x07, 0x60, 0x30, 0xcb, 0x70, 0x27, 0x78, 0x12, 0x54,
-  0x2e, 0x07, 0x22, 0x1d, 0xc0, 0x05, 0x46, 0x8d, 0x18, 0x1c, 0x00, 0x08,
-  0x82, 0xc1, 0xc4, 0xd3, 0xc1, 0x48, 0x07, 0xf6, 0x73, 0xce, 0xc1, 0x88,
-  0xc1, 0x01, 0x80, 0x20, 0x18, 0x4c, 0x3d, 0x1d, 0x8c, 0x74, 0x10, 0x08,
-  0x17, 0x0c, 0x53, 0xbc, 0x1c, 0x9c, 0x74, 0x00, 0x17, 0x18, 0x35, 0x62,
-  0x70, 0x00, 0x20, 0x08, 0x06, 0x53, 0x58, 0x07, 0x28, 0x1d, 0x94, 0x1b,
-  0x3b, 0x07, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x30, 0x89, 0x75, 0x80,
-  0xd2, 0x41, 0x20, 0x5c, 0x30, 0xcc, 0x05, 0x46, 0xdd, 0x61, 0xd4, 0x95,
-  0x72, 0x30, 0xcc, 0xd9, 0x6e, 0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc,
-  0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x68, 0x67, 0x1d, 0xd0, 0x74, 0x10,
-  0xcf, 0x81, 0x58, 0x07, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3,
-  0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00,
-  0x20, 0x08, 0x06, 0x97, 0x5b, 0x07, 0x3b, 0x1d, 0x24, 0x44, 0x30, 0x62,
-  0x80, 0x00, 0x20, 0x08, 0x06, 0xd7, 0x5b, 0x07, 0x3c, 0x1d, 0x24, 0x44,
-  0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x17, 0x5c, 0x07, 0x3d, 0x1d,
-  0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0xcb, 0x5d, 0x07,
-  0x3c, 0x1d, 0xec, 0x73, 0x10, 0xa4, 0x75, 0xb0, 0xd2, 0xc1, 0x59, 0x07,
-  0xa3, 0x09, 0x01, 0x30, 0x4b, 0xa0, 0x2a, 0xc3, 0x0d, 0xf7, 0xd6, 0xd6,
-  0x01, 0x18, 0xcc, 0x32, 0xe4, 0x89, 0xaa, 0x04, 0xc6, 0xce, 0x81, 0x3b,
-  0x07, 0xf1, 0x19, 0x8e, 0x88, 0xa1, 0x77, 0x0e, 0x88, 0x6f, 0x96, 0x41,
-  0x4f, 0xfa, 0x24, 0x30, 0x78, 0x0e, 0x64, 0x28, 0x3e, 0x16, 0x0c, 0xf4,
-  0xb9, 0x60, 0x98, 0x0b, 0x8c, 0xb2, 0xc0, 0x90, 0x8f, 0x15, 0x41, 0x7c,
-  0x8a, 0xb8, 0xeb, 0x40, 0x87, 0x1b, 0x82, 0xba, 0x0e, 0xc0, 0x60, 0x96,
-  0x61, 0x4f, 0xf8, 0x24, 0xb0, 0x01, 0x9f, 0x03, 0xf8, 0xcc, 0x12, 0x84,
-  0x8a, 0xdd, 0x73, 0x40, 0xc4, 0x67, 0x96, 0x20, 0x54, 0x86, 0x23, 0x78,
-  0x08, 0x9f, 0x03, 0xe1, 0x9b, 0x65, 0xf0, 0x93, 0x50, 0x09, 0xac, 0x87,
-  0xf2, 0x39, 0x88, 0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa3,
-  0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0x44, 0x3b, 0xd0, 0xe1,
-  0x86, 0x00, 0xb4, 0x03, 0x30, 0x98, 0x65, 0xf8, 0x13, 0x50, 0x09, 0x2c,
-  0xa4, 0x83, 0x21, 0x3e, 0xb3, 0x04, 0xa1, 0x62, 0x04, 0x49, 0x07, 0xf0,
-  0x99, 0x25, 0x08, 0x95, 0x81, 0x16, 0x43, 0xdb, 0x13, 0x8c, 0x4f, 0x88,
-  0x3f, 0x11, 0x40, 0xc5, 0x15, 0x83, 0x3e, 0xb9, 0x60, 0x18, 0x1b, 0xe9,
-  0xe0, 0xa4, 0x83, 0xf8, 0x0c, 0x47, 0xd0, 0x1c, 0x4a, 0x07, 0xc4, 0x37,
-  0xcb, 0x20, 0x2a, 0xa5, 0x12, 0x58, 0x4a, 0x07, 0x35, 0x17, 0x1f, 0x0b,
-  0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x46, 0x59, 0x60, 0xc8, 0xc7, 0x8a,
-  0x20, 0x3e, 0x45, 0xc0, 0x76, 0xa0, 0xc3, 0x0d, 0x81, 0x6b, 0x07, 0x60,
-  0x30, 0xcb, 0x30, 0x2a, 0xa4, 0x12, 0xd8, 0x10, 0xd3, 0x01, 0x7c, 0x66,
-  0x09, 0x52, 0xc5, 0x5c, 0x3a, 0x20, 0xe2, 0x33, 0x4b, 0x90, 0x2a, 0xc3,
-  0x11, 0x3f, 0xf7, 0xd2, 0x81, 0xf0, 0xcd, 0x32, 0x98, 0x4a, 0xaa, 0x04,
-  0x06, 0x76, 0x30, 0x1d, 0xc4, 0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73,
-  0x81, 0x51, 0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0xbb, 0x1d,
-  0xe8, 0x70, 0x43, 0x90, 0xdb, 0x01, 0x18, 0xcc, 0x32, 0x9c, 0x0a, 0xaa,
-  0x04, 0x86, 0xd3, 0xc1, 0x10, 0x9f, 0x59, 0x82, 0x54, 0x31, 0xa2, 0xa7,
-  0x03, 0xf8, 0xcc, 0x12, 0xa4, 0xca, 0x40, 0x8b, 0xa1, 0x8d, 0x0a, 0x46,
-  0x2a, 0xc4, 0xa9, 0x08, 0xa8, 0x42, 0xeb, 0x41, 0xa9, 0x5c, 0x30, 0xcc,
-  0x05, 0x46, 0xdd, 0x66, 0xd4, 0xa5, 0x74, 0x30, 0xcc, 0xe9, 0x70, 0x30,
-  0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18,
-  0x68, 0xeb, 0x1d, 0xe0, 0x76, 0x50, 0xd7, 0x81, 0x79, 0x07, 0xa3, 0x09,
-  0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c,
-  0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x97, 0x7c, 0x07,
-  0xbf, 0x1d, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xd7,
-  0x7c, 0x07, 0xe0, 0x1d, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08,
-  0x06, 0x17, 0x7d, 0x07, 0xe1, 0x1d, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00,
-  0x20, 0x08, 0x06, 0xcb, 0x7e, 0x07, 0xe0, 0x1d, 0xfc, 0x75, 0x10, 0xb4,
-  0x77, 0xf0, 0xda, 0xc1, 0x7a, 0x07, 0xa3, 0x09, 0x01, 0x30, 0x4b, 0xa0,
-  0x2a, 0x03, 0x2d, 0x86, 0x68, 0xd4, 0x09, 0xca, 0x0e, 0x74, 0xa2, 0x12,
-  0x77, 0x22, 0xa4, 0x0a, 0xca, 0x0e, 0x78, 0x62, 0x7e, 0x67, 0xda, 0x01,
-  0x7c, 0x66, 0x19, 0x56, 0xa5, 0x55, 0xf8, 0x6e, 0x38, 0x02, 0xf4, 0x4c,
-  0x3b, 0x18, 0xbe, 0x0b, 0xbd, 0x61, 0x86, 0x1b, 0x02, 0xbe, 0x0e, 0xc8,
-  0xa0, 0x86, 0x40, 0x87, 0x23, 0x0a, 0xd5, 0x0e, 0x86, 0xaf, 0x02, 0x41,
-  0xef, 0x18, 0x66, 0xb8, 0x21, 0xf8, 0xeb, 0x80, 0x0c, 0x2a, 0x18, 0x74,
-  0x96, 0x81, 0x55, 0xc2, 0x25, 0xb8, 0xb1, 0x0e, 0x86, 0x39, 0x3a, 0x0e,
-  0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x6d, 0xc4, 0x03, 0xf8,
-  0x0e, 0x5a, 0x3b, 0xf0, 0xef, 0x60, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84,
-  0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x43, 0x46, 0x0c,
-  0x10, 0x00, 0x04, 0xc1, 0xe0, 0x52, 0xf1, 0xe0, 0xbe, 0x83, 0x83, 0x08,
-  0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x5a, 0xf1, 0x00, 0xbf, 0x03,
-  0x86, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x62, 0xf1, 0x20,
-  0xbf, 0x03, 0x89, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0x99,
-  0xf1, 0x00, 0xbf, 0x83, 0xdb, 0x0e, 0x82, 0x12, 0x0f, 0xce, 0x3b, 0x18,
-  0xf1, 0x60, 0x34, 0x21, 0x00, 0x66, 0x09, 0xc2, 0x65, 0xb8, 0x61, 0xf6,
-  0x54, 0x3c, 0x00, 0x83, 0x59, 0x06, 0x57, 0x79, 0x95, 0xa0, 0x62, 0x3b,
-  0xd0, 0xef, 0x00, 0x2e, 0x30, 0x6a, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c,
-  0x26, 0x1a, 0x0f, 0xf6, 0x3b, 0xc8, 0xbd, 0xdf, 0x0e, 0x46, 0x0c, 0x0e,
-  0x00, 0x04, 0xc1, 0x60, 0xaa, 0xf1, 0x60, 0xbf, 0x83, 0x40, 0xb8, 0x60,
-  0x98, 0xa2, 0xed, 0xe0, 0xbf, 0x03, 0xb8, 0xc0, 0xa8, 0x11, 0x83, 0x03,
-  0x00, 0x41, 0x30, 0x98, 0x72, 0x3c, 0x00, 0xf1, 0xe0, 0x23, 0xef, 0x60,
-  0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x26, 0x1d, 0x0f, 0x40, 0x3c, 0x08,
-  0x84, 0x0b, 0x86, 0xb9, 0xc0, 0xa8, 0x3b, 0x8c, 0xba, 0xbe, 0x0e, 0x86,
-  0x39, 0x57, 0x0e, 0x86, 0x39, 0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38,
-  0x00, 0x10, 0x04, 0x03, 0xed, 0xc7, 0x03, 0x16, 0x0f, 0xd2, 0x3b, 0xd0,
-  0xf1, 0x60, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10,
-  0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1,
-  0xe0, 0x32, 0xf3, 0x60, 0xc6, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00,
-  0x04, 0xc1, 0xe0, 0x3a, 0xf3, 0x80, 0xc6, 0x83, 0x84, 0x08, 0x46, 0x0c,
-  0x10, 0x00, 0x04, 0xc1, 0xe0, 0x42, 0xf3, 0xa0, 0xc6, 0x83, 0x84, 0x08,
-  0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0x79, 0xf3, 0x80, 0xc6, 0x83,
-  0xf9, 0x0e, 0x82, 0x30, 0x0f, 0x46, 0x3c, 0xf8, 0xf1, 0x60, 0x34, 0x21,
-  0x00, 0x66, 0x09, 0xc2, 0x65, 0xb8, 0xe1, 0xfd, 0xca, 0x3c, 0x00, 0x83,
-  0x59, 0x06, 0x58, 0x09, 0x97, 0xc0, 0xc8, 0x3b, 0x30, 0xef, 0x20, 0x3e,
-  0xc3, 0x11, 0xf4, 0x77, 0xde, 0x01, 0xf1, 0xcd, 0x32, 0xc4, 0x0a, 0xad,
-  0x04, 0x86, 0xde, 0x41, 0xfd, 0xc5, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c,
-  0x73, 0x81, 0x51, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x6f,
-  0x1e, 0xe8, 0x70, 0x43, 0xd0, 0xe6, 0x01, 0x18, 0xcc, 0x32, 0xc8, 0xca,
-  0xac, 0x04, 0x36, 0xc0, 0x77, 0x00, 0x9f, 0x59, 0x02, 0x5c, 0xb1, 0xf7,
-  0x0e, 0x88, 0xf8, 0xcc, 0x12, 0xe0, 0xca, 0x70, 0xc4, 0xff, 0xc1, 0x77,
-  0x20, 0x7c, 0xb3, 0x0c, 0xb5, 0x82, 0x2b, 0x81, 0x81, 0x60, 0x10, 0xdf,
-  0x41, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x18, 0x65, 0x41,
-  0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xa1, 0xe7, 0x81, 0x0e, 0x37, 0x04,
-  0x78, 0x1e, 0x80, 0xc1, 0x2c, 0x83, 0xad, 0xdc, 0x4a, 0x60, 0xf9, 0x1d,
-  0x0c, 0xf1, 0x99, 0x25, 0xc0, 0x15, 0x23, 0xf8, 0x3b, 0x80, 0xcf, 0x2c,
-  0x01, 0xae, 0x0c, 0xb4, 0x18, 0x9a, 0xac, 0x60, 0xb3, 0x42, 0xd8, 0x8a,
-  0x70, 0x2b, 0x34, 0x28, 0xd0, 0xca, 0x05, 0xc3, 0xd8, 0x7e, 0x07, 0xff,
-  0x1d, 0xc4, 0x67, 0x38, 0xc2, 0x15, 0x40, 0x3c, 0x20, 0xbe, 0x59, 0x86,
-  0x5c, 0xe1, 0x95, 0xc0, 0x42, 0x3c, 0x78, 0x85, 0xf8, 0x58, 0x30, 0xd0,
-  0xe7, 0x82, 0x61, 0x2e, 0x30, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1,
-  0x29, 0x02, 0xd5, 0x03, 0x1d, 0x6e, 0x08, 0x4c, 0x3d, 0x00, 0x83, 0x59,
-  0x06, 0x5d, 0xd9, 0x95, 0xc0, 0x86, 0x14, 0x0f, 0xe0, 0x33, 0x4b, 0x00,
-  0x2e, 0x66, 0xe2, 0x01, 0x11, 0x9f, 0x59, 0x02, 0x70, 0x19, 0x8e, 0xc8,
-  0x85, 0x13, 0x0f, 0x84, 0x6f, 0x96, 0xa1, 0x57, 0xc0, 0x25, 0x30, 0x5d,
-  0x40, 0xf1, 0x20, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x8c,
-  0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x98, 0xf5, 0x40, 0x87,
-  0x1b, 0x82, 0x58, 0x0f, 0xc0, 0x60, 0x96, 0xc1, 0x57, 0x7e, 0x25, 0x30,
-  0x18, 0x0f, 0x86, 0xf8, 0xcc, 0x12, 0x80, 0x8b, 0x11, 0x35, 0x1e, 0xc0,
-  0x67, 0x96, 0x00, 0x5c, 0x06, 0x5a, 0x0c, 0x4d, 0x57, 0xb0, 0x5d, 0x21,
-  0x7c, 0x45, 0xf8, 0x15, 0xd4, 0xe0, 0x95, 0x0b, 0x86, 0xb9, 0xc0, 0xa8,
-  0xdb, 0x8c, 0xba, 0x10, 0x0f, 0x86, 0x39, 0x99, 0x0e, 0x86, 0x39, 0x62,
-  0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x6d, 0xdc,
-  0x03, 0x58, 0x0f, 0xda, 0x3c, 0xf0, 0xf5, 0x60, 0x34, 0x21, 0x00, 0x46,
-  0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44,
-  0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x52, 0xf7, 0xe0, 0xd6, 0x83,
-  0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x5a, 0xf7, 0x00,
-  0xd7, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x62,
-  0xf7, 0x20, 0xd7, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1,
-  0x60, 0x99, 0xf7, 0x00, 0xd7, 0x83, 0x3b, 0x0f, 0x82, 0x72, 0x0f, 0x4e,
-  0x3d, 0x18, 0xf7, 0x60, 0x34, 0x21, 0x00, 0x66, 0x09, 0xc2, 0x65, 0xa0,
-  0xc5, 0x10, 0x0d, 0x56, 0xc1, 0xe7, 0x61, 0x55, 0x54, 0xc2, 0x55, 0x04,
-  0x70, 0xc1, 0xe7, 0xe1, 0x55, 0x66, 0x19, 0xc4, 0x85, 0x5c, 0xe6, 0x30,
-  0x18, 0x8e, 0xc0, 0xc3, 0xa0, 0xcf, 0x83, 0xe1, 0xbb, 0x3c, 0x0c, 0x86,
-  0x19, 0x6e, 0x08, 0xe6, 0x3c, 0x20, 0x83, 0x1a, 0x02, 0x1d, 0x8e, 0xe0,
-  0x87, 0x50, 0x0f, 0x86, 0xaf, 0x02, 0x41, 0xcf, 0x1f, 0x86, 0x19, 0x6e,
-  0x08, 0xec, 0x3c, 0x20, 0x83, 0x0a, 0x06, 0x9d, 0x65, 0x18, 0x17, 0x7c,
-  0x09, 0x4e, 0xc7, 0x83, 0x61, 0x6e, 0xad, 0x83, 0x61, 0x46, 0x0c, 0x0e,
-  0x00, 0x04, 0xc1, 0x40, 0xd3, 0xf7, 0xe0, 0xdc, 0x03, 0x52, 0x0f, 0xea,
-  0x3d, 0x18, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84,
-  0xd1, 0x04, 0x62, 0x28, 0xe2, 0x90, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30,
-  0xb8, 0x42, 0x3e, 0x70, 0xf7, 0xe0, 0x20, 0x82, 0x11, 0x03, 0x04, 0x00,
-  0x41, 0x30, 0xb8, 0x44, 0x3e, 0x78, 0xf7, 0x80, 0x21, 0x82, 0x11, 0x03,
-  0x04, 0x00, 0x41, 0x30, 0xb8, 0x46, 0x3e, 0x80, 0xf7, 0x40, 0x22, 0x82,
-  0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x58, 0x54, 0x3e, 0x78, 0xf7, 0xc0,
-  0xd5, 0x83, 0x80, 0xdf, 0x03, 0x5f, 0x0f, 0xf4, 0x3d, 0x18, 0x4d, 0x08,
-  0x80, 0x59, 0x02, 0x7c, 0x19, 0x6e, 0x50, 0xc5, 0x20, 0xe4, 0x03, 0x30,
-  0x98, 0x65, 0x28, 0x17, 0x73, 0x09, 0x0a, 0xd5, 0x83, 0x78, 0x0f, 0xe0,
-  0x02, 0xa3, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0x5a, 0xf9, 0x40,
-  0xde, 0x83, 0x58, 0x0c, 0x6c, 0x3d, 0x18, 0x31, 0x38, 0x00, 0x10, 0x04,
-  0x83, 0x89, 0xe5, 0x03, 0x79, 0x0f, 0x02, 0xe1, 0x82, 0x61, 0x6a, 0xd5,
-  0x03, 0x7b, 0x0f, 0xe0, 0x02, 0xa3, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1,
-  0x60, 0x82, 0xf9, 0xe0, 0xde, 0x03, 0x9b, 0xd8, 0xf5, 0x60, 0xc4, 0xe0,
-  0x00, 0x40, 0x10, 0x0c, 0xa6, 0x98, 0x0f, 0xee, 0x3d, 0x08, 0x84, 0x0b,
-  0x86, 0xb9, 0xc0, 0xa8, 0x3b, 0x8c, 0x3a, 0x3a, 0x0f, 0x86, 0xb9, 0xd2,
-  0x0e, 0x86, 0x39, 0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10,
-  0x04, 0x03, 0xcd, 0xe6, 0x83, 0x91, 0x0f, 0xc0, 0x3d, 0x88, 0xf9, 0x60,
-  0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13,
-  0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0xea,
-  0xf9, 0x40, 0xe5, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1,
-  0xe0, 0xf2, 0xf9, 0x60, 0xe5, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00,
-  0x04, 0xc1, 0xe0, 0xfa, 0xf9, 0x80, 0xe5, 0x83, 0x84, 0x08, 0x46, 0x0c,
-  0x14, 0x00, 0x04, 0xc1, 0x60, 0x31, 0xfb, 0x60, 0xe5, 0x03, 0x75, 0x0f,
-  0x02, 0x9c, 0x0f, 0xf4, 0x3d, 0xb0, 0xf9, 0x60, 0x34, 0x21, 0x00, 0x66,
-  0x09, 0xf0, 0x65, 0xb8, 0xc1, 0x1c, 0x03, 0x9e, 0x0f, 0xc0, 0x60, 0x96,
-  0xe1, 0x5c, 0xf0, 0x25, 0xb0, 0x5d, 0x0f, 0x7a, 0x3d, 0x88, 0xcf, 0x70,
-  0x04, 0x3b, 0x06, 0xbe, 0x1e, 0x10, 0xdf, 0x2c, 0x03, 0xba, 0xac, 0x4b,
-  0x60, 0xbf, 0x1e, 0xb4, 0x63, 0x10, 0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30,
-  0xcc, 0x05, 0x46, 0x59, 0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0x98,
-  0x7d, 0xa0, 0xc3, 0x0d, 0x01, 0xd9, 0x07, 0x60, 0x30, 0xcb, 0x90, 0x2e,
-  0xea, 0x12, 0xd8, 0x70, 0xee, 0x01, 0x7c, 0x66, 0x09, 0xde, 0xc5, 0xcc,
-  0x3d, 0x20, 0xe2, 0x33, 0x4b, 0xf0, 0x2e, 0xc3, 0x11, 0xf7, 0x18, 0x9c,
-  0x7b, 0x20, 0x7c, 0xb3, 0x0c, 0xec, 0xf2, 0x2e, 0x81, 0xe1, 0x63, 0x80,
-  0xee, 0x41, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x18, 0x65,
-  0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x11, 0xf7, 0x81, 0x0e, 0x37,
-  0x04, 0x6f, 0x1f, 0x80, 0xc1, 0x2c, 0x43, 0xbb, 0xb8, 0x4b, 0x60, 0xf0,
-  0x1e, 0x0c, 0xf1, 0x99, 0x25, 0x78, 0x17, 0x23, 0xe6, 0x3d, 0x80, 0xcf,
-  0x2c, 0xc1, 0xbb, 0x0c, 0xb4, 0x18, 0x5a, 0xba, 0x60, 0xea, 0x42, 0xb4,
-  0x8b, 0xe0, 0x2e, 0x24, 0x2a, 0xac, 0xcb, 0x05, 0xc3, 0x98, 0xbc, 0x07,
-  0xf6, 0x1e, 0xc4, 0x67, 0x38, 0xa2, 0x34, 0xee, 0x3d, 0x20, 0xbe, 0x59,
-  0x06, 0x78, 0x99, 0x97, 0xc0, 0xf0, 0x3d, 0x30, 0x8d, 0xf8, 0x58, 0x30,
-  0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x30, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04,
-  0xf1, 0x29, 0xe2, 0xef, 0x03, 0x1d, 0x6e, 0x08, 0xfa, 0x3e, 0x00, 0x83,
-  0x59, 0x86, 0x78, 0x91, 0x97, 0xc0, 0x06, 0x90, 0x0f, 0xe0, 0x33, 0x4b,
-  0x70, 0x2f, 0xd6, 0xef, 0x01, 0x11, 0x9f, 0x59, 0x82, 0x7b, 0x19, 0x8e,
-  0x80, 0x0d, 0x7f, 0x0f, 0x84, 0x6f, 0x96, 0x81, 0x5e, 0xee, 0x25, 0xb0,
-  0xd8, 0xf8, 0xf7, 0x20, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b,
-  0x8c, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x50, 0xfd, 0x40,
-  0x87, 0x1b, 0x02, 0xd4, 0x0f, 0xc0, 0x60, 0x96, 0xa1, 0x5e, 0xec, 0x25,
-  0xb0, 0x93, 0x0f, 0x86, 0xf8, 0xcc, 0x12, 0xdc, 0x8b, 0x11, 0x2c, 0x1f,
-  0xc0, 0x67, 0x96, 0xe0, 0x5e, 0x06, 0x5a, 0x0c, 0x2d, 0x5e, 0x30, 0x79,
-  0x21, 0xea, 0x45, 0xb0, 0x17, 0xbb, 0x99, 0x97, 0x0b, 0x86, 0xb9, 0xc0,
-  0xa8, 0xdb, 0x8c, 0x3a, 0x7c, 0x0f, 0x86, 0xb9, 0x14, 0x0f, 0x86, 0x39,
-  0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x4d,
-  0xf7, 0x83, 0xd3, 0x0f, 0xc8, 0x3e, 0xa8, 0xfd, 0x60, 0x34, 0x21, 0x00,
-  0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88,
-  0x44, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x0a, 0xff, 0xc0, 0xf5,
-  0x83, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x12, 0xff,
-  0xe0, 0xf5, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0,
-  0x1a, 0xff, 0x00, 0xf6, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04,
-  0xc1, 0x60, 0x51, 0xff, 0xe0, 0xf5, 0x03, 0xb7, 0x0f, 0x02, 0xde, 0x0f,
-  0xfc, 0x3e, 0xd0, 0xfd, 0x60, 0x34, 0x21, 0x00, 0x66, 0x09, 0xf0, 0x65,
-  0xa0, 0xc5, 0x10, 0x8d, 0x71, 0x31, 0xf7, 0x41, 0x5c, 0x54, 0xa2, 0x5c,
-  0x84, 0x7b, 0x31, 0xf7, 0xc1, 0x5c, 0x66, 0x19, 0xf2, 0x65, 0x5f, 0xd4,
-  0x32, 0x18, 0x8e, 0xd8, 0x3b, 0xba, 0x0f, 0x86, 0xef, 0xf8, 0x6e, 0x98,
-  0xe1, 0x86, 0x40, 0xed, 0x03, 0x32, 0xa8, 0x21, 0xd0, 0xe1, 0x88, 0xf9,
-  0xc0, 0xfb, 0x60, 0xf8, 0x2a, 0x10, 0xf4, 0xea, 0x63, 0x98, 0xe1, 0x86,
-  0xa0, 0xed, 0x03, 0x32, 0xa8, 0x60, 0xd0, 0x59, 0x06, 0x7d, 0x79, 0x99,
-  0xe0, 0x62, 0x3e, 0x18, 0xe6, 0xc4, 0x3c, 0x18, 0x66, 0xc4, 0xe0, 0x00,
-  0x40, 0x10, 0x0c, 0xb4, 0xf8, 0x0f, 0x7c, 0x3f, 0xd8, 0xfb, 0x80, 0xfd,
-  0x83, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18,
-  0x4d, 0x20, 0x86, 0x22, 0x0e, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83,
-  0x0b, 0xff, 0x83, 0xf2, 0x0f, 0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10,
-  0x04, 0x83, 0x2b, 0xff, 0x03, 0xf3, 0x0f, 0x18, 0x22, 0x18, 0x31, 0x40,
-  0x00, 0x10, 0x04, 0x83, 0x4b, 0xff, 0x83, 0xf3, 0x0f, 0x24, 0x22, 0x18,
-  0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x25, 0x04, 0x05, 0xf3, 0x0f, 0x4a,
-  0x3f, 0x08, 0xe6, 0x3f, 0xa8, 0xfd, 0x20, 0xfe, 0x83, 0xd1, 0x84, 0x00,
-  0x98, 0x25, 0x78, 0x99, 0xe1, 0x86, 0xd0, 0x0c, 0xf0, 0x3f, 0x00, 0x83,
-  0x59, 0x06, 0x7e, 0xe9, 0x97, 0xa0, 0xfe, 0x3e, 0x40, 0xff, 0x00, 0x2e,
-  0x30, 0x6a, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x26, 0x11, 0x14, 0xd2,
-  0x3f, 0xa0, 0xbd, 0xd6, 0x0f, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60,
-  0x1a, 0x41, 0x21, 0xfd, 0x83, 0x40, 0xb8, 0x60, 0x98, 0x12, 0xfd, 0xa0,
-  0xfd, 0x03, 0xb8, 0xc0, 0xa8, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x98,
-  0x4e, 0x50, 0x70, 0xff, 0xa0, 0x45, 0x64, 0x3f, 0x18, 0x31, 0x38, 0x00,
-  0x10, 0x04, 0x83, 0x09, 0x05, 0x05, 0xf7, 0x0f, 0x02, 0xe1, 0x82, 0x61,
-  0x2e, 0x30, 0xea, 0x0e, 0xa3, 0x6e, 0xed, 0x83, 0x61, 0x8e, 0xcf, 0x83,
-  0x61, 0x8e, 0x18, 0xe6, 0x88, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1,
-  0x40, 0x6b, 0x41, 0x41, 0xff, 0x83, 0xdb, 0x0f, 0x50, 0x50, 0x18, 0x4d,
-  0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62,
-  0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xb8, 0x68, 0x50,
-  0x08, 0x41, 0x21, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xb8,
-  0x6a, 0x50, 0x10, 0x41, 0x21, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41,
-  0x30, 0xb8, 0x6c, 0x50, 0x18, 0x41, 0x21, 0x21, 0x82, 0x11, 0x03, 0x05,
-  0x00, 0x41, 0x30, 0x58, 0x7a, 0x50, 0x10, 0x41, 0x21, 0xfc, 0x83, 0xe0,
-  0x05, 0x85, 0xf8, 0x0f, 0x5a, 0x50, 0x18, 0x4d, 0x08, 0x80, 0x59, 0x82,
-  0x97, 0x19, 0x6e, 0xe8, 0xcd, 0x60, 0x06, 0x05, 0x30, 0x98, 0x65, 0xf0,
-  0x97, 0x97, 0x09, 0x4c, 0xf6, 0x03, 0xda, 0x0f, 0xe2, 0x33, 0x1c, 0xf1,
-  0x7e, 0xb5, 0x1f, 0x10, 0xdf, 0x2c, 0xc3, 0xbf, 0x88, 0x4c, 0x60, 0xb6,
-  0x1f, 0xc0, 0x5f, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x18,
-  0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xd1, 0x83, 0x82, 0x0e,
-  0x37, 0x04, 0x3b, 0x28, 0x80, 0xc1, 0x2c, 0x03, 0xc8, 0x84, 0x4c, 0x60,
-  0x83, 0xef, 0x07, 0xf0, 0x99, 0x25, 0x30, 0x19, 0xeb, 0xfd, 0x80, 0x88,
-  0xcf, 0x2c, 0x81, 0xc9, 0x0c, 0x47, 0xe8, 0x9f, 0xef, 0x07, 0xc2, 0x37,
-  0xcb, 0x30, 0x32, 0x26, 0x13, 0xd8, 0xfe, 0xfd, 0x7e, 0x10, 0x1f, 0x0b,
-  0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x46, 0x59, 0x10, 0xc9, 0xc7, 0x8a,
-  0x20, 0x3e, 0x45, 0xa0, 0xa1, 0xa0, 0xc3, 0x0d, 0x81, 0x19, 0x0a, 0x60,
-  0x30, 0xcb, 0x40, 0x32, 0x25, 0x13, 0xd8, 0xf9, 0x07, 0x43, 0x7c, 0x66,
-  0x09, 0x4c, 0xc6, 0x08, 0xf5, 0x0f, 0xe0, 0x33, 0x4b, 0x60, 0x32, 0x03,
-  0x2d, 0x86, 0x06, 0x32, 0x58, 0xc8, 0x10, 0x24, 0x23, 0x94, 0x8c, 0x0a,
-  0x0a, 0x22, 0x73, 0xc1, 0x30, 0x96, 0xfe, 0x41, 0xfb, 0x07, 0xf1, 0x19,
-  0x8e, 0xe0, 0x13, 0xf7, 0x0f, 0x88, 0x6f, 0x96, 0xe1, 0x64, 0x54, 0x26,
-  0xb0, 0xf7, 0x0f, 0xfa, 0x24, 0x3e, 0x16, 0x0c, 0xf4, 0xb9, 0x60, 0x98,
-  0x0b, 0x8c, 0xb2, 0xc0, 0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xb0, 0x43,
-  0x41, 0x87, 0x1b, 0x02, 0x3a, 0x14, 0xc0, 0x60, 0x96, 0x01, 0x65, 0x52,
-  0x26, 0xb0, 0xe1, 0xfe, 0x03, 0xf8, 0xcc, 0x12, 0xb8, 0x8c, 0xd1, 0x7f,
-  0x40, 0xc4, 0x67, 0x96, 0xc0, 0x65, 0x86, 0x23, 0x4e, 0xa5, 0xfe, 0x03,
-  0xe1, 0x9b, 0x65, 0x58, 0x19, 0x97, 0x09, 0x0c, 0x55, 0xec, 0x3f, 0x88,
-  0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa3, 0x2c, 0x88, 0xe4,
-  0x63, 0x45, 0x10, 0x9f, 0x22, 0x42, 0x51, 0xd0, 0xe1, 0x86, 0xe0, 0x0f,
-  0x05, 0x30, 0x98, 0x65, 0x60, 0x99, 0x96, 0x09, 0xcc, 0xff, 0x83, 0x21,
-  0x3e, 0xb3, 0x04, 0x2e, 0x63, 0xc4, 0x08, 0x0a, 0xf0, 0x99, 0x25, 0x70,
-  0x99, 0x81, 0x16, 0x43, 0x43, 0x19, 0x2c, 0x65, 0x08, 0x96, 0x11, 0x5a,
-  0x86, 0xac, 0x54, 0xe6, 0x82, 0x61, 0x2e, 0x30, 0xea, 0x36, 0xa3, 0xee,
-  0xfd, 0x83, 0x61, 0x0e, 0xe4, 0x83, 0x61, 0x8e, 0x18, 0xe6, 0x88, 0x61,
-  0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x40, 0x8b, 0x45, 0xc1, 0x0f, 0x85,
-  0x1d, 0x14, 0x58, 0x51, 0x18, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18,
-  0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04,
-  0x00, 0x41, 0x30, 0xb8, 0x70, 0x51, 0x28, 0x45, 0x21, 0x21, 0x82, 0x11,
-  0x03, 0x04, 0x00, 0x41, 0x30, 0xb8, 0x72, 0x51, 0x30, 0x45, 0x21, 0x21,
-  0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xb8, 0x74, 0x51, 0x38, 0x45,
-  0x21, 0x21, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x58, 0xc2, 0x51,
-  0x30, 0x45, 0xa1, 0x0c, 0x85, 0x60, 0x16, 0x85, 0x3a, 0x14, 0x62, 0x51,
-  0x18, 0x4d, 0x08, 0x80, 0x59, 0x82, 0x97, 0x19, 0x68, 0x31, 0x44, 0x43,
-  0x5f, 0x68, 0x91, 0xc8, 0x17, 0x95, 0xe0, 0x17, 0xc1, 0x65, 0x68, 0x91,
-  0xe8, 0x97, 0x59, 0x06, 0x98, 0x91, 0x99, 0x30, 0x0d, 0x86, 0x23, 0xf4,
-  0x6e, 0x0d, 0x85, 0xe1, 0xbb, 0xbd, 0x1b, 0x66, 0xb8, 0x21, 0x08, 0x43,
-  0x81, 0x0c, 0x6a, 0x08, 0x74, 0x38, 0x42, 0x5d, 0xde, 0x50, 0x18, 0xbe,
-  0x0a, 0x04, 0x3d, 0x76, 0x19, 0x66, 0xb8, 0x21, 0x20, 0x43, 0x81, 0x0c,
-  0x2a, 0x18, 0x74, 0x96, 0x21, 0x66, 0xcc, 0x26, 0x38, 0x14, 0x14, 0x86,
-  0xb9, 0x9c, 0x0f, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x0d,
-  0x1d, 0x85, 0x5a, 0x14, 0xe4, 0x50, 0x18, 0x47, 0x61, 0x34, 0x21, 0x00,
-  0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88,
-  0x43, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x7a, 0x47, 0x81, 0x17,
-  0x85, 0x83, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x82, 0x47,
-  0xa1, 0x17, 0x05, 0x86, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0,
-  0x8a, 0x47, 0xc1, 0x17, 0x05, 0x89, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04,
-  0xc1, 0x60, 0xc1, 0x47, 0xa1, 0x17, 0x05, 0x3e, 0x14, 0x02, 0x75, 0x14,
-  0x58, 0x51, 0x40, 0x47, 0x61, 0x34, 0x21, 0x00, 0x66, 0x09, 0xcc, 0x66,
-  0xb8, 0x01, 0x4f, 0x83, 0x77, 0x14, 0xc0, 0x60, 0x96, 0x61, 0x66, 0x68,
-  0x26, 0x28, 0x3b, 0x14, 0x7e, 0x51, 0x80, 0x0b, 0x8c, 0x1a, 0x31, 0x38,
-  0x00, 0x10, 0x04, 0x83, 0x29, 0x1f, 0x05, 0x70, 0x14, 0x66, 0x8f, 0x14,
-  0x85, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x98, 0xf4, 0x51, 0x00, 0x47,
-  0x21, 0x10, 0x2e, 0x18, 0xa6, 0xf2, 0x50, 0x20, 0x47, 0x01, 0x2e, 0x30,
-  0x6a, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x26, 0x7f, 0x14, 0xca, 0x51,
-  0x20, 0x99, 0x54, 0x14, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0xfa,
-  0x47, 0xa1, 0x1c, 0x85, 0x40, 0xb8, 0x60, 0x98, 0x0b, 0x8c, 0xba, 0xc3,
-  0xa8, 0x13, 0x43, 0x61, 0x98, 0x9b, 0xfb, 0x60, 0x98, 0x23, 0x86, 0x39,
-  0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xd0, 0x48, 0x52, 0x88,
-  0x47, 0xc1, 0x15, 0x85, 0x7f, 0x14, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41,
-  0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4,
-  0x00, 0x01, 0x40, 0x10, 0x0c, 0xae, 0x95, 0x14, 0xf0, 0x51, 0x48, 0x88,
-  0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x2e, 0x96, 0x14, 0xf2, 0x51,
-  0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xae, 0x96, 0x14,
-  0xf4, 0x51, 0x48, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x16,
-  0x9a, 0x14, 0xf2, 0x51, 0xc0, 0x45, 0x21, 0x30, 0x49, 0x01, 0x1d, 0x05,
-  0x92, 0x14, 0x46, 0x13, 0x02, 0x60, 0x96, 0xc0, 0x6c, 0x86, 0x1b, 0x68,
-  0x35, 0x50, 0x49, 0x01, 0x0c, 0x66, 0x19, 0x6a, 0xc6, 0x6c, 0x02, 0x4b,
-  0x45, 0x61, 0x15, 0x85, 0xf8, 0x0c, 0x47, 0xb8, 0x1f, 0x2b, 0x0a, 0xc4,
-  0x37, 0xcb, 0x60, 0x33, 0x39, 0x13, 0x58, 0x2b, 0x0a, 0xef, 0x17, 0x1f,
-  0x0b, 0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x46, 0x59, 0x60, 0xc8, 0xc7,
-  0x8a, 0x20, 0x3e, 0x45, 0xd0, 0xa4, 0xa0, 0xc3, 0x0d, 0x81, 0x4c, 0x0a,
-  0x60, 0x30, 0xcb, 0x70, 0x33, 0x38, 0x13, 0xd8, 0x50, 0x8b, 0x02, 0x7c,
-  0x66, 0x09, 0x7a, 0xc6, 0x68, 0x51, 0x20, 0xe2, 0x33, 0x4b, 0xd0, 0x33,
-  0xc3, 0x11, 0xf9, 0x57, 0x8b, 0x82, 0xf0, 0xcd, 0x32, 0xe8, 0x4c, 0xcf,
-  0x04, 0xa6, 0x7f, 0xb6, 0x28, 0xc4, 0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c,
-  0x73, 0x81, 0x51, 0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x3f,
-  0x29, 0xe8, 0x70, 0x43, 0xd0, 0x93, 0x02, 0x18, 0xcc, 0x32, 0xec, 0x0c,
-  0xcf, 0x04, 0xe6, 0x8b, 0xc2, 0x10, 0x9f, 0x59, 0x82, 0x9e, 0x31, 0x22,
-  0x1c, 0x05, 0xf8, 0xcc, 0x12, 0xf4, 0xcc, 0x40, 0x8b, 0xa1, 0xdd, 0x0c,
-  0x86, 0x33, 0xc4, 0xce, 0x08, 0x3c, 0x83, 0x82, 0x42, 0xce, 0x5c, 0x30,
-  0x8c, 0x81, 0xa3, 0x40, 0x8e, 0x42, 0x7c, 0x86, 0x23, 0xe6, 0xa6, 0x1c,
-  0x05, 0xe2, 0x9b, 0x65, 0xf0, 0x99, 0xb0, 0x09, 0xcc, 0x1c, 0x05, 0xba,
-  0x89, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa3, 0x2c, 0x30,
-  0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xda, 0x52, 0xd0, 0xe1, 0x86, 0x60,
-  0x2d, 0x05, 0x30, 0x98, 0x65, 0xf8, 0x19, 0xb0, 0x09, 0x6c, 0x70, 0x47,
-  0x01, 0x3e, 0xb3, 0x04, 0x65, 0x63, 0xeb, 0x28, 0x10, 0xf1, 0x99, 0x25,
-  0x28, 0x9b, 0xe1, 0x08, 0xbf, 0x61, 0x47, 0x41, 0xf8, 0x66, 0x19, 0xc4,
-  0xa6, 0x6c, 0x02, 0xfb, 0x9b, 0x76, 0x14, 0xe2, 0x63, 0x81, 0x43, 0x9f,
-  0x0b, 0x86, 0xb9, 0xc0, 0x28, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4, 0xa7,
-  0x08, 0xbc, 0x14, 0x74, 0xb8, 0x21, 0xb0, 0x4b, 0x01, 0x0c, 0x66, 0x19,
-  0xc6, 0x86, 0x6c, 0x02, 0xab, 0x47, 0x61, 0x88, 0xcf, 0x2c, 0x41, 0xd9,
-  0x18, 0xa1, 0x8f, 0x02, 0x7c, 0x66, 0x09, 0xca, 0x66, 0xa0, 0xc5, 0xd0,
-  0x7e, 0x06, 0x03, 0x1b, 0x62, 0x6c, 0x04, 0xb2, 0x91, 0xb9, 0xb0, 0xb9,
-  0x60, 0x98, 0x0b, 0x8c, 0xba, 0xcd, 0xa8, 0x33, 0x47, 0x61, 0x98, 0xbb,
-  0xff, 0x60, 0x98, 0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00,
-  0x41, 0x30, 0xd0, 0x50, 0x53, 0xa8, 0x4b, 0x41, 0x26, 0x85, 0xd1, 0x14,
-  0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34,
-  0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xae,
-  0xd7, 0x14, 0xf8, 0x52, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10,
-  0x0c, 0x2e, 0xd8, 0x14, 0xfa, 0x52, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01,
-  0x40, 0x10, 0x0c, 0xae, 0xd8, 0x14, 0xfc, 0x52, 0x48, 0x88, 0x60, 0xc4,
-  0x40, 0x01, 0x40, 0x10, 0x0c, 0x16, 0xdc, 0x14, 0xfa, 0x52, 0xe0, 0x49,
-  0x21, 0x50, 0x4d, 0x81, 0x2d, 0x05, 0xd4, 0x14, 0x46, 0x13, 0x02, 0x60,
-  0x96, 0xc0, 0x6c, 0x06, 0x5a, 0x0c, 0xd1, 0x88, 0x19, 0x51, 0x25, 0x60,
-  0x46, 0x25, 0x66, 0x46, 0x28, 0x1b, 0x51, 0x25, 0x68, 0xc6, 0x76, 0x36,
-  0x28, 0x4b, 0x01, 0x3e, 0xb3, 0x0c, 0x67, 0x93, 0x36, 0x39, 0x1b, 0x0c,
-  0x47, 0xf4, 0x6c, 0x30, 0x96, 0xc2, 0xf0, 0x9d, 0xcf, 0x06, 0xc3, 0x0c,
-  0x37, 0x04, 0x39, 0x29, 0x90, 0x41, 0x0d, 0x81, 0x0e, 0x47, 0x14, 0x67,
-  0x29, 0x0c, 0x5f, 0x05, 0x82, 0xde, 0x31, 0xcc, 0x70, 0x43, 0xc0, 0x93,
-  0x02, 0x19, 0x54, 0x30, 0xe8, 0x2c, 0x03, 0xda, 0xf4, 0x4d, 0x70, 0x20,
-  0x29, 0x0c, 0x73, 0x31, 0x28, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08,
-  0x06, 0x1a, 0x78, 0x0a, 0xad, 0x29, 0xa8, 0xa5, 0xb0, 0x9b, 0xc2, 0x68,
-  0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10,
-  0x43, 0x11, 0x87, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x75, 0x9e,
-  0x02, 0x6d, 0x0a, 0x07, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1,
-  0x85, 0x9e, 0x42, 0x6d, 0x0a, 0x0c, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08,
-  0x82, 0xc1, 0x95, 0x9e, 0x82, 0x6d, 0x0a, 0x12, 0x11, 0x8c, 0x18, 0x28,
-  0x00, 0x08, 0x82, 0xc1, 0x02, 0x9f, 0x42, 0x6d, 0x0a, 0x74, 0x29, 0x04,
-  0xe2, 0x29, 0x90, 0xa6, 0x00, 0x9e, 0xc2, 0x68, 0x42, 0x00, 0xcc, 0x12,
-  0xf4, 0xcd, 0x70, 0x03, 0xdc, 0x06, 0xe7, 0x29, 0x80, 0xc1, 0x2c, 0x83,
-  0xda, 0xac, 0x4d, 0x50, 0x6e, 0x29, 0xdc, 0xa6, 0x00, 0x17, 0x18, 0x35,
-  0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x53, 0x7c, 0x0a, 0xb8, 0x29, 0xd8,
-  0x6d, 0xc0, 0x97, 0xc2, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x4c, 0xf2,
-  0x29, 0xe0, 0xa6, 0x10, 0x08, 0x17, 0x0c, 0x53, 0x71, 0x29, 0xf0, 0xa6,
-  0x00, 0x17, 0x18, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x93, 0x7d,
-  0x0a, 0xbd, 0x29, 0x7c, 0xa1, 0x29, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82,
-  0xc1, 0x74, 0x9f, 0x42, 0x6f, 0x0a, 0x81, 0x70, 0xc1, 0x30, 0x17, 0x18,
-  0x75, 0x87, 0x51, 0xa7, 0x93, 0xc2, 0x30, 0xb7, 0x86, 0xc2, 0x30, 0x47,
-  0x0c, 0x73, 0xc4, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xa0, 0xf1,
-  0xa7, 0x90, 0x9e, 0x82, 0x69, 0x0a, 0xf7, 0x29, 0x8c, 0x26, 0x04, 0xc0,
-  0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x91,
-  0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0x23, 0x2a, 0xc0, 0xa7,
-  0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0x24, 0x2a,
-  0xc4, 0xa7, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c,
-  0x25, 0x2a, 0xc8, 0xa7, 0x90, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20,
-  0x18, 0x2c, 0x2c, 0x2a, 0xc4, 0xa7, 0x00, 0x9b, 0x42, 0xe0, 0x9f, 0x02,
-  0x78, 0x0a, 0xfc, 0x29, 0x8c, 0x26, 0x04, 0xc0, 0x2c, 0x41, 0xdf, 0x0c,
-  0x37, 0xb0, 0x6e, 0x20, 0xa2, 0x02, 0x18, 0xcc, 0x32, 0xb0, 0x4d, 0xdf,
-  0x04, 0x16, 0x9a, 0xc2, 0x68, 0x0a, 0xf1, 0x19, 0x8e, 0x88, 0xdd, 0x80,
-  0x34, 0x05, 0xe2, 0x9b, 0x65, 0x68, 0x1b, 0xb8, 0x09, 0xac, 0x34, 0x05,
-  0xd9, 0x0d, 0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x28,
-  0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0x16, 0x15, 0x74, 0xb8,
-  0x21, 0x50, 0x51, 0x01, 0x0c, 0x66, 0x19, 0xdc, 0xe6, 0x6d, 0x02, 0x1b,
-  0x5a, 0x53, 0x80, 0xcf, 0x2c, 0x01, 0xdd, 0x18, 0x6b, 0x0a, 0x44, 0x7c,
-  0x66, 0x09, 0xe8, 0x66, 0x38, 0x82, 0x77, 0x83, 0xd6, 0x14, 0x84, 0x6f,
-  0x96, 0x21, 0x6e, 0xe8, 0x26, 0xb0, 0xde, 0x0d, 0x5c, 0x53, 0x88, 0x8f,
-  0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa3, 0x2c, 0x88, 0xe4, 0x63,
-  0x45, 0x10, 0x9f, 0x22, 0x6e, 0x54, 0xd0, 0xe1, 0x86, 0xa0, 0x46, 0x05,
-  0x30, 0x98, 0x65, 0x90, 0x9b, 0xb9, 0x09, 0xcc, 0x36, 0x85, 0x21, 0x3e,
-  0xb3, 0x04, 0x74, 0x63, 0x44, 0x6e, 0x0a, 0xf0, 0x99, 0x25, 0xa0, 0x9b,
-  0x81, 0x16, 0x43, 0x73, 0x1b, 0xec, 0x6d, 0x08, 0xb9, 0x11, 0xe6, 0xc6,
-  0xf5, 0x05, 0xb8, 0xb9, 0x60, 0x18, 0xc3, 0x4d, 0x81, 0x37, 0x85, 0xf8,
-  0x0c, 0x47, 0xb8, 0x42, 0x6f, 0x0a, 0xc4, 0x37, 0xcb, 0x50, 0x37, 0x78,
-  0x13, 0x98, 0x6f, 0x0a, 0xaf, 0x10, 0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30,
-  0xcc, 0x05, 0x46, 0x59, 0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0x94,
-  0xa9, 0xa0, 0xc3, 0x0d, 0xc1, 0x98, 0x0a, 0x60, 0x30, 0xcb, 0x60, 0x37,
-  0x77, 0x13, 0xd8, 0x60, 0x9e, 0x02, 0x7c, 0x66, 0x09, 0xf8, 0xc6, 0xc6,
-  0x53, 0x20, 0xe2, 0x33, 0x4b, 0xc0, 0x37, 0xc3, 0x11, 0xb9, 0x40, 0x9e,
-  0x82, 0xf0, 0xcd, 0x32, 0xe4, 0x0d, 0xdf, 0x04, 0xa6, 0x0b, 0xe5, 0x29,
-  0xc4, 0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x51, 0x16, 0x44,
-  0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x70, 0x2a, 0xe8, 0x70, 0x43, 0xe0,
-  0xa6, 0x02, 0x18, 0xcc, 0x32, 0xe8, 0xcd, 0xde, 0x04, 0xd6, 0x9e, 0xc2,
-  0x10, 0x9f, 0x59, 0x02, 0xbe, 0x31, 0x42, 0x3e, 0x05, 0xf8, 0xcc, 0x12,
-  0xf0, 0xcd, 0x40, 0x8b, 0xa1, 0xd9, 0x0d, 0x76, 0x37, 0x84, 0xde, 0x08,
-  0x7b, 0x83, 0x1a, 0x78, 0x73, 0xc1, 0x30, 0x17, 0x18, 0x75, 0x9b, 0x51,
-  0xe7, 0x9b, 0xc2, 0x30, 0xf7, 0x8e, 0xc2, 0x30, 0x47, 0x0c, 0x73, 0xc4,
-  0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xa0, 0x81, 0xaa, 0xd0, 0xa6,
-  0x82, 0x8a, 0x0a, 0x7b, 0x2a, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10,
-  0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x91, 0xc8, 0x88, 0x01,
-  0x02, 0x80, 0x20, 0x18, 0x5c, 0xa7, 0x2a, 0xd0, 0xa9, 0x90, 0x10, 0xc1,
-  0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0xa8, 0x2a, 0xd4, 0xa9, 0x90,
-  0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0xa9, 0x2a, 0xd8,
-  0xa9, 0x90, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c, 0xb0,
-  0x2a, 0xd4, 0xa9, 0x40, 0xa3, 0x42, 0x20, 0xaa, 0x02, 0x99, 0x0a, 0xa0,
-  0x2a, 0x8c, 0x26, 0x04, 0xc0, 0x2c, 0x41, 0xdf, 0x0c, 0xb4, 0x18, 0xa2,
-  0x81, 0x36, 0x72, 0x4c, 0x9c, 0x8d, 0x4a, 0xa8, 0x8d, 0xc0, 0x37, 0x72,
-  0x4c, 0xac, 0xcd, 0x2c, 0x83, 0xdf, 0x80, 0x0e, 0x0c, 0x07, 0xc3, 0x11,
-  0x35, 0x1c, 0xe8, 0xa8, 0x30, 0x7c, 0x67, 0xc3, 0xc1, 0x30, 0xc3, 0x0d,
-  0x01, 0x8c, 0x0a, 0x64, 0x50, 0x43, 0xa0, 0xc3, 0x11, 0xfc, 0xe0, 0xa3,
-  0xc2, 0xf0, 0x55, 0x20, 0xe8, 0xf9, 0xc3, 0x30, 0xc3, 0x0d, 0xc1, 0x8c,
-  0x0a, 0x64, 0x50, 0xc1, 0xa0, 0xb3, 0x0c, 0x7f, 0x43, 0x3b, 0xc1, 0xdd,
-  0xa7, 0x30, 0xcc, 0xa1, 0xa4, 0x30, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20,
-  0x18, 0x68, 0xb7, 0x2a, 0x90, 0xaa, 0x10, 0xa6, 0x82, 0xac, 0x0a, 0xa3,
-  0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40,
-  0x0c, 0x45, 0x1c, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x97, 0xaf,
-  0x0a, 0xab, 0x2a, 0x1c, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06,
-  0xd7, 0xaf, 0x0a, 0xac, 0x2a, 0x30, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20,
-  0x08, 0x06, 0x17, 0xb8, 0x0a, 0xad, 0x2a, 0x48, 0x44, 0x30, 0x62, 0xa0,
-  0x00, 0x20, 0x08, 0x06, 0xcb, 0xb9, 0x0a, 0xac, 0x2a, 0xac, 0xa9, 0x10,
-  0xe4, 0xaa, 0xb0, 0xa7, 0xc2, 0xad, 0x0a, 0xa3, 0x09, 0x01, 0x30, 0x4b,
-  0x40, 0x3b, 0xc3, 0x0d, 0x67, 0x1c, 0xf8, 0xaa, 0x00, 0x06, 0xb3, 0x0c,
-  0xa1, 0x23, 0x3a, 0x41, 0x95, 0xa9, 0xe0, 0xaa, 0x02, 0x5c, 0x60, 0xd4,
-  0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x4c, 0xe8, 0x2a, 0xbc, 0xaa, 0xe0,
-  0xc6, 0xc1, 0x9c, 0x0a, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x30, 0xa5,
-  0xab, 0xf0, 0xaa, 0x42, 0x20, 0x5c, 0x30, 0x4c, 0xa1, 0xa9, 0x30, 0xab,
-  0x02, 0x5c, 0x60, 0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x4c, 0xed,
-  0x2a, 0xd0, 0xaa, 0x60, 0x13, 0x78, 0x2a, 0x8c, 0x18, 0x1c, 0x00, 0x08,
-  0x82, 0xc1, 0xe4, 0xae, 0x02, 0xad, 0x0a, 0x81, 0x70, 0xc1, 0x30, 0x17,
-  0x18, 0x75, 0x87, 0x51, 0x17, 0xa3, 0xc2, 0x30, 0x27, 0x96, 0xc2, 0x30,
-  0x47, 0x0c, 0x73, 0xc4, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xa0,
-  0xcd, 0xab, 0x00, 0xae, 0x42, 0x9f, 0x0a, 0xee, 0x2a, 0x8c, 0x26, 0x04,
-  0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14,
-  0x91, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0xfa, 0x2a, 0x9c,
-  0xab, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0xfb,
-  0x2a, 0xa0, 0xab, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
-  0x5c, 0xfc, 0x2a, 0xa4, 0xab, 0x90, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80,
-  0x20, 0x18, 0x2c, 0x23, 0x2b, 0xa0, 0xab, 0x70, 0xaa, 0x42, 0x50, 0xaf,
-  0xc2, 0xad, 0x0a, 0xf3, 0x2a, 0x8c, 0x26, 0x04, 0xc0, 0x2c, 0x01, 0xed,
-  0x0c, 0x37, 0x8c, 0x72, 0x90, 0xaf, 0x02, 0x18, 0xcc, 0x32, 0x8c, 0x0e,
-  0xed, 0x04, 0x86, 0xa7, 0x82, 0x9e, 0x0a, 0xf1, 0x19, 0x8e, 0x48, 0xe5,
-  0x60, 0x4f, 0x05, 0xe2, 0x9b, 0x65, 0x20, 0x9d, 0xd3, 0x09, 0x8c, 0x4f,
-  0x05, 0x55, 0x0e, 0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xc0,
-  0x28, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0x91, 0x15, 0x74,
-  0xb8, 0x21, 0x08, 0x59, 0x01, 0x0c, 0x66, 0x19, 0x4a, 0xc7, 0x74, 0x02,
-  0x1b, 0x48, 0x55, 0x80, 0xcf, 0x2c, 0xc1, 0xea, 0xd8, 0xa8, 0x0a, 0x44,
-  0x7c, 0x66, 0x09, 0x56, 0x67, 0x38, 0x82, 0x96, 0x03, 0x52, 0x15, 0x84,
-  0x6f, 0x96, 0x01, 0x75, 0x56, 0x27, 0xb0, 0x5a, 0x0e, 0x4a, 0x55, 0x88,
-  0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa3, 0x2c, 0x88, 0xe4,
-  0x63, 0x45, 0x10, 0x9f, 0x22, 0x5c, 0x56, 0xd0, 0xe1, 0x86, 0x80, 0x65,
-  0x05, 0x30, 0x98, 0x65, 0x48, 0x1d, 0xd5, 0x09, 0xac, 0x55, 0x85, 0x21,
-  0x3e, 0xb3, 0x04, 0xab, 0x63, 0x04, 0xac, 0x0a, 0xf0, 0x99, 0x25, 0x58,
-  0x9d, 0x81, 0x16, 0x43, 0x2b, 0x1d, 0xcc, 0x74, 0x88, 0xd4, 0x11, 0x54,
-  0xc7, 0x2f, 0x87, 0xd3, 0xb9, 0x60, 0x18, 0x7b, 0x55, 0x61, 0x56, 0x85,
-  0xf8, 0x0c, 0x47, 0x94, 0x06, 0xad, 0x0a, 0xc4, 0x37, 0xcb, 0xc0, 0x3a,
-  0xaf, 0x13, 0x58, 0xad, 0x0a, 0xa6, 0x11, 0x1f, 0x0b, 0x06, 0xfa, 0x5c,
-  0x30, 0xcc, 0x05, 0x46, 0x59, 0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45,
-  0xf0, 0xac, 0xa0, 0xc3, 0x0d, 0x81, 0xce, 0x0a, 0x60, 0x30, 0xcb, 0xd0,
-  0x3a, 0xae, 0x13, 0xd8, 0xd0, 0xab, 0x02, 0x7c, 0x66, 0x09, 0x66, 0xc7,
-  0x74, 0x55, 0x20, 0xe2, 0x33, 0x4b, 0x30, 0x3b, 0xc3, 0x11, 0xb0, 0xb1,
-  0xab, 0x82, 0xf0, 0xcd, 0x32, 0xc0, 0xce, 0xec, 0x04, 0x16, 0x1b, 0xbc,
-  0x2a, 0xc4, 0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x51, 0x16,
-  0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x67, 0x2b, 0xe8, 0x70, 0x43,
-  0x50, 0xb6, 0x02, 0x18, 0xcc, 0x32, 0xc4, 0x8e, 0xec, 0x04, 0x46, 0xae,
-  0xc2, 0x10, 0x9f, 0x59, 0x82, 0xd9, 0x31, 0x22, 0x5d, 0x05, 0xf8, 0xcc,
-  0x12, 0xcc, 0xce, 0x40, 0x8b, 0xa1, 0xb5, 0x0e, 0xe6, 0x3a, 0x44, 0xec,
-  0x08, 0xb2, 0x63, 0x37, 0xaf, 0x73, 0xc1, 0x30, 0x17, 0x18, 0x75, 0x9b,
-  0x51, 0x57, 0xab, 0xc2, 0x30, 0x67, 0x9e, 0xc2, 0x30, 0x47, 0x0c, 0x73,
-  0xc4, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xa0, 0xdd, 0xad, 0x40,
-  0xb6, 0x42, 0xc8, 0x0a, 0x72, 0x2b, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82,
-  0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x91, 0xc8, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0x7e, 0x2b, 0xac, 0xad, 0x90, 0x10,
-  0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0x7f, 0x2b, 0xb0, 0xad,
-  0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0xa0, 0x2b,
-  0xb4, 0xad, 0x90, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c,
-  0xa7, 0x2b, 0xb0, 0xad, 0xb0, 0xb2, 0x42, 0x90, 0xb7, 0xc2, 0xce, 0x0a,
-  0x77, 0x2b, 0x8c, 0x26, 0x04, 0xc0, 0x2c, 0x01, 0xed, 0x0c, 0xb4, 0x18,
-  0xa2, 0xf1, 0x37, 0x60, 0x4e, 0xf8, 0x8d, 0x4a, 0x84, 0x8e, 0x30, 0x3b,
-  0x60, 0x4e, 0x88, 0xce, 0x2c, 0x43, 0xed, 0xdc, 0xce, 0x49, 0x07, 0xc3,
-  0x11, 0x38, 0x1b, 0xc4, 0xac, 0x30, 0x7c, 0x97, 0xb3, 0xc1, 0x30, 0xc3,
-  0x0d, 0xc1, 0xc9, 0x0a, 0x64, 0x50, 0x43, 0xa0, 0xc3, 0x11, 0xf3, 0x51,
-  0xb3, 0xc2, 0xf0, 0x55, 0x20, 0xe8, 0xd5, 0xc7, 0x30, 0xc3, 0x0d, 0x81,
-  0xca, 0x0a, 0x64, 0x50, 0xc1, 0xa0, 0xb3, 0x0c, 0xb6, 0xb3, 0x3e, 0xc1,
-  0xb9, 0xab, 0x30, 0xcc, 0xfd, 0xa7, 0x30, 0xcc, 0x88, 0xc1, 0x01, 0x80,
-  0x20, 0x18, 0x68, 0xae, 0x2b, 0xec, 0xad, 0x80, 0xb3, 0x42, 0xea, 0x0a,
-  0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a,
-  0x40, 0x0c, 0x45, 0x1c, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x57,
-  0xed, 0x0a, 0xa2, 0x2b, 0x1c, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08,
-  0x06, 0x97, 0xed, 0x0a, 0xa3, 0x2b, 0x30, 0x44, 0x30, 0x62, 0x80, 0x00,
-  0x20, 0x08, 0x06, 0xd7, 0xed, 0x0a, 0xa4, 0x2b, 0x48, 0x44, 0x30, 0x62,
-  0xa0, 0x00, 0x20, 0x08, 0x06, 0x8b, 0xef, 0x0a, 0xa3, 0x2b, 0x88, 0xad,
-  0x10, 0xc0, 0xae, 0x20, 0xb7, 0x82, 0xeb, 0x0a, 0xa3, 0x09, 0x01, 0x30,
-  0x4b, 0xb0, 0x3e, 0xc3, 0x0d, 0x3e, 0x1d, 0xd4, 0xae, 0x00, 0x06, 0xb3,
-  0x0c, 0xb8, 0x93, 0x3b, 0x41, 0xf1, 0xac, 0x50, 0xba, 0x02, 0x5c, 0x60,
-  0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x4c, 0xbf, 0x2b, 0x98, 0xae,
-  0x10, 0xb7, 0x81, 0xda, 0x0a, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x30,
-  0x81, 0xaf, 0x60, 0xba, 0x42, 0x20, 0x5c, 0x30, 0x4c, 0xfd, 0xac, 0xa0,
-  0xba, 0x02, 0x5c, 0x60, 0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x4c,
-  0xe4, 0x2b, 0xac, 0xae, 0xd0, 0x22, 0x6f, 0x2b, 0x8c, 0x18, 0x1c, 0x00,
-  0x08, 0x82, 0xc1, 0x54, 0xbe, 0xc2, 0xea, 0x0a, 0x81, 0x70, 0xc1, 0x30,
-  0x17, 0x18, 0x75, 0x87, 0x51, 0x87, 0xb2, 0xc2, 0x30, 0x97, 0xa3, 0xc2,
-  0x30, 0x47, 0x0c, 0x73, 0xc4, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60,
-  0xa0, 0xa9, 0xaf, 0x70, 0xbb, 0x02, 0xdd, 0x0a, 0xe5, 0x2b, 0x8c, 0x26,
-  0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31,
-  0x14, 0x91, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0xf1, 0x2b,
-  0xf8, 0xae, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c,
-  0xf2, 0x2b, 0xfc, 0xae, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20,
-  0x18, 0x5c, 0xf3, 0x2b, 0x80, 0xaf, 0x90, 0x10, 0xc1, 0x88, 0x81, 0x02,
-  0x80, 0x20, 0x18, 0x2c, 0xfa, 0x2b, 0xfc, 0xae, 0xe0, 0xb7, 0x42, 0xc0,
-  0xbe, 0x82, 0xeb, 0x0a, 0xea, 0x2b, 0x8c, 0x26, 0x04, 0xc0, 0x2c, 0xc1,
-  0xfa, 0x0c, 0x37, 0xe8, 0x75, 0x00, 0xbf, 0x02, 0x18, 0xcc, 0x32, 0xe8,
-  0xce, 0xfa, 0x04, 0xf6, 0xb6, 0x42, 0xdc, 0x0a, 0xf1, 0x19, 0x8e, 0x60,
-  0xdd, 0x40, 0x6e, 0x05, 0xe2, 0x9b, 0x65, 0xd8, 0x1d, 0xdf, 0x09, 0x6c,
-  0x6e, 0x85, 0xd6, 0x0d, 0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9,
-  0xc0, 0x28, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0xfd, 0x15,
-  0x74, 0xb8, 0x21, 0xc0, 0x5f, 0x01, 0x0c, 0x66, 0x19, 0x78, 0xa7, 0x77,
-  0x02, 0x1b, 0xf6, 0x56, 0x80, 0xcf, 0x2c, 0x81, 0xf8, 0x98, 0xde, 0x0a,
-  0x44, 0x7c, 0x66, 0x09, 0xc4, 0x67, 0x38, 0xe2, 0x76, 0x83, 0xbd, 0x15,
-  0x84, 0x6f, 0x96, 0xe1, 0x77, 0xc4, 0x27, 0x30, 0xdc, 0x0d, 0xf8, 0x56,
-  0x88, 0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa3, 0x2c, 0x88,
-  0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0x4a, 0x58, 0xd0, 0xe1, 0x86, 0x60,
-  0x84, 0x05, 0x30, 0x98, 0x65, 0x00, 0x9f, 0xf0, 0x09, 0x8c, 0x74, 0x85,
-  0x21, 0x3e, 0xb3, 0x04, 0xe2, 0x63, 0xc4, 0xe9, 0x0a, 0xf0, 0x99, 0x25,
-  0x10, 0x9f, 0x81, 0x16, 0x43, 0xe3, 0x1d, 0xac, 0x77, 0x08, 0xf0, 0x11,
-  0xc2, 0x87, 0xf4, 0x05, 0xdf, 0xb9, 0x60, 0x18, 0x33, 0x5d, 0x41, 0x75,
-  0x85, 0xf8, 0x0c, 0x47, 0xf0, 0xc9, 0xea, 0x0a, 0xc4, 0x37, 0xcb, 0x30,
-  0x3e, 0xe6, 0x13, 0x18, 0xeb, 0x0a, 0x7d, 0x12, 0x1f, 0x0b, 0x06, 0xfa,
-  0x5c, 0x30, 0xcc, 0x05, 0x46, 0x59, 0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e,
-  0x45, 0xcc, 0xb0, 0xa0, 0xc3, 0x0d, 0x41, 0x0c, 0x0b, 0x60, 0x30, 0xcb,
-  0x40, 0x3e, 0xe5, 0x13, 0xd8, 0x40, 0xbb, 0x02, 0x7c, 0x66, 0x09, 0xd4,
-  0xc7, 0x62, 0x57, 0x20, 0xe2, 0x33, 0x4b, 0xa0, 0x3e, 0xc3, 0x11, 0xa7,
-  0x22, 0xbb, 0x82, 0xf0, 0xcd, 0x32, 0x9c, 0x8f, 0xfa, 0x04, 0x86, 0x2a,
-  0xb3, 0x2b, 0xc4, 0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x51,
-  0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x3e, 0x2c, 0xe8, 0x70,
-  0x43, 0xc0, 0xc3, 0x02, 0x18, 0xcc, 0x32, 0xa0, 0x4f, 0xfa, 0x04, 0xb6,
-  0xbb, 0xc2, 0x10, 0x9f, 0x59, 0x02, 0xf5, 0x31, 0x02, 0x7c, 0x05, 0xf8,
-  0xcc, 0x12, 0xa8, 0xcf, 0x40, 0x8b, 0xa1, 0x91, 0x0f, 0x56, 0x3e, 0x04,
-  0xfa, 0x08, 0xe9, 0x43, 0x56, 0xe6, 0x73, 0xc1, 0x30, 0x17, 0x18, 0x75,
-  0x9b, 0x51, 0xc7, 0xba, 0xc2, 0x30, 0xd7, 0xab, 0xc2, 0x30, 0x47, 0x0c,
-  0x73, 0xc4, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xa0, 0xb9, 0xb1,
-  0xb0, 0xc3, 0x02, 0xfe, 0x0a, 0x69, 0x2c, 0x8c, 0x26, 0x04, 0xc0, 0x68,
-  0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x91, 0xc8,
-  0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0x75, 0x2c, 0x88, 0xb1, 0x90,
-  0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0x76, 0x2c, 0x8c,
-  0xb1, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0x77,
-  0x2c, 0x90, 0xb1, 0x90, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18,
-  0x2c, 0x7e, 0x2c, 0x8c, 0xb1, 0x20, 0xc2, 0x42, 0x00, 0xc7, 0x82, 0x0c,
-  0x0b, 0x6e, 0x2c, 0x8c, 0x26, 0x04, 0xc0, 0x2c, 0xc1, 0xfa, 0x0c, 0xb4,
-  0x18, 0xa2, 0x61, 0x3b, 0x2e, 0x58, 0xd4, 0x8e, 0x4a, 0xe0, 0x8e, 0xa0,
-  0x3e, 0x2e, 0x58, 0xe4, 0xce, 0x2c, 0x03, 0xfb, 0xb8, 0x8f, 0x7f, 0x07,
-  0xc3, 0x11, 0x37, 0x1b, 0xa0, 0xb0, 0x30, 0x7c, 0x87, 0xb3, 0xc1, 0x30,
-  0xc3, 0x0d, 0x81, 0xff, 0x0a, 0x64, 0x50, 0x43, 0xa0, 0xc3, 0x11, 0xea,
-  0xc2, 0xc2, 0xc2, 0xf0, 0x55, 0x20, 0xe8, 0xb1, 0xcb, 0x30, 0xc3, 0x0d,
-  0x41, 0x08, 0x0b, 0x64, 0x50, 0xc1, 0xa0, 0xb3, 0x0c, 0xed, 0x23, 0x42,
-  0xc1, 0x95, 0xaf, 0x30, 0xcc, 0xd9, 0xab, 0x30, 0xcc, 0x88, 0xc1, 0x01,
-  0x80, 0x20, 0x18, 0x68, 0xa5, 0x2c, 0xc8, 0xb1, 0xf0, 0xc2, 0x02, 0x28,
-  0x0b, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30,
-  0x9a, 0x40, 0x0c, 0x45, 0x1c, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06,
-  0x17, 0x2b, 0x0b, 0x79, 0x2c, 0x1c, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20,
-  0x08, 0x06, 0x57, 0x2b, 0x0b, 0x7a, 0x2c, 0x30, 0x44, 0x30, 0x62, 0x80,
-  0x00, 0x20, 0x08, 0x06, 0x97, 0x2b, 0x0b, 0x7b, 0x2c, 0x48, 0x44, 0x30,
-  0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x4b, 0x2d, 0x0b, 0x7a, 0x2c, 0xe4,
-  0xb0, 0x10, 0x9c, 0xb2, 0x90, 0xc6, 0x42, 0x29, 0x0b, 0xa3, 0x09, 0x01,
-  0x30, 0x4b, 0x20, 0x42, 0xc3, 0x0d, 0x35, 0x1e, 0xb0, 0xb2, 0x00, 0x06,
-  0xb3, 0x0c, 0xef, 0x03, 0x3f, 0x41, 0xcd, 0xb0, 0xc0, 0xc7, 0x02, 0x5c,
-  0x60, 0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x4c, 0xb6, 0x2c, 0xf4,
-  0xb1, 0x00, 0xb7, 0x41, 0x18, 0x0b, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60,
-  0x30, 0xdd, 0xb2, 0xd0, 0xc7, 0x42, 0x20, 0x5c, 0x30, 0x4c, 0xd9, 0xb0,
-  0x10, 0xca, 0x02, 0x5c, 0x60, 0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18,
-  0x4c, 0xbb, 0x2c, 0x88, 0xb2, 0x40, 0x32, 0x66, 0x2c, 0x8c, 0x18, 0x1c,
-  0x00, 0x08, 0x82, 0xc1, 0xc4, 0xcb, 0x82, 0x28, 0x0b, 0x81, 0x70, 0xc1,
-  0x30, 0x17, 0x18, 0x75, 0x87, 0x51, 0xf7, 0xbf, 0xc2, 0x30, 0x07, 0xb3,
-  0xc2, 0x30, 0x47, 0x0c, 0x73, 0xc4, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82,
-  0x60, 0xa0, 0x85, 0xb3, 0xe0, 0xca, 0xc2, 0x1a, 0x0b, 0xbc, 0x2c, 0x8c,
-  0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02,
-  0x31, 0x14, 0x91, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0xe8,
-  0x2c, 0xd4, 0xb2, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
-  0x5c, 0xe9, 0x2c, 0xd8, 0xb2, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80,
-  0x20, 0x18, 0x5c, 0xea, 0x2c, 0xdc, 0xb2, 0x90, 0x10, 0xc1, 0x88, 0x81,
-  0x02, 0x80, 0x20, 0x18, 0x2c, 0xf1, 0x2c, 0xd8, 0xb2, 0x50, 0xc7, 0x42,
-  0x30, 0xce, 0x42, 0x29, 0x0b, 0xe1, 0x2c, 0x8c, 0x26, 0x04, 0xc0, 0x2c,
-  0x81, 0x08, 0x0d, 0x37, 0xc4, 0x79, 0x70, 0xce, 0x02, 0x18, 0xcc, 0x32,
-  0xc4, 0x8f, 0x08, 0x05, 0x66, 0xc6, 0x02, 0x1a, 0x0b, 0xf1, 0x19, 0x8e,
-  0x58, 0xdd, 0x20, 0x8d, 0x05, 0xe2, 0x9b, 0x65, 0x90, 0x9f, 0xfa, 0x09,
-  0x4c, 0x8d, 0x05, 0xd6, 0x0d, 0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86,
-  0xb9, 0xc0, 0x28, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0x78,
-  0x16, 0x74, 0xb8, 0x21, 0x78, 0x67, 0x01, 0x0c, 0x66, 0x19, 0xe6, 0x87,
-  0x7e, 0x02, 0x1b, 0xe4, 0x58, 0x80, 0xcf, 0x2c, 0x41, 0xfe, 0x58, 0x1c,
-  0x0b, 0x44, 0x7c, 0x66, 0x09, 0xf2, 0x67, 0x38, 0xc2, 0x76, 0x03, 0x39,
-  0x16, 0x84, 0x6f, 0x96, 0xc1, 0x7e, 0xf2, 0x27, 0xb0, 0xdb, 0x0d, 0xe6,
-  0x58, 0x88, 0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa3, 0x2c,
-  0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xf8, 0x59, 0xd0, 0xe1, 0x86,
-  0x40, 0x9f, 0x05, 0x30, 0x98, 0x65, 0xb8, 0x1f, 0xfc, 0x09, 0x6c, 0x8f,
-  0x85, 0x21, 0x3e, 0xb3, 0x04, 0xf9, 0x63, 0x84, 0x1f, 0x0b, 0xf0, 0x99,
-  0x25, 0xc8, 0x9f, 0x81, 0x16, 0x43, 0x9b, 0x1f, 0x8c, 0x7e, 0x88, 0xfb,
-  0x11, 0xf0, 0x47, 0xf4, 0x85, 0xfa, 0xb9, 0x60, 0x18, 0xeb, 0x63, 0x21,
-  0x94, 0x85, 0xf8, 0x0c, 0x47, 0xcc, 0x8d, 0x28, 0x0b, 0xc4, 0x37, 0xcb,
-  0xa0, 0x3f, 0xfd, 0x13, 0xd8, 0x28, 0x0b, 0x74, 0x13, 0x1f, 0x0b, 0x06,
-  0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x46, 0x59, 0x60, 0xc8, 0xc7, 0x8a, 0x20,
-  0x3e, 0x45, 0xa8, 0xb4, 0xa0, 0xc3, 0x0d, 0x01, 0x4a, 0x0b, 0x60, 0x30,
-  0xcb, 0xb0, 0x3f, 0xfc, 0x13, 0xd8, 0xb0, 0xca, 0x02, 0x7c, 0x66, 0x09,
-  0x42, 0xc8, 0x50, 0x59, 0x20, 0xe2, 0x33, 0x4b, 0x10, 0x42, 0xc3, 0x11,
-  0x7e, 0x93, 0xca, 0x82, 0xf0, 0xcd, 0x32, 0xf8, 0x4f, 0x08, 0x05, 0xf6,
-  0x37, 0xaa, 0x2c, 0xc4, 0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73, 0x81,
-  0x51, 0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x35, 0x2d, 0xe8,
-  0x70, 0x43, 0x30, 0xd3, 0x02, 0x18, 0xcc, 0x32, 0xfc, 0x0f, 0x08, 0x05,
-  0x26, 0xcb, 0xc2, 0x10, 0x9f, 0x59, 0x82, 0x10, 0x32, 0xe2, 0x96, 0x05,
-  0xf8, 0xcc, 0x12, 0x84, 0xd0, 0x40, 0x8b, 0xa1, 0xed, 0x0f, 0xc6, 0x3f,
-  0xc4, 0xff, 0x08, 0x20, 0x24, 0x73, 0xfd, 0x73, 0xc1, 0x30, 0x17, 0x18,
-  0x75, 0x9b, 0x51, 0x37, 0xca, 0xc2, 0x30, 0x47, 0xbb, 0xc2, 0x30, 0x47,
-  0x0c, 0x73, 0xc4, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xa0, 0x95,
-  0xb5, 0x20, 0xd3, 0xc2, 0x3b, 0x0b, 0x60, 0x2d, 0x8c, 0x26, 0x04, 0xc0,
-  0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x91,
-  0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0x6c, 0x2d, 0xe4, 0xb4,
-  0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0x6d, 0x2d,
-  0xe8, 0xb4, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c,
-  0x6e, 0x2d, 0xec, 0xb4, 0x90, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20,
-  0x18, 0x2c, 0x75, 0x2d, 0xe8, 0xb4, 0x90, 0xcf, 0x42, 0x70, 0xd6, 0x42,
-  0x4a, 0x0b, 0x65, 0x2d, 0x8c, 0x26, 0x04, 0xc0, 0x2c, 0x81, 0x08, 0x0d,
-  0xb4, 0x18, 0xa2, 0xd1, 0x3e, 0xfc, 0x59, 0xb0, 0x8f, 0x4a, 0xbc, 0x8f,
-  0x10, 0x42, 0xfc, 0x59, 0xc0, 0xcf, 0x88, 0x81, 0x01, 0x80, 0x20, 0x18,
-  0x3c, 0x76, 0x2d, 0xd4, 0xb4, 0x50, 0xbe, 0xc2, 0x88, 0x81, 0x01, 0x80,
-  0x20, 0x18, 0x3c, 0x77, 0x2d, 0xd8, 0xb4, 0x50, 0xbe, 0x82, 0x05, 0x81,
-  0x7c, 0x2c, 0x10, 0xe4, 0x63, 0x22, 0x1e, 0x94, 0xb4, 0x20, 0x9f, 0x11,
-  0x03, 0x04, 0x00, 0x41, 0x30, 0x88, 0xf4, 0x5a, 0xf0, 0x69, 0x01, 0xa6,
-  0x05, 0x54, 0x0b, 0x8c, 0xc4, 0x03, 0x98, 0x16, 0xe4, 0x33, 0x62, 0x80,
-  0x00, 0x20, 0x08, 0x06, 0x11, 0x5f, 0x0b, 0x60, 0x2d, 0xb8, 0xb4, 0x70,
-  0xa6, 0x41, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x51, 0x5f, 0x0b,
-  0x61, 0x2d, 0xc8, 0xb4, 0xb0, 0x6a, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20,
-  0x18, 0x44, 0x7e, 0x2d, 0x88, 0xb5, 0xa0, 0xd2, 0xc2, 0xbb, 0x04, 0x23,
-  0x06, 0x08, 0x00, 0x82, 0x60, 0x10, 0xfd, 0xb5, 0x30, 0xd6, 0x02, 0x4c,
-  0x0b, 0x6a, 0x1a, 0x18, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x10, 0x81,
-  0xb6, 0x40, 0xd6, 0x02, 0x4c, 0x0b, 0xae, 0x16, 0x8c, 0x18, 0x20, 0x00,
-  0x08, 0x82, 0x41, 0x14, 0xda, 0x42, 0x59, 0x0b, 0x37, 0x2d, 0xc8, 0x4b,
-  0x30, 0x62, 0xd0, 0x00, 0x20, 0x08, 0x06, 0x15, 0x68, 0x0b, 0x65, 0x2d,
-  0xb4, 0xb4, 0xc0, 0x2c, 0x4a, 0x9b, 0x06, 0x08, 0x11, 0x58, 0x4d, 0x07,
-  0x2f, 0x2d, 0xc8, 0x67, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x22, 0xd2,
-  0x16, 0xd0, 0x5a, 0xd0, 0x69, 0x61, 0xb7, 0x02, 0xbb, 0xe9, 0x40, 0xa7,
-  0x05, 0xf9, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x41, 0x64, 0xda, 0x82,
-  0x5a, 0x0b, 0x38, 0x2d, 0xe8, 0x65, 0x10, 0x8c, 0x18, 0x20, 0x00, 0x08,
-  0x82, 0x41, 0x74, 0xda, 0xc2, 0x5a, 0x0b, 0x3c, 0x2d, 0xf8, 0x56, 0x30,
-  0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x11, 0x6a, 0x0b, 0x6c, 0x2d, 0xd0,
-  0xb4, 0x20, 0x22, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x44, 0xa9,
-  0x2d, 0xb4, 0xb5, 0xa0, 0xd3, 0x42, 0x5f, 0x06, 0xc6, 0x88, 0x01, 0x02,
-  0x80, 0x20, 0x18, 0x44, 0xaa, 0x2d, 0xb8, 0xb5, 0xa0, 0xd3, 0x42, 0x78,
-  0x05, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x10, 0xad, 0xb6, 0xf0, 0xd6,
-  0x42, 0x58, 0x0b, 0x25, 0x12, 0x8c, 0x18, 0x34, 0x00, 0x08, 0x82, 0x41,
-  0xa5, 0xda, 0xc2, 0x5b, 0x0b, 0x37, 0x2d, 0x58, 0x15, 0x05, 0x9a, 0x01,
-  0x42, 0x04, 0x76, 0xc6, 0x41, 0x4e, 0x0b, 0xf2, 0x19, 0x31, 0x40, 0x00,
-  0x10, 0x04, 0x83, 0xc8, 0xb5, 0x05, 0xb9, 0x16, 0xc8, 0x5a, 0x70, 0xa7,
-  0xc0, 0xd2, 0x38, 0x20, 0x6b, 0x41, 0x3e, 0x23, 0x06, 0x08, 0x00, 0x82,
-  0x60, 0x10, 0xc1, 0xb6, 0x40, 0xd7, 0x82, 0x58, 0x0b, 0xad, 0x18, 0x04,
-  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x10, 0xc5, 0xb6, 0x50, 0xd7, 0x82,
-  0x59, 0x0b, 0xf1, 0x14, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x41, 0x24,
-  0xdb, 0x82, 0x5d, 0x0b, 0x3e, 0x2d, 0xd4, 0x44, 0x30, 0x62, 0x80, 0x00,
-  0x20, 0x08, 0x06, 0xd1, 0x6c, 0x0b, 0x77, 0x2d, 0x90, 0xb5, 0x00, 0x8b,
-  0x81, 0x31, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x11, 0x6d, 0x0b, 0x78,
-  0x2d, 0x90, 0xb5, 0x40, 0x4f, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
-  0x44, 0xb5, 0x2d, 0xe4, 0xb5, 0xb0, 0xd6, 0x02, 0x4e, 0x04, 0x23, 0x06,
-  0x0d, 0x00, 0x82, 0x60, 0x50, 0xd1, 0xb6, 0x90, 0xd7, 0x42, 0x58, 0x0b,
-  0x60, 0xf0, 0x79, 0xb3, 0x18, 0x20, 0x44, 0x60, 0x79, 0x1b, 0x8c, 0xb5,
-  0x20, 0x9f, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0x88, 0x70, 0x5b, 0xe0,
-  0x6b, 0xc1, 0xad, 0x85, 0x10, 0x0a, 0x6c, 0x6f, 0x03, 0xb7, 0x16, 0xe4,
-  0x33, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x91, 0x6e, 0x0b, 0x7e, 0x2d,
-  0xb0, 0xb5, 0x00, 0x7e, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x44,
-  0xbb, 0x2d, 0xfc, 0xb5, 0x00, 0xd7, 0x02, 0x09, 0x05, 0x23, 0x06, 0x08,
-  0x00, 0x82, 0x60, 0x10, 0xf1, 0xb6, 0x00, 0xda, 0x02, 0x5a, 0x0b, 0x68,
-  0x10, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x41, 0xd4, 0xdb, 0x42, 0x68,
-  0x0b, 0x6e, 0x2d, 0x8c, 0x9f, 0x31, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06,
-  0x91, 0x6f, 0x0b, 0xa2, 0x2d, 0xb8, 0xb5, 0x70, 0x42, 0xc1, 0x88, 0x01,
-  0x02, 0x80, 0x20, 0x18, 0x44, 0xbf, 0x2d, 0x8c, 0xb6, 0x50, 0xd7, 0xc2,
-  0x1a, 0x04, 0x23, 0x06, 0x0d, 0x00, 0x82, 0x60, 0x50, 0xf9, 0xb6, 0x30,
-  0xda, 0xc2, 0x5a, 0x0b, 0x6a, 0x90, 0x06, 0x68, 0x60, 0x7e, 0x08, 0x11,
-  0x18, 0x1b, 0xb0, 0x81, 0x7c, 0x2c, 0x68, 0x03, 0xf9, 0x58, 0x18, 0xbc,
-  0xb5, 0x20, 0x9f, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0x88, 0xc8, 0x5b,
-  0x40, 0x6d, 0x41, 0xaf, 0x05, 0x27, 0xb0, 0x31, 0xd0, 0x6b, 0x41, 0x3e,
-  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x10, 0x99, 0xb7, 0xa0, 0xda, 0x02,
-  0x5e, 0x0b, 0x5a, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xd1, 0x79,
-  0x0b, 0xab, 0x2d, 0xf0, 0xb5, 0x10, 0x05, 0x23, 0x06, 0x08, 0x00, 0x82,
-  0x60, 0x10, 0xa1, 0xb7, 0xc0, 0xda, 0x02, 0x5d, 0x0b, 0x48, 0x30, 0x62,
-  0x80, 0x00, 0x20, 0x08, 0x06, 0x51, 0x7a, 0x0b, 0xad, 0x2d, 0xe8, 0xb5,
-  0xd0, 0x19, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x10, 0xa9, 0xb7, 0xe0,
-  0xda, 0x82, 0x5e, 0x0b, 0x54, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06,
-  0xd1, 0x7a, 0x0b, 0xaf, 0x2d, 0x84, 0xb6, 0xb0, 0x04, 0x23, 0x06, 0x0d,
-  0x00, 0x82, 0x60, 0x50, 0xa9, 0xb7, 0xf0, 0xda, 0xc2, 0x5d, 0x0b, 0x77,
-  0xb0, 0x28, 0x60, 0x80, 0x10, 0xc1, 0x88, 0x81, 0x03, 0x80, 0x20, 0x18,
-  0x34, 0xef, 0x2d, 0xb0, 0xb6, 0x60, 0xd7, 0xc2, 0x5b, 0x0b, 0xe2, 0x2d,
-  0x04, 0xa1, 0x2d, 0x84, 0xb6, 0x10, 0xda, 0x02, 0x69, 0x0b, 0xe0, 0x2d,
-  0xcc, 0x12, 0x8c, 0x10, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-};
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_fp16_double.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_fp16_double.h
deleted file mode 100644
index 35d953fd689ca..0000000000000
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_fp16_double.h
+++ /dev/null
@@ -1,6305 +0,0 @@
-#if 0
-;
-; Note: shader requires additional functionality:
-;       Double-precision floating point
-;       Use native low precision
-;
-;
-; Input signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; no parameters
-;
-; Output signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; no parameters
-; shader hash: f08c2806969b3ec2ad03b453168beb8b
-;
-; Pipeline Runtime Information: 
-;
-;
-;
-; Buffer Definitions:
-;
-; cbuffer 
-; {
-;
-;   [116 x i8] (type annotation not present)
-;
-; }
-;
-; Resource bind info for 
-; {
-;
-;   [2 x i8] (type annotation not present)
-;
-; }
-;
-; Resource bind info for 
-; {
-;
-;   [8 x i8] (type annotation not present)
-;
-; }
-;
-; Resource bind info for 
-; {
-;
-;   [2 x i8] (type annotation not present)
-;
-; }
-;
-;
-; Resource Bindings:
-;
-; Name                                 Type  Format         Dim      ID      HLSL Bind  Count
-; ------------------------------ ---------- ------- ----------- ------- -------------- ------
-;                                   cbuffer      NA          NA     CB0            cb0     1
-;                                       UAV  struct         r/w      U0             u0     1
-;                                       UAV  struct         r/w      U1             u1     1
-;                                       UAV  struct         r/w      U2             u2     1
-;
-target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
-target triple = "dxil-ms-dx"
-
-%dx.types.Handle = type { i8* }
-%dx.types.CBufRet.i32 = type { i32, i32, i32, i32 }
-%dx.types.ResRet.i32 = type { i32, i32, i32, i32, i32 }
-%dx.types.ResRet.f16 = type { half, half, half, half, i32 }
-%"class.RWStructuredBuffer<half>" = type { half }
-%"class.RWStructuredBuffer<double>" = type { double }
-%Constants = type { i32, i32, i32, i32, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32 }
-
-define void @GridSample() {
-  %1 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 2, i32 2, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %2 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 1, i32 1, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %3 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %4 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %5 = call i32 @dx.op.threadId.i32(i32 93, i32 0)  ; ThreadId(component)
-  %6 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
-  %7 = extractvalue %dx.types.CBufRet.i32 %6, 0
-  %8 = add i32 %7, %5
-  %9 = extractvalue %dx.types.CBufRet.i32 %6, 1
-  %10 = icmp ult i32 %8, %9
-  br i1 %10, label %11, label %3323
-
-; <label>:11                                      ; preds = %0
-  %12 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
-  %13 = extractvalue %dx.types.CBufRet.i32 %12, 3
-  %14 = uitofp i32 %13 to float
-  %15 = extractvalue %dx.types.CBufRet.i32 %12, 2
-  %16 = uitofp i32 %15 to float
-  %17 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 7)  ; CBufferLoadLegacy(handle,regIndex)
-  %18 = extractvalue %dx.types.CBufRet.i32 %17, 0
-  %19 = icmp eq i32 %18, 0
-  %20 = select i1 %19, float -5.000000e-01, float 0.000000e+00
-  %21 = select i1 %19, float -5.000000e-01, float -1.000000e+00
-  %22 = fadd float %14, %21
-  %23 = select i1 %19, float -5.000000e-01, float -1.000000e+00
-  %24 = fadd float %16, %23
-  %25 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
-  %26 = extractvalue %dx.types.CBufRet.i32 %25, 1
-  %27 = extractvalue %dx.types.CBufRet.i32 %25, 2
-  %28 = extractvalue %dx.types.CBufRet.i32 %25, 3
-  %29 = mul i32 %28, %27
-  %30 = mul i32 %27, %26
-  %31 = mul i32 %30, %28
-  %32 = udiv i32 %8, %31
-  %33 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 6)  ; CBufferLoadLegacy(handle,regIndex)
-  %34 = extractvalue %dx.types.CBufRet.i32 %33, 0
-  %35 = mul i32 %34, %32
-  %36 = sub i32 %8, %35
-  %37 = udiv i32 %36, %29
-  %38 = extractvalue %dx.types.CBufRet.i32 %33, 1
-  %39 = mul i32 %38, %37
-  %40 = sub i32 %36, %39
-  %41 = udiv i32 %40, %28
-  %42 = extractvalue %dx.types.CBufRet.i32 %33, 2
-  %43 = mul i32 %42, %41
-  %44 = sub i32 %40, %43
-  %45 = uitofp i32 %32 to float
-  %46 = uitofp i32 %41 to float
-  %47 = uitofp i32 %44 to float
-  %48 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
-  %49 = extractvalue %dx.types.CBufRet.i32 %48, 0
-  %50 = extractvalue %dx.types.CBufRet.i32 %48, 1
-  %51 = extractvalue %dx.types.CBufRet.i32 %48, 2
-  %52 = extractvalue %dx.types.CBufRet.i32 %48, 3
-  %53 = uitofp i32 %49 to float
-  %54 = uitofp i32 %50 to float
-  %55 = uitofp i32 %51 to float
-  %56 = uitofp i32 %52 to float
-  %57 = call float @dx.op.dot4.f32(i32 56, float %45, float %46, float %47, float 0.000000e+00, float %53, float %54, float %55, float %56)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %58 = fadd fast float %56, %57
-  %59 = fptoui float %57 to i32
-  %60 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %2, i32 %59, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %61 = extractvalue %dx.types.ResRet.i32 %60, 0
-  %62 = extractvalue %dx.types.ResRet.i32 %60, 1
-  %63 = call double @dx.op.makeDouble.f64(i32 101, i32 %61, i32 %62)  ; MakeDouble(lo,hi)
-  %64 = fptrunc double %63 to float
-  %65 = fptoui float %58 to i32
-  %66 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %2, i32 %65, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %67 = extractvalue %dx.types.ResRet.i32 %66, 0
-  %68 = extractvalue %dx.types.ResRet.i32 %66, 1
-  %69 = call double @dx.op.makeDouble.f64(i32 101, i32 %67, i32 %68)  ; MakeDouble(lo,hi)
-  %70 = fptrunc double %69 to float
-  %71 = icmp eq i32 %18, 1
-  %72 = fadd fast float %64, 1.000000e+00
-  %73 = fadd fast float %70, 1.000000e+00
-  br i1 %71, label %74, label %81
-
-; <label>:74                                      ; preds = %11
-  %75 = fmul fast float %72, 5.000000e-01
-  %76 = fmul fast float %73, 5.000000e-01
-  %77 = fadd fast float %14, -1.000000e+00
-  %78 = fadd fast float %16, -1.000000e+00
-  %79 = fmul fast float %75, %77
-  %80 = fmul fast float %76, %78
-  br label %88
-
-; <label>:81                                      ; preds = %11
-  %82 = fmul fast float %14, %72
-  %83 = fmul fast float %73, %16
-  %84 = fadd fast float %82, -1.000000e+00
-  %85 = fadd fast float %83, -1.000000e+00
-  %86 = fmul fast float %84, 5.000000e-01
-  %87 = fmul fast float %85, 5.000000e-01
-  br label %88
-
-; <label>:88                                      ; preds = %81, %74
-  %89 = phi float [ %79, %74 ], [ %86, %81 ]
-  %90 = phi float [ %80, %74 ], [ %87, %81 ]
-  %91 = extractvalue %dx.types.CBufRet.i32 %6, 2
-  %92 = icmp eq i32 %91, 1
-  br i1 %92, label %93, label %96
-
-; <label>:93                                      ; preds = %88
-  %94 = call float @dx.op.unary.f32(i32 26, float %89)  ; Round_ne(value)
-  %95 = call float @dx.op.unary.f32(i32 26, float %90)  ; Round_ne(value)
-  br label %96
-
-; <label>:96                                      ; preds = %93, %88
-  %97 = phi float [ %94, %93 ], [ %89, %88 ]
-  %98 = phi float [ %95, %93 ], [ %90, %88 ]
-  %99 = fcmp fast olt float %97, %20
-  %100 = fcmp fast ogt float %97, %22
-  %101 = or i1 %99, %100
-  %102 = fcmp fast olt float %98, %20
-  %103 = or i1 %101, %102
-  %104 = fcmp fast ogt float %98, %24
-  %105 = or i1 %104, %103
-  br i1 %105, label %106, label %179
-
-; <label>:106                                     ; preds = %96
-  %107 = extractvalue %dx.types.CBufRet.i32 %6, 3
-  %108 = icmp eq i32 %107, 1
-  br i1 %108, label %109, label %118
-
-; <label>:109                                     ; preds = %106
-  %110 = add i32 %13, -1
-  %111 = uitofp i32 %110 to float
-  %112 = call float @dx.op.binary.f32(i32 35, float %97, float 0.000000e+00)  ; FMax(a,b)
-  %113 = call float @dx.op.binary.f32(i32 36, float %112, float %111)  ; FMin(a,b)
-  %114 = add i32 %15, -1
-  %115 = uitofp i32 %114 to float
-  %116 = call float @dx.op.binary.f32(i32 35, float %98, float 0.000000e+00)  ; FMax(a,b)
-  %117 = call float @dx.op.binary.f32(i32 36, float %116, float %115)  ; FMin(a,b)
-  br label %179
-
-; <label>:118                                     ; preds = %106
-  %119 = icmp eq i32 %107, 2
-  br i1 %119, label %120, label %179
-
-; <label>:120                                     ; preds = %118
-  %121 = fsub fast float %22, %20
-  br i1 %99, label %122, label %135
-
-; <label>:122                                     ; preds = %120
-  %123 = fsub fast float %20, %97
-  %124 = fdiv fast float %123, %121
-  %125 = fptoui float %124 to i32
-  %126 = uitofp i32 %125 to float
-  %127 = fmul fast float %126, %121
-  %128 = fsub fast float %123, %127
-  %129 = and i32 %125, 1
-  %130 = icmp eq i32 %129, 0
-  br i1 %130, label %131, label %133
-
-; <label>:131                                     ; preds = %122
-  %132 = fadd fast float %128, %20
-  br label %149
-
-; <label>:133                                     ; preds = %122
-  %134 = fsub fast float %22, %128
-  br label %149
-
-; <label>:135                                     ; preds = %120
-  br i1 %100, label %136, label %149
-
-; <label>:136                                     ; preds = %135
-  %137 = fsub fast float %97, %22
-  %138 = fdiv fast float %137, %121
-  %139 = fptoui float %138 to i32
-  %140 = uitofp i32 %139 to float
-  %141 = fmul fast float %140, %121
-  %142 = fsub fast float %137, %141
-  %143 = and i32 %139, 1
-  %144 = icmp eq i32 %143, 0
-  br i1 %144, label %145, label %147
-
-; <label>:145                                     ; preds = %136
-  %146 = fsub fast float %22, %142
-  br label %149
-
-; <label>:147                                     ; preds = %136
-  %148 = fadd fast float %142, %20
-  br label %149
-
-; <label>:149                                     ; preds = %147, %145, %135, %133, %131
-  %150 = phi float [ %132, %131 ], [ %134, %133 ], [ %146, %145 ], [ %148, %147 ], [ %97, %135 ]
-  %151 = fsub fast float %24, %20
-  br i1 %102, label %152, label %165
-
-; <label>:152                                     ; preds = %149
-  %153 = fsub fast float %20, %98
-  %154 = fdiv fast float %153, %151
-  %155 = fptoui float %154 to i32
-  %156 = uitofp i32 %155 to float
-  %157 = fmul fast float %156, %151
-  %158 = fsub fast float %153, %157
-  %159 = and i32 %155, 1
-  %160 = icmp eq i32 %159, 0
-  br i1 %160, label %161, label %163
-
-; <label>:161                                     ; preds = %152
-  %162 = fadd fast float %158, %20
-  br label %179
-
-; <label>:163                                     ; preds = %152
-  %164 = fsub fast float %24, %158
-  br label %179
-
-; <label>:165                                     ; preds = %149
-  br i1 %104, label %166, label %179
-
-; <label>:166                                     ; preds = %165
-  %167 = fsub fast float %98, %24
-  %168 = fdiv fast float %167, %151
-  %169 = fptoui float %168 to i32
-  %170 = uitofp i32 %169 to float
-  %171 = fmul fast float %170, %151
-  %172 = fsub fast float %167, %171
-  %173 = and i32 %169, 1
-  %174 = icmp eq i32 %173, 0
-  br i1 %174, label %175, label %177
-
-; <label>:175                                     ; preds = %166
-  %176 = fsub fast float %24, %172
-  br label %179
-
-; <label>:177                                     ; preds = %166
-  %178 = fadd fast float %172, %20
-  br label %179
-
-; <label>:179                                     ; preds = %177, %175, %165, %163, %161, %118, %109, %96
-  %180 = phi float [ %113, %109 ], [ %97, %118 ], [ %97, %96 ], [ %150, %177 ], [ %150, %175 ], [ %150, %165 ], [ %150, %163 ], [ %150, %161 ]
-  %181 = phi float [ %117, %109 ], [ %98, %118 ], [ %98, %96 ], [ %178, %177 ], [ %176, %175 ], [ %98, %165 ], [ %164, %163 ], [ %162, %161 ]
-  %182 = uitofp i32 %37 to float
-  br i1 %92, label %183, label %332
-
-; <label>:183                                     ; preds = %179
-  %184 = extractvalue %dx.types.CBufRet.i32 %6, 3
-  %185 = icmp eq i32 %184, 0
-  br i1 %185, label %186, label %211
-
-; <label>:186                                     ; preds = %183
-  %187 = fcmp fast oge float %180, 0.000000e+00
-  %188 = fptoui float %180 to i32
-  %189 = icmp ult i32 %188, %13
-  %190 = and i1 %187, %189
-  %191 = fcmp fast oge float %181, 0.000000e+00
-  %192 = and i1 %191, %190
-  %193 = fptoui float %181 to i32
-  %194 = icmp ult i32 %193, %15
-  %195 = and i1 %194, %192
-  br i1 %195, label %196, label %329
-
-; <label>:196                                     ; preds = %186
-  %197 = fptoui float %45 to i32
-  %198 = fptoui float %182 to i32
-  %199 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %200 = extractvalue %dx.types.CBufRet.i32 %199, 0
-  %201 = extractvalue %dx.types.CBufRet.i32 %199, 1
-  %202 = extractvalue %dx.types.CBufRet.i32 %199, 2
-  %203 = extractvalue %dx.types.CBufRet.i32 %199, 3
-  %204 = mul i32 %200, %197
-  %205 = call i32 @dx.op.tertiary.i32(i32 48, i32 %198, i32 %201, i32 %204)  ; IMad(a,b,c)
-  %206 = call i32 @dx.op.tertiary.i32(i32 48, i32 %193, i32 %202, i32 %205)  ; IMad(a,b,c)
-  %207 = call i32 @dx.op.tertiary.i32(i32 48, i32 %188, i32 %203, i32 %206)  ; IMad(a,b,c)
-  %208 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %207, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %209 = extractvalue %dx.types.ResRet.f16 %208, 0
-  %210 = fpext half %209 to float
-  br label %329
-
-; <label>:211                                     ; preds = %183
-  %212 = icmp eq i32 %184, 1
-  br i1 %212, label %213, label %242
-
-; <label>:213                                     ; preds = %211
-  %214 = add i32 %13, -1
-  %215 = uitofp i32 %214 to float
-  %216 = call float @dx.op.binary.f32(i32 35, float %180, float 0.000000e+00)  ; FMax(a,b)
-  %217 = call float @dx.op.binary.f32(i32 36, float %216, float %215)  ; FMin(a,b)
-  %218 = fptoui float %217 to i32
-  %219 = add i32 %15, -1
-  %220 = uitofp i32 %219 to float
-  %221 = call float @dx.op.binary.f32(i32 35, float %181, float 0.000000e+00)  ; FMax(a,b)
-  %222 = call float @dx.op.binary.f32(i32 36, float %221, float %220)  ; FMin(a,b)
-  %223 = fptoui float %222 to i32
-  %224 = uitofp i32 %223 to float
-  %225 = uitofp i32 %218 to float
-  %226 = fptoui float %45 to i32
-  %227 = fptoui float %182 to i32
-  %228 = fptoui float %224 to i32
-  %229 = fptoui float %225 to i32
-  %230 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %231 = extractvalue %dx.types.CBufRet.i32 %230, 0
-  %232 = extractvalue %dx.types.CBufRet.i32 %230, 1
-  %233 = extractvalue %dx.types.CBufRet.i32 %230, 2
-  %234 = extractvalue %dx.types.CBufRet.i32 %230, 3
-  %235 = mul i32 %231, %226
-  %236 = call i32 @dx.op.tertiary.i32(i32 48, i32 %227, i32 %232, i32 %235)  ; IMad(a,b,c)
-  %237 = call i32 @dx.op.tertiary.i32(i32 48, i32 %228, i32 %233, i32 %236)  ; IMad(a,b,c)
-  %238 = call i32 @dx.op.tertiary.i32(i32 48, i32 %229, i32 %234, i32 %237)  ; IMad(a,b,c)
-  %239 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %238, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %240 = extractvalue %dx.types.ResRet.f16 %239, 0
-  %241 = fpext half %240 to float
-  br label %329
-
-; <label>:242                                     ; preds = %211
-  %243 = icmp eq i32 %184, 2
-  br i1 %243, label %244, label %329
-
-; <label>:244                                     ; preds = %242
-  %245 = fsub fast float %22, %20
-  %246 = fcmp fast olt float %180, %20
-  br i1 %246, label %247, label %260
-
-; <label>:247                                     ; preds = %244
-  %248 = fsub fast float %20, %180
-  %249 = fdiv fast float %248, %245
-  %250 = fptoui float %249 to i32
-  %251 = uitofp i32 %250 to float
-  %252 = fmul fast float %251, %245
-  %253 = fsub fast float %248, %252
-  %254 = and i32 %250, 1
-  %255 = icmp eq i32 %254, 0
-  br i1 %255, label %256, label %258
-
-; <label>:256                                     ; preds = %247
-  %257 = fadd fast float %253, %20
-  br label %275
-
-; <label>:258                                     ; preds = %247
-  %259 = fsub fast float %22, %253
-  br label %275
-
-; <label>:260                                     ; preds = %244
-  %261 = fcmp fast ogt float %180, %22
-  br i1 %261, label %262, label %275
-
-; <label>:262                                     ; preds = %260
-  %263 = fsub fast float %180, %22
-  %264 = fdiv fast float %263, %245
-  %265 = fptoui float %264 to i32
-  %266 = uitofp i32 %265 to float
-  %267 = fmul fast float %266, %245
-  %268 = fsub fast float %263, %267
-  %269 = and i32 %265, 1
-  %270 = icmp eq i32 %269, 0
-  br i1 %270, label %271, label %273
-
-; <label>:271                                     ; preds = %262
-  %272 = fsub fast float %22, %268
-  br label %275
-
-; <label>:273                                     ; preds = %262
-  %274 = fadd fast float %268, %20
-  br label %275
-
-; <label>:275                                     ; preds = %273, %271, %260, %258, %256
-  %276 = phi float [ %257, %256 ], [ %259, %258 ], [ %272, %271 ], [ %274, %273 ], [ %180, %260 ]
-  %277 = fptoui float %276 to i32
-  %278 = fsub fast float %24, %20
-  %279 = fcmp fast olt float %181, %20
-  br i1 %279, label %280, label %293
-
-; <label>:280                                     ; preds = %275
-  %281 = fsub fast float %20, %181
-  %282 = fdiv fast float %281, %278
-  %283 = fptoui float %282 to i32
-  %284 = uitofp i32 %283 to float
-  %285 = fmul fast float %284, %278
-  %286 = fsub fast float %281, %285
-  %287 = and i32 %283, 1
-  %288 = icmp eq i32 %287, 0
-  br i1 %288, label %289, label %291
-
-; <label>:289                                     ; preds = %280
-  %290 = fadd fast float %286, %20
-  br label %308
-
-; <label>:291                                     ; preds = %280
-  %292 = fsub fast float %24, %286
-  br label %308
-
-; <label>:293                                     ; preds = %275
-  %294 = fcmp fast ogt float %181, %24
-  br i1 %294, label %295, label %308
-
-; <label>:295                                     ; preds = %293
-  %296 = fsub fast float %181, %24
-  %297 = fdiv fast float %296, %278
-  %298 = fptoui float %297 to i32
-  %299 = uitofp i32 %298 to float
-  %300 = fmul fast float %299, %278
-  %301 = fsub fast float %296, %300
-  %302 = and i32 %298, 1
-  %303 = icmp eq i32 %302, 0
-  br i1 %303, label %304, label %306
-
-; <label>:304                                     ; preds = %295
-  %305 = fsub fast float %24, %301
-  br label %308
-
-; <label>:306                                     ; preds = %295
-  %307 = fadd fast float %301, %20
-  br label %308
-
-; <label>:308                                     ; preds = %306, %304, %293, %291, %289
-  %309 = phi float [ %290, %289 ], [ %292, %291 ], [ %305, %304 ], [ %307, %306 ], [ %181, %293 ]
-  %310 = fptoui float %309 to i32
-  %311 = uitofp i32 %310 to float
-  %312 = uitofp i32 %277 to float
-  %313 = fptoui float %45 to i32
-  %314 = fptoui float %182 to i32
-  %315 = fptoui float %311 to i32
-  %316 = fptoui float %312 to i32
-  %317 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %318 = extractvalue %dx.types.CBufRet.i32 %317, 0
-  %319 = extractvalue %dx.types.CBufRet.i32 %317, 1
-  %320 = extractvalue %dx.types.CBufRet.i32 %317, 2
-  %321 = extractvalue %dx.types.CBufRet.i32 %317, 3
-  %322 = mul i32 %318, %313
-  %323 = call i32 @dx.op.tertiary.i32(i32 48, i32 %314, i32 %319, i32 %322)  ; IMad(a,b,c)
-  %324 = call i32 @dx.op.tertiary.i32(i32 48, i32 %315, i32 %320, i32 %323)  ; IMad(a,b,c)
-  %325 = call i32 @dx.op.tertiary.i32(i32 48, i32 %316, i32 %321, i32 %324)  ; IMad(a,b,c)
-  %326 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %325, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %327 = extractvalue %dx.types.ResRet.f16 %326, 0
-  %328 = fpext half %327 to float
-  br label %329
-
-; <label>:329                                     ; preds = %308, %242, %213, %196, %186
-  %330 = phi float [ %210, %196 ], [ 0.000000e+00, %186 ], [ %241, %213 ], [ %328, %308 ], [ 0.000000e+00, %242 ]
-  %331 = fptrunc float %330 to half
-  call void @dx.op.rawBufferStore.f16(i32 140, %dx.types.Handle %1, i32 %8, i32 0, half %331, half undef, half undef, half undef, i8 1, i32 2)  ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment)
-  br label %3323
-
-; <label>:332                                     ; preds = %179
-  %333 = icmp eq i32 %91, 0
-  br i1 %333, label %334, label %933
-
-; <label>:334                                     ; preds = %332
-  %335 = call float @dx.op.unary.f32(i32 27, float %180)  ; Round_ni(value)
-  %336 = call float @dx.op.unary.f32(i32 27, float %181)  ; Round_ni(value)
-  %337 = fadd fast float %335, 1.000000e+00
-  %338 = fadd fast float %336, 1.000000e+00
-  %339 = extractvalue %dx.types.CBufRet.i32 %6, 3
-  %340 = icmp eq i32 %339, 0
-  br i1 %340, label %341, label %366
-
-; <label>:341                                     ; preds = %334
-  %342 = fcmp fast oge float %335, 0.000000e+00
-  %343 = fptoui float %335 to i32
-  %344 = icmp ult i32 %343, %13
-  %345 = and i1 %342, %344
-  %346 = fcmp fast oge float %336, 0.000000e+00
-  %347 = and i1 %346, %345
-  %348 = fptoui float %336 to i32
-  %349 = icmp ult i32 %348, %15
-  %350 = and i1 %349, %347
-  br i1 %350, label %351, label %484
-
-; <label>:351                                     ; preds = %341
-  %352 = fptoui float %45 to i32
-  %353 = fptoui float %182 to i32
-  %354 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %355 = extractvalue %dx.types.CBufRet.i32 %354, 0
-  %356 = extractvalue %dx.types.CBufRet.i32 %354, 1
-  %357 = extractvalue %dx.types.CBufRet.i32 %354, 2
-  %358 = extractvalue %dx.types.CBufRet.i32 %354, 3
-  %359 = mul i32 %355, %352
-  %360 = call i32 @dx.op.tertiary.i32(i32 48, i32 %353, i32 %356, i32 %359)  ; IMad(a,b,c)
-  %361 = call i32 @dx.op.tertiary.i32(i32 48, i32 %348, i32 %357, i32 %360)  ; IMad(a,b,c)
-  %362 = call i32 @dx.op.tertiary.i32(i32 48, i32 %343, i32 %358, i32 %361)  ; IMad(a,b,c)
-  %363 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %362, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %364 = extractvalue %dx.types.ResRet.f16 %363, 0
-  %365 = fpext half %364 to float
-  br label %484
-
-; <label>:366                                     ; preds = %334
-  %367 = icmp eq i32 %339, 1
-  br i1 %367, label %368, label %397
-
-; <label>:368                                     ; preds = %366
-  %369 = add i32 %13, -1
-  %370 = uitofp i32 %369 to float
-  %371 = call float @dx.op.binary.f32(i32 35, float %335, float 0.000000e+00)  ; FMax(a,b)
-  %372 = call float @dx.op.binary.f32(i32 36, float %371, float %370)  ; FMin(a,b)
-  %373 = fptoui float %372 to i32
-  %374 = add i32 %15, -1
-  %375 = uitofp i32 %374 to float
-  %376 = call float @dx.op.binary.f32(i32 35, float %336, float 0.000000e+00)  ; FMax(a,b)
-  %377 = call float @dx.op.binary.f32(i32 36, float %376, float %375)  ; FMin(a,b)
-  %378 = fptoui float %377 to i32
-  %379 = uitofp i32 %378 to float
-  %380 = uitofp i32 %373 to float
-  %381 = fptoui float %45 to i32
-  %382 = fptoui float %182 to i32
-  %383 = fptoui float %379 to i32
-  %384 = fptoui float %380 to i32
-  %385 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %386 = extractvalue %dx.types.CBufRet.i32 %385, 0
-  %387 = extractvalue %dx.types.CBufRet.i32 %385, 1
-  %388 = extractvalue %dx.types.CBufRet.i32 %385, 2
-  %389 = extractvalue %dx.types.CBufRet.i32 %385, 3
-  %390 = mul i32 %386, %381
-  %391 = call i32 @dx.op.tertiary.i32(i32 48, i32 %382, i32 %387, i32 %390)  ; IMad(a,b,c)
-  %392 = call i32 @dx.op.tertiary.i32(i32 48, i32 %383, i32 %388, i32 %391)  ; IMad(a,b,c)
-  %393 = call i32 @dx.op.tertiary.i32(i32 48, i32 %384, i32 %389, i32 %392)  ; IMad(a,b,c)
-  %394 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %393, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %395 = extractvalue %dx.types.ResRet.f16 %394, 0
-  %396 = fpext half %395 to float
-  br label %484
-
-; <label>:397                                     ; preds = %366
-  %398 = icmp eq i32 %339, 2
-  br i1 %398, label %399, label %484
-
-; <label>:399                                     ; preds = %397
-  %400 = fsub fast float %22, %20
-  %401 = fcmp fast olt float %335, %20
-  br i1 %401, label %402, label %415
-
-; <label>:402                                     ; preds = %399
-  %403 = fsub fast float %20, %335
-  %404 = fdiv fast float %403, %400
-  %405 = fptoui float %404 to i32
-  %406 = uitofp i32 %405 to float
-  %407 = fmul fast float %406, %400
-  %408 = fsub fast float %403, %407
-  %409 = and i32 %405, 1
-  %410 = icmp eq i32 %409, 0
-  br i1 %410, label %411, label %413
-
-; <label>:411                                     ; preds = %402
-  %412 = fadd fast float %408, %20
-  br label %430
-
-; <label>:413                                     ; preds = %402
-  %414 = fsub fast float %22, %408
-  br label %430
-
-; <label>:415                                     ; preds = %399
-  %416 = fcmp fast ogt float %335, %22
-  br i1 %416, label %417, label %430
-
-; <label>:417                                     ; preds = %415
-  %418 = fsub fast float %335, %22
-  %419 = fdiv fast float %418, %400
-  %420 = fptoui float %419 to i32
-  %421 = uitofp i32 %420 to float
-  %422 = fmul fast float %421, %400
-  %423 = fsub fast float %418, %422
-  %424 = and i32 %420, 1
-  %425 = icmp eq i32 %424, 0
-  br i1 %425, label %426, label %428
-
-; <label>:426                                     ; preds = %417
-  %427 = fsub fast float %22, %423
-  br label %430
-
-; <label>:428                                     ; preds = %417
-  %429 = fadd fast float %423, %20
-  br label %430
-
-; <label>:430                                     ; preds = %428, %426, %415, %413, %411
-  %431 = phi float [ %412, %411 ], [ %414, %413 ], [ %427, %426 ], [ %429, %428 ], [ %335, %415 ]
-  %432 = fptoui float %431 to i32
-  %433 = fsub fast float %24, %20
-  %434 = fcmp fast olt float %336, %20
-  br i1 %434, label %435, label %448
-
-; <label>:435                                     ; preds = %430
-  %436 = fsub fast float %20, %336
-  %437 = fdiv fast float %436, %433
-  %438 = fptoui float %437 to i32
-  %439 = uitofp i32 %438 to float
-  %440 = fmul fast float %439, %433
-  %441 = fsub fast float %436, %440
-  %442 = and i32 %438, 1
-  %443 = icmp eq i32 %442, 0
-  br i1 %443, label %444, label %446
-
-; <label>:444                                     ; preds = %435
-  %445 = fadd fast float %441, %20
-  br label %463
-
-; <label>:446                                     ; preds = %435
-  %447 = fsub fast float %24, %441
-  br label %463
-
-; <label>:448                                     ; preds = %430
-  %449 = fcmp fast ogt float %336, %24
-  br i1 %449, label %450, label %463
-
-; <label>:450                                     ; preds = %448
-  %451 = fsub fast float %336, %24
-  %452 = fdiv fast float %451, %433
-  %453 = fptoui float %452 to i32
-  %454 = uitofp i32 %453 to float
-  %455 = fmul fast float %454, %433
-  %456 = fsub fast float %451, %455
-  %457 = and i32 %453, 1
-  %458 = icmp eq i32 %457, 0
-  br i1 %458, label %459, label %461
-
-; <label>:459                                     ; preds = %450
-  %460 = fsub fast float %24, %456
-  br label %463
-
-; <label>:461                                     ; preds = %450
-  %462 = fadd fast float %456, %20
-  br label %463
-
-; <label>:463                                     ; preds = %461, %459, %448, %446, %444
-  %464 = phi float [ %445, %444 ], [ %447, %446 ], [ %460, %459 ], [ %462, %461 ], [ %336, %448 ]
-  %465 = fptoui float %464 to i32
-  %466 = uitofp i32 %465 to float
-  %467 = uitofp i32 %432 to float
-  %468 = fptoui float %45 to i32
-  %469 = fptoui float %182 to i32
-  %470 = fptoui float %466 to i32
-  %471 = fptoui float %467 to i32
-  %472 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %473 = extractvalue %dx.types.CBufRet.i32 %472, 0
-  %474 = extractvalue %dx.types.CBufRet.i32 %472, 1
-  %475 = extractvalue %dx.types.CBufRet.i32 %472, 2
-  %476 = extractvalue %dx.types.CBufRet.i32 %472, 3
-  %477 = mul i32 %473, %468
-  %478 = call i32 @dx.op.tertiary.i32(i32 48, i32 %469, i32 %474, i32 %477)  ; IMad(a,b,c)
-  %479 = call i32 @dx.op.tertiary.i32(i32 48, i32 %470, i32 %475, i32 %478)  ; IMad(a,b,c)
-  %480 = call i32 @dx.op.tertiary.i32(i32 48, i32 %471, i32 %476, i32 %479)  ; IMad(a,b,c)
-  %481 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %480, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %482 = extractvalue %dx.types.ResRet.f16 %481, 0
-  %483 = fpext half %482 to float
-  br label %484
-
-; <label>:484                                     ; preds = %463, %397, %368, %351, %341
-  %485 = phi float [ %365, %351 ], [ 0.000000e+00, %341 ], [ %396, %368 ], [ %483, %463 ], [ 0.000000e+00, %397 ]
-  br i1 %340, label %486, label %511
-
-; <label>:486                                     ; preds = %484
-  %487 = fcmp fast oge float %337, 0.000000e+00
-  %488 = fptoui float %337 to i32
-  %489 = icmp ult i32 %488, %13
-  %490 = and i1 %487, %489
-  %491 = fcmp fast oge float %336, 0.000000e+00
-  %492 = and i1 %491, %490
-  %493 = fptoui float %336 to i32
-  %494 = icmp ult i32 %493, %15
-  %495 = and i1 %494, %492
-  br i1 %495, label %496, label %629
-
-; <label>:496                                     ; preds = %486
-  %497 = fptoui float %45 to i32
-  %498 = fptoui float %182 to i32
-  %499 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %500 = extractvalue %dx.types.CBufRet.i32 %499, 0
-  %501 = extractvalue %dx.types.CBufRet.i32 %499, 1
-  %502 = extractvalue %dx.types.CBufRet.i32 %499, 2
-  %503 = extractvalue %dx.types.CBufRet.i32 %499, 3
-  %504 = mul i32 %500, %497
-  %505 = call i32 @dx.op.tertiary.i32(i32 48, i32 %498, i32 %501, i32 %504)  ; IMad(a,b,c)
-  %506 = call i32 @dx.op.tertiary.i32(i32 48, i32 %493, i32 %502, i32 %505)  ; IMad(a,b,c)
-  %507 = call i32 @dx.op.tertiary.i32(i32 48, i32 %488, i32 %503, i32 %506)  ; IMad(a,b,c)
-  %508 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %507, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %509 = extractvalue %dx.types.ResRet.f16 %508, 0
-  %510 = fpext half %509 to float
-  br label %629
-
-; <label>:511                                     ; preds = %484
-  %512 = icmp eq i32 %339, 1
-  br i1 %512, label %513, label %542
-
-; <label>:513                                     ; preds = %511
-  %514 = add i32 %13, -1
-  %515 = uitofp i32 %514 to float
-  %516 = call float @dx.op.binary.f32(i32 35, float %337, float 0.000000e+00)  ; FMax(a,b)
-  %517 = call float @dx.op.binary.f32(i32 36, float %516, float %515)  ; FMin(a,b)
-  %518 = fptoui float %517 to i32
-  %519 = add i32 %15, -1
-  %520 = uitofp i32 %519 to float
-  %521 = call float @dx.op.binary.f32(i32 35, float %336, float 0.000000e+00)  ; FMax(a,b)
-  %522 = call float @dx.op.binary.f32(i32 36, float %521, float %520)  ; FMin(a,b)
-  %523 = fptoui float %522 to i32
-  %524 = uitofp i32 %523 to float
-  %525 = uitofp i32 %518 to float
-  %526 = fptoui float %45 to i32
-  %527 = fptoui float %182 to i32
-  %528 = fptoui float %524 to i32
-  %529 = fptoui float %525 to i32
-  %530 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %531 = extractvalue %dx.types.CBufRet.i32 %530, 0
-  %532 = extractvalue %dx.types.CBufRet.i32 %530, 1
-  %533 = extractvalue %dx.types.CBufRet.i32 %530, 2
-  %534 = extractvalue %dx.types.CBufRet.i32 %530, 3
-  %535 = mul i32 %531, %526
-  %536 = call i32 @dx.op.tertiary.i32(i32 48, i32 %527, i32 %532, i32 %535)  ; IMad(a,b,c)
-  %537 = call i32 @dx.op.tertiary.i32(i32 48, i32 %528, i32 %533, i32 %536)  ; IMad(a,b,c)
-  %538 = call i32 @dx.op.tertiary.i32(i32 48, i32 %529, i32 %534, i32 %537)  ; IMad(a,b,c)
-  %539 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %538, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %540 = extractvalue %dx.types.ResRet.f16 %539, 0
-  %541 = fpext half %540 to float
-  br label %629
-
-; <label>:542                                     ; preds = %511
-  %543 = icmp eq i32 %339, 2
-  br i1 %543, label %544, label %629
-
-; <label>:544                                     ; preds = %542
-  %545 = fsub fast float %22, %20
-  %546 = fcmp fast olt float %337, %20
-  br i1 %546, label %547, label %560
-
-; <label>:547                                     ; preds = %544
-  %548 = fsub fast float %20, %337
-  %549 = fdiv fast float %548, %545
-  %550 = fptoui float %549 to i32
-  %551 = uitofp i32 %550 to float
-  %552 = fmul fast float %551, %545
-  %553 = fsub fast float %548, %552
-  %554 = and i32 %550, 1
-  %555 = icmp eq i32 %554, 0
-  br i1 %555, label %556, label %558
-
-; <label>:556                                     ; preds = %547
-  %557 = fadd fast float %553, %20
-  br label %575
-
-; <label>:558                                     ; preds = %547
-  %559 = fsub fast float %22, %553
-  br label %575
-
-; <label>:560                                     ; preds = %544
-  %561 = fcmp fast ogt float %337, %22
-  br i1 %561, label %562, label %575
-
-; <label>:562                                     ; preds = %560
-  %563 = fsub fast float %337, %22
-  %564 = fdiv fast float %563, %545
-  %565 = fptoui float %564 to i32
-  %566 = uitofp i32 %565 to float
-  %567 = fmul fast float %566, %545
-  %568 = fsub fast float %563, %567
-  %569 = and i32 %565, 1
-  %570 = icmp eq i32 %569, 0
-  br i1 %570, label %571, label %573
-
-; <label>:571                                     ; preds = %562
-  %572 = fsub fast float %22, %568
-  br label %575
-
-; <label>:573                                     ; preds = %562
-  %574 = fadd fast float %568, %20
-  br label %575
-
-; <label>:575                                     ; preds = %573, %571, %560, %558, %556
-  %576 = phi float [ %557, %556 ], [ %559, %558 ], [ %572, %571 ], [ %574, %573 ], [ %337, %560 ]
-  %577 = fptoui float %576 to i32
-  %578 = fsub fast float %24, %20
-  %579 = fcmp fast olt float %336, %20
-  br i1 %579, label %580, label %593
-
-; <label>:580                                     ; preds = %575
-  %581 = fsub fast float %20, %336
-  %582 = fdiv fast float %581, %578
-  %583 = fptoui float %582 to i32
-  %584 = uitofp i32 %583 to float
-  %585 = fmul fast float %584, %578
-  %586 = fsub fast float %581, %585
-  %587 = and i32 %583, 1
-  %588 = icmp eq i32 %587, 0
-  br i1 %588, label %589, label %591
-
-; <label>:589                                     ; preds = %580
-  %590 = fadd fast float %586, %20
-  br label %608
-
-; <label>:591                                     ; preds = %580
-  %592 = fsub fast float %24, %586
-  br label %608
-
-; <label>:593                                     ; preds = %575
-  %594 = fcmp fast ogt float %336, %24
-  br i1 %594, label %595, label %608
-
-; <label>:595                                     ; preds = %593
-  %596 = fsub fast float %336, %24
-  %597 = fdiv fast float %596, %578
-  %598 = fptoui float %597 to i32
-  %599 = uitofp i32 %598 to float
-  %600 = fmul fast float %599, %578
-  %601 = fsub fast float %596, %600
-  %602 = and i32 %598, 1
-  %603 = icmp eq i32 %602, 0
-  br i1 %603, label %604, label %606
-
-; <label>:604                                     ; preds = %595
-  %605 = fsub fast float %24, %601
-  br label %608
-
-; <label>:606                                     ; preds = %595
-  %607 = fadd fast float %601, %20
-  br label %608
-
-; <label>:608                                     ; preds = %606, %604, %593, %591, %589
-  %609 = phi float [ %590, %589 ], [ %592, %591 ], [ %605, %604 ], [ %607, %606 ], [ %336, %593 ]
-  %610 = fptoui float %609 to i32
-  %611 = uitofp i32 %610 to float
-  %612 = uitofp i32 %577 to float
-  %613 = fptoui float %45 to i32
-  %614 = fptoui float %182 to i32
-  %615 = fptoui float %611 to i32
-  %616 = fptoui float %612 to i32
-  %617 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %618 = extractvalue %dx.types.CBufRet.i32 %617, 0
-  %619 = extractvalue %dx.types.CBufRet.i32 %617, 1
-  %620 = extractvalue %dx.types.CBufRet.i32 %617, 2
-  %621 = extractvalue %dx.types.CBufRet.i32 %617, 3
-  %622 = mul i32 %618, %613
-  %623 = call i32 @dx.op.tertiary.i32(i32 48, i32 %614, i32 %619, i32 %622)  ; IMad(a,b,c)
-  %624 = call i32 @dx.op.tertiary.i32(i32 48, i32 %615, i32 %620, i32 %623)  ; IMad(a,b,c)
-  %625 = call i32 @dx.op.tertiary.i32(i32 48, i32 %616, i32 %621, i32 %624)  ; IMad(a,b,c)
-  %626 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %625, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %627 = extractvalue %dx.types.ResRet.f16 %626, 0
-  %628 = fpext half %627 to float
-  br label %629
-
-; <label>:629                                     ; preds = %608, %542, %513, %496, %486
-  %630 = phi float [ %510, %496 ], [ 0.000000e+00, %486 ], [ %541, %513 ], [ %628, %608 ], [ 0.000000e+00, %542 ]
-  br i1 %340, label %631, label %656
-
-; <label>:631                                     ; preds = %629
-  %632 = fcmp fast oge float %335, 0.000000e+00
-  %633 = fptoui float %335 to i32
-  %634 = icmp ult i32 %633, %13
-  %635 = and i1 %632, %634
-  %636 = fcmp fast oge float %338, 0.000000e+00
-  %637 = and i1 %636, %635
-  %638 = fptoui float %338 to i32
-  %639 = icmp ult i32 %638, %15
-  %640 = and i1 %639, %637
-  br i1 %640, label %641, label %774
-
-; <label>:641                                     ; preds = %631
-  %642 = fptoui float %45 to i32
-  %643 = fptoui float %182 to i32
-  %644 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %645 = extractvalue %dx.types.CBufRet.i32 %644, 0
-  %646 = extractvalue %dx.types.CBufRet.i32 %644, 1
-  %647 = extractvalue %dx.types.CBufRet.i32 %644, 2
-  %648 = extractvalue %dx.types.CBufRet.i32 %644, 3
-  %649 = mul i32 %645, %642
-  %650 = call i32 @dx.op.tertiary.i32(i32 48, i32 %643, i32 %646, i32 %649)  ; IMad(a,b,c)
-  %651 = call i32 @dx.op.tertiary.i32(i32 48, i32 %638, i32 %647, i32 %650)  ; IMad(a,b,c)
-  %652 = call i32 @dx.op.tertiary.i32(i32 48, i32 %633, i32 %648, i32 %651)  ; IMad(a,b,c)
-  %653 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %652, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %654 = extractvalue %dx.types.ResRet.f16 %653, 0
-  %655 = fpext half %654 to float
-  br label %774
-
-; <label>:656                                     ; preds = %629
-  %657 = icmp eq i32 %339, 1
-  br i1 %657, label %658, label %687
-
-; <label>:658                                     ; preds = %656
-  %659 = add i32 %13, -1
-  %660 = uitofp i32 %659 to float
-  %661 = call float @dx.op.binary.f32(i32 35, float %335, float 0.000000e+00)  ; FMax(a,b)
-  %662 = call float @dx.op.binary.f32(i32 36, float %661, float %660)  ; FMin(a,b)
-  %663 = fptoui float %662 to i32
-  %664 = add i32 %15, -1
-  %665 = uitofp i32 %664 to float
-  %666 = call float @dx.op.binary.f32(i32 35, float %338, float 0.000000e+00)  ; FMax(a,b)
-  %667 = call float @dx.op.binary.f32(i32 36, float %666, float %665)  ; FMin(a,b)
-  %668 = fptoui float %667 to i32
-  %669 = uitofp i32 %668 to float
-  %670 = uitofp i32 %663 to float
-  %671 = fptoui float %45 to i32
-  %672 = fptoui float %182 to i32
-  %673 = fptoui float %669 to i32
-  %674 = fptoui float %670 to i32
-  %675 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %676 = extractvalue %dx.types.CBufRet.i32 %675, 0
-  %677 = extractvalue %dx.types.CBufRet.i32 %675, 1
-  %678 = extractvalue %dx.types.CBufRet.i32 %675, 2
-  %679 = extractvalue %dx.types.CBufRet.i32 %675, 3
-  %680 = mul i32 %676, %671
-  %681 = call i32 @dx.op.tertiary.i32(i32 48, i32 %672, i32 %677, i32 %680)  ; IMad(a,b,c)
-  %682 = call i32 @dx.op.tertiary.i32(i32 48, i32 %673, i32 %678, i32 %681)  ; IMad(a,b,c)
-  %683 = call i32 @dx.op.tertiary.i32(i32 48, i32 %674, i32 %679, i32 %682)  ; IMad(a,b,c)
-  %684 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %683, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %685 = extractvalue %dx.types.ResRet.f16 %684, 0
-  %686 = fpext half %685 to float
-  br label %774
-
-; <label>:687                                     ; preds = %656
-  %688 = icmp eq i32 %339, 2
-  br i1 %688, label %689, label %774
-
-; <label>:689                                     ; preds = %687
-  %690 = fsub fast float %22, %20
-  %691 = fcmp fast olt float %335, %20
-  br i1 %691, label %692, label %705
-
-; <label>:692                                     ; preds = %689
-  %693 = fsub fast float %20, %335
-  %694 = fdiv fast float %693, %690
-  %695 = fptoui float %694 to i32
-  %696 = uitofp i32 %695 to float
-  %697 = fmul fast float %696, %690
-  %698 = fsub fast float %693, %697
-  %699 = and i32 %695, 1
-  %700 = icmp eq i32 %699, 0
-  br i1 %700, label %701, label %703
-
-; <label>:701                                     ; preds = %692
-  %702 = fadd fast float %698, %20
-  br label %720
-
-; <label>:703                                     ; preds = %692
-  %704 = fsub fast float %22, %698
-  br label %720
-
-; <label>:705                                     ; preds = %689
-  %706 = fcmp fast ogt float %335, %22
-  br i1 %706, label %707, label %720
-
-; <label>:707                                     ; preds = %705
-  %708 = fsub fast float %335, %22
-  %709 = fdiv fast float %708, %690
-  %710 = fptoui float %709 to i32
-  %711 = uitofp i32 %710 to float
-  %712 = fmul fast float %711, %690
-  %713 = fsub fast float %708, %712
-  %714 = and i32 %710, 1
-  %715 = icmp eq i32 %714, 0
-  br i1 %715, label %716, label %718
-
-; <label>:716                                     ; preds = %707
-  %717 = fsub fast float %22, %713
-  br label %720
-
-; <label>:718                                     ; preds = %707
-  %719 = fadd fast float %713, %20
-  br label %720
-
-; <label>:720                                     ; preds = %718, %716, %705, %703, %701
-  %721 = phi float [ %702, %701 ], [ %704, %703 ], [ %717, %716 ], [ %719, %718 ], [ %335, %705 ]
-  %722 = fptoui float %721 to i32
-  %723 = fsub fast float %24, %20
-  %724 = fcmp fast olt float %338, %20
-  br i1 %724, label %725, label %738
-
-; <label>:725                                     ; preds = %720
-  %726 = fsub fast float %20, %338
-  %727 = fdiv fast float %726, %723
-  %728 = fptoui float %727 to i32
-  %729 = uitofp i32 %728 to float
-  %730 = fmul fast float %729, %723
-  %731 = fsub fast float %726, %730
-  %732 = and i32 %728, 1
-  %733 = icmp eq i32 %732, 0
-  br i1 %733, label %734, label %736
-
-; <label>:734                                     ; preds = %725
-  %735 = fadd fast float %731, %20
-  br label %753
-
-; <label>:736                                     ; preds = %725
-  %737 = fsub fast float %24, %731
-  br label %753
-
-; <label>:738                                     ; preds = %720
-  %739 = fcmp fast ogt float %338, %24
-  br i1 %739, label %740, label %753
-
-; <label>:740                                     ; preds = %738
-  %741 = fsub fast float %338, %24
-  %742 = fdiv fast float %741, %723
-  %743 = fptoui float %742 to i32
-  %744 = uitofp i32 %743 to float
-  %745 = fmul fast float %744, %723
-  %746 = fsub fast float %741, %745
-  %747 = and i32 %743, 1
-  %748 = icmp eq i32 %747, 0
-  br i1 %748, label %749, label %751
-
-; <label>:749                                     ; preds = %740
-  %750 = fsub fast float %24, %746
-  br label %753
-
-; <label>:751                                     ; preds = %740
-  %752 = fadd fast float %746, %20
-  br label %753
-
-; <label>:753                                     ; preds = %751, %749, %738, %736, %734
-  %754 = phi float [ %735, %734 ], [ %737, %736 ], [ %750, %749 ], [ %752, %751 ], [ %338, %738 ]
-  %755 = fptoui float %754 to i32
-  %756 = uitofp i32 %755 to float
-  %757 = uitofp i32 %722 to float
-  %758 = fptoui float %45 to i32
-  %759 = fptoui float %182 to i32
-  %760 = fptoui float %756 to i32
-  %761 = fptoui float %757 to i32
-  %762 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %763 = extractvalue %dx.types.CBufRet.i32 %762, 0
-  %764 = extractvalue %dx.types.CBufRet.i32 %762, 1
-  %765 = extractvalue %dx.types.CBufRet.i32 %762, 2
-  %766 = extractvalue %dx.types.CBufRet.i32 %762, 3
-  %767 = mul i32 %763, %758
-  %768 = call i32 @dx.op.tertiary.i32(i32 48, i32 %759, i32 %764, i32 %767)  ; IMad(a,b,c)
-  %769 = call i32 @dx.op.tertiary.i32(i32 48, i32 %760, i32 %765, i32 %768)  ; IMad(a,b,c)
-  %770 = call i32 @dx.op.tertiary.i32(i32 48, i32 %761, i32 %766, i32 %769)  ; IMad(a,b,c)
-  %771 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %770, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %772 = extractvalue %dx.types.ResRet.f16 %771, 0
-  %773 = fpext half %772 to float
-  br label %774
-
-; <label>:774                                     ; preds = %753, %687, %658, %641, %631
-  %775 = phi float [ %655, %641 ], [ 0.000000e+00, %631 ], [ %686, %658 ], [ %773, %753 ], [ 0.000000e+00, %687 ]
-  br i1 %340, label %776, label %801
-
-; <label>:776                                     ; preds = %774
-  %777 = fcmp fast oge float %337, 0.000000e+00
-  %778 = fptoui float %337 to i32
-  %779 = icmp ult i32 %778, %13
-  %780 = and i1 %777, %779
-  %781 = fcmp fast oge float %338, 0.000000e+00
-  %782 = and i1 %781, %780
-  %783 = fptoui float %338 to i32
-  %784 = icmp ult i32 %783, %15
-  %785 = and i1 %784, %782
-  br i1 %785, label %786, label %919
-
-; <label>:786                                     ; preds = %776
-  %787 = fptoui float %45 to i32
-  %788 = fptoui float %182 to i32
-  %789 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %790 = extractvalue %dx.types.CBufRet.i32 %789, 0
-  %791 = extractvalue %dx.types.CBufRet.i32 %789, 1
-  %792 = extractvalue %dx.types.CBufRet.i32 %789, 2
-  %793 = extractvalue %dx.types.CBufRet.i32 %789, 3
-  %794 = mul i32 %790, %787
-  %795 = call i32 @dx.op.tertiary.i32(i32 48, i32 %788, i32 %791, i32 %794)  ; IMad(a,b,c)
-  %796 = call i32 @dx.op.tertiary.i32(i32 48, i32 %783, i32 %792, i32 %795)  ; IMad(a,b,c)
-  %797 = call i32 @dx.op.tertiary.i32(i32 48, i32 %778, i32 %793, i32 %796)  ; IMad(a,b,c)
-  %798 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %797, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %799 = extractvalue %dx.types.ResRet.f16 %798, 0
-  %800 = fpext half %799 to float
-  br label %919
-
-; <label>:801                                     ; preds = %774
-  %802 = icmp eq i32 %339, 1
-  br i1 %802, label %803, label %832
-
-; <label>:803                                     ; preds = %801
-  %804 = add i32 %13, -1
-  %805 = uitofp i32 %804 to float
-  %806 = call float @dx.op.binary.f32(i32 35, float %337, float 0.000000e+00)  ; FMax(a,b)
-  %807 = call float @dx.op.binary.f32(i32 36, float %806, float %805)  ; FMin(a,b)
-  %808 = fptoui float %807 to i32
-  %809 = add i32 %15, -1
-  %810 = uitofp i32 %809 to float
-  %811 = call float @dx.op.binary.f32(i32 35, float %338, float 0.000000e+00)  ; FMax(a,b)
-  %812 = call float @dx.op.binary.f32(i32 36, float %811, float %810)  ; FMin(a,b)
-  %813 = fptoui float %812 to i32
-  %814 = uitofp i32 %813 to float
-  %815 = uitofp i32 %808 to float
-  %816 = fptoui float %45 to i32
-  %817 = fptoui float %182 to i32
-  %818 = fptoui float %814 to i32
-  %819 = fptoui float %815 to i32
-  %820 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %821 = extractvalue %dx.types.CBufRet.i32 %820, 0
-  %822 = extractvalue %dx.types.CBufRet.i32 %820, 1
-  %823 = extractvalue %dx.types.CBufRet.i32 %820, 2
-  %824 = extractvalue %dx.types.CBufRet.i32 %820, 3
-  %825 = mul i32 %821, %816
-  %826 = call i32 @dx.op.tertiary.i32(i32 48, i32 %817, i32 %822, i32 %825)  ; IMad(a,b,c)
-  %827 = call i32 @dx.op.tertiary.i32(i32 48, i32 %818, i32 %823, i32 %826)  ; IMad(a,b,c)
-  %828 = call i32 @dx.op.tertiary.i32(i32 48, i32 %819, i32 %824, i32 %827)  ; IMad(a,b,c)
-  %829 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %828, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %830 = extractvalue %dx.types.ResRet.f16 %829, 0
-  %831 = fpext half %830 to float
-  br label %919
-
-; <label>:832                                     ; preds = %801
-  %833 = icmp eq i32 %339, 2
-  br i1 %833, label %834, label %919
-
-; <label>:834                                     ; preds = %832
-  %835 = fsub fast float %22, %20
-  %836 = fcmp fast olt float %337, %20
-  br i1 %836, label %837, label %850
-
-; <label>:837                                     ; preds = %834
-  %838 = fsub fast float %20, %337
-  %839 = fdiv fast float %838, %835
-  %840 = fptoui float %839 to i32
-  %841 = uitofp i32 %840 to float
-  %842 = fmul fast float %841, %835
-  %843 = fsub fast float %838, %842
-  %844 = and i32 %840, 1
-  %845 = icmp eq i32 %844, 0
-  br i1 %845, label %846, label %848
-
-; <label>:846                                     ; preds = %837
-  %847 = fadd fast float %843, %20
-  br label %865
-
-; <label>:848                                     ; preds = %837
-  %849 = fsub fast float %22, %843
-  br label %865
-
-; <label>:850                                     ; preds = %834
-  %851 = fcmp fast ogt float %337, %22
-  br i1 %851, label %852, label %865
-
-; <label>:852                                     ; preds = %850
-  %853 = fsub fast float %337, %22
-  %854 = fdiv fast float %853, %835
-  %855 = fptoui float %854 to i32
-  %856 = uitofp i32 %855 to float
-  %857 = fmul fast float %856, %835
-  %858 = fsub fast float %853, %857
-  %859 = and i32 %855, 1
-  %860 = icmp eq i32 %859, 0
-  br i1 %860, label %861, label %863
-
-; <label>:861                                     ; preds = %852
-  %862 = fsub fast float %22, %858
-  br label %865
-
-; <label>:863                                     ; preds = %852
-  %864 = fadd fast float %858, %20
-  br label %865
-
-; <label>:865                                     ; preds = %863, %861, %850, %848, %846
-  %866 = phi float [ %847, %846 ], [ %849, %848 ], [ %862, %861 ], [ %864, %863 ], [ %337, %850 ]
-  %867 = fptoui float %866 to i32
-  %868 = fsub fast float %24, %20
-  %869 = fcmp fast olt float %338, %20
-  br i1 %869, label %870, label %883
-
-; <label>:870                                     ; preds = %865
-  %871 = fsub fast float %20, %338
-  %872 = fdiv fast float %871, %868
-  %873 = fptoui float %872 to i32
-  %874 = uitofp i32 %873 to float
-  %875 = fmul fast float %874, %868
-  %876 = fsub fast float %871, %875
-  %877 = and i32 %873, 1
-  %878 = icmp eq i32 %877, 0
-  br i1 %878, label %879, label %881
-
-; <label>:879                                     ; preds = %870
-  %880 = fadd fast float %876, %20
-  br label %898
-
-; <label>:881                                     ; preds = %870
-  %882 = fsub fast float %24, %876
-  br label %898
-
-; <label>:883                                     ; preds = %865
-  %884 = fcmp fast ogt float %338, %24
-  br i1 %884, label %885, label %898
-
-; <label>:885                                     ; preds = %883
-  %886 = fsub fast float %338, %24
-  %887 = fdiv fast float %886, %868
-  %888 = fptoui float %887 to i32
-  %889 = uitofp i32 %888 to float
-  %890 = fmul fast float %889, %868
-  %891 = fsub fast float %886, %890
-  %892 = and i32 %888, 1
-  %893 = icmp eq i32 %892, 0
-  br i1 %893, label %894, label %896
-
-; <label>:894                                     ; preds = %885
-  %895 = fsub fast float %24, %891
-  br label %898
-
-; <label>:896                                     ; preds = %885
-  %897 = fadd fast float %891, %20
-  br label %898
-
-; <label>:898                                     ; preds = %896, %894, %883, %881, %879
-  %899 = phi float [ %880, %879 ], [ %882, %881 ], [ %895, %894 ], [ %897, %896 ], [ %338, %883 ]
-  %900 = fptoui float %899 to i32
-  %901 = uitofp i32 %900 to float
-  %902 = uitofp i32 %867 to float
-  %903 = fptoui float %45 to i32
-  %904 = fptoui float %182 to i32
-  %905 = fptoui float %901 to i32
-  %906 = fptoui float %902 to i32
-  %907 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %908 = extractvalue %dx.types.CBufRet.i32 %907, 0
-  %909 = extractvalue %dx.types.CBufRet.i32 %907, 1
-  %910 = extractvalue %dx.types.CBufRet.i32 %907, 2
-  %911 = extractvalue %dx.types.CBufRet.i32 %907, 3
-  %912 = mul i32 %908, %903
-  %913 = call i32 @dx.op.tertiary.i32(i32 48, i32 %904, i32 %909, i32 %912)  ; IMad(a,b,c)
-  %914 = call i32 @dx.op.tertiary.i32(i32 48, i32 %905, i32 %910, i32 %913)  ; IMad(a,b,c)
-  %915 = call i32 @dx.op.tertiary.i32(i32 48, i32 %906, i32 %911, i32 %914)  ; IMad(a,b,c)
-  %916 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %915, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %917 = extractvalue %dx.types.ResRet.f16 %916, 0
-  %918 = fpext half %917 to float
-  br label %919
-
-; <label>:919                                     ; preds = %898, %832, %803, %786, %776
-  %920 = phi float [ %800, %786 ], [ 0.000000e+00, %776 ], [ %831, %803 ], [ %918, %898 ], [ 0.000000e+00, %832 ]
-  %921 = call float @dx.op.unary.f32(i32 22, float %180)  ; Frc(value)
-  %922 = fsub fast float %630, %485
-  %923 = fmul fast float %921, %922
-  %924 = fadd fast float %923, %485
-  %925 = fsub fast float %920, %775
-  %926 = fmul fast float %921, %925
-  %927 = fadd fast float %926, %775
-  %928 = call float @dx.op.unary.f32(i32 22, float %181)  ; Frc(value)
-  %929 = fsub fast float %927, %924
-  %930 = fmul fast float %929, %928
-  %931 = fadd fast float %930, %924
-  %932 = fptrunc float %931 to half
-  call void @dx.op.rawBufferStore.f16(i32 140, %dx.types.Handle %1, i32 %8, i32 0, half %932, half undef, half undef, half undef, i8 1, i32 2)  ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment)
-  br label %3323
-
-; <label>:933                                     ; preds = %332
-  %934 = icmp eq i32 %91, 2
-  br i1 %934, label %935, label %3323
-
-; <label>:935                                     ; preds = %933
-  %936 = call float @dx.op.unary.f32(i32 27, float %180)  ; Round_ni(value)
-  %937 = fadd fast float %936, -1.000000e+00
-  %938 = call float @dx.op.unary.f32(i32 27, float %181)  ; Round_ni(value)
-  %939 = fadd fast float %938, -1.000000e+00
-  %940 = extractvalue %dx.types.CBufRet.i32 %6, 3
-  %941 = icmp eq i32 %940, 0
-  br i1 %941, label %942, label %967
-
-; <label>:942                                     ; preds = %935
-  %943 = fcmp fast oge float %937, 0.000000e+00
-  %944 = fptoui float %937 to i32
-  %945 = icmp ult i32 %944, %13
-  %946 = and i1 %943, %945
-  %947 = fcmp fast oge float %939, 0.000000e+00
-  %948 = and i1 %947, %946
-  %949 = fptoui float %939 to i32
-  %950 = icmp ult i32 %949, %15
-  %951 = and i1 %950, %948
-  br i1 %951, label %952, label %1085
-
-; <label>:952                                     ; preds = %942
-  %953 = fptoui float %45 to i32
-  %954 = fptoui float %182 to i32
-  %955 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %956 = extractvalue %dx.types.CBufRet.i32 %955, 0
-  %957 = extractvalue %dx.types.CBufRet.i32 %955, 1
-  %958 = extractvalue %dx.types.CBufRet.i32 %955, 2
-  %959 = extractvalue %dx.types.CBufRet.i32 %955, 3
-  %960 = mul i32 %956, %953
-  %961 = call i32 @dx.op.tertiary.i32(i32 48, i32 %954, i32 %957, i32 %960)  ; IMad(a,b,c)
-  %962 = call i32 @dx.op.tertiary.i32(i32 48, i32 %949, i32 %958, i32 %961)  ; IMad(a,b,c)
-  %963 = call i32 @dx.op.tertiary.i32(i32 48, i32 %944, i32 %959, i32 %962)  ; IMad(a,b,c)
-  %964 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %963, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %965 = extractvalue %dx.types.ResRet.f16 %964, 0
-  %966 = fpext half %965 to float
-  br label %1085
-
-; <label>:967                                     ; preds = %935
-  %968 = icmp eq i32 %940, 1
-  br i1 %968, label %969, label %998
-
-; <label>:969                                     ; preds = %967
-  %970 = add i32 %13, -1
-  %971 = uitofp i32 %970 to float
-  %972 = call float @dx.op.binary.f32(i32 35, float %937, float 0.000000e+00)  ; FMax(a,b)
-  %973 = call float @dx.op.binary.f32(i32 36, float %972, float %971)  ; FMin(a,b)
-  %974 = fptoui float %973 to i32
-  %975 = add i32 %15, -1
-  %976 = uitofp i32 %975 to float
-  %977 = call float @dx.op.binary.f32(i32 35, float %939, float 0.000000e+00)  ; FMax(a,b)
-  %978 = call float @dx.op.binary.f32(i32 36, float %977, float %976)  ; FMin(a,b)
-  %979 = fptoui float %978 to i32
-  %980 = uitofp i32 %979 to float
-  %981 = uitofp i32 %974 to float
-  %982 = fptoui float %45 to i32
-  %983 = fptoui float %182 to i32
-  %984 = fptoui float %980 to i32
-  %985 = fptoui float %981 to i32
-  %986 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %987 = extractvalue %dx.types.CBufRet.i32 %986, 0
-  %988 = extractvalue %dx.types.CBufRet.i32 %986, 1
-  %989 = extractvalue %dx.types.CBufRet.i32 %986, 2
-  %990 = extractvalue %dx.types.CBufRet.i32 %986, 3
-  %991 = mul i32 %987, %982
-  %992 = call i32 @dx.op.tertiary.i32(i32 48, i32 %983, i32 %988, i32 %991)  ; IMad(a,b,c)
-  %993 = call i32 @dx.op.tertiary.i32(i32 48, i32 %984, i32 %989, i32 %992)  ; IMad(a,b,c)
-  %994 = call i32 @dx.op.tertiary.i32(i32 48, i32 %985, i32 %990, i32 %993)  ; IMad(a,b,c)
-  %995 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %994, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %996 = extractvalue %dx.types.ResRet.f16 %995, 0
-  %997 = fpext half %996 to float
-  br label %1085
-
-; <label>:998                                     ; preds = %967
-  %999 = icmp eq i32 %940, 2
-  br i1 %999, label %1000, label %1085
-
-; <label>:1000                                    ; preds = %998
-  %1001 = fsub fast float %22, %20
-  %1002 = fcmp fast olt float %937, %20
-  br i1 %1002, label %1003, label %1016
-
-; <label>:1003                                    ; preds = %1000
-  %1004 = fsub fast float %20, %937
-  %1005 = fdiv fast float %1004, %1001
-  %1006 = fptoui float %1005 to i32
-  %1007 = uitofp i32 %1006 to float
-  %1008 = fmul fast float %1007, %1001
-  %1009 = fsub fast float %1004, %1008
-  %1010 = and i32 %1006, 1
-  %1011 = icmp eq i32 %1010, 0
-  br i1 %1011, label %1012, label %1014
-
-; <label>:1012                                    ; preds = %1003
-  %1013 = fadd fast float %1009, %20
-  br label %1031
-
-; <label>:1014                                    ; preds = %1003
-  %1015 = fsub fast float %22, %1009
-  br label %1031
-
-; <label>:1016                                    ; preds = %1000
-  %1017 = fcmp fast ogt float %937, %22
-  br i1 %1017, label %1018, label %1031
-
-; <label>:1018                                    ; preds = %1016
-  %1019 = fsub fast float %937, %22
-  %1020 = fdiv fast float %1019, %1001
-  %1021 = fptoui float %1020 to i32
-  %1022 = uitofp i32 %1021 to float
-  %1023 = fmul fast float %1022, %1001
-  %1024 = fsub fast float %1019, %1023
-  %1025 = and i32 %1021, 1
-  %1026 = icmp eq i32 %1025, 0
-  br i1 %1026, label %1027, label %1029
-
-; <label>:1027                                    ; preds = %1018
-  %1028 = fsub fast float %22, %1024
-  br label %1031
-
-; <label>:1029                                    ; preds = %1018
-  %1030 = fadd fast float %1024, %20
-  br label %1031
-
-; <label>:1031                                    ; preds = %1029, %1027, %1016, %1014, %1012
-  %1032 = phi float [ %1013, %1012 ], [ %1015, %1014 ], [ %1028, %1027 ], [ %1030, %1029 ], [ %937, %1016 ]
-  %1033 = fptoui float %1032 to i32
-  %1034 = fsub fast float %24, %20
-  %1035 = fcmp fast olt float %939, %20
-  br i1 %1035, label %1036, label %1049
-
-; <label>:1036                                    ; preds = %1031
-  %1037 = fsub fast float %20, %939
-  %1038 = fdiv fast float %1037, %1034
-  %1039 = fptoui float %1038 to i32
-  %1040 = uitofp i32 %1039 to float
-  %1041 = fmul fast float %1040, %1034
-  %1042 = fsub fast float %1037, %1041
-  %1043 = and i32 %1039, 1
-  %1044 = icmp eq i32 %1043, 0
-  br i1 %1044, label %1045, label %1047
-
-; <label>:1045                                    ; preds = %1036
-  %1046 = fadd fast float %1042, %20
-  br label %1064
-
-; <label>:1047                                    ; preds = %1036
-  %1048 = fsub fast float %24, %1042
-  br label %1064
-
-; <label>:1049                                    ; preds = %1031
-  %1050 = fcmp fast ogt float %939, %24
-  br i1 %1050, label %1051, label %1064
-
-; <label>:1051                                    ; preds = %1049
-  %1052 = fsub fast float %939, %24
-  %1053 = fdiv fast float %1052, %1034
-  %1054 = fptoui float %1053 to i32
-  %1055 = uitofp i32 %1054 to float
-  %1056 = fmul fast float %1055, %1034
-  %1057 = fsub fast float %1052, %1056
-  %1058 = and i32 %1054, 1
-  %1059 = icmp eq i32 %1058, 0
-  br i1 %1059, label %1060, label %1062
-
-; <label>:1060                                    ; preds = %1051
-  %1061 = fsub fast float %24, %1057
-  br label %1064
-
-; <label>:1062                                    ; preds = %1051
-  %1063 = fadd fast float %1057, %20
-  br label %1064
-
-; <label>:1064                                    ; preds = %1062, %1060, %1049, %1047, %1045
-  %1065 = phi float [ %1046, %1045 ], [ %1048, %1047 ], [ %1061, %1060 ], [ %1063, %1062 ], [ %939, %1049 ]
-  %1066 = fptoui float %1065 to i32
-  %1067 = uitofp i32 %1066 to float
-  %1068 = uitofp i32 %1033 to float
-  %1069 = fptoui float %45 to i32
-  %1070 = fptoui float %182 to i32
-  %1071 = fptoui float %1067 to i32
-  %1072 = fptoui float %1068 to i32
-  %1073 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1074 = extractvalue %dx.types.CBufRet.i32 %1073, 0
-  %1075 = extractvalue %dx.types.CBufRet.i32 %1073, 1
-  %1076 = extractvalue %dx.types.CBufRet.i32 %1073, 2
-  %1077 = extractvalue %dx.types.CBufRet.i32 %1073, 3
-  %1078 = mul i32 %1074, %1069
-  %1079 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1070, i32 %1075, i32 %1078)  ; IMad(a,b,c)
-  %1080 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1071, i32 %1076, i32 %1079)  ; IMad(a,b,c)
-  %1081 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1072, i32 %1077, i32 %1080)  ; IMad(a,b,c)
-  %1082 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %1081, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1083 = extractvalue %dx.types.ResRet.f16 %1082, 0
-  %1084 = fpext half %1083 to float
-  br label %1085
-
-; <label>:1085                                    ; preds = %1064, %998, %969, %952, %942
-  %1086 = phi float [ %966, %952 ], [ 0.000000e+00, %942 ], [ %997, %969 ], [ %1084, %1064 ], [ 0.000000e+00, %998 ]
-  br i1 %941, label %1087, label %1112
-
-; <label>:1087                                    ; preds = %1085
-  %1088 = fcmp fast oge float %936, 0.000000e+00
-  %1089 = fptoui float %936 to i32
-  %1090 = icmp ult i32 %1089, %13
-  %1091 = and i1 %1088, %1090
-  %1092 = fcmp fast oge float %939, 0.000000e+00
-  %1093 = and i1 %1092, %1091
-  %1094 = fptoui float %939 to i32
-  %1095 = icmp ult i32 %1094, %15
-  %1096 = and i1 %1095, %1093
-  br i1 %1096, label %1097, label %1230
-
-; <label>:1097                                    ; preds = %1087
-  %1098 = fptoui float %45 to i32
-  %1099 = fptoui float %182 to i32
-  %1100 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1101 = extractvalue %dx.types.CBufRet.i32 %1100, 0
-  %1102 = extractvalue %dx.types.CBufRet.i32 %1100, 1
-  %1103 = extractvalue %dx.types.CBufRet.i32 %1100, 2
-  %1104 = extractvalue %dx.types.CBufRet.i32 %1100, 3
-  %1105 = mul i32 %1101, %1098
-  %1106 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1099, i32 %1102, i32 %1105)  ; IMad(a,b,c)
-  %1107 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1094, i32 %1103, i32 %1106)  ; IMad(a,b,c)
-  %1108 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1089, i32 %1104, i32 %1107)  ; IMad(a,b,c)
-  %1109 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %1108, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1110 = extractvalue %dx.types.ResRet.f16 %1109, 0
-  %1111 = fpext half %1110 to float
-  br label %1230
-
-; <label>:1112                                    ; preds = %1085
-  %1113 = icmp eq i32 %940, 1
-  br i1 %1113, label %1114, label %1143
-
-; <label>:1114                                    ; preds = %1112
-  %1115 = add i32 %13, -1
-  %1116 = uitofp i32 %1115 to float
-  %1117 = call float @dx.op.binary.f32(i32 35, float %936, float 0.000000e+00)  ; FMax(a,b)
-  %1118 = call float @dx.op.binary.f32(i32 36, float %1117, float %1116)  ; FMin(a,b)
-  %1119 = fptoui float %1118 to i32
-  %1120 = add i32 %15, -1
-  %1121 = uitofp i32 %1120 to float
-  %1122 = call float @dx.op.binary.f32(i32 35, float %939, float 0.000000e+00)  ; FMax(a,b)
-  %1123 = call float @dx.op.binary.f32(i32 36, float %1122, float %1121)  ; FMin(a,b)
-  %1124 = fptoui float %1123 to i32
-  %1125 = uitofp i32 %1124 to float
-  %1126 = uitofp i32 %1119 to float
-  %1127 = fptoui float %45 to i32
-  %1128 = fptoui float %182 to i32
-  %1129 = fptoui float %1125 to i32
-  %1130 = fptoui float %1126 to i32
-  %1131 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1132 = extractvalue %dx.types.CBufRet.i32 %1131, 0
-  %1133 = extractvalue %dx.types.CBufRet.i32 %1131, 1
-  %1134 = extractvalue %dx.types.CBufRet.i32 %1131, 2
-  %1135 = extractvalue %dx.types.CBufRet.i32 %1131, 3
-  %1136 = mul i32 %1132, %1127
-  %1137 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1128, i32 %1133, i32 %1136)  ; IMad(a,b,c)
-  %1138 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1129, i32 %1134, i32 %1137)  ; IMad(a,b,c)
-  %1139 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1130, i32 %1135, i32 %1138)  ; IMad(a,b,c)
-  %1140 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %1139, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1141 = extractvalue %dx.types.ResRet.f16 %1140, 0
-  %1142 = fpext half %1141 to float
-  br label %1230
-
-; <label>:1143                                    ; preds = %1112
-  %1144 = icmp eq i32 %940, 2
-  br i1 %1144, label %1145, label %1230
-
-; <label>:1145                                    ; preds = %1143
-  %1146 = fsub fast float %22, %20
-  %1147 = fcmp fast olt float %936, %20
-  br i1 %1147, label %1148, label %1161
-
-; <label>:1148                                    ; preds = %1145
-  %1149 = fsub fast float %20, %936
-  %1150 = fdiv fast float %1149, %1146
-  %1151 = fptoui float %1150 to i32
-  %1152 = uitofp i32 %1151 to float
-  %1153 = fmul fast float %1152, %1146
-  %1154 = fsub fast float %1149, %1153
-  %1155 = and i32 %1151, 1
-  %1156 = icmp eq i32 %1155, 0
-  br i1 %1156, label %1157, label %1159
-
-; <label>:1157                                    ; preds = %1148
-  %1158 = fadd fast float %1154, %20
-  br label %1176
-
-; <label>:1159                                    ; preds = %1148
-  %1160 = fsub fast float %22, %1154
-  br label %1176
-
-; <label>:1161                                    ; preds = %1145
-  %1162 = fcmp fast ogt float %936, %22
-  br i1 %1162, label %1163, label %1176
-
-; <label>:1163                                    ; preds = %1161
-  %1164 = fsub fast float %936, %22
-  %1165 = fdiv fast float %1164, %1146
-  %1166 = fptoui float %1165 to i32
-  %1167 = uitofp i32 %1166 to float
-  %1168 = fmul fast float %1167, %1146
-  %1169 = fsub fast float %1164, %1168
-  %1170 = and i32 %1166, 1
-  %1171 = icmp eq i32 %1170, 0
-  br i1 %1171, label %1172, label %1174
-
-; <label>:1172                                    ; preds = %1163
-  %1173 = fsub fast float %22, %1169
-  br label %1176
-
-; <label>:1174                                    ; preds = %1163
-  %1175 = fadd fast float %1169, %20
-  br label %1176
-
-; <label>:1176                                    ; preds = %1174, %1172, %1161, %1159, %1157
-  %1177 = phi float [ %1158, %1157 ], [ %1160, %1159 ], [ %1173, %1172 ], [ %1175, %1174 ], [ %936, %1161 ]
-  %1178 = fptoui float %1177 to i32
-  %1179 = fsub fast float %24, %20
-  %1180 = fcmp fast olt float %939, %20
-  br i1 %1180, label %1181, label %1194
-
-; <label>:1181                                    ; preds = %1176
-  %1182 = fsub fast float %20, %939
-  %1183 = fdiv fast float %1182, %1179
-  %1184 = fptoui float %1183 to i32
-  %1185 = uitofp i32 %1184 to float
-  %1186 = fmul fast float %1185, %1179
-  %1187 = fsub fast float %1182, %1186
-  %1188 = and i32 %1184, 1
-  %1189 = icmp eq i32 %1188, 0
-  br i1 %1189, label %1190, label %1192
-
-; <label>:1190                                    ; preds = %1181
-  %1191 = fadd fast float %1187, %20
-  br label %1209
-
-; <label>:1192                                    ; preds = %1181
-  %1193 = fsub fast float %24, %1187
-  br label %1209
-
-; <label>:1194                                    ; preds = %1176
-  %1195 = fcmp fast ogt float %939, %24
-  br i1 %1195, label %1196, label %1209
-
-; <label>:1196                                    ; preds = %1194
-  %1197 = fsub fast float %939, %24
-  %1198 = fdiv fast float %1197, %1179
-  %1199 = fptoui float %1198 to i32
-  %1200 = uitofp i32 %1199 to float
-  %1201 = fmul fast float %1200, %1179
-  %1202 = fsub fast float %1197, %1201
-  %1203 = and i32 %1199, 1
-  %1204 = icmp eq i32 %1203, 0
-  br i1 %1204, label %1205, label %1207
-
-; <label>:1205                                    ; preds = %1196
-  %1206 = fsub fast float %24, %1202
-  br label %1209
-
-; <label>:1207                                    ; preds = %1196
-  %1208 = fadd fast float %1202, %20
-  br label %1209
-
-; <label>:1209                                    ; preds = %1207, %1205, %1194, %1192, %1190
-  %1210 = phi float [ %1191, %1190 ], [ %1193, %1192 ], [ %1206, %1205 ], [ %1208, %1207 ], [ %939, %1194 ]
-  %1211 = fptoui float %1210 to i32
-  %1212 = uitofp i32 %1211 to float
-  %1213 = uitofp i32 %1178 to float
-  %1214 = fptoui float %45 to i32
-  %1215 = fptoui float %182 to i32
-  %1216 = fptoui float %1212 to i32
-  %1217 = fptoui float %1213 to i32
-  %1218 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1219 = extractvalue %dx.types.CBufRet.i32 %1218, 0
-  %1220 = extractvalue %dx.types.CBufRet.i32 %1218, 1
-  %1221 = extractvalue %dx.types.CBufRet.i32 %1218, 2
-  %1222 = extractvalue %dx.types.CBufRet.i32 %1218, 3
-  %1223 = mul i32 %1219, %1214
-  %1224 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1215, i32 %1220, i32 %1223)  ; IMad(a,b,c)
-  %1225 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1216, i32 %1221, i32 %1224)  ; IMad(a,b,c)
-  %1226 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1217, i32 %1222, i32 %1225)  ; IMad(a,b,c)
-  %1227 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %1226, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1228 = extractvalue %dx.types.ResRet.f16 %1227, 0
-  %1229 = fpext half %1228 to float
-  br label %1230
-
-; <label>:1230                                    ; preds = %1209, %1143, %1114, %1097, %1087
-  %1231 = phi float [ %1111, %1097 ], [ 0.000000e+00, %1087 ], [ %1142, %1114 ], [ %1229, %1209 ], [ 0.000000e+00, %1143 ]
-  %1232 = fadd fast float %936, 1.000000e+00
-  br i1 %941, label %1233, label %1258
-
-; <label>:1233                                    ; preds = %1230
-  %1234 = fcmp fast oge float %1232, 0.000000e+00
-  %1235 = fptoui float %1232 to i32
-  %1236 = icmp ult i32 %1235, %13
-  %1237 = and i1 %1234, %1236
-  %1238 = fcmp fast oge float %939, 0.000000e+00
-  %1239 = and i1 %1238, %1237
-  %1240 = fptoui float %939 to i32
-  %1241 = icmp ult i32 %1240, %15
-  %1242 = and i1 %1241, %1239
-  br i1 %1242, label %1243, label %1376
-
-; <label>:1243                                    ; preds = %1233
-  %1244 = fptoui float %45 to i32
-  %1245 = fptoui float %182 to i32
-  %1246 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1247 = extractvalue %dx.types.CBufRet.i32 %1246, 0
-  %1248 = extractvalue %dx.types.CBufRet.i32 %1246, 1
-  %1249 = extractvalue %dx.types.CBufRet.i32 %1246, 2
-  %1250 = extractvalue %dx.types.CBufRet.i32 %1246, 3
-  %1251 = mul i32 %1247, %1244
-  %1252 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1245, i32 %1248, i32 %1251)  ; IMad(a,b,c)
-  %1253 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1240, i32 %1249, i32 %1252)  ; IMad(a,b,c)
-  %1254 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1235, i32 %1250, i32 %1253)  ; IMad(a,b,c)
-  %1255 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %1254, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1256 = extractvalue %dx.types.ResRet.f16 %1255, 0
-  %1257 = fpext half %1256 to float
-  br label %1376
-
-; <label>:1258                                    ; preds = %1230
-  %1259 = icmp eq i32 %940, 1
-  br i1 %1259, label %1260, label %1289
-
-; <label>:1260                                    ; preds = %1258
-  %1261 = add i32 %13, -1
-  %1262 = uitofp i32 %1261 to float
-  %1263 = call float @dx.op.binary.f32(i32 35, float %1232, float 0.000000e+00)  ; FMax(a,b)
-  %1264 = call float @dx.op.binary.f32(i32 36, float %1263, float %1262)  ; FMin(a,b)
-  %1265 = fptoui float %1264 to i32
-  %1266 = add i32 %15, -1
-  %1267 = uitofp i32 %1266 to float
-  %1268 = call float @dx.op.binary.f32(i32 35, float %939, float 0.000000e+00)  ; FMax(a,b)
-  %1269 = call float @dx.op.binary.f32(i32 36, float %1268, float %1267)  ; FMin(a,b)
-  %1270 = fptoui float %1269 to i32
-  %1271 = uitofp i32 %1270 to float
-  %1272 = uitofp i32 %1265 to float
-  %1273 = fptoui float %45 to i32
-  %1274 = fptoui float %182 to i32
-  %1275 = fptoui float %1271 to i32
-  %1276 = fptoui float %1272 to i32
-  %1277 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1278 = extractvalue %dx.types.CBufRet.i32 %1277, 0
-  %1279 = extractvalue %dx.types.CBufRet.i32 %1277, 1
-  %1280 = extractvalue %dx.types.CBufRet.i32 %1277, 2
-  %1281 = extractvalue %dx.types.CBufRet.i32 %1277, 3
-  %1282 = mul i32 %1278, %1273
-  %1283 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1274, i32 %1279, i32 %1282)  ; IMad(a,b,c)
-  %1284 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1275, i32 %1280, i32 %1283)  ; IMad(a,b,c)
-  %1285 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1276, i32 %1281, i32 %1284)  ; IMad(a,b,c)
-  %1286 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %1285, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1287 = extractvalue %dx.types.ResRet.f16 %1286, 0
-  %1288 = fpext half %1287 to float
-  br label %1376
-
-; <label>:1289                                    ; preds = %1258
-  %1290 = icmp eq i32 %940, 2
-  br i1 %1290, label %1291, label %1376
-
-; <label>:1291                                    ; preds = %1289
-  %1292 = fsub fast float %22, %20
-  %1293 = fcmp fast olt float %1232, %20
-  br i1 %1293, label %1294, label %1307
-
-; <label>:1294                                    ; preds = %1291
-  %1295 = fsub fast float %20, %1232
-  %1296 = fdiv fast float %1295, %1292
-  %1297 = fptoui float %1296 to i32
-  %1298 = uitofp i32 %1297 to float
-  %1299 = fmul fast float %1298, %1292
-  %1300 = fsub fast float %1295, %1299
-  %1301 = and i32 %1297, 1
-  %1302 = icmp eq i32 %1301, 0
-  br i1 %1302, label %1303, label %1305
-
-; <label>:1303                                    ; preds = %1294
-  %1304 = fadd fast float %1300, %20
-  br label %1322
-
-; <label>:1305                                    ; preds = %1294
-  %1306 = fsub fast float %22, %1300
-  br label %1322
-
-; <label>:1307                                    ; preds = %1291
-  %1308 = fcmp fast ogt float %1232, %22
-  br i1 %1308, label %1309, label %1322
-
-; <label>:1309                                    ; preds = %1307
-  %1310 = fsub fast float %1232, %22
-  %1311 = fdiv fast float %1310, %1292
-  %1312 = fptoui float %1311 to i32
-  %1313 = uitofp i32 %1312 to float
-  %1314 = fmul fast float %1313, %1292
-  %1315 = fsub fast float %1310, %1314
-  %1316 = and i32 %1312, 1
-  %1317 = icmp eq i32 %1316, 0
-  br i1 %1317, label %1318, label %1320
-
-; <label>:1318                                    ; preds = %1309
-  %1319 = fsub fast float %22, %1315
-  br label %1322
-
-; <label>:1320                                    ; preds = %1309
-  %1321 = fadd fast float %1315, %20
-  br label %1322
-
-; <label>:1322                                    ; preds = %1320, %1318, %1307, %1305, %1303
-  %1323 = phi float [ %1304, %1303 ], [ %1306, %1305 ], [ %1319, %1318 ], [ %1321, %1320 ], [ %1232, %1307 ]
-  %1324 = fptoui float %1323 to i32
-  %1325 = fsub fast float %24, %20
-  %1326 = fcmp fast olt float %939, %20
-  br i1 %1326, label %1327, label %1340
-
-; <label>:1327                                    ; preds = %1322
-  %1328 = fsub fast float %20, %939
-  %1329 = fdiv fast float %1328, %1325
-  %1330 = fptoui float %1329 to i32
-  %1331 = uitofp i32 %1330 to float
-  %1332 = fmul fast float %1331, %1325
-  %1333 = fsub fast float %1328, %1332
-  %1334 = and i32 %1330, 1
-  %1335 = icmp eq i32 %1334, 0
-  br i1 %1335, label %1336, label %1338
-
-; <label>:1336                                    ; preds = %1327
-  %1337 = fadd fast float %1333, %20
-  br label %1355
-
-; <label>:1338                                    ; preds = %1327
-  %1339 = fsub fast float %24, %1333
-  br label %1355
-
-; <label>:1340                                    ; preds = %1322
-  %1341 = fcmp fast ogt float %939, %24
-  br i1 %1341, label %1342, label %1355
-
-; <label>:1342                                    ; preds = %1340
-  %1343 = fsub fast float %939, %24
-  %1344 = fdiv fast float %1343, %1325
-  %1345 = fptoui float %1344 to i32
-  %1346 = uitofp i32 %1345 to float
-  %1347 = fmul fast float %1346, %1325
-  %1348 = fsub fast float %1343, %1347
-  %1349 = and i32 %1345, 1
-  %1350 = icmp eq i32 %1349, 0
-  br i1 %1350, label %1351, label %1353
-
-; <label>:1351                                    ; preds = %1342
-  %1352 = fsub fast float %24, %1348
-  br label %1355
-
-; <label>:1353                                    ; preds = %1342
-  %1354 = fadd fast float %1348, %20
-  br label %1355
-
-; <label>:1355                                    ; preds = %1353, %1351, %1340, %1338, %1336
-  %1356 = phi float [ %1337, %1336 ], [ %1339, %1338 ], [ %1352, %1351 ], [ %1354, %1353 ], [ %939, %1340 ]
-  %1357 = fptoui float %1356 to i32
-  %1358 = uitofp i32 %1357 to float
-  %1359 = uitofp i32 %1324 to float
-  %1360 = fptoui float %45 to i32
-  %1361 = fptoui float %182 to i32
-  %1362 = fptoui float %1358 to i32
-  %1363 = fptoui float %1359 to i32
-  %1364 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1365 = extractvalue %dx.types.CBufRet.i32 %1364, 0
-  %1366 = extractvalue %dx.types.CBufRet.i32 %1364, 1
-  %1367 = extractvalue %dx.types.CBufRet.i32 %1364, 2
-  %1368 = extractvalue %dx.types.CBufRet.i32 %1364, 3
-  %1369 = mul i32 %1365, %1360
-  %1370 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1361, i32 %1366, i32 %1369)  ; IMad(a,b,c)
-  %1371 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1362, i32 %1367, i32 %1370)  ; IMad(a,b,c)
-  %1372 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1363, i32 %1368, i32 %1371)  ; IMad(a,b,c)
-  %1373 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %1372, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1374 = extractvalue %dx.types.ResRet.f16 %1373, 0
-  %1375 = fpext half %1374 to float
-  br label %1376
-
-; <label>:1376                                    ; preds = %1355, %1289, %1260, %1243, %1233
-  %1377 = phi float [ %1257, %1243 ], [ 0.000000e+00, %1233 ], [ %1288, %1260 ], [ %1375, %1355 ], [ 0.000000e+00, %1289 ]
-  %1378 = fadd fast float %936, 2.000000e+00
-  br i1 %941, label %1379, label %1404
-
-; <label>:1379                                    ; preds = %1376
-  %1380 = fcmp fast oge float %1378, 0.000000e+00
-  %1381 = fptoui float %1378 to i32
-  %1382 = icmp ult i32 %1381, %13
-  %1383 = and i1 %1380, %1382
-  %1384 = fcmp fast oge float %939, 0.000000e+00
-  %1385 = and i1 %1384, %1383
-  %1386 = fptoui float %939 to i32
-  %1387 = icmp ult i32 %1386, %15
-  %1388 = and i1 %1387, %1385
-  br i1 %1388, label %1389, label %1522
-
-; <label>:1389                                    ; preds = %1379
-  %1390 = fptoui float %45 to i32
-  %1391 = fptoui float %182 to i32
-  %1392 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1393 = extractvalue %dx.types.CBufRet.i32 %1392, 0
-  %1394 = extractvalue %dx.types.CBufRet.i32 %1392, 1
-  %1395 = extractvalue %dx.types.CBufRet.i32 %1392, 2
-  %1396 = extractvalue %dx.types.CBufRet.i32 %1392, 3
-  %1397 = mul i32 %1393, %1390
-  %1398 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1391, i32 %1394, i32 %1397)  ; IMad(a,b,c)
-  %1399 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1386, i32 %1395, i32 %1398)  ; IMad(a,b,c)
-  %1400 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1381, i32 %1396, i32 %1399)  ; IMad(a,b,c)
-  %1401 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %1400, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1402 = extractvalue %dx.types.ResRet.f16 %1401, 0
-  %1403 = fpext half %1402 to float
-  br label %1522
-
-; <label>:1404                                    ; preds = %1376
-  %1405 = icmp eq i32 %940, 1
-  br i1 %1405, label %1406, label %1435
-
-; <label>:1406                                    ; preds = %1404
-  %1407 = add i32 %13, -1
-  %1408 = uitofp i32 %1407 to float
-  %1409 = call float @dx.op.binary.f32(i32 35, float %1378, float 0.000000e+00)  ; FMax(a,b)
-  %1410 = call float @dx.op.binary.f32(i32 36, float %1409, float %1408)  ; FMin(a,b)
-  %1411 = fptoui float %1410 to i32
-  %1412 = add i32 %15, -1
-  %1413 = uitofp i32 %1412 to float
-  %1414 = call float @dx.op.binary.f32(i32 35, float %939, float 0.000000e+00)  ; FMax(a,b)
-  %1415 = call float @dx.op.binary.f32(i32 36, float %1414, float %1413)  ; FMin(a,b)
-  %1416 = fptoui float %1415 to i32
-  %1417 = uitofp i32 %1416 to float
-  %1418 = uitofp i32 %1411 to float
-  %1419 = fptoui float %45 to i32
-  %1420 = fptoui float %182 to i32
-  %1421 = fptoui float %1417 to i32
-  %1422 = fptoui float %1418 to i32
-  %1423 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1424 = extractvalue %dx.types.CBufRet.i32 %1423, 0
-  %1425 = extractvalue %dx.types.CBufRet.i32 %1423, 1
-  %1426 = extractvalue %dx.types.CBufRet.i32 %1423, 2
-  %1427 = extractvalue %dx.types.CBufRet.i32 %1423, 3
-  %1428 = mul i32 %1424, %1419
-  %1429 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1420, i32 %1425, i32 %1428)  ; IMad(a,b,c)
-  %1430 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1421, i32 %1426, i32 %1429)  ; IMad(a,b,c)
-  %1431 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1422, i32 %1427, i32 %1430)  ; IMad(a,b,c)
-  %1432 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %1431, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1433 = extractvalue %dx.types.ResRet.f16 %1432, 0
-  %1434 = fpext half %1433 to float
-  br label %1522
-
-; <label>:1435                                    ; preds = %1404
-  %1436 = icmp eq i32 %940, 2
-  br i1 %1436, label %1437, label %1522
-
-; <label>:1437                                    ; preds = %1435
-  %1438 = fsub fast float %22, %20
-  %1439 = fcmp fast olt float %1378, %20
-  br i1 %1439, label %1440, label %1453
-
-; <label>:1440                                    ; preds = %1437
-  %1441 = fsub fast float %20, %1378
-  %1442 = fdiv fast float %1441, %1438
-  %1443 = fptoui float %1442 to i32
-  %1444 = uitofp i32 %1443 to float
-  %1445 = fmul fast float %1444, %1438
-  %1446 = fsub fast float %1441, %1445
-  %1447 = and i32 %1443, 1
-  %1448 = icmp eq i32 %1447, 0
-  br i1 %1448, label %1449, label %1451
-
-; <label>:1449                                    ; preds = %1440
-  %1450 = fadd fast float %1446, %20
-  br label %1468
-
-; <label>:1451                                    ; preds = %1440
-  %1452 = fsub fast float %22, %1446
-  br label %1468
-
-; <label>:1453                                    ; preds = %1437
-  %1454 = fcmp fast ogt float %1378, %22
-  br i1 %1454, label %1455, label %1468
-
-; <label>:1455                                    ; preds = %1453
-  %1456 = fsub fast float %1378, %22
-  %1457 = fdiv fast float %1456, %1438
-  %1458 = fptoui float %1457 to i32
-  %1459 = uitofp i32 %1458 to float
-  %1460 = fmul fast float %1459, %1438
-  %1461 = fsub fast float %1456, %1460
-  %1462 = and i32 %1458, 1
-  %1463 = icmp eq i32 %1462, 0
-  br i1 %1463, label %1464, label %1466
-
-; <label>:1464                                    ; preds = %1455
-  %1465 = fsub fast float %22, %1461
-  br label %1468
-
-; <label>:1466                                    ; preds = %1455
-  %1467 = fadd fast float %1461, %20
-  br label %1468
-
-; <label>:1468                                    ; preds = %1466, %1464, %1453, %1451, %1449
-  %1469 = phi float [ %1450, %1449 ], [ %1452, %1451 ], [ %1465, %1464 ], [ %1467, %1466 ], [ %1378, %1453 ]
-  %1470 = fptoui float %1469 to i32
-  %1471 = fsub fast float %24, %20
-  %1472 = fcmp fast olt float %939, %20
-  br i1 %1472, label %1473, label %1486
-
-; <label>:1473                                    ; preds = %1468
-  %1474 = fsub fast float %20, %939
-  %1475 = fdiv fast float %1474, %1471
-  %1476 = fptoui float %1475 to i32
-  %1477 = uitofp i32 %1476 to float
-  %1478 = fmul fast float %1477, %1471
-  %1479 = fsub fast float %1474, %1478
-  %1480 = and i32 %1476, 1
-  %1481 = icmp eq i32 %1480, 0
-  br i1 %1481, label %1482, label %1484
-
-; <label>:1482                                    ; preds = %1473
-  %1483 = fadd fast float %1479, %20
-  br label %1501
-
-; <label>:1484                                    ; preds = %1473
-  %1485 = fsub fast float %24, %1479
-  br label %1501
-
-; <label>:1486                                    ; preds = %1468
-  %1487 = fcmp fast ogt float %939, %24
-  br i1 %1487, label %1488, label %1501
-
-; <label>:1488                                    ; preds = %1486
-  %1489 = fsub fast float %939, %24
-  %1490 = fdiv fast float %1489, %1471
-  %1491 = fptoui float %1490 to i32
-  %1492 = uitofp i32 %1491 to float
-  %1493 = fmul fast float %1492, %1471
-  %1494 = fsub fast float %1489, %1493
-  %1495 = and i32 %1491, 1
-  %1496 = icmp eq i32 %1495, 0
-  br i1 %1496, label %1497, label %1499
-
-; <label>:1497                                    ; preds = %1488
-  %1498 = fsub fast float %24, %1494
-  br label %1501
-
-; <label>:1499                                    ; preds = %1488
-  %1500 = fadd fast float %1494, %20
-  br label %1501
-
-; <label>:1501                                    ; preds = %1499, %1497, %1486, %1484, %1482
-  %1502 = phi float [ %1483, %1482 ], [ %1485, %1484 ], [ %1498, %1497 ], [ %1500, %1499 ], [ %939, %1486 ]
-  %1503 = fptoui float %1502 to i32
-  %1504 = uitofp i32 %1503 to float
-  %1505 = uitofp i32 %1470 to float
-  %1506 = fptoui float %45 to i32
-  %1507 = fptoui float %182 to i32
-  %1508 = fptoui float %1504 to i32
-  %1509 = fptoui float %1505 to i32
-  %1510 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1511 = extractvalue %dx.types.CBufRet.i32 %1510, 0
-  %1512 = extractvalue %dx.types.CBufRet.i32 %1510, 1
-  %1513 = extractvalue %dx.types.CBufRet.i32 %1510, 2
-  %1514 = extractvalue %dx.types.CBufRet.i32 %1510, 3
-  %1515 = mul i32 %1511, %1506
-  %1516 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1507, i32 %1512, i32 %1515)  ; IMad(a,b,c)
-  %1517 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1508, i32 %1513, i32 %1516)  ; IMad(a,b,c)
-  %1518 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1509, i32 %1514, i32 %1517)  ; IMad(a,b,c)
-  %1519 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %1518, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1520 = extractvalue %dx.types.ResRet.f16 %1519, 0
-  %1521 = fpext half %1520 to float
-  br label %1522
-
-; <label>:1522                                    ; preds = %1501, %1435, %1406, %1389, %1379
-  %1523 = phi float [ %1403, %1389 ], [ 0.000000e+00, %1379 ], [ %1434, %1406 ], [ %1521, %1501 ], [ 0.000000e+00, %1435 ]
-  br i1 %941, label %1524, label %1549
-
-; <label>:1524                                    ; preds = %1522
-  %1525 = fcmp fast oge float %937, 0.000000e+00
-  %1526 = fptoui float %937 to i32
-  %1527 = icmp ult i32 %1526, %13
-  %1528 = and i1 %1525, %1527
-  %1529 = fcmp fast oge float %938, 0.000000e+00
-  %1530 = and i1 %1529, %1528
-  %1531 = fptoui float %938 to i32
-  %1532 = icmp ult i32 %1531, %15
-  %1533 = and i1 %1532, %1530
-  br i1 %1533, label %1534, label %1667
-
-; <label>:1534                                    ; preds = %1524
-  %1535 = fptoui float %45 to i32
-  %1536 = fptoui float %182 to i32
-  %1537 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1538 = extractvalue %dx.types.CBufRet.i32 %1537, 0
-  %1539 = extractvalue %dx.types.CBufRet.i32 %1537, 1
-  %1540 = extractvalue %dx.types.CBufRet.i32 %1537, 2
-  %1541 = extractvalue %dx.types.CBufRet.i32 %1537, 3
-  %1542 = mul i32 %1538, %1535
-  %1543 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1536, i32 %1539, i32 %1542)  ; IMad(a,b,c)
-  %1544 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1531, i32 %1540, i32 %1543)  ; IMad(a,b,c)
-  %1545 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1526, i32 %1541, i32 %1544)  ; IMad(a,b,c)
-  %1546 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %1545, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1547 = extractvalue %dx.types.ResRet.f16 %1546, 0
-  %1548 = fpext half %1547 to float
-  br label %1667
-
-; <label>:1549                                    ; preds = %1522
-  %1550 = icmp eq i32 %940, 1
-  br i1 %1550, label %1551, label %1580
-
-; <label>:1551                                    ; preds = %1549
-  %1552 = add i32 %13, -1
-  %1553 = uitofp i32 %1552 to float
-  %1554 = call float @dx.op.binary.f32(i32 35, float %937, float 0.000000e+00)  ; FMax(a,b)
-  %1555 = call float @dx.op.binary.f32(i32 36, float %1554, float %1553)  ; FMin(a,b)
-  %1556 = fptoui float %1555 to i32
-  %1557 = add i32 %15, -1
-  %1558 = uitofp i32 %1557 to float
-  %1559 = call float @dx.op.binary.f32(i32 35, float %938, float 0.000000e+00)  ; FMax(a,b)
-  %1560 = call float @dx.op.binary.f32(i32 36, float %1559, float %1558)  ; FMin(a,b)
-  %1561 = fptoui float %1560 to i32
-  %1562 = uitofp i32 %1561 to float
-  %1563 = uitofp i32 %1556 to float
-  %1564 = fptoui float %45 to i32
-  %1565 = fptoui float %182 to i32
-  %1566 = fptoui float %1562 to i32
-  %1567 = fptoui float %1563 to i32
-  %1568 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1569 = extractvalue %dx.types.CBufRet.i32 %1568, 0
-  %1570 = extractvalue %dx.types.CBufRet.i32 %1568, 1
-  %1571 = extractvalue %dx.types.CBufRet.i32 %1568, 2
-  %1572 = extractvalue %dx.types.CBufRet.i32 %1568, 3
-  %1573 = mul i32 %1569, %1564
-  %1574 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1565, i32 %1570, i32 %1573)  ; IMad(a,b,c)
-  %1575 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1566, i32 %1571, i32 %1574)  ; IMad(a,b,c)
-  %1576 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1567, i32 %1572, i32 %1575)  ; IMad(a,b,c)
-  %1577 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %1576, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1578 = extractvalue %dx.types.ResRet.f16 %1577, 0
-  %1579 = fpext half %1578 to float
-  br label %1667
-
-; <label>:1580                                    ; preds = %1549
-  %1581 = icmp eq i32 %940, 2
-  br i1 %1581, label %1582, label %1667
-
-; <label>:1582                                    ; preds = %1580
-  %1583 = fsub fast float %22, %20
-  %1584 = fcmp fast olt float %937, %20
-  br i1 %1584, label %1585, label %1598
-
-; <label>:1585                                    ; preds = %1582
-  %1586 = fsub fast float %20, %937
-  %1587 = fdiv fast float %1586, %1583
-  %1588 = fptoui float %1587 to i32
-  %1589 = uitofp i32 %1588 to float
-  %1590 = fmul fast float %1589, %1583
-  %1591 = fsub fast float %1586, %1590
-  %1592 = and i32 %1588, 1
-  %1593 = icmp eq i32 %1592, 0
-  br i1 %1593, label %1594, label %1596
-
-; <label>:1594                                    ; preds = %1585
-  %1595 = fadd fast float %1591, %20
-  br label %1613
-
-; <label>:1596                                    ; preds = %1585
-  %1597 = fsub fast float %22, %1591
-  br label %1613
-
-; <label>:1598                                    ; preds = %1582
-  %1599 = fcmp fast ogt float %937, %22
-  br i1 %1599, label %1600, label %1613
-
-; <label>:1600                                    ; preds = %1598
-  %1601 = fsub fast float %937, %22
-  %1602 = fdiv fast float %1601, %1583
-  %1603 = fptoui float %1602 to i32
-  %1604 = uitofp i32 %1603 to float
-  %1605 = fmul fast float %1604, %1583
-  %1606 = fsub fast float %1601, %1605
-  %1607 = and i32 %1603, 1
-  %1608 = icmp eq i32 %1607, 0
-  br i1 %1608, label %1609, label %1611
-
-; <label>:1609                                    ; preds = %1600
-  %1610 = fsub fast float %22, %1606
-  br label %1613
-
-; <label>:1611                                    ; preds = %1600
-  %1612 = fadd fast float %1606, %20
-  br label %1613
-
-; <label>:1613                                    ; preds = %1611, %1609, %1598, %1596, %1594
-  %1614 = phi float [ %1595, %1594 ], [ %1597, %1596 ], [ %1610, %1609 ], [ %1612, %1611 ], [ %937, %1598 ]
-  %1615 = fptoui float %1614 to i32
-  %1616 = fsub fast float %24, %20
-  %1617 = fcmp fast olt float %938, %20
-  br i1 %1617, label %1618, label %1631
-
-; <label>:1618                                    ; preds = %1613
-  %1619 = fsub fast float %20, %938
-  %1620 = fdiv fast float %1619, %1616
-  %1621 = fptoui float %1620 to i32
-  %1622 = uitofp i32 %1621 to float
-  %1623 = fmul fast float %1622, %1616
-  %1624 = fsub fast float %1619, %1623
-  %1625 = and i32 %1621, 1
-  %1626 = icmp eq i32 %1625, 0
-  br i1 %1626, label %1627, label %1629
-
-; <label>:1627                                    ; preds = %1618
-  %1628 = fadd fast float %1624, %20
-  br label %1646
-
-; <label>:1629                                    ; preds = %1618
-  %1630 = fsub fast float %24, %1624
-  br label %1646
-
-; <label>:1631                                    ; preds = %1613
-  %1632 = fcmp fast ogt float %938, %24
-  br i1 %1632, label %1633, label %1646
-
-; <label>:1633                                    ; preds = %1631
-  %1634 = fsub fast float %938, %24
-  %1635 = fdiv fast float %1634, %1616
-  %1636 = fptoui float %1635 to i32
-  %1637 = uitofp i32 %1636 to float
-  %1638 = fmul fast float %1637, %1616
-  %1639 = fsub fast float %1634, %1638
-  %1640 = and i32 %1636, 1
-  %1641 = icmp eq i32 %1640, 0
-  br i1 %1641, label %1642, label %1644
-
-; <label>:1642                                    ; preds = %1633
-  %1643 = fsub fast float %24, %1639
-  br label %1646
-
-; <label>:1644                                    ; preds = %1633
-  %1645 = fadd fast float %1639, %20
-  br label %1646
-
-; <label>:1646                                    ; preds = %1644, %1642, %1631, %1629, %1627
-  %1647 = phi float [ %1628, %1627 ], [ %1630, %1629 ], [ %1643, %1642 ], [ %1645, %1644 ], [ %938, %1631 ]
-  %1648 = fptoui float %1647 to i32
-  %1649 = uitofp i32 %1648 to float
-  %1650 = uitofp i32 %1615 to float
-  %1651 = fptoui float %45 to i32
-  %1652 = fptoui float %182 to i32
-  %1653 = fptoui float %1649 to i32
-  %1654 = fptoui float %1650 to i32
-  %1655 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1656 = extractvalue %dx.types.CBufRet.i32 %1655, 0
-  %1657 = extractvalue %dx.types.CBufRet.i32 %1655, 1
-  %1658 = extractvalue %dx.types.CBufRet.i32 %1655, 2
-  %1659 = extractvalue %dx.types.CBufRet.i32 %1655, 3
-  %1660 = mul i32 %1656, %1651
-  %1661 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1652, i32 %1657, i32 %1660)  ; IMad(a,b,c)
-  %1662 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1653, i32 %1658, i32 %1661)  ; IMad(a,b,c)
-  %1663 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1654, i32 %1659, i32 %1662)  ; IMad(a,b,c)
-  %1664 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %1663, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1665 = extractvalue %dx.types.ResRet.f16 %1664, 0
-  %1666 = fpext half %1665 to float
-  br label %1667
-
-; <label>:1667                                    ; preds = %1646, %1580, %1551, %1534, %1524
-  %1668 = phi float [ %1548, %1534 ], [ 0.000000e+00, %1524 ], [ %1579, %1551 ], [ %1666, %1646 ], [ 0.000000e+00, %1580 ]
-  br i1 %941, label %1669, label %1694
-
-; <label>:1669                                    ; preds = %1667
-  %1670 = fcmp fast oge float %936, 0.000000e+00
-  %1671 = fptoui float %936 to i32
-  %1672 = icmp ult i32 %1671, %13
-  %1673 = and i1 %1670, %1672
-  %1674 = fcmp fast oge float %938, 0.000000e+00
-  %1675 = and i1 %1674, %1673
-  %1676 = fptoui float %938 to i32
-  %1677 = icmp ult i32 %1676, %15
-  %1678 = and i1 %1677, %1675
-  br i1 %1678, label %1679, label %1812
-
-; <label>:1679                                    ; preds = %1669
-  %1680 = fptoui float %45 to i32
-  %1681 = fptoui float %182 to i32
-  %1682 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1683 = extractvalue %dx.types.CBufRet.i32 %1682, 0
-  %1684 = extractvalue %dx.types.CBufRet.i32 %1682, 1
-  %1685 = extractvalue %dx.types.CBufRet.i32 %1682, 2
-  %1686 = extractvalue %dx.types.CBufRet.i32 %1682, 3
-  %1687 = mul i32 %1683, %1680
-  %1688 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1681, i32 %1684, i32 %1687)  ; IMad(a,b,c)
-  %1689 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1676, i32 %1685, i32 %1688)  ; IMad(a,b,c)
-  %1690 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1671, i32 %1686, i32 %1689)  ; IMad(a,b,c)
-  %1691 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %1690, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1692 = extractvalue %dx.types.ResRet.f16 %1691, 0
-  %1693 = fpext half %1692 to float
-  br label %1812
-
-; <label>:1694                                    ; preds = %1667
-  %1695 = icmp eq i32 %940, 1
-  br i1 %1695, label %1696, label %1725
-
-; <label>:1696                                    ; preds = %1694
-  %1697 = add i32 %13, -1
-  %1698 = uitofp i32 %1697 to float
-  %1699 = call float @dx.op.binary.f32(i32 35, float %936, float 0.000000e+00)  ; FMax(a,b)
-  %1700 = call float @dx.op.binary.f32(i32 36, float %1699, float %1698)  ; FMin(a,b)
-  %1701 = fptoui float %1700 to i32
-  %1702 = add i32 %15, -1
-  %1703 = uitofp i32 %1702 to float
-  %1704 = call float @dx.op.binary.f32(i32 35, float %938, float 0.000000e+00)  ; FMax(a,b)
-  %1705 = call float @dx.op.binary.f32(i32 36, float %1704, float %1703)  ; FMin(a,b)
-  %1706 = fptoui float %1705 to i32
-  %1707 = uitofp i32 %1706 to float
-  %1708 = uitofp i32 %1701 to float
-  %1709 = fptoui float %45 to i32
-  %1710 = fptoui float %182 to i32
-  %1711 = fptoui float %1707 to i32
-  %1712 = fptoui float %1708 to i32
-  %1713 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1714 = extractvalue %dx.types.CBufRet.i32 %1713, 0
-  %1715 = extractvalue %dx.types.CBufRet.i32 %1713, 1
-  %1716 = extractvalue %dx.types.CBufRet.i32 %1713, 2
-  %1717 = extractvalue %dx.types.CBufRet.i32 %1713, 3
-  %1718 = mul i32 %1714, %1709
-  %1719 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1710, i32 %1715, i32 %1718)  ; IMad(a,b,c)
-  %1720 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1711, i32 %1716, i32 %1719)  ; IMad(a,b,c)
-  %1721 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1712, i32 %1717, i32 %1720)  ; IMad(a,b,c)
-  %1722 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %1721, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1723 = extractvalue %dx.types.ResRet.f16 %1722, 0
-  %1724 = fpext half %1723 to float
-  br label %1812
-
-; <label>:1725                                    ; preds = %1694
-  %1726 = icmp eq i32 %940, 2
-  br i1 %1726, label %1727, label %1812
-
-; <label>:1727                                    ; preds = %1725
-  %1728 = fsub fast float %22, %20
-  %1729 = fcmp fast olt float %936, %20
-  br i1 %1729, label %1730, label %1743
-
-; <label>:1730                                    ; preds = %1727
-  %1731 = fsub fast float %20, %936
-  %1732 = fdiv fast float %1731, %1728
-  %1733 = fptoui float %1732 to i32
-  %1734 = uitofp i32 %1733 to float
-  %1735 = fmul fast float %1734, %1728
-  %1736 = fsub fast float %1731, %1735
-  %1737 = and i32 %1733, 1
-  %1738 = icmp eq i32 %1737, 0
-  br i1 %1738, label %1739, label %1741
-
-; <label>:1739                                    ; preds = %1730
-  %1740 = fadd fast float %1736, %20
-  br label %1758
-
-; <label>:1741                                    ; preds = %1730
-  %1742 = fsub fast float %22, %1736
-  br label %1758
-
-; <label>:1743                                    ; preds = %1727
-  %1744 = fcmp fast ogt float %936, %22
-  br i1 %1744, label %1745, label %1758
-
-; <label>:1745                                    ; preds = %1743
-  %1746 = fsub fast float %936, %22
-  %1747 = fdiv fast float %1746, %1728
-  %1748 = fptoui float %1747 to i32
-  %1749 = uitofp i32 %1748 to float
-  %1750 = fmul fast float %1749, %1728
-  %1751 = fsub fast float %1746, %1750
-  %1752 = and i32 %1748, 1
-  %1753 = icmp eq i32 %1752, 0
-  br i1 %1753, label %1754, label %1756
-
-; <label>:1754                                    ; preds = %1745
-  %1755 = fsub fast float %22, %1751
-  br label %1758
-
-; <label>:1756                                    ; preds = %1745
-  %1757 = fadd fast float %1751, %20
-  br label %1758
-
-; <label>:1758                                    ; preds = %1756, %1754, %1743, %1741, %1739
-  %1759 = phi float [ %1740, %1739 ], [ %1742, %1741 ], [ %1755, %1754 ], [ %1757, %1756 ], [ %936, %1743 ]
-  %1760 = fptoui float %1759 to i32
-  %1761 = fsub fast float %24, %20
-  %1762 = fcmp fast olt float %938, %20
-  br i1 %1762, label %1763, label %1776
-
-; <label>:1763                                    ; preds = %1758
-  %1764 = fsub fast float %20, %938
-  %1765 = fdiv fast float %1764, %1761
-  %1766 = fptoui float %1765 to i32
-  %1767 = uitofp i32 %1766 to float
-  %1768 = fmul fast float %1767, %1761
-  %1769 = fsub fast float %1764, %1768
-  %1770 = and i32 %1766, 1
-  %1771 = icmp eq i32 %1770, 0
-  br i1 %1771, label %1772, label %1774
-
-; <label>:1772                                    ; preds = %1763
-  %1773 = fadd fast float %1769, %20
-  br label %1791
-
-; <label>:1774                                    ; preds = %1763
-  %1775 = fsub fast float %24, %1769
-  br label %1791
-
-; <label>:1776                                    ; preds = %1758
-  %1777 = fcmp fast ogt float %938, %24
-  br i1 %1777, label %1778, label %1791
-
-; <label>:1778                                    ; preds = %1776
-  %1779 = fsub fast float %938, %24
-  %1780 = fdiv fast float %1779, %1761
-  %1781 = fptoui float %1780 to i32
-  %1782 = uitofp i32 %1781 to float
-  %1783 = fmul fast float %1782, %1761
-  %1784 = fsub fast float %1779, %1783
-  %1785 = and i32 %1781, 1
-  %1786 = icmp eq i32 %1785, 0
-  br i1 %1786, label %1787, label %1789
-
-; <label>:1787                                    ; preds = %1778
-  %1788 = fsub fast float %24, %1784
-  br label %1791
-
-; <label>:1789                                    ; preds = %1778
-  %1790 = fadd fast float %1784, %20
-  br label %1791
-
-; <label>:1791                                    ; preds = %1789, %1787, %1776, %1774, %1772
-  %1792 = phi float [ %1773, %1772 ], [ %1775, %1774 ], [ %1788, %1787 ], [ %1790, %1789 ], [ %938, %1776 ]
-  %1793 = fptoui float %1792 to i32
-  %1794 = uitofp i32 %1793 to float
-  %1795 = uitofp i32 %1760 to float
-  %1796 = fptoui float %45 to i32
-  %1797 = fptoui float %182 to i32
-  %1798 = fptoui float %1794 to i32
-  %1799 = fptoui float %1795 to i32
-  %1800 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1801 = extractvalue %dx.types.CBufRet.i32 %1800, 0
-  %1802 = extractvalue %dx.types.CBufRet.i32 %1800, 1
-  %1803 = extractvalue %dx.types.CBufRet.i32 %1800, 2
-  %1804 = extractvalue %dx.types.CBufRet.i32 %1800, 3
-  %1805 = mul i32 %1801, %1796
-  %1806 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1797, i32 %1802, i32 %1805)  ; IMad(a,b,c)
-  %1807 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1798, i32 %1803, i32 %1806)  ; IMad(a,b,c)
-  %1808 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1799, i32 %1804, i32 %1807)  ; IMad(a,b,c)
-  %1809 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %1808, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1810 = extractvalue %dx.types.ResRet.f16 %1809, 0
-  %1811 = fpext half %1810 to float
-  br label %1812
-
-; <label>:1812                                    ; preds = %1791, %1725, %1696, %1679, %1669
-  %1813 = phi float [ %1693, %1679 ], [ 0.000000e+00, %1669 ], [ %1724, %1696 ], [ %1811, %1791 ], [ 0.000000e+00, %1725 ]
-  br i1 %941, label %1814, label %1839
-
-; <label>:1814                                    ; preds = %1812
-  %1815 = fcmp fast oge float %1232, 0.000000e+00
-  %1816 = fptoui float %1232 to i32
-  %1817 = icmp ult i32 %1816, %13
-  %1818 = and i1 %1815, %1817
-  %1819 = fcmp fast oge float %938, 0.000000e+00
-  %1820 = and i1 %1819, %1818
-  %1821 = fptoui float %938 to i32
-  %1822 = icmp ult i32 %1821, %15
-  %1823 = and i1 %1822, %1820
-  br i1 %1823, label %1824, label %1957
-
-; <label>:1824                                    ; preds = %1814
-  %1825 = fptoui float %45 to i32
-  %1826 = fptoui float %182 to i32
-  %1827 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1828 = extractvalue %dx.types.CBufRet.i32 %1827, 0
-  %1829 = extractvalue %dx.types.CBufRet.i32 %1827, 1
-  %1830 = extractvalue %dx.types.CBufRet.i32 %1827, 2
-  %1831 = extractvalue %dx.types.CBufRet.i32 %1827, 3
-  %1832 = mul i32 %1828, %1825
-  %1833 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1826, i32 %1829, i32 %1832)  ; IMad(a,b,c)
-  %1834 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1821, i32 %1830, i32 %1833)  ; IMad(a,b,c)
-  %1835 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1816, i32 %1831, i32 %1834)  ; IMad(a,b,c)
-  %1836 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %1835, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1837 = extractvalue %dx.types.ResRet.f16 %1836, 0
-  %1838 = fpext half %1837 to float
-  br label %1957
-
-; <label>:1839                                    ; preds = %1812
-  %1840 = icmp eq i32 %940, 1
-  br i1 %1840, label %1841, label %1870
-
-; <label>:1841                                    ; preds = %1839
-  %1842 = add i32 %13, -1
-  %1843 = uitofp i32 %1842 to float
-  %1844 = call float @dx.op.binary.f32(i32 35, float %1232, float 0.000000e+00)  ; FMax(a,b)
-  %1845 = call float @dx.op.binary.f32(i32 36, float %1844, float %1843)  ; FMin(a,b)
-  %1846 = fptoui float %1845 to i32
-  %1847 = add i32 %15, -1
-  %1848 = uitofp i32 %1847 to float
-  %1849 = call float @dx.op.binary.f32(i32 35, float %938, float 0.000000e+00)  ; FMax(a,b)
-  %1850 = call float @dx.op.binary.f32(i32 36, float %1849, float %1848)  ; FMin(a,b)
-  %1851 = fptoui float %1850 to i32
-  %1852 = uitofp i32 %1851 to float
-  %1853 = uitofp i32 %1846 to float
-  %1854 = fptoui float %45 to i32
-  %1855 = fptoui float %182 to i32
-  %1856 = fptoui float %1852 to i32
-  %1857 = fptoui float %1853 to i32
-  %1858 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1859 = extractvalue %dx.types.CBufRet.i32 %1858, 0
-  %1860 = extractvalue %dx.types.CBufRet.i32 %1858, 1
-  %1861 = extractvalue %dx.types.CBufRet.i32 %1858, 2
-  %1862 = extractvalue %dx.types.CBufRet.i32 %1858, 3
-  %1863 = mul i32 %1859, %1854
-  %1864 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1855, i32 %1860, i32 %1863)  ; IMad(a,b,c)
-  %1865 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1856, i32 %1861, i32 %1864)  ; IMad(a,b,c)
-  %1866 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1857, i32 %1862, i32 %1865)  ; IMad(a,b,c)
-  %1867 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %1866, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1868 = extractvalue %dx.types.ResRet.f16 %1867, 0
-  %1869 = fpext half %1868 to float
-  br label %1957
-
-; <label>:1870                                    ; preds = %1839
-  %1871 = icmp eq i32 %940, 2
-  br i1 %1871, label %1872, label %1957
-
-; <label>:1872                                    ; preds = %1870
-  %1873 = fsub fast float %22, %20
-  %1874 = fcmp fast olt float %1232, %20
-  br i1 %1874, label %1875, label %1888
-
-; <label>:1875                                    ; preds = %1872
-  %1876 = fsub fast float %20, %1232
-  %1877 = fdiv fast float %1876, %1873
-  %1878 = fptoui float %1877 to i32
-  %1879 = uitofp i32 %1878 to float
-  %1880 = fmul fast float %1879, %1873
-  %1881 = fsub fast float %1876, %1880
-  %1882 = and i32 %1878, 1
-  %1883 = icmp eq i32 %1882, 0
-  br i1 %1883, label %1884, label %1886
-
-; <label>:1884                                    ; preds = %1875
-  %1885 = fadd fast float %1881, %20
-  br label %1903
-
-; <label>:1886                                    ; preds = %1875
-  %1887 = fsub fast float %22, %1881
-  br label %1903
-
-; <label>:1888                                    ; preds = %1872
-  %1889 = fcmp fast ogt float %1232, %22
-  br i1 %1889, label %1890, label %1903
-
-; <label>:1890                                    ; preds = %1888
-  %1891 = fsub fast float %1232, %22
-  %1892 = fdiv fast float %1891, %1873
-  %1893 = fptoui float %1892 to i32
-  %1894 = uitofp i32 %1893 to float
-  %1895 = fmul fast float %1894, %1873
-  %1896 = fsub fast float %1891, %1895
-  %1897 = and i32 %1893, 1
-  %1898 = icmp eq i32 %1897, 0
-  br i1 %1898, label %1899, label %1901
-
-; <label>:1899                                    ; preds = %1890
-  %1900 = fsub fast float %22, %1896
-  br label %1903
-
-; <label>:1901                                    ; preds = %1890
-  %1902 = fadd fast float %1896, %20
-  br label %1903
-
-; <label>:1903                                    ; preds = %1901, %1899, %1888, %1886, %1884
-  %1904 = phi float [ %1885, %1884 ], [ %1887, %1886 ], [ %1900, %1899 ], [ %1902, %1901 ], [ %1232, %1888 ]
-  %1905 = fptoui float %1904 to i32
-  %1906 = fsub fast float %24, %20
-  %1907 = fcmp fast olt float %938, %20
-  br i1 %1907, label %1908, label %1921
-
-; <label>:1908                                    ; preds = %1903
-  %1909 = fsub fast float %20, %938
-  %1910 = fdiv fast float %1909, %1906
-  %1911 = fptoui float %1910 to i32
-  %1912 = uitofp i32 %1911 to float
-  %1913 = fmul fast float %1912, %1906
-  %1914 = fsub fast float %1909, %1913
-  %1915 = and i32 %1911, 1
-  %1916 = icmp eq i32 %1915, 0
-  br i1 %1916, label %1917, label %1919
-
-; <label>:1917                                    ; preds = %1908
-  %1918 = fadd fast float %1914, %20
-  br label %1936
-
-; <label>:1919                                    ; preds = %1908
-  %1920 = fsub fast float %24, %1914
-  br label %1936
-
-; <label>:1921                                    ; preds = %1903
-  %1922 = fcmp fast ogt float %938, %24
-  br i1 %1922, label %1923, label %1936
-
-; <label>:1923                                    ; preds = %1921
-  %1924 = fsub fast float %938, %24
-  %1925 = fdiv fast float %1924, %1906
-  %1926 = fptoui float %1925 to i32
-  %1927 = uitofp i32 %1926 to float
-  %1928 = fmul fast float %1927, %1906
-  %1929 = fsub fast float %1924, %1928
-  %1930 = and i32 %1926, 1
-  %1931 = icmp eq i32 %1930, 0
-  br i1 %1931, label %1932, label %1934
-
-; <label>:1932                                    ; preds = %1923
-  %1933 = fsub fast float %24, %1929
-  br label %1936
-
-; <label>:1934                                    ; preds = %1923
-  %1935 = fadd fast float %1929, %20
-  br label %1936
-
-; <label>:1936                                    ; preds = %1934, %1932, %1921, %1919, %1917
-  %1937 = phi float [ %1918, %1917 ], [ %1920, %1919 ], [ %1933, %1932 ], [ %1935, %1934 ], [ %938, %1921 ]
-  %1938 = fptoui float %1937 to i32
-  %1939 = uitofp i32 %1938 to float
-  %1940 = uitofp i32 %1905 to float
-  %1941 = fptoui float %45 to i32
-  %1942 = fptoui float %182 to i32
-  %1943 = fptoui float %1939 to i32
-  %1944 = fptoui float %1940 to i32
-  %1945 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1946 = extractvalue %dx.types.CBufRet.i32 %1945, 0
-  %1947 = extractvalue %dx.types.CBufRet.i32 %1945, 1
-  %1948 = extractvalue %dx.types.CBufRet.i32 %1945, 2
-  %1949 = extractvalue %dx.types.CBufRet.i32 %1945, 3
-  %1950 = mul i32 %1946, %1941
-  %1951 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1942, i32 %1947, i32 %1950)  ; IMad(a,b,c)
-  %1952 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1943, i32 %1948, i32 %1951)  ; IMad(a,b,c)
-  %1953 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1944, i32 %1949, i32 %1952)  ; IMad(a,b,c)
-  %1954 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %1953, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1955 = extractvalue %dx.types.ResRet.f16 %1954, 0
-  %1956 = fpext half %1955 to float
-  br label %1957
-
-; <label>:1957                                    ; preds = %1936, %1870, %1841, %1824, %1814
-  %1958 = phi float [ %1838, %1824 ], [ 0.000000e+00, %1814 ], [ %1869, %1841 ], [ %1956, %1936 ], [ 0.000000e+00, %1870 ]
-  br i1 %941, label %1959, label %1984
-
-; <label>:1959                                    ; preds = %1957
-  %1960 = fcmp fast oge float %1378, 0.000000e+00
-  %1961 = fptoui float %1378 to i32
-  %1962 = icmp ult i32 %1961, %13
-  %1963 = and i1 %1960, %1962
-  %1964 = fcmp fast oge float %938, 0.000000e+00
-  %1965 = and i1 %1964, %1963
-  %1966 = fptoui float %938 to i32
-  %1967 = icmp ult i32 %1966, %15
-  %1968 = and i1 %1967, %1965
-  br i1 %1968, label %1969, label %2102
-
-; <label>:1969                                    ; preds = %1959
-  %1970 = fptoui float %45 to i32
-  %1971 = fptoui float %182 to i32
-  %1972 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1973 = extractvalue %dx.types.CBufRet.i32 %1972, 0
-  %1974 = extractvalue %dx.types.CBufRet.i32 %1972, 1
-  %1975 = extractvalue %dx.types.CBufRet.i32 %1972, 2
-  %1976 = extractvalue %dx.types.CBufRet.i32 %1972, 3
-  %1977 = mul i32 %1973, %1970
-  %1978 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1971, i32 %1974, i32 %1977)  ; IMad(a,b,c)
-  %1979 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1966, i32 %1975, i32 %1978)  ; IMad(a,b,c)
-  %1980 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1961, i32 %1976, i32 %1979)  ; IMad(a,b,c)
-  %1981 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %1980, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1982 = extractvalue %dx.types.ResRet.f16 %1981, 0
-  %1983 = fpext half %1982 to float
-  br label %2102
-
-; <label>:1984                                    ; preds = %1957
-  %1985 = icmp eq i32 %940, 1
-  br i1 %1985, label %1986, label %2015
-
-; <label>:1986                                    ; preds = %1984
-  %1987 = add i32 %13, -1
-  %1988 = uitofp i32 %1987 to float
-  %1989 = call float @dx.op.binary.f32(i32 35, float %1378, float 0.000000e+00)  ; FMax(a,b)
-  %1990 = call float @dx.op.binary.f32(i32 36, float %1989, float %1988)  ; FMin(a,b)
-  %1991 = fptoui float %1990 to i32
-  %1992 = add i32 %15, -1
-  %1993 = uitofp i32 %1992 to float
-  %1994 = call float @dx.op.binary.f32(i32 35, float %938, float 0.000000e+00)  ; FMax(a,b)
-  %1995 = call float @dx.op.binary.f32(i32 36, float %1994, float %1993)  ; FMin(a,b)
-  %1996 = fptoui float %1995 to i32
-  %1997 = uitofp i32 %1996 to float
-  %1998 = uitofp i32 %1991 to float
-  %1999 = fptoui float %45 to i32
-  %2000 = fptoui float %182 to i32
-  %2001 = fptoui float %1997 to i32
-  %2002 = fptoui float %1998 to i32
-  %2003 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2004 = extractvalue %dx.types.CBufRet.i32 %2003, 0
-  %2005 = extractvalue %dx.types.CBufRet.i32 %2003, 1
-  %2006 = extractvalue %dx.types.CBufRet.i32 %2003, 2
-  %2007 = extractvalue %dx.types.CBufRet.i32 %2003, 3
-  %2008 = mul i32 %2004, %1999
-  %2009 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2000, i32 %2005, i32 %2008)  ; IMad(a,b,c)
-  %2010 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2001, i32 %2006, i32 %2009)  ; IMad(a,b,c)
-  %2011 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2002, i32 %2007, i32 %2010)  ; IMad(a,b,c)
-  %2012 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %2011, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2013 = extractvalue %dx.types.ResRet.f16 %2012, 0
-  %2014 = fpext half %2013 to float
-  br label %2102
-
-; <label>:2015                                    ; preds = %1984
-  %2016 = icmp eq i32 %940, 2
-  br i1 %2016, label %2017, label %2102
-
-; <label>:2017                                    ; preds = %2015
-  %2018 = fsub fast float %22, %20
-  %2019 = fcmp fast olt float %1378, %20
-  br i1 %2019, label %2020, label %2033
-
-; <label>:2020                                    ; preds = %2017
-  %2021 = fsub fast float %20, %1378
-  %2022 = fdiv fast float %2021, %2018
-  %2023 = fptoui float %2022 to i32
-  %2024 = uitofp i32 %2023 to float
-  %2025 = fmul fast float %2024, %2018
-  %2026 = fsub fast float %2021, %2025
-  %2027 = and i32 %2023, 1
-  %2028 = icmp eq i32 %2027, 0
-  br i1 %2028, label %2029, label %2031
-
-; <label>:2029                                    ; preds = %2020
-  %2030 = fadd fast float %2026, %20
-  br label %2048
-
-; <label>:2031                                    ; preds = %2020
-  %2032 = fsub fast float %22, %2026
-  br label %2048
-
-; <label>:2033                                    ; preds = %2017
-  %2034 = fcmp fast ogt float %1378, %22
-  br i1 %2034, label %2035, label %2048
-
-; <label>:2035                                    ; preds = %2033
-  %2036 = fsub fast float %1378, %22
-  %2037 = fdiv fast float %2036, %2018
-  %2038 = fptoui float %2037 to i32
-  %2039 = uitofp i32 %2038 to float
-  %2040 = fmul fast float %2039, %2018
-  %2041 = fsub fast float %2036, %2040
-  %2042 = and i32 %2038, 1
-  %2043 = icmp eq i32 %2042, 0
-  br i1 %2043, label %2044, label %2046
-
-; <label>:2044                                    ; preds = %2035
-  %2045 = fsub fast float %22, %2041
-  br label %2048
-
-; <label>:2046                                    ; preds = %2035
-  %2047 = fadd fast float %2041, %20
-  br label %2048
-
-; <label>:2048                                    ; preds = %2046, %2044, %2033, %2031, %2029
-  %2049 = phi float [ %2030, %2029 ], [ %2032, %2031 ], [ %2045, %2044 ], [ %2047, %2046 ], [ %1378, %2033 ]
-  %2050 = fptoui float %2049 to i32
-  %2051 = fsub fast float %24, %20
-  %2052 = fcmp fast olt float %938, %20
-  br i1 %2052, label %2053, label %2066
-
-; <label>:2053                                    ; preds = %2048
-  %2054 = fsub fast float %20, %938
-  %2055 = fdiv fast float %2054, %2051
-  %2056 = fptoui float %2055 to i32
-  %2057 = uitofp i32 %2056 to float
-  %2058 = fmul fast float %2057, %2051
-  %2059 = fsub fast float %2054, %2058
-  %2060 = and i32 %2056, 1
-  %2061 = icmp eq i32 %2060, 0
-  br i1 %2061, label %2062, label %2064
-
-; <label>:2062                                    ; preds = %2053
-  %2063 = fadd fast float %2059, %20
-  br label %2081
-
-; <label>:2064                                    ; preds = %2053
-  %2065 = fsub fast float %24, %2059
-  br label %2081
-
-; <label>:2066                                    ; preds = %2048
-  %2067 = fcmp fast ogt float %938, %24
-  br i1 %2067, label %2068, label %2081
-
-; <label>:2068                                    ; preds = %2066
-  %2069 = fsub fast float %938, %24
-  %2070 = fdiv fast float %2069, %2051
-  %2071 = fptoui float %2070 to i32
-  %2072 = uitofp i32 %2071 to float
-  %2073 = fmul fast float %2072, %2051
-  %2074 = fsub fast float %2069, %2073
-  %2075 = and i32 %2071, 1
-  %2076 = icmp eq i32 %2075, 0
-  br i1 %2076, label %2077, label %2079
-
-; <label>:2077                                    ; preds = %2068
-  %2078 = fsub fast float %24, %2074
-  br label %2081
-
-; <label>:2079                                    ; preds = %2068
-  %2080 = fadd fast float %2074, %20
-  br label %2081
-
-; <label>:2081                                    ; preds = %2079, %2077, %2066, %2064, %2062
-  %2082 = phi float [ %2063, %2062 ], [ %2065, %2064 ], [ %2078, %2077 ], [ %2080, %2079 ], [ %938, %2066 ]
-  %2083 = fptoui float %2082 to i32
-  %2084 = uitofp i32 %2083 to float
-  %2085 = uitofp i32 %2050 to float
-  %2086 = fptoui float %45 to i32
-  %2087 = fptoui float %182 to i32
-  %2088 = fptoui float %2084 to i32
-  %2089 = fptoui float %2085 to i32
-  %2090 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2091 = extractvalue %dx.types.CBufRet.i32 %2090, 0
-  %2092 = extractvalue %dx.types.CBufRet.i32 %2090, 1
-  %2093 = extractvalue %dx.types.CBufRet.i32 %2090, 2
-  %2094 = extractvalue %dx.types.CBufRet.i32 %2090, 3
-  %2095 = mul i32 %2091, %2086
-  %2096 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2087, i32 %2092, i32 %2095)  ; IMad(a,b,c)
-  %2097 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2088, i32 %2093, i32 %2096)  ; IMad(a,b,c)
-  %2098 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2089, i32 %2094, i32 %2097)  ; IMad(a,b,c)
-  %2099 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %2098, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2100 = extractvalue %dx.types.ResRet.f16 %2099, 0
-  %2101 = fpext half %2100 to float
-  br label %2102
-
-; <label>:2102                                    ; preds = %2081, %2015, %1986, %1969, %1959
-  %2103 = phi float [ %1983, %1969 ], [ 0.000000e+00, %1959 ], [ %2014, %1986 ], [ %2101, %2081 ], [ 0.000000e+00, %2015 ]
-  %2104 = fadd fast float %938, 1.000000e+00
-  br i1 %941, label %2105, label %2130
-
-; <label>:2105                                    ; preds = %2102
-  %2106 = fcmp fast oge float %937, 0.000000e+00
-  %2107 = fptoui float %937 to i32
-  %2108 = icmp ult i32 %2107, %13
-  %2109 = and i1 %2106, %2108
-  %2110 = fcmp fast oge float %2104, 0.000000e+00
-  %2111 = and i1 %2110, %2109
-  %2112 = fptoui float %2104 to i32
-  %2113 = icmp ult i32 %2112, %15
-  %2114 = and i1 %2113, %2111
-  br i1 %2114, label %2115, label %2248
-
-; <label>:2115                                    ; preds = %2105
-  %2116 = fptoui float %45 to i32
-  %2117 = fptoui float %182 to i32
-  %2118 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2119 = extractvalue %dx.types.CBufRet.i32 %2118, 0
-  %2120 = extractvalue %dx.types.CBufRet.i32 %2118, 1
-  %2121 = extractvalue %dx.types.CBufRet.i32 %2118, 2
-  %2122 = extractvalue %dx.types.CBufRet.i32 %2118, 3
-  %2123 = mul i32 %2119, %2116
-  %2124 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2117, i32 %2120, i32 %2123)  ; IMad(a,b,c)
-  %2125 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2112, i32 %2121, i32 %2124)  ; IMad(a,b,c)
-  %2126 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2107, i32 %2122, i32 %2125)  ; IMad(a,b,c)
-  %2127 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %2126, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2128 = extractvalue %dx.types.ResRet.f16 %2127, 0
-  %2129 = fpext half %2128 to float
-  br label %2248
-
-; <label>:2130                                    ; preds = %2102
-  %2131 = icmp eq i32 %940, 1
-  br i1 %2131, label %2132, label %2161
-
-; <label>:2132                                    ; preds = %2130
-  %2133 = add i32 %13, -1
-  %2134 = uitofp i32 %2133 to float
-  %2135 = call float @dx.op.binary.f32(i32 35, float %937, float 0.000000e+00)  ; FMax(a,b)
-  %2136 = call float @dx.op.binary.f32(i32 36, float %2135, float %2134)  ; FMin(a,b)
-  %2137 = fptoui float %2136 to i32
-  %2138 = add i32 %15, -1
-  %2139 = uitofp i32 %2138 to float
-  %2140 = call float @dx.op.binary.f32(i32 35, float %2104, float 0.000000e+00)  ; FMax(a,b)
-  %2141 = call float @dx.op.binary.f32(i32 36, float %2140, float %2139)  ; FMin(a,b)
-  %2142 = fptoui float %2141 to i32
-  %2143 = uitofp i32 %2142 to float
-  %2144 = uitofp i32 %2137 to float
-  %2145 = fptoui float %45 to i32
-  %2146 = fptoui float %182 to i32
-  %2147 = fptoui float %2143 to i32
-  %2148 = fptoui float %2144 to i32
-  %2149 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2150 = extractvalue %dx.types.CBufRet.i32 %2149, 0
-  %2151 = extractvalue %dx.types.CBufRet.i32 %2149, 1
-  %2152 = extractvalue %dx.types.CBufRet.i32 %2149, 2
-  %2153 = extractvalue %dx.types.CBufRet.i32 %2149, 3
-  %2154 = mul i32 %2150, %2145
-  %2155 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2146, i32 %2151, i32 %2154)  ; IMad(a,b,c)
-  %2156 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2147, i32 %2152, i32 %2155)  ; IMad(a,b,c)
-  %2157 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2148, i32 %2153, i32 %2156)  ; IMad(a,b,c)
-  %2158 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %2157, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2159 = extractvalue %dx.types.ResRet.f16 %2158, 0
-  %2160 = fpext half %2159 to float
-  br label %2248
-
-; <label>:2161                                    ; preds = %2130
-  %2162 = icmp eq i32 %940, 2
-  br i1 %2162, label %2163, label %2248
-
-; <label>:2163                                    ; preds = %2161
-  %2164 = fsub fast float %22, %20
-  %2165 = fcmp fast olt float %937, %20
-  br i1 %2165, label %2166, label %2179
-
-; <label>:2166                                    ; preds = %2163
-  %2167 = fsub fast float %20, %937
-  %2168 = fdiv fast float %2167, %2164
-  %2169 = fptoui float %2168 to i32
-  %2170 = uitofp i32 %2169 to float
-  %2171 = fmul fast float %2170, %2164
-  %2172 = fsub fast float %2167, %2171
-  %2173 = and i32 %2169, 1
-  %2174 = icmp eq i32 %2173, 0
-  br i1 %2174, label %2175, label %2177
-
-; <label>:2175                                    ; preds = %2166
-  %2176 = fadd fast float %2172, %20
-  br label %2194
-
-; <label>:2177                                    ; preds = %2166
-  %2178 = fsub fast float %22, %2172
-  br label %2194
-
-; <label>:2179                                    ; preds = %2163
-  %2180 = fcmp fast ogt float %937, %22
-  br i1 %2180, label %2181, label %2194
-
-; <label>:2181                                    ; preds = %2179
-  %2182 = fsub fast float %937, %22
-  %2183 = fdiv fast float %2182, %2164
-  %2184 = fptoui float %2183 to i32
-  %2185 = uitofp i32 %2184 to float
-  %2186 = fmul fast float %2185, %2164
-  %2187 = fsub fast float %2182, %2186
-  %2188 = and i32 %2184, 1
-  %2189 = icmp eq i32 %2188, 0
-  br i1 %2189, label %2190, label %2192
-
-; <label>:2190                                    ; preds = %2181
-  %2191 = fsub fast float %22, %2187
-  br label %2194
-
-; <label>:2192                                    ; preds = %2181
-  %2193 = fadd fast float %2187, %20
-  br label %2194
-
-; <label>:2194                                    ; preds = %2192, %2190, %2179, %2177, %2175
-  %2195 = phi float [ %2176, %2175 ], [ %2178, %2177 ], [ %2191, %2190 ], [ %2193, %2192 ], [ %937, %2179 ]
-  %2196 = fptoui float %2195 to i32
-  %2197 = fsub fast float %24, %20
-  %2198 = fcmp fast olt float %2104, %20
-  br i1 %2198, label %2199, label %2212
-
-; <label>:2199                                    ; preds = %2194
-  %2200 = fsub fast float %20, %2104
-  %2201 = fdiv fast float %2200, %2197
-  %2202 = fptoui float %2201 to i32
-  %2203 = uitofp i32 %2202 to float
-  %2204 = fmul fast float %2203, %2197
-  %2205 = fsub fast float %2200, %2204
-  %2206 = and i32 %2202, 1
-  %2207 = icmp eq i32 %2206, 0
-  br i1 %2207, label %2208, label %2210
-
-; <label>:2208                                    ; preds = %2199
-  %2209 = fadd fast float %2205, %20
-  br label %2227
-
-; <label>:2210                                    ; preds = %2199
-  %2211 = fsub fast float %24, %2205
-  br label %2227
-
-; <label>:2212                                    ; preds = %2194
-  %2213 = fcmp fast ogt float %2104, %24
-  br i1 %2213, label %2214, label %2227
-
-; <label>:2214                                    ; preds = %2212
-  %2215 = fsub fast float %2104, %24
-  %2216 = fdiv fast float %2215, %2197
-  %2217 = fptoui float %2216 to i32
-  %2218 = uitofp i32 %2217 to float
-  %2219 = fmul fast float %2218, %2197
-  %2220 = fsub fast float %2215, %2219
-  %2221 = and i32 %2217, 1
-  %2222 = icmp eq i32 %2221, 0
-  br i1 %2222, label %2223, label %2225
-
-; <label>:2223                                    ; preds = %2214
-  %2224 = fsub fast float %24, %2220
-  br label %2227
-
-; <label>:2225                                    ; preds = %2214
-  %2226 = fadd fast float %2220, %20
-  br label %2227
-
-; <label>:2227                                    ; preds = %2225, %2223, %2212, %2210, %2208
-  %2228 = phi float [ %2209, %2208 ], [ %2211, %2210 ], [ %2224, %2223 ], [ %2226, %2225 ], [ %2104, %2212 ]
-  %2229 = fptoui float %2228 to i32
-  %2230 = uitofp i32 %2229 to float
-  %2231 = uitofp i32 %2196 to float
-  %2232 = fptoui float %45 to i32
-  %2233 = fptoui float %182 to i32
-  %2234 = fptoui float %2230 to i32
-  %2235 = fptoui float %2231 to i32
-  %2236 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2237 = extractvalue %dx.types.CBufRet.i32 %2236, 0
-  %2238 = extractvalue %dx.types.CBufRet.i32 %2236, 1
-  %2239 = extractvalue %dx.types.CBufRet.i32 %2236, 2
-  %2240 = extractvalue %dx.types.CBufRet.i32 %2236, 3
-  %2241 = mul i32 %2237, %2232
-  %2242 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2233, i32 %2238, i32 %2241)  ; IMad(a,b,c)
-  %2243 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2234, i32 %2239, i32 %2242)  ; IMad(a,b,c)
-  %2244 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2235, i32 %2240, i32 %2243)  ; IMad(a,b,c)
-  %2245 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %2244, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2246 = extractvalue %dx.types.ResRet.f16 %2245, 0
-  %2247 = fpext half %2246 to float
-  br label %2248
-
-; <label>:2248                                    ; preds = %2227, %2161, %2132, %2115, %2105
-  %2249 = phi float [ %2129, %2115 ], [ 0.000000e+00, %2105 ], [ %2160, %2132 ], [ %2247, %2227 ], [ 0.000000e+00, %2161 ]
-  br i1 %941, label %2250, label %2275
-
-; <label>:2250                                    ; preds = %2248
-  %2251 = fcmp fast oge float %936, 0.000000e+00
-  %2252 = fptoui float %936 to i32
-  %2253 = icmp ult i32 %2252, %13
-  %2254 = and i1 %2251, %2253
-  %2255 = fcmp fast oge float %2104, 0.000000e+00
-  %2256 = and i1 %2255, %2254
-  %2257 = fptoui float %2104 to i32
-  %2258 = icmp ult i32 %2257, %15
-  %2259 = and i1 %2258, %2256
-  br i1 %2259, label %2260, label %2393
-
-; <label>:2260                                    ; preds = %2250
-  %2261 = fptoui float %45 to i32
-  %2262 = fptoui float %182 to i32
-  %2263 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2264 = extractvalue %dx.types.CBufRet.i32 %2263, 0
-  %2265 = extractvalue %dx.types.CBufRet.i32 %2263, 1
-  %2266 = extractvalue %dx.types.CBufRet.i32 %2263, 2
-  %2267 = extractvalue %dx.types.CBufRet.i32 %2263, 3
-  %2268 = mul i32 %2264, %2261
-  %2269 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2262, i32 %2265, i32 %2268)  ; IMad(a,b,c)
-  %2270 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2257, i32 %2266, i32 %2269)  ; IMad(a,b,c)
-  %2271 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2252, i32 %2267, i32 %2270)  ; IMad(a,b,c)
-  %2272 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %2271, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2273 = extractvalue %dx.types.ResRet.f16 %2272, 0
-  %2274 = fpext half %2273 to float
-  br label %2393
-
-; <label>:2275                                    ; preds = %2248
-  %2276 = icmp eq i32 %940, 1
-  br i1 %2276, label %2277, label %2306
-
-; <label>:2277                                    ; preds = %2275
-  %2278 = add i32 %13, -1
-  %2279 = uitofp i32 %2278 to float
-  %2280 = call float @dx.op.binary.f32(i32 35, float %936, float 0.000000e+00)  ; FMax(a,b)
-  %2281 = call float @dx.op.binary.f32(i32 36, float %2280, float %2279)  ; FMin(a,b)
-  %2282 = fptoui float %2281 to i32
-  %2283 = add i32 %15, -1
-  %2284 = uitofp i32 %2283 to float
-  %2285 = call float @dx.op.binary.f32(i32 35, float %2104, float 0.000000e+00)  ; FMax(a,b)
-  %2286 = call float @dx.op.binary.f32(i32 36, float %2285, float %2284)  ; FMin(a,b)
-  %2287 = fptoui float %2286 to i32
-  %2288 = uitofp i32 %2287 to float
-  %2289 = uitofp i32 %2282 to float
-  %2290 = fptoui float %45 to i32
-  %2291 = fptoui float %182 to i32
-  %2292 = fptoui float %2288 to i32
-  %2293 = fptoui float %2289 to i32
-  %2294 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2295 = extractvalue %dx.types.CBufRet.i32 %2294, 0
-  %2296 = extractvalue %dx.types.CBufRet.i32 %2294, 1
-  %2297 = extractvalue %dx.types.CBufRet.i32 %2294, 2
-  %2298 = extractvalue %dx.types.CBufRet.i32 %2294, 3
-  %2299 = mul i32 %2295, %2290
-  %2300 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2291, i32 %2296, i32 %2299)  ; IMad(a,b,c)
-  %2301 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2292, i32 %2297, i32 %2300)  ; IMad(a,b,c)
-  %2302 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2293, i32 %2298, i32 %2301)  ; IMad(a,b,c)
-  %2303 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %2302, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2304 = extractvalue %dx.types.ResRet.f16 %2303, 0
-  %2305 = fpext half %2304 to float
-  br label %2393
-
-; <label>:2306                                    ; preds = %2275
-  %2307 = icmp eq i32 %940, 2
-  br i1 %2307, label %2308, label %2393
-
-; <label>:2308                                    ; preds = %2306
-  %2309 = fsub fast float %22, %20
-  %2310 = fcmp fast olt float %936, %20
-  br i1 %2310, label %2311, label %2324
-
-; <label>:2311                                    ; preds = %2308
-  %2312 = fsub fast float %20, %936
-  %2313 = fdiv fast float %2312, %2309
-  %2314 = fptoui float %2313 to i32
-  %2315 = uitofp i32 %2314 to float
-  %2316 = fmul fast float %2315, %2309
-  %2317 = fsub fast float %2312, %2316
-  %2318 = and i32 %2314, 1
-  %2319 = icmp eq i32 %2318, 0
-  br i1 %2319, label %2320, label %2322
-
-; <label>:2320                                    ; preds = %2311
-  %2321 = fadd fast float %2317, %20
-  br label %2339
-
-; <label>:2322                                    ; preds = %2311
-  %2323 = fsub fast float %22, %2317
-  br label %2339
-
-; <label>:2324                                    ; preds = %2308
-  %2325 = fcmp fast ogt float %936, %22
-  br i1 %2325, label %2326, label %2339
-
-; <label>:2326                                    ; preds = %2324
-  %2327 = fsub fast float %936, %22
-  %2328 = fdiv fast float %2327, %2309
-  %2329 = fptoui float %2328 to i32
-  %2330 = uitofp i32 %2329 to float
-  %2331 = fmul fast float %2330, %2309
-  %2332 = fsub fast float %2327, %2331
-  %2333 = and i32 %2329, 1
-  %2334 = icmp eq i32 %2333, 0
-  br i1 %2334, label %2335, label %2337
-
-; <label>:2335                                    ; preds = %2326
-  %2336 = fsub fast float %22, %2332
-  br label %2339
-
-; <label>:2337                                    ; preds = %2326
-  %2338 = fadd fast float %2332, %20
-  br label %2339
-
-; <label>:2339                                    ; preds = %2337, %2335, %2324, %2322, %2320
-  %2340 = phi float [ %2321, %2320 ], [ %2323, %2322 ], [ %2336, %2335 ], [ %2338, %2337 ], [ %936, %2324 ]
-  %2341 = fptoui float %2340 to i32
-  %2342 = fsub fast float %24, %20
-  %2343 = fcmp fast olt float %2104, %20
-  br i1 %2343, label %2344, label %2357
-
-; <label>:2344                                    ; preds = %2339
-  %2345 = fsub fast float %20, %2104
-  %2346 = fdiv fast float %2345, %2342
-  %2347 = fptoui float %2346 to i32
-  %2348 = uitofp i32 %2347 to float
-  %2349 = fmul fast float %2348, %2342
-  %2350 = fsub fast float %2345, %2349
-  %2351 = and i32 %2347, 1
-  %2352 = icmp eq i32 %2351, 0
-  br i1 %2352, label %2353, label %2355
-
-; <label>:2353                                    ; preds = %2344
-  %2354 = fadd fast float %2350, %20
-  br label %2372
-
-; <label>:2355                                    ; preds = %2344
-  %2356 = fsub fast float %24, %2350
-  br label %2372
-
-; <label>:2357                                    ; preds = %2339
-  %2358 = fcmp fast ogt float %2104, %24
-  br i1 %2358, label %2359, label %2372
-
-; <label>:2359                                    ; preds = %2357
-  %2360 = fsub fast float %2104, %24
-  %2361 = fdiv fast float %2360, %2342
-  %2362 = fptoui float %2361 to i32
-  %2363 = uitofp i32 %2362 to float
-  %2364 = fmul fast float %2363, %2342
-  %2365 = fsub fast float %2360, %2364
-  %2366 = and i32 %2362, 1
-  %2367 = icmp eq i32 %2366, 0
-  br i1 %2367, label %2368, label %2370
-
-; <label>:2368                                    ; preds = %2359
-  %2369 = fsub fast float %24, %2365
-  br label %2372
-
-; <label>:2370                                    ; preds = %2359
-  %2371 = fadd fast float %2365, %20
-  br label %2372
-
-; <label>:2372                                    ; preds = %2370, %2368, %2357, %2355, %2353
-  %2373 = phi float [ %2354, %2353 ], [ %2356, %2355 ], [ %2369, %2368 ], [ %2371, %2370 ], [ %2104, %2357 ]
-  %2374 = fptoui float %2373 to i32
-  %2375 = uitofp i32 %2374 to float
-  %2376 = uitofp i32 %2341 to float
-  %2377 = fptoui float %45 to i32
-  %2378 = fptoui float %182 to i32
-  %2379 = fptoui float %2375 to i32
-  %2380 = fptoui float %2376 to i32
-  %2381 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2382 = extractvalue %dx.types.CBufRet.i32 %2381, 0
-  %2383 = extractvalue %dx.types.CBufRet.i32 %2381, 1
-  %2384 = extractvalue %dx.types.CBufRet.i32 %2381, 2
-  %2385 = extractvalue %dx.types.CBufRet.i32 %2381, 3
-  %2386 = mul i32 %2382, %2377
-  %2387 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2378, i32 %2383, i32 %2386)  ; IMad(a,b,c)
-  %2388 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2379, i32 %2384, i32 %2387)  ; IMad(a,b,c)
-  %2389 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2380, i32 %2385, i32 %2388)  ; IMad(a,b,c)
-  %2390 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %2389, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2391 = extractvalue %dx.types.ResRet.f16 %2390, 0
-  %2392 = fpext half %2391 to float
-  br label %2393
-
-; <label>:2393                                    ; preds = %2372, %2306, %2277, %2260, %2250
-  %2394 = phi float [ %2274, %2260 ], [ 0.000000e+00, %2250 ], [ %2305, %2277 ], [ %2392, %2372 ], [ 0.000000e+00, %2306 ]
-  br i1 %941, label %2395, label %2420
-
-; <label>:2395                                    ; preds = %2393
-  %2396 = fcmp fast oge float %1232, 0.000000e+00
-  %2397 = fptoui float %1232 to i32
-  %2398 = icmp ult i32 %2397, %13
-  %2399 = and i1 %2396, %2398
-  %2400 = fcmp fast oge float %2104, 0.000000e+00
-  %2401 = and i1 %2400, %2399
-  %2402 = fptoui float %2104 to i32
-  %2403 = icmp ult i32 %2402, %15
-  %2404 = and i1 %2403, %2401
-  br i1 %2404, label %2405, label %2538
-
-; <label>:2405                                    ; preds = %2395
-  %2406 = fptoui float %45 to i32
-  %2407 = fptoui float %182 to i32
-  %2408 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2409 = extractvalue %dx.types.CBufRet.i32 %2408, 0
-  %2410 = extractvalue %dx.types.CBufRet.i32 %2408, 1
-  %2411 = extractvalue %dx.types.CBufRet.i32 %2408, 2
-  %2412 = extractvalue %dx.types.CBufRet.i32 %2408, 3
-  %2413 = mul i32 %2409, %2406
-  %2414 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2407, i32 %2410, i32 %2413)  ; IMad(a,b,c)
-  %2415 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2402, i32 %2411, i32 %2414)  ; IMad(a,b,c)
-  %2416 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2397, i32 %2412, i32 %2415)  ; IMad(a,b,c)
-  %2417 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %2416, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2418 = extractvalue %dx.types.ResRet.f16 %2417, 0
-  %2419 = fpext half %2418 to float
-  br label %2538
-
-; <label>:2420                                    ; preds = %2393
-  %2421 = icmp eq i32 %940, 1
-  br i1 %2421, label %2422, label %2451
-
-; <label>:2422                                    ; preds = %2420
-  %2423 = add i32 %13, -1
-  %2424 = uitofp i32 %2423 to float
-  %2425 = call float @dx.op.binary.f32(i32 35, float %1232, float 0.000000e+00)  ; FMax(a,b)
-  %2426 = call float @dx.op.binary.f32(i32 36, float %2425, float %2424)  ; FMin(a,b)
-  %2427 = fptoui float %2426 to i32
-  %2428 = add i32 %15, -1
-  %2429 = uitofp i32 %2428 to float
-  %2430 = call float @dx.op.binary.f32(i32 35, float %2104, float 0.000000e+00)  ; FMax(a,b)
-  %2431 = call float @dx.op.binary.f32(i32 36, float %2430, float %2429)  ; FMin(a,b)
-  %2432 = fptoui float %2431 to i32
-  %2433 = uitofp i32 %2432 to float
-  %2434 = uitofp i32 %2427 to float
-  %2435 = fptoui float %45 to i32
-  %2436 = fptoui float %182 to i32
-  %2437 = fptoui float %2433 to i32
-  %2438 = fptoui float %2434 to i32
-  %2439 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2440 = extractvalue %dx.types.CBufRet.i32 %2439, 0
-  %2441 = extractvalue %dx.types.CBufRet.i32 %2439, 1
-  %2442 = extractvalue %dx.types.CBufRet.i32 %2439, 2
-  %2443 = extractvalue %dx.types.CBufRet.i32 %2439, 3
-  %2444 = mul i32 %2440, %2435
-  %2445 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2436, i32 %2441, i32 %2444)  ; IMad(a,b,c)
-  %2446 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2437, i32 %2442, i32 %2445)  ; IMad(a,b,c)
-  %2447 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2438, i32 %2443, i32 %2446)  ; IMad(a,b,c)
-  %2448 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %2447, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2449 = extractvalue %dx.types.ResRet.f16 %2448, 0
-  %2450 = fpext half %2449 to float
-  br label %2538
-
-; <label>:2451                                    ; preds = %2420
-  %2452 = icmp eq i32 %940, 2
-  br i1 %2452, label %2453, label %2538
-
-; <label>:2453                                    ; preds = %2451
-  %2454 = fsub fast float %22, %20
-  %2455 = fcmp fast olt float %1232, %20
-  br i1 %2455, label %2456, label %2469
-
-; <label>:2456                                    ; preds = %2453
-  %2457 = fsub fast float %20, %1232
-  %2458 = fdiv fast float %2457, %2454
-  %2459 = fptoui float %2458 to i32
-  %2460 = uitofp i32 %2459 to float
-  %2461 = fmul fast float %2460, %2454
-  %2462 = fsub fast float %2457, %2461
-  %2463 = and i32 %2459, 1
-  %2464 = icmp eq i32 %2463, 0
-  br i1 %2464, label %2465, label %2467
-
-; <label>:2465                                    ; preds = %2456
-  %2466 = fadd fast float %2462, %20
-  br label %2484
-
-; <label>:2467                                    ; preds = %2456
-  %2468 = fsub fast float %22, %2462
-  br label %2484
-
-; <label>:2469                                    ; preds = %2453
-  %2470 = fcmp fast ogt float %1232, %22
-  br i1 %2470, label %2471, label %2484
-
-; <label>:2471                                    ; preds = %2469
-  %2472 = fsub fast float %1232, %22
-  %2473 = fdiv fast float %2472, %2454
-  %2474 = fptoui float %2473 to i32
-  %2475 = uitofp i32 %2474 to float
-  %2476 = fmul fast float %2475, %2454
-  %2477 = fsub fast float %2472, %2476
-  %2478 = and i32 %2474, 1
-  %2479 = icmp eq i32 %2478, 0
-  br i1 %2479, label %2480, label %2482
-
-; <label>:2480                                    ; preds = %2471
-  %2481 = fsub fast float %22, %2477
-  br label %2484
-
-; <label>:2482                                    ; preds = %2471
-  %2483 = fadd fast float %2477, %20
-  br label %2484
-
-; <label>:2484                                    ; preds = %2482, %2480, %2469, %2467, %2465
-  %2485 = phi float [ %2466, %2465 ], [ %2468, %2467 ], [ %2481, %2480 ], [ %2483, %2482 ], [ %1232, %2469 ]
-  %2486 = fptoui float %2485 to i32
-  %2487 = fsub fast float %24, %20
-  %2488 = fcmp fast olt float %2104, %20
-  br i1 %2488, label %2489, label %2502
-
-; <label>:2489                                    ; preds = %2484
-  %2490 = fsub fast float %20, %2104
-  %2491 = fdiv fast float %2490, %2487
-  %2492 = fptoui float %2491 to i32
-  %2493 = uitofp i32 %2492 to float
-  %2494 = fmul fast float %2493, %2487
-  %2495 = fsub fast float %2490, %2494
-  %2496 = and i32 %2492, 1
-  %2497 = icmp eq i32 %2496, 0
-  br i1 %2497, label %2498, label %2500
-
-; <label>:2498                                    ; preds = %2489
-  %2499 = fadd fast float %2495, %20
-  br label %2517
-
-; <label>:2500                                    ; preds = %2489
-  %2501 = fsub fast float %24, %2495
-  br label %2517
-
-; <label>:2502                                    ; preds = %2484
-  %2503 = fcmp fast ogt float %2104, %24
-  br i1 %2503, label %2504, label %2517
-
-; <label>:2504                                    ; preds = %2502
-  %2505 = fsub fast float %2104, %24
-  %2506 = fdiv fast float %2505, %2487
-  %2507 = fptoui float %2506 to i32
-  %2508 = uitofp i32 %2507 to float
-  %2509 = fmul fast float %2508, %2487
-  %2510 = fsub fast float %2505, %2509
-  %2511 = and i32 %2507, 1
-  %2512 = icmp eq i32 %2511, 0
-  br i1 %2512, label %2513, label %2515
-
-; <label>:2513                                    ; preds = %2504
-  %2514 = fsub fast float %24, %2510
-  br label %2517
-
-; <label>:2515                                    ; preds = %2504
-  %2516 = fadd fast float %2510, %20
-  br label %2517
-
-; <label>:2517                                    ; preds = %2515, %2513, %2502, %2500, %2498
-  %2518 = phi float [ %2499, %2498 ], [ %2501, %2500 ], [ %2514, %2513 ], [ %2516, %2515 ], [ %2104, %2502 ]
-  %2519 = fptoui float %2518 to i32
-  %2520 = uitofp i32 %2519 to float
-  %2521 = uitofp i32 %2486 to float
-  %2522 = fptoui float %45 to i32
-  %2523 = fptoui float %182 to i32
-  %2524 = fptoui float %2520 to i32
-  %2525 = fptoui float %2521 to i32
-  %2526 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2527 = extractvalue %dx.types.CBufRet.i32 %2526, 0
-  %2528 = extractvalue %dx.types.CBufRet.i32 %2526, 1
-  %2529 = extractvalue %dx.types.CBufRet.i32 %2526, 2
-  %2530 = extractvalue %dx.types.CBufRet.i32 %2526, 3
-  %2531 = mul i32 %2527, %2522
-  %2532 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2523, i32 %2528, i32 %2531)  ; IMad(a,b,c)
-  %2533 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2524, i32 %2529, i32 %2532)  ; IMad(a,b,c)
-  %2534 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2525, i32 %2530, i32 %2533)  ; IMad(a,b,c)
-  %2535 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %2534, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2536 = extractvalue %dx.types.ResRet.f16 %2535, 0
-  %2537 = fpext half %2536 to float
-  br label %2538
-
-; <label>:2538                                    ; preds = %2517, %2451, %2422, %2405, %2395
-  %2539 = phi float [ %2419, %2405 ], [ 0.000000e+00, %2395 ], [ %2450, %2422 ], [ %2537, %2517 ], [ 0.000000e+00, %2451 ]
-  br i1 %941, label %2540, label %2565
-
-; <label>:2540                                    ; preds = %2538
-  %2541 = fcmp fast oge float %1378, 0.000000e+00
-  %2542 = fptoui float %1378 to i32
-  %2543 = icmp ult i32 %2542, %13
-  %2544 = and i1 %2541, %2543
-  %2545 = fcmp fast oge float %2104, 0.000000e+00
-  %2546 = and i1 %2545, %2544
-  %2547 = fptoui float %2104 to i32
-  %2548 = icmp ult i32 %2547, %15
-  %2549 = and i1 %2548, %2546
-  br i1 %2549, label %2550, label %2683
-
-; <label>:2550                                    ; preds = %2540
-  %2551 = fptoui float %45 to i32
-  %2552 = fptoui float %182 to i32
-  %2553 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2554 = extractvalue %dx.types.CBufRet.i32 %2553, 0
-  %2555 = extractvalue %dx.types.CBufRet.i32 %2553, 1
-  %2556 = extractvalue %dx.types.CBufRet.i32 %2553, 2
-  %2557 = extractvalue %dx.types.CBufRet.i32 %2553, 3
-  %2558 = mul i32 %2554, %2551
-  %2559 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2552, i32 %2555, i32 %2558)  ; IMad(a,b,c)
-  %2560 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2547, i32 %2556, i32 %2559)  ; IMad(a,b,c)
-  %2561 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2542, i32 %2557, i32 %2560)  ; IMad(a,b,c)
-  %2562 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %2561, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2563 = extractvalue %dx.types.ResRet.f16 %2562, 0
-  %2564 = fpext half %2563 to float
-  br label %2683
-
-; <label>:2565                                    ; preds = %2538
-  %2566 = icmp eq i32 %940, 1
-  br i1 %2566, label %2567, label %2596
-
-; <label>:2567                                    ; preds = %2565
-  %2568 = add i32 %13, -1
-  %2569 = uitofp i32 %2568 to float
-  %2570 = call float @dx.op.binary.f32(i32 35, float %1378, float 0.000000e+00)  ; FMax(a,b)
-  %2571 = call float @dx.op.binary.f32(i32 36, float %2570, float %2569)  ; FMin(a,b)
-  %2572 = fptoui float %2571 to i32
-  %2573 = add i32 %15, -1
-  %2574 = uitofp i32 %2573 to float
-  %2575 = call float @dx.op.binary.f32(i32 35, float %2104, float 0.000000e+00)  ; FMax(a,b)
-  %2576 = call float @dx.op.binary.f32(i32 36, float %2575, float %2574)  ; FMin(a,b)
-  %2577 = fptoui float %2576 to i32
-  %2578 = uitofp i32 %2577 to float
-  %2579 = uitofp i32 %2572 to float
-  %2580 = fptoui float %45 to i32
-  %2581 = fptoui float %182 to i32
-  %2582 = fptoui float %2578 to i32
-  %2583 = fptoui float %2579 to i32
-  %2584 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2585 = extractvalue %dx.types.CBufRet.i32 %2584, 0
-  %2586 = extractvalue %dx.types.CBufRet.i32 %2584, 1
-  %2587 = extractvalue %dx.types.CBufRet.i32 %2584, 2
-  %2588 = extractvalue %dx.types.CBufRet.i32 %2584, 3
-  %2589 = mul i32 %2585, %2580
-  %2590 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2581, i32 %2586, i32 %2589)  ; IMad(a,b,c)
-  %2591 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2582, i32 %2587, i32 %2590)  ; IMad(a,b,c)
-  %2592 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2583, i32 %2588, i32 %2591)  ; IMad(a,b,c)
-  %2593 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %2592, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2594 = extractvalue %dx.types.ResRet.f16 %2593, 0
-  %2595 = fpext half %2594 to float
-  br label %2683
-
-; <label>:2596                                    ; preds = %2565
-  %2597 = icmp eq i32 %940, 2
-  br i1 %2597, label %2598, label %2683
-
-; <label>:2598                                    ; preds = %2596
-  %2599 = fsub fast float %22, %20
-  %2600 = fcmp fast olt float %1378, %20
-  br i1 %2600, label %2601, label %2614
-
-; <label>:2601                                    ; preds = %2598
-  %2602 = fsub fast float %20, %1378
-  %2603 = fdiv fast float %2602, %2599
-  %2604 = fptoui float %2603 to i32
-  %2605 = uitofp i32 %2604 to float
-  %2606 = fmul fast float %2605, %2599
-  %2607 = fsub fast float %2602, %2606
-  %2608 = and i32 %2604, 1
-  %2609 = icmp eq i32 %2608, 0
-  br i1 %2609, label %2610, label %2612
-
-; <label>:2610                                    ; preds = %2601
-  %2611 = fadd fast float %2607, %20
-  br label %2629
-
-; <label>:2612                                    ; preds = %2601
-  %2613 = fsub fast float %22, %2607
-  br label %2629
-
-; <label>:2614                                    ; preds = %2598
-  %2615 = fcmp fast ogt float %1378, %22
-  br i1 %2615, label %2616, label %2629
-
-; <label>:2616                                    ; preds = %2614
-  %2617 = fsub fast float %1378, %22
-  %2618 = fdiv fast float %2617, %2599
-  %2619 = fptoui float %2618 to i32
-  %2620 = uitofp i32 %2619 to float
-  %2621 = fmul fast float %2620, %2599
-  %2622 = fsub fast float %2617, %2621
-  %2623 = and i32 %2619, 1
-  %2624 = icmp eq i32 %2623, 0
-  br i1 %2624, label %2625, label %2627
-
-; <label>:2625                                    ; preds = %2616
-  %2626 = fsub fast float %22, %2622
-  br label %2629
-
-; <label>:2627                                    ; preds = %2616
-  %2628 = fadd fast float %2622, %20
-  br label %2629
-
-; <label>:2629                                    ; preds = %2627, %2625, %2614, %2612, %2610
-  %2630 = phi float [ %2611, %2610 ], [ %2613, %2612 ], [ %2626, %2625 ], [ %2628, %2627 ], [ %1378, %2614 ]
-  %2631 = fptoui float %2630 to i32
-  %2632 = fsub fast float %24, %20
-  %2633 = fcmp fast olt float %2104, %20
-  br i1 %2633, label %2634, label %2647
-
-; <label>:2634                                    ; preds = %2629
-  %2635 = fsub fast float %20, %2104
-  %2636 = fdiv fast float %2635, %2632
-  %2637 = fptoui float %2636 to i32
-  %2638 = uitofp i32 %2637 to float
-  %2639 = fmul fast float %2638, %2632
-  %2640 = fsub fast float %2635, %2639
-  %2641 = and i32 %2637, 1
-  %2642 = icmp eq i32 %2641, 0
-  br i1 %2642, label %2643, label %2645
-
-; <label>:2643                                    ; preds = %2634
-  %2644 = fadd fast float %2640, %20
-  br label %2662
-
-; <label>:2645                                    ; preds = %2634
-  %2646 = fsub fast float %24, %2640
-  br label %2662
-
-; <label>:2647                                    ; preds = %2629
-  %2648 = fcmp fast ogt float %2104, %24
-  br i1 %2648, label %2649, label %2662
-
-; <label>:2649                                    ; preds = %2647
-  %2650 = fsub fast float %2104, %24
-  %2651 = fdiv fast float %2650, %2632
-  %2652 = fptoui float %2651 to i32
-  %2653 = uitofp i32 %2652 to float
-  %2654 = fmul fast float %2653, %2632
-  %2655 = fsub fast float %2650, %2654
-  %2656 = and i32 %2652, 1
-  %2657 = icmp eq i32 %2656, 0
-  br i1 %2657, label %2658, label %2660
-
-; <label>:2658                                    ; preds = %2649
-  %2659 = fsub fast float %24, %2655
-  br label %2662
-
-; <label>:2660                                    ; preds = %2649
-  %2661 = fadd fast float %2655, %20
-  br label %2662
-
-; <label>:2662                                    ; preds = %2660, %2658, %2647, %2645, %2643
-  %2663 = phi float [ %2644, %2643 ], [ %2646, %2645 ], [ %2659, %2658 ], [ %2661, %2660 ], [ %2104, %2647 ]
-  %2664 = fptoui float %2663 to i32
-  %2665 = uitofp i32 %2664 to float
-  %2666 = uitofp i32 %2631 to float
-  %2667 = fptoui float %45 to i32
-  %2668 = fptoui float %182 to i32
-  %2669 = fptoui float %2665 to i32
-  %2670 = fptoui float %2666 to i32
-  %2671 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2672 = extractvalue %dx.types.CBufRet.i32 %2671, 0
-  %2673 = extractvalue %dx.types.CBufRet.i32 %2671, 1
-  %2674 = extractvalue %dx.types.CBufRet.i32 %2671, 2
-  %2675 = extractvalue %dx.types.CBufRet.i32 %2671, 3
-  %2676 = mul i32 %2672, %2667
-  %2677 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2668, i32 %2673, i32 %2676)  ; IMad(a,b,c)
-  %2678 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2669, i32 %2674, i32 %2677)  ; IMad(a,b,c)
-  %2679 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2670, i32 %2675, i32 %2678)  ; IMad(a,b,c)
-  %2680 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %2679, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2681 = extractvalue %dx.types.ResRet.f16 %2680, 0
-  %2682 = fpext half %2681 to float
-  br label %2683
-
-; <label>:2683                                    ; preds = %2662, %2596, %2567, %2550, %2540
-  %2684 = phi float [ %2564, %2550 ], [ 0.000000e+00, %2540 ], [ %2595, %2567 ], [ %2682, %2662 ], [ 0.000000e+00, %2596 ]
-  %2685 = fadd fast float %938, 2.000000e+00
-  br i1 %941, label %2686, label %2711
-
-; <label>:2686                                    ; preds = %2683
-  %2687 = fcmp fast oge float %937, 0.000000e+00
-  %2688 = fptoui float %937 to i32
-  %2689 = icmp ult i32 %2688, %13
-  %2690 = and i1 %2687, %2689
-  %2691 = fcmp fast oge float %2685, 0.000000e+00
-  %2692 = and i1 %2691, %2690
-  %2693 = fptoui float %2685 to i32
-  %2694 = icmp ult i32 %2693, %15
-  %2695 = and i1 %2694, %2692
-  br i1 %2695, label %2696, label %2829
-
-; <label>:2696                                    ; preds = %2686
-  %2697 = fptoui float %45 to i32
-  %2698 = fptoui float %182 to i32
-  %2699 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2700 = extractvalue %dx.types.CBufRet.i32 %2699, 0
-  %2701 = extractvalue %dx.types.CBufRet.i32 %2699, 1
-  %2702 = extractvalue %dx.types.CBufRet.i32 %2699, 2
-  %2703 = extractvalue %dx.types.CBufRet.i32 %2699, 3
-  %2704 = mul i32 %2700, %2697
-  %2705 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2698, i32 %2701, i32 %2704)  ; IMad(a,b,c)
-  %2706 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2693, i32 %2702, i32 %2705)  ; IMad(a,b,c)
-  %2707 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2688, i32 %2703, i32 %2706)  ; IMad(a,b,c)
-  %2708 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %2707, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2709 = extractvalue %dx.types.ResRet.f16 %2708, 0
-  %2710 = fpext half %2709 to float
-  br label %2829
-
-; <label>:2711                                    ; preds = %2683
-  %2712 = icmp eq i32 %940, 1
-  br i1 %2712, label %2713, label %2742
-
-; <label>:2713                                    ; preds = %2711
-  %2714 = add i32 %13, -1
-  %2715 = uitofp i32 %2714 to float
-  %2716 = call float @dx.op.binary.f32(i32 35, float %937, float 0.000000e+00)  ; FMax(a,b)
-  %2717 = call float @dx.op.binary.f32(i32 36, float %2716, float %2715)  ; FMin(a,b)
-  %2718 = fptoui float %2717 to i32
-  %2719 = add i32 %15, -1
-  %2720 = uitofp i32 %2719 to float
-  %2721 = call float @dx.op.binary.f32(i32 35, float %2685, float 0.000000e+00)  ; FMax(a,b)
-  %2722 = call float @dx.op.binary.f32(i32 36, float %2721, float %2720)  ; FMin(a,b)
-  %2723 = fptoui float %2722 to i32
-  %2724 = uitofp i32 %2723 to float
-  %2725 = uitofp i32 %2718 to float
-  %2726 = fptoui float %45 to i32
-  %2727 = fptoui float %182 to i32
-  %2728 = fptoui float %2724 to i32
-  %2729 = fptoui float %2725 to i32
-  %2730 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2731 = extractvalue %dx.types.CBufRet.i32 %2730, 0
-  %2732 = extractvalue %dx.types.CBufRet.i32 %2730, 1
-  %2733 = extractvalue %dx.types.CBufRet.i32 %2730, 2
-  %2734 = extractvalue %dx.types.CBufRet.i32 %2730, 3
-  %2735 = mul i32 %2731, %2726
-  %2736 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2727, i32 %2732, i32 %2735)  ; IMad(a,b,c)
-  %2737 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2728, i32 %2733, i32 %2736)  ; IMad(a,b,c)
-  %2738 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2729, i32 %2734, i32 %2737)  ; IMad(a,b,c)
-  %2739 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %2738, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2740 = extractvalue %dx.types.ResRet.f16 %2739, 0
-  %2741 = fpext half %2740 to float
-  br label %2829
-
-; <label>:2742                                    ; preds = %2711
-  %2743 = icmp eq i32 %940, 2
-  br i1 %2743, label %2744, label %2829
-
-; <label>:2744                                    ; preds = %2742
-  %2745 = fsub fast float %22, %20
-  %2746 = fcmp fast olt float %937, %20
-  br i1 %2746, label %2747, label %2760
-
-; <label>:2747                                    ; preds = %2744
-  %2748 = fsub fast float %20, %937
-  %2749 = fdiv fast float %2748, %2745
-  %2750 = fptoui float %2749 to i32
-  %2751 = uitofp i32 %2750 to float
-  %2752 = fmul fast float %2751, %2745
-  %2753 = fsub fast float %2748, %2752
-  %2754 = and i32 %2750, 1
-  %2755 = icmp eq i32 %2754, 0
-  br i1 %2755, label %2756, label %2758
-
-; <label>:2756                                    ; preds = %2747
-  %2757 = fadd fast float %2753, %20
-  br label %2775
-
-; <label>:2758                                    ; preds = %2747
-  %2759 = fsub fast float %22, %2753
-  br label %2775
-
-; <label>:2760                                    ; preds = %2744
-  %2761 = fcmp fast ogt float %937, %22
-  br i1 %2761, label %2762, label %2775
-
-; <label>:2762                                    ; preds = %2760
-  %2763 = fsub fast float %937, %22
-  %2764 = fdiv fast float %2763, %2745
-  %2765 = fptoui float %2764 to i32
-  %2766 = uitofp i32 %2765 to float
-  %2767 = fmul fast float %2766, %2745
-  %2768 = fsub fast float %2763, %2767
-  %2769 = and i32 %2765, 1
-  %2770 = icmp eq i32 %2769, 0
-  br i1 %2770, label %2771, label %2773
-
-; <label>:2771                                    ; preds = %2762
-  %2772 = fsub fast float %22, %2768
-  br label %2775
-
-; <label>:2773                                    ; preds = %2762
-  %2774 = fadd fast float %2768, %20
-  br label %2775
-
-; <label>:2775                                    ; preds = %2773, %2771, %2760, %2758, %2756
-  %2776 = phi float [ %2757, %2756 ], [ %2759, %2758 ], [ %2772, %2771 ], [ %2774, %2773 ], [ %937, %2760 ]
-  %2777 = fptoui float %2776 to i32
-  %2778 = fsub fast float %24, %20
-  %2779 = fcmp fast olt float %2685, %20
-  br i1 %2779, label %2780, label %2793
-
-; <label>:2780                                    ; preds = %2775
-  %2781 = fsub fast float %20, %2685
-  %2782 = fdiv fast float %2781, %2778
-  %2783 = fptoui float %2782 to i32
-  %2784 = uitofp i32 %2783 to float
-  %2785 = fmul fast float %2784, %2778
-  %2786 = fsub fast float %2781, %2785
-  %2787 = and i32 %2783, 1
-  %2788 = icmp eq i32 %2787, 0
-  br i1 %2788, label %2789, label %2791
-
-; <label>:2789                                    ; preds = %2780
-  %2790 = fadd fast float %2786, %20
-  br label %2808
-
-; <label>:2791                                    ; preds = %2780
-  %2792 = fsub fast float %24, %2786
-  br label %2808
-
-; <label>:2793                                    ; preds = %2775
-  %2794 = fcmp fast ogt float %2685, %24
-  br i1 %2794, label %2795, label %2808
-
-; <label>:2795                                    ; preds = %2793
-  %2796 = fsub fast float %2685, %24
-  %2797 = fdiv fast float %2796, %2778
-  %2798 = fptoui float %2797 to i32
-  %2799 = uitofp i32 %2798 to float
-  %2800 = fmul fast float %2799, %2778
-  %2801 = fsub fast float %2796, %2800
-  %2802 = and i32 %2798, 1
-  %2803 = icmp eq i32 %2802, 0
-  br i1 %2803, label %2804, label %2806
-
-; <label>:2804                                    ; preds = %2795
-  %2805 = fsub fast float %24, %2801
-  br label %2808
-
-; <label>:2806                                    ; preds = %2795
-  %2807 = fadd fast float %2801, %20
-  br label %2808
-
-; <label>:2808                                    ; preds = %2806, %2804, %2793, %2791, %2789
-  %2809 = phi float [ %2790, %2789 ], [ %2792, %2791 ], [ %2805, %2804 ], [ %2807, %2806 ], [ %2685, %2793 ]
-  %2810 = fptoui float %2809 to i32
-  %2811 = uitofp i32 %2810 to float
-  %2812 = uitofp i32 %2777 to float
-  %2813 = fptoui float %45 to i32
-  %2814 = fptoui float %182 to i32
-  %2815 = fptoui float %2811 to i32
-  %2816 = fptoui float %2812 to i32
-  %2817 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2818 = extractvalue %dx.types.CBufRet.i32 %2817, 0
-  %2819 = extractvalue %dx.types.CBufRet.i32 %2817, 1
-  %2820 = extractvalue %dx.types.CBufRet.i32 %2817, 2
-  %2821 = extractvalue %dx.types.CBufRet.i32 %2817, 3
-  %2822 = mul i32 %2818, %2813
-  %2823 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2814, i32 %2819, i32 %2822)  ; IMad(a,b,c)
-  %2824 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2815, i32 %2820, i32 %2823)  ; IMad(a,b,c)
-  %2825 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2816, i32 %2821, i32 %2824)  ; IMad(a,b,c)
-  %2826 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %2825, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2827 = extractvalue %dx.types.ResRet.f16 %2826, 0
-  %2828 = fpext half %2827 to float
-  br label %2829
-
-; <label>:2829                                    ; preds = %2808, %2742, %2713, %2696, %2686
-  %2830 = phi float [ %2710, %2696 ], [ 0.000000e+00, %2686 ], [ %2741, %2713 ], [ %2828, %2808 ], [ 0.000000e+00, %2742 ]
-  br i1 %941, label %2831, label %2856
-
-; <label>:2831                                    ; preds = %2829
-  %2832 = fcmp fast oge float %936, 0.000000e+00
-  %2833 = fptoui float %936 to i32
-  %2834 = icmp ult i32 %2833, %13
-  %2835 = and i1 %2832, %2834
-  %2836 = fcmp fast oge float %2685, 0.000000e+00
-  %2837 = and i1 %2836, %2835
-  %2838 = fptoui float %2685 to i32
-  %2839 = icmp ult i32 %2838, %15
-  %2840 = and i1 %2839, %2837
-  br i1 %2840, label %2841, label %2974
-
-; <label>:2841                                    ; preds = %2831
-  %2842 = fptoui float %45 to i32
-  %2843 = fptoui float %182 to i32
-  %2844 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2845 = extractvalue %dx.types.CBufRet.i32 %2844, 0
-  %2846 = extractvalue %dx.types.CBufRet.i32 %2844, 1
-  %2847 = extractvalue %dx.types.CBufRet.i32 %2844, 2
-  %2848 = extractvalue %dx.types.CBufRet.i32 %2844, 3
-  %2849 = mul i32 %2845, %2842
-  %2850 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2843, i32 %2846, i32 %2849)  ; IMad(a,b,c)
-  %2851 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2838, i32 %2847, i32 %2850)  ; IMad(a,b,c)
-  %2852 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2833, i32 %2848, i32 %2851)  ; IMad(a,b,c)
-  %2853 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %2852, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2854 = extractvalue %dx.types.ResRet.f16 %2853, 0
-  %2855 = fpext half %2854 to float
-  br label %2974
-
-; <label>:2856                                    ; preds = %2829
-  %2857 = icmp eq i32 %940, 1
-  br i1 %2857, label %2858, label %2887
-
-; <label>:2858                                    ; preds = %2856
-  %2859 = add i32 %13, -1
-  %2860 = uitofp i32 %2859 to float
-  %2861 = call float @dx.op.binary.f32(i32 35, float %936, float 0.000000e+00)  ; FMax(a,b)
-  %2862 = call float @dx.op.binary.f32(i32 36, float %2861, float %2860)  ; FMin(a,b)
-  %2863 = fptoui float %2862 to i32
-  %2864 = add i32 %15, -1
-  %2865 = uitofp i32 %2864 to float
-  %2866 = call float @dx.op.binary.f32(i32 35, float %2685, float 0.000000e+00)  ; FMax(a,b)
-  %2867 = call float @dx.op.binary.f32(i32 36, float %2866, float %2865)  ; FMin(a,b)
-  %2868 = fptoui float %2867 to i32
-  %2869 = uitofp i32 %2868 to float
-  %2870 = uitofp i32 %2863 to float
-  %2871 = fptoui float %45 to i32
-  %2872 = fptoui float %182 to i32
-  %2873 = fptoui float %2869 to i32
-  %2874 = fptoui float %2870 to i32
-  %2875 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2876 = extractvalue %dx.types.CBufRet.i32 %2875, 0
-  %2877 = extractvalue %dx.types.CBufRet.i32 %2875, 1
-  %2878 = extractvalue %dx.types.CBufRet.i32 %2875, 2
-  %2879 = extractvalue %dx.types.CBufRet.i32 %2875, 3
-  %2880 = mul i32 %2876, %2871
-  %2881 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2872, i32 %2877, i32 %2880)  ; IMad(a,b,c)
-  %2882 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2873, i32 %2878, i32 %2881)  ; IMad(a,b,c)
-  %2883 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2874, i32 %2879, i32 %2882)  ; IMad(a,b,c)
-  %2884 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %2883, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2885 = extractvalue %dx.types.ResRet.f16 %2884, 0
-  %2886 = fpext half %2885 to float
-  br label %2974
-
-; <label>:2887                                    ; preds = %2856
-  %2888 = icmp eq i32 %940, 2
-  br i1 %2888, label %2889, label %2974
-
-; <label>:2889                                    ; preds = %2887
-  %2890 = fsub fast float %22, %20
-  %2891 = fcmp fast olt float %936, %20
-  br i1 %2891, label %2892, label %2905
-
-; <label>:2892                                    ; preds = %2889
-  %2893 = fsub fast float %20, %936
-  %2894 = fdiv fast float %2893, %2890
-  %2895 = fptoui float %2894 to i32
-  %2896 = uitofp i32 %2895 to float
-  %2897 = fmul fast float %2896, %2890
-  %2898 = fsub fast float %2893, %2897
-  %2899 = and i32 %2895, 1
-  %2900 = icmp eq i32 %2899, 0
-  br i1 %2900, label %2901, label %2903
-
-; <label>:2901                                    ; preds = %2892
-  %2902 = fadd fast float %2898, %20
-  br label %2920
-
-; <label>:2903                                    ; preds = %2892
-  %2904 = fsub fast float %22, %2898
-  br label %2920
-
-; <label>:2905                                    ; preds = %2889
-  %2906 = fcmp fast ogt float %936, %22
-  br i1 %2906, label %2907, label %2920
-
-; <label>:2907                                    ; preds = %2905
-  %2908 = fsub fast float %936, %22
-  %2909 = fdiv fast float %2908, %2890
-  %2910 = fptoui float %2909 to i32
-  %2911 = uitofp i32 %2910 to float
-  %2912 = fmul fast float %2911, %2890
-  %2913 = fsub fast float %2908, %2912
-  %2914 = and i32 %2910, 1
-  %2915 = icmp eq i32 %2914, 0
-  br i1 %2915, label %2916, label %2918
-
-; <label>:2916                                    ; preds = %2907
-  %2917 = fsub fast float %22, %2913
-  br label %2920
-
-; <label>:2918                                    ; preds = %2907
-  %2919 = fadd fast float %2913, %20
-  br label %2920
-
-; <label>:2920                                    ; preds = %2918, %2916, %2905, %2903, %2901
-  %2921 = phi float [ %2902, %2901 ], [ %2904, %2903 ], [ %2917, %2916 ], [ %2919, %2918 ], [ %936, %2905 ]
-  %2922 = fptoui float %2921 to i32
-  %2923 = fsub fast float %24, %20
-  %2924 = fcmp fast olt float %2685, %20
-  br i1 %2924, label %2925, label %2938
-
-; <label>:2925                                    ; preds = %2920
-  %2926 = fsub fast float %20, %2685
-  %2927 = fdiv fast float %2926, %2923
-  %2928 = fptoui float %2927 to i32
-  %2929 = uitofp i32 %2928 to float
-  %2930 = fmul fast float %2929, %2923
-  %2931 = fsub fast float %2926, %2930
-  %2932 = and i32 %2928, 1
-  %2933 = icmp eq i32 %2932, 0
-  br i1 %2933, label %2934, label %2936
-
-; <label>:2934                                    ; preds = %2925
-  %2935 = fadd fast float %2931, %20
-  br label %2953
-
-; <label>:2936                                    ; preds = %2925
-  %2937 = fsub fast float %24, %2931
-  br label %2953
-
-; <label>:2938                                    ; preds = %2920
-  %2939 = fcmp fast ogt float %2685, %24
-  br i1 %2939, label %2940, label %2953
-
-; <label>:2940                                    ; preds = %2938
-  %2941 = fsub fast float %2685, %24
-  %2942 = fdiv fast float %2941, %2923
-  %2943 = fptoui float %2942 to i32
-  %2944 = uitofp i32 %2943 to float
-  %2945 = fmul fast float %2944, %2923
-  %2946 = fsub fast float %2941, %2945
-  %2947 = and i32 %2943, 1
-  %2948 = icmp eq i32 %2947, 0
-  br i1 %2948, label %2949, label %2951
-
-; <label>:2949                                    ; preds = %2940
-  %2950 = fsub fast float %24, %2946
-  br label %2953
-
-; <label>:2951                                    ; preds = %2940
-  %2952 = fadd fast float %2946, %20
-  br label %2953
-
-; <label>:2953                                    ; preds = %2951, %2949, %2938, %2936, %2934
-  %2954 = phi float [ %2935, %2934 ], [ %2937, %2936 ], [ %2950, %2949 ], [ %2952, %2951 ], [ %2685, %2938 ]
-  %2955 = fptoui float %2954 to i32
-  %2956 = uitofp i32 %2955 to float
-  %2957 = uitofp i32 %2922 to float
-  %2958 = fptoui float %45 to i32
-  %2959 = fptoui float %182 to i32
-  %2960 = fptoui float %2956 to i32
-  %2961 = fptoui float %2957 to i32
-  %2962 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2963 = extractvalue %dx.types.CBufRet.i32 %2962, 0
-  %2964 = extractvalue %dx.types.CBufRet.i32 %2962, 1
-  %2965 = extractvalue %dx.types.CBufRet.i32 %2962, 2
-  %2966 = extractvalue %dx.types.CBufRet.i32 %2962, 3
-  %2967 = mul i32 %2963, %2958
-  %2968 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2959, i32 %2964, i32 %2967)  ; IMad(a,b,c)
-  %2969 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2960, i32 %2965, i32 %2968)  ; IMad(a,b,c)
-  %2970 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2961, i32 %2966, i32 %2969)  ; IMad(a,b,c)
-  %2971 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %2970, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2972 = extractvalue %dx.types.ResRet.f16 %2971, 0
-  %2973 = fpext half %2972 to float
-  br label %2974
-
-; <label>:2974                                    ; preds = %2953, %2887, %2858, %2841, %2831
-  %2975 = phi float [ %2855, %2841 ], [ 0.000000e+00, %2831 ], [ %2886, %2858 ], [ %2973, %2953 ], [ 0.000000e+00, %2887 ]
-  br i1 %941, label %2976, label %3001
-
-; <label>:2976                                    ; preds = %2974
-  %2977 = fcmp fast oge float %1232, 0.000000e+00
-  %2978 = fptoui float %1232 to i32
-  %2979 = icmp ult i32 %2978, %13
-  %2980 = and i1 %2977, %2979
-  %2981 = fcmp fast oge float %2685, 0.000000e+00
-  %2982 = and i1 %2981, %2980
-  %2983 = fptoui float %2685 to i32
-  %2984 = icmp ult i32 %2983, %15
-  %2985 = and i1 %2984, %2982
-  br i1 %2985, label %2986, label %3119
-
-; <label>:2986                                    ; preds = %2976
-  %2987 = fptoui float %45 to i32
-  %2988 = fptoui float %182 to i32
-  %2989 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2990 = extractvalue %dx.types.CBufRet.i32 %2989, 0
-  %2991 = extractvalue %dx.types.CBufRet.i32 %2989, 1
-  %2992 = extractvalue %dx.types.CBufRet.i32 %2989, 2
-  %2993 = extractvalue %dx.types.CBufRet.i32 %2989, 3
-  %2994 = mul i32 %2990, %2987
-  %2995 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2988, i32 %2991, i32 %2994)  ; IMad(a,b,c)
-  %2996 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2983, i32 %2992, i32 %2995)  ; IMad(a,b,c)
-  %2997 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2978, i32 %2993, i32 %2996)  ; IMad(a,b,c)
-  %2998 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %2997, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2999 = extractvalue %dx.types.ResRet.f16 %2998, 0
-  %3000 = fpext half %2999 to float
-  br label %3119
-
-; <label>:3001                                    ; preds = %2974
-  %3002 = icmp eq i32 %940, 1
-  br i1 %3002, label %3003, label %3032
-
-; <label>:3003                                    ; preds = %3001
-  %3004 = add i32 %13, -1
-  %3005 = uitofp i32 %3004 to float
-  %3006 = call float @dx.op.binary.f32(i32 35, float %1232, float 0.000000e+00)  ; FMax(a,b)
-  %3007 = call float @dx.op.binary.f32(i32 36, float %3006, float %3005)  ; FMin(a,b)
-  %3008 = fptoui float %3007 to i32
-  %3009 = add i32 %15, -1
-  %3010 = uitofp i32 %3009 to float
-  %3011 = call float @dx.op.binary.f32(i32 35, float %2685, float 0.000000e+00)  ; FMax(a,b)
-  %3012 = call float @dx.op.binary.f32(i32 36, float %3011, float %3010)  ; FMin(a,b)
-  %3013 = fptoui float %3012 to i32
-  %3014 = uitofp i32 %3013 to float
-  %3015 = uitofp i32 %3008 to float
-  %3016 = fptoui float %45 to i32
-  %3017 = fptoui float %182 to i32
-  %3018 = fptoui float %3014 to i32
-  %3019 = fptoui float %3015 to i32
-  %3020 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3021 = extractvalue %dx.types.CBufRet.i32 %3020, 0
-  %3022 = extractvalue %dx.types.CBufRet.i32 %3020, 1
-  %3023 = extractvalue %dx.types.CBufRet.i32 %3020, 2
-  %3024 = extractvalue %dx.types.CBufRet.i32 %3020, 3
-  %3025 = mul i32 %3021, %3016
-  %3026 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3017, i32 %3022, i32 %3025)  ; IMad(a,b,c)
-  %3027 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3018, i32 %3023, i32 %3026)  ; IMad(a,b,c)
-  %3028 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3019, i32 %3024, i32 %3027)  ; IMad(a,b,c)
-  %3029 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %3028, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3030 = extractvalue %dx.types.ResRet.f16 %3029, 0
-  %3031 = fpext half %3030 to float
-  br label %3119
-
-; <label>:3032                                    ; preds = %3001
-  %3033 = icmp eq i32 %940, 2
-  br i1 %3033, label %3034, label %3119
-
-; <label>:3034                                    ; preds = %3032
-  %3035 = fsub fast float %22, %20
-  %3036 = fcmp fast olt float %1232, %20
-  br i1 %3036, label %3037, label %3050
-
-; <label>:3037                                    ; preds = %3034
-  %3038 = fsub fast float %20, %1232
-  %3039 = fdiv fast float %3038, %3035
-  %3040 = fptoui float %3039 to i32
-  %3041 = uitofp i32 %3040 to float
-  %3042 = fmul fast float %3041, %3035
-  %3043 = fsub fast float %3038, %3042
-  %3044 = and i32 %3040, 1
-  %3045 = icmp eq i32 %3044, 0
-  br i1 %3045, label %3046, label %3048
-
-; <label>:3046                                    ; preds = %3037
-  %3047 = fadd fast float %3043, %20
-  br label %3065
-
-; <label>:3048                                    ; preds = %3037
-  %3049 = fsub fast float %22, %3043
-  br label %3065
-
-; <label>:3050                                    ; preds = %3034
-  %3051 = fcmp fast ogt float %1232, %22
-  br i1 %3051, label %3052, label %3065
-
-; <label>:3052                                    ; preds = %3050
-  %3053 = fsub fast float %1232, %22
-  %3054 = fdiv fast float %3053, %3035
-  %3055 = fptoui float %3054 to i32
-  %3056 = uitofp i32 %3055 to float
-  %3057 = fmul fast float %3056, %3035
-  %3058 = fsub fast float %3053, %3057
-  %3059 = and i32 %3055, 1
-  %3060 = icmp eq i32 %3059, 0
-  br i1 %3060, label %3061, label %3063
-
-; <label>:3061                                    ; preds = %3052
-  %3062 = fsub fast float %22, %3058
-  br label %3065
-
-; <label>:3063                                    ; preds = %3052
-  %3064 = fadd fast float %3058, %20
-  br label %3065
-
-; <label>:3065                                    ; preds = %3063, %3061, %3050, %3048, %3046
-  %3066 = phi float [ %3047, %3046 ], [ %3049, %3048 ], [ %3062, %3061 ], [ %3064, %3063 ], [ %1232, %3050 ]
-  %3067 = fptoui float %3066 to i32
-  %3068 = fsub fast float %24, %20
-  %3069 = fcmp fast olt float %2685, %20
-  br i1 %3069, label %3070, label %3083
-
-; <label>:3070                                    ; preds = %3065
-  %3071 = fsub fast float %20, %2685
-  %3072 = fdiv fast float %3071, %3068
-  %3073 = fptoui float %3072 to i32
-  %3074 = uitofp i32 %3073 to float
-  %3075 = fmul fast float %3074, %3068
-  %3076 = fsub fast float %3071, %3075
-  %3077 = and i32 %3073, 1
-  %3078 = icmp eq i32 %3077, 0
-  br i1 %3078, label %3079, label %3081
-
-; <label>:3079                                    ; preds = %3070
-  %3080 = fadd fast float %3076, %20
-  br label %3098
-
-; <label>:3081                                    ; preds = %3070
-  %3082 = fsub fast float %24, %3076
-  br label %3098
-
-; <label>:3083                                    ; preds = %3065
-  %3084 = fcmp fast ogt float %2685, %24
-  br i1 %3084, label %3085, label %3098
-
-; <label>:3085                                    ; preds = %3083
-  %3086 = fsub fast float %2685, %24
-  %3087 = fdiv fast float %3086, %3068
-  %3088 = fptoui float %3087 to i32
-  %3089 = uitofp i32 %3088 to float
-  %3090 = fmul fast float %3089, %3068
-  %3091 = fsub fast float %3086, %3090
-  %3092 = and i32 %3088, 1
-  %3093 = icmp eq i32 %3092, 0
-  br i1 %3093, label %3094, label %3096
-
-; <label>:3094                                    ; preds = %3085
-  %3095 = fsub fast float %24, %3091
-  br label %3098
-
-; <label>:3096                                    ; preds = %3085
-  %3097 = fadd fast float %3091, %20
-  br label %3098
-
-; <label>:3098                                    ; preds = %3096, %3094, %3083, %3081, %3079
-  %3099 = phi float [ %3080, %3079 ], [ %3082, %3081 ], [ %3095, %3094 ], [ %3097, %3096 ], [ %2685, %3083 ]
-  %3100 = fptoui float %3099 to i32
-  %3101 = uitofp i32 %3100 to float
-  %3102 = uitofp i32 %3067 to float
-  %3103 = fptoui float %45 to i32
-  %3104 = fptoui float %182 to i32
-  %3105 = fptoui float %3101 to i32
-  %3106 = fptoui float %3102 to i32
-  %3107 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3108 = extractvalue %dx.types.CBufRet.i32 %3107, 0
-  %3109 = extractvalue %dx.types.CBufRet.i32 %3107, 1
-  %3110 = extractvalue %dx.types.CBufRet.i32 %3107, 2
-  %3111 = extractvalue %dx.types.CBufRet.i32 %3107, 3
-  %3112 = mul i32 %3108, %3103
-  %3113 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3104, i32 %3109, i32 %3112)  ; IMad(a,b,c)
-  %3114 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3105, i32 %3110, i32 %3113)  ; IMad(a,b,c)
-  %3115 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3106, i32 %3111, i32 %3114)  ; IMad(a,b,c)
-  %3116 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %3115, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3117 = extractvalue %dx.types.ResRet.f16 %3116, 0
-  %3118 = fpext half %3117 to float
-  br label %3119
-
-; <label>:3119                                    ; preds = %3098, %3032, %3003, %2986, %2976
-  %3120 = phi float [ %3000, %2986 ], [ 0.000000e+00, %2976 ], [ %3031, %3003 ], [ %3118, %3098 ], [ 0.000000e+00, %3032 ]
-  br i1 %941, label %3121, label %3146
-
-; <label>:3121                                    ; preds = %3119
-  %3122 = fcmp fast oge float %1378, 0.000000e+00
-  %3123 = fptoui float %1378 to i32
-  %3124 = icmp ult i32 %3123, %13
-  %3125 = and i1 %3122, %3124
-  %3126 = fcmp fast oge float %2685, 0.000000e+00
-  %3127 = and i1 %3126, %3125
-  %3128 = fptoui float %2685 to i32
-  %3129 = icmp ult i32 %3128, %15
-  %3130 = and i1 %3129, %3127
-  br i1 %3130, label %3131, label %3264
-
-; <label>:3131                                    ; preds = %3121
-  %3132 = fptoui float %45 to i32
-  %3133 = fptoui float %182 to i32
-  %3134 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3135 = extractvalue %dx.types.CBufRet.i32 %3134, 0
-  %3136 = extractvalue %dx.types.CBufRet.i32 %3134, 1
-  %3137 = extractvalue %dx.types.CBufRet.i32 %3134, 2
-  %3138 = extractvalue %dx.types.CBufRet.i32 %3134, 3
-  %3139 = mul i32 %3135, %3132
-  %3140 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3133, i32 %3136, i32 %3139)  ; IMad(a,b,c)
-  %3141 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3128, i32 %3137, i32 %3140)  ; IMad(a,b,c)
-  %3142 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3123, i32 %3138, i32 %3141)  ; IMad(a,b,c)
-  %3143 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %3142, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3144 = extractvalue %dx.types.ResRet.f16 %3143, 0
-  %3145 = fpext half %3144 to float
-  br label %3264
-
-; <label>:3146                                    ; preds = %3119
-  %3147 = icmp eq i32 %940, 1
-  br i1 %3147, label %3148, label %3177
-
-; <label>:3148                                    ; preds = %3146
-  %3149 = add i32 %13, -1
-  %3150 = uitofp i32 %3149 to float
-  %3151 = call float @dx.op.binary.f32(i32 35, float %1378, float 0.000000e+00)  ; FMax(a,b)
-  %3152 = call float @dx.op.binary.f32(i32 36, float %3151, float %3150)  ; FMin(a,b)
-  %3153 = fptoui float %3152 to i32
-  %3154 = add i32 %15, -1
-  %3155 = uitofp i32 %3154 to float
-  %3156 = call float @dx.op.binary.f32(i32 35, float %2685, float 0.000000e+00)  ; FMax(a,b)
-  %3157 = call float @dx.op.binary.f32(i32 36, float %3156, float %3155)  ; FMin(a,b)
-  %3158 = fptoui float %3157 to i32
-  %3159 = uitofp i32 %3158 to float
-  %3160 = uitofp i32 %3153 to float
-  %3161 = fptoui float %45 to i32
-  %3162 = fptoui float %182 to i32
-  %3163 = fptoui float %3159 to i32
-  %3164 = fptoui float %3160 to i32
-  %3165 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3166 = extractvalue %dx.types.CBufRet.i32 %3165, 0
-  %3167 = extractvalue %dx.types.CBufRet.i32 %3165, 1
-  %3168 = extractvalue %dx.types.CBufRet.i32 %3165, 2
-  %3169 = extractvalue %dx.types.CBufRet.i32 %3165, 3
-  %3170 = mul i32 %3166, %3161
-  %3171 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3162, i32 %3167, i32 %3170)  ; IMad(a,b,c)
-  %3172 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3163, i32 %3168, i32 %3171)  ; IMad(a,b,c)
-  %3173 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3164, i32 %3169, i32 %3172)  ; IMad(a,b,c)
-  %3174 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %3173, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3175 = extractvalue %dx.types.ResRet.f16 %3174, 0
-  %3176 = fpext half %3175 to float
-  br label %3264
-
-; <label>:3177                                    ; preds = %3146
-  %3178 = icmp eq i32 %940, 2
-  br i1 %3178, label %3179, label %3264
-
-; <label>:3179                                    ; preds = %3177
-  %3180 = fsub fast float %22, %20
-  %3181 = fcmp fast olt float %1378, %20
-  br i1 %3181, label %3182, label %3195
-
-; <label>:3182                                    ; preds = %3179
-  %3183 = fsub fast float %20, %1378
-  %3184 = fdiv fast float %3183, %3180
-  %3185 = fptoui float %3184 to i32
-  %3186 = uitofp i32 %3185 to float
-  %3187 = fmul fast float %3186, %3180
-  %3188 = fsub fast float %3183, %3187
-  %3189 = and i32 %3185, 1
-  %3190 = icmp eq i32 %3189, 0
-  br i1 %3190, label %3191, label %3193
-
-; <label>:3191                                    ; preds = %3182
-  %3192 = fadd fast float %3188, %20
-  br label %3210
-
-; <label>:3193                                    ; preds = %3182
-  %3194 = fsub fast float %22, %3188
-  br label %3210
-
-; <label>:3195                                    ; preds = %3179
-  %3196 = fcmp fast ogt float %1378, %22
-  br i1 %3196, label %3197, label %3210
-
-; <label>:3197                                    ; preds = %3195
-  %3198 = fsub fast float %1378, %22
-  %3199 = fdiv fast float %3198, %3180
-  %3200 = fptoui float %3199 to i32
-  %3201 = uitofp i32 %3200 to float
-  %3202 = fmul fast float %3201, %3180
-  %3203 = fsub fast float %3198, %3202
-  %3204 = and i32 %3200, 1
-  %3205 = icmp eq i32 %3204, 0
-  br i1 %3205, label %3206, label %3208
-
-; <label>:3206                                    ; preds = %3197
-  %3207 = fsub fast float %22, %3203
-  br label %3210
-
-; <label>:3208                                    ; preds = %3197
-  %3209 = fadd fast float %3203, %20
-  br label %3210
-
-; <label>:3210                                    ; preds = %3208, %3206, %3195, %3193, %3191
-  %3211 = phi float [ %3192, %3191 ], [ %3194, %3193 ], [ %3207, %3206 ], [ %3209, %3208 ], [ %1378, %3195 ]
-  %3212 = fptoui float %3211 to i32
-  %3213 = fsub fast float %24, %20
-  %3214 = fcmp fast olt float %2685, %20
-  br i1 %3214, label %3215, label %3228
-
-; <label>:3215                                    ; preds = %3210
-  %3216 = fsub fast float %20, %2685
-  %3217 = fdiv fast float %3216, %3213
-  %3218 = fptoui float %3217 to i32
-  %3219 = uitofp i32 %3218 to float
-  %3220 = fmul fast float %3219, %3213
-  %3221 = fsub fast float %3216, %3220
-  %3222 = and i32 %3218, 1
-  %3223 = icmp eq i32 %3222, 0
-  br i1 %3223, label %3224, label %3226
-
-; <label>:3224                                    ; preds = %3215
-  %3225 = fadd fast float %3221, %20
-  br label %3243
-
-; <label>:3226                                    ; preds = %3215
-  %3227 = fsub fast float %24, %3221
-  br label %3243
-
-; <label>:3228                                    ; preds = %3210
-  %3229 = fcmp fast ogt float %2685, %24
-  br i1 %3229, label %3230, label %3243
-
-; <label>:3230                                    ; preds = %3228
-  %3231 = fsub fast float %2685, %24
-  %3232 = fdiv fast float %3231, %3213
-  %3233 = fptoui float %3232 to i32
-  %3234 = uitofp i32 %3233 to float
-  %3235 = fmul fast float %3234, %3213
-  %3236 = fsub fast float %3231, %3235
-  %3237 = and i32 %3233, 1
-  %3238 = icmp eq i32 %3237, 0
-  br i1 %3238, label %3239, label %3241
-
-; <label>:3239                                    ; preds = %3230
-  %3240 = fsub fast float %24, %3236
-  br label %3243
-
-; <label>:3241                                    ; preds = %3230
-  %3242 = fadd fast float %3236, %20
-  br label %3243
-
-; <label>:3243                                    ; preds = %3241, %3239, %3228, %3226, %3224
-  %3244 = phi float [ %3225, %3224 ], [ %3227, %3226 ], [ %3240, %3239 ], [ %3242, %3241 ], [ %2685, %3228 ]
-  %3245 = fptoui float %3244 to i32
-  %3246 = uitofp i32 %3245 to float
-  %3247 = uitofp i32 %3212 to float
-  %3248 = fptoui float %45 to i32
-  %3249 = fptoui float %182 to i32
-  %3250 = fptoui float %3246 to i32
-  %3251 = fptoui float %3247 to i32
-  %3252 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3253 = extractvalue %dx.types.CBufRet.i32 %3252, 0
-  %3254 = extractvalue %dx.types.CBufRet.i32 %3252, 1
-  %3255 = extractvalue %dx.types.CBufRet.i32 %3252, 2
-  %3256 = extractvalue %dx.types.CBufRet.i32 %3252, 3
-  %3257 = mul i32 %3253, %3248
-  %3258 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3249, i32 %3254, i32 %3257)  ; IMad(a,b,c)
-  %3259 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3250, i32 %3255, i32 %3258)  ; IMad(a,b,c)
-  %3260 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3251, i32 %3256, i32 %3259)  ; IMad(a,b,c)
-  %3261 = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %3, i32 %3260, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3262 = extractvalue %dx.types.ResRet.f16 %3261, 0
-  %3263 = fpext half %3262 to float
-  br label %3264
-
-; <label>:3264                                    ; preds = %3243, %3177, %3148, %3131, %3121
-  %3265 = phi float [ %3145, %3131 ], [ 0.000000e+00, %3121 ], [ %3176, %3148 ], [ %3263, %3243 ], [ 0.000000e+00, %3177 ]
-  %3266 = call float @dx.op.unary.f32(i32 22, float %180)  ; Frc(value)
-  %3267 = call float @dx.op.unary.f32(i32 22, float %181)  ; Frc(value)
-  %3268 = fmul fast float %3267, %3267
-  %3269 = fmul fast float %3268, %3267
-  %3270 = fmul fast float %1086, -7.500000e-01
-  %3271 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2249, float %3270)  ; FMad(a,b,c)
-  %3272 = fmul fast float %1086, 1.500000e+00
-  %3273 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %1668, float %3272)  ; FMad(a,b,c)
-  %3274 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %2249, float %3273)  ; FMad(a,b,c)
-  %3275 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %2830, float %3274)  ; FMad(a,b,c)
-  %3276 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %1668, float %3270)  ; FMad(a,b,c)
-  %3277 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %2249, float %3276)  ; FMad(a,b,c)
-  %3278 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2830, float %3277)  ; FMad(a,b,c)
-  %3279 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3267, float %3268, float %3269, float %1668, float %3271, float %3275, float %3278)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3280 = fmul fast float %1231, -7.500000e-01
-  %3281 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2394, float %3280)  ; FMad(a,b,c)
-  %3282 = fmul fast float %1231, 1.500000e+00
-  %3283 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %1813, float %3282)  ; FMad(a,b,c)
-  %3284 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %2394, float %3283)  ; FMad(a,b,c)
-  %3285 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %2975, float %3284)  ; FMad(a,b,c)
-  %3286 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %1813, float %3280)  ; FMad(a,b,c)
-  %3287 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %2394, float %3286)  ; FMad(a,b,c)
-  %3288 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2975, float %3287)  ; FMad(a,b,c)
-  %3289 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3267, float %3268, float %3269, float %1813, float %3281, float %3285, float %3288)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3290 = fmul fast float %1377, -7.500000e-01
-  %3291 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2539, float %3290)  ; FMad(a,b,c)
-  %3292 = fmul fast float %1377, 1.500000e+00
-  %3293 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %1958, float %3292)  ; FMad(a,b,c)
-  %3294 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %2539, float %3293)  ; FMad(a,b,c)
-  %3295 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %3120, float %3294)  ; FMad(a,b,c)
-  %3296 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %1958, float %3290)  ; FMad(a,b,c)
-  %3297 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %2539, float %3296)  ; FMad(a,b,c)
-  %3298 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3120, float %3297)  ; FMad(a,b,c)
-  %3299 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3267, float %3268, float %3269, float %1958, float %3291, float %3295, float %3298)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3300 = fmul fast float %1523, -7.500000e-01
-  %3301 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2684, float %3300)  ; FMad(a,b,c)
-  %3302 = fmul fast float %1523, 1.500000e+00
-  %3303 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %2103, float %3302)  ; FMad(a,b,c)
-  %3304 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %2684, float %3303)  ; FMad(a,b,c)
-  %3305 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %3265, float %3304)  ; FMad(a,b,c)
-  %3306 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %2103, float %3300)  ; FMad(a,b,c)
-  %3307 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %2684, float %3306)  ; FMad(a,b,c)
-  %3308 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3265, float %3307)  ; FMad(a,b,c)
-  %3309 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3267, float %3268, float %3269, float %2103, float %3301, float %3305, float %3308)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3310 = fmul fast float %3266, %3266
-  %3311 = fmul fast float %3310, %3266
-  %3312 = fmul fast float %3279, -7.500000e-01
-  %3313 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3299, float %3312)  ; FMad(a,b,c)
-  %3314 = fmul fast float %3279, 1.500000e+00
-  %3315 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %3289, float %3314)  ; FMad(a,b,c)
-  %3316 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %3299, float %3315)  ; FMad(a,b,c)
-  %3317 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %3309, float %3316)  ; FMad(a,b,c)
-  %3318 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %3289, float %3312)  ; FMad(a,b,c)
-  %3319 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %3299, float %3318)  ; FMad(a,b,c)
-  %3320 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3309, float %3319)  ; FMad(a,b,c)
-  %3321 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3266, float %3310, float %3311, float %3289, float %3313, float %3317, float %3320)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3322 = fptrunc float %3321 to half
-  call void @dx.op.rawBufferStore.f16(i32 140, %dx.types.Handle %1, i32 %8, i32 0, half %3322, half undef, half undef, half undef, i8 1, i32 2)  ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment)
-  br label %3323
-
-; <label>:3323                                    ; preds = %3264, %933, %919, %329, %0
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.threadId.i32(i32, i32) #0
-
-; Function Attrs: nounwind readonly
-declare %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32, %dx.types.Handle, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.rawBufferStore.f16(i32, %dx.types.Handle, i32, i32, half, half, half, half, i8, i32) #2
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.unary.f32(i32, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.tertiary.f32(i32, float, float, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.binary.f32(i32, float, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.dot4.f32(i32, float, float, float, float, float, float, float, float) #0
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.tertiary.i32(i32, i32, i32, i32) #0
-
-; Function Attrs: nounwind readonly
-declare %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32, %dx.types.Handle, i32) #1
-
-; Function Attrs: nounwind readonly
-declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #1
-
-; Function Attrs: nounwind readonly
-declare %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32, %dx.types.Handle, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind readnone
-declare double @dx.op.makeDouble.f64(i32, i32, i32) #0
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind readonly }
-attributes #2 = { nounwind }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.valver = !{!2}
-!dx.shaderModel = !{!3}
-!dx.resources = !{!4}
-!dx.entryPoints = !{!12}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 1, i32 2}
-!2 = !{i32 1, i32 6}
-!3 = !{!"cs", i32 6, i32 2}
-!4 = !{null, !5, !10, null}
-!5 = !{!6, !7, !9}
-!6 = !{i32 0, %"class.RWStructuredBuffer<half>"* undef, !"", i32 0, i32 0, i32 1, i32 12, i1 false, i1 false, i1 false, !1}
-!7 = !{i32 1, %"class.RWStructuredBuffer<double>"* undef, !"", i32 0, i32 1, i32 1, i32 12, i1 false, i1 false, i1 false, !8}
-!8 = !{i32 1, i32 8}
-!9 = !{i32 2, %"class.RWStructuredBuffer<half>"* undef, !"", i32 0, i32 2, i32 1, i32 12, i1 false, i1 false, i1 false, !1}
-!10 = !{!11}
-!11 = !{i32 0, %Constants* undef, !"", i32 0, i32 0, i32 1, i32 116, null}
-!12 = !{void ()* @GridSample, !"GridSample", null, !4, !13}
-!13 = !{i32 0, i64 8388660, i32 4, !14}
-!14 = !{i32 64, i32 1, i32 1}
-
-#endif
-
-const unsigned char g_GridSample[] = {
-  0x44, 0x58, 0x42, 0x43, 0x38, 0x38, 0x0c, 0x84, 0x52, 0x31, 0x30, 0xce,
-  0xb3, 0x85, 0xab, 0xa3, 0x18, 0x84, 0xee, 0xaa, 0x01, 0x00, 0x00, 0x00,
-  0x84, 0x54, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00,
-  0x48, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00,
-  0x18, 0x01, 0x00, 0x00, 0x34, 0x01, 0x00, 0x00, 0x53, 0x46, 0x49, 0x30,
-  0x08, 0x00, 0x00, 0x00, 0x01, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x49, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x08, 0x00, 0x00, 0x00, 0x4f, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x50, 0x53, 0x56, 0x30,
-  0xa8, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x05, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00,
-  0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
-  0x18, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x48, 0x41, 0x53, 0x48, 0x14, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0xf0, 0x8c, 0x28, 0x06, 0x96, 0x9b, 0x3e, 0xc2,
-  0xad, 0x03, 0xb4, 0x53, 0x16, 0x8b, 0xeb, 0x8b, 0x44, 0x58, 0x49, 0x4c,
-  0x48, 0x53, 0x00, 0x00, 0x62, 0x00, 0x05, 0x00, 0xd2, 0x14, 0x00, 0x00,
-  0x44, 0x58, 0x49, 0x4c, 0x02, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
-  0x30, 0x53, 0x00, 0x00, 0x42, 0x43, 0xc0, 0xde, 0x21, 0x0c, 0x00, 0x00,
-  0xc9, 0x14, 0x00, 0x00, 0x0b, 0x82, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00,
-  0x13, 0x00, 0x00, 0x00, 0x07, 0x81, 0x23, 0x91, 0x41, 0xc8, 0x04, 0x49,
-  0x06, 0x10, 0x32, 0x39, 0x92, 0x01, 0x84, 0x0c, 0x25, 0x05, 0x08, 0x19,
-  0x1e, 0x04, 0x8b, 0x62, 0x80, 0x18, 0x45, 0x02, 0x42, 0x92, 0x0b, 0x42,
-  0xc4, 0x10, 0x32, 0x14, 0x38, 0x08, 0x18, 0x4b, 0x0a, 0x32, 0x62, 0x88,
-  0x48, 0x90, 0x14, 0x20, 0x43, 0x46, 0x88, 0xa5, 0x00, 0x19, 0x32, 0x42,
-  0xe4, 0x48, 0x0e, 0x90, 0x11, 0x23, 0xc4, 0x50, 0x41, 0x51, 0x81, 0x8c,
-  0xe1, 0x83, 0xe5, 0x8a, 0x04, 0x31, 0x46, 0x06, 0x51, 0x18, 0x00, 0x00,
-  0x08, 0x00, 0x00, 0x00, 0x1b, 0x8c, 0xe0, 0xff, 0xff, 0xff, 0xff, 0x07,
-  0x40, 0x02, 0xa8, 0x0d, 0x86, 0xf0, 0xff, 0xff, 0xff, 0xff, 0x03, 0x20,
-  0x01, 0xd5, 0x06, 0x62, 0xf8, 0xff, 0xff, 0xff, 0xff, 0x01, 0x90, 0x00,
-  0x49, 0x18, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x13, 0x82, 0x60, 0x42,
-  0x20, 0x4c, 0x08, 0x06, 0x00, 0x00, 0x00, 0x00, 0x89, 0x20, 0x00, 0x00,
-  0x57, 0x00, 0x00, 0x00, 0x32, 0x22, 0x88, 0x09, 0x20, 0x64, 0x85, 0x04,
-  0x13, 0x23, 0xa4, 0x84, 0x04, 0x13, 0x23, 0xe3, 0x84, 0xa1, 0x90, 0x14,
-  0x12, 0x4c, 0x8c, 0x8c, 0x0b, 0x84, 0xc4, 0x4c, 0x10, 0xc0, 0xc1, 0x08,
-  0x40, 0x09, 0x00, 0x0a, 0xe6, 0x08, 0xc0, 0xa0, 0x0c, 0xc3, 0x30, 0x10,
-  0x31, 0x0a, 0x70, 0xd3, 0x70, 0xf9, 0x13, 0xf6, 0x10, 0x92, 0xbf, 0x12,
-  0xd2, 0x4a, 0x4c, 0x7e, 0x51, 0xeb, 0xa8, 0x30, 0x0c, 0xc3, 0x18, 0xe6,
-  0x08, 0x10, 0x42, 0xee, 0x19, 0x2e, 0x7f, 0xc2, 0x1e, 0x42, 0xf2, 0x43,
-  0xa0, 0x19, 0x16, 0x02, 0x05, 0x49, 0x39, 0x8e, 0x41, 0x19, 0x06, 0x64,
-  0xa0, 0xa5, 0x2c, 0xc0, 0xa0, 0x0c, 0x83, 0x61, 0x18, 0x06, 0x32, 0x50,
-  0x33, 0x03, 0x50, 0x86, 0x67, 0x78, 0x08, 0x2a, 0xc5, 0x33, 0x3c, 0xcf,
-  0x43, 0x52, 0x21, 0x9e, 0xe1, 0x79, 0x88, 0x2a, 0xca, 0x33, 0x3c, 0xcf,
-  0xf3, 0x3c, 0xcf, 0xf3, 0x90, 0x55, 0x8a, 0x61, 0x18, 0x86, 0x81, 0xb0,
-  0xa3, 0x86, 0xcb, 0x9f, 0xb0, 0x87, 0x90, 0x7c, 0x6e, 0xa3, 0x8a, 0x95,
-  0x98, 0x7c, 0xe4, 0xb6, 0x11, 0x31, 0x0c, 0xc3, 0x50, 0x08, 0x6d, 0x50,
-  0x06, 0xda, 0xe6, 0x08, 0x82, 0x62, 0x28, 0x03, 0x32, 0x0c, 0x1d, 0x79,
-  0x37, 0x0d, 0x97, 0x3f, 0x61, 0x0f, 0x21, 0xf9, 0x2b, 0x21, 0xad, 0xc4,
-  0xe4, 0x23, 0xb7, 0x8d, 0x8a, 0x61, 0x18, 0x86, 0xa1, 0x1c, 0xe0, 0xa0,
-  0x0c, 0x03, 0x32, 0x50, 0x38, 0x04, 0x50, 0x88, 0x71, 0x18, 0x06, 0x22,
-  0x07, 0x02, 0x66, 0xf2, 0xc6, 0x81, 0x1d, 0xc2, 0x61, 0x1e, 0xe6, 0xc1,
-  0x0d, 0x64, 0xe1, 0x16, 0x66, 0x81, 0x1e, 0xe4, 0xa1, 0x1e, 0xc6, 0x81,
-  0x1e, 0xea, 0x41, 0x1e, 0xca, 0x81, 0x1c, 0x44, 0xa1, 0x1e, 0xcc, 0xc1,
-  0x1c, 0xca, 0x41, 0x1e, 0xf8, 0x00, 0x1d, 0xc2, 0x81, 0x1d, 0xcc, 0xc1,
-  0x0f, 0x50, 0x60, 0xd0, 0x39, 0x13, 0x18, 0x8c, 0x03, 0x3b, 0x84, 0xc3,
-  0x3c, 0xcc, 0x83, 0x1b, 0xc8, 0xc2, 0x2d, 0xcc, 0x02, 0x3d, 0xc8, 0x43,
-  0x3d, 0x8c, 0x03, 0x3d, 0xd4, 0x83, 0x3c, 0x94, 0x03, 0x39, 0x88, 0x42,
-  0x3d, 0x98, 0x83, 0x39, 0x94, 0x83, 0x3c, 0xf0, 0x01, 0x39, 0xbc, 0x43,
-  0x3d, 0x88, 0x03, 0x3b, 0x94, 0x83, 0x1f, 0xa0, 0x60, 0xa4, 0x74, 0x18,
-  0x81, 0x18, 0x2e, 0xe1, 0x9c, 0x46, 0x9a, 0x80, 0x66, 0x92, 0xd0, 0x32,
-  0x0c, 0xc3, 0xb0, 0xae, 0xeb, 0xba, 0xae, 0x03, 0xb1, 0x73, 0x04, 0xa0,
-  0x30, 0x05, 0x00, 0x00, 0x13, 0x14, 0x72, 0xc0, 0x87, 0x74, 0x60, 0x87,
-  0x36, 0x68, 0x87, 0x79, 0x68, 0x03, 0x72, 0xc0, 0x87, 0x0d, 0xae, 0x50,
-  0x0e, 0x6d, 0xd0, 0x0e, 0x7a, 0x50, 0x0e, 0x6d, 0x00, 0x0f, 0x7a, 0x30,
-  0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0xa0,
-  0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x78, 0xa0, 0x07, 0x78, 0xd0,
-  0x06, 0xe9, 0x10, 0x07, 0x76, 0xa0, 0x07, 0x71, 0x60, 0x07, 0x6d, 0x90,
-  0x0e, 0x73, 0x20, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xd0, 0x06, 0xe9, 0x60,
-  0x07, 0x74, 0xa0, 0x07, 0x76, 0x40, 0x07, 0x6d, 0x60, 0x0e, 0x71, 0x60,
-  0x07, 0x7a, 0x10, 0x07, 0x76, 0xd0, 0x06, 0xe6, 0x30, 0x07, 0x72, 0xa0,
-  0x07, 0x73, 0x20, 0x07, 0x6d, 0x60, 0x0e, 0x76, 0x40, 0x07, 0x7a, 0x60,
-  0x07, 0x74, 0xd0, 0x06, 0xee, 0x80, 0x07, 0x7a, 0x10, 0x07, 0x76, 0xa0,
-  0x07, 0x73, 0x20, 0x07, 0x7a, 0x60, 0x07, 0x74, 0x30, 0xe4, 0x09, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0xc8, 0x43,
-  0x00, 0x01, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x90,
-  0x67, 0x01, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80,
-  0x21, 0x4f, 0x03, 0x04, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x43, 0x1e, 0x08, 0x08, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x86, 0x3c, 0x12, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x0c, 0x79, 0x28, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x18, 0xf2, 0x58, 0x40, 0x00, 0x04, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0xe4, 0xc1, 0x80, 0x00, 0x08, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0xc8, 0xb3, 0x01, 0x01, 0x20,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x90, 0xc7, 0x03, 0x02,
-  0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x21, 0x4f, 0x18,
-  0x00, 0x01, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x90,
-  0x87, 0x0c, 0x80, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x20, 0x0b, 0x04, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x32, 0x1e, 0x98, 0x14,
-  0x19, 0x11, 0x4c, 0x90, 0x8c, 0x09, 0x26, 0x47, 0xc6, 0x04, 0x43, 0x1a,
-  0x4a, 0xa0, 0x08, 0x8a, 0x61, 0x04, 0xa0, 0x30, 0x0a, 0xa2, 0xd0, 0x03,
-  0x0a, 0xa1, 0x00, 0x03, 0xa8, 0x1b, 0x01, 0x20, 0xb7, 0xd0, 0x01, 0x01,
-  0x11, 0x48, 0x9d, 0x01, 0xa0, 0x76, 0x06, 0x80, 0xd0, 0x19, 0x00, 0x00,
-  0x79, 0x18, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00, 0x1a, 0x03, 0x4c, 0x90,
-  0x46, 0x02, 0x13, 0x44, 0x35, 0x18, 0x63, 0x0b, 0x73, 0x3b, 0x03, 0xb1,
-  0x2b, 0x93, 0x9b, 0x4b, 0x7b, 0x73, 0x03, 0x99, 0x71, 0xb9, 0x01, 0x41,
-  0xa1, 0x0b, 0x3b, 0x9b, 0x7b, 0x91, 0x2a, 0x62, 0x2a, 0x0a, 0x9a, 0x2a,
-  0xfa, 0x9a, 0xb9, 0x81, 0x79, 0x31, 0x4b, 0x73, 0x0b, 0x63, 0x4b, 0xd9,
-  0x10, 0x04, 0x13, 0x84, 0xa1, 0x99, 0x20, 0x0c, 0xce, 0x06, 0x61, 0x20,
-  0x26, 0x08, 0xc3, 0xb3, 0x41, 0x18, 0x0c, 0x0a, 0x63, 0x73, 0x1b, 0x06,
-  0xc4, 0x20, 0x26, 0x08, 0x03, 0x34, 0x41, 0x40, 0x03, 0x8d, 0xc0, 0x04,
-  0x61, 0x88, 0x26, 0x08, 0x9d, 0xb5, 0x61, 0x51, 0x16, 0x46, 0x51, 0x86,
-  0xc6, 0x71, 0x9c, 0x62, 0x82, 0xa0, 0x06, 0xd8, 0x04, 0x61, 0x90, 0x36,
-  0x08, 0x43, 0xb4, 0x61, 0x19, 0x20, 0x46, 0x19, 0x86, 0xc6, 0x71, 0x1c,
-  0x69, 0xc3, 0x42, 0x2c, 0x8c, 0x42, 0x0c, 0x8d, 0xe3, 0x38, 0xc5, 0x86,
-  0xe1, 0x99, 0xa8, 0x09, 0x42, 0x1b, 0x64, 0x13, 0x84, 0x61, 0xda, 0x80,
-  0x28, 0x16, 0xa3, 0x28, 0xc3, 0x05, 0x6c, 0x08, 0xb0, 0x0d, 0x04, 0x50,
-  0x65, 0xc0, 0x04, 0x41, 0x00, 0xa8, 0x1c, 0xc9, 0xa5, 0x91, 0x4d, 0x85,
-  0xb5, 0xc1, 0xb1, 0x95, 0x4d, 0x10, 0xdc, 0xe0, 0x9a, 0x20, 0x0c, 0xd4,
-  0x04, 0x61, 0xa8, 0x36, 0x0c, 0xdf, 0x30, 0x6c, 0x20, 0x94, 0xce, 0x03,
-  0x83, 0x0d, 0xc5, 0xc6, 0x01, 0x5a, 0x18, 0x54, 0x61, 0x63, 0xb3, 0x6b,
-  0x73, 0x49, 0x23, 0x2b, 0x73, 0xa3, 0x9b, 0x12, 0x04, 0x55, 0xc8, 0xf0,
-  0x5c, 0xec, 0xca, 0xe4, 0xe6, 0xd2, 0xde, 0xdc, 0xa6, 0x04, 0x44, 0x13,
-  0x32, 0x3c, 0x17, 0xbb, 0x30, 0x36, 0xbb, 0x32, 0xb9, 0x29, 0x81, 0x51,
-  0x87, 0x0c, 0xcf, 0x65, 0x0e, 0x2d, 0x8c, 0xac, 0x4c, 0xae, 0xe9, 0x8d,
-  0xac, 0x8c, 0x6d, 0x4a, 0x80, 0x94, 0x21, 0xc3, 0x73, 0x91, 0x2b, 0x9b,
-  0x7b, 0xab, 0x93, 0x1b, 0x2b, 0x9b, 0x9b, 0x12, 0x64, 0x75, 0xc8, 0xf0,
-  0x5c, 0xca, 0xdc, 0xe8, 0xe4, 0xf2, 0xa0, 0xde, 0xd2, 0xdc, 0xe8, 0xe6,
-  0xa6, 0x04, 0x61, 0x00, 0x79, 0x18, 0x00, 0x00, 0x51, 0x00, 0x00, 0x00,
-  0x33, 0x08, 0x80, 0x1c, 0xc4, 0xe1, 0x1c, 0x66, 0x14, 0x01, 0x3d, 0x88,
-  0x43, 0x38, 0x84, 0xc3, 0x8c, 0x42, 0x80, 0x07, 0x79, 0x78, 0x07, 0x73,
-  0x98, 0x71, 0x0c, 0xe6, 0x00, 0x0f, 0xed, 0x10, 0x0e, 0xf4, 0x80, 0x0e,
-  0x33, 0x0c, 0x42, 0x1e, 0xc2, 0xc1, 0x1d, 0xce, 0xa1, 0x1c, 0x66, 0x30,
-  0x05, 0x3d, 0x88, 0x43, 0x38, 0x84, 0x83, 0x1b, 0xcc, 0x03, 0x3d, 0xc8,
-  0x43, 0x3d, 0x8c, 0x03, 0x3d, 0xcc, 0x78, 0x8c, 0x74, 0x70, 0x07, 0x7b,
-  0x08, 0x07, 0x79, 0x48, 0x87, 0x70, 0x70, 0x07, 0x7a, 0x70, 0x03, 0x76,
-  0x78, 0x87, 0x70, 0x20, 0x87, 0x19, 0xcc, 0x11, 0x0e, 0xec, 0x90, 0x0e,
-  0xe1, 0x30, 0x0f, 0x6e, 0x30, 0x0f, 0xe3, 0xf0, 0x0e, 0xf0, 0x50, 0x0e,
-  0x33, 0x10, 0xc4, 0x1d, 0xde, 0x21, 0x1c, 0xd8, 0x21, 0x1d, 0xc2, 0x61,
-  0x1e, 0x66, 0x30, 0x89, 0x3b, 0xbc, 0x83, 0x3b, 0xd0, 0x43, 0x39, 0xb4,
-  0x03, 0x3c, 0xbc, 0x83, 0x3c, 0x84, 0x03, 0x3b, 0xcc, 0xf0, 0x14, 0x76,
-  0x60, 0x07, 0x7b, 0x68, 0x07, 0x37, 0x68, 0x87, 0x72, 0x68, 0x07, 0x37,
-  0x80, 0x87, 0x70, 0x90, 0x87, 0x70, 0x60, 0x07, 0x76, 0x28, 0x07, 0x76,
-  0xf8, 0x05, 0x76, 0x78, 0x87, 0x77, 0x80, 0x87, 0x5f, 0x08, 0x87, 0x71,
-  0x18, 0x87, 0x72, 0x98, 0x87, 0x79, 0x98, 0x81, 0x2c, 0xee, 0xf0, 0x0e,
-  0xee, 0xe0, 0x0e, 0xf5, 0xc0, 0x0e, 0xec, 0x30, 0x03, 0x62, 0xc8, 0xa1,
-  0x1c, 0xe4, 0xa1, 0x1c, 0xcc, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xdc, 0x61,
-  0x1c, 0xca, 0x21, 0x1c, 0xc4, 0x81, 0x1d, 0xca, 0x61, 0x06, 0xd6, 0x90,
-  0x43, 0x39, 0xc8, 0x43, 0x39, 0x98, 0x43, 0x39, 0xc8, 0x43, 0x39, 0xb8,
-  0xc3, 0x38, 0x94, 0x43, 0x38, 0x88, 0x03, 0x3b, 0x94, 0xc3, 0x2f, 0xbc,
-  0x83, 0x3c, 0xfc, 0x82, 0x3b, 0xd4, 0x03, 0x3b, 0xb0, 0xc3, 0x0c, 0xc4,
-  0x21, 0x07, 0x7c, 0x70, 0x03, 0x7a, 0x28, 0x87, 0x76, 0x80, 0x87, 0x19,
-  0xd1, 0x43, 0x0e, 0xf8, 0xe0, 0x06, 0xe4, 0x20, 0x0e, 0xe7, 0xe0, 0x06,
-  0xf6, 0x10, 0x0e, 0xf2, 0xc0, 0x0e, 0xe1, 0x90, 0x0f, 0xef, 0x50, 0x0f,
-  0xf4, 0x30, 0x83, 0x81, 0xc8, 0x01, 0x1f, 0xdc, 0x40, 0x1c, 0xe4, 0xa1,
-  0x1c, 0xc2, 0x61, 0x1d, 0xdc, 0x40, 0x1c, 0xe4, 0x01, 0x00, 0x00, 0x00,
-  0x71, 0x20, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x06, 0xa0, 0x80, 0x11,
-  0x32, 0xb0, 0x00, 0xf3, 0x2c, 0x84, 0x19, 0x40, 0xc3, 0xe5, 0x3b, 0x8f,
-  0x1f, 0x20, 0x0d, 0x10, 0x61, 0x7e, 0x71, 0xdb, 0x96, 0xb0, 0x0d, 0x97,
-  0xef, 0x3c, 0xbe, 0x10, 0x50, 0x45, 0x41, 0x44, 0xa5, 0x03, 0x0c, 0x25,
-  0x61, 0x00, 0x02, 0xe6, 0x23, 0xb7, 0x6d, 0x0a, 0xd2, 0x70, 0xf9, 0xce,
-  0xe3, 0x0b, 0x11, 0x01, 0x4c, 0x44, 0x08, 0x34, 0xc3, 0x42, 0xd8, 0x81,
-  0x33, 0x5c, 0xbe, 0xf3, 0xf8, 0x83, 0x33, 0xe1, 0x7e, 0x71, 0xdb, 0xc6,
-  0x40, 0x0d, 0x97, 0xef, 0x3c, 0x3e, 0x03, 0x28, 0x44, 0xe7, 0x50, 0xc1,
-  0x42, 0xf8, 0x85, 0x8e, 0x9b, 0xc0, 0x35, 0x5c, 0xbe, 0xf3, 0xf8, 0x11,
-  0x60, 0x6d, 0x54, 0x51, 0x10, 0x51, 0xe9, 0x00, 0x83, 0x5f, 0xd4, 0xba,
-  0x2d, 0x5c, 0xc3, 0xe5, 0x3b, 0x8f, 0x1f, 0x01, 0xd6, 0x46, 0x15, 0x05,
-  0x11, 0x95, 0x0e, 0x30, 0xf8, 0xc8, 0x6d, 0xdb, 0x00, 0x36, 0x5c, 0xbe,
-  0xf3, 0xf8, 0x11, 0x60, 0x6d, 0x54, 0x51, 0x10, 0x11, 0x3b, 0x39, 0x11,
-  0xe1, 0x17, 0xb5, 0x6e, 0x05, 0xd2, 0x70, 0xf9, 0xce, 0xe3, 0x4f, 0x44,
-  0x34, 0x21, 0x40, 0x84, 0xf9, 0xc5, 0x6d, 0x1b, 0x82, 0x34, 0x5c, 0xbe,
-  0xf3, 0xf8, 0x13, 0x11, 0x4d, 0x08, 0x10, 0x61, 0x3e, 0x72, 0xdb, 0x16,
-  0x20, 0x0d, 0x97, 0xef, 0x3c, 0xfe, 0x74, 0x44, 0x04, 0x30, 0x88, 0x83,
-  0x8f, 0xdc, 0xb6, 0x11, 0x3c, 0xc3, 0xe5, 0x3b, 0x8f, 0x4f, 0x35, 0x40,
-  0x84, 0xf9, 0xc5, 0x6d, 0x03, 0x00, 0x00, 0x00, 0x61, 0x20, 0x00, 0x00,
-  0x15, 0x13, 0x00, 0x00, 0x13, 0x04, 0x24, 0x14, 0x0b, 0x04, 0x00, 0x00,
-  0x1e, 0x00, 0x00, 0x00, 0x34, 0x14, 0x58, 0xd9, 0x95, 0xa5, 0x40, 0x0d,
-  0x94, 0x51, 0x21, 0x15, 0x57, 0xc1, 0x95, 0x5c, 0xd9, 0x14, 0x4b, 0x61,
-  0x0a, 0x94, 0x72, 0x40, 0xd1, 0x94, 0x6e, 0x40, 0x39, 0x94, 0x02, 0x21,
-  0x45, 0x50, 0x02, 0x65, 0x40, 0xc6, 0x0c, 0x00, 0x3d, 0x23, 0x00, 0x63,
-  0x04, 0x20, 0x08, 0x82, 0xf8, 0x37, 0x46, 0x00, 0x82, 0x20, 0x48, 0xff,
-  0xc2, 0x18, 0x01, 0x08, 0x82, 0x20, 0xfd, 0x8d, 0x11, 0x80, 0x20, 0x08,
-  0xf2, 0xdf, 0x18, 0x01, 0x08, 0x82, 0x20, 0xfe, 0x0b, 0x63, 0x04, 0x20,
-  0x08, 0x82, 0x21, 0x38, 0x8c, 0x11, 0x80, 0x20, 0x08, 0xea, 0xdf, 0x18,
-  0x01, 0x08, 0x82, 0xa0, 0xfe, 0x0b, 0x63, 0x04, 0x20, 0x08, 0x82, 0xf0,
-  0x37, 0x46, 0x00, 0x82, 0x20, 0x08, 0xff, 0xc2, 0x18, 0x01, 0x08, 0x82,
-  0x20, 0x08, 0x06, 0x00, 0x23, 0x06, 0x09, 0x00, 0x82, 0x60, 0xe0, 0xc9,
-  0x41, 0xf6, 0xb8, 0x81, 0x1b, 0x98, 0xc1, 0x88, 0x41, 0x02, 0x80, 0x20,
-  0x18, 0x78, 0x73, 0xa0, 0x41, 0x70, 0x00, 0x07, 0x67, 0x30, 0x62, 0x90,
-  0x00, 0x20, 0x08, 0x06, 0x1e, 0x1d, 0x6c, 0x91, 0x1b, 0xb8, 0x01, 0x1a,
-  0x8c, 0x18, 0x24, 0x00, 0x08, 0x82, 0x81, 0x57, 0x07, 0xdc, 0xf4, 0x06,
-  0x6f, 0x90, 0x06, 0x23, 0x06, 0x06, 0x00, 0x82, 0x60, 0x40, 0xfc, 0xc1,
-  0x05, 0x07, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xb0, 0xe1, 0x41, 0x19,
-  0x08, 0x71, 0x30, 0x9a, 0x10, 0x00, 0x15, 0x0c, 0x30, 0x9a, 0x30, 0x04,
-  0xc3, 0x0d, 0x42, 0x40, 0x06, 0xb3, 0x0c, 0xc1, 0x08, 0x05, 0x23, 0x06,
-  0x07, 0x00, 0x82, 0x60, 0xb0, 0xf5, 0x81, 0x1a, 0x1c, 0x79, 0x30, 0x9a,
-  0x10, 0x0c, 0x17, 0x3c, 0x35, 0x9a, 0x30, 0x08, 0x17, 0x3c, 0x35, 0x62,
-  0x70, 0x00, 0x20, 0x08, 0x06, 0x9b, 0x28, 0xbc, 0x01, 0x13, 0x06, 0xa3,
-  0x09, 0x01, 0x30, 0xdc, 0x10, 0xf4, 0x01, 0x18, 0x4c, 0x37, 0x50, 0x5e,
-  0x30, 0xdd, 0x50, 0x69, 0x42, 0x21, 0x01, 0x4c, 0x37, 0x5c, 0x1c, 0x51,
-  0x48, 0x00, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xb0, 0xa9, 0xc2, 0x1d,
-  0x50, 0x68, 0x30, 0x9a, 0x10, 0x04, 0xa3, 0x09, 0x82, 0x30, 0x9a, 0x30,
-  0x0c, 0x15, 0x08, 0x52, 0x03, 0x21, 0x15, 0x0c, 0x52, 0x57, 0x30, 0x23,
-  0x06, 0x07, 0x00, 0x82, 0x60, 0xb0, 0xc9, 0xc2, 0x1f, 0x70, 0xac, 0x30,
-  0x9a, 0x10, 0x00, 0x15, 0x0c, 0x52, 0x5b, 0x10, 0x15, 0x20, 0x33, 0x9a,
-  0x50, 0x04, 0x15, 0x08, 0x52, 0x44, 0x10, 0x15, 0x34, 0x33, 0x9a, 0x90,
-  0x08, 0x15, 0x08, 0x52, 0x44, 0x10, 0xd7, 0x3c, 0x75, 0xc5, 0x53, 0x37,
-  0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x5b, 0x38, 0xb8, 0xc2,
-  0x1a, 0xd8, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2,
-  0x20, 0x8c, 0x26, 0x10, 0xc3, 0x11, 0x4f, 0x1d, 0xf1, 0xd4, 0x11, 0x4f,
-  0x1d, 0xf1, 0xd4, 0x88, 0x41, 0x03, 0x80, 0x20, 0x18, 0x58, 0xec, 0x10,
-  0x0b, 0xcc, 0xa2, 0x8c, 0x02, 0x31, 0x08, 0x81, 0x09, 0x01, 0x7c, 0x4e,
-  0x18, 0x66, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0xc2, 0x60, 0x1d, 0x72,
-  0x21, 0x0f, 0x02, 0x73, 0x40, 0x05, 0x72, 0x18, 0x4d, 0x08, 0x80, 0xd1,
-  0x04, 0x21, 0x18, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x32, 0x68, 0x07,
-  0x59, 0x10, 0x82, 0x0b, 0x9e, 0xbb, 0x63, 0x98, 0x11, 0x03, 0x05, 0x00,
-  0x41, 0x30, 0x08, 0x83, 0x78, 0xf8, 0x85, 0x3f, 0x08, 0xd8, 0xc1, 0x15,
-  0xd4, 0x61, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0xc4, 0xe0, 0x00,
-  0x40, 0x10, 0x0c, 0xc8, 0x60, 0x1e, 0x70, 0x41, 0x08, 0x2e, 0x78, 0x6e,
-  0xb8, 0xa1, 0x0e, 0xe8, 0x01, 0x0c, 0x0c, 0x89, 0x05, 0xf8, 0xd8, 0x20,
-  0x0b, 0xf0, 0x99, 0x65, 0x10, 0x86, 0xc1, 0x84, 0x55, 0x90, 0x8f, 0x09,
-  0xac, 0x20, 0x1f, 0xf3, 0x83, 0x58, 0x80, 0x8f, 0xf5, 0x81, 0x2c, 0xc0,
-  0xc7, 0x08, 0x41, 0x3e, 0x46, 0x08, 0xf2, 0x99, 0x25, 0x20, 0x4c, 0x14,
-  0x10, 0xf9, 0x18, 0x12, 0x0a, 0xf2, 0x31, 0xe1, 0x16, 0xe0, 0x63, 0x02,
-  0x2e, 0xc0, 0xc7, 0x84, 0x5a, 0x90, 0x8f, 0x09, 0xb6, 0x20, 0x9f, 0x59,
-  0x02, 0x62, 0xa0, 0xe2, 0x81, 0x04, 0x62, 0x18, 0xa8, 0x78, 0x20, 0x81,
-  0x18, 0x46, 0x13, 0x62, 0x41, 0x18, 0x6e, 0x08, 0x4c, 0x02, 0x0c, 0x66,
-  0x19, 0x0a, 0x23, 0x18, 0x31, 0x30, 0x00, 0x10, 0x04, 0x03, 0x08, 0x26,
-  0xd8, 0x81, 0x18, 0x31, 0x30, 0x00, 0x10, 0x04, 0x03, 0x28, 0x26, 0xda,
-  0x81, 0x98, 0x25, 0x30, 0x06, 0x2a, 0x1e, 0xa2, 0x60, 0x88, 0x81, 0x8a,
-  0x87, 0x28, 0x18, 0x62, 0x38, 0x42, 0x50, 0x05, 0xe2, 0x1b, 0x8e, 0x18,
-  0x52, 0x41, 0xf8, 0x4a, 0x08, 0x76, 0x38, 0x82, 0x68, 0x05, 0xe2, 0x2b,
-  0x21, 0xd8, 0xe1, 0x08, 0x63, 0x15, 0x84, 0xaf, 0x02, 0x61, 0x67, 0x19,
-  0x0e, 0x2d, 0x18, 0x4d, 0xf0, 0x85, 0x61, 0xb8, 0x21, 0x98, 0x09, 0x30,
-  0x98, 0x65, 0x40, 0x92, 0xa0, 0x74, 0x61, 0x24, 0xe0, 0x82, 0xa7, 0x46,
-  0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0xea, 0x09, 0x92, 0x68, 0xe6, 0x61,
-  0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x28, 0x9f, 0x20, 0x89, 0x40, 0x28,
-  0x5e, 0x38, 0x09, 0xb8, 0xe0, 0xa9, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30,
-  0xa0, 0xc2, 0x02, 0x25, 0xa0, 0x7b, 0x18, 0x31, 0x38, 0x00, 0x10, 0x04,
-  0x03, 0x4a, 0x2c, 0x50, 0x22, 0x10, 0x66, 0x09, 0xb4, 0xe1, 0x06, 0x65,
-  0x27, 0xc0, 0x60, 0x96, 0x41, 0xd1, 0x02, 0xd3, 0x05, 0x5e, 0x88, 0xcf,
-  0x2c, 0xc3, 0xe2, 0x4c, 0xd6, 0x0b, 0x55, 0x7c, 0x2c, 0x10, 0xe8, 0x73,
-  0xc1, 0x30, 0x17, 0x3c, 0x65, 0x41, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14,
-  0x41, 0x16, 0x3a, 0xdc, 0x10, 0x88, 0x05, 0x18, 0xcc, 0x32, 0x30, 0x4d,
-  0x60, 0x43, 0x39, 0xc0, 0x67, 0x96, 0x40, 0x32, 0x72, 0x20, 0xe2, 0x33,
-  0x4b, 0x20, 0xcd, 0x32, 0x3c, 0x12, 0x67, 0x5f, 0x39, 0xc4, 0xc7, 0x02,
-  0x86, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x3c, 0xf2, 0xb1, 0x22,
-  0x88, 0x4f, 0x11, 0x6e, 0xa1, 0xc3, 0x0d, 0x01, 0x5b, 0x80, 0xc1, 0x2c,
-  0x03, 0x14, 0x05, 0xd6, 0x0e, 0x43, 0x7c, 0x66, 0x09, 0x24, 0x23, 0xe0,
-  0x01, 0x3e, 0xb3, 0x04, 0xd2, 0x40, 0xcb, 0x83, 0x31, 0x56, 0x43, 0x40,
-  0x42, 0x24, 0x0b, 0x8e, 0xb9, 0x83, 0x3c, 0xc4, 0x67, 0x96, 0x61, 0xb2,
-  0xcc, 0xc0, 0xe6, 0x41, 0x0d, 0xe2, 0x63, 0x81, 0x40, 0x9f, 0x0b, 0x86,
-  0xb9, 0xe0, 0x29, 0x0b, 0x0a, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0xbd,
-  0xd0, 0xe1, 0x86, 0x00, 0x2f, 0xc0, 0x60, 0x96, 0x81, 0xaa, 0x02, 0x1b,
-  0xf6, 0x01, 0x3e, 0xb3, 0x04, 0x9a, 0xe1, 0x03, 0x11, 0x9f, 0x59, 0x02,
-  0x6d, 0x96, 0xe1, 0xd2, 0xdc, 0xc0, 0xe8, 0x20, 0x1f, 0xe2, 0x63, 0x01,
-  0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x1e, 0xf9, 0x58, 0x11,
-  0xc4, 0xa7, 0x08, 0xd2, 0xd0, 0xe1, 0x86, 0x40, 0x34, 0xc0, 0x60, 0x96,
-  0x01, 0xcb, 0x02, 0x0b, 0x89, 0x21, 0x3e, 0xb3, 0x04, 0x9a, 0x11, 0x26,
-  0x01, 0x9f, 0x59, 0x02, 0x6d, 0xa0, 0xe8, 0x11, 0x07, 0xc4, 0x1f, 0x12,
-  0x7f, 0x30, 0xd8, 0x20, 0x63, 0x03, 0x8c, 0x0d, 0x2c, 0x36, 0xa8, 0xd8,
-  0x80, 0x1a, 0x28, 0x7a, 0x78, 0x01, 0xf1, 0x87, 0xc4, 0x1f, 0x0c, 0x22,
-  0x33, 0x30, 0x7f, 0xb0, 0xb0, 0x4a, 0xa3, 0x0e, 0x1f, 0x9e, 0x9a, 0x65,
-  0xd8, 0xe6, 0xa0, 0x14, 0x46, 0x13, 0x6e, 0x62, 0x18, 0x6e, 0x08, 0x52,
-  0x03, 0x0c, 0x66, 0x19, 0x38, 0x2f, 0x18, 0x8e, 0x28, 0xd4, 0x62, 0xf8,
-  0xce, 0x18, 0x66, 0xb8, 0x21, 0xa8, 0x09, 0x32, 0xa8, 0x21, 0xd0, 0xe1,
-  0x08, 0xc4, 0x2d, 0x86, 0xaf, 0x02, 0x41, 0x4f, 0x19, 0x66, 0xb8, 0x21,
-  0xc0, 0x09, 0x32, 0xa8, 0x60, 0xd0, 0x59, 0x86, 0x4e, 0x0e, 0x82, 0xe3,
-  0x87, 0x61, 0xae, 0x19, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x36,
-  0xde, 0x48, 0x0d, 0xb3, 0xb8, 0x8d, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10,
-  0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x0e, 0x19, 0x31,
-  0x40, 0x00, 0x10, 0x04, 0x03, 0x6c, 0x3c, 0x60, 0xe3, 0x20, 0x82, 0x11,
-  0x03, 0x04, 0x00, 0x41, 0x30, 0xc0, 0xc8, 0x23, 0x36, 0x18, 0x22, 0x18,
-  0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xac, 0x3c, 0x64, 0x43, 0x22, 0x82,
-  0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x58, 0xd8, 0x23, 0x36, 0xe0, 0x22,
-  0xf0, 0x8d, 0xd0, 0x00, 0x8f, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xc1, 0x59,
-  0x02, 0x39, 0x18, 0x6e, 0xc8, 0xc8, 0x03, 0x0c, 0x66, 0x19, 0x3e, 0x30,
-  0x08, 0x6a, 0x2d, 0x68, 0x03, 0x2e, 0x78, 0x6a, 0xc4, 0xe0, 0x00, 0x40,
-  0x10, 0x0c, 0x28, 0xf7, 0xa8, 0x8d, 0x8f, 0x34, 0x46, 0x0c, 0x0e, 0x00,
-  0x04, 0xc1, 0x80, 0x7a, 0x8f, 0xda, 0x08, 0x84, 0x0b, 0x86, 0x29, 0xb7,
-  0xc8, 0x0d, 0xb8, 0xe0, 0xa9, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0,
-  0xe6, 0x43, 0x37, 0xc6, 0x20, 0x35, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1,
-  0x80, 0xa2, 0x0f, 0xdd, 0x08, 0x84, 0x0b, 0x86, 0xb9, 0xe0, 0xa9, 0x3b,
-  0x9e, 0xba, 0x9b, 0x18, 0xe6, 0xd0, 0x60, 0x98, 0x23, 0x86, 0x39, 0x62,
-  0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xd8, 0xf2, 0xc3, 0x3c, 0x46,
-  0x83, 0x3e, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06,
-  0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10,
-  0x0c, 0x30, 0x10, 0x69, 0x8f, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04,
-  0xc1, 0x00, 0x0b, 0x11, 0xf7, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40,
-  0x10, 0x0c, 0x30, 0x11, 0x79, 0x8f, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00,
-  0x04, 0xc1, 0x60, 0x49, 0x11, 0xf7, 0x68, 0x8d, 0x60, 0x3f, 0x7c, 0xa3,
-  0x3f, 0x46, 0x13, 0x02, 0xe0, 0x82, 0x07, 0x67, 0x09, 0xe4, 0x60, 0xb8,
-  0xc1, 0x0e, 0x40, 0x04, 0x0c, 0x66, 0x19, 0xc2, 0x40, 0x0e, 0x02, 0xfb,
-  0x8b, 0xd0, 0x88, 0xcf, 0x70, 0xc4, 0x1e, 0x88, 0x06, 0xf1, 0xcd, 0x32,
-  0x88, 0x41, 0x19, 0x04, 0x36, 0x1a, 0x7c, 0x10, 0x1f, 0x0b, 0x06, 0xfa,
-  0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e,
-  0x45, 0xa8, 0x88, 0x0e, 0x37, 0x04, 0x28, 0x02, 0x06, 0xb3, 0x0c, 0x63,
-  0x40, 0x06, 0x81, 0x0d, 0xab, 0x01, 0x9f, 0x59, 0x82, 0x34, 0x30, 0xd5,
-  0x20, 0xe2, 0x33, 0x4b, 0x90, 0x06, 0xc3, 0x11, 0xa6, 0xb0, 0x1a, 0xc2,
-  0x37, 0xcb, 0x60, 0x06, 0x69, 0x10, 0xd8, 0x29, 0xb0, 0x46, 0x7c, 0x2c,
-  0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x41, 0x24, 0x1f, 0x2b,
-  0x82, 0xf8, 0x14, 0x51, 0x23, 0x3a, 0xdc, 0x10, 0xcc, 0x08, 0x18, 0xcc,
-  0x32, 0x9c, 0x01, 0x1a, 0x04, 0x46, 0x1b, 0x43, 0x7c, 0x66, 0x09, 0xd2,
-  0xc0, 0x88, 0xdb, 0x80, 0xcf, 0x2c, 0x41, 0x1a, 0x0c, 0xb4, 0x3c, 0xda,
-  0x18, 0x60, 0x64, 0x40, 0x9c, 0x81, 0x80, 0x06, 0x62, 0x51, 0x06, 0x17,
-  0x0c, 0x63, 0xb6, 0xa1, 0x1b, 0xf1, 0x19, 0x8e, 0x98, 0x85, 0xdd, 0x20,
-  0xbe, 0x59, 0x06, 0x35, 0x68, 0x83, 0xc0, 0x78, 0x83, 0x16, 0xe2, 0x63,
-  0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x0c, 0xf9, 0x58,
-  0x11, 0xc4, 0xa7, 0x88, 0x31, 0xd1, 0xe1, 0x86, 0x20, 0x4c, 0xc0, 0x60,
-  0x96, 0x61, 0x0d, 0xd8, 0x20, 0xb0, 0x81, 0x3c, 0xe0, 0x33, 0x4b, 0x10,
-  0x07, 0x16, 0x1e, 0x44, 0x7c, 0x66, 0x09, 0xe2, 0x60, 0x38, 0xc2, 0x17,
-  0xc4, 0x43, 0xf8, 0x66, 0x19, 0xdc, 0x20, 0x0e, 0x02, 0xfb, 0x85, 0xf1,
-  0x88, 0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x88,
-  0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xdc, 0x44, 0x87, 0x1b, 0x02, 0x36,
-  0x01, 0x83, 0x59, 0x86, 0x37, 0x80, 0x83, 0xc0, 0xd6, 0x63, 0x88, 0xcf,
-  0x2c, 0x41, 0x1c, 0x18, 0x01, 0x1f, 0xf0, 0x99, 0x25, 0x88, 0x83, 0x81,
-  0x96, 0x47, 0x5b, 0x03, 0x8c, 0x0d, 0x88, 0x37, 0x10, 0xe0, 0x40, 0x36,
-  0xda, 0xe0, 0x82, 0x61, 0x2e, 0x78, 0xea, 0xb6, 0xa7, 0x8e, 0x37, 0x86,
-  0xb9, 0x76, 0x18, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00,
-  0x40, 0x10, 0x0c, 0x36, 0x3f, 0x59, 0x13, 0x14, 0xc9, 0x93, 0xd1, 0x84,
-  0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86,
-  0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xac, 0x54, 0xe4,
-  0x24, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xc0, 0x4c, 0x65,
-  0x4e, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xec, 0x54,
-  0xe8, 0x24, 0x21, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x58, 0x5c,
-  0x65, 0x4e, 0x64, 0x24, 0x00, 0x95, 0x31, 0x11, 0x95, 0xd1, 0x84, 0x00,
-  0xb8, 0xe0, 0xc1, 0x59, 0x02, 0x39, 0x18, 0x68, 0x79, 0x4c, 0xa3, 0x33,
-  0x25, 0x8e, 0x25, 0x3e, 0x21, 0x0e, 0x4c, 0x09, 0x0c, 0x2e, 0x30, 0x6e,
-  0xc4, 0xc0, 0x01, 0x40, 0x10, 0x0c, 0x1a, 0x59, 0x79, 0x93, 0x1c, 0x91,
-  0x91, 0x52, 0x09, 0xcc, 0xc4, 0x4c, 0xcc, 0x04, 0x4d, 0x4e, 0x65, 0x96,
-  0x60, 0x84, 0x86, 0x1b, 0x46, 0xa3, 0x54, 0xc0, 0x60, 0x96, 0x81, 0x0e,
-  0x62, 0x22, 0x18, 0x31, 0x30, 0x00, 0x10, 0x04, 0x03, 0x48, 0x56, 0xe4,
-  0x24, 0x24, 0x46, 0x0c, 0x0c, 0x00, 0x04, 0xc1, 0x00, 0x9a, 0x95, 0x39,
-  0x09, 0x09, 0x13, 0xce, 0x04, 0x3e, 0x26, 0xa0, 0x09, 0x7c, 0x46, 0x13,
-  0x72, 0x64, 0x18, 0x6e, 0x08, 0x56, 0x05, 0x0c, 0x66, 0x19, 0xea, 0xe0,
-  0x0e, 0x82, 0xe1, 0x08, 0x83, 0x4d, 0x86, 0xef, 0x8e, 0x61, 0x86, 0x1b,
-  0x82, 0x1b, 0x21, 0x83, 0x1a, 0x02, 0x1d, 0x8e, 0x48, 0xe0, 0x64, 0xf8,
-  0x2a, 0x10, 0xf4, 0x96, 0x61, 0x86, 0x1b, 0x02, 0x1d, 0x21, 0x83, 0x0a,
-  0x06, 0x9d, 0x65, 0xb0, 0x83, 0x55, 0x08, 0xce, 0x3f, 0x86, 0xb9, 0x97,
-  0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x36, 0x5f, 0x59, 0x15,
-  0x34, 0xc9, 0x95, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84,
-  0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x0e, 0x19, 0x31, 0x40, 0x00, 0x10,
-  0x04, 0x03, 0xac, 0x5c, 0x64, 0xe5, 0x20, 0x82, 0x11, 0x03, 0x04, 0x00,
-  0x41, 0x30, 0xc0, 0xcc, 0x65, 0x56, 0x18, 0x22, 0x18, 0x31, 0x40, 0x00,
-  0x10, 0x04, 0x03, 0xec, 0x5c, 0x68, 0x45, 0x22, 0x82, 0x11, 0x03, 0x05,
-  0x00, 0x41, 0x30, 0x58, 0xdc, 0x65, 0x56, 0xe4, 0x24, 0x00, 0x97, 0x51,
-  0x11, 0x97, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xc1, 0x59, 0x82, 0x55, 0x18,
-  0x6e, 0xc8, 0xcc, 0x05, 0x0c, 0x66, 0x19, 0xf0, 0x20, 0x0f, 0x82, 0x6a,
-  0x13, 0x5b, 0x81, 0x0b, 0x9e, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03,
-  0x0a, 0x5e, 0x6e, 0x05, 0x0c, 0x4c, 0x65, 0xc4, 0xe0, 0x00, 0x40, 0x10,
-  0x0c, 0xa8, 0x78, 0xb9, 0x95, 0x40, 0xb8, 0x60, 0x98, 0x82, 0x93, 0x5d,
-  0x81, 0x0b, 0x9e, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0xaa, 0x5e,
-  0x78, 0x85, 0x0c, 0x56, 0x65, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x28,
-  0x7b, 0xe1, 0x95, 0x40, 0xb8, 0x60, 0x98, 0x0b, 0x9e, 0xba, 0xe3, 0xa9,
-  0xcb, 0x91, 0x61, 0x4e, 0x2d, 0x86, 0x39, 0x62, 0x98, 0x23, 0x86, 0x19,
-  0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x6d, 0x5f, 0xd0, 0xa5, 0x54, 0xec,
-  0x65, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46,
-  0x13, 0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x00,
-  0x13, 0x99, 0x77, 0x49, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c,
-  0xb0, 0x91, 0x81, 0x97, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1,
-  0x00, 0x23, 0x99, 0x78, 0x49, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10,
-  0x0c, 0x96, 0x95, 0x81, 0x97, 0x57, 0x09, 0xfa, 0x05, 0x5c, 0xfe, 0x65,
-  0x34, 0x21, 0x00, 0x2e, 0x78, 0x70, 0x96, 0x60, 0x15, 0x86, 0x1b, 0xec,
-  0x40, 0x64, 0xc0, 0x60, 0x96, 0x41, 0x0f, 0x56, 0x21, 0xb0, 0x50, 0x19,
-  0x95, 0xf8, 0x0c, 0x47, 0xf0, 0x01, 0xa9, 0x10, 0xdf, 0x2c, 0xc3, 0x1e,
-  0xf8, 0x41, 0x60, 0xa5, 0xd2, 0x07, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05,
-  0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x04,
-  0xcb, 0xe8, 0x70, 0x43, 0xa0, 0x32, 0x60, 0x30, 0xcb, 0xc0, 0x07, 0x7d,
-  0x10, 0xd8, 0xd0, 0x2a, 0xf0, 0x99, 0x25, 0x10, 0x05, 0x63, 0x15, 0x22,
-  0x3e, 0xb3, 0x04, 0xa2, 0x30, 0x1c, 0x71, 0x0a, 0xad, 0x22, 0x7c, 0xb3,
-  0x0c, 0x7f, 0x20, 0x0a, 0x81, 0xa1, 0x82, 0xab, 0xc4, 0xc7, 0x02, 0x87,
-  0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88,
-  0x4f, 0x11, 0x37, 0xa3, 0xc3, 0x0d, 0x41, 0xcd, 0x80, 0xc1, 0x2c, 0x03,
-  0x28, 0x84, 0x42, 0x60, 0xb6, 0x32, 0xc4, 0x67, 0x96, 0x40, 0x14, 0x8c,
-  0xc8, 0x15, 0xf8, 0xcc, 0x12, 0x88, 0xc2, 0x40, 0xcb, 0xa3, 0xf1, 0x01,
-  0xd6, 0x07, 0x04, 0x28, 0x08, 0xa1, 0x40, 0x16, 0x7e, 0x70, 0xc1, 0x30,
-  0x86, 0x2b, 0xbc, 0x12, 0x9f, 0xe1, 0x08, 0x5a, 0xe8, 0x15, 0xe2, 0x9b,
-  0x65, 0x18, 0x05, 0x53, 0x08, 0xcc, 0x57, 0x6a, 0x21, 0x3e, 0x16, 0x0c,
-  0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0xc0, 0x90, 0x8f, 0x15, 0x41,
-  0x7c, 0x8a, 0x28, 0x1b, 0x1d, 0x6e, 0x08, 0xc6, 0x06, 0x0c, 0x66, 0x19,
-  0x48, 0xa1, 0x14, 0x02, 0x1b, 0xcc, 0x05, 0x3e, 0xb3, 0x04, 0xaa, 0x60,
-  0xe3, 0x42, 0xc4, 0x67, 0x96, 0x40, 0x15, 0x86, 0x23, 0x7e, 0x81, 0x5c,
-  0x84, 0x6f, 0x96, 0xe1, 0x14, 0x54, 0x21, 0x30, 0x70, 0x28, 0x97, 0xf8,
-  0x58, 0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca, 0x82, 0x48, 0x3e,
-  0x56, 0x04, 0xf1, 0x29, 0x02, 0x6e, 0x74, 0xb8, 0x21, 0x70, 0x1b, 0x30,
-  0x98, 0x65, 0x40, 0x85, 0x54, 0x08, 0xac, 0x5d, 0x86, 0xf8, 0xcc, 0x12,
-  0xa8, 0x82, 0x11, 0xf2, 0x02, 0x9f, 0x59, 0x02, 0x55, 0x18, 0x68, 0x79,
-  0x34, 0x52, 0xc0, 0x4a, 0x81, 0x40, 0x05, 0x21, 0x15, 0x68, 0xc3, 0x14,
-  0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x6e, 0x7b, 0xea, 0x7c, 0x65, 0x98, 0x7b,
-  0x8f, 0x61, 0x8e, 0x18, 0xe6, 0x88, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04,
-  0xc1, 0x60, 0x03, 0x9d, 0xb6, 0x51, 0x99, 0xbd, 0x19, 0x4d, 0x08, 0x80,
-  0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0x22,
-  0x91, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xc0, 0x4e, 0x87, 0x6e, 0x12,
-  0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x0c, 0x75, 0xea, 0x26,
-  0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xc0, 0x52, 0xc7, 0x6e,
-  0x12, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x05, 0x76, 0xea,
-  0x86, 0x66, 0x02, 0xd1, 0x29, 0x1b, 0xd2, 0x19, 0x4d, 0x08, 0x80, 0x0b,
-  0x1e, 0x9c, 0x25, 0x58, 0x85, 0x81, 0x96, 0xc7, 0x34, 0xec, 0x40, 0xd5,
-  0xea, 0x80, 0x25, 0xf0, 0x40, 0x50, 0x05, 0x55, 0xcb, 0x83, 0x59, 0x06,
-  0x56, 0x70, 0x85, 0x7d, 0x18, 0x8e, 0xf0, 0x07, 0xb3, 0x19, 0xbe, 0xfb,
-  0x87, 0x61, 0x86, 0x1b, 0x82, 0x98, 0x21, 0x83, 0x1a, 0x02, 0x1d, 0x8e,
-  0x18, 0x09, 0xb5, 0x19, 0xbe, 0x0a, 0x04, 0xbd, 0x92, 0x18, 0x66, 0xb8,
-  0x21, 0xa0, 0x19, 0x32, 0xa8, 0x60, 0xd0, 0x59, 0x86, 0x56, 0x10, 0x87,
-  0xe0, 0xf0, 0x65, 0x98, 0x4b, 0x91, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04,
-  0xc1, 0x60, 0xc3, 0x9d, 0xd2, 0x11, 0x9b, 0xd9, 0x19, 0x4d, 0x08, 0x80,
-  0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0xe2,
-  0x90, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xc0, 0x7e, 0x87, 0x75, 0x0e,
-  0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x0c, 0x7c, 0x5a, 0x87,
-  0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xc0, 0xc2, 0xc7, 0x75,
-  0x24, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x05, 0x7d, 0x5a,
-  0x87, 0x6d, 0x02, 0xdd, 0xe9, 0x1b, 0xde, 0x19, 0x4d, 0x08, 0x80, 0x0b,
-  0x1e, 0x9c, 0x25, 0x10, 0x87, 0xe1, 0x86, 0x99, 0x00, 0x1f, 0x30, 0x98,
-  0x65, 0x78, 0x05, 0x58, 0x08, 0xea, 0x6c, 0x60, 0x07, 0x2e, 0x78, 0x6a,
-  0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x28, 0xf5, 0x89, 0x1d, 0x9c, 0x00,
-  0x9d, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0xd6, 0x27, 0x76, 0x02,
-  0xe1, 0x82, 0x61, 0x4a, 0x6d, 0x6a, 0x07, 0x2e, 0x78, 0x6a, 0xc4, 0xe0,
-  0x00, 0x40, 0x10, 0x0c, 0xa8, 0xf7, 0xb1, 0x1d, 0x9f, 0x28, 0x9d, 0x11,
-  0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0xe0, 0xc7, 0x76, 0x02, 0xe1, 0x82,
-  0x61, 0x2e, 0x78, 0xea, 0x8e, 0xa7, 0x6e, 0x66, 0x86, 0x39, 0x32, 0x19,
-  0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c,
-  0xb6, 0xfa, 0x11, 0x9f, 0xbf, 0x81, 0x9f, 0xd1, 0x84, 0x00, 0x18, 0x4d,
-  0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x12, 0x19,
-  0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x8c, 0x7f, 0xd2, 0x27, 0x21, 0x82,
-  0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xc0, 0xfa, 0x47, 0x7d, 0x12, 0x22,
-  0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xcc, 0x7f, 0xd6, 0x27, 0x21,
-  0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x58, 0x4a, 0x48, 0x7d, 0x52,
-  0x27, 0xb8, 0x1f, 0xdd, 0xc9, 0x9f, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xc1,
-  0x59, 0x02, 0x71, 0x18, 0x6e, 0x80, 0x0b, 0xfe, 0x01, 0x83, 0x59, 0x86,
-  0x58, 0x10, 0x87, 0xc0, 0xf6, 0xa6, 0x6f, 0xe2, 0x33, 0x1c, 0x41, 0x17,
-  0x7e, 0x43, 0x7c, 0xb3, 0x0c, 0xb2, 0x50, 0x0b, 0x81, 0xfd, 0x4d, 0x5d,
-  0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x18,
-  0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x26, 0xa4, 0xc3, 0x0d, 0x01, 0x09,
-  0x81, 0xc1, 0x2c, 0xc3, 0x2c, 0xd0, 0x42, 0x60, 0xc3, 0xe9, 0xc0, 0x67,
-  0x96, 0x20, 0x17, 0xcc, 0x74, 0x88, 0xf8, 0xcc, 0x12, 0xe4, 0xc2, 0x70,
-  0xc4, 0x5f, 0x9c, 0x8e, 0xf0, 0xcd, 0x32, 0xd8, 0x42, 0x2e, 0x04, 0x06,
-  0x1a, 0xa8, 0x13, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f,
-  0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xc4, 0x90, 0x0e, 0x37,
-  0x04, 0x2f, 0x04, 0x06, 0xb3, 0x0c, 0xb7, 0x80, 0x0b, 0x81, 0xc1, 0xce,
-  0x10, 0x9f, 0x59, 0x82, 0x5c, 0x30, 0x62, 0x76, 0xe0, 0x33, 0x4b, 0x90,
-  0x0b, 0x03, 0x2d, 0x8f, 0x36, 0x0b, 0x18, 0x2d, 0x10, 0xb7, 0x20, 0xe0,
-  0x02, 0xcd, 0xd4, 0xc2, 0x05, 0xc3, 0x98, 0xec, 0xd8, 0x4e, 0x7c, 0x86,
-  0x23, 0x5c, 0xe3, 0x76, 0x88, 0x6f, 0x96, 0x41, 0x17, 0x7a, 0x21, 0x30,
-  0xdc, 0x79, 0x8d, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78,
-  0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0xe2, 0x87, 0x74, 0xb8,
-  0x21, 0xe8, 0x21, 0x30, 0x98, 0x65, 0xd8, 0x05, 0x5e, 0x08, 0x6c, 0x00,
-  0x1f, 0xf8, 0xcc, 0x12, 0x84, 0x83, 0xf5, 0x0e, 0x11, 0x9f, 0x59, 0x82,
-  0x70, 0x18, 0x8e, 0xc8, 0x0d, 0xdf, 0x11, 0xbe, 0x59, 0x06, 0x5f, 0x08,
-  0x87, 0xc0, 0x74, 0xe3, 0x77, 0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86,
-  0xb9, 0xe0, 0x29, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0x35,
-  0xd2, 0xe1, 0x86, 0x00, 0x8d, 0xc0, 0x60, 0x96, 0xe1, 0x17, 0xc0, 0x21,
-  0xb0, 0xf3, 0x19, 0xe2, 0x33, 0x4b, 0x10, 0x0e, 0x46, 0xb0, 0x0f, 0x7c,
-  0x66, 0x09, 0xc2, 0x61, 0xa0, 0xe5, 0xd1, 0x76, 0x01, 0xe3, 0x05, 0xe2,
-  0x17, 0x04, 0x70, 0x40, 0x9d, 0x5e, 0xb8, 0x60, 0x98, 0x0b, 0x9e, 0xba,
-  0xed, 0xa9, 0xc3, 0x9d, 0x61, 0x2e, 0x5d, 0x86, 0x39, 0x62, 0x98, 0x23,
-  0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x4d, 0x8f, 0xce, 0x88,
-  0x84, 0xea, 0x68, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61,
-  0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10, 0x00, 0x04,
-  0xc1, 0x00, 0x0b, 0x25, 0x37, 0x4a, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40,
-  0x10, 0x0c, 0x30, 0x51, 0x7a, 0xa3, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00,
-  0x04, 0xc1, 0x00, 0x1b, 0x25, 0x38, 0x4a, 0x88, 0x60, 0xc4, 0x40, 0x01,
-  0x40, 0x10, 0x0c, 0x16, 0x55, 0x7a, 0x23, 0x17, 0x0a, 0xf8, 0xe8, 0x87,
-  0xfc, 0x68, 0x34, 0x21, 0x00, 0x2e, 0x78, 0x70, 0x96, 0x40, 0x1c, 0x06,
-  0x5a, 0x1e, 0xd3, 0x68, 0x05, 0x3f, 0x0c, 0x58, 0x81, 0x25, 0x5e, 0x41,
-  0x08, 0x07, 0x3f, 0x0c, 0x60, 0x61, 0x96, 0x61, 0x1c, 0xca, 0xa1, 0x3e,
-  0x86, 0x23, 0xf4, 0x03, 0x8c, 0x86, 0xef, 0xf6, 0x63, 0x98, 0xe1, 0x86,
-  0x60, 0x85, 0xc8, 0xa0, 0x86, 0x40, 0x87, 0x23, 0xf6, 0x83, 0x8c, 0x86,
-  0xaf, 0x02, 0x41, 0xaf, 0x3f, 0x86, 0x19, 0x6e, 0x08, 0x5c, 0x88, 0x0c,
-  0x2a, 0x18, 0x74, 0x96, 0x81, 0x1c, 0xf2, 0x21, 0x38, 0xf9, 0x19, 0xe6,
-  0x46, 0x66, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xd8, 0x64, 0xe9,
-  0x8f, 0x78, 0xa8, 0x95, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46,
-  0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x38, 0x64, 0xc4, 0x00, 0x01,
-  0x40, 0x10, 0x0c, 0xb0, 0x5c, 0x32, 0xa5, 0x83, 0x08, 0x46, 0x0c, 0x10,
-  0x00, 0x04, 0xc1, 0x00, 0xd3, 0xa5, 0x53, 0x62, 0x88, 0x60, 0xc4, 0x00,
-  0x01, 0x40, 0x10, 0x0c, 0xb0, 0x5d, 0x42, 0x25, 0x89, 0x08, 0x46, 0x0c,
-  0x14, 0x00, 0x04, 0xc1, 0x60, 0x11, 0xa7, 0x53, 0x32, 0xa3, 0x80, 0x96,
-  0xee, 0xc8, 0x96, 0x46, 0x13, 0x02, 0xe0, 0x82, 0x07, 0x67, 0x09, 0xf2,
-  0x61, 0xb8, 0xa1, 0x45, 0x74, 0x09, 0x0c, 0x66, 0x19, 0xcc, 0xe1, 0x1c,
-  0x82, 0x0a, 0x23, 0x55, 0x82, 0x0b, 0x9e, 0x1a, 0x31, 0x38, 0x00, 0x10,
-  0x04, 0x03, 0x8a, 0x9c, 0x56, 0x89, 0x46, 0xf4, 0x68, 0xc4, 0xe0, 0x00,
-  0x40, 0x10, 0x0c, 0xa8, 0x72, 0x5a, 0xa5, 0x40, 0xb8, 0x60, 0x98, 0x22,
-  0xa3, 0x57, 0x82, 0x0b, 0x9e, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03,
-  0x2a, 0x9d, 0x60, 0xc9, 0x46, 0xfe, 0x68, 0xc4, 0xe0, 0x00, 0x40, 0x10,
-  0x0c, 0x28, 0x75, 0x82, 0xa5, 0x40, 0xb8, 0x60, 0x98, 0x0b, 0x9e, 0xba,
-  0xe3, 0xa9, 0x6b, 0xa1, 0x61, 0xce, 0x67, 0x86, 0x39, 0x62, 0x98, 0x23,
-  0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0xed, 0x9d, 0x78, 0x29,
-  0x8f, 0xd4, 0x69, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61,
-  0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10, 0x00, 0x04,
-  0xc1, 0x00, 0xb3, 0xa7, 0x71, 0x4a, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40,
-  0x10, 0x0c, 0xb0, 0x7b, 0x22, 0xa7, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00,
-  0x04, 0xc1, 0x00, 0xc3, 0xa7, 0x72, 0x4a, 0x88, 0x60, 0xc4, 0x40, 0x01,
-  0x40, 0x10, 0x0c, 0x96, 0x7f, 0x22, 0xa7, 0x51, 0x0a, 0xe2, 0x89, 0x96,
-  0xe6, 0x69, 0x34, 0x21, 0x00, 0x2e, 0x78, 0x70, 0x96, 0x20, 0x1f, 0x86,
-  0x1b, 0xd4, 0xc4, 0x9e, 0xc0, 0x60, 0x96, 0x01, 0x1d, 0xf2, 0x21, 0xb0,
-  0x3a, 0xba, 0xa3, 0xf8, 0x0c, 0x47, 0xc0, 0x09, 0x1e, 0x11, 0xdf, 0x2c,
-  0x43, 0x3a, 0xb0, 0x43, 0x60, 0x79, 0x14, 0x27, 0xf1, 0xb1, 0x60, 0xa0,
-  0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2,
-  0x53, 0x04, 0x48, 0xe9, 0x70, 0x43, 0xe0, 0x4f, 0x60, 0x30, 0xcb, 0xa0,
-  0x0e, 0xeb, 0x10, 0xd8, 0x10, 0x4a, 0xf0, 0x99, 0x25, 0x80, 0x07, 0x03,
-  0x25, 0x22, 0x3e, 0xb3, 0x04, 0xf0, 0x30, 0x1c, 0xb1, 0x27, 0xa1, 0x24,
-  0x7c, 0xb3, 0x0c, 0xed, 0x00, 0x0f, 0x81, 0xf1, 0x89, 0x28, 0xc5, 0xc7,
-  0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x44, 0xf2, 0xb1,
-  0x22, 0x88, 0x4f, 0x11, 0x2b, 0xa5, 0xc3, 0x0d, 0x41, 0x4a, 0x81, 0xc1,
-  0x2c, 0x83, 0x3b, 0xbc, 0x43, 0x60, 0xaa, 0x34, 0xc4, 0x67, 0x96, 0x00,
-  0x1e, 0x8c, 0x68, 0x25, 0xf8, 0xcc, 0x12, 0xc0, 0xc3, 0x40, 0xcb, 0xa3,
-  0xa9, 0x03, 0xb6, 0x0e, 0x84, 0x3b, 0x08, 0xef, 0xc0, 0x52, 0xec, 0x70,
-  0xc1, 0x30, 0xc6, 0x4a, 0xb0, 0x14, 0x9f, 0xe1, 0x08, 0x53, 0x89, 0x25,
-  0xe2, 0x9b, 0x65, 0x88, 0x07, 0x7a, 0x08, 0x4c, 0x96, 0x4e, 0x25, 0x3e,
-  0x16, 0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0xc0, 0x90, 0x8f,
-  0x15, 0x41, 0x7c, 0x8a, 0xc8, 0x29, 0x1d, 0x6e, 0x08, 0x6e, 0x0a, 0x0c,
-  0x66, 0x19, 0xe4, 0x61, 0x1e, 0x02, 0x1b, 0x74, 0x09, 0x3e, 0xb3, 0x04,
-  0xf8, 0x60, 0xb7, 0x44, 0xc4, 0x67, 0x96, 0x00, 0x1f, 0x86, 0x23, 0x62,
-  0x05, 0x97, 0x84, 0x6f, 0x96, 0xa1, 0x1e, 0xf0, 0x21, 0x30, 0x59, 0xc9,
-  0xa5, 0xf8, 0x58, 0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca, 0x82,
-  0x48, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x82, 0xac, 0x74, 0xb8, 0x21, 0x10,
-  0x2b, 0x30, 0x98, 0x65, 0xb0, 0x87, 0x7b, 0x08, 0x2c, 0x9c, 0x86, 0xf8,
-  0xcc, 0x12, 0xe0, 0x83, 0x11, 0xe6, 0x04, 0x9f, 0x59, 0x02, 0x7c, 0x18,
-  0x68, 0x79, 0x34, 0x79, 0xc0, 0xe6, 0x81, 0xb0, 0x07, 0xe1, 0x1e, 0xf0,
-  0x8a, 0x1e, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x6e, 0x7b, 0xea, 0x64, 0x69,
-  0x98, 0x1b, 0x9f, 0x61, 0x8e, 0x18, 0xe6, 0x88, 0x61, 0x46, 0x0c, 0x0e,
-  0x00, 0x04, 0xc1, 0x60, 0xa3, 0xab, 0xb0, 0xf2, 0xa7, 0xb7, 0x1a, 0x4d,
-  0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62,
-  0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xc0, 0xf6, 0x0a,
-  0xad, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x8c, 0xaf,
-  0xd2, 0x2a, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xc0, 0xfa,
-  0x4a, 0xad, 0x12, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x85,
-  0xb4, 0xd2, 0x0a, 0xa5, 0x02, 0xbb, 0xca, 0x29, 0xbc, 0x1a, 0x4d, 0x08,
-  0x80, 0x0b, 0x1e, 0x9c, 0x25, 0xc8, 0x87, 0x81, 0x96, 0xc7, 0x34, 0xc8,
-  0x41, 0x4e, 0x83, 0x71, 0x60, 0x09, 0x73, 0x10, 0xf0, 0x41, 0x4e, 0x83,
-  0x73, 0x98, 0x65, 0xd0, 0x07, 0x7e, 0x78, 0x97, 0xe1, 0x08, 0x79, 0xd1,
-  0xa9, 0xe1, 0xbb, 0x79, 0x19, 0x66, 0xb8, 0x21, 0x28, 0x29, 0x32, 0xa8,
-  0x21, 0xd0, 0xe1, 0x88, 0x7a, 0xf1, 0xa9, 0xe1, 0xab, 0x40, 0xd0, 0xbb,
-  0x97, 0x61, 0x86, 0x1b, 0x02, 0x94, 0x22, 0x83, 0x0a, 0x06, 0x9d, 0x65,
-  0xd8, 0x07, 0x98, 0x08, 0x8e, 0x9d, 0x86, 0xb9, 0xfe, 0x19, 0x66, 0xc4,
-  0xe0, 0x00, 0x40, 0x10, 0x0c, 0x36, 0xd6, 0xca, 0x2b, 0x9b, 0x3a, 0xad,
-  0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d,
-  0x20, 0x86, 0x22, 0x0e, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x6c,
-  0xb6, 0x40, 0xeb, 0x20, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xc0,
-  0x68, 0x2b, 0xb4, 0x18, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03,
-  0xac, 0xb6, 0x44, 0x4b, 0x22, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30,
-  0x58, 0x78, 0x2b, 0xb4, 0xc0, 0x2a, 0x70, 0xad, 0xb8, 0x82, 0xad, 0xd1,
-  0x84, 0x00, 0xb8, 0xe0, 0xc1, 0x59, 0x02, 0x98, 0x18, 0x6e, 0x38, 0x19,
-  0xda, 0x02, 0x83, 0x59, 0x86, 0x7e, 0xf0, 0x87, 0xa0, 0x76, 0x8a, 0xb4,
-  0xe0, 0x82, 0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0xf2, 0xad,
-  0xd2, 0x62, 0x19, 0xba, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0xea,
-  0xb7, 0x4a, 0x2b, 0x10, 0x2e, 0x18, 0xa6, 0x7c, 0x2a, 0xb5, 0xe0, 0x82,
-  0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x1a, 0x2f, 0xd5, 0x82,
-  0x99, 0xbc, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x8a, 0xbc, 0x54,
-  0x2b, 0x10, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0xee, 0x78, 0xea, 0x4e, 0x6a,
-  0x98, 0xc3, 0xa1, 0x61, 0x8e, 0x18, 0xe6, 0x88, 0x61, 0x46, 0x0c, 0x0e,
-  0x00, 0x04, 0xc1, 0x60, 0x4b, 0x2f, 0xdb, 0x9a, 0x2b, 0xf2, 0x1a, 0x4d,
-  0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62,
-  0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xc0, 0xe0, 0xab,
-  0xb7, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x2c, 0xbe,
-  0x7c, 0x2b, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xc0, 0xe4,
-  0xeb, 0xb7, 0x12, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x25,
-  0xbf, 0x7c, 0xab, 0xaf, 0x82, 0xf5, 0x72, 0xad, 0xf6, 0x1a, 0x4d, 0x08,
-  0x80, 0x0b, 0x1e, 0x9c, 0x25, 0x80, 0x89, 0xe1, 0x06, 0xb2, 0x81, 0x2f,
-  0x30, 0x98, 0x65, 0xf8, 0x07, 0x98, 0x08, 0xec, 0xad, 0xe2, 0x2a, 0x3e,
-  0xc3, 0x11, 0x68, 0x23, 0x57, 0xc4, 0x37, 0xcb, 0x00, 0x12, 0x23, 0x11,
-  0xd8, 0x5c, 0xa5, 0x4d, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17,
-  0x3c, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xa1, 0x5f, 0x3a,
-  0xdc, 0x10, 0xe0, 0x17, 0x18, 0xcc, 0x32, 0x84, 0x84, 0x48, 0x04, 0x36,
-  0xec, 0x15, 0x7c, 0x66, 0x09, 0x4e, 0xc2, 0xf4, 0x8a, 0x88, 0xcf, 0x2c,
-  0xc1, 0x49, 0x0c, 0x47, 0xcc, 0xcd, 0x5e, 0x09, 0xdf, 0x2c, 0x03, 0x49,
-  0x9c, 0x44, 0x60, 0x74, 0xc3, 0x57, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05,
-  0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x44,
-  0x89, 0xe9, 0x70, 0x43, 0x30, 0x62, 0x60, 0x30, 0xcb, 0x50, 0x12, 0x26,
-  0x11, 0x18, 0x69, 0x0d, 0xf1, 0x99, 0x25, 0x38, 0x09, 0x23, 0x4e, 0x0b,
-  0x3e, 0xb3, 0x04, 0x27, 0x31, 0xd0, 0xf2, 0x68, 0x21, 0x81, 0x89, 0x04,
-  0x51, 0x12, 0x82, 0x49, 0xf0, 0xdb, 0x48, 0x5c, 0x30, 0x8c, 0x99, 0x96,
-  0x6a, 0xc5, 0x67, 0x38, 0x02, 0x74, 0x56, 0x8b, 0xf8, 0x66, 0x19, 0x50,
-  0x62, 0x25, 0x02, 0x63, 0xad, 0xd0, 0x89, 0x8f, 0x05, 0x03, 0x7d, 0x2e,
-  0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22,
-  0x66, 0x4c, 0x87, 0x1b, 0x82, 0x18, 0x03, 0x83, 0x59, 0x86, 0x94, 0x50,
-  0x89, 0xc0, 0x06, 0xda, 0x82, 0xcf, 0x2c, 0xc1, 0x4b, 0x58, 0x6c, 0x11,
-  0xf1, 0x99, 0x25, 0x78, 0x89, 0xe1, 0x88, 0xd5, 0x91, 0x2d, 0xe1, 0x9b,
-  0x65, 0x60, 0x89, 0x97, 0x08, 0x8c, 0x75, 0x66, 0x2b, 0x3e, 0x16, 0x38,
-  0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41,
-  0x7c, 0x8a, 0xf0, 0x31, 0x1d, 0x6e, 0x08, 0x78, 0x0c, 0x0c, 0x66, 0x19,
-  0x5a, 0xc2, 0x25, 0x02, 0xdb, 0xad, 0x21, 0x3e, 0xb3, 0x04, 0x2f, 0x61,
-  0x04, 0x78, 0xc1, 0x67, 0x96, 0xe0, 0x25, 0x06, 0x5a, 0x1e, 0x2d, 0x25,
-  0x30, 0x95, 0x20, 0x5a, 0x42, 0x70, 0x09, 0xb6, 0x5b, 0x89, 0x0b, 0x86,
-  0xb9, 0xe0, 0xa9, 0xdb, 0x9e, 0x3a, 0xd6, 0x1a, 0xe6, 0x7a, 0x69, 0x98,
-  0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xd8,
-  0xdc, 0x6c, 0xc7, 0xf0, 0x2b, 0xcd, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41,
-  0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4,
-  0x00, 0x01, 0x40, 0x10, 0x0c, 0xb0, 0x3a, 0x13, 0xb3, 0x84, 0x08, 0x46,
-  0x0c, 0x10, 0x00, 0x04, 0xc1, 0x00, 0xb3, 0xb3, 0x31, 0x4b, 0x88, 0x60,
-  0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb0, 0x3b, 0x23, 0xb3, 0x84, 0x08,
-  0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0xf1, 0xb3, 0x31, 0x13, 0xb1,
-  0x00, 0xce, 0x66, 0x4c, 0xce, 0x46, 0x13, 0x02, 0xe0, 0x82, 0x07, 0x67,
-  0x09, 0x60, 0x62, 0xa0, 0xe5, 0x31, 0x8d, 0x7d, 0x30, 0xe3, 0x40, 0x1f,
-  0x58, 0xa2, 0x1f, 0x84, 0x97, 0x30, 0xe3, 0xc0, 0x1f, 0x46, 0x0c, 0x0c,
-  0x00, 0x04, 0xc1, 0x00, 0x02, 0xb5, 0x1f, 0x7b, 0x27, 0xb3, 0x0f, 0x78,
-  0x89, 0x8f, 0x09, 0x81, 0x7c, 0x2c, 0x90, 0x17, 0xf8, 0x58, 0xf1, 0x0f,
-  0xf1, 0xb1, 0x22, 0x90, 0x8f, 0x05, 0x21, 0x01, 0x9f, 0x11, 0x03, 0x03,
-  0x00, 0x41, 0x30, 0x80, 0x4e, 0xcd, 0xcc, 0xea, 0xc9, 0x84, 0x22, 0x3e,
-  0x16, 0x08, 0xf2, 0xb1, 0xe0, 0x80, 0xcf, 0x05, 0xc6, 0x8d, 0x18, 0x38,
-  0x00, 0x08, 0x82, 0x41, 0xd3, 0x6a, 0x6a, 0x46, 0x63, 0x2d, 0x06, 0x6a,
-  0x41, 0x98, 0x85, 0x59, 0x98, 0x8d, 0x99, 0xa8, 0xcd, 0x12, 0x8c, 0xd0,
-  0x70, 0x83, 0x5f, 0x89, 0x1a, 0x18, 0xcc, 0x32, 0xc8, 0xc4, 0x08, 0x05,
-  0x23, 0x06, 0x06, 0x00, 0x82, 0x60, 0x00, 0xb5, 0x5a, 0x9b, 0xf1, 0x93,
-  0x05, 0x3d, 0x06, 0x9f, 0x11, 0x03, 0x03, 0x00, 0x41, 0x30, 0x80, 0x5e,
-  0xed, 0xcd, 0xfa, 0xc9, 0x82, 0x1f, 0x83, 0xcf, 0x68, 0x02, 0x8d, 0x0d,
-  0xc3, 0x0d, 0x81, 0xa9, 0x81, 0xc1, 0x2c, 0xc3, 0x4c, 0xd4, 0x44, 0x30,
-  0x1c, 0x51, 0x9c, 0xd9, 0xf0, 0x9d, 0x31, 0xcc, 0x70, 0x43, 0x20, 0x63,
-  0x64, 0x50, 0x43, 0xa0, 0xc3, 0x11, 0xc7, 0x9a, 0x0d, 0x5f, 0x05, 0x82,
-  0x5e, 0x32, 0xcc, 0x70, 0x43, 0x50, 0x63, 0x64, 0x50, 0xc1, 0xa0, 0xb3,
-  0x0c, 0x34, 0x91, 0x16, 0xc1, 0xe5, 0xd7, 0x30, 0xa7, 0x52, 0xc3, 0x8c,
-  0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x96, 0x6b, 0xa6, 0x36, 0x66, 0xb4,
-  0x36, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3,
-  0x09, 0xc4, 0x50, 0xc4, 0x21, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x80,
-  0x81, 0x5b, 0xab, 0x1d, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06,
-  0x58, 0xb8, 0xb9, 0x1a, 0x43, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60,
-  0x80, 0x89, 0xdb, 0xab, 0x49, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08,
-  0x06, 0x4b, 0xba, 0xb9, 0x5a, 0x9b, 0x05, 0xbb, 0xe6, 0x67, 0xbd, 0x36,
-  0x9a, 0x10, 0x00, 0x17, 0x3c, 0x38, 0x4b, 0x90, 0x16, 0xc3, 0x0d, 0x59,
-  0xb8, 0x81, 0xc1, 0x2c, 0x83, 0x4d, 0xdc, 0x44, 0x50, 0x68, 0x16, 0x6b,
-  0x70, 0xc1, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0xad, 0x9b,
-  0xac, 0x7d, 0xa1, 0x36, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x14, 0xbb,
-  0xc9, 0x5a, 0x20, 0x5c, 0x30, 0x4c, 0xad, 0x99, 0xad, 0xc1, 0x05, 0x4f,
-  0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x01, 0x05, 0x6f, 0xb7, 0x26, 0x06,
-  0xa6, 0x36, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x54, 0xbc, 0xdd, 0x5a,
-  0x20, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0xdd, 0xf1, 0xd4, 0xd1, 0xd8, 0x30,
-  0x57, 0x56, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00,
-  0x08, 0x82, 0xc1, 0x66, 0x6f, 0xe3, 0x06, 0x6a, 0xf1, 0x36, 0x9a, 0x10,
-  0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50,
-  0x44, 0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x80, 0xf5, 0x9b, 0xba,
-  0x25, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x98, 0xbf, 0xad,
-  0x5b, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x80, 0xfd, 0x1b,
-  0xbb, 0x25, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x8b, 0xc9,
-  0xad, 0x9b, 0xaa, 0x05, 0xf8, 0xb6, 0x6b, 0xfa, 0x36, 0x9a, 0x10, 0x00,
-  0x17, 0x3c, 0x38, 0x4b, 0x90, 0x16, 0xc3, 0x0d, 0x76, 0xd0, 0x6f, 0x60,
-  0x30, 0xcb, 0x80, 0x13, 0x69, 0x11, 0x18, 0x9f, 0xf9, 0x59, 0x7c, 0x86,
-  0x23, 0xf6, 0xe0, 0xcf, 0x88, 0x6f, 0x96, 0x21, 0x27, 0x78, 0x22, 0x30,
-  0x50, 0xe3, 0x83, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78,
-  0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0xe2, 0xe4, 0x74, 0xb8,
-  0x21, 0x28, 0x39, 0x30, 0x98, 0x65, 0xd0, 0x89, 0x9d, 0x08, 0x6c, 0x40,
-  0x35, 0xf8, 0xcc, 0x12, 0x80, 0x85, 0x9d, 0x1a, 0x11, 0x9f, 0x59, 0x02,
-  0xb0, 0x18, 0x8e, 0x30, 0x05, 0x54, 0x13, 0xbe, 0x59, 0x86, 0x9e, 0x00,
-  0x8b, 0xc0, 0x4e, 0x21, 0xd5, 0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86,
-  0xb9, 0xe0, 0x29, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0x99,
-  0xd3, 0xe1, 0x86, 0x00, 0xe6, 0xc0, 0x60, 0x96, 0xc1, 0x27, 0x7e, 0x22,
-  0xb0, 0x58, 0x1b, 0xe2, 0x33, 0x4b, 0x00, 0x16, 0x46, 0xd0, 0x1a, 0x7c,
-  0x66, 0x09, 0xc0, 0x62, 0xa0, 0xe5, 0xd1, 0x74, 0x02, 0xdb, 0x09, 0xc2,
-  0x27, 0x84, 0x9f, 0x10, 0x0b, 0x9e, 0xb8, 0x60, 0x18, 0x9b, 0xb5, 0x5b,
-  0x8b, 0xcf, 0x70, 0x84, 0x2c, 0xe0, 0x1a, 0xf1, 0xcd, 0x32, 0x84, 0x05,
-  0x59, 0x04, 0x96, 0x6b, 0xb3, 0x10, 0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30,
-  0xcc, 0x05, 0x4f, 0x59, 0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0x80,
-  0x9d, 0x0e, 0x37, 0x04, 0x3e, 0x07, 0x06, 0xb3, 0x0c, 0x62, 0x31, 0x16,
-  0x81, 0x0d, 0xe1, 0x06, 0x9f, 0x59, 0x02, 0xb4, 0x30, 0x5f, 0x23, 0xe2,
-  0x33, 0x4b, 0x80, 0x16, 0xc3, 0x11, 0xbd, 0xf0, 0x6b, 0xc2, 0x37, 0xcb,
-  0x50, 0x16, 0x68, 0x11, 0x98, 0x2f, 0x80, 0x5b, 0x7c, 0x2c, 0x70, 0xe8,
-  0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8,
-  0x14, 0xb1, 0x76, 0x3a, 0xdc, 0x10, 0xa4, 0x1d, 0x18, 0xcc, 0x32, 0x98,
-  0xc5, 0x59, 0x04, 0x86, 0x6e, 0x43, 0x7c, 0x66, 0x09, 0xd0, 0xc2, 0x88,
-  0x76, 0x83, 0xcf, 0x2c, 0x01, 0x5a, 0x0c, 0xb4, 0x3c, 0x9a, 0x58, 0x60,
-  0x63, 0x41, 0x98, 0x85, 0x70, 0x16, 0xb0, 0x41, 0x16, 0x17, 0x0c, 0x73,
-  0xc1, 0x53, 0xb7, 0x3d, 0x75, 0xb9, 0x36, 0xcc, 0xa9, 0xd7, 0x30, 0x47,
-  0x0c, 0x73, 0xc4, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xb0, 0xed,
-  0x1d, 0xda, 0x95, 0x9c, 0xdd, 0x8d, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10,
-  0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x91, 0xc8, 0x88, 0x01,
-  0x02, 0x80, 0x20, 0x18, 0x60, 0xa2, 0xf7, 0x76, 0x09, 0x11, 0x8c, 0x18,
-  0x20, 0x00, 0x08, 0x82, 0x01, 0x36, 0x7a, 0x70, 0x97, 0x10, 0xc1, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x60, 0xa4, 0x17, 0x77, 0x09, 0x11, 0x8c,
-  0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0xb2, 0x7a, 0x70, 0xf7, 0x72, 0x41,
-  0xdf, 0x81, 0xdd, 0xdf, 0x8d, 0x26, 0x04, 0xc0, 0x05, 0x0f, 0xce, 0x12,
-  0xa4, 0xc5, 0x40, 0xcb, 0x63, 0x1a, 0x34, 0x01, 0xea, 0xc1, 0x4c, 0xb0,
-  0x84, 0x4d, 0x08, 0x68, 0x01, 0xea, 0xc1, 0x4d, 0xcc, 0x32, 0xa8, 0x05,
-  0x5b, 0xec, 0xc3, 0x70, 0x04, 0x48, 0x84, 0xdd, 0xf0, 0x5d, 0x48, 0x0c,
-  0x33, 0xdc, 0x10, 0xb0, 0x1c, 0x19, 0xd4, 0x10, 0xe8, 0x70, 0x44, 0x48,
-  0x94, 0xdd, 0xf0, 0x55, 0x20, 0xe8, 0x8d, 0xc4, 0x30, 0xc3, 0x0d, 0xc1,
-  0xcb, 0x91, 0x41, 0x05, 0x83, 0xce, 0x32, 0xac, 0x05, 0x68, 0x04, 0x37,
-  0x6f, 0xc3, 0x1c, 0x89, 0x0d, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06,
-  0xdb, 0xec, 0x81, 0x5e, 0xcf, 0xb9, 0xde, 0x68, 0x42, 0x00, 0x8c, 0x26,
-  0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x87, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0xa6, 0x7b, 0xa7, 0x77, 0x10, 0xc1,
-  0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x60, 0xbb, 0x87, 0x7a, 0x0c, 0x11,
-  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0xc6, 0x7b, 0xa9, 0x27, 0x11,
-  0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c, 0xe3, 0x87, 0x7a, 0x67,
-  0x17, 0xd4, 0x1e, 0xde, 0xdd, 0xde, 0x68, 0x42, 0x00, 0x5c, 0xf0, 0xe0,
-  0x2c, 0x01, 0x68, 0x0c, 0x37, 0xcc, 0xc4, 0xee, 0x81, 0xc1, 0x2c, 0x43,
-  0x5b, 0xb8, 0x45, 0x50, 0x62, 0xb7, 0x7a, 0x70, 0xc1, 0x53, 0x23, 0x06,
-  0x07, 0x00, 0x82, 0x60, 0x40, 0x95, 0x1f, 0xeb, 0xe9, 0xc4, 0xde, 0x8d,
-  0x18, 0x1c, 0x00, 0x08, 0x82, 0x01, 0x65, 0x7e, 0xac, 0x17, 0x08, 0x17,
-  0x0c, 0x53, 0x65, 0x07, 0x7b, 0x70, 0xc1, 0x53, 0x23, 0x06, 0x07, 0x00,
-  0x82, 0x60, 0x40, 0xa9, 0x5f, 0xec, 0xf1, 0x04, 0xe8, 0x8d, 0x18, 0x1c,
-  0x00, 0x08, 0x82, 0x01, 0xb5, 0x7e, 0xb1, 0x17, 0x08, 0x17, 0x0c, 0x73,
-  0xc1, 0x53, 0x77, 0x3c, 0x75, 0x2e, 0x37, 0xcc, 0xfd, 0xd8, 0x30, 0x47,
-  0x0c, 0x73, 0xc4, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xb0, 0xc1,
-  0x5f, 0xef, 0xe9, 0xdd, 0xfa, 0x8d, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10,
-  0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x91, 0xc8, 0x88, 0x01,
-  0x02, 0x80, 0x20, 0x18, 0x60, 0xf7, 0x47, 0x7e, 0x09, 0x11, 0x8c, 0x18,
-  0x20, 0x00, 0x08, 0x82, 0x01, 0x86, 0x7f, 0xe5, 0x97, 0x10, 0xc1, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x60, 0xf9, 0x67, 0x7e, 0x09, 0x11, 0x8c,
-  0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0x02, 0x82, 0x41, 0xf9, 0x91, 0x5e,
-  0x20, 0x7f, 0xb5, 0x47, 0x7f, 0xa3, 0x09, 0x01, 0x70, 0xc1, 0x83, 0xb3,
-  0x04, 0xa0, 0x31, 0xdc, 0x00, 0x17, 0xf7, 0x07, 0x06, 0xb3, 0x0c, 0x6f,
-  0x01, 0x1a, 0x81, 0xd9, 0x1d, 0xde, 0xc5, 0x67, 0x38, 0xc2, 0x2e, 0xf2,
-  0x8e, 0xf8, 0x66, 0x19, 0xe0, 0x62, 0x2e, 0x02, 0xd3, 0xbb, 0xbb, 0x88,
-  0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x30, 0xe4,
-  0x63, 0x45, 0x10, 0x9f, 0x22, 0x42, 0x30, 0xd0, 0xe1, 0x86, 0xe0, 0xff,
-  0xc0, 0x60, 0x96, 0x21, 0x2e, 0xe4, 0x22, 0xb0, 0x41, 0xf4, 0xe0, 0x33,
-  0x4b, 0x70, 0x17, 0x16, 0x7a, 0x44, 0x7c, 0x66, 0x09, 0xee, 0x62, 0x38,
-  0x22, 0x34, 0x44, 0x4f, 0xf8, 0x66, 0x19, 0xe8, 0xe2, 0x2e, 0x02, 0x13,
-  0x8d, 0xd1, 0x8b, 0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7,
-  0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0x58, 0x30, 0xd0, 0xe1,
-  0x86, 0x40, 0x05, 0x03, 0x30, 0x98, 0x65, 0xa8, 0x0b, 0xbb, 0x08, 0x6c,
-  0xf5, 0x86, 0xf8, 0xcc, 0x12, 0xdc, 0x85, 0x11, 0xae, 0x07, 0x9f, 0x59,
-  0x82, 0xbb, 0x18, 0x68, 0x79, 0xb4, 0xb8, 0xc0, 0xe4, 0x82, 0xa8, 0x0b,
-  0xc1, 0x2e, 0x70, 0x66, 0x2e, 0x2e, 0x18, 0xc6, 0x5a, 0x2f, 0xf6, 0xe2,
-  0x33, 0x1c, 0xc1, 0x1a, 0xb2, 0x47, 0x7c, 0xb3, 0x0c, 0x78, 0xb1, 0x17,
-  0x81, 0xcd, 0x5e, 0x6b, 0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73,
-  0xc1, 0x53, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x3a, 0x18,
-  0xe8, 0x70, 0x43, 0x80, 0x83, 0x01, 0x18, 0xcc, 0x32, 0xe4, 0x85, 0x5e,
-  0x04, 0x36, 0xec, 0x1e, 0x7c, 0x66, 0x09, 0xfe, 0xc2, 0x70, 0x8f, 0x88,
-  0xcf, 0x2c, 0xc1, 0x5f, 0x0c, 0x47, 0xdc, 0x46, 0xee, 0x09, 0xdf, 0x2c,
-  0x03, 0x5f, 0xfc, 0x45, 0x60, 0xb8, 0xa1, 0x7b, 0xf1, 0xb1, 0xc0, 0xa1,
-  0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2,
-  0x53, 0x44, 0x19, 0x06, 0x3a, 0xdc, 0x10, 0x8c, 0x61, 0x00, 0x06, 0xb3,
-  0x0c, 0x7d, 0xe1, 0x17, 0x81, 0x89, 0xdf, 0x10, 0x9f, 0x59, 0x82, 0xbf,
-  0x30, 0xe2, 0xfc, 0xe0, 0x33, 0x4b, 0xf0, 0x17, 0x03, 0x2d, 0x8f, 0x96,
-  0x17, 0x98, 0x5e, 0x10, 0x7d, 0x21, 0xf8, 0x05, 0xe9, 0xec, 0xc5, 0x05,
-  0xc3, 0x5c, 0xf0, 0xd4, 0x6d, 0x4f, 0xdd, 0xec, 0x0d, 0x73, 0xe4, 0x36,
-  0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18,
-  0x6c, 0x75, 0x18, 0x88, 0x61, 0xf0, 0x7f, 0x70, 0x18, 0x8c, 0x26, 0x04,
-  0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14,
-  0x91, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x60, 0x7c, 0x18, 0xa4,
-  0x61, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x60, 0x7d,
-  0x18, 0xa8, 0x61, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
-  0x60, 0x7e, 0x18, 0xac, 0x61, 0x90, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80,
-  0x20, 0x18, 0x2c, 0xa5, 0x18, 0xa8, 0x61, 0x90, 0x82, 0x41, 0x70, 0x87,
-  0x81, 0x0e, 0x06, 0x79, 0x18, 0x8c, 0x26, 0x04, 0xc0, 0x05, 0x0f, 0xce,
-  0x12, 0x80, 0xc6, 0x40, 0xcb, 0x63, 0x1a, 0x6b, 0x41, 0x87, 0x82, 0x5a,
-  0xb0, 0x44, 0x5b, 0x08, 0x7f, 0x41, 0x87, 0x82, 0x5b, 0x98, 0x7e, 0xe8,
-  0x60, 0x00, 0x9f, 0x59, 0x86, 0xd0, 0x18, 0x0d, 0xfb, 0x18, 0x8e, 0x08,
-  0x78, 0x30, 0x18, 0xbe, 0x13, 0x86, 0x19, 0x6e, 0x08, 0x4e, 0x30, 0x20,
-  0x83, 0x1a, 0x02, 0x1d, 0x8e, 0xe0, 0x0f, 0x30, 0x0c, 0x86, 0xaf, 0x02,
-  0x41, 0xcf, 0x3f, 0x86, 0x19, 0x6e, 0x08, 0x54, 0x30, 0x20, 0x83, 0x0a,
-  0x06, 0x9d, 0x65, 0x10, 0x8d, 0xdb, 0x08, 0xce, 0xfd, 0x86, 0xb9, 0x7f,
-  0x1b, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x36, 0x57, 0x0c, 0xf6,
-  0x30, 0xc0, 0xc1, 0x20, 0x15, 0x83, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10,
-  0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x0e, 0x19, 0x31,
-  0x40, 0x00, 0x10, 0x04, 0x03, 0xac, 0x16, 0x03, 0x51, 0x0c, 0x0e, 0x22,
-  0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xcc, 0x16, 0x83, 0x51, 0x0c,
-  0x18, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xec, 0x16, 0x03,
-  0x52, 0x0c, 0x24, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0xc5,
-  0x17, 0x83, 0x51, 0x0c, 0xc4, 0x30, 0x08, 0x60, 0x31, 0x98, 0xc3, 0x40,
-  0x16, 0x83, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xc1, 0x59, 0x82, 0xdb, 0x18,
-  0x6e, 0x70, 0x11, 0x5b, 0x0c, 0xc0, 0x60, 0x96, 0x81, 0x34, 0x4a, 0x23,
-  0xa8, 0x1e, 0x0c, 0x4c, 0x31, 0x80, 0x0b, 0x9e, 0x1a, 0x31, 0x38, 0x00,
-  0x10, 0x04, 0x03, 0x0a, 0x1c, 0x83, 0x53, 0x0c, 0x36, 0x3b, 0x0c, 0x46,
-  0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x0a, 0xc7, 0xe0, 0x14, 0x83, 0x40,
-  0xb8, 0x60, 0x98, 0x02, 0xc3, 0x60, 0x15, 0x03, 0xb8, 0xe0, 0xa9, 0x11,
-  0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0xca, 0x31, 0x60, 0xc5, 0xe0, 0x46,
-  0xf6, 0x30, 0x18, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0xca, 0x1c, 0x03,
-  0x56, 0x0c, 0x02, 0xe1, 0x82, 0x61, 0x2e, 0x78, 0xea, 0x8e, 0xa7, 0x2e,
-  0x05, 0x83, 0x61, 0x4e, 0xe7, 0x86, 0x39, 0x62, 0x98, 0x23, 0x86, 0x19,
-  0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x6d, 0x1d, 0x03, 0x5c, 0x0c, 0xea,
-  0x30, 0x30, 0xc7, 0x60, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34,
-  0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10, 0x00,
-  0x04, 0xc1, 0x00, 0x93, 0xc7, 0xe0, 0x17, 0x83, 0x84, 0x08, 0x46, 0x0c,
-  0x10, 0x00, 0x04, 0xc1, 0x00, 0x9b, 0xc7, 0x00, 0x1c, 0x83, 0x84, 0x08,
-  0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x00, 0xa3, 0xc7, 0x20, 0x1c, 0x83,
-  0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0xd9, 0xc7, 0x00,
-  0x1c, 0x83, 0x3f, 0x0c, 0x82, 0x76, 0x0c, 0x60, 0x31, 0x78, 0xc7, 0x60,
-  0x34, 0x21, 0x00, 0x2e, 0x78, 0x70, 0x96, 0xe0, 0x36, 0x86, 0x1b, 0xd6,
-  0x44, 0x1e, 0x03, 0x30, 0x98, 0x65, 0x30, 0x8d, 0xdb, 0x08, 0x2c, 0x0e,
-  0x83, 0x39, 0x0c, 0xe2, 0x33, 0x1c, 0x71, 0x07, 0x74, 0x18, 0x10, 0xdf,
-  0x2c, 0xc3, 0x69, 0xa8, 0x46, 0x60, 0x75, 0x18, 0xe0, 0x41, 0x7c, 0x2c,
-  0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x81, 0x21, 0x1f, 0x2b,
-  0x82, 0xf8, 0x14, 0xc1, 0x8f, 0x81, 0x0e, 0x37, 0x04, 0xfa, 0x18, 0x80,
-  0xc1, 0x2c, 0x03, 0x6a, 0xa4, 0x46, 0x60, 0x43, 0x1f, 0x06, 0xf0, 0x99,
-  0x25, 0x70, 0x0d, 0xe3, 0xc3, 0x80, 0x88, 0xcf, 0x2c, 0x81, 0x6b, 0x0c,
-  0x47, 0x88, 0x42, 0x1f, 0x06, 0xc2, 0x37, 0xcb, 0xb0, 0x1a, 0xae, 0x11,
-  0xd8, 0x28, 0xf8, 0x61, 0x10, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc,
-  0x05, 0x4f, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0x9c, 0x64,
-  0xa0, 0xc3, 0x0d, 0x41, 0x49, 0x06, 0x60, 0x30, 0xcb, 0xc0, 0x1a, 0xad,
-  0x11, 0x98, 0x29, 0x06, 0x43, 0x7c, 0x66, 0x09, 0x5c, 0xc3, 0x88, 0x54,
-  0x0c, 0xe0, 0x33, 0x4b, 0xe0, 0x1a, 0x03, 0x2d, 0x8f, 0x86, 0x1a, 0x58,
-  0x6a, 0x10, 0xac, 0x21, 0xb4, 0x86, 0x4e, 0xa8, 0xc6, 0x05, 0xc3, 0x18,
-  0x2a, 0x06, 0xac, 0x18, 0xc4, 0x67, 0x38, 0xe2, 0x54, 0x5a, 0x31, 0x20,
-  0xbe, 0x59, 0x86, 0xd7, 0x90, 0x8d, 0xc0, 0x5c, 0x31, 0x40, 0x95, 0xf8,
-  0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca, 0x02, 0x43, 0x3e,
-  0x56, 0x04, 0xf1, 0x29, 0xa2, 0x26, 0x03, 0x1d, 0x6e, 0x08, 0x66, 0x32,
-  0x00, 0x83, 0x59, 0x06, 0xd8, 0x88, 0x8d, 0xc0, 0x06, 0x5b, 0x0c, 0xe0,
-  0x33, 0x4b, 0x60, 0x1b, 0x36, 0x8b, 0x01, 0x11, 0x9f, 0x59, 0x02, 0xdb,
-  0x18, 0x8e, 0x90, 0x15, 0x5a, 0x0c, 0x84, 0x6f, 0x96, 0x61, 0x36, 0x6c,
-  0x23, 0xb0, 0x59, 0xa9, 0xc5, 0x20, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60,
-  0x98, 0x0b, 0x9e, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x00,
-  0xcb, 0x40, 0x87, 0x1b, 0x02, 0x9f, 0x0c, 0xc0, 0x60, 0x96, 0x81, 0x36,
-  0x6a, 0x23, 0xb0, 0x5e, 0x0c, 0x86, 0xf8, 0xcc, 0x12, 0xd8, 0x86, 0x11,
-  0xe2, 0x18, 0xc0, 0x67, 0x96, 0xc0, 0x36, 0x06, 0x5a, 0x1e, 0x0d, 0x36,
-  0xb0, 0xd8, 0x20, 0x68, 0x43, 0xa8, 0x0d, 0xbd, 0x92, 0x8d, 0x0b, 0x86,
-  0xb9, 0xe0, 0xa9, 0xdb, 0x9e, 0x3a, 0x57, 0x0c, 0x86, 0xb9, 0xdf, 0x1b,
-  0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c,
-  0x36, 0xb8, 0x0c, 0x7a, 0x32, 0xd0, 0xc7, 0x60, 0x2d, 0x83, 0xd1, 0x84,
-  0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86,
-  0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xec, 0x2e, 0x03,
-  0xb2, 0x0c, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x0c,
-  0x2f, 0x83, 0xb2, 0x0c, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04,
-  0x03, 0x2c, 0x2f, 0x03, 0xb3, 0x0c, 0x12, 0x22, 0x18, 0x31, 0x50, 0x00,
-  0x10, 0x04, 0x83, 0x05, 0x34, 0x83, 0xb2, 0x0c, 0x48, 0x32, 0x08, 0xe4,
-  0x32, 0xa8, 0xc9, 0x80, 0x2e, 0x83, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xc1,
-  0x59, 0x82, 0xdb, 0x18, 0x68, 0x79, 0x4c, 0x43, 0x34, 0xd4, 0x54, 0x08,
-  0x0d, 0x96, 0x20, 0x0d, 0xc1, 0x36, 0xd4, 0x54, 0x28, 0x0d, 0xab, 0x97,
-  0x95, 0x0c, 0xe0, 0x33, 0xcb, 0x80, 0x1b, 0xba, 0x11, 0x2f, 0xc3, 0x11,
-  0xc1, 0x4d, 0x06, 0xc3, 0x77, 0xc2, 0x30, 0xc3, 0x0d, 0x81, 0x48, 0x06,
-  0x64, 0x50, 0x43, 0xa0, 0xc3, 0x11, 0xf7, 0xb2, 0x93, 0xc1, 0xf0, 0x55,
-  0x20, 0xe8, 0xe5, 0xcb, 0x30, 0xc3, 0x0d, 0x41, 0x49, 0x06, 0x64, 0x50,
-  0xc1, 0xa0, 0xb3, 0x0c, 0xb9, 0xe1, 0x1e, 0xc1, 0xa5, 0x63, 0x30, 0xcc,
-  0xe9, 0xdf, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xb0, 0xa5, 0x66,
-  0x60, 0x97, 0xc1, 0x4c, 0x06, 0xa4, 0x19, 0x8c, 0x26, 0x04, 0xc0, 0x68,
-  0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x71, 0xc8,
-  0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x60, 0xb0, 0x19, 0xf4, 0x65, 0x70,
-  0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x60, 0xb1, 0x19, 0xf8,
-  0x65, 0xc0, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x60, 0xb2,
-  0x19, 0xfc, 0x65, 0x20, 0x11, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18,
-  0x2c, 0xb9, 0x19, 0xf8, 0x65, 0xd0, 0x93, 0x41, 0xb0, 0x9a, 0x81, 0x5b,
-  0x06, 0xad, 0x19, 0x8c, 0x26, 0x04, 0xc0, 0x05, 0x0f, 0xce, 0x12, 0xb8,
-  0xc7, 0x70, 0x43, 0xca, 0xc4, 0x66, 0x00, 0x06, 0xb3, 0x0c, 0xbb, 0xc1,
-  0x1b, 0x41, 0xe1, 0x64, 0x10, 0x9a, 0x01, 0x5c, 0xf0, 0xd4, 0x88, 0xc1,
-  0x01, 0x80, 0x20, 0x18, 0x50, 0xbb, 0x19, 0x88, 0x66, 0xb0, 0xc5, 0x65,
-  0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x14, 0x6f, 0x06, 0xa2, 0x19,
-  0x04, 0xc2, 0x05, 0xc3, 0xd4, 0x4e, 0x06, 0xa6, 0x19, 0xc0, 0x05, 0x4f,
-  0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x01, 0x05, 0x9e, 0xc1, 0x69, 0x06,
-  0x32, 0x63, 0x97, 0xc1, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50, 0xe1,
-  0x19, 0x9c, 0x66, 0x10, 0x08, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x77, 0x3c,
-  0x75, 0x24, 0x19, 0x0c, 0x73, 0x35, 0x18, 0x0c, 0x73, 0xc4, 0x30, 0x47,
-  0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x9b, 0x79, 0x06, 0xb3,
-  0x19, 0xc0, 0x65, 0x10, 0x9e, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08,
-  0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18,
-  0x20, 0x00, 0x08, 0x82, 0x01, 0xd6, 0x9e, 0x81, 0x6e, 0x06, 0x09, 0x11,
-  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0xe6, 0x9e, 0xc1, 0x6e, 0x06,
-  0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0xf6, 0x9e, 0x01,
-  0x6f, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0x62,
-  0x9f, 0xc1, 0x6e, 0x06, 0x7a, 0x19, 0x04, 0xe8, 0x19, 0xac, 0x66, 0xa0,
-  0x9e, 0xc1, 0x68, 0x42, 0x00, 0x5c, 0xf0, 0xe0, 0x2c, 0x81, 0x7b, 0x0c,
-  0x37, 0x98, 0x4d, 0x7b, 0x06, 0x60, 0x30, 0xcb, 0xd0, 0x1b, 0xee, 0x11,
-  0x18, 0x5b, 0x06, 0x6e, 0x19, 0xc4, 0x67, 0x38, 0xe2, 0x0e, 0xde, 0x32,
-  0x20, 0xbe, 0x59, 0x06, 0xdf, 0x08, 0x8f, 0xc0, 0xe0, 0x32, 0xc0, 0x83,
-  0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca, 0x02, 0x43,
-  0x3e, 0x56, 0x04, 0xf1, 0x29, 0xe2, 0x3e, 0x03, 0x1d, 0x6e, 0x08, 0xea,
-  0x33, 0x00, 0x83, 0x59, 0x86, 0xdf, 0x00, 0x8f, 0xc0, 0x06, 0xbc, 0x0c,
-  0xe0, 0x33, 0x4b, 0x50, 0x1e, 0x76, 0x97, 0x01, 0x11, 0x9f, 0x59, 0x82,
-  0xf2, 0x18, 0x8e, 0x10, 0x05, 0xbc, 0x0c, 0x84, 0x6f, 0x96, 0x41, 0x3c,
-  0xca, 0x23, 0xb0, 0x51, 0xc8, 0xcb, 0x20, 0x3e, 0x16, 0x38, 0xf4, 0xb9,
-  0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a,
-  0x10, 0xd1, 0x40, 0x87, 0x1b, 0x02, 0x10, 0x0d, 0xc0, 0x60, 0x96, 0x61,
-  0x3c, 0xc8, 0x23, 0xb0, 0xd0, 0x0c, 0x86, 0xf8, 0xcc, 0x12, 0x94, 0x87,
-  0x11, 0xa4, 0x19, 0xc0, 0x67, 0x96, 0xa0, 0x3c, 0x06, 0x5a, 0x1e, 0xed,
-  0x37, 0x30, 0xf0, 0x20, 0xc6, 0x43, 0x20, 0x0f, 0x9d, 0x08, 0x8f, 0x0b,
-  0x86, 0xb1, 0xd1, 0x0c, 0x4e, 0x33, 0x88, 0xcf, 0x70, 0x84, 0xe8, 0xa0,
-  0x66, 0x40, 0x7c, 0xb3, 0x0c, 0xe6, 0x91, 0x1e, 0x81, 0xa5, 0x66, 0x30,
-  0x3a, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05,
-  0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x04, 0x8c, 0x06, 0x3a, 0xdc, 0x10,
-  0xb8, 0x68, 0x00, 0x06, 0xb3, 0x0c, 0xe7, 0x81, 0x1e, 0x81, 0x0d, 0xb1,
-  0x19, 0xc0, 0x67, 0x96, 0xa0, 0x3d, 0xcc, 0x35, 0x03, 0x22, 0x3e, 0xb3,
-  0x04, 0xed, 0x31, 0x1c, 0xd1, 0x3a, 0xaf, 0x19, 0x08, 0xdf, 0x2c, 0x83,
-  0x7a, 0xb4, 0x47, 0x60, 0xae, 0x03, 0x9b, 0x41, 0x7c, 0x2c, 0x70, 0xe8,
-  0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8,
-  0x14, 0xb1, 0xa3, 0x81, 0x0e, 0x37, 0x04, 0x39, 0x1a, 0x80, 0xc1, 0x2c,
-  0xc3, 0x7a, 0xb0, 0x47, 0x60, 0xb8, 0x19, 0x0c, 0xf1, 0x99, 0x25, 0x68,
-  0x0f, 0x23, 0x7a, 0x33, 0x80, 0xcf, 0x2c, 0x41, 0x7b, 0x0c, 0xb4, 0x3c,
-  0xda, 0x79, 0x60, 0xe8, 0x41, 0xac, 0x87, 0xc0, 0x1e, 0x70, 0x97, 0x1e,
-  0x17, 0x0c, 0x73, 0xc1, 0x53, 0xb7, 0x3d, 0x75, 0xa9, 0x19, 0x0c, 0x73,
-  0xba, 0x18, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00,
-  0x20, 0x08, 0x06, 0xdb, 0x9a, 0x06, 0x38, 0x1a, 0xd4, 0x67, 0x60, 0xa6,
-  0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c,
-  0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01,
-  0x26, 0xa7, 0xc1, 0x8f, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08,
-  0x82, 0x01, 0x36, 0xa7, 0x01, 0x98, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x20,
-  0x00, 0x08, 0x82, 0x01, 0x46, 0xa7, 0x41, 0x98, 0x06, 0x09, 0x11, 0x8c,
-  0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0xb2, 0xa7, 0x01, 0x98, 0x06, 0xff,
-  0x19, 0x04, 0x6d, 0x1a, 0xc0, 0x68, 0xf0, 0xa6, 0xc1, 0x68, 0x42, 0x00,
-  0x5c, 0xf0, 0xe0, 0x2c, 0x81, 0x7b, 0x0c, 0xb4, 0x3c, 0xa6, 0x91, 0x1b,
-  0x60, 0x2c, 0xe0, 0x06, 0x4b, 0xec, 0x86, 0xd0, 0x1e, 0x60, 0x2c, 0xf0,
-  0xc6, 0x2c, 0xc3, 0x7b, 0xc4, 0xc7, 0xfa, 0x0c, 0x47, 0xbc, 0x4f, 0x8c,
-  0x06, 0xc3, 0x77, 0xf0, 0x33, 0xcc, 0x70, 0x43, 0xc0, 0x9f, 0x01, 0x19,
-  0xd4, 0x10, 0xe8, 0x70, 0x84, 0xfc, 0xd4, 0x68, 0x30, 0x7c, 0x15, 0x08,
-  0x7a, 0xf4, 0x33, 0xcc, 0x70, 0x43, 0xf0, 0x9f, 0x01, 0x19, 0x54, 0x30,
-  0xe8, 0x2c, 0x03, 0x7c, 0x94, 0x48, 0x70, 0xe3, 0x19, 0x0c, 0x73, 0xf4,
-  0x18, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0xdb, 0xa8, 0x06,
-  0x70, 0x1a, 0xb4, 0x68, 0xe0, 0xa7, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26,
-  0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x87, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0xa6, 0xaa, 0xc1, 0x9d, 0x06, 0x07,
-  0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0xb6, 0xaa, 0x01, 0x9e,
-  0x06, 0x0c, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0xc6, 0xaa,
-  0x41, 0x9e, 0x06, 0x12, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1,
-  0x32, 0xab, 0x01, 0x9e, 0x06, 0x37, 0x1a, 0x04, 0xa5, 0x1a, 0xa0, 0x69,
-  0x70, 0xaa, 0xc1, 0x68, 0x42, 0x00, 0x5c, 0xf0, 0xe0, 0x2c, 0x41, 0x89,
-  0x0c, 0x37, 0x8c, 0xd0, 0xaa, 0x06, 0x60, 0x30, 0xcb, 0x20, 0x1f, 0xf3,
-  0x11, 0x94, 0x8c, 0x06, 0x7b, 0x1a, 0xc0, 0x05, 0x4f, 0x8d, 0x18, 0x1c,
-  0x00, 0x08, 0x82, 0x01, 0x55, 0xab, 0x01, 0x9f, 0x06, 0x29, 0xb4, 0xa6,
-  0xc1, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50, 0xb6, 0x1a, 0xf0, 0x69,
-  0x10, 0x08, 0x17, 0x0c, 0x53, 0x35, 0x1a, 0x80, 0x6a, 0x00, 0x17, 0x3c,
-  0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x94, 0xae, 0x06, 0xa1, 0x1a,
-  0xb4, 0x10, 0x9c, 0x06, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0xed,
-  0x6a, 0x10, 0xaa, 0x41, 0x20, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0xdd, 0xf1,
-  0xd4, 0xf9, 0x67, 0x30, 0xcc, 0xbd, 0x64, 0x30, 0xcc, 0x11, 0xc3, 0x1c,
-  0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x6c, 0xe0, 0x1a, 0xb4,
-  0x6a, 0xa0, 0xa6, 0xc1, 0xae, 0x06, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20,
-  0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62,
-  0x80, 0x00, 0x20, 0x08, 0x06, 0xd8, 0xb9, 0x06, 0xb4, 0x1a, 0x24, 0x44,
-  0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x18, 0xba, 0x06, 0xb5, 0x1a,
-  0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x58, 0xba, 0x06,
-  0xb6, 0x1a, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x0b,
-  0xbc, 0x06, 0xb5, 0x1a, 0xd0, 0x69, 0x10, 0x88, 0x6b, 0x50, 0xaa, 0x01,
-  0xb9, 0x06, 0xa3, 0x09, 0x01, 0x70, 0xc1, 0x83, 0xb3, 0x04, 0x25, 0x32,
-  0xdc, 0x00, 0x46, 0xe7, 0x1a, 0x80, 0xc1, 0x2c, 0x03, 0x7d, 0x94, 0x48,
-  0x60, 0x66, 0x1a, 0xa0, 0x69, 0x10, 0x9f, 0xe1, 0x88, 0x32, 0x4a, 0xd3,
-  0x80, 0xf8, 0x66, 0x19, 0xea, 0x03, 0x3f, 0x02, 0x53, 0xd3, 0xc0, 0x8c,
-  0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x0c,
-  0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0x78, 0x0d, 0x74, 0xb8, 0x21, 0x78,
-  0xd7, 0x00, 0x0c, 0x66, 0x19, 0xec, 0xe3, 0x3e, 0x02, 0x1b, 0xe4, 0x34,
-  0x80, 0xcf, 0x2c, 0x01, 0x7f, 0x58, 0x9c, 0x06, 0x44, 0x7c, 0x66, 0x09,
-  0xf8, 0x63, 0x38, 0x02, 0x8e, 0xe4, 0x34, 0x10, 0xbe, 0x59, 0x86, 0xfc,
-  0xe0, 0x8f, 0xc0, 0xe2, 0x68, 0x4e, 0x83, 0xf8, 0x58, 0xe0, 0xd0, 0xe7,
-  0x82, 0x61, 0x2e, 0x78, 0xca, 0x82, 0x48, 0x3e, 0x56, 0x04, 0xf1, 0x29,
-  0x82, 0x5f, 0x03, 0x1d, 0x6e, 0x08, 0xf4, 0x35, 0x00, 0x83, 0x59, 0x06,
-  0xfd, 0xd8, 0x8f, 0xc0, 0xf6, 0x34, 0x18, 0xe2, 0x33, 0x4b, 0xc0, 0x1f,
-  0x46, 0xf8, 0x69, 0x00, 0x9f, 0x59, 0x02, 0xfe, 0x18, 0x68, 0x79, 0x34,
-  0xfb, 0xc0, 0xee, 0x83, 0xd0, 0x0f, 0x61, 0x3f, 0xec, 0x31, 0xc0, 0x8f,
-  0x0b, 0x86, 0xb1, 0x3e, 0x0d, 0x42, 0x35, 0x88, 0xcf, 0x70, 0x44, 0x1f,
-  0x89, 0x6a, 0x40, 0x7c, 0xb3, 0x0c, 0xfd, 0x01, 0x22, 0x81, 0x8d, 0x6a,
-  0xe0, 0x47, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94,
-  0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x84, 0xca, 0x06, 0x3a, 0xdc,
-  0x10, 0xa0, 0x6c, 0x00, 0x06, 0xb3, 0x0c, 0xfe, 0xf1, 0x1f, 0x81, 0x0d,
-  0xab, 0x1a, 0xc0, 0x67, 0x96, 0x80, 0x44, 0x0c, 0x55, 0x03, 0x22, 0x3e,
-  0xb3, 0x04, 0x24, 0x32, 0x1c, 0x81, 0x4a, 0xa9, 0x1a, 0x08, 0xdf, 0x2c,
-  0x43, 0x88, 0x90, 0x48, 0x60, 0xa9, 0xa4, 0xaa, 0x41, 0x7c, 0x2c, 0x70,
-  0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x41, 0x24, 0x1f, 0x2b, 0x82,
-  0xf8, 0x14, 0x51, 0xb3, 0x81, 0x0e, 0x37, 0x04, 0x33, 0x1b, 0x80, 0xc1,
-  0x2c, 0x83, 0x88, 0x8c, 0x48, 0x60, 0xb2, 0x1a, 0x0c, 0xf1, 0x99, 0x25,
-  0x20, 0x11, 0x23, 0x6e, 0x35, 0x80, 0xcf, 0x2c, 0x01, 0x89, 0x0c, 0xb4,
-  0x3c, 0x9a, 0x7f, 0x60, 0xff, 0x41, 0x88, 0x88, 0x30, 0x22, 0x66, 0x19,
-  0x80, 0xc8, 0x05, 0xc3, 0x5c, 0xf0, 0xd4, 0x6d, 0x4f, 0xdd, 0xa8, 0x06,
-  0xc3, 0x1c, 0x6d, 0x06, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18,
-  0x1c, 0x00, 0x08, 0x82, 0xc1, 0x56, 0xb6, 0x81, 0xcc, 0x06, 0xef, 0x1a,
-  0x80, 0x6d, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30,
-  0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00, 0x82,
-  0x60, 0x80, 0xb1, 0x6d, 0x90, 0xb3, 0x41, 0x42, 0x04, 0x23, 0x06, 0x08,
-  0x00, 0x82, 0x60, 0x80, 0xb5, 0x6d, 0xa0, 0xb3, 0x41, 0x42, 0x04, 0x23,
-  0x06, 0x08, 0x00, 0x82, 0x60, 0x80, 0xb9, 0x6d, 0xb0, 0xb3, 0x41, 0x42,
-  0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0xd4, 0x6d, 0xa0, 0xb3,
-  0x41, 0xbe, 0x06, 0xc1, 0xd9, 0x06, 0x2a, 0x1b, 0xa4, 0x6d, 0x30, 0x9a,
-  0x10, 0x00, 0x17, 0x3c, 0x38, 0x4b, 0x50, 0x22, 0x03, 0x2d, 0x8f, 0x69,
-  0xc0, 0x07, 0x8d, 0x0b, 0xef, 0xc1, 0x12, 0xf2, 0x21, 0x90, 0x08, 0x8d,
-  0x0b, 0xf3, 0x31, 0xcb, 0x60, 0x22, 0x28, 0x52, 0x4e, 0xc3, 0x11, 0xea,
-  0xb4, 0xb2, 0xc1, 0xf0, 0xdd, 0x3a, 0x0d, 0x33, 0xdc, 0x10, 0xd8, 0x6b,
-  0x40, 0x06, 0x35, 0x04, 0x3a, 0x1c, 0xc1, 0x4e, 0x2f, 0x1b, 0x0c, 0x5f,
-  0x05, 0x82, 0x9e, 0x3b, 0x0d, 0x33, 0xdc, 0x10, 0xe4, 0x6b, 0x40, 0x06,
-  0x15, 0x0c, 0x3a, 0xcb, 0x70, 0x22, 0x3c, 0x12, 0x5c, 0xaf, 0x06, 0xc3,
-  0x9c, 0x7b, 0x06, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0xd6,
-  0xb7, 0x81, 0xda, 0x06, 0x27, 0x1b, 0xe0, 0x6d, 0x30, 0x9a, 0x10, 0x00,
-  0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0xc4,
-  0x21, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x80, 0x91, 0x6e, 0x10, 0xb7,
-  0xc1, 0x41, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x80, 0x95, 0x6e,
-  0x20, 0xb7, 0x01, 0x43, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x80,
-  0x99, 0x6e, 0x30, 0xb7, 0x81, 0x44, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82,
-  0x60, 0xb0, 0xb4, 0x6e, 0x20, 0xb7, 0x41, 0xcc, 0x06, 0xc1, 0xdf, 0x06,
-  0x62, 0x1b, 0x84, 0x6e, 0x30, 0x9a, 0x10, 0x00, 0x17, 0x3c, 0x38, 0x4b,
-  0xc0, 0x23, 0xc3, 0x0d, 0xfd, 0x54, 0xba, 0x01, 0x18, 0xcc, 0x32, 0xa4,
-  0x88, 0x8a, 0x04, 0xc5, 0xb2, 0x41, 0xdd, 0x06, 0x70, 0xc1, 0x53, 0x23,
-  0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0xbd, 0x6e, 0x60, 0xb7, 0x01, 0x49,
-  0x95, 0x6d, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x14, 0xec, 0x06,
-  0x76, 0x1b, 0x04, 0xc2, 0x05, 0xc3, 0xd4, 0xcb, 0x06, 0x7a, 0x1b, 0xc0,
-  0x05, 0x4f, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x01, 0x45, 0xbb, 0xc1,
-  0xde, 0x06, 0x27, 0xa5, 0xb6, 0xc1, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18,
-  0x50, 0xb5, 0x1b, 0xec, 0x6d, 0x10, 0x08, 0x17, 0x0c, 0x73, 0xc1, 0x53,
-  0x77, 0x3c, 0x75, 0xf8, 0x1a, 0x0c, 0x73, 0x29, 0x1a, 0x0c, 0x73, 0xc4,
-  0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x9b, 0xee,
-  0x06, 0xa7, 0x1b, 0x90, 0x6d, 0x50, 0xbb, 0xc1, 0x68, 0x42, 0x00, 0x8c,
-  0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89,
-  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x16, 0xbe, 0x81, 0xeb, 0x06,
-  0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x26, 0xbe, 0xc1,
-  0xeb, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x36,
-  0xbe, 0x01, 0xec, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82,
-  0xc1, 0xa2, 0xbe, 0xc1, 0xeb, 0x06, 0x6e, 0x1b, 0x04, 0xbc, 0x1b, 0xfc,
-  0x6d, 0xe0, 0xbb, 0xc1, 0x68, 0x42, 0x00, 0x5c, 0xf0, 0xe0, 0x2c, 0x01,
-  0x8f, 0x0c, 0x37, 0xe8, 0x54, 0xf8, 0x06, 0x60, 0x30, 0xcb, 0xb0, 0x22,
-  0x3c, 0x12, 0x18, 0xd8, 0x06, 0x62, 0x1b, 0xc4, 0x67, 0x38, 0x02, 0xac,
-  0xc6, 0x36, 0x20, 0xbe, 0x59, 0x06, 0x16, 0x79, 0x91, 0xc0, 0xc8, 0x36,
-  0x08, 0xab, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca,
-  0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x62, 0x7d, 0x03, 0x1d, 0x6e,
-  0x08, 0xd2, 0x37, 0x00, 0x83, 0x59, 0x86, 0x16, 0x71, 0x91, 0xc0, 0x06,
-  0xb6, 0x0d, 0xe0, 0x33, 0x4b, 0x30, 0x23, 0xb6, 0xb6, 0x01, 0x11, 0x9f,
-  0x59, 0x82, 0x19, 0x19, 0x8e, 0x58, 0x2b, 0xb6, 0x0d, 0x84, 0x6f, 0x96,
-  0x01, 0x46, 0x66, 0x24, 0x30, 0xb6, 0x6a, 0xdb, 0x20, 0x3e, 0x16, 0x38,
-  0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41,
-  0x7c, 0x8a, 0xb0, 0xdf, 0x40, 0x87, 0x1b, 0x02, 0xfa, 0x0d, 0xc0, 0x60,
-  0x96, 0x21, 0x46, 0x64, 0x24, 0xb0, 0xba, 0x0d, 0x86, 0xf8, 0xcc, 0x12,
-  0xcc, 0x88, 0x11, 0x78, 0x1b, 0xc0, 0x67, 0x96, 0x60, 0x46, 0x06, 0x5a,
-  0x1e, 0xad, 0x45, 0x30, 0x17, 0x21, 0x62, 0x44, 0x90, 0x11, 0x76, 0x0d,
-  0x5e, 0xe4, 0x82, 0x61, 0xec, 0x6e, 0x83, 0xbd, 0x0d, 0xe2, 0x33, 0x1c,
-  0x71, 0x57, 0x7c, 0x1b, 0x10, 0xdf, 0x2c, 0x03, 0x8d, 0xdc, 0x48, 0x60,
-  0x7d, 0x1b, 0xe0, 0x55, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17,
-  0x3c, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x41, 0xc2, 0x81,
-  0x0e, 0x37, 0x04, 0x22, 0x1c, 0x80, 0xc1, 0x2c, 0x43, 0x8d, 0xd8, 0x48,
-  0x60, 0x43, 0xe9, 0x06, 0xf0, 0x99, 0x25, 0xd8, 0x11, 0x13, 0xdd, 0x80,
-  0x88, 0xcf, 0x2c, 0xc1, 0x8e, 0x0c, 0x47, 0x88, 0xd6, 0xe8, 0x06, 0xc2,
-  0x37, 0xcb, 0x80, 0x23, 0x3b, 0x12, 0xd8, 0x68, 0x91, 0x6e, 0x10, 0x1f,
-  0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x10, 0xc9, 0xc7,
-  0x8a, 0x20, 0x3e, 0x45, 0xbc, 0x70, 0xa0, 0xc3, 0x0d, 0x41, 0x0b, 0x07,
-  0x60, 0x30, 0xcb, 0x90, 0x23, 0x3a, 0x12, 0x18, 0xeb, 0x06, 0x43, 0x7c,
-  0x66, 0x09, 0x76, 0xc4, 0x88, 0xd8, 0x0d, 0xe0, 0x33, 0x4b, 0xb0, 0x23,
-  0x03, 0x2d, 0x8f, 0x56, 0x23, 0x98, 0x8d, 0x10, 0x39, 0x22, 0xe8, 0x88,
-  0xce, 0x06, 0x37, 0x72, 0xc1, 0x30, 0x17, 0x3c, 0x75, 0xdb, 0x53, 0xd7,
-  0xb7, 0xc1, 0x30, 0xe7, 0xaa, 0xc1, 0x30, 0x47, 0x0c, 0x73, 0xc4, 0x30,
-  0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xb0, 0xfd, 0x70, 0xc0, 0xc2, 0x41,
-  0xfa, 0x06, 0x3a, 0x1c, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c,
-  0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x91, 0xc8, 0x88, 0x01, 0x02,
-  0x80, 0x20, 0x18, 0x60, 0x66, 0x1c, 0xcc, 0x70, 0x90, 0x10, 0xc1, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x60, 0x67, 0x1c, 0xd0, 0x70, 0x90, 0x10,
-  0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x60, 0x68, 0x1c, 0xd4, 0x70,
-  0x90, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c, 0x6f, 0x1c,
-  0xd0, 0x70, 0x30, 0xbf, 0x41, 0x10, 0xc6, 0x01, 0x09, 0x07, 0x63, 0x1c,
-  0x8c, 0x26, 0x04, 0xc0, 0x05, 0x0f, 0xce, 0x12, 0xf0, 0xc8, 0x40, 0xcb,
-  0x63, 0x1a, 0x27, 0x82, 0x82, 0x83, 0x89, 0xb0, 0x44, 0x8a, 0x08, 0x3b,
-  0x82, 0x82, 0x83, 0x8a, 0xcc, 0x32, 0xf4, 0xc8, 0x8f, 0xfc, 0xd6, 0x70,
-  0x84, 0xfa, 0x94, 0x70, 0x30, 0x7c, 0xb7, 0x3e, 0xc3, 0x0c, 0x37, 0x04,
-  0xf0, 0x1b, 0x90, 0x41, 0x0d, 0x81, 0x0e, 0x47, 0x98, 0x57, 0x0a, 0x07,
-  0xc3, 0x57, 0x81, 0xa0, 0x87, 0x5e, 0xc3, 0x0c, 0x37, 0x04, 0xf3, 0x1b,
-  0x90, 0x41, 0x05, 0x83, 0xce, 0x32, 0xf8, 0xc8, 0x9c, 0x04, 0x77, 0xbb,
-  0xc1, 0x30, 0x87, 0xae, 0xc1, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60,
-  0xb0, 0xdd, 0x71, 0x40, 0xc6, 0x41, 0x08, 0x07, 0x72, 0x1c, 0x8c, 0x26,
-  0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31,
-  0x14, 0x71, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x60, 0x7e, 0x1c,
-  0xac, 0x71, 0x70, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x60,
-  0x7f, 0x1c, 0xb0, 0x71, 0xc0, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20,
-  0x18, 0x60, 0xa0, 0x1c, 0xb4, 0x71, 0x20, 0x11, 0xc1, 0x88, 0x81, 0x02,
-  0x80, 0x20, 0x18, 0x2c, 0xa7, 0x1c, 0xb0, 0x71, 0xb0, 0xc2, 0x41, 0x90,
-  0xc7, 0x01, 0x0f, 0x07, 0x7b, 0x1c, 0x8c, 0x26, 0x04, 0xc0, 0x05, 0x0f,
-  0xce, 0x12, 0xcc, 0xc9, 0x70, 0xc3, 0x7d, 0xfd, 0x71, 0x00, 0x06, 0xb3,
-  0x0c, 0x60, 0x12, 0x26, 0x41, 0x99, 0x70, 0xf0, 0xc6, 0x01, 0x5c, 0xf0,
-  0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50, 0xa9, 0x1c, 0xc0, 0x71,
-  0x40, 0x42, 0x3f, 0x1c, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x01, 0xa5,
-  0xca, 0x01, 0x1c, 0x07, 0x81, 0x70, 0xc1, 0x30, 0x95, 0xc2, 0x01, 0x1d,
-  0x07, 0x70, 0xc1, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0xb9,
-  0x72, 0x50, 0xc7, 0x41, 0x88, 0x91, 0x71, 0x30, 0x62, 0x70, 0x00, 0x20,
-  0x08, 0x06, 0xd4, 0x2b, 0x07, 0x75, 0x1c, 0x04, 0xc2, 0x05, 0xc3, 0x5c,
-  0xf0, 0xd4, 0x1d, 0x4f, 0x9d, 0xfc, 0x06, 0xc3, 0xdc, 0xc8, 0x06, 0xc3,
-  0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1,
-  0x46, 0xcb, 0x41, 0x28, 0x07, 0x3e, 0x1c, 0xbc, 0x72, 0x30, 0x9a, 0x10,
-  0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50,
-  0x44, 0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x80, 0xed, 0x72, 0x80,
-  0xca, 0x41, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x80, 0xf1,
-  0x72, 0x90, 0xca, 0x41, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60,
-  0x80, 0xf5, 0x72, 0xa0, 0xca, 0x41, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00,
-  0x82, 0x60, 0xb0, 0x90, 0x73, 0x90, 0xca, 0x01, 0x1a, 0x07, 0x81, 0x2d,
-  0x07, 0x79, 0x1c, 0xe0, 0x72, 0x30, 0x9a, 0x10, 0x00, 0x17, 0x3c, 0x38,
-  0x4b, 0x30, 0x27, 0xc3, 0x0d, 0x34, 0xb6, 0xcb, 0x01, 0x18, 0xcc, 0x32,
-  0x88, 0xc9, 0x9c, 0x04, 0xa6, 0xc3, 0x01, 0x0f, 0x07, 0xf1, 0x19, 0x8e,
-  0x00, 0xa3, 0x1e, 0x0e, 0x88, 0x6f, 0x96, 0x61, 0x4c, 0xcc, 0x24, 0x30,
-  0x1f, 0x0e, 0xc2, 0x28, 0x3e, 0x16, 0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b,
-  0x9e, 0xb2, 0xc0, 0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x28, 0xe7, 0x40,
-  0x87, 0x1b, 0x82, 0x71, 0x0e, 0xc0, 0x60, 0x96, 0x81, 0x4c, 0xca, 0x24,
-  0xb0, 0xc1, 0x8c, 0x03, 0xf8, 0xcc, 0x12, 0xa8, 0x89, 0x95, 0x71, 0x40,
-  0xc4, 0x67, 0x96, 0x40, 0x4d, 0x86, 0x23, 0xd6, 0xc8, 0x8c, 0x03, 0xe1,
-  0x9b, 0x65, 0x38, 0x13, 0x35, 0x09, 0x8c, 0x8d, 0xce, 0x38, 0x88, 0x8f,
-  0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x88, 0xe4, 0x63,
-  0x45, 0x10, 0x9f, 0x22, 0xe0, 0x39, 0xd0, 0xe1, 0x86, 0xc0, 0x9d, 0x03,
-  0x30, 0x98, 0x65, 0x40, 0x93, 0x34, 0x09, 0xec, 0x8d, 0x83, 0x21, 0x3e,
-  0xb3, 0x04, 0x6a, 0x62, 0x84, 0x1c, 0x07, 0xf0, 0x99, 0x25, 0x50, 0x93,
-  0x81, 0x96, 0x47, 0x23, 0x13, 0xac, 0x4c, 0x08, 0x34, 0x11, 0xd2, 0x84,
-  0x1d, 0x03, 0x33, 0xb9, 0x60, 0x18, 0x8b, 0xe3, 0xa0, 0x8e, 0x83, 0xf8,
-  0x0c, 0x47, 0xc4, 0x99, 0x1d, 0x07, 0xc4, 0x37, 0xcb, 0xb0, 0x26, 0x6e,
-  0x12, 0xd8, 0x1d, 0x07, 0x72, 0x16, 0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30,
-  0xcc, 0x05, 0x4f, 0x59, 0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xf8,
-  0x73, 0xa0, 0xc3, 0x0d, 0x01, 0x3f, 0x07, 0x60, 0x30, 0xcb, 0xc0, 0x26,
-  0x6d, 0x12, 0xd8, 0xf0, 0xc7, 0x01, 0x7c, 0x66, 0x09, 0xe4, 0xc4, 0xf8,
-  0x38, 0x20, 0xe2, 0x33, 0x4b, 0x20, 0x27, 0xc3, 0x11, 0x7c, 0xd6, 0xc7,
-  0x81, 0xf0, 0xcd, 0x32, 0xbc, 0x89, 0x9c, 0x04, 0xd6, 0x67, 0x7e, 0x1c,
-  0xc4, 0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x44,
-  0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x29, 0x1d, 0xe8, 0x70, 0x43, 0x70,
-  0xd2, 0x01, 0x18, 0xcc, 0x32, 0xc0, 0x49, 0x9c, 0x04, 0x66, 0xca, 0xc1,
-  0x10, 0x9f, 0x59, 0x02, 0x39, 0x31, 0x62, 0x95, 0x03, 0xf8, 0xcc, 0x12,
-  0xc8, 0xc9, 0x40, 0xcb, 0xa3, 0xb1, 0x09, 0xd6, 0x26, 0x04, 0x9c, 0x08,
-  0x71, 0xe2, 0xd2, 0x81, 0x9b, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0xdd, 0xf6,
-  0xd4, 0xdd, 0x71, 0x30, 0xcc, 0xa1, 0x6e, 0x30, 0xcc, 0x11, 0xc3, 0x1c,
-  0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x6c, 0x39, 0x1d, 0x98,
-  0x74, 0x30, 0xce, 0x01, 0x4d, 0x07, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20,
-  0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62,
-  0x80, 0x00, 0x20, 0x08, 0x06, 0x18, 0x58, 0x07, 0x2d, 0x1d, 0x24, 0x44,
-  0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x58, 0x58, 0x07, 0x2e, 0x1d,
-  0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x98, 0x58, 0x07,
-  0x2f, 0x1d, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x4b,
-  0x5a, 0x07, 0x2e, 0x1d, 0xb4, 0x73, 0x10, 0xec, 0x74, 0xe0, 0xcf, 0x41,
-  0x4f, 0x07, 0xa3, 0x09, 0x01, 0x70, 0xc1, 0x83, 0xb3, 0x04, 0x73, 0x32,
-  0xd0, 0xf2, 0x98, 0x86, 0x8f, 0xf0, 0xe7, 0xd0, 0x23, 0x2c, 0x01, 0x26,
-  0x82, 0x9c, 0xf0, 0xe7, 0x10, 0x26, 0xb3, 0x0c, 0x74, 0x62, 0x27, 0xb9,
-  0x36, 0x1c, 0x91, 0x3e, 0xff, 0x1c, 0x0c, 0xdf, 0xa9, 0xcf, 0x30, 0xc3,
-  0x0d, 0x81, 0x3a, 0x07, 0x64, 0x50, 0x43, 0xa0, 0xc3, 0x11, 0xe0, 0x36,
-  0xd2, 0xc1, 0xf0, 0x55, 0x20, 0xe8, 0x89, 0xdb, 0x30, 0xc3, 0x0d, 0x41,
-  0x3b, 0x07, 0x64, 0x50, 0xc1, 0xa0, 0xb3, 0x0c, 0x75, 0xa2, 0x2a, 0xc1,
-  0xc5, 0x72, 0x30, 0xcc, 0x89, 0x6f, 0x30, 0xcc, 0x88, 0xc1, 0x01, 0x80,
-  0x20, 0x18, 0x6c, 0x71, 0x1d, 0xf8, 0x74, 0xb0, 0xcf, 0x01, 0x5b, 0x07,
-  0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a,
-  0x40, 0x0c, 0x45, 0x1c, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x18,
-  0x5e, 0x07, 0x65, 0x1d, 0x1c, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08,
-  0x06, 0x58, 0x5e, 0x07, 0x66, 0x1d, 0x30, 0x44, 0x30, 0x62, 0x80, 0x00,
-  0x20, 0x08, 0x06, 0x98, 0x5e, 0x07, 0x67, 0x1d, 0x48, 0x44, 0x30, 0x62,
-  0xa0, 0x00, 0x20, 0x08, 0x06, 0x4b, 0x68, 0x07, 0x66, 0x1d, 0x94, 0x74,
-  0x10, 0xcc, 0x75, 0x60, 0xd3, 0x41, 0x5d, 0x07, 0xa3, 0x09, 0x01, 0x70,
-  0xc1, 0x83, 0xb3, 0x04, 0xaa, 0x32, 0xdc, 0x10, 0x6f, 0x79, 0x1d, 0x80,
-  0xc1, 0x2c, 0xc3, 0x9d, 0xe0, 0x49, 0x50, 0x20, 0x1d, 0xa4, 0x75, 0x00,
-  0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0xd4, 0x68, 0x07,
-  0x6a, 0x1d, 0x8c, 0x50, 0x4e, 0x07, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60,
-  0x40, 0x91, 0x76, 0xa0, 0xd6, 0x41, 0x20, 0x5c, 0x30, 0x4c, 0x8d, 0x74,
-  0xe0, 0xd6, 0x01, 0x5c, 0xf0, 0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18,
-  0x50, 0xa8, 0x1d, 0xbc, 0x75, 0xb0, 0x6f, 0x3e, 0x1d, 0x8c, 0x18, 0x1c,
-  0x00, 0x08, 0x82, 0x01, 0x95, 0xda, 0xc1, 0x5b, 0x07, 0x81, 0x70, 0xc1,
-  0x30, 0x17, 0x3c, 0x75, 0xc7, 0x53, 0xc7, 0xce, 0xc1, 0x30, 0xd7, 0xbf,
-  0xc1, 0x30, 0x47, 0x0c, 0x73, 0xc4, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82,
-  0x60, 0xb0, 0xb9, 0x76, 0xb0, 0xd7, 0x01, 0x4e, 0x07, 0xa9, 0x1d, 0x8c,
-  0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02,
-  0x31, 0x14, 0x91, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x60, 0xb5,
-  0x1d, 0x88, 0x76, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
-  0x60, 0xb6, 0x1d, 0x8c, 0x76, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80,
-  0x20, 0x18, 0x60, 0xb7, 0x1d, 0x90, 0x76, 0x90, 0x10, 0xc1, 0x88, 0x81,
-  0x02, 0x80, 0x20, 0x18, 0x2c, 0xbe, 0x1d, 0x8c, 0x76, 0x20, 0xd6, 0x41,
-  0x00, 0xdb, 0xc1, 0x5c, 0x07, 0xb2, 0x1d, 0x8c, 0x26, 0x04, 0xc0, 0x05,
-  0x0f, 0xce, 0x12, 0xa8, 0xca, 0x70, 0x83, 0xcb, 0xd5, 0x76, 0x00, 0x06,
-  0xb3, 0x0c, 0x79, 0xa2, 0x2a, 0x81, 0xd1, 0x74, 0x60, 0xd3, 0x41, 0x7c,
-  0x86, 0x23, 0x7e, 0xe8, 0xa6, 0x03, 0xe2, 0x9b, 0x65, 0xd0, 0x93, 0x3e,
-  0x09, 0x0c, 0xa7, 0x03, 0x30, 0x8a, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18,
-  0xe6, 0x82, 0xa7, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0x7e,
-  0x3b, 0xd0, 0xe1, 0x86, 0xa0, 0xb7, 0x03, 0x30, 0x98, 0x65, 0xd8, 0x13,
-  0x3e, 0x09, 0x6c, 0x00, 0xeb, 0x00, 0x3e, 0xb3, 0x04, 0xa1, 0x62, 0x3f,
-  0x1d, 0x10, 0xf1, 0x99, 0x25, 0x08, 0x95, 0xe1, 0x08, 0x35, 0x02, 0xeb,
-  0x40, 0xf8, 0x66, 0x19, 0xfc, 0x24, 0x54, 0x02, 0x5b, 0xa3, 0xb0, 0x0e,
-  0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x22,
-  0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0xf5, 0x0e, 0x74, 0xb8, 0x21, 0x40,
-  0xef, 0x00, 0x0c, 0x66, 0x19, 0xfe, 0x04, 0x54, 0x02, 0x4b, 0xeb, 0x60,
-  0x88, 0xcf, 0x2c, 0x41, 0xa8, 0x18, 0xc1, 0xd6, 0x01, 0x7c, 0x66, 0x09,
-  0x42, 0x65, 0xa0, 0xe5, 0xd1, 0xf6, 0x04, 0xe3, 0x13, 0xe2, 0x4f, 0x04,
-  0x50, 0x51, 0xc7, 0xa0, 0x4f, 0x2e, 0x18, 0xc6, 0xd6, 0x3a, 0x78, 0xeb,
-  0x20, 0x3e, 0xc3, 0x11, 0x6b, 0x07, 0xd7, 0x01, 0xf1, 0xcd, 0x32, 0x88,
-  0x4a, 0xa9, 0x04, 0x16, 0xd7, 0x01, 0xdb, 0xc5, 0xc7, 0x82, 0x81, 0x3e,
-  0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f,
-  0x11, 0xf8, 0x1d, 0xe8, 0x70, 0x43, 0x60, 0xdf, 0x01, 0x18, 0xcc, 0x32,
-  0x8c, 0x0a, 0xa9, 0x04, 0x36, 0xe4, 0x75, 0x00, 0x9f, 0x59, 0x82, 0x54,
-  0x31, 0xbb, 0x0e, 0x88, 0xf8, 0xcc, 0x12, 0xa4, 0xca, 0x70, 0x84, 0xdd,
-  0xdd, 0x75, 0x20, 0x7c, 0xb3, 0x0c, 0xa6, 0x92, 0x2a, 0x81, 0xdd, 0x1d,
-  0x5e, 0x07, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94,
-  0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0xc4, 0x88, 0x07, 0x3a, 0xdc,
-  0x10, 0x84, 0x78, 0x00, 0x06, 0xb3, 0x0c, 0xa7, 0x82, 0x2a, 0x81, 0x81,
-  0x76, 0x30, 0xc4, 0x67, 0x96, 0x20, 0x55, 0x8c, 0x28, 0xed, 0x00, 0x3e,
-  0xb3, 0x04, 0xa9, 0x32, 0xd0, 0xf2, 0x68, 0xa3, 0x82, 0x91, 0x0a, 0x71,
-  0x2a, 0x02, 0xaa, 0x88, 0x7c, 0x50, 0x2a, 0x17, 0x0c, 0x73, 0xc1, 0x53,
-  0xb7, 0x3d, 0x75, 0x71, 0x1d, 0x0c, 0x73, 0xa2, 0x1c, 0x0c, 0x73, 0xc4,
-  0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0xdb, 0x8c,
-  0x07, 0x20, 0x1e, 0xf4, 0x76, 0xe0, 0xe2, 0xc1, 0x68, 0x42, 0x00, 0x8c,
-  0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89,
-  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0xa6, 0xe3, 0xc1, 0x89, 0x07,
-  0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0xb6, 0xe3, 0x01,
-  0x8a, 0x07, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0xc6,
-  0xe3, 0x41, 0x8a, 0x07, 0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82,
-  0xc1, 0x32, 0xe6, 0x01, 0x8a, 0x07, 0xe7, 0x1d, 0x04, 0x35, 0x1e, 0xe0,
-  0x77, 0x70, 0xe3, 0xc1, 0x68, 0x42, 0x00, 0x5c, 0xf0, 0xe0, 0x2c, 0x81,
-  0xaa, 0x0c, 0xb4, 0x3c, 0xa6, 0x51, 0x27, 0xf0, 0x3b, 0xd0, 0x09, 0x4b,
-  0xdc, 0x89, 0x90, 0x2a, 0xf0, 0x3b, 0xe0, 0x89, 0xd9, 0x1e, 0x7e, 0x07,
-  0xf0, 0x99, 0x65, 0x58, 0x95, 0x56, 0xa1, 0xbd, 0xe1, 0x08, 0xdc, 0xd3,
-  0xef, 0x60, 0xf8, 0x2e, 0xf7, 0x86, 0x19, 0x6e, 0x08, 0xca, 0x3b, 0x20,
-  0x83, 0x1a, 0x02, 0x1d, 0x8e, 0x28, 0xfc, 0x3b, 0x18, 0xbe, 0x0a, 0x04,
-  0xbd, 0x63, 0x98, 0xe1, 0x86, 0x00, 0xbd, 0x03, 0x32, 0xa8, 0x60, 0xd0,
-  0x59, 0x06, 0x56, 0x09, 0x97, 0xe0, 0x58, 0x3b, 0x18, 0xe6, 0x7a, 0x39,
-  0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x36, 0x36, 0x0f, 0x72,
-  0x3c, 0xb0, 0xef, 0xe0, 0xcc, 0x83, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10,
-  0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x0e, 0x19, 0x31,
-  0x40, 0x00, 0x10, 0x04, 0x03, 0x6c, 0xce, 0x03, 0x30, 0x0f, 0x0e, 0x22,
-  0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x8c, 0xce, 0x83, 0x30, 0x0f,
-  0x18, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xac, 0xce, 0x03,
-  0x31, 0x0f, 0x24, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x85,
-  0xcf, 0x83, 0x30, 0x0f, 0x40, 0x3c, 0x08, 0xdc, 0x3c, 0x88, 0xf1, 0x00,
-  0xce, 0x83, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xc1, 0x59, 0x82, 0x70, 0x19,
-  0x6e, 0x60, 0x3f, 0x3a, 0x0f, 0xc0, 0x60, 0x96, 0xc1, 0x55, 0x5e, 0x25,
-  0xa8, 0xfd, 0x0e, 0xc8, 0x3c, 0x80, 0x0b, 0x9e, 0x1a, 0x31, 0x38, 0x00,
-  0x10, 0x04, 0x03, 0xca, 0xcf, 0x83, 0x32, 0x0f, 0xe4, 0x8f, 0xc6, 0x83,
-  0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0xfe, 0x3c, 0x28, 0xf3, 0x20,
-  0x10, 0x2e, 0x18, 0xa6, 0xfc, 0x3b, 0x48, 0xf3, 0x00, 0x2e, 0x78, 0x6a,
-  0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xa8, 0x51, 0x0f, 0xd4, 0x3c, 0x00,
-  0x83, 0x1c, 0x0f, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x22, 0xf5,
-  0x40, 0xcd, 0x83, 0x40, 0xb8, 0x60, 0x98, 0x0b, 0x9e, 0xba, 0xe3, 0xa9,
-  0x3b, 0xef, 0x60, 0x98, 0xc3, 0xe7, 0x60, 0x98, 0x23, 0x86, 0x39, 0x62,
-  0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xd8, 0x52, 0x3d, 0xb0, 0xf3,
-  0x60, 0xc6, 0x03, 0x52, 0x0f, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08,
-  0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00,
-  0x01, 0x40, 0x10, 0x0c, 0x30, 0x58, 0x0f, 0xfa, 0x3c, 0x48, 0x88, 0x60,
-  0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb0, 0x58, 0x0f, 0xfc, 0x3c, 0x48,
-  0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x30, 0x59, 0x0f, 0xfe,
-  0x3c, 0x48, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x96, 0x5c,
-  0x0f, 0xfc, 0x3c, 0xe8, 0xf1, 0x20, 0x58, 0xf5, 0xc0, 0xcd, 0x83, 0x56,
-  0x0f, 0x46, 0x13, 0x02, 0xe0, 0x82, 0x07, 0x67, 0x09, 0xc2, 0x65, 0xb8,
-  0x21, 0x05, 0x03, 0x58, 0x0f, 0xc0, 0x60, 0x96, 0x01, 0x56, 0xc2, 0x25,
-  0xb0, 0x17, 0x0f, 0x62, 0x3c, 0x88, 0xcf, 0x70, 0x84, 0x0b, 0x06, 0x32,
-  0x1e, 0x10, 0xdf, 0x2c, 0x43, 0xac, 0xd0, 0x4a, 0x60, 0x33, 0x1e, 0xbc,
-  0x60, 0x10, 0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59,
-  0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xe8, 0x7a, 0xa0, 0xc3, 0x0d,
-  0x01, 0xae, 0x07, 0x60, 0x30, 0xcb, 0x20, 0x2b, 0xb3, 0x12, 0xd8, 0xb0,
-  0xe3, 0x01, 0x7c, 0x66, 0x09, 0x70, 0xc5, 0x74, 0x3c, 0x20, 0xe2, 0x33,
-  0x4b, 0x80, 0x2b, 0xc3, 0x11, 0x39, 0x18, 0xec, 0x78, 0x20, 0x7c, 0xb3,
-  0x0c, 0xb5, 0x82, 0x2b, 0x81, 0xe9, 0x60, 0xc0, 0xe3, 0x41, 0x7c, 0x2c,
-  0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x41, 0x24, 0x1f, 0x2b,
-  0x82, 0xf8, 0x14, 0x51, 0xee, 0x81, 0x0e, 0x37, 0x04, 0xe3, 0x1e, 0x80,
-  0xc1, 0x2c, 0x83, 0xad, 0xdc, 0x4a, 0x60, 0x64, 0x1e, 0x0c, 0xf1, 0x99,
-  0x25, 0xc0, 0x15, 0x23, 0xce, 0x3c, 0x80, 0xcf, 0x2c, 0x01, 0xae, 0x0c,
-  0xb4, 0x3c, 0x9a, 0xac, 0x60, 0xb3, 0x42, 0xd8, 0x8a, 0x70, 0x2b, 0xa8,
-  0x28, 0xd0, 0xca, 0x05, 0xc3, 0x98, 0x99, 0x07, 0x6a, 0x1e, 0xc4, 0x67,
-  0x38, 0x02, 0x16, 0xd6, 0x3c, 0x20, 0xbe, 0x59, 0x86, 0x5c, 0xe1, 0x95,
-  0xc0, 0xd8, 0x3c, 0x88, 0x85, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61,
-  0x2e, 0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x62, 0xde,
-  0x03, 0x1d, 0x6e, 0x08, 0xe2, 0x3d, 0x00, 0x83, 0x59, 0x06, 0x5d, 0xd9,
-  0x95, 0xc0, 0x06, 0x3a, 0x0f, 0xe0, 0x33, 0x4b, 0x00, 0x2e, 0x16, 0xe7,
-  0x01, 0x11, 0x9f, 0x59, 0x02, 0x70, 0x19, 0x8e, 0xd8, 0x05, 0x39, 0x0f,
-  0x84, 0x6f, 0x96, 0xa1, 0x57, 0xc0, 0x25, 0x30, 0x5e, 0x98, 0xf3, 0x20,
-  0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20, 0x92,
-  0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xf0, 0xf7, 0x40, 0x87, 0x1b, 0x02, 0x7e,
-  0x0f, 0xc0, 0x60, 0x96, 0xc1, 0x57, 0x7e, 0x25, 0xb0, 0x3d, 0x0f, 0x86,
-  0xf8, 0xcc, 0x12, 0x80, 0x8b, 0x11, 0xa0, 0x1e, 0xc0, 0x67, 0x96, 0x00,
-  0x5c, 0x06, 0x5a, 0x1e, 0x4d, 0x57, 0xb0, 0x5d, 0x21, 0x7c, 0x45, 0xf8,
-  0x15, 0xd6, 0xe0, 0x95, 0x0b, 0x86, 0xb9, 0xe0, 0xa9, 0xdb, 0x9e, 0x3a,
-  0x36, 0x0f, 0x86, 0xb9, 0xbe, 0x0e, 0x86, 0x39, 0x62, 0x98, 0x23, 0x86,
-  0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0xcd, 0xe5, 0x83, 0x7d, 0x0f,
-  0x70, 0x3d, 0x48, 0xf9, 0x60, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60,
-  0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10,
-  0x00, 0x04, 0xc1, 0x00, 0xab, 0xf9, 0x40, 0xe4, 0x83, 0x84, 0x08, 0x46,
-  0x0c, 0x10, 0x00, 0x04, 0xc1, 0x00, 0xb3, 0xf9, 0x60, 0xe4, 0x83, 0x84,
-  0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x00, 0xbb, 0xf9, 0x80, 0xe4,
-  0x83, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0xf1, 0xf9,
-  0x60, 0xe4, 0x03, 0x71, 0x0f, 0x02, 0x98, 0x0f, 0xe6, 0x3d, 0x90, 0xf9,
-  0x60, 0x34, 0x21, 0x00, 0x2e, 0x78, 0x70, 0x96, 0x20, 0x5c, 0x06, 0x5a,
-  0x1e, 0xd3, 0x60, 0x15, 0xf3, 0x1e, 0x56, 0x85, 0x25, 0x5c, 0x45, 0x00,
-  0x17, 0xf3, 0x1e, 0x5e, 0x65, 0x96, 0x41, 0x5c, 0xc8, 0xc5, 0x15, 0x83,
-  0xe1, 0x88, 0x59, 0x0c, 0xe8, 0x3d, 0x18, 0xbe, 0xa3, 0xc5, 0x60, 0x98,
-  0xe1, 0x86, 0xe0, 0xd7, 0x03, 0x32, 0xa8, 0x21, 0xd0, 0xe1, 0x88, 0x7f,
-  0xc0, 0xf7, 0x60, 0xf8, 0x2a, 0x10, 0xf4, 0x42, 0x62, 0x98, 0xe1, 0x86,
-  0x40, 0xdc, 0x03, 0x32, 0xa8, 0x60, 0xd0, 0x59, 0x86, 0x71, 0xc1, 0x97,
-  0xe0, 0x4c, 0x3d, 0x18, 0xe6, 0x6e, 0x3b, 0x18, 0x66, 0xc4, 0xe0, 0x00,
-  0x40, 0x10, 0x0c, 0x36, 0xb3, 0x0f, 0x66, 0x3e, 0x80, 0xf7, 0x20, 0xec,
-  0x83, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18,
-  0x4d, 0x20, 0x86, 0x22, 0x0e, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03,
-  0xac, 0xed, 0x03, 0x9d, 0x0f, 0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10,
-  0x04, 0x03, 0xcc, 0xed, 0x83, 0x9d, 0x0f, 0x18, 0x22, 0x18, 0x31, 0x40,
-  0x00, 0x10, 0x04, 0x03, 0xec, 0xed, 0x03, 0x9e, 0x0f, 0x24, 0x22, 0x18,
-  0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0xc5, 0xee, 0x83, 0x9d, 0x0f, 0xf4,
-  0x3d, 0x08, 0xd0, 0x3e, 0x58, 0xf9, 0x40, 0xed, 0x83, 0xd1, 0x84, 0x00,
-  0xb8, 0xe0, 0xc1, 0x59, 0x02, 0x7c, 0x19, 0x6e, 0x30, 0xc7, 0xc0, 0xed,
-  0x03, 0x30, 0x98, 0x65, 0x28, 0x17, 0x73, 0x09, 0xaa, 0xde, 0x03, 0x9f,
-  0x0f, 0xe0, 0x82, 0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0xc2,
-  0xfb, 0xe0, 0xe7, 0x83, 0x76, 0x0c, 0x5c, 0x3e, 0x18, 0x31, 0x38, 0x00,
-  0x10, 0x04, 0x03, 0x2a, 0xef, 0x83, 0x9f, 0x0f, 0x02, 0xe1, 0x82, 0x61,
-  0x0a, 0xdf, 0x83, 0xb1, 0x0f, 0xe0, 0x82, 0xa7, 0x46, 0x0c, 0x0e, 0x00,
-  0x04, 0xc1, 0x80, 0xea, 0xfb, 0x80, 0xec, 0x03, 0x9d, 0x98, 0xf9, 0x60,
-  0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x28, 0xbf, 0x0f, 0xc8, 0x3e, 0x08,
-  0x84, 0x0b, 0x86, 0xb9, 0xe0, 0xa9, 0x3b, 0x9e, 0xba, 0x70, 0x0f, 0x86,
-  0x39, 0xf9, 0x0e, 0x86, 0x39, 0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38,
-  0x00, 0x10, 0x04, 0x83, 0x6d, 0xf4, 0x03, 0xb8, 0x0f, 0x5a, 0x3e, 0xf0,
-  0xfb, 0x60, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10,
-  0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1,
-  0x00, 0x53, 0xfd, 0xe0, 0xee, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00,
-  0x04, 0xc1, 0x00, 0x5b, 0xfd, 0x00, 0xef, 0x83, 0x84, 0x08, 0x46, 0x0c,
-  0x10, 0x00, 0x04, 0xc1, 0x00, 0x63, 0xfd, 0x20, 0xef, 0x83, 0x84, 0x08,
-  0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0x99, 0xfd, 0x00, 0xef, 0x83,
-  0x9b, 0x0f, 0x82, 0xd2, 0x0f, 0xd0, 0x3e, 0x38, 0xfd, 0x60, 0x34, 0x21,
-  0x00, 0x2e, 0x78, 0x70, 0x96, 0x00, 0x5f, 0x86, 0x1b, 0x46, 0x32, 0x50,
-  0xfd, 0x00, 0x0c, 0x66, 0x19, 0xce, 0x05, 0x5f, 0x02, 0x4b, 0xf9, 0x60,
-  0xe5, 0x83, 0xf8, 0x0c, 0x47, 0xa4, 0x64, 0xc0, 0xf2, 0x01, 0xf1, 0xcd,
-  0x32, 0xa0, 0xcb, 0xba, 0x04, 0xd6, 0xf2, 0x81, 0x4a, 0x06, 0xf1, 0xb1,
-  0x60, 0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x86, 0x7c, 0xac,
-  0x08, 0xe2, 0x53, 0x04, 0xed, 0x07, 0x3a, 0xdc, 0x10, 0xc8, 0x7e, 0x00,
-  0x06, 0xb3, 0x0c, 0xe9, 0xa2, 0x2e, 0x81, 0x0d, 0x35, 0x1f, 0xc0, 0x67,
-  0x96, 0xe0, 0x5d, 0x8c, 0xe6, 0x03, 0x22, 0x3e, 0xb3, 0x04, 0xef, 0x32,
-  0x1c, 0x41, 0x93, 0x41, 0xcd, 0x07, 0xc2, 0x37, 0xcb, 0xc0, 0x2e, 0xef,
-  0x12, 0x58, 0x4d, 0x06, 0x36, 0x1f, 0xc4, 0xc7, 0x02, 0x87, 0x3e, 0x17,
-  0x0c, 0x73, 0xc1, 0x53, 0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11,
-  0xbf, 0x1f, 0xe8, 0x70, 0x43, 0xd0, 0xfb, 0x01, 0x18, 0xcc, 0x32, 0xb4,
-  0x8b, 0xbb, 0x04, 0xe6, 0xf3, 0xc1, 0x10, 0x9f, 0x59, 0x82, 0x77, 0x31,
-  0x22, 0xec, 0x03, 0xf8, 0xcc, 0x12, 0xbc, 0xcb, 0x40, 0xcb, 0xa3, 0xa5,
-  0x0b, 0xa6, 0x2e, 0x44, 0xbb, 0x08, 0xee, 0xe2, 0xa7, 0xc2, 0xba, 0x5c,
-  0x30, 0x8c, 0x81, 0x7d, 0x40, 0xf6, 0x41, 0x7c, 0x86, 0x23, 0x54, 0xa3,
-  0xec, 0x03, 0xe2, 0x9b, 0x65, 0x80, 0x97, 0x79, 0x09, 0xcc, 0xec, 0x83,
-  0xd5, 0x88, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c,
-  0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xda, 0x3f, 0xd0, 0xe1, 0x86,
-  0x60, 0xfd, 0x03, 0x30, 0x98, 0x65, 0x88, 0x17, 0x79, 0x09, 0x6c, 0x70,
-  0xfb, 0x00, 0x3e, 0xb3, 0x04, 0xf7, 0x62, 0x6b, 0x1f, 0x10, 0xf1, 0x99,
-  0x25, 0xb8, 0x97, 0xe1, 0x88, 0xda, 0x60, 0xfb, 0x40, 0xf8, 0x66, 0x19,
-  0xe8, 0xe5, 0x5e, 0x02, 0xb3, 0x8d, 0xb6, 0x0f, 0xe2, 0x63, 0x81, 0x43,
-  0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4,
-  0xa7, 0x08, 0xfc, 0x0f, 0x74, 0xb8, 0x21, 0xb0, 0xff, 0x00, 0x0c, 0x66,
-  0x19, 0xea, 0xc5, 0x5e, 0x02, 0xab, 0xfb, 0x60, 0x88, 0xcf, 0x2c, 0xc1,
-  0xbd, 0x18, 0xa1, 0xf7, 0x01, 0x7c, 0x66, 0x09, 0xee, 0x65, 0xa0, 0xe5,
-  0xd1, 0xe2, 0x05, 0x93, 0x17, 0xa2, 0x5e, 0x04, 0x7b, 0x01, 0x9d, 0x79,
-  0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xba, 0xed, 0xa9, 0x33, 0xfb, 0x60, 0x98,
-  0xbb, 0xf3, 0x60, 0x98, 0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03,
-  0x00, 0x41, 0x30, 0xd8, 0x50, 0x50, 0xa8, 0xff, 0x40, 0xf6, 0x83, 0x11,
-  0x14, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61,
-  0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c,
-  0xb0, 0x17, 0x14, 0xf8, 0x3f, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40,
-  0x10, 0x0c, 0x30, 0x18, 0x14, 0xfa, 0x3f, 0x48, 0x88, 0x60, 0xc4, 0x00,
-  0x01, 0x40, 0x10, 0x0c, 0xb0, 0x18, 0x14, 0xfc, 0x3f, 0x48, 0x88, 0x60,
-  0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x16, 0x1c, 0x14, 0xfa, 0x3f, 0xe0,
-  0xfd, 0x20, 0x50, 0x41, 0xa1, 0xfd, 0x03, 0x16, 0x14, 0x46, 0x13, 0x02,
-  0xe0, 0x82, 0x07, 0x67, 0x09, 0xf0, 0x65, 0xa0, 0xe5, 0x31, 0x8d, 0x71,
-  0xd1, 0xfd, 0x41, 0x5c, 0x58, 0xa2, 0x5c, 0x84, 0x7b, 0xd1, 0xfd, 0xc1,
-  0x5c, 0x66, 0x19, 0xf2, 0x65, 0x5f, 0x50, 0x33, 0x18, 0x8e, 0x98, 0x3d,
-  0xf7, 0x0f, 0x86, 0xef, 0x68, 0x6f, 0x98, 0xe1, 0x86, 0x20, 0xf7, 0x03,
-  0x32, 0xa8, 0x21, 0xd0, 0xe1, 0x88, 0xfc, 0x90, 0xff, 0x60, 0xf8, 0x2a,
-  0x10, 0xf4, 0xf6, 0x63, 0x98, 0xe1, 0x86, 0x80, 0xf7, 0x03, 0x32, 0xa8,
-  0x60, 0xd0, 0x59, 0x06, 0x7d, 0x79, 0x99, 0xe0, 0x40, 0x3f, 0x18, 0xe6,
-  0x62, 0x3d, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x36, 0x30,
-  0x14, 0x5a, 0x50, 0x50, 0xff, 0x60, 0x07, 0x85, 0xd1, 0x84, 0x00, 0x18,
-  0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x0e,
-  0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xec, 0x0c, 0x05, 0x1a, 0x14,
-  0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x0c, 0x0d, 0x85,
-  0x1a, 0x14, 0x18, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x2c,
-  0x0d, 0x05, 0x1b, 0x14, 0x24, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04,
-  0x83, 0x05, 0x0e, 0x85, 0x1a, 0x14, 0xe8, 0x3f, 0x08, 0xc4, 0x50, 0x28,
-  0x41, 0x81, 0x0c, 0x85, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xc1, 0x59, 0x82,
-  0x97, 0x19, 0x6e, 0x00, 0xcf, 0x00, 0x0d, 0x05, 0x30, 0x98, 0x65, 0xe0,
-  0x97, 0x7e, 0x09, 0xea, 0xfd, 0x03, 0x1c, 0x14, 0xe0, 0x82, 0xa7, 0x46,
-  0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x92, 0x43, 0x21, 0x07, 0x85, 0xf6,
-  0x43, 0x41, 0x61, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xa8, 0x39, 0x14,
-  0x72, 0x50, 0x08, 0x84, 0x0b, 0x86, 0x29, 0xf9, 0x0f, 0x7a, 0x50, 0x80,
-  0x0b, 0x9e, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0xea, 0x0e, 0x05,
-  0x1f, 0x14, 0x68, 0xa4, 0x05, 0x85, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30,
-  0xa0, 0xf0, 0x50, 0xf0, 0x41, 0x21, 0x10, 0x2e, 0x18, 0xe6, 0x82, 0xa7,
-  0xee, 0x78, 0xea, 0x76, 0x3f, 0x18, 0xe6, 0xd8, 0x3d, 0x18, 0xe6, 0x88,
-  0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xb6, 0x3e,
-  0x14, 0xd4, 0x50, 0x38, 0x41, 0x01, 0x0f, 0x85, 0xd1, 0x84, 0x00, 0x18,
-  0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x12,
-  0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x8c, 0x14, 0x85, 0x38, 0x14,
-  0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xac, 0x14, 0x05,
-  0x39, 0x14, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xcc,
-  0x14, 0x85, 0x39, 0x14, 0x12, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04,
-  0x83, 0xa5, 0x15, 0x05, 0x39, 0x14, 0x62, 0x50, 0x08, 0xfe, 0x50, 0x10,
-  0x43, 0x21, 0x14, 0x85, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xc1, 0x59, 0x82,
-  0x97, 0x19, 0x6e, 0xe8, 0xcf, 0x80, 0x14, 0x05, 0x30, 0x98, 0x65, 0xf0,
-  0x97, 0x97, 0x09, 0x6c, 0x04, 0x85, 0x12, 0x14, 0xe2, 0x33, 0x1c, 0x91,
-  0x82, 0x81, 0x09, 0x0a, 0xc4, 0x37, 0xcb, 0xf0, 0x2f, 0x22, 0x13, 0xd8,
-  0x09, 0x0a, 0x2a, 0x18, 0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73,
-  0xc1, 0x53, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0xae, 0x28,
-  0xe8, 0x70, 0x43, 0xc0, 0x8a, 0x02, 0x18, 0xcc, 0x32, 0x80, 0x4c, 0xc8,
-  0x04, 0x36, 0xbc, 0xa0, 0x00, 0x9f, 0x59, 0x02, 0x93, 0x31, 0x17, 0x14,
-  0x88, 0xf8, 0xcc, 0x12, 0x98, 0xcc, 0x70, 0x04, 0x0d, 0x06, 0x2f, 0x28,
-  0x08, 0xdf, 0x2c, 0xc3, 0xc8, 0x98, 0x4c, 0x60, 0x35, 0x18, 0xc0, 0xa0,
-  0x10, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x10,
-  0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xe4, 0xa2, 0xa0, 0xc3, 0x0d, 0xc1,
-  0x2d, 0x0a, 0x60, 0x30, 0xcb, 0x40, 0x32, 0x25, 0x13, 0x18, 0x0e, 0x0a,
-  0x43, 0x7c, 0x66, 0x09, 0x4c, 0xc6, 0x88, 0x1d, 0x14, 0xe0, 0x33, 0x4b,
-  0x60, 0x32, 0x03, 0x2d, 0x8f, 0x06, 0x32, 0x58, 0xc8, 0x10, 0x24, 0x23,
-  0x94, 0x8c, 0x1f, 0x0a, 0x22, 0x73, 0xc1, 0x30, 0xa6, 0x83, 0x82, 0x0f,
-  0x0a, 0xf1, 0x19, 0x8e, 0x20, 0x95, 0x1f, 0x14, 0x88, 0x6f, 0x96, 0xe1,
-  0x64, 0x54, 0x26, 0x30, 0x30, 0x14, 0x4a, 0x25, 0x3e, 0x16, 0x0c, 0xf4,
-  0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0xc0, 0x90, 0x8f, 0x15, 0x41, 0x7c,
-  0x8a, 0x38, 0x47, 0x41, 0x87, 0x1b, 0x82, 0x72, 0x14, 0xc0, 0x60, 0x96,
-  0x01, 0x65, 0x52, 0x26, 0xb0, 0x01, 0x0d, 0x05, 0xf8, 0xcc, 0x12, 0xb8,
-  0x8c, 0x95, 0xa1, 0x40, 0xc4, 0x67, 0x96, 0xc0, 0x65, 0x86, 0x23, 0x5e,
-  0xc5, 0x0c, 0x05, 0xe1, 0x9b, 0x65, 0x58, 0x19, 0x97, 0x09, 0x0c, 0x56,
-  0xce, 0x50, 0x88, 0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7,
-  0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xe4, 0x51, 0xd0, 0xe1,
-  0x86, 0x00, 0x1e, 0x05, 0x30, 0x98, 0x65, 0x60, 0x99, 0x96, 0x09, 0xec,
-  0x0d, 0x85, 0x21, 0x3e, 0xb3, 0x04, 0x2e, 0x63, 0x04, 0x1d, 0x0a, 0xf0,
-  0x99, 0x25, 0x70, 0x99, 0x81, 0x96, 0x47, 0x43, 0x19, 0x2c, 0x65, 0x08,
-  0x96, 0x11, 0x5a, 0x86, 0xae, 0x54, 0xe6, 0x82, 0x61, 0x2e, 0x78, 0xea,
-  0xb6, 0xa7, 0x0e, 0x0c, 0x85, 0x61, 0x2e, 0xee, 0x83, 0x61, 0x8e, 0x18,
-  0xe6, 0x88, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0x13, 0x49,
-  0xe1, 0x1d, 0x05, 0x56, 0x14, 0xfa, 0x51, 0x18, 0x4d, 0x08, 0x80, 0xd1,
-  0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0x22, 0x91,
-  0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xc0, 0x52, 0x52, 0xb0, 0x47, 0x21,
-  0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xc0, 0x54, 0x52, 0xb8,
-  0x47, 0x21, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xc0, 0x56,
-  0x52, 0xc0, 0x47, 0x21, 0x21, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30,
-  0x58, 0x64, 0x52, 0xb8, 0x47, 0xc1, 0x16, 0x85, 0x80, 0x24, 0x85, 0x73,
-  0x14, 0x4c, 0x52, 0x18, 0x4d, 0x08, 0x80, 0x0b, 0x1e, 0x9c, 0x25, 0x78,
-  0x99, 0x81, 0x96, 0xc7, 0x34, 0xf4, 0xc5, 0x35, 0x89, 0x7c, 0x61, 0x09,
-  0x7e, 0x11, 0x5c, 0xc6, 0x35, 0x89, 0x7e, 0x99, 0x65, 0x80, 0x19, 0x99,
-  0x11, 0xd5, 0x60, 0x38, 0x42, 0xf6, 0xd0, 0x51, 0x18, 0xbe, 0x9b, 0xbd,
-  0x61, 0x86, 0x1b, 0x82, 0x59, 0x14, 0xc8, 0xa0, 0x86, 0x40, 0x87, 0x23,
-  0xe6, 0x85, 0x1d, 0x85, 0xe1, 0xab, 0x40, 0xd0, 0xab, 0x97, 0x61, 0x86,
-  0x1b, 0x02, 0x5b, 0x14, 0xc8, 0xa0, 0x82, 0x41, 0x67, 0x19, 0x62, 0xc6,
-  0x6c, 0x82, 0xd3, 0x43, 0x61, 0x98, 0x5b, 0xfd, 0x60, 0x98, 0x11, 0x83,
-  0x03, 0x00, 0x41, 0x30, 0xd8, 0x74, 0x52, 0x38, 0x49, 0x81, 0x1c, 0x85,
-  0x9a, 0x14, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06,
-  0x61, 0x34, 0x81, 0x18, 0x8a, 0x38, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10,
-  0x0c, 0xb0, 0xb0, 0x14, 0x5c, 0x52, 0x38, 0x88, 0x60, 0xc4, 0x00, 0x01,
-  0x40, 0x10, 0x0c, 0x30, 0xb1, 0x14, 0x5e, 0x52, 0x60, 0x88, 0x60, 0xc4,
-  0x00, 0x01, 0x40, 0x10, 0x0c, 0xb0, 0xb1, 0x14, 0x60, 0x52, 0x90, 0x88,
-  0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x16, 0xb5, 0x14, 0x5e, 0x52,
-  0x70, 0x47, 0x21, 0xe0, 0x49, 0xe1, 0x1f, 0x05, 0x9f, 0x14, 0x46, 0x13,
-  0x02, 0xe0, 0x82, 0x07, 0x67, 0x09, 0xcc, 0x66, 0xb8, 0x41, 0x57, 0x03,
-  0xb1, 0x14, 0xc0, 0x60, 0x96, 0x61, 0x66, 0x68, 0x26, 0xa8, 0x74, 0x14,
-  0x64, 0x52, 0x80, 0x0b, 0x9e, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03,
-  0x8a, 0x2d, 0x85, 0x99, 0x14, 0xd8, 0x4f, 0x24, 0x85, 0x11, 0x83, 0x03,
-  0x00, 0x41, 0x30, 0xa0, 0xda, 0x52, 0x98, 0x49, 0x21, 0x10, 0x2e, 0x18,
-  0xa6, 0xd8, 0x51, 0xb8, 0x49, 0x01, 0x2e, 0x78, 0x6a, 0xc4, 0xe0, 0x00,
-  0x40, 0x10, 0x0c, 0xa8, 0xb8, 0x14, 0x70, 0x52, 0x70, 0x99, 0x93, 0x14,
-  0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x92, 0x4b, 0x01, 0x27, 0x85,
-  0x40, 0xb8, 0x60, 0x98, 0x0b, 0x9e, 0xba, 0xe3, 0xa9, 0xab, 0x45, 0x61,
-  0x98, 0x33, 0xff, 0x60, 0x98, 0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83,
-  0x03, 0x00, 0x41, 0x30, 0xd8, 0xee, 0x52, 0x20, 0x4b, 0x21, 0x24, 0x05,
-  0xb9, 0x14, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06,
-  0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10,
-  0x0c, 0x30, 0xbf, 0x14, 0xd6, 0x52, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01,
-  0x40, 0x10, 0x0c, 0xb0, 0xbf, 0x14, 0xd8, 0x52, 0x48, 0x88, 0x60, 0xc4,
-  0x00, 0x01, 0x40, 0x10, 0x0c, 0x30, 0xd0, 0x14, 0xda, 0x52, 0x48, 0x88,
-  0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x96, 0xd3, 0x14, 0xd8, 0x52,
-  0x58, 0x49, 0x21, 0xc8, 0x4b, 0x81, 0x27, 0x85, 0xbd, 0x14, 0x46, 0x13,
-  0x02, 0xe0, 0x82, 0x07, 0x67, 0x09, 0xcc, 0x66, 0xb8, 0xe1, 0x5e, 0x03,
-  0xbf, 0x14, 0xc0, 0x60, 0x96, 0xa1, 0x66, 0xcc, 0x26, 0xb0, 0x7e, 0x14,
-  0xfe, 0x51, 0x88, 0xcf, 0x70, 0x04, 0x0a, 0x06, 0x20, 0x29, 0x10, 0xdf,
-  0x2c, 0x83, 0xcd, 0xe4, 0x4c, 0x60, 0x21, 0x29, 0xa4, 0x60, 0x10, 0x1f,
-  0x0b, 0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x60, 0xc8, 0xc7,
-  0x8a, 0x20, 0x3e, 0x45, 0xa0, 0xa6, 0xa0, 0xc3, 0x0d, 0x81, 0x69, 0x0a,
-  0x60, 0x30, 0xcb, 0x70, 0x33, 0x38, 0x13, 0xd8, 0x90, 0x92, 0x02, 0x7c,
-  0x66, 0x09, 0x7a, 0xc6, 0x50, 0x52, 0x20, 0xe2, 0x33, 0x4b, 0xd0, 0x33,
-  0xc3, 0x11, 0x33, 0x18, 0xa4, 0xa4, 0x20, 0x7c, 0xb3, 0x0c, 0x3a, 0xd3,
-  0x33, 0x81, 0xd1, 0x60, 0xa0, 0x92, 0x42, 0x7c, 0x2c, 0x70, 0xe8, 0x73,
-  0xc1, 0x30, 0x17, 0x3c, 0x65, 0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14,
-  0x31, 0x9b, 0x82, 0x0e, 0x37, 0x04, 0xb1, 0x29, 0x80, 0xc1, 0x2c, 0xc3,
-  0xce, 0xf0, 0x4c, 0x60, 0x32, 0x29, 0x0c, 0xf1, 0x99, 0x25, 0xe8, 0x19,
-  0x23, 0x6a, 0x52, 0x80, 0xcf, 0x2c, 0x41, 0xcf, 0x0c, 0xb4, 0x3c, 0xda,
-  0xcd, 0x60, 0x38, 0x43, 0xec, 0x8c, 0xc0, 0x33, 0x7c, 0x28, 0xe4, 0xcc,
-  0x05, 0xc3, 0x18, 0x4d, 0x0a, 0x38, 0x29, 0xc4, 0x67, 0x38, 0xc2, 0x6f,
-  0x72, 0x52, 0x20, 0xbe, 0x59, 0x06, 0x9f, 0x09, 0x9b, 0xc0, 0x74, 0x52,
-  0xf8, 0x9b, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca,
-  0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x22, 0x3c, 0x05, 0x1d, 0x6e,
-  0x08, 0x7e, 0x53, 0x00, 0x83, 0x59, 0x86, 0x9f, 0x01, 0x9b, 0xc0, 0x06,
-  0xb1, 0x14, 0xe0, 0x33, 0x4b, 0x50, 0x36, 0xf6, 0x93, 0x02, 0x11, 0x9f,
-  0x59, 0x82, 0xb2, 0x19, 0x8e, 0x48, 0x1d, 0xb0, 0x14, 0x84, 0x6f, 0x96,
-  0x41, 0x6c, 0xca, 0x26, 0x30, 0xd5, 0x09, 0x4b, 0x21, 0x3e, 0x16, 0x38,
-  0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41,
-  0x7c, 0x8a, 0x60, 0x4f, 0x41, 0x87, 0x1b, 0x02, 0xf5, 0x14, 0xc0, 0x60,
-  0x96, 0x61, 0x6c, 0xc8, 0x26, 0xb0, 0xb4, 0x14, 0x86, 0xf8, 0xcc, 0x12,
-  0x94, 0x8d, 0x11, 0x6e, 0x29, 0xc0, 0x67, 0x96, 0xa0, 0x6c, 0x06, 0x5a,
-  0x1e, 0xed, 0x67, 0x30, 0xb0, 0x21, 0xc6, 0x46, 0x20, 0x1b, 0xb4, 0x0b,
-  0x9b, 0x0b, 0x86, 0xb9, 0xe0, 0xa9, 0xdb, 0x9e, 0x3a, 0x9d, 0x14, 0x86,
-  0xb9, 0x35, 0x14, 0x86, 0x39, 0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38,
-  0x00, 0x10, 0x04, 0x83, 0x8d, 0x3f, 0x85, 0xf4, 0x14, 0x4c, 0x53, 0xb8,
-  0x4f, 0x61, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10,
-  0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1,
-  0x00, 0x1b, 0x51, 0x01, 0x3e, 0x85, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00,
-  0x04, 0xc1, 0x00, 0x23, 0x51, 0x21, 0x3e, 0x85, 0x84, 0x08, 0x46, 0x0c,
-  0x10, 0x00, 0x04, 0xc1, 0x00, 0x2b, 0x51, 0x41, 0x3e, 0x85, 0x84, 0x08,
-  0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0x61, 0x51, 0x21, 0x3e, 0x05,
-  0xd8, 0x14, 0x02, 0xff, 0x14, 0xc2, 0x53, 0x00, 0x51, 0x61, 0x34, 0x21,
-  0x00, 0x2e, 0x78, 0x70, 0x96, 0xc0, 0x6c, 0x06, 0x5a, 0x1e, 0xd3, 0x88,
-  0x19, 0xd1, 0x25, 0x60, 0x86, 0x25, 0x66, 0x46, 0x28, 0x1b, 0xd1, 0x25,
-  0x68, 0xc6, 0xfe, 0x36, 0xb8, 0x4d, 0x01, 0x3e, 0xb3, 0x0c, 0x67, 0x93,
-  0x36, 0x7d, 0x1b, 0x0c, 0x47, 0x84, 0x6e, 0x30, 0x9e, 0xc2, 0xf0, 0x9d,
-  0xe8, 0x06, 0xc3, 0x0c, 0x37, 0x04, 0xae, 0x29, 0x90, 0x41, 0x0d, 0x81,
-  0x0e, 0x47, 0x14, 0xe7, 0x29, 0x0c, 0x5f, 0x05, 0x82, 0xde, 0x31, 0xcc,
-  0x70, 0x43, 0x10, 0x9b, 0x02, 0x19, 0x54, 0x30, 0xe8, 0x2c, 0x03, 0xda,
-  0xf4, 0x4d, 0x70, 0x75, 0x29, 0x0c, 0x73, 0xa6, 0x28, 0x0c, 0x33, 0x62,
-  0x70, 0x00, 0x20, 0x08, 0x06, 0x5b, 0x8d, 0x0a, 0x22, 0x2a, 0xfc, 0xa6,
-  0x00, 0xa3, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2,
-  0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x87, 0x8c, 0x18, 0x20, 0x00, 0x08,
-  0x82, 0x01, 0xc6, 0xa3, 0x42, 0x8a, 0x0a, 0x07, 0x11, 0x8c, 0x18, 0x20,
-  0x00, 0x08, 0x82, 0x01, 0xd6, 0xa3, 0x82, 0x8a, 0x0a, 0x0c, 0x11, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0xe6, 0xa3, 0xc2, 0x8a, 0x0a, 0x12,
-  0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0x52, 0xa6, 0x82, 0x8a,
-  0x0a, 0xe9, 0x29, 0x04, 0x37, 0x2a, 0xe8, 0xa7, 0x90, 0xa3, 0xc2, 0x68,
-  0x42, 0x00, 0x5c, 0xf0, 0xe0, 0x2c, 0x41, 0xdf, 0x0c, 0x37, 0xd4, 0x6e,
-  0xd0, 0xa3, 0x02, 0x18, 0xcc, 0x32, 0xa8, 0xcd, 0xda, 0x04, 0x45, 0x9e,
-  0x42, 0x8b, 0x0a, 0x70, 0xc1, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60,
-  0x40, 0x9d, 0xa9, 0xe0, 0xa2, 0xc2, 0xee, 0x06, 0xfd, 0x29, 0x8c, 0x18,
-  0x1c, 0x00, 0x08, 0x82, 0x01, 0x85, 0xa6, 0x82, 0x8b, 0x0a, 0x81, 0x70,
-  0xc1, 0x30, 0x75, 0x9e, 0x82, 0x8c, 0x0a, 0x70, 0xc1, 0x53, 0x23, 0x06,
-  0x07, 0x00, 0x82, 0x60, 0x40, 0xb1, 0xa9, 0x30, 0xa3, 0x02, 0x18, 0x88,
-  0xa8, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x54, 0x9b, 0x0a, 0x33,
-  0x2a, 0x04, 0xc2, 0x05, 0xc3, 0x5c, 0xf0, 0xd4, 0x1d, 0x4f, 0x1d, 0x6c,
-  0x0a, 0xc3, 0x5c, 0x38, 0x0a, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c,
-  0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x26, 0xa7, 0xc2, 0x8f, 0x0a, 0xfc,
-  0x29, 0xb4, 0xa9, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a,
-  0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00,
-  0x82, 0x60, 0x80, 0xe5, 0xa9, 0x60, 0xa6, 0x42, 0x42, 0x04, 0x23, 0x06,
-  0x08, 0x00, 0x82, 0x60, 0x80, 0xe9, 0xa9, 0x70, 0xa6, 0x42, 0x42, 0x04,
-  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x80, 0xed, 0xa9, 0x80, 0xa6, 0x42,
-  0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0x88, 0xaa, 0x70,
-  0xa6, 0x82, 0x89, 0x0a, 0x01, 0x9d, 0x0a, 0x37, 0x2a, 0xd8, 0xa9, 0x30,
-  0x9a, 0x10, 0x00, 0x17, 0x3c, 0x38, 0x4b, 0xd0, 0x37, 0xc3, 0x0d, 0xf2,
-  0x1b, 0xe4, 0xa9, 0x00, 0x06, 0xb3, 0x0c, 0x6c, 0xd3, 0x37, 0x81, 0xe1,
-  0xa7, 0xa0, 0x9f, 0x42, 0x7c, 0x86, 0x23, 0xee, 0x37, 0xd8, 0x4f, 0x81,
-  0xf8, 0x66, 0x19, 0xda, 0x06, 0x6e, 0x02, 0xe3, 0x4f, 0x01, 0x7f, 0x83,
-  0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca, 0x02, 0x43,
-  0x3e, 0x56, 0x04, 0xf1, 0x29, 0x62, 0x54, 0x05, 0x1d, 0x6e, 0x08, 0x42,
-  0x55, 0x00, 0x83, 0x59, 0x06, 0xb7, 0x79, 0x9b, 0xc0, 0x06, 0x12, 0x15,
-  0xe0, 0x33, 0x4b, 0x40, 0x37, 0x36, 0xa2, 0x02, 0x11, 0x9f, 0x59, 0x02,
-  0xba, 0x19, 0x8e, 0x10, 0xe1, 0x80, 0x44, 0x05, 0xe1, 0x9b, 0x65, 0x88,
-  0x1b, 0xba, 0x09, 0x6c, 0x84, 0x83, 0x12, 0x15, 0xe2, 0x63, 0x81, 0x43,
-  0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4,
-  0xa7, 0x08, 0x57, 0x15, 0x74, 0xb8, 0x21, 0x60, 0x55, 0x01, 0x0c, 0x66,
-  0x19, 0xe4, 0x66, 0x6e, 0x02, 0x6b, 0x51, 0x61, 0x88, 0xcf, 0x2c, 0x01,
-  0xdd, 0x18, 0x01, 0xa3, 0x02, 0x7c, 0x66, 0x09, 0xe8, 0x66, 0xa0, 0xe5,
-  0xd1, 0xdc, 0x06, 0x7b, 0x1b, 0x42, 0x6e, 0x84, 0xb9, 0xd1, 0xc1, 0x01,
-  0x6e, 0x2e, 0x18, 0xc6, 0x5e, 0x54, 0x98, 0x51, 0x21, 0x3e, 0xc3, 0x11,
-  0xb0, 0x40, 0xa3, 0x02, 0xf1, 0xcd, 0x32, 0xd4, 0x0d, 0xde, 0x04, 0x56,
-  0xa3, 0x42, 0x2c, 0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0xc1,
-  0x53, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0xbc, 0x2a, 0xe8,
-  0x70, 0x43, 0xa0, 0xab, 0x02, 0x18, 0xcc, 0x32, 0xd8, 0xcd, 0xdd, 0x04,
-  0x36, 0xf4, 0xa8, 0x00, 0x9f, 0x59, 0x02, 0xbe, 0x31, 0x1d, 0x15, 0x88,
-  0xf8, 0xcc, 0x12, 0xf0, 0xcd, 0x70, 0xc4, 0x2e, 0xec, 0xa8, 0x20, 0x7c,
-  0xb3, 0x0c, 0x79, 0xc3, 0x37, 0x81, 0xf1, 0x02, 0x8f, 0x0a, 0xf1, 0xb1,
-  0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x91, 0x7c, 0xac,
-  0x08, 0xe2, 0x53, 0xc4, 0xb9, 0x0a, 0x3a, 0xdc, 0x10, 0x94, 0xab, 0x00,
-  0x06, 0xb3, 0x0c, 0x7a, 0xb3, 0x37, 0x81, 0x91, 0xa9, 0x30, 0xc4, 0x67,
-  0x96, 0x80, 0x6f, 0x8c, 0x48, 0x53, 0x01, 0x3e, 0xb3, 0x04, 0x7c, 0x33,
-  0xd0, 0xf2, 0x68, 0x76, 0x83, 0xdd, 0x0d, 0xa1, 0x37, 0xc2, 0xde, 0xb0,
-  0x06, 0xde, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0xdd, 0xf6, 0xd4, 0xd5, 0xa8,
-  0x30, 0xcc, 0x99, 0xa5, 0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88,
-  0xc1, 0x01, 0x80, 0x20, 0x18, 0x6c, 0xf7, 0x2a, 0x90, 0xab, 0x10, 0xaa,
-  0x82, 0xbc, 0x0a, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09,
-  0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20,
-  0x08, 0x06, 0x98, 0xbf, 0x0a, 0xeb, 0x2a, 0x24, 0x44, 0x30, 0x62, 0x80,
-  0x00, 0x20, 0x08, 0x06, 0xd8, 0xbf, 0x0a, 0xec, 0x2a, 0x24, 0x44, 0x30,
-  0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x18, 0xc8, 0x0a, 0xed, 0x2a, 0x24,
-  0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0xcb, 0xc9, 0x0a, 0xec,
-  0x2a, 0xac, 0xaa, 0x10, 0xe4, 0xab, 0xc0, 0xab, 0xc2, 0xbe, 0x0a, 0xa3,
-  0x09, 0x01, 0x70, 0xc1, 0x83, 0xb3, 0x04, 0x7d, 0x33, 0xd0, 0xf2, 0x98,
-  0x06, 0xda, 0xe0, 0x35, 0x71, 0x36, 0x2c, 0xa1, 0x36, 0x02, 0xdf, 0xe0,
-  0x35, 0xb1, 0x36, 0xb3, 0x0c, 0x7e, 0x03, 0x3a, 0x77, 0x1c, 0x0c, 0x47,
-  0xf0, 0x71, 0xd0, 0xab, 0xc2, 0xf0, 0x5d, 0x1f, 0x07, 0xc3, 0x0c, 0x37,
-  0x04, 0xa8, 0x2a, 0x90, 0x41, 0x0d, 0x81, 0x0e, 0x47, 0xfc, 0x43, 0xb8,
-  0x0a, 0xc3, 0x57, 0x81, 0xa0, 0x17, 0x12, 0xc3, 0x0c, 0x37, 0x04, 0xab,
-  0x2a, 0x90, 0x41, 0x05, 0x83, 0xce, 0x32, 0xfc, 0x0d, 0xed, 0x04, 0xf7,
-  0xa6, 0xc2, 0x30, 0x07, 0x9a, 0xc2, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82,
-  0x60, 0xb0, 0xbd, 0xac, 0xc0, 0xaf, 0x42, 0xae, 0x0a, 0x2a, 0x2b, 0x8c,
-  0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02,
-  0x31, 0x14, 0x71, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x60, 0x36,
-  0x2b, 0x8c, 0xac, 0x70, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
-  0x60, 0x37, 0x2b, 0x90, 0xac, 0xc0, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80,
-  0x20, 0x18, 0x60, 0x38, 0x2b, 0x94, 0xac, 0x20, 0x11, 0xc1, 0x88, 0x81,
-  0x02, 0x80, 0x20, 0x18, 0x2c, 0x3f, 0x2b, 0x90, 0xac, 0x30, 0xae, 0x42,
-  0x10, 0xb3, 0x02, 0xbd, 0x0a, 0x33, 0x2b, 0x8c, 0x26, 0x04, 0xc0, 0x05,
-  0x0f, 0xce, 0x12, 0xd0, 0xce, 0x70, 0xc3, 0x2b, 0x07, 0x37, 0x2b, 0x80,
-  0xc1, 0x2c, 0x43, 0xe8, 0x88, 0x4e, 0x50, 0xbe, 0x2a, 0x9c, 0xac, 0x00,
-  0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x54, 0xd8, 0x0a,
-  0x28, 0x2b, 0xd8, 0x72, 0x70, 0xaf, 0xc2, 0x88, 0xc1, 0x01, 0x80, 0x20,
-  0x18, 0x50, 0x62, 0x2b, 0xa0, 0xac, 0x10, 0x08, 0x17, 0x0c, 0x53, 0xe1,
-  0x2a, 0xb0, 0xac, 0x00, 0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08,
-  0x06, 0x94, 0xd9, 0x0a, 0x2d, 0x2b, 0xe8, 0x04, 0xbf, 0x0a, 0x23, 0x06,
-  0x07, 0x00, 0x82, 0x60, 0x40, 0x9d, 0xad, 0xd0, 0xb2, 0x42, 0x20, 0x5c,
-  0x30, 0xcc, 0x05, 0x4f, 0xdd, 0xf1, 0xd4, 0xa9, 0xaa, 0x30, 0xcc, 0xed,
-  0xa6, 0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80,
-  0x20, 0x18, 0x6c, 0x6c, 0x2b, 0xe4, 0xac, 0x60, 0xaf, 0xc2, 0xd9, 0x0a,
-  0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a,
-  0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xd8,
-  0xdc, 0x0a, 0x60, 0x2b, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08,
-  0x06, 0x18, 0xdd, 0x0a, 0x61, 0x2b, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00,
-  0x20, 0x08, 0x06, 0x58, 0xdd, 0x0a, 0x62, 0x2b, 0x24, 0x44, 0x30, 0x62,
-  0xa0, 0x00, 0x20, 0x08, 0x06, 0x0b, 0xdf, 0x0a, 0x61, 0x2b, 0x80, 0xac,
-  0x10, 0xb8, 0xad, 0x10, 0xb3, 0x02, 0xdc, 0x0a, 0xa3, 0x09, 0x01, 0x70,
-  0xc1, 0x83, 0xb3, 0x04, 0xb4, 0x33, 0xdc, 0xc0, 0xce, 0xc1, 0xdc, 0x0a,
-  0x60, 0x30, 0xcb, 0x30, 0x3a, 0xb4, 0x13, 0x98, 0xbc, 0x0a, 0xf4, 0x2a,
-  0xc4, 0x67, 0x38, 0x42, 0x9e, 0x83, 0x7a, 0x15, 0x88, 0x6f, 0x96, 0x81,
-  0x74, 0x4e, 0x27, 0x30, 0x7b, 0x15, 0xe6, 0x39, 0x88, 0x8f, 0x05, 0x03,
-  0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10,
-  0x9f, 0x22, 0xfa, 0x56, 0xd0, 0xe1, 0x86, 0x60, 0x6f, 0x05, 0x30, 0x98,
-  0x65, 0x28, 0x1d, 0xd3, 0x09, 0x6c, 0xf0, 0x57, 0x01, 0x3e, 0xb3, 0x04,
-  0xab, 0x63, 0xfd, 0x2a, 0x10, 0xf1, 0x99, 0x25, 0x58, 0x9d, 0xe1, 0x88,
-  0x7e, 0x0e, 0xfc, 0x55, 0x10, 0xbe, 0x59, 0x06, 0xd4, 0x59, 0x9d, 0xc0,
-  0xfc, 0x39, 0xf8, 0x57, 0x21, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98,
-  0x0b, 0x9e, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x40, 0x5d,
-  0x41, 0x87, 0x1b, 0x02, 0xd3, 0x15, 0xc0, 0x60, 0x96, 0x21, 0x75, 0x54,
-  0x27, 0xb0, 0x93, 0x15, 0x86, 0xf8, 0xcc, 0x12, 0xac, 0x8e, 0x11, 0x2a,
-  0x2b, 0xc0, 0x67, 0x96, 0x60, 0x75, 0x06, 0x5a, 0x1e, 0xad, 0x74, 0x30,
-  0xd3, 0x21, 0x52, 0x47, 0x50, 0x1d, 0x18, 0x1d, 0x4e, 0xe7, 0x82, 0x61,
-  0x2c, 0x65, 0x85, 0x96, 0x15, 0xe2, 0x33, 0x1c, 0xa1, 0x1a, 0x2e, 0x2b,
-  0x10, 0xdf, 0x2c, 0x03, 0xeb, 0xbc, 0x4e, 0x60, 0x2f, 0x2b, 0xac, 0x46,
-  0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x81, 0x21,
-  0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x61, 0xbb, 0x82, 0x0e, 0x37, 0x04, 0xb4,
-  0x2b, 0x80, 0xc1, 0x2c, 0x43, 0xeb, 0xb8, 0x4e, 0x60, 0xc3, 0xcd, 0x0a,
-  0xf0, 0x99, 0x25, 0x98, 0x1d, 0xa3, 0x59, 0x81, 0x88, 0xcf, 0x2c, 0xc1,
-  0xec, 0x0c, 0x47, 0xd4, 0x46, 0xcd, 0x0a, 0xc2, 0x37, 0xcb, 0x00, 0x3b,
-  0xb3, 0x13, 0x98, 0x6d, 0xd8, 0xac, 0x10, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c,
-  0x30, 0xcc, 0x05, 0x4f, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45,
-  0x84, 0xaf, 0xa0, 0xc3, 0x0d, 0xc1, 0xef, 0x0a, 0x60, 0x30, 0xcb, 0x10,
-  0x3b, 0xb2, 0x13, 0x98, 0xcf, 0x0a, 0x43, 0x7c, 0x66, 0x09, 0x66, 0xc7,
-  0x88, 0xb1, 0x15, 0xe0, 0x33, 0x4b, 0x30, 0x3b, 0x03, 0x2d, 0x8f, 0xd6,
-  0x3a, 0x98, 0xeb, 0x10, 0xb1, 0x23, 0xc8, 0x0e, 0xe8, 0xbc, 0xce, 0x05,
-  0xc3, 0x5c, 0xf0, 0xd4, 0x6d, 0x4f, 0xdd, 0xcb, 0x0a, 0xc3, 0x1c, 0x98,
-  0x0a, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08,
-  0x82, 0xc1, 0x16, 0xbf, 0x82, 0xef, 0x0a, 0x7b, 0x2b, 0xb0, 0xaf, 0x30,
-  0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09,
-  0xc4, 0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x80, 0xe1,
-  0xaf, 0x50, 0xbe, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60,
-  0x80, 0xe5, 0xaf, 0x60, 0xbe, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00,
-  0x82, 0x60, 0x80, 0xe9, 0xaf, 0x70, 0xbe, 0x42, 0x42, 0x04, 0x23, 0x06,
-  0x0a, 0x00, 0x82, 0x60, 0xb0, 0x84, 0xb0, 0x60, 0xbe, 0x42, 0xe9, 0x0a,
-  0xc1, 0xfc, 0x0a, 0xb6, 0x2b, 0xd4, 0xaf, 0x30, 0x9a, 0x10, 0x00, 0x17,
-  0x3c, 0x38, 0x4b, 0x40, 0x3b, 0x03, 0x2d, 0x8f, 0x69, 0xfc, 0x0d, 0xdb,
-  0x13, 0x7e, 0xc3, 0x12, 0xa1, 0x23, 0xcc, 0x0e, 0xdb, 0x13, 0xa2, 0x33,
-  0xcb, 0x50, 0x3b, 0xb7, 0x13, 0xd7, 0xc1, 0x70, 0x04, 0xdf, 0x06, 0xb7,
-  0x2b, 0x0c, 0xdf, 0xf5, 0x6d, 0x30, 0xcc, 0x70, 0x43, 0x20, 0xba, 0x02,
-  0x19, 0xd4, 0x10, 0xe8, 0x70, 0x44, 0x7e, 0xec, 0xae, 0x30, 0x7c, 0x15,
-  0x08, 0x7a, 0xfb, 0x31, 0xcc, 0x70, 0x43, 0x50, 0xba, 0x02, 0x19, 0x54,
-  0x30, 0xe8, 0x2c, 0x83, 0xed, 0xac, 0x4f, 0x70, 0x69, 0x2b, 0x0c, 0x73,
-  0x7a, 0x2a, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x5b, 0x0a,
-  0x0b, 0xf6, 0x2b, 0xcc, 0xae, 0x40, 0xc2, 0xc2, 0x68, 0x42, 0x00, 0x8c,
-  0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x87,
-  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x06, 0xc3, 0x42, 0xff, 0x0a,
-  0x07, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x16, 0xc3, 0x82,
-  0xff, 0x0a, 0x0c, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x26,
-  0xc3, 0xc2, 0xff, 0x0a, 0x12, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82,
-  0xc1, 0x92, 0xc3, 0x82, 0xff, 0x0a, 0xbd, 0x2b, 0x04, 0x2b, 0x2c, 0xb8,
-  0xaf, 0xd0, 0xc2, 0xc2, 0x68, 0x42, 0x00, 0x5c, 0xf0, 0xe0, 0x2c, 0xc1,
-  0xfa, 0x0c, 0x37, 0xa4, 0x76, 0x10, 0xc3, 0x02, 0x18, 0xcc, 0x32, 0xe0,
-  0x4e, 0xee, 0x04, 0x85, 0xbb, 0x42, 0x08, 0x0b, 0x70, 0xc1, 0x53, 0x23,
-  0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0xed, 0xb0, 0x20, 0xc2, 0x82, 0xed,
-  0x06, 0xf1, 0x2b, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x01, 0xc5, 0xc3,
-  0x82, 0x08, 0x0b, 0x81, 0x70, 0xc1, 0x30, 0xb5, 0xbb, 0x82, 0x09, 0x0b,
-  0x70, 0xc1, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0x81, 0xb1,
-  0x70, 0xc2, 0x02, 0x8d, 0xd8, 0xaf, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08,
-  0x06, 0x54, 0x18, 0x0b, 0x27, 0x2c, 0x04, 0xc2, 0x05, 0xc3, 0x5c, 0xf0,
-  0xd4, 0x1d, 0x4f, 0x1d, 0xe9, 0x0a, 0xc3, 0x5c, 0xad, 0x0a, 0xc3, 0x1c,
-  0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x66,
-  0xc6, 0xc2, 0x0c, 0x0b, 0xf0, 0x2b, 0x84, 0xb1, 0x30, 0x9a, 0x10, 0x00,
-  0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44,
-  0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x80, 0xb5, 0xb1, 0xa0, 0xc3,
-  0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x80, 0xb9, 0xb1,
-  0xb0, 0xc3, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x80,
-  0xbd, 0xb1, 0xc0, 0xc3, 0x42, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82,
-  0x60, 0xb0, 0xd8, 0xb1, 0xb0, 0xc3, 0x82, 0xfe, 0x0a, 0x01, 0x1a, 0x0b,
-  0x2b, 0x2c, 0xa8, 0xb1, 0x30, 0x9a, 0x10, 0x00, 0x17, 0x3c, 0x38, 0x4b,
-  0xb0, 0x3e, 0xc3, 0x0d, 0xe6, 0x1d, 0xb4, 0xb1, 0x00, 0x06, 0xb3, 0x0c,
-  0xba, 0xb3, 0x3e, 0x81, 0xb1, 0xaf, 0xe0, 0xbe, 0x42, 0x7c, 0x86, 0x23,
-  0xe4, 0x37, 0x78, 0x5f, 0x81, 0xf8, 0x66, 0x19, 0x76, 0xc7, 0x77, 0x02,
-  0x83, 0x5f, 0x61, 0x7e, 0x83, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61,
-  0x2e, 0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0xe2, 0x8e,
-  0x05, 0x1d, 0x6e, 0x08, 0xea, 0x58, 0x00, 0x83, 0x59, 0x06, 0xde, 0xe9,
-  0x9d, 0xc0, 0x06, 0xfc, 0x15, 0xe0, 0x33, 0x4b, 0x20, 0x3e, 0x76, 0xbf,
-  0x02, 0x11, 0x9f, 0x59, 0x02, 0xf1, 0x19, 0x8e, 0xe8, 0xdf, 0x00, 0x7f,
-  0x05, 0xe1, 0x9b, 0x65, 0xf8, 0x1d, 0xf1, 0x09, 0xcc, 0x7f, 0x83, 0xfc,
-  0x15, 0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b,
-  0x22, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0x51, 0x16, 0x74, 0xb8, 0x21,
-  0x00, 0x65, 0x01, 0x0c, 0x66, 0x19, 0xc0, 0x27, 0x7c, 0x02, 0x0b, 0x61,
-  0x61, 0x88, 0xcf, 0x2c, 0x81, 0xf8, 0x18, 0x41, 0xc2, 0x02, 0x7c, 0x66,
-  0x09, 0xc4, 0x67, 0xa0, 0xe5, 0xd1, 0x78, 0x07, 0xeb, 0x1d, 0x02, 0x7c,
-  0x84, 0xf0, 0x81, 0xc1, 0xc1, 0x77, 0x2e, 0x18, 0xc6, 0x46, 0x58, 0x38,
-  0x61, 0x21, 0x3e, 0xc3, 0x11, 0xa4, 0x82, 0xc2, 0x02, 0xf1, 0xcd, 0x32,
-  0x8c, 0x8f, 0xf9, 0x04, 0x96, 0xc2, 0x42, 0xa9, 0xc4, 0xc7, 0x82, 0x81,
-  0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88,
-  0x4f, 0x11, 0xb0, 0x2c, 0xe8, 0x70, 0x43, 0xe0, 0xca, 0x02, 0x18, 0xcc,
-  0x32, 0x90, 0x4f, 0xf9, 0x04, 0x36, 0xc4, 0xb0, 0x00, 0x9f, 0x59, 0x02,
-  0xf5, 0x31, 0x17, 0x16, 0x88, 0xf8, 0xcc, 0x12, 0xa8, 0xcf, 0x70, 0xc4,
-  0xab, 0xbc, 0xb0, 0x20, 0x7c, 0xb3, 0x0c, 0xe7, 0xa3, 0x3e, 0x81, 0xc1,
-  0x0a, 0x0c, 0x0b, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0xf0,
-  0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0xc4, 0x2e, 0x0b, 0x3a,
-  0xdc, 0x10, 0xe4, 0xb2, 0x00, 0x06, 0xb3, 0x0c, 0xe8, 0x93, 0x3e, 0x81,
-  0xe1, 0xb0, 0x30, 0xc4, 0x67, 0x96, 0x40, 0x7d, 0x8c, 0xe8, 0x61, 0x01,
-  0x3e, 0xb3, 0x04, 0xea, 0x33, 0xd0, 0xf2, 0x68, 0xe4, 0x83, 0x95, 0x0f,
-  0x81, 0x3e, 0x42, 0xfa, 0xd0, 0x95, 0xf9, 0x5c, 0x30, 0xcc, 0x05, 0x4f,
-  0xdd, 0xf6, 0xd4, 0xa5, 0xb0, 0x30, 0xcc, 0xe9, 0xac, 0x30, 0xcc, 0x11,
-  0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x6c, 0xeb,
-  0x2c, 0xe0, 0xb2, 0x50, 0xc7, 0x82, 0x39, 0x0b, 0xa3, 0x09, 0x01, 0x30,
-  0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24,
-  0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x98, 0x3c, 0x0b, 0xbf, 0x2c,
-  0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xd8, 0x3c, 0x0b,
-  0xe0, 0x2c, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x18,
-  0x3d, 0x0b, 0xe1, 0x2c, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08,
-  0x06, 0xcb, 0x3e, 0x0b, 0xe0, 0x2c, 0xfc, 0xb1, 0x10, 0xb4, 0xb3, 0x00,
-  0xcb, 0xc2, 0x3b, 0x0b, 0xa3, 0x09, 0x01, 0x70, 0xc1, 0x83, 0xb3, 0x04,
-  0xeb, 0x33, 0xd0, 0xf2, 0x98, 0x86, 0xed, 0x80, 0x65, 0x51, 0x3b, 0x2c,
-  0x81, 0x3b, 0x82, 0xfa, 0x80, 0x65, 0x91, 0x3b, 0xb3, 0x0c, 0xec, 0xe3,
-  0x3e, 0x6b, 0x1e, 0x0c, 0x47, 0xec, 0x6d, 0x10, 0xcb, 0xc2, 0xf0, 0x1d,
-  0xdf, 0x06, 0xc3, 0x0c, 0x37, 0x04, 0x7c, 0x2c, 0x90, 0x41, 0x0d, 0x81,
-  0x0e, 0x47, 0xcc, 0x4b, 0x2d, 0x0b, 0xc3, 0x57, 0x81, 0xa0, 0x57, 0x2f,
-  0xc3, 0x0c, 0x37, 0x04, 0x7f, 0x2c, 0x90, 0x41, 0x05, 0x83, 0xce, 0x32,
-  0xb4, 0x8f, 0x08, 0x05, 0x37, 0xc6, 0xc2, 0x30, 0x47, 0xb7, 0xc2, 0x30,
-  0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xb0, 0x8d, 0xb4, 0x00, 0xcf, 0x42,
-  0x2b, 0x0b, 0xfe, 0x2c, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c,
-  0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x71, 0xc8, 0x88, 0x01, 0x02,
-  0x80, 0x20, 0x18, 0x60, 0x2a, 0x2d, 0xdc, 0xb3, 0x70, 0x10, 0xc1, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x60, 0x2b, 0x2d, 0xe0, 0xb3, 0xc0, 0x10,
-  0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x60, 0x2c, 0x2d, 0xe4, 0xb3,
-  0x20, 0x11, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c, 0x33, 0x2d,
-  0xe0, 0xb3, 0x70, 0xcb, 0x42, 0x50, 0xd2, 0x02, 0x3a, 0x0b, 0x27, 0x2d,
-  0x8c, 0x26, 0x04, 0xc0, 0x05, 0x0f, 0xce, 0x12, 0x88, 0xd0, 0x70, 0xc3,
-  0xa8, 0x07, 0x2b, 0x2d, 0x80, 0xc1, 0x2c, 0xc3, 0xfb, 0xc0, 0x4f, 0x50,
-  0xb2, 0x2c, 0xec, 0xb3, 0x00, 0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20,
-  0x08, 0x06, 0x54, 0x4d, 0x0b, 0xfc, 0x2c, 0xd4, 0x6e, 0xb0, 0xce, 0xc2,
-  0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50, 0x36, 0x2d, 0xf0, 0xb3, 0x10,
-  0x08, 0x17, 0x0c, 0x53, 0xb5, 0x2c, 0x80, 0xb4, 0x00, 0x17, 0x3c, 0x35,
-  0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x94, 0x4e, 0x0b, 0x21, 0x2d, 0xb8,
-  0x0c, 0x3c, 0x0b, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0xed, 0xb4,
-  0x10, 0xd2, 0x42, 0x20, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0xdd, 0xf1, 0xd4,
-  0xf9, 0xb1, 0x30, 0xcc, 0xbd, 0xae, 0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31,
-  0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x6c, 0x60, 0x2d, 0xb4, 0xb4,
-  0xa0, 0xce, 0xc2, 0x4e, 0x0b, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04,
-  0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80,
-  0x00, 0x20, 0x08, 0x06, 0xd8, 0x59, 0x0b, 0x34, 0x2d, 0x24, 0x44, 0x30,
-  0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x18, 0x5a, 0x0b, 0x35, 0x2d, 0x24,
-  0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x58, 0x5a, 0x0b, 0x36,
-  0x2d, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x0b, 0x5c,
-  0x0b, 0x35, 0x2d, 0xd0, 0xb3, 0x10, 0x88, 0xb5, 0x50, 0xd2, 0x02, 0x59,
-  0x0b, 0xa3, 0x09, 0x01, 0x70, 0xc1, 0x83, 0xb3, 0x04, 0x22, 0x34, 0xdc,
-  0x00, 0xee, 0xc1, 0x59, 0x0b, 0x60, 0x30, 0xcb, 0x10, 0x3f, 0x22, 0x14,
-  0x98, 0x39, 0x0b, 0xe8, 0x2c, 0xc4, 0x67, 0x38, 0x22, 0x7e, 0x83, 0x74,
-  0x16, 0x88, 0x6f, 0x96, 0x41, 0x7e, 0xea, 0x27, 0x30, 0x75, 0x16, 0xe4,
-  0x37, 0x88, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c,
-  0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xe2, 0x5a, 0xd0, 0xe1, 0x86,
-  0xe0, 0xad, 0x05, 0x30, 0x98, 0x65, 0x98, 0x1f, 0xfa, 0x09, 0x6c, 0x90,
-  0x67, 0x01, 0x3e, 0xb3, 0x04, 0xf9, 0x63, 0xf1, 0x2c, 0x10, 0xf1, 0x99,
-  0x25, 0xc8, 0x9f, 0xe1, 0x08, 0xfe, 0x0d, 0xe4, 0x59, 0x10, 0xbe, 0x59,
-  0x06, 0xfb, 0xc9, 0x9f, 0xc0, 0xfa, 0x37, 0x98, 0x67, 0x21, 0x3e, 0x16,
-  0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20, 0x92, 0x8f, 0x15,
-  0x41, 0x7c, 0x8a, 0xe0, 0x6b, 0x41, 0x87, 0x1b, 0x02, 0xbd, 0x16, 0xc0,
-  0x60, 0x96, 0xe1, 0x7e, 0xf0, 0x27, 0xb0, 0x7d, 0x16, 0x86, 0xf8, 0xcc,
-  0x12, 0xe4, 0x8f, 0x11, 0xfe, 0x2c, 0xc0, 0x67, 0x96, 0x20, 0x7f, 0x06,
-  0x5a, 0x1e, 0x6d, 0x7e, 0x30, 0xfa, 0x21, 0xee, 0x47, 0xc0, 0x1f, 0x17,
-  0x1c, 0xea, 0xe7, 0x82, 0x61, 0xac, 0x9f, 0x85, 0x90, 0x16, 0xe2, 0x33,
-  0x1c, 0xe1, 0x37, 0x22, 0x2d, 0x10, 0xdf, 0x2c, 0x83, 0xfe, 0xf4, 0x4f,
-  0x60, 0x23, 0x2d, 0xfc, 0x4d, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30,
-  0x17, 0x3c, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xa1, 0xda,
-  0x82, 0x0e, 0x37, 0x04, 0xa8, 0x2d, 0x80, 0xc1, 0x2c, 0xc3, 0xfe, 0xf0,
-  0x4f, 0x60, 0xc3, 0x4a, 0x0b, 0xf0, 0x99, 0x25, 0x08, 0x21, 0x43, 0x69,
-  0x81, 0x88, 0xcf, 0x2c, 0x41, 0x08, 0x0d, 0x47, 0xa4, 0x4e, 0x4a, 0x0b,
-  0xc2, 0x37, 0xcb, 0xe0, 0x3f, 0x21, 0x14, 0x98, 0xea, 0xa8, 0xb4, 0x10,
-  0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x10, 0xc9,
-  0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xd4, 0xb6, 0xa0, 0xc3, 0x0d, 0xc1, 0x6c,
-  0x0b, 0x60, 0x30, 0xcb, 0xf0, 0x3f, 0x20, 0x14, 0x98, 0x4c, 0x0b, 0x43,
-  0x7c, 0x66, 0x09, 0x42, 0xc8, 0x88, 0x9b, 0x16, 0xe0, 0x33, 0x4b, 0x10,
-  0x42, 0x03, 0x2d, 0x8f, 0xb6, 0x3f, 0x18, 0xff, 0x10, 0xff, 0x23, 0x80,
-  0x10, 0xda, 0xf5, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0xd4, 0x6d, 0x4f, 0xdd,
-  0x48, 0x0b, 0xc3, 0x1c, 0x0d, 0x0b, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3,
-  0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x56, 0xde, 0x82, 0x6c, 0x0b,
-  0x6f, 0x2d, 0x80, 0xb7, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30,
-  0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23, 0x06, 0x08,
-  0x00, 0x82, 0x60, 0x80, 0xb1, 0xb7, 0x90, 0xdb, 0x42, 0x42, 0x04, 0x23,
-  0x06, 0x08, 0x00, 0x82, 0x60, 0x80, 0xb5, 0xb7, 0xa0, 0xdb, 0x42, 0x42,
-  0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x80, 0xb9, 0xb7, 0xb0, 0xdb,
-  0x42, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0xd4, 0xb7,
-  0xa0, 0xdb, 0x42, 0x5e, 0x0b, 0xc1, 0x79, 0x0b, 0xaa, 0x2d, 0xa4, 0xb7,
-  0x30, 0x9a, 0x10, 0x00, 0x17, 0x3c, 0x38, 0x4b, 0x20, 0x42, 0x03, 0x2d,
-  0x8f, 0x69, 0xb4, 0x0f, 0xcd, 0x16, 0xec, 0xc3, 0x12, 0xef, 0x23, 0x84,
-  0x10, 0xcd, 0x16, 0xf0, 0x33, 0x62, 0x60, 0x00, 0x20, 0x08, 0x06, 0xd0,
-  0x7d, 0x0b, 0xb6, 0x2d, 0x98, 0xb1, 0x30, 0x62, 0x60, 0x00, 0x20, 0x08,
-  0x06, 0x10, 0x7e, 0x0b, 0xb7, 0x2d, 0x98, 0xb1, 0x60, 0x41, 0x20, 0x1f,
-  0x0b, 0x04, 0xf9, 0xd8, 0x9b, 0x07, 0xad, 0x2d, 0xc8, 0x67, 0xc4, 0x00,
-  0x01, 0x40, 0x10, 0x0c, 0xa4, 0xfd, 0x16, 0x7e, 0x5b, 0x68, 0x6d, 0xa1,
-  0xd7, 0x02, 0x8b, 0xf3, 0xa0, 0xb5, 0x05, 0xf9, 0x8c, 0x18, 0x20, 0x00,
-  0x08, 0x82, 0x81, 0xd4, 0xdf, 0x42, 0x78, 0x0b, 0xac, 0x2d, 0xa0, 0x6a,
-  0x10, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0xe4, 0xdf, 0x82, 0x78,
-  0x0b, 0xaf, 0x2d, 0x80, 0x5b, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06,
-  0xd2, 0x7f, 0x0b, 0xe3, 0x2d, 0xc8, 0xb6, 0x80, 0x2f, 0xc1, 0x88, 0x01,
-  0x02, 0x80, 0x20, 0x18, 0x48, 0x20, 0x2e, 0x90, 0xb7, 0xe0, 0xda, 0xc2,
-  0xaa, 0x06, 0xc6, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x48, 0x21, 0x2e,
-  0x94, 0xb7, 0xe0, 0xda, 0xc2, 0xb8, 0x05, 0x23, 0x06, 0x08, 0x00, 0x82,
-  0x60, 0x20, 0x89, 0xb8, 0x60, 0xde, 0x02, 0x6d, 0x0b, 0xfb, 0x12, 0x8c,
-  0x18, 0x34, 0x00, 0x08, 0x82, 0x81, 0x15, 0xe2, 0x82, 0x79, 0x0b, 0xb7,
-  0x2d, 0x30, 0x8b, 0xe2, 0xaa, 0x01, 0x42, 0x04, 0xf6, 0xd7, 0xc1, 0x6d,
-  0x0b, 0xf2, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xa9, 0xc4, 0x85,
-  0xf4, 0x16, 0x6e, 0x5b, 0x68, 0xaf, 0xc0, 0x42, 0x3b, 0xb8, 0x6d, 0x41,
-  0x3e, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x20, 0x9d, 0xb8, 0xb0, 0xde,
-  0x82, 0x6d, 0x0b, 0xb8, 0x19, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60,
-  0x20, 0xa1, 0xb8, 0xc0, 0xde, 0x42, 0x6e, 0x0b, 0xf0, 0x15, 0x8c, 0x18,
-  0x20, 0x00, 0x08, 0x82, 0x81, 0x94, 0xe2, 0x42, 0x7b, 0x0b, 0xbc, 0x2d,
-  0xa0, 0x48, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x92, 0x8a, 0x0b,
-  0xee, 0x2d, 0xe0, 0xb6, 0xb0, 0x9b, 0x81, 0x31, 0x62, 0x80, 0x00, 0x20,
-  0x08, 0x06, 0xd2, 0x8a, 0x0b, 0xef, 0x2d, 0xe0, 0xb6, 0x30, 0x5f, 0xc1,
-  0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x48, 0x2c, 0x2e, 0xc0, 0xb7, 0xe0,
-  0xdb, 0xc2, 0x8a, 0x04, 0x23, 0x06, 0x0d, 0x00, 0x82, 0x60, 0x60, 0xad,
-  0xb8, 0x00, 0xdf, 0x42, 0x78, 0x0b, 0x56, 0x45, 0xf9, 0x66, 0x80, 0x10,
-  0x81, 0xb9, 0x72, 0x10, 0xde, 0x82, 0x7c, 0x46, 0x0c, 0x10, 0x00, 0x04,
-  0xc1, 0x40, 0x7a, 0x71, 0x61, 0xbe, 0x85, 0xf0, 0x16, 0xfa, 0x29, 0x30,
-  0x58, 0x0e, 0xc2, 0x5b, 0x90, 0xcf, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
-  0x48, 0x31, 0x2e, 0xd4, 0xb7, 0x00, 0xde, 0x02, 0x3a, 0x06, 0xc1, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x48, 0x32, 0x2e, 0xd8, 0xb7, 0x30, 0xde,
-  0x02, 0x48, 0x05, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x20, 0xcd, 0xb8,
-  0x70, 0xdf, 0x82, 0x79, 0x0b, 0x38, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08,
-  0x82, 0x81, 0x44, 0xe3, 0x02, 0x7e, 0x0b, 0xe2, 0x2d, 0xac, 0x63, 0x60,
-  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x54, 0xe3, 0x42, 0x7e, 0x0b,
-  0xe2, 0x2d, 0x8c, 0x54, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x92,
-  0x8d, 0x0b, 0xfa, 0x2d, 0xa0, 0xb7, 0xb0, 0x13, 0xc1, 0x88, 0x41, 0x03,
-  0x80, 0x20, 0x18, 0x58, 0x35, 0x2e, 0xe8, 0xb7, 0xb0, 0xde, 0x02, 0x18,
-  0x7c, 0x9e, 0x3b, 0x06, 0x08, 0x11, 0x58, 0xef, 0x06, 0xeb, 0x2d, 0xc8,
-  0x67, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xa4, 0x1c, 0x17, 0xfa, 0x5b,
-  0x58, 0x6f, 0xa1, 0x85, 0x02, 0xfb, 0xdd, 0x60, 0xbd, 0x05, 0xf9, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0xb4, 0xe3, 0xc2, 0x7f, 0x0b, 0xea,
-  0x2d, 0xe0, 0x5f, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x12, 0x8f,
-  0x0b, 0x20, 0x2e, 0xb4, 0xb7, 0x00, 0x43, 0xc1, 0x88, 0x01, 0x02, 0x80,
-  0x20, 0x18, 0x48, 0x3d, 0x2e, 0x84, 0xb8, 0x00, 0xdf, 0x02, 0x1a, 0x04,
-  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x20, 0xf9, 0xb8, 0x20, 0xe2, 0x02,
-  0x7b, 0x0b, 0xfb, 0x67, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0xf4,
-  0xe3, 0xc2, 0x88, 0x0b, 0xec, 0x2d, 0xcc, 0x50, 0x30, 0x62, 0x80, 0x00,
-  0x20, 0x08, 0x06, 0x12, 0x98, 0x0b, 0x24, 0x2e, 0xc8, 0xb7, 0xb0, 0x06,
-  0xc1, 0x88, 0x41, 0x03, 0x80, 0x20, 0x18, 0x58, 0x3f, 0x2e, 0x90, 0xb8,
-  0x50, 0xdf, 0x82, 0x1a, 0xa4, 0x01, 0x1a, 0xf8, 0x1f, 0x42, 0x04, 0xc6,
-  0x06, 0x6c, 0x20, 0x1f, 0x0b, 0xda, 0x40, 0x3e, 0x16, 0x06, 0xf7, 0x2d,
-  0xc8, 0x67, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xa4, 0x32, 0x17, 0x52,
-  0x5c, 0xb8, 0x6f, 0xc1, 0x09, 0x6c, 0x0c, 0xee, 0x5b, 0x90, 0xcf, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x48, 0x67, 0x2e, 0xac, 0xb8, 0x60, 0xdf,
-  0x82, 0x16, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x84, 0xe6, 0x02,
-  0x8b, 0x0b, 0xf9, 0x2d, 0x44, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
-  0x48, 0x69, 0x2e, 0xb4, 0xb8, 0xc0, 0xdf, 0x02, 0x12, 0x8c, 0x18, 0x20,
-  0x00, 0x08, 0x82, 0x81, 0xa4, 0xe6, 0x82, 0x8b, 0x0b, 0xf8, 0x2d, 0x74,
-  0xc6, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x48, 0x6b, 0x2e, 0xbc, 0xb8,
-  0x80, 0xdf, 0x02, 0x15, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0xc4,
-  0xe6, 0x02, 0x8c, 0x0b, 0xfe, 0x2d, 0x2c, 0xc1, 0x88, 0x41, 0x03, 0x80,
-  0x20, 0x18, 0x58, 0x6b, 0x2e, 0xc0, 0xb8, 0x10, 0xe2, 0xc2, 0x1d, 0x2c,
-  0x0a, 0x18, 0x20, 0x44, 0x70, 0x81, 0x71, 0x23, 0x06, 0x0e, 0x00, 0x82,
-  0x60, 0xd0, 0xc4, 0xb9, 0xe0, 0xe2, 0x02, 0x7e, 0x0b, 0xf1, 0x2d, 0x90,
-  0xb9, 0x10, 0x94, 0xb8, 0x50, 0xe2, 0x42, 0x89, 0x0b, 0x27, 0x2e, 0x98,
-  0xb9, 0x30, 0x4b, 0x30, 0x42, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-};
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_int16_double.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_int16_double.h
deleted file mode 100644
index 7c34dfe188e5f..0000000000000
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_int16_double.h
+++ /dev/null
@@ -1,6306 +0,0 @@
-#if 0
-;
-; Note: shader requires additional functionality:
-;       Double-precision floating point
-;       Use native low precision
-;
-;
-; Input signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; no parameters
-;
-; Output signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; no parameters
-; shader hash: 1b727f0ed6e60692cc1631e4508eac5a
-;
-; Pipeline Runtime Information: 
-;
-;
-;
-; Buffer Definitions:
-;
-; cbuffer 
-; {
-;
-;   [116 x i8] (type annotation not present)
-;
-; }
-;
-; Resource bind info for 
-; {
-;
-;   [2 x i8] (type annotation not present)
-;
-; }
-;
-; Resource bind info for 
-; {
-;
-;   [8 x i8] (type annotation not present)
-;
-; }
-;
-; Resource bind info for 
-; {
-;
-;   [2 x i8] (type annotation not present)
-;
-; }
-;
-;
-; Resource Bindings:
-;
-; Name                                 Type  Format         Dim      ID      HLSL Bind  Count
-; ------------------------------ ---------- ------- ----------- ------- -------------- ------
-;                                   cbuffer      NA          NA     CB0            cb0     1
-;                                       UAV  struct         r/w      U0             u0     1
-;                                       UAV  struct         r/w      U1             u1     1
-;                                       UAV  struct         r/w      U2             u2     1
-;
-target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
-target triple = "dxil-ms-dx"
-
-%dx.types.Handle = type { i8* }
-%dx.types.CBufRet.i32 = type { i32, i32, i32, i32 }
-%dx.types.ResRet.i32 = type { i32, i32, i32, i32, i32 }
-%dx.types.ResRet.i16 = type { i16, i16, i16, i16, i32 }
-%"class.RWStructuredBuffer<short>" = type { i16 }
-%"class.RWStructuredBuffer<double>" = type { double }
-%Constants = type { i32, i32, i32, i32, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32 }
-
-define void @GridSample() {
-  %1 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 2, i32 2, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %2 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 1, i32 1, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %3 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %4 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %5 = call i32 @dx.op.threadId.i32(i32 93, i32 0)  ; ThreadId(component)
-  %6 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
-  %7 = extractvalue %dx.types.CBufRet.i32 %6, 0
-  %8 = add i32 %7, %5
-  %9 = extractvalue %dx.types.CBufRet.i32 %6, 1
-  %10 = icmp ult i32 %8, %9
-  br i1 %10, label %11, label %3323
-
-; <label>:11                                      ; preds = %0
-  %12 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
-  %13 = extractvalue %dx.types.CBufRet.i32 %12, 3
-  %14 = uitofp i32 %13 to float
-  %15 = extractvalue %dx.types.CBufRet.i32 %12, 2
-  %16 = uitofp i32 %15 to float
-  %17 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 7)  ; CBufferLoadLegacy(handle,regIndex)
-  %18 = extractvalue %dx.types.CBufRet.i32 %17, 0
-  %19 = icmp eq i32 %18, 0
-  %20 = select i1 %19, float -5.000000e-01, float 0.000000e+00
-  %21 = select i1 %19, float -5.000000e-01, float -1.000000e+00
-  %22 = fadd float %14, %21
-  %23 = select i1 %19, float -5.000000e-01, float -1.000000e+00
-  %24 = fadd float %16, %23
-  %25 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
-  %26 = extractvalue %dx.types.CBufRet.i32 %25, 1
-  %27 = extractvalue %dx.types.CBufRet.i32 %25, 2
-  %28 = extractvalue %dx.types.CBufRet.i32 %25, 3
-  %29 = mul i32 %28, %27
-  %30 = mul i32 %27, %26
-  %31 = mul i32 %30, %28
-  %32 = udiv i32 %8, %31
-  %33 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 6)  ; CBufferLoadLegacy(handle,regIndex)
-  %34 = extractvalue %dx.types.CBufRet.i32 %33, 0
-  %35 = mul i32 %34, %32
-  %36 = sub i32 %8, %35
-  %37 = udiv i32 %36, %29
-  %38 = extractvalue %dx.types.CBufRet.i32 %33, 1
-  %39 = mul i32 %38, %37
-  %40 = sub i32 %36, %39
-  %41 = udiv i32 %40, %28
-  %42 = extractvalue %dx.types.CBufRet.i32 %33, 2
-  %43 = mul i32 %42, %41
-  %44 = sub i32 %40, %43
-  %45 = uitofp i32 %32 to float
-  %46 = uitofp i32 %41 to float
-  %47 = uitofp i32 %44 to float
-  %48 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
-  %49 = extractvalue %dx.types.CBufRet.i32 %48, 0
-  %50 = extractvalue %dx.types.CBufRet.i32 %48, 1
-  %51 = extractvalue %dx.types.CBufRet.i32 %48, 2
-  %52 = extractvalue %dx.types.CBufRet.i32 %48, 3
-  %53 = uitofp i32 %49 to float
-  %54 = uitofp i32 %50 to float
-  %55 = uitofp i32 %51 to float
-  %56 = uitofp i32 %52 to float
-  %57 = call float @dx.op.dot4.f32(i32 56, float %45, float %46, float %47, float 0.000000e+00, float %53, float %54, float %55, float %56)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %58 = fadd fast float %56, %57
-  %59 = fptoui float %57 to i32
-  %60 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %2, i32 %59, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %61 = extractvalue %dx.types.ResRet.i32 %60, 0
-  %62 = extractvalue %dx.types.ResRet.i32 %60, 1
-  %63 = call double @dx.op.makeDouble.f64(i32 101, i32 %61, i32 %62)  ; MakeDouble(lo,hi)
-  %64 = fptrunc double %63 to float
-  %65 = fptoui float %58 to i32
-  %66 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %2, i32 %65, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %67 = extractvalue %dx.types.ResRet.i32 %66, 0
-  %68 = extractvalue %dx.types.ResRet.i32 %66, 1
-  %69 = call double @dx.op.makeDouble.f64(i32 101, i32 %67, i32 %68)  ; MakeDouble(lo,hi)
-  %70 = fptrunc double %69 to float
-  %71 = icmp eq i32 %18, 1
-  %72 = fadd fast float %64, 1.000000e+00
-  %73 = fadd fast float %70, 1.000000e+00
-  br i1 %71, label %74, label %81
-
-; <label>:74                                      ; preds = %11
-  %75 = fmul fast float %72, 5.000000e-01
-  %76 = fmul fast float %73, 5.000000e-01
-  %77 = fadd fast float %14, -1.000000e+00
-  %78 = fadd fast float %16, -1.000000e+00
-  %79 = fmul fast float %75, %77
-  %80 = fmul fast float %76, %78
-  br label %88
-
-; <label>:81                                      ; preds = %11
-  %82 = fmul fast float %14, %72
-  %83 = fmul fast float %73, %16
-  %84 = fadd fast float %82, -1.000000e+00
-  %85 = fadd fast float %83, -1.000000e+00
-  %86 = fmul fast float %84, 5.000000e-01
-  %87 = fmul fast float %85, 5.000000e-01
-  br label %88
-
-; <label>:88                                      ; preds = %81, %74
-  %89 = phi float [ %79, %74 ], [ %86, %81 ]
-  %90 = phi float [ %80, %74 ], [ %87, %81 ]
-  %91 = extractvalue %dx.types.CBufRet.i32 %6, 2
-  %92 = icmp eq i32 %91, 1
-  br i1 %92, label %93, label %96
-
-; <label>:93                                      ; preds = %88
-  %94 = call float @dx.op.unary.f32(i32 26, float %89)  ; Round_ne(value)
-  %95 = call float @dx.op.unary.f32(i32 26, float %90)  ; Round_ne(value)
-  br label %96
-
-; <label>:96                                      ; preds = %93, %88
-  %97 = phi float [ %94, %93 ], [ %89, %88 ]
-  %98 = phi float [ %95, %93 ], [ %90, %88 ]
-  %99 = fcmp fast olt float %97, %20
-  %100 = fcmp fast ogt float %97, %22
-  %101 = or i1 %99, %100
-  %102 = fcmp fast olt float %98, %20
-  %103 = or i1 %101, %102
-  %104 = fcmp fast ogt float %98, %24
-  %105 = or i1 %104, %103
-  br i1 %105, label %106, label %179
-
-; <label>:106                                     ; preds = %96
-  %107 = extractvalue %dx.types.CBufRet.i32 %6, 3
-  %108 = icmp eq i32 %107, 1
-  br i1 %108, label %109, label %118
-
-; <label>:109                                     ; preds = %106
-  %110 = add i32 %13, -1
-  %111 = uitofp i32 %110 to float
-  %112 = call float @dx.op.binary.f32(i32 35, float %97, float 0.000000e+00)  ; FMax(a,b)
-  %113 = call float @dx.op.binary.f32(i32 36, float %112, float %111)  ; FMin(a,b)
-  %114 = add i32 %15, -1
-  %115 = uitofp i32 %114 to float
-  %116 = call float @dx.op.binary.f32(i32 35, float %98, float 0.000000e+00)  ; FMax(a,b)
-  %117 = call float @dx.op.binary.f32(i32 36, float %116, float %115)  ; FMin(a,b)
-  br label %179
-
-; <label>:118                                     ; preds = %106
-  %119 = icmp eq i32 %107, 2
-  br i1 %119, label %120, label %179
-
-; <label>:120                                     ; preds = %118
-  %121 = fsub fast float %22, %20
-  br i1 %99, label %122, label %135
-
-; <label>:122                                     ; preds = %120
-  %123 = fsub fast float %20, %97
-  %124 = fdiv fast float %123, %121
-  %125 = fptoui float %124 to i32
-  %126 = uitofp i32 %125 to float
-  %127 = fmul fast float %126, %121
-  %128 = fsub fast float %123, %127
-  %129 = and i32 %125, 1
-  %130 = icmp eq i32 %129, 0
-  br i1 %130, label %131, label %133
-
-; <label>:131                                     ; preds = %122
-  %132 = fadd fast float %128, %20
-  br label %149
-
-; <label>:133                                     ; preds = %122
-  %134 = fsub fast float %22, %128
-  br label %149
-
-; <label>:135                                     ; preds = %120
-  br i1 %100, label %136, label %149
-
-; <label>:136                                     ; preds = %135
-  %137 = fsub fast float %97, %22
-  %138 = fdiv fast float %137, %121
-  %139 = fptoui float %138 to i32
-  %140 = uitofp i32 %139 to float
-  %141 = fmul fast float %140, %121
-  %142 = fsub fast float %137, %141
-  %143 = and i32 %139, 1
-  %144 = icmp eq i32 %143, 0
-  br i1 %144, label %145, label %147
-
-; <label>:145                                     ; preds = %136
-  %146 = fsub fast float %22, %142
-  br label %149
-
-; <label>:147                                     ; preds = %136
-  %148 = fadd fast float %142, %20
-  br label %149
-
-; <label>:149                                     ; preds = %147, %145, %135, %133, %131
-  %150 = phi float [ %132, %131 ], [ %134, %133 ], [ %146, %145 ], [ %148, %147 ], [ %97, %135 ]
-  %151 = fsub fast float %24, %20
-  br i1 %102, label %152, label %165
-
-; <label>:152                                     ; preds = %149
-  %153 = fsub fast float %20, %98
-  %154 = fdiv fast float %153, %151
-  %155 = fptoui float %154 to i32
-  %156 = uitofp i32 %155 to float
-  %157 = fmul fast float %156, %151
-  %158 = fsub fast float %153, %157
-  %159 = and i32 %155, 1
-  %160 = icmp eq i32 %159, 0
-  br i1 %160, label %161, label %163
-
-; <label>:161                                     ; preds = %152
-  %162 = fadd fast float %158, %20
-  br label %179
-
-; <label>:163                                     ; preds = %152
-  %164 = fsub fast float %24, %158
-  br label %179
-
-; <label>:165                                     ; preds = %149
-  br i1 %104, label %166, label %179
-
-; <label>:166                                     ; preds = %165
-  %167 = fsub fast float %98, %24
-  %168 = fdiv fast float %167, %151
-  %169 = fptoui float %168 to i32
-  %170 = uitofp i32 %169 to float
-  %171 = fmul fast float %170, %151
-  %172 = fsub fast float %167, %171
-  %173 = and i32 %169, 1
-  %174 = icmp eq i32 %173, 0
-  br i1 %174, label %175, label %177
-
-; <label>:175                                     ; preds = %166
-  %176 = fsub fast float %24, %172
-  br label %179
-
-; <label>:177                                     ; preds = %166
-  %178 = fadd fast float %172, %20
-  br label %179
-
-; <label>:179                                     ; preds = %177, %175, %165, %163, %161, %118, %109, %96
-  %180 = phi float [ %113, %109 ], [ %97, %118 ], [ %97, %96 ], [ %150, %177 ], [ %150, %175 ], [ %150, %165 ], [ %150, %163 ], [ %150, %161 ]
-  %181 = phi float [ %117, %109 ], [ %98, %118 ], [ %98, %96 ], [ %178, %177 ], [ %176, %175 ], [ %98, %165 ], [ %164, %163 ], [ %162, %161 ]
-  %182 = uitofp i32 %37 to float
-  br i1 %92, label %183, label %332
-
-; <label>:183                                     ; preds = %179
-  %184 = extractvalue %dx.types.CBufRet.i32 %6, 3
-  %185 = icmp eq i32 %184, 0
-  br i1 %185, label %186, label %211
-
-; <label>:186                                     ; preds = %183
-  %187 = fcmp fast oge float %180, 0.000000e+00
-  %188 = fptoui float %180 to i32
-  %189 = icmp ult i32 %188, %13
-  %190 = and i1 %187, %189
-  %191 = fcmp fast oge float %181, 0.000000e+00
-  %192 = and i1 %191, %190
-  %193 = fptoui float %181 to i32
-  %194 = icmp ult i32 %193, %15
-  %195 = and i1 %194, %192
-  br i1 %195, label %196, label %329
-
-; <label>:196                                     ; preds = %186
-  %197 = fptoui float %45 to i32
-  %198 = fptoui float %182 to i32
-  %199 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %200 = extractvalue %dx.types.CBufRet.i32 %199, 0
-  %201 = extractvalue %dx.types.CBufRet.i32 %199, 1
-  %202 = extractvalue %dx.types.CBufRet.i32 %199, 2
-  %203 = extractvalue %dx.types.CBufRet.i32 %199, 3
-  %204 = mul i32 %200, %197
-  %205 = call i32 @dx.op.tertiary.i32(i32 48, i32 %198, i32 %201, i32 %204)  ; IMad(a,b,c)
-  %206 = call i32 @dx.op.tertiary.i32(i32 48, i32 %193, i32 %202, i32 %205)  ; IMad(a,b,c)
-  %207 = call i32 @dx.op.tertiary.i32(i32 48, i32 %188, i32 %203, i32 %206)  ; IMad(a,b,c)
-  %208 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %207, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %209 = extractvalue %dx.types.ResRet.i16 %208, 0
-  %210 = sitofp i16 %209 to float
-  br label %329
-
-; <label>:211                                     ; preds = %183
-  %212 = icmp eq i32 %184, 1
-  br i1 %212, label %213, label %242
-
-; <label>:213                                     ; preds = %211
-  %214 = add i32 %13, -1
-  %215 = uitofp i32 %214 to float
-  %216 = call float @dx.op.binary.f32(i32 35, float %180, float 0.000000e+00)  ; FMax(a,b)
-  %217 = call float @dx.op.binary.f32(i32 36, float %216, float %215)  ; FMin(a,b)
-  %218 = fptoui float %217 to i32
-  %219 = add i32 %15, -1
-  %220 = uitofp i32 %219 to float
-  %221 = call float @dx.op.binary.f32(i32 35, float %181, float 0.000000e+00)  ; FMax(a,b)
-  %222 = call float @dx.op.binary.f32(i32 36, float %221, float %220)  ; FMin(a,b)
-  %223 = fptoui float %222 to i32
-  %224 = uitofp i32 %223 to float
-  %225 = uitofp i32 %218 to float
-  %226 = fptoui float %45 to i32
-  %227 = fptoui float %182 to i32
-  %228 = fptoui float %224 to i32
-  %229 = fptoui float %225 to i32
-  %230 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %231 = extractvalue %dx.types.CBufRet.i32 %230, 0
-  %232 = extractvalue %dx.types.CBufRet.i32 %230, 1
-  %233 = extractvalue %dx.types.CBufRet.i32 %230, 2
-  %234 = extractvalue %dx.types.CBufRet.i32 %230, 3
-  %235 = mul i32 %231, %226
-  %236 = call i32 @dx.op.tertiary.i32(i32 48, i32 %227, i32 %232, i32 %235)  ; IMad(a,b,c)
-  %237 = call i32 @dx.op.tertiary.i32(i32 48, i32 %228, i32 %233, i32 %236)  ; IMad(a,b,c)
-  %238 = call i32 @dx.op.tertiary.i32(i32 48, i32 %229, i32 %234, i32 %237)  ; IMad(a,b,c)
-  %239 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %238, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %240 = extractvalue %dx.types.ResRet.i16 %239, 0
-  %241 = sitofp i16 %240 to float
-  br label %329
-
-; <label>:242                                     ; preds = %211
-  %243 = icmp eq i32 %184, 2
-  br i1 %243, label %244, label %329
-
-; <label>:244                                     ; preds = %242
-  %245 = fsub fast float %22, %20
-  %246 = fcmp fast olt float %180, %20
-  br i1 %246, label %247, label %260
-
-; <label>:247                                     ; preds = %244
-  %248 = fsub fast float %20, %180
-  %249 = fdiv fast float %248, %245
-  %250 = fptoui float %249 to i32
-  %251 = uitofp i32 %250 to float
-  %252 = fmul fast float %251, %245
-  %253 = fsub fast float %248, %252
-  %254 = and i32 %250, 1
-  %255 = icmp eq i32 %254, 0
-  br i1 %255, label %256, label %258
-
-; <label>:256                                     ; preds = %247
-  %257 = fadd fast float %253, %20
-  br label %275
-
-; <label>:258                                     ; preds = %247
-  %259 = fsub fast float %22, %253
-  br label %275
-
-; <label>:260                                     ; preds = %244
-  %261 = fcmp fast ogt float %180, %22
-  br i1 %261, label %262, label %275
-
-; <label>:262                                     ; preds = %260
-  %263 = fsub fast float %180, %22
-  %264 = fdiv fast float %263, %245
-  %265 = fptoui float %264 to i32
-  %266 = uitofp i32 %265 to float
-  %267 = fmul fast float %266, %245
-  %268 = fsub fast float %263, %267
-  %269 = and i32 %265, 1
-  %270 = icmp eq i32 %269, 0
-  br i1 %270, label %271, label %273
-
-; <label>:271                                     ; preds = %262
-  %272 = fsub fast float %22, %268
-  br label %275
-
-; <label>:273                                     ; preds = %262
-  %274 = fadd fast float %268, %20
-  br label %275
-
-; <label>:275                                     ; preds = %273, %271, %260, %258, %256
-  %276 = phi float [ %257, %256 ], [ %259, %258 ], [ %272, %271 ], [ %274, %273 ], [ %180, %260 ]
-  %277 = fptoui float %276 to i32
-  %278 = fsub fast float %24, %20
-  %279 = fcmp fast olt float %181, %20
-  br i1 %279, label %280, label %293
-
-; <label>:280                                     ; preds = %275
-  %281 = fsub fast float %20, %181
-  %282 = fdiv fast float %281, %278
-  %283 = fptoui float %282 to i32
-  %284 = uitofp i32 %283 to float
-  %285 = fmul fast float %284, %278
-  %286 = fsub fast float %281, %285
-  %287 = and i32 %283, 1
-  %288 = icmp eq i32 %287, 0
-  br i1 %288, label %289, label %291
-
-; <label>:289                                     ; preds = %280
-  %290 = fadd fast float %286, %20
-  br label %308
-
-; <label>:291                                     ; preds = %280
-  %292 = fsub fast float %24, %286
-  br label %308
-
-; <label>:293                                     ; preds = %275
-  %294 = fcmp fast ogt float %181, %24
-  br i1 %294, label %295, label %308
-
-; <label>:295                                     ; preds = %293
-  %296 = fsub fast float %181, %24
-  %297 = fdiv fast float %296, %278
-  %298 = fptoui float %297 to i32
-  %299 = uitofp i32 %298 to float
-  %300 = fmul fast float %299, %278
-  %301 = fsub fast float %296, %300
-  %302 = and i32 %298, 1
-  %303 = icmp eq i32 %302, 0
-  br i1 %303, label %304, label %306
-
-; <label>:304                                     ; preds = %295
-  %305 = fsub fast float %24, %301
-  br label %308
-
-; <label>:306                                     ; preds = %295
-  %307 = fadd fast float %301, %20
-  br label %308
-
-; <label>:308                                     ; preds = %306, %304, %293, %291, %289
-  %309 = phi float [ %290, %289 ], [ %292, %291 ], [ %305, %304 ], [ %307, %306 ], [ %181, %293 ]
-  %310 = fptoui float %309 to i32
-  %311 = uitofp i32 %310 to float
-  %312 = uitofp i32 %277 to float
-  %313 = fptoui float %45 to i32
-  %314 = fptoui float %182 to i32
-  %315 = fptoui float %311 to i32
-  %316 = fptoui float %312 to i32
-  %317 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %318 = extractvalue %dx.types.CBufRet.i32 %317, 0
-  %319 = extractvalue %dx.types.CBufRet.i32 %317, 1
-  %320 = extractvalue %dx.types.CBufRet.i32 %317, 2
-  %321 = extractvalue %dx.types.CBufRet.i32 %317, 3
-  %322 = mul i32 %318, %313
-  %323 = call i32 @dx.op.tertiary.i32(i32 48, i32 %314, i32 %319, i32 %322)  ; IMad(a,b,c)
-  %324 = call i32 @dx.op.tertiary.i32(i32 48, i32 %315, i32 %320, i32 %323)  ; IMad(a,b,c)
-  %325 = call i32 @dx.op.tertiary.i32(i32 48, i32 %316, i32 %321, i32 %324)  ; IMad(a,b,c)
-  %326 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %325, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %327 = extractvalue %dx.types.ResRet.i16 %326, 0
-  %328 = sitofp i16 %327 to float
-  br label %329
-
-; <label>:329                                     ; preds = %308, %242, %213, %196, %186
-  %330 = phi float [ %210, %196 ], [ 0.000000e+00, %186 ], [ %241, %213 ], [ %328, %308 ], [ 0.000000e+00, %242 ]
-  %331 = fptosi float %330 to i16
-  call void @dx.op.rawBufferStore.i16(i32 140, %dx.types.Handle %1, i32 %8, i32 0, i16 %331, i16 undef, i16 undef, i16 undef, i8 1, i32 2)  ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment)
-  br label %3323
-
-; <label>:332                                     ; preds = %179
-  %333 = icmp eq i32 %91, 0
-  br i1 %333, label %334, label %933
-
-; <label>:334                                     ; preds = %332
-  %335 = call float @dx.op.unary.f32(i32 27, float %180)  ; Round_ni(value)
-  %336 = call float @dx.op.unary.f32(i32 27, float %181)  ; Round_ni(value)
-  %337 = fadd fast float %335, 1.000000e+00
-  %338 = fadd fast float %336, 1.000000e+00
-  %339 = extractvalue %dx.types.CBufRet.i32 %6, 3
-  %340 = icmp eq i32 %339, 0
-  br i1 %340, label %341, label %366
-
-; <label>:341                                     ; preds = %334
-  %342 = fcmp fast oge float %335, 0.000000e+00
-  %343 = fptoui float %335 to i32
-  %344 = icmp ult i32 %343, %13
-  %345 = and i1 %342, %344
-  %346 = fcmp fast oge float %336, 0.000000e+00
-  %347 = and i1 %346, %345
-  %348 = fptoui float %336 to i32
-  %349 = icmp ult i32 %348, %15
-  %350 = and i1 %349, %347
-  br i1 %350, label %351, label %484
-
-; <label>:351                                     ; preds = %341
-  %352 = fptoui float %45 to i32
-  %353 = fptoui float %182 to i32
-  %354 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %355 = extractvalue %dx.types.CBufRet.i32 %354, 0
-  %356 = extractvalue %dx.types.CBufRet.i32 %354, 1
-  %357 = extractvalue %dx.types.CBufRet.i32 %354, 2
-  %358 = extractvalue %dx.types.CBufRet.i32 %354, 3
-  %359 = mul i32 %355, %352
-  %360 = call i32 @dx.op.tertiary.i32(i32 48, i32 %353, i32 %356, i32 %359)  ; IMad(a,b,c)
-  %361 = call i32 @dx.op.tertiary.i32(i32 48, i32 %348, i32 %357, i32 %360)  ; IMad(a,b,c)
-  %362 = call i32 @dx.op.tertiary.i32(i32 48, i32 %343, i32 %358, i32 %361)  ; IMad(a,b,c)
-  %363 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %362, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %364 = extractvalue %dx.types.ResRet.i16 %363, 0
-  %365 = sitofp i16 %364 to float
-  br label %484
-
-; <label>:366                                     ; preds = %334
-  %367 = icmp eq i32 %339, 1
-  br i1 %367, label %368, label %397
-
-; <label>:368                                     ; preds = %366
-  %369 = add i32 %13, -1
-  %370 = uitofp i32 %369 to float
-  %371 = call float @dx.op.binary.f32(i32 35, float %335, float 0.000000e+00)  ; FMax(a,b)
-  %372 = call float @dx.op.binary.f32(i32 36, float %371, float %370)  ; FMin(a,b)
-  %373 = fptoui float %372 to i32
-  %374 = add i32 %15, -1
-  %375 = uitofp i32 %374 to float
-  %376 = call float @dx.op.binary.f32(i32 35, float %336, float 0.000000e+00)  ; FMax(a,b)
-  %377 = call float @dx.op.binary.f32(i32 36, float %376, float %375)  ; FMin(a,b)
-  %378 = fptoui float %377 to i32
-  %379 = uitofp i32 %378 to float
-  %380 = uitofp i32 %373 to float
-  %381 = fptoui float %45 to i32
-  %382 = fptoui float %182 to i32
-  %383 = fptoui float %379 to i32
-  %384 = fptoui float %380 to i32
-  %385 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %386 = extractvalue %dx.types.CBufRet.i32 %385, 0
-  %387 = extractvalue %dx.types.CBufRet.i32 %385, 1
-  %388 = extractvalue %dx.types.CBufRet.i32 %385, 2
-  %389 = extractvalue %dx.types.CBufRet.i32 %385, 3
-  %390 = mul i32 %386, %381
-  %391 = call i32 @dx.op.tertiary.i32(i32 48, i32 %382, i32 %387, i32 %390)  ; IMad(a,b,c)
-  %392 = call i32 @dx.op.tertiary.i32(i32 48, i32 %383, i32 %388, i32 %391)  ; IMad(a,b,c)
-  %393 = call i32 @dx.op.tertiary.i32(i32 48, i32 %384, i32 %389, i32 %392)  ; IMad(a,b,c)
-  %394 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %393, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %395 = extractvalue %dx.types.ResRet.i16 %394, 0
-  %396 = sitofp i16 %395 to float
-  br label %484
-
-; <label>:397                                     ; preds = %366
-  %398 = icmp eq i32 %339, 2
-  br i1 %398, label %399, label %484
-
-; <label>:399                                     ; preds = %397
-  %400 = fsub fast float %22, %20
-  %401 = fcmp fast olt float %335, %20
-  br i1 %401, label %402, label %415
-
-; <label>:402                                     ; preds = %399
-  %403 = fsub fast float %20, %335
-  %404 = fdiv fast float %403, %400
-  %405 = fptoui float %404 to i32
-  %406 = uitofp i32 %405 to float
-  %407 = fmul fast float %406, %400
-  %408 = fsub fast float %403, %407
-  %409 = and i32 %405, 1
-  %410 = icmp eq i32 %409, 0
-  br i1 %410, label %411, label %413
-
-; <label>:411                                     ; preds = %402
-  %412 = fadd fast float %408, %20
-  br label %430
-
-; <label>:413                                     ; preds = %402
-  %414 = fsub fast float %22, %408
-  br label %430
-
-; <label>:415                                     ; preds = %399
-  %416 = fcmp fast ogt float %335, %22
-  br i1 %416, label %417, label %430
-
-; <label>:417                                     ; preds = %415
-  %418 = fsub fast float %335, %22
-  %419 = fdiv fast float %418, %400
-  %420 = fptoui float %419 to i32
-  %421 = uitofp i32 %420 to float
-  %422 = fmul fast float %421, %400
-  %423 = fsub fast float %418, %422
-  %424 = and i32 %420, 1
-  %425 = icmp eq i32 %424, 0
-  br i1 %425, label %426, label %428
-
-; <label>:426                                     ; preds = %417
-  %427 = fsub fast float %22, %423
-  br label %430
-
-; <label>:428                                     ; preds = %417
-  %429 = fadd fast float %423, %20
-  br label %430
-
-; <label>:430                                     ; preds = %428, %426, %415, %413, %411
-  %431 = phi float [ %412, %411 ], [ %414, %413 ], [ %427, %426 ], [ %429, %428 ], [ %335, %415 ]
-  %432 = fptoui float %431 to i32
-  %433 = fsub fast float %24, %20
-  %434 = fcmp fast olt float %336, %20
-  br i1 %434, label %435, label %448
-
-; <label>:435                                     ; preds = %430
-  %436 = fsub fast float %20, %336
-  %437 = fdiv fast float %436, %433
-  %438 = fptoui float %437 to i32
-  %439 = uitofp i32 %438 to float
-  %440 = fmul fast float %439, %433
-  %441 = fsub fast float %436, %440
-  %442 = and i32 %438, 1
-  %443 = icmp eq i32 %442, 0
-  br i1 %443, label %444, label %446
-
-; <label>:444                                     ; preds = %435
-  %445 = fadd fast float %441, %20
-  br label %463
-
-; <label>:446                                     ; preds = %435
-  %447 = fsub fast float %24, %441
-  br label %463
-
-; <label>:448                                     ; preds = %430
-  %449 = fcmp fast ogt float %336, %24
-  br i1 %449, label %450, label %463
-
-; <label>:450                                     ; preds = %448
-  %451 = fsub fast float %336, %24
-  %452 = fdiv fast float %451, %433
-  %453 = fptoui float %452 to i32
-  %454 = uitofp i32 %453 to float
-  %455 = fmul fast float %454, %433
-  %456 = fsub fast float %451, %455
-  %457 = and i32 %453, 1
-  %458 = icmp eq i32 %457, 0
-  br i1 %458, label %459, label %461
-
-; <label>:459                                     ; preds = %450
-  %460 = fsub fast float %24, %456
-  br label %463
-
-; <label>:461                                     ; preds = %450
-  %462 = fadd fast float %456, %20
-  br label %463
-
-; <label>:463                                     ; preds = %461, %459, %448, %446, %444
-  %464 = phi float [ %445, %444 ], [ %447, %446 ], [ %460, %459 ], [ %462, %461 ], [ %336, %448 ]
-  %465 = fptoui float %464 to i32
-  %466 = uitofp i32 %465 to float
-  %467 = uitofp i32 %432 to float
-  %468 = fptoui float %45 to i32
-  %469 = fptoui float %182 to i32
-  %470 = fptoui float %466 to i32
-  %471 = fptoui float %467 to i32
-  %472 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %473 = extractvalue %dx.types.CBufRet.i32 %472, 0
-  %474 = extractvalue %dx.types.CBufRet.i32 %472, 1
-  %475 = extractvalue %dx.types.CBufRet.i32 %472, 2
-  %476 = extractvalue %dx.types.CBufRet.i32 %472, 3
-  %477 = mul i32 %473, %468
-  %478 = call i32 @dx.op.tertiary.i32(i32 48, i32 %469, i32 %474, i32 %477)  ; IMad(a,b,c)
-  %479 = call i32 @dx.op.tertiary.i32(i32 48, i32 %470, i32 %475, i32 %478)  ; IMad(a,b,c)
-  %480 = call i32 @dx.op.tertiary.i32(i32 48, i32 %471, i32 %476, i32 %479)  ; IMad(a,b,c)
-  %481 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %480, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %482 = extractvalue %dx.types.ResRet.i16 %481, 0
-  %483 = sitofp i16 %482 to float
-  br label %484
-
-; <label>:484                                     ; preds = %463, %397, %368, %351, %341
-  %485 = phi float [ %365, %351 ], [ 0.000000e+00, %341 ], [ %396, %368 ], [ %483, %463 ], [ 0.000000e+00, %397 ]
-  br i1 %340, label %486, label %511
-
-; <label>:486                                     ; preds = %484
-  %487 = fcmp fast oge float %337, 0.000000e+00
-  %488 = fptoui float %337 to i32
-  %489 = icmp ult i32 %488, %13
-  %490 = and i1 %487, %489
-  %491 = fcmp fast oge float %336, 0.000000e+00
-  %492 = and i1 %491, %490
-  %493 = fptoui float %336 to i32
-  %494 = icmp ult i32 %493, %15
-  %495 = and i1 %494, %492
-  br i1 %495, label %496, label %629
-
-; <label>:496                                     ; preds = %486
-  %497 = fptoui float %45 to i32
-  %498 = fptoui float %182 to i32
-  %499 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %500 = extractvalue %dx.types.CBufRet.i32 %499, 0
-  %501 = extractvalue %dx.types.CBufRet.i32 %499, 1
-  %502 = extractvalue %dx.types.CBufRet.i32 %499, 2
-  %503 = extractvalue %dx.types.CBufRet.i32 %499, 3
-  %504 = mul i32 %500, %497
-  %505 = call i32 @dx.op.tertiary.i32(i32 48, i32 %498, i32 %501, i32 %504)  ; IMad(a,b,c)
-  %506 = call i32 @dx.op.tertiary.i32(i32 48, i32 %493, i32 %502, i32 %505)  ; IMad(a,b,c)
-  %507 = call i32 @dx.op.tertiary.i32(i32 48, i32 %488, i32 %503, i32 %506)  ; IMad(a,b,c)
-  %508 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %507, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %509 = extractvalue %dx.types.ResRet.i16 %508, 0
-  %510 = sitofp i16 %509 to float
-  br label %629
-
-; <label>:511                                     ; preds = %484
-  %512 = icmp eq i32 %339, 1
-  br i1 %512, label %513, label %542
-
-; <label>:513                                     ; preds = %511
-  %514 = add i32 %13, -1
-  %515 = uitofp i32 %514 to float
-  %516 = call float @dx.op.binary.f32(i32 35, float %337, float 0.000000e+00)  ; FMax(a,b)
-  %517 = call float @dx.op.binary.f32(i32 36, float %516, float %515)  ; FMin(a,b)
-  %518 = fptoui float %517 to i32
-  %519 = add i32 %15, -1
-  %520 = uitofp i32 %519 to float
-  %521 = call float @dx.op.binary.f32(i32 35, float %336, float 0.000000e+00)  ; FMax(a,b)
-  %522 = call float @dx.op.binary.f32(i32 36, float %521, float %520)  ; FMin(a,b)
-  %523 = fptoui float %522 to i32
-  %524 = uitofp i32 %523 to float
-  %525 = uitofp i32 %518 to float
-  %526 = fptoui float %45 to i32
-  %527 = fptoui float %182 to i32
-  %528 = fptoui float %524 to i32
-  %529 = fptoui float %525 to i32
-  %530 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %531 = extractvalue %dx.types.CBufRet.i32 %530, 0
-  %532 = extractvalue %dx.types.CBufRet.i32 %530, 1
-  %533 = extractvalue %dx.types.CBufRet.i32 %530, 2
-  %534 = extractvalue %dx.types.CBufRet.i32 %530, 3
-  %535 = mul i32 %531, %526
-  %536 = call i32 @dx.op.tertiary.i32(i32 48, i32 %527, i32 %532, i32 %535)  ; IMad(a,b,c)
-  %537 = call i32 @dx.op.tertiary.i32(i32 48, i32 %528, i32 %533, i32 %536)  ; IMad(a,b,c)
-  %538 = call i32 @dx.op.tertiary.i32(i32 48, i32 %529, i32 %534, i32 %537)  ; IMad(a,b,c)
-  %539 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %538, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %540 = extractvalue %dx.types.ResRet.i16 %539, 0
-  %541 = sitofp i16 %540 to float
-  br label %629
-
-; <label>:542                                     ; preds = %511
-  %543 = icmp eq i32 %339, 2
-  br i1 %543, label %544, label %629
-
-; <label>:544                                     ; preds = %542
-  %545 = fsub fast float %22, %20
-  %546 = fcmp fast olt float %337, %20
-  br i1 %546, label %547, label %560
-
-; <label>:547                                     ; preds = %544
-  %548 = fsub fast float %20, %337
-  %549 = fdiv fast float %548, %545
-  %550 = fptoui float %549 to i32
-  %551 = uitofp i32 %550 to float
-  %552 = fmul fast float %551, %545
-  %553 = fsub fast float %548, %552
-  %554 = and i32 %550, 1
-  %555 = icmp eq i32 %554, 0
-  br i1 %555, label %556, label %558
-
-; <label>:556                                     ; preds = %547
-  %557 = fadd fast float %553, %20
-  br label %575
-
-; <label>:558                                     ; preds = %547
-  %559 = fsub fast float %22, %553
-  br label %575
-
-; <label>:560                                     ; preds = %544
-  %561 = fcmp fast ogt float %337, %22
-  br i1 %561, label %562, label %575
-
-; <label>:562                                     ; preds = %560
-  %563 = fsub fast float %337, %22
-  %564 = fdiv fast float %563, %545
-  %565 = fptoui float %564 to i32
-  %566 = uitofp i32 %565 to float
-  %567 = fmul fast float %566, %545
-  %568 = fsub fast float %563, %567
-  %569 = and i32 %565, 1
-  %570 = icmp eq i32 %569, 0
-  br i1 %570, label %571, label %573
-
-; <label>:571                                     ; preds = %562
-  %572 = fsub fast float %22, %568
-  br label %575
-
-; <label>:573                                     ; preds = %562
-  %574 = fadd fast float %568, %20
-  br label %575
-
-; <label>:575                                     ; preds = %573, %571, %560, %558, %556
-  %576 = phi float [ %557, %556 ], [ %559, %558 ], [ %572, %571 ], [ %574, %573 ], [ %337, %560 ]
-  %577 = fptoui float %576 to i32
-  %578 = fsub fast float %24, %20
-  %579 = fcmp fast olt float %336, %20
-  br i1 %579, label %580, label %593
-
-; <label>:580                                     ; preds = %575
-  %581 = fsub fast float %20, %336
-  %582 = fdiv fast float %581, %578
-  %583 = fptoui float %582 to i32
-  %584 = uitofp i32 %583 to float
-  %585 = fmul fast float %584, %578
-  %586 = fsub fast float %581, %585
-  %587 = and i32 %583, 1
-  %588 = icmp eq i32 %587, 0
-  br i1 %588, label %589, label %591
-
-; <label>:589                                     ; preds = %580
-  %590 = fadd fast float %586, %20
-  br label %608
-
-; <label>:591                                     ; preds = %580
-  %592 = fsub fast float %24, %586
-  br label %608
-
-; <label>:593                                     ; preds = %575
-  %594 = fcmp fast ogt float %336, %24
-  br i1 %594, label %595, label %608
-
-; <label>:595                                     ; preds = %593
-  %596 = fsub fast float %336, %24
-  %597 = fdiv fast float %596, %578
-  %598 = fptoui float %597 to i32
-  %599 = uitofp i32 %598 to float
-  %600 = fmul fast float %599, %578
-  %601 = fsub fast float %596, %600
-  %602 = and i32 %598, 1
-  %603 = icmp eq i32 %602, 0
-  br i1 %603, label %604, label %606
-
-; <label>:604                                     ; preds = %595
-  %605 = fsub fast float %24, %601
-  br label %608
-
-; <label>:606                                     ; preds = %595
-  %607 = fadd fast float %601, %20
-  br label %608
-
-; <label>:608                                     ; preds = %606, %604, %593, %591, %589
-  %609 = phi float [ %590, %589 ], [ %592, %591 ], [ %605, %604 ], [ %607, %606 ], [ %336, %593 ]
-  %610 = fptoui float %609 to i32
-  %611 = uitofp i32 %610 to float
-  %612 = uitofp i32 %577 to float
-  %613 = fptoui float %45 to i32
-  %614 = fptoui float %182 to i32
-  %615 = fptoui float %611 to i32
-  %616 = fptoui float %612 to i32
-  %617 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %618 = extractvalue %dx.types.CBufRet.i32 %617, 0
-  %619 = extractvalue %dx.types.CBufRet.i32 %617, 1
-  %620 = extractvalue %dx.types.CBufRet.i32 %617, 2
-  %621 = extractvalue %dx.types.CBufRet.i32 %617, 3
-  %622 = mul i32 %618, %613
-  %623 = call i32 @dx.op.tertiary.i32(i32 48, i32 %614, i32 %619, i32 %622)  ; IMad(a,b,c)
-  %624 = call i32 @dx.op.tertiary.i32(i32 48, i32 %615, i32 %620, i32 %623)  ; IMad(a,b,c)
-  %625 = call i32 @dx.op.tertiary.i32(i32 48, i32 %616, i32 %621, i32 %624)  ; IMad(a,b,c)
-  %626 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %625, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %627 = extractvalue %dx.types.ResRet.i16 %626, 0
-  %628 = sitofp i16 %627 to float
-  br label %629
-
-; <label>:629                                     ; preds = %608, %542, %513, %496, %486
-  %630 = phi float [ %510, %496 ], [ 0.000000e+00, %486 ], [ %541, %513 ], [ %628, %608 ], [ 0.000000e+00, %542 ]
-  br i1 %340, label %631, label %656
-
-; <label>:631                                     ; preds = %629
-  %632 = fcmp fast oge float %335, 0.000000e+00
-  %633 = fptoui float %335 to i32
-  %634 = icmp ult i32 %633, %13
-  %635 = and i1 %632, %634
-  %636 = fcmp fast oge float %338, 0.000000e+00
-  %637 = and i1 %636, %635
-  %638 = fptoui float %338 to i32
-  %639 = icmp ult i32 %638, %15
-  %640 = and i1 %639, %637
-  br i1 %640, label %641, label %774
-
-; <label>:641                                     ; preds = %631
-  %642 = fptoui float %45 to i32
-  %643 = fptoui float %182 to i32
-  %644 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %645 = extractvalue %dx.types.CBufRet.i32 %644, 0
-  %646 = extractvalue %dx.types.CBufRet.i32 %644, 1
-  %647 = extractvalue %dx.types.CBufRet.i32 %644, 2
-  %648 = extractvalue %dx.types.CBufRet.i32 %644, 3
-  %649 = mul i32 %645, %642
-  %650 = call i32 @dx.op.tertiary.i32(i32 48, i32 %643, i32 %646, i32 %649)  ; IMad(a,b,c)
-  %651 = call i32 @dx.op.tertiary.i32(i32 48, i32 %638, i32 %647, i32 %650)  ; IMad(a,b,c)
-  %652 = call i32 @dx.op.tertiary.i32(i32 48, i32 %633, i32 %648, i32 %651)  ; IMad(a,b,c)
-  %653 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %652, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %654 = extractvalue %dx.types.ResRet.i16 %653, 0
-  %655 = sitofp i16 %654 to float
-  br label %774
-
-; <label>:656                                     ; preds = %629
-  %657 = icmp eq i32 %339, 1
-  br i1 %657, label %658, label %687
-
-; <label>:658                                     ; preds = %656
-  %659 = add i32 %13, -1
-  %660 = uitofp i32 %659 to float
-  %661 = call float @dx.op.binary.f32(i32 35, float %335, float 0.000000e+00)  ; FMax(a,b)
-  %662 = call float @dx.op.binary.f32(i32 36, float %661, float %660)  ; FMin(a,b)
-  %663 = fptoui float %662 to i32
-  %664 = add i32 %15, -1
-  %665 = uitofp i32 %664 to float
-  %666 = call float @dx.op.binary.f32(i32 35, float %338, float 0.000000e+00)  ; FMax(a,b)
-  %667 = call float @dx.op.binary.f32(i32 36, float %666, float %665)  ; FMin(a,b)
-  %668 = fptoui float %667 to i32
-  %669 = uitofp i32 %668 to float
-  %670 = uitofp i32 %663 to float
-  %671 = fptoui float %45 to i32
-  %672 = fptoui float %182 to i32
-  %673 = fptoui float %669 to i32
-  %674 = fptoui float %670 to i32
-  %675 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %676 = extractvalue %dx.types.CBufRet.i32 %675, 0
-  %677 = extractvalue %dx.types.CBufRet.i32 %675, 1
-  %678 = extractvalue %dx.types.CBufRet.i32 %675, 2
-  %679 = extractvalue %dx.types.CBufRet.i32 %675, 3
-  %680 = mul i32 %676, %671
-  %681 = call i32 @dx.op.tertiary.i32(i32 48, i32 %672, i32 %677, i32 %680)  ; IMad(a,b,c)
-  %682 = call i32 @dx.op.tertiary.i32(i32 48, i32 %673, i32 %678, i32 %681)  ; IMad(a,b,c)
-  %683 = call i32 @dx.op.tertiary.i32(i32 48, i32 %674, i32 %679, i32 %682)  ; IMad(a,b,c)
-  %684 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %683, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %685 = extractvalue %dx.types.ResRet.i16 %684, 0
-  %686 = sitofp i16 %685 to float
-  br label %774
-
-; <label>:687                                     ; preds = %656
-  %688 = icmp eq i32 %339, 2
-  br i1 %688, label %689, label %774
-
-; <label>:689                                     ; preds = %687
-  %690 = fsub fast float %22, %20
-  %691 = fcmp fast olt float %335, %20
-  br i1 %691, label %692, label %705
-
-; <label>:692                                     ; preds = %689
-  %693 = fsub fast float %20, %335
-  %694 = fdiv fast float %693, %690
-  %695 = fptoui float %694 to i32
-  %696 = uitofp i32 %695 to float
-  %697 = fmul fast float %696, %690
-  %698 = fsub fast float %693, %697
-  %699 = and i32 %695, 1
-  %700 = icmp eq i32 %699, 0
-  br i1 %700, label %701, label %703
-
-; <label>:701                                     ; preds = %692
-  %702 = fadd fast float %698, %20
-  br label %720
-
-; <label>:703                                     ; preds = %692
-  %704 = fsub fast float %22, %698
-  br label %720
-
-; <label>:705                                     ; preds = %689
-  %706 = fcmp fast ogt float %335, %22
-  br i1 %706, label %707, label %720
-
-; <label>:707                                     ; preds = %705
-  %708 = fsub fast float %335, %22
-  %709 = fdiv fast float %708, %690
-  %710 = fptoui float %709 to i32
-  %711 = uitofp i32 %710 to float
-  %712 = fmul fast float %711, %690
-  %713 = fsub fast float %708, %712
-  %714 = and i32 %710, 1
-  %715 = icmp eq i32 %714, 0
-  br i1 %715, label %716, label %718
-
-; <label>:716                                     ; preds = %707
-  %717 = fsub fast float %22, %713
-  br label %720
-
-; <label>:718                                     ; preds = %707
-  %719 = fadd fast float %713, %20
-  br label %720
-
-; <label>:720                                     ; preds = %718, %716, %705, %703, %701
-  %721 = phi float [ %702, %701 ], [ %704, %703 ], [ %717, %716 ], [ %719, %718 ], [ %335, %705 ]
-  %722 = fptoui float %721 to i32
-  %723 = fsub fast float %24, %20
-  %724 = fcmp fast olt float %338, %20
-  br i1 %724, label %725, label %738
-
-; <label>:725                                     ; preds = %720
-  %726 = fsub fast float %20, %338
-  %727 = fdiv fast float %726, %723
-  %728 = fptoui float %727 to i32
-  %729 = uitofp i32 %728 to float
-  %730 = fmul fast float %729, %723
-  %731 = fsub fast float %726, %730
-  %732 = and i32 %728, 1
-  %733 = icmp eq i32 %732, 0
-  br i1 %733, label %734, label %736
-
-; <label>:734                                     ; preds = %725
-  %735 = fadd fast float %731, %20
-  br label %753
-
-; <label>:736                                     ; preds = %725
-  %737 = fsub fast float %24, %731
-  br label %753
-
-; <label>:738                                     ; preds = %720
-  %739 = fcmp fast ogt float %338, %24
-  br i1 %739, label %740, label %753
-
-; <label>:740                                     ; preds = %738
-  %741 = fsub fast float %338, %24
-  %742 = fdiv fast float %741, %723
-  %743 = fptoui float %742 to i32
-  %744 = uitofp i32 %743 to float
-  %745 = fmul fast float %744, %723
-  %746 = fsub fast float %741, %745
-  %747 = and i32 %743, 1
-  %748 = icmp eq i32 %747, 0
-  br i1 %748, label %749, label %751
-
-; <label>:749                                     ; preds = %740
-  %750 = fsub fast float %24, %746
-  br label %753
-
-; <label>:751                                     ; preds = %740
-  %752 = fadd fast float %746, %20
-  br label %753
-
-; <label>:753                                     ; preds = %751, %749, %738, %736, %734
-  %754 = phi float [ %735, %734 ], [ %737, %736 ], [ %750, %749 ], [ %752, %751 ], [ %338, %738 ]
-  %755 = fptoui float %754 to i32
-  %756 = uitofp i32 %755 to float
-  %757 = uitofp i32 %722 to float
-  %758 = fptoui float %45 to i32
-  %759 = fptoui float %182 to i32
-  %760 = fptoui float %756 to i32
-  %761 = fptoui float %757 to i32
-  %762 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %763 = extractvalue %dx.types.CBufRet.i32 %762, 0
-  %764 = extractvalue %dx.types.CBufRet.i32 %762, 1
-  %765 = extractvalue %dx.types.CBufRet.i32 %762, 2
-  %766 = extractvalue %dx.types.CBufRet.i32 %762, 3
-  %767 = mul i32 %763, %758
-  %768 = call i32 @dx.op.tertiary.i32(i32 48, i32 %759, i32 %764, i32 %767)  ; IMad(a,b,c)
-  %769 = call i32 @dx.op.tertiary.i32(i32 48, i32 %760, i32 %765, i32 %768)  ; IMad(a,b,c)
-  %770 = call i32 @dx.op.tertiary.i32(i32 48, i32 %761, i32 %766, i32 %769)  ; IMad(a,b,c)
-  %771 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %770, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %772 = extractvalue %dx.types.ResRet.i16 %771, 0
-  %773 = sitofp i16 %772 to float
-  br label %774
-
-; <label>:774                                     ; preds = %753, %687, %658, %641, %631
-  %775 = phi float [ %655, %641 ], [ 0.000000e+00, %631 ], [ %686, %658 ], [ %773, %753 ], [ 0.000000e+00, %687 ]
-  br i1 %340, label %776, label %801
-
-; <label>:776                                     ; preds = %774
-  %777 = fcmp fast oge float %337, 0.000000e+00
-  %778 = fptoui float %337 to i32
-  %779 = icmp ult i32 %778, %13
-  %780 = and i1 %777, %779
-  %781 = fcmp fast oge float %338, 0.000000e+00
-  %782 = and i1 %781, %780
-  %783 = fptoui float %338 to i32
-  %784 = icmp ult i32 %783, %15
-  %785 = and i1 %784, %782
-  br i1 %785, label %786, label %919
-
-; <label>:786                                     ; preds = %776
-  %787 = fptoui float %45 to i32
-  %788 = fptoui float %182 to i32
-  %789 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %790 = extractvalue %dx.types.CBufRet.i32 %789, 0
-  %791 = extractvalue %dx.types.CBufRet.i32 %789, 1
-  %792 = extractvalue %dx.types.CBufRet.i32 %789, 2
-  %793 = extractvalue %dx.types.CBufRet.i32 %789, 3
-  %794 = mul i32 %790, %787
-  %795 = call i32 @dx.op.tertiary.i32(i32 48, i32 %788, i32 %791, i32 %794)  ; IMad(a,b,c)
-  %796 = call i32 @dx.op.tertiary.i32(i32 48, i32 %783, i32 %792, i32 %795)  ; IMad(a,b,c)
-  %797 = call i32 @dx.op.tertiary.i32(i32 48, i32 %778, i32 %793, i32 %796)  ; IMad(a,b,c)
-  %798 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %797, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %799 = extractvalue %dx.types.ResRet.i16 %798, 0
-  %800 = sitofp i16 %799 to float
-  br label %919
-
-; <label>:801                                     ; preds = %774
-  %802 = icmp eq i32 %339, 1
-  br i1 %802, label %803, label %832
-
-; <label>:803                                     ; preds = %801
-  %804 = add i32 %13, -1
-  %805 = uitofp i32 %804 to float
-  %806 = call float @dx.op.binary.f32(i32 35, float %337, float 0.000000e+00)  ; FMax(a,b)
-  %807 = call float @dx.op.binary.f32(i32 36, float %806, float %805)  ; FMin(a,b)
-  %808 = fptoui float %807 to i32
-  %809 = add i32 %15, -1
-  %810 = uitofp i32 %809 to float
-  %811 = call float @dx.op.binary.f32(i32 35, float %338, float 0.000000e+00)  ; FMax(a,b)
-  %812 = call float @dx.op.binary.f32(i32 36, float %811, float %810)  ; FMin(a,b)
-  %813 = fptoui float %812 to i32
-  %814 = uitofp i32 %813 to float
-  %815 = uitofp i32 %808 to float
-  %816 = fptoui float %45 to i32
-  %817 = fptoui float %182 to i32
-  %818 = fptoui float %814 to i32
-  %819 = fptoui float %815 to i32
-  %820 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %821 = extractvalue %dx.types.CBufRet.i32 %820, 0
-  %822 = extractvalue %dx.types.CBufRet.i32 %820, 1
-  %823 = extractvalue %dx.types.CBufRet.i32 %820, 2
-  %824 = extractvalue %dx.types.CBufRet.i32 %820, 3
-  %825 = mul i32 %821, %816
-  %826 = call i32 @dx.op.tertiary.i32(i32 48, i32 %817, i32 %822, i32 %825)  ; IMad(a,b,c)
-  %827 = call i32 @dx.op.tertiary.i32(i32 48, i32 %818, i32 %823, i32 %826)  ; IMad(a,b,c)
-  %828 = call i32 @dx.op.tertiary.i32(i32 48, i32 %819, i32 %824, i32 %827)  ; IMad(a,b,c)
-  %829 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %828, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %830 = extractvalue %dx.types.ResRet.i16 %829, 0
-  %831 = sitofp i16 %830 to float
-  br label %919
-
-; <label>:832                                     ; preds = %801
-  %833 = icmp eq i32 %339, 2
-  br i1 %833, label %834, label %919
-
-; <label>:834                                     ; preds = %832
-  %835 = fsub fast float %22, %20
-  %836 = fcmp fast olt float %337, %20
-  br i1 %836, label %837, label %850
-
-; <label>:837                                     ; preds = %834
-  %838 = fsub fast float %20, %337
-  %839 = fdiv fast float %838, %835
-  %840 = fptoui float %839 to i32
-  %841 = uitofp i32 %840 to float
-  %842 = fmul fast float %841, %835
-  %843 = fsub fast float %838, %842
-  %844 = and i32 %840, 1
-  %845 = icmp eq i32 %844, 0
-  br i1 %845, label %846, label %848
-
-; <label>:846                                     ; preds = %837
-  %847 = fadd fast float %843, %20
-  br label %865
-
-; <label>:848                                     ; preds = %837
-  %849 = fsub fast float %22, %843
-  br label %865
-
-; <label>:850                                     ; preds = %834
-  %851 = fcmp fast ogt float %337, %22
-  br i1 %851, label %852, label %865
-
-; <label>:852                                     ; preds = %850
-  %853 = fsub fast float %337, %22
-  %854 = fdiv fast float %853, %835
-  %855 = fptoui float %854 to i32
-  %856 = uitofp i32 %855 to float
-  %857 = fmul fast float %856, %835
-  %858 = fsub fast float %853, %857
-  %859 = and i32 %855, 1
-  %860 = icmp eq i32 %859, 0
-  br i1 %860, label %861, label %863
-
-; <label>:861                                     ; preds = %852
-  %862 = fsub fast float %22, %858
-  br label %865
-
-; <label>:863                                     ; preds = %852
-  %864 = fadd fast float %858, %20
-  br label %865
-
-; <label>:865                                     ; preds = %863, %861, %850, %848, %846
-  %866 = phi float [ %847, %846 ], [ %849, %848 ], [ %862, %861 ], [ %864, %863 ], [ %337, %850 ]
-  %867 = fptoui float %866 to i32
-  %868 = fsub fast float %24, %20
-  %869 = fcmp fast olt float %338, %20
-  br i1 %869, label %870, label %883
-
-; <label>:870                                     ; preds = %865
-  %871 = fsub fast float %20, %338
-  %872 = fdiv fast float %871, %868
-  %873 = fptoui float %872 to i32
-  %874 = uitofp i32 %873 to float
-  %875 = fmul fast float %874, %868
-  %876 = fsub fast float %871, %875
-  %877 = and i32 %873, 1
-  %878 = icmp eq i32 %877, 0
-  br i1 %878, label %879, label %881
-
-; <label>:879                                     ; preds = %870
-  %880 = fadd fast float %876, %20
-  br label %898
-
-; <label>:881                                     ; preds = %870
-  %882 = fsub fast float %24, %876
-  br label %898
-
-; <label>:883                                     ; preds = %865
-  %884 = fcmp fast ogt float %338, %24
-  br i1 %884, label %885, label %898
-
-; <label>:885                                     ; preds = %883
-  %886 = fsub fast float %338, %24
-  %887 = fdiv fast float %886, %868
-  %888 = fptoui float %887 to i32
-  %889 = uitofp i32 %888 to float
-  %890 = fmul fast float %889, %868
-  %891 = fsub fast float %886, %890
-  %892 = and i32 %888, 1
-  %893 = icmp eq i32 %892, 0
-  br i1 %893, label %894, label %896
-
-; <label>:894                                     ; preds = %885
-  %895 = fsub fast float %24, %891
-  br label %898
-
-; <label>:896                                     ; preds = %885
-  %897 = fadd fast float %891, %20
-  br label %898
-
-; <label>:898                                     ; preds = %896, %894, %883, %881, %879
-  %899 = phi float [ %880, %879 ], [ %882, %881 ], [ %895, %894 ], [ %897, %896 ], [ %338, %883 ]
-  %900 = fptoui float %899 to i32
-  %901 = uitofp i32 %900 to float
-  %902 = uitofp i32 %867 to float
-  %903 = fptoui float %45 to i32
-  %904 = fptoui float %182 to i32
-  %905 = fptoui float %901 to i32
-  %906 = fptoui float %902 to i32
-  %907 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %908 = extractvalue %dx.types.CBufRet.i32 %907, 0
-  %909 = extractvalue %dx.types.CBufRet.i32 %907, 1
-  %910 = extractvalue %dx.types.CBufRet.i32 %907, 2
-  %911 = extractvalue %dx.types.CBufRet.i32 %907, 3
-  %912 = mul i32 %908, %903
-  %913 = call i32 @dx.op.tertiary.i32(i32 48, i32 %904, i32 %909, i32 %912)  ; IMad(a,b,c)
-  %914 = call i32 @dx.op.tertiary.i32(i32 48, i32 %905, i32 %910, i32 %913)  ; IMad(a,b,c)
-  %915 = call i32 @dx.op.tertiary.i32(i32 48, i32 %906, i32 %911, i32 %914)  ; IMad(a,b,c)
-  %916 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %915, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %917 = extractvalue %dx.types.ResRet.i16 %916, 0
-  %918 = sitofp i16 %917 to float
-  br label %919
-
-; <label>:919                                     ; preds = %898, %832, %803, %786, %776
-  %920 = phi float [ %800, %786 ], [ 0.000000e+00, %776 ], [ %831, %803 ], [ %918, %898 ], [ 0.000000e+00, %832 ]
-  %921 = call float @dx.op.unary.f32(i32 22, float %180)  ; Frc(value)
-  %922 = fsub fast float %630, %485
-  %923 = fmul fast float %921, %922
-  %924 = fadd fast float %923, %485
-  %925 = fsub fast float %920, %775
-  %926 = fmul fast float %921, %925
-  %927 = fadd fast float %926, %775
-  %928 = call float @dx.op.unary.f32(i32 22, float %181)  ; Frc(value)
-  %929 = fsub fast float %927, %924
-  %930 = fmul fast float %929, %928
-  %931 = fadd fast float %930, %924
-  %932 = fptosi float %931 to i16
-  call void @dx.op.rawBufferStore.i16(i32 140, %dx.types.Handle %1, i32 %8, i32 0, i16 %932, i16 undef, i16 undef, i16 undef, i8 1, i32 2)  ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment)
-  br label %3323
-
-; <label>:933                                     ; preds = %332
-  %934 = icmp eq i32 %91, 2
-  br i1 %934, label %935, label %3323
-
-; <label>:935                                     ; preds = %933
-  %936 = call float @dx.op.unary.f32(i32 27, float %180)  ; Round_ni(value)
-  %937 = fadd fast float %936, -1.000000e+00
-  %938 = call float @dx.op.unary.f32(i32 27, float %181)  ; Round_ni(value)
-  %939 = fadd fast float %938, -1.000000e+00
-  %940 = extractvalue %dx.types.CBufRet.i32 %6, 3
-  %941 = icmp eq i32 %940, 0
-  br i1 %941, label %942, label %967
-
-; <label>:942                                     ; preds = %935
-  %943 = fcmp fast oge float %937, 0.000000e+00
-  %944 = fptoui float %937 to i32
-  %945 = icmp ult i32 %944, %13
-  %946 = and i1 %943, %945
-  %947 = fcmp fast oge float %939, 0.000000e+00
-  %948 = and i1 %947, %946
-  %949 = fptoui float %939 to i32
-  %950 = icmp ult i32 %949, %15
-  %951 = and i1 %950, %948
-  br i1 %951, label %952, label %1085
-
-; <label>:952                                     ; preds = %942
-  %953 = fptoui float %45 to i32
-  %954 = fptoui float %182 to i32
-  %955 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %956 = extractvalue %dx.types.CBufRet.i32 %955, 0
-  %957 = extractvalue %dx.types.CBufRet.i32 %955, 1
-  %958 = extractvalue %dx.types.CBufRet.i32 %955, 2
-  %959 = extractvalue %dx.types.CBufRet.i32 %955, 3
-  %960 = mul i32 %956, %953
-  %961 = call i32 @dx.op.tertiary.i32(i32 48, i32 %954, i32 %957, i32 %960)  ; IMad(a,b,c)
-  %962 = call i32 @dx.op.tertiary.i32(i32 48, i32 %949, i32 %958, i32 %961)  ; IMad(a,b,c)
-  %963 = call i32 @dx.op.tertiary.i32(i32 48, i32 %944, i32 %959, i32 %962)  ; IMad(a,b,c)
-  %964 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %963, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %965 = extractvalue %dx.types.ResRet.i16 %964, 0
-  %966 = sitofp i16 %965 to float
-  br label %1085
-
-; <label>:967                                     ; preds = %935
-  %968 = icmp eq i32 %940, 1
-  br i1 %968, label %969, label %998
-
-; <label>:969                                     ; preds = %967
-  %970 = add i32 %13, -1
-  %971 = uitofp i32 %970 to float
-  %972 = call float @dx.op.binary.f32(i32 35, float %937, float 0.000000e+00)  ; FMax(a,b)
-  %973 = call float @dx.op.binary.f32(i32 36, float %972, float %971)  ; FMin(a,b)
-  %974 = fptoui float %973 to i32
-  %975 = add i32 %15, -1
-  %976 = uitofp i32 %975 to float
-  %977 = call float @dx.op.binary.f32(i32 35, float %939, float 0.000000e+00)  ; FMax(a,b)
-  %978 = call float @dx.op.binary.f32(i32 36, float %977, float %976)  ; FMin(a,b)
-  %979 = fptoui float %978 to i32
-  %980 = uitofp i32 %979 to float
-  %981 = uitofp i32 %974 to float
-  %982 = fptoui float %45 to i32
-  %983 = fptoui float %182 to i32
-  %984 = fptoui float %980 to i32
-  %985 = fptoui float %981 to i32
-  %986 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %987 = extractvalue %dx.types.CBufRet.i32 %986, 0
-  %988 = extractvalue %dx.types.CBufRet.i32 %986, 1
-  %989 = extractvalue %dx.types.CBufRet.i32 %986, 2
-  %990 = extractvalue %dx.types.CBufRet.i32 %986, 3
-  %991 = mul i32 %987, %982
-  %992 = call i32 @dx.op.tertiary.i32(i32 48, i32 %983, i32 %988, i32 %991)  ; IMad(a,b,c)
-  %993 = call i32 @dx.op.tertiary.i32(i32 48, i32 %984, i32 %989, i32 %992)  ; IMad(a,b,c)
-  %994 = call i32 @dx.op.tertiary.i32(i32 48, i32 %985, i32 %990, i32 %993)  ; IMad(a,b,c)
-  %995 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %994, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %996 = extractvalue %dx.types.ResRet.i16 %995, 0
-  %997 = sitofp i16 %996 to float
-  br label %1085
-
-; <label>:998                                     ; preds = %967
-  %999 = icmp eq i32 %940, 2
-  br i1 %999, label %1000, label %1085
-
-; <label>:1000                                    ; preds = %998
-  %1001 = fsub fast float %22, %20
-  %1002 = fcmp fast olt float %937, %20
-  br i1 %1002, label %1003, label %1016
-
-; <label>:1003                                    ; preds = %1000
-  %1004 = fsub fast float %20, %937
-  %1005 = fdiv fast float %1004, %1001
-  %1006 = fptoui float %1005 to i32
-  %1007 = uitofp i32 %1006 to float
-  %1008 = fmul fast float %1007, %1001
-  %1009 = fsub fast float %1004, %1008
-  %1010 = and i32 %1006, 1
-  %1011 = icmp eq i32 %1010, 0
-  br i1 %1011, label %1012, label %1014
-
-; <label>:1012                                    ; preds = %1003
-  %1013 = fadd fast float %1009, %20
-  br label %1031
-
-; <label>:1014                                    ; preds = %1003
-  %1015 = fsub fast float %22, %1009
-  br label %1031
-
-; <label>:1016                                    ; preds = %1000
-  %1017 = fcmp fast ogt float %937, %22
-  br i1 %1017, label %1018, label %1031
-
-; <label>:1018                                    ; preds = %1016
-  %1019 = fsub fast float %937, %22
-  %1020 = fdiv fast float %1019, %1001
-  %1021 = fptoui float %1020 to i32
-  %1022 = uitofp i32 %1021 to float
-  %1023 = fmul fast float %1022, %1001
-  %1024 = fsub fast float %1019, %1023
-  %1025 = and i32 %1021, 1
-  %1026 = icmp eq i32 %1025, 0
-  br i1 %1026, label %1027, label %1029
-
-; <label>:1027                                    ; preds = %1018
-  %1028 = fsub fast float %22, %1024
-  br label %1031
-
-; <label>:1029                                    ; preds = %1018
-  %1030 = fadd fast float %1024, %20
-  br label %1031
-
-; <label>:1031                                    ; preds = %1029, %1027, %1016, %1014, %1012
-  %1032 = phi float [ %1013, %1012 ], [ %1015, %1014 ], [ %1028, %1027 ], [ %1030, %1029 ], [ %937, %1016 ]
-  %1033 = fptoui float %1032 to i32
-  %1034 = fsub fast float %24, %20
-  %1035 = fcmp fast olt float %939, %20
-  br i1 %1035, label %1036, label %1049
-
-; <label>:1036                                    ; preds = %1031
-  %1037 = fsub fast float %20, %939
-  %1038 = fdiv fast float %1037, %1034
-  %1039 = fptoui float %1038 to i32
-  %1040 = uitofp i32 %1039 to float
-  %1041 = fmul fast float %1040, %1034
-  %1042 = fsub fast float %1037, %1041
-  %1043 = and i32 %1039, 1
-  %1044 = icmp eq i32 %1043, 0
-  br i1 %1044, label %1045, label %1047
-
-; <label>:1045                                    ; preds = %1036
-  %1046 = fadd fast float %1042, %20
-  br label %1064
-
-; <label>:1047                                    ; preds = %1036
-  %1048 = fsub fast float %24, %1042
-  br label %1064
-
-; <label>:1049                                    ; preds = %1031
-  %1050 = fcmp fast ogt float %939, %24
-  br i1 %1050, label %1051, label %1064
-
-; <label>:1051                                    ; preds = %1049
-  %1052 = fsub fast float %939, %24
-  %1053 = fdiv fast float %1052, %1034
-  %1054 = fptoui float %1053 to i32
-  %1055 = uitofp i32 %1054 to float
-  %1056 = fmul fast float %1055, %1034
-  %1057 = fsub fast float %1052, %1056
-  %1058 = and i32 %1054, 1
-  %1059 = icmp eq i32 %1058, 0
-  br i1 %1059, label %1060, label %1062
-
-; <label>:1060                                    ; preds = %1051
-  %1061 = fsub fast float %24, %1057
-  br label %1064
-
-; <label>:1062                                    ; preds = %1051
-  %1063 = fadd fast float %1057, %20
-  br label %1064
-
-; <label>:1064                                    ; preds = %1062, %1060, %1049, %1047, %1045
-  %1065 = phi float [ %1046, %1045 ], [ %1048, %1047 ], [ %1061, %1060 ], [ %1063, %1062 ], [ %939, %1049 ]
-  %1066 = fptoui float %1065 to i32
-  %1067 = uitofp i32 %1066 to float
-  %1068 = uitofp i32 %1033 to float
-  %1069 = fptoui float %45 to i32
-  %1070 = fptoui float %182 to i32
-  %1071 = fptoui float %1067 to i32
-  %1072 = fptoui float %1068 to i32
-  %1073 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1074 = extractvalue %dx.types.CBufRet.i32 %1073, 0
-  %1075 = extractvalue %dx.types.CBufRet.i32 %1073, 1
-  %1076 = extractvalue %dx.types.CBufRet.i32 %1073, 2
-  %1077 = extractvalue %dx.types.CBufRet.i32 %1073, 3
-  %1078 = mul i32 %1074, %1069
-  %1079 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1070, i32 %1075, i32 %1078)  ; IMad(a,b,c)
-  %1080 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1071, i32 %1076, i32 %1079)  ; IMad(a,b,c)
-  %1081 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1072, i32 %1077, i32 %1080)  ; IMad(a,b,c)
-  %1082 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %1081, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1083 = extractvalue %dx.types.ResRet.i16 %1082, 0
-  %1084 = sitofp i16 %1083 to float
-  br label %1085
-
-; <label>:1085                                    ; preds = %1064, %998, %969, %952, %942
-  %1086 = phi float [ %966, %952 ], [ 0.000000e+00, %942 ], [ %997, %969 ], [ %1084, %1064 ], [ 0.000000e+00, %998 ]
-  br i1 %941, label %1087, label %1112
-
-; <label>:1087                                    ; preds = %1085
-  %1088 = fcmp fast oge float %936, 0.000000e+00
-  %1089 = fptoui float %936 to i32
-  %1090 = icmp ult i32 %1089, %13
-  %1091 = and i1 %1088, %1090
-  %1092 = fcmp fast oge float %939, 0.000000e+00
-  %1093 = and i1 %1092, %1091
-  %1094 = fptoui float %939 to i32
-  %1095 = icmp ult i32 %1094, %15
-  %1096 = and i1 %1095, %1093
-  br i1 %1096, label %1097, label %1230
-
-; <label>:1097                                    ; preds = %1087
-  %1098 = fptoui float %45 to i32
-  %1099 = fptoui float %182 to i32
-  %1100 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1101 = extractvalue %dx.types.CBufRet.i32 %1100, 0
-  %1102 = extractvalue %dx.types.CBufRet.i32 %1100, 1
-  %1103 = extractvalue %dx.types.CBufRet.i32 %1100, 2
-  %1104 = extractvalue %dx.types.CBufRet.i32 %1100, 3
-  %1105 = mul i32 %1101, %1098
-  %1106 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1099, i32 %1102, i32 %1105)  ; IMad(a,b,c)
-  %1107 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1094, i32 %1103, i32 %1106)  ; IMad(a,b,c)
-  %1108 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1089, i32 %1104, i32 %1107)  ; IMad(a,b,c)
-  %1109 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %1108, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1110 = extractvalue %dx.types.ResRet.i16 %1109, 0
-  %1111 = sitofp i16 %1110 to float
-  br label %1230
-
-; <label>:1112                                    ; preds = %1085
-  %1113 = icmp eq i32 %940, 1
-  br i1 %1113, label %1114, label %1143
-
-; <label>:1114                                    ; preds = %1112
-  %1115 = add i32 %13, -1
-  %1116 = uitofp i32 %1115 to float
-  %1117 = call float @dx.op.binary.f32(i32 35, float %936, float 0.000000e+00)  ; FMax(a,b)
-  %1118 = call float @dx.op.binary.f32(i32 36, float %1117, float %1116)  ; FMin(a,b)
-  %1119 = fptoui float %1118 to i32
-  %1120 = add i32 %15, -1
-  %1121 = uitofp i32 %1120 to float
-  %1122 = call float @dx.op.binary.f32(i32 35, float %939, float 0.000000e+00)  ; FMax(a,b)
-  %1123 = call float @dx.op.binary.f32(i32 36, float %1122, float %1121)  ; FMin(a,b)
-  %1124 = fptoui float %1123 to i32
-  %1125 = uitofp i32 %1124 to float
-  %1126 = uitofp i32 %1119 to float
-  %1127 = fptoui float %45 to i32
-  %1128 = fptoui float %182 to i32
-  %1129 = fptoui float %1125 to i32
-  %1130 = fptoui float %1126 to i32
-  %1131 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1132 = extractvalue %dx.types.CBufRet.i32 %1131, 0
-  %1133 = extractvalue %dx.types.CBufRet.i32 %1131, 1
-  %1134 = extractvalue %dx.types.CBufRet.i32 %1131, 2
-  %1135 = extractvalue %dx.types.CBufRet.i32 %1131, 3
-  %1136 = mul i32 %1132, %1127
-  %1137 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1128, i32 %1133, i32 %1136)  ; IMad(a,b,c)
-  %1138 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1129, i32 %1134, i32 %1137)  ; IMad(a,b,c)
-  %1139 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1130, i32 %1135, i32 %1138)  ; IMad(a,b,c)
-  %1140 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %1139, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1141 = extractvalue %dx.types.ResRet.i16 %1140, 0
-  %1142 = sitofp i16 %1141 to float
-  br label %1230
-
-; <label>:1143                                    ; preds = %1112
-  %1144 = icmp eq i32 %940, 2
-  br i1 %1144, label %1145, label %1230
-
-; <label>:1145                                    ; preds = %1143
-  %1146 = fsub fast float %22, %20
-  %1147 = fcmp fast olt float %936, %20
-  br i1 %1147, label %1148, label %1161
-
-; <label>:1148                                    ; preds = %1145
-  %1149 = fsub fast float %20, %936
-  %1150 = fdiv fast float %1149, %1146
-  %1151 = fptoui float %1150 to i32
-  %1152 = uitofp i32 %1151 to float
-  %1153 = fmul fast float %1152, %1146
-  %1154 = fsub fast float %1149, %1153
-  %1155 = and i32 %1151, 1
-  %1156 = icmp eq i32 %1155, 0
-  br i1 %1156, label %1157, label %1159
-
-; <label>:1157                                    ; preds = %1148
-  %1158 = fadd fast float %1154, %20
-  br label %1176
-
-; <label>:1159                                    ; preds = %1148
-  %1160 = fsub fast float %22, %1154
-  br label %1176
-
-; <label>:1161                                    ; preds = %1145
-  %1162 = fcmp fast ogt float %936, %22
-  br i1 %1162, label %1163, label %1176
-
-; <label>:1163                                    ; preds = %1161
-  %1164 = fsub fast float %936, %22
-  %1165 = fdiv fast float %1164, %1146
-  %1166 = fptoui float %1165 to i32
-  %1167 = uitofp i32 %1166 to float
-  %1168 = fmul fast float %1167, %1146
-  %1169 = fsub fast float %1164, %1168
-  %1170 = and i32 %1166, 1
-  %1171 = icmp eq i32 %1170, 0
-  br i1 %1171, label %1172, label %1174
-
-; <label>:1172                                    ; preds = %1163
-  %1173 = fsub fast float %22, %1169
-  br label %1176
-
-; <label>:1174                                    ; preds = %1163
-  %1175 = fadd fast float %1169, %20
-  br label %1176
-
-; <label>:1176                                    ; preds = %1174, %1172, %1161, %1159, %1157
-  %1177 = phi float [ %1158, %1157 ], [ %1160, %1159 ], [ %1173, %1172 ], [ %1175, %1174 ], [ %936, %1161 ]
-  %1178 = fptoui float %1177 to i32
-  %1179 = fsub fast float %24, %20
-  %1180 = fcmp fast olt float %939, %20
-  br i1 %1180, label %1181, label %1194
-
-; <label>:1181                                    ; preds = %1176
-  %1182 = fsub fast float %20, %939
-  %1183 = fdiv fast float %1182, %1179
-  %1184 = fptoui float %1183 to i32
-  %1185 = uitofp i32 %1184 to float
-  %1186 = fmul fast float %1185, %1179
-  %1187 = fsub fast float %1182, %1186
-  %1188 = and i32 %1184, 1
-  %1189 = icmp eq i32 %1188, 0
-  br i1 %1189, label %1190, label %1192
-
-; <label>:1190                                    ; preds = %1181
-  %1191 = fadd fast float %1187, %20
-  br label %1209
-
-; <label>:1192                                    ; preds = %1181
-  %1193 = fsub fast float %24, %1187
-  br label %1209
-
-; <label>:1194                                    ; preds = %1176
-  %1195 = fcmp fast ogt float %939, %24
-  br i1 %1195, label %1196, label %1209
-
-; <label>:1196                                    ; preds = %1194
-  %1197 = fsub fast float %939, %24
-  %1198 = fdiv fast float %1197, %1179
-  %1199 = fptoui float %1198 to i32
-  %1200 = uitofp i32 %1199 to float
-  %1201 = fmul fast float %1200, %1179
-  %1202 = fsub fast float %1197, %1201
-  %1203 = and i32 %1199, 1
-  %1204 = icmp eq i32 %1203, 0
-  br i1 %1204, label %1205, label %1207
-
-; <label>:1205                                    ; preds = %1196
-  %1206 = fsub fast float %24, %1202
-  br label %1209
-
-; <label>:1207                                    ; preds = %1196
-  %1208 = fadd fast float %1202, %20
-  br label %1209
-
-; <label>:1209                                    ; preds = %1207, %1205, %1194, %1192, %1190
-  %1210 = phi float [ %1191, %1190 ], [ %1193, %1192 ], [ %1206, %1205 ], [ %1208, %1207 ], [ %939, %1194 ]
-  %1211 = fptoui float %1210 to i32
-  %1212 = uitofp i32 %1211 to float
-  %1213 = uitofp i32 %1178 to float
-  %1214 = fptoui float %45 to i32
-  %1215 = fptoui float %182 to i32
-  %1216 = fptoui float %1212 to i32
-  %1217 = fptoui float %1213 to i32
-  %1218 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1219 = extractvalue %dx.types.CBufRet.i32 %1218, 0
-  %1220 = extractvalue %dx.types.CBufRet.i32 %1218, 1
-  %1221 = extractvalue %dx.types.CBufRet.i32 %1218, 2
-  %1222 = extractvalue %dx.types.CBufRet.i32 %1218, 3
-  %1223 = mul i32 %1219, %1214
-  %1224 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1215, i32 %1220, i32 %1223)  ; IMad(a,b,c)
-  %1225 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1216, i32 %1221, i32 %1224)  ; IMad(a,b,c)
-  %1226 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1217, i32 %1222, i32 %1225)  ; IMad(a,b,c)
-  %1227 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %1226, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1228 = extractvalue %dx.types.ResRet.i16 %1227, 0
-  %1229 = sitofp i16 %1228 to float
-  br label %1230
-
-; <label>:1230                                    ; preds = %1209, %1143, %1114, %1097, %1087
-  %1231 = phi float [ %1111, %1097 ], [ 0.000000e+00, %1087 ], [ %1142, %1114 ], [ %1229, %1209 ], [ 0.000000e+00, %1143 ]
-  %1232 = fadd fast float %936, 1.000000e+00
-  br i1 %941, label %1233, label %1258
-
-; <label>:1233                                    ; preds = %1230
-  %1234 = fcmp fast oge float %1232, 0.000000e+00
-  %1235 = fptoui float %1232 to i32
-  %1236 = icmp ult i32 %1235, %13
-  %1237 = and i1 %1234, %1236
-  %1238 = fcmp fast oge float %939, 0.000000e+00
-  %1239 = and i1 %1238, %1237
-  %1240 = fptoui float %939 to i32
-  %1241 = icmp ult i32 %1240, %15
-  %1242 = and i1 %1241, %1239
-  br i1 %1242, label %1243, label %1376
-
-; <label>:1243                                    ; preds = %1233
-  %1244 = fptoui float %45 to i32
-  %1245 = fptoui float %182 to i32
-  %1246 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1247 = extractvalue %dx.types.CBufRet.i32 %1246, 0
-  %1248 = extractvalue %dx.types.CBufRet.i32 %1246, 1
-  %1249 = extractvalue %dx.types.CBufRet.i32 %1246, 2
-  %1250 = extractvalue %dx.types.CBufRet.i32 %1246, 3
-  %1251 = mul i32 %1247, %1244
-  %1252 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1245, i32 %1248, i32 %1251)  ; IMad(a,b,c)
-  %1253 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1240, i32 %1249, i32 %1252)  ; IMad(a,b,c)
-  %1254 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1235, i32 %1250, i32 %1253)  ; IMad(a,b,c)
-  %1255 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %1254, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1256 = extractvalue %dx.types.ResRet.i16 %1255, 0
-  %1257 = sitofp i16 %1256 to float
-  br label %1376
-
-; <label>:1258                                    ; preds = %1230
-  %1259 = icmp eq i32 %940, 1
-  br i1 %1259, label %1260, label %1289
-
-; <label>:1260                                    ; preds = %1258
-  %1261 = add i32 %13, -1
-  %1262 = uitofp i32 %1261 to float
-  %1263 = call float @dx.op.binary.f32(i32 35, float %1232, float 0.000000e+00)  ; FMax(a,b)
-  %1264 = call float @dx.op.binary.f32(i32 36, float %1263, float %1262)  ; FMin(a,b)
-  %1265 = fptoui float %1264 to i32
-  %1266 = add i32 %15, -1
-  %1267 = uitofp i32 %1266 to float
-  %1268 = call float @dx.op.binary.f32(i32 35, float %939, float 0.000000e+00)  ; FMax(a,b)
-  %1269 = call float @dx.op.binary.f32(i32 36, float %1268, float %1267)  ; FMin(a,b)
-  %1270 = fptoui float %1269 to i32
-  %1271 = uitofp i32 %1270 to float
-  %1272 = uitofp i32 %1265 to float
-  %1273 = fptoui float %45 to i32
-  %1274 = fptoui float %182 to i32
-  %1275 = fptoui float %1271 to i32
-  %1276 = fptoui float %1272 to i32
-  %1277 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1278 = extractvalue %dx.types.CBufRet.i32 %1277, 0
-  %1279 = extractvalue %dx.types.CBufRet.i32 %1277, 1
-  %1280 = extractvalue %dx.types.CBufRet.i32 %1277, 2
-  %1281 = extractvalue %dx.types.CBufRet.i32 %1277, 3
-  %1282 = mul i32 %1278, %1273
-  %1283 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1274, i32 %1279, i32 %1282)  ; IMad(a,b,c)
-  %1284 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1275, i32 %1280, i32 %1283)  ; IMad(a,b,c)
-  %1285 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1276, i32 %1281, i32 %1284)  ; IMad(a,b,c)
-  %1286 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %1285, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1287 = extractvalue %dx.types.ResRet.i16 %1286, 0
-  %1288 = sitofp i16 %1287 to float
-  br label %1376
-
-; <label>:1289                                    ; preds = %1258
-  %1290 = icmp eq i32 %940, 2
-  br i1 %1290, label %1291, label %1376
-
-; <label>:1291                                    ; preds = %1289
-  %1292 = fsub fast float %22, %20
-  %1293 = fcmp fast olt float %1232, %20
-  br i1 %1293, label %1294, label %1307
-
-; <label>:1294                                    ; preds = %1291
-  %1295 = fsub fast float %20, %1232
-  %1296 = fdiv fast float %1295, %1292
-  %1297 = fptoui float %1296 to i32
-  %1298 = uitofp i32 %1297 to float
-  %1299 = fmul fast float %1298, %1292
-  %1300 = fsub fast float %1295, %1299
-  %1301 = and i32 %1297, 1
-  %1302 = icmp eq i32 %1301, 0
-  br i1 %1302, label %1303, label %1305
-
-; <label>:1303                                    ; preds = %1294
-  %1304 = fadd fast float %1300, %20
-  br label %1322
-
-; <label>:1305                                    ; preds = %1294
-  %1306 = fsub fast float %22, %1300
-  br label %1322
-
-; <label>:1307                                    ; preds = %1291
-  %1308 = fcmp fast ogt float %1232, %22
-  br i1 %1308, label %1309, label %1322
-
-; <label>:1309                                    ; preds = %1307
-  %1310 = fsub fast float %1232, %22
-  %1311 = fdiv fast float %1310, %1292
-  %1312 = fptoui float %1311 to i32
-  %1313 = uitofp i32 %1312 to float
-  %1314 = fmul fast float %1313, %1292
-  %1315 = fsub fast float %1310, %1314
-  %1316 = and i32 %1312, 1
-  %1317 = icmp eq i32 %1316, 0
-  br i1 %1317, label %1318, label %1320
-
-; <label>:1318                                    ; preds = %1309
-  %1319 = fsub fast float %22, %1315
-  br label %1322
-
-; <label>:1320                                    ; preds = %1309
-  %1321 = fadd fast float %1315, %20
-  br label %1322
-
-; <label>:1322                                    ; preds = %1320, %1318, %1307, %1305, %1303
-  %1323 = phi float [ %1304, %1303 ], [ %1306, %1305 ], [ %1319, %1318 ], [ %1321, %1320 ], [ %1232, %1307 ]
-  %1324 = fptoui float %1323 to i32
-  %1325 = fsub fast float %24, %20
-  %1326 = fcmp fast olt float %939, %20
-  br i1 %1326, label %1327, label %1340
-
-; <label>:1327                                    ; preds = %1322
-  %1328 = fsub fast float %20, %939
-  %1329 = fdiv fast float %1328, %1325
-  %1330 = fptoui float %1329 to i32
-  %1331 = uitofp i32 %1330 to float
-  %1332 = fmul fast float %1331, %1325
-  %1333 = fsub fast float %1328, %1332
-  %1334 = and i32 %1330, 1
-  %1335 = icmp eq i32 %1334, 0
-  br i1 %1335, label %1336, label %1338
-
-; <label>:1336                                    ; preds = %1327
-  %1337 = fadd fast float %1333, %20
-  br label %1355
-
-; <label>:1338                                    ; preds = %1327
-  %1339 = fsub fast float %24, %1333
-  br label %1355
-
-; <label>:1340                                    ; preds = %1322
-  %1341 = fcmp fast ogt float %939, %24
-  br i1 %1341, label %1342, label %1355
-
-; <label>:1342                                    ; preds = %1340
-  %1343 = fsub fast float %939, %24
-  %1344 = fdiv fast float %1343, %1325
-  %1345 = fptoui float %1344 to i32
-  %1346 = uitofp i32 %1345 to float
-  %1347 = fmul fast float %1346, %1325
-  %1348 = fsub fast float %1343, %1347
-  %1349 = and i32 %1345, 1
-  %1350 = icmp eq i32 %1349, 0
-  br i1 %1350, label %1351, label %1353
-
-; <label>:1351                                    ; preds = %1342
-  %1352 = fsub fast float %24, %1348
-  br label %1355
-
-; <label>:1353                                    ; preds = %1342
-  %1354 = fadd fast float %1348, %20
-  br label %1355
-
-; <label>:1355                                    ; preds = %1353, %1351, %1340, %1338, %1336
-  %1356 = phi float [ %1337, %1336 ], [ %1339, %1338 ], [ %1352, %1351 ], [ %1354, %1353 ], [ %939, %1340 ]
-  %1357 = fptoui float %1356 to i32
-  %1358 = uitofp i32 %1357 to float
-  %1359 = uitofp i32 %1324 to float
-  %1360 = fptoui float %45 to i32
-  %1361 = fptoui float %182 to i32
-  %1362 = fptoui float %1358 to i32
-  %1363 = fptoui float %1359 to i32
-  %1364 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1365 = extractvalue %dx.types.CBufRet.i32 %1364, 0
-  %1366 = extractvalue %dx.types.CBufRet.i32 %1364, 1
-  %1367 = extractvalue %dx.types.CBufRet.i32 %1364, 2
-  %1368 = extractvalue %dx.types.CBufRet.i32 %1364, 3
-  %1369 = mul i32 %1365, %1360
-  %1370 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1361, i32 %1366, i32 %1369)  ; IMad(a,b,c)
-  %1371 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1362, i32 %1367, i32 %1370)  ; IMad(a,b,c)
-  %1372 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1363, i32 %1368, i32 %1371)  ; IMad(a,b,c)
-  %1373 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %1372, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1374 = extractvalue %dx.types.ResRet.i16 %1373, 0
-  %1375 = sitofp i16 %1374 to float
-  br label %1376
-
-; <label>:1376                                    ; preds = %1355, %1289, %1260, %1243, %1233
-  %1377 = phi float [ %1257, %1243 ], [ 0.000000e+00, %1233 ], [ %1288, %1260 ], [ %1375, %1355 ], [ 0.000000e+00, %1289 ]
-  %1378 = fadd fast float %936, 2.000000e+00
-  br i1 %941, label %1379, label %1404
-
-; <label>:1379                                    ; preds = %1376
-  %1380 = fcmp fast oge float %1378, 0.000000e+00
-  %1381 = fptoui float %1378 to i32
-  %1382 = icmp ult i32 %1381, %13
-  %1383 = and i1 %1380, %1382
-  %1384 = fcmp fast oge float %939, 0.000000e+00
-  %1385 = and i1 %1384, %1383
-  %1386 = fptoui float %939 to i32
-  %1387 = icmp ult i32 %1386, %15
-  %1388 = and i1 %1387, %1385
-  br i1 %1388, label %1389, label %1522
-
-; <label>:1389                                    ; preds = %1379
-  %1390 = fptoui float %45 to i32
-  %1391 = fptoui float %182 to i32
-  %1392 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1393 = extractvalue %dx.types.CBufRet.i32 %1392, 0
-  %1394 = extractvalue %dx.types.CBufRet.i32 %1392, 1
-  %1395 = extractvalue %dx.types.CBufRet.i32 %1392, 2
-  %1396 = extractvalue %dx.types.CBufRet.i32 %1392, 3
-  %1397 = mul i32 %1393, %1390
-  %1398 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1391, i32 %1394, i32 %1397)  ; IMad(a,b,c)
-  %1399 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1386, i32 %1395, i32 %1398)  ; IMad(a,b,c)
-  %1400 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1381, i32 %1396, i32 %1399)  ; IMad(a,b,c)
-  %1401 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %1400, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1402 = extractvalue %dx.types.ResRet.i16 %1401, 0
-  %1403 = sitofp i16 %1402 to float
-  br label %1522
-
-; <label>:1404                                    ; preds = %1376
-  %1405 = icmp eq i32 %940, 1
-  br i1 %1405, label %1406, label %1435
-
-; <label>:1406                                    ; preds = %1404
-  %1407 = add i32 %13, -1
-  %1408 = uitofp i32 %1407 to float
-  %1409 = call float @dx.op.binary.f32(i32 35, float %1378, float 0.000000e+00)  ; FMax(a,b)
-  %1410 = call float @dx.op.binary.f32(i32 36, float %1409, float %1408)  ; FMin(a,b)
-  %1411 = fptoui float %1410 to i32
-  %1412 = add i32 %15, -1
-  %1413 = uitofp i32 %1412 to float
-  %1414 = call float @dx.op.binary.f32(i32 35, float %939, float 0.000000e+00)  ; FMax(a,b)
-  %1415 = call float @dx.op.binary.f32(i32 36, float %1414, float %1413)  ; FMin(a,b)
-  %1416 = fptoui float %1415 to i32
-  %1417 = uitofp i32 %1416 to float
-  %1418 = uitofp i32 %1411 to float
-  %1419 = fptoui float %45 to i32
-  %1420 = fptoui float %182 to i32
-  %1421 = fptoui float %1417 to i32
-  %1422 = fptoui float %1418 to i32
-  %1423 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1424 = extractvalue %dx.types.CBufRet.i32 %1423, 0
-  %1425 = extractvalue %dx.types.CBufRet.i32 %1423, 1
-  %1426 = extractvalue %dx.types.CBufRet.i32 %1423, 2
-  %1427 = extractvalue %dx.types.CBufRet.i32 %1423, 3
-  %1428 = mul i32 %1424, %1419
-  %1429 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1420, i32 %1425, i32 %1428)  ; IMad(a,b,c)
-  %1430 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1421, i32 %1426, i32 %1429)  ; IMad(a,b,c)
-  %1431 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1422, i32 %1427, i32 %1430)  ; IMad(a,b,c)
-  %1432 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %1431, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1433 = extractvalue %dx.types.ResRet.i16 %1432, 0
-  %1434 = sitofp i16 %1433 to float
-  br label %1522
-
-; <label>:1435                                    ; preds = %1404
-  %1436 = icmp eq i32 %940, 2
-  br i1 %1436, label %1437, label %1522
-
-; <label>:1437                                    ; preds = %1435
-  %1438 = fsub fast float %22, %20
-  %1439 = fcmp fast olt float %1378, %20
-  br i1 %1439, label %1440, label %1453
-
-; <label>:1440                                    ; preds = %1437
-  %1441 = fsub fast float %20, %1378
-  %1442 = fdiv fast float %1441, %1438
-  %1443 = fptoui float %1442 to i32
-  %1444 = uitofp i32 %1443 to float
-  %1445 = fmul fast float %1444, %1438
-  %1446 = fsub fast float %1441, %1445
-  %1447 = and i32 %1443, 1
-  %1448 = icmp eq i32 %1447, 0
-  br i1 %1448, label %1449, label %1451
-
-; <label>:1449                                    ; preds = %1440
-  %1450 = fadd fast float %1446, %20
-  br label %1468
-
-; <label>:1451                                    ; preds = %1440
-  %1452 = fsub fast float %22, %1446
-  br label %1468
-
-; <label>:1453                                    ; preds = %1437
-  %1454 = fcmp fast ogt float %1378, %22
-  br i1 %1454, label %1455, label %1468
-
-; <label>:1455                                    ; preds = %1453
-  %1456 = fsub fast float %1378, %22
-  %1457 = fdiv fast float %1456, %1438
-  %1458 = fptoui float %1457 to i32
-  %1459 = uitofp i32 %1458 to float
-  %1460 = fmul fast float %1459, %1438
-  %1461 = fsub fast float %1456, %1460
-  %1462 = and i32 %1458, 1
-  %1463 = icmp eq i32 %1462, 0
-  br i1 %1463, label %1464, label %1466
-
-; <label>:1464                                    ; preds = %1455
-  %1465 = fsub fast float %22, %1461
-  br label %1468
-
-; <label>:1466                                    ; preds = %1455
-  %1467 = fadd fast float %1461, %20
-  br label %1468
-
-; <label>:1468                                    ; preds = %1466, %1464, %1453, %1451, %1449
-  %1469 = phi float [ %1450, %1449 ], [ %1452, %1451 ], [ %1465, %1464 ], [ %1467, %1466 ], [ %1378, %1453 ]
-  %1470 = fptoui float %1469 to i32
-  %1471 = fsub fast float %24, %20
-  %1472 = fcmp fast olt float %939, %20
-  br i1 %1472, label %1473, label %1486
-
-; <label>:1473                                    ; preds = %1468
-  %1474 = fsub fast float %20, %939
-  %1475 = fdiv fast float %1474, %1471
-  %1476 = fptoui float %1475 to i32
-  %1477 = uitofp i32 %1476 to float
-  %1478 = fmul fast float %1477, %1471
-  %1479 = fsub fast float %1474, %1478
-  %1480 = and i32 %1476, 1
-  %1481 = icmp eq i32 %1480, 0
-  br i1 %1481, label %1482, label %1484
-
-; <label>:1482                                    ; preds = %1473
-  %1483 = fadd fast float %1479, %20
-  br label %1501
-
-; <label>:1484                                    ; preds = %1473
-  %1485 = fsub fast float %24, %1479
-  br label %1501
-
-; <label>:1486                                    ; preds = %1468
-  %1487 = fcmp fast ogt float %939, %24
-  br i1 %1487, label %1488, label %1501
-
-; <label>:1488                                    ; preds = %1486
-  %1489 = fsub fast float %939, %24
-  %1490 = fdiv fast float %1489, %1471
-  %1491 = fptoui float %1490 to i32
-  %1492 = uitofp i32 %1491 to float
-  %1493 = fmul fast float %1492, %1471
-  %1494 = fsub fast float %1489, %1493
-  %1495 = and i32 %1491, 1
-  %1496 = icmp eq i32 %1495, 0
-  br i1 %1496, label %1497, label %1499
-
-; <label>:1497                                    ; preds = %1488
-  %1498 = fsub fast float %24, %1494
-  br label %1501
-
-; <label>:1499                                    ; preds = %1488
-  %1500 = fadd fast float %1494, %20
-  br label %1501
-
-; <label>:1501                                    ; preds = %1499, %1497, %1486, %1484, %1482
-  %1502 = phi float [ %1483, %1482 ], [ %1485, %1484 ], [ %1498, %1497 ], [ %1500, %1499 ], [ %939, %1486 ]
-  %1503 = fptoui float %1502 to i32
-  %1504 = uitofp i32 %1503 to float
-  %1505 = uitofp i32 %1470 to float
-  %1506 = fptoui float %45 to i32
-  %1507 = fptoui float %182 to i32
-  %1508 = fptoui float %1504 to i32
-  %1509 = fptoui float %1505 to i32
-  %1510 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1511 = extractvalue %dx.types.CBufRet.i32 %1510, 0
-  %1512 = extractvalue %dx.types.CBufRet.i32 %1510, 1
-  %1513 = extractvalue %dx.types.CBufRet.i32 %1510, 2
-  %1514 = extractvalue %dx.types.CBufRet.i32 %1510, 3
-  %1515 = mul i32 %1511, %1506
-  %1516 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1507, i32 %1512, i32 %1515)  ; IMad(a,b,c)
-  %1517 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1508, i32 %1513, i32 %1516)  ; IMad(a,b,c)
-  %1518 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1509, i32 %1514, i32 %1517)  ; IMad(a,b,c)
-  %1519 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %1518, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1520 = extractvalue %dx.types.ResRet.i16 %1519, 0
-  %1521 = sitofp i16 %1520 to float
-  br label %1522
-
-; <label>:1522                                    ; preds = %1501, %1435, %1406, %1389, %1379
-  %1523 = phi float [ %1403, %1389 ], [ 0.000000e+00, %1379 ], [ %1434, %1406 ], [ %1521, %1501 ], [ 0.000000e+00, %1435 ]
-  br i1 %941, label %1524, label %1549
-
-; <label>:1524                                    ; preds = %1522
-  %1525 = fcmp fast oge float %937, 0.000000e+00
-  %1526 = fptoui float %937 to i32
-  %1527 = icmp ult i32 %1526, %13
-  %1528 = and i1 %1525, %1527
-  %1529 = fcmp fast oge float %938, 0.000000e+00
-  %1530 = and i1 %1529, %1528
-  %1531 = fptoui float %938 to i32
-  %1532 = icmp ult i32 %1531, %15
-  %1533 = and i1 %1532, %1530
-  br i1 %1533, label %1534, label %1667
-
-; <label>:1534                                    ; preds = %1524
-  %1535 = fptoui float %45 to i32
-  %1536 = fptoui float %182 to i32
-  %1537 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1538 = extractvalue %dx.types.CBufRet.i32 %1537, 0
-  %1539 = extractvalue %dx.types.CBufRet.i32 %1537, 1
-  %1540 = extractvalue %dx.types.CBufRet.i32 %1537, 2
-  %1541 = extractvalue %dx.types.CBufRet.i32 %1537, 3
-  %1542 = mul i32 %1538, %1535
-  %1543 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1536, i32 %1539, i32 %1542)  ; IMad(a,b,c)
-  %1544 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1531, i32 %1540, i32 %1543)  ; IMad(a,b,c)
-  %1545 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1526, i32 %1541, i32 %1544)  ; IMad(a,b,c)
-  %1546 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %1545, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1547 = extractvalue %dx.types.ResRet.i16 %1546, 0
-  %1548 = sitofp i16 %1547 to float
-  br label %1667
-
-; <label>:1549                                    ; preds = %1522
-  %1550 = icmp eq i32 %940, 1
-  br i1 %1550, label %1551, label %1580
-
-; <label>:1551                                    ; preds = %1549
-  %1552 = add i32 %13, -1
-  %1553 = uitofp i32 %1552 to float
-  %1554 = call float @dx.op.binary.f32(i32 35, float %937, float 0.000000e+00)  ; FMax(a,b)
-  %1555 = call float @dx.op.binary.f32(i32 36, float %1554, float %1553)  ; FMin(a,b)
-  %1556 = fptoui float %1555 to i32
-  %1557 = add i32 %15, -1
-  %1558 = uitofp i32 %1557 to float
-  %1559 = call float @dx.op.binary.f32(i32 35, float %938, float 0.000000e+00)  ; FMax(a,b)
-  %1560 = call float @dx.op.binary.f32(i32 36, float %1559, float %1558)  ; FMin(a,b)
-  %1561 = fptoui float %1560 to i32
-  %1562 = uitofp i32 %1561 to float
-  %1563 = uitofp i32 %1556 to float
-  %1564 = fptoui float %45 to i32
-  %1565 = fptoui float %182 to i32
-  %1566 = fptoui float %1562 to i32
-  %1567 = fptoui float %1563 to i32
-  %1568 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1569 = extractvalue %dx.types.CBufRet.i32 %1568, 0
-  %1570 = extractvalue %dx.types.CBufRet.i32 %1568, 1
-  %1571 = extractvalue %dx.types.CBufRet.i32 %1568, 2
-  %1572 = extractvalue %dx.types.CBufRet.i32 %1568, 3
-  %1573 = mul i32 %1569, %1564
-  %1574 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1565, i32 %1570, i32 %1573)  ; IMad(a,b,c)
-  %1575 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1566, i32 %1571, i32 %1574)  ; IMad(a,b,c)
-  %1576 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1567, i32 %1572, i32 %1575)  ; IMad(a,b,c)
-  %1577 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %1576, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1578 = extractvalue %dx.types.ResRet.i16 %1577, 0
-  %1579 = sitofp i16 %1578 to float
-  br label %1667
-
-; <label>:1580                                    ; preds = %1549
-  %1581 = icmp eq i32 %940, 2
-  br i1 %1581, label %1582, label %1667
-
-; <label>:1582                                    ; preds = %1580
-  %1583 = fsub fast float %22, %20
-  %1584 = fcmp fast olt float %937, %20
-  br i1 %1584, label %1585, label %1598
-
-; <label>:1585                                    ; preds = %1582
-  %1586 = fsub fast float %20, %937
-  %1587 = fdiv fast float %1586, %1583
-  %1588 = fptoui float %1587 to i32
-  %1589 = uitofp i32 %1588 to float
-  %1590 = fmul fast float %1589, %1583
-  %1591 = fsub fast float %1586, %1590
-  %1592 = and i32 %1588, 1
-  %1593 = icmp eq i32 %1592, 0
-  br i1 %1593, label %1594, label %1596
-
-; <label>:1594                                    ; preds = %1585
-  %1595 = fadd fast float %1591, %20
-  br label %1613
-
-; <label>:1596                                    ; preds = %1585
-  %1597 = fsub fast float %22, %1591
-  br label %1613
-
-; <label>:1598                                    ; preds = %1582
-  %1599 = fcmp fast ogt float %937, %22
-  br i1 %1599, label %1600, label %1613
-
-; <label>:1600                                    ; preds = %1598
-  %1601 = fsub fast float %937, %22
-  %1602 = fdiv fast float %1601, %1583
-  %1603 = fptoui float %1602 to i32
-  %1604 = uitofp i32 %1603 to float
-  %1605 = fmul fast float %1604, %1583
-  %1606 = fsub fast float %1601, %1605
-  %1607 = and i32 %1603, 1
-  %1608 = icmp eq i32 %1607, 0
-  br i1 %1608, label %1609, label %1611
-
-; <label>:1609                                    ; preds = %1600
-  %1610 = fsub fast float %22, %1606
-  br label %1613
-
-; <label>:1611                                    ; preds = %1600
-  %1612 = fadd fast float %1606, %20
-  br label %1613
-
-; <label>:1613                                    ; preds = %1611, %1609, %1598, %1596, %1594
-  %1614 = phi float [ %1595, %1594 ], [ %1597, %1596 ], [ %1610, %1609 ], [ %1612, %1611 ], [ %937, %1598 ]
-  %1615 = fptoui float %1614 to i32
-  %1616 = fsub fast float %24, %20
-  %1617 = fcmp fast olt float %938, %20
-  br i1 %1617, label %1618, label %1631
-
-; <label>:1618                                    ; preds = %1613
-  %1619 = fsub fast float %20, %938
-  %1620 = fdiv fast float %1619, %1616
-  %1621 = fptoui float %1620 to i32
-  %1622 = uitofp i32 %1621 to float
-  %1623 = fmul fast float %1622, %1616
-  %1624 = fsub fast float %1619, %1623
-  %1625 = and i32 %1621, 1
-  %1626 = icmp eq i32 %1625, 0
-  br i1 %1626, label %1627, label %1629
-
-; <label>:1627                                    ; preds = %1618
-  %1628 = fadd fast float %1624, %20
-  br label %1646
-
-; <label>:1629                                    ; preds = %1618
-  %1630 = fsub fast float %24, %1624
-  br label %1646
-
-; <label>:1631                                    ; preds = %1613
-  %1632 = fcmp fast ogt float %938, %24
-  br i1 %1632, label %1633, label %1646
-
-; <label>:1633                                    ; preds = %1631
-  %1634 = fsub fast float %938, %24
-  %1635 = fdiv fast float %1634, %1616
-  %1636 = fptoui float %1635 to i32
-  %1637 = uitofp i32 %1636 to float
-  %1638 = fmul fast float %1637, %1616
-  %1639 = fsub fast float %1634, %1638
-  %1640 = and i32 %1636, 1
-  %1641 = icmp eq i32 %1640, 0
-  br i1 %1641, label %1642, label %1644
-
-; <label>:1642                                    ; preds = %1633
-  %1643 = fsub fast float %24, %1639
-  br label %1646
-
-; <label>:1644                                    ; preds = %1633
-  %1645 = fadd fast float %1639, %20
-  br label %1646
-
-; <label>:1646                                    ; preds = %1644, %1642, %1631, %1629, %1627
-  %1647 = phi float [ %1628, %1627 ], [ %1630, %1629 ], [ %1643, %1642 ], [ %1645, %1644 ], [ %938, %1631 ]
-  %1648 = fptoui float %1647 to i32
-  %1649 = uitofp i32 %1648 to float
-  %1650 = uitofp i32 %1615 to float
-  %1651 = fptoui float %45 to i32
-  %1652 = fptoui float %182 to i32
-  %1653 = fptoui float %1649 to i32
-  %1654 = fptoui float %1650 to i32
-  %1655 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1656 = extractvalue %dx.types.CBufRet.i32 %1655, 0
-  %1657 = extractvalue %dx.types.CBufRet.i32 %1655, 1
-  %1658 = extractvalue %dx.types.CBufRet.i32 %1655, 2
-  %1659 = extractvalue %dx.types.CBufRet.i32 %1655, 3
-  %1660 = mul i32 %1656, %1651
-  %1661 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1652, i32 %1657, i32 %1660)  ; IMad(a,b,c)
-  %1662 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1653, i32 %1658, i32 %1661)  ; IMad(a,b,c)
-  %1663 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1654, i32 %1659, i32 %1662)  ; IMad(a,b,c)
-  %1664 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %1663, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1665 = extractvalue %dx.types.ResRet.i16 %1664, 0
-  %1666 = sitofp i16 %1665 to float
-  br label %1667
-
-; <label>:1667                                    ; preds = %1646, %1580, %1551, %1534, %1524
-  %1668 = phi float [ %1548, %1534 ], [ 0.000000e+00, %1524 ], [ %1579, %1551 ], [ %1666, %1646 ], [ 0.000000e+00, %1580 ]
-  br i1 %941, label %1669, label %1694
-
-; <label>:1669                                    ; preds = %1667
-  %1670 = fcmp fast oge float %936, 0.000000e+00
-  %1671 = fptoui float %936 to i32
-  %1672 = icmp ult i32 %1671, %13
-  %1673 = and i1 %1670, %1672
-  %1674 = fcmp fast oge float %938, 0.000000e+00
-  %1675 = and i1 %1674, %1673
-  %1676 = fptoui float %938 to i32
-  %1677 = icmp ult i32 %1676, %15
-  %1678 = and i1 %1677, %1675
-  br i1 %1678, label %1679, label %1812
-
-; <label>:1679                                    ; preds = %1669
-  %1680 = fptoui float %45 to i32
-  %1681 = fptoui float %182 to i32
-  %1682 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1683 = extractvalue %dx.types.CBufRet.i32 %1682, 0
-  %1684 = extractvalue %dx.types.CBufRet.i32 %1682, 1
-  %1685 = extractvalue %dx.types.CBufRet.i32 %1682, 2
-  %1686 = extractvalue %dx.types.CBufRet.i32 %1682, 3
-  %1687 = mul i32 %1683, %1680
-  %1688 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1681, i32 %1684, i32 %1687)  ; IMad(a,b,c)
-  %1689 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1676, i32 %1685, i32 %1688)  ; IMad(a,b,c)
-  %1690 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1671, i32 %1686, i32 %1689)  ; IMad(a,b,c)
-  %1691 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %1690, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1692 = extractvalue %dx.types.ResRet.i16 %1691, 0
-  %1693 = sitofp i16 %1692 to float
-  br label %1812
-
-; <label>:1694                                    ; preds = %1667
-  %1695 = icmp eq i32 %940, 1
-  br i1 %1695, label %1696, label %1725
-
-; <label>:1696                                    ; preds = %1694
-  %1697 = add i32 %13, -1
-  %1698 = uitofp i32 %1697 to float
-  %1699 = call float @dx.op.binary.f32(i32 35, float %936, float 0.000000e+00)  ; FMax(a,b)
-  %1700 = call float @dx.op.binary.f32(i32 36, float %1699, float %1698)  ; FMin(a,b)
-  %1701 = fptoui float %1700 to i32
-  %1702 = add i32 %15, -1
-  %1703 = uitofp i32 %1702 to float
-  %1704 = call float @dx.op.binary.f32(i32 35, float %938, float 0.000000e+00)  ; FMax(a,b)
-  %1705 = call float @dx.op.binary.f32(i32 36, float %1704, float %1703)  ; FMin(a,b)
-  %1706 = fptoui float %1705 to i32
-  %1707 = uitofp i32 %1706 to float
-  %1708 = uitofp i32 %1701 to float
-  %1709 = fptoui float %45 to i32
-  %1710 = fptoui float %182 to i32
-  %1711 = fptoui float %1707 to i32
-  %1712 = fptoui float %1708 to i32
-  %1713 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1714 = extractvalue %dx.types.CBufRet.i32 %1713, 0
-  %1715 = extractvalue %dx.types.CBufRet.i32 %1713, 1
-  %1716 = extractvalue %dx.types.CBufRet.i32 %1713, 2
-  %1717 = extractvalue %dx.types.CBufRet.i32 %1713, 3
-  %1718 = mul i32 %1714, %1709
-  %1719 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1710, i32 %1715, i32 %1718)  ; IMad(a,b,c)
-  %1720 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1711, i32 %1716, i32 %1719)  ; IMad(a,b,c)
-  %1721 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1712, i32 %1717, i32 %1720)  ; IMad(a,b,c)
-  %1722 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %1721, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1723 = extractvalue %dx.types.ResRet.i16 %1722, 0
-  %1724 = sitofp i16 %1723 to float
-  br label %1812
-
-; <label>:1725                                    ; preds = %1694
-  %1726 = icmp eq i32 %940, 2
-  br i1 %1726, label %1727, label %1812
-
-; <label>:1727                                    ; preds = %1725
-  %1728 = fsub fast float %22, %20
-  %1729 = fcmp fast olt float %936, %20
-  br i1 %1729, label %1730, label %1743
-
-; <label>:1730                                    ; preds = %1727
-  %1731 = fsub fast float %20, %936
-  %1732 = fdiv fast float %1731, %1728
-  %1733 = fptoui float %1732 to i32
-  %1734 = uitofp i32 %1733 to float
-  %1735 = fmul fast float %1734, %1728
-  %1736 = fsub fast float %1731, %1735
-  %1737 = and i32 %1733, 1
-  %1738 = icmp eq i32 %1737, 0
-  br i1 %1738, label %1739, label %1741
-
-; <label>:1739                                    ; preds = %1730
-  %1740 = fadd fast float %1736, %20
-  br label %1758
-
-; <label>:1741                                    ; preds = %1730
-  %1742 = fsub fast float %22, %1736
-  br label %1758
-
-; <label>:1743                                    ; preds = %1727
-  %1744 = fcmp fast ogt float %936, %22
-  br i1 %1744, label %1745, label %1758
-
-; <label>:1745                                    ; preds = %1743
-  %1746 = fsub fast float %936, %22
-  %1747 = fdiv fast float %1746, %1728
-  %1748 = fptoui float %1747 to i32
-  %1749 = uitofp i32 %1748 to float
-  %1750 = fmul fast float %1749, %1728
-  %1751 = fsub fast float %1746, %1750
-  %1752 = and i32 %1748, 1
-  %1753 = icmp eq i32 %1752, 0
-  br i1 %1753, label %1754, label %1756
-
-; <label>:1754                                    ; preds = %1745
-  %1755 = fsub fast float %22, %1751
-  br label %1758
-
-; <label>:1756                                    ; preds = %1745
-  %1757 = fadd fast float %1751, %20
-  br label %1758
-
-; <label>:1758                                    ; preds = %1756, %1754, %1743, %1741, %1739
-  %1759 = phi float [ %1740, %1739 ], [ %1742, %1741 ], [ %1755, %1754 ], [ %1757, %1756 ], [ %936, %1743 ]
-  %1760 = fptoui float %1759 to i32
-  %1761 = fsub fast float %24, %20
-  %1762 = fcmp fast olt float %938, %20
-  br i1 %1762, label %1763, label %1776
-
-; <label>:1763                                    ; preds = %1758
-  %1764 = fsub fast float %20, %938
-  %1765 = fdiv fast float %1764, %1761
-  %1766 = fptoui float %1765 to i32
-  %1767 = uitofp i32 %1766 to float
-  %1768 = fmul fast float %1767, %1761
-  %1769 = fsub fast float %1764, %1768
-  %1770 = and i32 %1766, 1
-  %1771 = icmp eq i32 %1770, 0
-  br i1 %1771, label %1772, label %1774
-
-; <label>:1772                                    ; preds = %1763
-  %1773 = fadd fast float %1769, %20
-  br label %1791
-
-; <label>:1774                                    ; preds = %1763
-  %1775 = fsub fast float %24, %1769
-  br label %1791
-
-; <label>:1776                                    ; preds = %1758
-  %1777 = fcmp fast ogt float %938, %24
-  br i1 %1777, label %1778, label %1791
-
-; <label>:1778                                    ; preds = %1776
-  %1779 = fsub fast float %938, %24
-  %1780 = fdiv fast float %1779, %1761
-  %1781 = fptoui float %1780 to i32
-  %1782 = uitofp i32 %1781 to float
-  %1783 = fmul fast float %1782, %1761
-  %1784 = fsub fast float %1779, %1783
-  %1785 = and i32 %1781, 1
-  %1786 = icmp eq i32 %1785, 0
-  br i1 %1786, label %1787, label %1789
-
-; <label>:1787                                    ; preds = %1778
-  %1788 = fsub fast float %24, %1784
-  br label %1791
-
-; <label>:1789                                    ; preds = %1778
-  %1790 = fadd fast float %1784, %20
-  br label %1791
-
-; <label>:1791                                    ; preds = %1789, %1787, %1776, %1774, %1772
-  %1792 = phi float [ %1773, %1772 ], [ %1775, %1774 ], [ %1788, %1787 ], [ %1790, %1789 ], [ %938, %1776 ]
-  %1793 = fptoui float %1792 to i32
-  %1794 = uitofp i32 %1793 to float
-  %1795 = uitofp i32 %1760 to float
-  %1796 = fptoui float %45 to i32
-  %1797 = fptoui float %182 to i32
-  %1798 = fptoui float %1794 to i32
-  %1799 = fptoui float %1795 to i32
-  %1800 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1801 = extractvalue %dx.types.CBufRet.i32 %1800, 0
-  %1802 = extractvalue %dx.types.CBufRet.i32 %1800, 1
-  %1803 = extractvalue %dx.types.CBufRet.i32 %1800, 2
-  %1804 = extractvalue %dx.types.CBufRet.i32 %1800, 3
-  %1805 = mul i32 %1801, %1796
-  %1806 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1797, i32 %1802, i32 %1805)  ; IMad(a,b,c)
-  %1807 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1798, i32 %1803, i32 %1806)  ; IMad(a,b,c)
-  %1808 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1799, i32 %1804, i32 %1807)  ; IMad(a,b,c)
-  %1809 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %1808, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1810 = extractvalue %dx.types.ResRet.i16 %1809, 0
-  %1811 = sitofp i16 %1810 to float
-  br label %1812
-
-; <label>:1812                                    ; preds = %1791, %1725, %1696, %1679, %1669
-  %1813 = phi float [ %1693, %1679 ], [ 0.000000e+00, %1669 ], [ %1724, %1696 ], [ %1811, %1791 ], [ 0.000000e+00, %1725 ]
-  br i1 %941, label %1814, label %1839
-
-; <label>:1814                                    ; preds = %1812
-  %1815 = fcmp fast oge float %1232, 0.000000e+00
-  %1816 = fptoui float %1232 to i32
-  %1817 = icmp ult i32 %1816, %13
-  %1818 = and i1 %1815, %1817
-  %1819 = fcmp fast oge float %938, 0.000000e+00
-  %1820 = and i1 %1819, %1818
-  %1821 = fptoui float %938 to i32
-  %1822 = icmp ult i32 %1821, %15
-  %1823 = and i1 %1822, %1820
-  br i1 %1823, label %1824, label %1957
-
-; <label>:1824                                    ; preds = %1814
-  %1825 = fptoui float %45 to i32
-  %1826 = fptoui float %182 to i32
-  %1827 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1828 = extractvalue %dx.types.CBufRet.i32 %1827, 0
-  %1829 = extractvalue %dx.types.CBufRet.i32 %1827, 1
-  %1830 = extractvalue %dx.types.CBufRet.i32 %1827, 2
-  %1831 = extractvalue %dx.types.CBufRet.i32 %1827, 3
-  %1832 = mul i32 %1828, %1825
-  %1833 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1826, i32 %1829, i32 %1832)  ; IMad(a,b,c)
-  %1834 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1821, i32 %1830, i32 %1833)  ; IMad(a,b,c)
-  %1835 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1816, i32 %1831, i32 %1834)  ; IMad(a,b,c)
-  %1836 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %1835, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1837 = extractvalue %dx.types.ResRet.i16 %1836, 0
-  %1838 = sitofp i16 %1837 to float
-  br label %1957
-
-; <label>:1839                                    ; preds = %1812
-  %1840 = icmp eq i32 %940, 1
-  br i1 %1840, label %1841, label %1870
-
-; <label>:1841                                    ; preds = %1839
-  %1842 = add i32 %13, -1
-  %1843 = uitofp i32 %1842 to float
-  %1844 = call float @dx.op.binary.f32(i32 35, float %1232, float 0.000000e+00)  ; FMax(a,b)
-  %1845 = call float @dx.op.binary.f32(i32 36, float %1844, float %1843)  ; FMin(a,b)
-  %1846 = fptoui float %1845 to i32
-  %1847 = add i32 %15, -1
-  %1848 = uitofp i32 %1847 to float
-  %1849 = call float @dx.op.binary.f32(i32 35, float %938, float 0.000000e+00)  ; FMax(a,b)
-  %1850 = call float @dx.op.binary.f32(i32 36, float %1849, float %1848)  ; FMin(a,b)
-  %1851 = fptoui float %1850 to i32
-  %1852 = uitofp i32 %1851 to float
-  %1853 = uitofp i32 %1846 to float
-  %1854 = fptoui float %45 to i32
-  %1855 = fptoui float %182 to i32
-  %1856 = fptoui float %1852 to i32
-  %1857 = fptoui float %1853 to i32
-  %1858 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1859 = extractvalue %dx.types.CBufRet.i32 %1858, 0
-  %1860 = extractvalue %dx.types.CBufRet.i32 %1858, 1
-  %1861 = extractvalue %dx.types.CBufRet.i32 %1858, 2
-  %1862 = extractvalue %dx.types.CBufRet.i32 %1858, 3
-  %1863 = mul i32 %1859, %1854
-  %1864 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1855, i32 %1860, i32 %1863)  ; IMad(a,b,c)
-  %1865 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1856, i32 %1861, i32 %1864)  ; IMad(a,b,c)
-  %1866 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1857, i32 %1862, i32 %1865)  ; IMad(a,b,c)
-  %1867 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %1866, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1868 = extractvalue %dx.types.ResRet.i16 %1867, 0
-  %1869 = sitofp i16 %1868 to float
-  br label %1957
-
-; <label>:1870                                    ; preds = %1839
-  %1871 = icmp eq i32 %940, 2
-  br i1 %1871, label %1872, label %1957
-
-; <label>:1872                                    ; preds = %1870
-  %1873 = fsub fast float %22, %20
-  %1874 = fcmp fast olt float %1232, %20
-  br i1 %1874, label %1875, label %1888
-
-; <label>:1875                                    ; preds = %1872
-  %1876 = fsub fast float %20, %1232
-  %1877 = fdiv fast float %1876, %1873
-  %1878 = fptoui float %1877 to i32
-  %1879 = uitofp i32 %1878 to float
-  %1880 = fmul fast float %1879, %1873
-  %1881 = fsub fast float %1876, %1880
-  %1882 = and i32 %1878, 1
-  %1883 = icmp eq i32 %1882, 0
-  br i1 %1883, label %1884, label %1886
-
-; <label>:1884                                    ; preds = %1875
-  %1885 = fadd fast float %1881, %20
-  br label %1903
-
-; <label>:1886                                    ; preds = %1875
-  %1887 = fsub fast float %22, %1881
-  br label %1903
-
-; <label>:1888                                    ; preds = %1872
-  %1889 = fcmp fast ogt float %1232, %22
-  br i1 %1889, label %1890, label %1903
-
-; <label>:1890                                    ; preds = %1888
-  %1891 = fsub fast float %1232, %22
-  %1892 = fdiv fast float %1891, %1873
-  %1893 = fptoui float %1892 to i32
-  %1894 = uitofp i32 %1893 to float
-  %1895 = fmul fast float %1894, %1873
-  %1896 = fsub fast float %1891, %1895
-  %1897 = and i32 %1893, 1
-  %1898 = icmp eq i32 %1897, 0
-  br i1 %1898, label %1899, label %1901
-
-; <label>:1899                                    ; preds = %1890
-  %1900 = fsub fast float %22, %1896
-  br label %1903
-
-; <label>:1901                                    ; preds = %1890
-  %1902 = fadd fast float %1896, %20
-  br label %1903
-
-; <label>:1903                                    ; preds = %1901, %1899, %1888, %1886, %1884
-  %1904 = phi float [ %1885, %1884 ], [ %1887, %1886 ], [ %1900, %1899 ], [ %1902, %1901 ], [ %1232, %1888 ]
-  %1905 = fptoui float %1904 to i32
-  %1906 = fsub fast float %24, %20
-  %1907 = fcmp fast olt float %938, %20
-  br i1 %1907, label %1908, label %1921
-
-; <label>:1908                                    ; preds = %1903
-  %1909 = fsub fast float %20, %938
-  %1910 = fdiv fast float %1909, %1906
-  %1911 = fptoui float %1910 to i32
-  %1912 = uitofp i32 %1911 to float
-  %1913 = fmul fast float %1912, %1906
-  %1914 = fsub fast float %1909, %1913
-  %1915 = and i32 %1911, 1
-  %1916 = icmp eq i32 %1915, 0
-  br i1 %1916, label %1917, label %1919
-
-; <label>:1917                                    ; preds = %1908
-  %1918 = fadd fast float %1914, %20
-  br label %1936
-
-; <label>:1919                                    ; preds = %1908
-  %1920 = fsub fast float %24, %1914
-  br label %1936
-
-; <label>:1921                                    ; preds = %1903
-  %1922 = fcmp fast ogt float %938, %24
-  br i1 %1922, label %1923, label %1936
-
-; <label>:1923                                    ; preds = %1921
-  %1924 = fsub fast float %938, %24
-  %1925 = fdiv fast float %1924, %1906
-  %1926 = fptoui float %1925 to i32
-  %1927 = uitofp i32 %1926 to float
-  %1928 = fmul fast float %1927, %1906
-  %1929 = fsub fast float %1924, %1928
-  %1930 = and i32 %1926, 1
-  %1931 = icmp eq i32 %1930, 0
-  br i1 %1931, label %1932, label %1934
-
-; <label>:1932                                    ; preds = %1923
-  %1933 = fsub fast float %24, %1929
-  br label %1936
-
-; <label>:1934                                    ; preds = %1923
-  %1935 = fadd fast float %1929, %20
-  br label %1936
-
-; <label>:1936                                    ; preds = %1934, %1932, %1921, %1919, %1917
-  %1937 = phi float [ %1918, %1917 ], [ %1920, %1919 ], [ %1933, %1932 ], [ %1935, %1934 ], [ %938, %1921 ]
-  %1938 = fptoui float %1937 to i32
-  %1939 = uitofp i32 %1938 to float
-  %1940 = uitofp i32 %1905 to float
-  %1941 = fptoui float %45 to i32
-  %1942 = fptoui float %182 to i32
-  %1943 = fptoui float %1939 to i32
-  %1944 = fptoui float %1940 to i32
-  %1945 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1946 = extractvalue %dx.types.CBufRet.i32 %1945, 0
-  %1947 = extractvalue %dx.types.CBufRet.i32 %1945, 1
-  %1948 = extractvalue %dx.types.CBufRet.i32 %1945, 2
-  %1949 = extractvalue %dx.types.CBufRet.i32 %1945, 3
-  %1950 = mul i32 %1946, %1941
-  %1951 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1942, i32 %1947, i32 %1950)  ; IMad(a,b,c)
-  %1952 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1943, i32 %1948, i32 %1951)  ; IMad(a,b,c)
-  %1953 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1944, i32 %1949, i32 %1952)  ; IMad(a,b,c)
-  %1954 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %1953, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1955 = extractvalue %dx.types.ResRet.i16 %1954, 0
-  %1956 = sitofp i16 %1955 to float
-  br label %1957
-
-; <label>:1957                                    ; preds = %1936, %1870, %1841, %1824, %1814
-  %1958 = phi float [ %1838, %1824 ], [ 0.000000e+00, %1814 ], [ %1869, %1841 ], [ %1956, %1936 ], [ 0.000000e+00, %1870 ]
-  br i1 %941, label %1959, label %1984
-
-; <label>:1959                                    ; preds = %1957
-  %1960 = fcmp fast oge float %1378, 0.000000e+00
-  %1961 = fptoui float %1378 to i32
-  %1962 = icmp ult i32 %1961, %13
-  %1963 = and i1 %1960, %1962
-  %1964 = fcmp fast oge float %938, 0.000000e+00
-  %1965 = and i1 %1964, %1963
-  %1966 = fptoui float %938 to i32
-  %1967 = icmp ult i32 %1966, %15
-  %1968 = and i1 %1967, %1965
-  br i1 %1968, label %1969, label %2102
-
-; <label>:1969                                    ; preds = %1959
-  %1970 = fptoui float %45 to i32
-  %1971 = fptoui float %182 to i32
-  %1972 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1973 = extractvalue %dx.types.CBufRet.i32 %1972, 0
-  %1974 = extractvalue %dx.types.CBufRet.i32 %1972, 1
-  %1975 = extractvalue %dx.types.CBufRet.i32 %1972, 2
-  %1976 = extractvalue %dx.types.CBufRet.i32 %1972, 3
-  %1977 = mul i32 %1973, %1970
-  %1978 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1971, i32 %1974, i32 %1977)  ; IMad(a,b,c)
-  %1979 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1966, i32 %1975, i32 %1978)  ; IMad(a,b,c)
-  %1980 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1961, i32 %1976, i32 %1979)  ; IMad(a,b,c)
-  %1981 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %1980, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1982 = extractvalue %dx.types.ResRet.i16 %1981, 0
-  %1983 = sitofp i16 %1982 to float
-  br label %2102
-
-; <label>:1984                                    ; preds = %1957
-  %1985 = icmp eq i32 %940, 1
-  br i1 %1985, label %1986, label %2015
-
-; <label>:1986                                    ; preds = %1984
-  %1987 = add i32 %13, -1
-  %1988 = uitofp i32 %1987 to float
-  %1989 = call float @dx.op.binary.f32(i32 35, float %1378, float 0.000000e+00)  ; FMax(a,b)
-  %1990 = call float @dx.op.binary.f32(i32 36, float %1989, float %1988)  ; FMin(a,b)
-  %1991 = fptoui float %1990 to i32
-  %1992 = add i32 %15, -1
-  %1993 = uitofp i32 %1992 to float
-  %1994 = call float @dx.op.binary.f32(i32 35, float %938, float 0.000000e+00)  ; FMax(a,b)
-  %1995 = call float @dx.op.binary.f32(i32 36, float %1994, float %1993)  ; FMin(a,b)
-  %1996 = fptoui float %1995 to i32
-  %1997 = uitofp i32 %1996 to float
-  %1998 = uitofp i32 %1991 to float
-  %1999 = fptoui float %45 to i32
-  %2000 = fptoui float %182 to i32
-  %2001 = fptoui float %1997 to i32
-  %2002 = fptoui float %1998 to i32
-  %2003 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2004 = extractvalue %dx.types.CBufRet.i32 %2003, 0
-  %2005 = extractvalue %dx.types.CBufRet.i32 %2003, 1
-  %2006 = extractvalue %dx.types.CBufRet.i32 %2003, 2
-  %2007 = extractvalue %dx.types.CBufRet.i32 %2003, 3
-  %2008 = mul i32 %2004, %1999
-  %2009 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2000, i32 %2005, i32 %2008)  ; IMad(a,b,c)
-  %2010 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2001, i32 %2006, i32 %2009)  ; IMad(a,b,c)
-  %2011 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2002, i32 %2007, i32 %2010)  ; IMad(a,b,c)
-  %2012 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %2011, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2013 = extractvalue %dx.types.ResRet.i16 %2012, 0
-  %2014 = sitofp i16 %2013 to float
-  br label %2102
-
-; <label>:2015                                    ; preds = %1984
-  %2016 = icmp eq i32 %940, 2
-  br i1 %2016, label %2017, label %2102
-
-; <label>:2017                                    ; preds = %2015
-  %2018 = fsub fast float %22, %20
-  %2019 = fcmp fast olt float %1378, %20
-  br i1 %2019, label %2020, label %2033
-
-; <label>:2020                                    ; preds = %2017
-  %2021 = fsub fast float %20, %1378
-  %2022 = fdiv fast float %2021, %2018
-  %2023 = fptoui float %2022 to i32
-  %2024 = uitofp i32 %2023 to float
-  %2025 = fmul fast float %2024, %2018
-  %2026 = fsub fast float %2021, %2025
-  %2027 = and i32 %2023, 1
-  %2028 = icmp eq i32 %2027, 0
-  br i1 %2028, label %2029, label %2031
-
-; <label>:2029                                    ; preds = %2020
-  %2030 = fadd fast float %2026, %20
-  br label %2048
-
-; <label>:2031                                    ; preds = %2020
-  %2032 = fsub fast float %22, %2026
-  br label %2048
-
-; <label>:2033                                    ; preds = %2017
-  %2034 = fcmp fast ogt float %1378, %22
-  br i1 %2034, label %2035, label %2048
-
-; <label>:2035                                    ; preds = %2033
-  %2036 = fsub fast float %1378, %22
-  %2037 = fdiv fast float %2036, %2018
-  %2038 = fptoui float %2037 to i32
-  %2039 = uitofp i32 %2038 to float
-  %2040 = fmul fast float %2039, %2018
-  %2041 = fsub fast float %2036, %2040
-  %2042 = and i32 %2038, 1
-  %2043 = icmp eq i32 %2042, 0
-  br i1 %2043, label %2044, label %2046
-
-; <label>:2044                                    ; preds = %2035
-  %2045 = fsub fast float %22, %2041
-  br label %2048
-
-; <label>:2046                                    ; preds = %2035
-  %2047 = fadd fast float %2041, %20
-  br label %2048
-
-; <label>:2048                                    ; preds = %2046, %2044, %2033, %2031, %2029
-  %2049 = phi float [ %2030, %2029 ], [ %2032, %2031 ], [ %2045, %2044 ], [ %2047, %2046 ], [ %1378, %2033 ]
-  %2050 = fptoui float %2049 to i32
-  %2051 = fsub fast float %24, %20
-  %2052 = fcmp fast olt float %938, %20
-  br i1 %2052, label %2053, label %2066
-
-; <label>:2053                                    ; preds = %2048
-  %2054 = fsub fast float %20, %938
-  %2055 = fdiv fast float %2054, %2051
-  %2056 = fptoui float %2055 to i32
-  %2057 = uitofp i32 %2056 to float
-  %2058 = fmul fast float %2057, %2051
-  %2059 = fsub fast float %2054, %2058
-  %2060 = and i32 %2056, 1
-  %2061 = icmp eq i32 %2060, 0
-  br i1 %2061, label %2062, label %2064
-
-; <label>:2062                                    ; preds = %2053
-  %2063 = fadd fast float %2059, %20
-  br label %2081
-
-; <label>:2064                                    ; preds = %2053
-  %2065 = fsub fast float %24, %2059
-  br label %2081
-
-; <label>:2066                                    ; preds = %2048
-  %2067 = fcmp fast ogt float %938, %24
-  br i1 %2067, label %2068, label %2081
-
-; <label>:2068                                    ; preds = %2066
-  %2069 = fsub fast float %938, %24
-  %2070 = fdiv fast float %2069, %2051
-  %2071 = fptoui float %2070 to i32
-  %2072 = uitofp i32 %2071 to float
-  %2073 = fmul fast float %2072, %2051
-  %2074 = fsub fast float %2069, %2073
-  %2075 = and i32 %2071, 1
-  %2076 = icmp eq i32 %2075, 0
-  br i1 %2076, label %2077, label %2079
-
-; <label>:2077                                    ; preds = %2068
-  %2078 = fsub fast float %24, %2074
-  br label %2081
-
-; <label>:2079                                    ; preds = %2068
-  %2080 = fadd fast float %2074, %20
-  br label %2081
-
-; <label>:2081                                    ; preds = %2079, %2077, %2066, %2064, %2062
-  %2082 = phi float [ %2063, %2062 ], [ %2065, %2064 ], [ %2078, %2077 ], [ %2080, %2079 ], [ %938, %2066 ]
-  %2083 = fptoui float %2082 to i32
-  %2084 = uitofp i32 %2083 to float
-  %2085 = uitofp i32 %2050 to float
-  %2086 = fptoui float %45 to i32
-  %2087 = fptoui float %182 to i32
-  %2088 = fptoui float %2084 to i32
-  %2089 = fptoui float %2085 to i32
-  %2090 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2091 = extractvalue %dx.types.CBufRet.i32 %2090, 0
-  %2092 = extractvalue %dx.types.CBufRet.i32 %2090, 1
-  %2093 = extractvalue %dx.types.CBufRet.i32 %2090, 2
-  %2094 = extractvalue %dx.types.CBufRet.i32 %2090, 3
-  %2095 = mul i32 %2091, %2086
-  %2096 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2087, i32 %2092, i32 %2095)  ; IMad(a,b,c)
-  %2097 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2088, i32 %2093, i32 %2096)  ; IMad(a,b,c)
-  %2098 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2089, i32 %2094, i32 %2097)  ; IMad(a,b,c)
-  %2099 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %2098, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2100 = extractvalue %dx.types.ResRet.i16 %2099, 0
-  %2101 = sitofp i16 %2100 to float
-  br label %2102
-
-; <label>:2102                                    ; preds = %2081, %2015, %1986, %1969, %1959
-  %2103 = phi float [ %1983, %1969 ], [ 0.000000e+00, %1959 ], [ %2014, %1986 ], [ %2101, %2081 ], [ 0.000000e+00, %2015 ]
-  %2104 = fadd fast float %938, 1.000000e+00
-  br i1 %941, label %2105, label %2130
-
-; <label>:2105                                    ; preds = %2102
-  %2106 = fcmp fast oge float %937, 0.000000e+00
-  %2107 = fptoui float %937 to i32
-  %2108 = icmp ult i32 %2107, %13
-  %2109 = and i1 %2106, %2108
-  %2110 = fcmp fast oge float %2104, 0.000000e+00
-  %2111 = and i1 %2110, %2109
-  %2112 = fptoui float %2104 to i32
-  %2113 = icmp ult i32 %2112, %15
-  %2114 = and i1 %2113, %2111
-  br i1 %2114, label %2115, label %2248
-
-; <label>:2115                                    ; preds = %2105
-  %2116 = fptoui float %45 to i32
-  %2117 = fptoui float %182 to i32
-  %2118 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2119 = extractvalue %dx.types.CBufRet.i32 %2118, 0
-  %2120 = extractvalue %dx.types.CBufRet.i32 %2118, 1
-  %2121 = extractvalue %dx.types.CBufRet.i32 %2118, 2
-  %2122 = extractvalue %dx.types.CBufRet.i32 %2118, 3
-  %2123 = mul i32 %2119, %2116
-  %2124 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2117, i32 %2120, i32 %2123)  ; IMad(a,b,c)
-  %2125 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2112, i32 %2121, i32 %2124)  ; IMad(a,b,c)
-  %2126 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2107, i32 %2122, i32 %2125)  ; IMad(a,b,c)
-  %2127 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %2126, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2128 = extractvalue %dx.types.ResRet.i16 %2127, 0
-  %2129 = sitofp i16 %2128 to float
-  br label %2248
-
-; <label>:2130                                    ; preds = %2102
-  %2131 = icmp eq i32 %940, 1
-  br i1 %2131, label %2132, label %2161
-
-; <label>:2132                                    ; preds = %2130
-  %2133 = add i32 %13, -1
-  %2134 = uitofp i32 %2133 to float
-  %2135 = call float @dx.op.binary.f32(i32 35, float %937, float 0.000000e+00)  ; FMax(a,b)
-  %2136 = call float @dx.op.binary.f32(i32 36, float %2135, float %2134)  ; FMin(a,b)
-  %2137 = fptoui float %2136 to i32
-  %2138 = add i32 %15, -1
-  %2139 = uitofp i32 %2138 to float
-  %2140 = call float @dx.op.binary.f32(i32 35, float %2104, float 0.000000e+00)  ; FMax(a,b)
-  %2141 = call float @dx.op.binary.f32(i32 36, float %2140, float %2139)  ; FMin(a,b)
-  %2142 = fptoui float %2141 to i32
-  %2143 = uitofp i32 %2142 to float
-  %2144 = uitofp i32 %2137 to float
-  %2145 = fptoui float %45 to i32
-  %2146 = fptoui float %182 to i32
-  %2147 = fptoui float %2143 to i32
-  %2148 = fptoui float %2144 to i32
-  %2149 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2150 = extractvalue %dx.types.CBufRet.i32 %2149, 0
-  %2151 = extractvalue %dx.types.CBufRet.i32 %2149, 1
-  %2152 = extractvalue %dx.types.CBufRet.i32 %2149, 2
-  %2153 = extractvalue %dx.types.CBufRet.i32 %2149, 3
-  %2154 = mul i32 %2150, %2145
-  %2155 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2146, i32 %2151, i32 %2154)  ; IMad(a,b,c)
-  %2156 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2147, i32 %2152, i32 %2155)  ; IMad(a,b,c)
-  %2157 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2148, i32 %2153, i32 %2156)  ; IMad(a,b,c)
-  %2158 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %2157, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2159 = extractvalue %dx.types.ResRet.i16 %2158, 0
-  %2160 = sitofp i16 %2159 to float
-  br label %2248
-
-; <label>:2161                                    ; preds = %2130
-  %2162 = icmp eq i32 %940, 2
-  br i1 %2162, label %2163, label %2248
-
-; <label>:2163                                    ; preds = %2161
-  %2164 = fsub fast float %22, %20
-  %2165 = fcmp fast olt float %937, %20
-  br i1 %2165, label %2166, label %2179
-
-; <label>:2166                                    ; preds = %2163
-  %2167 = fsub fast float %20, %937
-  %2168 = fdiv fast float %2167, %2164
-  %2169 = fptoui float %2168 to i32
-  %2170 = uitofp i32 %2169 to float
-  %2171 = fmul fast float %2170, %2164
-  %2172 = fsub fast float %2167, %2171
-  %2173 = and i32 %2169, 1
-  %2174 = icmp eq i32 %2173, 0
-  br i1 %2174, label %2175, label %2177
-
-; <label>:2175                                    ; preds = %2166
-  %2176 = fadd fast float %2172, %20
-  br label %2194
-
-; <label>:2177                                    ; preds = %2166
-  %2178 = fsub fast float %22, %2172
-  br label %2194
-
-; <label>:2179                                    ; preds = %2163
-  %2180 = fcmp fast ogt float %937, %22
-  br i1 %2180, label %2181, label %2194
-
-; <label>:2181                                    ; preds = %2179
-  %2182 = fsub fast float %937, %22
-  %2183 = fdiv fast float %2182, %2164
-  %2184 = fptoui float %2183 to i32
-  %2185 = uitofp i32 %2184 to float
-  %2186 = fmul fast float %2185, %2164
-  %2187 = fsub fast float %2182, %2186
-  %2188 = and i32 %2184, 1
-  %2189 = icmp eq i32 %2188, 0
-  br i1 %2189, label %2190, label %2192
-
-; <label>:2190                                    ; preds = %2181
-  %2191 = fsub fast float %22, %2187
-  br label %2194
-
-; <label>:2192                                    ; preds = %2181
-  %2193 = fadd fast float %2187, %20
-  br label %2194
-
-; <label>:2194                                    ; preds = %2192, %2190, %2179, %2177, %2175
-  %2195 = phi float [ %2176, %2175 ], [ %2178, %2177 ], [ %2191, %2190 ], [ %2193, %2192 ], [ %937, %2179 ]
-  %2196 = fptoui float %2195 to i32
-  %2197 = fsub fast float %24, %20
-  %2198 = fcmp fast olt float %2104, %20
-  br i1 %2198, label %2199, label %2212
-
-; <label>:2199                                    ; preds = %2194
-  %2200 = fsub fast float %20, %2104
-  %2201 = fdiv fast float %2200, %2197
-  %2202 = fptoui float %2201 to i32
-  %2203 = uitofp i32 %2202 to float
-  %2204 = fmul fast float %2203, %2197
-  %2205 = fsub fast float %2200, %2204
-  %2206 = and i32 %2202, 1
-  %2207 = icmp eq i32 %2206, 0
-  br i1 %2207, label %2208, label %2210
-
-; <label>:2208                                    ; preds = %2199
-  %2209 = fadd fast float %2205, %20
-  br label %2227
-
-; <label>:2210                                    ; preds = %2199
-  %2211 = fsub fast float %24, %2205
-  br label %2227
-
-; <label>:2212                                    ; preds = %2194
-  %2213 = fcmp fast ogt float %2104, %24
-  br i1 %2213, label %2214, label %2227
-
-; <label>:2214                                    ; preds = %2212
-  %2215 = fsub fast float %2104, %24
-  %2216 = fdiv fast float %2215, %2197
-  %2217 = fptoui float %2216 to i32
-  %2218 = uitofp i32 %2217 to float
-  %2219 = fmul fast float %2218, %2197
-  %2220 = fsub fast float %2215, %2219
-  %2221 = and i32 %2217, 1
-  %2222 = icmp eq i32 %2221, 0
-  br i1 %2222, label %2223, label %2225
-
-; <label>:2223                                    ; preds = %2214
-  %2224 = fsub fast float %24, %2220
-  br label %2227
-
-; <label>:2225                                    ; preds = %2214
-  %2226 = fadd fast float %2220, %20
-  br label %2227
-
-; <label>:2227                                    ; preds = %2225, %2223, %2212, %2210, %2208
-  %2228 = phi float [ %2209, %2208 ], [ %2211, %2210 ], [ %2224, %2223 ], [ %2226, %2225 ], [ %2104, %2212 ]
-  %2229 = fptoui float %2228 to i32
-  %2230 = uitofp i32 %2229 to float
-  %2231 = uitofp i32 %2196 to float
-  %2232 = fptoui float %45 to i32
-  %2233 = fptoui float %182 to i32
-  %2234 = fptoui float %2230 to i32
-  %2235 = fptoui float %2231 to i32
-  %2236 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2237 = extractvalue %dx.types.CBufRet.i32 %2236, 0
-  %2238 = extractvalue %dx.types.CBufRet.i32 %2236, 1
-  %2239 = extractvalue %dx.types.CBufRet.i32 %2236, 2
-  %2240 = extractvalue %dx.types.CBufRet.i32 %2236, 3
-  %2241 = mul i32 %2237, %2232
-  %2242 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2233, i32 %2238, i32 %2241)  ; IMad(a,b,c)
-  %2243 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2234, i32 %2239, i32 %2242)  ; IMad(a,b,c)
-  %2244 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2235, i32 %2240, i32 %2243)  ; IMad(a,b,c)
-  %2245 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %2244, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2246 = extractvalue %dx.types.ResRet.i16 %2245, 0
-  %2247 = sitofp i16 %2246 to float
-  br label %2248
-
-; <label>:2248                                    ; preds = %2227, %2161, %2132, %2115, %2105
-  %2249 = phi float [ %2129, %2115 ], [ 0.000000e+00, %2105 ], [ %2160, %2132 ], [ %2247, %2227 ], [ 0.000000e+00, %2161 ]
-  br i1 %941, label %2250, label %2275
-
-; <label>:2250                                    ; preds = %2248
-  %2251 = fcmp fast oge float %936, 0.000000e+00
-  %2252 = fptoui float %936 to i32
-  %2253 = icmp ult i32 %2252, %13
-  %2254 = and i1 %2251, %2253
-  %2255 = fcmp fast oge float %2104, 0.000000e+00
-  %2256 = and i1 %2255, %2254
-  %2257 = fptoui float %2104 to i32
-  %2258 = icmp ult i32 %2257, %15
-  %2259 = and i1 %2258, %2256
-  br i1 %2259, label %2260, label %2393
-
-; <label>:2260                                    ; preds = %2250
-  %2261 = fptoui float %45 to i32
-  %2262 = fptoui float %182 to i32
-  %2263 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2264 = extractvalue %dx.types.CBufRet.i32 %2263, 0
-  %2265 = extractvalue %dx.types.CBufRet.i32 %2263, 1
-  %2266 = extractvalue %dx.types.CBufRet.i32 %2263, 2
-  %2267 = extractvalue %dx.types.CBufRet.i32 %2263, 3
-  %2268 = mul i32 %2264, %2261
-  %2269 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2262, i32 %2265, i32 %2268)  ; IMad(a,b,c)
-  %2270 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2257, i32 %2266, i32 %2269)  ; IMad(a,b,c)
-  %2271 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2252, i32 %2267, i32 %2270)  ; IMad(a,b,c)
-  %2272 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %2271, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2273 = extractvalue %dx.types.ResRet.i16 %2272, 0
-  %2274 = sitofp i16 %2273 to float
-  br label %2393
-
-; <label>:2275                                    ; preds = %2248
-  %2276 = icmp eq i32 %940, 1
-  br i1 %2276, label %2277, label %2306
-
-; <label>:2277                                    ; preds = %2275
-  %2278 = add i32 %13, -1
-  %2279 = uitofp i32 %2278 to float
-  %2280 = call float @dx.op.binary.f32(i32 35, float %936, float 0.000000e+00)  ; FMax(a,b)
-  %2281 = call float @dx.op.binary.f32(i32 36, float %2280, float %2279)  ; FMin(a,b)
-  %2282 = fptoui float %2281 to i32
-  %2283 = add i32 %15, -1
-  %2284 = uitofp i32 %2283 to float
-  %2285 = call float @dx.op.binary.f32(i32 35, float %2104, float 0.000000e+00)  ; FMax(a,b)
-  %2286 = call float @dx.op.binary.f32(i32 36, float %2285, float %2284)  ; FMin(a,b)
-  %2287 = fptoui float %2286 to i32
-  %2288 = uitofp i32 %2287 to float
-  %2289 = uitofp i32 %2282 to float
-  %2290 = fptoui float %45 to i32
-  %2291 = fptoui float %182 to i32
-  %2292 = fptoui float %2288 to i32
-  %2293 = fptoui float %2289 to i32
-  %2294 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2295 = extractvalue %dx.types.CBufRet.i32 %2294, 0
-  %2296 = extractvalue %dx.types.CBufRet.i32 %2294, 1
-  %2297 = extractvalue %dx.types.CBufRet.i32 %2294, 2
-  %2298 = extractvalue %dx.types.CBufRet.i32 %2294, 3
-  %2299 = mul i32 %2295, %2290
-  %2300 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2291, i32 %2296, i32 %2299)  ; IMad(a,b,c)
-  %2301 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2292, i32 %2297, i32 %2300)  ; IMad(a,b,c)
-  %2302 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2293, i32 %2298, i32 %2301)  ; IMad(a,b,c)
-  %2303 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %2302, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2304 = extractvalue %dx.types.ResRet.i16 %2303, 0
-  %2305 = sitofp i16 %2304 to float
-  br label %2393
-
-; <label>:2306                                    ; preds = %2275
-  %2307 = icmp eq i32 %940, 2
-  br i1 %2307, label %2308, label %2393
-
-; <label>:2308                                    ; preds = %2306
-  %2309 = fsub fast float %22, %20
-  %2310 = fcmp fast olt float %936, %20
-  br i1 %2310, label %2311, label %2324
-
-; <label>:2311                                    ; preds = %2308
-  %2312 = fsub fast float %20, %936
-  %2313 = fdiv fast float %2312, %2309
-  %2314 = fptoui float %2313 to i32
-  %2315 = uitofp i32 %2314 to float
-  %2316 = fmul fast float %2315, %2309
-  %2317 = fsub fast float %2312, %2316
-  %2318 = and i32 %2314, 1
-  %2319 = icmp eq i32 %2318, 0
-  br i1 %2319, label %2320, label %2322
-
-; <label>:2320                                    ; preds = %2311
-  %2321 = fadd fast float %2317, %20
-  br label %2339
-
-; <label>:2322                                    ; preds = %2311
-  %2323 = fsub fast float %22, %2317
-  br label %2339
-
-; <label>:2324                                    ; preds = %2308
-  %2325 = fcmp fast ogt float %936, %22
-  br i1 %2325, label %2326, label %2339
-
-; <label>:2326                                    ; preds = %2324
-  %2327 = fsub fast float %936, %22
-  %2328 = fdiv fast float %2327, %2309
-  %2329 = fptoui float %2328 to i32
-  %2330 = uitofp i32 %2329 to float
-  %2331 = fmul fast float %2330, %2309
-  %2332 = fsub fast float %2327, %2331
-  %2333 = and i32 %2329, 1
-  %2334 = icmp eq i32 %2333, 0
-  br i1 %2334, label %2335, label %2337
-
-; <label>:2335                                    ; preds = %2326
-  %2336 = fsub fast float %22, %2332
-  br label %2339
-
-; <label>:2337                                    ; preds = %2326
-  %2338 = fadd fast float %2332, %20
-  br label %2339
-
-; <label>:2339                                    ; preds = %2337, %2335, %2324, %2322, %2320
-  %2340 = phi float [ %2321, %2320 ], [ %2323, %2322 ], [ %2336, %2335 ], [ %2338, %2337 ], [ %936, %2324 ]
-  %2341 = fptoui float %2340 to i32
-  %2342 = fsub fast float %24, %20
-  %2343 = fcmp fast olt float %2104, %20
-  br i1 %2343, label %2344, label %2357
-
-; <label>:2344                                    ; preds = %2339
-  %2345 = fsub fast float %20, %2104
-  %2346 = fdiv fast float %2345, %2342
-  %2347 = fptoui float %2346 to i32
-  %2348 = uitofp i32 %2347 to float
-  %2349 = fmul fast float %2348, %2342
-  %2350 = fsub fast float %2345, %2349
-  %2351 = and i32 %2347, 1
-  %2352 = icmp eq i32 %2351, 0
-  br i1 %2352, label %2353, label %2355
-
-; <label>:2353                                    ; preds = %2344
-  %2354 = fadd fast float %2350, %20
-  br label %2372
-
-; <label>:2355                                    ; preds = %2344
-  %2356 = fsub fast float %24, %2350
-  br label %2372
-
-; <label>:2357                                    ; preds = %2339
-  %2358 = fcmp fast ogt float %2104, %24
-  br i1 %2358, label %2359, label %2372
-
-; <label>:2359                                    ; preds = %2357
-  %2360 = fsub fast float %2104, %24
-  %2361 = fdiv fast float %2360, %2342
-  %2362 = fptoui float %2361 to i32
-  %2363 = uitofp i32 %2362 to float
-  %2364 = fmul fast float %2363, %2342
-  %2365 = fsub fast float %2360, %2364
-  %2366 = and i32 %2362, 1
-  %2367 = icmp eq i32 %2366, 0
-  br i1 %2367, label %2368, label %2370
-
-; <label>:2368                                    ; preds = %2359
-  %2369 = fsub fast float %24, %2365
-  br label %2372
-
-; <label>:2370                                    ; preds = %2359
-  %2371 = fadd fast float %2365, %20
-  br label %2372
-
-; <label>:2372                                    ; preds = %2370, %2368, %2357, %2355, %2353
-  %2373 = phi float [ %2354, %2353 ], [ %2356, %2355 ], [ %2369, %2368 ], [ %2371, %2370 ], [ %2104, %2357 ]
-  %2374 = fptoui float %2373 to i32
-  %2375 = uitofp i32 %2374 to float
-  %2376 = uitofp i32 %2341 to float
-  %2377 = fptoui float %45 to i32
-  %2378 = fptoui float %182 to i32
-  %2379 = fptoui float %2375 to i32
-  %2380 = fptoui float %2376 to i32
-  %2381 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2382 = extractvalue %dx.types.CBufRet.i32 %2381, 0
-  %2383 = extractvalue %dx.types.CBufRet.i32 %2381, 1
-  %2384 = extractvalue %dx.types.CBufRet.i32 %2381, 2
-  %2385 = extractvalue %dx.types.CBufRet.i32 %2381, 3
-  %2386 = mul i32 %2382, %2377
-  %2387 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2378, i32 %2383, i32 %2386)  ; IMad(a,b,c)
-  %2388 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2379, i32 %2384, i32 %2387)  ; IMad(a,b,c)
-  %2389 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2380, i32 %2385, i32 %2388)  ; IMad(a,b,c)
-  %2390 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %2389, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2391 = extractvalue %dx.types.ResRet.i16 %2390, 0
-  %2392 = sitofp i16 %2391 to float
-  br label %2393
-
-; <label>:2393                                    ; preds = %2372, %2306, %2277, %2260, %2250
-  %2394 = phi float [ %2274, %2260 ], [ 0.000000e+00, %2250 ], [ %2305, %2277 ], [ %2392, %2372 ], [ 0.000000e+00, %2306 ]
-  br i1 %941, label %2395, label %2420
-
-; <label>:2395                                    ; preds = %2393
-  %2396 = fcmp fast oge float %1232, 0.000000e+00
-  %2397 = fptoui float %1232 to i32
-  %2398 = icmp ult i32 %2397, %13
-  %2399 = and i1 %2396, %2398
-  %2400 = fcmp fast oge float %2104, 0.000000e+00
-  %2401 = and i1 %2400, %2399
-  %2402 = fptoui float %2104 to i32
-  %2403 = icmp ult i32 %2402, %15
-  %2404 = and i1 %2403, %2401
-  br i1 %2404, label %2405, label %2538
-
-; <label>:2405                                    ; preds = %2395
-  %2406 = fptoui float %45 to i32
-  %2407 = fptoui float %182 to i32
-  %2408 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2409 = extractvalue %dx.types.CBufRet.i32 %2408, 0
-  %2410 = extractvalue %dx.types.CBufRet.i32 %2408, 1
-  %2411 = extractvalue %dx.types.CBufRet.i32 %2408, 2
-  %2412 = extractvalue %dx.types.CBufRet.i32 %2408, 3
-  %2413 = mul i32 %2409, %2406
-  %2414 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2407, i32 %2410, i32 %2413)  ; IMad(a,b,c)
-  %2415 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2402, i32 %2411, i32 %2414)  ; IMad(a,b,c)
-  %2416 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2397, i32 %2412, i32 %2415)  ; IMad(a,b,c)
-  %2417 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %2416, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2418 = extractvalue %dx.types.ResRet.i16 %2417, 0
-  %2419 = sitofp i16 %2418 to float
-  br label %2538
-
-; <label>:2420                                    ; preds = %2393
-  %2421 = icmp eq i32 %940, 1
-  br i1 %2421, label %2422, label %2451
-
-; <label>:2422                                    ; preds = %2420
-  %2423 = add i32 %13, -1
-  %2424 = uitofp i32 %2423 to float
-  %2425 = call float @dx.op.binary.f32(i32 35, float %1232, float 0.000000e+00)  ; FMax(a,b)
-  %2426 = call float @dx.op.binary.f32(i32 36, float %2425, float %2424)  ; FMin(a,b)
-  %2427 = fptoui float %2426 to i32
-  %2428 = add i32 %15, -1
-  %2429 = uitofp i32 %2428 to float
-  %2430 = call float @dx.op.binary.f32(i32 35, float %2104, float 0.000000e+00)  ; FMax(a,b)
-  %2431 = call float @dx.op.binary.f32(i32 36, float %2430, float %2429)  ; FMin(a,b)
-  %2432 = fptoui float %2431 to i32
-  %2433 = uitofp i32 %2432 to float
-  %2434 = uitofp i32 %2427 to float
-  %2435 = fptoui float %45 to i32
-  %2436 = fptoui float %182 to i32
-  %2437 = fptoui float %2433 to i32
-  %2438 = fptoui float %2434 to i32
-  %2439 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2440 = extractvalue %dx.types.CBufRet.i32 %2439, 0
-  %2441 = extractvalue %dx.types.CBufRet.i32 %2439, 1
-  %2442 = extractvalue %dx.types.CBufRet.i32 %2439, 2
-  %2443 = extractvalue %dx.types.CBufRet.i32 %2439, 3
-  %2444 = mul i32 %2440, %2435
-  %2445 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2436, i32 %2441, i32 %2444)  ; IMad(a,b,c)
-  %2446 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2437, i32 %2442, i32 %2445)  ; IMad(a,b,c)
-  %2447 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2438, i32 %2443, i32 %2446)  ; IMad(a,b,c)
-  %2448 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %2447, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2449 = extractvalue %dx.types.ResRet.i16 %2448, 0
-  %2450 = sitofp i16 %2449 to float
-  br label %2538
-
-; <label>:2451                                    ; preds = %2420
-  %2452 = icmp eq i32 %940, 2
-  br i1 %2452, label %2453, label %2538
-
-; <label>:2453                                    ; preds = %2451
-  %2454 = fsub fast float %22, %20
-  %2455 = fcmp fast olt float %1232, %20
-  br i1 %2455, label %2456, label %2469
-
-; <label>:2456                                    ; preds = %2453
-  %2457 = fsub fast float %20, %1232
-  %2458 = fdiv fast float %2457, %2454
-  %2459 = fptoui float %2458 to i32
-  %2460 = uitofp i32 %2459 to float
-  %2461 = fmul fast float %2460, %2454
-  %2462 = fsub fast float %2457, %2461
-  %2463 = and i32 %2459, 1
-  %2464 = icmp eq i32 %2463, 0
-  br i1 %2464, label %2465, label %2467
-
-; <label>:2465                                    ; preds = %2456
-  %2466 = fadd fast float %2462, %20
-  br label %2484
-
-; <label>:2467                                    ; preds = %2456
-  %2468 = fsub fast float %22, %2462
-  br label %2484
-
-; <label>:2469                                    ; preds = %2453
-  %2470 = fcmp fast ogt float %1232, %22
-  br i1 %2470, label %2471, label %2484
-
-; <label>:2471                                    ; preds = %2469
-  %2472 = fsub fast float %1232, %22
-  %2473 = fdiv fast float %2472, %2454
-  %2474 = fptoui float %2473 to i32
-  %2475 = uitofp i32 %2474 to float
-  %2476 = fmul fast float %2475, %2454
-  %2477 = fsub fast float %2472, %2476
-  %2478 = and i32 %2474, 1
-  %2479 = icmp eq i32 %2478, 0
-  br i1 %2479, label %2480, label %2482
-
-; <label>:2480                                    ; preds = %2471
-  %2481 = fsub fast float %22, %2477
-  br label %2484
-
-; <label>:2482                                    ; preds = %2471
-  %2483 = fadd fast float %2477, %20
-  br label %2484
-
-; <label>:2484                                    ; preds = %2482, %2480, %2469, %2467, %2465
-  %2485 = phi float [ %2466, %2465 ], [ %2468, %2467 ], [ %2481, %2480 ], [ %2483, %2482 ], [ %1232, %2469 ]
-  %2486 = fptoui float %2485 to i32
-  %2487 = fsub fast float %24, %20
-  %2488 = fcmp fast olt float %2104, %20
-  br i1 %2488, label %2489, label %2502
-
-; <label>:2489                                    ; preds = %2484
-  %2490 = fsub fast float %20, %2104
-  %2491 = fdiv fast float %2490, %2487
-  %2492 = fptoui float %2491 to i32
-  %2493 = uitofp i32 %2492 to float
-  %2494 = fmul fast float %2493, %2487
-  %2495 = fsub fast float %2490, %2494
-  %2496 = and i32 %2492, 1
-  %2497 = icmp eq i32 %2496, 0
-  br i1 %2497, label %2498, label %2500
-
-; <label>:2498                                    ; preds = %2489
-  %2499 = fadd fast float %2495, %20
-  br label %2517
-
-; <label>:2500                                    ; preds = %2489
-  %2501 = fsub fast float %24, %2495
-  br label %2517
-
-; <label>:2502                                    ; preds = %2484
-  %2503 = fcmp fast ogt float %2104, %24
-  br i1 %2503, label %2504, label %2517
-
-; <label>:2504                                    ; preds = %2502
-  %2505 = fsub fast float %2104, %24
-  %2506 = fdiv fast float %2505, %2487
-  %2507 = fptoui float %2506 to i32
-  %2508 = uitofp i32 %2507 to float
-  %2509 = fmul fast float %2508, %2487
-  %2510 = fsub fast float %2505, %2509
-  %2511 = and i32 %2507, 1
-  %2512 = icmp eq i32 %2511, 0
-  br i1 %2512, label %2513, label %2515
-
-; <label>:2513                                    ; preds = %2504
-  %2514 = fsub fast float %24, %2510
-  br label %2517
-
-; <label>:2515                                    ; preds = %2504
-  %2516 = fadd fast float %2510, %20
-  br label %2517
-
-; <label>:2517                                    ; preds = %2515, %2513, %2502, %2500, %2498
-  %2518 = phi float [ %2499, %2498 ], [ %2501, %2500 ], [ %2514, %2513 ], [ %2516, %2515 ], [ %2104, %2502 ]
-  %2519 = fptoui float %2518 to i32
-  %2520 = uitofp i32 %2519 to float
-  %2521 = uitofp i32 %2486 to float
-  %2522 = fptoui float %45 to i32
-  %2523 = fptoui float %182 to i32
-  %2524 = fptoui float %2520 to i32
-  %2525 = fptoui float %2521 to i32
-  %2526 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2527 = extractvalue %dx.types.CBufRet.i32 %2526, 0
-  %2528 = extractvalue %dx.types.CBufRet.i32 %2526, 1
-  %2529 = extractvalue %dx.types.CBufRet.i32 %2526, 2
-  %2530 = extractvalue %dx.types.CBufRet.i32 %2526, 3
-  %2531 = mul i32 %2527, %2522
-  %2532 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2523, i32 %2528, i32 %2531)  ; IMad(a,b,c)
-  %2533 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2524, i32 %2529, i32 %2532)  ; IMad(a,b,c)
-  %2534 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2525, i32 %2530, i32 %2533)  ; IMad(a,b,c)
-  %2535 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %2534, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2536 = extractvalue %dx.types.ResRet.i16 %2535, 0
-  %2537 = sitofp i16 %2536 to float
-  br label %2538
-
-; <label>:2538                                    ; preds = %2517, %2451, %2422, %2405, %2395
-  %2539 = phi float [ %2419, %2405 ], [ 0.000000e+00, %2395 ], [ %2450, %2422 ], [ %2537, %2517 ], [ 0.000000e+00, %2451 ]
-  br i1 %941, label %2540, label %2565
-
-; <label>:2540                                    ; preds = %2538
-  %2541 = fcmp fast oge float %1378, 0.000000e+00
-  %2542 = fptoui float %1378 to i32
-  %2543 = icmp ult i32 %2542, %13
-  %2544 = and i1 %2541, %2543
-  %2545 = fcmp fast oge float %2104, 0.000000e+00
-  %2546 = and i1 %2545, %2544
-  %2547 = fptoui float %2104 to i32
-  %2548 = icmp ult i32 %2547, %15
-  %2549 = and i1 %2548, %2546
-  br i1 %2549, label %2550, label %2683
-
-; <label>:2550                                    ; preds = %2540
-  %2551 = fptoui float %45 to i32
-  %2552 = fptoui float %182 to i32
-  %2553 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2554 = extractvalue %dx.types.CBufRet.i32 %2553, 0
-  %2555 = extractvalue %dx.types.CBufRet.i32 %2553, 1
-  %2556 = extractvalue %dx.types.CBufRet.i32 %2553, 2
-  %2557 = extractvalue %dx.types.CBufRet.i32 %2553, 3
-  %2558 = mul i32 %2554, %2551
-  %2559 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2552, i32 %2555, i32 %2558)  ; IMad(a,b,c)
-  %2560 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2547, i32 %2556, i32 %2559)  ; IMad(a,b,c)
-  %2561 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2542, i32 %2557, i32 %2560)  ; IMad(a,b,c)
-  %2562 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %2561, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2563 = extractvalue %dx.types.ResRet.i16 %2562, 0
-  %2564 = sitofp i16 %2563 to float
-  br label %2683
-
-; <label>:2565                                    ; preds = %2538
-  %2566 = icmp eq i32 %940, 1
-  br i1 %2566, label %2567, label %2596
-
-; <label>:2567                                    ; preds = %2565
-  %2568 = add i32 %13, -1
-  %2569 = uitofp i32 %2568 to float
-  %2570 = call float @dx.op.binary.f32(i32 35, float %1378, float 0.000000e+00)  ; FMax(a,b)
-  %2571 = call float @dx.op.binary.f32(i32 36, float %2570, float %2569)  ; FMin(a,b)
-  %2572 = fptoui float %2571 to i32
-  %2573 = add i32 %15, -1
-  %2574 = uitofp i32 %2573 to float
-  %2575 = call float @dx.op.binary.f32(i32 35, float %2104, float 0.000000e+00)  ; FMax(a,b)
-  %2576 = call float @dx.op.binary.f32(i32 36, float %2575, float %2574)  ; FMin(a,b)
-  %2577 = fptoui float %2576 to i32
-  %2578 = uitofp i32 %2577 to float
-  %2579 = uitofp i32 %2572 to float
-  %2580 = fptoui float %45 to i32
-  %2581 = fptoui float %182 to i32
-  %2582 = fptoui float %2578 to i32
-  %2583 = fptoui float %2579 to i32
-  %2584 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2585 = extractvalue %dx.types.CBufRet.i32 %2584, 0
-  %2586 = extractvalue %dx.types.CBufRet.i32 %2584, 1
-  %2587 = extractvalue %dx.types.CBufRet.i32 %2584, 2
-  %2588 = extractvalue %dx.types.CBufRet.i32 %2584, 3
-  %2589 = mul i32 %2585, %2580
-  %2590 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2581, i32 %2586, i32 %2589)  ; IMad(a,b,c)
-  %2591 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2582, i32 %2587, i32 %2590)  ; IMad(a,b,c)
-  %2592 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2583, i32 %2588, i32 %2591)  ; IMad(a,b,c)
-  %2593 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %2592, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2594 = extractvalue %dx.types.ResRet.i16 %2593, 0
-  %2595 = sitofp i16 %2594 to float
-  br label %2683
-
-; <label>:2596                                    ; preds = %2565
-  %2597 = icmp eq i32 %940, 2
-  br i1 %2597, label %2598, label %2683
-
-; <label>:2598                                    ; preds = %2596
-  %2599 = fsub fast float %22, %20
-  %2600 = fcmp fast olt float %1378, %20
-  br i1 %2600, label %2601, label %2614
-
-; <label>:2601                                    ; preds = %2598
-  %2602 = fsub fast float %20, %1378
-  %2603 = fdiv fast float %2602, %2599
-  %2604 = fptoui float %2603 to i32
-  %2605 = uitofp i32 %2604 to float
-  %2606 = fmul fast float %2605, %2599
-  %2607 = fsub fast float %2602, %2606
-  %2608 = and i32 %2604, 1
-  %2609 = icmp eq i32 %2608, 0
-  br i1 %2609, label %2610, label %2612
-
-; <label>:2610                                    ; preds = %2601
-  %2611 = fadd fast float %2607, %20
-  br label %2629
-
-; <label>:2612                                    ; preds = %2601
-  %2613 = fsub fast float %22, %2607
-  br label %2629
-
-; <label>:2614                                    ; preds = %2598
-  %2615 = fcmp fast ogt float %1378, %22
-  br i1 %2615, label %2616, label %2629
-
-; <label>:2616                                    ; preds = %2614
-  %2617 = fsub fast float %1378, %22
-  %2618 = fdiv fast float %2617, %2599
-  %2619 = fptoui float %2618 to i32
-  %2620 = uitofp i32 %2619 to float
-  %2621 = fmul fast float %2620, %2599
-  %2622 = fsub fast float %2617, %2621
-  %2623 = and i32 %2619, 1
-  %2624 = icmp eq i32 %2623, 0
-  br i1 %2624, label %2625, label %2627
-
-; <label>:2625                                    ; preds = %2616
-  %2626 = fsub fast float %22, %2622
-  br label %2629
-
-; <label>:2627                                    ; preds = %2616
-  %2628 = fadd fast float %2622, %20
-  br label %2629
-
-; <label>:2629                                    ; preds = %2627, %2625, %2614, %2612, %2610
-  %2630 = phi float [ %2611, %2610 ], [ %2613, %2612 ], [ %2626, %2625 ], [ %2628, %2627 ], [ %1378, %2614 ]
-  %2631 = fptoui float %2630 to i32
-  %2632 = fsub fast float %24, %20
-  %2633 = fcmp fast olt float %2104, %20
-  br i1 %2633, label %2634, label %2647
-
-; <label>:2634                                    ; preds = %2629
-  %2635 = fsub fast float %20, %2104
-  %2636 = fdiv fast float %2635, %2632
-  %2637 = fptoui float %2636 to i32
-  %2638 = uitofp i32 %2637 to float
-  %2639 = fmul fast float %2638, %2632
-  %2640 = fsub fast float %2635, %2639
-  %2641 = and i32 %2637, 1
-  %2642 = icmp eq i32 %2641, 0
-  br i1 %2642, label %2643, label %2645
-
-; <label>:2643                                    ; preds = %2634
-  %2644 = fadd fast float %2640, %20
-  br label %2662
-
-; <label>:2645                                    ; preds = %2634
-  %2646 = fsub fast float %24, %2640
-  br label %2662
-
-; <label>:2647                                    ; preds = %2629
-  %2648 = fcmp fast ogt float %2104, %24
-  br i1 %2648, label %2649, label %2662
-
-; <label>:2649                                    ; preds = %2647
-  %2650 = fsub fast float %2104, %24
-  %2651 = fdiv fast float %2650, %2632
-  %2652 = fptoui float %2651 to i32
-  %2653 = uitofp i32 %2652 to float
-  %2654 = fmul fast float %2653, %2632
-  %2655 = fsub fast float %2650, %2654
-  %2656 = and i32 %2652, 1
-  %2657 = icmp eq i32 %2656, 0
-  br i1 %2657, label %2658, label %2660
-
-; <label>:2658                                    ; preds = %2649
-  %2659 = fsub fast float %24, %2655
-  br label %2662
-
-; <label>:2660                                    ; preds = %2649
-  %2661 = fadd fast float %2655, %20
-  br label %2662
-
-; <label>:2662                                    ; preds = %2660, %2658, %2647, %2645, %2643
-  %2663 = phi float [ %2644, %2643 ], [ %2646, %2645 ], [ %2659, %2658 ], [ %2661, %2660 ], [ %2104, %2647 ]
-  %2664 = fptoui float %2663 to i32
-  %2665 = uitofp i32 %2664 to float
-  %2666 = uitofp i32 %2631 to float
-  %2667 = fptoui float %45 to i32
-  %2668 = fptoui float %182 to i32
-  %2669 = fptoui float %2665 to i32
-  %2670 = fptoui float %2666 to i32
-  %2671 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2672 = extractvalue %dx.types.CBufRet.i32 %2671, 0
-  %2673 = extractvalue %dx.types.CBufRet.i32 %2671, 1
-  %2674 = extractvalue %dx.types.CBufRet.i32 %2671, 2
-  %2675 = extractvalue %dx.types.CBufRet.i32 %2671, 3
-  %2676 = mul i32 %2672, %2667
-  %2677 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2668, i32 %2673, i32 %2676)  ; IMad(a,b,c)
-  %2678 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2669, i32 %2674, i32 %2677)  ; IMad(a,b,c)
-  %2679 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2670, i32 %2675, i32 %2678)  ; IMad(a,b,c)
-  %2680 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %2679, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2681 = extractvalue %dx.types.ResRet.i16 %2680, 0
-  %2682 = sitofp i16 %2681 to float
-  br label %2683
-
-; <label>:2683                                    ; preds = %2662, %2596, %2567, %2550, %2540
-  %2684 = phi float [ %2564, %2550 ], [ 0.000000e+00, %2540 ], [ %2595, %2567 ], [ %2682, %2662 ], [ 0.000000e+00, %2596 ]
-  %2685 = fadd fast float %938, 2.000000e+00
-  br i1 %941, label %2686, label %2711
-
-; <label>:2686                                    ; preds = %2683
-  %2687 = fcmp fast oge float %937, 0.000000e+00
-  %2688 = fptoui float %937 to i32
-  %2689 = icmp ult i32 %2688, %13
-  %2690 = and i1 %2687, %2689
-  %2691 = fcmp fast oge float %2685, 0.000000e+00
-  %2692 = and i1 %2691, %2690
-  %2693 = fptoui float %2685 to i32
-  %2694 = icmp ult i32 %2693, %15
-  %2695 = and i1 %2694, %2692
-  br i1 %2695, label %2696, label %2829
-
-; <label>:2696                                    ; preds = %2686
-  %2697 = fptoui float %45 to i32
-  %2698 = fptoui float %182 to i32
-  %2699 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2700 = extractvalue %dx.types.CBufRet.i32 %2699, 0
-  %2701 = extractvalue %dx.types.CBufRet.i32 %2699, 1
-  %2702 = extractvalue %dx.types.CBufRet.i32 %2699, 2
-  %2703 = extractvalue %dx.types.CBufRet.i32 %2699, 3
-  %2704 = mul i32 %2700, %2697
-  %2705 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2698, i32 %2701, i32 %2704)  ; IMad(a,b,c)
-  %2706 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2693, i32 %2702, i32 %2705)  ; IMad(a,b,c)
-  %2707 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2688, i32 %2703, i32 %2706)  ; IMad(a,b,c)
-  %2708 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %2707, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2709 = extractvalue %dx.types.ResRet.i16 %2708, 0
-  %2710 = sitofp i16 %2709 to float
-  br label %2829
-
-; <label>:2711                                    ; preds = %2683
-  %2712 = icmp eq i32 %940, 1
-  br i1 %2712, label %2713, label %2742
-
-; <label>:2713                                    ; preds = %2711
-  %2714 = add i32 %13, -1
-  %2715 = uitofp i32 %2714 to float
-  %2716 = call float @dx.op.binary.f32(i32 35, float %937, float 0.000000e+00)  ; FMax(a,b)
-  %2717 = call float @dx.op.binary.f32(i32 36, float %2716, float %2715)  ; FMin(a,b)
-  %2718 = fptoui float %2717 to i32
-  %2719 = add i32 %15, -1
-  %2720 = uitofp i32 %2719 to float
-  %2721 = call float @dx.op.binary.f32(i32 35, float %2685, float 0.000000e+00)  ; FMax(a,b)
-  %2722 = call float @dx.op.binary.f32(i32 36, float %2721, float %2720)  ; FMin(a,b)
-  %2723 = fptoui float %2722 to i32
-  %2724 = uitofp i32 %2723 to float
-  %2725 = uitofp i32 %2718 to float
-  %2726 = fptoui float %45 to i32
-  %2727 = fptoui float %182 to i32
-  %2728 = fptoui float %2724 to i32
-  %2729 = fptoui float %2725 to i32
-  %2730 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2731 = extractvalue %dx.types.CBufRet.i32 %2730, 0
-  %2732 = extractvalue %dx.types.CBufRet.i32 %2730, 1
-  %2733 = extractvalue %dx.types.CBufRet.i32 %2730, 2
-  %2734 = extractvalue %dx.types.CBufRet.i32 %2730, 3
-  %2735 = mul i32 %2731, %2726
-  %2736 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2727, i32 %2732, i32 %2735)  ; IMad(a,b,c)
-  %2737 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2728, i32 %2733, i32 %2736)  ; IMad(a,b,c)
-  %2738 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2729, i32 %2734, i32 %2737)  ; IMad(a,b,c)
-  %2739 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %2738, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2740 = extractvalue %dx.types.ResRet.i16 %2739, 0
-  %2741 = sitofp i16 %2740 to float
-  br label %2829
-
-; <label>:2742                                    ; preds = %2711
-  %2743 = icmp eq i32 %940, 2
-  br i1 %2743, label %2744, label %2829
-
-; <label>:2744                                    ; preds = %2742
-  %2745 = fsub fast float %22, %20
-  %2746 = fcmp fast olt float %937, %20
-  br i1 %2746, label %2747, label %2760
-
-; <label>:2747                                    ; preds = %2744
-  %2748 = fsub fast float %20, %937
-  %2749 = fdiv fast float %2748, %2745
-  %2750 = fptoui float %2749 to i32
-  %2751 = uitofp i32 %2750 to float
-  %2752 = fmul fast float %2751, %2745
-  %2753 = fsub fast float %2748, %2752
-  %2754 = and i32 %2750, 1
-  %2755 = icmp eq i32 %2754, 0
-  br i1 %2755, label %2756, label %2758
-
-; <label>:2756                                    ; preds = %2747
-  %2757 = fadd fast float %2753, %20
-  br label %2775
-
-; <label>:2758                                    ; preds = %2747
-  %2759 = fsub fast float %22, %2753
-  br label %2775
-
-; <label>:2760                                    ; preds = %2744
-  %2761 = fcmp fast ogt float %937, %22
-  br i1 %2761, label %2762, label %2775
-
-; <label>:2762                                    ; preds = %2760
-  %2763 = fsub fast float %937, %22
-  %2764 = fdiv fast float %2763, %2745
-  %2765 = fptoui float %2764 to i32
-  %2766 = uitofp i32 %2765 to float
-  %2767 = fmul fast float %2766, %2745
-  %2768 = fsub fast float %2763, %2767
-  %2769 = and i32 %2765, 1
-  %2770 = icmp eq i32 %2769, 0
-  br i1 %2770, label %2771, label %2773
-
-; <label>:2771                                    ; preds = %2762
-  %2772 = fsub fast float %22, %2768
-  br label %2775
-
-; <label>:2773                                    ; preds = %2762
-  %2774 = fadd fast float %2768, %20
-  br label %2775
-
-; <label>:2775                                    ; preds = %2773, %2771, %2760, %2758, %2756
-  %2776 = phi float [ %2757, %2756 ], [ %2759, %2758 ], [ %2772, %2771 ], [ %2774, %2773 ], [ %937, %2760 ]
-  %2777 = fptoui float %2776 to i32
-  %2778 = fsub fast float %24, %20
-  %2779 = fcmp fast olt float %2685, %20
-  br i1 %2779, label %2780, label %2793
-
-; <label>:2780                                    ; preds = %2775
-  %2781 = fsub fast float %20, %2685
-  %2782 = fdiv fast float %2781, %2778
-  %2783 = fptoui float %2782 to i32
-  %2784 = uitofp i32 %2783 to float
-  %2785 = fmul fast float %2784, %2778
-  %2786 = fsub fast float %2781, %2785
-  %2787 = and i32 %2783, 1
-  %2788 = icmp eq i32 %2787, 0
-  br i1 %2788, label %2789, label %2791
-
-; <label>:2789                                    ; preds = %2780
-  %2790 = fadd fast float %2786, %20
-  br label %2808
-
-; <label>:2791                                    ; preds = %2780
-  %2792 = fsub fast float %24, %2786
-  br label %2808
-
-; <label>:2793                                    ; preds = %2775
-  %2794 = fcmp fast ogt float %2685, %24
-  br i1 %2794, label %2795, label %2808
-
-; <label>:2795                                    ; preds = %2793
-  %2796 = fsub fast float %2685, %24
-  %2797 = fdiv fast float %2796, %2778
-  %2798 = fptoui float %2797 to i32
-  %2799 = uitofp i32 %2798 to float
-  %2800 = fmul fast float %2799, %2778
-  %2801 = fsub fast float %2796, %2800
-  %2802 = and i32 %2798, 1
-  %2803 = icmp eq i32 %2802, 0
-  br i1 %2803, label %2804, label %2806
-
-; <label>:2804                                    ; preds = %2795
-  %2805 = fsub fast float %24, %2801
-  br label %2808
-
-; <label>:2806                                    ; preds = %2795
-  %2807 = fadd fast float %2801, %20
-  br label %2808
-
-; <label>:2808                                    ; preds = %2806, %2804, %2793, %2791, %2789
-  %2809 = phi float [ %2790, %2789 ], [ %2792, %2791 ], [ %2805, %2804 ], [ %2807, %2806 ], [ %2685, %2793 ]
-  %2810 = fptoui float %2809 to i32
-  %2811 = uitofp i32 %2810 to float
-  %2812 = uitofp i32 %2777 to float
-  %2813 = fptoui float %45 to i32
-  %2814 = fptoui float %182 to i32
-  %2815 = fptoui float %2811 to i32
-  %2816 = fptoui float %2812 to i32
-  %2817 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2818 = extractvalue %dx.types.CBufRet.i32 %2817, 0
-  %2819 = extractvalue %dx.types.CBufRet.i32 %2817, 1
-  %2820 = extractvalue %dx.types.CBufRet.i32 %2817, 2
-  %2821 = extractvalue %dx.types.CBufRet.i32 %2817, 3
-  %2822 = mul i32 %2818, %2813
-  %2823 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2814, i32 %2819, i32 %2822)  ; IMad(a,b,c)
-  %2824 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2815, i32 %2820, i32 %2823)  ; IMad(a,b,c)
-  %2825 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2816, i32 %2821, i32 %2824)  ; IMad(a,b,c)
-  %2826 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %2825, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2827 = extractvalue %dx.types.ResRet.i16 %2826, 0
-  %2828 = sitofp i16 %2827 to float
-  br label %2829
-
-; <label>:2829                                    ; preds = %2808, %2742, %2713, %2696, %2686
-  %2830 = phi float [ %2710, %2696 ], [ 0.000000e+00, %2686 ], [ %2741, %2713 ], [ %2828, %2808 ], [ 0.000000e+00, %2742 ]
-  br i1 %941, label %2831, label %2856
-
-; <label>:2831                                    ; preds = %2829
-  %2832 = fcmp fast oge float %936, 0.000000e+00
-  %2833 = fptoui float %936 to i32
-  %2834 = icmp ult i32 %2833, %13
-  %2835 = and i1 %2832, %2834
-  %2836 = fcmp fast oge float %2685, 0.000000e+00
-  %2837 = and i1 %2836, %2835
-  %2838 = fptoui float %2685 to i32
-  %2839 = icmp ult i32 %2838, %15
-  %2840 = and i1 %2839, %2837
-  br i1 %2840, label %2841, label %2974
-
-; <label>:2841                                    ; preds = %2831
-  %2842 = fptoui float %45 to i32
-  %2843 = fptoui float %182 to i32
-  %2844 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2845 = extractvalue %dx.types.CBufRet.i32 %2844, 0
-  %2846 = extractvalue %dx.types.CBufRet.i32 %2844, 1
-  %2847 = extractvalue %dx.types.CBufRet.i32 %2844, 2
-  %2848 = extractvalue %dx.types.CBufRet.i32 %2844, 3
-  %2849 = mul i32 %2845, %2842
-  %2850 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2843, i32 %2846, i32 %2849)  ; IMad(a,b,c)
-  %2851 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2838, i32 %2847, i32 %2850)  ; IMad(a,b,c)
-  %2852 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2833, i32 %2848, i32 %2851)  ; IMad(a,b,c)
-  %2853 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %2852, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2854 = extractvalue %dx.types.ResRet.i16 %2853, 0
-  %2855 = sitofp i16 %2854 to float
-  br label %2974
-
-; <label>:2856                                    ; preds = %2829
-  %2857 = icmp eq i32 %940, 1
-  br i1 %2857, label %2858, label %2887
-
-; <label>:2858                                    ; preds = %2856
-  %2859 = add i32 %13, -1
-  %2860 = uitofp i32 %2859 to float
-  %2861 = call float @dx.op.binary.f32(i32 35, float %936, float 0.000000e+00)  ; FMax(a,b)
-  %2862 = call float @dx.op.binary.f32(i32 36, float %2861, float %2860)  ; FMin(a,b)
-  %2863 = fptoui float %2862 to i32
-  %2864 = add i32 %15, -1
-  %2865 = uitofp i32 %2864 to float
-  %2866 = call float @dx.op.binary.f32(i32 35, float %2685, float 0.000000e+00)  ; FMax(a,b)
-  %2867 = call float @dx.op.binary.f32(i32 36, float %2866, float %2865)  ; FMin(a,b)
-  %2868 = fptoui float %2867 to i32
-  %2869 = uitofp i32 %2868 to float
-  %2870 = uitofp i32 %2863 to float
-  %2871 = fptoui float %45 to i32
-  %2872 = fptoui float %182 to i32
-  %2873 = fptoui float %2869 to i32
-  %2874 = fptoui float %2870 to i32
-  %2875 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2876 = extractvalue %dx.types.CBufRet.i32 %2875, 0
-  %2877 = extractvalue %dx.types.CBufRet.i32 %2875, 1
-  %2878 = extractvalue %dx.types.CBufRet.i32 %2875, 2
-  %2879 = extractvalue %dx.types.CBufRet.i32 %2875, 3
-  %2880 = mul i32 %2876, %2871
-  %2881 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2872, i32 %2877, i32 %2880)  ; IMad(a,b,c)
-  %2882 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2873, i32 %2878, i32 %2881)  ; IMad(a,b,c)
-  %2883 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2874, i32 %2879, i32 %2882)  ; IMad(a,b,c)
-  %2884 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %2883, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2885 = extractvalue %dx.types.ResRet.i16 %2884, 0
-  %2886 = sitofp i16 %2885 to float
-  br label %2974
-
-; <label>:2887                                    ; preds = %2856
-  %2888 = icmp eq i32 %940, 2
-  br i1 %2888, label %2889, label %2974
-
-; <label>:2889                                    ; preds = %2887
-  %2890 = fsub fast float %22, %20
-  %2891 = fcmp fast olt float %936, %20
-  br i1 %2891, label %2892, label %2905
-
-; <label>:2892                                    ; preds = %2889
-  %2893 = fsub fast float %20, %936
-  %2894 = fdiv fast float %2893, %2890
-  %2895 = fptoui float %2894 to i32
-  %2896 = uitofp i32 %2895 to float
-  %2897 = fmul fast float %2896, %2890
-  %2898 = fsub fast float %2893, %2897
-  %2899 = and i32 %2895, 1
-  %2900 = icmp eq i32 %2899, 0
-  br i1 %2900, label %2901, label %2903
-
-; <label>:2901                                    ; preds = %2892
-  %2902 = fadd fast float %2898, %20
-  br label %2920
-
-; <label>:2903                                    ; preds = %2892
-  %2904 = fsub fast float %22, %2898
-  br label %2920
-
-; <label>:2905                                    ; preds = %2889
-  %2906 = fcmp fast ogt float %936, %22
-  br i1 %2906, label %2907, label %2920
-
-; <label>:2907                                    ; preds = %2905
-  %2908 = fsub fast float %936, %22
-  %2909 = fdiv fast float %2908, %2890
-  %2910 = fptoui float %2909 to i32
-  %2911 = uitofp i32 %2910 to float
-  %2912 = fmul fast float %2911, %2890
-  %2913 = fsub fast float %2908, %2912
-  %2914 = and i32 %2910, 1
-  %2915 = icmp eq i32 %2914, 0
-  br i1 %2915, label %2916, label %2918
-
-; <label>:2916                                    ; preds = %2907
-  %2917 = fsub fast float %22, %2913
-  br label %2920
-
-; <label>:2918                                    ; preds = %2907
-  %2919 = fadd fast float %2913, %20
-  br label %2920
-
-; <label>:2920                                    ; preds = %2918, %2916, %2905, %2903, %2901
-  %2921 = phi float [ %2902, %2901 ], [ %2904, %2903 ], [ %2917, %2916 ], [ %2919, %2918 ], [ %936, %2905 ]
-  %2922 = fptoui float %2921 to i32
-  %2923 = fsub fast float %24, %20
-  %2924 = fcmp fast olt float %2685, %20
-  br i1 %2924, label %2925, label %2938
-
-; <label>:2925                                    ; preds = %2920
-  %2926 = fsub fast float %20, %2685
-  %2927 = fdiv fast float %2926, %2923
-  %2928 = fptoui float %2927 to i32
-  %2929 = uitofp i32 %2928 to float
-  %2930 = fmul fast float %2929, %2923
-  %2931 = fsub fast float %2926, %2930
-  %2932 = and i32 %2928, 1
-  %2933 = icmp eq i32 %2932, 0
-  br i1 %2933, label %2934, label %2936
-
-; <label>:2934                                    ; preds = %2925
-  %2935 = fadd fast float %2931, %20
-  br label %2953
-
-; <label>:2936                                    ; preds = %2925
-  %2937 = fsub fast float %24, %2931
-  br label %2953
-
-; <label>:2938                                    ; preds = %2920
-  %2939 = fcmp fast ogt float %2685, %24
-  br i1 %2939, label %2940, label %2953
-
-; <label>:2940                                    ; preds = %2938
-  %2941 = fsub fast float %2685, %24
-  %2942 = fdiv fast float %2941, %2923
-  %2943 = fptoui float %2942 to i32
-  %2944 = uitofp i32 %2943 to float
-  %2945 = fmul fast float %2944, %2923
-  %2946 = fsub fast float %2941, %2945
-  %2947 = and i32 %2943, 1
-  %2948 = icmp eq i32 %2947, 0
-  br i1 %2948, label %2949, label %2951
-
-; <label>:2949                                    ; preds = %2940
-  %2950 = fsub fast float %24, %2946
-  br label %2953
-
-; <label>:2951                                    ; preds = %2940
-  %2952 = fadd fast float %2946, %20
-  br label %2953
-
-; <label>:2953                                    ; preds = %2951, %2949, %2938, %2936, %2934
-  %2954 = phi float [ %2935, %2934 ], [ %2937, %2936 ], [ %2950, %2949 ], [ %2952, %2951 ], [ %2685, %2938 ]
-  %2955 = fptoui float %2954 to i32
-  %2956 = uitofp i32 %2955 to float
-  %2957 = uitofp i32 %2922 to float
-  %2958 = fptoui float %45 to i32
-  %2959 = fptoui float %182 to i32
-  %2960 = fptoui float %2956 to i32
-  %2961 = fptoui float %2957 to i32
-  %2962 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2963 = extractvalue %dx.types.CBufRet.i32 %2962, 0
-  %2964 = extractvalue %dx.types.CBufRet.i32 %2962, 1
-  %2965 = extractvalue %dx.types.CBufRet.i32 %2962, 2
-  %2966 = extractvalue %dx.types.CBufRet.i32 %2962, 3
-  %2967 = mul i32 %2963, %2958
-  %2968 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2959, i32 %2964, i32 %2967)  ; IMad(a,b,c)
-  %2969 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2960, i32 %2965, i32 %2968)  ; IMad(a,b,c)
-  %2970 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2961, i32 %2966, i32 %2969)  ; IMad(a,b,c)
-  %2971 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %2970, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2972 = extractvalue %dx.types.ResRet.i16 %2971, 0
-  %2973 = sitofp i16 %2972 to float
-  br label %2974
-
-; <label>:2974                                    ; preds = %2953, %2887, %2858, %2841, %2831
-  %2975 = phi float [ %2855, %2841 ], [ 0.000000e+00, %2831 ], [ %2886, %2858 ], [ %2973, %2953 ], [ 0.000000e+00, %2887 ]
-  br i1 %941, label %2976, label %3001
-
-; <label>:2976                                    ; preds = %2974
-  %2977 = fcmp fast oge float %1232, 0.000000e+00
-  %2978 = fptoui float %1232 to i32
-  %2979 = icmp ult i32 %2978, %13
-  %2980 = and i1 %2977, %2979
-  %2981 = fcmp fast oge float %2685, 0.000000e+00
-  %2982 = and i1 %2981, %2980
-  %2983 = fptoui float %2685 to i32
-  %2984 = icmp ult i32 %2983, %15
-  %2985 = and i1 %2984, %2982
-  br i1 %2985, label %2986, label %3119
-
-; <label>:2986                                    ; preds = %2976
-  %2987 = fptoui float %45 to i32
-  %2988 = fptoui float %182 to i32
-  %2989 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2990 = extractvalue %dx.types.CBufRet.i32 %2989, 0
-  %2991 = extractvalue %dx.types.CBufRet.i32 %2989, 1
-  %2992 = extractvalue %dx.types.CBufRet.i32 %2989, 2
-  %2993 = extractvalue %dx.types.CBufRet.i32 %2989, 3
-  %2994 = mul i32 %2990, %2987
-  %2995 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2988, i32 %2991, i32 %2994)  ; IMad(a,b,c)
-  %2996 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2983, i32 %2992, i32 %2995)  ; IMad(a,b,c)
-  %2997 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2978, i32 %2993, i32 %2996)  ; IMad(a,b,c)
-  %2998 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %2997, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2999 = extractvalue %dx.types.ResRet.i16 %2998, 0
-  %3000 = sitofp i16 %2999 to float
-  br label %3119
-
-; <label>:3001                                    ; preds = %2974
-  %3002 = icmp eq i32 %940, 1
-  br i1 %3002, label %3003, label %3032
-
-; <label>:3003                                    ; preds = %3001
-  %3004 = add i32 %13, -1
-  %3005 = uitofp i32 %3004 to float
-  %3006 = call float @dx.op.binary.f32(i32 35, float %1232, float 0.000000e+00)  ; FMax(a,b)
-  %3007 = call float @dx.op.binary.f32(i32 36, float %3006, float %3005)  ; FMin(a,b)
-  %3008 = fptoui float %3007 to i32
-  %3009 = add i32 %15, -1
-  %3010 = uitofp i32 %3009 to float
-  %3011 = call float @dx.op.binary.f32(i32 35, float %2685, float 0.000000e+00)  ; FMax(a,b)
-  %3012 = call float @dx.op.binary.f32(i32 36, float %3011, float %3010)  ; FMin(a,b)
-  %3013 = fptoui float %3012 to i32
-  %3014 = uitofp i32 %3013 to float
-  %3015 = uitofp i32 %3008 to float
-  %3016 = fptoui float %45 to i32
-  %3017 = fptoui float %182 to i32
-  %3018 = fptoui float %3014 to i32
-  %3019 = fptoui float %3015 to i32
-  %3020 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3021 = extractvalue %dx.types.CBufRet.i32 %3020, 0
-  %3022 = extractvalue %dx.types.CBufRet.i32 %3020, 1
-  %3023 = extractvalue %dx.types.CBufRet.i32 %3020, 2
-  %3024 = extractvalue %dx.types.CBufRet.i32 %3020, 3
-  %3025 = mul i32 %3021, %3016
-  %3026 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3017, i32 %3022, i32 %3025)  ; IMad(a,b,c)
-  %3027 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3018, i32 %3023, i32 %3026)  ; IMad(a,b,c)
-  %3028 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3019, i32 %3024, i32 %3027)  ; IMad(a,b,c)
-  %3029 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %3028, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3030 = extractvalue %dx.types.ResRet.i16 %3029, 0
-  %3031 = sitofp i16 %3030 to float
-  br label %3119
-
-; <label>:3032                                    ; preds = %3001
-  %3033 = icmp eq i32 %940, 2
-  br i1 %3033, label %3034, label %3119
-
-; <label>:3034                                    ; preds = %3032
-  %3035 = fsub fast float %22, %20
-  %3036 = fcmp fast olt float %1232, %20
-  br i1 %3036, label %3037, label %3050
-
-; <label>:3037                                    ; preds = %3034
-  %3038 = fsub fast float %20, %1232
-  %3039 = fdiv fast float %3038, %3035
-  %3040 = fptoui float %3039 to i32
-  %3041 = uitofp i32 %3040 to float
-  %3042 = fmul fast float %3041, %3035
-  %3043 = fsub fast float %3038, %3042
-  %3044 = and i32 %3040, 1
-  %3045 = icmp eq i32 %3044, 0
-  br i1 %3045, label %3046, label %3048
-
-; <label>:3046                                    ; preds = %3037
-  %3047 = fadd fast float %3043, %20
-  br label %3065
-
-; <label>:3048                                    ; preds = %3037
-  %3049 = fsub fast float %22, %3043
-  br label %3065
-
-; <label>:3050                                    ; preds = %3034
-  %3051 = fcmp fast ogt float %1232, %22
-  br i1 %3051, label %3052, label %3065
-
-; <label>:3052                                    ; preds = %3050
-  %3053 = fsub fast float %1232, %22
-  %3054 = fdiv fast float %3053, %3035
-  %3055 = fptoui float %3054 to i32
-  %3056 = uitofp i32 %3055 to float
-  %3057 = fmul fast float %3056, %3035
-  %3058 = fsub fast float %3053, %3057
-  %3059 = and i32 %3055, 1
-  %3060 = icmp eq i32 %3059, 0
-  br i1 %3060, label %3061, label %3063
-
-; <label>:3061                                    ; preds = %3052
-  %3062 = fsub fast float %22, %3058
-  br label %3065
-
-; <label>:3063                                    ; preds = %3052
-  %3064 = fadd fast float %3058, %20
-  br label %3065
-
-; <label>:3065                                    ; preds = %3063, %3061, %3050, %3048, %3046
-  %3066 = phi float [ %3047, %3046 ], [ %3049, %3048 ], [ %3062, %3061 ], [ %3064, %3063 ], [ %1232, %3050 ]
-  %3067 = fptoui float %3066 to i32
-  %3068 = fsub fast float %24, %20
-  %3069 = fcmp fast olt float %2685, %20
-  br i1 %3069, label %3070, label %3083
-
-; <label>:3070                                    ; preds = %3065
-  %3071 = fsub fast float %20, %2685
-  %3072 = fdiv fast float %3071, %3068
-  %3073 = fptoui float %3072 to i32
-  %3074 = uitofp i32 %3073 to float
-  %3075 = fmul fast float %3074, %3068
-  %3076 = fsub fast float %3071, %3075
-  %3077 = and i32 %3073, 1
-  %3078 = icmp eq i32 %3077, 0
-  br i1 %3078, label %3079, label %3081
-
-; <label>:3079                                    ; preds = %3070
-  %3080 = fadd fast float %3076, %20
-  br label %3098
-
-; <label>:3081                                    ; preds = %3070
-  %3082 = fsub fast float %24, %3076
-  br label %3098
-
-; <label>:3083                                    ; preds = %3065
-  %3084 = fcmp fast ogt float %2685, %24
-  br i1 %3084, label %3085, label %3098
-
-; <label>:3085                                    ; preds = %3083
-  %3086 = fsub fast float %2685, %24
-  %3087 = fdiv fast float %3086, %3068
-  %3088 = fptoui float %3087 to i32
-  %3089 = uitofp i32 %3088 to float
-  %3090 = fmul fast float %3089, %3068
-  %3091 = fsub fast float %3086, %3090
-  %3092 = and i32 %3088, 1
-  %3093 = icmp eq i32 %3092, 0
-  br i1 %3093, label %3094, label %3096
-
-; <label>:3094                                    ; preds = %3085
-  %3095 = fsub fast float %24, %3091
-  br label %3098
-
-; <label>:3096                                    ; preds = %3085
-  %3097 = fadd fast float %3091, %20
-  br label %3098
-
-; <label>:3098                                    ; preds = %3096, %3094, %3083, %3081, %3079
-  %3099 = phi float [ %3080, %3079 ], [ %3082, %3081 ], [ %3095, %3094 ], [ %3097, %3096 ], [ %2685, %3083 ]
-  %3100 = fptoui float %3099 to i32
-  %3101 = uitofp i32 %3100 to float
-  %3102 = uitofp i32 %3067 to float
-  %3103 = fptoui float %45 to i32
-  %3104 = fptoui float %182 to i32
-  %3105 = fptoui float %3101 to i32
-  %3106 = fptoui float %3102 to i32
-  %3107 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3108 = extractvalue %dx.types.CBufRet.i32 %3107, 0
-  %3109 = extractvalue %dx.types.CBufRet.i32 %3107, 1
-  %3110 = extractvalue %dx.types.CBufRet.i32 %3107, 2
-  %3111 = extractvalue %dx.types.CBufRet.i32 %3107, 3
-  %3112 = mul i32 %3108, %3103
-  %3113 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3104, i32 %3109, i32 %3112)  ; IMad(a,b,c)
-  %3114 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3105, i32 %3110, i32 %3113)  ; IMad(a,b,c)
-  %3115 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3106, i32 %3111, i32 %3114)  ; IMad(a,b,c)
-  %3116 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %3115, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3117 = extractvalue %dx.types.ResRet.i16 %3116, 0
-  %3118 = sitofp i16 %3117 to float
-  br label %3119
-
-; <label>:3119                                    ; preds = %3098, %3032, %3003, %2986, %2976
-  %3120 = phi float [ %3000, %2986 ], [ 0.000000e+00, %2976 ], [ %3031, %3003 ], [ %3118, %3098 ], [ 0.000000e+00, %3032 ]
-  br i1 %941, label %3121, label %3146
-
-; <label>:3121                                    ; preds = %3119
-  %3122 = fcmp fast oge float %1378, 0.000000e+00
-  %3123 = fptoui float %1378 to i32
-  %3124 = icmp ult i32 %3123, %13
-  %3125 = and i1 %3122, %3124
-  %3126 = fcmp fast oge float %2685, 0.000000e+00
-  %3127 = and i1 %3126, %3125
-  %3128 = fptoui float %2685 to i32
-  %3129 = icmp ult i32 %3128, %15
-  %3130 = and i1 %3129, %3127
-  br i1 %3130, label %3131, label %3264
-
-; <label>:3131                                    ; preds = %3121
-  %3132 = fptoui float %45 to i32
-  %3133 = fptoui float %182 to i32
-  %3134 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3135 = extractvalue %dx.types.CBufRet.i32 %3134, 0
-  %3136 = extractvalue %dx.types.CBufRet.i32 %3134, 1
-  %3137 = extractvalue %dx.types.CBufRet.i32 %3134, 2
-  %3138 = extractvalue %dx.types.CBufRet.i32 %3134, 3
-  %3139 = mul i32 %3135, %3132
-  %3140 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3133, i32 %3136, i32 %3139)  ; IMad(a,b,c)
-  %3141 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3128, i32 %3137, i32 %3140)  ; IMad(a,b,c)
-  %3142 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3123, i32 %3138, i32 %3141)  ; IMad(a,b,c)
-  %3143 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %3142, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3144 = extractvalue %dx.types.ResRet.i16 %3143, 0
-  %3145 = sitofp i16 %3144 to float
-  br label %3264
-
-; <label>:3146                                    ; preds = %3119
-  %3147 = icmp eq i32 %940, 1
-  br i1 %3147, label %3148, label %3177
-
-; <label>:3148                                    ; preds = %3146
-  %3149 = add i32 %13, -1
-  %3150 = uitofp i32 %3149 to float
-  %3151 = call float @dx.op.binary.f32(i32 35, float %1378, float 0.000000e+00)  ; FMax(a,b)
-  %3152 = call float @dx.op.binary.f32(i32 36, float %3151, float %3150)  ; FMin(a,b)
-  %3153 = fptoui float %3152 to i32
-  %3154 = add i32 %15, -1
-  %3155 = uitofp i32 %3154 to float
-  %3156 = call float @dx.op.binary.f32(i32 35, float %2685, float 0.000000e+00)  ; FMax(a,b)
-  %3157 = call float @dx.op.binary.f32(i32 36, float %3156, float %3155)  ; FMin(a,b)
-  %3158 = fptoui float %3157 to i32
-  %3159 = uitofp i32 %3158 to float
-  %3160 = uitofp i32 %3153 to float
-  %3161 = fptoui float %45 to i32
-  %3162 = fptoui float %182 to i32
-  %3163 = fptoui float %3159 to i32
-  %3164 = fptoui float %3160 to i32
-  %3165 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3166 = extractvalue %dx.types.CBufRet.i32 %3165, 0
-  %3167 = extractvalue %dx.types.CBufRet.i32 %3165, 1
-  %3168 = extractvalue %dx.types.CBufRet.i32 %3165, 2
-  %3169 = extractvalue %dx.types.CBufRet.i32 %3165, 3
-  %3170 = mul i32 %3166, %3161
-  %3171 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3162, i32 %3167, i32 %3170)  ; IMad(a,b,c)
-  %3172 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3163, i32 %3168, i32 %3171)  ; IMad(a,b,c)
-  %3173 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3164, i32 %3169, i32 %3172)  ; IMad(a,b,c)
-  %3174 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %3173, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3175 = extractvalue %dx.types.ResRet.i16 %3174, 0
-  %3176 = sitofp i16 %3175 to float
-  br label %3264
-
-; <label>:3177                                    ; preds = %3146
-  %3178 = icmp eq i32 %940, 2
-  br i1 %3178, label %3179, label %3264
-
-; <label>:3179                                    ; preds = %3177
-  %3180 = fsub fast float %22, %20
-  %3181 = fcmp fast olt float %1378, %20
-  br i1 %3181, label %3182, label %3195
-
-; <label>:3182                                    ; preds = %3179
-  %3183 = fsub fast float %20, %1378
-  %3184 = fdiv fast float %3183, %3180
-  %3185 = fptoui float %3184 to i32
-  %3186 = uitofp i32 %3185 to float
-  %3187 = fmul fast float %3186, %3180
-  %3188 = fsub fast float %3183, %3187
-  %3189 = and i32 %3185, 1
-  %3190 = icmp eq i32 %3189, 0
-  br i1 %3190, label %3191, label %3193
-
-; <label>:3191                                    ; preds = %3182
-  %3192 = fadd fast float %3188, %20
-  br label %3210
-
-; <label>:3193                                    ; preds = %3182
-  %3194 = fsub fast float %22, %3188
-  br label %3210
-
-; <label>:3195                                    ; preds = %3179
-  %3196 = fcmp fast ogt float %1378, %22
-  br i1 %3196, label %3197, label %3210
-
-; <label>:3197                                    ; preds = %3195
-  %3198 = fsub fast float %1378, %22
-  %3199 = fdiv fast float %3198, %3180
-  %3200 = fptoui float %3199 to i32
-  %3201 = uitofp i32 %3200 to float
-  %3202 = fmul fast float %3201, %3180
-  %3203 = fsub fast float %3198, %3202
-  %3204 = and i32 %3200, 1
-  %3205 = icmp eq i32 %3204, 0
-  br i1 %3205, label %3206, label %3208
-
-; <label>:3206                                    ; preds = %3197
-  %3207 = fsub fast float %22, %3203
-  br label %3210
-
-; <label>:3208                                    ; preds = %3197
-  %3209 = fadd fast float %3203, %20
-  br label %3210
-
-; <label>:3210                                    ; preds = %3208, %3206, %3195, %3193, %3191
-  %3211 = phi float [ %3192, %3191 ], [ %3194, %3193 ], [ %3207, %3206 ], [ %3209, %3208 ], [ %1378, %3195 ]
-  %3212 = fptoui float %3211 to i32
-  %3213 = fsub fast float %24, %20
-  %3214 = fcmp fast olt float %2685, %20
-  br i1 %3214, label %3215, label %3228
-
-; <label>:3215                                    ; preds = %3210
-  %3216 = fsub fast float %20, %2685
-  %3217 = fdiv fast float %3216, %3213
-  %3218 = fptoui float %3217 to i32
-  %3219 = uitofp i32 %3218 to float
-  %3220 = fmul fast float %3219, %3213
-  %3221 = fsub fast float %3216, %3220
-  %3222 = and i32 %3218, 1
-  %3223 = icmp eq i32 %3222, 0
-  br i1 %3223, label %3224, label %3226
-
-; <label>:3224                                    ; preds = %3215
-  %3225 = fadd fast float %3221, %20
-  br label %3243
-
-; <label>:3226                                    ; preds = %3215
-  %3227 = fsub fast float %24, %3221
-  br label %3243
-
-; <label>:3228                                    ; preds = %3210
-  %3229 = fcmp fast ogt float %2685, %24
-  br i1 %3229, label %3230, label %3243
-
-; <label>:3230                                    ; preds = %3228
-  %3231 = fsub fast float %2685, %24
-  %3232 = fdiv fast float %3231, %3213
-  %3233 = fptoui float %3232 to i32
-  %3234 = uitofp i32 %3233 to float
-  %3235 = fmul fast float %3234, %3213
-  %3236 = fsub fast float %3231, %3235
-  %3237 = and i32 %3233, 1
-  %3238 = icmp eq i32 %3237, 0
-  br i1 %3238, label %3239, label %3241
-
-; <label>:3239                                    ; preds = %3230
-  %3240 = fsub fast float %24, %3236
-  br label %3243
-
-; <label>:3241                                    ; preds = %3230
-  %3242 = fadd fast float %3236, %20
-  br label %3243
-
-; <label>:3243                                    ; preds = %3241, %3239, %3228, %3226, %3224
-  %3244 = phi float [ %3225, %3224 ], [ %3227, %3226 ], [ %3240, %3239 ], [ %3242, %3241 ], [ %2685, %3228 ]
-  %3245 = fptoui float %3244 to i32
-  %3246 = uitofp i32 %3245 to float
-  %3247 = uitofp i32 %3212 to float
-  %3248 = fptoui float %45 to i32
-  %3249 = fptoui float %182 to i32
-  %3250 = fptoui float %3246 to i32
-  %3251 = fptoui float %3247 to i32
-  %3252 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3253 = extractvalue %dx.types.CBufRet.i32 %3252, 0
-  %3254 = extractvalue %dx.types.CBufRet.i32 %3252, 1
-  %3255 = extractvalue %dx.types.CBufRet.i32 %3252, 2
-  %3256 = extractvalue %dx.types.CBufRet.i32 %3252, 3
-  %3257 = mul i32 %3253, %3248
-  %3258 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3249, i32 %3254, i32 %3257)  ; IMad(a,b,c)
-  %3259 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3250, i32 %3255, i32 %3258)  ; IMad(a,b,c)
-  %3260 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3251, i32 %3256, i32 %3259)  ; IMad(a,b,c)
-  %3261 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %3260, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3262 = extractvalue %dx.types.ResRet.i16 %3261, 0
-  %3263 = sitofp i16 %3262 to float
-  br label %3264
-
-; <label>:3264                                    ; preds = %3243, %3177, %3148, %3131, %3121
-  %3265 = phi float [ %3145, %3131 ], [ 0.000000e+00, %3121 ], [ %3176, %3148 ], [ %3263, %3243 ], [ 0.000000e+00, %3177 ]
-  %3266 = call float @dx.op.unary.f32(i32 22, float %180)  ; Frc(value)
-  %3267 = call float @dx.op.unary.f32(i32 22, float %181)  ; Frc(value)
-  %3268 = fmul fast float %3267, %3267
-  %3269 = fmul fast float %3268, %3267
-  %3270 = fmul fast float %1086, -7.500000e-01
-  %3271 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2249, float %3270)  ; FMad(a,b,c)
-  %3272 = fmul fast float %1086, 1.500000e+00
-  %3273 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %1668, float %3272)  ; FMad(a,b,c)
-  %3274 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %2249, float %3273)  ; FMad(a,b,c)
-  %3275 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %2830, float %3274)  ; FMad(a,b,c)
-  %3276 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %1668, float %3270)  ; FMad(a,b,c)
-  %3277 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %2249, float %3276)  ; FMad(a,b,c)
-  %3278 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2830, float %3277)  ; FMad(a,b,c)
-  %3279 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3267, float %3268, float %3269, float %1668, float %3271, float %3275, float %3278)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3280 = fmul fast float %1231, -7.500000e-01
-  %3281 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2394, float %3280)  ; FMad(a,b,c)
-  %3282 = fmul fast float %1231, 1.500000e+00
-  %3283 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %1813, float %3282)  ; FMad(a,b,c)
-  %3284 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %2394, float %3283)  ; FMad(a,b,c)
-  %3285 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %2975, float %3284)  ; FMad(a,b,c)
-  %3286 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %1813, float %3280)  ; FMad(a,b,c)
-  %3287 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %2394, float %3286)  ; FMad(a,b,c)
-  %3288 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2975, float %3287)  ; FMad(a,b,c)
-  %3289 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3267, float %3268, float %3269, float %1813, float %3281, float %3285, float %3288)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3290 = fmul fast float %1377, -7.500000e-01
-  %3291 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2539, float %3290)  ; FMad(a,b,c)
-  %3292 = fmul fast float %1377, 1.500000e+00
-  %3293 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %1958, float %3292)  ; FMad(a,b,c)
-  %3294 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %2539, float %3293)  ; FMad(a,b,c)
-  %3295 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %3120, float %3294)  ; FMad(a,b,c)
-  %3296 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %1958, float %3290)  ; FMad(a,b,c)
-  %3297 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %2539, float %3296)  ; FMad(a,b,c)
-  %3298 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3120, float %3297)  ; FMad(a,b,c)
-  %3299 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3267, float %3268, float %3269, float %1958, float %3291, float %3295, float %3298)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3300 = fmul fast float %1523, -7.500000e-01
-  %3301 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2684, float %3300)  ; FMad(a,b,c)
-  %3302 = fmul fast float %1523, 1.500000e+00
-  %3303 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %2103, float %3302)  ; FMad(a,b,c)
-  %3304 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %2684, float %3303)  ; FMad(a,b,c)
-  %3305 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %3265, float %3304)  ; FMad(a,b,c)
-  %3306 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %2103, float %3300)  ; FMad(a,b,c)
-  %3307 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %2684, float %3306)  ; FMad(a,b,c)
-  %3308 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3265, float %3307)  ; FMad(a,b,c)
-  %3309 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3267, float %3268, float %3269, float %2103, float %3301, float %3305, float %3308)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3310 = fmul fast float %3266, %3266
-  %3311 = fmul fast float %3310, %3266
-  %3312 = fmul fast float %3279, -7.500000e-01
-  %3313 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3299, float %3312)  ; FMad(a,b,c)
-  %3314 = fmul fast float %3279, 1.500000e+00
-  %3315 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %3289, float %3314)  ; FMad(a,b,c)
-  %3316 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %3299, float %3315)  ; FMad(a,b,c)
-  %3317 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %3309, float %3316)  ; FMad(a,b,c)
-  %3318 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %3289, float %3312)  ; FMad(a,b,c)
-  %3319 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %3299, float %3318)  ; FMad(a,b,c)
-  %3320 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3309, float %3319)  ; FMad(a,b,c)
-  %3321 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3266, float %3310, float %3311, float %3289, float %3313, float %3317, float %3320)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3322 = fptosi float %3321 to i16
-  call void @dx.op.rawBufferStore.i16(i32 140, %dx.types.Handle %1, i32 %8, i32 0, i16 %3322, i16 undef, i16 undef, i16 undef, i8 1, i32 2)  ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment)
-  br label %3323
-
-; <label>:3323                                    ; preds = %3264, %933, %919, %329, %0
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.threadId.i32(i32, i32) #0
-
-; Function Attrs: nounwind readonly
-declare %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32, %dx.types.Handle, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.rawBufferStore.i16(i32, %dx.types.Handle, i32, i32, i16, i16, i16, i16, i8, i32) #2
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.unary.f32(i32, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.tertiary.f32(i32, float, float, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.binary.f32(i32, float, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.dot4.f32(i32, float, float, float, float, float, float, float, float) #0
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.tertiary.i32(i32, i32, i32, i32) #0
-
-; Function Attrs: nounwind readonly
-declare %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32, %dx.types.Handle, i32) #1
-
-; Function Attrs: nounwind readonly
-declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #1
-
-; Function Attrs: nounwind readonly
-declare %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32, %dx.types.Handle, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind readnone
-declare double @dx.op.makeDouble.f64(i32, i32, i32) #0
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind readonly }
-attributes #2 = { nounwind }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.valver = !{!2}
-!dx.shaderModel = !{!3}
-!dx.resources = !{!4}
-!dx.entryPoints = !{!12}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 1, i32 2}
-!2 = !{i32 1, i32 6}
-!3 = !{!"cs", i32 6, i32 2}
-!4 = !{null, !5, !10, null}
-!5 = !{!6, !7, !9}
-!6 = !{i32 0, %"class.RWStructuredBuffer<short>"* undef, !"", i32 0, i32 0, i32 1, i32 12, i1 false, i1 false, i1 false, !1}
-!7 = !{i32 1, %"class.RWStructuredBuffer<double>"* undef, !"", i32 0, i32 1, i32 1, i32 12, i1 false, i1 false, i1 false, !8}
-!8 = !{i32 1, i32 8}
-!9 = !{i32 2, %"class.RWStructuredBuffer<short>"* undef, !"", i32 0, i32 2, i32 1, i32 12, i1 false, i1 false, i1 false, !1}
-!10 = !{!11}
-!11 = !{i32 0, %Constants* undef, !"", i32 0, i32 0, i32 1, i32 116, null}
-!12 = !{void ()* @GridSample, !"GridSample", null, !4, !13}
-!13 = !{i32 0, i64 8388660, i32 4, !14}
-!14 = !{i32 64, i32 1, i32 1}
-
-#endif
-
-const unsigned char g_GridSample[] = {
-  0x44, 0x58, 0x42, 0x43, 0x30, 0x44, 0x77, 0x6f, 0x7c, 0xed, 0x92, 0x65,
-  0xb9, 0xb3, 0x96, 0xf5, 0x5d, 0x83, 0x1b, 0x66, 0x01, 0x00, 0x00, 0x00,
-  0x88, 0x54, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00,
-  0x48, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00,
-  0x18, 0x01, 0x00, 0x00, 0x34, 0x01, 0x00, 0x00, 0x53, 0x46, 0x49, 0x30,
-  0x08, 0x00, 0x00, 0x00, 0x01, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x49, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x08, 0x00, 0x00, 0x00, 0x4f, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x50, 0x53, 0x56, 0x30,
-  0xa8, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x05, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00,
-  0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
-  0x18, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x48, 0x41, 0x53, 0x48, 0x14, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x1b, 0x72, 0x7f, 0x0e, 0xd6, 0xe6, 0x06, 0x92,
-  0xcc, 0x16, 0x31, 0xe4, 0x50, 0x8e, 0xac, 0x5a, 0x44, 0x58, 0x49, 0x4c,
-  0x4c, 0x53, 0x00, 0x00, 0x62, 0x00, 0x05, 0x00, 0xd3, 0x14, 0x00, 0x00,
-  0x44, 0x58, 0x49, 0x4c, 0x02, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
-  0x34, 0x53, 0x00, 0x00, 0x42, 0x43, 0xc0, 0xde, 0x21, 0x0c, 0x00, 0x00,
-  0xca, 0x14, 0x00, 0x00, 0x0b, 0x82, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00,
-  0x13, 0x00, 0x00, 0x00, 0x07, 0x81, 0x23, 0x91, 0x41, 0xc8, 0x04, 0x49,
-  0x06, 0x10, 0x32, 0x39, 0x92, 0x01, 0x84, 0x0c, 0x25, 0x05, 0x08, 0x19,
-  0x1e, 0x04, 0x8b, 0x62, 0x80, 0x18, 0x45, 0x02, 0x42, 0x92, 0x0b, 0x42,
-  0xc4, 0x10, 0x32, 0x14, 0x38, 0x08, 0x18, 0x4b, 0x0a, 0x32, 0x62, 0x88,
-  0x48, 0x90, 0x14, 0x20, 0x43, 0x46, 0x88, 0xa5, 0x00, 0x19, 0x32, 0x42,
-  0xe4, 0x48, 0x0e, 0x90, 0x11, 0x23, 0xc4, 0x50, 0x41, 0x51, 0x81, 0x8c,
-  0xe1, 0x83, 0xe5, 0x8a, 0x04, 0x31, 0x46, 0x06, 0x51, 0x18, 0x00, 0x00,
-  0x08, 0x00, 0x00, 0x00, 0x1b, 0x8c, 0xe0, 0xff, 0xff, 0xff, 0xff, 0x07,
-  0x40, 0x02, 0xa8, 0x0d, 0x86, 0xf0, 0xff, 0xff, 0xff, 0xff, 0x03, 0x20,
-  0x01, 0xd5, 0x06, 0x62, 0xf8, 0xff, 0xff, 0xff, 0xff, 0x01, 0x90, 0x00,
-  0x49, 0x18, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x13, 0x82, 0x60, 0x42,
-  0x20, 0x4c, 0x08, 0x06, 0x00, 0x00, 0x00, 0x00, 0x89, 0x20, 0x00, 0x00,
-  0x58, 0x00, 0x00, 0x00, 0x32, 0x22, 0x88, 0x09, 0x20, 0x64, 0x85, 0x04,
-  0x13, 0x23, 0xa4, 0x84, 0x04, 0x13, 0x23, 0xe3, 0x84, 0xa1, 0x90, 0x14,
-  0x12, 0x4c, 0x8c, 0x8c, 0x0b, 0x84, 0xc4, 0x4c, 0x10, 0xc0, 0xc1, 0x08,
-  0x40, 0x09, 0x00, 0x0a, 0xe6, 0x08, 0xc0, 0xa0, 0x0c, 0xc3, 0x30, 0x10,
-  0x31, 0x47, 0x00, 0xdd, 0x34, 0x5c, 0xfe, 0x84, 0x3d, 0x84, 0xe4, 0xaf,
-  0x84, 0xb4, 0x12, 0x93, 0x8f, 0xd4, 0x3a, 0x2a, 0x0c, 0xc3, 0x30, 0x86,
-  0x39, 0x02, 0x84, 0x90, 0x7b, 0x86, 0xcb, 0x9f, 0xb0, 0x87, 0x90, 0xfc,
-  0x10, 0x68, 0x86, 0x85, 0x40, 0x41, 0x52, 0x8e, 0x63, 0x50, 0x86, 0x01,
-  0x19, 0x68, 0x29, 0x0b, 0x30, 0x28, 0xc3, 0x60, 0x18, 0x86, 0x81, 0x0c,
-  0xd4, 0xcc, 0x00, 0x94, 0xe1, 0x19, 0x1e, 0x82, 0x4a, 0xf1, 0x0c, 0xcf,
-  0xf3, 0x90, 0x54, 0x88, 0x67, 0x78, 0x1e, 0xa2, 0x8a, 0xf2, 0x0c, 0xcf,
-  0xf3, 0x3c, 0xcf, 0xf3, 0x3c, 0x64, 0x95, 0x62, 0x18, 0x86, 0x61, 0x20,
-  0xec, 0xa8, 0xe1, 0xf2, 0x27, 0xec, 0x21, 0x24, 0x9f, 0xdb, 0xa8, 0x62,
-  0x25, 0x26, 0x1f, 0xb9, 0x6d, 0x44, 0x0c, 0xc3, 0x30, 0x14, 0x42, 0x1b,
-  0x94, 0x81, 0xb6, 0x39, 0x82, 0xa0, 0x18, 0xca, 0x80, 0x0c, 0x43, 0x47,
-  0xde, 0x4d, 0xc3, 0xe5, 0x4f, 0xd8, 0x43, 0x48, 0xfe, 0x4a, 0x48, 0x2b,
-  0x31, 0xf9, 0xc8, 0x6d, 0xa3, 0x62, 0x18, 0x86, 0x61, 0x28, 0x07, 0x38,
-  0x28, 0xc3, 0x80, 0x0c, 0x14, 0x0e, 0x01, 0x14, 0x62, 0x1c, 0x86, 0x81,
-  0xc8, 0x81, 0x80, 0x99, 0xbe, 0x71, 0x60, 0x87, 0x70, 0x98, 0x87, 0x79,
-  0x70, 0x03, 0x59, 0xb8, 0x85, 0x59, 0xa0, 0x07, 0x79, 0xa8, 0x87, 0x71,
-  0xa0, 0x87, 0x7a, 0x90, 0x87, 0x72, 0x20, 0x07, 0x51, 0xa8, 0x07, 0x73,
-  0x30, 0x87, 0x72, 0x90, 0x07, 0x3e, 0x98, 0x07, 0x74, 0x78, 0x07, 0x79,
-  0xa0, 0x07, 0x3f, 0x40, 0x81, 0x41, 0xe7, 0x4c, 0x60, 0x30, 0x0e, 0xec,
-  0x10, 0x0e, 0xf3, 0x30, 0x0f, 0x6e, 0x20, 0x0b, 0xb7, 0x30, 0x0b, 0xf4,
-  0x20, 0x0f, 0xf5, 0x30, 0x0e, 0xf4, 0x50, 0x0f, 0xf2, 0x50, 0x0e, 0xe4,
-  0x20, 0x0a, 0xf5, 0x60, 0x0e, 0xe6, 0x50, 0x0e, 0xf2, 0xc0, 0x07, 0xe4,
-  0xf0, 0x0e, 0xf5, 0x20, 0x0e, 0xec, 0x50, 0x0e, 0x7e, 0x80, 0x82, 0x91,
-  0xd2, 0x61, 0x04, 0x62, 0xb8, 0x84, 0x73, 0x1a, 0x69, 0x02, 0x9a, 0x49,
-  0x42, 0xcb, 0x30, 0x0c, 0xc3, 0xba, 0xae, 0xeb, 0xba, 0x0e, 0xc4, 0xce,
-  0x11, 0x80, 0xc2, 0x14, 0x00, 0x00, 0x00, 0x00, 0x13, 0x14, 0x72, 0xc0,
-  0x87, 0x74, 0x60, 0x87, 0x36, 0x68, 0x87, 0x79, 0x68, 0x03, 0x72, 0xc0,
-  0x87, 0x0d, 0xae, 0x50, 0x0e, 0x6d, 0xd0, 0x0e, 0x7a, 0x50, 0x0e, 0x6d,
-  0x00, 0x0f, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d,
-  0x90, 0x0e, 0x71, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x78,
-  0xa0, 0x07, 0x78, 0xd0, 0x06, 0xe9, 0x10, 0x07, 0x76, 0xa0, 0x07, 0x71,
-  0x60, 0x07, 0x6d, 0x90, 0x0e, 0x73, 0x20, 0x07, 0x7a, 0x30, 0x07, 0x72,
-  0xd0, 0x06, 0xe9, 0x60, 0x07, 0x74, 0xa0, 0x07, 0x76, 0x40, 0x07, 0x6d,
-  0x60, 0x0e, 0x71, 0x60, 0x07, 0x7a, 0x10, 0x07, 0x76, 0xd0, 0x06, 0xe6,
-  0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x60, 0x0e, 0x76,
-  0x40, 0x07, 0x7a, 0x60, 0x07, 0x74, 0xd0, 0x06, 0xee, 0x80, 0x07, 0x7a,
-  0x10, 0x07, 0x76, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x7a, 0x60, 0x07, 0x74,
-  0x30, 0xe4, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x60, 0xc8, 0x43, 0x00, 0x01, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0xc0, 0x90, 0x67, 0x01, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x80, 0x21, 0x4f, 0x03, 0x04, 0xc0, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x43, 0x1e, 0x08, 0x08, 0x80, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0x3c, 0x12, 0x10, 0x00, 0x01,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x79, 0x28, 0x20, 0x00,
-  0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0xf2, 0x58, 0x40,
-  0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0xe4, 0xc1,
-  0x80, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0xc8,
-  0xb3, 0x01, 0x01, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0,
-  0x90, 0xc7, 0x03, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x80, 0x21, 0x4f, 0x18, 0x00, 0x01, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0xc0, 0x90, 0x87, 0x0c, 0x80, 0x00, 0x08, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x20, 0x0b, 0x04, 0x00, 0x0d, 0x00, 0x00, 0x00,
-  0x32, 0x1e, 0x98, 0x14, 0x19, 0x11, 0x4c, 0x90, 0x8c, 0x09, 0x26, 0x47,
-  0xc6, 0x04, 0x43, 0x1a, 0x4a, 0xa0, 0x08, 0x8a, 0x61, 0x04, 0xa0, 0x30,
-  0x0a, 0xa2, 0xd0, 0x03, 0x0a, 0xa1, 0x00, 0x03, 0xa8, 0x1b, 0x01, 0x20,
-  0xb7, 0xd0, 0x01, 0x01, 0x11, 0x48, 0x9d, 0x01, 0xa0, 0x76, 0x06, 0x80,
-  0xd0, 0x19, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00,
-  0x1a, 0x03, 0x4c, 0x90, 0x46, 0x02, 0x13, 0x44, 0x35, 0x18, 0x63, 0x0b,
-  0x73, 0x3b, 0x03, 0xb1, 0x2b, 0x93, 0x9b, 0x4b, 0x7b, 0x73, 0x03, 0x99,
-  0x71, 0xb9, 0x01, 0x41, 0xa1, 0x0b, 0x3b, 0x9b, 0x7b, 0x91, 0x2a, 0x62,
-  0x2a, 0x0a, 0x9a, 0x2a, 0xfa, 0x9a, 0xb9, 0x81, 0x79, 0x31, 0x4b, 0x73,
-  0x0b, 0x63, 0x4b, 0xd9, 0x10, 0x04, 0x13, 0x84, 0xa1, 0x99, 0x20, 0x0c,
-  0xce, 0x06, 0x61, 0x20, 0x26, 0x08, 0xc3, 0xb3, 0x41, 0x18, 0x0c, 0x0a,
-  0x63, 0x73, 0x1b, 0x06, 0xc4, 0x20, 0x26, 0x08, 0x03, 0x34, 0x41, 0x40,
-  0x03, 0x8d, 0xc0, 0x04, 0x61, 0x88, 0x26, 0x08, 0x9d, 0xb5, 0x61, 0x51,
-  0x16, 0x46, 0x51, 0x86, 0xc6, 0x71, 0x9c, 0x62, 0x82, 0xa0, 0x06, 0xd8,
-  0x04, 0x61, 0x90, 0x36, 0x08, 0x43, 0xb4, 0x61, 0x19, 0x20, 0x46, 0x19,
-  0x86, 0xc6, 0x71, 0x1c, 0x69, 0xc3, 0x42, 0x2c, 0x8c, 0x42, 0x0c, 0x8d,
-  0xe3, 0x38, 0xc5, 0x86, 0xe1, 0x99, 0xa8, 0x09, 0x42, 0x1b, 0x64, 0x13,
-  0x84, 0x61, 0xda, 0x80, 0x28, 0x16, 0xa3, 0x28, 0xc3, 0x05, 0x6c, 0x08,
-  0xb0, 0x0d, 0x04, 0x50, 0x65, 0xc0, 0x04, 0x41, 0x00, 0xa8, 0x1c, 0xc9,
-  0xa5, 0x91, 0x4d, 0x85, 0xb5, 0xc1, 0xb1, 0x95, 0x4d, 0x10, 0xdc, 0xe0,
-  0x9a, 0x20, 0x0c, 0xd4, 0x04, 0x61, 0xa8, 0x36, 0x0c, 0xdf, 0x30, 0x6c,
-  0x20, 0x94, 0xce, 0x03, 0x83, 0x0d, 0xc5, 0xc6, 0x01, 0x5a, 0x18, 0x54,
-  0x61, 0x63, 0xb3, 0x6b, 0x73, 0x49, 0x23, 0x2b, 0x73, 0xa3, 0x9b, 0x12,
-  0x04, 0x55, 0xc8, 0xf0, 0x5c, 0xec, 0xca, 0xe4, 0xe6, 0xd2, 0xde, 0xdc,
-  0xa6, 0x04, 0x44, 0x13, 0x32, 0x3c, 0x17, 0xbb, 0x30, 0x36, 0xbb, 0x32,
-  0xb9, 0x29, 0x81, 0x51, 0x87, 0x0c, 0xcf, 0x65, 0x0e, 0x2d, 0x8c, 0xac,
-  0x4c, 0xae, 0xe9, 0x8d, 0xac, 0x8c, 0x6d, 0x4a, 0x80, 0x94, 0x21, 0xc3,
-  0x73, 0x91, 0x2b, 0x9b, 0x7b, 0xab, 0x93, 0x1b, 0x2b, 0x9b, 0x9b, 0x12,
-  0x64, 0x75, 0xc8, 0xf0, 0x5c, 0xca, 0xdc, 0xe8, 0xe4, 0xf2, 0xa0, 0xde,
-  0xd2, 0xdc, 0xe8, 0xe6, 0xa6, 0x04, 0x61, 0x00, 0x79, 0x18, 0x00, 0x00,
-  0x51, 0x00, 0x00, 0x00, 0x33, 0x08, 0x80, 0x1c, 0xc4, 0xe1, 0x1c, 0x66,
-  0x14, 0x01, 0x3d, 0x88, 0x43, 0x38, 0x84, 0xc3, 0x8c, 0x42, 0x80, 0x07,
-  0x79, 0x78, 0x07, 0x73, 0x98, 0x71, 0x0c, 0xe6, 0x00, 0x0f, 0xed, 0x10,
-  0x0e, 0xf4, 0x80, 0x0e, 0x33, 0x0c, 0x42, 0x1e, 0xc2, 0xc1, 0x1d, 0xce,
-  0xa1, 0x1c, 0x66, 0x30, 0x05, 0x3d, 0x88, 0x43, 0x38, 0x84, 0x83, 0x1b,
-  0xcc, 0x03, 0x3d, 0xc8, 0x43, 0x3d, 0x8c, 0x03, 0x3d, 0xcc, 0x78, 0x8c,
-  0x74, 0x70, 0x07, 0x7b, 0x08, 0x07, 0x79, 0x48, 0x87, 0x70, 0x70, 0x07,
-  0x7a, 0x70, 0x03, 0x76, 0x78, 0x87, 0x70, 0x20, 0x87, 0x19, 0xcc, 0x11,
-  0x0e, 0xec, 0x90, 0x0e, 0xe1, 0x30, 0x0f, 0x6e, 0x30, 0x0f, 0xe3, 0xf0,
-  0x0e, 0xf0, 0x50, 0x0e, 0x33, 0x10, 0xc4, 0x1d, 0xde, 0x21, 0x1c, 0xd8,
-  0x21, 0x1d, 0xc2, 0x61, 0x1e, 0x66, 0x30, 0x89, 0x3b, 0xbc, 0x83, 0x3b,
-  0xd0, 0x43, 0x39, 0xb4, 0x03, 0x3c, 0xbc, 0x83, 0x3c, 0x84, 0x03, 0x3b,
-  0xcc, 0xf0, 0x14, 0x76, 0x60, 0x07, 0x7b, 0x68, 0x07, 0x37, 0x68, 0x87,
-  0x72, 0x68, 0x07, 0x37, 0x80, 0x87, 0x70, 0x90, 0x87, 0x70, 0x60, 0x07,
-  0x76, 0x28, 0x07, 0x76, 0xf8, 0x05, 0x76, 0x78, 0x87, 0x77, 0x80, 0x87,
-  0x5f, 0x08, 0x87, 0x71, 0x18, 0x87, 0x72, 0x98, 0x87, 0x79, 0x98, 0x81,
-  0x2c, 0xee, 0xf0, 0x0e, 0xee, 0xe0, 0x0e, 0xf5, 0xc0, 0x0e, 0xec, 0x30,
-  0x03, 0x62, 0xc8, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xcc, 0xa1, 0x1c, 0xe4,
-  0xa1, 0x1c, 0xdc, 0x61, 0x1c, 0xca, 0x21, 0x1c, 0xc4, 0x81, 0x1d, 0xca,
-  0x61, 0x06, 0xd6, 0x90, 0x43, 0x39, 0xc8, 0x43, 0x39, 0x98, 0x43, 0x39,
-  0xc8, 0x43, 0x39, 0xb8, 0xc3, 0x38, 0x94, 0x43, 0x38, 0x88, 0x03, 0x3b,
-  0x94, 0xc3, 0x2f, 0xbc, 0x83, 0x3c, 0xfc, 0x82, 0x3b, 0xd4, 0x03, 0x3b,
-  0xb0, 0xc3, 0x0c, 0xc4, 0x21, 0x07, 0x7c, 0x70, 0x03, 0x7a, 0x28, 0x87,
-  0x76, 0x80, 0x87, 0x19, 0xd1, 0x43, 0x0e, 0xf8, 0xe0, 0x06, 0xe4, 0x20,
-  0x0e, 0xe7, 0xe0, 0x06, 0xf6, 0x10, 0x0e, 0xf2, 0xc0, 0x0e, 0xe1, 0x90,
-  0x0f, 0xef, 0x50, 0x0f, 0xf4, 0x30, 0x83, 0x81, 0xc8, 0x01, 0x1f, 0xdc,
-  0x40, 0x1c, 0xe4, 0xa1, 0x1c, 0xc2, 0x61, 0x1d, 0xdc, 0x40, 0x1c, 0xe4,
-  0x01, 0x00, 0x00, 0x00, 0x71, 0x20, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00,
-  0x06, 0xa0, 0x80, 0x11, 0x32, 0xb0, 0x00, 0xf3, 0x2c, 0x84, 0x19, 0x40,
-  0xc3, 0xe5, 0x3b, 0x8f, 0x1f, 0x20, 0x0d, 0x10, 0x61, 0x7e, 0x71, 0xdb,
-  0x96, 0xb0, 0x0d, 0x97, 0xef, 0x3c, 0xbe, 0x10, 0x50, 0x45, 0x41, 0x44,
-  0xa5, 0x03, 0x0c, 0x25, 0x61, 0x00, 0x02, 0xe6, 0x23, 0xb7, 0x6d, 0x0a,
-  0xd2, 0x70, 0xf9, 0xce, 0xe3, 0x0b, 0x11, 0x01, 0x4c, 0x44, 0x08, 0x34,
-  0xc3, 0x42, 0xd8, 0x81, 0x33, 0x5c, 0xbe, 0xf3, 0xf8, 0x83, 0x33, 0xe1,
-  0x7e, 0x71, 0xdb, 0xc6, 0x40, 0x0d, 0x97, 0xef, 0x3c, 0x3e, 0x03, 0x28,
-  0x44, 0xe7, 0x50, 0xc1, 0x42, 0xf8, 0x85, 0x8e, 0x9b, 0xc0, 0x35, 0x5c,
-  0xbe, 0xf3, 0xf8, 0x11, 0x60, 0x6d, 0x54, 0x51, 0x10, 0x51, 0xe9, 0x00,
-  0x83, 0x8f, 0xd4, 0xba, 0x2d, 0x5c, 0xc3, 0xe5, 0x3b, 0x8f, 0x1f, 0x01,
-  0xd6, 0x46, 0x15, 0x05, 0x11, 0x95, 0x0e, 0x30, 0xf8, 0xc8, 0x6d, 0xdb,
-  0x00, 0x36, 0x5c, 0xbe, 0xf3, 0xf8, 0x11, 0x60, 0x6d, 0x54, 0x51, 0x10,
-  0x11, 0x3b, 0x39, 0x11, 0xe1, 0x23, 0xb5, 0x6e, 0x05, 0xd2, 0x70, 0xf9,
-  0xce, 0xe3, 0x4f, 0x44, 0x34, 0x21, 0x40, 0x84, 0xf9, 0xc5, 0x6d, 0x1b,
-  0x82, 0x34, 0x5c, 0xbe, 0xf3, 0xf8, 0x13, 0x11, 0x4d, 0x08, 0x10, 0x61,
-  0x3e, 0x72, 0xdb, 0x16, 0x20, 0x0d, 0x97, 0xef, 0x3c, 0xfe, 0x74, 0x44,
-  0x04, 0x30, 0x88, 0x83, 0x8f, 0xdc, 0xb6, 0x11, 0x3c, 0xc3, 0xe5, 0x3b,
-  0x8f, 0x4f, 0x35, 0x40, 0x84, 0xf9, 0xc5, 0x6d, 0x03, 0x00, 0x00, 0x00,
-  0x61, 0x20, 0x00, 0x00, 0x15, 0x13, 0x00, 0x00, 0x13, 0x04, 0x24, 0x14,
-  0x0b, 0x04, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x34, 0x14, 0x58, 0xd9,
-  0x95, 0xa5, 0x40, 0x0d, 0x94, 0x51, 0x21, 0x15, 0x57, 0xc1, 0x95, 0x5c,
-  0xd9, 0x14, 0x4b, 0x61, 0x0a, 0x94, 0x72, 0x40, 0xd1, 0x94, 0x6e, 0x40,
-  0x39, 0x94, 0x02, 0x19, 0x33, 0x00, 0x84, 0x94, 0x40, 0x19, 0x14, 0x01,
-  0x3d, 0x23, 0x00, 0x63, 0x04, 0x20, 0x08, 0x82, 0xf8, 0x37, 0x46, 0x00,
-  0x82, 0x20, 0x48, 0xff, 0xc2, 0x18, 0x01, 0x08, 0x82, 0x20, 0xfd, 0x8d,
-  0x11, 0x80, 0x20, 0x08, 0xf2, 0xdf, 0x18, 0x01, 0x08, 0x82, 0x20, 0xfe,
-  0x0b, 0x63, 0x04, 0x20, 0x08, 0x82, 0x21, 0x38, 0x8c, 0x11, 0x80, 0x20,
-  0x08, 0xea, 0xdf, 0x18, 0x01, 0x08, 0x82, 0xa0, 0xfe, 0x0b, 0x63, 0x04,
-  0x20, 0x08, 0x82, 0xf0, 0x37, 0x46, 0x00, 0x82, 0x20, 0x08, 0xff, 0xc2,
-  0x18, 0x01, 0x08, 0x82, 0x20, 0x08, 0x06, 0x00, 0x23, 0x06, 0x09, 0x00,
-  0x82, 0x60, 0xe0, 0xc9, 0x41, 0xf6, 0xb8, 0x81, 0x1b, 0x98, 0xc1, 0x88,
-  0x41, 0x02, 0x80, 0x20, 0x18, 0x78, 0x73, 0xa0, 0x41, 0x70, 0x00, 0x07,
-  0x67, 0x30, 0x62, 0x90, 0x00, 0x20, 0x08, 0x06, 0x1e, 0x1d, 0x6c, 0x91,
-  0x1b, 0xb8, 0x01, 0x1a, 0x8c, 0x18, 0x24, 0x00, 0x08, 0x82, 0x81, 0x57,
-  0x07, 0x1c, 0xf4, 0x06, 0x6f, 0x90, 0x06, 0x23, 0x06, 0x06, 0x00, 0x82,
-  0x60, 0x40, 0xfc, 0xc1, 0x05, 0x07, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60,
-  0xb0, 0xe1, 0x41, 0x19, 0x08, 0x71, 0x30, 0x9a, 0x10, 0x00, 0x15, 0x0c,
-  0x30, 0x9a, 0x30, 0x04, 0xc3, 0x0d, 0x42, 0x40, 0x06, 0xb3, 0x0c, 0xc1,
-  0x08, 0x05, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xb0, 0xf5, 0x81, 0x1a,
-  0x1c, 0x79, 0x30, 0x9a, 0x10, 0x0c, 0x17, 0x3c, 0x35, 0x9a, 0x30, 0x08,
-  0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x9b, 0x28, 0xbc,
-  0x01, 0x13, 0x06, 0xa3, 0x09, 0x01, 0x30, 0xdc, 0x10, 0xf4, 0x01, 0x18,
-  0x4c, 0x37, 0x50, 0x5e, 0x30, 0xdd, 0x50, 0x69, 0x42, 0x21, 0x01, 0x4c,
-  0x37, 0x5c, 0x1c, 0x51, 0x48, 0x00, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60,
-  0xb0, 0xa9, 0xc2, 0x1d, 0x50, 0x68, 0x30, 0x9a, 0x10, 0x04, 0xa3, 0x09,
-  0x82, 0x30, 0x9a, 0x30, 0x0c, 0x15, 0x08, 0x52, 0x03, 0x21, 0x15, 0x0c,
-  0x52, 0x57, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xb0, 0xc9, 0xc2,
-  0x1f, 0x70, 0xac, 0x30, 0x9a, 0x10, 0x00, 0x15, 0x0c, 0x52, 0x5b, 0x10,
-  0x15, 0x20, 0x33, 0x9a, 0x50, 0x04, 0x15, 0x08, 0x52, 0x44, 0x10, 0x15,
-  0x34, 0x33, 0x9a, 0x90, 0x08, 0x15, 0x08, 0x52, 0x44, 0x10, 0xd7, 0x3c,
-  0x75, 0xc5, 0x53, 0x37, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06,
-  0x5b, 0x38, 0xb8, 0xc2, 0x1a, 0xd8, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26,
-  0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0xc3, 0x11, 0x4f, 0x1d,
-  0xf1, 0xd4, 0x11, 0x4f, 0x1d, 0xf1, 0xd4, 0x88, 0x41, 0x03, 0x80, 0x20,
-  0x18, 0x58, 0xec, 0x10, 0x0b, 0xcc, 0xa2, 0x8c, 0x02, 0x31, 0x08, 0x81,
-  0x09, 0x01, 0x7c, 0x4e, 0x18, 0x66, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c,
-  0xc2, 0x60, 0x1d, 0x72, 0x21, 0x0f, 0x02, 0x73, 0x40, 0x05, 0x72, 0x18,
-  0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x31, 0x38, 0x00, 0x10, 0x04,
-  0x03, 0x32, 0x68, 0x07, 0x59, 0x10, 0x82, 0x0b, 0x9e, 0xbb, 0x63, 0x98,
-  0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x08, 0x83, 0x78, 0xf8, 0x85, 0x3f,
-  0x08, 0xd8, 0xc1, 0x15, 0xd4, 0x61, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84,
-  0x60, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xc8, 0x60, 0x1e, 0x70, 0x41,
-  0x08, 0x2e, 0x78, 0x6e, 0xb8, 0xa1, 0x0e, 0xe8, 0x01, 0x0c, 0x0c, 0x89,
-  0x05, 0xf8, 0xd8, 0x20, 0x0b, 0xf0, 0x99, 0x65, 0x10, 0x86, 0xc1, 0x84,
-  0x55, 0x90, 0x8f, 0x09, 0xac, 0x20, 0x1f, 0xf3, 0x83, 0x58, 0x80, 0x8f,
-  0xf5, 0x81, 0x2c, 0xc0, 0xc7, 0x08, 0x41, 0x3e, 0x46, 0x08, 0xf2, 0x99,
-  0x25, 0x20, 0x4c, 0x14, 0x10, 0xf9, 0x18, 0x12, 0x0a, 0xf2, 0x31, 0xe1,
-  0x16, 0xe0, 0x63, 0x02, 0x2e, 0xc0, 0xc7, 0x84, 0x5a, 0x90, 0x8f, 0x09,
-  0xb6, 0x20, 0x9f, 0x59, 0x02, 0x62, 0xa0, 0xe2, 0x81, 0x04, 0x62, 0x18,
-  0xa8, 0x78, 0x20, 0x81, 0x18, 0x46, 0x13, 0x62, 0x41, 0x18, 0x6e, 0x08,
-  0x4c, 0x02, 0x0c, 0x66, 0x19, 0x0a, 0x23, 0x18, 0x31, 0x30, 0x00, 0x10,
-  0x04, 0x03, 0x08, 0x26, 0xd8, 0x81, 0x18, 0x31, 0x30, 0x00, 0x10, 0x04,
-  0x03, 0x28, 0x26, 0xda, 0x81, 0x98, 0x25, 0x30, 0x06, 0x2a, 0x1e, 0xa2,
-  0x60, 0x88, 0x81, 0x8a, 0x87, 0x28, 0x18, 0x62, 0x38, 0x42, 0x50, 0x05,
-  0xe2, 0x1b, 0x8e, 0x18, 0x52, 0x41, 0xf8, 0x4a, 0x08, 0x76, 0x38, 0x82,
-  0x68, 0x05, 0xe2, 0x2b, 0x21, 0xd8, 0xe1, 0x08, 0x63, 0x15, 0x84, 0xaf,
-  0x02, 0x61, 0x67, 0x19, 0x0e, 0x2d, 0x18, 0x4d, 0xf0, 0x85, 0x61, 0xb8,
-  0x21, 0x98, 0x09, 0x30, 0x98, 0x65, 0x40, 0x92, 0xa0, 0x74, 0x61, 0x24,
-  0xe0, 0x82, 0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0xea, 0x09,
-  0x92, 0x68, 0xe6, 0x61, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x28, 0x9f,
-  0x20, 0x89, 0x40, 0x28, 0x5e, 0x38, 0x09, 0xb8, 0xe0, 0xa9, 0x11, 0x83,
-  0x03, 0x00, 0x41, 0x30, 0xa0, 0xc2, 0x02, 0x25, 0xa0, 0x7b, 0x18, 0x31,
-  0x38, 0x00, 0x10, 0x04, 0x03, 0x4a, 0x2c, 0x50, 0x22, 0x10, 0x66, 0x09,
-  0xb4, 0xe1, 0x06, 0x65, 0x27, 0xc0, 0x60, 0x96, 0x41, 0xd1, 0x02, 0xd3,
-  0x05, 0x5e, 0x88, 0xcf, 0x2c, 0xc3, 0xe2, 0x4c, 0xd6, 0x0b, 0x55, 0x7c,
-  0x2c, 0x10, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x41, 0x21, 0x1f,
-  0x2b, 0x82, 0xf8, 0x14, 0x41, 0x16, 0x3a, 0xdc, 0x10, 0x88, 0x05, 0x18,
-  0xcc, 0x32, 0x30, 0x4d, 0x60, 0x43, 0x39, 0xc0, 0x67, 0x96, 0x40, 0x32,
-  0x72, 0x20, 0xe2, 0x33, 0x4b, 0x20, 0xcd, 0x32, 0x3c, 0x12, 0x67, 0x5f,
-  0x39, 0xc4, 0xc7, 0x02, 0x86, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16,
-  0x3c, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x6e, 0xa1, 0xc3, 0x0d, 0x01,
-  0x5b, 0x80, 0xc1, 0x2c, 0x03, 0x14, 0x05, 0xd6, 0x0e, 0x43, 0x7c, 0x66,
-  0x09, 0x24, 0x23, 0xe0, 0x01, 0x3e, 0xb3, 0x04, 0xd2, 0x40, 0xcb, 0x83,
-  0x31, 0x56, 0x43, 0x40, 0x42, 0x24, 0x0b, 0x8e, 0xb9, 0x83, 0x3c, 0xc4,
-  0x67, 0x96, 0x61, 0xb2, 0xcc, 0xc0, 0xe6, 0x41, 0x0d, 0xe2, 0x63, 0x81,
-  0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x0a, 0xf9, 0x58, 0x11,
-  0xc4, 0xa7, 0x08, 0xbd, 0xd0, 0xe1, 0x86, 0x00, 0x2f, 0xc0, 0x60, 0x96,
-  0x81, 0xaa, 0x02, 0x1b, 0xf6, 0x01, 0x3e, 0xb3, 0x04, 0x9a, 0xe1, 0x03,
-  0x11, 0x9f, 0x59, 0x02, 0x6d, 0x96, 0xe1, 0xd2, 0xdc, 0xc0, 0xe8, 0x20,
-  0x1f, 0xe2, 0x63, 0x01, 0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b,
-  0x1e, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0xd2, 0xd0, 0xe1, 0x86, 0x40,
-  0x34, 0xc0, 0x60, 0x96, 0x01, 0xcb, 0x02, 0x0b, 0x89, 0x21, 0x3e, 0xb3,
-  0x04, 0x9a, 0x11, 0x26, 0x01, 0x9f, 0x59, 0x02, 0x6d, 0xa0, 0xe8, 0x11,
-  0x07, 0xc4, 0x1f, 0x12, 0x7f, 0x30, 0xd8, 0x20, 0x63, 0x03, 0x8c, 0x0d,
-  0x2c, 0x36, 0xa8, 0xd8, 0x80, 0x1a, 0x28, 0x7a, 0x78, 0x01, 0xf1, 0x87,
-  0xc4, 0x1f, 0x0c, 0x22, 0x33, 0x30, 0x7f, 0xb0, 0xb0, 0x4a, 0xa3, 0x0e,
-  0x1f, 0x9e, 0x9a, 0x65, 0xd8, 0xe6, 0xa0, 0x14, 0x46, 0x13, 0x6e, 0x62,
-  0x18, 0x6e, 0x08, 0x52, 0x03, 0x0c, 0x66, 0x19, 0x38, 0x2f, 0x18, 0x8e,
-  0x28, 0xd4, 0x62, 0xf8, 0xce, 0x18, 0x66, 0xb8, 0x21, 0xa8, 0x09, 0x32,
-  0xa8, 0x21, 0xd0, 0xe1, 0x08, 0xc4, 0x2d, 0x86, 0xaf, 0x02, 0x41, 0x4f,
-  0x19, 0x66, 0xb8, 0x21, 0xc0, 0x09, 0x32, 0xa8, 0x60, 0xd0, 0x59, 0x86,
-  0x4e, 0x0e, 0x82, 0xe3, 0x87, 0x61, 0xae, 0x19, 0x66, 0xc4, 0xe0, 0x00,
-  0x40, 0x10, 0x0c, 0x36, 0xde, 0x48, 0x0d, 0xb3, 0xb8, 0x8d, 0xd1, 0x84,
-  0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86,
-  0x22, 0x0e, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x6c, 0x3c, 0x60,
-  0xe3, 0x20, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xc0, 0xc8, 0x23,
-  0x36, 0x18, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xac, 0x3c,
-  0x64, 0x43, 0x22, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x58, 0xd8,
-  0x23, 0x36, 0xe0, 0x22, 0xf0, 0x8d, 0xd0, 0x00, 0x8f, 0xd1, 0x84, 0x00,
-  0xb8, 0xe0, 0xb1, 0x59, 0x02, 0x39, 0x18, 0x6e, 0xc8, 0xc8, 0x03, 0x0c,
-  0x66, 0x19, 0x3e, 0x30, 0x08, 0x6a, 0x2d, 0x68, 0x03, 0x2e, 0x78, 0x6a,
-  0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x28, 0xf7, 0xa8, 0x8d, 0x8f, 0x34,
-  0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x7a, 0x8f, 0xda, 0x08, 0x84,
-  0x0b, 0x86, 0x29, 0xb7, 0xc8, 0x0d, 0xb8, 0xe0, 0xa9, 0x11, 0x83, 0x03,
-  0x00, 0x41, 0x30, 0xa0, 0xe6, 0x43, 0x37, 0xc6, 0x20, 0x35, 0x46, 0x0c,
-  0x0e, 0x00, 0x04, 0xc1, 0x80, 0xa2, 0x0f, 0xdd, 0x08, 0x84, 0x0b, 0x86,
-  0xb9, 0xe0, 0xa9, 0x3b, 0x9e, 0xba, 0x9b, 0x18, 0xe6, 0xd0, 0x60, 0x98,
-  0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xd8,
-  0xf2, 0xc3, 0x3c, 0x46, 0x83, 0x3e, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41,
-  0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4,
-  0x00, 0x01, 0x40, 0x10, 0x0c, 0x30, 0x10, 0x69, 0x8f, 0x84, 0x08, 0x46,
-  0x0c, 0x10, 0x00, 0x04, 0xc1, 0x00, 0x0b, 0x11, 0xf7, 0x48, 0x88, 0x60,
-  0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x30, 0x11, 0x79, 0x8f, 0x84, 0x08,
-  0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0x49, 0x11, 0xf7, 0x68, 0x8d,
-  0x60, 0x3f, 0x7c, 0xa3, 0x3f, 0x46, 0x13, 0x02, 0xe0, 0x82, 0xc7, 0x66,
-  0x09, 0xe4, 0x60, 0xb8, 0xc1, 0x0e, 0x40, 0x04, 0x0c, 0x66, 0x19, 0xc2,
-  0x40, 0x0e, 0x02, 0xfb, 0x8b, 0xd0, 0x88, 0xcf, 0x70, 0xc4, 0x1e, 0x88,
-  0x06, 0xf1, 0xcd, 0x32, 0x88, 0x41, 0x19, 0x04, 0x36, 0x1a, 0x7c, 0x10,
-  0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x60, 0xc8,
-  0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xa8, 0x88, 0x0e, 0x37, 0x04, 0x28, 0x02,
-  0x06, 0xb3, 0x0c, 0x63, 0x40, 0x06, 0x81, 0x0d, 0xab, 0x01, 0x9f, 0x59,
-  0x82, 0x34, 0x30, 0xd5, 0x20, 0xe2, 0x33, 0x4b, 0x90, 0x06, 0xc3, 0x11,
-  0xa6, 0xb0, 0x1a, 0xc2, 0x37, 0xcb, 0x60, 0x06, 0x69, 0x10, 0xd8, 0x29,
-  0xb0, 0x46, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65,
-  0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x51, 0x23, 0x3a, 0xdc, 0x10,
-  0xcc, 0x08, 0x18, 0xcc, 0x32, 0x9c, 0x01, 0x1a, 0x04, 0x46, 0x1b, 0x43,
-  0x7c, 0x66, 0x09, 0xd2, 0xc0, 0x88, 0xdb, 0x80, 0xcf, 0x2c, 0x41, 0x1a,
-  0x0c, 0xb4, 0x3c, 0xda, 0x18, 0x60, 0x64, 0x40, 0x9c, 0x81, 0x80, 0x06,
-  0x62, 0x51, 0x06, 0x17, 0x0c, 0x63, 0xb6, 0xa1, 0x1b, 0xf1, 0x19, 0x8e,
-  0x98, 0x85, 0xdd, 0x20, 0xbe, 0x59, 0x06, 0x35, 0x68, 0x83, 0xc0, 0x78,
-  0x83, 0x16, 0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29,
-  0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0x31, 0xd1, 0xe1, 0x86,
-  0x20, 0x4c, 0xc0, 0x60, 0x96, 0x61, 0x0d, 0xd8, 0x20, 0xb0, 0x81, 0x3c,
-  0xe0, 0x33, 0x4b, 0x10, 0x07, 0x16, 0x1e, 0x44, 0x7c, 0x66, 0x09, 0xe2,
-  0x60, 0x38, 0xc2, 0x17, 0xc4, 0x43, 0xf8, 0x66, 0x19, 0xdc, 0x20, 0x0e,
-  0x02, 0xfb, 0x85, 0xf1, 0x88, 0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6,
-  0x82, 0xa7, 0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xdc, 0x44,
-  0x87, 0x1b, 0x02, 0x36, 0x01, 0x83, 0x59, 0x86, 0x37, 0x80, 0x83, 0xc0,
-  0xd6, 0x63, 0x88, 0xcf, 0x2c, 0x41, 0x1c, 0x18, 0x01, 0x1f, 0xf0, 0x99,
-  0x25, 0x88, 0x83, 0x81, 0x96, 0x47, 0x5b, 0x03, 0x8c, 0x0d, 0x88, 0x37,
-  0x10, 0xe0, 0x40, 0x36, 0xda, 0xe0, 0x82, 0x61, 0x2e, 0x78, 0xea, 0xb6,
-  0xa7, 0x8e, 0x37, 0x86, 0xb9, 0x76, 0x18, 0xe6, 0x88, 0x61, 0x8e, 0x18,
-  0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x36, 0x3f, 0x59, 0x13, 0x14,
-  0xc9, 0x93, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41,
-  0x18, 0x4d, 0x20, 0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04,
-  0x03, 0xac, 0x54, 0xe4, 0x24, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41,
-  0x30, 0xc0, 0x4c, 0x65, 0x4e, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10,
-  0x04, 0x03, 0xec, 0x54, 0xe8, 0x24, 0x21, 0x82, 0x11, 0x03, 0x05, 0x00,
-  0x41, 0x30, 0x58, 0x5c, 0x65, 0x4e, 0x64, 0x24, 0x00, 0x95, 0x31, 0x11,
-  0x95, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xb1, 0x59, 0x02, 0x39, 0x18, 0x68,
-  0x79, 0x4c, 0xa3, 0x33, 0x25, 0x8e, 0x25, 0x3e, 0x21, 0x0e, 0x4c, 0x09,
-  0x0c, 0x2e, 0x30, 0x68, 0xc4, 0xc0, 0x01, 0x40, 0x10, 0x0c, 0x1a, 0x59,
-  0x79, 0x93, 0x1c, 0x91, 0x91, 0x52, 0x09, 0xd2, 0x24, 0x4d, 0xd2, 0x04,
-  0x4d, 0x4e, 0x65, 0x96, 0x60, 0x84, 0x86, 0x1b, 0x46, 0xa3, 0x54, 0xc0,
-  0x60, 0x96, 0x81, 0x0e, 0x62, 0x22, 0x18, 0x31, 0x30, 0x00, 0x10, 0x04,
-  0x03, 0x48, 0x56, 0xe4, 0x24, 0x24, 0x46, 0x0c, 0x0c, 0x00, 0x04, 0xc1,
-  0x00, 0x9a, 0x95, 0x39, 0x09, 0x09, 0x13, 0xce, 0x04, 0x3e, 0x26, 0xa0,
-  0x09, 0x7c, 0x46, 0x13, 0x72, 0x64, 0x18, 0x6e, 0x08, 0x56, 0x05, 0x0c,
-  0x66, 0x19, 0xea, 0xe0, 0x0e, 0x82, 0xe1, 0x08, 0x83, 0x4d, 0x86, 0xef,
-  0x8e, 0x61, 0x86, 0x1b, 0x82, 0x1b, 0x21, 0x83, 0x1a, 0x02, 0x1d, 0x8e,
-  0x48, 0xe0, 0x64, 0xf8, 0x2a, 0x10, 0xf4, 0x96, 0x61, 0x86, 0x1b, 0x02,
-  0x1d, 0x21, 0x83, 0x0a, 0x06, 0x9d, 0x65, 0xb0, 0x83, 0x55, 0x08, 0xce,
-  0x3f, 0x86, 0xb9, 0x97, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c,
-  0x36, 0x5f, 0x59, 0x15, 0x34, 0xc9, 0x95, 0xd1, 0x84, 0x00, 0x18, 0x4d,
-  0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x0e, 0x19,
-  0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xac, 0x5c, 0x64, 0xe5, 0x20, 0x82,
-  0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xc0, 0xcc, 0x65, 0x56, 0x18, 0x22,
-  0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xec, 0x5c, 0x68, 0x45, 0x22,
-  0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x58, 0xdc, 0x65, 0x56, 0xe4,
-  0x24, 0x00, 0x97, 0x51, 0x11, 0x97, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xb1,
-  0x59, 0x82, 0x55, 0x18, 0x6e, 0xc8, 0xcc, 0x05, 0x0c, 0x66, 0x19, 0xf0,
-  0x20, 0x0f, 0x82, 0x6a, 0x13, 0x5b, 0x81, 0x0b, 0x9e, 0x1a, 0x31, 0x38,
-  0x00, 0x10, 0x04, 0x03, 0x0a, 0x5e, 0x6e, 0x05, 0x0c, 0x4c, 0x65, 0xc4,
-  0xe0, 0x00, 0x40, 0x10, 0x0c, 0xa8, 0x78, 0xb9, 0x95, 0x40, 0xb8, 0x60,
-  0x98, 0x82, 0x93, 0x5d, 0x81, 0x0b, 0x9e, 0x1a, 0x31, 0x38, 0x00, 0x10,
-  0x04, 0x03, 0xaa, 0x5e, 0x78, 0x85, 0x0c, 0x56, 0x65, 0xc4, 0xe0, 0x00,
-  0x40, 0x10, 0x0c, 0x28, 0x7b, 0xe1, 0x95, 0x40, 0xb8, 0x60, 0x98, 0x0b,
-  0x9e, 0xba, 0xe3, 0xa9, 0xcb, 0x91, 0x61, 0x4e, 0x2d, 0x86, 0x39, 0x62,
-  0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x6d, 0x5f,
-  0xd0, 0xa5, 0x54, 0xec, 0x65, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60,
-  0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10,
-  0x00, 0x04, 0xc1, 0x00, 0x13, 0x99, 0x77, 0x49, 0x88, 0x60, 0xc4, 0x00,
-  0x01, 0x40, 0x10, 0x0c, 0xb0, 0x91, 0x81, 0x97, 0x84, 0x08, 0x46, 0x0c,
-  0x10, 0x00, 0x04, 0xc1, 0x00, 0x23, 0x99, 0x78, 0x49, 0x88, 0x60, 0xc4,
-  0x40, 0x01, 0x40, 0x10, 0x0c, 0x96, 0x95, 0x81, 0x97, 0x57, 0x09, 0xfa,
-  0x05, 0x5c, 0xfe, 0x65, 0x34, 0x21, 0x00, 0x2e, 0x78, 0x6c, 0x96, 0x60,
-  0x15, 0x86, 0x1b, 0xec, 0x40, 0x64, 0xc0, 0x60, 0x96, 0x41, 0x0f, 0x56,
-  0x21, 0xb0, 0x50, 0x19, 0x95, 0xf8, 0x0c, 0x47, 0xf0, 0x01, 0xa9, 0x10,
-  0xdf, 0x2c, 0xc3, 0x1e, 0xf8, 0x41, 0x60, 0xa5, 0xd2, 0x07, 0xf1, 0xb1,
-  0x60, 0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x86, 0x7c, 0xac,
-  0x08, 0xe2, 0x53, 0x04, 0xcb, 0xe8, 0x70, 0x43, 0xa0, 0x32, 0x60, 0x30,
-  0xcb, 0xc0, 0x07, 0x7d, 0x10, 0xd8, 0xd0, 0x2a, 0xf0, 0x99, 0x25, 0x10,
-  0x05, 0x63, 0x15, 0x22, 0x3e, 0xb3, 0x04, 0xa2, 0x30, 0x1c, 0x71, 0x0a,
-  0xad, 0x22, 0x7c, 0xb3, 0x0c, 0x7f, 0x20, 0x0a, 0x81, 0xa1, 0x82, 0xab,
-  0xc4, 0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x44,
-  0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x37, 0xa3, 0xc3, 0x0d, 0x41, 0xcd,
-  0x80, 0xc1, 0x2c, 0x03, 0x28, 0x84, 0x42, 0x60, 0xb6, 0x32, 0xc4, 0x67,
-  0x96, 0x40, 0x14, 0x8c, 0xc8, 0x15, 0xf8, 0xcc, 0x12, 0x88, 0xc2, 0x40,
-  0xcb, 0xa3, 0xf1, 0x01, 0xd6, 0x07, 0x04, 0x28, 0x08, 0xa1, 0x40, 0x16,
-  0x7e, 0x70, 0xc1, 0x30, 0x86, 0x2b, 0xbc, 0x12, 0x9f, 0xe1, 0x08, 0x5a,
-  0xe8, 0x15, 0xe2, 0x9b, 0x65, 0x18, 0x05, 0x53, 0x08, 0xcc, 0x57, 0x6a,
-  0x21, 0x3e, 0x16, 0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0xc0,
-  0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x28, 0x1b, 0x1d, 0x6e, 0x08, 0xc6,
-  0x06, 0x0c, 0x66, 0x19, 0x48, 0xa1, 0x14, 0x02, 0x1b, 0xcc, 0x05, 0x3e,
-  0xb3, 0x04, 0xaa, 0x60, 0xe3, 0x42, 0xc4, 0x67, 0x96, 0x40, 0x15, 0x86,
-  0x23, 0x7e, 0x81, 0x5c, 0x84, 0x6f, 0x96, 0xe1, 0x14, 0x54, 0x21, 0x30,
-  0x70, 0x28, 0x97, 0xf8, 0x58, 0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78,
-  0xca, 0x82, 0x48, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x02, 0x6e, 0x74, 0xb8,
-  0x21, 0x70, 0x1b, 0x30, 0x98, 0x65, 0x40, 0x85, 0x54, 0x08, 0xac, 0x5d,
-  0x86, 0xf8, 0xcc, 0x12, 0xa8, 0x82, 0x11, 0xf2, 0x02, 0x9f, 0x59, 0x02,
-  0x55, 0x18, 0x68, 0x79, 0x34, 0x52, 0xc0, 0x4a, 0x81, 0x40, 0x05, 0x21,
-  0x15, 0x68, 0xc3, 0x14, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x6e, 0x7b, 0xea,
-  0x7c, 0x65, 0x98, 0x7b, 0x8f, 0x61, 0x8e, 0x18, 0xe6, 0x88, 0x61, 0x46,
-  0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0x03, 0x9d, 0xb6, 0x51, 0x99, 0xbd,
-  0x19, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1,
-  0x04, 0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xc0,
-  0x4e, 0x87, 0x6e, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03,
-  0x0c, 0x75, 0xea, 0x26, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30,
-  0xc0, 0x52, 0xc7, 0x6e, 0x12, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04,
-  0x83, 0x05, 0x76, 0xea, 0x86, 0x66, 0x02, 0xd1, 0x29, 0x1b, 0xd2, 0x19,
-  0x4d, 0x08, 0x80, 0x0b, 0x1e, 0x9b, 0x25, 0x58, 0x85, 0x81, 0x96, 0xc7,
-  0x34, 0xec, 0x40, 0xd5, 0xea, 0x80, 0x25, 0xf0, 0x40, 0x50, 0x05, 0x55,
-  0xcb, 0x83, 0x59, 0x06, 0x56, 0x70, 0x85, 0x7d, 0x18, 0x8e, 0xf0, 0x07,
-  0xb3, 0x19, 0xbe, 0xfb, 0x87, 0x61, 0x86, 0x1b, 0x82, 0x98, 0x21, 0x83,
-  0x1a, 0x02, 0x1d, 0x8e, 0x18, 0x09, 0xb5, 0x19, 0xbe, 0x0a, 0x04, 0xbd,
-  0x92, 0x18, 0x66, 0xb8, 0x21, 0xa0, 0x19, 0x32, 0xa8, 0x60, 0xd0, 0x59,
-  0x86, 0x56, 0x10, 0x87, 0xe0, 0xf0, 0x65, 0x98, 0x4b, 0x91, 0x61, 0x46,
-  0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0xc3, 0x9d, 0xd2, 0x11, 0x9b, 0xd9,
-  0x19, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1,
-  0x04, 0x62, 0x28, 0xe2, 0x90, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xc0,
-  0x7e, 0x87, 0x75, 0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03,
-  0x0c, 0x7c, 0x5a, 0x87, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30,
-  0xc0, 0xc2, 0xc7, 0x75, 0x24, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04,
-  0x83, 0x05, 0x7d, 0x5a, 0x87, 0x6d, 0x02, 0xdd, 0xe9, 0x1b, 0xde, 0x19,
-  0x4d, 0x08, 0x80, 0x0b, 0x1e, 0x9b, 0x25, 0x10, 0x87, 0xe1, 0x86, 0x99,
-  0x00, 0x1f, 0x30, 0x98, 0x65, 0x78, 0x05, 0x58, 0x08, 0xea, 0x6c, 0x60,
-  0x07, 0x2e, 0x78, 0x6a, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x28, 0xf5,
-  0x89, 0x1d, 0x9c, 0x00, 0x9d, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0,
-  0xd6, 0x27, 0x76, 0x02, 0xe1, 0x82, 0x61, 0x4a, 0x6d, 0x6a, 0x07, 0x2e,
-  0x78, 0x6a, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xa8, 0xf7, 0xb1, 0x1d,
-  0x9f, 0x28, 0x9d, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0xe0, 0xc7,
-  0x76, 0x02, 0xe1, 0x82, 0x61, 0x2e, 0x78, 0xea, 0x8e, 0xa7, 0x6e, 0x66,
-  0x86, 0x39, 0x32, 0x19, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0,
-  0x00, 0x40, 0x10, 0x0c, 0xb6, 0xfa, 0x11, 0x9f, 0xbf, 0x81, 0x9f, 0xd1,
-  0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20,
-  0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x8c, 0x7f,
-  0xd2, 0x27, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xc0, 0xfa,
-  0x47, 0x7d, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xcc,
-  0x7f, 0xd6, 0x27, 0x21, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x58,
-  0x4a, 0x48, 0x7d, 0x52, 0x27, 0xb8, 0x1f, 0xdd, 0xc9, 0x9f, 0xd1, 0x84,
-  0x00, 0xb8, 0xe0, 0xb1, 0x59, 0x02, 0x71, 0x18, 0x6e, 0x80, 0x0b, 0xfe,
-  0x01, 0x83, 0x59, 0x86, 0x58, 0x10, 0x87, 0xc0, 0xf6, 0xa6, 0x6f, 0xe2,
-  0x33, 0x1c, 0x41, 0x17, 0x7e, 0x43, 0x7c, 0xb3, 0x0c, 0xb2, 0x50, 0x0b,
-  0x81, 0xfd, 0x4d, 0x5d, 0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73,
-  0xc1, 0x53, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x26, 0xa4,
-  0xc3, 0x0d, 0x01, 0x09, 0x81, 0xc1, 0x2c, 0xc3, 0x2c, 0xd0, 0x42, 0x60,
-  0xc3, 0xe9, 0xc0, 0x67, 0x96, 0x20, 0x17, 0xcc, 0x74, 0x88, 0xf8, 0xcc,
-  0x12, 0xe4, 0xc2, 0x70, 0xc4, 0x5f, 0x9c, 0x8e, 0xf0, 0xcd, 0x32, 0xd8,
-  0x42, 0x2e, 0x04, 0x06, 0x1a, 0xa8, 0x13, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c,
-  0x30, 0xcc, 0x05, 0x4f, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45,
-  0xc4, 0x90, 0x0e, 0x37, 0x04, 0x2f, 0x04, 0x06, 0xb3, 0x0c, 0xb7, 0x80,
-  0x0b, 0x81, 0xc1, 0xce, 0x10, 0x9f, 0x59, 0x82, 0x5c, 0x30, 0x62, 0x76,
-  0xe0, 0x33, 0x4b, 0x90, 0x0b, 0x03, 0x2d, 0x8f, 0x36, 0x0b, 0x18, 0x2d,
-  0x10, 0xb7, 0x20, 0xe0, 0x02, 0xcd, 0xd4, 0xc2, 0x05, 0xc3, 0x98, 0xec,
-  0xd8, 0x4e, 0x7c, 0x86, 0x23, 0x5c, 0xe3, 0x76, 0x88, 0x6f, 0x96, 0x41,
-  0x17, 0x7a, 0x21, 0x30, 0xdc, 0x79, 0x8d, 0xf8, 0x58, 0x30, 0xd0, 0xe7,
-  0x82, 0x61, 0x2e, 0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29,
-  0xe2, 0x87, 0x74, 0xb8, 0x21, 0xe8, 0x21, 0x30, 0x98, 0x65, 0xd8, 0x05,
-  0x5e, 0x08, 0x6c, 0x00, 0x1f, 0xf8, 0xcc, 0x12, 0x84, 0x83, 0xf5, 0x0e,
-  0x11, 0x9f, 0x59, 0x82, 0x70, 0x18, 0x8e, 0xc8, 0x0d, 0xdf, 0x11, 0xbe,
-  0x59, 0x06, 0x5f, 0x08, 0x87, 0xc0, 0x74, 0xe3, 0x77, 0xe2, 0x63, 0x81,
-  0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x22, 0xf9, 0x58, 0x11,
-  0xc4, 0xa7, 0x08, 0x35, 0xd2, 0xe1, 0x86, 0x00, 0x8d, 0xc0, 0x60, 0x96,
-  0xe1, 0x17, 0xc0, 0x21, 0xb0, 0xf3, 0x19, 0xe2, 0x33, 0x4b, 0x10, 0x0e,
-  0x46, 0xb0, 0x0f, 0x7c, 0x66, 0x09, 0xc2, 0x61, 0xa0, 0xe5, 0xd1, 0x76,
-  0x01, 0xe3, 0x05, 0xe2, 0x17, 0x04, 0x70, 0x40, 0x9d, 0x5e, 0xb8, 0x60,
-  0x98, 0x0b, 0x9e, 0xba, 0xed, 0xa9, 0xc3, 0x9d, 0x61, 0x2e, 0x5d, 0x86,
-  0x39, 0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83,
-  0x4d, 0x8f, 0xce, 0x88, 0x84, 0xea, 0x68, 0x34, 0x21, 0x00, 0x46, 0x13,
-  0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46,
-  0x0c, 0x10, 0x00, 0x04, 0xc1, 0x00, 0x0b, 0x25, 0x37, 0x4a, 0x88, 0x60,
-  0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x30, 0x51, 0x7a, 0xa3, 0x84, 0x08,
-  0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x00, 0x1b, 0x25, 0x38, 0x4a, 0x88,
-  0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x16, 0x55, 0x7a, 0x23, 0x17,
-  0x0a, 0xf8, 0xe8, 0x87, 0xfc, 0x68, 0x34, 0x21, 0x00, 0x2e, 0x78, 0x6c,
-  0x96, 0x40, 0x1c, 0x06, 0x5a, 0x1e, 0xd3, 0x68, 0x05, 0x3f, 0x0c, 0x58,
-  0x81, 0x25, 0x5e, 0x41, 0x08, 0x07, 0x3f, 0x0c, 0x60, 0x61, 0x96, 0x61,
-  0x1c, 0xca, 0xa1, 0x3e, 0x86, 0x23, 0xf4, 0x03, 0x8c, 0x86, 0xef, 0xf6,
-  0x63, 0x98, 0xe1, 0x86, 0x60, 0x85, 0xc8, 0xa0, 0x86, 0x40, 0x87, 0x23,
-  0xf6, 0x83, 0x8c, 0x86, 0xaf, 0x02, 0x41, 0xaf, 0x3f, 0x86, 0x19, 0x6e,
-  0x08, 0x5c, 0x88, 0x0c, 0x2a, 0x18, 0x74, 0x96, 0x81, 0x1c, 0xf2, 0x21,
-  0x38, 0xf9, 0x19, 0xe6, 0x46, 0x66, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41,
-  0x30, 0xd8, 0x64, 0xe9, 0x8f, 0x78, 0xa8, 0x95, 0x46, 0x13, 0x02, 0x60,
-  0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x38,
-  0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb0, 0x5c, 0x32, 0xa5, 0x83,
-  0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x00, 0xd3, 0xa5, 0x53, 0x62,
-  0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb0, 0x5d, 0x42, 0x25,
-  0x89, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0x11, 0xa7, 0x53,
-  0x32, 0xa3, 0x80, 0x96, 0xee, 0xc8, 0x96, 0x46, 0x13, 0x02, 0xe0, 0x82,
-  0xc7, 0x66, 0x09, 0xf2, 0x61, 0xb8, 0xa1, 0x45, 0x74, 0x09, 0x0c, 0x66,
-  0x19, 0xcc, 0xe1, 0x1c, 0x82, 0x0a, 0x23, 0x55, 0x82, 0x0b, 0x9e, 0x1a,
-  0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x8a, 0x9c, 0x56, 0x89, 0x46, 0xf4,
-  0x68, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xa8, 0x72, 0x5a, 0xa5, 0x40,
-  0xb8, 0x60, 0x98, 0x22, 0xa3, 0x57, 0x82, 0x0b, 0x9e, 0x1a, 0x31, 0x38,
-  0x00, 0x10, 0x04, 0x03, 0x2a, 0x9d, 0x60, 0xc9, 0x46, 0xfe, 0x68, 0xc4,
-  0xe0, 0x00, 0x40, 0x10, 0x0c, 0x28, 0x75, 0x82, 0xa5, 0x40, 0xb8, 0x60,
-  0x98, 0x0b, 0x9e, 0xba, 0xe3, 0xa9, 0x6b, 0xa1, 0x61, 0xce, 0x67, 0x86,
-  0x39, 0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83,
-  0xed, 0x9d, 0x78, 0x29, 0x8f, 0xd4, 0x69, 0x34, 0x21, 0x00, 0x46, 0x13,
-  0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46,
-  0x0c, 0x10, 0x00, 0x04, 0xc1, 0x00, 0xb3, 0xa7, 0x71, 0x4a, 0x88, 0x60,
-  0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb0, 0x7b, 0x22, 0xa7, 0x84, 0x08,
-  0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x00, 0xc3, 0xa7, 0x72, 0x4a, 0x88,
-  0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x96, 0x7f, 0x22, 0xa7, 0x51,
-  0x0a, 0xe2, 0x89, 0x96, 0xe6, 0x69, 0x34, 0x21, 0x00, 0x2e, 0x78, 0x6c,
-  0x96, 0x20, 0x1f, 0x86, 0x1b, 0xd4, 0xc4, 0x9e, 0xc0, 0x60, 0x96, 0x01,
-  0x1d, 0xf2, 0x21, 0xb0, 0x3a, 0xba, 0xa3, 0xf8, 0x0c, 0x47, 0xc0, 0x09,
-  0x1e, 0x11, 0xdf, 0x2c, 0x43, 0x3a, 0xb0, 0x43, 0x60, 0x79, 0x14, 0x27,
-  0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x86,
-  0x7c, 0xac, 0x08, 0xe2, 0x53, 0x04, 0x48, 0xe9, 0x70, 0x43, 0xe0, 0x4f,
-  0x60, 0x30, 0xcb, 0xa0, 0x0e, 0xeb, 0x10, 0xd8, 0x10, 0x4a, 0xf0, 0x99,
-  0x25, 0x80, 0x07, 0x03, 0x25, 0x22, 0x3e, 0xb3, 0x04, 0xf0, 0x30, 0x1c,
-  0xb1, 0x27, 0xa1, 0x24, 0x7c, 0xb3, 0x0c, 0xed, 0x00, 0x0f, 0x81, 0xf1,
-  0x89, 0x28, 0xc5, 0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53,
-  0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x2b, 0xa5, 0xc3, 0x0d,
-  0x41, 0x4a, 0x81, 0xc1, 0x2c, 0x83, 0x3b, 0xbc, 0x43, 0x60, 0xaa, 0x34,
-  0xc4, 0x67, 0x96, 0x00, 0x1e, 0x8c, 0x68, 0x25, 0xf8, 0xcc, 0x12, 0xc0,
-  0xc3, 0x40, 0xcb, 0xa3, 0xa9, 0x03, 0xb6, 0x0e, 0x84, 0x3b, 0x08, 0xef,
-  0xc0, 0x52, 0xec, 0x70, 0xc1, 0x30, 0xc6, 0x4a, 0xb0, 0x14, 0x9f, 0xe1,
-  0x08, 0x53, 0x89, 0x25, 0xe2, 0x9b, 0x65, 0x88, 0x07, 0x7a, 0x08, 0x4c,
-  0x96, 0x4e, 0x25, 0x3e, 0x16, 0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e,
-  0xb2, 0xc0, 0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xc8, 0x29, 0x1d, 0x6e,
-  0x08, 0x6e, 0x0a, 0x0c, 0x66, 0x19, 0xe4, 0x61, 0x1e, 0x02, 0x1b, 0x74,
-  0x09, 0x3e, 0xb3, 0x04, 0xf8, 0x60, 0xb7, 0x44, 0xc4, 0x67, 0x96, 0x00,
-  0x1f, 0x86, 0x23, 0x62, 0x05, 0x97, 0x84, 0x6f, 0x96, 0xa1, 0x1e, 0xf0,
-  0x21, 0x30, 0x59, 0xc9, 0xa5, 0xf8, 0x58, 0xe0, 0xd0, 0xe7, 0x82, 0x61,
-  0x2e, 0x78, 0xca, 0x82, 0x48, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x82, 0xac,
-  0x74, 0xb8, 0x21, 0x10, 0x2b, 0x30, 0x98, 0x65, 0xb0, 0x87, 0x7b, 0x08,
-  0x2c, 0x9c, 0x86, 0xf8, 0xcc, 0x12, 0xe0, 0x83, 0x11, 0xe6, 0x04, 0x9f,
-  0x59, 0x02, 0x7c, 0x18, 0x68, 0x79, 0x34, 0x79, 0xc0, 0xe6, 0x81, 0xb0,
-  0x07, 0xe1, 0x1e, 0xf0, 0x8a, 0x1e, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x6e,
-  0x7b, 0xea, 0x64, 0x69, 0x98, 0x1b, 0x9f, 0x61, 0x8e, 0x18, 0xe6, 0x88,
-  0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0xa3, 0xab, 0xb0, 0xf2,
-  0xa7, 0xb7, 0x1a, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18,
-  0x84, 0xd1, 0x04, 0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00, 0x41,
-  0x30, 0xc0, 0xf6, 0x0a, 0xad, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10,
-  0x04, 0x03, 0x8c, 0xaf, 0xd2, 0x2a, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00,
-  0x41, 0x30, 0xc0, 0xfa, 0x4a, 0xad, 0x12, 0x22, 0x18, 0x31, 0x50, 0x00,
-  0x10, 0x04, 0x83, 0x85, 0xb4, 0xd2, 0x0a, 0xa5, 0x02, 0xbb, 0xca, 0x29,
-  0xbc, 0x1a, 0x4d, 0x08, 0x80, 0x0b, 0x1e, 0x9b, 0x25, 0xc8, 0x87, 0x81,
-  0x96, 0xc7, 0x34, 0xc8, 0x41, 0x4e, 0x83, 0x71, 0x60, 0x09, 0x73, 0x10,
-  0xf0, 0x41, 0x4e, 0x83, 0x73, 0x98, 0x65, 0xd0, 0x07, 0x7e, 0x78, 0x97,
-  0xe1, 0x08, 0x79, 0xd1, 0xa9, 0xe1, 0xbb, 0x79, 0x19, 0x66, 0xb8, 0x21,
-  0x28, 0x29, 0x32, 0xa8, 0x21, 0xd0, 0xe1, 0x88, 0x7a, 0xf1, 0xa9, 0xe1,
-  0xab, 0x40, 0xd0, 0xbb, 0x97, 0x61, 0x86, 0x1b, 0x02, 0x94, 0x22, 0x83,
-  0x0a, 0x06, 0x9d, 0x65, 0xd8, 0x07, 0x98, 0x08, 0x8e, 0x9d, 0x86, 0xb9,
-  0xfe, 0x19, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x36, 0xd6, 0xca,
-  0x2b, 0x9b, 0x3a, 0xad, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1,
-  0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x0e, 0x19, 0x31, 0x40, 0x00,
-  0x10, 0x04, 0x03, 0x6c, 0xb6, 0x40, 0xeb, 0x20, 0x82, 0x11, 0x03, 0x04,
-  0x00, 0x41, 0x30, 0xc0, 0x68, 0x2b, 0xb4, 0x18, 0x22, 0x18, 0x31, 0x40,
-  0x00, 0x10, 0x04, 0x03, 0xac, 0xb6, 0x44, 0x4b, 0x22, 0x82, 0x11, 0x03,
-  0x05, 0x00, 0x41, 0x30, 0x58, 0x78, 0x2b, 0xb4, 0xc0, 0x2a, 0x70, 0xad,
-  0xb8, 0x82, 0xad, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xb1, 0x59, 0x02, 0x98,
-  0x18, 0x6e, 0x38, 0x19, 0xda, 0x02, 0x83, 0x59, 0x86, 0x7e, 0xf0, 0x87,
-  0xa0, 0x76, 0x8a, 0xb4, 0xe0, 0x82, 0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04,
-  0xc1, 0x80, 0xf2, 0xad, 0xd2, 0x62, 0x19, 0xba, 0x1a, 0x31, 0x38, 0x00,
-  0x10, 0x04, 0x03, 0xea, 0xb7, 0x4a, 0x2b, 0x10, 0x2e, 0x18, 0xa6, 0x7c,
-  0x2a, 0xb5, 0xe0, 0x82, 0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80,
-  0x1a, 0x2f, 0xd5, 0x82, 0x99, 0xbc, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04,
-  0x03, 0x8a, 0xbc, 0x54, 0x2b, 0x10, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0xee,
-  0x78, 0xea, 0x4e, 0x6a, 0x98, 0xc3, 0xa1, 0x61, 0x8e, 0x18, 0xe6, 0x88,
-  0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0x4b, 0x2f, 0xdb, 0x9a,
-  0x2b, 0xf2, 0x1a, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18,
-  0x84, 0xd1, 0x04, 0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00, 0x41,
-  0x30, 0xc0, 0xe0, 0xab, 0xb7, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10,
-  0x04, 0x03, 0x2c, 0xbe, 0x7c, 0x2b, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00,
-  0x41, 0x30, 0xc0, 0xe4, 0xeb, 0xb7, 0x12, 0x22, 0x18, 0x31, 0x50, 0x00,
-  0x10, 0x04, 0x83, 0x25, 0xbf, 0x7c, 0xab, 0xaf, 0x82, 0xf5, 0x72, 0xad,
-  0xf6, 0x1a, 0x4d, 0x08, 0x80, 0x0b, 0x1e, 0x9b, 0x25, 0x80, 0x89, 0xe1,
-  0x06, 0xb2, 0x81, 0x2f, 0x30, 0x98, 0x65, 0xf8, 0x07, 0x98, 0x08, 0xec,
-  0xad, 0xe2, 0x2a, 0x3e, 0xc3, 0x11, 0x68, 0x23, 0x57, 0xc4, 0x37, 0xcb,
-  0x00, 0x12, 0x23, 0x11, 0xd8, 0x5c, 0xa5, 0x4d, 0x7c, 0x2c, 0x18, 0xe8,
-  0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8,
-  0x14, 0xa1, 0x5f, 0x3a, 0xdc, 0x10, 0xe0, 0x17, 0x18, 0xcc, 0x32, 0x84,
-  0x84, 0x48, 0x04, 0x36, 0xec, 0x15, 0x7c, 0x66, 0x09, 0x4e, 0xc2, 0xf4,
-  0x8a, 0x88, 0xcf, 0x2c, 0xc1, 0x49, 0x0c, 0x47, 0xcc, 0xcd, 0x5e, 0x09,
-  0xdf, 0x2c, 0x03, 0x49, 0x9c, 0x44, 0x60, 0x74, 0xc3, 0x57, 0xf1, 0xb1,
-  0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x91, 0x7c, 0xac,
-  0x08, 0xe2, 0x53, 0x44, 0x89, 0xe9, 0x70, 0x43, 0x30, 0x62, 0x60, 0x30,
-  0xcb, 0x50, 0x12, 0x26, 0x11, 0x18, 0x69, 0x0d, 0xf1, 0x99, 0x25, 0x38,
-  0x09, 0x23, 0x4e, 0x0b, 0x3e, 0xb3, 0x04, 0x27, 0x31, 0xd0, 0xf2, 0x68,
-  0x21, 0x81, 0x89, 0x04, 0x51, 0x12, 0x82, 0x49, 0xf0, 0xdb, 0x48, 0x5c,
-  0x30, 0x8c, 0x99, 0x96, 0x6a, 0xc5, 0x67, 0x38, 0x02, 0x74, 0x56, 0x8b,
-  0xf8, 0x66, 0x19, 0x50, 0x62, 0x25, 0x02, 0x63, 0xad, 0xd0, 0x89, 0x8f,
-  0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x30, 0xe4, 0x63,
-  0x45, 0x10, 0x9f, 0x22, 0x66, 0x4c, 0x87, 0x1b, 0x82, 0x18, 0x03, 0x83,
-  0x59, 0x86, 0x94, 0x50, 0x89, 0xc0, 0x06, 0xda, 0x82, 0xcf, 0x2c, 0xc1,
-  0x4b, 0x58, 0x6c, 0x11, 0xf1, 0x99, 0x25, 0x78, 0x89, 0xe1, 0x88, 0xd5,
-  0x91, 0x2d, 0xe1, 0x9b, 0x65, 0x60, 0x89, 0x97, 0x08, 0x8c, 0x75, 0x66,
-  0x2b, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20,
-  0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xf0, 0x31, 0x1d, 0x6e, 0x08, 0x78,
-  0x0c, 0x0c, 0x66, 0x19, 0x5a, 0xc2, 0x25, 0x02, 0xdb, 0xad, 0x21, 0x3e,
-  0xb3, 0x04, 0x2f, 0x61, 0x04, 0x78, 0xc1, 0x67, 0x96, 0xe0, 0x25, 0x06,
-  0x5a, 0x1e, 0x2d, 0x25, 0x30, 0x95, 0x20, 0x5a, 0x42, 0x70, 0x09, 0xb6,
-  0x5b, 0x89, 0x0b, 0x86, 0xb9, 0xe0, 0xa9, 0xdb, 0x9e, 0x3a, 0xd6, 0x1a,
-  0xe6, 0x7a, 0x69, 0x98, 0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03,
-  0x00, 0x41, 0x30, 0xd8, 0xdc, 0x6c, 0xc7, 0xf0, 0x2b, 0xcd, 0x46, 0x13,
-  0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18,
-  0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb0, 0x3a, 0x13,
-  0xb3, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x00, 0xb3, 0xb3,
-  0x31, 0x4b, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb0, 0x3b,
-  0x23, 0xb3, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0xf1,
-  0xb3, 0x31, 0x13, 0xb1, 0x00, 0xce, 0x66, 0x4c, 0xce, 0x46, 0x13, 0x02,
-  0xe0, 0x82, 0xc7, 0x66, 0x09, 0x60, 0x62, 0xa0, 0xe5, 0x31, 0x8d, 0x7d,
-  0x30, 0xe3, 0x40, 0x1f, 0x58, 0xa2, 0x1f, 0x84, 0x97, 0x30, 0xe3, 0xc0,
-  0x1f, 0x46, 0x0c, 0x0c, 0x00, 0x04, 0xc1, 0x00, 0x02, 0xb5, 0x1f, 0x7b,
-  0x27, 0xb3, 0x0f, 0x78, 0x89, 0x8f, 0x09, 0x81, 0x7c, 0x2c, 0x90, 0x17,
-  0xf8, 0x58, 0xf1, 0x0f, 0xf1, 0xb1, 0x22, 0x90, 0x8f, 0x05, 0x21, 0x01,
-  0x9f, 0x11, 0x03, 0x03, 0x00, 0x41, 0x30, 0x80, 0x4e, 0xcd, 0xcc, 0xea,
-  0xc9, 0x84, 0x22, 0x3e, 0x16, 0x08, 0xf2, 0xb1, 0xe0, 0x80, 0xcf, 0x05,
-  0x06, 0x8d, 0x18, 0x38, 0x00, 0x08, 0x82, 0x41, 0xd3, 0x6a, 0x6a, 0x46,
-  0x63, 0x2d, 0x06, 0x6a, 0x01, 0x99, 0x91, 0x19, 0x99, 0x8d, 0x99, 0xa8,
-  0xcd, 0x12, 0x8c, 0xd0, 0x70, 0x83, 0x5f, 0x89, 0x1a, 0x18, 0xcc, 0x32,
-  0xc8, 0xc4, 0x08, 0x05, 0x23, 0x06, 0x06, 0x00, 0x82, 0x60, 0x00, 0xb5,
-  0x5a, 0x9b, 0xf1, 0x93, 0x05, 0x3d, 0x06, 0x9f, 0x11, 0x03, 0x03, 0x00,
-  0x41, 0x30, 0x80, 0x5e, 0xed, 0xcd, 0xfa, 0xc9, 0x82, 0x1f, 0x83, 0xcf,
-  0x68, 0x02, 0x8d, 0x0d, 0xc3, 0x0d, 0x81, 0xa9, 0x81, 0xc1, 0x2c, 0xc3,
-  0x4c, 0xd4, 0x44, 0x30, 0x1c, 0x51, 0x9c, 0xd9, 0xf0, 0x9d, 0x31, 0xcc,
-  0x70, 0x43, 0x20, 0x63, 0x64, 0x50, 0x43, 0xa0, 0xc3, 0x11, 0xc7, 0x9a,
-  0x0d, 0x5f, 0x05, 0x82, 0x5e, 0x32, 0xcc, 0x70, 0x43, 0x50, 0x63, 0x64,
-  0x50, 0xc1, 0xa0, 0xb3, 0x0c, 0x34, 0x91, 0x16, 0xc1, 0xe5, 0xd7, 0x30,
-  0xa7, 0x52, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x96, 0x6b,
-  0xa6, 0x36, 0x66, 0xb4, 0x36, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30,
-  0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0xc4, 0x21, 0x23, 0x06, 0x08,
-  0x00, 0x82, 0x60, 0x80, 0x81, 0x5b, 0xab, 0x1d, 0x44, 0x30, 0x62, 0x80,
-  0x00, 0x20, 0x08, 0x06, 0x58, 0xb8, 0xb9, 0x1a, 0x43, 0x04, 0x23, 0x06,
-  0x08, 0x00, 0x82, 0x60, 0x80, 0x89, 0xdb, 0xab, 0x49, 0x44, 0x30, 0x62,
-  0xa0, 0x00, 0x20, 0x08, 0x06, 0x4b, 0xba, 0xb9, 0x5a, 0x9b, 0x05, 0xbb,
-  0xe6, 0x67, 0xbd, 0x36, 0x9a, 0x10, 0x00, 0x17, 0x3c, 0x36, 0x4b, 0x90,
-  0x16, 0xc3, 0x0d, 0x59, 0xb8, 0x81, 0xc1, 0x2c, 0x83, 0x4d, 0xdc, 0x44,
-  0x50, 0x68, 0x16, 0x6b, 0x70, 0xc1, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82,
-  0x60, 0x40, 0xad, 0x9b, 0xac, 0x7d, 0xa1, 0x36, 0x62, 0x70, 0x00, 0x20,
-  0x08, 0x06, 0x14, 0xbb, 0xc9, 0x5a, 0x20, 0x5c, 0x30, 0x4c, 0xad, 0x99,
-  0xad, 0xc1, 0x05, 0x4f, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x01, 0x05,
-  0x6f, 0xb7, 0x26, 0x06, 0xa6, 0x36, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06,
-  0x54, 0xbc, 0xdd, 0x5a, 0x20, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0xdd, 0xf1,
-  0xd4, 0xd1, 0xd8, 0x30, 0x57, 0x56, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3,
-  0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x66, 0x6f, 0xe3, 0x06, 0x6a,
-  0xf1, 0x36, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08,
-  0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60,
-  0x80, 0xf5, 0x9b, 0xba, 0x25, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08,
-  0x06, 0x98, 0xbf, 0xad, 0x5b, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82,
-  0x60, 0x80, 0xfd, 0x1b, 0xbb, 0x25, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20,
-  0x08, 0x06, 0x8b, 0xc9, 0xad, 0x9b, 0xaa, 0x05, 0xf8, 0xb6, 0x6b, 0xfa,
-  0x36, 0x9a, 0x10, 0x00, 0x17, 0x3c, 0x36, 0x4b, 0x90, 0x16, 0xc3, 0x0d,
-  0x76, 0xd0, 0x6f, 0x60, 0x30, 0xcb, 0x80, 0x13, 0x69, 0x11, 0x18, 0x9f,
-  0xf9, 0x59, 0x7c, 0x86, 0x23, 0xf6, 0xe0, 0xcf, 0x88, 0x6f, 0x96, 0x21,
-  0x27, 0x78, 0x22, 0x30, 0x50, 0xe3, 0x83, 0xf8, 0x58, 0x30, 0xd0, 0xe7,
-  0x82, 0x61, 0x2e, 0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29,
-  0xe2, 0xe4, 0x74, 0xb8, 0x21, 0x28, 0x39, 0x30, 0x98, 0x65, 0xd0, 0x89,
-  0x9d, 0x08, 0x6c, 0x40, 0x35, 0xf8, 0xcc, 0x12, 0x80, 0x85, 0x9d, 0x1a,
-  0x11, 0x9f, 0x59, 0x02, 0xb0, 0x18, 0x8e, 0x30, 0x05, 0x54, 0x13, 0xbe,
-  0x59, 0x86, 0x9e, 0x00, 0x8b, 0xc0, 0x4e, 0x21, 0xd5, 0xe2, 0x63, 0x81,
-  0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x22, 0xf9, 0x58, 0x11,
-  0xc4, 0xa7, 0x08, 0x99, 0xd3, 0xe1, 0x86, 0x00, 0xe6, 0xc0, 0x60, 0x96,
-  0xc1, 0x27, 0x7e, 0x22, 0xb0, 0x58, 0x1b, 0xe2, 0x33, 0x4b, 0x00, 0x16,
-  0x46, 0xd0, 0x1a, 0x7c, 0x66, 0x09, 0xc0, 0x62, 0xa0, 0xe5, 0xd1, 0x74,
-  0x02, 0xdb, 0x09, 0xc2, 0x27, 0x84, 0x9f, 0x10, 0x0b, 0x9e, 0xb8, 0x60,
-  0x18, 0x9b, 0xb5, 0x5b, 0x8b, 0xcf, 0x70, 0x84, 0x2c, 0xe0, 0x1a, 0xf1,
-  0xcd, 0x32, 0x84, 0x05, 0x59, 0x04, 0x96, 0x6b, 0xb3, 0x10, 0x1f, 0x0b,
-  0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x60, 0xc8, 0xc7, 0x8a,
-  0x20, 0x3e, 0x45, 0x80, 0x9d, 0x0e, 0x37, 0x04, 0x3e, 0x07, 0x06, 0xb3,
-  0x0c, 0x62, 0x31, 0x16, 0x81, 0x0d, 0xe1, 0x06, 0x9f, 0x59, 0x02, 0xb4,
-  0x30, 0x5f, 0x23, 0xe2, 0x33, 0x4b, 0x80, 0x16, 0xc3, 0x11, 0xbd, 0xf0,
-  0x6b, 0xc2, 0x37, 0xcb, 0x50, 0x16, 0x68, 0x11, 0x98, 0x2f, 0x80, 0x5b,
-  0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x41, 0x24,
-  0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xb1, 0x76, 0x3a, 0xdc, 0x10, 0xa4, 0x1d,
-  0x18, 0xcc, 0x32, 0x98, 0xc5, 0x59, 0x04, 0x86, 0x6e, 0x43, 0x7c, 0x66,
-  0x09, 0xd0, 0xc2, 0x88, 0x76, 0x83, 0xcf, 0x2c, 0x01, 0x5a, 0x0c, 0xb4,
-  0x3c, 0x9a, 0x58, 0x60, 0x63, 0x41, 0x98, 0x85, 0x70, 0x16, 0xb0, 0x41,
-  0x16, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0xb7, 0x3d, 0x75, 0xb9, 0x36, 0xcc,
-  0xa9, 0xd7, 0x30, 0x47, 0x0c, 0x73, 0xc4, 0x30, 0x23, 0x06, 0x07, 0x00,
-  0x82, 0x60, 0xb0, 0xed, 0x1d, 0xda, 0x95, 0x9c, 0xdd, 0x8d, 0x26, 0x04,
-  0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14,
-  0x91, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x60, 0xa2, 0xf7, 0x76,
-  0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x36, 0x7a, 0x70,
-  0x97, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x60, 0xa4, 0x17,
-  0x77, 0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0xb2, 0x7a,
-  0x70, 0xf7, 0x72, 0x41, 0xdf, 0x81, 0xdd, 0xdf, 0x8d, 0x26, 0x04, 0xc0,
-  0x05, 0x8f, 0xcd, 0x12, 0xa4, 0xc5, 0x40, 0xcb, 0x63, 0x1a, 0x34, 0x01,
-  0xea, 0xc1, 0x4c, 0xb0, 0x84, 0x4d, 0x08, 0x68, 0x01, 0xea, 0xc1, 0x4d,
-  0xcc, 0x32, 0xa8, 0x05, 0x5b, 0xec, 0xc3, 0x70, 0x04, 0x48, 0x84, 0xdd,
-  0xf0, 0x5d, 0x48, 0x0c, 0x33, 0xdc, 0x10, 0xb0, 0x1c, 0x19, 0xd4, 0x10,
-  0xe8, 0x70, 0x44, 0x48, 0x94, 0xdd, 0xf0, 0x55, 0x20, 0xe8, 0x8d, 0xc4,
-  0x30, 0xc3, 0x0d, 0xc1, 0xcb, 0x91, 0x41, 0x05, 0x83, 0xce, 0x32, 0xac,
-  0x05, 0x68, 0x04, 0x37, 0x6f, 0xc3, 0x1c, 0x89, 0x0d, 0x33, 0x62, 0x70,
-  0x00, 0x20, 0x08, 0x06, 0xdb, 0xec, 0x81, 0x5e, 0xcf, 0xb9, 0xde, 0x68,
-  0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10,
-  0x43, 0x11, 0x87, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0xa6, 0x7b,
-  0xa7, 0x77, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x60, 0xbb,
-  0x87, 0x7a, 0x0c, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0xc6,
-  0x7b, 0xa9, 0x27, 0x11, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c,
-  0xe3, 0x87, 0x7a, 0x67, 0x17, 0xd4, 0x1e, 0xde, 0xdd, 0xde, 0x68, 0x42,
-  0x00, 0x5c, 0xf0, 0xd8, 0x2c, 0x01, 0x68, 0x0c, 0x37, 0xcc, 0xc4, 0xee,
-  0x81, 0xc1, 0x2c, 0x43, 0x5b, 0xb8, 0x45, 0x50, 0x62, 0xb7, 0x7a, 0x70,
-  0xc1, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0x95, 0x1f, 0xeb,
-  0xe9, 0xc4, 0xde, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x01, 0x65, 0x7e,
-  0xac, 0x17, 0x08, 0x17, 0x0c, 0x53, 0x65, 0x07, 0x7b, 0x70, 0xc1, 0x53,
-  0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0xa9, 0x5f, 0xec, 0xf1, 0x04,
-  0xe8, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x01, 0xb5, 0x7e, 0xb1, 0x17,
-  0x08, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x77, 0x3c, 0x75, 0x2e, 0x37, 0xcc,
-  0xfd, 0xd8, 0x30, 0x47, 0x0c, 0x73, 0xc4, 0x30, 0x23, 0x06, 0x07, 0x00,
-  0x82, 0x60, 0xb0, 0xc1, 0x5f, 0xef, 0xe9, 0xdd, 0xfa, 0x8d, 0x26, 0x04,
-  0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14,
-  0x91, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x60, 0xf7, 0x47, 0x7e,
-  0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x86, 0x7f, 0xe5,
-  0x97, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x60, 0xf9, 0x67,
-  0x7e, 0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0x02, 0x82,
-  0x41, 0xf9, 0x91, 0x5e, 0x20, 0x7f, 0xb5, 0x47, 0x7f, 0xa3, 0x09, 0x01,
-  0x70, 0xc1, 0x63, 0xb3, 0x04, 0xa0, 0x31, 0xdc, 0x00, 0x17, 0xf7, 0x07,
-  0x06, 0xb3, 0x0c, 0x6f, 0x01, 0x1a, 0x81, 0xd9, 0x1d, 0xde, 0xc5, 0x67,
-  0x38, 0xc2, 0x2e, 0xf2, 0x8e, 0xf8, 0x66, 0x19, 0xe0, 0x62, 0x2e, 0x02,
-  0xd3, 0xbb, 0xbb, 0x88, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x82,
-  0xa7, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0x42, 0x30, 0xd0,
-  0xe1, 0x86, 0xe0, 0xff, 0xc0, 0x60, 0x96, 0x21, 0x2e, 0xe4, 0x22, 0xb0,
-  0x41, 0xf4, 0xe0, 0x33, 0x4b, 0x70, 0x17, 0x16, 0x7a, 0x44, 0x7c, 0x66,
-  0x09, 0xee, 0x62, 0x38, 0x22, 0x34, 0x44, 0x4f, 0xf8, 0x66, 0x19, 0xe8,
-  0xe2, 0x2e, 0x02, 0x13, 0x8d, 0xd1, 0x8b, 0x8f, 0x05, 0x0e, 0x7d, 0x2e,
-  0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22,
-  0x58, 0x30, 0xd0, 0xe1, 0x86, 0x40, 0x05, 0x03, 0x30, 0x98, 0x65, 0xa8,
-  0x0b, 0xbb, 0x08, 0x6c, 0xf5, 0x86, 0xf8, 0xcc, 0x12, 0xdc, 0x85, 0x11,
-  0xae, 0x07, 0x9f, 0x59, 0x82, 0xbb, 0x18, 0x68, 0x79, 0xb4, 0xb8, 0xc0,
-  0xe4, 0x82, 0xa8, 0x0b, 0xc1, 0x2e, 0x70, 0x66, 0x2e, 0x2e, 0x18, 0xc6,
-  0x5a, 0x2f, 0xf6, 0xe2, 0x33, 0x1c, 0xc1, 0x1a, 0xb2, 0x47, 0x7c, 0xb3,
-  0x0c, 0x78, 0xb1, 0x17, 0x81, 0xcd, 0x5e, 0x6b, 0xc4, 0xc7, 0x82, 0x81,
-  0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88,
-  0x4f, 0x11, 0x3a, 0x18, 0xe8, 0x70, 0x43, 0x80, 0x83, 0x01, 0x18, 0xcc,
-  0x32, 0xe4, 0x85, 0x5e, 0x04, 0x36, 0xec, 0x1e, 0x7c, 0x66, 0x09, 0xfe,
-  0xc2, 0x70, 0x8f, 0x88, 0xcf, 0x2c, 0xc1, 0x5f, 0x0c, 0x47, 0xdc, 0x46,
-  0xee, 0x09, 0xdf, 0x2c, 0x03, 0x5f, 0xfc, 0x45, 0x60, 0xb8, 0xa1, 0x7b,
-  0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x91,
-  0x7c, 0xac, 0x08, 0xe2, 0x53, 0x44, 0x19, 0x06, 0x3a, 0xdc, 0x10, 0x8c,
-  0x61, 0x00, 0x06, 0xb3, 0x0c, 0x7d, 0xe1, 0x17, 0x81, 0x89, 0xdf, 0x10,
-  0x9f, 0x59, 0x82, 0xbf, 0x30, 0xe2, 0xfc, 0xe0, 0x33, 0x4b, 0xf0, 0x17,
-  0x03, 0x2d, 0x8f, 0x96, 0x17, 0x98, 0x5e, 0x10, 0x7d, 0x21, 0xf8, 0x05,
-  0xe9, 0xec, 0xc5, 0x05, 0xc3, 0x5c, 0xf0, 0xd4, 0x6d, 0x4f, 0xdd, 0xec,
-  0x0d, 0x73, 0xe4, 0x36, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1,
-  0x01, 0x80, 0x20, 0x18, 0x6c, 0x75, 0x18, 0x88, 0x61, 0xf0, 0x7f, 0x70,
-  0x18, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2,
-  0x68, 0x02, 0x31, 0x14, 0x91, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
-  0x60, 0x7c, 0x18, 0xa4, 0x61, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80,
-  0x20, 0x18, 0x60, 0x7d, 0x18, 0xa8, 0x61, 0x90, 0x10, 0xc1, 0x88, 0x01,
-  0x02, 0x80, 0x20, 0x18, 0x60, 0x7e, 0x18, 0xac, 0x61, 0x90, 0x10, 0xc1,
-  0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c, 0xa5, 0x18, 0xa8, 0x61, 0x90,
-  0x82, 0x41, 0x70, 0x87, 0x81, 0x0e, 0x06, 0x79, 0x18, 0x8c, 0x26, 0x04,
-  0xc0, 0x05, 0x8f, 0xcd, 0x12, 0x80, 0xc6, 0x40, 0xcb, 0x63, 0x1a, 0x6b,
-  0x41, 0x87, 0x82, 0x5a, 0xb0, 0x44, 0x5b, 0x08, 0x7f, 0x41, 0x87, 0x82,
-  0x5b, 0x98, 0x7e, 0xe8, 0x60, 0x00, 0x9f, 0x59, 0x86, 0xd0, 0x18, 0x0d,
-  0xfb, 0x18, 0x8e, 0x08, 0x78, 0x30, 0x18, 0xbe, 0x13, 0x86, 0x19, 0x6e,
-  0x08, 0x4e, 0x30, 0x20, 0x83, 0x1a, 0x02, 0x1d, 0x8e, 0xe0, 0x0f, 0x30,
-  0x0c, 0x86, 0xaf, 0x02, 0x41, 0xcf, 0x3f, 0x86, 0x19, 0x6e, 0x08, 0x54,
-  0x30, 0x20, 0x83, 0x0a, 0x06, 0x9d, 0x65, 0x10, 0x8d, 0xdb, 0x08, 0xce,
-  0xfd, 0x86, 0xb9, 0x7f, 0x1b, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c,
-  0x36, 0x57, 0x0c, 0xf6, 0x30, 0xc0, 0xc1, 0x20, 0x15, 0x83, 0xd1, 0x84,
-  0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86,
-  0x22, 0x0e, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xac, 0x16, 0x03,
-  0x51, 0x0c, 0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xcc,
-  0x16, 0x83, 0x51, 0x0c, 0x18, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04,
-  0x03, 0xec, 0x16, 0x03, 0x52, 0x0c, 0x24, 0x22, 0x18, 0x31, 0x50, 0x00,
-  0x10, 0x04, 0x83, 0xc5, 0x17, 0x83, 0x51, 0x0c, 0xc4, 0x30, 0x08, 0x60,
-  0x31, 0x98, 0xc3, 0x40, 0x16, 0x83, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xb1,
-  0x59, 0x82, 0xdb, 0x18, 0x6e, 0x70, 0x11, 0x5b, 0x0c, 0xc0, 0x60, 0x96,
-  0x81, 0x34, 0x4a, 0x23, 0xa8, 0x1e, 0x0c, 0x4c, 0x31, 0x80, 0x0b, 0x9e,
-  0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x0a, 0x1c, 0x83, 0x53, 0x0c,
-  0x36, 0x3b, 0x0c, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x0a, 0xc7,
-  0xe0, 0x14, 0x83, 0x40, 0xb8, 0x60, 0x98, 0x02, 0xc3, 0x60, 0x15, 0x03,
-  0xb8, 0xe0, 0xa9, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0xca, 0x31,
-  0x60, 0xc5, 0xe0, 0x46, 0xf6, 0x30, 0x18, 0x31, 0x38, 0x00, 0x10, 0x04,
-  0x03, 0xca, 0x1c, 0x03, 0x56, 0x0c, 0x02, 0xe1, 0x82, 0x61, 0x2e, 0x78,
-  0xea, 0x8e, 0xa7, 0x2e, 0x05, 0x83, 0x61, 0x4e, 0xe7, 0x86, 0x39, 0x62,
-  0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x6d, 0x1d,
-  0x03, 0x5c, 0x0c, 0xea, 0x30, 0x30, 0xc7, 0x60, 0x34, 0x21, 0x00, 0x46,
-  0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44,
-  0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x00, 0x93, 0xc7, 0xe0, 0x17, 0x83,
-  0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x00, 0x9b, 0xc7, 0x00,
-  0x1c, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x00, 0xa3,
-  0xc7, 0x20, 0x1c, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1,
-  0x60, 0xd9, 0xc7, 0x00, 0x1c, 0x83, 0x3f, 0x0c, 0x82, 0x76, 0x0c, 0x60,
-  0x31, 0x78, 0xc7, 0x60, 0x34, 0x21, 0x00, 0x2e, 0x78, 0x6c, 0x96, 0xe0,
-  0x36, 0x86, 0x1b, 0xd6, 0x44, 0x1e, 0x03, 0x30, 0x98, 0x65, 0x30, 0x8d,
-  0xdb, 0x08, 0x2c, 0x0e, 0x83, 0x39, 0x0c, 0xe2, 0x33, 0x1c, 0x71, 0x07,
-  0x74, 0x18, 0x10, 0xdf, 0x2c, 0xc3, 0x69, 0xa8, 0x46, 0x60, 0x75, 0x18,
-  0xe0, 0x41, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65,
-  0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xc1, 0x8f, 0x81, 0x0e, 0x37,
-  0x04, 0xfa, 0x18, 0x80, 0xc1, 0x2c, 0x03, 0x6a, 0xa4, 0x46, 0x60, 0x43,
-  0x1f, 0x06, 0xf0, 0x99, 0x25, 0x70, 0x0d, 0xe3, 0xc3, 0x80, 0x88, 0xcf,
-  0x2c, 0x81, 0x6b, 0x0c, 0x47, 0x88, 0x42, 0x1f, 0x06, 0xc2, 0x37, 0xcb,
-  0xb0, 0x1a, 0xae, 0x11, 0xd8, 0x28, 0xf8, 0x61, 0x10, 0x1f, 0x0b, 0x1c,
-  0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20,
-  0x3e, 0x45, 0x9c, 0x64, 0xa0, 0xc3, 0x0d, 0x41, 0x49, 0x06, 0x60, 0x30,
-  0xcb, 0xc0, 0x1a, 0xad, 0x11, 0x98, 0x29, 0x06, 0x43, 0x7c, 0x66, 0x09,
-  0x5c, 0xc3, 0x88, 0x54, 0x0c, 0xe0, 0x33, 0x4b, 0xe0, 0x1a, 0x03, 0x2d,
-  0x8f, 0x86, 0x1a, 0x58, 0x6a, 0x10, 0xac, 0x21, 0xb4, 0x86, 0x4e, 0xa8,
-  0xc6, 0x05, 0xc3, 0x18, 0x2a, 0x06, 0xac, 0x18, 0xc4, 0x67, 0x38, 0xe2,
-  0x54, 0x5a, 0x31, 0x20, 0xbe, 0x59, 0x86, 0xd7, 0x90, 0x8d, 0xc0, 0x5c,
-  0x31, 0x40, 0x95, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78,
-  0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0xa2, 0x26, 0x03, 0x1d,
-  0x6e, 0x08, 0x66, 0x32, 0x00, 0x83, 0x59, 0x06, 0xd8, 0x88, 0x8d, 0xc0,
-  0x06, 0x5b, 0x0c, 0xe0, 0x33, 0x4b, 0x60, 0x1b, 0x36, 0x8b, 0x01, 0x11,
-  0x9f, 0x59, 0x02, 0xdb, 0x18, 0x8e, 0x90, 0x15, 0x5a, 0x0c, 0x84, 0x6f,
-  0x96, 0x61, 0x36, 0x6c, 0x23, 0xb0, 0x59, 0xa9, 0xc5, 0x20, 0x3e, 0x16,
-  0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20, 0x92, 0x8f, 0x15,
-  0x41, 0x7c, 0x8a, 0x00, 0xcb, 0x40, 0x87, 0x1b, 0x02, 0x9f, 0x0c, 0xc0,
-  0x60, 0x96, 0x81, 0x36, 0x6a, 0x23, 0xb0, 0x5e, 0x0c, 0x86, 0xf8, 0xcc,
-  0x12, 0xd8, 0x86, 0x11, 0xe2, 0x18, 0xc0, 0x67, 0x96, 0xc0, 0x36, 0x06,
-  0x5a, 0x1e, 0x0d, 0x36, 0xb0, 0xd8, 0x20, 0x68, 0x43, 0xa8, 0x0d, 0xbd,
-  0x92, 0x8d, 0x0b, 0x86, 0xb9, 0xe0, 0xa9, 0xdb, 0x9e, 0x3a, 0x57, 0x0c,
-  0x86, 0xb9, 0xdf, 0x1b, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0,
-  0x00, 0x40, 0x10, 0x0c, 0x36, 0xb8, 0x0c, 0x7a, 0x32, 0xd0, 0xc7, 0x60,
-  0x2d, 0x83, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41,
-  0x18, 0x4d, 0x20, 0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04,
-  0x03, 0xec, 0x2e, 0x03, 0xb2, 0x0c, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00,
-  0x10, 0x04, 0x03, 0x0c, 0x2f, 0x83, 0xb2, 0x0c, 0x12, 0x22, 0x18, 0x31,
-  0x40, 0x00, 0x10, 0x04, 0x03, 0x2c, 0x2f, 0x03, 0xb3, 0x0c, 0x12, 0x22,
-  0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x05, 0x34, 0x83, 0xb2, 0x0c,
-  0x48, 0x32, 0x08, 0xe4, 0x32, 0xa8, 0xc9, 0x80, 0x2e, 0x83, 0xd1, 0x84,
-  0x00, 0xb8, 0xe0, 0xb1, 0x59, 0x82, 0xdb, 0x18, 0x68, 0x79, 0x4c, 0x43,
-  0x34, 0xd4, 0x54, 0x08, 0x0d, 0x96, 0x20, 0x0d, 0xc1, 0x36, 0xd4, 0x54,
-  0x28, 0x0d, 0xab, 0x97, 0x95, 0x0c, 0xe0, 0x33, 0xcb, 0x80, 0x1b, 0xba,
-  0x11, 0x2f, 0xc3, 0x11, 0xc1, 0x4d, 0x06, 0xc3, 0x77, 0xc2, 0x30, 0xc3,
-  0x0d, 0x81, 0x48, 0x06, 0x64, 0x50, 0x43, 0xa0, 0xc3, 0x11, 0xf7, 0xb2,
-  0x93, 0xc1, 0xf0, 0x55, 0x20, 0xe8, 0xe5, 0xcb, 0x30, 0xc3, 0x0d, 0x41,
-  0x49, 0x06, 0x64, 0x50, 0xc1, 0xa0, 0xb3, 0x0c, 0xb9, 0xe1, 0x1e, 0xc1,
-  0xa5, 0x63, 0x30, 0xcc, 0xe9, 0xdf, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82,
-  0x60, 0xb0, 0xa5, 0x66, 0x60, 0x97, 0xc1, 0x4c, 0x06, 0xa4, 0x19, 0x8c,
-  0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02,
-  0x31, 0x14, 0x71, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x60, 0xb0,
-  0x19, 0xf4, 0x65, 0x70, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
-  0x60, 0xb1, 0x19, 0xf8, 0x65, 0xc0, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80,
-  0x20, 0x18, 0x60, 0xb2, 0x19, 0xfc, 0x65, 0x20, 0x11, 0xc1, 0x88, 0x81,
-  0x02, 0x80, 0x20, 0x18, 0x2c, 0xb9, 0x19, 0xf8, 0x65, 0xd0, 0x93, 0x41,
-  0xb0, 0x9a, 0x81, 0x5b, 0x06, 0xad, 0x19, 0x8c, 0x26, 0x04, 0xc0, 0x05,
-  0x8f, 0xcd, 0x12, 0xb8, 0xc7, 0x70, 0x43, 0xca, 0xc4, 0x66, 0x00, 0x06,
-  0xb3, 0x0c, 0xbb, 0xc1, 0x1b, 0x41, 0xe1, 0x64, 0x10, 0x9a, 0x01, 0x5c,
-  0xf0, 0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50, 0xbb, 0x19, 0x88,
-  0x66, 0xb0, 0xc5, 0x65, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x14,
-  0x6f, 0x06, 0xa2, 0x19, 0x04, 0xc2, 0x05, 0xc3, 0xd4, 0x4e, 0x06, 0xa6,
-  0x19, 0xc0, 0x05, 0x4f, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x01, 0x05,
-  0x9e, 0xc1, 0x69, 0x06, 0x32, 0x63, 0x97, 0xc1, 0x88, 0xc1, 0x01, 0x80,
-  0x20, 0x18, 0x50, 0xe1, 0x19, 0x9c, 0x66, 0x10, 0x08, 0x17, 0x0c, 0x73,
-  0xc1, 0x53, 0x77, 0x3c, 0x75, 0x24, 0x19, 0x0c, 0x73, 0x35, 0x18, 0x0c,
-  0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06,
-  0x9b, 0x79, 0x06, 0xb3, 0x19, 0xc0, 0x65, 0x10, 0x9e, 0xc1, 0x68, 0x42,
-  0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43,
-  0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0xd6, 0x9e, 0x81,
-  0x6e, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0xe6,
-  0x9e, 0xc1, 0x6e, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
-  0x01, 0xf6, 0x9e, 0x01, 0x6f, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x28, 0x00,
-  0x08, 0x82, 0xc1, 0x62, 0x9f, 0xc1, 0x6e, 0x06, 0x7a, 0x19, 0x04, 0xe8,
-  0x19, 0xac, 0x66, 0xa0, 0x9e, 0xc1, 0x68, 0x42, 0x00, 0x5c, 0xf0, 0xd8,
-  0x2c, 0x81, 0x7b, 0x0c, 0x37, 0x98, 0x4d, 0x7b, 0x06, 0x60, 0x30, 0xcb,
-  0xd0, 0x1b, 0xee, 0x11, 0x18, 0x5b, 0x06, 0x6e, 0x19, 0xc4, 0x67, 0x38,
-  0xe2, 0x0e, 0xde, 0x32, 0x20, 0xbe, 0x59, 0x06, 0xdf, 0x08, 0x8f, 0xc0,
-  0xe0, 0x32, 0xc0, 0x83, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e,
-  0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0xe2, 0x3e, 0x03,
-  0x1d, 0x6e, 0x08, 0xea, 0x33, 0x00, 0x83, 0x59, 0x86, 0xdf, 0x00, 0x8f,
-  0xc0, 0x06, 0xbc, 0x0c, 0xe0, 0x33, 0x4b, 0x50, 0x1e, 0x76, 0x97, 0x01,
-  0x11, 0x9f, 0x59, 0x82, 0xf2, 0x18, 0x8e, 0x10, 0x05, 0xbc, 0x0c, 0x84,
-  0x6f, 0x96, 0x41, 0x3c, 0xca, 0x23, 0xb0, 0x51, 0xc8, 0xcb, 0x20, 0x3e,
-  0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20, 0x92, 0x8f,
-  0x15, 0x41, 0x7c, 0x8a, 0x10, 0xd1, 0x40, 0x87, 0x1b, 0x02, 0x10, 0x0d,
-  0xc0, 0x60, 0x96, 0x61, 0x3c, 0xc8, 0x23, 0xb0, 0xd0, 0x0c, 0x86, 0xf8,
-  0xcc, 0x12, 0x94, 0x87, 0x11, 0xa4, 0x19, 0xc0, 0x67, 0x96, 0xa0, 0x3c,
-  0x06, 0x5a, 0x1e, 0xed, 0x37, 0x30, 0xf0, 0x20, 0xc6, 0x43, 0x20, 0x0f,
-  0x9d, 0x08, 0x8f, 0x0b, 0x86, 0xb1, 0xd1, 0x0c, 0x4e, 0x33, 0x88, 0xcf,
-  0x70, 0x84, 0xe8, 0xa0, 0x66, 0x40, 0x7c, 0xb3, 0x0c, 0xe6, 0x91, 0x1e,
-  0x81, 0xa5, 0x66, 0x30, 0x3a, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3,
-  0x5c, 0xf0, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x04, 0x8c,
-  0x06, 0x3a, 0xdc, 0x10, 0xb8, 0x68, 0x00, 0x06, 0xb3, 0x0c, 0xe7, 0x81,
-  0x1e, 0x81, 0x0d, 0xb1, 0x19, 0xc0, 0x67, 0x96, 0xa0, 0x3d, 0xcc, 0x35,
-  0x03, 0x22, 0x3e, 0xb3, 0x04, 0xed, 0x31, 0x1c, 0xd1, 0x3a, 0xaf, 0x19,
-  0x08, 0xdf, 0x2c, 0x83, 0x7a, 0xb4, 0x47, 0x60, 0xae, 0x03, 0x9b, 0x41,
-  0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x41, 0x24,
-  0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xb1, 0xa3, 0x81, 0x0e, 0x37, 0x04, 0x39,
-  0x1a, 0x80, 0xc1, 0x2c, 0xc3, 0x7a, 0xb0, 0x47, 0x60, 0xb8, 0x19, 0x0c,
-  0xf1, 0x99, 0x25, 0x68, 0x0f, 0x23, 0x7a, 0x33, 0x80, 0xcf, 0x2c, 0x41,
-  0x7b, 0x0c, 0xb4, 0x3c, 0xda, 0x79, 0x60, 0xe8, 0x41, 0xac, 0x87, 0xc0,
-  0x1e, 0x70, 0x97, 0x1e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0xb7, 0x3d, 0x75,
-  0xa9, 0x19, 0x0c, 0x73, 0xba, 0x18, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c,
-  0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0xdb, 0x9a, 0x06, 0x38, 0x1a,
-  0xd4, 0x67, 0x60, 0xa6, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1,
-  0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20,
-  0x00, 0x08, 0x82, 0x01, 0x26, 0xa7, 0xc1, 0x8f, 0x06, 0x09, 0x11, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x36, 0xa7, 0x01, 0x98, 0x06, 0x09,
-  0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x46, 0xa7, 0x41, 0x98,
-  0x06, 0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0xb2, 0xa7,
-  0x01, 0x98, 0x06, 0xff, 0x19, 0x04, 0x6d, 0x1a, 0xc0, 0x68, 0xf0, 0xa6,
-  0xc1, 0x68, 0x42, 0x00, 0x5c, 0xf0, 0xd8, 0x2c, 0x81, 0x7b, 0x0c, 0xb4,
-  0x3c, 0xa6, 0x91, 0x1b, 0x60, 0x2c, 0xe0, 0x06, 0x4b, 0xec, 0x86, 0xd0,
-  0x1e, 0x60, 0x2c, 0xf0, 0xc6, 0x2c, 0xc3, 0x7b, 0xc4, 0xc7, 0xfa, 0x0c,
-  0x47, 0xbc, 0x4f, 0x8c, 0x06, 0xc3, 0x77, 0xf0, 0x33, 0xcc, 0x70, 0x43,
-  0xc0, 0x9f, 0x01, 0x19, 0xd4, 0x10, 0xe8, 0x70, 0x84, 0xfc, 0xd4, 0x68,
-  0x30, 0x7c, 0x15, 0x08, 0x7a, 0xf4, 0x33, 0xcc, 0x70, 0x43, 0xf0, 0x9f,
-  0x01, 0x19, 0x54, 0x30, 0xe8, 0x2c, 0x03, 0x7c, 0x94, 0x48, 0x70, 0xe3,
-  0x19, 0x0c, 0x73, 0xf4, 0x18, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08,
-  0x06, 0xdb, 0xa8, 0x06, 0x70, 0x1a, 0xb4, 0x68, 0xe0, 0xa7, 0xc1, 0x68,
-  0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10,
-  0x43, 0x11, 0x87, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0xa6, 0xaa,
-  0xc1, 0x9d, 0x06, 0x07, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01,
-  0xb6, 0xaa, 0x01, 0x9e, 0x06, 0x0c, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08,
-  0x82, 0x01, 0xc6, 0xaa, 0x41, 0x9e, 0x06, 0x12, 0x11, 0x8c, 0x18, 0x28,
-  0x00, 0x08, 0x82, 0xc1, 0x32, 0xab, 0x01, 0x9e, 0x06, 0x37, 0x1a, 0x04,
-  0xa5, 0x1a, 0xa0, 0x69, 0x70, 0xaa, 0xc1, 0x68, 0x42, 0x00, 0x5c, 0xf0,
-  0xd8, 0x2c, 0x41, 0x89, 0x0c, 0x37, 0x8c, 0xd0, 0xaa, 0x06, 0x60, 0x30,
-  0xcb, 0x20, 0x1f, 0xf3, 0x11, 0x94, 0x8c, 0x06, 0x7b, 0x1a, 0xc0, 0x05,
-  0x4f, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x01, 0x55, 0xab, 0x01, 0x9f,
-  0x06, 0x29, 0xb4, 0xa6, 0xc1, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50,
-  0xb6, 0x1a, 0xf0, 0x69, 0x10, 0x08, 0x17, 0x0c, 0x53, 0x35, 0x1a, 0x80,
-  0x6a, 0x00, 0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x94,
-  0xae, 0x06, 0xa1, 0x1a, 0xb4, 0x10, 0x9c, 0x06, 0x23, 0x06, 0x07, 0x00,
-  0x82, 0x60, 0x40, 0xed, 0x6a, 0x10, 0xaa, 0x41, 0x20, 0x5c, 0x30, 0xcc,
-  0x05, 0x4f, 0xdd, 0xf1, 0xd4, 0xf9, 0x67, 0x30, 0xcc, 0xbd, 0x64, 0x30,
-  0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18,
-  0x6c, 0xe0, 0x1a, 0xb4, 0x6a, 0xa0, 0xa6, 0xc1, 0xae, 0x06, 0xa3, 0x09,
-  0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c,
-  0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xd8, 0xb9, 0x06,
-  0xb4, 0x1a, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x18,
-  0xba, 0x06, 0xb5, 0x1a, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08,
-  0x06, 0x58, 0xba, 0x06, 0xb6, 0x1a, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00,
-  0x20, 0x08, 0x06, 0x0b, 0xbc, 0x06, 0xb5, 0x1a, 0xd0, 0x69, 0x10, 0x88,
-  0x6b, 0x50, 0xaa, 0x01, 0xb9, 0x06, 0xa3, 0x09, 0x01, 0x70, 0xc1, 0x63,
-  0xb3, 0x04, 0x25, 0x32, 0xdc, 0x00, 0x46, 0xe7, 0x1a, 0x80, 0xc1, 0x2c,
-  0x03, 0x7d, 0x94, 0x48, 0x60, 0x66, 0x1a, 0xa0, 0x69, 0x10, 0x9f, 0xe1,
-  0x88, 0x32, 0x4a, 0xd3, 0x80, 0xf8, 0x66, 0x19, 0xea, 0x03, 0x3f, 0x02,
-  0x53, 0xd3, 0xc0, 0x8c, 0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9,
-  0xe0, 0x29, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0x78, 0x0d,
-  0x74, 0xb8, 0x21, 0x78, 0xd7, 0x00, 0x0c, 0x66, 0x19, 0xec, 0xe3, 0x3e,
-  0x02, 0x1b, 0xe4, 0x34, 0x80, 0xcf, 0x2c, 0x01, 0x7f, 0x58, 0x9c, 0x06,
-  0x44, 0x7c, 0x66, 0x09, 0xf8, 0x63, 0x38, 0x02, 0x8e, 0xe4, 0x34, 0x10,
-  0xbe, 0x59, 0x86, 0xfc, 0xe0, 0x8f, 0xc0, 0xe2, 0x68, 0x4e, 0x83, 0xf8,
-  0x58, 0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca, 0x82, 0x48, 0x3e,
-  0x56, 0x04, 0xf1, 0x29, 0x82, 0x5f, 0x03, 0x1d, 0x6e, 0x08, 0xf4, 0x35,
-  0x00, 0x83, 0x59, 0x06, 0xfd, 0xd8, 0x8f, 0xc0, 0xf6, 0x34, 0x18, 0xe2,
-  0x33, 0x4b, 0xc0, 0x1f, 0x46, 0xf8, 0x69, 0x00, 0x9f, 0x59, 0x02, 0xfe,
-  0x18, 0x68, 0x79, 0x34, 0xfb, 0xc0, 0xee, 0x83, 0xd0, 0x0f, 0x61, 0x3f,
-  0xec, 0x31, 0xc0, 0x8f, 0x0b, 0x86, 0xb1, 0x3e, 0x0d, 0x42, 0x35, 0x88,
-  0xcf, 0x70, 0x44, 0x1f, 0x89, 0x6a, 0x40, 0x7c, 0xb3, 0x0c, 0xfd, 0x01,
-  0x22, 0x81, 0x8d, 0x6a, 0xe0, 0x47, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05,
-  0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x84,
-  0xca, 0x06, 0x3a, 0xdc, 0x10, 0xa0, 0x6c, 0x00, 0x06, 0xb3, 0x0c, 0xfe,
-  0xf1, 0x1f, 0x81, 0x0d, 0xab, 0x1a, 0xc0, 0x67, 0x96, 0x80, 0x44, 0x0c,
-  0x55, 0x03, 0x22, 0x3e, 0xb3, 0x04, 0x24, 0x32, 0x1c, 0x81, 0x4a, 0xa9,
-  0x1a, 0x08, 0xdf, 0x2c, 0x43, 0x88, 0x90, 0x48, 0x60, 0xa9, 0xa4, 0xaa,
-  0x41, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x41,
-  0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x51, 0xb3, 0x81, 0x0e, 0x37, 0x04,
-  0x33, 0x1b, 0x80, 0xc1, 0x2c, 0x83, 0x88, 0x8c, 0x48, 0x60, 0xb2, 0x1a,
-  0x0c, 0xf1, 0x99, 0x25, 0x20, 0x11, 0x23, 0x6e, 0x35, 0x80, 0xcf, 0x2c,
-  0x01, 0x89, 0x0c, 0xb4, 0x3c, 0x9a, 0x7f, 0x60, 0xff, 0x41, 0x88, 0x88,
-  0x30, 0x22, 0x66, 0x19, 0x80, 0xc8, 0x05, 0xc3, 0x5c, 0xf0, 0xd4, 0x6d,
-  0x4f, 0xdd, 0xa8, 0x06, 0xc3, 0x1c, 0x6d, 0x06, 0xc3, 0x1c, 0x31, 0xcc,
-  0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x56, 0xb6, 0x81,
-  0xcc, 0x06, 0xef, 0x1a, 0x80, 0x6d, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09,
-  0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23,
-  0x06, 0x08, 0x00, 0x82, 0x60, 0x80, 0xb1, 0x6d, 0x90, 0xb3, 0x41, 0x42,
-  0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x80, 0xb5, 0x6d, 0xa0, 0xb3,
-  0x41, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x80, 0xb9, 0x6d,
-  0xb0, 0xb3, 0x41, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0,
-  0xd4, 0x6d, 0xa0, 0xb3, 0x41, 0xbe, 0x06, 0xc1, 0xd9, 0x06, 0x2a, 0x1b,
-  0xa4, 0x6d, 0x30, 0x9a, 0x10, 0x00, 0x17, 0x3c, 0x36, 0x4b, 0x50, 0x22,
-  0x03, 0x2d, 0x8f, 0x69, 0xc0, 0x07, 0x8d, 0x0b, 0xef, 0xc1, 0x12, 0xf2,
-  0x21, 0x90, 0x08, 0x8d, 0x0b, 0xf3, 0x31, 0xcb, 0x60, 0x22, 0x28, 0x52,
-  0x4e, 0xc3, 0x11, 0xea, 0xb4, 0xb2, 0xc1, 0xf0, 0xdd, 0x3a, 0x0d, 0x33,
-  0xdc, 0x10, 0xd8, 0x6b, 0x40, 0x06, 0x35, 0x04, 0x3a, 0x1c, 0xc1, 0x4e,
-  0x2f, 0x1b, 0x0c, 0x5f, 0x05, 0x82, 0x9e, 0x3b, 0x0d, 0x33, 0xdc, 0x10,
-  0xe4, 0x6b, 0x40, 0x06, 0x15, 0x0c, 0x3a, 0xcb, 0x70, 0x22, 0x3c, 0x12,
-  0x5c, 0xaf, 0x06, 0xc3, 0x9c, 0x7b, 0x06, 0xc3, 0x8c, 0x18, 0x1c, 0x00,
-  0x08, 0x82, 0xc1, 0xd6, 0xb7, 0x81, 0xda, 0x06, 0x27, 0x1b, 0xe0, 0x6d,
-  0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3,
-  0x09, 0xc4, 0x50, 0xc4, 0x21, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x80,
-  0x91, 0x6e, 0x10, 0xb7, 0xc1, 0x41, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82,
-  0x60, 0x80, 0x95, 0x6e, 0x20, 0xb7, 0x01, 0x43, 0x04, 0x23, 0x06, 0x08,
-  0x00, 0x82, 0x60, 0x80, 0x99, 0x6e, 0x30, 0xb7, 0x81, 0x44, 0x04, 0x23,
-  0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0xb4, 0x6e, 0x20, 0xb7, 0x41, 0xcc,
-  0x06, 0xc1, 0xdf, 0x06, 0x62, 0x1b, 0x84, 0x6e, 0x30, 0x9a, 0x10, 0x00,
-  0x17, 0x3c, 0x36, 0x4b, 0xc0, 0x23, 0xc3, 0x0d, 0xfd, 0x54, 0xba, 0x01,
-  0x18, 0xcc, 0x32, 0xa4, 0x88, 0x8a, 0x04, 0xc5, 0xb2, 0x41, 0xdd, 0x06,
-  0x70, 0xc1, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0xbd, 0x6e,
-  0x60, 0xb7, 0x01, 0x49, 0x95, 0x6d, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08,
-  0x06, 0x14, 0xec, 0x06, 0x76, 0x1b, 0x04, 0xc2, 0x05, 0xc3, 0xd4, 0xcb,
-  0x06, 0x7a, 0x1b, 0xc0, 0x05, 0x4f, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82,
-  0x01, 0x45, 0xbb, 0xc1, 0xde, 0x06, 0x27, 0xa5, 0xb6, 0xc1, 0x88, 0xc1,
-  0x01, 0x80, 0x20, 0x18, 0x50, 0xb5, 0x1b, 0xec, 0x6d, 0x10, 0x08, 0x17,
-  0x0c, 0x73, 0xc1, 0x53, 0x77, 0x3c, 0x75, 0xf8, 0x1a, 0x0c, 0x73, 0x29,
-  0x1a, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20,
-  0x08, 0x06, 0x9b, 0xee, 0x06, 0xa7, 0x1b, 0x90, 0x6d, 0x50, 0xbb, 0xc1,
-  0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26,
-  0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x16,
-  0xbe, 0x81, 0xeb, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
-  0x01, 0x26, 0xbe, 0xc1, 0xeb, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00,
-  0x08, 0x82, 0x01, 0x36, 0xbe, 0x01, 0xec, 0x06, 0x09, 0x11, 0x8c, 0x18,
-  0x28, 0x00, 0x08, 0x82, 0xc1, 0xa2, 0xbe, 0xc1, 0xeb, 0x06, 0x6e, 0x1b,
-  0x04, 0xbc, 0x1b, 0xfc, 0x6d, 0xe0, 0xbb, 0xc1, 0x68, 0x42, 0x00, 0x5c,
-  0xf0, 0xd8, 0x2c, 0x01, 0x8f, 0x0c, 0x37, 0xe8, 0x54, 0xf8, 0x06, 0x60,
-  0x30, 0xcb, 0xb0, 0x22, 0x3c, 0x12, 0x18, 0xd8, 0x06, 0x62, 0x1b, 0xc4,
-  0x67, 0x38, 0x02, 0xac, 0xc6, 0x36, 0x20, 0xbe, 0x59, 0x06, 0x16, 0x79,
-  0x91, 0xc0, 0xc8, 0x36, 0x08, 0xab, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82,
-  0x61, 0x2e, 0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x62,
-  0x7d, 0x03, 0x1d, 0x6e, 0x08, 0xd2, 0x37, 0x00, 0x83, 0x59, 0x86, 0x16,
-  0x71, 0x91, 0xc0, 0x06, 0xb6, 0x0d, 0xe0, 0x33, 0x4b, 0x30, 0x23, 0xb6,
-  0xb6, 0x01, 0x11, 0x9f, 0x59, 0x82, 0x19, 0x19, 0x8e, 0x58, 0x2b, 0xb6,
-  0x0d, 0x84, 0x6f, 0x96, 0x01, 0x46, 0x66, 0x24, 0x30, 0xb6, 0x6a, 0xdb,
-  0x20, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20,
-  0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xb0, 0xdf, 0x40, 0x87, 0x1b, 0x02,
-  0xfa, 0x0d, 0xc0, 0x60, 0x96, 0x21, 0x46, 0x64, 0x24, 0xb0, 0xba, 0x0d,
-  0x86, 0xf8, 0xcc, 0x12, 0xcc, 0x88, 0x11, 0x78, 0x1b, 0xc0, 0x67, 0x96,
-  0x60, 0x46, 0x06, 0x5a, 0x1e, 0xad, 0x45, 0x30, 0x17, 0x21, 0x62, 0x44,
-  0x90, 0x11, 0x76, 0x0d, 0x5e, 0xe4, 0x82, 0x61, 0xec, 0x6e, 0x83, 0xbd,
-  0x0d, 0xe2, 0x33, 0x1c, 0x71, 0x57, 0x7c, 0x1b, 0x10, 0xdf, 0x2c, 0x03,
-  0x8d, 0xdc, 0x48, 0x60, 0x7d, 0x1b, 0xe0, 0x55, 0x7c, 0x2c, 0x18, 0xe8,
-  0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8,
-  0x14, 0x41, 0xc2, 0x81, 0x0e, 0x37, 0x04, 0x22, 0x1c, 0x80, 0xc1, 0x2c,
-  0x43, 0x8d, 0xd8, 0x48, 0x60, 0x43, 0xe9, 0x06, 0xf0, 0x99, 0x25, 0xd8,
-  0x11, 0x13, 0xdd, 0x80, 0x88, 0xcf, 0x2c, 0xc1, 0x8e, 0x0c, 0x47, 0x88,
-  0xd6, 0xe8, 0x06, 0xc2, 0x37, 0xcb, 0x80, 0x23, 0x3b, 0x12, 0xd8, 0x68,
-  0x91, 0x6e, 0x10, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f,
-  0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xbc, 0x70, 0xa0, 0xc3,
-  0x0d, 0x41, 0x0b, 0x07, 0x60, 0x30, 0xcb, 0x90, 0x23, 0x3a, 0x12, 0x18,
-  0xeb, 0x06, 0x43, 0x7c, 0x66, 0x09, 0x76, 0xc4, 0x88, 0xd8, 0x0d, 0xe0,
-  0x33, 0x4b, 0xb0, 0x23, 0x03, 0x2d, 0x8f, 0x56, 0x23, 0x98, 0x8d, 0x10,
-  0x39, 0x22, 0xe8, 0x88, 0xce, 0x06, 0x37, 0x72, 0xc1, 0x30, 0x17, 0x3c,
-  0x75, 0xdb, 0x53, 0xd7, 0xb7, 0xc1, 0x30, 0xe7, 0xaa, 0xc1, 0x30, 0x47,
-  0x0c, 0x73, 0xc4, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xb0, 0xfd,
-  0x70, 0xc0, 0xc2, 0x41, 0xfa, 0x06, 0x3a, 0x1c, 0x8c, 0x26, 0x04, 0xc0,
-  0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x91,
-  0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x60, 0x66, 0x1c, 0xcc, 0x70,
-  0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x60, 0x67, 0x1c,
-  0xd0, 0x70, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x60,
-  0x68, 0x1c, 0xd4, 0x70, 0x90, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20,
-  0x18, 0x2c, 0x6f, 0x1c, 0xd0, 0x70, 0x30, 0xbf, 0x41, 0x10, 0xc6, 0x01,
-  0x09, 0x07, 0x63, 0x1c, 0x8c, 0x26, 0x04, 0xc0, 0x05, 0x8f, 0xcd, 0x12,
-  0xf0, 0xc8, 0x40, 0xcb, 0x63, 0x1a, 0x27, 0x82, 0x82, 0x83, 0x89, 0xb0,
-  0x44, 0x8a, 0x08, 0x3b, 0x82, 0x82, 0x83, 0x8a, 0xcc, 0x32, 0xf4, 0xc8,
-  0x8f, 0xfc, 0xd6, 0x70, 0x84, 0xfa, 0x94, 0x70, 0x30, 0x7c, 0xb7, 0x3e,
-  0xc3, 0x0c, 0x37, 0x04, 0xf0, 0x1b, 0x90, 0x41, 0x0d, 0x81, 0x0e, 0x47,
-  0x98, 0x57, 0x0a, 0x07, 0xc3, 0x57, 0x81, 0xa0, 0x87, 0x5e, 0xc3, 0x0c,
-  0x37, 0x04, 0xf3, 0x1b, 0x90, 0x41, 0x05, 0x83, 0xce, 0x32, 0xf8, 0xc8,
-  0x9c, 0x04, 0x77, 0xbb, 0xc1, 0x30, 0x87, 0xae, 0xc1, 0x30, 0x23, 0x06,
-  0x07, 0x00, 0x82, 0x60, 0xb0, 0xdd, 0x71, 0x40, 0xc6, 0x41, 0x08, 0x07,
-  0x72, 0x1c, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c,
-  0xc2, 0x68, 0x02, 0x31, 0x14, 0x71, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20,
-  0x18, 0x60, 0x7e, 0x1c, 0xac, 0x71, 0x70, 0x10, 0xc1, 0x88, 0x01, 0x02,
-  0x80, 0x20, 0x18, 0x60, 0x7f, 0x1c, 0xb0, 0x71, 0xc0, 0x10, 0xc1, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x60, 0xa0, 0x1c, 0xb4, 0x71, 0x20, 0x11,
-  0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c, 0xa7, 0x1c, 0xb0, 0x71,
-  0xb0, 0xc2, 0x41, 0x90, 0xc7, 0x01, 0x0f, 0x07, 0x7b, 0x1c, 0x8c, 0x26,
-  0x04, 0xc0, 0x05, 0x8f, 0xcd, 0x12, 0xcc, 0xc9, 0x70, 0xc3, 0x7d, 0xfd,
-  0x71, 0x00, 0x06, 0xb3, 0x0c, 0x60, 0x12, 0x26, 0x41, 0x99, 0x70, 0xf0,
-  0xc6, 0x01, 0x5c, 0xf0, 0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50,
-  0xa9, 0x1c, 0xc0, 0x71, 0x40, 0x42, 0x3f, 0x1c, 0x8c, 0x18, 0x1c, 0x00,
-  0x08, 0x82, 0x01, 0xa5, 0xca, 0x01, 0x1c, 0x07, 0x81, 0x70, 0xc1, 0x30,
-  0x95, 0xc2, 0x01, 0x1d, 0x07, 0x70, 0xc1, 0x53, 0x23, 0x06, 0x07, 0x00,
-  0x82, 0x60, 0x40, 0xb9, 0x72, 0x50, 0xc7, 0x41, 0x88, 0x91, 0x71, 0x30,
-  0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0xd4, 0x2b, 0x07, 0x75, 0x1c, 0x04,
-  0xc2, 0x05, 0xc3, 0x5c, 0xf0, 0xd4, 0x1d, 0x4f, 0x9d, 0xfc, 0x06, 0xc3,
-  0xdc, 0xc8, 0x06, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c,
-  0x00, 0x08, 0x82, 0xc1, 0x46, 0xcb, 0x41, 0x28, 0x07, 0x3e, 0x1c, 0xbc,
-  0x72, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08,
-  0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60,
-  0x80, 0xed, 0x72, 0x80, 0xca, 0x41, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00,
-  0x82, 0x60, 0x80, 0xf1, 0x72, 0x90, 0xca, 0x41, 0x42, 0x04, 0x23, 0x06,
-  0x08, 0x00, 0x82, 0x60, 0x80, 0xf5, 0x72, 0xa0, 0xca, 0x41, 0x42, 0x04,
-  0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0x90, 0x73, 0x90, 0xca, 0x01,
-  0x1a, 0x07, 0x81, 0x2d, 0x07, 0x79, 0x1c, 0xe0, 0x72, 0x30, 0x9a, 0x10,
-  0x00, 0x17, 0x3c, 0x36, 0x4b, 0x30, 0x27, 0xc3, 0x0d, 0x34, 0xb6, 0xcb,
-  0x01, 0x18, 0xcc, 0x32, 0x88, 0xc9, 0x9c, 0x04, 0xa6, 0xc3, 0x01, 0x0f,
-  0x07, 0xf1, 0x19, 0x8e, 0x00, 0xa3, 0x1e, 0x0e, 0x88, 0x6f, 0x96, 0x61,
-  0x4c, 0xcc, 0x24, 0x30, 0x1f, 0x0e, 0xc2, 0x28, 0x3e, 0x16, 0x0c, 0xf4,
-  0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0xc0, 0x90, 0x8f, 0x15, 0x41, 0x7c,
-  0x8a, 0x28, 0xe7, 0x40, 0x87, 0x1b, 0x82, 0x71, 0x0e, 0xc0, 0x60, 0x96,
-  0x81, 0x4c, 0xca, 0x24, 0xb0, 0xc1, 0x8c, 0x03, 0xf8, 0xcc, 0x12, 0xa8,
-  0x89, 0x95, 0x71, 0x40, 0xc4, 0x67, 0x96, 0x40, 0x4d, 0x86, 0x23, 0xd6,
-  0xc8, 0x8c, 0x03, 0xe1, 0x9b, 0x65, 0x38, 0x13, 0x35, 0x09, 0x8c, 0x8d,
-  0xce, 0x38, 0x88, 0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7,
-  0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xe0, 0x39, 0xd0, 0xe1,
-  0x86, 0xc0, 0x9d, 0x03, 0x30, 0x98, 0x65, 0x40, 0x93, 0x34, 0x09, 0xec,
-  0x8d, 0x83, 0x21, 0x3e, 0xb3, 0x04, 0x6a, 0x62, 0x84, 0x1c, 0x07, 0xf0,
-  0x99, 0x25, 0x50, 0x93, 0x81, 0x96, 0x47, 0x23, 0x13, 0xac, 0x4c, 0x08,
-  0x34, 0x11, 0xd2, 0x84, 0x1d, 0x03, 0x33, 0xb9, 0x60, 0x18, 0x8b, 0xe3,
-  0xa0, 0x8e, 0x83, 0xf8, 0x0c, 0x47, 0xc4, 0x99, 0x1d, 0x07, 0xc4, 0x37,
-  0xcb, 0xb0, 0x26, 0x6e, 0x12, 0xd8, 0x1d, 0x07, 0x72, 0x16, 0x1f, 0x0b,
-  0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x60, 0xc8, 0xc7, 0x8a,
-  0x20, 0x3e, 0x45, 0xf8, 0x73, 0xa0, 0xc3, 0x0d, 0x01, 0x3f, 0x07, 0x60,
-  0x30, 0xcb, 0xc0, 0x26, 0x6d, 0x12, 0xd8, 0xf0, 0xc7, 0x01, 0x7c, 0x66,
-  0x09, 0xe4, 0xc4, 0xf8, 0x38, 0x20, 0xe2, 0x33, 0x4b, 0x20, 0x27, 0xc3,
-  0x11, 0x7c, 0xd6, 0xc7, 0x81, 0xf0, 0xcd, 0x32, 0xbc, 0x89, 0x9c, 0x04,
-  0xd6, 0x67, 0x7e, 0x1c, 0xc4, 0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73,
-  0xc1, 0x53, 0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x29, 0x1d,
-  0xe8, 0x70, 0x43, 0x70, 0xd2, 0x01, 0x18, 0xcc, 0x32, 0xc0, 0x49, 0x9c,
-  0x04, 0x66, 0xca, 0xc1, 0x10, 0x9f, 0x59, 0x02, 0x39, 0x31, 0x62, 0x95,
-  0x03, 0xf8, 0xcc, 0x12, 0xc8, 0xc9, 0x40, 0xcb, 0xa3, 0xb1, 0x09, 0xd6,
-  0x26, 0x04, 0x9c, 0x08, 0x71, 0xe2, 0xd2, 0x81, 0x9b, 0x5c, 0x30, 0xcc,
-  0x05, 0x4f, 0xdd, 0xf6, 0xd4, 0xdd, 0x71, 0x30, 0xcc, 0xa1, 0x6e, 0x30,
-  0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18,
-  0x6c, 0x39, 0x1d, 0x98, 0x74, 0x30, 0xce, 0x01, 0x4d, 0x07, 0xa3, 0x09,
-  0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c,
-  0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x18, 0x58, 0x07,
-  0x2d, 0x1d, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x58,
-  0x58, 0x07, 0x2e, 0x1d, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08,
-  0x06, 0x98, 0x58, 0x07, 0x2f, 0x1d, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00,
-  0x20, 0x08, 0x06, 0x4b, 0x5a, 0x07, 0x2e, 0x1d, 0xb4, 0x73, 0x10, 0xec,
-  0x74, 0xe0, 0xcf, 0x41, 0x4f, 0x07, 0xa3, 0x09, 0x01, 0x70, 0xc1, 0x63,
-  0xb3, 0x04, 0x73, 0x32, 0xd0, 0xf2, 0x98, 0x86, 0x8f, 0xf0, 0xe7, 0xd0,
-  0x23, 0x2c, 0x01, 0x26, 0x82, 0x9c, 0xf0, 0xe7, 0x10, 0x26, 0xb3, 0x0c,
-  0x74, 0x62, 0x27, 0xb9, 0x36, 0x1c, 0x91, 0x3e, 0xff, 0x1c, 0x0c, 0xdf,
-  0xa9, 0xcf, 0x30, 0xc3, 0x0d, 0x81, 0x3a, 0x07, 0x64, 0x50, 0x43, 0xa0,
-  0xc3, 0x11, 0xe0, 0x36, 0xd2, 0xc1, 0xf0, 0x55, 0x20, 0xe8, 0x89, 0xdb,
-  0x30, 0xc3, 0x0d, 0x41, 0x3b, 0x07, 0x64, 0x50, 0xc1, 0xa0, 0xb3, 0x0c,
-  0x75, 0xa2, 0x2a, 0xc1, 0xc5, 0x72, 0x30, 0xcc, 0x89, 0x6f, 0x30, 0xcc,
-  0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x6c, 0x71, 0x1d, 0xf8, 0x74, 0xb0,
-  0xcf, 0x01, 0x5b, 0x07, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3,
-  0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x1c, 0x32, 0x62, 0x80, 0x00,
-  0x20, 0x08, 0x06, 0x18, 0x5e, 0x07, 0x65, 0x1d, 0x1c, 0x44, 0x30, 0x62,
-  0x80, 0x00, 0x20, 0x08, 0x06, 0x58, 0x5e, 0x07, 0x66, 0x1d, 0x30, 0x44,
-  0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x98, 0x5e, 0x07, 0x67, 0x1d,
-  0x48, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x4b, 0x68, 0x07,
-  0x66, 0x1d, 0x94, 0x74, 0x10, 0xcc, 0x75, 0x60, 0xd3, 0x41, 0x5d, 0x07,
-  0xa3, 0x09, 0x01, 0x70, 0xc1, 0x63, 0xb3, 0x04, 0xaa, 0x32, 0xdc, 0x10,
-  0x6f, 0x79, 0x1d, 0x80, 0xc1, 0x2c, 0xc3, 0x9d, 0xe0, 0x49, 0x50, 0x20,
-  0x1d, 0xa4, 0x75, 0x00, 0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08,
-  0x06, 0xd4, 0x68, 0x07, 0x6a, 0x1d, 0x8c, 0x50, 0x4e, 0x07, 0x23, 0x06,
-  0x07, 0x00, 0x82, 0x60, 0x40, 0x91, 0x76, 0xa0, 0xd6, 0x41, 0x20, 0x5c,
-  0x30, 0x4c, 0x8d, 0x74, 0xe0, 0xd6, 0x01, 0x5c, 0xf0, 0xd4, 0x88, 0xc1,
-  0x01, 0x80, 0x20, 0x18, 0x50, 0xa8, 0x1d, 0xbc, 0x75, 0xb0, 0x6f, 0x3e,
-  0x1d, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x01, 0x95, 0xda, 0xc1, 0x5b,
-  0x07, 0x81, 0x70, 0xc1, 0x30, 0x17, 0x3c, 0x75, 0xc7, 0x53, 0xc7, 0xce,
-  0xc1, 0x30, 0xd7, 0xbf, 0xc1, 0x30, 0x47, 0x0c, 0x73, 0xc4, 0x30, 0x23,
-  0x06, 0x07, 0x00, 0x82, 0x60, 0xb0, 0xb9, 0x76, 0xb0, 0xd7, 0x01, 0x4e,
-  0x07, 0xa9, 0x1d, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26,
-  0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x91, 0xc8, 0x88, 0x01, 0x02, 0x80,
-  0x20, 0x18, 0x60, 0xb5, 0x1d, 0x88, 0x76, 0x90, 0x10, 0xc1, 0x88, 0x01,
-  0x02, 0x80, 0x20, 0x18, 0x60, 0xb6, 0x1d, 0x8c, 0x76, 0x90, 0x10, 0xc1,
-  0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x60, 0xb7, 0x1d, 0x90, 0x76, 0x90,
-  0x10, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c, 0xbe, 0x1d, 0x8c,
-  0x76, 0x20, 0xd6, 0x41, 0x00, 0xdb, 0xc1, 0x5c, 0x07, 0xb2, 0x1d, 0x8c,
-  0x26, 0x04, 0xc0, 0x05, 0x8f, 0xcd, 0x12, 0xa8, 0xca, 0x70, 0x83, 0xcb,
-  0xd5, 0x76, 0x00, 0x06, 0xb3, 0x0c, 0x79, 0xa2, 0x2a, 0x81, 0xd1, 0x74,
-  0x60, 0xd3, 0x41, 0x7c, 0x86, 0x23, 0x7e, 0xe8, 0xa6, 0x03, 0xe2, 0x9b,
-  0x65, 0xd0, 0x93, 0x3e, 0x09, 0x0c, 0xa7, 0x03, 0x30, 0x8a, 0x8f, 0x05,
-  0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x30, 0xe4, 0x63, 0x45,
-  0x10, 0x9f, 0x22, 0x7e, 0x3b, 0xd0, 0xe1, 0x86, 0xa0, 0xb7, 0x03, 0x30,
-  0x98, 0x65, 0xd8, 0x13, 0x3e, 0x09, 0x6c, 0x00, 0xeb, 0x00, 0x3e, 0xb3,
-  0x04, 0xa1, 0x62, 0x3f, 0x1d, 0x10, 0xf1, 0x99, 0x25, 0x08, 0x95, 0xe1,
-  0x08, 0x35, 0x02, 0xeb, 0x40, 0xf8, 0x66, 0x19, 0xfc, 0x24, 0x54, 0x02,
-  0x5b, 0xa3, 0xb0, 0x0e, 0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9,
-  0xe0, 0x29, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0xf5, 0x0e,
-  0x74, 0xb8, 0x21, 0x40, 0xef, 0x00, 0x0c, 0x66, 0x19, 0xfe, 0x04, 0x54,
-  0x02, 0x4b, 0xeb, 0x60, 0x88, 0xcf, 0x2c, 0x41, 0xa8, 0x18, 0xc1, 0xd6,
-  0x01, 0x7c, 0x66, 0x09, 0x42, 0x65, 0xa0, 0xe5, 0xd1, 0xf6, 0x04, 0xe3,
-  0x13, 0xe2, 0x4f, 0x04, 0x50, 0x51, 0xc7, 0xa0, 0x4f, 0x2e, 0x18, 0xc6,
-  0xd6, 0x3a, 0x78, 0xeb, 0x20, 0x3e, 0xc3, 0x11, 0x6b, 0x07, 0xd7, 0x01,
-  0xf1, 0xcd, 0x32, 0x88, 0x4a, 0xa9, 0x04, 0x16, 0xd7, 0x01, 0xdb, 0xc5,
-  0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x18, 0xf2,
-  0xb1, 0x22, 0x88, 0x4f, 0x11, 0xf8, 0x1d, 0xe8, 0x70, 0x43, 0x60, 0xdf,
-  0x01, 0x18, 0xcc, 0x32, 0x8c, 0x0a, 0xa9, 0x04, 0x36, 0xe4, 0x75, 0x00,
-  0x9f, 0x59, 0x82, 0x54, 0x31, 0xbb, 0x0e, 0x88, 0xf8, 0xcc, 0x12, 0xa4,
-  0xca, 0x70, 0x84, 0xdd, 0xdd, 0x75, 0x20, 0x7c, 0xb3, 0x0c, 0xa6, 0x92,
-  0x2a, 0x81, 0xdd, 0x1d, 0x5e, 0x07, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05,
-  0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0xc4,
-  0x88, 0x07, 0x3a, 0xdc, 0x10, 0x84, 0x78, 0x00, 0x06, 0xb3, 0x0c, 0xa7,
-  0x82, 0x2a, 0x81, 0x81, 0x76, 0x30, 0xc4, 0x67, 0x96, 0x20, 0x55, 0x8c,
-  0x28, 0xed, 0x00, 0x3e, 0xb3, 0x04, 0xa9, 0x32, 0xd0, 0xf2, 0x68, 0xa3,
-  0x82, 0x91, 0x0a, 0x71, 0x2a, 0x02, 0xaa, 0x88, 0x7c, 0x50, 0x2a, 0x17,
-  0x0c, 0x73, 0xc1, 0x53, 0xb7, 0x3d, 0x75, 0x71, 0x1d, 0x0c, 0x73, 0xa2,
-  0x1c, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20,
-  0x08, 0x06, 0xdb, 0x8c, 0x07, 0x20, 0x1e, 0xf4, 0x76, 0xe0, 0xe2, 0xc1,
-  0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26,
-  0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0xa6,
-  0xe3, 0xc1, 0x89, 0x07, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
-  0x01, 0xb6, 0xe3, 0x01, 0x8a, 0x07, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00,
-  0x08, 0x82, 0x01, 0xc6, 0xe3, 0x41, 0x8a, 0x07, 0x09, 0x11, 0x8c, 0x18,
-  0x28, 0x00, 0x08, 0x82, 0xc1, 0x32, 0xe6, 0x01, 0x8a, 0x07, 0xe7, 0x1d,
-  0x04, 0x35, 0x1e, 0xe0, 0x77, 0x70, 0xe3, 0xc1, 0x68, 0x42, 0x00, 0x5c,
-  0xf0, 0xd8, 0x2c, 0x81, 0xaa, 0x0c, 0xb4, 0x3c, 0xa6, 0x51, 0x27, 0xf0,
-  0x3b, 0xd0, 0x09, 0x4b, 0xdc, 0x89, 0x90, 0x2a, 0xf0, 0x3b, 0xe0, 0x89,
-  0xd9, 0x1e, 0x7e, 0x07, 0xf0, 0x99, 0x65, 0x58, 0x95, 0x56, 0xa1, 0xbd,
-  0xe1, 0x08, 0xdc, 0xd3, 0xef, 0x60, 0xf8, 0x2e, 0xf7, 0x86, 0x19, 0x6e,
-  0x08, 0xca, 0x3b, 0x20, 0x83, 0x1a, 0x02, 0x1d, 0x8e, 0x28, 0xfc, 0x3b,
-  0x18, 0xbe, 0x0a, 0x04, 0xbd, 0x63, 0x98, 0xe1, 0x86, 0x00, 0xbd, 0x03,
-  0x32, 0xa8, 0x60, 0xd0, 0x59, 0x06, 0x56, 0x09, 0x97, 0xe0, 0x58, 0x3b,
-  0x18, 0xe6, 0x7a, 0x39, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c,
-  0x36, 0x36, 0x0f, 0x72, 0x3c, 0xb0, 0xef, 0xe0, 0xcc, 0x83, 0xd1, 0x84,
-  0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86,
-  0x22, 0x0e, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x6c, 0xce, 0x03,
-  0x30, 0x0f, 0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x8c,
-  0xce, 0x83, 0x30, 0x0f, 0x18, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04,
-  0x03, 0xac, 0xce, 0x03, 0x31, 0x0f, 0x24, 0x22, 0x18, 0x31, 0x50, 0x00,
-  0x10, 0x04, 0x83, 0x85, 0xcf, 0x83, 0x30, 0x0f, 0x40, 0x3c, 0x08, 0xdc,
-  0x3c, 0x88, 0xf1, 0x00, 0xce, 0x83, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xb1,
-  0x59, 0x82, 0x70, 0x19, 0x6e, 0x60, 0x3f, 0x3a, 0x0f, 0xc0, 0x60, 0x96,
-  0xc1, 0x55, 0x5e, 0x25, 0xa8, 0xfd, 0x0e, 0xc8, 0x3c, 0x80, 0x0b, 0x9e,
-  0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0xca, 0xcf, 0x83, 0x32, 0x0f,
-  0xe4, 0x8f, 0xc6, 0x83, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0xfe,
-  0x3c, 0x28, 0xf3, 0x20, 0x10, 0x2e, 0x18, 0xa6, 0xfc, 0x3b, 0x48, 0xf3,
-  0x00, 0x2e, 0x78, 0x6a, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xa8, 0x51,
-  0x0f, 0xd4, 0x3c, 0x00, 0x83, 0x1c, 0x0f, 0x46, 0x0c, 0x0e, 0x00, 0x04,
-  0xc1, 0x80, 0x22, 0xf5, 0x40, 0xcd, 0x83, 0x40, 0xb8, 0x60, 0x98, 0x0b,
-  0x9e, 0xba, 0xe3, 0xa9, 0x3b, 0xef, 0x60, 0x98, 0xc3, 0xe7, 0x60, 0x98,
-  0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xd8,
-  0x52, 0x3d, 0xb0, 0xf3, 0x60, 0xc6, 0x03, 0x52, 0x0f, 0x46, 0x13, 0x02,
-  0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a,
-  0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x30, 0x58, 0x0f, 0xfa,
-  0x3c, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb0, 0x58,
-  0x0f, 0xfc, 0x3c, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c,
-  0x30, 0x59, 0x0f, 0xfe, 0x3c, 0x48, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40,
-  0x10, 0x0c, 0x96, 0x5c, 0x0f, 0xfc, 0x3c, 0xe8, 0xf1, 0x20, 0x58, 0xf5,
-  0xc0, 0xcd, 0x83, 0x56, 0x0f, 0x46, 0x13, 0x02, 0xe0, 0x82, 0xc7, 0x66,
-  0x09, 0xc2, 0x65, 0xb8, 0x21, 0x05, 0x03, 0x58, 0x0f, 0xc0, 0x60, 0x96,
-  0x01, 0x56, 0xc2, 0x25, 0xb0, 0x17, 0x0f, 0x62, 0x3c, 0x88, 0xcf, 0x70,
-  0x84, 0x0b, 0x06, 0x32, 0x1e, 0x10, 0xdf, 0x2c, 0x43, 0xac, 0xd0, 0x4a,
-  0x60, 0x33, 0x1e, 0xbc, 0x60, 0x10, 0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30,
-  0xcc, 0x05, 0x4f, 0x59, 0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xe8,
-  0x7a, 0xa0, 0xc3, 0x0d, 0x01, 0xae, 0x07, 0x60, 0x30, 0xcb, 0x20, 0x2b,
-  0xb3, 0x12, 0xd8, 0xb0, 0xe3, 0x01, 0x7c, 0x66, 0x09, 0x70, 0xc5, 0x74,
-  0x3c, 0x20, 0xe2, 0x33, 0x4b, 0x80, 0x2b, 0xc3, 0x11, 0x39, 0x18, 0xec,
-  0x78, 0x20, 0x7c, 0xb3, 0x0c, 0xb5, 0x82, 0x2b, 0x81, 0xe9, 0x60, 0xc0,
-  0xe3, 0x41, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65,
-  0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x51, 0xee, 0x81, 0x0e, 0x37,
-  0x04, 0xe3, 0x1e, 0x80, 0xc1, 0x2c, 0x83, 0xad, 0xdc, 0x4a, 0x60, 0x64,
-  0x1e, 0x0c, 0xf1, 0x99, 0x25, 0xc0, 0x15, 0x23, 0xce, 0x3c, 0x80, 0xcf,
-  0x2c, 0x01, 0xae, 0x0c, 0xb4, 0x3c, 0x9a, 0xac, 0x60, 0xb3, 0x42, 0xd8,
-  0x8a, 0x70, 0x2b, 0xa8, 0x28, 0xd0, 0xca, 0x05, 0xc3, 0x98, 0x99, 0x07,
-  0x6a, 0x1e, 0xc4, 0x67, 0x38, 0x02, 0x16, 0xd6, 0x3c, 0x20, 0xbe, 0x59,
-  0x86, 0x5c, 0xe1, 0x95, 0xc0, 0xd8, 0x3c, 0x88, 0x85, 0xf8, 0x58, 0x30,
-  0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04,
-  0xf1, 0x29, 0x62, 0xde, 0x03, 0x1d, 0x6e, 0x08, 0xe2, 0x3d, 0x00, 0x83,
-  0x59, 0x06, 0x5d, 0xd9, 0x95, 0xc0, 0x06, 0x3a, 0x0f, 0xe0, 0x33, 0x4b,
-  0x00, 0x2e, 0x16, 0xe7, 0x01, 0x11, 0x9f, 0x59, 0x02, 0x70, 0x19, 0x8e,
-  0xd8, 0x05, 0x39, 0x0f, 0x84, 0x6f, 0x96, 0xa1, 0x57, 0xc0, 0x25, 0x30,
-  0x5e, 0x98, 0xf3, 0x20, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b,
-  0x9e, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xf0, 0xf7, 0x40,
-  0x87, 0x1b, 0x02, 0x7e, 0x0f, 0xc0, 0x60, 0x96, 0xc1, 0x57, 0x7e, 0x25,
-  0xb0, 0x3d, 0x0f, 0x86, 0xf8, 0xcc, 0x12, 0x80, 0x8b, 0x11, 0xa0, 0x1e,
-  0xc0, 0x67, 0x96, 0x00, 0x5c, 0x06, 0x5a, 0x1e, 0x4d, 0x57, 0xb0, 0x5d,
-  0x21, 0x7c, 0x45, 0xf8, 0x15, 0xd6, 0xe0, 0x95, 0x0b, 0x86, 0xb9, 0xe0,
-  0xa9, 0xdb, 0x9e, 0x3a, 0x36, 0x0f, 0x86, 0xb9, 0xbe, 0x0e, 0x86, 0x39,
-  0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0xcd,
-  0xe5, 0x83, 0x7d, 0x0f, 0x70, 0x3d, 0x48, 0xf9, 0x60, 0x34, 0x21, 0x00,
-  0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88,
-  0x44, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x00, 0xab, 0xf9, 0x40, 0xe4,
-  0x83, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x00, 0xb3, 0xf9,
-  0x60, 0xe4, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x00,
-  0xbb, 0xf9, 0x80, 0xe4, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04,
-  0xc1, 0x60, 0xf1, 0xf9, 0x60, 0xe4, 0x03, 0x71, 0x0f, 0x02, 0x98, 0x0f,
-  0xe6, 0x3d, 0x90, 0xf9, 0x60, 0x34, 0x21, 0x00, 0x2e, 0x78, 0x6c, 0x96,
-  0x20, 0x5c, 0x06, 0x5a, 0x1e, 0xd3, 0x60, 0x15, 0xf3, 0x1e, 0x56, 0x85,
-  0x25, 0x5c, 0x45, 0x00, 0x17, 0xf3, 0x1e, 0x5e, 0x65, 0x96, 0x41, 0x5c,
-  0xc8, 0xc5, 0x15, 0x83, 0xe1, 0x88, 0x59, 0x0c, 0xe8, 0x3d, 0x18, 0xbe,
-  0xa3, 0xc5, 0x60, 0x98, 0xe1, 0x86, 0xe0, 0xd7, 0x03, 0x32, 0xa8, 0x21,
-  0xd0, 0xe1, 0x88, 0x7f, 0xc0, 0xf7, 0x60, 0xf8, 0x2a, 0x10, 0xf4, 0x42,
-  0x62, 0x98, 0xe1, 0x86, 0x40, 0xdc, 0x03, 0x32, 0xa8, 0x60, 0xd0, 0x59,
-  0x86, 0x71, 0xc1, 0x97, 0xe0, 0x4c, 0x3d, 0x18, 0xe6, 0x6e, 0x3b, 0x18,
-  0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x36, 0xb3, 0x0f, 0x66, 0x3e,
-  0x80, 0xf7, 0x20, 0xec, 0x83, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82,
-  0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x0e, 0x19, 0x31, 0x40,
-  0x00, 0x10, 0x04, 0x03, 0xac, 0xed, 0x03, 0x9d, 0x0f, 0x0e, 0x22, 0x18,
-  0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xcc, 0xed, 0x83, 0x9d, 0x0f, 0x18,
-  0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xec, 0xed, 0x03, 0x9e,
-  0x0f, 0x24, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0xc5, 0xee,
-  0x83, 0x9d, 0x0f, 0xf4, 0x3d, 0x08, 0xd0, 0x3e, 0x58, 0xf9, 0x40, 0xed,
-  0x83, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xb1, 0x59, 0x02, 0x7c, 0x19, 0x6e,
-  0x30, 0xc7, 0xc0, 0xed, 0x03, 0x30, 0x98, 0x65, 0x28, 0x17, 0x73, 0x09,
-  0xaa, 0xde, 0x03, 0x9f, 0x0f, 0xe0, 0x82, 0xa7, 0x46, 0x0c, 0x0e, 0x00,
-  0x04, 0xc1, 0x80, 0xc2, 0xfb, 0xe0, 0xe7, 0x83, 0x76, 0x0c, 0x5c, 0x3e,
-  0x18, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x2a, 0xef, 0x83, 0x9f, 0x0f,
-  0x02, 0xe1, 0x82, 0x61, 0x0a, 0xdf, 0x83, 0xb1, 0x0f, 0xe0, 0x82, 0xa7,
-  0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0xea, 0xfb, 0x80, 0xec, 0x03,
-  0x9d, 0x98, 0xf9, 0x60, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x28, 0xbf,
-  0x0f, 0xc8, 0x3e, 0x08, 0x84, 0x0b, 0x86, 0xb9, 0xe0, 0xa9, 0x3b, 0x9e,
-  0xba, 0x70, 0x0f, 0x86, 0x39, 0xf9, 0x0e, 0x86, 0x39, 0x62, 0x98, 0x23,
-  0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x6d, 0xf4, 0x03, 0xb8,
-  0x0f, 0x5a, 0x3e, 0xf0, 0xfb, 0x60, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84,
-  0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c,
-  0x10, 0x00, 0x04, 0xc1, 0x00, 0x53, 0xfd, 0xe0, 0xee, 0x83, 0x84, 0x08,
-  0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x00, 0x5b, 0xfd, 0x00, 0xef, 0x83,
-  0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x00, 0x63, 0xfd, 0x20,
-  0xef, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0x99,
-  0xfd, 0x00, 0xef, 0x83, 0x9b, 0x0f, 0x82, 0xd2, 0x0f, 0xd0, 0x3e, 0x38,
-  0xfd, 0x60, 0x34, 0x21, 0x00, 0x2e, 0x78, 0x6c, 0x96, 0x00, 0x5f, 0x86,
-  0x1b, 0x46, 0x32, 0x50, 0xfd, 0x00, 0x0c, 0x66, 0x19, 0xce, 0x05, 0x5f,
-  0x02, 0x4b, 0xf9, 0x60, 0xe5, 0x83, 0xf8, 0x0c, 0x47, 0xa4, 0x64, 0xc0,
-  0xf2, 0x01, 0xf1, 0xcd, 0x32, 0xa0, 0xcb, 0xba, 0x04, 0xd6, 0xf2, 0x81,
-  0x4a, 0x06, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94,
-  0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x04, 0xed, 0x07, 0x3a, 0xdc,
-  0x10, 0xc8, 0x7e, 0x00, 0x06, 0xb3, 0x0c, 0xe9, 0xa2, 0x2e, 0x81, 0x0d,
-  0x35, 0x1f, 0xc0, 0x67, 0x96, 0xe0, 0x5d, 0x8c, 0xe6, 0x03, 0x22, 0x3e,
-  0xb3, 0x04, 0xef, 0x32, 0x1c, 0x41, 0x93, 0x41, 0xcd, 0x07, 0xc2, 0x37,
-  0xcb, 0xc0, 0x2e, 0xef, 0x12, 0x58, 0x4d, 0x06, 0x36, 0x1f, 0xc4, 0xc7,
-  0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x44, 0xf2, 0xb1,
-  0x22, 0x88, 0x4f, 0x11, 0xbf, 0x1f, 0xe8, 0x70, 0x43, 0xd0, 0xfb, 0x01,
-  0x18, 0xcc, 0x32, 0xb4, 0x8b, 0xbb, 0x04, 0xe6, 0xf3, 0xc1, 0x10, 0x9f,
-  0x59, 0x82, 0x77, 0x31, 0x22, 0xec, 0x03, 0xf8, 0xcc, 0x12, 0xbc, 0xcb,
-  0x40, 0xcb, 0xa3, 0xa5, 0x0b, 0xa6, 0x2e, 0x44, 0xbb, 0x08, 0xee, 0xe2,
-  0xa7, 0xc2, 0xba, 0x5c, 0x30, 0x8c, 0x81, 0x7d, 0x40, 0xf6, 0x41, 0x7c,
-  0x86, 0x23, 0x54, 0xa3, 0xec, 0x03, 0xe2, 0x9b, 0x65, 0x80, 0x97, 0x79,
-  0x09, 0xcc, 0xec, 0x83, 0xd5, 0x88, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18,
-  0xe6, 0x82, 0xa7, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xda,
-  0x3f, 0xd0, 0xe1, 0x86, 0x60, 0xfd, 0x03, 0x30, 0x98, 0x65, 0x88, 0x17,
-  0x79, 0x09, 0x6c, 0x70, 0xfb, 0x00, 0x3e, 0xb3, 0x04, 0xf7, 0x62, 0x6b,
-  0x1f, 0x10, 0xf1, 0x99, 0x25, 0xb8, 0x97, 0xe1, 0x88, 0xda, 0x60, 0xfb,
-  0x40, 0xf8, 0x66, 0x19, 0xe8, 0xe5, 0x5e, 0x02, 0xb3, 0x8d, 0xb6, 0x0f,
-  0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x22,
-  0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0xfc, 0x0f, 0x74, 0xb8, 0x21, 0xb0,
-  0xff, 0x00, 0x0c, 0x66, 0x19, 0xea, 0xc5, 0x5e, 0x02, 0xab, 0xfb, 0x60,
-  0x88, 0xcf, 0x2c, 0xc1, 0xbd, 0x18, 0xa1, 0xf7, 0x01, 0x7c, 0x66, 0x09,
-  0xee, 0x65, 0xa0, 0xe5, 0xd1, 0xe2, 0x05, 0x93, 0x17, 0xa2, 0x5e, 0x04,
-  0x7b, 0x01, 0x9d, 0x79, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xba, 0xed, 0xa9,
-  0x33, 0xfb, 0x60, 0x98, 0xbb, 0xf3, 0x60, 0x98, 0x23, 0x86, 0x39, 0x62,
-  0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xd8, 0x50, 0x50, 0xa8, 0xff,
-  0x40, 0xf6, 0x83, 0x11, 0x14, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08,
-  0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00,
-  0x01, 0x40, 0x10, 0x0c, 0xb0, 0x17, 0x14, 0xf8, 0x3f, 0x48, 0x88, 0x60,
-  0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x30, 0x18, 0x14, 0xfa, 0x3f, 0x48,
-  0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb0, 0x18, 0x14, 0xfc,
-  0x3f, 0x48, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x16, 0x1c,
-  0x14, 0xfa, 0x3f, 0xe0, 0xfd, 0x20, 0x50, 0x41, 0xa1, 0xfd, 0x03, 0x16,
-  0x14, 0x46, 0x13, 0x02, 0xe0, 0x82, 0xc7, 0x66, 0x09, 0xf0, 0x65, 0xa0,
-  0xe5, 0x31, 0x8d, 0x71, 0xd1, 0xfd, 0x41, 0x5c, 0x58, 0xa2, 0x5c, 0x84,
-  0x7b, 0xd1, 0xfd, 0xc1, 0x5c, 0x66, 0x19, 0xf2, 0x65, 0x5f, 0x50, 0x33,
-  0x18, 0x8e, 0x98, 0x3d, 0xf7, 0x0f, 0x86, 0xef, 0x68, 0x6f, 0x98, 0xe1,
-  0x86, 0x20, 0xf7, 0x03, 0x32, 0xa8, 0x21, 0xd0, 0xe1, 0x88, 0xfc, 0x90,
-  0xff, 0x60, 0xf8, 0x2a, 0x10, 0xf4, 0xf6, 0x63, 0x98, 0xe1, 0x86, 0x80,
-  0xf7, 0x03, 0x32, 0xa8, 0x60, 0xd0, 0x59, 0x06, 0x7d, 0x79, 0x99, 0xe0,
-  0x40, 0x3f, 0x18, 0xe6, 0x62, 0x3d, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40,
-  0x10, 0x0c, 0x36, 0x30, 0x14, 0x5a, 0x50, 0x50, 0xff, 0x60, 0x07, 0x85,
-  0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d,
-  0x20, 0x86, 0x22, 0x0e, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xec,
-  0x0c, 0x05, 0x1a, 0x14, 0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04,
-  0x03, 0x0c, 0x0d, 0x85, 0x1a, 0x14, 0x18, 0x22, 0x18, 0x31, 0x40, 0x00,
-  0x10, 0x04, 0x03, 0x2c, 0x0d, 0x05, 0x1b, 0x14, 0x24, 0x22, 0x18, 0x31,
-  0x50, 0x00, 0x10, 0x04, 0x83, 0x05, 0x0e, 0x85, 0x1a, 0x14, 0xe8, 0x3f,
-  0x08, 0xc4, 0x50, 0x28, 0x41, 0x81, 0x0c, 0x85, 0xd1, 0x84, 0x00, 0xb8,
-  0xe0, 0xb1, 0x59, 0x82, 0x97, 0x19, 0x6e, 0x00, 0xcf, 0x00, 0x0d, 0x05,
-  0x30, 0x98, 0x65, 0xe0, 0x97, 0x7e, 0x09, 0xea, 0xfd, 0x03, 0x1c, 0x14,
-  0xe0, 0x82, 0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x92, 0x43,
-  0x21, 0x07, 0x85, 0xf6, 0x43, 0x41, 0x61, 0xc4, 0xe0, 0x00, 0x40, 0x10,
-  0x0c, 0xa8, 0x39, 0x14, 0x72, 0x50, 0x08, 0x84, 0x0b, 0x86, 0x29, 0xf9,
-  0x0f, 0x7a, 0x50, 0x80, 0x0b, 0x9e, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04,
-  0x03, 0xea, 0x0e, 0x05, 0x1f, 0x14, 0x68, 0xa4, 0x05, 0x85, 0x11, 0x83,
-  0x03, 0x00, 0x41, 0x30, 0xa0, 0xf0, 0x50, 0xf0, 0x41, 0x21, 0x10, 0x2e,
-  0x18, 0xe6, 0x82, 0xa7, 0xee, 0x78, 0xea, 0x76, 0x3f, 0x18, 0xe6, 0xd8,
-  0x3d, 0x18, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40,
-  0x10, 0x0c, 0xb6, 0x3e, 0x14, 0xd4, 0x50, 0x38, 0x41, 0x01, 0x0f, 0x85,
-  0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d,
-  0x20, 0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x8c,
-  0x14, 0x85, 0x38, 0x14, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04,
-  0x03, 0xac, 0x14, 0x05, 0x39, 0x14, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00,
-  0x10, 0x04, 0x03, 0xcc, 0x14, 0x85, 0x39, 0x14, 0x12, 0x22, 0x18, 0x31,
-  0x50, 0x00, 0x10, 0x04, 0x83, 0xa5, 0x15, 0x05, 0x39, 0x14, 0x62, 0x50,
-  0x08, 0xfe, 0x50, 0x10, 0x43, 0x21, 0x14, 0x85, 0xd1, 0x84, 0x00, 0xb8,
-  0xe0, 0xb1, 0x59, 0x82, 0x97, 0x19, 0x6e, 0xe8, 0xcf, 0x80, 0x14, 0x05,
-  0x30, 0x98, 0x65, 0xf0, 0x97, 0x97, 0x09, 0x6c, 0x04, 0x85, 0x12, 0x14,
-  0xe2, 0x33, 0x1c, 0x91, 0x82, 0x81, 0x09, 0x0a, 0xc4, 0x37, 0xcb, 0xf0,
-  0x2f, 0x22, 0x13, 0xd8, 0x09, 0x0a, 0x2a, 0x18, 0xc4, 0xc7, 0x82, 0x81,
-  0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88,
-  0x4f, 0x11, 0xae, 0x28, 0xe8, 0x70, 0x43, 0xc0, 0x8a, 0x02, 0x18, 0xcc,
-  0x32, 0x80, 0x4c, 0xc8, 0x04, 0x36, 0xbc, 0xa0, 0x00, 0x9f, 0x59, 0x02,
-  0x93, 0x31, 0x17, 0x14, 0x88, 0xf8, 0xcc, 0x12, 0x98, 0xcc, 0x70, 0x04,
-  0x0d, 0x06, 0x2f, 0x28, 0x08, 0xdf, 0x2c, 0xc3, 0xc8, 0x98, 0x4c, 0x60,
-  0x35, 0x18, 0xc0, 0xa0, 0x10, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc,
-  0x05, 0x4f, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xe4, 0xa2,
-  0xa0, 0xc3, 0x0d, 0xc1, 0x2d, 0x0a, 0x60, 0x30, 0xcb, 0x40, 0x32, 0x25,
-  0x13, 0x18, 0x0e, 0x0a, 0x43, 0x7c, 0x66, 0x09, 0x4c, 0xc6, 0x88, 0x1d,
-  0x14, 0xe0, 0x33, 0x4b, 0x60, 0x32, 0x03, 0x2d, 0x8f, 0x06, 0x32, 0x58,
-  0xc8, 0x10, 0x24, 0x23, 0x94, 0x8c, 0x1f, 0x0a, 0x22, 0x73, 0xc1, 0x30,
-  0xa6, 0x83, 0x82, 0x0f, 0x0a, 0xf1, 0x19, 0x8e, 0x20, 0x95, 0x1f, 0x14,
-  0x88, 0x6f, 0x96, 0xe1, 0x64, 0x54, 0x26, 0x30, 0x30, 0x14, 0x4a, 0x25,
-  0x3e, 0x16, 0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0xc0, 0x90,
-  0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x38, 0x47, 0x41, 0x87, 0x1b, 0x82, 0x72,
-  0x14, 0xc0, 0x60, 0x96, 0x01, 0x65, 0x52, 0x26, 0xb0, 0x01, 0x0d, 0x05,
-  0xf8, 0xcc, 0x12, 0xb8, 0x8c, 0x95, 0xa1, 0x40, 0xc4, 0x67, 0x96, 0xc0,
-  0x65, 0x86, 0x23, 0x5e, 0xc5, 0x0c, 0x05, 0xe1, 0x9b, 0x65, 0x58, 0x19,
-  0x97, 0x09, 0x0c, 0x56, 0xce, 0x50, 0x88, 0x8f, 0x05, 0x0e, 0x7d, 0x2e,
-  0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22,
-  0xe4, 0x51, 0xd0, 0xe1, 0x86, 0x00, 0x1e, 0x05, 0x30, 0x98, 0x65, 0x60,
-  0x99, 0x96, 0x09, 0xec, 0x0d, 0x85, 0x21, 0x3e, 0xb3, 0x04, 0x2e, 0x63,
-  0x04, 0x1d, 0x0a, 0xf0, 0x99, 0x25, 0x70, 0x99, 0x81, 0x96, 0x47, 0x43,
-  0x19, 0x2c, 0x65, 0x08, 0x96, 0x11, 0x5a, 0x86, 0xae, 0x54, 0xe6, 0x82,
-  0x61, 0x2e, 0x78, 0xea, 0xb6, 0xa7, 0x0e, 0x0c, 0x85, 0x61, 0x2e, 0xee,
-  0x83, 0x61, 0x8e, 0x18, 0xe6, 0x88, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04,
-  0xc1, 0x60, 0x13, 0x49, 0xe1, 0x1d, 0x05, 0x56, 0x14, 0xfa, 0x51, 0x18,
-  0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04,
-  0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xc0, 0x52,
-  0x52, 0xb0, 0x47, 0x21, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30,
-  0xc0, 0x54, 0x52, 0xb8, 0x47, 0x21, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00,
-  0x41, 0x30, 0xc0, 0x56, 0x52, 0xc0, 0x47, 0x21, 0x21, 0x82, 0x11, 0x03,
-  0x05, 0x00, 0x41, 0x30, 0x58, 0x64, 0x52, 0xb8, 0x47, 0xc1, 0x16, 0x85,
-  0x80, 0x24, 0x85, 0x73, 0x14, 0x4c, 0x52, 0x18, 0x4d, 0x08, 0x80, 0x0b,
-  0x1e, 0x9b, 0x25, 0x78, 0x99, 0x81, 0x96, 0xc7, 0x34, 0xf4, 0xc5, 0x35,
-  0x89, 0x7c, 0x61, 0x09, 0x7e, 0x11, 0x5c, 0xc6, 0x35, 0x89, 0x7e, 0x99,
-  0x65, 0x80, 0x19, 0x99, 0x11, 0xd5, 0x60, 0x38, 0x42, 0xf6, 0xd0, 0x51,
-  0x18, 0xbe, 0x9b, 0xbd, 0x61, 0x86, 0x1b, 0x82, 0x59, 0x14, 0xc8, 0xa0,
-  0x86, 0x40, 0x87, 0x23, 0xe6, 0x85, 0x1d, 0x85, 0xe1, 0xab, 0x40, 0xd0,
-  0xab, 0x97, 0x61, 0x86, 0x1b, 0x02, 0x5b, 0x14, 0xc8, 0xa0, 0x82, 0x41,
-  0x67, 0x19, 0x62, 0xc6, 0x6c, 0x82, 0xd3, 0x43, 0x61, 0x98, 0x5b, 0xfd,
-  0x60, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xd8, 0x74, 0x52, 0x38,
-  0x49, 0x81, 0x1c, 0x85, 0x9a, 0x14, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41,
-  0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x38, 0x64, 0xc4,
-  0x00, 0x01, 0x40, 0x10, 0x0c, 0xb0, 0xb0, 0x14, 0x5c, 0x52, 0x38, 0x88,
-  0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x30, 0xb1, 0x14, 0x5e, 0x52,
-  0x60, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb0, 0xb1, 0x14,
-  0x60, 0x52, 0x90, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x16,
-  0xb5, 0x14, 0x5e, 0x52, 0x70, 0x47, 0x21, 0xe0, 0x49, 0xe1, 0x1f, 0x05,
-  0x9f, 0x14, 0x46, 0x13, 0x02, 0xe0, 0x82, 0xc7, 0x66, 0x09, 0xcc, 0x66,
-  0xb8, 0x41, 0x57, 0x03, 0xb1, 0x14, 0xc0, 0x60, 0x96, 0x61, 0x66, 0x68,
-  0x26, 0xa8, 0x74, 0x14, 0x64, 0x52, 0x80, 0x0b, 0x9e, 0x1a, 0x31, 0x38,
-  0x00, 0x10, 0x04, 0x03, 0x8a, 0x2d, 0x85, 0x99, 0x14, 0xd8, 0x4f, 0x24,
-  0x85, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0xda, 0x52, 0x98, 0x49,
-  0x21, 0x10, 0x2e, 0x18, 0xa6, 0xd8, 0x51, 0xb8, 0x49, 0x01, 0x2e, 0x78,
-  0x6a, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xa8, 0xb8, 0x14, 0x70, 0x52,
-  0x70, 0x99, 0x93, 0x14, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x92,
-  0x4b, 0x01, 0x27, 0x85, 0x40, 0xb8, 0x60, 0x98, 0x0b, 0x9e, 0xba, 0xe3,
-  0xa9, 0xab, 0x45, 0x61, 0x98, 0x33, 0xff, 0x60, 0x98, 0x23, 0x86, 0x39,
-  0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xd8, 0xee, 0x52, 0x20,
-  0x4b, 0x21, 0x24, 0x05, 0xb9, 0x14, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41,
-  0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4,
-  0x00, 0x01, 0x40, 0x10, 0x0c, 0x30, 0xbf, 0x14, 0xd6, 0x52, 0x48, 0x88,
-  0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb0, 0xbf, 0x14, 0xd8, 0x52,
-  0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x30, 0xd0, 0x14,
-  0xda, 0x52, 0x48, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x96,
-  0xd3, 0x14, 0xd8, 0x52, 0x58, 0x49, 0x21, 0xc8, 0x4b, 0x81, 0x27, 0x85,
-  0xbd, 0x14, 0x46, 0x13, 0x02, 0xe0, 0x82, 0xc7, 0x66, 0x09, 0xcc, 0x66,
-  0xb8, 0xe1, 0x5e, 0x03, 0xbf, 0x14, 0xc0, 0x60, 0x96, 0xa1, 0x66, 0xcc,
-  0x26, 0xb0, 0x7e, 0x14, 0xfe, 0x51, 0x88, 0xcf, 0x70, 0x04, 0x0a, 0x06,
-  0x20, 0x29, 0x10, 0xdf, 0x2c, 0x83, 0xcd, 0xe4, 0x4c, 0x60, 0x21, 0x29,
-  0xa4, 0x60, 0x10, 0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f,
-  0x59, 0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xa0, 0xa6, 0xa0, 0xc3,
-  0x0d, 0x81, 0x69, 0x0a, 0x60, 0x30, 0xcb, 0x70, 0x33, 0x38, 0x13, 0xd8,
-  0x90, 0x92, 0x02, 0x7c, 0x66, 0x09, 0x7a, 0xc6, 0x50, 0x52, 0x20, 0xe2,
-  0x33, 0x4b, 0xd0, 0x33, 0xc3, 0x11, 0x33, 0x18, 0xa4, 0xa4, 0x20, 0x7c,
-  0xb3, 0x0c, 0x3a, 0xd3, 0x33, 0x81, 0xd1, 0x60, 0xa0, 0x92, 0x42, 0x7c,
-  0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x41, 0x24, 0x1f,
-  0x2b, 0x82, 0xf8, 0x14, 0x31, 0x9b, 0x82, 0x0e, 0x37, 0x04, 0xb1, 0x29,
-  0x80, 0xc1, 0x2c, 0xc3, 0xce, 0xf0, 0x4c, 0x60, 0x32, 0x29, 0x0c, 0xf1,
-  0x99, 0x25, 0xe8, 0x19, 0x23, 0x6a, 0x52, 0x80, 0xcf, 0x2c, 0x41, 0xcf,
-  0x0c, 0xb4, 0x3c, 0xda, 0xcd, 0x60, 0x38, 0x43, 0xec, 0x8c, 0xc0, 0x33,
-  0x7c, 0x28, 0xe4, 0xcc, 0x05, 0xc3, 0x18, 0x4d, 0x0a, 0x38, 0x29, 0xc4,
-  0x67, 0x38, 0xc2, 0x6f, 0x72, 0x52, 0x20, 0xbe, 0x59, 0x06, 0x9f, 0x09,
-  0x9b, 0xc0, 0x74, 0x52, 0xf8, 0x9b, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82,
-  0x61, 0x2e, 0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x22,
-  0x3c, 0x05, 0x1d, 0x6e, 0x08, 0x7e, 0x53, 0x00, 0x83, 0x59, 0x86, 0x9f,
-  0x01, 0x9b, 0xc0, 0x06, 0xb1, 0x14, 0xe0, 0x33, 0x4b, 0x50, 0x36, 0xf6,
-  0x93, 0x02, 0x11, 0x9f, 0x59, 0x82, 0xb2, 0x19, 0x8e, 0x48, 0x1d, 0xb0,
-  0x14, 0x84, 0x6f, 0x96, 0x41, 0x6c, 0xca, 0x26, 0x30, 0xd5, 0x09, 0x4b,
-  0x21, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20,
-  0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x60, 0x4f, 0x41, 0x87, 0x1b, 0x02,
-  0xf5, 0x14, 0xc0, 0x60, 0x96, 0x61, 0x6c, 0xc8, 0x26, 0xb0, 0xb4, 0x14,
-  0x86, 0xf8, 0xcc, 0x12, 0x94, 0x8d, 0x11, 0x6e, 0x29, 0xc0, 0x67, 0x96,
-  0xa0, 0x6c, 0x06, 0x5a, 0x1e, 0xed, 0x67, 0x30, 0xb0, 0x21, 0xc6, 0x46,
-  0x20, 0x1b, 0xb4, 0x0b, 0x9b, 0x0b, 0x86, 0xb9, 0xe0, 0xa9, 0xdb, 0x9e,
-  0x3a, 0x9d, 0x14, 0x86, 0xb9, 0x35, 0x14, 0x86, 0x39, 0x62, 0x98, 0x23,
-  0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x8d, 0x3f, 0x85, 0xf4,
-  0x14, 0x4c, 0x53, 0xb8, 0x4f, 0x61, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84,
-  0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c,
-  0x10, 0x00, 0x04, 0xc1, 0x00, 0x1b, 0x51, 0x01, 0x3e, 0x85, 0x84, 0x08,
-  0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x00, 0x23, 0x51, 0x21, 0x3e, 0x85,
-  0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x00, 0x2b, 0x51, 0x41,
-  0x3e, 0x85, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0x61,
-  0x51, 0x21, 0x3e, 0x05, 0xd8, 0x14, 0x02, 0xff, 0x14, 0xc2, 0x53, 0x00,
-  0x51, 0x61, 0x34, 0x21, 0x00, 0x2e, 0x78, 0x6c, 0x96, 0xc0, 0x6c, 0x06,
-  0x5a, 0x1e, 0xd3, 0x88, 0x19, 0xd1, 0x25, 0x60, 0x86, 0x25, 0x66, 0x46,
-  0x28, 0x1b, 0xd1, 0x25, 0x68, 0xc6, 0xfe, 0x36, 0xb8, 0x4d, 0x01, 0x3e,
-  0xb3, 0x0c, 0x67, 0x93, 0x36, 0x7d, 0x1b, 0x0c, 0x47, 0x84, 0x6e, 0x30,
-  0x9e, 0xc2, 0xf0, 0x9d, 0xe8, 0x06, 0xc3, 0x0c, 0x37, 0x04, 0xae, 0x29,
-  0x90, 0x41, 0x0d, 0x81, 0x0e, 0x47, 0x14, 0xe7, 0x29, 0x0c, 0x5f, 0x05,
-  0x82, 0xde, 0x31, 0xcc, 0x70, 0x43, 0x10, 0x9b, 0x02, 0x19, 0x54, 0x30,
-  0xe8, 0x2c, 0x03, 0xda, 0xf4, 0x4d, 0x70, 0x75, 0x29, 0x0c, 0x73, 0xa6,
-  0x28, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x5b, 0x8d, 0x0a,
-  0x22, 0x2a, 0xfc, 0xa6, 0x00, 0xa3, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26,
-  0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x87, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0xc6, 0xa3, 0x42, 0x8a, 0x0a, 0x07,
-  0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0xd6, 0xa3, 0x82, 0x8a,
-  0x0a, 0x0c, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0xe6, 0xa3,
-  0xc2, 0x8a, 0x0a, 0x12, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1,
-  0x52, 0xa6, 0x82, 0x8a, 0x0a, 0xe9, 0x29, 0x04, 0x37, 0x2a, 0xe8, 0xa7,
-  0x90, 0xa3, 0xc2, 0x68, 0x42, 0x00, 0x5c, 0xf0, 0xd8, 0x2c, 0x41, 0xdf,
-  0x0c, 0x37, 0xd4, 0x6e, 0xd0, 0xa3, 0x02, 0x18, 0xcc, 0x32, 0xa8, 0xcd,
-  0xda, 0x04, 0x45, 0x9e, 0x42, 0x8b, 0x0a, 0x70, 0xc1, 0x53, 0x23, 0x06,
-  0x07, 0x00, 0x82, 0x60, 0x40, 0x9d, 0xa9, 0xe0, 0xa2, 0xc2, 0xee, 0x06,
-  0xfd, 0x29, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x01, 0x85, 0xa6, 0x82,
-  0x8b, 0x0a, 0x81, 0x70, 0xc1, 0x30, 0x75, 0x9e, 0x82, 0x8c, 0x0a, 0x70,
-  0xc1, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0xb1, 0xa9, 0x30,
-  0xa3, 0x02, 0x18, 0x88, 0xa8, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06,
-  0x54, 0x9b, 0x0a, 0x33, 0x2a, 0x04, 0xc2, 0x05, 0xc3, 0x5c, 0xf0, 0xd4,
-  0x1d, 0x4f, 0x1d, 0x6c, 0x0a, 0xc3, 0x5c, 0x38, 0x0a, 0xc3, 0x1c, 0x31,
-  0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x26, 0xa7,
-  0xc2, 0x8f, 0x0a, 0xfc, 0x29, 0xb4, 0xa9, 0x30, 0x9a, 0x10, 0x00, 0xa3,
-  0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22,
-  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x80, 0xe5, 0xa9, 0x60, 0xa6, 0x42,
-  0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x80, 0xe9, 0xa9, 0x70,
-  0xa6, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x80, 0xed,
-  0xa9, 0x80, 0xa6, 0x42, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60,
-  0xb0, 0x88, 0xaa, 0x70, 0xa6, 0x82, 0x89, 0x0a, 0x01, 0x9d, 0x0a, 0x37,
-  0x2a, 0xd8, 0xa9, 0x30, 0x9a, 0x10, 0x00, 0x17, 0x3c, 0x36, 0x4b, 0xd0,
-  0x37, 0xc3, 0x0d, 0xf2, 0x1b, 0xe4, 0xa9, 0x00, 0x06, 0xb3, 0x0c, 0x6c,
-  0xd3, 0x37, 0x81, 0xe1, 0xa7, 0xa0, 0x9f, 0x42, 0x7c, 0x86, 0x23, 0xee,
-  0x37, 0xd8, 0x4f, 0x81, 0xf8, 0x66, 0x19, 0xda, 0x06, 0x6e, 0x02, 0xe3,
-  0x4f, 0x01, 0x7f, 0x83, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e,
-  0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x62, 0x54, 0x05,
-  0x1d, 0x6e, 0x08, 0x42, 0x55, 0x00, 0x83, 0x59, 0x06, 0xb7, 0x79, 0x9b,
-  0xc0, 0x06, 0x12, 0x15, 0xe0, 0x33, 0x4b, 0x40, 0x37, 0x36, 0xa2, 0x02,
-  0x11, 0x9f, 0x59, 0x02, 0xba, 0x19, 0x8e, 0x10, 0xe1, 0x80, 0x44, 0x05,
-  0xe1, 0x9b, 0x65, 0x88, 0x1b, 0xba, 0x09, 0x6c, 0x84, 0x83, 0x12, 0x15,
-  0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x22,
-  0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0x57, 0x15, 0x74, 0xb8, 0x21, 0x60,
-  0x55, 0x01, 0x0c, 0x66, 0x19, 0xe4, 0x66, 0x6e, 0x02, 0x6b, 0x51, 0x61,
-  0x88, 0xcf, 0x2c, 0x01, 0xdd, 0x18, 0x01, 0xa3, 0x02, 0x7c, 0x66, 0x09,
-  0xe8, 0x66, 0xa0, 0xe5, 0xd1, 0xdc, 0x06, 0x7b, 0x1b, 0x42, 0x6e, 0x84,
-  0xb9, 0xd1, 0xc1, 0x01, 0x6e, 0x2e, 0x18, 0xc6, 0x5e, 0x54, 0x98, 0x51,
-  0x21, 0x3e, 0xc3, 0x11, 0xb0, 0x40, 0xa3, 0x02, 0xf1, 0xcd, 0x32, 0xd4,
-  0x0d, 0xde, 0x04, 0x56, 0xa3, 0x42, 0x2c, 0xc4, 0xc7, 0x82, 0x81, 0x3e,
-  0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f,
-  0x11, 0xbc, 0x2a, 0xe8, 0x70, 0x43, 0xa0, 0xab, 0x02, 0x18, 0xcc, 0x32,
-  0xd8, 0xcd, 0xdd, 0x04, 0x36, 0xf4, 0xa8, 0x00, 0x9f, 0x59, 0x02, 0xbe,
-  0x31, 0x1d, 0x15, 0x88, 0xf8, 0xcc, 0x12, 0xf0, 0xcd, 0x70, 0xc4, 0x2e,
-  0xec, 0xa8, 0x20, 0x7c, 0xb3, 0x0c, 0x79, 0xc3, 0x37, 0x81, 0xf1, 0x02,
-  0x8f, 0x0a, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94,
-  0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0xc4, 0xb9, 0x0a, 0x3a, 0xdc,
-  0x10, 0x94, 0xab, 0x00, 0x06, 0xb3, 0x0c, 0x7a, 0xb3, 0x37, 0x81, 0x91,
-  0xa9, 0x30, 0xc4, 0x67, 0x96, 0x80, 0x6f, 0x8c, 0x48, 0x53, 0x01, 0x3e,
-  0xb3, 0x04, 0x7c, 0x33, 0xd0, 0xf2, 0x68, 0x76, 0x83, 0xdd, 0x0d, 0xa1,
-  0x37, 0xc2, 0xde, 0xb0, 0x06, 0xde, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0xdd,
-  0xf6, 0xd4, 0xd5, 0xa8, 0x30, 0xcc, 0x99, 0xa5, 0x30, 0xcc, 0x11, 0xc3,
-  0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x6c, 0xf7, 0x2a,
-  0x90, 0xab, 0x10, 0xaa, 0x82, 0xbc, 0x0a, 0xa3, 0x09, 0x01, 0x30, 0x9a,
-  0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32,
-  0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x98, 0xbf, 0x0a, 0xeb, 0x2a, 0x24,
-  0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xd8, 0xbf, 0x0a, 0xec,
-  0x2a, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x18, 0xc8,
-  0x0a, 0xed, 0x2a, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06,
-  0xcb, 0xc9, 0x0a, 0xec, 0x2a, 0xac, 0xaa, 0x10, 0xe4, 0xab, 0xc0, 0xab,
-  0xc2, 0xbe, 0x0a, 0xa3, 0x09, 0x01, 0x70, 0xc1, 0x63, 0xb3, 0x04, 0x7d,
-  0x33, 0xd0, 0xf2, 0x98, 0x06, 0xda, 0xe0, 0x35, 0x71, 0x36, 0x2c, 0xa1,
-  0x36, 0x02, 0xdf, 0xe0, 0x35, 0xb1, 0x36, 0xb3, 0x0c, 0x7e, 0x03, 0x3a,
-  0x77, 0x1c, 0x0c, 0x47, 0xf0, 0x71, 0xd0, 0xab, 0xc2, 0xf0, 0x5d, 0x1f,
-  0x07, 0xc3, 0x0c, 0x37, 0x04, 0xa8, 0x2a, 0x90, 0x41, 0x0d, 0x81, 0x0e,
-  0x47, 0xfc, 0x43, 0xb8, 0x0a, 0xc3, 0x57, 0x81, 0xa0, 0x17, 0x12, 0xc3,
-  0x0c, 0x37, 0x04, 0xab, 0x2a, 0x90, 0x41, 0x05, 0x83, 0xce, 0x32, 0xfc,
-  0x0d, 0xed, 0x04, 0xf7, 0xa6, 0xc2, 0x30, 0x07, 0x9a, 0xc2, 0x30, 0x23,
-  0x06, 0x07, 0x00, 0x82, 0x60, 0xb0, 0xbd, 0xac, 0xc0, 0xaf, 0x42, 0xae,
-  0x0a, 0x2a, 0x2b, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26,
-  0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x71, 0xc8, 0x88, 0x01, 0x02, 0x80,
-  0x20, 0x18, 0x60, 0x36, 0x2b, 0x8c, 0xac, 0x70, 0x10, 0xc1, 0x88, 0x01,
-  0x02, 0x80, 0x20, 0x18, 0x60, 0x37, 0x2b, 0x90, 0xac, 0xc0, 0x10, 0xc1,
-  0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x60, 0x38, 0x2b, 0x94, 0xac, 0x20,
-  0x11, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c, 0x3f, 0x2b, 0x90,
-  0xac, 0x30, 0xae, 0x42, 0x10, 0xb3, 0x02, 0xbd, 0x0a, 0x33, 0x2b, 0x8c,
-  0x26, 0x04, 0xc0, 0x05, 0x8f, 0xcd, 0x12, 0xd0, 0xce, 0x70, 0xc3, 0x2b,
-  0x07, 0x37, 0x2b, 0x80, 0xc1, 0x2c, 0x43, 0xe8, 0x88, 0x4e, 0x50, 0xbe,
-  0x2a, 0x9c, 0xac, 0x00, 0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08,
-  0x06, 0x54, 0xd8, 0x0a, 0x28, 0x2b, 0xd8, 0x72, 0x70, 0xaf, 0xc2, 0x88,
-  0xc1, 0x01, 0x80, 0x20, 0x18, 0x50, 0x62, 0x2b, 0xa0, 0xac, 0x10, 0x08,
-  0x17, 0x0c, 0x53, 0xe1, 0x2a, 0xb0, 0xac, 0x00, 0x17, 0x3c, 0x35, 0x62,
-  0x70, 0x00, 0x20, 0x08, 0x06, 0x94, 0xd9, 0x0a, 0x2d, 0x2b, 0xe8, 0x04,
-  0xbf, 0x0a, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0x9d, 0xad, 0xd0,
-  0xb2, 0x42, 0x20, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0xdd, 0xf1, 0xd4, 0xa9,
-  0xaa, 0x30, 0xcc, 0xed, 0xa6, 0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc,
-  0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x6c, 0x6c, 0x2b, 0xe4, 0xac, 0x60,
-  0xaf, 0xc2, 0xd9, 0x0a, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3,
-  0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00,
-  0x20, 0x08, 0x06, 0xd8, 0xdc, 0x0a, 0x60, 0x2b, 0x24, 0x44, 0x30, 0x62,
-  0x80, 0x00, 0x20, 0x08, 0x06, 0x18, 0xdd, 0x0a, 0x61, 0x2b, 0x24, 0x44,
-  0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x58, 0xdd, 0x0a, 0x62, 0x2b,
-  0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x0b, 0xdf, 0x0a,
-  0x61, 0x2b, 0x80, 0xac, 0x10, 0xb8, 0xad, 0x10, 0xb3, 0x02, 0xdc, 0x0a,
-  0xa3, 0x09, 0x01, 0x70, 0xc1, 0x63, 0xb3, 0x04, 0xb4, 0x33, 0xdc, 0xc0,
-  0xce, 0xc1, 0xdc, 0x0a, 0x60, 0x30, 0xcb, 0x30, 0x3a, 0xb4, 0x13, 0x98,
-  0xbc, 0x0a, 0xf4, 0x2a, 0xc4, 0x67, 0x38, 0x42, 0x9e, 0x83, 0x7a, 0x15,
-  0x88, 0x6f, 0x96, 0x81, 0x74, 0x4e, 0x27, 0x30, 0x7b, 0x15, 0xe6, 0x39,
-  0x88, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x30,
-  0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xfa, 0x56, 0xd0, 0xe1, 0x86, 0x60,
-  0x6f, 0x05, 0x30, 0x98, 0x65, 0x28, 0x1d, 0xd3, 0x09, 0x6c, 0xf0, 0x57,
-  0x01, 0x3e, 0xb3, 0x04, 0xab, 0x63, 0xfd, 0x2a, 0x10, 0xf1, 0x99, 0x25,
-  0x58, 0x9d, 0xe1, 0x88, 0x7e, 0x0e, 0xfc, 0x55, 0x10, 0xbe, 0x59, 0x06,
-  0xd4, 0x59, 0x9d, 0xc0, 0xfc, 0x39, 0xf8, 0x57, 0x21, 0x3e, 0x16, 0x38,
-  0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41,
-  0x7c, 0x8a, 0x40, 0x5d, 0x41, 0x87, 0x1b, 0x02, 0xd3, 0x15, 0xc0, 0x60,
-  0x96, 0x21, 0x75, 0x54, 0x27, 0xb0, 0x93, 0x15, 0x86, 0xf8, 0xcc, 0x12,
-  0xac, 0x8e, 0x11, 0x2a, 0x2b, 0xc0, 0x67, 0x96, 0x60, 0x75, 0x06, 0x5a,
-  0x1e, 0xad, 0x74, 0x30, 0xd3, 0x21, 0x52, 0x47, 0x50, 0x1d, 0x18, 0x1d,
-  0x4e, 0xe7, 0x82, 0x61, 0x2c, 0x65, 0x85, 0x96, 0x15, 0xe2, 0x33, 0x1c,
-  0xa1, 0x1a, 0x2e, 0x2b, 0x10, 0xdf, 0x2c, 0x03, 0xeb, 0xbc, 0x4e, 0x60,
-  0x2f, 0x2b, 0xac, 0x46, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17,
-  0x3c, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x61, 0xbb, 0x82,
-  0x0e, 0x37, 0x04, 0xb4, 0x2b, 0x80, 0xc1, 0x2c, 0x43, 0xeb, 0xb8, 0x4e,
-  0x60, 0xc3, 0xcd, 0x0a, 0xf0, 0x99, 0x25, 0x98, 0x1d, 0xa3, 0x59, 0x81,
-  0x88, 0xcf, 0x2c, 0xc1, 0xec, 0x0c, 0x47, 0xd4, 0x46, 0xcd, 0x0a, 0xc2,
-  0x37, 0xcb, 0x00, 0x3b, 0xb3, 0x13, 0x98, 0x6d, 0xd8, 0xac, 0x10, 0x1f,
-  0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x10, 0xc9, 0xc7,
-  0x8a, 0x20, 0x3e, 0x45, 0x84, 0xaf, 0xa0, 0xc3, 0x0d, 0xc1, 0xef, 0x0a,
-  0x60, 0x30, 0xcb, 0x10, 0x3b, 0xb2, 0x13, 0x98, 0xcf, 0x0a, 0x43, 0x7c,
-  0x66, 0x09, 0x66, 0xc7, 0x88, 0xb1, 0x15, 0xe0, 0x33, 0x4b, 0x30, 0x3b,
-  0x03, 0x2d, 0x8f, 0xd6, 0x3a, 0x98, 0xeb, 0x10, 0xb1, 0x23, 0xc8, 0x0e,
-  0xe8, 0xbc, 0xce, 0x05, 0xc3, 0x5c, 0xf0, 0xd4, 0x6d, 0x4f, 0xdd, 0xcb,
-  0x0a, 0xc3, 0x1c, 0x98, 0x0a, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c,
-  0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x16, 0xbf, 0x82, 0xef, 0x0a, 0x7b,
-  0x2b, 0xb0, 0xaf, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a,
-  0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00,
-  0x82, 0x60, 0x80, 0xe1, 0xaf, 0x50, 0xbe, 0x42, 0x42, 0x04, 0x23, 0x06,
-  0x08, 0x00, 0x82, 0x60, 0x80, 0xe5, 0xaf, 0x60, 0xbe, 0x42, 0x42, 0x04,
-  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x80, 0xe9, 0xaf, 0x70, 0xbe, 0x42,
-  0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0x84, 0xb0, 0x60,
-  0xbe, 0x42, 0xe9, 0x0a, 0xc1, 0xfc, 0x0a, 0xb6, 0x2b, 0xd4, 0xaf, 0x30,
-  0x9a, 0x10, 0x00, 0x17, 0x3c, 0x36, 0x4b, 0x40, 0x3b, 0x03, 0x2d, 0x8f,
-  0x69, 0xfc, 0x0d, 0xdb, 0x13, 0x7e, 0xc3, 0x12, 0xa1, 0x23, 0xcc, 0x0e,
-  0xdb, 0x13, 0xa2, 0x33, 0xcb, 0x50, 0x3b, 0xb7, 0x13, 0xd7, 0xc1, 0x70,
-  0x04, 0xdf, 0x06, 0xb7, 0x2b, 0x0c, 0xdf, 0xf5, 0x6d, 0x30, 0xcc, 0x70,
-  0x43, 0x20, 0xba, 0x02, 0x19, 0xd4, 0x10, 0xe8, 0x70, 0x44, 0x7e, 0xec,
-  0xae, 0x30, 0x7c, 0x15, 0x08, 0x7a, 0xfb, 0x31, 0xcc, 0x70, 0x43, 0x50,
-  0xba, 0x02, 0x19, 0x54, 0x30, 0xe8, 0x2c, 0x83, 0xed, 0xac, 0x4f, 0x70,
-  0x69, 0x2b, 0x0c, 0x73, 0x7a, 0x2a, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20,
-  0x08, 0x06, 0x5b, 0x0a, 0x0b, 0xf6, 0x2b, 0xcc, 0xae, 0x40, 0xc2, 0xc2,
-  0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26,
-  0x10, 0x43, 0x11, 0x87, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x06,
-  0xc3, 0x42, 0xff, 0x0a, 0x07, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
-  0x01, 0x16, 0xc3, 0x82, 0xff, 0x0a, 0x0c, 0x11, 0x8c, 0x18, 0x20, 0x00,
-  0x08, 0x82, 0x01, 0x26, 0xc3, 0xc2, 0xff, 0x0a, 0x12, 0x11, 0x8c, 0x18,
-  0x28, 0x00, 0x08, 0x82, 0xc1, 0x92, 0xc3, 0x82, 0xff, 0x0a, 0xbd, 0x2b,
-  0x04, 0x2b, 0x2c, 0xb8, 0xaf, 0xd0, 0xc2, 0xc2, 0x68, 0x42, 0x00, 0x5c,
-  0xf0, 0xd8, 0x2c, 0xc1, 0xfa, 0x0c, 0x37, 0xa4, 0x76, 0x10, 0xc3, 0x02,
-  0x18, 0xcc, 0x32, 0xe0, 0x4e, 0xee, 0x04, 0x85, 0xbb, 0x42, 0x08, 0x0b,
-  0x70, 0xc1, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0xed, 0xb0,
-  0x20, 0xc2, 0x82, 0xed, 0x06, 0xf1, 0x2b, 0x8c, 0x18, 0x1c, 0x00, 0x08,
-  0x82, 0x01, 0xc5, 0xc3, 0x82, 0x08, 0x0b, 0x81, 0x70, 0xc1, 0x30, 0xb5,
-  0xbb, 0x82, 0x09, 0x0b, 0x70, 0xc1, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82,
-  0x60, 0x40, 0x81, 0xb1, 0x70, 0xc2, 0x02, 0x8d, 0xd8, 0xaf, 0x30, 0x62,
-  0x70, 0x00, 0x20, 0x08, 0x06, 0x54, 0x18, 0x0b, 0x27, 0x2c, 0x04, 0xc2,
-  0x05, 0xc3, 0x5c, 0xf0, 0xd4, 0x1d, 0x4f, 0x1d, 0xe9, 0x0a, 0xc3, 0x5c,
-  0xad, 0x0a, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00,
-  0x08, 0x82, 0xc1, 0x66, 0xc6, 0xc2, 0x0c, 0x0b, 0xf0, 0x2b, 0x84, 0xb1,
-  0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3,
-  0x09, 0xc4, 0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x80,
-  0xb5, 0xb1, 0xa0, 0xc3, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82,
-  0x60, 0x80, 0xb9, 0xb1, 0xb0, 0xc3, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08,
-  0x00, 0x82, 0x60, 0x80, 0xbd, 0xb1, 0xc0, 0xc3, 0x42, 0x42, 0x04, 0x23,
-  0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0xd8, 0xb1, 0xb0, 0xc3, 0x82, 0xfe,
-  0x0a, 0x01, 0x1a, 0x0b, 0x2b, 0x2c, 0xa8, 0xb1, 0x30, 0x9a, 0x10, 0x00,
-  0x17, 0x3c, 0x36, 0x4b, 0xb0, 0x3e, 0xc3, 0x0d, 0xe6, 0x1d, 0xb4, 0xb1,
-  0x00, 0x06, 0xb3, 0x0c, 0xba, 0xb3, 0x3e, 0x81, 0xb1, 0xaf, 0xe0, 0xbe,
-  0x42, 0x7c, 0x86, 0x23, 0xe4, 0x37, 0x78, 0x5f, 0x81, 0xf8, 0x66, 0x19,
-  0x76, 0xc7, 0x77, 0x02, 0x83, 0x5f, 0x61, 0x7e, 0x83, 0xf8, 0x58, 0x30,
-  0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04,
-  0xf1, 0x29, 0xe2, 0x8e, 0x05, 0x1d, 0x6e, 0x08, 0xea, 0x58, 0x00, 0x83,
-  0x59, 0x06, 0xde, 0xe9, 0x9d, 0xc0, 0x06, 0xfc, 0x15, 0xe0, 0x33, 0x4b,
-  0x20, 0x3e, 0x76, 0xbf, 0x02, 0x11, 0x9f, 0x59, 0x02, 0xf1, 0x19, 0x8e,
-  0xe8, 0xdf, 0x00, 0x7f, 0x05, 0xe1, 0x9b, 0x65, 0xf8, 0x1d, 0xf1, 0x09,
-  0xcc, 0x7f, 0x83, 0xfc, 0x15, 0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86,
-  0xb9, 0xe0, 0x29, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0x51,
-  0x16, 0x74, 0xb8, 0x21, 0x00, 0x65, 0x01, 0x0c, 0x66, 0x19, 0xc0, 0x27,
-  0x7c, 0x02, 0x0b, 0x61, 0x61, 0x88, 0xcf, 0x2c, 0x81, 0xf8, 0x18, 0x41,
-  0xc2, 0x02, 0x7c, 0x66, 0x09, 0xc4, 0x67, 0xa0, 0xe5, 0xd1, 0x78, 0x07,
-  0xeb, 0x1d, 0x02, 0x7c, 0x84, 0xf0, 0x81, 0xc1, 0xc1, 0x77, 0x2e, 0x18,
-  0xc6, 0x46, 0x58, 0x38, 0x61, 0x21, 0x3e, 0xc3, 0x11, 0xa4, 0x82, 0xc2,
-  0x02, 0xf1, 0xcd, 0x32, 0x8c, 0x8f, 0xf9, 0x04, 0x96, 0xc2, 0x42, 0xa9,
-  0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x18,
-  0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0xb0, 0x2c, 0xe8, 0x70, 0x43, 0xe0,
-  0xca, 0x02, 0x18, 0xcc, 0x32, 0x90, 0x4f, 0xf9, 0x04, 0x36, 0xc4, 0xb0,
-  0x00, 0x9f, 0x59, 0x02, 0xf5, 0x31, 0x17, 0x16, 0x88, 0xf8, 0xcc, 0x12,
-  0xa8, 0xcf, 0x70, 0xc4, 0xab, 0xbc, 0xb0, 0x20, 0x7c, 0xb3, 0x0c, 0xe7,
-  0xa3, 0x3e, 0x81, 0xc1, 0x0a, 0x0c, 0x0b, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf,
-  0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53,
-  0xc4, 0x2e, 0x0b, 0x3a, 0xdc, 0x10, 0xe4, 0xb2, 0x00, 0x06, 0xb3, 0x0c,
-  0xe8, 0x93, 0x3e, 0x81, 0xe1, 0xb0, 0x30, 0xc4, 0x67, 0x96, 0x40, 0x7d,
-  0x8c, 0xe8, 0x61, 0x01, 0x3e, 0xb3, 0x04, 0xea, 0x33, 0xd0, 0xf2, 0x68,
-  0xe4, 0x83, 0x95, 0x0f, 0x81, 0x3e, 0x42, 0xfa, 0xd0, 0x95, 0xf9, 0x5c,
-  0x30, 0xcc, 0x05, 0x4f, 0xdd, 0xf6, 0xd4, 0xa5, 0xb0, 0x30, 0xcc, 0xe9,
-  0xac, 0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80,
-  0x20, 0x18, 0x6c, 0xeb, 0x2c, 0xe0, 0xb2, 0x50, 0xc7, 0x82, 0x39, 0x0b,
-  0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a,
-  0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x98,
-  0x3c, 0x0b, 0xbf, 0x2c, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08,
-  0x06, 0xd8, 0x3c, 0x0b, 0xe0, 0x2c, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00,
-  0x20, 0x08, 0x06, 0x18, 0x3d, 0x0b, 0xe1, 0x2c, 0x24, 0x44, 0x30, 0x62,
-  0xa0, 0x00, 0x20, 0x08, 0x06, 0xcb, 0x3e, 0x0b, 0xe0, 0x2c, 0xfc, 0xb1,
-  0x10, 0xb4, 0xb3, 0x00, 0xcb, 0xc2, 0x3b, 0x0b, 0xa3, 0x09, 0x01, 0x70,
-  0xc1, 0x63, 0xb3, 0x04, 0xeb, 0x33, 0xd0, 0xf2, 0x98, 0x86, 0xed, 0x80,
-  0x65, 0x51, 0x3b, 0x2c, 0x81, 0x3b, 0x82, 0xfa, 0x80, 0x65, 0x91, 0x3b,
-  0xb3, 0x0c, 0xec, 0xe3, 0x3e, 0x6b, 0x1e, 0x0c, 0x47, 0xec, 0x6d, 0x10,
-  0xcb, 0xc2, 0xf0, 0x1d, 0xdf, 0x06, 0xc3, 0x0c, 0x37, 0x04, 0x7c, 0x2c,
-  0x90, 0x41, 0x0d, 0x81, 0x0e, 0x47, 0xcc, 0x4b, 0x2d, 0x0b, 0xc3, 0x57,
-  0x81, 0xa0, 0x57, 0x2f, 0xc3, 0x0c, 0x37, 0x04, 0x7f, 0x2c, 0x90, 0x41,
-  0x05, 0x83, 0xce, 0x32, 0xb4, 0x8f, 0x08, 0x05, 0x37, 0xc6, 0xc2, 0x30,
-  0x47, 0xb7, 0xc2, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xb0, 0x8d,
-  0xb4, 0x00, 0xcf, 0x42, 0x2b, 0x0b, 0xfe, 0x2c, 0x8c, 0x26, 0x04, 0xc0,
-  0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x71,
-  0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x60, 0x2a, 0x2d, 0xdc, 0xb3,
-  0x70, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x60, 0x2b, 0x2d,
-  0xe0, 0xb3, 0xc0, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x60,
-  0x2c, 0x2d, 0xe4, 0xb3, 0x20, 0x11, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20,
-  0x18, 0x2c, 0x33, 0x2d, 0xe0, 0xb3, 0x70, 0xcb, 0x42, 0x50, 0xd2, 0x02,
-  0x3a, 0x0b, 0x27, 0x2d, 0x8c, 0x26, 0x04, 0xc0, 0x05, 0x8f, 0xcd, 0x12,
-  0x88, 0xd0, 0x70, 0xc3, 0xa8, 0x07, 0x2b, 0x2d, 0x80, 0xc1, 0x2c, 0xc3,
-  0xfb, 0xc0, 0x4f, 0x50, 0xb2, 0x2c, 0xec, 0xb3, 0x00, 0x17, 0x3c, 0x35,
-  0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x54, 0x4d, 0x0b, 0xfc, 0x2c, 0xd4,
-  0x6e, 0xb0, 0xce, 0xc2, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50, 0x36,
-  0x2d, 0xf0, 0xb3, 0x10, 0x08, 0x17, 0x0c, 0x53, 0xb5, 0x2c, 0x80, 0xb4,
-  0x00, 0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x94, 0x4e,
-  0x0b, 0x21, 0x2d, 0xb8, 0x0c, 0x3c, 0x0b, 0x23, 0x06, 0x07, 0x00, 0x82,
-  0x60, 0x40, 0xed, 0xb4, 0x10, 0xd2, 0x42, 0x20, 0x5c, 0x30, 0xcc, 0x05,
-  0x4f, 0xdd, 0xf1, 0xd4, 0xf9, 0xb1, 0x30, 0xcc, 0xbd, 0xae, 0x30, 0xcc,
-  0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x6c,
-  0x60, 0x2d, 0xb4, 0xb4, 0xa0, 0xce, 0xc2, 0x4e, 0x0b, 0xa3, 0x09, 0x01,
-  0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45,
-  0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xd8, 0x59, 0x0b, 0x34,
-  0x2d, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x18, 0x5a,
-  0x0b, 0x35, 0x2d, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06,
-  0x58, 0x5a, 0x0b, 0x36, 0x2d, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20,
-  0x08, 0x06, 0x0b, 0x5c, 0x0b, 0x35, 0x2d, 0xd0, 0xb3, 0x10, 0x88, 0xb5,
-  0x50, 0xd2, 0x02, 0x59, 0x0b, 0xa3, 0x09, 0x01, 0x70, 0xc1, 0x63, 0xb3,
-  0x04, 0x22, 0x34, 0xdc, 0x00, 0xee, 0xc1, 0x59, 0x0b, 0x60, 0x30, 0xcb,
-  0x10, 0x3f, 0x22, 0x14, 0x98, 0x39, 0x0b, 0xe8, 0x2c, 0xc4, 0x67, 0x38,
-  0x22, 0x7e, 0x83, 0x74, 0x16, 0x88, 0x6f, 0x96, 0x41, 0x7e, 0xea, 0x27,
-  0x30, 0x75, 0x16, 0xe4, 0x37, 0x88, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18,
-  0xe6, 0x82, 0xa7, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xe2,
-  0x5a, 0xd0, 0xe1, 0x86, 0xe0, 0xad, 0x05, 0x30, 0x98, 0x65, 0x98, 0x1f,
-  0xfa, 0x09, 0x6c, 0x90, 0x67, 0x01, 0x3e, 0xb3, 0x04, 0xf9, 0x63, 0xf1,
-  0x2c, 0x10, 0xf1, 0x99, 0x25, 0xc8, 0x9f, 0xe1, 0x08, 0xfe, 0x0d, 0xe4,
-  0x59, 0x10, 0xbe, 0x59, 0x06, 0xfb, 0xc9, 0x9f, 0xc0, 0xfa, 0x37, 0x98,
-  0x67, 0x21, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2,
-  0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xe0, 0x6b, 0x41, 0x87, 0x1b,
-  0x02, 0xbd, 0x16, 0xc0, 0x60, 0x96, 0xe1, 0x7e, 0xf0, 0x27, 0xb0, 0x7d,
-  0x16, 0x86, 0xf8, 0xcc, 0x12, 0xe4, 0x8f, 0x11, 0xfe, 0x2c, 0xc0, 0x67,
-  0x96, 0x20, 0x7f, 0x06, 0x5a, 0x1e, 0x6d, 0x7e, 0x30, 0xfa, 0x21, 0xee,
-  0x47, 0xc0, 0x1f, 0x17, 0x1c, 0xea, 0xe7, 0x82, 0x61, 0xac, 0x9f, 0x85,
-  0x90, 0x16, 0xe2, 0x33, 0x1c, 0xe1, 0x37, 0x22, 0x2d, 0x10, 0xdf, 0x2c,
-  0x83, 0xfe, 0xf4, 0x4f, 0x60, 0x23, 0x2d, 0xfc, 0x4d, 0x7c, 0x2c, 0x18,
-  0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82,
-  0xf8, 0x14, 0xa1, 0xda, 0x82, 0x0e, 0x37, 0x04, 0xa8, 0x2d, 0x80, 0xc1,
-  0x2c, 0xc3, 0xfe, 0xf0, 0x4f, 0x60, 0xc3, 0x4a, 0x0b, 0xf0, 0x99, 0x25,
-  0x08, 0x21, 0x43, 0x69, 0x81, 0x88, 0xcf, 0x2c, 0x41, 0x08, 0x0d, 0x47,
-  0xa4, 0x4e, 0x4a, 0x0b, 0xc2, 0x37, 0xcb, 0xe0, 0x3f, 0x21, 0x14, 0x98,
-  0xea, 0xa8, 0xb4, 0x10, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05,
-  0x4f, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xd4, 0xb6, 0xa0,
-  0xc3, 0x0d, 0xc1, 0x6c, 0x0b, 0x60, 0x30, 0xcb, 0xf0, 0x3f, 0x20, 0x14,
-  0x98, 0x4c, 0x0b, 0x43, 0x7c, 0x66, 0x09, 0x42, 0xc8, 0x88, 0x9b, 0x16,
-  0xe0, 0x33, 0x4b, 0x10, 0x42, 0x03, 0x2d, 0x8f, 0xb6, 0x3f, 0x18, 0xff,
-  0x10, 0xff, 0x23, 0x80, 0x10, 0xda, 0xf5, 0xcf, 0x05, 0xc3, 0x5c, 0xf0,
-  0xd4, 0x6d, 0x4f, 0xdd, 0x48, 0x0b, 0xc3, 0x1c, 0x0d, 0x0b, 0xc3, 0x1c,
-  0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x56,
-  0xde, 0x82, 0x6c, 0x0b, 0x6f, 0x2d, 0x80, 0xb7, 0x30, 0x9a, 0x10, 0x00,
-  0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44,
-  0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x80, 0xb1, 0xb7, 0x90, 0xdb,
-  0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x80, 0xb5, 0xb7,
-  0xa0, 0xdb, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x80,
-  0xb9, 0xb7, 0xb0, 0xdb, 0x42, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82,
-  0x60, 0xb0, 0xd4, 0xb7, 0xa0, 0xdb, 0x42, 0x5e, 0x0b, 0xc1, 0x79, 0x0b,
-  0xaa, 0x2d, 0xa4, 0xb7, 0x30, 0x9a, 0x10, 0x00, 0x17, 0x3c, 0x36, 0x4b,
-  0x20, 0x42, 0x03, 0x2d, 0x8f, 0x69, 0xb4, 0x0f, 0xcd, 0x16, 0xec, 0xc3,
-  0x12, 0xef, 0x23, 0x84, 0x10, 0xcd, 0x16, 0xf0, 0x33, 0x62, 0x60, 0x00,
-  0x20, 0x08, 0x06, 0xd0, 0x7d, 0x0b, 0xb6, 0x2d, 0x98, 0xb1, 0x30, 0x62,
-  0x60, 0x00, 0x20, 0x08, 0x06, 0x10, 0x7e, 0x0b, 0xb7, 0x2d, 0x98, 0xb1,
-  0x60, 0x41, 0x20, 0x1f, 0x0b, 0x04, 0xf9, 0xd8, 0x9b, 0x07, 0xad, 0x2d,
-  0xc8, 0x67, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xa4, 0xfd, 0x16, 0x7e,
-  0x5b, 0x68, 0x6d, 0xa1, 0xd7, 0x02, 0x8b, 0xf3, 0xa0, 0xb5, 0x05, 0xf9,
-  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0xd4, 0xdf, 0x42, 0x78, 0x0b,
-  0xac, 0x2d, 0xa0, 0x6a, 0x10, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81,
-  0xe4, 0xdf, 0x82, 0x78, 0x0b, 0xaf, 0x2d, 0x80, 0x5b, 0x30, 0x62, 0x80,
-  0x00, 0x20, 0x08, 0x06, 0xd2, 0x7f, 0x0b, 0xe3, 0x2d, 0xc8, 0xb6, 0x80,
-  0x2f, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x48, 0x20, 0x2e, 0x90,
-  0xb7, 0xe0, 0xda, 0xc2, 0xaa, 0x06, 0xc6, 0x88, 0x01, 0x02, 0x80, 0x20,
-  0x18, 0x48, 0x21, 0x2e, 0x94, 0xb7, 0xe0, 0xda, 0xc2, 0xb8, 0x05, 0x23,
-  0x06, 0x08, 0x00, 0x82, 0x60, 0x20, 0x89, 0xb8, 0x60, 0xde, 0x02, 0x6d,
-  0x0b, 0xfb, 0x12, 0x8c, 0x18, 0x34, 0x00, 0x08, 0x82, 0x81, 0x15, 0xe2,
-  0x82, 0x79, 0x0b, 0xb7, 0x2d, 0x30, 0x8b, 0xe2, 0xaa, 0x01, 0x42, 0x04,
-  0xf6, 0xd7, 0xc1, 0x6d, 0x0b, 0xf2, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04,
-  0x03, 0xa9, 0xc4, 0x85, 0xf4, 0x16, 0x6e, 0x5b, 0x68, 0xaf, 0xc0, 0x42,
-  0x3b, 0xb8, 0x6d, 0x41, 0x3e, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x20,
-  0x9d, 0xb8, 0xb0, 0xde, 0x82, 0x6d, 0x0b, 0xb8, 0x19, 0x04, 0x23, 0x06,
-  0x08, 0x00, 0x82, 0x60, 0x20, 0xa1, 0xb8, 0xc0, 0xde, 0x42, 0x6e, 0x0b,
-  0xf0, 0x15, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x94, 0xe2, 0x42,
-  0x7b, 0x0b, 0xbc, 0x2d, 0xa0, 0x48, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08,
-  0x06, 0x92, 0x8a, 0x0b, 0xee, 0x2d, 0xe0, 0xb6, 0xb0, 0x9b, 0x81, 0x31,
-  0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xd2, 0x8a, 0x0b, 0xef, 0x2d, 0xe0,
-  0xb6, 0x30, 0x5f, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x48, 0x2c,
-  0x2e, 0xc0, 0xb7, 0xe0, 0xdb, 0xc2, 0x8a, 0x04, 0x23, 0x06, 0x0d, 0x00,
-  0x82, 0x60, 0x60, 0xad, 0xb8, 0x00, 0xdf, 0x42, 0x78, 0x0b, 0x56, 0x45,
-  0xf9, 0x66, 0x80, 0x10, 0x81, 0xb9, 0x72, 0x10, 0xde, 0x82, 0x7c, 0x46,
-  0x0c, 0x10, 0x00, 0x04, 0xc1, 0x40, 0x7a, 0x71, 0x61, 0xbe, 0x85, 0xf0,
-  0x16, 0xfa, 0x29, 0x30, 0x58, 0x0e, 0xc2, 0x5b, 0x90, 0xcf, 0x88, 0x01,
-  0x02, 0x80, 0x20, 0x18, 0x48, 0x31, 0x2e, 0xd4, 0xb7, 0x00, 0xde, 0x02,
-  0x3a, 0x06, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x48, 0x32, 0x2e,
-  0xd8, 0xb7, 0x30, 0xde, 0x02, 0x48, 0x05, 0x23, 0x06, 0x08, 0x00, 0x82,
-  0x60, 0x20, 0xcd, 0xb8, 0x70, 0xdf, 0x82, 0x79, 0x0b, 0x38, 0x11, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x44, 0xe3, 0x02, 0x7e, 0x0b, 0xe2,
-  0x2d, 0xac, 0x63, 0x60, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x54,
-  0xe3, 0x42, 0x7e, 0x0b, 0xe2, 0x2d, 0x8c, 0x54, 0x30, 0x62, 0x80, 0x00,
-  0x20, 0x08, 0x06, 0x92, 0x8d, 0x0b, 0xfa, 0x2d, 0xa0, 0xb7, 0xb0, 0x13,
-  0xc1, 0x88, 0x41, 0x03, 0x80, 0x20, 0x18, 0x58, 0x35, 0x2e, 0xe8, 0xb7,
-  0xb0, 0xde, 0x02, 0x18, 0x7c, 0x9e, 0x3b, 0x06, 0x08, 0x11, 0x58, 0xef,
-  0x06, 0xeb, 0x2d, 0xc8, 0x67, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xa4,
-  0x1c, 0x17, 0xfa, 0x5b, 0x58, 0x6f, 0xa1, 0x85, 0x02, 0xfb, 0xdd, 0x60,
-  0xbd, 0x05, 0xf9, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0xb4, 0xe3,
-  0xc2, 0x7f, 0x0b, 0xea, 0x2d, 0xe0, 0x5f, 0x30, 0x62, 0x80, 0x00, 0x20,
-  0x08, 0x06, 0x12, 0x8f, 0x0b, 0x20, 0x2e, 0xb4, 0xb7, 0x00, 0x43, 0xc1,
-  0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x48, 0x3d, 0x2e, 0x84, 0xb8, 0x00,
-  0xdf, 0x02, 0x1a, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x20, 0xf9,
-  0xb8, 0x20, 0xe2, 0x02, 0x7b, 0x0b, 0xfb, 0x67, 0x8c, 0x18, 0x20, 0x00,
-  0x08, 0x82, 0x81, 0xf4, 0xe3, 0xc2, 0x88, 0x0b, 0xec, 0x2d, 0xcc, 0x50,
-  0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x12, 0x98, 0x0b, 0x24, 0x2e,
-  0xc8, 0xb7, 0xb0, 0x06, 0xc1, 0x88, 0x41, 0x03, 0x80, 0x20, 0x18, 0x58,
-  0x3f, 0x2e, 0x90, 0xb8, 0x50, 0xdf, 0x82, 0x1a, 0xa4, 0x01, 0x1a, 0xf8,
-  0x1f, 0x42, 0x04, 0xc6, 0x06, 0x6c, 0x20, 0x1f, 0x0b, 0xda, 0x40, 0x3e,
-  0x16, 0x06, 0xf7, 0x2d, 0xc8, 0x67, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c,
-  0xa4, 0x32, 0x17, 0x52, 0x5c, 0xb8, 0x6f, 0xc1, 0x09, 0x6c, 0x0c, 0xee,
-  0x5b, 0x90, 0xcf, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x48, 0x67, 0x2e,
-  0xac, 0xb8, 0x60, 0xdf, 0x82, 0x16, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
-  0x81, 0x84, 0xe6, 0x02, 0x8b, 0x0b, 0xf9, 0x2d, 0x44, 0xc1, 0x88, 0x01,
-  0x02, 0x80, 0x20, 0x18, 0x48, 0x69, 0x2e, 0xb4, 0xb8, 0xc0, 0xdf, 0x02,
-  0x12, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0xa4, 0xe6, 0x82, 0x8b,
-  0x0b, 0xf8, 0x2d, 0x74, 0xc6, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x48,
-  0x6b, 0x2e, 0xbc, 0xb8, 0x80, 0xdf, 0x02, 0x15, 0x8c, 0x18, 0x20, 0x00,
-  0x08, 0x82, 0x81, 0xc4, 0xe6, 0x02, 0x8c, 0x0b, 0xfe, 0x2d, 0x2c, 0xc1,
-  0x88, 0x41, 0x03, 0x80, 0x20, 0x18, 0x58, 0x6b, 0x2e, 0xc0, 0xb8, 0x10,
-  0xe2, 0xc2, 0x1d, 0x2c, 0x0a, 0x18, 0x20, 0x44, 0x70, 0x81, 0x41, 0x23,
-  0x06, 0x0e, 0x00, 0x82, 0x60, 0xd0, 0xc4, 0xb9, 0xe0, 0xe2, 0x02, 0x7e,
-  0x0b, 0xf1, 0x2d, 0x90, 0xb9, 0x10, 0xa0, 0xb8, 0x80, 0xe2, 0x02, 0x8a,
-  0x0b, 0x27, 0x2e, 0x98, 0xb9, 0x30, 0x4b, 0x30, 0x42, 0x08, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00
-};
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_int16_float.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_int16_float.h
index e4ca30b37098a..170d703d46885 100644
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_int16_float.h
+++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_int16_float.h
@@ -15,7 +15,7 @@
 ; Name                 Index   Mask Register SysValue  Format   Used
 ; -------------------- ----- ------ -------- -------- ------- ------
 ; no parameters
-; shader hash: 6c72da9b3c67c95014f740130c83b980
+; shader hash: 6f717b11e88ba0edf18e30c501c4874c
 ;
 ; Pipeline Runtime Information: 
 ;
@@ -68,7 +68,7 @@ target triple = "dxil-ms-dx"
 %dx.types.CBufRet.i32 = type { i32, i32, i32, i32 }
 %dx.types.ResRet.f32 = type { float, float, float, float, i32 }
 %dx.types.ResRet.i16 = type { i16, i16, i16, i16, i32 }
-%"class.RWStructuredBuffer<short>" = type { i16 }
+%"class.RWStructuredBuffer<int16_t>" = type { i16 }
 %"class.RWStructuredBuffer<float>" = type { float }
 %Constants = type { i32, i32, i32, i32, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32 }
 
@@ -4476,10 +4476,10 @@ attributes #2 = { nounwind }
 !3 = !{!"cs", i32 6, i32 2}
 !4 = !{null, !5, !10, null}
 !5 = !{!6, !7, !9}
-!6 = !{i32 0, %"class.RWStructuredBuffer<short>"* undef, !"", i32 0, i32 0, i32 1, i32 12, i1 false, i1 false, i1 false, !1}
+!6 = !{i32 0, %"class.RWStructuredBuffer<int16_t>"* undef, !"", i32 0, i32 0, i32 1, i32 12, i1 false, i1 false, i1 false, !1}
 !7 = !{i32 1, %"class.RWStructuredBuffer<float>"* undef, !"", i32 0, i32 1, i32 1, i32 12, i1 false, i1 false, i1 false, !8}
 !8 = !{i32 1, i32 4}
-!9 = !{i32 2, %"class.RWStructuredBuffer<short>"* undef, !"", i32 0, i32 2, i32 1, i32 12, i1 false, i1 false, i1 false, !1}
+!9 = !{i32 2, %"class.RWStructuredBuffer<int16_t>"* undef, !"", i32 0, i32 2, i32 1, i32 12, i1 false, i1 false, i1 false, !1}
 !10 = !{!11}
 !11 = !{i32 0, %Constants* undef, !"", i32 0, i32 0, i32 1, i32 116, null}
 !12 = !{void ()* @GridSample, !"GridSample", null, !4, !13}
@@ -4489,9 +4489,9 @@ attributes #2 = { nounwind }
 #endif
 
 const unsigned char g_GridSample[] = {
-  0x44, 0x58, 0x42, 0x43, 0x5e, 0xa7, 0x46, 0x9a, 0xca, 0x01, 0xea, 0xaf,
-  0xab, 0x34, 0x21, 0x34, 0xd6, 0x18, 0x58, 0x17, 0x01, 0x00, 0x00, 0x00,
-  0x34, 0x54, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00,
+  0x44, 0x58, 0x42, 0x43, 0x40, 0x47, 0x5b, 0x4c, 0x0d, 0x62, 0x8a, 0x97,
+  0x20, 0x76, 0xaa, 0x4c, 0xaa, 0xaa, 0x4e, 0x62, 0x01, 0x00, 0x00, 0x00,
+  0x38, 0x54, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00,
   0x48, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00,
   0x18, 0x01, 0x00, 0x00, 0x34, 0x01, 0x00, 0x00, 0x53, 0x46, 0x49, 0x30,
   0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -4513,12 +4513,12 @@ const unsigned char g_GridSample[] = {
   0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
   0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
   0x00, 0x00, 0x00, 0x00, 0x48, 0x41, 0x53, 0x48, 0x14, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x6c, 0x72, 0xda, 0x9b, 0x3c, 0x67, 0xc9, 0x50,
-  0x14, 0xf7, 0x40, 0x13, 0x0c, 0x83, 0xb9, 0x80, 0x44, 0x58, 0x49, 0x4c,
-  0xf8, 0x52, 0x00, 0x00, 0x62, 0x00, 0x05, 0x00, 0xbe, 0x14, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x6f, 0x71, 0x7b, 0x11, 0xe8, 0x8b, 0xa0, 0xed,
+  0xf1, 0x8e, 0x30, 0xc5, 0x01, 0xc4, 0x87, 0x4c, 0x44, 0x58, 0x49, 0x4c,
+  0xfc, 0x52, 0x00, 0x00, 0x62, 0x00, 0x05, 0x00, 0xbf, 0x14, 0x00, 0x00,
   0x44, 0x58, 0x49, 0x4c, 0x02, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
-  0xe0, 0x52, 0x00, 0x00, 0x42, 0x43, 0xc0, 0xde, 0x21, 0x0c, 0x00, 0x00,
-  0xb5, 0x14, 0x00, 0x00, 0x0b, 0x82, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00,
+  0xe4, 0x52, 0x00, 0x00, 0x42, 0x43, 0xc0, 0xde, 0x21, 0x0c, 0x00, 0x00,
+  0xb6, 0x14, 0x00, 0x00, 0x0b, 0x82, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00,
   0x13, 0x00, 0x00, 0x00, 0x07, 0x81, 0x23, 0x91, 0x41, 0xc8, 0x04, 0x49,
   0x06, 0x10, 0x32, 0x39, 0x92, 0x01, 0x84, 0x0c, 0x25, 0x05, 0x08, 0x19,
   0x1e, 0x04, 0x8b, 0x62, 0x80, 0x18, 0x45, 0x02, 0x42, 0x92, 0x0b, 0x42,
@@ -4531,7 +4531,7 @@ const unsigned char g_GridSample[] = {
   0x01, 0xd5, 0x06, 0x62, 0xf8, 0xff, 0xff, 0xff, 0xff, 0x01, 0x90, 0x00,
   0x49, 0x18, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x13, 0x82, 0x60, 0x42,
   0x20, 0x4c, 0x08, 0x06, 0x00, 0x00, 0x00, 0x00, 0x89, 0x20, 0x00, 0x00,
-  0x55, 0x00, 0x00, 0x00, 0x32, 0x22, 0x88, 0x09, 0x20, 0x64, 0x85, 0x04,
+  0x56, 0x00, 0x00, 0x00, 0x32, 0x22, 0x88, 0x09, 0x20, 0x64, 0x85, 0x04,
   0x13, 0x23, 0xa4, 0x84, 0x04, 0x13, 0x23, 0xe3, 0x84, 0xa1, 0x90, 0x14,
   0x12, 0x4c, 0x8c, 0x8c, 0x0b, 0x84, 0xc4, 0x4c, 0x10, 0xb4, 0xc1, 0x08,
   0x40, 0x09, 0x00, 0x0a, 0xe6, 0x08, 0xc0, 0xa0, 0x0c, 0xc3, 0x30, 0x10,
@@ -4548,1742 +4548,1742 @@ const unsigned char g_GridSample[] = {
   0x86, 0x61, 0x18, 0x06, 0xda, 0x8e, 0x1a, 0x2e, 0x7f, 0xc2, 0x1e, 0x42,
   0xf2, 0xb9, 0x8d, 0x2a, 0x56, 0x62, 0xf2, 0x91, 0xdb, 0x46, 0xc4, 0x30,
   0x0c, 0x43, 0x21, 0xba, 0x41, 0x19, 0xc8, 0x9b, 0x23, 0x08, 0x8a, 0xa1,
-  0x0c, 0xc8, 0x30, 0x80, 0x14, 0x0e, 0x04, 0xcc, 0xf4, 0x8d, 0x03, 0x3b,
-  0x84, 0xc3, 0x3c, 0xcc, 0x83, 0x1b, 0xc8, 0xc2, 0x2d, 0xcc, 0x02, 0x3d,
-  0xc8, 0x43, 0x3d, 0x8c, 0x03, 0x3d, 0xd4, 0x83, 0x3c, 0x94, 0x03, 0x39,
-  0x88, 0x42, 0x3d, 0x98, 0x83, 0x39, 0x94, 0x83, 0x3c, 0xf0, 0xc1, 0x3c,
-  0xa0, 0xc3, 0x3b, 0xc8, 0x03, 0x3d, 0xf8, 0x01, 0x0a, 0x0c, 0x22, 0x67,
-  0xfa, 0xc6, 0x81, 0x1d, 0xc2, 0x61, 0x1e, 0xe6, 0xc1, 0x0d, 0x64, 0xe1,
-  0x16, 0x66, 0x81, 0x1e, 0xe4, 0xa1, 0x1e, 0xc6, 0x81, 0x1e, 0xea, 0x41,
-  0x1e, 0xca, 0x81, 0x1c, 0x44, 0xa1, 0x1e, 0xcc, 0xc1, 0x1c, 0xca, 0x41,
-  0x1e, 0xf8, 0xc0, 0x1c, 0xd8, 0xe1, 0x1d, 0xc2, 0x81, 0x1e, 0xfc, 0x00,
-  0x05, 0x0f, 0x99, 0xc3, 0x08, 0xc4, 0x70, 0x09, 0xe7, 0x34, 0xd2, 0x04,
-  0x34, 0x93, 0x84, 0x96, 0x61, 0x18, 0x06, 0x14, 0x45, 0x51, 0x14, 0x1d,
-  0x28, 0x9d, 0x23, 0x00, 0x85, 0x29, 0x00, 0x00, 0x13, 0x14, 0x72, 0xc0,
-  0x87, 0x74, 0x60, 0x87, 0x36, 0x68, 0x87, 0x79, 0x68, 0x03, 0x72, 0xc0,
-  0x87, 0x0d, 0xae, 0x50, 0x0e, 0x6d, 0xd0, 0x0e, 0x7a, 0x50, 0x0e, 0x6d,
-  0x00, 0x0f, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d,
-  0x90, 0x0e, 0x71, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x78,
-  0xa0, 0x07, 0x78, 0xd0, 0x06, 0xe9, 0x10, 0x07, 0x76, 0xa0, 0x07, 0x71,
-  0x60, 0x07, 0x6d, 0x90, 0x0e, 0x73, 0x20, 0x07, 0x7a, 0x30, 0x07, 0x72,
-  0xd0, 0x06, 0xe9, 0x60, 0x07, 0x74, 0xa0, 0x07, 0x76, 0x40, 0x07, 0x6d,
-  0x60, 0x0e, 0x71, 0x60, 0x07, 0x7a, 0x10, 0x07, 0x76, 0xd0, 0x06, 0xe6,
-  0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x60, 0x0e, 0x76,
-  0x40, 0x07, 0x7a, 0x60, 0x07, 0x74, 0xd0, 0x06, 0xee, 0x80, 0x07, 0x7a,
-  0x10, 0x07, 0x76, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x7a, 0x60, 0x07, 0x74,
-  0x30, 0xe4, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x60, 0xc8, 0x43, 0x00, 0x01, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0xc0, 0x90, 0x67, 0x01, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x80, 0x21, 0x4f, 0x03, 0x04, 0xc0, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x43, 0x1e, 0x08, 0x08, 0x80, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0x3c, 0x12, 0x10, 0x00, 0x01,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x79, 0x28, 0x20, 0x00,
-  0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0xf2, 0x58, 0x40,
-  0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0xe4, 0xc9,
-  0x80, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0xc8,
-  0xb3, 0x01, 0x01, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0,
-  0x90, 0xc7, 0x03, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x80, 0x21, 0x4f, 0x18, 0x00, 0x01, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x40, 0x16, 0x08, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00,
-  0x32, 0x1e, 0x98, 0x14, 0x19, 0x11, 0x4c, 0x90, 0x8c, 0x09, 0x26, 0x47,
-  0xc6, 0x04, 0x43, 0x1a, 0x4a, 0xa0, 0x08, 0x8a, 0x61, 0x04, 0xa0, 0x30,
-  0x0a, 0xa1, 0xd0, 0x03, 0x0a, 0x30, 0x80, 0xc0, 0x11, 0x00, 0x5a, 0x0b,
-  0x1c, 0x10, 0x10, 0x81, 0xce, 0x19, 0x00, 0x52, 0x67, 0x00, 0xa8, 0x9c,
-  0x01, 0x00, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00,
-  0x1a, 0x03, 0x4c, 0x90, 0x46, 0x02, 0x13, 0x44, 0x35, 0x18, 0x63, 0x0b,
-  0x73, 0x3b, 0x03, 0xb1, 0x2b, 0x93, 0x9b, 0x4b, 0x7b, 0x73, 0x03, 0x99,
-  0x71, 0xb9, 0x01, 0x41, 0xa1, 0x0b, 0x3b, 0x9b, 0x7b, 0x91, 0x2a, 0x62,
-  0x2a, 0x0a, 0x9a, 0x2a, 0xfa, 0x9a, 0xb9, 0x81, 0x79, 0x31, 0x4b, 0x73,
-  0x0b, 0x63, 0x4b, 0xd9, 0x10, 0x04, 0x13, 0x84, 0x81, 0x99, 0x20, 0x0c,
-  0xcd, 0x06, 0x61, 0x20, 0x26, 0x08, 0x83, 0xb3, 0x41, 0x18, 0x0c, 0x0a,
-  0x63, 0x73, 0x1b, 0x06, 0xc4, 0x20, 0x26, 0x08, 0xc3, 0x33, 0x41, 0x28,
-  0x03, 0x8c, 0xc0, 0x04, 0x61, 0x80, 0x26, 0x08, 0x60, 0x40, 0x6d, 0x58,
-  0x94, 0x85, 0x51, 0x94, 0xa1, 0x71, 0x1c, 0xa7, 0x98, 0x20, 0x9c, 0x81,
-  0x35, 0x41, 0x18, 0xa2, 0x0d, 0xc2, 0x10, 0x6d, 0x58, 0x06, 0x88, 0x51,
-  0x86, 0xa1, 0x71, 0x1c, 0x47, 0xda, 0xb0, 0x10, 0x0b, 0xa3, 0x10, 0x43,
-  0xe3, 0x38, 0x4e, 0xb1, 0x61, 0x78, 0x26, 0x6a, 0x82, 0xa0, 0x06, 0xd7,
-  0x04, 0x61, 0x90, 0x36, 0x20, 0x8a, 0xc5, 0x28, 0xca, 0x70, 0x01, 0x1b,
-  0x02, 0x6c, 0x03, 0x01, 0x54, 0x19, 0x30, 0x41, 0x10, 0x00, 0x2a, 0x47,
-  0x72, 0x69, 0x64, 0x53, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x13, 0x84, 0x35,
-  0xa8, 0x26, 0x08, 0xc3, 0xb4, 0x61, 0xf0, 0x86, 0x61, 0x03, 0xa1, 0x74,
-  0xd1, 0xb7, 0xa1, 0xd8, 0x38, 0x40, 0x03, 0x83, 0x2a, 0x6c, 0x6c, 0x76,
-  0x6d, 0x2e, 0x69, 0x64, 0x65, 0x6e, 0x74, 0x53, 0x82, 0xa0, 0x0a, 0x19,
-  0x9e, 0x8b, 0x5d, 0x99, 0xdc, 0x5c, 0xda, 0x9b, 0xdb, 0x94, 0x80, 0x68,
-  0x42, 0x86, 0xe7, 0x62, 0x17, 0xc6, 0x66, 0x57, 0x26, 0x37, 0x25, 0x30,
-  0xea, 0x90, 0xe1, 0xb9, 0xcc, 0xa1, 0x85, 0x91, 0x95, 0xc9, 0x35, 0xbd,
-  0x91, 0x95, 0xb1, 0x4d, 0x09, 0x90, 0x32, 0x64, 0x78, 0x2e, 0x72, 0x65,
-  0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x73, 0x53, 0x82, 0xac, 0x0e, 0x19,
-  0x9e, 0x4b, 0x99, 0x1b, 0x9d, 0x5c, 0x1e, 0xd4, 0x5b, 0x9a, 0x1b, 0xdd,
-  0xdc, 0x94, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00,
-  0x51, 0x00, 0x00, 0x00, 0x33, 0x08, 0x80, 0x1c, 0xc4, 0xe1, 0x1c, 0x66,
-  0x14, 0x01, 0x3d, 0x88, 0x43, 0x38, 0x84, 0xc3, 0x8c, 0x42, 0x80, 0x07,
-  0x79, 0x78, 0x07, 0x73, 0x98, 0x71, 0x0c, 0xe6, 0x00, 0x0f, 0xed, 0x10,
-  0x0e, 0xf4, 0x80, 0x0e, 0x33, 0x0c, 0x42, 0x1e, 0xc2, 0xc1, 0x1d, 0xce,
-  0xa1, 0x1c, 0x66, 0x30, 0x05, 0x3d, 0x88, 0x43, 0x38, 0x84, 0x83, 0x1b,
-  0xcc, 0x03, 0x3d, 0xc8, 0x43, 0x3d, 0x8c, 0x03, 0x3d, 0xcc, 0x78, 0x8c,
-  0x74, 0x70, 0x07, 0x7b, 0x08, 0x07, 0x79, 0x48, 0x87, 0x70, 0x70, 0x07,
-  0x7a, 0x70, 0x03, 0x76, 0x78, 0x87, 0x70, 0x20, 0x87, 0x19, 0xcc, 0x11,
-  0x0e, 0xec, 0x90, 0x0e, 0xe1, 0x30, 0x0f, 0x6e, 0x30, 0x0f, 0xe3, 0xf0,
-  0x0e, 0xf0, 0x50, 0x0e, 0x33, 0x10, 0xc4, 0x1d, 0xde, 0x21, 0x1c, 0xd8,
-  0x21, 0x1d, 0xc2, 0x61, 0x1e, 0x66, 0x30, 0x89, 0x3b, 0xbc, 0x83, 0x3b,
-  0xd0, 0x43, 0x39, 0xb4, 0x03, 0x3c, 0xbc, 0x83, 0x3c, 0x84, 0x03, 0x3b,
-  0xcc, 0xf0, 0x14, 0x76, 0x60, 0x07, 0x7b, 0x68, 0x07, 0x37, 0x68, 0x87,
-  0x72, 0x68, 0x07, 0x37, 0x80, 0x87, 0x70, 0x90, 0x87, 0x70, 0x60, 0x07,
-  0x76, 0x28, 0x07, 0x76, 0xf8, 0x05, 0x76, 0x78, 0x87, 0x77, 0x80, 0x87,
-  0x5f, 0x08, 0x87, 0x71, 0x18, 0x87, 0x72, 0x98, 0x87, 0x79, 0x98, 0x81,
-  0x2c, 0xee, 0xf0, 0x0e, 0xee, 0xe0, 0x0e, 0xf5, 0xc0, 0x0e, 0xec, 0x30,
-  0x03, 0x62, 0xc8, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xcc, 0xa1, 0x1c, 0xe4,
-  0xa1, 0x1c, 0xdc, 0x61, 0x1c, 0xca, 0x21, 0x1c, 0xc4, 0x81, 0x1d, 0xca,
-  0x61, 0x06, 0xd6, 0x90, 0x43, 0x39, 0xc8, 0x43, 0x39, 0x98, 0x43, 0x39,
-  0xc8, 0x43, 0x39, 0xb8, 0xc3, 0x38, 0x94, 0x43, 0x38, 0x88, 0x03, 0x3b,
-  0x94, 0xc3, 0x2f, 0xbc, 0x83, 0x3c, 0xfc, 0x82, 0x3b, 0xd4, 0x03, 0x3b,
-  0xb0, 0xc3, 0x0c, 0xc4, 0x21, 0x07, 0x7c, 0x70, 0x03, 0x7a, 0x28, 0x87,
-  0x76, 0x80, 0x87, 0x19, 0xd1, 0x43, 0x0e, 0xf8, 0xe0, 0x06, 0xe4, 0x20,
-  0x0e, 0xe7, 0xe0, 0x06, 0xf6, 0x10, 0x0e, 0xf2, 0xc0, 0x0e, 0xe1, 0x90,
-  0x0f, 0xef, 0x50, 0x0f, 0xf4, 0x30, 0x83, 0x81, 0xc8, 0x01, 0x1f, 0xdc,
-  0x40, 0x1c, 0xe4, 0xa1, 0x1c, 0xc2, 0x61, 0x1d, 0xdc, 0x40, 0x1c, 0xe4,
-  0x01, 0x00, 0x00, 0x00, 0x71, 0x20, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00,
-  0x06, 0xa0, 0x80, 0x11, 0x32, 0xb0, 0x00, 0xf3, 0x2c, 0x84, 0x19, 0x40,
-  0xc3, 0xe5, 0x3b, 0x8f, 0x1f, 0x20, 0x0d, 0x10, 0x61, 0x7e, 0x71, 0xdb,
-  0xa6, 0xb0, 0x0d, 0x97, 0xef, 0x3c, 0xbe, 0x10, 0x50, 0x45, 0x41, 0x44,
-  0xa5, 0x03, 0x0c, 0x25, 0x61, 0x00, 0x02, 0xe6, 0x23, 0xb7, 0x6d, 0x0b,
-  0xd2, 0x70, 0xf9, 0xce, 0xe3, 0x0b, 0x11, 0x01, 0x4c, 0x44, 0x08, 0x34,
-  0xc3, 0x42, 0xd8, 0x81, 0x33, 0x5c, 0xbe, 0xf3, 0xf8, 0x83, 0x33, 0xe1,
-  0x7e, 0x71, 0xdb, 0x86, 0x70, 0x0d, 0x97, 0xef, 0x3c, 0x7e, 0x04, 0x58,
-  0x1b, 0x55, 0x14, 0x44, 0x54, 0x3a, 0xc0, 0xe0, 0x17, 0xb7, 0x6d, 0x02,
-  0xd7, 0x70, 0xf9, 0xce, 0xe3, 0x47, 0x80, 0xb5, 0x51, 0x45, 0x41, 0x44,
-  0xa5, 0x03, 0x0c, 0x3e, 0x52, 0xeb, 0x36, 0x80, 0x0d, 0x97, 0xef, 0x3c,
-  0x7e, 0x04, 0x58, 0x1b, 0x55, 0x14, 0x44, 0xc4, 0x4e, 0x4e, 0x44, 0xf8,
-  0x48, 0xad, 0x5b, 0x81, 0x34, 0x5c, 0xbe, 0xf3, 0xf8, 0x13, 0x11, 0x4d,
-  0x08, 0x10, 0x61, 0x7e, 0x71, 0xdb, 0x96, 0x20, 0x0d, 0x97, 0xef, 0x3c,
-  0xfe, 0x44, 0x44, 0x13, 0x02, 0x44, 0x98, 0x8f, 0xdc, 0xb6, 0x05, 0x48,
-  0xc3, 0xe5, 0x3b, 0x8f, 0x3f, 0x1d, 0x11, 0x01, 0x0c, 0xe2, 0xe0, 0x23,
-  0xb7, 0x6d, 0x04, 0xcf, 0x70, 0xf9, 0xce, 0xe3, 0x53, 0x0d, 0x10, 0x61,
-  0x7e, 0x71, 0xdb, 0x00, 0x61, 0x20, 0x00, 0x00, 0x0b, 0x13, 0x00, 0x00,
-  0x13, 0x04, 0x24, 0x14, 0x0b, 0x04, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00,
-  0x34, 0x14, 0x58, 0xd9, 0x95, 0xa5, 0x40, 0x0d, 0x94, 0x51, 0x21, 0x15,
-  0x57, 0xc1, 0x95, 0x5c, 0xd9, 0x14, 0x4b, 0x61, 0x0a, 0x14, 0x4d, 0xe9,
-  0x06, 0x94, 0x43, 0x29, 0x90, 0x31, 0x03, 0x40, 0x48, 0x09, 0x14, 0x01,
-  0x3d, 0x23, 0x00, 0x63, 0x04, 0x20, 0x08, 0x82, 0xf8, 0x37, 0x46, 0x00,
-  0x82, 0x20, 0x48, 0xff, 0xc2, 0x18, 0x01, 0x08, 0x82, 0x20, 0xfd, 0x8d,
-  0x11, 0x80, 0x20, 0x08, 0xf2, 0xdf, 0x18, 0x01, 0x08, 0x82, 0x20, 0xfe,
-  0x0b, 0x63, 0x04, 0x20, 0x08, 0x82, 0x21, 0x38, 0x8c, 0x11, 0x80, 0x20,
-  0x08, 0xea, 0xdf, 0x18, 0x01, 0x08, 0x82, 0xa0, 0xfe, 0x0b, 0x63, 0x04,
-  0x20, 0x08, 0x82, 0xf0, 0x37, 0x46, 0x00, 0x82, 0x20, 0x08, 0xff, 0xc2,
-  0x18, 0x01, 0x08, 0x82, 0x20, 0x08, 0x06, 0x00, 0x23, 0x06, 0x09, 0x00,
-  0x82, 0x60, 0x10, 0x06, 0x6d, 0x70, 0x39, 0x6b, 0xb0, 0x06, 0x64, 0x30,
-  0x62, 0x90, 0x00, 0x20, 0x08, 0x06, 0x61, 0xe0, 0x06, 0xd8, 0xd3, 0x06,
-  0x6d, 0x50, 0x06, 0x23, 0x06, 0x09, 0x00, 0x82, 0x60, 0x10, 0x06, 0x6f,
-  0x90, 0x41, 0x6b, 0xb0, 0x06, 0x66, 0x30, 0x62, 0x90, 0x00, 0x20, 0x08,
-  0x06, 0x61, 0x00, 0x07, 0x1a, 0xc4, 0x06, 0x6c, 0x70, 0x06, 0x23, 0x06,
-  0x06, 0x00, 0x82, 0x60, 0x40, 0xec, 0x81, 0xd5, 0x06, 0x23, 0x06, 0x07,
-  0x00, 0x82, 0x60, 0xe0, 0xcd, 0xc1, 0x18, 0x08, 0x6e, 0x30, 0x9a, 0x10,
-  0x00, 0x15, 0x0c, 0x30, 0x9a, 0x30, 0x04, 0xc3, 0x0d, 0x42, 0x40, 0x06,
-  0xb3, 0x0c, 0xc1, 0x08, 0x05, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xe0,
-  0xe1, 0x01, 0x1a, 0x1c, 0x76, 0x30, 0x9a, 0x10, 0x0c, 0x17, 0x3c, 0x35,
-  0x9a, 0x30, 0x08, 0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06,
-  0x5e, 0x1f, 0xb4, 0x01, 0x03, 0x06, 0xa3, 0x09, 0x01, 0x30, 0xdc, 0x10,
-  0xe8, 0x01, 0x18, 0x4c, 0x37, 0x50, 0x5e, 0x30, 0xdd, 0x50, 0x69, 0x42,
-  0x21, 0x01, 0x4c, 0x37, 0x5c, 0x1c, 0x51, 0x48, 0x00, 0x23, 0x06, 0x07,
-  0x00, 0x82, 0x60, 0xe0, 0x95, 0x42, 0x1d, 0x50, 0x67, 0x30, 0x9a, 0x10,
-  0x04, 0xa3, 0x09, 0x82, 0x30, 0x9a, 0x30, 0x0c, 0x15, 0x08, 0x52, 0x03,
-  0x21, 0x15, 0x0c, 0x52, 0x57, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60,
-  0xe0, 0xb5, 0x42, 0x1f, 0x70, 0xa9, 0x30, 0x9a, 0x10, 0x00, 0x15, 0x0c,
-  0x52, 0x5b, 0x10, 0x15, 0x20, 0x33, 0x9a, 0x50, 0x04, 0x15, 0x08, 0x52,
-  0x44, 0x10, 0x15, 0x34, 0x33, 0x9a, 0x90, 0x08, 0x15, 0x08, 0x52, 0x44,
-  0x10, 0xd7, 0x3c, 0x75, 0xc5, 0x53, 0x37, 0x3c, 0x35, 0x62, 0x70, 0x00,
-  0x20, 0x08, 0x06, 0x1e, 0x2f, 0xb0, 0xc2, 0x1a, 0xd4, 0xc2, 0x68, 0x42,
-  0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0xc3,
-  0x11, 0x4f, 0x1d, 0xf1, 0xd4, 0x11, 0x4f, 0x1d, 0xf1, 0xd4, 0x88, 0x41,
-  0x03, 0x80, 0x20, 0x18, 0x58, 0xe8, 0xf0, 0x0a, 0xcc, 0xa2, 0x8c, 0x02,
-  0x31, 0x08, 0x81, 0x09, 0x01, 0x7c, 0x4e, 0x18, 0x66, 0xc4, 0x40, 0x01,
-  0x40, 0x10, 0x0c, 0x32, 0x75, 0xb8, 0x85, 0x3c, 0x08, 0xc6, 0x01, 0x15,
-  0xc2, 0x61, 0x34, 0x21, 0x00, 0x8e, 0x18, 0x66, 0xc4, 0x40, 0x01, 0x40,
-  0x10, 0x0c, 0xb2, 0x76, 0xd0, 0x05, 0x3e, 0x08, 0xcc, 0x61, 0x15, 0xc8,
-  0x61, 0x34, 0x21, 0x00, 0x86, 0x1b, 0xde, 0x60, 0x1d, 0xc0, 0xc0, 0x8a,
-  0x55, 0x80, 0x8f, 0x0d, 0xac, 0x00, 0x9f, 0x59, 0x06, 0x61, 0x18, 0x4c,
-  0x28, 0x05, 0xf9, 0x98, 0x60, 0x0a, 0xf2, 0x31, 0x3c, 0x58, 0x05, 0xf8,
-  0xd8, 0x1d, 0xb0, 0x02, 0x7c, 0x8c, 0x10, 0xe4, 0x63, 0x84, 0x20, 0x9f,
-  0x59, 0x02, 0xc2, 0xf8, 0x00, 0x91, 0x8f, 0xed, 0x01, 0x22, 0x1f, 0x13,
-  0x62, 0x01, 0x3e, 0x26, 0xc8, 0x02, 0x7c, 0x4c, 0x78, 0x05, 0xf9, 0x98,
-  0x00, 0x0b, 0xf2, 0x99, 0x25, 0x20, 0x06, 0x2a, 0x1e, 0x48, 0x20, 0x86,
-  0x81, 0x8a, 0x07, 0x12, 0x88, 0x61, 0x34, 0x61, 0x15, 0x84, 0xe1, 0x86,
-  0xa0, 0x1f, 0xc0, 0x60, 0x96, 0xa1, 0x30, 0x82, 0x11, 0x03, 0x03, 0x00,
-  0x41, 0x30, 0x80, 0x4c, 0xa2, 0x1c, 0x88, 0x11, 0x03, 0x03, 0x00, 0x41,
-  0x30, 0x80, 0x4e, 0xc2, 0x1c, 0x88, 0x59, 0x02, 0x63, 0xa0, 0xe2, 0x21,
-  0x0a, 0x86, 0x18, 0xa8, 0x78, 0x88, 0x82, 0x21, 0x86, 0x23, 0x04, 0x52,
-  0x20, 0xbe, 0xe1, 0x88, 0x61, 0x14, 0x84, 0xaf, 0x84, 0x60, 0x87, 0x23,
-  0x88, 0x53, 0x20, 0xbe, 0x12, 0x82, 0x1d, 0x8e, 0x30, 0x4a, 0x41, 0xf8,
-  0x2a, 0x10, 0x76, 0x96, 0xe1, 0xd0, 0x82, 0xd1, 0x04, 0x5c, 0x18, 0x86,
-  0x1b, 0x02, 0x95, 0x00, 0x83, 0x59, 0x06, 0x24, 0x09, 0x8a, 0x16, 0xf6,
-  0x01, 0x2e, 0x78, 0x6a, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xa8, 0x99,
-  0xe0, 0x87, 0xa6, 0x1d, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0xa2,
-  0x09, 0x7e, 0x08, 0x84, 0xb2, 0x85, 0x7f, 0x80, 0x0b, 0x9e, 0x1a, 0x31,
-  0x38, 0x00, 0x10, 0x04, 0x03, 0xea, 0x26, 0x40, 0x02, 0x8a, 0x87, 0x11,
-  0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0x70, 0x02, 0x24, 0x02, 0x61, 0x96,
-  0x40, 0x1b, 0x6e, 0x50, 0x64, 0x02, 0x0c, 0x66, 0x19, 0x14, 0x2d, 0x30,
-  0x5a, 0xb0, 0x85, 0xf8, 0xcc, 0x32, 0x2c, 0xce, 0x64, 0xb7, 0x50, 0xc5,
-  0xc7, 0x02, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x14, 0xf2,
-  0xb1, 0x22, 0x88, 0x4f, 0x11, 0x3b, 0xa1, 0xc3, 0x0d, 0x41, 0x4e, 0x80,
-  0xc1, 0x2c, 0x03, 0xd3, 0x04, 0x36, 0xfc, 0x02, 0x7c, 0x66, 0x09, 0x24,
-  0xf3, 0x05, 0x22, 0x3e, 0xb3, 0x04, 0xd2, 0x2c, 0xc3, 0x23, 0x71, 0xf6,
-  0xfd, 0x42, 0x7c, 0x2c, 0x60, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65,
-  0xc1, 0x23, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x51, 0x16, 0x3a, 0xdc, 0x10,
-  0x8c, 0x05, 0x18, 0xcc, 0x32, 0x40, 0x51, 0x60, 0xe7, 0x30, 0xc4, 0x67,
-  0x96, 0x40, 0x32, 0x42, 0x1d, 0xe0, 0x33, 0x4b, 0x20, 0x0d, 0xb4, 0x3c,
-  0x18, 0x63, 0x35, 0x04, 0x24, 0x44, 0xb2, 0xe0, 0x18, 0x3a, 0xb0, 0x43,
-  0x7c, 0x66, 0x19, 0x26, 0xcb, 0x0c, 0xac, 0x1d, 0xd4, 0x20, 0x3e, 0x16,
-  0x08, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0xa0, 0x90, 0x8f, 0x15,
-  0x41, 0x7c, 0x8a, 0x88, 0x0b, 0x1d, 0x6e, 0x08, 0xde, 0x02, 0x0c, 0x66,
-  0x19, 0xa8, 0x2a, 0xb0, 0xa1, 0x1e, 0xe0, 0x33, 0x4b, 0xa0, 0x99, 0x3c,
-  0x10, 0xf1, 0x99, 0x25, 0xd0, 0x66, 0x19, 0x2e, 0xcd, 0x0d, 0x8c, 0x0e,
-  0xe6, 0x21, 0x3e, 0x16, 0x30, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2,
-  0xe0, 0x91, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xd8, 0x0b, 0x1d, 0x6e, 0x08,
-  0xf2, 0x02, 0x0c, 0x66, 0x19, 0xb0, 0x2c, 0xb0, 0x7d, 0x18, 0xe2, 0x33,
-  0x4b, 0xa0, 0x19, 0x01, 0x12, 0xf0, 0x99, 0x25, 0xd0, 0x06, 0x8a, 0x1e,
-  0x71, 0x40, 0xfc, 0x21, 0xf1, 0x07, 0x83, 0x0d, 0x32, 0x36, 0xc0, 0xd8,
-  0xc0, 0x62, 0x83, 0x8a, 0x0d, 0xa8, 0x81, 0xa2, 0x87, 0x17, 0x10, 0x7f,
-  0x48, 0xfc, 0xc1, 0x20, 0x32, 0x03, 0xf3, 0x07, 0x0b, 0xab, 0x34, 0xea,
-  0xe4, 0xe1, 0xa9, 0x59, 0x86, 0x6d, 0x0e, 0x4a, 0x61, 0x34, 0x21, 0x26,
-  0x86, 0xe1, 0x86, 0x00, 0x34, 0xc0, 0x60, 0x96, 0x81, 0xf3, 0x82, 0xe1,
-  0x88, 0x82, 0x2c, 0x86, 0xef, 0x8c, 0x61, 0x86, 0x1b, 0x82, 0x97, 0x20,
-  0x83, 0x1a, 0x02, 0x1d, 0x8e, 0x40, 0xd0, 0x62, 0xf8, 0x2a, 0x10, 0xf4,
-  0x94, 0x61, 0x86, 0x1b, 0x02, 0x99, 0x20, 0x83, 0x0a, 0x06, 0x9d, 0x65,
-  0xe8, 0xe4, 0x20, 0x38, 0x7b, 0x18, 0xe6, 0x9a, 0x61, 0x46, 0x0c, 0x0e,
-  0x00, 0x04, 0xc1, 0xc0, 0x8b, 0x8d, 0xd0, 0x00, 0x0b, 0xd7, 0x18, 0x4d,
-  0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62,
-  0x28, 0xe2, 0x90, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xd8, 0x70, 0x03,
-  0x35, 0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x2d, 0x37,
-  0x52, 0x83, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xd8, 0x74,
-  0x43, 0x35, 0x24, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x45,
-  0x3c, 0x52, 0x43, 0x2d, 0x82, 0xda, 0xd0, 0x8b, 0xdb, 0x18, 0x4d, 0x08,
-  0x80, 0x0b, 0x1e, 0x9b, 0x25, 0x90, 0x83, 0xe1, 0x86, 0x6c, 0x37, 0xc0,
-  0x60, 0x96, 0xe1, 0x03, 0x83, 0xa0, 0xca, 0x82, 0x35, 0xe0, 0x82, 0xa7,
-  0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x22, 0x8f, 0xd6, 0xf8, 0xfc,
-  0x62, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xa8, 0xf2, 0x68, 0x8d, 0x40,
-  0xb8, 0x60, 0x98, 0x42, 0x8b, 0xd8, 0x80, 0x0b, 0x9e, 0x1a, 0x31, 0x38,
-  0x00, 0x10, 0x04, 0x03, 0x2a, 0x3d, 0x64, 0x63, 0x0c, 0x46, 0x63, 0xc4,
-  0xe0, 0x00, 0x40, 0x10, 0x0c, 0x28, 0xf5, 0x90, 0x8d, 0x40, 0xb8, 0x60,
-  0x98, 0x0b, 0x9e, 0xba, 0xe3, 0xa9, 0x8b, 0x89, 0x61, 0x0e, 0x0d, 0x86,
-  0x39, 0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03,
-  0xcf, 0x3d, 0x7c, 0xa3, 0x2f, 0xd6, 0x63, 0x34, 0x21, 0x00, 0x46, 0x13,
-  0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46,
-  0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0xab, 0x8f, 0xf2, 0x48, 0x88, 0x60,
-  0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x36, 0xfb, 0x30, 0x8f, 0x84, 0x08,
-  0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0xbb, 0x8f, 0xf3, 0x48, 0x88,
-  0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x96, 0xff, 0x30, 0x8f, 0xd3,
-  0x08, 0xe4, 0xe3, 0x36, 0xe8, 0x63, 0x34, 0x21, 0x00, 0x2e, 0x78, 0x6c,
-  0x96, 0x40, 0x0e, 0x86, 0x1b, 0xec, 0xe0, 0x3e, 0xc0, 0x60, 0x96, 0x21,
-  0x0c, 0xe4, 0x20, 0xb0, 0xbc, 0xd8, 0x8b, 0xf8, 0x0c, 0x47, 0xec, 0x01,
-  0x5f, 0x10, 0xdf, 0x2c, 0x83, 0x18, 0x94, 0x41, 0x60, 0x7d, 0xc1, 0x07,
-  0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x86,
-  0x7c, 0xac, 0x08, 0xe2, 0x53, 0x44, 0x88, 0xe8, 0x70, 0x43, 0xf0, 0x1f,
-  0x60, 0x30, 0xcb, 0x30, 0x06, 0x64, 0x10, 0xd8, 0x50, 0x1a, 0xf0, 0x99,
-  0x25, 0x48, 0x03, 0x23, 0x0d, 0x22, 0x3e, 0xb3, 0x04, 0x69, 0x30, 0x1c,
-  0x61, 0x0a, 0xa5, 0x21, 0x7c, 0xb3, 0x0c, 0x66, 0x90, 0x06, 0x81, 0x9d,
-  0x82, 0x69, 0xc4, 0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53,
-  0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x2c, 0xa2, 0xc3, 0x0d,
-  0x81, 0x8a, 0x80, 0xc1, 0x2c, 0xc3, 0x19, 0xa0, 0x41, 0x60, 0xae, 0x31,
-  0xc4, 0x67, 0x96, 0x20, 0x0d, 0x8c, 0x88, 0x0d, 0xf8, 0xcc, 0x12, 0xa4,
-  0xc1, 0x40, 0xcb, 0xa3, 0x8d, 0x01, 0x46, 0x06, 0xc4, 0x19, 0x08, 0x68,
-  0x20, 0x16, 0x65, 0x70, 0xc1, 0x30, 0x06, 0x1b, 0xb4, 0x11, 0x9f, 0xe1,
-  0x88, 0x59, 0xa8, 0x0d, 0xe2, 0x9b, 0x65, 0x50, 0x83, 0x36, 0x08, 0xcc,
-  0x36, 0x68, 0x21, 0x3e, 0x16, 0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e,
-  0xb2, 0xc0, 0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xd0, 0x11, 0x1d, 0x6e,
-  0x08, 0x70, 0x04, 0x0c, 0x66, 0x19, 0xd6, 0x80, 0x0d, 0x02, 0x1b, 0x7c,
-  0x03, 0x3e, 0xb3, 0x04, 0x71, 0x60, 0xbb, 0x41, 0xc4, 0x67, 0x96, 0x20,
-  0x0e, 0x86, 0x23, 0x7c, 0x81, 0x37, 0x84, 0x6f, 0x96, 0xc1, 0x0d, 0xe2,
-  0x20, 0xb0, 0x5f, 0xe8, 0x8d, 0xf8, 0x58, 0xe0, 0xd0, 0xe7, 0x82, 0x61,
-  0x2e, 0x78, 0xca, 0x82, 0x48, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0xa2, 0x4c,
-  0x74, 0xb8, 0x21, 0x18, 0x13, 0x30, 0x98, 0x65, 0x78, 0x03, 0x38, 0x08,
-  0xac, 0x3c, 0x86, 0xf8, 0xcc, 0x12, 0xc4, 0x81, 0x11, 0xea, 0x01, 0x9f,
-  0x59, 0x82, 0x38, 0x18, 0x68, 0x79, 0xb4, 0x35, 0xc0, 0xd8, 0x80, 0x78,
-  0x03, 0x01, 0x0e, 0x64, 0xa3, 0x0d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x6e,
-  0x7b, 0xea, 0x6c, 0x63, 0x98, 0x6b, 0x87, 0x61, 0x8e, 0x18, 0xe6, 0x88,
-  0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0xc0, 0x9b, 0x93, 0x31, 0x11,
-  0x11, 0x38, 0x19, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18,
-  0x84, 0xd1, 0x04, 0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00, 0x41,
-  0x30, 0xd8, 0xf4, 0x44, 0x4d, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10,
-  0x04, 0x83, 0x6d, 0x4f, 0xd6, 0x24, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00,
-  0x41, 0x30, 0xd8, 0xf8, 0x84, 0x4d, 0x12, 0x22, 0x18, 0x31, 0x50, 0x00,
-  0x10, 0x04, 0x83, 0x85, 0x54, 0xd6, 0x84, 0x45, 0x82, 0x3b, 0xe1, 0x91,
-  0x3c, 0x19, 0x4d, 0x08, 0x80, 0x0b, 0x1e, 0x9b, 0x25, 0x90, 0x83, 0x81,
-  0x96, 0xc7, 0x34, 0x3a, 0x3d, 0xe2, 0x58, 0xe2, 0x13, 0xe2, 0x40, 0x8f,
-  0xc0, 0xe0, 0x02, 0x83, 0x46, 0x0c, 0x1c, 0x00, 0x04, 0xc1, 0xa0, 0x41,
-  0x95, 0x33, 0x99, 0x11, 0x16, 0xe1, 0x93, 0x40, 0x4c, 0xc4, 0x44, 0x4c,
-  0xc2, 0xc4, 0x4f, 0x66, 0x09, 0x46, 0x68, 0xb8, 0x61, 0x34, 0xf8, 0x04,
-  0x0c, 0x66, 0x19, 0xe8, 0x20, 0x26, 0x82, 0x11, 0x03, 0x03, 0x00, 0x41,
-  0x30, 0x80, 0x50, 0x45, 0x4d, 0x42, 0x62, 0xc4, 0xc0, 0x00, 0x40, 0x10,
-  0x0c, 0xa0, 0x54, 0x59, 0x93, 0x90, 0x30, 0x21, 0x4c, 0xe0, 0x63, 0x82,
-  0x98, 0xc0, 0x67, 0x34, 0x61, 0x46, 0x86, 0xe1, 0x86, 0x40, 0x54, 0xc0,
-  0x60, 0x96, 0xa1, 0x0e, 0xee, 0x20, 0x18, 0x8e, 0x30, 0xcc, 0x64, 0xf8,
-  0xee, 0x18, 0x66, 0xb8, 0x21, 0x88, 0x11, 0x32, 0xa8, 0x21, 0xd0, 0xe1,
-  0x88, 0x44, 0x4d, 0x86, 0xaf, 0x02, 0x41, 0x6f, 0x19, 0x66, 0xb8, 0x21,
-  0xa0, 0x11, 0x32, 0xa8, 0x60, 0xd0, 0x59, 0x06, 0x3b, 0x58, 0x85, 0xe0,
-  0xf0, 0x63, 0x98, 0x7b, 0x89, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1,
-  0xc0, 0x9b, 0x95, 0x51, 0x11, 0x13, 0x58, 0x19, 0x4d, 0x08, 0x80, 0xd1,
-  0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0xe2, 0x90,
-  0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xd8, 0x74, 0x45, 0x55, 0x0e, 0x22,
-  0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x6d, 0x57, 0x56, 0x85, 0x21,
-  0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xd8, 0x78, 0x85, 0x55, 0x24,
-  0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x85, 0x5c, 0x56, 0x85,
-  0x4d, 0x82, 0x5b, 0xe1, 0x93, 0x5c, 0x19, 0x4d, 0x08, 0x80, 0x0b, 0x1e,
-  0x9b, 0x25, 0x58, 0x85, 0xe1, 0x86, 0xac, 0x57, 0xc0, 0x60, 0x96, 0x01,
-  0x0f, 0xf2, 0x20, 0xa8, 0x33, 0x71, 0x15, 0xb8, 0xe0, 0xa9, 0x11, 0x83,
-  0x03, 0x00, 0x41, 0x30, 0xa0, 0xcc, 0xe5, 0x55, 0xc0, 0x00, 0x54, 0x46,
-  0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x3a, 0x97, 0x57, 0x09, 0x84, 0x0b,
-  0x86, 0x29, 0x35, 0x99, 0x15, 0xb8, 0xe0, 0xa9, 0x11, 0x83, 0x03, 0x00,
-  0x41, 0x30, 0xa0, 0xd6, 0x85, 0x56, 0xc8, 0xa0, 0x54, 0x46, 0x0c, 0x0e,
-  0x00, 0x04, 0xc1, 0x80, 0x62, 0x17, 0x5a, 0x09, 0x84, 0x0b, 0x86, 0xb9,
-  0xe0, 0xa9, 0x3b, 0x9e, 0xba, 0x19, 0x19, 0xe6, 0xd4, 0x62, 0x98, 0x23,
-  0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xf0, 0xe0,
-  0x05, 0x5c, 0xfe, 0xa4, 0x5d, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08,
-  0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00,
-  0x01, 0x40, 0x10, 0x0c, 0xb6, 0x7b, 0x39, 0x97, 0x84, 0x08, 0x46, 0x0c,
-  0x10, 0x00, 0x04, 0xc1, 0x60, 0xc3, 0x17, 0x74, 0x49, 0x88, 0x60, 0xc4,
-  0x00, 0x01, 0x40, 0x10, 0x0c, 0xb6, 0x7c, 0x49, 0x97, 0x84, 0x08, 0x46,
-  0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0x09, 0x19, 0x74, 0x49, 0x95, 0x80,
-  0x5e, 0x72, 0xc5, 0x5e, 0x46, 0x13, 0x02, 0xe0, 0x82, 0xc7, 0x66, 0x09,
-  0x56, 0x61, 0xb8, 0xc1, 0x0e, 0xf2, 0x05, 0x0c, 0x66, 0x19, 0xf4, 0x60,
-  0x15, 0x02, 0xdb, 0x93, 0x3e, 0x89, 0xcf, 0x70, 0x04, 0x1f, 0xf8, 0x09,
-  0xf1, 0xcd, 0x32, 0xec, 0x81, 0x1f, 0x04, 0xf6, 0x27, 0x7d, 0x10, 0x1f,
-  0x0b, 0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x60, 0xc8, 0xc7,
-  0x8a, 0x20, 0x3e, 0x45, 0x8c, 0x8c, 0x0e, 0x37, 0x04, 0x21, 0x03, 0x06,
-  0xb3, 0x0c, 0x7c, 0xd0, 0x07, 0x81, 0x0d, 0xa7, 0x02, 0x9f, 0x59, 0x02,
-  0x51, 0x30, 0x53, 0x21, 0xe2, 0x33, 0x4b, 0x20, 0x0a, 0xc3, 0x11, 0xa7,
-  0x70, 0x2a, 0xc2, 0x37, 0xcb, 0xf0, 0x07, 0xa2, 0x10, 0x18, 0x2a, 0xa0,
-  0x4a, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x41,
-  0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xe1, 0x32, 0x3a, 0xdc, 0x10, 0xb0,
-  0x0c, 0x18, 0xcc, 0x32, 0x80, 0x42, 0x28, 0x04, 0x06, 0x2b, 0x43, 0x7c,
-  0x66, 0x09, 0x44, 0xc1, 0x88, 0x59, 0x81, 0xcf, 0x2c, 0x81, 0x28, 0x0c,
-  0xb4, 0x3c, 0x1a, 0x1f, 0x60, 0x7d, 0x40, 0x80, 0x82, 0x10, 0x0a, 0x64,
-  0xe1, 0x07, 0x17, 0x0c, 0x63, 0xb2, 0x62, 0x2b, 0xf1, 0x19, 0x8e, 0xa0,
-  0x85, 0x5b, 0x21, 0xbe, 0x59, 0x86, 0x51, 0x30, 0x85, 0xc0, 0x70, 0xa5,
-  0x16, 0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b,
-  0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0x9e, 0xd1, 0xe1, 0x86, 0x40,
-  0x67, 0xc0, 0x60, 0x96, 0x81, 0x14, 0x4a, 0x21, 0xb0, 0x01, 0x5c, 0xe0,
-  0x33, 0x4b, 0xa0, 0x0a, 0xd6, 0x2b, 0x44, 0x7c, 0x66, 0x09, 0x54, 0x61,
-  0x38, 0xe2, 0x17, 0x7c, 0x45, 0xf8, 0x66, 0x19, 0x4e, 0x41, 0x15, 0x02,
-  0x03, 0x87, 0x5f, 0x89, 0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x82,
-  0xa7, 0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xce, 0x46, 0x87,
-  0x1b, 0x82, 0xb2, 0x01, 0x83, 0x59, 0x06, 0x54, 0x48, 0x85, 0xc0, 0xce,
-  0x65, 0x88, 0xcf, 0x2c, 0x81, 0x2a, 0x18, 0xc1, 0x2e, 0xf0, 0x99, 0x25,
-  0x50, 0x85, 0x81, 0x96, 0x47, 0x23, 0x05, 0xac, 0x14, 0x08, 0x54, 0x10,
-  0x52, 0x81, 0x36, 0x4c, 0xe1, 0x82, 0x61, 0x2e, 0x78, 0xea, 0xb6, 0xa7,
-  0x0e, 0x57, 0x86, 0xb9, 0xf7, 0x18, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66,
-  0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xbc, 0xba, 0x29, 0x1b, 0x92, 0x91,
-  0x9b, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18,
-  0x4d, 0x20, 0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83,
-  0x8d, 0x6f, 0xd8, 0x26, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30,
-  0xd8, 0xfa, 0xa6, 0x6d, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04,
-  0x83, 0xcd, 0x6f, 0xdc, 0x26, 0x21, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41,
-  0x30, 0x58, 0x4c, 0xa7, 0x6d, 0x5c, 0x26, 0xc8, 0x1b, 0x9f, 0xd9, 0x9b,
-  0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xb1, 0x59, 0x82, 0x55, 0x18, 0x68, 0x79,
-  0x4c, 0xc3, 0x0e, 0xfc, 0xac, 0x0e, 0x58, 0x02, 0x0f, 0x04, 0x55, 0xf0,
-  0xb3, 0x3c, 0x98, 0x65, 0x60, 0x05, 0x57, 0xd8, 0x87, 0xe1, 0x08, 0x7f,
-  0x00, 0x9b, 0xe1, 0xbb, 0x7f, 0x18, 0x66, 0xb8, 0x21, 0x58, 0x19, 0x32,
-  0xa8, 0x21, 0xd0, 0xe1, 0x88, 0x91, 0x20, 0x9b, 0xe1, 0xab, 0x40, 0xd0,
-  0x2b, 0x89, 0x61, 0x86, 0x1b, 0x02, 0x97, 0x21, 0x83, 0x0a, 0x06, 0x9d,
-  0x65, 0x68, 0x05, 0x71, 0x08, 0x4e, 0x5e, 0x86, 0xb9, 0x14, 0x19, 0x66,
-  0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xbc, 0xd6, 0xe9, 0x1b, 0x9e, 0x51,
-  0x9d, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18,
-  0x4d, 0x20, 0x86, 0x22, 0x0e, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83,
-  0x8d, 0x76, 0x48, 0xe7, 0x20, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30,
-  0xd8, 0x6a, 0xa7, 0x74, 0x18, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04,
-  0x83, 0xcd, 0x76, 0x4c, 0x47, 0x22, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41,
-  0x30, 0x58, 0x7c, 0xa7, 0x74, 0xcc, 0x26, 0x88, 0x1d, 0xbb, 0x99, 0x9d,
-  0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xb1, 0x59, 0x02, 0x71, 0x18, 0x6e, 0x98,
-  0x89, 0xdb, 0x01, 0x83, 0x59, 0x86, 0x57, 0x80, 0x85, 0xa0, 0xc2, 0x06,
-  0x75, 0xe0, 0x82, 0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x02,
-  0x9f, 0xd4, 0xc1, 0x09, 0xbd, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03,
-  0x2a, 0x7c, 0x52, 0x27, 0x10, 0x2e, 0x18, 0xa6, 0xc8, 0xa6, 0x75, 0xe0,
-  0x82, 0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x2a, 0x1f, 0xd7,
-  0xf1, 0x89, 0xbf, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0xca, 0x7c,
-  0x5c, 0x27, 0x10, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0xee, 0x78, 0xea, 0x5a,
-  0x66, 0x98, 0x23, 0x93, 0x61, 0x8e, 0x18, 0xe6, 0x88, 0x61, 0x46, 0x0c,
-  0x0e, 0x00, 0x04, 0xc1, 0xc0, 0x53, 0x1f, 0xdd, 0xc9, 0x9b, 0xf3, 0x19,
+  0x0c, 0xc8, 0x30, 0x80, 0x14, 0x0e, 0x04, 0xcc, 0x14, 0x06, 0xe3, 0xc0,
+  0x0e, 0xe1, 0x30, 0x0f, 0xf3, 0xe0, 0x06, 0xb2, 0x70, 0x0b, 0xb3, 0x40,
+  0x0f, 0xf2, 0x50, 0x0f, 0xe3, 0x40, 0x0f, 0xf5, 0x20, 0x0f, 0xe5, 0x40,
+  0x0e, 0xa2, 0x50, 0x0f, 0xe6, 0x60, 0x0e, 0xe5, 0x20, 0x0f, 0x7c, 0x90,
+  0x0e, 0xee, 0x40, 0x0f, 0x71, 0x60, 0x07, 0xbf, 0x40, 0x0f, 0x7e, 0x80,
+  0x02, 0x83, 0xc8, 0x99, 0xbe, 0x71, 0x60, 0x87, 0x70, 0x98, 0x87, 0x79,
+  0x70, 0x03, 0x59, 0xb8, 0x85, 0x59, 0xa0, 0x07, 0x79, 0xa8, 0x87, 0x71,
+  0xa0, 0x87, 0x7a, 0x90, 0x87, 0x72, 0x20, 0x07, 0x51, 0xa8, 0x07, 0x73,
+  0x30, 0x87, 0x72, 0x90, 0x07, 0x3e, 0x30, 0x07, 0x76, 0x78, 0x87, 0x70,
+  0xa0, 0x07, 0x3f, 0x40, 0xc1, 0x43, 0xe6, 0x30, 0x02, 0x31, 0x5c, 0xc2,
+  0x39, 0x8d, 0x34, 0x01, 0xcd, 0x24, 0xa1, 0x65, 0x18, 0x86, 0x01, 0x45,
+  0x51, 0x14, 0x45, 0x07, 0x4a, 0xe7, 0x08, 0x40, 0x61, 0x0a, 0x00, 0x00,
+  0x13, 0x14, 0x72, 0xc0, 0x87, 0x74, 0x60, 0x87, 0x36, 0x68, 0x87, 0x79,
+  0x68, 0x03, 0x72, 0xc0, 0x87, 0x0d, 0xae, 0x50, 0x0e, 0x6d, 0xd0, 0x0e,
+  0x7a, 0x50, 0x0e, 0x6d, 0x00, 0x0f, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07,
+  0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0xa0, 0x07, 0x73, 0x20, 0x07,
+  0x6d, 0x90, 0x0e, 0x78, 0xa0, 0x07, 0x78, 0xd0, 0x06, 0xe9, 0x10, 0x07,
+  0x76, 0xa0, 0x07, 0x71, 0x60, 0x07, 0x6d, 0x90, 0x0e, 0x73, 0x20, 0x07,
+  0x7a, 0x30, 0x07, 0x72, 0xd0, 0x06, 0xe9, 0x60, 0x07, 0x74, 0xa0, 0x07,
+  0x76, 0x40, 0x07, 0x6d, 0x60, 0x0e, 0x71, 0x60, 0x07, 0x7a, 0x10, 0x07,
+  0x76, 0xd0, 0x06, 0xe6, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07,
+  0x6d, 0x60, 0x0e, 0x76, 0x40, 0x07, 0x7a, 0x60, 0x07, 0x74, 0xd0, 0x06,
+  0xee, 0x80, 0x07, 0x7a, 0x10, 0x07, 0x76, 0xa0, 0x07, 0x73, 0x20, 0x07,
+  0x7a, 0x60, 0x07, 0x74, 0x30, 0xe4, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0xc8, 0x43, 0x00, 0x01, 0x10, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x90, 0x67, 0x01, 0x02, 0x40,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x21, 0x4f, 0x03, 0x04,
+  0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x43, 0x1e, 0x08,
+  0x08, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0x3c,
+  0x12, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c,
+  0x79, 0x28, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x18, 0xf2, 0x58, 0x40, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x30, 0xe4, 0xc9, 0x80, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x60, 0xc8, 0xb3, 0x01, 0x01, 0x10, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0xc0, 0x90, 0xc7, 0x03, 0x02, 0x40, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x80, 0x21, 0x4f, 0x18, 0x00, 0x01, 0x20, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x16, 0x08, 0x00, 0x00, 0x00,
+  0x0d, 0x00, 0x00, 0x00, 0x32, 0x1e, 0x98, 0x14, 0x19, 0x11, 0x4c, 0x90,
+  0x8c, 0x09, 0x26, 0x47, 0xc6, 0x04, 0x43, 0x1a, 0x4a, 0xa0, 0x08, 0x8a,
+  0x61, 0x04, 0xa0, 0x30, 0x0a, 0xa1, 0xd0, 0x03, 0x0a, 0x30, 0x80, 0xc0,
+  0x11, 0x00, 0x5a, 0x0b, 0x1c, 0x10, 0x10, 0x81, 0xce, 0x19, 0x00, 0x52,
+  0x67, 0x00, 0xa8, 0x9c, 0x01, 0x00, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00,
+  0x4a, 0x00, 0x00, 0x00, 0x1a, 0x03, 0x4c, 0x90, 0x46, 0x02, 0x13, 0x44,
+  0x35, 0x18, 0x63, 0x0b, 0x73, 0x3b, 0x03, 0xb1, 0x2b, 0x93, 0x9b, 0x4b,
+  0x7b, 0x73, 0x03, 0x99, 0x71, 0xb9, 0x01, 0x41, 0xa1, 0x0b, 0x3b, 0x9b,
+  0x7b, 0x91, 0x2a, 0x62, 0x2a, 0x0a, 0x9a, 0x2a, 0xfa, 0x9a, 0xb9, 0x81,
+  0x79, 0x31, 0x4b, 0x73, 0x0b, 0x63, 0x4b, 0xd9, 0x10, 0x04, 0x13, 0x84,
+  0x81, 0x99, 0x20, 0x0c, 0xcd, 0x06, 0x61, 0x20, 0x26, 0x08, 0x83, 0xb3,
+  0x41, 0x18, 0x0c, 0x0a, 0x63, 0x73, 0x1b, 0x06, 0xc4, 0x20, 0x26, 0x08,
+  0xc3, 0x33, 0x41, 0x28, 0x03, 0x8c, 0xc0, 0x04, 0x61, 0x80, 0x26, 0x08,
+  0x60, 0x40, 0x6d, 0x58, 0x94, 0x85, 0x51, 0x94, 0xa1, 0x71, 0x1c, 0xa7,
+  0x98, 0x20, 0x9c, 0x81, 0x35, 0x41, 0x18, 0xa2, 0x0d, 0xc2, 0x10, 0x6d,
+  0x58, 0x06, 0x88, 0x51, 0x86, 0xa1, 0x71, 0x1c, 0x47, 0xda, 0xb0, 0x10,
+  0x0b, 0xa3, 0x10, 0x43, 0xe3, 0x38, 0x4e, 0xb1, 0x61, 0x78, 0x26, 0x6a,
+  0x82, 0xa0, 0x06, 0xd7, 0x04, 0x61, 0x90, 0x36, 0x20, 0x8a, 0xc5, 0x28,
+  0xca, 0x70, 0x01, 0x1b, 0x02, 0x6c, 0x03, 0x01, 0x54, 0x19, 0x30, 0x41,
+  0x10, 0x00, 0x2a, 0x47, 0x72, 0x69, 0x64, 0x53, 0x61, 0x6d, 0x70, 0x6c,
+  0x65, 0x13, 0x84, 0x35, 0xa8, 0x26, 0x08, 0xc3, 0xb4, 0x61, 0xf0, 0x86,
+  0x61, 0x03, 0xa1, 0x74, 0xd1, 0xb7, 0xa1, 0xd8, 0x38, 0x40, 0x03, 0x83,
+  0x2a, 0x6c, 0x6c, 0x76, 0x6d, 0x2e, 0x69, 0x64, 0x65, 0x6e, 0x74, 0x53,
+  0x82, 0xa0, 0x0a, 0x19, 0x9e, 0x8b, 0x5d, 0x99, 0xdc, 0x5c, 0xda, 0x9b,
+  0xdb, 0x94, 0x80, 0x68, 0x42, 0x86, 0xe7, 0x62, 0x17, 0xc6, 0x66, 0x57,
+  0x26, 0x37, 0x25, 0x30, 0xea, 0x90, 0xe1, 0xb9, 0xcc, 0xa1, 0x85, 0x91,
+  0x95, 0xc9, 0x35, 0xbd, 0x91, 0x95, 0xb1, 0x4d, 0x09, 0x90, 0x32, 0x64,
+  0x78, 0x2e, 0x72, 0x65, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x73, 0x53,
+  0x82, 0xac, 0x0e, 0x19, 0x9e, 0x4b, 0x99, 0x1b, 0x9d, 0x5c, 0x1e, 0xd4,
+  0x5b, 0x9a, 0x1b, 0xdd, 0xdc, 0x94, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00,
+  0x79, 0x18, 0x00, 0x00, 0x51, 0x00, 0x00, 0x00, 0x33, 0x08, 0x80, 0x1c,
+  0xc4, 0xe1, 0x1c, 0x66, 0x14, 0x01, 0x3d, 0x88, 0x43, 0x38, 0x84, 0xc3,
+  0x8c, 0x42, 0x80, 0x07, 0x79, 0x78, 0x07, 0x73, 0x98, 0x71, 0x0c, 0xe6,
+  0x00, 0x0f, 0xed, 0x10, 0x0e, 0xf4, 0x80, 0x0e, 0x33, 0x0c, 0x42, 0x1e,
+  0xc2, 0xc1, 0x1d, 0xce, 0xa1, 0x1c, 0x66, 0x30, 0x05, 0x3d, 0x88, 0x43,
+  0x38, 0x84, 0x83, 0x1b, 0xcc, 0x03, 0x3d, 0xc8, 0x43, 0x3d, 0x8c, 0x03,
+  0x3d, 0xcc, 0x78, 0x8c, 0x74, 0x70, 0x07, 0x7b, 0x08, 0x07, 0x79, 0x48,
+  0x87, 0x70, 0x70, 0x07, 0x7a, 0x70, 0x03, 0x76, 0x78, 0x87, 0x70, 0x20,
+  0x87, 0x19, 0xcc, 0x11, 0x0e, 0xec, 0x90, 0x0e, 0xe1, 0x30, 0x0f, 0x6e,
+  0x30, 0x0f, 0xe3, 0xf0, 0x0e, 0xf0, 0x50, 0x0e, 0x33, 0x10, 0xc4, 0x1d,
+  0xde, 0x21, 0x1c, 0xd8, 0x21, 0x1d, 0xc2, 0x61, 0x1e, 0x66, 0x30, 0x89,
+  0x3b, 0xbc, 0x83, 0x3b, 0xd0, 0x43, 0x39, 0xb4, 0x03, 0x3c, 0xbc, 0x83,
+  0x3c, 0x84, 0x03, 0x3b, 0xcc, 0xf0, 0x14, 0x76, 0x60, 0x07, 0x7b, 0x68,
+  0x07, 0x37, 0x68, 0x87, 0x72, 0x68, 0x07, 0x37, 0x80, 0x87, 0x70, 0x90,
+  0x87, 0x70, 0x60, 0x07, 0x76, 0x28, 0x07, 0x76, 0xf8, 0x05, 0x76, 0x78,
+  0x87, 0x77, 0x80, 0x87, 0x5f, 0x08, 0x87, 0x71, 0x18, 0x87, 0x72, 0x98,
+  0x87, 0x79, 0x98, 0x81, 0x2c, 0xee, 0xf0, 0x0e, 0xee, 0xe0, 0x0e, 0xf5,
+  0xc0, 0x0e, 0xec, 0x30, 0x03, 0x62, 0xc8, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c,
+  0xcc, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xdc, 0x61, 0x1c, 0xca, 0x21, 0x1c,
+  0xc4, 0x81, 0x1d, 0xca, 0x61, 0x06, 0xd6, 0x90, 0x43, 0x39, 0xc8, 0x43,
+  0x39, 0x98, 0x43, 0x39, 0xc8, 0x43, 0x39, 0xb8, 0xc3, 0x38, 0x94, 0x43,
+  0x38, 0x88, 0x03, 0x3b, 0x94, 0xc3, 0x2f, 0xbc, 0x83, 0x3c, 0xfc, 0x82,
+  0x3b, 0xd4, 0x03, 0x3b, 0xb0, 0xc3, 0x0c, 0xc4, 0x21, 0x07, 0x7c, 0x70,
+  0x03, 0x7a, 0x28, 0x87, 0x76, 0x80, 0x87, 0x19, 0xd1, 0x43, 0x0e, 0xf8,
+  0xe0, 0x06, 0xe4, 0x20, 0x0e, 0xe7, 0xe0, 0x06, 0xf6, 0x10, 0x0e, 0xf2,
+  0xc0, 0x0e, 0xe1, 0x90, 0x0f, 0xef, 0x50, 0x0f, 0xf4, 0x30, 0x83, 0x81,
+  0xc8, 0x01, 0x1f, 0xdc, 0x40, 0x1c, 0xe4, 0xa1, 0x1c, 0xc2, 0x61, 0x1d,
+  0xdc, 0x40, 0x1c, 0xe4, 0x01, 0x00, 0x00, 0x00, 0x71, 0x20, 0x00, 0x00,
+  0x31, 0x00, 0x00, 0x00, 0x06, 0xa0, 0x80, 0x11, 0x32, 0xb0, 0x00, 0xf3,
+  0x2c, 0x84, 0x19, 0x40, 0xc3, 0xe5, 0x3b, 0x8f, 0x1f, 0x20, 0x0d, 0x10,
+  0x61, 0x7e, 0x71, 0xdb, 0xa6, 0xb0, 0x0d, 0x97, 0xef, 0x3c, 0xbe, 0x10,
+  0x50, 0x45, 0x41, 0x44, 0xa5, 0x03, 0x0c, 0x25, 0x61, 0x00, 0x02, 0xe6,
+  0x23, 0xb7, 0x6d, 0x0b, 0xd2, 0x70, 0xf9, 0xce, 0xe3, 0x0b, 0x11, 0x01,
+  0x4c, 0x44, 0x08, 0x34, 0xc3, 0x42, 0xd8, 0x81, 0x33, 0x5c, 0xbe, 0xf3,
+  0xf8, 0x83, 0x33, 0xe1, 0x7e, 0x71, 0xdb, 0x86, 0x70, 0x0d, 0x97, 0xef,
+  0x3c, 0x7e, 0x04, 0x58, 0x1b, 0x55, 0x14, 0x44, 0x54, 0x3a, 0xc0, 0xe0,
+  0x17, 0xb7, 0x6d, 0x02, 0xd7, 0x70, 0xf9, 0xce, 0xe3, 0x47, 0x80, 0xb5,
+  0x51, 0x45, 0x41, 0x44, 0xa5, 0x03, 0x0c, 0x3e, 0x52, 0xeb, 0x36, 0x80,
+  0x0d, 0x97, 0xef, 0x3c, 0x7e, 0x04, 0x58, 0x1b, 0x55, 0x14, 0x44, 0xc4,
+  0x4e, 0x4e, 0x44, 0xf8, 0x48, 0xad, 0x5b, 0x81, 0x34, 0x5c, 0xbe, 0xf3,
+  0xf8, 0x13, 0x11, 0x4d, 0x08, 0x10, 0x61, 0x7e, 0x71, 0xdb, 0x96, 0x20,
+  0x0d, 0x97, 0xef, 0x3c, 0xfe, 0x44, 0x44, 0x13, 0x02, 0x44, 0x98, 0x8f,
+  0xdc, 0xb6, 0x05, 0x48, 0xc3, 0xe5, 0x3b, 0x8f, 0x3f, 0x1d, 0x11, 0x01,
+  0x0c, 0xe2, 0xe0, 0x23, 0xb7, 0x6d, 0x04, 0xcf, 0x70, 0xf9, 0xce, 0xe3,
+  0x53, 0x0d, 0x10, 0x61, 0x7e, 0x71, 0xdb, 0x00, 0x61, 0x20, 0x00, 0x00,
+  0x0b, 0x13, 0x00, 0x00, 0x13, 0x04, 0x24, 0x14, 0x0b, 0x04, 0x00, 0x00,
+  0x1d, 0x00, 0x00, 0x00, 0x34, 0x14, 0x58, 0xd9, 0x95, 0xa5, 0x40, 0x0d,
+  0x94, 0x51, 0x21, 0x15, 0x57, 0xc1, 0x95, 0x5c, 0xd9, 0x14, 0x4b, 0x61,
+  0x0a, 0x14, 0x4d, 0xe9, 0x06, 0x94, 0x43, 0x29, 0x90, 0x31, 0x03, 0x40,
+  0x48, 0x09, 0x14, 0x01, 0x3d, 0x23, 0x00, 0x63, 0x04, 0x20, 0x08, 0x82,
+  0xf8, 0x37, 0x46, 0x00, 0x82, 0x20, 0x48, 0xff, 0xc2, 0x18, 0x01, 0x08,
+  0x82, 0x20, 0xfd, 0x8d, 0x11, 0x80, 0x20, 0x08, 0xf2, 0xdf, 0x18, 0x01,
+  0x08, 0x82, 0x20, 0xfe, 0x0b, 0x63, 0x04, 0x20, 0x08, 0x82, 0x21, 0x38,
+  0x8c, 0x11, 0x80, 0x20, 0x08, 0xea, 0xdf, 0x18, 0x01, 0x08, 0x82, 0xa0,
+  0xfe, 0x0b, 0x63, 0x04, 0x20, 0x08, 0x82, 0xf0, 0x37, 0x46, 0x00, 0x82,
+  0x20, 0x08, 0xff, 0xc2, 0x18, 0x01, 0x08, 0x82, 0x20, 0x08, 0x06, 0x00,
+  0x23, 0x06, 0x09, 0x00, 0x82, 0x60, 0x10, 0x06, 0x6d, 0x70, 0x39, 0x6b,
+  0xb0, 0x06, 0x64, 0x30, 0x62, 0x90, 0x00, 0x20, 0x08, 0x06, 0x61, 0xe0,
+  0x06, 0xd8, 0xd3, 0x06, 0x6d, 0x50, 0x06, 0x23, 0x06, 0x09, 0x00, 0x82,
+  0x60, 0x10, 0x06, 0x6f, 0x90, 0x41, 0x6b, 0xb0, 0x06, 0x66, 0x30, 0x62,
+  0x90, 0x00, 0x20, 0x08, 0x06, 0x61, 0x00, 0x07, 0x1a, 0xc4, 0x06, 0x6c,
+  0x70, 0x06, 0x23, 0x06, 0x06, 0x00, 0x82, 0x60, 0x40, 0xec, 0x81, 0xd5,
+  0x06, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xe0, 0xcd, 0xc1, 0x18, 0x08,
+  0x6e, 0x30, 0x9a, 0x10, 0x00, 0x15, 0x0c, 0x30, 0x9a, 0x30, 0x04, 0xc3,
+  0x0d, 0x42, 0x40, 0x06, 0xb3, 0x0c, 0xc1, 0x08, 0x05, 0x23, 0x06, 0x07,
+  0x00, 0x82, 0x60, 0xe0, 0xe1, 0x01, 0x1a, 0x1c, 0x76, 0x30, 0x9a, 0x10,
+  0x0c, 0x17, 0x3c, 0x35, 0x9a, 0x30, 0x08, 0x17, 0x3c, 0x35, 0x62, 0x70,
+  0x00, 0x20, 0x08, 0x06, 0x5e, 0x1f, 0xb4, 0x01, 0x03, 0x06, 0xa3, 0x09,
+  0x01, 0x30, 0xdc, 0x10, 0xe8, 0x01, 0x18, 0x4c, 0x37, 0x50, 0x5e, 0x30,
+  0xdd, 0x50, 0x69, 0x42, 0x21, 0x01, 0x4c, 0x37, 0x5c, 0x1c, 0x51, 0x48,
+  0x00, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xe0, 0x95, 0x42, 0x1d, 0x50,
+  0x67, 0x30, 0x9a, 0x10, 0x04, 0xa3, 0x09, 0x82, 0x30, 0x9a, 0x30, 0x0c,
+  0x15, 0x08, 0x52, 0x03, 0x21, 0x15, 0x0c, 0x52, 0x57, 0x30, 0x23, 0x06,
+  0x07, 0x00, 0x82, 0x60, 0xe0, 0xb5, 0x42, 0x1f, 0x70, 0xa9, 0x30, 0x9a,
+  0x10, 0x00, 0x15, 0x0c, 0x52, 0x5b, 0x10, 0x15, 0x20, 0x33, 0x9a, 0x50,
+  0x04, 0x15, 0x08, 0x52, 0x44, 0x10, 0x15, 0x34, 0x33, 0x9a, 0x90, 0x08,
+  0x15, 0x08, 0x52, 0x44, 0x10, 0xd7, 0x3c, 0x75, 0xc5, 0x53, 0x37, 0x3c,
+  0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x1e, 0x2f, 0xb0, 0xc2, 0x1a,
+  0xd4, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20,
+  0x8c, 0x26, 0x10, 0xc3, 0x11, 0x4f, 0x1d, 0xf1, 0xd4, 0x11, 0x4f, 0x1d,
+  0xf1, 0xd4, 0x88, 0x41, 0x03, 0x80, 0x20, 0x18, 0x58, 0xe8, 0xf0, 0x0a,
+  0xcc, 0xa2, 0x8c, 0x02, 0x31, 0x08, 0x81, 0x09, 0x01, 0x7c, 0x4e, 0x18,
+  0x66, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x32, 0x75, 0xb8, 0x85, 0x3c,
+  0x08, 0xc6, 0x01, 0x15, 0xc2, 0x61, 0x34, 0x21, 0x00, 0x8e, 0x18, 0x66,
+  0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0xb2, 0x76, 0xd0, 0x05, 0x3e, 0x08,
+  0xcc, 0x61, 0x15, 0xc8, 0x61, 0x34, 0x21, 0x00, 0x86, 0x1b, 0xde, 0x60,
+  0x1d, 0xc0, 0xc0, 0x8a, 0x55, 0x80, 0x8f, 0x0d, 0xac, 0x00, 0x9f, 0x59,
+  0x06, 0x61, 0x18, 0x4c, 0x28, 0x05, 0xf9, 0x98, 0x60, 0x0a, 0xf2, 0x31,
+  0x3c, 0x58, 0x05, 0xf8, 0xd8, 0x1d, 0xb0, 0x02, 0x7c, 0x8c, 0x10, 0xe4,
+  0x63, 0x84, 0x20, 0x9f, 0x59, 0x02, 0xc2, 0xf8, 0x00, 0x91, 0x8f, 0xed,
+  0x01, 0x22, 0x1f, 0x13, 0x62, 0x01, 0x3e, 0x26, 0xc8, 0x02, 0x7c, 0x4c,
+  0x78, 0x05, 0xf9, 0x98, 0x00, 0x0b, 0xf2, 0x99, 0x25, 0x20, 0x06, 0x2a,
+  0x1e, 0x48, 0x20, 0x86, 0x81, 0x8a, 0x07, 0x12, 0x88, 0x61, 0x34, 0x61,
+  0x15, 0x84, 0xe1, 0x86, 0xa0, 0x1f, 0xc0, 0x60, 0x96, 0xa1, 0x30, 0x82,
+  0x11, 0x03, 0x03, 0x00, 0x41, 0x30, 0x80, 0x4c, 0xa2, 0x1c, 0x88, 0x11,
+  0x03, 0x03, 0x00, 0x41, 0x30, 0x80, 0x4e, 0xc2, 0x1c, 0x88, 0x59, 0x02,
+  0x63, 0xa0, 0xe2, 0x21, 0x0a, 0x86, 0x18, 0xa8, 0x78, 0x88, 0x82, 0x21,
+  0x86, 0x23, 0x04, 0x52, 0x20, 0xbe, 0xe1, 0x88, 0x61, 0x14, 0x84, 0xaf,
+  0x84, 0x60, 0x87, 0x23, 0x88, 0x53, 0x20, 0xbe, 0x12, 0x82, 0x1d, 0x8e,
+  0x30, 0x4a, 0x41, 0xf8, 0x2a, 0x10, 0x76, 0x96, 0xe1, 0xd0, 0x82, 0xd1,
+  0x04, 0x5c, 0x18, 0x86, 0x1b, 0x02, 0x95, 0x00, 0x83, 0x59, 0x06, 0x24,
+  0x09, 0x8a, 0x16, 0xf6, 0x01, 0x2e, 0x78, 0x6a, 0xc4, 0xe0, 0x00, 0x40,
+  0x10, 0x0c, 0xa8, 0x99, 0xe0, 0x87, 0xa6, 0x1d, 0x46, 0x0c, 0x0e, 0x00,
+  0x04, 0xc1, 0x80, 0xa2, 0x09, 0x7e, 0x08, 0x84, 0xb2, 0x85, 0x7f, 0x80,
+  0x0b, 0x9e, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0xea, 0x26, 0x40,
+  0x02, 0x8a, 0x87, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0x70, 0x02,
+  0x24, 0x02, 0x61, 0x96, 0x40, 0x1b, 0x6e, 0x50, 0x64, 0x02, 0x0c, 0x66,
+  0x19, 0x14, 0x2d, 0x30, 0x5a, 0xb0, 0x85, 0xf8, 0xcc, 0x32, 0x2c, 0xce,
+  0x64, 0xb7, 0x50, 0xc5, 0xc7, 0x02, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0xc1,
+  0x53, 0x16, 0x14, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x3b, 0xa1, 0xc3,
+  0x0d, 0x41, 0x4e, 0x80, 0xc1, 0x2c, 0x03, 0xd3, 0x04, 0x36, 0xfc, 0x02,
+  0x7c, 0x66, 0x09, 0x24, 0xf3, 0x05, 0x22, 0x3e, 0xb3, 0x04, 0xd2, 0x2c,
+  0xc3, 0x23, 0x71, 0xf6, 0xfd, 0x42, 0x7c, 0x2c, 0x60, 0xe8, 0x73, 0xc1,
+  0x30, 0x17, 0x3c, 0x65, 0xc1, 0x23, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x51,
+  0x16, 0x3a, 0xdc, 0x10, 0x8c, 0x05, 0x18, 0xcc, 0x32, 0x40, 0x51, 0x60,
+  0xe7, 0x30, 0xc4, 0x67, 0x96, 0x40, 0x32, 0x42, 0x1d, 0xe0, 0x33, 0x4b,
+  0x20, 0x0d, 0xb4, 0x3c, 0x18, 0x63, 0x35, 0x04, 0x24, 0x44, 0xb2, 0xe0,
+  0x18, 0x3a, 0xb0, 0x43, 0x7c, 0x66, 0x19, 0x26, 0xcb, 0x0c, 0xac, 0x1d,
+  0xd4, 0x20, 0x3e, 0x16, 0x08, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2,
+  0xa0, 0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x88, 0x0b, 0x1d, 0x6e, 0x08,
+  0xde, 0x02, 0x0c, 0x66, 0x19, 0xa8, 0x2a, 0xb0, 0xa1, 0x1e, 0xe0, 0x33,
+  0x4b, 0xa0, 0x99, 0x3c, 0x10, 0xf1, 0x99, 0x25, 0xd0, 0x66, 0x19, 0x2e,
+  0xcd, 0x0d, 0x8c, 0x0e, 0xe6, 0x21, 0x3e, 0x16, 0x30, 0xf4, 0xb9, 0x60,
+  0x98, 0x0b, 0x9e, 0xb2, 0xe0, 0x91, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xd8,
+  0x0b, 0x1d, 0x6e, 0x08, 0xf2, 0x02, 0x0c, 0x66, 0x19, 0xb0, 0x2c, 0xb0,
+  0x7d, 0x18, 0xe2, 0x33, 0x4b, 0xa0, 0x19, 0x01, 0x12, 0xf0, 0x99, 0x25,
+  0xd0, 0x06, 0x8a, 0x1e, 0x71, 0x40, 0xfc, 0x21, 0xf1, 0x07, 0x83, 0x0d,
+  0x32, 0x36, 0xc0, 0xd8, 0xc0, 0x62, 0x83, 0x8a, 0x0d, 0xa8, 0x81, 0xa2,
+  0x87, 0x17, 0x10, 0x7f, 0x48, 0xfc, 0xc1, 0x20, 0x32, 0x03, 0xf3, 0x07,
+  0x0b, 0xab, 0x34, 0xea, 0xe4, 0xe1, 0xa9, 0x59, 0x86, 0x6d, 0x0e, 0x4a,
+  0x61, 0x34, 0x21, 0x26, 0x86, 0xe1, 0x86, 0x00, 0x34, 0xc0, 0x60, 0x96,
+  0x81, 0xf3, 0x82, 0xe1, 0x88, 0x82, 0x2c, 0x86, 0xef, 0x8c, 0x61, 0x86,
+  0x1b, 0x82, 0x97, 0x20, 0x83, 0x1a, 0x02, 0x1d, 0x8e, 0x40, 0xd0, 0x62,
+  0xf8, 0x2a, 0x10, 0xf4, 0x94, 0x61, 0x86, 0x1b, 0x02, 0x99, 0x20, 0x83,
+  0x0a, 0x06, 0x9d, 0x65, 0xe8, 0xe4, 0x20, 0x38, 0x7b, 0x18, 0xe6, 0x9a,
+  0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0xc0, 0x8b, 0x8d, 0xd0, 0x00,
+  0x0b, 0xd7, 0x18, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18,
+  0x84, 0xd1, 0x04, 0x62, 0x28, 0xe2, 0x90, 0x11, 0x03, 0x04, 0x00, 0x41,
+  0x30, 0xd8, 0x70, 0x03, 0x35, 0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10,
+  0x04, 0x83, 0x2d, 0x37, 0x52, 0x83, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00,
+  0x41, 0x30, 0xd8, 0x74, 0x43, 0x35, 0x24, 0x22, 0x18, 0x31, 0x50, 0x00,
+  0x10, 0x04, 0x83, 0x45, 0x3c, 0x52, 0x43, 0x2d, 0x82, 0xda, 0xd0, 0x8b,
+  0xdb, 0x18, 0x4d, 0x08, 0x80, 0x0b, 0x1e, 0x9b, 0x25, 0x90, 0x83, 0xe1,
+  0x86, 0x6c, 0x37, 0xc0, 0x60, 0x96, 0xe1, 0x03, 0x83, 0xa0, 0xca, 0x82,
+  0x35, 0xe0, 0x82, 0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x22,
+  0x8f, 0xd6, 0xf8, 0xfc, 0x62, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xa8,
+  0xf2, 0x68, 0x8d, 0x40, 0xb8, 0x60, 0x98, 0x42, 0x8b, 0xd8, 0x80, 0x0b,
+  0x9e, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x2a, 0x3d, 0x64, 0x63,
+  0x0c, 0x46, 0x63, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x28, 0xf5, 0x90,
+  0x8d, 0x40, 0xb8, 0x60, 0x98, 0x0b, 0x9e, 0xba, 0xe3, 0xa9, 0x8b, 0x89,
+  0x61, 0x0e, 0x0d, 0x86, 0x39, 0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38,
+  0x00, 0x10, 0x04, 0x03, 0xcf, 0x3d, 0x7c, 0xa3, 0x2f, 0xd6, 0x63, 0x34,
+  0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88,
+  0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0xab, 0x8f,
+  0xf2, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x36, 0xfb,
+  0x30, 0x8f, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0xbb,
+  0x8f, 0xf3, 0x48, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x96,
+  0xff, 0x30, 0x8f, 0xd3, 0x08, 0xe4, 0xe3, 0x36, 0xe8, 0x63, 0x34, 0x21,
+  0x00, 0x2e, 0x78, 0x6c, 0x96, 0x40, 0x0e, 0x86, 0x1b, 0xec, 0xe0, 0x3e,
+  0xc0, 0x60, 0x96, 0x21, 0x0c, 0xe4, 0x20, 0xb0, 0xbc, 0xd8, 0x8b, 0xf8,
+  0x0c, 0x47, 0xec, 0x01, 0x5f, 0x10, 0xdf, 0x2c, 0x83, 0x18, 0x94, 0x41,
+  0x60, 0x7d, 0xc1, 0x07, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3, 0x5c,
+  0xf0, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x44, 0x88, 0xe8,
+  0x70, 0x43, 0xf0, 0x1f, 0x60, 0x30, 0xcb, 0x30, 0x06, 0x64, 0x10, 0xd8,
+  0x50, 0x1a, 0xf0, 0x99, 0x25, 0x48, 0x03, 0x23, 0x0d, 0x22, 0x3e, 0xb3,
+  0x04, 0x69, 0x30, 0x1c, 0x61, 0x0a, 0xa5, 0x21, 0x7c, 0xb3, 0x0c, 0x66,
+  0x90, 0x06, 0x81, 0x9d, 0x82, 0x69, 0xc4, 0xc7, 0x02, 0x87, 0x3e, 0x17,
+  0x0c, 0x73, 0xc1, 0x53, 0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11,
+  0x2c, 0xa2, 0xc3, 0x0d, 0x81, 0x8a, 0x80, 0xc1, 0x2c, 0xc3, 0x19, 0xa0,
+  0x41, 0x60, 0xae, 0x31, 0xc4, 0x67, 0x96, 0x20, 0x0d, 0x8c, 0x88, 0x0d,
+  0xf8, 0xcc, 0x12, 0xa4, 0xc1, 0x40, 0xcb, 0xa3, 0x8d, 0x01, 0x46, 0x06,
+  0xc4, 0x19, 0x08, 0x68, 0x20, 0x16, 0x65, 0x70, 0xc1, 0x30, 0x06, 0x1b,
+  0xb4, 0x11, 0x9f, 0xe1, 0x88, 0x59, 0xa8, 0x0d, 0xe2, 0x9b, 0x65, 0x50,
+  0x83, 0x36, 0x08, 0xcc, 0x36, 0x68, 0x21, 0x3e, 0x16, 0x0c, 0xf4, 0xb9,
+  0x60, 0x98, 0x0b, 0x9e, 0xb2, 0xc0, 0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a,
+  0xd0, 0x11, 0x1d, 0x6e, 0x08, 0x70, 0x04, 0x0c, 0x66, 0x19, 0xd6, 0x80,
+  0x0d, 0x02, 0x1b, 0x7c, 0x03, 0x3e, 0xb3, 0x04, 0x71, 0x60, 0xbb, 0x41,
+  0xc4, 0x67, 0x96, 0x20, 0x0e, 0x86, 0x23, 0x7c, 0x81, 0x37, 0x84, 0x6f,
+  0x96, 0xc1, 0x0d, 0xe2, 0x20, 0xb0, 0x5f, 0xe8, 0x8d, 0xf8, 0x58, 0xe0,
+  0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca, 0x82, 0x48, 0x3e, 0x56, 0x04,
+  0xf1, 0x29, 0xa2, 0x4c, 0x74, 0xb8, 0x21, 0x18, 0x13, 0x30, 0x98, 0x65,
+  0x78, 0x03, 0x38, 0x08, 0xac, 0x3c, 0x86, 0xf8, 0xcc, 0x12, 0xc4, 0x81,
+  0x11, 0xea, 0x01, 0x9f, 0x59, 0x82, 0x38, 0x18, 0x68, 0x79, 0xb4, 0x35,
+  0xc0, 0xd8, 0x80, 0x78, 0x03, 0x01, 0x0e, 0x64, 0xa3, 0x0d, 0x2e, 0x18,
+  0xe6, 0x82, 0xa7, 0x6e, 0x7b, 0xea, 0x6c, 0x63, 0x98, 0x6b, 0x87, 0x61,
+  0x8e, 0x18, 0xe6, 0x88, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0xc0,
+  0x9b, 0x93, 0x31, 0x11, 0x11, 0x38, 0x19, 0x4d, 0x08, 0x80, 0xd1, 0x04,
+  0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0x22, 0x91, 0x11,
+  0x03, 0x04, 0x00, 0x41, 0x30, 0xd8, 0xf4, 0x44, 0x4d, 0x12, 0x22, 0x18,
+  0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x6d, 0x4f, 0xd6, 0x24, 0x21, 0x82,
+  0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xd8, 0xf8, 0x84, 0x4d, 0x12, 0x22,
+  0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x85, 0x54, 0xd6, 0x84, 0x45,
+  0x82, 0x3b, 0xe1, 0x91, 0x3c, 0x19, 0x4d, 0x08, 0x80, 0x0b, 0x1e, 0x9b,
+  0x25, 0x90, 0x83, 0x81, 0x96, 0xc7, 0x34, 0x3a, 0x3d, 0xe2, 0x58, 0xe2,
+  0x13, 0xe2, 0x40, 0x8f, 0xc0, 0xe0, 0x02, 0x83, 0x46, 0x0c, 0x1c, 0x00,
+  0x04, 0xc1, 0xa0, 0x41, 0x95, 0x33, 0x99, 0x11, 0x16, 0xe1, 0x93, 0x40,
+  0x4c, 0xc4, 0x44, 0x4c, 0xc2, 0xc4, 0x4f, 0x66, 0x09, 0x46, 0x68, 0xb8,
+  0x61, 0x34, 0xf8, 0x04, 0x0c, 0x66, 0x19, 0xe8, 0x20, 0x26, 0x82, 0x11,
+  0x03, 0x03, 0x00, 0x41, 0x30, 0x80, 0x50, 0x45, 0x4d, 0x42, 0x62, 0xc4,
+  0xc0, 0x00, 0x40, 0x10, 0x0c, 0xa0, 0x54, 0x59, 0x93, 0x90, 0x30, 0x21,
+  0x4c, 0xe0, 0x63, 0x82, 0x98, 0xc0, 0x67, 0x34, 0x61, 0x46, 0x86, 0xe1,
+  0x86, 0x40, 0x54, 0xc0, 0x60, 0x96, 0xa1, 0x0e, 0xee, 0x20, 0x18, 0x8e,
+  0x30, 0xcc, 0x64, 0xf8, 0xee, 0x18, 0x66, 0xb8, 0x21, 0x88, 0x11, 0x32,
+  0xa8, 0x21, 0xd0, 0xe1, 0x88, 0x44, 0x4d, 0x86, 0xaf, 0x02, 0x41, 0x6f,
+  0x19, 0x66, 0xb8, 0x21, 0xa0, 0x11, 0x32, 0xa8, 0x60, 0xd0, 0x59, 0x06,
+  0x3b, 0x58, 0x85, 0xe0, 0xf0, 0x63, 0x98, 0x7b, 0x89, 0x61, 0x46, 0x0c,
+  0x0e, 0x00, 0x04, 0xc1, 0xc0, 0x9b, 0x95, 0x51, 0x11, 0x13, 0x58, 0x19,
   0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04,
-  0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xd8, 0xe2,
-  0x27, 0x7c, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x4d,
-  0x7e, 0xc4, 0x27, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xd8,
-  0xe6, 0x67, 0x7c, 0x12, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83,
-  0x65, 0x7f, 0xc4, 0x67, 0x74, 0x02, 0xf7, 0x99, 0x1d, 0xf8, 0x19, 0x4d,
-  0x08, 0x80, 0x0b, 0x1e, 0x9b, 0x25, 0x10, 0x87, 0xe1, 0x06, 0xb8, 0x98,
-  0x1f, 0x30, 0x98, 0x65, 0x88, 0x05, 0x71, 0x08, 0xac, 0x6e, 0xee, 0x26,
-  0x3e, 0xc3, 0x11, 0x74, 0x81, 0x37, 0xc4, 0x37, 0xcb, 0x20, 0x0b, 0xb5,
-  0x10, 0x58, 0xde, 0xd4, 0x45, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30,
-  0x17, 0x3c, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xd1, 0x3f,
-  0x3a, 0xdc, 0x10, 0xec, 0x0f, 0x18, 0xcc, 0x32, 0xcc, 0x02, 0x2d, 0x04,
-  0x36, 0x84, 0x0e, 0x7c, 0x66, 0x09, 0x72, 0xc1, 0x40, 0x87, 0x88, 0xcf,
-  0x2c, 0x41, 0x2e, 0x0c, 0x47, 0xfc, 0x45, 0xe8, 0x08, 0xdf, 0x2c, 0x83,
-  0x2d, 0xe4, 0x42, 0x60, 0xa0, 0x21, 0x3a, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf,
-  0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53,
-  0x04, 0x0a, 0xe9, 0x70, 0x43, 0x60, 0x42, 0x60, 0x30, 0xcb, 0x70, 0x0b,
-  0xb8, 0x10, 0x98, 0xea, 0x0c, 0xf1, 0x99, 0x25, 0xc8, 0x05, 0x23, 0x5a,
-  0x07, 0x3e, 0xb3, 0x04, 0xb9, 0x30, 0xd0, 0xf2, 0x68, 0xb3, 0x80, 0xd1,
-  0x02, 0x71, 0x0b, 0x02, 0x2e, 0xd0, 0x4c, 0x2d, 0x5c, 0x30, 0x8c, 0xb1,
-  0x0e, 0xec, 0xc4, 0x67, 0x38, 0xc2, 0x35, 0x62, 0x87, 0xf8, 0x66, 0x19,
-  0x74, 0xa1, 0x17, 0x02, 0x93, 0x9d, 0xd7, 0x88, 0x8f, 0x05, 0x03, 0x7d,
-  0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f,
-  0x22, 0x6c, 0x48, 0x87, 0x1b, 0x02, 0x1a, 0x02, 0x83, 0x59, 0x86, 0x5d,
-  0xe0, 0x85, 0xc0, 0x06, 0xdd, 0x81, 0xcf, 0x2c, 0x41, 0x38, 0xd8, 0xed,
-  0x10, 0xf1, 0x99, 0x25, 0x08, 0x87, 0xe1, 0x88, 0xdc, 0xc0, 0x1d, 0xe1,
-  0x9b, 0x65, 0xf0, 0x85, 0x70, 0x08, 0x4c, 0x37, 0x72, 0x27, 0x3e, 0x16,
-  0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20, 0x92, 0x8f, 0x15,
-  0x41, 0x7c, 0x8a, 0x08, 0x23, 0x1d, 0x6e, 0x08, 0x7e, 0x08, 0x0c, 0x66,
-  0x19, 0x7e, 0x01, 0x1c, 0x02, 0x0b, 0x9f, 0x21, 0x3e, 0xb3, 0x04, 0xe1,
-  0x60, 0x84, 0xf9, 0xc0, 0x67, 0x96, 0x20, 0x1c, 0x06, 0x5a, 0x1e, 0x6d,
-  0x17, 0x30, 0x5e, 0x20, 0x7e, 0x41, 0x00, 0x07, 0xd4, 0xe9, 0x85, 0x0b,
-  0x86, 0xb9, 0xe0, 0xa9, 0xdb, 0x9e, 0x3a, 0xd9, 0x19, 0xe6, 0xd2, 0x65,
-  0x98, 0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30,
-  0xf0, 0xde, 0xe8, 0x87, 0xfc, 0x87, 0x8d, 0x46, 0x13, 0x02, 0x60, 0x34,
-  0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64,
-  0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x36, 0x3b, 0x32, 0xa3, 0x84, 0x08,
-  0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0xbb, 0xa3, 0x33, 0x4a, 0x88,
-  0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x36, 0x3c, 0x42, 0xa3, 0x84,
-  0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0x01, 0xa5, 0x33, 0x42,
-  0xa1, 0x60, 0x8e, 0x70, 0xa8, 0x8e, 0x46, 0x13, 0x02, 0xe0, 0x82, 0xc7,
-  0x66, 0x09, 0xc4, 0x61, 0xa0, 0xe5, 0x31, 0x8d, 0x56, 0x90, 0xc3, 0x80,
-  0x15, 0x58, 0xe2, 0x15, 0x84, 0x70, 0x90, 0xc3, 0x00, 0x16, 0x66, 0x19,
-  0xc6, 0xa1, 0x1c, 0xea, 0x63, 0x38, 0x42, 0x3f, 0x74, 0x68, 0xf8, 0x6e,
-  0x3f, 0x86, 0x19, 0x6e, 0x08, 0x4a, 0x88, 0x0c, 0x6a, 0x08, 0x74, 0x38,
-  0x62, 0x3f, 0x7c, 0x68, 0xf8, 0x2a, 0x10, 0xf4, 0xfa, 0x63, 0x98, 0xe1,
-  0x86, 0x00, 0x85, 0xc8, 0xa0, 0x82, 0x41, 0x67, 0x19, 0xc8, 0x21, 0x1f,
-  0x82, 0x63, 0x9f, 0x61, 0x6e, 0x64, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10,
-  0x04, 0x03, 0xef, 0x94, 0xee, 0xc8, 0x86, 0x48, 0x69, 0x34, 0x21, 0x00,
-  0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88,
-  0x43, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0x73, 0x25, 0x3f, 0x3a,
-  0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb6, 0x57, 0xfa, 0x23,
-  0x86, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0x83, 0x25, 0x50,
-  0x92, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x16, 0x5c, 0xfa,
-  0x23, 0x30, 0x0a, 0x56, 0x09, 0x8e, 0x5a, 0x69, 0x34, 0x21, 0x00, 0x2e,
-  0x78, 0x6c, 0x96, 0x20, 0x1f, 0x86, 0x1b, 0x5a, 0x24, 0x96, 0xc0, 0x60,
-  0x96, 0xc1, 0x1c, 0xce, 0x21, 0xa8, 0x1d, 0x12, 0x25, 0xb8, 0xe0, 0xa9,
-  0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0x74, 0x69, 0x94, 0x68, 0x84,
-  0x8e, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0xda, 0xa5, 0x51, 0x0a,
-  0x84, 0x0b, 0x86, 0x29, 0x1f, 0x3a, 0x25, 0xb8, 0xe0, 0xa9, 0x11, 0x83,
-  0x03, 0x00, 0x41, 0x30, 0xa0, 0x7e, 0x09, 0x95, 0x6c, 0x24, 0x8f, 0x46,
-  0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x02, 0x27, 0x54, 0x0a, 0x84, 0x0b,
-  0x86, 0xb9, 0xe0, 0xa9, 0x3b, 0x9e, 0xba, 0x13, 0x1a, 0xe6, 0x7c, 0x66,
-  0x98, 0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30,
-  0xf0, 0xc8, 0x89, 0x96, 0xe6, 0x28, 0x9c, 0x46, 0x13, 0x02, 0x60, 0x34,
-  0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64,
-  0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb6, 0x75, 0xda, 0xa5, 0x84, 0x08,
-  0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0x63, 0x27, 0x5e, 0x4a, 0x88,
-  0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb6, 0x76, 0xea, 0xa5, 0x84,
-  0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0xa9, 0x27, 0x5e, 0xea,
-  0xa3, 0x00, 0x9d, 0x5a, 0x49, 0x9d, 0x46, 0x13, 0x02, 0xe0, 0x82, 0xc7,
-  0x66, 0x09, 0xf2, 0x61, 0xb8, 0x41, 0x4d, 0xda, 0x09, 0x0c, 0x66, 0x19,
-  0xd0, 0x21, 0x1f, 0x02, 0x7b, 0xa3, 0x38, 0x8a, 0xcf, 0x70, 0x04, 0x9c,
-  0xc8, 0x11, 0xf1, 0xcd, 0x32, 0xa4, 0x03, 0x3b, 0x04, 0x36, 0x47, 0x71,
-  0x12, 0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x60,
-  0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xdc, 0x93, 0x0e, 0x37, 0x04, 0xf5,
-  0x04, 0x06, 0xb3, 0x0c, 0xea, 0xb0, 0x0e, 0x81, 0x0d, 0x7b, 0x04, 0x9f,
-  0x59, 0x02, 0x78, 0x30, 0x3d, 0x22, 0xe2, 0x33, 0x4b, 0x00, 0x0f, 0xc3,
-  0x11, 0x7b, 0xb2, 0x47, 0xc2, 0x37, 0xcb, 0xd0, 0x0e, 0xf0, 0x10, 0x18,
-  0x9f, 0xf0, 0x51, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c,
-  0x65, 0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x21, 0x52, 0x3a, 0xdc,
-  0x10, 0x80, 0x14, 0x18, 0xcc, 0x32, 0xb8, 0xc3, 0x3b, 0x04, 0x46, 0x4a,
-  0x43, 0x7c, 0x66, 0x09, 0xe0, 0xc1, 0x88, 0x53, 0x82, 0xcf, 0x2c, 0x01,
-  0x3c, 0x0c, 0xb4, 0x3c, 0x9a, 0x3a, 0x60, 0xeb, 0x40, 0xb8, 0x83, 0xf0,
-  0x0e, 0x2c, 0xc5, 0x0e, 0x17, 0x0c, 0x63, 0xa6, 0xa4, 0x4a, 0xf1, 0x19,
-  0x8e, 0x30, 0x95, 0x55, 0x22, 0xbe, 0x59, 0x86, 0x78, 0xa0, 0x87, 0xc0,
-  0x58, 0xe9, 0x54, 0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xe0,
-  0x29, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0x98, 0xd2, 0xe1,
-  0x86, 0xc0, 0xa5, 0xc0, 0x60, 0x96, 0x41, 0x1e, 0xe6, 0x21, 0xb0, 0x81,
-  0x96, 0xe0, 0x33, 0x4b, 0x80, 0x0f, 0x16, 0x4b, 0x44, 0x7c, 0x66, 0x09,
-  0xf0, 0x61, 0x38, 0x22, 0x56, 0x64, 0x49, 0xf8, 0x66, 0x19, 0xea, 0x01,
-  0x1f, 0x02, 0x93, 0x95, 0x59, 0x8a, 0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18,
-  0xe6, 0x82, 0xa7, 0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0x76,
-  0x4a, 0x87, 0x1b, 0x82, 0x9c, 0x02, 0x83, 0x59, 0x06, 0x7b, 0xb8, 0x87,
-  0xc0, 0x76, 0x69, 0x88, 0xcf, 0x2c, 0x01, 0x3e, 0x18, 0x01, 0x4e, 0xf0,
-  0x99, 0x25, 0xc0, 0x87, 0x81, 0x96, 0x47, 0x93, 0x07, 0x6c, 0x1e, 0x08,
-  0x7b, 0x10, 0xee, 0x01, 0xaf, 0xe8, 0xe1, 0x82, 0x61, 0x2e, 0x78, 0xea,
-  0xb6, 0xa7, 0x8e, 0x95, 0x86, 0xb9, 0xf1, 0x19, 0xe6, 0x88, 0x61, 0x8e,
-  0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xbc, 0xb4, 0xca, 0x29,
-  0x7c, 0x32, 0xab, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84,
-  0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10,
-  0x04, 0x83, 0x0d, 0xae, 0xc0, 0x2a, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00,
-  0x41, 0x30, 0xd8, 0xe2, 0x2a, 0xac, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00,
-  0x10, 0x04, 0x83, 0x4d, 0xae, 0xc4, 0x2a, 0x21, 0x82, 0x11, 0x03, 0x05,
-  0x00, 0x41, 0x30, 0x58, 0xf4, 0x2a, 0xac, 0x44, 0x2a, 0x68, 0x2b, 0x99,
-  0x7a, 0xab, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xb1, 0x59, 0x82, 0x7c, 0x18,
-  0x68, 0x79, 0x4c, 0x83, 0x1c, 0xcc, 0x34, 0x18, 0x07, 0x96, 0x30, 0x07,
-  0x01, 0x1f, 0xcc, 0x34, 0x38, 0x87, 0x59, 0x06, 0x7d, 0xe0, 0x87, 0x77,
-  0x19, 0x8e, 0x90, 0x17, 0x9a, 0x1a, 0xbe, 0x9b, 0x97, 0x61, 0x86, 0x1b,
-  0x82, 0x7f, 0x22, 0x83, 0x1a, 0x02, 0x1d, 0x8e, 0xa8, 0x17, 0x9c, 0x1a,
-  0xbe, 0x0a, 0x04, 0xbd, 0x7b, 0x19, 0x66, 0xb8, 0x21, 0x10, 0x29, 0x32,
-  0xa8, 0x60, 0xd0, 0x59, 0x86, 0x7d, 0x80, 0x89, 0xe0, 0xcc, 0x69, 0x98,
-  0xeb, 0x9f, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0xc0, 0x0b, 0xad,
-  0xb8, 0x82, 0x29, 0xbf, 0x1a, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18,
-  0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0xe2, 0x90, 0x11, 0x03, 0x04,
-  0x00, 0x41, 0x30, 0xd8, 0x50, 0x0b, 0xaf, 0x0e, 0x22, 0x18, 0x31, 0x40,
-  0x00, 0x10, 0x04, 0x83, 0x2d, 0xb5, 0xf2, 0x8a, 0x21, 0x82, 0x11, 0x03,
-  0x04, 0x00, 0x41, 0x30, 0xd8, 0x54, 0x4b, 0xaf, 0x24, 0x22, 0x18, 0x31,
-  0x50, 0x00, 0x10, 0x04, 0x83, 0x45, 0xb6, 0xf2, 0x4a, 0xa7, 0x82, 0xd2,
-  0x52, 0xab, 0xd3, 0x1a, 0x4d, 0x08, 0x80, 0x0b, 0x1e, 0x9b, 0x25, 0x80,
-  0x89, 0xe1, 0x86, 0x93, 0x59, 0x2d, 0x30, 0x98, 0x65, 0xe8, 0x07, 0x7f,
-  0x08, 0xaa, 0xa6, 0xf8, 0x0a, 0x2e, 0x78, 0x6a, 0xc4, 0xe0, 0x00, 0x40,
-  0x10, 0x0c, 0x28, 0xda, 0xea, 0x2b, 0x96, 0x71, 0xab, 0x11, 0x83, 0x03,
-  0x00, 0x41, 0x30, 0xa0, 0x6a, 0xab, 0xaf, 0x02, 0xe1, 0x82, 0x61, 0x0a,
-  0xa7, 0x42, 0x0b, 0x2e, 0x78, 0x6a, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c,
-  0xa8, 0xdc, 0x12, 0x2d, 0x98, 0x99, 0xab, 0x11, 0x83, 0x03, 0x00, 0x41,
-  0x30, 0xa0, 0x74, 0x4b, 0xb4, 0x02, 0xe1, 0x82, 0x61, 0x2e, 0x78, 0xea,
-  0x8e, 0xa7, 0x2e, 0xa4, 0x86, 0x39, 0x1c, 0x1a, 0xe6, 0x88, 0x61, 0x8e,
-  0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x3c, 0xdf, 0x72, 0xad,
-  0xb6, 0xda, 0xad, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84,
-  0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10,
-  0x04, 0x83, 0xad, 0xbc, 0x6a, 0x2b, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00,
-  0x41, 0x30, 0xd8, 0xcc, 0xcb, 0xb6, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00,
-  0x10, 0x04, 0x83, 0xed, 0xbc, 0x6e, 0x2b, 0x21, 0x82, 0x11, 0x03, 0x05,
-  0x00, 0x41, 0x30, 0x58, 0xde, 0xcb, 0xb6, 0xee, 0x2a, 0x10, 0xaf, 0xd3,
-  0x22, 0xaf, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xb1, 0x59, 0x02, 0x98, 0x18,
-  0x6e, 0x20, 0x9b, 0xf3, 0x02, 0x83, 0x59, 0x86, 0x7f, 0x80, 0x89, 0xc0,
-  0xd2, 0x6a, 0xad, 0xe2, 0x33, 0x1c, 0x81, 0x36, 0x6c, 0x45, 0x7c, 0xb3,
-  0x0c, 0x20, 0x31, 0x12, 0x81, 0xb5, 0x55, 0xda, 0xc4, 0xc7, 0x82, 0x81,
-  0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88,
-  0x4f, 0x11, 0xf1, 0xa5, 0xc3, 0x0d, 0xc1, 0x7b, 0x81, 0xc1, 0x2c, 0x43,
-  0x48, 0x88, 0x44, 0x60, 0x43, 0x5d, 0xc1, 0x67, 0x96, 0xe0, 0x24, 0x8c,
-  0xae, 0x88, 0xf8, 0xcc, 0x12, 0x9c, 0xc4, 0x70, 0xc4, 0xdc, 0xd4, 0x95,
-  0xf0, 0xcd, 0x32, 0x90, 0xc4, 0x49, 0x04, 0x46, 0x37, 0x76, 0x15, 0x1f,
-  0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x10, 0xc9, 0xc7,
-  0x8a, 0x20, 0x3e, 0x45, 0xf0, 0x97, 0x0e, 0x37, 0x04, 0xfa, 0x05, 0x06,
-  0xb3, 0x0c, 0x25, 0x61, 0x12, 0x81, 0xf9, 0xd5, 0x10, 0x9f, 0x59, 0x82,
-  0x93, 0x30, 0x22, 0xb4, 0xe0, 0x33, 0x4b, 0x70, 0x12, 0x03, 0x2d, 0x8f,
-  0x16, 0x12, 0x98, 0x48, 0x10, 0x25, 0x21, 0x98, 0x04, 0xbf, 0x8d, 0xc4,
-  0x05, 0xc3, 0x18, 0x68, 0x91, 0x56, 0x7c, 0x86, 0x23, 0x40, 0xa7, 0xb4,
-  0x88, 0x6f, 0x96, 0x01, 0x25, 0x56, 0x22, 0x30, 0xd3, 0x0a, 0x9d, 0xf8,
-  0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca, 0x02, 0x43, 0x3e,
-  0x56, 0x04, 0xf1, 0x29, 0x42, 0xc5, 0x74, 0xb8, 0x21, 0x40, 0x31, 0x30,
-  0x98, 0x65, 0x48, 0x09, 0x95, 0x08, 0x6c, 0x70, 0x2d, 0xf8, 0xcc, 0x12,
-  0xbc, 0x84, 0xad, 0x16, 0x11, 0x9f, 0x59, 0x82, 0x97, 0x18, 0x8e, 0x58,
-  0x1d, 0xd6, 0x12, 0xbe, 0x59, 0x06, 0x96, 0x78, 0x89, 0xc0, 0x58, 0xa7,
-  0xb5, 0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b,
-  0x22, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0x1a, 0xd3, 0xe1, 0x86, 0x60,
-  0xc6, 0xc0, 0x60, 0x96, 0xa1, 0x25, 0x5c, 0x22, 0xb0, 0xda, 0x1a, 0xe2,
-  0x33, 0x4b, 0xf0, 0x12, 0x46, 0xe8, 0x16, 0x7c, 0x66, 0x09, 0x5e, 0x62,
-  0xa0, 0xe5, 0xd1, 0x52, 0x02, 0x53, 0x09, 0xa2, 0x25, 0x04, 0x97, 0x60,
-  0xbb, 0x95, 0xb8, 0x60, 0x98, 0x0b, 0x9e, 0xba, 0xed, 0xa9, 0x33, 0xad,
-  0x61, 0xae, 0x97, 0x86, 0x39, 0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38,
-  0x00, 0x10, 0x04, 0x03, 0x6f, 0xcc, 0x66, 0x4c, 0xbe, 0xc0, 0x6c, 0x34,
+  0x62, 0x28, 0xe2, 0x90, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xd8, 0x74,
+  0x45, 0x55, 0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x6d,
+  0x57, 0x56, 0x85, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xd8,
+  0x78, 0x85, 0x55, 0x24, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83,
+  0x85, 0x5c, 0x56, 0x85, 0x4d, 0x82, 0x5b, 0xe1, 0x93, 0x5c, 0x19, 0x4d,
+  0x08, 0x80, 0x0b, 0x1e, 0x9b, 0x25, 0x58, 0x85, 0xe1, 0x86, 0xac, 0x57,
+  0xc0, 0x60, 0x96, 0x01, 0x0f, 0xf2, 0x20, 0xa8, 0x33, 0x71, 0x15, 0xb8,
+  0xe0, 0xa9, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0xcc, 0xe5, 0x55,
+  0xc0, 0x00, 0x54, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x3a, 0x97,
+  0x57, 0x09, 0x84, 0x0b, 0x86, 0x29, 0x35, 0x99, 0x15, 0xb8, 0xe0, 0xa9,
+  0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0xd6, 0x85, 0x56, 0xc8, 0xa0,
+  0x54, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x62, 0x17, 0x5a, 0x09,
+  0x84, 0x0b, 0x86, 0xb9, 0xe0, 0xa9, 0x3b, 0x9e, 0xba, 0x19, 0x19, 0xe6,
+  0xd4, 0x62, 0x98, 0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00,
+  0x41, 0x30, 0xf0, 0xe0, 0x05, 0x5c, 0xfe, 0xa4, 0x5d, 0x46, 0x13, 0x02,
+  0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a,
+  0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb6, 0x7b, 0x39, 0x97,
+  0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0xc3, 0x17, 0x74,
+  0x49, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb6, 0x7c, 0x49,
+  0x97, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0x09, 0x19,
+  0x74, 0x49, 0x95, 0x80, 0x5e, 0x72, 0xc5, 0x5e, 0x46, 0x13, 0x02, 0xe0,
+  0x82, 0xc7, 0x66, 0x09, 0x56, 0x61, 0xb8, 0xc1, 0x0e, 0xf2, 0x05, 0x0c,
+  0x66, 0x19, 0xf4, 0x60, 0x15, 0x02, 0xdb, 0x93, 0x3e, 0x89, 0xcf, 0x70,
+  0x04, 0x1f, 0xf8, 0x09, 0xf1, 0xcd, 0x32, 0xec, 0x81, 0x1f, 0x04, 0xf6,
+  0x27, 0x7d, 0x10, 0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f,
+  0x59, 0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0x8c, 0x8c, 0x0e, 0x37,
+  0x04, 0x21, 0x03, 0x06, 0xb3, 0x0c, 0x7c, 0xd0, 0x07, 0x81, 0x0d, 0xa7,
+  0x02, 0x9f, 0x59, 0x02, 0x51, 0x30, 0x53, 0x21, 0xe2, 0x33, 0x4b, 0x20,
+  0x0a, 0xc3, 0x11, 0xa7, 0x70, 0x2a, 0xc2, 0x37, 0xcb, 0xf0, 0x07, 0xa2,
+  0x10, 0x18, 0x2a, 0xa0, 0x4a, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30,
+  0x17, 0x3c, 0x65, 0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xe1, 0x32,
+  0x3a, 0xdc, 0x10, 0xb0, 0x0c, 0x18, 0xcc, 0x32, 0x80, 0x42, 0x28, 0x04,
+  0x06, 0x2b, 0x43, 0x7c, 0x66, 0x09, 0x44, 0xc1, 0x88, 0x59, 0x81, 0xcf,
+  0x2c, 0x81, 0x28, 0x0c, 0xb4, 0x3c, 0x1a, 0x1f, 0x60, 0x7d, 0x40, 0x80,
+  0x82, 0x10, 0x0a, 0x64, 0xe1, 0x07, 0x17, 0x0c, 0x63, 0xb2, 0x62, 0x2b,
+  0xf1, 0x19, 0x8e, 0xa0, 0x85, 0x5b, 0x21, 0xbe, 0x59, 0x86, 0x51, 0x30,
+  0x85, 0xc0, 0x70, 0xa5, 0x16, 0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86,
+  0xb9, 0xe0, 0x29, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0x9e,
+  0xd1, 0xe1, 0x86, 0x40, 0x67, 0xc0, 0x60, 0x96, 0x81, 0x14, 0x4a, 0x21,
+  0xb0, 0x01, 0x5c, 0xe0, 0x33, 0x4b, 0xa0, 0x0a, 0xd6, 0x2b, 0x44, 0x7c,
+  0x66, 0x09, 0x54, 0x61, 0x38, 0xe2, 0x17, 0x7c, 0x45, 0xf8, 0x66, 0x19,
+  0x4e, 0x41, 0x15, 0x02, 0x03, 0x87, 0x5f, 0x89, 0x8f, 0x05, 0x0e, 0x7d,
+  0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f,
+  0x22, 0xce, 0x46, 0x87, 0x1b, 0x82, 0xb2, 0x01, 0x83, 0x59, 0x06, 0x54,
+  0x48, 0x85, 0xc0, 0xce, 0x65, 0x88, 0xcf, 0x2c, 0x81, 0x2a, 0x18, 0xc1,
+  0x2e, 0xf0, 0x99, 0x25, 0x50, 0x85, 0x81, 0x96, 0x47, 0x23, 0x05, 0xac,
+  0x14, 0x08, 0x54, 0x10, 0x52, 0x81, 0x36, 0x4c, 0xe1, 0x82, 0x61, 0x2e,
+  0x78, 0xea, 0xb6, 0xa7, 0x0e, 0x57, 0x86, 0xb9, 0xf7, 0x18, 0xe6, 0x88,
+  0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xbc, 0xba,
+  0x29, 0x1b, 0x92, 0x91, 0x9b, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82,
+  0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x12, 0x19, 0x31, 0x40,
+  0x00, 0x10, 0x04, 0x83, 0x8d, 0x6f, 0xd8, 0x26, 0x21, 0x82, 0x11, 0x03,
+  0x04, 0x00, 0x41, 0x30, 0xd8, 0xfa, 0xa6, 0x6d, 0x12, 0x22, 0x18, 0x31,
+  0x40, 0x00, 0x10, 0x04, 0x83, 0xcd, 0x6f, 0xdc, 0x26, 0x21, 0x82, 0x11,
+  0x03, 0x05, 0x00, 0x41, 0x30, 0x58, 0x4c, 0xa7, 0x6d, 0x5c, 0x26, 0xc8,
+  0x1b, 0x9f, 0xd9, 0x9b, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xb1, 0x59, 0x82,
+  0x55, 0x18, 0x68, 0x79, 0x4c, 0xc3, 0x0e, 0xfc, 0xac, 0x0e, 0x58, 0x02,
+  0x0f, 0x04, 0x55, 0xf0, 0xb3, 0x3c, 0x98, 0x65, 0x60, 0x05, 0x57, 0xd8,
+  0x87, 0xe1, 0x08, 0x7f, 0x00, 0x9b, 0xe1, 0xbb, 0x7f, 0x18, 0x66, 0xb8,
+  0x21, 0x58, 0x19, 0x32, 0xa8, 0x21, 0xd0, 0xe1, 0x88, 0x91, 0x20, 0x9b,
+  0xe1, 0xab, 0x40, 0xd0, 0x2b, 0x89, 0x61, 0x86, 0x1b, 0x02, 0x97, 0x21,
+  0x83, 0x0a, 0x06, 0x9d, 0x65, 0x68, 0x05, 0x71, 0x08, 0x4e, 0x5e, 0x86,
+  0xb9, 0x14, 0x19, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xbc, 0xd6,
+  0xe9, 0x1b, 0x9e, 0x51, 0x9d, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82,
+  0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x0e, 0x19, 0x31, 0x40,
+  0x00, 0x10, 0x04, 0x83, 0x8d, 0x76, 0x48, 0xe7, 0x20, 0x82, 0x11, 0x03,
+  0x04, 0x00, 0x41, 0x30, 0xd8, 0x6a, 0xa7, 0x74, 0x18, 0x22, 0x18, 0x31,
+  0x40, 0x00, 0x10, 0x04, 0x83, 0xcd, 0x76, 0x4c, 0x47, 0x22, 0x82, 0x11,
+  0x03, 0x05, 0x00, 0x41, 0x30, 0x58, 0x7c, 0xa7, 0x74, 0xcc, 0x26, 0x88,
+  0x1d, 0xbb, 0x99, 0x9d, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xb1, 0x59, 0x02,
+  0x71, 0x18, 0x6e, 0x98, 0x89, 0xdb, 0x01, 0x83, 0x59, 0x86, 0x57, 0x80,
+  0x85, 0xa0, 0xc2, 0x06, 0x75, 0xe0, 0x82, 0xa7, 0x46, 0x0c, 0x0e, 0x00,
+  0x04, 0xc1, 0x80, 0x02, 0x9f, 0xd4, 0xc1, 0x09, 0xbd, 0x19, 0x31, 0x38,
+  0x00, 0x10, 0x04, 0x03, 0x2a, 0x7c, 0x52, 0x27, 0x10, 0x2e, 0x18, 0xa6,
+  0xc8, 0xa6, 0x75, 0xe0, 0x82, 0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1,
+  0x80, 0x2a, 0x1f, 0xd7, 0xf1, 0x89, 0xbf, 0x19, 0x31, 0x38, 0x00, 0x10,
+  0x04, 0x03, 0xca, 0x7c, 0x5c, 0x27, 0x10, 0x2e, 0x18, 0xe6, 0x82, 0xa7,
+  0xee, 0x78, 0xea, 0x5a, 0x66, 0x98, 0x23, 0x93, 0x61, 0x8e, 0x18, 0xe6,
+  0x88, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0xc0, 0x53, 0x1f, 0xdd,
+  0xc9, 0x9b, 0xf3, 0x19, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d,
+  0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00,
+  0x41, 0x30, 0xd8, 0xe2, 0x27, 0x7c, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00,
+  0x10, 0x04, 0x83, 0x4d, 0x7e, 0xc4, 0x27, 0x21, 0x82, 0x11, 0x03, 0x04,
+  0x00, 0x41, 0x30, 0xd8, 0xe6, 0x67, 0x7c, 0x12, 0x22, 0x18, 0x31, 0x50,
+  0x00, 0x10, 0x04, 0x83, 0x65, 0x7f, 0xc4, 0x67, 0x74, 0x02, 0xf7, 0x99,
+  0x1d, 0xf8, 0x19, 0x4d, 0x08, 0x80, 0x0b, 0x1e, 0x9b, 0x25, 0x10, 0x87,
+  0xe1, 0x06, 0xb8, 0x98, 0x1f, 0x30, 0x98, 0x65, 0x88, 0x05, 0x71, 0x08,
+  0xac, 0x6e, 0xee, 0x26, 0x3e, 0xc3, 0x11, 0x74, 0x81, 0x37, 0xc4, 0x37,
+  0xcb, 0x20, 0x0b, 0xb5, 0x10, 0x58, 0xde, 0xd4, 0x45, 0x7c, 0x2c, 0x18,
+  0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82,
+  0xf8, 0x14, 0xd1, 0x3f, 0x3a, 0xdc, 0x10, 0xec, 0x0f, 0x18, 0xcc, 0x32,
+  0xcc, 0x02, 0x2d, 0x04, 0x36, 0x84, 0x0e, 0x7c, 0x66, 0x09, 0x72, 0xc1,
+  0x40, 0x87, 0x88, 0xcf, 0x2c, 0x41, 0x2e, 0x0c, 0x47, 0xfc, 0x45, 0xe8,
+  0x08, 0xdf, 0x2c, 0x83, 0x2d, 0xe4, 0x42, 0x60, 0xa0, 0x21, 0x3a, 0xf1,
+  0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x91, 0x7c,
+  0xac, 0x08, 0xe2, 0x53, 0x04, 0x0a, 0xe9, 0x70, 0x43, 0x60, 0x42, 0x60,
+  0x30, 0xcb, 0x70, 0x0b, 0xb8, 0x10, 0x98, 0xea, 0x0c, 0xf1, 0x99, 0x25,
+  0xc8, 0x05, 0x23, 0x5a, 0x07, 0x3e, 0xb3, 0x04, 0xb9, 0x30, 0xd0, 0xf2,
+  0x68, 0xb3, 0x80, 0xd1, 0x02, 0x71, 0x0b, 0x02, 0x2e, 0xd0, 0x4c, 0x2d,
+  0x5c, 0x30, 0x8c, 0xb1, 0x0e, 0xec, 0xc4, 0x67, 0x38, 0xc2, 0x35, 0x62,
+  0x87, 0xf8, 0x66, 0x19, 0x74, 0xa1, 0x17, 0x02, 0x93, 0x9d, 0xd7, 0x88,
+  0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x30, 0xe4,
+  0x63, 0x45, 0x10, 0x9f, 0x22, 0x6c, 0x48, 0x87, 0x1b, 0x02, 0x1a, 0x02,
+  0x83, 0x59, 0x86, 0x5d, 0xe0, 0x85, 0xc0, 0x06, 0xdd, 0x81, 0xcf, 0x2c,
+  0x41, 0x38, 0xd8, 0xed, 0x10, 0xf1, 0x99, 0x25, 0x08, 0x87, 0xe1, 0x88,
+  0xdc, 0xc0, 0x1d, 0xe1, 0x9b, 0x65, 0xf0, 0x85, 0x70, 0x08, 0x4c, 0x37,
+  0x72, 0x27, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2,
+  0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x08, 0x23, 0x1d, 0x6e, 0x08,
+  0x7e, 0x08, 0x0c, 0x66, 0x19, 0x7e, 0x01, 0x1c, 0x02, 0x0b, 0x9f, 0x21,
+  0x3e, 0xb3, 0x04, 0xe1, 0x60, 0x84, 0xf9, 0xc0, 0x67, 0x96, 0x20, 0x1c,
+  0x06, 0x5a, 0x1e, 0x6d, 0x17, 0x30, 0x5e, 0x20, 0x7e, 0x41, 0x00, 0x07,
+  0xd4, 0xe9, 0x85, 0x0b, 0x86, 0xb9, 0xe0, 0xa9, 0xdb, 0x9e, 0x3a, 0xd9,
+  0x19, 0xe6, 0xd2, 0x65, 0x98, 0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83,
+  0x03, 0x00, 0x41, 0x30, 0xf0, 0xde, 0xe8, 0x87, 0xfc, 0x87, 0x8d, 0x46,
+  0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81,
+  0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x36, 0x3b,
+  0x32, 0xa3, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0xbb,
+  0xa3, 0x33, 0x4a, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x36,
+  0x3c, 0x42, 0xa3, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60,
+  0x01, 0xa5, 0x33, 0x42, 0xa1, 0x60, 0x8e, 0x70, 0xa8, 0x8e, 0x46, 0x13,
+  0x02, 0xe0, 0x82, 0xc7, 0x66, 0x09, 0xc4, 0x61, 0xa0, 0xe5, 0x31, 0x8d,
+  0x56, 0x90, 0xc3, 0x80, 0x15, 0x58, 0xe2, 0x15, 0x84, 0x70, 0x90, 0xc3,
+  0x00, 0x16, 0x66, 0x19, 0xc6, 0xa1, 0x1c, 0xea, 0x63, 0x38, 0x42, 0x3f,
+  0x74, 0x68, 0xf8, 0x6e, 0x3f, 0x86, 0x19, 0x6e, 0x08, 0x4a, 0x88, 0x0c,
+  0x6a, 0x08, 0x74, 0x38, 0x62, 0x3f, 0x7c, 0x68, 0xf8, 0x2a, 0x10, 0xf4,
+  0xfa, 0x63, 0x98, 0xe1, 0x86, 0x00, 0x85, 0xc8, 0xa0, 0x82, 0x41, 0x67,
+  0x19, 0xc8, 0x21, 0x1f, 0x82, 0x63, 0x9f, 0x61, 0x6e, 0x64, 0x86, 0x19,
+  0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0xef, 0x94, 0xee, 0xc8, 0x86, 0x48,
+  0x69, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46,
+  0x13, 0x88, 0xa1, 0x88, 0x43, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60,
+  0x73, 0x25, 0x3f, 0x3a, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c,
+  0xb6, 0x57, 0xfa, 0x23, 0x86, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1,
+  0x60, 0x83, 0x25, 0x50, 0x92, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10,
+  0x0c, 0x16, 0x5c, 0xfa, 0x23, 0x30, 0x0a, 0x56, 0x09, 0x8e, 0x5a, 0x69,
+  0x34, 0x21, 0x00, 0x2e, 0x78, 0x6c, 0x96, 0x20, 0x1f, 0x86, 0x1b, 0x5a,
+  0x24, 0x96, 0xc0, 0x60, 0x96, 0xc1, 0x1c, 0xce, 0x21, 0xa8, 0x1d, 0x12,
+  0x25, 0xb8, 0xe0, 0xa9, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0x74,
+  0x69, 0x94, 0x68, 0x84, 0x8e, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80,
+  0xda, 0xa5, 0x51, 0x0a, 0x84, 0x0b, 0x86, 0x29, 0x1f, 0x3a, 0x25, 0xb8,
+  0xe0, 0xa9, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0x7e, 0x09, 0x95,
+  0x6c, 0x24, 0x8f, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x02, 0x27,
+  0x54, 0x0a, 0x84, 0x0b, 0x86, 0xb9, 0xe0, 0xa9, 0x3b, 0x9e, 0xba, 0x13,
+  0x1a, 0xe6, 0x7c, 0x66, 0x98, 0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83,
+  0x03, 0x00, 0x41, 0x30, 0xf0, 0xc8, 0x89, 0x96, 0xe6, 0x28, 0x9c, 0x46,
+  0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81,
+  0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb6, 0x75,
+  0xda, 0xa5, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0x63,
+  0x27, 0x5e, 0x4a, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb6,
+  0x76, 0xea, 0xa5, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60,
+  0xa9, 0x27, 0x5e, 0xea, 0xa3, 0x00, 0x9d, 0x5a, 0x49, 0x9d, 0x46, 0x13,
+  0x02, 0xe0, 0x82, 0xc7, 0x66, 0x09, 0xf2, 0x61, 0xb8, 0x41, 0x4d, 0xda,
+  0x09, 0x0c, 0x66, 0x19, 0xd0, 0x21, 0x1f, 0x02, 0x7b, 0xa3, 0x38, 0x8a,
+  0xcf, 0x70, 0x04, 0x9c, 0xc8, 0x11, 0xf1, 0xcd, 0x32, 0xa4, 0x03, 0x3b,
+  0x04, 0x36, 0x47, 0x71, 0x12, 0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30, 0xcc,
+  0x05, 0x4f, 0x59, 0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xdc, 0x93,
+  0x0e, 0x37, 0x04, 0xf5, 0x04, 0x06, 0xb3, 0x0c, 0xea, 0xb0, 0x0e, 0x81,
+  0x0d, 0x7b, 0x04, 0x9f, 0x59, 0x02, 0x78, 0x30, 0x3d, 0x22, 0xe2, 0x33,
+  0x4b, 0x00, 0x0f, 0xc3, 0x11, 0x7b, 0xb2, 0x47, 0xc2, 0x37, 0xcb, 0xd0,
+  0x0e, 0xf0, 0x10, 0x18, 0x9f, 0xf0, 0x51, 0x7c, 0x2c, 0x70, 0xe8, 0x73,
+  0xc1, 0x30, 0x17, 0x3c, 0x65, 0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14,
+  0x21, 0x52, 0x3a, 0xdc, 0x10, 0x80, 0x14, 0x18, 0xcc, 0x32, 0xb8, 0xc3,
+  0x3b, 0x04, 0x46, 0x4a, 0x43, 0x7c, 0x66, 0x09, 0xe0, 0xc1, 0x88, 0x53,
+  0x82, 0xcf, 0x2c, 0x01, 0x3c, 0x0c, 0xb4, 0x3c, 0x9a, 0x3a, 0x60, 0xeb,
+  0x40, 0xb8, 0x83, 0xf0, 0x0e, 0x2c, 0xc5, 0x0e, 0x17, 0x0c, 0x63, 0xa6,
+  0xa4, 0x4a, 0xf1, 0x19, 0x8e, 0x30, 0x95, 0x55, 0x22, 0xbe, 0x59, 0x86,
+  0x78, 0xa0, 0x87, 0xc0, 0x58, 0xe9, 0x54, 0xe2, 0x63, 0xc1, 0x40, 0x9f,
+  0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7,
+  0x08, 0x98, 0xd2, 0xe1, 0x86, 0xc0, 0xa5, 0xc0, 0x60, 0x96, 0x41, 0x1e,
+  0xe6, 0x21, 0xb0, 0x81, 0x96, 0xe0, 0x33, 0x4b, 0x80, 0x0f, 0x16, 0x4b,
+  0x44, 0x7c, 0x66, 0x09, 0xf0, 0x61, 0x38, 0x22, 0x56, 0x64, 0x49, 0xf8,
+  0x66, 0x19, 0xea, 0x01, 0x1f, 0x02, 0x93, 0x95, 0x59, 0x8a, 0x8f, 0x05,
+  0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x88, 0xe4, 0x63, 0x45,
+  0x10, 0x9f, 0x22, 0x76, 0x4a, 0x87, 0x1b, 0x82, 0x9c, 0x02, 0x83, 0x59,
+  0x06, 0x7b, 0xb8, 0x87, 0xc0, 0x76, 0x69, 0x88, 0xcf, 0x2c, 0x01, 0x3e,
+  0x18, 0x01, 0x4e, 0xf0, 0x99, 0x25, 0xc0, 0x87, 0x81, 0x96, 0x47, 0x93,
+  0x07, 0x6c, 0x1e, 0x08, 0x7b, 0x10, 0xee, 0x01, 0xaf, 0xe8, 0xe1, 0x82,
+  0x61, 0x2e, 0x78, 0xea, 0xb6, 0xa7, 0x8e, 0x95, 0x86, 0xb9, 0xf1, 0x19,
+  0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c,
+  0xbc, 0xb4, 0xca, 0x29, 0x7c, 0x32, 0xab, 0xd1, 0x84, 0x00, 0x18, 0x4d,
+  0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x12, 0x19,
+  0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x0d, 0xae, 0xc0, 0x2a, 0x21, 0x82,
+  0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xd8, 0xe2, 0x2a, 0xac, 0x12, 0x22,
+  0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x4d, 0xae, 0xc4, 0x2a, 0x21,
+  0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x58, 0xf4, 0x2a, 0xac, 0x44,
+  0x2a, 0x68, 0x2b, 0x99, 0x7a, 0xab, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xb1,
+  0x59, 0x82, 0x7c, 0x18, 0x68, 0x79, 0x4c, 0x83, 0x1c, 0xcc, 0x34, 0x18,
+  0x07, 0x96, 0x30, 0x07, 0x01, 0x1f, 0xcc, 0x34, 0x38, 0x87, 0x59, 0x06,
+  0x7d, 0xe0, 0x87, 0x77, 0x19, 0x8e, 0x90, 0x17, 0x9a, 0x1a, 0xbe, 0x9b,
+  0x97, 0x61, 0x86, 0x1b, 0x82, 0x7f, 0x22, 0x83, 0x1a, 0x02, 0x1d, 0x8e,
+  0xa8, 0x17, 0x9c, 0x1a, 0xbe, 0x0a, 0x04, 0xbd, 0x7b, 0x19, 0x66, 0xb8,
+  0x21, 0x10, 0x29, 0x32, 0xa8, 0x60, 0xd0, 0x59, 0x86, 0x7d, 0x80, 0x89,
+  0xe0, 0xcc, 0x69, 0x98, 0xeb, 0x9f, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04,
+  0xc1, 0xc0, 0x0b, 0xad, 0xb8, 0x82, 0x29, 0xbf, 0x1a, 0x4d, 0x08, 0x80,
+  0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0xe2,
+  0x90, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xd8, 0x50, 0x0b, 0xaf, 0x0e,
+  0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x2d, 0xb5, 0xf2, 0x8a,
+  0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xd8, 0x54, 0x4b, 0xaf,
+  0x24, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x45, 0xb6, 0xf2,
+  0x4a, 0xa7, 0x82, 0xd2, 0x52, 0xab, 0xd3, 0x1a, 0x4d, 0x08, 0x80, 0x0b,
+  0x1e, 0x9b, 0x25, 0x80, 0x89, 0xe1, 0x86, 0x93, 0x59, 0x2d, 0x30, 0x98,
+  0x65, 0xe8, 0x07, 0x7f, 0x08, 0xaa, 0xa6, 0xf8, 0x0a, 0x2e, 0x78, 0x6a,
+  0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x28, 0xda, 0xea, 0x2b, 0x96, 0x71,
+  0xab, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0x6a, 0xab, 0xaf, 0x02,
+  0xe1, 0x82, 0x61, 0x0a, 0xa7, 0x42, 0x0b, 0x2e, 0x78, 0x6a, 0xc4, 0xe0,
+  0x00, 0x40, 0x10, 0x0c, 0xa8, 0xdc, 0x12, 0x2d, 0x98, 0x99, 0xab, 0x11,
+  0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0x74, 0x4b, 0xb4, 0x02, 0xe1, 0x82,
+  0x61, 0x2e, 0x78, 0xea, 0x8e, 0xa7, 0x2e, 0xa4, 0x86, 0x39, 0x1c, 0x1a,
+  0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c,
+  0x3c, 0xdf, 0x72, 0xad, 0xb6, 0xda, 0xad, 0xd1, 0x84, 0x00, 0x18, 0x4d,
+  0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x12, 0x19,
+  0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0xad, 0xbc, 0x6a, 0x2b, 0x21, 0x82,
+  0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xd8, 0xcc, 0xcb, 0xb6, 0x12, 0x22,
+  0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0xed, 0xbc, 0x6e, 0x2b, 0x21,
+  0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x58, 0xde, 0xcb, 0xb6, 0xee,
+  0x2a, 0x10, 0xaf, 0xd3, 0x22, 0xaf, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xb1,
+  0x59, 0x02, 0x98, 0x18, 0x6e, 0x20, 0x9b, 0xf3, 0x02, 0x83, 0x59, 0x86,
+  0x7f, 0x80, 0x89, 0xc0, 0xd2, 0x6a, 0xad, 0xe2, 0x33, 0x1c, 0x81, 0x36,
+  0x6c, 0x45, 0x7c, 0xb3, 0x0c, 0x20, 0x31, 0x12, 0x81, 0xb5, 0x55, 0xda,
+  0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x18,
+  0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0xf1, 0xa5, 0xc3, 0x0d, 0xc1, 0x7b,
+  0x81, 0xc1, 0x2c, 0x43, 0x48, 0x88, 0x44, 0x60, 0x43, 0x5d, 0xc1, 0x67,
+  0x96, 0xe0, 0x24, 0x8c, 0xae, 0x88, 0xf8, 0xcc, 0x12, 0x9c, 0xc4, 0x70,
+  0xc4, 0xdc, 0xd4, 0x95, 0xf0, 0xcd, 0x32, 0x90, 0xc4, 0x49, 0x04, 0x46,
+  0x37, 0x76, 0x15, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f,
+  0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xf0, 0x97, 0x0e, 0x37,
+  0x04, 0xfa, 0x05, 0x06, 0xb3, 0x0c, 0x25, 0x61, 0x12, 0x81, 0xf9, 0xd5,
+  0x10, 0x9f, 0x59, 0x82, 0x93, 0x30, 0x22, 0xb4, 0xe0, 0x33, 0x4b, 0x70,
+  0x12, 0x03, 0x2d, 0x8f, 0x16, 0x12, 0x98, 0x48, 0x10, 0x25, 0x21, 0x98,
+  0x04, 0xbf, 0x8d, 0xc4, 0x05, 0xc3, 0x18, 0x68, 0x91, 0x56, 0x7c, 0x86,
+  0x23, 0x40, 0xa7, 0xb4, 0x88, 0x6f, 0x96, 0x01, 0x25, 0x56, 0x22, 0x30,
+  0xd3, 0x0a, 0x9d, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78,
+  0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x42, 0xc5, 0x74, 0xb8,
+  0x21, 0x40, 0x31, 0x30, 0x98, 0x65, 0x48, 0x09, 0x95, 0x08, 0x6c, 0x70,
+  0x2d, 0xf8, 0xcc, 0x12, 0xbc, 0x84, 0xad, 0x16, 0x11, 0x9f, 0x59, 0x82,
+  0x97, 0x18, 0x8e, 0x58, 0x1d, 0xd6, 0x12, 0xbe, 0x59, 0x06, 0x96, 0x78,
+  0x89, 0xc0, 0x58, 0xa7, 0xb5, 0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86,
+  0xb9, 0xe0, 0x29, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0x1a,
+  0xd3, 0xe1, 0x86, 0x60, 0xc6, 0xc0, 0x60, 0x96, 0xa1, 0x25, 0x5c, 0x22,
+  0xb0, 0xda, 0x1a, 0xe2, 0x33, 0x4b, 0xf0, 0x12, 0x46, 0xe8, 0x16, 0x7c,
+  0x66, 0x09, 0x5e, 0x62, 0xa0, 0xe5, 0xd1, 0x52, 0x02, 0x53, 0x09, 0xa2,
+  0x25, 0x04, 0x97, 0x60, 0xbb, 0x95, 0xb8, 0x60, 0x98, 0x0b, 0x9e, 0xba,
+  0xed, 0xa9, 0x33, 0xad, 0x61, 0xae, 0x97, 0x86, 0x39, 0x62, 0x98, 0x23,
+  0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x6f, 0xcc, 0x66, 0x4c,
+  0xbe, 0xc0, 0x6c, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61,
+  0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10, 0x00, 0x04,
+  0xc1, 0x60, 0x53, 0x33, 0x1d, 0x4b, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40,
+  0x10, 0x0c, 0xb6, 0x35, 0xdb, 0xb1, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00,
+  0x04, 0xc1, 0x60, 0x63, 0x33, 0x1e, 0x4b, 0x88, 0x60, 0xc4, 0x40, 0x01,
+  0x40, 0x10, 0x0c, 0x16, 0x3a, 0xdb, 0x31, 0xfe, 0x0a, 0xce, 0x8c, 0xc5,
+  0xd2, 0x6c, 0x34, 0x21, 0x00, 0x2e, 0x78, 0x6c, 0x96, 0x00, 0x26, 0x06,
+  0x5a, 0x1e, 0xd3, 0xd8, 0x07, 0x1d, 0x0e, 0xf4, 0x81, 0x25, 0xfa, 0x41,
+  0x78, 0x09, 0x1d, 0x0e, 0xfc, 0x61, 0xc4, 0xc0, 0x00, 0x40, 0x10, 0x0c,
+  0x20, 0x3b, 0xbb, 0xb1, 0x77, 0x32, 0xfb, 0x80, 0x97, 0xf8, 0x98, 0x10,
+  0xc8, 0xc7, 0x02, 0x79, 0x81, 0x8f, 0x15, 0xff, 0x10, 0x1f, 0x2b, 0x02,
+  0xf9, 0x58, 0x10, 0x12, 0xf0, 0x19, 0x31, 0x30, 0x00, 0x10, 0x04, 0x03,
+  0xa8, 0xcf, 0x7c, 0xac, 0x9e, 0x4c, 0x28, 0xe2, 0x63, 0x81, 0x20, 0x1f,
+  0x0b, 0x0e, 0xf8, 0x5c, 0x60, 0xd0, 0x88, 0x81, 0x03, 0x80, 0x20, 0x18,
+  0x34, 0xa3, 0x26, 0x66, 0x2e, 0x76, 0x62, 0x77, 0x16, 0xf4, 0x58, 0x8f,
+  0xf5, 0x18, 0x8f, 0xe5, 0xd9, 0x2c, 0xc1, 0x08, 0x0d, 0x37, 0xf8, 0x55,
+  0x9e, 0x81, 0xc1, 0x2c, 0x83, 0x4c, 0x8c, 0x50, 0x30, 0x62, 0x60, 0x00,
+  0x20, 0x08, 0x06, 0xd0, 0xa8, 0x95, 0x19, 0x3f, 0x59, 0x70, 0x63, 0xf0,
+  0x19, 0x31, 0x30, 0x00, 0x10, 0x04, 0x03, 0xa8, 0xd4, 0xce, 0xac, 0x9f,
+  0x2c, 0xc8, 0x31, 0xf8, 0x8c, 0x26, 0xb8, 0xd8, 0x30, 0xdc, 0x10, 0xf4,
+  0x19, 0x18, 0xcc, 0x32, 0xcc, 0x44, 0x4d, 0x04, 0xc3, 0x11, 0x45, 0x98,
+  0x0d, 0xdf, 0x19, 0xc3, 0x0c, 0x37, 0x04, 0x2c, 0x46, 0x06, 0x35, 0x04,
+  0x3a, 0x1c, 0x71, 0x94, 0xd9, 0xf0, 0x55, 0x20, 0xe8, 0x25, 0xc3, 0x0c,
+  0x37, 0x04, 0x2f, 0x46, 0x06, 0x15, 0x0c, 0x3a, 0xcb, 0x40, 0x13, 0x69,
+  0x11, 0xdc, 0x7c, 0x0d, 0x73, 0x2a, 0x35, 0xcc, 0x88, 0xc1, 0x01, 0x80,
+  0x20, 0x18, 0x78, 0xae, 0xe6, 0x67, 0x3d, 0xb6, 0x6a, 0xa3, 0x09, 0x01,
+  0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45,
+  0x1c, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x5b, 0xad, 0x95, 0xda,
+  0x41, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0xd9, 0x9a, 0xa9,
+  0x31, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xdb, 0xad, 0x9d,
+  0x9a, 0x44, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0xfc, 0x9a,
+  0xa9, 0x9d, 0x59, 0x20, 0x6b, 0x77, 0x46, 0x6b, 0xa3, 0x09, 0x01, 0x70,
+  0xc1, 0x63, 0xb3, 0x04, 0x69, 0x31, 0xdc, 0x90, 0xe1, 0x1a, 0x18, 0xcc,
+  0x32, 0xd8, 0xc4, 0x4d, 0x04, 0x25, 0x66, 0xa9, 0x06, 0x17, 0x3c, 0x35,
+  0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x54, 0xb8, 0xa9, 0xda, 0xb7, 0x67,
+  0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0x89, 0x9b, 0xaa, 0x05, 0xc2,
+  0x05, 0xc3, 0x54, 0x99, 0xb9, 0x1a, 0x5c, 0xf0, 0xd4, 0x88, 0xc1, 0x01,
+  0x80, 0x20, 0x18, 0x50, 0xe6, 0xf6, 0x6a, 0x62, 0x00, 0x6a, 0x23, 0x06,
+  0x07, 0x00, 0x82, 0x60, 0x40, 0x9d, 0xdb, 0xab, 0x05, 0xc2, 0x05, 0xc3,
+  0x5c, 0xf0, 0xd4, 0x1d, 0x4f, 0x9d, 0x8b, 0x0d, 0x73, 0x65, 0x35, 0xcc,
+  0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x78,
+  0xeb, 0xb6, 0x6b, 0x7a, 0x86, 0x6e, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20,
+  0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62,
+  0x80, 0x00, 0x20, 0x08, 0x06, 0x9b, 0xbc, 0x89, 0x5b, 0x42, 0x04, 0x23,
+  0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0xcd, 0xdb, 0xb8, 0x25, 0x44, 0x30,
+  0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x1b, 0xbd, 0x91, 0x5b, 0x42, 0x04,
+  0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0xf0, 0xdb, 0xb8, 0x91, 0x5a,
+  0xf0, 0x6e, 0xb4, 0x16, 0x6f, 0xa3, 0x09, 0x01, 0x70, 0xc1, 0x63, 0xb3,
+  0x04, 0x69, 0x31, 0xdc, 0x60, 0x07, 0xf4, 0x06, 0x06, 0xb3, 0x0c, 0x38,
+  0x91, 0x16, 0x81, 0xd9, 0x19, 0x9e, 0xc5, 0x67, 0x38, 0x62, 0x0f, 0xf2,
+  0x8c, 0xf8, 0x66, 0x19, 0x72, 0x82, 0x27, 0x02, 0xd3, 0x33, 0x3e, 0x88,
+  0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x30, 0xe4,
+  0x63, 0x45, 0x10, 0x9f, 0x22, 0xfc, 0x4d, 0x87, 0x1b, 0x02, 0x7e, 0x03,
+  0x83, 0x59, 0x06, 0x9d, 0xd8, 0x89, 0xc0, 0x06, 0x51, 0x83, 0xcf, 0x2c,
+  0x01, 0x58, 0x58, 0xa8, 0x11, 0xf1, 0x99, 0x25, 0x00, 0x8b, 0xe1, 0x08,
+  0x53, 0x10, 0x35, 0xe1, 0x9b, 0x65, 0xe8, 0x09, 0xb0, 0x08, 0xec, 0x14,
+  0x46, 0x2d, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2,
+  0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x48, 0x39, 0x1d, 0x6e, 0x08,
+  0x4e, 0x0e, 0x0c, 0x66, 0x19, 0x7c, 0xe2, 0x27, 0x02, 0x5b, 0xb5, 0x21,
+  0x3e, 0xb3, 0x04, 0x60, 0x61, 0x84, 0xab, 0xc1, 0x67, 0x96, 0x00, 0x2c,
+  0x06, 0x5a, 0x1e, 0x4d, 0x27, 0xb0, 0x9d, 0x20, 0x7c, 0x42, 0xf8, 0x09,
+  0xb1, 0xe0, 0x89, 0x0b, 0x86, 0xb1, 0x56, 0x8b, 0xb5, 0xf8, 0x0c, 0x47,
+  0xc8, 0x82, 0xac, 0x11, 0xdf, 0x2c, 0x43, 0x58, 0x90, 0x45, 0x60, 0xb3,
+  0x36, 0x0b, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94,
+  0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0xc4, 0xcd, 0xe9, 0x70, 0x43,
+  0x50, 0x73, 0x60, 0x30, 0xcb, 0x20, 0x16, 0x63, 0x11, 0xd8, 0xb0, 0x6b,
+  0xf0, 0x99, 0x25, 0x40, 0x0b, 0xc3, 0x35, 0x22, 0x3e, 0xb3, 0x04, 0x68,
+  0x31, 0x1c, 0xd1, 0x0b, 0xb9, 0x26, 0x7c, 0xb3, 0x0c, 0x65, 0x81, 0x16,
+  0x81, 0xf9, 0x82, 0xae, 0xc5, 0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73,
+  0xc1, 0x53, 0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x62, 0xa7,
+  0xc3, 0x0d, 0x01, 0xd8, 0x81, 0xc1, 0x2c, 0x83, 0x59, 0x9c, 0x45, 0x60,
+  0xe2, 0x36, 0xc4, 0x67, 0x96, 0x00, 0x2d, 0x8c, 0x38, 0x37, 0xf8, 0xcc,
+  0x12, 0xa0, 0xc5, 0x40, 0xcb, 0xa3, 0x89, 0x05, 0x36, 0x16, 0x84, 0x59,
+  0x08, 0x67, 0x01, 0x1b, 0x64, 0x71, 0xc1, 0x30, 0x17, 0x3c, 0x75, 0xdb,
+  0x53, 0x37, 0x6b, 0xc3, 0x9c, 0x7a, 0x0d, 0x73, 0xc4, 0x30, 0x47, 0x0c,
+  0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x1e, 0xdc, 0x81, 0xdd, 0xbf,
+  0xb5, 0xdd, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20,
+  0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
+  0xc1, 0x76, 0x77, 0x67, 0x97, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20,
+  0x18, 0x6c, 0x78, 0x87, 0x76, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08,
+  0x82, 0xc1, 0x96, 0x77, 0x69, 0x97, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80,
+  0x20, 0x18, 0x2c, 0xa1, 0x87, 0x76, 0x29, 0x17, 0xd0, 0x5d, 0xce, 0xd9,
+  0xdd, 0x68, 0x42, 0x00, 0x5c, 0xf0, 0xd8, 0x2c, 0x41, 0x5a, 0x0c, 0xb4,
+  0x3c, 0xa6, 0x41, 0x13, 0x74, 0x1e, 0xcc, 0x04, 0x4b, 0xd8, 0x84, 0x80,
+  0x16, 0x74, 0x1e, 0xdc, 0xc4, 0x2c, 0x83, 0x5a, 0xb0, 0xc5, 0x3e, 0x0c,
+  0x47, 0x80, 0xc4, 0xce, 0x0d, 0xdf, 0x85, 0xc4, 0x30, 0xc3, 0x0d, 0x81,
+  0xc9, 0x91, 0x41, 0x0d, 0x81, 0x0e, 0x47, 0x84, 0xc4, 0xcf, 0x0d, 0x5f,
+  0x05, 0x82, 0xde, 0x48, 0x0c, 0x33, 0xdc, 0x10, 0xa4, 0x1c, 0x19, 0x54,
+  0x30, 0xe8, 0x2c, 0xc3, 0x5a, 0x80, 0x46, 0x70, 0xed, 0x36, 0xcc, 0x91,
+  0xd8, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xe0, 0xa1, 0x1e, 0xde,
+  0xdd, 0x5c, 0xe9, 0x8d, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26,
+  0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x71, 0xc8, 0x88, 0x01, 0x02, 0x80,
+  0x20, 0x18, 0x6c, 0xaf, 0xf7, 0x77, 0x07, 0x11, 0x8c, 0x18, 0x20, 0x00,
+  0x08, 0x82, 0xc1, 0x06, 0x7b, 0xa0, 0xc7, 0x10, 0xc1, 0x88, 0x01, 0x02,
+  0x80, 0x20, 0x18, 0x6c, 0xb1, 0x17, 0x7a, 0x12, 0x11, 0x8c, 0x18, 0x28,
+  0x00, 0x08, 0x82, 0xc1, 0x92, 0x7b, 0xa0, 0x17, 0x76, 0x01, 0xeb, 0xc5,
+  0x9d, 0xeb, 0x8d, 0x26, 0x04, 0xc0, 0x05, 0x8f, 0xcd, 0x12, 0x80, 0xc6,
+  0x70, 0xc3, 0x4c, 0xc8, 0x1e, 0x18, 0xcc, 0x32, 0xb4, 0x85, 0x5b, 0x04,
+  0xc5, 0x73, 0xa3, 0x07, 0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08,
+  0x06, 0xd4, 0xee, 0x91, 0x9e, 0x4e, 0xd4, 0xdd, 0x88, 0xc1, 0x01, 0x80,
+  0x20, 0x18, 0x50, 0xbc, 0x47, 0x7a, 0x81, 0x70, 0xc1, 0x30, 0xf5, 0x73,
+  0xa8, 0x07, 0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x14,
+  0xf8, 0xa5, 0x1e, 0x4f, 0xe8, 0xdd, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18,
+  0x50, 0xe1, 0x97, 0x7a, 0x81, 0x70, 0xc1, 0x30, 0x17, 0x3c, 0x75, 0xc7,
+  0x53, 0x87, 0x72, 0xc3, 0xdc, 0x8f, 0x0d, 0x73, 0xc4, 0x30, 0x47, 0x0c,
+  0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x5e, 0xf9, 0xd5, 0x1e, 0xdd,
+  0x89, 0xdf, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20,
+  0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
+  0xc1, 0xc6, 0x7e, 0xbc, 0x97, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20,
+  0x18, 0x6c, 0xed, 0xd7, 0x7b, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08,
+  0x82, 0xc1, 0xe6, 0x7e, 0xbe, 0x97, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80,
+  0x20, 0x18, 0x2c, 0xf6, 0xd7, 0x7b, 0x7e, 0x17, 0xa4, 0x9f, 0xeb, 0xad,
+  0xdf, 0x68, 0x42, 0x00, 0x5c, 0xf0, 0xd8, 0x2c, 0x01, 0x68, 0x0c, 0x37,
+  0xc0, 0x85, 0xfb, 0x81, 0xc1, 0x2c, 0xc3, 0x5b, 0x80, 0x46, 0x60, 0x70,
+  0x27, 0x77, 0xf1, 0x19, 0x8e, 0xb0, 0x8b, 0xb9, 0x23, 0xbe, 0x59, 0x06,
+  0xb8, 0x98, 0x8b, 0xc0, 0xe8, 0xee, 0x2e, 0xe2, 0x63, 0xc1, 0x40, 0x9f,
+  0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7,
+  0x08, 0xfc, 0xd3, 0xe1, 0x86, 0xc0, 0xfe, 0xc0, 0x60, 0x96, 0x21, 0x2e,
+  0xe4, 0x22, 0xb0, 0x81, 0xef, 0xe0, 0x33, 0x4b, 0x70, 0x17, 0xb6, 0x77,
+  0x44, 0x7c, 0x66, 0x09, 0xee, 0x62, 0x38, 0x22, 0x34, 0xf8, 0x4e, 0xf8,
+  0x66, 0x19, 0xe8, 0xe2, 0x2e, 0x02, 0x13, 0x8d, 0xbe, 0x8b, 0x8f, 0x05,
+  0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x88, 0xe4, 0x63, 0x45,
+  0x10, 0x9f, 0x22, 0x46, 0x30, 0xd0, 0xe1, 0x86, 0x20, 0x04, 0x03, 0x30,
+  0x98, 0x65, 0xa8, 0x0b, 0xbb, 0x08, 0xac, 0xf4, 0x86, 0xf8, 0xcc, 0x12,
+  0xdc, 0x85, 0x11, 0xa8, 0x07, 0x9f, 0x59, 0x82, 0xbb, 0x18, 0x68, 0x79,
+  0xb4, 0xb8, 0xc0, 0xe4, 0x82, 0xa8, 0x0b, 0xc1, 0x2e, 0x70, 0x66, 0x2e,
+  0x2e, 0x18, 0xc6, 0x4e, 0x6f, 0xf5, 0xe2, 0x33, 0x1c, 0xc1, 0x1a, 0xac,
+  0x47, 0x7c, 0xb3, 0x0c, 0x78, 0xb1, 0x17, 0x81, 0xb5, 0x5e, 0x6b, 0xc4,
+  0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x18, 0xf2,
+  0xb1, 0x22, 0x88, 0x4f, 0x11, 0x31, 0x18, 0xe8, 0x70, 0x43, 0xf0, 0x82,
+  0x01, 0x18, 0xcc, 0x32, 0xe4, 0x85, 0x5e, 0x04, 0x36, 0xd4, 0x1e, 0x7c,
+  0x66, 0x09, 0xfe, 0xc2, 0x64, 0x8f, 0x88, 0xcf, 0x2c, 0xc1, 0x5f, 0x0c,
+  0x47, 0xdc, 0xc6, 0xec, 0x09, 0xdf, 0x2c, 0x03, 0x5f, 0xfc, 0x45, 0x60,
+  0xb8, 0x41, 0x7b, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0xf0,
+  0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x04, 0x0f, 0x06, 0x3a,
+  0xdc, 0x10, 0xe8, 0x60, 0x00, 0x06, 0xb3, 0x0c, 0x7d, 0xe1, 0x17, 0x81,
+  0xf1, 0xde, 0x10, 0x9f, 0x59, 0x82, 0xbf, 0x30, 0x22, 0xfc, 0xe0, 0x33,
+  0x4b, 0xf0, 0x17, 0x03, 0x2d, 0x8f, 0x96, 0x17, 0x98, 0x5e, 0x10, 0x7d,
+  0x21, 0xf8, 0x05, 0xe9, 0xec, 0xc5, 0x05, 0xc3, 0x5c, 0xf0, 0xd4, 0x6d,
+  0x4f, 0x5d, 0xeb, 0x0d, 0x73, 0xe4, 0x36, 0xcc, 0x11, 0xc3, 0x1c, 0x31,
+  0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x78, 0x6a, 0x18, 0xe8, 0x60,
+  0x90, 0x7f, 0x67, 0x18, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c,
+  0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x91, 0xc8, 0x88, 0x01, 0x02,
+  0x80, 0x20, 0x18, 0x6c, 0x71, 0x18, 0x84, 0x61, 0x90, 0x10, 0xc1, 0x88,
+  0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0x72, 0x18, 0x88, 0x61, 0x90, 0x10,
+  0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0x73, 0x18, 0x8c, 0x61,
+  0x90, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c, 0x7b, 0x18,
+  0x88, 0x61, 0x30, 0x82, 0x41, 0xe0, 0x86, 0xc1, 0x0c, 0x06, 0x70, 0x18,
+  0x8c, 0x26, 0x04, 0xc0, 0x05, 0x8f, 0xcd, 0x12, 0x80, 0xc6, 0x40, 0xcb,
+  0x63, 0x1a, 0x6b, 0x81, 0x86, 0x82, 0x5a, 0xb0, 0x44, 0x5b, 0x08, 0x7f,
+  0x81, 0x86, 0x82, 0x5b, 0x98, 0x7e, 0xd0, 0x60, 0x00, 0x9f, 0x59, 0x86,
+  0xd0, 0x18, 0x0d, 0xfb, 0x18, 0x8e, 0x08, 0x6c, 0x30, 0x18, 0xbe, 0x13,
+  0x86, 0x19, 0x6e, 0x08, 0x42, 0x30, 0x20, 0x83, 0x1a, 0x02, 0x1d, 0x8e,
+  0xe0, 0x0f, 0x1d, 0x0c, 0x86, 0xaf, 0x02, 0x41, 0xcf, 0x3f, 0x86, 0x19,
+  0x6e, 0x08, 0x48, 0x30, 0x20, 0x83, 0x0a, 0x06, 0x9d, 0x65, 0x10, 0x8d,
+  0xdb, 0x08, 0x0e, 0xfd, 0x86, 0xb9, 0x7f, 0x1b, 0x66, 0xc4, 0xe0, 0x00,
+  0x40, 0x10, 0x0c, 0xbc, 0x51, 0x0c, 0xe6, 0x30, 0x90, 0xc1, 0x00, 0x14,
+  0x83, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18,
+  0x4d, 0x20, 0x86, 0x22, 0x0e, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83,
+  0x4d, 0x15, 0x03, 0x3d, 0x0c, 0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10,
+  0x04, 0x83, 0x6d, 0x15, 0x83, 0x3d, 0x0c, 0x18, 0x22, 0x18, 0x31, 0x40,
+  0x00, 0x10, 0x04, 0x83, 0x8d, 0x15, 0x03, 0x3e, 0x0c, 0x24, 0x22, 0x18,
+  0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x85, 0x16, 0x83, 0x3d, 0x0c, 0x78,
+  0x30, 0x08, 0x4e, 0x31, 0x60, 0xc3, 0x20, 0x15, 0x83, 0xd1, 0x84, 0x00,
+  0xb8, 0xe0, 0xb1, 0x59, 0x82, 0xdb, 0x18, 0x6e, 0x70, 0x91, 0x56, 0x0c,
+  0xc0, 0x60, 0x96, 0x81, 0x34, 0x4a, 0x23, 0xa8, 0x1b, 0x0c, 0xfc, 0x30,
+  0x80, 0x0b, 0x9e, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0xca, 0x16,
+  0x83, 0x3f, 0x0c, 0x36, 0x38, 0x0c, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1,
+  0x80, 0xba, 0xc5, 0xe0, 0x0f, 0x83, 0x40, 0xb8, 0x60, 0x98, 0xd2, 0xc1,
+  0x60, 0x14, 0x03, 0xb8, 0xe0, 0xa9, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30,
+  0xa0, 0x76, 0x31, 0x20, 0xc5, 0xe0, 0x46, 0xea, 0x30, 0x18, 0x31, 0x38,
+  0x00, 0x10, 0x04, 0x03, 0x8a, 0x17, 0x03, 0x52, 0x0c, 0x02, 0xe1, 0x82,
+  0x61, 0x2e, 0x78, 0xea, 0x8e, 0xa7, 0x6e, 0x04, 0x83, 0x61, 0x4e, 0xe7,
+  0x86, 0x39, 0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04,
+  0x03, 0x0f, 0x1c, 0x03, 0x58, 0x0c, 0xde, 0x30, 0xe8, 0xc5, 0x60, 0x34,
   0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88,
-  0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0x53, 0x33,
-  0x1d, 0x4b, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb6, 0x35,
-  0xdb, 0xb1, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0x63,
-  0x33, 0x1e, 0x4b, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x16,
-  0x3a, 0xdb, 0x31, 0xfe, 0x0a, 0xce, 0x8c, 0xc5, 0xd2, 0x6c, 0x34, 0x21,
-  0x00, 0x2e, 0x78, 0x6c, 0x96, 0x00, 0x26, 0x06, 0x5a, 0x1e, 0xd3, 0xd8,
-  0x07, 0x1d, 0x0e, 0xf4, 0x81, 0x25, 0xfa, 0x41, 0x78, 0x09, 0x1d, 0x0e,
-  0xfc, 0x61, 0xc4, 0xc0, 0x00, 0x40, 0x10, 0x0c, 0x20, 0x3b, 0xbb, 0xb1,
-  0x77, 0x32, 0xfb, 0x80, 0x97, 0xf8, 0x98, 0x10, 0xc8, 0xc7, 0x02, 0x79,
-  0x81, 0x8f, 0x15, 0xff, 0x10, 0x1f, 0x2b, 0x02, 0xf9, 0x58, 0x10, 0x12,
-  0xf0, 0x19, 0x31, 0x30, 0x00, 0x10, 0x04, 0x03, 0xa8, 0xcf, 0x7c, 0xac,
-  0x9e, 0x4c, 0x28, 0xe2, 0x63, 0x81, 0x20, 0x1f, 0x0b, 0x0e, 0xf8, 0x5c,
-  0x60, 0xd0, 0x88, 0x81, 0x03, 0x80, 0x20, 0x18, 0x34, 0xa3, 0x26, 0x66,
-  0x2e, 0x76, 0x62, 0x77, 0x16, 0xf4, 0x58, 0x8f, 0xf5, 0x18, 0x8f, 0xe5,
-  0xd9, 0x2c, 0xc1, 0x08, 0x0d, 0x37, 0xf8, 0x55, 0x9e, 0x81, 0xc1, 0x2c,
-  0x83, 0x4c, 0x8c, 0x50, 0x30, 0x62, 0x60, 0x00, 0x20, 0x08, 0x06, 0xd0,
-  0xa8, 0x95, 0x19, 0x3f, 0x59, 0x70, 0x63, 0xf0, 0x19, 0x31, 0x30, 0x00,
-  0x10, 0x04, 0x03, 0xa8, 0xd4, 0xce, 0xac, 0x9f, 0x2c, 0xc8, 0x31, 0xf8,
-  0x8c, 0x26, 0xb8, 0xd8, 0x30, 0xdc, 0x10, 0xf4, 0x19, 0x18, 0xcc, 0x32,
-  0xcc, 0x44, 0x4d, 0x04, 0xc3, 0x11, 0x45, 0x98, 0x0d, 0xdf, 0x19, 0xc3,
-  0x0c, 0x37, 0x04, 0x2c, 0x46, 0x06, 0x35, 0x04, 0x3a, 0x1c, 0x71, 0x94,
-  0xd9, 0xf0, 0x55, 0x20, 0xe8, 0x25, 0xc3, 0x0c, 0x37, 0x04, 0x2f, 0x46,
-  0x06, 0x15, 0x0c, 0x3a, 0xcb, 0x40, 0x13, 0x69, 0x11, 0xdc, 0x7c, 0x0d,
-  0x73, 0x2a, 0x35, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x78, 0xae,
-  0xe6, 0x67, 0x3d, 0xb6, 0x6a, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04,
-  0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x1c, 0x32, 0x62, 0x80,
-  0x00, 0x20, 0x08, 0x06, 0x5b, 0xad, 0x95, 0xda, 0x41, 0x04, 0x23, 0x06,
-  0x08, 0x00, 0x82, 0x60, 0xb0, 0xd9, 0x9a, 0xa9, 0x31, 0x44, 0x30, 0x62,
-  0x80, 0x00, 0x20, 0x08, 0x06, 0xdb, 0xad, 0x9d, 0x9a, 0x44, 0x04, 0x23,
-  0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0xfc, 0x9a, 0xa9, 0x9d, 0x59, 0x20,
-  0x6b, 0x77, 0x46, 0x6b, 0xa3, 0x09, 0x01, 0x70, 0xc1, 0x63, 0xb3, 0x04,
-  0x69, 0x31, 0xdc, 0x90, 0xe1, 0x1a, 0x18, 0xcc, 0x32, 0xd8, 0xc4, 0x4d,
-  0x04, 0x25, 0x66, 0xa9, 0x06, 0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20,
-  0x08, 0x06, 0x54, 0xb8, 0xa9, 0xda, 0xb7, 0x67, 0x23, 0x06, 0x07, 0x00,
-  0x82, 0x60, 0x40, 0x89, 0x9b, 0xaa, 0x05, 0xc2, 0x05, 0xc3, 0x54, 0x99,
-  0xb9, 0x1a, 0x5c, 0xf0, 0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50,
-  0xe6, 0xf6, 0x6a, 0x62, 0x00, 0x6a, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60,
-  0x40, 0x9d, 0xdb, 0xab, 0x05, 0xc2, 0x05, 0xc3, 0x5c, 0xf0, 0xd4, 0x1d,
-  0x4f, 0x9d, 0x8b, 0x0d, 0x73, 0x65, 0x35, 0xcc, 0x11, 0xc3, 0x1c, 0x31,
-  0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x78, 0xeb, 0xb6, 0x6b, 0x7a,
-  0x86, 0x6e, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83,
-  0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08,
-  0x06, 0x9b, 0xbc, 0x89, 0x5b, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82,
-  0x60, 0xb0, 0xcd, 0xdb, 0xb8, 0x25, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20,
-  0x08, 0x06, 0x1b, 0xbd, 0x91, 0x5b, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00,
-  0x82, 0x60, 0xb0, 0xf0, 0xdb, 0xb8, 0x91, 0x5a, 0xf0, 0x6e, 0xb4, 0x16,
-  0x6f, 0xa3, 0x09, 0x01, 0x70, 0xc1, 0x63, 0xb3, 0x04, 0x69, 0x31, 0xdc,
-  0x60, 0x07, 0xf4, 0x06, 0x06, 0xb3, 0x0c, 0x38, 0x91, 0x16, 0x81, 0xd9,
-  0x19, 0x9e, 0xc5, 0x67, 0x38, 0x62, 0x0f, 0xf2, 0x8c, 0xf8, 0x66, 0x19,
-  0x72, 0x82, 0x27, 0x02, 0xd3, 0x33, 0x3e, 0x88, 0x8f, 0x05, 0x03, 0x7d,
-  0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f,
-  0x22, 0xfc, 0x4d, 0x87, 0x1b, 0x02, 0x7e, 0x03, 0x83, 0x59, 0x06, 0x9d,
-  0xd8, 0x89, 0xc0, 0x06, 0x51, 0x83, 0xcf, 0x2c, 0x01, 0x58, 0x58, 0xa8,
-  0x11, 0xf1, 0x99, 0x25, 0x00, 0x8b, 0xe1, 0x08, 0x53, 0x10, 0x35, 0xe1,
-  0x9b, 0x65, 0xe8, 0x09, 0xb0, 0x08, 0xec, 0x14, 0x46, 0x2d, 0x3e, 0x16,
-  0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20, 0x92, 0x8f, 0x15,
-  0x41, 0x7c, 0x8a, 0x48, 0x39, 0x1d, 0x6e, 0x08, 0x4e, 0x0e, 0x0c, 0x66,
-  0x19, 0x7c, 0xe2, 0x27, 0x02, 0x5b, 0xb5, 0x21, 0x3e, 0xb3, 0x04, 0x60,
-  0x61, 0x84, 0xab, 0xc1, 0x67, 0x96, 0x00, 0x2c, 0x06, 0x5a, 0x1e, 0x4d,
-  0x27, 0xb0, 0x9d, 0x20, 0x7c, 0x42, 0xf8, 0x09, 0xb1, 0xe0, 0x89, 0x0b,
-  0x86, 0xb1, 0x56, 0x8b, 0xb5, 0xf8, 0x0c, 0x47, 0xc8, 0x82, 0xac, 0x11,
-  0xdf, 0x2c, 0x43, 0x58, 0x90, 0x45, 0x60, 0xb3, 0x36, 0x0b, 0xf1, 0xb1,
-  0x60, 0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x86, 0x7c, 0xac,
-  0x08, 0xe2, 0x53, 0xc4, 0xcd, 0xe9, 0x70, 0x43, 0x50, 0x73, 0x60, 0x30,
-  0xcb, 0x20, 0x16, 0x63, 0x11, 0xd8, 0xb0, 0x6b, 0xf0, 0x99, 0x25, 0x40,
-  0x0b, 0xc3, 0x35, 0x22, 0x3e, 0xb3, 0x04, 0x68, 0x31, 0x1c, 0xd1, 0x0b,
-  0xb9, 0x26, 0x7c, 0xb3, 0x0c, 0x65, 0x81, 0x16, 0x81, 0xf9, 0x82, 0xae,
-  0xc5, 0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x44,
-  0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x62, 0xa7, 0xc3, 0x0d, 0x01, 0xd8,
-  0x81, 0xc1, 0x2c, 0x83, 0x59, 0x9c, 0x45, 0x60, 0xe2, 0x36, 0xc4, 0x67,
-  0x96, 0x00, 0x2d, 0x8c, 0x38, 0x37, 0xf8, 0xcc, 0x12, 0xa0, 0xc5, 0x40,
-  0xcb, 0xa3, 0x89, 0x05, 0x36, 0x16, 0x84, 0x59, 0x08, 0x67, 0x01, 0x1b,
-  0x64, 0x71, 0xc1, 0x30, 0x17, 0x3c, 0x75, 0xdb, 0x53, 0x37, 0x6b, 0xc3,
-  0x9c, 0x7a, 0x0d, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00,
-  0x20, 0x08, 0x06, 0x1e, 0xdc, 0x81, 0xdd, 0xbf, 0xb5, 0xdd, 0x68, 0x42,
-  0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43,
-  0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x76, 0x77, 0x67,
-  0x97, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0x78, 0x87,
-  0x76, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x96, 0x77,
-  0x69, 0x97, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c, 0xa1,
-  0x87, 0x76, 0x29, 0x17, 0xd0, 0x5d, 0xce, 0xd9, 0xdd, 0x68, 0x42, 0x00,
-  0x5c, 0xf0, 0xd8, 0x2c, 0x41, 0x5a, 0x0c, 0xb4, 0x3c, 0xa6, 0x41, 0x13,
-  0x74, 0x1e, 0xcc, 0x04, 0x4b, 0xd8, 0x84, 0x80, 0x16, 0x74, 0x1e, 0xdc,
-  0xc4, 0x2c, 0x83, 0x5a, 0xb0, 0xc5, 0x3e, 0x0c, 0x47, 0x80, 0xc4, 0xce,
-  0x0d, 0xdf, 0x85, 0xc4, 0x30, 0xc3, 0x0d, 0x81, 0xc9, 0x91, 0x41, 0x0d,
-  0x81, 0x0e, 0x47, 0x84, 0xc4, 0xcf, 0x0d, 0x5f, 0x05, 0x82, 0xde, 0x48,
-  0x0c, 0x33, 0xdc, 0x10, 0xa4, 0x1c, 0x19, 0x54, 0x30, 0xe8, 0x2c, 0xc3,
-  0x5a, 0x80, 0x46, 0x70, 0xed, 0x36, 0xcc, 0x91, 0xd8, 0x30, 0x23, 0x06,
-  0x07, 0x00, 0x82, 0x60, 0xe0, 0xa1, 0x1e, 0xde, 0xdd, 0x5c, 0xe9, 0x8d,
+  0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0x3b, 0xc7,
+  0xe0, 0x16, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60,
+  0x43, 0xc7, 0x00, 0x17, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04,
+  0xc1, 0x60, 0x4b, 0xc7, 0x20, 0x17, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x14,
+  0x00, 0x04, 0xc1, 0x60, 0x89, 0xc7, 0x00, 0x17, 0x83, 0x3c, 0x0c, 0x02,
+  0x72, 0x0c, 0x52, 0x31, 0x30, 0xc7, 0x60, 0x34, 0x21, 0x00, 0x2e, 0x78,
+  0x6c, 0x96, 0xe0, 0x36, 0x86, 0x1b, 0xd6, 0x24, 0x1d, 0x03, 0x30, 0x98,
+  0x65, 0x30, 0x8d, 0xdb, 0x08, 0x6c, 0x0d, 0x83, 0x36, 0x0c, 0xe2, 0x33,
+  0x1c, 0x71, 0x07, 0x6e, 0x18, 0x10, 0xdf, 0x2c, 0xc3, 0x69, 0xa8, 0x46,
+  0x60, 0x6f, 0x18, 0xe0, 0x41, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30,
+  0x17, 0x3c, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x31, 0x8f,
+  0x81, 0x0e, 0x37, 0x04, 0xf1, 0x18, 0x80, 0xc1, 0x2c, 0x03, 0x6a, 0xa4,
+  0x46, 0x60, 0xc3, 0x1d, 0x06, 0xf0, 0x99, 0x25, 0x70, 0x0d, 0xb3, 0xc3,
+  0x80, 0x88, 0xcf, 0x2c, 0x81, 0x6b, 0x0c, 0x47, 0x88, 0xc2, 0x1d, 0x06,
+  0xc2, 0x37, 0xcb, 0xb0, 0x1a, 0xae, 0x11, 0xd8, 0x28, 0xe0, 0x61, 0x10,
+  0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x10, 0xc9,
+  0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xf8, 0x63, 0xa0, 0xc3, 0x0d, 0x01, 0x3f,
+  0x06, 0x60, 0x30, 0xcb, 0xc0, 0x1a, 0xad, 0x11, 0x18, 0x28, 0x06, 0x43,
+  0x7c, 0x66, 0x09, 0x5c, 0xc3, 0x88, 0x51, 0x0c, 0xe0, 0x33, 0x4b, 0xe0,
+  0x1a, 0x03, 0x2d, 0x8f, 0x86, 0x1a, 0x58, 0x6a, 0x10, 0xac, 0x21, 0xb4,
+  0x86, 0x4e, 0xa8, 0xc6, 0x05, 0xc3, 0x98, 0x28, 0x06, 0xa6, 0x18, 0xc4,
+  0x67, 0x38, 0xe2, 0x54, 0x4e, 0x31, 0x20, 0xbe, 0x59, 0x86, 0xd7, 0x90,
+  0x8d, 0xc0, 0x50, 0x31, 0x40, 0x95, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82,
+  0x61, 0x2e, 0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x82,
+  0x25, 0x03, 0x1d, 0x6e, 0x08, 0x54, 0x32, 0x00, 0x83, 0x59, 0x06, 0xd8,
+  0x88, 0x8d, 0xc0, 0x06, 0x58, 0x0c, 0xe0, 0x33, 0x4b, 0x60, 0x1b, 0xd6,
+  0x8a, 0x01, 0x11, 0x9f, 0x59, 0x02, 0xdb, 0x18, 0x8e, 0x90, 0x15, 0x57,
+  0x0c, 0x84, 0x6f, 0x96, 0x61, 0x36, 0x6c, 0x23, 0xb0, 0x59, 0x79, 0xc5,
+  0x20, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20,
+  0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xb8, 0xc9, 0x40, 0x87, 0x1b, 0x82,
+  0x9a, 0x0c, 0xc0, 0x60, 0x96, 0x81, 0x36, 0x6a, 0x23, 0xb0, 0x5b, 0x0c,
+  0x86, 0xf8, 0xcc, 0x12, 0xd8, 0x86, 0x11, 0xbc, 0x18, 0xc0, 0x67, 0x96,
+  0xc0, 0x36, 0x06, 0x5a, 0x1e, 0x0d, 0x36, 0xb0, 0xd8, 0x20, 0x68, 0x43,
+  0xa8, 0x0d, 0xbd, 0x92, 0x8d, 0x0b, 0x86, 0xb9, 0xe0, 0xa9, 0xdb, 0x9e,
+  0x3a, 0x54, 0x0c, 0x86, 0xb9, 0xdf, 0x1b, 0xe6, 0x88, 0x61, 0x8e, 0x18,
+  0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xbc, 0xb2, 0x0c, 0x6a, 0x32,
+  0xa0, 0xc7, 0x40, 0x2c, 0x83, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82,
+  0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x12, 0x19, 0x31, 0x40,
+  0x00, 0x10, 0x04, 0x83, 0x8d, 0x2d, 0x03, 0x9e, 0x0c, 0x12, 0x22, 0x18,
+  0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0xad, 0x2d, 0x83, 0x9e, 0x0c, 0x12,
+  0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0xcd, 0x2d, 0x03, 0x9f,
+  0x0c, 0x12, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0xc5, 0x2e,
+  0x83, 0x9e, 0x0c, 0xfc, 0x31, 0x08, 0xd2, 0x32, 0x70, 0xc9, 0x60, 0x2d,
+  0x83, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xb1, 0x59, 0x82, 0xdb, 0x18, 0x68,
+  0x79, 0x4c, 0x43, 0x34, 0x7c, 0x54, 0x08, 0x0d, 0x96, 0x20, 0x0d, 0xc1,
+  0x36, 0x7c, 0x54, 0x28, 0x0d, 0xab, 0x97, 0x92, 0x0c, 0xe0, 0x33, 0xcb,
+  0x80, 0x1b, 0xba, 0x11, 0x2f, 0xc3, 0x11, 0x41, 0x4c, 0x06, 0xc3, 0x77,
+  0xc2, 0x30, 0xc3, 0x0d, 0x01, 0x3f, 0x06, 0x64, 0x50, 0x43, 0xa0, 0xc3,
+  0x11, 0xf7, 0x52, 0x93, 0xc1, 0xf0, 0x55, 0x20, 0xe8, 0xe5, 0xcb, 0x30,
+  0xc3, 0x0d, 0xc1, 0x3f, 0x06, 0x64, 0x50, 0xc1, 0xa0, 0xb3, 0x0c, 0xb9,
+  0xe1, 0x1e, 0xc1, 0x8d, 0x63, 0x30, 0xcc, 0xe9, 0xdf, 0x30, 0x23, 0x06,
+  0x07, 0x00, 0x82, 0x60, 0xe0, 0xf9, 0x65, 0xe0, 0x96, 0x41, 0x4b, 0x06,
+  0x7b, 0x19, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c,
+  0xc2, 0x68, 0x02, 0x31, 0x14, 0x71, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20,
+  0x18, 0x6c, 0xa5, 0x19, 0xd4, 0x65, 0x70, 0x10, 0xc1, 0x88, 0x01, 0x02,
+  0x80, 0x20, 0x18, 0x6c, 0xa6, 0x19, 0xd8, 0x65, 0xc0, 0x10, 0xc1, 0x88,
+  0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0xa7, 0x19, 0xdc, 0x65, 0x20, 0x11,
+  0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c, 0xaf, 0x19, 0xd8, 0x65,
+  0x70, 0x93, 0x41, 0x20, 0x9a, 0xc1, 0x59, 0x06, 0xa4, 0x19, 0x8c, 0x26,
+  0x04, 0xc0, 0x05, 0x8f, 0xcd, 0x12, 0xb8, 0xc7, 0x70, 0x43, 0xca, 0xa0,
+  0x66, 0x00, 0x06, 0xb3, 0x0c, 0xbb, 0xc1, 0x1b, 0x41, 0xc9, 0x64, 0x90,
+  0x97, 0x01, 0x5c, 0xf0, 0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50,
+  0xb1, 0x19, 0xe8, 0x65, 0xb0, 0xad, 0x65, 0x30, 0x62, 0x70, 0x00, 0x20,
+  0x08, 0x06, 0x94, 0x6c, 0x06, 0x7a, 0x19, 0x04, 0xc2, 0x05, 0xc3, 0x54,
+  0x4d, 0x06, 0x7e, 0x19, 0xc0, 0x05, 0x4f, 0x8d, 0x18, 0x1c, 0x00, 0x08,
+  0x82, 0x01, 0x65, 0x9b, 0xc1, 0x5f, 0x06, 0x32, 0x03, 0x97, 0xc1, 0x88,
+  0xc1, 0x01, 0x80, 0x20, 0x18, 0x50, 0xb7, 0x19, 0xfc, 0x65, 0x10, 0x08,
+  0x17, 0x0c, 0x73, 0xc1, 0x53, 0x77, 0x3c, 0x75, 0xfe, 0x18, 0x0c, 0x73,
+  0x35, 0x18, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00,
+  0x20, 0x08, 0x06, 0xde, 0x6e, 0x06, 0xab, 0x19, 0xa8, 0x65, 0x80, 0x9b,
+  0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c,
+  0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1,
+  0x26, 0x9e, 0x81, 0x6c, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08,
+  0x82, 0xc1, 0x36, 0x9e, 0xc1, 0x6c, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x20,
+  0x00, 0x08, 0x82, 0xc1, 0x46, 0x9e, 0x01, 0x6d, 0x06, 0x09, 0x11, 0x8c,
+  0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0xc2, 0x9e, 0xc1, 0x6c, 0x06, 0x74,
+  0x19, 0x04, 0xbf, 0x19, 0x90, 0x66, 0x10, 0x9e, 0xc1, 0x68, 0x42, 0x00,
+  0x5c, 0xf0, 0xd8, 0x2c, 0x81, 0x7b, 0x0c, 0x37, 0x98, 0x0d, 0x79, 0x06,
+  0x60, 0x30, 0xcb, 0xd0, 0x1b, 0xee, 0x11, 0x98, 0x59, 0x06, 0x68, 0x19,
+  0xc4, 0x67, 0x38, 0xe2, 0x0e, 0xd2, 0x32, 0x20, 0xbe, 0x59, 0x06, 0xdf,
+  0x08, 0x8f, 0xc0, 0xd4, 0x32, 0xc0, 0x83, 0xf8, 0x58, 0x30, 0xd0, 0xe7,
+  0x82, 0x61, 0x2e, 0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29,
+  0xc2, 0x3d, 0x03, 0x1d, 0x6e, 0x08, 0xd8, 0x33, 0x00, 0x83, 0x59, 0x86,
+  0xdf, 0x00, 0x8f, 0xc0, 0x06, 0xb9, 0x0c, 0xe0, 0x33, 0x4b, 0x50, 0x1e,
+  0x16, 0x97, 0x01, 0x11, 0x9f, 0x59, 0x82, 0xf2, 0x18, 0x8e, 0x10, 0x05,
+  0xb9, 0x0c, 0x84, 0x6f, 0x96, 0x41, 0x3c, 0xca, 0x23, 0xb0, 0x51, 0x98,
+  0xcb, 0x20, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2,
+  0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xc8, 0xcf, 0x40, 0x87, 0x1b,
+  0x82, 0xfb, 0x0c, 0xc0, 0x60, 0x96, 0x61, 0x3c, 0xc8, 0x23, 0xb0, 0xbd,
+  0x0c, 0x86, 0xf8, 0xcc, 0x12, 0x94, 0x87, 0x11, 0x7e, 0x19, 0xc0, 0x67,
+  0x96, 0xa0, 0x3c, 0x06, 0x5a, 0x1e, 0xed, 0x37, 0x30, 0xf0, 0x20, 0xc6,
+  0x43, 0x20, 0x0f, 0x9d, 0x08, 0x8f, 0x0b, 0x86, 0xb1, 0xbe, 0x0c, 0x42,
+  0x33, 0x88, 0xcf, 0x70, 0x84, 0xe8, 0x88, 0x66, 0x40, 0x7c, 0xb3, 0x0c,
+  0xe6, 0x91, 0x1e, 0x81, 0x8d, 0x66, 0x30, 0x3a, 0xf1, 0xb1, 0x60, 0xa0,
+  0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2,
+  0x53, 0xc4, 0x89, 0x06, 0x3a, 0xdc, 0x10, 0x94, 0x68, 0x00, 0x06, 0xb3,
+  0x0c, 0xe7, 0x81, 0x1e, 0x81, 0x0d, 0xab, 0x19, 0xc0, 0x67, 0x96, 0xa0,
+  0x3d, 0x0c, 0x35, 0x03, 0x22, 0x3e, 0xb3, 0x04, 0xed, 0x31, 0x1c, 0xd1,
+  0x3a, 0xa9, 0x19, 0x08, 0xdf, 0x2c, 0x83, 0x7a, 0xb4, 0x47, 0x60, 0xae,
+  0xa3, 0x9a, 0x41, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c,
+  0x65, 0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x21, 0xa3, 0x81, 0x0e,
+  0x37, 0x04, 0x30, 0x1a, 0x80, 0xc1, 0x2c, 0xc3, 0x7a, 0xb0, 0x47, 0x60,
+  0xb2, 0x19, 0x0c, 0xf1, 0x99, 0x25, 0x68, 0x0f, 0x23, 0x6e, 0x33, 0x80,
+  0xcf, 0x2c, 0x41, 0x7b, 0x0c, 0xb4, 0x3c, 0xda, 0x79, 0x60, 0xe8, 0x41,
+  0xac, 0x87, 0xc0, 0x1e, 0x70, 0x97, 0x1e, 0x17, 0x0c, 0x73, 0xc1, 0x53,
+  0xb7, 0x3d, 0x75, 0xa3, 0x19, 0x0c, 0x73, 0xba, 0x18, 0x0c, 0x73, 0xc4,
+  0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x1e, 0x98,
+  0x06, 0x30, 0x1a, 0xbc, 0x67, 0xd0, 0xa3, 0xc1, 0x68, 0x42, 0x00, 0x8c,
+  0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89,
+  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x76, 0xa6, 0xc1, 0x8d, 0x06,
+  0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x86, 0xa6, 0x01,
+  0x8e, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x96,
+  0xa6, 0x41, 0x8e, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82,
+  0xc1, 0x12, 0xa7, 0x01, 0x8e, 0x06, 0xf9, 0x19, 0x04, 0x64, 0x1a, 0xa4,
+  0x68, 0x60, 0xa6, 0xc1, 0x68, 0x42, 0x00, 0x5c, 0xf0, 0xd8, 0x2c, 0x81,
+  0x7b, 0x0c, 0xb4, 0x3c, 0xa6, 0x91, 0x1b, 0x34, 0x2c, 0xe0, 0x06, 0x4b,
+  0xec, 0x86, 0xd0, 0x1e, 0x34, 0x2c, 0xf0, 0xc6, 0x2c, 0xc3, 0x7b, 0xc4,
+  0xc7, 0xfa, 0x0c, 0x47, 0xbc, 0xcf, 0x8a, 0x06, 0xc3, 0x77, 0xf0, 0x33,
+  0xcc, 0x70, 0x43, 0x60, 0x9f, 0x01, 0x19, 0xd4, 0x10, 0xe8, 0x70, 0x84,
+  0xfc, 0xbc, 0x68, 0x30, 0x7c, 0x15, 0x08, 0x7a, 0xf4, 0x33, 0xcc, 0x70,
+  0x43, 0x90, 0x9f, 0x01, 0x19, 0x54, 0x30, 0xe8, 0x2c, 0x03, 0x7c, 0x94,
+  0x48, 0x70, 0xbd, 0x19, 0x0c, 0x73, 0xf4, 0x18, 0x0c, 0x33, 0x62, 0x70,
+  0x00, 0x20, 0x08, 0x06, 0x1e, 0x9e, 0x06, 0x68, 0x1a, 0x9c, 0x68, 0x50,
+  0xa7, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20,
+  0x8c, 0x26, 0x10, 0x43, 0x11, 0x87, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
+  0xc1, 0xf6, 0xa7, 0xc1, 0x9b, 0x06, 0x07, 0x11, 0x8c, 0x18, 0x20, 0x00,
+  0x08, 0x82, 0xc1, 0x06, 0xaa, 0x01, 0x9c, 0x06, 0x0c, 0x11, 0x8c, 0x18,
+  0x20, 0x00, 0x08, 0x82, 0xc1, 0x16, 0xaa, 0x41, 0x9c, 0x06, 0x12, 0x11,
+  0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0x92, 0xaa, 0x01, 0x9c, 0x06,
+  0x31, 0x1a, 0x04, 0x7c, 0x1a, 0x84, 0x69, 0xe0, 0xa7, 0xc1, 0x68, 0x42,
+  0x00, 0x5c, 0xf0, 0xd8, 0x2c, 0x41, 0x89, 0x0c, 0x37, 0x8c, 0x90, 0xa8,
+  0x06, 0x60, 0x30, 0xcb, 0x20, 0x1f, 0xf3, 0x11, 0x14, 0x8b, 0x06, 0x73,
+  0x1a, 0xc0, 0x05, 0x4f, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x01, 0xb5,
+  0xaa, 0x01, 0x9d, 0x06, 0x29, 0x54, 0xa6, 0xc1, 0x88, 0xc1, 0x01, 0x80,
+  0x20, 0x18, 0x50, 0xac, 0x1a, 0xd0, 0x69, 0x10, 0x08, 0x17, 0x0c, 0x53,
+  0x2f, 0x1a, 0xe0, 0x69, 0x00, 0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20,
+  0x08, 0x06, 0x14, 0xac, 0x06, 0x79, 0x1a, 0xb4, 0x90, 0x9a, 0x06, 0x23,
+  0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0xc5, 0x6a, 0x90, 0xa7, 0x41, 0x20,
+  0x5c, 0x30, 0xcc, 0x05, 0x4f, 0xdd, 0xf1, 0xd4, 0xe1, 0x67, 0x30, 0xcc,
+  0xbd, 0x64, 0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01,
+  0x80, 0x20, 0x18, 0x78, 0xb5, 0x1a, 0x94, 0x6a, 0x40, 0xa6, 0x81, 0xac,
+  0x06, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30,
+  0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06,
+  0x1b, 0xaf, 0x06, 0xac, 0x1a, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20,
+  0x08, 0x06, 0x5b, 0xaf, 0x06, 0xad, 0x1a, 0x24, 0x44, 0x30, 0x62, 0x80,
+  0x00, 0x20, 0x08, 0x06, 0x9b, 0xaf, 0x06, 0xae, 0x1a, 0x24, 0x44, 0x30,
+  0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x8b, 0xb9, 0x06, 0xad, 0x1a, 0xb8,
+  0x69, 0x10, 0xe4, 0x6a, 0xe0, 0xa7, 0xc1, 0xae, 0x06, 0xa3, 0x09, 0x01,
+  0x70, 0xc1, 0x63, 0xb3, 0x04, 0x25, 0x32, 0xdc, 0x00, 0x46, 0xbe, 0x1a,
+  0x80, 0xc1, 0x2c, 0x03, 0x7d, 0x94, 0x48, 0x60, 0x60, 0x1a, 0x88, 0x69,
+  0x10, 0x9f, 0xe1, 0x88, 0x32, 0x1a, 0xd3, 0x80, 0xf8, 0x66, 0x19, 0xea,
+  0x03, 0x3f, 0x02, 0x23, 0xd3, 0xc0, 0x8c, 0xe2, 0x63, 0xc1, 0x40, 0x9f,
+  0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7,
+  0x08, 0x74, 0x0d, 0x74, 0xb8, 0x21, 0x30, 0xd7, 0x00, 0x0c, 0x66, 0x19,
+  0xec, 0xe3, 0x3e, 0x02, 0x1b, 0xd8, 0x34, 0x80, 0xcf, 0x2c, 0x01, 0x7f,
+  0xd8, 0x9a, 0x06, 0x44, 0x7c, 0x66, 0x09, 0xf8, 0x63, 0x38, 0x02, 0x8e,
+  0xd8, 0x34, 0x10, 0xbe, 0x59, 0x86, 0xfc, 0xe0, 0x8f, 0xc0, 0xe2, 0xa8,
+  0x4d, 0x83, 0xf8, 0x58, 0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca,
+  0x82, 0x48, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x62, 0x5e, 0x03, 0x1d, 0x6e,
+  0x08, 0xe2, 0x35, 0x00, 0x83, 0x59, 0x06, 0xfd, 0xd8, 0x8f, 0xc0, 0xea,
+  0x34, 0x18, 0xe2, 0x33, 0x4b, 0xc0, 0x1f, 0x46, 0xe0, 0x69, 0x00, 0x9f,
+  0x59, 0x02, 0xfe, 0x18, 0x68, 0x79, 0x34, 0xfb, 0xc0, 0xee, 0x83, 0xd0,
+  0x0f, 0x61, 0x3f, 0xec, 0x31, 0xc0, 0x8f, 0x0b, 0x86, 0xb1, 0x3b, 0x0d,
+  0xf6, 0x34, 0x88, 0xcf, 0x70, 0x44, 0x1f, 0xf1, 0x69, 0x40, 0x7c, 0xb3,
+  0x0c, 0xfd, 0x01, 0x22, 0x81, 0xf5, 0x69, 0xe0, 0x47, 0xf1, 0xb1, 0x60,
+  0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08,
+  0xe2, 0x53, 0x44, 0xc8, 0x06, 0x3a, 0xdc, 0x10, 0xfc, 0x6b, 0x00, 0x06,
+  0xb3, 0x0c, 0xfe, 0xf1, 0x1f, 0x81, 0x0d, 0xa5, 0x1a, 0xc0, 0x67, 0x96,
+  0x80, 0x44, 0x4c, 0x54, 0x03, 0x22, 0x3e, 0xb3, 0x04, 0x24, 0x32, 0x1c,
+  0x81, 0x4a, 0xa3, 0x1a, 0x08, 0xdf, 0x2c, 0x43, 0x88, 0x90, 0x48, 0x60,
+  0xa9, 0x44, 0xaa, 0x41, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17,
+  0x3c, 0x65, 0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xc1, 0xb2, 0x81,
+  0x0e, 0x37, 0x04, 0x2a, 0x1b, 0x80, 0xc1, 0x2c, 0x83, 0x88, 0x8c, 0x48,
+  0x60, 0xac, 0x1a, 0x0c, 0xf1, 0x99, 0x25, 0x20, 0x11, 0x23, 0x62, 0x35,
+  0x80, 0xcf, 0x2c, 0x01, 0x89, 0x0c, 0xb4, 0x3c, 0x9a, 0x7f, 0x60, 0xff,
+  0x41, 0x88, 0x88, 0x30, 0x22, 0x66, 0x19, 0x80, 0xc8, 0x05, 0xc3, 0x5c,
+  0xf0, 0xd4, 0x6d, 0x4f, 0x5d, 0x9f, 0x06, 0xc3, 0x1c, 0x6d, 0x06, 0xc3,
+  0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x81,
+  0xa7, 0xb3, 0x81, 0xca, 0x06, 0xe9, 0x1a, 0xdc, 0x6c, 0x30, 0x9a, 0x10,
+  0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50,
+  0x44, 0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0x85, 0x6d, 0x10,
+  0xb3, 0x41, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0x89,
+  0x6d, 0x20, 0xb3, 0x41, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60,
+  0xb0, 0x8d, 0x6d, 0x30, 0xb3, 0x41, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00,
+  0x82, 0x60, 0xb0, 0xac, 0x6d, 0x20, 0xb3, 0xc1, 0xbc, 0x06, 0x81, 0xcf,
+  0x06, 0x23, 0x1b, 0x80, 0x6d, 0x30, 0x9a, 0x10, 0x00, 0x17, 0x3c, 0x36,
+  0x4b, 0x50, 0x22, 0x03, 0x2d, 0x8f, 0x69, 0xc0, 0x07, 0x8a, 0x0b, 0xef,
+  0xc1, 0x12, 0xf2, 0x21, 0x90, 0x08, 0x8a, 0x0b, 0xf3, 0x31, 0xcb, 0x60,
+  0x22, 0x28, 0x52, 0x4e, 0xc3, 0x11, 0xea, 0x54, 0xb2, 0xc1, 0xf0, 0xdd,
+  0x3a, 0x0d, 0x33, 0xdc, 0x10, 0xc0, 0x6b, 0x40, 0x06, 0x35, 0x04, 0x3a,
+  0x1c, 0xc1, 0x4e, 0x29, 0x1b, 0x0c, 0x5f, 0x05, 0x82, 0x9e, 0x3b, 0x0d,
+  0x33, 0xdc, 0x10, 0xcc, 0x6b, 0x40, 0x06, 0x15, 0x0c, 0x3a, 0xcb, 0x70,
+  0x22, 0x3c, 0x12, 0xdc, 0xad, 0x06, 0xc3, 0x9c, 0x7b, 0x06, 0xc3, 0x8c,
+  0x18, 0x1c, 0x00, 0x08, 0x82, 0x81, 0x27, 0xb7, 0x81, 0xd8, 0x06, 0x21,
+  0x1b, 0xbc, 0x6d, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a,
+  0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0xc4, 0x21, 0x23, 0x06, 0x08, 0x00,
+  0x82, 0x60, 0xb0, 0xe5, 0x6d, 0x90, 0xb6, 0xc1, 0x41, 0x04, 0x23, 0x06,
+  0x08, 0x00, 0x82, 0x60, 0xb0, 0xe9, 0x6d, 0xa0, 0xb6, 0x01, 0x43, 0x04,
+  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0xed, 0x6d, 0xb0, 0xb6, 0x81,
+  0x44, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0x8c, 0x6e, 0xa0,
+  0xb6, 0xc1, 0xca, 0x06, 0x81, 0xdd, 0x06, 0x3b, 0x1b, 0xe0, 0x6d, 0x30,
+  0x9a, 0x10, 0x00, 0x17, 0x3c, 0x36, 0x4b, 0xc0, 0x23, 0xc3, 0x0d, 0xfd,
+  0xc4, 0xb7, 0x01, 0x18, 0xcc, 0x32, 0xa4, 0x88, 0x8a, 0x04, 0x65, 0xb2,
+  0x41, 0xdb, 0x06, 0x70, 0xc1, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60,
+  0x40, 0x95, 0x6e, 0xe0, 0xb6, 0x01, 0x49, 0xfd, 0x6c, 0x30, 0x62, 0x70,
+  0x00, 0x20, 0x08, 0x06, 0x94, 0xe9, 0x06, 0x6e, 0x1b, 0x04, 0xc2, 0x05,
+  0xc3, 0x54, 0xca, 0x06, 0x72, 0x1b, 0xc0, 0x05, 0x4f, 0x8d, 0x18, 0x1c,
+  0x00, 0x08, 0x82, 0x01, 0xa5, 0xba, 0xc1, 0xdc, 0x06, 0x27, 0x45, 0xb6,
+  0xc1, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50, 0xab, 0x1b, 0xcc, 0x6d,
+  0x10, 0x08, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x77, 0x3c, 0x75, 0xf2, 0x1a,
+  0x0c, 0x73, 0x29, 0x1a, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62,
+  0x70, 0x00, 0x20, 0x08, 0x06, 0xde, 0xeb, 0x06, 0x7f, 0x1b, 0xf8, 0x6c,
+  0xc0, 0xba, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2,
+  0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08,
+  0x82, 0xc1, 0x66, 0xbb, 0x81, 0xe9, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x20,
+  0x00, 0x08, 0x82, 0xc1, 0x76, 0xbb, 0xc1, 0xe9, 0x06, 0x09, 0x11, 0x8c,
+  0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x86, 0xbb, 0x01, 0xea, 0x06, 0x09,
+  0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0x02, 0xbe, 0xc1, 0xe9,
+  0x06, 0x68, 0x1b, 0x04, 0xb3, 0x1b, 0xe0, 0x6d, 0x50, 0xbb, 0xc1, 0x68,
+  0x42, 0x00, 0x5c, 0xf0, 0xd8, 0x2c, 0x01, 0x8f, 0x0c, 0x37, 0xe8, 0x14,
+  0xee, 0x06, 0x60, 0x30, 0xcb, 0xb0, 0x22, 0x3c, 0x12, 0x98, 0xce, 0x06,
+  0x3c, 0x1b, 0xc4, 0x67, 0x38, 0x02, 0xac, 0x7a, 0x36, 0x20, 0xbe, 0x59,
+  0x06, 0x16, 0x79, 0x91, 0xc0, 0x7c, 0x36, 0x08, 0xab, 0xf8, 0x58, 0x30,
+  0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04,
+  0xf1, 0x29, 0x42, 0x7c, 0x03, 0x1d, 0x6e, 0x08, 0xc0, 0x37, 0x00, 0x83,
+  0x59, 0x86, 0x16, 0x71, 0x91, 0xc0, 0x06, 0xb3, 0x0d, 0xe0, 0x33, 0x4b,
+  0x30, 0x23, 0x56, 0xb6, 0x01, 0x11, 0x9f, 0x59, 0x82, 0x19, 0x19, 0x8e,
+  0x58, 0x2b, 0xb3, 0x0d, 0x84, 0x6f, 0x96, 0x01, 0x46, 0x66, 0x24, 0x30,
+  0xb6, 0x3a, 0xdb, 0x20, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b,
+  0x9e, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x68, 0xdf, 0x40,
+  0x87, 0x1b, 0x82, 0xf5, 0x0d, 0xc0, 0x60, 0x96, 0x21, 0x46, 0x64, 0x24,
+  0xb0, 0xb7, 0x0d, 0x86, 0xf8, 0xcc, 0x12, 0xcc, 0x88, 0x11, 0x72, 0x1b,
+  0xc0, 0x67, 0x96, 0x60, 0x46, 0x06, 0x5a, 0x1e, 0xad, 0x45, 0x30, 0x17,
+  0x21, 0x62, 0x44, 0x90, 0x11, 0x76, 0x0d, 0x5e, 0xe4, 0x82, 0x61, 0x2c,
+  0x6e, 0x83, 0xba, 0x0d, 0xe2, 0x33, 0x1c, 0x71, 0x57, 0x76, 0x1b, 0x10,
+  0xdf, 0x2c, 0x03, 0x8d, 0xdc, 0x48, 0x60, 0x77, 0x1b, 0xe0, 0x55, 0x7c,
+  0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x81, 0x21, 0x1f,
+  0x2b, 0x82, 0xf8, 0x14, 0xb1, 0xbf, 0x81, 0x0e, 0x37, 0x04, 0xf9, 0x1b,
+  0x80, 0xc1, 0x2c, 0x43, 0x8d, 0xd8, 0x48, 0x60, 0xc3, 0xdf, 0x06, 0xf0,
+  0x99, 0x25, 0xd8, 0x11, 0xe3, 0xdb, 0x80, 0x88, 0xcf, 0x2c, 0xc1, 0x8e,
+  0x0c, 0x47, 0x88, 0x56, 0xdf, 0x06, 0xc2, 0x37, 0xcb, 0x80, 0x23, 0x3b,
+  0x12, 0xd8, 0x68, 0xf9, 0x6d, 0x10, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30,
+  0xcc, 0x05, 0x4f, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0x98,
+  0x70, 0xa0, 0xc3, 0x0d, 0x01, 0x09, 0x07, 0x60, 0x30, 0xcb, 0x90, 0x23,
+  0x3a, 0x12, 0x98, 0xe9, 0x06, 0x43, 0x7c, 0x66, 0x09, 0x76, 0xc4, 0x88,
+  0xd5, 0x0d, 0xe0, 0x33, 0x4b, 0xb0, 0x23, 0x03, 0x2d, 0x8f, 0x56, 0x23,
+  0x98, 0x8d, 0x10, 0x39, 0x22, 0xe8, 0x88, 0xce, 0x06, 0x37, 0x72, 0xc1,
+  0x30, 0x17, 0x3c, 0x75, 0xdb, 0x53, 0x77, 0xb7, 0xc1, 0x30, 0xe7, 0xaa,
+  0xc1, 0x30, 0x47, 0x0c, 0x73, 0xc4, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82,
+  0x60, 0xe0, 0xd1, 0x70, 0x40, 0xc2, 0xc1, 0xf8, 0x06, 0x31, 0x1c, 0x8c,
   0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02,
-  0x31, 0x14, 0x71, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0xaf,
-  0xf7, 0x77, 0x07, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x06,
-  0x7b, 0xa0, 0xc7, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c,
-  0xb1, 0x17, 0x7a, 0x12, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1,
-  0x92, 0x7b, 0xa0, 0x17, 0x76, 0x01, 0xeb, 0xc5, 0x9d, 0xeb, 0x8d, 0x26,
-  0x04, 0xc0, 0x05, 0x8f, 0xcd, 0x12, 0x80, 0xc6, 0x70, 0xc3, 0x4c, 0xc8,
-  0x1e, 0x18, 0xcc, 0x32, 0xb4, 0x85, 0x5b, 0x04, 0xc5, 0x73, 0xa3, 0x07,
-  0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0xd4, 0xee, 0x91,
-  0x9e, 0x4e, 0xd4, 0xdd, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50, 0xbc,
-  0x47, 0x7a, 0x81, 0x70, 0xc1, 0x30, 0xf5, 0x73, 0xa8, 0x07, 0x17, 0x3c,
-  0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x14, 0xf8, 0xa5, 0x1e, 0x4f,
-  0xe8, 0xdd, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50, 0xe1, 0x97, 0x7a,
-  0x81, 0x70, 0xc1, 0x30, 0x17, 0x3c, 0x75, 0xc7, 0x53, 0x87, 0x72, 0xc3,
-  0xdc, 0x8f, 0x0d, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00,
-  0x20, 0x08, 0x06, 0x5e, 0xf9, 0xd5, 0x1e, 0xdd, 0x89, 0xdf, 0x68, 0x42,
-  0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43,
-  0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xc6, 0x7e, 0xbc,
-  0x97, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0xed, 0xd7,
-  0x7b, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xe6, 0x7e,
-  0xbe, 0x97, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c, 0xf6,
-  0xd7, 0x7b, 0x7e, 0x17, 0xa4, 0x9f, 0xeb, 0xad, 0xdf, 0x68, 0x42, 0x00,
-  0x5c, 0xf0, 0xd8, 0x2c, 0x01, 0x68, 0x0c, 0x37, 0xc0, 0x85, 0xfb, 0x81,
-  0xc1, 0x2c, 0xc3, 0x5b, 0x80, 0x46, 0x60, 0x70, 0x27, 0x77, 0xf1, 0x19,
-  0x8e, 0xb0, 0x8b, 0xb9, 0x23, 0xbe, 0x59, 0x06, 0xb8, 0x98, 0x8b, 0xc0,
-  0xe8, 0xee, 0x2e, 0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xe0,
-  0x29, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0xfc, 0xd3, 0xe1,
-  0x86, 0xc0, 0xfe, 0xc0, 0x60, 0x96, 0x21, 0x2e, 0xe4, 0x22, 0xb0, 0x81,
-  0xef, 0xe0, 0x33, 0x4b, 0x70, 0x17, 0xb6, 0x77, 0x44, 0x7c, 0x66, 0x09,
-  0xee, 0x62, 0x38, 0x22, 0x34, 0xf8, 0x4e, 0xf8, 0x66, 0x19, 0xe8, 0xe2,
-  0x2e, 0x02, 0x13, 0x8d, 0xbe, 0x8b, 0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18,
-  0xe6, 0x82, 0xa7, 0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0x46,
-  0x30, 0xd0, 0xe1, 0x86, 0x20, 0x04, 0x03, 0x30, 0x98, 0x65, 0xa8, 0x0b,
-  0xbb, 0x08, 0xac, 0xf4, 0x86, 0xf8, 0xcc, 0x12, 0xdc, 0x85, 0x11, 0xa8,
-  0x07, 0x9f, 0x59, 0x82, 0xbb, 0x18, 0x68, 0x79, 0xb4, 0xb8, 0xc0, 0xe4,
-  0x82, 0xa8, 0x0b, 0xc1, 0x2e, 0x70, 0x66, 0x2e, 0x2e, 0x18, 0xc6, 0x4e,
-  0x6f, 0xf5, 0xe2, 0x33, 0x1c, 0xc1, 0x1a, 0xac, 0x47, 0x7c, 0xb3, 0x0c,
-  0x78, 0xb1, 0x17, 0x81, 0xb5, 0x5e, 0x6b, 0xc4, 0xc7, 0x82, 0x81, 0x3e,
-  0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f,
-  0x11, 0x31, 0x18, 0xe8, 0x70, 0x43, 0xf0, 0x82, 0x01, 0x18, 0xcc, 0x32,
-  0xe4, 0x85, 0x5e, 0x04, 0x36, 0xd4, 0x1e, 0x7c, 0x66, 0x09, 0xfe, 0xc2,
-  0x64, 0x8f, 0x88, 0xcf, 0x2c, 0xc1, 0x5f, 0x0c, 0x47, 0xdc, 0xc6, 0xec,
-  0x09, 0xdf, 0x2c, 0x03, 0x5f, 0xfc, 0x45, 0x60, 0xb8, 0x41, 0x7b, 0xf1,
-  0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x91, 0x7c,
-  0xac, 0x08, 0xe2, 0x53, 0x04, 0x0f, 0x06, 0x3a, 0xdc, 0x10, 0xe8, 0x60,
-  0x00, 0x06, 0xb3, 0x0c, 0x7d, 0xe1, 0x17, 0x81, 0xf1, 0xde, 0x10, 0x9f,
-  0x59, 0x82, 0xbf, 0x30, 0x22, 0xfc, 0xe0, 0x33, 0x4b, 0xf0, 0x17, 0x03,
-  0x2d, 0x8f, 0x96, 0x17, 0x98, 0x5e, 0x10, 0x7d, 0x21, 0xf8, 0x05, 0xe9,
-  0xec, 0xc5, 0x05, 0xc3, 0x5c, 0xf0, 0xd4, 0x6d, 0x4f, 0x5d, 0xeb, 0x0d,
-  0x73, 0xe4, 0x36, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01,
-  0x80, 0x20, 0x18, 0x78, 0x6a, 0x18, 0xe8, 0x60, 0x90, 0x7f, 0x67, 0x18,
-  0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68,
-  0x02, 0x31, 0x14, 0x91, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c,
-  0x71, 0x18, 0x84, 0x61, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20,
-  0x18, 0x6c, 0x72, 0x18, 0x88, 0x61, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02,
-  0x80, 0x20, 0x18, 0x6c, 0x73, 0x18, 0x8c, 0x61, 0x90, 0x10, 0xc1, 0x88,
-  0x81, 0x02, 0x80, 0x20, 0x18, 0x2c, 0x7b, 0x18, 0x88, 0x61, 0x30, 0x82,
-  0x41, 0xe0, 0x86, 0xc1, 0x0c, 0x06, 0x70, 0x18, 0x8c, 0x26, 0x04, 0xc0,
-  0x05, 0x8f, 0xcd, 0x12, 0x80, 0xc6, 0x40, 0xcb, 0x63, 0x1a, 0x6b, 0x81,
-  0x86, 0x82, 0x5a, 0xb0, 0x44, 0x5b, 0x08, 0x7f, 0x81, 0x86, 0x82, 0x5b,
-  0x98, 0x7e, 0xd0, 0x60, 0x00, 0x9f, 0x59, 0x86, 0xd0, 0x18, 0x0d, 0xfb,
-  0x18, 0x8e, 0x08, 0x6c, 0x30, 0x18, 0xbe, 0x13, 0x86, 0x19, 0x6e, 0x08,
-  0x42, 0x30, 0x20, 0x83, 0x1a, 0x02, 0x1d, 0x8e, 0xe0, 0x0f, 0x1d, 0x0c,
-  0x86, 0xaf, 0x02, 0x41, 0xcf, 0x3f, 0x86, 0x19, 0x6e, 0x08, 0x48, 0x30,
-  0x20, 0x83, 0x0a, 0x06, 0x9d, 0x65, 0x10, 0x8d, 0xdb, 0x08, 0x0e, 0xfd,
-  0x86, 0xb9, 0x7f, 0x1b, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xbc,
-  0x51, 0x0c, 0xe6, 0x30, 0x90, 0xc1, 0x00, 0x14, 0x83, 0xd1, 0x84, 0x00,
-  0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22,
-  0x0e, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x4d, 0x15, 0x03, 0x3d,
-  0x0c, 0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x6d, 0x15,
-  0x83, 0x3d, 0x0c, 0x18, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83,
-  0x8d, 0x15, 0x03, 0x3e, 0x0c, 0x24, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10,
-  0x04, 0x83, 0x85, 0x16, 0x83, 0x3d, 0x0c, 0x78, 0x30, 0x08, 0x4e, 0x31,
-  0x60, 0xc3, 0x20, 0x15, 0x83, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xb1, 0x59,
-  0x82, 0xdb, 0x18, 0x6e, 0x70, 0x91, 0x56, 0x0c, 0xc0, 0x60, 0x96, 0x81,
-  0x34, 0x4a, 0x23, 0xa8, 0x1b, 0x0c, 0xfc, 0x30, 0x80, 0x0b, 0x9e, 0x1a,
-  0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0xca, 0x16, 0x83, 0x3f, 0x0c, 0x36,
-  0x38, 0x0c, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0xba, 0xc5, 0xe0,
-  0x0f, 0x83, 0x40, 0xb8, 0x60, 0x98, 0xd2, 0xc1, 0x60, 0x14, 0x03, 0xb8,
-  0xe0, 0xa9, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0x76, 0x31, 0x20,
-  0xc5, 0xe0, 0x46, 0xea, 0x30, 0x18, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03,
-  0x8a, 0x17, 0x03, 0x52, 0x0c, 0x02, 0xe1, 0x82, 0x61, 0x2e, 0x78, 0xea,
-  0x8e, 0xa7, 0x6e, 0x04, 0x83, 0x61, 0x4e, 0xe7, 0x86, 0x39, 0x62, 0x98,
-  0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x0f, 0x1c, 0x03,
-  0x58, 0x0c, 0xde, 0x30, 0xe8, 0xc5, 0x60, 0x34, 0x21, 0x00, 0x46, 0x13,
-  0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46,
-  0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0x3b, 0xc7, 0xe0, 0x16, 0x83, 0x84,
-  0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0x43, 0xc7, 0x00, 0x17,
-  0x83, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0x4b, 0xc7,
-  0x20, 0x17, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60,
-  0x89, 0xc7, 0x00, 0x17, 0x83, 0x3c, 0x0c, 0x02, 0x72, 0x0c, 0x52, 0x31,
-  0x30, 0xc7, 0x60, 0x34, 0x21, 0x00, 0x2e, 0x78, 0x6c, 0x96, 0xe0, 0x36,
-  0x86, 0x1b, 0xd6, 0x24, 0x1d, 0x03, 0x30, 0x98, 0x65, 0x30, 0x8d, 0xdb,
-  0x08, 0x6c, 0x0d, 0x83, 0x36, 0x0c, 0xe2, 0x33, 0x1c, 0x71, 0x07, 0x6e,
-  0x18, 0x10, 0xdf, 0x2c, 0xc3, 0x69, 0xa8, 0x46, 0x60, 0x6f, 0x18, 0xe0,
-  0x41, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x81,
-  0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x31, 0x8f, 0x81, 0x0e, 0x37, 0x04,
-  0xf1, 0x18, 0x80, 0xc1, 0x2c, 0x03, 0x6a, 0xa4, 0x46, 0x60, 0xc3, 0x1d,
-  0x06, 0xf0, 0x99, 0x25, 0x70, 0x0d, 0xb3, 0xc3, 0x80, 0x88, 0xcf, 0x2c,
-  0x81, 0x6b, 0x0c, 0x47, 0x88, 0xc2, 0x1d, 0x06, 0xc2, 0x37, 0xcb, 0xb0,
-  0x1a, 0xae, 0x11, 0xd8, 0x28, 0xe0, 0x61, 0x10, 0x1f, 0x0b, 0x1c, 0xfa,
-  0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e,
-  0x45, 0xf8, 0x63, 0xa0, 0xc3, 0x0d, 0x01, 0x3f, 0x06, 0x60, 0x30, 0xcb,
-  0xc0, 0x1a, 0xad, 0x11, 0x18, 0x28, 0x06, 0x43, 0x7c, 0x66, 0x09, 0x5c,
-  0xc3, 0x88, 0x51, 0x0c, 0xe0, 0x33, 0x4b, 0xe0, 0x1a, 0x03, 0x2d, 0x8f,
-  0x86, 0x1a, 0x58, 0x6a, 0x10, 0xac, 0x21, 0xb4, 0x86, 0x4e, 0xa8, 0xc6,
-  0x05, 0xc3, 0x98, 0x28, 0x06, 0xa6, 0x18, 0xc4, 0x67, 0x38, 0xe2, 0x54,
-  0x4e, 0x31, 0x20, 0xbe, 0x59, 0x86, 0xd7, 0x90, 0x8d, 0xc0, 0x50, 0x31,
-  0x40, 0x95, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca,
-  0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x82, 0x25, 0x03, 0x1d, 0x6e,
-  0x08, 0x54, 0x32, 0x00, 0x83, 0x59, 0x06, 0xd8, 0x88, 0x8d, 0xc0, 0x06,
-  0x58, 0x0c, 0xe0, 0x33, 0x4b, 0x60, 0x1b, 0xd6, 0x8a, 0x01, 0x11, 0x9f,
-  0x59, 0x02, 0xdb, 0x18, 0x8e, 0x90, 0x15, 0x57, 0x0c, 0x84, 0x6f, 0x96,
-  0x61, 0x36, 0x6c, 0x23, 0xb0, 0x59, 0x79, 0xc5, 0x20, 0x3e, 0x16, 0x38,
-  0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41,
-  0x7c, 0x8a, 0xb8, 0xc9, 0x40, 0x87, 0x1b, 0x82, 0x9a, 0x0c, 0xc0, 0x60,
-  0x96, 0x81, 0x36, 0x6a, 0x23, 0xb0, 0x5b, 0x0c, 0x86, 0xf8, 0xcc, 0x12,
-  0xd8, 0x86, 0x11, 0xbc, 0x18, 0xc0, 0x67, 0x96, 0xc0, 0x36, 0x06, 0x5a,
-  0x1e, 0x0d, 0x36, 0xb0, 0xd8, 0x20, 0x68, 0x43, 0xa8, 0x0d, 0xbd, 0x92,
-  0x8d, 0x0b, 0x86, 0xb9, 0xe0, 0xa9, 0xdb, 0x9e, 0x3a, 0x54, 0x0c, 0x86,
-  0xb9, 0xdf, 0x1b, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00,
-  0x40, 0x10, 0x0c, 0xbc, 0xb2, 0x0c, 0x6a, 0x32, 0xa0, 0xc7, 0x40, 0x2c,
+  0x31, 0x14, 0x91, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0x3b,
+  0x1c, 0xac, 0x70, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
+  0x6c, 0x3c, 0x1c, 0xb0, 0x70, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80,
+  0x20, 0x18, 0x6c, 0x3d, 0x1c, 0xb4, 0x70, 0x90, 0x10, 0xc1, 0x88, 0x81,
+  0x02, 0x80, 0x20, 0x18, 0x2c, 0x65, 0x1c, 0xb0, 0x70, 0xd0, 0xbe, 0x41,
+  0x80, 0xc3, 0x41, 0xff, 0x06, 0x3a, 0x1c, 0x8c, 0x26, 0x04, 0xc0, 0x05,
+  0x8f, 0xcd, 0x12, 0xf0, 0xc8, 0x40, 0xcb, 0x63, 0x1a, 0x27, 0xc2, 0xff,
+  0x82, 0x89, 0xb0, 0x44, 0x8a, 0x08, 0x3b, 0xc2, 0xff, 0x82, 0x8a, 0xcc,
+  0x32, 0xf4, 0xc8, 0x8f, 0xfc, 0xd6, 0x70, 0x84, 0xfa, 0xfc, 0x6f, 0x30,
+  0x7c, 0xb7, 0x3e, 0xc3, 0x0c, 0x37, 0x04, 0xea, 0x1b, 0x90, 0x41, 0x0d,
+  0x81, 0x0e, 0x47, 0x98, 0xd7, 0x08, 0x07, 0xc3, 0x57, 0x81, 0xa0, 0x87,
+  0x5e, 0xc3, 0x0c, 0x37, 0x04, 0xed, 0x1b, 0x90, 0x41, 0x05, 0x83, 0xce,
+  0x32, 0xf8, 0xc8, 0x9c, 0x04, 0x17, 0xbb, 0xc1, 0x30, 0x87, 0xae, 0xc1,
+  0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xe0, 0xb1, 0x71, 0xc0, 0xc3,
+  0xc1, 0xfe, 0x06, 0x69, 0x1c, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10,
+  0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x71, 0xc8, 0x88, 0x01,
+  0x02, 0x80, 0x20, 0x18, 0x6c, 0x73, 0x1c, 0x8c, 0x71, 0x70, 0x10, 0xc1,
+  0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0x74, 0x1c, 0x90, 0x71, 0xc0,
+  0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0x75, 0x1c, 0x94,
+  0x71, 0x20, 0x11, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c, 0x7d,
+  0x1c, 0x90, 0x71, 0x50, 0xc2, 0x41, 0x00, 0xc7, 0x41, 0x0d, 0x07, 0x72,
+  0x1c, 0x8c, 0x26, 0x04, 0xc0, 0x05, 0x8f, 0xcd, 0x12, 0xcc, 0xc9, 0x70,
+  0xc3, 0x7d, 0xd9, 0x71, 0x00, 0x06, 0xb3, 0x0c, 0x60, 0x12, 0x26, 0x41,
+  0x81, 0x70, 0x70, 0xc6, 0x01, 0x5c, 0xf0, 0xd4, 0x88, 0xc1, 0x01, 0x80,
+  0x20, 0x18, 0x50, 0x7f, 0x1c, 0xa0, 0x71, 0x40, 0x42, 0x39, 0x1c, 0x8c,
+  0x18, 0x1c, 0x00, 0x08, 0x82, 0x01, 0x05, 0xca, 0x01, 0x1a, 0x07, 0x81,
+  0x70, 0xc1, 0x30, 0x35, 0xc2, 0x01, 0x1b, 0x07, 0x70, 0xc1, 0x53, 0x23,
+  0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0x91, 0x72, 0xd0, 0xc6, 0x41, 0x88,
+  0xf9, 0x70, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x54, 0x29, 0x07,
+  0x6d, 0x1c, 0x04, 0xc2, 0x05, 0xc3, 0x5c, 0xf0, 0xd4, 0x1d, 0x4f, 0x1d,
+  0xfb, 0x06, 0xc3, 0xdc, 0xc8, 0x06, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3,
+  0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x81, 0x97, 0xca, 0x41, 0x1e, 0x07,
+  0x38, 0x1c, 0x98, 0x72, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30,
+  0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23, 0x06, 0x08,
+  0x00, 0x82, 0x60, 0xb0, 0xc1, 0x72, 0x00, 0xca, 0x41, 0x42, 0x04, 0x23,
+  0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0xc5, 0x72, 0x10, 0xca, 0x41, 0x42,
+  0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0xc9, 0x72, 0x20, 0xca,
+  0x41, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0xe8, 0x72,
+  0x10, 0xca, 0x81, 0x18, 0x07, 0x41, 0x2b, 0x07, 0x72, 0x1c, 0xbc, 0x72,
+  0x30, 0x9a, 0x10, 0x00, 0x17, 0x3c, 0x36, 0x4b, 0x30, 0x27, 0xc3, 0x0d,
+  0x34, 0x26, 0xcb, 0x01, 0x18, 0xcc, 0x32, 0x88, 0xc9, 0x9c, 0x04, 0x46,
+  0xc3, 0x81, 0x0d, 0x07, 0xf1, 0x19, 0x8e, 0x00, 0xa3, 0x1b, 0x0e, 0x88,
+  0x6f, 0x96, 0x61, 0x4c, 0xcc, 0x24, 0x30, 0x1c, 0x0e, 0xc2, 0x28, 0x3e,
+  0x16, 0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0xc0, 0x90, 0x8f,
+  0x15, 0x41, 0x7c, 0x8a, 0xe0, 0xe5, 0x40, 0x87, 0x1b, 0x02, 0x5d, 0x0e,
+  0xc0, 0x60, 0x96, 0x81, 0x4c, 0xca, 0x24, 0xb0, 0x01, 0x8c, 0x03, 0xf8,
+  0xcc, 0x12, 0xa8, 0x89, 0xfd, 0x70, 0x40, 0xc4, 0x67, 0x96, 0x40, 0x4d,
+  0x86, 0x23, 0xd6, 0x08, 0x8c, 0x03, 0xe1, 0x9b, 0x65, 0x38, 0x13, 0x35,
+  0x09, 0x8c, 0x8d, 0xc2, 0x38, 0x88, 0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18,
+  0xe6, 0x82, 0xa7, 0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xce,
+  0x39, 0xd0, 0xe1, 0x86, 0xa0, 0x9c, 0x03, 0x30, 0x98, 0x65, 0x40, 0x93,
+  0x34, 0x09, 0x2c, 0x8d, 0x83, 0x21, 0x3e, 0xb3, 0x04, 0x6a, 0x62, 0x04,
+  0x1b, 0x07, 0xf0, 0x99, 0x25, 0x50, 0x93, 0x81, 0x96, 0x47, 0x23, 0x13,
+  0xac, 0x4c, 0x08, 0x34, 0x11, 0xd2, 0x84, 0x1d, 0x03, 0x33, 0xb9, 0x60,
+  0x18, 0x5b, 0xe3, 0xe0, 0x8d, 0x83, 0xf8, 0x0c, 0x47, 0xc4, 0x19, 0x1c,
+  0x07, 0xc4, 0x37, 0xcb, 0xb0, 0x26, 0x6e, 0x12, 0x58, 0x1c, 0x07, 0x72,
+  0x16, 0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x60,
+  0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xd4, 0x73, 0xa0, 0xc3, 0x0d, 0xc1,
+  0x3c, 0x07, 0x60, 0x30, 0xcb, 0xc0, 0x26, 0x6d, 0x12, 0xd8, 0x90, 0xc7,
+  0x01, 0x7c, 0x66, 0x09, 0xe4, 0xc4, 0xec, 0x38, 0x20, 0xe2, 0x33, 0x4b,
+  0x20, 0x27, 0xc3, 0x11, 0x7c, 0x76, 0xc7, 0x81, 0xf0, 0xcd, 0x32, 0xbc,
+  0x89, 0x9c, 0x04, 0xd6, 0x67, 0x78, 0x1c, 0xc4, 0xc7, 0x02, 0x87, 0x3e,
+  0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f,
+  0x11, 0x20, 0x1d, 0xe8, 0x70, 0x43, 0xe0, 0xcf, 0x01, 0x18, 0xcc, 0x32,
+  0xc0, 0x49, 0x9c, 0x04, 0x06, 0xca, 0xc1, 0x10, 0x9f, 0x59, 0x02, 0x39,
+  0x31, 0xa2, 0x94, 0x03, 0xf8, 0xcc, 0x12, 0xc8, 0xc9, 0x40, 0xcb, 0xa3,
+  0xb1, 0x09, 0xd6, 0x26, 0x04, 0x9c, 0x08, 0x71, 0xe2, 0xd2, 0x81, 0x9b,
+  0x5c, 0x30, 0xcc, 0x05, 0x4f, 0xdd, 0xf6, 0xd4, 0xc5, 0x71, 0x30, 0xcc,
+  0xa1, 0x6e, 0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01,
+  0x80, 0x20, 0x18, 0x78, 0x2e, 0x1d, 0xf8, 0x73, 0xd0, 0xcb, 0xc1, 0x4a,
+  0x07, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30,
+  0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06,
+  0x5b, 0x4d, 0x07, 0x25, 0x1d, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20,
+  0x08, 0x06, 0x9b, 0x4d, 0x07, 0x26, 0x1d, 0x24, 0x44, 0x30, 0x62, 0x80,
+  0x00, 0x20, 0x08, 0x06, 0xdb, 0x4d, 0x07, 0x27, 0x1d, 0x24, 0x44, 0x30,
+  0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0xcb, 0x4f, 0x07, 0x26, 0x1d, 0x9c,
+  0x73, 0x10, 0xc8, 0x74, 0x70, 0xcf, 0x01, 0x4d, 0x07, 0xa3, 0x09, 0x01,
+  0x70, 0xc1, 0x63, 0xb3, 0x04, 0x73, 0x32, 0xd0, 0xf2, 0x98, 0x86, 0x8f,
+  0xc0, 0xe7, 0xd0, 0x23, 0x2c, 0x01, 0x26, 0x82, 0x9c, 0xc0, 0xe7, 0x10,
+  0x26, 0xb3, 0x0c, 0x74, 0x62, 0x27, 0xb9, 0x36, 0x1c, 0x91, 0x3e, 0xf9,
+  0x1c, 0x0c, 0xdf, 0xa9, 0xcf, 0x30, 0xc3, 0x0d, 0x01, 0x39, 0x07, 0x64,
+  0x50, 0x43, 0xa0, 0xc3, 0x11, 0xe0, 0xd6, 0xcf, 0xc1, 0xf0, 0x55, 0x20,
+  0xe8, 0x89, 0xdb, 0x30, 0xc3, 0x0d, 0xc1, 0x39, 0x07, 0x64, 0x50, 0xc1,
+  0xa0, 0xb3, 0x0c, 0x75, 0xa2, 0x2a, 0xc1, 0xad, 0x72, 0x30, 0xcc, 0x89,
+  0x6f, 0x30, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x78, 0x66, 0x1d,
+  0xd8, 0x74, 0x50, 0xcf, 0xc1, 0x58, 0x07, 0xa3, 0x09, 0x01, 0x30, 0x9a,
+  0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x1c, 0x32,
+  0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x5b, 0x5b, 0x07, 0x3d, 0x1d, 0x1c,
+  0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x9b, 0x5b, 0x07, 0x3e,
+  0x1d, 0x30, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xdb, 0x5b,
+  0x07, 0x3f, 0x1d, 0x48, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06,
+  0xcb, 0x5d, 0x07, 0x3e, 0x1d, 0xfc, 0x73, 0x10, 0xa8, 0x75, 0xf0, 0xd2,
+  0x01, 0x5b, 0x07, 0xa3, 0x09, 0x01, 0x70, 0xc1, 0x63, 0xb3, 0x04, 0xaa,
+  0x32, 0xdc, 0x10, 0x6f, 0x70, 0x1d, 0x80, 0xc1, 0x2c, 0xc3, 0x9d, 0xe0,
+  0x49, 0x50, 0xfa, 0x1c, 0x84, 0x75, 0x00, 0x17, 0x3c, 0x35, 0x62, 0x70,
+  0x00, 0x20, 0x08, 0x06, 0x54, 0x5e, 0x07, 0x62, 0x1d, 0x8c, 0xd0, 0x4c,
+  0x07, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0xe9, 0x75, 0x20, 0xd6,
+  0x41, 0x20, 0x5c, 0x30, 0x4c, 0xf5, 0x73, 0x60, 0xd6, 0x01, 0x5c, 0xf0,
+  0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50, 0x7e, 0x1d, 0x9c, 0x75,
+  0xb0, 0x6f, 0x38, 0x1d, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x01, 0xf5,
+  0xd7, 0xc1, 0x59, 0x07, 0x81, 0x70, 0xc1, 0x30, 0x17, 0x3c, 0x75, 0xc7,
+  0x53, 0x67, 0xce, 0xc1, 0x30, 0xd7, 0xbf, 0xc1, 0x30, 0x47, 0x0c, 0x73,
+  0xc4, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xe0, 0x8d, 0x76, 0x30,
+  0xd7, 0x81, 0x4c, 0x07, 0xa0, 0x1d, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82,
+  0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x91, 0xc8, 0x88,
+  0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0xaa, 0x1d, 0xe8, 0x75, 0x90, 0x10,
+  0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0xab, 0x1d, 0xec, 0x75,
+  0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0xac, 0x1d,
+  0xf0, 0x75, 0x90, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c,
+  0xb4, 0x1d, 0xec, 0x75, 0xc0, 0xd3, 0x41, 0x70, 0xda, 0x01, 0x5b, 0x07,
+  0xa9, 0x1d, 0x8c, 0x26, 0x04, 0xc0, 0x05, 0x8f, 0xcd, 0x12, 0xa8, 0xca,
+  0x70, 0x83, 0xcb, 0xb1, 0x76, 0x00, 0x06, 0xb3, 0x0c, 0x79, 0xa2, 0x2a,
+  0x81, 0xb9, 0x74, 0x00, 0xd3, 0x41, 0x7c, 0x86, 0x23, 0x7e, 0x28, 0xa6,
+  0x03, 0xe2, 0x9b, 0x65, 0xd0, 0x93, 0x3e, 0x09, 0x4c, 0xa6, 0x03, 0x30,
+  0x8a, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x30,
+  0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0x6c, 0x3b, 0xd0, 0xe1, 0x86, 0x80,
+  0xb6, 0x03, 0x30, 0x98, 0x65, 0xd8, 0x13, 0x3e, 0x09, 0x6c, 0xd0, 0xe9,
+  0x00, 0x3e, 0xb3, 0x04, 0xa1, 0x62, 0x39, 0x1d, 0x10, 0xf1, 0x99, 0x25,
+  0x08, 0x95, 0xe1, 0x08, 0x35, 0xd2, 0xe9, 0x40, 0xf8, 0x66, 0x19, 0xfc,
+  0x24, 0x54, 0x02, 0x5b, 0xa3, 0x9d, 0x0e, 0xe2, 0x63, 0x81, 0x43, 0x9f,
+  0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4, 0xa7,
+  0x88, 0xf0, 0x0e, 0x74, 0xb8, 0x21, 0xf8, 0xed, 0x00, 0x0c, 0x66, 0x19,
+  0xfe, 0x04, 0x54, 0x02, 0x1b, 0xeb, 0x60, 0x88, 0xcf, 0x2c, 0x41, 0xa8,
+  0x18, 0x61, 0xd6, 0x01, 0x7c, 0x66, 0x09, 0x42, 0x65, 0xa0, 0xe5, 0xd1,
+  0xf6, 0x04, 0xe3, 0x13, 0xe2, 0x4f, 0x04, 0x50, 0x51, 0xc7, 0xa0, 0x4f,
+  0x2e, 0x18, 0xc6, 0xca, 0x3a, 0x48, 0xeb, 0x20, 0x3e, 0xc3, 0x11, 0x6b,
+  0xa7, 0xd6, 0x01, 0xf1, 0xcd, 0x32, 0x88, 0x4a, 0xa9, 0x04, 0xb6, 0xd6,
+  0x01, 0xdb, 0xc5, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53,
+  0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0xef, 0x1d, 0xe8, 0x70,
+  0x43, 0xd0, 0xde, 0x01, 0x18, 0xcc, 0x32, 0x8c, 0x0a, 0xa9, 0x04, 0x36,
+  0xcc, 0x75, 0x00, 0x9f, 0x59, 0x82, 0x54, 0x31, 0xb8, 0x0e, 0x88, 0xf8,
+  0xcc, 0x12, 0xa4, 0xca, 0x70, 0x84, 0xdd, 0xc5, 0x75, 0x20, 0x7c, 0xb3,
+  0x0c, 0xa6, 0x92, 0x2a, 0x81, 0xdd, 0x9d, 0x5c, 0x07, 0xf1, 0xb1, 0xc0,
+  0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08,
+  0xe2, 0x53, 0x84, 0x7e, 0x07, 0x3a, 0xdc, 0x10, 0xe0, 0x77, 0x00, 0x06,
+  0xb3, 0x0c, 0xa7, 0x82, 0x2a, 0x81, 0xe9, 0x75, 0x30, 0xc4, 0x67, 0x96,
+  0x20, 0x55, 0x8c, 0xf8, 0xeb, 0x00, 0x3e, 0xb3, 0x04, 0xa9, 0x32, 0xd0,
+  0xf2, 0x68, 0xa3, 0x82, 0x91, 0x0a, 0x71, 0x2a, 0x02, 0xaa, 0x88, 0x7c,
+  0x50, 0x2a, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0xb7, 0x3d, 0x75, 0x6b, 0x1d,
+  0x0c, 0x73, 0xa2, 0x1c, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62,
+  0x70, 0x00, 0x20, 0x08, 0x06, 0x1e, 0x8a, 0x07, 0xf8, 0x1d, 0xdc, 0x76,
+  0x50, 0xe2, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2,
+  0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08,
+  0x82, 0xc1, 0xf6, 0xe2, 0xc1, 0x7f, 0x07, 0x09, 0x11, 0x8c, 0x18, 0x20,
+  0x00, 0x08, 0x82, 0xc1, 0x06, 0xe3, 0x01, 0x88, 0x07, 0x09, 0x11, 0x8c,
+  0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x16, 0xe3, 0x41, 0x88, 0x07, 0x09,
+  0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0x92, 0xe3, 0x01, 0x88,
+  0x07, 0xe1, 0x1d, 0x04, 0x2c, 0x1e, 0xc4, 0x77, 0xe0, 0xe2, 0xc1, 0x68,
+  0x42, 0x00, 0x5c, 0xf0, 0xd8, 0x2c, 0x81, 0xaa, 0x0c, 0xb4, 0x3c, 0xa6,
+  0x51, 0x27, 0xe4, 0x3b, 0xd0, 0x09, 0x4b, 0xdc, 0x89, 0x90, 0x2a, 0xe4,
+  0x3b, 0xe0, 0x89, 0xd9, 0x9e, 0x7c, 0x07, 0xf0, 0x99, 0x65, 0x58, 0x95,
+  0x56, 0xa1, 0xbd, 0xe1, 0x08, 0xdc, 0xa3, 0xef, 0x60, 0xf8, 0x2e, 0xf7,
+  0x86, 0x19, 0x6e, 0x08, 0x7e, 0x3b, 0x20, 0x83, 0x1a, 0x02, 0x1d, 0x8e,
+  0x28, 0xf0, 0x3b, 0x18, 0xbe, 0x0a, 0x04, 0xbd, 0x63, 0x98, 0xe1, 0x86,
+  0x40, 0xbc, 0x03, 0x32, 0xa8, 0x60, 0xd0, 0x59, 0x06, 0x56, 0x09, 0x97,
+  0xe0, 0x4c, 0x3b, 0x18, 0xe6, 0x7a, 0x39, 0x18, 0x66, 0xc4, 0xe0, 0x00,
+  0x40, 0x10, 0x0c, 0xbc, 0x30, 0x0f, 0x62, 0x3c, 0x80, 0xef, 0xc0, 0xc7,
   0x83, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18,
-  0x4d, 0x20, 0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83,
-  0x8d, 0x2d, 0x03, 0x9e, 0x0c, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10,
-  0x04, 0x83, 0xad, 0x2d, 0x83, 0x9e, 0x0c, 0x12, 0x22, 0x18, 0x31, 0x40,
-  0x00, 0x10, 0x04, 0x83, 0xcd, 0x2d, 0x03, 0x9f, 0x0c, 0x12, 0x22, 0x18,
-  0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0xc5, 0x2e, 0x83, 0x9e, 0x0c, 0xfc,
-  0x31, 0x08, 0xd2, 0x32, 0x70, 0xc9, 0x60, 0x2d, 0x83, 0xd1, 0x84, 0x00,
-  0xb8, 0xe0, 0xb1, 0x59, 0x82, 0xdb, 0x18, 0x68, 0x79, 0x4c, 0x43, 0x34,
-  0x7c, 0x54, 0x08, 0x0d, 0x96, 0x20, 0x0d, 0xc1, 0x36, 0x7c, 0x54, 0x28,
-  0x0d, 0xab, 0x97, 0x92, 0x0c, 0xe0, 0x33, 0xcb, 0x80, 0x1b, 0xba, 0x11,
-  0x2f, 0xc3, 0x11, 0x41, 0x4c, 0x06, 0xc3, 0x77, 0xc2, 0x30, 0xc3, 0x0d,
-  0x01, 0x3f, 0x06, 0x64, 0x50, 0x43, 0xa0, 0xc3, 0x11, 0xf7, 0x52, 0x93,
-  0xc1, 0xf0, 0x55, 0x20, 0xe8, 0xe5, 0xcb, 0x30, 0xc3, 0x0d, 0xc1, 0x3f,
-  0x06, 0x64, 0x50, 0xc1, 0xa0, 0xb3, 0x0c, 0xb9, 0xe1, 0x1e, 0xc1, 0x8d,
-  0x63, 0x30, 0xcc, 0xe9, 0xdf, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60,
-  0xe0, 0xf9, 0x65, 0xe0, 0x96, 0x41, 0x4b, 0x06, 0x7b, 0x19, 0x8c, 0x26,
-  0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31,
-  0x14, 0x71, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0xa5, 0x19,
-  0xd4, 0x65, 0x70, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c,
-  0xa6, 0x19, 0xd8, 0x65, 0xc0, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20,
-  0x18, 0x6c, 0xa7, 0x19, 0xdc, 0x65, 0x20, 0x11, 0xc1, 0x88, 0x81, 0x02,
-  0x80, 0x20, 0x18, 0x2c, 0xaf, 0x19, 0xd8, 0x65, 0x70, 0x93, 0x41, 0x20,
-  0x9a, 0xc1, 0x59, 0x06, 0xa4, 0x19, 0x8c, 0x26, 0x04, 0xc0, 0x05, 0x8f,
-  0xcd, 0x12, 0xb8, 0xc7, 0x70, 0x43, 0xca, 0xa0, 0x66, 0x00, 0x06, 0xb3,
-  0x0c, 0xbb, 0xc1, 0x1b, 0x41, 0xc9, 0x64, 0x90, 0x97, 0x01, 0x5c, 0xf0,
-  0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50, 0xb1, 0x19, 0xe8, 0x65,
-  0xb0, 0xad, 0x65, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x94, 0x6c,
-  0x06, 0x7a, 0x19, 0x04, 0xc2, 0x05, 0xc3, 0x54, 0x4d, 0x06, 0x7e, 0x19,
-  0xc0, 0x05, 0x4f, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x01, 0x65, 0x9b,
-  0xc1, 0x5f, 0x06, 0x32, 0x03, 0x97, 0xc1, 0x88, 0xc1, 0x01, 0x80, 0x20,
-  0x18, 0x50, 0xb7, 0x19, 0xfc, 0x65, 0x10, 0x08, 0x17, 0x0c, 0x73, 0xc1,
-  0x53, 0x77, 0x3c, 0x75, 0xfe, 0x18, 0x0c, 0x73, 0x35, 0x18, 0x0c, 0x73,
-  0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0xde,
-  0x6e, 0x06, 0xab, 0x19, 0xa8, 0x65, 0x80, 0x9b, 0xc1, 0x68, 0x42, 0x00,
-  0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11,
-  0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x26, 0x9e, 0x81, 0x6c,
-  0x06, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x36, 0x9e,
-  0xc1, 0x6c, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1,
-  0x46, 0x9e, 0x01, 0x6d, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08,
-  0x82, 0xc1, 0xc2, 0x9e, 0xc1, 0x6c, 0x06, 0x74, 0x19, 0x04, 0xbf, 0x19,
-  0x90, 0x66, 0x10, 0x9e, 0xc1, 0x68, 0x42, 0x00, 0x5c, 0xf0, 0xd8, 0x2c,
-  0x81, 0x7b, 0x0c, 0x37, 0x98, 0x0d, 0x79, 0x06, 0x60, 0x30, 0xcb, 0xd0,
-  0x1b, 0xee, 0x11, 0x98, 0x59, 0x06, 0x68, 0x19, 0xc4, 0x67, 0x38, 0xe2,
-  0x0e, 0xd2, 0x32, 0x20, 0xbe, 0x59, 0x06, 0xdf, 0x08, 0x8f, 0xc0, 0xd4,
-  0x32, 0xc0, 0x83, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78,
-  0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0xc2, 0x3d, 0x03, 0x1d,
-  0x6e, 0x08, 0xd8, 0x33, 0x00, 0x83, 0x59, 0x86, 0xdf, 0x00, 0x8f, 0xc0,
-  0x06, 0xb9, 0x0c, 0xe0, 0x33, 0x4b, 0x50, 0x1e, 0x16, 0x97, 0x01, 0x11,
-  0x9f, 0x59, 0x82, 0xf2, 0x18, 0x8e, 0x10, 0x05, 0xb9, 0x0c, 0x84, 0x6f,
-  0x96, 0x41, 0x3c, 0xca, 0x23, 0xb0, 0x51, 0x98, 0xcb, 0x20, 0x3e, 0x16,
-  0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20, 0x92, 0x8f, 0x15,
-  0x41, 0x7c, 0x8a, 0xc8, 0xcf, 0x40, 0x87, 0x1b, 0x82, 0xfb, 0x0c, 0xc0,
-  0x60, 0x96, 0x61, 0x3c, 0xc8, 0x23, 0xb0, 0xbd, 0x0c, 0x86, 0xf8, 0xcc,
-  0x12, 0x94, 0x87, 0x11, 0x7e, 0x19, 0xc0, 0x67, 0x96, 0xa0, 0x3c, 0x06,
-  0x5a, 0x1e, 0xed, 0x37, 0x30, 0xf0, 0x20, 0xc6, 0x43, 0x20, 0x0f, 0x9d,
-  0x08, 0x8f, 0x0b, 0x86, 0xb1, 0xbe, 0x0c, 0x42, 0x33, 0x88, 0xcf, 0x70,
-  0x84, 0xe8, 0x88, 0x66, 0x40, 0x7c, 0xb3, 0x0c, 0xe6, 0x91, 0x1e, 0x81,
-  0x8d, 0x66, 0x30, 0x3a, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3, 0x5c,
-  0xf0, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0xc4, 0x89, 0x06,
-  0x3a, 0xdc, 0x10, 0x94, 0x68, 0x00, 0x06, 0xb3, 0x0c, 0xe7, 0x81, 0x1e,
-  0x81, 0x0d, 0xab, 0x19, 0xc0, 0x67, 0x96, 0xa0, 0x3d, 0x0c, 0x35, 0x03,
-  0x22, 0x3e, 0xb3, 0x04, 0xed, 0x31, 0x1c, 0xd1, 0x3a, 0xa9, 0x19, 0x08,
-  0xdf, 0x2c, 0x83, 0x7a, 0xb4, 0x47, 0x60, 0xae, 0xa3, 0x9a, 0x41, 0x7c,
-  0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x41, 0x24, 0x1f,
-  0x2b, 0x82, 0xf8, 0x14, 0x21, 0xa3, 0x81, 0x0e, 0x37, 0x04, 0x30, 0x1a,
-  0x80, 0xc1, 0x2c, 0xc3, 0x7a, 0xb0, 0x47, 0x60, 0xb2, 0x19, 0x0c, 0xf1,
-  0x99, 0x25, 0x68, 0x0f, 0x23, 0x6e, 0x33, 0x80, 0xcf, 0x2c, 0x41, 0x7b,
-  0x0c, 0xb4, 0x3c, 0xda, 0x79, 0x60, 0xe8, 0x41, 0xac, 0x87, 0xc0, 0x1e,
-  0x70, 0x97, 0x1e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0xb7, 0x3d, 0x75, 0xa3,
-  0x19, 0x0c, 0x73, 0xba, 0x18, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33,
-  0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x1e, 0x98, 0x06, 0x30, 0x1a, 0xbc,
-  0x67, 0xd0, 0xa3, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68,
-  0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00,
-  0x08, 0x82, 0xc1, 0x76, 0xa6, 0xc1, 0x8d, 0x06, 0x09, 0x11, 0x8c, 0x18,
-  0x20, 0x00, 0x08, 0x82, 0xc1, 0x86, 0xa6, 0x01, 0x8e, 0x06, 0x09, 0x11,
-  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x96, 0xa6, 0x41, 0x8e, 0x06,
-  0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0x12, 0xa7, 0x01,
-  0x8e, 0x06, 0xf9, 0x19, 0x04, 0x64, 0x1a, 0xa4, 0x68, 0x60, 0xa6, 0xc1,
-  0x68, 0x42, 0x00, 0x5c, 0xf0, 0xd8, 0x2c, 0x81, 0x7b, 0x0c, 0xb4, 0x3c,
-  0xa6, 0x91, 0x1b, 0x34, 0x2c, 0xe0, 0x06, 0x4b, 0xec, 0x86, 0xd0, 0x1e,
-  0x34, 0x2c, 0xf0, 0xc6, 0x2c, 0xc3, 0x7b, 0xc4, 0xc7, 0xfa, 0x0c, 0x47,
-  0xbc, 0xcf, 0x8a, 0x06, 0xc3, 0x77, 0xf0, 0x33, 0xcc, 0x70, 0x43, 0x60,
-  0x9f, 0x01, 0x19, 0xd4, 0x10, 0xe8, 0x70, 0x84, 0xfc, 0xbc, 0x68, 0x30,
-  0x7c, 0x15, 0x08, 0x7a, 0xf4, 0x33, 0xcc, 0x70, 0x43, 0x90, 0x9f, 0x01,
-  0x19, 0x54, 0x30, 0xe8, 0x2c, 0x03, 0x7c, 0x94, 0x48, 0x70, 0xbd, 0x19,
-  0x0c, 0x73, 0xf4, 0x18, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06,
-  0x1e, 0x9e, 0x06, 0x68, 0x1a, 0x9c, 0x68, 0x50, 0xa7, 0xc1, 0x68, 0x42,
-  0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43,
-  0x11, 0x87, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xf6, 0xa7, 0xc1,
-  0x9b, 0x06, 0x07, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x06,
-  0xaa, 0x01, 0x9c, 0x06, 0x0c, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
-  0xc1, 0x16, 0xaa, 0x41, 0x9c, 0x06, 0x12, 0x11, 0x8c, 0x18, 0x28, 0x00,
-  0x08, 0x82, 0xc1, 0x92, 0xaa, 0x01, 0x9c, 0x06, 0x31, 0x1a, 0x04, 0x7c,
-  0x1a, 0x84, 0x69, 0xe0, 0xa7, 0xc1, 0x68, 0x42, 0x00, 0x5c, 0xf0, 0xd8,
-  0x2c, 0x41, 0x89, 0x0c, 0x37, 0x8c, 0x90, 0xa8, 0x06, 0x60, 0x30, 0xcb,
-  0x20, 0x1f, 0xf3, 0x11, 0x14, 0x8b, 0x06, 0x73, 0x1a, 0xc0, 0x05, 0x4f,
-  0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x01, 0xb5, 0xaa, 0x01, 0x9d, 0x06,
-  0x29, 0x54, 0xa6, 0xc1, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50, 0xac,
-  0x1a, 0xd0, 0x69, 0x10, 0x08, 0x17, 0x0c, 0x53, 0x2f, 0x1a, 0xe0, 0x69,
-  0x00, 0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x14, 0xac,
-  0x06, 0x79, 0x1a, 0xb4, 0x90, 0x9a, 0x06, 0x23, 0x06, 0x07, 0x00, 0x82,
-  0x60, 0x40, 0xc5, 0x6a, 0x90, 0xa7, 0x41, 0x20, 0x5c, 0x30, 0xcc, 0x05,
-  0x4f, 0xdd, 0xf1, 0xd4, 0xe1, 0x67, 0x30, 0xcc, 0xbd, 0x64, 0x30, 0xcc,
-  0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x78,
-  0xb5, 0x1a, 0x94, 0x6a, 0x40, 0xa6, 0x81, 0xac, 0x06, 0xa3, 0x09, 0x01,
-  0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45,
-  0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x1b, 0xaf, 0x06, 0xac,
-  0x1a, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x5b, 0xaf,
-  0x06, 0xad, 0x1a, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06,
-  0x9b, 0xaf, 0x06, 0xae, 0x1a, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20,
-  0x08, 0x06, 0x8b, 0xb9, 0x06, 0xad, 0x1a, 0xb8, 0x69, 0x10, 0xe4, 0x6a,
-  0xe0, 0xa7, 0xc1, 0xae, 0x06, 0xa3, 0x09, 0x01, 0x70, 0xc1, 0x63, 0xb3,
-  0x04, 0x25, 0x32, 0xdc, 0x00, 0x46, 0xbe, 0x1a, 0x80, 0xc1, 0x2c, 0x03,
-  0x7d, 0x94, 0x48, 0x60, 0x60, 0x1a, 0x88, 0x69, 0x10, 0x9f, 0xe1, 0x88,
-  0x32, 0x1a, 0xd3, 0x80, 0xf8, 0x66, 0x19, 0xea, 0x03, 0x3f, 0x02, 0x23,
-  0xd3, 0xc0, 0x8c, 0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xe0,
-  0x29, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0x74, 0x0d, 0x74,
-  0xb8, 0x21, 0x30, 0xd7, 0x00, 0x0c, 0x66, 0x19, 0xec, 0xe3, 0x3e, 0x02,
-  0x1b, 0xd8, 0x34, 0x80, 0xcf, 0x2c, 0x01, 0x7f, 0xd8, 0x9a, 0x06, 0x44,
-  0x7c, 0x66, 0x09, 0xf8, 0x63, 0x38, 0x02, 0x8e, 0xd8, 0x34, 0x10, 0xbe,
-  0x59, 0x86, 0xfc, 0xe0, 0x8f, 0xc0, 0xe2, 0xa8, 0x4d, 0x83, 0xf8, 0x58,
-  0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca, 0x82, 0x48, 0x3e, 0x56,
-  0x04, 0xf1, 0x29, 0x62, 0x5e, 0x03, 0x1d, 0x6e, 0x08, 0xe2, 0x35, 0x00,
-  0x83, 0x59, 0x06, 0xfd, 0xd8, 0x8f, 0xc0, 0xea, 0x34, 0x18, 0xe2, 0x33,
-  0x4b, 0xc0, 0x1f, 0x46, 0xe0, 0x69, 0x00, 0x9f, 0x59, 0x02, 0xfe, 0x18,
-  0x68, 0x79, 0x34, 0xfb, 0xc0, 0xee, 0x83, 0xd0, 0x0f, 0x61, 0x3f, 0xec,
-  0x31, 0xc0, 0x8f, 0x0b, 0x86, 0xb1, 0x3b, 0x0d, 0xf6, 0x34, 0x88, 0xcf,
-  0x70, 0x44, 0x1f, 0xf1, 0x69, 0x40, 0x7c, 0xb3, 0x0c, 0xfd, 0x01, 0x22,
-  0x81, 0xf5, 0x69, 0xe0, 0x47, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3,
-  0x5c, 0xf0, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x44, 0xc8,
-  0x06, 0x3a, 0xdc, 0x10, 0xfc, 0x6b, 0x00, 0x06, 0xb3, 0x0c, 0xfe, 0xf1,
-  0x1f, 0x81, 0x0d, 0xa5, 0x1a, 0xc0, 0x67, 0x96, 0x80, 0x44, 0x4c, 0x54,
-  0x03, 0x22, 0x3e, 0xb3, 0x04, 0x24, 0x32, 0x1c, 0x81, 0x4a, 0xa3, 0x1a,
-  0x08, 0xdf, 0x2c, 0x43, 0x88, 0x90, 0x48, 0x60, 0xa9, 0x44, 0xaa, 0x41,
-  0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x41, 0x24,
-  0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xc1, 0xb2, 0x81, 0x0e, 0x37, 0x04, 0x2a,
-  0x1b, 0x80, 0xc1, 0x2c, 0x83, 0x88, 0x8c, 0x48, 0x60, 0xac, 0x1a, 0x0c,
-  0xf1, 0x99, 0x25, 0x20, 0x11, 0x23, 0x62, 0x35, 0x80, 0xcf, 0x2c, 0x01,
-  0x89, 0x0c, 0xb4, 0x3c, 0x9a, 0x7f, 0x60, 0xff, 0x41, 0x88, 0x88, 0x30,
-  0x22, 0x66, 0x19, 0x80, 0xc8, 0x05, 0xc3, 0x5c, 0xf0, 0xd4, 0x6d, 0x4f,
-  0x5d, 0x9f, 0x06, 0xc3, 0x1c, 0x6d, 0x06, 0xc3, 0x1c, 0x31, 0xcc, 0x11,
-  0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x81, 0xa7, 0xb3, 0x81, 0xca,
-  0x06, 0xe9, 0x1a, 0xdc, 0x6c, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42,
-  0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23, 0x06,
-  0x08, 0x00, 0x82, 0x60, 0xb0, 0x85, 0x6d, 0x10, 0xb3, 0x41, 0x42, 0x04,
-  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0x89, 0x6d, 0x20, 0xb3, 0x41,
-  0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0x8d, 0x6d, 0x30,
-  0xb3, 0x41, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0xac,
-  0x6d, 0x20, 0xb3, 0xc1, 0xbc, 0x06, 0x81, 0xcf, 0x06, 0x23, 0x1b, 0x80,
-  0x6d, 0x30, 0x9a, 0x10, 0x00, 0x17, 0x3c, 0x36, 0x4b, 0x50, 0x22, 0x03,
-  0x2d, 0x8f, 0x69, 0xc0, 0x07, 0x8a, 0x0b, 0xef, 0xc1, 0x12, 0xf2, 0x21,
-  0x90, 0x08, 0x8a, 0x0b, 0xf3, 0x31, 0xcb, 0x60, 0x22, 0x28, 0x52, 0x4e,
-  0xc3, 0x11, 0xea, 0x54, 0xb2, 0xc1, 0xf0, 0xdd, 0x3a, 0x0d, 0x33, 0xdc,
-  0x10, 0xc0, 0x6b, 0x40, 0x06, 0x35, 0x04, 0x3a, 0x1c, 0xc1, 0x4e, 0x29,
-  0x1b, 0x0c, 0x5f, 0x05, 0x82, 0x9e, 0x3b, 0x0d, 0x33, 0xdc, 0x10, 0xcc,
-  0x6b, 0x40, 0x06, 0x15, 0x0c, 0x3a, 0xcb, 0x70, 0x22, 0x3c, 0x12, 0xdc,
-  0xad, 0x06, 0xc3, 0x9c, 0x7b, 0x06, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08,
-  0x82, 0x81, 0x27, 0xb7, 0x81, 0xd8, 0x06, 0x21, 0x1b, 0xbc, 0x6d, 0x30,
-  0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09,
-  0xc4, 0x50, 0xc4, 0x21, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0xe5,
-  0x6d, 0x90, 0xb6, 0xc1, 0x41, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60,
-  0xb0, 0xe9, 0x6d, 0xa0, 0xb6, 0x01, 0x43, 0x04, 0x23, 0x06, 0x08, 0x00,
-  0x82, 0x60, 0xb0, 0xed, 0x6d, 0xb0, 0xb6, 0x81, 0x44, 0x04, 0x23, 0x06,
-  0x0a, 0x00, 0x82, 0x60, 0xb0, 0x8c, 0x6e, 0xa0, 0xb6, 0xc1, 0xca, 0x06,
-  0x81, 0xdd, 0x06, 0x3b, 0x1b, 0xe0, 0x6d, 0x30, 0x9a, 0x10, 0x00, 0x17,
-  0x3c, 0x36, 0x4b, 0xc0, 0x23, 0xc3, 0x0d, 0xfd, 0xc4, 0xb7, 0x01, 0x18,
-  0xcc, 0x32, 0xa4, 0x88, 0x8a, 0x04, 0x65, 0xb2, 0x41, 0xdb, 0x06, 0x70,
-  0xc1, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0x95, 0x6e, 0xe0,
-  0xb6, 0x01, 0x49, 0xfd, 0x6c, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06,
-  0x94, 0xe9, 0x06, 0x6e, 0x1b, 0x04, 0xc2, 0x05, 0xc3, 0x54, 0xca, 0x06,
-  0x72, 0x1b, 0xc0, 0x05, 0x4f, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x01,
-  0xa5, 0xba, 0xc1, 0xdc, 0x06, 0x27, 0x45, 0xb6, 0xc1, 0x88, 0xc1, 0x01,
-  0x80, 0x20, 0x18, 0x50, 0xab, 0x1b, 0xcc, 0x6d, 0x10, 0x08, 0x17, 0x0c,
-  0x73, 0xc1, 0x53, 0x77, 0x3c, 0x75, 0xf2, 0x1a, 0x0c, 0x73, 0x29, 0x1a,
-  0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08,
-  0x06, 0xde, 0xeb, 0x06, 0x7f, 0x1b, 0xf8, 0x6c, 0xc0, 0xba, 0xc1, 0x68,
-  0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10,
-  0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x66, 0xbb,
-  0x81, 0xe9, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1,
-  0x76, 0xbb, 0xc1, 0xe9, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08,
-  0x82, 0xc1, 0x86, 0xbb, 0x01, 0xea, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x28,
-  0x00, 0x08, 0x82, 0xc1, 0x02, 0xbe, 0xc1, 0xe9, 0x06, 0x68, 0x1b, 0x04,
-  0xb3, 0x1b, 0xe0, 0x6d, 0x50, 0xbb, 0xc1, 0x68, 0x42, 0x00, 0x5c, 0xf0,
-  0xd8, 0x2c, 0x01, 0x8f, 0x0c, 0x37, 0xe8, 0x14, 0xee, 0x06, 0x60, 0x30,
-  0xcb, 0xb0, 0x22, 0x3c, 0x12, 0x98, 0xce, 0x06, 0x3c, 0x1b, 0xc4, 0x67,
-  0x38, 0x02, 0xac, 0x7a, 0x36, 0x20, 0xbe, 0x59, 0x06, 0x16, 0x79, 0x91,
-  0xc0, 0x7c, 0x36, 0x08, 0xab, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61,
-  0x2e, 0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x42, 0x7c,
-  0x03, 0x1d, 0x6e, 0x08, 0xc0, 0x37, 0x00, 0x83, 0x59, 0x86, 0x16, 0x71,
-  0x91, 0xc0, 0x06, 0xb3, 0x0d, 0xe0, 0x33, 0x4b, 0x30, 0x23, 0x56, 0xb6,
-  0x01, 0x11, 0x9f, 0x59, 0x82, 0x19, 0x19, 0x8e, 0x58, 0x2b, 0xb3, 0x0d,
-  0x84, 0x6f, 0x96, 0x01, 0x46, 0x66, 0x24, 0x30, 0xb6, 0x3a, 0xdb, 0x20,
-  0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20, 0x92,
-  0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x68, 0xdf, 0x40, 0x87, 0x1b, 0x82, 0xf5,
-  0x0d, 0xc0, 0x60, 0x96, 0x21, 0x46, 0x64, 0x24, 0xb0, 0xb7, 0x0d, 0x86,
-  0xf8, 0xcc, 0x12, 0xcc, 0x88, 0x11, 0x72, 0x1b, 0xc0, 0x67, 0x96, 0x60,
-  0x46, 0x06, 0x5a, 0x1e, 0xad, 0x45, 0x30, 0x17, 0x21, 0x62, 0x44, 0x90,
-  0x11, 0x76, 0x0d, 0x5e, 0xe4, 0x82, 0x61, 0x2c, 0x6e, 0x83, 0xba, 0x0d,
-  0xe2, 0x33, 0x1c, 0x71, 0x57, 0x76, 0x1b, 0x10, 0xdf, 0x2c, 0x03, 0x8d,
-  0xdc, 0x48, 0x60, 0x77, 0x1b, 0xe0, 0x55, 0x7c, 0x2c, 0x18, 0xe8, 0x73,
-  0xc1, 0x30, 0x17, 0x3c, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14,
-  0xb1, 0xbf, 0x81, 0x0e, 0x37, 0x04, 0xf9, 0x1b, 0x80, 0xc1, 0x2c, 0x43,
-  0x8d, 0xd8, 0x48, 0x60, 0xc3, 0xdf, 0x06, 0xf0, 0x99, 0x25, 0xd8, 0x11,
-  0xe3, 0xdb, 0x80, 0x88, 0xcf, 0x2c, 0xc1, 0x8e, 0x0c, 0x47, 0x88, 0x56,
-  0xdf, 0x06, 0xc2, 0x37, 0xcb, 0x80, 0x23, 0x3b, 0x12, 0xd8, 0x68, 0xf9,
-  0x6d, 0x10, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59,
-  0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0x98, 0x70, 0xa0, 0xc3, 0x0d,
-  0x01, 0x09, 0x07, 0x60, 0x30, 0xcb, 0x90, 0x23, 0x3a, 0x12, 0x98, 0xe9,
-  0x06, 0x43, 0x7c, 0x66, 0x09, 0x76, 0xc4, 0x88, 0xd5, 0x0d, 0xe0, 0x33,
-  0x4b, 0xb0, 0x23, 0x03, 0x2d, 0x8f, 0x56, 0x23, 0x98, 0x8d, 0x10, 0x39,
-  0x22, 0xe8, 0x88, 0xce, 0x06, 0x37, 0x72, 0xc1, 0x30, 0x17, 0x3c, 0x75,
-  0xdb, 0x53, 0x77, 0xb7, 0xc1, 0x30, 0xe7, 0xaa, 0xc1, 0x30, 0x47, 0x0c,
-  0x73, 0xc4, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xe0, 0xd1, 0x70,
-  0x40, 0xc2, 0xc1, 0xf8, 0x06, 0x31, 0x1c, 0x8c, 0x26, 0x04, 0xc0, 0x68,
-  0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x91, 0xc8,
-  0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0x3b, 0x1c, 0xac, 0x70, 0x90,
-  0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0x3c, 0x1c, 0xb0,
-  0x70, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0x3d,
-  0x1c, 0xb4, 0x70, 0x90, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18,
-  0x2c, 0x65, 0x1c, 0xb0, 0x70, 0xd0, 0xbe, 0x41, 0x80, 0xc3, 0x41, 0xff,
-  0x06, 0x3a, 0x1c, 0x8c, 0x26, 0x04, 0xc0, 0x05, 0x8f, 0xcd, 0x12, 0xf0,
-  0xc8, 0x40, 0xcb, 0x63, 0x1a, 0x27, 0xc2, 0xff, 0x82, 0x89, 0xb0, 0x44,
-  0x8a, 0x08, 0x3b, 0xc2, 0xff, 0x82, 0x8a, 0xcc, 0x32, 0xf4, 0xc8, 0x8f,
-  0xfc, 0xd6, 0x70, 0x84, 0xfa, 0xfc, 0x6f, 0x30, 0x7c, 0xb7, 0x3e, 0xc3,
-  0x0c, 0x37, 0x04, 0xea, 0x1b, 0x90, 0x41, 0x0d, 0x81, 0x0e, 0x47, 0x98,
-  0xd7, 0x08, 0x07, 0xc3, 0x57, 0x81, 0xa0, 0x87, 0x5e, 0xc3, 0x0c, 0x37,
-  0x04, 0xed, 0x1b, 0x90, 0x41, 0x05, 0x83, 0xce, 0x32, 0xf8, 0xc8, 0x9c,
-  0x04, 0x17, 0xbb, 0xc1, 0x30, 0x87, 0xae, 0xc1, 0x30, 0x23, 0x06, 0x07,
-  0x00, 0x82, 0x60, 0xe0, 0xb1, 0x71, 0xc0, 0xc3, 0xc1, 0xfe, 0x06, 0x69,
-  0x1c, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2,
-  0x68, 0x02, 0x31, 0x14, 0x71, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
-  0x6c, 0x73, 0x1c, 0x8c, 0x71, 0x70, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80,
-  0x20, 0x18, 0x6c, 0x74, 0x1c, 0x90, 0x71, 0xc0, 0x10, 0xc1, 0x88, 0x01,
-  0x02, 0x80, 0x20, 0x18, 0x6c, 0x75, 0x1c, 0x94, 0x71, 0x20, 0x11, 0xc1,
-  0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c, 0x7d, 0x1c, 0x90, 0x71, 0x50,
-  0xc2, 0x41, 0x00, 0xc7, 0x41, 0x0d, 0x07, 0x72, 0x1c, 0x8c, 0x26, 0x04,
-  0xc0, 0x05, 0x8f, 0xcd, 0x12, 0xcc, 0xc9, 0x70, 0xc3, 0x7d, 0xd9, 0x71,
-  0x00, 0x06, 0xb3, 0x0c, 0x60, 0x12, 0x26, 0x41, 0x81, 0x70, 0x70, 0xc6,
-  0x01, 0x5c, 0xf0, 0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50, 0x7f,
-  0x1c, 0xa0, 0x71, 0x40, 0x42, 0x39, 0x1c, 0x8c, 0x18, 0x1c, 0x00, 0x08,
-  0x82, 0x01, 0x05, 0xca, 0x01, 0x1a, 0x07, 0x81, 0x70, 0xc1, 0x30, 0x35,
-  0xc2, 0x01, 0x1b, 0x07, 0x70, 0xc1, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82,
-  0x60, 0x40, 0x91, 0x72, 0xd0, 0xc6, 0x41, 0x88, 0xf9, 0x70, 0x30, 0x62,
-  0x70, 0x00, 0x20, 0x08, 0x06, 0x54, 0x29, 0x07, 0x6d, 0x1c, 0x04, 0xc2,
-  0x05, 0xc3, 0x5c, 0xf0, 0xd4, 0x1d, 0x4f, 0x1d, 0xfb, 0x06, 0xc3, 0xdc,
-  0xc8, 0x06, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00,
-  0x08, 0x82, 0x81, 0x97, 0xca, 0x41, 0x1e, 0x07, 0x38, 0x1c, 0x98, 0x72,
-  0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3,
-  0x09, 0xc4, 0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0,
-  0xc1, 0x72, 0x00, 0xca, 0x41, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82,
-  0x60, 0xb0, 0xc5, 0x72, 0x10, 0xca, 0x41, 0x42, 0x04, 0x23, 0x06, 0x08,
-  0x00, 0x82, 0x60, 0xb0, 0xc9, 0x72, 0x20, 0xca, 0x41, 0x42, 0x04, 0x23,
-  0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0xe8, 0x72, 0x10, 0xca, 0x81, 0x18,
-  0x07, 0x41, 0x2b, 0x07, 0x72, 0x1c, 0xbc, 0x72, 0x30, 0x9a, 0x10, 0x00,
-  0x17, 0x3c, 0x36, 0x4b, 0x30, 0x27, 0xc3, 0x0d, 0x34, 0x26, 0xcb, 0x01,
-  0x18, 0xcc, 0x32, 0x88, 0xc9, 0x9c, 0x04, 0x46, 0xc3, 0x81, 0x0d, 0x07,
-  0xf1, 0x19, 0x8e, 0x00, 0xa3, 0x1b, 0x0e, 0x88, 0x6f, 0x96, 0x61, 0x4c,
-  0xcc, 0x24, 0x30, 0x1c, 0x0e, 0xc2, 0x28, 0x3e, 0x16, 0x0c, 0xf4, 0xb9,
-  0x60, 0x98, 0x0b, 0x9e, 0xb2, 0xc0, 0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a,
-  0xe0, 0xe5, 0x40, 0x87, 0x1b, 0x02, 0x5d, 0x0e, 0xc0, 0x60, 0x96, 0x81,
-  0x4c, 0xca, 0x24, 0xb0, 0x01, 0x8c, 0x03, 0xf8, 0xcc, 0x12, 0xa8, 0x89,
-  0xfd, 0x70, 0x40, 0xc4, 0x67, 0x96, 0x40, 0x4d, 0x86, 0x23, 0xd6, 0x08,
-  0x8c, 0x03, 0xe1, 0x9b, 0x65, 0x38, 0x13, 0x35, 0x09, 0x8c, 0x8d, 0xc2,
-  0x38, 0x88, 0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c,
-  0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xce, 0x39, 0xd0, 0xe1, 0x86,
-  0xa0, 0x9c, 0x03, 0x30, 0x98, 0x65, 0x40, 0x93, 0x34, 0x09, 0x2c, 0x8d,
-  0x83, 0x21, 0x3e, 0xb3, 0x04, 0x6a, 0x62, 0x04, 0x1b, 0x07, 0xf0, 0x99,
-  0x25, 0x50, 0x93, 0x81, 0x96, 0x47, 0x23, 0x13, 0xac, 0x4c, 0x08, 0x34,
-  0x11, 0xd2, 0x84, 0x1d, 0x03, 0x33, 0xb9, 0x60, 0x18, 0x5b, 0xe3, 0xe0,
-  0x8d, 0x83, 0xf8, 0x0c, 0x47, 0xc4, 0x19, 0x1c, 0x07, 0xc4, 0x37, 0xcb,
-  0xb0, 0x26, 0x6e, 0x12, 0x58, 0x1c, 0x07, 0x72, 0x16, 0x1f, 0x0b, 0x06,
+  0x4d, 0x20, 0x86, 0x22, 0x0e, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83,
+  0x0d, 0xcd, 0x03, 0x1c, 0x0f, 0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10,
+  0x04, 0x83, 0x2d, 0xcd, 0x83, 0x1c, 0x0f, 0x18, 0x22, 0x18, 0x31, 0x40,
+  0x00, 0x10, 0x04, 0x83, 0x4d, 0xcd, 0x03, 0x1d, 0x0f, 0x24, 0x22, 0x18,
+  0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x45, 0xce, 0x83, 0x1c, 0x0f, 0xf4,
+  0x3b, 0x08, 0xca, 0x3c, 0x50, 0xf1, 0xe0, 0xcc, 0x83, 0xd1, 0x84, 0x00,
+  0xb8, 0xe0, 0xb1, 0x59, 0x82, 0x70, 0x19, 0x6e, 0x60, 0xbf, 0x35, 0x0f,
+  0xc0, 0x60, 0x96, 0xc1, 0x55, 0x5e, 0x25, 0xa8, 0xfa, 0x0e, 0x78, 0x3c,
+  0x80, 0x0b, 0x9e, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x8a, 0xce,
+  0x83, 0x1e, 0x0f, 0xe4, 0xcf, 0xc5, 0x83, 0x11, 0x83, 0x03, 0x00, 0x41,
+  0x30, 0xa0, 0xea, 0x3c, 0xe8, 0xf1, 0x20, 0x10, 0x2e, 0x18, 0xa6, 0xf0,
+  0x3b, 0x08, 0xf3, 0x00, 0x2e, 0x78, 0x6a, 0xc4, 0xe0, 0x00, 0x40, 0x10,
+  0x0c, 0xa8, 0x3c, 0x0f, 0xc4, 0x3c, 0x00, 0x83, 0x19, 0x0f, 0x46, 0x0c,
+  0x0e, 0x00, 0x04, 0xc1, 0x80, 0xd2, 0xf3, 0x40, 0xcc, 0x83, 0x40, 0xb8,
+  0x60, 0x98, 0x0b, 0x9e, 0xba, 0xe3, 0xa9, 0x0b, 0xef, 0x60, 0x98, 0xc3,
+  0xe7, 0x60, 0x98, 0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00,
+  0x41, 0x30, 0xf0, 0xfc, 0x3c, 0x70, 0xf3, 0xa0, 0xc5, 0x83, 0x3d, 0x0f,
+  0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34,
+  0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb6,
+  0x52, 0x0f, 0xea, 0x3c, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10,
+  0x0c, 0x36, 0x53, 0x0f, 0xec, 0x3c, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01,
+  0x40, 0x10, 0x0c, 0xb6, 0x53, 0x0f, 0xee, 0x3c, 0x48, 0x88, 0x60, 0xc4,
+  0x40, 0x01, 0x40, 0x10, 0x0c, 0x96, 0x57, 0x0f, 0xec, 0x3c, 0xb8, 0xf1,
+  0x20, 0x10, 0xf5, 0xe0, 0xcc, 0x03, 0x52, 0x0f, 0x46, 0x13, 0x02, 0xe0,
+  0x82, 0xc7, 0x66, 0x09, 0xc2, 0x65, 0xb8, 0x21, 0x05, 0x83, 0x53, 0x0f,
+  0xc0, 0x60, 0x96, 0x01, 0x56, 0xc2, 0x25, 0xb0, 0x14, 0x0f, 0x56, 0x3c,
+  0x88, 0xcf, 0x70, 0x84, 0x0b, 0x06, 0x2c, 0x1e, 0x10, 0xdf, 0x2c, 0x43,
+  0xac, 0xd0, 0x4a, 0x60, 0x2d, 0x1e, 0xbc, 0x60, 0x10, 0x1f, 0x0b, 0x06,
   0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x60, 0xc8, 0xc7, 0x8a, 0x20,
-  0x3e, 0x45, 0xd4, 0x73, 0xa0, 0xc3, 0x0d, 0xc1, 0x3c, 0x07, 0x60, 0x30,
-  0xcb, 0xc0, 0x26, 0x6d, 0x12, 0xd8, 0x90, 0xc7, 0x01, 0x7c, 0x66, 0x09,
-  0xe4, 0xc4, 0xec, 0x38, 0x20, 0xe2, 0x33, 0x4b, 0x20, 0x27, 0xc3, 0x11,
-  0x7c, 0x76, 0xc7, 0x81, 0xf0, 0xcd, 0x32, 0xbc, 0x89, 0x9c, 0x04, 0xd6,
-  0x67, 0x78, 0x1c, 0xc4, 0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73, 0xc1,
-  0x53, 0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x20, 0x1d, 0xe8,
-  0x70, 0x43, 0xe0, 0xcf, 0x01, 0x18, 0xcc, 0x32, 0xc0, 0x49, 0x9c, 0x04,
-  0x06, 0xca, 0xc1, 0x10, 0x9f, 0x59, 0x02, 0x39, 0x31, 0xa2, 0x94, 0x03,
-  0xf8, 0xcc, 0x12, 0xc8, 0xc9, 0x40, 0xcb, 0xa3, 0xb1, 0x09, 0xd6, 0x26,
-  0x04, 0x9c, 0x08, 0x71, 0xe2, 0xd2, 0x81, 0x9b, 0x5c, 0x30, 0xcc, 0x05,
-  0x4f, 0xdd, 0xf6, 0xd4, 0xc5, 0x71, 0x30, 0xcc, 0xa1, 0x6e, 0x30, 0xcc,
-  0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x78,
-  0x2e, 0x1d, 0xf8, 0x73, 0xd0, 0xcb, 0xc1, 0x4a, 0x07, 0xa3, 0x09, 0x01,
-  0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45,
-  0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x5b, 0x4d, 0x07, 0x25,
-  0x1d, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x9b, 0x4d,
-  0x07, 0x26, 0x1d, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06,
-  0xdb, 0x4d, 0x07, 0x27, 0x1d, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20,
-  0x08, 0x06, 0xcb, 0x4f, 0x07, 0x26, 0x1d, 0x9c, 0x73, 0x10, 0xc8, 0x74,
-  0x70, 0xcf, 0x01, 0x4d, 0x07, 0xa3, 0x09, 0x01, 0x70, 0xc1, 0x63, 0xb3,
-  0x04, 0x73, 0x32, 0xd0, 0xf2, 0x98, 0x86, 0x8f, 0xc0, 0xe7, 0xd0, 0x23,
-  0x2c, 0x01, 0x26, 0x82, 0x9c, 0xc0, 0xe7, 0x10, 0x26, 0xb3, 0x0c, 0x74,
-  0x62, 0x27, 0xb9, 0x36, 0x1c, 0x91, 0x3e, 0xf9, 0x1c, 0x0c, 0xdf, 0xa9,
-  0xcf, 0x30, 0xc3, 0x0d, 0x01, 0x39, 0x07, 0x64, 0x50, 0x43, 0xa0, 0xc3,
-  0x11, 0xe0, 0xd6, 0xcf, 0xc1, 0xf0, 0x55, 0x20, 0xe8, 0x89, 0xdb, 0x30,
-  0xc3, 0x0d, 0xc1, 0x39, 0x07, 0x64, 0x50, 0xc1, 0xa0, 0xb3, 0x0c, 0x75,
-  0xa2, 0x2a, 0xc1, 0xad, 0x72, 0x30, 0xcc, 0x89, 0x6f, 0x30, 0xcc, 0x88,
-  0xc1, 0x01, 0x80, 0x20, 0x18, 0x78, 0x66, 0x1d, 0xd8, 0x74, 0x50, 0xcf,
-  0xc1, 0x58, 0x07, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09,
-  0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x1c, 0x32, 0x62, 0x80, 0x00, 0x20,
-  0x08, 0x06, 0x5b, 0x5b, 0x07, 0x3d, 0x1d, 0x1c, 0x44, 0x30, 0x62, 0x80,
-  0x00, 0x20, 0x08, 0x06, 0x9b, 0x5b, 0x07, 0x3e, 0x1d, 0x30, 0x44, 0x30,
-  0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xdb, 0x5b, 0x07, 0x3f, 0x1d, 0x48,
-  0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0xcb, 0x5d, 0x07, 0x3e,
-  0x1d, 0xfc, 0x73, 0x10, 0xa8, 0x75, 0xf0, 0xd2, 0x01, 0x5b, 0x07, 0xa3,
-  0x09, 0x01, 0x70, 0xc1, 0x63, 0xb3, 0x04, 0xaa, 0x32, 0xdc, 0x10, 0x6f,
-  0x70, 0x1d, 0x80, 0xc1, 0x2c, 0xc3, 0x9d, 0xe0, 0x49, 0x50, 0xfa, 0x1c,
-  0x84, 0x75, 0x00, 0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06,
-  0x54, 0x5e, 0x07, 0x62, 0x1d, 0x8c, 0xd0, 0x4c, 0x07, 0x23, 0x06, 0x07,
-  0x00, 0x82, 0x60, 0x40, 0xe9, 0x75, 0x20, 0xd6, 0x41, 0x20, 0x5c, 0x30,
-  0x4c, 0xf5, 0x73, 0x60, 0xd6, 0x01, 0x5c, 0xf0, 0xd4, 0x88, 0xc1, 0x01,
-  0x80, 0x20, 0x18, 0x50, 0x7e, 0x1d, 0x9c, 0x75, 0xb0, 0x6f, 0x38, 0x1d,
-  0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x01, 0xf5, 0xd7, 0xc1, 0x59, 0x07,
-  0x81, 0x70, 0xc1, 0x30, 0x17, 0x3c, 0x75, 0xc7, 0x53, 0x67, 0xce, 0xc1,
-  0x30, 0xd7, 0xbf, 0xc1, 0x30, 0x47, 0x0c, 0x73, 0xc4, 0x30, 0x23, 0x06,
-  0x07, 0x00, 0x82, 0x60, 0xe0, 0x8d, 0x76, 0x30, 0xd7, 0x81, 0x4c, 0x07,
-  0xa0, 0x1d, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c,
-  0xc2, 0x68, 0x02, 0x31, 0x14, 0x91, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20,
-  0x18, 0x6c, 0xaa, 0x1d, 0xe8, 0x75, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02,
-  0x80, 0x20, 0x18, 0x6c, 0xab, 0x1d, 0xec, 0x75, 0x90, 0x10, 0xc1, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0xac, 0x1d, 0xf0, 0x75, 0x90, 0x10,
-  0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c, 0xb4, 0x1d, 0xec, 0x75,
-  0xc0, 0xd3, 0x41, 0x70, 0xda, 0x01, 0x5b, 0x07, 0xa9, 0x1d, 0x8c, 0x26,
-  0x04, 0xc0, 0x05, 0x8f, 0xcd, 0x12, 0xa8, 0xca, 0x70, 0x83, 0xcb, 0xb1,
-  0x76, 0x00, 0x06, 0xb3, 0x0c, 0x79, 0xa2, 0x2a, 0x81, 0xb9, 0x74, 0x00,
-  0xd3, 0x41, 0x7c, 0x86, 0x23, 0x7e, 0x28, 0xa6, 0x03, 0xe2, 0x9b, 0x65,
-  0xd0, 0x93, 0x3e, 0x09, 0x4c, 0xa6, 0x03, 0x30, 0x8a, 0x8f, 0x05, 0x03,
+  0x3e, 0x45, 0xc4, 0x7a, 0xa0, 0xc3, 0x0d, 0xc1, 0xab, 0x07, 0x60, 0x30,
+  0xcb, 0x20, 0x2b, 0xb3, 0x12, 0xd8, 0x50, 0xe3, 0x01, 0x7c, 0x66, 0x09,
+  0x70, 0xc5, 0x68, 0x3c, 0x20, 0xe2, 0x33, 0x4b, 0x80, 0x2b, 0xc3, 0x11,
+  0x39, 0x18, 0xd4, 0x78, 0x20, 0x7c, 0xb3, 0x0c, 0xb5, 0x82, 0x2b, 0x81,
+  0xe9, 0x60, 0x60, 0xe3, 0x41, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30,
+  0x17, 0x3c, 0x65, 0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xc1, 0xeb,
+  0x81, 0x0e, 0x37, 0x04, 0xba, 0x1e, 0x80, 0xc1, 0x2c, 0x83, 0xad, 0xdc,
+  0x4a, 0x60, 0x3e, 0x1e, 0x0c, 0xf1, 0x99, 0x25, 0xc0, 0x15, 0x23, 0xc2,
+  0x3c, 0x80, 0xcf, 0x2c, 0x01, 0xae, 0x0c, 0xb4, 0x3c, 0x9a, 0xac, 0x60,
+  0xb3, 0x42, 0xd8, 0x8a, 0x70, 0x2b, 0xa8, 0x28, 0xd0, 0xca, 0x05, 0xc3,
+  0x18, 0x98, 0x07, 0x64, 0x1e, 0xc4, 0x67, 0x38, 0x02, 0x16, 0xca, 0x3c,
+  0x20, 0xbe, 0x59, 0x86, 0x5c, 0xe1, 0x95, 0xc0, 0xcc, 0x3c, 0x88, 0x85,
+  0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca, 0x02, 0x43,
+  0x3e, 0x56, 0x04, 0xf1, 0x29, 0x42, 0xdd, 0x03, 0x1d, 0x6e, 0x08, 0xd0,
+  0x3d, 0x00, 0x83, 0x59, 0x06, 0x5d, 0xd9, 0x95, 0xc0, 0x06, 0x37, 0x0f,
+  0xe0, 0x33, 0x4b, 0x00, 0x2e, 0xb6, 0xe6, 0x01, 0x11, 0x9f, 0x59, 0x02,
+  0x70, 0x19, 0x8e, 0xd8, 0x05, 0x36, 0x0f, 0x84, 0x6f, 0x96, 0xa1, 0x57,
+  0xc0, 0x25, 0x30, 0x5e, 0x68, 0xf3, 0x20, 0x3e, 0x16, 0x38, 0xf4, 0xb9,
+  0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a,
+  0xa8, 0xf7, 0x40, 0x87, 0x1b, 0x82, 0x79, 0x0f, 0xc0, 0x60, 0x96, 0xc1,
+  0x57, 0x7e, 0x25, 0xb0, 0x3a, 0x0f, 0x86, 0xf8, 0xcc, 0x12, 0x80, 0x8b,
+  0x11, 0x7a, 0x1e, 0xc0, 0x67, 0x96, 0x00, 0x5c, 0x06, 0x5a, 0x1e, 0x4d,
+  0x57, 0xb0, 0x5d, 0x21, 0x7c, 0x45, 0xf8, 0x15, 0xd6, 0xe0, 0x95, 0x0b,
+  0x86, 0xb9, 0xe0, 0xa9, 0xdb, 0x9e, 0x3a, 0x33, 0x0f, 0x86, 0xb9, 0xbe,
+  0x0e, 0x86, 0x39, 0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10,
+  0x04, 0x03, 0x6f, 0xe4, 0x83, 0x79, 0x0f, 0x64, 0x3d, 0x00, 0xf9, 0x60,
+  0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13,
+  0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0x53,
+  0xf9, 0x40, 0xdf, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1,
+  0x60, 0x5b, 0xf9, 0x60, 0xdf, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00,
+  0x04, 0xc1, 0x60, 0x63, 0xf9, 0x80, 0xdf, 0x83, 0x84, 0x08, 0x46, 0x0c,
+  0x14, 0x00, 0x04, 0xc1, 0x60, 0xa1, 0xf9, 0x60, 0xdf, 0x03, 0x5e, 0x0f,
+  0x82, 0x93, 0x0f, 0xd8, 0x3d, 0x48, 0xf9, 0x60, 0x34, 0x21, 0x00, 0x2e,
+  0x78, 0x6c, 0x96, 0x20, 0x5c, 0x06, 0x5a, 0x1e, 0xd3, 0x60, 0x15, 0xdd,
+  0x1e, 0x56, 0x85, 0x25, 0x5c, 0x45, 0x00, 0x17, 0xdd, 0x1e, 0x5e, 0x65,
+  0x96, 0x41, 0x5c, 0xc8, 0xc5, 0x15, 0x83, 0xe1, 0x88, 0x59, 0x0c, 0xdc,
+  0x3d, 0x18, 0xbe, 0xa3, 0xc5, 0x60, 0x98, 0xe1, 0x86, 0x20, 0xd7, 0x03,
+  0x32, 0xa8, 0x21, 0xd0, 0xe1, 0x88, 0x7f, 0x90, 0xf7, 0x60, 0xf8, 0x2a,
+  0x10, 0xf4, 0x42, 0x62, 0x98, 0xe1, 0x86, 0x80, 0xd7, 0x03, 0x32, 0xa8,
+  0x60, 0xd0, 0x59, 0x86, 0x71, 0xc1, 0x97, 0xe0, 0x40, 0x3d, 0x18, 0xe6,
+  0x6e, 0x3b, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xbc, 0x9d,
+  0x0f, 0x56, 0x3e, 0x50, 0xf7, 0x00, 0xe7, 0x83, 0xd1, 0x84, 0x00, 0x18,
+  0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x0e,
+  0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x4d, 0xec, 0x03, 0x99, 0x0f,
+  0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x6d, 0xec, 0x83,
+  0x99, 0x0f, 0x18, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x8d,
+  0xec, 0x03, 0x9a, 0x0f, 0x24, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04,
+  0x83, 0x85, 0xed, 0x83, 0x99, 0x0f, 0xe8, 0x3d, 0x08, 0x7e, 0x3e, 0x20,
+  0xf9, 0x20, 0xec, 0x83, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xb1, 0x59, 0x02,
+  0x7c, 0x19, 0x6e, 0x30, 0xc7, 0xa0, 0xec, 0x03, 0x30, 0x98, 0x65, 0x28,
+  0x17, 0x73, 0x09, 0xea, 0xdd, 0x03, 0x9b, 0x0f, 0xe0, 0x82, 0xa7, 0x46,
+  0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x72, 0xfb, 0xe0, 0xe6, 0x83, 0x76,
+  0x0c, 0x50, 0x3e, 0x18, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0xea, 0xed,
+  0x83, 0x9b, 0x0f, 0x02, 0xe1, 0x82, 0x61, 0x4a, 0xde, 0x83, 0x9d, 0x0f,
+  0xe0, 0x82, 0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x9a, 0xfb,
+  0x80, 0xe7, 0x03, 0x9d, 0x68, 0xf9, 0x60, 0xc4, 0xe0, 0x00, 0x40, 0x10,
+  0x0c, 0x28, 0xba, 0x0f, 0x78, 0x3e, 0x08, 0x84, 0x0b, 0x86, 0xb9, 0xe0,
+  0xa9, 0x3b, 0x9e, 0xba, 0x5d, 0x0f, 0x86, 0x39, 0xf9, 0x0e, 0x86, 0x39,
+  0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x0f,
+  0xef, 0x03, 0xb4, 0x0f, 0x4e, 0x3e, 0xa8, 0xfb, 0x60, 0x34, 0x21, 0x00,
+  0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88,
+  0x44, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0xfb, 0xfb, 0xe0, 0xed,
+  0x83, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0x03, 0xfd,
+  0x00, 0xee, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60,
+  0x0b, 0xfd, 0x20, 0xee, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04,
+  0xc1, 0x60, 0x49, 0xfd, 0x00, 0xee, 0x83, 0x98, 0x0f, 0x02, 0xbe, 0x0f,
+  0xc2, 0x3e, 0xf0, 0xfb, 0x60, 0x34, 0x21, 0x00, 0x2e, 0x78, 0x6c, 0x96,
+  0x00, 0x5f, 0x86, 0x1b, 0x46, 0x32, 0x08, 0xfd, 0x00, 0x0c, 0x66, 0x19,
+  0xce, 0x05, 0x5f, 0x02, 0x1b, 0xf9, 0xa0, 0xe4, 0x83, 0xf8, 0x0c, 0x47,
+  0xa4, 0x64, 0x60, 0xf2, 0x01, 0xf1, 0xcd, 0x32, 0xa0, 0xcb, 0xba, 0x04,
+  0x76, 0xf2, 0x81, 0x4a, 0x06, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3,
+  0x5c, 0xf0, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0xc4, 0xea,
+  0x07, 0x3a, 0xdc, 0x10, 0xa4, 0x7e, 0x00, 0x06, 0xb3, 0x0c, 0xe9, 0xa2,
+  0x2e, 0x81, 0x0d, 0x2f, 0x1f, 0xc0, 0x67, 0x96, 0xe0, 0x5d, 0xcc, 0xe5,
+  0x03, 0x22, 0x3e, 0xb3, 0x04, 0xef, 0x32, 0x1c, 0x41, 0x93, 0xc1, 0xcb,
+  0x07, 0xc2, 0x37, 0xcb, 0xc0, 0x2e, 0xef, 0x12, 0x58, 0x4d, 0x06, 0x30,
+  0x1f, 0xc4, 0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16,
+  0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0xb6, 0x1f, 0xe8, 0x70, 0x43,
+  0x40, 0xfb, 0x01, 0x18, 0xcc, 0x32, 0xb4, 0x8b, 0xbb, 0x04, 0x86, 0xf3,
+  0xc1, 0x10, 0x9f, 0x59, 0x82, 0x77, 0x31, 0x62, 0xe7, 0x03, 0xf8, 0xcc,
+  0x12, 0xbc, 0xcb, 0x40, 0xcb, 0xa3, 0xa5, 0x0b, 0xa6, 0x2e, 0x44, 0xbb,
+  0x08, 0xee, 0xe2, 0xa7, 0xc2, 0xba, 0x5c, 0x30, 0x8c, 0xe9, 0x7c, 0xe0,
+  0xf3, 0x41, 0x7c, 0x86, 0x23, 0x54, 0xe3, 0xe7, 0x03, 0xe2, 0x9b, 0x65,
+  0x80, 0x97, 0x79, 0x09, 0x0c, 0xec, 0x83, 0xd5, 0x88, 0x8f, 0x05, 0x03,
   0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10,
-  0x9f, 0x22, 0x6c, 0x3b, 0xd0, 0xe1, 0x86, 0x80, 0xb6, 0x03, 0x30, 0x98,
-  0x65, 0xd8, 0x13, 0x3e, 0x09, 0x6c, 0xd0, 0xe9, 0x00, 0x3e, 0xb3, 0x04,
-  0xa1, 0x62, 0x39, 0x1d, 0x10, 0xf1, 0x99, 0x25, 0x08, 0x95, 0xe1, 0x08,
-  0x35, 0xd2, 0xe9, 0x40, 0xf8, 0x66, 0x19, 0xfc, 0x24, 0x54, 0x02, 0x5b,
-  0xa3, 0x9d, 0x0e, 0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xe0,
-  0x29, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0xf0, 0x0e, 0x74,
-  0xb8, 0x21, 0xf8, 0xed, 0x00, 0x0c, 0x66, 0x19, 0xfe, 0x04, 0x54, 0x02,
-  0x1b, 0xeb, 0x60, 0x88, 0xcf, 0x2c, 0x41, 0xa8, 0x18, 0x61, 0xd6, 0x01,
-  0x7c, 0x66, 0x09, 0x42, 0x65, 0xa0, 0xe5, 0xd1, 0xf6, 0x04, 0xe3, 0x13,
-  0xe2, 0x4f, 0x04, 0x50, 0x51, 0xc7, 0xa0, 0x4f, 0x2e, 0x18, 0xc6, 0xca,
-  0x3a, 0x48, 0xeb, 0x20, 0x3e, 0xc3, 0x11, 0x6b, 0xa7, 0xd6, 0x01, 0xf1,
-  0xcd, 0x32, 0x88, 0x4a, 0xa9, 0x04, 0xb6, 0xd6, 0x01, 0xdb, 0xc5, 0xc7,
-  0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x18, 0xf2, 0xb1,
-  0x22, 0x88, 0x4f, 0x11, 0xef, 0x1d, 0xe8, 0x70, 0x43, 0xd0, 0xde, 0x01,
-  0x18, 0xcc, 0x32, 0x8c, 0x0a, 0xa9, 0x04, 0x36, 0xcc, 0x75, 0x00, 0x9f,
-  0x59, 0x82, 0x54, 0x31, 0xb8, 0x0e, 0x88, 0xf8, 0xcc, 0x12, 0xa4, 0xca,
-  0x70, 0x84, 0xdd, 0xc5, 0x75, 0x20, 0x7c, 0xb3, 0x0c, 0xa6, 0x92, 0x2a,
-  0x81, 0xdd, 0x9d, 0x5c, 0x07, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3,
-  0x5c, 0xf0, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x84, 0x7e,
-  0x07, 0x3a, 0xdc, 0x10, 0xe0, 0x77, 0x00, 0x06, 0xb3, 0x0c, 0xa7, 0x82,
-  0x2a, 0x81, 0xe9, 0x75, 0x30, 0xc4, 0x67, 0x96, 0x20, 0x55, 0x8c, 0xf8,
-  0xeb, 0x00, 0x3e, 0xb3, 0x04, 0xa9, 0x32, 0xd0, 0xf2, 0x68, 0xa3, 0x82,
-  0x91, 0x0a, 0x71, 0x2a, 0x02, 0xaa, 0x88, 0x7c, 0x50, 0x2a, 0x17, 0x0c,
-  0x73, 0xc1, 0x53, 0xb7, 0x3d, 0x75, 0x6b, 0x1d, 0x0c, 0x73, 0xa2, 0x1c,
-  0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08,
-  0x06, 0x1e, 0x8a, 0x07, 0xf8, 0x1d, 0xdc, 0x76, 0x50, 0xe2, 0xc1, 0x68,
-  0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10,
-  0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xf6, 0xe2,
-  0xc1, 0x7f, 0x07, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1,
-  0x06, 0xe3, 0x01, 0x88, 0x07, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08,
-  0x82, 0xc1, 0x16, 0xe3, 0x41, 0x88, 0x07, 0x09, 0x11, 0x8c, 0x18, 0x28,
-  0x00, 0x08, 0x82, 0xc1, 0x92, 0xe3, 0x01, 0x88, 0x07, 0xe1, 0x1d, 0x04,
-  0x2c, 0x1e, 0xc4, 0x77, 0xe0, 0xe2, 0xc1, 0x68, 0x42, 0x00, 0x5c, 0xf0,
-  0xd8, 0x2c, 0x81, 0xaa, 0x0c, 0xb4, 0x3c, 0xa6, 0x51, 0x27, 0xe4, 0x3b,
-  0xd0, 0x09, 0x4b, 0xdc, 0x89, 0x90, 0x2a, 0xe4, 0x3b, 0xe0, 0x89, 0xd9,
-  0x9e, 0x7c, 0x07, 0xf0, 0x99, 0x65, 0x58, 0x95, 0x56, 0xa1, 0xbd, 0xe1,
-  0x08, 0xdc, 0xa3, 0xef, 0x60, 0xf8, 0x2e, 0xf7, 0x86, 0x19, 0x6e, 0x08,
-  0x7e, 0x3b, 0x20, 0x83, 0x1a, 0x02, 0x1d, 0x8e, 0x28, 0xf0, 0x3b, 0x18,
-  0xbe, 0x0a, 0x04, 0xbd, 0x63, 0x98, 0xe1, 0x86, 0x40, 0xbc, 0x03, 0x32,
-  0xa8, 0x60, 0xd0, 0x59, 0x06, 0x56, 0x09, 0x97, 0xe0, 0x4c, 0x3b, 0x18,
-  0xe6, 0x7a, 0x39, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xbc,
-  0x30, 0x0f, 0x62, 0x3c, 0x80, 0xef, 0xc0, 0xc7, 0x83, 0xd1, 0x84, 0x00,
-  0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22,
-  0x0e, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x0d, 0xcd, 0x03, 0x1c,
-  0x0f, 0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x2d, 0xcd,
-  0x83, 0x1c, 0x0f, 0x18, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83,
-  0x4d, 0xcd, 0x03, 0x1d, 0x0f, 0x24, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10,
-  0x04, 0x83, 0x45, 0xce, 0x83, 0x1c, 0x0f, 0xf4, 0x3b, 0x08, 0xca, 0x3c,
-  0x50, 0xf1, 0xe0, 0xcc, 0x83, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xb1, 0x59,
-  0x82, 0x70, 0x19, 0x6e, 0x60, 0xbf, 0x35, 0x0f, 0xc0, 0x60, 0x96, 0xc1,
-  0x55, 0x5e, 0x25, 0xa8, 0xfa, 0x0e, 0x78, 0x3c, 0x80, 0x0b, 0x9e, 0x1a,
-  0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x8a, 0xce, 0x83, 0x1e, 0x0f, 0xe4,
-  0xcf, 0xc5, 0x83, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0xea, 0x3c,
-  0xe8, 0xf1, 0x20, 0x10, 0x2e, 0x18, 0xa6, 0xf0, 0x3b, 0x08, 0xf3, 0x00,
-  0x2e, 0x78, 0x6a, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xa8, 0x3c, 0x0f,
-  0xc4, 0x3c, 0x00, 0x83, 0x19, 0x0f, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1,
-  0x80, 0xd2, 0xf3, 0x40, 0xcc, 0x83, 0x40, 0xb8, 0x60, 0x98, 0x0b, 0x9e,
-  0xba, 0xe3, 0xa9, 0x0b, 0xef, 0x60, 0x98, 0xc3, 0xe7, 0x60, 0x98, 0x23,
-  0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xf0, 0xfc,
-  0x3c, 0x70, 0xf3, 0xa0, 0xc5, 0x83, 0x3d, 0x0f, 0x46, 0x13, 0x02, 0x60,
+  0x9f, 0x22, 0xc8, 0x3f, 0xd0, 0xe1, 0x86, 0x40, 0xfc, 0x03, 0x30, 0x98,
+  0x65, 0x88, 0x17, 0x79, 0x09, 0x6c, 0x40, 0xfb, 0x00, 0x3e, 0xb3, 0x04,
+  0xf7, 0x62, 0x65, 0x1f, 0x10, 0xf1, 0x99, 0x25, 0xb8, 0x97, 0xe1, 0x88,
+  0xda, 0x30, 0xfb, 0x40, 0xf8, 0x66, 0x19, 0xe8, 0xe5, 0x5e, 0x02, 0xb3,
+  0x8d, 0xb3, 0x0f, 0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xe0,
+  0x29, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0xf7, 0x0f, 0x74,
+  0xb8, 0x21, 0x68, 0xff, 0x00, 0x0c, 0x66, 0x19, 0xea, 0xc5, 0x5e, 0x02,
+  0x7b, 0xfb, 0x60, 0x88, 0xcf, 0x2c, 0xc1, 0xbd, 0x18, 0x41, 0xf7, 0x01,
+  0x7c, 0x66, 0x09, 0xee, 0x65, 0xa0, 0xe5, 0xd1, 0xe2, 0x05, 0x93, 0x17,
+  0xa2, 0x5e, 0x04, 0x7b, 0x01, 0x9d, 0x79, 0xb9, 0x60, 0x98, 0x0b, 0x9e,
+  0xba, 0xed, 0xa9, 0x03, 0xfb, 0x60, 0x98, 0xbb, 0xf3, 0x60, 0x98, 0x23,
+  0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xf0, 0xfa,
+  0x3f, 0x68, 0xff, 0x80, 0xf5, 0x03, 0xfd, 0x0f, 0x46, 0x13, 0x02, 0x60,
   0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48,
-  0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb6, 0x52, 0x0f, 0xea, 0x3c,
-  0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x36, 0x53, 0x0f,
-  0xec, 0x3c, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb6,
-  0x53, 0x0f, 0xee, 0x3c, 0x48, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10,
-  0x0c, 0x96, 0x57, 0x0f, 0xec, 0x3c, 0xb8, 0xf1, 0x20, 0x10, 0xf5, 0xe0,
-  0xcc, 0x03, 0x52, 0x0f, 0x46, 0x13, 0x02, 0xe0, 0x82, 0xc7, 0x66, 0x09,
-  0xc2, 0x65, 0xb8, 0x21, 0x05, 0x83, 0x53, 0x0f, 0xc0, 0x60, 0x96, 0x01,
-  0x56, 0xc2, 0x25, 0xb0, 0x14, 0x0f, 0x56, 0x3c, 0x88, 0xcf, 0x70, 0x84,
-  0x0b, 0x06, 0x2c, 0x1e, 0x10, 0xdf, 0x2c, 0x43, 0xac, 0xd0, 0x4a, 0x60,
-  0x2d, 0x1e, 0xbc, 0x60, 0x10, 0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30, 0xcc,
-  0x05, 0x4f, 0x59, 0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xc4, 0x7a,
-  0xa0, 0xc3, 0x0d, 0xc1, 0xab, 0x07, 0x60, 0x30, 0xcb, 0x20, 0x2b, 0xb3,
-  0x12, 0xd8, 0x50, 0xe3, 0x01, 0x7c, 0x66, 0x09, 0x70, 0xc5, 0x68, 0x3c,
-  0x20, 0xe2, 0x33, 0x4b, 0x80, 0x2b, 0xc3, 0x11, 0x39, 0x18, 0xd4, 0x78,
-  0x20, 0x7c, 0xb3, 0x0c, 0xb5, 0x82, 0x2b, 0x81, 0xe9, 0x60, 0x60, 0xe3,
-  0x41, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x41,
-  0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xc1, 0xeb, 0x81, 0x0e, 0x37, 0x04,
-  0xba, 0x1e, 0x80, 0xc1, 0x2c, 0x83, 0xad, 0xdc, 0x4a, 0x60, 0x3e, 0x1e,
-  0x0c, 0xf1, 0x99, 0x25, 0xc0, 0x15, 0x23, 0xc2, 0x3c, 0x80, 0xcf, 0x2c,
-  0x01, 0xae, 0x0c, 0xb4, 0x3c, 0x9a, 0xac, 0x60, 0xb3, 0x42, 0xd8, 0x8a,
-  0x70, 0x2b, 0xa8, 0x28, 0xd0, 0xca, 0x05, 0xc3, 0x18, 0x98, 0x07, 0x64,
-  0x1e, 0xc4, 0x67, 0x38, 0x02, 0x16, 0xca, 0x3c, 0x20, 0xbe, 0x59, 0x86,
-  0x5c, 0xe1, 0x95, 0xc0, 0xcc, 0x3c, 0x88, 0x85, 0xf8, 0x58, 0x30, 0xd0,
-  0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1,
-  0x29, 0x42, 0xdd, 0x03, 0x1d, 0x6e, 0x08, 0xd0, 0x3d, 0x00, 0x83, 0x59,
-  0x06, 0x5d, 0xd9, 0x95, 0xc0, 0x06, 0x37, 0x0f, 0xe0, 0x33, 0x4b, 0x00,
-  0x2e, 0xb6, 0xe6, 0x01, 0x11, 0x9f, 0x59, 0x02, 0x70, 0x19, 0x8e, 0xd8,
-  0x05, 0x36, 0x0f, 0x84, 0x6f, 0x96, 0xa1, 0x57, 0xc0, 0x25, 0x30, 0x5e,
-  0x68, 0xf3, 0x20, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e,
-  0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xa8, 0xf7, 0x40, 0x87,
-  0x1b, 0x82, 0x79, 0x0f, 0xc0, 0x60, 0x96, 0xc1, 0x57, 0x7e, 0x25, 0xb0,
-  0x3a, 0x0f, 0x86, 0xf8, 0xcc, 0x12, 0x80, 0x8b, 0x11, 0x7a, 0x1e, 0xc0,
-  0x67, 0x96, 0x00, 0x5c, 0x06, 0x5a, 0x1e, 0x4d, 0x57, 0xb0, 0x5d, 0x21,
-  0x7c, 0x45, 0xf8, 0x15, 0xd6, 0xe0, 0x95, 0x0b, 0x86, 0xb9, 0xe0, 0xa9,
-  0xdb, 0x9e, 0x3a, 0x33, 0x0f, 0x86, 0xb9, 0xbe, 0x0e, 0x86, 0x39, 0x62,
-  0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x6f, 0xe4,
-  0x83, 0x79, 0x0f, 0x64, 0x3d, 0x00, 0xf9, 0x60, 0x34, 0x21, 0x00, 0x46,
-  0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44,
-  0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0x53, 0xf9, 0x40, 0xdf, 0x83,
-  0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0x5b, 0xf9, 0x60,
-  0xdf, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0x63,
-  0xf9, 0x80, 0xdf, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1,
-  0x60, 0xa1, 0xf9, 0x60, 0xdf, 0x03, 0x5e, 0x0f, 0x82, 0x93, 0x0f, 0xd8,
-  0x3d, 0x48, 0xf9, 0x60, 0x34, 0x21, 0x00, 0x2e, 0x78, 0x6c, 0x96, 0x20,
-  0x5c, 0x06, 0x5a, 0x1e, 0xd3, 0x60, 0x15, 0xdd, 0x1e, 0x56, 0x85, 0x25,
-  0x5c, 0x45, 0x00, 0x17, 0xdd, 0x1e, 0x5e, 0x65, 0x96, 0x41, 0x5c, 0xc8,
-  0xc5, 0x15, 0x83, 0xe1, 0x88, 0x59, 0x0c, 0xdc, 0x3d, 0x18, 0xbe, 0xa3,
-  0xc5, 0x60, 0x98, 0xe1, 0x86, 0x20, 0xd7, 0x03, 0x32, 0xa8, 0x21, 0xd0,
-  0xe1, 0x88, 0x7f, 0x90, 0xf7, 0x60, 0xf8, 0x2a, 0x10, 0xf4, 0x42, 0x62,
-  0x98, 0xe1, 0x86, 0x80, 0xd7, 0x03, 0x32, 0xa8, 0x60, 0xd0, 0x59, 0x86,
-  0x71, 0xc1, 0x97, 0xe0, 0x40, 0x3d, 0x18, 0xe6, 0x6e, 0x3b, 0x18, 0x66,
-  0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xbc, 0x9d, 0x0f, 0x56, 0x3e, 0x50,
-  0xf7, 0x00, 0xe7, 0x83, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1,
-  0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x0e, 0x19, 0x31, 0x40, 0x00,
-  0x10, 0x04, 0x83, 0x4d, 0xec, 0x03, 0x99, 0x0f, 0x0e, 0x22, 0x18, 0x31,
-  0x40, 0x00, 0x10, 0x04, 0x83, 0x6d, 0xec, 0x83, 0x99, 0x0f, 0x18, 0x22,
-  0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x8d, 0xec, 0x03, 0x9a, 0x0f,
-  0x24, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x85, 0xed, 0x83,
-  0x99, 0x0f, 0xe8, 0x3d, 0x08, 0x7e, 0x3e, 0x20, 0xf9, 0x20, 0xec, 0x83,
-  0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xb1, 0x59, 0x02, 0x7c, 0x19, 0x6e, 0x30,
-  0xc7, 0xa0, 0xec, 0x03, 0x30, 0x98, 0x65, 0x28, 0x17, 0x73, 0x09, 0xea,
-  0xdd, 0x03, 0x9b, 0x0f, 0xe0, 0x82, 0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04,
-  0xc1, 0x80, 0x72, 0xfb, 0xe0, 0xe6, 0x83, 0x76, 0x0c, 0x50, 0x3e, 0x18,
-  0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0xea, 0xed, 0x83, 0x9b, 0x0f, 0x02,
-  0xe1, 0x82, 0x61, 0x4a, 0xde, 0x83, 0x9d, 0x0f, 0xe0, 0x82, 0xa7, 0x46,
-  0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x9a, 0xfb, 0x80, 0xe7, 0x03, 0x9d,
-  0x68, 0xf9, 0x60, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x28, 0xba, 0x0f,
-  0x78, 0x3e, 0x08, 0x84, 0x0b, 0x86, 0xb9, 0xe0, 0xa9, 0x3b, 0x9e, 0xba,
-  0x5d, 0x0f, 0x86, 0x39, 0xf9, 0x0e, 0x86, 0x39, 0x62, 0x98, 0x23, 0x86,
-  0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x0f, 0xef, 0x03, 0xb4, 0x0f,
-  0x4e, 0x3e, 0xa8, 0xfb, 0x60, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60,
-  0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10,
-  0x00, 0x04, 0xc1, 0x60, 0xfb, 0xfb, 0xe0, 0xed, 0x83, 0x84, 0x08, 0x46,
-  0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0x03, 0xfd, 0x00, 0xee, 0x83, 0x84,
-  0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0x0b, 0xfd, 0x20, 0xee,
-  0x83, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0x49, 0xfd,
-  0x00, 0xee, 0x83, 0x98, 0x0f, 0x02, 0xbe, 0x0f, 0xc2, 0x3e, 0xf0, 0xfb,
-  0x60, 0x34, 0x21, 0x00, 0x2e, 0x78, 0x6c, 0x96, 0x00, 0x5f, 0x86, 0x1b,
-  0x46, 0x32, 0x08, 0xfd, 0x00, 0x0c, 0x66, 0x19, 0xce, 0x05, 0x5f, 0x02,
-  0x1b, 0xf9, 0xa0, 0xe4, 0x83, 0xf8, 0x0c, 0x47, 0xa4, 0x64, 0x60, 0xf2,
-  0x01, 0xf1, 0xcd, 0x32, 0xa0, 0xcb, 0xba, 0x04, 0x76, 0xf2, 0x81, 0x4a,
-  0x06, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05,
-  0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0xc4, 0xea, 0x07, 0x3a, 0xdc, 0x10,
-  0xa4, 0x7e, 0x00, 0x06, 0xb3, 0x0c, 0xe9, 0xa2, 0x2e, 0x81, 0x0d, 0x2f,
-  0x1f, 0xc0, 0x67, 0x96, 0xe0, 0x5d, 0xcc, 0xe5, 0x03, 0x22, 0x3e, 0xb3,
-  0x04, 0xef, 0x32, 0x1c, 0x41, 0x93, 0xc1, 0xcb, 0x07, 0xc2, 0x37, 0xcb,
-  0xc0, 0x2e, 0xef, 0x12, 0x58, 0x4d, 0x06, 0x30, 0x1f, 0xc4, 0xc7, 0x02,
-  0x87, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x44, 0xf2, 0xb1, 0x22,
-  0x88, 0x4f, 0x11, 0xb6, 0x1f, 0xe8, 0x70, 0x43, 0x40, 0xfb, 0x01, 0x18,
-  0xcc, 0x32, 0xb4, 0x8b, 0xbb, 0x04, 0x86, 0xf3, 0xc1, 0x10, 0x9f, 0x59,
-  0x82, 0x77, 0x31, 0x62, 0xe7, 0x03, 0xf8, 0xcc, 0x12, 0xbc, 0xcb, 0x40,
-  0xcb, 0xa3, 0xa5, 0x0b, 0xa6, 0x2e, 0x44, 0xbb, 0x08, 0xee, 0xe2, 0xa7,
-  0xc2, 0xba, 0x5c, 0x30, 0x8c, 0xe9, 0x7c, 0xe0, 0xf3, 0x41, 0x7c, 0x86,
-  0x23, 0x54, 0xe3, 0xe7, 0x03, 0xe2, 0x9b, 0x65, 0x80, 0x97, 0x79, 0x09,
-  0x0c, 0xec, 0x83, 0xd5, 0x88, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6,
-  0x82, 0xa7, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xc8, 0x3f,
-  0xd0, 0xe1, 0x86, 0x40, 0xfc, 0x03, 0x30, 0x98, 0x65, 0x88, 0x17, 0x79,
-  0x09, 0x6c, 0x40, 0xfb, 0x00, 0x3e, 0xb3, 0x04, 0xf7, 0x62, 0x65, 0x1f,
-  0x10, 0xf1, 0x99, 0x25, 0xb8, 0x97, 0xe1, 0x88, 0xda, 0x30, 0xfb, 0x40,
-  0xf8, 0x66, 0x19, 0xe8, 0xe5, 0x5e, 0x02, 0xb3, 0x8d, 0xb3, 0x0f, 0xe2,
-  0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x22, 0xf9,
-  0x58, 0x11, 0xc4, 0xa7, 0x88, 0xf7, 0x0f, 0x74, 0xb8, 0x21, 0x68, 0xff,
-  0x00, 0x0c, 0x66, 0x19, 0xea, 0xc5, 0x5e, 0x02, 0x7b, 0xfb, 0x60, 0x88,
-  0xcf, 0x2c, 0xc1, 0xbd, 0x18, 0x41, 0xf7, 0x01, 0x7c, 0x66, 0x09, 0xee,
-  0x65, 0xa0, 0xe5, 0xd1, 0xe2, 0x05, 0x93, 0x17, 0xa2, 0x5e, 0x04, 0x7b,
-  0x01, 0x9d, 0x79, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xba, 0xed, 0xa9, 0x03,
-  0xfb, 0x60, 0x98, 0xbb, 0xf3, 0x60, 0x98, 0x23, 0x86, 0x39, 0x62, 0x98,
-  0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xf0, 0xfa, 0x3f, 0x68, 0xff, 0x80,
-  0xf5, 0x03, 0xfd, 0x0f, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46,
-  0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01,
-  0x40, 0x10, 0x0c, 0x36, 0x12, 0x14, 0xe8, 0x3f, 0x48, 0x88, 0x60, 0xc4,
-  0x00, 0x01, 0x40, 0x10, 0x0c, 0xb6, 0x12, 0x14, 0xea, 0x3f, 0x48, 0x88,
-  0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x36, 0x13, 0x14, 0xec, 0x3f,
-  0x48, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x16, 0x17, 0x14,
-  0xea, 0x3f, 0xb0, 0xfd, 0x20, 0x08, 0x41, 0xc1, 0xfc, 0x83, 0x11, 0x14,
-  0x46, 0x13, 0x02, 0xe0, 0x82, 0xc7, 0x66, 0x09, 0xf0, 0x65, 0xa0, 0xe5,
-  0x31, 0x8d, 0x71, 0x71, 0xfd, 0x41, 0x5c, 0x58, 0xa2, 0x5c, 0x84, 0x7b,
-  0x71, 0xfd, 0xc1, 0x5c, 0x66, 0x19, 0xf2, 0x65, 0x5f, 0x50, 0x33, 0x18,
-  0x8e, 0x98, 0x3d, 0xf4, 0x0f, 0x86, 0xef, 0x68, 0x6f, 0x98, 0xe1, 0x86,
-  0x60, 0xf6, 0x03, 0x32, 0xa8, 0x21, 0xd0, 0xe1, 0x88, 0xfc, 0x60, 0xff,
-  0x60, 0xf8, 0x2a, 0x10, 0xf4, 0xf6, 0x63, 0x98, 0xe1, 0x86, 0xc0, 0xf6,
-  0x03, 0x32, 0xa8, 0x60, 0xd0, 0x59, 0x06, 0x7d, 0x79, 0x99, 0xe0, 0xf4,
-  0x3e, 0x18, 0xe6, 0x62, 0x3d, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10,
-  0x0c, 0xbc, 0x1a, 0x14, 0x4a, 0x50, 0x20, 0xff, 0x40, 0x06, 0x85, 0xd1,
-  0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20,
-  0x86, 0x22, 0x0e, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x8d, 0x07,
-  0x05, 0x16, 0x14, 0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83,
-  0xad, 0x07, 0x85, 0x16, 0x14, 0x18, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10,
-  0x04, 0x83, 0xcd, 0x07, 0x05, 0x17, 0x14, 0x24, 0x22, 0x18, 0x31, 0x50,
-  0x00, 0x10, 0x04, 0x83, 0xc5, 0x0c, 0x85, 0x16, 0x14, 0xdc, 0x3f, 0x08,
-  0x72, 0x50, 0xf0, 0xff, 0x60, 0x07, 0x85, 0xd1, 0x84, 0x00, 0xb8, 0xe0,
-  0xb1, 0x59, 0x82, 0x97, 0x19, 0x6e, 0x00, 0xcf, 0xe0, 0x07, 0x05, 0x30,
-  0x98, 0x65, 0xe0, 0x97, 0x7e, 0x09, 0x2a, 0xfd, 0x03, 0x18, 0x14, 0xe0,
-  0x82, 0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x42, 0x43, 0x21,
-  0x06, 0x85, 0xf6, 0x13, 0x41, 0x61, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c,
-  0xa8, 0x34, 0x14, 0x62, 0x50, 0x08, 0x84, 0x0b, 0x86, 0x29, 0xf6, 0x0f,
-  0x6a, 0x50, 0x80, 0x0b, 0x9e, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03,
-  0xaa, 0x0d, 0x05, 0x1b, 0x14, 0x68, 0xe4, 0x04, 0x85, 0x11, 0x83, 0x03,
-  0x00, 0x41, 0x30, 0xa0, 0xdc, 0x50, 0xb0, 0x41, 0x21, 0x10, 0x2e, 0x18,
-  0xe6, 0x82, 0xa7, 0xee, 0x78, 0xea, 0x6a, 0x3f, 0x18, 0xe6, 0xd8, 0x3d,
-  0x18, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10,
-  0x0c, 0x3c, 0x39, 0x14, 0xc4, 0x50, 0x08, 0x41, 0xe1, 0x0d, 0x85, 0xd1,
-  0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20,
-  0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x2d, 0x0f,
-  0x85, 0x34, 0x14, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83,
-  0x4d, 0x0f, 0x05, 0x35, 0x14, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10,
-  0x04, 0x83, 0x6d, 0x0f, 0x85, 0x35, 0x14, 0x12, 0x22, 0x18, 0x31, 0x50,
-  0x00, 0x10, 0x04, 0x83, 0x65, 0x14, 0x05, 0x35, 0x14, 0x56, 0x50, 0x08,
-  0xec, 0x50, 0xd8, 0x41, 0x01, 0x0f, 0x85, 0xd1, 0x84, 0x00, 0xb8, 0xe0,
-  0xb1, 0x59, 0x82, 0x97, 0x19, 0x6e, 0xe8, 0xcf, 0x60, 0x0f, 0x05, 0x30,
-  0x98, 0x65, 0xf0, 0x97, 0x97, 0x09, 0xac, 0xff, 0x83, 0xff, 0x0f, 0xe2,
-  0x33, 0x1c, 0x91, 0x82, 0x01, 0x08, 0x0a, 0xc4, 0x37, 0xcb, 0xf0, 0x2f,
-  0x22, 0x13, 0x58, 0x08, 0x0a, 0x2a, 0x18, 0xc4, 0xc7, 0x82, 0x81, 0x3e,
-  0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f,
-  0x11, 0xa5, 0x28, 0xe8, 0x70, 0x43, 0x30, 0x8a, 0x02, 0x18, 0xcc, 0x32,
-  0x80, 0x4c, 0xc8, 0x04, 0x36, 0xa4, 0xa0, 0x00, 0x9f, 0x59, 0x02, 0x93,
-  0x31, 0x14, 0x14, 0x88, 0xf8, 0xcc, 0x12, 0x98, 0xcc, 0x70, 0x04, 0x0d,
-  0x06, 0x29, 0x28, 0x08, 0xdf, 0x2c, 0xc3, 0xc8, 0x98, 0x4c, 0x60, 0x35,
-  0x18, 0xa8, 0xa0, 0x10, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05,
-  0x4f, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xc0, 0xa2, 0xa0,
-  0xc3, 0x0d, 0x81, 0x2b, 0x0a, 0x60, 0x30, 0xcb, 0x40, 0x32, 0x25, 0x13,
-  0x98, 0x0c, 0x0a, 0x43, 0x7c, 0x66, 0x09, 0x4c, 0xc6, 0x88, 0x1a, 0x14,
-  0xe0, 0x33, 0x4b, 0x60, 0x32, 0x03, 0x2d, 0x8f, 0x06, 0x32, 0x58, 0xc8,
-  0x10, 0x24, 0x23, 0x94, 0x8c, 0x1f, 0x0a, 0x22, 0x73, 0xc1, 0x30, 0x46,
-  0x83, 0x02, 0x0e, 0x0a, 0xf1, 0x19, 0x8e, 0x20, 0x95, 0x1c, 0x14, 0x88,
-  0x6f, 0x96, 0xe1, 0x64, 0x54, 0x26, 0x30, 0x1d, 0x14, 0x4a, 0x25, 0x3e,
-  0x16, 0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0xc0, 0x90, 0x8f,
-  0x15, 0x41, 0x7c, 0x8a, 0xf0, 0x45, 0x41, 0x87, 0x1b, 0x02, 0x5e, 0x14,
-  0xc0, 0x60, 0x96, 0x01, 0x65, 0x52, 0x26, 0xb0, 0x41, 0x0c, 0x05, 0xf8,
-  0xcc, 0x12, 0xb8, 0x8c, 0xfd, 0xa0, 0x40, 0xc4, 0x67, 0x96, 0xc0, 0x65,
-  0x86, 0x23, 0x5e, 0x05, 0x0c, 0x05, 0xe1, 0x9b, 0x65, 0x58, 0x19, 0x97,
-  0x09, 0x0c, 0x56, 0xc2, 0x50, 0x88, 0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18,
-  0xe6, 0x82, 0xa7, 0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xd2,
-  0x51, 0xd0, 0xe1, 0x86, 0xe0, 0x1c, 0x05, 0x30, 0x98, 0x65, 0x60, 0x99,
-  0x96, 0x09, 0x2c, 0x0d, 0x85, 0x21, 0x3e, 0xb3, 0x04, 0x2e, 0x63, 0x84,
-  0x1b, 0x0a, 0xf0, 0x99, 0x25, 0x70, 0x99, 0x81, 0x96, 0x47, 0x43, 0x19,
-  0x2c, 0x65, 0x08, 0x96, 0x11, 0x5a, 0x86, 0xae, 0x54, 0xe6, 0x82, 0x61,
-  0x2e, 0x78, 0xea, 0xb6, 0xa7, 0x4e, 0x07, 0x85, 0x61, 0x2e, 0xee, 0x83,
-  0x61, 0x8e, 0x18, 0xe6, 0x88, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1,
-  0xc0, 0xbb, 0x47, 0xe1, 0x1c, 0x05, 0x53, 0x14, 0xe8, 0x51, 0x18, 0x4d,
-  0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62,
-  0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xd8, 0xfc, 0x51,
-  0x70, 0x47, 0x21, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xd8,
-  0xfe, 0x51, 0x78, 0x47, 0x21, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41,
-  0x30, 0xd8, 0x40, 0x52, 0x80, 0x47, 0x21, 0x21, 0x82, 0x11, 0x03, 0x05,
-  0x00, 0x41, 0x30, 0x58, 0x50, 0x52, 0x78, 0x47, 0x01, 0x16, 0x85, 0x60,
-  0x1f, 0x05, 0x70, 0x14, 0xfa, 0x51, 0x18, 0x4d, 0x08, 0x80, 0x0b, 0x1e,
-  0x9b, 0x25, 0x78, 0x99, 0x81, 0x96, 0xc7, 0x34, 0xf4, 0x45, 0x34, 0x89,
-  0x7c, 0x61, 0x09, 0x7e, 0x11, 0x5c, 0x46, 0x34, 0x89, 0x7e, 0x99, 0x65,
-  0x80, 0x19, 0x99, 0x11, 0xd5, 0x60, 0x38, 0x42, 0xf6, 0xc4, 0x51, 0x18,
-  0xbe, 0x9b, 0xbd, 0x61, 0x86, 0x1b, 0x82, 0x56, 0x14, 0xc8, 0xa0, 0x86,
-  0x40, 0x87, 0x23, 0xe6, 0xc5, 0x1c, 0x85, 0xe1, 0xab, 0x40, 0xd0, 0xab,
-  0x97, 0x61, 0x86, 0x1b, 0x02, 0x58, 0x14, 0xc8, 0xa0, 0x82, 0x41, 0x67,
-  0x19, 0x62, 0xc6, 0x6c, 0x82, 0xa3, 0x43, 0x61, 0x98, 0x5b, 0xfd, 0x60,
-  0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xf0, 0x5e, 0x52, 0xf8, 0x47,
-  0xc1, 0x17, 0x05, 0x96, 0x14, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08,
-  0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x38, 0x64, 0xc4, 0x00,
-  0x01, 0x40, 0x10, 0x0c, 0x36, 0x9b, 0x14, 0x4c, 0x52, 0x38, 0x88, 0x60,
-  0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb6, 0x9b, 0x14, 0x4e, 0x52, 0x60,
-  0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x36, 0x9c, 0x14, 0x50,
-  0x52, 0x90, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x16, 0xb0,
-  0x14, 0x4e, 0x52, 0x40, 0x47, 0x21, 0x98, 0x49, 0x01, 0x1f, 0x85, 0x9a,
-  0x14, 0x46, 0x13, 0x02, 0xe0, 0x82, 0xc7, 0x66, 0x09, 0xcc, 0x66, 0xb8,
-  0x41, 0x57, 0x83, 0x9c, 0x14, 0xc0, 0x60, 0x96, 0x61, 0x66, 0x68, 0x26,
-  0xa8, 0x71, 0x14, 0x54, 0x52, 0x80, 0x0b, 0x9e, 0x1a, 0x31, 0x38, 0x00,
-  0x10, 0x04, 0x03, 0x4a, 0x2c, 0x85, 0x95, 0x14, 0xd8, 0x8f, 0x1f, 0x85,
-  0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0xc6, 0x52, 0x58, 0x49, 0x21,
-  0x10, 0x2e, 0x18, 0xa6, 0xcc, 0x51, 0x78, 0x49, 0x01, 0x2e, 0x78, 0x6a,
-  0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xa8, 0xb3, 0x14, 0x60, 0x52, 0x70,
-  0x99, 0x90, 0x14, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x42, 0x4b,
-  0x01, 0x26, 0x85, 0x40, 0xb8, 0x60, 0x98, 0x0b, 0x9e, 0xba, 0xe3, 0xa9,
-  0x7b, 0x45, 0x61, 0x98, 0x33, 0xff, 0x60, 0x98, 0x23, 0x86, 0x39, 0x62,
-  0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xf0, 0xd8, 0x52, 0xe0, 0x49,
-  0x61, 0x1f, 0x85, 0xb4, 0x14, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08,
-  0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00,
-  0x01, 0x40, 0x10, 0x0c, 0xb6, 0xb9, 0x14, 0xc6, 0x52, 0x48, 0x88, 0x60,
-  0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x36, 0xba, 0x14, 0xc8, 0x52, 0x48,
-  0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb6, 0xba, 0x14, 0xca,
-  0x52, 0x48, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x96, 0xbe,
-  0x14, 0xc8, 0x52, 0x28, 0x49, 0x21, 0x80, 0x4b, 0xa1, 0x26, 0x05, 0xb9,
-  0x14, 0x46, 0x13, 0x02, 0xe0, 0x82, 0xc7, 0x66, 0x09, 0xcc, 0x66, 0xb8,
-  0xe1, 0x5e, 0x83, 0xba, 0x14, 0xc0, 0x60, 0x96, 0xa1, 0x66, 0xcc, 0x26,
-  0xb0, 0x7b, 0x14, 0xf2, 0x51, 0x88, 0xcf, 0x70, 0x04, 0x0a, 0x06, 0xfa,
-  0x28, 0x10, 0xdf, 0x2c, 0x83, 0xcd, 0xe4, 0x4c, 0x60, 0xfb, 0x28, 0xa4,
-  0x60, 0x10, 0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59,
-  0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xfc, 0xa5, 0xa0, 0xc3, 0x0d,
-  0x41, 0x5f, 0x0a, 0x60, 0x30, 0xcb, 0x70, 0x33, 0x38, 0x13, 0xd8, 0x30,
-  0x92, 0x02, 0x7c, 0x66, 0x09, 0x7a, 0xc6, 0x44, 0x52, 0x20, 0xe2, 0x33,
-  0x4b, 0xd0, 0x33, 0xc3, 0x11, 0x33, 0x18, 0x8c, 0xa4, 0x20, 0x7c, 0xb3,
-  0x0c, 0x3a, 0xd3, 0x33, 0x81, 0xd1, 0x60, 0x40, 0x92, 0x42, 0x7c, 0x2c,
-  0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x41, 0x24, 0x1f, 0x2b,
-  0x82, 0xf8, 0x14, 0xa1, 0x9a, 0x82, 0x0e, 0x37, 0x04, 0xa8, 0x29, 0x80,
-  0xc1, 0x2c, 0xc3, 0xce, 0xf0, 0x4c, 0x60, 0x2c, 0x29, 0x0c, 0xf1, 0x99,
-  0x25, 0xe8, 0x19, 0x23, 0x5e, 0x52, 0x80, 0xcf, 0x2c, 0x41, 0xcf, 0x0c,
-  0xb4, 0x3c, 0xda, 0xcd, 0x60, 0x38, 0x43, 0xec, 0x8c, 0xc0, 0x33, 0x7c,
-  0x28, 0xe4, 0xcc, 0x05, 0xc3, 0x98, 0x4b, 0x0a, 0x32, 0x29, 0xc4, 0x67,
-  0x38, 0xc2, 0x6f, 0x66, 0x52, 0x20, 0xbe, 0x59, 0x06, 0x9f, 0x09, 0x9b,
-  0xc0, 0x68, 0x52, 0xf8, 0x9b, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61,
-  0x2e, 0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x02, 0x37,
-  0x05, 0x1d, 0x6e, 0x08, 0x6c, 0x53, 0x00, 0x83, 0x59, 0x86, 0x9f, 0x01,
-  0x9b, 0xc0, 0x06, 0x9e, 0x14, 0xe0, 0x33, 0x4b, 0x50, 0x36, 0x96, 0x93,
-  0x02, 0x11, 0x9f, 0x59, 0x82, 0xb2, 0x19, 0x8e, 0x48, 0x1d, 0x9d, 0x14,
-  0x84, 0x6f, 0x96, 0x41, 0x6c, 0xca, 0x26, 0x30, 0xd5, 0xd9, 0x49, 0x21,
+  0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x36, 0x12, 0x14, 0xe8, 0x3f,
+  0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb6, 0x12, 0x14,
+  0xea, 0x3f, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x36,
+  0x13, 0x14, 0xec, 0x3f, 0x48, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10,
+  0x0c, 0x16, 0x17, 0x14, 0xea, 0x3f, 0xb0, 0xfd, 0x20, 0x08, 0x41, 0xc1,
+  0xfc, 0x83, 0x11, 0x14, 0x46, 0x13, 0x02, 0xe0, 0x82, 0xc7, 0x66, 0x09,
+  0xf0, 0x65, 0xa0, 0xe5, 0x31, 0x8d, 0x71, 0x71, 0xfd, 0x41, 0x5c, 0x58,
+  0xa2, 0x5c, 0x84, 0x7b, 0x71, 0xfd, 0xc1, 0x5c, 0x66, 0x19, 0xf2, 0x65,
+  0x5f, 0x50, 0x33, 0x18, 0x8e, 0x98, 0x3d, 0xf4, 0x0f, 0x86, 0xef, 0x68,
+  0x6f, 0x98, 0xe1, 0x86, 0x60, 0xf6, 0x03, 0x32, 0xa8, 0x21, 0xd0, 0xe1,
+  0x88, 0xfc, 0x60, 0xff, 0x60, 0xf8, 0x2a, 0x10, 0xf4, 0xf6, 0x63, 0x98,
+  0xe1, 0x86, 0xc0, 0xf6, 0x03, 0x32, 0xa8, 0x60, 0xd0, 0x59, 0x06, 0x7d,
+  0x79, 0x99, 0xe0, 0xf4, 0x3e, 0x18, 0xe6, 0x62, 0x3d, 0x18, 0x66, 0xc4,
+  0xe0, 0x00, 0x40, 0x10, 0x0c, 0xbc, 0x1a, 0x14, 0x4a, 0x50, 0x20, 0xff,
+  0x40, 0x06, 0x85, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84,
+  0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x0e, 0x19, 0x31, 0x40, 0x00, 0x10,
+  0x04, 0x83, 0x8d, 0x07, 0x05, 0x16, 0x14, 0x0e, 0x22, 0x18, 0x31, 0x40,
+  0x00, 0x10, 0x04, 0x83, 0xad, 0x07, 0x85, 0x16, 0x14, 0x18, 0x22, 0x18,
+  0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0xcd, 0x07, 0x05, 0x17, 0x14, 0x24,
+  0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0xc5, 0x0c, 0x85, 0x16,
+  0x14, 0xdc, 0x3f, 0x08, 0x72, 0x50, 0xf0, 0xff, 0x60, 0x07, 0x85, 0xd1,
+  0x84, 0x00, 0xb8, 0xe0, 0xb1, 0x59, 0x82, 0x97, 0x19, 0x6e, 0x00, 0xcf,
+  0xe0, 0x07, 0x05, 0x30, 0x98, 0x65, 0xe0, 0x97, 0x7e, 0x09, 0x2a, 0xfd,
+  0x03, 0x18, 0x14, 0xe0, 0x82, 0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1,
+  0x80, 0x42, 0x43, 0x21, 0x06, 0x85, 0xf6, 0x13, 0x41, 0x61, 0xc4, 0xe0,
+  0x00, 0x40, 0x10, 0x0c, 0xa8, 0x34, 0x14, 0x62, 0x50, 0x08, 0x84, 0x0b,
+  0x86, 0x29, 0xf6, 0x0f, 0x6a, 0x50, 0x80, 0x0b, 0x9e, 0x1a, 0x31, 0x38,
+  0x00, 0x10, 0x04, 0x03, 0xaa, 0x0d, 0x05, 0x1b, 0x14, 0x68, 0xe4, 0x04,
+  0x85, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0xdc, 0x50, 0xb0, 0x41,
+  0x21, 0x10, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0xee, 0x78, 0xea, 0x6a, 0x3f,
+  0x18, 0xe6, 0xd8, 0x3d, 0x18, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4,
+  0xe0, 0x00, 0x40, 0x10, 0x0c, 0x3c, 0x39, 0x14, 0xc4, 0x50, 0x08, 0x41,
+  0xe1, 0x0d, 0x85, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84,
+  0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10,
+  0x04, 0x83, 0x2d, 0x0f, 0x85, 0x34, 0x14, 0x12, 0x22, 0x18, 0x31, 0x40,
+  0x00, 0x10, 0x04, 0x83, 0x4d, 0x0f, 0x05, 0x35, 0x14, 0x12, 0x22, 0x18,
+  0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x6d, 0x0f, 0x85, 0x35, 0x14, 0x12,
+  0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x65, 0x14, 0x05, 0x35,
+  0x14, 0x56, 0x50, 0x08, 0xec, 0x50, 0xd8, 0x41, 0x01, 0x0f, 0x85, 0xd1,
+  0x84, 0x00, 0xb8, 0xe0, 0xb1, 0x59, 0x82, 0x97, 0x19, 0x6e, 0xe8, 0xcf,
+  0x60, 0x0f, 0x05, 0x30, 0x98, 0x65, 0xf0, 0x97, 0x97, 0x09, 0xac, 0xff,
+  0x83, 0xff, 0x0f, 0xe2, 0x33, 0x1c, 0x91, 0x82, 0x01, 0x08, 0x0a, 0xc4,
+  0x37, 0xcb, 0xf0, 0x2f, 0x22, 0x13, 0x58, 0x08, 0x0a, 0x2a, 0x18, 0xc4,
+  0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x18, 0xf2,
+  0xb1, 0x22, 0x88, 0x4f, 0x11, 0xa5, 0x28, 0xe8, 0x70, 0x43, 0x30, 0x8a,
+  0x02, 0x18, 0xcc, 0x32, 0x80, 0x4c, 0xc8, 0x04, 0x36, 0xa4, 0xa0, 0x00,
+  0x9f, 0x59, 0x02, 0x93, 0x31, 0x14, 0x14, 0x88, 0xf8, 0xcc, 0x12, 0x98,
+  0xcc, 0x70, 0x04, 0x0d, 0x06, 0x29, 0x28, 0x08, 0xdf, 0x2c, 0xc3, 0xc8,
+  0x98, 0x4c, 0x60, 0x35, 0x18, 0xa8, 0xa0, 0x10, 0x1f, 0x0b, 0x1c, 0xfa,
+  0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e,
+  0x45, 0xc0, 0xa2, 0xa0, 0xc3, 0x0d, 0x81, 0x2b, 0x0a, 0x60, 0x30, 0xcb,
+  0x40, 0x32, 0x25, 0x13, 0x98, 0x0c, 0x0a, 0x43, 0x7c, 0x66, 0x09, 0x4c,
+  0xc6, 0x88, 0x1a, 0x14, 0xe0, 0x33, 0x4b, 0x60, 0x32, 0x03, 0x2d, 0x8f,
+  0x06, 0x32, 0x58, 0xc8, 0x10, 0x24, 0x23, 0x94, 0x8c, 0x1f, 0x0a, 0x22,
+  0x73, 0xc1, 0x30, 0x46, 0x83, 0x02, 0x0e, 0x0a, 0xf1, 0x19, 0x8e, 0x20,
+  0x95, 0x1c, 0x14, 0x88, 0x6f, 0x96, 0xe1, 0x64, 0x54, 0x26, 0x30, 0x1d,
+  0x14, 0x4a, 0x25, 0x3e, 0x16, 0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e,
+  0xb2, 0xc0, 0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xf0, 0x45, 0x41, 0x87,
+  0x1b, 0x02, 0x5e, 0x14, 0xc0, 0x60, 0x96, 0x01, 0x65, 0x52, 0x26, 0xb0,
+  0x41, 0x0c, 0x05, 0xf8, 0xcc, 0x12, 0xb8, 0x8c, 0xfd, 0xa0, 0x40, 0xc4,
+  0x67, 0x96, 0xc0, 0x65, 0x86, 0x23, 0x5e, 0x05, 0x0c, 0x05, 0xe1, 0x9b,
+  0x65, 0x58, 0x19, 0x97, 0x09, 0x0c, 0x56, 0xc2, 0x50, 0x88, 0x8f, 0x05,
+  0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x88, 0xe4, 0x63, 0x45,
+  0x10, 0x9f, 0x22, 0xd2, 0x51, 0xd0, 0xe1, 0x86, 0xe0, 0x1c, 0x05, 0x30,
+  0x98, 0x65, 0x60, 0x99, 0x96, 0x09, 0x2c, 0x0d, 0x85, 0x21, 0x3e, 0xb3,
+  0x04, 0x2e, 0x63, 0x84, 0x1b, 0x0a, 0xf0, 0x99, 0x25, 0x70, 0x99, 0x81,
+  0x96, 0x47, 0x43, 0x19, 0x2c, 0x65, 0x08, 0x96, 0x11, 0x5a, 0x86, 0xae,
+  0x54, 0xe6, 0x82, 0x61, 0x2e, 0x78, 0xea, 0xb6, 0xa7, 0x4e, 0x07, 0x85,
+  0x61, 0x2e, 0xee, 0x83, 0x61, 0x8e, 0x18, 0xe6, 0x88, 0x61, 0x46, 0x0c,
+  0x0e, 0x00, 0x04, 0xc1, 0xc0, 0xbb, 0x47, 0xe1, 0x1c, 0x05, 0x53, 0x14,
+  0xe8, 0x51, 0x18, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18,
+  0x84, 0xd1, 0x04, 0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00, 0x41,
+  0x30, 0xd8, 0xfc, 0x51, 0x70, 0x47, 0x21, 0x21, 0x82, 0x11, 0x03, 0x04,
+  0x00, 0x41, 0x30, 0xd8, 0xfe, 0x51, 0x78, 0x47, 0x21, 0x21, 0x82, 0x11,
+  0x03, 0x04, 0x00, 0x41, 0x30, 0xd8, 0x40, 0x52, 0x80, 0x47, 0x21, 0x21,
+  0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x58, 0x50, 0x52, 0x78, 0x47,
+  0x01, 0x16, 0x85, 0x60, 0x1f, 0x05, 0x70, 0x14, 0xfa, 0x51, 0x18, 0x4d,
+  0x08, 0x80, 0x0b, 0x1e, 0x9b, 0x25, 0x78, 0x99, 0x81, 0x96, 0xc7, 0x34,
+  0xf4, 0x45, 0x34, 0x89, 0x7c, 0x61, 0x09, 0x7e, 0x11, 0x5c, 0x46, 0x34,
+  0x89, 0x7e, 0x99, 0x65, 0x80, 0x19, 0x99, 0x11, 0xd5, 0x60, 0x38, 0x42,
+  0xf6, 0xc4, 0x51, 0x18, 0xbe, 0x9b, 0xbd, 0x61, 0x86, 0x1b, 0x82, 0x56,
+  0x14, 0xc8, 0xa0, 0x86, 0x40, 0x87, 0x23, 0xe6, 0xc5, 0x1c, 0x85, 0xe1,
+  0xab, 0x40, 0xd0, 0xab, 0x97, 0x61, 0x86, 0x1b, 0x02, 0x58, 0x14, 0xc8,
+  0xa0, 0x82, 0x41, 0x67, 0x19, 0x62, 0xc6, 0x6c, 0x82, 0xa3, 0x43, 0x61,
+  0x98, 0x5b, 0xfd, 0x60, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xf0,
+  0x5e, 0x52, 0xf8, 0x47, 0xc1, 0x17, 0x05, 0x96, 0x14, 0x46, 0x13, 0x02,
+  0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a,
+  0x38, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x36, 0x9b, 0x14, 0x4c,
+  0x52, 0x38, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb6, 0x9b,
+  0x14, 0x4e, 0x52, 0x60, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c,
+  0x36, 0x9c, 0x14, 0x50, 0x52, 0x90, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40,
+  0x10, 0x0c, 0x16, 0xb0, 0x14, 0x4e, 0x52, 0x40, 0x47, 0x21, 0x98, 0x49,
+  0x01, 0x1f, 0x85, 0x9a, 0x14, 0x46, 0x13, 0x02, 0xe0, 0x82, 0xc7, 0x66,
+  0x09, 0xcc, 0x66, 0xb8, 0x41, 0x57, 0x83, 0x9c, 0x14, 0xc0, 0x60, 0x96,
+  0x61, 0x66, 0x68, 0x26, 0xa8, 0x71, 0x14, 0x54, 0x52, 0x80, 0x0b, 0x9e,
+  0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x4a, 0x2c, 0x85, 0x95, 0x14,
+  0xd8, 0x8f, 0x1f, 0x85, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0xc6,
+  0x52, 0x58, 0x49, 0x21, 0x10, 0x2e, 0x18, 0xa6, 0xcc, 0x51, 0x78, 0x49,
+  0x01, 0x2e, 0x78, 0x6a, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xa8, 0xb3,
+  0x14, 0x60, 0x52, 0x70, 0x99, 0x90, 0x14, 0x46, 0x0c, 0x0e, 0x00, 0x04,
+  0xc1, 0x80, 0x42, 0x4b, 0x01, 0x26, 0x85, 0x40, 0xb8, 0x60, 0x98, 0x0b,
+  0x9e, 0xba, 0xe3, 0xa9, 0x7b, 0x45, 0x61, 0x98, 0x33, 0xff, 0x60, 0x98,
+  0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xf0,
+  0xd8, 0x52, 0xe0, 0x49, 0x61, 0x1f, 0x85, 0xb4, 0x14, 0x46, 0x13, 0x02,
+  0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a,
+  0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb6, 0xb9, 0x14, 0xc6,
+  0x52, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x36, 0xba,
+  0x14, 0xc8, 0x52, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c,
+  0xb6, 0xba, 0x14, 0xca, 0x52, 0x48, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40,
+  0x10, 0x0c, 0x96, 0xbe, 0x14, 0xc8, 0x52, 0x28, 0x49, 0x21, 0x80, 0x4b,
+  0xa1, 0x26, 0x05, 0xb9, 0x14, 0x46, 0x13, 0x02, 0xe0, 0x82, 0xc7, 0x66,
+  0x09, 0xcc, 0x66, 0xb8, 0xe1, 0x5e, 0x83, 0xba, 0x14, 0xc0, 0x60, 0x96,
+  0xa1, 0x66, 0xcc, 0x26, 0xb0, 0x7b, 0x14, 0xf2, 0x51, 0x88, 0xcf, 0x70,
+  0x04, 0x0a, 0x06, 0xfa, 0x28, 0x10, 0xdf, 0x2c, 0x83, 0xcd, 0xe4, 0x4c,
+  0x60, 0xfb, 0x28, 0xa4, 0x60, 0x10, 0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30,
+  0xcc, 0x05, 0x4f, 0x59, 0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xfc,
+  0xa5, 0xa0, 0xc3, 0x0d, 0x41, 0x5f, 0x0a, 0x60, 0x30, 0xcb, 0x70, 0x33,
+  0x38, 0x13, 0xd8, 0x30, 0x92, 0x02, 0x7c, 0x66, 0x09, 0x7a, 0xc6, 0x44,
+  0x52, 0x20, 0xe2, 0x33, 0x4b, 0xd0, 0x33, 0xc3, 0x11, 0x33, 0x18, 0x8c,
+  0xa4, 0x20, 0x7c, 0xb3, 0x0c, 0x3a, 0xd3, 0x33, 0x81, 0xd1, 0x60, 0x40,
+  0x92, 0x42, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65,
+  0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xa1, 0x9a, 0x82, 0x0e, 0x37,
+  0x04, 0xa8, 0x29, 0x80, 0xc1, 0x2c, 0xc3, 0xce, 0xf0, 0x4c, 0x60, 0x2c,
+  0x29, 0x0c, 0xf1, 0x99, 0x25, 0xe8, 0x19, 0x23, 0x5e, 0x52, 0x80, 0xcf,
+  0x2c, 0x41, 0xcf, 0x0c, 0xb4, 0x3c, 0xda, 0xcd, 0x60, 0x38, 0x43, 0xec,
+  0x8c, 0xc0, 0x33, 0x7c, 0x28, 0xe4, 0xcc, 0x05, 0xc3, 0x98, 0x4b, 0x0a,
+  0x32, 0x29, 0xc4, 0x67, 0x38, 0xc2, 0x6f, 0x66, 0x52, 0x20, 0xbe, 0x59,
+  0x06, 0x9f, 0x09, 0x9b, 0xc0, 0x68, 0x52, 0xf8, 0x9b, 0xf8, 0x58, 0x30,
+  0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04,
+  0xf1, 0x29, 0x02, 0x37, 0x05, 0x1d, 0x6e, 0x08, 0x6c, 0x53, 0x00, 0x83,
+  0x59, 0x86, 0x9f, 0x01, 0x9b, 0xc0, 0x06, 0x9e, 0x14, 0xe0, 0x33, 0x4b,
+  0x50, 0x36, 0x96, 0x93, 0x02, 0x11, 0x9f, 0x59, 0x82, 0xb2, 0x19, 0x8e,
+  0x48, 0x1d, 0x9d, 0x14, 0x84, 0x6f, 0x96, 0x41, 0x6c, 0xca, 0x26, 0x30,
+  0xd5, 0xd9, 0x49, 0x21, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b,
+  0x9e, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x18, 0x4f, 0x41,
+  0x87, 0x1b, 0x82, 0xf0, 0x14, 0xc0, 0x60, 0x96, 0x61, 0x6c, 0xc8, 0x26,
+  0xb0, 0xb1, 0x14, 0x86, 0xf8, 0xcc, 0x12, 0x94, 0x8d, 0x11, 0x68, 0x29,
+  0xc0, 0x67, 0x96, 0xa0, 0x6c, 0x06, 0x5a, 0x1e, 0xed, 0x67, 0x30, 0xb0,
+  0x21, 0xc6, 0x46, 0x20, 0x1b, 0xb4, 0x0b, 0x9b, 0x0b, 0x86, 0xb9, 0xe0,
+  0xa9, 0xdb, 0x9e, 0x3a, 0x9a, 0x14, 0x86, 0xb9, 0x35, 0x14, 0x86, 0x39,
+  0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x2f,
+  0x3e, 0x85, 0xf0, 0x14, 0x40, 0x53, 0x70, 0x4f, 0x61, 0x34, 0x21, 0x00,
+  0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88,
+  0x44, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0xc3, 0x4f, 0x01, 0x3d,
+  0x85, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0xcb, 0x4f,
+  0x21, 0x3d, 0x85, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60,
+  0xd3, 0x4f, 0x41, 0x3d, 0x85, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04,
+  0xc1, 0x60, 0x11, 0x51, 0x21, 0x3d, 0x05, 0xd5, 0x14, 0x82, 0xfa, 0x14,
+  0x74, 0x53, 0xb8, 0x4f, 0x61, 0x34, 0x21, 0x00, 0x2e, 0x78, 0x6c, 0x96,
+  0xc0, 0x6c, 0x06, 0x5a, 0x1e, 0xd3, 0x88, 0x19, 0xbb, 0x25, 0x60, 0x86,
+  0x25, 0x66, 0x46, 0x28, 0x1b, 0xbb, 0x25, 0x68, 0xc6, 0xfe, 0x36, 0x88,
+  0x4d, 0x01, 0x3e, 0xb3, 0x0c, 0x67, 0x93, 0x36, 0x7d, 0x1b, 0x0c, 0x47,
+  0x84, 0x6e, 0xd0, 0x9b, 0xc2, 0xf0, 0x9d, 0xe8, 0x06, 0xc3, 0x0c, 0x37,
+  0x04, 0xa8, 0x29, 0x90, 0x41, 0x0d, 0x81, 0x0e, 0x47, 0x14, 0xe1, 0x29,
+  0x0c, 0x5f, 0x05, 0x82, 0xde, 0x31, 0xcc, 0x70, 0x43, 0xb0, 0x9a, 0x02,
+  0x19, 0x54, 0x30, 0xe8, 0x2c, 0x03, 0xda, 0xf4, 0x4d, 0x70, 0x6f, 0x29,
+  0x0c, 0x73, 0xa6, 0x28, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06,
+  0x9e, 0x8a, 0x0a, 0xfa, 0x29, 0xe4, 0xa6, 0x70, 0xa2, 0xc2, 0x68, 0x42,
+  0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43,
+  0x11, 0x87, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x16, 0xa3, 0x42,
+  0x88, 0x0a, 0x07, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x26,
+  0xa3, 0x82, 0x88, 0x0a, 0x0c, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
+  0xc1, 0x36, 0xa3, 0xc2, 0x88, 0x0a, 0x12, 0x11, 0x8c, 0x18, 0x28, 0x00,
+  0x08, 0x82, 0xc1, 0xb2, 0xa3, 0x82, 0x88, 0x0a, 0xe3, 0x29, 0x04, 0x2e,
+  0x2a, 0xcc, 0xa7, 0x00, 0xa3, 0xc2, 0x68, 0x42, 0x00, 0x5c, 0xf0, 0xd8,
+  0x2c, 0x41, 0xdf, 0x0c, 0x37, 0xd4, 0x6e, 0x40, 0xa3, 0x02, 0x18, 0xcc,
+  0x32, 0xa8, 0xcd, 0xda, 0x04, 0xe5, 0x9b, 0x42, 0x89, 0x0a, 0x70, 0xc1,
+  0x53, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0xf5, 0xa8, 0x60, 0xa2,
+  0xc2, 0xee, 0x06, 0xf7, 0x29, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x01,
+  0xe5, 0xa3, 0x82, 0x89, 0x0a, 0x81, 0x70, 0xc1, 0x30, 0x15, 0x9e, 0x82,
+  0x8a, 0x0a, 0x70, 0xc1, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40,
+  0x89, 0xa9, 0xb0, 0xa2, 0x02, 0x18, 0xf0, 0xa7, 0x30, 0x62, 0x70, 0x00,
+  0x20, 0x08, 0x06, 0xd4, 0x98, 0x0a, 0x2b, 0x2a, 0x04, 0xc2, 0x05, 0xc3,
+  0x5c, 0xf0, 0xd4, 0x1d, 0x4f, 0x9d, 0x6a, 0x0a, 0xc3, 0x5c, 0x38, 0x0a,
+  0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82,
+  0x81, 0x77, 0xa6, 0xc2, 0x8d, 0x0a, 0xf6, 0x29, 0x90, 0xa9, 0x30, 0x9a,
+  0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4,
+  0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0xb9, 0xa9,
+  0xe0, 0xa3, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0,
+  0xbd, 0xa9, 0xf0, 0xa3, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82,
+  0x60, 0xb0, 0xc1, 0xa9, 0x00, 0xa6, 0x42, 0x42, 0x04, 0x23, 0x06, 0x0a,
+  0x00, 0x82, 0x60, 0xb0, 0xe0, 0xa9, 0xf0, 0xa3, 0x02, 0x88, 0x0a, 0xc1,
+  0x9a, 0x0a, 0x30, 0x2a, 0xb4, 0xa9, 0x30, 0x9a, 0x10, 0x00, 0x17, 0x3c,
+  0x36, 0x4b, 0xd0, 0x37, 0xc3, 0x0d, 0xf2, 0x1b, 0xc0, 0xa9, 0x00, 0x06,
+  0xb3, 0x0c, 0x6c, 0xd3, 0x37, 0x81, 0xc9, 0xa7, 0x40, 0x9f, 0x42, 0x7c,
+  0x86, 0x23, 0xee, 0x37, 0xa8, 0x4f, 0x81, 0xf8, 0x66, 0x19, 0xda, 0x06,
+  0x6e, 0x02, 0xb3, 0x4f, 0x01, 0x7f, 0x83, 0xf8, 0x58, 0x30, 0xd0, 0xe7,
+  0x82, 0x61, 0x2e, 0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29,
+  0x42, 0x4f, 0x05, 0x1d, 0x6e, 0x08, 0xf0, 0x54, 0x00, 0x83, 0x59, 0x06,
+  0xb7, 0x79, 0x9b, 0xc0, 0x06, 0xff, 0x14, 0xe0, 0x33, 0x4b, 0x40, 0x37,
+  0xd6, 0x9f, 0x02, 0x11, 0x9f, 0x59, 0x02, 0xba, 0x19, 0x8e, 0x10, 0xe1,
+  0xc0, 0x3f, 0x05, 0xe1, 0x9b, 0x65, 0x88, 0x1b, 0xba, 0x09, 0x6c, 0x84,
+  0x83, 0xff, 0x14, 0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xe0,
+  0x29, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0x52, 0x15, 0x74,
+  0xb8, 0x21, 0x18, 0x55, 0x01, 0x0c, 0x66, 0x19, 0xe4, 0x66, 0x6e, 0x02,
+  0x3b, 0x51, 0x61, 0x88, 0xcf, 0x2c, 0x01, 0xdd, 0x18, 0xa1, 0xa2, 0x02,
+  0x7c, 0x66, 0x09, 0xe8, 0x66, 0xa0, 0xe5, 0xd1, 0xdc, 0x06, 0x7b, 0x1b,
+  0x42, 0x6e, 0x84, 0xb9, 0xd1, 0xc1, 0x01, 0x6e, 0x2e, 0x18, 0xc6, 0x52,
+  0x54, 0x68, 0x51, 0x21, 0x3e, 0xc3, 0x11, 0xb0, 0xe0, 0xa2, 0x02, 0xf1,
+  0xcd, 0x32, 0xd4, 0x0d, 0xde, 0x04, 0xf6, 0xa2, 0x42, 0x2c, 0xc4, 0xc7,
+  0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x18, 0xf2, 0xb1,
+  0x22, 0x88, 0x4f, 0x11, 0xb3, 0x2a, 0xe8, 0x70, 0x43, 0x10, 0xab, 0x02,
+  0x18, 0xcc, 0x32, 0xd8, 0xcd, 0xdd, 0x04, 0x36, 0xdc, 0xa8, 0x00, 0x9f,
+  0x59, 0x02, 0xbe, 0x31, 0x1a, 0x15, 0x88, 0xf8, 0xcc, 0x12, 0xf0, 0xcd,
+  0x70, 0xc4, 0x2e, 0xd4, 0xa8, 0x20, 0x7c, 0xb3, 0x0c, 0x79, 0xc3, 0x37,
+  0x81, 0xf1, 0x82, 0x8d, 0x0a, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3,
+  0x5c, 0xf0, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x84, 0xaf,
+  0x0a, 0x3a, 0xdc, 0x10, 0xf0, 0xaa, 0x00, 0x06, 0xb3, 0x0c, 0x7a, 0xb3,
+  0x37, 0x81, 0xf9, 0xa8, 0x30, 0xc4, 0x67, 0x96, 0x80, 0x6f, 0x8c, 0x18,
+  0x53, 0x01, 0x3e, 0xb3, 0x04, 0x7c, 0x33, 0xd0, 0xf2, 0x68, 0x76, 0x83,
+  0xdd, 0x0d, 0xa1, 0x37, 0xc2, 0xde, 0xb0, 0x06, 0xde, 0x5c, 0x30, 0xcc,
+  0x05, 0x4f, 0xdd, 0xf6, 0xd4, 0xbd, 0xa8, 0x30, 0xcc, 0x99, 0xa5, 0x30,
+  0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18,
+  0x78, 0xec, 0x2a, 0xf0, 0xaa, 0xb0, 0xa7, 0x42, 0xba, 0x0a, 0xa3, 0x09,
+  0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c,
+  0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xdb, 0xbc, 0x0a,
+  0xe3, 0x2a, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x1b,
+  0xbd, 0x0a, 0xe4, 0x2a, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08,
+  0x06, 0x5b, 0xbd, 0x0a, 0xe5, 0x2a, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00,
+  0x20, 0x08, 0x06, 0x4b, 0xbf, 0x0a, 0xe4, 0x2a, 0x94, 0xaa, 0x10, 0xc0,
+  0xab, 0x50, 0xab, 0x82, 0xbc, 0x0a, 0xa3, 0x09, 0x01, 0x70, 0xc1, 0x63,
+  0xb3, 0x04, 0x7d, 0x33, 0xd0, 0xf2, 0x98, 0x06, 0xda, 0xb0, 0x35, 0x71,
+  0x36, 0x2c, 0xa1, 0x36, 0x02, 0xdf, 0xb0, 0x35, 0xb1, 0x36, 0xb3, 0x0c,
+  0x7e, 0x03, 0x3a, 0x77, 0x1c, 0x0c, 0x47, 0xf0, 0x71, 0x70, 0xab, 0xc2,
+  0xf0, 0x5d, 0x1f, 0x07, 0xc3, 0x0c, 0x37, 0x04, 0xa2, 0x2a, 0x90, 0x41,
+  0x0d, 0x81, 0x0e, 0x47, 0xfc, 0xc3, 0xae, 0x0a, 0xc3, 0x57, 0x81, 0xa0,
+  0x17, 0x12, 0xc3, 0x0c, 0x37, 0x04, 0xa5, 0x2a, 0x90, 0x41, 0x05, 0x83,
+  0xce, 0x32, 0xfc, 0x0d, 0xed, 0x04, 0x97, 0xa6, 0xc2, 0x30, 0x07, 0x9a,
+  0xc2, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xe0, 0x91, 0xac, 0x40,
+  0xaf, 0xc2, 0xac, 0x0a, 0x21, 0x2b, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82,
+  0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x71, 0xc8, 0x88,
+  0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0x2b, 0x2b, 0xec, 0xab, 0x70, 0x10,
+  0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0x2c, 0x2b, 0xf0, 0xab,
+  0xc0, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0x2d, 0x2b,
+  0xf4, 0xab, 0x20, 0x11, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c,
+  0x35, 0x2b, 0xf0, 0xab, 0xd0, 0xab, 0x42, 0x80, 0xb2, 0x42, 0xbb, 0x0a,
+  0x2a, 0x2b, 0x8c, 0x26, 0x04, 0xc0, 0x05, 0x8f, 0xcd, 0x12, 0xd0, 0xce,
+  0x70, 0xc3, 0x2b, 0x07, 0x2e, 0x2b, 0x80, 0xc1, 0x2c, 0x43, 0xe8, 0x88,
+  0x4e, 0x50, 0xb8, 0x2a, 0xfc, 0xab, 0x00, 0x17, 0x3c, 0x35, 0x62, 0x70,
+  0x00, 0x20, 0x08, 0x06, 0xd4, 0xcd, 0x0a, 0x20, 0x2b, 0xd8, 0x72, 0x10,
+  0xaf, 0xc2, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50, 0x38, 0x2b, 0x80,
+  0xac, 0x10, 0x08, 0x17, 0x0c, 0x53, 0xbb, 0x2a, 0x90, 0xac, 0x00, 0x17,
+  0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x14, 0xcf, 0x0a, 0x25,
+  0x2b, 0xe8, 0x84, 0xbd, 0x0a, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40,
+  0xf5, 0xac, 0x50, 0xb2, 0x42, 0x20, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0xdd,
+  0xf1, 0xd4, 0x91, 0xaa, 0x30, 0xcc, 0xed, 0xa6, 0x30, 0xcc, 0x11, 0xc3,
+  0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x78, 0x61, 0x2b,
+  0xc4, 0xac, 0x00, 0xaf, 0x82, 0xcf, 0x0a, 0xa3, 0x09, 0x01, 0x30, 0x9a,
+  0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32,
+  0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x1b, 0xda, 0x0a, 0x38, 0x2b, 0x24,
+  0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x5b, 0xda, 0x0a, 0x39,
+  0x2b, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x9b, 0xda,
+  0x0a, 0x3a, 0x2b, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06,
+  0x8b, 0xdc, 0x0a, 0x39, 0x2b, 0xe8, 0xab, 0x10, 0x94, 0xad, 0xa0, 0xb2,
+  0xc2, 0xd9, 0x0a, 0xa3, 0x09, 0x01, 0x70, 0xc1, 0x63, 0xb3, 0x04, 0xb4,
+  0x33, 0xdc, 0xc0, 0xce, 0x81, 0xda, 0x0a, 0x60, 0x30, 0xcb, 0x30, 0x3a,
+  0xb4, 0x13, 0x18, 0xbb, 0x0a, 0xee, 0x2a, 0xc4, 0x67, 0x38, 0x42, 0x9e,
+  0x83, 0x77, 0x15, 0x88, 0x6f, 0x96, 0x81, 0x74, 0x4e, 0x27, 0x30, 0x78,
+  0x15, 0xe6, 0x39, 0x88, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x82,
+  0xa7, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xe8, 0x56, 0xd0,
+  0xe1, 0x86, 0x40, 0x6e, 0x05, 0x30, 0x98, 0x65, 0x28, 0x1d, 0xd3, 0x09,
+  0x6c, 0xc0, 0x57, 0x01, 0x3e, 0xb3, 0x04, 0xab, 0x63, 0xf7, 0x2a, 0x10,
+  0xf1, 0x99, 0x25, 0x58, 0x9d, 0xe1, 0x88, 0x7e, 0x0e, 0xf0, 0x55, 0x10,
+  0xbe, 0x59, 0x06, 0xd4, 0x59, 0x9d, 0xc0, 0xfc, 0x39, 0xc8, 0x57, 0x21,
   0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20, 0x92,
-  0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x18, 0x4f, 0x41, 0x87, 0x1b, 0x82, 0xf0,
-  0x14, 0xc0, 0x60, 0x96, 0x61, 0x6c, 0xc8, 0x26, 0xb0, 0xb1, 0x14, 0x86,
-  0xf8, 0xcc, 0x12, 0x94, 0x8d, 0x11, 0x68, 0x29, 0xc0, 0x67, 0x96, 0xa0,
-  0x6c, 0x06, 0x5a, 0x1e, 0xed, 0x67, 0x30, 0xb0, 0x21, 0xc6, 0x46, 0x20,
-  0x1b, 0xb4, 0x0b, 0x9b, 0x0b, 0x86, 0xb9, 0xe0, 0xa9, 0xdb, 0x9e, 0x3a,
-  0x9a, 0x14, 0x86, 0xb9, 0x35, 0x14, 0x86, 0x39, 0x62, 0x98, 0x23, 0x86,
-  0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x2f, 0x3e, 0x85, 0xf0, 0x14,
-  0x40, 0x53, 0x70, 0x4f, 0x61, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60,
-  0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10,
-  0x00, 0x04, 0xc1, 0x60, 0xc3, 0x4f, 0x01, 0x3d, 0x85, 0x84, 0x08, 0x46,
-  0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0xcb, 0x4f, 0x21, 0x3d, 0x85, 0x84,
-  0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0xd3, 0x4f, 0x41, 0x3d,
-  0x85, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0x11, 0x51,
-  0x21, 0x3d, 0x05, 0xd5, 0x14, 0x82, 0xfa, 0x14, 0x74, 0x53, 0xb8, 0x4f,
-  0x61, 0x34, 0x21, 0x00, 0x2e, 0x78, 0x6c, 0x96, 0xc0, 0x6c, 0x06, 0x5a,
-  0x1e, 0xd3, 0x88, 0x19, 0xbb, 0x25, 0x60, 0x86, 0x25, 0x66, 0x46, 0x28,
-  0x1b, 0xbb, 0x25, 0x68, 0xc6, 0xfe, 0x36, 0x88, 0x4d, 0x01, 0x3e, 0xb3,
-  0x0c, 0x67, 0x93, 0x36, 0x7d, 0x1b, 0x0c, 0x47, 0x84, 0x6e, 0xd0, 0x9b,
-  0xc2, 0xf0, 0x9d, 0xe8, 0x06, 0xc3, 0x0c, 0x37, 0x04, 0xa8, 0x29, 0x90,
-  0x41, 0x0d, 0x81, 0x0e, 0x47, 0x14, 0xe1, 0x29, 0x0c, 0x5f, 0x05, 0x82,
-  0xde, 0x31, 0xcc, 0x70, 0x43, 0xb0, 0x9a, 0x02, 0x19, 0x54, 0x30, 0xe8,
-  0x2c, 0x03, 0xda, 0xf4, 0x4d, 0x70, 0x6f, 0x29, 0x0c, 0x73, 0xa6, 0x28,
-  0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x9e, 0x8a, 0x0a, 0xfa,
-  0x29, 0xe4, 0xa6, 0x70, 0xa2, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08,
-  0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x87, 0x8c, 0x18,
-  0x20, 0x00, 0x08, 0x82, 0xc1, 0x16, 0xa3, 0x42, 0x88, 0x0a, 0x07, 0x11,
-  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x26, 0xa3, 0x82, 0x88, 0x0a,
-  0x0c, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x36, 0xa3, 0xc2,
-  0x88, 0x0a, 0x12, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0xb2,
-  0xa3, 0x82, 0x88, 0x0a, 0xe3, 0x29, 0x04, 0x2e, 0x2a, 0xcc, 0xa7, 0x00,
-  0xa3, 0xc2, 0x68, 0x42, 0x00, 0x5c, 0xf0, 0xd8, 0x2c, 0x41, 0xdf, 0x0c,
-  0x37, 0xd4, 0x6e, 0x40, 0xa3, 0x02, 0x18, 0xcc, 0x32, 0xa8, 0xcd, 0xda,
-  0x04, 0xe5, 0x9b, 0x42, 0x89, 0x0a, 0x70, 0xc1, 0x53, 0x23, 0x06, 0x07,
-  0x00, 0x82, 0x60, 0x40, 0xf5, 0xa8, 0x60, 0xa2, 0xc2, 0xee, 0x06, 0xf7,
-  0x29, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x01, 0xe5, 0xa3, 0x82, 0x89,
-  0x0a, 0x81, 0x70, 0xc1, 0x30, 0x15, 0x9e, 0x82, 0x8a, 0x0a, 0x70, 0xc1,
-  0x53, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0x89, 0xa9, 0xb0, 0xa2,
-  0x02, 0x18, 0xf0, 0xa7, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0xd4,
-  0x98, 0x0a, 0x2b, 0x2a, 0x04, 0xc2, 0x05, 0xc3, 0x5c, 0xf0, 0xd4, 0x1d,
-  0x4f, 0x9d, 0x6a, 0x0a, 0xc3, 0x5c, 0x38, 0x0a, 0xc3, 0x1c, 0x31, 0xcc,
-  0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x81, 0x77, 0xa6, 0xc2,
-  0x8d, 0x0a, 0xf6, 0x29, 0x90, 0xa9, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09,
+  0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xf8, 0x5b, 0x41, 0x87, 0x1b, 0x82, 0xbe,
+  0x15, 0xc0, 0x60, 0x96, 0x21, 0x75, 0x54, 0x27, 0xb0, 0x90, 0x15, 0x86,
+  0xf8, 0xcc, 0x12, 0xac, 0x8e, 0x11, 0x24, 0x2b, 0xc0, 0x67, 0x96, 0x60,
+  0x75, 0x06, 0x5a, 0x1e, 0xad, 0x74, 0x30, 0xd3, 0x21, 0x52, 0x47, 0x50,
+  0x1d, 0x18, 0x1d, 0x4e, 0xe7, 0x82, 0x61, 0x6c, 0x64, 0x85, 0x93, 0x15,
+  0xe2, 0x33, 0x1c, 0xa1, 0x1a, 0x28, 0x2b, 0x10, 0xdf, 0x2c, 0x03, 0xeb,
+  0xbc, 0x4e, 0x60, 0x29, 0x2b, 0xac, 0x46, 0x7c, 0x2c, 0x18, 0xe8, 0x73,
+  0xc1, 0x30, 0x17, 0x3c, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14,
+  0xd1, 0xba, 0x82, 0x0e, 0x37, 0x04, 0xab, 0x2b, 0x80, 0xc1, 0x2c, 0x43,
+  0xeb, 0xb8, 0x4e, 0x60, 0x43, 0xcc, 0x0a, 0xf0, 0x99, 0x25, 0x98, 0x1d,
+  0x73, 0x59, 0x81, 0x88, 0xcf, 0x2c, 0xc1, 0xec, 0x0c, 0x47, 0xd4, 0xc6,
+  0xcb, 0x0a, 0xc2, 0x37, 0xcb, 0x00, 0x3b, 0xb3, 0x13, 0x98, 0x6d, 0xc0,
+  0xac, 0x10, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59,
+  0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xe0, 0xae, 0xa0, 0xc3, 0x0d,
+  0x81, 0xed, 0x0a, 0x60, 0x30, 0xcb, 0x10, 0x3b, 0xb2, 0x13, 0x18, 0xce,
+  0x0a, 0x43, 0x7c, 0x66, 0x09, 0x66, 0xc7, 0x88, 0x9e, 0x15, 0xe0, 0x33,
+  0x4b, 0x30, 0x3b, 0x03, 0x2d, 0x8f, 0xd6, 0x3a, 0x98, 0xeb, 0x10, 0xb1,
+  0x23, 0xc8, 0x0e, 0xe8, 0xbc, 0xce, 0x05, 0xc3, 0x5c, 0xf0, 0xd4, 0x6d,
+  0x4f, 0x5d, 0xca, 0x0a, 0xc3, 0x1c, 0x98, 0x0a, 0xc3, 0x1c, 0x31, 0xcc,
+  0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x81, 0x67, 0xbe, 0x82,
+  0xed, 0x0a, 0x75, 0x2b, 0x8c, 0xaf, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09,
   0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23,
-  0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0xb9, 0xa9, 0xe0, 0xa3, 0x42, 0x42,
-  0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0xbd, 0xa9, 0xf0, 0xa3,
-  0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0xc1, 0xa9,
-  0x00, 0xa6, 0x42, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0,
-  0xe0, 0xa9, 0xf0, 0xa3, 0x02, 0x88, 0x0a, 0xc1, 0x9a, 0x0a, 0x30, 0x2a,
-  0xb4, 0xa9, 0x30, 0x9a, 0x10, 0x00, 0x17, 0x3c, 0x36, 0x4b, 0xd0, 0x37,
-  0xc3, 0x0d, 0xf2, 0x1b, 0xc0, 0xa9, 0x00, 0x06, 0xb3, 0x0c, 0x6c, 0xd3,
-  0x37, 0x81, 0xc9, 0xa7, 0x40, 0x9f, 0x42, 0x7c, 0x86, 0x23, 0xee, 0x37,
-  0xa8, 0x4f, 0x81, 0xf8, 0x66, 0x19, 0xda, 0x06, 0x6e, 0x02, 0xb3, 0x4f,
-  0x01, 0x7f, 0x83, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78,
-  0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x42, 0x4f, 0x05, 0x1d,
-  0x6e, 0x08, 0xf0, 0x54, 0x00, 0x83, 0x59, 0x06, 0xb7, 0x79, 0x9b, 0xc0,
-  0x06, 0xff, 0x14, 0xe0, 0x33, 0x4b, 0x40, 0x37, 0xd6, 0x9f, 0x02, 0x11,
-  0x9f, 0x59, 0x02, 0xba, 0x19, 0x8e, 0x10, 0xe1, 0xc0, 0x3f, 0x05, 0xe1,
-  0x9b, 0x65, 0x88, 0x1b, 0xba, 0x09, 0x6c, 0x84, 0x83, 0xff, 0x14, 0xe2,
-  0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x22, 0xf9,
-  0x58, 0x11, 0xc4, 0xa7, 0x88, 0x52, 0x15, 0x74, 0xb8, 0x21, 0x18, 0x55,
-  0x01, 0x0c, 0x66, 0x19, 0xe4, 0x66, 0x6e, 0x02, 0x3b, 0x51, 0x61, 0x88,
-  0xcf, 0x2c, 0x01, 0xdd, 0x18, 0xa1, 0xa2, 0x02, 0x7c, 0x66, 0x09, 0xe8,
-  0x66, 0xa0, 0xe5, 0xd1, 0xdc, 0x06, 0x7b, 0x1b, 0x42, 0x6e, 0x84, 0xb9,
-  0xd1, 0xc1, 0x01, 0x6e, 0x2e, 0x18, 0xc6, 0x52, 0x54, 0x68, 0x51, 0x21,
-  0x3e, 0xc3, 0x11, 0xb0, 0xe0, 0xa2, 0x02, 0xf1, 0xcd, 0x32, 0xd4, 0x0d,
-  0xde, 0x04, 0xf6, 0xa2, 0x42, 0x2c, 0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17,
-  0x0c, 0x73, 0xc1, 0x53, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11,
-  0xb3, 0x2a, 0xe8, 0x70, 0x43, 0x10, 0xab, 0x02, 0x18, 0xcc, 0x32, 0xd8,
-  0xcd, 0xdd, 0x04, 0x36, 0xdc, 0xa8, 0x00, 0x9f, 0x59, 0x02, 0xbe, 0x31,
-  0x1a, 0x15, 0x88, 0xf8, 0xcc, 0x12, 0xf0, 0xcd, 0x70, 0xc4, 0x2e, 0xd4,
-  0xa8, 0x20, 0x7c, 0xb3, 0x0c, 0x79, 0xc3, 0x37, 0x81, 0xf1, 0x82, 0x8d,
-  0x0a, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05,
-  0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x84, 0xaf, 0x0a, 0x3a, 0xdc, 0x10,
-  0xf0, 0xaa, 0x00, 0x06, 0xb3, 0x0c, 0x7a, 0xb3, 0x37, 0x81, 0xf9, 0xa8,
-  0x30, 0xc4, 0x67, 0x96, 0x80, 0x6f, 0x8c, 0x18, 0x53, 0x01, 0x3e, 0xb3,
-  0x04, 0x7c, 0x33, 0xd0, 0xf2, 0x68, 0x76, 0x83, 0xdd, 0x0d, 0xa1, 0x37,
-  0xc2, 0xde, 0xb0, 0x06, 0xde, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0xdd, 0xf6,
-  0xd4, 0xbd, 0xa8, 0x30, 0xcc, 0x99, 0xa5, 0x30, 0xcc, 0x11, 0xc3, 0x1c,
-  0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x78, 0xec, 0x2a, 0xf0,
-  0xaa, 0xb0, 0xa7, 0x42, 0xba, 0x0a, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20,
-  0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62,
-  0x80, 0x00, 0x20, 0x08, 0x06, 0xdb, 0xbc, 0x0a, 0xe3, 0x2a, 0x24, 0x44,
-  0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x1b, 0xbd, 0x0a, 0xe4, 0x2a,
-  0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x5b, 0xbd, 0x0a,
-  0xe5, 0x2a, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x4b,
-  0xbf, 0x0a, 0xe4, 0x2a, 0x94, 0xaa, 0x10, 0xc0, 0xab, 0x50, 0xab, 0x82,
-  0xbc, 0x0a, 0xa3, 0x09, 0x01, 0x70, 0xc1, 0x63, 0xb3, 0x04, 0x7d, 0x33,
-  0xd0, 0xf2, 0x98, 0x06, 0xda, 0xb0, 0x35, 0x71, 0x36, 0x2c, 0xa1, 0x36,
-  0x02, 0xdf, 0xb0, 0x35, 0xb1, 0x36, 0xb3, 0x0c, 0x7e, 0x03, 0x3a, 0x77,
-  0x1c, 0x0c, 0x47, 0xf0, 0x71, 0x70, 0xab, 0xc2, 0xf0, 0x5d, 0x1f, 0x07,
-  0xc3, 0x0c, 0x37, 0x04, 0xa2, 0x2a, 0x90, 0x41, 0x0d, 0x81, 0x0e, 0x47,
-  0xfc, 0xc3, 0xae, 0x0a, 0xc3, 0x57, 0x81, 0xa0, 0x17, 0x12, 0xc3, 0x0c,
-  0x37, 0x04, 0xa5, 0x2a, 0x90, 0x41, 0x05, 0x83, 0xce, 0x32, 0xfc, 0x0d,
-  0xed, 0x04, 0x97, 0xa6, 0xc2, 0x30, 0x07, 0x9a, 0xc2, 0x30, 0x23, 0x06,
-  0x07, 0x00, 0x82, 0x60, 0xe0, 0x91, 0xac, 0x40, 0xaf, 0xc2, 0xac, 0x0a,
-  0x21, 0x2b, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c,
-  0xc2, 0x68, 0x02, 0x31, 0x14, 0x71, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20,
-  0x18, 0x6c, 0x2b, 0x2b, 0xec, 0xab, 0x70, 0x10, 0xc1, 0x88, 0x01, 0x02,
-  0x80, 0x20, 0x18, 0x6c, 0x2c, 0x2b, 0xf0, 0xab, 0xc0, 0x10, 0xc1, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0x2d, 0x2b, 0xf4, 0xab, 0x20, 0x11,
-  0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c, 0x35, 0x2b, 0xf0, 0xab,
-  0xd0, 0xab, 0x42, 0x80, 0xb2, 0x42, 0xbb, 0x0a, 0x2a, 0x2b, 0x8c, 0x26,
-  0x04, 0xc0, 0x05, 0x8f, 0xcd, 0x12, 0xd0, 0xce, 0x70, 0xc3, 0x2b, 0x07,
-  0x2e, 0x2b, 0x80, 0xc1, 0x2c, 0x43, 0xe8, 0x88, 0x4e, 0x50, 0xb8, 0x2a,
-  0xfc, 0xab, 0x00, 0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06,
-  0xd4, 0xcd, 0x0a, 0x20, 0x2b, 0xd8, 0x72, 0x10, 0xaf, 0xc2, 0x88, 0xc1,
-  0x01, 0x80, 0x20, 0x18, 0x50, 0x38, 0x2b, 0x80, 0xac, 0x10, 0x08, 0x17,
-  0x0c, 0x53, 0xbb, 0x2a, 0x90, 0xac, 0x00, 0x17, 0x3c, 0x35, 0x62, 0x70,
-  0x00, 0x20, 0x08, 0x06, 0x14, 0xcf, 0x0a, 0x25, 0x2b, 0xe8, 0x84, 0xbd,
-  0x0a, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0xf5, 0xac, 0x50, 0xb2,
-  0x42, 0x20, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0xdd, 0xf1, 0xd4, 0x91, 0xaa,
-  0x30, 0xcc, 0xed, 0xa6, 0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88,
-  0xc1, 0x01, 0x80, 0x20, 0x18, 0x78, 0x61, 0x2b, 0xc4, 0xac, 0x00, 0xaf,
-  0x82, 0xcf, 0x0a, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09,
+  0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0xb5, 0xaf, 0xd0, 0xbb, 0x42, 0x42,
+  0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0xb9, 0xaf, 0xe0, 0xbb,
+  0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0xbd, 0xaf,
+  0xf0, 0xbb, 0x42, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0,
+  0xdc, 0xaf, 0xe0, 0xbb, 0xc2, 0xdf, 0x0a, 0x81, 0xfa, 0x0a, 0xaf, 0x2b,
+  0xb0, 0xaf, 0x30, 0x9a, 0x10, 0x00, 0x17, 0x3c, 0x36, 0x4b, 0x40, 0x3b,
+  0x03, 0x2d, 0x8f, 0x69, 0xfc, 0x0d, 0xd8, 0x13, 0x7e, 0xc3, 0x12, 0xa1,
+  0x23, 0xcc, 0x0e, 0xd8, 0x13, 0xa2, 0x33, 0xcb, 0x50, 0x3b, 0xb7, 0x13,
+  0xd7, 0xc1, 0x70, 0x04, 0xdf, 0x06, 0xb1, 0x2b, 0x0c, 0xdf, 0xf5, 0x6d,
+  0x30, 0xcc, 0x70, 0x43, 0xc0, 0xb7, 0x02, 0x19, 0xd4, 0x10, 0xe8, 0x70,
+  0x44, 0x7e, 0xd4, 0xae, 0x30, 0x7c, 0x15, 0x08, 0x7a, 0xfb, 0x31, 0xcc,
+  0x70, 0x43, 0xf0, 0xb7, 0x02, 0x19, 0x54, 0x30, 0xe8, 0x2c, 0x83, 0xed,
+  0xac, 0x4f, 0x70, 0x63, 0x2b, 0x0c, 0x73, 0x7a, 0x2a, 0x0c, 0x33, 0x62,
+  0x70, 0x00, 0x20, 0x08, 0x06, 0x9e, 0xff, 0x0a, 0xee, 0x2b, 0xb4, 0xae,
+  0xb0, 0xbf, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2,
+  0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x87, 0x8c, 0x18, 0x20, 0x00, 0x08,
+  0x82, 0xc1, 0x56, 0xc2, 0x42, 0xfd, 0x0a, 0x07, 0x11, 0x8c, 0x18, 0x20,
+  0x00, 0x08, 0x82, 0xc1, 0x66, 0xc2, 0x82, 0xfd, 0x0a, 0x0c, 0x11, 0x8c,
+  0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x76, 0xc2, 0xc2, 0xfd, 0x0a, 0x12,
+  0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0xf2, 0xc2, 0x82, 0xfd,
+  0x0a, 0xb7, 0x2b, 0x04, 0x22, 0x2c, 0x9c, 0xaf, 0x40, 0xc2, 0xc2, 0x68,
+  0x42, 0x00, 0x5c, 0xf0, 0xd8, 0x2c, 0xc1, 0xfa, 0x0c, 0x37, 0xa4, 0x76,
+  0x80, 0xc2, 0x02, 0x18, 0xcc, 0x32, 0xe0, 0x4e, 0xee, 0x04, 0x25, 0xbb,
+  0x42, 0xfe, 0x0a, 0x70, 0xc1, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60,
+  0x40, 0xc5, 0xb0, 0xa0, 0xbf, 0x82, 0xed, 0x06, 0xeb, 0x2b, 0x8c, 0x18,
+  0x1c, 0x00, 0x08, 0x82, 0x01, 0x25, 0xc3, 0x82, 0xfe, 0x0a, 0x81, 0x70,
+  0xc1, 0x30, 0x55, 0xbb, 0x82, 0xff, 0x0a, 0x70, 0xc1, 0x53, 0x23, 0x06,
+  0x07, 0x00, 0x82, 0x60, 0x40, 0xd9, 0xb0, 0xf0, 0xbf, 0x02, 0x8d, 0xc0,
+  0xaf, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0xd4, 0x0d, 0x0b, 0xff,
+  0x2b, 0x04, 0xc2, 0x05, 0xc3, 0x5c, 0xf0, 0xd4, 0x1d, 0x4f, 0x9d, 0xdf,
+  0x0a, 0xc3, 0x5c, 0xad, 0x0a, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c,
+  0x18, 0x1c, 0x00, 0x08, 0x82, 0x81, 0xb7, 0xc3, 0xc2, 0x0a, 0x0b, 0xea,
+  0x2b, 0xe0, 0xb0, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a,
+  0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00,
+  0x82, 0x60, 0xb0, 0x89, 0xb1, 0x20, 0xc3, 0x42, 0x42, 0x04, 0x23, 0x06,
+  0x08, 0x00, 0x82, 0x60, 0xb0, 0x8d, 0xb1, 0x30, 0xc3, 0x42, 0x42, 0x04,
+  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0x91, 0xb1, 0x40, 0xc3, 0x42,
+  0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0xb0, 0xb1, 0x30,
+  0xc3, 0x02, 0xfd, 0x0a, 0xc1, 0x0f, 0x0b, 0x24, 0x2c, 0x84, 0xb1, 0x30,
+  0x9a, 0x10, 0x00, 0x17, 0x3c, 0x36, 0x4b, 0xb0, 0x3e, 0xc3, 0x0d, 0xe6,
+  0x1d, 0x90, 0xb1, 0x00, 0x06, 0xb3, 0x0c, 0xba, 0xb3, 0x3e, 0x81, 0x99,
+  0xaf, 0x80, 0xbe, 0x42, 0x7c, 0x86, 0x23, 0xe4, 0x37, 0x48, 0x5f, 0x81,
+  0xf8, 0x66, 0x19, 0x76, 0xc7, 0x77, 0x02, 0x53, 0x5f, 0x61, 0x7e, 0x83,
+  0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca, 0x02, 0x43,
+  0x3e, 0x56, 0x04, 0xf1, 0x29, 0xc2, 0x8d, 0x05, 0x1d, 0x6e, 0x08, 0xd8,
+  0x58, 0x00, 0x83, 0x59, 0x06, 0xde, 0xe9, 0x9d, 0xc0, 0x06, 0xf9, 0x15,
+  0xe0, 0x33, 0x4b, 0x20, 0x3e, 0x16, 0xbf, 0x02, 0x11, 0x9f, 0x59, 0x02,
+  0xf1, 0x19, 0x8e, 0xe8, 0xdf, 0x40, 0x7e, 0x05, 0xe1, 0x9b, 0x65, 0xf8,
+  0x1d, 0xf1, 0x09, 0xcc, 0x7f, 0x83, 0xf9, 0x15, 0xe2, 0x63, 0x81, 0x43,
+  0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4,
+  0xa7, 0x88, 0x3c, 0x16, 0x74, 0xb8, 0x21, 0xb8, 0x63, 0x01, 0x0c, 0x66,
+  0x19, 0xc0, 0x27, 0x7c, 0x02, 0xdb, 0x5f, 0x61, 0x88, 0xcf, 0x2c, 0x81,
+  0xf8, 0x18, 0xe1, 0xbf, 0x02, 0x7c, 0x66, 0x09, 0xc4, 0x67, 0xa0, 0xe5,
+  0xd1, 0x78, 0x07, 0xeb, 0x1d, 0x02, 0x7c, 0x84, 0xf0, 0x81, 0xc1, 0xc1,
+  0x77, 0x2e, 0x18, 0xc6, 0xfa, 0x57, 0x08, 0x61, 0x21, 0x3e, 0xc3, 0x11,
+  0xa4, 0x22, 0xc2, 0x02, 0xf1, 0xcd, 0x32, 0x8c, 0x8f, 0xf9, 0x04, 0x36,
+  0xc2, 0x42, 0xa9, 0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0xc1,
+  0x53, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0xa7, 0x2c, 0xe8,
+  0x70, 0x43, 0x50, 0xca, 0x02, 0x18, 0xcc, 0x32, 0x90, 0x4f, 0xf9, 0x04,
+  0x36, 0xac, 0xb0, 0x00, 0x9f, 0x59, 0x02, 0xf5, 0x31, 0x14, 0x16, 0x88,
+  0xf8, 0xcc, 0x12, 0xa8, 0xcf, 0x70, 0xc4, 0xab, 0xa4, 0xb0, 0x20, 0x7c,
+  0xb3, 0x0c, 0xe7, 0xa3, 0x3e, 0x81, 0xc1, 0x8a, 0x0a, 0x0b, 0xf1, 0xb1,
+  0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x91, 0x7c, 0xac,
+  0x08, 0xe2, 0x53, 0x84, 0x2c, 0x0b, 0x3a, 0xdc, 0x10, 0xc0, 0xb2, 0x00,
+  0x06, 0xb3, 0x0c, 0xe8, 0x93, 0x3e, 0x81, 0xc9, 0xb0, 0x30, 0xc4, 0x67,
+  0x96, 0x40, 0x7d, 0x8c, 0xb8, 0x61, 0x01, 0x3e, 0xb3, 0x04, 0xea, 0x33,
+  0xd0, 0xf2, 0x68, 0xe4, 0x83, 0x95, 0x0f, 0x81, 0x3e, 0x42, 0xfa, 0xd0,
+  0x95, 0xf9, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0xdd, 0xf6, 0xd4, 0x8d, 0xb0,
+  0x30, 0xcc, 0xe9, 0xac, 0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88,
+  0xc1, 0x01, 0x80, 0x20, 0x18, 0x78, 0xe0, 0x2c, 0xc0, 0xb2, 0xf0, 0xc6,
+  0x42, 0x2f, 0x0b, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09,
   0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20,
-  0x08, 0x06, 0x1b, 0xda, 0x0a, 0x38, 0x2b, 0x24, 0x44, 0x30, 0x62, 0x80,
-  0x00, 0x20, 0x08, 0x06, 0x5b, 0xda, 0x0a, 0x39, 0x2b, 0x24, 0x44, 0x30,
-  0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x9b, 0xda, 0x0a, 0x3a, 0x2b, 0x24,
-  0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x8b, 0xdc, 0x0a, 0x39,
-  0x2b, 0xe8, 0xab, 0x10, 0x94, 0xad, 0xa0, 0xb2, 0xc2, 0xd9, 0x0a, 0xa3,
-  0x09, 0x01, 0x70, 0xc1, 0x63, 0xb3, 0x04, 0xb4, 0x33, 0xdc, 0xc0, 0xce,
-  0x81, 0xda, 0x0a, 0x60, 0x30, 0xcb, 0x30, 0x3a, 0xb4, 0x13, 0x18, 0xbb,
-  0x0a, 0xee, 0x2a, 0xc4, 0x67, 0x38, 0x42, 0x9e, 0x83, 0x77, 0x15, 0x88,
-  0x6f, 0x96, 0x81, 0x74, 0x4e, 0x27, 0x30, 0x78, 0x15, 0xe6, 0x39, 0x88,
-  0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x30, 0xe4,
-  0x63, 0x45, 0x10, 0x9f, 0x22, 0xe8, 0x56, 0xd0, 0xe1, 0x86, 0x40, 0x6e,
-  0x05, 0x30, 0x98, 0x65, 0x28, 0x1d, 0xd3, 0x09, 0x6c, 0xc0, 0x57, 0x01,
-  0x3e, 0xb3, 0x04, 0xab, 0x63, 0xf7, 0x2a, 0x10, 0xf1, 0x99, 0x25, 0x58,
-  0x9d, 0xe1, 0x88, 0x7e, 0x0e, 0xf0, 0x55, 0x10, 0xbe, 0x59, 0x06, 0xd4,
-  0x59, 0x9d, 0xc0, 0xfc, 0x39, 0xc8, 0x57, 0x21, 0x3e, 0x16, 0x38, 0xf4,
-  0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c,
-  0x8a, 0xf8, 0x5b, 0x41, 0x87, 0x1b, 0x82, 0xbe, 0x15, 0xc0, 0x60, 0x96,
-  0x21, 0x75, 0x54, 0x27, 0xb0, 0x90, 0x15, 0x86, 0xf8, 0xcc, 0x12, 0xac,
-  0x8e, 0x11, 0x24, 0x2b, 0xc0, 0x67, 0x96, 0x60, 0x75, 0x06, 0x5a, 0x1e,
-  0xad, 0x74, 0x30, 0xd3, 0x21, 0x52, 0x47, 0x50, 0x1d, 0x18, 0x1d, 0x4e,
-  0xe7, 0x82, 0x61, 0x6c, 0x64, 0x85, 0x93, 0x15, 0xe2, 0x33, 0x1c, 0xa1,
-  0x1a, 0x28, 0x2b, 0x10, 0xdf, 0x2c, 0x03, 0xeb, 0xbc, 0x4e, 0x60, 0x29,
-  0x2b, 0xac, 0x46, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c,
-  0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xd1, 0xba, 0x82, 0x0e,
-  0x37, 0x04, 0xab, 0x2b, 0x80, 0xc1, 0x2c, 0x43, 0xeb, 0xb8, 0x4e, 0x60,
-  0x43, 0xcc, 0x0a, 0xf0, 0x99, 0x25, 0x98, 0x1d, 0x73, 0x59, 0x81, 0x88,
-  0xcf, 0x2c, 0xc1, 0xec, 0x0c, 0x47, 0xd4, 0xc6, 0xcb, 0x0a, 0xc2, 0x37,
-  0xcb, 0x00, 0x3b, 0xb3, 0x13, 0x98, 0x6d, 0xc0, 0xac, 0x10, 0x1f, 0x0b,
-  0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x10, 0xc9, 0xc7, 0x8a,
-  0x20, 0x3e, 0x45, 0xe0, 0xae, 0xa0, 0xc3, 0x0d, 0x81, 0xed, 0x0a, 0x60,
-  0x30, 0xcb, 0x10, 0x3b, 0xb2, 0x13, 0x18, 0xce, 0x0a, 0x43, 0x7c, 0x66,
-  0x09, 0x66, 0xc7, 0x88, 0x9e, 0x15, 0xe0, 0x33, 0x4b, 0x30, 0x3b, 0x03,
-  0x2d, 0x8f, 0xd6, 0x3a, 0x98, 0xeb, 0x10, 0xb1, 0x23, 0xc8, 0x0e, 0xe8,
-  0xbc, 0xce, 0x05, 0xc3, 0x5c, 0xf0, 0xd4, 0x6d, 0x4f, 0x5d, 0xca, 0x0a,
-  0xc3, 0x1c, 0x98, 0x0a, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18,
-  0x1c, 0x00, 0x08, 0x82, 0x81, 0x67, 0xbe, 0x82, 0xed, 0x0a, 0x75, 0x2b,
-  0x8c, 0xaf, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30,
-  0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00, 0x82,
-  0x60, 0xb0, 0xb5, 0xaf, 0xd0, 0xbb, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08,
-  0x00, 0x82, 0x60, 0xb0, 0xb9, 0xaf, 0xe0, 0xbb, 0x42, 0x42, 0x04, 0x23,
-  0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0xbd, 0xaf, 0xf0, 0xbb, 0x42, 0x42,
-  0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0xdc, 0xaf, 0xe0, 0xbb,
-  0xc2, 0xdf, 0x0a, 0x81, 0xfa, 0x0a, 0xaf, 0x2b, 0xb0, 0xaf, 0x30, 0x9a,
-  0x10, 0x00, 0x17, 0x3c, 0x36, 0x4b, 0x40, 0x3b, 0x03, 0x2d, 0x8f, 0x69,
-  0xfc, 0x0d, 0xd8, 0x13, 0x7e, 0xc3, 0x12, 0xa1, 0x23, 0xcc, 0x0e, 0xd8,
-  0x13, 0xa2, 0x33, 0xcb, 0x50, 0x3b, 0xb7, 0x13, 0xd7, 0xc1, 0x70, 0x04,
-  0xdf, 0x06, 0xb1, 0x2b, 0x0c, 0xdf, 0xf5, 0x6d, 0x30, 0xcc, 0x70, 0x43,
-  0xc0, 0xb7, 0x02, 0x19, 0xd4, 0x10, 0xe8, 0x70, 0x44, 0x7e, 0xd4, 0xae,
-  0x30, 0x7c, 0x15, 0x08, 0x7a, 0xfb, 0x31, 0xcc, 0x70, 0x43, 0xf0, 0xb7,
-  0x02, 0x19, 0x54, 0x30, 0xe8, 0x2c, 0x83, 0xed, 0xac, 0x4f, 0x70, 0x63,
-  0x2b, 0x0c, 0x73, 0x7a, 0x2a, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08,
-  0x06, 0x9e, 0xff, 0x0a, 0xee, 0x2b, 0xb4, 0xae, 0xb0, 0xbf, 0xc2, 0x68,
-  0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10,
-  0x43, 0x11, 0x87, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x56, 0xc2,
-  0x42, 0xfd, 0x0a, 0x07, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1,
-  0x66, 0xc2, 0x82, 0xfd, 0x0a, 0x0c, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08,
-  0x82, 0xc1, 0x76, 0xc2, 0xc2, 0xfd, 0x0a, 0x12, 0x11, 0x8c, 0x18, 0x28,
-  0x00, 0x08, 0x82, 0xc1, 0xf2, 0xc2, 0x82, 0xfd, 0x0a, 0xb7, 0x2b, 0x04,
-  0x22, 0x2c, 0x9c, 0xaf, 0x40, 0xc2, 0xc2, 0x68, 0x42, 0x00, 0x5c, 0xf0,
-  0xd8, 0x2c, 0xc1, 0xfa, 0x0c, 0x37, 0xa4, 0x76, 0x80, 0xc2, 0x02, 0x18,
-  0xcc, 0x32, 0xe0, 0x4e, 0xee, 0x04, 0x25, 0xbb, 0x42, 0xfe, 0x0a, 0x70,
-  0xc1, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0xc5, 0xb0, 0xa0,
-  0xbf, 0x82, 0xed, 0x06, 0xeb, 0x2b, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82,
-  0x01, 0x25, 0xc3, 0x82, 0xfe, 0x0a, 0x81, 0x70, 0xc1, 0x30, 0x55, 0xbb,
-  0x82, 0xff, 0x0a, 0x70, 0xc1, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60,
-  0x40, 0xd9, 0xb0, 0xf0, 0xbf, 0x02, 0x8d, 0xc0, 0xaf, 0x30, 0x62, 0x70,
-  0x00, 0x20, 0x08, 0x06, 0xd4, 0x0d, 0x0b, 0xff, 0x2b, 0x04, 0xc2, 0x05,
-  0xc3, 0x5c, 0xf0, 0xd4, 0x1d, 0x4f, 0x9d, 0xdf, 0x0a, 0xc3, 0x5c, 0xad,
-  0x0a, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08,
-  0x82, 0x81, 0xb7, 0xc3, 0xc2, 0x0a, 0x0b, 0xea, 0x2b, 0xe0, 0xb0, 0x30,
+  0x08, 0x06, 0xdb, 0x39, 0x0b, 0xb7, 0x2c, 0x24, 0x44, 0x30, 0x62, 0x80,
+  0x00, 0x20, 0x08, 0x06, 0x1b, 0x3a, 0x0b, 0xb8, 0x2c, 0x24, 0x44, 0x30,
+  0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x5b, 0x3a, 0x0b, 0xb9, 0x2c, 0x24,
+  0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x4b, 0x3c, 0x0b, 0xb8,
+  0x2c, 0xe4, 0xb1, 0x10, 0x90, 0xb3, 0x90, 0xca, 0x82, 0x39, 0x0b, 0xa3,
+  0x09, 0x01, 0x70, 0xc1, 0x63, 0xb3, 0x04, 0xeb, 0x33, 0xd0, 0xf2, 0x98,
+  0x86, 0xed, 0xd0, 0x64, 0x51, 0x3b, 0x2c, 0x81, 0x3b, 0x82, 0xfa, 0xd0,
+  0x64, 0x91, 0x3b, 0xb3, 0x0c, 0xec, 0xe3, 0x3e, 0x6b, 0x1e, 0x0c, 0x47,
+  0xec, 0x6d, 0xb0, 0xca, 0xc2, 0xf0, 0x1d, 0xdf, 0x06, 0xc3, 0x0c, 0x37,
+  0x04, 0x76, 0x2c, 0x90, 0x41, 0x0d, 0x81, 0x0e, 0x47, 0xcc, 0xcb, 0x2b,
+  0x0b, 0xc3, 0x57, 0x81, 0xa0, 0x57, 0x2f, 0xc3, 0x0c, 0x37, 0x04, 0x79,
+  0x2c, 0x90, 0x41, 0x05, 0x83, 0xce, 0x32, 0xb4, 0x8f, 0x08, 0x05, 0xd7,
+  0xc3, 0xc2, 0x30, 0x47, 0xb7, 0xc2, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82,
+  0x60, 0xe0, 0xe1, 0xb3, 0x80, 0xce, 0xc2, 0x29, 0x0b, 0xf5, 0x2c, 0x8c,
+  0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02,
+  0x31, 0x14, 0x71, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0xff,
+  0x2c, 0xbc, 0xb3, 0x70, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
+  0x6c, 0x20, 0x2d, 0xc0, 0xb3, 0xc0, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80,
+  0x20, 0x18, 0x6c, 0x21, 0x2d, 0xc4, 0xb3, 0x20, 0x11, 0xc1, 0x88, 0x81,
+  0x02, 0x80, 0x20, 0x18, 0x2c, 0x29, 0x2d, 0xc0, 0xb3, 0x10, 0xcb, 0x42,
+  0xc0, 0xcf, 0x42, 0x38, 0x0b, 0xfe, 0x2c, 0x8c, 0x26, 0x04, 0xc0, 0x05,
+  0x8f, 0xcd, 0x12, 0x88, 0xd0, 0x70, 0xc3, 0xa8, 0x07, 0x22, 0x2d, 0x80,
+  0xc1, 0x2c, 0xc3, 0xfb, 0xc0, 0x4f, 0x50, 0xac, 0x2c, 0xcc, 0xb3, 0x00,
+  0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0xd4, 0x4a, 0x0b,
+  0xf4, 0x2c, 0xd4, 0x6e, 0x50, 0xce, 0xc2, 0x88, 0xc1, 0x01, 0x80, 0x20,
+  0x18, 0x50, 0x2c, 0x2d, 0xd0, 0xb3, 0x10, 0x08, 0x17, 0x0c, 0x53, 0xaf,
+  0x2c, 0xe0, 0xb3, 0x00, 0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08,
+  0x06, 0x14, 0x4c, 0x0b, 0xf9, 0x2c, 0xb8, 0x8c, 0x3a, 0x0b, 0x23, 0x06,
+  0x07, 0x00, 0x82, 0x60, 0x40, 0xc5, 0xb4, 0x90, 0xcf, 0x42, 0x20, 0x5c,
+  0x30, 0xcc, 0x05, 0x4f, 0xdd, 0xf1, 0xd4, 0xe1, 0xb1, 0x30, 0xcc, 0xbd,
+  0xae, 0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80,
+  0x20, 0x18, 0x78, 0x35, 0x2d, 0x94, 0xb4, 0x40, 0xce, 0x82, 0x4c, 0x0b,
+  0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a,
+  0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x1b,
+  0x4f, 0x0b, 0x2c, 0x2d, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08,
+  0x06, 0x5b, 0x4f, 0x0b, 0x2d, 0x2d, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00,
+  0x20, 0x08, 0x06, 0x9b, 0x4f, 0x0b, 0x2e, 0x2d, 0x24, 0x44, 0x30, 0x62,
+  0xa0, 0x00, 0x20, 0x08, 0x06, 0x8b, 0x59, 0x0b, 0x2d, 0x2d, 0xb8, 0xb3,
+  0x10, 0xe4, 0xb4, 0xe0, 0xcf, 0xc2, 0x4e, 0x0b, 0xa3, 0x09, 0x01, 0x70,
+  0xc1, 0x63, 0xb3, 0x04, 0x22, 0x34, 0xdc, 0x00, 0xee, 0x81, 0x4f, 0x0b,
+  0x60, 0x30, 0xcb, 0x10, 0x3f, 0x22, 0x14, 0x18, 0x38, 0x0b, 0xe2, 0x2c,
+  0xc4, 0x67, 0x38, 0x22, 0x7e, 0x83, 0x71, 0x16, 0x88, 0x6f, 0x96, 0x41,
+  0x7e, 0xea, 0x27, 0x30, 0x72, 0x16, 0xe4, 0x37, 0x88, 0x8f, 0x05, 0x03,
+  0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10,
+  0x9f, 0x22, 0xd0, 0x5a, 0xd0, 0xe1, 0x86, 0xc0, 0xac, 0x05, 0x30, 0x98,
+  0x65, 0x98, 0x1f, 0xfa, 0x09, 0x6c, 0x60, 0x67, 0x01, 0x3e, 0xb3, 0x04,
+  0xf9, 0x63, 0xeb, 0x2c, 0x10, 0xf1, 0x99, 0x25, 0xc8, 0x9f, 0xe1, 0x08,
+  0xfe, 0x0d, 0xd8, 0x59, 0x10, 0xbe, 0x59, 0x06, 0xfb, 0xc9, 0x9f, 0xc0,
+  0xfa, 0x37, 0x68, 0x67, 0x21, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98,
+  0x0b, 0x9e, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x98, 0x6b,
+  0x41, 0x87, 0x1b, 0x82, 0xb8, 0x16, 0xc0, 0x60, 0x96, 0xe1, 0x7e, 0xf0,
+  0x27, 0xb0, 0x7a, 0x16, 0x86, 0xf8, 0xcc, 0x12, 0xe4, 0x8f, 0x11, 0xf8,
+  0x2c, 0xc0, 0x67, 0x96, 0x20, 0x7f, 0x06, 0x5a, 0x1e, 0x6d, 0x7e, 0x30,
+  0xfa, 0x21, 0xee, 0x47, 0xc0, 0x1f, 0x17, 0x1c, 0xea, 0xe7, 0x82, 0x61,
+  0xec, 0x9e, 0x85, 0x7d, 0x16, 0xe2, 0x33, 0x1c, 0xe1, 0x37, 0xfc, 0x2c,
+  0x10, 0xdf, 0x2c, 0x83, 0xfe, 0xf4, 0x4f, 0x60, 0xfd, 0x2c, 0xfc, 0x4d,
+  0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x81, 0x21,
+  0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x11, 0xda, 0x82, 0x0e, 0x37, 0x04, 0x7f,
+  0x2d, 0x80, 0xc1, 0x2c, 0xc3, 0xfe, 0xf0, 0x4f, 0x60, 0x43, 0x49, 0x0b,
+  0xf0, 0x99, 0x25, 0x08, 0x21, 0x13, 0x69, 0x81, 0x88, 0xcf, 0x2c, 0x41,
+  0x08, 0x0d, 0x47, 0xa4, 0xce, 0x48, 0x0b, 0xc2, 0x37, 0xcb, 0xe0, 0x3f,
+  0x21, 0x14, 0x98, 0xea, 0x90, 0xb4, 0x10, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c,
+  0x30, 0xcc, 0x05, 0x4f, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45,
+  0xb0, 0xb6, 0xa0, 0xc3, 0x0d, 0x81, 0x6a, 0x0b, 0x60, 0x30, 0xcb, 0xf0,
+  0x3f, 0x20, 0x14, 0x18, 0x4b, 0x0b, 0x43, 0x7c, 0x66, 0x09, 0x42, 0xc8,
+  0x88, 0x98, 0x16, 0xe0, 0x33, 0x4b, 0x10, 0x42, 0x03, 0x2d, 0x8f, 0xb6,
+  0x3f, 0x18, 0xff, 0x10, 0xff, 0x23, 0x80, 0x10, 0xda, 0xf5, 0xcf, 0x05,
+  0xc3, 0x5c, 0xf0, 0xd4, 0x6d, 0x4f, 0x5d, 0x3f, 0x0b, 0xc3, 0x1c, 0x0d,
+  0x0b, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08,
+  0x82, 0x81, 0xa7, 0xdb, 0x82, 0x6a, 0x0b, 0x69, 0x2d, 0xdc, 0xb6, 0x30,
   0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09,
-  0xc4, 0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0x89,
-  0xb1, 0x20, 0xc3, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60,
-  0xb0, 0x8d, 0xb1, 0x30, 0xc3, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00,
-  0x82, 0x60, 0xb0, 0x91, 0xb1, 0x40, 0xc3, 0x42, 0x42, 0x04, 0x23, 0x06,
-  0x0a, 0x00, 0x82, 0x60, 0xb0, 0xb0, 0xb1, 0x30, 0xc3, 0x02, 0xfd, 0x0a,
-  0xc1, 0x0f, 0x0b, 0x24, 0x2c, 0x84, 0xb1, 0x30, 0x9a, 0x10, 0x00, 0x17,
-  0x3c, 0x36, 0x4b, 0xb0, 0x3e, 0xc3, 0x0d, 0xe6, 0x1d, 0x90, 0xb1, 0x00,
-  0x06, 0xb3, 0x0c, 0xba, 0xb3, 0x3e, 0x81, 0x99, 0xaf, 0x80, 0xbe, 0x42,
-  0x7c, 0x86, 0x23, 0xe4, 0x37, 0x48, 0x5f, 0x81, 0xf8, 0x66, 0x19, 0x76,
-  0xc7, 0x77, 0x02, 0x53, 0x5f, 0x61, 0x7e, 0x83, 0xf8, 0x58, 0x30, 0xd0,
-  0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1,
-  0x29, 0xc2, 0x8d, 0x05, 0x1d, 0x6e, 0x08, 0xd8, 0x58, 0x00, 0x83, 0x59,
-  0x06, 0xde, 0xe9, 0x9d, 0xc0, 0x06, 0xf9, 0x15, 0xe0, 0x33, 0x4b, 0x20,
-  0x3e, 0x16, 0xbf, 0x02, 0x11, 0x9f, 0x59, 0x02, 0xf1, 0x19, 0x8e, 0xe8,
-  0xdf, 0x40, 0x7e, 0x05, 0xe1, 0x9b, 0x65, 0xf8, 0x1d, 0xf1, 0x09, 0xcc,
-  0x7f, 0x83, 0xf9, 0x15, 0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9,
-  0xe0, 0x29, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0x3c, 0x16,
-  0x74, 0xb8, 0x21, 0xb8, 0x63, 0x01, 0x0c, 0x66, 0x19, 0xc0, 0x27, 0x7c,
-  0x02, 0xdb, 0x5f, 0x61, 0x88, 0xcf, 0x2c, 0x81, 0xf8, 0x18, 0xe1, 0xbf,
-  0x02, 0x7c, 0x66, 0x09, 0xc4, 0x67, 0xa0, 0xe5, 0xd1, 0x78, 0x07, 0xeb,
-  0x1d, 0x02, 0x7c, 0x84, 0xf0, 0x81, 0xc1, 0xc1, 0x77, 0x2e, 0x18, 0xc6,
-  0xfa, 0x57, 0x08, 0x61, 0x21, 0x3e, 0xc3, 0x11, 0xa4, 0x22, 0xc2, 0x02,
-  0xf1, 0xcd, 0x32, 0x8c, 0x8f, 0xf9, 0x04, 0x36, 0xc2, 0x42, 0xa9, 0xc4,
-  0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x18, 0xf2,
-  0xb1, 0x22, 0x88, 0x4f, 0x11, 0xa7, 0x2c, 0xe8, 0x70, 0x43, 0x50, 0xca,
-  0x02, 0x18, 0xcc, 0x32, 0x90, 0x4f, 0xf9, 0x04, 0x36, 0xac, 0xb0, 0x00,
-  0x9f, 0x59, 0x02, 0xf5, 0x31, 0x14, 0x16, 0x88, 0xf8, 0xcc, 0x12, 0xa8,
-  0xcf, 0x70, 0xc4, 0xab, 0xa4, 0xb0, 0x20, 0x7c, 0xb3, 0x0c, 0xe7, 0xa3,
-  0x3e, 0x81, 0xc1, 0x8a, 0x0a, 0x0b, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05,
-  0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x84,
-  0x2c, 0x0b, 0x3a, 0xdc, 0x10, 0xc0, 0xb2, 0x00, 0x06, 0xb3, 0x0c, 0xe8,
-  0x93, 0x3e, 0x81, 0xc9, 0xb0, 0x30, 0xc4, 0x67, 0x96, 0x40, 0x7d, 0x8c,
-  0xb8, 0x61, 0x01, 0x3e, 0xb3, 0x04, 0xea, 0x33, 0xd0, 0xf2, 0x68, 0xe4,
-  0x83, 0x95, 0x0f, 0x81, 0x3e, 0x42, 0xfa, 0xd0, 0x95, 0xf9, 0x5c, 0x30,
-  0xcc, 0x05, 0x4f, 0xdd, 0xf6, 0xd4, 0x8d, 0xb0, 0x30, 0xcc, 0xe9, 0xac,
-  0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20,
-  0x18, 0x78, 0xe0, 0x2c, 0xc0, 0xb2, 0xf0, 0xc6, 0x42, 0x2f, 0x0b, 0xa3,
-  0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40,
-  0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xdb, 0x39,
-  0x0b, 0xb7, 0x2c, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06,
-  0x1b, 0x3a, 0x0b, 0xb8, 0x2c, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20,
-  0x08, 0x06, 0x5b, 0x3a, 0x0b, 0xb9, 0x2c, 0x24, 0x44, 0x30, 0x62, 0xa0,
-  0x00, 0x20, 0x08, 0x06, 0x4b, 0x3c, 0x0b, 0xb8, 0x2c, 0xe4, 0xb1, 0x10,
-  0x90, 0xb3, 0x90, 0xca, 0x82, 0x39, 0x0b, 0xa3, 0x09, 0x01, 0x70, 0xc1,
-  0x63, 0xb3, 0x04, 0xeb, 0x33, 0xd0, 0xf2, 0x98, 0x86, 0xed, 0xd0, 0x64,
-  0x51, 0x3b, 0x2c, 0x81, 0x3b, 0x82, 0xfa, 0xd0, 0x64, 0x91, 0x3b, 0xb3,
-  0x0c, 0xec, 0xe3, 0x3e, 0x6b, 0x1e, 0x0c, 0x47, 0xec, 0x6d, 0xb0, 0xca,
-  0xc2, 0xf0, 0x1d, 0xdf, 0x06, 0xc3, 0x0c, 0x37, 0x04, 0x76, 0x2c, 0x90,
-  0x41, 0x0d, 0x81, 0x0e, 0x47, 0xcc, 0xcb, 0x2b, 0x0b, 0xc3, 0x57, 0x81,
-  0xa0, 0x57, 0x2f, 0xc3, 0x0c, 0x37, 0x04, 0x79, 0x2c, 0x90, 0x41, 0x05,
-  0x83, 0xce, 0x32, 0xb4, 0x8f, 0x08, 0x05, 0xd7, 0xc3, 0xc2, 0x30, 0x47,
-  0xb7, 0xc2, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xe0, 0xe1, 0xb3,
-  0x80, 0xce, 0xc2, 0x29, 0x0b, 0xf5, 0x2c, 0x8c, 0x26, 0x04, 0xc0, 0x68,
-  0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x71, 0xc8,
-  0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0xff, 0x2c, 0xbc, 0xb3, 0x70,
-  0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0x20, 0x2d, 0xc0,
-  0xb3, 0xc0, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0x21,
-  0x2d, 0xc4, 0xb3, 0x20, 0x11, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18,
-  0x2c, 0x29, 0x2d, 0xc0, 0xb3, 0x10, 0xcb, 0x42, 0xc0, 0xcf, 0x42, 0x38,
-  0x0b, 0xfe, 0x2c, 0x8c, 0x26, 0x04, 0xc0, 0x05, 0x8f, 0xcd, 0x12, 0x88,
-  0xd0, 0x70, 0xc3, 0xa8, 0x07, 0x22, 0x2d, 0x80, 0xc1, 0x2c, 0xc3, 0xfb,
-  0xc0, 0x4f, 0x50, 0xac, 0x2c, 0xcc, 0xb3, 0x00, 0x17, 0x3c, 0x35, 0x62,
-  0x70, 0x00, 0x20, 0x08, 0x06, 0xd4, 0x4a, 0x0b, 0xf4, 0x2c, 0xd4, 0x6e,
-  0x50, 0xce, 0xc2, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50, 0x2c, 0x2d,
-  0xd0, 0xb3, 0x10, 0x08, 0x17, 0x0c, 0x53, 0xaf, 0x2c, 0xe0, 0xb3, 0x00,
-  0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x14, 0x4c, 0x0b,
-  0xf9, 0x2c, 0xb8, 0x8c, 0x3a, 0x0b, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60,
-  0x40, 0xc5, 0xb4, 0x90, 0xcf, 0x42, 0x20, 0x5c, 0x30, 0xcc, 0x05, 0x4f,
-  0xdd, 0xf1, 0xd4, 0xe1, 0xb1, 0x30, 0xcc, 0xbd, 0xae, 0x30, 0xcc, 0x11,
-  0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x78, 0x35,
-  0x2d, 0x94, 0xb4, 0x40, 0xce, 0x82, 0x4c, 0x0b, 0xa3, 0x09, 0x01, 0x30,
-  0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24,
-  0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x1b, 0x4f, 0x0b, 0x2c, 0x2d,
-  0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x5b, 0x4f, 0x0b,
-  0x2d, 0x2d, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x9b,
-  0x4f, 0x0b, 0x2e, 0x2d, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08,
-  0x06, 0x8b, 0x59, 0x0b, 0x2d, 0x2d, 0xb8, 0xb3, 0x10, 0xe4, 0xb4, 0xe0,
-  0xcf, 0xc2, 0x4e, 0x0b, 0xa3, 0x09, 0x01, 0x70, 0xc1, 0x63, 0xb3, 0x04,
-  0x22, 0x34, 0xdc, 0x00, 0xee, 0x81, 0x4f, 0x0b, 0x60, 0x30, 0xcb, 0x10,
-  0x3f, 0x22, 0x14, 0x18, 0x38, 0x0b, 0xe2, 0x2c, 0xc4, 0x67, 0x38, 0x22,
-  0x7e, 0x83, 0x71, 0x16, 0x88, 0x6f, 0x96, 0x41, 0x7e, 0xea, 0x27, 0x30,
-  0x72, 0x16, 0xe4, 0x37, 0x88, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6,
-  0x82, 0xa7, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xd0, 0x5a,
-  0xd0, 0xe1, 0x86, 0xc0, 0xac, 0x05, 0x30, 0x98, 0x65, 0x98, 0x1f, 0xfa,
-  0x09, 0x6c, 0x60, 0x67, 0x01, 0x3e, 0xb3, 0x04, 0xf9, 0x63, 0xeb, 0x2c,
-  0x10, 0xf1, 0x99, 0x25, 0xc8, 0x9f, 0xe1, 0x08, 0xfe, 0x0d, 0xd8, 0x59,
-  0x10, 0xbe, 0x59, 0x06, 0xfb, 0xc9, 0x9f, 0xc0, 0xfa, 0x37, 0x68, 0x67,
-  0x21, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20,
-  0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x98, 0x6b, 0x41, 0x87, 0x1b, 0x82,
-  0xb8, 0x16, 0xc0, 0x60, 0x96, 0xe1, 0x7e, 0xf0, 0x27, 0xb0, 0x7a, 0x16,
-  0x86, 0xf8, 0xcc, 0x12, 0xe4, 0x8f, 0x11, 0xf8, 0x2c, 0xc0, 0x67, 0x96,
-  0x20, 0x7f, 0x06, 0x5a, 0x1e, 0x6d, 0x7e, 0x30, 0xfa, 0x21, 0xee, 0x47,
-  0xc0, 0x1f, 0x17, 0x1c, 0xea, 0xe7, 0x82, 0x61, 0xec, 0x9e, 0x85, 0x7d,
-  0x16, 0xe2, 0x33, 0x1c, 0xe1, 0x37, 0xfc, 0x2c, 0x10, 0xdf, 0x2c, 0x83,
-  0xfe, 0xf4, 0x4f, 0x60, 0xfd, 0x2c, 0xfc, 0x4d, 0x7c, 0x2c, 0x18, 0xe8,
-  0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8,
-  0x14, 0x11, 0xda, 0x82, 0x0e, 0x37, 0x04, 0x7f, 0x2d, 0x80, 0xc1, 0x2c,
-  0xc3, 0xfe, 0xf0, 0x4f, 0x60, 0x43, 0x49, 0x0b, 0xf0, 0x99, 0x25, 0x08,
-  0x21, 0x13, 0x69, 0x81, 0x88, 0xcf, 0x2c, 0x41, 0x08, 0x0d, 0x47, 0xa4,
-  0xce, 0x48, 0x0b, 0xc2, 0x37, 0xcb, 0xe0, 0x3f, 0x21, 0x14, 0x98, 0xea,
-  0x90, 0xb4, 0x10, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f,
-  0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xb0, 0xb6, 0xa0, 0xc3,
-  0x0d, 0x81, 0x6a, 0x0b, 0x60, 0x30, 0xcb, 0xf0, 0x3f, 0x20, 0x14, 0x18,
-  0x4b, 0x0b, 0x43, 0x7c, 0x66, 0x09, 0x42, 0xc8, 0x88, 0x98, 0x16, 0xe0,
-  0x33, 0x4b, 0x10, 0x42, 0x03, 0x2d, 0x8f, 0xb6, 0x3f, 0x18, 0xff, 0x10,
-  0xff, 0x23, 0x80, 0x10, 0xda, 0xf5, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0xd4,
-  0x6d, 0x4f, 0x5d, 0x3f, 0x0b, 0xc3, 0x1c, 0x0d, 0x0b, 0xc3, 0x1c, 0x31,
-  0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x81, 0xa7, 0xdb,
-  0x82, 0x6a, 0x0b, 0x69, 0x2d, 0xdc, 0xb6, 0x30, 0x9a, 0x10, 0x00, 0xa3,
-  0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22,
-  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0x85, 0xb7, 0x10, 0xdb, 0x42,
-  0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0x89, 0xb7, 0x20,
-  0xdb, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0x8d,
-  0xb7, 0x30, 0xdb, 0x42, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60,
-  0xb0, 0xac, 0xb7, 0x20, 0xdb, 0xc2, 0x5c, 0x0b, 0x81, 0x6f, 0x0b, 0xa3,
-  0x2d, 0x80, 0xb7, 0x30, 0x9a, 0x10, 0x00, 0x17, 0x3c, 0x36, 0x4b, 0x20,
-  0x42, 0x03, 0x2d, 0x8f, 0x69, 0xb4, 0x0f, 0xca, 0x16, 0xec, 0xc3, 0x12,
-  0xef, 0x23, 0x84, 0x10, 0xca, 0x16, 0xf0, 0x33, 0x62, 0x60, 0x00, 0x20,
-  0x08, 0x06, 0x50, 0x7b, 0x0b, 0xae, 0x2d, 0x98, 0xb1, 0x30, 0x62, 0x60,
-  0x00, 0x20, 0x08, 0x06, 0x90, 0x7b, 0x0b, 0xaf, 0x2d, 0x98, 0xb1, 0x60,
-  0x41, 0x20, 0x1f, 0x0b, 0x04, 0xf9, 0xd8, 0x9b, 0x07, 0xa7, 0x2d, 0xc8,
-  0x67, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xa4, 0xf8, 0x16, 0x6e, 0x5b,
-  0x38, 0x6d, 0xa1, 0xd7, 0x02, 0x8b, 0xf3, 0xe0, 0xb4, 0x05, 0xf9, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x34, 0xdf, 0x42, 0x6e, 0x0b, 0xa6,
-  0x2d, 0xa0, 0x6a, 0x10, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x44,
-  0xdf, 0x82, 0x6e, 0x0b, 0xa9, 0x2d, 0x80, 0x5b, 0x30, 0x62, 0x80, 0x00,
-  0x20, 0x08, 0x06, 0x52, 0x7d, 0x0b, 0xbb, 0x2d, 0xb0, 0xb6, 0x80, 0x2f,
-  0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x48, 0xf6, 0x2d, 0xf0, 0xb6,
-  0x80, 0xda, 0xc2, 0xaa, 0x06, 0xc6, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
-  0x48, 0xf7, 0x2d, 0xf4, 0xb6, 0x80, 0xda, 0xc2, 0xb8, 0x05, 0x23, 0x06,
-  0x08, 0x00, 0x82, 0x60, 0x20, 0xe1, 0xb7, 0xe0, 0xdb, 0x82, 0x6b, 0x0b,
-  0xfb, 0x12, 0x8c, 0x18, 0x34, 0x00, 0x08, 0x82, 0x81, 0x75, 0xdf, 0x82,
-  0x6f, 0x0b, 0xb1, 0x2d, 0x30, 0x8b, 0xe2, 0xaa, 0x01, 0x42, 0x04, 0xf6,
-  0xd7, 0x41, 0x6c, 0x0b, 0xf2, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03,
-  0x69, 0xbf, 0x85, 0xf0, 0x16, 0x62, 0x5b, 0x68, 0xaf, 0xc0, 0x42, 0x3b,
-  0x88, 0x6d, 0x41, 0x3e, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x20, 0xf5,
-  0xb7, 0x30, 0xde, 0x02, 0x6c, 0x0b, 0xb8, 0x19, 0x04, 0x23, 0x06, 0x08,
-  0x00, 0x82, 0x60, 0x20, 0xf9, 0xb7, 0x40, 0xde, 0xc2, 0x6c, 0x0b, 0xf0,
-  0x15, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0xf4, 0xdf, 0x42, 0x79,
-  0x0b, 0xb6, 0x2d, 0xa0, 0x48, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06,
-  0x12, 0x88, 0x0b, 0xe6, 0x2d, 0xc8, 0xb6, 0xb0, 0x9b, 0x81, 0x31, 0x62,
-  0x80, 0x00, 0x20, 0x08, 0x06, 0x52, 0x88, 0x0b, 0xe7, 0x2d, 0xc8, 0xb6,
-  0x30, 0x5f, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x48, 0x22, 0x2e,
-  0xa0, 0xb7, 0x80, 0xdb, 0xc2, 0x8a, 0x04, 0x23, 0x06, 0x0d, 0x00, 0x82,
-  0x60, 0x60, 0x85, 0xb8, 0x80, 0xde, 0xc2, 0x6e, 0x0b, 0x56, 0x45, 0xf9,
-  0x66, 0x80, 0x10, 0x81, 0xb9, 0x72, 0xb0, 0xdb, 0x82, 0x7c, 0x46, 0x0c,
-  0x10, 0x00, 0x04, 0xc1, 0x40, 0x2a, 0x71, 0x61, 0xbd, 0x85, 0xdd, 0x16,
-  0xfa, 0x29, 0x30, 0x58, 0x0e, 0x76, 0x5b, 0x90, 0xcf, 0x88, 0x01, 0x02,
-  0x80, 0x20, 0x18, 0x48, 0x27, 0x2e, 0xb4, 0xb7, 0xa0, 0xdb, 0x02, 0x3a,
-  0x06, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x48, 0x28, 0x2e, 0xb8,
-  0xb7, 0xd0, 0xdb, 0x02, 0x48, 0x05, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60,
-  0x20, 0xa5, 0xb8, 0xf0, 0xde, 0x02, 0x78, 0x0b, 0x38, 0x11, 0x8c, 0x18,
-  0x20, 0x00, 0x08, 0x82, 0x81, 0xa4, 0xe2, 0x02, 0x7c, 0x0b, 0xbc, 0x2d,
-  0xac, 0x63, 0x60, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0xb4, 0xe2,
-  0x42, 0x7c, 0x0b, 0xbc, 0x2d, 0x8c, 0x54, 0x30, 0x62, 0x80, 0x00, 0x20,
-  0x08, 0x06, 0x12, 0x8b, 0x0b, 0xf2, 0x2d, 0x88, 0xb7, 0xb0, 0x13, 0xc1,
-  0x88, 0x41, 0x03, 0x80, 0x20, 0x18, 0x58, 0x2b, 0x2e, 0xc8, 0xb7, 0x50,
-  0xde, 0x02, 0x18, 0x7c, 0x9e, 0x3b, 0x06, 0x08, 0x11, 0x58, 0xef, 0x06,
-  0xe5, 0x2d, 0xc8, 0x67, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xa4, 0x17,
-  0x17, 0xea, 0x5b, 0x28, 0x6f, 0xa1, 0x85, 0x02, 0xfb, 0xdd, 0xa0, 0xbc,
-  0x05, 0xf9, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x14, 0xe3, 0xc2,
-  0x7d, 0x0b, 0xe4, 0x2d, 0xe0, 0x5f, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08,
-  0x06, 0x92, 0x8c, 0x0b, 0xf8, 0x2d, 0x9c, 0xb7, 0x00, 0x43, 0xc1, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x48, 0x33, 0x2e, 0xe4, 0xb7, 0xa0, 0xde,
-  0x02, 0x1a, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x20, 0xd1, 0xb8,
-  0xa0, 0xdf, 0x82, 0x79, 0x0b, 0xfb, 0x67, 0x8c, 0x18, 0x20, 0x00, 0x08,
-  0x82, 0x81, 0x54, 0xe3, 0xc2, 0x7e, 0x0b, 0xe6, 0x2d, 0xcc, 0x50, 0x30,
-  0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x92, 0x8d, 0x0b, 0xfc, 0x2d, 0xb0,
-  0xb7, 0xb0, 0x06, 0xc1, 0x88, 0x41, 0x03, 0x80, 0x20, 0x18, 0x58, 0x35,
-  0x2e, 0xf0, 0xb7, 0xf0, 0xde, 0x82, 0x1a, 0xa4, 0x01, 0x1a, 0xf8, 0x1f,
-  0x42, 0x04, 0xc6, 0x06, 0x6c, 0x20, 0x1f, 0x0b, 0xda, 0x40, 0x3e, 0x16,
-  0x06, 0xf1, 0x2d, 0xc8, 0x67, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xa4,
-  0x1d, 0x17, 0x42, 0x5c, 0x88, 0x6f, 0xc1, 0x09, 0x6c, 0x0c, 0xe2, 0x5b,
-  0x90, 0xcf, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x48, 0x3d, 0x2e, 0x8c,
-  0xb8, 0x00, 0xdf, 0x82, 0x16, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81,
-  0xe4, 0xe3, 0x02, 0x89, 0x0b, 0xf3, 0x2d, 0x44, 0xc1, 0x88, 0x01, 0x02,
-  0x80, 0x20, 0x18, 0x48, 0x3f, 0x2e, 0x94, 0xb8, 0x60, 0xdf, 0x02, 0x12,
-  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x04, 0xe6, 0x82, 0x89, 0x0b,
-  0xf2, 0x2d, 0x74, 0xc6, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x48, 0x61,
-  0x2e, 0x9c, 0xb8, 0x20, 0xdf, 0x02, 0x15, 0x8c, 0x18, 0x20, 0x00, 0x08,
-  0x82, 0x81, 0x24, 0xe6, 0x02, 0x8a, 0x0b, 0xf8, 0x2d, 0x2c, 0xc1, 0x88,
-  0x41, 0x03, 0x80, 0x20, 0x18, 0x58, 0x61, 0x2e, 0xa0, 0xb8, 0xb0, 0xdf,
-  0xc2, 0x1d, 0x2c, 0x0a, 0x18, 0x20, 0x44, 0x70, 0x81, 0x41, 0x23, 0x06,
-  0x0e, 0x00, 0x82, 0x60, 0xd0, 0x9c, 0xb9, 0x60, 0xe2, 0x82, 0x7c, 0x0b,
-  0xeb, 0x2d, 0xec, 0xb8, 0x10, 0x84, 0xb8, 0x10, 0xe2, 0x42, 0x88, 0x0b,
-  0x20, 0x2e, 0xf4, 0xb8, 0x30, 0x4b, 0x30, 0x42, 0x08, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00
+  0xc4, 0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0x85,
+  0xb7, 0x10, 0xdb, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60,
+  0xb0, 0x89, 0xb7, 0x20, 0xdb, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00,
+  0x82, 0x60, 0xb0, 0x8d, 0xb7, 0x30, 0xdb, 0x42, 0x42, 0x04, 0x23, 0x06,
+  0x0a, 0x00, 0x82, 0x60, 0xb0, 0xac, 0xb7, 0x20, 0xdb, 0xc2, 0x5c, 0x0b,
+  0x81, 0x6f, 0x0b, 0xa3, 0x2d, 0x80, 0xb7, 0x30, 0x9a, 0x10, 0x00, 0x17,
+  0x3c, 0x36, 0x4b, 0x20, 0x42, 0x03, 0x2d, 0x8f, 0x69, 0xb4, 0x0f, 0xca,
+  0x16, 0xec, 0xc3, 0x12, 0xef, 0x23, 0x84, 0x10, 0xca, 0x16, 0xf0, 0x33,
+  0x62, 0x60, 0x00, 0x20, 0x08, 0x06, 0x50, 0x7b, 0x0b, 0xae, 0x2d, 0x98,
+  0xb1, 0x30, 0x62, 0x60, 0x00, 0x20, 0x08, 0x06, 0x90, 0x7b, 0x0b, 0xaf,
+  0x2d, 0x98, 0xb1, 0x60, 0x41, 0x20, 0x1f, 0x0b, 0x04, 0xf9, 0xd8, 0x9b,
+  0x07, 0xa7, 0x2d, 0xc8, 0x67, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xa4,
+  0xf8, 0x16, 0x6e, 0x5b, 0x38, 0x6d, 0xa1, 0xd7, 0x02, 0x8b, 0xf3, 0xe0,
+  0xb4, 0x05, 0xf9, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x34, 0xdf,
+  0x42, 0x6e, 0x0b, 0xa6, 0x2d, 0xa0, 0x6a, 0x10, 0x8c, 0x18, 0x20, 0x00,
+  0x08, 0x82, 0x81, 0x44, 0xdf, 0x82, 0x6e, 0x0b, 0xa9, 0x2d, 0x80, 0x5b,
+  0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x52, 0x7d, 0x0b, 0xbb, 0x2d,
+  0xb0, 0xb6, 0x80, 0x2f, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x48,
+  0xf6, 0x2d, 0xf0, 0xb6, 0x80, 0xda, 0xc2, 0xaa, 0x06, 0xc6, 0x88, 0x01,
+  0x02, 0x80, 0x20, 0x18, 0x48, 0xf7, 0x2d, 0xf4, 0xb6, 0x80, 0xda, 0xc2,
+  0xb8, 0x05, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x20, 0xe1, 0xb7, 0xe0,
+  0xdb, 0x82, 0x6b, 0x0b, 0xfb, 0x12, 0x8c, 0x18, 0x34, 0x00, 0x08, 0x82,
+  0x81, 0x75, 0xdf, 0x82, 0x6f, 0x0b, 0xb1, 0x2d, 0x30, 0x8b, 0xe2, 0xaa,
+  0x01, 0x42, 0x04, 0xf6, 0xd7, 0x41, 0x6c, 0x0b, 0xf2, 0x19, 0x31, 0x40,
+  0x00, 0x10, 0x04, 0x03, 0x69, 0xbf, 0x85, 0xf0, 0x16, 0x62, 0x5b, 0x68,
+  0xaf, 0xc0, 0x42, 0x3b, 0x88, 0x6d, 0x41, 0x3e, 0x23, 0x06, 0x08, 0x00,
+  0x82, 0x60, 0x20, 0xf5, 0xb7, 0x30, 0xde, 0x02, 0x6c, 0x0b, 0xb8, 0x19,
+  0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x20, 0xf9, 0xb7, 0x40, 0xde,
+  0xc2, 0x6c, 0x0b, 0xf0, 0x15, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81,
+  0xf4, 0xdf, 0x42, 0x79, 0x0b, 0xb6, 0x2d, 0xa0, 0x48, 0x30, 0x62, 0x80,
+  0x00, 0x20, 0x08, 0x06, 0x12, 0x88, 0x0b, 0xe6, 0x2d, 0xc8, 0xb6, 0xb0,
+  0x9b, 0x81, 0x31, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x52, 0x88, 0x0b,
+  0xe7, 0x2d, 0xc8, 0xb6, 0x30, 0x5f, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20,
+  0x18, 0x48, 0x22, 0x2e, 0xa0, 0xb7, 0x80, 0xdb, 0xc2, 0x8a, 0x04, 0x23,
+  0x06, 0x0d, 0x00, 0x82, 0x60, 0x60, 0x85, 0xb8, 0x80, 0xde, 0xc2, 0x6e,
+  0x0b, 0x56, 0x45, 0xf9, 0x66, 0x80, 0x10, 0x81, 0xb9, 0x72, 0xb0, 0xdb,
+  0x82, 0x7c, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x40, 0x2a, 0x71, 0x61,
+  0xbd, 0x85, 0xdd, 0x16, 0xfa, 0x29, 0x30, 0x58, 0x0e, 0x76, 0x5b, 0x90,
+  0xcf, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x48, 0x27, 0x2e, 0xb4, 0xb7,
+  0xa0, 0xdb, 0x02, 0x3a, 0x06, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
+  0x48, 0x28, 0x2e, 0xb8, 0xb7, 0xd0, 0xdb, 0x02, 0x48, 0x05, 0x23, 0x06,
+  0x08, 0x00, 0x82, 0x60, 0x20, 0xa5, 0xb8, 0xf0, 0xde, 0x02, 0x78, 0x0b,
+  0x38, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0xa4, 0xe2, 0x02,
+  0x7c, 0x0b, 0xbc, 0x2d, 0xac, 0x63, 0x60, 0x8c, 0x18, 0x20, 0x00, 0x08,
+  0x82, 0x81, 0xb4, 0xe2, 0x42, 0x7c, 0x0b, 0xbc, 0x2d, 0x8c, 0x54, 0x30,
+  0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x12, 0x8b, 0x0b, 0xf2, 0x2d, 0x88,
+  0xb7, 0xb0, 0x13, 0xc1, 0x88, 0x41, 0x03, 0x80, 0x20, 0x18, 0x58, 0x2b,
+  0x2e, 0xc8, 0xb7, 0x50, 0xde, 0x02, 0x18, 0x7c, 0x9e, 0x3b, 0x06, 0x08,
+  0x11, 0x58, 0xef, 0x06, 0xe5, 0x2d, 0xc8, 0x67, 0xc4, 0x00, 0x01, 0x40,
+  0x10, 0x0c, 0xa4, 0x17, 0x17, 0xea, 0x5b, 0x28, 0x6f, 0xa1, 0x85, 0x02,
+  0xfb, 0xdd, 0xa0, 0xbc, 0x05, 0xf9, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
+  0x81, 0x14, 0xe3, 0xc2, 0x7d, 0x0b, 0xe4, 0x2d, 0xe0, 0x5f, 0x30, 0x62,
+  0x80, 0x00, 0x20, 0x08, 0x06, 0x92, 0x8c, 0x0b, 0xf8, 0x2d, 0x9c, 0xb7,
+  0x00, 0x43, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x48, 0x33, 0x2e,
+  0xe4, 0xb7, 0xa0, 0xde, 0x02, 0x1a, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82,
+  0x60, 0x20, 0xd1, 0xb8, 0xa0, 0xdf, 0x82, 0x79, 0x0b, 0xfb, 0x67, 0x8c,
+  0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x54, 0xe3, 0xc2, 0x7e, 0x0b, 0xe6,
+  0x2d, 0xcc, 0x50, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x92, 0x8d,
+  0x0b, 0xfc, 0x2d, 0xb0, 0xb7, 0xb0, 0x06, 0xc1, 0x88, 0x41, 0x03, 0x80,
+  0x20, 0x18, 0x58, 0x35, 0x2e, 0xf0, 0xb7, 0xf0, 0xde, 0x82, 0x1a, 0xa4,
+  0x01, 0x1a, 0xf8, 0x1f, 0x42, 0x04, 0xc6, 0x06, 0x6c, 0x20, 0x1f, 0x0b,
+  0xda, 0x40, 0x3e, 0x16, 0x06, 0xf1, 0x2d, 0xc8, 0x67, 0xc4, 0x00, 0x01,
+  0x40, 0x10, 0x0c, 0xa4, 0x1d, 0x17, 0x42, 0x5c, 0x88, 0x6f, 0xc1, 0x09,
+  0x6c, 0x0c, 0xe2, 0x5b, 0x90, 0xcf, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
+  0x48, 0x3d, 0x2e, 0x8c, 0xb8, 0x00, 0xdf, 0x82, 0x16, 0x8c, 0x18, 0x20,
+  0x00, 0x08, 0x82, 0x81, 0xe4, 0xe3, 0x02, 0x89, 0x0b, 0xf3, 0x2d, 0x44,
+  0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x48, 0x3f, 0x2e, 0x94, 0xb8,
+  0x60, 0xdf, 0x02, 0x12, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x04,
+  0xe6, 0x82, 0x89, 0x0b, 0xf2, 0x2d, 0x74, 0xc6, 0x88, 0x01, 0x02, 0x80,
+  0x20, 0x18, 0x48, 0x61, 0x2e, 0x9c, 0xb8, 0x20, 0xdf, 0x02, 0x15, 0x8c,
+  0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x24, 0xe6, 0x02, 0x8a, 0x0b, 0xf8,
+  0x2d, 0x2c, 0xc1, 0x88, 0x41, 0x03, 0x80, 0x20, 0x18, 0x58, 0x61, 0x2e,
+  0xa0, 0xb8, 0xb0, 0xdf, 0xc2, 0x1d, 0x2c, 0x0a, 0x18, 0x20, 0x44, 0x70,
+  0x81, 0x41, 0x23, 0x06, 0x0e, 0x00, 0x82, 0x60, 0xd0, 0x9c, 0xb9, 0x60,
+  0xe2, 0x82, 0x7c, 0x0b, 0xeb, 0x2d, 0xec, 0xb8, 0x10, 0x84, 0xb8, 0x10,
+  0xe2, 0x42, 0x88, 0x0b, 0x20, 0x2e, 0xf4, 0xb8, 0x30, 0x4b, 0x30, 0x42,
+  0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
 };
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_int16_fp16.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_int16_fp16.h
index 8915fce41d27c..648dadbd51c8b 100644
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_int16_fp16.h
+++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_int16_fp16.h
@@ -15,7 +15,7 @@
 ; Name                 Index   Mask Register SysValue  Format   Used
 ; -------------------- ----- ------ -------- -------- ------- ------
 ; no parameters
-; shader hash: c6680b7166708ce33b64ddf6e2e7d30d
+; shader hash: c9a21ebee7360ce32454fdb857aba36d
 ;
 ; Pipeline Runtime Information: 
 ;
@@ -68,7 +68,7 @@ target triple = "dxil-ms-dx"
 %dx.types.CBufRet.i32 = type { i32, i32, i32, i32 }
 %dx.types.ResRet.f16 = type { half, half, half, half, i32 }
 %dx.types.ResRet.i16 = type { i16, i16, i16, i16, i32 }
-%"class.RWStructuredBuffer<short>" = type { i16 }
+%"class.RWStructuredBuffer<int16_t>" = type { i16 }
 %"class.RWStructuredBuffer<half>" = type { half }
 %Constants = type { i32, i32, i32, i32, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32 }
 
@@ -4478,9 +4478,9 @@ attributes #2 = { nounwind }
 !3 = !{!"cs", i32 6, i32 2}
 !4 = !{null, !5, !9, null}
 !5 = !{!6, !7, !8}
-!6 = !{i32 0, %"class.RWStructuredBuffer<short>"* undef, !"", i32 0, i32 0, i32 1, i32 12, i1 false, i1 false, i1 false, !1}
+!6 = !{i32 0, %"class.RWStructuredBuffer<int16_t>"* undef, !"", i32 0, i32 0, i32 1, i32 12, i1 false, i1 false, i1 false, !1}
 !7 = !{i32 1, %"class.RWStructuredBuffer<half>"* undef, !"", i32 0, i32 1, i32 1, i32 12, i1 false, i1 false, i1 false, !1}
-!8 = !{i32 2, %"class.RWStructuredBuffer<short>"* undef, !"", i32 0, i32 2, i32 1, i32 12, i1 false, i1 false, i1 false, !1}
+!8 = !{i32 2, %"class.RWStructuredBuffer<int16_t>"* undef, !"", i32 0, i32 2, i32 1, i32 12, i1 false, i1 false, i1 false, !1}
 !9 = !{!10}
 !10 = !{i32 0, %Constants* undef, !"", i32 0, i32 0, i32 1, i32 116, null}
 !11 = !{void ()* @GridSample, !"GridSample", null, !4, !12}
@@ -4490,9 +4490,9 @@ attributes #2 = { nounwind }
 #endif
 
 const unsigned char g_GridSample[] = {
-  0x44, 0x58, 0x42, 0x43, 0xf5, 0xf4, 0x45, 0xca, 0x48, 0x3e, 0x7d, 0x4c,
-  0x4c, 0x16, 0x2b, 0xc7, 0x0f, 0xf4, 0xbd, 0xc2, 0x01, 0x00, 0x00, 0x00,
-  0x30, 0x54, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00,
+  0x44, 0x58, 0x42, 0x43, 0x75, 0xab, 0xc2, 0xb2, 0x22, 0x68, 0xea, 0xbf,
+  0xc8, 0x0b, 0xb5, 0xb4, 0xff, 0xea, 0xd4, 0xf0, 0x01, 0x00, 0x00, 0x00,
+  0x34, 0x54, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00,
   0x48, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00,
   0x18, 0x01, 0x00, 0x00, 0x34, 0x01, 0x00, 0x00, 0x53, 0x46, 0x49, 0x30,
   0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -4514,12 +4514,12 @@ const unsigned char g_GridSample[] = {
   0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
   0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
   0x00, 0x00, 0x00, 0x00, 0x48, 0x41, 0x53, 0x48, 0x14, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0xc6, 0x68, 0x0b, 0x71, 0x66, 0x70, 0x8c, 0xe3,
-  0x3b, 0x64, 0xdd, 0xf6, 0xe2, 0xe7, 0xd3, 0x0d, 0x44, 0x58, 0x49, 0x4c,
-  0xf4, 0x52, 0x00, 0x00, 0x62, 0x00, 0x05, 0x00, 0xbd, 0x14, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0xc9, 0xa2, 0x1e, 0xbe, 0xe7, 0x36, 0x0c, 0xe3,
+  0x24, 0x54, 0xfd, 0xb8, 0x57, 0xab, 0xa3, 0x6d, 0x44, 0x58, 0x49, 0x4c,
+  0xf8, 0x52, 0x00, 0x00, 0x62, 0x00, 0x05, 0x00, 0xbe, 0x14, 0x00, 0x00,
   0x44, 0x58, 0x49, 0x4c, 0x02, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
-  0xdc, 0x52, 0x00, 0x00, 0x42, 0x43, 0xc0, 0xde, 0x21, 0x0c, 0x00, 0x00,
-  0xb4, 0x14, 0x00, 0x00, 0x0b, 0x82, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00,
+  0xe0, 0x52, 0x00, 0x00, 0x42, 0x43, 0xc0, 0xde, 0x21, 0x0c, 0x00, 0x00,
+  0xb5, 0x14, 0x00, 0x00, 0x0b, 0x82, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00,
   0x13, 0x00, 0x00, 0x00, 0x07, 0x81, 0x23, 0x91, 0x41, 0xc8, 0x04, 0x49,
   0x06, 0x10, 0x32, 0x39, 0x92, 0x01, 0x84, 0x0c, 0x25, 0x05, 0x08, 0x19,
   0x1e, 0x04, 0x8b, 0x62, 0x80, 0x18, 0x45, 0x02, 0x42, 0x92, 0x0b, 0x42,
@@ -4532,7 +4532,7 @@ const unsigned char g_GridSample[] = {
   0x01, 0xd5, 0x06, 0x62, 0xf8, 0xff, 0xff, 0xff, 0xff, 0x01, 0x90, 0x00,
   0x49, 0x18, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x13, 0x82, 0x60, 0x42,
   0x20, 0x4c, 0x08, 0x06, 0x00, 0x00, 0x00, 0x00, 0x89, 0x20, 0x00, 0x00,
-  0x55, 0x00, 0x00, 0x00, 0x32, 0x22, 0x88, 0x09, 0x20, 0x64, 0x85, 0x04,
+  0x56, 0x00, 0x00, 0x00, 0x32, 0x22, 0x88, 0x09, 0x20, 0x64, 0x85, 0x04,
   0x13, 0x23, 0xa4, 0x84, 0x04, 0x13, 0x23, 0xe3, 0x84, 0xa1, 0x90, 0x14,
   0x12, 0x4c, 0x8c, 0x8c, 0x0b, 0x84, 0xc4, 0x4c, 0x10, 0xb8, 0xc1, 0x08,
   0x40, 0x09, 0x00, 0x0a, 0xe6, 0x08, 0xc0, 0xa0, 0x0c, 0xc3, 0x30, 0x10,
@@ -4549,1741 +4549,1742 @@ const unsigned char g_GridSample[] = {
   0x5a, 0x29, 0x86, 0x61, 0x18, 0x06, 0xe2, 0x8e, 0x1a, 0x2e, 0x7f, 0xc2,
   0x1e, 0x42, 0xf2, 0xb9, 0x8d, 0x2a, 0x56, 0x62, 0xf2, 0x91, 0xdb, 0x46,
   0xc4, 0x30, 0x0c, 0x43, 0x21, 0xbc, 0x41, 0x19, 0xe8, 0x9b, 0x23, 0x08,
-  0x8a, 0xa1, 0x0c, 0xc8, 0x30, 0x84, 0x24, 0x0e, 0x04, 0xcc, 0xf4, 0x8d,
-  0x03, 0x3b, 0x84, 0xc3, 0x3c, 0xcc, 0x83, 0x1b, 0xc8, 0xc2, 0x2d, 0xcc,
-  0x02, 0x3d, 0xc8, 0x43, 0x3d, 0x8c, 0x03, 0x3d, 0xd4, 0x83, 0x3c, 0x94,
-  0x03, 0x39, 0x88, 0x42, 0x3d, 0x98, 0x83, 0x39, 0x94, 0x83, 0x3c, 0xf0,
-  0xc1, 0x3c, 0xa0, 0xc3, 0x3b, 0xc8, 0x03, 0x3d, 0xf8, 0x01, 0x0a, 0x0c,
-  0x2a, 0x67, 0xf2, 0xc6, 0x81, 0x1d, 0xc2, 0x61, 0x1e, 0xe6, 0xc1, 0x0d,
-  0x64, 0xe1, 0x16, 0x66, 0x81, 0x1e, 0xe4, 0xa1, 0x1e, 0xc6, 0x81, 0x1e,
-  0xea, 0x41, 0x1e, 0xca, 0x81, 0x1c, 0x44, 0xa1, 0x1e, 0xcc, 0xc1, 0x1c,
-  0xca, 0x41, 0x1e, 0xf8, 0x00, 0x1d, 0xc2, 0x81, 0x1d, 0xcc, 0xc1, 0x0f,
-  0x50, 0x80, 0xd1, 0x39, 0x8c, 0x40, 0x0c, 0x97, 0x70, 0x4e, 0x23, 0x4d,
-  0x40, 0x33, 0x49, 0x68, 0x19, 0x86, 0x61, 0x48, 0xd3, 0x34, 0x4d, 0xd3,
-  0x81, 0xd4, 0x39, 0x02, 0x50, 0x98, 0x02, 0x00, 0x13, 0x14, 0x72, 0xc0,
-  0x87, 0x74, 0x60, 0x87, 0x36, 0x68, 0x87, 0x79, 0x68, 0x03, 0x72, 0xc0,
-  0x87, 0x0d, 0xae, 0x50, 0x0e, 0x6d, 0xd0, 0x0e, 0x7a, 0x50, 0x0e, 0x6d,
-  0x00, 0x0f, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d,
-  0x90, 0x0e, 0x71, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x78,
-  0xa0, 0x07, 0x78, 0xd0, 0x06, 0xe9, 0x10, 0x07, 0x76, 0xa0, 0x07, 0x71,
-  0x60, 0x07, 0x6d, 0x90, 0x0e, 0x73, 0x20, 0x07, 0x7a, 0x30, 0x07, 0x72,
-  0xd0, 0x06, 0xe9, 0x60, 0x07, 0x74, 0xa0, 0x07, 0x76, 0x40, 0x07, 0x6d,
-  0x60, 0x0e, 0x71, 0x60, 0x07, 0x7a, 0x10, 0x07, 0x76, 0xd0, 0x06, 0xe6,
-  0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x60, 0x0e, 0x76,
-  0x40, 0x07, 0x7a, 0x60, 0x07, 0x74, 0xd0, 0x06, 0xee, 0x80, 0x07, 0x7a,
-  0x10, 0x07, 0x76, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x7a, 0x60, 0x07, 0x74,
-  0x30, 0xe4, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x60, 0xc8, 0x43, 0x00, 0x01, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0xc0, 0x90, 0x67, 0x01, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x80, 0x21, 0x4f, 0x03, 0x04, 0xc0, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x43, 0x1e, 0x08, 0x08, 0x80, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0x3c, 0x12, 0x10, 0x00, 0x01,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x79, 0x28, 0x20, 0x00,
-  0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0xf2, 0x58, 0x40,
-  0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0xe4, 0xd1,
-  0x80, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0xc8,
-  0xc3, 0x01, 0x01, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0,
-  0x90, 0xe7, 0x03, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x80, 0x21, 0x8f, 0x18, 0x00, 0x01, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x40, 0x16, 0x08, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00,
-  0x32, 0x1e, 0x98, 0x14, 0x19, 0x11, 0x4c, 0x90, 0x8c, 0x09, 0x26, 0x47,
-  0xc6, 0x04, 0x43, 0x1a, 0x4a, 0xa0, 0x08, 0x8a, 0x61, 0x04, 0xa0, 0x30,
-  0x0a, 0x3d, 0xa0, 0x10, 0x0a, 0x30, 0x80, 0xc2, 0x11, 0x00, 0x62, 0x0b,
-  0x1c, 0x10, 0x10, 0x81, 0xd0, 0x19, 0x00, 0x5a, 0x67, 0x00, 0xc8, 0x9c,
-  0x01, 0x00, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00,
-  0x1a, 0x03, 0x4c, 0x90, 0x46, 0x02, 0x13, 0x44, 0x35, 0x18, 0x63, 0x0b,
-  0x73, 0x3b, 0x03, 0xb1, 0x2b, 0x93, 0x9b, 0x4b, 0x7b, 0x73, 0x03, 0x99,
-  0x71, 0xb9, 0x01, 0x41, 0xa1, 0x0b, 0x3b, 0x9b, 0x7b, 0x91, 0x2a, 0x62,
-  0x2a, 0x0a, 0x9a, 0x2a, 0xfa, 0x9a, 0xb9, 0x81, 0x79, 0x31, 0x4b, 0x73,
-  0x0b, 0x63, 0x4b, 0xd9, 0x10, 0x04, 0x13, 0x84, 0x81, 0x99, 0x20, 0x0c,
-  0xcd, 0x06, 0x61, 0x20, 0x26, 0x08, 0x83, 0xb3, 0x41, 0x18, 0x0c, 0x0a,
-  0x63, 0x73, 0x1b, 0x06, 0xc4, 0x20, 0x26, 0x08, 0xc3, 0x33, 0x41, 0x30,
-  0x03, 0x8c, 0xc0, 0x04, 0x61, 0x80, 0x26, 0x08, 0x61, 0x40, 0x6d, 0x58,
-  0x94, 0x85, 0x51, 0x94, 0xa1, 0x71, 0x1c, 0xa7, 0x98, 0x20, 0xa0, 0x81,
-  0xb5, 0x61, 0x19, 0x20, 0x46, 0x19, 0x86, 0xc6, 0x71, 0x9c, 0x62, 0xc3,
-  0x42, 0x2c, 0x8c, 0x42, 0x0c, 0x8d, 0xe3, 0x38, 0xc5, 0x86, 0xe1, 0x89,
-  0xa4, 0x09, 0xc2, 0x1a, 0x5c, 0x13, 0x84, 0x21, 0xda, 0x80, 0x28, 0x14,
-  0xa3, 0x28, 0x43, 0x05, 0x6c, 0x08, 0xac, 0x0d, 0x04, 0x30, 0x5d, 0xc0,
-  0x04, 0x41, 0x00, 0xa8, 0x1c, 0xc9, 0xa5, 0x91, 0x4d, 0x85, 0xb5, 0xc1,
-  0xb1, 0x95, 0x4d, 0x10, 0xd8, 0xa0, 0x9a, 0x20, 0x0c, 0xd2, 0x04, 0x61,
-  0x98, 0x36, 0x0c, 0xdd, 0x30, 0x6c, 0x20, 0x94, 0x8d, 0xf3, 0x36, 0x14,
-  0x99, 0x06, 0x60, 0x5f, 0x15, 0x36, 0x36, 0xbb, 0x36, 0x97, 0x34, 0xb2,
-  0x32, 0x37, 0xba, 0x29, 0x41, 0x50, 0x85, 0x0c, 0xcf, 0xc5, 0xae, 0x4c,
-  0x6e, 0x2e, 0xed, 0xcd, 0x6d, 0x4a, 0x40, 0x34, 0x21, 0xc3, 0x73, 0xb1,
-  0x0b, 0x63, 0xb3, 0x2b, 0x93, 0x9b, 0x12, 0x18, 0x75, 0xc8, 0xf0, 0x5c,
-  0xe6, 0xd0, 0xc2, 0xc8, 0xca, 0xe4, 0x9a, 0xde, 0xc8, 0xca, 0xd8, 0xa6,
-  0x04, 0x48, 0x19, 0x32, 0x3c, 0x17, 0xb9, 0xb2, 0xb9, 0xb7, 0x3a, 0xb9,
-  0xb1, 0xb2, 0xb9, 0x29, 0xc1, 0x55, 0x87, 0x0c, 0xcf, 0xa5, 0xcc, 0x8d,
-  0x4e, 0x2e, 0x0f, 0xea, 0x2d, 0xcd, 0x8d, 0x6e, 0x6e, 0x4a, 0xf0, 0x01,
-  0x79, 0x18, 0x00, 0x00, 0x51, 0x00, 0x00, 0x00, 0x33, 0x08, 0x80, 0x1c,
-  0xc4, 0xe1, 0x1c, 0x66, 0x14, 0x01, 0x3d, 0x88, 0x43, 0x38, 0x84, 0xc3,
-  0x8c, 0x42, 0x80, 0x07, 0x79, 0x78, 0x07, 0x73, 0x98, 0x71, 0x0c, 0xe6,
-  0x00, 0x0f, 0xed, 0x10, 0x0e, 0xf4, 0x80, 0x0e, 0x33, 0x0c, 0x42, 0x1e,
-  0xc2, 0xc1, 0x1d, 0xce, 0xa1, 0x1c, 0x66, 0x30, 0x05, 0x3d, 0x88, 0x43,
-  0x38, 0x84, 0x83, 0x1b, 0xcc, 0x03, 0x3d, 0xc8, 0x43, 0x3d, 0x8c, 0x03,
-  0x3d, 0xcc, 0x78, 0x8c, 0x74, 0x70, 0x07, 0x7b, 0x08, 0x07, 0x79, 0x48,
-  0x87, 0x70, 0x70, 0x07, 0x7a, 0x70, 0x03, 0x76, 0x78, 0x87, 0x70, 0x20,
-  0x87, 0x19, 0xcc, 0x11, 0x0e, 0xec, 0x90, 0x0e, 0xe1, 0x30, 0x0f, 0x6e,
-  0x30, 0x0f, 0xe3, 0xf0, 0x0e, 0xf0, 0x50, 0x0e, 0x33, 0x10, 0xc4, 0x1d,
-  0xde, 0x21, 0x1c, 0xd8, 0x21, 0x1d, 0xc2, 0x61, 0x1e, 0x66, 0x30, 0x89,
-  0x3b, 0xbc, 0x83, 0x3b, 0xd0, 0x43, 0x39, 0xb4, 0x03, 0x3c, 0xbc, 0x83,
-  0x3c, 0x84, 0x03, 0x3b, 0xcc, 0xf0, 0x14, 0x76, 0x60, 0x07, 0x7b, 0x68,
-  0x07, 0x37, 0x68, 0x87, 0x72, 0x68, 0x07, 0x37, 0x80, 0x87, 0x70, 0x90,
-  0x87, 0x70, 0x60, 0x07, 0x76, 0x28, 0x07, 0x76, 0xf8, 0x05, 0x76, 0x78,
-  0x87, 0x77, 0x80, 0x87, 0x5f, 0x08, 0x87, 0x71, 0x18, 0x87, 0x72, 0x98,
-  0x87, 0x79, 0x98, 0x81, 0x2c, 0xee, 0xf0, 0x0e, 0xee, 0xe0, 0x0e, 0xf5,
-  0xc0, 0x0e, 0xec, 0x30, 0x03, 0x62, 0xc8, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c,
-  0xcc, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xdc, 0x61, 0x1c, 0xca, 0x21, 0x1c,
-  0xc4, 0x81, 0x1d, 0xca, 0x61, 0x06, 0xd6, 0x90, 0x43, 0x39, 0xc8, 0x43,
-  0x39, 0x98, 0x43, 0x39, 0xc8, 0x43, 0x39, 0xb8, 0xc3, 0x38, 0x94, 0x43,
-  0x38, 0x88, 0x03, 0x3b, 0x94, 0xc3, 0x2f, 0xbc, 0x83, 0x3c, 0xfc, 0x82,
-  0x3b, 0xd4, 0x03, 0x3b, 0xb0, 0xc3, 0x0c, 0xc4, 0x21, 0x07, 0x7c, 0x70,
-  0x03, 0x7a, 0x28, 0x87, 0x76, 0x80, 0x87, 0x19, 0xd1, 0x43, 0x0e, 0xf8,
-  0xe0, 0x06, 0xe4, 0x20, 0x0e, 0xe7, 0xe0, 0x06, 0xf6, 0x10, 0x0e, 0xf2,
-  0xc0, 0x0e, 0xe1, 0x90, 0x0f, 0xef, 0x50, 0x0f, 0xf4, 0x30, 0x83, 0x81,
-  0xc8, 0x01, 0x1f, 0xdc, 0x40, 0x1c, 0xe4, 0xa1, 0x1c, 0xc2, 0x61, 0x1d,
-  0xdc, 0x40, 0x1c, 0xe4, 0x01, 0x00, 0x00, 0x00, 0x71, 0x20, 0x00, 0x00,
-  0x31, 0x00, 0x00, 0x00, 0x06, 0xa0, 0x80, 0x11, 0x32, 0xb0, 0x00, 0xf3,
-  0x2c, 0x84, 0x19, 0x40, 0xc3, 0xe5, 0x3b, 0x8f, 0x1f, 0x20, 0x0d, 0x10,
-  0x61, 0x7e, 0x71, 0xdb, 0xa6, 0xb0, 0x0d, 0x97, 0xef, 0x3c, 0xbe, 0x10,
-  0x50, 0x45, 0x41, 0x44, 0xa5, 0x03, 0x0c, 0x25, 0x61, 0x00, 0x02, 0xe6,
-  0x23, 0xb7, 0x6d, 0x0b, 0xd2, 0x70, 0xf9, 0xce, 0xe3, 0x0b, 0x11, 0x01,
-  0x4c, 0x44, 0x08, 0x34, 0xc3, 0x42, 0xd8, 0x81, 0x33, 0x5c, 0xbe, 0xf3,
-  0xf8, 0x83, 0x33, 0xe1, 0x7e, 0x71, 0xdb, 0x86, 0x70, 0x0d, 0x97, 0xef,
-  0x3c, 0x7e, 0x04, 0x58, 0x1b, 0x55, 0x14, 0x44, 0x54, 0x3a, 0xc0, 0xe0,
-  0x17, 0xb5, 0x6e, 0x02, 0xd7, 0x70, 0xf9, 0xce, 0xe3, 0x47, 0x80, 0xb5,
-  0x51, 0x45, 0x41, 0x44, 0xa5, 0x03, 0x0c, 0x3e, 0x52, 0xeb, 0x36, 0x80,
-  0x0d, 0x97, 0xef, 0x3c, 0x7e, 0x04, 0x58, 0x1b, 0x55, 0x14, 0x44, 0xc4,
-  0x4e, 0x4e, 0x44, 0xf8, 0x48, 0xad, 0x5b, 0x81, 0x34, 0x5c, 0xbe, 0xf3,
-  0xf8, 0x13, 0x11, 0x4d, 0x08, 0x10, 0x61, 0x7e, 0x71, 0xdb, 0x96, 0x20,
-  0x0d, 0x97, 0xef, 0x3c, 0xfe, 0x44, 0x44, 0x13, 0x02, 0x44, 0x98, 0x8f,
-  0xdc, 0xb6, 0x05, 0x48, 0xc3, 0xe5, 0x3b, 0x8f, 0x3f, 0x1d, 0x11, 0x01,
-  0x0c, 0xe2, 0xe0, 0x23, 0xb7, 0x6d, 0x04, 0xcf, 0x70, 0xf9, 0xce, 0xe3,
-  0x53, 0x0d, 0x10, 0x61, 0x7e, 0x71, 0xdb, 0x00, 0x61, 0x20, 0x00, 0x00,
-  0x0c, 0x13, 0x00, 0x00, 0x13, 0x04, 0x24, 0x14, 0x0b, 0x04, 0x00, 0x00,
-  0x1d, 0x00, 0x00, 0x00, 0x34, 0x14, 0x58, 0xd9, 0x95, 0xa5, 0x40, 0x0d,
-  0x94, 0x51, 0x21, 0x15, 0x57, 0xc1, 0x95, 0x5c, 0xd9, 0x14, 0x4b, 0x61,
-  0x0a, 0x14, 0x4d, 0xe9, 0x06, 0x94, 0x43, 0x29, 0x90, 0x31, 0x03, 0x40,
-  0x48, 0x09, 0x14, 0x01, 0x3d, 0x23, 0x00, 0x63, 0x04, 0x20, 0x08, 0x82,
-  0xf8, 0x37, 0x46, 0x00, 0x82, 0x20, 0x48, 0xff, 0xc2, 0x18, 0x01, 0x08,
-  0x82, 0x20, 0xfd, 0x8d, 0x11, 0x80, 0x20, 0x08, 0xf2, 0xdf, 0x18, 0x01,
-  0x08, 0x82, 0x20, 0xfe, 0x0b, 0x63, 0x04, 0x20, 0x08, 0x82, 0x21, 0x38,
-  0x8c, 0x11, 0x80, 0x20, 0x08, 0xea, 0xdf, 0x18, 0x01, 0x08, 0x82, 0xa0,
-  0xfe, 0x0b, 0x63, 0x04, 0x20, 0x08, 0x82, 0xf0, 0x37, 0x46, 0x00, 0x82,
-  0x20, 0x08, 0xff, 0xc2, 0x18, 0x01, 0x08, 0x82, 0x20, 0x08, 0x06, 0x00,
-  0x23, 0x06, 0x09, 0x00, 0x82, 0x60, 0x20, 0x06, 0x6d, 0x70, 0x39, 0x6b,
-  0xb0, 0x06, 0x64, 0x30, 0x62, 0x90, 0x00, 0x20, 0x08, 0x06, 0x62, 0xe0,
-  0x06, 0xd8, 0xd3, 0x06, 0x6d, 0x50, 0x06, 0x23, 0x06, 0x09, 0x00, 0x82,
-  0x60, 0x20, 0x06, 0x6f, 0x90, 0x41, 0x6b, 0xb0, 0x06, 0x66, 0x30, 0x62,
-  0x90, 0x00, 0x20, 0x08, 0x06, 0x62, 0x00, 0x07, 0x1a, 0xc4, 0x06, 0x6c,
-  0x70, 0x06, 0x23, 0x06, 0x06, 0x00, 0x82, 0x60, 0x40, 0xec, 0x81, 0xd5,
-  0x06, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xf0, 0xcd, 0xc1, 0x18, 0x08,
-  0x6e, 0x30, 0x9a, 0x10, 0x00, 0x15, 0x0c, 0x30, 0x9a, 0x30, 0x04, 0xc3,
-  0x0d, 0x42, 0x40, 0x06, 0xb3, 0x0c, 0xc1, 0x08, 0x05, 0x23, 0x06, 0x07,
-  0x00, 0x82, 0x60, 0xf0, 0xe1, 0x01, 0x1a, 0x1c, 0x76, 0x30, 0x9a, 0x10,
-  0x0c, 0x17, 0x3c, 0x35, 0x9a, 0x30, 0x08, 0x17, 0x3c, 0x35, 0x62, 0x70,
-  0x00, 0x20, 0x08, 0x06, 0x5f, 0x1f, 0xb4, 0x01, 0x03, 0x06, 0xa3, 0x09,
-  0x01, 0x30, 0xdc, 0x10, 0xe8, 0x01, 0x18, 0x4c, 0x37, 0x50, 0x5e, 0x30,
-  0xdd, 0x50, 0x69, 0x42, 0x21, 0x01, 0x4c, 0x37, 0x5c, 0x1c, 0x51, 0x48,
-  0x00, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xf0, 0x95, 0x42, 0x1d, 0x50,
-  0x67, 0x30, 0x9a, 0x10, 0x04, 0xa3, 0x09, 0x82, 0x30, 0x9a, 0x30, 0x0c,
-  0x15, 0x08, 0x52, 0x03, 0x21, 0x15, 0x0c, 0x52, 0x57, 0x30, 0x23, 0x06,
-  0x07, 0x00, 0x82, 0x60, 0xf0, 0xb5, 0x42, 0x1f, 0x70, 0xa9, 0x30, 0x9a,
-  0x10, 0x00, 0x15, 0x0c, 0x52, 0x5b, 0x10, 0x15, 0x20, 0x33, 0x9a, 0x50,
-  0x04, 0x15, 0x08, 0x52, 0x44, 0x10, 0x15, 0x34, 0x33, 0x9a, 0x90, 0x08,
-  0x15, 0x08, 0x52, 0x44, 0x10, 0xd7, 0x3c, 0x75, 0xc5, 0x53, 0x37, 0x3c,
-  0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x1f, 0x2f, 0xb0, 0xc2, 0x1a,
-  0xd0, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20,
-  0x8c, 0x26, 0x10, 0xc3, 0x11, 0x4f, 0x1d, 0xf1, 0xd4, 0x11, 0x4f, 0x1d,
-  0xf1, 0xd4, 0x88, 0x41, 0x03, 0x80, 0x20, 0x18, 0x58, 0xe8, 0xf0, 0x0a,
-  0xcc, 0xa2, 0x8c, 0x02, 0x31, 0x08, 0x81, 0x09, 0x01, 0x7c, 0x4e, 0x18,
-  0x66, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x34, 0x75, 0xb8, 0x85, 0x3c,
-  0x08, 0xc6, 0x01, 0x15, 0xca, 0x61, 0x34, 0x21, 0x00, 0x2e, 0x78, 0xf0,
-  0x8a, 0x61, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x40, 0x73, 0x87, 0x5d,
-  0xe8, 0x83, 0xe0, 0x1c, 0x58, 0x21, 0x1d, 0x46, 0x13, 0x02, 0xe0, 0x82,
-  0x07, 0x87, 0x1b, 0xe2, 0xa0, 0x1d, 0xc0, 0xc0, 0x8c, 0x56, 0x80, 0x8f,
-  0x0d, 0xae, 0x00, 0x9f, 0x59, 0x06, 0x61, 0x18, 0x4c, 0x38, 0x05, 0xf9,
-  0x98, 0x80, 0x0a, 0xf2, 0x31, 0x3d, 0x68, 0x05, 0xf8, 0x58, 0x1e, 0xb8,
-  0x02, 0x7c, 0x8c, 0x10, 0xe4, 0x63, 0x84, 0x20, 0x9f, 0x59, 0x02, 0xc2,
-  0xfc, 0x00, 0x91, 0x8f, 0x21, 0x7d, 0x20, 0x1f, 0x13, 0x66, 0x01, 0x3e,
-  0x26, 0xd0, 0x02, 0x7c, 0x4c, 0x88, 0x05, 0xf9, 0x98, 0x20, 0x0b, 0xf2,
-  0x99, 0x25, 0x20, 0x06, 0x2a, 0x1e, 0x48, 0x20, 0x86, 0x81, 0x8a, 0x07,
-  0x12, 0x88, 0x61, 0x34, 0xa1, 0x15, 0x84, 0xe1, 0x86, 0xe0, 0x1f, 0xc0,
-  0x60, 0x96, 0xa1, 0x30, 0x82, 0x11, 0x03, 0x03, 0x00, 0x41, 0x30, 0x80,
-  0x50, 0xe2, 0x1c, 0x88, 0x11, 0x03, 0x03, 0x00, 0x41, 0x30, 0x80, 0x52,
-  0x02, 0x1d, 0x88, 0x59, 0x02, 0x63, 0xa0, 0xe2, 0x21, 0x0a, 0x86, 0x18,
-  0xa8, 0x78, 0x88, 0x82, 0x21, 0x86, 0x23, 0x04, 0x53, 0x20, 0xbe, 0xe1,
-  0x88, 0xa1, 0x14, 0x84, 0xaf, 0x84, 0x60, 0x87, 0x23, 0x88, 0x54, 0x20,
-  0xbe, 0x12, 0x82, 0x1d, 0x8e, 0x30, 0x4e, 0x41, 0xf8, 0x2a, 0x10, 0x76,
-  0x96, 0xe1, 0xd0, 0x82, 0xd1, 0x04, 0x5d, 0x18, 0x86, 0x1b, 0x02, 0x96,
-  0x00, 0x83, 0x59, 0x06, 0x24, 0x09, 0xca, 0x16, 0xfa, 0x01, 0x2e, 0x78,
-  0x6a, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xa8, 0x9a, 0xf0, 0x87, 0xe6,
-  0x1d, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0xb2, 0x09, 0x7f, 0x08,
-  0x84, 0xc2, 0x85, 0x90, 0x80, 0x0b, 0x9e, 0x1a, 0x31, 0x38, 0x00, 0x10,
-  0x04, 0x03, 0x2a, 0x27, 0x44, 0x02, 0x9a, 0x87, 0x11, 0x83, 0x03, 0x00,
-  0x41, 0x30, 0xa0, 0x74, 0x42, 0x24, 0x02, 0x61, 0x96, 0x40, 0x1b, 0x6e,
-  0x50, 0x68, 0x02, 0x0c, 0x66, 0x19, 0x14, 0x2d, 0x30, 0x5b, 0xc0, 0x85,
-  0xf8, 0xcc, 0x32, 0x2c, 0xce, 0x64, 0xb9, 0x50, 0xc5, 0xc7, 0x02, 0x81,
-  0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x14, 0xf2, 0xb1, 0x22, 0x88,
-  0x4f, 0x11, 0x3d, 0xa1, 0xc3, 0x0d, 0xc1, 0x4e, 0x80, 0xc1, 0x2c, 0x03,
-  0xd3, 0x04, 0x36, 0x84, 0x03, 0x7c, 0x66, 0x09, 0x24, 0x03, 0x07, 0x22,
-  0x3e, 0xb3, 0x04, 0xd2, 0x2c, 0xc3, 0x23, 0x71, 0xf6, 0x85, 0x43, 0x7c,
-  0x2c, 0x60, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0xc1, 0x23, 0x1f,
-  0x2b, 0x82, 0xf8, 0x14, 0x71, 0x16, 0x3a, 0xdc, 0x10, 0x94, 0x05, 0x18,
-  0xcc, 0x32, 0x40, 0x51, 0x60, 0xe9, 0x30, 0xc4, 0x67, 0x96, 0x40, 0x32,
-  0x82, 0x1d, 0xe0, 0x33, 0x4b, 0x20, 0x0d, 0xb4, 0x3c, 0x18, 0x63, 0x35,
-  0x04, 0x24, 0x44, 0xb2, 0xe0, 0x98, 0x3a, 0xb8, 0x43, 0x7c, 0x66, 0x19,
-  0x26, 0xcb, 0x0c, 0xec, 0x1d, 0xd4, 0x20, 0x3e, 0x16, 0x08, 0xf4, 0xb9,
-  0x60, 0x98, 0x0b, 0x9e, 0xb2, 0xa0, 0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a,
-  0x98, 0x0b, 0x1d, 0x6e, 0x08, 0xe2, 0x02, 0x0c, 0x66, 0x19, 0xa8, 0x2a,
-  0xb0, 0xe1, 0x1e, 0xe0, 0x33, 0x4b, 0xa0, 0x19, 0x3d, 0x10, 0xf1, 0x99,
-  0x25, 0xd0, 0x66, 0x19, 0x2e, 0xcd, 0x0d, 0x8c, 0x0e, 0xea, 0x21, 0x3e,
-  0x16, 0x30, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0xe0, 0x91, 0x8f,
-  0x15, 0x41, 0x7c, 0x8a, 0xe8, 0x0b, 0x1d, 0x6e, 0x08, 0xf6, 0x02, 0x0c,
-  0x66, 0x19, 0xb0, 0x2c, 0xb0, 0x7e, 0x18, 0xe2, 0x33, 0x4b, 0xa0, 0x19,
-  0x21, 0x12, 0xf0, 0x99, 0x25, 0xd0, 0x06, 0x8a, 0x1e, 0x71, 0x40, 0xfc,
-  0x21, 0xf1, 0x07, 0x83, 0x0d, 0x32, 0x36, 0xc0, 0xd8, 0xc0, 0x62, 0x83,
-  0x8a, 0x0d, 0xa8, 0x81, 0xa2, 0x87, 0x17, 0x10, 0x7f, 0x48, 0xfc, 0xc1,
-  0x20, 0x32, 0x03, 0xf3, 0x07, 0x0b, 0xab, 0x34, 0xea, 0xe8, 0xe1, 0xa9,
-  0x59, 0x86, 0x6d, 0x0e, 0x4a, 0x61, 0x34, 0x61, 0x26, 0x86, 0xe1, 0x86,
-  0x40, 0x34, 0xc0, 0x60, 0x96, 0x81, 0xf3, 0x82, 0xe1, 0x88, 0xc2, 0x2c,
-  0x86, 0xef, 0x8c, 0x61, 0x86, 0x1b, 0x82, 0x98, 0x20, 0x83, 0x1a, 0x02,
-  0x1d, 0x8e, 0x40, 0xd4, 0x62, 0xf8, 0x2a, 0x10, 0xf4, 0x94, 0x61, 0x86,
-  0x1b, 0x02, 0x9a, 0x20, 0x83, 0x0a, 0x06, 0x9d, 0x65, 0xe8, 0xe4, 0x20,
-  0x38, 0x7c, 0x18, 0xe6, 0x9a, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1,
-  0xe0, 0x9b, 0x8d, 0xd1, 0x10, 0x0b, 0xd8, 0x18, 0x4d, 0x08, 0x80, 0xd1,
-  0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0xe2, 0x90,
-  0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xe0, 0x74, 0x43, 0x35, 0x0e, 0x22,
-  0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x6e, 0x37, 0x56, 0x83, 0x21,
-  0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xe0, 0x78, 0x83, 0x35, 0x24,
-  0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x85, 0x3c, 0x56, 0x83,
-  0x2d, 0x82, 0xdb, 0xe0, 0x8b, 0xdc, 0x18, 0x4d, 0x08, 0x80, 0x0b, 0x1e,
-  0x9b, 0x25, 0x90, 0x83, 0xe1, 0x86, 0xac, 0x37, 0xc0, 0x60, 0x96, 0xe1,
-  0x03, 0x83, 0xa0, 0xce, 0xc2, 0x35, 0xe0, 0x82, 0xa7, 0x46, 0x0c, 0x0e,
-  0x00, 0x04, 0xc1, 0x80, 0x32, 0x8f, 0xd7, 0xf8, 0x40, 0x63, 0xc4, 0xe0,
-  0x00, 0x40, 0x10, 0x0c, 0xa8, 0xf3, 0x78, 0x8d, 0x40, 0xb8, 0x60, 0x98,
-  0x52, 0x8b, 0xd9, 0x80, 0x0b, 0x9e, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04,
-  0x03, 0x6a, 0x3d, 0x68, 0x63, 0x0c, 0x4a, 0x63, 0xc4, 0xe0, 0x00, 0x40,
-  0x10, 0x0c, 0x28, 0xf6, 0xa0, 0x8d, 0x40, 0xb8, 0x60, 0x98, 0x0b, 0x9e,
-  0xba, 0xe3, 0xa9, 0x9b, 0x89, 0x61, 0x0e, 0x0d, 0x86, 0x39, 0x62, 0x98,
-  0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x0f, 0x3e, 0xc0,
-  0xe3, 0x2f, 0xda, 0x63, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34,
-  0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10, 0x00,
-  0x04, 0xc1, 0x80, 0xbb, 0x8f, 0xf3, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01,
-  0x40, 0x10, 0x0c, 0x38, 0xfc, 0x40, 0x8f, 0x84, 0x08, 0x46, 0x0c, 0x10,
-  0x00, 0x04, 0xc1, 0x80, 0xcb, 0x8f, 0xf4, 0x48, 0x88, 0x60, 0xc4, 0x40,
-  0x01, 0x40, 0x10, 0x0c, 0x96, 0x10, 0x41, 0x8f, 0xd4, 0x08, 0xe8, 0x23,
-  0x37, 0xec, 0x63, 0x34, 0x21, 0x00, 0x2e, 0x78, 0x6c, 0x96, 0x40, 0x0e,
-  0x86, 0x1b, 0xec, 0x20, 0x3f, 0xc0, 0x60, 0x96, 0x21, 0x0c, 0xe4, 0x20,
-  0xb0, 0xbd, 0xe8, 0x8b, 0xf8, 0x0c, 0x47, 0xec, 0x81, 0x5f, 0x10, 0xdf,
-  0x2c, 0x83, 0x18, 0x94, 0x41, 0x60, 0x7f, 0xc1, 0x07, 0xf1, 0xb1, 0x60,
-  0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08,
-  0xe2, 0x53, 0xc4, 0x88, 0xe8, 0x70, 0x43, 0x10, 0x22, 0x60, 0x30, 0xcb,
-  0x30, 0x06, 0x64, 0x10, 0xd8, 0x70, 0x1a, 0xf0, 0x99, 0x25, 0x48, 0x03,
-  0x33, 0x0d, 0x22, 0x3e, 0xb3, 0x04, 0x69, 0x30, 0x1c, 0x61, 0x0a, 0xa7,
-  0x21, 0x7c, 0xb3, 0x0c, 0x66, 0x90, 0x06, 0x81, 0x9d, 0x02, 0x6a, 0xc4,
-  0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x44, 0xf2,
-  0xb1, 0x22, 0x88, 0x4f, 0x11, 0x2e, 0xa2, 0xc3, 0x0d, 0x01, 0x8b, 0x80,
-  0xc1, 0x2c, 0xc3, 0x19, 0xa0, 0x41, 0x60, 0xb0, 0x31, 0xc4, 0x67, 0x96,
-  0x20, 0x0d, 0x8c, 0x98, 0x0d, 0xf8, 0xcc, 0x12, 0xa4, 0xc1, 0x40, 0xcb,
-  0xa3, 0x8d, 0x01, 0x46, 0x06, 0xc4, 0x19, 0x08, 0x68, 0x20, 0x16, 0x65,
-  0x70, 0xc1, 0x30, 0x26, 0x1b, 0xb6, 0x11, 0x9f, 0xe1, 0x88, 0x59, 0xb8,
-  0x0d, 0xe2, 0x9b, 0x65, 0x50, 0x83, 0x36, 0x08, 0x0c, 0x37, 0x68, 0x21,
-  0x3e, 0x16, 0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0xc0, 0x90,
-  0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xe0, 0x11, 0x1d, 0x6e, 0x08, 0x74, 0x04,
-  0x0c, 0x66, 0x19, 0xd6, 0x80, 0x0d, 0x02, 0x1b, 0xc0, 0x03, 0x3e, 0xb3,
-  0x04, 0x71, 0x60, 0xbd, 0x41, 0xc4, 0x67, 0x96, 0x20, 0x0e, 0x86, 0x23,
-  0x7c, 0xc1, 0x37, 0x84, 0x6f, 0x96, 0xc1, 0x0d, 0xe2, 0x20, 0xb0, 0x5f,
-  0xf8, 0x8d, 0xf8, 0x58, 0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca,
-  0x82, 0x48, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0xe2, 0x4c, 0x74, 0xb8, 0x21,
-  0x28, 0x13, 0x30, 0x98, 0x65, 0x78, 0x03, 0x38, 0x08, 0xec, 0x3c, 0x86,
-  0xf8, 0xcc, 0x12, 0xc4, 0x81, 0x11, 0xec, 0x01, 0x9f, 0x59, 0x82, 0x38,
-  0x18, 0x68, 0x79, 0xb4, 0x35, 0xc0, 0xd8, 0x80, 0x78, 0x03, 0x01, 0x0e,
-  0x64, 0xa3, 0x0d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x6e, 0x7b, 0xea, 0x70,
-  0x63, 0x98, 0x6b, 0x87, 0x61, 0x8e, 0x18, 0xe6, 0x88, 0x61, 0x46, 0x0c,
-  0x0e, 0x00, 0x04, 0xc1, 0xe0, 0xab, 0x93, 0x32, 0x21, 0x11, 0x39, 0x19,
+  0x8a, 0xa1, 0x0c, 0xc8, 0x30, 0x84, 0x24, 0x0e, 0x04, 0xcc, 0x14, 0x06,
+  0xe3, 0xc0, 0x0e, 0xe1, 0x30, 0x0f, 0xf3, 0xe0, 0x06, 0xb2, 0x70, 0x0b,
+  0xb3, 0x40, 0x0f, 0xf2, 0x50, 0x0f, 0xe3, 0x40, 0x0f, 0xf5, 0x20, 0x0f,
+  0xe5, 0x40, 0x0e, 0xa2, 0x50, 0x0f, 0xe6, 0x60, 0x0e, 0xe5, 0x20, 0x0f,
+  0x7c, 0x90, 0x0e, 0xee, 0x40, 0x0f, 0x71, 0x60, 0x07, 0xbf, 0x40, 0x0f,
+  0x7e, 0x80, 0x02, 0x83, 0xca, 0x99, 0xbc, 0x71, 0x60, 0x87, 0x70, 0x98,
+  0x87, 0x79, 0x70, 0x03, 0x59, 0xb8, 0x85, 0x59, 0xa0, 0x07, 0x79, 0xa8,
+  0x87, 0x71, 0xa0, 0x87, 0x7a, 0x90, 0x87, 0x72, 0x20, 0x07, 0x51, 0xa8,
+  0x07, 0x73, 0x30, 0x87, 0x72, 0x90, 0x07, 0x3e, 0x40, 0x87, 0x70, 0x60,
+  0x07, 0x73, 0xf0, 0x03, 0x14, 0x60, 0x74, 0x0e, 0x23, 0x10, 0xc3, 0x25,
+  0x9c, 0xd3, 0x48, 0x13, 0xd0, 0x4c, 0x12, 0x5a, 0x86, 0x61, 0x18, 0xd2,
+  0x34, 0x4d, 0xd3, 0x74, 0x20, 0x75, 0x8e, 0x00, 0x14, 0xa6, 0x00, 0x00,
+  0x13, 0x14, 0x72, 0xc0, 0x87, 0x74, 0x60, 0x87, 0x36, 0x68, 0x87, 0x79,
+  0x68, 0x03, 0x72, 0xc0, 0x87, 0x0d, 0xae, 0x50, 0x0e, 0x6d, 0xd0, 0x0e,
+  0x7a, 0x50, 0x0e, 0x6d, 0x00, 0x0f, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07,
+  0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0xa0, 0x07, 0x73, 0x20, 0x07,
+  0x6d, 0x90, 0x0e, 0x78, 0xa0, 0x07, 0x78, 0xd0, 0x06, 0xe9, 0x10, 0x07,
+  0x76, 0xa0, 0x07, 0x71, 0x60, 0x07, 0x6d, 0x90, 0x0e, 0x73, 0x20, 0x07,
+  0x7a, 0x30, 0x07, 0x72, 0xd0, 0x06, 0xe9, 0x60, 0x07, 0x74, 0xa0, 0x07,
+  0x76, 0x40, 0x07, 0x6d, 0x60, 0x0e, 0x71, 0x60, 0x07, 0x7a, 0x10, 0x07,
+  0x76, 0xd0, 0x06, 0xe6, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07,
+  0x6d, 0x60, 0x0e, 0x76, 0x40, 0x07, 0x7a, 0x60, 0x07, 0x74, 0xd0, 0x06,
+  0xee, 0x80, 0x07, 0x7a, 0x10, 0x07, 0x76, 0xa0, 0x07, 0x73, 0x20, 0x07,
+  0x7a, 0x60, 0x07, 0x74, 0x30, 0xe4, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0xc8, 0x43, 0x00, 0x01, 0x10, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x90, 0x67, 0x01, 0x02, 0x40,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x21, 0x4f, 0x03, 0x04,
+  0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x43, 0x1e, 0x08,
+  0x08, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0x3c,
+  0x12, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c,
+  0x79, 0x28, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x18, 0xf2, 0x58, 0x40, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x30, 0xe4, 0xd1, 0x80, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x60, 0xc8, 0xc3, 0x01, 0x01, 0x10, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0xc0, 0x90, 0xe7, 0x03, 0x02, 0x40, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x80, 0x21, 0x8f, 0x18, 0x00, 0x01, 0x20, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x16, 0x08, 0x00, 0x00, 0x00,
+  0x0d, 0x00, 0x00, 0x00, 0x32, 0x1e, 0x98, 0x14, 0x19, 0x11, 0x4c, 0x90,
+  0x8c, 0x09, 0x26, 0x47, 0xc6, 0x04, 0x43, 0x1a, 0x4a, 0xa0, 0x08, 0x8a,
+  0x61, 0x04, 0xa0, 0x30, 0x0a, 0x3d, 0xa0, 0x10, 0x0a, 0x30, 0x80, 0xc2,
+  0x11, 0x00, 0x62, 0x0b, 0x1c, 0x10, 0x10, 0x81, 0xd0, 0x19, 0x00, 0x5a,
+  0x67, 0x00, 0xc8, 0x9c, 0x01, 0x00, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00,
+  0x48, 0x00, 0x00, 0x00, 0x1a, 0x03, 0x4c, 0x90, 0x46, 0x02, 0x13, 0x44,
+  0x35, 0x18, 0x63, 0x0b, 0x73, 0x3b, 0x03, 0xb1, 0x2b, 0x93, 0x9b, 0x4b,
+  0x7b, 0x73, 0x03, 0x99, 0x71, 0xb9, 0x01, 0x41, 0xa1, 0x0b, 0x3b, 0x9b,
+  0x7b, 0x91, 0x2a, 0x62, 0x2a, 0x0a, 0x9a, 0x2a, 0xfa, 0x9a, 0xb9, 0x81,
+  0x79, 0x31, 0x4b, 0x73, 0x0b, 0x63, 0x4b, 0xd9, 0x10, 0x04, 0x13, 0x84,
+  0x81, 0x99, 0x20, 0x0c, 0xcd, 0x06, 0x61, 0x20, 0x26, 0x08, 0x83, 0xb3,
+  0x41, 0x18, 0x0c, 0x0a, 0x63, 0x73, 0x1b, 0x06, 0xc4, 0x20, 0x26, 0x08,
+  0xc3, 0x33, 0x41, 0x30, 0x03, 0x8c, 0xc0, 0x04, 0x61, 0x80, 0x26, 0x08,
+  0x61, 0x40, 0x6d, 0x58, 0x94, 0x85, 0x51, 0x94, 0xa1, 0x71, 0x1c, 0xa7,
+  0x98, 0x20, 0xa0, 0x81, 0xb5, 0x61, 0x19, 0x20, 0x46, 0x19, 0x86, 0xc6,
+  0x71, 0x9c, 0x62, 0xc3, 0x42, 0x2c, 0x8c, 0x42, 0x0c, 0x8d, 0xe3, 0x38,
+  0xc5, 0x86, 0xe1, 0x89, 0xa4, 0x09, 0xc2, 0x1a, 0x5c, 0x13, 0x84, 0x21,
+  0xda, 0x80, 0x28, 0x14, 0xa3, 0x28, 0x43, 0x05, 0x6c, 0x08, 0xac, 0x0d,
+  0x04, 0x30, 0x5d, 0xc0, 0x04, 0x41, 0x00, 0xa8, 0x1c, 0xc9, 0xa5, 0x91,
+  0x4d, 0x85, 0xb5, 0xc1, 0xb1, 0x95, 0x4d, 0x10, 0xd8, 0xa0, 0x9a, 0x20,
+  0x0c, 0xd2, 0x04, 0x61, 0x98, 0x36, 0x0c, 0xdd, 0x30, 0x6c, 0x20, 0x94,
+  0x8d, 0xf3, 0x36, 0x14, 0x99, 0x06, 0x60, 0x5f, 0x15, 0x36, 0x36, 0xbb,
+  0x36, 0x97, 0x34, 0xb2, 0x32, 0x37, 0xba, 0x29, 0x41, 0x50, 0x85, 0x0c,
+  0xcf, 0xc5, 0xae, 0x4c, 0x6e, 0x2e, 0xed, 0xcd, 0x6d, 0x4a, 0x40, 0x34,
+  0x21, 0xc3, 0x73, 0xb1, 0x0b, 0x63, 0xb3, 0x2b, 0x93, 0x9b, 0x12, 0x18,
+  0x75, 0xc8, 0xf0, 0x5c, 0xe6, 0xd0, 0xc2, 0xc8, 0xca, 0xe4, 0x9a, 0xde,
+  0xc8, 0xca, 0xd8, 0xa6, 0x04, 0x48, 0x19, 0x32, 0x3c, 0x17, 0xb9, 0xb2,
+  0xb9, 0xb7, 0x3a, 0xb9, 0xb1, 0xb2, 0xb9, 0x29, 0xc1, 0x55, 0x87, 0x0c,
+  0xcf, 0xa5, 0xcc, 0x8d, 0x4e, 0x2e, 0x0f, 0xea, 0x2d, 0xcd, 0x8d, 0x6e,
+  0x6e, 0x4a, 0xf0, 0x01, 0x79, 0x18, 0x00, 0x00, 0x51, 0x00, 0x00, 0x00,
+  0x33, 0x08, 0x80, 0x1c, 0xc4, 0xe1, 0x1c, 0x66, 0x14, 0x01, 0x3d, 0x88,
+  0x43, 0x38, 0x84, 0xc3, 0x8c, 0x42, 0x80, 0x07, 0x79, 0x78, 0x07, 0x73,
+  0x98, 0x71, 0x0c, 0xe6, 0x00, 0x0f, 0xed, 0x10, 0x0e, 0xf4, 0x80, 0x0e,
+  0x33, 0x0c, 0x42, 0x1e, 0xc2, 0xc1, 0x1d, 0xce, 0xa1, 0x1c, 0x66, 0x30,
+  0x05, 0x3d, 0x88, 0x43, 0x38, 0x84, 0x83, 0x1b, 0xcc, 0x03, 0x3d, 0xc8,
+  0x43, 0x3d, 0x8c, 0x03, 0x3d, 0xcc, 0x78, 0x8c, 0x74, 0x70, 0x07, 0x7b,
+  0x08, 0x07, 0x79, 0x48, 0x87, 0x70, 0x70, 0x07, 0x7a, 0x70, 0x03, 0x76,
+  0x78, 0x87, 0x70, 0x20, 0x87, 0x19, 0xcc, 0x11, 0x0e, 0xec, 0x90, 0x0e,
+  0xe1, 0x30, 0x0f, 0x6e, 0x30, 0x0f, 0xe3, 0xf0, 0x0e, 0xf0, 0x50, 0x0e,
+  0x33, 0x10, 0xc4, 0x1d, 0xde, 0x21, 0x1c, 0xd8, 0x21, 0x1d, 0xc2, 0x61,
+  0x1e, 0x66, 0x30, 0x89, 0x3b, 0xbc, 0x83, 0x3b, 0xd0, 0x43, 0x39, 0xb4,
+  0x03, 0x3c, 0xbc, 0x83, 0x3c, 0x84, 0x03, 0x3b, 0xcc, 0xf0, 0x14, 0x76,
+  0x60, 0x07, 0x7b, 0x68, 0x07, 0x37, 0x68, 0x87, 0x72, 0x68, 0x07, 0x37,
+  0x80, 0x87, 0x70, 0x90, 0x87, 0x70, 0x60, 0x07, 0x76, 0x28, 0x07, 0x76,
+  0xf8, 0x05, 0x76, 0x78, 0x87, 0x77, 0x80, 0x87, 0x5f, 0x08, 0x87, 0x71,
+  0x18, 0x87, 0x72, 0x98, 0x87, 0x79, 0x98, 0x81, 0x2c, 0xee, 0xf0, 0x0e,
+  0xee, 0xe0, 0x0e, 0xf5, 0xc0, 0x0e, 0xec, 0x30, 0x03, 0x62, 0xc8, 0xa1,
+  0x1c, 0xe4, 0xa1, 0x1c, 0xcc, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xdc, 0x61,
+  0x1c, 0xca, 0x21, 0x1c, 0xc4, 0x81, 0x1d, 0xca, 0x61, 0x06, 0xd6, 0x90,
+  0x43, 0x39, 0xc8, 0x43, 0x39, 0x98, 0x43, 0x39, 0xc8, 0x43, 0x39, 0xb8,
+  0xc3, 0x38, 0x94, 0x43, 0x38, 0x88, 0x03, 0x3b, 0x94, 0xc3, 0x2f, 0xbc,
+  0x83, 0x3c, 0xfc, 0x82, 0x3b, 0xd4, 0x03, 0x3b, 0xb0, 0xc3, 0x0c, 0xc4,
+  0x21, 0x07, 0x7c, 0x70, 0x03, 0x7a, 0x28, 0x87, 0x76, 0x80, 0x87, 0x19,
+  0xd1, 0x43, 0x0e, 0xf8, 0xe0, 0x06, 0xe4, 0x20, 0x0e, 0xe7, 0xe0, 0x06,
+  0xf6, 0x10, 0x0e, 0xf2, 0xc0, 0x0e, 0xe1, 0x90, 0x0f, 0xef, 0x50, 0x0f,
+  0xf4, 0x30, 0x83, 0x81, 0xc8, 0x01, 0x1f, 0xdc, 0x40, 0x1c, 0xe4, 0xa1,
+  0x1c, 0xc2, 0x61, 0x1d, 0xdc, 0x40, 0x1c, 0xe4, 0x01, 0x00, 0x00, 0x00,
+  0x71, 0x20, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x06, 0xa0, 0x80, 0x11,
+  0x32, 0xb0, 0x00, 0xf3, 0x2c, 0x84, 0x19, 0x40, 0xc3, 0xe5, 0x3b, 0x8f,
+  0x1f, 0x20, 0x0d, 0x10, 0x61, 0x7e, 0x71, 0xdb, 0xa6, 0xb0, 0x0d, 0x97,
+  0xef, 0x3c, 0xbe, 0x10, 0x50, 0x45, 0x41, 0x44, 0xa5, 0x03, 0x0c, 0x25,
+  0x61, 0x00, 0x02, 0xe6, 0x23, 0xb7, 0x6d, 0x0b, 0xd2, 0x70, 0xf9, 0xce,
+  0xe3, 0x0b, 0x11, 0x01, 0x4c, 0x44, 0x08, 0x34, 0xc3, 0x42, 0xd8, 0x81,
+  0x33, 0x5c, 0xbe, 0xf3, 0xf8, 0x83, 0x33, 0xe1, 0x7e, 0x71, 0xdb, 0x86,
+  0x70, 0x0d, 0x97, 0xef, 0x3c, 0x7e, 0x04, 0x58, 0x1b, 0x55, 0x14, 0x44,
+  0x54, 0x3a, 0xc0, 0xe0, 0x17, 0xb5, 0x6e, 0x02, 0xd7, 0x70, 0xf9, 0xce,
+  0xe3, 0x47, 0x80, 0xb5, 0x51, 0x45, 0x41, 0x44, 0xa5, 0x03, 0x0c, 0x3e,
+  0x52, 0xeb, 0x36, 0x80, 0x0d, 0x97, 0xef, 0x3c, 0x7e, 0x04, 0x58, 0x1b,
+  0x55, 0x14, 0x44, 0xc4, 0x4e, 0x4e, 0x44, 0xf8, 0x48, 0xad, 0x5b, 0x81,
+  0x34, 0x5c, 0xbe, 0xf3, 0xf8, 0x13, 0x11, 0x4d, 0x08, 0x10, 0x61, 0x7e,
+  0x71, 0xdb, 0x96, 0x20, 0x0d, 0x97, 0xef, 0x3c, 0xfe, 0x44, 0x44, 0x13,
+  0x02, 0x44, 0x98, 0x8f, 0xdc, 0xb6, 0x05, 0x48, 0xc3, 0xe5, 0x3b, 0x8f,
+  0x3f, 0x1d, 0x11, 0x01, 0x0c, 0xe2, 0xe0, 0x23, 0xb7, 0x6d, 0x04, 0xcf,
+  0x70, 0xf9, 0xce, 0xe3, 0x53, 0x0d, 0x10, 0x61, 0x7e, 0x71, 0xdb, 0x00,
+  0x61, 0x20, 0x00, 0x00, 0x0c, 0x13, 0x00, 0x00, 0x13, 0x04, 0x24, 0x14,
+  0x0b, 0x04, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, 0x34, 0x14, 0x58, 0xd9,
+  0x95, 0xa5, 0x40, 0x0d, 0x94, 0x51, 0x21, 0x15, 0x57, 0xc1, 0x95, 0x5c,
+  0xd9, 0x14, 0x4b, 0x61, 0x0a, 0x14, 0x4d, 0xe9, 0x06, 0x94, 0x43, 0x29,
+  0x90, 0x31, 0x03, 0x40, 0x48, 0x09, 0x14, 0x01, 0x3d, 0x23, 0x00, 0x63,
+  0x04, 0x20, 0x08, 0x82, 0xf8, 0x37, 0x46, 0x00, 0x82, 0x20, 0x48, 0xff,
+  0xc2, 0x18, 0x01, 0x08, 0x82, 0x20, 0xfd, 0x8d, 0x11, 0x80, 0x20, 0x08,
+  0xf2, 0xdf, 0x18, 0x01, 0x08, 0x82, 0x20, 0xfe, 0x0b, 0x63, 0x04, 0x20,
+  0x08, 0x82, 0x21, 0x38, 0x8c, 0x11, 0x80, 0x20, 0x08, 0xea, 0xdf, 0x18,
+  0x01, 0x08, 0x82, 0xa0, 0xfe, 0x0b, 0x63, 0x04, 0x20, 0x08, 0x82, 0xf0,
+  0x37, 0x46, 0x00, 0x82, 0x20, 0x08, 0xff, 0xc2, 0x18, 0x01, 0x08, 0x82,
+  0x20, 0x08, 0x06, 0x00, 0x23, 0x06, 0x09, 0x00, 0x82, 0x60, 0x20, 0x06,
+  0x6d, 0x70, 0x39, 0x6b, 0xb0, 0x06, 0x64, 0x30, 0x62, 0x90, 0x00, 0x20,
+  0x08, 0x06, 0x62, 0xe0, 0x06, 0xd8, 0xd3, 0x06, 0x6d, 0x50, 0x06, 0x23,
+  0x06, 0x09, 0x00, 0x82, 0x60, 0x20, 0x06, 0x6f, 0x90, 0x41, 0x6b, 0xb0,
+  0x06, 0x66, 0x30, 0x62, 0x90, 0x00, 0x20, 0x08, 0x06, 0x62, 0x00, 0x07,
+  0x1a, 0xc4, 0x06, 0x6c, 0x70, 0x06, 0x23, 0x06, 0x06, 0x00, 0x82, 0x60,
+  0x40, 0xec, 0x81, 0xd5, 0x06, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xf0,
+  0xcd, 0xc1, 0x18, 0x08, 0x6e, 0x30, 0x9a, 0x10, 0x00, 0x15, 0x0c, 0x30,
+  0x9a, 0x30, 0x04, 0xc3, 0x0d, 0x42, 0x40, 0x06, 0xb3, 0x0c, 0xc1, 0x08,
+  0x05, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xf0, 0xe1, 0x01, 0x1a, 0x1c,
+  0x76, 0x30, 0x9a, 0x10, 0x0c, 0x17, 0x3c, 0x35, 0x9a, 0x30, 0x08, 0x17,
+  0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x5f, 0x1f, 0xb4, 0x01,
+  0x03, 0x06, 0xa3, 0x09, 0x01, 0x30, 0xdc, 0x10, 0xe8, 0x01, 0x18, 0x4c,
+  0x37, 0x50, 0x5e, 0x30, 0xdd, 0x50, 0x69, 0x42, 0x21, 0x01, 0x4c, 0x37,
+  0x5c, 0x1c, 0x51, 0x48, 0x00, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xf0,
+  0x95, 0x42, 0x1d, 0x50, 0x67, 0x30, 0x9a, 0x10, 0x04, 0xa3, 0x09, 0x82,
+  0x30, 0x9a, 0x30, 0x0c, 0x15, 0x08, 0x52, 0x03, 0x21, 0x15, 0x0c, 0x52,
+  0x57, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xf0, 0xb5, 0x42, 0x1f,
+  0x70, 0xa9, 0x30, 0x9a, 0x10, 0x00, 0x15, 0x0c, 0x52, 0x5b, 0x10, 0x15,
+  0x20, 0x33, 0x9a, 0x50, 0x04, 0x15, 0x08, 0x52, 0x44, 0x10, 0x15, 0x34,
+  0x33, 0x9a, 0x90, 0x08, 0x15, 0x08, 0x52, 0x44, 0x10, 0xd7, 0x3c, 0x75,
+  0xc5, 0x53, 0x37, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x1f,
+  0x2f, 0xb0, 0xc2, 0x1a, 0xd0, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08,
+  0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0xc3, 0x11, 0x4f, 0x1d, 0xf1,
+  0xd4, 0x11, 0x4f, 0x1d, 0xf1, 0xd4, 0x88, 0x41, 0x03, 0x80, 0x20, 0x18,
+  0x58, 0xe8, 0xf0, 0x0a, 0xcc, 0xa2, 0x8c, 0x02, 0x31, 0x08, 0x81, 0x09,
+  0x01, 0x7c, 0x4e, 0x18, 0x66, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x34,
+  0x75, 0xb8, 0x85, 0x3c, 0x08, 0xc6, 0x01, 0x15, 0xca, 0x61, 0x34, 0x21,
+  0x00, 0x2e, 0x78, 0xf0, 0x8a, 0x61, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1,
+  0x40, 0x73, 0x87, 0x5d, 0xe8, 0x83, 0xe0, 0x1c, 0x58, 0x21, 0x1d, 0x46,
+  0x13, 0x02, 0xe0, 0x82, 0x07, 0x87, 0x1b, 0xe2, 0xa0, 0x1d, 0xc0, 0xc0,
+  0x8c, 0x56, 0x80, 0x8f, 0x0d, 0xae, 0x00, 0x9f, 0x59, 0x06, 0x61, 0x18,
+  0x4c, 0x38, 0x05, 0xf9, 0x98, 0x80, 0x0a, 0xf2, 0x31, 0x3d, 0x68, 0x05,
+  0xf8, 0x58, 0x1e, 0xb8, 0x02, 0x7c, 0x8c, 0x10, 0xe4, 0x63, 0x84, 0x20,
+  0x9f, 0x59, 0x02, 0xc2, 0xfc, 0x00, 0x91, 0x8f, 0x21, 0x7d, 0x20, 0x1f,
+  0x13, 0x66, 0x01, 0x3e, 0x26, 0xd0, 0x02, 0x7c, 0x4c, 0x88, 0x05, 0xf9,
+  0x98, 0x20, 0x0b, 0xf2, 0x99, 0x25, 0x20, 0x06, 0x2a, 0x1e, 0x48, 0x20,
+  0x86, 0x81, 0x8a, 0x07, 0x12, 0x88, 0x61, 0x34, 0xa1, 0x15, 0x84, 0xe1,
+  0x86, 0xe0, 0x1f, 0xc0, 0x60, 0x96, 0xa1, 0x30, 0x82, 0x11, 0x03, 0x03,
+  0x00, 0x41, 0x30, 0x80, 0x50, 0xe2, 0x1c, 0x88, 0x11, 0x03, 0x03, 0x00,
+  0x41, 0x30, 0x80, 0x52, 0x02, 0x1d, 0x88, 0x59, 0x02, 0x63, 0xa0, 0xe2,
+  0x21, 0x0a, 0x86, 0x18, 0xa8, 0x78, 0x88, 0x82, 0x21, 0x86, 0x23, 0x04,
+  0x53, 0x20, 0xbe, 0xe1, 0x88, 0xa1, 0x14, 0x84, 0xaf, 0x84, 0x60, 0x87,
+  0x23, 0x88, 0x54, 0x20, 0xbe, 0x12, 0x82, 0x1d, 0x8e, 0x30, 0x4e, 0x41,
+  0xf8, 0x2a, 0x10, 0x76, 0x96, 0xe1, 0xd0, 0x82, 0xd1, 0x04, 0x5d, 0x18,
+  0x86, 0x1b, 0x02, 0x96, 0x00, 0x83, 0x59, 0x06, 0x24, 0x09, 0xca, 0x16,
+  0xfa, 0x01, 0x2e, 0x78, 0x6a, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xa8,
+  0x9a, 0xf0, 0x87, 0xe6, 0x1d, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80,
+  0xb2, 0x09, 0x7f, 0x08, 0x84, 0xc2, 0x85, 0x90, 0x80, 0x0b, 0x9e, 0x1a,
+  0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x2a, 0x27, 0x44, 0x02, 0x9a, 0x87,
+  0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0x74, 0x42, 0x24, 0x02, 0x61,
+  0x96, 0x40, 0x1b, 0x6e, 0x50, 0x68, 0x02, 0x0c, 0x66, 0x19, 0x14, 0x2d,
+  0x30, 0x5b, 0xc0, 0x85, 0xf8, 0xcc, 0x32, 0x2c, 0xce, 0x64, 0xb9, 0x50,
+  0xc5, 0xc7, 0x02, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x14,
+  0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x3d, 0xa1, 0xc3, 0x0d, 0xc1, 0x4e,
+  0x80, 0xc1, 0x2c, 0x03, 0xd3, 0x04, 0x36, 0x84, 0x03, 0x7c, 0x66, 0x09,
+  0x24, 0x03, 0x07, 0x22, 0x3e, 0xb3, 0x04, 0xd2, 0x2c, 0xc3, 0x23, 0x71,
+  0xf6, 0x85, 0x43, 0x7c, 0x2c, 0x60, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c,
+  0x65, 0xc1, 0x23, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x71, 0x16, 0x3a, 0xdc,
+  0x10, 0x94, 0x05, 0x18, 0xcc, 0x32, 0x40, 0x51, 0x60, 0xe9, 0x30, 0xc4,
+  0x67, 0x96, 0x40, 0x32, 0x82, 0x1d, 0xe0, 0x33, 0x4b, 0x20, 0x0d, 0xb4,
+  0x3c, 0x18, 0x63, 0x35, 0x04, 0x24, 0x44, 0xb2, 0xe0, 0x98, 0x3a, 0xb8,
+  0x43, 0x7c, 0x66, 0x19, 0x26, 0xcb, 0x0c, 0xec, 0x1d, 0xd4, 0x20, 0x3e,
+  0x16, 0x08, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0xa0, 0x90, 0x8f,
+  0x15, 0x41, 0x7c, 0x8a, 0x98, 0x0b, 0x1d, 0x6e, 0x08, 0xe2, 0x02, 0x0c,
+  0x66, 0x19, 0xa8, 0x2a, 0xb0, 0xe1, 0x1e, 0xe0, 0x33, 0x4b, 0xa0, 0x19,
+  0x3d, 0x10, 0xf1, 0x99, 0x25, 0xd0, 0x66, 0x19, 0x2e, 0xcd, 0x0d, 0x8c,
+  0x0e, 0xea, 0x21, 0x3e, 0x16, 0x30, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e,
+  0xb2, 0xe0, 0x91, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xe8, 0x0b, 0x1d, 0x6e,
+  0x08, 0xf6, 0x02, 0x0c, 0x66, 0x19, 0xb0, 0x2c, 0xb0, 0x7e, 0x18, 0xe2,
+  0x33, 0x4b, 0xa0, 0x19, 0x21, 0x12, 0xf0, 0x99, 0x25, 0xd0, 0x06, 0x8a,
+  0x1e, 0x71, 0x40, 0xfc, 0x21, 0xf1, 0x07, 0x83, 0x0d, 0x32, 0x36, 0xc0,
+  0xd8, 0xc0, 0x62, 0x83, 0x8a, 0x0d, 0xa8, 0x81, 0xa2, 0x87, 0x17, 0x10,
+  0x7f, 0x48, 0xfc, 0xc1, 0x20, 0x32, 0x03, 0xf3, 0x07, 0x0b, 0xab, 0x34,
+  0xea, 0xe8, 0xe1, 0xa9, 0x59, 0x86, 0x6d, 0x0e, 0x4a, 0x61, 0x34, 0x61,
+  0x26, 0x86, 0xe1, 0x86, 0x40, 0x34, 0xc0, 0x60, 0x96, 0x81, 0xf3, 0x82,
+  0xe1, 0x88, 0xc2, 0x2c, 0x86, 0xef, 0x8c, 0x61, 0x86, 0x1b, 0x82, 0x98,
+  0x20, 0x83, 0x1a, 0x02, 0x1d, 0x8e, 0x40, 0xd4, 0x62, 0xf8, 0x2a, 0x10,
+  0xf4, 0x94, 0x61, 0x86, 0x1b, 0x02, 0x9a, 0x20, 0x83, 0x0a, 0x06, 0x9d,
+  0x65, 0xe8, 0xe4, 0x20, 0x38, 0x7c, 0x18, 0xe6, 0x9a, 0x61, 0x46, 0x0c,
+  0x0e, 0x00, 0x04, 0xc1, 0xe0, 0x9b, 0x8d, 0xd1, 0x10, 0x0b, 0xd8, 0x18,
   0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04,
-  0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xe0, 0xf8,
-  0x84, 0x4d, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xae,
-  0x4f, 0xda, 0x24, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xe0,
-  0xfc, 0xc4, 0x4d, 0x12, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83,
-  0xc5, 0x54, 0xda, 0xc4, 0x45, 0x82, 0x3c, 0xf1, 0x91, 0x3d, 0x19, 0x4d,
-  0x08, 0x80, 0x0b, 0x1e, 0x9b, 0x25, 0x90, 0x83, 0x81, 0x96, 0xc7, 0x34,
-  0x3a, 0x3f, 0xe2, 0x58, 0xe2, 0x13, 0xe2, 0xc0, 0x8f, 0xc0, 0xe0, 0x02,
-  0x83, 0x46, 0x0c, 0x1c, 0x00, 0x04, 0xc1, 0xa0, 0x51, 0x95, 0x34, 0xa9,
-  0x11, 0x17, 0xf1, 0x93, 0x80, 0x4c, 0xc8, 0x84, 0x4c, 0xc6, 0x04, 0x54,
-  0x66, 0x09, 0x46, 0x68, 0xb8, 0x61, 0x34, 0xfc, 0x04, 0x0c, 0x66, 0x19,
-  0xe8, 0x20, 0x26, 0x82, 0x11, 0x03, 0x03, 0x00, 0x41, 0x30, 0x80, 0x54,
-  0x85, 0x4d, 0x42, 0x62, 0xc4, 0xc0, 0x00, 0x40, 0x10, 0x0c, 0xa0, 0x55,
-  0x69, 0x93, 0x90, 0x30, 0x61, 0x4c, 0xe0, 0x63, 0x02, 0x99, 0xc0, 0x67,
-  0x34, 0xa1, 0x46, 0x86, 0xe1, 0x86, 0x80, 0x54, 0xc0, 0x60, 0x96, 0xa1,
-  0x0e, 0xee, 0x20, 0x18, 0x8e, 0x30, 0xd0, 0x64, 0xf8, 0xee, 0x18, 0x66,
-  0xb8, 0x21, 0x98, 0x11, 0x32, 0xa8, 0x21, 0xd0, 0xe1, 0x88, 0x84, 0x4d,
-  0x86, 0xaf, 0x02, 0x41, 0x6f, 0x19, 0x66, 0xb8, 0x21, 0xb0, 0x11, 0x32,
-  0xa8, 0x60, 0xd0, 0x59, 0x06, 0x3b, 0x58, 0x85, 0xe0, 0xf4, 0x63, 0x98,
-  0x7b, 0x89, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0xe0, 0xab, 0x95,
-  0x52, 0x21, 0x13, 0x59, 0x19, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18,
-  0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0xe2, 0x90, 0x11, 0x03, 0x04,
-  0x00, 0x41, 0x30, 0xe0, 0x78, 0x85, 0x55, 0x0e, 0x22, 0x18, 0x31, 0x40,
-  0x00, 0x10, 0x04, 0x03, 0xae, 0x57, 0x5a, 0x85, 0x21, 0x82, 0x11, 0x03,
-  0x04, 0x00, 0x41, 0x30, 0xe0, 0x7c, 0xc5, 0x55, 0x24, 0x22, 0x18, 0x31,
-  0x50, 0x00, 0x10, 0x04, 0x83, 0xc5, 0x5c, 0x5a, 0xc5, 0x4d, 0x82, 0x5c,
-  0xf1, 0x93, 0x5d, 0x19, 0x4d, 0x08, 0x80, 0x0b, 0x1e, 0x9b, 0x25, 0x58,
-  0x85, 0xe1, 0x86, 0xec, 0x57, 0xc0, 0x60, 0x96, 0x01, 0x0f, 0xf2, 0x20,
-  0xa8, 0x34, 0x81, 0x15, 0xb8, 0xe0, 0xa9, 0x11, 0x83, 0x03, 0x00, 0x41,
-  0x30, 0xa0, 0xd0, 0x25, 0x56, 0xc0, 0x40, 0x54, 0x46, 0x0c, 0x0e, 0x00,
-  0x04, 0xc1, 0x80, 0x4a, 0x97, 0x58, 0x09, 0x84, 0x0b, 0x86, 0x29, 0x36,
-  0xa9, 0x15, 0xb8, 0xe0, 0xa9, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0,
-  0xda, 0xc5, 0x56, 0xc8, 0xe0, 0x54, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1,
-  0x80, 0x72, 0x17, 0x5b, 0x09, 0x84, 0x0b, 0x86, 0xb9, 0xe0, 0xa9, 0x3b,
-  0x9e, 0xba, 0x1a, 0x19, 0xe6, 0xd4, 0x62, 0x98, 0x23, 0x86, 0x39, 0x62,
-  0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xf8, 0xe4, 0x45, 0x5c, 0x42,
-  0xe5, 0x5d, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06,
-  0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10,
-  0x0c, 0xb8, 0x7c, 0x49, 0x97, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04,
-  0xc1, 0x80, 0xd3, 0x17, 0x75, 0x49, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40,
-  0x10, 0x0c, 0xb8, 0x7d, 0x59, 0x97, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00,
-  0x04, 0xc1, 0x60, 0x19, 0x19, 0x75, 0x59, 0x95, 0xc0, 0x5e, 0x76, 0x05,
-  0x5f, 0x46, 0x13, 0x02, 0xe0, 0x82, 0xc7, 0x66, 0x09, 0x56, 0x61, 0xb8,
-  0xc1, 0x0e, 0xf6, 0x05, 0x0c, 0x66, 0x19, 0xf4, 0x60, 0x15, 0x02, 0xeb,
-  0x93, 0x3f, 0x89, 0xcf, 0x70, 0x04, 0x1f, 0x80, 0x0a, 0xf1, 0xcd, 0x32,
-  0xec, 0x81, 0x1f, 0x04, 0x16, 0x2a, 0x7d, 0x10, 0x1f, 0x0b, 0x06, 0xfa,
-  0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e,
-  0x45, 0x94, 0x8c, 0x0e, 0x37, 0x04, 0x23, 0x03, 0x06, 0xb3, 0x0c, 0x7c,
-  0xd0, 0x07, 0x81, 0x0d, 0xa9, 0x02, 0x9f, 0x59, 0x02, 0x51, 0x30, 0x54,
-  0x21, 0xe2, 0x33, 0x4b, 0x20, 0x0a, 0xc3, 0x11, 0xa7, 0x90, 0x2a, 0xc2,
-  0x37, 0xcb, 0xf0, 0x07, 0xa2, 0x10, 0x18, 0x2a, 0xa8, 0x4a, 0x7c, 0x2c,
-  0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x41, 0x24, 0x1f, 0x2b,
-  0x82, 0xf8, 0x14, 0x01, 0x33, 0x3a, 0xdc, 0x10, 0xb8, 0x0c, 0x18, 0xcc,
-  0x32, 0x80, 0x42, 0x28, 0x04, 0x26, 0x2b, 0x43, 0x7c, 0x66, 0x09, 0x44,
-  0xc1, 0x88, 0x5a, 0x81, 0xcf, 0x2c, 0x81, 0x28, 0x0c, 0xb4, 0x3c, 0x1a,
-  0x1f, 0x60, 0x7d, 0x40, 0x80, 0x82, 0x10, 0x0a, 0x64, 0xe1, 0x07, 0x17,
-  0x0c, 0x63, 0xb4, 0x82, 0x2b, 0xf1, 0x19, 0x8e, 0xa0, 0x85, 0x5c, 0x21,
-  0xbe, 0x59, 0x86, 0x51, 0x30, 0x85, 0xc0, 0x74, 0xa5, 0x16, 0xe2, 0x63,
-  0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x0c, 0xf9, 0x58,
-  0x11, 0xc4, 0xa7, 0x08, 0x9f, 0xd1, 0xe1, 0x86, 0x80, 0x67, 0xc0, 0x60,
-  0x96, 0x81, 0x14, 0x4a, 0x21, 0xb0, 0x41, 0x5c, 0xe0, 0x33, 0x4b, 0xa0,
-  0x0a, 0xf6, 0x2b, 0x44, 0x7c, 0x66, 0x09, 0x54, 0x61, 0x38, 0xe2, 0x17,
-  0xc0, 0x45, 0xf8, 0x66, 0x19, 0x4e, 0x41, 0x15, 0x02, 0x03, 0x87, 0x70,
-  0x89, 0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x88,
-  0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xd2, 0x46, 0x87, 0x1b, 0x82, 0xb3,
-  0x01, 0x83, 0x59, 0x06, 0x54, 0x48, 0x85, 0xc0, 0xd2, 0x65, 0x88, 0xcf,
-  0x2c, 0x81, 0x2a, 0x18, 0xe1, 0x2e, 0xf0, 0x99, 0x25, 0x50, 0x85, 0x81,
-  0x96, 0x47, 0x23, 0x05, 0xac, 0x14, 0x08, 0x54, 0x10, 0x52, 0x81, 0x36,
-  0x4c, 0xe1, 0x82, 0x61, 0x2e, 0x78, 0xea, 0xb6, 0xa7, 0x4e, 0x57, 0x86,
-  0xb9, 0xf7, 0x18, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00,
-  0x40, 0x10, 0x0c, 0xbe, 0xbb, 0x39, 0x1b, 0x93, 0xa1, 0x9b, 0xd1, 0x84,
-  0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86,
-  0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xce, 0x6f, 0xdc,
-  0x26, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xe0, 0xfe, 0xe6,
-  0x6d, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x0e, 0x74,
-  0xe0, 0x26, 0x21, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x58, 0x50,
-  0xe7, 0x6d, 0x60, 0x26, 0xd8, 0x1b, 0xb0, 0xe9, 0x9b, 0xd1, 0x84, 0x00,
-  0xb8, 0xe0, 0xb1, 0x59, 0x82, 0x55, 0x18, 0x68, 0x79, 0x4c, 0xc3, 0x0e,
-  0x44, 0xad, 0x0e, 0x58, 0x02, 0x0f, 0x04, 0x55, 0x10, 0xb5, 0x3c, 0x98,
-  0x65, 0x60, 0x05, 0x57, 0xd8, 0x87, 0xe1, 0x08, 0x7f, 0x10, 0x9b, 0xe1,
-  0xbb, 0x7f, 0x18, 0x66, 0xb8, 0x21, 0x68, 0x19, 0x32, 0xa8, 0x21, 0xd0,
-  0xe1, 0x88, 0x91, 0x30, 0x9b, 0xe1, 0xab, 0x40, 0xd0, 0x2b, 0x89, 0x61,
-  0x86, 0x1b, 0x02, 0x98, 0x21, 0x83, 0x0a, 0x06, 0x9d, 0x65, 0x68, 0x05,
-  0x71, 0x08, 0x8e, 0x5e, 0x86, 0xb9, 0x14, 0x19, 0x66, 0xc4, 0xe0, 0x00,
-  0x40, 0x10, 0x0c, 0xbe, 0xd7, 0xf9, 0x1b, 0x9f, 0x61, 0x9d, 0xd1, 0x84,
-  0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86,
-  0x22, 0x0e, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xce, 0x76, 0x4c,
-  0xe7, 0x20, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xe0, 0x6e, 0xe7,
-  0x74, 0x18, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x0e, 0x77,
-  0x50, 0x47, 0x22, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x58, 0xc0,
-  0xe7, 0x74, 0xd0, 0x26, 0x98, 0x1d, 0xbc, 0xa9, 0x9d, 0xd1, 0x84, 0x00,
-  0xb8, 0xe0, 0xb1, 0x59, 0x02, 0x71, 0x18, 0x6e, 0x98, 0x89, 0xdc, 0x01,
-  0x83, 0x59, 0x86, 0x57, 0x80, 0x85, 0xa0, 0xc6, 0x46, 0x75, 0xe0, 0x82,
-  0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x12, 0x9f, 0xd5, 0xc1,
-  0x09, 0xbe, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x6a, 0x7c, 0x56,
-  0x27, 0x10, 0x2e, 0x18, 0xa6, 0xcc, 0xe6, 0x75, 0xe0, 0x82, 0xa7, 0x46,
-  0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x3a, 0x1f, 0xd8, 0xf1, 0x89, 0xd0,
-  0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x0a, 0x7d, 0x60, 0x27, 0x10,
-  0x2e, 0x18, 0xe6, 0x82, 0xa7, 0xee, 0x78, 0xea, 0x5e, 0x66, 0x98, 0x23,
-  0x93, 0x61, 0x8e, 0x18, 0xe6, 0x88, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04,
-  0xc1, 0xe0, 0x63, 0x1f, 0xde, 0xd9, 0x9b, 0xf4, 0x19, 0x4d, 0x08, 0x80,
-  0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0x22,
-  0x91, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xe0, 0xe6, 0x67, 0x7c, 0x12,
-  0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x8e, 0x7e, 0xc8, 0x27,
-  0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xe0, 0xea, 0xa7, 0x7c,
-  0x12, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0xa5, 0x7f, 0xc8,
-  0xa7, 0x74, 0x02, 0xf8, 0xa9, 0x1d, 0xf9, 0x19, 0x4d, 0x08, 0x80, 0x0b,
-  0x1e, 0x9b, 0x25, 0x10, 0x87, 0xe1, 0x06, 0xb8, 0xa8, 0x1f, 0x30, 0x98,
-  0x65, 0x88, 0x05, 0x71, 0x08, 0xec, 0x6e, 0xf2, 0x26, 0x3e, 0xc3, 0x11,
-  0x74, 0xa1, 0x37, 0xc4, 0x37, 0xcb, 0x20, 0x0b, 0xb5, 0x10, 0xd8, 0xde,
-  0xd4, 0x45, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65,
-  0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xf1, 0x3f, 0x3a, 0xdc, 0x10,
-  0xf4, 0x0f, 0x18, 0xcc, 0x32, 0xcc, 0x02, 0x2d, 0x04, 0x36, 0x8c, 0x0e,
-  0x7c, 0x66, 0x09, 0x72, 0xc1, 0x44, 0x87, 0x88, 0xcf, 0x2c, 0x41, 0x2e,
-  0x0c, 0x47, 0xfc, 0xc5, 0xe8, 0x08, 0xdf, 0x2c, 0x83, 0x2d, 0xe4, 0x42,
-  0x60, 0xa0, 0x41, 0x3a, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c,
-  0xf0, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x84, 0x0a, 0xe9,
-  0x70, 0x43, 0x80, 0x42, 0x60, 0x30, 0xcb, 0x70, 0x0b, 0xb8, 0x10, 0x18,
-  0xeb, 0x0c, 0xf1, 0x99, 0x25, 0xc8, 0x05, 0x23, 0x5e, 0x07, 0x3e, 0xb3,
-  0x04, 0xb9, 0x30, 0xd0, 0xf2, 0x68, 0xb3, 0x80, 0xd1, 0x02, 0x71, 0x0b,
-  0x02, 0x2e, 0xd0, 0x4c, 0x2d, 0x5c, 0x30, 0x8c, 0xb9, 0x8e, 0xec, 0xc4,
-  0x67, 0x38, 0xc2, 0x35, 0x66, 0x87, 0xf8, 0x66, 0x19, 0x74, 0xa1, 0x17,
-  0x02, 0xa3, 0x9d, 0xd7, 0x88, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6,
-  0x82, 0xa7, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0x70, 0x48,
-  0x87, 0x1b, 0x02, 0x1b, 0x02, 0x83, 0x59, 0x86, 0x5d, 0xe0, 0x85, 0xc0,
-  0x06, 0xde, 0x81, 0xcf, 0x2c, 0x41, 0x38, 0x58, 0xee, 0x10, 0xf1, 0x99,
-  0x25, 0x08, 0x87, 0xe1, 0x88, 0xdc, 0xd0, 0x1d, 0xe1, 0x9b, 0x65, 0xf0,
-  0x85, 0x70, 0x08, 0x4c, 0x37, 0x76, 0x27, 0x3e, 0x16, 0x38, 0xf4, 0xb9,
-  0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a,
-  0x18, 0x23, 0x1d, 0x6e, 0x08, 0xc2, 0x08, 0x0c, 0x66, 0x19, 0x7e, 0x01,
-  0x1c, 0x02, 0x1b, 0x9f, 0x21, 0x3e, 0xb3, 0x04, 0xe1, 0x60, 0x04, 0xfa,
-  0xc0, 0x67, 0x96, 0x20, 0x1c, 0x06, 0x5a, 0x1e, 0x6d, 0x17, 0x30, 0x5e,
-  0x20, 0x7e, 0x41, 0x00, 0x07, 0xd4, 0xe9, 0x85, 0x0b, 0x86, 0xb9, 0xe0,
-  0xa9, 0xdb, 0x9e, 0x3a, 0xda, 0x19, 0xe6, 0xd2, 0x65, 0x98, 0x23, 0x86,
-  0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xf8, 0xe2, 0x28,
-  0x8c, 0x40, 0xc8, 0x8d, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46,
-  0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01,
-  0x40, 0x10, 0x0c, 0x38, 0x3c, 0x42, 0xa3, 0x84, 0x08, 0x46, 0x0c, 0x10,
-  0x00, 0x04, 0xc1, 0x80, 0xcb, 0xa3, 0x34, 0x4a, 0x88, 0x60, 0xc4, 0x00,
-  0x01, 0x40, 0x10, 0x0c, 0x38, 0x3d, 0x52, 0xa3, 0x84, 0x08, 0x46, 0x0c,
-  0x14, 0x00, 0x04, 0xc1, 0x60, 0x11, 0xa5, 0x34, 0x52, 0xa1, 0xa0, 0x8e,
-  0x74, 0xe8, 0x8e, 0x46, 0x13, 0x02, 0xe0, 0x82, 0xc7, 0x66, 0x09, 0xc4,
-  0x61, 0xa0, 0xe5, 0x31, 0x8d, 0x56, 0xb0, 0xc3, 0x80, 0x15, 0x58, 0xe2,
-  0x15, 0x84, 0x70, 0xb0, 0xc3, 0x00, 0x16, 0x66, 0x19, 0xc6, 0xa1, 0x1c,
-  0xea, 0x63, 0x38, 0x42, 0x3f, 0x78, 0x68, 0xf8, 0x6e, 0x3f, 0x86, 0x19,
-  0x6e, 0x08, 0x4e, 0x88, 0x0c, 0x6a, 0x08, 0x74, 0x38, 0x62, 0x3f, 0xc0,
-  0x68, 0xf8, 0x2a, 0x10, 0xf4, 0xfa, 0x63, 0x98, 0xe1, 0x86, 0x40, 0x85,
-  0xc8, 0xa0, 0x82, 0x41, 0x67, 0x19, 0xc8, 0x21, 0x1f, 0x82, 0x73, 0x9f,
-  0x61, 0x6e, 0x64, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x2f,
-  0x95, 0xf2, 0x08, 0x87, 0x4c, 0x69, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84,
-  0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x43, 0x46, 0x0c,
-  0x10, 0x00, 0x04, 0xc1, 0x80, 0x83, 0x25, 0x50, 0x3a, 0x88, 0x60, 0xc4,
-  0x00, 0x01, 0x40, 0x10, 0x0c, 0xb8, 0x58, 0x0a, 0x25, 0x86, 0x08, 0x46,
-  0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80, 0x93, 0x25, 0x51, 0x92, 0x88, 0x60,
-  0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x16, 0x5d, 0x0a, 0x25, 0x31, 0x0a,
-  0x5a, 0x49, 0x8e, 0x5e, 0x69, 0x34, 0x21, 0x00, 0x2e, 0x78, 0x6c, 0x96,
-  0x20, 0x1f, 0x86, 0x1b, 0x5a, 0x64, 0x96, 0xc0, 0x60, 0x96, 0xc1, 0x1c,
-  0xce, 0x21, 0xa8, 0x1e, 0x22, 0x25, 0xb8, 0xe0, 0xa9, 0x11, 0x83, 0x03,
-  0x00, 0x41, 0x30, 0xa0, 0x78, 0xa9, 0x94, 0x68, 0xc4, 0x8e, 0x46, 0x0c,
-  0x0e, 0x00, 0x04, 0xc1, 0x80, 0xea, 0xa5, 0x52, 0x0a, 0x84, 0x0b, 0x86,
-  0x29, 0x30, 0x4a, 0x25, 0xb8, 0xe0, 0xa9, 0x11, 0x83, 0x03, 0x00, 0x41,
-  0x30, 0xa0, 0xc2, 0x49, 0x95, 0x6c, 0x64, 0x8f, 0x46, 0x0c, 0x0e, 0x00,
-  0x04, 0xc1, 0x80, 0x12, 0x27, 0x55, 0x0a, 0x84, 0x0b, 0x86, 0xb9, 0xe0,
-  0xa9, 0x3b, 0x9e, 0xba, 0x14, 0x1a, 0xe6, 0x7c, 0x66, 0x98, 0x23, 0x86,
-  0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xf8, 0xcc, 0xc9,
-  0x96, 0xea, 0x68, 0x9c, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46,
-  0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01,
-  0x40, 0x10, 0x0c, 0xb8, 0x76, 0xea, 0xa5, 0x84, 0x08, 0x46, 0x0c, 0x10,
-  0x00, 0x04, 0xc1, 0x80, 0x73, 0x27, 0x5f, 0x4a, 0x88, 0x60, 0xc4, 0x00,
-  0x01, 0x40, 0x10, 0x0c, 0xb8, 0x77, 0xfa, 0xa5, 0x84, 0x08, 0x46, 0x0c,
-  0x14, 0x00, 0x04, 0xc1, 0x60, 0xb9, 0x27, 0x5f, 0xfa, 0xa3, 0x40, 0x9d,
-  0x5e, 0x89, 0x9d, 0x46, 0x13, 0x02, 0xe0, 0x82, 0xc7, 0x66, 0x09, 0xf2,
-  0x61, 0xb8, 0x41, 0x4d, 0xde, 0x09, 0x0c, 0x66, 0x19, 0xd0, 0x21, 0x1f,
-  0x02, 0x8b, 0xa3, 0x39, 0x8a, 0xcf, 0x70, 0x04, 0x9c, 0xd0, 0x11, 0xf1,
-  0xcd, 0x32, 0xa4, 0x03, 0x3b, 0x04, 0x56, 0x47, 0x71, 0x12, 0x1f, 0x0b,
-  0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x60, 0xc8, 0xc7, 0x8a,
-  0x20, 0x3e, 0x45, 0xe4, 0x93, 0x0e, 0x37, 0x04, 0xf7, 0x04, 0x06, 0xb3,
-  0x0c, 0xea, 0xb0, 0x0e, 0x81, 0x0d, 0x7d, 0x04, 0x9f, 0x59, 0x02, 0x78,
-  0x30, 0x3e, 0x22, 0xe2, 0x33, 0x4b, 0x00, 0x0f, 0xc3, 0x11, 0x7b, 0xd2,
-  0x47, 0xc2, 0x37, 0xcb, 0xd0, 0x0e, 0xf0, 0x10, 0x18, 0x9f, 0xf8, 0x51,
-  0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x41, 0x24,
-  0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x41, 0x52, 0x3a, 0xdc, 0x10, 0x88, 0x14,
-  0x18, 0xcc, 0x32, 0xb8, 0xc3, 0x3b, 0x04, 0x66, 0x4a, 0x43, 0x7c, 0x66,
-  0x09, 0xe0, 0xc1, 0x88, 0x54, 0x82, 0xcf, 0x2c, 0x01, 0x3c, 0x0c, 0xb4,
-  0x3c, 0x9a, 0x3a, 0x60, 0xeb, 0x40, 0xb8, 0x83, 0xf0, 0x0e, 0x2c, 0xc5,
-  0x0e, 0x17, 0x0c, 0x63, 0xa8, 0xc4, 0x4a, 0xf1, 0x19, 0x8e, 0x30, 0x95,
-  0x56, 0x22, 0xbe, 0x59, 0x86, 0x78, 0xa0, 0x87, 0xc0, 0x5c, 0xe9, 0x54,
-  0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x0c,
-  0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0x99, 0xd2, 0xe1, 0x86, 0x00, 0xa6,
-  0xc0, 0x60, 0x96, 0x41, 0x1e, 0xe6, 0x21, 0xb0, 0xc1, 0x96, 0xe0, 0x33,
-  0x4b, 0x80, 0x0f, 0x36, 0x4b, 0x44, 0x7c, 0x66, 0x09, 0xf0, 0x61, 0x38,
-  0x22, 0x56, 0x68, 0x49, 0xf8, 0x66, 0x19, 0xea, 0x01, 0x1f, 0x02, 0x93,
-  0x95, 0x5a, 0x8a, 0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7,
-  0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0x7a, 0x4a, 0x87, 0x1b,
-  0x82, 0x9d, 0x02, 0x83, 0x59, 0x06, 0x7b, 0xb8, 0x87, 0xc0, 0x7a, 0x69,
-  0x88, 0xcf, 0x2c, 0x01, 0x3e, 0x18, 0x21, 0x4e, 0xf0, 0x99, 0x25, 0xc0,
-  0x87, 0x81, 0x96, 0x47, 0x93, 0x07, 0x6c, 0x1e, 0x08, 0x7b, 0x10, 0xee,
-  0x01, 0xaf, 0xe8, 0xe1, 0x82, 0x61, 0x2e, 0x78, 0xea, 0xb6, 0xa7, 0xce,
-  0x95, 0x86, 0xb9, 0xf1, 0x19, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4,
-  0xe0, 0x00, 0x40, 0x10, 0x0c, 0xbe, 0xb5, 0xda, 0x29, 0x7d, 0x42, 0xab,
-  0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d,
-  0x20, 0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x4e,
-  0xae, 0xc4, 0x2a, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xe0,
-  0xe6, 0x6a, 0xac, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03,
-  0x8e, 0xae, 0xc8, 0x2a, 0x21, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30,
-  0x58, 0xf8, 0x6a, 0xac, 0x48, 0x2a, 0x78, 0x2b, 0x9a, 0x8a, 0xab, 0xd1,
-  0x84, 0x00, 0xb8, 0xe0, 0xb1, 0x59, 0x82, 0x7c, 0x18, 0x68, 0x79, 0x4c,
-  0x83, 0x1c, 0xd4, 0x34, 0x18, 0x07, 0x96, 0x30, 0x07, 0x01, 0x1f, 0xd4,
-  0x34, 0x38, 0x87, 0x59, 0x06, 0x7d, 0xe0, 0x87, 0x77, 0x19, 0x8e, 0x90,
-  0x17, 0x9b, 0x1a, 0xbe, 0x9b, 0x97, 0x61, 0x86, 0x1b, 0x82, 0x90, 0x22,
-  0x83, 0x1a, 0x02, 0x1d, 0x8e, 0xa8, 0x17, 0x9d, 0x1a, 0xbe, 0x0a, 0x04,
-  0xbd, 0x7b, 0x19, 0x66, 0xb8, 0x21, 0x20, 0x29, 0x32, 0xa8, 0x60, 0xd0,
-  0x59, 0x86, 0x7d, 0x80, 0x89, 0xe0, 0xd0, 0x69, 0x98, 0xeb, 0x9f, 0x61,
-  0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0xe0, 0x1b, 0xad, 0xb9, 0x92, 0x29,
-  0xd0, 0x1a, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84,
-  0xd1, 0x04, 0x62, 0x28, 0xe2, 0x90, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30,
-  0xe0, 0x54, 0x4b, 0xaf, 0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04,
-  0x03, 0x6e, 0xb5, 0xf6, 0x8a, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41,
-  0x30, 0xe0, 0x58, 0x8b, 0xaf, 0x24, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10,
-  0x04, 0x83, 0x85, 0xb6, 0xf6, 0x8a, 0xa7, 0x82, 0xd3, 0x62, 0xab, 0xd4,
-  0x1a, 0x4d, 0x08, 0x80, 0x0b, 0x1e, 0x9b, 0x25, 0x80, 0x89, 0xe1, 0x86,
-  0x93, 0x69, 0x2d, 0x30, 0x98, 0x65, 0xe8, 0x07, 0x7f, 0x08, 0xea, 0xa6,
-  0xfc, 0x0a, 0x2e, 0x78, 0x6a, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x28,
-  0xdb, 0xfa, 0x2b, 0x96, 0x81, 0xab, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30,
-  0xa0, 0x6e, 0xeb, 0xaf, 0x02, 0xe1, 0x82, 0x61, 0x4a, 0xa7, 0x46, 0x0b,
-  0x2e, 0x78, 0x6a, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xa8, 0xdd, 0x22,
-  0x2d, 0x98, 0xa9, 0xab, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0x78,
-  0x8b, 0xb4, 0x02, 0xe1, 0x82, 0x61, 0x2e, 0x78, 0xea, 0x8e, 0xa7, 0x6e,
-  0xa4, 0x86, 0x39, 0x1c, 0x1a, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4,
-  0xe0, 0x00, 0x40, 0x10, 0x0c, 0x3e, 0xf0, 0x82, 0xad, 0xb7, 0xea, 0xad,
-  0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d,
-  0x20, 0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xee,
-  0xbc, 0x6e, 0x2b, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xe0,
-  0xd0, 0x0b, 0xb7, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03,
-  0x2e, 0xbd, 0x72, 0x2b, 0x21, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30,
-  0x58, 0xe2, 0x0b, 0xb7, 0xf2, 0x2a, 0x20, 0xaf, 0xd4, 0x32, 0xaf, 0xd1,
-  0x84, 0x00, 0xb8, 0xe0, 0xb1, 0x59, 0x02, 0x98, 0x18, 0x6e, 0x20, 0x9b,
-  0xf4, 0x02, 0x83, 0x59, 0x86, 0x7f, 0x80, 0x89, 0xc0, 0xd6, 0xaa, 0xad,
-  0xe2, 0x33, 0x1c, 0x81, 0x36, 0x6e, 0x45, 0x7c, 0xb3, 0x0c, 0x20, 0x31,
-  0x12, 0x81, 0xbd, 0x55, 0xda, 0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c,
-  0x73, 0xc1, 0x53, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0xf3,
-  0xa5, 0xc3, 0x0d, 0x41, 0x7c, 0x81, 0xc1, 0x2c, 0x43, 0x48, 0x88, 0x44,
-  0x60, 0xc3, 0x5d, 0xc1, 0x67, 0x96, 0xe0, 0x24, 0xcc, 0xae, 0x88, 0xf8,
-  0xcc, 0x12, 0x9c, 0xc4, 0x70, 0xc4, 0xdc, 0xdc, 0x95, 0xf0, 0xcd, 0x32,
-  0x90, 0xc4, 0x49, 0x04, 0x46, 0x37, 0x78, 0x15, 0x1f, 0x0b, 0x1c, 0xfa,
-  0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e,
-  0x45, 0xf8, 0x97, 0x0e, 0x37, 0x04, 0xfc, 0x05, 0x06, 0xb3, 0x0c, 0x25,
-  0x61, 0x12, 0x81, 0x81, 0xd6, 0x10, 0x9f, 0x59, 0x82, 0x93, 0x30, 0x62,
-  0xb4, 0xe0, 0x33, 0x4b, 0x70, 0x12, 0x03, 0x2d, 0x8f, 0x16, 0x12, 0x98,
-  0x48, 0x10, 0x25, 0x21, 0x98, 0x04, 0xbf, 0x8d, 0xc4, 0x05, 0xc3, 0x98,
-  0x68, 0x99, 0x56, 0x7c, 0x86, 0x23, 0x40, 0xe7, 0xb4, 0x88, 0x6f, 0x96,
-  0x01, 0x25, 0x56, 0x22, 0x30, 0xd4, 0x0a, 0x9d, 0xf8, 0x58, 0x30, 0xd0,
-  0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1,
-  0x29, 0x82, 0xc5, 0x74, 0xb8, 0x21, 0x50, 0x31, 0x30, 0x98, 0x65, 0x48,
-  0x09, 0x95, 0x08, 0x6c, 0x80, 0x2d, 0xf8, 0xcc, 0x12, 0xbc, 0x84, 0xb5,
-  0x16, 0x11, 0x9f, 0x59, 0x82, 0x97, 0x18, 0x8e, 0x58, 0x1d, 0xd7, 0x12,
-  0xbe, 0x59, 0x06, 0x96, 0x78, 0x89, 0xc0, 0x58, 0xe7, 0xb5, 0xe2, 0x63,
-  0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x22, 0xf9, 0x58,
-  0x11, 0xc4, 0xa7, 0x88, 0x1b, 0xd3, 0xe1, 0x86, 0xa0, 0xc6, 0xc0, 0x60,
-  0x96, 0xa1, 0x25, 0x5c, 0x22, 0xb0, 0xdb, 0x1a, 0xe2, 0x33, 0x4b, 0xf0,
-  0x12, 0x46, 0xf0, 0x16, 0x7c, 0x66, 0x09, 0x5e, 0x62, 0xa0, 0xe5, 0xd1,
-  0x52, 0x02, 0x53, 0x09, 0xa2, 0x25, 0x04, 0x97, 0x60, 0xbb, 0x95, 0xb8,
-  0x60, 0x98, 0x0b, 0x9e, 0xba, 0xed, 0xa9, 0x43, 0xad, 0x61, 0xae, 0x97,
+  0x62, 0x28, 0xe2, 0x90, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xe0, 0x74,
+  0x43, 0x35, 0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x6e,
+  0x37, 0x56, 0x83, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xe0,
+  0x78, 0x83, 0x35, 0x24, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83,
+  0x85, 0x3c, 0x56, 0x83, 0x2d, 0x82, 0xdb, 0xe0, 0x8b, 0xdc, 0x18, 0x4d,
+  0x08, 0x80, 0x0b, 0x1e, 0x9b, 0x25, 0x90, 0x83, 0xe1, 0x86, 0xac, 0x37,
+  0xc0, 0x60, 0x96, 0xe1, 0x03, 0x83, 0xa0, 0xce, 0xc2, 0x35, 0xe0, 0x82,
+  0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x32, 0x8f, 0xd7, 0xf8,
+  0x40, 0x63, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xa8, 0xf3, 0x78, 0x8d,
+  0x40, 0xb8, 0x60, 0x98, 0x52, 0x8b, 0xd9, 0x80, 0x0b, 0x9e, 0x1a, 0x31,
+  0x38, 0x00, 0x10, 0x04, 0x03, 0x6a, 0x3d, 0x68, 0x63, 0x0c, 0x4a, 0x63,
+  0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x28, 0xf6, 0xa0, 0x8d, 0x40, 0xb8,
+  0x60, 0x98, 0x0b, 0x9e, 0xba, 0xe3, 0xa9, 0x9b, 0x89, 0x61, 0x0e, 0x0d,
   0x86, 0x39, 0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04,
-  0x83, 0xaf, 0xcc, 0x6a, 0x8c, 0xbe, 0xc4, 0x6c, 0x34, 0x21, 0x00, 0x46,
+  0x83, 0x0f, 0x3e, 0xc0, 0xe3, 0x2f, 0xda, 0x63, 0x34, 0x21, 0x00, 0x46,
   0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44,
-  0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80, 0x63, 0x33, 0x1e, 0x4b, 0x88,
-  0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb8, 0x36, 0xeb, 0xb1, 0x84,
-  0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80, 0x73, 0x33, 0x1f, 0x4b,
-  0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x16, 0x3b, 0xeb, 0x31,
-  0xff, 0x0a, 0xd2, 0xcc, 0xc5, 0xd6, 0x6c, 0x34, 0x21, 0x00, 0x2e, 0x78,
-  0x6c, 0x96, 0x00, 0x26, 0x06, 0x5a, 0x1e, 0xd3, 0xd8, 0x07, 0x1f, 0x0e,
-  0xf4, 0x81, 0x25, 0xfa, 0x41, 0x78, 0x09, 0x1f, 0x0e, 0xfc, 0x61, 0xc4,
-  0xc0, 0x00, 0x40, 0x10, 0x0c, 0x20, 0x3c, 0xcb, 0xb1, 0x77, 0x32, 0xfb,
-  0x80, 0x97, 0xf8, 0x98, 0x10, 0xc8, 0xc7, 0x02, 0x79, 0x81, 0x8f, 0x15,
-  0xff, 0x10, 0x1f, 0x2b, 0x02, 0xf9, 0x58, 0x10, 0x12, 0xf0, 0x19, 0x31,
-  0x30, 0x00, 0x10, 0x04, 0x03, 0xe8, 0xcf, 0xc0, 0xac, 0x9e, 0x4c, 0x28,
-  0xe2, 0x63, 0x81, 0x20, 0x1f, 0x0b, 0x0e, 0xf8, 0x5c, 0x60, 0xd0, 0x88,
-  0x81, 0x03, 0x80, 0x20, 0x18, 0x34, 0xa5, 0x46, 0x66, 0x30, 0x96, 0x62,
-  0x79, 0x16, 0xfc, 0xd8, 0x8f, 0xfd, 0x98, 0x8f, 0xed, 0xd9, 0x2c, 0xc1,
-  0x08, 0x0d, 0x37, 0xf8, 0xd5, 0x9e, 0x81, 0xc1, 0x2c, 0x83, 0x4c, 0x8c,
-  0x50, 0x30, 0x62, 0x60, 0x00, 0x20, 0x08, 0x06, 0x50, 0xa9, 0x9d, 0x19,
-  0x3f, 0x59, 0x90, 0x63, 0xf0, 0x19, 0x31, 0x30, 0x00, 0x10, 0x04, 0x03,
-  0xe8, 0xd4, 0xd2, 0xac, 0x9f, 0x2c, 0xd8, 0x31, 0xf8, 0x8c, 0x26, 0xc0,
-  0xd8, 0x30, 0xdc, 0x10, 0xfc, 0x19, 0x18, 0xcc, 0x32, 0xcc, 0x44, 0x4d,
-  0x04, 0xc3, 0x11, 0xc5, 0x98, 0x0d, 0xdf, 0x19, 0xc3, 0x0c, 0x37, 0x04,
-  0x2e, 0x46, 0x06, 0x35, 0x04, 0x3a, 0x1c, 0x71, 0x9c, 0xd9, 0xf0, 0x55,
-  0x20, 0xe8, 0x25, 0xc3, 0x0c, 0x37, 0x04, 0x31, 0x46, 0x06, 0x15, 0x0c,
-  0x3a, 0xcb, 0x40, 0x13, 0x69, 0x11, 0x5c, 0x7d, 0x0d, 0x73, 0x2a, 0x35,
-  0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x7c, 0xb0, 0x06, 0x6a, 0x3f,
-  0xd6, 0x6a, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83,
-  0x30, 0x9a, 0x40, 0x0c, 0x45, 0x1c, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08,
-  0x06, 0xdc, 0xad, 0x9d, 0xda, 0x41, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82,
-  0x60, 0xc0, 0xe1, 0x1a, 0xaa, 0x31, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20,
-  0x08, 0x06, 0x5c, 0xae, 0xa5, 0x9a, 0x44, 0x04, 0x23, 0x06, 0x0a, 0x00,
-  0x82, 0x60, 0xb0, 0x84, 0x1b, 0xaa, 0xa5, 0x59, 0x40, 0x6b, 0x79, 0x66,
-  0x6b, 0xa3, 0x09, 0x01, 0x70, 0xc1, 0x63, 0xb3, 0x04, 0x69, 0x31, 0xdc,
-  0x90, 0xe9, 0x1a, 0x18, 0xcc, 0x32, 0xd8, 0xc4, 0x4d, 0x04, 0x45, 0x66,
-  0xab, 0x06, 0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0xd4,
-  0xb8, 0xb1, 0xda, 0xd7, 0x67, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40,
-  0x91, 0x1b, 0xab, 0x05, 0xc2, 0x05, 0xc3, 0xd4, 0x99, 0xc1, 0x1a, 0x5c,
-  0xf0, 0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50, 0xe8, 0x16, 0x6b,
-  0x62, 0x20, 0x6a, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0xa5, 0x5b,
-  0xac, 0x05, 0xc2, 0x05, 0xc3, 0x5c, 0xf0, 0xd4, 0x1d, 0x4f, 0x1d, 0x8c,
-  0x0d, 0x73, 0x65, 0x35, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1,
-  0x01, 0x80, 0x20, 0x18, 0x7c, 0xed, 0xd6, 0x6b, 0x7c, 0xa6, 0x6e, 0xa3,
-  0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40,
-  0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x1c, 0xbd,
-  0x91, 0x5b, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xc0, 0xd5,
-  0x5b, 0xb9, 0x25, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x9c,
-  0xbd, 0x99, 0x5b, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0,
-  0xf8, 0x5b, 0xb9, 0x99, 0x5a, 0x10, 0x6f, 0xb6, 0x36, 0x6f, 0xa3, 0x09,
-  0x01, 0x70, 0xc1, 0x63, 0xb3, 0x04, 0x69, 0x31, 0xdc, 0x60, 0x07, 0xf6,
-  0x06, 0x06, 0xb3, 0x0c, 0x38, 0x91, 0x16, 0x81, 0xe1, 0x99, 0x9e, 0xc5,
-  0x67, 0x38, 0x62, 0x0f, 0xf6, 0x8c, 0xf8, 0x66, 0x19, 0x72, 0x82, 0x27,
-  0x02, 0xe3, 0x33, 0x3e, 0x88, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6,
-  0x82, 0xa7, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0x40, 0x4e,
-  0x87, 0x1b, 0x02, 0x7f, 0x03, 0x83, 0x59, 0x06, 0x9d, 0xd8, 0x89, 0xc0,
-  0x06, 0x52, 0x83, 0xcf, 0x2c, 0x01, 0x58, 0xd8, 0xa8, 0x11, 0xf1, 0x99,
-  0x25, 0x00, 0x8b, 0xe1, 0x08, 0x53, 0x20, 0x35, 0xe1, 0x9b, 0x65, 0xe8,
-  0x09, 0xb0, 0x08, 0xec, 0x14, 0x4a, 0x2d, 0x3e, 0x16, 0x38, 0xf4, 0xb9,
-  0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a,
-  0x58, 0x39, 0x1d, 0x6e, 0x08, 0x52, 0x0e, 0x0c, 0x66, 0x19, 0x7c, 0xe2,
-  0x27, 0x02, 0x6b, 0xb5, 0x21, 0x3e, 0xb3, 0x04, 0x60, 0x61, 0x04, 0xac,
-  0xc1, 0x67, 0x96, 0x00, 0x2c, 0x06, 0x5a, 0x1e, 0x4d, 0x27, 0xb0, 0x9d,
-  0x20, 0x7c, 0x42, 0xf8, 0x09, 0xb1, 0xe0, 0x89, 0x0b, 0x86, 0xb1, 0x57,
-  0x9b, 0xb5, 0xf8, 0x0c, 0x47, 0xc8, 0x02, 0xad, 0x11, 0xdf, 0x2c, 0x43,
-  0x58, 0x90, 0x45, 0x60, 0xb5, 0x36, 0x0b, 0xf1, 0xb1, 0x60, 0xa0, 0xcf,
-  0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53,
-  0x44, 0xce, 0xe9, 0x70, 0x43, 0x70, 0x73, 0x60, 0x30, 0xcb, 0x20, 0x16,
-  0x63, 0x11, 0xd8, 0xd0, 0x6b, 0xf0, 0x99, 0x25, 0x40, 0x0b, 0xd3, 0x35,
-  0x22, 0x3e, 0xb3, 0x04, 0x68, 0x31, 0x1c, 0xd1, 0x0b, 0xbb, 0x26, 0x7c,
-  0xb3, 0x0c, 0x65, 0x81, 0x16, 0x81, 0xf9, 0x02, 0xaf, 0xc5, 0xc7, 0x02,
-  0x87, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x44, 0xf2, 0xb1, 0x22,
-  0x88, 0x4f, 0x11, 0x64, 0xa7, 0xc3, 0x0d, 0x81, 0xd8, 0x81, 0xc1, 0x2c,
-  0x83, 0x59, 0x9c, 0x45, 0x60, 0xe4, 0x36, 0xc4, 0x67, 0x96, 0x00, 0x2d,
-  0x8c, 0x48, 0x37, 0xf8, 0xcc, 0x12, 0xa0, 0xc5, 0x40, 0xcb, 0xa3, 0x89,
-  0x05, 0x36, 0x16, 0x84, 0x59, 0x08, 0x67, 0x01, 0x1b, 0x64, 0x71, 0xc1,
-  0x30, 0x17, 0x3c, 0x75, 0xdb, 0x53, 0x57, 0x6b, 0xc3, 0x9c, 0x7a, 0x0d,
-  0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06,
-  0x9f, 0xdc, 0x89, 0x5d, 0xc8, 0xbd, 0xdd, 0x68, 0x42, 0x00, 0x8c, 0x26,
-  0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x97, 0x77, 0x69, 0x97, 0x10, 0xc1,
-  0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70, 0x7a, 0xa7, 0x76, 0x09, 0x11,
-  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0xb7, 0x77, 0x6b, 0x97, 0x10,
-  0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c, 0xa3, 0xa7, 0x76, 0x2b,
-  0x17, 0xd8, 0xdd, 0xce, 0xe1, 0xdd, 0x68, 0x42, 0x00, 0x5c, 0xf0, 0xd8,
-  0x2c, 0x41, 0x5a, 0x0c, 0xb4, 0x3c, 0xa6, 0x41, 0x13, 0x78, 0x1e, 0xcc,
-  0x04, 0x4b, 0xd8, 0x84, 0x80, 0x16, 0x78, 0x1e, 0xdc, 0xc4, 0x2c, 0x83,
-  0x5a, 0xb0, 0xc5, 0x3e, 0x0c, 0x47, 0x80, 0x44, 0xcf, 0x0d, 0xdf, 0x85,
-  0xc4, 0x30, 0xc3, 0x0d, 0x01, 0xca, 0x91, 0x41, 0x0d, 0x81, 0x0e, 0x47,
-  0x84, 0x44, 0xd8, 0x0d, 0x5f, 0x05, 0x82, 0xde, 0x48, 0x0c, 0x33, 0xdc,
-  0x10, 0xac, 0x1c, 0x19, 0x54, 0x30, 0xe8, 0x2c, 0xc3, 0x5a, 0x80, 0x46,
-  0x70, 0xef, 0x36, 0xcc, 0x91, 0xd8, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82,
-  0x60, 0xf0, 0xa9, 0x9e, 0xde, 0xe5, 0xdc, 0xe9, 0x8d, 0x26, 0x04, 0xc0,
-  0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x71,
-  0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70, 0xb1, 0x17, 0x7a, 0x07,
-  0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x27, 0x7b, 0xa2, 0xc7,
-  0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70, 0xb3, 0x37, 0x7a,
-  0x12, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0xb2, 0x7b, 0xa2,
-  0x37, 0x76, 0x81, 0xeb, 0xcd, 0x1d, 0xec, 0x8d, 0x26, 0x04, 0xc0, 0x05,
-  0x8f, 0xcd, 0x12, 0x80, 0xc6, 0x70, 0xc3, 0x4c, 0xd0, 0x1e, 0x18, 0xcc,
-  0x32, 0xb4, 0x85, 0x5b, 0x04, 0xe5, 0x73, 0xa5, 0x07, 0x17, 0x3c, 0x35,
-  0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x54, 0xef, 0x99, 0x9e, 0x4e, 0xdc,
-  0xdd, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50, 0xbe, 0x67, 0x7a, 0x81,
-  0x70, 0xc1, 0x30, 0x15, 0x76, 0xaa, 0x07, 0x17, 0x3c, 0x35, 0x62, 0x70,
-  0x00, 0x20, 0x08, 0x06, 0x94, 0xf8, 0xad, 0x1e, 0x4f, 0xf0, 0xdd, 0x88,
-  0xc1, 0x01, 0x80, 0x20, 0x18, 0x50, 0xe3, 0xb7, 0x7a, 0x81, 0x70, 0xc1,
-  0x30, 0x17, 0x3c, 0x75, 0xc7, 0x53, 0xa7, 0x72, 0xc3, 0xdc, 0x8f, 0x0d,
-  0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06,
-  0xdf, 0xf9, 0xdd, 0x9e, 0xdd, 0x91, 0xdf, 0x68, 0x42, 0x00, 0x8c, 0x26,
-  0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0xe7, 0x7e, 0xbe, 0x97, 0x10, 0xc1,
-  0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70, 0xef, 0xf7, 0x7b, 0x09, 0x11,
-  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x07, 0x7f, 0xe0, 0x97, 0x10,
-  0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c, 0xf8, 0xf7, 0x7b, 0xa0,
-  0x17, 0xac, 0x1f, 0xec, 0xb5, 0xdf, 0x68, 0x42, 0x00, 0x5c, 0xf0, 0xd8,
-  0x2c, 0x01, 0x68, 0x0c, 0x37, 0xc0, 0x05, 0xfc, 0x81, 0xc1, 0x2c, 0xc3,
-  0x5b, 0x80, 0x46, 0x60, 0x72, 0x47, 0x77, 0xf1, 0x19, 0x8e, 0xb0, 0x8b,
-  0xba, 0x23, 0xbe, 0x59, 0x06, 0xb8, 0x98, 0x8b, 0xc0, 0xec, 0xee, 0x2e,
-  0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x0c,
-  0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0xfd, 0xd3, 0xe1, 0x86, 0x00, 0xff,
-  0xc0, 0x60, 0x96, 0x21, 0x2e, 0xe4, 0x22, 0xb0, 0xc1, 0xef, 0xe0, 0x33,
-  0x4b, 0x70, 0x17, 0xd6, 0x77, 0x44, 0x7c, 0x66, 0x09, 0xee, 0x62, 0x38,
-  0x22, 0x34, 0xfc, 0x4e, 0xf8, 0x66, 0x19, 0xe8, 0xe2, 0x2e, 0x02, 0x13,
-  0x8d, 0xbf, 0x8b, 0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7,
-  0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0x4a, 0x30, 0xd0, 0xe1,
-  0x86, 0x60, 0x04, 0x03, 0x30, 0x98, 0x65, 0xa8, 0x0b, 0xbb, 0x08, 0xec,
-  0xf4, 0x86, 0xf8, 0xcc, 0x12, 0xdc, 0x85, 0x11, 0xaa, 0x07, 0x9f, 0x59,
-  0x82, 0xbb, 0x18, 0x68, 0x79, 0xb4, 0xb8, 0xc0, 0xe4, 0x82, 0xa8, 0x0b,
-  0xc1, 0x2e, 0x70, 0x66, 0x2e, 0x2e, 0x18, 0xc6, 0x52, 0xaf, 0xf5, 0xe2,
-  0x33, 0x1c, 0xc1, 0x1a, 0xae, 0x47, 0x7c, 0xb3, 0x0c, 0x78, 0xb1, 0x17,
-  0x81, 0xbd, 0x5e, 0x6b, 0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73,
-  0xc1, 0x53, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x33, 0x18,
-  0xe8, 0x70, 0x43, 0x10, 0x83, 0x01, 0x18, 0xcc, 0x32, 0xe4, 0x85, 0x5e,
-  0x04, 0x36, 0xdc, 0x1e, 0x7c, 0x66, 0x09, 0xfe, 0xc2, 0x68, 0x8f, 0x88,
-  0xcf, 0x2c, 0xc1, 0x5f, 0x0c, 0x47, 0xdc, 0x46, 0xed, 0x09, 0xdf, 0x2c,
-  0x03, 0x5f, 0xfc, 0x45, 0x60, 0xb8, 0x61, 0x7b, 0xf1, 0xb1, 0xc0, 0xa1,
+  0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80, 0xbb, 0x8f, 0xf3, 0x48, 0x88,
+  0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x38, 0xfc, 0x40, 0x8f, 0x84,
+  0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80, 0xcb, 0x8f, 0xf4, 0x48,
+  0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x96, 0x10, 0x41, 0x8f,
+  0xd4, 0x08, 0xe8, 0x23, 0x37, 0xec, 0x63, 0x34, 0x21, 0x00, 0x2e, 0x78,
+  0x6c, 0x96, 0x40, 0x0e, 0x86, 0x1b, 0xec, 0x20, 0x3f, 0xc0, 0x60, 0x96,
+  0x21, 0x0c, 0xe4, 0x20, 0xb0, 0xbd, 0xe8, 0x8b, 0xf8, 0x0c, 0x47, 0xec,
+  0x81, 0x5f, 0x10, 0xdf, 0x2c, 0x83, 0x18, 0x94, 0x41, 0x60, 0x7f, 0xc1,
+  0x07, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05,
+  0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0xc4, 0x88, 0xe8, 0x70, 0x43, 0x10,
+  0x22, 0x60, 0x30, 0xcb, 0x30, 0x06, 0x64, 0x10, 0xd8, 0x70, 0x1a, 0xf0,
+  0x99, 0x25, 0x48, 0x03, 0x33, 0x0d, 0x22, 0x3e, 0xb3, 0x04, 0x69, 0x30,
+  0x1c, 0x61, 0x0a, 0xa7, 0x21, 0x7c, 0xb3, 0x0c, 0x66, 0x90, 0x06, 0x81,
+  0x9d, 0x02, 0x6a, 0xc4, 0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73, 0xc1,
+  0x53, 0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x2e, 0xa2, 0xc3,
+  0x0d, 0x01, 0x8b, 0x80, 0xc1, 0x2c, 0xc3, 0x19, 0xa0, 0x41, 0x60, 0xb0,
+  0x31, 0xc4, 0x67, 0x96, 0x20, 0x0d, 0x8c, 0x98, 0x0d, 0xf8, 0xcc, 0x12,
+  0xa4, 0xc1, 0x40, 0xcb, 0xa3, 0x8d, 0x01, 0x46, 0x06, 0xc4, 0x19, 0x08,
+  0x68, 0x20, 0x16, 0x65, 0x70, 0xc1, 0x30, 0x26, 0x1b, 0xb6, 0x11, 0x9f,
+  0xe1, 0x88, 0x59, 0xb8, 0x0d, 0xe2, 0x9b, 0x65, 0x50, 0x83, 0x36, 0x08,
+  0x0c, 0x37, 0x68, 0x21, 0x3e, 0x16, 0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b,
+  0x9e, 0xb2, 0xc0, 0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xe0, 0x11, 0x1d,
+  0x6e, 0x08, 0x74, 0x04, 0x0c, 0x66, 0x19, 0xd6, 0x80, 0x0d, 0x02, 0x1b,
+  0xc0, 0x03, 0x3e, 0xb3, 0x04, 0x71, 0x60, 0xbd, 0x41, 0xc4, 0x67, 0x96,
+  0x20, 0x0e, 0x86, 0x23, 0x7c, 0xc1, 0x37, 0x84, 0x6f, 0x96, 0xc1, 0x0d,
+  0xe2, 0x20, 0xb0, 0x5f, 0xf8, 0x8d, 0xf8, 0x58, 0xe0, 0xd0, 0xe7, 0x82,
+  0x61, 0x2e, 0x78, 0xca, 0x82, 0x48, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0xe2,
+  0x4c, 0x74, 0xb8, 0x21, 0x28, 0x13, 0x30, 0x98, 0x65, 0x78, 0x03, 0x38,
+  0x08, 0xec, 0x3c, 0x86, 0xf8, 0xcc, 0x12, 0xc4, 0x81, 0x11, 0xec, 0x01,
+  0x9f, 0x59, 0x82, 0x38, 0x18, 0x68, 0x79, 0xb4, 0x35, 0xc0, 0xd8, 0x80,
+  0x78, 0x03, 0x01, 0x0e, 0x64, 0xa3, 0x0d, 0x2e, 0x18, 0xe6, 0x82, 0xa7,
+  0x6e, 0x7b, 0xea, 0x70, 0x63, 0x98, 0x6b, 0x87, 0x61, 0x8e, 0x18, 0xe6,
+  0x88, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0xe0, 0xab, 0x93, 0x32,
+  0x21, 0x11, 0x39, 0x19, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d,
+  0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00,
+  0x41, 0x30, 0xe0, 0xf8, 0x84, 0x4d, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00,
+  0x10, 0x04, 0x03, 0xae, 0x4f, 0xda, 0x24, 0x21, 0x82, 0x11, 0x03, 0x04,
+  0x00, 0x41, 0x30, 0xe0, 0xfc, 0xc4, 0x4d, 0x12, 0x22, 0x18, 0x31, 0x50,
+  0x00, 0x10, 0x04, 0x83, 0xc5, 0x54, 0xda, 0xc4, 0x45, 0x82, 0x3c, 0xf1,
+  0x91, 0x3d, 0x19, 0x4d, 0x08, 0x80, 0x0b, 0x1e, 0x9b, 0x25, 0x90, 0x83,
+  0x81, 0x96, 0xc7, 0x34, 0x3a, 0x3f, 0xe2, 0x58, 0xe2, 0x13, 0xe2, 0xc0,
+  0x8f, 0xc0, 0xe0, 0x02, 0x83, 0x46, 0x0c, 0x1c, 0x00, 0x04, 0xc1, 0xa0,
+  0x51, 0x95, 0x34, 0xa9, 0x11, 0x17, 0xf1, 0x93, 0x80, 0x4c, 0xc8, 0x84,
+  0x4c, 0xc6, 0x04, 0x54, 0x66, 0x09, 0x46, 0x68, 0xb8, 0x61, 0x34, 0xfc,
+  0x04, 0x0c, 0x66, 0x19, 0xe8, 0x20, 0x26, 0x82, 0x11, 0x03, 0x03, 0x00,
+  0x41, 0x30, 0x80, 0x54, 0x85, 0x4d, 0x42, 0x62, 0xc4, 0xc0, 0x00, 0x40,
+  0x10, 0x0c, 0xa0, 0x55, 0x69, 0x93, 0x90, 0x30, 0x61, 0x4c, 0xe0, 0x63,
+  0x02, 0x99, 0xc0, 0x67, 0x34, 0xa1, 0x46, 0x86, 0xe1, 0x86, 0x80, 0x54,
+  0xc0, 0x60, 0x96, 0xa1, 0x0e, 0xee, 0x20, 0x18, 0x8e, 0x30, 0xd0, 0x64,
+  0xf8, 0xee, 0x18, 0x66, 0xb8, 0x21, 0x98, 0x11, 0x32, 0xa8, 0x21, 0xd0,
+  0xe1, 0x88, 0x84, 0x4d, 0x86, 0xaf, 0x02, 0x41, 0x6f, 0x19, 0x66, 0xb8,
+  0x21, 0xb0, 0x11, 0x32, 0xa8, 0x60, 0xd0, 0x59, 0x06, 0x3b, 0x58, 0x85,
+  0xe0, 0xf4, 0x63, 0x98, 0x7b, 0x89, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04,
+  0xc1, 0xe0, 0xab, 0x95, 0x52, 0x21, 0x13, 0x59, 0x19, 0x4d, 0x08, 0x80,
+  0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0xe2,
+  0x90, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xe0, 0x78, 0x85, 0x55, 0x0e,
+  0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xae, 0x57, 0x5a, 0x85,
+  0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xe0, 0x7c, 0xc5, 0x55,
+  0x24, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0xc5, 0x5c, 0x5a,
+  0xc5, 0x4d, 0x82, 0x5c, 0xf1, 0x93, 0x5d, 0x19, 0x4d, 0x08, 0x80, 0x0b,
+  0x1e, 0x9b, 0x25, 0x58, 0x85, 0xe1, 0x86, 0xec, 0x57, 0xc0, 0x60, 0x96,
+  0x01, 0x0f, 0xf2, 0x20, 0xa8, 0x34, 0x81, 0x15, 0xb8, 0xe0, 0xa9, 0x11,
+  0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0xd0, 0x25, 0x56, 0xc0, 0x40, 0x54,
+  0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x4a, 0x97, 0x58, 0x09, 0x84,
+  0x0b, 0x86, 0x29, 0x36, 0xa9, 0x15, 0xb8, 0xe0, 0xa9, 0x11, 0x83, 0x03,
+  0x00, 0x41, 0x30, 0xa0, 0xda, 0xc5, 0x56, 0xc8, 0xe0, 0x54, 0x46, 0x0c,
+  0x0e, 0x00, 0x04, 0xc1, 0x80, 0x72, 0x17, 0x5b, 0x09, 0x84, 0x0b, 0x86,
+  0xb9, 0xe0, 0xa9, 0x3b, 0x9e, 0xba, 0x1a, 0x19, 0xe6, 0xd4, 0x62, 0x98,
+  0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xf8,
+  0xe4, 0x45, 0x5c, 0x42, 0xe5, 0x5d, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41,
+  0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4,
+  0x00, 0x01, 0x40, 0x10, 0x0c, 0xb8, 0x7c, 0x49, 0x97, 0x84, 0x08, 0x46,
+  0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80, 0xd3, 0x17, 0x75, 0x49, 0x88, 0x60,
+  0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb8, 0x7d, 0x59, 0x97, 0x84, 0x08,
+  0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0x19, 0x19, 0x75, 0x59, 0x95,
+  0xc0, 0x5e, 0x76, 0x05, 0x5f, 0x46, 0x13, 0x02, 0xe0, 0x82, 0xc7, 0x66,
+  0x09, 0x56, 0x61, 0xb8, 0xc1, 0x0e, 0xf6, 0x05, 0x0c, 0x66, 0x19, 0xf4,
+  0x60, 0x15, 0x02, 0xeb, 0x93, 0x3f, 0x89, 0xcf, 0x70, 0x04, 0x1f, 0x80,
+  0x0a, 0xf1, 0xcd, 0x32, 0xec, 0x81, 0x1f, 0x04, 0x16, 0x2a, 0x7d, 0x10,
+  0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x60, 0xc8,
+  0xc7, 0x8a, 0x20, 0x3e, 0x45, 0x94, 0x8c, 0x0e, 0x37, 0x04, 0x23, 0x03,
+  0x06, 0xb3, 0x0c, 0x7c, 0xd0, 0x07, 0x81, 0x0d, 0xa9, 0x02, 0x9f, 0x59,
+  0x02, 0x51, 0x30, 0x54, 0x21, 0xe2, 0x33, 0x4b, 0x20, 0x0a, 0xc3, 0x11,
+  0xa7, 0x90, 0x2a, 0xc2, 0x37, 0xcb, 0xf0, 0x07, 0xa2, 0x10, 0x18, 0x2a,
+  0xa8, 0x4a, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65,
+  0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x01, 0x33, 0x3a, 0xdc, 0x10,
+  0xb8, 0x0c, 0x18, 0xcc, 0x32, 0x80, 0x42, 0x28, 0x04, 0x26, 0x2b, 0x43,
+  0x7c, 0x66, 0x09, 0x44, 0xc1, 0x88, 0x5a, 0x81, 0xcf, 0x2c, 0x81, 0x28,
+  0x0c, 0xb4, 0x3c, 0x1a, 0x1f, 0x60, 0x7d, 0x40, 0x80, 0x82, 0x10, 0x0a,
+  0x64, 0xe1, 0x07, 0x17, 0x0c, 0x63, 0xb4, 0x82, 0x2b, 0xf1, 0x19, 0x8e,
+  0xa0, 0x85, 0x5c, 0x21, 0xbe, 0x59, 0x86, 0x51, 0x30, 0x85, 0xc0, 0x74,
+  0xa5, 0x16, 0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29,
+  0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0x9f, 0xd1, 0xe1, 0x86,
+  0x80, 0x67, 0xc0, 0x60, 0x96, 0x81, 0x14, 0x4a, 0x21, 0xb0, 0x41, 0x5c,
+  0xe0, 0x33, 0x4b, 0xa0, 0x0a, 0xf6, 0x2b, 0x44, 0x7c, 0x66, 0x09, 0x54,
+  0x61, 0x38, 0xe2, 0x17, 0xc0, 0x45, 0xf8, 0x66, 0x19, 0x4e, 0x41, 0x15,
+  0x02, 0x03, 0x87, 0x70, 0x89, 0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6,
+  0x82, 0xa7, 0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xd2, 0x46,
+  0x87, 0x1b, 0x82, 0xb3, 0x01, 0x83, 0x59, 0x06, 0x54, 0x48, 0x85, 0xc0,
+  0xd2, 0x65, 0x88, 0xcf, 0x2c, 0x81, 0x2a, 0x18, 0xe1, 0x2e, 0xf0, 0x99,
+  0x25, 0x50, 0x85, 0x81, 0x96, 0x47, 0x23, 0x05, 0xac, 0x14, 0x08, 0x54,
+  0x10, 0x52, 0x81, 0x36, 0x4c, 0xe1, 0x82, 0x61, 0x2e, 0x78, 0xea, 0xb6,
+  0xa7, 0x4e, 0x57, 0x86, 0xb9, 0xf7, 0x18, 0xe6, 0x88, 0x61, 0x8e, 0x18,
+  0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xbe, 0xbb, 0x39, 0x1b, 0x93,
+  0xa1, 0x9b, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41,
+  0x18, 0x4d, 0x20, 0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04,
+  0x03, 0xce, 0x6f, 0xdc, 0x26, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41,
+  0x30, 0xe0, 0xfe, 0xe6, 0x6d, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10,
+  0x04, 0x03, 0x0e, 0x74, 0xe0, 0x26, 0x21, 0x82, 0x11, 0x03, 0x05, 0x00,
+  0x41, 0x30, 0x58, 0x50, 0xe7, 0x6d, 0x60, 0x26, 0xd8, 0x1b, 0xb0, 0xe9,
+  0x9b, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xb1, 0x59, 0x82, 0x55, 0x18, 0x68,
+  0x79, 0x4c, 0xc3, 0x0e, 0x44, 0xad, 0x0e, 0x58, 0x02, 0x0f, 0x04, 0x55,
+  0x10, 0xb5, 0x3c, 0x98, 0x65, 0x60, 0x05, 0x57, 0xd8, 0x87, 0xe1, 0x08,
+  0x7f, 0x10, 0x9b, 0xe1, 0xbb, 0x7f, 0x18, 0x66, 0xb8, 0x21, 0x68, 0x19,
+  0x32, 0xa8, 0x21, 0xd0, 0xe1, 0x88, 0x91, 0x30, 0x9b, 0xe1, 0xab, 0x40,
+  0xd0, 0x2b, 0x89, 0x61, 0x86, 0x1b, 0x02, 0x98, 0x21, 0x83, 0x0a, 0x06,
+  0x9d, 0x65, 0x68, 0x05, 0x71, 0x08, 0x8e, 0x5e, 0x86, 0xb9, 0x14, 0x19,
+  0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xbe, 0xd7, 0xf9, 0x1b, 0x9f,
+  0x61, 0x9d, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41,
+  0x18, 0x4d, 0x20, 0x86, 0x22, 0x0e, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04,
+  0x03, 0xce, 0x76, 0x4c, 0xe7, 0x20, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41,
+  0x30, 0xe0, 0x6e, 0xe7, 0x74, 0x18, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10,
+  0x04, 0x03, 0x0e, 0x77, 0x50, 0x47, 0x22, 0x82, 0x11, 0x03, 0x05, 0x00,
+  0x41, 0x30, 0x58, 0xc0, 0xe7, 0x74, 0xd0, 0x26, 0x98, 0x1d, 0xbc, 0xa9,
+  0x9d, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xb1, 0x59, 0x02, 0x71, 0x18, 0x6e,
+  0x98, 0x89, 0xdc, 0x01, 0x83, 0x59, 0x86, 0x57, 0x80, 0x85, 0xa0, 0xc6,
+  0x46, 0x75, 0xe0, 0x82, 0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80,
+  0x12, 0x9f, 0xd5, 0xc1, 0x09, 0xbe, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04,
+  0x03, 0x6a, 0x7c, 0x56, 0x27, 0x10, 0x2e, 0x18, 0xa6, 0xcc, 0xe6, 0x75,
+  0xe0, 0x82, 0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x3a, 0x1f,
+  0xd8, 0xf1, 0x89, 0xd0, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x0a,
+  0x7d, 0x60, 0x27, 0x10, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0xee, 0x78, 0xea,
+  0x5e, 0x66, 0x98, 0x23, 0x93, 0x61, 0x8e, 0x18, 0xe6, 0x88, 0x61, 0x46,
+  0x0c, 0x0e, 0x00, 0x04, 0xc1, 0xe0, 0x63, 0x1f, 0xde, 0xd9, 0x9b, 0xf4,
+  0x19, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1,
+  0x04, 0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xe0,
+  0xe6, 0x67, 0x7c, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03,
+  0x8e, 0x7e, 0xc8, 0x27, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30,
+  0xe0, 0xea, 0xa7, 0x7c, 0x12, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04,
+  0x83, 0xa5, 0x7f, 0xc8, 0xa7, 0x74, 0x02, 0xf8, 0xa9, 0x1d, 0xf9, 0x19,
+  0x4d, 0x08, 0x80, 0x0b, 0x1e, 0x9b, 0x25, 0x10, 0x87, 0xe1, 0x06, 0xb8,
+  0xa8, 0x1f, 0x30, 0x98, 0x65, 0x88, 0x05, 0x71, 0x08, 0xec, 0x6e, 0xf2,
+  0x26, 0x3e, 0xc3, 0x11, 0x74, 0xa1, 0x37, 0xc4, 0x37, 0xcb, 0x20, 0x0b,
+  0xb5, 0x10, 0xd8, 0xde, 0xd4, 0x45, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1,
+  0x30, 0x17, 0x3c, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xf1,
+  0x3f, 0x3a, 0xdc, 0x10, 0xf4, 0x0f, 0x18, 0xcc, 0x32, 0xcc, 0x02, 0x2d,
+  0x04, 0x36, 0x8c, 0x0e, 0x7c, 0x66, 0x09, 0x72, 0xc1, 0x44, 0x87, 0x88,
+  0xcf, 0x2c, 0x41, 0x2e, 0x0c, 0x47, 0xfc, 0xc5, 0xe8, 0x08, 0xdf, 0x2c,
+  0x83, 0x2d, 0xe4, 0x42, 0x60, 0xa0, 0x41, 0x3a, 0xf1, 0xb1, 0xc0, 0xa1,
   0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2,
-  0x53, 0x84, 0x0f, 0x06, 0x3a, 0xdc, 0x10, 0xf0, 0x60, 0x00, 0x06, 0xb3,
-  0x0c, 0x7d, 0xe1, 0x17, 0x81, 0xf9, 0xde, 0x10, 0x9f, 0x59, 0x82, 0xbf,
-  0x30, 0x62, 0xfc, 0xe0, 0x33, 0x4b, 0xf0, 0x17, 0x03, 0x2d, 0x8f, 0x96,
-  0x17, 0x98, 0x5e, 0x10, 0x7d, 0x21, 0xf8, 0x05, 0xe9, 0xec, 0xc5, 0x05,
-  0xc3, 0x5c, 0xf0, 0xd4, 0x6d, 0x4f, 0xdd, 0xeb, 0x0d, 0x73, 0xe4, 0x36,
+  0x53, 0x84, 0x0a, 0xe9, 0x70, 0x43, 0x80, 0x42, 0x60, 0x30, 0xcb, 0x70,
+  0x0b, 0xb8, 0x10, 0x18, 0xeb, 0x0c, 0xf1, 0x99, 0x25, 0xc8, 0x05, 0x23,
+  0x5e, 0x07, 0x3e, 0xb3, 0x04, 0xb9, 0x30, 0xd0, 0xf2, 0x68, 0xb3, 0x80,
+  0xd1, 0x02, 0x71, 0x0b, 0x02, 0x2e, 0xd0, 0x4c, 0x2d, 0x5c, 0x30, 0x8c,
+  0xb9, 0x8e, 0xec, 0xc4, 0x67, 0x38, 0xc2, 0x35, 0x66, 0x87, 0xf8, 0x66,
+  0x19, 0x74, 0xa1, 0x17, 0x02, 0xa3, 0x9d, 0xd7, 0x88, 0x8f, 0x05, 0x03,
+  0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10,
+  0x9f, 0x22, 0x70, 0x48, 0x87, 0x1b, 0x02, 0x1b, 0x02, 0x83, 0x59, 0x86,
+  0x5d, 0xe0, 0x85, 0xc0, 0x06, 0xde, 0x81, 0xcf, 0x2c, 0x41, 0x38, 0x58,
+  0xee, 0x10, 0xf1, 0x99, 0x25, 0x08, 0x87, 0xe1, 0x88, 0xdc, 0xd0, 0x1d,
+  0xe1, 0x9b, 0x65, 0xf0, 0x85, 0x70, 0x08, 0x4c, 0x37, 0x76, 0x27, 0x3e,
+  0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20, 0x92, 0x8f,
+  0x15, 0x41, 0x7c, 0x8a, 0x18, 0x23, 0x1d, 0x6e, 0x08, 0xc2, 0x08, 0x0c,
+  0x66, 0x19, 0x7e, 0x01, 0x1c, 0x02, 0x1b, 0x9f, 0x21, 0x3e, 0xb3, 0x04,
+  0xe1, 0x60, 0x04, 0xfa, 0xc0, 0x67, 0x96, 0x20, 0x1c, 0x06, 0x5a, 0x1e,
+  0x6d, 0x17, 0x30, 0x5e, 0x20, 0x7e, 0x41, 0x00, 0x07, 0xd4, 0xe9, 0x85,
+  0x0b, 0x86, 0xb9, 0xe0, 0xa9, 0xdb, 0x9e, 0x3a, 0xda, 0x19, 0xe6, 0xd2,
+  0x65, 0x98, 0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41,
+  0x30, 0xf8, 0xe2, 0x28, 0x8c, 0x40, 0xc8, 0x8d, 0x46, 0x13, 0x02, 0x60,
+  0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48,
+  0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x38, 0x3c, 0x42, 0xa3, 0x84,
+  0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80, 0xcb, 0xa3, 0x34, 0x4a,
+  0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x38, 0x3d, 0x52, 0xa3,
+  0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0x11, 0xa5, 0x34,
+  0x52, 0xa1, 0xa0, 0x8e, 0x74, 0xe8, 0x8e, 0x46, 0x13, 0x02, 0xe0, 0x82,
+  0xc7, 0x66, 0x09, 0xc4, 0x61, 0xa0, 0xe5, 0x31, 0x8d, 0x56, 0xb0, 0xc3,
+  0x80, 0x15, 0x58, 0xe2, 0x15, 0x84, 0x70, 0xb0, 0xc3, 0x00, 0x16, 0x66,
+  0x19, 0xc6, 0xa1, 0x1c, 0xea, 0x63, 0x38, 0x42, 0x3f, 0x78, 0x68, 0xf8,
+  0x6e, 0x3f, 0x86, 0x19, 0x6e, 0x08, 0x4e, 0x88, 0x0c, 0x6a, 0x08, 0x74,
+  0x38, 0x62, 0x3f, 0xc0, 0x68, 0xf8, 0x2a, 0x10, 0xf4, 0xfa, 0x63, 0x98,
+  0xe1, 0x86, 0x40, 0x85, 0xc8, 0xa0, 0x82, 0x41, 0x67, 0x19, 0xc8, 0x21,
+  0x1f, 0x82, 0x73, 0x9f, 0x61, 0x6e, 0x64, 0x86, 0x19, 0x31, 0x38, 0x00,
+  0x10, 0x04, 0x83, 0x2f, 0x95, 0xf2, 0x08, 0x87, 0x4c, 0x69, 0x34, 0x21,
+  0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1,
+  0x88, 0x43, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80, 0x83, 0x25, 0x50,
+  0x3a, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb8, 0x58, 0x0a,
+  0x25, 0x86, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80, 0x93, 0x25,
+  0x51, 0x92, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x16, 0x5d,
+  0x0a, 0x25, 0x31, 0x0a, 0x5a, 0x49, 0x8e, 0x5e, 0x69, 0x34, 0x21, 0x00,
+  0x2e, 0x78, 0x6c, 0x96, 0x20, 0x1f, 0x86, 0x1b, 0x5a, 0x64, 0x96, 0xc0,
+  0x60, 0x96, 0xc1, 0x1c, 0xce, 0x21, 0xa8, 0x1e, 0x22, 0x25, 0xb8, 0xe0,
+  0xa9, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0x78, 0xa9, 0x94, 0x68,
+  0xc4, 0x8e, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0xea, 0xa5, 0x52,
+  0x0a, 0x84, 0x0b, 0x86, 0x29, 0x30, 0x4a, 0x25, 0xb8, 0xe0, 0xa9, 0x11,
+  0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0xc2, 0x49, 0x95, 0x6c, 0x64, 0x8f,
+  0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x12, 0x27, 0x55, 0x0a, 0x84,
+  0x0b, 0x86, 0xb9, 0xe0, 0xa9, 0x3b, 0x9e, 0xba, 0x14, 0x1a, 0xe6, 0x7c,
+  0x66, 0x98, 0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41,
+  0x30, 0xf8, 0xcc, 0xc9, 0x96, 0xea, 0x68, 0x9c, 0x46, 0x13, 0x02, 0x60,
+  0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48,
+  0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb8, 0x76, 0xea, 0xa5, 0x84,
+  0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80, 0x73, 0x27, 0x5f, 0x4a,
+  0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb8, 0x77, 0xfa, 0xa5,
+  0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0xb9, 0x27, 0x5f,
+  0xfa, 0xa3, 0x40, 0x9d, 0x5e, 0x89, 0x9d, 0x46, 0x13, 0x02, 0xe0, 0x82,
+  0xc7, 0x66, 0x09, 0xf2, 0x61, 0xb8, 0x41, 0x4d, 0xde, 0x09, 0x0c, 0x66,
+  0x19, 0xd0, 0x21, 0x1f, 0x02, 0x8b, 0xa3, 0x39, 0x8a, 0xcf, 0x70, 0x04,
+  0x9c, 0xd0, 0x11, 0xf1, 0xcd, 0x32, 0xa4, 0x03, 0x3b, 0x04, 0x56, 0x47,
+  0x71, 0x12, 0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59,
+  0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xe4, 0x93, 0x0e, 0x37, 0x04,
+  0xf7, 0x04, 0x06, 0xb3, 0x0c, 0xea, 0xb0, 0x0e, 0x81, 0x0d, 0x7d, 0x04,
+  0x9f, 0x59, 0x02, 0x78, 0x30, 0x3e, 0x22, 0xe2, 0x33, 0x4b, 0x00, 0x0f,
+  0xc3, 0x11, 0x7b, 0xd2, 0x47, 0xc2, 0x37, 0xcb, 0xd0, 0x0e, 0xf0, 0x10,
+  0x18, 0x9f, 0xf8, 0x51, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17,
+  0x3c, 0x65, 0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x41, 0x52, 0x3a,
+  0xdc, 0x10, 0x88, 0x14, 0x18, 0xcc, 0x32, 0xb8, 0xc3, 0x3b, 0x04, 0x66,
+  0x4a, 0x43, 0x7c, 0x66, 0x09, 0xe0, 0xc1, 0x88, 0x54, 0x82, 0xcf, 0x2c,
+  0x01, 0x3c, 0x0c, 0xb4, 0x3c, 0x9a, 0x3a, 0x60, 0xeb, 0x40, 0xb8, 0x83,
+  0xf0, 0x0e, 0x2c, 0xc5, 0x0e, 0x17, 0x0c, 0x63, 0xa8, 0xc4, 0x4a, 0xf1,
+  0x19, 0x8e, 0x30, 0x95, 0x56, 0x22, 0xbe, 0x59, 0x86, 0x78, 0xa0, 0x87,
+  0xc0, 0x5c, 0xe9, 0x54, 0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9,
+  0xe0, 0x29, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0x99, 0xd2,
+  0xe1, 0x86, 0x00, 0xa6, 0xc0, 0x60, 0x96, 0x41, 0x1e, 0xe6, 0x21, 0xb0,
+  0xc1, 0x96, 0xe0, 0x33, 0x4b, 0x80, 0x0f, 0x36, 0x4b, 0x44, 0x7c, 0x66,
+  0x09, 0xf0, 0x61, 0x38, 0x22, 0x56, 0x68, 0x49, 0xf8, 0x66, 0x19, 0xea,
+  0x01, 0x1f, 0x02, 0x93, 0x95, 0x5a, 0x8a, 0x8f, 0x05, 0x0e, 0x7d, 0x2e,
+  0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22,
+  0x7a, 0x4a, 0x87, 0x1b, 0x82, 0x9d, 0x02, 0x83, 0x59, 0x06, 0x7b, 0xb8,
+  0x87, 0xc0, 0x7a, 0x69, 0x88, 0xcf, 0x2c, 0x01, 0x3e, 0x18, 0x21, 0x4e,
+  0xf0, 0x99, 0x25, 0xc0, 0x87, 0x81, 0x96, 0x47, 0x93, 0x07, 0x6c, 0x1e,
+  0x08, 0x7b, 0x10, 0xee, 0x01, 0xaf, 0xe8, 0xe1, 0x82, 0x61, 0x2e, 0x78,
+  0xea, 0xb6, 0xa7, 0xce, 0x95, 0x86, 0xb9, 0xf1, 0x19, 0xe6, 0x88, 0x61,
+  0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xbe, 0xb5, 0xda,
+  0x29, 0x7d, 0x42, 0xab, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1,
+  0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00,
+  0x10, 0x04, 0x03, 0x4e, 0xae, 0xc4, 0x2a, 0x21, 0x82, 0x11, 0x03, 0x04,
+  0x00, 0x41, 0x30, 0xe0, 0xe6, 0x6a, 0xac, 0x12, 0x22, 0x18, 0x31, 0x40,
+  0x00, 0x10, 0x04, 0x03, 0x8e, 0xae, 0xc8, 0x2a, 0x21, 0x82, 0x11, 0x03,
+  0x05, 0x00, 0x41, 0x30, 0x58, 0xf8, 0x6a, 0xac, 0x48, 0x2a, 0x78, 0x2b,
+  0x9a, 0x8a, 0xab, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xb1, 0x59, 0x82, 0x7c,
+  0x18, 0x68, 0x79, 0x4c, 0x83, 0x1c, 0xd4, 0x34, 0x18, 0x07, 0x96, 0x30,
+  0x07, 0x01, 0x1f, 0xd4, 0x34, 0x38, 0x87, 0x59, 0x06, 0x7d, 0xe0, 0x87,
+  0x77, 0x19, 0x8e, 0x90, 0x17, 0x9b, 0x1a, 0xbe, 0x9b, 0x97, 0x61, 0x86,
+  0x1b, 0x82, 0x90, 0x22, 0x83, 0x1a, 0x02, 0x1d, 0x8e, 0xa8, 0x17, 0x9d,
+  0x1a, 0xbe, 0x0a, 0x04, 0xbd, 0x7b, 0x19, 0x66, 0xb8, 0x21, 0x20, 0x29,
+  0x32, 0xa8, 0x60, 0xd0, 0x59, 0x86, 0x7d, 0x80, 0x89, 0xe0, 0xd0, 0x69,
+  0x98, 0xeb, 0x9f, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0xe0, 0x1b,
+  0xad, 0xb9, 0x92, 0x29, 0xd0, 0x1a, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21,
+  0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0xe2, 0x90, 0x11, 0x03,
+  0x04, 0x00, 0x41, 0x30, 0xe0, 0x54, 0x4b, 0xaf, 0x0e, 0x22, 0x18, 0x31,
+  0x40, 0x00, 0x10, 0x04, 0x03, 0x6e, 0xb5, 0xf6, 0x8a, 0x21, 0x82, 0x11,
+  0x03, 0x04, 0x00, 0x41, 0x30, 0xe0, 0x58, 0x8b, 0xaf, 0x24, 0x22, 0x18,
+  0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x85, 0xb6, 0xf6, 0x8a, 0xa7, 0x82,
+  0xd3, 0x62, 0xab, 0xd4, 0x1a, 0x4d, 0x08, 0x80, 0x0b, 0x1e, 0x9b, 0x25,
+  0x80, 0x89, 0xe1, 0x86, 0x93, 0x69, 0x2d, 0x30, 0x98, 0x65, 0xe8, 0x07,
+  0x7f, 0x08, 0xea, 0xa6, 0xfc, 0x0a, 0x2e, 0x78, 0x6a, 0xc4, 0xe0, 0x00,
+  0x40, 0x10, 0x0c, 0x28, 0xdb, 0xfa, 0x2b, 0x96, 0x81, 0xab, 0x11, 0x83,
+  0x03, 0x00, 0x41, 0x30, 0xa0, 0x6e, 0xeb, 0xaf, 0x02, 0xe1, 0x82, 0x61,
+  0x4a, 0xa7, 0x46, 0x0b, 0x2e, 0x78, 0x6a, 0xc4, 0xe0, 0x00, 0x40, 0x10,
+  0x0c, 0xa8, 0xdd, 0x22, 0x2d, 0x98, 0xa9, 0xab, 0x11, 0x83, 0x03, 0x00,
+  0x41, 0x30, 0xa0, 0x78, 0x8b, 0xb4, 0x02, 0xe1, 0x82, 0x61, 0x2e, 0x78,
+  0xea, 0x8e, 0xa7, 0x6e, 0xa4, 0x86, 0x39, 0x1c, 0x1a, 0xe6, 0x88, 0x61,
+  0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x3e, 0xf0, 0x82,
+  0xad, 0xb7, 0xea, 0xad, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1,
+  0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00,
+  0x10, 0x04, 0x03, 0xee, 0xbc, 0x6e, 0x2b, 0x21, 0x82, 0x11, 0x03, 0x04,
+  0x00, 0x41, 0x30, 0xe0, 0xd0, 0x0b, 0xb7, 0x12, 0x22, 0x18, 0x31, 0x40,
+  0x00, 0x10, 0x04, 0x03, 0x2e, 0xbd, 0x72, 0x2b, 0x21, 0x82, 0x11, 0x03,
+  0x05, 0x00, 0x41, 0x30, 0x58, 0xe2, 0x0b, 0xb7, 0xf2, 0x2a, 0x20, 0xaf,
+  0xd4, 0x32, 0xaf, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xb1, 0x59, 0x02, 0x98,
+  0x18, 0x6e, 0x20, 0x9b, 0xf4, 0x02, 0x83, 0x59, 0x86, 0x7f, 0x80, 0x89,
+  0xc0, 0xd6, 0xaa, 0xad, 0xe2, 0x33, 0x1c, 0x81, 0x36, 0x6e, 0x45, 0x7c,
+  0xb3, 0x0c, 0x20, 0x31, 0x12, 0x81, 0xbd, 0x55, 0xda, 0xc4, 0xc7, 0x82,
+  0x81, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x18, 0xf2, 0xb1, 0x22,
+  0x88, 0x4f, 0x11, 0xf3, 0xa5, 0xc3, 0x0d, 0x41, 0x7c, 0x81, 0xc1, 0x2c,
+  0x43, 0x48, 0x88, 0x44, 0x60, 0xc3, 0x5d, 0xc1, 0x67, 0x96, 0xe0, 0x24,
+  0xcc, 0xae, 0x88, 0xf8, 0xcc, 0x12, 0x9c, 0xc4, 0x70, 0xc4, 0xdc, 0xdc,
+  0x95, 0xf0, 0xcd, 0x32, 0x90, 0xc4, 0x49, 0x04, 0x46, 0x37, 0x78, 0x15,
+  0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x10, 0xc9,
+  0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xf8, 0x97, 0x0e, 0x37, 0x04, 0xfc, 0x05,
+  0x06, 0xb3, 0x0c, 0x25, 0x61, 0x12, 0x81, 0x81, 0xd6, 0x10, 0x9f, 0x59,
+  0x82, 0x93, 0x30, 0x62, 0xb4, 0xe0, 0x33, 0x4b, 0x70, 0x12, 0x03, 0x2d,
+  0x8f, 0x16, 0x12, 0x98, 0x48, 0x10, 0x25, 0x21, 0x98, 0x04, 0xbf, 0x8d,
+  0xc4, 0x05, 0xc3, 0x98, 0x68, 0x99, 0x56, 0x7c, 0x86, 0x23, 0x40, 0xe7,
+  0xb4, 0x88, 0x6f, 0x96, 0x01, 0x25, 0x56, 0x22, 0x30, 0xd4, 0x0a, 0x9d,
+  0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca, 0x02, 0x43,
+  0x3e, 0x56, 0x04, 0xf1, 0x29, 0x82, 0xc5, 0x74, 0xb8, 0x21, 0x50, 0x31,
+  0x30, 0x98, 0x65, 0x48, 0x09, 0x95, 0x08, 0x6c, 0x80, 0x2d, 0xf8, 0xcc,
+  0x12, 0xbc, 0x84, 0xb5, 0x16, 0x11, 0x9f, 0x59, 0x82, 0x97, 0x18, 0x8e,
+  0x58, 0x1d, 0xd7, 0x12, 0xbe, 0x59, 0x06, 0x96, 0x78, 0x89, 0xc0, 0x58,
+  0xe7, 0xb5, 0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29,
+  0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0x1b, 0xd3, 0xe1, 0x86,
+  0xa0, 0xc6, 0xc0, 0x60, 0x96, 0xa1, 0x25, 0x5c, 0x22, 0xb0, 0xdb, 0x1a,
+  0xe2, 0x33, 0x4b, 0xf0, 0x12, 0x46, 0xf0, 0x16, 0x7c, 0x66, 0x09, 0x5e,
+  0x62, 0xa0, 0xe5, 0xd1, 0x52, 0x02, 0x53, 0x09, 0xa2, 0x25, 0x04, 0x97,
+  0x60, 0xbb, 0x95, 0xb8, 0x60, 0x98, 0x0b, 0x9e, 0xba, 0xed, 0xa9, 0x43,
+  0xad, 0x61, 0xae, 0x97, 0x86, 0x39, 0x62, 0x98, 0x23, 0x86, 0x19, 0x31,
+  0x38, 0x00, 0x10, 0x04, 0x83, 0xaf, 0xcc, 0x6a, 0x8c, 0xbe, 0xc4, 0x6c,
+  0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13,
+  0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80, 0x63,
+  0x33, 0x1e, 0x4b, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb8,
+  0x36, 0xeb, 0xb1, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80,
+  0x73, 0x33, 0x1f, 0x4b, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c,
+  0x16, 0x3b, 0xeb, 0x31, 0xff, 0x0a, 0xd2, 0xcc, 0xc5, 0xd6, 0x6c, 0x34,
+  0x21, 0x00, 0x2e, 0x78, 0x6c, 0x96, 0x00, 0x26, 0x06, 0x5a, 0x1e, 0xd3,
+  0xd8, 0x07, 0x1f, 0x0e, 0xf4, 0x81, 0x25, 0xfa, 0x41, 0x78, 0x09, 0x1f,
+  0x0e, 0xfc, 0x61, 0xc4, 0xc0, 0x00, 0x40, 0x10, 0x0c, 0x20, 0x3c, 0xcb,
+  0xb1, 0x77, 0x32, 0xfb, 0x80, 0x97, 0xf8, 0x98, 0x10, 0xc8, 0xc7, 0x02,
+  0x79, 0x81, 0x8f, 0x15, 0xff, 0x10, 0x1f, 0x2b, 0x02, 0xf9, 0x58, 0x10,
+  0x12, 0xf0, 0x19, 0x31, 0x30, 0x00, 0x10, 0x04, 0x03, 0xe8, 0xcf, 0xc0,
+  0xac, 0x9e, 0x4c, 0x28, 0xe2, 0x63, 0x81, 0x20, 0x1f, 0x0b, 0x0e, 0xf8,
+  0x5c, 0x60, 0xd0, 0x88, 0x81, 0x03, 0x80, 0x20, 0x18, 0x34, 0xa5, 0x46,
+  0x66, 0x30, 0x96, 0x62, 0x79, 0x16, 0xfc, 0xd8, 0x8f, 0xfd, 0x98, 0x8f,
+  0xed, 0xd9, 0x2c, 0xc1, 0x08, 0x0d, 0x37, 0xf8, 0xd5, 0x9e, 0x81, 0xc1,
+  0x2c, 0x83, 0x4c, 0x8c, 0x50, 0x30, 0x62, 0x60, 0x00, 0x20, 0x08, 0x06,
+  0x50, 0xa9, 0x9d, 0x19, 0x3f, 0x59, 0x90, 0x63, 0xf0, 0x19, 0x31, 0x30,
+  0x00, 0x10, 0x04, 0x03, 0xe8, 0xd4, 0xd2, 0xac, 0x9f, 0x2c, 0xd8, 0x31,
+  0xf8, 0x8c, 0x26, 0xc0, 0xd8, 0x30, 0xdc, 0x10, 0xfc, 0x19, 0x18, 0xcc,
+  0x32, 0xcc, 0x44, 0x4d, 0x04, 0xc3, 0x11, 0xc5, 0x98, 0x0d, 0xdf, 0x19,
+  0xc3, 0x0c, 0x37, 0x04, 0x2e, 0x46, 0x06, 0x35, 0x04, 0x3a, 0x1c, 0x71,
+  0x9c, 0xd9, 0xf0, 0x55, 0x20, 0xe8, 0x25, 0xc3, 0x0c, 0x37, 0x04, 0x31,
+  0x46, 0x06, 0x15, 0x0c, 0x3a, 0xcb, 0x40, 0x13, 0x69, 0x11, 0x5c, 0x7d,
+  0x0d, 0x73, 0x2a, 0x35, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x7c,
+  0xb0, 0x06, 0x6a, 0x3f, 0xd6, 0x6a, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20,
+  0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x1c, 0x32, 0x62,
+  0x80, 0x00, 0x20, 0x08, 0x06, 0xdc, 0xad, 0x9d, 0xda, 0x41, 0x04, 0x23,
+  0x06, 0x08, 0x00, 0x82, 0x60, 0xc0, 0xe1, 0x1a, 0xaa, 0x31, 0x44, 0x30,
+  0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x5c, 0xae, 0xa5, 0x9a, 0x44, 0x04,
+  0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0x84, 0x1b, 0xaa, 0xa5, 0x59,
+  0x40, 0x6b, 0x79, 0x66, 0x6b, 0xa3, 0x09, 0x01, 0x70, 0xc1, 0x63, 0xb3,
+  0x04, 0x69, 0x31, 0xdc, 0x90, 0xe9, 0x1a, 0x18, 0xcc, 0x32, 0xd8, 0xc4,
+  0x4d, 0x04, 0x45, 0x66, 0xab, 0x06, 0x17, 0x3c, 0x35, 0x62, 0x70, 0x00,
+  0x20, 0x08, 0x06, 0xd4, 0xb8, 0xb1, 0xda, 0xd7, 0x67, 0x23, 0x06, 0x07,
+  0x00, 0x82, 0x60, 0x40, 0x91, 0x1b, 0xab, 0x05, 0xc2, 0x05, 0xc3, 0xd4,
+  0x99, 0xc1, 0x1a, 0x5c, 0xf0, 0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18,
+  0x50, 0xe8, 0x16, 0x6b, 0x62, 0x20, 0x6a, 0x23, 0x06, 0x07, 0x00, 0x82,
+  0x60, 0x40, 0xa5, 0x5b, 0xac, 0x05, 0xc2, 0x05, 0xc3, 0x5c, 0xf0, 0xd4,
+  0x1d, 0x4f, 0x1d, 0x8c, 0x0d, 0x73, 0x65, 0x35, 0xcc, 0x11, 0xc3, 0x1c,
+  0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x7c, 0xed, 0xd6, 0x6b,
+  0x7c, 0xa6, 0x6e, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09,
+  0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20,
+  0x08, 0x06, 0x1c, 0xbd, 0x91, 0x5b, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00,
+  0x82, 0x60, 0xc0, 0xd5, 0x5b, 0xb9, 0x25, 0x44, 0x30, 0x62, 0x80, 0x00,
+  0x20, 0x08, 0x06, 0x9c, 0xbd, 0x99, 0x5b, 0x42, 0x04, 0x23, 0x06, 0x0a,
+  0x00, 0x82, 0x60, 0xb0, 0xf8, 0x5b, 0xb9, 0x99, 0x5a, 0x10, 0x6f, 0xb6,
+  0x36, 0x6f, 0xa3, 0x09, 0x01, 0x70, 0xc1, 0x63, 0xb3, 0x04, 0x69, 0x31,
+  0xdc, 0x60, 0x07, 0xf6, 0x06, 0x06, 0xb3, 0x0c, 0x38, 0x91, 0x16, 0x81,
+  0xe1, 0x99, 0x9e, 0xc5, 0x67, 0x38, 0x62, 0x0f, 0xf6, 0x8c, 0xf8, 0x66,
+  0x19, 0x72, 0x82, 0x27, 0x02, 0xe3, 0x33, 0x3e, 0x88, 0x8f, 0x05, 0x03,
+  0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10,
+  0x9f, 0x22, 0x40, 0x4e, 0x87, 0x1b, 0x02, 0x7f, 0x03, 0x83, 0x59, 0x06,
+  0x9d, 0xd8, 0x89, 0xc0, 0x06, 0x52, 0x83, 0xcf, 0x2c, 0x01, 0x58, 0xd8,
+  0xa8, 0x11, 0xf1, 0x99, 0x25, 0x00, 0x8b, 0xe1, 0x08, 0x53, 0x20, 0x35,
+  0xe1, 0x9b, 0x65, 0xe8, 0x09, 0xb0, 0x08, 0xec, 0x14, 0x4a, 0x2d, 0x3e,
+  0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20, 0x92, 0x8f,
+  0x15, 0x41, 0x7c, 0x8a, 0x58, 0x39, 0x1d, 0x6e, 0x08, 0x52, 0x0e, 0x0c,
+  0x66, 0x19, 0x7c, 0xe2, 0x27, 0x02, 0x6b, 0xb5, 0x21, 0x3e, 0xb3, 0x04,
+  0x60, 0x61, 0x04, 0xac, 0xc1, 0x67, 0x96, 0x00, 0x2c, 0x06, 0x5a, 0x1e,
+  0x4d, 0x27, 0xb0, 0x9d, 0x20, 0x7c, 0x42, 0xf8, 0x09, 0xb1, 0xe0, 0x89,
+  0x0b, 0x86, 0xb1, 0x57, 0x9b, 0xb5, 0xf8, 0x0c, 0x47, 0xc8, 0x02, 0xad,
+  0x11, 0xdf, 0x2c, 0x43, 0x58, 0x90, 0x45, 0x60, 0xb5, 0x36, 0x0b, 0xf1,
+  0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x86, 0x7c,
+  0xac, 0x08, 0xe2, 0x53, 0x44, 0xce, 0xe9, 0x70, 0x43, 0x70, 0x73, 0x60,
+  0x30, 0xcb, 0x20, 0x16, 0x63, 0x11, 0xd8, 0xd0, 0x6b, 0xf0, 0x99, 0x25,
+  0x40, 0x0b, 0xd3, 0x35, 0x22, 0x3e, 0xb3, 0x04, 0x68, 0x31, 0x1c, 0xd1,
+  0x0b, 0xbb, 0x26, 0x7c, 0xb3, 0x0c, 0x65, 0x81, 0x16, 0x81, 0xf9, 0x02,
+  0xaf, 0xc5, 0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16,
+  0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x64, 0xa7, 0xc3, 0x0d, 0x81,
+  0xd8, 0x81, 0xc1, 0x2c, 0x83, 0x59, 0x9c, 0x45, 0x60, 0xe4, 0x36, 0xc4,
+  0x67, 0x96, 0x00, 0x2d, 0x8c, 0x48, 0x37, 0xf8, 0xcc, 0x12, 0xa0, 0xc5,
+  0x40, 0xcb, 0xa3, 0x89, 0x05, 0x36, 0x16, 0x84, 0x59, 0x08, 0x67, 0x01,
+  0x1b, 0x64, 0x71, 0xc1, 0x30, 0x17, 0x3c, 0x75, 0xdb, 0x53, 0x57, 0x6b,
+  0xc3, 0x9c, 0x7a, 0x0d, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70,
+  0x00, 0x20, 0x08, 0x06, 0x9f, 0xdc, 0x89, 0x5d, 0xc8, 0xbd, 0xdd, 0x68,
+  0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10,
+  0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x97, 0x77,
+  0x69, 0x97, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70, 0x7a,
+  0xa7, 0x76, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0xb7,
+  0x77, 0x6b, 0x97, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c,
+  0xa3, 0xa7, 0x76, 0x2b, 0x17, 0xd8, 0xdd, 0xce, 0xe1, 0xdd, 0x68, 0x42,
+  0x00, 0x5c, 0xf0, 0xd8, 0x2c, 0x41, 0x5a, 0x0c, 0xb4, 0x3c, 0xa6, 0x41,
+  0x13, 0x78, 0x1e, 0xcc, 0x04, 0x4b, 0xd8, 0x84, 0x80, 0x16, 0x78, 0x1e,
+  0xdc, 0xc4, 0x2c, 0x83, 0x5a, 0xb0, 0xc5, 0x3e, 0x0c, 0x47, 0x80, 0x44,
+  0xcf, 0x0d, 0xdf, 0x85, 0xc4, 0x30, 0xc3, 0x0d, 0x01, 0xca, 0x91, 0x41,
+  0x0d, 0x81, 0x0e, 0x47, 0x84, 0x44, 0xd8, 0x0d, 0x5f, 0x05, 0x82, 0xde,
+  0x48, 0x0c, 0x33, 0xdc, 0x10, 0xac, 0x1c, 0x19, 0x54, 0x30, 0xe8, 0x2c,
+  0xc3, 0x5a, 0x80, 0x46, 0x70, 0xef, 0x36, 0xcc, 0x91, 0xd8, 0x30, 0x23,
+  0x06, 0x07, 0x00, 0x82, 0x60, 0xf0, 0xa9, 0x9e, 0xde, 0xe5, 0xdc, 0xe9,
+  0x8d, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68,
+  0x02, 0x31, 0x14, 0x71, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70,
+  0xb1, 0x17, 0x7a, 0x07, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01,
+  0x27, 0x7b, 0xa2, 0xc7, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
+  0x70, 0xb3, 0x37, 0x7a, 0x12, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82,
+  0xc1, 0xb2, 0x7b, 0xa2, 0x37, 0x76, 0x81, 0xeb, 0xcd, 0x1d, 0xec, 0x8d,
+  0x26, 0x04, 0xc0, 0x05, 0x8f, 0xcd, 0x12, 0x80, 0xc6, 0x70, 0xc3, 0x4c,
+  0xd0, 0x1e, 0x18, 0xcc, 0x32, 0xb4, 0x85, 0x5b, 0x04, 0xe5, 0x73, 0xa5,
+  0x07, 0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x54, 0xef,
+  0x99, 0x9e, 0x4e, 0xdc, 0xdd, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50,
+  0xbe, 0x67, 0x7a, 0x81, 0x70, 0xc1, 0x30, 0x15, 0x76, 0xaa, 0x07, 0x17,
+  0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x94, 0xf8, 0xad, 0x1e,
+  0x4f, 0xf0, 0xdd, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50, 0xe3, 0xb7,
+  0x7a, 0x81, 0x70, 0xc1, 0x30, 0x17, 0x3c, 0x75, 0xc7, 0x53, 0xa7, 0x72,
+  0xc3, 0xdc, 0x8f, 0x0d, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70,
+  0x00, 0x20, 0x08, 0x06, 0xdf, 0xf9, 0xdd, 0x9e, 0xdd, 0x91, 0xdf, 0x68,
+  0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10,
+  0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0xe7, 0x7e,
+  0xbe, 0x97, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70, 0xef,
+  0xf7, 0x7b, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x07,
+  0x7f, 0xe0, 0x97, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c,
+  0xf8, 0xf7, 0x7b, 0xa0, 0x17, 0xac, 0x1f, 0xec, 0xb5, 0xdf, 0x68, 0x42,
+  0x00, 0x5c, 0xf0, 0xd8, 0x2c, 0x01, 0x68, 0x0c, 0x37, 0xc0, 0x05, 0xfc,
+  0x81, 0xc1, 0x2c, 0xc3, 0x5b, 0x80, 0x46, 0x60, 0x72, 0x47, 0x77, 0xf1,
+  0x19, 0x8e, 0xb0, 0x8b, 0xba, 0x23, 0xbe, 0x59, 0x06, 0xb8, 0x98, 0x8b,
+  0xc0, 0xec, 0xee, 0x2e, 0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9,
+  0xe0, 0x29, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0xfd, 0xd3,
+  0xe1, 0x86, 0x00, 0xff, 0xc0, 0x60, 0x96, 0x21, 0x2e, 0xe4, 0x22, 0xb0,
+  0xc1, 0xef, 0xe0, 0x33, 0x4b, 0x70, 0x17, 0xd6, 0x77, 0x44, 0x7c, 0x66,
+  0x09, 0xee, 0x62, 0x38, 0x22, 0x34, 0xfc, 0x4e, 0xf8, 0x66, 0x19, 0xe8,
+  0xe2, 0x2e, 0x02, 0x13, 0x8d, 0xbf, 0x8b, 0x8f, 0x05, 0x0e, 0x7d, 0x2e,
+  0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22,
+  0x4a, 0x30, 0xd0, 0xe1, 0x86, 0x60, 0x04, 0x03, 0x30, 0x98, 0x65, 0xa8,
+  0x0b, 0xbb, 0x08, 0xec, 0xf4, 0x86, 0xf8, 0xcc, 0x12, 0xdc, 0x85, 0x11,
+  0xaa, 0x07, 0x9f, 0x59, 0x82, 0xbb, 0x18, 0x68, 0x79, 0xb4, 0xb8, 0xc0,
+  0xe4, 0x82, 0xa8, 0x0b, 0xc1, 0x2e, 0x70, 0x66, 0x2e, 0x2e, 0x18, 0xc6,
+  0x52, 0xaf, 0xf5, 0xe2, 0x33, 0x1c, 0xc1, 0x1a, 0xae, 0x47, 0x7c, 0xb3,
+  0x0c, 0x78, 0xb1, 0x17, 0x81, 0xbd, 0x5e, 0x6b, 0xc4, 0xc7, 0x82, 0x81,
+  0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88,
+  0x4f, 0x11, 0x33, 0x18, 0xe8, 0x70, 0x43, 0x10, 0x83, 0x01, 0x18, 0xcc,
+  0x32, 0xe4, 0x85, 0x5e, 0x04, 0x36, 0xdc, 0x1e, 0x7c, 0x66, 0x09, 0xfe,
+  0xc2, 0x68, 0x8f, 0x88, 0xcf, 0x2c, 0xc1, 0x5f, 0x0c, 0x47, 0xdc, 0x46,
+  0xed, 0x09, 0xdf, 0x2c, 0x03, 0x5f, 0xfc, 0x45, 0x60, 0xb8, 0x61, 0x7b,
+  0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x91,
+  0x7c, 0xac, 0x08, 0xe2, 0x53, 0x84, 0x0f, 0x06, 0x3a, 0xdc, 0x10, 0xf0,
+  0x60, 0x00, 0x06, 0xb3, 0x0c, 0x7d, 0xe1, 0x17, 0x81, 0xf9, 0xde, 0x10,
+  0x9f, 0x59, 0x82, 0xbf, 0x30, 0x62, 0xfc, 0xe0, 0x33, 0x4b, 0xf0, 0x17,
+  0x03, 0x2d, 0x8f, 0x96, 0x17, 0x98, 0x5e, 0x10, 0x7d, 0x21, 0xf8, 0x05,
+  0xe9, 0xec, 0xc5, 0x05, 0xc3, 0x5c, 0xf0, 0xd4, 0x6d, 0x4f, 0xdd, 0xeb,
+  0x0d, 0x73, 0xe4, 0x36, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1,
+  0x01, 0x80, 0x20, 0x18, 0x7c, 0x6c, 0x18, 0xf0, 0x60, 0xb0, 0x7f, 0x69,
+  0x18, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2,
+  0x68, 0x02, 0x31, 0x14, 0x91, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
+  0x70, 0x73, 0x18, 0x8c, 0x61, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80,
+  0x20, 0x18, 0x70, 0x74, 0x18, 0x90, 0x61, 0x90, 0x10, 0xc1, 0x88, 0x01,
+  0x02, 0x80, 0x20, 0x18, 0x70, 0x75, 0x18, 0x94, 0x61, 0x90, 0x10, 0xc1,
+  0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c, 0x7d, 0x18, 0x90, 0x61, 0x50,
+  0x82, 0x41, 0x00, 0x87, 0x41, 0x0d, 0x06, 0x72, 0x18, 0x8c, 0x26, 0x04,
+  0xc0, 0x05, 0x8f, 0xcd, 0x12, 0x80, 0xc6, 0x40, 0xcb, 0x63, 0x1a, 0x6b,
+  0xc1, 0x86, 0x82, 0x5a, 0xb0, 0x44, 0x5b, 0x08, 0x7f, 0xc1, 0x86, 0x82,
+  0x5b, 0x98, 0x7e, 0xd8, 0x60, 0x00, 0x9f, 0x59, 0x86, 0xd0, 0x18, 0x0d,
+  0xfb, 0x18, 0x8e, 0x08, 0x70, 0x30, 0x18, 0xbe, 0x13, 0x86, 0x19, 0x6e,
+  0x08, 0x46, 0x30, 0x20, 0x83, 0x1a, 0x02, 0x1d, 0x8e, 0xe0, 0x0f, 0x1e,
+  0x0c, 0x86, 0xaf, 0x02, 0x41, 0xcf, 0x3f, 0x86, 0x19, 0x6e, 0x08, 0x4c,
+  0x30, 0x20, 0x83, 0x0a, 0x06, 0x9d, 0x65, 0x10, 0x8d, 0xdb, 0x08, 0x4e,
+  0xfd, 0x86, 0xb9, 0x7f, 0x1b, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c,
+  0xbe, 0x52, 0x0c, 0xea, 0x30, 0xa0, 0xc1, 0x40, 0x14, 0x83, 0xd1, 0x84,
+  0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86,
+  0x22, 0x0e, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x8e, 0x15, 0x03,
+  0x3e, 0x0c, 0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xae,
+  0x15, 0x83, 0x3e, 0x0c, 0x18, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04,
+  0x03, 0xce, 0x15, 0x03, 0x3f, 0x0c, 0x24, 0x22, 0x18, 0x31, 0x50, 0x00,
+  0x10, 0x04, 0x83, 0xc5, 0x16, 0x83, 0x3e, 0x0c, 0x7c, 0x30, 0x08, 0x52,
+  0x31, 0x70, 0xc3, 0x60, 0x15, 0x83, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xb1,
+  0x59, 0x82, 0xdb, 0x18, 0x6e, 0x70, 0x91, 0x57, 0x0c, 0xc0, 0x60, 0x96,
+  0x81, 0x34, 0x4a, 0x23, 0xa8, 0x1c, 0x0c, 0x40, 0x31, 0x80, 0x0b, 0x9e,
+  0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x0a, 0x17, 0x83, 0x50, 0x0c,
+  0x36, 0x39, 0x0c, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0xca, 0xc5,
+  0x20, 0x14, 0x83, 0x40, 0xb8, 0x60, 0x98, 0xe2, 0xc1, 0xa0, 0x14, 0x03,
+  0xb8, 0xe0, 0xa9, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0x7a, 0x31,
+  0x30, 0xc5, 0xe0, 0x46, 0xee, 0x30, 0x18, 0x31, 0x38, 0x00, 0x10, 0x04,
+  0x03, 0xca, 0x17, 0x03, 0x53, 0x0c, 0x02, 0xe1, 0x82, 0x61, 0x2e, 0x78,
+  0xea, 0x8e, 0xa7, 0xae, 0x04, 0x83, 0x61, 0x4e, 0xe7, 0x86, 0x39, 0x62,
+  0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x4f, 0x1c,
+  0x03, 0x59, 0x0c, 0xe2, 0x30, 0xf8, 0xc5, 0x60, 0x34, 0x21, 0x00, 0x46,
+  0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44,
+  0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80, 0x4b, 0xc7, 0x20, 0x17, 0x83,
+  0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80, 0x53, 0xc7, 0x40,
+  0x17, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80, 0x5b,
+  0xc7, 0x60, 0x17, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1,
+  0x60, 0x99, 0xc7, 0x40, 0x17, 0x83, 0x3d, 0x0c, 0x02, 0x73, 0x0c, 0x56,
+  0x31, 0x40, 0xc7, 0x60, 0x34, 0x21, 0x00, 0x2e, 0x78, 0x6c, 0x96, 0xe0,
+  0x36, 0x86, 0x1b, 0xd6, 0x64, 0x1d, 0x03, 0x30, 0x98, 0x65, 0x30, 0x8d,
+  0xdb, 0x08, 0xac, 0x0d, 0x83, 0x37, 0x0c, 0xe2, 0x33, 0x1c, 0x71, 0x07,
+  0x70, 0x18, 0x10, 0xdf, 0x2c, 0xc3, 0x69, 0xa8, 0x46, 0x60, 0x71, 0x18,
+  0xe0, 0x41, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65,
+  0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x51, 0x8f, 0x81, 0x0e, 0x37,
+  0x04, 0xf3, 0x18, 0x80, 0xc1, 0x2c, 0x03, 0x6a, 0xa4, 0x46, 0x60, 0x43,
+  0x1e, 0x06, 0xf0, 0x99, 0x25, 0x70, 0x0d, 0xc3, 0xc3, 0x80, 0x88, 0xcf,
+  0x2c, 0x81, 0x6b, 0x0c, 0x47, 0x88, 0x42, 0x1e, 0x06, 0xc2, 0x37, 0xcb,
+  0xb0, 0x1a, 0xae, 0x11, 0xd8, 0x28, 0xe8, 0x61, 0x10, 0x1f, 0x0b, 0x1c,
+  0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20,
+  0x3e, 0x45, 0x80, 0x64, 0xa0, 0xc3, 0x0d, 0x81, 0x3f, 0x06, 0x60, 0x30,
+  0xcb, 0xc0, 0x1a, 0xad, 0x11, 0x98, 0x28, 0x06, 0x43, 0x7c, 0x66, 0x09,
+  0x5c, 0xc3, 0x88, 0x52, 0x0c, 0xe0, 0x33, 0x4b, 0xe0, 0x1a, 0x03, 0x2d,
+  0x8f, 0x86, 0x1a, 0x58, 0x6a, 0x10, 0xac, 0x21, 0xb4, 0x86, 0x4e, 0xa8,
+  0xc6, 0x05, 0xc3, 0x18, 0x29, 0x06, 0xa8, 0x18, 0xc4, 0x67, 0x38, 0xe2,
+  0x54, 0x52, 0x31, 0x20, 0xbe, 0x59, 0x86, 0xd7, 0x90, 0x8d, 0xc0, 0x54,
+  0x31, 0x40, 0x95, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78,
+  0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0xc2, 0x25, 0x03, 0x1d,
+  0x6e, 0x08, 0x58, 0x32, 0x00, 0x83, 0x59, 0x06, 0xd8, 0x88, 0x8d, 0xc0,
+  0x06, 0x59, 0x0c, 0xe0, 0x33, 0x4b, 0x60, 0x1b, 0xf6, 0x8a, 0x01, 0x11,
+  0x9f, 0x59, 0x02, 0xdb, 0x18, 0x8e, 0x90, 0x15, 0x58, 0x0c, 0x84, 0x6f,
+  0x96, 0x61, 0x36, 0x6c, 0x23, 0xb0, 0x59, 0x89, 0xc5, 0x20, 0x3e, 0x16,
+  0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20, 0x92, 0x8f, 0x15,
+  0x41, 0x7c, 0x8a, 0xc8, 0xc9, 0x40, 0x87, 0x1b, 0x82, 0x9b, 0x0c, 0xc0,
+  0x60, 0x96, 0x81, 0x36, 0x6a, 0x23, 0xb0, 0x5c, 0x0c, 0x86, 0xf8, 0xcc,
+  0x12, 0xd8, 0x86, 0x11, 0xbe, 0x18, 0xc0, 0x67, 0x96, 0xc0, 0x36, 0x06,
+  0x5a, 0x1e, 0x0d, 0x36, 0xb0, 0xd8, 0x20, 0x68, 0x43, 0xa8, 0x0d, 0xbd,
+  0x92, 0x8d, 0x0b, 0x86, 0xb9, 0xe0, 0xa9, 0xdb, 0x9e, 0x3a, 0x55, 0x0c,
+  0x86, 0xb9, 0xdf, 0x1b, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0,
+  0x00, 0x40, 0x10, 0x0c, 0xbe, 0xb3, 0x0c, 0x6e, 0x32, 0xb0, 0xc7, 0x80,
+  0x2c, 0x83, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41,
+  0x18, 0x4d, 0x20, 0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04,
+  0x03, 0xce, 0x2d, 0x03, 0x9f, 0x0c, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00,
+  0x10, 0x04, 0x03, 0xee, 0x2d, 0x83, 0x9f, 0x0c, 0x12, 0x22, 0x18, 0x31,
+  0x40, 0x00, 0x10, 0x04, 0x03, 0x0e, 0x2e, 0x03, 0xb0, 0x0c, 0x12, 0x22,
+  0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x05, 0x2f, 0x83, 0x9f, 0x0c,
+  0x40, 0x32, 0x08, 0xd6, 0x32, 0x80, 0xc9, 0xa0, 0x2d, 0x83, 0xd1, 0x84,
+  0x00, 0xb8, 0xe0, 0xb1, 0x59, 0x82, 0xdb, 0x18, 0x68, 0x79, 0x4c, 0x43,
+  0x34, 0xc4, 0x54, 0x08, 0x0d, 0x96, 0x20, 0x0d, 0xc1, 0x36, 0xc4, 0x54,
+  0x28, 0x0d, 0xab, 0x97, 0x93, 0x0c, 0xe0, 0x33, 0xcb, 0x80, 0x1b, 0xba,
+  0x11, 0x2f, 0xc3, 0x11, 0xc1, 0x4c, 0x06, 0xc3, 0x77, 0xc2, 0x30, 0xc3,
+  0x0d, 0x81, 0x3f, 0x06, 0x64, 0x50, 0x43, 0xa0, 0xc3, 0x11, 0xf7, 0x72,
+  0x93, 0xc1, 0xf0, 0x55, 0x20, 0xe8, 0xe5, 0xcb, 0x30, 0xc3, 0x0d, 0x41,
+  0x48, 0x06, 0x64, 0x50, 0xc1, 0xa0, 0xb3, 0x0c, 0xb9, 0xe1, 0x1e, 0xc1,
+  0x95, 0x63, 0x30, 0xcc, 0xe9, 0xdf, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82,
+  0x60, 0xf0, 0x81, 0x66, 0x00, 0x97, 0xc1, 0x4b, 0x06, 0x7d, 0x19, 0x8c,
+  0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02,
+  0x31, 0x14, 0x71, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70, 0xa7,
+  0x19, 0xdc, 0x65, 0x70, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
+  0x70, 0xa8, 0x19, 0xe0, 0x65, 0xc0, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80,
+  0x20, 0x18, 0x70, 0xa9, 0x19, 0xe4, 0x65, 0x20, 0x11, 0xc1, 0x88, 0x81,
+  0x02, 0x80, 0x20, 0x18, 0x2c, 0xb1, 0x19, 0xe0, 0x65, 0x90, 0x93, 0x41,
+  0x40, 0x9a, 0x41, 0x5a, 0x06, 0xa6, 0x19, 0x8c, 0x26, 0x04, 0xc0, 0x05,
+  0x8f, 0xcd, 0x12, 0xb8, 0xc7, 0x70, 0x43, 0xca, 0xa8, 0x66, 0x00, 0x06,
+  0xb3, 0x0c, 0xbb, 0xc1, 0x1b, 0x41, 0xd1, 0x64, 0xb0, 0x97, 0x01, 0x5c,
+  0xf0, 0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50, 0xb3, 0x19, 0xf0,
+  0x65, 0xb0, 0xb5, 0x65, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x14,
+  0x6d, 0x06, 0x7c, 0x19, 0x04, 0xc2, 0x05, 0xc3, 0xd4, 0x4d, 0x06, 0xa0,
+  0x19, 0xc0, 0x05, 0x4f, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x01, 0x85,
+  0x9b, 0x41, 0x68, 0x06, 0x32, 0x23, 0x97, 0xc1, 0x88, 0xc1, 0x01, 0x80,
+  0x20, 0x18, 0x50, 0xb9, 0x19, 0x84, 0x66, 0x10, 0x08, 0x17, 0x0c, 0x73,
+  0xc1, 0x53, 0x77, 0x3c, 0x75, 0x20, 0x19, 0x0c, 0x73, 0x35, 0x18, 0x0c,
+  0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06,
+  0x5f, 0x6f, 0x06, 0xad, 0x19, 0xb0, 0x65, 0xa0, 0x9b, 0xc1, 0x68, 0x42,
+  0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43,
+  0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x47, 0x9e, 0x01,
+  0x6d, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x57,
+  0x9e, 0x41, 0x6d, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
+  0x01, 0x67, 0x9e, 0x81, 0x6d, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x28, 0x00,
+  0x08, 0x82, 0xc1, 0xe2, 0x9e, 0x41, 0x6d, 0x06, 0x76, 0x19, 0x04, 0xe1,
+  0x19, 0x98, 0x66, 0x30, 0x9e, 0xc1, 0x68, 0x42, 0x00, 0x5c, 0xf0, 0xd8,
+  0x2c, 0x81, 0x7b, 0x0c, 0x37, 0x98, 0x8d, 0x79, 0x06, 0x60, 0x30, 0xcb,
+  0xd0, 0x1b, 0xee, 0x11, 0x18, 0x5a, 0x06, 0x6a, 0x19, 0xc4, 0x67, 0x38,
+  0xe2, 0x0e, 0xd6, 0x32, 0x20, 0xbe, 0x59, 0x06, 0xdf, 0x08, 0x8f, 0xc0,
+  0xd8, 0x32, 0xc0, 0x83, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e,
+  0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x02, 0x3e, 0x03,
+  0x1d, 0x6e, 0x08, 0xdc, 0x33, 0x00, 0x83, 0x59, 0x86, 0xdf, 0x00, 0x8f,
+  0xc0, 0x06, 0xba, 0x0c, 0xe0, 0x33, 0x4b, 0x50, 0x1e, 0x36, 0x97, 0x01,
+  0x11, 0x9f, 0x59, 0x82, 0xf2, 0x18, 0x8e, 0x10, 0x05, 0xba, 0x0c, 0x84,
+  0x6f, 0x96, 0x41, 0x3c, 0xca, 0x23, 0xb0, 0x51, 0xa8, 0xcb, 0x20, 0x3e,
+  0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20, 0x92, 0x8f,
+  0x15, 0x41, 0x7c, 0x8a, 0xd8, 0xcf, 0x40, 0x87, 0x1b, 0x82, 0xfc, 0x0c,
+  0xc0, 0x60, 0x96, 0x61, 0x3c, 0xc8, 0x23, 0xb0, 0xbe, 0x0c, 0x86, 0xf8,
+  0xcc, 0x12, 0x94, 0x87, 0x11, 0xa0, 0x19, 0xc0, 0x67, 0x96, 0xa0, 0x3c,
+  0x06, 0x5a, 0x1e, 0xed, 0x37, 0x30, 0xf0, 0x20, 0xc6, 0x43, 0x20, 0x0f,
+  0x9d, 0x08, 0x8f, 0x0b, 0x86, 0xb1, 0xbf, 0x0c, 0x46, 0x33, 0x88, 0xcf,
+  0x70, 0x84, 0xe8, 0x90, 0x66, 0x40, 0x7c, 0xb3, 0x0c, 0xe6, 0x91, 0x1e,
+  0x81, 0x95, 0x66, 0x30, 0x3a, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3,
+  0x5c, 0xf0, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x44, 0x8a,
+  0x06, 0x3a, 0xdc, 0x10, 0x9c, 0x68, 0x00, 0x06, 0xb3, 0x0c, 0xe7, 0x81,
+  0x1e, 0x81, 0x0d, 0xad, 0x19, 0xc0, 0x67, 0x96, 0xa0, 0x3d, 0x4c, 0x35,
+  0x03, 0x22, 0x3e, 0xb3, 0x04, 0xed, 0x31, 0x1c, 0xd1, 0x3a, 0xab, 0x19,
+  0x08, 0xdf, 0x2c, 0x83, 0x7a, 0xb4, 0x47, 0x60, 0xae, 0xc3, 0x9a, 0x41,
+  0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x41, 0x24,
+  0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x41, 0xa3, 0x81, 0x0e, 0x37, 0x04, 0x32,
+  0x1a, 0x80, 0xc1, 0x2c, 0xc3, 0x7a, 0xb0, 0x47, 0x60, 0xb4, 0x19, 0x0c,
+  0xf1, 0x99, 0x25, 0x68, 0x0f, 0x23, 0x72, 0x33, 0x80, 0xcf, 0x2c, 0x41,
+  0x7b, 0x0c, 0xb4, 0x3c, 0xda, 0x79, 0x60, 0xe8, 0x41, 0xac, 0x87, 0xc0,
+  0x1e, 0x70, 0x97, 0x1e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0xb7, 0x3d, 0x75,
+  0xa5, 0x19, 0x0c, 0x73, 0xba, 0x18, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c,
+  0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x9f, 0x98, 0x06, 0x32, 0x1a,
+  0xc4, 0x67, 0xf0, 0xa3, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1,
+  0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20,
+  0x00, 0x08, 0x82, 0x01, 0x97, 0xa6, 0x41, 0x8e, 0x06, 0x09, 0x11, 0x8c,
+  0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0xa7, 0xa6, 0x81, 0x8e, 0x06, 0x09,
+  0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0xb7, 0xa6, 0xc1, 0x8e,
+  0x06, 0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0x32, 0xa7,
+  0x81, 0x8e, 0x06, 0xfb, 0x19, 0x04, 0x66, 0x1a, 0xac, 0x68, 0x80, 0xa6,
+  0xc1, 0x68, 0x42, 0x00, 0x5c, 0xf0, 0xd8, 0x2c, 0x81, 0x7b, 0x0c, 0xb4,
+  0x3c, 0xa6, 0x91, 0x1b, 0x38, 0x2c, 0xe0, 0x06, 0x4b, 0xec, 0x86, 0xd0,
+  0x1e, 0x38, 0x2c, 0xf0, 0xc6, 0x2c, 0xc3, 0x7b, 0xc4, 0xc7, 0xfa, 0x0c,
+  0x47, 0xbc, 0x4f, 0x8b, 0x06, 0xc3, 0x77, 0xf0, 0x33, 0xcc, 0x70, 0x43,
+  0x80, 0x9f, 0x01, 0x19, 0xd4, 0x10, 0xe8, 0x70, 0x84, 0xfc, 0xc4, 0x68,
+  0x30, 0x7c, 0x15, 0x08, 0x7a, 0xf4, 0x33, 0xcc, 0x70, 0x43, 0xb0, 0x9f,
+  0x01, 0x19, 0x54, 0x30, 0xe8, 0x2c, 0x03, 0x7c, 0x94, 0x48, 0x70, 0xbf,
+  0x19, 0x0c, 0x73, 0xf4, 0x18, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08,
+  0x06, 0x9f, 0x9e, 0x06, 0x6a, 0x1a, 0xa4, 0x68, 0x70, 0xa7, 0xc1, 0x68,
+  0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10,
+  0x43, 0x11, 0x87, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x17, 0xaa,
+  0x41, 0x9c, 0x06, 0x07, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01,
+  0x27, 0xaa, 0x81, 0x9c, 0x06, 0x0c, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08,
+  0x82, 0x01, 0x37, 0xaa, 0xc1, 0x9c, 0x06, 0x12, 0x11, 0x8c, 0x18, 0x28,
+  0x00, 0x08, 0x82, 0xc1, 0xb2, 0xaa, 0x81, 0x9c, 0x06, 0x33, 0x1a, 0x04,
+  0x7e, 0x1a, 0x8c, 0x69, 0x00, 0xaa, 0xc1, 0x68, 0x42, 0x00, 0x5c, 0xf0,
+  0xd8, 0x2c, 0x41, 0x89, 0x0c, 0x37, 0x8c, 0x10, 0xa9, 0x06, 0x60, 0x30,
+  0xcb, 0x20, 0x1f, 0xf3, 0x11, 0x94, 0x8b, 0x06, 0x75, 0x1a, 0xc0, 0x05,
+  0x4f, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x01, 0xd5, 0xaa, 0x81, 0x9d,
+  0x06, 0x29, 0x74, 0xa6, 0xc1, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50,
+  0xae, 0x1a, 0xd8, 0x69, 0x10, 0x08, 0x17, 0x0c, 0x53, 0x31, 0x1a, 0xe8,
+  0x69, 0x00, 0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x94,
+  0xac, 0x06, 0x7b, 0x1a, 0xb4, 0x10, 0x9b, 0x06, 0x23, 0x06, 0x07, 0x00,
+  0x82, 0x60, 0x40, 0xcd, 0x6a, 0xb0, 0xa7, 0x41, 0x20, 0x5c, 0x30, 0xcc,
+  0x05, 0x4f, 0xdd, 0xf1, 0xd4, 0xe9, 0x67, 0x30, 0xcc, 0xbd, 0x64, 0x30,
+  0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18,
+  0x7c, 0xb7, 0x1a, 0x9c, 0x6a, 0x60, 0xa6, 0x01, 0xad, 0x06, 0xa3, 0x09,
+  0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c,
+  0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x9c, 0xaf, 0x06,
+  0xae, 0x1a, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xdc,
+  0xaf, 0x06, 0xaf, 0x1a, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08,
+  0x06, 0x1c, 0xb8, 0x06, 0xb0, 0x1a, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00,
+  0x20, 0x08, 0x06, 0x0b, 0xba, 0x06, 0xaf, 0x1a, 0xc0, 0x69, 0x10, 0xec,
+  0x6a, 0x00, 0xaa, 0x41, 0xaf, 0x06, 0xa3, 0x09, 0x01, 0x70, 0xc1, 0x63,
+  0xb3, 0x04, 0x25, 0x32, 0xdc, 0x00, 0x46, 0xe0, 0x1a, 0x80, 0xc1, 0x2c,
+  0x03, 0x7d, 0x94, 0x48, 0x60, 0x62, 0x1a, 0x90, 0x69, 0x10, 0x9f, 0xe1,
+  0x88, 0x32, 0x2a, 0xd3, 0x80, 0xf8, 0x66, 0x19, 0xea, 0x03, 0x3f, 0x02,
+  0x33, 0xd3, 0xc0, 0x8c, 0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9,
+  0xe0, 0x29, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0x75, 0x0d,
+  0x74, 0xb8, 0x21, 0x40, 0xd7, 0x00, 0x0c, 0x66, 0x19, 0xec, 0xe3, 0x3e,
+  0x02, 0x1b, 0xdc, 0x34, 0x80, 0xcf, 0x2c, 0x01, 0x7f, 0x58, 0x9b, 0x06,
+  0x44, 0x7c, 0x66, 0x09, 0xf8, 0x63, 0x38, 0x02, 0x8e, 0xdc, 0x34, 0x10,
+  0xbe, 0x59, 0x86, 0xfc, 0xe0, 0x8f, 0xc0, 0xe2, 0xe8, 0x4d, 0x83, 0xf8,
+  0x58, 0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca, 0x82, 0x48, 0x3e,
+  0x56, 0x04, 0xf1, 0x29, 0xa2, 0x5e, 0x03, 0x1d, 0x6e, 0x08, 0xe6, 0x35,
+  0x00, 0x83, 0x59, 0x06, 0xfd, 0xd8, 0x8f, 0xc0, 0xee, 0x34, 0x18, 0xe2,
+  0x33, 0x4b, 0xc0, 0x1f, 0x46, 0xe8, 0x69, 0x00, 0x9f, 0x59, 0x02, 0xfe,
+  0x18, 0x68, 0x79, 0x34, 0xfb, 0xc0, 0xee, 0x83, 0xd0, 0x0f, 0x61, 0x3f,
+  0xec, 0x31, 0xc0, 0x8f, 0x0b, 0x86, 0xb1, 0x3c, 0x0d, 0xfa, 0x34, 0x88,
+  0xcf, 0x70, 0x44, 0x1f, 0xf9, 0x69, 0x40, 0x7c, 0xb3, 0x0c, 0xfd, 0x01,
+  0x22, 0x81, 0xfd, 0x69, 0xe0, 0x47, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05,
+  0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0xc4,
+  0xc8, 0x06, 0x3a, 0xdc, 0x10, 0x84, 0x6c, 0x00, 0x06, 0xb3, 0x0c, 0xfe,
+  0xf1, 0x1f, 0x81, 0x0d, 0xa7, 0x1a, 0xc0, 0x67, 0x96, 0x80, 0x44, 0x8c,
+  0x54, 0x03, 0x22, 0x3e, 0xb3, 0x04, 0x24, 0x32, 0x1c, 0x81, 0x4a, 0xa5,
+  0x1a, 0x08, 0xdf, 0x2c, 0x43, 0x88, 0x90, 0x48, 0x60, 0xa9, 0x64, 0xaa,
+  0x41, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x41,
+  0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xe1, 0xb2, 0x81, 0x0e, 0x37, 0x04,
+  0x2c, 0x1b, 0x80, 0xc1, 0x2c, 0x83, 0x88, 0x8c, 0x48, 0x60, 0xae, 0x1a,
+  0x0c, 0xf1, 0x99, 0x25, 0x20, 0x11, 0x23, 0x66, 0x35, 0x80, 0xcf, 0x2c,
+  0x01, 0x89, 0x0c, 0xb4, 0x3c, 0x9a, 0x7f, 0x60, 0xff, 0x41, 0x88, 0x88,
+  0x30, 0x22, 0x66, 0x19, 0x80, 0xc8, 0x05, 0xc3, 0x5c, 0xf0, 0xd4, 0x6d,
+  0x4f, 0xdd, 0x9f, 0x06, 0xc3, 0x1c, 0x6d, 0x06, 0xc3, 0x1c, 0x31, 0xcc,
+  0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0xc7, 0xb3, 0x01,
+  0xcb, 0x06, 0xeb, 0x1a, 0xe4, 0x6c, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09,
+  0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23,
+  0x06, 0x08, 0x00, 0x82, 0x60, 0xc0, 0x8d, 0x6d, 0x30, 0xb3, 0x41, 0x42,
+  0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xc0, 0x91, 0x6d, 0x40, 0xb3,
+  0x41, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xc0, 0x95, 0x6d,
+  0x50, 0xb3, 0x41, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0,
+  0xb4, 0x6d, 0x40, 0xb3, 0x41, 0xbd, 0x06, 0x01, 0xd8, 0x06, 0x25, 0x1b,
+  0x88, 0x6d, 0x30, 0x9a, 0x10, 0x00, 0x17, 0x3c, 0x36, 0x4b, 0x50, 0x22,
+  0x03, 0x2d, 0x8f, 0x69, 0xc0, 0x07, 0x8b, 0x0b, 0xef, 0xc1, 0x12, 0xf2,
+  0x21, 0x90, 0x08, 0x8b, 0x0b, 0xf3, 0x31, 0xcb, 0x60, 0x22, 0x28, 0x52,
+  0x4e, 0xc3, 0x11, 0xea, 0x74, 0xb2, 0xc1, 0xf0, 0xdd, 0x3a, 0x0d, 0x33,
+  0xdc, 0x10, 0xc8, 0x6b, 0x40, 0x06, 0x35, 0x04, 0x3a, 0x1c, 0xc1, 0x4e,
+  0x2b, 0x1b, 0x0c, 0x5f, 0x05, 0x82, 0x9e, 0x3b, 0x0d, 0x33, 0xdc, 0x10,
+  0xd4, 0x6b, 0x40, 0x06, 0x15, 0x0c, 0x3a, 0xcb, 0x70, 0x22, 0x3c, 0x12,
+  0x5c, 0xae, 0x06, 0xc3, 0x9c, 0x7b, 0x06, 0xc3, 0x8c, 0x18, 0x1c, 0x00,
+  0x08, 0x82, 0xc1, 0x47, 0xb7, 0x01, 0xd9, 0x06, 0x23, 0x1b, 0xc4, 0x6d,
+  0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3,
+  0x09, 0xc4, 0x50, 0xc4, 0x21, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xc0,
+  0xed, 0x6d, 0xb0, 0xb6, 0xc1, 0x41, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82,
+  0x60, 0xc0, 0xf1, 0x6d, 0xc0, 0xb6, 0x01, 0x43, 0x04, 0x23, 0x06, 0x08,
+  0x00, 0x82, 0x60, 0xc0, 0xf5, 0x6d, 0xd0, 0xb6, 0x81, 0x44, 0x04, 0x23,
+  0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0x94, 0x6e, 0xc0, 0xb6, 0x41, 0xcb,
+  0x06, 0x01, 0xde, 0x06, 0x3d, 0x1b, 0xe8, 0x6d, 0x30, 0x9a, 0x10, 0x00,
+  0x17, 0x3c, 0x36, 0x4b, 0xc0, 0x23, 0xc3, 0x0d, 0xfd, 0xe4, 0xb7, 0x01,
+  0x18, 0xcc, 0x32, 0xa4, 0x88, 0x8a, 0x04, 0x85, 0xb2, 0xc1, 0xdb, 0x06,
+  0x70, 0xc1, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0x9d, 0x6e,
+  0x00, 0xb7, 0x01, 0x49, 0x85, 0x6d, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08,
+  0x06, 0x14, 0xea, 0x06, 0x70, 0x1b, 0x04, 0xc2, 0x05, 0xc3, 0xd4, 0xca,
+  0x06, 0x74, 0x1b, 0xc0, 0x05, 0x4f, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82,
+  0x01, 0xc5, 0xba, 0x41, 0xdd, 0x06, 0x27, 0x65, 0xb6, 0xc1, 0x88, 0xc1,
+  0x01, 0x80, 0x20, 0x18, 0x50, 0xad, 0x1b, 0xd4, 0x6d, 0x10, 0x08, 0x17,
+  0x0c, 0x73, 0xc1, 0x53, 0x77, 0x3c, 0x75, 0xf4, 0x1a, 0x0c, 0x73, 0x29,
+  0x1a, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20,
+  0x08, 0x06, 0x5f, 0xec, 0x06, 0xa1, 0x1b, 0x80, 0x6d, 0xe0, 0xba, 0xc1,
+  0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26,
+  0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x87,
+  0xbb, 0x01, 0xea, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
+  0x01, 0x97, 0xbb, 0x41, 0xea, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00,
+  0x08, 0x82, 0x01, 0xa7, 0xbb, 0x81, 0xea, 0x06, 0x09, 0x11, 0x8c, 0x18,
+  0x28, 0x00, 0x08, 0x82, 0xc1, 0x22, 0xbe, 0x41, 0xea, 0x06, 0x6a, 0x1b,
+  0x04, 0xb5, 0x1b, 0xe8, 0x6d, 0x70, 0xbb, 0xc1, 0x68, 0x42, 0x00, 0x5c,
+  0xf0, 0xd8, 0x2c, 0x01, 0x8f, 0x0c, 0x37, 0xe8, 0x94, 0xee, 0x06, 0x60,
+  0x30, 0xcb, 0xb0, 0x22, 0x3c, 0x12, 0x18, 0xcf, 0x06, 0x3e, 0x1b, 0xc4,
+  0x67, 0x38, 0x02, 0xac, 0x7e, 0x36, 0x20, 0xbe, 0x59, 0x06, 0x16, 0x79,
+  0x91, 0xc0, 0xc0, 0x36, 0x08, 0xab, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82,
+  0x61, 0x2e, 0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x82,
+  0x7c, 0x03, 0x1d, 0x6e, 0x08, 0xc4, 0x37, 0x00, 0x83, 0x59, 0x86, 0x16,
+  0x71, 0x91, 0xc0, 0x06, 0xb4, 0x0d, 0xe0, 0x33, 0x4b, 0x30, 0x23, 0x76,
+  0xb6, 0x01, 0x11, 0x9f, 0x59, 0x82, 0x19, 0x19, 0x8e, 0x58, 0x2b, 0xb4,
+  0x0d, 0x84, 0x6f, 0x96, 0x01, 0x46, 0x66, 0x24, 0x30, 0xb6, 0x4a, 0xdb,
+  0x20, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20,
+  0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x78, 0xdf, 0x40, 0x87, 0x1b, 0x82,
+  0xf6, 0x0d, 0xc0, 0x60, 0x96, 0x21, 0x46, 0x64, 0x24, 0xb0, 0xb8, 0x0d,
+  0x86, 0xf8, 0xcc, 0x12, 0xcc, 0x88, 0x11, 0x74, 0x1b, 0xc0, 0x67, 0x96,
+  0x60, 0x46, 0x06, 0x5a, 0x1e, 0xad, 0x45, 0x30, 0x17, 0x21, 0x62, 0x44,
+  0x90, 0x11, 0x76, 0x0d, 0x5e, 0xe4, 0x82, 0x61, 0x6c, 0x6e, 0x83, 0xbb,
+  0x0d, 0xe2, 0x33, 0x1c, 0x71, 0x57, 0x78, 0x1b, 0x10, 0xdf, 0x2c, 0x03,
+  0x8d, 0xdc, 0x48, 0x60, 0x79, 0x1b, 0xe0, 0x55, 0x7c, 0x2c, 0x18, 0xe8,
+  0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8,
+  0x14, 0xd1, 0xbf, 0x81, 0x0e, 0x37, 0x04, 0xfb, 0x1b, 0x80, 0xc1, 0x2c,
+  0x43, 0x8d, 0xd8, 0x48, 0x60, 0x43, 0xe8, 0x06, 0xf0, 0x99, 0x25, 0xd8,
+  0x11, 0xf3, 0xdb, 0x80, 0x88, 0xcf, 0x2c, 0xc1, 0x8e, 0x0c, 0x47, 0x88,
+  0xd6, 0xdf, 0x06, 0xc2, 0x37, 0xcb, 0x80, 0x23, 0x3b, 0x12, 0xd8, 0x68,
+  0x81, 0x6e, 0x10, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f,
+  0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xa0, 0x70, 0xa0, 0xc3,
+  0x0d, 0x81, 0x09, 0x07, 0x60, 0x30, 0xcb, 0x90, 0x23, 0x3a, 0x12, 0x18,
+  0xea, 0x06, 0x43, 0x7c, 0x66, 0x09, 0x76, 0xc4, 0x88, 0xd6, 0x0d, 0xe0,
+  0x33, 0x4b, 0xb0, 0x23, 0x03, 0x2d, 0x8f, 0x56, 0x23, 0x98, 0x8d, 0x10,
+  0x39, 0x22, 0xe8, 0x88, 0xce, 0x06, 0x37, 0x72, 0xc1, 0x30, 0x17, 0x3c,
+  0x75, 0xdb, 0x53, 0x97, 0xb7, 0xc1, 0x30, 0xe7, 0xaa, 0xc1, 0x30, 0x47,
+  0x0c, 0x73, 0xc4, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xf0, 0xd9,
+  0x70, 0x60, 0xc2, 0x41, 0xf9, 0x06, 0x33, 0x1c, 0x8c, 0x26, 0x04, 0xc0,
+  0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x91,
+  0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70, 0x3d, 0x1c, 0xb4, 0x70,
+  0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70, 0x3e, 0x1c,
+  0xb8, 0x70, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70,
+  0x3f, 0x1c, 0xbc, 0x70, 0x90, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20,
+  0x18, 0x2c, 0x67, 0x1c, 0xb8, 0x70, 0xf0, 0xbe, 0x41, 0xa0, 0xc3, 0xc1,
+  0xff, 0x06, 0x3c, 0x1c, 0x8c, 0x26, 0x04, 0xc0, 0x05, 0x8f, 0xcd, 0x12,
+  0xf0, 0xc8, 0x40, 0xcb, 0x63, 0x1a, 0x27, 0x02, 0x82, 0x83, 0x89, 0xb0,
+  0x44, 0x8a, 0x08, 0x3b, 0x02, 0x82, 0x83, 0x8a, 0xcc, 0x32, 0xf4, 0xc8,
+  0x8f, 0xfc, 0xd6, 0x70, 0x84, 0xfa, 0x84, 0x70, 0x30, 0x7c, 0xb7, 0x3e,
+  0xc3, 0x0c, 0x37, 0x04, 0xec, 0x1b, 0x90, 0x41, 0x0d, 0x81, 0x0e, 0x47,
+  0x98, 0x57, 0x09, 0x07, 0xc3, 0x57, 0x81, 0xa0, 0x87, 0x5e, 0xc3, 0x0c,
+  0x37, 0x04, 0xef, 0x1b, 0x90, 0x41, 0x05, 0x83, 0xce, 0x32, 0xf8, 0xc8,
+  0x9c, 0x04, 0x37, 0xbb, 0xc1, 0x30, 0x87, 0xae, 0xc1, 0x30, 0x23, 0x06,
+  0x07, 0x00, 0x82, 0x60, 0xf0, 0xb9, 0x71, 0xe0, 0xc3, 0x41, 0xff, 0x06,
+  0x6b, 0x1c, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c,
+  0xc2, 0x68, 0x02, 0x31, 0x14, 0x71, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20,
+  0x18, 0x70, 0x75, 0x1c, 0x94, 0x71, 0x70, 0x10, 0xc1, 0x88, 0x01, 0x02,
+  0x80, 0x20, 0x18, 0x70, 0x76, 0x1c, 0x98, 0x71, 0xc0, 0x10, 0xc1, 0x88,
+  0x01, 0x02, 0x80, 0x20, 0x18, 0x70, 0x77, 0x1c, 0x9c, 0x71, 0x20, 0x11,
+  0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c, 0x7f, 0x1c, 0x98, 0x71,
+  0x70, 0xc2, 0x41, 0x20, 0xc7, 0xc1, 0x0d, 0x07, 0x74, 0x1c, 0x8c, 0x26,
+  0x04, 0xc0, 0x05, 0x8f, 0xcd, 0x12, 0xcc, 0xc9, 0x70, 0xc3, 0x7d, 0xe1,
+  0x71, 0x00, 0x06, 0xb3, 0x0c, 0x60, 0x12, 0x26, 0x41, 0x89, 0x70, 0x90,
+  0xc6, 0x01, 0x5c, 0xf0, 0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50,
+  0xa1, 0x1c, 0xa8, 0x71, 0x40, 0x42, 0x3b, 0x1c, 0x8c, 0x18, 0x1c, 0x00,
+  0x08, 0x82, 0x01, 0x25, 0xca, 0x81, 0x1a, 0x07, 0x81, 0x70, 0xc1, 0x30,
+  0x55, 0xc2, 0x81, 0x1b, 0x07, 0x70, 0xc1, 0x53, 0x23, 0x06, 0x07, 0x00,
+  0x82, 0x60, 0x40, 0x99, 0x72, 0xf0, 0xc6, 0x41, 0x88, 0x81, 0x71, 0x30,
+  0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0xd4, 0x29, 0x07, 0x6f, 0x1c, 0x04,
+  0xc2, 0x05, 0xc3, 0x5c, 0xf0, 0xd4, 0x1d, 0x4f, 0x9d, 0xfb, 0x06, 0xc3,
+  0xdc, 0xc8, 0x06, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c,
+  0x00, 0x08, 0x82, 0xc1, 0xb7, 0xca, 0xc1, 0x1e, 0x07, 0x3a, 0x1c, 0xa0,
+  0x72, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08,
+  0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60,
+  0xc0, 0xc9, 0x72, 0x20, 0xca, 0x41, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00,
+  0x82, 0x60, 0xc0, 0xcd, 0x72, 0x30, 0xca, 0x41, 0x42, 0x04, 0x23, 0x06,
+  0x08, 0x00, 0x82, 0x60, 0xc0, 0xd1, 0x72, 0x40, 0xca, 0x41, 0x42, 0x04,
+  0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0xf0, 0x72, 0x30, 0xca, 0x01,
+  0x19, 0x07, 0xc1, 0x2b, 0x07, 0x74, 0x1c, 0xc4, 0x72, 0x30, 0x9a, 0x10,
+  0x00, 0x17, 0x3c, 0x36, 0x4b, 0x30, 0x27, 0xc3, 0x0d, 0x34, 0x46, 0xcb,
+  0x01, 0x18, 0xcc, 0x32, 0x88, 0xc9, 0x9c, 0x04, 0x66, 0xc3, 0x01, 0x0e,
+  0x07, 0xf1, 0x19, 0x8e, 0x00, 0xa3, 0x1c, 0x0e, 0x88, 0x6f, 0x96, 0x61,
+  0x4c, 0xcc, 0x24, 0x30, 0x1d, 0x0e, 0xc2, 0x28, 0x3e, 0x16, 0x0c, 0xf4,
+  0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0xc0, 0x90, 0x8f, 0x15, 0x41, 0x7c,
+  0x8a, 0xf0, 0xe5, 0x40, 0x87, 0x1b, 0x02, 0x5e, 0x0e, 0xc0, 0x60, 0x96,
+  0x81, 0x4c, 0xca, 0x24, 0xb0, 0x41, 0x8c, 0x03, 0xf8, 0xcc, 0x12, 0xa8,
+  0x89, 0x85, 0x71, 0x40, 0xc4, 0x67, 0x96, 0x40, 0x4d, 0x86, 0x23, 0xd6,
+  0x48, 0x8c, 0x03, 0xe1, 0x9b, 0x65, 0x38, 0x13, 0x35, 0x09, 0x8c, 0x8d,
+  0xc6, 0x38, 0x88, 0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7,
+  0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xd2, 0x39, 0xd0, 0xe1,
+  0x86, 0xe0, 0x9c, 0x03, 0x30, 0x98, 0x65, 0x40, 0x93, 0x34, 0x09, 0x6c,
+  0x8d, 0x83, 0x21, 0x3e, 0xb3, 0x04, 0x6a, 0x62, 0x84, 0x1b, 0x07, 0xf0,
+  0x99, 0x25, 0x50, 0x93, 0x81, 0x96, 0x47, 0x23, 0x13, 0xac, 0x4c, 0x08,
+  0x34, 0x11, 0xd2, 0x84, 0x1d, 0x03, 0x33, 0xb9, 0x60, 0x18, 0x6b, 0xe3,
+  0x20, 0x8e, 0x83, 0xf8, 0x0c, 0x47, 0xc4, 0x99, 0x1c, 0x07, 0xc4, 0x37,
+  0xcb, 0xb0, 0x26, 0x6e, 0x12, 0xd8, 0x1c, 0x07, 0x72, 0x16, 0x1f, 0x0b,
+  0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x60, 0xc8, 0xc7, 0x8a,
+  0x20, 0x3e, 0x45, 0xdc, 0x73, 0xa0, 0xc3, 0x0d, 0x41, 0x3d, 0x07, 0x60,
+  0x30, 0xcb, 0xc0, 0x26, 0x6d, 0x12, 0xd8, 0xb0, 0xc7, 0x01, 0x7c, 0x66,
+  0x09, 0xe4, 0xc4, 0xf0, 0x38, 0x20, 0xe2, 0x33, 0x4b, 0x20, 0x27, 0xc3,
+  0x11, 0x7c, 0x96, 0xc7, 0x81, 0xf0, 0xcd, 0x32, 0xbc, 0x89, 0x9c, 0x04,
+  0xd6, 0x67, 0x7a, 0x1c, 0xc4, 0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73,
+  0xc1, 0x53, 0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x22, 0x1d,
+  0xe8, 0x70, 0x43, 0x00, 0xd2, 0x01, 0x18, 0xcc, 0x32, 0xc0, 0x49, 0x9c,
+  0x04, 0x26, 0xca, 0xc1, 0x10, 0x9f, 0x59, 0x02, 0x39, 0x31, 0xe2, 0x94,
+  0x03, 0xf8, 0xcc, 0x12, 0xc8, 0xc9, 0x40, 0xcb, 0xa3, 0xb1, 0x09, 0xd6,
+  0x26, 0x04, 0x9c, 0x08, 0x71, 0xe2, 0xd2, 0x81, 0x9b, 0x5c, 0x30, 0xcc,
+  0x05, 0x4f, 0xdd, 0xf6, 0xd4, 0xcd, 0x71, 0x30, 0xcc, 0xa1, 0x6e, 0x30,
   0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18,
-  0x7c, 0x6c, 0x18, 0xf0, 0x60, 0xb0, 0x7f, 0x69, 0x18, 0x8c, 0x26, 0x04,
-  0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14,
-  0x91, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70, 0x73, 0x18, 0x8c,
-  0x61, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70, 0x74,
-  0x18, 0x90, 0x61, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
-  0x70, 0x75, 0x18, 0x94, 0x61, 0x90, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80,
-  0x20, 0x18, 0x2c, 0x7d, 0x18, 0x90, 0x61, 0x50, 0x82, 0x41, 0x00, 0x87,
-  0x41, 0x0d, 0x06, 0x72, 0x18, 0x8c, 0x26, 0x04, 0xc0, 0x05, 0x8f, 0xcd,
-  0x12, 0x80, 0xc6, 0x40, 0xcb, 0x63, 0x1a, 0x6b, 0xc1, 0x86, 0x82, 0x5a,
-  0xb0, 0x44, 0x5b, 0x08, 0x7f, 0xc1, 0x86, 0x82, 0x5b, 0x98, 0x7e, 0xd8,
-  0x60, 0x00, 0x9f, 0x59, 0x86, 0xd0, 0x18, 0x0d, 0xfb, 0x18, 0x8e, 0x08,
-  0x70, 0x30, 0x18, 0xbe, 0x13, 0x86, 0x19, 0x6e, 0x08, 0x46, 0x30, 0x20,
-  0x83, 0x1a, 0x02, 0x1d, 0x8e, 0xe0, 0x0f, 0x1e, 0x0c, 0x86, 0xaf, 0x02,
-  0x41, 0xcf, 0x3f, 0x86, 0x19, 0x6e, 0x08, 0x4c, 0x30, 0x20, 0x83, 0x0a,
-  0x06, 0x9d, 0x65, 0x10, 0x8d, 0xdb, 0x08, 0x4e, 0xfd, 0x86, 0xb9, 0x7f,
-  0x1b, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xbe, 0x52, 0x0c, 0xea,
-  0x30, 0xa0, 0xc1, 0x40, 0x14, 0x83, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10,
-  0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x0e, 0x19, 0x31,
-  0x40, 0x00, 0x10, 0x04, 0x03, 0x8e, 0x15, 0x03, 0x3e, 0x0c, 0x0e, 0x22,
-  0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xae, 0x15, 0x83, 0x3e, 0x0c,
-  0x18, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xce, 0x15, 0x03,
-  0x3f, 0x0c, 0x24, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0xc5,
-  0x16, 0x83, 0x3e, 0x0c, 0x7c, 0x30, 0x08, 0x52, 0x31, 0x70, 0xc3, 0x60,
-  0x15, 0x83, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xb1, 0x59, 0x82, 0xdb, 0x18,
-  0x6e, 0x70, 0x91, 0x57, 0x0c, 0xc0, 0x60, 0x96, 0x81, 0x34, 0x4a, 0x23,
-  0xa8, 0x1c, 0x0c, 0x40, 0x31, 0x80, 0x0b, 0x9e, 0x1a, 0x31, 0x38, 0x00,
-  0x10, 0x04, 0x03, 0x0a, 0x17, 0x83, 0x50, 0x0c, 0x36, 0x39, 0x0c, 0x46,
-  0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0xca, 0xc5, 0x20, 0x14, 0x83, 0x40,
-  0xb8, 0x60, 0x98, 0xe2, 0xc1, 0xa0, 0x14, 0x03, 0xb8, 0xe0, 0xa9, 0x11,
-  0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0x7a, 0x31, 0x30, 0xc5, 0xe0, 0x46,
-  0xee, 0x30, 0x18, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0xca, 0x17, 0x03,
-  0x53, 0x0c, 0x02, 0xe1, 0x82, 0x61, 0x2e, 0x78, 0xea, 0x8e, 0xa7, 0xae,
-  0x04, 0x83, 0x61, 0x4e, 0xe7, 0x86, 0x39, 0x62, 0x98, 0x23, 0x86, 0x19,
-  0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x4f, 0x1c, 0x03, 0x59, 0x0c, 0xe2,
-  0x30, 0xf8, 0xc5, 0x60, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34,
-  0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10, 0x00,
-  0x04, 0xc1, 0x80, 0x4b, 0xc7, 0x20, 0x17, 0x83, 0x84, 0x08, 0x46, 0x0c,
-  0x10, 0x00, 0x04, 0xc1, 0x80, 0x53, 0xc7, 0x40, 0x17, 0x83, 0x84, 0x08,
-  0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80, 0x5b, 0xc7, 0x60, 0x17, 0x83,
-  0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0x99, 0xc7, 0x40,
-  0x17, 0x83, 0x3d, 0x0c, 0x02, 0x73, 0x0c, 0x56, 0x31, 0x40, 0xc7, 0x60,
-  0x34, 0x21, 0x00, 0x2e, 0x78, 0x6c, 0x96, 0xe0, 0x36, 0x86, 0x1b, 0xd6,
-  0x64, 0x1d, 0x03, 0x30, 0x98, 0x65, 0x30, 0x8d, 0xdb, 0x08, 0xac, 0x0d,
-  0x83, 0x37, 0x0c, 0xe2, 0x33, 0x1c, 0x71, 0x07, 0x70, 0x18, 0x10, 0xdf,
-  0x2c, 0xc3, 0x69, 0xa8, 0x46, 0x60, 0x71, 0x18, 0xe0, 0x41, 0x7c, 0x2c,
-  0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x81, 0x21, 0x1f, 0x2b,
-  0x82, 0xf8, 0x14, 0x51, 0x8f, 0x81, 0x0e, 0x37, 0x04, 0xf3, 0x18, 0x80,
-  0xc1, 0x2c, 0x03, 0x6a, 0xa4, 0x46, 0x60, 0x43, 0x1e, 0x06, 0xf0, 0x99,
-  0x25, 0x70, 0x0d, 0xc3, 0xc3, 0x80, 0x88, 0xcf, 0x2c, 0x81, 0x6b, 0x0c,
-  0x47, 0x88, 0x42, 0x1e, 0x06, 0xc2, 0x37, 0xcb, 0xb0, 0x1a, 0xae, 0x11,
-  0xd8, 0x28, 0xe8, 0x61, 0x10, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc,
-  0x05, 0x4f, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0x80, 0x64,
-  0xa0, 0xc3, 0x0d, 0x81, 0x3f, 0x06, 0x60, 0x30, 0xcb, 0xc0, 0x1a, 0xad,
-  0x11, 0x98, 0x28, 0x06, 0x43, 0x7c, 0x66, 0x09, 0x5c, 0xc3, 0x88, 0x52,
-  0x0c, 0xe0, 0x33, 0x4b, 0xe0, 0x1a, 0x03, 0x2d, 0x8f, 0x86, 0x1a, 0x58,
-  0x6a, 0x10, 0xac, 0x21, 0xb4, 0x86, 0x4e, 0xa8, 0xc6, 0x05, 0xc3, 0x18,
-  0x29, 0x06, 0xa8, 0x18, 0xc4, 0x67, 0x38, 0xe2, 0x54, 0x52, 0x31, 0x20,
-  0xbe, 0x59, 0x86, 0xd7, 0x90, 0x8d, 0xc0, 0x54, 0x31, 0x40, 0x95, 0xf8,
-  0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca, 0x02, 0x43, 0x3e,
-  0x56, 0x04, 0xf1, 0x29, 0xc2, 0x25, 0x03, 0x1d, 0x6e, 0x08, 0x58, 0x32,
-  0x00, 0x83, 0x59, 0x06, 0xd8, 0x88, 0x8d, 0xc0, 0x06, 0x59, 0x0c, 0xe0,
-  0x33, 0x4b, 0x60, 0x1b, 0xf6, 0x8a, 0x01, 0x11, 0x9f, 0x59, 0x02, 0xdb,
-  0x18, 0x8e, 0x90, 0x15, 0x58, 0x0c, 0x84, 0x6f, 0x96, 0x61, 0x36, 0x6c,
-  0x23, 0xb0, 0x59, 0x89, 0xc5, 0x20, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60,
-  0x98, 0x0b, 0x9e, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xc8,
-  0xc9, 0x40, 0x87, 0x1b, 0x82, 0x9b, 0x0c, 0xc0, 0x60, 0x96, 0x81, 0x36,
-  0x6a, 0x23, 0xb0, 0x5c, 0x0c, 0x86, 0xf8, 0xcc, 0x12, 0xd8, 0x86, 0x11,
-  0xbe, 0x18, 0xc0, 0x67, 0x96, 0xc0, 0x36, 0x06, 0x5a, 0x1e, 0x0d, 0x36,
-  0xb0, 0xd8, 0x20, 0x68, 0x43, 0xa8, 0x0d, 0xbd, 0x92, 0x8d, 0x0b, 0x86,
-  0xb9, 0xe0, 0xa9, 0xdb, 0x9e, 0x3a, 0x55, 0x0c, 0x86, 0xb9, 0xdf, 0x1b,
-  0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c,
-  0xbe, 0xb3, 0x0c, 0x6e, 0x32, 0xb0, 0xc7, 0x80, 0x2c, 0x83, 0xd1, 0x84,
+  0x7c, 0x30, 0x1d, 0x80, 0x74, 0xf0, 0xcb, 0x41, 0x4b, 0x07, 0xa3, 0x09,
+  0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c,
+  0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xdc, 0x4d, 0x07,
+  0x27, 0x1d, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x1c,
+  0x4e, 0x07, 0x28, 0x1d, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08,
+  0x06, 0x5c, 0x4e, 0x07, 0x29, 0x1d, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00,
+  0x20, 0x08, 0x06, 0x4b, 0x58, 0x07, 0x28, 0x1d, 0xa4, 0x73, 0x10, 0xd0,
+  0x74, 0x90, 0xcf, 0x81, 0x4d, 0x07, 0xa3, 0x09, 0x01, 0x70, 0xc1, 0x63,
+  0xb3, 0x04, 0x73, 0x32, 0xd0, 0xf2, 0x98, 0x86, 0x8f, 0xd0, 0xe7, 0xd0,
+  0x23, 0x2c, 0x01, 0x26, 0x82, 0x9c, 0xd0, 0xe7, 0x10, 0x26, 0xb3, 0x0c,
+  0x74, 0x62, 0x27, 0xb9, 0x36, 0x1c, 0x91, 0x3e, 0xfb, 0x1c, 0x0c, 0xdf,
+  0xa9, 0xcf, 0x30, 0xc3, 0x0d, 0x81, 0x39, 0x07, 0x64, 0x50, 0x43, 0xa0,
+  0xc3, 0x11, 0xe0, 0xf6, 0xcf, 0xc1, 0xf0, 0x55, 0x20, 0xe8, 0x89, 0xdb,
+  0x30, 0xc3, 0x0d, 0x41, 0x3a, 0x07, 0x64, 0x50, 0xc1, 0xa0, 0xb3, 0x0c,
+  0x75, 0xa2, 0x2a, 0xc1, 0xb5, 0x72, 0x30, 0xcc, 0x89, 0x6f, 0x30, 0xcc,
+  0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x7c, 0x68, 0x1d, 0xe0, 0x74, 0x70,
+  0xcf, 0x41, 0x59, 0x07, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3,
+  0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x1c, 0x32, 0x62, 0x80, 0x00,
+  0x20, 0x08, 0x06, 0xdc, 0x5b, 0x07, 0x3f, 0x1d, 0x1c, 0x44, 0x30, 0x62,
+  0x80, 0x00, 0x20, 0x08, 0x06, 0x1c, 0x5c, 0x07, 0x60, 0x1d, 0x30, 0x44,
+  0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x5c, 0x5c, 0x07, 0x61, 0x1d,
+  0x48, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x4b, 0x5e, 0x07,
+  0x60, 0x1d, 0x84, 0x74, 0x10, 0xb0, 0x75, 0x10, 0xd3, 0x81, 0x5b, 0x07,
+  0xa3, 0x09, 0x01, 0x70, 0xc1, 0x63, 0xb3, 0x04, 0xaa, 0x32, 0xdc, 0x10,
+  0x6f, 0x72, 0x1d, 0x80, 0xc1, 0x2c, 0xc3, 0x9d, 0xe0, 0x49, 0x50, 0xfc,
+  0x1c, 0x8c, 0x75, 0x00, 0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08,
+  0x06, 0xd4, 0x5e, 0x07, 0x64, 0x1d, 0x8c, 0x50, 0x4d, 0x07, 0x23, 0x06,
+  0x07, 0x00, 0x82, 0x60, 0x40, 0xf1, 0x75, 0x40, 0xd6, 0x41, 0x20, 0x5c,
+  0x30, 0x4c, 0xfd, 0x73, 0x80, 0xd6, 0x01, 0x5c, 0xf0, 0xd4, 0x88, 0xc1,
+  0x01, 0x80, 0x20, 0x18, 0x50, 0xa0, 0x1d, 0xa4, 0x75, 0xb0, 0x6f, 0x3a,
+  0x1d, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x01, 0x15, 0xda, 0x41, 0x5a,
+  0x07, 0x81, 0x70, 0xc1, 0x30, 0x17, 0x3c, 0x75, 0xc7, 0x53, 0x87, 0xce,
+  0xc1, 0x30, 0xd7, 0xbf, 0xc1, 0x30, 0x47, 0x0c, 0x73, 0xc4, 0x30, 0x23,
+  0x06, 0x07, 0x00, 0x82, 0x60, 0xf0, 0x95, 0x76, 0x50, 0xd7, 0x01, 0x4d,
+  0x07, 0xa2, 0x1d, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26,
+  0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x91, 0xc8, 0x88, 0x01, 0x02, 0x80,
+  0x20, 0x18, 0x70, 0xac, 0x1d, 0xf0, 0x75, 0x90, 0x10, 0xc1, 0x88, 0x01,
+  0x02, 0x80, 0x20, 0x18, 0x70, 0xad, 0x1d, 0xf4, 0x75, 0x90, 0x10, 0xc1,
+  0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70, 0xae, 0x1d, 0xf8, 0x75, 0x90,
+  0x10, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c, 0xb6, 0x1d, 0xf4,
+  0x75, 0xe0, 0xd3, 0x41, 0x90, 0xda, 0x81, 0x5b, 0x07, 0xab, 0x1d, 0x8c,
+  0x26, 0x04, 0xc0, 0x05, 0x8f, 0xcd, 0x12, 0xa8, 0xca, 0x70, 0x83, 0xcb,
+  0xb9, 0x76, 0x00, 0x06, 0xb3, 0x0c, 0x79, 0xa2, 0x2a, 0x81, 0xc1, 0x74,
+  0x20, 0xd3, 0x41, 0x7c, 0x86, 0x23, 0x7e, 0x68, 0xa6, 0x03, 0xe2, 0x9b,
+  0x65, 0xd0, 0x93, 0x3e, 0x09, 0x8c, 0xa6, 0x03, 0x30, 0x8a, 0x8f, 0x05,
+  0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x30, 0xe4, 0x63, 0x45,
+  0x10, 0x9f, 0x22, 0x70, 0x3b, 0xd0, 0xe1, 0x86, 0xc0, 0xb6, 0x03, 0x30,
+  0x98, 0x65, 0xd8, 0x13, 0x3e, 0x09, 0x6c, 0xe0, 0xe9, 0x00, 0x3e, 0xb3,
+  0x04, 0xa1, 0x62, 0x3b, 0x1d, 0x10, 0xf1, 0x99, 0x25, 0x08, 0x95, 0xe1,
+  0x08, 0x35, 0xe2, 0xe9, 0x40, 0xf8, 0x66, 0x19, 0xfc, 0x24, 0x54, 0x02,
+  0x5b, 0xa3, 0x9e, 0x0e, 0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9,
+  0xe0, 0x29, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0xf1, 0x0e,
+  0x74, 0xb8, 0x21, 0x08, 0xef, 0x00, 0x0c, 0x66, 0x19, 0xfe, 0x04, 0x54,
+  0x02, 0x2b, 0xeb, 0x60, 0x88, 0xcf, 0x2c, 0x41, 0xa8, 0x18, 0x81, 0xd6,
+  0x01, 0x7c, 0x66, 0x09, 0x42, 0x65, 0xa0, 0xe5, 0xd1, 0xf6, 0x04, 0xe3,
+  0x13, 0xe2, 0x4f, 0x04, 0x50, 0x51, 0xc7, 0xa0, 0x4f, 0x2e, 0x18, 0xc6,
+  0xce, 0x3a, 0x58, 0xeb, 0x20, 0x3e, 0xc3, 0x11, 0x6b, 0xc7, 0xd6, 0x01,
+  0xf1, 0xcd, 0x32, 0x88, 0x4a, 0xa9, 0x04, 0xd6, 0xd6, 0x01, 0xdb, 0xc5,
+  0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x18, 0xf2,
+  0xb1, 0x22, 0x88, 0x4f, 0x11, 0xf1, 0x1d, 0xe8, 0x70, 0x43, 0xf0, 0xde,
+  0x01, 0x18, 0xcc, 0x32, 0x8c, 0x0a, 0xa9, 0x04, 0x36, 0xd4, 0x75, 0x00,
+  0x9f, 0x59, 0x82, 0x54, 0x31, 0xb9, 0x0e, 0x88, 0xf8, 0xcc, 0x12, 0xa4,
+  0xca, 0x70, 0x84, 0xdd, 0xcd, 0x75, 0x20, 0x7c, 0xb3, 0x0c, 0xa6, 0x92,
+  0x2a, 0x81, 0xdd, 0x1d, 0x5d, 0x07, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05,
+  0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x04,
+  0x7f, 0x07, 0x3a, 0xdc, 0x10, 0xe8, 0x77, 0x00, 0x06, 0xb3, 0x0c, 0xa7,
+  0x82, 0x2a, 0x81, 0xf1, 0x75, 0x30, 0xc4, 0x67, 0x96, 0x20, 0x55, 0x8c,
+  0x08, 0xed, 0x00, 0x3e, 0xb3, 0x04, 0xa9, 0x32, 0xd0, 0xf2, 0x68, 0xa3,
+  0x82, 0x91, 0x0a, 0x71, 0x2a, 0x02, 0xaa, 0x88, 0x7c, 0x50, 0x2a, 0x17,
+  0x0c, 0x73, 0xc1, 0x53, 0xb7, 0x3d, 0x75, 0x6d, 0x1d, 0x0c, 0x73, 0xa2,
+  0x1c, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20,
+  0x08, 0x06, 0x9f, 0x8a, 0x07, 0xfa, 0x1d, 0xe4, 0x76, 0x70, 0xe2, 0xc1,
+  0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26,
+  0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x17,
+  0xe3, 0x41, 0x88, 0x07, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
+  0x01, 0x27, 0xe3, 0x81, 0x88, 0x07, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00,
+  0x08, 0x82, 0x01, 0x37, 0xe3, 0xc1, 0x88, 0x07, 0x09, 0x11, 0x8c, 0x18,
+  0x28, 0x00, 0x08, 0x82, 0xc1, 0xb2, 0xe3, 0x81, 0x88, 0x07, 0xe3, 0x1d,
+  0x04, 0x2e, 0x1e, 0xcc, 0x77, 0x00, 0xe3, 0xc1, 0x68, 0x42, 0x00, 0x5c,
+  0xf0, 0xd8, 0x2c, 0x81, 0xaa, 0x0c, 0xb4, 0x3c, 0xa6, 0x51, 0x27, 0xe8,
+  0x3b, 0xd0, 0x09, 0x4b, 0xdc, 0x89, 0x90, 0x2a, 0xe8, 0x3b, 0xe0, 0x89,
+  0xd9, 0x1e, 0x7d, 0x07, 0xf0, 0x99, 0x65, 0x58, 0x95, 0x56, 0xa1, 0xbd,
+  0xe1, 0x08, 0xdc, 0xb3, 0xef, 0x60, 0xf8, 0x2e, 0xf7, 0x86, 0x19, 0x6e,
+  0x08, 0xc2, 0x3b, 0x20, 0x83, 0x1a, 0x02, 0x1d, 0x8e, 0x28, 0xf4, 0x3b,
+  0x18, 0xbe, 0x0a, 0x04, 0xbd, 0x63, 0x98, 0xe1, 0x86, 0x80, 0xbc, 0x03,
+  0x32, 0xa8, 0x60, 0xd0, 0x59, 0x06, 0x56, 0x09, 0x97, 0xe0, 0x50, 0x3b,
+  0x18, 0xe6, 0x7a, 0x39, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c,
+  0xbe, 0x31, 0x0f, 0x66, 0x3c, 0x90, 0xef, 0x00, 0xcc, 0x83, 0xd1, 0x84,
   0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86,
-  0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xce, 0x2d, 0x03,
-  0x9f, 0x0c, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xee,
-  0x2d, 0x83, 0x9f, 0x0c, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04,
-  0x03, 0x0e, 0x2e, 0x03, 0xb0, 0x0c, 0x12, 0x22, 0x18, 0x31, 0x50, 0x00,
-  0x10, 0x04, 0x83, 0x05, 0x2f, 0x83, 0x9f, 0x0c, 0x40, 0x32, 0x08, 0xd6,
-  0x32, 0x80, 0xc9, 0xa0, 0x2d, 0x83, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xb1,
-  0x59, 0x82, 0xdb, 0x18, 0x68, 0x79, 0x4c, 0x43, 0x34, 0xc4, 0x54, 0x08,
-  0x0d, 0x96, 0x20, 0x0d, 0xc1, 0x36, 0xc4, 0x54, 0x28, 0x0d, 0xab, 0x97,
-  0x93, 0x0c, 0xe0, 0x33, 0xcb, 0x80, 0x1b, 0xba, 0x11, 0x2f, 0xc3, 0x11,
-  0xc1, 0x4c, 0x06, 0xc3, 0x77, 0xc2, 0x30, 0xc3, 0x0d, 0x81, 0x3f, 0x06,
-  0x64, 0x50, 0x43, 0xa0, 0xc3, 0x11, 0xf7, 0x72, 0x93, 0xc1, 0xf0, 0x55,
-  0x20, 0xe8, 0xe5, 0xcb, 0x30, 0xc3, 0x0d, 0x41, 0x48, 0x06, 0x64, 0x50,
-  0xc1, 0xa0, 0xb3, 0x0c, 0xb9, 0xe1, 0x1e, 0xc1, 0x95, 0x63, 0x30, 0xcc,
-  0xe9, 0xdf, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xf0, 0x81, 0x66,
-  0x00, 0x97, 0xc1, 0x4b, 0x06, 0x7d, 0x19, 0x8c, 0x26, 0x04, 0xc0, 0x68,
-  0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x71, 0xc8,
-  0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70, 0xa7, 0x19, 0xdc, 0x65, 0x70,
-  0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70, 0xa8, 0x19, 0xe0,
-  0x65, 0xc0, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70, 0xa9,
-  0x19, 0xe4, 0x65, 0x20, 0x11, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18,
-  0x2c, 0xb1, 0x19, 0xe0, 0x65, 0x90, 0x93, 0x41, 0x40, 0x9a, 0x41, 0x5a,
-  0x06, 0xa6, 0x19, 0x8c, 0x26, 0x04, 0xc0, 0x05, 0x8f, 0xcd, 0x12, 0xb8,
-  0xc7, 0x70, 0x43, 0xca, 0xa8, 0x66, 0x00, 0x06, 0xb3, 0x0c, 0xbb, 0xc1,
-  0x1b, 0x41, 0xd1, 0x64, 0xb0, 0x97, 0x01, 0x5c, 0xf0, 0xd4, 0x88, 0xc1,
-  0x01, 0x80, 0x20, 0x18, 0x50, 0xb3, 0x19, 0xf0, 0x65, 0xb0, 0xb5, 0x65,
-  0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x14, 0x6d, 0x06, 0x7c, 0x19,
-  0x04, 0xc2, 0x05, 0xc3, 0xd4, 0x4d, 0x06, 0xa0, 0x19, 0xc0, 0x05, 0x4f,
-  0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x01, 0x85, 0x9b, 0x41, 0x68, 0x06,
-  0x32, 0x23, 0x97, 0xc1, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50, 0xb9,
-  0x19, 0x84, 0x66, 0x10, 0x08, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x77, 0x3c,
-  0x75, 0x20, 0x19, 0x0c, 0x73, 0x35, 0x18, 0x0c, 0x73, 0xc4, 0x30, 0x47,
-  0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x5f, 0x6f, 0x06, 0xad,
-  0x19, 0xb0, 0x65, 0xa0, 0x9b, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08,
-  0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18,
-  0x20, 0x00, 0x08, 0x82, 0x01, 0x47, 0x9e, 0x01, 0x6d, 0x06, 0x09, 0x11,
-  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x57, 0x9e, 0x41, 0x6d, 0x06,
-  0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x67, 0x9e, 0x81,
-  0x6d, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0xe2,
-  0x9e, 0x41, 0x6d, 0x06, 0x76, 0x19, 0x04, 0xe1, 0x19, 0x98, 0x66, 0x30,
-  0x9e, 0xc1, 0x68, 0x42, 0x00, 0x5c, 0xf0, 0xd8, 0x2c, 0x81, 0x7b, 0x0c,
-  0x37, 0x98, 0x8d, 0x79, 0x06, 0x60, 0x30, 0xcb, 0xd0, 0x1b, 0xee, 0x11,
-  0x18, 0x5a, 0x06, 0x6a, 0x19, 0xc4, 0x67, 0x38, 0xe2, 0x0e, 0xd6, 0x32,
-  0x20, 0xbe, 0x59, 0x06, 0xdf, 0x08, 0x8f, 0xc0, 0xd8, 0x32, 0xc0, 0x83,
-  0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca, 0x02, 0x43,
-  0x3e, 0x56, 0x04, 0xf1, 0x29, 0x02, 0x3e, 0x03, 0x1d, 0x6e, 0x08, 0xdc,
-  0x33, 0x00, 0x83, 0x59, 0x86, 0xdf, 0x00, 0x8f, 0xc0, 0x06, 0xba, 0x0c,
-  0xe0, 0x33, 0x4b, 0x50, 0x1e, 0x36, 0x97, 0x01, 0x11, 0x9f, 0x59, 0x82,
-  0xf2, 0x18, 0x8e, 0x10, 0x05, 0xba, 0x0c, 0x84, 0x6f, 0x96, 0x41, 0x3c,
-  0xca, 0x23, 0xb0, 0x51, 0xa8, 0xcb, 0x20, 0x3e, 0x16, 0x38, 0xf4, 0xb9,
-  0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a,
-  0xd8, 0xcf, 0x40, 0x87, 0x1b, 0x82, 0xfc, 0x0c, 0xc0, 0x60, 0x96, 0x61,
-  0x3c, 0xc8, 0x23, 0xb0, 0xbe, 0x0c, 0x86, 0xf8, 0xcc, 0x12, 0x94, 0x87,
-  0x11, 0xa0, 0x19, 0xc0, 0x67, 0x96, 0xa0, 0x3c, 0x06, 0x5a, 0x1e, 0xed,
-  0x37, 0x30, 0xf0, 0x20, 0xc6, 0x43, 0x20, 0x0f, 0x9d, 0x08, 0x8f, 0x0b,
-  0x86, 0xb1, 0xbf, 0x0c, 0x46, 0x33, 0x88, 0xcf, 0x70, 0x84, 0xe8, 0x90,
-  0x66, 0x40, 0x7c, 0xb3, 0x0c, 0xe6, 0x91, 0x1e, 0x81, 0x95, 0x66, 0x30,
-  0x3a, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05,
-  0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x44, 0x8a, 0x06, 0x3a, 0xdc, 0x10,
-  0x9c, 0x68, 0x00, 0x06, 0xb3, 0x0c, 0xe7, 0x81, 0x1e, 0x81, 0x0d, 0xad,
-  0x19, 0xc0, 0x67, 0x96, 0xa0, 0x3d, 0x4c, 0x35, 0x03, 0x22, 0x3e, 0xb3,
-  0x04, 0xed, 0x31, 0x1c, 0xd1, 0x3a, 0xab, 0x19, 0x08, 0xdf, 0x2c, 0x83,
-  0x7a, 0xb4, 0x47, 0x60, 0xae, 0xc3, 0x9a, 0x41, 0x7c, 0x2c, 0x70, 0xe8,
-  0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8,
-  0x14, 0x41, 0xa3, 0x81, 0x0e, 0x37, 0x04, 0x32, 0x1a, 0x80, 0xc1, 0x2c,
-  0xc3, 0x7a, 0xb0, 0x47, 0x60, 0xb4, 0x19, 0x0c, 0xf1, 0x99, 0x25, 0x68,
-  0x0f, 0x23, 0x72, 0x33, 0x80, 0xcf, 0x2c, 0x41, 0x7b, 0x0c, 0xb4, 0x3c,
-  0xda, 0x79, 0x60, 0xe8, 0x41, 0xac, 0x87, 0xc0, 0x1e, 0x70, 0x97, 0x1e,
-  0x17, 0x0c, 0x73, 0xc1, 0x53, 0xb7, 0x3d, 0x75, 0xa5, 0x19, 0x0c, 0x73,
-  0xba, 0x18, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00,
-  0x20, 0x08, 0x06, 0x9f, 0x98, 0x06, 0x32, 0x1a, 0xc4, 0x67, 0xf0, 0xa3,
-  0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c,
-  0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01,
-  0x97, 0xa6, 0x41, 0x8e, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08,
-  0x82, 0x01, 0xa7, 0xa6, 0x81, 0x8e, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x20,
-  0x00, 0x08, 0x82, 0x01, 0xb7, 0xa6, 0xc1, 0x8e, 0x06, 0x09, 0x11, 0x8c,
-  0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0x32, 0xa7, 0x81, 0x8e, 0x06, 0xfb,
-  0x19, 0x04, 0x66, 0x1a, 0xac, 0x68, 0x80, 0xa6, 0xc1, 0x68, 0x42, 0x00,
-  0x5c, 0xf0, 0xd8, 0x2c, 0x81, 0x7b, 0x0c, 0xb4, 0x3c, 0xa6, 0x91, 0x1b,
-  0x38, 0x2c, 0xe0, 0x06, 0x4b, 0xec, 0x86, 0xd0, 0x1e, 0x38, 0x2c, 0xf0,
-  0xc6, 0x2c, 0xc3, 0x7b, 0xc4, 0xc7, 0xfa, 0x0c, 0x47, 0xbc, 0x4f, 0x8b,
-  0x06, 0xc3, 0x77, 0xf0, 0x33, 0xcc, 0x70, 0x43, 0x80, 0x9f, 0x01, 0x19,
-  0xd4, 0x10, 0xe8, 0x70, 0x84, 0xfc, 0xc4, 0x68, 0x30, 0x7c, 0x15, 0x08,
-  0x7a, 0xf4, 0x33, 0xcc, 0x70, 0x43, 0xb0, 0x9f, 0x01, 0x19, 0x54, 0x30,
-  0xe8, 0x2c, 0x03, 0x7c, 0x94, 0x48, 0x70, 0xbf, 0x19, 0x0c, 0x73, 0xf4,
-  0x18, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x9f, 0x9e, 0x06,
-  0x6a, 0x1a, 0xa4, 0x68, 0x70, 0xa7, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26,
+  0x22, 0x0e, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x4e, 0xcd, 0x03,
+  0x1d, 0x0f, 0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x6e,
+  0xcd, 0x83, 0x1d, 0x0f, 0x18, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04,
+  0x03, 0x8e, 0xcd, 0x03, 0x1e, 0x0f, 0x24, 0x22, 0x18, 0x31, 0x50, 0x00,
+  0x10, 0x04, 0x83, 0x85, 0xce, 0x83, 0x1d, 0x0f, 0xf8, 0x3b, 0x08, 0xce,
+  0x3c, 0x60, 0xf1, 0x20, 0xcd, 0x83, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xb1,
+  0x59, 0x82, 0x70, 0x19, 0x6e, 0x60, 0xbf, 0x36, 0x0f, 0xc0, 0x60, 0x96,
+  0xc1, 0x55, 0x5e, 0x25, 0xa8, 0xfb, 0x0e, 0x7c, 0x3c, 0x80, 0x0b, 0x9e,
+  0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0xca, 0xce, 0x83, 0x1f, 0x0f,
+  0xe4, 0x0f, 0xc6, 0x83, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0xee,
+  0x3c, 0xf8, 0xf1, 0x20, 0x10, 0x2e, 0x18, 0xa6, 0xf4, 0x3b, 0x18, 0xf3,
+  0x00, 0x2e, 0x78, 0x6a, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xa8, 0x3d,
+  0x0f, 0xc8, 0x3c, 0x00, 0x83, 0x1a, 0x0f, 0x46, 0x0c, 0x0e, 0x00, 0x04,
+  0xc1, 0x80, 0xe2, 0xf3, 0x80, 0xcc, 0x83, 0x40, 0xb8, 0x60, 0x98, 0x0b,
+  0x9e, 0xba, 0xe3, 0xa9, 0x1b, 0xef, 0x60, 0x98, 0xc3, 0xe7, 0x60, 0x98,
+  0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xf8,
+  0x40, 0x3d, 0x80, 0xf3, 0xe0, 0xc5, 0x83, 0x3e, 0x0f, 0x46, 0x13, 0x02,
+  0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a,
+  0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb8, 0x53, 0x0f, 0xee,
+  0x3c, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x38, 0x54,
+  0x0f, 0xf0, 0x3c, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c,
+  0xb8, 0x54, 0x0f, 0xf2, 0x3c, 0x48, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40,
+  0x10, 0x0c, 0x96, 0x58, 0x0f, 0xf0, 0x3c, 0xc8, 0xf1, 0x20, 0x20, 0xf5,
+  0x20, 0xcd, 0x03, 0x53, 0x0f, 0x46, 0x13, 0x02, 0xe0, 0x82, 0xc7, 0x66,
+  0x09, 0xc2, 0x65, 0xb8, 0x21, 0x05, 0x83, 0x54, 0x0f, 0xc0, 0x60, 0x96,
+  0x01, 0x56, 0xc2, 0x25, 0xb0, 0x15, 0x0f, 0x5a, 0x3c, 0x88, 0xcf, 0x70,
+  0x84, 0x0b, 0x06, 0x2e, 0x1e, 0x10, 0xdf, 0x2c, 0x43, 0xac, 0xd0, 0x4a,
+  0x60, 0x2f, 0x1e, 0xbc, 0x60, 0x10, 0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30,
+  0xcc, 0x05, 0x4f, 0x59, 0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xcc,
+  0x7a, 0xa0, 0xc3, 0x0d, 0x41, 0xac, 0x07, 0x60, 0x30, 0xcb, 0x20, 0x2b,
+  0xb3, 0x12, 0xd8, 0x70, 0xe3, 0x01, 0x7c, 0x66, 0x09, 0x70, 0xc5, 0x6c,
+  0x3c, 0x20, 0xe2, 0x33, 0x4b, 0x80, 0x2b, 0xc3, 0x11, 0x39, 0x18, 0xdc,
+  0x78, 0x20, 0x7c, 0xb3, 0x0c, 0xb5, 0x82, 0x2b, 0x81, 0xe9, 0x60, 0x80,
+  0xe3, 0x41, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65,
+  0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xe1, 0xeb, 0x81, 0x0e, 0x37,
+  0x04, 0xbc, 0x1e, 0x80, 0xc1, 0x2c, 0x83, 0xad, 0xdc, 0x4a, 0x60, 0x60,
+  0x1e, 0x0c, 0xf1, 0x99, 0x25, 0xc0, 0x15, 0x23, 0xc6, 0x3c, 0x80, 0xcf,
+  0x2c, 0x01, 0xae, 0x0c, 0xb4, 0x3c, 0x9a, 0xac, 0x60, 0xb3, 0x42, 0xd8,
+  0x8a, 0x70, 0x2b, 0xa8, 0x28, 0xd0, 0xca, 0x05, 0xc3, 0x98, 0x98, 0x07,
+  0x66, 0x1e, 0xc4, 0x67, 0x38, 0x02, 0x16, 0xce, 0x3c, 0x20, 0xbe, 0x59,
+  0x86, 0x5c, 0xe1, 0x95, 0xc0, 0xd0, 0x3c, 0x88, 0x85, 0xf8, 0x58, 0x30,
+  0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04,
+  0xf1, 0x29, 0x82, 0xdd, 0x03, 0x1d, 0x6e, 0x08, 0xd4, 0x3d, 0x00, 0x83,
+  0x59, 0x06, 0x5d, 0xd9, 0x95, 0xc0, 0x06, 0x38, 0x0f, 0xe0, 0x33, 0x4b,
+  0x00, 0x2e, 0xd6, 0xe6, 0x01, 0x11, 0x9f, 0x59, 0x02, 0x70, 0x19, 0x8e,
+  0xd8, 0x05, 0x37, 0x0f, 0x84, 0x6f, 0x96, 0xa1, 0x57, 0xc0, 0x25, 0x30,
+  0x5e, 0x78, 0xf3, 0x20, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b,
+  0x9e, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xb8, 0xf7, 0x40,
+  0x87, 0x1b, 0x82, 0x7a, 0x0f, 0xc0, 0x60, 0x96, 0xc1, 0x57, 0x7e, 0x25,
+  0xb0, 0x3b, 0x0f, 0x86, 0xf8, 0xcc, 0x12, 0x80, 0x8b, 0x11, 0x7c, 0x1e,
+  0xc0, 0x67, 0x96, 0x00, 0x5c, 0x06, 0x5a, 0x1e, 0x4d, 0x57, 0xb0, 0x5d,
+  0x21, 0x7c, 0x45, 0xf8, 0x15, 0xd6, 0xe0, 0x95, 0x0b, 0x86, 0xb9, 0xe0,
+  0xa9, 0xdb, 0x9e, 0x3a, 0x34, 0x0f, 0x86, 0xb9, 0xbe, 0x0e, 0x86, 0x39,
+  0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0xaf,
+  0xe4, 0x83, 0x7a, 0x0f, 0x68, 0x3d, 0x10, 0xf9, 0x60, 0x34, 0x21, 0x00,
+  0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88,
+  0x44, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80, 0x63, 0xf9, 0x80, 0xdf,
+  0x83, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80, 0x6b, 0xf9,
+  0xa0, 0xdf, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80,
+  0x73, 0xf9, 0xc0, 0xdf, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04,
+  0xc1, 0x60, 0xb1, 0xf9, 0xa0, 0xdf, 0x03, 0x5f, 0x0f, 0x82, 0x94, 0x0f,
+  0xdc, 0x3d, 0x58, 0xf9, 0x60, 0x34, 0x21, 0x00, 0x2e, 0x78, 0x6c, 0x96,
+  0x20, 0x5c, 0x06, 0x5a, 0x1e, 0xd3, 0x60, 0x15, 0xdf, 0x1e, 0x56, 0x85,
+  0x25, 0x5c, 0x45, 0x00, 0x17, 0xdf, 0x1e, 0x5e, 0x65, 0x96, 0x41, 0x5c,
+  0xc8, 0xc5, 0x15, 0x83, 0xe1, 0x88, 0x59, 0x0c, 0xe0, 0x3d, 0x18, 0xbe,
+  0xa3, 0xc5, 0x60, 0x98, 0xe1, 0x86, 0x60, 0xd7, 0x03, 0x32, 0xa8, 0x21,
+  0xd0, 0xe1, 0x88, 0x7f, 0xa0, 0xf7, 0x60, 0xf8, 0x2a, 0x10, 0xf4, 0x42,
+  0x62, 0x98, 0xe1, 0x86, 0xc0, 0xd7, 0x03, 0x32, 0xa8, 0x60, 0xd0, 0x59,
+  0x86, 0x71, 0xc1, 0x97, 0xe0, 0x44, 0x3d, 0x18, 0xe6, 0x6e, 0x3b, 0x18,
+  0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xbe, 0x9e, 0x0f, 0x5a, 0x3e,
+  0x60, 0xf7, 0x40, 0xe7, 0x83, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82,
+  0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x0e, 0x19, 0x31, 0x40,
+  0x00, 0x10, 0x04, 0x03, 0x8e, 0xec, 0x03, 0x9a, 0x0f, 0x0e, 0x22, 0x18,
+  0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xae, 0xec, 0x83, 0x9a, 0x0f, 0x18,
+  0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xce, 0xec, 0x03, 0x9b,
+  0x0f, 0x24, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0xc5, 0xed,
+  0x83, 0x9a, 0x0f, 0xec, 0x3d, 0x08, 0xc2, 0x3e, 0x30, 0xf9, 0x60, 0xec,
+  0x83, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xb1, 0x59, 0x02, 0x7c, 0x19, 0x6e,
+  0x30, 0xc7, 0xe0, 0xec, 0x03, 0x30, 0x98, 0x65, 0x28, 0x17, 0x73, 0x09,
+  0x2a, 0xde, 0x03, 0x9c, 0x0f, 0xe0, 0x82, 0xa7, 0x46, 0x0c, 0x0e, 0x00,
+  0x04, 0xc1, 0x80, 0x82, 0xfb, 0x20, 0xe7, 0x83, 0x76, 0x0c, 0x54, 0x3e,
+  0x18, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x2a, 0xee, 0x83, 0x9c, 0x0f,
+  0x02, 0xe1, 0x82, 0x61, 0x8a, 0xde, 0x83, 0x9e, 0x0f, 0xe0, 0x82, 0xa7,
+  0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0xaa, 0xfb, 0xc0, 0xe7, 0x03,
+  0x9d, 0x78, 0xf9, 0x60, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x28, 0xbb,
+  0x0f, 0x7c, 0x3e, 0x08, 0x84, 0x0b, 0x86, 0xb9, 0xe0, 0xa9, 0x3b, 0x9e,
+  0xba, 0x5e, 0x0f, 0x86, 0x39, 0xf9, 0x0e, 0x86, 0x39, 0x62, 0x98, 0x23,
+  0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x4f, 0xef, 0x03, 0xb5,
+  0x0f, 0x52, 0x3e, 0xb8, 0xfb, 0x60, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84,
+  0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c,
+  0x10, 0x00, 0x04, 0xc1, 0x80, 0x0b, 0xfd, 0x20, 0xee, 0x83, 0x84, 0x08,
+  0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80, 0x13, 0xfd, 0x40, 0xee, 0x83,
+  0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80, 0x1b, 0xfd, 0x60,
+  0xee, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0x59,
+  0xfd, 0x40, 0xee, 0x83, 0x99, 0x0f, 0x02, 0xbf, 0x0f, 0xc6, 0x3e, 0x00,
+  0xfd, 0x60, 0x34, 0x21, 0x00, 0x2e, 0x78, 0x6c, 0x96, 0x00, 0x5f, 0x86,
+  0x1b, 0x46, 0x32, 0x18, 0xfd, 0x00, 0x0c, 0x66, 0x19, 0xce, 0x05, 0x5f,
+  0x02, 0x2b, 0xf9, 0xe0, 0xe4, 0x83, 0xf8, 0x0c, 0x47, 0xa4, 0x64, 0x80,
+  0xf2, 0x01, 0xf1, 0xcd, 0x32, 0xa0, 0xcb, 0xba, 0x04, 0x96, 0xf2, 0x81,
+  0x4a, 0x06, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94,
+  0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x44, 0xeb, 0x07, 0x3a, 0xdc,
+  0x10, 0xac, 0x7e, 0x00, 0x06, 0xb3, 0x0c, 0xe9, 0xa2, 0x2e, 0x81, 0x0d,
+  0x31, 0x1f, 0xc0, 0x67, 0x96, 0xe0, 0x5d, 0x0c, 0xe6, 0x03, 0x22, 0x3e,
+  0xb3, 0x04, 0xef, 0x32, 0x1c, 0x41, 0x93, 0x41, 0xcc, 0x07, 0xc2, 0x37,
+  0xcb, 0xc0, 0x2e, 0xef, 0x12, 0x58, 0x4d, 0x06, 0x32, 0x1f, 0xc4, 0xc7,
+  0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x44, 0xf2, 0xb1,
+  0x22, 0x88, 0x4f, 0x11, 0xb8, 0x1f, 0xe8, 0x70, 0x43, 0x60, 0xfb, 0x01,
+  0x18, 0xcc, 0x32, 0xb4, 0x8b, 0xbb, 0x04, 0xa6, 0xf3, 0xc1, 0x10, 0x9f,
+  0x59, 0x82, 0x77, 0x31, 0xa2, 0xe7, 0x03, 0xf8, 0xcc, 0x12, 0xbc, 0xcb,
+  0x40, 0xcb, 0xa3, 0xa5, 0x0b, 0xa6, 0x2e, 0x44, 0xbb, 0x08, 0xee, 0xe2,
+  0xa7, 0xc2, 0xba, 0x5c, 0x30, 0x8c, 0xf1, 0x7c, 0x00, 0xf6, 0x41, 0x7c,
+  0x86, 0x23, 0x54, 0x23, 0xec, 0x03, 0xe2, 0x9b, 0x65, 0x80, 0x97, 0x79,
+  0x09, 0x4c, 0xec, 0x83, 0xd5, 0x88, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18,
+  0xe6, 0x82, 0xa7, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xcc,
+  0x3f, 0xd0, 0xe1, 0x86, 0x80, 0xfc, 0x03, 0x30, 0x98, 0x65, 0x88, 0x17,
+  0x79, 0x09, 0x6c, 0x50, 0xfb, 0x00, 0x3e, 0xb3, 0x04, 0xf7, 0x62, 0x67,
+  0x1f, 0x10, 0xf1, 0x99, 0x25, 0xb8, 0x97, 0xe1, 0x88, 0xda, 0x40, 0xfb,
+  0x40, 0xf8, 0x66, 0x19, 0xe8, 0xe5, 0x5e, 0x02, 0xb3, 0x8d, 0xb4, 0x0f,
+  0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x22,
+  0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0xf8, 0x0f, 0x74, 0xb8, 0x21, 0x78,
+  0xff, 0x00, 0x0c, 0x66, 0x19, 0xea, 0xc5, 0x5e, 0x02, 0x8b, 0xfb, 0x60,
+  0x88, 0xcf, 0x2c, 0xc1, 0xbd, 0x18, 0x61, 0xf7, 0x01, 0x7c, 0x66, 0x09,
+  0xee, 0x65, 0xa0, 0xe5, 0xd1, 0xe2, 0x05, 0x93, 0x17, 0xa2, 0x5e, 0x04,
+  0x7b, 0x01, 0x9d, 0x79, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xba, 0xed, 0xa9,
+  0x13, 0xfb, 0x60, 0x98, 0xbb, 0xf3, 0x60, 0x98, 0x23, 0x86, 0x39, 0x62,
+  0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xf8, 0xfe, 0x3f, 0x78, 0xff,
+  0xc0, 0xf5, 0x03, 0xfe, 0x0f, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08,
+  0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00,
+  0x01, 0x40, 0x10, 0x0c, 0x38, 0x13, 0x14, 0xec, 0x3f, 0x48, 0x88, 0x60,
+  0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb8, 0x13, 0x14, 0xee, 0x3f, 0x48,
+  0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x38, 0x14, 0x14, 0xf0,
+  0x3f, 0x48, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x16, 0x18,
+  0x14, 0xee, 0x3f, 0xc0, 0xfd, 0x20, 0x18, 0x41, 0x01, 0xfd, 0x83, 0x12,
+  0x14, 0x46, 0x13, 0x02, 0xe0, 0x82, 0xc7, 0x66, 0x09, 0xf0, 0x65, 0xa0,
+  0xe5, 0x31, 0x8d, 0x71, 0x91, 0xfd, 0x41, 0x5c, 0x58, 0xa2, 0x5c, 0x84,
+  0x7b, 0x91, 0xfd, 0xc1, 0x5c, 0x66, 0x19, 0xf2, 0x65, 0x5f, 0x50, 0x33,
+  0x18, 0x8e, 0x98, 0x3d, 0xf5, 0x0f, 0x86, 0xef, 0x68, 0x6f, 0x98, 0xe1,
+  0x86, 0xa0, 0xf6, 0x03, 0x32, 0xa8, 0x21, 0xd0, 0xe1, 0x88, 0xfc, 0x70,
+  0xff, 0x60, 0xf8, 0x2a, 0x10, 0xf4, 0xf6, 0x63, 0x98, 0xe1, 0x86, 0x00,
+  0xf7, 0x03, 0x32, 0xa8, 0x60, 0xd0, 0x59, 0x06, 0x7d, 0x79, 0x99, 0xe0,
+  0xf8, 0x3e, 0x18, 0xe6, 0x62, 0x3d, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40,
+  0x10, 0x0c, 0xbe, 0x1b, 0x14, 0x4e, 0x50, 0x30, 0xff, 0x80, 0x06, 0x85,
+  0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d,
+  0x20, 0x86, 0x22, 0x0e, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xce,
+  0x07, 0x05, 0x17, 0x14, 0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04,
+  0x03, 0xee, 0x07, 0x85, 0x17, 0x14, 0x18, 0x22, 0x18, 0x31, 0x40, 0x00,
+  0x10, 0x04, 0x03, 0x0e, 0x0c, 0x05, 0x18, 0x14, 0x24, 0x22, 0x18, 0x31,
+  0x50, 0x00, 0x10, 0x04, 0x83, 0x05, 0x0d, 0x85, 0x17, 0x14, 0xe0, 0x3f,
+  0x08, 0x76, 0x50, 0x00, 0x41, 0xa1, 0x07, 0x85, 0xd1, 0x84, 0x00, 0xb8,
+  0xe0, 0xb1, 0x59, 0x82, 0x97, 0x19, 0x6e, 0x00, 0xcf, 0x20, 0x0c, 0x05,
+  0x30, 0x98, 0x65, 0xe0, 0x97, 0x7e, 0x09, 0x6a, 0xfd, 0x03, 0x19, 0x14,
+  0xe0, 0x82, 0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x52, 0x43,
+  0x61, 0x06, 0x85, 0xf6, 0x23, 0x41, 0x61, 0xc4, 0xe0, 0x00, 0x40, 0x10,
+  0x0c, 0xa8, 0x35, 0x14, 0x66, 0x50, 0x08, 0x84, 0x0b, 0x86, 0x29, 0xf7,
+  0x0f, 0x6e, 0x50, 0x80, 0x0b, 0x9e, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04,
+  0x03, 0xea, 0x0d, 0x05, 0x1c, 0x14, 0x68, 0x24, 0x05, 0x85, 0x11, 0x83,
+  0x03, 0x00, 0x41, 0x30, 0xa0, 0xe0, 0x50, 0xc0, 0x41, 0x21, 0x10, 0x2e,
+  0x18, 0xe6, 0x82, 0xa7, 0xee, 0x78, 0xea, 0x6e, 0x3f, 0x18, 0xe6, 0xd8,
+  0x3d, 0x18, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40,
+  0x10, 0x0c, 0x3e, 0x3a, 0x14, 0xc8, 0x50, 0x18, 0x41, 0x21, 0x0e, 0x85,
+  0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d,
+  0x20, 0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x6e,
+  0x0f, 0x85, 0x35, 0x14, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04,
+  0x03, 0x8e, 0x0f, 0x05, 0x36, 0x14, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00,
+  0x10, 0x04, 0x03, 0xae, 0x0f, 0x85, 0x36, 0x14, 0x12, 0x22, 0x18, 0x31,
+  0x50, 0x00, 0x10, 0x04, 0x83, 0xa5, 0x14, 0x05, 0x36, 0x14, 0x5a, 0x50,
+  0x08, 0xf0, 0x50, 0xe8, 0x41, 0x41, 0x0f, 0x85, 0xd1, 0x84, 0x00, 0xb8,
+  0xe0, 0xb1, 0x59, 0x82, 0x97, 0x19, 0x6e, 0xe8, 0xcf, 0xa0, 0x0f, 0x05,
+  0x30, 0x98, 0x65, 0xf0, 0x97, 0x97, 0x09, 0xec, 0xff, 0x83, 0x10, 0x14,
+  0xe2, 0x33, 0x1c, 0x91, 0x82, 0x81, 0x08, 0x0a, 0xc4, 0x37, 0xcb, 0xf0,
+  0x2f, 0x22, 0x13, 0xd8, 0x08, 0x0a, 0x2a, 0x18, 0xc4, 0xc7, 0x82, 0x81,
+  0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88,
+  0x4f, 0x11, 0xa7, 0x28, 0xe8, 0x70, 0x43, 0x50, 0x8a, 0x02, 0x18, 0xcc,
+  0x32, 0x80, 0x4c, 0xc8, 0x04, 0x36, 0xac, 0xa0, 0x00, 0x9f, 0x59, 0x02,
+  0x93, 0x31, 0x15, 0x14, 0x88, 0xf8, 0xcc, 0x12, 0x98, 0xcc, 0x70, 0x04,
+  0x0d, 0x06, 0x2b, 0x28, 0x08, 0xdf, 0x2c, 0xc3, 0xc8, 0x98, 0x4c, 0x60,
+  0x35, 0x18, 0xb0, 0xa0, 0x10, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc,
+  0x05, 0x4f, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xc8, 0xa2,
+  0xa0, 0xc3, 0x0d, 0x01, 0x2c, 0x0a, 0x60, 0x30, 0xcb, 0x40, 0x32, 0x25,
+  0x13, 0x18, 0x0d, 0x0a, 0x43, 0x7c, 0x66, 0x09, 0x4c, 0xc6, 0x88, 0x1b,
+  0x14, 0xe0, 0x33, 0x4b, 0x60, 0x32, 0x03, 0x2d, 0x8f, 0x06, 0x32, 0x58,
+  0xc8, 0x10, 0x24, 0x23, 0x94, 0x8c, 0x1f, 0x0a, 0x22, 0x73, 0xc1, 0x30,
+  0x66, 0x83, 0x82, 0x0e, 0x0a, 0xf1, 0x19, 0x8e, 0x20, 0x95, 0x1d, 0x14,
+  0x88, 0x6f, 0x96, 0xe1, 0x64, 0x54, 0x26, 0x30, 0x1e, 0x14, 0x4a, 0x25,
+  0x3e, 0x16, 0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0xc0, 0x90,
+  0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x00, 0x47, 0x41, 0x87, 0x1b, 0x02, 0x5f,
+  0x14, 0xc0, 0x60, 0x96, 0x01, 0x65, 0x52, 0x26, 0xb0, 0x81, 0x0c, 0x05,
+  0xf8, 0xcc, 0x12, 0xb8, 0x8c, 0x85, 0xa1, 0x40, 0xc4, 0x67, 0x96, 0xc0,
+  0x65, 0x86, 0x23, 0x5e, 0x45, 0x0c, 0x05, 0xe1, 0x9b, 0x65, 0x58, 0x19,
+  0x97, 0x09, 0x0c, 0x56, 0xc6, 0x50, 0x88, 0x8f, 0x05, 0x0e, 0x7d, 0x2e,
+  0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22,
+  0xd6, 0x51, 0xd0, 0xe1, 0x86, 0x20, 0x1d, 0x05, 0x30, 0x98, 0x65, 0x60,
+  0x99, 0x96, 0x09, 0x6c, 0x0d, 0x85, 0x21, 0x3e, 0xb3, 0x04, 0x2e, 0x63,
+  0x04, 0x1c, 0x0a, 0xf0, 0x99, 0x25, 0x70, 0x99, 0x81, 0x96, 0x47, 0x43,
+  0x19, 0x2c, 0x65, 0x08, 0x96, 0x11, 0x5a, 0x86, 0xae, 0x54, 0xe6, 0x82,
+  0x61, 0x2e, 0x78, 0xea, 0xb6, 0xa7, 0x8e, 0x07, 0x85, 0x61, 0x2e, 0xee,
+  0x83, 0x61, 0x8e, 0x18, 0xe6, 0x88, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04,
+  0xc1, 0xe0, 0xcb, 0x47, 0x21, 0x1d, 0x05, 0x54, 0x14, 0xec, 0x51, 0x18,
+  0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04,
+  0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xe0, 0x40,
+  0x52, 0x80, 0x47, 0x21, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30,
+  0xe0, 0x42, 0x52, 0x88, 0x47, 0x21, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00,
+  0x41, 0x30, 0xe0, 0x44, 0x52, 0x90, 0x47, 0x21, 0x21, 0x82, 0x11, 0x03,
+  0x05, 0x00, 0x41, 0x30, 0x58, 0x54, 0x52, 0x88, 0x47, 0x41, 0x16, 0x85,
+  0xa0, 0x1f, 0x05, 0x71, 0x14, 0xfe, 0x51, 0x18, 0x4d, 0x08, 0x80, 0x0b,
+  0x1e, 0x9b, 0x25, 0x78, 0x99, 0x81, 0x96, 0xc7, 0x34, 0xf4, 0xc5, 0x34,
+  0x89, 0x7c, 0x61, 0x09, 0x7e, 0x11, 0x5c, 0xc6, 0x34, 0x89, 0x7e, 0x99,
+  0x65, 0x80, 0x19, 0x99, 0x11, 0xd5, 0x60, 0x38, 0x42, 0xf6, 0xc8, 0x51,
+  0x18, 0xbe, 0x9b, 0xbd, 0x61, 0x86, 0x1b, 0x82, 0x57, 0x14, 0xc8, 0xa0,
+  0x86, 0x40, 0x87, 0x23, 0xe6, 0x05, 0x1d, 0x85, 0xe1, 0xab, 0x40, 0xd0,
+  0xab, 0x97, 0x61, 0x86, 0x1b, 0x02, 0x59, 0x14, 0xc8, 0xa0, 0x82, 0x41,
+  0x67, 0x19, 0x62, 0xc6, 0x6c, 0x82, 0xb3, 0x43, 0x61, 0x98, 0x5b, 0xfd,
+  0x60, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xf8, 0x62, 0x52, 0x08,
+  0x49, 0x01, 0x1c, 0x05, 0x97, 0x14, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41,
+  0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x38, 0x64, 0xc4,
+  0x00, 0x01, 0x40, 0x10, 0x0c, 0x38, 0x9c, 0x14, 0x50, 0x52, 0x38, 0x88,
+  0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb8, 0x9c, 0x14, 0x52, 0x52,
+  0x60, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x38, 0x9d, 0x14,
+  0x54, 0x52, 0x90, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x16,
+  0xb1, 0x14, 0x52, 0x52, 0x50, 0x47, 0x21, 0xa8, 0x49, 0x41, 0x1f, 0x85,
+  0x9b, 0x14, 0x46, 0x13, 0x02, 0xe0, 0x82, 0xc7, 0x66, 0x09, 0xcc, 0x66,
+  0xb8, 0x41, 0x57, 0x83, 0x9d, 0x14, 0xc0, 0x60, 0x96, 0x61, 0x66, 0x68,
+  0x26, 0xa8, 0x72, 0x14, 0x58, 0x52, 0x80, 0x0b, 0x9e, 0x1a, 0x31, 0x38,
+  0x00, 0x10, 0x04, 0x03, 0x8a, 0x2c, 0x85, 0x96, 0x14, 0xd8, 0xcf, 0x1f,
+  0x85, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0xca, 0x52, 0x68, 0x49,
+  0x21, 0x10, 0x2e, 0x18, 0xa6, 0xd0, 0x51, 0x88, 0x49, 0x01, 0x2e, 0x78,
+  0x6a, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xa8, 0xb4, 0x14, 0x64, 0x52,
+  0x70, 0x99, 0x91, 0x14, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x52,
+  0x4b, 0x41, 0x26, 0x85, 0x40, 0xb8, 0x60, 0x98, 0x0b, 0x9e, 0xba, 0xe3,
+  0xa9, 0x8b, 0x45, 0x61, 0x98, 0x33, 0xff, 0x60, 0x98, 0x23, 0x86, 0x39,
+  0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xf8, 0xdc, 0x52, 0xf0,
+  0x49, 0xa1, 0x1f, 0x85, 0xb5, 0x14, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41,
+  0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4,
+  0x00, 0x01, 0x40, 0x10, 0x0c, 0xb8, 0xba, 0x14, 0xca, 0x52, 0x48, 0x88,
+  0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x38, 0xbb, 0x14, 0xcc, 0x52,
+  0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb8, 0xbb, 0x14,
+  0xce, 0x52, 0x48, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x96,
+  0xbf, 0x14, 0xcc, 0x52, 0x38, 0x49, 0x21, 0x90, 0x4b, 0xe1, 0x26, 0x05,
+  0xba, 0x14, 0x46, 0x13, 0x02, 0xe0, 0x82, 0xc7, 0x66, 0x09, 0xcc, 0x66,
+  0xb8, 0xe1, 0x5e, 0x83, 0xbb, 0x14, 0xc0, 0x60, 0x96, 0xa1, 0x66, 0xcc,
+  0x26, 0xb0, 0x7c, 0x14, 0xf6, 0x51, 0x88, 0xcf, 0x70, 0x04, 0x0a, 0x06,
+  0xfc, 0x28, 0x10, 0xdf, 0x2c, 0x83, 0xcd, 0xe4, 0x4c, 0x60, 0xfd, 0x28,
+  0xa4, 0x60, 0x10, 0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f,
+  0x59, 0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0x84, 0xa6, 0xa0, 0xc3,
+  0x0d, 0xc1, 0x5f, 0x0a, 0x60, 0x30, 0xcb, 0x70, 0x33, 0x38, 0x13, 0xd8,
+  0x50, 0x92, 0x02, 0x7c, 0x66, 0x09, 0x7a, 0xc6, 0x48, 0x52, 0x20, 0xe2,
+  0x33, 0x4b, 0xd0, 0x33, 0xc3, 0x11, 0x33, 0x18, 0x94, 0xa4, 0x20, 0x7c,
+  0xb3, 0x0c, 0x3a, 0xd3, 0x33, 0x81, 0xd1, 0x60, 0x60, 0x92, 0x42, 0x7c,
+  0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x41, 0x24, 0x1f,
+  0x2b, 0x82, 0xf8, 0x14, 0xc1, 0x9a, 0x82, 0x0e, 0x37, 0x04, 0xaa, 0x29,
+  0x80, 0xc1, 0x2c, 0xc3, 0xce, 0xf0, 0x4c, 0x60, 0x2e, 0x29, 0x0c, 0xf1,
+  0x99, 0x25, 0xe8, 0x19, 0x23, 0x62, 0x52, 0x80, 0xcf, 0x2c, 0x41, 0xcf,
+  0x0c, 0xb4, 0x3c, 0xda, 0xcd, 0x60, 0x38, 0x43, 0xec, 0x8c, 0xc0, 0x33,
+  0x7c, 0x28, 0xe4, 0xcc, 0x05, 0xc3, 0x18, 0x4c, 0x0a, 0x34, 0x29, 0xc4,
+  0x67, 0x38, 0xc2, 0x6f, 0x6a, 0x52, 0x20, 0xbe, 0x59, 0x06, 0x9f, 0x09,
+  0x9b, 0xc0, 0x6c, 0x52, 0xf8, 0x9b, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82,
+  0x61, 0x2e, 0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x42,
+  0x37, 0x05, 0x1d, 0x6e, 0x08, 0x70, 0x53, 0x00, 0x83, 0x59, 0x86, 0x9f,
+  0x01, 0x9b, 0xc0, 0x06, 0x9f, 0x14, 0xe0, 0x33, 0x4b, 0x50, 0x36, 0xb6,
+  0x93, 0x02, 0x11, 0x9f, 0x59, 0x82, 0xb2, 0x19, 0x8e, 0x48, 0x1d, 0x9e,
+  0x14, 0x84, 0x6f, 0x96, 0x41, 0x6c, 0xca, 0x26, 0x30, 0xd5, 0xe9, 0x49,
+  0x21, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20,
+  0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x28, 0x4f, 0x41, 0x87, 0x1b, 0x82,
+  0xf1, 0x14, 0xc0, 0x60, 0x96, 0x61, 0x6c, 0xc8, 0x26, 0xb0, 0xb2, 0x14,
+  0x86, 0xf8, 0xcc, 0x12, 0x94, 0x8d, 0x11, 0x6a, 0x29, 0xc0, 0x67, 0x96,
+  0xa0, 0x6c, 0x06, 0x5a, 0x1e, 0xed, 0x67, 0x30, 0xb0, 0x21, 0xc6, 0x46,
+  0x20, 0x1b, 0xb4, 0x0b, 0x9b, 0x0b, 0x86, 0xb9, 0xe0, 0xa9, 0xdb, 0x9e,
+  0x3a, 0x9b, 0x14, 0x86, 0xb9, 0x35, 0x14, 0x86, 0x39, 0x62, 0x98, 0x23,
+  0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x6f, 0x3e, 0x85, 0xf1,
+  0x14, 0x44, 0x53, 0x80, 0x4f, 0x61, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84,
+  0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c,
+  0x10, 0x00, 0x04, 0xc1, 0x80, 0xd3, 0x4f, 0x41, 0x3d, 0x85, 0x84, 0x08,
+  0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80, 0xdb, 0x4f, 0x61, 0x3d, 0x85,
+  0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80, 0xe3, 0x4f, 0x81,
+  0x3d, 0x85, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0x21,
+  0x51, 0x61, 0x3d, 0x05, 0xd6, 0x14, 0x82, 0xfb, 0x14, 0x78, 0x53, 0xc8,
+  0x4f, 0x61, 0x34, 0x21, 0x00, 0x2e, 0x78, 0x6c, 0x96, 0xc0, 0x6c, 0x06,
+  0x5a, 0x1e, 0xd3, 0x88, 0x19, 0xbd, 0x25, 0x60, 0x86, 0x25, 0x66, 0x46,
+  0x28, 0x1b, 0xbd, 0x25, 0x68, 0xc6, 0xfe, 0x36, 0x98, 0x4d, 0x01, 0x3e,
+  0xb3, 0x0c, 0x67, 0x93, 0x36, 0x7d, 0x1b, 0x0c, 0x47, 0x84, 0x6e, 0xf0,
+  0x9b, 0xc2, 0xf0, 0x9d, 0xe8, 0x06, 0xc3, 0x0c, 0x37, 0x04, 0xaa, 0x29,
+  0x90, 0x41, 0x0d, 0x81, 0x0e, 0x47, 0x14, 0xe3, 0x29, 0x0c, 0x5f, 0x05,
+  0x82, 0xde, 0x31, 0xcc, 0x70, 0x43, 0xd0, 0x9a, 0x02, 0x19, 0x54, 0x30,
+  0xe8, 0x2c, 0x03, 0xda, 0xf4, 0x4d, 0x70, 0x71, 0x29, 0x0c, 0x73, 0xa6,
+  0x28, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x1f, 0x8b, 0x0a,
+  0xfc, 0x29, 0xec, 0xa6, 0x90, 0xa2, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26,
   0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x87, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x17, 0xaa, 0x41, 0x9c, 0x06, 0x07,
-  0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x27, 0xaa, 0x81, 0x9c,
-  0x06, 0x0c, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x37, 0xaa,
-  0xc1, 0x9c, 0x06, 0x12, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1,
-  0xb2, 0xaa, 0x81, 0x9c, 0x06, 0x33, 0x1a, 0x04, 0x7e, 0x1a, 0x8c, 0x69,
-  0x00, 0xaa, 0xc1, 0x68, 0x42, 0x00, 0x5c, 0xf0, 0xd8, 0x2c, 0x41, 0x89,
-  0x0c, 0x37, 0x8c, 0x10, 0xa9, 0x06, 0x60, 0x30, 0xcb, 0x20, 0x1f, 0xf3,
-  0x11, 0x94, 0x8b, 0x06, 0x75, 0x1a, 0xc0, 0x05, 0x4f, 0x8d, 0x18, 0x1c,
-  0x00, 0x08, 0x82, 0x01, 0xd5, 0xaa, 0x81, 0x9d, 0x06, 0x29, 0x74, 0xa6,
-  0xc1, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50, 0xae, 0x1a, 0xd8, 0x69,
-  0x10, 0x08, 0x17, 0x0c, 0x53, 0x31, 0x1a, 0xe8, 0x69, 0x00, 0x17, 0x3c,
-  0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x94, 0xac, 0x06, 0x7b, 0x1a,
-  0xb4, 0x10, 0x9b, 0x06, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0xcd,
-  0x6a, 0xb0, 0xa7, 0x41, 0x20, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0xdd, 0xf1,
-  0xd4, 0xe9, 0x67, 0x30, 0xcc, 0xbd, 0x64, 0x30, 0xcc, 0x11, 0xc3, 0x1c,
-  0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x7c, 0xb7, 0x1a, 0x9c,
-  0x6a, 0x60, 0xa6, 0x01, 0xad, 0x06, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20,
-  0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62,
-  0x80, 0x00, 0x20, 0x08, 0x06, 0x9c, 0xaf, 0x06, 0xae, 0x1a, 0x24, 0x44,
-  0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xdc, 0xaf, 0x06, 0xaf, 0x1a,
-  0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x1c, 0xb8, 0x06,
-  0xb0, 0x1a, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x0b,
-  0xba, 0x06, 0xaf, 0x1a, 0xc0, 0x69, 0x10, 0xec, 0x6a, 0x00, 0xaa, 0x41,
-  0xaf, 0x06, 0xa3, 0x09, 0x01, 0x70, 0xc1, 0x63, 0xb3, 0x04, 0x25, 0x32,
-  0xdc, 0x00, 0x46, 0xe0, 0x1a, 0x80, 0xc1, 0x2c, 0x03, 0x7d, 0x94, 0x48,
-  0x60, 0x62, 0x1a, 0x90, 0x69, 0x10, 0x9f, 0xe1, 0x88, 0x32, 0x2a, 0xd3,
-  0x80, 0xf8, 0x66, 0x19, 0xea, 0x03, 0x3f, 0x02, 0x33, 0xd3, 0xc0, 0x8c,
-  0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x0c,
-  0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0x75, 0x0d, 0x74, 0xb8, 0x21, 0x40,
-  0xd7, 0x00, 0x0c, 0x66, 0x19, 0xec, 0xe3, 0x3e, 0x02, 0x1b, 0xdc, 0x34,
-  0x80, 0xcf, 0x2c, 0x01, 0x7f, 0x58, 0x9b, 0x06, 0x44, 0x7c, 0x66, 0x09,
-  0xf8, 0x63, 0x38, 0x02, 0x8e, 0xdc, 0x34, 0x10, 0xbe, 0x59, 0x86, 0xfc,
-  0xe0, 0x8f, 0xc0, 0xe2, 0xe8, 0x4d, 0x83, 0xf8, 0x58, 0xe0, 0xd0, 0xe7,
-  0x82, 0x61, 0x2e, 0x78, 0xca, 0x82, 0x48, 0x3e, 0x56, 0x04, 0xf1, 0x29,
-  0xa2, 0x5e, 0x03, 0x1d, 0x6e, 0x08, 0xe6, 0x35, 0x00, 0x83, 0x59, 0x06,
-  0xfd, 0xd8, 0x8f, 0xc0, 0xee, 0x34, 0x18, 0xe2, 0x33, 0x4b, 0xc0, 0x1f,
-  0x46, 0xe8, 0x69, 0x00, 0x9f, 0x59, 0x02, 0xfe, 0x18, 0x68, 0x79, 0x34,
-  0xfb, 0xc0, 0xee, 0x83, 0xd0, 0x0f, 0x61, 0x3f, 0xec, 0x31, 0xc0, 0x8f,
-  0x0b, 0x86, 0xb1, 0x3c, 0x0d, 0xfa, 0x34, 0x88, 0xcf, 0x70, 0x44, 0x1f,
-  0xf9, 0x69, 0x40, 0x7c, 0xb3, 0x0c, 0xfd, 0x01, 0x22, 0x81, 0xfd, 0x69,
-  0xe0, 0x47, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94,
-  0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0xc4, 0xc8, 0x06, 0x3a, 0xdc,
-  0x10, 0x84, 0x6c, 0x00, 0x06, 0xb3, 0x0c, 0xfe, 0xf1, 0x1f, 0x81, 0x0d,
-  0xa7, 0x1a, 0xc0, 0x67, 0x96, 0x80, 0x44, 0x8c, 0x54, 0x03, 0x22, 0x3e,
-  0xb3, 0x04, 0x24, 0x32, 0x1c, 0x81, 0x4a, 0xa5, 0x1a, 0x08, 0xdf, 0x2c,
-  0x43, 0x88, 0x90, 0x48, 0x60, 0xa9, 0x64, 0xaa, 0x41, 0x7c, 0x2c, 0x70,
-  0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x41, 0x24, 0x1f, 0x2b, 0x82,
-  0xf8, 0x14, 0xe1, 0xb2, 0x81, 0x0e, 0x37, 0x04, 0x2c, 0x1b, 0x80, 0xc1,
-  0x2c, 0x83, 0x88, 0x8c, 0x48, 0x60, 0xae, 0x1a, 0x0c, 0xf1, 0x99, 0x25,
-  0x20, 0x11, 0x23, 0x66, 0x35, 0x80, 0xcf, 0x2c, 0x01, 0x89, 0x0c, 0xb4,
-  0x3c, 0x9a, 0x7f, 0x60, 0xff, 0x41, 0x88, 0x88, 0x30, 0x22, 0x66, 0x19,
-  0x80, 0xc8, 0x05, 0xc3, 0x5c, 0xf0, 0xd4, 0x6d, 0x4f, 0xdd, 0x9f, 0x06,
-  0xc3, 0x1c, 0x6d, 0x06, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18,
-  0x1c, 0x00, 0x08, 0x82, 0xc1, 0xc7, 0xb3, 0x01, 0xcb, 0x06, 0xeb, 0x1a,
-  0xe4, 0x6c, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30,
-  0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00, 0x82,
-  0x60, 0xc0, 0x8d, 0x6d, 0x30, 0xb3, 0x41, 0x42, 0x04, 0x23, 0x06, 0x08,
-  0x00, 0x82, 0x60, 0xc0, 0x91, 0x6d, 0x40, 0xb3, 0x41, 0x42, 0x04, 0x23,
-  0x06, 0x08, 0x00, 0x82, 0x60, 0xc0, 0x95, 0x6d, 0x50, 0xb3, 0x41, 0x42,
-  0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0xb4, 0x6d, 0x40, 0xb3,
-  0x41, 0xbd, 0x06, 0x01, 0xd8, 0x06, 0x25, 0x1b, 0x88, 0x6d, 0x30, 0x9a,
-  0x10, 0x00, 0x17, 0x3c, 0x36, 0x4b, 0x50, 0x22, 0x03, 0x2d, 0x8f, 0x69,
-  0xc0, 0x07, 0x8b, 0x0b, 0xef, 0xc1, 0x12, 0xf2, 0x21, 0x90, 0x08, 0x8b,
-  0x0b, 0xf3, 0x31, 0xcb, 0x60, 0x22, 0x28, 0x52, 0x4e, 0xc3, 0x11, 0xea,
-  0x74, 0xb2, 0xc1, 0xf0, 0xdd, 0x3a, 0x0d, 0x33, 0xdc, 0x10, 0xc8, 0x6b,
-  0x40, 0x06, 0x35, 0x04, 0x3a, 0x1c, 0xc1, 0x4e, 0x2b, 0x1b, 0x0c, 0x5f,
-  0x05, 0x82, 0x9e, 0x3b, 0x0d, 0x33, 0xdc, 0x10, 0xd4, 0x6b, 0x40, 0x06,
-  0x15, 0x0c, 0x3a, 0xcb, 0x70, 0x22, 0x3c, 0x12, 0x5c, 0xae, 0x06, 0xc3,
-  0x9c, 0x7b, 0x06, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x47,
-  0xb7, 0x01, 0xd9, 0x06, 0x23, 0x1b, 0xc4, 0x6d, 0x30, 0x9a, 0x10, 0x00,
-  0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0xc4,
-  0x21, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xc0, 0xed, 0x6d, 0xb0, 0xb6,
-  0xc1, 0x41, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xc0, 0xf1, 0x6d,
-  0xc0, 0xb6, 0x01, 0x43, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xc0,
-  0xf5, 0x6d, 0xd0, 0xb6, 0x81, 0x44, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82,
-  0x60, 0xb0, 0x94, 0x6e, 0xc0, 0xb6, 0x41, 0xcb, 0x06, 0x01, 0xde, 0x06,
-  0x3d, 0x1b, 0xe8, 0x6d, 0x30, 0x9a, 0x10, 0x00, 0x17, 0x3c, 0x36, 0x4b,
-  0xc0, 0x23, 0xc3, 0x0d, 0xfd, 0xe4, 0xb7, 0x01, 0x18, 0xcc, 0x32, 0xa4,
-  0x88, 0x8a, 0x04, 0x85, 0xb2, 0xc1, 0xdb, 0x06, 0x70, 0xc1, 0x53, 0x23,
-  0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0x9d, 0x6e, 0x00, 0xb7, 0x01, 0x49,
-  0x85, 0x6d, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x14, 0xea, 0x06,
-  0x70, 0x1b, 0x04, 0xc2, 0x05, 0xc3, 0xd4, 0xca, 0x06, 0x74, 0x1b, 0xc0,
-  0x05, 0x4f, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x01, 0xc5, 0xba, 0x41,
-  0xdd, 0x06, 0x27, 0x65, 0xb6, 0xc1, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18,
-  0x50, 0xad, 0x1b, 0xd4, 0x6d, 0x10, 0x08, 0x17, 0x0c, 0x73, 0xc1, 0x53,
-  0x77, 0x3c, 0x75, 0xf4, 0x1a, 0x0c, 0x73, 0x29, 0x1a, 0x0c, 0x73, 0xc4,
-  0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x5f, 0xec,
-  0x06, 0xa1, 0x1b, 0x80, 0x6d, 0xe0, 0xba, 0xc1, 0x68, 0x42, 0x00, 0x8c,
-  0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89,
-  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x87, 0xbb, 0x01, 0xea, 0x06,
-  0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x97, 0xbb, 0x41,
-  0xea, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0xa7,
-  0xbb, 0x81, 0xea, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82,
-  0xc1, 0x22, 0xbe, 0x41, 0xea, 0x06, 0x6a, 0x1b, 0x04, 0xb5, 0x1b, 0xe8,
-  0x6d, 0x70, 0xbb, 0xc1, 0x68, 0x42, 0x00, 0x5c, 0xf0, 0xd8, 0x2c, 0x01,
-  0x8f, 0x0c, 0x37, 0xe8, 0x94, 0xee, 0x06, 0x60, 0x30, 0xcb, 0xb0, 0x22,
-  0x3c, 0x12, 0x18, 0xcf, 0x06, 0x3e, 0x1b, 0xc4, 0x67, 0x38, 0x02, 0xac,
-  0x7e, 0x36, 0x20, 0xbe, 0x59, 0x06, 0x16, 0x79, 0x91, 0xc0, 0xc0, 0x36,
-  0x08, 0xab, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca,
-  0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x82, 0x7c, 0x03, 0x1d, 0x6e,
-  0x08, 0xc4, 0x37, 0x00, 0x83, 0x59, 0x86, 0x16, 0x71, 0x91, 0xc0, 0x06,
-  0xb4, 0x0d, 0xe0, 0x33, 0x4b, 0x30, 0x23, 0x76, 0xb6, 0x01, 0x11, 0x9f,
-  0x59, 0x82, 0x19, 0x19, 0x8e, 0x58, 0x2b, 0xb4, 0x0d, 0x84, 0x6f, 0x96,
-  0x01, 0x46, 0x66, 0x24, 0x30, 0xb6, 0x4a, 0xdb, 0x20, 0x3e, 0x16, 0x38,
-  0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41,
-  0x7c, 0x8a, 0x78, 0xdf, 0x40, 0x87, 0x1b, 0x82, 0xf6, 0x0d, 0xc0, 0x60,
-  0x96, 0x21, 0x46, 0x64, 0x24, 0xb0, 0xb8, 0x0d, 0x86, 0xf8, 0xcc, 0x12,
-  0xcc, 0x88, 0x11, 0x74, 0x1b, 0xc0, 0x67, 0x96, 0x60, 0x46, 0x06, 0x5a,
-  0x1e, 0xad, 0x45, 0x30, 0x17, 0x21, 0x62, 0x44, 0x90, 0x11, 0x76, 0x0d,
-  0x5e, 0xe4, 0x82, 0x61, 0x6c, 0x6e, 0x83, 0xbb, 0x0d, 0xe2, 0x33, 0x1c,
-  0x71, 0x57, 0x78, 0x1b, 0x10, 0xdf, 0x2c, 0x03, 0x8d, 0xdc, 0x48, 0x60,
-  0x79, 0x1b, 0xe0, 0x55, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17,
-  0x3c, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xd1, 0xbf, 0x81,
-  0x0e, 0x37, 0x04, 0xfb, 0x1b, 0x80, 0xc1, 0x2c, 0x43, 0x8d, 0xd8, 0x48,
-  0x60, 0x43, 0xe8, 0x06, 0xf0, 0x99, 0x25, 0xd8, 0x11, 0xf3, 0xdb, 0x80,
-  0x88, 0xcf, 0x2c, 0xc1, 0x8e, 0x0c, 0x47, 0x88, 0xd6, 0xdf, 0x06, 0xc2,
-  0x37, 0xcb, 0x80, 0x23, 0x3b, 0x12, 0xd8, 0x68, 0x81, 0x6e, 0x10, 0x1f,
-  0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x10, 0xc9, 0xc7,
-  0x8a, 0x20, 0x3e, 0x45, 0xa0, 0x70, 0xa0, 0xc3, 0x0d, 0x81, 0x09, 0x07,
-  0x60, 0x30, 0xcb, 0x90, 0x23, 0x3a, 0x12, 0x18, 0xea, 0x06, 0x43, 0x7c,
-  0x66, 0x09, 0x76, 0xc4, 0x88, 0xd6, 0x0d, 0xe0, 0x33, 0x4b, 0xb0, 0x23,
-  0x03, 0x2d, 0x8f, 0x56, 0x23, 0x98, 0x8d, 0x10, 0x39, 0x22, 0xe8, 0x88,
-  0xce, 0x06, 0x37, 0x72, 0xc1, 0x30, 0x17, 0x3c, 0x75, 0xdb, 0x53, 0x97,
-  0xb7, 0xc1, 0x30, 0xe7, 0xaa, 0xc1, 0x30, 0x47, 0x0c, 0x73, 0xc4, 0x30,
-  0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xf0, 0xd9, 0x70, 0x60, 0xc2, 0x41,
-  0xf9, 0x06, 0x33, 0x1c, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c,
-  0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x91, 0xc8, 0x88, 0x01, 0x02,
-  0x80, 0x20, 0x18, 0x70, 0x3d, 0x1c, 0xb4, 0x70, 0x90, 0x10, 0xc1, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x70, 0x3e, 0x1c, 0xb8, 0x70, 0x90, 0x10,
-  0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70, 0x3f, 0x1c, 0xbc, 0x70,
-  0x90, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c, 0x67, 0x1c,
-  0xb8, 0x70, 0xf0, 0xbe, 0x41, 0xa0, 0xc3, 0xc1, 0xff, 0x06, 0x3c, 0x1c,
-  0x8c, 0x26, 0x04, 0xc0, 0x05, 0x8f, 0xcd, 0x12, 0xf0, 0xc8, 0x40, 0xcb,
-  0x63, 0x1a, 0x27, 0x02, 0x82, 0x83, 0x89, 0xb0, 0x44, 0x8a, 0x08, 0x3b,
-  0x02, 0x82, 0x83, 0x8a, 0xcc, 0x32, 0xf4, 0xc8, 0x8f, 0xfc, 0xd6, 0x70,
-  0x84, 0xfa, 0x84, 0x70, 0x30, 0x7c, 0xb7, 0x3e, 0xc3, 0x0c, 0x37, 0x04,
-  0xec, 0x1b, 0x90, 0x41, 0x0d, 0x81, 0x0e, 0x47, 0x98, 0x57, 0x09, 0x07,
-  0xc3, 0x57, 0x81, 0xa0, 0x87, 0x5e, 0xc3, 0x0c, 0x37, 0x04, 0xef, 0x1b,
-  0x90, 0x41, 0x05, 0x83, 0xce, 0x32, 0xf8, 0xc8, 0x9c, 0x04, 0x37, 0xbb,
-  0xc1, 0x30, 0x87, 0xae, 0xc1, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60,
-  0xf0, 0xb9, 0x71, 0xe0, 0xc3, 0x41, 0xff, 0x06, 0x6b, 0x1c, 0x8c, 0x26,
-  0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31,
-  0x14, 0x71, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70, 0x75, 0x1c,
-  0x94, 0x71, 0x70, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70,
-  0x76, 0x1c, 0x98, 0x71, 0xc0, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20,
-  0x18, 0x70, 0x77, 0x1c, 0x9c, 0x71, 0x20, 0x11, 0xc1, 0x88, 0x81, 0x02,
-  0x80, 0x20, 0x18, 0x2c, 0x7f, 0x1c, 0x98, 0x71, 0x70, 0xc2, 0x41, 0x20,
-  0xc7, 0xc1, 0x0d, 0x07, 0x74, 0x1c, 0x8c, 0x26, 0x04, 0xc0, 0x05, 0x8f,
-  0xcd, 0x12, 0xcc, 0xc9, 0x70, 0xc3, 0x7d, 0xe1, 0x71, 0x00, 0x06, 0xb3,
-  0x0c, 0x60, 0x12, 0x26, 0x41, 0x89, 0x70, 0x90, 0xc6, 0x01, 0x5c, 0xf0,
-  0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50, 0xa1, 0x1c, 0xa8, 0x71,
-  0x40, 0x42, 0x3b, 0x1c, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x01, 0x25,
-  0xca, 0x81, 0x1a, 0x07, 0x81, 0x70, 0xc1, 0x30, 0x55, 0xc2, 0x81, 0x1b,
-  0x07, 0x70, 0xc1, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0x99,
-  0x72, 0xf0, 0xc6, 0x41, 0x88, 0x81, 0x71, 0x30, 0x62, 0x70, 0x00, 0x20,
-  0x08, 0x06, 0xd4, 0x29, 0x07, 0x6f, 0x1c, 0x04, 0xc2, 0x05, 0xc3, 0x5c,
-  0xf0, 0xd4, 0x1d, 0x4f, 0x9d, 0xfb, 0x06, 0xc3, 0xdc, 0xc8, 0x06, 0xc3,
-  0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1,
-  0xb7, 0xca, 0xc1, 0x1e, 0x07, 0x3a, 0x1c, 0xa0, 0x72, 0x30, 0x9a, 0x10,
-  0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50,
-  0x44, 0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xc0, 0xc9, 0x72, 0x20,
-  0xca, 0x41, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xc0, 0xcd,
-  0x72, 0x30, 0xca, 0x41, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60,
-  0xc0, 0xd1, 0x72, 0x40, 0xca, 0x41, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00,
-  0x82, 0x60, 0xb0, 0xf0, 0x72, 0x30, 0xca, 0x01, 0x19, 0x07, 0xc1, 0x2b,
-  0x07, 0x74, 0x1c, 0xc4, 0x72, 0x30, 0x9a, 0x10, 0x00, 0x17, 0x3c, 0x36,
-  0x4b, 0x30, 0x27, 0xc3, 0x0d, 0x34, 0x46, 0xcb, 0x01, 0x18, 0xcc, 0x32,
-  0x88, 0xc9, 0x9c, 0x04, 0x66, 0xc3, 0x01, 0x0e, 0x07, 0xf1, 0x19, 0x8e,
-  0x00, 0xa3, 0x1c, 0x0e, 0x88, 0x6f, 0x96, 0x61, 0x4c, 0xcc, 0x24, 0x30,
-  0x1d, 0x0e, 0xc2, 0x28, 0x3e, 0x16, 0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b,
-  0x9e, 0xb2, 0xc0, 0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xf0, 0xe5, 0x40,
-  0x87, 0x1b, 0x02, 0x5e, 0x0e, 0xc0, 0x60, 0x96, 0x81, 0x4c, 0xca, 0x24,
-  0xb0, 0x41, 0x8c, 0x03, 0xf8, 0xcc, 0x12, 0xa8, 0x89, 0x85, 0x71, 0x40,
-  0xc4, 0x67, 0x96, 0x40, 0x4d, 0x86, 0x23, 0xd6, 0x48, 0x8c, 0x03, 0xe1,
-  0x9b, 0x65, 0x38, 0x13, 0x35, 0x09, 0x8c, 0x8d, 0xc6, 0x38, 0x88, 0x8f,
-  0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x88, 0xe4, 0x63,
-  0x45, 0x10, 0x9f, 0x22, 0xd2, 0x39, 0xd0, 0xe1, 0x86, 0xe0, 0x9c, 0x03,
-  0x30, 0x98, 0x65, 0x40, 0x93, 0x34, 0x09, 0x6c, 0x8d, 0x83, 0x21, 0x3e,
-  0xb3, 0x04, 0x6a, 0x62, 0x84, 0x1b, 0x07, 0xf0, 0x99, 0x25, 0x50, 0x93,
-  0x81, 0x96, 0x47, 0x23, 0x13, 0xac, 0x4c, 0x08, 0x34, 0x11, 0xd2, 0x84,
-  0x1d, 0x03, 0x33, 0xb9, 0x60, 0x18, 0x6b, 0xe3, 0x20, 0x8e, 0x83, 0xf8,
-  0x0c, 0x47, 0xc4, 0x99, 0x1c, 0x07, 0xc4, 0x37, 0xcb, 0xb0, 0x26, 0x6e,
-  0x12, 0xd8, 0x1c, 0x07, 0x72, 0x16, 0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30,
-  0xcc, 0x05, 0x4f, 0x59, 0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xdc,
-  0x73, 0xa0, 0xc3, 0x0d, 0x41, 0x3d, 0x07, 0x60, 0x30, 0xcb, 0xc0, 0x26,
-  0x6d, 0x12, 0xd8, 0xb0, 0xc7, 0x01, 0x7c, 0x66, 0x09, 0xe4, 0xc4, 0xf0,
-  0x38, 0x20, 0xe2, 0x33, 0x4b, 0x20, 0x27, 0xc3, 0x11, 0x7c, 0x96, 0xc7,
-  0x81, 0xf0, 0xcd, 0x32, 0xbc, 0x89, 0x9c, 0x04, 0xd6, 0x67, 0x7a, 0x1c,
-  0xc4, 0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x44,
-  0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x22, 0x1d, 0xe8, 0x70, 0x43, 0x00,
-  0xd2, 0x01, 0x18, 0xcc, 0x32, 0xc0, 0x49, 0x9c, 0x04, 0x26, 0xca, 0xc1,
-  0x10, 0x9f, 0x59, 0x02, 0x39, 0x31, 0xe2, 0x94, 0x03, 0xf8, 0xcc, 0x12,
-  0xc8, 0xc9, 0x40, 0xcb, 0xa3, 0xb1, 0x09, 0xd6, 0x26, 0x04, 0x9c, 0x08,
-  0x71, 0xe2, 0xd2, 0x81, 0x9b, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0xdd, 0xf6,
-  0xd4, 0xcd, 0x71, 0x30, 0xcc, 0xa1, 0x6e, 0x30, 0xcc, 0x11, 0xc3, 0x1c,
-  0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x7c, 0x30, 0x1d, 0x80,
-  0x74, 0xf0, 0xcb, 0x41, 0x4b, 0x07, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20,
-  0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62,
-  0x80, 0x00, 0x20, 0x08, 0x06, 0xdc, 0x4d, 0x07, 0x27, 0x1d, 0x24, 0x44,
-  0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x1c, 0x4e, 0x07, 0x28, 0x1d,
-  0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x5c, 0x4e, 0x07,
-  0x29, 0x1d, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x4b,
-  0x58, 0x07, 0x28, 0x1d, 0xa4, 0x73, 0x10, 0xd0, 0x74, 0x90, 0xcf, 0x81,
-  0x4d, 0x07, 0xa3, 0x09, 0x01, 0x70, 0xc1, 0x63, 0xb3, 0x04, 0x73, 0x32,
-  0xd0, 0xf2, 0x98, 0x86, 0x8f, 0xd0, 0xe7, 0xd0, 0x23, 0x2c, 0x01, 0x26,
-  0x82, 0x9c, 0xd0, 0xe7, 0x10, 0x26, 0xb3, 0x0c, 0x74, 0x62, 0x27, 0xb9,
-  0x36, 0x1c, 0x91, 0x3e, 0xfb, 0x1c, 0x0c, 0xdf, 0xa9, 0xcf, 0x30, 0xc3,
-  0x0d, 0x81, 0x39, 0x07, 0x64, 0x50, 0x43, 0xa0, 0xc3, 0x11, 0xe0, 0xf6,
-  0xcf, 0xc1, 0xf0, 0x55, 0x20, 0xe8, 0x89, 0xdb, 0x30, 0xc3, 0x0d, 0x41,
-  0x3a, 0x07, 0x64, 0x50, 0xc1, 0xa0, 0xb3, 0x0c, 0x75, 0xa2, 0x2a, 0xc1,
-  0xb5, 0x72, 0x30, 0xcc, 0x89, 0x6f, 0x30, 0xcc, 0x88, 0xc1, 0x01, 0x80,
-  0x20, 0x18, 0x7c, 0x68, 0x1d, 0xe0, 0x74, 0x70, 0xcf, 0x41, 0x59, 0x07,
-  0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a,
-  0x40, 0x0c, 0x45, 0x1c, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xdc,
-  0x5b, 0x07, 0x3f, 0x1d, 0x1c, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08,
-  0x06, 0x1c, 0x5c, 0x07, 0x60, 0x1d, 0x30, 0x44, 0x30, 0x62, 0x80, 0x00,
-  0x20, 0x08, 0x06, 0x5c, 0x5c, 0x07, 0x61, 0x1d, 0x48, 0x44, 0x30, 0x62,
-  0xa0, 0x00, 0x20, 0x08, 0x06, 0x4b, 0x5e, 0x07, 0x60, 0x1d, 0x84, 0x74,
-  0x10, 0xb0, 0x75, 0x10, 0xd3, 0x81, 0x5b, 0x07, 0xa3, 0x09, 0x01, 0x70,
-  0xc1, 0x63, 0xb3, 0x04, 0xaa, 0x32, 0xdc, 0x10, 0x6f, 0x72, 0x1d, 0x80,
-  0xc1, 0x2c, 0xc3, 0x9d, 0xe0, 0x49, 0x50, 0xfc, 0x1c, 0x8c, 0x75, 0x00,
-  0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0xd4, 0x5e, 0x07,
-  0x64, 0x1d, 0x8c, 0x50, 0x4d, 0x07, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60,
-  0x40, 0xf1, 0x75, 0x40, 0xd6, 0x41, 0x20, 0x5c, 0x30, 0x4c, 0xfd, 0x73,
-  0x80, 0xd6, 0x01, 0x5c, 0xf0, 0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18,
-  0x50, 0xa0, 0x1d, 0xa4, 0x75, 0xb0, 0x6f, 0x3a, 0x1d, 0x8c, 0x18, 0x1c,
-  0x00, 0x08, 0x82, 0x01, 0x15, 0xda, 0x41, 0x5a, 0x07, 0x81, 0x70, 0xc1,
-  0x30, 0x17, 0x3c, 0x75, 0xc7, 0x53, 0x87, 0xce, 0xc1, 0x30, 0xd7, 0xbf,
-  0xc1, 0x30, 0x47, 0x0c, 0x73, 0xc4, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82,
-  0x60, 0xf0, 0x95, 0x76, 0x50, 0xd7, 0x01, 0x4d, 0x07, 0xa2, 0x1d, 0x8c,
-  0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02,
-  0x31, 0x14, 0x91, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70, 0xac,
-  0x1d, 0xf0, 0x75, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
-  0x70, 0xad, 0x1d, 0xf4, 0x75, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80,
-  0x20, 0x18, 0x70, 0xae, 0x1d, 0xf8, 0x75, 0x90, 0x10, 0xc1, 0x88, 0x81,
-  0x02, 0x80, 0x20, 0x18, 0x2c, 0xb6, 0x1d, 0xf4, 0x75, 0xe0, 0xd3, 0x41,
-  0x90, 0xda, 0x81, 0x5b, 0x07, 0xab, 0x1d, 0x8c, 0x26, 0x04, 0xc0, 0x05,
-  0x8f, 0xcd, 0x12, 0xa8, 0xca, 0x70, 0x83, 0xcb, 0xb9, 0x76, 0x00, 0x06,
-  0xb3, 0x0c, 0x79, 0xa2, 0x2a, 0x81, 0xc1, 0x74, 0x20, 0xd3, 0x41, 0x7c,
-  0x86, 0x23, 0x7e, 0x68, 0xa6, 0x03, 0xe2, 0x9b, 0x65, 0xd0, 0x93, 0x3e,
-  0x09, 0x8c, 0xa6, 0x03, 0x30, 0x8a, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18,
-  0xe6, 0x82, 0xa7, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0x70,
-  0x3b, 0xd0, 0xe1, 0x86, 0xc0, 0xb6, 0x03, 0x30, 0x98, 0x65, 0xd8, 0x13,
-  0x3e, 0x09, 0x6c, 0xe0, 0xe9, 0x00, 0x3e, 0xb3, 0x04, 0xa1, 0x62, 0x3b,
-  0x1d, 0x10, 0xf1, 0x99, 0x25, 0x08, 0x95, 0xe1, 0x08, 0x35, 0xe2, 0xe9,
-  0x40, 0xf8, 0x66, 0x19, 0xfc, 0x24, 0x54, 0x02, 0x5b, 0xa3, 0x9e, 0x0e,
+  0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x37, 0xa3, 0xc2, 0x88, 0x0a, 0x07,
+  0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x47, 0xa3, 0x02, 0x89,
+  0x0a, 0x0c, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x57, 0xa3,
+  0x42, 0x89, 0x0a, 0x12, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1,
+  0xd2, 0xa3, 0x02, 0x89, 0x0a, 0xe5, 0x29, 0x04, 0x30, 0x2a, 0xd4, 0xa7,
+  0x20, 0xa3, 0xc2, 0x68, 0x42, 0x00, 0x5c, 0xf0, 0xd8, 0x2c, 0x41, 0xdf,
+  0x0c, 0x37, 0xd4, 0x6e, 0x60, 0xa3, 0x02, 0x18, 0xcc, 0x32, 0xa8, 0xcd,
+  0xda, 0x04, 0x05, 0x9e, 0xc2, 0x89, 0x0a, 0x70, 0xc1, 0x53, 0x23, 0x06,
+  0x07, 0x00, 0x82, 0x60, 0x40, 0xfd, 0xa8, 0x80, 0xa2, 0xc2, 0xee, 0x06,
+  0xf9, 0x29, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x01, 0x05, 0xa6, 0x02,
+  0x8a, 0x0a, 0x81, 0x70, 0xc1, 0x30, 0x35, 0x9e, 0x02, 0x8b, 0x0a, 0x70,
+  0xc1, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0x91, 0xa9, 0xd0,
+  0xa2, 0x02, 0x18, 0xf8, 0xa7, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06,
+  0x54, 0x99, 0x0a, 0x2d, 0x2a, 0x04, 0xc2, 0x05, 0xc3, 0x5c, 0xf0, 0xd4,
+  0x1d, 0x4f, 0x1d, 0x6b, 0x0a, 0xc3, 0x5c, 0x38, 0x0a, 0xc3, 0x1c, 0x31,
+  0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x97, 0xa6,
+  0x42, 0x8e, 0x0a, 0xf8, 0x29, 0x98, 0xa9, 0x30, 0x9a, 0x10, 0x00, 0xa3,
+  0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22,
+  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xc0, 0xc1, 0xa9, 0x00, 0xa6, 0x42,
+  0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xc0, 0xc5, 0xa9, 0x10,
+  0xa6, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xc0, 0xc9,
+  0xa9, 0x20, 0xa6, 0x42, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60,
+  0xb0, 0xe8, 0xa9, 0x10, 0xa6, 0x82, 0x88, 0x0a, 0x41, 0x9b, 0x0a, 0x32,
+  0x2a, 0xbc, 0xa9, 0x30, 0x9a, 0x10, 0x00, 0x17, 0x3c, 0x36, 0x4b, 0xd0,
+  0x37, 0xc3, 0x0d, 0xf2, 0x1b, 0xc8, 0xa9, 0x00, 0x06, 0xb3, 0x0c, 0x6c,
+  0xd3, 0x37, 0x81, 0xd1, 0xa7, 0x60, 0x9f, 0x42, 0x7c, 0x86, 0x23, 0xee,
+  0x37, 0xb8, 0x4f, 0x81, 0xf8, 0x66, 0x19, 0xda, 0x06, 0x6e, 0x02, 0xc3,
+  0x4f, 0x01, 0x7f, 0x83, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e,
+  0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x82, 0x4f, 0x05,
+  0x1d, 0x6e, 0x08, 0xf4, 0x54, 0x00, 0x83, 0x59, 0x06, 0xb7, 0x79, 0x9b,
+  0xc0, 0x06, 0x10, 0x15, 0xe0, 0x33, 0x4b, 0x40, 0x37, 0xf6, 0x9f, 0x02,
+  0x11, 0x9f, 0x59, 0x02, 0xba, 0x19, 0x8e, 0x10, 0xe1, 0x00, 0x44, 0x05,
+  0xe1, 0x9b, 0x65, 0x88, 0x1b, 0xba, 0x09, 0x6c, 0x84, 0x83, 0x10, 0x15,
   0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x22,
-  0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0xf1, 0x0e, 0x74, 0xb8, 0x21, 0x08,
-  0xef, 0x00, 0x0c, 0x66, 0x19, 0xfe, 0x04, 0x54, 0x02, 0x2b, 0xeb, 0x60,
-  0x88, 0xcf, 0x2c, 0x41, 0xa8, 0x18, 0x81, 0xd6, 0x01, 0x7c, 0x66, 0x09,
-  0x42, 0x65, 0xa0, 0xe5, 0xd1, 0xf6, 0x04, 0xe3, 0x13, 0xe2, 0x4f, 0x04,
-  0x50, 0x51, 0xc7, 0xa0, 0x4f, 0x2e, 0x18, 0xc6, 0xce, 0x3a, 0x58, 0xeb,
-  0x20, 0x3e, 0xc3, 0x11, 0x6b, 0xc7, 0xd6, 0x01, 0xf1, 0xcd, 0x32, 0x88,
-  0x4a, 0xa9, 0x04, 0xd6, 0xd6, 0x01, 0xdb, 0xc5, 0xc7, 0x82, 0x81, 0x3e,
+  0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0x53, 0x15, 0x74, 0xb8, 0x21, 0x28,
+  0x55, 0x01, 0x0c, 0x66, 0x19, 0xe4, 0x66, 0x6e, 0x02, 0x4b, 0x51, 0x61,
+  0x88, 0xcf, 0x2c, 0x01, 0xdd, 0x18, 0xc1, 0xa2, 0x02, 0x7c, 0x66, 0x09,
+  0xe8, 0x66, 0xa0, 0xe5, 0xd1, 0xdc, 0x06, 0x7b, 0x1b, 0x42, 0x6e, 0x84,
+  0xb9, 0xd1, 0xc1, 0x01, 0x6e, 0x2e, 0x18, 0xc6, 0x56, 0x54, 0x78, 0x51,
+  0x21, 0x3e, 0xc3, 0x11, 0xb0, 0x00, 0xa3, 0x02, 0xf1, 0xcd, 0x32, 0xd4,
+  0x0d, 0xde, 0x04, 0x16, 0xa3, 0x42, 0x2c, 0xc4, 0xc7, 0x82, 0x81, 0x3e,
   0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f,
-  0x11, 0xf1, 0x1d, 0xe8, 0x70, 0x43, 0xf0, 0xde, 0x01, 0x18, 0xcc, 0x32,
-  0x8c, 0x0a, 0xa9, 0x04, 0x36, 0xd4, 0x75, 0x00, 0x9f, 0x59, 0x82, 0x54,
-  0x31, 0xb9, 0x0e, 0x88, 0xf8, 0xcc, 0x12, 0xa4, 0xca, 0x70, 0x84, 0xdd,
-  0xcd, 0x75, 0x20, 0x7c, 0xb3, 0x0c, 0xa6, 0x92, 0x2a, 0x81, 0xdd, 0x1d,
-  0x5d, 0x07, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94,
-  0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x04, 0x7f, 0x07, 0x3a, 0xdc,
-  0x10, 0xe8, 0x77, 0x00, 0x06, 0xb3, 0x0c, 0xa7, 0x82, 0x2a, 0x81, 0xf1,
-  0x75, 0x30, 0xc4, 0x67, 0x96, 0x20, 0x55, 0x8c, 0x08, 0xed, 0x00, 0x3e,
-  0xb3, 0x04, 0xa9, 0x32, 0xd0, 0xf2, 0x68, 0xa3, 0x82, 0x91, 0x0a, 0x71,
-  0x2a, 0x02, 0xaa, 0x88, 0x7c, 0x50, 0x2a, 0x17, 0x0c, 0x73, 0xc1, 0x53,
-  0xb7, 0x3d, 0x75, 0x6d, 0x1d, 0x0c, 0x73, 0xa2, 0x1c, 0x0c, 0x73, 0xc4,
-  0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x9f, 0x8a,
-  0x07, 0xfa, 0x1d, 0xe4, 0x76, 0x70, 0xe2, 0xc1, 0x68, 0x42, 0x00, 0x8c,
-  0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89,
-  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x17, 0xe3, 0x41, 0x88, 0x07,
-  0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x27, 0xe3, 0x81,
-  0x88, 0x07, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x37,
-  0xe3, 0xc1, 0x88, 0x07, 0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82,
-  0xc1, 0xb2, 0xe3, 0x81, 0x88, 0x07, 0xe3, 0x1d, 0x04, 0x2e, 0x1e, 0xcc,
-  0x77, 0x00, 0xe3, 0xc1, 0x68, 0x42, 0x00, 0x5c, 0xf0, 0xd8, 0x2c, 0x81,
-  0xaa, 0x0c, 0xb4, 0x3c, 0xa6, 0x51, 0x27, 0xe8, 0x3b, 0xd0, 0x09, 0x4b,
-  0xdc, 0x89, 0x90, 0x2a, 0xe8, 0x3b, 0xe0, 0x89, 0xd9, 0x1e, 0x7d, 0x07,
-  0xf0, 0x99, 0x65, 0x58, 0x95, 0x56, 0xa1, 0xbd, 0xe1, 0x08, 0xdc, 0xb3,
-  0xef, 0x60, 0xf8, 0x2e, 0xf7, 0x86, 0x19, 0x6e, 0x08, 0xc2, 0x3b, 0x20,
-  0x83, 0x1a, 0x02, 0x1d, 0x8e, 0x28, 0xf4, 0x3b, 0x18, 0xbe, 0x0a, 0x04,
-  0xbd, 0x63, 0x98, 0xe1, 0x86, 0x80, 0xbc, 0x03, 0x32, 0xa8, 0x60, 0xd0,
-  0x59, 0x06, 0x56, 0x09, 0x97, 0xe0, 0x50, 0x3b, 0x18, 0xe6, 0x7a, 0x39,
-  0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xbe, 0x31, 0x0f, 0x66,
-  0x3c, 0x90, 0xef, 0x00, 0xcc, 0x83, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10,
-  0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x0e, 0x19, 0x31,
-  0x40, 0x00, 0x10, 0x04, 0x03, 0x4e, 0xcd, 0x03, 0x1d, 0x0f, 0x0e, 0x22,
-  0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x6e, 0xcd, 0x83, 0x1d, 0x0f,
-  0x18, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x8e, 0xcd, 0x03,
-  0x1e, 0x0f, 0x24, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x85,
-  0xce, 0x83, 0x1d, 0x0f, 0xf8, 0x3b, 0x08, 0xce, 0x3c, 0x60, 0xf1, 0x20,
-  0xcd, 0x83, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xb1, 0x59, 0x82, 0x70, 0x19,
-  0x6e, 0x60, 0xbf, 0x36, 0x0f, 0xc0, 0x60, 0x96, 0xc1, 0x55, 0x5e, 0x25,
-  0xa8, 0xfb, 0x0e, 0x7c, 0x3c, 0x80, 0x0b, 0x9e, 0x1a, 0x31, 0x38, 0x00,
-  0x10, 0x04, 0x03, 0xca, 0xce, 0x83, 0x1f, 0x0f, 0xe4, 0x0f, 0xc6, 0x83,
-  0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0xee, 0x3c, 0xf8, 0xf1, 0x20,
-  0x10, 0x2e, 0x18, 0xa6, 0xf4, 0x3b, 0x18, 0xf3, 0x00, 0x2e, 0x78, 0x6a,
-  0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xa8, 0x3d, 0x0f, 0xc8, 0x3c, 0x00,
-  0x83, 0x1a, 0x0f, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0xe2, 0xf3,
-  0x80, 0xcc, 0x83, 0x40, 0xb8, 0x60, 0x98, 0x0b, 0x9e, 0xba, 0xe3, 0xa9,
-  0x1b, 0xef, 0x60, 0x98, 0xc3, 0xe7, 0x60, 0x98, 0x23, 0x86, 0x39, 0x62,
-  0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xf8, 0x40, 0x3d, 0x80, 0xf3,
-  0xe0, 0xc5, 0x83, 0x3e, 0x0f, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08,
-  0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00,
-  0x01, 0x40, 0x10, 0x0c, 0xb8, 0x53, 0x0f, 0xee, 0x3c, 0x48, 0x88, 0x60,
-  0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x38, 0x54, 0x0f, 0xf0, 0x3c, 0x48,
-  0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb8, 0x54, 0x0f, 0xf2,
-  0x3c, 0x48, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x96, 0x58,
-  0x0f, 0xf0, 0x3c, 0xc8, 0xf1, 0x20, 0x20, 0xf5, 0x20, 0xcd, 0x03, 0x53,
-  0x0f, 0x46, 0x13, 0x02, 0xe0, 0x82, 0xc7, 0x66, 0x09, 0xc2, 0x65, 0xb8,
-  0x21, 0x05, 0x83, 0x54, 0x0f, 0xc0, 0x60, 0x96, 0x01, 0x56, 0xc2, 0x25,
-  0xb0, 0x15, 0x0f, 0x5a, 0x3c, 0x88, 0xcf, 0x70, 0x84, 0x0b, 0x06, 0x2e,
-  0x1e, 0x10, 0xdf, 0x2c, 0x43, 0xac, 0xd0, 0x4a, 0x60, 0x2f, 0x1e, 0xbc,
-  0x60, 0x10, 0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59,
-  0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xcc, 0x7a, 0xa0, 0xc3, 0x0d,
-  0x41, 0xac, 0x07, 0x60, 0x30, 0xcb, 0x20, 0x2b, 0xb3, 0x12, 0xd8, 0x70,
-  0xe3, 0x01, 0x7c, 0x66, 0x09, 0x70, 0xc5, 0x6c, 0x3c, 0x20, 0xe2, 0x33,
-  0x4b, 0x80, 0x2b, 0xc3, 0x11, 0x39, 0x18, 0xdc, 0x78, 0x20, 0x7c, 0xb3,
-  0x0c, 0xb5, 0x82, 0x2b, 0x81, 0xe9, 0x60, 0x80, 0xe3, 0x41, 0x7c, 0x2c,
-  0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x41, 0x24, 0x1f, 0x2b,
-  0x82, 0xf8, 0x14, 0xe1, 0xeb, 0x81, 0x0e, 0x37, 0x04, 0xbc, 0x1e, 0x80,
-  0xc1, 0x2c, 0x83, 0xad, 0xdc, 0x4a, 0x60, 0x60, 0x1e, 0x0c, 0xf1, 0x99,
-  0x25, 0xc0, 0x15, 0x23, 0xc6, 0x3c, 0x80, 0xcf, 0x2c, 0x01, 0xae, 0x0c,
-  0xb4, 0x3c, 0x9a, 0xac, 0x60, 0xb3, 0x42, 0xd8, 0x8a, 0x70, 0x2b, 0xa8,
-  0x28, 0xd0, 0xca, 0x05, 0xc3, 0x98, 0x98, 0x07, 0x66, 0x1e, 0xc4, 0x67,
-  0x38, 0x02, 0x16, 0xce, 0x3c, 0x20, 0xbe, 0x59, 0x86, 0x5c, 0xe1, 0x95,
-  0xc0, 0xd0, 0x3c, 0x88, 0x85, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61,
-  0x2e, 0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x82, 0xdd,
-  0x03, 0x1d, 0x6e, 0x08, 0xd4, 0x3d, 0x00, 0x83, 0x59, 0x06, 0x5d, 0xd9,
-  0x95, 0xc0, 0x06, 0x38, 0x0f, 0xe0, 0x33, 0x4b, 0x00, 0x2e, 0xd6, 0xe6,
-  0x01, 0x11, 0x9f, 0x59, 0x02, 0x70, 0x19, 0x8e, 0xd8, 0x05, 0x37, 0x0f,
-  0x84, 0x6f, 0x96, 0xa1, 0x57, 0xc0, 0x25, 0x30, 0x5e, 0x78, 0xf3, 0x20,
-  0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20, 0x92,
-  0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xb8, 0xf7, 0x40, 0x87, 0x1b, 0x82, 0x7a,
-  0x0f, 0xc0, 0x60, 0x96, 0xc1, 0x57, 0x7e, 0x25, 0xb0, 0x3b, 0x0f, 0x86,
-  0xf8, 0xcc, 0x12, 0x80, 0x8b, 0x11, 0x7c, 0x1e, 0xc0, 0x67, 0x96, 0x00,
-  0x5c, 0x06, 0x5a, 0x1e, 0x4d, 0x57, 0xb0, 0x5d, 0x21, 0x7c, 0x45, 0xf8,
-  0x15, 0xd6, 0xe0, 0x95, 0x0b, 0x86, 0xb9, 0xe0, 0xa9, 0xdb, 0x9e, 0x3a,
-  0x34, 0x0f, 0x86, 0xb9, 0xbe, 0x0e, 0x86, 0x39, 0x62, 0x98, 0x23, 0x86,
-  0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0xaf, 0xe4, 0x83, 0x7a, 0x0f,
-  0x68, 0x3d, 0x10, 0xf9, 0x60, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60,
-  0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10,
-  0x00, 0x04, 0xc1, 0x80, 0x63, 0xf9, 0x80, 0xdf, 0x83, 0x84, 0x08, 0x46,
-  0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80, 0x6b, 0xf9, 0xa0, 0xdf, 0x83, 0x84,
-  0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80, 0x73, 0xf9, 0xc0, 0xdf,
-  0x83, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0xb1, 0xf9,
-  0xa0, 0xdf, 0x03, 0x5f, 0x0f, 0x82, 0x94, 0x0f, 0xdc, 0x3d, 0x58, 0xf9,
-  0x60, 0x34, 0x21, 0x00, 0x2e, 0x78, 0x6c, 0x96, 0x20, 0x5c, 0x06, 0x5a,
-  0x1e, 0xd3, 0x60, 0x15, 0xdf, 0x1e, 0x56, 0x85, 0x25, 0x5c, 0x45, 0x00,
-  0x17, 0xdf, 0x1e, 0x5e, 0x65, 0x96, 0x41, 0x5c, 0xc8, 0xc5, 0x15, 0x83,
-  0xe1, 0x88, 0x59, 0x0c, 0xe0, 0x3d, 0x18, 0xbe, 0xa3, 0xc5, 0x60, 0x98,
-  0xe1, 0x86, 0x60, 0xd7, 0x03, 0x32, 0xa8, 0x21, 0xd0, 0xe1, 0x88, 0x7f,
-  0xa0, 0xf7, 0x60, 0xf8, 0x2a, 0x10, 0xf4, 0x42, 0x62, 0x98, 0xe1, 0x86,
-  0xc0, 0xd7, 0x03, 0x32, 0xa8, 0x60, 0xd0, 0x59, 0x86, 0x71, 0xc1, 0x97,
-  0xe0, 0x44, 0x3d, 0x18, 0xe6, 0x6e, 0x3b, 0x18, 0x66, 0xc4, 0xe0, 0x00,
-  0x40, 0x10, 0x0c, 0xbe, 0x9e, 0x0f, 0x5a, 0x3e, 0x60, 0xf7, 0x40, 0xe7,
-  0x83, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18,
-  0x4d, 0x20, 0x86, 0x22, 0x0e, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03,
-  0x8e, 0xec, 0x03, 0x9a, 0x0f, 0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10,
-  0x04, 0x03, 0xae, 0xec, 0x83, 0x9a, 0x0f, 0x18, 0x22, 0x18, 0x31, 0x40,
-  0x00, 0x10, 0x04, 0x03, 0xce, 0xec, 0x03, 0x9b, 0x0f, 0x24, 0x22, 0x18,
-  0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0xc5, 0xed, 0x83, 0x9a, 0x0f, 0xec,
-  0x3d, 0x08, 0xc2, 0x3e, 0x30, 0xf9, 0x60, 0xec, 0x83, 0xd1, 0x84, 0x00,
-  0xb8, 0xe0, 0xb1, 0x59, 0x02, 0x7c, 0x19, 0x6e, 0x30, 0xc7, 0xe0, 0xec,
-  0x03, 0x30, 0x98, 0x65, 0x28, 0x17, 0x73, 0x09, 0x2a, 0xde, 0x03, 0x9c,
-  0x0f, 0xe0, 0x82, 0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x82,
-  0xfb, 0x20, 0xe7, 0x83, 0x76, 0x0c, 0x54, 0x3e, 0x18, 0x31, 0x38, 0x00,
-  0x10, 0x04, 0x03, 0x2a, 0xee, 0x83, 0x9c, 0x0f, 0x02, 0xe1, 0x82, 0x61,
-  0x8a, 0xde, 0x83, 0x9e, 0x0f, 0xe0, 0x82, 0xa7, 0x46, 0x0c, 0x0e, 0x00,
-  0x04, 0xc1, 0x80, 0xaa, 0xfb, 0xc0, 0xe7, 0x03, 0x9d, 0x78, 0xf9, 0x60,
-  0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x28, 0xbb, 0x0f, 0x7c, 0x3e, 0x08,
-  0x84, 0x0b, 0x86, 0xb9, 0xe0, 0xa9, 0x3b, 0x9e, 0xba, 0x5e, 0x0f, 0x86,
-  0x39, 0xf9, 0x0e, 0x86, 0x39, 0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38,
-  0x00, 0x10, 0x04, 0x83, 0x4f, 0xef, 0x03, 0xb5, 0x0f, 0x52, 0x3e, 0xb8,
-  0xfb, 0x60, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10,
-  0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1,
-  0x80, 0x0b, 0xfd, 0x20, 0xee, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00,
-  0x04, 0xc1, 0x80, 0x13, 0xfd, 0x40, 0xee, 0x83, 0x84, 0x08, 0x46, 0x0c,
-  0x10, 0x00, 0x04, 0xc1, 0x80, 0x1b, 0xfd, 0x60, 0xee, 0x83, 0x84, 0x08,
-  0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0x59, 0xfd, 0x40, 0xee, 0x83,
-  0x99, 0x0f, 0x02, 0xbf, 0x0f, 0xc6, 0x3e, 0x00, 0xfd, 0x60, 0x34, 0x21,
-  0x00, 0x2e, 0x78, 0x6c, 0x96, 0x00, 0x5f, 0x86, 0x1b, 0x46, 0x32, 0x18,
-  0xfd, 0x00, 0x0c, 0x66, 0x19, 0xce, 0x05, 0x5f, 0x02, 0x2b, 0xf9, 0xe0,
-  0xe4, 0x83, 0xf8, 0x0c, 0x47, 0xa4, 0x64, 0x80, 0xf2, 0x01, 0xf1, 0xcd,
-  0x32, 0xa0, 0xcb, 0xba, 0x04, 0x96, 0xf2, 0x81, 0x4a, 0x06, 0xf1, 0xb1,
-  0x60, 0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x86, 0x7c, 0xac,
-  0x08, 0xe2, 0x53, 0x44, 0xeb, 0x07, 0x3a, 0xdc, 0x10, 0xac, 0x7e, 0x00,
-  0x06, 0xb3, 0x0c, 0xe9, 0xa2, 0x2e, 0x81, 0x0d, 0x31, 0x1f, 0xc0, 0x67,
-  0x96, 0xe0, 0x5d, 0x0c, 0xe6, 0x03, 0x22, 0x3e, 0xb3, 0x04, 0xef, 0x32,
-  0x1c, 0x41, 0x93, 0x41, 0xcc, 0x07, 0xc2, 0x37, 0xcb, 0xc0, 0x2e, 0xef,
-  0x12, 0x58, 0x4d, 0x06, 0x32, 0x1f, 0xc4, 0xc7, 0x02, 0x87, 0x3e, 0x17,
-  0x0c, 0x73, 0xc1, 0x53, 0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11,
-  0xb8, 0x1f, 0xe8, 0x70, 0x43, 0x60, 0xfb, 0x01, 0x18, 0xcc, 0x32, 0xb4,
-  0x8b, 0xbb, 0x04, 0xa6, 0xf3, 0xc1, 0x10, 0x9f, 0x59, 0x82, 0x77, 0x31,
-  0xa2, 0xe7, 0x03, 0xf8, 0xcc, 0x12, 0xbc, 0xcb, 0x40, 0xcb, 0xa3, 0xa5,
-  0x0b, 0xa6, 0x2e, 0x44, 0xbb, 0x08, 0xee, 0xe2, 0xa7, 0xc2, 0xba, 0x5c,
-  0x30, 0x8c, 0xf1, 0x7c, 0x00, 0xf6, 0x41, 0x7c, 0x86, 0x23, 0x54, 0x23,
-  0xec, 0x03, 0xe2, 0x9b, 0x65, 0x80, 0x97, 0x79, 0x09, 0x4c, 0xec, 0x83,
-  0xd5, 0x88, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c,
-  0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xcc, 0x3f, 0xd0, 0xe1, 0x86,
-  0x80, 0xfc, 0x03, 0x30, 0x98, 0x65, 0x88, 0x17, 0x79, 0x09, 0x6c, 0x50,
-  0xfb, 0x00, 0x3e, 0xb3, 0x04, 0xf7, 0x62, 0x67, 0x1f, 0x10, 0xf1, 0x99,
-  0x25, 0xb8, 0x97, 0xe1, 0x88, 0xda, 0x40, 0xfb, 0x40, 0xf8, 0x66, 0x19,
-  0xe8, 0xe5, 0x5e, 0x02, 0xb3, 0x8d, 0xb4, 0x0f, 0xe2, 0x63, 0x81, 0x43,
-  0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4,
-  0xa7, 0x88, 0xf8, 0x0f, 0x74, 0xb8, 0x21, 0x78, 0xff, 0x00, 0x0c, 0x66,
-  0x19, 0xea, 0xc5, 0x5e, 0x02, 0x8b, 0xfb, 0x60, 0x88, 0xcf, 0x2c, 0xc1,
-  0xbd, 0x18, 0x61, 0xf7, 0x01, 0x7c, 0x66, 0x09, 0xee, 0x65, 0xa0, 0xe5,
-  0xd1, 0xe2, 0x05, 0x93, 0x17, 0xa2, 0x5e, 0x04, 0x7b, 0x01, 0x9d, 0x79,
-  0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xba, 0xed, 0xa9, 0x13, 0xfb, 0x60, 0x98,
-  0xbb, 0xf3, 0x60, 0x98, 0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03,
-  0x00, 0x41, 0x30, 0xf8, 0xfe, 0x3f, 0x78, 0xff, 0xc0, 0xf5, 0x03, 0xfe,
-  0x0f, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61,
-  0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c,
-  0x38, 0x13, 0x14, 0xec, 0x3f, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40,
-  0x10, 0x0c, 0xb8, 0x13, 0x14, 0xee, 0x3f, 0x48, 0x88, 0x60, 0xc4, 0x00,
-  0x01, 0x40, 0x10, 0x0c, 0x38, 0x14, 0x14, 0xf0, 0x3f, 0x48, 0x88, 0x60,
-  0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x16, 0x18, 0x14, 0xee, 0x3f, 0xc0,
-  0xfd, 0x20, 0x18, 0x41, 0x01, 0xfd, 0x83, 0x12, 0x14, 0x46, 0x13, 0x02,
-  0xe0, 0x82, 0xc7, 0x66, 0x09, 0xf0, 0x65, 0xa0, 0xe5, 0x31, 0x8d, 0x71,
-  0x91, 0xfd, 0x41, 0x5c, 0x58, 0xa2, 0x5c, 0x84, 0x7b, 0x91, 0xfd, 0xc1,
-  0x5c, 0x66, 0x19, 0xf2, 0x65, 0x5f, 0x50, 0x33, 0x18, 0x8e, 0x98, 0x3d,
-  0xf5, 0x0f, 0x86, 0xef, 0x68, 0x6f, 0x98, 0xe1, 0x86, 0xa0, 0xf6, 0x03,
-  0x32, 0xa8, 0x21, 0xd0, 0xe1, 0x88, 0xfc, 0x70, 0xff, 0x60, 0xf8, 0x2a,
-  0x10, 0xf4, 0xf6, 0x63, 0x98, 0xe1, 0x86, 0x00, 0xf7, 0x03, 0x32, 0xa8,
-  0x60, 0xd0, 0x59, 0x06, 0x7d, 0x79, 0x99, 0xe0, 0xf8, 0x3e, 0x18, 0xe6,
-  0x62, 0x3d, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xbe, 0x1b,
-  0x14, 0x4e, 0x50, 0x30, 0xff, 0x80, 0x06, 0x85, 0xd1, 0x84, 0x00, 0x18,
-  0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x0e,
-  0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xce, 0x07, 0x05, 0x17, 0x14,
-  0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xee, 0x07, 0x85,
-  0x17, 0x14, 0x18, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x0e,
-  0x0c, 0x05, 0x18, 0x14, 0x24, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04,
-  0x83, 0x05, 0x0d, 0x85, 0x17, 0x14, 0xe0, 0x3f, 0x08, 0x76, 0x50, 0x00,
-  0x41, 0xa1, 0x07, 0x85, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xb1, 0x59, 0x82,
-  0x97, 0x19, 0x6e, 0x00, 0xcf, 0x20, 0x0c, 0x05, 0x30, 0x98, 0x65, 0xe0,
-  0x97, 0x7e, 0x09, 0x6a, 0xfd, 0x03, 0x19, 0x14, 0xe0, 0x82, 0xa7, 0x46,
-  0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x52, 0x43, 0x61, 0x06, 0x85, 0xf6,
-  0x23, 0x41, 0x61, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xa8, 0x35, 0x14,
-  0x66, 0x50, 0x08, 0x84, 0x0b, 0x86, 0x29, 0xf7, 0x0f, 0x6e, 0x50, 0x80,
-  0x0b, 0x9e, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0xea, 0x0d, 0x05,
-  0x1c, 0x14, 0x68, 0x24, 0x05, 0x85, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30,
-  0xa0, 0xe0, 0x50, 0xc0, 0x41, 0x21, 0x10, 0x2e, 0x18, 0xe6, 0x82, 0xa7,
-  0xee, 0x78, 0xea, 0x6e, 0x3f, 0x18, 0xe6, 0xd8, 0x3d, 0x18, 0xe6, 0x88,
-  0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x3e, 0x3a,
-  0x14, 0xc8, 0x50, 0x18, 0x41, 0x21, 0x0e, 0x85, 0xd1, 0x84, 0x00, 0x18,
-  0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x12,
-  0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x6e, 0x0f, 0x85, 0x35, 0x14,
-  0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x8e, 0x0f, 0x05,
-  0x36, 0x14, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xae,
-  0x0f, 0x85, 0x36, 0x14, 0x12, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04,
-  0x83, 0xa5, 0x14, 0x05, 0x36, 0x14, 0x5a, 0x50, 0x08, 0xf0, 0x50, 0xe8,
-  0x41, 0x41, 0x0f, 0x85, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xb1, 0x59, 0x82,
-  0x97, 0x19, 0x6e, 0xe8, 0xcf, 0xa0, 0x0f, 0x05, 0x30, 0x98, 0x65, 0xf0,
-  0x97, 0x97, 0x09, 0xec, 0xff, 0x83, 0x10, 0x14, 0xe2, 0x33, 0x1c, 0x91,
-  0x82, 0x81, 0x08, 0x0a, 0xc4, 0x37, 0xcb, 0xf0, 0x2f, 0x22, 0x13, 0xd8,
-  0x08, 0x0a, 0x2a, 0x18, 0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73,
-  0xc1, 0x53, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0xa7, 0x28,
-  0xe8, 0x70, 0x43, 0x50, 0x8a, 0x02, 0x18, 0xcc, 0x32, 0x80, 0x4c, 0xc8,
-  0x04, 0x36, 0xac, 0xa0, 0x00, 0x9f, 0x59, 0x02, 0x93, 0x31, 0x15, 0x14,
-  0x88, 0xf8, 0xcc, 0x12, 0x98, 0xcc, 0x70, 0x04, 0x0d, 0x06, 0x2b, 0x28,
-  0x08, 0xdf, 0x2c, 0xc3, 0xc8, 0x98, 0x4c, 0x60, 0x35, 0x18, 0xb0, 0xa0,
-  0x10, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x10,
-  0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xc8, 0xa2, 0xa0, 0xc3, 0x0d, 0x01,
-  0x2c, 0x0a, 0x60, 0x30, 0xcb, 0x40, 0x32, 0x25, 0x13, 0x18, 0x0d, 0x0a,
-  0x43, 0x7c, 0x66, 0x09, 0x4c, 0xc6, 0x88, 0x1b, 0x14, 0xe0, 0x33, 0x4b,
-  0x60, 0x32, 0x03, 0x2d, 0x8f, 0x06, 0x32, 0x58, 0xc8, 0x10, 0x24, 0x23,
-  0x94, 0x8c, 0x1f, 0x0a, 0x22, 0x73, 0xc1, 0x30, 0x66, 0x83, 0x82, 0x0e,
-  0x0a, 0xf1, 0x19, 0x8e, 0x20, 0x95, 0x1d, 0x14, 0x88, 0x6f, 0x96, 0xe1,
-  0x64, 0x54, 0x26, 0x30, 0x1e, 0x14, 0x4a, 0x25, 0x3e, 0x16, 0x0c, 0xf4,
-  0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0xc0, 0x90, 0x8f, 0x15, 0x41, 0x7c,
-  0x8a, 0x00, 0x47, 0x41, 0x87, 0x1b, 0x02, 0x5f, 0x14, 0xc0, 0x60, 0x96,
-  0x01, 0x65, 0x52, 0x26, 0xb0, 0x81, 0x0c, 0x05, 0xf8, 0xcc, 0x12, 0xb8,
-  0x8c, 0x85, 0xa1, 0x40, 0xc4, 0x67, 0x96, 0xc0, 0x65, 0x86, 0x23, 0x5e,
-  0x45, 0x0c, 0x05, 0xe1, 0x9b, 0x65, 0x58, 0x19, 0x97, 0x09, 0x0c, 0x56,
-  0xc6, 0x50, 0x88, 0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7,
-  0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xd6, 0x51, 0xd0, 0xe1,
-  0x86, 0x20, 0x1d, 0x05, 0x30, 0x98, 0x65, 0x60, 0x99, 0x96, 0x09, 0x6c,
-  0x0d, 0x85, 0x21, 0x3e, 0xb3, 0x04, 0x2e, 0x63, 0x04, 0x1c, 0x0a, 0xf0,
-  0x99, 0x25, 0x70, 0x99, 0x81, 0x96, 0x47, 0x43, 0x19, 0x2c, 0x65, 0x08,
-  0x96, 0x11, 0x5a, 0x86, 0xae, 0x54, 0xe6, 0x82, 0x61, 0x2e, 0x78, 0xea,
-  0xb6, 0xa7, 0x8e, 0x07, 0x85, 0x61, 0x2e, 0xee, 0x83, 0x61, 0x8e, 0x18,
-  0xe6, 0x88, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0xe0, 0xcb, 0x47,
-  0x21, 0x1d, 0x05, 0x54, 0x14, 0xec, 0x51, 0x18, 0x4d, 0x08, 0x80, 0xd1,
-  0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0x22, 0x91,
-  0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xe0, 0x40, 0x52, 0x80, 0x47, 0x21,
-  0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xe0, 0x42, 0x52, 0x88,
-  0x47, 0x21, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xe0, 0x44,
-  0x52, 0x90, 0x47, 0x21, 0x21, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30,
-  0x58, 0x54, 0x52, 0x88, 0x47, 0x41, 0x16, 0x85, 0xa0, 0x1f, 0x05, 0x71,
-  0x14, 0xfe, 0x51, 0x18, 0x4d, 0x08, 0x80, 0x0b, 0x1e, 0x9b, 0x25, 0x78,
-  0x99, 0x81, 0x96, 0xc7, 0x34, 0xf4, 0xc5, 0x34, 0x89, 0x7c, 0x61, 0x09,
-  0x7e, 0x11, 0x5c, 0xc6, 0x34, 0x89, 0x7e, 0x99, 0x65, 0x80, 0x19, 0x99,
-  0x11, 0xd5, 0x60, 0x38, 0x42, 0xf6, 0xc8, 0x51, 0x18, 0xbe, 0x9b, 0xbd,
-  0x61, 0x86, 0x1b, 0x82, 0x57, 0x14, 0xc8, 0xa0, 0x86, 0x40, 0x87, 0x23,
-  0xe6, 0x05, 0x1d, 0x85, 0xe1, 0xab, 0x40, 0xd0, 0xab, 0x97, 0x61, 0x86,
-  0x1b, 0x02, 0x59, 0x14, 0xc8, 0xa0, 0x82, 0x41, 0x67, 0x19, 0x62, 0xc6,
-  0x6c, 0x82, 0xb3, 0x43, 0x61, 0x98, 0x5b, 0xfd, 0x60, 0x98, 0x11, 0x83,
-  0x03, 0x00, 0x41, 0x30, 0xf8, 0x62, 0x52, 0x08, 0x49, 0x01, 0x1c, 0x05,
-  0x97, 0x14, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06,
-  0x61, 0x34, 0x81, 0x18, 0x8a, 0x38, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10,
-  0x0c, 0x38, 0x9c, 0x14, 0x50, 0x52, 0x38, 0x88, 0x60, 0xc4, 0x00, 0x01,
-  0x40, 0x10, 0x0c, 0xb8, 0x9c, 0x14, 0x52, 0x52, 0x60, 0x88, 0x60, 0xc4,
-  0x00, 0x01, 0x40, 0x10, 0x0c, 0x38, 0x9d, 0x14, 0x54, 0x52, 0x90, 0x88,
-  0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x16, 0xb1, 0x14, 0x52, 0x52,
-  0x50, 0x47, 0x21, 0xa8, 0x49, 0x41, 0x1f, 0x85, 0x9b, 0x14, 0x46, 0x13,
-  0x02, 0xe0, 0x82, 0xc7, 0x66, 0x09, 0xcc, 0x66, 0xb8, 0x41, 0x57, 0x83,
-  0x9d, 0x14, 0xc0, 0x60, 0x96, 0x61, 0x66, 0x68, 0x26, 0xa8, 0x72, 0x14,
-  0x58, 0x52, 0x80, 0x0b, 0x9e, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03,
-  0x8a, 0x2c, 0x85, 0x96, 0x14, 0xd8, 0xcf, 0x1f, 0x85, 0x11, 0x83, 0x03,
-  0x00, 0x41, 0x30, 0xa0, 0xca, 0x52, 0x68, 0x49, 0x21, 0x10, 0x2e, 0x18,
-  0xa6, 0xd0, 0x51, 0x88, 0x49, 0x01, 0x2e, 0x78, 0x6a, 0xc4, 0xe0, 0x00,
-  0x40, 0x10, 0x0c, 0xa8, 0xb4, 0x14, 0x64, 0x52, 0x70, 0x99, 0x91, 0x14,
-  0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x52, 0x4b, 0x41, 0x26, 0x85,
-  0x40, 0xb8, 0x60, 0x98, 0x0b, 0x9e, 0xba, 0xe3, 0xa9, 0x8b, 0x45, 0x61,
-  0x98, 0x33, 0xff, 0x60, 0x98, 0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83,
-  0x03, 0x00, 0x41, 0x30, 0xf8, 0xdc, 0x52, 0xf0, 0x49, 0xa1, 0x1f, 0x85,
-  0xb5, 0x14, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06,
-  0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10,
-  0x0c, 0xb8, 0xba, 0x14, 0xca, 0x52, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01,
-  0x40, 0x10, 0x0c, 0x38, 0xbb, 0x14, 0xcc, 0x52, 0x48, 0x88, 0x60, 0xc4,
-  0x00, 0x01, 0x40, 0x10, 0x0c, 0xb8, 0xbb, 0x14, 0xce, 0x52, 0x48, 0x88,
-  0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x96, 0xbf, 0x14, 0xcc, 0x52,
-  0x38, 0x49, 0x21, 0x90, 0x4b, 0xe1, 0x26, 0x05, 0xba, 0x14, 0x46, 0x13,
-  0x02, 0xe0, 0x82, 0xc7, 0x66, 0x09, 0xcc, 0x66, 0xb8, 0xe1, 0x5e, 0x83,
-  0xbb, 0x14, 0xc0, 0x60, 0x96, 0xa1, 0x66, 0xcc, 0x26, 0xb0, 0x7c, 0x14,
-  0xf6, 0x51, 0x88, 0xcf, 0x70, 0x04, 0x0a, 0x06, 0xfc, 0x28, 0x10, 0xdf,
-  0x2c, 0x83, 0xcd, 0xe4, 0x4c, 0x60, 0xfd, 0x28, 0xa4, 0x60, 0x10, 0x1f,
-  0x0b, 0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x60, 0xc8, 0xc7,
-  0x8a, 0x20, 0x3e, 0x45, 0x84, 0xa6, 0xa0, 0xc3, 0x0d, 0xc1, 0x5f, 0x0a,
-  0x60, 0x30, 0xcb, 0x70, 0x33, 0x38, 0x13, 0xd8, 0x50, 0x92, 0x02, 0x7c,
-  0x66, 0x09, 0x7a, 0xc6, 0x48, 0x52, 0x20, 0xe2, 0x33, 0x4b, 0xd0, 0x33,
-  0xc3, 0x11, 0x33, 0x18, 0x94, 0xa4, 0x20, 0x7c, 0xb3, 0x0c, 0x3a, 0xd3,
-  0x33, 0x81, 0xd1, 0x60, 0x60, 0x92, 0x42, 0x7c, 0x2c, 0x70, 0xe8, 0x73,
-  0xc1, 0x30, 0x17, 0x3c, 0x65, 0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14,
-  0xc1, 0x9a, 0x82, 0x0e, 0x37, 0x04, 0xaa, 0x29, 0x80, 0xc1, 0x2c, 0xc3,
-  0xce, 0xf0, 0x4c, 0x60, 0x2e, 0x29, 0x0c, 0xf1, 0x99, 0x25, 0xe8, 0x19,
-  0x23, 0x62, 0x52, 0x80, 0xcf, 0x2c, 0x41, 0xcf, 0x0c, 0xb4, 0x3c, 0xda,
-  0xcd, 0x60, 0x38, 0x43, 0xec, 0x8c, 0xc0, 0x33, 0x7c, 0x28, 0xe4, 0xcc,
-  0x05, 0xc3, 0x18, 0x4c, 0x0a, 0x34, 0x29, 0xc4, 0x67, 0x38, 0xc2, 0x6f,
-  0x6a, 0x52, 0x20, 0xbe, 0x59, 0x06, 0x9f, 0x09, 0x9b, 0xc0, 0x6c, 0x52,
-  0xf8, 0x9b, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca,
-  0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x42, 0x37, 0x05, 0x1d, 0x6e,
-  0x08, 0x70, 0x53, 0x00, 0x83, 0x59, 0x86, 0x9f, 0x01, 0x9b, 0xc0, 0x06,
-  0x9f, 0x14, 0xe0, 0x33, 0x4b, 0x50, 0x36, 0xb6, 0x93, 0x02, 0x11, 0x9f,
-  0x59, 0x82, 0xb2, 0x19, 0x8e, 0x48, 0x1d, 0x9e, 0x14, 0x84, 0x6f, 0x96,
-  0x41, 0x6c, 0xca, 0x26, 0x30, 0xd5, 0xe9, 0x49, 0x21, 0x3e, 0x16, 0x38,
+  0x11, 0xb5, 0x2a, 0xe8, 0x70, 0x43, 0x30, 0xab, 0x02, 0x18, 0xcc, 0x32,
+  0xd8, 0xcd, 0xdd, 0x04, 0x36, 0xe4, 0xa8, 0x00, 0x9f, 0x59, 0x02, 0xbe,
+  0x31, 0x1b, 0x15, 0x88, 0xf8, 0xcc, 0x12, 0xf0, 0xcd, 0x70, 0xc4, 0x2e,
+  0xdc, 0xa8, 0x20, 0x7c, 0xb3, 0x0c, 0x79, 0xc3, 0x37, 0x81, 0xf1, 0x02,
+  0x8e, 0x0a, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94,
+  0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x04, 0xb8, 0x0a, 0x3a, 0xdc,
+  0x10, 0xf8, 0xaa, 0x00, 0x06, 0xb3, 0x0c, 0x7a, 0xb3, 0x37, 0x81, 0x81,
+  0xa9, 0x30, 0xc4, 0x67, 0x96, 0x80, 0x6f, 0x8c, 0x28, 0x53, 0x01, 0x3e,
+  0xb3, 0x04, 0x7c, 0x33, 0xd0, 0xf2, 0x68, 0x76, 0x83, 0xdd, 0x0d, 0xa1,
+  0x37, 0xc2, 0xde, 0xb0, 0x06, 0xde, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0xdd,
+  0xf6, 0xd4, 0xc5, 0xa8, 0x30, 0xcc, 0x99, 0xa5, 0x30, 0xcc, 0x11, 0xc3,
+  0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x7c, 0xee, 0x2a,
+  0xf8, 0xaa, 0xd0, 0xa7, 0xc2, 0xba, 0x0a, 0xa3, 0x09, 0x01, 0x30, 0x9a,
+  0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32,
+  0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x5c, 0xbd, 0x0a, 0xe5, 0x2a, 0x24,
+  0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x9c, 0xbd, 0x0a, 0xe6,
+  0x2a, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xdc, 0xbd,
+  0x0a, 0xe7, 0x2a, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06,
+  0xcb, 0xbf, 0x0a, 0xe6, 0x2a, 0x9c, 0xaa, 0x10, 0xc8, 0xab, 0x70, 0xab,
+  0x02, 0xbd, 0x0a, 0xa3, 0x09, 0x01, 0x70, 0xc1, 0x63, 0xb3, 0x04, 0x7d,
+  0x33, 0xd0, 0xf2, 0x98, 0x06, 0xda, 0xc0, 0x35, 0x71, 0x36, 0x2c, 0xa1,
+  0x36, 0x02, 0xdf, 0xc0, 0x35, 0xb1, 0x36, 0xb3, 0x0c, 0x7e, 0x03, 0x3a,
+  0x77, 0x1c, 0x0c, 0x47, 0xf0, 0x71, 0x90, 0xab, 0xc2, 0xf0, 0x5d, 0x1f,
+  0x07, 0xc3, 0x0c, 0x37, 0x04, 0xa4, 0x2a, 0x90, 0x41, 0x0d, 0x81, 0x0e,
+  0x47, 0xfc, 0x43, 0xaf, 0x0a, 0xc3, 0x57, 0x81, 0xa0, 0x17, 0x12, 0xc3,
+  0x0c, 0x37, 0x04, 0xa7, 0x2a, 0x90, 0x41, 0x05, 0x83, 0xce, 0x32, 0xfc,
+  0x0d, 0xed, 0x04, 0xb7, 0xa6, 0xc2, 0x30, 0x07, 0x9a, 0xc2, 0x30, 0x23,
+  0x06, 0x07, 0x00, 0x82, 0x60, 0xf0, 0x99, 0xac, 0x60, 0xaf, 0x42, 0xad,
+  0x0a, 0x23, 0x2b, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26,
+  0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x71, 0xc8, 0x88, 0x01, 0x02, 0x80,
+  0x20, 0x18, 0x70, 0x2d, 0x2b, 0xf4, 0xab, 0x70, 0x10, 0xc1, 0x88, 0x01,
+  0x02, 0x80, 0x20, 0x18, 0x70, 0x2e, 0x2b, 0xf8, 0xab, 0xc0, 0x10, 0xc1,
+  0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70, 0x2f, 0x2b, 0xfc, 0xab, 0x20,
+  0x11, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c, 0x37, 0x2b, 0xf8,
+  0xab, 0xf0, 0xab, 0x42, 0xa0, 0xb2, 0xc2, 0xbb, 0x0a, 0x2c, 0x2b, 0x8c,
+  0x26, 0x04, 0xc0, 0x05, 0x8f, 0xcd, 0x12, 0xd0, 0xce, 0x70, 0xc3, 0x2b,
+  0x07, 0x30, 0x2b, 0x80, 0xc1, 0x2c, 0x43, 0xe8, 0x88, 0x4e, 0x50, 0xba,
+  0x2a, 0x84, 0xac, 0x00, 0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08,
+  0x06, 0x54, 0xce, 0x0a, 0x22, 0x2b, 0xd8, 0x72, 0x30, 0xaf, 0xc2, 0x88,
+  0xc1, 0x01, 0x80, 0x20, 0x18, 0x50, 0x3a, 0x2b, 0x88, 0xac, 0x10, 0x08,
+  0x17, 0x0c, 0x53, 0xbd, 0x2a, 0x98, 0xac, 0x00, 0x17, 0x3c, 0x35, 0x62,
+  0x70, 0x00, 0x20, 0x08, 0x06, 0x94, 0xcf, 0x0a, 0x27, 0x2b, 0xe8, 0x04,
+  0xbe, 0x0a, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0xfd, 0xac, 0x70,
+  0xb2, 0x42, 0x20, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0xdd, 0xf1, 0xd4, 0x99,
+  0xaa, 0x30, 0xcc, 0xed, 0xa6, 0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc,
+  0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x7c, 0x63, 0x2b, 0xcc, 0xac, 0x20,
+  0xaf, 0x02, 0xd8, 0x0a, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3,
+  0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00,
+  0x20, 0x08, 0x06, 0x9c, 0xda, 0x0a, 0x3a, 0x2b, 0x24, 0x44, 0x30, 0x62,
+  0x80, 0x00, 0x20, 0x08, 0x06, 0xdc, 0xda, 0x0a, 0x3b, 0x2b, 0x24, 0x44,
+  0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x1c, 0xdb, 0x0a, 0x3c, 0x2b,
+  0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x0b, 0xdd, 0x0a,
+  0x3b, 0x2b, 0xf0, 0xab, 0x10, 0x9c, 0xad, 0xc0, 0xb2, 0x42, 0xda, 0x0a,
+  0xa3, 0x09, 0x01, 0x70, 0xc1, 0x63, 0xb3, 0x04, 0xb4, 0x33, 0xdc, 0xc0,
+  0xce, 0x01, 0xdb, 0x0a, 0x60, 0x30, 0xcb, 0x30, 0x3a, 0xb4, 0x13, 0x98,
+  0xbb, 0x0a, 0xf0, 0x2a, 0xc4, 0x67, 0x38, 0x42, 0x9e, 0x83, 0x78, 0x15,
+  0x88, 0x6f, 0x96, 0x81, 0x74, 0x4e, 0x27, 0x30, 0x79, 0x15, 0xe6, 0x39,
+  0x88, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x30,
+  0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xec, 0x56, 0xd0, 0xe1, 0x86, 0x80,
+  0x6e, 0x05, 0x30, 0x98, 0x65, 0x28, 0x1d, 0xd3, 0x09, 0x6c, 0xd0, 0x57,
+  0x01, 0x3e, 0xb3, 0x04, 0xab, 0x63, 0xf9, 0x2a, 0x10, 0xf1, 0x99, 0x25,
+  0x58, 0x9d, 0xe1, 0x88, 0x7e, 0x0e, 0xf4, 0x55, 0x10, 0xbe, 0x59, 0x06,
+  0xd4, 0x59, 0x9d, 0xc0, 0xfc, 0x39, 0xd8, 0x57, 0x21, 0x3e, 0x16, 0x38,
   0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41,
-  0x7c, 0x8a, 0x28, 0x4f, 0x41, 0x87, 0x1b, 0x82, 0xf1, 0x14, 0xc0, 0x60,
-  0x96, 0x61, 0x6c, 0xc8, 0x26, 0xb0, 0xb2, 0x14, 0x86, 0xf8, 0xcc, 0x12,
-  0x94, 0x8d, 0x11, 0x6a, 0x29, 0xc0, 0x67, 0x96, 0xa0, 0x6c, 0x06, 0x5a,
-  0x1e, 0xed, 0x67, 0x30, 0xb0, 0x21, 0xc6, 0x46, 0x20, 0x1b, 0xb4, 0x0b,
-  0x9b, 0x0b, 0x86, 0xb9, 0xe0, 0xa9, 0xdb, 0x9e, 0x3a, 0x9b, 0x14, 0x86,
-  0xb9, 0x35, 0x14, 0x86, 0x39, 0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38,
-  0x00, 0x10, 0x04, 0x83, 0x6f, 0x3e, 0x85, 0xf1, 0x14, 0x44, 0x53, 0x80,
-  0x4f, 0x61, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10,
-  0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1,
-  0x80, 0xd3, 0x4f, 0x41, 0x3d, 0x85, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00,
-  0x04, 0xc1, 0x80, 0xdb, 0x4f, 0x61, 0x3d, 0x85, 0x84, 0x08, 0x46, 0x0c,
-  0x10, 0x00, 0x04, 0xc1, 0x80, 0xe3, 0x4f, 0x81, 0x3d, 0x85, 0x84, 0x08,
-  0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0x21, 0x51, 0x61, 0x3d, 0x05,
-  0xd6, 0x14, 0x82, 0xfb, 0x14, 0x78, 0x53, 0xc8, 0x4f, 0x61, 0x34, 0x21,
-  0x00, 0x2e, 0x78, 0x6c, 0x96, 0xc0, 0x6c, 0x06, 0x5a, 0x1e, 0xd3, 0x88,
-  0x19, 0xbd, 0x25, 0x60, 0x86, 0x25, 0x66, 0x46, 0x28, 0x1b, 0xbd, 0x25,
-  0x68, 0xc6, 0xfe, 0x36, 0x98, 0x4d, 0x01, 0x3e, 0xb3, 0x0c, 0x67, 0x93,
-  0x36, 0x7d, 0x1b, 0x0c, 0x47, 0x84, 0x6e, 0xf0, 0x9b, 0xc2, 0xf0, 0x9d,
-  0xe8, 0x06, 0xc3, 0x0c, 0x37, 0x04, 0xaa, 0x29, 0x90, 0x41, 0x0d, 0x81,
-  0x0e, 0x47, 0x14, 0xe3, 0x29, 0x0c, 0x5f, 0x05, 0x82, 0xde, 0x31, 0xcc,
-  0x70, 0x43, 0xd0, 0x9a, 0x02, 0x19, 0x54, 0x30, 0xe8, 0x2c, 0x03, 0xda,
-  0xf4, 0x4d, 0x70, 0x71, 0x29, 0x0c, 0x73, 0xa6, 0x28, 0x0c, 0x33, 0x62,
-  0x70, 0x00, 0x20, 0x08, 0x06, 0x1f, 0x8b, 0x0a, 0xfc, 0x29, 0xec, 0xa6,
-  0x90, 0xa2, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2,
-  0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x87, 0x8c, 0x18, 0x20, 0x00, 0x08,
-  0x82, 0x01, 0x37, 0xa3, 0xc2, 0x88, 0x0a, 0x07, 0x11, 0x8c, 0x18, 0x20,
-  0x00, 0x08, 0x82, 0x01, 0x47, 0xa3, 0x02, 0x89, 0x0a, 0x0c, 0x11, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x57, 0xa3, 0x42, 0x89, 0x0a, 0x12,
-  0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0xd2, 0xa3, 0x02, 0x89,
-  0x0a, 0xe5, 0x29, 0x04, 0x30, 0x2a, 0xd4, 0xa7, 0x20, 0xa3, 0xc2, 0x68,
-  0x42, 0x00, 0x5c, 0xf0, 0xd8, 0x2c, 0x41, 0xdf, 0x0c, 0x37, 0xd4, 0x6e,
-  0x60, 0xa3, 0x02, 0x18, 0xcc, 0x32, 0xa8, 0xcd, 0xda, 0x04, 0x05, 0x9e,
-  0xc2, 0x89, 0x0a, 0x70, 0xc1, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60,
-  0x40, 0xfd, 0xa8, 0x80, 0xa2, 0xc2, 0xee, 0x06, 0xf9, 0x29, 0x8c, 0x18,
-  0x1c, 0x00, 0x08, 0x82, 0x01, 0x05, 0xa6, 0x02, 0x8a, 0x0a, 0x81, 0x70,
-  0xc1, 0x30, 0x35, 0x9e, 0x02, 0x8b, 0x0a, 0x70, 0xc1, 0x53, 0x23, 0x06,
-  0x07, 0x00, 0x82, 0x60, 0x40, 0x91, 0xa9, 0xd0, 0xa2, 0x02, 0x18, 0xf8,
-  0xa7, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x54, 0x99, 0x0a, 0x2d,
-  0x2a, 0x04, 0xc2, 0x05, 0xc3, 0x5c, 0xf0, 0xd4, 0x1d, 0x4f, 0x1d, 0x6b,
-  0x0a, 0xc3, 0x5c, 0x38, 0x0a, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c,
-  0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x97, 0xa6, 0x42, 0x8e, 0x0a, 0xf8,
-  0x29, 0x98, 0xa9, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a,
+  0x7c, 0x8a, 0x08, 0x5d, 0x41, 0x87, 0x1b, 0x82, 0xbf, 0x15, 0xc0, 0x60,
+  0x96, 0x21, 0x75, 0x54, 0x27, 0xb0, 0x91, 0x15, 0x86, 0xf8, 0xcc, 0x12,
+  0xac, 0x8e, 0x11, 0x26, 0x2b, 0xc0, 0x67, 0x96, 0x60, 0x75, 0x06, 0x5a,
+  0x1e, 0xad, 0x74, 0x30, 0xd3, 0x21, 0x52, 0x47, 0x50, 0x1d, 0x18, 0x1d,
+  0x4e, 0xe7, 0x82, 0x61, 0xac, 0x64, 0x85, 0x94, 0x15, 0xe2, 0x33, 0x1c,
+  0xa1, 0x1a, 0x2a, 0x2b, 0x10, 0xdf, 0x2c, 0x03, 0xeb, 0xbc, 0x4e, 0x60,
+  0x2b, 0x2b, 0xac, 0x46, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17,
+  0x3c, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xf1, 0xba, 0x82,
+  0x0e, 0x37, 0x04, 0xad, 0x2b, 0x80, 0xc1, 0x2c, 0x43, 0xeb, 0xb8, 0x4e,
+  0x60, 0xc3, 0xcc, 0x0a, 0xf0, 0x99, 0x25, 0x98, 0x1d, 0x83, 0x59, 0x81,
+  0x88, 0xcf, 0x2c, 0xc1, 0xec, 0x0c, 0x47, 0xd4, 0x46, 0xcc, 0x0a, 0xc2,
+  0x37, 0xcb, 0x00, 0x3b, 0xb3, 0x13, 0x98, 0x6d, 0xc8, 0xac, 0x10, 0x1f,
+  0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x10, 0xc9, 0xc7,
+  0x8a, 0x20, 0x3e, 0x45, 0xe8, 0xae, 0xa0, 0xc3, 0x0d, 0x01, 0xee, 0x0a,
+  0x60, 0x30, 0xcb, 0x10, 0x3b, 0xb2, 0x13, 0x98, 0xce, 0x0a, 0x43, 0x7c,
+  0x66, 0x09, 0x66, 0xc7, 0x88, 0x9f, 0x15, 0xe0, 0x33, 0x4b, 0x30, 0x3b,
+  0x03, 0x2d, 0x8f, 0xd6, 0x3a, 0x98, 0xeb, 0x10, 0xb1, 0x23, 0xc8, 0x0e,
+  0xe8, 0xbc, 0xce, 0x05, 0xc3, 0x5c, 0xf0, 0xd4, 0x6d, 0x4f, 0xdd, 0xca,
+  0x0a, 0xc3, 0x1c, 0x98, 0x0a, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c,
+  0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x87, 0xbe, 0x02, 0xee, 0x0a, 0x77,
+  0x2b, 0x94, 0xaf, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a,
   0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00,
-  0x82, 0x60, 0xc0, 0xc1, 0xa9, 0x00, 0xa6, 0x42, 0x42, 0x04, 0x23, 0x06,
-  0x08, 0x00, 0x82, 0x60, 0xc0, 0xc5, 0xa9, 0x10, 0xa6, 0x42, 0x42, 0x04,
-  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xc0, 0xc9, 0xa9, 0x20, 0xa6, 0x42,
-  0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0xe8, 0xa9, 0x10,
-  0xa6, 0x82, 0x88, 0x0a, 0x41, 0x9b, 0x0a, 0x32, 0x2a, 0xbc, 0xa9, 0x30,
-  0x9a, 0x10, 0x00, 0x17, 0x3c, 0x36, 0x4b, 0xd0, 0x37, 0xc3, 0x0d, 0xf2,
-  0x1b, 0xc8, 0xa9, 0x00, 0x06, 0xb3, 0x0c, 0x6c, 0xd3, 0x37, 0x81, 0xd1,
-  0xa7, 0x60, 0x9f, 0x42, 0x7c, 0x86, 0x23, 0xee, 0x37, 0xb8, 0x4f, 0x81,
-  0xf8, 0x66, 0x19, 0xda, 0x06, 0x6e, 0x02, 0xc3, 0x4f, 0x01, 0x7f, 0x83,
-  0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca, 0x02, 0x43,
-  0x3e, 0x56, 0x04, 0xf1, 0x29, 0x82, 0x4f, 0x05, 0x1d, 0x6e, 0x08, 0xf4,
-  0x54, 0x00, 0x83, 0x59, 0x06, 0xb7, 0x79, 0x9b, 0xc0, 0x06, 0x10, 0x15,
-  0xe0, 0x33, 0x4b, 0x40, 0x37, 0xf6, 0x9f, 0x02, 0x11, 0x9f, 0x59, 0x02,
-  0xba, 0x19, 0x8e, 0x10, 0xe1, 0x00, 0x44, 0x05, 0xe1, 0x9b, 0x65, 0x88,
-  0x1b, 0xba, 0x09, 0x6c, 0x84, 0x83, 0x10, 0x15, 0xe2, 0x63, 0x81, 0x43,
-  0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4,
-  0xa7, 0x88, 0x53, 0x15, 0x74, 0xb8, 0x21, 0x28, 0x55, 0x01, 0x0c, 0x66,
-  0x19, 0xe4, 0x66, 0x6e, 0x02, 0x4b, 0x51, 0x61, 0x88, 0xcf, 0x2c, 0x01,
-  0xdd, 0x18, 0xc1, 0xa2, 0x02, 0x7c, 0x66, 0x09, 0xe8, 0x66, 0xa0, 0xe5,
-  0xd1, 0xdc, 0x06, 0x7b, 0x1b, 0x42, 0x6e, 0x84, 0xb9, 0xd1, 0xc1, 0x01,
-  0x6e, 0x2e, 0x18, 0xc6, 0x56, 0x54, 0x78, 0x51, 0x21, 0x3e, 0xc3, 0x11,
-  0xb0, 0x00, 0xa3, 0x02, 0xf1, 0xcd, 0x32, 0xd4, 0x0d, 0xde, 0x04, 0x16,
-  0xa3, 0x42, 0x2c, 0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0xc1,
-  0x53, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0xb5, 0x2a, 0xe8,
-  0x70, 0x43, 0x30, 0xab, 0x02, 0x18, 0xcc, 0x32, 0xd8, 0xcd, 0xdd, 0x04,
-  0x36, 0xe4, 0xa8, 0x00, 0x9f, 0x59, 0x02, 0xbe, 0x31, 0x1b, 0x15, 0x88,
-  0xf8, 0xcc, 0x12, 0xf0, 0xcd, 0x70, 0xc4, 0x2e, 0xdc, 0xa8, 0x20, 0x7c,
-  0xb3, 0x0c, 0x79, 0xc3, 0x37, 0x81, 0xf1, 0x02, 0x8e, 0x0a, 0xf1, 0xb1,
-  0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x91, 0x7c, 0xac,
-  0x08, 0xe2, 0x53, 0x04, 0xb8, 0x0a, 0x3a, 0xdc, 0x10, 0xf8, 0xaa, 0x00,
-  0x06, 0xb3, 0x0c, 0x7a, 0xb3, 0x37, 0x81, 0x81, 0xa9, 0x30, 0xc4, 0x67,
-  0x96, 0x80, 0x6f, 0x8c, 0x28, 0x53, 0x01, 0x3e, 0xb3, 0x04, 0x7c, 0x33,
-  0xd0, 0xf2, 0x68, 0x76, 0x83, 0xdd, 0x0d, 0xa1, 0x37, 0xc2, 0xde, 0xb0,
-  0x06, 0xde, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0xdd, 0xf6, 0xd4, 0xc5, 0xa8,
-  0x30, 0xcc, 0x99, 0xa5, 0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88,
-  0xc1, 0x01, 0x80, 0x20, 0x18, 0x7c, 0xee, 0x2a, 0xf8, 0xaa, 0xd0, 0xa7,
-  0xc2, 0xba, 0x0a, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09,
-  0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20,
-  0x08, 0x06, 0x5c, 0xbd, 0x0a, 0xe5, 0x2a, 0x24, 0x44, 0x30, 0x62, 0x80,
-  0x00, 0x20, 0x08, 0x06, 0x9c, 0xbd, 0x0a, 0xe6, 0x2a, 0x24, 0x44, 0x30,
-  0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xdc, 0xbd, 0x0a, 0xe7, 0x2a, 0x24,
-  0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0xcb, 0xbf, 0x0a, 0xe6,
-  0x2a, 0x9c, 0xaa, 0x10, 0xc8, 0xab, 0x70, 0xab, 0x02, 0xbd, 0x0a, 0xa3,
-  0x09, 0x01, 0x70, 0xc1, 0x63, 0xb3, 0x04, 0x7d, 0x33, 0xd0, 0xf2, 0x98,
-  0x06, 0xda, 0xc0, 0x35, 0x71, 0x36, 0x2c, 0xa1, 0x36, 0x02, 0xdf, 0xc0,
-  0x35, 0xb1, 0x36, 0xb3, 0x0c, 0x7e, 0x03, 0x3a, 0x77, 0x1c, 0x0c, 0x47,
-  0xf0, 0x71, 0x90, 0xab, 0xc2, 0xf0, 0x5d, 0x1f, 0x07, 0xc3, 0x0c, 0x37,
-  0x04, 0xa4, 0x2a, 0x90, 0x41, 0x0d, 0x81, 0x0e, 0x47, 0xfc, 0x43, 0xaf,
-  0x0a, 0xc3, 0x57, 0x81, 0xa0, 0x17, 0x12, 0xc3, 0x0c, 0x37, 0x04, 0xa7,
-  0x2a, 0x90, 0x41, 0x05, 0x83, 0xce, 0x32, 0xfc, 0x0d, 0xed, 0x04, 0xb7,
-  0xa6, 0xc2, 0x30, 0x07, 0x9a, 0xc2, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82,
-  0x60, 0xf0, 0x99, 0xac, 0x60, 0xaf, 0x42, 0xad, 0x0a, 0x23, 0x2b, 0x8c,
-  0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02,
-  0x31, 0x14, 0x71, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70, 0x2d,
-  0x2b, 0xf4, 0xab, 0x70, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
-  0x70, 0x2e, 0x2b, 0xf8, 0xab, 0xc0, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80,
-  0x20, 0x18, 0x70, 0x2f, 0x2b, 0xfc, 0xab, 0x20, 0x11, 0xc1, 0x88, 0x81,
-  0x02, 0x80, 0x20, 0x18, 0x2c, 0x37, 0x2b, 0xf8, 0xab, 0xf0, 0xab, 0x42,
-  0xa0, 0xb2, 0xc2, 0xbb, 0x0a, 0x2c, 0x2b, 0x8c, 0x26, 0x04, 0xc0, 0x05,
-  0x8f, 0xcd, 0x12, 0xd0, 0xce, 0x70, 0xc3, 0x2b, 0x07, 0x30, 0x2b, 0x80,
-  0xc1, 0x2c, 0x43, 0xe8, 0x88, 0x4e, 0x50, 0xba, 0x2a, 0x84, 0xac, 0x00,
-  0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x54, 0xce, 0x0a,
-  0x22, 0x2b, 0xd8, 0x72, 0x30, 0xaf, 0xc2, 0x88, 0xc1, 0x01, 0x80, 0x20,
-  0x18, 0x50, 0x3a, 0x2b, 0x88, 0xac, 0x10, 0x08, 0x17, 0x0c, 0x53, 0xbd,
-  0x2a, 0x98, 0xac, 0x00, 0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08,
-  0x06, 0x94, 0xcf, 0x0a, 0x27, 0x2b, 0xe8, 0x04, 0xbe, 0x0a, 0x23, 0x06,
-  0x07, 0x00, 0x82, 0x60, 0x40, 0xfd, 0xac, 0x70, 0xb2, 0x42, 0x20, 0x5c,
-  0x30, 0xcc, 0x05, 0x4f, 0xdd, 0xf1, 0xd4, 0x99, 0xaa, 0x30, 0xcc, 0xed,
-  0xa6, 0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80,
-  0x20, 0x18, 0x7c, 0x63, 0x2b, 0xcc, 0xac, 0x20, 0xaf, 0x02, 0xd8, 0x0a,
+  0x82, 0x60, 0xc0, 0xbd, 0xaf, 0xf0, 0xbb, 0x42, 0x42, 0x04, 0x23, 0x06,
+  0x08, 0x00, 0x82, 0x60, 0xc0, 0xc1, 0xaf, 0x00, 0xbe, 0x42, 0x42, 0x04,
+  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xc0, 0xc5, 0xaf, 0x10, 0xbe, 0x42,
+  0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0xe4, 0xaf, 0x00,
+  0xbe, 0x42, 0xe8, 0x0a, 0x01, 0xfb, 0x0a, 0xb1, 0x2b, 0xb8, 0xaf, 0x30,
+  0x9a, 0x10, 0x00, 0x17, 0x3c, 0x36, 0x4b, 0x40, 0x3b, 0x03, 0x2d, 0x8f,
+  0x69, 0xfc, 0x0d, 0xd9, 0x13, 0x7e, 0xc3, 0x12, 0xa1, 0x23, 0xcc, 0x0e,
+  0xd9, 0x13, 0xa2, 0x33, 0xcb, 0x50, 0x3b, 0xb7, 0x13, 0xd7, 0xc1, 0x70,
+  0x04, 0xdf, 0x06, 0xb3, 0x2b, 0x0c, 0xdf, 0xf5, 0x6d, 0x30, 0xcc, 0x70,
+  0x43, 0xe0, 0xb7, 0x02, 0x19, 0xd4, 0x10, 0xe8, 0x70, 0x44, 0x7e, 0xdc,
+  0xae, 0x30, 0x7c, 0x15, 0x08, 0x7a, 0xfb, 0x31, 0xcc, 0x70, 0x43, 0x10,
+  0xba, 0x02, 0x19, 0x54, 0x30, 0xe8, 0x2c, 0x83, 0xed, 0xac, 0x4f, 0x70,
+  0x65, 0x2b, 0x0c, 0x73, 0x7a, 0x2a, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20,
+  0x08, 0x06, 0x1f, 0x08, 0x0b, 0xf0, 0x2b, 0xbc, 0xae, 0xd0, 0xbf, 0xc2,
+  0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26,
+  0x10, 0x43, 0x11, 0x87, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x77,
+  0xc2, 0xc2, 0xfd, 0x0a, 0x07, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
+  0x01, 0x87, 0xc2, 0x02, 0xfe, 0x0a, 0x0c, 0x11, 0x8c, 0x18, 0x20, 0x00,
+  0x08, 0x82, 0x01, 0x97, 0xc2, 0x42, 0xfe, 0x0a, 0x12, 0x11, 0x8c, 0x18,
+  0x28, 0x00, 0x08, 0x82, 0xc1, 0x12, 0xc3, 0x02, 0xfe, 0x0a, 0xb9, 0x2b,
+  0x04, 0x24, 0x2c, 0xa4, 0xaf, 0x60, 0xc2, 0xc2, 0x68, 0x42, 0x00, 0x5c,
+  0xf0, 0xd8, 0x2c, 0xc1, 0xfa, 0x0c, 0x37, 0xa4, 0x76, 0xa0, 0xc2, 0x02,
+  0x18, 0xcc, 0x32, 0xe0, 0x4e, 0xee, 0x04, 0x45, 0xbb, 0xc2, 0xfe, 0x0a,
+  0x70, 0xc1, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0xcd, 0xb0,
+  0xc0, 0xbf, 0x82, 0xed, 0x06, 0xed, 0x2b, 0x8c, 0x18, 0x1c, 0x00, 0x08,
+  0x82, 0x01, 0x45, 0xc3, 0x02, 0xff, 0x0a, 0x81, 0x70, 0xc1, 0x30, 0x75,
+  0xbb, 0x02, 0x08, 0x0b, 0x70, 0xc1, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82,
+  0x60, 0x40, 0xe1, 0xb0, 0x10, 0xc2, 0x02, 0x8d, 0xc8, 0xaf, 0x30, 0x62,
+  0x70, 0x00, 0x20, 0x08, 0x06, 0x54, 0x0e, 0x0b, 0x21, 0x2c, 0x04, 0xc2,
+  0x05, 0xc3, 0x5c, 0xf0, 0xd4, 0x1d, 0x4f, 0x1d, 0xe8, 0x0a, 0xc3, 0x5c,
+  0xad, 0x0a, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00,
+  0x08, 0x82, 0xc1, 0xd7, 0xc3, 0x42, 0x0b, 0x0b, 0xec, 0x2b, 0xe8, 0xb0,
+  0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3,
+  0x09, 0xc4, 0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xc0,
+  0x91, 0xb1, 0x40, 0xc3, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82,
+  0x60, 0xc0, 0x95, 0xb1, 0x50, 0xc3, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08,
+  0x00, 0x82, 0x60, 0xc0, 0x99, 0xb1, 0x60, 0xc3, 0x42, 0x42, 0x04, 0x23,
+  0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0xb8, 0xb1, 0x50, 0xc3, 0x82, 0xfd,
+  0x0a, 0x41, 0x18, 0x0b, 0x26, 0x2c, 0x8c, 0xb1, 0x30, 0x9a, 0x10, 0x00,
+  0x17, 0x3c, 0x36, 0x4b, 0xb0, 0x3e, 0xc3, 0x0d, 0xe6, 0x1d, 0x98, 0xb1,
+  0x00, 0x06, 0xb3, 0x0c, 0xba, 0xb3, 0x3e, 0x81, 0xa1, 0xaf, 0xa0, 0xbe,
+  0x42, 0x7c, 0x86, 0x23, 0xe4, 0x37, 0x58, 0x5f, 0x81, 0xf8, 0x66, 0x19,
+  0x76, 0xc7, 0x77, 0x02, 0x63, 0x5f, 0x61, 0x7e, 0x83, 0xf8, 0x58, 0x30,
+  0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04,
+  0xf1, 0x29, 0x02, 0x8e, 0x05, 0x1d, 0x6e, 0x08, 0xdc, 0x58, 0x00, 0x83,
+  0x59, 0x06, 0xde, 0xe9, 0x9d, 0xc0, 0x06, 0xfa, 0x15, 0xe0, 0x33, 0x4b,
+  0x20, 0x3e, 0x36, 0xbf, 0x02, 0x11, 0x9f, 0x59, 0x02, 0xf1, 0x19, 0x8e,
+  0xe8, 0xdf, 0x80, 0x7e, 0x05, 0xe1, 0x9b, 0x65, 0xf8, 0x1d, 0xf1, 0x09,
+  0xcc, 0x7f, 0x83, 0xfa, 0x15, 0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86,
+  0xb9, 0xe0, 0x29, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0x3d,
+  0x16, 0x74, 0xb8, 0x21, 0xc8, 0x63, 0x01, 0x0c, 0x66, 0x19, 0xc0, 0x27,
+  0x7c, 0x02, 0xeb, 0x5f, 0x61, 0x88, 0xcf, 0x2c, 0x81, 0xf8, 0x18, 0x01,
+  0xc2, 0x02, 0x7c, 0x66, 0x09, 0xc4, 0x67, 0xa0, 0xe5, 0xd1, 0x78, 0x07,
+  0xeb, 0x1d, 0x02, 0x7c, 0x84, 0xf0, 0x81, 0xc1, 0xc1, 0x77, 0x2e, 0x18,
+  0xc6, 0xfe, 0x57, 0x18, 0x61, 0x21, 0x3e, 0xc3, 0x11, 0xa4, 0x42, 0xc2,
+  0x02, 0xf1, 0xcd, 0x32, 0x8c, 0x8f, 0xf9, 0x04, 0x56, 0xc2, 0x42, 0xa9,
+  0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x18,
+  0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0xa9, 0x2c, 0xe8, 0x70, 0x43, 0x70,
+  0xca, 0x02, 0x18, 0xcc, 0x32, 0x90, 0x4f, 0xf9, 0x04, 0x36, 0xb4, 0xb0,
+  0x00, 0x9f, 0x59, 0x02, 0xf5, 0x31, 0x15, 0x16, 0x88, 0xf8, 0xcc, 0x12,
+  0xa8, 0xcf, 0x70, 0xc4, 0xab, 0xac, 0xb0, 0x20, 0x7c, 0xb3, 0x0c, 0xe7,
+  0xa3, 0x3e, 0x81, 0xc1, 0x0a, 0x0b, 0x0b, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf,
+  0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53,
+  0x04, 0x2d, 0x0b, 0x3a, 0xdc, 0x10, 0xc8, 0xb2, 0x00, 0x06, 0xb3, 0x0c,
+  0xe8, 0x93, 0x3e, 0x81, 0xd1, 0xb0, 0x30, 0xc4, 0x67, 0x96, 0x40, 0x7d,
+  0x8c, 0xc8, 0x61, 0x01, 0x3e, 0xb3, 0x04, 0xea, 0x33, 0xd0, 0xf2, 0x68,
+  0xe4, 0x83, 0x95, 0x0f, 0x81, 0x3e, 0x42, 0xfa, 0xd0, 0x95, 0xf9, 0x5c,
+  0x30, 0xcc, 0x05, 0x4f, 0xdd, 0xf6, 0xd4, 0x95, 0xb0, 0x30, 0xcc, 0xe9,
+  0xac, 0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80,
+  0x20, 0x18, 0x7c, 0xe2, 0x2c, 0xc8, 0xb2, 0x10, 0xc7, 0xc2, 0x2f, 0x0b,
   0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a,
-  0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x9c,
-  0xda, 0x0a, 0x3a, 0x2b, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08,
-  0x06, 0xdc, 0xda, 0x0a, 0x3b, 0x2b, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00,
-  0x20, 0x08, 0x06, 0x1c, 0xdb, 0x0a, 0x3c, 0x2b, 0x24, 0x44, 0x30, 0x62,
-  0xa0, 0x00, 0x20, 0x08, 0x06, 0x0b, 0xdd, 0x0a, 0x3b, 0x2b, 0xf0, 0xab,
-  0x10, 0x9c, 0xad, 0xc0, 0xb2, 0x42, 0xda, 0x0a, 0xa3, 0x09, 0x01, 0x70,
-  0xc1, 0x63, 0xb3, 0x04, 0xb4, 0x33, 0xdc, 0xc0, 0xce, 0x01, 0xdb, 0x0a,
-  0x60, 0x30, 0xcb, 0x30, 0x3a, 0xb4, 0x13, 0x98, 0xbb, 0x0a, 0xf0, 0x2a,
-  0xc4, 0x67, 0x38, 0x42, 0x9e, 0x83, 0x78, 0x15, 0x88, 0x6f, 0x96, 0x81,
-  0x74, 0x4e, 0x27, 0x30, 0x79, 0x15, 0xe6, 0x39, 0x88, 0x8f, 0x05, 0x03,
-  0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10,
-  0x9f, 0x22, 0xec, 0x56, 0xd0, 0xe1, 0x86, 0x80, 0x6e, 0x05, 0x30, 0x98,
-  0x65, 0x28, 0x1d, 0xd3, 0x09, 0x6c, 0xd0, 0x57, 0x01, 0x3e, 0xb3, 0x04,
-  0xab, 0x63, 0xf9, 0x2a, 0x10, 0xf1, 0x99, 0x25, 0x58, 0x9d, 0xe1, 0x88,
-  0x7e, 0x0e, 0xf4, 0x55, 0x10, 0xbe, 0x59, 0x06, 0xd4, 0x59, 0x9d, 0xc0,
-  0xfc, 0x39, 0xd8, 0x57, 0x21, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98,
-  0x0b, 0x9e, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x08, 0x5d,
-  0x41, 0x87, 0x1b, 0x82, 0xbf, 0x15, 0xc0, 0x60, 0x96, 0x21, 0x75, 0x54,
-  0x27, 0xb0, 0x91, 0x15, 0x86, 0xf8, 0xcc, 0x12, 0xac, 0x8e, 0x11, 0x26,
-  0x2b, 0xc0, 0x67, 0x96, 0x60, 0x75, 0x06, 0x5a, 0x1e, 0xad, 0x74, 0x30,
-  0xd3, 0x21, 0x52, 0x47, 0x50, 0x1d, 0x18, 0x1d, 0x4e, 0xe7, 0x82, 0x61,
-  0xac, 0x64, 0x85, 0x94, 0x15, 0xe2, 0x33, 0x1c, 0xa1, 0x1a, 0x2a, 0x2b,
-  0x10, 0xdf, 0x2c, 0x03, 0xeb, 0xbc, 0x4e, 0x60, 0x2b, 0x2b, 0xac, 0x46,
-  0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x81, 0x21,
-  0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xf1, 0xba, 0x82, 0x0e, 0x37, 0x04, 0xad,
-  0x2b, 0x80, 0xc1, 0x2c, 0x43, 0xeb, 0xb8, 0x4e, 0x60, 0xc3, 0xcc, 0x0a,
-  0xf0, 0x99, 0x25, 0x98, 0x1d, 0x83, 0x59, 0x81, 0x88, 0xcf, 0x2c, 0xc1,
-  0xec, 0x0c, 0x47, 0xd4, 0x46, 0xcc, 0x0a, 0xc2, 0x37, 0xcb, 0x00, 0x3b,
-  0xb3, 0x13, 0x98, 0x6d, 0xc8, 0xac, 0x10, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c,
-  0x30, 0xcc, 0x05, 0x4f, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45,
-  0xe8, 0xae, 0xa0, 0xc3, 0x0d, 0x01, 0xee, 0x0a, 0x60, 0x30, 0xcb, 0x10,
-  0x3b, 0xb2, 0x13, 0x98, 0xce, 0x0a, 0x43, 0x7c, 0x66, 0x09, 0x66, 0xc7,
-  0x88, 0x9f, 0x15, 0xe0, 0x33, 0x4b, 0x30, 0x3b, 0x03, 0x2d, 0x8f, 0xd6,
-  0x3a, 0x98, 0xeb, 0x10, 0xb1, 0x23, 0xc8, 0x0e, 0xe8, 0xbc, 0xce, 0x05,
-  0xc3, 0x5c, 0xf0, 0xd4, 0x6d, 0x4f, 0xdd, 0xca, 0x0a, 0xc3, 0x1c, 0x98,
-  0x0a, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08,
-  0x82, 0xc1, 0x87, 0xbe, 0x02, 0xee, 0x0a, 0x77, 0x2b, 0x94, 0xaf, 0x30,
-  0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09,
-  0xc4, 0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xc0, 0xbd,
-  0xaf, 0xf0, 0xbb, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60,
-  0xc0, 0xc1, 0xaf, 0x00, 0xbe, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00,
-  0x82, 0x60, 0xc0, 0xc5, 0xaf, 0x10, 0xbe, 0x42, 0x42, 0x04, 0x23, 0x06,
-  0x0a, 0x00, 0x82, 0x60, 0xb0, 0xe4, 0xaf, 0x00, 0xbe, 0x42, 0xe8, 0x0a,
-  0x01, 0xfb, 0x0a, 0xb1, 0x2b, 0xb8, 0xaf, 0x30, 0x9a, 0x10, 0x00, 0x17,
-  0x3c, 0x36, 0x4b, 0x40, 0x3b, 0x03, 0x2d, 0x8f, 0x69, 0xfc, 0x0d, 0xd9,
-  0x13, 0x7e, 0xc3, 0x12, 0xa1, 0x23, 0xcc, 0x0e, 0xd9, 0x13, 0xa2, 0x33,
-  0xcb, 0x50, 0x3b, 0xb7, 0x13, 0xd7, 0xc1, 0x70, 0x04, 0xdf, 0x06, 0xb3,
-  0x2b, 0x0c, 0xdf, 0xf5, 0x6d, 0x30, 0xcc, 0x70, 0x43, 0xe0, 0xb7, 0x02,
-  0x19, 0xd4, 0x10, 0xe8, 0x70, 0x44, 0x7e, 0xdc, 0xae, 0x30, 0x7c, 0x15,
-  0x08, 0x7a, 0xfb, 0x31, 0xcc, 0x70, 0x43, 0x10, 0xba, 0x02, 0x19, 0x54,
-  0x30, 0xe8, 0x2c, 0x83, 0xed, 0xac, 0x4f, 0x70, 0x65, 0x2b, 0x0c, 0x73,
-  0x7a, 0x2a, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x1f, 0x08,
-  0x0b, 0xf0, 0x2b, 0xbc, 0xae, 0xd0, 0xbf, 0xc2, 0x68, 0x42, 0x00, 0x8c,
-  0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x87,
-  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x77, 0xc2, 0xc2, 0xfd, 0x0a,
-  0x07, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x87, 0xc2, 0x02,
-  0xfe, 0x0a, 0x0c, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x97,
-  0xc2, 0x42, 0xfe, 0x0a, 0x12, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82,
-  0xc1, 0x12, 0xc3, 0x02, 0xfe, 0x0a, 0xb9, 0x2b, 0x04, 0x24, 0x2c, 0xa4,
-  0xaf, 0x60, 0xc2, 0xc2, 0x68, 0x42, 0x00, 0x5c, 0xf0, 0xd8, 0x2c, 0xc1,
-  0xfa, 0x0c, 0x37, 0xa4, 0x76, 0xa0, 0xc2, 0x02, 0x18, 0xcc, 0x32, 0xe0,
-  0x4e, 0xee, 0x04, 0x45, 0xbb, 0xc2, 0xfe, 0x0a, 0x70, 0xc1, 0x53, 0x23,
-  0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0xcd, 0xb0, 0xc0, 0xbf, 0x82, 0xed,
-  0x06, 0xed, 0x2b, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x01, 0x45, 0xc3,
-  0x02, 0xff, 0x0a, 0x81, 0x70, 0xc1, 0x30, 0x75, 0xbb, 0x02, 0x08, 0x0b,
-  0x70, 0xc1, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0xe1, 0xb0,
-  0x10, 0xc2, 0x02, 0x8d, 0xc8, 0xaf, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08,
-  0x06, 0x54, 0x0e, 0x0b, 0x21, 0x2c, 0x04, 0xc2, 0x05, 0xc3, 0x5c, 0xf0,
-  0xd4, 0x1d, 0x4f, 0x1d, 0xe8, 0x0a, 0xc3, 0x5c, 0xad, 0x0a, 0xc3, 0x1c,
-  0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0xd7,
-  0xc3, 0x42, 0x0b, 0x0b, 0xec, 0x2b, 0xe8, 0xb0, 0x30, 0x9a, 0x10, 0x00,
+  0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x5c,
+  0x3a, 0x0b, 0xb9, 0x2c, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08,
+  0x06, 0x9c, 0x3a, 0x0b, 0xba, 0x2c, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00,
+  0x20, 0x08, 0x06, 0xdc, 0x3a, 0x0b, 0xbb, 0x2c, 0x24, 0x44, 0x30, 0x62,
+  0xa0, 0x00, 0x20, 0x08, 0x06, 0xcb, 0x3c, 0x0b, 0xba, 0x2c, 0xec, 0xb1,
+  0x10, 0x98, 0xb3, 0xb0, 0xca, 0x02, 0x3a, 0x0b, 0xa3, 0x09, 0x01, 0x70,
+  0xc1, 0x63, 0xb3, 0x04, 0xeb, 0x33, 0xd0, 0xf2, 0x98, 0x86, 0xed, 0xe0,
+  0x64, 0x51, 0x3b, 0x2c, 0x81, 0x3b, 0x82, 0xfa, 0xe0, 0x64, 0x91, 0x3b,
+  0xb3, 0x0c, 0xec, 0xe3, 0x3e, 0x6b, 0x1e, 0x0c, 0x47, 0xec, 0x6d, 0xd0,
+  0xca, 0xc2, 0xf0, 0x1d, 0xdf, 0x06, 0xc3, 0x0c, 0x37, 0x04, 0x78, 0x2c,
+  0x90, 0x41, 0x0d, 0x81, 0x0e, 0x47, 0xcc, 0x4b, 0x2c, 0x0b, 0xc3, 0x57,
+  0x81, 0xa0, 0x57, 0x2f, 0xc3, 0x0c, 0x37, 0x04, 0x7b, 0x2c, 0x90, 0x41,
+  0x05, 0x83, 0xce, 0x32, 0xb4, 0x8f, 0x08, 0x05, 0xf7, 0xc3, 0xc2, 0x30,
+  0x47, 0xb7, 0xc2, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xf0, 0xe9,
+  0xb3, 0xa0, 0xce, 0x42, 0x2a, 0x0b, 0xf7, 0x2c, 0x8c, 0x26, 0x04, 0xc0,
+  0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x71,
+  0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70, 0x21, 0x2d, 0xc4, 0xb3,
+  0x70, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70, 0x22, 0x2d,
+  0xc8, 0xb3, 0xc0, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70,
+  0x23, 0x2d, 0xcc, 0xb3, 0x20, 0x11, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20,
+  0x18, 0x2c, 0x2b, 0x2d, 0xc8, 0xb3, 0x30, 0xcb, 0x42, 0xe0, 0xcf, 0xc2,
+  0x38, 0x0b, 0x20, 0x2d, 0x8c, 0x26, 0x04, 0xc0, 0x05, 0x8f, 0xcd, 0x12,
+  0x88, 0xd0, 0x70, 0xc3, 0xa8, 0x07, 0x24, 0x2d, 0x80, 0xc1, 0x2c, 0xc3,
+  0xfb, 0xc0, 0x4f, 0x50, 0xae, 0x2c, 0xd4, 0xb3, 0x00, 0x17, 0x3c, 0x35,
+  0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x54, 0x4b, 0x0b, 0xf6, 0x2c, 0xd4,
+  0x6e, 0x70, 0xce, 0xc2, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50, 0x2e,
+  0x2d, 0xd8, 0xb3, 0x10, 0x08, 0x17, 0x0c, 0x53, 0xb1, 0x2c, 0xe8, 0xb3,
+  0x00, 0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x94, 0x4c,
+  0x0b, 0xfb, 0x2c, 0xb8, 0x0c, 0x3b, 0x0b, 0x23, 0x06, 0x07, 0x00, 0x82,
+  0x60, 0x40, 0xcd, 0xb4, 0xb0, 0xcf, 0x42, 0x20, 0x5c, 0x30, 0xcc, 0x05,
+  0x4f, 0xdd, 0xf1, 0xd4, 0xe9, 0xb1, 0x30, 0xcc, 0xbd, 0xae, 0x30, 0xcc,
+  0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x7c,
+  0x37, 0x2d, 0x9c, 0xb4, 0x60, 0xce, 0x02, 0x4d, 0x0b, 0xa3, 0x09, 0x01,
+  0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45,
+  0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x9c, 0x4f, 0x0b, 0x2e,
+  0x2d, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xdc, 0x4f,
+  0x0b, 0x2f, 0x2d, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06,
+  0x1c, 0x58, 0x0b, 0x30, 0x2d, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20,
+  0x08, 0x06, 0x0b, 0x5a, 0x0b, 0x2f, 0x2d, 0xc0, 0xb3, 0x10, 0xec, 0xb4,
+  0x00, 0xd2, 0x42, 0x4f, 0x0b, 0xa3, 0x09, 0x01, 0x70, 0xc1, 0x63, 0xb3,
+  0x04, 0x22, 0x34, 0xdc, 0x00, 0xee, 0x01, 0x58, 0x0b, 0x60, 0x30, 0xcb,
+  0x10, 0x3f, 0x22, 0x14, 0x98, 0x38, 0x0b, 0xe4, 0x2c, 0xc4, 0x67, 0x38,
+  0x22, 0x7e, 0x83, 0x72, 0x16, 0x88, 0x6f, 0x96, 0x41, 0x7e, 0xea, 0x27,
+  0x30, 0x73, 0x16, 0xe4, 0x37, 0x88, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18,
+  0xe6, 0x82, 0xa7, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xd4,
+  0x5a, 0xd0, 0xe1, 0x86, 0x00, 0xad, 0x05, 0x30, 0x98, 0x65, 0x98, 0x1f,
+  0xfa, 0x09, 0x6c, 0x70, 0x67, 0x01, 0x3e, 0xb3, 0x04, 0xf9, 0x63, 0xed,
+  0x2c, 0x10, 0xf1, 0x99, 0x25, 0xc8, 0x9f, 0xe1, 0x08, 0xfe, 0x0d, 0xdc,
+  0x59, 0x10, 0xbe, 0x59, 0x06, 0xfb, 0xc9, 0x9f, 0xc0, 0xfa, 0x37, 0x78,
+  0x67, 0x21, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2,
+  0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xa8, 0x6b, 0x41, 0x87, 0x1b,
+  0x82, 0xb9, 0x16, 0xc0, 0x60, 0x96, 0xe1, 0x7e, 0xf0, 0x27, 0xb0, 0x7b,
+  0x16, 0x86, 0xf8, 0xcc, 0x12, 0xe4, 0x8f, 0x11, 0xfa, 0x2c, 0xc0, 0x67,
+  0x96, 0x20, 0x7f, 0x06, 0x5a, 0x1e, 0x6d, 0x7e, 0x30, 0xfa, 0x21, 0xee,
+  0x47, 0xc0, 0x1f, 0x17, 0x1c, 0xea, 0xe7, 0x82, 0x61, 0x2c, 0x9f, 0x85,
+  0x7e, 0x16, 0xe2, 0x33, 0x1c, 0xe1, 0x37, 0xfe, 0x2c, 0x10, 0xdf, 0x2c,
+  0x83, 0xfe, 0xf4, 0x4f, 0x60, 0xff, 0x2c, 0xfc, 0x4d, 0x7c, 0x2c, 0x18,
+  0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82,
+  0xf8, 0x14, 0x31, 0xda, 0x82, 0x0e, 0x37, 0x04, 0xa1, 0x2d, 0x80, 0xc1,
+  0x2c, 0xc3, 0xfe, 0xf0, 0x4f, 0x60, 0xc3, 0x49, 0x0b, 0xf0, 0x99, 0x25,
+  0x08, 0x21, 0x23, 0x69, 0x81, 0x88, 0xcf, 0x2c, 0x41, 0x08, 0x0d, 0x47,
+  0xa4, 0x4e, 0x49, 0x0b, 0xc2, 0x37, 0xcb, 0xe0, 0x3f, 0x21, 0x14, 0x98,
+  0xea, 0x98, 0xb4, 0x10, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05,
+  0x4f, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xb8, 0xb6, 0xa0,
+  0xc3, 0x0d, 0x01, 0x6b, 0x0b, 0x60, 0x30, 0xcb, 0xf0, 0x3f, 0x20, 0x14,
+  0x98, 0x4b, 0x0b, 0x43, 0x7c, 0x66, 0x09, 0x42, 0xc8, 0x88, 0x99, 0x16,
+  0xe0, 0x33, 0x4b, 0x10, 0x42, 0x03, 0x2d, 0x8f, 0xb6, 0x3f, 0x18, 0xff,
+  0x10, 0xff, 0x23, 0x80, 0x10, 0xda, 0xf5, 0xcf, 0x05, 0xc3, 0x5c, 0xf0,
+  0xd4, 0x6d, 0x4f, 0xdd, 0x3f, 0x0b, 0xc3, 0x1c, 0x0d, 0x0b, 0xc3, 0x1c,
+  0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0xc7,
+  0xdb, 0x02, 0x6b, 0x0b, 0x6b, 0x2d, 0xe4, 0xb6, 0x30, 0x9a, 0x10, 0x00,
   0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44,
-  0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xc0, 0x91, 0xb1, 0x40, 0xc3,
-  0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xc0, 0x95, 0xb1,
-  0x50, 0xc3, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xc0,
-  0x99, 0xb1, 0x60, 0xc3, 0x42, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82,
-  0x60, 0xb0, 0xb8, 0xb1, 0x50, 0xc3, 0x82, 0xfd, 0x0a, 0x41, 0x18, 0x0b,
-  0x26, 0x2c, 0x8c, 0xb1, 0x30, 0x9a, 0x10, 0x00, 0x17, 0x3c, 0x36, 0x4b,
-  0xb0, 0x3e, 0xc3, 0x0d, 0xe6, 0x1d, 0x98, 0xb1, 0x00, 0x06, 0xb3, 0x0c,
-  0xba, 0xb3, 0x3e, 0x81, 0xa1, 0xaf, 0xa0, 0xbe, 0x42, 0x7c, 0x86, 0x23,
-  0xe4, 0x37, 0x58, 0x5f, 0x81, 0xf8, 0x66, 0x19, 0x76, 0xc7, 0x77, 0x02,
-  0x63, 0x5f, 0x61, 0x7e, 0x83, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61,
-  0x2e, 0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x02, 0x8e,
-  0x05, 0x1d, 0x6e, 0x08, 0xdc, 0x58, 0x00, 0x83, 0x59, 0x06, 0xde, 0xe9,
-  0x9d, 0xc0, 0x06, 0xfa, 0x15, 0xe0, 0x33, 0x4b, 0x20, 0x3e, 0x36, 0xbf,
-  0x02, 0x11, 0x9f, 0x59, 0x02, 0xf1, 0x19, 0x8e, 0xe8, 0xdf, 0x80, 0x7e,
-  0x05, 0xe1, 0x9b, 0x65, 0xf8, 0x1d, 0xf1, 0x09, 0xcc, 0x7f, 0x83, 0xfa,
-  0x15, 0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b,
-  0x22, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0x3d, 0x16, 0x74, 0xb8, 0x21,
-  0xc8, 0x63, 0x01, 0x0c, 0x66, 0x19, 0xc0, 0x27, 0x7c, 0x02, 0xeb, 0x5f,
-  0x61, 0x88, 0xcf, 0x2c, 0x81, 0xf8, 0x18, 0x01, 0xc2, 0x02, 0x7c, 0x66,
-  0x09, 0xc4, 0x67, 0xa0, 0xe5, 0xd1, 0x78, 0x07, 0xeb, 0x1d, 0x02, 0x7c,
-  0x84, 0xf0, 0x81, 0xc1, 0xc1, 0x77, 0x2e, 0x18, 0xc6, 0xfe, 0x57, 0x18,
-  0x61, 0x21, 0x3e, 0xc3, 0x11, 0xa4, 0x42, 0xc2, 0x02, 0xf1, 0xcd, 0x32,
-  0x8c, 0x8f, 0xf9, 0x04, 0x56, 0xc2, 0x42, 0xa9, 0xc4, 0xc7, 0x82, 0x81,
-  0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88,
-  0x4f, 0x11, 0xa9, 0x2c, 0xe8, 0x70, 0x43, 0x70, 0xca, 0x02, 0x18, 0xcc,
-  0x32, 0x90, 0x4f, 0xf9, 0x04, 0x36, 0xb4, 0xb0, 0x00, 0x9f, 0x59, 0x02,
-  0xf5, 0x31, 0x15, 0x16, 0x88, 0xf8, 0xcc, 0x12, 0xa8, 0xcf, 0x70, 0xc4,
-  0xab, 0xac, 0xb0, 0x20, 0x7c, 0xb3, 0x0c, 0xe7, 0xa3, 0x3e, 0x81, 0xc1,
-  0x0a, 0x0b, 0x0b, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0xf0,
-  0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x04, 0x2d, 0x0b, 0x3a,
-  0xdc, 0x10, 0xc8, 0xb2, 0x00, 0x06, 0xb3, 0x0c, 0xe8, 0x93, 0x3e, 0x81,
-  0xd1, 0xb0, 0x30, 0xc4, 0x67, 0x96, 0x40, 0x7d, 0x8c, 0xc8, 0x61, 0x01,
-  0x3e, 0xb3, 0x04, 0xea, 0x33, 0xd0, 0xf2, 0x68, 0xe4, 0x83, 0x95, 0x0f,
-  0x81, 0x3e, 0x42, 0xfa, 0xd0, 0x95, 0xf9, 0x5c, 0x30, 0xcc, 0x05, 0x4f,
-  0xdd, 0xf6, 0xd4, 0x95, 0xb0, 0x30, 0xcc, 0xe9, 0xac, 0x30, 0xcc, 0x11,
-  0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x7c, 0xe2,
-  0x2c, 0xc8, 0xb2, 0x10, 0xc7, 0xc2, 0x2f, 0x0b, 0xa3, 0x09, 0x01, 0x30,
-  0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24,
-  0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x5c, 0x3a, 0x0b, 0xb9, 0x2c,
-  0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x9c, 0x3a, 0x0b,
-  0xba, 0x2c, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xdc,
-  0x3a, 0x0b, 0xbb, 0x2c, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08,
-  0x06, 0xcb, 0x3c, 0x0b, 0xba, 0x2c, 0xec, 0xb1, 0x10, 0x98, 0xb3, 0xb0,
-  0xca, 0x02, 0x3a, 0x0b, 0xa3, 0x09, 0x01, 0x70, 0xc1, 0x63, 0xb3, 0x04,
-  0xeb, 0x33, 0xd0, 0xf2, 0x98, 0x86, 0xed, 0xe0, 0x64, 0x51, 0x3b, 0x2c,
-  0x81, 0x3b, 0x82, 0xfa, 0xe0, 0x64, 0x91, 0x3b, 0xb3, 0x0c, 0xec, 0xe3,
-  0x3e, 0x6b, 0x1e, 0x0c, 0x47, 0xec, 0x6d, 0xd0, 0xca, 0xc2, 0xf0, 0x1d,
-  0xdf, 0x06, 0xc3, 0x0c, 0x37, 0x04, 0x78, 0x2c, 0x90, 0x41, 0x0d, 0x81,
-  0x0e, 0x47, 0xcc, 0x4b, 0x2c, 0x0b, 0xc3, 0x57, 0x81, 0xa0, 0x57, 0x2f,
-  0xc3, 0x0c, 0x37, 0x04, 0x7b, 0x2c, 0x90, 0x41, 0x05, 0x83, 0xce, 0x32,
-  0xb4, 0x8f, 0x08, 0x05, 0xf7, 0xc3, 0xc2, 0x30, 0x47, 0xb7, 0xc2, 0x30,
-  0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xf0, 0xe9, 0xb3, 0xa0, 0xce, 0x42,
-  0x2a, 0x0b, 0xf7, 0x2c, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c,
-  0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x71, 0xc8, 0x88, 0x01, 0x02,
-  0x80, 0x20, 0x18, 0x70, 0x21, 0x2d, 0xc4, 0xb3, 0x70, 0x10, 0xc1, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x70, 0x22, 0x2d, 0xc8, 0xb3, 0xc0, 0x10,
-  0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70, 0x23, 0x2d, 0xcc, 0xb3,
-  0x20, 0x11, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c, 0x2b, 0x2d,
-  0xc8, 0xb3, 0x30, 0xcb, 0x42, 0xe0, 0xcf, 0xc2, 0x38, 0x0b, 0x20, 0x2d,
-  0x8c, 0x26, 0x04, 0xc0, 0x05, 0x8f, 0xcd, 0x12, 0x88, 0xd0, 0x70, 0xc3,
-  0xa8, 0x07, 0x24, 0x2d, 0x80, 0xc1, 0x2c, 0xc3, 0xfb, 0xc0, 0x4f, 0x50,
-  0xae, 0x2c, 0xd4, 0xb3, 0x00, 0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20,
-  0x08, 0x06, 0x54, 0x4b, 0x0b, 0xf6, 0x2c, 0xd4, 0x6e, 0x70, 0xce, 0xc2,
-  0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50, 0x2e, 0x2d, 0xd8, 0xb3, 0x10,
-  0x08, 0x17, 0x0c, 0x53, 0xb1, 0x2c, 0xe8, 0xb3, 0x00, 0x17, 0x3c, 0x35,
-  0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x94, 0x4c, 0x0b, 0xfb, 0x2c, 0xb8,
-  0x0c, 0x3b, 0x0b, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0xcd, 0xb4,
-  0xb0, 0xcf, 0x42, 0x20, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0xdd, 0xf1, 0xd4,
-  0xe9, 0xb1, 0x30, 0xcc, 0xbd, 0xae, 0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31,
-  0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x7c, 0x37, 0x2d, 0x9c, 0xb4,
-  0x60, 0xce, 0x02, 0x4d, 0x0b, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04,
-  0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80,
-  0x00, 0x20, 0x08, 0x06, 0x9c, 0x4f, 0x0b, 0x2e, 0x2d, 0x24, 0x44, 0x30,
-  0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xdc, 0x4f, 0x0b, 0x2f, 0x2d, 0x24,
-  0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x1c, 0x58, 0x0b, 0x30,
-  0x2d, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x0b, 0x5a,
-  0x0b, 0x2f, 0x2d, 0xc0, 0xb3, 0x10, 0xec, 0xb4, 0x00, 0xd2, 0x42, 0x4f,
-  0x0b, 0xa3, 0x09, 0x01, 0x70, 0xc1, 0x63, 0xb3, 0x04, 0x22, 0x34, 0xdc,
-  0x00, 0xee, 0x01, 0x58, 0x0b, 0x60, 0x30, 0xcb, 0x10, 0x3f, 0x22, 0x14,
-  0x98, 0x38, 0x0b, 0xe4, 0x2c, 0xc4, 0x67, 0x38, 0x22, 0x7e, 0x83, 0x72,
-  0x16, 0x88, 0x6f, 0x96, 0x41, 0x7e, 0xea, 0x27, 0x30, 0x73, 0x16, 0xe4,
-  0x37, 0x88, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c,
-  0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xd4, 0x5a, 0xd0, 0xe1, 0x86,
-  0x00, 0xad, 0x05, 0x30, 0x98, 0x65, 0x98, 0x1f, 0xfa, 0x09, 0x6c, 0x70,
-  0x67, 0x01, 0x3e, 0xb3, 0x04, 0xf9, 0x63, 0xed, 0x2c, 0x10, 0xf1, 0x99,
-  0x25, 0xc8, 0x9f, 0xe1, 0x08, 0xfe, 0x0d, 0xdc, 0x59, 0x10, 0xbe, 0x59,
-  0x06, 0xfb, 0xc9, 0x9f, 0xc0, 0xfa, 0x37, 0x78, 0x67, 0x21, 0x3e, 0x16,
-  0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20, 0x92, 0x8f, 0x15,
-  0x41, 0x7c, 0x8a, 0xa8, 0x6b, 0x41, 0x87, 0x1b, 0x82, 0xb9, 0x16, 0xc0,
-  0x60, 0x96, 0xe1, 0x7e, 0xf0, 0x27, 0xb0, 0x7b, 0x16, 0x86, 0xf8, 0xcc,
-  0x12, 0xe4, 0x8f, 0x11, 0xfa, 0x2c, 0xc0, 0x67, 0x96, 0x20, 0x7f, 0x06,
-  0x5a, 0x1e, 0x6d, 0x7e, 0x30, 0xfa, 0x21, 0xee, 0x47, 0xc0, 0x1f, 0x17,
-  0x1c, 0xea, 0xe7, 0x82, 0x61, 0x2c, 0x9f, 0x85, 0x7e, 0x16, 0xe2, 0x33,
-  0x1c, 0xe1, 0x37, 0xfe, 0x2c, 0x10, 0xdf, 0x2c, 0x83, 0xfe, 0xf4, 0x4f,
-  0x60, 0xff, 0x2c, 0xfc, 0x4d, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30,
-  0x17, 0x3c, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x31, 0xda,
-  0x82, 0x0e, 0x37, 0x04, 0xa1, 0x2d, 0x80, 0xc1, 0x2c, 0xc3, 0xfe, 0xf0,
-  0x4f, 0x60, 0xc3, 0x49, 0x0b, 0xf0, 0x99, 0x25, 0x08, 0x21, 0x23, 0x69,
-  0x81, 0x88, 0xcf, 0x2c, 0x41, 0x08, 0x0d, 0x47, 0xa4, 0x4e, 0x49, 0x0b,
-  0xc2, 0x37, 0xcb, 0xe0, 0x3f, 0x21, 0x14, 0x98, 0xea, 0x98, 0xb4, 0x10,
-  0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x10, 0xc9,
-  0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xb8, 0xb6, 0xa0, 0xc3, 0x0d, 0x01, 0x6b,
-  0x0b, 0x60, 0x30, 0xcb, 0xf0, 0x3f, 0x20, 0x14, 0x98, 0x4b, 0x0b, 0x43,
-  0x7c, 0x66, 0x09, 0x42, 0xc8, 0x88, 0x99, 0x16, 0xe0, 0x33, 0x4b, 0x10,
-  0x42, 0x03, 0x2d, 0x8f, 0xb6, 0x3f, 0x18, 0xff, 0x10, 0xff, 0x23, 0x80,
-  0x10, 0xda, 0xf5, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0xd4, 0x6d, 0x4f, 0xdd,
-  0x3f, 0x0b, 0xc3, 0x1c, 0x0d, 0x0b, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3,
-  0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0xc7, 0xdb, 0x02, 0x6b, 0x0b,
-  0x6b, 0x2d, 0xe4, 0xb6, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30,
-  0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23, 0x06, 0x08,
-  0x00, 0x82, 0x60, 0xc0, 0x8d, 0xb7, 0x30, 0xdb, 0x42, 0x42, 0x04, 0x23,
-  0x06, 0x08, 0x00, 0x82, 0x60, 0xc0, 0x91, 0xb7, 0x40, 0xdb, 0x42, 0x42,
-  0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xc0, 0x95, 0xb7, 0x50, 0xdb,
-  0x42, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0xb4, 0xb7,
-  0x40, 0xdb, 0x42, 0x5d, 0x0b, 0x01, 0x78, 0x0b, 0xa5, 0x2d, 0x88, 0xb7,
-  0x30, 0x9a, 0x10, 0x00, 0x17, 0x3c, 0x36, 0x4b, 0x20, 0x42, 0x03, 0x2d,
-  0x8f, 0x69, 0xb4, 0x0f, 0xcb, 0x16, 0xec, 0xc3, 0x12, 0xef, 0x23, 0x84,
-  0x10, 0xcb, 0x16, 0xf0, 0x33, 0x62, 0x60, 0x00, 0x20, 0x08, 0x06, 0xd0,
-  0x7b, 0x0b, 0xb0, 0x2d, 0x98, 0xb1, 0x30, 0x62, 0x60, 0x00, 0x20, 0x08,
-  0x06, 0x10, 0x7c, 0x0b, 0xb1, 0x2d, 0x98, 0xb1, 0x60, 0x41, 0x20, 0x1f,
-  0x0b, 0x04, 0xf9, 0xd8, 0x9b, 0x07, 0xa9, 0x2d, 0xc8, 0x67, 0xc4, 0x00,
-  0x01, 0x40, 0x10, 0x0c, 0xa4, 0xf9, 0x16, 0x72, 0x5b, 0x48, 0x6d, 0xa1,
-  0xd7, 0x02, 0x8b, 0xf3, 0x20, 0xb5, 0x05, 0xf9, 0x8c, 0x18, 0x20, 0x00,
-  0x08, 0x82, 0x81, 0x54, 0xdf, 0xc2, 0x6e, 0x0b, 0xa8, 0x2d, 0xa0, 0x6a,
-  0x10, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x64, 0xdf, 0x02, 0x6f,
-  0x0b, 0xab, 0x2d, 0x80, 0x5b, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06,
-  0xd2, 0x7d, 0x0b, 0xbd, 0x2d, 0xb8, 0xb6, 0x80, 0x2f, 0xc1, 0x88, 0x01,
-  0x02, 0x80, 0x20, 0x18, 0x48, 0xf8, 0x2d, 0xf8, 0xb6, 0xa0, 0xda, 0xc2,
-  0xaa, 0x06, 0xc6, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x48, 0xf9, 0x2d,
-  0xfc, 0xb6, 0xa0, 0xda, 0xc2, 0xb8, 0x05, 0x23, 0x06, 0x08, 0x00, 0x82,
-  0x60, 0x20, 0xe9, 0xb7, 0x00, 0xde, 0x02, 0x6c, 0x0b, 0xfb, 0x12, 0x8c,
-  0x18, 0x34, 0x00, 0x08, 0x82, 0x81, 0x95, 0xdf, 0x02, 0x78, 0x0b, 0xb3,
-  0x2d, 0x30, 0x8b, 0xe2, 0xaa, 0x01, 0x42, 0x04, 0xf6, 0xd7, 0xc1, 0x6c,
-  0x0b, 0xf2, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xa9, 0xbf, 0x85,
-  0xf1, 0x16, 0x66, 0x5b, 0x68, 0xaf, 0xc0, 0x42, 0x3b, 0x98, 0x6d, 0x41,
-  0x3e, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x20, 0xfd, 0xb7, 0x50, 0xde,
-  0x82, 0x6c, 0x0b, 0xb8, 0x19, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60,
-  0x20, 0x81, 0xb8, 0x60, 0xde, 0x42, 0x6d, 0x0b, 0xf0, 0x15, 0x8c, 0x18,
-  0x20, 0x00, 0x08, 0x82, 0x81, 0x14, 0xe2, 0xc2, 0x79, 0x0b, 0xb8, 0x2d,
-  0xa0, 0x48, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x92, 0x88, 0x0b,
-  0xe8, 0x2d, 0xd0, 0xb6, 0xb0, 0x9b, 0x81, 0x31, 0x62, 0x80, 0x00, 0x20,
-  0x08, 0x06, 0xd2, 0x88, 0x0b, 0xe9, 0x2d, 0xd0, 0xb6, 0x30, 0x5f, 0xc1,
-  0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x48, 0x24, 0x2e, 0xa8, 0xb7, 0xa0,
-  0xdb, 0xc2, 0x8a, 0x04, 0x23, 0x06, 0x0d, 0x00, 0x82, 0x60, 0x60, 0x8d,
-  0xb8, 0xa0, 0xde, 0x42, 0x6f, 0x0b, 0x56, 0x45, 0xf9, 0x66, 0x80, 0x10,
-  0x81, 0xb9, 0x72, 0xd0, 0xdb, 0x82, 0x7c, 0x46, 0x0c, 0x10, 0x00, 0x04,
-  0xc1, 0x40, 0x3a, 0x71, 0xa1, 0xbd, 0x85, 0xde, 0x16, 0xfa, 0x29, 0x30,
-  0x58, 0x0e, 0x7a, 0x5b, 0x90, 0xcf, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
-  0x48, 0x29, 0x2e, 0xbc, 0xb7, 0xc0, 0xdb, 0x02, 0x3a, 0x06, 0xc1, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x48, 0x2a, 0x2e, 0xc0, 0xb7, 0xf0, 0xdb,
-  0x02, 0x48, 0x05, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x20, 0xad, 0xb8,
-  0x10, 0xdf, 0x82, 0x78, 0x0b, 0x38, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08,
-  0x82, 0x81, 0xc4, 0xe2, 0x82, 0x7c, 0x0b, 0xbe, 0x2d, 0xac, 0x63, 0x60,
-  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0xd4, 0xe2, 0xc2, 0x7c, 0x0b,
-  0xbe, 0x2d, 0x8c, 0x54, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x92,
-  0x8b, 0x0b, 0xf4, 0x2d, 0x90, 0xb7, 0xb0, 0x13, 0xc1, 0x88, 0x41, 0x03,
-  0x80, 0x20, 0x18, 0x58, 0x2d, 0x2e, 0xd0, 0xb7, 0x70, 0xde, 0x02, 0x18,
-  0x7c, 0x9e, 0x3b, 0x06, 0x08, 0x11, 0x58, 0xef, 0x06, 0xe7, 0x2d, 0xc8,
-  0x67, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xa4, 0x18, 0x17, 0xee, 0x5b,
-  0x38, 0x6f, 0xa1, 0x85, 0x02, 0xfb, 0xdd, 0xe0, 0xbc, 0x05, 0xf9, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x34, 0xe3, 0x42, 0x7e, 0x0b, 0xe6,
-  0x2d, 0xe0, 0x5f, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x12, 0x8d,
-  0x0b, 0xfa, 0x2d, 0xa4, 0xb7, 0x00, 0x43, 0xc1, 0x88, 0x01, 0x02, 0x80,
-  0x20, 0x18, 0x48, 0x35, 0x2e, 0xec, 0xb7, 0xc0, 0xde, 0x02, 0x1a, 0x04,
-  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x20, 0xd9, 0xb8, 0xc0, 0xdf, 0x02,
-  0x7a, 0x0b, 0xfb, 0x67, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x74,
-  0xe3, 0x42, 0x7f, 0x0b, 0xe8, 0x2d, 0xcc, 0x50, 0x30, 0x62, 0x80, 0x00,
-  0x20, 0x08, 0x06, 0x12, 0x8e, 0x0b, 0xfe, 0x2d, 0xb8, 0xb7, 0xb0, 0x06,
-  0xc1, 0x88, 0x41, 0x03, 0x80, 0x20, 0x18, 0x58, 0x37, 0x2e, 0xf8, 0xb7,
-  0x10, 0xdf, 0x82, 0x1a, 0xa4, 0x01, 0x1a, 0xf8, 0x1f, 0x42, 0x04, 0xc6,
-  0x06, 0x6c, 0x20, 0x1f, 0x0b, 0xda, 0x40, 0x3e, 0x16, 0x06, 0xf3, 0x2d,
-  0xc8, 0x67, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xa4, 0x1e, 0x17, 0x46,
-  0x5c, 0x98, 0x6f, 0xc1, 0x09, 0x6c, 0x0c, 0xe6, 0x5b, 0x90, 0xcf, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x48, 0x3f, 0x2e, 0x94, 0xb8, 0x20, 0xdf,
-  0x82, 0x16, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x04, 0xe6, 0x82,
-  0x89, 0x0b, 0xf5, 0x2d, 0x44, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
-  0x48, 0x61, 0x2e, 0x9c, 0xb8, 0x80, 0xdf, 0x02, 0x12, 0x8c, 0x18, 0x20,
-  0x00, 0x08, 0x82, 0x81, 0x24, 0xe6, 0x02, 0x8a, 0x0b, 0xf4, 0x2d, 0x74,
-  0xc6, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x48, 0x63, 0x2e, 0xa4, 0xb8,
-  0x40, 0xdf, 0x02, 0x15, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x44,
-  0xe6, 0x82, 0x8a, 0x0b, 0xfa, 0x2d, 0x2c, 0xc1, 0x88, 0x41, 0x03, 0x80,
-  0x20, 0x18, 0x58, 0x63, 0x2e, 0xa8, 0xb8, 0xd0, 0xdf, 0xc2, 0x1d, 0x2c,
-  0x0a, 0x18, 0x20, 0x44, 0x70, 0x81, 0x41, 0x23, 0x06, 0x0e, 0x00, 0x82,
-  0x60, 0xd0, 0xa4, 0xb9, 0x80, 0xe2, 0x02, 0x7d, 0x0b, 0xed, 0x2d, 0xf4,
-  0xb8, 0x10, 0x8c, 0xb8, 0x30, 0xe2, 0xc2, 0x88, 0x0b, 0x22, 0x2e, 0xfc,
-  0xb8, 0x30, 0x4b, 0x30, 0x42, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+  0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xc0, 0x8d, 0xb7, 0x30, 0xdb,
+  0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xc0, 0x91, 0xb7,
+  0x40, 0xdb, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xc0,
+  0x95, 0xb7, 0x50, 0xdb, 0x42, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82,
+  0x60, 0xb0, 0xb4, 0xb7, 0x40, 0xdb, 0x42, 0x5d, 0x0b, 0x01, 0x78, 0x0b,
+  0xa5, 0x2d, 0x88, 0xb7, 0x30, 0x9a, 0x10, 0x00, 0x17, 0x3c, 0x36, 0x4b,
+  0x20, 0x42, 0x03, 0x2d, 0x8f, 0x69, 0xb4, 0x0f, 0xcb, 0x16, 0xec, 0xc3,
+  0x12, 0xef, 0x23, 0x84, 0x10, 0xcb, 0x16, 0xf0, 0x33, 0x62, 0x60, 0x00,
+  0x20, 0x08, 0x06, 0xd0, 0x7b, 0x0b, 0xb0, 0x2d, 0x98, 0xb1, 0x30, 0x62,
+  0x60, 0x00, 0x20, 0x08, 0x06, 0x10, 0x7c, 0x0b, 0xb1, 0x2d, 0x98, 0xb1,
+  0x60, 0x41, 0x20, 0x1f, 0x0b, 0x04, 0xf9, 0xd8, 0x9b, 0x07, 0xa9, 0x2d,
+  0xc8, 0x67, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xa4, 0xf9, 0x16, 0x72,
+  0x5b, 0x48, 0x6d, 0xa1, 0xd7, 0x02, 0x8b, 0xf3, 0x20, 0xb5, 0x05, 0xf9,
+  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x54, 0xdf, 0xc2, 0x6e, 0x0b,
+  0xa8, 0x2d, 0xa0, 0x6a, 0x10, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81,
+  0x64, 0xdf, 0x02, 0x6f, 0x0b, 0xab, 0x2d, 0x80, 0x5b, 0x30, 0x62, 0x80,
+  0x00, 0x20, 0x08, 0x06, 0xd2, 0x7d, 0x0b, 0xbd, 0x2d, 0xb8, 0xb6, 0x80,
+  0x2f, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x48, 0xf8, 0x2d, 0xf8,
+  0xb6, 0xa0, 0xda, 0xc2, 0xaa, 0x06, 0xc6, 0x88, 0x01, 0x02, 0x80, 0x20,
+  0x18, 0x48, 0xf9, 0x2d, 0xfc, 0xb6, 0xa0, 0xda, 0xc2, 0xb8, 0x05, 0x23,
+  0x06, 0x08, 0x00, 0x82, 0x60, 0x20, 0xe9, 0xb7, 0x00, 0xde, 0x02, 0x6c,
+  0x0b, 0xfb, 0x12, 0x8c, 0x18, 0x34, 0x00, 0x08, 0x82, 0x81, 0x95, 0xdf,
+  0x02, 0x78, 0x0b, 0xb3, 0x2d, 0x30, 0x8b, 0xe2, 0xaa, 0x01, 0x42, 0x04,
+  0xf6, 0xd7, 0xc1, 0x6c, 0x0b, 0xf2, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04,
+  0x03, 0xa9, 0xbf, 0x85, 0xf1, 0x16, 0x66, 0x5b, 0x68, 0xaf, 0xc0, 0x42,
+  0x3b, 0x98, 0x6d, 0x41, 0x3e, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x20,
+  0xfd, 0xb7, 0x50, 0xde, 0x82, 0x6c, 0x0b, 0xb8, 0x19, 0x04, 0x23, 0x06,
+  0x08, 0x00, 0x82, 0x60, 0x20, 0x81, 0xb8, 0x60, 0xde, 0x42, 0x6d, 0x0b,
+  0xf0, 0x15, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x14, 0xe2, 0xc2,
+  0x79, 0x0b, 0xb8, 0x2d, 0xa0, 0x48, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08,
+  0x06, 0x92, 0x88, 0x0b, 0xe8, 0x2d, 0xd0, 0xb6, 0xb0, 0x9b, 0x81, 0x31,
+  0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xd2, 0x88, 0x0b, 0xe9, 0x2d, 0xd0,
+  0xb6, 0x30, 0x5f, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x48, 0x24,
+  0x2e, 0xa8, 0xb7, 0xa0, 0xdb, 0xc2, 0x8a, 0x04, 0x23, 0x06, 0x0d, 0x00,
+  0x82, 0x60, 0x60, 0x8d, 0xb8, 0xa0, 0xde, 0x42, 0x6f, 0x0b, 0x56, 0x45,
+  0xf9, 0x66, 0x80, 0x10, 0x81, 0xb9, 0x72, 0xd0, 0xdb, 0x82, 0x7c, 0x46,
+  0x0c, 0x10, 0x00, 0x04, 0xc1, 0x40, 0x3a, 0x71, 0xa1, 0xbd, 0x85, 0xde,
+  0x16, 0xfa, 0x29, 0x30, 0x58, 0x0e, 0x7a, 0x5b, 0x90, 0xcf, 0x88, 0x01,
+  0x02, 0x80, 0x20, 0x18, 0x48, 0x29, 0x2e, 0xbc, 0xb7, 0xc0, 0xdb, 0x02,
+  0x3a, 0x06, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x48, 0x2a, 0x2e,
+  0xc0, 0xb7, 0xf0, 0xdb, 0x02, 0x48, 0x05, 0x23, 0x06, 0x08, 0x00, 0x82,
+  0x60, 0x20, 0xad, 0xb8, 0x10, 0xdf, 0x82, 0x78, 0x0b, 0x38, 0x11, 0x8c,
+  0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0xc4, 0xe2, 0x82, 0x7c, 0x0b, 0xbe,
+  0x2d, 0xac, 0x63, 0x60, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0xd4,
+  0xe2, 0xc2, 0x7c, 0x0b, 0xbe, 0x2d, 0x8c, 0x54, 0x30, 0x62, 0x80, 0x00,
+  0x20, 0x08, 0x06, 0x92, 0x8b, 0x0b, 0xf4, 0x2d, 0x90, 0xb7, 0xb0, 0x13,
+  0xc1, 0x88, 0x41, 0x03, 0x80, 0x20, 0x18, 0x58, 0x2d, 0x2e, 0xd0, 0xb7,
+  0x70, 0xde, 0x02, 0x18, 0x7c, 0x9e, 0x3b, 0x06, 0x08, 0x11, 0x58, 0xef,
+  0x06, 0xe7, 0x2d, 0xc8, 0x67, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xa4,
+  0x18, 0x17, 0xee, 0x5b, 0x38, 0x6f, 0xa1, 0x85, 0x02, 0xfb, 0xdd, 0xe0,
+  0xbc, 0x05, 0xf9, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x34, 0xe3,
+  0x42, 0x7e, 0x0b, 0xe6, 0x2d, 0xe0, 0x5f, 0x30, 0x62, 0x80, 0x00, 0x20,
+  0x08, 0x06, 0x12, 0x8d, 0x0b, 0xfa, 0x2d, 0xa4, 0xb7, 0x00, 0x43, 0xc1,
+  0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x48, 0x35, 0x2e, 0xec, 0xb7, 0xc0,
+  0xde, 0x02, 0x1a, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x20, 0xd9,
+  0xb8, 0xc0, 0xdf, 0x02, 0x7a, 0x0b, 0xfb, 0x67, 0x8c, 0x18, 0x20, 0x00,
+  0x08, 0x82, 0x81, 0x74, 0xe3, 0x42, 0x7f, 0x0b, 0xe8, 0x2d, 0xcc, 0x50,
+  0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x12, 0x8e, 0x0b, 0xfe, 0x2d,
+  0xb8, 0xb7, 0xb0, 0x06, 0xc1, 0x88, 0x41, 0x03, 0x80, 0x20, 0x18, 0x58,
+  0x37, 0x2e, 0xf8, 0xb7, 0x10, 0xdf, 0x82, 0x1a, 0xa4, 0x01, 0x1a, 0xf8,
+  0x1f, 0x42, 0x04, 0xc6, 0x06, 0x6c, 0x20, 0x1f, 0x0b, 0xda, 0x40, 0x3e,
+  0x16, 0x06, 0xf3, 0x2d, 0xc8, 0x67, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c,
+  0xa4, 0x1e, 0x17, 0x46, 0x5c, 0x98, 0x6f, 0xc1, 0x09, 0x6c, 0x0c, 0xe6,
+  0x5b, 0x90, 0xcf, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x48, 0x3f, 0x2e,
+  0x94, 0xb8, 0x20, 0xdf, 0x82, 0x16, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
+  0x81, 0x04, 0xe6, 0x82, 0x89, 0x0b, 0xf5, 0x2d, 0x44, 0xc1, 0x88, 0x01,
+  0x02, 0x80, 0x20, 0x18, 0x48, 0x61, 0x2e, 0x9c, 0xb8, 0x80, 0xdf, 0x02,
+  0x12, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x24, 0xe6, 0x02, 0x8a,
+  0x0b, 0xf4, 0x2d, 0x74, 0xc6, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x48,
+  0x63, 0x2e, 0xa4, 0xb8, 0x40, 0xdf, 0x02, 0x15, 0x8c, 0x18, 0x20, 0x00,
+  0x08, 0x82, 0x81, 0x44, 0xe6, 0x82, 0x8a, 0x0b, 0xfa, 0x2d, 0x2c, 0xc1,
+  0x88, 0x41, 0x03, 0x80, 0x20, 0x18, 0x58, 0x63, 0x2e, 0xa8, 0xb8, 0xd0,
+  0xdf, 0xc2, 0x1d, 0x2c, 0x0a, 0x18, 0x20, 0x44, 0x70, 0x81, 0x41, 0x23,
+  0x06, 0x0e, 0x00, 0x82, 0x60, 0xd0, 0xa4, 0xb9, 0x80, 0xe2, 0x02, 0x7d,
+  0x0b, 0xed, 0x2d, 0xf4, 0xb8, 0x10, 0x8c, 0xb8, 0x30, 0xe2, 0xc2, 0x88,
+  0x0b, 0x22, 0x2e, 0xfc, 0xb8, 0x30, 0x4b, 0x30, 0x42, 0x08, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00
 };
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_int64_double.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_int64_double.h
deleted file mode 100644
index e7ad52b03066b..0000000000000
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_int64_double.h
+++ /dev/null
@@ -1,6708 +0,0 @@
-#if 0
-;
-; Note: shader requires additional functionality:
-;       Double-precision floating point
-;       64-Bit integer
-;
-;
-; Input signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; no parameters
-;
-; Output signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; no parameters
-; shader hash: d969fcfd377d54a24b0f4b8f6ce0e616
-;
-; Pipeline Runtime Information: 
-;
-;
-;
-; Buffer Definitions:
-;
-; cbuffer 
-; {
-;
-;   [116 x i8] (type annotation not present)
-;
-; }
-;
-; Resource bind info for 
-; {
-;
-;   [8 x i8] (type annotation not present)
-;
-; }
-;
-; Resource bind info for 
-; {
-;
-;   [8 x i8] (type annotation not present)
-;
-; }
-;
-; Resource bind info for 
-; {
-;
-;   [8 x i8] (type annotation not present)
-;
-; }
-;
-;
-; Resource Bindings:
-;
-; Name                                 Type  Format         Dim      ID      HLSL Bind  Count
-; ------------------------------ ---------- ------- ----------- ------- -------------- ------
-;                                   cbuffer      NA          NA     CB0            cb0     1
-;                                       UAV  struct         r/w      U0             u0     1
-;                                       UAV  struct         r/w      U1             u1     1
-;                                       UAV  struct         r/w      U2             u2     1
-;
-target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
-target triple = "dxil-ms-dx"
-
-%dx.types.Handle = type { i8* }
-%dx.types.CBufRet.i32 = type { i32, i32, i32, i32 }
-%dx.types.ResRet.i32 = type { i32, i32, i32, i32, i32 }
-%"class.RWStructuredBuffer<long long>" = type { i64 }
-%"class.RWStructuredBuffer<double>" = type { double }
-%Constants = type { i32, i32, i32, i32, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32 }
-
-define void @GridSample() {
-  %1 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 2, i32 2, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %2 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 1, i32 1, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %3 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %4 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %5 = call i32 @dx.op.threadId.i32(i32 93, i32 0)  ; ThreadId(component)
-  %6 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
-  %7 = extractvalue %dx.types.CBufRet.i32 %6, 0
-  %8 = add i32 %7, %5
-  %9 = extractvalue %dx.types.CBufRet.i32 %6, 1
-  %10 = icmp ult i32 %8, %9
-  br i1 %10, label %11, label %3647
-
-; <label>:11                                      ; preds = %0
-  %12 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
-  %13 = extractvalue %dx.types.CBufRet.i32 %12, 3
-  %14 = uitofp i32 %13 to float
-  %15 = extractvalue %dx.types.CBufRet.i32 %12, 2
-  %16 = uitofp i32 %15 to float
-  %17 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 7)  ; CBufferLoadLegacy(handle,regIndex)
-  %18 = extractvalue %dx.types.CBufRet.i32 %17, 0
-  %19 = icmp eq i32 %18, 0
-  %20 = select i1 %19, float -5.000000e-01, float 0.000000e+00
-  %21 = select i1 %19, float -5.000000e-01, float -1.000000e+00
-  %22 = fadd float %14, %21
-  %23 = select i1 %19, float -5.000000e-01, float -1.000000e+00
-  %24 = fadd float %16, %23
-  %25 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
-  %26 = extractvalue %dx.types.CBufRet.i32 %25, 1
-  %27 = extractvalue %dx.types.CBufRet.i32 %25, 2
-  %28 = extractvalue %dx.types.CBufRet.i32 %25, 3
-  %29 = mul i32 %28, %27
-  %30 = mul i32 %27, %26
-  %31 = mul i32 %30, %28
-  %32 = udiv i32 %8, %31
-  %33 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 6)  ; CBufferLoadLegacy(handle,regIndex)
-  %34 = extractvalue %dx.types.CBufRet.i32 %33, 0
-  %35 = mul i32 %34, %32
-  %36 = sub i32 %8, %35
-  %37 = udiv i32 %36, %29
-  %38 = extractvalue %dx.types.CBufRet.i32 %33, 1
-  %39 = mul i32 %38, %37
-  %40 = sub i32 %36, %39
-  %41 = udiv i32 %40, %28
-  %42 = extractvalue %dx.types.CBufRet.i32 %33, 2
-  %43 = mul i32 %42, %41
-  %44 = sub i32 %40, %43
-  %45 = uitofp i32 %32 to float
-  %46 = uitofp i32 %41 to float
-  %47 = uitofp i32 %44 to float
-  %48 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
-  %49 = extractvalue %dx.types.CBufRet.i32 %48, 0
-  %50 = extractvalue %dx.types.CBufRet.i32 %48, 1
-  %51 = extractvalue %dx.types.CBufRet.i32 %48, 2
-  %52 = extractvalue %dx.types.CBufRet.i32 %48, 3
-  %53 = uitofp i32 %49 to float
-  %54 = uitofp i32 %50 to float
-  %55 = uitofp i32 %51 to float
-  %56 = uitofp i32 %52 to float
-  %57 = call float @dx.op.dot4.f32(i32 56, float %45, float %46, float %47, float 0.000000e+00, float %53, float %54, float %55, float %56)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %58 = fadd fast float %56, %57
-  %59 = fptoui float %57 to i32
-  %60 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %2, i32 %59, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %61 = extractvalue %dx.types.ResRet.i32 %60, 0
-  %62 = extractvalue %dx.types.ResRet.i32 %60, 1
-  %63 = call double @dx.op.makeDouble.f64(i32 101, i32 %61, i32 %62)  ; MakeDouble(lo,hi)
-  %64 = fptrunc double %63 to float
-  %65 = fptoui float %58 to i32
-  %66 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %2, i32 %65, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %67 = extractvalue %dx.types.ResRet.i32 %66, 0
-  %68 = extractvalue %dx.types.ResRet.i32 %66, 1
-  %69 = call double @dx.op.makeDouble.f64(i32 101, i32 %67, i32 %68)  ; MakeDouble(lo,hi)
-  %70 = fptrunc double %69 to float
-  %71 = icmp eq i32 %18, 1
-  %72 = fadd fast float %64, 1.000000e+00
-  %73 = fadd fast float %70, 1.000000e+00
-  br i1 %71, label %74, label %81
-
-; <label>:74                                      ; preds = %11
-  %75 = fmul fast float %72, 5.000000e-01
-  %76 = fmul fast float %73, 5.000000e-01
-  %77 = fadd fast float %14, -1.000000e+00
-  %78 = fadd fast float %16, -1.000000e+00
-  %79 = fmul fast float %75, %77
-  %80 = fmul fast float %76, %78
-  br label %88
-
-; <label>:81                                      ; preds = %11
-  %82 = fmul fast float %14, %72
-  %83 = fmul fast float %73, %16
-  %84 = fadd fast float %82, -1.000000e+00
-  %85 = fadd fast float %83, -1.000000e+00
-  %86 = fmul fast float %84, 5.000000e-01
-  %87 = fmul fast float %85, 5.000000e-01
-  br label %88
-
-; <label>:88                                      ; preds = %81, %74
-  %89 = phi float [ %79, %74 ], [ %86, %81 ]
-  %90 = phi float [ %80, %74 ], [ %87, %81 ]
-  %91 = extractvalue %dx.types.CBufRet.i32 %6, 2
-  %92 = icmp eq i32 %91, 1
-  br i1 %92, label %93, label %96
-
-; <label>:93                                      ; preds = %88
-  %94 = call float @dx.op.unary.f32(i32 26, float %89)  ; Round_ne(value)
-  %95 = call float @dx.op.unary.f32(i32 26, float %90)  ; Round_ne(value)
-  br label %96
-
-; <label>:96                                      ; preds = %93, %88
-  %97 = phi float [ %94, %93 ], [ %89, %88 ]
-  %98 = phi float [ %95, %93 ], [ %90, %88 ]
-  %99 = fcmp fast olt float %97, %20
-  %100 = fcmp fast ogt float %97, %22
-  %101 = or i1 %99, %100
-  %102 = fcmp fast olt float %98, %20
-  %103 = or i1 %101, %102
-  %104 = fcmp fast ogt float %98, %24
-  %105 = or i1 %104, %103
-  br i1 %105, label %106, label %179
-
-; <label>:106                                     ; preds = %96
-  %107 = extractvalue %dx.types.CBufRet.i32 %6, 3
-  %108 = icmp eq i32 %107, 1
-  br i1 %108, label %109, label %118
-
-; <label>:109                                     ; preds = %106
-  %110 = add i32 %13, -1
-  %111 = uitofp i32 %110 to float
-  %112 = call float @dx.op.binary.f32(i32 35, float %97, float 0.000000e+00)  ; FMax(a,b)
-  %113 = call float @dx.op.binary.f32(i32 36, float %112, float %111)  ; FMin(a,b)
-  %114 = add i32 %15, -1
-  %115 = uitofp i32 %114 to float
-  %116 = call float @dx.op.binary.f32(i32 35, float %98, float 0.000000e+00)  ; FMax(a,b)
-  %117 = call float @dx.op.binary.f32(i32 36, float %116, float %115)  ; FMin(a,b)
-  br label %179
-
-; <label>:118                                     ; preds = %106
-  %119 = icmp eq i32 %107, 2
-  br i1 %119, label %120, label %179
-
-; <label>:120                                     ; preds = %118
-  %121 = fsub fast float %22, %20
-  br i1 %99, label %122, label %135
-
-; <label>:122                                     ; preds = %120
-  %123 = fsub fast float %20, %97
-  %124 = fdiv fast float %123, %121
-  %125 = fptoui float %124 to i32
-  %126 = uitofp i32 %125 to float
-  %127 = fmul fast float %126, %121
-  %128 = fsub fast float %123, %127
-  %129 = and i32 %125, 1
-  %130 = icmp eq i32 %129, 0
-  br i1 %130, label %131, label %133
-
-; <label>:131                                     ; preds = %122
-  %132 = fadd fast float %128, %20
-  br label %149
-
-; <label>:133                                     ; preds = %122
-  %134 = fsub fast float %22, %128
-  br label %149
-
-; <label>:135                                     ; preds = %120
-  br i1 %100, label %136, label %149
-
-; <label>:136                                     ; preds = %135
-  %137 = fsub fast float %97, %22
-  %138 = fdiv fast float %137, %121
-  %139 = fptoui float %138 to i32
-  %140 = uitofp i32 %139 to float
-  %141 = fmul fast float %140, %121
-  %142 = fsub fast float %137, %141
-  %143 = and i32 %139, 1
-  %144 = icmp eq i32 %143, 0
-  br i1 %144, label %145, label %147
-
-; <label>:145                                     ; preds = %136
-  %146 = fsub fast float %22, %142
-  br label %149
-
-; <label>:147                                     ; preds = %136
-  %148 = fadd fast float %142, %20
-  br label %149
-
-; <label>:149                                     ; preds = %147, %145, %135, %133, %131
-  %150 = phi float [ %132, %131 ], [ %134, %133 ], [ %146, %145 ], [ %148, %147 ], [ %97, %135 ]
-  %151 = fsub fast float %24, %20
-  br i1 %102, label %152, label %165
-
-; <label>:152                                     ; preds = %149
-  %153 = fsub fast float %20, %98
-  %154 = fdiv fast float %153, %151
-  %155 = fptoui float %154 to i32
-  %156 = uitofp i32 %155 to float
-  %157 = fmul fast float %156, %151
-  %158 = fsub fast float %153, %157
-  %159 = and i32 %155, 1
-  %160 = icmp eq i32 %159, 0
-  br i1 %160, label %161, label %163
-
-; <label>:161                                     ; preds = %152
-  %162 = fadd fast float %158, %20
-  br label %179
-
-; <label>:163                                     ; preds = %152
-  %164 = fsub fast float %24, %158
-  br label %179
-
-; <label>:165                                     ; preds = %149
-  br i1 %104, label %166, label %179
-
-; <label>:166                                     ; preds = %165
-  %167 = fsub fast float %98, %24
-  %168 = fdiv fast float %167, %151
-  %169 = fptoui float %168 to i32
-  %170 = uitofp i32 %169 to float
-  %171 = fmul fast float %170, %151
-  %172 = fsub fast float %167, %171
-  %173 = and i32 %169, 1
-  %174 = icmp eq i32 %173, 0
-  br i1 %174, label %175, label %177
-
-; <label>:175                                     ; preds = %166
-  %176 = fsub fast float %24, %172
-  br label %179
-
-; <label>:177                                     ; preds = %166
-  %178 = fadd fast float %172, %20
-  br label %179
-
-; <label>:179                                     ; preds = %177, %175, %165, %163, %161, %118, %109, %96
-  %180 = phi float [ %113, %109 ], [ %97, %118 ], [ %97, %96 ], [ %150, %177 ], [ %150, %175 ], [ %150, %165 ], [ %150, %163 ], [ %150, %161 ]
-  %181 = phi float [ %117, %109 ], [ %98, %118 ], [ %98, %96 ], [ %178, %177 ], [ %176, %175 ], [ %98, %165 ], [ %164, %163 ], [ %162, %161 ]
-  %182 = uitofp i32 %37 to float
-  br i1 %92, label %183, label %350
-
-; <label>:183                                     ; preds = %179
-  %184 = extractvalue %dx.types.CBufRet.i32 %6, 3
-  %185 = icmp eq i32 %184, 0
-  br i1 %185, label %186, label %216
-
-; <label>:186                                     ; preds = %183
-  %187 = fcmp fast oge float %180, 0.000000e+00
-  %188 = fptoui float %180 to i32
-  %189 = icmp ult i32 %188, %13
-  %190 = and i1 %187, %189
-  %191 = fcmp fast oge float %181, 0.000000e+00
-  %192 = and i1 %191, %190
-  %193 = fptoui float %181 to i32
-  %194 = icmp ult i32 %193, %15
-  %195 = and i1 %194, %192
-  br i1 %195, label %196, label %344
-
-; <label>:196                                     ; preds = %186
-  %197 = fptoui float %45 to i32
-  %198 = fptoui float %182 to i32
-  %199 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %200 = extractvalue %dx.types.CBufRet.i32 %199, 0
-  %201 = extractvalue %dx.types.CBufRet.i32 %199, 1
-  %202 = extractvalue %dx.types.CBufRet.i32 %199, 2
-  %203 = extractvalue %dx.types.CBufRet.i32 %199, 3
-  %204 = mul i32 %200, %197
-  %205 = call i32 @dx.op.tertiary.i32(i32 48, i32 %198, i32 %201, i32 %204)  ; IMad(a,b,c)
-  %206 = call i32 @dx.op.tertiary.i32(i32 48, i32 %193, i32 %202, i32 %205)  ; IMad(a,b,c)
-  %207 = call i32 @dx.op.tertiary.i32(i32 48, i32 %188, i32 %203, i32 %206)  ; IMad(a,b,c)
-  %208 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %207, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %209 = extractvalue %dx.types.ResRet.i32 %208, 0
-  %210 = extractvalue %dx.types.ResRet.i32 %208, 1
-  %211 = zext i32 %209 to i64
-  %212 = zext i32 %210 to i64
-  %213 = shl i64 %212, 32
-  %214 = or i64 %211, %213
-  %215 = sitofp i64 %214 to float
-  br label %344
-
-; <label>:216                                     ; preds = %183
-  %217 = icmp eq i32 %184, 1
-  br i1 %217, label %218, label %252
-
-; <label>:218                                     ; preds = %216
-  %219 = add i32 %13, -1
-  %220 = uitofp i32 %219 to float
-  %221 = call float @dx.op.binary.f32(i32 35, float %180, float 0.000000e+00)  ; FMax(a,b)
-  %222 = call float @dx.op.binary.f32(i32 36, float %221, float %220)  ; FMin(a,b)
-  %223 = fptoui float %222 to i32
-  %224 = add i32 %15, -1
-  %225 = uitofp i32 %224 to float
-  %226 = call float @dx.op.binary.f32(i32 35, float %181, float 0.000000e+00)  ; FMax(a,b)
-  %227 = call float @dx.op.binary.f32(i32 36, float %226, float %225)  ; FMin(a,b)
-  %228 = fptoui float %227 to i32
-  %229 = uitofp i32 %228 to float
-  %230 = uitofp i32 %223 to float
-  %231 = fptoui float %45 to i32
-  %232 = fptoui float %182 to i32
-  %233 = fptoui float %229 to i32
-  %234 = fptoui float %230 to i32
-  %235 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %236 = extractvalue %dx.types.CBufRet.i32 %235, 0
-  %237 = extractvalue %dx.types.CBufRet.i32 %235, 1
-  %238 = extractvalue %dx.types.CBufRet.i32 %235, 2
-  %239 = extractvalue %dx.types.CBufRet.i32 %235, 3
-  %240 = mul i32 %236, %231
-  %241 = call i32 @dx.op.tertiary.i32(i32 48, i32 %232, i32 %237, i32 %240)  ; IMad(a,b,c)
-  %242 = call i32 @dx.op.tertiary.i32(i32 48, i32 %233, i32 %238, i32 %241)  ; IMad(a,b,c)
-  %243 = call i32 @dx.op.tertiary.i32(i32 48, i32 %234, i32 %239, i32 %242)  ; IMad(a,b,c)
-  %244 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %243, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %245 = extractvalue %dx.types.ResRet.i32 %244, 0
-  %246 = extractvalue %dx.types.ResRet.i32 %244, 1
-  %247 = zext i32 %245 to i64
-  %248 = zext i32 %246 to i64
-  %249 = shl i64 %248, 32
-  %250 = or i64 %247, %249
-  %251 = sitofp i64 %250 to float
-  br label %344
-
-; <label>:252                                     ; preds = %216
-  %253 = icmp eq i32 %184, 2
-  br i1 %253, label %254, label %344
-
-; <label>:254                                     ; preds = %252
-  %255 = fsub fast float %22, %20
-  %256 = fcmp fast olt float %180, %20
-  br i1 %256, label %257, label %270
-
-; <label>:257                                     ; preds = %254
-  %258 = fsub fast float %20, %180
-  %259 = fdiv fast float %258, %255
-  %260 = fptoui float %259 to i32
-  %261 = uitofp i32 %260 to float
-  %262 = fmul fast float %261, %255
-  %263 = fsub fast float %258, %262
-  %264 = and i32 %260, 1
-  %265 = icmp eq i32 %264, 0
-  br i1 %265, label %266, label %268
-
-; <label>:266                                     ; preds = %257
-  %267 = fadd fast float %263, %20
-  br label %285
-
-; <label>:268                                     ; preds = %257
-  %269 = fsub fast float %22, %263
-  br label %285
-
-; <label>:270                                     ; preds = %254
-  %271 = fcmp fast ogt float %180, %22
-  br i1 %271, label %272, label %285
-
-; <label>:272                                     ; preds = %270
-  %273 = fsub fast float %180, %22
-  %274 = fdiv fast float %273, %255
-  %275 = fptoui float %274 to i32
-  %276 = uitofp i32 %275 to float
-  %277 = fmul fast float %276, %255
-  %278 = fsub fast float %273, %277
-  %279 = and i32 %275, 1
-  %280 = icmp eq i32 %279, 0
-  br i1 %280, label %281, label %283
-
-; <label>:281                                     ; preds = %272
-  %282 = fsub fast float %22, %278
-  br label %285
-
-; <label>:283                                     ; preds = %272
-  %284 = fadd fast float %278, %20
-  br label %285
-
-; <label>:285                                     ; preds = %283, %281, %270, %268, %266
-  %286 = phi float [ %267, %266 ], [ %269, %268 ], [ %282, %281 ], [ %284, %283 ], [ %180, %270 ]
-  %287 = fptoui float %286 to i32
-  %288 = fsub fast float %24, %20
-  %289 = fcmp fast olt float %181, %20
-  br i1 %289, label %290, label %303
-
-; <label>:290                                     ; preds = %285
-  %291 = fsub fast float %20, %181
-  %292 = fdiv fast float %291, %288
-  %293 = fptoui float %292 to i32
-  %294 = uitofp i32 %293 to float
-  %295 = fmul fast float %294, %288
-  %296 = fsub fast float %291, %295
-  %297 = and i32 %293, 1
-  %298 = icmp eq i32 %297, 0
-  br i1 %298, label %299, label %301
-
-; <label>:299                                     ; preds = %290
-  %300 = fadd fast float %296, %20
-  br label %318
-
-; <label>:301                                     ; preds = %290
-  %302 = fsub fast float %24, %296
-  br label %318
-
-; <label>:303                                     ; preds = %285
-  %304 = fcmp fast ogt float %181, %24
-  br i1 %304, label %305, label %318
-
-; <label>:305                                     ; preds = %303
-  %306 = fsub fast float %181, %24
-  %307 = fdiv fast float %306, %288
-  %308 = fptoui float %307 to i32
-  %309 = uitofp i32 %308 to float
-  %310 = fmul fast float %309, %288
-  %311 = fsub fast float %306, %310
-  %312 = and i32 %308, 1
-  %313 = icmp eq i32 %312, 0
-  br i1 %313, label %314, label %316
-
-; <label>:314                                     ; preds = %305
-  %315 = fsub fast float %24, %311
-  br label %318
-
-; <label>:316                                     ; preds = %305
-  %317 = fadd fast float %311, %20
-  br label %318
-
-; <label>:318                                     ; preds = %316, %314, %303, %301, %299
-  %319 = phi float [ %300, %299 ], [ %302, %301 ], [ %315, %314 ], [ %317, %316 ], [ %181, %303 ]
-  %320 = fptoui float %319 to i32
-  %321 = uitofp i32 %320 to float
-  %322 = uitofp i32 %287 to float
-  %323 = fptoui float %45 to i32
-  %324 = fptoui float %182 to i32
-  %325 = fptoui float %321 to i32
-  %326 = fptoui float %322 to i32
-  %327 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %328 = extractvalue %dx.types.CBufRet.i32 %327, 0
-  %329 = extractvalue %dx.types.CBufRet.i32 %327, 1
-  %330 = extractvalue %dx.types.CBufRet.i32 %327, 2
-  %331 = extractvalue %dx.types.CBufRet.i32 %327, 3
-  %332 = mul i32 %328, %323
-  %333 = call i32 @dx.op.tertiary.i32(i32 48, i32 %324, i32 %329, i32 %332)  ; IMad(a,b,c)
-  %334 = call i32 @dx.op.tertiary.i32(i32 48, i32 %325, i32 %330, i32 %333)  ; IMad(a,b,c)
-  %335 = call i32 @dx.op.tertiary.i32(i32 48, i32 %326, i32 %331, i32 %334)  ; IMad(a,b,c)
-  %336 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %335, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %337 = extractvalue %dx.types.ResRet.i32 %336, 0
-  %338 = extractvalue %dx.types.ResRet.i32 %336, 1
-  %339 = zext i32 %337 to i64
-  %340 = zext i32 %338 to i64
-  %341 = shl i64 %340, 32
-  %342 = or i64 %339, %341
-  %343 = sitofp i64 %342 to float
-  br label %344
-
-; <label>:344                                     ; preds = %318, %252, %218, %196, %186
-  %345 = phi float [ %215, %196 ], [ 0.000000e+00, %186 ], [ %251, %218 ], [ %343, %318 ], [ 0.000000e+00, %252 ]
-  %346 = fptosi float %345 to i64
-  %347 = trunc i64 %346 to i32
-  %348 = lshr i64 %346, 32
-  %349 = trunc i64 %348 to i32
-  call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %1, i32 %8, i32 0, i32 %347, i32 %349, i32 undef, i32 undef, i8 3, i32 8)  ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment)
-  br label %3647
-
-; <label>:350                                     ; preds = %179
-  %351 = icmp eq i32 %91, 0
-  br i1 %351, label %352, label %1014
-
-; <label>:352                                     ; preds = %350
-  %353 = call float @dx.op.unary.f32(i32 27, float %180)  ; Round_ni(value)
-  %354 = call float @dx.op.unary.f32(i32 27, float %181)  ; Round_ni(value)
-  %355 = fadd fast float %353, 1.000000e+00
-  %356 = fadd fast float %354, 1.000000e+00
-  %357 = extractvalue %dx.types.CBufRet.i32 %6, 3
-  %358 = icmp eq i32 %357, 0
-  br i1 %358, label %359, label %389
-
-; <label>:359                                     ; preds = %352
-  %360 = fcmp fast oge float %353, 0.000000e+00
-  %361 = fptoui float %353 to i32
-  %362 = icmp ult i32 %361, %13
-  %363 = and i1 %360, %362
-  %364 = fcmp fast oge float %354, 0.000000e+00
-  %365 = and i1 %364, %363
-  %366 = fptoui float %354 to i32
-  %367 = icmp ult i32 %366, %15
-  %368 = and i1 %367, %365
-  br i1 %368, label %369, label %517
-
-; <label>:369                                     ; preds = %359
-  %370 = fptoui float %45 to i32
-  %371 = fptoui float %182 to i32
-  %372 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %373 = extractvalue %dx.types.CBufRet.i32 %372, 0
-  %374 = extractvalue %dx.types.CBufRet.i32 %372, 1
-  %375 = extractvalue %dx.types.CBufRet.i32 %372, 2
-  %376 = extractvalue %dx.types.CBufRet.i32 %372, 3
-  %377 = mul i32 %373, %370
-  %378 = call i32 @dx.op.tertiary.i32(i32 48, i32 %371, i32 %374, i32 %377)  ; IMad(a,b,c)
-  %379 = call i32 @dx.op.tertiary.i32(i32 48, i32 %366, i32 %375, i32 %378)  ; IMad(a,b,c)
-  %380 = call i32 @dx.op.tertiary.i32(i32 48, i32 %361, i32 %376, i32 %379)  ; IMad(a,b,c)
-  %381 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %380, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %382 = extractvalue %dx.types.ResRet.i32 %381, 0
-  %383 = extractvalue %dx.types.ResRet.i32 %381, 1
-  %384 = zext i32 %382 to i64
-  %385 = zext i32 %383 to i64
-  %386 = shl i64 %385, 32
-  %387 = or i64 %384, %386
-  %388 = sitofp i64 %387 to float
-  br label %517
-
-; <label>:389                                     ; preds = %352
-  %390 = icmp eq i32 %357, 1
-  br i1 %390, label %391, label %425
-
-; <label>:391                                     ; preds = %389
-  %392 = add i32 %13, -1
-  %393 = uitofp i32 %392 to float
-  %394 = call float @dx.op.binary.f32(i32 35, float %353, float 0.000000e+00)  ; FMax(a,b)
-  %395 = call float @dx.op.binary.f32(i32 36, float %394, float %393)  ; FMin(a,b)
-  %396 = fptoui float %395 to i32
-  %397 = add i32 %15, -1
-  %398 = uitofp i32 %397 to float
-  %399 = call float @dx.op.binary.f32(i32 35, float %354, float 0.000000e+00)  ; FMax(a,b)
-  %400 = call float @dx.op.binary.f32(i32 36, float %399, float %398)  ; FMin(a,b)
-  %401 = fptoui float %400 to i32
-  %402 = uitofp i32 %401 to float
-  %403 = uitofp i32 %396 to float
-  %404 = fptoui float %45 to i32
-  %405 = fptoui float %182 to i32
-  %406 = fptoui float %402 to i32
-  %407 = fptoui float %403 to i32
-  %408 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %409 = extractvalue %dx.types.CBufRet.i32 %408, 0
-  %410 = extractvalue %dx.types.CBufRet.i32 %408, 1
-  %411 = extractvalue %dx.types.CBufRet.i32 %408, 2
-  %412 = extractvalue %dx.types.CBufRet.i32 %408, 3
-  %413 = mul i32 %409, %404
-  %414 = call i32 @dx.op.tertiary.i32(i32 48, i32 %405, i32 %410, i32 %413)  ; IMad(a,b,c)
-  %415 = call i32 @dx.op.tertiary.i32(i32 48, i32 %406, i32 %411, i32 %414)  ; IMad(a,b,c)
-  %416 = call i32 @dx.op.tertiary.i32(i32 48, i32 %407, i32 %412, i32 %415)  ; IMad(a,b,c)
-  %417 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %416, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %418 = extractvalue %dx.types.ResRet.i32 %417, 0
-  %419 = extractvalue %dx.types.ResRet.i32 %417, 1
-  %420 = zext i32 %418 to i64
-  %421 = zext i32 %419 to i64
-  %422 = shl i64 %421, 32
-  %423 = or i64 %420, %422
-  %424 = sitofp i64 %423 to float
-  br label %517
-
-; <label>:425                                     ; preds = %389
-  %426 = icmp eq i32 %357, 2
-  br i1 %426, label %427, label %517
-
-; <label>:427                                     ; preds = %425
-  %428 = fsub fast float %22, %20
-  %429 = fcmp fast olt float %353, %20
-  br i1 %429, label %430, label %443
-
-; <label>:430                                     ; preds = %427
-  %431 = fsub fast float %20, %353
-  %432 = fdiv fast float %431, %428
-  %433 = fptoui float %432 to i32
-  %434 = uitofp i32 %433 to float
-  %435 = fmul fast float %434, %428
-  %436 = fsub fast float %431, %435
-  %437 = and i32 %433, 1
-  %438 = icmp eq i32 %437, 0
-  br i1 %438, label %439, label %441
-
-; <label>:439                                     ; preds = %430
-  %440 = fadd fast float %436, %20
-  br label %458
-
-; <label>:441                                     ; preds = %430
-  %442 = fsub fast float %22, %436
-  br label %458
-
-; <label>:443                                     ; preds = %427
-  %444 = fcmp fast ogt float %353, %22
-  br i1 %444, label %445, label %458
-
-; <label>:445                                     ; preds = %443
-  %446 = fsub fast float %353, %22
-  %447 = fdiv fast float %446, %428
-  %448 = fptoui float %447 to i32
-  %449 = uitofp i32 %448 to float
-  %450 = fmul fast float %449, %428
-  %451 = fsub fast float %446, %450
-  %452 = and i32 %448, 1
-  %453 = icmp eq i32 %452, 0
-  br i1 %453, label %454, label %456
-
-; <label>:454                                     ; preds = %445
-  %455 = fsub fast float %22, %451
-  br label %458
-
-; <label>:456                                     ; preds = %445
-  %457 = fadd fast float %451, %20
-  br label %458
-
-; <label>:458                                     ; preds = %456, %454, %443, %441, %439
-  %459 = phi float [ %440, %439 ], [ %442, %441 ], [ %455, %454 ], [ %457, %456 ], [ %353, %443 ]
-  %460 = fptoui float %459 to i32
-  %461 = fsub fast float %24, %20
-  %462 = fcmp fast olt float %354, %20
-  br i1 %462, label %463, label %476
-
-; <label>:463                                     ; preds = %458
-  %464 = fsub fast float %20, %354
-  %465 = fdiv fast float %464, %461
-  %466 = fptoui float %465 to i32
-  %467 = uitofp i32 %466 to float
-  %468 = fmul fast float %467, %461
-  %469 = fsub fast float %464, %468
-  %470 = and i32 %466, 1
-  %471 = icmp eq i32 %470, 0
-  br i1 %471, label %472, label %474
-
-; <label>:472                                     ; preds = %463
-  %473 = fadd fast float %469, %20
-  br label %491
-
-; <label>:474                                     ; preds = %463
-  %475 = fsub fast float %24, %469
-  br label %491
-
-; <label>:476                                     ; preds = %458
-  %477 = fcmp fast ogt float %354, %24
-  br i1 %477, label %478, label %491
-
-; <label>:478                                     ; preds = %476
-  %479 = fsub fast float %354, %24
-  %480 = fdiv fast float %479, %461
-  %481 = fptoui float %480 to i32
-  %482 = uitofp i32 %481 to float
-  %483 = fmul fast float %482, %461
-  %484 = fsub fast float %479, %483
-  %485 = and i32 %481, 1
-  %486 = icmp eq i32 %485, 0
-  br i1 %486, label %487, label %489
-
-; <label>:487                                     ; preds = %478
-  %488 = fsub fast float %24, %484
-  br label %491
-
-; <label>:489                                     ; preds = %478
-  %490 = fadd fast float %484, %20
-  br label %491
-
-; <label>:491                                     ; preds = %489, %487, %476, %474, %472
-  %492 = phi float [ %473, %472 ], [ %475, %474 ], [ %488, %487 ], [ %490, %489 ], [ %354, %476 ]
-  %493 = fptoui float %492 to i32
-  %494 = uitofp i32 %493 to float
-  %495 = uitofp i32 %460 to float
-  %496 = fptoui float %45 to i32
-  %497 = fptoui float %182 to i32
-  %498 = fptoui float %494 to i32
-  %499 = fptoui float %495 to i32
-  %500 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %501 = extractvalue %dx.types.CBufRet.i32 %500, 0
-  %502 = extractvalue %dx.types.CBufRet.i32 %500, 1
-  %503 = extractvalue %dx.types.CBufRet.i32 %500, 2
-  %504 = extractvalue %dx.types.CBufRet.i32 %500, 3
-  %505 = mul i32 %501, %496
-  %506 = call i32 @dx.op.tertiary.i32(i32 48, i32 %497, i32 %502, i32 %505)  ; IMad(a,b,c)
-  %507 = call i32 @dx.op.tertiary.i32(i32 48, i32 %498, i32 %503, i32 %506)  ; IMad(a,b,c)
-  %508 = call i32 @dx.op.tertiary.i32(i32 48, i32 %499, i32 %504, i32 %507)  ; IMad(a,b,c)
-  %509 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %508, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %510 = extractvalue %dx.types.ResRet.i32 %509, 0
-  %511 = extractvalue %dx.types.ResRet.i32 %509, 1
-  %512 = zext i32 %510 to i64
-  %513 = zext i32 %511 to i64
-  %514 = shl i64 %513, 32
-  %515 = or i64 %512, %514
-  %516 = sitofp i64 %515 to float
-  br label %517
-
-; <label>:517                                     ; preds = %491, %425, %391, %369, %359
-  %518 = phi float [ %388, %369 ], [ 0.000000e+00, %359 ], [ %424, %391 ], [ %516, %491 ], [ 0.000000e+00, %425 ]
-  br i1 %358, label %519, label %549
-
-; <label>:519                                     ; preds = %517
-  %520 = fcmp fast oge float %355, 0.000000e+00
-  %521 = fptoui float %355 to i32
-  %522 = icmp ult i32 %521, %13
-  %523 = and i1 %520, %522
-  %524 = fcmp fast oge float %354, 0.000000e+00
-  %525 = and i1 %524, %523
-  %526 = fptoui float %354 to i32
-  %527 = icmp ult i32 %526, %15
-  %528 = and i1 %527, %525
-  br i1 %528, label %529, label %677
-
-; <label>:529                                     ; preds = %519
-  %530 = fptoui float %45 to i32
-  %531 = fptoui float %182 to i32
-  %532 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %533 = extractvalue %dx.types.CBufRet.i32 %532, 0
-  %534 = extractvalue %dx.types.CBufRet.i32 %532, 1
-  %535 = extractvalue %dx.types.CBufRet.i32 %532, 2
-  %536 = extractvalue %dx.types.CBufRet.i32 %532, 3
-  %537 = mul i32 %533, %530
-  %538 = call i32 @dx.op.tertiary.i32(i32 48, i32 %531, i32 %534, i32 %537)  ; IMad(a,b,c)
-  %539 = call i32 @dx.op.tertiary.i32(i32 48, i32 %526, i32 %535, i32 %538)  ; IMad(a,b,c)
-  %540 = call i32 @dx.op.tertiary.i32(i32 48, i32 %521, i32 %536, i32 %539)  ; IMad(a,b,c)
-  %541 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %540, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %542 = extractvalue %dx.types.ResRet.i32 %541, 0
-  %543 = extractvalue %dx.types.ResRet.i32 %541, 1
-  %544 = zext i32 %542 to i64
-  %545 = zext i32 %543 to i64
-  %546 = shl i64 %545, 32
-  %547 = or i64 %544, %546
-  %548 = sitofp i64 %547 to float
-  br label %677
-
-; <label>:549                                     ; preds = %517
-  %550 = icmp eq i32 %357, 1
-  br i1 %550, label %551, label %585
-
-; <label>:551                                     ; preds = %549
-  %552 = add i32 %13, -1
-  %553 = uitofp i32 %552 to float
-  %554 = call float @dx.op.binary.f32(i32 35, float %355, float 0.000000e+00)  ; FMax(a,b)
-  %555 = call float @dx.op.binary.f32(i32 36, float %554, float %553)  ; FMin(a,b)
-  %556 = fptoui float %555 to i32
-  %557 = add i32 %15, -1
-  %558 = uitofp i32 %557 to float
-  %559 = call float @dx.op.binary.f32(i32 35, float %354, float 0.000000e+00)  ; FMax(a,b)
-  %560 = call float @dx.op.binary.f32(i32 36, float %559, float %558)  ; FMin(a,b)
-  %561 = fptoui float %560 to i32
-  %562 = uitofp i32 %561 to float
-  %563 = uitofp i32 %556 to float
-  %564 = fptoui float %45 to i32
-  %565 = fptoui float %182 to i32
-  %566 = fptoui float %562 to i32
-  %567 = fptoui float %563 to i32
-  %568 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %569 = extractvalue %dx.types.CBufRet.i32 %568, 0
-  %570 = extractvalue %dx.types.CBufRet.i32 %568, 1
-  %571 = extractvalue %dx.types.CBufRet.i32 %568, 2
-  %572 = extractvalue %dx.types.CBufRet.i32 %568, 3
-  %573 = mul i32 %569, %564
-  %574 = call i32 @dx.op.tertiary.i32(i32 48, i32 %565, i32 %570, i32 %573)  ; IMad(a,b,c)
-  %575 = call i32 @dx.op.tertiary.i32(i32 48, i32 %566, i32 %571, i32 %574)  ; IMad(a,b,c)
-  %576 = call i32 @dx.op.tertiary.i32(i32 48, i32 %567, i32 %572, i32 %575)  ; IMad(a,b,c)
-  %577 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %576, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %578 = extractvalue %dx.types.ResRet.i32 %577, 0
-  %579 = extractvalue %dx.types.ResRet.i32 %577, 1
-  %580 = zext i32 %578 to i64
-  %581 = zext i32 %579 to i64
-  %582 = shl i64 %581, 32
-  %583 = or i64 %580, %582
-  %584 = sitofp i64 %583 to float
-  br label %677
-
-; <label>:585                                     ; preds = %549
-  %586 = icmp eq i32 %357, 2
-  br i1 %586, label %587, label %677
-
-; <label>:587                                     ; preds = %585
-  %588 = fsub fast float %22, %20
-  %589 = fcmp fast olt float %355, %20
-  br i1 %589, label %590, label %603
-
-; <label>:590                                     ; preds = %587
-  %591 = fsub fast float %20, %355
-  %592 = fdiv fast float %591, %588
-  %593 = fptoui float %592 to i32
-  %594 = uitofp i32 %593 to float
-  %595 = fmul fast float %594, %588
-  %596 = fsub fast float %591, %595
-  %597 = and i32 %593, 1
-  %598 = icmp eq i32 %597, 0
-  br i1 %598, label %599, label %601
-
-; <label>:599                                     ; preds = %590
-  %600 = fadd fast float %596, %20
-  br label %618
-
-; <label>:601                                     ; preds = %590
-  %602 = fsub fast float %22, %596
-  br label %618
-
-; <label>:603                                     ; preds = %587
-  %604 = fcmp fast ogt float %355, %22
-  br i1 %604, label %605, label %618
-
-; <label>:605                                     ; preds = %603
-  %606 = fsub fast float %355, %22
-  %607 = fdiv fast float %606, %588
-  %608 = fptoui float %607 to i32
-  %609 = uitofp i32 %608 to float
-  %610 = fmul fast float %609, %588
-  %611 = fsub fast float %606, %610
-  %612 = and i32 %608, 1
-  %613 = icmp eq i32 %612, 0
-  br i1 %613, label %614, label %616
-
-; <label>:614                                     ; preds = %605
-  %615 = fsub fast float %22, %611
-  br label %618
-
-; <label>:616                                     ; preds = %605
-  %617 = fadd fast float %611, %20
-  br label %618
-
-; <label>:618                                     ; preds = %616, %614, %603, %601, %599
-  %619 = phi float [ %600, %599 ], [ %602, %601 ], [ %615, %614 ], [ %617, %616 ], [ %355, %603 ]
-  %620 = fptoui float %619 to i32
-  %621 = fsub fast float %24, %20
-  %622 = fcmp fast olt float %354, %20
-  br i1 %622, label %623, label %636
-
-; <label>:623                                     ; preds = %618
-  %624 = fsub fast float %20, %354
-  %625 = fdiv fast float %624, %621
-  %626 = fptoui float %625 to i32
-  %627 = uitofp i32 %626 to float
-  %628 = fmul fast float %627, %621
-  %629 = fsub fast float %624, %628
-  %630 = and i32 %626, 1
-  %631 = icmp eq i32 %630, 0
-  br i1 %631, label %632, label %634
-
-; <label>:632                                     ; preds = %623
-  %633 = fadd fast float %629, %20
-  br label %651
-
-; <label>:634                                     ; preds = %623
-  %635 = fsub fast float %24, %629
-  br label %651
-
-; <label>:636                                     ; preds = %618
-  %637 = fcmp fast ogt float %354, %24
-  br i1 %637, label %638, label %651
-
-; <label>:638                                     ; preds = %636
-  %639 = fsub fast float %354, %24
-  %640 = fdiv fast float %639, %621
-  %641 = fptoui float %640 to i32
-  %642 = uitofp i32 %641 to float
-  %643 = fmul fast float %642, %621
-  %644 = fsub fast float %639, %643
-  %645 = and i32 %641, 1
-  %646 = icmp eq i32 %645, 0
-  br i1 %646, label %647, label %649
-
-; <label>:647                                     ; preds = %638
-  %648 = fsub fast float %24, %644
-  br label %651
-
-; <label>:649                                     ; preds = %638
-  %650 = fadd fast float %644, %20
-  br label %651
-
-; <label>:651                                     ; preds = %649, %647, %636, %634, %632
-  %652 = phi float [ %633, %632 ], [ %635, %634 ], [ %648, %647 ], [ %650, %649 ], [ %354, %636 ]
-  %653 = fptoui float %652 to i32
-  %654 = uitofp i32 %653 to float
-  %655 = uitofp i32 %620 to float
-  %656 = fptoui float %45 to i32
-  %657 = fptoui float %182 to i32
-  %658 = fptoui float %654 to i32
-  %659 = fptoui float %655 to i32
-  %660 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %661 = extractvalue %dx.types.CBufRet.i32 %660, 0
-  %662 = extractvalue %dx.types.CBufRet.i32 %660, 1
-  %663 = extractvalue %dx.types.CBufRet.i32 %660, 2
-  %664 = extractvalue %dx.types.CBufRet.i32 %660, 3
-  %665 = mul i32 %661, %656
-  %666 = call i32 @dx.op.tertiary.i32(i32 48, i32 %657, i32 %662, i32 %665)  ; IMad(a,b,c)
-  %667 = call i32 @dx.op.tertiary.i32(i32 48, i32 %658, i32 %663, i32 %666)  ; IMad(a,b,c)
-  %668 = call i32 @dx.op.tertiary.i32(i32 48, i32 %659, i32 %664, i32 %667)  ; IMad(a,b,c)
-  %669 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %668, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %670 = extractvalue %dx.types.ResRet.i32 %669, 0
-  %671 = extractvalue %dx.types.ResRet.i32 %669, 1
-  %672 = zext i32 %670 to i64
-  %673 = zext i32 %671 to i64
-  %674 = shl i64 %673, 32
-  %675 = or i64 %672, %674
-  %676 = sitofp i64 %675 to float
-  br label %677
-
-; <label>:677                                     ; preds = %651, %585, %551, %529, %519
-  %678 = phi float [ %548, %529 ], [ 0.000000e+00, %519 ], [ %584, %551 ], [ %676, %651 ], [ 0.000000e+00, %585 ]
-  br i1 %358, label %679, label %709
-
-; <label>:679                                     ; preds = %677
-  %680 = fcmp fast oge float %353, 0.000000e+00
-  %681 = fptoui float %353 to i32
-  %682 = icmp ult i32 %681, %13
-  %683 = and i1 %680, %682
-  %684 = fcmp fast oge float %356, 0.000000e+00
-  %685 = and i1 %684, %683
-  %686 = fptoui float %356 to i32
-  %687 = icmp ult i32 %686, %15
-  %688 = and i1 %687, %685
-  br i1 %688, label %689, label %837
-
-; <label>:689                                     ; preds = %679
-  %690 = fptoui float %45 to i32
-  %691 = fptoui float %182 to i32
-  %692 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %693 = extractvalue %dx.types.CBufRet.i32 %692, 0
-  %694 = extractvalue %dx.types.CBufRet.i32 %692, 1
-  %695 = extractvalue %dx.types.CBufRet.i32 %692, 2
-  %696 = extractvalue %dx.types.CBufRet.i32 %692, 3
-  %697 = mul i32 %693, %690
-  %698 = call i32 @dx.op.tertiary.i32(i32 48, i32 %691, i32 %694, i32 %697)  ; IMad(a,b,c)
-  %699 = call i32 @dx.op.tertiary.i32(i32 48, i32 %686, i32 %695, i32 %698)  ; IMad(a,b,c)
-  %700 = call i32 @dx.op.tertiary.i32(i32 48, i32 %681, i32 %696, i32 %699)  ; IMad(a,b,c)
-  %701 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %700, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %702 = extractvalue %dx.types.ResRet.i32 %701, 0
-  %703 = extractvalue %dx.types.ResRet.i32 %701, 1
-  %704 = zext i32 %702 to i64
-  %705 = zext i32 %703 to i64
-  %706 = shl i64 %705, 32
-  %707 = or i64 %704, %706
-  %708 = sitofp i64 %707 to float
-  br label %837
-
-; <label>:709                                     ; preds = %677
-  %710 = icmp eq i32 %357, 1
-  br i1 %710, label %711, label %745
-
-; <label>:711                                     ; preds = %709
-  %712 = add i32 %13, -1
-  %713 = uitofp i32 %712 to float
-  %714 = call float @dx.op.binary.f32(i32 35, float %353, float 0.000000e+00)  ; FMax(a,b)
-  %715 = call float @dx.op.binary.f32(i32 36, float %714, float %713)  ; FMin(a,b)
-  %716 = fptoui float %715 to i32
-  %717 = add i32 %15, -1
-  %718 = uitofp i32 %717 to float
-  %719 = call float @dx.op.binary.f32(i32 35, float %356, float 0.000000e+00)  ; FMax(a,b)
-  %720 = call float @dx.op.binary.f32(i32 36, float %719, float %718)  ; FMin(a,b)
-  %721 = fptoui float %720 to i32
-  %722 = uitofp i32 %721 to float
-  %723 = uitofp i32 %716 to float
-  %724 = fptoui float %45 to i32
-  %725 = fptoui float %182 to i32
-  %726 = fptoui float %722 to i32
-  %727 = fptoui float %723 to i32
-  %728 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %729 = extractvalue %dx.types.CBufRet.i32 %728, 0
-  %730 = extractvalue %dx.types.CBufRet.i32 %728, 1
-  %731 = extractvalue %dx.types.CBufRet.i32 %728, 2
-  %732 = extractvalue %dx.types.CBufRet.i32 %728, 3
-  %733 = mul i32 %729, %724
-  %734 = call i32 @dx.op.tertiary.i32(i32 48, i32 %725, i32 %730, i32 %733)  ; IMad(a,b,c)
-  %735 = call i32 @dx.op.tertiary.i32(i32 48, i32 %726, i32 %731, i32 %734)  ; IMad(a,b,c)
-  %736 = call i32 @dx.op.tertiary.i32(i32 48, i32 %727, i32 %732, i32 %735)  ; IMad(a,b,c)
-  %737 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %736, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %738 = extractvalue %dx.types.ResRet.i32 %737, 0
-  %739 = extractvalue %dx.types.ResRet.i32 %737, 1
-  %740 = zext i32 %738 to i64
-  %741 = zext i32 %739 to i64
-  %742 = shl i64 %741, 32
-  %743 = or i64 %740, %742
-  %744 = sitofp i64 %743 to float
-  br label %837
-
-; <label>:745                                     ; preds = %709
-  %746 = icmp eq i32 %357, 2
-  br i1 %746, label %747, label %837
-
-; <label>:747                                     ; preds = %745
-  %748 = fsub fast float %22, %20
-  %749 = fcmp fast olt float %353, %20
-  br i1 %749, label %750, label %763
-
-; <label>:750                                     ; preds = %747
-  %751 = fsub fast float %20, %353
-  %752 = fdiv fast float %751, %748
-  %753 = fptoui float %752 to i32
-  %754 = uitofp i32 %753 to float
-  %755 = fmul fast float %754, %748
-  %756 = fsub fast float %751, %755
-  %757 = and i32 %753, 1
-  %758 = icmp eq i32 %757, 0
-  br i1 %758, label %759, label %761
-
-; <label>:759                                     ; preds = %750
-  %760 = fadd fast float %756, %20
-  br label %778
-
-; <label>:761                                     ; preds = %750
-  %762 = fsub fast float %22, %756
-  br label %778
-
-; <label>:763                                     ; preds = %747
-  %764 = fcmp fast ogt float %353, %22
-  br i1 %764, label %765, label %778
-
-; <label>:765                                     ; preds = %763
-  %766 = fsub fast float %353, %22
-  %767 = fdiv fast float %766, %748
-  %768 = fptoui float %767 to i32
-  %769 = uitofp i32 %768 to float
-  %770 = fmul fast float %769, %748
-  %771 = fsub fast float %766, %770
-  %772 = and i32 %768, 1
-  %773 = icmp eq i32 %772, 0
-  br i1 %773, label %774, label %776
-
-; <label>:774                                     ; preds = %765
-  %775 = fsub fast float %22, %771
-  br label %778
-
-; <label>:776                                     ; preds = %765
-  %777 = fadd fast float %771, %20
-  br label %778
-
-; <label>:778                                     ; preds = %776, %774, %763, %761, %759
-  %779 = phi float [ %760, %759 ], [ %762, %761 ], [ %775, %774 ], [ %777, %776 ], [ %353, %763 ]
-  %780 = fptoui float %779 to i32
-  %781 = fsub fast float %24, %20
-  %782 = fcmp fast olt float %356, %20
-  br i1 %782, label %783, label %796
-
-; <label>:783                                     ; preds = %778
-  %784 = fsub fast float %20, %356
-  %785 = fdiv fast float %784, %781
-  %786 = fptoui float %785 to i32
-  %787 = uitofp i32 %786 to float
-  %788 = fmul fast float %787, %781
-  %789 = fsub fast float %784, %788
-  %790 = and i32 %786, 1
-  %791 = icmp eq i32 %790, 0
-  br i1 %791, label %792, label %794
-
-; <label>:792                                     ; preds = %783
-  %793 = fadd fast float %789, %20
-  br label %811
-
-; <label>:794                                     ; preds = %783
-  %795 = fsub fast float %24, %789
-  br label %811
-
-; <label>:796                                     ; preds = %778
-  %797 = fcmp fast ogt float %356, %24
-  br i1 %797, label %798, label %811
-
-; <label>:798                                     ; preds = %796
-  %799 = fsub fast float %356, %24
-  %800 = fdiv fast float %799, %781
-  %801 = fptoui float %800 to i32
-  %802 = uitofp i32 %801 to float
-  %803 = fmul fast float %802, %781
-  %804 = fsub fast float %799, %803
-  %805 = and i32 %801, 1
-  %806 = icmp eq i32 %805, 0
-  br i1 %806, label %807, label %809
-
-; <label>:807                                     ; preds = %798
-  %808 = fsub fast float %24, %804
-  br label %811
-
-; <label>:809                                     ; preds = %798
-  %810 = fadd fast float %804, %20
-  br label %811
-
-; <label>:811                                     ; preds = %809, %807, %796, %794, %792
-  %812 = phi float [ %793, %792 ], [ %795, %794 ], [ %808, %807 ], [ %810, %809 ], [ %356, %796 ]
-  %813 = fptoui float %812 to i32
-  %814 = uitofp i32 %813 to float
-  %815 = uitofp i32 %780 to float
-  %816 = fptoui float %45 to i32
-  %817 = fptoui float %182 to i32
-  %818 = fptoui float %814 to i32
-  %819 = fptoui float %815 to i32
-  %820 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %821 = extractvalue %dx.types.CBufRet.i32 %820, 0
-  %822 = extractvalue %dx.types.CBufRet.i32 %820, 1
-  %823 = extractvalue %dx.types.CBufRet.i32 %820, 2
-  %824 = extractvalue %dx.types.CBufRet.i32 %820, 3
-  %825 = mul i32 %821, %816
-  %826 = call i32 @dx.op.tertiary.i32(i32 48, i32 %817, i32 %822, i32 %825)  ; IMad(a,b,c)
-  %827 = call i32 @dx.op.tertiary.i32(i32 48, i32 %818, i32 %823, i32 %826)  ; IMad(a,b,c)
-  %828 = call i32 @dx.op.tertiary.i32(i32 48, i32 %819, i32 %824, i32 %827)  ; IMad(a,b,c)
-  %829 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %828, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %830 = extractvalue %dx.types.ResRet.i32 %829, 0
-  %831 = extractvalue %dx.types.ResRet.i32 %829, 1
-  %832 = zext i32 %830 to i64
-  %833 = zext i32 %831 to i64
-  %834 = shl i64 %833, 32
-  %835 = or i64 %832, %834
-  %836 = sitofp i64 %835 to float
-  br label %837
-
-; <label>:837                                     ; preds = %811, %745, %711, %689, %679
-  %838 = phi float [ %708, %689 ], [ 0.000000e+00, %679 ], [ %744, %711 ], [ %836, %811 ], [ 0.000000e+00, %745 ]
-  br i1 %358, label %839, label %869
-
-; <label>:839                                     ; preds = %837
-  %840 = fcmp fast oge float %355, 0.000000e+00
-  %841 = fptoui float %355 to i32
-  %842 = icmp ult i32 %841, %13
-  %843 = and i1 %840, %842
-  %844 = fcmp fast oge float %356, 0.000000e+00
-  %845 = and i1 %844, %843
-  %846 = fptoui float %356 to i32
-  %847 = icmp ult i32 %846, %15
-  %848 = and i1 %847, %845
-  br i1 %848, label %849, label %997
-
-; <label>:849                                     ; preds = %839
-  %850 = fptoui float %45 to i32
-  %851 = fptoui float %182 to i32
-  %852 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %853 = extractvalue %dx.types.CBufRet.i32 %852, 0
-  %854 = extractvalue %dx.types.CBufRet.i32 %852, 1
-  %855 = extractvalue %dx.types.CBufRet.i32 %852, 2
-  %856 = extractvalue %dx.types.CBufRet.i32 %852, 3
-  %857 = mul i32 %853, %850
-  %858 = call i32 @dx.op.tertiary.i32(i32 48, i32 %851, i32 %854, i32 %857)  ; IMad(a,b,c)
-  %859 = call i32 @dx.op.tertiary.i32(i32 48, i32 %846, i32 %855, i32 %858)  ; IMad(a,b,c)
-  %860 = call i32 @dx.op.tertiary.i32(i32 48, i32 %841, i32 %856, i32 %859)  ; IMad(a,b,c)
-  %861 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %860, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %862 = extractvalue %dx.types.ResRet.i32 %861, 0
-  %863 = extractvalue %dx.types.ResRet.i32 %861, 1
-  %864 = zext i32 %862 to i64
-  %865 = zext i32 %863 to i64
-  %866 = shl i64 %865, 32
-  %867 = or i64 %864, %866
-  %868 = sitofp i64 %867 to float
-  br label %997
-
-; <label>:869                                     ; preds = %837
-  %870 = icmp eq i32 %357, 1
-  br i1 %870, label %871, label %905
-
-; <label>:871                                     ; preds = %869
-  %872 = add i32 %13, -1
-  %873 = uitofp i32 %872 to float
-  %874 = call float @dx.op.binary.f32(i32 35, float %355, float 0.000000e+00)  ; FMax(a,b)
-  %875 = call float @dx.op.binary.f32(i32 36, float %874, float %873)  ; FMin(a,b)
-  %876 = fptoui float %875 to i32
-  %877 = add i32 %15, -1
-  %878 = uitofp i32 %877 to float
-  %879 = call float @dx.op.binary.f32(i32 35, float %356, float 0.000000e+00)  ; FMax(a,b)
-  %880 = call float @dx.op.binary.f32(i32 36, float %879, float %878)  ; FMin(a,b)
-  %881 = fptoui float %880 to i32
-  %882 = uitofp i32 %881 to float
-  %883 = uitofp i32 %876 to float
-  %884 = fptoui float %45 to i32
-  %885 = fptoui float %182 to i32
-  %886 = fptoui float %882 to i32
-  %887 = fptoui float %883 to i32
-  %888 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %889 = extractvalue %dx.types.CBufRet.i32 %888, 0
-  %890 = extractvalue %dx.types.CBufRet.i32 %888, 1
-  %891 = extractvalue %dx.types.CBufRet.i32 %888, 2
-  %892 = extractvalue %dx.types.CBufRet.i32 %888, 3
-  %893 = mul i32 %889, %884
-  %894 = call i32 @dx.op.tertiary.i32(i32 48, i32 %885, i32 %890, i32 %893)  ; IMad(a,b,c)
-  %895 = call i32 @dx.op.tertiary.i32(i32 48, i32 %886, i32 %891, i32 %894)  ; IMad(a,b,c)
-  %896 = call i32 @dx.op.tertiary.i32(i32 48, i32 %887, i32 %892, i32 %895)  ; IMad(a,b,c)
-  %897 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %896, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %898 = extractvalue %dx.types.ResRet.i32 %897, 0
-  %899 = extractvalue %dx.types.ResRet.i32 %897, 1
-  %900 = zext i32 %898 to i64
-  %901 = zext i32 %899 to i64
-  %902 = shl i64 %901, 32
-  %903 = or i64 %900, %902
-  %904 = sitofp i64 %903 to float
-  br label %997
-
-; <label>:905                                     ; preds = %869
-  %906 = icmp eq i32 %357, 2
-  br i1 %906, label %907, label %997
-
-; <label>:907                                     ; preds = %905
-  %908 = fsub fast float %22, %20
-  %909 = fcmp fast olt float %355, %20
-  br i1 %909, label %910, label %923
-
-; <label>:910                                     ; preds = %907
-  %911 = fsub fast float %20, %355
-  %912 = fdiv fast float %911, %908
-  %913 = fptoui float %912 to i32
-  %914 = uitofp i32 %913 to float
-  %915 = fmul fast float %914, %908
-  %916 = fsub fast float %911, %915
-  %917 = and i32 %913, 1
-  %918 = icmp eq i32 %917, 0
-  br i1 %918, label %919, label %921
-
-; <label>:919                                     ; preds = %910
-  %920 = fadd fast float %916, %20
-  br label %938
-
-; <label>:921                                     ; preds = %910
-  %922 = fsub fast float %22, %916
-  br label %938
-
-; <label>:923                                     ; preds = %907
-  %924 = fcmp fast ogt float %355, %22
-  br i1 %924, label %925, label %938
-
-; <label>:925                                     ; preds = %923
-  %926 = fsub fast float %355, %22
-  %927 = fdiv fast float %926, %908
-  %928 = fptoui float %927 to i32
-  %929 = uitofp i32 %928 to float
-  %930 = fmul fast float %929, %908
-  %931 = fsub fast float %926, %930
-  %932 = and i32 %928, 1
-  %933 = icmp eq i32 %932, 0
-  br i1 %933, label %934, label %936
-
-; <label>:934                                     ; preds = %925
-  %935 = fsub fast float %22, %931
-  br label %938
-
-; <label>:936                                     ; preds = %925
-  %937 = fadd fast float %931, %20
-  br label %938
-
-; <label>:938                                     ; preds = %936, %934, %923, %921, %919
-  %939 = phi float [ %920, %919 ], [ %922, %921 ], [ %935, %934 ], [ %937, %936 ], [ %355, %923 ]
-  %940 = fptoui float %939 to i32
-  %941 = fsub fast float %24, %20
-  %942 = fcmp fast olt float %356, %20
-  br i1 %942, label %943, label %956
-
-; <label>:943                                     ; preds = %938
-  %944 = fsub fast float %20, %356
-  %945 = fdiv fast float %944, %941
-  %946 = fptoui float %945 to i32
-  %947 = uitofp i32 %946 to float
-  %948 = fmul fast float %947, %941
-  %949 = fsub fast float %944, %948
-  %950 = and i32 %946, 1
-  %951 = icmp eq i32 %950, 0
-  br i1 %951, label %952, label %954
-
-; <label>:952                                     ; preds = %943
-  %953 = fadd fast float %949, %20
-  br label %971
-
-; <label>:954                                     ; preds = %943
-  %955 = fsub fast float %24, %949
-  br label %971
-
-; <label>:956                                     ; preds = %938
-  %957 = fcmp fast ogt float %356, %24
-  br i1 %957, label %958, label %971
-
-; <label>:958                                     ; preds = %956
-  %959 = fsub fast float %356, %24
-  %960 = fdiv fast float %959, %941
-  %961 = fptoui float %960 to i32
-  %962 = uitofp i32 %961 to float
-  %963 = fmul fast float %962, %941
-  %964 = fsub fast float %959, %963
-  %965 = and i32 %961, 1
-  %966 = icmp eq i32 %965, 0
-  br i1 %966, label %967, label %969
-
-; <label>:967                                     ; preds = %958
-  %968 = fsub fast float %24, %964
-  br label %971
-
-; <label>:969                                     ; preds = %958
-  %970 = fadd fast float %964, %20
-  br label %971
-
-; <label>:971                                     ; preds = %969, %967, %956, %954, %952
-  %972 = phi float [ %953, %952 ], [ %955, %954 ], [ %968, %967 ], [ %970, %969 ], [ %356, %956 ]
-  %973 = fptoui float %972 to i32
-  %974 = uitofp i32 %973 to float
-  %975 = uitofp i32 %940 to float
-  %976 = fptoui float %45 to i32
-  %977 = fptoui float %182 to i32
-  %978 = fptoui float %974 to i32
-  %979 = fptoui float %975 to i32
-  %980 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %981 = extractvalue %dx.types.CBufRet.i32 %980, 0
-  %982 = extractvalue %dx.types.CBufRet.i32 %980, 1
-  %983 = extractvalue %dx.types.CBufRet.i32 %980, 2
-  %984 = extractvalue %dx.types.CBufRet.i32 %980, 3
-  %985 = mul i32 %981, %976
-  %986 = call i32 @dx.op.tertiary.i32(i32 48, i32 %977, i32 %982, i32 %985)  ; IMad(a,b,c)
-  %987 = call i32 @dx.op.tertiary.i32(i32 48, i32 %978, i32 %983, i32 %986)  ; IMad(a,b,c)
-  %988 = call i32 @dx.op.tertiary.i32(i32 48, i32 %979, i32 %984, i32 %987)  ; IMad(a,b,c)
-  %989 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %988, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %990 = extractvalue %dx.types.ResRet.i32 %989, 0
-  %991 = extractvalue %dx.types.ResRet.i32 %989, 1
-  %992 = zext i32 %990 to i64
-  %993 = zext i32 %991 to i64
-  %994 = shl i64 %993, 32
-  %995 = or i64 %992, %994
-  %996 = sitofp i64 %995 to float
-  br label %997
-
-; <label>:997                                     ; preds = %971, %905, %871, %849, %839
-  %998 = phi float [ %868, %849 ], [ 0.000000e+00, %839 ], [ %904, %871 ], [ %996, %971 ], [ 0.000000e+00, %905 ]
-  %999 = call float @dx.op.unary.f32(i32 22, float %180)  ; Frc(value)
-  %1000 = fsub fast float %678, %518
-  %1001 = fmul fast float %999, %1000
-  %1002 = fadd fast float %1001, %518
-  %1003 = fsub fast float %998, %838
-  %1004 = fmul fast float %999, %1003
-  %1005 = fadd fast float %1004, %838
-  %1006 = call float @dx.op.unary.f32(i32 22, float %181)  ; Frc(value)
-  %1007 = fsub fast float %1005, %1002
-  %1008 = fmul fast float %1007, %1006
-  %1009 = fadd fast float %1008, %1002
-  %1010 = fptosi float %1009 to i64
-  %1011 = trunc i64 %1010 to i32
-  %1012 = lshr i64 %1010, 32
-  %1013 = trunc i64 %1012 to i32
-  call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %1, i32 %8, i32 0, i32 %1011, i32 %1013, i32 undef, i32 undef, i8 3, i32 8)  ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment)
-  br label %3647
-
-; <label>:1014                                    ; preds = %350
-  %1015 = icmp eq i32 %91, 2
-  br i1 %1015, label %1016, label %3647
-
-; <label>:1016                                    ; preds = %1014
-  %1017 = call float @dx.op.unary.f32(i32 27, float %180)  ; Round_ni(value)
-  %1018 = fadd fast float %1017, -1.000000e+00
-  %1019 = call float @dx.op.unary.f32(i32 27, float %181)  ; Round_ni(value)
-  %1020 = fadd fast float %1019, -1.000000e+00
-  %1021 = extractvalue %dx.types.CBufRet.i32 %6, 3
-  %1022 = icmp eq i32 %1021, 0
-  br i1 %1022, label %1023, label %1053
-
-; <label>:1023                                    ; preds = %1016
-  %1024 = fcmp fast oge float %1018, 0.000000e+00
-  %1025 = fptoui float %1018 to i32
-  %1026 = icmp ult i32 %1025, %13
-  %1027 = and i1 %1024, %1026
-  %1028 = fcmp fast oge float %1020, 0.000000e+00
-  %1029 = and i1 %1028, %1027
-  %1030 = fptoui float %1020 to i32
-  %1031 = icmp ult i32 %1030, %15
-  %1032 = and i1 %1031, %1029
-  br i1 %1032, label %1033, label %1181
-
-; <label>:1033                                    ; preds = %1023
-  %1034 = fptoui float %45 to i32
-  %1035 = fptoui float %182 to i32
-  %1036 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1037 = extractvalue %dx.types.CBufRet.i32 %1036, 0
-  %1038 = extractvalue %dx.types.CBufRet.i32 %1036, 1
-  %1039 = extractvalue %dx.types.CBufRet.i32 %1036, 2
-  %1040 = extractvalue %dx.types.CBufRet.i32 %1036, 3
-  %1041 = mul i32 %1037, %1034
-  %1042 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1035, i32 %1038, i32 %1041)  ; IMad(a,b,c)
-  %1043 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1030, i32 %1039, i32 %1042)  ; IMad(a,b,c)
-  %1044 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1025, i32 %1040, i32 %1043)  ; IMad(a,b,c)
-  %1045 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1044, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1046 = extractvalue %dx.types.ResRet.i32 %1045, 0
-  %1047 = extractvalue %dx.types.ResRet.i32 %1045, 1
-  %1048 = zext i32 %1046 to i64
-  %1049 = zext i32 %1047 to i64
-  %1050 = shl i64 %1049, 32
-  %1051 = or i64 %1048, %1050
-  %1052 = sitofp i64 %1051 to float
-  br label %1181
-
-; <label>:1053                                    ; preds = %1016
-  %1054 = icmp eq i32 %1021, 1
-  br i1 %1054, label %1055, label %1089
-
-; <label>:1055                                    ; preds = %1053
-  %1056 = add i32 %13, -1
-  %1057 = uitofp i32 %1056 to float
-  %1058 = call float @dx.op.binary.f32(i32 35, float %1018, float 0.000000e+00)  ; FMax(a,b)
-  %1059 = call float @dx.op.binary.f32(i32 36, float %1058, float %1057)  ; FMin(a,b)
-  %1060 = fptoui float %1059 to i32
-  %1061 = add i32 %15, -1
-  %1062 = uitofp i32 %1061 to float
-  %1063 = call float @dx.op.binary.f32(i32 35, float %1020, float 0.000000e+00)  ; FMax(a,b)
-  %1064 = call float @dx.op.binary.f32(i32 36, float %1063, float %1062)  ; FMin(a,b)
-  %1065 = fptoui float %1064 to i32
-  %1066 = uitofp i32 %1065 to float
-  %1067 = uitofp i32 %1060 to float
-  %1068 = fptoui float %45 to i32
-  %1069 = fptoui float %182 to i32
-  %1070 = fptoui float %1066 to i32
-  %1071 = fptoui float %1067 to i32
-  %1072 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1073 = extractvalue %dx.types.CBufRet.i32 %1072, 0
-  %1074 = extractvalue %dx.types.CBufRet.i32 %1072, 1
-  %1075 = extractvalue %dx.types.CBufRet.i32 %1072, 2
-  %1076 = extractvalue %dx.types.CBufRet.i32 %1072, 3
-  %1077 = mul i32 %1073, %1068
-  %1078 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1069, i32 %1074, i32 %1077)  ; IMad(a,b,c)
-  %1079 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1070, i32 %1075, i32 %1078)  ; IMad(a,b,c)
-  %1080 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1071, i32 %1076, i32 %1079)  ; IMad(a,b,c)
-  %1081 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1080, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1082 = extractvalue %dx.types.ResRet.i32 %1081, 0
-  %1083 = extractvalue %dx.types.ResRet.i32 %1081, 1
-  %1084 = zext i32 %1082 to i64
-  %1085 = zext i32 %1083 to i64
-  %1086 = shl i64 %1085, 32
-  %1087 = or i64 %1084, %1086
-  %1088 = sitofp i64 %1087 to float
-  br label %1181
-
-; <label>:1089                                    ; preds = %1053
-  %1090 = icmp eq i32 %1021, 2
-  br i1 %1090, label %1091, label %1181
-
-; <label>:1091                                    ; preds = %1089
-  %1092 = fsub fast float %22, %20
-  %1093 = fcmp fast olt float %1018, %20
-  br i1 %1093, label %1094, label %1107
-
-; <label>:1094                                    ; preds = %1091
-  %1095 = fsub fast float %20, %1018
-  %1096 = fdiv fast float %1095, %1092
-  %1097 = fptoui float %1096 to i32
-  %1098 = uitofp i32 %1097 to float
-  %1099 = fmul fast float %1098, %1092
-  %1100 = fsub fast float %1095, %1099
-  %1101 = and i32 %1097, 1
-  %1102 = icmp eq i32 %1101, 0
-  br i1 %1102, label %1103, label %1105
-
-; <label>:1103                                    ; preds = %1094
-  %1104 = fadd fast float %1100, %20
-  br label %1122
-
-; <label>:1105                                    ; preds = %1094
-  %1106 = fsub fast float %22, %1100
-  br label %1122
-
-; <label>:1107                                    ; preds = %1091
-  %1108 = fcmp fast ogt float %1018, %22
-  br i1 %1108, label %1109, label %1122
-
-; <label>:1109                                    ; preds = %1107
-  %1110 = fsub fast float %1018, %22
-  %1111 = fdiv fast float %1110, %1092
-  %1112 = fptoui float %1111 to i32
-  %1113 = uitofp i32 %1112 to float
-  %1114 = fmul fast float %1113, %1092
-  %1115 = fsub fast float %1110, %1114
-  %1116 = and i32 %1112, 1
-  %1117 = icmp eq i32 %1116, 0
-  br i1 %1117, label %1118, label %1120
-
-; <label>:1118                                    ; preds = %1109
-  %1119 = fsub fast float %22, %1115
-  br label %1122
-
-; <label>:1120                                    ; preds = %1109
-  %1121 = fadd fast float %1115, %20
-  br label %1122
-
-; <label>:1122                                    ; preds = %1120, %1118, %1107, %1105, %1103
-  %1123 = phi float [ %1104, %1103 ], [ %1106, %1105 ], [ %1119, %1118 ], [ %1121, %1120 ], [ %1018, %1107 ]
-  %1124 = fptoui float %1123 to i32
-  %1125 = fsub fast float %24, %20
-  %1126 = fcmp fast olt float %1020, %20
-  br i1 %1126, label %1127, label %1140
-
-; <label>:1127                                    ; preds = %1122
-  %1128 = fsub fast float %20, %1020
-  %1129 = fdiv fast float %1128, %1125
-  %1130 = fptoui float %1129 to i32
-  %1131 = uitofp i32 %1130 to float
-  %1132 = fmul fast float %1131, %1125
-  %1133 = fsub fast float %1128, %1132
-  %1134 = and i32 %1130, 1
-  %1135 = icmp eq i32 %1134, 0
-  br i1 %1135, label %1136, label %1138
-
-; <label>:1136                                    ; preds = %1127
-  %1137 = fadd fast float %1133, %20
-  br label %1155
-
-; <label>:1138                                    ; preds = %1127
-  %1139 = fsub fast float %24, %1133
-  br label %1155
-
-; <label>:1140                                    ; preds = %1122
-  %1141 = fcmp fast ogt float %1020, %24
-  br i1 %1141, label %1142, label %1155
-
-; <label>:1142                                    ; preds = %1140
-  %1143 = fsub fast float %1020, %24
-  %1144 = fdiv fast float %1143, %1125
-  %1145 = fptoui float %1144 to i32
-  %1146 = uitofp i32 %1145 to float
-  %1147 = fmul fast float %1146, %1125
-  %1148 = fsub fast float %1143, %1147
-  %1149 = and i32 %1145, 1
-  %1150 = icmp eq i32 %1149, 0
-  br i1 %1150, label %1151, label %1153
-
-; <label>:1151                                    ; preds = %1142
-  %1152 = fsub fast float %24, %1148
-  br label %1155
-
-; <label>:1153                                    ; preds = %1142
-  %1154 = fadd fast float %1148, %20
-  br label %1155
-
-; <label>:1155                                    ; preds = %1153, %1151, %1140, %1138, %1136
-  %1156 = phi float [ %1137, %1136 ], [ %1139, %1138 ], [ %1152, %1151 ], [ %1154, %1153 ], [ %1020, %1140 ]
-  %1157 = fptoui float %1156 to i32
-  %1158 = uitofp i32 %1157 to float
-  %1159 = uitofp i32 %1124 to float
-  %1160 = fptoui float %45 to i32
-  %1161 = fptoui float %182 to i32
-  %1162 = fptoui float %1158 to i32
-  %1163 = fptoui float %1159 to i32
-  %1164 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1165 = extractvalue %dx.types.CBufRet.i32 %1164, 0
-  %1166 = extractvalue %dx.types.CBufRet.i32 %1164, 1
-  %1167 = extractvalue %dx.types.CBufRet.i32 %1164, 2
-  %1168 = extractvalue %dx.types.CBufRet.i32 %1164, 3
-  %1169 = mul i32 %1165, %1160
-  %1170 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1161, i32 %1166, i32 %1169)  ; IMad(a,b,c)
-  %1171 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1162, i32 %1167, i32 %1170)  ; IMad(a,b,c)
-  %1172 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1163, i32 %1168, i32 %1171)  ; IMad(a,b,c)
-  %1173 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1172, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1174 = extractvalue %dx.types.ResRet.i32 %1173, 0
-  %1175 = extractvalue %dx.types.ResRet.i32 %1173, 1
-  %1176 = zext i32 %1174 to i64
-  %1177 = zext i32 %1175 to i64
-  %1178 = shl i64 %1177, 32
-  %1179 = or i64 %1176, %1178
-  %1180 = sitofp i64 %1179 to float
-  br label %1181
-
-; <label>:1181                                    ; preds = %1155, %1089, %1055, %1033, %1023
-  %1182 = phi float [ %1052, %1033 ], [ 0.000000e+00, %1023 ], [ %1088, %1055 ], [ %1180, %1155 ], [ 0.000000e+00, %1089 ]
-  br i1 %1022, label %1183, label %1213
-
-; <label>:1183                                    ; preds = %1181
-  %1184 = fcmp fast oge float %1017, 0.000000e+00
-  %1185 = fptoui float %1017 to i32
-  %1186 = icmp ult i32 %1185, %13
-  %1187 = and i1 %1184, %1186
-  %1188 = fcmp fast oge float %1020, 0.000000e+00
-  %1189 = and i1 %1188, %1187
-  %1190 = fptoui float %1020 to i32
-  %1191 = icmp ult i32 %1190, %15
-  %1192 = and i1 %1191, %1189
-  br i1 %1192, label %1193, label %1341
-
-; <label>:1193                                    ; preds = %1183
-  %1194 = fptoui float %45 to i32
-  %1195 = fptoui float %182 to i32
-  %1196 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1197 = extractvalue %dx.types.CBufRet.i32 %1196, 0
-  %1198 = extractvalue %dx.types.CBufRet.i32 %1196, 1
-  %1199 = extractvalue %dx.types.CBufRet.i32 %1196, 2
-  %1200 = extractvalue %dx.types.CBufRet.i32 %1196, 3
-  %1201 = mul i32 %1197, %1194
-  %1202 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1195, i32 %1198, i32 %1201)  ; IMad(a,b,c)
-  %1203 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1190, i32 %1199, i32 %1202)  ; IMad(a,b,c)
-  %1204 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1185, i32 %1200, i32 %1203)  ; IMad(a,b,c)
-  %1205 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1204, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1206 = extractvalue %dx.types.ResRet.i32 %1205, 0
-  %1207 = extractvalue %dx.types.ResRet.i32 %1205, 1
-  %1208 = zext i32 %1206 to i64
-  %1209 = zext i32 %1207 to i64
-  %1210 = shl i64 %1209, 32
-  %1211 = or i64 %1208, %1210
-  %1212 = sitofp i64 %1211 to float
-  br label %1341
-
-; <label>:1213                                    ; preds = %1181
-  %1214 = icmp eq i32 %1021, 1
-  br i1 %1214, label %1215, label %1249
-
-; <label>:1215                                    ; preds = %1213
-  %1216 = add i32 %13, -1
-  %1217 = uitofp i32 %1216 to float
-  %1218 = call float @dx.op.binary.f32(i32 35, float %1017, float 0.000000e+00)  ; FMax(a,b)
-  %1219 = call float @dx.op.binary.f32(i32 36, float %1218, float %1217)  ; FMin(a,b)
-  %1220 = fptoui float %1219 to i32
-  %1221 = add i32 %15, -1
-  %1222 = uitofp i32 %1221 to float
-  %1223 = call float @dx.op.binary.f32(i32 35, float %1020, float 0.000000e+00)  ; FMax(a,b)
-  %1224 = call float @dx.op.binary.f32(i32 36, float %1223, float %1222)  ; FMin(a,b)
-  %1225 = fptoui float %1224 to i32
-  %1226 = uitofp i32 %1225 to float
-  %1227 = uitofp i32 %1220 to float
-  %1228 = fptoui float %45 to i32
-  %1229 = fptoui float %182 to i32
-  %1230 = fptoui float %1226 to i32
-  %1231 = fptoui float %1227 to i32
-  %1232 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1233 = extractvalue %dx.types.CBufRet.i32 %1232, 0
-  %1234 = extractvalue %dx.types.CBufRet.i32 %1232, 1
-  %1235 = extractvalue %dx.types.CBufRet.i32 %1232, 2
-  %1236 = extractvalue %dx.types.CBufRet.i32 %1232, 3
-  %1237 = mul i32 %1233, %1228
-  %1238 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1229, i32 %1234, i32 %1237)  ; IMad(a,b,c)
-  %1239 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1230, i32 %1235, i32 %1238)  ; IMad(a,b,c)
-  %1240 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1231, i32 %1236, i32 %1239)  ; IMad(a,b,c)
-  %1241 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1240, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1242 = extractvalue %dx.types.ResRet.i32 %1241, 0
-  %1243 = extractvalue %dx.types.ResRet.i32 %1241, 1
-  %1244 = zext i32 %1242 to i64
-  %1245 = zext i32 %1243 to i64
-  %1246 = shl i64 %1245, 32
-  %1247 = or i64 %1244, %1246
-  %1248 = sitofp i64 %1247 to float
-  br label %1341
-
-; <label>:1249                                    ; preds = %1213
-  %1250 = icmp eq i32 %1021, 2
-  br i1 %1250, label %1251, label %1341
-
-; <label>:1251                                    ; preds = %1249
-  %1252 = fsub fast float %22, %20
-  %1253 = fcmp fast olt float %1017, %20
-  br i1 %1253, label %1254, label %1267
-
-; <label>:1254                                    ; preds = %1251
-  %1255 = fsub fast float %20, %1017
-  %1256 = fdiv fast float %1255, %1252
-  %1257 = fptoui float %1256 to i32
-  %1258 = uitofp i32 %1257 to float
-  %1259 = fmul fast float %1258, %1252
-  %1260 = fsub fast float %1255, %1259
-  %1261 = and i32 %1257, 1
-  %1262 = icmp eq i32 %1261, 0
-  br i1 %1262, label %1263, label %1265
-
-; <label>:1263                                    ; preds = %1254
-  %1264 = fadd fast float %1260, %20
-  br label %1282
-
-; <label>:1265                                    ; preds = %1254
-  %1266 = fsub fast float %22, %1260
-  br label %1282
-
-; <label>:1267                                    ; preds = %1251
-  %1268 = fcmp fast ogt float %1017, %22
-  br i1 %1268, label %1269, label %1282
-
-; <label>:1269                                    ; preds = %1267
-  %1270 = fsub fast float %1017, %22
-  %1271 = fdiv fast float %1270, %1252
-  %1272 = fptoui float %1271 to i32
-  %1273 = uitofp i32 %1272 to float
-  %1274 = fmul fast float %1273, %1252
-  %1275 = fsub fast float %1270, %1274
-  %1276 = and i32 %1272, 1
-  %1277 = icmp eq i32 %1276, 0
-  br i1 %1277, label %1278, label %1280
-
-; <label>:1278                                    ; preds = %1269
-  %1279 = fsub fast float %22, %1275
-  br label %1282
-
-; <label>:1280                                    ; preds = %1269
-  %1281 = fadd fast float %1275, %20
-  br label %1282
-
-; <label>:1282                                    ; preds = %1280, %1278, %1267, %1265, %1263
-  %1283 = phi float [ %1264, %1263 ], [ %1266, %1265 ], [ %1279, %1278 ], [ %1281, %1280 ], [ %1017, %1267 ]
-  %1284 = fptoui float %1283 to i32
-  %1285 = fsub fast float %24, %20
-  %1286 = fcmp fast olt float %1020, %20
-  br i1 %1286, label %1287, label %1300
-
-; <label>:1287                                    ; preds = %1282
-  %1288 = fsub fast float %20, %1020
-  %1289 = fdiv fast float %1288, %1285
-  %1290 = fptoui float %1289 to i32
-  %1291 = uitofp i32 %1290 to float
-  %1292 = fmul fast float %1291, %1285
-  %1293 = fsub fast float %1288, %1292
-  %1294 = and i32 %1290, 1
-  %1295 = icmp eq i32 %1294, 0
-  br i1 %1295, label %1296, label %1298
-
-; <label>:1296                                    ; preds = %1287
-  %1297 = fadd fast float %1293, %20
-  br label %1315
-
-; <label>:1298                                    ; preds = %1287
-  %1299 = fsub fast float %24, %1293
-  br label %1315
-
-; <label>:1300                                    ; preds = %1282
-  %1301 = fcmp fast ogt float %1020, %24
-  br i1 %1301, label %1302, label %1315
-
-; <label>:1302                                    ; preds = %1300
-  %1303 = fsub fast float %1020, %24
-  %1304 = fdiv fast float %1303, %1285
-  %1305 = fptoui float %1304 to i32
-  %1306 = uitofp i32 %1305 to float
-  %1307 = fmul fast float %1306, %1285
-  %1308 = fsub fast float %1303, %1307
-  %1309 = and i32 %1305, 1
-  %1310 = icmp eq i32 %1309, 0
-  br i1 %1310, label %1311, label %1313
-
-; <label>:1311                                    ; preds = %1302
-  %1312 = fsub fast float %24, %1308
-  br label %1315
-
-; <label>:1313                                    ; preds = %1302
-  %1314 = fadd fast float %1308, %20
-  br label %1315
-
-; <label>:1315                                    ; preds = %1313, %1311, %1300, %1298, %1296
-  %1316 = phi float [ %1297, %1296 ], [ %1299, %1298 ], [ %1312, %1311 ], [ %1314, %1313 ], [ %1020, %1300 ]
-  %1317 = fptoui float %1316 to i32
-  %1318 = uitofp i32 %1317 to float
-  %1319 = uitofp i32 %1284 to float
-  %1320 = fptoui float %45 to i32
-  %1321 = fptoui float %182 to i32
-  %1322 = fptoui float %1318 to i32
-  %1323 = fptoui float %1319 to i32
-  %1324 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1325 = extractvalue %dx.types.CBufRet.i32 %1324, 0
-  %1326 = extractvalue %dx.types.CBufRet.i32 %1324, 1
-  %1327 = extractvalue %dx.types.CBufRet.i32 %1324, 2
-  %1328 = extractvalue %dx.types.CBufRet.i32 %1324, 3
-  %1329 = mul i32 %1325, %1320
-  %1330 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1321, i32 %1326, i32 %1329)  ; IMad(a,b,c)
-  %1331 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1322, i32 %1327, i32 %1330)  ; IMad(a,b,c)
-  %1332 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1323, i32 %1328, i32 %1331)  ; IMad(a,b,c)
-  %1333 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1332, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1334 = extractvalue %dx.types.ResRet.i32 %1333, 0
-  %1335 = extractvalue %dx.types.ResRet.i32 %1333, 1
-  %1336 = zext i32 %1334 to i64
-  %1337 = zext i32 %1335 to i64
-  %1338 = shl i64 %1337, 32
-  %1339 = or i64 %1336, %1338
-  %1340 = sitofp i64 %1339 to float
-  br label %1341
-
-; <label>:1341                                    ; preds = %1315, %1249, %1215, %1193, %1183
-  %1342 = phi float [ %1212, %1193 ], [ 0.000000e+00, %1183 ], [ %1248, %1215 ], [ %1340, %1315 ], [ 0.000000e+00, %1249 ]
-  %1343 = fadd fast float %1017, 1.000000e+00
-  br i1 %1022, label %1344, label %1374
-
-; <label>:1344                                    ; preds = %1341
-  %1345 = fcmp fast oge float %1343, 0.000000e+00
-  %1346 = fptoui float %1343 to i32
-  %1347 = icmp ult i32 %1346, %13
-  %1348 = and i1 %1345, %1347
-  %1349 = fcmp fast oge float %1020, 0.000000e+00
-  %1350 = and i1 %1349, %1348
-  %1351 = fptoui float %1020 to i32
-  %1352 = icmp ult i32 %1351, %15
-  %1353 = and i1 %1352, %1350
-  br i1 %1353, label %1354, label %1502
-
-; <label>:1354                                    ; preds = %1344
-  %1355 = fptoui float %45 to i32
-  %1356 = fptoui float %182 to i32
-  %1357 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1358 = extractvalue %dx.types.CBufRet.i32 %1357, 0
-  %1359 = extractvalue %dx.types.CBufRet.i32 %1357, 1
-  %1360 = extractvalue %dx.types.CBufRet.i32 %1357, 2
-  %1361 = extractvalue %dx.types.CBufRet.i32 %1357, 3
-  %1362 = mul i32 %1358, %1355
-  %1363 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1356, i32 %1359, i32 %1362)  ; IMad(a,b,c)
-  %1364 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1351, i32 %1360, i32 %1363)  ; IMad(a,b,c)
-  %1365 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1346, i32 %1361, i32 %1364)  ; IMad(a,b,c)
-  %1366 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1365, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1367 = extractvalue %dx.types.ResRet.i32 %1366, 0
-  %1368 = extractvalue %dx.types.ResRet.i32 %1366, 1
-  %1369 = zext i32 %1367 to i64
-  %1370 = zext i32 %1368 to i64
-  %1371 = shl i64 %1370, 32
-  %1372 = or i64 %1369, %1371
-  %1373 = sitofp i64 %1372 to float
-  br label %1502
-
-; <label>:1374                                    ; preds = %1341
-  %1375 = icmp eq i32 %1021, 1
-  br i1 %1375, label %1376, label %1410
-
-; <label>:1376                                    ; preds = %1374
-  %1377 = add i32 %13, -1
-  %1378 = uitofp i32 %1377 to float
-  %1379 = call float @dx.op.binary.f32(i32 35, float %1343, float 0.000000e+00)  ; FMax(a,b)
-  %1380 = call float @dx.op.binary.f32(i32 36, float %1379, float %1378)  ; FMin(a,b)
-  %1381 = fptoui float %1380 to i32
-  %1382 = add i32 %15, -1
-  %1383 = uitofp i32 %1382 to float
-  %1384 = call float @dx.op.binary.f32(i32 35, float %1020, float 0.000000e+00)  ; FMax(a,b)
-  %1385 = call float @dx.op.binary.f32(i32 36, float %1384, float %1383)  ; FMin(a,b)
-  %1386 = fptoui float %1385 to i32
-  %1387 = uitofp i32 %1386 to float
-  %1388 = uitofp i32 %1381 to float
-  %1389 = fptoui float %45 to i32
-  %1390 = fptoui float %182 to i32
-  %1391 = fptoui float %1387 to i32
-  %1392 = fptoui float %1388 to i32
-  %1393 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1394 = extractvalue %dx.types.CBufRet.i32 %1393, 0
-  %1395 = extractvalue %dx.types.CBufRet.i32 %1393, 1
-  %1396 = extractvalue %dx.types.CBufRet.i32 %1393, 2
-  %1397 = extractvalue %dx.types.CBufRet.i32 %1393, 3
-  %1398 = mul i32 %1394, %1389
-  %1399 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1390, i32 %1395, i32 %1398)  ; IMad(a,b,c)
-  %1400 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1391, i32 %1396, i32 %1399)  ; IMad(a,b,c)
-  %1401 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1392, i32 %1397, i32 %1400)  ; IMad(a,b,c)
-  %1402 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1401, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1403 = extractvalue %dx.types.ResRet.i32 %1402, 0
-  %1404 = extractvalue %dx.types.ResRet.i32 %1402, 1
-  %1405 = zext i32 %1403 to i64
-  %1406 = zext i32 %1404 to i64
-  %1407 = shl i64 %1406, 32
-  %1408 = or i64 %1405, %1407
-  %1409 = sitofp i64 %1408 to float
-  br label %1502
-
-; <label>:1410                                    ; preds = %1374
-  %1411 = icmp eq i32 %1021, 2
-  br i1 %1411, label %1412, label %1502
-
-; <label>:1412                                    ; preds = %1410
-  %1413 = fsub fast float %22, %20
-  %1414 = fcmp fast olt float %1343, %20
-  br i1 %1414, label %1415, label %1428
-
-; <label>:1415                                    ; preds = %1412
-  %1416 = fsub fast float %20, %1343
-  %1417 = fdiv fast float %1416, %1413
-  %1418 = fptoui float %1417 to i32
-  %1419 = uitofp i32 %1418 to float
-  %1420 = fmul fast float %1419, %1413
-  %1421 = fsub fast float %1416, %1420
-  %1422 = and i32 %1418, 1
-  %1423 = icmp eq i32 %1422, 0
-  br i1 %1423, label %1424, label %1426
-
-; <label>:1424                                    ; preds = %1415
-  %1425 = fadd fast float %1421, %20
-  br label %1443
-
-; <label>:1426                                    ; preds = %1415
-  %1427 = fsub fast float %22, %1421
-  br label %1443
-
-; <label>:1428                                    ; preds = %1412
-  %1429 = fcmp fast ogt float %1343, %22
-  br i1 %1429, label %1430, label %1443
-
-; <label>:1430                                    ; preds = %1428
-  %1431 = fsub fast float %1343, %22
-  %1432 = fdiv fast float %1431, %1413
-  %1433 = fptoui float %1432 to i32
-  %1434 = uitofp i32 %1433 to float
-  %1435 = fmul fast float %1434, %1413
-  %1436 = fsub fast float %1431, %1435
-  %1437 = and i32 %1433, 1
-  %1438 = icmp eq i32 %1437, 0
-  br i1 %1438, label %1439, label %1441
-
-; <label>:1439                                    ; preds = %1430
-  %1440 = fsub fast float %22, %1436
-  br label %1443
-
-; <label>:1441                                    ; preds = %1430
-  %1442 = fadd fast float %1436, %20
-  br label %1443
-
-; <label>:1443                                    ; preds = %1441, %1439, %1428, %1426, %1424
-  %1444 = phi float [ %1425, %1424 ], [ %1427, %1426 ], [ %1440, %1439 ], [ %1442, %1441 ], [ %1343, %1428 ]
-  %1445 = fptoui float %1444 to i32
-  %1446 = fsub fast float %24, %20
-  %1447 = fcmp fast olt float %1020, %20
-  br i1 %1447, label %1448, label %1461
-
-; <label>:1448                                    ; preds = %1443
-  %1449 = fsub fast float %20, %1020
-  %1450 = fdiv fast float %1449, %1446
-  %1451 = fptoui float %1450 to i32
-  %1452 = uitofp i32 %1451 to float
-  %1453 = fmul fast float %1452, %1446
-  %1454 = fsub fast float %1449, %1453
-  %1455 = and i32 %1451, 1
-  %1456 = icmp eq i32 %1455, 0
-  br i1 %1456, label %1457, label %1459
-
-; <label>:1457                                    ; preds = %1448
-  %1458 = fadd fast float %1454, %20
-  br label %1476
-
-; <label>:1459                                    ; preds = %1448
-  %1460 = fsub fast float %24, %1454
-  br label %1476
-
-; <label>:1461                                    ; preds = %1443
-  %1462 = fcmp fast ogt float %1020, %24
-  br i1 %1462, label %1463, label %1476
-
-; <label>:1463                                    ; preds = %1461
-  %1464 = fsub fast float %1020, %24
-  %1465 = fdiv fast float %1464, %1446
-  %1466 = fptoui float %1465 to i32
-  %1467 = uitofp i32 %1466 to float
-  %1468 = fmul fast float %1467, %1446
-  %1469 = fsub fast float %1464, %1468
-  %1470 = and i32 %1466, 1
-  %1471 = icmp eq i32 %1470, 0
-  br i1 %1471, label %1472, label %1474
-
-; <label>:1472                                    ; preds = %1463
-  %1473 = fsub fast float %24, %1469
-  br label %1476
-
-; <label>:1474                                    ; preds = %1463
-  %1475 = fadd fast float %1469, %20
-  br label %1476
-
-; <label>:1476                                    ; preds = %1474, %1472, %1461, %1459, %1457
-  %1477 = phi float [ %1458, %1457 ], [ %1460, %1459 ], [ %1473, %1472 ], [ %1475, %1474 ], [ %1020, %1461 ]
-  %1478 = fptoui float %1477 to i32
-  %1479 = uitofp i32 %1478 to float
-  %1480 = uitofp i32 %1445 to float
-  %1481 = fptoui float %45 to i32
-  %1482 = fptoui float %182 to i32
-  %1483 = fptoui float %1479 to i32
-  %1484 = fptoui float %1480 to i32
-  %1485 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1486 = extractvalue %dx.types.CBufRet.i32 %1485, 0
-  %1487 = extractvalue %dx.types.CBufRet.i32 %1485, 1
-  %1488 = extractvalue %dx.types.CBufRet.i32 %1485, 2
-  %1489 = extractvalue %dx.types.CBufRet.i32 %1485, 3
-  %1490 = mul i32 %1486, %1481
-  %1491 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1482, i32 %1487, i32 %1490)  ; IMad(a,b,c)
-  %1492 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1483, i32 %1488, i32 %1491)  ; IMad(a,b,c)
-  %1493 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1484, i32 %1489, i32 %1492)  ; IMad(a,b,c)
-  %1494 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1493, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1495 = extractvalue %dx.types.ResRet.i32 %1494, 0
-  %1496 = extractvalue %dx.types.ResRet.i32 %1494, 1
-  %1497 = zext i32 %1495 to i64
-  %1498 = zext i32 %1496 to i64
-  %1499 = shl i64 %1498, 32
-  %1500 = or i64 %1497, %1499
-  %1501 = sitofp i64 %1500 to float
-  br label %1502
-
-; <label>:1502                                    ; preds = %1476, %1410, %1376, %1354, %1344
-  %1503 = phi float [ %1373, %1354 ], [ 0.000000e+00, %1344 ], [ %1409, %1376 ], [ %1501, %1476 ], [ 0.000000e+00, %1410 ]
-  %1504 = fadd fast float %1017, 2.000000e+00
-  br i1 %1022, label %1505, label %1535
-
-; <label>:1505                                    ; preds = %1502
-  %1506 = fcmp fast oge float %1504, 0.000000e+00
-  %1507 = fptoui float %1504 to i32
-  %1508 = icmp ult i32 %1507, %13
-  %1509 = and i1 %1506, %1508
-  %1510 = fcmp fast oge float %1020, 0.000000e+00
-  %1511 = and i1 %1510, %1509
-  %1512 = fptoui float %1020 to i32
-  %1513 = icmp ult i32 %1512, %15
-  %1514 = and i1 %1513, %1511
-  br i1 %1514, label %1515, label %1663
-
-; <label>:1515                                    ; preds = %1505
-  %1516 = fptoui float %45 to i32
-  %1517 = fptoui float %182 to i32
-  %1518 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1519 = extractvalue %dx.types.CBufRet.i32 %1518, 0
-  %1520 = extractvalue %dx.types.CBufRet.i32 %1518, 1
-  %1521 = extractvalue %dx.types.CBufRet.i32 %1518, 2
-  %1522 = extractvalue %dx.types.CBufRet.i32 %1518, 3
-  %1523 = mul i32 %1519, %1516
-  %1524 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1517, i32 %1520, i32 %1523)  ; IMad(a,b,c)
-  %1525 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1512, i32 %1521, i32 %1524)  ; IMad(a,b,c)
-  %1526 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1507, i32 %1522, i32 %1525)  ; IMad(a,b,c)
-  %1527 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1526, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1528 = extractvalue %dx.types.ResRet.i32 %1527, 0
-  %1529 = extractvalue %dx.types.ResRet.i32 %1527, 1
-  %1530 = zext i32 %1528 to i64
-  %1531 = zext i32 %1529 to i64
-  %1532 = shl i64 %1531, 32
-  %1533 = or i64 %1530, %1532
-  %1534 = sitofp i64 %1533 to float
-  br label %1663
-
-; <label>:1535                                    ; preds = %1502
-  %1536 = icmp eq i32 %1021, 1
-  br i1 %1536, label %1537, label %1571
-
-; <label>:1537                                    ; preds = %1535
-  %1538 = add i32 %13, -1
-  %1539 = uitofp i32 %1538 to float
-  %1540 = call float @dx.op.binary.f32(i32 35, float %1504, float 0.000000e+00)  ; FMax(a,b)
-  %1541 = call float @dx.op.binary.f32(i32 36, float %1540, float %1539)  ; FMin(a,b)
-  %1542 = fptoui float %1541 to i32
-  %1543 = add i32 %15, -1
-  %1544 = uitofp i32 %1543 to float
-  %1545 = call float @dx.op.binary.f32(i32 35, float %1020, float 0.000000e+00)  ; FMax(a,b)
-  %1546 = call float @dx.op.binary.f32(i32 36, float %1545, float %1544)  ; FMin(a,b)
-  %1547 = fptoui float %1546 to i32
-  %1548 = uitofp i32 %1547 to float
-  %1549 = uitofp i32 %1542 to float
-  %1550 = fptoui float %45 to i32
-  %1551 = fptoui float %182 to i32
-  %1552 = fptoui float %1548 to i32
-  %1553 = fptoui float %1549 to i32
-  %1554 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1555 = extractvalue %dx.types.CBufRet.i32 %1554, 0
-  %1556 = extractvalue %dx.types.CBufRet.i32 %1554, 1
-  %1557 = extractvalue %dx.types.CBufRet.i32 %1554, 2
-  %1558 = extractvalue %dx.types.CBufRet.i32 %1554, 3
-  %1559 = mul i32 %1555, %1550
-  %1560 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1551, i32 %1556, i32 %1559)  ; IMad(a,b,c)
-  %1561 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1552, i32 %1557, i32 %1560)  ; IMad(a,b,c)
-  %1562 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1553, i32 %1558, i32 %1561)  ; IMad(a,b,c)
-  %1563 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1562, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1564 = extractvalue %dx.types.ResRet.i32 %1563, 0
-  %1565 = extractvalue %dx.types.ResRet.i32 %1563, 1
-  %1566 = zext i32 %1564 to i64
-  %1567 = zext i32 %1565 to i64
-  %1568 = shl i64 %1567, 32
-  %1569 = or i64 %1566, %1568
-  %1570 = sitofp i64 %1569 to float
-  br label %1663
-
-; <label>:1571                                    ; preds = %1535
-  %1572 = icmp eq i32 %1021, 2
-  br i1 %1572, label %1573, label %1663
-
-; <label>:1573                                    ; preds = %1571
-  %1574 = fsub fast float %22, %20
-  %1575 = fcmp fast olt float %1504, %20
-  br i1 %1575, label %1576, label %1589
-
-; <label>:1576                                    ; preds = %1573
-  %1577 = fsub fast float %20, %1504
-  %1578 = fdiv fast float %1577, %1574
-  %1579 = fptoui float %1578 to i32
-  %1580 = uitofp i32 %1579 to float
-  %1581 = fmul fast float %1580, %1574
-  %1582 = fsub fast float %1577, %1581
-  %1583 = and i32 %1579, 1
-  %1584 = icmp eq i32 %1583, 0
-  br i1 %1584, label %1585, label %1587
-
-; <label>:1585                                    ; preds = %1576
-  %1586 = fadd fast float %1582, %20
-  br label %1604
-
-; <label>:1587                                    ; preds = %1576
-  %1588 = fsub fast float %22, %1582
-  br label %1604
-
-; <label>:1589                                    ; preds = %1573
-  %1590 = fcmp fast ogt float %1504, %22
-  br i1 %1590, label %1591, label %1604
-
-; <label>:1591                                    ; preds = %1589
-  %1592 = fsub fast float %1504, %22
-  %1593 = fdiv fast float %1592, %1574
-  %1594 = fptoui float %1593 to i32
-  %1595 = uitofp i32 %1594 to float
-  %1596 = fmul fast float %1595, %1574
-  %1597 = fsub fast float %1592, %1596
-  %1598 = and i32 %1594, 1
-  %1599 = icmp eq i32 %1598, 0
-  br i1 %1599, label %1600, label %1602
-
-; <label>:1600                                    ; preds = %1591
-  %1601 = fsub fast float %22, %1597
-  br label %1604
-
-; <label>:1602                                    ; preds = %1591
-  %1603 = fadd fast float %1597, %20
-  br label %1604
-
-; <label>:1604                                    ; preds = %1602, %1600, %1589, %1587, %1585
-  %1605 = phi float [ %1586, %1585 ], [ %1588, %1587 ], [ %1601, %1600 ], [ %1603, %1602 ], [ %1504, %1589 ]
-  %1606 = fptoui float %1605 to i32
-  %1607 = fsub fast float %24, %20
-  %1608 = fcmp fast olt float %1020, %20
-  br i1 %1608, label %1609, label %1622
-
-; <label>:1609                                    ; preds = %1604
-  %1610 = fsub fast float %20, %1020
-  %1611 = fdiv fast float %1610, %1607
-  %1612 = fptoui float %1611 to i32
-  %1613 = uitofp i32 %1612 to float
-  %1614 = fmul fast float %1613, %1607
-  %1615 = fsub fast float %1610, %1614
-  %1616 = and i32 %1612, 1
-  %1617 = icmp eq i32 %1616, 0
-  br i1 %1617, label %1618, label %1620
-
-; <label>:1618                                    ; preds = %1609
-  %1619 = fadd fast float %1615, %20
-  br label %1637
-
-; <label>:1620                                    ; preds = %1609
-  %1621 = fsub fast float %24, %1615
-  br label %1637
-
-; <label>:1622                                    ; preds = %1604
-  %1623 = fcmp fast ogt float %1020, %24
-  br i1 %1623, label %1624, label %1637
-
-; <label>:1624                                    ; preds = %1622
-  %1625 = fsub fast float %1020, %24
-  %1626 = fdiv fast float %1625, %1607
-  %1627 = fptoui float %1626 to i32
-  %1628 = uitofp i32 %1627 to float
-  %1629 = fmul fast float %1628, %1607
-  %1630 = fsub fast float %1625, %1629
-  %1631 = and i32 %1627, 1
-  %1632 = icmp eq i32 %1631, 0
-  br i1 %1632, label %1633, label %1635
-
-; <label>:1633                                    ; preds = %1624
-  %1634 = fsub fast float %24, %1630
-  br label %1637
-
-; <label>:1635                                    ; preds = %1624
-  %1636 = fadd fast float %1630, %20
-  br label %1637
-
-; <label>:1637                                    ; preds = %1635, %1633, %1622, %1620, %1618
-  %1638 = phi float [ %1619, %1618 ], [ %1621, %1620 ], [ %1634, %1633 ], [ %1636, %1635 ], [ %1020, %1622 ]
-  %1639 = fptoui float %1638 to i32
-  %1640 = uitofp i32 %1639 to float
-  %1641 = uitofp i32 %1606 to float
-  %1642 = fptoui float %45 to i32
-  %1643 = fptoui float %182 to i32
-  %1644 = fptoui float %1640 to i32
-  %1645 = fptoui float %1641 to i32
-  %1646 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1647 = extractvalue %dx.types.CBufRet.i32 %1646, 0
-  %1648 = extractvalue %dx.types.CBufRet.i32 %1646, 1
-  %1649 = extractvalue %dx.types.CBufRet.i32 %1646, 2
-  %1650 = extractvalue %dx.types.CBufRet.i32 %1646, 3
-  %1651 = mul i32 %1647, %1642
-  %1652 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1643, i32 %1648, i32 %1651)  ; IMad(a,b,c)
-  %1653 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1644, i32 %1649, i32 %1652)  ; IMad(a,b,c)
-  %1654 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1645, i32 %1650, i32 %1653)  ; IMad(a,b,c)
-  %1655 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1654, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1656 = extractvalue %dx.types.ResRet.i32 %1655, 0
-  %1657 = extractvalue %dx.types.ResRet.i32 %1655, 1
-  %1658 = zext i32 %1656 to i64
-  %1659 = zext i32 %1657 to i64
-  %1660 = shl i64 %1659, 32
-  %1661 = or i64 %1658, %1660
-  %1662 = sitofp i64 %1661 to float
-  br label %1663
-
-; <label>:1663                                    ; preds = %1637, %1571, %1537, %1515, %1505
-  %1664 = phi float [ %1534, %1515 ], [ 0.000000e+00, %1505 ], [ %1570, %1537 ], [ %1662, %1637 ], [ 0.000000e+00, %1571 ]
-  br i1 %1022, label %1665, label %1695
-
-; <label>:1665                                    ; preds = %1663
-  %1666 = fcmp fast oge float %1018, 0.000000e+00
-  %1667 = fptoui float %1018 to i32
-  %1668 = icmp ult i32 %1667, %13
-  %1669 = and i1 %1666, %1668
-  %1670 = fcmp fast oge float %1019, 0.000000e+00
-  %1671 = and i1 %1670, %1669
-  %1672 = fptoui float %1019 to i32
-  %1673 = icmp ult i32 %1672, %15
-  %1674 = and i1 %1673, %1671
-  br i1 %1674, label %1675, label %1823
-
-; <label>:1675                                    ; preds = %1665
-  %1676 = fptoui float %45 to i32
-  %1677 = fptoui float %182 to i32
-  %1678 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1679 = extractvalue %dx.types.CBufRet.i32 %1678, 0
-  %1680 = extractvalue %dx.types.CBufRet.i32 %1678, 1
-  %1681 = extractvalue %dx.types.CBufRet.i32 %1678, 2
-  %1682 = extractvalue %dx.types.CBufRet.i32 %1678, 3
-  %1683 = mul i32 %1679, %1676
-  %1684 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1677, i32 %1680, i32 %1683)  ; IMad(a,b,c)
-  %1685 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1672, i32 %1681, i32 %1684)  ; IMad(a,b,c)
-  %1686 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1667, i32 %1682, i32 %1685)  ; IMad(a,b,c)
-  %1687 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1686, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1688 = extractvalue %dx.types.ResRet.i32 %1687, 0
-  %1689 = extractvalue %dx.types.ResRet.i32 %1687, 1
-  %1690 = zext i32 %1688 to i64
-  %1691 = zext i32 %1689 to i64
-  %1692 = shl i64 %1691, 32
-  %1693 = or i64 %1690, %1692
-  %1694 = sitofp i64 %1693 to float
-  br label %1823
-
-; <label>:1695                                    ; preds = %1663
-  %1696 = icmp eq i32 %1021, 1
-  br i1 %1696, label %1697, label %1731
-
-; <label>:1697                                    ; preds = %1695
-  %1698 = add i32 %13, -1
-  %1699 = uitofp i32 %1698 to float
-  %1700 = call float @dx.op.binary.f32(i32 35, float %1018, float 0.000000e+00)  ; FMax(a,b)
-  %1701 = call float @dx.op.binary.f32(i32 36, float %1700, float %1699)  ; FMin(a,b)
-  %1702 = fptoui float %1701 to i32
-  %1703 = add i32 %15, -1
-  %1704 = uitofp i32 %1703 to float
-  %1705 = call float @dx.op.binary.f32(i32 35, float %1019, float 0.000000e+00)  ; FMax(a,b)
-  %1706 = call float @dx.op.binary.f32(i32 36, float %1705, float %1704)  ; FMin(a,b)
-  %1707 = fptoui float %1706 to i32
-  %1708 = uitofp i32 %1707 to float
-  %1709 = uitofp i32 %1702 to float
-  %1710 = fptoui float %45 to i32
-  %1711 = fptoui float %182 to i32
-  %1712 = fptoui float %1708 to i32
-  %1713 = fptoui float %1709 to i32
-  %1714 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1715 = extractvalue %dx.types.CBufRet.i32 %1714, 0
-  %1716 = extractvalue %dx.types.CBufRet.i32 %1714, 1
-  %1717 = extractvalue %dx.types.CBufRet.i32 %1714, 2
-  %1718 = extractvalue %dx.types.CBufRet.i32 %1714, 3
-  %1719 = mul i32 %1715, %1710
-  %1720 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1711, i32 %1716, i32 %1719)  ; IMad(a,b,c)
-  %1721 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1712, i32 %1717, i32 %1720)  ; IMad(a,b,c)
-  %1722 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1713, i32 %1718, i32 %1721)  ; IMad(a,b,c)
-  %1723 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1722, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1724 = extractvalue %dx.types.ResRet.i32 %1723, 0
-  %1725 = extractvalue %dx.types.ResRet.i32 %1723, 1
-  %1726 = zext i32 %1724 to i64
-  %1727 = zext i32 %1725 to i64
-  %1728 = shl i64 %1727, 32
-  %1729 = or i64 %1726, %1728
-  %1730 = sitofp i64 %1729 to float
-  br label %1823
-
-; <label>:1731                                    ; preds = %1695
-  %1732 = icmp eq i32 %1021, 2
-  br i1 %1732, label %1733, label %1823
-
-; <label>:1733                                    ; preds = %1731
-  %1734 = fsub fast float %22, %20
-  %1735 = fcmp fast olt float %1018, %20
-  br i1 %1735, label %1736, label %1749
-
-; <label>:1736                                    ; preds = %1733
-  %1737 = fsub fast float %20, %1018
-  %1738 = fdiv fast float %1737, %1734
-  %1739 = fptoui float %1738 to i32
-  %1740 = uitofp i32 %1739 to float
-  %1741 = fmul fast float %1740, %1734
-  %1742 = fsub fast float %1737, %1741
-  %1743 = and i32 %1739, 1
-  %1744 = icmp eq i32 %1743, 0
-  br i1 %1744, label %1745, label %1747
-
-; <label>:1745                                    ; preds = %1736
-  %1746 = fadd fast float %1742, %20
-  br label %1764
-
-; <label>:1747                                    ; preds = %1736
-  %1748 = fsub fast float %22, %1742
-  br label %1764
-
-; <label>:1749                                    ; preds = %1733
-  %1750 = fcmp fast ogt float %1018, %22
-  br i1 %1750, label %1751, label %1764
-
-; <label>:1751                                    ; preds = %1749
-  %1752 = fsub fast float %1018, %22
-  %1753 = fdiv fast float %1752, %1734
-  %1754 = fptoui float %1753 to i32
-  %1755 = uitofp i32 %1754 to float
-  %1756 = fmul fast float %1755, %1734
-  %1757 = fsub fast float %1752, %1756
-  %1758 = and i32 %1754, 1
-  %1759 = icmp eq i32 %1758, 0
-  br i1 %1759, label %1760, label %1762
-
-; <label>:1760                                    ; preds = %1751
-  %1761 = fsub fast float %22, %1757
-  br label %1764
-
-; <label>:1762                                    ; preds = %1751
-  %1763 = fadd fast float %1757, %20
-  br label %1764
-
-; <label>:1764                                    ; preds = %1762, %1760, %1749, %1747, %1745
-  %1765 = phi float [ %1746, %1745 ], [ %1748, %1747 ], [ %1761, %1760 ], [ %1763, %1762 ], [ %1018, %1749 ]
-  %1766 = fptoui float %1765 to i32
-  %1767 = fsub fast float %24, %20
-  %1768 = fcmp fast olt float %1019, %20
-  br i1 %1768, label %1769, label %1782
-
-; <label>:1769                                    ; preds = %1764
-  %1770 = fsub fast float %20, %1019
-  %1771 = fdiv fast float %1770, %1767
-  %1772 = fptoui float %1771 to i32
-  %1773 = uitofp i32 %1772 to float
-  %1774 = fmul fast float %1773, %1767
-  %1775 = fsub fast float %1770, %1774
-  %1776 = and i32 %1772, 1
-  %1777 = icmp eq i32 %1776, 0
-  br i1 %1777, label %1778, label %1780
-
-; <label>:1778                                    ; preds = %1769
-  %1779 = fadd fast float %1775, %20
-  br label %1797
-
-; <label>:1780                                    ; preds = %1769
-  %1781 = fsub fast float %24, %1775
-  br label %1797
-
-; <label>:1782                                    ; preds = %1764
-  %1783 = fcmp fast ogt float %1019, %24
-  br i1 %1783, label %1784, label %1797
-
-; <label>:1784                                    ; preds = %1782
-  %1785 = fsub fast float %1019, %24
-  %1786 = fdiv fast float %1785, %1767
-  %1787 = fptoui float %1786 to i32
-  %1788 = uitofp i32 %1787 to float
-  %1789 = fmul fast float %1788, %1767
-  %1790 = fsub fast float %1785, %1789
-  %1791 = and i32 %1787, 1
-  %1792 = icmp eq i32 %1791, 0
-  br i1 %1792, label %1793, label %1795
-
-; <label>:1793                                    ; preds = %1784
-  %1794 = fsub fast float %24, %1790
-  br label %1797
-
-; <label>:1795                                    ; preds = %1784
-  %1796 = fadd fast float %1790, %20
-  br label %1797
-
-; <label>:1797                                    ; preds = %1795, %1793, %1782, %1780, %1778
-  %1798 = phi float [ %1779, %1778 ], [ %1781, %1780 ], [ %1794, %1793 ], [ %1796, %1795 ], [ %1019, %1782 ]
-  %1799 = fptoui float %1798 to i32
-  %1800 = uitofp i32 %1799 to float
-  %1801 = uitofp i32 %1766 to float
-  %1802 = fptoui float %45 to i32
-  %1803 = fptoui float %182 to i32
-  %1804 = fptoui float %1800 to i32
-  %1805 = fptoui float %1801 to i32
-  %1806 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1807 = extractvalue %dx.types.CBufRet.i32 %1806, 0
-  %1808 = extractvalue %dx.types.CBufRet.i32 %1806, 1
-  %1809 = extractvalue %dx.types.CBufRet.i32 %1806, 2
-  %1810 = extractvalue %dx.types.CBufRet.i32 %1806, 3
-  %1811 = mul i32 %1807, %1802
-  %1812 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1803, i32 %1808, i32 %1811)  ; IMad(a,b,c)
-  %1813 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1804, i32 %1809, i32 %1812)  ; IMad(a,b,c)
-  %1814 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1805, i32 %1810, i32 %1813)  ; IMad(a,b,c)
-  %1815 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1814, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1816 = extractvalue %dx.types.ResRet.i32 %1815, 0
-  %1817 = extractvalue %dx.types.ResRet.i32 %1815, 1
-  %1818 = zext i32 %1816 to i64
-  %1819 = zext i32 %1817 to i64
-  %1820 = shl i64 %1819, 32
-  %1821 = or i64 %1818, %1820
-  %1822 = sitofp i64 %1821 to float
-  br label %1823
-
-; <label>:1823                                    ; preds = %1797, %1731, %1697, %1675, %1665
-  %1824 = phi float [ %1694, %1675 ], [ 0.000000e+00, %1665 ], [ %1730, %1697 ], [ %1822, %1797 ], [ 0.000000e+00, %1731 ]
-  br i1 %1022, label %1825, label %1855
-
-; <label>:1825                                    ; preds = %1823
-  %1826 = fcmp fast oge float %1017, 0.000000e+00
-  %1827 = fptoui float %1017 to i32
-  %1828 = icmp ult i32 %1827, %13
-  %1829 = and i1 %1826, %1828
-  %1830 = fcmp fast oge float %1019, 0.000000e+00
-  %1831 = and i1 %1830, %1829
-  %1832 = fptoui float %1019 to i32
-  %1833 = icmp ult i32 %1832, %15
-  %1834 = and i1 %1833, %1831
-  br i1 %1834, label %1835, label %1983
-
-; <label>:1835                                    ; preds = %1825
-  %1836 = fptoui float %45 to i32
-  %1837 = fptoui float %182 to i32
-  %1838 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1839 = extractvalue %dx.types.CBufRet.i32 %1838, 0
-  %1840 = extractvalue %dx.types.CBufRet.i32 %1838, 1
-  %1841 = extractvalue %dx.types.CBufRet.i32 %1838, 2
-  %1842 = extractvalue %dx.types.CBufRet.i32 %1838, 3
-  %1843 = mul i32 %1839, %1836
-  %1844 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1837, i32 %1840, i32 %1843)  ; IMad(a,b,c)
-  %1845 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1832, i32 %1841, i32 %1844)  ; IMad(a,b,c)
-  %1846 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1827, i32 %1842, i32 %1845)  ; IMad(a,b,c)
-  %1847 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1846, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1848 = extractvalue %dx.types.ResRet.i32 %1847, 0
-  %1849 = extractvalue %dx.types.ResRet.i32 %1847, 1
-  %1850 = zext i32 %1848 to i64
-  %1851 = zext i32 %1849 to i64
-  %1852 = shl i64 %1851, 32
-  %1853 = or i64 %1850, %1852
-  %1854 = sitofp i64 %1853 to float
-  br label %1983
-
-; <label>:1855                                    ; preds = %1823
-  %1856 = icmp eq i32 %1021, 1
-  br i1 %1856, label %1857, label %1891
-
-; <label>:1857                                    ; preds = %1855
-  %1858 = add i32 %13, -1
-  %1859 = uitofp i32 %1858 to float
-  %1860 = call float @dx.op.binary.f32(i32 35, float %1017, float 0.000000e+00)  ; FMax(a,b)
-  %1861 = call float @dx.op.binary.f32(i32 36, float %1860, float %1859)  ; FMin(a,b)
-  %1862 = fptoui float %1861 to i32
-  %1863 = add i32 %15, -1
-  %1864 = uitofp i32 %1863 to float
-  %1865 = call float @dx.op.binary.f32(i32 35, float %1019, float 0.000000e+00)  ; FMax(a,b)
-  %1866 = call float @dx.op.binary.f32(i32 36, float %1865, float %1864)  ; FMin(a,b)
-  %1867 = fptoui float %1866 to i32
-  %1868 = uitofp i32 %1867 to float
-  %1869 = uitofp i32 %1862 to float
-  %1870 = fptoui float %45 to i32
-  %1871 = fptoui float %182 to i32
-  %1872 = fptoui float %1868 to i32
-  %1873 = fptoui float %1869 to i32
-  %1874 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1875 = extractvalue %dx.types.CBufRet.i32 %1874, 0
-  %1876 = extractvalue %dx.types.CBufRet.i32 %1874, 1
-  %1877 = extractvalue %dx.types.CBufRet.i32 %1874, 2
-  %1878 = extractvalue %dx.types.CBufRet.i32 %1874, 3
-  %1879 = mul i32 %1875, %1870
-  %1880 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1871, i32 %1876, i32 %1879)  ; IMad(a,b,c)
-  %1881 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1872, i32 %1877, i32 %1880)  ; IMad(a,b,c)
-  %1882 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1873, i32 %1878, i32 %1881)  ; IMad(a,b,c)
-  %1883 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1882, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1884 = extractvalue %dx.types.ResRet.i32 %1883, 0
-  %1885 = extractvalue %dx.types.ResRet.i32 %1883, 1
-  %1886 = zext i32 %1884 to i64
-  %1887 = zext i32 %1885 to i64
-  %1888 = shl i64 %1887, 32
-  %1889 = or i64 %1886, %1888
-  %1890 = sitofp i64 %1889 to float
-  br label %1983
-
-; <label>:1891                                    ; preds = %1855
-  %1892 = icmp eq i32 %1021, 2
-  br i1 %1892, label %1893, label %1983
-
-; <label>:1893                                    ; preds = %1891
-  %1894 = fsub fast float %22, %20
-  %1895 = fcmp fast olt float %1017, %20
-  br i1 %1895, label %1896, label %1909
-
-; <label>:1896                                    ; preds = %1893
-  %1897 = fsub fast float %20, %1017
-  %1898 = fdiv fast float %1897, %1894
-  %1899 = fptoui float %1898 to i32
-  %1900 = uitofp i32 %1899 to float
-  %1901 = fmul fast float %1900, %1894
-  %1902 = fsub fast float %1897, %1901
-  %1903 = and i32 %1899, 1
-  %1904 = icmp eq i32 %1903, 0
-  br i1 %1904, label %1905, label %1907
-
-; <label>:1905                                    ; preds = %1896
-  %1906 = fadd fast float %1902, %20
-  br label %1924
-
-; <label>:1907                                    ; preds = %1896
-  %1908 = fsub fast float %22, %1902
-  br label %1924
-
-; <label>:1909                                    ; preds = %1893
-  %1910 = fcmp fast ogt float %1017, %22
-  br i1 %1910, label %1911, label %1924
-
-; <label>:1911                                    ; preds = %1909
-  %1912 = fsub fast float %1017, %22
-  %1913 = fdiv fast float %1912, %1894
-  %1914 = fptoui float %1913 to i32
-  %1915 = uitofp i32 %1914 to float
-  %1916 = fmul fast float %1915, %1894
-  %1917 = fsub fast float %1912, %1916
-  %1918 = and i32 %1914, 1
-  %1919 = icmp eq i32 %1918, 0
-  br i1 %1919, label %1920, label %1922
-
-; <label>:1920                                    ; preds = %1911
-  %1921 = fsub fast float %22, %1917
-  br label %1924
-
-; <label>:1922                                    ; preds = %1911
-  %1923 = fadd fast float %1917, %20
-  br label %1924
-
-; <label>:1924                                    ; preds = %1922, %1920, %1909, %1907, %1905
-  %1925 = phi float [ %1906, %1905 ], [ %1908, %1907 ], [ %1921, %1920 ], [ %1923, %1922 ], [ %1017, %1909 ]
-  %1926 = fptoui float %1925 to i32
-  %1927 = fsub fast float %24, %20
-  %1928 = fcmp fast olt float %1019, %20
-  br i1 %1928, label %1929, label %1942
-
-; <label>:1929                                    ; preds = %1924
-  %1930 = fsub fast float %20, %1019
-  %1931 = fdiv fast float %1930, %1927
-  %1932 = fptoui float %1931 to i32
-  %1933 = uitofp i32 %1932 to float
-  %1934 = fmul fast float %1933, %1927
-  %1935 = fsub fast float %1930, %1934
-  %1936 = and i32 %1932, 1
-  %1937 = icmp eq i32 %1936, 0
-  br i1 %1937, label %1938, label %1940
-
-; <label>:1938                                    ; preds = %1929
-  %1939 = fadd fast float %1935, %20
-  br label %1957
-
-; <label>:1940                                    ; preds = %1929
-  %1941 = fsub fast float %24, %1935
-  br label %1957
-
-; <label>:1942                                    ; preds = %1924
-  %1943 = fcmp fast ogt float %1019, %24
-  br i1 %1943, label %1944, label %1957
-
-; <label>:1944                                    ; preds = %1942
-  %1945 = fsub fast float %1019, %24
-  %1946 = fdiv fast float %1945, %1927
-  %1947 = fptoui float %1946 to i32
-  %1948 = uitofp i32 %1947 to float
-  %1949 = fmul fast float %1948, %1927
-  %1950 = fsub fast float %1945, %1949
-  %1951 = and i32 %1947, 1
-  %1952 = icmp eq i32 %1951, 0
-  br i1 %1952, label %1953, label %1955
-
-; <label>:1953                                    ; preds = %1944
-  %1954 = fsub fast float %24, %1950
-  br label %1957
-
-; <label>:1955                                    ; preds = %1944
-  %1956 = fadd fast float %1950, %20
-  br label %1957
-
-; <label>:1957                                    ; preds = %1955, %1953, %1942, %1940, %1938
-  %1958 = phi float [ %1939, %1938 ], [ %1941, %1940 ], [ %1954, %1953 ], [ %1956, %1955 ], [ %1019, %1942 ]
-  %1959 = fptoui float %1958 to i32
-  %1960 = uitofp i32 %1959 to float
-  %1961 = uitofp i32 %1926 to float
-  %1962 = fptoui float %45 to i32
-  %1963 = fptoui float %182 to i32
-  %1964 = fptoui float %1960 to i32
-  %1965 = fptoui float %1961 to i32
-  %1966 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1967 = extractvalue %dx.types.CBufRet.i32 %1966, 0
-  %1968 = extractvalue %dx.types.CBufRet.i32 %1966, 1
-  %1969 = extractvalue %dx.types.CBufRet.i32 %1966, 2
-  %1970 = extractvalue %dx.types.CBufRet.i32 %1966, 3
-  %1971 = mul i32 %1967, %1962
-  %1972 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1963, i32 %1968, i32 %1971)  ; IMad(a,b,c)
-  %1973 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1964, i32 %1969, i32 %1972)  ; IMad(a,b,c)
-  %1974 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1965, i32 %1970, i32 %1973)  ; IMad(a,b,c)
-  %1975 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1974, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1976 = extractvalue %dx.types.ResRet.i32 %1975, 0
-  %1977 = extractvalue %dx.types.ResRet.i32 %1975, 1
-  %1978 = zext i32 %1976 to i64
-  %1979 = zext i32 %1977 to i64
-  %1980 = shl i64 %1979, 32
-  %1981 = or i64 %1978, %1980
-  %1982 = sitofp i64 %1981 to float
-  br label %1983
-
-; <label>:1983                                    ; preds = %1957, %1891, %1857, %1835, %1825
-  %1984 = phi float [ %1854, %1835 ], [ 0.000000e+00, %1825 ], [ %1890, %1857 ], [ %1982, %1957 ], [ 0.000000e+00, %1891 ]
-  br i1 %1022, label %1985, label %2015
-
-; <label>:1985                                    ; preds = %1983
-  %1986 = fcmp fast oge float %1343, 0.000000e+00
-  %1987 = fptoui float %1343 to i32
-  %1988 = icmp ult i32 %1987, %13
-  %1989 = and i1 %1986, %1988
-  %1990 = fcmp fast oge float %1019, 0.000000e+00
-  %1991 = and i1 %1990, %1989
-  %1992 = fptoui float %1019 to i32
-  %1993 = icmp ult i32 %1992, %15
-  %1994 = and i1 %1993, %1991
-  br i1 %1994, label %1995, label %2143
-
-; <label>:1995                                    ; preds = %1985
-  %1996 = fptoui float %45 to i32
-  %1997 = fptoui float %182 to i32
-  %1998 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1999 = extractvalue %dx.types.CBufRet.i32 %1998, 0
-  %2000 = extractvalue %dx.types.CBufRet.i32 %1998, 1
-  %2001 = extractvalue %dx.types.CBufRet.i32 %1998, 2
-  %2002 = extractvalue %dx.types.CBufRet.i32 %1998, 3
-  %2003 = mul i32 %1999, %1996
-  %2004 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1997, i32 %2000, i32 %2003)  ; IMad(a,b,c)
-  %2005 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1992, i32 %2001, i32 %2004)  ; IMad(a,b,c)
-  %2006 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1987, i32 %2002, i32 %2005)  ; IMad(a,b,c)
-  %2007 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2006, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2008 = extractvalue %dx.types.ResRet.i32 %2007, 0
-  %2009 = extractvalue %dx.types.ResRet.i32 %2007, 1
-  %2010 = zext i32 %2008 to i64
-  %2011 = zext i32 %2009 to i64
-  %2012 = shl i64 %2011, 32
-  %2013 = or i64 %2010, %2012
-  %2014 = sitofp i64 %2013 to float
-  br label %2143
-
-; <label>:2015                                    ; preds = %1983
-  %2016 = icmp eq i32 %1021, 1
-  br i1 %2016, label %2017, label %2051
-
-; <label>:2017                                    ; preds = %2015
-  %2018 = add i32 %13, -1
-  %2019 = uitofp i32 %2018 to float
-  %2020 = call float @dx.op.binary.f32(i32 35, float %1343, float 0.000000e+00)  ; FMax(a,b)
-  %2021 = call float @dx.op.binary.f32(i32 36, float %2020, float %2019)  ; FMin(a,b)
-  %2022 = fptoui float %2021 to i32
-  %2023 = add i32 %15, -1
-  %2024 = uitofp i32 %2023 to float
-  %2025 = call float @dx.op.binary.f32(i32 35, float %1019, float 0.000000e+00)  ; FMax(a,b)
-  %2026 = call float @dx.op.binary.f32(i32 36, float %2025, float %2024)  ; FMin(a,b)
-  %2027 = fptoui float %2026 to i32
-  %2028 = uitofp i32 %2027 to float
-  %2029 = uitofp i32 %2022 to float
-  %2030 = fptoui float %45 to i32
-  %2031 = fptoui float %182 to i32
-  %2032 = fptoui float %2028 to i32
-  %2033 = fptoui float %2029 to i32
-  %2034 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2035 = extractvalue %dx.types.CBufRet.i32 %2034, 0
-  %2036 = extractvalue %dx.types.CBufRet.i32 %2034, 1
-  %2037 = extractvalue %dx.types.CBufRet.i32 %2034, 2
-  %2038 = extractvalue %dx.types.CBufRet.i32 %2034, 3
-  %2039 = mul i32 %2035, %2030
-  %2040 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2031, i32 %2036, i32 %2039)  ; IMad(a,b,c)
-  %2041 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2032, i32 %2037, i32 %2040)  ; IMad(a,b,c)
-  %2042 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2033, i32 %2038, i32 %2041)  ; IMad(a,b,c)
-  %2043 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2042, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2044 = extractvalue %dx.types.ResRet.i32 %2043, 0
-  %2045 = extractvalue %dx.types.ResRet.i32 %2043, 1
-  %2046 = zext i32 %2044 to i64
-  %2047 = zext i32 %2045 to i64
-  %2048 = shl i64 %2047, 32
-  %2049 = or i64 %2046, %2048
-  %2050 = sitofp i64 %2049 to float
-  br label %2143
-
-; <label>:2051                                    ; preds = %2015
-  %2052 = icmp eq i32 %1021, 2
-  br i1 %2052, label %2053, label %2143
-
-; <label>:2053                                    ; preds = %2051
-  %2054 = fsub fast float %22, %20
-  %2055 = fcmp fast olt float %1343, %20
-  br i1 %2055, label %2056, label %2069
-
-; <label>:2056                                    ; preds = %2053
-  %2057 = fsub fast float %20, %1343
-  %2058 = fdiv fast float %2057, %2054
-  %2059 = fptoui float %2058 to i32
-  %2060 = uitofp i32 %2059 to float
-  %2061 = fmul fast float %2060, %2054
-  %2062 = fsub fast float %2057, %2061
-  %2063 = and i32 %2059, 1
-  %2064 = icmp eq i32 %2063, 0
-  br i1 %2064, label %2065, label %2067
-
-; <label>:2065                                    ; preds = %2056
-  %2066 = fadd fast float %2062, %20
-  br label %2084
-
-; <label>:2067                                    ; preds = %2056
-  %2068 = fsub fast float %22, %2062
-  br label %2084
-
-; <label>:2069                                    ; preds = %2053
-  %2070 = fcmp fast ogt float %1343, %22
-  br i1 %2070, label %2071, label %2084
-
-; <label>:2071                                    ; preds = %2069
-  %2072 = fsub fast float %1343, %22
-  %2073 = fdiv fast float %2072, %2054
-  %2074 = fptoui float %2073 to i32
-  %2075 = uitofp i32 %2074 to float
-  %2076 = fmul fast float %2075, %2054
-  %2077 = fsub fast float %2072, %2076
-  %2078 = and i32 %2074, 1
-  %2079 = icmp eq i32 %2078, 0
-  br i1 %2079, label %2080, label %2082
-
-; <label>:2080                                    ; preds = %2071
-  %2081 = fsub fast float %22, %2077
-  br label %2084
-
-; <label>:2082                                    ; preds = %2071
-  %2083 = fadd fast float %2077, %20
-  br label %2084
-
-; <label>:2084                                    ; preds = %2082, %2080, %2069, %2067, %2065
-  %2085 = phi float [ %2066, %2065 ], [ %2068, %2067 ], [ %2081, %2080 ], [ %2083, %2082 ], [ %1343, %2069 ]
-  %2086 = fptoui float %2085 to i32
-  %2087 = fsub fast float %24, %20
-  %2088 = fcmp fast olt float %1019, %20
-  br i1 %2088, label %2089, label %2102
-
-; <label>:2089                                    ; preds = %2084
-  %2090 = fsub fast float %20, %1019
-  %2091 = fdiv fast float %2090, %2087
-  %2092 = fptoui float %2091 to i32
-  %2093 = uitofp i32 %2092 to float
-  %2094 = fmul fast float %2093, %2087
-  %2095 = fsub fast float %2090, %2094
-  %2096 = and i32 %2092, 1
-  %2097 = icmp eq i32 %2096, 0
-  br i1 %2097, label %2098, label %2100
-
-; <label>:2098                                    ; preds = %2089
-  %2099 = fadd fast float %2095, %20
-  br label %2117
-
-; <label>:2100                                    ; preds = %2089
-  %2101 = fsub fast float %24, %2095
-  br label %2117
-
-; <label>:2102                                    ; preds = %2084
-  %2103 = fcmp fast ogt float %1019, %24
-  br i1 %2103, label %2104, label %2117
-
-; <label>:2104                                    ; preds = %2102
-  %2105 = fsub fast float %1019, %24
-  %2106 = fdiv fast float %2105, %2087
-  %2107 = fptoui float %2106 to i32
-  %2108 = uitofp i32 %2107 to float
-  %2109 = fmul fast float %2108, %2087
-  %2110 = fsub fast float %2105, %2109
-  %2111 = and i32 %2107, 1
-  %2112 = icmp eq i32 %2111, 0
-  br i1 %2112, label %2113, label %2115
-
-; <label>:2113                                    ; preds = %2104
-  %2114 = fsub fast float %24, %2110
-  br label %2117
-
-; <label>:2115                                    ; preds = %2104
-  %2116 = fadd fast float %2110, %20
-  br label %2117
-
-; <label>:2117                                    ; preds = %2115, %2113, %2102, %2100, %2098
-  %2118 = phi float [ %2099, %2098 ], [ %2101, %2100 ], [ %2114, %2113 ], [ %2116, %2115 ], [ %1019, %2102 ]
-  %2119 = fptoui float %2118 to i32
-  %2120 = uitofp i32 %2119 to float
-  %2121 = uitofp i32 %2086 to float
-  %2122 = fptoui float %45 to i32
-  %2123 = fptoui float %182 to i32
-  %2124 = fptoui float %2120 to i32
-  %2125 = fptoui float %2121 to i32
-  %2126 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2127 = extractvalue %dx.types.CBufRet.i32 %2126, 0
-  %2128 = extractvalue %dx.types.CBufRet.i32 %2126, 1
-  %2129 = extractvalue %dx.types.CBufRet.i32 %2126, 2
-  %2130 = extractvalue %dx.types.CBufRet.i32 %2126, 3
-  %2131 = mul i32 %2127, %2122
-  %2132 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2123, i32 %2128, i32 %2131)  ; IMad(a,b,c)
-  %2133 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2124, i32 %2129, i32 %2132)  ; IMad(a,b,c)
-  %2134 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2125, i32 %2130, i32 %2133)  ; IMad(a,b,c)
-  %2135 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2134, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2136 = extractvalue %dx.types.ResRet.i32 %2135, 0
-  %2137 = extractvalue %dx.types.ResRet.i32 %2135, 1
-  %2138 = zext i32 %2136 to i64
-  %2139 = zext i32 %2137 to i64
-  %2140 = shl i64 %2139, 32
-  %2141 = or i64 %2138, %2140
-  %2142 = sitofp i64 %2141 to float
-  br label %2143
-
-; <label>:2143                                    ; preds = %2117, %2051, %2017, %1995, %1985
-  %2144 = phi float [ %2014, %1995 ], [ 0.000000e+00, %1985 ], [ %2050, %2017 ], [ %2142, %2117 ], [ 0.000000e+00, %2051 ]
-  br i1 %1022, label %2145, label %2175
-
-; <label>:2145                                    ; preds = %2143
-  %2146 = fcmp fast oge float %1504, 0.000000e+00
-  %2147 = fptoui float %1504 to i32
-  %2148 = icmp ult i32 %2147, %13
-  %2149 = and i1 %2146, %2148
-  %2150 = fcmp fast oge float %1019, 0.000000e+00
-  %2151 = and i1 %2150, %2149
-  %2152 = fptoui float %1019 to i32
-  %2153 = icmp ult i32 %2152, %15
-  %2154 = and i1 %2153, %2151
-  br i1 %2154, label %2155, label %2303
-
-; <label>:2155                                    ; preds = %2145
-  %2156 = fptoui float %45 to i32
-  %2157 = fptoui float %182 to i32
-  %2158 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2159 = extractvalue %dx.types.CBufRet.i32 %2158, 0
-  %2160 = extractvalue %dx.types.CBufRet.i32 %2158, 1
-  %2161 = extractvalue %dx.types.CBufRet.i32 %2158, 2
-  %2162 = extractvalue %dx.types.CBufRet.i32 %2158, 3
-  %2163 = mul i32 %2159, %2156
-  %2164 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2157, i32 %2160, i32 %2163)  ; IMad(a,b,c)
-  %2165 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2152, i32 %2161, i32 %2164)  ; IMad(a,b,c)
-  %2166 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2147, i32 %2162, i32 %2165)  ; IMad(a,b,c)
-  %2167 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2166, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2168 = extractvalue %dx.types.ResRet.i32 %2167, 0
-  %2169 = extractvalue %dx.types.ResRet.i32 %2167, 1
-  %2170 = zext i32 %2168 to i64
-  %2171 = zext i32 %2169 to i64
-  %2172 = shl i64 %2171, 32
-  %2173 = or i64 %2170, %2172
-  %2174 = sitofp i64 %2173 to float
-  br label %2303
-
-; <label>:2175                                    ; preds = %2143
-  %2176 = icmp eq i32 %1021, 1
-  br i1 %2176, label %2177, label %2211
-
-; <label>:2177                                    ; preds = %2175
-  %2178 = add i32 %13, -1
-  %2179 = uitofp i32 %2178 to float
-  %2180 = call float @dx.op.binary.f32(i32 35, float %1504, float 0.000000e+00)  ; FMax(a,b)
-  %2181 = call float @dx.op.binary.f32(i32 36, float %2180, float %2179)  ; FMin(a,b)
-  %2182 = fptoui float %2181 to i32
-  %2183 = add i32 %15, -1
-  %2184 = uitofp i32 %2183 to float
-  %2185 = call float @dx.op.binary.f32(i32 35, float %1019, float 0.000000e+00)  ; FMax(a,b)
-  %2186 = call float @dx.op.binary.f32(i32 36, float %2185, float %2184)  ; FMin(a,b)
-  %2187 = fptoui float %2186 to i32
-  %2188 = uitofp i32 %2187 to float
-  %2189 = uitofp i32 %2182 to float
-  %2190 = fptoui float %45 to i32
-  %2191 = fptoui float %182 to i32
-  %2192 = fptoui float %2188 to i32
-  %2193 = fptoui float %2189 to i32
-  %2194 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2195 = extractvalue %dx.types.CBufRet.i32 %2194, 0
-  %2196 = extractvalue %dx.types.CBufRet.i32 %2194, 1
-  %2197 = extractvalue %dx.types.CBufRet.i32 %2194, 2
-  %2198 = extractvalue %dx.types.CBufRet.i32 %2194, 3
-  %2199 = mul i32 %2195, %2190
-  %2200 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2191, i32 %2196, i32 %2199)  ; IMad(a,b,c)
-  %2201 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2192, i32 %2197, i32 %2200)  ; IMad(a,b,c)
-  %2202 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2193, i32 %2198, i32 %2201)  ; IMad(a,b,c)
-  %2203 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2202, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2204 = extractvalue %dx.types.ResRet.i32 %2203, 0
-  %2205 = extractvalue %dx.types.ResRet.i32 %2203, 1
-  %2206 = zext i32 %2204 to i64
-  %2207 = zext i32 %2205 to i64
-  %2208 = shl i64 %2207, 32
-  %2209 = or i64 %2206, %2208
-  %2210 = sitofp i64 %2209 to float
-  br label %2303
-
-; <label>:2211                                    ; preds = %2175
-  %2212 = icmp eq i32 %1021, 2
-  br i1 %2212, label %2213, label %2303
-
-; <label>:2213                                    ; preds = %2211
-  %2214 = fsub fast float %22, %20
-  %2215 = fcmp fast olt float %1504, %20
-  br i1 %2215, label %2216, label %2229
-
-; <label>:2216                                    ; preds = %2213
-  %2217 = fsub fast float %20, %1504
-  %2218 = fdiv fast float %2217, %2214
-  %2219 = fptoui float %2218 to i32
-  %2220 = uitofp i32 %2219 to float
-  %2221 = fmul fast float %2220, %2214
-  %2222 = fsub fast float %2217, %2221
-  %2223 = and i32 %2219, 1
-  %2224 = icmp eq i32 %2223, 0
-  br i1 %2224, label %2225, label %2227
-
-; <label>:2225                                    ; preds = %2216
-  %2226 = fadd fast float %2222, %20
-  br label %2244
-
-; <label>:2227                                    ; preds = %2216
-  %2228 = fsub fast float %22, %2222
-  br label %2244
-
-; <label>:2229                                    ; preds = %2213
-  %2230 = fcmp fast ogt float %1504, %22
-  br i1 %2230, label %2231, label %2244
-
-; <label>:2231                                    ; preds = %2229
-  %2232 = fsub fast float %1504, %22
-  %2233 = fdiv fast float %2232, %2214
-  %2234 = fptoui float %2233 to i32
-  %2235 = uitofp i32 %2234 to float
-  %2236 = fmul fast float %2235, %2214
-  %2237 = fsub fast float %2232, %2236
-  %2238 = and i32 %2234, 1
-  %2239 = icmp eq i32 %2238, 0
-  br i1 %2239, label %2240, label %2242
-
-; <label>:2240                                    ; preds = %2231
-  %2241 = fsub fast float %22, %2237
-  br label %2244
-
-; <label>:2242                                    ; preds = %2231
-  %2243 = fadd fast float %2237, %20
-  br label %2244
-
-; <label>:2244                                    ; preds = %2242, %2240, %2229, %2227, %2225
-  %2245 = phi float [ %2226, %2225 ], [ %2228, %2227 ], [ %2241, %2240 ], [ %2243, %2242 ], [ %1504, %2229 ]
-  %2246 = fptoui float %2245 to i32
-  %2247 = fsub fast float %24, %20
-  %2248 = fcmp fast olt float %1019, %20
-  br i1 %2248, label %2249, label %2262
-
-; <label>:2249                                    ; preds = %2244
-  %2250 = fsub fast float %20, %1019
-  %2251 = fdiv fast float %2250, %2247
-  %2252 = fptoui float %2251 to i32
-  %2253 = uitofp i32 %2252 to float
-  %2254 = fmul fast float %2253, %2247
-  %2255 = fsub fast float %2250, %2254
-  %2256 = and i32 %2252, 1
-  %2257 = icmp eq i32 %2256, 0
-  br i1 %2257, label %2258, label %2260
-
-; <label>:2258                                    ; preds = %2249
-  %2259 = fadd fast float %2255, %20
-  br label %2277
-
-; <label>:2260                                    ; preds = %2249
-  %2261 = fsub fast float %24, %2255
-  br label %2277
-
-; <label>:2262                                    ; preds = %2244
-  %2263 = fcmp fast ogt float %1019, %24
-  br i1 %2263, label %2264, label %2277
-
-; <label>:2264                                    ; preds = %2262
-  %2265 = fsub fast float %1019, %24
-  %2266 = fdiv fast float %2265, %2247
-  %2267 = fptoui float %2266 to i32
-  %2268 = uitofp i32 %2267 to float
-  %2269 = fmul fast float %2268, %2247
-  %2270 = fsub fast float %2265, %2269
-  %2271 = and i32 %2267, 1
-  %2272 = icmp eq i32 %2271, 0
-  br i1 %2272, label %2273, label %2275
-
-; <label>:2273                                    ; preds = %2264
-  %2274 = fsub fast float %24, %2270
-  br label %2277
-
-; <label>:2275                                    ; preds = %2264
-  %2276 = fadd fast float %2270, %20
-  br label %2277
-
-; <label>:2277                                    ; preds = %2275, %2273, %2262, %2260, %2258
-  %2278 = phi float [ %2259, %2258 ], [ %2261, %2260 ], [ %2274, %2273 ], [ %2276, %2275 ], [ %1019, %2262 ]
-  %2279 = fptoui float %2278 to i32
-  %2280 = uitofp i32 %2279 to float
-  %2281 = uitofp i32 %2246 to float
-  %2282 = fptoui float %45 to i32
-  %2283 = fptoui float %182 to i32
-  %2284 = fptoui float %2280 to i32
-  %2285 = fptoui float %2281 to i32
-  %2286 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2287 = extractvalue %dx.types.CBufRet.i32 %2286, 0
-  %2288 = extractvalue %dx.types.CBufRet.i32 %2286, 1
-  %2289 = extractvalue %dx.types.CBufRet.i32 %2286, 2
-  %2290 = extractvalue %dx.types.CBufRet.i32 %2286, 3
-  %2291 = mul i32 %2287, %2282
-  %2292 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2283, i32 %2288, i32 %2291)  ; IMad(a,b,c)
-  %2293 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2284, i32 %2289, i32 %2292)  ; IMad(a,b,c)
-  %2294 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2285, i32 %2290, i32 %2293)  ; IMad(a,b,c)
-  %2295 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2294, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2296 = extractvalue %dx.types.ResRet.i32 %2295, 0
-  %2297 = extractvalue %dx.types.ResRet.i32 %2295, 1
-  %2298 = zext i32 %2296 to i64
-  %2299 = zext i32 %2297 to i64
-  %2300 = shl i64 %2299, 32
-  %2301 = or i64 %2298, %2300
-  %2302 = sitofp i64 %2301 to float
-  br label %2303
-
-; <label>:2303                                    ; preds = %2277, %2211, %2177, %2155, %2145
-  %2304 = phi float [ %2174, %2155 ], [ 0.000000e+00, %2145 ], [ %2210, %2177 ], [ %2302, %2277 ], [ 0.000000e+00, %2211 ]
-  %2305 = fadd fast float %1019, 1.000000e+00
-  br i1 %1022, label %2306, label %2336
-
-; <label>:2306                                    ; preds = %2303
-  %2307 = fcmp fast oge float %1018, 0.000000e+00
-  %2308 = fptoui float %1018 to i32
-  %2309 = icmp ult i32 %2308, %13
-  %2310 = and i1 %2307, %2309
-  %2311 = fcmp fast oge float %2305, 0.000000e+00
-  %2312 = and i1 %2311, %2310
-  %2313 = fptoui float %2305 to i32
-  %2314 = icmp ult i32 %2313, %15
-  %2315 = and i1 %2314, %2312
-  br i1 %2315, label %2316, label %2464
-
-; <label>:2316                                    ; preds = %2306
-  %2317 = fptoui float %45 to i32
-  %2318 = fptoui float %182 to i32
-  %2319 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2320 = extractvalue %dx.types.CBufRet.i32 %2319, 0
-  %2321 = extractvalue %dx.types.CBufRet.i32 %2319, 1
-  %2322 = extractvalue %dx.types.CBufRet.i32 %2319, 2
-  %2323 = extractvalue %dx.types.CBufRet.i32 %2319, 3
-  %2324 = mul i32 %2320, %2317
-  %2325 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2318, i32 %2321, i32 %2324)  ; IMad(a,b,c)
-  %2326 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2313, i32 %2322, i32 %2325)  ; IMad(a,b,c)
-  %2327 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2308, i32 %2323, i32 %2326)  ; IMad(a,b,c)
-  %2328 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2327, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2329 = extractvalue %dx.types.ResRet.i32 %2328, 0
-  %2330 = extractvalue %dx.types.ResRet.i32 %2328, 1
-  %2331 = zext i32 %2329 to i64
-  %2332 = zext i32 %2330 to i64
-  %2333 = shl i64 %2332, 32
-  %2334 = or i64 %2331, %2333
-  %2335 = sitofp i64 %2334 to float
-  br label %2464
-
-; <label>:2336                                    ; preds = %2303
-  %2337 = icmp eq i32 %1021, 1
-  br i1 %2337, label %2338, label %2372
-
-; <label>:2338                                    ; preds = %2336
-  %2339 = add i32 %13, -1
-  %2340 = uitofp i32 %2339 to float
-  %2341 = call float @dx.op.binary.f32(i32 35, float %1018, float 0.000000e+00)  ; FMax(a,b)
-  %2342 = call float @dx.op.binary.f32(i32 36, float %2341, float %2340)  ; FMin(a,b)
-  %2343 = fptoui float %2342 to i32
-  %2344 = add i32 %15, -1
-  %2345 = uitofp i32 %2344 to float
-  %2346 = call float @dx.op.binary.f32(i32 35, float %2305, float 0.000000e+00)  ; FMax(a,b)
-  %2347 = call float @dx.op.binary.f32(i32 36, float %2346, float %2345)  ; FMin(a,b)
-  %2348 = fptoui float %2347 to i32
-  %2349 = uitofp i32 %2348 to float
-  %2350 = uitofp i32 %2343 to float
-  %2351 = fptoui float %45 to i32
-  %2352 = fptoui float %182 to i32
-  %2353 = fptoui float %2349 to i32
-  %2354 = fptoui float %2350 to i32
-  %2355 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2356 = extractvalue %dx.types.CBufRet.i32 %2355, 0
-  %2357 = extractvalue %dx.types.CBufRet.i32 %2355, 1
-  %2358 = extractvalue %dx.types.CBufRet.i32 %2355, 2
-  %2359 = extractvalue %dx.types.CBufRet.i32 %2355, 3
-  %2360 = mul i32 %2356, %2351
-  %2361 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2352, i32 %2357, i32 %2360)  ; IMad(a,b,c)
-  %2362 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2353, i32 %2358, i32 %2361)  ; IMad(a,b,c)
-  %2363 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2354, i32 %2359, i32 %2362)  ; IMad(a,b,c)
-  %2364 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2363, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2365 = extractvalue %dx.types.ResRet.i32 %2364, 0
-  %2366 = extractvalue %dx.types.ResRet.i32 %2364, 1
-  %2367 = zext i32 %2365 to i64
-  %2368 = zext i32 %2366 to i64
-  %2369 = shl i64 %2368, 32
-  %2370 = or i64 %2367, %2369
-  %2371 = sitofp i64 %2370 to float
-  br label %2464
-
-; <label>:2372                                    ; preds = %2336
-  %2373 = icmp eq i32 %1021, 2
-  br i1 %2373, label %2374, label %2464
-
-; <label>:2374                                    ; preds = %2372
-  %2375 = fsub fast float %22, %20
-  %2376 = fcmp fast olt float %1018, %20
-  br i1 %2376, label %2377, label %2390
-
-; <label>:2377                                    ; preds = %2374
-  %2378 = fsub fast float %20, %1018
-  %2379 = fdiv fast float %2378, %2375
-  %2380 = fptoui float %2379 to i32
-  %2381 = uitofp i32 %2380 to float
-  %2382 = fmul fast float %2381, %2375
-  %2383 = fsub fast float %2378, %2382
-  %2384 = and i32 %2380, 1
-  %2385 = icmp eq i32 %2384, 0
-  br i1 %2385, label %2386, label %2388
-
-; <label>:2386                                    ; preds = %2377
-  %2387 = fadd fast float %2383, %20
-  br label %2405
-
-; <label>:2388                                    ; preds = %2377
-  %2389 = fsub fast float %22, %2383
-  br label %2405
-
-; <label>:2390                                    ; preds = %2374
-  %2391 = fcmp fast ogt float %1018, %22
-  br i1 %2391, label %2392, label %2405
-
-; <label>:2392                                    ; preds = %2390
-  %2393 = fsub fast float %1018, %22
-  %2394 = fdiv fast float %2393, %2375
-  %2395 = fptoui float %2394 to i32
-  %2396 = uitofp i32 %2395 to float
-  %2397 = fmul fast float %2396, %2375
-  %2398 = fsub fast float %2393, %2397
-  %2399 = and i32 %2395, 1
-  %2400 = icmp eq i32 %2399, 0
-  br i1 %2400, label %2401, label %2403
-
-; <label>:2401                                    ; preds = %2392
-  %2402 = fsub fast float %22, %2398
-  br label %2405
-
-; <label>:2403                                    ; preds = %2392
-  %2404 = fadd fast float %2398, %20
-  br label %2405
-
-; <label>:2405                                    ; preds = %2403, %2401, %2390, %2388, %2386
-  %2406 = phi float [ %2387, %2386 ], [ %2389, %2388 ], [ %2402, %2401 ], [ %2404, %2403 ], [ %1018, %2390 ]
-  %2407 = fptoui float %2406 to i32
-  %2408 = fsub fast float %24, %20
-  %2409 = fcmp fast olt float %2305, %20
-  br i1 %2409, label %2410, label %2423
-
-; <label>:2410                                    ; preds = %2405
-  %2411 = fsub fast float %20, %2305
-  %2412 = fdiv fast float %2411, %2408
-  %2413 = fptoui float %2412 to i32
-  %2414 = uitofp i32 %2413 to float
-  %2415 = fmul fast float %2414, %2408
-  %2416 = fsub fast float %2411, %2415
-  %2417 = and i32 %2413, 1
-  %2418 = icmp eq i32 %2417, 0
-  br i1 %2418, label %2419, label %2421
-
-; <label>:2419                                    ; preds = %2410
-  %2420 = fadd fast float %2416, %20
-  br label %2438
-
-; <label>:2421                                    ; preds = %2410
-  %2422 = fsub fast float %24, %2416
-  br label %2438
-
-; <label>:2423                                    ; preds = %2405
-  %2424 = fcmp fast ogt float %2305, %24
-  br i1 %2424, label %2425, label %2438
-
-; <label>:2425                                    ; preds = %2423
-  %2426 = fsub fast float %2305, %24
-  %2427 = fdiv fast float %2426, %2408
-  %2428 = fptoui float %2427 to i32
-  %2429 = uitofp i32 %2428 to float
-  %2430 = fmul fast float %2429, %2408
-  %2431 = fsub fast float %2426, %2430
-  %2432 = and i32 %2428, 1
-  %2433 = icmp eq i32 %2432, 0
-  br i1 %2433, label %2434, label %2436
-
-; <label>:2434                                    ; preds = %2425
-  %2435 = fsub fast float %24, %2431
-  br label %2438
-
-; <label>:2436                                    ; preds = %2425
-  %2437 = fadd fast float %2431, %20
-  br label %2438
-
-; <label>:2438                                    ; preds = %2436, %2434, %2423, %2421, %2419
-  %2439 = phi float [ %2420, %2419 ], [ %2422, %2421 ], [ %2435, %2434 ], [ %2437, %2436 ], [ %2305, %2423 ]
-  %2440 = fptoui float %2439 to i32
-  %2441 = uitofp i32 %2440 to float
-  %2442 = uitofp i32 %2407 to float
-  %2443 = fptoui float %45 to i32
-  %2444 = fptoui float %182 to i32
-  %2445 = fptoui float %2441 to i32
-  %2446 = fptoui float %2442 to i32
-  %2447 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2448 = extractvalue %dx.types.CBufRet.i32 %2447, 0
-  %2449 = extractvalue %dx.types.CBufRet.i32 %2447, 1
-  %2450 = extractvalue %dx.types.CBufRet.i32 %2447, 2
-  %2451 = extractvalue %dx.types.CBufRet.i32 %2447, 3
-  %2452 = mul i32 %2448, %2443
-  %2453 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2444, i32 %2449, i32 %2452)  ; IMad(a,b,c)
-  %2454 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2445, i32 %2450, i32 %2453)  ; IMad(a,b,c)
-  %2455 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2446, i32 %2451, i32 %2454)  ; IMad(a,b,c)
-  %2456 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2455, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2457 = extractvalue %dx.types.ResRet.i32 %2456, 0
-  %2458 = extractvalue %dx.types.ResRet.i32 %2456, 1
-  %2459 = zext i32 %2457 to i64
-  %2460 = zext i32 %2458 to i64
-  %2461 = shl i64 %2460, 32
-  %2462 = or i64 %2459, %2461
-  %2463 = sitofp i64 %2462 to float
-  br label %2464
-
-; <label>:2464                                    ; preds = %2438, %2372, %2338, %2316, %2306
-  %2465 = phi float [ %2335, %2316 ], [ 0.000000e+00, %2306 ], [ %2371, %2338 ], [ %2463, %2438 ], [ 0.000000e+00, %2372 ]
-  br i1 %1022, label %2466, label %2496
-
-; <label>:2466                                    ; preds = %2464
-  %2467 = fcmp fast oge float %1017, 0.000000e+00
-  %2468 = fptoui float %1017 to i32
-  %2469 = icmp ult i32 %2468, %13
-  %2470 = and i1 %2467, %2469
-  %2471 = fcmp fast oge float %2305, 0.000000e+00
-  %2472 = and i1 %2471, %2470
-  %2473 = fptoui float %2305 to i32
-  %2474 = icmp ult i32 %2473, %15
-  %2475 = and i1 %2474, %2472
-  br i1 %2475, label %2476, label %2624
-
-; <label>:2476                                    ; preds = %2466
-  %2477 = fptoui float %45 to i32
-  %2478 = fptoui float %182 to i32
-  %2479 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2480 = extractvalue %dx.types.CBufRet.i32 %2479, 0
-  %2481 = extractvalue %dx.types.CBufRet.i32 %2479, 1
-  %2482 = extractvalue %dx.types.CBufRet.i32 %2479, 2
-  %2483 = extractvalue %dx.types.CBufRet.i32 %2479, 3
-  %2484 = mul i32 %2480, %2477
-  %2485 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2478, i32 %2481, i32 %2484)  ; IMad(a,b,c)
-  %2486 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2473, i32 %2482, i32 %2485)  ; IMad(a,b,c)
-  %2487 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2468, i32 %2483, i32 %2486)  ; IMad(a,b,c)
-  %2488 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2487, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2489 = extractvalue %dx.types.ResRet.i32 %2488, 0
-  %2490 = extractvalue %dx.types.ResRet.i32 %2488, 1
-  %2491 = zext i32 %2489 to i64
-  %2492 = zext i32 %2490 to i64
-  %2493 = shl i64 %2492, 32
-  %2494 = or i64 %2491, %2493
-  %2495 = sitofp i64 %2494 to float
-  br label %2624
-
-; <label>:2496                                    ; preds = %2464
-  %2497 = icmp eq i32 %1021, 1
-  br i1 %2497, label %2498, label %2532
-
-; <label>:2498                                    ; preds = %2496
-  %2499 = add i32 %13, -1
-  %2500 = uitofp i32 %2499 to float
-  %2501 = call float @dx.op.binary.f32(i32 35, float %1017, float 0.000000e+00)  ; FMax(a,b)
-  %2502 = call float @dx.op.binary.f32(i32 36, float %2501, float %2500)  ; FMin(a,b)
-  %2503 = fptoui float %2502 to i32
-  %2504 = add i32 %15, -1
-  %2505 = uitofp i32 %2504 to float
-  %2506 = call float @dx.op.binary.f32(i32 35, float %2305, float 0.000000e+00)  ; FMax(a,b)
-  %2507 = call float @dx.op.binary.f32(i32 36, float %2506, float %2505)  ; FMin(a,b)
-  %2508 = fptoui float %2507 to i32
-  %2509 = uitofp i32 %2508 to float
-  %2510 = uitofp i32 %2503 to float
-  %2511 = fptoui float %45 to i32
-  %2512 = fptoui float %182 to i32
-  %2513 = fptoui float %2509 to i32
-  %2514 = fptoui float %2510 to i32
-  %2515 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2516 = extractvalue %dx.types.CBufRet.i32 %2515, 0
-  %2517 = extractvalue %dx.types.CBufRet.i32 %2515, 1
-  %2518 = extractvalue %dx.types.CBufRet.i32 %2515, 2
-  %2519 = extractvalue %dx.types.CBufRet.i32 %2515, 3
-  %2520 = mul i32 %2516, %2511
-  %2521 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2512, i32 %2517, i32 %2520)  ; IMad(a,b,c)
-  %2522 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2513, i32 %2518, i32 %2521)  ; IMad(a,b,c)
-  %2523 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2514, i32 %2519, i32 %2522)  ; IMad(a,b,c)
-  %2524 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2523, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2525 = extractvalue %dx.types.ResRet.i32 %2524, 0
-  %2526 = extractvalue %dx.types.ResRet.i32 %2524, 1
-  %2527 = zext i32 %2525 to i64
-  %2528 = zext i32 %2526 to i64
-  %2529 = shl i64 %2528, 32
-  %2530 = or i64 %2527, %2529
-  %2531 = sitofp i64 %2530 to float
-  br label %2624
-
-; <label>:2532                                    ; preds = %2496
-  %2533 = icmp eq i32 %1021, 2
-  br i1 %2533, label %2534, label %2624
-
-; <label>:2534                                    ; preds = %2532
-  %2535 = fsub fast float %22, %20
-  %2536 = fcmp fast olt float %1017, %20
-  br i1 %2536, label %2537, label %2550
-
-; <label>:2537                                    ; preds = %2534
-  %2538 = fsub fast float %20, %1017
-  %2539 = fdiv fast float %2538, %2535
-  %2540 = fptoui float %2539 to i32
-  %2541 = uitofp i32 %2540 to float
-  %2542 = fmul fast float %2541, %2535
-  %2543 = fsub fast float %2538, %2542
-  %2544 = and i32 %2540, 1
-  %2545 = icmp eq i32 %2544, 0
-  br i1 %2545, label %2546, label %2548
-
-; <label>:2546                                    ; preds = %2537
-  %2547 = fadd fast float %2543, %20
-  br label %2565
-
-; <label>:2548                                    ; preds = %2537
-  %2549 = fsub fast float %22, %2543
-  br label %2565
-
-; <label>:2550                                    ; preds = %2534
-  %2551 = fcmp fast ogt float %1017, %22
-  br i1 %2551, label %2552, label %2565
-
-; <label>:2552                                    ; preds = %2550
-  %2553 = fsub fast float %1017, %22
-  %2554 = fdiv fast float %2553, %2535
-  %2555 = fptoui float %2554 to i32
-  %2556 = uitofp i32 %2555 to float
-  %2557 = fmul fast float %2556, %2535
-  %2558 = fsub fast float %2553, %2557
-  %2559 = and i32 %2555, 1
-  %2560 = icmp eq i32 %2559, 0
-  br i1 %2560, label %2561, label %2563
-
-; <label>:2561                                    ; preds = %2552
-  %2562 = fsub fast float %22, %2558
-  br label %2565
-
-; <label>:2563                                    ; preds = %2552
-  %2564 = fadd fast float %2558, %20
-  br label %2565
-
-; <label>:2565                                    ; preds = %2563, %2561, %2550, %2548, %2546
-  %2566 = phi float [ %2547, %2546 ], [ %2549, %2548 ], [ %2562, %2561 ], [ %2564, %2563 ], [ %1017, %2550 ]
-  %2567 = fptoui float %2566 to i32
-  %2568 = fsub fast float %24, %20
-  %2569 = fcmp fast olt float %2305, %20
-  br i1 %2569, label %2570, label %2583
-
-; <label>:2570                                    ; preds = %2565
-  %2571 = fsub fast float %20, %2305
-  %2572 = fdiv fast float %2571, %2568
-  %2573 = fptoui float %2572 to i32
-  %2574 = uitofp i32 %2573 to float
-  %2575 = fmul fast float %2574, %2568
-  %2576 = fsub fast float %2571, %2575
-  %2577 = and i32 %2573, 1
-  %2578 = icmp eq i32 %2577, 0
-  br i1 %2578, label %2579, label %2581
-
-; <label>:2579                                    ; preds = %2570
-  %2580 = fadd fast float %2576, %20
-  br label %2598
-
-; <label>:2581                                    ; preds = %2570
-  %2582 = fsub fast float %24, %2576
-  br label %2598
-
-; <label>:2583                                    ; preds = %2565
-  %2584 = fcmp fast ogt float %2305, %24
-  br i1 %2584, label %2585, label %2598
-
-; <label>:2585                                    ; preds = %2583
-  %2586 = fsub fast float %2305, %24
-  %2587 = fdiv fast float %2586, %2568
-  %2588 = fptoui float %2587 to i32
-  %2589 = uitofp i32 %2588 to float
-  %2590 = fmul fast float %2589, %2568
-  %2591 = fsub fast float %2586, %2590
-  %2592 = and i32 %2588, 1
-  %2593 = icmp eq i32 %2592, 0
-  br i1 %2593, label %2594, label %2596
-
-; <label>:2594                                    ; preds = %2585
-  %2595 = fsub fast float %24, %2591
-  br label %2598
-
-; <label>:2596                                    ; preds = %2585
-  %2597 = fadd fast float %2591, %20
-  br label %2598
-
-; <label>:2598                                    ; preds = %2596, %2594, %2583, %2581, %2579
-  %2599 = phi float [ %2580, %2579 ], [ %2582, %2581 ], [ %2595, %2594 ], [ %2597, %2596 ], [ %2305, %2583 ]
-  %2600 = fptoui float %2599 to i32
-  %2601 = uitofp i32 %2600 to float
-  %2602 = uitofp i32 %2567 to float
-  %2603 = fptoui float %45 to i32
-  %2604 = fptoui float %182 to i32
-  %2605 = fptoui float %2601 to i32
-  %2606 = fptoui float %2602 to i32
-  %2607 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2608 = extractvalue %dx.types.CBufRet.i32 %2607, 0
-  %2609 = extractvalue %dx.types.CBufRet.i32 %2607, 1
-  %2610 = extractvalue %dx.types.CBufRet.i32 %2607, 2
-  %2611 = extractvalue %dx.types.CBufRet.i32 %2607, 3
-  %2612 = mul i32 %2608, %2603
-  %2613 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2604, i32 %2609, i32 %2612)  ; IMad(a,b,c)
-  %2614 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2605, i32 %2610, i32 %2613)  ; IMad(a,b,c)
-  %2615 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2606, i32 %2611, i32 %2614)  ; IMad(a,b,c)
-  %2616 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2615, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2617 = extractvalue %dx.types.ResRet.i32 %2616, 0
-  %2618 = extractvalue %dx.types.ResRet.i32 %2616, 1
-  %2619 = zext i32 %2617 to i64
-  %2620 = zext i32 %2618 to i64
-  %2621 = shl i64 %2620, 32
-  %2622 = or i64 %2619, %2621
-  %2623 = sitofp i64 %2622 to float
-  br label %2624
-
-; <label>:2624                                    ; preds = %2598, %2532, %2498, %2476, %2466
-  %2625 = phi float [ %2495, %2476 ], [ 0.000000e+00, %2466 ], [ %2531, %2498 ], [ %2623, %2598 ], [ 0.000000e+00, %2532 ]
-  br i1 %1022, label %2626, label %2656
-
-; <label>:2626                                    ; preds = %2624
-  %2627 = fcmp fast oge float %1343, 0.000000e+00
-  %2628 = fptoui float %1343 to i32
-  %2629 = icmp ult i32 %2628, %13
-  %2630 = and i1 %2627, %2629
-  %2631 = fcmp fast oge float %2305, 0.000000e+00
-  %2632 = and i1 %2631, %2630
-  %2633 = fptoui float %2305 to i32
-  %2634 = icmp ult i32 %2633, %15
-  %2635 = and i1 %2634, %2632
-  br i1 %2635, label %2636, label %2784
-
-; <label>:2636                                    ; preds = %2626
-  %2637 = fptoui float %45 to i32
-  %2638 = fptoui float %182 to i32
-  %2639 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2640 = extractvalue %dx.types.CBufRet.i32 %2639, 0
-  %2641 = extractvalue %dx.types.CBufRet.i32 %2639, 1
-  %2642 = extractvalue %dx.types.CBufRet.i32 %2639, 2
-  %2643 = extractvalue %dx.types.CBufRet.i32 %2639, 3
-  %2644 = mul i32 %2640, %2637
-  %2645 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2638, i32 %2641, i32 %2644)  ; IMad(a,b,c)
-  %2646 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2633, i32 %2642, i32 %2645)  ; IMad(a,b,c)
-  %2647 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2628, i32 %2643, i32 %2646)  ; IMad(a,b,c)
-  %2648 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2647, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2649 = extractvalue %dx.types.ResRet.i32 %2648, 0
-  %2650 = extractvalue %dx.types.ResRet.i32 %2648, 1
-  %2651 = zext i32 %2649 to i64
-  %2652 = zext i32 %2650 to i64
-  %2653 = shl i64 %2652, 32
-  %2654 = or i64 %2651, %2653
-  %2655 = sitofp i64 %2654 to float
-  br label %2784
-
-; <label>:2656                                    ; preds = %2624
-  %2657 = icmp eq i32 %1021, 1
-  br i1 %2657, label %2658, label %2692
-
-; <label>:2658                                    ; preds = %2656
-  %2659 = add i32 %13, -1
-  %2660 = uitofp i32 %2659 to float
-  %2661 = call float @dx.op.binary.f32(i32 35, float %1343, float 0.000000e+00)  ; FMax(a,b)
-  %2662 = call float @dx.op.binary.f32(i32 36, float %2661, float %2660)  ; FMin(a,b)
-  %2663 = fptoui float %2662 to i32
-  %2664 = add i32 %15, -1
-  %2665 = uitofp i32 %2664 to float
-  %2666 = call float @dx.op.binary.f32(i32 35, float %2305, float 0.000000e+00)  ; FMax(a,b)
-  %2667 = call float @dx.op.binary.f32(i32 36, float %2666, float %2665)  ; FMin(a,b)
-  %2668 = fptoui float %2667 to i32
-  %2669 = uitofp i32 %2668 to float
-  %2670 = uitofp i32 %2663 to float
-  %2671 = fptoui float %45 to i32
-  %2672 = fptoui float %182 to i32
-  %2673 = fptoui float %2669 to i32
-  %2674 = fptoui float %2670 to i32
-  %2675 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2676 = extractvalue %dx.types.CBufRet.i32 %2675, 0
-  %2677 = extractvalue %dx.types.CBufRet.i32 %2675, 1
-  %2678 = extractvalue %dx.types.CBufRet.i32 %2675, 2
-  %2679 = extractvalue %dx.types.CBufRet.i32 %2675, 3
-  %2680 = mul i32 %2676, %2671
-  %2681 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2672, i32 %2677, i32 %2680)  ; IMad(a,b,c)
-  %2682 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2673, i32 %2678, i32 %2681)  ; IMad(a,b,c)
-  %2683 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2674, i32 %2679, i32 %2682)  ; IMad(a,b,c)
-  %2684 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2683, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2685 = extractvalue %dx.types.ResRet.i32 %2684, 0
-  %2686 = extractvalue %dx.types.ResRet.i32 %2684, 1
-  %2687 = zext i32 %2685 to i64
-  %2688 = zext i32 %2686 to i64
-  %2689 = shl i64 %2688, 32
-  %2690 = or i64 %2687, %2689
-  %2691 = sitofp i64 %2690 to float
-  br label %2784
-
-; <label>:2692                                    ; preds = %2656
-  %2693 = icmp eq i32 %1021, 2
-  br i1 %2693, label %2694, label %2784
-
-; <label>:2694                                    ; preds = %2692
-  %2695 = fsub fast float %22, %20
-  %2696 = fcmp fast olt float %1343, %20
-  br i1 %2696, label %2697, label %2710
-
-; <label>:2697                                    ; preds = %2694
-  %2698 = fsub fast float %20, %1343
-  %2699 = fdiv fast float %2698, %2695
-  %2700 = fptoui float %2699 to i32
-  %2701 = uitofp i32 %2700 to float
-  %2702 = fmul fast float %2701, %2695
-  %2703 = fsub fast float %2698, %2702
-  %2704 = and i32 %2700, 1
-  %2705 = icmp eq i32 %2704, 0
-  br i1 %2705, label %2706, label %2708
-
-; <label>:2706                                    ; preds = %2697
-  %2707 = fadd fast float %2703, %20
-  br label %2725
-
-; <label>:2708                                    ; preds = %2697
-  %2709 = fsub fast float %22, %2703
-  br label %2725
-
-; <label>:2710                                    ; preds = %2694
-  %2711 = fcmp fast ogt float %1343, %22
-  br i1 %2711, label %2712, label %2725
-
-; <label>:2712                                    ; preds = %2710
-  %2713 = fsub fast float %1343, %22
-  %2714 = fdiv fast float %2713, %2695
-  %2715 = fptoui float %2714 to i32
-  %2716 = uitofp i32 %2715 to float
-  %2717 = fmul fast float %2716, %2695
-  %2718 = fsub fast float %2713, %2717
-  %2719 = and i32 %2715, 1
-  %2720 = icmp eq i32 %2719, 0
-  br i1 %2720, label %2721, label %2723
-
-; <label>:2721                                    ; preds = %2712
-  %2722 = fsub fast float %22, %2718
-  br label %2725
-
-; <label>:2723                                    ; preds = %2712
-  %2724 = fadd fast float %2718, %20
-  br label %2725
-
-; <label>:2725                                    ; preds = %2723, %2721, %2710, %2708, %2706
-  %2726 = phi float [ %2707, %2706 ], [ %2709, %2708 ], [ %2722, %2721 ], [ %2724, %2723 ], [ %1343, %2710 ]
-  %2727 = fptoui float %2726 to i32
-  %2728 = fsub fast float %24, %20
-  %2729 = fcmp fast olt float %2305, %20
-  br i1 %2729, label %2730, label %2743
-
-; <label>:2730                                    ; preds = %2725
-  %2731 = fsub fast float %20, %2305
-  %2732 = fdiv fast float %2731, %2728
-  %2733 = fptoui float %2732 to i32
-  %2734 = uitofp i32 %2733 to float
-  %2735 = fmul fast float %2734, %2728
-  %2736 = fsub fast float %2731, %2735
-  %2737 = and i32 %2733, 1
-  %2738 = icmp eq i32 %2737, 0
-  br i1 %2738, label %2739, label %2741
-
-; <label>:2739                                    ; preds = %2730
-  %2740 = fadd fast float %2736, %20
-  br label %2758
-
-; <label>:2741                                    ; preds = %2730
-  %2742 = fsub fast float %24, %2736
-  br label %2758
-
-; <label>:2743                                    ; preds = %2725
-  %2744 = fcmp fast ogt float %2305, %24
-  br i1 %2744, label %2745, label %2758
-
-; <label>:2745                                    ; preds = %2743
-  %2746 = fsub fast float %2305, %24
-  %2747 = fdiv fast float %2746, %2728
-  %2748 = fptoui float %2747 to i32
-  %2749 = uitofp i32 %2748 to float
-  %2750 = fmul fast float %2749, %2728
-  %2751 = fsub fast float %2746, %2750
-  %2752 = and i32 %2748, 1
-  %2753 = icmp eq i32 %2752, 0
-  br i1 %2753, label %2754, label %2756
-
-; <label>:2754                                    ; preds = %2745
-  %2755 = fsub fast float %24, %2751
-  br label %2758
-
-; <label>:2756                                    ; preds = %2745
-  %2757 = fadd fast float %2751, %20
-  br label %2758
-
-; <label>:2758                                    ; preds = %2756, %2754, %2743, %2741, %2739
-  %2759 = phi float [ %2740, %2739 ], [ %2742, %2741 ], [ %2755, %2754 ], [ %2757, %2756 ], [ %2305, %2743 ]
-  %2760 = fptoui float %2759 to i32
-  %2761 = uitofp i32 %2760 to float
-  %2762 = uitofp i32 %2727 to float
-  %2763 = fptoui float %45 to i32
-  %2764 = fptoui float %182 to i32
-  %2765 = fptoui float %2761 to i32
-  %2766 = fptoui float %2762 to i32
-  %2767 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2768 = extractvalue %dx.types.CBufRet.i32 %2767, 0
-  %2769 = extractvalue %dx.types.CBufRet.i32 %2767, 1
-  %2770 = extractvalue %dx.types.CBufRet.i32 %2767, 2
-  %2771 = extractvalue %dx.types.CBufRet.i32 %2767, 3
-  %2772 = mul i32 %2768, %2763
-  %2773 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2764, i32 %2769, i32 %2772)  ; IMad(a,b,c)
-  %2774 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2765, i32 %2770, i32 %2773)  ; IMad(a,b,c)
-  %2775 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2766, i32 %2771, i32 %2774)  ; IMad(a,b,c)
-  %2776 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2775, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2777 = extractvalue %dx.types.ResRet.i32 %2776, 0
-  %2778 = extractvalue %dx.types.ResRet.i32 %2776, 1
-  %2779 = zext i32 %2777 to i64
-  %2780 = zext i32 %2778 to i64
-  %2781 = shl i64 %2780, 32
-  %2782 = or i64 %2779, %2781
-  %2783 = sitofp i64 %2782 to float
-  br label %2784
-
-; <label>:2784                                    ; preds = %2758, %2692, %2658, %2636, %2626
-  %2785 = phi float [ %2655, %2636 ], [ 0.000000e+00, %2626 ], [ %2691, %2658 ], [ %2783, %2758 ], [ 0.000000e+00, %2692 ]
-  br i1 %1022, label %2786, label %2816
-
-; <label>:2786                                    ; preds = %2784
-  %2787 = fcmp fast oge float %1504, 0.000000e+00
-  %2788 = fptoui float %1504 to i32
-  %2789 = icmp ult i32 %2788, %13
-  %2790 = and i1 %2787, %2789
-  %2791 = fcmp fast oge float %2305, 0.000000e+00
-  %2792 = and i1 %2791, %2790
-  %2793 = fptoui float %2305 to i32
-  %2794 = icmp ult i32 %2793, %15
-  %2795 = and i1 %2794, %2792
-  br i1 %2795, label %2796, label %2944
-
-; <label>:2796                                    ; preds = %2786
-  %2797 = fptoui float %45 to i32
-  %2798 = fptoui float %182 to i32
-  %2799 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2800 = extractvalue %dx.types.CBufRet.i32 %2799, 0
-  %2801 = extractvalue %dx.types.CBufRet.i32 %2799, 1
-  %2802 = extractvalue %dx.types.CBufRet.i32 %2799, 2
-  %2803 = extractvalue %dx.types.CBufRet.i32 %2799, 3
-  %2804 = mul i32 %2800, %2797
-  %2805 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2798, i32 %2801, i32 %2804)  ; IMad(a,b,c)
-  %2806 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2793, i32 %2802, i32 %2805)  ; IMad(a,b,c)
-  %2807 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2788, i32 %2803, i32 %2806)  ; IMad(a,b,c)
-  %2808 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2807, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2809 = extractvalue %dx.types.ResRet.i32 %2808, 0
-  %2810 = extractvalue %dx.types.ResRet.i32 %2808, 1
-  %2811 = zext i32 %2809 to i64
-  %2812 = zext i32 %2810 to i64
-  %2813 = shl i64 %2812, 32
-  %2814 = or i64 %2811, %2813
-  %2815 = sitofp i64 %2814 to float
-  br label %2944
-
-; <label>:2816                                    ; preds = %2784
-  %2817 = icmp eq i32 %1021, 1
-  br i1 %2817, label %2818, label %2852
-
-; <label>:2818                                    ; preds = %2816
-  %2819 = add i32 %13, -1
-  %2820 = uitofp i32 %2819 to float
-  %2821 = call float @dx.op.binary.f32(i32 35, float %1504, float 0.000000e+00)  ; FMax(a,b)
-  %2822 = call float @dx.op.binary.f32(i32 36, float %2821, float %2820)  ; FMin(a,b)
-  %2823 = fptoui float %2822 to i32
-  %2824 = add i32 %15, -1
-  %2825 = uitofp i32 %2824 to float
-  %2826 = call float @dx.op.binary.f32(i32 35, float %2305, float 0.000000e+00)  ; FMax(a,b)
-  %2827 = call float @dx.op.binary.f32(i32 36, float %2826, float %2825)  ; FMin(a,b)
-  %2828 = fptoui float %2827 to i32
-  %2829 = uitofp i32 %2828 to float
-  %2830 = uitofp i32 %2823 to float
-  %2831 = fptoui float %45 to i32
-  %2832 = fptoui float %182 to i32
-  %2833 = fptoui float %2829 to i32
-  %2834 = fptoui float %2830 to i32
-  %2835 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2836 = extractvalue %dx.types.CBufRet.i32 %2835, 0
-  %2837 = extractvalue %dx.types.CBufRet.i32 %2835, 1
-  %2838 = extractvalue %dx.types.CBufRet.i32 %2835, 2
-  %2839 = extractvalue %dx.types.CBufRet.i32 %2835, 3
-  %2840 = mul i32 %2836, %2831
-  %2841 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2832, i32 %2837, i32 %2840)  ; IMad(a,b,c)
-  %2842 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2833, i32 %2838, i32 %2841)  ; IMad(a,b,c)
-  %2843 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2834, i32 %2839, i32 %2842)  ; IMad(a,b,c)
-  %2844 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2843, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2845 = extractvalue %dx.types.ResRet.i32 %2844, 0
-  %2846 = extractvalue %dx.types.ResRet.i32 %2844, 1
-  %2847 = zext i32 %2845 to i64
-  %2848 = zext i32 %2846 to i64
-  %2849 = shl i64 %2848, 32
-  %2850 = or i64 %2847, %2849
-  %2851 = sitofp i64 %2850 to float
-  br label %2944
-
-; <label>:2852                                    ; preds = %2816
-  %2853 = icmp eq i32 %1021, 2
-  br i1 %2853, label %2854, label %2944
-
-; <label>:2854                                    ; preds = %2852
-  %2855 = fsub fast float %22, %20
-  %2856 = fcmp fast olt float %1504, %20
-  br i1 %2856, label %2857, label %2870
-
-; <label>:2857                                    ; preds = %2854
-  %2858 = fsub fast float %20, %1504
-  %2859 = fdiv fast float %2858, %2855
-  %2860 = fptoui float %2859 to i32
-  %2861 = uitofp i32 %2860 to float
-  %2862 = fmul fast float %2861, %2855
-  %2863 = fsub fast float %2858, %2862
-  %2864 = and i32 %2860, 1
-  %2865 = icmp eq i32 %2864, 0
-  br i1 %2865, label %2866, label %2868
-
-; <label>:2866                                    ; preds = %2857
-  %2867 = fadd fast float %2863, %20
-  br label %2885
-
-; <label>:2868                                    ; preds = %2857
-  %2869 = fsub fast float %22, %2863
-  br label %2885
-
-; <label>:2870                                    ; preds = %2854
-  %2871 = fcmp fast ogt float %1504, %22
-  br i1 %2871, label %2872, label %2885
-
-; <label>:2872                                    ; preds = %2870
-  %2873 = fsub fast float %1504, %22
-  %2874 = fdiv fast float %2873, %2855
-  %2875 = fptoui float %2874 to i32
-  %2876 = uitofp i32 %2875 to float
-  %2877 = fmul fast float %2876, %2855
-  %2878 = fsub fast float %2873, %2877
-  %2879 = and i32 %2875, 1
-  %2880 = icmp eq i32 %2879, 0
-  br i1 %2880, label %2881, label %2883
-
-; <label>:2881                                    ; preds = %2872
-  %2882 = fsub fast float %22, %2878
-  br label %2885
-
-; <label>:2883                                    ; preds = %2872
-  %2884 = fadd fast float %2878, %20
-  br label %2885
-
-; <label>:2885                                    ; preds = %2883, %2881, %2870, %2868, %2866
-  %2886 = phi float [ %2867, %2866 ], [ %2869, %2868 ], [ %2882, %2881 ], [ %2884, %2883 ], [ %1504, %2870 ]
-  %2887 = fptoui float %2886 to i32
-  %2888 = fsub fast float %24, %20
-  %2889 = fcmp fast olt float %2305, %20
-  br i1 %2889, label %2890, label %2903
-
-; <label>:2890                                    ; preds = %2885
-  %2891 = fsub fast float %20, %2305
-  %2892 = fdiv fast float %2891, %2888
-  %2893 = fptoui float %2892 to i32
-  %2894 = uitofp i32 %2893 to float
-  %2895 = fmul fast float %2894, %2888
-  %2896 = fsub fast float %2891, %2895
-  %2897 = and i32 %2893, 1
-  %2898 = icmp eq i32 %2897, 0
-  br i1 %2898, label %2899, label %2901
-
-; <label>:2899                                    ; preds = %2890
-  %2900 = fadd fast float %2896, %20
-  br label %2918
-
-; <label>:2901                                    ; preds = %2890
-  %2902 = fsub fast float %24, %2896
-  br label %2918
-
-; <label>:2903                                    ; preds = %2885
-  %2904 = fcmp fast ogt float %2305, %24
-  br i1 %2904, label %2905, label %2918
-
-; <label>:2905                                    ; preds = %2903
-  %2906 = fsub fast float %2305, %24
-  %2907 = fdiv fast float %2906, %2888
-  %2908 = fptoui float %2907 to i32
-  %2909 = uitofp i32 %2908 to float
-  %2910 = fmul fast float %2909, %2888
-  %2911 = fsub fast float %2906, %2910
-  %2912 = and i32 %2908, 1
-  %2913 = icmp eq i32 %2912, 0
-  br i1 %2913, label %2914, label %2916
-
-; <label>:2914                                    ; preds = %2905
-  %2915 = fsub fast float %24, %2911
-  br label %2918
-
-; <label>:2916                                    ; preds = %2905
-  %2917 = fadd fast float %2911, %20
-  br label %2918
-
-; <label>:2918                                    ; preds = %2916, %2914, %2903, %2901, %2899
-  %2919 = phi float [ %2900, %2899 ], [ %2902, %2901 ], [ %2915, %2914 ], [ %2917, %2916 ], [ %2305, %2903 ]
-  %2920 = fptoui float %2919 to i32
-  %2921 = uitofp i32 %2920 to float
-  %2922 = uitofp i32 %2887 to float
-  %2923 = fptoui float %45 to i32
-  %2924 = fptoui float %182 to i32
-  %2925 = fptoui float %2921 to i32
-  %2926 = fptoui float %2922 to i32
-  %2927 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2928 = extractvalue %dx.types.CBufRet.i32 %2927, 0
-  %2929 = extractvalue %dx.types.CBufRet.i32 %2927, 1
-  %2930 = extractvalue %dx.types.CBufRet.i32 %2927, 2
-  %2931 = extractvalue %dx.types.CBufRet.i32 %2927, 3
-  %2932 = mul i32 %2928, %2923
-  %2933 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2924, i32 %2929, i32 %2932)  ; IMad(a,b,c)
-  %2934 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2925, i32 %2930, i32 %2933)  ; IMad(a,b,c)
-  %2935 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2926, i32 %2931, i32 %2934)  ; IMad(a,b,c)
-  %2936 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2935, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2937 = extractvalue %dx.types.ResRet.i32 %2936, 0
-  %2938 = extractvalue %dx.types.ResRet.i32 %2936, 1
-  %2939 = zext i32 %2937 to i64
-  %2940 = zext i32 %2938 to i64
-  %2941 = shl i64 %2940, 32
-  %2942 = or i64 %2939, %2941
-  %2943 = sitofp i64 %2942 to float
-  br label %2944
-
-; <label>:2944                                    ; preds = %2918, %2852, %2818, %2796, %2786
-  %2945 = phi float [ %2815, %2796 ], [ 0.000000e+00, %2786 ], [ %2851, %2818 ], [ %2943, %2918 ], [ 0.000000e+00, %2852 ]
-  %2946 = fadd fast float %1019, 2.000000e+00
-  br i1 %1022, label %2947, label %2977
-
-; <label>:2947                                    ; preds = %2944
-  %2948 = fcmp fast oge float %1018, 0.000000e+00
-  %2949 = fptoui float %1018 to i32
-  %2950 = icmp ult i32 %2949, %13
-  %2951 = and i1 %2948, %2950
-  %2952 = fcmp fast oge float %2946, 0.000000e+00
-  %2953 = and i1 %2952, %2951
-  %2954 = fptoui float %2946 to i32
-  %2955 = icmp ult i32 %2954, %15
-  %2956 = and i1 %2955, %2953
-  br i1 %2956, label %2957, label %3105
-
-; <label>:2957                                    ; preds = %2947
-  %2958 = fptoui float %45 to i32
-  %2959 = fptoui float %182 to i32
-  %2960 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2961 = extractvalue %dx.types.CBufRet.i32 %2960, 0
-  %2962 = extractvalue %dx.types.CBufRet.i32 %2960, 1
-  %2963 = extractvalue %dx.types.CBufRet.i32 %2960, 2
-  %2964 = extractvalue %dx.types.CBufRet.i32 %2960, 3
-  %2965 = mul i32 %2961, %2958
-  %2966 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2959, i32 %2962, i32 %2965)  ; IMad(a,b,c)
-  %2967 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2954, i32 %2963, i32 %2966)  ; IMad(a,b,c)
-  %2968 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2949, i32 %2964, i32 %2967)  ; IMad(a,b,c)
-  %2969 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2968, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2970 = extractvalue %dx.types.ResRet.i32 %2969, 0
-  %2971 = extractvalue %dx.types.ResRet.i32 %2969, 1
-  %2972 = zext i32 %2970 to i64
-  %2973 = zext i32 %2971 to i64
-  %2974 = shl i64 %2973, 32
-  %2975 = or i64 %2972, %2974
-  %2976 = sitofp i64 %2975 to float
-  br label %3105
-
-; <label>:2977                                    ; preds = %2944
-  %2978 = icmp eq i32 %1021, 1
-  br i1 %2978, label %2979, label %3013
-
-; <label>:2979                                    ; preds = %2977
-  %2980 = add i32 %13, -1
-  %2981 = uitofp i32 %2980 to float
-  %2982 = call float @dx.op.binary.f32(i32 35, float %1018, float 0.000000e+00)  ; FMax(a,b)
-  %2983 = call float @dx.op.binary.f32(i32 36, float %2982, float %2981)  ; FMin(a,b)
-  %2984 = fptoui float %2983 to i32
-  %2985 = add i32 %15, -1
-  %2986 = uitofp i32 %2985 to float
-  %2987 = call float @dx.op.binary.f32(i32 35, float %2946, float 0.000000e+00)  ; FMax(a,b)
-  %2988 = call float @dx.op.binary.f32(i32 36, float %2987, float %2986)  ; FMin(a,b)
-  %2989 = fptoui float %2988 to i32
-  %2990 = uitofp i32 %2989 to float
-  %2991 = uitofp i32 %2984 to float
-  %2992 = fptoui float %45 to i32
-  %2993 = fptoui float %182 to i32
-  %2994 = fptoui float %2990 to i32
-  %2995 = fptoui float %2991 to i32
-  %2996 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2997 = extractvalue %dx.types.CBufRet.i32 %2996, 0
-  %2998 = extractvalue %dx.types.CBufRet.i32 %2996, 1
-  %2999 = extractvalue %dx.types.CBufRet.i32 %2996, 2
-  %3000 = extractvalue %dx.types.CBufRet.i32 %2996, 3
-  %3001 = mul i32 %2997, %2992
-  %3002 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2993, i32 %2998, i32 %3001)  ; IMad(a,b,c)
-  %3003 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2994, i32 %2999, i32 %3002)  ; IMad(a,b,c)
-  %3004 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2995, i32 %3000, i32 %3003)  ; IMad(a,b,c)
-  %3005 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3004, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3006 = extractvalue %dx.types.ResRet.i32 %3005, 0
-  %3007 = extractvalue %dx.types.ResRet.i32 %3005, 1
-  %3008 = zext i32 %3006 to i64
-  %3009 = zext i32 %3007 to i64
-  %3010 = shl i64 %3009, 32
-  %3011 = or i64 %3008, %3010
-  %3012 = sitofp i64 %3011 to float
-  br label %3105
-
-; <label>:3013                                    ; preds = %2977
-  %3014 = icmp eq i32 %1021, 2
-  br i1 %3014, label %3015, label %3105
-
-; <label>:3015                                    ; preds = %3013
-  %3016 = fsub fast float %22, %20
-  %3017 = fcmp fast olt float %1018, %20
-  br i1 %3017, label %3018, label %3031
-
-; <label>:3018                                    ; preds = %3015
-  %3019 = fsub fast float %20, %1018
-  %3020 = fdiv fast float %3019, %3016
-  %3021 = fptoui float %3020 to i32
-  %3022 = uitofp i32 %3021 to float
-  %3023 = fmul fast float %3022, %3016
-  %3024 = fsub fast float %3019, %3023
-  %3025 = and i32 %3021, 1
-  %3026 = icmp eq i32 %3025, 0
-  br i1 %3026, label %3027, label %3029
-
-; <label>:3027                                    ; preds = %3018
-  %3028 = fadd fast float %3024, %20
-  br label %3046
-
-; <label>:3029                                    ; preds = %3018
-  %3030 = fsub fast float %22, %3024
-  br label %3046
-
-; <label>:3031                                    ; preds = %3015
-  %3032 = fcmp fast ogt float %1018, %22
-  br i1 %3032, label %3033, label %3046
-
-; <label>:3033                                    ; preds = %3031
-  %3034 = fsub fast float %1018, %22
-  %3035 = fdiv fast float %3034, %3016
-  %3036 = fptoui float %3035 to i32
-  %3037 = uitofp i32 %3036 to float
-  %3038 = fmul fast float %3037, %3016
-  %3039 = fsub fast float %3034, %3038
-  %3040 = and i32 %3036, 1
-  %3041 = icmp eq i32 %3040, 0
-  br i1 %3041, label %3042, label %3044
-
-; <label>:3042                                    ; preds = %3033
-  %3043 = fsub fast float %22, %3039
-  br label %3046
-
-; <label>:3044                                    ; preds = %3033
-  %3045 = fadd fast float %3039, %20
-  br label %3046
-
-; <label>:3046                                    ; preds = %3044, %3042, %3031, %3029, %3027
-  %3047 = phi float [ %3028, %3027 ], [ %3030, %3029 ], [ %3043, %3042 ], [ %3045, %3044 ], [ %1018, %3031 ]
-  %3048 = fptoui float %3047 to i32
-  %3049 = fsub fast float %24, %20
-  %3050 = fcmp fast olt float %2946, %20
-  br i1 %3050, label %3051, label %3064
-
-; <label>:3051                                    ; preds = %3046
-  %3052 = fsub fast float %20, %2946
-  %3053 = fdiv fast float %3052, %3049
-  %3054 = fptoui float %3053 to i32
-  %3055 = uitofp i32 %3054 to float
-  %3056 = fmul fast float %3055, %3049
-  %3057 = fsub fast float %3052, %3056
-  %3058 = and i32 %3054, 1
-  %3059 = icmp eq i32 %3058, 0
-  br i1 %3059, label %3060, label %3062
-
-; <label>:3060                                    ; preds = %3051
-  %3061 = fadd fast float %3057, %20
-  br label %3079
-
-; <label>:3062                                    ; preds = %3051
-  %3063 = fsub fast float %24, %3057
-  br label %3079
-
-; <label>:3064                                    ; preds = %3046
-  %3065 = fcmp fast ogt float %2946, %24
-  br i1 %3065, label %3066, label %3079
-
-; <label>:3066                                    ; preds = %3064
-  %3067 = fsub fast float %2946, %24
-  %3068 = fdiv fast float %3067, %3049
-  %3069 = fptoui float %3068 to i32
-  %3070 = uitofp i32 %3069 to float
-  %3071 = fmul fast float %3070, %3049
-  %3072 = fsub fast float %3067, %3071
-  %3073 = and i32 %3069, 1
-  %3074 = icmp eq i32 %3073, 0
-  br i1 %3074, label %3075, label %3077
-
-; <label>:3075                                    ; preds = %3066
-  %3076 = fsub fast float %24, %3072
-  br label %3079
-
-; <label>:3077                                    ; preds = %3066
-  %3078 = fadd fast float %3072, %20
-  br label %3079
-
-; <label>:3079                                    ; preds = %3077, %3075, %3064, %3062, %3060
-  %3080 = phi float [ %3061, %3060 ], [ %3063, %3062 ], [ %3076, %3075 ], [ %3078, %3077 ], [ %2946, %3064 ]
-  %3081 = fptoui float %3080 to i32
-  %3082 = uitofp i32 %3081 to float
-  %3083 = uitofp i32 %3048 to float
-  %3084 = fptoui float %45 to i32
-  %3085 = fptoui float %182 to i32
-  %3086 = fptoui float %3082 to i32
-  %3087 = fptoui float %3083 to i32
-  %3088 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3089 = extractvalue %dx.types.CBufRet.i32 %3088, 0
-  %3090 = extractvalue %dx.types.CBufRet.i32 %3088, 1
-  %3091 = extractvalue %dx.types.CBufRet.i32 %3088, 2
-  %3092 = extractvalue %dx.types.CBufRet.i32 %3088, 3
-  %3093 = mul i32 %3089, %3084
-  %3094 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3085, i32 %3090, i32 %3093)  ; IMad(a,b,c)
-  %3095 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3086, i32 %3091, i32 %3094)  ; IMad(a,b,c)
-  %3096 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3087, i32 %3092, i32 %3095)  ; IMad(a,b,c)
-  %3097 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3096, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3098 = extractvalue %dx.types.ResRet.i32 %3097, 0
-  %3099 = extractvalue %dx.types.ResRet.i32 %3097, 1
-  %3100 = zext i32 %3098 to i64
-  %3101 = zext i32 %3099 to i64
-  %3102 = shl i64 %3101, 32
-  %3103 = or i64 %3100, %3102
-  %3104 = sitofp i64 %3103 to float
-  br label %3105
-
-; <label>:3105                                    ; preds = %3079, %3013, %2979, %2957, %2947
-  %3106 = phi float [ %2976, %2957 ], [ 0.000000e+00, %2947 ], [ %3012, %2979 ], [ %3104, %3079 ], [ 0.000000e+00, %3013 ]
-  br i1 %1022, label %3107, label %3137
-
-; <label>:3107                                    ; preds = %3105
-  %3108 = fcmp fast oge float %1017, 0.000000e+00
-  %3109 = fptoui float %1017 to i32
-  %3110 = icmp ult i32 %3109, %13
-  %3111 = and i1 %3108, %3110
-  %3112 = fcmp fast oge float %2946, 0.000000e+00
-  %3113 = and i1 %3112, %3111
-  %3114 = fptoui float %2946 to i32
-  %3115 = icmp ult i32 %3114, %15
-  %3116 = and i1 %3115, %3113
-  br i1 %3116, label %3117, label %3265
-
-; <label>:3117                                    ; preds = %3107
-  %3118 = fptoui float %45 to i32
-  %3119 = fptoui float %182 to i32
-  %3120 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3121 = extractvalue %dx.types.CBufRet.i32 %3120, 0
-  %3122 = extractvalue %dx.types.CBufRet.i32 %3120, 1
-  %3123 = extractvalue %dx.types.CBufRet.i32 %3120, 2
-  %3124 = extractvalue %dx.types.CBufRet.i32 %3120, 3
-  %3125 = mul i32 %3121, %3118
-  %3126 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3119, i32 %3122, i32 %3125)  ; IMad(a,b,c)
-  %3127 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3114, i32 %3123, i32 %3126)  ; IMad(a,b,c)
-  %3128 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3109, i32 %3124, i32 %3127)  ; IMad(a,b,c)
-  %3129 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3128, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3130 = extractvalue %dx.types.ResRet.i32 %3129, 0
-  %3131 = extractvalue %dx.types.ResRet.i32 %3129, 1
-  %3132 = zext i32 %3130 to i64
-  %3133 = zext i32 %3131 to i64
-  %3134 = shl i64 %3133, 32
-  %3135 = or i64 %3132, %3134
-  %3136 = sitofp i64 %3135 to float
-  br label %3265
-
-; <label>:3137                                    ; preds = %3105
-  %3138 = icmp eq i32 %1021, 1
-  br i1 %3138, label %3139, label %3173
-
-; <label>:3139                                    ; preds = %3137
-  %3140 = add i32 %13, -1
-  %3141 = uitofp i32 %3140 to float
-  %3142 = call float @dx.op.binary.f32(i32 35, float %1017, float 0.000000e+00)  ; FMax(a,b)
-  %3143 = call float @dx.op.binary.f32(i32 36, float %3142, float %3141)  ; FMin(a,b)
-  %3144 = fptoui float %3143 to i32
-  %3145 = add i32 %15, -1
-  %3146 = uitofp i32 %3145 to float
-  %3147 = call float @dx.op.binary.f32(i32 35, float %2946, float 0.000000e+00)  ; FMax(a,b)
-  %3148 = call float @dx.op.binary.f32(i32 36, float %3147, float %3146)  ; FMin(a,b)
-  %3149 = fptoui float %3148 to i32
-  %3150 = uitofp i32 %3149 to float
-  %3151 = uitofp i32 %3144 to float
-  %3152 = fptoui float %45 to i32
-  %3153 = fptoui float %182 to i32
-  %3154 = fptoui float %3150 to i32
-  %3155 = fptoui float %3151 to i32
-  %3156 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3157 = extractvalue %dx.types.CBufRet.i32 %3156, 0
-  %3158 = extractvalue %dx.types.CBufRet.i32 %3156, 1
-  %3159 = extractvalue %dx.types.CBufRet.i32 %3156, 2
-  %3160 = extractvalue %dx.types.CBufRet.i32 %3156, 3
-  %3161 = mul i32 %3157, %3152
-  %3162 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3153, i32 %3158, i32 %3161)  ; IMad(a,b,c)
-  %3163 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3154, i32 %3159, i32 %3162)  ; IMad(a,b,c)
-  %3164 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3155, i32 %3160, i32 %3163)  ; IMad(a,b,c)
-  %3165 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3164, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3166 = extractvalue %dx.types.ResRet.i32 %3165, 0
-  %3167 = extractvalue %dx.types.ResRet.i32 %3165, 1
-  %3168 = zext i32 %3166 to i64
-  %3169 = zext i32 %3167 to i64
-  %3170 = shl i64 %3169, 32
-  %3171 = or i64 %3168, %3170
-  %3172 = sitofp i64 %3171 to float
-  br label %3265
-
-; <label>:3173                                    ; preds = %3137
-  %3174 = icmp eq i32 %1021, 2
-  br i1 %3174, label %3175, label %3265
-
-; <label>:3175                                    ; preds = %3173
-  %3176 = fsub fast float %22, %20
-  %3177 = fcmp fast olt float %1017, %20
-  br i1 %3177, label %3178, label %3191
-
-; <label>:3178                                    ; preds = %3175
-  %3179 = fsub fast float %20, %1017
-  %3180 = fdiv fast float %3179, %3176
-  %3181 = fptoui float %3180 to i32
-  %3182 = uitofp i32 %3181 to float
-  %3183 = fmul fast float %3182, %3176
-  %3184 = fsub fast float %3179, %3183
-  %3185 = and i32 %3181, 1
-  %3186 = icmp eq i32 %3185, 0
-  br i1 %3186, label %3187, label %3189
-
-; <label>:3187                                    ; preds = %3178
-  %3188 = fadd fast float %3184, %20
-  br label %3206
-
-; <label>:3189                                    ; preds = %3178
-  %3190 = fsub fast float %22, %3184
-  br label %3206
-
-; <label>:3191                                    ; preds = %3175
-  %3192 = fcmp fast ogt float %1017, %22
-  br i1 %3192, label %3193, label %3206
-
-; <label>:3193                                    ; preds = %3191
-  %3194 = fsub fast float %1017, %22
-  %3195 = fdiv fast float %3194, %3176
-  %3196 = fptoui float %3195 to i32
-  %3197 = uitofp i32 %3196 to float
-  %3198 = fmul fast float %3197, %3176
-  %3199 = fsub fast float %3194, %3198
-  %3200 = and i32 %3196, 1
-  %3201 = icmp eq i32 %3200, 0
-  br i1 %3201, label %3202, label %3204
-
-; <label>:3202                                    ; preds = %3193
-  %3203 = fsub fast float %22, %3199
-  br label %3206
-
-; <label>:3204                                    ; preds = %3193
-  %3205 = fadd fast float %3199, %20
-  br label %3206
-
-; <label>:3206                                    ; preds = %3204, %3202, %3191, %3189, %3187
-  %3207 = phi float [ %3188, %3187 ], [ %3190, %3189 ], [ %3203, %3202 ], [ %3205, %3204 ], [ %1017, %3191 ]
-  %3208 = fptoui float %3207 to i32
-  %3209 = fsub fast float %24, %20
-  %3210 = fcmp fast olt float %2946, %20
-  br i1 %3210, label %3211, label %3224
-
-; <label>:3211                                    ; preds = %3206
-  %3212 = fsub fast float %20, %2946
-  %3213 = fdiv fast float %3212, %3209
-  %3214 = fptoui float %3213 to i32
-  %3215 = uitofp i32 %3214 to float
-  %3216 = fmul fast float %3215, %3209
-  %3217 = fsub fast float %3212, %3216
-  %3218 = and i32 %3214, 1
-  %3219 = icmp eq i32 %3218, 0
-  br i1 %3219, label %3220, label %3222
-
-; <label>:3220                                    ; preds = %3211
-  %3221 = fadd fast float %3217, %20
-  br label %3239
-
-; <label>:3222                                    ; preds = %3211
-  %3223 = fsub fast float %24, %3217
-  br label %3239
-
-; <label>:3224                                    ; preds = %3206
-  %3225 = fcmp fast ogt float %2946, %24
-  br i1 %3225, label %3226, label %3239
-
-; <label>:3226                                    ; preds = %3224
-  %3227 = fsub fast float %2946, %24
-  %3228 = fdiv fast float %3227, %3209
-  %3229 = fptoui float %3228 to i32
-  %3230 = uitofp i32 %3229 to float
-  %3231 = fmul fast float %3230, %3209
-  %3232 = fsub fast float %3227, %3231
-  %3233 = and i32 %3229, 1
-  %3234 = icmp eq i32 %3233, 0
-  br i1 %3234, label %3235, label %3237
-
-; <label>:3235                                    ; preds = %3226
-  %3236 = fsub fast float %24, %3232
-  br label %3239
-
-; <label>:3237                                    ; preds = %3226
-  %3238 = fadd fast float %3232, %20
-  br label %3239
-
-; <label>:3239                                    ; preds = %3237, %3235, %3224, %3222, %3220
-  %3240 = phi float [ %3221, %3220 ], [ %3223, %3222 ], [ %3236, %3235 ], [ %3238, %3237 ], [ %2946, %3224 ]
-  %3241 = fptoui float %3240 to i32
-  %3242 = uitofp i32 %3241 to float
-  %3243 = uitofp i32 %3208 to float
-  %3244 = fptoui float %45 to i32
-  %3245 = fptoui float %182 to i32
-  %3246 = fptoui float %3242 to i32
-  %3247 = fptoui float %3243 to i32
-  %3248 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3249 = extractvalue %dx.types.CBufRet.i32 %3248, 0
-  %3250 = extractvalue %dx.types.CBufRet.i32 %3248, 1
-  %3251 = extractvalue %dx.types.CBufRet.i32 %3248, 2
-  %3252 = extractvalue %dx.types.CBufRet.i32 %3248, 3
-  %3253 = mul i32 %3249, %3244
-  %3254 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3245, i32 %3250, i32 %3253)  ; IMad(a,b,c)
-  %3255 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3246, i32 %3251, i32 %3254)  ; IMad(a,b,c)
-  %3256 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3247, i32 %3252, i32 %3255)  ; IMad(a,b,c)
-  %3257 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3256, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3258 = extractvalue %dx.types.ResRet.i32 %3257, 0
-  %3259 = extractvalue %dx.types.ResRet.i32 %3257, 1
-  %3260 = zext i32 %3258 to i64
-  %3261 = zext i32 %3259 to i64
-  %3262 = shl i64 %3261, 32
-  %3263 = or i64 %3260, %3262
-  %3264 = sitofp i64 %3263 to float
-  br label %3265
-
-; <label>:3265                                    ; preds = %3239, %3173, %3139, %3117, %3107
-  %3266 = phi float [ %3136, %3117 ], [ 0.000000e+00, %3107 ], [ %3172, %3139 ], [ %3264, %3239 ], [ 0.000000e+00, %3173 ]
-  br i1 %1022, label %3267, label %3297
-
-; <label>:3267                                    ; preds = %3265
-  %3268 = fcmp fast oge float %1343, 0.000000e+00
-  %3269 = fptoui float %1343 to i32
-  %3270 = icmp ult i32 %3269, %13
-  %3271 = and i1 %3268, %3270
-  %3272 = fcmp fast oge float %2946, 0.000000e+00
-  %3273 = and i1 %3272, %3271
-  %3274 = fptoui float %2946 to i32
-  %3275 = icmp ult i32 %3274, %15
-  %3276 = and i1 %3275, %3273
-  br i1 %3276, label %3277, label %3425
-
-; <label>:3277                                    ; preds = %3267
-  %3278 = fptoui float %45 to i32
-  %3279 = fptoui float %182 to i32
-  %3280 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3281 = extractvalue %dx.types.CBufRet.i32 %3280, 0
-  %3282 = extractvalue %dx.types.CBufRet.i32 %3280, 1
-  %3283 = extractvalue %dx.types.CBufRet.i32 %3280, 2
-  %3284 = extractvalue %dx.types.CBufRet.i32 %3280, 3
-  %3285 = mul i32 %3281, %3278
-  %3286 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3279, i32 %3282, i32 %3285)  ; IMad(a,b,c)
-  %3287 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3274, i32 %3283, i32 %3286)  ; IMad(a,b,c)
-  %3288 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3269, i32 %3284, i32 %3287)  ; IMad(a,b,c)
-  %3289 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3288, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3290 = extractvalue %dx.types.ResRet.i32 %3289, 0
-  %3291 = extractvalue %dx.types.ResRet.i32 %3289, 1
-  %3292 = zext i32 %3290 to i64
-  %3293 = zext i32 %3291 to i64
-  %3294 = shl i64 %3293, 32
-  %3295 = or i64 %3292, %3294
-  %3296 = sitofp i64 %3295 to float
-  br label %3425
-
-; <label>:3297                                    ; preds = %3265
-  %3298 = icmp eq i32 %1021, 1
-  br i1 %3298, label %3299, label %3333
-
-; <label>:3299                                    ; preds = %3297
-  %3300 = add i32 %13, -1
-  %3301 = uitofp i32 %3300 to float
-  %3302 = call float @dx.op.binary.f32(i32 35, float %1343, float 0.000000e+00)  ; FMax(a,b)
-  %3303 = call float @dx.op.binary.f32(i32 36, float %3302, float %3301)  ; FMin(a,b)
-  %3304 = fptoui float %3303 to i32
-  %3305 = add i32 %15, -1
-  %3306 = uitofp i32 %3305 to float
-  %3307 = call float @dx.op.binary.f32(i32 35, float %2946, float 0.000000e+00)  ; FMax(a,b)
-  %3308 = call float @dx.op.binary.f32(i32 36, float %3307, float %3306)  ; FMin(a,b)
-  %3309 = fptoui float %3308 to i32
-  %3310 = uitofp i32 %3309 to float
-  %3311 = uitofp i32 %3304 to float
-  %3312 = fptoui float %45 to i32
-  %3313 = fptoui float %182 to i32
-  %3314 = fptoui float %3310 to i32
-  %3315 = fptoui float %3311 to i32
-  %3316 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3317 = extractvalue %dx.types.CBufRet.i32 %3316, 0
-  %3318 = extractvalue %dx.types.CBufRet.i32 %3316, 1
-  %3319 = extractvalue %dx.types.CBufRet.i32 %3316, 2
-  %3320 = extractvalue %dx.types.CBufRet.i32 %3316, 3
-  %3321 = mul i32 %3317, %3312
-  %3322 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3313, i32 %3318, i32 %3321)  ; IMad(a,b,c)
-  %3323 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3314, i32 %3319, i32 %3322)  ; IMad(a,b,c)
-  %3324 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3315, i32 %3320, i32 %3323)  ; IMad(a,b,c)
-  %3325 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3324, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3326 = extractvalue %dx.types.ResRet.i32 %3325, 0
-  %3327 = extractvalue %dx.types.ResRet.i32 %3325, 1
-  %3328 = zext i32 %3326 to i64
-  %3329 = zext i32 %3327 to i64
-  %3330 = shl i64 %3329, 32
-  %3331 = or i64 %3328, %3330
-  %3332 = sitofp i64 %3331 to float
-  br label %3425
-
-; <label>:3333                                    ; preds = %3297
-  %3334 = icmp eq i32 %1021, 2
-  br i1 %3334, label %3335, label %3425
-
-; <label>:3335                                    ; preds = %3333
-  %3336 = fsub fast float %22, %20
-  %3337 = fcmp fast olt float %1343, %20
-  br i1 %3337, label %3338, label %3351
-
-; <label>:3338                                    ; preds = %3335
-  %3339 = fsub fast float %20, %1343
-  %3340 = fdiv fast float %3339, %3336
-  %3341 = fptoui float %3340 to i32
-  %3342 = uitofp i32 %3341 to float
-  %3343 = fmul fast float %3342, %3336
-  %3344 = fsub fast float %3339, %3343
-  %3345 = and i32 %3341, 1
-  %3346 = icmp eq i32 %3345, 0
-  br i1 %3346, label %3347, label %3349
-
-; <label>:3347                                    ; preds = %3338
-  %3348 = fadd fast float %3344, %20
-  br label %3366
-
-; <label>:3349                                    ; preds = %3338
-  %3350 = fsub fast float %22, %3344
-  br label %3366
-
-; <label>:3351                                    ; preds = %3335
-  %3352 = fcmp fast ogt float %1343, %22
-  br i1 %3352, label %3353, label %3366
-
-; <label>:3353                                    ; preds = %3351
-  %3354 = fsub fast float %1343, %22
-  %3355 = fdiv fast float %3354, %3336
-  %3356 = fptoui float %3355 to i32
-  %3357 = uitofp i32 %3356 to float
-  %3358 = fmul fast float %3357, %3336
-  %3359 = fsub fast float %3354, %3358
-  %3360 = and i32 %3356, 1
-  %3361 = icmp eq i32 %3360, 0
-  br i1 %3361, label %3362, label %3364
-
-; <label>:3362                                    ; preds = %3353
-  %3363 = fsub fast float %22, %3359
-  br label %3366
-
-; <label>:3364                                    ; preds = %3353
-  %3365 = fadd fast float %3359, %20
-  br label %3366
-
-; <label>:3366                                    ; preds = %3364, %3362, %3351, %3349, %3347
-  %3367 = phi float [ %3348, %3347 ], [ %3350, %3349 ], [ %3363, %3362 ], [ %3365, %3364 ], [ %1343, %3351 ]
-  %3368 = fptoui float %3367 to i32
-  %3369 = fsub fast float %24, %20
-  %3370 = fcmp fast olt float %2946, %20
-  br i1 %3370, label %3371, label %3384
-
-; <label>:3371                                    ; preds = %3366
-  %3372 = fsub fast float %20, %2946
-  %3373 = fdiv fast float %3372, %3369
-  %3374 = fptoui float %3373 to i32
-  %3375 = uitofp i32 %3374 to float
-  %3376 = fmul fast float %3375, %3369
-  %3377 = fsub fast float %3372, %3376
-  %3378 = and i32 %3374, 1
-  %3379 = icmp eq i32 %3378, 0
-  br i1 %3379, label %3380, label %3382
-
-; <label>:3380                                    ; preds = %3371
-  %3381 = fadd fast float %3377, %20
-  br label %3399
-
-; <label>:3382                                    ; preds = %3371
-  %3383 = fsub fast float %24, %3377
-  br label %3399
-
-; <label>:3384                                    ; preds = %3366
-  %3385 = fcmp fast ogt float %2946, %24
-  br i1 %3385, label %3386, label %3399
-
-; <label>:3386                                    ; preds = %3384
-  %3387 = fsub fast float %2946, %24
-  %3388 = fdiv fast float %3387, %3369
-  %3389 = fptoui float %3388 to i32
-  %3390 = uitofp i32 %3389 to float
-  %3391 = fmul fast float %3390, %3369
-  %3392 = fsub fast float %3387, %3391
-  %3393 = and i32 %3389, 1
-  %3394 = icmp eq i32 %3393, 0
-  br i1 %3394, label %3395, label %3397
-
-; <label>:3395                                    ; preds = %3386
-  %3396 = fsub fast float %24, %3392
-  br label %3399
-
-; <label>:3397                                    ; preds = %3386
-  %3398 = fadd fast float %3392, %20
-  br label %3399
-
-; <label>:3399                                    ; preds = %3397, %3395, %3384, %3382, %3380
-  %3400 = phi float [ %3381, %3380 ], [ %3383, %3382 ], [ %3396, %3395 ], [ %3398, %3397 ], [ %2946, %3384 ]
-  %3401 = fptoui float %3400 to i32
-  %3402 = uitofp i32 %3401 to float
-  %3403 = uitofp i32 %3368 to float
-  %3404 = fptoui float %45 to i32
-  %3405 = fptoui float %182 to i32
-  %3406 = fptoui float %3402 to i32
-  %3407 = fptoui float %3403 to i32
-  %3408 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3409 = extractvalue %dx.types.CBufRet.i32 %3408, 0
-  %3410 = extractvalue %dx.types.CBufRet.i32 %3408, 1
-  %3411 = extractvalue %dx.types.CBufRet.i32 %3408, 2
-  %3412 = extractvalue %dx.types.CBufRet.i32 %3408, 3
-  %3413 = mul i32 %3409, %3404
-  %3414 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3405, i32 %3410, i32 %3413)  ; IMad(a,b,c)
-  %3415 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3406, i32 %3411, i32 %3414)  ; IMad(a,b,c)
-  %3416 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3407, i32 %3412, i32 %3415)  ; IMad(a,b,c)
-  %3417 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3416, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3418 = extractvalue %dx.types.ResRet.i32 %3417, 0
-  %3419 = extractvalue %dx.types.ResRet.i32 %3417, 1
-  %3420 = zext i32 %3418 to i64
-  %3421 = zext i32 %3419 to i64
-  %3422 = shl i64 %3421, 32
-  %3423 = or i64 %3420, %3422
-  %3424 = sitofp i64 %3423 to float
-  br label %3425
-
-; <label>:3425                                    ; preds = %3399, %3333, %3299, %3277, %3267
-  %3426 = phi float [ %3296, %3277 ], [ 0.000000e+00, %3267 ], [ %3332, %3299 ], [ %3424, %3399 ], [ 0.000000e+00, %3333 ]
-  br i1 %1022, label %3427, label %3457
-
-; <label>:3427                                    ; preds = %3425
-  %3428 = fcmp fast oge float %1504, 0.000000e+00
-  %3429 = fptoui float %1504 to i32
-  %3430 = icmp ult i32 %3429, %13
-  %3431 = and i1 %3428, %3430
-  %3432 = fcmp fast oge float %2946, 0.000000e+00
-  %3433 = and i1 %3432, %3431
-  %3434 = fptoui float %2946 to i32
-  %3435 = icmp ult i32 %3434, %15
-  %3436 = and i1 %3435, %3433
-  br i1 %3436, label %3437, label %3585
-
-; <label>:3437                                    ; preds = %3427
-  %3438 = fptoui float %45 to i32
-  %3439 = fptoui float %182 to i32
-  %3440 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3441 = extractvalue %dx.types.CBufRet.i32 %3440, 0
-  %3442 = extractvalue %dx.types.CBufRet.i32 %3440, 1
-  %3443 = extractvalue %dx.types.CBufRet.i32 %3440, 2
-  %3444 = extractvalue %dx.types.CBufRet.i32 %3440, 3
-  %3445 = mul i32 %3441, %3438
-  %3446 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3439, i32 %3442, i32 %3445)  ; IMad(a,b,c)
-  %3447 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3434, i32 %3443, i32 %3446)  ; IMad(a,b,c)
-  %3448 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3429, i32 %3444, i32 %3447)  ; IMad(a,b,c)
-  %3449 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3448, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3450 = extractvalue %dx.types.ResRet.i32 %3449, 0
-  %3451 = extractvalue %dx.types.ResRet.i32 %3449, 1
-  %3452 = zext i32 %3450 to i64
-  %3453 = zext i32 %3451 to i64
-  %3454 = shl i64 %3453, 32
-  %3455 = or i64 %3452, %3454
-  %3456 = sitofp i64 %3455 to float
-  br label %3585
-
-; <label>:3457                                    ; preds = %3425
-  %3458 = icmp eq i32 %1021, 1
-  br i1 %3458, label %3459, label %3493
-
-; <label>:3459                                    ; preds = %3457
-  %3460 = add i32 %13, -1
-  %3461 = uitofp i32 %3460 to float
-  %3462 = call float @dx.op.binary.f32(i32 35, float %1504, float 0.000000e+00)  ; FMax(a,b)
-  %3463 = call float @dx.op.binary.f32(i32 36, float %3462, float %3461)  ; FMin(a,b)
-  %3464 = fptoui float %3463 to i32
-  %3465 = add i32 %15, -1
-  %3466 = uitofp i32 %3465 to float
-  %3467 = call float @dx.op.binary.f32(i32 35, float %2946, float 0.000000e+00)  ; FMax(a,b)
-  %3468 = call float @dx.op.binary.f32(i32 36, float %3467, float %3466)  ; FMin(a,b)
-  %3469 = fptoui float %3468 to i32
-  %3470 = uitofp i32 %3469 to float
-  %3471 = uitofp i32 %3464 to float
-  %3472 = fptoui float %45 to i32
-  %3473 = fptoui float %182 to i32
-  %3474 = fptoui float %3470 to i32
-  %3475 = fptoui float %3471 to i32
-  %3476 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3477 = extractvalue %dx.types.CBufRet.i32 %3476, 0
-  %3478 = extractvalue %dx.types.CBufRet.i32 %3476, 1
-  %3479 = extractvalue %dx.types.CBufRet.i32 %3476, 2
-  %3480 = extractvalue %dx.types.CBufRet.i32 %3476, 3
-  %3481 = mul i32 %3477, %3472
-  %3482 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3473, i32 %3478, i32 %3481)  ; IMad(a,b,c)
-  %3483 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3474, i32 %3479, i32 %3482)  ; IMad(a,b,c)
-  %3484 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3475, i32 %3480, i32 %3483)  ; IMad(a,b,c)
-  %3485 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3484, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3486 = extractvalue %dx.types.ResRet.i32 %3485, 0
-  %3487 = extractvalue %dx.types.ResRet.i32 %3485, 1
-  %3488 = zext i32 %3486 to i64
-  %3489 = zext i32 %3487 to i64
-  %3490 = shl i64 %3489, 32
-  %3491 = or i64 %3488, %3490
-  %3492 = sitofp i64 %3491 to float
-  br label %3585
-
-; <label>:3493                                    ; preds = %3457
-  %3494 = icmp eq i32 %1021, 2
-  br i1 %3494, label %3495, label %3585
-
-; <label>:3495                                    ; preds = %3493
-  %3496 = fsub fast float %22, %20
-  %3497 = fcmp fast olt float %1504, %20
-  br i1 %3497, label %3498, label %3511
-
-; <label>:3498                                    ; preds = %3495
-  %3499 = fsub fast float %20, %1504
-  %3500 = fdiv fast float %3499, %3496
-  %3501 = fptoui float %3500 to i32
-  %3502 = uitofp i32 %3501 to float
-  %3503 = fmul fast float %3502, %3496
-  %3504 = fsub fast float %3499, %3503
-  %3505 = and i32 %3501, 1
-  %3506 = icmp eq i32 %3505, 0
-  br i1 %3506, label %3507, label %3509
-
-; <label>:3507                                    ; preds = %3498
-  %3508 = fadd fast float %3504, %20
-  br label %3526
-
-; <label>:3509                                    ; preds = %3498
-  %3510 = fsub fast float %22, %3504
-  br label %3526
-
-; <label>:3511                                    ; preds = %3495
-  %3512 = fcmp fast ogt float %1504, %22
-  br i1 %3512, label %3513, label %3526
-
-; <label>:3513                                    ; preds = %3511
-  %3514 = fsub fast float %1504, %22
-  %3515 = fdiv fast float %3514, %3496
-  %3516 = fptoui float %3515 to i32
-  %3517 = uitofp i32 %3516 to float
-  %3518 = fmul fast float %3517, %3496
-  %3519 = fsub fast float %3514, %3518
-  %3520 = and i32 %3516, 1
-  %3521 = icmp eq i32 %3520, 0
-  br i1 %3521, label %3522, label %3524
-
-; <label>:3522                                    ; preds = %3513
-  %3523 = fsub fast float %22, %3519
-  br label %3526
-
-; <label>:3524                                    ; preds = %3513
-  %3525 = fadd fast float %3519, %20
-  br label %3526
-
-; <label>:3526                                    ; preds = %3524, %3522, %3511, %3509, %3507
-  %3527 = phi float [ %3508, %3507 ], [ %3510, %3509 ], [ %3523, %3522 ], [ %3525, %3524 ], [ %1504, %3511 ]
-  %3528 = fptoui float %3527 to i32
-  %3529 = fsub fast float %24, %20
-  %3530 = fcmp fast olt float %2946, %20
-  br i1 %3530, label %3531, label %3544
-
-; <label>:3531                                    ; preds = %3526
-  %3532 = fsub fast float %20, %2946
-  %3533 = fdiv fast float %3532, %3529
-  %3534 = fptoui float %3533 to i32
-  %3535 = uitofp i32 %3534 to float
-  %3536 = fmul fast float %3535, %3529
-  %3537 = fsub fast float %3532, %3536
-  %3538 = and i32 %3534, 1
-  %3539 = icmp eq i32 %3538, 0
-  br i1 %3539, label %3540, label %3542
-
-; <label>:3540                                    ; preds = %3531
-  %3541 = fadd fast float %3537, %20
-  br label %3559
-
-; <label>:3542                                    ; preds = %3531
-  %3543 = fsub fast float %24, %3537
-  br label %3559
-
-; <label>:3544                                    ; preds = %3526
-  %3545 = fcmp fast ogt float %2946, %24
-  br i1 %3545, label %3546, label %3559
-
-; <label>:3546                                    ; preds = %3544
-  %3547 = fsub fast float %2946, %24
-  %3548 = fdiv fast float %3547, %3529
-  %3549 = fptoui float %3548 to i32
-  %3550 = uitofp i32 %3549 to float
-  %3551 = fmul fast float %3550, %3529
-  %3552 = fsub fast float %3547, %3551
-  %3553 = and i32 %3549, 1
-  %3554 = icmp eq i32 %3553, 0
-  br i1 %3554, label %3555, label %3557
-
-; <label>:3555                                    ; preds = %3546
-  %3556 = fsub fast float %24, %3552
-  br label %3559
-
-; <label>:3557                                    ; preds = %3546
-  %3558 = fadd fast float %3552, %20
-  br label %3559
-
-; <label>:3559                                    ; preds = %3557, %3555, %3544, %3542, %3540
-  %3560 = phi float [ %3541, %3540 ], [ %3543, %3542 ], [ %3556, %3555 ], [ %3558, %3557 ], [ %2946, %3544 ]
-  %3561 = fptoui float %3560 to i32
-  %3562 = uitofp i32 %3561 to float
-  %3563 = uitofp i32 %3528 to float
-  %3564 = fptoui float %45 to i32
-  %3565 = fptoui float %182 to i32
-  %3566 = fptoui float %3562 to i32
-  %3567 = fptoui float %3563 to i32
-  %3568 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3569 = extractvalue %dx.types.CBufRet.i32 %3568, 0
-  %3570 = extractvalue %dx.types.CBufRet.i32 %3568, 1
-  %3571 = extractvalue %dx.types.CBufRet.i32 %3568, 2
-  %3572 = extractvalue %dx.types.CBufRet.i32 %3568, 3
-  %3573 = mul i32 %3569, %3564
-  %3574 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3565, i32 %3570, i32 %3573)  ; IMad(a,b,c)
-  %3575 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3566, i32 %3571, i32 %3574)  ; IMad(a,b,c)
-  %3576 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3567, i32 %3572, i32 %3575)  ; IMad(a,b,c)
-  %3577 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3576, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3578 = extractvalue %dx.types.ResRet.i32 %3577, 0
-  %3579 = extractvalue %dx.types.ResRet.i32 %3577, 1
-  %3580 = zext i32 %3578 to i64
-  %3581 = zext i32 %3579 to i64
-  %3582 = shl i64 %3581, 32
-  %3583 = or i64 %3580, %3582
-  %3584 = sitofp i64 %3583 to float
-  br label %3585
-
-; <label>:3585                                    ; preds = %3559, %3493, %3459, %3437, %3427
-  %3586 = phi float [ %3456, %3437 ], [ 0.000000e+00, %3427 ], [ %3492, %3459 ], [ %3584, %3559 ], [ 0.000000e+00, %3493 ]
-  %3587 = call float @dx.op.unary.f32(i32 22, float %180)  ; Frc(value)
-  %3588 = call float @dx.op.unary.f32(i32 22, float %181)  ; Frc(value)
-  %3589 = fmul fast float %3588, %3588
-  %3590 = fmul fast float %3589, %3588
-  %3591 = fmul fast float %1182, -7.500000e-01
-  %3592 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2465, float %3591)  ; FMad(a,b,c)
-  %3593 = fmul fast float %1182, 1.500000e+00
-  %3594 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %1824, float %3593)  ; FMad(a,b,c)
-  %3595 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %2465, float %3594)  ; FMad(a,b,c)
-  %3596 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %3106, float %3595)  ; FMad(a,b,c)
-  %3597 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %1824, float %3591)  ; FMad(a,b,c)
-  %3598 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %2465, float %3597)  ; FMad(a,b,c)
-  %3599 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3106, float %3598)  ; FMad(a,b,c)
-  %3600 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3588, float %3589, float %3590, float %1824, float %3592, float %3596, float %3599)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3601 = fmul fast float %1342, -7.500000e-01
-  %3602 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2625, float %3601)  ; FMad(a,b,c)
-  %3603 = fmul fast float %1342, 1.500000e+00
-  %3604 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %1984, float %3603)  ; FMad(a,b,c)
-  %3605 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %2625, float %3604)  ; FMad(a,b,c)
-  %3606 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %3266, float %3605)  ; FMad(a,b,c)
-  %3607 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %1984, float %3601)  ; FMad(a,b,c)
-  %3608 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %2625, float %3607)  ; FMad(a,b,c)
-  %3609 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3266, float %3608)  ; FMad(a,b,c)
-  %3610 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3588, float %3589, float %3590, float %1984, float %3602, float %3606, float %3609)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3611 = fmul fast float %1503, -7.500000e-01
-  %3612 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2785, float %3611)  ; FMad(a,b,c)
-  %3613 = fmul fast float %1503, 1.500000e+00
-  %3614 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %2144, float %3613)  ; FMad(a,b,c)
-  %3615 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %2785, float %3614)  ; FMad(a,b,c)
-  %3616 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %3426, float %3615)  ; FMad(a,b,c)
-  %3617 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %2144, float %3611)  ; FMad(a,b,c)
-  %3618 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %2785, float %3617)  ; FMad(a,b,c)
-  %3619 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3426, float %3618)  ; FMad(a,b,c)
-  %3620 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3588, float %3589, float %3590, float %2144, float %3612, float %3616, float %3619)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3621 = fmul fast float %1664, -7.500000e-01
-  %3622 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2945, float %3621)  ; FMad(a,b,c)
-  %3623 = fmul fast float %1664, 1.500000e+00
-  %3624 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %2304, float %3623)  ; FMad(a,b,c)
-  %3625 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %2945, float %3624)  ; FMad(a,b,c)
-  %3626 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %3586, float %3625)  ; FMad(a,b,c)
-  %3627 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %2304, float %3621)  ; FMad(a,b,c)
-  %3628 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %2945, float %3627)  ; FMad(a,b,c)
-  %3629 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3586, float %3628)  ; FMad(a,b,c)
-  %3630 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3588, float %3589, float %3590, float %2304, float %3622, float %3626, float %3629)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3631 = fmul fast float %3587, %3587
-  %3632 = fmul fast float %3631, %3587
-  %3633 = fmul fast float %3600, -7.500000e-01
-  %3634 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3620, float %3633)  ; FMad(a,b,c)
-  %3635 = fmul fast float %3600, 1.500000e+00
-  %3636 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %3610, float %3635)  ; FMad(a,b,c)
-  %3637 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %3620, float %3636)  ; FMad(a,b,c)
-  %3638 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %3630, float %3637)  ; FMad(a,b,c)
-  %3639 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %3610, float %3633)  ; FMad(a,b,c)
-  %3640 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %3620, float %3639)  ; FMad(a,b,c)
-  %3641 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3630, float %3640)  ; FMad(a,b,c)
-  %3642 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3587, float %3631, float %3632, float %3610, float %3634, float %3638, float %3641)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3643 = fptosi float %3642 to i64
-  %3644 = trunc i64 %3643 to i32
-  %3645 = lshr i64 %3643, 32
-  %3646 = trunc i64 %3645 to i32
-  call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %1, i32 %8, i32 0, i32 %3644, i32 %3646, i32 undef, i32 undef, i8 3, i32 8)  ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment)
-  br label %3647
-
-; <label>:3647                                    ; preds = %3585, %1014, %997, %344, %0
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.threadId.i32(i32, i32) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.unary.f32(i32, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.tertiary.f32(i32, float, float, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.binary.f32(i32, float, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.dot4.f32(i32, float, float, float, float, float, float, float, float) #0
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.tertiary.i32(i32, i32, i32, i32) #0
-
-; Function Attrs: nounwind readonly
-declare %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32, %dx.types.Handle, i32) #1
-
-; Function Attrs: nounwind readonly
-declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #1
-
-; Function Attrs: nounwind readonly
-declare %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32, %dx.types.Handle, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.rawBufferStore.i32(i32, %dx.types.Handle, i32, i32, i32, i32, i32, i32, i8, i32) #2
-
-; Function Attrs: nounwind readnone
-declare double @dx.op.makeDouble.f64(i32, i32, i32) #0
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind readonly }
-attributes #2 = { nounwind }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.valver = !{!2}
-!dx.shaderModel = !{!3}
-!dx.resources = !{!4}
-!dx.entryPoints = !{!12}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 1, i32 2}
-!2 = !{i32 1, i32 6}
-!3 = !{!"cs", i32 6, i32 2}
-!4 = !{null, !5, !10, null}
-!5 = !{!6, !8, !9}
-!6 = !{i32 0, %"class.RWStructuredBuffer<long long>"* undef, !"", i32 0, i32 0, i32 1, i32 12, i1 false, i1 false, i1 false, !7}
-!7 = !{i32 1, i32 8}
-!8 = !{i32 1, %"class.RWStructuredBuffer<double>"* undef, !"", i32 0, i32 1, i32 1, i32 12, i1 false, i1 false, i1 false, !7}
-!9 = !{i32 2, %"class.RWStructuredBuffer<long long>"* undef, !"", i32 0, i32 2, i32 1, i32 12, i1 false, i1 false, i1 false, !7}
-!10 = !{!11}
-!11 = !{i32 0, %Constants* undef, !"", i32 0, i32 0, i32 1, i32 116, null}
-!12 = !{void ()* @GridSample, !"GridSample", null, !4, !13}
-!13 = !{i32 0, i64 9437204, i32 4, !14}
-!14 = !{i32 64, i32 1, i32 1}
-
-#endif
-
-const unsigned char g_GridSample[] = {
-  0x44, 0x58, 0x42, 0x43, 0x7a, 0xa4, 0xa3, 0x19, 0x65, 0x76, 0x99, 0xf1,
-  0xd0, 0x85, 0x8d, 0x9e, 0xad, 0xef, 0xee, 0x6f, 0x01, 0x00, 0x00, 0x00,
-  0x64, 0x58, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00,
-  0x48, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00,
-  0x18, 0x01, 0x00, 0x00, 0x34, 0x01, 0x00, 0x00, 0x53, 0x46, 0x49, 0x30,
-  0x08, 0x00, 0x00, 0x00, 0x01, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x49, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x08, 0x00, 0x00, 0x00, 0x4f, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x50, 0x53, 0x56, 0x30,
-  0xa8, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x05, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00,
-  0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
-  0x18, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x48, 0x41, 0x53, 0x48, 0x14, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0xd9, 0x69, 0xfc, 0xfd, 0x37, 0x7d, 0x54, 0xa2,
-  0x4b, 0x0f, 0x4b, 0x8f, 0x6c, 0xe0, 0xe6, 0x16, 0x44, 0x58, 0x49, 0x4c,
-  0x28, 0x57, 0x00, 0x00, 0x62, 0x00, 0x05, 0x00, 0xca, 0x15, 0x00, 0x00,
-  0x44, 0x58, 0x49, 0x4c, 0x02, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
-  0x10, 0x57, 0x00, 0x00, 0x42, 0x43, 0xc0, 0xde, 0x21, 0x0c, 0x00, 0x00,
-  0xc1, 0x15, 0x00, 0x00, 0x0b, 0x82, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00,
-  0x13, 0x00, 0x00, 0x00, 0x07, 0x81, 0x23, 0x91, 0x41, 0xc8, 0x04, 0x49,
-  0x06, 0x10, 0x32, 0x39, 0x92, 0x01, 0x84, 0x0c, 0x25, 0x05, 0x08, 0x19,
-  0x1e, 0x04, 0x8b, 0x62, 0x80, 0x18, 0x45, 0x02, 0x42, 0x92, 0x0b, 0x42,
-  0xc4, 0x10, 0x32, 0x14, 0x38, 0x08, 0x18, 0x4b, 0x0a, 0x32, 0x62, 0x88,
-  0x48, 0x90, 0x14, 0x20, 0x43, 0x46, 0x88, 0xa5, 0x00, 0x19, 0x32, 0x42,
-  0xe4, 0x48, 0x0e, 0x90, 0x11, 0x23, 0xc4, 0x50, 0x41, 0x51, 0x81, 0x8c,
-  0xe1, 0x83, 0xe5, 0x8a, 0x04, 0x31, 0x46, 0x06, 0x51, 0x18, 0x00, 0x00,
-  0x08, 0x00, 0x00, 0x00, 0x1b, 0x8c, 0xe0, 0xff, 0xff, 0xff, 0xff, 0x07,
-  0x40, 0x02, 0xa8, 0x0d, 0x86, 0xf0, 0xff, 0xff, 0xff, 0xff, 0x03, 0x20,
-  0x01, 0xd5, 0x06, 0x62, 0xf8, 0xff, 0xff, 0xff, 0xff, 0x01, 0x90, 0x00,
-  0x49, 0x18, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x13, 0x82, 0x60, 0x42,
-  0x20, 0x4c, 0x08, 0x06, 0x00, 0x00, 0x00, 0x00, 0x89, 0x20, 0x00, 0x00,
-  0x52, 0x00, 0x00, 0x00, 0x32, 0x22, 0x88, 0x09, 0x20, 0x64, 0x85, 0x04,
-  0x13, 0x23, 0xa4, 0x84, 0x04, 0x13, 0x23, 0xe3, 0x84, 0xa1, 0x90, 0x14,
-  0x12, 0x4c, 0x8c, 0x8c, 0x0b, 0x84, 0xc4, 0x4c, 0x10, 0xb0, 0xc1, 0x08,
-  0x40, 0x09, 0x00, 0x0a, 0xe6, 0x08, 0xc0, 0xa0, 0x0c, 0xc3, 0x30, 0x10,
-  0x31, 0x03, 0x50, 0x06, 0x63, 0x30, 0xe8, 0x28, 0x85, 0x31, 0x18, 0x86,
-  0x41, 0x49, 0x21, 0x8c, 0xc1, 0x30, 0x68, 0x29, 0x8a, 0x31, 0x18, 0x86,
-  0x61, 0x18, 0x86, 0x61, 0x50, 0x53, 0x8a, 0x61, 0x18, 0x86, 0x81, 0x9e,
-  0xa3, 0x86, 0xcb, 0x9f, 0xb0, 0x87, 0x90, 0x7c, 0x6e, 0xa3, 0x8a, 0x95,
-  0x98, 0x7c, 0xe4, 0xb6, 0x11, 0x31, 0x0c, 0xc3, 0x30, 0x47, 0x80, 0x90,
-  0x74, 0xcf, 0x70, 0xf9, 0x13, 0xf6, 0x10, 0x92, 0x1f, 0x02, 0xcd, 0xb0,
-  0x10, 0x28, 0x98, 0x0a, 0x11, 0x0d, 0xd4, 0x40, 0xd5, 0x1c, 0x41, 0x50,
-  0x0c, 0x6a, 0x90, 0x86, 0xe1, 0x22, 0xec, 0xa6, 0xe1, 0xf2, 0x27, 0xec,
-  0x21, 0x24, 0x7f, 0x25, 0xa4, 0x95, 0x98, 0x7c, 0xe4, 0xb6, 0x51, 0x31,
-  0x0c, 0xc3, 0x30, 0x94, 0x43, 0x1b, 0xa8, 0x61, 0x90, 0x06, 0xda, 0xca,
-  0x02, 0x0c, 0xd4, 0x30, 0x0c, 0xc3, 0x30, 0x48, 0x03, 0x75, 0x43, 0x00,
-  0x85, 0xf8, 0x86, 0x61, 0x20, 0x70, 0x20, 0x60, 0x8e, 0x00, 0x14, 0x66,
-  0x1a, 0x83, 0x71, 0x60, 0x87, 0x70, 0x98, 0x87, 0x79, 0x70, 0x03, 0x59,
-  0xb8, 0x85, 0x59, 0xa0, 0x07, 0x79, 0xa8, 0x87, 0x71, 0xa0, 0x87, 0x7a,
-  0x90, 0x87, 0x72, 0x20, 0x07, 0x51, 0xa8, 0x07, 0x73, 0x30, 0x87, 0x72,
-  0x90, 0x07, 0x3e, 0x60, 0x87, 0x77, 0x70, 0x87, 0x73, 0x00, 0x03, 0x76,
-  0x78, 0x07, 0x77, 0x38, 0x07, 0x3f, 0x40, 0xc1, 0x48, 0xe4, 0x4c, 0x60,
-  0x30, 0x0e, 0xec, 0x10, 0x0e, 0xf3, 0x30, 0x0f, 0x6e, 0x20, 0x0b, 0xb7,
-  0x30, 0x0b, 0xf4, 0x20, 0x0f, 0xf5, 0x30, 0x0e, 0xf4, 0x50, 0x0f, 0xf2,
-  0x50, 0x0e, 0xe4, 0x20, 0x0a, 0xf5, 0x60, 0x0e, 0xe6, 0x50, 0x0e, 0xf2,
-  0xc0, 0x07, 0xe4, 0xf0, 0x0e, 0xf5, 0x20, 0x0e, 0xec, 0x50, 0x0e, 0x7e,
-  0x80, 0x82, 0x8f, 0xcc, 0x61, 0x04, 0x62, 0xb8, 0x84, 0x73, 0x1a, 0x69,
-  0x02, 0x9a, 0x49, 0x42, 0xcb, 0x30, 0x0c, 0x03, 0x8a, 0xa2, 0x28, 0x8a,
-  0x0e, 0x94, 0x4e, 0x01, 0x00, 0x00, 0x00, 0x00, 0x13, 0x14, 0x72, 0xc0,
-  0x87, 0x74, 0x60, 0x87, 0x36, 0x68, 0x87, 0x79, 0x68, 0x03, 0x72, 0xc0,
-  0x87, 0x0d, 0xae, 0x50, 0x0e, 0x6d, 0xd0, 0x0e, 0x7a, 0x50, 0x0e, 0x6d,
-  0x00, 0x0f, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d,
-  0x90, 0x0e, 0x71, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x78,
-  0xa0, 0x07, 0x78, 0xd0, 0x06, 0xe9, 0x10, 0x07, 0x76, 0xa0, 0x07, 0x71,
-  0x60, 0x07, 0x6d, 0x90, 0x0e, 0x73, 0x20, 0x07, 0x7a, 0x30, 0x07, 0x72,
-  0xd0, 0x06, 0xe9, 0x60, 0x07, 0x74, 0xa0, 0x07, 0x76, 0x40, 0x07, 0x6d,
-  0x60, 0x0e, 0x71, 0x60, 0x07, 0x7a, 0x10, 0x07, 0x76, 0xd0, 0x06, 0xe6,
-  0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x60, 0x0e, 0x76,
-  0x40, 0x07, 0x7a, 0x60, 0x07, 0x74, 0xd0, 0x06, 0xee, 0x80, 0x07, 0x7a,
-  0x10, 0x07, 0x76, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x7a, 0x60, 0x07, 0x74,
-  0x30, 0xe4, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x60, 0xc8, 0x43, 0x00, 0x01, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0xc0, 0x90, 0xe7, 0x00, 0x02, 0x20, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x80, 0x21, 0x4f, 0x02, 0x04, 0x40, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x43, 0x9e, 0x05, 0x08, 0x80, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0x3c, 0x0d, 0x10, 0x00, 0x01,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x79, 0x1e, 0x20, 0x00,
-  0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0xf2, 0x54, 0x40,
-  0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0xe4, 0xc1,
-  0x80, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0xc8,
-  0xb3, 0x01, 0x01, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0,
-  0x90, 0xa7, 0x03, 0x02, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x80, 0x21, 0x0f, 0x18, 0x00, 0x01, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x40, 0x16, 0x08, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00,
-  0x32, 0x1e, 0x98, 0x14, 0x19, 0x11, 0x4c, 0x90, 0x8c, 0x09, 0x26, 0x47,
-  0xc6, 0x04, 0x43, 0x1a, 0x4a, 0xa0, 0x08, 0x8a, 0x61, 0x04, 0xa0, 0x30,
-  0x0a, 0xa2, 0xd0, 0x03, 0x0a, 0xa1, 0x00, 0x03, 0xe8, 0x1a, 0x01, 0xa0,
-  0xb1, 0x50, 0x01, 0x01, 0x13, 0xa8, 0x9c, 0x01, 0xa0, 0x73, 0x06, 0x80,
-  0xd4, 0x19, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00,
-  0x1a, 0x03, 0x4c, 0x90, 0x46, 0x02, 0x13, 0x44, 0x35, 0x18, 0x63, 0x0b,
-  0x73, 0x3b, 0x03, 0xb1, 0x2b, 0x93, 0x9b, 0x4b, 0x7b, 0x73, 0x03, 0x99,
-  0x71, 0xb9, 0x01, 0x41, 0xa1, 0x0b, 0x3b, 0x9b, 0x7b, 0x91, 0x2a, 0x62,
-  0x2a, 0x0a, 0x9a, 0x2a, 0xfa, 0x9a, 0xb9, 0x81, 0x79, 0x31, 0x4b, 0x73,
-  0x0b, 0x63, 0x4b, 0xd9, 0x10, 0x04, 0x13, 0x84, 0x81, 0x99, 0x20, 0x0c,
-  0xcd, 0x06, 0x61, 0x20, 0x26, 0x08, 0x83, 0xb3, 0x41, 0x18, 0x0c, 0x0a,
-  0x63, 0x73, 0x1b, 0x06, 0xc4, 0x20, 0x26, 0x08, 0xc3, 0x33, 0x41, 0x28,
-  0x83, 0x8b, 0xc0, 0x04, 0x61, 0x80, 0x26, 0x08, 0x57, 0x35, 0x41, 0x18,
-  0xa2, 0x0d, 0xc2, 0xf0, 0x6c, 0x58, 0x94, 0x85, 0x51, 0x94, 0xa1, 0x71,
-  0x1c, 0x07, 0x9a, 0x20, 0x9c, 0x01, 0xb6, 0x61, 0x19, 0x24, 0x46, 0x19,
-  0x86, 0xc6, 0x71, 0x1c, 0x68, 0xc3, 0x42, 0x2c, 0x8c, 0x42, 0x0c, 0x8d,
-  0xe3, 0x38, 0xd0, 0x86, 0x21, 0x9a, 0xa8, 0x09, 0x82, 0x1a, 0x64, 0x13,
-  0x84, 0x41, 0xda, 0x80, 0x28, 0x16, 0xa3, 0x28, 0xc3, 0x05, 0x6c, 0x08,
-  0xb0, 0x0d, 0x04, 0x50, 0x65, 0xc0, 0x04, 0x41, 0x00, 0xa8, 0x1c, 0xc9,
-  0xa5, 0x91, 0x4d, 0x85, 0xb5, 0xc1, 0xb1, 0x95, 0x4d, 0x10, 0xc6, 0xc0,
-  0x9a, 0x20, 0x0c, 0xd3, 0x04, 0x61, 0xa0, 0x36, 0x0c, 0xdf, 0x30, 0x6c,
-  0x20, 0x94, 0xce, 0x03, 0x83, 0x0d, 0xc5, 0xc6, 0x01, 0x5a, 0x18, 0x54,
-  0x61, 0x63, 0xb3, 0x6b, 0x73, 0x49, 0x23, 0x2b, 0x73, 0xa3, 0x9b, 0x12,
-  0x04, 0x55, 0xc8, 0xf0, 0x5c, 0xec, 0xca, 0xe4, 0xe6, 0xd2, 0xde, 0xdc,
-  0xa6, 0x04, 0x44, 0x13, 0x32, 0x3c, 0x17, 0xbb, 0x30, 0x36, 0xbb, 0x32,
-  0xb9, 0x29, 0x81, 0x51, 0x87, 0x0c, 0xcf, 0x65, 0x0e, 0x2d, 0x8c, 0xac,
-  0x4c, 0xae, 0xe9, 0x8d, 0xac, 0x8c, 0x6d, 0x4a, 0x80, 0x94, 0x21, 0xc3,
-  0x73, 0x91, 0x2b, 0x9b, 0x7b, 0xab, 0x93, 0x1b, 0x2b, 0x9b, 0x9b, 0x12,
-  0x64, 0x75, 0xc8, 0xf0, 0x5c, 0xca, 0xdc, 0xe8, 0xe4, 0xf2, 0xa0, 0xde,
-  0xd2, 0xdc, 0xe8, 0xe6, 0xa6, 0x04, 0x61, 0x00, 0x79, 0x18, 0x00, 0x00,
-  0x51, 0x00, 0x00, 0x00, 0x33, 0x08, 0x80, 0x1c, 0xc4, 0xe1, 0x1c, 0x66,
-  0x14, 0x01, 0x3d, 0x88, 0x43, 0x38, 0x84, 0xc3, 0x8c, 0x42, 0x80, 0x07,
-  0x79, 0x78, 0x07, 0x73, 0x98, 0x71, 0x0c, 0xe6, 0x00, 0x0f, 0xed, 0x10,
-  0x0e, 0xf4, 0x80, 0x0e, 0x33, 0x0c, 0x42, 0x1e, 0xc2, 0xc1, 0x1d, 0xce,
-  0xa1, 0x1c, 0x66, 0x30, 0x05, 0x3d, 0x88, 0x43, 0x38, 0x84, 0x83, 0x1b,
-  0xcc, 0x03, 0x3d, 0xc8, 0x43, 0x3d, 0x8c, 0x03, 0x3d, 0xcc, 0x78, 0x8c,
-  0x74, 0x70, 0x07, 0x7b, 0x08, 0x07, 0x79, 0x48, 0x87, 0x70, 0x70, 0x07,
-  0x7a, 0x70, 0x03, 0x76, 0x78, 0x87, 0x70, 0x20, 0x87, 0x19, 0xcc, 0x11,
-  0x0e, 0xec, 0x90, 0x0e, 0xe1, 0x30, 0x0f, 0x6e, 0x30, 0x0f, 0xe3, 0xf0,
-  0x0e, 0xf0, 0x50, 0x0e, 0x33, 0x10, 0xc4, 0x1d, 0xde, 0x21, 0x1c, 0xd8,
-  0x21, 0x1d, 0xc2, 0x61, 0x1e, 0x66, 0x30, 0x89, 0x3b, 0xbc, 0x83, 0x3b,
-  0xd0, 0x43, 0x39, 0xb4, 0x03, 0x3c, 0xbc, 0x83, 0x3c, 0x84, 0x03, 0x3b,
-  0xcc, 0xf0, 0x14, 0x76, 0x60, 0x07, 0x7b, 0x68, 0x07, 0x37, 0x68, 0x87,
-  0x72, 0x68, 0x07, 0x37, 0x80, 0x87, 0x70, 0x90, 0x87, 0x70, 0x60, 0x07,
-  0x76, 0x28, 0x07, 0x76, 0xf8, 0x05, 0x76, 0x78, 0x87, 0x77, 0x80, 0x87,
-  0x5f, 0x08, 0x87, 0x71, 0x18, 0x87, 0x72, 0x98, 0x87, 0x79, 0x98, 0x81,
-  0x2c, 0xee, 0xf0, 0x0e, 0xee, 0xe0, 0x0e, 0xf5, 0xc0, 0x0e, 0xec, 0x30,
-  0x03, 0x62, 0xc8, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xcc, 0xa1, 0x1c, 0xe4,
-  0xa1, 0x1c, 0xdc, 0x61, 0x1c, 0xca, 0x21, 0x1c, 0xc4, 0x81, 0x1d, 0xca,
-  0x61, 0x06, 0xd6, 0x90, 0x43, 0x39, 0xc8, 0x43, 0x39, 0x98, 0x43, 0x39,
-  0xc8, 0x43, 0x39, 0xb8, 0xc3, 0x38, 0x94, 0x43, 0x38, 0x88, 0x03, 0x3b,
-  0x94, 0xc3, 0x2f, 0xbc, 0x83, 0x3c, 0xfc, 0x82, 0x3b, 0xd4, 0x03, 0x3b,
-  0xb0, 0xc3, 0x0c, 0xc4, 0x21, 0x07, 0x7c, 0x70, 0x03, 0x7a, 0x28, 0x87,
-  0x76, 0x80, 0x87, 0x19, 0xd1, 0x43, 0x0e, 0xf8, 0xe0, 0x06, 0xe4, 0x20,
-  0x0e, 0xe7, 0xe0, 0x06, 0xf6, 0x10, 0x0e, 0xf2, 0xc0, 0x0e, 0xe1, 0x90,
-  0x0f, 0xef, 0x50, 0x0f, 0xf4, 0x30, 0x83, 0x81, 0xc8, 0x01, 0x1f, 0xdc,
-  0x40, 0x1c, 0xe4, 0xa1, 0x1c, 0xc2, 0x61, 0x1d, 0xdc, 0x40, 0x1c, 0xe4,
-  0x01, 0x00, 0x00, 0x00, 0x71, 0x20, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00,
-  0x06, 0xa0, 0x80, 0x11, 0x32, 0xb0, 0x00, 0xf3, 0x2c, 0x84, 0x11, 0x40,
-  0xc3, 0xe5, 0x3b, 0x8f, 0x1f, 0x20, 0x0d, 0x10, 0x61, 0x7e, 0x71, 0xdb,
-  0x76, 0xb0, 0x0d, 0x97, 0xef, 0x3c, 0xbe, 0x10, 0x50, 0x45, 0x41, 0x44,
-  0xa5, 0x03, 0x0c, 0x25, 0x61, 0x00, 0x02, 0xe6, 0x23, 0xb7, 0x6d, 0x08,
-  0xd2, 0x70, 0xf9, 0xce, 0xe3, 0x0b, 0x11, 0x01, 0x4c, 0x44, 0x08, 0x34,
-  0xc3, 0x42, 0x58, 0x81, 0x33, 0x5c, 0xbe, 0xf3, 0xf8, 0x83, 0x33, 0xe1,
-  0x7e, 0x71, 0xdb, 0xb6, 0x40, 0x0d, 0x97, 0xef, 0x3c, 0x3e, 0x03, 0x28,
-  0x44, 0xe7, 0x50, 0xc1, 0x42, 0xf8, 0x85, 0x8e, 0x5b, 0xc2, 0x35, 0x5c,
-  0xbe, 0xf3, 0xf8, 0x11, 0x60, 0x6d, 0x54, 0x51, 0x10, 0x51, 0xe9, 0x00,
-  0x83, 0x8f, 0xdc, 0xb6, 0x29, 0x60, 0xc3, 0xe5, 0x3b, 0x8f, 0x1f, 0x01,
-  0xd6, 0x46, 0x15, 0x05, 0x11, 0xb1, 0x93, 0x13, 0x11, 0x3e, 0x72, 0xdb,
-  0x36, 0x20, 0x0d, 0x97, 0xef, 0x3c, 0xfe, 0x44, 0x44, 0x13, 0x02, 0x44,
-  0x98, 0x5f, 0xdc, 0xb6, 0x19, 0x48, 0xc3, 0xe5, 0x3b, 0x8f, 0x3f, 0x11,
-  0xd1, 0x84, 0x00, 0x11, 0xe6, 0x23, 0xb7, 0x6d, 0x01, 0xd2, 0x70, 0xf9,
-  0xce, 0xe3, 0x4f, 0x47, 0x44, 0x00, 0x83, 0x38, 0xf8, 0xc8, 0x6d, 0x9b,
-  0xc0, 0x33, 0x5c, 0xbe, 0xf3, 0xf8, 0x54, 0x03, 0x44, 0x98, 0x5f, 0xdc,
-  0x36, 0x00, 0x00, 0x00, 0x61, 0x20, 0x00, 0x00, 0x1a, 0x14, 0x00, 0x00,
-  0x13, 0x04, 0x24, 0x14, 0x0b, 0x04, 0x00, 0x00, 0x1f, 0x00, 0x00, 0x00,
-  0x34, 0x14, 0x58, 0xd9, 0x95, 0xa5, 0x40, 0x0d, 0x94, 0x51, 0x21, 0x15,
-  0x57, 0xc1, 0xcd, 0x00, 0x94, 0x5c, 0xd9, 0x14, 0x4b, 0x61, 0x0a, 0x94,
-  0x72, 0x40, 0xd1, 0x94, 0x6e, 0x40, 0x39, 0x94, 0x02, 0x8d, 0x05, 0x44,
-  0x52, 0x11, 0x94, 0x40, 0x19, 0x90, 0x31, 0x46, 0x00, 0x82, 0x20, 0xc8,
-  0x7f, 0x63, 0x04, 0x20, 0x08, 0x82, 0xf8, 0x2f, 0x8c, 0x11, 0x80, 0x20,
-  0x08, 0x86, 0xe0, 0x30, 0x46, 0x00, 0x82, 0x20, 0xa8, 0x7f, 0x63, 0x04,
-  0x20, 0x08, 0x82, 0xfa, 0x2f, 0x8c, 0x11, 0x80, 0x20, 0x08, 0xc2, 0xdf,
-  0x18, 0x01, 0x08, 0x82, 0x20, 0xfc, 0x0b, 0x63, 0x04, 0x20, 0x08, 0x82,
-  0x20, 0x18, 0x8c, 0x11, 0x80, 0x20, 0x08, 0xd2, 0xdf, 0x18, 0x01, 0x08,
-  0x82, 0x20, 0xfd, 0x0b, 0x63, 0x04, 0x20, 0x08, 0x82, 0xf8, 0x37, 0x02,
-  0x00, 0x00, 0x00, 0x00, 0x23, 0x06, 0x09, 0x00, 0x82, 0x60, 0x80, 0xd1,
-  0x41, 0xe6, 0xbc, 0xc1, 0x1b, 0x9c, 0xc1, 0x88, 0x41, 0x02, 0x80, 0x20,
-  0x18, 0x60, 0x75, 0xa0, 0x3d, 0x71, 0x10, 0x07, 0x68, 0x30, 0x62, 0x90,
-  0x00, 0x20, 0x08, 0x06, 0x98, 0x1d, 0x6c, 0xd0, 0x1b, 0xbc, 0x41, 0x1a,
-  0x8c, 0x18, 0x24, 0x00, 0x08, 0x82, 0x01, 0x76, 0x07, 0x9c, 0x04, 0x07,
-  0x70, 0xa0, 0x06, 0x23, 0x06, 0x06, 0x00, 0x82, 0x60, 0x40, 0xfc, 0xc1,
-  0x15, 0x07, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x50, 0xe9, 0x81, 0x19,
-  0x08, 0x72, 0x30, 0x9a, 0x10, 0x00, 0x15, 0x0c, 0x30, 0x9a, 0x30, 0x04,
-  0xc3, 0x0d, 0x42, 0x40, 0x06, 0xb3, 0x0c, 0xc1, 0x08, 0x05, 0x23, 0x06,
-  0x07, 0x00, 0x82, 0x60, 0x50, 0xfd, 0xc1, 0x1a, 0x1c, 0x7a, 0x30, 0x9a,
-  0x10, 0x0c, 0x17, 0x18, 0x35, 0x9a, 0x30, 0x08, 0x17, 0x18, 0x35, 0x62,
-  0x70, 0x00, 0x20, 0x08, 0x06, 0x15, 0x29, 0xc0, 0x01, 0x13, 0x06, 0xa3,
-  0x09, 0x01, 0x30, 0xdc, 0x10, 0xf8, 0x01, 0x18, 0x4c, 0x37, 0x60, 0x53,
-  0x30, 0xdd, 0x90, 0x79, 0x42, 0x21, 0x01, 0x4c, 0x37, 0x6c, 0x60, 0x40,
-  0x14, 0x12, 0xc0, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x54, 0xac, 0x80,
-  0x07, 0x14, 0x1a, 0x8c, 0x26, 0x04, 0xc1, 0x68, 0x82, 0x20, 0x8c, 0x26,
-  0x0c, 0x43, 0x05, 0x82, 0xd4, 0x40, 0x48, 0x05, 0x83, 0xd4, 0x15, 0xcc,
-  0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x54, 0xb4, 0x00, 0x0a, 0x5c, 0x2b,
-  0x8c, 0x26, 0x04, 0x40, 0x05, 0x83, 0xd4, 0x16, 0x44, 0x05, 0xc8, 0x8c,
-  0x26, 0x14, 0x41, 0x05, 0x82, 0x14, 0x11, 0x44, 0x05, 0xcd, 0x8c, 0x26,
-  0x24, 0x42, 0x05, 0x82, 0x14, 0x11, 0xc4, 0x35, 0x46, 0x5d, 0x61, 0xd4,
-  0x0d, 0x46, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x41, 0x35, 0x0e, 0xaf,
-  0xb0, 0x06, 0xb7, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a,
-  0x30, 0x08, 0xa3, 0x09, 0xc4, 0x70, 0x84, 0x51, 0x47, 0x18, 0x75, 0x84,
-  0x51, 0x47, 0x18, 0x35, 0x62, 0xd0, 0x00, 0x20, 0x08, 0x06, 0x8d, 0x3b,
-  0xc8, 0x02, 0xb3, 0x28, 0x78, 0x40, 0x0c, 0x42, 0x60, 0x42, 0x00, 0x9f,
-  0x13, 0x86, 0x19, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0xad, 0x1d, 0x74,
-  0x21, 0x0f, 0x82, 0x73, 0x38, 0x85, 0x72, 0x18, 0x4d, 0x08, 0x80, 0xd1,
-  0x04, 0x21, 0x18, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x30, 0x70, 0x07,
-  0x59, 0x10, 0x82, 0x0b, 0x8c, 0xbb, 0x63, 0x98, 0x11, 0x03, 0x05, 0x00,
-  0x41, 0x30, 0xd8, 0xe6, 0x01, 0x1c, 0xfe, 0x20, 0x68, 0x87, 0x56, 0x58,
-  0x87, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0x11, 0x83, 0x03, 0x00,
-  0x41, 0x30, 0x00, 0x03, 0x7a, 0xc0, 0x05, 0x21, 0xb8, 0xc0, 0xb8, 0xe1,
-  0x86, 0x3a, 0xa8, 0x07, 0x30, 0x30, 0x04, 0x15, 0xe0, 0x63, 0x43, 0x2a,
-  0xc0, 0x67, 0x96, 0x41, 0x18, 0x06, 0x13, 0x5e, 0x41, 0x3e, 0x26, 0xc0,
-  0x82, 0x7c, 0xcc, 0x0f, 0x6a, 0x01, 0x3e, 0xd6, 0x07, 0xb6, 0x00, 0x1f,
-  0x23, 0x04, 0xf9, 0x18, 0x21, 0xc8, 0x67, 0x96, 0x80, 0x30, 0x51, 0x40,
-  0xe4, 0x63, 0x48, 0x28, 0xc8, 0xc7, 0x84, 0x5d, 0x80, 0x8f, 0x09, 0xbc,
-  0x00, 0x1f, 0x13, 0x72, 0x41, 0x3e, 0x26, 0xe8, 0x82, 0x7c, 0x66, 0x09,
-  0x88, 0x81, 0x0a, 0x03, 0x12, 0x88, 0x61, 0xa0, 0xc2, 0x80, 0x04, 0x62,
-  0x18, 0x4d, 0x88, 0x05, 0x61, 0xb8, 0x21, 0x38, 0x09, 0x30, 0x98, 0x65,
-  0x28, 0x8c, 0x60, 0xc4, 0xc0, 0x00, 0x40, 0x10, 0x0c, 0x0e, 0x99, 0x60,
-  0x07, 0x62, 0xc4, 0xc0, 0x00, 0x40, 0x10, 0x0c, 0x8e, 0x99, 0x68, 0x07,
-  0x62, 0x96, 0xc0, 0x18, 0xa8, 0x30, 0x88, 0x82, 0x21, 0x06, 0x2a, 0x0c,
-  0xa2, 0x60, 0x88, 0xe1, 0x08, 0x41, 0x15, 0x88, 0x6f, 0x38, 0x62, 0x48,
-  0x05, 0xe1, 0x2b, 0x21, 0xd8, 0xe1, 0x08, 0xa2, 0x15, 0x88, 0xaf, 0x84,
-  0x60, 0x87, 0x23, 0x8c, 0x55, 0x10, 0xbe, 0x0a, 0x84, 0x9d, 0x65, 0x38,
-  0xb4, 0x60, 0x34, 0xc1, 0x17, 0x86, 0xe1, 0x86, 0x80, 0x26, 0xc0, 0x60,
-  0x96, 0x01, 0x49, 0x82, 0xd2, 0x05, 0x92, 0x80, 0x0b, 0x8c, 0x1a, 0x31,
-  0x38, 0x00, 0x10, 0x04, 0x83, 0xe5, 0x27, 0x4a, 0xa2, 0x41, 0x87, 0x11,
-  0x83, 0x03, 0x00, 0x41, 0x30, 0x58, 0xc0, 0xa2, 0x24, 0x02, 0xa1, 0x78,
-  0x01, 0x25, 0xe0, 0x02, 0xa3, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60,
-  0x19, 0x8b, 0x94, 0x80, 0xd8, 0x61, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c,
-  0x16, 0xb2, 0x48, 0x89, 0x40, 0x98, 0x25, 0xd0, 0x86, 0x1b, 0x14, 0x9e,
-  0x00, 0x83, 0x59, 0x06, 0x45, 0x0b, 0x4c, 0x17, 0x78, 0x21, 0x3e, 0xb3,
-  0x0c, 0x8b, 0x33, 0x59, 0x2f, 0x54, 0xf1, 0xb1, 0x40, 0xa0, 0xcf, 0x05,
-  0xc3, 0x5c, 0x60, 0x94, 0x05, 0x85, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x44,
-  0x59, 0xe8, 0x70, 0x43, 0x30, 0x16, 0x60, 0x30, 0xcb, 0xc0, 0x34, 0x81,
-  0x0d, 0xe5, 0x00, 0x9f, 0x59, 0x02, 0xc9, 0xc8, 0x81, 0x88, 0xcf, 0x2c,
-  0x81, 0x34, 0xcb, 0xf0, 0x48, 0x9c, 0x7d, 0xe5, 0x10, 0x1f, 0x0b, 0x18,
-  0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x46, 0x59, 0xf0, 0xc8, 0xc7, 0x8a, 0x20,
-  0x3e, 0x45, 0xbc, 0x85, 0x0e, 0x37, 0x04, 0x6d, 0x01, 0x06, 0xb3, 0x0c,
-  0x50, 0x14, 0x58, 0x3b, 0x0c, 0xf1, 0x99, 0x25, 0x90, 0x8c, 0x80, 0x07,
-  0xf8, 0xcc, 0x12, 0x48, 0x03, 0x2d, 0x06, 0xc6, 0x58, 0x0d, 0x01, 0x09,
-  0x91, 0x2c, 0x38, 0xe6, 0x0e, 0xf2, 0x10, 0x9f, 0x59, 0x86, 0xc9, 0x32,
-  0x03, 0x9b, 0x07, 0x35, 0x88, 0x8f, 0x05, 0x02, 0x7d, 0x2e, 0x18, 0xe6,
-  0x02, 0xa3, 0x2c, 0x28, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xf6, 0x42,
-  0x87, 0x1b, 0x82, 0xbc, 0x00, 0x83, 0x59, 0x06, 0xaa, 0x0a, 0x6c, 0xd8,
-  0x07, 0xf8, 0xcc, 0x12, 0x68, 0x86, 0x0f, 0x44, 0x7c, 0x66, 0x09, 0xb4,
-  0x59, 0x86, 0x4b, 0x73, 0x03, 0xa3, 0x83, 0x7c, 0x88, 0x8f, 0x05, 0x0c,
-  0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa3, 0x2c, 0x78, 0xe4, 0x63, 0x45, 0x10,
-  0x9f, 0x22, 0x4a, 0x43, 0x87, 0x1b, 0x82, 0xd1, 0x00, 0x83, 0x59, 0x06,
-  0x2c, 0x0b, 0x2c, 0x24, 0x86, 0xf8, 0xcc, 0x12, 0x68, 0x46, 0x98, 0x04,
-  0x7c, 0x66, 0x09, 0xb4, 0x81, 0x22, 0x43, 0x1c, 0x10, 0x7f, 0x48, 0xfc,
-  0xc1, 0x60, 0x83, 0x8c, 0x0d, 0x30, 0x36, 0xb0, 0xd8, 0xa0, 0x62, 0x03,
-  0x6a, 0xa0, 0xc8, 0xe0, 0x05, 0xc4, 0x1f, 0x12, 0x7f, 0x30, 0x88, 0xcc,
-  0xc0, 0xfc, 0xc1, 0xc2, 0x2a, 0x8d, 0x3a, 0x7c, 0x30, 0x6a, 0x96, 0x61,
-  0x9b, 0x83, 0x52, 0x18, 0x4d, 0xb8, 0x89, 0x61, 0xb8, 0x21, 0x50, 0x0d,
-  0x30, 0x98, 0x65, 0xe0, 0xbc, 0x60, 0x38, 0xa2, 0xf8, 0x89, 0xe1, 0x3b,
-  0x63, 0x98, 0xe1, 0x86, 0xa0, 0x26, 0xc8, 0xa0, 0x86, 0x40, 0x87, 0x23,
-  0x90, 0xb1, 0x18, 0xbe, 0x0a, 0x04, 0x3d, 0x65, 0x98, 0xe1, 0x86, 0x00,
-  0x27, 0xc8, 0xa0, 0x82, 0x41, 0x67, 0x19, 0x3a, 0x39, 0x08, 0x8e, 0x1f,
-  0x86, 0xb9, 0x66, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa8, 0x7c,
-  0x43, 0x35, 0xcc, 0x02, 0x37, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08,
-  0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x38, 0x64, 0xc4, 0x00,
-  0x01, 0x40, 0x10, 0x0c, 0x9e, 0xf2, 0x88, 0x8d, 0x83, 0x08, 0x46, 0x0c,
-  0x10, 0x00, 0x04, 0xc1, 0xe0, 0x31, 0x0f, 0xd9, 0x60, 0x88, 0x60, 0xc4,
-  0x00, 0x01, 0x40, 0x10, 0x0c, 0x9e, 0xf3, 0x98, 0x0d, 0x89, 0x08, 0x46,
-  0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0x2b, 0x0f, 0xd9, 0x80, 0x8b, 0xe0,
-  0x37, 0xfe, 0xa2, 0x37, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x4e,
-  0x18, 0xe3, 0x84, 0x31, 0x2a, 0x38, 0x8d, 0xab, 0x21, 0xd8, 0x0b, 0x0c,
-  0x9b, 0x25, 0x90, 0x83, 0xe1, 0x06, 0x4f, 0x3d, 0xc0, 0x60, 0x96, 0xe1,
-  0x03, 0x83, 0xa0, 0xe0, 0x42, 0x37, 0xe0, 0x02, 0xa3, 0x46, 0x0c, 0x0e,
-  0x00, 0x04, 0xc1, 0x60, 0xa9, 0x8f, 0xdd, 0x20, 0x03, 0xbf, 0x18, 0x31,
-  0x38, 0x00, 0x10, 0x04, 0x83, 0xc5, 0x3e, 0x76, 0x23, 0x10, 0x2e, 0x18,
-  0xa6, 0xe6, 0xe2, 0x37, 0xe0, 0x02, 0xa3, 0x46, 0x0c, 0x0e, 0x00, 0x04,
-  0xc1, 0x60, 0xd1, 0x0f, 0xf0, 0x40, 0x83, 0xd1, 0x18, 0x31, 0x38, 0x00,
-  0x10, 0x04, 0x83, 0x65, 0x3f, 0xc0, 0x23, 0x10, 0x2e, 0x18, 0xe6, 0x02,
-  0xa3, 0xee, 0x30, 0xea, 0x78, 0x62, 0x98, 0x6b, 0x83, 0x61, 0x8e, 0x18,
-  0xe6, 0x88, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0xa0, 0x02, 0x11,
-  0xf6, 0x40, 0x0d, 0xfd, 0x18, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18,
-  0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04,
-  0x00, 0x41, 0x30, 0x78, 0x4e, 0x64, 0x3e, 0x12, 0x22, 0x18, 0x31, 0x40,
-  0x00, 0x10, 0x04, 0x83, 0x07, 0x45, 0xe8, 0x23, 0x21, 0x82, 0x11, 0x03,
-  0x04, 0x00, 0x41, 0x30, 0x78, 0x52, 0xa4, 0x3e, 0x12, 0x22, 0x18, 0x31,
-  0x50, 0x00, 0x10, 0x04, 0x83, 0xed, 0x44, 0xe8, 0x43, 0x36, 0x82, 0x10,
-  0x09, 0x8f, 0xff, 0x18, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x38, 0x61,
-  0x8c, 0x13, 0xc6, 0xa8, 0x20, 0x3d, 0xae, 0x86, 0x60, 0x2f, 0x30, 0x6c,
-  0x96, 0x40, 0x0e, 0x86, 0x1b, 0x40, 0x61, 0x45, 0xc0, 0x60, 0x96, 0x21,
-  0x0c, 0xe4, 0x20, 0xb0, 0xd4, 0x58, 0x8d, 0xf8, 0x0c, 0x47, 0x94, 0x02,
-  0x6b, 0x10, 0xdf, 0x2c, 0x83, 0x18, 0x94, 0x41, 0x60, 0xad, 0x61, 0x0a,
-  0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0x60, 0x94, 0x05, 0x86,
-  0x7c, 0xac, 0x08, 0xe2, 0x53, 0x44, 0x8d, 0xe8, 0x70, 0x43, 0x30, 0x23,
-  0x60, 0x30, 0xcb, 0x30, 0x06, 0x64, 0x10, 0xd8, 0x50, 0x1b, 0xf0, 0x99,
-  0x25, 0x48, 0x03, 0xa3, 0x0d, 0x22, 0x3e, 0xb3, 0x04, 0x69, 0x30, 0x1c,
-  0x01, 0x0b, 0xb5, 0x21, 0x7c, 0xb3, 0x0c, 0x66, 0x90, 0x06, 0x81, 0xc5,
-  0x82, 0x6d, 0xc4, 0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x51,
-  0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x60, 0xa2, 0xc3, 0x0d,
-  0x81, 0x8f, 0x80, 0xc1, 0x2c, 0xc3, 0x19, 0xa0, 0x41, 0x60, 0xbe, 0x31,
-  0xc4, 0x67, 0x96, 0x20, 0x0d, 0x8c, 0x08, 0x0f, 0xf8, 0xcc, 0x12, 0xa4,
-  0xc1, 0x40, 0x8b, 0xa1, 0x8d, 0x01, 0x46, 0x06, 0xc4, 0x19, 0x08, 0x68,
-  0x60, 0x17, 0x65, 0x70, 0xc1, 0x30, 0x06, 0x1e, 0xe4, 0x11, 0x9f, 0xe1,
-  0x88, 0x5e, 0x28, 0x0f, 0xe2, 0x9b, 0x65, 0x50, 0x83, 0x36, 0x08, 0xcc,
-  0x3c, 0x7c, 0x21, 0x3e, 0x16, 0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x8c,
-  0xb2, 0xc0, 0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x70, 0x13, 0x1d, 0x6e,
-  0x08, 0xd8, 0x04, 0x0c, 0x66, 0x19, 0xd6, 0x80, 0x0d, 0x02, 0x1b, 0xdc,
-  0x03, 0x3e, 0xb3, 0x04, 0x71, 0x60, 0xeb, 0x41, 0xc4, 0x67, 0x96, 0x20,
-  0x0e, 0x86, 0x23, 0xd0, 0x81, 0x3d, 0x84, 0x6f, 0x96, 0xc1, 0x0d, 0xe2,
-  0x20, 0xb0, 0x74, 0x68, 0x8f, 0xf8, 0x58, 0xe0, 0xd0, 0xe7, 0x82, 0x61,
-  0x2e, 0x30, 0xca, 0x82, 0x48, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x22, 0x4f,
-  0x74, 0xb8, 0x21, 0xb8, 0x13, 0x30, 0x98, 0x65, 0x78, 0x03, 0x38, 0x08,
-  0xac, 0x3e, 0x86, 0xf8, 0xcc, 0x12, 0xc4, 0x81, 0x11, 0xfa, 0x01, 0x9f,
-  0x59, 0x82, 0x38, 0x18, 0x68, 0x31, 0xb4, 0x35, 0xc0, 0xd8, 0x80, 0x78,
-  0x03, 0x01, 0x0e, 0xcc, 0xa3, 0x0d, 0x2e, 0x18, 0xe6, 0x02, 0xa3, 0x6e,
-  0x33, 0xea, 0xcc, 0x63, 0x98, 0xbb, 0x87, 0x61, 0x8e, 0x18, 0xe6, 0x88,
-  0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0xa0, 0x52, 0x15, 0x3b, 0x91,
-  0x11, 0x52, 0x19, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18,
-  0x84, 0xd1, 0x04, 0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00, 0x41,
-  0x30, 0x78, 0x62, 0xa5, 0x4f, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10,
-  0x04, 0x83, 0x47, 0x56, 0xfc, 0x24, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00,
-  0x41, 0x30, 0x78, 0x66, 0xe5, 0x4f, 0x12, 0x22, 0x18, 0x31, 0x50, 0x00,
-  0x10, 0x04, 0x83, 0x2d, 0x56, 0xfc, 0x84, 0x47, 0x82, 0x55, 0x59, 0x93,
-  0x54, 0x19, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x38, 0x61, 0x8c, 0x13,
-  0xc6, 0xa8, 0x60, 0x4e, 0xae, 0x86, 0x60, 0x2f, 0x30, 0x6c, 0x96, 0x40,
-  0x0e, 0x06, 0x5a, 0x0c, 0xdd, 0xe8, 0x5c, 0x89, 0xb3, 0x89, 0x4f, 0x88,
-  0x03, 0x57, 0x02, 0x83, 0x0b, 0x46, 0xba, 0x60, 0x80, 0x12, 0xf2, 0x04,
-  0x2f, 0x18, 0x60, 0xc4, 0xc0, 0x01, 0x40, 0x10, 0x0c, 0xba, 0x5e, 0x09,
-  0x95, 0x35, 0x21, 0x13, 0x5c, 0x19, 0x82, 0x52, 0x29, 0x15, 0x3c, 0xb1,
-  0x95, 0x59, 0x82, 0x11, 0x1a, 0x6e, 0xa8, 0x0d, 0x5c, 0x01, 0x83, 0x59,
-  0x06, 0x3a, 0x88, 0x89, 0x60, 0xc4, 0xc0, 0x00, 0x40, 0x10, 0x0c, 0x0e,
-  0x73, 0x21, 0x95, 0x99, 0x18, 0x31, 0x30, 0x00, 0x10, 0x04, 0x83, 0xe3,
-  0x5c, 0x4a, 0x65, 0x26, 0x4c, 0x80, 0x13, 0xf8, 0x98, 0x10, 0x27, 0xf0,
-  0x19, 0x4d, 0x58, 0x93, 0x61, 0xb8, 0x21, 0xf0, 0x15, 0x30, 0x98, 0x65,
-  0xa8, 0x83, 0x3b, 0x08, 0x86, 0x23, 0x8c, 0x39, 0x19, 0xbe, 0x3b, 0x86,
-  0x19, 0x6e, 0x08, 0xd2, 0x84, 0x0c, 0x6a, 0x08, 0x74, 0x38, 0x22, 0xb9,
-  0x93, 0xe1, 0xab, 0x40, 0xd0, 0x5b, 0x86, 0x19, 0x6e, 0x08, 0xd8, 0x84,
-  0x0c, 0x2a, 0x18, 0x74, 0x96, 0xc1, 0x0e, 0x56, 0x21, 0x38, 0x18, 0x19,
-  0xe6, 0xc2, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa8, 0xe4,
-  0xc5, 0x57, 0xf4, 0x84, 0x5d, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08,
-  0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x38, 0x64, 0xc4, 0x00,
-  0x01, 0x40, 0x10, 0x0c, 0x9e, 0x7c, 0x29, 0x97, 0x83, 0x08, 0x46, 0x0c,
-  0x10, 0x00, 0x04, 0xc1, 0xe0, 0xd1, 0x17, 0x73, 0x61, 0x88, 0x60, 0xc4,
-  0x00, 0x01, 0x40, 0x10, 0x0c, 0x9e, 0x7d, 0x39, 0x17, 0x89, 0x08, 0x46,
-  0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0xcb, 0x17, 0x73, 0x21, 0x95, 0x60,
-  0x5e, 0x66, 0x25, 0x5e, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x4e,
-  0x18, 0xe3, 0x84, 0x31, 0x2a, 0xd8, 0x95, 0xab, 0x21, 0xd8, 0x0b, 0x0c,
-  0x9b, 0x25, 0x58, 0x85, 0xe1, 0x06, 0xcf, 0x5f, 0xc0, 0x60, 0x96, 0x01,
-  0x0f, 0xf2, 0x20, 0x28, 0x52, 0x71, 0x17, 0xb8, 0xc0, 0xa8, 0x11, 0x83,
-  0x03, 0x00, 0x41, 0x30, 0x58, 0x52, 0xe6, 0x5d, 0xca, 0x40, 0x56, 0x46,
-  0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0x51, 0x99, 0x77, 0x09, 0x84, 0x0b,
-  0x86, 0xa9, 0x53, 0x99, 0x17, 0xb8, 0xc0, 0xa8, 0x11, 0x83, 0x03, 0x00,
-  0x41, 0x30, 0x58, 0x5c, 0x86, 0x5e, 0xd2, 0xe0, 0x56, 0x46, 0x0c, 0x0e,
-  0x00, 0x04, 0xc1, 0x60, 0x79, 0x19, 0x7a, 0x09, 0x84, 0x0b, 0x86, 0xb9,
-  0xc0, 0xa8, 0x3b, 0x8c, 0x3a, 0x38, 0x19, 0xe6, 0x42, 0x63, 0x98, 0x23,
-  0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa8, 0x68,
-  0x06, 0x64, 0x78, 0xc5, 0x65, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08,
-  0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00,
-  0x01, 0x40, 0x10, 0x0c, 0x9e, 0x9d, 0x39, 0x99, 0x84, 0x08, 0x46, 0x0c,
-  0x10, 0x00, 0x04, 0xc1, 0xe0, 0xe1, 0x19, 0x94, 0x49, 0x88, 0x60, 0xc4,
-  0x00, 0x01, 0x40, 0x10, 0x0c, 0x9e, 0x9e, 0x49, 0x99, 0x84, 0x08, 0x46,
-  0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0xdb, 0x19, 0x94, 0x31, 0x97, 0xa0,
-  0x66, 0xea, 0x65, 0x66, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x4e,
-  0x18, 0xe3, 0x84, 0x31, 0x2a, 0xe8, 0x97, 0xab, 0x21, 0xd8, 0x0b, 0x0c,
-  0x9b, 0x25, 0x58, 0x85, 0xe1, 0x06, 0x50, 0xf8, 0x19, 0x30, 0x98, 0x65,
-  0xd0, 0x83, 0x55, 0x08, 0xac, 0x57, 0x7e, 0x25, 0x3e, 0xc3, 0x11, 0xa6,
-  0x00, 0x2e, 0xc4, 0x37, 0xcb, 0xb0, 0x07, 0x7e, 0x10, 0x58, 0xb8, 0x9c,
-  0x42, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x18, 0x65, 0x81,
-  0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x91, 0x36, 0x3a, 0xdc, 0x10, 0x9c,
-  0x0d, 0x18, 0xcc, 0x32, 0xf0, 0x41, 0x1f, 0x04, 0x36, 0xa4, 0x0b, 0x7c,
-  0x66, 0x09, 0x44, 0xc1, 0xd0, 0x85, 0x88, 0xcf, 0x2c, 0x81, 0x28, 0x0c,
-  0x47, 0xc4, 0x42, 0xba, 0x08, 0xdf, 0x2c, 0xc3, 0x1f, 0x88, 0x42, 0x60,
-  0xb2, 0xa0, 0x2e, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0x60,
-  0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x04, 0xdd, 0xe8, 0x70,
-  0x43, 0x20, 0x37, 0x60, 0x30, 0xcb, 0x00, 0x0a, 0xa1, 0x10, 0x98, 0xbc,
-  0x0c, 0xf1, 0x99, 0x25, 0x10, 0x05, 0x23, 0xea, 0x05, 0x3e, 0xb3, 0x04,
-  0xa2, 0x30, 0xd0, 0x62, 0x68, 0x7c, 0x80, 0xf5, 0x01, 0x01, 0x0a, 0x42,
-  0x28, 0xe0, 0x85, 0x1f, 0x5c, 0x30, 0x8c, 0xd1, 0x0b, 0xbe, 0xc4, 0x67,
-  0x38, 0xc2, 0x17, 0xf2, 0x85, 0xf8, 0x66, 0x19, 0x46, 0xc1, 0x14, 0x02,
-  0xd3, 0x97, 0x5f, 0x88, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x02,
-  0xa3, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0x44, 0x47, 0x87,
-  0x1b, 0x02, 0xd0, 0x01, 0x83, 0x59, 0x06, 0x52, 0x28, 0x85, 0xc0, 0x06,
-  0x91, 0x81, 0xcf, 0x2c, 0x81, 0x2a, 0xd8, 0xbf, 0x10, 0xf1, 0x99, 0x25,
-  0x50, 0x85, 0xe1, 0x88, 0x74, 0x00, 0x19, 0xe1, 0x9b, 0x65, 0x38, 0x05,
-  0x55, 0x08, 0x4c, 0x1d, 0x42, 0x26, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60,
-  0x98, 0x0b, 0x8c, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x68,
-  0x1d, 0x1d, 0x6e, 0x08, 0x56, 0x07, 0x0c, 0x66, 0x19, 0x50, 0x21, 0x15,
-  0x02, 0x4b, 0x99, 0x21, 0x3e, 0xb3, 0x04, 0xaa, 0x60, 0x84, 0xcb, 0xc0,
-  0x67, 0x96, 0x40, 0x15, 0x06, 0x5a, 0x0c, 0x8d, 0x14, 0xb0, 0x52, 0x20,
-  0x50, 0x41, 0x48, 0x05, 0xf4, 0x30, 0x85, 0x0b, 0x86, 0xb9, 0xc0, 0xa8,
-  0xdb, 0x8c, 0x3a, 0x7d, 0x19, 0xe6, 0x56, 0x64, 0x98, 0x23, 0x86, 0x39,
-  0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa8, 0x7c, 0x47, 0x75,
-  0xcc, 0x06, 0x77, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13,
-  0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01, 0x40,
-  0x10, 0x0c, 0x9e, 0xf2, 0x89, 0x9d, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00,
-  0x04, 0xc1, 0xe0, 0x31, 0x1f, 0xd9, 0x49, 0x88, 0x60, 0xc4, 0x00, 0x01,
-  0x40, 0x10, 0x0c, 0x9e, 0xf3, 0x99, 0x9d, 0x84, 0x08, 0x46, 0x0c, 0x14,
-  0x00, 0x04, 0xc1, 0x60, 0x2b, 0x1f, 0xd9, 0x81, 0x9b, 0xe0, 0x77, 0xfe,
-  0xa6, 0x77, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x4e, 0x18, 0xe3,
-  0x84, 0x31, 0x2a, 0x38, 0x9d, 0xab, 0x21, 0xd8, 0x0b, 0x0c, 0x9b, 0x25,
-  0x58, 0x85, 0x81, 0x16, 0x43, 0x37, 0xec, 0xc0, 0xde, 0xea, 0xc0, 0x26,
-  0xf0, 0x40, 0x50, 0x05, 0x7b, 0xcb, 0x83, 0x59, 0x06, 0x56, 0x70, 0x05,
-  0x95, 0x18, 0x8e, 0x68, 0x09, 0xbe, 0x19, 0xbe, 0x73, 0x89, 0x61, 0x86,
-  0x1b, 0x02, 0xb9, 0x21, 0x83, 0x1a, 0x02, 0x1d, 0x8e, 0x90, 0x09, 0xd0,
-  0x19, 0xbe, 0x0a, 0x04, 0x3d, 0x9a, 0x18, 0x66, 0xb8, 0x21, 0xa8, 0x1b,
-  0x32, 0xa8, 0x60, 0xd0, 0x59, 0x86, 0x56, 0x10, 0x87, 0xe0, 0x72, 0x66,
-  0x98, 0x53, 0x93, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0xa0, 0xda,
-  0x9f, 0xf3, 0x19, 0x9d, 0xfa, 0x19, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21,
-  0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0xe2, 0x90, 0x11, 0x03,
-  0x04, 0x00, 0x41, 0x30, 0x78, 0x44, 0xc8, 0x7d, 0x0e, 0x22, 0x18, 0x31,
-  0x40, 0x00, 0x10, 0x04, 0x83, 0x67, 0x84, 0xde, 0x87, 0x21, 0x82, 0x11,
-  0x03, 0x04, 0x00, 0x41, 0x30, 0x78, 0x48, 0x08, 0x7e, 0x24, 0x22, 0x18,
-  0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x4d, 0x84, 0xde, 0xa7, 0x75, 0x02,
-  0xfe, 0xe1, 0x1d, 0xfd, 0x19, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x38,
-  0x61, 0x8c, 0x13, 0xc6, 0xa8, 0x80, 0x7c, 0xae, 0x86, 0x60, 0x2f, 0x30,
-  0x6c, 0x96, 0x40, 0x1c, 0x86, 0x1b, 0xce, 0xe2, 0x84, 0xc0, 0x60, 0x96,
-  0xe1, 0x15, 0x60, 0x21, 0xa8, 0xd6, 0xb9, 0x1f, 0xb8, 0xc0, 0xa8, 0x11,
-  0x83, 0x03, 0x00, 0x41, 0x30, 0x58, 0x64, 0x08, 0x7f, 0xd8, 0x62, 0x77,
-  0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0x99, 0x21, 0xfc, 0x09, 0x84,
-  0x0b, 0x86, 0x29, 0xd8, 0xe1, 0x1f, 0xb8, 0xc0, 0xa8, 0x11, 0x83, 0x03,
-  0x00, 0x41, 0x30, 0x58, 0x6e, 0xa8, 0x7f, 0xe4, 0x02, 0x7c, 0x46, 0x0c,
-  0x0e, 0x00, 0x04, 0xc1, 0x60, 0xc1, 0xa1, 0xfe, 0x09, 0x84, 0x0b, 0x86,
-  0xb9, 0xc0, 0xa8, 0x3b, 0x8c, 0xba, 0xbc, 0x19, 0xe6, 0x54, 0x65, 0x98,
-  0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa8,
-  0x7a, 0x28, 0x85, 0xca, 0xe7, 0x86, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41,
-  0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4,
-  0x00, 0x01, 0x40, 0x10, 0x0c, 0x1e, 0x32, 0x82, 0xa1, 0x84, 0x08, 0x46,
-  0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x29, 0xa3, 0x18, 0x4a, 0x88, 0x60,
-  0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x1e, 0x33, 0x92, 0xa1, 0x84, 0x08,
-  0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0x23, 0xa3, 0x18, 0x7a, 0x9f,
-  0xc0, 0x87, 0xfc, 0x87, 0x87, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08,
-  0x4e, 0x18, 0xe3, 0x84, 0x31, 0x2a, 0x30, 0xa1, 0xab, 0x21, 0xd8, 0x0b,
-  0x0c, 0x9b, 0x25, 0x10, 0x87, 0xe1, 0x86, 0xd4, 0x40, 0x23, 0x30, 0x98,
-  0x65, 0x88, 0x05, 0x71, 0x08, 0xcc, 0x7c, 0xd0, 0x27, 0x3e, 0xc3, 0x11,
-  0xad, 0x91, 0x3e, 0xc4, 0x37, 0xcb, 0x20, 0x0b, 0xb5, 0x10, 0x98, 0xfa,
-  0xb8, 0x46, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x18, 0x65,
-  0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x21, 0x47, 0x3a, 0xdc, 0x10,
-  0xc0, 0x11, 0x18, 0xcc, 0x32, 0xcc, 0x02, 0x2d, 0x04, 0x36, 0xc8, 0x0f,
-  0x7c, 0x66, 0x09, 0x72, 0xc1, 0xe2, 0x87, 0x88, 0xcf, 0x2c, 0x41, 0x2e,
-  0x0c, 0x47, 0xe0, 0x86, 0xfc, 0x08, 0xdf, 0x2c, 0x83, 0x2d, 0xe4, 0x42,
-  0x60, 0xb9, 0x31, 0x3f, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c,
-  0x60, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x44, 0x1f, 0xe9,
-  0x70, 0x43, 0xb0, 0x47, 0x60, 0x30, 0xcb, 0x70, 0x0b, 0xb8, 0x10, 0xd8,
-  0xfe, 0x0c, 0xf1, 0x99, 0x25, 0xc8, 0x05, 0x23, 0xfc, 0x07, 0x3e, 0xb3,
-  0x04, 0xb9, 0x30, 0xd0, 0x62, 0x68, 0xb3, 0x80, 0xd1, 0x02, 0x71, 0x0b,
-  0x02, 0x2e, 0x98, 0x4e, 0x2d, 0x5c, 0x30, 0x8c, 0xf5, 0x4f, 0x08, 0xc5,
-  0x67, 0x38, 0xe2, 0x3c, 0x44, 0x88, 0xf8, 0x66, 0x19, 0x74, 0xa1, 0x17,
-  0x02, 0x1b, 0x21, 0xf4, 0x88, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6,
-  0x02, 0xa3, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0x56, 0x49,
-  0x87, 0x1b, 0x82, 0x54, 0x02, 0x83, 0x59, 0x86, 0x5d, 0xe0, 0x85, 0xc0,
-  0x86, 0x15, 0x82, 0xcf, 0x2c, 0x41, 0x38, 0x18, 0x0a, 0x11, 0xf1, 0x99,
-  0x25, 0x08, 0x87, 0xe1, 0x08, 0xf9, 0x48, 0x21, 0xe1, 0x9b, 0x65, 0xf0,
-  0x85, 0x70, 0x08, 0x6c, 0x3e, 0x54, 0x28, 0x3e, 0x16, 0x38, 0xf4, 0xb9,
-  0x60, 0x98, 0x0b, 0x8c, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a,
-  0xb0, 0x25, 0x1d, 0x6e, 0x08, 0x68, 0x09, 0x0c, 0x66, 0x19, 0x7e, 0x01,
-  0x1c, 0x02, 0x93, 0xa1, 0x21, 0x3e, 0xb3, 0x04, 0xe1, 0x60, 0xc4, 0x0d,
-  0xc1, 0x67, 0x96, 0x20, 0x1c, 0x06, 0x5a, 0x0c, 0x6d, 0x17, 0x30, 0x5e,
-  0x20, 0x7e, 0x41, 0x00, 0x07, 0xfd, 0xe9, 0x85, 0x0b, 0x86, 0xb9, 0xc0,
-  0xa8, 0xdb, 0x8c, 0xba, 0x11, 0x1a, 0xe6, 0x68, 0x66, 0x98, 0x23, 0x86,
-  0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa8, 0xce, 0x69,
-  0x96, 0xde, 0x28, 0x9c, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46,
-  0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01,
-  0x40, 0x10, 0x0c, 0x1e, 0x77, 0xd2, 0xa5, 0x84, 0x08, 0x46, 0x0c, 0x10,
-  0x00, 0x04, 0xc1, 0xe0, 0x79, 0xa7, 0x5d, 0x4a, 0x88, 0x60, 0xc4, 0x00,
-  0x01, 0x40, 0x10, 0x0c, 0x1e, 0x78, 0xe2, 0xa5, 0x84, 0x08, 0x46, 0x0c,
-  0x14, 0x00, 0x04, 0xc1, 0x60, 0x73, 0xa7, 0x5d, 0xca, 0xa3, 0x00, 0x9d,
-  0x50, 0xc9, 0x9c, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x4e, 0x18,
-  0xe3, 0x84, 0x31, 0x2a, 0x80, 0xa5, 0xab, 0x21, 0xd8, 0x0b, 0x0c, 0x9b,
-  0x25, 0x10, 0x87, 0x81, 0x16, 0x43, 0x37, 0x5a, 0x01, 0x25, 0x03, 0x56,
-  0xb0, 0x89, 0x57, 0x10, 0xc2, 0x01, 0x25, 0x03, 0x58, 0x98, 0x65, 0x18,
-  0x87, 0x72, 0x98, 0x91, 0xe1, 0x08, 0x1c, 0x29, 0xa5, 0xe1, 0xbb, 0x1c,
-  0x19, 0x66, 0xb8, 0x21, 0xd8, 0x23, 0x32, 0xa8, 0x21, 0xd0, 0xe1, 0x88,
-  0x1c, 0x49, 0xa5, 0xe1, 0xab, 0x40, 0xd0, 0xdb, 0x91, 0x61, 0x86, 0x1b,
-  0x02, 0x3f, 0x22, 0x83, 0x0a, 0x06, 0x9d, 0x65, 0x20, 0x87, 0x7c, 0x08,
-  0x4e, 0x8c, 0x86, 0xb9, 0xb9, 0x19, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10,
-  0x0c, 0x2a, 0x92, 0x82, 0x27, 0x56, 0xf2, 0xa7, 0xd1, 0x84, 0x00, 0x18,
-  0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x0e,
-  0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x67, 0xa5, 0xee, 0xe9, 0x20,
-  0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0x78, 0x58, 0x0a, 0x9f, 0x18,
-  0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0xa7, 0xa5, 0xf2, 0x49,
-  0x22, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0xd8, 0x56, 0x0a, 0x9f,
-  0x6c, 0x29, 0x28, 0xa9, 0x72, 0x1a, 0xa9, 0xd1, 0x84, 0x00, 0x18, 0x4d,
-  0x10, 0x82, 0x13, 0xc6, 0x38, 0x61, 0x8c, 0x0a, 0xda, 0xe9, 0x6a, 0x08,
-  0xf6, 0x02, 0xc3, 0x66, 0x09, 0xf2, 0x61, 0xb8, 0x01, 0x4e, 0x60, 0x0a,
-  0x0c, 0x66, 0x19, 0xcc, 0xe1, 0x1c, 0x82, 0xb2, 0x25, 0x90, 0x82, 0x0b,
-  0x8c, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x65, 0xa7, 0x42, 0xea,
-  0x4e, 0xc8, 0x69, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x16, 0x9e, 0x0a,
-  0xa9, 0x40, 0xb8, 0x60, 0x98, 0xca, 0xa5, 0x92, 0x82, 0x0b, 0x8c, 0x1a,
-  0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x05, 0xac, 0x4c, 0x2a, 0x4f, 0xd2,
-  0x69, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x96, 0xb0, 0x32, 0xa9, 0x40,
-  0xb8, 0x60, 0x98, 0x0b, 0x8c, 0xba, 0xc3, 0xa8, 0x13, 0xa5, 0x61, 0x6e,
-  0x76, 0x86, 0x39, 0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10,
-  0x04, 0x83, 0xca, 0xac, 0x64, 0xca, 0x9d, 0xc0, 0x6a, 0x34, 0x21, 0x00,
-  0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88,
-  0x44, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x69, 0xab, 0x9c, 0x4a,
-  0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x1e, 0xb7, 0xd2, 0xa9,
-  0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x79, 0xab, 0x9d,
-  0x4a, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0xb6, 0xb6, 0xd2,
-  0x29, 0x7c, 0x0a, 0xce, 0xea, 0xa4, 0xca, 0x6a, 0x34, 0x21, 0x00, 0x46,
-  0x13, 0x84, 0xe0, 0x84, 0x31, 0x4e, 0x18, 0xa3, 0x82, 0x97, 0xba, 0x1a,
-  0x82, 0xbd, 0xc0, 0xb0, 0x59, 0x82, 0x7c, 0x18, 0x6e, 0x90, 0x95, 0xb8,
-  0x02, 0x83, 0x59, 0x06, 0x74, 0xc8, 0x87, 0xc0, 0xde, 0x29, 0x9e, 0xe2,
-  0x33, 0x1c, 0x81, 0x2b, 0xf2, 0x44, 0x7c, 0xb3, 0x0c, 0xe9, 0xc0, 0x0e,
-  0x81, 0xcd, 0x53, 0xae, 0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73,
-  0x81, 0x51, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x7b, 0xa5,
-  0xc3, 0x0d, 0x41, 0x5e, 0x81, 0xc1, 0x2c, 0x83, 0x3a, 0xac, 0x43, 0x60,
-  0xc3, 0x3e, 0xc1, 0x67, 0x96, 0x00, 0x1e, 0x4c, 0x9f, 0x88, 0xf8, 0xcc,
-  0x12, 0xc0, 0xc3, 0x70, 0xc4, 0xb8, 0xec, 0x93, 0xf0, 0xcd, 0x32, 0xb4,
-  0x03, 0x3c, 0x04, 0x46, 0x2e, 0xfc, 0x14, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c,
-  0x30, 0xcc, 0x05, 0x46, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45,
-  0x98, 0x96, 0x0e, 0x37, 0x04, 0xa4, 0x05, 0x06, 0xb3, 0x0c, 0xee, 0xf0,
-  0x0e, 0x81, 0x91, 0xd4, 0x10, 0x9f, 0x59, 0x02, 0x78, 0x30, 0xe2, 0xa4,
-  0xe0, 0x33, 0x4b, 0x00, 0x0f, 0x03, 0x2d, 0x86, 0xa6, 0x0e, 0xd8, 0x3a,
-  0x10, 0xee, 0x20, 0xbc, 0x03, 0x6f, 0xb1, 0xc3, 0x05, 0xc3, 0x98, 0x49,
-  0xa9, 0x54, 0x7c, 0x86, 0x23, 0xdc, 0x65, 0xa5, 0x88, 0x6f, 0x96, 0x21,
-  0x1e, 0xe8, 0x21, 0x30, 0x96, 0x7a, 0x97, 0xf8, 0x58, 0x30, 0xd0, 0xe7,
-  0x82, 0x61, 0x2e, 0x30, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29,
-  0x82, 0xb6, 0x74, 0xb8, 0x21, 0x90, 0x2d, 0x30, 0x98, 0x65, 0x90, 0x87,
-  0x79, 0x08, 0x6c, 0xa0, 0x29, 0xf8, 0xcc, 0x12, 0xe0, 0x83, 0xc5, 0x14,
-  0x11, 0x9f, 0x59, 0x02, 0x7c, 0x18, 0x8e, 0xc8, 0x17, 0x99, 0x12, 0xbe,
-  0x59, 0x86, 0x7a, 0xc0, 0x87, 0xc0, 0xf4, 0x65, 0xa6, 0xe2, 0x63, 0x81,
-  0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x28, 0x0b, 0x22, 0xf9, 0x58, 0x11,
-  0xc4, 0xa7, 0x88, 0xdf, 0xd2, 0xe1, 0x86, 0xa0, 0xb7, 0xc0, 0x60, 0x96,
-  0xc1, 0x1e, 0xee, 0x21, 0xb0, 0x9d, 0x1a, 0xe2, 0x33, 0x4b, 0x80, 0x0f,
-  0x46, 0x80, 0x15, 0x7c, 0x66, 0x09, 0xf0, 0x61, 0xa0, 0xc5, 0xd0, 0xe4,
-  0x01, 0x9b, 0x07, 0xc2, 0x1e, 0x84, 0x7b, 0x40, 0x31, 0x7a, 0xb8, 0x60,
-  0x98, 0x0b, 0x8c, 0xba, 0xcd, 0xa8, 0x63, 0xa9, 0x61, 0xae, 0x87, 0x86,
-  0x39, 0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83,
-  0x0a, 0xbe, 0x78, 0x0b, 0xaf, 0xd4, 0x6b, 0x34, 0x21, 0x00, 0x46, 0x13,
-  0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46,
-  0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0xb9, 0xaf, 0xf1, 0x4a, 0x88, 0x60,
-  0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x1e, 0xfc, 0x22, 0xaf, 0x84, 0x08,
-  0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0xc9, 0xaf, 0xf2, 0x4a, 0x88,
-  0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0xb6, 0xfb, 0x22, 0x2f, 0xd1,
-  0x0a, 0xe2, 0x2b, 0xb6, 0xde, 0x6b, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84,
-  0xe0, 0x84, 0x31, 0x4e, 0x18, 0xa3, 0x82, 0xdc, 0xba, 0x1a, 0x82, 0xbd,
-  0xc0, 0xb0, 0x59, 0x82, 0x7c, 0x18, 0x68, 0x31, 0x74, 0x83, 0x1c, 0x74,
-  0x36, 0x18, 0x07, 0x9b, 0x30, 0x07, 0x01, 0x1f, 0x74, 0x36, 0x38, 0x87,
-  0x59, 0x06, 0x7d, 0xe0, 0x07, 0x9e, 0x19, 0x8e, 0xf8, 0x19, 0xd7, 0x1a,
-  0xbe, 0x03, 0x9b, 0x61, 0x86, 0x1b, 0x02, 0xd2, 0x22, 0x83, 0x1a, 0x02,
-  0x1d, 0x8e, 0x10, 0x1b, 0xd9, 0x1a, 0xbe, 0x0a, 0x04, 0x3d, 0xb2, 0x19,
-  0x66, 0xb8, 0x21, 0x38, 0x2d, 0x32, 0xa8, 0x60, 0xd0, 0x59, 0x86, 0x7d,
-  0x80, 0x89, 0xe0, 0xd6, 0x6a, 0x98, 0xe3, 0xa3, 0x61, 0x46, 0x0c, 0x0e,
-  0x00, 0x04, 0xc1, 0xa0, 0x6a, 0xb1, 0xfc, 0xaa, 0xad, 0x13, 0x1b, 0x4d,
-  0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62,
-  0x28, 0xe2, 0x90, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0x78, 0x68, 0x0c,
-  0xc4, 0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0xa7, 0xc6,
-  0x42, 0x8c, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0x78, 0x6c,
-  0x4c, 0xc4, 0x24, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x8d,
-  0xc6, 0x42, 0xec, 0xb7, 0x02, 0x17, 0x73, 0x2f, 0x16, 0x1b, 0x4d, 0x08,
-  0x80, 0xd1, 0x04, 0x21, 0x38, 0x61, 0x8c, 0x13, 0xc6, 0xa8, 0xc0, 0xbe,
-  0xae, 0x86, 0x60, 0x2f, 0x30, 0x6c, 0x96, 0x00, 0x26, 0x86, 0x1b, 0xf2,
-  0x26, 0xc7, 0xc0, 0x60, 0x96, 0xa1, 0x1f, 0xfc, 0x21, 0xa8, 0xdf, 0x4a,
-  0x31, 0xb8, 0xc0, 0xa8, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x58, 0xc8,
-  0x4c, 0xc5, 0xfc, 0xa6, 0xbd, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60,
-  0x29, 0x33, 0x15, 0x0b, 0x84, 0x0b, 0x86, 0x29, 0xf1, 0x72, 0x31, 0xb8,
-  0xc0, 0xa8, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x58, 0xd2, 0xec, 0xc5,
-  0x44, 0x47, 0xbe, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0x51, 0xb3,
-  0x17, 0x0b, 0x84, 0x0b, 0x86, 0xb9, 0xc0, 0xa8, 0x3b, 0x8c, 0xba, 0xd5,
-  0x1a, 0xe6, 0x78, 0x69, 0x98, 0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83,
-  0x03, 0x00, 0x41, 0x30, 0xa8, 0xde, 0x6c, 0xc7, 0xee, 0x2b, 0xcd, 0x46,
-  0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81,
-  0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x1e, 0x3b,
-  0x13, 0xb3, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0xb9,
-  0xb3, 0x31, 0x4b, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x1e,
-  0x3c, 0x23, 0xb3, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60,
-  0xb3, 0xb3, 0x31, 0x0b, 0xb1, 0x00, 0xce, 0x60, 0xcc, 0xcd, 0x46, 0x13,
-  0x02, 0x60, 0x34, 0x41, 0x08, 0x4e, 0x18, 0xe3, 0x84, 0x31, 0x2a, 0xc0,
-  0xb1, 0xab, 0x21, 0xd8, 0x0b, 0x0c, 0x9b, 0x25, 0x80, 0x89, 0xe1, 0x86,
-  0xdd, 0xd1, 0x33, 0x30, 0x98, 0x65, 0xf8, 0x07, 0x98, 0x08, 0x0c, 0xbf,
-  0xf4, 0x2b, 0x3e, 0xc3, 0x11, 0xbf, 0xb3, 0x5f, 0xc4, 0x37, 0xcb, 0x00,
-  0x12, 0x23, 0x11, 0x18, 0x7f, 0x81, 0x4f, 0x7c, 0x2c, 0x18, 0xe8, 0x73,
-  0xc1, 0x30, 0x17, 0x18, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14,
-  0x41, 0x6a, 0x3a, 0xdc, 0x10, 0x88, 0x1a, 0x18, 0xcc, 0x32, 0x84, 0x84,
-  0x48, 0x04, 0x36, 0x90, 0x18, 0x7c, 0x66, 0x09, 0x4e, 0xc2, 0x46, 0x8c,
-  0x88, 0xcf, 0x2c, 0xc1, 0x49, 0x0c, 0x47, 0xa8, 0x0f, 0x89, 0x09, 0xdf,
-  0x2c, 0x03, 0x49, 0x9c, 0x44, 0x60, 0xeb, 0x53, 0x62, 0xf1, 0xb1, 0xc0,
-  0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0x60, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08,
-  0xe2, 0x53, 0xc4, 0xab, 0xe9, 0x70, 0x43, 0xd0, 0x6a, 0x60, 0x30, 0xcb,
-  0x50, 0x12, 0x26, 0x11, 0x58, 0x8b, 0x0d, 0xf1, 0x99, 0x25, 0x38, 0x09,
-  0x23, 0x60, 0x0c, 0x3e, 0xb3, 0x04, 0x27, 0x31, 0xd0, 0x62, 0x68, 0x21,
-  0x81, 0x89, 0x04, 0x51, 0x12, 0x82, 0x49, 0xa8, 0xdf, 0x48, 0x5c, 0x30,
-  0x8c, 0xbd, 0xd8, 0x8c, 0xc5, 0x67, 0x38, 0xe2, 0x7e, 0x68, 0x8c, 0xf8,
-  0x66, 0x19, 0x50, 0x62, 0x25, 0x02, 0xab, 0x31, 0xfc, 0x89, 0x8f, 0x05,
-  0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa3, 0x2c, 0x30, 0xe4, 0x63, 0x45,
-  0x10, 0x9f, 0x22, 0x7a, 0x4d, 0x87, 0x1b, 0x82, 0x5d, 0x03, 0x83, 0x59,
-  0x86, 0x94, 0x50, 0x89, 0xc0, 0x86, 0x1e, 0x83, 0xcf, 0x2c, 0xc1, 0x4b,
-  0x98, 0x8e, 0x11, 0xf1, 0x99, 0x25, 0x78, 0x89, 0xe1, 0x08, 0x11, 0xda,
-  0x31, 0xe1, 0x9b, 0x65, 0x60, 0x89, 0x97, 0x08, 0x6c, 0x84, 0x78, 0x2c,
-  0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x8c, 0xb2, 0x20, 0x92,
-  0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x40, 0x37, 0x1d, 0x6e, 0x08, 0xcc, 0x0d,
-  0x0c, 0x66, 0x19, 0x5a, 0xc2, 0x25, 0x02, 0x23, 0xb3, 0x21, 0x3e, 0xb3,
-  0x04, 0x2f, 0x61, 0x44, 0x9a, 0xc1, 0x67, 0x96, 0xe0, 0x25, 0x06, 0x5a,
-  0x0c, 0x2d, 0x25, 0x30, 0x95, 0x20, 0x5a, 0x42, 0x70, 0x09, 0x1d, 0x0c,
-  0x56, 0xe2, 0x82, 0x61, 0x2e, 0x30, 0xea, 0x36, 0xa3, 0xae, 0xc6, 0x86,
-  0x39, 0xb3, 0x1a, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00,
-  0x40, 0x10, 0x0c, 0xaa, 0x7c, 0x2b, 0xb7, 0x50, 0x9b, 0xb7, 0xd1, 0x84,
-  0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86,
-  0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x07, 0xe4, 0xd8,
-  0x2d, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0x78, 0x42, 0xae,
-  0xdd, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x47, 0xe4,
-  0xdc, 0x2d, 0x21, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0xd8, 0x40,
-  0xae, 0xdd, 0x56, 0x2d, 0xd0, 0x37, 0x5d, 0xc3, 0xb7, 0xd1, 0x84, 0x00,
-  0x18, 0x4d, 0x10, 0x82, 0x13, 0xc6, 0x38, 0x61, 0x8c, 0x0a, 0xc4, 0xed,
-  0x6a, 0x08, 0xf6, 0x02, 0xc3, 0x66, 0x09, 0x60, 0x62, 0xa0, 0xc5, 0xd0,
-  0x8d, 0x7d, 0x60, 0xeb, 0x40, 0x1f, 0x6c, 0xa2, 0x1f, 0x84, 0x97, 0x60,
-  0xeb, 0xc0, 0x1f, 0x46, 0x0c, 0x0c, 0x00, 0x04, 0xc1, 0xe0, 0x80, 0xb9,
-  0x76, 0xeb, 0x2b, 0xa3, 0x91, 0x9e, 0x89, 0x8f, 0x09, 0x81, 0x7c, 0x2c,
-  0xf8, 0x19, 0xf8, 0x58, 0xe1, 0x12, 0xf1, 0xb1, 0x22, 0x90, 0x8f, 0x05,
-  0x30, 0x01, 0x9f, 0x11, 0x03, 0x03, 0x00, 0x41, 0x30, 0x38, 0x6e, 0x8e,
-  0xde, 0x46, 0xcb, 0x84, 0x22, 0x3e, 0x16, 0x08, 0xf2, 0xb1, 0xe0, 0x80,
-  0xcf, 0x05, 0x23, 0x5d, 0x30, 0x40, 0x09, 0xf3, 0x86, 0x17, 0x0c, 0x30,
-  0x62, 0xe0, 0x00, 0x20, 0x08, 0x06, 0xdd, 0xcd, 0xed, 0x5b, 0xb9, 0xf9,
-  0x9a, 0xcc, 0x0d, 0xc1, 0xbf, 0xfd, 0x9b, 0xbc, 0xc1, 0xdc, 0x2c, 0xc1,
-  0x08, 0x0d, 0x37, 0xbc, 0x18, 0xcd, 0x81, 0xc1, 0x2c, 0x83, 0x4c, 0x8c,
-  0x50, 0x30, 0x62, 0x60, 0x00, 0x20, 0x08, 0x06, 0x07, 0xd8, 0xf9, 0x5b,
-  0x6b, 0x59, 0x20, 0x6f, 0xf0, 0x19, 0x31, 0x30, 0x00, 0x10, 0x04, 0x83,
-  0x43, 0xec, 0x40, 0xce, 0xb5, 0x2c, 0xa0, 0x37, 0xf8, 0x8c, 0x26, 0x94,
-  0xdb, 0x30, 0xdc, 0x10, 0xe0, 0x1c, 0x18, 0xcc, 0x32, 0xcc, 0x44, 0x4d,
-  0x04, 0xc3, 0x11, 0x45, 0xbb, 0x0d, 0xdf, 0x19, 0xc3, 0x0c, 0x37, 0x04,
-  0xe3, 0x46, 0x06, 0x35, 0x04, 0x3a, 0x1c, 0x71, 0xc4, 0xdb, 0xf0, 0x55,
-  0x20, 0xe8, 0x25, 0xc3, 0x0c, 0x37, 0x04, 0xe6, 0x46, 0x06, 0x15, 0x0c,
-  0x3a, 0xcb, 0x40, 0x13, 0x69, 0x11, 0x9c, 0xaa, 0x0d, 0x73, 0xbb, 0x35,
-  0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x54, 0x6c, 0x87, 0x73, 0xf4,
-  0x66, 0x76, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83,
-  0x30, 0x9a, 0x40, 0x0c, 0x45, 0x1c, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08,
-  0x06, 0xcf, 0xdc, 0xfd, 0xdc, 0x41, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82,
-  0x60, 0xf0, 0xd0, 0x1d, 0xd8, 0x31, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20,
-  0x08, 0x06, 0x4f, 0xdd, 0x85, 0x9d, 0x44, 0x04, 0x23, 0x06, 0x0a, 0x00,
-  0x82, 0x60, 0xb0, 0xcd, 0x1d, 0xd8, 0xf9, 0x5b, 0xd0, 0x76, 0x2d, 0xb7,
-  0x76, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0x27, 0x8c, 0x71, 0xc2,
-  0x18, 0x15, 0xd4, 0xdc, 0xd5, 0x10, 0xec, 0x05, 0x86, 0xcd, 0x12, 0xa4,
-  0xc5, 0x70, 0x83, 0x87, 0x77, 0x60, 0x30, 0xcb, 0x60, 0x13, 0x37, 0x11,
-  0x94, 0xbf, 0xa1, 0x1d, 0x5c, 0x60, 0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20,
-  0x18, 0x2c, 0xa3, 0x97, 0x76, 0x64, 0xc0, 0x72, 0x23, 0x06, 0x07, 0x00,
-  0x82, 0x60, 0xb0, 0x90, 0x5e, 0xda, 0x05, 0xc2, 0x05, 0xc3, 0x54, 0xc8,
-  0xb5, 0x1d, 0x5c, 0x60, 0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x2c,
-  0xa8, 0xe7, 0x76, 0x67, 0x10, 0x73, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60,
-  0xb0, 0xa4, 0x9e, 0xdb, 0x05, 0xc2, 0x05, 0xc3, 0x5c, 0x60, 0xd4, 0x1d,
-  0x46, 0x9d, 0xba, 0x0d, 0x73, 0xfb, 0x35, 0xcc, 0x11, 0xc3, 0x1c, 0x31,
-  0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x54, 0xae, 0xa7, 0x77, 0x36,
-  0x87, 0x7a, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83,
-  0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08,
-  0x06, 0x4f, 0xed, 0x85, 0x5e, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82,
-  0x60, 0xf0, 0xd8, 0x9e, 0xe8, 0x25, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20,
-  0x08, 0x06, 0xcf, 0xed, 0x8d, 0x5e, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00,
-  0x82, 0x60, 0xb0, 0xd5, 0x9e, 0xe8, 0x81, 0x5d, 0xf0, 0x7a, 0x6f, 0xd7,
-  0x7a, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0x27, 0x8c, 0x71, 0xc2,
-  0x18, 0x15, 0xdc, 0xdd, 0xd5, 0x10, 0xec, 0x05, 0x86, 0xcd, 0x12, 0xa4,
-  0xc5, 0x70, 0x03, 0x28, 0xe4, 0x1e, 0x18, 0xcc, 0x32, 0xe0, 0x44, 0x5a,
-  0x04, 0x76, 0x73, 0x39, 0x17, 0x9f, 0xe1, 0x88, 0x52, 0xd0, 0x39, 0xe2,
-  0x9b, 0x65, 0xc8, 0x09, 0x9e, 0x08, 0x6c, 0xe7, 0x4c, 0x21, 0x3e, 0x16,
-  0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x8c, 0xb2, 0xc0, 0x90, 0x8f, 0x15,
-  0x41, 0x7c, 0x8a, 0x18, 0x3f, 0x1d, 0x6e, 0x08, 0xc2, 0x0f, 0x0c, 0x66,
-  0x19, 0x74, 0x62, 0x27, 0x02, 0x1b, 0xc6, 0x0e, 0x3e, 0xb3, 0x04, 0x60,
-  0x61, 0x62, 0x47, 0xc4, 0x67, 0x96, 0x00, 0x2c, 0x86, 0x23, 0x60, 0x61,
-  0xec, 0x84, 0x6f, 0x96, 0xa1, 0x27, 0xc0, 0x22, 0xb0, 0x58, 0x20, 0xbb,
-  0xf8, 0x58, 0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x30, 0xca, 0x82, 0x48,
-  0x3e, 0x56, 0x04, 0xf1, 0x29, 0xc2, 0xfd, 0x74, 0xb8, 0x21, 0x60, 0x3f,
-  0x30, 0x98, 0x65, 0xf0, 0x89, 0x9f, 0x08, 0x8c, 0xed, 0x86, 0xf8, 0xcc,
-  0x12, 0x80, 0x85, 0x11, 0x6f, 0x07, 0x9f, 0x59, 0x02, 0xb0, 0x18, 0x68,
-  0x31, 0x34, 0x9d, 0xc0, 0x76, 0x82, 0xf0, 0x09, 0xe1, 0x27, 0xec, 0x82,
-  0x27, 0x2e, 0x18, 0xc6, 0xdc, 0x4e, 0xee, 0xe2, 0x33, 0x1c, 0xc1, 0x0b,
-  0x73, 0x47, 0x7c, 0xb3, 0x0c, 0x61, 0x41, 0x16, 0x81, 0xd1, 0x5d, 0x2f,
-  0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x51, 0x16, 0x18,
-  0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0xfc, 0xa7, 0xc3, 0x0d, 0x81, 0xfe,
-  0x81, 0xc1, 0x2c, 0x83, 0x58, 0x8c, 0x45, 0x60, 0x03, 0xdf, 0xc1, 0x67,
-  0x96, 0x00, 0x2d, 0x2c, 0xef, 0x88, 0xf8, 0xcc, 0x12, 0xa0, 0xc5, 0x70,
-  0xc4, 0x39, 0xe8, 0x9d, 0xf0, 0xcd, 0x32, 0x94, 0x05, 0x5a, 0x04, 0x86,
-  0x0e, 0x7b, 0x17, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x46,
-  0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0x9c, 0x60, 0xa0, 0xc3,
-  0x0d, 0x41, 0x09, 0x06, 0x60, 0x30, 0xcb, 0x60, 0x16, 0x67, 0x11, 0xd8,
-  0xe8, 0x0d, 0xf1, 0x99, 0x25, 0x40, 0x0b, 0x23, 0x50, 0x0f, 0x3e, 0xb3,
-  0x04, 0x68, 0x31, 0xd0, 0x62, 0x68, 0x62, 0x81, 0x8d, 0x05, 0x61, 0x16,
-  0xc2, 0x59, 0x90, 0x07, 0x59, 0x5c, 0x30, 0xcc, 0x05, 0x46, 0xdd, 0x66,
-  0xd4, 0xd1, 0xdd, 0x30, 0x57, 0x6a, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3,
-  0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x41, 0x85, 0x83, 0x01, 0x09, 0x06,
-  0xe0, 0x27, 0x83, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68,
-  0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00,
-  0x08, 0x82, 0xc1, 0xf3, 0x83, 0xc1, 0x0a, 0x06, 0x09, 0x11, 0x8c, 0x18,
-  0x20, 0x00, 0x08, 0x82, 0xc1, 0x03, 0x86, 0x01, 0x0b, 0x06, 0x09, 0x11,
-  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x13, 0x86, 0x41, 0x0b, 0x06,
-  0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0xf6, 0x83, 0x01,
-  0x0b, 0x06, 0xea, 0x17, 0xe4, 0x60, 0x90, 0x7f, 0x37, 0x18, 0x8c, 0x26,
-  0x04, 0xc0, 0x68, 0x82, 0x10, 0x9c, 0x30, 0xc6, 0x09, 0x63, 0x54, 0x10,
-  0x82, 0xc1, 0xd5, 0x10, 0xec, 0x05, 0x86, 0xcd, 0x12, 0xa4, 0xc5, 0x40,
-  0x8b, 0xa1, 0x1b, 0x34, 0xa1, 0xfe, 0xc1, 0x4c, 0xd8, 0x84, 0x4d, 0x08,
-  0x68, 0xa1, 0xfe, 0xc1, 0x4d, 0xcc, 0x32, 0xa8, 0x05, 0x5b, 0xa8, 0xc4,
-  0x70, 0xc4, 0x4b, 0xd8, 0xdf, 0xf0, 0x1d, 0x4c, 0x0c, 0x33, 0xdc, 0x10,
-  0xb0, 0x1f, 0x19, 0xd4, 0x10, 0xe8, 0x70, 0x04, 0x4c, 0xe8, 0xdf, 0xf0,
-  0x55, 0x20, 0xe8, 0xc9, 0xc4, 0x30, 0xc3, 0x0d, 0xc1, 0xfb, 0x91, 0x41,
-  0x05, 0x83, 0xce, 0x32, 0xac, 0x05, 0x68, 0x04, 0x37, 0x7b, 0xc3, 0x1c,
-  0xb9, 0x0d, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x55, 0x1d, 0x06,
-  0x61, 0x18, 0xf4, 0xdf, 0x1b, 0x06, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20,
-  0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x1c, 0x32, 0x62,
-  0x80, 0x00, 0x20, 0x08, 0x06, 0x0f, 0x1f, 0x06, 0x68, 0x18, 0x1c, 0x44,
-  0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x4f, 0x1f, 0x06, 0x69, 0x18,
-  0x30, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x8f, 0x1f, 0x06,
-  0x6a, 0x18, 0x48, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x1b,
-  0x1f, 0x06, 0x69, 0x18, 0x9c, 0x60, 0x10, 0xd8, 0x61, 0x60, 0x83, 0x01,
-  0x1d, 0x06, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0x27, 0x8c, 0x71,
-  0xc2, 0x18, 0x15, 0xf8, 0x60, 0x70, 0x35, 0x04, 0x7b, 0x81, 0x61, 0xb3,
-  0x04, 0xa0, 0x31, 0xdc, 0x70, 0x16, 0xa1, 0x18, 0x80, 0xc1, 0x2c, 0x43,
-  0x5b, 0xb8, 0x45, 0x50, 0x27, 0x18, 0xc4, 0x61, 0x00, 0x17, 0x18, 0x35,
-  0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x0b, 0x2b, 0x06, 0x72, 0x18, 0xb8,
-  0x45, 0x0d, 0x06, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xb0, 0xb4, 0x62,
-  0x20, 0x87, 0x41, 0x20, 0x5c, 0x30, 0x4c, 0xa9, 0x60, 0x60, 0x87, 0x01,
-  0x5c, 0x60, 0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x2c, 0xb1, 0x18,
-  0xdc, 0x61, 0x00, 0x17, 0x3a, 0x18, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82,
-  0xc1, 0x22, 0x8b, 0xc1, 0x1d, 0x06, 0x81, 0x70, 0xc1, 0x30, 0x17, 0x18,
-  0x75, 0x87, 0x51, 0x37, 0x7f, 0xc3, 0x1c, 0xc9, 0x0d, 0x73, 0xc4, 0x30,
-  0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0xd5, 0x2d, 0x06,
-  0xa3, 0x18, 0xfc, 0x60, 0x10, 0x8b, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26,
-  0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xe3, 0x8b, 0x81, 0x2a, 0x06, 0x09,
-  0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xf3, 0x8b, 0xc1, 0x2a,
-  0x06, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x03, 0x8e,
-  0x01, 0x2b, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1,
-  0xe6, 0x8b, 0xc1, 0x2a, 0x06, 0x69, 0x18, 0x04, 0xb8, 0x18, 0xe0, 0x61,
-  0x60, 0x8b, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x09, 0x63,
-  0x9c, 0x30, 0x46, 0x05, 0xa0, 0x18, 0x5c, 0x0d, 0xc1, 0x5e, 0x60, 0xd8,
-  0x2c, 0x01, 0x68, 0x0c, 0x37, 0xa4, 0x86, 0x38, 0x06, 0x60, 0x30, 0xcb,
-  0xf0, 0x16, 0xa0, 0x11, 0x18, 0x18, 0x06, 0x62, 0x18, 0xc4, 0x67, 0x38,
-  0xe2, 0x35, 0xc6, 0x30, 0x20, 0xbe, 0x59, 0x06, 0xb8, 0x98, 0x8b, 0xc0,
-  0xc8, 0x30, 0x80, 0x8d, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e,
-  0x30, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x82, 0x1d, 0x03,
-  0x1d, 0x6e, 0x08, 0xd4, 0x31, 0x00, 0x83, 0x59, 0x86, 0xb8, 0x90, 0x8b,
-  0xc0, 0x06, 0x36, 0x0c, 0xe0, 0x33, 0x4b, 0x70, 0x17, 0xb6, 0x86, 0x01,
-  0x11, 0x9f, 0x59, 0x82, 0xbb, 0x18, 0x8e, 0xd0, 0x0d, 0x36, 0x0c, 0x84,
-  0x6f, 0x96, 0x81, 0x2e, 0xee, 0x22, 0xb0, 0xdd, 0x68, 0xc3, 0x20, 0x3e,
-  0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x8c, 0xb2, 0x20, 0x92, 0x8f,
-  0x15, 0x41, 0x7c, 0x8a, 0xb8, 0xc7, 0x40, 0x87, 0x1b, 0x82, 0x7a, 0x0c,
-  0xc0, 0x60, 0x96, 0xa1, 0x2e, 0xec, 0x22, 0xb0, 0x3a, 0x0c, 0x86, 0xf8,
-  0xcc, 0x12, 0xdc, 0x85, 0x11, 0x78, 0x18, 0xc0, 0x67, 0x96, 0xe0, 0x2e,
-  0x06, 0x5a, 0x0c, 0x2d, 0x2e, 0x30, 0xb9, 0x20, 0xea, 0x42, 0xb0, 0x0b,
-  0xd5, 0x99, 0x8b, 0x0b, 0x86, 0xb1, 0x3b, 0x0c, 0xf6, 0x30, 0x88, 0xcf,
-  0x70, 0x44, 0x79, 0xf0, 0x61, 0x40, 0x7c, 0xb3, 0x0c, 0x78, 0xb1, 0x17,
-  0x81, 0xf5, 0x61, 0x60, 0x1e, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3,
-  0x5c, 0x60, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x44, 0x49,
-  0x06, 0x3a, 0xdc, 0x10, 0x8c, 0x64, 0x00, 0x06, 0xb3, 0x0c, 0x79, 0xa1,
-  0x17, 0x81, 0x0d, 0xa5, 0x18, 0xc0, 0x67, 0x96, 0xe0, 0x2f, 0x4c, 0x14,
-  0x03, 0x22, 0x3e, 0xb3, 0x04, 0x7f, 0x31, 0x1c, 0x01, 0x1f, 0xa3, 0x18,
-  0x08, 0xdf, 0x2c, 0x03, 0x5f, 0xfc, 0x45, 0x60, 0xf1, 0x41, 0x8a, 0x41,
-  0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x18, 0x65, 0x41, 0x24,
-  0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x01, 0x93, 0x81, 0x0e, 0x37, 0x04, 0x2e,
-  0x19, 0x80, 0xc1, 0x2c, 0x43, 0x5f, 0xf8, 0x45, 0x60, 0xac, 0x18, 0x0c,
-  0xf1, 0x99, 0x25, 0xf8, 0x0b, 0x23, 0x62, 0x31, 0x80, 0xcf, 0x2c, 0xc1,
-  0x5f, 0x0c, 0xb4, 0x18, 0x5a, 0x5e, 0x60, 0x7a, 0x41, 0xf4, 0x85, 0xe0,
-  0x17, 0xf6, 0xb3, 0x17, 0x17, 0x0c, 0x73, 0x81, 0x51, 0xb7, 0x19, 0x75,
-  0x7d, 0x18, 0x0c, 0x73, 0xae, 0x37, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc,
-  0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x54, 0x61, 0x19, 0xb4, 0x64, 0x90,
-  0x8e, 0xc1, 0x4e, 0x06, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3,
-  0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00,
-  0x20, 0x08, 0x06, 0x0f, 0x5a, 0x06, 0x34, 0x19, 0x24, 0x44, 0x30, 0x62,
-  0x80, 0x00, 0x20, 0x08, 0x06, 0x4f, 0x5a, 0x06, 0x35, 0x19, 0x24, 0x44,
-  0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x8f, 0x5a, 0x06, 0x36, 0x19,
-  0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x1b, 0x5a, 0x06,
-  0x35, 0x19, 0xcc, 0x63, 0x10, 0x88, 0x65, 0x20, 0x92, 0x01, 0x58, 0x06,
-  0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0x27, 0x8c, 0x71, 0xc2, 0x18,
-  0x15, 0xa8, 0x64, 0x70, 0x35, 0x04, 0x7b, 0x81, 0x61, 0xb3, 0x04, 0xa0,
-  0x31, 0xd0, 0x62, 0xe8, 0xc6, 0x5a, 0xf0, 0xa7, 0xa0, 0x16, 0x36, 0xd1,
-  0x16, 0xc2, 0x5f, 0xf0, 0xa7, 0xe0, 0x16, 0x86, 0x23, 0x20, 0x19, 0xc0,
-  0x67, 0x96, 0x21, 0x34, 0x46, 0x83, 0x46, 0x86, 0x23, 0x02, 0x90, 0x0c,
-  0x86, 0xef, 0x84, 0x61, 0x86, 0x1b, 0x02, 0x7b, 0x0c, 0xc8, 0xa0, 0x86,
-  0x40, 0x87, 0x23, 0x74, 0x84, 0x24, 0x83, 0xe1, 0xab, 0x40, 0xd0, 0xe3,
-  0x91, 0x61, 0x86, 0x1b, 0x82, 0x7c, 0x0c, 0xc8, 0xa0, 0x82, 0x41, 0x67,
-  0x19, 0x44, 0xe3, 0x36, 0x82, 0xeb, 0xc5, 0x60, 0x98, 0x73, 0xbf, 0x61,
-  0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0xa0, 0xfa, 0xcb, 0x60, 0x2d, 0x83,
-  0x93, 0x0c, 0xf2, 0x32, 0x18, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18,
-  0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0xe2, 0x90, 0x11, 0x03, 0x04,
-  0x00, 0x41, 0x30, 0x78, 0x4c, 0x33, 0x90, 0xcb, 0xe0, 0x20, 0x82, 0x11,
-  0x03, 0x04, 0x00, 0x41, 0x30, 0x78, 0x4e, 0x33, 0x98, 0xcb, 0x80, 0x21,
-  0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0x78, 0x50, 0x33, 0xa0, 0xcb,
-  0x40, 0x22, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0xd8, 0x4c, 0x33,
-  0x98, 0xcb, 0x20, 0x26, 0x83, 0x00, 0x34, 0x03, 0xb0, 0x0c, 0xfc, 0x32,
-  0x18, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x38, 0x61, 0x8c, 0x13, 0xc6,
-  0xa8, 0x00, 0x2d, 0x83, 0xab, 0x21, 0xd8, 0x0b, 0x0c, 0x9b, 0x25, 0xb8,
-  0x8d, 0xe1, 0x86, 0x38, 0x59, 0xcd, 0x00, 0x0c, 0x66, 0x19, 0x48, 0xa3,
-  0x34, 0x82, 0x8a, 0xc9, 0x60, 0x2f, 0x03, 0xb8, 0xc0, 0xa8, 0x11, 0x83,
-  0x03, 0x00, 0x41, 0x30, 0x58, 0x6c, 0x33, 0xe0, 0xcb, 0x00, 0x0c, 0x7e,
-  0x32, 0x18, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0xe5, 0x36, 0x03, 0xbe,
-  0x0c, 0x02, 0xe1, 0x82, 0x61, 0x8a, 0x26, 0x03, 0xd0, 0x0c, 0xe0, 0x02,
-  0xa3, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0xd9, 0xcd, 0x20, 0x34,
-  0x03, 0x3d, 0x21, 0xcb, 0x60, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x16,
-  0xde, 0x0c, 0x42, 0x33, 0x08, 0x84, 0x0b, 0x86, 0xb9, 0xc0, 0xa8, 0x3b,
-  0x8c, 0xba, 0x7e, 0x0c, 0x86, 0x39, 0x17, 0x0c, 0x86, 0x39, 0x62, 0x98,
-  0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x2a, 0x3c, 0x83,
-  0xd6, 0x0c, 0xd2, 0x32, 0xd8, 0xcd, 0x60, 0x34, 0x21, 0x00, 0x46, 0x13,
-  0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46,
-  0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x41, 0xcf, 0x80, 0x36, 0x83, 0x84,
-  0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x49, 0xcf, 0xa0, 0x36,
-  0x83, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x51, 0xcf,
-  0xc0, 0x36, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60,
-  0x43, 0xcf, 0xa0, 0x36, 0x83, 0xb9, 0x0c, 0x02, 0xf1, 0x0c, 0x44, 0x33,
-  0x00, 0xcf, 0x60, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0xe0, 0x84, 0x31,
-  0x4e, 0x18, 0xa3, 0x02, 0xd5, 0x0c, 0xae, 0x86, 0x60, 0x2f, 0x30, 0x6c,
-  0x96, 0xe0, 0x36, 0x86, 0x1b, 0x66, 0x85, 0x3d, 0x03, 0x30, 0x98, 0x65,
-  0x30, 0x8d, 0xdb, 0x08, 0x4c, 0x2d, 0x03, 0xb6, 0x0c, 0xe2, 0x33, 0x1c,
-  0x11, 0x0a, 0x6d, 0x19, 0x10, 0xdf, 0x2c, 0xc3, 0x69, 0xa8, 0x46, 0x60,
-  0x6e, 0x19, 0x88, 0x42, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17,
-  0x18, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x61, 0x9f, 0x81,
-  0x0e, 0x37, 0x04, 0xf4, 0x19, 0x80, 0xc1, 0x2c, 0x03, 0x6a, 0xa4, 0x46,
-  0x60, 0x83, 0x5d, 0x06, 0xf0, 0x99, 0x25, 0x70, 0x0d, 0xab, 0xcb, 0x80,
-  0x88, 0xcf, 0x2c, 0x81, 0x6b, 0x0c, 0x47, 0xb0, 0x82, 0x5d, 0x06, 0xc2,
-  0x37, 0xcb, 0xb0, 0x1a, 0xae, 0x11, 0x58, 0x2b, 0xdc, 0x65, 0x10, 0x1f,
-  0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x46, 0x59, 0x10, 0xc9, 0xc7,
-  0x8a, 0x20, 0x3e, 0x45, 0x84, 0x68, 0xa0, 0xc3, 0x0d, 0xc1, 0x7f, 0x06,
-  0x60, 0x30, 0xcb, 0xc0, 0x1a, 0xad, 0x11, 0xd8, 0x5f, 0x06, 0x43, 0x7c,
-  0x66, 0x09, 0x5c, 0xc3, 0x08, 0xd1, 0x0c, 0xe0, 0x33, 0x4b, 0xe0, 0x1a,
-  0x03, 0x2d, 0x86, 0x86, 0x1a, 0x58, 0x6a, 0x10, 0xac, 0x21, 0xb4, 0x86,
-  0x5b, 0xa8, 0xc6, 0x05, 0xc3, 0x58, 0x68, 0x06, 0xa5, 0x19, 0xc4, 0x67,
-  0x38, 0xe2, 0x5d, 0x4c, 0x33, 0x20, 0xbe, 0x59, 0x86, 0xd7, 0x90, 0x8d,
-  0xc0, 0x4e, 0x33, 0x80, 0x97, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61,
-  0x2e, 0x30, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0xe2, 0x45,
-  0x03, 0x1d, 0x6e, 0x08, 0x5a, 0x34, 0x00, 0x83, 0x59, 0x06, 0xd8, 0x88,
-  0x8d, 0xc0, 0x86, 0xd7, 0x0c, 0xe0, 0x33, 0x4b, 0x60, 0x1b, 0xc6, 0x9a,
-  0x01, 0x11, 0x9f, 0x59, 0x02, 0xdb, 0x18, 0x8e, 0xd0, 0x97, 0xd6, 0x0c,
-  0x84, 0x6f, 0x96, 0x61, 0x36, 0x6c, 0x23, 0xb0, 0x7d, 0x71, 0xcd, 0x20,
-  0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x8c, 0xb2, 0x20, 0x92,
-  0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xd0, 0xd1, 0x40, 0x87, 0x1b, 0x02, 0x1c,
-  0x0d, 0xc0, 0x60, 0x96, 0x81, 0x36, 0x6a, 0x23, 0x30, 0xdb, 0x0c, 0x86,
-  0xf8, 0xcc, 0x12, 0xd8, 0x86, 0x11, 0xbb, 0x19, 0xc0, 0x67, 0x96, 0xc0,
-  0x36, 0x06, 0x5a, 0x0c, 0x0d, 0x36, 0xb0, 0xd8, 0x20, 0x68, 0x43, 0xa8,
-  0x0d, 0x15, 0x93, 0x8d, 0x0b, 0x86, 0xb9, 0xc0, 0xa8, 0xdb, 0x8c, 0xba,
-  0xd3, 0x0c, 0x86, 0x39, 0x5c, 0x0c, 0x86, 0x39, 0x62, 0x98, 0x23, 0x86,
-  0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x6a, 0x4d, 0x83, 0x1b, 0x0d,
-  0xe6, 0x33, 0x28, 0xd3, 0x60, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60,
-  0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10,
-  0x00, 0x04, 0xc1, 0xe0, 0x91, 0xd3, 0xc0, 0x47, 0x83, 0x84, 0x08, 0x46,
-  0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x99, 0xd3, 0xe0, 0x47, 0x83, 0x84,
-  0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0xa1, 0xd3, 0x00, 0x4c,
-  0x83, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0x93, 0xd3,
-  0xe0, 0x47, 0x83, 0xfe, 0x0c, 0x02, 0x36, 0x0d, 0x58, 0x34, 0x50, 0xd3,
-  0x60, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0xe0, 0x84, 0x31, 0x4e, 0x18,
-  0xa3, 0x02, 0x1a, 0x0d, 0xae, 0x86, 0x60, 0x2f, 0x30, 0x6c, 0x96, 0xe0,
-  0x36, 0x06, 0x5a, 0x0c, 0xdd, 0x10, 0x0d, 0x18, 0x16, 0x42, 0xc3, 0x26,
-  0x48, 0x43, 0xb0, 0x0d, 0x18, 0x16, 0x4a, 0xc3, 0xc4, 0xa6, 0x45, 0x03,
-  0xf8, 0xcc, 0x32, 0xe0, 0x86, 0x6e, 0xf8, 0xcc, 0x70, 0x44, 0xa0, 0xa2,
-  0xc1, 0xf0, 0x9d, 0x30, 0xcc, 0x70, 0x43, 0x00, 0xa2, 0x01, 0x19, 0xd4,
-  0x10, 0xe8, 0x70, 0x04, 0xd9, 0xb8, 0x68, 0x30, 0x7c, 0x15, 0x08, 0x7a,
-  0x66, 0x33, 0xcc, 0x70, 0x43, 0x30, 0xa2, 0x01, 0x19, 0x54, 0x30, 0xe8,
-  0x2c, 0x43, 0x6e, 0xb8, 0x47, 0x70, 0xe7, 0x19, 0x0c, 0x73, 0xf8, 0x18,
-  0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x55, 0xaa, 0x06, 0x75,
-  0x1a, 0xc4, 0x68, 0x30, 0xaa, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08,
-  0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x87, 0x8c, 0x18,
-  0x20, 0x00, 0x08, 0x82, 0xc1, 0x03, 0xab, 0x01, 0x9f, 0x06, 0x07, 0x11,
-  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x13, 0xab, 0x41, 0x9f, 0x06,
-  0x0c, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x23, 0xab, 0x81,
-  0x9f, 0x06, 0x12, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0x06,
-  0xab, 0x41, 0x9f, 0x06, 0x3b, 0x1a, 0x04, 0xaa, 0x1a, 0xa8, 0x69, 0x80,
-  0xaa, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x09, 0x63, 0x9c,
-  0x30, 0x46, 0x05, 0x72, 0x1a, 0x5c, 0x0d, 0xc1, 0x5e, 0x60, 0xd8, 0x2c,
-  0x81, 0x7b, 0x0c, 0x37, 0xec, 0x4d, 0xad, 0x06, 0x60, 0x30, 0xcb, 0xb0,
-  0x1b, 0xbc, 0x11, 0xd4, 0x8e, 0x06, 0xa5, 0x1a, 0xc0, 0x05, 0x46, 0x8d,
-  0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x02, 0xae, 0x81, 0xa9, 0x06, 0x60,
-  0x90, 0xa6, 0xc1, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x2c, 0xe1, 0x1a,
-  0x98, 0x6a, 0x10, 0x08, 0x17, 0x0c, 0x53, 0x3e, 0x1a, 0xa8, 0x6a, 0x00,
-  0x17, 0x18, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x4b, 0xb9, 0x06,
-  0xab, 0x1a, 0x90, 0x8e, 0x9b, 0x06, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60,
-  0xb0, 0x98, 0x6b, 0xb0, 0xaa, 0x41, 0x20, 0x5c, 0x30, 0xcc, 0x05, 0x46,
-  0xdd, 0x61, 0xd4, 0x9d, 0x68, 0x30, 0xcc, 0xe1, 0x64, 0x30, 0xcc, 0x11,
-  0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x54, 0xeb,
-  0x1a, 0xdc, 0x6a, 0x30, 0xa7, 0x41, 0xb9, 0x06, 0xa3, 0x09, 0x01, 0x30,
-  0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24,
-  0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x8f, 0xbc, 0x06, 0xbe, 0x1a,
-  0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xcf, 0xbc, 0x06,
-  0xbf, 0x1a, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x0f,
-  0xbd, 0x06, 0xe0, 0x1a, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08,
-  0x06, 0x9b, 0xbc, 0x06, 0xbf, 0x1a, 0xf4, 0x69, 0x10, 0xb0, 0x6b, 0xc0,
-  0xaa, 0x81, 0xba, 0x06, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0x27,
-  0x8c, 0x71, 0xc2, 0x18, 0x15, 0xd0, 0x6a, 0x70, 0x35, 0x04, 0x7b, 0x81,
-  0x61, 0xb3, 0x04, 0xee, 0x31, 0xdc, 0xd0, 0x3b, 0xf6, 0x1a, 0x80, 0xc1,
-  0x2c, 0x43, 0x6f, 0xb8, 0x47, 0x60, 0x74, 0x1a, 0xd8, 0x69, 0x10, 0x9f,
-  0xe1, 0x88, 0x50, 0xb8, 0xd3, 0x80, 0xf8, 0x66, 0x19, 0x7c, 0x23, 0x3c,
-  0x02, 0xc3, 0xd3, 0x40, 0x14, 0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86,
-  0xb9, 0xc0, 0x28, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0x90,
-  0x0d, 0x74, 0xb8, 0x21, 0xf0, 0xd7, 0x00, 0x0c, 0x66, 0x19, 0x7e, 0x03,
-  0x3c, 0x02, 0x1b, 0x40, 0x35, 0x80, 0xcf, 0x2c, 0x41, 0x79, 0xd8, 0x9f,
-  0x06, 0x44, 0x7c, 0x66, 0x09, 0xca, 0x63, 0x38, 0x82, 0x15, 0x40, 0x35,
-  0x10, 0xbe, 0x59, 0x06, 0xf1, 0x28, 0x8f, 0xc0, 0x5a, 0x21, 0x54, 0x83,
-  0xf8, 0x58, 0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x30, 0xca, 0x82, 0x48,
-  0x3e, 0x56, 0x04, 0xf1, 0x29, 0x62, 0x65, 0x03, 0x1d, 0x6e, 0x08, 0x52,
-  0x36, 0x00, 0x83, 0x59, 0x86, 0xf1, 0x20, 0x8f, 0xc0, 0x52, 0x35, 0x18,
-  0xe2, 0x33, 0x4b, 0x50, 0x1e, 0x46, 0xb0, 0x6a, 0x00, 0x9f, 0x59, 0x82,
-  0xf2, 0x18, 0x68, 0x31, 0xb4, 0xdf, 0xc0, 0xc0, 0x83, 0x18, 0x0f, 0x81,
-  0x3c, 0xdc, 0x22, 0x3c, 0x2e, 0x18, 0xc6, 0x56, 0x35, 0x78, 0xd5, 0x20,
-  0x3e, 0xc3, 0x11, 0xf9, 0x03, 0xab, 0x01, 0xf1, 0xcd, 0x32, 0x98, 0x47,
-  0x7a, 0x04, 0x16, 0xab, 0x81, 0xfe, 0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17,
-  0x0c, 0x73, 0x81, 0x51, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11,
-  0x39, 0x1b, 0xe8, 0x70, 0x43, 0x70, 0xb3, 0x01, 0x18, 0xcc, 0x32, 0x9c,
-  0x07, 0x7a, 0x04, 0x36, 0xe4, 0x6a, 0x00, 0x9f, 0x59, 0x82, 0xf6, 0x30,
-  0x5b, 0x0d, 0x88, 0xf8, 0xcc, 0x12, 0xb4, 0xc7, 0x70, 0x04, 0x09, 0xdd,
-  0x6a, 0x20, 0x7c, 0xb3, 0x0c, 0xea, 0xd1, 0x1e, 0x81, 0x95, 0x10, 0xae,
-  0x06, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0x60, 0x94, 0x05,
-  0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x04, 0xd9, 0x06, 0x3a, 0xdc, 0x10,
-  0x88, 0x6d, 0x00, 0x06, 0xb3, 0x0c, 0xeb, 0xc1, 0x1e, 0x81, 0x81, 0x6b,
-  0x30, 0xc4, 0x67, 0x96, 0xa0, 0x3d, 0x8c, 0x28, 0xd7, 0x00, 0x3e, 0xb3,
-  0x04, 0xed, 0x31, 0xd0, 0x62, 0x68, 0xe7, 0x81, 0xa1, 0x07, 0xb1, 0x1e,
-  0x02, 0x7b, 0xf8, 0x60, 0x90, 0x1e, 0x17, 0x0c, 0x73, 0x81, 0x51, 0xb7,
-  0x19, 0x75, 0xb1, 0x1a, 0x0c, 0x73, 0xe2, 0x19, 0x0c, 0x73, 0xc4, 0x30,
-  0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x55, 0xdd, 0x06,
-  0x61, 0x1b, 0xf4, 0x6b, 0xf0, 0xb6, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26,
-  0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xc3, 0xb7, 0x01, 0xda, 0x06, 0x09,
-  0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xd3, 0xb7, 0x41, 0xda,
-  0x06, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xe3, 0xb7,
-  0x81, 0xda, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1,
-  0xc6, 0xb7, 0x41, 0xda, 0x06, 0x27, 0x1b, 0x04, 0x76, 0x1b, 0xd8, 0x6c,
-  0x40, 0xb7, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x09, 0x63,
-  0x9c, 0x30, 0x46, 0x05, 0x3e, 0x1b, 0x5c, 0x0d, 0xc1, 0x5e, 0x60, 0xd8,
-  0x2c, 0x81, 0x7b, 0x0c, 0xb4, 0x18, 0xba, 0x91, 0x1b, 0x64, 0x2e, 0xe0,
-  0x86, 0x4d, 0xec, 0x86, 0xd0, 0x1e, 0x64, 0x2e, 0xf0, 0xc6, 0x2c, 0xc3,
-  0x7b, 0xc4, 0xc7, 0x19, 0x0d, 0x47, 0xac, 0xd1, 0xcc, 0x06, 0xc3, 0x77,
-  0x6c, 0x34, 0xcc, 0x70, 0x43, 0x90, 0xb2, 0x01, 0x19, 0xd4, 0x10, 0xe8,
-  0x70, 0x84, 0x1b, 0xdd, 0x6c, 0x30, 0x7c, 0x15, 0x08, 0x7a, 0x70, 0x34,
-  0xcc, 0x70, 0x43, 0xc0, 0xb2, 0x01, 0x19, 0x54, 0x30, 0xe8, 0x2c, 0x03,
-  0x7c, 0x94, 0x48, 0x70, 0xf0, 0x1a, 0x0c, 0x73, 0x21, 0x1a, 0x0c, 0x33,
-  0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x95, 0xec, 0x06, 0x7e, 0x1b, 0xe8,
-  0x6c, 0xc0, 0xba, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68,
-  0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x87, 0x8c, 0x18, 0x20, 0x00,
-  0x08, 0x82, 0xc1, 0x93, 0xbb, 0x41, 0xe9, 0x06, 0x07, 0x11, 0x8c, 0x18,
-  0x20, 0x00, 0x08, 0x82, 0xc1, 0xa3, 0xbb, 0x81, 0xe9, 0x06, 0x0c, 0x11,
-  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xb3, 0xbb, 0xc1, 0xe9, 0x06,
-  0x12, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0x96, 0xbb, 0x81,
-  0xe9, 0x06, 0x64, 0x1b, 0x04, 0xb3, 0x1b, 0xcc, 0x6d, 0x10, 0xbb, 0xc1,
-  0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x09, 0x63, 0x9c, 0x30, 0x46,
-  0x05, 0x7b, 0x1b, 0x5c, 0x0d, 0xc1, 0x5e, 0x60, 0xd8, 0x2c, 0x41, 0x89,
-  0x0c, 0x37, 0x90, 0x92, 0xef, 0x06, 0x60, 0x30, 0xcb, 0x20, 0x1f, 0xf3,
-  0x11, 0x14, 0xd9, 0x06, 0xae, 0x1b, 0xc0, 0x05, 0x46, 0x8d, 0x18, 0x1c,
-  0x00, 0x08, 0x82, 0xc1, 0x92, 0xbe, 0xc1, 0xeb, 0x06, 0xaa, 0x24, 0xb7,
-  0xc1, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x2c, 0xea, 0x1b, 0xbc, 0x6e,
-  0x10, 0x08, 0x17, 0x0c, 0x53, 0x67, 0x1b, 0xcc, 0x6e, 0x00, 0x17, 0x18,
-  0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x8b, 0xfb, 0x06, 0xb4, 0x1b,
-  0xb8, 0xd2, 0xdd, 0x06, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xb0, 0xbc,
-  0x6f, 0x40, 0xbb, 0x41, 0x20, 0x5c, 0x30, 0xcc, 0x05, 0x46, 0xdd, 0x61,
-  0xd4, 0xc1, 0x6c, 0x30, 0xcc, 0x85, 0x69, 0x30, 0xcc, 0x11, 0xc3, 0x1c,
-  0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x54, 0xf4, 0x1b, 0x80,
-  0x6f, 0xc0, 0xb7, 0x81, 0xfb, 0x06, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20,
-  0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62,
-  0x80, 0x00, 0x20, 0x08, 0x06, 0xcf, 0xfe, 0x06, 0xe7, 0x1b, 0x24, 0x44,
-  0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x0f, 0xff, 0x06, 0xe8, 0x1b,
-  0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x4f, 0xff, 0x06,
-  0xe9, 0x1b, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0xdb,
-  0xfe, 0x06, 0xe8, 0x1b, 0x98, 0x6e, 0x10, 0xd4, 0x6f, 0x50, 0xbb, 0xc1,
-  0xfc, 0x06, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0x27, 0x8c, 0x71,
-  0xc2, 0x18, 0x15, 0xf4, 0x6e, 0x70, 0x35, 0x04, 0x7b, 0x81, 0x61, 0xb3,
-  0x04, 0x25, 0x32, 0xdc, 0x60, 0x4e, 0xff, 0x1b, 0x80, 0xc1, 0x2c, 0x03,
-  0x7d, 0x94, 0x48, 0x60, 0x7d, 0x1b, 0xfc, 0x6d, 0x10, 0x9f, 0xe1, 0x88,
-  0x75, 0x02, 0xdd, 0x80, 0xf8, 0x66, 0x19, 0xea, 0x03, 0x3f, 0x02, 0x0b,
-  0xdd, 0x80, 0x9d, 0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xc0,
-  0x28, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0x14, 0x0e, 0x74,
-  0xb8, 0x21, 0x38, 0xe1, 0x00, 0x0c, 0x66, 0x19, 0xec, 0xe3, 0x3e, 0x02,
-  0x1b, 0x52, 0x37, 0x80, 0xcf, 0x2c, 0x01, 0x7f, 0x18, 0xea, 0x06, 0x44,
-  0x7c, 0x66, 0x09, 0xf8, 0x63, 0x38, 0xc2, 0x9e, 0x52, 0x37, 0x10, 0xbe,
-  0x59, 0x86, 0xfc, 0xe0, 0x8f, 0xc0, 0xee, 0x49, 0x75, 0x83, 0xf8, 0x58,
-  0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x30, 0xca, 0x82, 0x48, 0x3e, 0x56,
-  0x04, 0xf1, 0x29, 0x82, 0x86, 0x03, 0x1d, 0x6e, 0x08, 0x64, 0x38, 0x00,
-  0x83, 0x59, 0x06, 0xfd, 0xd8, 0x8f, 0xc0, 0x64, 0x37, 0x18, 0xe2, 0x33,
-  0x4b, 0xc0, 0x1f, 0x46, 0xd4, 0x6e, 0x00, 0x9f, 0x59, 0x02, 0xfe, 0x18,
-  0x68, 0x31, 0x34, 0xfb, 0xc0, 0xee, 0x83, 0xd0, 0x0f, 0x61, 0x3f, 0x44,
-  0x34, 0xc0, 0x8f, 0x0b, 0x86, 0x31, 0xda, 0x0d, 0x70, 0x37, 0x88, 0xcf,
-  0x70, 0xc4, 0x48, 0xe5, 0x6e, 0x40, 0x7c, 0xb3, 0x0c, 0xfd, 0x01, 0x22,
-  0x81, 0xe9, 0x6e, 0x40, 0x52, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3,
-  0x5c, 0x60, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x84, 0x18,
-  0x07, 0x3a, 0xdc, 0x10, 0x80, 0x71, 0x00, 0x06, 0xb3, 0x0c, 0xfe, 0xf1,
-  0x1f, 0x81, 0x0d, 0xe2, 0x1b, 0xc0, 0x67, 0x96, 0x80, 0x44, 0xec, 0x77,
-  0x03, 0x22, 0x3e, 0xb3, 0x04, 0x24, 0x32, 0x1c, 0xe1, 0x52, 0xe0, 0x1b,
-  0x08, 0xdf, 0x2c, 0x43, 0x88, 0x90, 0x48, 0x60, 0x2f, 0x15, 0xbe, 0x41,
-  0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x18, 0x65, 0x41, 0x24,
-  0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xd1, 0xc6, 0x81, 0x0e, 0x37, 0x04, 0x6b,
-  0x1c, 0x80, 0xc1, 0x2c, 0x83, 0x88, 0x8c, 0x48, 0x60, 0xe9, 0x1b, 0x0c,
-  0xf1, 0x99, 0x25, 0x20, 0x11, 0x23, 0xdc, 0x37, 0x80, 0xcf, 0x2c, 0x01,
-  0x89, 0x0c, 0xb4, 0x18, 0x9a, 0x7f, 0x60, 0xff, 0x41, 0x88, 0x88, 0x30,
-  0x22, 0x72, 0x1a, 0x80, 0xc8, 0x05, 0xc3, 0x5c, 0x60, 0xd4, 0x6d, 0x46,
-  0x9d, 0xee, 0x06, 0xc3, 0xdc, 0xba, 0x06, 0xc3, 0x1c, 0x31, 0xcc, 0x11,
-  0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x41, 0xe5, 0xc7, 0x81, 0x1a,
-  0x07, 0x26, 0x1c, 0xe0, 0x71, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42,
-  0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23, 0x06,
-  0x08, 0x00, 0x82, 0x60, 0xf0, 0x94, 0x72, 0x10, 0xc7, 0x41, 0x42, 0x04,
-  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xf0, 0x98, 0x72, 0x20, 0xc7, 0x41,
-  0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xf0, 0x9c, 0x72, 0x30,
-  0xc7, 0x41, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0x95,
-  0x72, 0x20, 0xc7, 0x01, 0x0c, 0x07, 0xc1, 0x1f, 0x07, 0x3f, 0x1c, 0xf4,
-  0x71, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x70, 0xc2, 0x18, 0x27,
-  0x8c, 0x51, 0xc1, 0x19, 0x07, 0x57, 0x43, 0xb0, 0x17, 0x18, 0x36, 0x4b,
-  0x50, 0x22, 0x03, 0x2d, 0x86, 0x6e, 0xc0, 0x87, 0x1d, 0x0e, 0xef, 0x61,
-  0x13, 0xf2, 0x21, 0x90, 0x88, 0x1d, 0x0e, 0xf3, 0x31, 0xcb, 0x60, 0x22,
-  0x28, 0x02, 0x57, 0xc3, 0x11, 0x75, 0xc5, 0xc3, 0xc1, 0xf0, 0x9d, 0x5d,
-  0x0d, 0x33, 0xdc, 0x10, 0xc8, 0x70, 0x40, 0x06, 0x35, 0x04, 0x3a, 0x1c,
-  0x71, 0x57, 0x60, 0x1c, 0x0c, 0x5f, 0x05, 0x82, 0x5e, 0x5e, 0x0d, 0x33,
-  0xdc, 0x10, 0xd4, 0x70, 0x40, 0x06, 0x15, 0x0c, 0x3a, 0xcb, 0x70, 0x22,
-  0x3c, 0x12, 0x5c, 0xfe, 0x06, 0xc3, 0x9c, 0xca, 0x06, 0xc3, 0x8c, 0x18,
-  0x1c, 0x00, 0x08, 0x82, 0x41, 0xb5, 0xcb, 0xc1, 0x29, 0x07, 0x63, 0x1c,
-  0xd4, 0x72, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30,
-  0x08, 0xa3, 0x09, 0xc4, 0x50, 0xc4, 0x21, 0x23, 0x06, 0x08, 0x00, 0x82,
-  0x60, 0xf0, 0x88, 0x73, 0xe0, 0xca, 0xc1, 0x41, 0x04, 0x23, 0x06, 0x08,
-  0x00, 0x82, 0x60, 0xf0, 0x8c, 0x73, 0xf0, 0xca, 0x01, 0x43, 0x04, 0x23,
-  0x06, 0x08, 0x00, 0x82, 0x60, 0xf0, 0x90, 0x73, 0x00, 0xcb, 0x81, 0x44,
-  0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0x89, 0x73, 0xf0, 0xca,
-  0x41, 0x1b, 0x07, 0x01, 0x2f, 0x07, 0x7c, 0x1c, 0xe8, 0x72, 0x30, 0x9a,
-  0x10, 0x00, 0xa3, 0x09, 0x42, 0x70, 0xc2, 0x18, 0x27, 0x8c, 0x51, 0x01,
-  0x29, 0x07, 0x57, 0x43, 0xb0, 0x17, 0x18, 0x36, 0x4b, 0xc0, 0x23, 0xc3,
-  0x0d, 0xad, 0x75, 0xce, 0x01, 0x18, 0xcc, 0x32, 0xa4, 0x88, 0x8a, 0x04,
-  0xd5, 0xc6, 0xc1, 0x2d, 0x07, 0x70, 0x81, 0x51, 0x23, 0x06, 0x07, 0x00,
-  0x82, 0x60, 0xb0, 0xc8, 0x73, 0x80, 0xcb, 0x01, 0x6d, 0xed, 0x71, 0x30,
-  0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0xcb, 0x3c, 0x07, 0xb8, 0x1c, 0x04,
-  0xc2, 0x05, 0xc3, 0x14, 0x1c, 0x07, 0xbc, 0x1c, 0xc0, 0x05, 0x46, 0x8d,
-  0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x72, 0xcf, 0x41, 0x2f, 0x07, 0xb7,
-  0x05, 0xca, 0xc1, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x2c, 0xf8, 0x1c,
-  0xf4, 0x72, 0x10, 0x08, 0x17, 0x0c, 0x73, 0x81, 0x51, 0x77, 0x18, 0x75,
-  0x39, 0x1c, 0x0c, 0x73, 0x6a, 0x1b, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c,
-  0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x55, 0x3f, 0x07, 0xe9, 0x1c,
-  0x94, 0x72, 0x70, 0xcf, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1,
-  0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20,
-  0x00, 0x08, 0x82, 0xc1, 0x43, 0xd2, 0x01, 0x3c, 0x07, 0x09, 0x11, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x53, 0xd2, 0x41, 0x3c, 0x07, 0x09,
-  0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x63, 0xd2, 0x81, 0x3c,
-  0x07, 0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0x46, 0xd2,
-  0x41, 0x3c, 0x07, 0xaf, 0x1c, 0x04, 0xfe, 0x1c, 0xf8, 0x72, 0xc0, 0xcf,
-  0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x09, 0x63, 0x9c, 0x30,
-  0x46, 0x05, 0xe6, 0x1c, 0x5c, 0x0d, 0xc1, 0x5e, 0x60, 0xd8, 0x2c, 0x01,
-  0x8f, 0x0c, 0x37, 0xbc, 0x17, 0x4a, 0x07, 0x60, 0x30, 0xcb, 0xb0, 0x22,
-  0x3c, 0x12, 0x98, 0x29, 0x07, 0xa8, 0x1c, 0xc4, 0x67, 0x38, 0xa2, 0xbe,
-  0x52, 0x39, 0x20, 0xbe, 0x59, 0x06, 0x16, 0x79, 0x91, 0xc0, 0x54, 0x39,
-  0xb0, 0xaf, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x30, 0xca,
-  0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x42, 0xa6, 0x03, 0x1d, 0x6e,
-  0x08, 0x60, 0x3a, 0x00, 0x83, 0x59, 0x86, 0x16, 0x71, 0x91, 0xc0, 0x06,
-  0x59, 0x0e, 0xe0, 0x33, 0x4b, 0x30, 0x23, 0x16, 0xcb, 0x01, 0x11, 0x9f,
-  0x59, 0x82, 0x19, 0x19, 0x8e, 0x00, 0x31, 0x59, 0x0e, 0x84, 0x6f, 0x96,
-  0x01, 0x46, 0x66, 0x24, 0xb0, 0x10, 0x9b, 0xe5, 0x20, 0x3e, 0x16, 0x38,
-  0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x8c, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41,
-  0x7c, 0x8a, 0xe8, 0xe9, 0x40, 0x87, 0x1b, 0x82, 0x9d, 0x0e, 0xc0, 0x60,
-  0x96, 0x21, 0x46, 0x64, 0x24, 0xb0, 0x5d, 0x0e, 0x86, 0xf8, 0xcc, 0x12,
-  0xcc, 0x88, 0x11, 0xbe, 0x1c, 0xc0, 0x67, 0x96, 0x60, 0x46, 0x06, 0x5a,
-  0x0c, 0xad, 0x45, 0x30, 0x17, 0x21, 0x62, 0x44, 0x90, 0x11, 0x1b, 0x0e,
-  0x5e, 0xe4, 0x82, 0x61, 0xac, 0x97, 0x83, 0x70, 0x0e, 0xe2, 0x33, 0x1c,
-  0xc1, 0x62, 0xe2, 0x1c, 0x10, 0xdf, 0x2c, 0x03, 0x8d, 0xdc, 0x48, 0x60,
-  0xe3, 0x1c, 0xb4, 0x58, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17,
-  0x18, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xb1, 0xd6, 0x81,
-  0x0e, 0x37, 0x04, 0x69, 0x1d, 0x80, 0xc1, 0x2c, 0x43, 0x8d, 0xd8, 0x48,
-  0x60, 0xc3, 0x3a, 0x07, 0xf0, 0x99, 0x25, 0xd8, 0x11, 0x43, 0xe7, 0x80,
-  0x88, 0xcf, 0x2c, 0xc1, 0x8e, 0x0c, 0x47, 0xdc, 0x58, 0x3a, 0x07, 0xc2,
-  0x37, 0xcb, 0x80, 0x23, 0x3b, 0x12, 0x18, 0x8e, 0xa9, 0x73, 0x10, 0x1f,
-  0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x46, 0x59, 0x10, 0xc9, 0xc7,
-  0x8a, 0x20, 0x3e, 0x45, 0xd8, 0x75, 0xa0, 0xc3, 0x0d, 0x01, 0x5d, 0x07,
-  0x60, 0x30, 0xcb, 0x90, 0x23, 0x3a, 0x12, 0x98, 0x3c, 0x07, 0x43, 0x7c,
-  0x66, 0x09, 0x76, 0xc4, 0x88, 0x7b, 0x0e, 0xe0, 0x33, 0x4b, 0xb0, 0x23,
-  0x03, 0x2d, 0x86, 0x56, 0x23, 0x98, 0x8d, 0x10, 0x39, 0x22, 0xe8, 0x08,
-  0x29, 0x07, 0x37, 0x72, 0xc1, 0x30, 0x17, 0x18, 0x75, 0x9b, 0x51, 0x37,
-  0xce, 0xc1, 0x30, 0x47, 0xbf, 0xc1, 0x30, 0x47, 0x0c, 0x73, 0xc4, 0x30,
-  0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x50, 0x9d, 0x76, 0x30, 0xd7, 0xc1,
-  0x4b, 0x07, 0xa1, 0x1d, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c,
-  0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x91, 0xc8, 0x88, 0x01, 0x02,
-  0x80, 0x20, 0x18, 0x3c, 0xae, 0x1d, 0xe8, 0x75, 0x90, 0x10, 0xc1, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x3c, 0xaf, 0x1d, 0xec, 0x75, 0x90, 0x10,
-  0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x3c, 0xb0, 0x1d, 0xf0, 0x75,
-  0x90, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x6c, 0xae, 0x1d,
-  0xec, 0x75, 0x90, 0xd3, 0x41, 0x80, 0xda, 0x01, 0x5a, 0x07, 0xa6, 0x1d,
-  0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x9c, 0x30, 0xc6, 0x09, 0x63,
-  0x54, 0x00, 0xd7, 0xc1, 0xd5, 0x10, 0xec, 0x05, 0x86, 0xcd, 0x12, 0xf0,
-  0xc8, 0x40, 0x8b, 0xa1, 0x1b, 0x27, 0x82, 0xaa, 0x83, 0x89, 0xd8, 0x44,
-  0x8a, 0x08, 0x3b, 0x82, 0xaa, 0x83, 0x8a, 0xcc, 0x32, 0xf4, 0xc8, 0x8f,
-  0xe4, 0xd9, 0x70, 0x84, 0x19, 0x95, 0x75, 0x30, 0x7c, 0x77, 0x46, 0xc3,
-  0x0c, 0x37, 0x04, 0x3b, 0x1d, 0x90, 0x41, 0x0d, 0x81, 0x0e, 0x47, 0x80,
-  0x5a, 0x5a, 0x07, 0xc3, 0x57, 0x81, 0xa0, 0x27, 0x6a, 0xc3, 0x0c, 0x37,
-  0x04, 0x3e, 0x1d, 0x90, 0x41, 0x05, 0x83, 0xce, 0x32, 0xf8, 0xc8, 0x9c,
-  0x04, 0x27, 0xd2, 0xc1, 0x30, 0x37, 0xc3, 0xc1, 0x30, 0x23, 0x06, 0x07,
-  0x00, 0x82, 0x60, 0x50, 0x91, 0x77, 0x00, 0xdb, 0x01, 0x5b, 0x07, 0xbe,
-  0x1d, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2,
-  0x68, 0x02, 0x31, 0x14, 0x71, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
-  0x3c, 0xeb, 0x1d, 0xdc, 0x76, 0x70, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80,
-  0x20, 0x18, 0x3c, 0xec, 0x1d, 0xe0, 0x76, 0xc0, 0x10, 0xc1, 0x88, 0x01,
-  0x02, 0x80, 0x20, 0x18, 0x3c, 0xed, 0x1d, 0xe4, 0x76, 0x20, 0x11, 0xc1,
-  0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x6c, 0xeb, 0x1d, 0xe0, 0x76, 0x60,
-  0xd7, 0x41, 0x50, 0xde, 0x41, 0x69, 0x07, 0xe3, 0x1d, 0x8c, 0x26, 0x04,
-  0xc0, 0x68, 0x82, 0x10, 0x9c, 0x30, 0xc6, 0x09, 0x63, 0x54, 0xd0, 0xda,
-  0xc1, 0xd5, 0x10, 0xec, 0x05, 0x86, 0xcd, 0x12, 0xcc, 0xc9, 0x70, 0x83,
-  0xad, 0xc1, 0x77, 0x00, 0x06, 0xb3, 0x0c, 0x60, 0x12, 0x26, 0x41, 0xd9,
-  0x75, 0x00, 0xde, 0x01, 0x5c, 0x60, 0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20,
-  0x18, 0x2c, 0xfb, 0x1d, 0x84, 0x77, 0x50, 0x4a, 0xa4, 0x1d, 0x8c, 0x18,
-  0x1c, 0x00, 0x08, 0x82, 0xc1, 0xc2, 0xdf, 0x41, 0x78, 0x07, 0x81, 0x70,
-  0xc1, 0x30, 0x95, 0xd7, 0x41, 0x79, 0x07, 0x70, 0x81, 0x51, 0x23, 0x06,
-  0x07, 0x00, 0x82, 0x60, 0xb0, 0x80, 0x78, 0x60, 0xde, 0x01, 0xb8, 0xa5,
-  0x76, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x4b, 0x88, 0x07, 0xe6,
-  0x1d, 0x04, 0xc2, 0x05, 0xc3, 0x5c, 0x60, 0xd4, 0x1d, 0x46, 0x9d, 0x58,
-  0x07, 0xc3, 0xdc, 0x1c, 0x07, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c,
-  0x18, 0x1c, 0x00, 0x08, 0x82, 0x41, 0x65, 0xe2, 0x81, 0x7c, 0x07, 0xae,
-  0x1d, 0x80, 0x78, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a,
-  0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00,
-  0x82, 0x60, 0xf0, 0xb4, 0x78, 0x90, 0xdf, 0x41, 0x42, 0x04, 0x23, 0x06,
-  0x08, 0x00, 0x82, 0x60, 0xf0, 0xb8, 0x78, 0xa0, 0xdf, 0x41, 0x42, 0x04,
-  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xf0, 0xbc, 0x78, 0xb0, 0xdf, 0x41,
-  0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0xb5, 0x78, 0xa0,
-  0xdf, 0x01, 0x6e, 0x07, 0xc1, 0x89, 0x07, 0xe7, 0x1d, 0x94, 0x78, 0x30,
-  0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x70, 0xc2, 0x18, 0x27, 0x8c, 0x51,
-  0xc1, 0x7b, 0x07, 0x57, 0x43, 0xb0, 0x17, 0x18, 0x36, 0x4b, 0x30, 0x27,
-  0xc3, 0x0d, 0xf8, 0x16, 0xe3, 0x01, 0x18, 0xcc, 0x32, 0x88, 0xc9, 0x9c,
-  0x04, 0xf6, 0xda, 0x41, 0x6c, 0x07, 0xf1, 0x19, 0x8e, 0x30, 0x27, 0xd9,
-  0x0e, 0x88, 0x6f, 0x96, 0x61, 0x4c, 0xcc, 0x24, 0xb0, 0xd9, 0x0e, 0xce,
-  0x29, 0x3e, 0x16, 0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x8c, 0xb2, 0xc0,
-  0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xd8, 0xf1, 0x40, 0x87, 0x1b, 0x82,
-  0x1c, 0x0f, 0xc0, 0x60, 0x96, 0x81, 0x4c, 0xca, 0x24, 0xb0, 0x61, 0xb7,
-  0x03, 0xf8, 0xcc, 0x12, 0xa8, 0x89, 0xe9, 0x76, 0x40, 0xc4, 0x67, 0x96,
-  0x40, 0x4d, 0x86, 0x23, 0xe2, 0x69, 0xb7, 0x03, 0xe1, 0x9b, 0x65, 0x38,
-  0x13, 0x35, 0x09, 0x4c, 0x9e, 0x78, 0x3b, 0x88, 0x8f, 0x05, 0x0e, 0x7d,
-  0x2e, 0x18, 0xe6, 0x02, 0xa3, 0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f,
-  0x22, 0xcc, 0x3c, 0xd0, 0xe1, 0x86, 0x80, 0xcc, 0x03, 0x30, 0x98, 0x65,
-  0x40, 0x93, 0x34, 0x09, 0x8c, 0xbc, 0x83, 0x21, 0x3e, 0xb3, 0x04, 0x6a,
-  0x62, 0xc4, 0x79, 0x07, 0xf0, 0x99, 0x25, 0x50, 0x93, 0x81, 0x16, 0x43,
-  0x23, 0x13, 0xac, 0x4c, 0x08, 0x34, 0x11, 0xd2, 0x04, 0x3f, 0x03, 0x33,
-  0xb9, 0x60, 0x18, 0x33, 0xef, 0x40, 0xbd, 0x83, 0xf8, 0x0c, 0x47, 0xd4,
-  0xdc, 0x7a, 0x07, 0xc4, 0x37, 0xcb, 0xb0, 0x26, 0x6e, 0x12, 0x18, 0x7b,
-  0x07, 0x36, 0x17, 0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x46,
-  0x59, 0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xd0, 0x79, 0xa0, 0xc3,
-  0x0d, 0x81, 0x9c, 0x07, 0x60, 0x30, 0xcb, 0xc0, 0x26, 0x6d, 0x12, 0xd8,
-  0x40, 0xdf, 0x01, 0x7c, 0x66, 0x09, 0xe4, 0xc4, 0xe2, 0x3b, 0x20, 0xe2,
-  0x33, 0x4b, 0x20, 0x27, 0xc3, 0x11, 0x60, 0x27, 0xdf, 0x81, 0xf0, 0xcd,
-  0x32, 0xbc, 0x89, 0x9c, 0x04, 0x16, 0x76, 0xf3, 0x1d, 0xc4, 0xc7, 0x02,
-  0x87, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x51, 0x16, 0x44, 0xf2, 0xb1, 0x22,
-  0x88, 0x4f, 0x11, 0x7f, 0x1e, 0xe8, 0x70, 0x43, 0xd0, 0xe7, 0x01, 0x18,
-  0xcc, 0x32, 0xc0, 0x49, 0x9c, 0x04, 0xb6, 0xdf, 0xc1, 0x10, 0x9f, 0x59,
-  0x02, 0x39, 0x31, 0x02, 0xc4, 0x03, 0xf8, 0xcc, 0x12, 0xc8, 0xc9, 0x40,
-  0x8b, 0xa1, 0xb1, 0x09, 0xd6, 0x26, 0x04, 0x9c, 0x08, 0x71, 0x62, 0xeb,
-  0x81, 0x9b, 0x5c, 0x30, 0xcc, 0x05, 0x46, 0xdd, 0x66, 0xd4, 0xb1, 0x77,
-  0x30, 0xcc, 0xf5, 0x73, 0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88,
-  0xc1, 0x01, 0x80, 0x20, 0x18, 0x54, 0xb0, 0x1e, 0xf0, 0x79, 0x80, 0xe3,
-  0x81, 0xaa, 0x07, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09,
-  0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20,
-  0x08, 0x06, 0xcf, 0xad, 0x07, 0xa3, 0x1e, 0x24, 0x44, 0x30, 0x62, 0x80,
-  0x00, 0x20, 0x08, 0x06, 0x0f, 0xae, 0x07, 0xa4, 0x1e, 0x24, 0x44, 0x30,
-  0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x4f, 0xae, 0x07, 0xa5, 0x1e, 0x24,
-  0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0xdb, 0xad, 0x07, 0xa4,
-  0x1e, 0x88, 0x79, 0x10, 0xc4, 0x7a, 0x10, 0xe7, 0xc1, 0xab, 0x07, 0xa3,
-  0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0x27, 0x8c, 0x71, 0xc2, 0x18, 0x15,
-  0xe4, 0x79, 0x70, 0x35, 0x04, 0x7b, 0x81, 0x61, 0xb3, 0x04, 0x73, 0x32,
-  0xd0, 0x62, 0xe8, 0x86, 0x8f, 0xe8, 0xf2, 0xd0, 0x23, 0x36, 0x01, 0x26,
-  0x82, 0x9c, 0xe8, 0xf2, 0x10, 0x26, 0xb3, 0x0c, 0x74, 0x62, 0x27, 0xa2,
-  0x37, 0x1c, 0x51, 0x46, 0x6e, 0x1e, 0x0c, 0xdf, 0x99, 0xd1, 0x30, 0xc3,
-  0x0d, 0x01, 0x99, 0x07, 0x64, 0x50, 0x43, 0xa0, 0xc3, 0x11, 0xa9, 0x27,
-  0xe7, 0xc1, 0xf0, 0x55, 0x20, 0xe8, 0xad, 0xde, 0x30, 0xc3, 0x0d, 0xc1,
-  0x99, 0x07, 0x64, 0x50, 0xc1, 0xa0, 0xb3, 0x0c, 0x75, 0xa2, 0x2a, 0xc1,
-  0xad, 0x78, 0x30, 0xcc, 0xf1, 0x74, 0x30, 0xcc, 0x88, 0xc1, 0x01, 0x80,
-  0x20, 0x18, 0x54, 0xed, 0x1e, 0xe4, 0x7a, 0x50, 0xe7, 0xc1, 0xb9, 0x07,
-  0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a,
-  0x40, 0x0c, 0x45, 0x1c, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x0f,
-  0xbd, 0x07, 0xe0, 0x1e, 0x1c, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08,
-  0x06, 0x4f, 0xbd, 0x07, 0xe1, 0x1e, 0x30, 0x44, 0x30, 0x62, 0x80, 0x00,
-  0x20, 0x08, 0x06, 0x8f, 0xbd, 0x07, 0xe2, 0x1e, 0x48, 0x44, 0x30, 0x62,
-  0xa0, 0x00, 0x20, 0x08, 0x06, 0x1b, 0xbd, 0x07, 0xe1, 0x1e, 0xfc, 0x79,
-  0x10, 0xb8, 0x7b, 0xe0, 0xea, 0x01, 0xbb, 0x07, 0xa3, 0x09, 0x01, 0x30,
-  0x9a, 0x20, 0x04, 0x27, 0x8c, 0x71, 0xc2, 0x18, 0x15, 0xd8, 0x7a, 0x70,
-  0x35, 0x04, 0x7b, 0x81, 0x61, 0xb3, 0x04, 0xaa, 0x32, 0xdc, 0xf0, 0x7b,
-  0xf9, 0x1e, 0x80, 0xc1, 0x2c, 0xc3, 0x9d, 0xe0, 0x49, 0x50, 0x7f, 0x1e,
-  0xa4, 0x7b, 0x00, 0x17, 0x18, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06,
-  0x0b, 0xc9, 0x07, 0xea, 0x1e, 0x90, 0x52, 0xab, 0x07, 0x23, 0x06, 0x07,
-  0x00, 0x82, 0x60, 0xb0, 0x94, 0x7c, 0xa0, 0xee, 0x41, 0x20, 0x5c, 0x30,
-  0x4c, 0x89, 0x7a, 0xe0, 0xee, 0x01, 0x5c, 0x60, 0xd4, 0x88, 0xc1, 0x01,
-  0x80, 0x20, 0x18, 0x2c, 0x29, 0x1f, 0xbc, 0x7b, 0x90, 0x7e, 0xb2, 0x1e,
-  0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0xa2, 0xf2, 0xc1, 0xbb, 0x07,
-  0x81, 0x70, 0xc1, 0x30, 0x17, 0x18, 0x75, 0x87, 0x51, 0xb7, 0xe6, 0xc1,
-  0x30, 0xc7, 0xd7, 0xc1, 0x30, 0x47, 0x0c, 0x73, 0xc4, 0x30, 0x23, 0x06,
-  0x07, 0x00, 0x82, 0x60, 0x50, 0xbd, 0x7c, 0xb0, 0xef, 0xc1, 0xad, 0x07,
-  0x29, 0x1f, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c,
-  0xc2, 0x68, 0x02, 0x31, 0x14, 0x91, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20,
-  0x18, 0x3c, 0x36, 0x1f, 0x88, 0x7c, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02,
-  0x80, 0x20, 0x18, 0x3c, 0x37, 0x1f, 0x8c, 0x7c, 0x90, 0x10, 0xc1, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x3c, 0x38, 0x1f, 0x90, 0x7c, 0x90, 0x10,
-  0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x6c, 0x36, 0x1f, 0x8c, 0x7c,
-  0x10, 0xee, 0x41, 0x00, 0xf3, 0x01, 0xbc, 0x07, 0x2e, 0x1f, 0x8c, 0x26,
-  0x04, 0xc0, 0x68, 0x82, 0x10, 0x9c, 0x30, 0xc6, 0x09, 0x63, 0x54, 0x80,
-  0xef, 0xc1, 0xd5, 0x10, 0xec, 0x05, 0x86, 0xcd, 0x12, 0xa8, 0xca, 0x70,
-  0x43, 0x08, 0x06, 0x3a, 0x1f, 0x80, 0xc1, 0x2c, 0x43, 0x9e, 0xa8, 0x4a,
-  0x60, 0xb8, 0x1e, 0xe8, 0x7a, 0x10, 0x9f, 0xe1, 0x88, 0x72, 0xda, 0xf5,
-  0x80, 0xf8, 0x66, 0x19, 0xf4, 0xa4, 0x4f, 0x02, 0xe3, 0xf5, 0xc0, 0x9c,
-  0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x28, 0x0b, 0x0c,
-  0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0xb2, 0x0f, 0x74, 0xb8, 0x21, 0x10,
-  0xfb, 0x00, 0x0c, 0x66, 0x19, 0xf6, 0x84, 0x4f, 0x02, 0x1b, 0xc8, 0x3d,
-  0x80, 0xcf, 0x2c, 0x41, 0xa8, 0xd8, 0xb8, 0x07, 0x44, 0x7c, 0x66, 0x09,
-  0x42, 0x65, 0x38, 0x02, 0x9e, 0xc8, 0x3d, 0x10, 0xbe, 0x59, 0x06, 0x3f,
-  0x09, 0x95, 0xc0, 0xe2, 0xa9, 0xdc, 0x83, 0xf8, 0x58, 0xe0, 0xd0, 0xe7,
-  0x82, 0x61, 0x2e, 0x30, 0xca, 0x82, 0x48, 0x3e, 0x56, 0x04, 0xf1, 0x29,
-  0xe2, 0xed, 0x03, 0x1d, 0x6e, 0x08, 0xda, 0x3e, 0x00, 0x83, 0x59, 0x86,
-  0x3f, 0x01, 0x95, 0xc0, 0xda, 0x3d, 0x18, 0xe2, 0x33, 0x4b, 0x10, 0x2a,
-  0x46, 0xc0, 0x7b, 0x00, 0x9f, 0x59, 0x82, 0x50, 0x19, 0x68, 0x31, 0xb4,
-  0x3d, 0xc1, 0xf8, 0x84, 0xf8, 0x13, 0x01, 0x54, 0xec, 0x33, 0xe8, 0x93,
-  0x0b, 0x86, 0xb1, 0x77, 0x0f, 0xe6, 0x3d, 0x88, 0xcf, 0x70, 0x84, 0x0f,
-  0x06, 0xf4, 0x1e, 0x10, 0xdf, 0x2c, 0x83, 0xa8, 0x94, 0x4a, 0x60, 0xf5,
-  0x1e, 0xfc, 0x60, 0x10, 0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05,
-  0x46, 0x59, 0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xf4, 0x7d, 0xa0,
-  0xc3, 0x0d, 0xc1, 0xde, 0x07, 0x60, 0x30, 0xcb, 0x30, 0x2a, 0xa4, 0x12,
-  0xd8, 0xd0, 0xef, 0x01, 0x7c, 0x66, 0x09, 0x52, 0xc5, 0xf4, 0x3d, 0x20,
-  0xe2, 0x33, 0x4b, 0x90, 0x2a, 0xc3, 0x11, 0x69, 0x18, 0xec, 0x7b, 0x20,
-  0x7c, 0xb3, 0x0c, 0xa6, 0x92, 0x2a, 0x81, 0xa9, 0x61, 0xc0, 0xef, 0x41,
-  0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x18, 0x65, 0x41, 0x24,
-  0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x81, 0xfa, 0x81, 0x0e, 0x37, 0x04, 0xa6,
-  0x1f, 0x80, 0xc1, 0x2c, 0xc3, 0xa9, 0xa0, 0x4a, 0x60, 0x24, 0x1f, 0x0c,
-  0xf1, 0x99, 0x25, 0x48, 0x15, 0x23, 0x52, 0x3e, 0x80, 0xcf, 0x2c, 0x41,
-  0xaa, 0x0c, 0xb4, 0x18, 0xda, 0xa8, 0x60, 0xa4, 0x42, 0x9c, 0x8a, 0x80,
-  0x2a, 0xe8, 0x28, 0x94, 0xca, 0x05, 0xc3, 0x5c, 0x60, 0xd4, 0x6d, 0x46,
-  0x5d, 0xbd, 0x07, 0xc3, 0x9c, 0x89, 0x07, 0xc3, 0x1c, 0x31, 0xcc, 0x11,
-  0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x41, 0x95, 0xfb, 0x41, 0xe9,
-  0x07, 0x61, 0x1f, 0xcc, 0x7e, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42,
-  0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23, 0x06,
-  0x08, 0x00, 0x82, 0x60, 0xf0, 0x80, 0x7f, 0xc0, 0xfa, 0x41, 0x42, 0x04,
-  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xf0, 0x84, 0x7f, 0xd0, 0xfa, 0x41,
-  0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xf0, 0x88, 0x7f, 0xe0,
-  0xfa, 0x41, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0x81,
-  0x7f, 0xd0, 0xfa, 0xc1, 0xda, 0x07, 0x81, 0xee, 0x07, 0x7a, 0x1f, 0xe0,
-  0x7e, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x70, 0xc2, 0x18, 0x27,
-  0x8c, 0x51, 0x81, 0xe8, 0x07, 0x57, 0x43, 0xb0, 0x17, 0x18, 0x36, 0x4b,
-  0xa0, 0x2a, 0x03, 0x2d, 0x86, 0x6e, 0xd4, 0x09, 0xbb, 0x0f, 0x74, 0x62,
-  0x13, 0x77, 0x22, 0xa4, 0x0a, 0xbb, 0x0f, 0x78, 0x62, 0xae, 0x18, 0xe0,
-  0x7d, 0x00, 0x9f, 0x59, 0x86, 0x55, 0x69, 0x15, 0x56, 0x0c, 0x86, 0x23,
-  0x60, 0x31, 0xc0, 0xfb, 0x60, 0xf8, 0x2e, 0x16, 0x83, 0x61, 0x86, 0x1b,
-  0x02, 0xb7, 0x0f, 0xc8, 0xa0, 0x86, 0x40, 0x87, 0x23, 0x0a, 0xbe, 0x0f,
-  0x86, 0xaf, 0x02, 0x41, 0xef, 0x18, 0x66, 0xb8, 0x21, 0x88, 0xfb, 0x80,
-  0x0c, 0x2a, 0x18, 0x74, 0x96, 0x81, 0x55, 0xc2, 0x25, 0xb8, 0x9a, 0x0f,
-  0x86, 0x39, 0x33, 0x0f, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83,
-  0xea, 0xfe, 0x83, 0xf1, 0x0f, 0xfe, 0x3e, 0x88, 0xff, 0x60, 0x34, 0x21,
-  0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1,
-  0x88, 0x43, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0xf1, 0xff, 0x40,
-  0xfd, 0x83, 0x83, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0xf9,
-  0xff, 0x60, 0xfd, 0x03, 0x86, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1,
-  0xe0, 0x01, 0x41, 0x81, 0xfd, 0x03, 0x89, 0x08, 0x46, 0x0c, 0x14, 0x00,
-  0x04, 0xc1, 0x60, 0xf3, 0xff, 0x60, 0xfd, 0x83, 0xd4, 0x0f, 0x02, 0xfc,
-  0x0f, 0x70, 0x3f, 0xb0, 0xff, 0x60, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84,
-  0xe0, 0x84, 0x31, 0x4e, 0x18, 0xa3, 0x02, 0xf0, 0x0f, 0xae, 0x86, 0x60,
-  0x2f, 0x30, 0x6c, 0x96, 0x20, 0x5c, 0x86, 0x1b, 0xd2, 0x31, 0x18, 0x41,
-  0x01, 0x0c, 0x66, 0x19, 0x5c, 0xe5, 0x55, 0x82, 0x4a, 0xfd, 0x60, 0xfe,
-  0x03, 0xb8, 0xc0, 0xa8, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x58, 0x5c,
-  0x50, 0xa0, 0xff, 0xe0, 0x1d, 0x83, 0xdb, 0x0f, 0x46, 0x0c, 0x0e, 0x00,
-  0x04, 0xc1, 0x60, 0x79, 0x41, 0x81, 0xfe, 0x83, 0x40, 0xb8, 0x60, 0x98,
-  0x62, 0xfd, 0x00, 0xff, 0x03, 0xb8, 0xc0, 0xa8, 0x11, 0x83, 0x03, 0x00,
-  0x41, 0x30, 0x58, 0x66, 0x50, 0xc8, 0xff, 0xa0, 0x0c, 0x78, 0x3f, 0x18,
-  0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x85, 0x06, 0x85, 0xfc, 0x0f, 0x02,
-  0xe1, 0x82, 0x61, 0x2e, 0x30, 0xea, 0x0e, 0xa3, 0xae, 0xee, 0x83, 0x61,
-  0xce, 0xd4, 0x83, 0x61, 0x8e, 0x18, 0xe6, 0x88, 0x61, 0x46, 0x0c, 0x0e,
-  0x00, 0x04, 0xc1, 0xa0, 0xca, 0x41, 0xa1, 0x04, 0x85, 0xf0, 0x0f, 0x66,
-  0x50, 0x18, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84,
-  0xd1, 0x04, 0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30,
-  0x78, 0xc0, 0x50, 0x60, 0x41, 0x21, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00,
-  0x41, 0x30, 0x78, 0xc2, 0x50, 0x68, 0x41, 0x21, 0x21, 0x82, 0x11, 0x03,
-  0x04, 0x00, 0x41, 0x30, 0x78, 0xc4, 0x50, 0x70, 0x41, 0x21, 0x21, 0x82,
-  0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0xd8, 0xc0, 0x50, 0x68, 0x41, 0x61,
-  0xfd, 0x83, 0x40, 0x07, 0x05, 0xfd, 0x0f, 0x70, 0x50, 0x18, 0x4d, 0x08,
-  0x80, 0xd1, 0x04, 0x21, 0x38, 0x61, 0x8c, 0x13, 0xc6, 0xa8, 0x40, 0x04,
-  0x85, 0xab, 0x21, 0xd8, 0x0b, 0x0c, 0x9b, 0x25, 0x08, 0x97, 0xe1, 0x86,
-  0x95, 0x0c, 0xc8, 0x50, 0x00, 0x83, 0x59, 0x06, 0x58, 0x09, 0x97, 0xc0,
-  0xc4, 0x3f, 0x20, 0xff, 0x20, 0x3e, 0xc3, 0x11, 0x30, 0x19, 0x94, 0x7f,
-  0x40, 0x7c, 0xb3, 0x0c, 0xb1, 0x42, 0x2b, 0x81, 0x99, 0x7f, 0x10, 0x93,
-  0x41, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x18, 0x65, 0x81,
-  0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xe1, 0x86, 0x82, 0x0e, 0x37, 0x04,
-  0x6c, 0x28, 0x80, 0xc1, 0x2c, 0x83, 0xac, 0xcc, 0x4a, 0x60, 0x83, 0xfb,
-  0x07, 0xf0, 0x99, 0x25, 0xc0, 0x15, 0x6b, 0xff, 0x80, 0x88, 0xcf, 0x2c,
-  0x01, 0xae, 0x0c, 0x47, 0xec, 0x64, 0xe0, 0xfe, 0x81, 0xf0, 0xcd, 0x32,
-  0xd4, 0x0a, 0xae, 0x04, 0xc6, 0x93, 0xc1, 0xfb, 0x07, 0xf1, 0xb1, 0xc0,
-  0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0x60, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08,
-  0xe2, 0x53, 0x44, 0x1e, 0x0a, 0x3a, 0xdc, 0x10, 0xdc, 0xa1, 0x00, 0x06,
-  0xb3, 0x0c, 0xb6, 0x72, 0x2b, 0x81, 0xdd, 0x7f, 0x30, 0xc4, 0x67, 0x96,
-  0x00, 0x57, 0x8c, 0xd0, 0xff, 0x00, 0x3e, 0xb3, 0x04, 0xb8, 0x32, 0xd0,
-  0x62, 0x68, 0xb2, 0x82, 0xcd, 0x0a, 0x61, 0x2b, 0xc2, 0xad, 0xb0, 0xaa,
-  0x40, 0x2b, 0x17, 0x0c, 0x63, 0xf9, 0x1f, 0xf4, 0x7f, 0x10, 0x9f, 0xe1,
-  0x08, 0x5d, 0xf0, 0xff, 0x80, 0xf8, 0x66, 0x19, 0x72, 0x85, 0x57, 0x02,
-  0xfb, 0xff, 0x60, 0x17, 0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9,
-  0xc0, 0x28, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0x53, 0x14,
-  0x74, 0xb8, 0x21, 0x28, 0x45, 0x01, 0x0c, 0x66, 0x19, 0x74, 0x65, 0x57,
-  0x02, 0x1b, 0x4e, 0x50, 0x80, 0xcf, 0x2c, 0x01, 0xb8, 0x18, 0x09, 0x0a,
-  0x44, 0x7c, 0x66, 0x09, 0xc0, 0x65, 0x38, 0xa2, 0x1c, 0x4a, 0x50, 0x10,
-  0xbe, 0x59, 0x86, 0x5e, 0x01, 0x97, 0xc0, 0xcc, 0xc1, 0x04, 0x85, 0xf8,
-  0x58, 0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x30, 0xca, 0x82, 0x48, 0x3e,
-  0x56, 0x04, 0xf1, 0x29, 0x42, 0x16, 0x05, 0x1d, 0x6e, 0x08, 0x60, 0x51,
-  0x00, 0x83, 0x59, 0x06, 0x5f, 0xf9, 0x95, 0xc0, 0x5c, 0x50, 0x18, 0xe2,
-  0x33, 0x4b, 0x00, 0x2e, 0x46, 0xcc, 0xa0, 0x00, 0x9f, 0x59, 0x02, 0x70,
-  0x19, 0x68, 0x31, 0x34, 0x5d, 0xc1, 0x76, 0x85, 0xf0, 0x15, 0xe1, 0x57,
-  0xc0, 0x83, 0x57, 0x2e, 0x18, 0xe6, 0x02, 0xa3, 0x6e, 0x33, 0xea, 0xfe,
-  0x3f, 0x18, 0xe6, 0x60, 0x3e, 0x18, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66,
-  0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xaa, 0x71, 0x14, 0x5e, 0x51, 0x58,
-  0x43, 0xa1, 0x17, 0x85, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1,
-  0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00,
-  0x10, 0x04, 0x83, 0x47, 0x1d, 0x05, 0x5b, 0x14, 0x12, 0x22, 0x18, 0x31,
-  0x40, 0x00, 0x10, 0x04, 0x83, 0x67, 0x1d, 0x85, 0x5b, 0x14, 0x12, 0x22,
-  0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x87, 0x1d, 0x05, 0x5c, 0x14,
-  0x12, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x4d, 0x1d, 0x85,
-  0x5b, 0x14, 0xea, 0x50, 0x08, 0xc8, 0x51, 0x20, 0x45, 0x41, 0x1c, 0x85,
-  0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0x13, 0xc6, 0x38, 0x61, 0x8c,
-  0x0a, 0x58, 0x51, 0xb8, 0x1a, 0x82, 0xbd, 0xc0, 0xb0, 0x59, 0x82, 0x70,
-  0x19, 0x68, 0x31, 0x74, 0x83, 0x55, 0x40, 0x92, 0x58, 0x15, 0x9b, 0x70,
-  0x15, 0x01, 0x5c, 0x40, 0x92, 0x78, 0x95, 0x59, 0x06, 0x71, 0x21, 0x97,
-  0xda, 0x0c, 0x86, 0x23, 0x74, 0x33, 0x08, 0x45, 0x61, 0xf8, 0x6e, 0x37,
-  0x83, 0x61, 0x86, 0x1b, 0x82, 0x3b, 0x14, 0xc8, 0xa0, 0x86, 0x40, 0x87,
-  0x23, 0x5c, 0xa2, 0x14, 0x85, 0xe1, 0xab, 0x40, 0xd0, 0x83, 0x89, 0x61,
-  0x86, 0x1b, 0x02, 0x3d, 0x14, 0xc8, 0xa0, 0x82, 0x41, 0x67, 0x19, 0xc6,
-  0x05, 0x5f, 0x82, 0xf3, 0x41, 0x61, 0x98, 0x7b, 0xfb, 0x60, 0x98, 0x11,
-  0x83, 0x03, 0x00, 0x41, 0x30, 0xa8, 0x40, 0x52, 0x60, 0x47, 0x01, 0x15,
-  0x05, 0x7d, 0x14, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13,
-  0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x38, 0x64, 0xc4, 0x00, 0x01, 0x40,
-  0x10, 0x0c, 0x9e, 0x93, 0x14, 0xe6, 0x51, 0x38, 0x88, 0x60, 0xc4, 0x00,
-  0x01, 0x40, 0x10, 0x0c, 0x1e, 0x94, 0x14, 0xe8, 0x51, 0x60, 0x88, 0x60,
-  0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x9e, 0x94, 0x14, 0xea, 0x51, 0x90,
-  0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0xb6, 0x93, 0x14, 0xe8,
-  0x51, 0x90, 0x45, 0x21, 0x08, 0x49, 0x21, 0x1c, 0x85, 0x7f, 0x14, 0x46,
-  0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x4e, 0x18, 0xe3, 0x84, 0x31, 0x2a,
-  0x48, 0x47, 0xe1, 0x6a, 0x08, 0xf6, 0x02, 0xc3, 0x66, 0x09, 0xf0, 0x65,
-  0xb8, 0x41, 0x3e, 0x03, 0x96, 0x14, 0xc0, 0x60, 0x96, 0xa1, 0x5c, 0xcc,
-  0x25, 0x28, 0x59, 0x14, 0xf8, 0x51, 0x80, 0x0b, 0x8c, 0x1a, 0x31, 0x38,
-  0x00, 0x10, 0x04, 0x83, 0xe5, 0x26, 0x85, 0x7e, 0x14, 0xf2, 0x33, 0x00,
-  0x47, 0x61, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x16, 0x9c, 0x14, 0xfa,
-  0x51, 0x08, 0x84, 0x0b, 0x86, 0xa9, 0x5a, 0x14, 0x42, 0x52, 0x80, 0x0b,
-  0x8c, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x85, 0x27, 0x05, 0x91,
-  0x14, 0xdc, 0xa2, 0x1c, 0x85, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x58,
-  0x7a, 0x52, 0x10, 0x49, 0x21, 0x10, 0x2e, 0x18, 0xe6, 0x02, 0xa3, 0xee,
-  0x30, 0xea, 0xfc, 0x50, 0x18, 0xe6, 0x5e, 0x3f, 0x18, 0xe6, 0x88, 0x61,
-  0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x2a, 0xb1, 0x14,
-  0x5c, 0x52, 0x50, 0x47, 0x81, 0x27, 0x85, 0xd1, 0x84, 0x00, 0x18, 0x4d,
-  0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x12, 0x19,
-  0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x27, 0x2d, 0x85, 0x9a, 0x14, 0x12,
-  0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x47, 0x2d, 0x05, 0x9b,
-  0x14, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x67, 0x2d,
-  0x85, 0x9b, 0x14, 0x12, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83,
-  0x2d, 0x2d, 0x05, 0x9b, 0x14, 0xe8, 0x51, 0x08, 0xc6, 0x52, 0x18, 0x49,
-  0x21, 0x2c, 0x85, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0x13, 0xc6,
-  0x38, 0x61, 0x8c, 0x0a, 0x56, 0x52, 0xb8, 0x1a, 0x82, 0xbd, 0xc0, 0xb0,
-  0x59, 0x02, 0x7c, 0x19, 0x6e, 0xa0, 0xd1, 0xa0, 0x2d, 0x05, 0x30, 0x98,
-  0x65, 0x38, 0x17, 0x7c, 0x09, 0x6c, 0x1d, 0x85, 0x76, 0x14, 0xe2, 0x33,
-  0x1c, 0xa1, 0xa3, 0x81, 0x3b, 0x0a, 0xc4, 0x37, 0xcb, 0x80, 0x2e, 0xeb,
-  0x12, 0xd8, 0x3b, 0x0a, 0x3b, 0x1a, 0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17,
-  0x0c, 0x73, 0x81, 0x51, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11,
-  0x77, 0x29, 0xe8, 0x70, 0x43, 0x50, 0x97, 0x02, 0x18, 0xcc, 0x32, 0xa4,
-  0x8b, 0xba, 0x04, 0x36, 0xdc, 0xa3, 0x00, 0x9f, 0x59, 0x82, 0x77, 0x31,
-  0x7b, 0x14, 0x88, 0xf8, 0xcc, 0x12, 0xbc, 0xcb, 0x70, 0x44, 0x99, 0x06,
-  0xf7, 0x28, 0x08, 0xdf, 0x2c, 0x03, 0xbb, 0xbc, 0x4b, 0x60, 0x66, 0x1a,
-  0xe0, 0xa3, 0x10, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x46,
-  0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0x88, 0xa6, 0xa0, 0xc3,
-  0x0d, 0x01, 0x68, 0x0a, 0x60, 0x30, 0xcb, 0xd0, 0x2e, 0xee, 0x12, 0x18,
-  0x48, 0x0a, 0x43, 0x7c, 0x66, 0x09, 0xde, 0xc5, 0x88, 0x91, 0x14, 0xe0,
-  0x33, 0x4b, 0xf0, 0x2e, 0x03, 0x2d, 0x86, 0x96, 0x2e, 0x98, 0xba, 0x10,
-  0xed, 0x22, 0xb8, 0x0b, 0x38, 0x0b, 0xeb, 0x72, 0xc1, 0x30, 0x26, 0x92,
-  0x82, 0x49, 0x0a, 0xf1, 0x19, 0x8e, 0x18, 0x8f, 0x93, 0x14, 0x88, 0x6f,
-  0x96, 0x01, 0x5e, 0xe6, 0x25, 0x30, 0x94, 0x14, 0xc8, 0x23, 0x3e, 0x16,
-  0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x8c, 0xb2, 0xc0, 0x90, 0x8f, 0x15,
-  0x41, 0x7c, 0x8a, 0x80, 0x4d, 0x41, 0x87, 0x1b, 0x02, 0xd7, 0x14, 0xc0,
-  0x60, 0x96, 0x21, 0x5e, 0xe4, 0x25, 0xb0, 0x01, 0x26, 0x05, 0xf8, 0xcc,
-  0x12, 0xdc, 0x8b, 0xb5, 0xa4, 0x40, 0xc4, 0x67, 0x96, 0xe0, 0x5e, 0x86,
-  0x23, 0xdc, 0xc3, 0x25, 0x05, 0xe1, 0x9b, 0x65, 0xa0, 0x97, 0x7b, 0x09,
-  0xec, 0x3d, 0x5e, 0x52, 0x88, 0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6,
-  0x02, 0xa3, 0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0x76, 0x53,
-  0xd0, 0xe1, 0x86, 0x20, 0x37, 0x05, 0x30, 0x98, 0x65, 0xa8, 0x17, 0x7b,
-  0x09, 0xec, 0x26, 0x85, 0x21, 0x3e, 0xb3, 0x04, 0xf7, 0x62, 0x04, 0x4f,
-  0x0a, 0xf0, 0x99, 0x25, 0xb8, 0x97, 0x81, 0x16, 0x43, 0x8b, 0x17, 0x4c,
-  0x5e, 0x88, 0x7a, 0x11, 0xec, 0x45, 0x7e, 0xe6, 0xe5, 0x82, 0x61, 0x2e,
-  0x30, 0xea, 0x36, 0xa3, 0x0e, 0x25, 0x85, 0x61, 0x2e, 0x07, 0x85, 0x61,
-  0x8e, 0x18, 0xe6, 0x88, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0xa0,
-  0x62, 0x4f, 0x01, 0x37, 0x05, 0xba, 0x14, 0xcc, 0x53, 0x18, 0x4d, 0x08,
-  0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28,
-  0x22, 0x91, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0x78, 0xe6, 0x53, 0xf8,
-  0x4d, 0x21, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0x78, 0xe8,
-  0x53, 0x00, 0x4f, 0x21, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30,
-  0x78, 0xea, 0x53, 0x08, 0x4f, 0x21, 0x21, 0x82, 0x11, 0x03, 0x05, 0x00,
-  0x41, 0x30, 0xd8, 0xe6, 0x53, 0x00, 0x4f, 0xc1, 0x2f, 0x85, 0xa0, 0x3d,
-  0x85, 0xd6, 0x14, 0xd6, 0x53, 0x18, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21,
-  0x38, 0x61, 0x8c, 0x13, 0xc6, 0xa8, 0xa0, 0x36, 0x85, 0xab, 0x21, 0xd8,
-  0x0b, 0x0c, 0x9b, 0x25, 0xc0, 0x97, 0x81, 0x16, 0x43, 0x37, 0xc6, 0x45,
-  0x66, 0x09, 0x71, 0xb1, 0x89, 0x72, 0x11, 0xee, 0x45, 0x66, 0x09, 0x73,
-  0x99, 0x65, 0xc8, 0x97, 0x7d, 0xf1, 0xd5, 0x60, 0x38, 0x62, 0x15, 0x03,
-  0xd5, 0x14, 0x86, 0xef, 0x58, 0x31, 0x18, 0x66, 0xb8, 0x21, 0x00, 0x4d,
-  0x81, 0x0c, 0x6a, 0x08, 0x74, 0x38, 0xe2, 0x46, 0x5c, 0x53, 0x18, 0xbe,
-  0x0a, 0x04, 0xbd, 0x1c, 0x19, 0x66, 0xb8, 0x21, 0x18, 0x4d, 0x81, 0x0c,
-  0x2a, 0x18, 0x74, 0x96, 0x41, 0x5f, 0x5e, 0x26, 0xb8, 0xb3, 0x14, 0x86,
-  0x39, 0x3c, 0x14, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x2a,
-  0x45, 0x85, 0xfa, 0x14, 0x62, 0x53, 0x18, 0x51, 0x61, 0x34, 0x21, 0x00,
-  0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88,
-  0x43, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x81, 0x51, 0x81, 0x3f,
-  0x85, 0x83, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x89, 0x51,
-  0xa1, 0x3f, 0x05, 0x86, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0,
-  0x91, 0x51, 0xc1, 0x3f, 0x05, 0x89, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04,
-  0xc1, 0x60, 0x83, 0x51, 0xa1, 0x3f, 0x85, 0xdd, 0x14, 0x02, 0x15, 0x15,
-  0xd4, 0x53, 0x40, 0x51, 0x61, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0xe0,
-  0x84, 0x31, 0x4e, 0x18, 0xa3, 0x02, 0xf9, 0x14, 0xae, 0x86, 0x60, 0x2f,
-  0x30, 0x6c, 0x96, 0xe0, 0x65, 0x86, 0x1b, 0xf6, 0x35, 0xa8, 0x51, 0x01,
-  0x0c, 0x66, 0x19, 0xf8, 0xa5, 0x5f, 0x82, 0xda, 0x4d, 0xa1, 0x44, 0x05,
-  0xb8, 0xc0, 0xa8, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x58, 0xc0, 0x54,
-  0x30, 0x51, 0x41, 0x1d, 0x83, 0xf4, 0x14, 0x46, 0x0c, 0x0e, 0x00, 0x04,
-  0xc1, 0x60, 0x09, 0x53, 0xc1, 0x44, 0x85, 0x40, 0xb8, 0x60, 0x98, 0xf2,
-  0x4d, 0x41, 0x45, 0x05, 0xb8, 0xc0, 0xa8, 0x11, 0x83, 0x03, 0x00, 0x41,
-  0x30, 0x58, 0xca, 0x54, 0x58, 0x51, 0xe1, 0x4e, 0xdc, 0x53, 0x18, 0x31,
-  0x38, 0x00, 0x10, 0x04, 0x83, 0xc5, 0x4c, 0x85, 0x15, 0x15, 0x02, 0xe1,
-  0x82, 0x61, 0x2e, 0x30, 0xea, 0x0e, 0xa3, 0xee, 0x34, 0x85, 0x61, 0x0e,
-  0x17, 0x85, 0x61, 0x8e, 0x18, 0xe6, 0x88, 0x61, 0x46, 0x0c, 0x0e, 0x00,
-  0x04, 0xc1, 0xa0, 0x5a, 0x53, 0xe1, 0x46, 0x85, 0xf9, 0x14, 0xca, 0x54,
-  0x18, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1,
-  0x04, 0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0x78,
-  0xe4, 0x54, 0xf0, 0x51, 0x21, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41,
-  0x30, 0x78, 0xe6, 0x54, 0xf8, 0x51, 0x21, 0x21, 0x82, 0x11, 0x03, 0x04,
-  0x00, 0x41, 0x30, 0x78, 0xe8, 0x54, 0x00, 0x53, 0x21, 0x21, 0x82, 0x11,
-  0x03, 0x05, 0x00, 0x41, 0x30, 0xd8, 0xe4, 0x54, 0xf8, 0x51, 0xa1, 0x3f,
-  0x85, 0x80, 0x4d, 0x05, 0x16, 0x15, 0xd4, 0x54, 0x18, 0x4d, 0x08, 0x80,
-  0xd1, 0x04, 0x21, 0x38, 0x61, 0x8c, 0x13, 0xc6, 0xa8, 0x80, 0x46, 0x85,
-  0xab, 0x21, 0xd8, 0x0b, 0x0c, 0x9b, 0x25, 0x78, 0x99, 0xe1, 0x86, 0x9e,
-  0x0d, 0xec, 0x54, 0x00, 0x83, 0x59, 0x06, 0x7f, 0x79, 0x99, 0xc0, 0xe8,
-  0x53, 0xb0, 0x4f, 0x21, 0x3e, 0xc3, 0x11, 0x2b, 0x19, 0xdc, 0xa7, 0x40,
-  0x7c, 0xb3, 0x0c, 0xff, 0x22, 0x32, 0x81, 0xe1, 0xa7, 0xc0, 0x92, 0x41,
-  0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x18, 0x65, 0x81, 0x21,
-  0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x01, 0xaa, 0x82, 0x0e, 0x37, 0x04, 0x7e,
-  0x2a, 0x80, 0xc1, 0x2c, 0x03, 0xc8, 0x84, 0x4c, 0x60, 0x03, 0x88, 0x0a,
-  0xf0, 0x99, 0x25, 0x30, 0x19, 0xfb, 0x4f, 0x81, 0x88, 0xcf, 0x2c, 0x81,
-  0xc9, 0x0c, 0x47, 0xd8, 0x64, 0x00, 0xa2, 0x82, 0xf0, 0xcd, 0x32, 0x8c,
-  0x8c, 0xc9, 0x04, 0x76, 0x93, 0x41, 0x88, 0x0a, 0xf1, 0xb1, 0xc0, 0xa1,
-  0xcf, 0x05, 0xc3, 0x5c, 0x60, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2,
-  0x53, 0xc4, 0xaa, 0x0a, 0x3a, 0xdc, 0x10, 0xa4, 0xaa, 0x00, 0x06, 0xb3,
-  0x0c, 0x24, 0x53, 0x32, 0x81, 0xa5, 0xa8, 0x30, 0xc4, 0x67, 0x96, 0xc0,
-  0x64, 0x8c, 0x60, 0x51, 0x01, 0x3e, 0xb3, 0x04, 0x26, 0x33, 0xd0, 0x62,
-  0x68, 0x20, 0x83, 0x85, 0x0c, 0x41, 0x32, 0x42, 0xc9, 0x88, 0xaa, 0x20,
-  0x32, 0x17, 0x0c, 0x63, 0x2b, 0x2a, 0xbc, 0xa8, 0x10, 0x9f, 0xe1, 0x08,
-  0x76, 0x81, 0x51, 0x81, 0xf8, 0x66, 0x19, 0x4e, 0x46, 0x65, 0x02, 0x8b,
-  0x51, 0xa1, 0x5d, 0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xc0,
-  0x28, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0x5c, 0x15, 0x74,
-  0xb8, 0x21, 0xb8, 0x55, 0x01, 0x0c, 0x66, 0x19, 0x50, 0x26, 0x65, 0x02,
-  0x1b, 0x72, 0x54, 0x80, 0xcf, 0x2c, 0x81, 0xcb, 0x98, 0x8d, 0x0a, 0x44,
-  0x7c, 0x66, 0x09, 0x5c, 0x66, 0x38, 0xe2, 0x5e, 0x6e, 0x54, 0x10, 0xbe,
-  0x59, 0x86, 0x95, 0x71, 0x99, 0xc0, 0xf0, 0x05, 0x47, 0x85, 0xf8, 0x58,
-  0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x30, 0xca, 0x82, 0x48, 0x3e, 0x56,
-  0x04, 0xf1, 0x29, 0x82, 0x5c, 0x05, 0x1d, 0x6e, 0x08, 0xc4, 0x55, 0x00,
-  0x83, 0x59, 0x06, 0x96, 0x69, 0x99, 0xc0, 0xc0, 0x54, 0x18, 0xe2, 0x33,
-  0x4b, 0xe0, 0x32, 0x46, 0x94, 0xa9, 0x00, 0x9f, 0x59, 0x02, 0x97, 0x19,
-  0x68, 0x31, 0x34, 0x94, 0xc1, 0x52, 0x86, 0x60, 0x19, 0xa1, 0x65, 0x48,
-  0x4c, 0x65, 0x2e, 0x18, 0xe6, 0x02, 0xa3, 0x6e, 0x33, 0xea, 0x62, 0x54,
-  0x18, 0xe6, 0xc4, 0x52, 0x18, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4,
-  0xe0, 0x00, 0x40, 0x10, 0x0c, 0xaa, 0x7a, 0x15, 0xc2, 0x55, 0xe8, 0x53,
-  0xe1, 0x5d, 0x85, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84,
-  0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10,
-  0x04, 0x83, 0x87, 0x5f, 0x05, 0x74, 0x15, 0x12, 0x22, 0x18, 0x31, 0x40,
-  0x00, 0x10, 0x04, 0x83, 0xa7, 0x5f, 0x85, 0x74, 0x15, 0x12, 0x22, 0x18,
-  0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0xc7, 0x5f, 0x05, 0x75, 0x15, 0x12,
-  0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x8d, 0x5f, 0x85, 0x74,
-  0x15, 0x4e, 0x55, 0x08, 0xec, 0x55, 0xb0, 0x55, 0x81, 0x5e, 0x85, 0xd1,
-  0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0x13, 0xc6, 0x38, 0x61, 0x8c, 0x0a,
-  0x7c, 0x55, 0xb8, 0x1a, 0x82, 0xbd, 0xc0, 0xb0, 0x59, 0x82, 0x97, 0x19,
-  0x68, 0x31, 0x74, 0x43, 0x5f, 0xc8, 0x9a, 0xc8, 0x17, 0x9b, 0xe0, 0x17,
-  0xc1, 0x65, 0xc8, 0x9a, 0xe8, 0x97, 0x59, 0x06, 0x98, 0x91, 0x99, 0xf3,
-  0x0d, 0x86, 0x23, 0x54, 0x31, 0x98, 0x55, 0x61, 0xf8, 0x6e, 0x15, 0x83,
-  0x61, 0x86, 0x1b, 0x82, 0x54, 0x15, 0xc8, 0xa0, 0x86, 0x40, 0x87, 0x23,
-  0xc0, 0xe6, 0x56, 0x85, 0xe1, 0xab, 0x40, 0xd0, 0x13, 0x9b, 0x61, 0x86,
-  0x1b, 0x02, 0x56, 0x15, 0xc8, 0xa0, 0x82, 0x41, 0x67, 0x19, 0x62, 0xc6,
-  0x6c, 0x82, 0x83, 0x53, 0x61, 0x98, 0x0b, 0x4d, 0x61, 0x98, 0x11, 0x83,
-  0x03, 0x00, 0x41, 0x30, 0xa8, 0x64, 0x56, 0xf0, 0x57, 0x41, 0x57, 0x05,
-  0x96, 0x15, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06,
-  0x61, 0x34, 0x81, 0x18, 0x8a, 0x38, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10,
-  0x0c, 0x9e, 0x9c, 0x15, 0x4a, 0x56, 0x38, 0x88, 0x60, 0xc4, 0x00, 0x01,
-  0x40, 0x10, 0x0c, 0x1e, 0x9d, 0x15, 0x4c, 0x56, 0x60, 0x88, 0x60, 0xc4,
-  0x00, 0x01, 0x40, 0x10, 0x0c, 0x9e, 0x9d, 0x15, 0x4e, 0x56, 0x90, 0x88,
-  0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0xb6, 0x9c, 0x15, 0x4c, 0x56,
-  0x20, 0x57, 0x21, 0x98, 0x59, 0x61, 0x5e, 0x85, 0x98, 0x15, 0x46, 0x13,
-  0x02, 0x60, 0x34, 0x41, 0x08, 0x4e, 0x18, 0xe3, 0x84, 0x31, 0x2a, 0xd8,
-  0x57, 0xe1, 0x6a, 0x08, 0xf6, 0x02, 0xc3, 0x66, 0x09, 0xcc, 0x66, 0xb8,
-  0x81, 0x84, 0x03, 0x9f, 0x15, 0xc0, 0x60, 0x96, 0x61, 0x66, 0x68, 0x26,
-  0x28, 0x72, 0x15, 0x5c, 0x56, 0x80, 0x0b, 0x8c, 0x1a, 0x31, 0x38, 0x00,
-  0x10, 0x04, 0x83, 0x25, 0x6d, 0x85, 0x97, 0x15, 0xd2, 0x31, 0x90, 0x57,
-  0x61, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x16, 0xb5, 0x15, 0x5e, 0x56,
-  0x08, 0x84, 0x0b, 0x86, 0xa9, 0x73, 0x15, 0x66, 0x56, 0x80, 0x0b, 0x8c,
-  0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0xc5, 0x6d, 0x05, 0x9a, 0x15,
-  0x40, 0xe7, 0x5e, 0x85, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x58, 0xde,
-  0x56, 0xa0, 0x59, 0x21, 0x10, 0x2e, 0x18, 0xe6, 0x02, 0xa3, 0xee, 0x30,
-  0xea, 0x60, 0x55, 0x18, 0xe6, 0xc2, 0x53, 0x18, 0xe6, 0x88, 0x61, 0x8e,
-  0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x2a, 0xba, 0x15, 0xc0,
-  0x56, 0xe0, 0x57, 0xc1, 0x6d, 0x85, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10,
-  0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x12, 0x19, 0x31,
-  0x40, 0x00, 0x10, 0x04, 0x83, 0x67, 0x6f, 0x85, 0xb3, 0x15, 0x12, 0x22,
-  0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x87, 0x6f, 0x05, 0xb4, 0x15,
-  0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0xa7, 0x6f, 0x85,
-  0xb4, 0x15, 0x12, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x6d,
-  0x6f, 0x05, 0xb4, 0x15, 0x4c, 0x56, 0x08, 0xea, 0x56, 0xa8, 0x59, 0x61,
-  0x6e, 0x85, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0x13, 0xc6, 0x38,
-  0x61, 0x8c, 0x0a, 0x7a, 0x56, 0xb8, 0x1a, 0x82, 0xbd, 0xc0, 0xb0, 0x59,
-  0x02, 0xb3, 0x19, 0x6e, 0x30, 0xe3, 0xe0, 0x6f, 0x05, 0x30, 0x98, 0x65,
-  0xa8, 0x19, 0xb3, 0x09, 0xac, 0x5f, 0x85, 0x7f, 0x15, 0xe2, 0x33, 0x1c,
-  0xa1, 0x92, 0x01, 0xc8, 0x0a, 0xc4, 0x37, 0xcb, 0x60, 0x33, 0x39, 0x13,
-  0x58, 0xc8, 0x0a, 0x2b, 0x19, 0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c,
-  0x73, 0x81, 0x51, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0xa9,
-  0x2b, 0xe8, 0x70, 0x43, 0x70, 0xba, 0x02, 0x18, 0xcc, 0x32, 0xdc, 0x0c,
-  0xce, 0x04, 0x36, 0xa4, 0xac, 0x00, 0x9f, 0x59, 0x82, 0x9e, 0x31, 0x94,
-  0x15, 0x88, 0xf8, 0xcc, 0x12, 0xf4, 0xcc, 0x70, 0x44, 0x4d, 0x06, 0x29,
-  0x2b, 0x08, 0xdf, 0x2c, 0x83, 0xce, 0xf4, 0x4c, 0x60, 0x36, 0x19, 0xa8,
-  0xac, 0x10, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x46, 0x59,
-  0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xd0, 0xae, 0xa0, 0xc3, 0x0d,
-  0x81, 0xec, 0x0a, 0x60, 0x30, 0xcb, 0xb0, 0x33, 0x3c, 0x13, 0x98, 0xcc,
-  0x0a, 0x43, 0x7c, 0x66, 0x09, 0x7a, 0xc6, 0x88, 0x9a, 0x15, 0xe0, 0x33,
-  0x4b, 0xd0, 0x33, 0x03, 0x2d, 0x86, 0x76, 0x33, 0x18, 0xce, 0x10, 0x3b,
-  0x23, 0xf0, 0x0c, 0xa8, 0x0a, 0x39, 0x73, 0xc1, 0x30, 0x46, 0xb3, 0x02,
-  0xce, 0x0a, 0xf1, 0x19, 0x8e, 0xa8, 0x9f, 0x9c, 0x15, 0x88, 0x6f, 0x96,
-  0xc1, 0x67, 0xc2, 0x26, 0x30, 0x9d, 0x15, 0xec, 0x27, 0x3e, 0x16, 0x0c,
-  0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x8c, 0xb2, 0xc0, 0x90, 0x8f, 0x15, 0x41,
-  0x7c, 0x8a, 0x10, 0x5f, 0x41, 0x87, 0x1b, 0x02, 0xf0, 0x15, 0xc0, 0x60,
-  0x96, 0xe1, 0x67, 0xc0, 0x26, 0xb0, 0x41, 0x6c, 0x05, 0xf8, 0xcc, 0x12,
-  0x94, 0x8d, 0xfd, 0xac, 0x40, 0xc4, 0x67, 0x96, 0xa0, 0x6c, 0x86, 0x23,
-  0x40, 0x08, 0x6c, 0x05, 0xe1, 0x9b, 0x65, 0x10, 0x9b, 0xb2, 0x09, 0x2c,
-  0x84, 0xc2, 0x56, 0x88, 0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x02,
-  0xa3, 0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xda, 0x57, 0xd0,
-  0xe1, 0x86, 0x60, 0x7d, 0x05, 0x30, 0x98, 0x65, 0x18, 0x1b, 0xb2, 0x09,
-  0x2c, 0x6d, 0x85, 0x21, 0x3e, 0xb3, 0x04, 0x65, 0x63, 0x84, 0xdb, 0x0a,
-  0xf0, 0x99, 0x25, 0x28, 0x9b, 0x81, 0x16, 0x43, 0xfb, 0x19, 0x0c, 0x6c,
-  0x88, 0xb1, 0x11, 0xc8, 0xc6, 0x06, 0x83, 0xb0, 0xb9, 0x60, 0x98, 0x0b,
-  0x8c, 0xba, 0xcd, 0xa8, 0xd3, 0x59, 0x61, 0x98, 0x5b, 0x53, 0x61, 0x98,
-  0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa8,
-  0xfc, 0x57, 0x50, 0x5f, 0xc1, 0x74, 0x05, 0xfc, 0x15, 0x46, 0x13, 0x02,
-  0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a,
-  0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x9e, 0x12, 0x16, 0xe2,
-  0x57, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x1e, 0x13,
-  0x16, 0xe4, 0x57, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c,
-  0x9e, 0x13, 0x16, 0xe6, 0x57, 0x48, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40,
-  0x10, 0x0c, 0xb6, 0x12, 0x16, 0xe4, 0x57, 0x80, 0x5d, 0x21, 0xf8, 0x5f,
-  0xe1, 0x77, 0x85, 0xfe, 0x15, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08,
-  0x4e, 0x18, 0xe3, 0x84, 0x31, 0x2a, 0x38, 0x5f, 0xe1, 0x6a, 0x08, 0xf6,
-  0x02, 0xc3, 0x66, 0x09, 0xcc, 0x66, 0xa0, 0xc5, 0xd0, 0x8d, 0x98, 0xb1,
-  0x7b, 0x02, 0x66, 0x6c, 0x62, 0x66, 0x84, 0xb2, 0xb1, 0x7b, 0x82, 0x66,
-  0x6c, 0x9e, 0x03, 0xf0, 0x15, 0xe0, 0x33, 0xcb, 0x70, 0x36, 0x69, 0x13,
-  0xcf, 0xc1, 0x70, 0x44, 0x3d, 0x07, 0xbd, 0x2b, 0x0c, 0xdf, 0xd9, 0x73,
-  0x30, 0xcc, 0x70, 0x43, 0x30, 0xbb, 0x02, 0x19, 0xd4, 0x10, 0xe8, 0x70,
-  0x44, 0x11, 0xbe, 0xc2, 0xf0, 0x55, 0x20, 0xe8, 0x1d, 0xc3, 0x0c, 0x37,
-  0x04, 0xb6, 0x2b, 0x90, 0x41, 0x05, 0x83, 0xce, 0x32, 0xa0, 0x4d, 0xdf,
-  0x04, 0xa7, 0xb7, 0xc2, 0x30, 0xb7, 0xaa, 0xc2, 0x30, 0x23, 0x06, 0x07,
-  0x00, 0x82, 0x60, 0x50, 0xf1, 0xb0, 0x80, 0xc2, 0x02, 0xf9, 0x0a, 0x36,
-  0x2c, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2,
-  0x68, 0x02, 0x31, 0x14, 0x71, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
-  0x3c, 0x63, 0x2c, 0xbc, 0xb0, 0x70, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80,
-  0x20, 0x18, 0x3c, 0x64, 0x2c, 0xc0, 0xb0, 0xc0, 0x10, 0xc1, 0x88, 0x01,
-  0x02, 0x80, 0x20, 0x18, 0x3c, 0x65, 0x2c, 0xc4, 0xb0, 0x20, 0x11, 0xc1,
-  0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x6c, 0x63, 0x2c, 0xc0, 0xb0, 0xe0,
-  0xbe, 0x42, 0xd0, 0xc3, 0x42, 0xff, 0x0a, 0x3b, 0x2c, 0x8c, 0x26, 0x04,
-  0xc0, 0x68, 0x82, 0x10, 0x9c, 0x30, 0xc6, 0x09, 0x63, 0x54, 0x50, 0xc2,
-  0xc2, 0xd5, 0x10, 0xec, 0x05, 0x86, 0xcd, 0x12, 0xf4, 0xcd, 0x70, 0x83,
-  0x4b, 0x07, 0x68, 0x2c, 0x80, 0xc1, 0x2c, 0x83, 0xda, 0xac, 0x4d, 0x50,
-  0xee, 0x2b, 0xe0, 0xb0, 0x00, 0x17, 0x18, 0x35, 0x62, 0x70, 0x00, 0x20,
-  0x08, 0x06, 0xcb, 0x1c, 0x0b, 0x39, 0x2c, 0xd0, 0x74, 0xc0, 0xbf, 0xc2,
-  0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x2c, 0x74, 0x2c, 0xe4, 0xb0, 0x10,
-  0x08, 0x17, 0x0c, 0x53, 0xf1, 0x2b, 0xf4, 0xb0, 0x00, 0x17, 0x18, 0x35,
-  0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x0b, 0x1e, 0x0b, 0x3e, 0x2c, 0x94,
-  0x41, 0x08, 0x0b, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xb0, 0xe4, 0xb1,
-  0xe0, 0xc3, 0x42, 0x20, 0x5c, 0x30, 0xcc, 0x05, 0x46, 0xdd, 0x61, 0xd4,
-  0xe9, 0xae, 0x30, 0xcc, 0xad, 0xab, 0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31,
-  0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x54, 0x7e, 0x2c, 0xa8, 0xb1,
-  0x60, 0xc2, 0x02, 0x1e, 0x0b, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04,
-  0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80,
-  0x00, 0x20, 0x08, 0x06, 0x4f, 0x29, 0x0b, 0x71, 0x2c, 0x24, 0x44, 0x30,
-  0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x8f, 0x29, 0x0b, 0x72, 0x2c, 0x24,
-  0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xcf, 0x29, 0x0b, 0x73,
-  0x2c, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x5b, 0x29,
-  0x0b, 0x72, 0x2c, 0xc0, 0xb0, 0x10, 0xfc, 0xb1, 0xf0, 0xc3, 0x42, 0x1f,
-  0x0b, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0x27, 0x8c, 0x71, 0xc2,
-  0x18, 0x15, 0x9c, 0xb1, 0x70, 0x35, 0x04, 0x7b, 0x81, 0x61, 0xb3, 0x04,
-  0x7d, 0x33, 0xdc, 0x00, 0xd7, 0x41, 0x2a, 0x0b, 0x60, 0x30, 0xcb, 0xc0,
-  0x36, 0x7d, 0x13, 0xd8, 0x09, 0x0b, 0x29, 0x2c, 0xc4, 0x67, 0x38, 0xa2,
-  0xae, 0x03, 0x15, 0x16, 0x88, 0x6f, 0x96, 0xa1, 0x6d, 0xe0, 0x26, 0xb0,
-  0x15, 0x16, 0xec, 0x3a, 0x88, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6,
-  0x02, 0xa3, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0x66, 0x59,
-  0xd0, 0xe1, 0x86, 0x20, 0x96, 0x05, 0x30, 0x98, 0x65, 0x70, 0x9b, 0xb7,
-  0x09, 0x6c, 0x98, 0x61, 0x01, 0x3e, 0xb3, 0x04, 0x74, 0x63, 0x32, 0x2c,
-  0x10, 0xf1, 0x99, 0x25, 0xa0, 0x9b, 0xe1, 0x08, 0xd0, 0x0e, 0x66, 0x58,
-  0x10, 0xbe, 0x59, 0x86, 0xb8, 0xa1, 0x9b, 0xc0, 0x42, 0x3b, 0xa0, 0x61,
-  0x21, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x8c, 0xb2, 0x20,
-  0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xf0, 0x65, 0x41, 0x87, 0x1b, 0x02,
-  0x5e, 0x16, 0xc0, 0x60, 0x96, 0x41, 0x6e, 0xe6, 0x26, 0x30, 0x1e, 0x16,
-  0x86, 0xf8, 0xcc, 0x12, 0xd0, 0x8d, 0x11, 0x3f, 0x2c, 0xc0, 0x67, 0x96,
-  0x80, 0x6e, 0x06, 0x5a, 0x0c, 0xcd, 0x6d, 0xb0, 0xb7, 0x21, 0xe4, 0x46,
-  0x98, 0x1b, 0x9b, 0x1d, 0xe0, 0xe6, 0x82, 0x61, 0xcc, 0x87, 0x05, 0x31,
-  0x16, 0xe2, 0x33, 0x1c, 0xa1, 0x0b, 0x63, 0x2c, 0x10, 0xdf, 0x2c, 0x43,
-  0xdd, 0xe0, 0x4d, 0x60, 0x64, 0x2c, 0xec, 0x42, 0x7c, 0x2c, 0x18, 0xe8,
-  0x73, 0xc1, 0x30, 0x17, 0x18, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8,
-  0x14, 0xc1, 0xce, 0x82, 0x0e, 0x37, 0x04, 0xea, 0x2c, 0x80, 0xc1, 0x2c,
-  0x83, 0xdd, 0xdc, 0x4d, 0x60, 0x03, 0x1b, 0x0b, 0xf0, 0x99, 0x25, 0xe0,
-  0x1b, 0x4b, 0x63, 0x81, 0x88, 0xcf, 0x2c, 0x01, 0xdf, 0x0c, 0x47, 0x94,
-  0x83, 0x1a, 0x0b, 0xc2, 0x37, 0xcb, 0x90, 0x37, 0x7c, 0x13, 0x98, 0x39,
-  0xac, 0xb1, 0x10, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x46,
-  0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xdc, 0xb3, 0xa0, 0xc3,
-  0x0d, 0x41, 0x3d, 0x0b, 0x60, 0x30, 0xcb, 0xa0, 0x37, 0x7b, 0x13, 0xd8,
-  0x1c, 0x0b, 0x43, 0x7c, 0x66, 0x09, 0xf8, 0xc6, 0x08, 0x3c, 0x16, 0xe0,
-  0x33, 0x4b, 0xc0, 0x37, 0x03, 0x2d, 0x86, 0x66, 0x37, 0xd8, 0xdd, 0x10,
-  0x7a, 0x23, 0xec, 0x0d, 0x78, 0xe0, 0xcd, 0x05, 0xc3, 0x5c, 0x60, 0xd4,
-  0x6d, 0x46, 0x1d, 0x19, 0x0b, 0xc3, 0x5c, 0xdd, 0x0a, 0xc3, 0x1c, 0x31,
-  0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x41, 0x85, 0xd2,
-  0x02, 0x3d, 0x0b, 0xb0, 0x2c, 0x88, 0xb4, 0x30, 0x9a, 0x10, 0x00, 0xa3,
-  0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22,
-  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xf0, 0xbc, 0xb4, 0xb0, 0xcf, 0x42,
-  0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xf0, 0xc0, 0xb4, 0xc0,
-  0xcf, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xf0, 0xc4,
-  0xb4, 0xd0, 0xcf, 0x42, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60,
-  0xb0, 0xbd, 0xb4, 0xc0, 0xcf, 0x82, 0x2e, 0x0b, 0x41, 0x4a, 0x0b, 0xe9,
-  0x2c, 0x9c, 0xb4, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x70, 0xc2,
-  0x18, 0x27, 0x8c, 0x51, 0x41, 0x3c, 0x0b, 0x57, 0x43, 0xb0, 0x17, 0x18,
-  0x36, 0x4b, 0xd0, 0x37, 0x03, 0x2d, 0x86, 0x6e, 0xa0, 0x8d, 0x6a, 0x16,
-  0x67, 0x63, 0x13, 0x6a, 0x23, 0xf0, 0x8d, 0x6a, 0x16, 0x6b, 0x33, 0xcb,
-  0xe0, 0x37, 0xa0, 0xa3, 0xdf, 0xc1, 0x70, 0xc4, 0x7f, 0x07, 0xe6, 0x2c,
-  0x0c, 0xdf, 0x81, 0x78, 0x30, 0xcc, 0x70, 0x43, 0xc0, 0xcb, 0x02, 0x19,
-  0xd4, 0x10, 0xe8, 0x70, 0x84, 0x4b, 0xa8, 0xb3, 0x30, 0x7c, 0x15, 0x08,
-  0x7a, 0x30, 0x31, 0xcc, 0x70, 0x43, 0xf0, 0xcb, 0x02, 0x19, 0x54, 0x30,
-  0xe8, 0x2c, 0xc3, 0xdf, 0xd0, 0x4e, 0x70, 0xa3, 0x2c, 0x0c, 0x73, 0xb4,
-  0x2b, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x55, 0x59, 0x0b,
-  0x31, 0x2d, 0xb4, 0xb3, 0xf0, 0xd3, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26,
-  0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x87, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xc3, 0xd6, 0x02, 0x4e, 0x0b, 0x07,
-  0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xd3, 0xd6, 0x42, 0x4e,
-  0x0b, 0x0c, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xe3, 0xd6,
-  0x82, 0x4e, 0x0b, 0x12, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1,
-  0xc6, 0xd6, 0x42, 0x4e, 0x0b, 0xf7, 0x2c, 0x04, 0x66, 0x2d, 0x98, 0xb4,
-  0x40, 0xd6, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x09, 0x63,
-  0x9c, 0x30, 0x46, 0x05, 0x2e, 0x2d, 0x5c, 0x0d, 0xc1, 0x5e, 0x60, 0xd8,
-  0x2c, 0x01, 0xed, 0x0c, 0x37, 0xdc, 0x78, 0x10, 0xd7, 0x02, 0x18, 0xcc,
-  0x32, 0x84, 0x8e, 0xe8, 0x04, 0x75, 0xcf, 0x42, 0x58, 0x0b, 0x70, 0x81,
-  0x51, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xb0, 0xf0, 0xb5, 0x20, 0xd6,
-  0x82, 0x8f, 0x07, 0x25, 0x2d, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1,
-  0xd2, 0xd7, 0x82, 0x58, 0x0b, 0x81, 0x70, 0xc1, 0x30, 0xa5, 0xcf, 0x82,
-  0x59, 0x0b, 0x70, 0x81, 0x51, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xb0,
-  0x84, 0xb6, 0x70, 0xd6, 0x82, 0x5b, 0xa8, 0xb4, 0x30, 0x62, 0x70, 0x00,
-  0x20, 0x08, 0x06, 0x8b, 0x68, 0x0b, 0x67, 0x2d, 0x04, 0xc2, 0x05, 0xc3,
-  0x5c, 0x60, 0xd4, 0x1d, 0x46, 0xdd, 0x38, 0x0b, 0xc3, 0x1c, 0xfd, 0x0a,
-  0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82,
-  0x41, 0x75, 0xda, 0xc2, 0x5c, 0x0b, 0x2f, 0x2d, 0x84, 0xb6, 0x30, 0x9a,
-  0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4,
-  0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xf0, 0xb8, 0xb6,
-  0xa0, 0xd7, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xf0,
-  0xbc, 0xb6, 0xb0, 0xd7, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82,
-  0x60, 0xf0, 0xc0, 0xb6, 0xc0, 0xd7, 0x42, 0x42, 0x04, 0x23, 0x06, 0x0a,
-  0x00, 0x82, 0x60, 0xb0, 0xb9, 0xb6, 0xb0, 0xd7, 0x42, 0x4e, 0x0b, 0x01,
-  0x6a, 0x0b, 0x68, 0x2d, 0x98, 0xb6, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09,
-  0x42, 0x70, 0xc2, 0x18, 0x27, 0x8c, 0x51, 0x01, 0x5c, 0x0b, 0x57, 0x43,
-  0xb0, 0x17, 0x18, 0x36, 0x4b, 0x40, 0x3b, 0xc3, 0x0d, 0x79, 0x1e, 0xc8,
-  0xb6, 0x00, 0x06, 0xb3, 0x0c, 0xa3, 0x43, 0x3b, 0x81, 0xc1, 0xb4, 0x20,
-  0xd3, 0x42, 0x7c, 0x86, 0x23, 0xfe, 0x3c, 0x98, 0x69, 0x81, 0xf8, 0x66,
-  0x19, 0x48, 0xe7, 0x74, 0x02, 0xa3, 0x69, 0x01, 0xd4, 0x83, 0xf8, 0x58,
-  0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x30, 0xca, 0x02, 0x43, 0x3e, 0x56,
-  0x04, 0xf1, 0x29, 0x82, 0xb7, 0x05, 0x1d, 0x6e, 0x08, 0x74, 0x5b, 0x00,
-  0x83, 0x59, 0x86, 0xd2, 0x31, 0x9d, 0xc0, 0x06, 0x9e, 0x16, 0xe0, 0x33,
-  0x4b, 0xb0, 0x3a, 0xb6, 0xd3, 0x02, 0x11, 0x9f, 0x59, 0x82, 0xd5, 0x19,
-  0x8e, 0x50, 0xf5, 0x80, 0xa7, 0x05, 0xe1, 0x9b, 0x65, 0x40, 0x9d, 0xd5,
-  0x09, 0x6c, 0xd5, 0x83, 0x9e, 0x16, 0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b,
-  0x86, 0xb9, 0xc0, 0x28, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88,
-  0xf3, 0x16, 0x74, 0xb8, 0x21, 0x28, 0x6f, 0x01, 0x0c, 0x66, 0x19, 0x52,
-  0x47, 0x75, 0x02, 0x2b, 0x6b, 0x61, 0x88, 0xcf, 0x2c, 0xc1, 0xea, 0x18,
-  0x81, 0xd6, 0x02, 0x7c, 0x66, 0x09, 0x56, 0x67, 0xa0, 0xc5, 0xd0, 0x4a,
-  0x07, 0x33, 0x1d, 0x22, 0x75, 0x04, 0xd5, 0x51, 0xeb, 0xe1, 0x74, 0x2e,
-  0x18, 0xc6, 0xce, 0x5a, 0x58, 0x6b, 0x21, 0x3e, 0xc3, 0x11, 0xe3, 0xc1,
-  0xd6, 0x02, 0xf1, 0xcd, 0x32, 0xb0, 0xce, 0xeb, 0x04, 0xd6, 0xd6, 0x02,
-  0x79, 0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x51, 0x16,
-  0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0xf5, 0x2d, 0xe8, 0x70, 0x43,
-  0x30, 0xdf, 0x02, 0x18, 0xcc, 0x32, 0xb4, 0x8e, 0xeb, 0x04, 0x36, 0xd4,
-  0xb5, 0x00, 0x9f, 0x59, 0x82, 0xd9, 0x31, 0xb9, 0x16, 0x88, 0xf8, 0xcc,
-  0x12, 0xcc, 0xce, 0x70, 0x84, 0x7b, 0xcc, 0xb5, 0x20, 0x7c, 0xb3, 0x0c,
-  0xb0, 0x33, 0x3b, 0x81, 0xbd, 0x07, 0x5d, 0x0b, 0xf1, 0xb1, 0xc0, 0xa1,
-  0xcf, 0x05, 0xc3, 0x5c, 0x60, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2,
-  0x53, 0x04, 0x88, 0x0b, 0x3a, 0xdc, 0x10, 0xf8, 0xb7, 0x00, 0x06, 0xb3,
-  0x0c, 0xb1, 0x23, 0x3b, 0x81, 0xf1, 0xb5, 0x30, 0xc4, 0x67, 0x96, 0x60,
-  0x76, 0x8c, 0x08, 0x6d, 0x01, 0x3e, 0xb3, 0x04, 0xb3, 0x33, 0xd0, 0x62,
-  0x68, 0xad, 0x83, 0xb9, 0x0e, 0x11, 0x3b, 0x82, 0xec, 0xc8, 0xcf, 0xeb,
-  0x5c, 0x30, 0xcc, 0x05, 0x46, 0xdd, 0x66, 0xd4, 0xb5, 0xb5, 0x30, 0xcc,
-  0xf9, 0xb1, 0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01,
-  0x80, 0x20, 0x18, 0x54, 0x31, 0x2e, 0xf4, 0xb7, 0x90, 0xdb, 0xc2, 0x8a,
-  0x0b, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30,
-  0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06,
-  0x0f, 0x8e, 0x0b, 0x24, 0x2e, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20,
-  0x08, 0x06, 0x4f, 0x8e, 0x0b, 0x25, 0x2e, 0x24, 0x44, 0x30, 0x62, 0x80,
-  0x00, 0x20, 0x08, 0x06, 0x8f, 0x8e, 0x0b, 0x26, 0x2e, 0x24, 0x44, 0x30,
-  0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x1b, 0x8e, 0x0b, 0x25, 0x2e, 0x8c,
-  0xb7, 0x10, 0xc8, 0xb8, 0x20, 0xdf, 0x02, 0x8c, 0x0b, 0xa3, 0x09, 0x01,
-  0x30, 0x9a, 0x20, 0x04, 0x27, 0x8c, 0x71, 0xc2, 0x18, 0x15, 0xe8, 0xb7,
-  0x70, 0x35, 0x04, 0x7b, 0x81, 0x61, 0xb3, 0x04, 0xb4, 0x33, 0xd0, 0x62,
-  0xe8, 0xc6, 0xdf, 0xf0, 0x6e, 0xe1, 0x37, 0x36, 0x11, 0x3a, 0xc2, 0xec,
-  0xf0, 0x6e, 0x21, 0x3a, 0xb3, 0x0c, 0xb5, 0x73, 0x3b, 0x23, 0x1f, 0x0c,
-  0x47, 0xc0, 0x73, 0xf0, 0xde, 0xc2, 0xf0, 0x5d, 0x3c, 0x07, 0xc3, 0x0c,
-  0x37, 0x04, 0xe5, 0x2d, 0x90, 0x41, 0x0d, 0x81, 0x0e, 0x47, 0xdc, 0xc8,
-  0x7c, 0x0b, 0xc3, 0x57, 0x81, 0xa0, 0x97, 0x23, 0xc3, 0x0c, 0x37, 0x04,
-  0xe8, 0x2d, 0x90, 0x41, 0x05, 0x83, 0xce, 0x32, 0xd8, 0xce, 0xfa, 0x04,
-  0xc7, 0xda, 0xc2, 0x30, 0xd7, 0xcb, 0xc2, 0x30, 0x23, 0x06, 0x07, 0x00,
-  0x82, 0x60, 0x50, 0xb9, 0xb9, 0xa0, 0xe3, 0x82, 0x7d, 0x0b, 0x68, 0x2e,
-  0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68,
-  0x02, 0x31, 0x14, 0x71, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x3c,
-  0x75, 0x2e, 0x84, 0xb9, 0x70, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20,
-  0x18, 0x3c, 0x76, 0x2e, 0x88, 0xb9, 0xc0, 0x10, 0xc1, 0x88, 0x01, 0x02,
-  0x80, 0x20, 0x18, 0x3c, 0x77, 0x2e, 0x8c, 0xb9, 0x20, 0x11, 0xc1, 0x88,
-  0x81, 0x02, 0x80, 0x20, 0x18, 0x6c, 0x75, 0x2e, 0x88, 0xb9, 0x00, 0xe2,
-  0x42, 0xf0, 0xe6, 0xc2, 0x8b, 0x0b, 0x6d, 0x2e, 0x8c, 0x26, 0x04, 0xc0,
-  0x68, 0x82, 0x10, 0x9c, 0x30, 0xc6, 0x09, 0x63, 0x54, 0x70, 0xe3, 0xc2,
-  0xd5, 0x10, 0xec, 0x05, 0x86, 0xcd, 0x12, 0xac, 0xcf, 0x70, 0x03, 0xd8,
-  0x07, 0x7a, 0x2e, 0x80, 0xc1, 0x2c, 0x03, 0xee, 0xe4, 0x4e, 0x50, 0x20,
-  0x2e, 0xa8, 0xb9, 0x00, 0x17, 0x18, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08,
-  0x06, 0x4b, 0xa9, 0x0b, 0x6b, 0x2e, 0xbc, 0x74, 0xe0, 0xe2, 0xc2, 0x88,
-  0xc1, 0x01, 0x80, 0x20, 0x18, 0x2c, 0xa6, 0x2e, 0xac, 0xb9, 0x10, 0x08,
-  0x17, 0x0c, 0x53, 0x23, 0x2e, 0xbc, 0xb9, 0x00, 0x17, 0x18, 0x35, 0x62,
-  0x70, 0x00, 0x20, 0x08, 0x06, 0x8b, 0xaa, 0x0b, 0x70, 0x2e, 0xdc, 0xc9,
-  0x8c, 0x0b, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xb0, 0xac, 0xba, 0x00,
-  0xe7, 0x42, 0x20, 0x5c, 0x30, 0xcc, 0x05, 0x46, 0xdd, 0x61, 0xd4, 0xb1,
-  0xb7, 0x30, 0xcc, 0xf5, 0xb3, 0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc,
-  0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x54, 0xb0, 0x2e, 0xf0, 0xb9, 0x80,
-  0xe3, 0x82, 0xaa, 0x0b, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3,
-  0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00,
-  0x20, 0x08, 0x06, 0xcf, 0xad, 0x0b, 0xa3, 0x2e, 0x24, 0x44, 0x30, 0x62,
-  0x80, 0x00, 0x20, 0x08, 0x06, 0x0f, 0xae, 0x0b, 0xa4, 0x2e, 0x24, 0x44,
-  0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x4f, 0xae, 0x0b, 0xa5, 0x2e,
-  0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0xdb, 0xad, 0x0b,
-  0xa4, 0x2e, 0x88, 0xb9, 0x10, 0xc4, 0xba, 0x10, 0xe7, 0xc2, 0xab, 0x0b,
-  0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0x27, 0x8c, 0x71, 0xc2, 0x18,
-  0x15, 0xe4, 0xb9, 0x70, 0x35, 0x04, 0x7b, 0x81, 0x61, 0xb3, 0x04, 0xeb,
-  0x33, 0xdc, 0x20, 0xfa, 0xc1, 0xae, 0x0b, 0x60, 0x30, 0xcb, 0xa0, 0x3b,
-  0xeb, 0x13, 0x58, 0x8e, 0x0b, 0x3b, 0x2e, 0xc4, 0x67, 0x38, 0x02, 0xae,
-  0x03, 0x1e, 0x17, 0x88, 0x6f, 0x96, 0x61, 0x77, 0x7c, 0x27, 0xb0, 0x1e,
-  0x17, 0xe2, 0x3a, 0x88, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x02,
-  0xa3, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xca, 0x5d, 0xd0,
-  0xe1, 0x86, 0x60, 0xdc, 0x05, 0x30, 0x98, 0x65, 0xe0, 0x9d, 0xde, 0x09,
-  0x6c, 0x28, 0x73, 0x01, 0x3e, 0xb3, 0x04, 0xe2, 0x63, 0x64, 0x2e, 0x10,
-  0xf1, 0x99, 0x25, 0x10, 0x9f, 0xe1, 0x88, 0xbd, 0x0e, 0xca, 0x5c, 0x10,
-  0xbe, 0x59, 0x86, 0xdf, 0x11, 0x9f, 0xc0, 0xf8, 0x3a, 0x30, 0x73, 0x21,
-  0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x8c, 0xb2, 0x20, 0x92,
-  0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x80, 0x77, 0x41, 0x87, 0x1b, 0x02, 0x77,
-  0x17, 0xc0, 0x60, 0x96, 0x01, 0x7c, 0xc2, 0x27, 0x30, 0x37, 0x17, 0x86,
-  0xf8, 0xcc, 0x12, 0x88, 0x8f, 0x11, 0x71, 0x2e, 0xc0, 0x67, 0x96, 0x40,
-  0x7c, 0x06, 0x5a, 0x0c, 0x8d, 0x77, 0xb0, 0xde, 0x21, 0xc0, 0x47, 0x08,
-  0x1f, 0x96, 0x1d, 0x7c, 0xe7, 0x82, 0x61, 0x0c, 0xce, 0x05, 0x3a, 0x17,
-  0xe2, 0x33, 0x1c, 0xc1, 0x2e, 0x75, 0x2e, 0x10, 0xdf, 0x2c, 0xc3, 0xf8,
-  0x98, 0x4f, 0x60, 0x76, 0x2e, 0xb4, 0x4b, 0x7c, 0x2c, 0x18, 0xe8, 0x73,
-  0xc1, 0x30, 0x17, 0x18, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14,
-  0xe1, 0xef, 0x82, 0x0e, 0x37, 0x04, 0xfc, 0x2e, 0x80, 0xc1, 0x2c, 0x03,
-  0xf9, 0x94, 0x4f, 0x60, 0x83, 0x9f, 0x0b, 0xf0, 0x99, 0x25, 0x50, 0x1f,
-  0xdb, 0x73, 0x81, 0x88, 0xcf, 0x2c, 0x81, 0xfa, 0x0c, 0x47, 0xdc, 0x0b,
-  0x9f, 0x0b, 0xc2, 0x37, 0xcb, 0x70, 0x3e, 0xea, 0x13, 0x18, 0xbe, 0xf4,
-  0xb9, 0x10, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x46, 0x59,
-  0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xa4, 0xbc, 0xa0, 0xc3, 0x0d,
-  0xc1, 0xc9, 0x0b, 0x60, 0x30, 0xcb, 0x80, 0x3e, 0xe9, 0x13, 0x58, 0xa9,
-  0x0b, 0x43, 0x7c, 0x66, 0x09, 0xd4, 0xc7, 0x08, 0x55, 0x17, 0xe0, 0x33,
-  0x4b, 0xa0, 0x3e, 0x03, 0x2d, 0x86, 0x46, 0x3e, 0x58, 0xf9, 0x10, 0xe8,
-  0x23, 0xa4, 0x0f, 0x89, 0x99, 0xcf, 0x05, 0xc3, 0x5c, 0x60, 0xd4, 0x6d,
-  0x46, 0x9d, 0x9d, 0x0b, 0xc3, 0xdc, 0x69, 0x0b, 0xc3, 0x1c, 0x31, 0xcc,
-  0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x41, 0xa5, 0xf3, 0x82,
-  0xc9, 0x0b, 0xe2, 0x2e, 0xd0, 0xbc, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09,
-  0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23,
-  0x06, 0x08, 0x00, 0x82, 0x60, 0xf0, 0x84, 0xbd, 0xd0, 0xf2, 0x42, 0x42,
-  0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xf0, 0x88, 0xbd, 0xe0, 0xf2,
-  0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xf0, 0x8c, 0xbd,
-  0xf0, 0xf2, 0x42, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0,
-  0x85, 0xbd, 0xe0, 0xf2, 0x02, 0xbb, 0x0b, 0xc1, 0xce, 0x0b, 0xfb, 0x2e,
-  0xe4, 0xbc, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x70, 0xc2, 0x18,
-  0x27, 0x8c, 0x51, 0xc1, 0xc8, 0x0b, 0x57, 0x43, 0xb0, 0x17, 0x18, 0x36,
-  0x4b, 0xb0, 0x3e, 0x03, 0x2d, 0x86, 0x6e, 0xd8, 0x8e, 0x7b, 0x17, 0xb5,
-  0x63, 0x13, 0xb8, 0x23, 0xa8, 0x8f, 0x7b, 0x17, 0xb9, 0x33, 0xcb, 0xc0,
-  0x3e, 0xee, 0xc3, 0x82, 0xc2, 0x70, 0xc4, 0x3b, 0x07, 0xf8, 0x2e, 0x0c,
-  0xdf, 0xc1, 0x73, 0x30, 0xcc, 0x70, 0x43, 0xe0, 0xee, 0x02, 0x19, 0xd4,
-  0x10, 0xe8, 0x70, 0x04, 0xd8, 0xf0, 0xbb, 0x30, 0x7c, 0x15, 0x08, 0x7a,
-  0x62, 0x33, 0xcc, 0x70, 0x43, 0x10, 0xef, 0x02, 0x19, 0x54, 0x30, 0xe8,
-  0x2c, 0x43, 0xfb, 0x88, 0x50, 0x70, 0xb5, 0x2e, 0x0c, 0x73, 0xe6, 0x2d,
-  0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0xd5, 0xdd, 0x0b, 0x63,
-  0x2f, 0xfc, 0xbb, 0x10, 0xf7, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08,
-  0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x87, 0x8c, 0x18,
-  0x20, 0x00, 0x08, 0x82, 0xc1, 0xe3, 0xf7, 0x82, 0xda, 0x0b, 0x07, 0x11,
-  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xf3, 0xf7, 0xc2, 0xda, 0x0b,
-  0x0c, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x03, 0xfa, 0x02,
-  0xdb, 0x0b, 0x12, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0xe6,
-  0xf7, 0xc2, 0xda, 0x0b, 0x29, 0x2f, 0x04, 0x78, 0x2f, 0xe0, 0xbc, 0x60,
-  0xf7, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x09, 0x63, 0x9c,
-  0x30, 0x46, 0x05, 0x60, 0x2f, 0x5c, 0x0d, 0xc1, 0x5e, 0x60, 0xd8, 0x2c,
-  0x81, 0x08, 0x0d, 0x37, 0xa4, 0xa1, 0x30, 0xfa, 0x02, 0x18, 0xcc, 0x32,
-  0xbc, 0x0f, 0xfc, 0x04, 0x95, 0xf2, 0xc2, 0xdc, 0x0b, 0x70, 0x81, 0x51,
-  0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xb0, 0xb8, 0xbe, 0x40, 0xf7, 0x82,
-  0x4b, 0x07, 0x37, 0x2f, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0xf2,
-  0xfa, 0x02, 0xdd, 0x0b, 0x81, 0x70, 0xc1, 0x30, 0xc5, 0xf2, 0x02, 0xde,
-  0x0b, 0x70, 0x81, 0x51, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xb0, 0xcc,
-  0xbe, 0x90, 0xf7, 0x02, 0xe8, 0xf0, 0xbc, 0x30, 0x62, 0x70, 0x00, 0x20,
-  0x08, 0x06, 0x0b, 0xed, 0x0b, 0x79, 0x2f, 0x04, 0xc2, 0x05, 0xc3, 0x5c,
-  0x60, 0xd4, 0x1d, 0x46, 0x5d, 0xbd, 0x0b, 0xc3, 0x9c, 0x89, 0x0b, 0xc3,
-  0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x41,
-  0x95, 0xfb, 0x42, 0xe9, 0x0b, 0x61, 0x2f, 0xcc, 0xbe, 0x30, 0x9a, 0x10,
-  0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50,
-  0x44, 0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xf0, 0x80, 0xbf, 0xc0,
-  0xfa, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xf0, 0x84,
-  0xbf, 0xd0, 0xfa, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60,
-  0xf0, 0x88, 0xbf, 0xe0, 0xfa, 0x42, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00,
-  0x82, 0x60, 0xb0, 0x81, 0xbf, 0xd0, 0xfa, 0xc2, 0xda, 0x0b, 0x81, 0xee,
-  0x0b, 0x7a, 0x2f, 0xe0, 0xbe, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42,
-  0x70, 0xc2, 0x18, 0x27, 0x8c, 0x51, 0x81, 0xe8, 0x0b, 0x57, 0x43, 0xb0,
-  0x17, 0x18, 0x36, 0x4b, 0x20, 0x42, 0xc3, 0x0d, 0xab, 0x28, 0x90, 0xbf,
-  0x00, 0x06, 0xb3, 0x0c, 0xf1, 0x23, 0x42, 0x81, 0x89, 0xbd, 0x40, 0xf6,
-  0x42, 0x7c, 0x86, 0x23, 0xde, 0x3a, 0x28, 0x7b, 0x81, 0xf8, 0x66, 0x19,
-  0xe4, 0xa7, 0x7e, 0x02, 0x33, 0x7b, 0x01, 0xae, 0x83, 0xf8, 0x58, 0x30,
-  0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x30, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04,
-  0xf1, 0x29, 0xc2, 0xfd, 0x05, 0x1d, 0x6e, 0x08, 0xd8, 0x5f, 0x00, 0x83,
-  0x59, 0x86, 0xf9, 0xa1, 0x9f, 0xc0, 0x06, 0xb7, 0x17, 0xe0, 0x33, 0x4b,
-  0x90, 0x3f, 0xd6, 0xf6, 0x02, 0x11, 0x9f, 0x59, 0x82, 0xfc, 0x19, 0x8e,
-  0xd0, 0xeb, 0xc0, 0xed, 0x05, 0xe1, 0x9b, 0x65, 0xb0, 0x9f, 0xfc, 0x09,
-  0x6c, 0xaf, 0x83, 0xb7, 0x17, 0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86,
-  0xb9, 0xc0, 0x28, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0xfc,
-  0x17, 0x74, 0xb8, 0x21, 0xb8, 0x7f, 0x01, 0x0c, 0x66, 0x19, 0xee, 0x07,
-  0x7f, 0x02, 0xbb, 0x7b, 0x61, 0x88, 0xcf, 0x2c, 0x41, 0xfe, 0x18, 0xa1,
-  0xf7, 0x02, 0x7c, 0x66, 0x09, 0xf2, 0x67, 0xa0, 0xc5, 0xd0, 0xe6, 0x07,
-  0xa3, 0x1f, 0xe2, 0x7e, 0x04, 0xfc, 0x51, 0xd9, 0xa1, 0x7e, 0x2e, 0x18,
-  0xc6, 0xf2, 0x5e, 0xe8, 0x7b, 0x21, 0x3e, 0xc3, 0x11, 0xf5, 0xe3, 0xf7,
-  0x02, 0xf1, 0xcd, 0x32, 0xe8, 0x4f, 0xff, 0x04, 0xf6, 0xf7, 0x82, 0xfd,
-  0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x51, 0x16, 0x18,
-  0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x27, 0x38, 0xe8, 0x70, 0x43, 0x50,
-  0x82, 0x03, 0x18, 0xcc, 0x32, 0xec, 0x0f, 0xff, 0x04, 0x36, 0x9c, 0xbe,
-  0x00, 0x9f, 0x59, 0x82, 0x10, 0x32, 0xd2, 0x17, 0x88, 0xf8, 0xcc, 0x12,
-  0x84, 0xd0, 0x70, 0x04, 0x08, 0x95, 0xbe, 0x20, 0x7c, 0xb3, 0x0c, 0xfe,
-  0x13, 0x42, 0x81, 0x85, 0x90, 0xe9, 0x0b, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf,
-  0x05, 0xc3, 0x5c, 0x60, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53,
-  0x84, 0x0c, 0x0e, 0x3a, 0xdc, 0x10, 0xc0, 0xe0, 0x00, 0x06, 0xb3, 0x0c,
-  0xff, 0x03, 0x42, 0x81, 0xb9, 0xbe, 0x30, 0xc4, 0x67, 0x96, 0x20, 0x84,
-  0x8c, 0x98, 0x7d, 0x01, 0x3e, 0xb3, 0x04, 0x21, 0x34, 0xd0, 0x62, 0x68,
-  0xfb, 0x83, 0xf1, 0x0f, 0xf1, 0x3f, 0x02, 0x08, 0xd9, 0x60, 0xd0, 0x3f,
-  0x17, 0x0c, 0x73, 0x81, 0x51, 0xb7, 0x19, 0x75, 0x7f, 0x2f, 0x0c, 0x73,
-  0xb0, 0x2e, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00,
-  0x20, 0x08, 0x06, 0xd5, 0x18, 0x0e, 0x2f, 0x38, 0xac, 0xbf, 0xd0, 0x83,
-  0xc3, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c,
-  0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1,
-  0xa3, 0x86, 0x83, 0x0d, 0x0e, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08,
-  0x82, 0xc1, 0xb3, 0x86, 0xc3, 0x0d, 0x0e, 0x09, 0x11, 0x8c, 0x18, 0x20,
-  0x00, 0x08, 0x82, 0xc1, 0xc3, 0x86, 0x03, 0x0e, 0x0e, 0x09, 0x11, 0x8c,
-  0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0xa6, 0x86, 0xc3, 0x0d, 0x0e, 0xf5,
-  0x2f, 0x04, 0x64, 0x38, 0x90, 0xe0, 0x20, 0x86, 0xc3, 0x68, 0x42, 0x00,
-  0x8c, 0x26, 0x08, 0xc1, 0x09, 0x63, 0x9c, 0x30, 0x46, 0x05, 0x2c, 0x38,
-  0x5c, 0x0d, 0xc1, 0x5e, 0x60, 0xd8, 0x2c, 0x81, 0x08, 0x0d, 0xb4, 0x18,
-  0xba, 0xd1, 0x3e, 0x20, 0x68, 0xb0, 0x8f, 0x4d, 0xbc, 0x8f, 0x10, 0x42,
-  0x20, 0x68, 0xc0, 0xcf, 0x88, 0x81, 0x01, 0x80, 0x20, 0x18, 0x1c, 0x7a,
-  0x38, 0xdc, 0xe0, 0x70, 0xee, 0xc2, 0x88, 0x81, 0x01, 0x80, 0x20, 0x18,
-  0x1c, 0x7b, 0x38, 0xe0, 0xe0, 0x70, 0xee, 0x82, 0x05, 0x81, 0x7c, 0x2c,
-  0x10, 0xe4, 0x63, 0x30, 0x28, 0x9c, 0xe0, 0x20, 0x9f, 0x11, 0x03, 0x04,
-  0x00, 0x41, 0x30, 0x48, 0xfc, 0x70, 0x08, 0xc3, 0x21, 0x05, 0x07, 0xd3,
-  0x0b, 0x4c, 0x06, 0x05, 0x19, 0x1c, 0xe4, 0x33, 0x62, 0x80, 0x00, 0x20,
-  0x08, 0x06, 0x09, 0x28, 0x0e, 0x63, 0x38, 0xc4, 0xe0, 0xd0, 0xbe, 0x41,
-  0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x49, 0x28, 0x0e, 0x64, 0x38,
-  0xd0, 0xe0, 0x90, 0x7a, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x24,
-  0xa2, 0x38, 0x94, 0xe1, 0xc0, 0x82, 0x43, 0xd9, 0x04, 0x23, 0x06, 0x08,
-  0x00, 0x82, 0x60, 0x90, 0x8c, 0xe2, 0x60, 0x86, 0xc3, 0x0c, 0x0e, 0xf0,
-  0x1b, 0x18, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x90, 0x90, 0xe2, 0x70,
-  0x86, 0xc3, 0x0c, 0x0e, 0xac, 0x17, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
-  0x41, 0x52, 0x8a, 0x03, 0x1a, 0x0e, 0x30, 0x38, 0xa0, 0x4d, 0x30, 0x62,
-  0xd0, 0x00, 0x20, 0x08, 0x06, 0x0d, 0x29, 0x0e, 0x68, 0x38, 0xbc, 0xe0,
-  0xc0, 0x2c, 0xca, 0xfc, 0x06, 0x08, 0x11, 0x58, 0xcc, 0x07, 0x31, 0x38,
-  0xc8, 0x67, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x12, 0x54, 0x1c, 0xd6,
-  0x70, 0x98, 0xc1, 0xe1, 0xd4, 0x02, 0x9b, 0xf9, 0x80, 0x07, 0x07, 0xf9,
-  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x41, 0xa2, 0x8a, 0x43, 0x1b, 0x0e,
-  0x3b, 0x38, 0xb8, 0x6b, 0x10, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x41,
-  0xb2, 0x8a, 0x83, 0x1b, 0x0e, 0x3e, 0x38, 0xa8, 0x5a, 0x30, 0x62, 0x80,
-  0x00, 0x20, 0x08, 0x06, 0x09, 0x2b, 0x0e, 0x6f, 0x38, 0xd8, 0xe0, 0x60,
-  0x26, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x24, 0xad, 0x38, 0xc0,
-  0xe1, 0xd0, 0x83, 0x43, 0xbc, 0x06, 0xc6, 0x88, 0x01, 0x02, 0x80, 0x20,
-  0x18, 0x24, 0xae, 0x38, 0xc4, 0xe1, 0xd0, 0x83, 0x43, 0xab, 0x05, 0x23,
-  0x06, 0x08, 0x00, 0x82, 0x60, 0x90, 0xbc, 0xe2, 0x20, 0x87, 0x83, 0x0e,
-  0x0e, 0x69, 0x12, 0x8c, 0x18, 0x34, 0x00, 0x08, 0x82, 0x41, 0xe3, 0x8a,
-  0x83, 0x1c, 0x0e, 0x39, 0x38, 0x58, 0x15, 0x45, 0xaf, 0x01, 0x42, 0x04,
-  0x16, 0xe3, 0xc1, 0x0e, 0x0e, 0xf2, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04,
-  0x83, 0x44, 0x16, 0x87, 0x3a, 0x1c, 0x7a, 0x70, 0x40, 0xad, 0xc0, 0x66,
-  0x3c, 0x30, 0xc3, 0x41, 0x3e, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x90,
-  0xd0, 0xe2, 0x70, 0x87, 0x43, 0x19, 0x0e, 0xef, 0x19, 0x04, 0x23, 0x06,
-  0x08, 0x00, 0x82, 0x60, 0x90, 0xd4, 0xe2, 0x80, 0x87, 0x03, 0x1a, 0x0e,
-  0xab, 0x15, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x41, 0x62, 0x8b, 0x43,
-  0x1e, 0x0e, 0x60, 0x38, 0x9c, 0x45, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08,
-  0x06, 0xc9, 0x2d, 0x0e, 0x7a, 0x38, 0x9c, 0xe1, 0x20, 0x9f, 0x81, 0x31,
-  0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x09, 0x2e, 0x0e, 0x7b, 0x38, 0x9c,
-  0xe1, 0xe0, 0x5a, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x24, 0xb9,
-  0x38, 0xf0, 0xe1, 0x40, 0x86, 0x83, 0x5a, 0x04, 0x23, 0x06, 0x0d, 0x00,
-  0x82, 0x60, 0xd0, 0xe0, 0xe2, 0xc0, 0x87, 0xc3, 0x18, 0x0e, 0x60, 0xf0,
-  0x79, 0xf5, 0x19, 0x20, 0x44, 0x60, 0x31, 0x1d, 0x94, 0xe1, 0x20, 0x9f,
-  0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0x48, 0x78, 0x71, 0xf8, 0xc3, 0xe1,
-  0x0c, 0x87, 0x54, 0x0a, 0x6c, 0xa6, 0x03, 0x38, 0x1c, 0xe4, 0x33, 0x62,
-  0x80, 0x00, 0x20, 0x08, 0x06, 0x89, 0x2f, 0x0e, 0xa1, 0x38, 0xbc, 0xe1,
-  0x00, 0x8f, 0x41, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xc9, 0x2f,
-  0x0e, 0xa2, 0x38, 0xc8, 0xe1, 0xc0, 0x4a, 0xc1, 0x88, 0x01, 0x02, 0x80,
-  0x20, 0x18, 0x24, 0xe0, 0x38, 0x8c, 0xe2, 0xa0, 0x86, 0x03, 0x1a, 0x04,
-  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x90, 0x84, 0xe3, 0x40, 0x8a, 0x43,
-  0x1c, 0x0e, 0xf3, 0x18, 0x18, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x90,
-  0x88, 0xe3, 0x50, 0x8a, 0x43, 0x1c, 0x0e, 0xaf, 0x14, 0x8c, 0x18, 0x20,
-  0x00, 0x08, 0x82, 0x41, 0x32, 0x8e, 0x83, 0x29, 0x0e, 0x6e, 0x38, 0xac,
-  0x41, 0x30, 0x62, 0xd0, 0x00, 0x20, 0x08, 0x06, 0x8d, 0x38, 0x0e, 0xa6,
-  0x38, 0xb4, 0xe1, 0xa0, 0x06, 0x69, 0x80, 0x06, 0xf6, 0x18, 0x20, 0x44,
-  0x60, 0x6c, 0xc0, 0x06, 0xf2, 0xb1, 0xa0, 0x0d, 0xe4, 0x63, 0x61, 0x10,
-  0x87, 0x83, 0x7c, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x20, 0x41, 0xc7,
-  0x61, 0x15, 0x87, 0x39, 0x1c, 0x9c, 0xc0, 0xc6, 0x80, 0x0f, 0x07, 0xf9,
-  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x41, 0xa2, 0x8e, 0x43, 0x2b, 0x0e,
-  0x7b, 0x38, 0x68, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x24, 0xeb,
-  0x38, 0xb8, 0xe2, 0xe0, 0x87, 0x43, 0x14, 0x8c, 0x18, 0x20, 0x00, 0x08,
-  0x82, 0x41, 0xc2, 0x8e, 0xc3, 0x2b, 0x0e, 0x76, 0x38, 0x20, 0xc1, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x24, 0xed, 0x38, 0xc0, 0xe2, 0xd0, 0x87,
-  0x43, 0x67, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x41, 0xe2, 0x8e, 0x43,
-  0x2c, 0x0e, 0x7d, 0x38, 0x50, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
-  0x24, 0xef, 0x38, 0xc8, 0xe2, 0xa0, 0x87, 0xc3, 0x12, 0x8c, 0x18, 0x34,
-  0x00, 0x08, 0x82, 0x41, 0xe3, 0x8e, 0x83, 0x2c, 0x0e, 0x79, 0x38, 0xdc,
-  0xc1, 0xa2, 0x80, 0x01, 0x42, 0x04, 0x17, 0x8c, 0x74, 0xc1, 0x00, 0x25,
-  0xa8, 0xe2, 0x80, 0x17, 0x0c, 0x30, 0x62, 0xe0, 0x00, 0x20, 0x08, 0x06,
-  0x9d, 0x3b, 0x0e, 0xb2, 0x38, 0xf0, 0xe1, 0x50, 0x87, 0x43, 0x3a, 0x0e,
-  0x43, 0x60, 0x8b, 0x83, 0x2d, 0x0e, 0xa9, 0x38, 0x9c, 0xe3, 0x30, 0x4b,
-  0x30, 0x42, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00
-};
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_int_double.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_int_double.h
deleted file mode 100644
index c7f3531370128..0000000000000
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_int_double.h
+++ /dev/null
@@ -1,6296 +0,0 @@
-#if 0
-;
-; Note: shader requires additional functionality:
-;       Double-precision floating point
-;
-;
-; Input signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; no parameters
-;
-; Output signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; no parameters
-; shader hash: ea9c912f56af6feb988be51c1b10c096
-;
-; Pipeline Runtime Information: 
-;
-;
-;
-; Buffer Definitions:
-;
-; cbuffer 
-; {
-;
-;   [116 x i8] (type annotation not present)
-;
-; }
-;
-; Resource bind info for 
-; {
-;
-;   [4 x i8] (type annotation not present)
-;
-; }
-;
-; Resource bind info for 
-; {
-;
-;   [8 x i8] (type annotation not present)
-;
-; }
-;
-; Resource bind info for 
-; {
-;
-;   [4 x i8] (type annotation not present)
-;
-; }
-;
-;
-; Resource Bindings:
-;
-; Name                                 Type  Format         Dim      ID      HLSL Bind  Count
-; ------------------------------ ---------- ------- ----------- ------- -------------- ------
-;                                   cbuffer      NA          NA     CB0            cb0     1
-;                                       UAV  struct         r/w      U0             u0     1
-;                                       UAV  struct         r/w      U1             u1     1
-;                                       UAV  struct         r/w      U2             u2     1
-;
-target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
-target triple = "dxil-ms-dx"
-
-%dx.types.Handle = type { i8* }
-%dx.types.CBufRet.i32 = type { i32, i32, i32, i32 }
-%dx.types.ResRet.i32 = type { i32, i32, i32, i32, i32 }
-%"class.RWStructuredBuffer<int>" = type { i32 }
-%"class.RWStructuredBuffer<double>" = type { double }
-%Constants = type { i32, i32, i32, i32, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32 }
-
-define void @GridSample() {
-  %1 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 2, i32 2, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %2 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 1, i32 1, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %3 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %4 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %5 = call i32 @dx.op.threadId.i32(i32 93, i32 0)  ; ThreadId(component)
-  %6 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
-  %7 = extractvalue %dx.types.CBufRet.i32 %6, 0
-  %8 = add i32 %7, %5
-  %9 = extractvalue %dx.types.CBufRet.i32 %6, 1
-  %10 = icmp ult i32 %8, %9
-  br i1 %10, label %11, label %3323
-
-; <label>:11                                      ; preds = %0
-  %12 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
-  %13 = extractvalue %dx.types.CBufRet.i32 %12, 3
-  %14 = uitofp i32 %13 to float
-  %15 = extractvalue %dx.types.CBufRet.i32 %12, 2
-  %16 = uitofp i32 %15 to float
-  %17 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 7)  ; CBufferLoadLegacy(handle,regIndex)
-  %18 = extractvalue %dx.types.CBufRet.i32 %17, 0
-  %19 = icmp eq i32 %18, 0
-  %20 = select i1 %19, float -5.000000e-01, float 0.000000e+00
-  %21 = select i1 %19, float -5.000000e-01, float -1.000000e+00
-  %22 = fadd float %14, %21
-  %23 = select i1 %19, float -5.000000e-01, float -1.000000e+00
-  %24 = fadd float %16, %23
-  %25 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
-  %26 = extractvalue %dx.types.CBufRet.i32 %25, 1
-  %27 = extractvalue %dx.types.CBufRet.i32 %25, 2
-  %28 = extractvalue %dx.types.CBufRet.i32 %25, 3
-  %29 = mul i32 %28, %27
-  %30 = mul i32 %27, %26
-  %31 = mul i32 %30, %28
-  %32 = udiv i32 %8, %31
-  %33 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 6)  ; CBufferLoadLegacy(handle,regIndex)
-  %34 = extractvalue %dx.types.CBufRet.i32 %33, 0
-  %35 = mul i32 %34, %32
-  %36 = sub i32 %8, %35
-  %37 = udiv i32 %36, %29
-  %38 = extractvalue %dx.types.CBufRet.i32 %33, 1
-  %39 = mul i32 %38, %37
-  %40 = sub i32 %36, %39
-  %41 = udiv i32 %40, %28
-  %42 = extractvalue %dx.types.CBufRet.i32 %33, 2
-  %43 = mul i32 %42, %41
-  %44 = sub i32 %40, %43
-  %45 = uitofp i32 %32 to float
-  %46 = uitofp i32 %41 to float
-  %47 = uitofp i32 %44 to float
-  %48 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
-  %49 = extractvalue %dx.types.CBufRet.i32 %48, 0
-  %50 = extractvalue %dx.types.CBufRet.i32 %48, 1
-  %51 = extractvalue %dx.types.CBufRet.i32 %48, 2
-  %52 = extractvalue %dx.types.CBufRet.i32 %48, 3
-  %53 = uitofp i32 %49 to float
-  %54 = uitofp i32 %50 to float
-  %55 = uitofp i32 %51 to float
-  %56 = uitofp i32 %52 to float
-  %57 = call float @dx.op.dot4.f32(i32 56, float %45, float %46, float %47, float 0.000000e+00, float %53, float %54, float %55, float %56)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %58 = fadd fast float %56, %57
-  %59 = fptoui float %57 to i32
-  %60 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %2, i32 %59, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %61 = extractvalue %dx.types.ResRet.i32 %60, 0
-  %62 = extractvalue %dx.types.ResRet.i32 %60, 1
-  %63 = call double @dx.op.makeDouble.f64(i32 101, i32 %61, i32 %62)  ; MakeDouble(lo,hi)
-  %64 = fptrunc double %63 to float
-  %65 = fptoui float %58 to i32
-  %66 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %2, i32 %65, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %67 = extractvalue %dx.types.ResRet.i32 %66, 0
-  %68 = extractvalue %dx.types.ResRet.i32 %66, 1
-  %69 = call double @dx.op.makeDouble.f64(i32 101, i32 %67, i32 %68)  ; MakeDouble(lo,hi)
-  %70 = fptrunc double %69 to float
-  %71 = icmp eq i32 %18, 1
-  %72 = fadd fast float %64, 1.000000e+00
-  %73 = fadd fast float %70, 1.000000e+00
-  br i1 %71, label %74, label %81
-
-; <label>:74                                      ; preds = %11
-  %75 = fmul fast float %72, 5.000000e-01
-  %76 = fmul fast float %73, 5.000000e-01
-  %77 = fadd fast float %14, -1.000000e+00
-  %78 = fadd fast float %16, -1.000000e+00
-  %79 = fmul fast float %75, %77
-  %80 = fmul fast float %76, %78
-  br label %88
-
-; <label>:81                                      ; preds = %11
-  %82 = fmul fast float %14, %72
-  %83 = fmul fast float %73, %16
-  %84 = fadd fast float %82, -1.000000e+00
-  %85 = fadd fast float %83, -1.000000e+00
-  %86 = fmul fast float %84, 5.000000e-01
-  %87 = fmul fast float %85, 5.000000e-01
-  br label %88
-
-; <label>:88                                      ; preds = %81, %74
-  %89 = phi float [ %79, %74 ], [ %86, %81 ]
-  %90 = phi float [ %80, %74 ], [ %87, %81 ]
-  %91 = extractvalue %dx.types.CBufRet.i32 %6, 2
-  %92 = icmp eq i32 %91, 1
-  br i1 %92, label %93, label %96
-
-; <label>:93                                      ; preds = %88
-  %94 = call float @dx.op.unary.f32(i32 26, float %89)  ; Round_ne(value)
-  %95 = call float @dx.op.unary.f32(i32 26, float %90)  ; Round_ne(value)
-  br label %96
-
-; <label>:96                                      ; preds = %93, %88
-  %97 = phi float [ %94, %93 ], [ %89, %88 ]
-  %98 = phi float [ %95, %93 ], [ %90, %88 ]
-  %99 = fcmp fast olt float %97, %20
-  %100 = fcmp fast ogt float %97, %22
-  %101 = or i1 %99, %100
-  %102 = fcmp fast olt float %98, %20
-  %103 = or i1 %101, %102
-  %104 = fcmp fast ogt float %98, %24
-  %105 = or i1 %104, %103
-  br i1 %105, label %106, label %179
-
-; <label>:106                                     ; preds = %96
-  %107 = extractvalue %dx.types.CBufRet.i32 %6, 3
-  %108 = icmp eq i32 %107, 1
-  br i1 %108, label %109, label %118
-
-; <label>:109                                     ; preds = %106
-  %110 = add i32 %13, -1
-  %111 = uitofp i32 %110 to float
-  %112 = call float @dx.op.binary.f32(i32 35, float %97, float 0.000000e+00)  ; FMax(a,b)
-  %113 = call float @dx.op.binary.f32(i32 36, float %112, float %111)  ; FMin(a,b)
-  %114 = add i32 %15, -1
-  %115 = uitofp i32 %114 to float
-  %116 = call float @dx.op.binary.f32(i32 35, float %98, float 0.000000e+00)  ; FMax(a,b)
-  %117 = call float @dx.op.binary.f32(i32 36, float %116, float %115)  ; FMin(a,b)
-  br label %179
-
-; <label>:118                                     ; preds = %106
-  %119 = icmp eq i32 %107, 2
-  br i1 %119, label %120, label %179
-
-; <label>:120                                     ; preds = %118
-  %121 = fsub fast float %22, %20
-  br i1 %99, label %122, label %135
-
-; <label>:122                                     ; preds = %120
-  %123 = fsub fast float %20, %97
-  %124 = fdiv fast float %123, %121
-  %125 = fptoui float %124 to i32
-  %126 = uitofp i32 %125 to float
-  %127 = fmul fast float %126, %121
-  %128 = fsub fast float %123, %127
-  %129 = and i32 %125, 1
-  %130 = icmp eq i32 %129, 0
-  br i1 %130, label %131, label %133
-
-; <label>:131                                     ; preds = %122
-  %132 = fadd fast float %128, %20
-  br label %149
-
-; <label>:133                                     ; preds = %122
-  %134 = fsub fast float %22, %128
-  br label %149
-
-; <label>:135                                     ; preds = %120
-  br i1 %100, label %136, label %149
-
-; <label>:136                                     ; preds = %135
-  %137 = fsub fast float %97, %22
-  %138 = fdiv fast float %137, %121
-  %139 = fptoui float %138 to i32
-  %140 = uitofp i32 %139 to float
-  %141 = fmul fast float %140, %121
-  %142 = fsub fast float %137, %141
-  %143 = and i32 %139, 1
-  %144 = icmp eq i32 %143, 0
-  br i1 %144, label %145, label %147
-
-; <label>:145                                     ; preds = %136
-  %146 = fsub fast float %22, %142
-  br label %149
-
-; <label>:147                                     ; preds = %136
-  %148 = fadd fast float %142, %20
-  br label %149
-
-; <label>:149                                     ; preds = %147, %145, %135, %133, %131
-  %150 = phi float [ %132, %131 ], [ %134, %133 ], [ %146, %145 ], [ %148, %147 ], [ %97, %135 ]
-  %151 = fsub fast float %24, %20
-  br i1 %102, label %152, label %165
-
-; <label>:152                                     ; preds = %149
-  %153 = fsub fast float %20, %98
-  %154 = fdiv fast float %153, %151
-  %155 = fptoui float %154 to i32
-  %156 = uitofp i32 %155 to float
-  %157 = fmul fast float %156, %151
-  %158 = fsub fast float %153, %157
-  %159 = and i32 %155, 1
-  %160 = icmp eq i32 %159, 0
-  br i1 %160, label %161, label %163
-
-; <label>:161                                     ; preds = %152
-  %162 = fadd fast float %158, %20
-  br label %179
-
-; <label>:163                                     ; preds = %152
-  %164 = fsub fast float %24, %158
-  br label %179
-
-; <label>:165                                     ; preds = %149
-  br i1 %104, label %166, label %179
-
-; <label>:166                                     ; preds = %165
-  %167 = fsub fast float %98, %24
-  %168 = fdiv fast float %167, %151
-  %169 = fptoui float %168 to i32
-  %170 = uitofp i32 %169 to float
-  %171 = fmul fast float %170, %151
-  %172 = fsub fast float %167, %171
-  %173 = and i32 %169, 1
-  %174 = icmp eq i32 %173, 0
-  br i1 %174, label %175, label %177
-
-; <label>:175                                     ; preds = %166
-  %176 = fsub fast float %24, %172
-  br label %179
-
-; <label>:177                                     ; preds = %166
-  %178 = fadd fast float %172, %20
-  br label %179
-
-; <label>:179                                     ; preds = %177, %175, %165, %163, %161, %118, %109, %96
-  %180 = phi float [ %113, %109 ], [ %97, %118 ], [ %97, %96 ], [ %150, %177 ], [ %150, %175 ], [ %150, %165 ], [ %150, %163 ], [ %150, %161 ]
-  %181 = phi float [ %117, %109 ], [ %98, %118 ], [ %98, %96 ], [ %178, %177 ], [ %176, %175 ], [ %98, %165 ], [ %164, %163 ], [ %162, %161 ]
-  %182 = uitofp i32 %37 to float
-  br i1 %92, label %183, label %332
-
-; <label>:183                                     ; preds = %179
-  %184 = extractvalue %dx.types.CBufRet.i32 %6, 3
-  %185 = icmp eq i32 %184, 0
-  br i1 %185, label %186, label %211
-
-; <label>:186                                     ; preds = %183
-  %187 = fcmp fast oge float %180, 0.000000e+00
-  %188 = fptoui float %180 to i32
-  %189 = icmp ult i32 %188, %13
-  %190 = and i1 %187, %189
-  %191 = fcmp fast oge float %181, 0.000000e+00
-  %192 = and i1 %191, %190
-  %193 = fptoui float %181 to i32
-  %194 = icmp ult i32 %193, %15
-  %195 = and i1 %194, %192
-  br i1 %195, label %196, label %329
-
-; <label>:196                                     ; preds = %186
-  %197 = fptoui float %45 to i32
-  %198 = fptoui float %182 to i32
-  %199 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %200 = extractvalue %dx.types.CBufRet.i32 %199, 0
-  %201 = extractvalue %dx.types.CBufRet.i32 %199, 1
-  %202 = extractvalue %dx.types.CBufRet.i32 %199, 2
-  %203 = extractvalue %dx.types.CBufRet.i32 %199, 3
-  %204 = mul i32 %200, %197
-  %205 = call i32 @dx.op.tertiary.i32(i32 48, i32 %198, i32 %201, i32 %204)  ; IMad(a,b,c)
-  %206 = call i32 @dx.op.tertiary.i32(i32 48, i32 %193, i32 %202, i32 %205)  ; IMad(a,b,c)
-  %207 = call i32 @dx.op.tertiary.i32(i32 48, i32 %188, i32 %203, i32 %206)  ; IMad(a,b,c)
-  %208 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %207, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %209 = extractvalue %dx.types.ResRet.i32 %208, 0
-  %210 = sitofp i32 %209 to float
-  br label %329
-
-; <label>:211                                     ; preds = %183
-  %212 = icmp eq i32 %184, 1
-  br i1 %212, label %213, label %242
-
-; <label>:213                                     ; preds = %211
-  %214 = add i32 %13, -1
-  %215 = uitofp i32 %214 to float
-  %216 = call float @dx.op.binary.f32(i32 35, float %180, float 0.000000e+00)  ; FMax(a,b)
-  %217 = call float @dx.op.binary.f32(i32 36, float %216, float %215)  ; FMin(a,b)
-  %218 = fptoui float %217 to i32
-  %219 = add i32 %15, -1
-  %220 = uitofp i32 %219 to float
-  %221 = call float @dx.op.binary.f32(i32 35, float %181, float 0.000000e+00)  ; FMax(a,b)
-  %222 = call float @dx.op.binary.f32(i32 36, float %221, float %220)  ; FMin(a,b)
-  %223 = fptoui float %222 to i32
-  %224 = uitofp i32 %223 to float
-  %225 = uitofp i32 %218 to float
-  %226 = fptoui float %45 to i32
-  %227 = fptoui float %182 to i32
-  %228 = fptoui float %224 to i32
-  %229 = fptoui float %225 to i32
-  %230 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %231 = extractvalue %dx.types.CBufRet.i32 %230, 0
-  %232 = extractvalue %dx.types.CBufRet.i32 %230, 1
-  %233 = extractvalue %dx.types.CBufRet.i32 %230, 2
-  %234 = extractvalue %dx.types.CBufRet.i32 %230, 3
-  %235 = mul i32 %231, %226
-  %236 = call i32 @dx.op.tertiary.i32(i32 48, i32 %227, i32 %232, i32 %235)  ; IMad(a,b,c)
-  %237 = call i32 @dx.op.tertiary.i32(i32 48, i32 %228, i32 %233, i32 %236)  ; IMad(a,b,c)
-  %238 = call i32 @dx.op.tertiary.i32(i32 48, i32 %229, i32 %234, i32 %237)  ; IMad(a,b,c)
-  %239 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %238, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %240 = extractvalue %dx.types.ResRet.i32 %239, 0
-  %241 = sitofp i32 %240 to float
-  br label %329
-
-; <label>:242                                     ; preds = %211
-  %243 = icmp eq i32 %184, 2
-  br i1 %243, label %244, label %329
-
-; <label>:244                                     ; preds = %242
-  %245 = fsub fast float %22, %20
-  %246 = fcmp fast olt float %180, %20
-  br i1 %246, label %247, label %260
-
-; <label>:247                                     ; preds = %244
-  %248 = fsub fast float %20, %180
-  %249 = fdiv fast float %248, %245
-  %250 = fptoui float %249 to i32
-  %251 = uitofp i32 %250 to float
-  %252 = fmul fast float %251, %245
-  %253 = fsub fast float %248, %252
-  %254 = and i32 %250, 1
-  %255 = icmp eq i32 %254, 0
-  br i1 %255, label %256, label %258
-
-; <label>:256                                     ; preds = %247
-  %257 = fadd fast float %253, %20
-  br label %275
-
-; <label>:258                                     ; preds = %247
-  %259 = fsub fast float %22, %253
-  br label %275
-
-; <label>:260                                     ; preds = %244
-  %261 = fcmp fast ogt float %180, %22
-  br i1 %261, label %262, label %275
-
-; <label>:262                                     ; preds = %260
-  %263 = fsub fast float %180, %22
-  %264 = fdiv fast float %263, %245
-  %265 = fptoui float %264 to i32
-  %266 = uitofp i32 %265 to float
-  %267 = fmul fast float %266, %245
-  %268 = fsub fast float %263, %267
-  %269 = and i32 %265, 1
-  %270 = icmp eq i32 %269, 0
-  br i1 %270, label %271, label %273
-
-; <label>:271                                     ; preds = %262
-  %272 = fsub fast float %22, %268
-  br label %275
-
-; <label>:273                                     ; preds = %262
-  %274 = fadd fast float %268, %20
-  br label %275
-
-; <label>:275                                     ; preds = %273, %271, %260, %258, %256
-  %276 = phi float [ %257, %256 ], [ %259, %258 ], [ %272, %271 ], [ %274, %273 ], [ %180, %260 ]
-  %277 = fptoui float %276 to i32
-  %278 = fsub fast float %24, %20
-  %279 = fcmp fast olt float %181, %20
-  br i1 %279, label %280, label %293
-
-; <label>:280                                     ; preds = %275
-  %281 = fsub fast float %20, %181
-  %282 = fdiv fast float %281, %278
-  %283 = fptoui float %282 to i32
-  %284 = uitofp i32 %283 to float
-  %285 = fmul fast float %284, %278
-  %286 = fsub fast float %281, %285
-  %287 = and i32 %283, 1
-  %288 = icmp eq i32 %287, 0
-  br i1 %288, label %289, label %291
-
-; <label>:289                                     ; preds = %280
-  %290 = fadd fast float %286, %20
-  br label %308
-
-; <label>:291                                     ; preds = %280
-  %292 = fsub fast float %24, %286
-  br label %308
-
-; <label>:293                                     ; preds = %275
-  %294 = fcmp fast ogt float %181, %24
-  br i1 %294, label %295, label %308
-
-; <label>:295                                     ; preds = %293
-  %296 = fsub fast float %181, %24
-  %297 = fdiv fast float %296, %278
-  %298 = fptoui float %297 to i32
-  %299 = uitofp i32 %298 to float
-  %300 = fmul fast float %299, %278
-  %301 = fsub fast float %296, %300
-  %302 = and i32 %298, 1
-  %303 = icmp eq i32 %302, 0
-  br i1 %303, label %304, label %306
-
-; <label>:304                                     ; preds = %295
-  %305 = fsub fast float %24, %301
-  br label %308
-
-; <label>:306                                     ; preds = %295
-  %307 = fadd fast float %301, %20
-  br label %308
-
-; <label>:308                                     ; preds = %306, %304, %293, %291, %289
-  %309 = phi float [ %290, %289 ], [ %292, %291 ], [ %305, %304 ], [ %307, %306 ], [ %181, %293 ]
-  %310 = fptoui float %309 to i32
-  %311 = uitofp i32 %310 to float
-  %312 = uitofp i32 %277 to float
-  %313 = fptoui float %45 to i32
-  %314 = fptoui float %182 to i32
-  %315 = fptoui float %311 to i32
-  %316 = fptoui float %312 to i32
-  %317 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %318 = extractvalue %dx.types.CBufRet.i32 %317, 0
-  %319 = extractvalue %dx.types.CBufRet.i32 %317, 1
-  %320 = extractvalue %dx.types.CBufRet.i32 %317, 2
-  %321 = extractvalue %dx.types.CBufRet.i32 %317, 3
-  %322 = mul i32 %318, %313
-  %323 = call i32 @dx.op.tertiary.i32(i32 48, i32 %314, i32 %319, i32 %322)  ; IMad(a,b,c)
-  %324 = call i32 @dx.op.tertiary.i32(i32 48, i32 %315, i32 %320, i32 %323)  ; IMad(a,b,c)
-  %325 = call i32 @dx.op.tertiary.i32(i32 48, i32 %316, i32 %321, i32 %324)  ; IMad(a,b,c)
-  %326 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %325, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %327 = extractvalue %dx.types.ResRet.i32 %326, 0
-  %328 = sitofp i32 %327 to float
-  br label %329
-
-; <label>:329                                     ; preds = %308, %242, %213, %196, %186
-  %330 = phi float [ %210, %196 ], [ 0.000000e+00, %186 ], [ %241, %213 ], [ %328, %308 ], [ 0.000000e+00, %242 ]
-  %331 = fptosi float %330 to i32
-  call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %1, i32 %8, i32 0, i32 %331, i32 undef, i32 undef, i32 undef, i8 1, i32 4)  ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment)
-  br label %3323
-
-; <label>:332                                     ; preds = %179
-  %333 = icmp eq i32 %91, 0
-  br i1 %333, label %334, label %933
-
-; <label>:334                                     ; preds = %332
-  %335 = call float @dx.op.unary.f32(i32 27, float %180)  ; Round_ni(value)
-  %336 = call float @dx.op.unary.f32(i32 27, float %181)  ; Round_ni(value)
-  %337 = fadd fast float %335, 1.000000e+00
-  %338 = fadd fast float %336, 1.000000e+00
-  %339 = extractvalue %dx.types.CBufRet.i32 %6, 3
-  %340 = icmp eq i32 %339, 0
-  br i1 %340, label %341, label %366
-
-; <label>:341                                     ; preds = %334
-  %342 = fcmp fast oge float %335, 0.000000e+00
-  %343 = fptoui float %335 to i32
-  %344 = icmp ult i32 %343, %13
-  %345 = and i1 %342, %344
-  %346 = fcmp fast oge float %336, 0.000000e+00
-  %347 = and i1 %346, %345
-  %348 = fptoui float %336 to i32
-  %349 = icmp ult i32 %348, %15
-  %350 = and i1 %349, %347
-  br i1 %350, label %351, label %484
-
-; <label>:351                                     ; preds = %341
-  %352 = fptoui float %45 to i32
-  %353 = fptoui float %182 to i32
-  %354 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %355 = extractvalue %dx.types.CBufRet.i32 %354, 0
-  %356 = extractvalue %dx.types.CBufRet.i32 %354, 1
-  %357 = extractvalue %dx.types.CBufRet.i32 %354, 2
-  %358 = extractvalue %dx.types.CBufRet.i32 %354, 3
-  %359 = mul i32 %355, %352
-  %360 = call i32 @dx.op.tertiary.i32(i32 48, i32 %353, i32 %356, i32 %359)  ; IMad(a,b,c)
-  %361 = call i32 @dx.op.tertiary.i32(i32 48, i32 %348, i32 %357, i32 %360)  ; IMad(a,b,c)
-  %362 = call i32 @dx.op.tertiary.i32(i32 48, i32 %343, i32 %358, i32 %361)  ; IMad(a,b,c)
-  %363 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %362, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %364 = extractvalue %dx.types.ResRet.i32 %363, 0
-  %365 = sitofp i32 %364 to float
-  br label %484
-
-; <label>:366                                     ; preds = %334
-  %367 = icmp eq i32 %339, 1
-  br i1 %367, label %368, label %397
-
-; <label>:368                                     ; preds = %366
-  %369 = add i32 %13, -1
-  %370 = uitofp i32 %369 to float
-  %371 = call float @dx.op.binary.f32(i32 35, float %335, float 0.000000e+00)  ; FMax(a,b)
-  %372 = call float @dx.op.binary.f32(i32 36, float %371, float %370)  ; FMin(a,b)
-  %373 = fptoui float %372 to i32
-  %374 = add i32 %15, -1
-  %375 = uitofp i32 %374 to float
-  %376 = call float @dx.op.binary.f32(i32 35, float %336, float 0.000000e+00)  ; FMax(a,b)
-  %377 = call float @dx.op.binary.f32(i32 36, float %376, float %375)  ; FMin(a,b)
-  %378 = fptoui float %377 to i32
-  %379 = uitofp i32 %378 to float
-  %380 = uitofp i32 %373 to float
-  %381 = fptoui float %45 to i32
-  %382 = fptoui float %182 to i32
-  %383 = fptoui float %379 to i32
-  %384 = fptoui float %380 to i32
-  %385 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %386 = extractvalue %dx.types.CBufRet.i32 %385, 0
-  %387 = extractvalue %dx.types.CBufRet.i32 %385, 1
-  %388 = extractvalue %dx.types.CBufRet.i32 %385, 2
-  %389 = extractvalue %dx.types.CBufRet.i32 %385, 3
-  %390 = mul i32 %386, %381
-  %391 = call i32 @dx.op.tertiary.i32(i32 48, i32 %382, i32 %387, i32 %390)  ; IMad(a,b,c)
-  %392 = call i32 @dx.op.tertiary.i32(i32 48, i32 %383, i32 %388, i32 %391)  ; IMad(a,b,c)
-  %393 = call i32 @dx.op.tertiary.i32(i32 48, i32 %384, i32 %389, i32 %392)  ; IMad(a,b,c)
-  %394 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %393, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %395 = extractvalue %dx.types.ResRet.i32 %394, 0
-  %396 = sitofp i32 %395 to float
-  br label %484
-
-; <label>:397                                     ; preds = %366
-  %398 = icmp eq i32 %339, 2
-  br i1 %398, label %399, label %484
-
-; <label>:399                                     ; preds = %397
-  %400 = fsub fast float %22, %20
-  %401 = fcmp fast olt float %335, %20
-  br i1 %401, label %402, label %415
-
-; <label>:402                                     ; preds = %399
-  %403 = fsub fast float %20, %335
-  %404 = fdiv fast float %403, %400
-  %405 = fptoui float %404 to i32
-  %406 = uitofp i32 %405 to float
-  %407 = fmul fast float %406, %400
-  %408 = fsub fast float %403, %407
-  %409 = and i32 %405, 1
-  %410 = icmp eq i32 %409, 0
-  br i1 %410, label %411, label %413
-
-; <label>:411                                     ; preds = %402
-  %412 = fadd fast float %408, %20
-  br label %430
-
-; <label>:413                                     ; preds = %402
-  %414 = fsub fast float %22, %408
-  br label %430
-
-; <label>:415                                     ; preds = %399
-  %416 = fcmp fast ogt float %335, %22
-  br i1 %416, label %417, label %430
-
-; <label>:417                                     ; preds = %415
-  %418 = fsub fast float %335, %22
-  %419 = fdiv fast float %418, %400
-  %420 = fptoui float %419 to i32
-  %421 = uitofp i32 %420 to float
-  %422 = fmul fast float %421, %400
-  %423 = fsub fast float %418, %422
-  %424 = and i32 %420, 1
-  %425 = icmp eq i32 %424, 0
-  br i1 %425, label %426, label %428
-
-; <label>:426                                     ; preds = %417
-  %427 = fsub fast float %22, %423
-  br label %430
-
-; <label>:428                                     ; preds = %417
-  %429 = fadd fast float %423, %20
-  br label %430
-
-; <label>:430                                     ; preds = %428, %426, %415, %413, %411
-  %431 = phi float [ %412, %411 ], [ %414, %413 ], [ %427, %426 ], [ %429, %428 ], [ %335, %415 ]
-  %432 = fptoui float %431 to i32
-  %433 = fsub fast float %24, %20
-  %434 = fcmp fast olt float %336, %20
-  br i1 %434, label %435, label %448
-
-; <label>:435                                     ; preds = %430
-  %436 = fsub fast float %20, %336
-  %437 = fdiv fast float %436, %433
-  %438 = fptoui float %437 to i32
-  %439 = uitofp i32 %438 to float
-  %440 = fmul fast float %439, %433
-  %441 = fsub fast float %436, %440
-  %442 = and i32 %438, 1
-  %443 = icmp eq i32 %442, 0
-  br i1 %443, label %444, label %446
-
-; <label>:444                                     ; preds = %435
-  %445 = fadd fast float %441, %20
-  br label %463
-
-; <label>:446                                     ; preds = %435
-  %447 = fsub fast float %24, %441
-  br label %463
-
-; <label>:448                                     ; preds = %430
-  %449 = fcmp fast ogt float %336, %24
-  br i1 %449, label %450, label %463
-
-; <label>:450                                     ; preds = %448
-  %451 = fsub fast float %336, %24
-  %452 = fdiv fast float %451, %433
-  %453 = fptoui float %452 to i32
-  %454 = uitofp i32 %453 to float
-  %455 = fmul fast float %454, %433
-  %456 = fsub fast float %451, %455
-  %457 = and i32 %453, 1
-  %458 = icmp eq i32 %457, 0
-  br i1 %458, label %459, label %461
-
-; <label>:459                                     ; preds = %450
-  %460 = fsub fast float %24, %456
-  br label %463
-
-; <label>:461                                     ; preds = %450
-  %462 = fadd fast float %456, %20
-  br label %463
-
-; <label>:463                                     ; preds = %461, %459, %448, %446, %444
-  %464 = phi float [ %445, %444 ], [ %447, %446 ], [ %460, %459 ], [ %462, %461 ], [ %336, %448 ]
-  %465 = fptoui float %464 to i32
-  %466 = uitofp i32 %465 to float
-  %467 = uitofp i32 %432 to float
-  %468 = fptoui float %45 to i32
-  %469 = fptoui float %182 to i32
-  %470 = fptoui float %466 to i32
-  %471 = fptoui float %467 to i32
-  %472 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %473 = extractvalue %dx.types.CBufRet.i32 %472, 0
-  %474 = extractvalue %dx.types.CBufRet.i32 %472, 1
-  %475 = extractvalue %dx.types.CBufRet.i32 %472, 2
-  %476 = extractvalue %dx.types.CBufRet.i32 %472, 3
-  %477 = mul i32 %473, %468
-  %478 = call i32 @dx.op.tertiary.i32(i32 48, i32 %469, i32 %474, i32 %477)  ; IMad(a,b,c)
-  %479 = call i32 @dx.op.tertiary.i32(i32 48, i32 %470, i32 %475, i32 %478)  ; IMad(a,b,c)
-  %480 = call i32 @dx.op.tertiary.i32(i32 48, i32 %471, i32 %476, i32 %479)  ; IMad(a,b,c)
-  %481 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %480, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %482 = extractvalue %dx.types.ResRet.i32 %481, 0
-  %483 = sitofp i32 %482 to float
-  br label %484
-
-; <label>:484                                     ; preds = %463, %397, %368, %351, %341
-  %485 = phi float [ %365, %351 ], [ 0.000000e+00, %341 ], [ %396, %368 ], [ %483, %463 ], [ 0.000000e+00, %397 ]
-  br i1 %340, label %486, label %511
-
-; <label>:486                                     ; preds = %484
-  %487 = fcmp fast oge float %337, 0.000000e+00
-  %488 = fptoui float %337 to i32
-  %489 = icmp ult i32 %488, %13
-  %490 = and i1 %487, %489
-  %491 = fcmp fast oge float %336, 0.000000e+00
-  %492 = and i1 %491, %490
-  %493 = fptoui float %336 to i32
-  %494 = icmp ult i32 %493, %15
-  %495 = and i1 %494, %492
-  br i1 %495, label %496, label %629
-
-; <label>:496                                     ; preds = %486
-  %497 = fptoui float %45 to i32
-  %498 = fptoui float %182 to i32
-  %499 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %500 = extractvalue %dx.types.CBufRet.i32 %499, 0
-  %501 = extractvalue %dx.types.CBufRet.i32 %499, 1
-  %502 = extractvalue %dx.types.CBufRet.i32 %499, 2
-  %503 = extractvalue %dx.types.CBufRet.i32 %499, 3
-  %504 = mul i32 %500, %497
-  %505 = call i32 @dx.op.tertiary.i32(i32 48, i32 %498, i32 %501, i32 %504)  ; IMad(a,b,c)
-  %506 = call i32 @dx.op.tertiary.i32(i32 48, i32 %493, i32 %502, i32 %505)  ; IMad(a,b,c)
-  %507 = call i32 @dx.op.tertiary.i32(i32 48, i32 %488, i32 %503, i32 %506)  ; IMad(a,b,c)
-  %508 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %507, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %509 = extractvalue %dx.types.ResRet.i32 %508, 0
-  %510 = sitofp i32 %509 to float
-  br label %629
-
-; <label>:511                                     ; preds = %484
-  %512 = icmp eq i32 %339, 1
-  br i1 %512, label %513, label %542
-
-; <label>:513                                     ; preds = %511
-  %514 = add i32 %13, -1
-  %515 = uitofp i32 %514 to float
-  %516 = call float @dx.op.binary.f32(i32 35, float %337, float 0.000000e+00)  ; FMax(a,b)
-  %517 = call float @dx.op.binary.f32(i32 36, float %516, float %515)  ; FMin(a,b)
-  %518 = fptoui float %517 to i32
-  %519 = add i32 %15, -1
-  %520 = uitofp i32 %519 to float
-  %521 = call float @dx.op.binary.f32(i32 35, float %336, float 0.000000e+00)  ; FMax(a,b)
-  %522 = call float @dx.op.binary.f32(i32 36, float %521, float %520)  ; FMin(a,b)
-  %523 = fptoui float %522 to i32
-  %524 = uitofp i32 %523 to float
-  %525 = uitofp i32 %518 to float
-  %526 = fptoui float %45 to i32
-  %527 = fptoui float %182 to i32
-  %528 = fptoui float %524 to i32
-  %529 = fptoui float %525 to i32
-  %530 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %531 = extractvalue %dx.types.CBufRet.i32 %530, 0
-  %532 = extractvalue %dx.types.CBufRet.i32 %530, 1
-  %533 = extractvalue %dx.types.CBufRet.i32 %530, 2
-  %534 = extractvalue %dx.types.CBufRet.i32 %530, 3
-  %535 = mul i32 %531, %526
-  %536 = call i32 @dx.op.tertiary.i32(i32 48, i32 %527, i32 %532, i32 %535)  ; IMad(a,b,c)
-  %537 = call i32 @dx.op.tertiary.i32(i32 48, i32 %528, i32 %533, i32 %536)  ; IMad(a,b,c)
-  %538 = call i32 @dx.op.tertiary.i32(i32 48, i32 %529, i32 %534, i32 %537)  ; IMad(a,b,c)
-  %539 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %538, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %540 = extractvalue %dx.types.ResRet.i32 %539, 0
-  %541 = sitofp i32 %540 to float
-  br label %629
-
-; <label>:542                                     ; preds = %511
-  %543 = icmp eq i32 %339, 2
-  br i1 %543, label %544, label %629
-
-; <label>:544                                     ; preds = %542
-  %545 = fsub fast float %22, %20
-  %546 = fcmp fast olt float %337, %20
-  br i1 %546, label %547, label %560
-
-; <label>:547                                     ; preds = %544
-  %548 = fsub fast float %20, %337
-  %549 = fdiv fast float %548, %545
-  %550 = fptoui float %549 to i32
-  %551 = uitofp i32 %550 to float
-  %552 = fmul fast float %551, %545
-  %553 = fsub fast float %548, %552
-  %554 = and i32 %550, 1
-  %555 = icmp eq i32 %554, 0
-  br i1 %555, label %556, label %558
-
-; <label>:556                                     ; preds = %547
-  %557 = fadd fast float %553, %20
-  br label %575
-
-; <label>:558                                     ; preds = %547
-  %559 = fsub fast float %22, %553
-  br label %575
-
-; <label>:560                                     ; preds = %544
-  %561 = fcmp fast ogt float %337, %22
-  br i1 %561, label %562, label %575
-
-; <label>:562                                     ; preds = %560
-  %563 = fsub fast float %337, %22
-  %564 = fdiv fast float %563, %545
-  %565 = fptoui float %564 to i32
-  %566 = uitofp i32 %565 to float
-  %567 = fmul fast float %566, %545
-  %568 = fsub fast float %563, %567
-  %569 = and i32 %565, 1
-  %570 = icmp eq i32 %569, 0
-  br i1 %570, label %571, label %573
-
-; <label>:571                                     ; preds = %562
-  %572 = fsub fast float %22, %568
-  br label %575
-
-; <label>:573                                     ; preds = %562
-  %574 = fadd fast float %568, %20
-  br label %575
-
-; <label>:575                                     ; preds = %573, %571, %560, %558, %556
-  %576 = phi float [ %557, %556 ], [ %559, %558 ], [ %572, %571 ], [ %574, %573 ], [ %337, %560 ]
-  %577 = fptoui float %576 to i32
-  %578 = fsub fast float %24, %20
-  %579 = fcmp fast olt float %336, %20
-  br i1 %579, label %580, label %593
-
-; <label>:580                                     ; preds = %575
-  %581 = fsub fast float %20, %336
-  %582 = fdiv fast float %581, %578
-  %583 = fptoui float %582 to i32
-  %584 = uitofp i32 %583 to float
-  %585 = fmul fast float %584, %578
-  %586 = fsub fast float %581, %585
-  %587 = and i32 %583, 1
-  %588 = icmp eq i32 %587, 0
-  br i1 %588, label %589, label %591
-
-; <label>:589                                     ; preds = %580
-  %590 = fadd fast float %586, %20
-  br label %608
-
-; <label>:591                                     ; preds = %580
-  %592 = fsub fast float %24, %586
-  br label %608
-
-; <label>:593                                     ; preds = %575
-  %594 = fcmp fast ogt float %336, %24
-  br i1 %594, label %595, label %608
-
-; <label>:595                                     ; preds = %593
-  %596 = fsub fast float %336, %24
-  %597 = fdiv fast float %596, %578
-  %598 = fptoui float %597 to i32
-  %599 = uitofp i32 %598 to float
-  %600 = fmul fast float %599, %578
-  %601 = fsub fast float %596, %600
-  %602 = and i32 %598, 1
-  %603 = icmp eq i32 %602, 0
-  br i1 %603, label %604, label %606
-
-; <label>:604                                     ; preds = %595
-  %605 = fsub fast float %24, %601
-  br label %608
-
-; <label>:606                                     ; preds = %595
-  %607 = fadd fast float %601, %20
-  br label %608
-
-; <label>:608                                     ; preds = %606, %604, %593, %591, %589
-  %609 = phi float [ %590, %589 ], [ %592, %591 ], [ %605, %604 ], [ %607, %606 ], [ %336, %593 ]
-  %610 = fptoui float %609 to i32
-  %611 = uitofp i32 %610 to float
-  %612 = uitofp i32 %577 to float
-  %613 = fptoui float %45 to i32
-  %614 = fptoui float %182 to i32
-  %615 = fptoui float %611 to i32
-  %616 = fptoui float %612 to i32
-  %617 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %618 = extractvalue %dx.types.CBufRet.i32 %617, 0
-  %619 = extractvalue %dx.types.CBufRet.i32 %617, 1
-  %620 = extractvalue %dx.types.CBufRet.i32 %617, 2
-  %621 = extractvalue %dx.types.CBufRet.i32 %617, 3
-  %622 = mul i32 %618, %613
-  %623 = call i32 @dx.op.tertiary.i32(i32 48, i32 %614, i32 %619, i32 %622)  ; IMad(a,b,c)
-  %624 = call i32 @dx.op.tertiary.i32(i32 48, i32 %615, i32 %620, i32 %623)  ; IMad(a,b,c)
-  %625 = call i32 @dx.op.tertiary.i32(i32 48, i32 %616, i32 %621, i32 %624)  ; IMad(a,b,c)
-  %626 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %625, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %627 = extractvalue %dx.types.ResRet.i32 %626, 0
-  %628 = sitofp i32 %627 to float
-  br label %629
-
-; <label>:629                                     ; preds = %608, %542, %513, %496, %486
-  %630 = phi float [ %510, %496 ], [ 0.000000e+00, %486 ], [ %541, %513 ], [ %628, %608 ], [ 0.000000e+00, %542 ]
-  br i1 %340, label %631, label %656
-
-; <label>:631                                     ; preds = %629
-  %632 = fcmp fast oge float %335, 0.000000e+00
-  %633 = fptoui float %335 to i32
-  %634 = icmp ult i32 %633, %13
-  %635 = and i1 %632, %634
-  %636 = fcmp fast oge float %338, 0.000000e+00
-  %637 = and i1 %636, %635
-  %638 = fptoui float %338 to i32
-  %639 = icmp ult i32 %638, %15
-  %640 = and i1 %639, %637
-  br i1 %640, label %641, label %774
-
-; <label>:641                                     ; preds = %631
-  %642 = fptoui float %45 to i32
-  %643 = fptoui float %182 to i32
-  %644 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %645 = extractvalue %dx.types.CBufRet.i32 %644, 0
-  %646 = extractvalue %dx.types.CBufRet.i32 %644, 1
-  %647 = extractvalue %dx.types.CBufRet.i32 %644, 2
-  %648 = extractvalue %dx.types.CBufRet.i32 %644, 3
-  %649 = mul i32 %645, %642
-  %650 = call i32 @dx.op.tertiary.i32(i32 48, i32 %643, i32 %646, i32 %649)  ; IMad(a,b,c)
-  %651 = call i32 @dx.op.tertiary.i32(i32 48, i32 %638, i32 %647, i32 %650)  ; IMad(a,b,c)
-  %652 = call i32 @dx.op.tertiary.i32(i32 48, i32 %633, i32 %648, i32 %651)  ; IMad(a,b,c)
-  %653 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %652, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %654 = extractvalue %dx.types.ResRet.i32 %653, 0
-  %655 = sitofp i32 %654 to float
-  br label %774
-
-; <label>:656                                     ; preds = %629
-  %657 = icmp eq i32 %339, 1
-  br i1 %657, label %658, label %687
-
-; <label>:658                                     ; preds = %656
-  %659 = add i32 %13, -1
-  %660 = uitofp i32 %659 to float
-  %661 = call float @dx.op.binary.f32(i32 35, float %335, float 0.000000e+00)  ; FMax(a,b)
-  %662 = call float @dx.op.binary.f32(i32 36, float %661, float %660)  ; FMin(a,b)
-  %663 = fptoui float %662 to i32
-  %664 = add i32 %15, -1
-  %665 = uitofp i32 %664 to float
-  %666 = call float @dx.op.binary.f32(i32 35, float %338, float 0.000000e+00)  ; FMax(a,b)
-  %667 = call float @dx.op.binary.f32(i32 36, float %666, float %665)  ; FMin(a,b)
-  %668 = fptoui float %667 to i32
-  %669 = uitofp i32 %668 to float
-  %670 = uitofp i32 %663 to float
-  %671 = fptoui float %45 to i32
-  %672 = fptoui float %182 to i32
-  %673 = fptoui float %669 to i32
-  %674 = fptoui float %670 to i32
-  %675 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %676 = extractvalue %dx.types.CBufRet.i32 %675, 0
-  %677 = extractvalue %dx.types.CBufRet.i32 %675, 1
-  %678 = extractvalue %dx.types.CBufRet.i32 %675, 2
-  %679 = extractvalue %dx.types.CBufRet.i32 %675, 3
-  %680 = mul i32 %676, %671
-  %681 = call i32 @dx.op.tertiary.i32(i32 48, i32 %672, i32 %677, i32 %680)  ; IMad(a,b,c)
-  %682 = call i32 @dx.op.tertiary.i32(i32 48, i32 %673, i32 %678, i32 %681)  ; IMad(a,b,c)
-  %683 = call i32 @dx.op.tertiary.i32(i32 48, i32 %674, i32 %679, i32 %682)  ; IMad(a,b,c)
-  %684 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %683, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %685 = extractvalue %dx.types.ResRet.i32 %684, 0
-  %686 = sitofp i32 %685 to float
-  br label %774
-
-; <label>:687                                     ; preds = %656
-  %688 = icmp eq i32 %339, 2
-  br i1 %688, label %689, label %774
-
-; <label>:689                                     ; preds = %687
-  %690 = fsub fast float %22, %20
-  %691 = fcmp fast olt float %335, %20
-  br i1 %691, label %692, label %705
-
-; <label>:692                                     ; preds = %689
-  %693 = fsub fast float %20, %335
-  %694 = fdiv fast float %693, %690
-  %695 = fptoui float %694 to i32
-  %696 = uitofp i32 %695 to float
-  %697 = fmul fast float %696, %690
-  %698 = fsub fast float %693, %697
-  %699 = and i32 %695, 1
-  %700 = icmp eq i32 %699, 0
-  br i1 %700, label %701, label %703
-
-; <label>:701                                     ; preds = %692
-  %702 = fadd fast float %698, %20
-  br label %720
-
-; <label>:703                                     ; preds = %692
-  %704 = fsub fast float %22, %698
-  br label %720
-
-; <label>:705                                     ; preds = %689
-  %706 = fcmp fast ogt float %335, %22
-  br i1 %706, label %707, label %720
-
-; <label>:707                                     ; preds = %705
-  %708 = fsub fast float %335, %22
-  %709 = fdiv fast float %708, %690
-  %710 = fptoui float %709 to i32
-  %711 = uitofp i32 %710 to float
-  %712 = fmul fast float %711, %690
-  %713 = fsub fast float %708, %712
-  %714 = and i32 %710, 1
-  %715 = icmp eq i32 %714, 0
-  br i1 %715, label %716, label %718
-
-; <label>:716                                     ; preds = %707
-  %717 = fsub fast float %22, %713
-  br label %720
-
-; <label>:718                                     ; preds = %707
-  %719 = fadd fast float %713, %20
-  br label %720
-
-; <label>:720                                     ; preds = %718, %716, %705, %703, %701
-  %721 = phi float [ %702, %701 ], [ %704, %703 ], [ %717, %716 ], [ %719, %718 ], [ %335, %705 ]
-  %722 = fptoui float %721 to i32
-  %723 = fsub fast float %24, %20
-  %724 = fcmp fast olt float %338, %20
-  br i1 %724, label %725, label %738
-
-; <label>:725                                     ; preds = %720
-  %726 = fsub fast float %20, %338
-  %727 = fdiv fast float %726, %723
-  %728 = fptoui float %727 to i32
-  %729 = uitofp i32 %728 to float
-  %730 = fmul fast float %729, %723
-  %731 = fsub fast float %726, %730
-  %732 = and i32 %728, 1
-  %733 = icmp eq i32 %732, 0
-  br i1 %733, label %734, label %736
-
-; <label>:734                                     ; preds = %725
-  %735 = fadd fast float %731, %20
-  br label %753
-
-; <label>:736                                     ; preds = %725
-  %737 = fsub fast float %24, %731
-  br label %753
-
-; <label>:738                                     ; preds = %720
-  %739 = fcmp fast ogt float %338, %24
-  br i1 %739, label %740, label %753
-
-; <label>:740                                     ; preds = %738
-  %741 = fsub fast float %338, %24
-  %742 = fdiv fast float %741, %723
-  %743 = fptoui float %742 to i32
-  %744 = uitofp i32 %743 to float
-  %745 = fmul fast float %744, %723
-  %746 = fsub fast float %741, %745
-  %747 = and i32 %743, 1
-  %748 = icmp eq i32 %747, 0
-  br i1 %748, label %749, label %751
-
-; <label>:749                                     ; preds = %740
-  %750 = fsub fast float %24, %746
-  br label %753
-
-; <label>:751                                     ; preds = %740
-  %752 = fadd fast float %746, %20
-  br label %753
-
-; <label>:753                                     ; preds = %751, %749, %738, %736, %734
-  %754 = phi float [ %735, %734 ], [ %737, %736 ], [ %750, %749 ], [ %752, %751 ], [ %338, %738 ]
-  %755 = fptoui float %754 to i32
-  %756 = uitofp i32 %755 to float
-  %757 = uitofp i32 %722 to float
-  %758 = fptoui float %45 to i32
-  %759 = fptoui float %182 to i32
-  %760 = fptoui float %756 to i32
-  %761 = fptoui float %757 to i32
-  %762 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %763 = extractvalue %dx.types.CBufRet.i32 %762, 0
-  %764 = extractvalue %dx.types.CBufRet.i32 %762, 1
-  %765 = extractvalue %dx.types.CBufRet.i32 %762, 2
-  %766 = extractvalue %dx.types.CBufRet.i32 %762, 3
-  %767 = mul i32 %763, %758
-  %768 = call i32 @dx.op.tertiary.i32(i32 48, i32 %759, i32 %764, i32 %767)  ; IMad(a,b,c)
-  %769 = call i32 @dx.op.tertiary.i32(i32 48, i32 %760, i32 %765, i32 %768)  ; IMad(a,b,c)
-  %770 = call i32 @dx.op.tertiary.i32(i32 48, i32 %761, i32 %766, i32 %769)  ; IMad(a,b,c)
-  %771 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %770, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %772 = extractvalue %dx.types.ResRet.i32 %771, 0
-  %773 = sitofp i32 %772 to float
-  br label %774
-
-; <label>:774                                     ; preds = %753, %687, %658, %641, %631
-  %775 = phi float [ %655, %641 ], [ 0.000000e+00, %631 ], [ %686, %658 ], [ %773, %753 ], [ 0.000000e+00, %687 ]
-  br i1 %340, label %776, label %801
-
-; <label>:776                                     ; preds = %774
-  %777 = fcmp fast oge float %337, 0.000000e+00
-  %778 = fptoui float %337 to i32
-  %779 = icmp ult i32 %778, %13
-  %780 = and i1 %777, %779
-  %781 = fcmp fast oge float %338, 0.000000e+00
-  %782 = and i1 %781, %780
-  %783 = fptoui float %338 to i32
-  %784 = icmp ult i32 %783, %15
-  %785 = and i1 %784, %782
-  br i1 %785, label %786, label %919
-
-; <label>:786                                     ; preds = %776
-  %787 = fptoui float %45 to i32
-  %788 = fptoui float %182 to i32
-  %789 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %790 = extractvalue %dx.types.CBufRet.i32 %789, 0
-  %791 = extractvalue %dx.types.CBufRet.i32 %789, 1
-  %792 = extractvalue %dx.types.CBufRet.i32 %789, 2
-  %793 = extractvalue %dx.types.CBufRet.i32 %789, 3
-  %794 = mul i32 %790, %787
-  %795 = call i32 @dx.op.tertiary.i32(i32 48, i32 %788, i32 %791, i32 %794)  ; IMad(a,b,c)
-  %796 = call i32 @dx.op.tertiary.i32(i32 48, i32 %783, i32 %792, i32 %795)  ; IMad(a,b,c)
-  %797 = call i32 @dx.op.tertiary.i32(i32 48, i32 %778, i32 %793, i32 %796)  ; IMad(a,b,c)
-  %798 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %797, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %799 = extractvalue %dx.types.ResRet.i32 %798, 0
-  %800 = sitofp i32 %799 to float
-  br label %919
-
-; <label>:801                                     ; preds = %774
-  %802 = icmp eq i32 %339, 1
-  br i1 %802, label %803, label %832
-
-; <label>:803                                     ; preds = %801
-  %804 = add i32 %13, -1
-  %805 = uitofp i32 %804 to float
-  %806 = call float @dx.op.binary.f32(i32 35, float %337, float 0.000000e+00)  ; FMax(a,b)
-  %807 = call float @dx.op.binary.f32(i32 36, float %806, float %805)  ; FMin(a,b)
-  %808 = fptoui float %807 to i32
-  %809 = add i32 %15, -1
-  %810 = uitofp i32 %809 to float
-  %811 = call float @dx.op.binary.f32(i32 35, float %338, float 0.000000e+00)  ; FMax(a,b)
-  %812 = call float @dx.op.binary.f32(i32 36, float %811, float %810)  ; FMin(a,b)
-  %813 = fptoui float %812 to i32
-  %814 = uitofp i32 %813 to float
-  %815 = uitofp i32 %808 to float
-  %816 = fptoui float %45 to i32
-  %817 = fptoui float %182 to i32
-  %818 = fptoui float %814 to i32
-  %819 = fptoui float %815 to i32
-  %820 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %821 = extractvalue %dx.types.CBufRet.i32 %820, 0
-  %822 = extractvalue %dx.types.CBufRet.i32 %820, 1
-  %823 = extractvalue %dx.types.CBufRet.i32 %820, 2
-  %824 = extractvalue %dx.types.CBufRet.i32 %820, 3
-  %825 = mul i32 %821, %816
-  %826 = call i32 @dx.op.tertiary.i32(i32 48, i32 %817, i32 %822, i32 %825)  ; IMad(a,b,c)
-  %827 = call i32 @dx.op.tertiary.i32(i32 48, i32 %818, i32 %823, i32 %826)  ; IMad(a,b,c)
-  %828 = call i32 @dx.op.tertiary.i32(i32 48, i32 %819, i32 %824, i32 %827)  ; IMad(a,b,c)
-  %829 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %828, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %830 = extractvalue %dx.types.ResRet.i32 %829, 0
-  %831 = sitofp i32 %830 to float
-  br label %919
-
-; <label>:832                                     ; preds = %801
-  %833 = icmp eq i32 %339, 2
-  br i1 %833, label %834, label %919
-
-; <label>:834                                     ; preds = %832
-  %835 = fsub fast float %22, %20
-  %836 = fcmp fast olt float %337, %20
-  br i1 %836, label %837, label %850
-
-; <label>:837                                     ; preds = %834
-  %838 = fsub fast float %20, %337
-  %839 = fdiv fast float %838, %835
-  %840 = fptoui float %839 to i32
-  %841 = uitofp i32 %840 to float
-  %842 = fmul fast float %841, %835
-  %843 = fsub fast float %838, %842
-  %844 = and i32 %840, 1
-  %845 = icmp eq i32 %844, 0
-  br i1 %845, label %846, label %848
-
-; <label>:846                                     ; preds = %837
-  %847 = fadd fast float %843, %20
-  br label %865
-
-; <label>:848                                     ; preds = %837
-  %849 = fsub fast float %22, %843
-  br label %865
-
-; <label>:850                                     ; preds = %834
-  %851 = fcmp fast ogt float %337, %22
-  br i1 %851, label %852, label %865
-
-; <label>:852                                     ; preds = %850
-  %853 = fsub fast float %337, %22
-  %854 = fdiv fast float %853, %835
-  %855 = fptoui float %854 to i32
-  %856 = uitofp i32 %855 to float
-  %857 = fmul fast float %856, %835
-  %858 = fsub fast float %853, %857
-  %859 = and i32 %855, 1
-  %860 = icmp eq i32 %859, 0
-  br i1 %860, label %861, label %863
-
-; <label>:861                                     ; preds = %852
-  %862 = fsub fast float %22, %858
-  br label %865
-
-; <label>:863                                     ; preds = %852
-  %864 = fadd fast float %858, %20
-  br label %865
-
-; <label>:865                                     ; preds = %863, %861, %850, %848, %846
-  %866 = phi float [ %847, %846 ], [ %849, %848 ], [ %862, %861 ], [ %864, %863 ], [ %337, %850 ]
-  %867 = fptoui float %866 to i32
-  %868 = fsub fast float %24, %20
-  %869 = fcmp fast olt float %338, %20
-  br i1 %869, label %870, label %883
-
-; <label>:870                                     ; preds = %865
-  %871 = fsub fast float %20, %338
-  %872 = fdiv fast float %871, %868
-  %873 = fptoui float %872 to i32
-  %874 = uitofp i32 %873 to float
-  %875 = fmul fast float %874, %868
-  %876 = fsub fast float %871, %875
-  %877 = and i32 %873, 1
-  %878 = icmp eq i32 %877, 0
-  br i1 %878, label %879, label %881
-
-; <label>:879                                     ; preds = %870
-  %880 = fadd fast float %876, %20
-  br label %898
-
-; <label>:881                                     ; preds = %870
-  %882 = fsub fast float %24, %876
-  br label %898
-
-; <label>:883                                     ; preds = %865
-  %884 = fcmp fast ogt float %338, %24
-  br i1 %884, label %885, label %898
-
-; <label>:885                                     ; preds = %883
-  %886 = fsub fast float %338, %24
-  %887 = fdiv fast float %886, %868
-  %888 = fptoui float %887 to i32
-  %889 = uitofp i32 %888 to float
-  %890 = fmul fast float %889, %868
-  %891 = fsub fast float %886, %890
-  %892 = and i32 %888, 1
-  %893 = icmp eq i32 %892, 0
-  br i1 %893, label %894, label %896
-
-; <label>:894                                     ; preds = %885
-  %895 = fsub fast float %24, %891
-  br label %898
-
-; <label>:896                                     ; preds = %885
-  %897 = fadd fast float %891, %20
-  br label %898
-
-; <label>:898                                     ; preds = %896, %894, %883, %881, %879
-  %899 = phi float [ %880, %879 ], [ %882, %881 ], [ %895, %894 ], [ %897, %896 ], [ %338, %883 ]
-  %900 = fptoui float %899 to i32
-  %901 = uitofp i32 %900 to float
-  %902 = uitofp i32 %867 to float
-  %903 = fptoui float %45 to i32
-  %904 = fptoui float %182 to i32
-  %905 = fptoui float %901 to i32
-  %906 = fptoui float %902 to i32
-  %907 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %908 = extractvalue %dx.types.CBufRet.i32 %907, 0
-  %909 = extractvalue %dx.types.CBufRet.i32 %907, 1
-  %910 = extractvalue %dx.types.CBufRet.i32 %907, 2
-  %911 = extractvalue %dx.types.CBufRet.i32 %907, 3
-  %912 = mul i32 %908, %903
-  %913 = call i32 @dx.op.tertiary.i32(i32 48, i32 %904, i32 %909, i32 %912)  ; IMad(a,b,c)
-  %914 = call i32 @dx.op.tertiary.i32(i32 48, i32 %905, i32 %910, i32 %913)  ; IMad(a,b,c)
-  %915 = call i32 @dx.op.tertiary.i32(i32 48, i32 %906, i32 %911, i32 %914)  ; IMad(a,b,c)
-  %916 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %915, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %917 = extractvalue %dx.types.ResRet.i32 %916, 0
-  %918 = sitofp i32 %917 to float
-  br label %919
-
-; <label>:919                                     ; preds = %898, %832, %803, %786, %776
-  %920 = phi float [ %800, %786 ], [ 0.000000e+00, %776 ], [ %831, %803 ], [ %918, %898 ], [ 0.000000e+00, %832 ]
-  %921 = call float @dx.op.unary.f32(i32 22, float %180)  ; Frc(value)
-  %922 = fsub fast float %630, %485
-  %923 = fmul fast float %921, %922
-  %924 = fadd fast float %923, %485
-  %925 = fsub fast float %920, %775
-  %926 = fmul fast float %921, %925
-  %927 = fadd fast float %926, %775
-  %928 = call float @dx.op.unary.f32(i32 22, float %181)  ; Frc(value)
-  %929 = fsub fast float %927, %924
-  %930 = fmul fast float %929, %928
-  %931 = fadd fast float %930, %924
-  %932 = fptosi float %931 to i32
-  call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %1, i32 %8, i32 0, i32 %932, i32 undef, i32 undef, i32 undef, i8 1, i32 4)  ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment)
-  br label %3323
-
-; <label>:933                                     ; preds = %332
-  %934 = icmp eq i32 %91, 2
-  br i1 %934, label %935, label %3323
-
-; <label>:935                                     ; preds = %933
-  %936 = call float @dx.op.unary.f32(i32 27, float %180)  ; Round_ni(value)
-  %937 = fadd fast float %936, -1.000000e+00
-  %938 = call float @dx.op.unary.f32(i32 27, float %181)  ; Round_ni(value)
-  %939 = fadd fast float %938, -1.000000e+00
-  %940 = extractvalue %dx.types.CBufRet.i32 %6, 3
-  %941 = icmp eq i32 %940, 0
-  br i1 %941, label %942, label %967
-
-; <label>:942                                     ; preds = %935
-  %943 = fcmp fast oge float %937, 0.000000e+00
-  %944 = fptoui float %937 to i32
-  %945 = icmp ult i32 %944, %13
-  %946 = and i1 %943, %945
-  %947 = fcmp fast oge float %939, 0.000000e+00
-  %948 = and i1 %947, %946
-  %949 = fptoui float %939 to i32
-  %950 = icmp ult i32 %949, %15
-  %951 = and i1 %950, %948
-  br i1 %951, label %952, label %1085
-
-; <label>:952                                     ; preds = %942
-  %953 = fptoui float %45 to i32
-  %954 = fptoui float %182 to i32
-  %955 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %956 = extractvalue %dx.types.CBufRet.i32 %955, 0
-  %957 = extractvalue %dx.types.CBufRet.i32 %955, 1
-  %958 = extractvalue %dx.types.CBufRet.i32 %955, 2
-  %959 = extractvalue %dx.types.CBufRet.i32 %955, 3
-  %960 = mul i32 %956, %953
-  %961 = call i32 @dx.op.tertiary.i32(i32 48, i32 %954, i32 %957, i32 %960)  ; IMad(a,b,c)
-  %962 = call i32 @dx.op.tertiary.i32(i32 48, i32 %949, i32 %958, i32 %961)  ; IMad(a,b,c)
-  %963 = call i32 @dx.op.tertiary.i32(i32 48, i32 %944, i32 %959, i32 %962)  ; IMad(a,b,c)
-  %964 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %963, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %965 = extractvalue %dx.types.ResRet.i32 %964, 0
-  %966 = sitofp i32 %965 to float
-  br label %1085
-
-; <label>:967                                     ; preds = %935
-  %968 = icmp eq i32 %940, 1
-  br i1 %968, label %969, label %998
-
-; <label>:969                                     ; preds = %967
-  %970 = add i32 %13, -1
-  %971 = uitofp i32 %970 to float
-  %972 = call float @dx.op.binary.f32(i32 35, float %937, float 0.000000e+00)  ; FMax(a,b)
-  %973 = call float @dx.op.binary.f32(i32 36, float %972, float %971)  ; FMin(a,b)
-  %974 = fptoui float %973 to i32
-  %975 = add i32 %15, -1
-  %976 = uitofp i32 %975 to float
-  %977 = call float @dx.op.binary.f32(i32 35, float %939, float 0.000000e+00)  ; FMax(a,b)
-  %978 = call float @dx.op.binary.f32(i32 36, float %977, float %976)  ; FMin(a,b)
-  %979 = fptoui float %978 to i32
-  %980 = uitofp i32 %979 to float
-  %981 = uitofp i32 %974 to float
-  %982 = fptoui float %45 to i32
-  %983 = fptoui float %182 to i32
-  %984 = fptoui float %980 to i32
-  %985 = fptoui float %981 to i32
-  %986 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %987 = extractvalue %dx.types.CBufRet.i32 %986, 0
-  %988 = extractvalue %dx.types.CBufRet.i32 %986, 1
-  %989 = extractvalue %dx.types.CBufRet.i32 %986, 2
-  %990 = extractvalue %dx.types.CBufRet.i32 %986, 3
-  %991 = mul i32 %987, %982
-  %992 = call i32 @dx.op.tertiary.i32(i32 48, i32 %983, i32 %988, i32 %991)  ; IMad(a,b,c)
-  %993 = call i32 @dx.op.tertiary.i32(i32 48, i32 %984, i32 %989, i32 %992)  ; IMad(a,b,c)
-  %994 = call i32 @dx.op.tertiary.i32(i32 48, i32 %985, i32 %990, i32 %993)  ; IMad(a,b,c)
-  %995 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %994, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %996 = extractvalue %dx.types.ResRet.i32 %995, 0
-  %997 = sitofp i32 %996 to float
-  br label %1085
-
-; <label>:998                                     ; preds = %967
-  %999 = icmp eq i32 %940, 2
-  br i1 %999, label %1000, label %1085
-
-; <label>:1000                                    ; preds = %998
-  %1001 = fsub fast float %22, %20
-  %1002 = fcmp fast olt float %937, %20
-  br i1 %1002, label %1003, label %1016
-
-; <label>:1003                                    ; preds = %1000
-  %1004 = fsub fast float %20, %937
-  %1005 = fdiv fast float %1004, %1001
-  %1006 = fptoui float %1005 to i32
-  %1007 = uitofp i32 %1006 to float
-  %1008 = fmul fast float %1007, %1001
-  %1009 = fsub fast float %1004, %1008
-  %1010 = and i32 %1006, 1
-  %1011 = icmp eq i32 %1010, 0
-  br i1 %1011, label %1012, label %1014
-
-; <label>:1012                                    ; preds = %1003
-  %1013 = fadd fast float %1009, %20
-  br label %1031
-
-; <label>:1014                                    ; preds = %1003
-  %1015 = fsub fast float %22, %1009
-  br label %1031
-
-; <label>:1016                                    ; preds = %1000
-  %1017 = fcmp fast ogt float %937, %22
-  br i1 %1017, label %1018, label %1031
-
-; <label>:1018                                    ; preds = %1016
-  %1019 = fsub fast float %937, %22
-  %1020 = fdiv fast float %1019, %1001
-  %1021 = fptoui float %1020 to i32
-  %1022 = uitofp i32 %1021 to float
-  %1023 = fmul fast float %1022, %1001
-  %1024 = fsub fast float %1019, %1023
-  %1025 = and i32 %1021, 1
-  %1026 = icmp eq i32 %1025, 0
-  br i1 %1026, label %1027, label %1029
-
-; <label>:1027                                    ; preds = %1018
-  %1028 = fsub fast float %22, %1024
-  br label %1031
-
-; <label>:1029                                    ; preds = %1018
-  %1030 = fadd fast float %1024, %20
-  br label %1031
-
-; <label>:1031                                    ; preds = %1029, %1027, %1016, %1014, %1012
-  %1032 = phi float [ %1013, %1012 ], [ %1015, %1014 ], [ %1028, %1027 ], [ %1030, %1029 ], [ %937, %1016 ]
-  %1033 = fptoui float %1032 to i32
-  %1034 = fsub fast float %24, %20
-  %1035 = fcmp fast olt float %939, %20
-  br i1 %1035, label %1036, label %1049
-
-; <label>:1036                                    ; preds = %1031
-  %1037 = fsub fast float %20, %939
-  %1038 = fdiv fast float %1037, %1034
-  %1039 = fptoui float %1038 to i32
-  %1040 = uitofp i32 %1039 to float
-  %1041 = fmul fast float %1040, %1034
-  %1042 = fsub fast float %1037, %1041
-  %1043 = and i32 %1039, 1
-  %1044 = icmp eq i32 %1043, 0
-  br i1 %1044, label %1045, label %1047
-
-; <label>:1045                                    ; preds = %1036
-  %1046 = fadd fast float %1042, %20
-  br label %1064
-
-; <label>:1047                                    ; preds = %1036
-  %1048 = fsub fast float %24, %1042
-  br label %1064
-
-; <label>:1049                                    ; preds = %1031
-  %1050 = fcmp fast ogt float %939, %24
-  br i1 %1050, label %1051, label %1064
-
-; <label>:1051                                    ; preds = %1049
-  %1052 = fsub fast float %939, %24
-  %1053 = fdiv fast float %1052, %1034
-  %1054 = fptoui float %1053 to i32
-  %1055 = uitofp i32 %1054 to float
-  %1056 = fmul fast float %1055, %1034
-  %1057 = fsub fast float %1052, %1056
-  %1058 = and i32 %1054, 1
-  %1059 = icmp eq i32 %1058, 0
-  br i1 %1059, label %1060, label %1062
-
-; <label>:1060                                    ; preds = %1051
-  %1061 = fsub fast float %24, %1057
-  br label %1064
-
-; <label>:1062                                    ; preds = %1051
-  %1063 = fadd fast float %1057, %20
-  br label %1064
-
-; <label>:1064                                    ; preds = %1062, %1060, %1049, %1047, %1045
-  %1065 = phi float [ %1046, %1045 ], [ %1048, %1047 ], [ %1061, %1060 ], [ %1063, %1062 ], [ %939, %1049 ]
-  %1066 = fptoui float %1065 to i32
-  %1067 = uitofp i32 %1066 to float
-  %1068 = uitofp i32 %1033 to float
-  %1069 = fptoui float %45 to i32
-  %1070 = fptoui float %182 to i32
-  %1071 = fptoui float %1067 to i32
-  %1072 = fptoui float %1068 to i32
-  %1073 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1074 = extractvalue %dx.types.CBufRet.i32 %1073, 0
-  %1075 = extractvalue %dx.types.CBufRet.i32 %1073, 1
-  %1076 = extractvalue %dx.types.CBufRet.i32 %1073, 2
-  %1077 = extractvalue %dx.types.CBufRet.i32 %1073, 3
-  %1078 = mul i32 %1074, %1069
-  %1079 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1070, i32 %1075, i32 %1078)  ; IMad(a,b,c)
-  %1080 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1071, i32 %1076, i32 %1079)  ; IMad(a,b,c)
-  %1081 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1072, i32 %1077, i32 %1080)  ; IMad(a,b,c)
-  %1082 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1081, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1083 = extractvalue %dx.types.ResRet.i32 %1082, 0
-  %1084 = sitofp i32 %1083 to float
-  br label %1085
-
-; <label>:1085                                    ; preds = %1064, %998, %969, %952, %942
-  %1086 = phi float [ %966, %952 ], [ 0.000000e+00, %942 ], [ %997, %969 ], [ %1084, %1064 ], [ 0.000000e+00, %998 ]
-  br i1 %941, label %1087, label %1112
-
-; <label>:1087                                    ; preds = %1085
-  %1088 = fcmp fast oge float %936, 0.000000e+00
-  %1089 = fptoui float %936 to i32
-  %1090 = icmp ult i32 %1089, %13
-  %1091 = and i1 %1088, %1090
-  %1092 = fcmp fast oge float %939, 0.000000e+00
-  %1093 = and i1 %1092, %1091
-  %1094 = fptoui float %939 to i32
-  %1095 = icmp ult i32 %1094, %15
-  %1096 = and i1 %1095, %1093
-  br i1 %1096, label %1097, label %1230
-
-; <label>:1097                                    ; preds = %1087
-  %1098 = fptoui float %45 to i32
-  %1099 = fptoui float %182 to i32
-  %1100 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1101 = extractvalue %dx.types.CBufRet.i32 %1100, 0
-  %1102 = extractvalue %dx.types.CBufRet.i32 %1100, 1
-  %1103 = extractvalue %dx.types.CBufRet.i32 %1100, 2
-  %1104 = extractvalue %dx.types.CBufRet.i32 %1100, 3
-  %1105 = mul i32 %1101, %1098
-  %1106 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1099, i32 %1102, i32 %1105)  ; IMad(a,b,c)
-  %1107 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1094, i32 %1103, i32 %1106)  ; IMad(a,b,c)
-  %1108 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1089, i32 %1104, i32 %1107)  ; IMad(a,b,c)
-  %1109 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1108, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1110 = extractvalue %dx.types.ResRet.i32 %1109, 0
-  %1111 = sitofp i32 %1110 to float
-  br label %1230
-
-; <label>:1112                                    ; preds = %1085
-  %1113 = icmp eq i32 %940, 1
-  br i1 %1113, label %1114, label %1143
-
-; <label>:1114                                    ; preds = %1112
-  %1115 = add i32 %13, -1
-  %1116 = uitofp i32 %1115 to float
-  %1117 = call float @dx.op.binary.f32(i32 35, float %936, float 0.000000e+00)  ; FMax(a,b)
-  %1118 = call float @dx.op.binary.f32(i32 36, float %1117, float %1116)  ; FMin(a,b)
-  %1119 = fptoui float %1118 to i32
-  %1120 = add i32 %15, -1
-  %1121 = uitofp i32 %1120 to float
-  %1122 = call float @dx.op.binary.f32(i32 35, float %939, float 0.000000e+00)  ; FMax(a,b)
-  %1123 = call float @dx.op.binary.f32(i32 36, float %1122, float %1121)  ; FMin(a,b)
-  %1124 = fptoui float %1123 to i32
-  %1125 = uitofp i32 %1124 to float
-  %1126 = uitofp i32 %1119 to float
-  %1127 = fptoui float %45 to i32
-  %1128 = fptoui float %182 to i32
-  %1129 = fptoui float %1125 to i32
-  %1130 = fptoui float %1126 to i32
-  %1131 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1132 = extractvalue %dx.types.CBufRet.i32 %1131, 0
-  %1133 = extractvalue %dx.types.CBufRet.i32 %1131, 1
-  %1134 = extractvalue %dx.types.CBufRet.i32 %1131, 2
-  %1135 = extractvalue %dx.types.CBufRet.i32 %1131, 3
-  %1136 = mul i32 %1132, %1127
-  %1137 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1128, i32 %1133, i32 %1136)  ; IMad(a,b,c)
-  %1138 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1129, i32 %1134, i32 %1137)  ; IMad(a,b,c)
-  %1139 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1130, i32 %1135, i32 %1138)  ; IMad(a,b,c)
-  %1140 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1139, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1141 = extractvalue %dx.types.ResRet.i32 %1140, 0
-  %1142 = sitofp i32 %1141 to float
-  br label %1230
-
-; <label>:1143                                    ; preds = %1112
-  %1144 = icmp eq i32 %940, 2
-  br i1 %1144, label %1145, label %1230
-
-; <label>:1145                                    ; preds = %1143
-  %1146 = fsub fast float %22, %20
-  %1147 = fcmp fast olt float %936, %20
-  br i1 %1147, label %1148, label %1161
-
-; <label>:1148                                    ; preds = %1145
-  %1149 = fsub fast float %20, %936
-  %1150 = fdiv fast float %1149, %1146
-  %1151 = fptoui float %1150 to i32
-  %1152 = uitofp i32 %1151 to float
-  %1153 = fmul fast float %1152, %1146
-  %1154 = fsub fast float %1149, %1153
-  %1155 = and i32 %1151, 1
-  %1156 = icmp eq i32 %1155, 0
-  br i1 %1156, label %1157, label %1159
-
-; <label>:1157                                    ; preds = %1148
-  %1158 = fadd fast float %1154, %20
-  br label %1176
-
-; <label>:1159                                    ; preds = %1148
-  %1160 = fsub fast float %22, %1154
-  br label %1176
-
-; <label>:1161                                    ; preds = %1145
-  %1162 = fcmp fast ogt float %936, %22
-  br i1 %1162, label %1163, label %1176
-
-; <label>:1163                                    ; preds = %1161
-  %1164 = fsub fast float %936, %22
-  %1165 = fdiv fast float %1164, %1146
-  %1166 = fptoui float %1165 to i32
-  %1167 = uitofp i32 %1166 to float
-  %1168 = fmul fast float %1167, %1146
-  %1169 = fsub fast float %1164, %1168
-  %1170 = and i32 %1166, 1
-  %1171 = icmp eq i32 %1170, 0
-  br i1 %1171, label %1172, label %1174
-
-; <label>:1172                                    ; preds = %1163
-  %1173 = fsub fast float %22, %1169
-  br label %1176
-
-; <label>:1174                                    ; preds = %1163
-  %1175 = fadd fast float %1169, %20
-  br label %1176
-
-; <label>:1176                                    ; preds = %1174, %1172, %1161, %1159, %1157
-  %1177 = phi float [ %1158, %1157 ], [ %1160, %1159 ], [ %1173, %1172 ], [ %1175, %1174 ], [ %936, %1161 ]
-  %1178 = fptoui float %1177 to i32
-  %1179 = fsub fast float %24, %20
-  %1180 = fcmp fast olt float %939, %20
-  br i1 %1180, label %1181, label %1194
-
-; <label>:1181                                    ; preds = %1176
-  %1182 = fsub fast float %20, %939
-  %1183 = fdiv fast float %1182, %1179
-  %1184 = fptoui float %1183 to i32
-  %1185 = uitofp i32 %1184 to float
-  %1186 = fmul fast float %1185, %1179
-  %1187 = fsub fast float %1182, %1186
-  %1188 = and i32 %1184, 1
-  %1189 = icmp eq i32 %1188, 0
-  br i1 %1189, label %1190, label %1192
-
-; <label>:1190                                    ; preds = %1181
-  %1191 = fadd fast float %1187, %20
-  br label %1209
-
-; <label>:1192                                    ; preds = %1181
-  %1193 = fsub fast float %24, %1187
-  br label %1209
-
-; <label>:1194                                    ; preds = %1176
-  %1195 = fcmp fast ogt float %939, %24
-  br i1 %1195, label %1196, label %1209
-
-; <label>:1196                                    ; preds = %1194
-  %1197 = fsub fast float %939, %24
-  %1198 = fdiv fast float %1197, %1179
-  %1199 = fptoui float %1198 to i32
-  %1200 = uitofp i32 %1199 to float
-  %1201 = fmul fast float %1200, %1179
-  %1202 = fsub fast float %1197, %1201
-  %1203 = and i32 %1199, 1
-  %1204 = icmp eq i32 %1203, 0
-  br i1 %1204, label %1205, label %1207
-
-; <label>:1205                                    ; preds = %1196
-  %1206 = fsub fast float %24, %1202
-  br label %1209
-
-; <label>:1207                                    ; preds = %1196
-  %1208 = fadd fast float %1202, %20
-  br label %1209
-
-; <label>:1209                                    ; preds = %1207, %1205, %1194, %1192, %1190
-  %1210 = phi float [ %1191, %1190 ], [ %1193, %1192 ], [ %1206, %1205 ], [ %1208, %1207 ], [ %939, %1194 ]
-  %1211 = fptoui float %1210 to i32
-  %1212 = uitofp i32 %1211 to float
-  %1213 = uitofp i32 %1178 to float
-  %1214 = fptoui float %45 to i32
-  %1215 = fptoui float %182 to i32
-  %1216 = fptoui float %1212 to i32
-  %1217 = fptoui float %1213 to i32
-  %1218 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1219 = extractvalue %dx.types.CBufRet.i32 %1218, 0
-  %1220 = extractvalue %dx.types.CBufRet.i32 %1218, 1
-  %1221 = extractvalue %dx.types.CBufRet.i32 %1218, 2
-  %1222 = extractvalue %dx.types.CBufRet.i32 %1218, 3
-  %1223 = mul i32 %1219, %1214
-  %1224 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1215, i32 %1220, i32 %1223)  ; IMad(a,b,c)
-  %1225 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1216, i32 %1221, i32 %1224)  ; IMad(a,b,c)
-  %1226 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1217, i32 %1222, i32 %1225)  ; IMad(a,b,c)
-  %1227 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1226, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1228 = extractvalue %dx.types.ResRet.i32 %1227, 0
-  %1229 = sitofp i32 %1228 to float
-  br label %1230
-
-; <label>:1230                                    ; preds = %1209, %1143, %1114, %1097, %1087
-  %1231 = phi float [ %1111, %1097 ], [ 0.000000e+00, %1087 ], [ %1142, %1114 ], [ %1229, %1209 ], [ 0.000000e+00, %1143 ]
-  %1232 = fadd fast float %936, 1.000000e+00
-  br i1 %941, label %1233, label %1258
-
-; <label>:1233                                    ; preds = %1230
-  %1234 = fcmp fast oge float %1232, 0.000000e+00
-  %1235 = fptoui float %1232 to i32
-  %1236 = icmp ult i32 %1235, %13
-  %1237 = and i1 %1234, %1236
-  %1238 = fcmp fast oge float %939, 0.000000e+00
-  %1239 = and i1 %1238, %1237
-  %1240 = fptoui float %939 to i32
-  %1241 = icmp ult i32 %1240, %15
-  %1242 = and i1 %1241, %1239
-  br i1 %1242, label %1243, label %1376
-
-; <label>:1243                                    ; preds = %1233
-  %1244 = fptoui float %45 to i32
-  %1245 = fptoui float %182 to i32
-  %1246 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1247 = extractvalue %dx.types.CBufRet.i32 %1246, 0
-  %1248 = extractvalue %dx.types.CBufRet.i32 %1246, 1
-  %1249 = extractvalue %dx.types.CBufRet.i32 %1246, 2
-  %1250 = extractvalue %dx.types.CBufRet.i32 %1246, 3
-  %1251 = mul i32 %1247, %1244
-  %1252 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1245, i32 %1248, i32 %1251)  ; IMad(a,b,c)
-  %1253 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1240, i32 %1249, i32 %1252)  ; IMad(a,b,c)
-  %1254 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1235, i32 %1250, i32 %1253)  ; IMad(a,b,c)
-  %1255 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1254, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1256 = extractvalue %dx.types.ResRet.i32 %1255, 0
-  %1257 = sitofp i32 %1256 to float
-  br label %1376
-
-; <label>:1258                                    ; preds = %1230
-  %1259 = icmp eq i32 %940, 1
-  br i1 %1259, label %1260, label %1289
-
-; <label>:1260                                    ; preds = %1258
-  %1261 = add i32 %13, -1
-  %1262 = uitofp i32 %1261 to float
-  %1263 = call float @dx.op.binary.f32(i32 35, float %1232, float 0.000000e+00)  ; FMax(a,b)
-  %1264 = call float @dx.op.binary.f32(i32 36, float %1263, float %1262)  ; FMin(a,b)
-  %1265 = fptoui float %1264 to i32
-  %1266 = add i32 %15, -1
-  %1267 = uitofp i32 %1266 to float
-  %1268 = call float @dx.op.binary.f32(i32 35, float %939, float 0.000000e+00)  ; FMax(a,b)
-  %1269 = call float @dx.op.binary.f32(i32 36, float %1268, float %1267)  ; FMin(a,b)
-  %1270 = fptoui float %1269 to i32
-  %1271 = uitofp i32 %1270 to float
-  %1272 = uitofp i32 %1265 to float
-  %1273 = fptoui float %45 to i32
-  %1274 = fptoui float %182 to i32
-  %1275 = fptoui float %1271 to i32
-  %1276 = fptoui float %1272 to i32
-  %1277 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1278 = extractvalue %dx.types.CBufRet.i32 %1277, 0
-  %1279 = extractvalue %dx.types.CBufRet.i32 %1277, 1
-  %1280 = extractvalue %dx.types.CBufRet.i32 %1277, 2
-  %1281 = extractvalue %dx.types.CBufRet.i32 %1277, 3
-  %1282 = mul i32 %1278, %1273
-  %1283 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1274, i32 %1279, i32 %1282)  ; IMad(a,b,c)
-  %1284 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1275, i32 %1280, i32 %1283)  ; IMad(a,b,c)
-  %1285 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1276, i32 %1281, i32 %1284)  ; IMad(a,b,c)
-  %1286 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1285, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1287 = extractvalue %dx.types.ResRet.i32 %1286, 0
-  %1288 = sitofp i32 %1287 to float
-  br label %1376
-
-; <label>:1289                                    ; preds = %1258
-  %1290 = icmp eq i32 %940, 2
-  br i1 %1290, label %1291, label %1376
-
-; <label>:1291                                    ; preds = %1289
-  %1292 = fsub fast float %22, %20
-  %1293 = fcmp fast olt float %1232, %20
-  br i1 %1293, label %1294, label %1307
-
-; <label>:1294                                    ; preds = %1291
-  %1295 = fsub fast float %20, %1232
-  %1296 = fdiv fast float %1295, %1292
-  %1297 = fptoui float %1296 to i32
-  %1298 = uitofp i32 %1297 to float
-  %1299 = fmul fast float %1298, %1292
-  %1300 = fsub fast float %1295, %1299
-  %1301 = and i32 %1297, 1
-  %1302 = icmp eq i32 %1301, 0
-  br i1 %1302, label %1303, label %1305
-
-; <label>:1303                                    ; preds = %1294
-  %1304 = fadd fast float %1300, %20
-  br label %1322
-
-; <label>:1305                                    ; preds = %1294
-  %1306 = fsub fast float %22, %1300
-  br label %1322
-
-; <label>:1307                                    ; preds = %1291
-  %1308 = fcmp fast ogt float %1232, %22
-  br i1 %1308, label %1309, label %1322
-
-; <label>:1309                                    ; preds = %1307
-  %1310 = fsub fast float %1232, %22
-  %1311 = fdiv fast float %1310, %1292
-  %1312 = fptoui float %1311 to i32
-  %1313 = uitofp i32 %1312 to float
-  %1314 = fmul fast float %1313, %1292
-  %1315 = fsub fast float %1310, %1314
-  %1316 = and i32 %1312, 1
-  %1317 = icmp eq i32 %1316, 0
-  br i1 %1317, label %1318, label %1320
-
-; <label>:1318                                    ; preds = %1309
-  %1319 = fsub fast float %22, %1315
-  br label %1322
-
-; <label>:1320                                    ; preds = %1309
-  %1321 = fadd fast float %1315, %20
-  br label %1322
-
-; <label>:1322                                    ; preds = %1320, %1318, %1307, %1305, %1303
-  %1323 = phi float [ %1304, %1303 ], [ %1306, %1305 ], [ %1319, %1318 ], [ %1321, %1320 ], [ %1232, %1307 ]
-  %1324 = fptoui float %1323 to i32
-  %1325 = fsub fast float %24, %20
-  %1326 = fcmp fast olt float %939, %20
-  br i1 %1326, label %1327, label %1340
-
-; <label>:1327                                    ; preds = %1322
-  %1328 = fsub fast float %20, %939
-  %1329 = fdiv fast float %1328, %1325
-  %1330 = fptoui float %1329 to i32
-  %1331 = uitofp i32 %1330 to float
-  %1332 = fmul fast float %1331, %1325
-  %1333 = fsub fast float %1328, %1332
-  %1334 = and i32 %1330, 1
-  %1335 = icmp eq i32 %1334, 0
-  br i1 %1335, label %1336, label %1338
-
-; <label>:1336                                    ; preds = %1327
-  %1337 = fadd fast float %1333, %20
-  br label %1355
-
-; <label>:1338                                    ; preds = %1327
-  %1339 = fsub fast float %24, %1333
-  br label %1355
-
-; <label>:1340                                    ; preds = %1322
-  %1341 = fcmp fast ogt float %939, %24
-  br i1 %1341, label %1342, label %1355
-
-; <label>:1342                                    ; preds = %1340
-  %1343 = fsub fast float %939, %24
-  %1344 = fdiv fast float %1343, %1325
-  %1345 = fptoui float %1344 to i32
-  %1346 = uitofp i32 %1345 to float
-  %1347 = fmul fast float %1346, %1325
-  %1348 = fsub fast float %1343, %1347
-  %1349 = and i32 %1345, 1
-  %1350 = icmp eq i32 %1349, 0
-  br i1 %1350, label %1351, label %1353
-
-; <label>:1351                                    ; preds = %1342
-  %1352 = fsub fast float %24, %1348
-  br label %1355
-
-; <label>:1353                                    ; preds = %1342
-  %1354 = fadd fast float %1348, %20
-  br label %1355
-
-; <label>:1355                                    ; preds = %1353, %1351, %1340, %1338, %1336
-  %1356 = phi float [ %1337, %1336 ], [ %1339, %1338 ], [ %1352, %1351 ], [ %1354, %1353 ], [ %939, %1340 ]
-  %1357 = fptoui float %1356 to i32
-  %1358 = uitofp i32 %1357 to float
-  %1359 = uitofp i32 %1324 to float
-  %1360 = fptoui float %45 to i32
-  %1361 = fptoui float %182 to i32
-  %1362 = fptoui float %1358 to i32
-  %1363 = fptoui float %1359 to i32
-  %1364 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1365 = extractvalue %dx.types.CBufRet.i32 %1364, 0
-  %1366 = extractvalue %dx.types.CBufRet.i32 %1364, 1
-  %1367 = extractvalue %dx.types.CBufRet.i32 %1364, 2
-  %1368 = extractvalue %dx.types.CBufRet.i32 %1364, 3
-  %1369 = mul i32 %1365, %1360
-  %1370 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1361, i32 %1366, i32 %1369)  ; IMad(a,b,c)
-  %1371 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1362, i32 %1367, i32 %1370)  ; IMad(a,b,c)
-  %1372 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1363, i32 %1368, i32 %1371)  ; IMad(a,b,c)
-  %1373 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1372, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1374 = extractvalue %dx.types.ResRet.i32 %1373, 0
-  %1375 = sitofp i32 %1374 to float
-  br label %1376
-
-; <label>:1376                                    ; preds = %1355, %1289, %1260, %1243, %1233
-  %1377 = phi float [ %1257, %1243 ], [ 0.000000e+00, %1233 ], [ %1288, %1260 ], [ %1375, %1355 ], [ 0.000000e+00, %1289 ]
-  %1378 = fadd fast float %936, 2.000000e+00
-  br i1 %941, label %1379, label %1404
-
-; <label>:1379                                    ; preds = %1376
-  %1380 = fcmp fast oge float %1378, 0.000000e+00
-  %1381 = fptoui float %1378 to i32
-  %1382 = icmp ult i32 %1381, %13
-  %1383 = and i1 %1380, %1382
-  %1384 = fcmp fast oge float %939, 0.000000e+00
-  %1385 = and i1 %1384, %1383
-  %1386 = fptoui float %939 to i32
-  %1387 = icmp ult i32 %1386, %15
-  %1388 = and i1 %1387, %1385
-  br i1 %1388, label %1389, label %1522
-
-; <label>:1389                                    ; preds = %1379
-  %1390 = fptoui float %45 to i32
-  %1391 = fptoui float %182 to i32
-  %1392 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1393 = extractvalue %dx.types.CBufRet.i32 %1392, 0
-  %1394 = extractvalue %dx.types.CBufRet.i32 %1392, 1
-  %1395 = extractvalue %dx.types.CBufRet.i32 %1392, 2
-  %1396 = extractvalue %dx.types.CBufRet.i32 %1392, 3
-  %1397 = mul i32 %1393, %1390
-  %1398 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1391, i32 %1394, i32 %1397)  ; IMad(a,b,c)
-  %1399 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1386, i32 %1395, i32 %1398)  ; IMad(a,b,c)
-  %1400 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1381, i32 %1396, i32 %1399)  ; IMad(a,b,c)
-  %1401 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1400, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1402 = extractvalue %dx.types.ResRet.i32 %1401, 0
-  %1403 = sitofp i32 %1402 to float
-  br label %1522
-
-; <label>:1404                                    ; preds = %1376
-  %1405 = icmp eq i32 %940, 1
-  br i1 %1405, label %1406, label %1435
-
-; <label>:1406                                    ; preds = %1404
-  %1407 = add i32 %13, -1
-  %1408 = uitofp i32 %1407 to float
-  %1409 = call float @dx.op.binary.f32(i32 35, float %1378, float 0.000000e+00)  ; FMax(a,b)
-  %1410 = call float @dx.op.binary.f32(i32 36, float %1409, float %1408)  ; FMin(a,b)
-  %1411 = fptoui float %1410 to i32
-  %1412 = add i32 %15, -1
-  %1413 = uitofp i32 %1412 to float
-  %1414 = call float @dx.op.binary.f32(i32 35, float %939, float 0.000000e+00)  ; FMax(a,b)
-  %1415 = call float @dx.op.binary.f32(i32 36, float %1414, float %1413)  ; FMin(a,b)
-  %1416 = fptoui float %1415 to i32
-  %1417 = uitofp i32 %1416 to float
-  %1418 = uitofp i32 %1411 to float
-  %1419 = fptoui float %45 to i32
-  %1420 = fptoui float %182 to i32
-  %1421 = fptoui float %1417 to i32
-  %1422 = fptoui float %1418 to i32
-  %1423 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1424 = extractvalue %dx.types.CBufRet.i32 %1423, 0
-  %1425 = extractvalue %dx.types.CBufRet.i32 %1423, 1
-  %1426 = extractvalue %dx.types.CBufRet.i32 %1423, 2
-  %1427 = extractvalue %dx.types.CBufRet.i32 %1423, 3
-  %1428 = mul i32 %1424, %1419
-  %1429 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1420, i32 %1425, i32 %1428)  ; IMad(a,b,c)
-  %1430 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1421, i32 %1426, i32 %1429)  ; IMad(a,b,c)
-  %1431 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1422, i32 %1427, i32 %1430)  ; IMad(a,b,c)
-  %1432 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1431, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1433 = extractvalue %dx.types.ResRet.i32 %1432, 0
-  %1434 = sitofp i32 %1433 to float
-  br label %1522
-
-; <label>:1435                                    ; preds = %1404
-  %1436 = icmp eq i32 %940, 2
-  br i1 %1436, label %1437, label %1522
-
-; <label>:1437                                    ; preds = %1435
-  %1438 = fsub fast float %22, %20
-  %1439 = fcmp fast olt float %1378, %20
-  br i1 %1439, label %1440, label %1453
-
-; <label>:1440                                    ; preds = %1437
-  %1441 = fsub fast float %20, %1378
-  %1442 = fdiv fast float %1441, %1438
-  %1443 = fptoui float %1442 to i32
-  %1444 = uitofp i32 %1443 to float
-  %1445 = fmul fast float %1444, %1438
-  %1446 = fsub fast float %1441, %1445
-  %1447 = and i32 %1443, 1
-  %1448 = icmp eq i32 %1447, 0
-  br i1 %1448, label %1449, label %1451
-
-; <label>:1449                                    ; preds = %1440
-  %1450 = fadd fast float %1446, %20
-  br label %1468
-
-; <label>:1451                                    ; preds = %1440
-  %1452 = fsub fast float %22, %1446
-  br label %1468
-
-; <label>:1453                                    ; preds = %1437
-  %1454 = fcmp fast ogt float %1378, %22
-  br i1 %1454, label %1455, label %1468
-
-; <label>:1455                                    ; preds = %1453
-  %1456 = fsub fast float %1378, %22
-  %1457 = fdiv fast float %1456, %1438
-  %1458 = fptoui float %1457 to i32
-  %1459 = uitofp i32 %1458 to float
-  %1460 = fmul fast float %1459, %1438
-  %1461 = fsub fast float %1456, %1460
-  %1462 = and i32 %1458, 1
-  %1463 = icmp eq i32 %1462, 0
-  br i1 %1463, label %1464, label %1466
-
-; <label>:1464                                    ; preds = %1455
-  %1465 = fsub fast float %22, %1461
-  br label %1468
-
-; <label>:1466                                    ; preds = %1455
-  %1467 = fadd fast float %1461, %20
-  br label %1468
-
-; <label>:1468                                    ; preds = %1466, %1464, %1453, %1451, %1449
-  %1469 = phi float [ %1450, %1449 ], [ %1452, %1451 ], [ %1465, %1464 ], [ %1467, %1466 ], [ %1378, %1453 ]
-  %1470 = fptoui float %1469 to i32
-  %1471 = fsub fast float %24, %20
-  %1472 = fcmp fast olt float %939, %20
-  br i1 %1472, label %1473, label %1486
-
-; <label>:1473                                    ; preds = %1468
-  %1474 = fsub fast float %20, %939
-  %1475 = fdiv fast float %1474, %1471
-  %1476 = fptoui float %1475 to i32
-  %1477 = uitofp i32 %1476 to float
-  %1478 = fmul fast float %1477, %1471
-  %1479 = fsub fast float %1474, %1478
-  %1480 = and i32 %1476, 1
-  %1481 = icmp eq i32 %1480, 0
-  br i1 %1481, label %1482, label %1484
-
-; <label>:1482                                    ; preds = %1473
-  %1483 = fadd fast float %1479, %20
-  br label %1501
-
-; <label>:1484                                    ; preds = %1473
-  %1485 = fsub fast float %24, %1479
-  br label %1501
-
-; <label>:1486                                    ; preds = %1468
-  %1487 = fcmp fast ogt float %939, %24
-  br i1 %1487, label %1488, label %1501
-
-; <label>:1488                                    ; preds = %1486
-  %1489 = fsub fast float %939, %24
-  %1490 = fdiv fast float %1489, %1471
-  %1491 = fptoui float %1490 to i32
-  %1492 = uitofp i32 %1491 to float
-  %1493 = fmul fast float %1492, %1471
-  %1494 = fsub fast float %1489, %1493
-  %1495 = and i32 %1491, 1
-  %1496 = icmp eq i32 %1495, 0
-  br i1 %1496, label %1497, label %1499
-
-; <label>:1497                                    ; preds = %1488
-  %1498 = fsub fast float %24, %1494
-  br label %1501
-
-; <label>:1499                                    ; preds = %1488
-  %1500 = fadd fast float %1494, %20
-  br label %1501
-
-; <label>:1501                                    ; preds = %1499, %1497, %1486, %1484, %1482
-  %1502 = phi float [ %1483, %1482 ], [ %1485, %1484 ], [ %1498, %1497 ], [ %1500, %1499 ], [ %939, %1486 ]
-  %1503 = fptoui float %1502 to i32
-  %1504 = uitofp i32 %1503 to float
-  %1505 = uitofp i32 %1470 to float
-  %1506 = fptoui float %45 to i32
-  %1507 = fptoui float %182 to i32
-  %1508 = fptoui float %1504 to i32
-  %1509 = fptoui float %1505 to i32
-  %1510 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1511 = extractvalue %dx.types.CBufRet.i32 %1510, 0
-  %1512 = extractvalue %dx.types.CBufRet.i32 %1510, 1
-  %1513 = extractvalue %dx.types.CBufRet.i32 %1510, 2
-  %1514 = extractvalue %dx.types.CBufRet.i32 %1510, 3
-  %1515 = mul i32 %1511, %1506
-  %1516 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1507, i32 %1512, i32 %1515)  ; IMad(a,b,c)
-  %1517 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1508, i32 %1513, i32 %1516)  ; IMad(a,b,c)
-  %1518 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1509, i32 %1514, i32 %1517)  ; IMad(a,b,c)
-  %1519 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1518, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1520 = extractvalue %dx.types.ResRet.i32 %1519, 0
-  %1521 = sitofp i32 %1520 to float
-  br label %1522
-
-; <label>:1522                                    ; preds = %1501, %1435, %1406, %1389, %1379
-  %1523 = phi float [ %1403, %1389 ], [ 0.000000e+00, %1379 ], [ %1434, %1406 ], [ %1521, %1501 ], [ 0.000000e+00, %1435 ]
-  br i1 %941, label %1524, label %1549
-
-; <label>:1524                                    ; preds = %1522
-  %1525 = fcmp fast oge float %937, 0.000000e+00
-  %1526 = fptoui float %937 to i32
-  %1527 = icmp ult i32 %1526, %13
-  %1528 = and i1 %1525, %1527
-  %1529 = fcmp fast oge float %938, 0.000000e+00
-  %1530 = and i1 %1529, %1528
-  %1531 = fptoui float %938 to i32
-  %1532 = icmp ult i32 %1531, %15
-  %1533 = and i1 %1532, %1530
-  br i1 %1533, label %1534, label %1667
-
-; <label>:1534                                    ; preds = %1524
-  %1535 = fptoui float %45 to i32
-  %1536 = fptoui float %182 to i32
-  %1537 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1538 = extractvalue %dx.types.CBufRet.i32 %1537, 0
-  %1539 = extractvalue %dx.types.CBufRet.i32 %1537, 1
-  %1540 = extractvalue %dx.types.CBufRet.i32 %1537, 2
-  %1541 = extractvalue %dx.types.CBufRet.i32 %1537, 3
-  %1542 = mul i32 %1538, %1535
-  %1543 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1536, i32 %1539, i32 %1542)  ; IMad(a,b,c)
-  %1544 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1531, i32 %1540, i32 %1543)  ; IMad(a,b,c)
-  %1545 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1526, i32 %1541, i32 %1544)  ; IMad(a,b,c)
-  %1546 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1545, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1547 = extractvalue %dx.types.ResRet.i32 %1546, 0
-  %1548 = sitofp i32 %1547 to float
-  br label %1667
-
-; <label>:1549                                    ; preds = %1522
-  %1550 = icmp eq i32 %940, 1
-  br i1 %1550, label %1551, label %1580
-
-; <label>:1551                                    ; preds = %1549
-  %1552 = add i32 %13, -1
-  %1553 = uitofp i32 %1552 to float
-  %1554 = call float @dx.op.binary.f32(i32 35, float %937, float 0.000000e+00)  ; FMax(a,b)
-  %1555 = call float @dx.op.binary.f32(i32 36, float %1554, float %1553)  ; FMin(a,b)
-  %1556 = fptoui float %1555 to i32
-  %1557 = add i32 %15, -1
-  %1558 = uitofp i32 %1557 to float
-  %1559 = call float @dx.op.binary.f32(i32 35, float %938, float 0.000000e+00)  ; FMax(a,b)
-  %1560 = call float @dx.op.binary.f32(i32 36, float %1559, float %1558)  ; FMin(a,b)
-  %1561 = fptoui float %1560 to i32
-  %1562 = uitofp i32 %1561 to float
-  %1563 = uitofp i32 %1556 to float
-  %1564 = fptoui float %45 to i32
-  %1565 = fptoui float %182 to i32
-  %1566 = fptoui float %1562 to i32
-  %1567 = fptoui float %1563 to i32
-  %1568 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1569 = extractvalue %dx.types.CBufRet.i32 %1568, 0
-  %1570 = extractvalue %dx.types.CBufRet.i32 %1568, 1
-  %1571 = extractvalue %dx.types.CBufRet.i32 %1568, 2
-  %1572 = extractvalue %dx.types.CBufRet.i32 %1568, 3
-  %1573 = mul i32 %1569, %1564
-  %1574 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1565, i32 %1570, i32 %1573)  ; IMad(a,b,c)
-  %1575 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1566, i32 %1571, i32 %1574)  ; IMad(a,b,c)
-  %1576 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1567, i32 %1572, i32 %1575)  ; IMad(a,b,c)
-  %1577 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1576, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1578 = extractvalue %dx.types.ResRet.i32 %1577, 0
-  %1579 = sitofp i32 %1578 to float
-  br label %1667
-
-; <label>:1580                                    ; preds = %1549
-  %1581 = icmp eq i32 %940, 2
-  br i1 %1581, label %1582, label %1667
-
-; <label>:1582                                    ; preds = %1580
-  %1583 = fsub fast float %22, %20
-  %1584 = fcmp fast olt float %937, %20
-  br i1 %1584, label %1585, label %1598
-
-; <label>:1585                                    ; preds = %1582
-  %1586 = fsub fast float %20, %937
-  %1587 = fdiv fast float %1586, %1583
-  %1588 = fptoui float %1587 to i32
-  %1589 = uitofp i32 %1588 to float
-  %1590 = fmul fast float %1589, %1583
-  %1591 = fsub fast float %1586, %1590
-  %1592 = and i32 %1588, 1
-  %1593 = icmp eq i32 %1592, 0
-  br i1 %1593, label %1594, label %1596
-
-; <label>:1594                                    ; preds = %1585
-  %1595 = fadd fast float %1591, %20
-  br label %1613
-
-; <label>:1596                                    ; preds = %1585
-  %1597 = fsub fast float %22, %1591
-  br label %1613
-
-; <label>:1598                                    ; preds = %1582
-  %1599 = fcmp fast ogt float %937, %22
-  br i1 %1599, label %1600, label %1613
-
-; <label>:1600                                    ; preds = %1598
-  %1601 = fsub fast float %937, %22
-  %1602 = fdiv fast float %1601, %1583
-  %1603 = fptoui float %1602 to i32
-  %1604 = uitofp i32 %1603 to float
-  %1605 = fmul fast float %1604, %1583
-  %1606 = fsub fast float %1601, %1605
-  %1607 = and i32 %1603, 1
-  %1608 = icmp eq i32 %1607, 0
-  br i1 %1608, label %1609, label %1611
-
-; <label>:1609                                    ; preds = %1600
-  %1610 = fsub fast float %22, %1606
-  br label %1613
-
-; <label>:1611                                    ; preds = %1600
-  %1612 = fadd fast float %1606, %20
-  br label %1613
-
-; <label>:1613                                    ; preds = %1611, %1609, %1598, %1596, %1594
-  %1614 = phi float [ %1595, %1594 ], [ %1597, %1596 ], [ %1610, %1609 ], [ %1612, %1611 ], [ %937, %1598 ]
-  %1615 = fptoui float %1614 to i32
-  %1616 = fsub fast float %24, %20
-  %1617 = fcmp fast olt float %938, %20
-  br i1 %1617, label %1618, label %1631
-
-; <label>:1618                                    ; preds = %1613
-  %1619 = fsub fast float %20, %938
-  %1620 = fdiv fast float %1619, %1616
-  %1621 = fptoui float %1620 to i32
-  %1622 = uitofp i32 %1621 to float
-  %1623 = fmul fast float %1622, %1616
-  %1624 = fsub fast float %1619, %1623
-  %1625 = and i32 %1621, 1
-  %1626 = icmp eq i32 %1625, 0
-  br i1 %1626, label %1627, label %1629
-
-; <label>:1627                                    ; preds = %1618
-  %1628 = fadd fast float %1624, %20
-  br label %1646
-
-; <label>:1629                                    ; preds = %1618
-  %1630 = fsub fast float %24, %1624
-  br label %1646
-
-; <label>:1631                                    ; preds = %1613
-  %1632 = fcmp fast ogt float %938, %24
-  br i1 %1632, label %1633, label %1646
-
-; <label>:1633                                    ; preds = %1631
-  %1634 = fsub fast float %938, %24
-  %1635 = fdiv fast float %1634, %1616
-  %1636 = fptoui float %1635 to i32
-  %1637 = uitofp i32 %1636 to float
-  %1638 = fmul fast float %1637, %1616
-  %1639 = fsub fast float %1634, %1638
-  %1640 = and i32 %1636, 1
-  %1641 = icmp eq i32 %1640, 0
-  br i1 %1641, label %1642, label %1644
-
-; <label>:1642                                    ; preds = %1633
-  %1643 = fsub fast float %24, %1639
-  br label %1646
-
-; <label>:1644                                    ; preds = %1633
-  %1645 = fadd fast float %1639, %20
-  br label %1646
-
-; <label>:1646                                    ; preds = %1644, %1642, %1631, %1629, %1627
-  %1647 = phi float [ %1628, %1627 ], [ %1630, %1629 ], [ %1643, %1642 ], [ %1645, %1644 ], [ %938, %1631 ]
-  %1648 = fptoui float %1647 to i32
-  %1649 = uitofp i32 %1648 to float
-  %1650 = uitofp i32 %1615 to float
-  %1651 = fptoui float %45 to i32
-  %1652 = fptoui float %182 to i32
-  %1653 = fptoui float %1649 to i32
-  %1654 = fptoui float %1650 to i32
-  %1655 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1656 = extractvalue %dx.types.CBufRet.i32 %1655, 0
-  %1657 = extractvalue %dx.types.CBufRet.i32 %1655, 1
-  %1658 = extractvalue %dx.types.CBufRet.i32 %1655, 2
-  %1659 = extractvalue %dx.types.CBufRet.i32 %1655, 3
-  %1660 = mul i32 %1656, %1651
-  %1661 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1652, i32 %1657, i32 %1660)  ; IMad(a,b,c)
-  %1662 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1653, i32 %1658, i32 %1661)  ; IMad(a,b,c)
-  %1663 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1654, i32 %1659, i32 %1662)  ; IMad(a,b,c)
-  %1664 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1663, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1665 = extractvalue %dx.types.ResRet.i32 %1664, 0
-  %1666 = sitofp i32 %1665 to float
-  br label %1667
-
-; <label>:1667                                    ; preds = %1646, %1580, %1551, %1534, %1524
-  %1668 = phi float [ %1548, %1534 ], [ 0.000000e+00, %1524 ], [ %1579, %1551 ], [ %1666, %1646 ], [ 0.000000e+00, %1580 ]
-  br i1 %941, label %1669, label %1694
-
-; <label>:1669                                    ; preds = %1667
-  %1670 = fcmp fast oge float %936, 0.000000e+00
-  %1671 = fptoui float %936 to i32
-  %1672 = icmp ult i32 %1671, %13
-  %1673 = and i1 %1670, %1672
-  %1674 = fcmp fast oge float %938, 0.000000e+00
-  %1675 = and i1 %1674, %1673
-  %1676 = fptoui float %938 to i32
-  %1677 = icmp ult i32 %1676, %15
-  %1678 = and i1 %1677, %1675
-  br i1 %1678, label %1679, label %1812
-
-; <label>:1679                                    ; preds = %1669
-  %1680 = fptoui float %45 to i32
-  %1681 = fptoui float %182 to i32
-  %1682 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1683 = extractvalue %dx.types.CBufRet.i32 %1682, 0
-  %1684 = extractvalue %dx.types.CBufRet.i32 %1682, 1
-  %1685 = extractvalue %dx.types.CBufRet.i32 %1682, 2
-  %1686 = extractvalue %dx.types.CBufRet.i32 %1682, 3
-  %1687 = mul i32 %1683, %1680
-  %1688 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1681, i32 %1684, i32 %1687)  ; IMad(a,b,c)
-  %1689 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1676, i32 %1685, i32 %1688)  ; IMad(a,b,c)
-  %1690 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1671, i32 %1686, i32 %1689)  ; IMad(a,b,c)
-  %1691 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1690, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1692 = extractvalue %dx.types.ResRet.i32 %1691, 0
-  %1693 = sitofp i32 %1692 to float
-  br label %1812
-
-; <label>:1694                                    ; preds = %1667
-  %1695 = icmp eq i32 %940, 1
-  br i1 %1695, label %1696, label %1725
-
-; <label>:1696                                    ; preds = %1694
-  %1697 = add i32 %13, -1
-  %1698 = uitofp i32 %1697 to float
-  %1699 = call float @dx.op.binary.f32(i32 35, float %936, float 0.000000e+00)  ; FMax(a,b)
-  %1700 = call float @dx.op.binary.f32(i32 36, float %1699, float %1698)  ; FMin(a,b)
-  %1701 = fptoui float %1700 to i32
-  %1702 = add i32 %15, -1
-  %1703 = uitofp i32 %1702 to float
-  %1704 = call float @dx.op.binary.f32(i32 35, float %938, float 0.000000e+00)  ; FMax(a,b)
-  %1705 = call float @dx.op.binary.f32(i32 36, float %1704, float %1703)  ; FMin(a,b)
-  %1706 = fptoui float %1705 to i32
-  %1707 = uitofp i32 %1706 to float
-  %1708 = uitofp i32 %1701 to float
-  %1709 = fptoui float %45 to i32
-  %1710 = fptoui float %182 to i32
-  %1711 = fptoui float %1707 to i32
-  %1712 = fptoui float %1708 to i32
-  %1713 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1714 = extractvalue %dx.types.CBufRet.i32 %1713, 0
-  %1715 = extractvalue %dx.types.CBufRet.i32 %1713, 1
-  %1716 = extractvalue %dx.types.CBufRet.i32 %1713, 2
-  %1717 = extractvalue %dx.types.CBufRet.i32 %1713, 3
-  %1718 = mul i32 %1714, %1709
-  %1719 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1710, i32 %1715, i32 %1718)  ; IMad(a,b,c)
-  %1720 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1711, i32 %1716, i32 %1719)  ; IMad(a,b,c)
-  %1721 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1712, i32 %1717, i32 %1720)  ; IMad(a,b,c)
-  %1722 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1721, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1723 = extractvalue %dx.types.ResRet.i32 %1722, 0
-  %1724 = sitofp i32 %1723 to float
-  br label %1812
-
-; <label>:1725                                    ; preds = %1694
-  %1726 = icmp eq i32 %940, 2
-  br i1 %1726, label %1727, label %1812
-
-; <label>:1727                                    ; preds = %1725
-  %1728 = fsub fast float %22, %20
-  %1729 = fcmp fast olt float %936, %20
-  br i1 %1729, label %1730, label %1743
-
-; <label>:1730                                    ; preds = %1727
-  %1731 = fsub fast float %20, %936
-  %1732 = fdiv fast float %1731, %1728
-  %1733 = fptoui float %1732 to i32
-  %1734 = uitofp i32 %1733 to float
-  %1735 = fmul fast float %1734, %1728
-  %1736 = fsub fast float %1731, %1735
-  %1737 = and i32 %1733, 1
-  %1738 = icmp eq i32 %1737, 0
-  br i1 %1738, label %1739, label %1741
-
-; <label>:1739                                    ; preds = %1730
-  %1740 = fadd fast float %1736, %20
-  br label %1758
-
-; <label>:1741                                    ; preds = %1730
-  %1742 = fsub fast float %22, %1736
-  br label %1758
-
-; <label>:1743                                    ; preds = %1727
-  %1744 = fcmp fast ogt float %936, %22
-  br i1 %1744, label %1745, label %1758
-
-; <label>:1745                                    ; preds = %1743
-  %1746 = fsub fast float %936, %22
-  %1747 = fdiv fast float %1746, %1728
-  %1748 = fptoui float %1747 to i32
-  %1749 = uitofp i32 %1748 to float
-  %1750 = fmul fast float %1749, %1728
-  %1751 = fsub fast float %1746, %1750
-  %1752 = and i32 %1748, 1
-  %1753 = icmp eq i32 %1752, 0
-  br i1 %1753, label %1754, label %1756
-
-; <label>:1754                                    ; preds = %1745
-  %1755 = fsub fast float %22, %1751
-  br label %1758
-
-; <label>:1756                                    ; preds = %1745
-  %1757 = fadd fast float %1751, %20
-  br label %1758
-
-; <label>:1758                                    ; preds = %1756, %1754, %1743, %1741, %1739
-  %1759 = phi float [ %1740, %1739 ], [ %1742, %1741 ], [ %1755, %1754 ], [ %1757, %1756 ], [ %936, %1743 ]
-  %1760 = fptoui float %1759 to i32
-  %1761 = fsub fast float %24, %20
-  %1762 = fcmp fast olt float %938, %20
-  br i1 %1762, label %1763, label %1776
-
-; <label>:1763                                    ; preds = %1758
-  %1764 = fsub fast float %20, %938
-  %1765 = fdiv fast float %1764, %1761
-  %1766 = fptoui float %1765 to i32
-  %1767 = uitofp i32 %1766 to float
-  %1768 = fmul fast float %1767, %1761
-  %1769 = fsub fast float %1764, %1768
-  %1770 = and i32 %1766, 1
-  %1771 = icmp eq i32 %1770, 0
-  br i1 %1771, label %1772, label %1774
-
-; <label>:1772                                    ; preds = %1763
-  %1773 = fadd fast float %1769, %20
-  br label %1791
-
-; <label>:1774                                    ; preds = %1763
-  %1775 = fsub fast float %24, %1769
-  br label %1791
-
-; <label>:1776                                    ; preds = %1758
-  %1777 = fcmp fast ogt float %938, %24
-  br i1 %1777, label %1778, label %1791
-
-; <label>:1778                                    ; preds = %1776
-  %1779 = fsub fast float %938, %24
-  %1780 = fdiv fast float %1779, %1761
-  %1781 = fptoui float %1780 to i32
-  %1782 = uitofp i32 %1781 to float
-  %1783 = fmul fast float %1782, %1761
-  %1784 = fsub fast float %1779, %1783
-  %1785 = and i32 %1781, 1
-  %1786 = icmp eq i32 %1785, 0
-  br i1 %1786, label %1787, label %1789
-
-; <label>:1787                                    ; preds = %1778
-  %1788 = fsub fast float %24, %1784
-  br label %1791
-
-; <label>:1789                                    ; preds = %1778
-  %1790 = fadd fast float %1784, %20
-  br label %1791
-
-; <label>:1791                                    ; preds = %1789, %1787, %1776, %1774, %1772
-  %1792 = phi float [ %1773, %1772 ], [ %1775, %1774 ], [ %1788, %1787 ], [ %1790, %1789 ], [ %938, %1776 ]
-  %1793 = fptoui float %1792 to i32
-  %1794 = uitofp i32 %1793 to float
-  %1795 = uitofp i32 %1760 to float
-  %1796 = fptoui float %45 to i32
-  %1797 = fptoui float %182 to i32
-  %1798 = fptoui float %1794 to i32
-  %1799 = fptoui float %1795 to i32
-  %1800 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1801 = extractvalue %dx.types.CBufRet.i32 %1800, 0
-  %1802 = extractvalue %dx.types.CBufRet.i32 %1800, 1
-  %1803 = extractvalue %dx.types.CBufRet.i32 %1800, 2
-  %1804 = extractvalue %dx.types.CBufRet.i32 %1800, 3
-  %1805 = mul i32 %1801, %1796
-  %1806 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1797, i32 %1802, i32 %1805)  ; IMad(a,b,c)
-  %1807 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1798, i32 %1803, i32 %1806)  ; IMad(a,b,c)
-  %1808 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1799, i32 %1804, i32 %1807)  ; IMad(a,b,c)
-  %1809 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1808, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1810 = extractvalue %dx.types.ResRet.i32 %1809, 0
-  %1811 = sitofp i32 %1810 to float
-  br label %1812
-
-; <label>:1812                                    ; preds = %1791, %1725, %1696, %1679, %1669
-  %1813 = phi float [ %1693, %1679 ], [ 0.000000e+00, %1669 ], [ %1724, %1696 ], [ %1811, %1791 ], [ 0.000000e+00, %1725 ]
-  br i1 %941, label %1814, label %1839
-
-; <label>:1814                                    ; preds = %1812
-  %1815 = fcmp fast oge float %1232, 0.000000e+00
-  %1816 = fptoui float %1232 to i32
-  %1817 = icmp ult i32 %1816, %13
-  %1818 = and i1 %1815, %1817
-  %1819 = fcmp fast oge float %938, 0.000000e+00
-  %1820 = and i1 %1819, %1818
-  %1821 = fptoui float %938 to i32
-  %1822 = icmp ult i32 %1821, %15
-  %1823 = and i1 %1822, %1820
-  br i1 %1823, label %1824, label %1957
-
-; <label>:1824                                    ; preds = %1814
-  %1825 = fptoui float %45 to i32
-  %1826 = fptoui float %182 to i32
-  %1827 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1828 = extractvalue %dx.types.CBufRet.i32 %1827, 0
-  %1829 = extractvalue %dx.types.CBufRet.i32 %1827, 1
-  %1830 = extractvalue %dx.types.CBufRet.i32 %1827, 2
-  %1831 = extractvalue %dx.types.CBufRet.i32 %1827, 3
-  %1832 = mul i32 %1828, %1825
-  %1833 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1826, i32 %1829, i32 %1832)  ; IMad(a,b,c)
-  %1834 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1821, i32 %1830, i32 %1833)  ; IMad(a,b,c)
-  %1835 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1816, i32 %1831, i32 %1834)  ; IMad(a,b,c)
-  %1836 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1835, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1837 = extractvalue %dx.types.ResRet.i32 %1836, 0
-  %1838 = sitofp i32 %1837 to float
-  br label %1957
-
-; <label>:1839                                    ; preds = %1812
-  %1840 = icmp eq i32 %940, 1
-  br i1 %1840, label %1841, label %1870
-
-; <label>:1841                                    ; preds = %1839
-  %1842 = add i32 %13, -1
-  %1843 = uitofp i32 %1842 to float
-  %1844 = call float @dx.op.binary.f32(i32 35, float %1232, float 0.000000e+00)  ; FMax(a,b)
-  %1845 = call float @dx.op.binary.f32(i32 36, float %1844, float %1843)  ; FMin(a,b)
-  %1846 = fptoui float %1845 to i32
-  %1847 = add i32 %15, -1
-  %1848 = uitofp i32 %1847 to float
-  %1849 = call float @dx.op.binary.f32(i32 35, float %938, float 0.000000e+00)  ; FMax(a,b)
-  %1850 = call float @dx.op.binary.f32(i32 36, float %1849, float %1848)  ; FMin(a,b)
-  %1851 = fptoui float %1850 to i32
-  %1852 = uitofp i32 %1851 to float
-  %1853 = uitofp i32 %1846 to float
-  %1854 = fptoui float %45 to i32
-  %1855 = fptoui float %182 to i32
-  %1856 = fptoui float %1852 to i32
-  %1857 = fptoui float %1853 to i32
-  %1858 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1859 = extractvalue %dx.types.CBufRet.i32 %1858, 0
-  %1860 = extractvalue %dx.types.CBufRet.i32 %1858, 1
-  %1861 = extractvalue %dx.types.CBufRet.i32 %1858, 2
-  %1862 = extractvalue %dx.types.CBufRet.i32 %1858, 3
-  %1863 = mul i32 %1859, %1854
-  %1864 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1855, i32 %1860, i32 %1863)  ; IMad(a,b,c)
-  %1865 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1856, i32 %1861, i32 %1864)  ; IMad(a,b,c)
-  %1866 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1857, i32 %1862, i32 %1865)  ; IMad(a,b,c)
-  %1867 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1866, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1868 = extractvalue %dx.types.ResRet.i32 %1867, 0
-  %1869 = sitofp i32 %1868 to float
-  br label %1957
-
-; <label>:1870                                    ; preds = %1839
-  %1871 = icmp eq i32 %940, 2
-  br i1 %1871, label %1872, label %1957
-
-; <label>:1872                                    ; preds = %1870
-  %1873 = fsub fast float %22, %20
-  %1874 = fcmp fast olt float %1232, %20
-  br i1 %1874, label %1875, label %1888
-
-; <label>:1875                                    ; preds = %1872
-  %1876 = fsub fast float %20, %1232
-  %1877 = fdiv fast float %1876, %1873
-  %1878 = fptoui float %1877 to i32
-  %1879 = uitofp i32 %1878 to float
-  %1880 = fmul fast float %1879, %1873
-  %1881 = fsub fast float %1876, %1880
-  %1882 = and i32 %1878, 1
-  %1883 = icmp eq i32 %1882, 0
-  br i1 %1883, label %1884, label %1886
-
-; <label>:1884                                    ; preds = %1875
-  %1885 = fadd fast float %1881, %20
-  br label %1903
-
-; <label>:1886                                    ; preds = %1875
-  %1887 = fsub fast float %22, %1881
-  br label %1903
-
-; <label>:1888                                    ; preds = %1872
-  %1889 = fcmp fast ogt float %1232, %22
-  br i1 %1889, label %1890, label %1903
-
-; <label>:1890                                    ; preds = %1888
-  %1891 = fsub fast float %1232, %22
-  %1892 = fdiv fast float %1891, %1873
-  %1893 = fptoui float %1892 to i32
-  %1894 = uitofp i32 %1893 to float
-  %1895 = fmul fast float %1894, %1873
-  %1896 = fsub fast float %1891, %1895
-  %1897 = and i32 %1893, 1
-  %1898 = icmp eq i32 %1897, 0
-  br i1 %1898, label %1899, label %1901
-
-; <label>:1899                                    ; preds = %1890
-  %1900 = fsub fast float %22, %1896
-  br label %1903
-
-; <label>:1901                                    ; preds = %1890
-  %1902 = fadd fast float %1896, %20
-  br label %1903
-
-; <label>:1903                                    ; preds = %1901, %1899, %1888, %1886, %1884
-  %1904 = phi float [ %1885, %1884 ], [ %1887, %1886 ], [ %1900, %1899 ], [ %1902, %1901 ], [ %1232, %1888 ]
-  %1905 = fptoui float %1904 to i32
-  %1906 = fsub fast float %24, %20
-  %1907 = fcmp fast olt float %938, %20
-  br i1 %1907, label %1908, label %1921
-
-; <label>:1908                                    ; preds = %1903
-  %1909 = fsub fast float %20, %938
-  %1910 = fdiv fast float %1909, %1906
-  %1911 = fptoui float %1910 to i32
-  %1912 = uitofp i32 %1911 to float
-  %1913 = fmul fast float %1912, %1906
-  %1914 = fsub fast float %1909, %1913
-  %1915 = and i32 %1911, 1
-  %1916 = icmp eq i32 %1915, 0
-  br i1 %1916, label %1917, label %1919
-
-; <label>:1917                                    ; preds = %1908
-  %1918 = fadd fast float %1914, %20
-  br label %1936
-
-; <label>:1919                                    ; preds = %1908
-  %1920 = fsub fast float %24, %1914
-  br label %1936
-
-; <label>:1921                                    ; preds = %1903
-  %1922 = fcmp fast ogt float %938, %24
-  br i1 %1922, label %1923, label %1936
-
-; <label>:1923                                    ; preds = %1921
-  %1924 = fsub fast float %938, %24
-  %1925 = fdiv fast float %1924, %1906
-  %1926 = fptoui float %1925 to i32
-  %1927 = uitofp i32 %1926 to float
-  %1928 = fmul fast float %1927, %1906
-  %1929 = fsub fast float %1924, %1928
-  %1930 = and i32 %1926, 1
-  %1931 = icmp eq i32 %1930, 0
-  br i1 %1931, label %1932, label %1934
-
-; <label>:1932                                    ; preds = %1923
-  %1933 = fsub fast float %24, %1929
-  br label %1936
-
-; <label>:1934                                    ; preds = %1923
-  %1935 = fadd fast float %1929, %20
-  br label %1936
-
-; <label>:1936                                    ; preds = %1934, %1932, %1921, %1919, %1917
-  %1937 = phi float [ %1918, %1917 ], [ %1920, %1919 ], [ %1933, %1932 ], [ %1935, %1934 ], [ %938, %1921 ]
-  %1938 = fptoui float %1937 to i32
-  %1939 = uitofp i32 %1938 to float
-  %1940 = uitofp i32 %1905 to float
-  %1941 = fptoui float %45 to i32
-  %1942 = fptoui float %182 to i32
-  %1943 = fptoui float %1939 to i32
-  %1944 = fptoui float %1940 to i32
-  %1945 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1946 = extractvalue %dx.types.CBufRet.i32 %1945, 0
-  %1947 = extractvalue %dx.types.CBufRet.i32 %1945, 1
-  %1948 = extractvalue %dx.types.CBufRet.i32 %1945, 2
-  %1949 = extractvalue %dx.types.CBufRet.i32 %1945, 3
-  %1950 = mul i32 %1946, %1941
-  %1951 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1942, i32 %1947, i32 %1950)  ; IMad(a,b,c)
-  %1952 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1943, i32 %1948, i32 %1951)  ; IMad(a,b,c)
-  %1953 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1944, i32 %1949, i32 %1952)  ; IMad(a,b,c)
-  %1954 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1953, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1955 = extractvalue %dx.types.ResRet.i32 %1954, 0
-  %1956 = sitofp i32 %1955 to float
-  br label %1957
-
-; <label>:1957                                    ; preds = %1936, %1870, %1841, %1824, %1814
-  %1958 = phi float [ %1838, %1824 ], [ 0.000000e+00, %1814 ], [ %1869, %1841 ], [ %1956, %1936 ], [ 0.000000e+00, %1870 ]
-  br i1 %941, label %1959, label %1984
-
-; <label>:1959                                    ; preds = %1957
-  %1960 = fcmp fast oge float %1378, 0.000000e+00
-  %1961 = fptoui float %1378 to i32
-  %1962 = icmp ult i32 %1961, %13
-  %1963 = and i1 %1960, %1962
-  %1964 = fcmp fast oge float %938, 0.000000e+00
-  %1965 = and i1 %1964, %1963
-  %1966 = fptoui float %938 to i32
-  %1967 = icmp ult i32 %1966, %15
-  %1968 = and i1 %1967, %1965
-  br i1 %1968, label %1969, label %2102
-
-; <label>:1969                                    ; preds = %1959
-  %1970 = fptoui float %45 to i32
-  %1971 = fptoui float %182 to i32
-  %1972 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1973 = extractvalue %dx.types.CBufRet.i32 %1972, 0
-  %1974 = extractvalue %dx.types.CBufRet.i32 %1972, 1
-  %1975 = extractvalue %dx.types.CBufRet.i32 %1972, 2
-  %1976 = extractvalue %dx.types.CBufRet.i32 %1972, 3
-  %1977 = mul i32 %1973, %1970
-  %1978 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1971, i32 %1974, i32 %1977)  ; IMad(a,b,c)
-  %1979 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1966, i32 %1975, i32 %1978)  ; IMad(a,b,c)
-  %1980 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1961, i32 %1976, i32 %1979)  ; IMad(a,b,c)
-  %1981 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1980, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1982 = extractvalue %dx.types.ResRet.i32 %1981, 0
-  %1983 = sitofp i32 %1982 to float
-  br label %2102
-
-; <label>:1984                                    ; preds = %1957
-  %1985 = icmp eq i32 %940, 1
-  br i1 %1985, label %1986, label %2015
-
-; <label>:1986                                    ; preds = %1984
-  %1987 = add i32 %13, -1
-  %1988 = uitofp i32 %1987 to float
-  %1989 = call float @dx.op.binary.f32(i32 35, float %1378, float 0.000000e+00)  ; FMax(a,b)
-  %1990 = call float @dx.op.binary.f32(i32 36, float %1989, float %1988)  ; FMin(a,b)
-  %1991 = fptoui float %1990 to i32
-  %1992 = add i32 %15, -1
-  %1993 = uitofp i32 %1992 to float
-  %1994 = call float @dx.op.binary.f32(i32 35, float %938, float 0.000000e+00)  ; FMax(a,b)
-  %1995 = call float @dx.op.binary.f32(i32 36, float %1994, float %1993)  ; FMin(a,b)
-  %1996 = fptoui float %1995 to i32
-  %1997 = uitofp i32 %1996 to float
-  %1998 = uitofp i32 %1991 to float
-  %1999 = fptoui float %45 to i32
-  %2000 = fptoui float %182 to i32
-  %2001 = fptoui float %1997 to i32
-  %2002 = fptoui float %1998 to i32
-  %2003 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2004 = extractvalue %dx.types.CBufRet.i32 %2003, 0
-  %2005 = extractvalue %dx.types.CBufRet.i32 %2003, 1
-  %2006 = extractvalue %dx.types.CBufRet.i32 %2003, 2
-  %2007 = extractvalue %dx.types.CBufRet.i32 %2003, 3
-  %2008 = mul i32 %2004, %1999
-  %2009 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2000, i32 %2005, i32 %2008)  ; IMad(a,b,c)
-  %2010 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2001, i32 %2006, i32 %2009)  ; IMad(a,b,c)
-  %2011 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2002, i32 %2007, i32 %2010)  ; IMad(a,b,c)
-  %2012 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2011, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2013 = extractvalue %dx.types.ResRet.i32 %2012, 0
-  %2014 = sitofp i32 %2013 to float
-  br label %2102
-
-; <label>:2015                                    ; preds = %1984
-  %2016 = icmp eq i32 %940, 2
-  br i1 %2016, label %2017, label %2102
-
-; <label>:2017                                    ; preds = %2015
-  %2018 = fsub fast float %22, %20
-  %2019 = fcmp fast olt float %1378, %20
-  br i1 %2019, label %2020, label %2033
-
-; <label>:2020                                    ; preds = %2017
-  %2021 = fsub fast float %20, %1378
-  %2022 = fdiv fast float %2021, %2018
-  %2023 = fptoui float %2022 to i32
-  %2024 = uitofp i32 %2023 to float
-  %2025 = fmul fast float %2024, %2018
-  %2026 = fsub fast float %2021, %2025
-  %2027 = and i32 %2023, 1
-  %2028 = icmp eq i32 %2027, 0
-  br i1 %2028, label %2029, label %2031
-
-; <label>:2029                                    ; preds = %2020
-  %2030 = fadd fast float %2026, %20
-  br label %2048
-
-; <label>:2031                                    ; preds = %2020
-  %2032 = fsub fast float %22, %2026
-  br label %2048
-
-; <label>:2033                                    ; preds = %2017
-  %2034 = fcmp fast ogt float %1378, %22
-  br i1 %2034, label %2035, label %2048
-
-; <label>:2035                                    ; preds = %2033
-  %2036 = fsub fast float %1378, %22
-  %2037 = fdiv fast float %2036, %2018
-  %2038 = fptoui float %2037 to i32
-  %2039 = uitofp i32 %2038 to float
-  %2040 = fmul fast float %2039, %2018
-  %2041 = fsub fast float %2036, %2040
-  %2042 = and i32 %2038, 1
-  %2043 = icmp eq i32 %2042, 0
-  br i1 %2043, label %2044, label %2046
-
-; <label>:2044                                    ; preds = %2035
-  %2045 = fsub fast float %22, %2041
-  br label %2048
-
-; <label>:2046                                    ; preds = %2035
-  %2047 = fadd fast float %2041, %20
-  br label %2048
-
-; <label>:2048                                    ; preds = %2046, %2044, %2033, %2031, %2029
-  %2049 = phi float [ %2030, %2029 ], [ %2032, %2031 ], [ %2045, %2044 ], [ %2047, %2046 ], [ %1378, %2033 ]
-  %2050 = fptoui float %2049 to i32
-  %2051 = fsub fast float %24, %20
-  %2052 = fcmp fast olt float %938, %20
-  br i1 %2052, label %2053, label %2066
-
-; <label>:2053                                    ; preds = %2048
-  %2054 = fsub fast float %20, %938
-  %2055 = fdiv fast float %2054, %2051
-  %2056 = fptoui float %2055 to i32
-  %2057 = uitofp i32 %2056 to float
-  %2058 = fmul fast float %2057, %2051
-  %2059 = fsub fast float %2054, %2058
-  %2060 = and i32 %2056, 1
-  %2061 = icmp eq i32 %2060, 0
-  br i1 %2061, label %2062, label %2064
-
-; <label>:2062                                    ; preds = %2053
-  %2063 = fadd fast float %2059, %20
-  br label %2081
-
-; <label>:2064                                    ; preds = %2053
-  %2065 = fsub fast float %24, %2059
-  br label %2081
-
-; <label>:2066                                    ; preds = %2048
-  %2067 = fcmp fast ogt float %938, %24
-  br i1 %2067, label %2068, label %2081
-
-; <label>:2068                                    ; preds = %2066
-  %2069 = fsub fast float %938, %24
-  %2070 = fdiv fast float %2069, %2051
-  %2071 = fptoui float %2070 to i32
-  %2072 = uitofp i32 %2071 to float
-  %2073 = fmul fast float %2072, %2051
-  %2074 = fsub fast float %2069, %2073
-  %2075 = and i32 %2071, 1
-  %2076 = icmp eq i32 %2075, 0
-  br i1 %2076, label %2077, label %2079
-
-; <label>:2077                                    ; preds = %2068
-  %2078 = fsub fast float %24, %2074
-  br label %2081
-
-; <label>:2079                                    ; preds = %2068
-  %2080 = fadd fast float %2074, %20
-  br label %2081
-
-; <label>:2081                                    ; preds = %2079, %2077, %2066, %2064, %2062
-  %2082 = phi float [ %2063, %2062 ], [ %2065, %2064 ], [ %2078, %2077 ], [ %2080, %2079 ], [ %938, %2066 ]
-  %2083 = fptoui float %2082 to i32
-  %2084 = uitofp i32 %2083 to float
-  %2085 = uitofp i32 %2050 to float
-  %2086 = fptoui float %45 to i32
-  %2087 = fptoui float %182 to i32
-  %2088 = fptoui float %2084 to i32
-  %2089 = fptoui float %2085 to i32
-  %2090 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2091 = extractvalue %dx.types.CBufRet.i32 %2090, 0
-  %2092 = extractvalue %dx.types.CBufRet.i32 %2090, 1
-  %2093 = extractvalue %dx.types.CBufRet.i32 %2090, 2
-  %2094 = extractvalue %dx.types.CBufRet.i32 %2090, 3
-  %2095 = mul i32 %2091, %2086
-  %2096 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2087, i32 %2092, i32 %2095)  ; IMad(a,b,c)
-  %2097 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2088, i32 %2093, i32 %2096)  ; IMad(a,b,c)
-  %2098 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2089, i32 %2094, i32 %2097)  ; IMad(a,b,c)
-  %2099 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2098, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2100 = extractvalue %dx.types.ResRet.i32 %2099, 0
-  %2101 = sitofp i32 %2100 to float
-  br label %2102
-
-; <label>:2102                                    ; preds = %2081, %2015, %1986, %1969, %1959
-  %2103 = phi float [ %1983, %1969 ], [ 0.000000e+00, %1959 ], [ %2014, %1986 ], [ %2101, %2081 ], [ 0.000000e+00, %2015 ]
-  %2104 = fadd fast float %938, 1.000000e+00
-  br i1 %941, label %2105, label %2130
-
-; <label>:2105                                    ; preds = %2102
-  %2106 = fcmp fast oge float %937, 0.000000e+00
-  %2107 = fptoui float %937 to i32
-  %2108 = icmp ult i32 %2107, %13
-  %2109 = and i1 %2106, %2108
-  %2110 = fcmp fast oge float %2104, 0.000000e+00
-  %2111 = and i1 %2110, %2109
-  %2112 = fptoui float %2104 to i32
-  %2113 = icmp ult i32 %2112, %15
-  %2114 = and i1 %2113, %2111
-  br i1 %2114, label %2115, label %2248
-
-; <label>:2115                                    ; preds = %2105
-  %2116 = fptoui float %45 to i32
-  %2117 = fptoui float %182 to i32
-  %2118 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2119 = extractvalue %dx.types.CBufRet.i32 %2118, 0
-  %2120 = extractvalue %dx.types.CBufRet.i32 %2118, 1
-  %2121 = extractvalue %dx.types.CBufRet.i32 %2118, 2
-  %2122 = extractvalue %dx.types.CBufRet.i32 %2118, 3
-  %2123 = mul i32 %2119, %2116
-  %2124 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2117, i32 %2120, i32 %2123)  ; IMad(a,b,c)
-  %2125 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2112, i32 %2121, i32 %2124)  ; IMad(a,b,c)
-  %2126 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2107, i32 %2122, i32 %2125)  ; IMad(a,b,c)
-  %2127 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2126, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2128 = extractvalue %dx.types.ResRet.i32 %2127, 0
-  %2129 = sitofp i32 %2128 to float
-  br label %2248
-
-; <label>:2130                                    ; preds = %2102
-  %2131 = icmp eq i32 %940, 1
-  br i1 %2131, label %2132, label %2161
-
-; <label>:2132                                    ; preds = %2130
-  %2133 = add i32 %13, -1
-  %2134 = uitofp i32 %2133 to float
-  %2135 = call float @dx.op.binary.f32(i32 35, float %937, float 0.000000e+00)  ; FMax(a,b)
-  %2136 = call float @dx.op.binary.f32(i32 36, float %2135, float %2134)  ; FMin(a,b)
-  %2137 = fptoui float %2136 to i32
-  %2138 = add i32 %15, -1
-  %2139 = uitofp i32 %2138 to float
-  %2140 = call float @dx.op.binary.f32(i32 35, float %2104, float 0.000000e+00)  ; FMax(a,b)
-  %2141 = call float @dx.op.binary.f32(i32 36, float %2140, float %2139)  ; FMin(a,b)
-  %2142 = fptoui float %2141 to i32
-  %2143 = uitofp i32 %2142 to float
-  %2144 = uitofp i32 %2137 to float
-  %2145 = fptoui float %45 to i32
-  %2146 = fptoui float %182 to i32
-  %2147 = fptoui float %2143 to i32
-  %2148 = fptoui float %2144 to i32
-  %2149 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2150 = extractvalue %dx.types.CBufRet.i32 %2149, 0
-  %2151 = extractvalue %dx.types.CBufRet.i32 %2149, 1
-  %2152 = extractvalue %dx.types.CBufRet.i32 %2149, 2
-  %2153 = extractvalue %dx.types.CBufRet.i32 %2149, 3
-  %2154 = mul i32 %2150, %2145
-  %2155 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2146, i32 %2151, i32 %2154)  ; IMad(a,b,c)
-  %2156 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2147, i32 %2152, i32 %2155)  ; IMad(a,b,c)
-  %2157 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2148, i32 %2153, i32 %2156)  ; IMad(a,b,c)
-  %2158 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2157, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2159 = extractvalue %dx.types.ResRet.i32 %2158, 0
-  %2160 = sitofp i32 %2159 to float
-  br label %2248
-
-; <label>:2161                                    ; preds = %2130
-  %2162 = icmp eq i32 %940, 2
-  br i1 %2162, label %2163, label %2248
-
-; <label>:2163                                    ; preds = %2161
-  %2164 = fsub fast float %22, %20
-  %2165 = fcmp fast olt float %937, %20
-  br i1 %2165, label %2166, label %2179
-
-; <label>:2166                                    ; preds = %2163
-  %2167 = fsub fast float %20, %937
-  %2168 = fdiv fast float %2167, %2164
-  %2169 = fptoui float %2168 to i32
-  %2170 = uitofp i32 %2169 to float
-  %2171 = fmul fast float %2170, %2164
-  %2172 = fsub fast float %2167, %2171
-  %2173 = and i32 %2169, 1
-  %2174 = icmp eq i32 %2173, 0
-  br i1 %2174, label %2175, label %2177
-
-; <label>:2175                                    ; preds = %2166
-  %2176 = fadd fast float %2172, %20
-  br label %2194
-
-; <label>:2177                                    ; preds = %2166
-  %2178 = fsub fast float %22, %2172
-  br label %2194
-
-; <label>:2179                                    ; preds = %2163
-  %2180 = fcmp fast ogt float %937, %22
-  br i1 %2180, label %2181, label %2194
-
-; <label>:2181                                    ; preds = %2179
-  %2182 = fsub fast float %937, %22
-  %2183 = fdiv fast float %2182, %2164
-  %2184 = fptoui float %2183 to i32
-  %2185 = uitofp i32 %2184 to float
-  %2186 = fmul fast float %2185, %2164
-  %2187 = fsub fast float %2182, %2186
-  %2188 = and i32 %2184, 1
-  %2189 = icmp eq i32 %2188, 0
-  br i1 %2189, label %2190, label %2192
-
-; <label>:2190                                    ; preds = %2181
-  %2191 = fsub fast float %22, %2187
-  br label %2194
-
-; <label>:2192                                    ; preds = %2181
-  %2193 = fadd fast float %2187, %20
-  br label %2194
-
-; <label>:2194                                    ; preds = %2192, %2190, %2179, %2177, %2175
-  %2195 = phi float [ %2176, %2175 ], [ %2178, %2177 ], [ %2191, %2190 ], [ %2193, %2192 ], [ %937, %2179 ]
-  %2196 = fptoui float %2195 to i32
-  %2197 = fsub fast float %24, %20
-  %2198 = fcmp fast olt float %2104, %20
-  br i1 %2198, label %2199, label %2212
-
-; <label>:2199                                    ; preds = %2194
-  %2200 = fsub fast float %20, %2104
-  %2201 = fdiv fast float %2200, %2197
-  %2202 = fptoui float %2201 to i32
-  %2203 = uitofp i32 %2202 to float
-  %2204 = fmul fast float %2203, %2197
-  %2205 = fsub fast float %2200, %2204
-  %2206 = and i32 %2202, 1
-  %2207 = icmp eq i32 %2206, 0
-  br i1 %2207, label %2208, label %2210
-
-; <label>:2208                                    ; preds = %2199
-  %2209 = fadd fast float %2205, %20
-  br label %2227
-
-; <label>:2210                                    ; preds = %2199
-  %2211 = fsub fast float %24, %2205
-  br label %2227
-
-; <label>:2212                                    ; preds = %2194
-  %2213 = fcmp fast ogt float %2104, %24
-  br i1 %2213, label %2214, label %2227
-
-; <label>:2214                                    ; preds = %2212
-  %2215 = fsub fast float %2104, %24
-  %2216 = fdiv fast float %2215, %2197
-  %2217 = fptoui float %2216 to i32
-  %2218 = uitofp i32 %2217 to float
-  %2219 = fmul fast float %2218, %2197
-  %2220 = fsub fast float %2215, %2219
-  %2221 = and i32 %2217, 1
-  %2222 = icmp eq i32 %2221, 0
-  br i1 %2222, label %2223, label %2225
-
-; <label>:2223                                    ; preds = %2214
-  %2224 = fsub fast float %24, %2220
-  br label %2227
-
-; <label>:2225                                    ; preds = %2214
-  %2226 = fadd fast float %2220, %20
-  br label %2227
-
-; <label>:2227                                    ; preds = %2225, %2223, %2212, %2210, %2208
-  %2228 = phi float [ %2209, %2208 ], [ %2211, %2210 ], [ %2224, %2223 ], [ %2226, %2225 ], [ %2104, %2212 ]
-  %2229 = fptoui float %2228 to i32
-  %2230 = uitofp i32 %2229 to float
-  %2231 = uitofp i32 %2196 to float
-  %2232 = fptoui float %45 to i32
-  %2233 = fptoui float %182 to i32
-  %2234 = fptoui float %2230 to i32
-  %2235 = fptoui float %2231 to i32
-  %2236 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2237 = extractvalue %dx.types.CBufRet.i32 %2236, 0
-  %2238 = extractvalue %dx.types.CBufRet.i32 %2236, 1
-  %2239 = extractvalue %dx.types.CBufRet.i32 %2236, 2
-  %2240 = extractvalue %dx.types.CBufRet.i32 %2236, 3
-  %2241 = mul i32 %2237, %2232
-  %2242 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2233, i32 %2238, i32 %2241)  ; IMad(a,b,c)
-  %2243 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2234, i32 %2239, i32 %2242)  ; IMad(a,b,c)
-  %2244 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2235, i32 %2240, i32 %2243)  ; IMad(a,b,c)
-  %2245 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2244, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2246 = extractvalue %dx.types.ResRet.i32 %2245, 0
-  %2247 = sitofp i32 %2246 to float
-  br label %2248
-
-; <label>:2248                                    ; preds = %2227, %2161, %2132, %2115, %2105
-  %2249 = phi float [ %2129, %2115 ], [ 0.000000e+00, %2105 ], [ %2160, %2132 ], [ %2247, %2227 ], [ 0.000000e+00, %2161 ]
-  br i1 %941, label %2250, label %2275
-
-; <label>:2250                                    ; preds = %2248
-  %2251 = fcmp fast oge float %936, 0.000000e+00
-  %2252 = fptoui float %936 to i32
-  %2253 = icmp ult i32 %2252, %13
-  %2254 = and i1 %2251, %2253
-  %2255 = fcmp fast oge float %2104, 0.000000e+00
-  %2256 = and i1 %2255, %2254
-  %2257 = fptoui float %2104 to i32
-  %2258 = icmp ult i32 %2257, %15
-  %2259 = and i1 %2258, %2256
-  br i1 %2259, label %2260, label %2393
-
-; <label>:2260                                    ; preds = %2250
-  %2261 = fptoui float %45 to i32
-  %2262 = fptoui float %182 to i32
-  %2263 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2264 = extractvalue %dx.types.CBufRet.i32 %2263, 0
-  %2265 = extractvalue %dx.types.CBufRet.i32 %2263, 1
-  %2266 = extractvalue %dx.types.CBufRet.i32 %2263, 2
-  %2267 = extractvalue %dx.types.CBufRet.i32 %2263, 3
-  %2268 = mul i32 %2264, %2261
-  %2269 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2262, i32 %2265, i32 %2268)  ; IMad(a,b,c)
-  %2270 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2257, i32 %2266, i32 %2269)  ; IMad(a,b,c)
-  %2271 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2252, i32 %2267, i32 %2270)  ; IMad(a,b,c)
-  %2272 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2271, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2273 = extractvalue %dx.types.ResRet.i32 %2272, 0
-  %2274 = sitofp i32 %2273 to float
-  br label %2393
-
-; <label>:2275                                    ; preds = %2248
-  %2276 = icmp eq i32 %940, 1
-  br i1 %2276, label %2277, label %2306
-
-; <label>:2277                                    ; preds = %2275
-  %2278 = add i32 %13, -1
-  %2279 = uitofp i32 %2278 to float
-  %2280 = call float @dx.op.binary.f32(i32 35, float %936, float 0.000000e+00)  ; FMax(a,b)
-  %2281 = call float @dx.op.binary.f32(i32 36, float %2280, float %2279)  ; FMin(a,b)
-  %2282 = fptoui float %2281 to i32
-  %2283 = add i32 %15, -1
-  %2284 = uitofp i32 %2283 to float
-  %2285 = call float @dx.op.binary.f32(i32 35, float %2104, float 0.000000e+00)  ; FMax(a,b)
-  %2286 = call float @dx.op.binary.f32(i32 36, float %2285, float %2284)  ; FMin(a,b)
-  %2287 = fptoui float %2286 to i32
-  %2288 = uitofp i32 %2287 to float
-  %2289 = uitofp i32 %2282 to float
-  %2290 = fptoui float %45 to i32
-  %2291 = fptoui float %182 to i32
-  %2292 = fptoui float %2288 to i32
-  %2293 = fptoui float %2289 to i32
-  %2294 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2295 = extractvalue %dx.types.CBufRet.i32 %2294, 0
-  %2296 = extractvalue %dx.types.CBufRet.i32 %2294, 1
-  %2297 = extractvalue %dx.types.CBufRet.i32 %2294, 2
-  %2298 = extractvalue %dx.types.CBufRet.i32 %2294, 3
-  %2299 = mul i32 %2295, %2290
-  %2300 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2291, i32 %2296, i32 %2299)  ; IMad(a,b,c)
-  %2301 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2292, i32 %2297, i32 %2300)  ; IMad(a,b,c)
-  %2302 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2293, i32 %2298, i32 %2301)  ; IMad(a,b,c)
-  %2303 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2302, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2304 = extractvalue %dx.types.ResRet.i32 %2303, 0
-  %2305 = sitofp i32 %2304 to float
-  br label %2393
-
-; <label>:2306                                    ; preds = %2275
-  %2307 = icmp eq i32 %940, 2
-  br i1 %2307, label %2308, label %2393
-
-; <label>:2308                                    ; preds = %2306
-  %2309 = fsub fast float %22, %20
-  %2310 = fcmp fast olt float %936, %20
-  br i1 %2310, label %2311, label %2324
-
-; <label>:2311                                    ; preds = %2308
-  %2312 = fsub fast float %20, %936
-  %2313 = fdiv fast float %2312, %2309
-  %2314 = fptoui float %2313 to i32
-  %2315 = uitofp i32 %2314 to float
-  %2316 = fmul fast float %2315, %2309
-  %2317 = fsub fast float %2312, %2316
-  %2318 = and i32 %2314, 1
-  %2319 = icmp eq i32 %2318, 0
-  br i1 %2319, label %2320, label %2322
-
-; <label>:2320                                    ; preds = %2311
-  %2321 = fadd fast float %2317, %20
-  br label %2339
-
-; <label>:2322                                    ; preds = %2311
-  %2323 = fsub fast float %22, %2317
-  br label %2339
-
-; <label>:2324                                    ; preds = %2308
-  %2325 = fcmp fast ogt float %936, %22
-  br i1 %2325, label %2326, label %2339
-
-; <label>:2326                                    ; preds = %2324
-  %2327 = fsub fast float %936, %22
-  %2328 = fdiv fast float %2327, %2309
-  %2329 = fptoui float %2328 to i32
-  %2330 = uitofp i32 %2329 to float
-  %2331 = fmul fast float %2330, %2309
-  %2332 = fsub fast float %2327, %2331
-  %2333 = and i32 %2329, 1
-  %2334 = icmp eq i32 %2333, 0
-  br i1 %2334, label %2335, label %2337
-
-; <label>:2335                                    ; preds = %2326
-  %2336 = fsub fast float %22, %2332
-  br label %2339
-
-; <label>:2337                                    ; preds = %2326
-  %2338 = fadd fast float %2332, %20
-  br label %2339
-
-; <label>:2339                                    ; preds = %2337, %2335, %2324, %2322, %2320
-  %2340 = phi float [ %2321, %2320 ], [ %2323, %2322 ], [ %2336, %2335 ], [ %2338, %2337 ], [ %936, %2324 ]
-  %2341 = fptoui float %2340 to i32
-  %2342 = fsub fast float %24, %20
-  %2343 = fcmp fast olt float %2104, %20
-  br i1 %2343, label %2344, label %2357
-
-; <label>:2344                                    ; preds = %2339
-  %2345 = fsub fast float %20, %2104
-  %2346 = fdiv fast float %2345, %2342
-  %2347 = fptoui float %2346 to i32
-  %2348 = uitofp i32 %2347 to float
-  %2349 = fmul fast float %2348, %2342
-  %2350 = fsub fast float %2345, %2349
-  %2351 = and i32 %2347, 1
-  %2352 = icmp eq i32 %2351, 0
-  br i1 %2352, label %2353, label %2355
-
-; <label>:2353                                    ; preds = %2344
-  %2354 = fadd fast float %2350, %20
-  br label %2372
-
-; <label>:2355                                    ; preds = %2344
-  %2356 = fsub fast float %24, %2350
-  br label %2372
-
-; <label>:2357                                    ; preds = %2339
-  %2358 = fcmp fast ogt float %2104, %24
-  br i1 %2358, label %2359, label %2372
-
-; <label>:2359                                    ; preds = %2357
-  %2360 = fsub fast float %2104, %24
-  %2361 = fdiv fast float %2360, %2342
-  %2362 = fptoui float %2361 to i32
-  %2363 = uitofp i32 %2362 to float
-  %2364 = fmul fast float %2363, %2342
-  %2365 = fsub fast float %2360, %2364
-  %2366 = and i32 %2362, 1
-  %2367 = icmp eq i32 %2366, 0
-  br i1 %2367, label %2368, label %2370
-
-; <label>:2368                                    ; preds = %2359
-  %2369 = fsub fast float %24, %2365
-  br label %2372
-
-; <label>:2370                                    ; preds = %2359
-  %2371 = fadd fast float %2365, %20
-  br label %2372
-
-; <label>:2372                                    ; preds = %2370, %2368, %2357, %2355, %2353
-  %2373 = phi float [ %2354, %2353 ], [ %2356, %2355 ], [ %2369, %2368 ], [ %2371, %2370 ], [ %2104, %2357 ]
-  %2374 = fptoui float %2373 to i32
-  %2375 = uitofp i32 %2374 to float
-  %2376 = uitofp i32 %2341 to float
-  %2377 = fptoui float %45 to i32
-  %2378 = fptoui float %182 to i32
-  %2379 = fptoui float %2375 to i32
-  %2380 = fptoui float %2376 to i32
-  %2381 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2382 = extractvalue %dx.types.CBufRet.i32 %2381, 0
-  %2383 = extractvalue %dx.types.CBufRet.i32 %2381, 1
-  %2384 = extractvalue %dx.types.CBufRet.i32 %2381, 2
-  %2385 = extractvalue %dx.types.CBufRet.i32 %2381, 3
-  %2386 = mul i32 %2382, %2377
-  %2387 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2378, i32 %2383, i32 %2386)  ; IMad(a,b,c)
-  %2388 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2379, i32 %2384, i32 %2387)  ; IMad(a,b,c)
-  %2389 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2380, i32 %2385, i32 %2388)  ; IMad(a,b,c)
-  %2390 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2389, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2391 = extractvalue %dx.types.ResRet.i32 %2390, 0
-  %2392 = sitofp i32 %2391 to float
-  br label %2393
-
-; <label>:2393                                    ; preds = %2372, %2306, %2277, %2260, %2250
-  %2394 = phi float [ %2274, %2260 ], [ 0.000000e+00, %2250 ], [ %2305, %2277 ], [ %2392, %2372 ], [ 0.000000e+00, %2306 ]
-  br i1 %941, label %2395, label %2420
-
-; <label>:2395                                    ; preds = %2393
-  %2396 = fcmp fast oge float %1232, 0.000000e+00
-  %2397 = fptoui float %1232 to i32
-  %2398 = icmp ult i32 %2397, %13
-  %2399 = and i1 %2396, %2398
-  %2400 = fcmp fast oge float %2104, 0.000000e+00
-  %2401 = and i1 %2400, %2399
-  %2402 = fptoui float %2104 to i32
-  %2403 = icmp ult i32 %2402, %15
-  %2404 = and i1 %2403, %2401
-  br i1 %2404, label %2405, label %2538
-
-; <label>:2405                                    ; preds = %2395
-  %2406 = fptoui float %45 to i32
-  %2407 = fptoui float %182 to i32
-  %2408 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2409 = extractvalue %dx.types.CBufRet.i32 %2408, 0
-  %2410 = extractvalue %dx.types.CBufRet.i32 %2408, 1
-  %2411 = extractvalue %dx.types.CBufRet.i32 %2408, 2
-  %2412 = extractvalue %dx.types.CBufRet.i32 %2408, 3
-  %2413 = mul i32 %2409, %2406
-  %2414 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2407, i32 %2410, i32 %2413)  ; IMad(a,b,c)
-  %2415 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2402, i32 %2411, i32 %2414)  ; IMad(a,b,c)
-  %2416 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2397, i32 %2412, i32 %2415)  ; IMad(a,b,c)
-  %2417 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2416, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2418 = extractvalue %dx.types.ResRet.i32 %2417, 0
-  %2419 = sitofp i32 %2418 to float
-  br label %2538
-
-; <label>:2420                                    ; preds = %2393
-  %2421 = icmp eq i32 %940, 1
-  br i1 %2421, label %2422, label %2451
-
-; <label>:2422                                    ; preds = %2420
-  %2423 = add i32 %13, -1
-  %2424 = uitofp i32 %2423 to float
-  %2425 = call float @dx.op.binary.f32(i32 35, float %1232, float 0.000000e+00)  ; FMax(a,b)
-  %2426 = call float @dx.op.binary.f32(i32 36, float %2425, float %2424)  ; FMin(a,b)
-  %2427 = fptoui float %2426 to i32
-  %2428 = add i32 %15, -1
-  %2429 = uitofp i32 %2428 to float
-  %2430 = call float @dx.op.binary.f32(i32 35, float %2104, float 0.000000e+00)  ; FMax(a,b)
-  %2431 = call float @dx.op.binary.f32(i32 36, float %2430, float %2429)  ; FMin(a,b)
-  %2432 = fptoui float %2431 to i32
-  %2433 = uitofp i32 %2432 to float
-  %2434 = uitofp i32 %2427 to float
-  %2435 = fptoui float %45 to i32
-  %2436 = fptoui float %182 to i32
-  %2437 = fptoui float %2433 to i32
-  %2438 = fptoui float %2434 to i32
-  %2439 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2440 = extractvalue %dx.types.CBufRet.i32 %2439, 0
-  %2441 = extractvalue %dx.types.CBufRet.i32 %2439, 1
-  %2442 = extractvalue %dx.types.CBufRet.i32 %2439, 2
-  %2443 = extractvalue %dx.types.CBufRet.i32 %2439, 3
-  %2444 = mul i32 %2440, %2435
-  %2445 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2436, i32 %2441, i32 %2444)  ; IMad(a,b,c)
-  %2446 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2437, i32 %2442, i32 %2445)  ; IMad(a,b,c)
-  %2447 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2438, i32 %2443, i32 %2446)  ; IMad(a,b,c)
-  %2448 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2447, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2449 = extractvalue %dx.types.ResRet.i32 %2448, 0
-  %2450 = sitofp i32 %2449 to float
-  br label %2538
-
-; <label>:2451                                    ; preds = %2420
-  %2452 = icmp eq i32 %940, 2
-  br i1 %2452, label %2453, label %2538
-
-; <label>:2453                                    ; preds = %2451
-  %2454 = fsub fast float %22, %20
-  %2455 = fcmp fast olt float %1232, %20
-  br i1 %2455, label %2456, label %2469
-
-; <label>:2456                                    ; preds = %2453
-  %2457 = fsub fast float %20, %1232
-  %2458 = fdiv fast float %2457, %2454
-  %2459 = fptoui float %2458 to i32
-  %2460 = uitofp i32 %2459 to float
-  %2461 = fmul fast float %2460, %2454
-  %2462 = fsub fast float %2457, %2461
-  %2463 = and i32 %2459, 1
-  %2464 = icmp eq i32 %2463, 0
-  br i1 %2464, label %2465, label %2467
-
-; <label>:2465                                    ; preds = %2456
-  %2466 = fadd fast float %2462, %20
-  br label %2484
-
-; <label>:2467                                    ; preds = %2456
-  %2468 = fsub fast float %22, %2462
-  br label %2484
-
-; <label>:2469                                    ; preds = %2453
-  %2470 = fcmp fast ogt float %1232, %22
-  br i1 %2470, label %2471, label %2484
-
-; <label>:2471                                    ; preds = %2469
-  %2472 = fsub fast float %1232, %22
-  %2473 = fdiv fast float %2472, %2454
-  %2474 = fptoui float %2473 to i32
-  %2475 = uitofp i32 %2474 to float
-  %2476 = fmul fast float %2475, %2454
-  %2477 = fsub fast float %2472, %2476
-  %2478 = and i32 %2474, 1
-  %2479 = icmp eq i32 %2478, 0
-  br i1 %2479, label %2480, label %2482
-
-; <label>:2480                                    ; preds = %2471
-  %2481 = fsub fast float %22, %2477
-  br label %2484
-
-; <label>:2482                                    ; preds = %2471
-  %2483 = fadd fast float %2477, %20
-  br label %2484
-
-; <label>:2484                                    ; preds = %2482, %2480, %2469, %2467, %2465
-  %2485 = phi float [ %2466, %2465 ], [ %2468, %2467 ], [ %2481, %2480 ], [ %2483, %2482 ], [ %1232, %2469 ]
-  %2486 = fptoui float %2485 to i32
-  %2487 = fsub fast float %24, %20
-  %2488 = fcmp fast olt float %2104, %20
-  br i1 %2488, label %2489, label %2502
-
-; <label>:2489                                    ; preds = %2484
-  %2490 = fsub fast float %20, %2104
-  %2491 = fdiv fast float %2490, %2487
-  %2492 = fptoui float %2491 to i32
-  %2493 = uitofp i32 %2492 to float
-  %2494 = fmul fast float %2493, %2487
-  %2495 = fsub fast float %2490, %2494
-  %2496 = and i32 %2492, 1
-  %2497 = icmp eq i32 %2496, 0
-  br i1 %2497, label %2498, label %2500
-
-; <label>:2498                                    ; preds = %2489
-  %2499 = fadd fast float %2495, %20
-  br label %2517
-
-; <label>:2500                                    ; preds = %2489
-  %2501 = fsub fast float %24, %2495
-  br label %2517
-
-; <label>:2502                                    ; preds = %2484
-  %2503 = fcmp fast ogt float %2104, %24
-  br i1 %2503, label %2504, label %2517
-
-; <label>:2504                                    ; preds = %2502
-  %2505 = fsub fast float %2104, %24
-  %2506 = fdiv fast float %2505, %2487
-  %2507 = fptoui float %2506 to i32
-  %2508 = uitofp i32 %2507 to float
-  %2509 = fmul fast float %2508, %2487
-  %2510 = fsub fast float %2505, %2509
-  %2511 = and i32 %2507, 1
-  %2512 = icmp eq i32 %2511, 0
-  br i1 %2512, label %2513, label %2515
-
-; <label>:2513                                    ; preds = %2504
-  %2514 = fsub fast float %24, %2510
-  br label %2517
-
-; <label>:2515                                    ; preds = %2504
-  %2516 = fadd fast float %2510, %20
-  br label %2517
-
-; <label>:2517                                    ; preds = %2515, %2513, %2502, %2500, %2498
-  %2518 = phi float [ %2499, %2498 ], [ %2501, %2500 ], [ %2514, %2513 ], [ %2516, %2515 ], [ %2104, %2502 ]
-  %2519 = fptoui float %2518 to i32
-  %2520 = uitofp i32 %2519 to float
-  %2521 = uitofp i32 %2486 to float
-  %2522 = fptoui float %45 to i32
-  %2523 = fptoui float %182 to i32
-  %2524 = fptoui float %2520 to i32
-  %2525 = fptoui float %2521 to i32
-  %2526 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2527 = extractvalue %dx.types.CBufRet.i32 %2526, 0
-  %2528 = extractvalue %dx.types.CBufRet.i32 %2526, 1
-  %2529 = extractvalue %dx.types.CBufRet.i32 %2526, 2
-  %2530 = extractvalue %dx.types.CBufRet.i32 %2526, 3
-  %2531 = mul i32 %2527, %2522
-  %2532 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2523, i32 %2528, i32 %2531)  ; IMad(a,b,c)
-  %2533 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2524, i32 %2529, i32 %2532)  ; IMad(a,b,c)
-  %2534 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2525, i32 %2530, i32 %2533)  ; IMad(a,b,c)
-  %2535 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2534, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2536 = extractvalue %dx.types.ResRet.i32 %2535, 0
-  %2537 = sitofp i32 %2536 to float
-  br label %2538
-
-; <label>:2538                                    ; preds = %2517, %2451, %2422, %2405, %2395
-  %2539 = phi float [ %2419, %2405 ], [ 0.000000e+00, %2395 ], [ %2450, %2422 ], [ %2537, %2517 ], [ 0.000000e+00, %2451 ]
-  br i1 %941, label %2540, label %2565
-
-; <label>:2540                                    ; preds = %2538
-  %2541 = fcmp fast oge float %1378, 0.000000e+00
-  %2542 = fptoui float %1378 to i32
-  %2543 = icmp ult i32 %2542, %13
-  %2544 = and i1 %2541, %2543
-  %2545 = fcmp fast oge float %2104, 0.000000e+00
-  %2546 = and i1 %2545, %2544
-  %2547 = fptoui float %2104 to i32
-  %2548 = icmp ult i32 %2547, %15
-  %2549 = and i1 %2548, %2546
-  br i1 %2549, label %2550, label %2683
-
-; <label>:2550                                    ; preds = %2540
-  %2551 = fptoui float %45 to i32
-  %2552 = fptoui float %182 to i32
-  %2553 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2554 = extractvalue %dx.types.CBufRet.i32 %2553, 0
-  %2555 = extractvalue %dx.types.CBufRet.i32 %2553, 1
-  %2556 = extractvalue %dx.types.CBufRet.i32 %2553, 2
-  %2557 = extractvalue %dx.types.CBufRet.i32 %2553, 3
-  %2558 = mul i32 %2554, %2551
-  %2559 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2552, i32 %2555, i32 %2558)  ; IMad(a,b,c)
-  %2560 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2547, i32 %2556, i32 %2559)  ; IMad(a,b,c)
-  %2561 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2542, i32 %2557, i32 %2560)  ; IMad(a,b,c)
-  %2562 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2561, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2563 = extractvalue %dx.types.ResRet.i32 %2562, 0
-  %2564 = sitofp i32 %2563 to float
-  br label %2683
-
-; <label>:2565                                    ; preds = %2538
-  %2566 = icmp eq i32 %940, 1
-  br i1 %2566, label %2567, label %2596
-
-; <label>:2567                                    ; preds = %2565
-  %2568 = add i32 %13, -1
-  %2569 = uitofp i32 %2568 to float
-  %2570 = call float @dx.op.binary.f32(i32 35, float %1378, float 0.000000e+00)  ; FMax(a,b)
-  %2571 = call float @dx.op.binary.f32(i32 36, float %2570, float %2569)  ; FMin(a,b)
-  %2572 = fptoui float %2571 to i32
-  %2573 = add i32 %15, -1
-  %2574 = uitofp i32 %2573 to float
-  %2575 = call float @dx.op.binary.f32(i32 35, float %2104, float 0.000000e+00)  ; FMax(a,b)
-  %2576 = call float @dx.op.binary.f32(i32 36, float %2575, float %2574)  ; FMin(a,b)
-  %2577 = fptoui float %2576 to i32
-  %2578 = uitofp i32 %2577 to float
-  %2579 = uitofp i32 %2572 to float
-  %2580 = fptoui float %45 to i32
-  %2581 = fptoui float %182 to i32
-  %2582 = fptoui float %2578 to i32
-  %2583 = fptoui float %2579 to i32
-  %2584 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2585 = extractvalue %dx.types.CBufRet.i32 %2584, 0
-  %2586 = extractvalue %dx.types.CBufRet.i32 %2584, 1
-  %2587 = extractvalue %dx.types.CBufRet.i32 %2584, 2
-  %2588 = extractvalue %dx.types.CBufRet.i32 %2584, 3
-  %2589 = mul i32 %2585, %2580
-  %2590 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2581, i32 %2586, i32 %2589)  ; IMad(a,b,c)
-  %2591 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2582, i32 %2587, i32 %2590)  ; IMad(a,b,c)
-  %2592 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2583, i32 %2588, i32 %2591)  ; IMad(a,b,c)
-  %2593 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2592, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2594 = extractvalue %dx.types.ResRet.i32 %2593, 0
-  %2595 = sitofp i32 %2594 to float
-  br label %2683
-
-; <label>:2596                                    ; preds = %2565
-  %2597 = icmp eq i32 %940, 2
-  br i1 %2597, label %2598, label %2683
-
-; <label>:2598                                    ; preds = %2596
-  %2599 = fsub fast float %22, %20
-  %2600 = fcmp fast olt float %1378, %20
-  br i1 %2600, label %2601, label %2614
-
-; <label>:2601                                    ; preds = %2598
-  %2602 = fsub fast float %20, %1378
-  %2603 = fdiv fast float %2602, %2599
-  %2604 = fptoui float %2603 to i32
-  %2605 = uitofp i32 %2604 to float
-  %2606 = fmul fast float %2605, %2599
-  %2607 = fsub fast float %2602, %2606
-  %2608 = and i32 %2604, 1
-  %2609 = icmp eq i32 %2608, 0
-  br i1 %2609, label %2610, label %2612
-
-; <label>:2610                                    ; preds = %2601
-  %2611 = fadd fast float %2607, %20
-  br label %2629
-
-; <label>:2612                                    ; preds = %2601
-  %2613 = fsub fast float %22, %2607
-  br label %2629
-
-; <label>:2614                                    ; preds = %2598
-  %2615 = fcmp fast ogt float %1378, %22
-  br i1 %2615, label %2616, label %2629
-
-; <label>:2616                                    ; preds = %2614
-  %2617 = fsub fast float %1378, %22
-  %2618 = fdiv fast float %2617, %2599
-  %2619 = fptoui float %2618 to i32
-  %2620 = uitofp i32 %2619 to float
-  %2621 = fmul fast float %2620, %2599
-  %2622 = fsub fast float %2617, %2621
-  %2623 = and i32 %2619, 1
-  %2624 = icmp eq i32 %2623, 0
-  br i1 %2624, label %2625, label %2627
-
-; <label>:2625                                    ; preds = %2616
-  %2626 = fsub fast float %22, %2622
-  br label %2629
-
-; <label>:2627                                    ; preds = %2616
-  %2628 = fadd fast float %2622, %20
-  br label %2629
-
-; <label>:2629                                    ; preds = %2627, %2625, %2614, %2612, %2610
-  %2630 = phi float [ %2611, %2610 ], [ %2613, %2612 ], [ %2626, %2625 ], [ %2628, %2627 ], [ %1378, %2614 ]
-  %2631 = fptoui float %2630 to i32
-  %2632 = fsub fast float %24, %20
-  %2633 = fcmp fast olt float %2104, %20
-  br i1 %2633, label %2634, label %2647
-
-; <label>:2634                                    ; preds = %2629
-  %2635 = fsub fast float %20, %2104
-  %2636 = fdiv fast float %2635, %2632
-  %2637 = fptoui float %2636 to i32
-  %2638 = uitofp i32 %2637 to float
-  %2639 = fmul fast float %2638, %2632
-  %2640 = fsub fast float %2635, %2639
-  %2641 = and i32 %2637, 1
-  %2642 = icmp eq i32 %2641, 0
-  br i1 %2642, label %2643, label %2645
-
-; <label>:2643                                    ; preds = %2634
-  %2644 = fadd fast float %2640, %20
-  br label %2662
-
-; <label>:2645                                    ; preds = %2634
-  %2646 = fsub fast float %24, %2640
-  br label %2662
-
-; <label>:2647                                    ; preds = %2629
-  %2648 = fcmp fast ogt float %2104, %24
-  br i1 %2648, label %2649, label %2662
-
-; <label>:2649                                    ; preds = %2647
-  %2650 = fsub fast float %2104, %24
-  %2651 = fdiv fast float %2650, %2632
-  %2652 = fptoui float %2651 to i32
-  %2653 = uitofp i32 %2652 to float
-  %2654 = fmul fast float %2653, %2632
-  %2655 = fsub fast float %2650, %2654
-  %2656 = and i32 %2652, 1
-  %2657 = icmp eq i32 %2656, 0
-  br i1 %2657, label %2658, label %2660
-
-; <label>:2658                                    ; preds = %2649
-  %2659 = fsub fast float %24, %2655
-  br label %2662
-
-; <label>:2660                                    ; preds = %2649
-  %2661 = fadd fast float %2655, %20
-  br label %2662
-
-; <label>:2662                                    ; preds = %2660, %2658, %2647, %2645, %2643
-  %2663 = phi float [ %2644, %2643 ], [ %2646, %2645 ], [ %2659, %2658 ], [ %2661, %2660 ], [ %2104, %2647 ]
-  %2664 = fptoui float %2663 to i32
-  %2665 = uitofp i32 %2664 to float
-  %2666 = uitofp i32 %2631 to float
-  %2667 = fptoui float %45 to i32
-  %2668 = fptoui float %182 to i32
-  %2669 = fptoui float %2665 to i32
-  %2670 = fptoui float %2666 to i32
-  %2671 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2672 = extractvalue %dx.types.CBufRet.i32 %2671, 0
-  %2673 = extractvalue %dx.types.CBufRet.i32 %2671, 1
-  %2674 = extractvalue %dx.types.CBufRet.i32 %2671, 2
-  %2675 = extractvalue %dx.types.CBufRet.i32 %2671, 3
-  %2676 = mul i32 %2672, %2667
-  %2677 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2668, i32 %2673, i32 %2676)  ; IMad(a,b,c)
-  %2678 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2669, i32 %2674, i32 %2677)  ; IMad(a,b,c)
-  %2679 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2670, i32 %2675, i32 %2678)  ; IMad(a,b,c)
-  %2680 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2679, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2681 = extractvalue %dx.types.ResRet.i32 %2680, 0
-  %2682 = sitofp i32 %2681 to float
-  br label %2683
-
-; <label>:2683                                    ; preds = %2662, %2596, %2567, %2550, %2540
-  %2684 = phi float [ %2564, %2550 ], [ 0.000000e+00, %2540 ], [ %2595, %2567 ], [ %2682, %2662 ], [ 0.000000e+00, %2596 ]
-  %2685 = fadd fast float %938, 2.000000e+00
-  br i1 %941, label %2686, label %2711
-
-; <label>:2686                                    ; preds = %2683
-  %2687 = fcmp fast oge float %937, 0.000000e+00
-  %2688 = fptoui float %937 to i32
-  %2689 = icmp ult i32 %2688, %13
-  %2690 = and i1 %2687, %2689
-  %2691 = fcmp fast oge float %2685, 0.000000e+00
-  %2692 = and i1 %2691, %2690
-  %2693 = fptoui float %2685 to i32
-  %2694 = icmp ult i32 %2693, %15
-  %2695 = and i1 %2694, %2692
-  br i1 %2695, label %2696, label %2829
-
-; <label>:2696                                    ; preds = %2686
-  %2697 = fptoui float %45 to i32
-  %2698 = fptoui float %182 to i32
-  %2699 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2700 = extractvalue %dx.types.CBufRet.i32 %2699, 0
-  %2701 = extractvalue %dx.types.CBufRet.i32 %2699, 1
-  %2702 = extractvalue %dx.types.CBufRet.i32 %2699, 2
-  %2703 = extractvalue %dx.types.CBufRet.i32 %2699, 3
-  %2704 = mul i32 %2700, %2697
-  %2705 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2698, i32 %2701, i32 %2704)  ; IMad(a,b,c)
-  %2706 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2693, i32 %2702, i32 %2705)  ; IMad(a,b,c)
-  %2707 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2688, i32 %2703, i32 %2706)  ; IMad(a,b,c)
-  %2708 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2707, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2709 = extractvalue %dx.types.ResRet.i32 %2708, 0
-  %2710 = sitofp i32 %2709 to float
-  br label %2829
-
-; <label>:2711                                    ; preds = %2683
-  %2712 = icmp eq i32 %940, 1
-  br i1 %2712, label %2713, label %2742
-
-; <label>:2713                                    ; preds = %2711
-  %2714 = add i32 %13, -1
-  %2715 = uitofp i32 %2714 to float
-  %2716 = call float @dx.op.binary.f32(i32 35, float %937, float 0.000000e+00)  ; FMax(a,b)
-  %2717 = call float @dx.op.binary.f32(i32 36, float %2716, float %2715)  ; FMin(a,b)
-  %2718 = fptoui float %2717 to i32
-  %2719 = add i32 %15, -1
-  %2720 = uitofp i32 %2719 to float
-  %2721 = call float @dx.op.binary.f32(i32 35, float %2685, float 0.000000e+00)  ; FMax(a,b)
-  %2722 = call float @dx.op.binary.f32(i32 36, float %2721, float %2720)  ; FMin(a,b)
-  %2723 = fptoui float %2722 to i32
-  %2724 = uitofp i32 %2723 to float
-  %2725 = uitofp i32 %2718 to float
-  %2726 = fptoui float %45 to i32
-  %2727 = fptoui float %182 to i32
-  %2728 = fptoui float %2724 to i32
-  %2729 = fptoui float %2725 to i32
-  %2730 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2731 = extractvalue %dx.types.CBufRet.i32 %2730, 0
-  %2732 = extractvalue %dx.types.CBufRet.i32 %2730, 1
-  %2733 = extractvalue %dx.types.CBufRet.i32 %2730, 2
-  %2734 = extractvalue %dx.types.CBufRet.i32 %2730, 3
-  %2735 = mul i32 %2731, %2726
-  %2736 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2727, i32 %2732, i32 %2735)  ; IMad(a,b,c)
-  %2737 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2728, i32 %2733, i32 %2736)  ; IMad(a,b,c)
-  %2738 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2729, i32 %2734, i32 %2737)  ; IMad(a,b,c)
-  %2739 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2738, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2740 = extractvalue %dx.types.ResRet.i32 %2739, 0
-  %2741 = sitofp i32 %2740 to float
-  br label %2829
-
-; <label>:2742                                    ; preds = %2711
-  %2743 = icmp eq i32 %940, 2
-  br i1 %2743, label %2744, label %2829
-
-; <label>:2744                                    ; preds = %2742
-  %2745 = fsub fast float %22, %20
-  %2746 = fcmp fast olt float %937, %20
-  br i1 %2746, label %2747, label %2760
-
-; <label>:2747                                    ; preds = %2744
-  %2748 = fsub fast float %20, %937
-  %2749 = fdiv fast float %2748, %2745
-  %2750 = fptoui float %2749 to i32
-  %2751 = uitofp i32 %2750 to float
-  %2752 = fmul fast float %2751, %2745
-  %2753 = fsub fast float %2748, %2752
-  %2754 = and i32 %2750, 1
-  %2755 = icmp eq i32 %2754, 0
-  br i1 %2755, label %2756, label %2758
-
-; <label>:2756                                    ; preds = %2747
-  %2757 = fadd fast float %2753, %20
-  br label %2775
-
-; <label>:2758                                    ; preds = %2747
-  %2759 = fsub fast float %22, %2753
-  br label %2775
-
-; <label>:2760                                    ; preds = %2744
-  %2761 = fcmp fast ogt float %937, %22
-  br i1 %2761, label %2762, label %2775
-
-; <label>:2762                                    ; preds = %2760
-  %2763 = fsub fast float %937, %22
-  %2764 = fdiv fast float %2763, %2745
-  %2765 = fptoui float %2764 to i32
-  %2766 = uitofp i32 %2765 to float
-  %2767 = fmul fast float %2766, %2745
-  %2768 = fsub fast float %2763, %2767
-  %2769 = and i32 %2765, 1
-  %2770 = icmp eq i32 %2769, 0
-  br i1 %2770, label %2771, label %2773
-
-; <label>:2771                                    ; preds = %2762
-  %2772 = fsub fast float %22, %2768
-  br label %2775
-
-; <label>:2773                                    ; preds = %2762
-  %2774 = fadd fast float %2768, %20
-  br label %2775
-
-; <label>:2775                                    ; preds = %2773, %2771, %2760, %2758, %2756
-  %2776 = phi float [ %2757, %2756 ], [ %2759, %2758 ], [ %2772, %2771 ], [ %2774, %2773 ], [ %937, %2760 ]
-  %2777 = fptoui float %2776 to i32
-  %2778 = fsub fast float %24, %20
-  %2779 = fcmp fast olt float %2685, %20
-  br i1 %2779, label %2780, label %2793
-
-; <label>:2780                                    ; preds = %2775
-  %2781 = fsub fast float %20, %2685
-  %2782 = fdiv fast float %2781, %2778
-  %2783 = fptoui float %2782 to i32
-  %2784 = uitofp i32 %2783 to float
-  %2785 = fmul fast float %2784, %2778
-  %2786 = fsub fast float %2781, %2785
-  %2787 = and i32 %2783, 1
-  %2788 = icmp eq i32 %2787, 0
-  br i1 %2788, label %2789, label %2791
-
-; <label>:2789                                    ; preds = %2780
-  %2790 = fadd fast float %2786, %20
-  br label %2808
-
-; <label>:2791                                    ; preds = %2780
-  %2792 = fsub fast float %24, %2786
-  br label %2808
-
-; <label>:2793                                    ; preds = %2775
-  %2794 = fcmp fast ogt float %2685, %24
-  br i1 %2794, label %2795, label %2808
-
-; <label>:2795                                    ; preds = %2793
-  %2796 = fsub fast float %2685, %24
-  %2797 = fdiv fast float %2796, %2778
-  %2798 = fptoui float %2797 to i32
-  %2799 = uitofp i32 %2798 to float
-  %2800 = fmul fast float %2799, %2778
-  %2801 = fsub fast float %2796, %2800
-  %2802 = and i32 %2798, 1
-  %2803 = icmp eq i32 %2802, 0
-  br i1 %2803, label %2804, label %2806
-
-; <label>:2804                                    ; preds = %2795
-  %2805 = fsub fast float %24, %2801
-  br label %2808
-
-; <label>:2806                                    ; preds = %2795
-  %2807 = fadd fast float %2801, %20
-  br label %2808
-
-; <label>:2808                                    ; preds = %2806, %2804, %2793, %2791, %2789
-  %2809 = phi float [ %2790, %2789 ], [ %2792, %2791 ], [ %2805, %2804 ], [ %2807, %2806 ], [ %2685, %2793 ]
-  %2810 = fptoui float %2809 to i32
-  %2811 = uitofp i32 %2810 to float
-  %2812 = uitofp i32 %2777 to float
-  %2813 = fptoui float %45 to i32
-  %2814 = fptoui float %182 to i32
-  %2815 = fptoui float %2811 to i32
-  %2816 = fptoui float %2812 to i32
-  %2817 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2818 = extractvalue %dx.types.CBufRet.i32 %2817, 0
-  %2819 = extractvalue %dx.types.CBufRet.i32 %2817, 1
-  %2820 = extractvalue %dx.types.CBufRet.i32 %2817, 2
-  %2821 = extractvalue %dx.types.CBufRet.i32 %2817, 3
-  %2822 = mul i32 %2818, %2813
-  %2823 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2814, i32 %2819, i32 %2822)  ; IMad(a,b,c)
-  %2824 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2815, i32 %2820, i32 %2823)  ; IMad(a,b,c)
-  %2825 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2816, i32 %2821, i32 %2824)  ; IMad(a,b,c)
-  %2826 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2825, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2827 = extractvalue %dx.types.ResRet.i32 %2826, 0
-  %2828 = sitofp i32 %2827 to float
-  br label %2829
-
-; <label>:2829                                    ; preds = %2808, %2742, %2713, %2696, %2686
-  %2830 = phi float [ %2710, %2696 ], [ 0.000000e+00, %2686 ], [ %2741, %2713 ], [ %2828, %2808 ], [ 0.000000e+00, %2742 ]
-  br i1 %941, label %2831, label %2856
-
-; <label>:2831                                    ; preds = %2829
-  %2832 = fcmp fast oge float %936, 0.000000e+00
-  %2833 = fptoui float %936 to i32
-  %2834 = icmp ult i32 %2833, %13
-  %2835 = and i1 %2832, %2834
-  %2836 = fcmp fast oge float %2685, 0.000000e+00
-  %2837 = and i1 %2836, %2835
-  %2838 = fptoui float %2685 to i32
-  %2839 = icmp ult i32 %2838, %15
-  %2840 = and i1 %2839, %2837
-  br i1 %2840, label %2841, label %2974
-
-; <label>:2841                                    ; preds = %2831
-  %2842 = fptoui float %45 to i32
-  %2843 = fptoui float %182 to i32
-  %2844 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2845 = extractvalue %dx.types.CBufRet.i32 %2844, 0
-  %2846 = extractvalue %dx.types.CBufRet.i32 %2844, 1
-  %2847 = extractvalue %dx.types.CBufRet.i32 %2844, 2
-  %2848 = extractvalue %dx.types.CBufRet.i32 %2844, 3
-  %2849 = mul i32 %2845, %2842
-  %2850 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2843, i32 %2846, i32 %2849)  ; IMad(a,b,c)
-  %2851 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2838, i32 %2847, i32 %2850)  ; IMad(a,b,c)
-  %2852 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2833, i32 %2848, i32 %2851)  ; IMad(a,b,c)
-  %2853 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2852, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2854 = extractvalue %dx.types.ResRet.i32 %2853, 0
-  %2855 = sitofp i32 %2854 to float
-  br label %2974
-
-; <label>:2856                                    ; preds = %2829
-  %2857 = icmp eq i32 %940, 1
-  br i1 %2857, label %2858, label %2887
-
-; <label>:2858                                    ; preds = %2856
-  %2859 = add i32 %13, -1
-  %2860 = uitofp i32 %2859 to float
-  %2861 = call float @dx.op.binary.f32(i32 35, float %936, float 0.000000e+00)  ; FMax(a,b)
-  %2862 = call float @dx.op.binary.f32(i32 36, float %2861, float %2860)  ; FMin(a,b)
-  %2863 = fptoui float %2862 to i32
-  %2864 = add i32 %15, -1
-  %2865 = uitofp i32 %2864 to float
-  %2866 = call float @dx.op.binary.f32(i32 35, float %2685, float 0.000000e+00)  ; FMax(a,b)
-  %2867 = call float @dx.op.binary.f32(i32 36, float %2866, float %2865)  ; FMin(a,b)
-  %2868 = fptoui float %2867 to i32
-  %2869 = uitofp i32 %2868 to float
-  %2870 = uitofp i32 %2863 to float
-  %2871 = fptoui float %45 to i32
-  %2872 = fptoui float %182 to i32
-  %2873 = fptoui float %2869 to i32
-  %2874 = fptoui float %2870 to i32
-  %2875 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2876 = extractvalue %dx.types.CBufRet.i32 %2875, 0
-  %2877 = extractvalue %dx.types.CBufRet.i32 %2875, 1
-  %2878 = extractvalue %dx.types.CBufRet.i32 %2875, 2
-  %2879 = extractvalue %dx.types.CBufRet.i32 %2875, 3
-  %2880 = mul i32 %2876, %2871
-  %2881 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2872, i32 %2877, i32 %2880)  ; IMad(a,b,c)
-  %2882 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2873, i32 %2878, i32 %2881)  ; IMad(a,b,c)
-  %2883 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2874, i32 %2879, i32 %2882)  ; IMad(a,b,c)
-  %2884 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2883, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2885 = extractvalue %dx.types.ResRet.i32 %2884, 0
-  %2886 = sitofp i32 %2885 to float
-  br label %2974
-
-; <label>:2887                                    ; preds = %2856
-  %2888 = icmp eq i32 %940, 2
-  br i1 %2888, label %2889, label %2974
-
-; <label>:2889                                    ; preds = %2887
-  %2890 = fsub fast float %22, %20
-  %2891 = fcmp fast olt float %936, %20
-  br i1 %2891, label %2892, label %2905
-
-; <label>:2892                                    ; preds = %2889
-  %2893 = fsub fast float %20, %936
-  %2894 = fdiv fast float %2893, %2890
-  %2895 = fptoui float %2894 to i32
-  %2896 = uitofp i32 %2895 to float
-  %2897 = fmul fast float %2896, %2890
-  %2898 = fsub fast float %2893, %2897
-  %2899 = and i32 %2895, 1
-  %2900 = icmp eq i32 %2899, 0
-  br i1 %2900, label %2901, label %2903
-
-; <label>:2901                                    ; preds = %2892
-  %2902 = fadd fast float %2898, %20
-  br label %2920
-
-; <label>:2903                                    ; preds = %2892
-  %2904 = fsub fast float %22, %2898
-  br label %2920
-
-; <label>:2905                                    ; preds = %2889
-  %2906 = fcmp fast ogt float %936, %22
-  br i1 %2906, label %2907, label %2920
-
-; <label>:2907                                    ; preds = %2905
-  %2908 = fsub fast float %936, %22
-  %2909 = fdiv fast float %2908, %2890
-  %2910 = fptoui float %2909 to i32
-  %2911 = uitofp i32 %2910 to float
-  %2912 = fmul fast float %2911, %2890
-  %2913 = fsub fast float %2908, %2912
-  %2914 = and i32 %2910, 1
-  %2915 = icmp eq i32 %2914, 0
-  br i1 %2915, label %2916, label %2918
-
-; <label>:2916                                    ; preds = %2907
-  %2917 = fsub fast float %22, %2913
-  br label %2920
-
-; <label>:2918                                    ; preds = %2907
-  %2919 = fadd fast float %2913, %20
-  br label %2920
-
-; <label>:2920                                    ; preds = %2918, %2916, %2905, %2903, %2901
-  %2921 = phi float [ %2902, %2901 ], [ %2904, %2903 ], [ %2917, %2916 ], [ %2919, %2918 ], [ %936, %2905 ]
-  %2922 = fptoui float %2921 to i32
-  %2923 = fsub fast float %24, %20
-  %2924 = fcmp fast olt float %2685, %20
-  br i1 %2924, label %2925, label %2938
-
-; <label>:2925                                    ; preds = %2920
-  %2926 = fsub fast float %20, %2685
-  %2927 = fdiv fast float %2926, %2923
-  %2928 = fptoui float %2927 to i32
-  %2929 = uitofp i32 %2928 to float
-  %2930 = fmul fast float %2929, %2923
-  %2931 = fsub fast float %2926, %2930
-  %2932 = and i32 %2928, 1
-  %2933 = icmp eq i32 %2932, 0
-  br i1 %2933, label %2934, label %2936
-
-; <label>:2934                                    ; preds = %2925
-  %2935 = fadd fast float %2931, %20
-  br label %2953
-
-; <label>:2936                                    ; preds = %2925
-  %2937 = fsub fast float %24, %2931
-  br label %2953
-
-; <label>:2938                                    ; preds = %2920
-  %2939 = fcmp fast ogt float %2685, %24
-  br i1 %2939, label %2940, label %2953
-
-; <label>:2940                                    ; preds = %2938
-  %2941 = fsub fast float %2685, %24
-  %2942 = fdiv fast float %2941, %2923
-  %2943 = fptoui float %2942 to i32
-  %2944 = uitofp i32 %2943 to float
-  %2945 = fmul fast float %2944, %2923
-  %2946 = fsub fast float %2941, %2945
-  %2947 = and i32 %2943, 1
-  %2948 = icmp eq i32 %2947, 0
-  br i1 %2948, label %2949, label %2951
-
-; <label>:2949                                    ; preds = %2940
-  %2950 = fsub fast float %24, %2946
-  br label %2953
-
-; <label>:2951                                    ; preds = %2940
-  %2952 = fadd fast float %2946, %20
-  br label %2953
-
-; <label>:2953                                    ; preds = %2951, %2949, %2938, %2936, %2934
-  %2954 = phi float [ %2935, %2934 ], [ %2937, %2936 ], [ %2950, %2949 ], [ %2952, %2951 ], [ %2685, %2938 ]
-  %2955 = fptoui float %2954 to i32
-  %2956 = uitofp i32 %2955 to float
-  %2957 = uitofp i32 %2922 to float
-  %2958 = fptoui float %45 to i32
-  %2959 = fptoui float %182 to i32
-  %2960 = fptoui float %2956 to i32
-  %2961 = fptoui float %2957 to i32
-  %2962 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2963 = extractvalue %dx.types.CBufRet.i32 %2962, 0
-  %2964 = extractvalue %dx.types.CBufRet.i32 %2962, 1
-  %2965 = extractvalue %dx.types.CBufRet.i32 %2962, 2
-  %2966 = extractvalue %dx.types.CBufRet.i32 %2962, 3
-  %2967 = mul i32 %2963, %2958
-  %2968 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2959, i32 %2964, i32 %2967)  ; IMad(a,b,c)
-  %2969 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2960, i32 %2965, i32 %2968)  ; IMad(a,b,c)
-  %2970 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2961, i32 %2966, i32 %2969)  ; IMad(a,b,c)
-  %2971 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2970, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2972 = extractvalue %dx.types.ResRet.i32 %2971, 0
-  %2973 = sitofp i32 %2972 to float
-  br label %2974
-
-; <label>:2974                                    ; preds = %2953, %2887, %2858, %2841, %2831
-  %2975 = phi float [ %2855, %2841 ], [ 0.000000e+00, %2831 ], [ %2886, %2858 ], [ %2973, %2953 ], [ 0.000000e+00, %2887 ]
-  br i1 %941, label %2976, label %3001
-
-; <label>:2976                                    ; preds = %2974
-  %2977 = fcmp fast oge float %1232, 0.000000e+00
-  %2978 = fptoui float %1232 to i32
-  %2979 = icmp ult i32 %2978, %13
-  %2980 = and i1 %2977, %2979
-  %2981 = fcmp fast oge float %2685, 0.000000e+00
-  %2982 = and i1 %2981, %2980
-  %2983 = fptoui float %2685 to i32
-  %2984 = icmp ult i32 %2983, %15
-  %2985 = and i1 %2984, %2982
-  br i1 %2985, label %2986, label %3119
-
-; <label>:2986                                    ; preds = %2976
-  %2987 = fptoui float %45 to i32
-  %2988 = fptoui float %182 to i32
-  %2989 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2990 = extractvalue %dx.types.CBufRet.i32 %2989, 0
-  %2991 = extractvalue %dx.types.CBufRet.i32 %2989, 1
-  %2992 = extractvalue %dx.types.CBufRet.i32 %2989, 2
-  %2993 = extractvalue %dx.types.CBufRet.i32 %2989, 3
-  %2994 = mul i32 %2990, %2987
-  %2995 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2988, i32 %2991, i32 %2994)  ; IMad(a,b,c)
-  %2996 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2983, i32 %2992, i32 %2995)  ; IMad(a,b,c)
-  %2997 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2978, i32 %2993, i32 %2996)  ; IMad(a,b,c)
-  %2998 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2997, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2999 = extractvalue %dx.types.ResRet.i32 %2998, 0
-  %3000 = sitofp i32 %2999 to float
-  br label %3119
-
-; <label>:3001                                    ; preds = %2974
-  %3002 = icmp eq i32 %940, 1
-  br i1 %3002, label %3003, label %3032
-
-; <label>:3003                                    ; preds = %3001
-  %3004 = add i32 %13, -1
-  %3005 = uitofp i32 %3004 to float
-  %3006 = call float @dx.op.binary.f32(i32 35, float %1232, float 0.000000e+00)  ; FMax(a,b)
-  %3007 = call float @dx.op.binary.f32(i32 36, float %3006, float %3005)  ; FMin(a,b)
-  %3008 = fptoui float %3007 to i32
-  %3009 = add i32 %15, -1
-  %3010 = uitofp i32 %3009 to float
-  %3011 = call float @dx.op.binary.f32(i32 35, float %2685, float 0.000000e+00)  ; FMax(a,b)
-  %3012 = call float @dx.op.binary.f32(i32 36, float %3011, float %3010)  ; FMin(a,b)
-  %3013 = fptoui float %3012 to i32
-  %3014 = uitofp i32 %3013 to float
-  %3015 = uitofp i32 %3008 to float
-  %3016 = fptoui float %45 to i32
-  %3017 = fptoui float %182 to i32
-  %3018 = fptoui float %3014 to i32
-  %3019 = fptoui float %3015 to i32
-  %3020 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3021 = extractvalue %dx.types.CBufRet.i32 %3020, 0
-  %3022 = extractvalue %dx.types.CBufRet.i32 %3020, 1
-  %3023 = extractvalue %dx.types.CBufRet.i32 %3020, 2
-  %3024 = extractvalue %dx.types.CBufRet.i32 %3020, 3
-  %3025 = mul i32 %3021, %3016
-  %3026 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3017, i32 %3022, i32 %3025)  ; IMad(a,b,c)
-  %3027 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3018, i32 %3023, i32 %3026)  ; IMad(a,b,c)
-  %3028 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3019, i32 %3024, i32 %3027)  ; IMad(a,b,c)
-  %3029 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3028, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3030 = extractvalue %dx.types.ResRet.i32 %3029, 0
-  %3031 = sitofp i32 %3030 to float
-  br label %3119
-
-; <label>:3032                                    ; preds = %3001
-  %3033 = icmp eq i32 %940, 2
-  br i1 %3033, label %3034, label %3119
-
-; <label>:3034                                    ; preds = %3032
-  %3035 = fsub fast float %22, %20
-  %3036 = fcmp fast olt float %1232, %20
-  br i1 %3036, label %3037, label %3050
-
-; <label>:3037                                    ; preds = %3034
-  %3038 = fsub fast float %20, %1232
-  %3039 = fdiv fast float %3038, %3035
-  %3040 = fptoui float %3039 to i32
-  %3041 = uitofp i32 %3040 to float
-  %3042 = fmul fast float %3041, %3035
-  %3043 = fsub fast float %3038, %3042
-  %3044 = and i32 %3040, 1
-  %3045 = icmp eq i32 %3044, 0
-  br i1 %3045, label %3046, label %3048
-
-; <label>:3046                                    ; preds = %3037
-  %3047 = fadd fast float %3043, %20
-  br label %3065
-
-; <label>:3048                                    ; preds = %3037
-  %3049 = fsub fast float %22, %3043
-  br label %3065
-
-; <label>:3050                                    ; preds = %3034
-  %3051 = fcmp fast ogt float %1232, %22
-  br i1 %3051, label %3052, label %3065
-
-; <label>:3052                                    ; preds = %3050
-  %3053 = fsub fast float %1232, %22
-  %3054 = fdiv fast float %3053, %3035
-  %3055 = fptoui float %3054 to i32
-  %3056 = uitofp i32 %3055 to float
-  %3057 = fmul fast float %3056, %3035
-  %3058 = fsub fast float %3053, %3057
-  %3059 = and i32 %3055, 1
-  %3060 = icmp eq i32 %3059, 0
-  br i1 %3060, label %3061, label %3063
-
-; <label>:3061                                    ; preds = %3052
-  %3062 = fsub fast float %22, %3058
-  br label %3065
-
-; <label>:3063                                    ; preds = %3052
-  %3064 = fadd fast float %3058, %20
-  br label %3065
-
-; <label>:3065                                    ; preds = %3063, %3061, %3050, %3048, %3046
-  %3066 = phi float [ %3047, %3046 ], [ %3049, %3048 ], [ %3062, %3061 ], [ %3064, %3063 ], [ %1232, %3050 ]
-  %3067 = fptoui float %3066 to i32
-  %3068 = fsub fast float %24, %20
-  %3069 = fcmp fast olt float %2685, %20
-  br i1 %3069, label %3070, label %3083
-
-; <label>:3070                                    ; preds = %3065
-  %3071 = fsub fast float %20, %2685
-  %3072 = fdiv fast float %3071, %3068
-  %3073 = fptoui float %3072 to i32
-  %3074 = uitofp i32 %3073 to float
-  %3075 = fmul fast float %3074, %3068
-  %3076 = fsub fast float %3071, %3075
-  %3077 = and i32 %3073, 1
-  %3078 = icmp eq i32 %3077, 0
-  br i1 %3078, label %3079, label %3081
-
-; <label>:3079                                    ; preds = %3070
-  %3080 = fadd fast float %3076, %20
-  br label %3098
-
-; <label>:3081                                    ; preds = %3070
-  %3082 = fsub fast float %24, %3076
-  br label %3098
-
-; <label>:3083                                    ; preds = %3065
-  %3084 = fcmp fast ogt float %2685, %24
-  br i1 %3084, label %3085, label %3098
-
-; <label>:3085                                    ; preds = %3083
-  %3086 = fsub fast float %2685, %24
-  %3087 = fdiv fast float %3086, %3068
-  %3088 = fptoui float %3087 to i32
-  %3089 = uitofp i32 %3088 to float
-  %3090 = fmul fast float %3089, %3068
-  %3091 = fsub fast float %3086, %3090
-  %3092 = and i32 %3088, 1
-  %3093 = icmp eq i32 %3092, 0
-  br i1 %3093, label %3094, label %3096
-
-; <label>:3094                                    ; preds = %3085
-  %3095 = fsub fast float %24, %3091
-  br label %3098
-
-; <label>:3096                                    ; preds = %3085
-  %3097 = fadd fast float %3091, %20
-  br label %3098
-
-; <label>:3098                                    ; preds = %3096, %3094, %3083, %3081, %3079
-  %3099 = phi float [ %3080, %3079 ], [ %3082, %3081 ], [ %3095, %3094 ], [ %3097, %3096 ], [ %2685, %3083 ]
-  %3100 = fptoui float %3099 to i32
-  %3101 = uitofp i32 %3100 to float
-  %3102 = uitofp i32 %3067 to float
-  %3103 = fptoui float %45 to i32
-  %3104 = fptoui float %182 to i32
-  %3105 = fptoui float %3101 to i32
-  %3106 = fptoui float %3102 to i32
-  %3107 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3108 = extractvalue %dx.types.CBufRet.i32 %3107, 0
-  %3109 = extractvalue %dx.types.CBufRet.i32 %3107, 1
-  %3110 = extractvalue %dx.types.CBufRet.i32 %3107, 2
-  %3111 = extractvalue %dx.types.CBufRet.i32 %3107, 3
-  %3112 = mul i32 %3108, %3103
-  %3113 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3104, i32 %3109, i32 %3112)  ; IMad(a,b,c)
-  %3114 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3105, i32 %3110, i32 %3113)  ; IMad(a,b,c)
-  %3115 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3106, i32 %3111, i32 %3114)  ; IMad(a,b,c)
-  %3116 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3115, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3117 = extractvalue %dx.types.ResRet.i32 %3116, 0
-  %3118 = sitofp i32 %3117 to float
-  br label %3119
-
-; <label>:3119                                    ; preds = %3098, %3032, %3003, %2986, %2976
-  %3120 = phi float [ %3000, %2986 ], [ 0.000000e+00, %2976 ], [ %3031, %3003 ], [ %3118, %3098 ], [ 0.000000e+00, %3032 ]
-  br i1 %941, label %3121, label %3146
-
-; <label>:3121                                    ; preds = %3119
-  %3122 = fcmp fast oge float %1378, 0.000000e+00
-  %3123 = fptoui float %1378 to i32
-  %3124 = icmp ult i32 %3123, %13
-  %3125 = and i1 %3122, %3124
-  %3126 = fcmp fast oge float %2685, 0.000000e+00
-  %3127 = and i1 %3126, %3125
-  %3128 = fptoui float %2685 to i32
-  %3129 = icmp ult i32 %3128, %15
-  %3130 = and i1 %3129, %3127
-  br i1 %3130, label %3131, label %3264
-
-; <label>:3131                                    ; preds = %3121
-  %3132 = fptoui float %45 to i32
-  %3133 = fptoui float %182 to i32
-  %3134 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3135 = extractvalue %dx.types.CBufRet.i32 %3134, 0
-  %3136 = extractvalue %dx.types.CBufRet.i32 %3134, 1
-  %3137 = extractvalue %dx.types.CBufRet.i32 %3134, 2
-  %3138 = extractvalue %dx.types.CBufRet.i32 %3134, 3
-  %3139 = mul i32 %3135, %3132
-  %3140 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3133, i32 %3136, i32 %3139)  ; IMad(a,b,c)
-  %3141 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3128, i32 %3137, i32 %3140)  ; IMad(a,b,c)
-  %3142 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3123, i32 %3138, i32 %3141)  ; IMad(a,b,c)
-  %3143 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3142, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3144 = extractvalue %dx.types.ResRet.i32 %3143, 0
-  %3145 = sitofp i32 %3144 to float
-  br label %3264
-
-; <label>:3146                                    ; preds = %3119
-  %3147 = icmp eq i32 %940, 1
-  br i1 %3147, label %3148, label %3177
-
-; <label>:3148                                    ; preds = %3146
-  %3149 = add i32 %13, -1
-  %3150 = uitofp i32 %3149 to float
-  %3151 = call float @dx.op.binary.f32(i32 35, float %1378, float 0.000000e+00)  ; FMax(a,b)
-  %3152 = call float @dx.op.binary.f32(i32 36, float %3151, float %3150)  ; FMin(a,b)
-  %3153 = fptoui float %3152 to i32
-  %3154 = add i32 %15, -1
-  %3155 = uitofp i32 %3154 to float
-  %3156 = call float @dx.op.binary.f32(i32 35, float %2685, float 0.000000e+00)  ; FMax(a,b)
-  %3157 = call float @dx.op.binary.f32(i32 36, float %3156, float %3155)  ; FMin(a,b)
-  %3158 = fptoui float %3157 to i32
-  %3159 = uitofp i32 %3158 to float
-  %3160 = uitofp i32 %3153 to float
-  %3161 = fptoui float %45 to i32
-  %3162 = fptoui float %182 to i32
-  %3163 = fptoui float %3159 to i32
-  %3164 = fptoui float %3160 to i32
-  %3165 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3166 = extractvalue %dx.types.CBufRet.i32 %3165, 0
-  %3167 = extractvalue %dx.types.CBufRet.i32 %3165, 1
-  %3168 = extractvalue %dx.types.CBufRet.i32 %3165, 2
-  %3169 = extractvalue %dx.types.CBufRet.i32 %3165, 3
-  %3170 = mul i32 %3166, %3161
-  %3171 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3162, i32 %3167, i32 %3170)  ; IMad(a,b,c)
-  %3172 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3163, i32 %3168, i32 %3171)  ; IMad(a,b,c)
-  %3173 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3164, i32 %3169, i32 %3172)  ; IMad(a,b,c)
-  %3174 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3173, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3175 = extractvalue %dx.types.ResRet.i32 %3174, 0
-  %3176 = sitofp i32 %3175 to float
-  br label %3264
-
-; <label>:3177                                    ; preds = %3146
-  %3178 = icmp eq i32 %940, 2
-  br i1 %3178, label %3179, label %3264
-
-; <label>:3179                                    ; preds = %3177
-  %3180 = fsub fast float %22, %20
-  %3181 = fcmp fast olt float %1378, %20
-  br i1 %3181, label %3182, label %3195
-
-; <label>:3182                                    ; preds = %3179
-  %3183 = fsub fast float %20, %1378
-  %3184 = fdiv fast float %3183, %3180
-  %3185 = fptoui float %3184 to i32
-  %3186 = uitofp i32 %3185 to float
-  %3187 = fmul fast float %3186, %3180
-  %3188 = fsub fast float %3183, %3187
-  %3189 = and i32 %3185, 1
-  %3190 = icmp eq i32 %3189, 0
-  br i1 %3190, label %3191, label %3193
-
-; <label>:3191                                    ; preds = %3182
-  %3192 = fadd fast float %3188, %20
-  br label %3210
-
-; <label>:3193                                    ; preds = %3182
-  %3194 = fsub fast float %22, %3188
-  br label %3210
-
-; <label>:3195                                    ; preds = %3179
-  %3196 = fcmp fast ogt float %1378, %22
-  br i1 %3196, label %3197, label %3210
-
-; <label>:3197                                    ; preds = %3195
-  %3198 = fsub fast float %1378, %22
-  %3199 = fdiv fast float %3198, %3180
-  %3200 = fptoui float %3199 to i32
-  %3201 = uitofp i32 %3200 to float
-  %3202 = fmul fast float %3201, %3180
-  %3203 = fsub fast float %3198, %3202
-  %3204 = and i32 %3200, 1
-  %3205 = icmp eq i32 %3204, 0
-  br i1 %3205, label %3206, label %3208
-
-; <label>:3206                                    ; preds = %3197
-  %3207 = fsub fast float %22, %3203
-  br label %3210
-
-; <label>:3208                                    ; preds = %3197
-  %3209 = fadd fast float %3203, %20
-  br label %3210
-
-; <label>:3210                                    ; preds = %3208, %3206, %3195, %3193, %3191
-  %3211 = phi float [ %3192, %3191 ], [ %3194, %3193 ], [ %3207, %3206 ], [ %3209, %3208 ], [ %1378, %3195 ]
-  %3212 = fptoui float %3211 to i32
-  %3213 = fsub fast float %24, %20
-  %3214 = fcmp fast olt float %2685, %20
-  br i1 %3214, label %3215, label %3228
-
-; <label>:3215                                    ; preds = %3210
-  %3216 = fsub fast float %20, %2685
-  %3217 = fdiv fast float %3216, %3213
-  %3218 = fptoui float %3217 to i32
-  %3219 = uitofp i32 %3218 to float
-  %3220 = fmul fast float %3219, %3213
-  %3221 = fsub fast float %3216, %3220
-  %3222 = and i32 %3218, 1
-  %3223 = icmp eq i32 %3222, 0
-  br i1 %3223, label %3224, label %3226
-
-; <label>:3224                                    ; preds = %3215
-  %3225 = fadd fast float %3221, %20
-  br label %3243
-
-; <label>:3226                                    ; preds = %3215
-  %3227 = fsub fast float %24, %3221
-  br label %3243
-
-; <label>:3228                                    ; preds = %3210
-  %3229 = fcmp fast ogt float %2685, %24
-  br i1 %3229, label %3230, label %3243
-
-; <label>:3230                                    ; preds = %3228
-  %3231 = fsub fast float %2685, %24
-  %3232 = fdiv fast float %3231, %3213
-  %3233 = fptoui float %3232 to i32
-  %3234 = uitofp i32 %3233 to float
-  %3235 = fmul fast float %3234, %3213
-  %3236 = fsub fast float %3231, %3235
-  %3237 = and i32 %3233, 1
-  %3238 = icmp eq i32 %3237, 0
-  br i1 %3238, label %3239, label %3241
-
-; <label>:3239                                    ; preds = %3230
-  %3240 = fsub fast float %24, %3236
-  br label %3243
-
-; <label>:3241                                    ; preds = %3230
-  %3242 = fadd fast float %3236, %20
-  br label %3243
-
-; <label>:3243                                    ; preds = %3241, %3239, %3228, %3226, %3224
-  %3244 = phi float [ %3225, %3224 ], [ %3227, %3226 ], [ %3240, %3239 ], [ %3242, %3241 ], [ %2685, %3228 ]
-  %3245 = fptoui float %3244 to i32
-  %3246 = uitofp i32 %3245 to float
-  %3247 = uitofp i32 %3212 to float
-  %3248 = fptoui float %45 to i32
-  %3249 = fptoui float %182 to i32
-  %3250 = fptoui float %3246 to i32
-  %3251 = fptoui float %3247 to i32
-  %3252 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3253 = extractvalue %dx.types.CBufRet.i32 %3252, 0
-  %3254 = extractvalue %dx.types.CBufRet.i32 %3252, 1
-  %3255 = extractvalue %dx.types.CBufRet.i32 %3252, 2
-  %3256 = extractvalue %dx.types.CBufRet.i32 %3252, 3
-  %3257 = mul i32 %3253, %3248
-  %3258 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3249, i32 %3254, i32 %3257)  ; IMad(a,b,c)
-  %3259 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3250, i32 %3255, i32 %3258)  ; IMad(a,b,c)
-  %3260 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3251, i32 %3256, i32 %3259)  ; IMad(a,b,c)
-  %3261 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3260, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3262 = extractvalue %dx.types.ResRet.i32 %3261, 0
-  %3263 = sitofp i32 %3262 to float
-  br label %3264
-
-; <label>:3264                                    ; preds = %3243, %3177, %3148, %3131, %3121
-  %3265 = phi float [ %3145, %3131 ], [ 0.000000e+00, %3121 ], [ %3176, %3148 ], [ %3263, %3243 ], [ 0.000000e+00, %3177 ]
-  %3266 = call float @dx.op.unary.f32(i32 22, float %180)  ; Frc(value)
-  %3267 = call float @dx.op.unary.f32(i32 22, float %181)  ; Frc(value)
-  %3268 = fmul fast float %3267, %3267
-  %3269 = fmul fast float %3268, %3267
-  %3270 = fmul fast float %1086, -7.500000e-01
-  %3271 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2249, float %3270)  ; FMad(a,b,c)
-  %3272 = fmul fast float %1086, 1.500000e+00
-  %3273 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %1668, float %3272)  ; FMad(a,b,c)
-  %3274 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %2249, float %3273)  ; FMad(a,b,c)
-  %3275 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %2830, float %3274)  ; FMad(a,b,c)
-  %3276 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %1668, float %3270)  ; FMad(a,b,c)
-  %3277 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %2249, float %3276)  ; FMad(a,b,c)
-  %3278 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2830, float %3277)  ; FMad(a,b,c)
-  %3279 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3267, float %3268, float %3269, float %1668, float %3271, float %3275, float %3278)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3280 = fmul fast float %1231, -7.500000e-01
-  %3281 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2394, float %3280)  ; FMad(a,b,c)
-  %3282 = fmul fast float %1231, 1.500000e+00
-  %3283 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %1813, float %3282)  ; FMad(a,b,c)
-  %3284 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %2394, float %3283)  ; FMad(a,b,c)
-  %3285 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %2975, float %3284)  ; FMad(a,b,c)
-  %3286 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %1813, float %3280)  ; FMad(a,b,c)
-  %3287 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %2394, float %3286)  ; FMad(a,b,c)
-  %3288 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2975, float %3287)  ; FMad(a,b,c)
-  %3289 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3267, float %3268, float %3269, float %1813, float %3281, float %3285, float %3288)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3290 = fmul fast float %1377, -7.500000e-01
-  %3291 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2539, float %3290)  ; FMad(a,b,c)
-  %3292 = fmul fast float %1377, 1.500000e+00
-  %3293 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %1958, float %3292)  ; FMad(a,b,c)
-  %3294 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %2539, float %3293)  ; FMad(a,b,c)
-  %3295 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %3120, float %3294)  ; FMad(a,b,c)
-  %3296 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %1958, float %3290)  ; FMad(a,b,c)
-  %3297 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %2539, float %3296)  ; FMad(a,b,c)
-  %3298 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3120, float %3297)  ; FMad(a,b,c)
-  %3299 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3267, float %3268, float %3269, float %1958, float %3291, float %3295, float %3298)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3300 = fmul fast float %1523, -7.500000e-01
-  %3301 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2684, float %3300)  ; FMad(a,b,c)
-  %3302 = fmul fast float %1523, 1.500000e+00
-  %3303 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %2103, float %3302)  ; FMad(a,b,c)
-  %3304 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %2684, float %3303)  ; FMad(a,b,c)
-  %3305 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %3265, float %3304)  ; FMad(a,b,c)
-  %3306 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %2103, float %3300)  ; FMad(a,b,c)
-  %3307 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %2684, float %3306)  ; FMad(a,b,c)
-  %3308 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3265, float %3307)  ; FMad(a,b,c)
-  %3309 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3267, float %3268, float %3269, float %2103, float %3301, float %3305, float %3308)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3310 = fmul fast float %3266, %3266
-  %3311 = fmul fast float %3310, %3266
-  %3312 = fmul fast float %3279, -7.500000e-01
-  %3313 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3299, float %3312)  ; FMad(a,b,c)
-  %3314 = fmul fast float %3279, 1.500000e+00
-  %3315 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %3289, float %3314)  ; FMad(a,b,c)
-  %3316 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %3299, float %3315)  ; FMad(a,b,c)
-  %3317 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %3309, float %3316)  ; FMad(a,b,c)
-  %3318 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %3289, float %3312)  ; FMad(a,b,c)
-  %3319 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %3299, float %3318)  ; FMad(a,b,c)
-  %3320 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3309, float %3319)  ; FMad(a,b,c)
-  %3321 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3266, float %3310, float %3311, float %3289, float %3313, float %3317, float %3320)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3322 = fptosi float %3321 to i32
-  call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %1, i32 %8, i32 0, i32 %3322, i32 undef, i32 undef, i32 undef, i8 1, i32 4)  ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment)
-  br label %3323
-
-; <label>:3323                                    ; preds = %3264, %933, %919, %329, %0
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.threadId.i32(i32, i32) #0
-
-; Function Attrs: nounwind readonly
-declare %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32, %dx.types.Handle, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.rawBufferStore.i32(i32, %dx.types.Handle, i32, i32, i32, i32, i32, i32, i8, i32) #2
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.unary.f32(i32, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.tertiary.f32(i32, float, float, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.binary.f32(i32, float, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.dot4.f32(i32, float, float, float, float, float, float, float, float) #0
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.tertiary.i32(i32, i32, i32, i32) #0
-
-; Function Attrs: nounwind readonly
-declare %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32, %dx.types.Handle, i32) #1
-
-; Function Attrs: nounwind readonly
-declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #1
-
-; Function Attrs: nounwind readnone
-declare double @dx.op.makeDouble.f64(i32, i32, i32) #0
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind readonly }
-attributes #2 = { nounwind }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.valver = !{!2}
-!dx.shaderModel = !{!3}
-!dx.resources = !{!4}
-!dx.entryPoints = !{!13}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 1, i32 2}
-!2 = !{i32 1, i32 6}
-!3 = !{!"cs", i32 6, i32 2}
-!4 = !{null, !5, !11, null}
-!5 = !{!6, !8, !10}
-!6 = !{i32 0, %"class.RWStructuredBuffer<int>"* undef, !"", i32 0, i32 0, i32 1, i32 12, i1 false, i1 false, i1 false, !7}
-!7 = !{i32 1, i32 4}
-!8 = !{i32 1, %"class.RWStructuredBuffer<double>"* undef, !"", i32 0, i32 1, i32 1, i32 12, i1 false, i1 false, i1 false, !9}
-!9 = !{i32 1, i32 8}
-!10 = !{i32 2, %"class.RWStructuredBuffer<int>"* undef, !"", i32 0, i32 2, i32 1, i32 12, i1 false, i1 false, i1 false, !7}
-!11 = !{!12}
-!12 = !{i32 0, %Constants* undef, !"", i32 0, i32 0, i32 1, i32 116, null}
-!13 = !{void ()* @GridSample, !"GridSample", null, !4, !14}
-!14 = !{i32 0, i64 8388628, i32 4, !15}
-!15 = !{i32 64, i32 1, i32 1}
-
-#endif
-
-const unsigned char g_GridSample[] = {
-  0x44, 0x58, 0x42, 0x43, 0xed, 0xf9, 0x90, 0x87, 0xb9, 0x7e, 0x5d, 0xc8,
-  0x24, 0x64, 0x14, 0x6e, 0x07, 0x23, 0xd9, 0x3e, 0x01, 0x00, 0x00, 0x00,
-  0x44, 0x54, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00,
-  0x48, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00,
-  0x18, 0x01, 0x00, 0x00, 0x34, 0x01, 0x00, 0x00, 0x53, 0x46, 0x49, 0x30,
-  0x08, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x49, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x08, 0x00, 0x00, 0x00, 0x4f, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x50, 0x53, 0x56, 0x30,
-  0xa8, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x05, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00,
-  0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
-  0x18, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x48, 0x41, 0x53, 0x48, 0x14, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0xea, 0x9c, 0x91, 0x2f, 0x56, 0xaf, 0x6f, 0xeb,
-  0x98, 0x8b, 0xe5, 0x1c, 0x1b, 0x10, 0xc0, 0x96, 0x44, 0x58, 0x49, 0x4c,
-  0x08, 0x53, 0x00, 0x00, 0x62, 0x00, 0x05, 0x00, 0xc2, 0x14, 0x00, 0x00,
-  0x44, 0x58, 0x49, 0x4c, 0x02, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
-  0xf0, 0x52, 0x00, 0x00, 0x42, 0x43, 0xc0, 0xde, 0x21, 0x0c, 0x00, 0x00,
-  0xb9, 0x14, 0x00, 0x00, 0x0b, 0x82, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00,
-  0x13, 0x00, 0x00, 0x00, 0x07, 0x81, 0x23, 0x91, 0x41, 0xc8, 0x04, 0x49,
-  0x06, 0x10, 0x32, 0x39, 0x92, 0x01, 0x84, 0x0c, 0x25, 0x05, 0x08, 0x19,
-  0x1e, 0x04, 0x8b, 0x62, 0x80, 0x18, 0x45, 0x02, 0x42, 0x92, 0x0b, 0x42,
-  0xc4, 0x10, 0x32, 0x14, 0x38, 0x08, 0x18, 0x4b, 0x0a, 0x32, 0x62, 0x88,
-  0x48, 0x90, 0x14, 0x20, 0x43, 0x46, 0x88, 0xa5, 0x00, 0x19, 0x32, 0x42,
-  0xe4, 0x48, 0x0e, 0x90, 0x11, 0x23, 0xc4, 0x50, 0x41, 0x51, 0x81, 0x8c,
-  0xe1, 0x83, 0xe5, 0x8a, 0x04, 0x31, 0x46, 0x06, 0x51, 0x18, 0x00, 0x00,
-  0x08, 0x00, 0x00, 0x00, 0x1b, 0x8c, 0xe0, 0xff, 0xff, 0xff, 0xff, 0x07,
-  0x40, 0x02, 0xa8, 0x0d, 0x86, 0xf0, 0xff, 0xff, 0xff, 0xff, 0x03, 0x20,
-  0x01, 0xd5, 0x06, 0x62, 0xf8, 0xff, 0xff, 0xff, 0xff, 0x01, 0x90, 0x00,
-  0x49, 0x18, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x13, 0x82, 0x60, 0x42,
-  0x20, 0x4c, 0x08, 0x06, 0x00, 0x00, 0x00, 0x00, 0x89, 0x20, 0x00, 0x00,
-  0x4f, 0x00, 0x00, 0x00, 0x32, 0x22, 0x88, 0x09, 0x20, 0x64, 0x85, 0x04,
-  0x13, 0x23, 0xa4, 0x84, 0x04, 0x13, 0x23, 0xe3, 0x84, 0xa1, 0x90, 0x14,
-  0x12, 0x4c, 0x8c, 0x8c, 0x0b, 0x84, 0xc4, 0x4c, 0x10, 0xb0, 0xc1, 0x08,
-  0x40, 0x09, 0x00, 0x0a, 0xe6, 0x08, 0xc0, 0xa0, 0x0c, 0xc3, 0x30, 0x10,
-  0x71, 0xd3, 0x70, 0xf9, 0x13, 0xf6, 0x10, 0x92, 0xbf, 0x12, 0xd2, 0x4a,
-  0x4c, 0x3e, 0x72, 0xdb, 0xa8, 0x18, 0x86, 0x61, 0x18, 0xe6, 0x08, 0x10,
-  0x3a, 0xee, 0x19, 0x2e, 0x7f, 0xc2, 0x1e, 0x42, 0xf2, 0x43, 0xa0, 0x19,
-  0x16, 0x02, 0x05, 0x48, 0x39, 0x8c, 0x21, 0x19, 0x86, 0x63, 0x20, 0xa5,
-  0x2c, 0xc0, 0x90, 0x0c, 0xc3, 0x30, 0x0c, 0xc3, 0x31, 0x10, 0x33, 0x03,
-  0x50, 0x06, 0x67, 0x70, 0xe8, 0x29, 0x85, 0x33, 0x38, 0x8e, 0x43, 0x51,
-  0x21, 0x9c, 0xc1, 0x71, 0x68, 0x2a, 0x8a, 0x33, 0x38, 0x8e, 0xe3, 0x38,
-  0x8e, 0xe3, 0x50, 0x55, 0x8a, 0x61, 0x18, 0x86, 0x81, 0xae, 0xa3, 0x86,
-  0xcb, 0x9f, 0xb0, 0x87, 0x90, 0x7c, 0x6e, 0xa3, 0x8a, 0x95, 0x98, 0x7c,
-  0xe4, 0xb6, 0x11, 0x31, 0x0c, 0xc3, 0x50, 0x88, 0x6c, 0x48, 0x06, 0xd2,
-  0xe6, 0x08, 0x82, 0x62, 0x24, 0xc3, 0x31, 0x0c, 0x1c, 0x75, 0x43, 0x00,
-  0x85, 0xf8, 0x86, 0x61, 0x20, 0x70, 0x20, 0x60, 0xa6, 0x6e, 0x1c, 0xd8,
-  0x21, 0x1c, 0xe6, 0x61, 0x1e, 0xdc, 0x40, 0x16, 0x6e, 0x61, 0x16, 0xe8,
-  0x41, 0x1e, 0xea, 0x61, 0x1c, 0xe8, 0xa1, 0x1e, 0xe4, 0xa1, 0x1c, 0xc8,
-  0x41, 0x14, 0xea, 0xc1, 0x1c, 0xcc, 0xa1, 0x1c, 0xe4, 0x81, 0x0f, 0xd2,
-  0xc1, 0x1d, 0xe8, 0xc1, 0x0f, 0x50, 0x30, 0xd0, 0x38, 0x13, 0x18, 0x8c,
-  0x03, 0x3b, 0x84, 0xc3, 0x3c, 0xcc, 0x83, 0x1b, 0xc8, 0xc2, 0x2d, 0xcc,
-  0x02, 0x3d, 0xc8, 0x43, 0x3d, 0x8c, 0x03, 0x3d, 0xd4, 0x83, 0x3c, 0x94,
-  0x03, 0x39, 0x88, 0x42, 0x3d, 0x98, 0x83, 0x39, 0x94, 0x83, 0x3c, 0xf0,
-  0x01, 0x39, 0xbc, 0x43, 0x3d, 0x88, 0x03, 0x3b, 0x94, 0x83, 0x1f, 0xa0,
-  0xe0, 0xa3, 0x72, 0x18, 0x81, 0x18, 0x2e, 0xe1, 0x9c, 0x46, 0x9a, 0x80,
-  0x66, 0x92, 0xd0, 0x32, 0x0c, 0xc3, 0x70, 0x9e, 0xe7, 0x79, 0x9e, 0x03,
-  0xa1, 0x73, 0x04, 0xa0, 0x30, 0x05, 0x00, 0x00, 0x13, 0x14, 0x72, 0xc0,
-  0x87, 0x74, 0x60, 0x87, 0x36, 0x68, 0x87, 0x79, 0x68, 0x03, 0x72, 0xc0,
-  0x87, 0x0d, 0xae, 0x50, 0x0e, 0x6d, 0xd0, 0x0e, 0x7a, 0x50, 0x0e, 0x6d,
-  0x00, 0x0f, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d,
-  0x90, 0x0e, 0x71, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x78,
-  0xa0, 0x07, 0x78, 0xd0, 0x06, 0xe9, 0x10, 0x07, 0x76, 0xa0, 0x07, 0x71,
-  0x60, 0x07, 0x6d, 0x90, 0x0e, 0x73, 0x20, 0x07, 0x7a, 0x30, 0x07, 0x72,
-  0xd0, 0x06, 0xe9, 0x60, 0x07, 0x74, 0xa0, 0x07, 0x76, 0x40, 0x07, 0x6d,
-  0x60, 0x0e, 0x71, 0x60, 0x07, 0x7a, 0x10, 0x07, 0x76, 0xd0, 0x06, 0xe6,
-  0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x60, 0x0e, 0x76,
-  0x40, 0x07, 0x7a, 0x60, 0x07, 0x74, 0xd0, 0x06, 0xee, 0x80, 0x07, 0x7a,
-  0x10, 0x07, 0x76, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x7a, 0x60, 0x07, 0x74,
-  0x30, 0xe4, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x60, 0xc8, 0x43, 0x00, 0x01, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0xc0, 0x90, 0x47, 0x01, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x80, 0x21, 0x0f, 0x03, 0x04, 0xc0, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x43, 0x9e, 0x07, 0x08, 0x80, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0x3c, 0x11, 0x10, 0x00, 0x01,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x79, 0x26, 0x20, 0x00,
-  0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0xf2, 0x54, 0x40,
-  0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0xe4, 0xb9,
-  0x80, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0xc8,
-  0xa3, 0x01, 0x01, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0,
-  0x90, 0xa7, 0x03, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x80, 0x21, 0x0f, 0x18, 0x00, 0x01, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x40, 0x16, 0x08, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00,
-  0x32, 0x1e, 0x98, 0x14, 0x19, 0x11, 0x4c, 0x90, 0x8c, 0x09, 0x26, 0x47,
-  0xc6, 0x04, 0x43, 0x1a, 0x4a, 0xa0, 0x08, 0x8a, 0x61, 0x04, 0xa0, 0x30,
-  0x0a, 0xa1, 0x20, 0x0a, 0x3d, 0xa0, 0x00, 0x03, 0x88, 0x1b, 0x01, 0x20,
-  0xb5, 0x50, 0x01, 0x01, 0x11, 0xc8, 0x9c, 0x01, 0xa0, 0x74, 0x06, 0x80,
-  0xc8, 0x19, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00, 0x4b, 0x00, 0x00, 0x00,
-  0x1a, 0x03, 0x4c, 0x90, 0x46, 0x02, 0x13, 0x44, 0x35, 0x18, 0x63, 0x0b,
-  0x73, 0x3b, 0x03, 0xb1, 0x2b, 0x93, 0x9b, 0x4b, 0x7b, 0x73, 0x03, 0x99,
-  0x71, 0xb9, 0x01, 0x41, 0xa1, 0x0b, 0x3b, 0x9b, 0x7b, 0x91, 0x2a, 0x62,
-  0x2a, 0x0a, 0x9a, 0x2a, 0xfa, 0x9a, 0xb9, 0x81, 0x79, 0x31, 0x4b, 0x73,
-  0x0b, 0x63, 0x4b, 0xd9, 0x10, 0x04, 0x13, 0x84, 0x81, 0x99, 0x20, 0x0c,
-  0xcd, 0x06, 0x61, 0x20, 0x26, 0x08, 0x83, 0xb3, 0x41, 0x18, 0x0c, 0x0a,
-  0x63, 0x73, 0x1b, 0x06, 0xc4, 0x20, 0x26, 0x08, 0xc3, 0x33, 0x41, 0x20,
-  0x83, 0x8c, 0xc0, 0x04, 0x61, 0x80, 0x26, 0x08, 0x5c, 0x35, 0x41, 0x18,
-  0xa2, 0x0d, 0xc2, 0xf0, 0x6c, 0x58, 0x94, 0x85, 0x51, 0x94, 0xa1, 0x71,
-  0x1c, 0x07, 0x9a, 0x20, 0x98, 0xc1, 0x35, 0x41, 0x18, 0xa4, 0x0d, 0xc2,
-  0x30, 0x6d, 0x58, 0x06, 0x89, 0x51, 0x86, 0xa1, 0x71, 0x1c, 0x87, 0xda,
-  0xb0, 0x10, 0x0b, 0xa3, 0x10, 0x43, 0xe3, 0x38, 0x0e, 0xb4, 0x61, 0x88,
-  0x2a, 0x6b, 0x82, 0x90, 0x06, 0xd8, 0x04, 0x61, 0x98, 0x36, 0x20, 0x0a,
-  0xc6, 0x28, 0xca, 0x90, 0x01, 0x1b, 0x02, 0x6d, 0x03, 0x01, 0x5c, 0x1b,
-  0x30, 0x41, 0x10, 0x00, 0x2a, 0x47, 0x72, 0x69, 0x64, 0x53, 0x61, 0x6d,
-  0x70, 0x6c, 0x65, 0x13, 0x04, 0x35, 0xb0, 0x26, 0x08, 0x03, 0xb5, 0x61,
-  0x00, 0x83, 0x61, 0xd8, 0x40, 0x28, 0xdf, 0x13, 0x06, 0x1b, 0x8a, 0xce,
-  0x03, 0x38, 0x31, 0xa8, 0xc2, 0xc6, 0x66, 0xd7, 0xe6, 0x92, 0x46, 0x56,
-  0xe6, 0x46, 0x37, 0x25, 0x08, 0xaa, 0x90, 0xe1, 0xb9, 0xd8, 0x95, 0xc9,
-  0xcd, 0xa5, 0xbd, 0xb9, 0x4d, 0x09, 0x88, 0x26, 0x64, 0x78, 0x2e, 0x76,
-  0x61, 0x6c, 0x76, 0x65, 0x72, 0x53, 0x02, 0xa3, 0x0e, 0x19, 0x9e, 0xcb,
-  0x1c, 0x5a, 0x18, 0x59, 0x99, 0x5c, 0xd3, 0x1b, 0x59, 0x19, 0xdb, 0x94,
-  0x00, 0x29, 0x43, 0x86, 0xe7, 0x22, 0x57, 0x36, 0xf7, 0x56, 0x27, 0x37,
-  0x56, 0x36, 0x37, 0x25, 0xd8, 0xea, 0x90, 0xe1, 0xb9, 0x94, 0xb9, 0xd1,
-  0xc9, 0xe5, 0x41, 0xbd, 0xa5, 0xb9, 0xd1, 0xcd, 0x4d, 0x09, 0xc4, 0x00,
-  0x79, 0x18, 0x00, 0x00, 0x51, 0x00, 0x00, 0x00, 0x33, 0x08, 0x80, 0x1c,
-  0xc4, 0xe1, 0x1c, 0x66, 0x14, 0x01, 0x3d, 0x88, 0x43, 0x38, 0x84, 0xc3,
-  0x8c, 0x42, 0x80, 0x07, 0x79, 0x78, 0x07, 0x73, 0x98, 0x71, 0x0c, 0xe6,
-  0x00, 0x0f, 0xed, 0x10, 0x0e, 0xf4, 0x80, 0x0e, 0x33, 0x0c, 0x42, 0x1e,
-  0xc2, 0xc1, 0x1d, 0xce, 0xa1, 0x1c, 0x66, 0x30, 0x05, 0x3d, 0x88, 0x43,
-  0x38, 0x84, 0x83, 0x1b, 0xcc, 0x03, 0x3d, 0xc8, 0x43, 0x3d, 0x8c, 0x03,
-  0x3d, 0xcc, 0x78, 0x8c, 0x74, 0x70, 0x07, 0x7b, 0x08, 0x07, 0x79, 0x48,
-  0x87, 0x70, 0x70, 0x07, 0x7a, 0x70, 0x03, 0x76, 0x78, 0x87, 0x70, 0x20,
-  0x87, 0x19, 0xcc, 0x11, 0x0e, 0xec, 0x90, 0x0e, 0xe1, 0x30, 0x0f, 0x6e,
-  0x30, 0x0f, 0xe3, 0xf0, 0x0e, 0xf0, 0x50, 0x0e, 0x33, 0x10, 0xc4, 0x1d,
-  0xde, 0x21, 0x1c, 0xd8, 0x21, 0x1d, 0xc2, 0x61, 0x1e, 0x66, 0x30, 0x89,
-  0x3b, 0xbc, 0x83, 0x3b, 0xd0, 0x43, 0x39, 0xb4, 0x03, 0x3c, 0xbc, 0x83,
-  0x3c, 0x84, 0x03, 0x3b, 0xcc, 0xf0, 0x14, 0x76, 0x60, 0x07, 0x7b, 0x68,
-  0x07, 0x37, 0x68, 0x87, 0x72, 0x68, 0x07, 0x37, 0x80, 0x87, 0x70, 0x90,
-  0x87, 0x70, 0x60, 0x07, 0x76, 0x28, 0x07, 0x76, 0xf8, 0x05, 0x76, 0x78,
-  0x87, 0x77, 0x80, 0x87, 0x5f, 0x08, 0x87, 0x71, 0x18, 0x87, 0x72, 0x98,
-  0x87, 0x79, 0x98, 0x81, 0x2c, 0xee, 0xf0, 0x0e, 0xee, 0xe0, 0x0e, 0xf5,
-  0xc0, 0x0e, 0xec, 0x30, 0x03, 0x62, 0xc8, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c,
-  0xcc, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xdc, 0x61, 0x1c, 0xca, 0x21, 0x1c,
-  0xc4, 0x81, 0x1d, 0xca, 0x61, 0x06, 0xd6, 0x90, 0x43, 0x39, 0xc8, 0x43,
-  0x39, 0x98, 0x43, 0x39, 0xc8, 0x43, 0x39, 0xb8, 0xc3, 0x38, 0x94, 0x43,
-  0x38, 0x88, 0x03, 0x3b, 0x94, 0xc3, 0x2f, 0xbc, 0x83, 0x3c, 0xfc, 0x82,
-  0x3b, 0xd4, 0x03, 0x3b, 0xb0, 0xc3, 0x0c, 0xc4, 0x21, 0x07, 0x7c, 0x70,
-  0x03, 0x7a, 0x28, 0x87, 0x76, 0x80, 0x87, 0x19, 0xd1, 0x43, 0x0e, 0xf8,
-  0xe0, 0x06, 0xe4, 0x20, 0x0e, 0xe7, 0xe0, 0x06, 0xf6, 0x10, 0x0e, 0xf2,
-  0xc0, 0x0e, 0xe1, 0x90, 0x0f, 0xef, 0x50, 0x0f, 0xf4, 0x30, 0x83, 0x81,
-  0xc8, 0x01, 0x1f, 0xdc, 0x40, 0x1c, 0xe4, 0xa1, 0x1c, 0xc2, 0x61, 0x1d,
-  0xdc, 0x40, 0x1c, 0xe4, 0x01, 0x00, 0x00, 0x00, 0x71, 0x20, 0x00, 0x00,
-  0x31, 0x00, 0x00, 0x00, 0x06, 0xa0, 0x80, 0x11, 0x32, 0xb0, 0x00, 0xf3,
-  0x2c, 0x84, 0x19, 0x40, 0xc3, 0xe5, 0x3b, 0x8f, 0x1f, 0x20, 0x0d, 0x10,
-  0x61, 0x7e, 0x71, 0xdb, 0x96, 0xb0, 0x0d, 0x97, 0xef, 0x3c, 0xbe, 0x10,
-  0x50, 0x45, 0x41, 0x44, 0xa5, 0x03, 0x0c, 0x25, 0x61, 0x00, 0x02, 0xe6,
-  0x23, 0xb7, 0x6d, 0x0a, 0xd2, 0x70, 0xf9, 0xce, 0xe3, 0x0b, 0x11, 0x01,
-  0x4c, 0x44, 0x08, 0x34, 0xc3, 0x42, 0xd8, 0x81, 0x33, 0x5c, 0xbe, 0xf3,
-  0xf8, 0x83, 0x33, 0xe1, 0x7e, 0x71, 0xdb, 0xb6, 0x40, 0x0d, 0x97, 0xef,
-  0x3c, 0x3e, 0x03, 0x28, 0x44, 0xe7, 0x50, 0xc1, 0x42, 0xf8, 0x85, 0x8e,
-  0x9b, 0xc0, 0x35, 0x5c, 0xbe, 0xf3, 0xf8, 0x11, 0x60, 0x6d, 0x54, 0x51,
-  0x10, 0x51, 0xe9, 0x00, 0x83, 0x8f, 0xdc, 0xb6, 0x0d, 0x60, 0xc3, 0xe5,
-  0x3b, 0x8f, 0x1f, 0x01, 0xd6, 0x46, 0x15, 0x05, 0x11, 0xb1, 0x93, 0x13,
-  0x11, 0x3e, 0x72, 0xdb, 0x56, 0x20, 0x0d, 0x97, 0xef, 0x3c, 0xfe, 0x44,
-  0x44, 0x13, 0x02, 0x44, 0x98, 0x5f, 0xdc, 0xb6, 0x21, 0x48, 0xc3, 0xe5,
-  0x3b, 0x8f, 0x3f, 0x11, 0xd1, 0x84, 0x00, 0x11, 0xe6, 0x23, 0xb7, 0x6d,
-  0x01, 0xd2, 0x70, 0xf9, 0xce, 0xe3, 0x4f, 0x47, 0x44, 0x00, 0x83, 0x38,
-  0xf8, 0xc8, 0x6d, 0x1b, 0xc1, 0x33, 0x5c, 0xbe, 0xf3, 0xf8, 0x54, 0x03,
-  0x44, 0x98, 0x5f, 0xdc, 0x36, 0x00, 0x00, 0x00, 0x61, 0x20, 0x00, 0x00,
-  0x14, 0x13, 0x00, 0x00, 0x13, 0x04, 0x24, 0x14, 0x0b, 0x04, 0x00, 0x00,
-  0x1e, 0x00, 0x00, 0x00, 0x34, 0x14, 0x58, 0xd9, 0x95, 0xa5, 0x40, 0x0d,
-  0x94, 0x51, 0x21, 0x15, 0xd7, 0x0c, 0x40, 0xc1, 0x95, 0x5c, 0xd9, 0x14,
-  0x4b, 0x61, 0x0a, 0x94, 0x72, 0x40, 0xd1, 0x94, 0x6e, 0x40, 0x39, 0x94,
-  0x02, 0x1d, 0x25, 0x50, 0x06, 0x45, 0x40, 0xce, 0x08, 0xc0, 0x18, 0x01,
-  0x08, 0x82, 0x20, 0xfe, 0x8d, 0x11, 0x80, 0x20, 0x08, 0xd2, 0xbf, 0x30,
-  0x46, 0x00, 0x82, 0x20, 0x48, 0x7f, 0x63, 0x04, 0x20, 0x08, 0x82, 0xfc,
-  0x37, 0x46, 0x00, 0x82, 0x20, 0x88, 0xff, 0xc2, 0x18, 0x01, 0x08, 0x82,
-  0x60, 0x08, 0x0e, 0x63, 0x04, 0x20, 0x08, 0x82, 0xfa, 0x37, 0x46, 0x00,
-  0x82, 0x20, 0xa8, 0xff, 0xc2, 0x18, 0x01, 0x08, 0x82, 0x20, 0xfc, 0x8d,
-  0x11, 0x80, 0x20, 0x08, 0xc2, 0xbf, 0x30, 0x46, 0x00, 0x82, 0x20, 0x08,
-  0x82, 0x01, 0x00, 0x00, 0x23, 0x06, 0x09, 0x00, 0x82, 0x60, 0xd0, 0xc5,
-  0x01, 0xf6, 0xb8, 0x81, 0x1b, 0x98, 0xc1, 0x88, 0x41, 0x02, 0x80, 0x20,
-  0x18, 0x74, 0x72, 0x90, 0x41, 0x70, 0x00, 0x07, 0x67, 0x30, 0x62, 0x90,
-  0x00, 0x20, 0x08, 0x06, 0xdd, 0x1c, 0x68, 0x91, 0x1b, 0xb8, 0x01, 0x1a,
-  0x8c, 0x18, 0x24, 0x00, 0x08, 0x82, 0x41, 0x47, 0x07, 0x1b, 0xf4, 0x06,
-  0x6f, 0x90, 0x06, 0x23, 0x06, 0x06, 0x00, 0x82, 0x60, 0x40, 0xf8, 0x81,
-  0x05, 0x07, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xa0, 0xdd, 0x41, 0x19,
-  0x08, 0x71, 0x30, 0x9a, 0x10, 0x00, 0x15, 0x0c, 0x30, 0x9a, 0x30, 0x04,
-  0xc3, 0x0d, 0x42, 0x40, 0x06, 0xb3, 0x0c, 0xc1, 0x08, 0x05, 0x23, 0x06,
-  0x07, 0x00, 0x82, 0x60, 0xa0, 0xf1, 0x81, 0x1a, 0x1c, 0x79, 0x30, 0x9a,
-  0x10, 0x0c, 0x17, 0x38, 0x35, 0x9a, 0x30, 0x08, 0x17, 0x38, 0x35, 0x62,
-  0x70, 0x00, 0x20, 0x08, 0x06, 0x5a, 0x28, 0xbc, 0x01, 0x03, 0x06, 0xa3,
-  0x09, 0x01, 0x30, 0xdc, 0x10, 0xf4, 0x01, 0x18, 0x4c, 0x37, 0x50, 0x5e,
-  0x30, 0xdd, 0x50, 0x69, 0x42, 0x21, 0x01, 0x4c, 0x37, 0x5c, 0x1c, 0x51,
-  0x48, 0x00, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xa0, 0xa5, 0xc2, 0x1d,
-  0x50, 0x67, 0x30, 0x9a, 0x10, 0x04, 0xa3, 0x09, 0x82, 0x30, 0x9a, 0x30,
-  0x0c, 0x15, 0x08, 0x52, 0x03, 0x21, 0x15, 0x0c, 0x52, 0x57, 0x30, 0x23,
-  0x06, 0x07, 0x00, 0x82, 0x60, 0xa0, 0xc5, 0xc2, 0x1f, 0x70, 0xac, 0x30,
-  0x9a, 0x10, 0x00, 0x15, 0x0c, 0x52, 0x5b, 0x10, 0x15, 0x20, 0x33, 0x9a,
-  0x50, 0x04, 0x15, 0x08, 0x52, 0x44, 0x10, 0x15, 0x34, 0x33, 0x9a, 0x90,
-  0x08, 0x15, 0x08, 0x52, 0x44, 0x10, 0xd7, 0x38, 0x75, 0x85, 0x53, 0x37,
-  0x38, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x1a, 0x38, 0xb8, 0xc2,
-  0x1a, 0xe0, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2,
-  0x20, 0x8c, 0x26, 0x10, 0xc3, 0x11, 0x4e, 0x1d, 0xe1, 0xd4, 0x11, 0x4e,
-  0x1d, 0xe1, 0xd4, 0x88, 0x41, 0x03, 0x80, 0x20, 0x18, 0x54, 0xeb, 0x00,
-  0x0b, 0xcc, 0xa2, 0x8c, 0x02, 0x31, 0x08, 0x81, 0x09, 0x01, 0x7c, 0x4e,
-  0x18, 0x66, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x94, 0x79, 0xc8, 0x85,
-  0x3c, 0x08, 0xcc, 0x01, 0x15, 0xc6, 0x61, 0x34, 0x21, 0x00, 0x46, 0x13,
-  0x84, 0x60, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xc0, 0xa0, 0x1d, 0x62,
-  0x41, 0x08, 0x2e, 0x70, 0xee, 0x8e, 0x61, 0x46, 0x0c, 0x14, 0x00, 0x04,
-  0xc1, 0x40, 0xc9, 0x87, 0x5f, 0xf8, 0x83, 0x80, 0x1d, 0x5c, 0x21, 0x1d,
-  0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x0c, 0x0e, 0x00, 0x04,
-  0xc1, 0x00, 0x0c, 0xe6, 0xe1, 0x16, 0x84, 0xe0, 0x02, 0xe7, 0x86, 0x1b,
-  0xea, 0x80, 0x1e, 0xc0, 0xc0, 0x90, 0x58, 0x80, 0x8f, 0x0d, 0xb2, 0x00,
-  0x9f, 0x59, 0x06, 0x61, 0x18, 0x4c, 0x58, 0x05, 0xf9, 0x98, 0xc0, 0x0a,
-  0xf2, 0x31, 0x3f, 0x88, 0x05, 0xf8, 0x58, 0x1f, 0xc8, 0x02, 0x7c, 0x8c,
-  0x10, 0xe4, 0x63, 0x84, 0x20, 0x9f, 0x59, 0x02, 0xc2, 0x44, 0x01, 0x91,
-  0x8f, 0x21, 0xa1, 0x20, 0x1f, 0x13, 0x6e, 0x01, 0x3e, 0x26, 0xe0, 0x02,
-  0x7c, 0x4c, 0xa8, 0x05, 0xf9, 0x98, 0x60, 0x0b, 0xf2, 0x99, 0x25, 0x20,
-  0x06, 0x2a, 0x1c, 0x48, 0x20, 0x86, 0x81, 0x0a, 0x07, 0x12, 0x88, 0x61,
-  0x34, 0x21, 0x16, 0x84, 0xe1, 0x86, 0xc0, 0x24, 0xc0, 0x60, 0x96, 0xa1,
-  0x30, 0x82, 0x11, 0x03, 0x03, 0x00, 0x41, 0x30, 0x78, 0x5e, 0x62, 0x1d,
-  0x88, 0x11, 0x03, 0x03, 0x00, 0x41, 0x30, 0x78, 0x60, 0x82, 0x1d, 0x88,
-  0x59, 0x02, 0x63, 0xa0, 0xc2, 0x21, 0x0a, 0x86, 0x18, 0xa8, 0x70, 0x88,
-  0x82, 0x21, 0x86, 0x23, 0x04, 0x55, 0x20, 0xbe, 0xe1, 0x88, 0x21, 0x15,
-  0x84, 0xaf, 0x84, 0x60, 0x87, 0x23, 0x88, 0x56, 0x20, 0xbe, 0x12, 0x82,
-  0x1d, 0x8e, 0x30, 0x56, 0x41, 0xf8, 0x2a, 0x10, 0x76, 0x96, 0xe1, 0xd0,
-  0x82, 0xd1, 0x04, 0x5f, 0x18, 0x86, 0x1b, 0x82, 0x99, 0x00, 0x83, 0x59,
-  0x06, 0x24, 0x09, 0x4a, 0x17, 0x46, 0x02, 0x2e, 0x70, 0x6a, 0xc4, 0xe0,
-  0x00, 0x40, 0x10, 0x0c, 0x26, 0x9e, 0x20, 0x89, 0x66, 0x1e, 0x46, 0x0c,
-  0x0e, 0x00, 0x04, 0xc1, 0x60, 0xea, 0x09, 0x92, 0x08, 0x84, 0xe2, 0x85,
-  0x93, 0x80, 0x0b, 0x9c, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x09,
-  0x2c, 0x50, 0x02, 0xba, 0x87, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x98,
-  0xc2, 0x02, 0x25, 0x02, 0x61, 0x96, 0x40, 0x1b, 0x6e, 0x50, 0x76, 0x02,
-  0x0c, 0x66, 0x19, 0x14, 0x2d, 0x30, 0x5d, 0xe0, 0x85, 0xf8, 0xcc, 0x32,
-  0x2c, 0xce, 0x64, 0xbd, 0x50, 0xc5, 0xc7, 0x02, 0x81, 0x3e, 0x17, 0x0c,
-  0x73, 0x81, 0x53, 0x16, 0x14, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x64,
-  0xa1, 0xc3, 0x0d, 0x81, 0x58, 0x80, 0xc1, 0x2c, 0x03, 0xd3, 0x04, 0x36,
-  0x94, 0x03, 0x7c, 0x66, 0x09, 0x24, 0x23, 0x07, 0x22, 0x3e, 0xb3, 0x04,
-  0xd2, 0x2c, 0xc3, 0x23, 0x71, 0xf6, 0x95, 0x43, 0x7c, 0x2c, 0x60, 0xe8,
-  0x73, 0xc1, 0x30, 0x17, 0x38, 0x65, 0xc1, 0x23, 0x1f, 0x2b, 0x82, 0xf8,
-  0x14, 0xe1, 0x16, 0x3a, 0xdc, 0x10, 0xb0, 0x05, 0x18, 0xcc, 0x32, 0x40,
-  0x51, 0x60, 0xed, 0x30, 0xc4, 0x67, 0x96, 0x40, 0x32, 0x02, 0x1e, 0xe0,
-  0x33, 0x4b, 0x20, 0x0d, 0xb4, 0x38, 0x18, 0x63, 0x35, 0x04, 0x24, 0x44,
-  0xb2, 0xe0, 0x98, 0x3b, 0xc8, 0x43, 0x7c, 0x66, 0x19, 0x26, 0xcb, 0x0c,
-  0x6c, 0x1e, 0xd4, 0x20, 0x3e, 0x16, 0x08, 0xf4, 0xb9, 0x60, 0x98, 0x0b,
-  0x9c, 0xb2, 0xa0, 0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xd0, 0x0b, 0x1d,
-  0x6e, 0x08, 0xf0, 0x02, 0x0c, 0x66, 0x19, 0xa8, 0x2a, 0xb0, 0x61, 0x1f,
-  0xe0, 0x33, 0x4b, 0xa0, 0x19, 0x3e, 0x10, 0xf1, 0x99, 0x25, 0xd0, 0x66,
-  0x19, 0x2e, 0xcd, 0x0d, 0x8c, 0x0e, 0xf2, 0x21, 0x3e, 0x16, 0x30, 0xf4,
-  0xb9, 0x60, 0x98, 0x0b, 0x9c, 0xb2, 0xe0, 0x91, 0x8f, 0x15, 0x41, 0x7c,
-  0x8a, 0x20, 0x0d, 0x1d, 0x6e, 0x08, 0x44, 0x03, 0x0c, 0x66, 0x19, 0xb0,
-  0x2c, 0xb0, 0x90, 0x18, 0xe2, 0x33, 0x4b, 0xa0, 0x19, 0x61, 0x12, 0xf0,
-  0x99, 0x25, 0xd0, 0x06, 0x8a, 0x1c, 0x71, 0x40, 0xfc, 0x21, 0xf1, 0x07,
-  0x83, 0x0d, 0x32, 0x36, 0xc0, 0xd8, 0xc0, 0x62, 0x83, 0x8a, 0x0d, 0xa8,
-  0x81, 0x22, 0x87, 0x17, 0x10, 0x7f, 0x48, 0xfc, 0xc1, 0x20, 0x32, 0x03,
-  0xf3, 0x07, 0x0b, 0xab, 0x34, 0xea, 0xf0, 0xc1, 0xa9, 0x59, 0x86, 0x6d,
-  0x0e, 0x4a, 0x61, 0x34, 0xe1, 0x26, 0x86, 0xe1, 0x86, 0x20, 0x35, 0xc0,
-  0x60, 0x96, 0x81, 0xf3, 0x82, 0xe1, 0x88, 0x42, 0x2d, 0x86, 0xef, 0x8c,
-  0x61, 0x86, 0x1b, 0x82, 0x9a, 0x20, 0x83, 0x1a, 0x02, 0x1d, 0x8e, 0x40,
-  0xdc, 0x62, 0xf8, 0x2a, 0x10, 0xf4, 0x94, 0x61, 0x86, 0x1b, 0x02, 0x9c,
-  0x20, 0x83, 0x0a, 0x06, 0x9d, 0x65, 0xe8, 0xe4, 0x20, 0x38, 0x7e, 0x18,
-  0xe6, 0x9a, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x40, 0xdb, 0x8d,
-  0xd4, 0x30, 0x8b, 0xdb, 0x18, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18,
-  0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0xe2, 0x90, 0x11, 0x03, 0x04,
-  0x00, 0x41, 0x30, 0xb8, 0xc4, 0x03, 0x36, 0x0e, 0x22, 0x18, 0x31, 0x40,
-  0x00, 0x10, 0x04, 0x83, 0x6b, 0x3c, 0x62, 0x83, 0x21, 0x82, 0x11, 0x03,
-  0x04, 0x00, 0x41, 0x30, 0xb8, 0xc8, 0x43, 0x36, 0x24, 0x22, 0x18, 0x31,
-  0x50, 0x00, 0x10, 0x04, 0x03, 0x65, 0x3d, 0x62, 0x03, 0x2e, 0x02, 0xdf,
-  0x08, 0x0d, 0xde, 0x18, 0x4d, 0x08, 0x80, 0x0b, 0x1c, 0x9b, 0x25, 0x90,
-  0x83, 0xe1, 0x86, 0x8c, 0x3c, 0xc0, 0x60, 0x96, 0xe1, 0x03, 0x83, 0xa0,
-  0xd6, 0x82, 0x36, 0xe0, 0x02, 0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1,
-  0x60, 0x6a, 0x8f, 0xda, 0xf8, 0x48, 0x63, 0xc4, 0xe0, 0x00, 0x40, 0x10,
-  0x0c, 0x26, 0xf7, 0xa8, 0x8d, 0x40, 0xb8, 0x60, 0x98, 0x72, 0x8b, 0xdc,
-  0x80, 0x0b, 0x9c, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x49, 0x3e,
-  0x74, 0x63, 0x0c, 0x52, 0x63, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xa6,
-  0xf9, 0xd0, 0x8d, 0x40, 0xb8, 0x60, 0x98, 0x0b, 0x9c, 0xba, 0xc3, 0xa9,
-  0xbb, 0x89, 0x61, 0x0e, 0x0d, 0x86, 0x39, 0x62, 0x98, 0x23, 0x86, 0x19,
-  0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x0d, 0x3f, 0xcc, 0x63, 0x34, 0xe8,
-  0x63, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46,
-  0x13, 0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0,
-  0xfa, 0x8f, 0xf6, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c,
-  0x2e, 0x10, 0x71, 0x8f, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1,
-  0xe0, 0x0a, 0x91, 0xf7, 0x48, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10,
-  0x0c, 0x14, 0x14, 0x71, 0x8f, 0xd6, 0x08, 0xf6, 0xc3, 0x37, 0xf2, 0x63,
-  0x34, 0x21, 0x00, 0x2e, 0x70, 0x6c, 0x96, 0x40, 0x0e, 0x86, 0x1b, 0xec,
-  0x00, 0x44, 0xc0, 0x60, 0x96, 0x21, 0x0c, 0xe4, 0x20, 0xb0, 0xbf, 0x08,
-  0x8d, 0xf8, 0x0c, 0x47, 0xec, 0x81, 0x68, 0x10, 0xdf, 0x2c, 0x83, 0x18,
-  0x94, 0x41, 0x60, 0xa3, 0xc1, 0x07, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05,
-  0xc3, 0x5c, 0xe0, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x84,
-  0x8a, 0xe8, 0x70, 0x43, 0x80, 0x22, 0x60, 0x30, 0xcb, 0x30, 0x06, 0x64,
-  0x10, 0xd8, 0xb0, 0x1a, 0xf0, 0x99, 0x25, 0x48, 0x03, 0x53, 0x0d, 0x22,
-  0x3e, 0xb3, 0x04, 0x69, 0x30, 0x1c, 0x61, 0x0a, 0xab, 0x21, 0x7c, 0xb3,
-  0x0c, 0x66, 0x90, 0x06, 0x81, 0x9d, 0x02, 0x6b, 0xc4, 0xc7, 0x02, 0x87,
-  0x3e, 0x17, 0x0c, 0x73, 0x81, 0x53, 0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88,
-  0x4f, 0x11, 0x35, 0xa2, 0xc3, 0x0d, 0xc1, 0x8c, 0x80, 0xc1, 0x2c, 0xc3,
-  0x19, 0xa0, 0x41, 0x60, 0xb4, 0x31, 0xc4, 0x67, 0x96, 0x20, 0x0d, 0x8c,
-  0xb8, 0x0d, 0xf8, 0xcc, 0x12, 0xa4, 0xc1, 0x40, 0x8b, 0xa3, 0x8d, 0x01,
-  0x46, 0x06, 0xc4, 0x19, 0x08, 0x68, 0x20, 0x16, 0x65, 0x70, 0xc1, 0x30,
-  0x66, 0x1b, 0xba, 0x11, 0x9f, 0xe1, 0x88, 0x59, 0xd8, 0x0d, 0xe2, 0x9b,
-  0x65, 0x50, 0x83, 0x36, 0x08, 0x8c, 0x37, 0x68, 0x21, 0x3e, 0x16, 0x0c,
-  0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9c, 0xb2, 0xc0, 0x90, 0x8f, 0x15, 0x41,
-  0x7c, 0x8a, 0x18, 0x13, 0x1d, 0x6e, 0x08, 0xc2, 0x04, 0x0c, 0x66, 0x19,
-  0xd6, 0x80, 0x0d, 0x02, 0x1b, 0xc8, 0x03, 0x3e, 0xb3, 0x04, 0x71, 0x60,
-  0xe1, 0x41, 0xc4, 0x67, 0x96, 0x20, 0x0e, 0x86, 0x23, 0x7c, 0x41, 0x3c,
-  0x84, 0x6f, 0x96, 0xc1, 0x0d, 0xe2, 0x20, 0xb0, 0x5f, 0x18, 0x8f, 0xf8,
-  0x58, 0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x70, 0xca, 0x82, 0x48, 0x3e,
-  0x56, 0x04, 0xf1, 0x29, 0xc2, 0x4d, 0x74, 0xb8, 0x21, 0x60, 0x13, 0x30,
-  0x98, 0x65, 0x78, 0x03, 0x38, 0x08, 0x6c, 0x3d, 0x86, 0xf8, 0xcc, 0x12,
-  0xc4, 0x81, 0x11, 0xf0, 0x01, 0x9f, 0x59, 0x82, 0x38, 0x18, 0x68, 0x71,
-  0xb4, 0x35, 0xc0, 0xd8, 0x80, 0x78, 0x03, 0x01, 0x0e, 0x64, 0xa3, 0x0d,
-  0x2e, 0x18, 0xe6, 0x02, 0xa7, 0x6e, 0x73, 0xea, 0x78, 0x63, 0x98, 0x6b,
-  0x87, 0x61, 0x8e, 0x18, 0xe6, 0x88, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04,
-  0xc1, 0x40, 0xeb, 0x93, 0x35, 0x41, 0x91, 0x3c, 0x19, 0x4d, 0x08, 0x80,
-  0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0x22,
-  0x91, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xb8, 0x48, 0x45, 0x4e, 0x12,
-  0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0xab, 0x54, 0xe6, 0x24,
-  0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xb8, 0x4c, 0x85, 0x4e,
-  0x12, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x03, 0xa5, 0x55, 0xe6,
-  0x44, 0x46, 0x02, 0x50, 0x19, 0x13, 0x3f, 0x19, 0x4d, 0x08, 0x80, 0x0b,
-  0x1c, 0x9b, 0x25, 0x90, 0x83, 0x81, 0x16, 0xc7, 0x34, 0x3a, 0x53, 0xe2,
-  0x58, 0xe2, 0x13, 0xe2, 0xc0, 0x94, 0xc0, 0xe0, 0x82, 0x81, 0x46, 0x0c,
-  0x1c, 0x00, 0x04, 0xc1, 0x80, 0x89, 0x15, 0x37, 0xc9, 0x11, 0x19, 0x29,
-  0x95, 0x60, 0x4e, 0xe6, 0x64, 0x4e, 0xd0, 0x64, 0x54, 0x66, 0x09, 0x46,
-  0x68, 0xb8, 0x61, 0x34, 0x4a, 0x05, 0x0c, 0x66, 0x19, 0xe8, 0x20, 0x26,
-  0x82, 0x11, 0x03, 0x03, 0x00, 0x41, 0x30, 0x78, 0x62, 0x25, 0x4e, 0x42,
-  0x62, 0xc4, 0xc0, 0x00, 0x40, 0x10, 0x0c, 0x1e, 0x59, 0x91, 0x93, 0x90,
-  0x30, 0xe1, 0x4c, 0xe0, 0x63, 0x02, 0x9a, 0xc0, 0x67, 0x34, 0x21, 0x47,
-  0x86, 0xe1, 0x86, 0x60, 0x55, 0xc0, 0x60, 0x96, 0xa1, 0x0e, 0xee, 0x20,
-  0x18, 0x8e, 0x30, 0xd8, 0x64, 0xf8, 0xee, 0x18, 0x66, 0xb8, 0x21, 0xb8,
-  0x11, 0x32, 0xa8, 0x21, 0xd0, 0xe1, 0x88, 0x04, 0x4e, 0x86, 0xaf, 0x02,
-  0x41, 0x6f, 0x19, 0x66, 0xb8, 0x21, 0xd0, 0x11, 0x32, 0xa8, 0x60, 0xd0,
-  0x59, 0x06, 0x3b, 0x58, 0x85, 0xe0, 0xfc, 0x63, 0x98, 0x7b, 0x89, 0x61,
-  0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x40, 0xeb, 0x95, 0x55, 0x41, 0x93,
-  0x5c, 0x19, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84,
-  0xd1, 0x04, 0x62, 0x28, 0xe2, 0x90, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30,
-  0xb8, 0xc8, 0x45, 0x56, 0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04,
-  0x83, 0xab, 0x5c, 0x66, 0x85, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41,
-  0x30, 0xb8, 0xcc, 0x85, 0x56, 0x24, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10,
-  0x04, 0x03, 0xa5, 0x5d, 0x66, 0x45, 0x4e, 0x02, 0x70, 0x19, 0x15, 0x5f,
-  0x19, 0x4d, 0x08, 0x80, 0x0b, 0x1c, 0x9b, 0x25, 0x58, 0x85, 0xe1, 0x86,
-  0xcc, 0x5c, 0xc0, 0x60, 0x96, 0x01, 0x0f, 0xf2, 0x20, 0xa8, 0x36, 0xb1,
-  0x15, 0xb8, 0xc0, 0xa9, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x98, 0xde,
-  0xe5, 0x56, 0xc0, 0xc0, 0x54, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60,
-  0x82, 0x97, 0x5b, 0x09, 0x84, 0x0b, 0x86, 0x29, 0x38, 0xd9, 0x15, 0xb8,
-  0xc0, 0xa9, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x98, 0xe8, 0x85, 0x57,
-  0xc8, 0x60, 0x55, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0xaa, 0x17,
-  0x5e, 0x09, 0x84, 0x0b, 0x86, 0xb9, 0xc0, 0xa9, 0x3b, 0x9c, 0xba, 0x1c,
-  0x19, 0xe6, 0xd4, 0x62, 0x98, 0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83,
-  0x03, 0x00, 0x41, 0x30, 0xd0, 0xf4, 0x05, 0x5d, 0x4a, 0xc5, 0x5e, 0x46,
-  0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81,
-  0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xae, 0x90,
-  0x79, 0x97, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x12,
-  0x19, 0x78, 0x49, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xae,
-  0x91, 0x89, 0x97, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x40,
-  0x51, 0x19, 0x78, 0x79, 0x95, 0xa0, 0x5f, 0xc0, 0x65, 0x5f, 0x46, 0x13,
-  0x02, 0xe0, 0x02, 0xc7, 0x66, 0x09, 0x56, 0x61, 0xb8, 0xc1, 0x0e, 0x44,
-  0x06, 0x0c, 0x66, 0x19, 0xf4, 0x60, 0x15, 0x02, 0x0b, 0x95, 0x51, 0x89,
-  0xcf, 0x70, 0x04, 0x1f, 0x90, 0x0a, 0xf1, 0xcd, 0x32, 0xec, 0x81, 0x1f,
-  0x04, 0x56, 0x2a, 0x7d, 0x10, 0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30, 0xcc,
-  0x05, 0x4e, 0x59, 0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xb0, 0x8c,
-  0x0e, 0x37, 0x04, 0x2a, 0x03, 0x06, 0xb3, 0x0c, 0x7c, 0xd0, 0x07, 0x81,
-  0x0d, 0xad, 0x02, 0x9f, 0x59, 0x02, 0x51, 0x30, 0x56, 0x21, 0xe2, 0x33,
-  0x4b, 0x20, 0x0a, 0xc3, 0x11, 0xa7, 0xd0, 0x2a, 0xc2, 0x37, 0xcb, 0xf0,
-  0x07, 0xa2, 0x10, 0x18, 0x2a, 0xb8, 0x4a, 0x7c, 0x2c, 0x70, 0xe8, 0x73,
-  0xc1, 0x30, 0x17, 0x38, 0x65, 0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14,
-  0x71, 0x33, 0x3a, 0xdc, 0x10, 0xd4, 0x0c, 0x18, 0xcc, 0x32, 0x80, 0x42,
-  0x28, 0x04, 0x66, 0x2b, 0x43, 0x7c, 0x66, 0x09, 0x44, 0xc1, 0x88, 0x5c,
-  0x81, 0xcf, 0x2c, 0x81, 0x28, 0x0c, 0xb4, 0x38, 0x1a, 0x1f, 0x60, 0x7d,
-  0x40, 0x80, 0x82, 0x10, 0x0a, 0x64, 0xe1, 0x07, 0x17, 0x0c, 0x63, 0xb8,
-  0xc2, 0x2b, 0xf1, 0x19, 0x8e, 0xa0, 0x85, 0x5e, 0x21, 0xbe, 0x59, 0x86,
-  0x51, 0x30, 0x85, 0xc0, 0x7c, 0xa5, 0x16, 0xe2, 0x63, 0xc1, 0x40, 0x9f,
-  0x0b, 0x86, 0xb9, 0xc0, 0x29, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7,
-  0x88, 0xb2, 0xd1, 0xe1, 0x86, 0x60, 0x6c, 0xc0, 0x60, 0x96, 0x81, 0x14,
-  0x4a, 0x21, 0xb0, 0xc1, 0x5c, 0xe0, 0x33, 0x4b, 0xa0, 0x0a, 0x36, 0x2e,
-  0x44, 0x7c, 0x66, 0x09, 0x54, 0x61, 0x38, 0xe2, 0x17, 0xc8, 0x45, 0xf8,
-  0x66, 0x19, 0x4e, 0x41, 0x15, 0x02, 0x03, 0x87, 0x72, 0x89, 0x8f, 0x05,
-  0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa7, 0x2c, 0x88, 0xe4, 0x63, 0x45,
-  0x10, 0x9f, 0x22, 0xe0, 0x46, 0x87, 0x1b, 0x02, 0xb7, 0x01, 0x83, 0x59,
-  0x06, 0x54, 0x48, 0x85, 0xc0, 0xda, 0x65, 0x88, 0xcf, 0x2c, 0x81, 0x2a,
-  0x18, 0x21, 0x2f, 0xf0, 0x99, 0x25, 0x50, 0x85, 0x81, 0x16, 0x47, 0x23,
-  0x05, 0xac, 0x14, 0x08, 0x54, 0x10, 0x52, 0x81, 0x36, 0x4c, 0xe1, 0x82,
-  0x61, 0x2e, 0x70, 0xea, 0x36, 0xa7, 0xce, 0x57, 0x86, 0xb9, 0xf7, 0x18,
-  0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c,
-  0xb4, 0xbf, 0x69, 0x1b, 0x95, 0xd9, 0x9b, 0xd1, 0x84, 0x00, 0x18, 0x4d,
-  0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x12, 0x19,
-  0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0xcb, 0x74, 0xe8, 0x26, 0x21, 0x82,
-  0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xb8, 0x4e, 0xa7, 0x6e, 0x12, 0x22,
-  0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x0b, 0x75, 0xec, 0x26, 0x21,
-  0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x50, 0x5e, 0xa7, 0x6e, 0x68,
-  0x26, 0x10, 0x9d, 0xb2, 0x01, 0x9d, 0xd1, 0x84, 0x00, 0xb8, 0xc0, 0xb1,
-  0x59, 0x82, 0x55, 0x18, 0x68, 0x71, 0x4c, 0xc3, 0x0e, 0x54, 0xad, 0x0e,
-  0x58, 0x02, 0x0f, 0x04, 0x55, 0x50, 0xb5, 0x3c, 0x98, 0x65, 0x60, 0x05,
-  0x57, 0xd8, 0x87, 0xe1, 0x08, 0x7f, 0x30, 0x9b, 0xe1, 0xbb, 0x7f, 0x18,
-  0x66, 0xb8, 0x21, 0x88, 0x19, 0x32, 0xa8, 0x21, 0xd0, 0xe1, 0x88, 0x91,
-  0x50, 0x9b, 0xe1, 0xab, 0x40, 0xd0, 0x2b, 0x89, 0x61, 0x86, 0x1b, 0x02,
-  0x9a, 0x21, 0x83, 0x0a, 0x06, 0x9d, 0x65, 0x68, 0x05, 0x71, 0x08, 0x0e,
-  0x5f, 0x86, 0xb9, 0x14, 0x19, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c,
-  0xb4, 0xdb, 0x29, 0x1d, 0xb1, 0x99, 0x9d, 0xd1, 0x84, 0x00, 0x18, 0x4d,
-  0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x0e, 0x19,
-  0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0xcb, 0x77, 0x58, 0xe7, 0x20, 0x82,
-  0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xb8, 0x7e, 0xa7, 0x75, 0x18, 0x22,
-  0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x0b, 0x7c, 0x5c, 0x47, 0x22,
-  0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x50, 0xce, 0xa7, 0x75, 0xd8,
-  0x26, 0xd0, 0x9d, 0xbe, 0xc1, 0x9d, 0xd1, 0x84, 0x00, 0xb8, 0xc0, 0xb1,
-  0x59, 0x02, 0x71, 0x18, 0x6e, 0x98, 0x09, 0xf0, 0x01, 0x83, 0x59, 0x86,
-  0x57, 0x80, 0x85, 0xa0, 0xce, 0x06, 0x76, 0xe0, 0x02, 0xa7, 0x46, 0x0c,
-  0x0e, 0x00, 0x04, 0xc1, 0x60, 0x4a, 0x9f, 0xd8, 0xc1, 0x09, 0xd0, 0x19,
-  0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x49, 0x7d, 0x62, 0x27, 0x10, 0x2e,
-  0x18, 0xa6, 0xd4, 0xa6, 0x76, 0xe0, 0x02, 0xa7, 0x46, 0x0c, 0x0e, 0x00,
-  0x04, 0xc1, 0x60, 0x72, 0x1f, 0xdb, 0xf1, 0x89, 0xd2, 0x19, 0x31, 0x38,
-  0x00, 0x10, 0x04, 0x83, 0xe9, 0x7d, 0x6c, 0x27, 0x10, 0x2e, 0x18, 0xe6,
-  0x02, 0xa7, 0xee, 0x70, 0xea, 0x66, 0x66, 0x98, 0x23, 0x93, 0x61, 0x8e,
-  0x18, 0xe6, 0x88, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x40, 0xa3,
-  0x1f, 0xf1, 0xf9, 0x1b, 0xf8, 0x19, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21,
-  0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0x22, 0x91, 0x11, 0x03,
-  0x04, 0x00, 0x41, 0x30, 0xb8, 0xf6, 0x27, 0x7d, 0x12, 0x22, 0x18, 0x31,
-  0x40, 0x00, 0x10, 0x04, 0x83, 0x8b, 0x7f, 0xd4, 0x27, 0x21, 0x82, 0x11,
-  0x03, 0x04, 0x00, 0x41, 0x30, 0xb8, 0xfa, 0x67, 0x7d, 0x12, 0x22, 0x18,
-  0x31, 0x50, 0x00, 0x10, 0x04, 0x03, 0x85, 0x84, 0xd4, 0x27, 0x75, 0x82,
-  0xfb, 0xd1, 0x9d, 0xfa, 0x19, 0x4d, 0x08, 0x80, 0x0b, 0x1c, 0x9b, 0x25,
-  0x10, 0x87, 0xe1, 0x06, 0xb8, 0xe0, 0x1f, 0x30, 0x98, 0x65, 0x88, 0x05,
-  0x71, 0x08, 0x6c, 0x6f, 0xfa, 0x26, 0x3e, 0xc3, 0x11, 0x74, 0xe1, 0x37,
-  0xc4, 0x37, 0xcb, 0x20, 0x0b, 0xb5, 0x10, 0xd8, 0xdf, 0xd4, 0x45, 0x7c,
-  0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x38, 0x65, 0x81, 0x21, 0x1f,
-  0x2b, 0x82, 0xf8, 0x14, 0x61, 0x42, 0x3a, 0xdc, 0x10, 0x90, 0x10, 0x18,
-  0xcc, 0x32, 0xcc, 0x02, 0x2d, 0x04, 0x36, 0x9c, 0x0e, 0x7c, 0x66, 0x09,
-  0x72, 0xc1, 0x4c, 0x87, 0x88, 0xcf, 0x2c, 0x41, 0x2e, 0x0c, 0x47, 0xfc,
-  0xc5, 0xe9, 0x08, 0xdf, 0x2c, 0x83, 0x2d, 0xe4, 0x42, 0x60, 0xa0, 0x81,
-  0x3a, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0xe0, 0x94, 0x05,
-  0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x44, 0x0c, 0xe9, 0x70, 0x43, 0xf0,
-  0x42, 0x60, 0x30, 0xcb, 0x70, 0x0b, 0xb8, 0x10, 0x18, 0xec, 0x0c, 0xf1,
-  0x99, 0x25, 0xc8, 0x05, 0x23, 0x66, 0x07, 0x3e, 0xb3, 0x04, 0xb9, 0x30,
-  0xd0, 0xe2, 0x68, 0xb3, 0x80, 0xd1, 0x02, 0x71, 0x0b, 0x02, 0x2e, 0xd0,
-  0x4c, 0x2d, 0x5c, 0x30, 0x8c, 0xc9, 0x8e, 0xed, 0xc4, 0x67, 0x38, 0xc2,
-  0x35, 0x6e, 0x87, 0xf8, 0x66, 0x19, 0x74, 0xa1, 0x17, 0x02, 0xc3, 0x9d,
-  0xd7, 0x88, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa7, 0x2c,
-  0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0x7e, 0x48, 0x87, 0x1b, 0x82,
-  0x1e, 0x02, 0x83, 0x59, 0x86, 0x5d, 0xe0, 0x85, 0xc0, 0x06, 0xf0, 0x81,
-  0xcf, 0x2c, 0x41, 0x38, 0x58, 0xef, 0x10, 0xf1, 0x99, 0x25, 0x08, 0x87,
-  0xe1, 0x88, 0xdc, 0xf0, 0x1d, 0xe1, 0x9b, 0x65, 0xf0, 0x85, 0x70, 0x08,
-  0x4c, 0x37, 0x7e, 0x27, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b,
-  0x9c, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x50, 0x23, 0x1d,
-  0x6e, 0x08, 0xd0, 0x08, 0x0c, 0x66, 0x19, 0x7e, 0x01, 0x1c, 0x02, 0x3b,
-  0x9f, 0x21, 0x3e, 0xb3, 0x04, 0xe1, 0x60, 0x04, 0xfb, 0xc0, 0x67, 0x96,
-  0x20, 0x1c, 0x06, 0x5a, 0x1c, 0x6d, 0x17, 0x30, 0x5e, 0x20, 0x7e, 0x41,
-  0x00, 0x07, 0xd4, 0xe9, 0x85, 0x0b, 0x86, 0xb9, 0xc0, 0xa9, 0xdb, 0x9c,
-  0x3a, 0xdc, 0x19, 0xe6, 0xd2, 0x65, 0x98, 0x23, 0x86, 0x39, 0x62, 0x98,
-  0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xd0, 0xf2, 0xe8, 0x8c, 0x48, 0xa8,
-  0x8e, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61,
-  0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c,
-  0x2e, 0x50, 0x72, 0xa3, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1,
-  0xe0, 0x0a, 0xa5, 0x37, 0x4a, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10,
-  0x0c, 0x2e, 0x51, 0x82, 0xa3, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04,
-  0xc1, 0x40, 0x49, 0xa5, 0x37, 0x72, 0xa1, 0x80, 0x8f, 0x7e, 0x48, 0x8f,
-  0x46, 0x13, 0x02, 0xe0, 0x02, 0xc7, 0x66, 0x09, 0xc4, 0x61, 0xa0, 0xc5,
-  0x31, 0x8d, 0x56, 0xf0, 0xc3, 0x80, 0x15, 0x58, 0xe2, 0x15, 0x84, 0x70,
-  0xf0, 0xc3, 0x00, 0x16, 0x66, 0x19, 0xc6, 0xa1, 0x1c, 0xea, 0x63, 0x38,
-  0x42, 0x3f, 0xc0, 0x68, 0xf8, 0x6e, 0x3f, 0x86, 0x19, 0x6e, 0x08, 0x56,
-  0x88, 0x0c, 0x6a, 0x08, 0x74, 0x38, 0x62, 0x3f, 0xc8, 0x68, 0xf8, 0x2a,
-  0x10, 0xf4, 0xfa, 0x63, 0x98, 0xe1, 0x86, 0xc0, 0x85, 0xc8, 0xa0, 0x82,
-  0x41, 0x67, 0x19, 0xc8, 0x21, 0x1f, 0x82, 0x93, 0x9f, 0x61, 0x6e, 0x64,
-  0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x2d, 0x96, 0xfe, 0x88,
-  0x87, 0x5a, 0x69, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61,
-  0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x43, 0x46, 0x0c, 0x10, 0x00, 0x04,
-  0xc1, 0xe0, 0xc2, 0x25, 0x53, 0x3a, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40,
-  0x10, 0x0c, 0xae, 0x5c, 0x3a, 0x25, 0x86, 0x08, 0x46, 0x0c, 0x10, 0x00,
-  0x04, 0xc1, 0xe0, 0xd2, 0x25, 0x54, 0x92, 0x88, 0x60, 0xc4, 0x40, 0x01,
-  0x40, 0x10, 0x0c, 0x94, 0x70, 0x3a, 0x25, 0x33, 0x0a, 0x68, 0xe9, 0x8e,
-  0x64, 0x69, 0x34, 0x21, 0x00, 0x2e, 0x70, 0x6c, 0x96, 0x20, 0x1f, 0x86,
-  0x1b, 0x5a, 0x44, 0x97, 0xc0, 0x60, 0x96, 0xc1, 0x1c, 0xce, 0x21, 0xa8,
-  0x30, 0x52, 0x25, 0xb8, 0xc0, 0xa9, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30,
-  0x98, 0xc6, 0x69, 0x95, 0x68, 0x44, 0x8f, 0x46, 0x0c, 0x0e, 0x00, 0x04,
-  0xc1, 0x60, 0x22, 0xa7, 0x55, 0x0a, 0x84, 0x0b, 0x86, 0x29, 0x32, 0x7a,
-  0x25, 0xb8, 0xc0, 0xa9, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x98, 0xd0,
-  0x09, 0x96, 0x6c, 0xe4, 0x8f, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60,
-  0x4a, 0x27, 0x58, 0x0a, 0x84, 0x0b, 0x86, 0xb9, 0xc0, 0xa9, 0x3b, 0x9c,
-  0xba, 0x16, 0x1a, 0xe6, 0x7c, 0x66, 0x98, 0x23, 0x86, 0x39, 0x62, 0x98,
-  0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xd0, 0xdc, 0x89, 0x97, 0xf2, 0x48,
-  0x9d, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61,
-  0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c,
-  0xae, 0x7a, 0x1a, 0xa7, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1,
-  0xe0, 0xb2, 0x27, 0x72, 0x4a, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10,
-  0x0c, 0xae, 0x7b, 0x2a, 0xa7, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04,
-  0xc1, 0x40, 0xf1, 0x27, 0x72, 0x1a, 0xa5, 0x20, 0x9e, 0x68, 0xe9, 0x9d,
-  0x46, 0x13, 0x02, 0xe0, 0x02, 0xc7, 0x66, 0x09, 0xf2, 0x61, 0xb8, 0x41,
-  0x4d, 0xec, 0x09, 0x0c, 0x66, 0x19, 0xd0, 0x21, 0x1f, 0x02, 0xab, 0xa3,
-  0x3b, 0x8a, 0xcf, 0x70, 0x04, 0x9c, 0xe0, 0x11, 0xf1, 0xcd, 0x32, 0xa4,
-  0x03, 0x3b, 0x04, 0x96, 0x47, 0x71, 0x12, 0x1f, 0x0b, 0x06, 0xfa, 0x5c,
-  0x30, 0xcc, 0x05, 0x4e, 0x59, 0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45,
-  0x80, 0x94, 0x0e, 0x37, 0x04, 0xfe, 0x04, 0x06, 0xb3, 0x0c, 0xea, 0xb0,
-  0x0e, 0x81, 0x0d, 0xa1, 0x04, 0x9f, 0x59, 0x02, 0x78, 0x30, 0x50, 0x22,
-  0xe2, 0x33, 0x4b, 0x00, 0x0f, 0xc3, 0x11, 0x7b, 0x12, 0x4a, 0xc2, 0x37,
-  0xcb, 0xd0, 0x0e, 0xf0, 0x10, 0x18, 0x9f, 0x88, 0x52, 0x7c, 0x2c, 0x70,
-  0xe8, 0x73, 0xc1, 0x30, 0x17, 0x38, 0x65, 0x41, 0x24, 0x1f, 0x2b, 0x82,
-  0xf8, 0x14, 0xb1, 0x52, 0x3a, 0xdc, 0x10, 0xa4, 0x14, 0x18, 0xcc, 0x32,
-  0xb8, 0xc3, 0x3b, 0x04, 0xa6, 0x4a, 0x43, 0x7c, 0x66, 0x09, 0xe0, 0xc1,
-  0x88, 0x56, 0x82, 0xcf, 0x2c, 0x01, 0x3c, 0x0c, 0xb4, 0x38, 0x9a, 0x3a,
-  0x60, 0xeb, 0x40, 0xb8, 0x83, 0xf0, 0x0e, 0x2c, 0xc5, 0x0e, 0x17, 0x0c,
-  0x63, 0xac, 0x04, 0x4b, 0xf1, 0x19, 0x8e, 0x30, 0x95, 0x58, 0x22, 0xbe,
-  0x59, 0x86, 0x78, 0xa0, 0x87, 0xc0, 0x64, 0xe9, 0x54, 0xe2, 0x63, 0xc1,
-  0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x29, 0x0b, 0x0c, 0xf9, 0x58, 0x11,
-  0xc4, 0xa7, 0x88, 0x9c, 0xd2, 0xe1, 0x86, 0xe0, 0xa6, 0xc0, 0x60, 0x96,
-  0x41, 0x1e, 0xe6, 0x21, 0xb0, 0x41, 0x97, 0xe0, 0x33, 0x4b, 0x80, 0x0f,
-  0x76, 0x4b, 0x44, 0x7c, 0x66, 0x09, 0xf0, 0x61, 0x38, 0x22, 0x56, 0x70,
-  0x49, 0xf8, 0x66, 0x19, 0xea, 0x01, 0x1f, 0x02, 0x93, 0x95, 0x5c, 0x8a,
-  0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa7, 0x2c, 0x88, 0xe4,
-  0x63, 0x45, 0x10, 0x9f, 0x22, 0xc8, 0x4a, 0x87, 0x1b, 0x02, 0xb1, 0x02,
-  0x83, 0x59, 0x06, 0x7b, 0xb8, 0x87, 0xc0, 0xc2, 0x69, 0x88, 0xcf, 0x2c,
-  0x01, 0x3e, 0x18, 0x61, 0x4e, 0xf0, 0x99, 0x25, 0xc0, 0x87, 0x81, 0x16,
-  0x47, 0x93, 0x07, 0x6c, 0x1e, 0x08, 0x7b, 0x10, 0xee, 0x01, 0xaf, 0xe8,
-  0xe1, 0x82, 0x61, 0x2e, 0x70, 0xea, 0x36, 0xa7, 0x4e, 0x96, 0x86, 0xb9,
-  0xf1, 0x19, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40,
-  0x10, 0x0c, 0xb4, 0xb9, 0x0a, 0x2b, 0x7f, 0x7a, 0xab, 0xd1, 0x84, 0x00,
-  0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22,
-  0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x4b, 0xaf, 0xd0, 0x2a,
-  0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xb8, 0xf6, 0x2a, 0xad,
-  0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x8b, 0xaf, 0xd4,
-  0x2a, 0x21, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x50, 0x46, 0x2b,
-  0xad, 0x50, 0x2a, 0xb0, 0xab, 0x9c, 0xa2, 0xab, 0xd1, 0x84, 0x00, 0xb8,
-  0xc0, 0xb1, 0x59, 0x82, 0x7c, 0x18, 0x68, 0x71, 0x4c, 0x83, 0x1c, 0xe4,
-  0x34, 0x18, 0x07, 0x96, 0x30, 0x07, 0x01, 0x1f, 0xe4, 0x34, 0x38, 0x87,
-  0x59, 0x06, 0x7d, 0xe0, 0x87, 0x77, 0x19, 0x8e, 0x90, 0x17, 0x9d, 0x1a,
-  0xbe, 0x9b, 0x97, 0x61, 0x86, 0x1b, 0x82, 0x92, 0x22, 0x83, 0x1a, 0x02,
-  0x1d, 0x8e, 0xa8, 0x17, 0x9f, 0x1a, 0xbe, 0x0a, 0x04, 0xbd, 0x7b, 0x19,
-  0x66, 0xb8, 0x21, 0x40, 0x29, 0x32, 0xa8, 0x60, 0xd0, 0x59, 0x86, 0x7d,
-  0x80, 0x89, 0xe0, 0xd8, 0x69, 0x98, 0xeb, 0x9f, 0x61, 0x46, 0x0c, 0x0e,
-  0x00, 0x04, 0xc1, 0x40, 0x5b, 0xad, 0xbc, 0xb2, 0xa9, 0xd3, 0x1a, 0x4d,
-  0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62,
-  0x28, 0xe2, 0x90, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xb8, 0x64, 0x0b,
-  0xb4, 0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x6b, 0xb6,
-  0x42, 0x8b, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xb8, 0x68,
-  0x4b, 0xb4, 0x24, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x03, 0x65,
-  0xb7, 0x42, 0x0b, 0xac, 0x02, 0xd7, 0x8a, 0x2b, 0xd6, 0x1a, 0x4d, 0x08,
-  0x80, 0x0b, 0x1c, 0x9b, 0x25, 0x80, 0x89, 0xe1, 0x86, 0x93, 0xa1, 0x2d,
-  0x30, 0x98, 0x65, 0xe8, 0x07, 0x7f, 0x08, 0x6a, 0xa7, 0x48, 0x0b, 0x2e,
-  0x70, 0x6a, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xa6, 0xde, 0x2a, 0x2d,
-  0x96, 0xa1, 0xab, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x98, 0x7c, 0xab,
-  0xb4, 0x02, 0xe1, 0x82, 0x61, 0xca, 0xa7, 0x52, 0x0b, 0x2e, 0x70, 0x6a,
-  0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x26, 0xf1, 0x52, 0x2d, 0x98, 0xc9,
-  0xab, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x98, 0xc6, 0x4b, 0xb5, 0x02,
-  0xe1, 0x82, 0x61, 0x2e, 0x70, 0xea, 0x0e, 0xa7, 0xee, 0xa4, 0x86, 0x39,
-  0x1c, 0x1a, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40,
-  0x10, 0x0c, 0x34, 0xf4, 0xb2, 0xad, 0xb9, 0x22, 0xaf, 0xd1, 0x84, 0x00,
-  0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22,
-  0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0xeb, 0xbd, 0x7a, 0x2b,
-  0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xb8, 0xe0, 0xcb, 0xb7,
-  0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x2b, 0xbe, 0x7e,
-  0x2b, 0x21, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x50, 0xf0, 0xcb,
-  0xb7, 0xfa, 0x2a, 0x58, 0x2f, 0xd7, 0x4a, 0xaf, 0xd1, 0x84, 0x00, 0xb8,
-  0xc0, 0xb1, 0x59, 0x02, 0x98, 0x18, 0x6e, 0x20, 0x1b, 0xf8, 0x02, 0x83,
-  0x59, 0x86, 0x7f, 0x80, 0x89, 0xc0, 0xde, 0x2a, 0xae, 0xe2, 0x33, 0x1c,
-  0x81, 0x36, 0x72, 0x45, 0x7c, 0xb3, 0x0c, 0x20, 0x31, 0x12, 0x81, 0xcd,
-  0x55, 0xda, 0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x53,
-  0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0xfa, 0xa5, 0xc3, 0x0d,
-  0x01, 0x7e, 0x81, 0xc1, 0x2c, 0x43, 0x48, 0x88, 0x44, 0x60, 0xc3, 0x5e,
-  0xc1, 0x67, 0x96, 0xe0, 0x24, 0x4c, 0xaf, 0x88, 0xf8, 0xcc, 0x12, 0x9c,
-  0xc4, 0x70, 0xc4, 0xdc, 0xec, 0x95, 0xf0, 0xcd, 0x32, 0x90, 0xc4, 0x49,
-  0x04, 0x46, 0x37, 0x7c, 0x15, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc,
-  0x05, 0x4e, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0x94, 0x98,
-  0x0e, 0x37, 0x04, 0x23, 0x06, 0x06, 0xb3, 0x0c, 0x25, 0x61, 0x12, 0x81,
-  0x91, 0xd6, 0x10, 0x9f, 0x59, 0x82, 0x93, 0x30, 0xe2, 0xb4, 0xe0, 0x33,
-  0x4b, 0x70, 0x12, 0x03, 0x2d, 0x8e, 0x16, 0x12, 0x98, 0x48, 0x10, 0x25,
-  0x21, 0x98, 0x04, 0xbf, 0x8d, 0xc4, 0x05, 0xc3, 0x98, 0x69, 0xa9, 0x56,
-  0x7c, 0x86, 0x23, 0x40, 0x67, 0xb5, 0x88, 0x6f, 0x96, 0x01, 0x25, 0x56,
-  0x22, 0x30, 0xd6, 0x0a, 0x9d, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61,
-  0x2e, 0x70, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x62, 0xc6,
-  0x74, 0xb8, 0x21, 0x88, 0x31, 0x30, 0x98, 0x65, 0x48, 0x09, 0x95, 0x08,
-  0x6c, 0xa0, 0x2d, 0xf8, 0xcc, 0x12, 0xbc, 0x84, 0xc5, 0x16, 0x11, 0x9f,
-  0x59, 0x82, 0x97, 0x18, 0x8e, 0x58, 0x1d, 0xd9, 0x12, 0xbe, 0x59, 0x06,
-  0x96, 0x78, 0x89, 0xc0, 0x58, 0x67, 0xb6, 0xe2, 0x63, 0x81, 0x43, 0x9f,
-  0x0b, 0x86, 0xb9, 0xc0, 0x29, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4, 0xa7,
-  0x08, 0x1f, 0xd3, 0xe1, 0x86, 0x80, 0xc7, 0xc0, 0x60, 0x96, 0xa1, 0x25,
-  0x5c, 0x22, 0xb0, 0xdd, 0x1a, 0xe2, 0x33, 0x4b, 0xf0, 0x12, 0x46, 0x80,
-  0x17, 0x7c, 0x66, 0x09, 0x5e, 0x62, 0xa0, 0xc5, 0xd1, 0x52, 0x02, 0x53,
-  0x09, 0xa2, 0x25, 0x04, 0x97, 0x60, 0xbb, 0x95, 0xb8, 0x60, 0x98, 0x0b,
-  0x9c, 0xba, 0xcd, 0xa9, 0x63, 0xad, 0x61, 0xae, 0x97, 0x86, 0x39, 0x62,
-  0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0xad, 0xcd,
-  0x76, 0x0c, 0xbf, 0xd2, 0x6c, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60,
-  0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10,
-  0x00, 0x04, 0xc1, 0xe0, 0xa2, 0x33, 0x31, 0x4b, 0x88, 0x60, 0xc4, 0x00,
-  0x01, 0x40, 0x10, 0x0c, 0xae, 0x3a, 0x1b, 0xb3, 0x84, 0x08, 0x46, 0x0c,
-  0x10, 0x00, 0x04, 0xc1, 0xe0, 0xb2, 0x33, 0x32, 0x4b, 0x88, 0x60, 0xc4,
-  0x40, 0x01, 0x40, 0x10, 0x0c, 0x94, 0x3e, 0x1b, 0x33, 0x11, 0x0b, 0xe0,
-  0x6c, 0xc6, 0xdc, 0x6c, 0x34, 0x21, 0x00, 0x2e, 0x70, 0x6c, 0x96, 0x00,
-  0x26, 0x06, 0x5a, 0x1c, 0xd3, 0xd8, 0x07, 0x33, 0x0e, 0xf4, 0x81, 0x25,
-  0xfa, 0x41, 0x78, 0x09, 0x33, 0x0e, 0xfc, 0x61, 0xc4, 0xc0, 0x00, 0x40,
-  0x10, 0x0c, 0x9e, 0x3f, 0xf3, 0xb1, 0x77, 0x32, 0xfb, 0x80, 0x97, 0xf8,
-  0x98, 0x10, 0xc8, 0xc7, 0x02, 0x79, 0x81, 0x8f, 0x15, 0xff, 0x10, 0x1f,
-  0x2b, 0x02, 0xf9, 0x58, 0x10, 0x12, 0xf0, 0x19, 0x31, 0x30, 0x00, 0x10,
-  0x04, 0x83, 0xc7, 0xd4, 0xca, 0xac, 0x9e, 0x4c, 0x28, 0xe2, 0x63, 0x81,
-  0x20, 0x1f, 0x0b, 0x0e, 0xf8, 0x5c, 0x30, 0xd0, 0x88, 0x81, 0x03, 0x80,
-  0x20, 0x18, 0x30, 0xac, 0x96, 0x66, 0x34, 0xd6, 0x62, 0xa0, 0x16, 0xb8,
-  0x99, 0x9b, 0xb9, 0xd9, 0x98, 0xf9, 0xd9, 0x2c, 0xc1, 0x08, 0x0d, 0x37,
-  0xf8, 0x95, 0xa8, 0x81, 0xc1, 0x2c, 0x83, 0x4c, 0x8c, 0x50, 0x30, 0x62,
-  0x60, 0x00, 0x20, 0x08, 0x06, 0x0f, 0xab, 0xb1, 0x19, 0x3f, 0x59, 0xd0,
-  0x63, 0xf0, 0x19, 0x31, 0x30, 0x00, 0x10, 0x04, 0x83, 0xc7, 0xd5, 0xdc,
-  0xac, 0x9f, 0x2c, 0xf8, 0x31, 0xf8, 0x8c, 0x26, 0xd0, 0xd8, 0x30, 0xdc,
-  0x10, 0x98, 0x1a, 0x18, 0xcc, 0x32, 0xcc, 0x44, 0x4d, 0x04, 0xc3, 0x11,
-  0xc5, 0x99, 0x0d, 0xdf, 0x19, 0xc3, 0x0c, 0x37, 0x04, 0x32, 0x46, 0x06,
-  0x35, 0x04, 0x3a, 0x1c, 0x71, 0xac, 0xd9, 0xf0, 0x55, 0x20, 0xe8, 0x25,
-  0xc3, 0x0c, 0x37, 0x04, 0x35, 0x46, 0x06, 0x15, 0x0c, 0x3a, 0xcb, 0x40,
-  0x13, 0x69, 0x11, 0x5c, 0x7e, 0x0d, 0x73, 0x2a, 0x35, 0xcc, 0x88, 0xc1,
-  0x01, 0x80, 0x20, 0x18, 0x68, 0xb8, 0x66, 0x6a, 0x63, 0x46, 0x6b, 0xa3,
-  0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40,
-  0x0c, 0x45, 0x1c, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xd7, 0xaf,
-  0xb5, 0xda, 0x41, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0x81,
-  0x9b, 0xab, 0x31, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x57,
-  0xb8, 0xbd, 0x9a, 0x44, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xa0,
-  0xa0, 0x9b, 0xab, 0xb5, 0x59, 0xb0, 0x6b, 0x7e, 0x96, 0x6b, 0xa3, 0x09,
-  0x01, 0x70, 0x81, 0x63, 0xb3, 0x04, 0x69, 0x31, 0xdc, 0x90, 0x85, 0x1b,
-  0x18, 0xcc, 0x32, 0xd8, 0xc4, 0x4d, 0x04, 0x85, 0x66, 0xb1, 0x06, 0x17,
-  0x38, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x93, 0xba, 0xc9, 0xda,
-  0x17, 0x6a, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x30, 0xad, 0x9b, 0xac,
-  0x05, 0xc2, 0x05, 0xc3, 0xd4, 0x9a, 0xd9, 0x1a, 0x5c, 0xe0, 0xd4, 0x88,
-  0xc1, 0x01, 0x80, 0x20, 0x18, 0x4c, 0xef, 0x76, 0x6b, 0x62, 0x60, 0x6a,
-  0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x30, 0xc1, 0xdb, 0xad, 0x05, 0xc2,
-  0x05, 0xc3, 0x5c, 0xe0, 0xd4, 0x1d, 0x4e, 0x1d, 0x8d, 0x0d, 0x73, 0x65,
-  0x35, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20,
-  0x18, 0x68, 0xf5, 0x36, 0x6e, 0xa0, 0x16, 0x6f, 0xa3, 0x09, 0x01, 0x30,
-  0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24,
-  0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x17, 0xbf, 0xa9, 0x5b, 0x42,
-  0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0xf5, 0xdb, 0xba, 0x25,
-  0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x97, 0xbf, 0xb1, 0x5b,
-  0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xa0, 0x94, 0xdc, 0xba,
-  0xa9, 0x5a, 0x80, 0x6f, 0xbb, 0x66, 0x6f, 0xa3, 0x09, 0x01, 0x70, 0x81,
-  0x63, 0xb3, 0x04, 0x69, 0x31, 0xdc, 0x60, 0x07, 0xfd, 0x06, 0x06, 0xb3,
-  0x0c, 0x38, 0x91, 0x16, 0x81, 0xf1, 0x99, 0x9f, 0xc5, 0x67, 0x38, 0x62,
-  0x0f, 0xfe, 0x8c, 0xf8, 0x66, 0x19, 0x72, 0x82, 0x27, 0x02, 0x03, 0x35,
-  0x3e, 0x88, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa7, 0x2c,
-  0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0x4e, 0x4e, 0x87, 0x1b, 0x82,
-  0x92, 0x03, 0x83, 0x59, 0x06, 0x9d, 0xd8, 0x89, 0xc0, 0x06, 0x54, 0x83,
-  0xcf, 0x2c, 0x01, 0x58, 0xd8, 0xa9, 0x11, 0xf1, 0x99, 0x25, 0x00, 0x8b,
-  0xe1, 0x08, 0x53, 0x40, 0x35, 0xe1, 0x9b, 0x65, 0xe8, 0x09, 0xb0, 0x08,
-  0xec, 0x14, 0x52, 0x2d, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b,
-  0x9c, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x90, 0x39, 0x1d,
-  0x6e, 0x08, 0x60, 0x0e, 0x0c, 0x66, 0x19, 0x7c, 0xe2, 0x27, 0x02, 0x8b,
-  0xb5, 0x21, 0x3e, 0xb3, 0x04, 0x60, 0x61, 0x04, 0xad, 0xc1, 0x67, 0x96,
-  0x00, 0x2c, 0x06, 0x5a, 0x1c, 0x4d, 0x27, 0xb0, 0x9d, 0x20, 0x7c, 0x42,
-  0xf8, 0x09, 0xb1, 0xe0, 0x89, 0x0b, 0x86, 0xb1, 0x59, 0xbb, 0xb5, 0xf8,
-  0x0c, 0x47, 0xc8, 0x02, 0xae, 0x11, 0xdf, 0x2c, 0x43, 0x58, 0x90, 0x45,
-  0x60, 0xb9, 0x36, 0x0b, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3, 0x5c,
-  0xe0, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x04, 0xd8, 0xe9,
-  0x70, 0x43, 0xe0, 0x73, 0x60, 0x30, 0xcb, 0x20, 0x16, 0x63, 0x11, 0xd8,
-  0x10, 0x6e, 0xf0, 0x99, 0x25, 0x40, 0x0b, 0xf3, 0x35, 0x22, 0x3e, 0xb3,
-  0x04, 0x68, 0x31, 0x1c, 0xd1, 0x0b, 0xbf, 0x26, 0x7c, 0xb3, 0x0c, 0x65,
-  0x81, 0x16, 0x81, 0xf9, 0x02, 0xb8, 0xc5, 0xc7, 0x02, 0x87, 0x3e, 0x17,
-  0x0c, 0x73, 0x81, 0x53, 0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11,
-  0x6b, 0xa7, 0xc3, 0x0d, 0x41, 0xda, 0x81, 0xc1, 0x2c, 0x83, 0x59, 0x9c,
-  0x45, 0x60, 0xe8, 0x36, 0xc4, 0x67, 0x96, 0x00, 0x2d, 0x8c, 0x68, 0x37,
-  0xf8, 0xcc, 0x12, 0xa0, 0xc5, 0x40, 0x8b, 0xa3, 0x89, 0x05, 0x36, 0x16,
-  0x84, 0x59, 0x08, 0x67, 0x01, 0x1b, 0x64, 0x71, 0xc1, 0x30, 0x17, 0x38,
-  0x75, 0x9b, 0x53, 0x97, 0x6b, 0xc3, 0x9c, 0x7a, 0x0d, 0x73, 0xc4, 0x30,
-  0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x9a, 0xde, 0xa1,
-  0x5d, 0xc9, 0xd9, 0xdd, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68,
-  0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00,
-  0x08, 0x82, 0xc1, 0x15, 0x7a, 0x6f, 0x97, 0x10, 0xc1, 0x88, 0x01, 0x02,
-  0x80, 0x20, 0x18, 0x5c, 0xa2, 0x07, 0x77, 0x09, 0x11, 0x8c, 0x18, 0x20,
-  0x00, 0x08, 0x82, 0xc1, 0x35, 0x7a, 0x71, 0x97, 0x10, 0xc1, 0x88, 0x81,
-  0x02, 0x80, 0x20, 0x18, 0x28, 0xaa, 0x07, 0x77, 0x2f, 0x17, 0xf4, 0x1d,
-  0xd8, 0xed, 0xdd, 0x68, 0x42, 0x00, 0x5c, 0xe0, 0xd8, 0x2c, 0x41, 0x5a,
-  0x0c, 0xb4, 0x38, 0xa6, 0x41, 0x13, 0xa0, 0x1e, 0xcc, 0x04, 0x4b, 0xd8,
-  0x84, 0x80, 0x16, 0xa0, 0x1e, 0xdc, 0xc4, 0x2c, 0x83, 0x5a, 0xb0, 0xc5,
-  0x3e, 0x0c, 0x47, 0x80, 0x44, 0xd8, 0x0d, 0xdf, 0x85, 0xc4, 0x30, 0xc3,
-  0x0d, 0x01, 0xcb, 0x91, 0x41, 0x0d, 0x81, 0x0e, 0x47, 0x84, 0x44, 0xd9,
-  0x0d, 0x5f, 0x05, 0x82, 0xde, 0x48, 0x0c, 0x33, 0xdc, 0x10, 0xbc, 0x1c,
-  0x19, 0x54, 0x30, 0xe8, 0x2c, 0xc3, 0x5a, 0x80, 0x46, 0x70, 0xf3, 0x36,
-  0xcc, 0x91, 0xd8, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xa0, 0xc9,
-  0x1e, 0xe8, 0xf5, 0x9c, 0xeb, 0x8d, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10,
-  0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x71, 0xc8, 0x88, 0x01,
-  0x02, 0x80, 0x20, 0x18, 0x5c, 0xb9, 0x77, 0x7a, 0x07, 0x11, 0x8c, 0x18,
-  0x20, 0x00, 0x08, 0x82, 0xc1, 0xa5, 0x7b, 0xa8, 0xc7, 0x10, 0xc1, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0xbb, 0x97, 0x7a, 0x12, 0x11, 0x8c,
-  0x18, 0x28, 0x00, 0x08, 0x82, 0x81, 0x22, 0x7e, 0xa8, 0x77, 0x76, 0x41,
-  0xed, 0xe1, 0xdd, 0xec, 0x8d, 0x26, 0x04, 0xc0, 0x05, 0x8e, 0xcd, 0x12,
-  0x80, 0xc6, 0x70, 0xc3, 0x4c, 0xec, 0x1e, 0x18, 0xcc, 0x32, 0xb4, 0x85,
-  0x5b, 0x04, 0x25, 0x76, 0xab, 0x07, 0x17, 0x38, 0x35, 0x62, 0x70, 0x00,
-  0x20, 0x08, 0x06, 0x13, 0xf9, 0xb1, 0x9e, 0x4e, 0xec, 0xdd, 0x88, 0xc1,
-  0x01, 0x80, 0x20, 0x18, 0x4c, 0xe5, 0xc7, 0x7a, 0x81, 0x70, 0xc1, 0x30,
-  0x55, 0x76, 0xb0, 0x07, 0x17, 0x38, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08,
-  0x06, 0x53, 0xfa, 0xc5, 0x1e, 0x4f, 0x80, 0xde, 0x88, 0xc1, 0x01, 0x80,
-  0x20, 0x18, 0x4c, 0xea, 0x17, 0x7b, 0x81, 0x70, 0xc1, 0x30, 0x17, 0x38,
-  0x75, 0x87, 0x53, 0xe7, 0x72, 0xc3, 0xdc, 0x8f, 0x0d, 0x73, 0xc4, 0x30,
-  0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0xda, 0xfb, 0xf5,
-  0x9e, 0xde, 0xad, 0xdf, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68,
-  0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00,
-  0x08, 0x82, 0xc1, 0x65, 0x7f, 0xe4, 0x97, 0x10, 0xc1, 0x88, 0x01, 0x02,
-  0x80, 0x20, 0x18, 0x5c, 0xf7, 0x57, 0x7e, 0x09, 0x11, 0x8c, 0x18, 0x20,
-  0x00, 0x08, 0x82, 0xc1, 0x85, 0x7f, 0xe6, 0x97, 0x10, 0xc1, 0x88, 0x81,
-  0x02, 0x80, 0x20, 0x18, 0x28, 0xff, 0x57, 0x7e, 0xa4, 0x17, 0xc8, 0x5f,
-  0xed, 0xc1, 0xdf, 0x68, 0x42, 0x00, 0x5c, 0xe0, 0xd8, 0x2c, 0x01, 0x68,
-  0x0c, 0x37, 0xc0, 0xc5, 0xfd, 0x81, 0xc1, 0x2c, 0xc3, 0x5b, 0x80, 0x46,
-  0x60, 0x76, 0x87, 0x77, 0xf1, 0x19, 0x8e, 0xb0, 0x8b, 0xbc, 0x23, 0xbe,
-  0x59, 0x06, 0xb8, 0x98, 0x8b, 0xc0, 0xf4, 0xee, 0x2e, 0xe2, 0x63, 0xc1,
-  0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x29, 0x0b, 0x0c, 0xf9, 0x58, 0x11,
-  0xc4, 0xa7, 0x88, 0x10, 0x0c, 0x74, 0xb8, 0x21, 0xf8, 0x3f, 0x30, 0x98,
-  0x65, 0x88, 0x0b, 0xb9, 0x08, 0x6c, 0x10, 0x3d, 0xf8, 0xcc, 0x12, 0xdc,
-  0x85, 0x85, 0x1e, 0x11, 0x9f, 0x59, 0x82, 0xbb, 0x18, 0x8e, 0x08, 0x0d,
-  0xd1, 0x13, 0xbe, 0x59, 0x06, 0xba, 0xb8, 0x8b, 0xc0, 0x44, 0x63, 0xf4,
-  0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x29, 0x0b, 0x22,
-  0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0x16, 0x0c, 0x74, 0xb8, 0x21, 0x50,
-  0xc1, 0x00, 0x0c, 0x66, 0x19, 0xea, 0xc2, 0x2e, 0x02, 0x5b, 0xbd, 0x21,
-  0x3e, 0xb3, 0x04, 0x77, 0x61, 0x84, 0xeb, 0xc1, 0x67, 0x96, 0xe0, 0x2e,
-  0x06, 0x5a, 0x1c, 0x2d, 0x2e, 0x30, 0xb9, 0x20, 0xea, 0x42, 0xb0, 0x0b,
-  0x9c, 0x99, 0x8b, 0x0b, 0x86, 0xb1, 0xd6, 0x8b, 0xbd, 0xf8, 0x0c, 0x47,
-  0xb0, 0x86, 0xec, 0x11, 0xdf, 0x2c, 0x03, 0x5e, 0xec, 0x45, 0x60, 0xb3,
-  0xd7, 0x1a, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0xe0, 0x94,
-  0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x84, 0x0e, 0x06, 0x3a, 0xdc,
-  0x10, 0xe0, 0x60, 0x00, 0x06, 0xb3, 0x0c, 0x79, 0xa1, 0x17, 0x81, 0x0d,
-  0xbb, 0x07, 0x9f, 0x59, 0x82, 0xbf, 0x30, 0xdc, 0x23, 0xe2, 0x33, 0x4b,
-  0xf0, 0x17, 0xc3, 0x11, 0xb7, 0x91, 0x7b, 0xc2, 0x37, 0xcb, 0xc0, 0x17,
-  0x7f, 0x11, 0x18, 0x6e, 0xe8, 0x5e, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1,
-  0x30, 0x17, 0x38, 0x65, 0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x51,
-  0x86, 0x81, 0x0e, 0x37, 0x04, 0x63, 0x18, 0x80, 0xc1, 0x2c, 0x43, 0x5f,
-  0xf8, 0x45, 0x60, 0xe2, 0x37, 0xc4, 0x67, 0x96, 0xe0, 0x2f, 0x8c, 0x38,
-  0x3f, 0xf8, 0xcc, 0x12, 0xfc, 0xc5, 0x40, 0x8b, 0xa3, 0xe5, 0x05, 0xa6,
-  0x17, 0x44, 0x5f, 0x08, 0x7e, 0x41, 0x3a, 0x7b, 0x71, 0xc1, 0x30, 0x17,
-  0x38, 0x75, 0x9b, 0x53, 0x37, 0x7b, 0xc3, 0x1c, 0xb9, 0x0d, 0x73, 0xc4,
-  0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x1a, 0x1d,
-  0x06, 0x62, 0x18, 0xfc, 0x1f, 0x1c, 0x06, 0xa3, 0x09, 0x01, 0x30, 0x9a,
-  0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32,
-  0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xd7, 0x1e, 0x06, 0x69, 0x18, 0x24,
-  0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x17, 0x1f, 0x06, 0x6a,
-  0x18, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x57, 0x1f,
-  0x06, 0x6b, 0x18, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06,
-  0x0a, 0x29, 0x06, 0x6a, 0x18, 0xa4, 0x60, 0x10, 0xdc, 0x61, 0xa0, 0x83,
-  0x41, 0x1d, 0x06, 0xa3, 0x09, 0x01, 0x70, 0x81, 0x63, 0xb3, 0x04, 0xa0,
-  0x31, 0xd0, 0xe2, 0x98, 0xc6, 0x5a, 0xd0, 0xa1, 0xa0, 0x16, 0x2c, 0xd1,
-  0x16, 0xc2, 0x5f, 0xd0, 0xa1, 0xe0, 0x16, 0xa6, 0x1f, 0x3a, 0x18, 0xc0,
-  0x67, 0x96, 0x21, 0x34, 0x46, 0xc3, 0x3e, 0x86, 0x23, 0x02, 0x1e, 0x0c,
-  0x86, 0xef, 0x84, 0x61, 0x86, 0x1b, 0x82, 0x13, 0x0c, 0xc8, 0xa0, 0x86,
-  0x40, 0x87, 0x23, 0xf8, 0x03, 0x0c, 0x83, 0xe1, 0xab, 0x40, 0xd0, 0xf3,
-  0x8f, 0x61, 0x86, 0x1b, 0x02, 0x15, 0x0c, 0xc8, 0xa0, 0x82, 0x41, 0x67,
-  0x19, 0x44, 0xe3, 0x36, 0x82, 0x73, 0xbf, 0x61, 0xee, 0xdf, 0x86, 0x19,
-  0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0xad, 0x15, 0x83, 0x3d, 0x0c, 0x70,
-  0x30, 0x48, 0xc5, 0x60, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34,
-  0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x43, 0x46, 0x0c, 0x10, 0x00,
-  0x04, 0xc1, 0xe0, 0xa2, 0xc5, 0x40, 0x14, 0x83, 0x83, 0x08, 0x46, 0x0c,
-  0x10, 0x00, 0x04, 0xc1, 0xe0, 0xaa, 0xc5, 0x60, 0x14, 0x03, 0x86, 0x08,
-  0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0xb2, 0xc5, 0x80, 0x14, 0x03,
-  0x89, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x40, 0xe9, 0xc5, 0x60,
-  0x14, 0x03, 0x31, 0x0c, 0x02, 0x58, 0x0c, 0xe6, 0x30, 0x70, 0xc5, 0x60,
-  0x34, 0x21, 0x00, 0x2e, 0x70, 0x6c, 0x96, 0xe0, 0x36, 0x86, 0x1b, 0x5c,
-  0xc4, 0x16, 0x03, 0x30, 0x98, 0x65, 0x20, 0x8d, 0xd2, 0x08, 0xaa, 0x07,
-  0x03, 0x53, 0x0c, 0xe0, 0x02, 0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1,
-  0x60, 0xfa, 0xc5, 0xe0, 0x14, 0x83, 0xcd, 0x0e, 0x83, 0x11, 0x83, 0x03,
-  0x00, 0x41, 0x30, 0x98, 0xc0, 0x31, 0x38, 0xc5, 0x20, 0x10, 0x2e, 0x18,
-  0xa6, 0xc0, 0x30, 0x58, 0xc5, 0x00, 0x2e, 0x70, 0x6a, 0xc4, 0xe0, 0x00,
-  0x40, 0x10, 0x0c, 0x26, 0x72, 0x0c, 0x58, 0x31, 0xb8, 0x91, 0x3d, 0x0c,
-  0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0x2a, 0xc7, 0x80, 0x15, 0x83,
-  0x40, 0xb8, 0x60, 0x98, 0x0b, 0x9c, 0xba, 0xc3, 0xa9, 0x4b, 0xc1, 0x60,
-  0x98, 0xd3, 0xb9, 0x61, 0x8e, 0x18, 0xe6, 0x88, 0x61, 0x46, 0x0c, 0x0e,
-  0x00, 0x04, 0xc1, 0x40, 0x53, 0xc7, 0x00, 0x17, 0x83, 0x3a, 0x0c, 0xcc,
-  0x31, 0x18, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84,
-  0xd1, 0x04, 0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30,
-  0xb8, 0xe2, 0x31, 0xf8, 0xc5, 0x20, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00,
-  0x41, 0x30, 0xb8, 0xe4, 0x31, 0x00, 0xc7, 0x20, 0x21, 0x82, 0x11, 0x03,
-  0x04, 0x00, 0x41, 0x30, 0xb8, 0xe6, 0x31, 0x08, 0xc7, 0x20, 0x21, 0x82,
-  0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x50, 0xf4, 0x31, 0x00, 0xc7, 0xe0,
-  0x0f, 0x83, 0xa0, 0x1d, 0x03, 0x58, 0x0c, 0xd6, 0x31, 0x18, 0x4d, 0x08,
-  0x80, 0x0b, 0x1c, 0x9b, 0x25, 0xb8, 0x8d, 0xe1, 0x86, 0x35, 0x91, 0xc7,
-  0x00, 0x0c, 0x66, 0x19, 0x4c, 0xe3, 0x36, 0x02, 0x8b, 0xc3, 0x60, 0x0e,
-  0x83, 0xf8, 0x0c, 0x47, 0xdc, 0x01, 0x1d, 0x06, 0xc4, 0x37, 0xcb, 0x70,
-  0x1a, 0xaa, 0x11, 0x58, 0x1d, 0x06, 0x78, 0x10, 0x1f, 0x0b, 0x06, 0xfa,
-  0x5c, 0x30, 0xcc, 0x05, 0x4e, 0x59, 0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e,
-  0x45, 0xf0, 0x63, 0xa0, 0xc3, 0x0d, 0x81, 0x3e, 0x06, 0x60, 0x30, 0xcb,
-  0x80, 0x1a, 0xa9, 0x11, 0xd8, 0xd0, 0x87, 0x01, 0x7c, 0x66, 0x09, 0x5c,
-  0xc3, 0xf8, 0x30, 0x20, 0xe2, 0x33, 0x4b, 0xe0, 0x1a, 0xc3, 0x11, 0xa2,
-  0xd0, 0x87, 0x81, 0xf0, 0xcd, 0x32, 0xac, 0x86, 0x6b, 0x04, 0x36, 0x0a,
-  0x7e, 0x18, 0xc4, 0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x53,
-  0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x27, 0x19, 0xe8, 0x70,
-  0x43, 0x50, 0x92, 0x01, 0x18, 0xcc, 0x32, 0xb0, 0x46, 0x6b, 0x04, 0x66,
-  0x8a, 0xc1, 0x10, 0x9f, 0x59, 0x02, 0xd7, 0x30, 0x22, 0x15, 0x03, 0xf8,
-  0xcc, 0x12, 0xb8, 0xc6, 0x40, 0x8b, 0xa3, 0xa1, 0x06, 0x96, 0x1a, 0x04,
-  0x6b, 0x08, 0xad, 0xa1, 0x13, 0xaa, 0x71, 0xc1, 0x30, 0x86, 0x8a, 0x01,
-  0x2b, 0x06, 0xf1, 0x19, 0x8e, 0x38, 0x95, 0x56, 0x0c, 0x88, 0x6f, 0x96,
-  0xe1, 0x35, 0x64, 0x23, 0x30, 0x57, 0x0c, 0x50, 0x25, 0x3e, 0x16, 0x0c,
-  0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9c, 0xb2, 0xc0, 0x90, 0x8f, 0x15, 0x41,
-  0x7c, 0x8a, 0xa8, 0xc9, 0x40, 0x87, 0x1b, 0x82, 0x99, 0x0c, 0xc0, 0x60,
-  0x96, 0x01, 0x36, 0x62, 0x23, 0xb0, 0xc1, 0x16, 0x03, 0xf8, 0xcc, 0x12,
-  0xd8, 0x86, 0xcd, 0x62, 0x40, 0xc4, 0x67, 0x96, 0xc0, 0x36, 0x86, 0x23,
-  0x64, 0x85, 0x16, 0x03, 0xe1, 0x9b, 0x65, 0x98, 0x0d, 0xdb, 0x08, 0x6c,
-  0x56, 0x6a, 0x31, 0x88, 0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x02,
-  0xa7, 0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xc0, 0x32, 0xd0,
-  0xe1, 0x86, 0xc0, 0x27, 0x03, 0x30, 0x98, 0x65, 0xa0, 0x8d, 0xda, 0x08,
-  0xac, 0x17, 0x83, 0x21, 0x3e, 0xb3, 0x04, 0xb6, 0x61, 0x84, 0x38, 0x06,
-  0xf0, 0x99, 0x25, 0xb0, 0x8d, 0x81, 0x16, 0x47, 0x83, 0x0d, 0x2c, 0x36,
-  0x08, 0xda, 0x10, 0x6a, 0x43, 0xaf, 0x64, 0xe3, 0x82, 0x61, 0x2e, 0x70,
-  0xea, 0x36, 0xa7, 0xce, 0x15, 0x83, 0x61, 0xee, 0xf7, 0x86, 0x39, 0x62,
-  0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0xed, 0x2d,
-  0x83, 0x9e, 0x0c, 0xf4, 0x31, 0x58, 0xcb, 0x60, 0x34, 0x21, 0x00, 0x46,
-  0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44,
-  0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0xb2, 0xcb, 0x80, 0x2c, 0x83,
-  0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0xba, 0xcb, 0xa0,
-  0x2c, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0xc2,
-  0xcb, 0xc0, 0x2c, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1,
-  0x40, 0xf9, 0xcb, 0xa0, 0x2c, 0x03, 0x92, 0x0c, 0x02, 0xb9, 0x0c, 0x6a,
-  0x32, 0x80, 0xcb, 0x60, 0x34, 0x21, 0x00, 0x2e, 0x70, 0x6c, 0x96, 0xe0,
-  0x36, 0x06, 0x5a, 0x1c, 0xd3, 0x10, 0x0d, 0x35, 0x15, 0x42, 0x83, 0x25,
-  0x48, 0x43, 0xb0, 0x0d, 0x35, 0x15, 0x4a, 0xc3, 0xea, 0x65, 0x25, 0x03,
-  0xf8, 0xcc, 0x32, 0xe0, 0x86, 0x6e, 0xc4, 0xcb, 0x70, 0x44, 0x70, 0x93,
-  0xc1, 0xf0, 0x9d, 0x30, 0xcc, 0x70, 0x43, 0x20, 0x92, 0x01, 0x19, 0xd4,
-  0x10, 0xe8, 0x70, 0xc4, 0xbd, 0xec, 0x64, 0x30, 0x7c, 0x15, 0x08, 0x7a,
-  0xf9, 0x32, 0xcc, 0x70, 0x43, 0x50, 0x92, 0x01, 0x19, 0x54, 0x30, 0xe8,
-  0x2c, 0x43, 0x6e, 0xb8, 0x47, 0x70, 0xe9, 0x18, 0x0c, 0x73, 0xfa, 0x37,
-  0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x68, 0xa8, 0x19, 0xd8, 0x65,
-  0x30, 0x93, 0x01, 0x69, 0x06, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04,
-  0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x1c, 0x32, 0x62, 0x80,
-  0x00, 0x20, 0x08, 0x06, 0xd7, 0x6b, 0x06, 0x7d, 0x19, 0x1c, 0x44, 0x30,
-  0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x17, 0x6c, 0x06, 0x7e, 0x19, 0x30,
-  0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x57, 0x6c, 0x06, 0x7f,
-  0x19, 0x48, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x0a, 0x6e,
-  0x06, 0x7e, 0x19, 0xf4, 0x64, 0x10, 0xac, 0x66, 0xe0, 0x96, 0x41, 0x6a,
-  0x06, 0xa3, 0x09, 0x01, 0x70, 0x81, 0x63, 0xb3, 0x04, 0xee, 0x31, 0xdc,
-  0x90, 0x32, 0xb1, 0x19, 0x80, 0xc1, 0x2c, 0xc3, 0x6e, 0xf0, 0x46, 0x50,
-  0x38, 0x19, 0x84, 0x66, 0x00, 0x17, 0x38, 0x35, 0x62, 0x70, 0x00, 0x20,
-  0x08, 0x06, 0x93, 0x6e, 0x06, 0xa2, 0x19, 0x6c, 0x71, 0x19, 0x8c, 0x18,
-  0x1c, 0x00, 0x08, 0x82, 0xc1, 0xb4, 0x9b, 0x81, 0x68, 0x06, 0x81, 0x70,
-  0xc1, 0x30, 0xb5, 0x93, 0x81, 0x69, 0x06, 0x70, 0x81, 0x53, 0x23, 0x06,
-  0x07, 0x00, 0x82, 0x60, 0x30, 0xfd, 0x66, 0x70, 0x9a, 0x81, 0xcc, 0xd8,
-  0x65, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x13, 0x78, 0x06, 0xa7,
-  0x19, 0x04, 0xc2, 0x05, 0xc3, 0x5c, 0xe0, 0xd4, 0x1d, 0x4e, 0x1d, 0x49,
-  0x06, 0xc3, 0x5c, 0x0d, 0x06, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c,
-  0x18, 0x1c, 0x00, 0x08, 0x82, 0x81, 0x56, 0x9e, 0xc1, 0x6c, 0x06, 0x70,
-  0x19, 0x84, 0x67, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a,
-  0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00,
-  0x82, 0x60, 0x70, 0xb1, 0x67, 0xa0, 0x9b, 0x41, 0x42, 0x04, 0x23, 0x06,
-  0x08, 0x00, 0x82, 0x60, 0x70, 0xb5, 0x67, 0xb0, 0x9b, 0x41, 0x42, 0x04,
-  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0xb9, 0x67, 0xc0, 0x9b, 0x41,
-  0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xa0, 0xd4, 0x67, 0xb0,
-  0x9b, 0x81, 0x5e, 0x06, 0x01, 0x7a, 0x06, 0xab, 0x19, 0x98, 0x67, 0x30,
-  0x9a, 0x10, 0x00, 0x17, 0x38, 0x36, 0x4b, 0xe0, 0x1e, 0xc3, 0x0d, 0x66,
-  0xd3, 0x9e, 0x01, 0x18, 0xcc, 0x32, 0xf4, 0x86, 0x7b, 0x04, 0xc6, 0x96,
-  0x81, 0x5b, 0x06, 0xf1, 0x19, 0x8e, 0xb8, 0x83, 0xb7, 0x0c, 0x88, 0x6f,
-  0x96, 0xc1, 0x37, 0xc2, 0x23, 0x30, 0xb8, 0x0c, 0xf0, 0x20, 0x3e, 0x16,
-  0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9c, 0xb2, 0xc0, 0x90, 0x8f, 0x15,
-  0x41, 0x7c, 0x8a, 0xb8, 0xcf, 0x40, 0x87, 0x1b, 0x82, 0xfa, 0x0c, 0xc0,
-  0x60, 0x96, 0xe1, 0x37, 0xc0, 0x23, 0xb0, 0x01, 0x2f, 0x03, 0xf8, 0xcc,
-  0x12, 0x94, 0x87, 0xdd, 0x65, 0x40, 0xc4, 0x67, 0x96, 0xa0, 0x3c, 0x86,
-  0x23, 0x44, 0x01, 0x2f, 0x03, 0xe1, 0x9b, 0x65, 0x10, 0x8f, 0xf2, 0x08,
-  0x6c, 0x14, 0xf2, 0x32, 0x88, 0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6,
-  0x02, 0xa7, 0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0x44, 0x34,
-  0xd0, 0xe1, 0x86, 0x00, 0x44, 0x03, 0x30, 0x98, 0x65, 0x18, 0x0f, 0xf2,
-  0x08, 0x2c, 0x34, 0x83, 0x21, 0x3e, 0xb3, 0x04, 0xe5, 0x61, 0x04, 0x69,
-  0x06, 0xf0, 0x99, 0x25, 0x28, 0x8f, 0x81, 0x16, 0x47, 0xfb, 0x0d, 0x0c,
-  0x3c, 0x88, 0xf1, 0x10, 0xc8, 0x43, 0x27, 0xc2, 0xe3, 0x82, 0x61, 0x6c,
-  0x34, 0x83, 0xd3, 0x0c, 0xe2, 0x33, 0x1c, 0x21, 0x3a, 0xa8, 0x19, 0x10,
-  0xdf, 0x2c, 0x83, 0x79, 0xa4, 0x47, 0x60, 0xa9, 0x19, 0x8c, 0x4e, 0x7c,
-  0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x38, 0x65, 0x81, 0x21, 0x1f,
-  0x2b, 0x82, 0xf8, 0x14, 0x01, 0xa3, 0x81, 0x0e, 0x37, 0x04, 0x2e, 0x1a,
-  0x80, 0xc1, 0x2c, 0xc3, 0x79, 0xa0, 0x47, 0x60, 0x43, 0x6c, 0x06, 0xf0,
-  0x99, 0x25, 0x68, 0x0f, 0x73, 0xcd, 0x80, 0x88, 0xcf, 0x2c, 0x41, 0x7b,
-  0x0c, 0x47, 0xb4, 0xce, 0x6b, 0x06, 0xc2, 0x37, 0xcb, 0xa0, 0x1e, 0xed,
-  0x11, 0x98, 0xeb, 0xc0, 0x66, 0x10, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30,
-  0xcc, 0x05, 0x4e, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xec,
-  0x68, 0xa0, 0xc3, 0x0d, 0x41, 0x8e, 0x06, 0x60, 0x30, 0xcb, 0xb0, 0x1e,
-  0xec, 0x11, 0x18, 0x6e, 0x06, 0x43, 0x7c, 0x66, 0x09, 0xda, 0xc3, 0x88,
-  0xde, 0x0c, 0xe0, 0x33, 0x4b, 0xd0, 0x1e, 0x03, 0x2d, 0x8e, 0x76, 0x1e,
-  0x18, 0x7a, 0x10, 0xeb, 0x21, 0xb0, 0x07, 0xdc, 0xa5, 0xc7, 0x05, 0xc3,
-  0x5c, 0xe0, 0xd4, 0x6d, 0x4e, 0x5d, 0x6a, 0x06, 0xc3, 0x9c, 0x2e, 0x06,
-  0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82,
-  0x81, 0xa6, 0xa6, 0x01, 0x8e, 0x06, 0xf5, 0x19, 0x98, 0x69, 0x30, 0x9a,
-  0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4,
-  0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0xc5, 0x69,
-  0xf0, 0xa3, 0x41, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70,
-  0xc9, 0x69, 0x00, 0xa6, 0x41, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82,
-  0x60, 0x70, 0xcd, 0x69, 0x10, 0xa6, 0x41, 0x42, 0x04, 0x23, 0x06, 0x0a,
-  0x00, 0x82, 0x60, 0xa0, 0xe8, 0x69, 0x00, 0xa6, 0xc1, 0x7f, 0x06, 0x41,
-  0x9b, 0x06, 0x30, 0x1a, 0xac, 0x69, 0x30, 0x9a, 0x10, 0x00, 0x17, 0x38,
-  0x36, 0x4b, 0xe0, 0x1e, 0x03, 0x2d, 0x8e, 0x69, 0xe4, 0x06, 0x18, 0x0b,
-  0xb8, 0xc1, 0x12, 0xbb, 0x21, 0xb4, 0x07, 0x18, 0x0b, 0xbc, 0x31, 0xcb,
-  0xf0, 0x1e, 0xf1, 0xb1, 0x3e, 0xc3, 0x11, 0xef, 0x13, 0xa3, 0xc1, 0xf0,
-  0x1d, 0xfc, 0x0c, 0x33, 0xdc, 0x10, 0xf0, 0x67, 0x40, 0x06, 0x35, 0x04,
-  0x3a, 0x1c, 0x21, 0x3f, 0x35, 0x1a, 0x0c, 0x5f, 0x05, 0x82, 0x1e, 0xfd,
-  0x0c, 0x33, 0xdc, 0x10, 0xfc, 0x67, 0x40, 0x06, 0x15, 0x0c, 0x3a, 0xcb,
-  0x00, 0x1f, 0x25, 0x12, 0xdc, 0x78, 0x06, 0xc3, 0x1c, 0x3d, 0x06, 0xc3,
-  0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x81, 0x26, 0xaa, 0x01, 0x9c, 0x06,
-  0x2d, 0x1a, 0xf8, 0x69, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30,
-  0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0xc4, 0x21, 0x23, 0x06, 0x08,
-  0x00, 0x82, 0x60, 0x70, 0xa5, 0x6a, 0x70, 0xa7, 0xc1, 0x41, 0x04, 0x23,
-  0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0xa9, 0x6a, 0x80, 0xa7, 0x01, 0x43,
-  0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0xad, 0x6a, 0x90, 0xa7,
-  0x81, 0x44, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xa0, 0xc8, 0x6a,
-  0x80, 0xa7, 0xc1, 0x8d, 0x06, 0x41, 0xa9, 0x06, 0x68, 0x1a, 0x8c, 0x6a,
-  0x30, 0x9a, 0x10, 0x00, 0x17, 0x38, 0x36, 0x4b, 0x50, 0x22, 0xc3, 0x0d,
-  0x23, 0xb4, 0xaa, 0x01, 0x18, 0xcc, 0x32, 0xc8, 0xc7, 0x7c, 0x04, 0x25,
-  0xa3, 0xc1, 0x9e, 0x06, 0x70, 0x81, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82,
-  0x60, 0x30, 0xd1, 0x6a, 0xc0, 0xa7, 0x41, 0x0a, 0xad, 0x69, 0x30, 0x62,
-  0x70, 0x00, 0x20, 0x08, 0x06, 0x53, 0xad, 0x06, 0x7c, 0x1a, 0x04, 0xc2,
-  0x05, 0xc3, 0x54, 0x8d, 0x06, 0xa0, 0x1a, 0xc0, 0x05, 0x4e, 0x8d, 0x18,
-  0x1c, 0x00, 0x08, 0x82, 0xc1, 0x94, 0xab, 0x41, 0xa8, 0x06, 0x2d, 0x04,
-  0xa7, 0xc1, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x4c, 0xba, 0x1a, 0x84,
-  0x6a, 0x10, 0x08, 0x17, 0x0c, 0x73, 0x81, 0x53, 0x77, 0x38, 0x75, 0xfe,
-  0x19, 0x0c, 0x73, 0x2f, 0x19, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33,
-  0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0xda, 0xaf, 0x06, 0xad, 0x1a, 0xa8,
-  0x69, 0xb0, 0xab, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68,
-  0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00,
-  0x08, 0x82, 0xc1, 0x65, 0xae, 0x01, 0xad, 0x06, 0x09, 0x11, 0x8c, 0x18,
-  0x20, 0x00, 0x08, 0x82, 0xc1, 0x75, 0xae, 0x41, 0xad, 0x06, 0x09, 0x11,
-  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x85, 0xae, 0x81, 0xad, 0x06,
-  0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0x81, 0xf2, 0xae, 0x41,
-  0xad, 0x06, 0x74, 0x1a, 0x04, 0xe2, 0x1a, 0x94, 0x6a, 0x00, 0xae, 0xc1,
-  0x68, 0x42, 0x00, 0x5c, 0xe0, 0xd8, 0x2c, 0x41, 0x89, 0x0c, 0x37, 0x80,
-  0xd1, 0xb9, 0x06, 0x60, 0x30, 0xcb, 0x40, 0x1f, 0x25, 0x12, 0x98, 0x99,
-  0x06, 0x68, 0x1a, 0xc4, 0x67, 0x38, 0xa2, 0x8c, 0xd2, 0x34, 0x20, 0xbe,
-  0x59, 0x86, 0xfa, 0xc0, 0x8f, 0xc0, 0xd4, 0x34, 0x30, 0xa3, 0xf8, 0x58,
-  0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x70, 0xca, 0x02, 0x43, 0x3e, 0x56,
-  0x04, 0xf1, 0x29, 0x22, 0x5e, 0x03, 0x1d, 0x6e, 0x08, 0xde, 0x35, 0x00,
-  0x83, 0x59, 0x06, 0xfb, 0xb8, 0x8f, 0xc0, 0x06, 0x39, 0x0d, 0xe0, 0x33,
-  0x4b, 0xc0, 0x1f, 0x16, 0xa7, 0x01, 0x11, 0x9f, 0x59, 0x02, 0xfe, 0x18,
-  0x8e, 0x80, 0x23, 0x39, 0x0d, 0x84, 0x6f, 0x96, 0x21, 0x3f, 0xf8, 0x23,
-  0xb0, 0x38, 0x9a, 0xd3, 0x20, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98,
-  0x0b, 0x9c, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xe0, 0xd7,
-  0x40, 0x87, 0x1b, 0x02, 0x7d, 0x0d, 0xc0, 0x60, 0x96, 0x41, 0x3f, 0xf6,
-  0x23, 0xb0, 0x3d, 0x0d, 0x86, 0xf8, 0xcc, 0x12, 0xf0, 0x87, 0x11, 0x7e,
-  0x1a, 0xc0, 0x67, 0x96, 0x80, 0x3f, 0x06, 0x5a, 0x1c, 0xcd, 0x3e, 0xb0,
-  0xfb, 0x20, 0xf4, 0x43, 0xd8, 0x0f, 0x7b, 0x0c, 0xf0, 0xe3, 0x82, 0x61,
-  0xac, 0x4f, 0x83, 0x50, 0x0d, 0xe2, 0x33, 0x1c, 0xd1, 0x47, 0xa2, 0x1a,
-  0x10, 0xdf, 0x2c, 0x43, 0x7f, 0x80, 0x48, 0x60, 0xa3, 0x1a, 0xf8, 0x51,
-  0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x38, 0x65, 0x81, 0x21,
-  0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xa1, 0xb2, 0x81, 0x0e, 0x37, 0x04, 0x28,
-  0x1b, 0x80, 0xc1, 0x2c, 0x83, 0x7f, 0xfc, 0x47, 0x60, 0xc3, 0xaa, 0x06,
-  0xf0, 0x99, 0x25, 0x20, 0x11, 0x43, 0xd5, 0x80, 0x88, 0xcf, 0x2c, 0x01,
-  0x89, 0x0c, 0x47, 0xa0, 0x52, 0xaa, 0x06, 0xc2, 0x37, 0xcb, 0x10, 0x22,
-  0x24, 0x12, 0x58, 0x2a, 0xa9, 0x6a, 0x10, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c,
-  0x30, 0xcc, 0x05, 0x4e, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45,
-  0xd4, 0x6c, 0xa0, 0xc3, 0x0d, 0xc1, 0xcc, 0x06, 0x60, 0x30, 0xcb, 0x20,
-  0x22, 0x23, 0x12, 0x98, 0xac, 0x06, 0x43, 0x7c, 0x66, 0x09, 0x48, 0xc4,
-  0x88, 0x5b, 0x0d, 0xe0, 0x33, 0x4b, 0x40, 0x22, 0x03, 0x2d, 0x8e, 0xe6,
-  0x1f, 0xd8, 0x7f, 0x10, 0x22, 0x22, 0x8c, 0x88, 0x59, 0x06, 0x20, 0x72,
-  0xc1, 0x30, 0x17, 0x38, 0x75, 0x9b, 0x53, 0x37, 0xaa, 0xc1, 0x30, 0x47,
-  0x9b, 0xc1, 0x30, 0x47, 0x0c, 0x73, 0xc4, 0x30, 0x23, 0x06, 0x07, 0x00,
-  0x82, 0x60, 0xa0, 0x91, 0x6d, 0x20, 0xb3, 0xc1, 0xbb, 0x06, 0x60, 0x1b,
-  0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68,
-  0x02, 0x31, 0x14, 0x91, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c,
-  0x6b, 0x1b, 0xe4, 0x6c, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20,
-  0x18, 0x5c, 0x6c, 0x1b, 0xe8, 0x6c, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02,
-  0x80, 0x20, 0x18, 0x5c, 0x6d, 0x1b, 0xec, 0x6c, 0x90, 0x10, 0xc1, 0x88,
-  0x81, 0x02, 0x80, 0x20, 0x18, 0x28, 0x74, 0x1b, 0xe8, 0x6c, 0x90, 0xaf,
-  0x41, 0x70, 0xb6, 0x81, 0xca, 0x06, 0x65, 0x1b, 0x8c, 0x26, 0x04, 0xc0,
-  0x05, 0x8e, 0xcd, 0x12, 0x94, 0xc8, 0x40, 0x8b, 0x63, 0x1a, 0xf0, 0x41,
-  0xe3, 0xc2, 0x7b, 0xb0, 0x84, 0x7c, 0x08, 0x24, 0x42, 0xe3, 0xc2, 0x7c,
-  0xcc, 0x32, 0x98, 0x08, 0x8a, 0x94, 0xd3, 0x70, 0x84, 0x3a, 0xad, 0x6c,
-  0x30, 0x7c, 0xb7, 0x4e, 0xc3, 0x0c, 0x37, 0x04, 0xf6, 0x1a, 0x90, 0x41,
-  0x0d, 0x81, 0x0e, 0x47, 0xb0, 0xd3, 0xcb, 0x06, 0xc3, 0x57, 0x81, 0xa0,
-  0xe7, 0x4e, 0xc3, 0x0c, 0x37, 0x04, 0xf9, 0x1a, 0x90, 0x41, 0x05, 0x83,
-  0xce, 0x32, 0x9c, 0x08, 0x8f, 0x04, 0xd7, 0xab, 0xc1, 0x30, 0xe7, 0x9e,
-  0xc1, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xa0, 0xf1, 0x6d, 0xa0,
-  0xb6, 0xc1, 0xc9, 0x06, 0x78, 0x1b, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82,
-  0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x71, 0xc8, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0xa3, 0x1b, 0xc4, 0x6d, 0x70, 0x10,
-  0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0xa4, 0x1b, 0xc8, 0x6d,
-  0xc0, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0xa5, 0x1b,
-  0xcc, 0x6d, 0x20, 0x11, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x28,
-  0xac, 0x1b, 0xc8, 0x6d, 0x10, 0xb3, 0x41, 0xf0, 0xb7, 0x81, 0xd8, 0x06,
-  0x7d, 0x1b, 0x8c, 0x26, 0x04, 0xc0, 0x05, 0x8e, 0xcd, 0x12, 0xf0, 0xc8,
-  0x70, 0x43, 0x3f, 0x95, 0x6e, 0x00, 0x06, 0xb3, 0x0c, 0x29, 0xa2, 0x22,
-  0x41, 0xb1, 0x6c, 0x50, 0xb7, 0x01, 0x5c, 0xe0, 0xd4, 0x88, 0xc1, 0x01,
-  0x80, 0x20, 0x18, 0x4c, 0xae, 0x1b, 0xd8, 0x6d, 0x40, 0x52, 0x65, 0x1b,
-  0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0xf4, 0xba, 0x81, 0xdd, 0x06,
-  0x81, 0x70, 0xc1, 0x30, 0xf5, 0xb2, 0x81, 0xde, 0x06, 0x70, 0x81, 0x53,
-  0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x30, 0xcd, 0x6e, 0xb0, 0xb7, 0xc1,
-  0x49, 0xa9, 0x6d, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x13, 0xed,
-  0x06, 0x7b, 0x1b, 0x04, 0xc2, 0x05, 0xc3, 0x5c, 0xe0, 0xd4, 0x1d, 0x4e,
-  0x1d, 0xbe, 0x06, 0xc3, 0x5c, 0x8a, 0x06, 0xc3, 0x1c, 0x31, 0xcc, 0x11,
-  0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x81, 0x96, 0xbb, 0xc1, 0xe9,
-  0x06, 0x64, 0x1b, 0xd4, 0x6e, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42,
-  0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23, 0x06,
-  0x08, 0x00, 0x82, 0x60, 0x70, 0x81, 0x6f, 0xe0, 0xba, 0x41, 0x42, 0x04,
-  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0x85, 0x6f, 0xf0, 0xba, 0x41,
-  0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0x89, 0x6f, 0x00,
-  0xbb, 0x41, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xa0, 0xa4,
-  0x6f, 0xf0, 0xba, 0x81, 0xdb, 0x06, 0x01, 0xef, 0x06, 0x7f, 0x1b, 0xe8,
-  0x6e, 0x30, 0x9a, 0x10, 0x00, 0x17, 0x38, 0x36, 0x4b, 0xc0, 0x23, 0xc3,
-  0x0d, 0x3a, 0x15, 0xbe, 0x01, 0x18, 0xcc, 0x32, 0xac, 0x08, 0x8f, 0x04,
-  0x06, 0xb6, 0x81, 0xd8, 0x06, 0xf1, 0x19, 0x8e, 0x00, 0xab, 0xb1, 0x0d,
-  0x88, 0x6f, 0x96, 0x81, 0x45, 0x5e, 0x24, 0x30, 0xb2, 0x0d, 0xc2, 0x2a,
-  0x3e, 0x16, 0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9c, 0xb2, 0xc0, 0x90,
-  0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x58, 0xdf, 0x40, 0x87, 0x1b, 0x82, 0xf4,
-  0x0d, 0xc0, 0x60, 0x96, 0xa1, 0x45, 0x5c, 0x24, 0xb0, 0x81, 0x6d, 0x03,
-  0xf8, 0xcc, 0x12, 0xcc, 0x88, 0xad, 0x6d, 0x40, 0xc4, 0x67, 0x96, 0x60,
-  0x46, 0x86, 0x23, 0xd6, 0x8a, 0x6d, 0x03, 0xe1, 0x9b, 0x65, 0x80, 0x91,
-  0x19, 0x09, 0x8c, 0xad, 0xda, 0x36, 0x88, 0x8f, 0x05, 0x0e, 0x7d, 0x2e,
-  0x18, 0xe6, 0x02, 0xa7, 0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22,
-  0xec, 0x37, 0xd0, 0xe1, 0x86, 0x80, 0x7e, 0x03, 0x30, 0x98, 0x65, 0x88,
-  0x11, 0x19, 0x09, 0xac, 0x6e, 0x83, 0x21, 0x3e, 0xb3, 0x04, 0x33, 0x62,
-  0x04, 0xde, 0x06, 0xf0, 0x99, 0x25, 0x98, 0x91, 0x81, 0x16, 0x47, 0x6b,
-  0x11, 0xcc, 0x45, 0x88, 0x18, 0x11, 0x64, 0x84, 0x5d, 0x83, 0x17, 0xb9,
-  0x60, 0x18, 0xbb, 0xdb, 0x60, 0x6f, 0x83, 0xf8, 0x0c, 0x47, 0xdc, 0x15,
-  0xdf, 0x06, 0xc4, 0x37, 0xcb, 0x40, 0x23, 0x37, 0x12, 0x58, 0xdf, 0x06,
-  0x78, 0x15, 0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4e, 0x59,
-  0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0x90, 0x70, 0xa0, 0xc3, 0x0d,
-  0x81, 0x08, 0x07, 0x60, 0x30, 0xcb, 0x50, 0x23, 0x36, 0x12, 0xd8, 0x50,
-  0xba, 0x01, 0x7c, 0x66, 0x09, 0x76, 0xc4, 0x44, 0x37, 0x20, 0xe2, 0x33,
-  0x4b, 0xb0, 0x23, 0xc3, 0x11, 0xa2, 0x35, 0xba, 0x81, 0xf0, 0xcd, 0x32,
-  0xe0, 0xc8, 0x8e, 0x04, 0x36, 0x5a, 0xa4, 0x1b, 0xc4, 0xc7, 0x02, 0x87,
-  0x3e, 0x17, 0x0c, 0x73, 0x81, 0x53, 0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88,
-  0x4f, 0x11, 0x2f, 0x1c, 0xe8, 0x70, 0x43, 0xd0, 0xc2, 0x01, 0x18, 0xcc,
-  0x32, 0xe4, 0x88, 0x8e, 0x04, 0xc6, 0xba, 0xc1, 0x10, 0x9f, 0x59, 0x82,
-  0x1d, 0x31, 0x22, 0x76, 0x03, 0xf8, 0xcc, 0x12, 0xec, 0xc8, 0x40, 0x8b,
-  0xa3, 0xd5, 0x08, 0x66, 0x23, 0x44, 0x8e, 0x08, 0x3a, 0xa2, 0xb3, 0xc1,
-  0x8d, 0x5c, 0x30, 0xcc, 0x05, 0x4e, 0xdd, 0xe6, 0xd4, 0xf5, 0x6d, 0x30,
-  0xcc, 0xb9, 0x6a, 0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1,
-  0x01, 0x80, 0x20, 0x18, 0x68, 0x3e, 0x1c, 0xb0, 0x70, 0x90, 0xbe, 0x81,
-  0x0e, 0x07, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83,
-  0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08,
-  0x06, 0x57, 0x19, 0x07, 0x33, 0x1c, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00,
-  0x20, 0x08, 0x06, 0x97, 0x19, 0x07, 0x34, 0x1c, 0x24, 0x44, 0x30, 0x62,
-  0x80, 0x00, 0x20, 0x08, 0x06, 0xd7, 0x19, 0x07, 0x35, 0x1c, 0x24, 0x44,
-  0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x8a, 0x1b, 0x07, 0x34, 0x1c,
-  0xcc, 0x6f, 0x10, 0x84, 0x71, 0x40, 0xc2, 0xc1, 0x0f, 0x07, 0xa3, 0x09,
-  0x01, 0x70, 0x81, 0x63, 0xb3, 0x04, 0x3c, 0x32, 0xd0, 0xe2, 0x98, 0xc6,
-  0x89, 0xa0, 0xe0, 0x60, 0x22, 0x2c, 0x91, 0x22, 0xc2, 0x8e, 0xa0, 0xe0,
-  0xa0, 0x22, 0xb3, 0x0c, 0x3d, 0xf2, 0x23, 0xbf, 0x35, 0x1c, 0xa1, 0x3e,
-  0x25, 0x1c, 0x0c, 0xdf, 0xad, 0xcf, 0x30, 0xc3, 0x0d, 0x01, 0xfc, 0x06,
-  0x64, 0x50, 0x43, 0xa0, 0xc3, 0x11, 0xe6, 0x95, 0xc2, 0xc1, 0xf0, 0x55,
-  0x20, 0xe8, 0xa1, 0xd7, 0x30, 0xc3, 0x0d, 0xc1, 0xfc, 0x06, 0x64, 0x50,
-  0xc1, 0xa0, 0xb3, 0x0c, 0x3e, 0x32, 0x27, 0xc1, 0xdd, 0x6e, 0x30, 0xcc,
-  0xa1, 0x6b, 0x30, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x68, 0x76,
-  0x1c, 0x90, 0x71, 0x10, 0xc2, 0x81, 0x1c, 0x07, 0xa3, 0x09, 0x01, 0x30,
-  0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x1c,
-  0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x57, 0x1f, 0x07, 0x6b, 0x1c,
-  0x1c, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x97, 0x1f, 0x07,
-  0x6c, 0x1c, 0x30, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xd7,
-  0x1f, 0x07, 0x6d, 0x1c, 0x48, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08,
-  0x06, 0x8a, 0x29, 0x07, 0x6c, 0x1c, 0xac, 0x70, 0x10, 0xe4, 0x71, 0xc0,
-  0xc3, 0xc1, 0x1d, 0x07, 0xa3, 0x09, 0x01, 0x70, 0x81, 0x63, 0xb3, 0x04,
-  0x73, 0x32, 0xdc, 0x70, 0x5f, 0x7f, 0x1c, 0x80, 0xc1, 0x2c, 0x03, 0x98,
-  0x84, 0x49, 0x50, 0x26, 0x1c, 0xbc, 0x71, 0x00, 0x17, 0x38, 0x35, 0x62,
-  0x70, 0x00, 0x20, 0x08, 0x06, 0x13, 0x2a, 0x07, 0x70, 0x1c, 0x90, 0xd0,
-  0x0f, 0x07, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x30, 0xa5, 0x72, 0x00,
-  0xc7, 0x41, 0x20, 0x5c, 0x30, 0x4c, 0xa5, 0x70, 0x40, 0xc7, 0x01, 0x5c,
-  0xe0, 0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x4c, 0xad, 0x1c, 0xd4,
-  0x71, 0x10, 0x62, 0x64, 0x1c, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1,
-  0xe4, 0xca, 0x41, 0x1d, 0x07, 0x81, 0x70, 0xc1, 0x30, 0x17, 0x38, 0x75,
-  0x87, 0x53, 0x27, 0xbf, 0xc1, 0x30, 0x37, 0xb2, 0xc1, 0x30, 0x47, 0x0c,
-  0x73, 0xc4, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xa0, 0xcd, 0x72,
-  0x10, 0xca, 0x81, 0x0f, 0x07, 0xaf, 0x1c, 0x8c, 0x26, 0x04, 0xc0, 0x68,
-  0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x91, 0xc8,
-  0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0xba, 0x1c, 0xa0, 0x72, 0x90,
-  0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0xbb, 0x1c, 0xa4,
-  0x72, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0xbc,
-  0x1c, 0xa8, 0x72, 0x90, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18,
-  0x28, 0xe3, 0x1c, 0xa4, 0x72, 0x80, 0xc6, 0x41, 0x60, 0xcb, 0x41, 0x1e,
-  0x07, 0xb4, 0x1c, 0x8c, 0x26, 0x04, 0xc0, 0x05, 0x8e, 0xcd, 0x12, 0xcc,
-  0xc9, 0x70, 0x03, 0x8d, 0xed, 0x72, 0x00, 0x06, 0xb3, 0x0c, 0x62, 0x32,
-  0x27, 0x81, 0xe9, 0x70, 0xc0, 0xc3, 0x41, 0x7c, 0x86, 0x23, 0xc0, 0xa8,
-  0x87, 0x03, 0xe2, 0x9b, 0x65, 0x18, 0x13, 0x33, 0x09, 0xcc, 0x87, 0x83,
-  0x30, 0x8a, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa7, 0x2c,
-  0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xca, 0x39, 0xd0, 0xe1, 0x86,
-  0x60, 0x9c, 0x03, 0x30, 0x98, 0x65, 0x20, 0x93, 0x32, 0x09, 0x6c, 0x30,
-  0xe3, 0x00, 0x3e, 0xb3, 0x04, 0x6a, 0x62, 0x65, 0x1c, 0x10, 0xf1, 0x99,
-  0x25, 0x50, 0x93, 0xe1, 0x88, 0x35, 0x32, 0xe3, 0x40, 0xf8, 0x66, 0x19,
-  0xce, 0x44, 0x4d, 0x02, 0x63, 0xa3, 0x33, 0x0e, 0xe2, 0x63, 0x81, 0x43,
-  0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x29, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4,
-  0xa7, 0x08, 0x78, 0x0e, 0x74, 0xb8, 0x21, 0x70, 0xe7, 0x00, 0x0c, 0x66,
-  0x19, 0xd0, 0x24, 0x4d, 0x02, 0x7b, 0xe3, 0x60, 0x88, 0xcf, 0x2c, 0x81,
-  0x9a, 0x18, 0x21, 0xc7, 0x01, 0x7c, 0x66, 0x09, 0xd4, 0x64, 0xa0, 0xc5,
-  0xd1, 0xc8, 0x04, 0x2b, 0x13, 0x02, 0x4d, 0x84, 0x34, 0x61, 0xc7, 0xc0,
-  0x4c, 0x2e, 0x18, 0xc6, 0xe2, 0x38, 0xa8, 0xe3, 0x20, 0x3e, 0xc3, 0x11,
-  0x71, 0x66, 0xc7, 0x01, 0xf1, 0xcd, 0x32, 0xac, 0x89, 0x9b, 0x04, 0x76,
-  0xc7, 0x81, 0x9c, 0xc5, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0x81,
-  0x53, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0xfe, 0x1c, 0xe8,
-  0x70, 0x43, 0xc0, 0xcf, 0x01, 0x18, 0xcc, 0x32, 0xb0, 0x49, 0x9b, 0x04,
-  0x36, 0xfc, 0x71, 0x00, 0x9f, 0x59, 0x02, 0x39, 0x31, 0x3e, 0x0e, 0x88,
-  0xf8, 0xcc, 0x12, 0xc8, 0xc9, 0x70, 0x04, 0x9f, 0xf5, 0x71, 0x20, 0x7c,
-  0xb3, 0x0c, 0x6f, 0x22, 0x27, 0x81, 0xf5, 0x99, 0x1f, 0x07, 0xf1, 0xb1,
-  0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0xe0, 0x94, 0x05, 0x91, 0x7c, 0xac,
-  0x08, 0xe2, 0x53, 0x44, 0x4a, 0x07, 0x3a, 0xdc, 0x10, 0x9c, 0x74, 0x00,
-  0x06, 0xb3, 0x0c, 0x70, 0x12, 0x27, 0x81, 0x99, 0x72, 0x30, 0xc4, 0x67,
-  0x96, 0x40, 0x4e, 0x8c, 0x58, 0xe5, 0x00, 0x3e, 0xb3, 0x04, 0x72, 0x32,
-  0xd0, 0xe2, 0x68, 0x6c, 0x82, 0xb5, 0x09, 0x01, 0x27, 0x42, 0x9c, 0xb8,
-  0x74, 0xe0, 0x26, 0x17, 0x0c, 0x73, 0x81, 0x53, 0xb7, 0x39, 0x75, 0x77,
-  0x1c, 0x0c, 0x73, 0xa8, 0x1b, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33,
-  0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x1a, 0x4e, 0x07, 0x26, 0x1d, 0x8c,
-  0x73, 0x40, 0xd3, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68,
-  0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00,
-  0x08, 0x82, 0xc1, 0xf5, 0xd3, 0x41, 0x4b, 0x07, 0x09, 0x11, 0x8c, 0x18,
-  0x20, 0x00, 0x08, 0x82, 0xc1, 0x05, 0xd6, 0x81, 0x4b, 0x07, 0x09, 0x11,
-  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x15, 0xd6, 0xc1, 0x4b, 0x07,
-  0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0x81, 0x82, 0xd6, 0x81,
-  0x4b, 0x07, 0xed, 0x1c, 0x04, 0x3b, 0x1d, 0xf8, 0x73, 0x90, 0xd3, 0xc1,
-  0x68, 0x42, 0x00, 0x5c, 0xe0, 0xd8, 0x2c, 0xc1, 0x9c, 0x0c, 0xb4, 0x38,
-  0xa6, 0xe1, 0x23, 0xfc, 0x39, 0xf4, 0x08, 0x4b, 0x80, 0x89, 0x20, 0x27,
-  0xfc, 0x39, 0x84, 0xc9, 0x2c, 0x03, 0x9d, 0xd8, 0x49, 0xae, 0x0d, 0x47,
-  0xa4, 0xcf, 0x3f, 0x07, 0xc3, 0x77, 0xea, 0x33, 0xcc, 0x70, 0x43, 0xa0,
-  0xce, 0x01, 0x19, 0xd4, 0x10, 0xe8, 0x70, 0x04, 0xb8, 0x8d, 0x74, 0x30,
-  0x7c, 0x15, 0x08, 0x7a, 0xe2, 0x36, 0xcc, 0x70, 0x43, 0xd0, 0xce, 0x01,
-  0x19, 0x54, 0x30, 0xe8, 0x2c, 0x43, 0x9d, 0xa8, 0x4a, 0x70, 0xb1, 0x1c,
-  0x0c, 0x73, 0xe2, 0x1b, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06,
-  0x1a, 0x5c, 0x07, 0x3e, 0x1d, 0xec, 0x73, 0xc0, 0xd6, 0xc1, 0x68, 0x42,
-  0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43,
-  0x11, 0x87, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x75, 0xd7, 0x41,
-  0x59, 0x07, 0x07, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x85,
-  0xd7, 0x81, 0x59, 0x07, 0x0c, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
-  0xc1, 0x95, 0xd7, 0xc1, 0x59, 0x07, 0x12, 0x11, 0x8c, 0x18, 0x28, 0x00,
-  0x08, 0x82, 0x81, 0x02, 0xda, 0x81, 0x59, 0x07, 0x25, 0x1d, 0x04, 0x73,
-  0x1d, 0xd8, 0x74, 0x10, 0xd7, 0xc1, 0x68, 0x42, 0x00, 0x5c, 0xe0, 0xd8,
-  0x2c, 0x81, 0xaa, 0x0c, 0x37, 0xc4, 0x5b, 0x5e, 0x07, 0x60, 0x30, 0xcb,
-  0x70, 0x27, 0x78, 0x12, 0x14, 0x48, 0x07, 0x69, 0x1d, 0xc0, 0x05, 0x4e,
-  0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x24, 0xda, 0x81, 0x5a, 0x07,
-  0x23, 0x94, 0xd3, 0xc1, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x4c, 0xa3,
-  0x1d, 0xa8, 0x75, 0x10, 0x08, 0x17, 0x0c, 0x53, 0x23, 0x1d, 0xb8, 0x75,
-  0x00, 0x17, 0x38, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0xd3, 0x69,
-  0x07, 0x6f, 0x1d, 0xec, 0x9b, 0x4f, 0x07, 0x23, 0x06, 0x07, 0x00, 0x82,
-  0x60, 0x30, 0xa1, 0x76, 0xf0, 0xd6, 0x41, 0x20, 0x5c, 0x30, 0xcc, 0x05,
-  0x4e, 0xdd, 0xe1, 0xd4, 0xb1, 0x73, 0x30, 0xcc, 0xf5, 0x6f, 0x30, 0xcc,
-  0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x68,
-  0xad, 0x1d, 0xec, 0x75, 0x80, 0xd3, 0x41, 0x6a, 0x07, 0xa3, 0x09, 0x01,
-  0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45,
-  0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x17, 0x6d, 0x07, 0xa2,
-  0x1d, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x57, 0x6d,
-  0x07, 0xa3, 0x1d, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06,
-  0x97, 0x6d, 0x07, 0xa4, 0x1d, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20,
-  0x08, 0x06, 0x4a, 0x6f, 0x07, 0xa3, 0x1d, 0x88, 0x75, 0x10, 0xc0, 0x76,
-  0x30, 0xd7, 0x81, 0x6b, 0x07, 0xa3, 0x09, 0x01, 0x70, 0x81, 0x63, 0xb3,
-  0x04, 0xaa, 0x32, 0xdc, 0xe0, 0x72, 0xb5, 0x1d, 0x80, 0xc1, 0x2c, 0x43,
-  0x9e, 0xa8, 0x4a, 0x60, 0x34, 0x1d, 0xd8, 0x74, 0x10, 0x9f, 0xe1, 0x88,
-  0x1f, 0xba, 0xe9, 0x80, 0xf8, 0x66, 0x19, 0xf4, 0xa4, 0x4f, 0x02, 0xc3,
-  0xe9, 0x00, 0x8c, 0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xc0,
-  0x29, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0xdf, 0x0e, 0x74,
-  0xb8, 0x21, 0xe8, 0xed, 0x00, 0x0c, 0x66, 0x19, 0xf6, 0x84, 0x4f, 0x02,
-  0x1b, 0xc0, 0x3a, 0x80, 0xcf, 0x2c, 0x41, 0xa8, 0xd8, 0x4f, 0x07, 0x44,
-  0x7c, 0x66, 0x09, 0x42, 0x65, 0x38, 0x42, 0x8d, 0xc0, 0x3a, 0x10, 0xbe,
-  0x59, 0x06, 0x3f, 0x09, 0x95, 0xc0, 0xd6, 0x28, 0xac, 0x83, 0xf8, 0x58,
-  0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x70, 0xca, 0x82, 0x48, 0x3e, 0x56,
-  0x04, 0xf1, 0x29, 0x42, 0xbd, 0x03, 0x1d, 0x6e, 0x08, 0xd0, 0x3b, 0x00,
-  0x83, 0x59, 0x86, 0x3f, 0x01, 0x95, 0xc0, 0xd2, 0x3a, 0x18, 0xe2, 0x33,
-  0x4b, 0x10, 0x2a, 0x46, 0xb0, 0x75, 0x00, 0x9f, 0x59, 0x82, 0x50, 0x19,
-  0x68, 0x71, 0xb4, 0x3d, 0xc1, 0xf8, 0x84, 0xf8, 0x13, 0x01, 0x54, 0xd4,
-  0x31, 0xe8, 0x93, 0x0b, 0x86, 0xb1, 0xb5, 0x0e, 0xde, 0x3a, 0x88, 0xcf,
-  0x70, 0xc4, 0xda, 0xc1, 0x75, 0x40, 0x7c, 0xb3, 0x0c, 0xa2, 0x52, 0x2a,
-  0x81, 0xc5, 0x75, 0xc0, 0x76, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3,
-  0x5c, 0xe0, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x04, 0x7e,
-  0x07, 0x3a, 0xdc, 0x10, 0xd8, 0x77, 0x00, 0x06, 0xb3, 0x0c, 0xa3, 0x42,
-  0x2a, 0x81, 0x0d, 0x79, 0x1d, 0xc0, 0x67, 0x96, 0x20, 0x55, 0xcc, 0xae,
-  0x03, 0x22, 0x3e, 0xb3, 0x04, 0xa9, 0x32, 0x1c, 0x61, 0x77, 0x77, 0x1d,
-  0x08, 0xdf, 0x2c, 0x83, 0xa9, 0xa4, 0x4a, 0x60, 0x77, 0x87, 0xd7, 0x41,
-  0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x38, 0x65, 0x41, 0x24,
-  0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x31, 0xe2, 0x81, 0x0e, 0x37, 0x04, 0x21,
-  0x1e, 0x80, 0xc1, 0x2c, 0xc3, 0xa9, 0xa0, 0x4a, 0x60, 0xa0, 0x1d, 0x0c,
-  0xf1, 0x99, 0x25, 0x48, 0x15, 0x23, 0x4a, 0x3b, 0x80, 0xcf, 0x2c, 0x41,
-  0xaa, 0x0c, 0xb4, 0x38, 0xda, 0xa8, 0x60, 0xa4, 0x42, 0x9c, 0x8a, 0x80,
-  0x2a, 0x22, 0x1f, 0x94, 0xca, 0x05, 0xc3, 0x5c, 0xe0, 0xd4, 0x6d, 0x4e,
-  0x5d, 0x5c, 0x07, 0xc3, 0x9c, 0x28, 0x07, 0xc3, 0x1c, 0x31, 0xcc, 0x11,
-  0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x81, 0x26, 0xe3, 0x01, 0x88,
-  0x07, 0xbd, 0x1d, 0xb8, 0x78, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42,
-  0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23, 0x06,
-  0x08, 0x00, 0x82, 0x60, 0x70, 0xe5, 0x78, 0x70, 0xe2, 0x41, 0x42, 0x04,
-  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0xe9, 0x78, 0x80, 0xe2, 0x41,
-  0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0xed, 0x78, 0x90,
-  0xe2, 0x41, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xa0, 0x88,
-  0x79, 0x80, 0xe2, 0xc1, 0x79, 0x07, 0x41, 0x8d, 0x07, 0xf8, 0x1d, 0xcc,
-  0x78, 0x30, 0x9a, 0x10, 0x00, 0x17, 0x38, 0x36, 0x4b, 0xa0, 0x2a, 0x03,
-  0x2d, 0x8e, 0x69, 0xd4, 0x09, 0xfc, 0x0e, 0x74, 0xc2, 0x12, 0x77, 0x22,
-  0xa4, 0x0a, 0xfc, 0x0e, 0x78, 0x62, 0xb6, 0x87, 0xdf, 0x01, 0x7c, 0x66,
-  0x19, 0x56, 0xa5, 0x55, 0x68, 0x6f, 0x38, 0x02, 0xf7, 0xf4, 0x3b, 0x18,
-  0xbe, 0xcb, 0xbd, 0x61, 0x86, 0x1b, 0x82, 0xf2, 0x0e, 0xc8, 0xa0, 0x86,
-  0x40, 0x87, 0x23, 0x0a, 0xff, 0x0e, 0x86, 0xaf, 0x02, 0x41, 0xef, 0x18,
-  0x66, 0xb8, 0x21, 0x40, 0xef, 0x80, 0x0c, 0x2a, 0x18, 0x74, 0x96, 0x81,
-  0x55, 0xc2, 0x25, 0x38, 0xd6, 0x0e, 0x86, 0xb9, 0x5e, 0x0e, 0x86, 0x19,
-  0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x6d, 0xcd, 0x83, 0x1c, 0x0f, 0xec,
-  0x3b, 0x38, 0xf3, 0x60, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34,
-  0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x43, 0x46, 0x0c, 0x10, 0x00,
-  0x04, 0xc1, 0xe0, 0x92, 0xf3, 0x00, 0xcc, 0x83, 0x83, 0x08, 0x46, 0x0c,
-  0x10, 0x00, 0x04, 0xc1, 0xe0, 0x9a, 0xf3, 0x20, 0xcc, 0x03, 0x86, 0x08,
-  0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0xa2, 0xf3, 0x40, 0xcc, 0x03,
-  0x89, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x40, 0xd9, 0xf3, 0x20,
-  0xcc, 0x03, 0x10, 0x0f, 0x02, 0x37, 0x0f, 0x62, 0x3c, 0x60, 0xf3, 0x60,
-  0x34, 0x21, 0x00, 0x2e, 0x70, 0x6c, 0x96, 0x20, 0x5c, 0x86, 0x1b, 0xd8,
-  0x8f, 0xce, 0x03, 0x30, 0x98, 0x65, 0x70, 0x95, 0x57, 0x09, 0x6a, 0xbf,
-  0x03, 0x32, 0x0f, 0xe0, 0x02, 0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1,
-  0x60, 0xea, 0xf3, 0xa0, 0xcc, 0x03, 0xf9, 0xa3, 0xf1, 0x60, 0xc4, 0xe0,
-  0x00, 0x40, 0x10, 0x0c, 0x26, 0x3f, 0x0f, 0xca, 0x3c, 0x08, 0x84, 0x0b,
-  0x86, 0x29, 0xff, 0x0e, 0xd2, 0x3c, 0x80, 0x0b, 0x9c, 0x1a, 0x31, 0x38,
-  0x00, 0x10, 0x04, 0x83, 0x49, 0xd4, 0x03, 0x35, 0x0f, 0xc0, 0x20, 0xc7,
-  0x83, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x98, 0x46, 0x3d, 0x50, 0xf3,
-  0x20, 0x10, 0x2e, 0x18, 0xe6, 0x02, 0xa7, 0xee, 0x70, 0xea, 0xce, 0x3b,
-  0x18, 0xe6, 0xf0, 0x39, 0x18, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4,
-  0xe0, 0x00, 0x40, 0x10, 0x0c, 0x34, 0x54, 0x0f, 0xec, 0x3c, 0x98, 0xf1,
-  0x80, 0xd4, 0x83, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84,
-  0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10,
-  0x04, 0x83, 0xeb, 0xd5, 0x83, 0x3e, 0x0f, 0x12, 0x22, 0x18, 0x31, 0x40,
-  0x00, 0x10, 0x04, 0x83, 0x0b, 0xd6, 0x03, 0x3f, 0x0f, 0x12, 0x22, 0x18,
-  0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x2b, 0xd6, 0x83, 0x3f, 0x0f, 0x12,
-  0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x03, 0x05, 0xd7, 0x03, 0x3f,
-  0x0f, 0x7a, 0x3c, 0x08, 0x56, 0x3d, 0x70, 0xf3, 0x20, 0xd5, 0x83, 0xd1,
-  0x84, 0x00, 0xb8, 0xc0, 0xb1, 0x59, 0x82, 0x70, 0x19, 0x6e, 0x48, 0xc1,
-  0x00, 0xd6, 0x03, 0x30, 0x98, 0x65, 0x80, 0x95, 0x70, 0x09, 0xec, 0xc5,
-  0x83, 0x18, 0x0f, 0xe2, 0x33, 0x1c, 0xe1, 0x82, 0x81, 0x8c, 0x07, 0xc4,
-  0x37, 0xcb, 0x10, 0x2b, 0xb4, 0x12, 0xd8, 0x8c, 0x07, 0x2f, 0x18, 0xc4,
-  0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x53, 0x16, 0x18, 0xf2,
-  0xb1, 0x22, 0x88, 0x4f, 0x11, 0xba, 0x1e, 0xe8, 0x70, 0x43, 0x80, 0xeb,
-  0x01, 0x18, 0xcc, 0x32, 0xc8, 0xca, 0xac, 0x04, 0x36, 0xec, 0x78, 0x00,
-  0x9f, 0x59, 0x02, 0x5c, 0x31, 0x1d, 0x0f, 0x88, 0xf8, 0xcc, 0x12, 0xe0,
-  0xca, 0x70, 0x44, 0x0e, 0x06, 0x3b, 0x1e, 0x08, 0xdf, 0x2c, 0x43, 0xad,
-  0xe0, 0x4a, 0x60, 0x3a, 0x18, 0xf0, 0x78, 0x10, 0x1f, 0x0b, 0x1c, 0xfa,
-  0x5c, 0x30, 0xcc, 0x05, 0x4e, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e,
-  0x45, 0x94, 0x7b, 0xa0, 0xc3, 0x0d, 0xc1, 0xb8, 0x07, 0x60, 0x30, 0xcb,
-  0x60, 0x2b, 0xb7, 0x12, 0x18, 0x99, 0x07, 0x43, 0x7c, 0x66, 0x09, 0x70,
-  0xc5, 0x88, 0x33, 0x0f, 0xe0, 0x33, 0x4b, 0x80, 0x2b, 0x03, 0x2d, 0x8e,
-  0x26, 0x2b, 0xd8, 0xac, 0x10, 0xb6, 0x22, 0xdc, 0x0a, 0x2a, 0x0a, 0xb4,
-  0x72, 0xc1, 0x30, 0x66, 0xe6, 0x81, 0x9a, 0x07, 0xf1, 0x19, 0x8e, 0x80,
-  0x85, 0x35, 0x0f, 0x88, 0x6f, 0x96, 0x21, 0x57, 0x78, 0x25, 0x30, 0x36,
-  0x0f, 0x62, 0x21, 0x3e, 0x16, 0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9c,
-  0xb2, 0xc0, 0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x98, 0xf7, 0x40, 0x87,
-  0x1b, 0x82, 0x78, 0x0f, 0xc0, 0x60, 0x96, 0x41, 0x57, 0x76, 0x25, 0xb0,
-  0x81, 0xce, 0x03, 0xf8, 0xcc, 0x12, 0x80, 0x8b, 0xc5, 0x79, 0x40, 0xc4,
-  0x67, 0x96, 0x00, 0x5c, 0x86, 0x23, 0x76, 0x41, 0xce, 0x03, 0xe1, 0x9b,
-  0x65, 0xe8, 0x15, 0x70, 0x09, 0x8c, 0x17, 0xe6, 0x3c, 0x88, 0x8f, 0x05,
-  0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa7, 0x2c, 0x88, 0xe4, 0x63, 0x45,
-  0x10, 0x9f, 0x22, 0xfc, 0x3d, 0xd0, 0xe1, 0x86, 0x80, 0xdf, 0x03, 0x30,
-  0x98, 0x65, 0xf0, 0x95, 0x5f, 0x09, 0x6c, 0xcf, 0x83, 0x21, 0x3e, 0xb3,
-  0x04, 0xe0, 0x62, 0x04, 0xa8, 0x07, 0xf0, 0x99, 0x25, 0x00, 0x97, 0x81,
-  0x16, 0x47, 0xd3, 0x15, 0x6c, 0x57, 0x08, 0x5f, 0x11, 0x7e, 0x85, 0x35,
-  0x78, 0xe5, 0x82, 0x61, 0x2e, 0x70, 0xea, 0x36, 0xa7, 0x8e, 0xcd, 0x83,
-  0x61, 0xae, 0xaf, 0x83, 0x61, 0x8e, 0x18, 0xe6, 0x88, 0x61, 0x46, 0x0c,
-  0x0e, 0x00, 0x04, 0xc1, 0x40, 0x6b, 0xf9, 0x60, 0xdf, 0x03, 0x5c, 0x0f,
-  0x52, 0x3e, 0x18, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18,
-  0x84, 0xd1, 0x04, 0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00, 0x41,
-  0x30, 0xb8, 0x68, 0x3e, 0x10, 0xf9, 0x20, 0x21, 0x82, 0x11, 0x03, 0x04,
-  0x00, 0x41, 0x30, 0xb8, 0x6a, 0x3e, 0x18, 0xf9, 0x20, 0x21, 0x82, 0x11,
-  0x03, 0x04, 0x00, 0x41, 0x30, 0xb8, 0x6c, 0x3e, 0x20, 0xf9, 0x20, 0x21,
-  0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x50, 0x7a, 0x3e, 0x18, 0xf9,
-  0x40, 0xdc, 0x83, 0x00, 0xe6, 0x83, 0x79, 0x0f, 0x5c, 0x3e, 0x18, 0x4d,
-  0x08, 0x80, 0x0b, 0x1c, 0x9b, 0x25, 0x08, 0x97, 0x81, 0x16, 0xc7, 0x34,
-  0x58, 0xc5, 0xbc, 0x87, 0x55, 0x61, 0x09, 0x57, 0x11, 0xc0, 0xc5, 0xbc,
-  0x87, 0x57, 0x99, 0x65, 0x10, 0x17, 0x72, 0x71, 0xc5, 0x60, 0x38, 0x62,
-  0x16, 0x03, 0x7a, 0x0f, 0x86, 0xef, 0x68, 0x31, 0x18, 0x66, 0xb8, 0x21,
-  0xf8, 0xf5, 0x80, 0x0c, 0x6a, 0x08, 0x74, 0x38, 0xe2, 0x1f, 0xf0, 0x3d,
-  0x18, 0xbe, 0x0a, 0x04, 0xbd, 0x90, 0x18, 0x66, 0xb8, 0x21, 0x10, 0xf7,
-  0x80, 0x0c, 0x2a, 0x18, 0x74, 0x96, 0x61, 0x5c, 0xf0, 0x25, 0x38, 0x53,
-  0x0f, 0x86, 0xb9, 0xdb, 0x0e, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04,
-  0x03, 0xad, 0xec, 0x83, 0x99, 0x0f, 0xe0, 0x3d, 0x08, 0xfb, 0x60, 0x34,
-  0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88,
-  0xa1, 0x88, 0x43, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x62, 0xfb,
-  0x40, 0xe7, 0x83, 0x83, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0,
-  0x6a, 0xfb, 0x60, 0xe7, 0x03, 0x86, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04,
-  0xc1, 0xe0, 0x72, 0xfb, 0x80, 0xe7, 0x03, 0x89, 0x08, 0x46, 0x0c, 0x14,
-  0x00, 0x04, 0xc1, 0x40, 0xa9, 0xfb, 0x60, 0xe7, 0x03, 0x7d, 0x0f, 0x02,
-  0xb4, 0x0f, 0x56, 0x3e, 0x30, 0xfb, 0x60, 0x34, 0x21, 0x00, 0x2e, 0x70,
-  0x6c, 0x96, 0x00, 0x5f, 0x86, 0x1b, 0xcc, 0x31, 0x70, 0xfb, 0x00, 0x0c,
-  0x66, 0x19, 0xca, 0xc5, 0x5c, 0x82, 0xaa, 0xf7, 0xc0, 0xe7, 0x03, 0xb8,
-  0xc0, 0xa9, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x98, 0xee, 0x3e, 0xf8,
-  0xf9, 0xa0, 0x1d, 0x03, 0x97, 0x0f, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1,
-  0x60, 0xc2, 0xfb, 0xe0, 0xe7, 0x83, 0x40, 0xb8, 0x60, 0x98, 0xc2, 0xf7,
-  0x60, 0xec, 0x03, 0xb8, 0xc0, 0xa9, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30,
-  0x98, 0xf8, 0x3e, 0x20, 0xfb, 0x40, 0x27, 0x66, 0x3e, 0x18, 0x31, 0x38,
-  0x00, 0x10, 0x04, 0x83, 0xa9, 0xef, 0x03, 0xb2, 0x0f, 0x02, 0xe1, 0x82,
-  0x61, 0x2e, 0x70, 0xea, 0x0e, 0xa7, 0x2e, 0xdc, 0x83, 0x61, 0x4e, 0xbe,
-  0x83, 0x61, 0x8e, 0x18, 0xe6, 0x88, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04,
-  0xc1, 0x40, 0x13, 0xfd, 0x00, 0xee, 0x83, 0x96, 0x0f, 0xfc, 0x3e, 0x18,
-  0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04,
-  0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xb8, 0x52,
-  0x3f, 0xb8, 0xfb, 0x20, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30,
-  0xb8, 0x54, 0x3f, 0xc0, 0xfb, 0x20, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00,
-  0x41, 0x30, 0xb8, 0x56, 0x3f, 0xc8, 0xfb, 0x20, 0x21, 0x82, 0x11, 0x03,
-  0x05, 0x00, 0x41, 0x30, 0x50, 0x64, 0x3f, 0xc0, 0xfb, 0xe0, 0xe6, 0x83,
-  0xa0, 0xf4, 0x03, 0xb4, 0x0f, 0x46, 0x3f, 0x18, 0x4d, 0x08, 0x80, 0x0b,
-  0x1c, 0x9b, 0x25, 0xc0, 0x97, 0xe1, 0x86, 0x91, 0x0c, 0x54, 0x3f, 0x00,
-  0x83, 0x59, 0x86, 0x73, 0xc1, 0x97, 0xc0, 0x52, 0x3e, 0x58, 0xf9, 0x20,
-  0x3e, 0xc3, 0x11, 0x29, 0x19, 0xb0, 0x7c, 0x40, 0x7c, 0xb3, 0x0c, 0xe8,
-  0xb2, 0x2e, 0x81, 0xb5, 0x7c, 0xa0, 0x92, 0x41, 0x7c, 0x2c, 0x18, 0xe8,
-  0x73, 0xc1, 0x30, 0x17, 0x38, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8,
-  0x14, 0x41, 0xfb, 0x81, 0x0e, 0x37, 0x04, 0xb2, 0x1f, 0x80, 0xc1, 0x2c,
-  0x43, 0xba, 0xa8, 0x4b, 0x60, 0x43, 0xcd, 0x07, 0xf0, 0x99, 0x25, 0x78,
-  0x17, 0xa3, 0xf9, 0x80, 0x88, 0xcf, 0x2c, 0xc1, 0xbb, 0x0c, 0x47, 0xd0,
-  0x64, 0x50, 0xf3, 0x81, 0xf0, 0xcd, 0x32, 0xb0, 0xcb, 0xbb, 0x04, 0x56,
-  0x93, 0x81, 0xcd, 0x07, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c,
-  0xe0, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0xc4, 0xef, 0x07,
-  0x3a, 0xdc, 0x10, 0xf4, 0x7e, 0x00, 0x06, 0xb3, 0x0c, 0xed, 0xe2, 0x2e,
-  0x81, 0xf9, 0x7c, 0x30, 0xc4, 0x67, 0x96, 0xe0, 0x5d, 0x8c, 0x08, 0xfb,
-  0x00, 0x3e, 0xb3, 0x04, 0xef, 0x32, 0xd0, 0xe2, 0x68, 0xe9, 0x82, 0xa9,
-  0x0b, 0xd1, 0x2e, 0x82, 0xbb, 0xf8, 0xa9, 0xb0, 0x2e, 0x17, 0x0c, 0x63,
-  0x60, 0x1f, 0x90, 0x7d, 0x10, 0x9f, 0xe1, 0x08, 0xd5, 0x28, 0xfb, 0x80,
-  0xf8, 0x66, 0x19, 0xe0, 0x65, 0x5e, 0x02, 0x33, 0xfb, 0x60, 0x35, 0xe2,
-  0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x29, 0x0b, 0x0c, 0xf9,
-  0x58, 0x11, 0xc4, 0xa7, 0x88, 0xf6, 0x0f, 0x74, 0xb8, 0x21, 0x58, 0xff,
-  0x00, 0x0c, 0x66, 0x19, 0xe2, 0x45, 0x5e, 0x02, 0x1b, 0xdc, 0x3e, 0x80,
-  0xcf, 0x2c, 0xc1, 0xbd, 0xd8, 0xda, 0x07, 0x44, 0x7c, 0x66, 0x09, 0xee,
-  0x65, 0x38, 0xa2, 0x36, 0xd8, 0x3e, 0x10, 0xbe, 0x59, 0x06, 0x7a, 0xb9,
-  0x97, 0xc0, 0x6c, 0xa3, 0xed, 0x83, 0xf8, 0x58, 0xe0, 0xd0, 0xe7, 0x82,
-  0x61, 0x2e, 0x70, 0xca, 0x82, 0x48, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x02,
-  0xff, 0x03, 0x1d, 0x6e, 0x08, 0xec, 0x3f, 0x00, 0x83, 0x59, 0x86, 0x7a,
-  0xb1, 0x97, 0xc0, 0xea, 0x3e, 0x18, 0xe2, 0x33, 0x4b, 0x70, 0x2f, 0x46,
-  0xe8, 0x7d, 0x00, 0x9f, 0x59, 0x82, 0x7b, 0x19, 0x68, 0x71, 0xb4, 0x78,
-  0xc1, 0xe4, 0x85, 0xa8, 0x17, 0xc1, 0x5e, 0x40, 0x67, 0x5e, 0x2e, 0x18,
-  0xe6, 0x02, 0xa7, 0x6e, 0x73, 0xea, 0xcc, 0x3e, 0x18, 0xe6, 0xee, 0x3c,
-  0x18, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10,
-  0x0c, 0xb4, 0x13, 0x14, 0xea, 0x3f, 0x90, 0xfd, 0x60, 0x04, 0x85, 0xd1,
-  0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20,
-  0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0xcb, 0x05,
-  0x05, 0xfe, 0x0f, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83,
-  0xeb, 0x05, 0x85, 0xfe, 0x0f, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10,
-  0x04, 0x83, 0x0b, 0x06, 0x05, 0xff, 0x0f, 0x12, 0x22, 0x18, 0x31, 0x50,
-  0x00, 0x10, 0x04, 0x03, 0xe5, 0x06, 0x85, 0xfe, 0x0f, 0x78, 0x3f, 0x08,
-  0x54, 0x50, 0x68, 0xff, 0x00, 0x05, 0x85, 0xd1, 0x84, 0x00, 0xb8, 0xc0,
-  0xb1, 0x59, 0x02, 0x7c, 0x19, 0x68, 0x71, 0x4c, 0x63, 0x5c, 0x74, 0x7f,
-  0x10, 0x17, 0x96, 0x28, 0x17, 0xe1, 0x5e, 0x74, 0x7f, 0x30, 0x97, 0x59,
-  0x86, 0x7c, 0xd9, 0x17, 0xd4, 0x0c, 0x86, 0x23, 0x66, 0xcf, 0xfd, 0x83,
-  0xe1, 0x3b, 0xda, 0x1b, 0x66, 0xb8, 0x21, 0xc8, 0xfd, 0x80, 0x0c, 0x6a,
-  0x08, 0x74, 0x38, 0x22, 0x3f, 0xe4, 0x3f, 0x18, 0xbe, 0x0a, 0x04, 0xbd,
-  0xfd, 0x18, 0x66, 0xb8, 0x21, 0xe0, 0xfd, 0x80, 0x0c, 0x2a, 0x18, 0x74,
-  0x96, 0x41, 0x5f, 0x5e, 0x26, 0x38, 0xd0, 0x0f, 0x86, 0xb9, 0x58, 0x0f,
-  0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0xed, 0x07, 0x85, 0x16,
-  0x14, 0xd4, 0x3f, 0xd8, 0x41, 0x61, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84,
-  0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x43, 0x46, 0x0c,
-  0x10, 0x00, 0x04, 0xc1, 0xe0, 0x32, 0x43, 0x81, 0x06, 0x85, 0x83, 0x08,
-  0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x3a, 0x43, 0xa1, 0x06, 0x05,
-  0x86, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x42, 0x43, 0xc1,
-  0x06, 0x05, 0x89, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x40, 0x79,
-  0x43, 0xa1, 0x06, 0x05, 0xfa, 0x0f, 0x02, 0x31, 0x14, 0x4a, 0x50, 0x00,
-  0x43, 0x61, 0x34, 0x21, 0x00, 0x2e, 0x70, 0x6c, 0x96, 0xe0, 0x65, 0x86,
-  0x1b, 0xc0, 0x33, 0x40, 0x43, 0x01, 0x0c, 0x66, 0x19, 0xf8, 0xa5, 0x5f,
-  0x82, 0x7a, 0xff, 0x00, 0x07, 0x05, 0xb8, 0xc0, 0xa9, 0x11, 0x83, 0x03,
-  0x00, 0x41, 0x30, 0x98, 0xe2, 0x50, 0xc8, 0x41, 0xa1, 0xfd, 0x50, 0x50,
-  0x18, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x49, 0x0e, 0x85, 0x1c, 0x14,
-  0x02, 0xe1, 0x82, 0x61, 0x4a, 0xfe, 0x83, 0x1e, 0x14, 0xe0, 0x02, 0xa7,
-  0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0xb2, 0x43, 0xc1, 0x07, 0x05,
-  0x1a, 0x69, 0x41, 0x61, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xa6, 0x3b,
-  0x14, 0x7c, 0x50, 0x08, 0x84, 0x0b, 0x86, 0xb9, 0xc0, 0xa9, 0x3b, 0x9c,
-  0xba, 0xdd, 0x0f, 0x86, 0x39, 0x76, 0x0f, 0x86, 0x39, 0x62, 0x98, 0x23,
-  0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x8d, 0x0f, 0x05, 0x35,
-  0x14, 0x4e, 0x50, 0xc0, 0x43, 0x61, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84,
-  0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c,
-  0x10, 0x00, 0x04, 0xc1, 0xe0, 0x1a, 0x45, 0x21, 0x0e, 0x85, 0x84, 0x08,
-  0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x22, 0x45, 0x41, 0x0e, 0x85,
-  0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x2a, 0x45, 0x61,
-  0x0e, 0x85, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x40, 0x61,
-  0x45, 0x41, 0x0e, 0x85, 0x18, 0x14, 0x82, 0x3f, 0x14, 0xc4, 0x50, 0xe8,
-  0x43, 0x61, 0x34, 0x21, 0x00, 0x2e, 0x70, 0x6c, 0x96, 0xe0, 0x65, 0x86,
-  0x1b, 0xfa, 0x33, 0x20, 0x45, 0x01, 0x0c, 0x66, 0x19, 0xfc, 0xe5, 0x65,
-  0x02, 0x1b, 0x41, 0xa1, 0x04, 0x85, 0xf8, 0x0c, 0x47, 0xa4, 0x60, 0x60,
-  0x82, 0x02, 0xf1, 0xcd, 0x32, 0xfc, 0x8b, 0xc8, 0x04, 0x76, 0x82, 0x82,
-  0x0a, 0x06, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0xe0, 0x94,
-  0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x84, 0x2b, 0x0a, 0x3a, 0xdc,
-  0x10, 0xb0, 0xa2, 0x00, 0x06, 0xb3, 0x0c, 0x20, 0x13, 0x32, 0x81, 0x0d,
-  0x2f, 0x28, 0xc0, 0x67, 0x96, 0xc0, 0x64, 0xcc, 0x05, 0x05, 0x22, 0x3e,
-  0xb3, 0x04, 0x26, 0x33, 0x1c, 0x41, 0x83, 0xc1, 0x0b, 0x0a, 0xc2, 0x37,
-  0xcb, 0x30, 0x32, 0x26, 0x13, 0x58, 0x0d, 0x06, 0x30, 0x28, 0xc4, 0xc7,
-  0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x53, 0x16, 0x44, 0xf2, 0xb1,
-  0x22, 0x88, 0x4f, 0x11, 0xb9, 0x28, 0xe8, 0x70, 0x43, 0x70, 0x8b, 0x02,
-  0x18, 0xcc, 0x32, 0x90, 0x4c, 0xc9, 0x04, 0x86, 0x83, 0xc2, 0x10, 0x9f,
-  0x59, 0x02, 0x93, 0x31, 0x62, 0x07, 0x05, 0xf8, 0xcc, 0x12, 0x98, 0xcc,
-  0x40, 0x8b, 0xa3, 0x81, 0x0c, 0x16, 0x32, 0x04, 0xc9, 0x08, 0x25, 0xe3,
-  0x87, 0x82, 0xc8, 0x5c, 0x30, 0x8c, 0xe9, 0xa0, 0xe0, 0x83, 0x42, 0x7c,
-  0x86, 0x23, 0x48, 0xe5, 0x07, 0x05, 0xe2, 0x9b, 0x65, 0x38, 0x19, 0x95,
-  0x09, 0x0c, 0x0c, 0x85, 0x52, 0x89, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18,
-  0xe6, 0x02, 0xa7, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xce,
-  0x51, 0xd0, 0xe1, 0x86, 0xa0, 0x1c, 0x05, 0x30, 0x98, 0x65, 0x40, 0x99,
-  0x94, 0x09, 0x6c, 0x40, 0x43, 0x01, 0x3e, 0xb3, 0x04, 0x2e, 0x63, 0x65,
-  0x28, 0x10, 0xf1, 0x99, 0x25, 0x70, 0x99, 0xe1, 0x88, 0x57, 0x31, 0x43,
-  0x41, 0xf8, 0x66, 0x19, 0x56, 0xc6, 0x65, 0x02, 0x83, 0x95, 0x33, 0x14,
-  0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x29, 0x0b, 0x22,
-  0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0x79, 0x14, 0x74, 0xb8, 0x21, 0x80,
-  0x47, 0x01, 0x0c, 0x66, 0x19, 0x58, 0xa6, 0x65, 0x02, 0x7b, 0x43, 0x61,
-  0x88, 0xcf, 0x2c, 0x81, 0xcb, 0x18, 0x41, 0x87, 0x02, 0x7c, 0x66, 0x09,
-  0x5c, 0x66, 0xa0, 0xc5, 0xd1, 0x50, 0x06, 0x4b, 0x19, 0x82, 0x65, 0x84,
-  0x96, 0xa1, 0x2b, 0x95, 0xb9, 0x60, 0x98, 0x0b, 0x9c, 0xba, 0xcd, 0xa9,
-  0x03, 0x43, 0x61, 0x98, 0x8b, 0xfb, 0x60, 0x98, 0x23, 0x86, 0x39, 0x62,
-  0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xd0, 0x42, 0x52, 0x78, 0x47,
-  0x81, 0x15, 0x85, 0x7e, 0x14, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08,
-  0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00,
-  0x01, 0x40, 0x10, 0x0c, 0x2e, 0x94, 0x14, 0xec, 0x51, 0x48, 0x88, 0x60,
-  0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xae, 0x94, 0x14, 0xee, 0x51, 0x48,
-  0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x2e, 0x95, 0x14, 0xf0,
-  0x51, 0x48, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x94, 0x98,
-  0x14, 0xee, 0x51, 0xb0, 0x45, 0x21, 0x20, 0x49, 0xe1, 0x1c, 0x05, 0x91,
-  0x14, 0x46, 0x13, 0x02, 0xe0, 0x02, 0xc7, 0x66, 0x09, 0x5e, 0x66, 0xa0,
-  0xc5, 0x31, 0x0d, 0x7d, 0x71, 0x4d, 0x22, 0x5f, 0x58, 0x82, 0x5f, 0x04,
-  0x97, 0x71, 0x4d, 0xa2, 0x5f, 0x66, 0x19, 0x60, 0x46, 0x66, 0x44, 0x35,
-  0x18, 0x8e, 0x90, 0x3d, 0x74, 0x14, 0x86, 0xef, 0x66, 0x6f, 0x98, 0xe1,
-  0x86, 0x60, 0x16, 0x05, 0x32, 0xa8, 0x21, 0xd0, 0xe1, 0x88, 0x79, 0x61,
-  0x47, 0x61, 0xf8, 0x2a, 0x10, 0xf4, 0xea, 0x65, 0x98, 0xe1, 0x86, 0xc0,
-  0x16, 0x05, 0x32, 0xa8, 0x60, 0xd0, 0x59, 0x86, 0x98, 0x31, 0x9b, 0xe0,
-  0xf4, 0x50, 0x18, 0xe6, 0x56, 0x3f, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40,
-  0x10, 0x0c, 0xb4, 0x9c, 0x14, 0x4e, 0x52, 0x20, 0x47, 0xa1, 0x26, 0x85,
-  0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d,
-  0x20, 0x86, 0x22, 0x0e, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x0b,
-  0x2c, 0x05, 0x97, 0x14, 0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04,
-  0x83, 0x2b, 0x2c, 0x85, 0x97, 0x14, 0x18, 0x22, 0x18, 0x31, 0x40, 0x00,
-  0x10, 0x04, 0x83, 0x4b, 0x2c, 0x05, 0x98, 0x14, 0x24, 0x22, 0x18, 0x31,
-  0x50, 0x00, 0x10, 0x04, 0x03, 0x25, 0x2d, 0x85, 0x97, 0x14, 0xdc, 0x51,
-  0x08, 0x78, 0x52, 0xf8, 0x47, 0x41, 0x27, 0x85, 0xd1, 0x84, 0x00, 0xb8,
-  0xc0, 0xb1, 0x59, 0x02, 0xb3, 0x19, 0x6e, 0xd0, 0xd5, 0x40, 0x2c, 0x05,
-  0x30, 0x98, 0x65, 0x98, 0x19, 0x9a, 0x09, 0x2a, 0x1d, 0x05, 0x99, 0x14,
-  0xe0, 0x02, 0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0x5a, 0x4b,
-  0x61, 0x26, 0x05, 0xf6, 0x13, 0x49, 0x61, 0xc4, 0xe0, 0x00, 0x40, 0x10,
-  0x0c, 0x26, 0xb6, 0x14, 0x66, 0x52, 0x08, 0x84, 0x0b, 0x86, 0x29, 0x76,
-  0x14, 0x6e, 0x52, 0x80, 0x0b, 0x9c, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04,
-  0x83, 0x09, 0x2e, 0x05, 0x9c, 0x14, 0x5c, 0xe6, 0x24, 0x85, 0x11, 0x83,
-  0x03, 0x00, 0x41, 0x30, 0x98, 0xe2, 0x52, 0xc0, 0x49, 0x21, 0x10, 0x2e,
-  0x18, 0xe6, 0x02, 0xa7, 0xee, 0x70, 0xea, 0x6a, 0x51, 0x18, 0xe6, 0xcc,
-  0x3f, 0x18, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40,
-  0x10, 0x0c, 0x34, 0xbb, 0x14, 0xc8, 0x52, 0x08, 0x49, 0x41, 0x2e, 0x85,
-  0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d,
-  0x20, 0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0xab,
-  0x2f, 0x85, 0xb5, 0x14, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04,
-  0x83, 0xcb, 0x2f, 0x05, 0xb6, 0x14, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00,
-  0x10, 0x04, 0x83, 0xeb, 0x2f, 0x85, 0xb6, 0x14, 0x12, 0x22, 0x18, 0x31,
-  0x50, 0x00, 0x10, 0x04, 0x03, 0xc5, 0x34, 0x05, 0xb6, 0x14, 0x56, 0x52,
-  0x08, 0xf2, 0x52, 0xe0, 0x49, 0xe1, 0x2e, 0x85, 0xd1, 0x84, 0x00, 0xb8,
-  0xc0, 0xb1, 0x59, 0x02, 0xb3, 0x19, 0x6e, 0xb8, 0xd7, 0xc0, 0x2f, 0x05,
-  0x30, 0x98, 0x65, 0xa8, 0x19, 0xb3, 0x09, 0xac, 0x1f, 0x85, 0x7f, 0x14,
-  0xe2, 0x33, 0x1c, 0x81, 0x82, 0x01, 0x48, 0x0a, 0xc4, 0x37, 0xcb, 0x60,
-  0x33, 0x39, 0x13, 0x58, 0x48, 0x0a, 0x29, 0x18, 0xc4, 0xc7, 0x82, 0x81,
-  0x3e, 0x17, 0x0c, 0x73, 0x81, 0x53, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88,
-  0x4f, 0x11, 0xa8, 0x29, 0xe8, 0x70, 0x43, 0x60, 0x9a, 0x02, 0x18, 0xcc,
-  0x32, 0xdc, 0x0c, 0xce, 0x04, 0x36, 0xa4, 0xa4, 0x00, 0x9f, 0x59, 0x82,
-  0x9e, 0x31, 0x94, 0x14, 0x88, 0xf8, 0xcc, 0x12, 0xf4, 0xcc, 0x70, 0xc4,
-  0x0c, 0x06, 0x29, 0x29, 0x08, 0xdf, 0x2c, 0x83, 0xce, 0xf4, 0x4c, 0x60,
-  0x34, 0x18, 0xa8, 0xa4, 0x10, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc,
-  0x05, 0x4e, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xcc, 0xa6,
-  0xa0, 0xc3, 0x0d, 0x41, 0x6c, 0x0a, 0x60, 0x30, 0xcb, 0xb0, 0x33, 0x3c,
-  0x13, 0x98, 0x4c, 0x0a, 0x43, 0x7c, 0x66, 0x09, 0x7a, 0xc6, 0x88, 0x9a,
-  0x14, 0xe0, 0x33, 0x4b, 0xd0, 0x33, 0x03, 0x2d, 0x8e, 0x76, 0x33, 0x18,
-  0xce, 0x10, 0x3b, 0x23, 0xf0, 0x0c, 0x1f, 0x0a, 0x39, 0x73, 0xc1, 0x30,
-  0x46, 0x93, 0x02, 0x4e, 0x0a, 0xf1, 0x19, 0x8e, 0xf0, 0x9b, 0x9c, 0x14,
-  0x88, 0x6f, 0x96, 0xc1, 0x67, 0xc2, 0x26, 0x30, 0x9d, 0x14, 0xfe, 0x26,
-  0x3e, 0x16, 0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9c, 0xb2, 0xc0, 0x90,
-  0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x08, 0x4f, 0x41, 0x87, 0x1b, 0x82, 0xdf,
-  0x14, 0xc0, 0x60, 0x96, 0xe1, 0x67, 0xc0, 0x26, 0xb0, 0x41, 0x2c, 0x05,
-  0xf8, 0xcc, 0x12, 0x94, 0x8d, 0xfd, 0xa4, 0x40, 0xc4, 0x67, 0x96, 0xa0,
-  0x6c, 0x86, 0x23, 0x52, 0x07, 0x2c, 0x05, 0xe1, 0x9b, 0x65, 0x10, 0x9b,
-  0xb2, 0x09, 0x4c, 0x75, 0xc2, 0x52, 0x88, 0x8f, 0x05, 0x0e, 0x7d, 0x2e,
-  0x18, 0xe6, 0x02, 0xa7, 0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22,
-  0xd8, 0x53, 0xd0, 0xe1, 0x86, 0x40, 0x3d, 0x05, 0x30, 0x98, 0x65, 0x18,
-  0x1b, 0xb2, 0x09, 0x2c, 0x2d, 0x85, 0x21, 0x3e, 0xb3, 0x04, 0x65, 0x63,
-  0x84, 0x5b, 0x0a, 0xf0, 0x99, 0x25, 0x28, 0x9b, 0x81, 0x16, 0x47, 0xfb,
-  0x19, 0x0c, 0x6c, 0x88, 0xb1, 0x11, 0xc8, 0x06, 0xed, 0xc2, 0xe6, 0x82,
-  0x61, 0x2e, 0x70, 0xea, 0x36, 0xa7, 0x4e, 0x27, 0x85, 0x61, 0x6e, 0x0d,
-  0x85, 0x61, 0x8e, 0x18, 0xe6, 0x88, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04,
-  0xc1, 0x40, 0xdb, 0x4f, 0x21, 0x3d, 0x05, 0xd3, 0x14, 0xee, 0x53, 0x18,
-  0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04,
-  0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xb8, 0x44,
-  0x54, 0x80, 0x4f, 0x21, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30,
-  0xb8, 0x46, 0x54, 0x88, 0x4f, 0x21, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00,
-  0x41, 0x30, 0xb8, 0x48, 0x54, 0x90, 0x4f, 0x21, 0x21, 0x82, 0x11, 0x03,
-  0x05, 0x00, 0x41, 0x30, 0x50, 0x56, 0x54, 0x88, 0x4f, 0x01, 0x36, 0x85,
-  0xc0, 0x3f, 0x85, 0xf0, 0x14, 0xf8, 0x53, 0x18, 0x4d, 0x08, 0x80, 0x0b,
-  0x1c, 0x9b, 0x25, 0x30, 0x9b, 0x81, 0x16, 0xc7, 0x34, 0x62, 0x46, 0x74,
-  0x09, 0x98, 0x61, 0x89, 0x99, 0x11, 0xca, 0x46, 0x74, 0x09, 0x9a, 0xb1,
-  0xbf, 0x0d, 0x6e, 0x53, 0x80, 0xcf, 0x2c, 0xc3, 0xd9, 0xa4, 0x4d, 0xdf,
-  0x06, 0xc3, 0x11, 0xa1, 0x1b, 0x8c, 0xa7, 0x30, 0x7c, 0x27, 0xba, 0xc1,
-  0x30, 0xc3, 0x0d, 0x81, 0x6b, 0x0a, 0x64, 0x50, 0x43, 0xa0, 0xc3, 0x11,
-  0xc5, 0x79, 0x0a, 0xc3, 0x57, 0x81, 0xa0, 0x77, 0x0c, 0x33, 0xdc, 0x10,
-  0xc4, 0xa6, 0x40, 0x06, 0x15, 0x0c, 0x3a, 0xcb, 0x80, 0x36, 0x7d, 0x13,
-  0x5c, 0x5d, 0x0a, 0xc3, 0x9c, 0x29, 0x0a, 0xc3, 0x8c, 0x18, 0x1c, 0x00,
-  0x08, 0x82, 0x81, 0x46, 0xa3, 0x82, 0x88, 0x0a, 0xbf, 0x29, 0xc0, 0xa8,
-  0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3,
-  0x09, 0xc4, 0x50, 0xc4, 0x21, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70,
-  0xed, 0xa8, 0x90, 0xa2, 0xc2, 0x41, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82,
-  0x60, 0x70, 0xf1, 0xa8, 0xa0, 0xa2, 0x02, 0x43, 0x04, 0x23, 0x06, 0x08,
-  0x00, 0x82, 0x60, 0x70, 0xf5, 0xa8, 0xb0, 0xa2, 0x82, 0x44, 0x04, 0x23,
-  0x06, 0x0a, 0x00, 0x82, 0x60, 0xa0, 0x90, 0xa9, 0xa0, 0xa2, 0x42, 0x7a,
-  0x0a, 0xc1, 0x8d, 0x0a, 0xfa, 0x29, 0xd4, 0xa8, 0x30, 0x9a, 0x10, 0x00,
-  0x17, 0x38, 0x36, 0x4b, 0xd0, 0x37, 0xc3, 0x0d, 0xb5, 0x1b, 0xf4, 0xa8,
-  0x00, 0x06, 0xb3, 0x0c, 0x6a, 0xb3, 0x36, 0x41, 0x91, 0xa7, 0xd0, 0xa2,
-  0x02, 0x5c, 0xe0, 0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x4c, 0x66,
-  0x2a, 0xb8, 0xa8, 0xb0, 0xbb, 0x41, 0x7f, 0x0a, 0x23, 0x06, 0x07, 0x00,
-  0x82, 0x60, 0x30, 0x9d, 0xa9, 0xe0, 0xa2, 0x42, 0x20, 0x5c, 0x30, 0x4c,
-  0x9d, 0xa7, 0x20, 0xa3, 0x02, 0x5c, 0xe0, 0xd4, 0x88, 0xc1, 0x01, 0x80,
-  0x20, 0x18, 0x4c, 0x6b, 0x2a, 0xcc, 0xa8, 0x00, 0x06, 0x22, 0x2a, 0x8c,
-  0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0xc4, 0xa6, 0xc2, 0x8c, 0x0a, 0x81,
-  0x70, 0xc1, 0x30, 0x17, 0x38, 0x75, 0x87, 0x53, 0x07, 0x9b, 0xc2, 0x30,
-  0x17, 0x8e, 0xc2, 0x30, 0x47, 0x0c, 0x73, 0xc4, 0x30, 0x23, 0x06, 0x07,
-  0x00, 0x82, 0x60, 0xa0, 0xc5, 0xa9, 0xf0, 0xa3, 0x02, 0x7f, 0x0a, 0x6d,
-  0x2a, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2,
-  0x68, 0x02, 0x31, 0x14, 0x91, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
-  0x5c, 0x78, 0x2a, 0x98, 0xa9, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80,
-  0x20, 0x18, 0x5c, 0x79, 0x2a, 0x9c, 0xa9, 0x90, 0x10, 0xc1, 0x88, 0x01,
-  0x02, 0x80, 0x20, 0x18, 0x5c, 0x7a, 0x2a, 0xa0, 0xa9, 0x90, 0x10, 0xc1,
-  0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x28, 0xa1, 0x2a, 0x9c, 0xa9, 0x60,
-  0xa2, 0x42, 0x40, 0xa7, 0xc2, 0x8d, 0x0a, 0x72, 0x2a, 0x8c, 0x26, 0x04,
-  0xc0, 0x05, 0x8e, 0xcd, 0x12, 0xf4, 0xcd, 0x70, 0x83, 0xfc, 0x06, 0x79,
-  0x2a, 0x80, 0xc1, 0x2c, 0x03, 0xdb, 0xf4, 0x4d, 0x60, 0xf8, 0x29, 0xe8,
-  0xa7, 0x10, 0x9f, 0xe1, 0x88, 0xfb, 0x0d, 0xf6, 0x53, 0x20, 0xbe, 0x59,
-  0x86, 0xb6, 0x81, 0x9b, 0xc0, 0xf8, 0x53, 0xc0, 0xdf, 0x20, 0x3e, 0x16,
-  0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9c, 0xb2, 0xc0, 0x90, 0x8f, 0x15,
-  0x41, 0x7c, 0x8a, 0x18, 0x55, 0x41, 0x87, 0x1b, 0x82, 0x50, 0x15, 0xc0,
-  0x60, 0x96, 0xc1, 0x6d, 0xde, 0x26, 0xb0, 0x81, 0x44, 0x05, 0xf8, 0xcc,
-  0x12, 0xd0, 0x8d, 0x8d, 0xa8, 0x40, 0xc4, 0x67, 0x96, 0x80, 0x6e, 0x86,
-  0x23, 0x44, 0x38, 0x20, 0x51, 0x41, 0xf8, 0x66, 0x19, 0xe2, 0x86, 0x6e,
-  0x02, 0x1b, 0xe1, 0xa0, 0x44, 0x85, 0xf8, 0x58, 0xe0, 0xd0, 0xe7, 0x82,
-  0x61, 0x2e, 0x70, 0xca, 0x82, 0x48, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0xc2,
-  0x55, 0x05, 0x1d, 0x6e, 0x08, 0x58, 0x55, 0x00, 0x83, 0x59, 0x06, 0xb9,
-  0x99, 0x9b, 0xc0, 0x5a, 0x54, 0x18, 0xe2, 0x33, 0x4b, 0x40, 0x37, 0x46,
-  0xc0, 0xa8, 0x00, 0x9f, 0x59, 0x02, 0xba, 0x19, 0x68, 0x71, 0x34, 0xb7,
-  0xc1, 0xde, 0x86, 0x90, 0x1b, 0x61, 0x6e, 0x74, 0x70, 0x80, 0x9b, 0x0b,
-  0x86, 0xb1, 0x17, 0x15, 0x66, 0x54, 0x88, 0xcf, 0x70, 0x04, 0x2c, 0xd0,
-  0xa8, 0x40, 0x7c, 0xb3, 0x0c, 0x75, 0x83, 0x37, 0x81, 0xd5, 0xa8, 0x10,
-  0x0b, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0xe0, 0x94, 0x05,
-  0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x04, 0xaf, 0x0a, 0x3a, 0xdc, 0x10,
-  0xe8, 0xaa, 0x00, 0x06, 0xb3, 0x0c, 0x76, 0x73, 0x37, 0x81, 0x0d, 0x3d,
-  0x2a, 0xc0, 0x67, 0x96, 0x80, 0x6f, 0x4c, 0x47, 0x05, 0x22, 0x3e, 0xb3,
-  0x04, 0x7c, 0x33, 0x1c, 0xb1, 0x0b, 0x3b, 0x2a, 0x08, 0xdf, 0x2c, 0x43,
-  0xde, 0xf0, 0x4d, 0x60, 0xbc, 0xc0, 0xa3, 0x42, 0x7c, 0x2c, 0x70, 0xe8,
-  0x73, 0xc1, 0x30, 0x17, 0x38, 0x65, 0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8,
-  0x14, 0x71, 0xae, 0x82, 0x0e, 0x37, 0x04, 0xe5, 0x2a, 0x80, 0xc1, 0x2c,
-  0x83, 0xde, 0xec, 0x4d, 0x60, 0x64, 0x2a, 0x0c, 0xf1, 0x99, 0x25, 0xe0,
-  0x1b, 0x23, 0xd2, 0x54, 0x80, 0xcf, 0x2c, 0x01, 0xdf, 0x0c, 0xb4, 0x38,
-  0x9a, 0xdd, 0x60, 0x77, 0x43, 0xe8, 0x8d, 0xb0, 0x37, 0xac, 0x81, 0x37,
-  0x17, 0x0c, 0x73, 0x81, 0x53, 0xb7, 0x39, 0x75, 0x35, 0x2a, 0x0c, 0x73,
-  0x66, 0x29, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00,
-  0x20, 0x08, 0x06, 0x9a, 0xbd, 0x0a, 0xe4, 0x2a, 0x84, 0xaa, 0x20, 0xaf,
-  0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c,
-  0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1,
-  0xd5, 0xaf, 0xc2, 0xba, 0x0a, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08,
-  0x82, 0xc1, 0xe5, 0xaf, 0x02, 0xbb, 0x0a, 0x09, 0x11, 0x8c, 0x18, 0x20,
-  0x00, 0x08, 0x82, 0xc1, 0xf5, 0xaf, 0x42, 0xbb, 0x0a, 0x09, 0x11, 0x8c,
-  0x18, 0x28, 0x00, 0x08, 0x82, 0x81, 0x62, 0xb2, 0x02, 0xbb, 0x0a, 0xab,
-  0x2a, 0x04, 0xf9, 0x2a, 0xf0, 0xaa, 0x70, 0xaf, 0xc2, 0x68, 0x42, 0x00,
-  0x5c, 0xe0, 0xd8, 0x2c, 0x41, 0xdf, 0x0c, 0xb4, 0x38, 0xa6, 0x81, 0x36,
-  0x78, 0x4d, 0x9c, 0x0d, 0x4b, 0xa8, 0x8d, 0xc0, 0x37, 0x78, 0x4d, 0xac,
-  0xcd, 0x2c, 0x83, 0xdf, 0x80, 0xce, 0x1d, 0x07, 0xc3, 0x11, 0x7c, 0x1c,
-  0xf4, 0xaa, 0x30, 0x7c, 0xd7, 0xc7, 0xc1, 0x30, 0xc3, 0x0d, 0x01, 0xaa,
-  0x0a, 0x64, 0x50, 0x43, 0xa0, 0xc3, 0x11, 0xff, 0x10, 0xae, 0xc2, 0xf0,
-  0x55, 0x20, 0xe8, 0x85, 0xc4, 0x30, 0xc3, 0x0d, 0xc1, 0xaa, 0x0a, 0x64,
-  0x50, 0xc1, 0xa0, 0xb3, 0x0c, 0x7f, 0x43, 0x3b, 0xc1, 0xbd, 0xa9, 0x30,
-  0xcc, 0x81, 0xa6, 0x30, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x68,
-  0x2e, 0x2b, 0xf0, 0xab, 0x90, 0xab, 0x82, 0xca, 0x0a, 0xa3, 0x09, 0x01,
-  0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45,
-  0x1c, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x57, 0xcd, 0x0a, 0x23,
-  0x2b, 0x1c, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x97, 0xcd,
-  0x0a, 0x24, 0x2b, 0x30, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06,
-  0xd7, 0xcd, 0x0a, 0x25, 0x2b, 0x48, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20,
-  0x08, 0x06, 0x8a, 0xcf, 0x0a, 0x24, 0x2b, 0x8c, 0xab, 0x10, 0xc4, 0xac,
-  0x40, 0xaf, 0xc2, 0xcb, 0x0a, 0xa3, 0x09, 0x01, 0x70, 0x81, 0x63, 0xb3,
-  0x04, 0xb4, 0x33, 0xdc, 0xf0, 0xca, 0xc1, 0xcd, 0x0a, 0x60, 0x30, 0xcb,
-  0x10, 0x3a, 0xa2, 0x13, 0x94, 0xaf, 0x0a, 0x27, 0x2b, 0xc0, 0x05, 0x4e,
-  0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x04, 0xb6, 0x02, 0xca, 0x0a,
-  0xb6, 0x1c, 0xdc, 0xab, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x53,
-  0xd8, 0x0a, 0x28, 0x2b, 0x04, 0xc2, 0x05, 0xc3, 0x54, 0xb8, 0x0a, 0x2c,
-  0x2b, 0xc0, 0x05, 0x4e, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x54,
-  0xb6, 0x42, 0xcb, 0x0a, 0x3a, 0xc1, 0xaf, 0xc2, 0x88, 0xc1, 0x01, 0x80,
-  0x20, 0x18, 0x4c, 0x66, 0x2b, 0xb4, 0xac, 0x10, 0x08, 0x17, 0x0c, 0x73,
-  0x81, 0x53, 0x77, 0x38, 0x75, 0xaa, 0x2a, 0x0c, 0x73, 0xbb, 0x29, 0x0c,
-  0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06,
-  0xda, 0xda, 0x0a, 0x39, 0x2b, 0xd8, 0xab, 0x70, 0xb6, 0xc2, 0x68, 0x42,
-  0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43,
-  0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x25, 0xb7, 0x02,
-  0xd8, 0x0a, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x35,
-  0xb7, 0x42, 0xd8, 0x0a, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
-  0xc1, 0x45, 0xb7, 0x82, 0xd8, 0x0a, 0x09, 0x11, 0x8c, 0x18, 0x28, 0x00,
-  0x08, 0x82, 0x81, 0xb2, 0xb7, 0x42, 0xd8, 0x0a, 0x20, 0x2b, 0x04, 0x6e,
-  0x2b, 0xc4, 0xac, 0xc0, 0xb6, 0xc2, 0x68, 0x42, 0x00, 0x5c, 0xe0, 0xd8,
-  0x2c, 0x01, 0xed, 0x0c, 0x37, 0xb0, 0x73, 0x30, 0xb7, 0x02, 0x18, 0xcc,
-  0x32, 0x8c, 0x0e, 0xed, 0x04, 0x26, 0xaf, 0x02, 0xbd, 0x0a, 0xf1, 0x19,
-  0x8e, 0x90, 0xe7, 0xa0, 0x5e, 0x05, 0xe2, 0x9b, 0x65, 0x20, 0x9d, 0xd3,
-  0x09, 0xcc, 0x5e, 0x85, 0x79, 0x0e, 0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b,
-  0x86, 0xb9, 0xc0, 0x29, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88,
-  0xbe, 0x15, 0x74, 0xb8, 0x21, 0xd8, 0x5b, 0x01, 0x0c, 0x66, 0x19, 0x4a,
-  0xc7, 0x74, 0x02, 0x1b, 0xfc, 0x55, 0x80, 0xcf, 0x2c, 0xc1, 0xea, 0x58,
-  0xbf, 0x0a, 0x44, 0x7c, 0x66, 0x09, 0x56, 0x67, 0x38, 0xa2, 0x9f, 0x03,
-  0x7f, 0x15, 0x84, 0x6f, 0x96, 0x01, 0x75, 0x56, 0x27, 0x30, 0x7f, 0x0e,
-  0xfe, 0x55, 0x88, 0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa7,
-  0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0x50, 0x57, 0xd0, 0xe1,
-  0x86, 0xc0, 0x74, 0x05, 0x30, 0x98, 0x65, 0x48, 0x1d, 0xd5, 0x09, 0xec,
-  0x64, 0x85, 0x21, 0x3e, 0xb3, 0x04, 0xab, 0x63, 0x84, 0xca, 0x0a, 0xf0,
-  0x99, 0x25, 0x58, 0x9d, 0x81, 0x16, 0x47, 0x2b, 0x1d, 0xcc, 0x74, 0x88,
-  0xd4, 0x11, 0x54, 0x07, 0x46, 0x87, 0xd3, 0xb9, 0x60, 0x18, 0x4b, 0x59,
-  0xa1, 0x65, 0x85, 0xf8, 0x0c, 0x47, 0xa8, 0x86, 0xcb, 0x0a, 0xc4, 0x37,
-  0xcb, 0xc0, 0x3a, 0xaf, 0x13, 0xd8, 0xcb, 0x0a, 0xab, 0x11, 0x1f, 0x0b,
-  0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4e, 0x59, 0x60, 0xc8, 0xc7, 0x8a,
-  0x20, 0x3e, 0x45, 0xd8, 0xae, 0xa0, 0xc3, 0x0d, 0x01, 0xed, 0x0a, 0x60,
-  0x30, 0xcb, 0xd0, 0x3a, 0xae, 0x13, 0xd8, 0x70, 0xb3, 0x02, 0x7c, 0x66,
-  0x09, 0x66, 0xc7, 0x68, 0x56, 0x20, 0xe2, 0x33, 0x4b, 0x30, 0x3b, 0xc3,
-  0x11, 0xb5, 0x51, 0xb3, 0x82, 0xf0, 0xcd, 0x32, 0xc0, 0xce, 0xec, 0x04,
-  0x66, 0x1b, 0x36, 0x2b, 0xc4, 0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73,
-  0x81, 0x53, 0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0xe1, 0x2b,
-  0xe8, 0x70, 0x43, 0xf0, 0xbb, 0x02, 0x18, 0xcc, 0x32, 0xc4, 0x8e, 0xec,
-  0x04, 0xe6, 0xb3, 0xc2, 0x10, 0x9f, 0x59, 0x82, 0xd9, 0x31, 0x62, 0x6c,
-  0x05, 0xf8, 0xcc, 0x12, 0xcc, 0xce, 0x40, 0x8b, 0xa3, 0xb5, 0x0e, 0xe6,
-  0x3a, 0x44, 0xec, 0x08, 0xb2, 0x03, 0x3a, 0xaf, 0x73, 0xc1, 0x30, 0x17,
-  0x38, 0x75, 0x9b, 0x53, 0xf7, 0xb2, 0xc2, 0x30, 0x07, 0xa6, 0xc2, 0x30,
-  0x47, 0x0c, 0x73, 0xc4, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xa0,
-  0xc1, 0xaf, 0xe0, 0xbb, 0xc2, 0xde, 0x0a, 0xec, 0x2b, 0x8c, 0x26, 0x04,
-  0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14,
-  0x91, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0xf7, 0x2b, 0x94,
-  0xaf, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0xf8,
-  0x2b, 0x98, 0xaf, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
-  0x5c, 0xf9, 0x2b, 0x9c, 0xaf, 0x90, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80,
-  0x20, 0x18, 0x28, 0x20, 0x2c, 0x98, 0xaf, 0x50, 0xba, 0x42, 0x30, 0xbf,
-  0x82, 0xed, 0x0a, 0xf1, 0x2b, 0x8c, 0x26, 0x04, 0xc0, 0x05, 0x8e, 0xcd,
-  0x12, 0xd0, 0xce, 0x40, 0x8b, 0x63, 0x1a, 0x7f, 0xc3, 0xf6, 0x84, 0xdf,
-  0xb0, 0x44, 0xe8, 0x08, 0xb3, 0xc3, 0xf6, 0x84, 0xe8, 0xcc, 0x32, 0xd4,
-  0xce, 0xed, 0xc4, 0x75, 0x30, 0x1c, 0xc1, 0xb7, 0xc1, 0xed, 0x0a, 0xc3,
-  0x77, 0x7d, 0x1b, 0x0c, 0x33, 0xdc, 0x10, 0x88, 0xae, 0x40, 0x06, 0x35,
-  0x04, 0x3a, 0x1c, 0x91, 0x1f, 0xbb, 0x2b, 0x0c, 0x5f, 0x05, 0x82, 0xde,
-  0x7e, 0x0c, 0x33, 0xdc, 0x10, 0x94, 0xae, 0x40, 0x06, 0x15, 0x0c, 0x3a,
-  0xcb, 0x60, 0x3b, 0xeb, 0x13, 0x5c, 0xda, 0x0a, 0xc3, 0x9c, 0x9e, 0x0a,
-  0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x81, 0x86, 0xc2, 0x82, 0xfd,
-  0x0a, 0xb3, 0x2b, 0x90, 0xb0, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42,
-  0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0xc4, 0x21, 0x23, 0x06,
-  0x08, 0x00, 0x82, 0x60, 0x70, 0xbd, 0xb0, 0xd0, 0xbf, 0xc2, 0x41, 0x04,
-  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0xc1, 0xb0, 0xe0, 0xbf, 0x02,
-  0x43, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0xc5, 0xb0, 0xf0,
-  0xbf, 0x82, 0x44, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xa0, 0xe0,
-  0xb0, 0xe0, 0xbf, 0x42, 0xef, 0x0a, 0xc1, 0x0a, 0x0b, 0xee, 0x2b, 0xa4,
-  0xb0, 0x30, 0x9a, 0x10, 0x00, 0x17, 0x38, 0x36, 0x4b, 0xb0, 0x3e, 0xc3,
-  0x0d, 0xa9, 0x1d, 0xc4, 0xb0, 0x00, 0x06, 0xb3, 0x0c, 0xb8, 0x93, 0x3b,
-  0x41, 0xe1, 0xae, 0x10, 0xc2, 0x02, 0x5c, 0xe0, 0xd4, 0x88, 0xc1, 0x01,
-  0x80, 0x20, 0x18, 0x4c, 0x3a, 0x2c, 0x88, 0xb0, 0x60, 0xbb, 0x41, 0xfc,
-  0x0a, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x30, 0xed, 0xb0, 0x20, 0xc2,
-  0x42, 0x20, 0x5c, 0x30, 0x4c, 0xed, 0xae, 0x60, 0xc2, 0x02, 0x5c, 0xe0,
-  0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x4c, 0x3f, 0x2c, 0x9c, 0xb0,
-  0x40, 0x23, 0xf6, 0x2b, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x04,
-  0xc6, 0xc2, 0x09, 0x0b, 0x81, 0x70, 0xc1, 0x30, 0x17, 0x38, 0x75, 0x87,
-  0x53, 0x47, 0xba, 0xc2, 0x30, 0x57, 0xab, 0xc2, 0x30, 0x47, 0x0c, 0x73,
-  0xc4, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xa0, 0x95, 0xb1, 0x30,
-  0xc3, 0x02, 0xfc, 0x0a, 0x61, 0x2c, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82,
-  0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x91, 0xc8, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0x6c, 0x2c, 0xe8, 0xb0, 0x90, 0x10,
-  0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0x6d, 0x2c, 0xec, 0xb0,
-  0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0x6e, 0x2c,
-  0xf0, 0xb0, 0x90, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x28,
-  0x75, 0x2c, 0xec, 0xb0, 0xa0, 0xbf, 0x42, 0x80, 0xc6, 0xc2, 0x0a, 0x0b,
-  0x66, 0x2c, 0x8c, 0x26, 0x04, 0xc0, 0x05, 0x8e, 0xcd, 0x12, 0xac, 0xcf,
-  0x70, 0x83, 0x79, 0x07, 0x6d, 0x2c, 0x80, 0xc1, 0x2c, 0x83, 0xee, 0xac,
-  0x4f, 0x60, 0xec, 0x2b, 0xb8, 0xaf, 0x10, 0x9f, 0xe1, 0x08, 0xf9, 0x0d,
-  0xde, 0x57, 0x20, 0xbe, 0x59, 0x86, 0xdd, 0xf1, 0x9d, 0xc0, 0xe0, 0x57,
-  0x98, 0xdf, 0x20, 0x3e, 0x16, 0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9c,
-  0xb2, 0xc0, 0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xb8, 0x63, 0x41, 0x87,
-  0x1b, 0x82, 0x3a, 0x16, 0xc0, 0x60, 0x96, 0x81, 0x77, 0x7a, 0x27, 0xb0,
-  0x01, 0x7f, 0x05, 0xf8, 0xcc, 0x12, 0x88, 0x8f, 0xdd, 0xaf, 0x40, 0xc4,
-  0x67, 0x96, 0x40, 0x7c, 0x86, 0x23, 0xfa, 0x37, 0xc0, 0x5f, 0x41, 0xf8,
-  0x66, 0x19, 0x7e, 0x47, 0x7c, 0x02, 0xf3, 0xdf, 0x20, 0x7f, 0x85, 0xf8,
-  0x58, 0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x70, 0xca, 0x82, 0x48, 0x3e,
-  0x56, 0x04, 0xf1, 0x29, 0x42, 0x94, 0x05, 0x1d, 0x6e, 0x08, 0x40, 0x59,
-  0x00, 0x83, 0x59, 0x06, 0xf0, 0x09, 0x9f, 0xc0, 0x42, 0x58, 0x18, 0xe2,
-  0x33, 0x4b, 0x20, 0x3e, 0x46, 0x90, 0xb0, 0x00, 0x9f, 0x59, 0x02, 0xf1,
-  0x19, 0x68, 0x71, 0x34, 0xde, 0xc1, 0x7a, 0x87, 0x00, 0x1f, 0x21, 0x7c,
-  0x60, 0x70, 0xf0, 0x9d, 0x0b, 0x86, 0xb1, 0x11, 0x16, 0x4e, 0x58, 0x88,
-  0xcf, 0x70, 0x04, 0xa9, 0xa0, 0xb0, 0x40, 0x7c, 0xb3, 0x0c, 0xe3, 0x63,
-  0x3e, 0x81, 0xa5, 0xb0, 0x50, 0x2a, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05,
-  0xc3, 0x5c, 0xe0, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x04,
-  0x2c, 0x0b, 0x3a, 0xdc, 0x10, 0xb8, 0xb2, 0x00, 0x06, 0xb3, 0x0c, 0xe4,
-  0x53, 0x3e, 0x81, 0x0d, 0x31, 0x2c, 0xc0, 0x67, 0x96, 0x40, 0x7d, 0xcc,
-  0x85, 0x05, 0x22, 0x3e, 0xb3, 0x04, 0xea, 0x33, 0x1c, 0xf1, 0x2a, 0x2f,
-  0x2c, 0x08, 0xdf, 0x2c, 0xc3, 0xf9, 0xa8, 0x4f, 0x60, 0xb0, 0x02, 0xc3,
-  0x42, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x38, 0x65, 0x41,
-  0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xb1, 0xcb, 0x82, 0x0e, 0x37, 0x04,
-  0xb9, 0x2c, 0x80, 0xc1, 0x2c, 0x03, 0xfa, 0xa4, 0x4f, 0x60, 0x38, 0x2c,
-  0x0c, 0xf1, 0x99, 0x25, 0x50, 0x1f, 0x23, 0x7a, 0x58, 0x80, 0xcf, 0x2c,
-  0x81, 0xfa, 0x0c, 0xb4, 0x38, 0x1a, 0xf9, 0x60, 0xe5, 0x43, 0xa0, 0x8f,
-  0x90, 0x3e, 0x74, 0x65, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x53, 0xb7, 0x39,
-  0x75, 0x29, 0x2c, 0x0c, 0x73, 0x3a, 0x2b, 0x0c, 0x73, 0xc4, 0x30, 0x47,
-  0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x9a, 0x3a, 0x0b, 0xb8,
-  0x2c, 0xd4, 0xb1, 0x60, 0xce, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08,
-  0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18,
-  0x20, 0x00, 0x08, 0x82, 0xc1, 0x15, 0xcf, 0xc2, 0x2f, 0x0b, 0x09, 0x11,
-  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x25, 0xcf, 0x02, 0x38, 0x0b,
-  0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x35, 0xcf, 0x42,
-  0x38, 0x0b, 0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0x81, 0xa2,
-  0xcf, 0x02, 0x38, 0x0b, 0x7f, 0x2c, 0x04, 0xed, 0x2c, 0xc0, 0xb2, 0xb0,
-  0xce, 0xc2, 0x68, 0x42, 0x00, 0x5c, 0xe0, 0xd8, 0x2c, 0xc1, 0xfa, 0x0c,
-  0xb4, 0x38, 0xa6, 0x61, 0x3b, 0x60, 0x59, 0xd4, 0x0e, 0x4b, 0xe0, 0x8e,
-  0xa0, 0x3e, 0x60, 0x59, 0xe4, 0xce, 0x2c, 0x03, 0xfb, 0xb8, 0xcf, 0x9a,
-  0x07, 0xc3, 0x11, 0x7b, 0x1b, 0xc4, 0xb2, 0x30, 0x7c, 0xc7, 0xb7, 0xc1,
-  0x30, 0xc3, 0x0d, 0x01, 0x1f, 0x0b, 0x64, 0x50, 0x43, 0xa0, 0xc3, 0x11,
-  0xf3, 0x52, 0xcb, 0xc2, 0xf0, 0x55, 0x20, 0xe8, 0xd5, 0xcb, 0x30, 0xc3,
-  0x0d, 0xc1, 0x1f, 0x0b, 0x64, 0x50, 0xc1, 0xa0, 0xb3, 0x0c, 0xed, 0x23,
-  0x42, 0xc1, 0x8d, 0xb1, 0x30, 0xcc, 0xd1, 0xad, 0x30, 0xcc, 0x88, 0xc1,
-  0x01, 0x80, 0x20, 0x18, 0x68, 0x22, 0x2d, 0xc0, 0xb3, 0xd0, 0xca, 0x82,
-  0x3f, 0x0b, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83,
-  0x30, 0x9a, 0x40, 0x0c, 0x45, 0x1c, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08,
-  0x06, 0x57, 0x4a, 0x0b, 0xf7, 0x2c, 0x1c, 0x44, 0x30, 0x62, 0x80, 0x00,
-  0x20, 0x08, 0x06, 0x97, 0x4a, 0x0b, 0xf8, 0x2c, 0x30, 0x44, 0x30, 0x62,
-  0x80, 0x00, 0x20, 0x08, 0x06, 0xd7, 0x4a, 0x0b, 0xf9, 0x2c, 0x48, 0x44,
-  0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x8a, 0x4c, 0x0b, 0xf8, 0x2c,
-  0xdc, 0xb2, 0x10, 0x94, 0xb4, 0x80, 0xce, 0xc2, 0x48, 0x0b, 0xa3, 0x09,
-  0x01, 0x70, 0x81, 0x63, 0xb3, 0x04, 0x22, 0x34, 0xdc, 0x30, 0xea, 0xc1,
-  0x4a, 0x0b, 0x60, 0x30, 0xcb, 0xf0, 0x3e, 0xf0, 0x13, 0x94, 0x2c, 0x0b,
-  0xfb, 0x2c, 0xc0, 0x05, 0x4e, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1,
-  0x44, 0xd3, 0x02, 0x3f, 0x0b, 0xb5, 0x1b, 0xac, 0xb3, 0x30, 0x62, 0x70,
-  0x00, 0x20, 0x08, 0x06, 0x53, 0x4d, 0x0b, 0xfc, 0x2c, 0x04, 0xc2, 0x05,
-  0xc3, 0x54, 0x2d, 0x0b, 0x20, 0x2d, 0xc0, 0x05, 0x4e, 0x8d, 0x18, 0x1c,
-  0x00, 0x08, 0x82, 0xc1, 0x94, 0xd3, 0x42, 0x48, 0x0b, 0x2e, 0x03, 0xcf,
-  0xc2, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x4c, 0x3a, 0x2d, 0x84, 0xb4,
-  0x10, 0x08, 0x17, 0x0c, 0x73, 0x81, 0x53, 0x77, 0x38, 0x75, 0x7e, 0x2c,
-  0x0c, 0x73, 0xaf, 0x2b, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62,
-  0x70, 0x00, 0x20, 0x08, 0x06, 0xda, 0x4f, 0x0b, 0x2d, 0x2d, 0xa8, 0xb3,
-  0xb0, 0xd3, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2,
-  0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08,
-  0x82, 0xc1, 0x65, 0xd6, 0x02, 0x4d, 0x0b, 0x09, 0x11, 0x8c, 0x18, 0x20,
-  0x00, 0x08, 0x82, 0xc1, 0x75, 0xd6, 0x42, 0x4d, 0x0b, 0x09, 0x11, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x85, 0xd6, 0x82, 0x4d, 0x0b, 0x09,
-  0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0x81, 0xf2, 0xd6, 0x42, 0x4d,
-  0x0b, 0xf4, 0x2c, 0x04, 0x62, 0x2d, 0x94, 0xb4, 0x00, 0xd6, 0xc2, 0x68,
-  0x42, 0x00, 0x5c, 0xe0, 0xd8, 0x2c, 0x81, 0x08, 0x0d, 0x37, 0x80, 0x7b,
-  0x70, 0xd6, 0x02, 0x18, 0xcc, 0x32, 0xc4, 0x8f, 0x08, 0x05, 0x66, 0xce,
-  0x02, 0x3a, 0x0b, 0xf1, 0x19, 0x8e, 0x88, 0xdf, 0x20, 0x9d, 0x05, 0xe2,
-  0x9b, 0x65, 0x90, 0x9f, 0xfa, 0x09, 0x4c, 0x9d, 0x05, 0xf9, 0x0d, 0xe2,
-  0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x29, 0x0b, 0x0c, 0xf9,
-  0x58, 0x11, 0xc4, 0xa7, 0x88, 0xb8, 0x16, 0x74, 0xb8, 0x21, 0x78, 0x6b,
-  0x01, 0x0c, 0x66, 0x19, 0xe6, 0x87, 0x7e, 0x02, 0x1b, 0xe4, 0x59, 0x80,
-  0xcf, 0x2c, 0x41, 0xfe, 0x58, 0x3c, 0x0b, 0x44, 0x7c, 0x66, 0x09, 0xf2,
-  0x67, 0x38, 0x82, 0x7f, 0x03, 0x79, 0x16, 0x84, 0x6f, 0x96, 0xc1, 0x7e,
-  0xf2, 0x27, 0xb0, 0xfe, 0x0d, 0xe6, 0x59, 0x88, 0x8f, 0x05, 0x0e, 0x7d,
-  0x2e, 0x18, 0xe6, 0x02, 0xa7, 0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f,
-  0x22, 0xf8, 0x5a, 0xd0, 0xe1, 0x86, 0x40, 0xaf, 0x05, 0x30, 0x98, 0x65,
-  0xb8, 0x1f, 0xfc, 0x09, 0x6c, 0x9f, 0x85, 0x21, 0x3e, 0xb3, 0x04, 0xf9,
-  0x63, 0x84, 0x3f, 0x0b, 0xf0, 0x99, 0x25, 0xc8, 0x9f, 0x81, 0x16, 0x47,
-  0x9b, 0x1f, 0x8c, 0x7e, 0x88, 0xfb, 0x11, 0xf0, 0xc7, 0x05, 0x87, 0xfa,
-  0xb9, 0x60, 0x18, 0xeb, 0x67, 0x21, 0xa4, 0x85, 0xf8, 0x0c, 0x47, 0xf8,
-  0x8d, 0x48, 0x0b, 0xc4, 0x37, 0xcb, 0xa0, 0x3f, 0xfd, 0x13, 0xd8, 0x48,
-  0x0b, 0x7f, 0x13, 0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4e,
-  0x59, 0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xa8, 0xb6, 0xa0, 0xc3,
-  0x0d, 0x01, 0x6a, 0x0b, 0x60, 0x30, 0xcb, 0xb0, 0x3f, 0xfc, 0x13, 0xd8,
-  0xb0, 0xd2, 0x02, 0x7c, 0x66, 0x09, 0x42, 0xc8, 0x50, 0x5a, 0x20, 0xe2,
-  0x33, 0x4b, 0x10, 0x42, 0xc3, 0x11, 0xa9, 0x93, 0xd2, 0x82, 0xf0, 0xcd,
-  0x32, 0xf8, 0x4f, 0x08, 0x05, 0xa6, 0x3a, 0x2a, 0x2d, 0xc4, 0xc7, 0x02,
-  0x87, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x53, 0x16, 0x44, 0xf2, 0xb1, 0x22,
-  0x88, 0x4f, 0x11, 0xb5, 0x2d, 0xe8, 0x70, 0x43, 0x30, 0xdb, 0x02, 0x18,
-  0xcc, 0x32, 0xfc, 0x0f, 0x08, 0x05, 0x26, 0xd3, 0xc2, 0x10, 0x9f, 0x59,
-  0x82, 0x10, 0x32, 0xe2, 0xa6, 0x05, 0xf8, 0xcc, 0x12, 0x84, 0xd0, 0x40,
-  0x8b, 0xa3, 0xed, 0x0f, 0xc6, 0x3f, 0xc4, 0xff, 0x08, 0x20, 0x84, 0x76,
-  0xfd, 0x73, 0xc1, 0x30, 0x17, 0x38, 0x75, 0x9b, 0x53, 0x37, 0xd2, 0xc2,
-  0x30, 0x47, 0xc3, 0xc2, 0x30, 0x47, 0x0c, 0x73, 0xc4, 0x30, 0x23, 0x06,
-  0x07, 0x00, 0x82, 0x60, 0xa0, 0x91, 0xb7, 0x20, 0xdb, 0xc2, 0x5b, 0x0b,
-  0xe0, 0x2d, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c,
-  0xc2, 0x68, 0x02, 0x31, 0x14, 0x91, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20,
-  0x18, 0x5c, 0xeb, 0x2d, 0xe4, 0xb6, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02,
-  0x80, 0x20, 0x18, 0x5c, 0xec, 0x2d, 0xe8, 0xb6, 0x90, 0x10, 0xc1, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0xed, 0x2d, 0xec, 0xb6, 0x90, 0x10,
-  0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x28, 0xf4, 0x2d, 0xe8, 0xb6,
-  0x90, 0xd7, 0x42, 0x70, 0xde, 0x82, 0x6a, 0x0b, 0xe5, 0x2d, 0x8c, 0x26,
-  0x04, 0xc0, 0x05, 0x8e, 0xcd, 0x12, 0x88, 0xd0, 0x40, 0x8b, 0x63, 0x1a,
-  0xed, 0x43, 0xb3, 0x05, 0xfb, 0xb0, 0xc4, 0xfb, 0x08, 0x21, 0x44, 0xb3,
-  0x05, 0xfc, 0x8c, 0x18, 0x18, 0x00, 0x08, 0x82, 0xc1, 0x63, 0xdf, 0x42,
-  0x6d, 0x0b, 0x66, 0x2c, 0x8c, 0x18, 0x18, 0x00, 0x08, 0x82, 0xc1, 0x73,
-  0xdf, 0x82, 0x6d, 0x0b, 0x66, 0x2c, 0x58, 0x10, 0xc8, 0xc7, 0x02, 0x41,
-  0x3e, 0xf6, 0xe6, 0x41, 0x6b, 0x0b, 0xf2, 0x19, 0x31, 0x40, 0x00, 0x10,
-  0x04, 0x83, 0x48, 0xbf, 0x85, 0xdf, 0x16, 0x5a, 0x5b, 0xe8, 0xb5, 0xc0,
-  0xe2, 0x3c, 0x68, 0x6d, 0x41, 0x3e, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60,
-  0x10, 0xf1, 0xb7, 0x10, 0xde, 0x02, 0x6b, 0x0b, 0xa8, 0x1a, 0x04, 0x23,
-  0x06, 0x08, 0x00, 0x82, 0x60, 0x10, 0xf5, 0xb7, 0x20, 0xde, 0xc2, 0x6b,
-  0x0b, 0xe0, 0x16, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x41, 0xe4, 0xdf,
-  0xc2, 0x78, 0x0b, 0xb2, 0x2d, 0xe0, 0x4b, 0x30, 0x62, 0x80, 0x00, 0x20,
-  0x08, 0x06, 0xd1, 0x7f, 0x0b, 0xe4, 0x2d, 0xb8, 0xb6, 0xb0, 0xaa, 0x81,
-  0x31, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x11, 0x88, 0x0b, 0xe5, 0x2d,
-  0xb8, 0xb6, 0x30, 0x6e, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x44,
-  0x21, 0x2e, 0x98, 0xb7, 0x40, 0xdb, 0xc2, 0xbe, 0x04, 0x23, 0x06, 0x0d,
-  0x00, 0x82, 0x60, 0x50, 0x81, 0xb8, 0x50, 0xde, 0xc2, 0x6d, 0x0b, 0xcc,
-  0xa2, 0xb8, 0x6a, 0x80, 0x10, 0x81, 0xfd, 0x75, 0x70, 0xdb, 0x82, 0x7c,
-  0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x20, 0x22, 0x71, 0x21, 0xbd, 0x85,
-  0xdb, 0x16, 0xda, 0x2b, 0xb0, 0xd0, 0x0e, 0x6e, 0x5b, 0x90, 0xcf, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x44, 0x26, 0x2e, 0xac, 0xb7, 0x60, 0xdb,
-  0x02, 0x6e, 0x06, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x44, 0x27,
-  0x2e, 0xb0, 0xb7, 0x90, 0xdb, 0x02, 0x7c, 0x05, 0x23, 0x06, 0x08, 0x00,
-  0x82, 0x60, 0x10, 0xa1, 0xb8, 0xd0, 0xde, 0x02, 0x6f, 0x0b, 0x28, 0x12,
-  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x41, 0x94, 0xe2, 0x82, 0x7b, 0x0b,
-  0xb8, 0x2d, 0xec, 0x66, 0x60, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x41,
-  0xa4, 0xe2, 0xc2, 0x7b, 0x0b, 0xb8, 0x2d, 0xcc, 0x57, 0x30, 0x62, 0x80,
-  0x00, 0x20, 0x08, 0x06, 0xd1, 0x8a, 0x0b, 0xf0, 0x2d, 0xf8, 0xb6, 0xb0,
-  0x22, 0xc1, 0x88, 0x41, 0x03, 0x80, 0x20, 0x18, 0x54, 0x2a, 0x2e, 0xbc,
-  0xb7, 0x10, 0xde, 0x82, 0x55, 0x51, 0xbe, 0x19, 0x20, 0x44, 0x60, 0xae,
-  0x1c, 0x84, 0xb7, 0x20, 0x9f, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0x88,
-  0x5c, 0x5c, 0x98, 0x6f, 0x21, 0xbc, 0x85, 0x7e, 0x0a, 0x0c, 0x96, 0x83,
-  0xf0, 0x16, 0xe4, 0x33, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x11, 0x8c,
-  0x0b, 0xf5, 0x2d, 0x80, 0xb7, 0x80, 0x8e, 0x41, 0x30, 0x62, 0x80, 0x00,
-  0x20, 0x08, 0x06, 0x51, 0x8c, 0x0b, 0xf6, 0x2d, 0x8c, 0xb7, 0x00, 0x52,
-  0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x44, 0x32, 0x2e, 0xdc, 0xb7,
-  0x60, 0xde, 0x02, 0x4e, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x10,
-  0xcd, 0xb8, 0x80, 0xdf, 0x82, 0x78, 0x0b, 0xeb, 0x18, 0x18, 0x23, 0x06,
-  0x08, 0x00, 0x82, 0x60, 0x10, 0xd1, 0xb8, 0x90, 0xdf, 0x82, 0x78, 0x0b,
-  0x23, 0x15, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x41, 0x54, 0xe3, 0x82,
-  0x7e, 0x0b, 0xe8, 0x2d, 0xec, 0x44, 0x30, 0x62, 0xd0, 0x00, 0x20, 0x08,
-  0x06, 0x15, 0x8d, 0x0b, 0xf9, 0x2d, 0xac, 0xb7, 0x00, 0x06, 0x9f, 0xe7,
-  0x8e, 0x01, 0x42, 0x04, 0xd6, 0xbb, 0xc1, 0x7a, 0x0b, 0xf2, 0x19, 0x31,
-  0x40, 0x00, 0x10, 0x04, 0x83, 0x08, 0xc7, 0x85, 0xfe, 0x16, 0xd6, 0x5b,
-  0x68, 0xa1, 0xc0, 0x7e, 0x37, 0x58, 0x6f, 0x41, 0x3e, 0x23, 0x06, 0x08,
-  0x00, 0x82, 0x60, 0x10, 0xe9, 0xb8, 0xf0, 0xdf, 0x82, 0x7a, 0x0b, 0xf8,
-  0x17, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x41, 0xb4, 0xe3, 0x02, 0x88,
-  0x0b, 0xed, 0x2d, 0xc0, 0x50, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06,
-  0x11, 0x8f, 0x0b, 0x21, 0x2e, 0xc0, 0xb7, 0x80, 0x06, 0xc1, 0x88, 0x01,
-  0x02, 0x80, 0x20, 0x18, 0x44, 0x3d, 0x2e, 0x88, 0xb8, 0xc0, 0xde, 0xc2,
-  0xfe, 0x19, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x10, 0xf9, 0xb8, 0x30,
-  0xe2, 0x02, 0x7b, 0x0b, 0x33, 0x14, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
-  0x41, 0xf4, 0xe3, 0x02, 0x89, 0x0b, 0xf2, 0x2d, 0xac, 0x41, 0x30, 0x62,
-  0xd0, 0x00, 0x20, 0x08, 0x06, 0x95, 0x8f, 0x0b, 0x23, 0x2e, 0xd4, 0xb7,
-  0xa0, 0x06, 0x69, 0x80, 0x06, 0xfe, 0x87, 0x10, 0x81, 0xb1, 0x01, 0x1b,
-  0xc8, 0xc7, 0x82, 0x36, 0x90, 0x8f, 0x85, 0xc1, 0x7d, 0x0b, 0xf2, 0x19,
-  0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x88, 0xcc, 0x85, 0x14, 0x17, 0xee,
-  0x5b, 0x70, 0x02, 0x1b, 0x83, 0xfb, 0x16, 0xe4, 0x33, 0x62, 0x80, 0x00,
-  0x20, 0x08, 0x06, 0x91, 0x99, 0x0b, 0x2b, 0x2e, 0xd8, 0xb7, 0xa0, 0x05,
-  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x10, 0x9d, 0xb9, 0xc0, 0xe2, 0x42,
-  0x7e, 0x0b, 0x51, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x11, 0x9a,
-  0x0b, 0x2d, 0x2e, 0xf0, 0xb7, 0x80, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82,
-  0x60, 0x10, 0xa5, 0xb9, 0xe0, 0xe2, 0x02, 0x7e, 0x0b, 0x9d, 0x31, 0x62,
-  0x80, 0x00, 0x20, 0x08, 0x06, 0x91, 0x9a, 0x0b, 0x2f, 0x2e, 0xe0, 0xb7,
-  0x40, 0x05, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x10, 0xad, 0xb9, 0x00,
-  0xe3, 0x82, 0x7f, 0x0b, 0x4b, 0x30, 0x62, 0xd0, 0x00, 0x20, 0x08, 0x06,
-  0x95, 0x9a, 0x0b, 0x2f, 0x2e, 0x84, 0xb8, 0x70, 0x07, 0x8b, 0x02, 0x06,
-  0x08, 0x11, 0x5c, 0x30, 0xd0, 0x88, 0x81, 0x03, 0x80, 0x20, 0x18, 0x30,
-  0x70, 0x2e, 0xb4, 0xb8, 0x80, 0xdf, 0x42, 0x7c, 0x0b, 0x64, 0x2e, 0x04,
-  0x32, 0x2e, 0xc8, 0xb8, 0x20, 0xe3, 0xc2, 0x89, 0x0b, 0x62, 0x2e, 0xcc,
-  0x12, 0x8c, 0x10, 0x02, 0x00, 0x00, 0x00, 0x00
-};
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_uint16_double.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_uint16_double.h
deleted file mode 100644
index 17bc195b96927..0000000000000
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_uint16_double.h
+++ /dev/null
@@ -1,6307 +0,0 @@
-#if 0
-;
-; Note: shader requires additional functionality:
-;       Double-precision floating point
-;       Use native low precision
-;
-;
-; Input signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; no parameters
-;
-; Output signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; no parameters
-; shader hash: 9756b447351cc4d1518fc66a9eb7e5c7
-;
-; Pipeline Runtime Information: 
-;
-;
-;
-; Buffer Definitions:
-;
-; cbuffer 
-; {
-;
-;   [116 x i8] (type annotation not present)
-;
-; }
-;
-; Resource bind info for 
-; {
-;
-;   [2 x i8] (type annotation not present)
-;
-; }
-;
-; Resource bind info for 
-; {
-;
-;   [8 x i8] (type annotation not present)
-;
-; }
-;
-; Resource bind info for 
-; {
-;
-;   [2 x i8] (type annotation not present)
-;
-; }
-;
-;
-; Resource Bindings:
-;
-; Name                                 Type  Format         Dim      ID      HLSL Bind  Count
-; ------------------------------ ---------- ------- ----------- ------- -------------- ------
-;                                   cbuffer      NA          NA     CB0            cb0     1
-;                                       UAV  struct         r/w      U0             u0     1
-;                                       UAV  struct         r/w      U1             u1     1
-;                                       UAV  struct         r/w      U2             u2     1
-;
-target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
-target triple = "dxil-ms-dx"
-
-%dx.types.Handle = type { i8* }
-%dx.types.CBufRet.i32 = type { i32, i32, i32, i32 }
-%dx.types.ResRet.i32 = type { i32, i32, i32, i32, i32 }
-%dx.types.ResRet.i16 = type { i16, i16, i16, i16, i32 }
-%"class.RWStructuredBuffer<unsigned short>" = type { i16 }
-%"class.RWStructuredBuffer<double>" = type { double }
-%Constants = type { i32, i32, i32, i32, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32 }
-
-define void @GridSample() {
-  %1 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 2, i32 2, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %2 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 1, i32 1, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %3 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %4 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %5 = call i32 @dx.op.threadId.i32(i32 93, i32 0)  ; ThreadId(component)
-  %6 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
-  %7 = extractvalue %dx.types.CBufRet.i32 %6, 0
-  %8 = add i32 %7, %5
-  %9 = extractvalue %dx.types.CBufRet.i32 %6, 1
-  %10 = icmp ult i32 %8, %9
-  br i1 %10, label %11, label %3323
-
-; <label>:11                                      ; preds = %0
-  %12 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
-  %13 = extractvalue %dx.types.CBufRet.i32 %12, 3
-  %14 = uitofp i32 %13 to float
-  %15 = extractvalue %dx.types.CBufRet.i32 %12, 2
-  %16 = uitofp i32 %15 to float
-  %17 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 7)  ; CBufferLoadLegacy(handle,regIndex)
-  %18 = extractvalue %dx.types.CBufRet.i32 %17, 0
-  %19 = icmp eq i32 %18, 0
-  %20 = select i1 %19, float -5.000000e-01, float 0.000000e+00
-  %21 = select i1 %19, float -5.000000e-01, float -1.000000e+00
-  %22 = fadd float %14, %21
-  %23 = select i1 %19, float -5.000000e-01, float -1.000000e+00
-  %24 = fadd float %16, %23
-  %25 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
-  %26 = extractvalue %dx.types.CBufRet.i32 %25, 1
-  %27 = extractvalue %dx.types.CBufRet.i32 %25, 2
-  %28 = extractvalue %dx.types.CBufRet.i32 %25, 3
-  %29 = mul i32 %28, %27
-  %30 = mul i32 %27, %26
-  %31 = mul i32 %30, %28
-  %32 = udiv i32 %8, %31
-  %33 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 6)  ; CBufferLoadLegacy(handle,regIndex)
-  %34 = extractvalue %dx.types.CBufRet.i32 %33, 0
-  %35 = mul i32 %34, %32
-  %36 = sub i32 %8, %35
-  %37 = udiv i32 %36, %29
-  %38 = extractvalue %dx.types.CBufRet.i32 %33, 1
-  %39 = mul i32 %38, %37
-  %40 = sub i32 %36, %39
-  %41 = udiv i32 %40, %28
-  %42 = extractvalue %dx.types.CBufRet.i32 %33, 2
-  %43 = mul i32 %42, %41
-  %44 = sub i32 %40, %43
-  %45 = uitofp i32 %32 to float
-  %46 = uitofp i32 %41 to float
-  %47 = uitofp i32 %44 to float
-  %48 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
-  %49 = extractvalue %dx.types.CBufRet.i32 %48, 0
-  %50 = extractvalue %dx.types.CBufRet.i32 %48, 1
-  %51 = extractvalue %dx.types.CBufRet.i32 %48, 2
-  %52 = extractvalue %dx.types.CBufRet.i32 %48, 3
-  %53 = uitofp i32 %49 to float
-  %54 = uitofp i32 %50 to float
-  %55 = uitofp i32 %51 to float
-  %56 = uitofp i32 %52 to float
-  %57 = call float @dx.op.dot4.f32(i32 56, float %45, float %46, float %47, float 0.000000e+00, float %53, float %54, float %55, float %56)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %58 = fadd fast float %56, %57
-  %59 = fptoui float %57 to i32
-  %60 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %2, i32 %59, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %61 = extractvalue %dx.types.ResRet.i32 %60, 0
-  %62 = extractvalue %dx.types.ResRet.i32 %60, 1
-  %63 = call double @dx.op.makeDouble.f64(i32 101, i32 %61, i32 %62)  ; MakeDouble(lo,hi)
-  %64 = fptrunc double %63 to float
-  %65 = fptoui float %58 to i32
-  %66 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %2, i32 %65, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %67 = extractvalue %dx.types.ResRet.i32 %66, 0
-  %68 = extractvalue %dx.types.ResRet.i32 %66, 1
-  %69 = call double @dx.op.makeDouble.f64(i32 101, i32 %67, i32 %68)  ; MakeDouble(lo,hi)
-  %70 = fptrunc double %69 to float
-  %71 = icmp eq i32 %18, 1
-  %72 = fadd fast float %64, 1.000000e+00
-  %73 = fadd fast float %70, 1.000000e+00
-  br i1 %71, label %74, label %81
-
-; <label>:74                                      ; preds = %11
-  %75 = fmul fast float %72, 5.000000e-01
-  %76 = fmul fast float %73, 5.000000e-01
-  %77 = fadd fast float %14, -1.000000e+00
-  %78 = fadd fast float %16, -1.000000e+00
-  %79 = fmul fast float %75, %77
-  %80 = fmul fast float %76, %78
-  br label %88
-
-; <label>:81                                      ; preds = %11
-  %82 = fmul fast float %14, %72
-  %83 = fmul fast float %73, %16
-  %84 = fadd fast float %82, -1.000000e+00
-  %85 = fadd fast float %83, -1.000000e+00
-  %86 = fmul fast float %84, 5.000000e-01
-  %87 = fmul fast float %85, 5.000000e-01
-  br label %88
-
-; <label>:88                                      ; preds = %81, %74
-  %89 = phi float [ %79, %74 ], [ %86, %81 ]
-  %90 = phi float [ %80, %74 ], [ %87, %81 ]
-  %91 = extractvalue %dx.types.CBufRet.i32 %6, 2
-  %92 = icmp eq i32 %91, 1
-  br i1 %92, label %93, label %96
-
-; <label>:93                                      ; preds = %88
-  %94 = call float @dx.op.unary.f32(i32 26, float %89)  ; Round_ne(value)
-  %95 = call float @dx.op.unary.f32(i32 26, float %90)  ; Round_ne(value)
-  br label %96
-
-; <label>:96                                      ; preds = %93, %88
-  %97 = phi float [ %94, %93 ], [ %89, %88 ]
-  %98 = phi float [ %95, %93 ], [ %90, %88 ]
-  %99 = fcmp fast olt float %97, %20
-  %100 = fcmp fast ogt float %97, %22
-  %101 = or i1 %99, %100
-  %102 = fcmp fast olt float %98, %20
-  %103 = or i1 %101, %102
-  %104 = fcmp fast ogt float %98, %24
-  %105 = or i1 %104, %103
-  br i1 %105, label %106, label %179
-
-; <label>:106                                     ; preds = %96
-  %107 = extractvalue %dx.types.CBufRet.i32 %6, 3
-  %108 = icmp eq i32 %107, 1
-  br i1 %108, label %109, label %118
-
-; <label>:109                                     ; preds = %106
-  %110 = add i32 %13, -1
-  %111 = uitofp i32 %110 to float
-  %112 = call float @dx.op.binary.f32(i32 35, float %97, float 0.000000e+00)  ; FMax(a,b)
-  %113 = call float @dx.op.binary.f32(i32 36, float %112, float %111)  ; FMin(a,b)
-  %114 = add i32 %15, -1
-  %115 = uitofp i32 %114 to float
-  %116 = call float @dx.op.binary.f32(i32 35, float %98, float 0.000000e+00)  ; FMax(a,b)
-  %117 = call float @dx.op.binary.f32(i32 36, float %116, float %115)  ; FMin(a,b)
-  br label %179
-
-; <label>:118                                     ; preds = %106
-  %119 = icmp eq i32 %107, 2
-  br i1 %119, label %120, label %179
-
-; <label>:120                                     ; preds = %118
-  %121 = fsub fast float %22, %20
-  br i1 %99, label %122, label %135
-
-; <label>:122                                     ; preds = %120
-  %123 = fsub fast float %20, %97
-  %124 = fdiv fast float %123, %121
-  %125 = fptoui float %124 to i32
-  %126 = uitofp i32 %125 to float
-  %127 = fmul fast float %126, %121
-  %128 = fsub fast float %123, %127
-  %129 = and i32 %125, 1
-  %130 = icmp eq i32 %129, 0
-  br i1 %130, label %131, label %133
-
-; <label>:131                                     ; preds = %122
-  %132 = fadd fast float %128, %20
-  br label %149
-
-; <label>:133                                     ; preds = %122
-  %134 = fsub fast float %22, %128
-  br label %149
-
-; <label>:135                                     ; preds = %120
-  br i1 %100, label %136, label %149
-
-; <label>:136                                     ; preds = %135
-  %137 = fsub fast float %97, %22
-  %138 = fdiv fast float %137, %121
-  %139 = fptoui float %138 to i32
-  %140 = uitofp i32 %139 to float
-  %141 = fmul fast float %140, %121
-  %142 = fsub fast float %137, %141
-  %143 = and i32 %139, 1
-  %144 = icmp eq i32 %143, 0
-  br i1 %144, label %145, label %147
-
-; <label>:145                                     ; preds = %136
-  %146 = fsub fast float %22, %142
-  br label %149
-
-; <label>:147                                     ; preds = %136
-  %148 = fadd fast float %142, %20
-  br label %149
-
-; <label>:149                                     ; preds = %147, %145, %135, %133, %131
-  %150 = phi float [ %132, %131 ], [ %134, %133 ], [ %146, %145 ], [ %148, %147 ], [ %97, %135 ]
-  %151 = fsub fast float %24, %20
-  br i1 %102, label %152, label %165
-
-; <label>:152                                     ; preds = %149
-  %153 = fsub fast float %20, %98
-  %154 = fdiv fast float %153, %151
-  %155 = fptoui float %154 to i32
-  %156 = uitofp i32 %155 to float
-  %157 = fmul fast float %156, %151
-  %158 = fsub fast float %153, %157
-  %159 = and i32 %155, 1
-  %160 = icmp eq i32 %159, 0
-  br i1 %160, label %161, label %163
-
-; <label>:161                                     ; preds = %152
-  %162 = fadd fast float %158, %20
-  br label %179
-
-; <label>:163                                     ; preds = %152
-  %164 = fsub fast float %24, %158
-  br label %179
-
-; <label>:165                                     ; preds = %149
-  br i1 %104, label %166, label %179
-
-; <label>:166                                     ; preds = %165
-  %167 = fsub fast float %98, %24
-  %168 = fdiv fast float %167, %151
-  %169 = fptoui float %168 to i32
-  %170 = uitofp i32 %169 to float
-  %171 = fmul fast float %170, %151
-  %172 = fsub fast float %167, %171
-  %173 = and i32 %169, 1
-  %174 = icmp eq i32 %173, 0
-  br i1 %174, label %175, label %177
-
-; <label>:175                                     ; preds = %166
-  %176 = fsub fast float %24, %172
-  br label %179
-
-; <label>:177                                     ; preds = %166
-  %178 = fadd fast float %172, %20
-  br label %179
-
-; <label>:179                                     ; preds = %177, %175, %165, %163, %161, %118, %109, %96
-  %180 = phi float [ %113, %109 ], [ %97, %118 ], [ %97, %96 ], [ %150, %177 ], [ %150, %175 ], [ %150, %165 ], [ %150, %163 ], [ %150, %161 ]
-  %181 = phi float [ %117, %109 ], [ %98, %118 ], [ %98, %96 ], [ %178, %177 ], [ %176, %175 ], [ %98, %165 ], [ %164, %163 ], [ %162, %161 ]
-  %182 = uitofp i32 %37 to float
-  br i1 %92, label %183, label %332
-
-; <label>:183                                     ; preds = %179
-  %184 = extractvalue %dx.types.CBufRet.i32 %6, 3
-  %185 = icmp eq i32 %184, 0
-  br i1 %185, label %186, label %211
-
-; <label>:186                                     ; preds = %183
-  %187 = fcmp fast oge float %180, 0.000000e+00
-  %188 = fptoui float %180 to i32
-  %189 = icmp ult i32 %188, %13
-  %190 = and i1 %187, %189
-  %191 = fcmp fast oge float %181, 0.000000e+00
-  %192 = and i1 %191, %190
-  %193 = fptoui float %181 to i32
-  %194 = icmp ult i32 %193, %15
-  %195 = and i1 %194, %192
-  br i1 %195, label %196, label %329
-
-; <label>:196                                     ; preds = %186
-  %197 = fptoui float %45 to i32
-  %198 = fptoui float %182 to i32
-  %199 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %200 = extractvalue %dx.types.CBufRet.i32 %199, 0
-  %201 = extractvalue %dx.types.CBufRet.i32 %199, 1
-  %202 = extractvalue %dx.types.CBufRet.i32 %199, 2
-  %203 = extractvalue %dx.types.CBufRet.i32 %199, 3
-  %204 = mul i32 %200, %197
-  %205 = call i32 @dx.op.tertiary.i32(i32 48, i32 %198, i32 %201, i32 %204)  ; IMad(a,b,c)
-  %206 = call i32 @dx.op.tertiary.i32(i32 48, i32 %193, i32 %202, i32 %205)  ; IMad(a,b,c)
-  %207 = call i32 @dx.op.tertiary.i32(i32 48, i32 %188, i32 %203, i32 %206)  ; IMad(a,b,c)
-  %208 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %207, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %209 = extractvalue %dx.types.ResRet.i16 %208, 0
-  %210 = uitofp i16 %209 to float
-  br label %329
-
-; <label>:211                                     ; preds = %183
-  %212 = icmp eq i32 %184, 1
-  br i1 %212, label %213, label %242
-
-; <label>:213                                     ; preds = %211
-  %214 = add i32 %13, -1
-  %215 = uitofp i32 %214 to float
-  %216 = call float @dx.op.binary.f32(i32 35, float %180, float 0.000000e+00)  ; FMax(a,b)
-  %217 = call float @dx.op.binary.f32(i32 36, float %216, float %215)  ; FMin(a,b)
-  %218 = fptoui float %217 to i32
-  %219 = add i32 %15, -1
-  %220 = uitofp i32 %219 to float
-  %221 = call float @dx.op.binary.f32(i32 35, float %181, float 0.000000e+00)  ; FMax(a,b)
-  %222 = call float @dx.op.binary.f32(i32 36, float %221, float %220)  ; FMin(a,b)
-  %223 = fptoui float %222 to i32
-  %224 = uitofp i32 %223 to float
-  %225 = uitofp i32 %218 to float
-  %226 = fptoui float %45 to i32
-  %227 = fptoui float %182 to i32
-  %228 = fptoui float %224 to i32
-  %229 = fptoui float %225 to i32
-  %230 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %231 = extractvalue %dx.types.CBufRet.i32 %230, 0
-  %232 = extractvalue %dx.types.CBufRet.i32 %230, 1
-  %233 = extractvalue %dx.types.CBufRet.i32 %230, 2
-  %234 = extractvalue %dx.types.CBufRet.i32 %230, 3
-  %235 = mul i32 %231, %226
-  %236 = call i32 @dx.op.tertiary.i32(i32 48, i32 %227, i32 %232, i32 %235)  ; IMad(a,b,c)
-  %237 = call i32 @dx.op.tertiary.i32(i32 48, i32 %228, i32 %233, i32 %236)  ; IMad(a,b,c)
-  %238 = call i32 @dx.op.tertiary.i32(i32 48, i32 %229, i32 %234, i32 %237)  ; IMad(a,b,c)
-  %239 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %238, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %240 = extractvalue %dx.types.ResRet.i16 %239, 0
-  %241 = uitofp i16 %240 to float
-  br label %329
-
-; <label>:242                                     ; preds = %211
-  %243 = icmp eq i32 %184, 2
-  br i1 %243, label %244, label %329
-
-; <label>:244                                     ; preds = %242
-  %245 = fsub fast float %22, %20
-  %246 = fcmp fast olt float %180, %20
-  br i1 %246, label %247, label %260
-
-; <label>:247                                     ; preds = %244
-  %248 = fsub fast float %20, %180
-  %249 = fdiv fast float %248, %245
-  %250 = fptoui float %249 to i32
-  %251 = uitofp i32 %250 to float
-  %252 = fmul fast float %251, %245
-  %253 = fsub fast float %248, %252
-  %254 = and i32 %250, 1
-  %255 = icmp eq i32 %254, 0
-  br i1 %255, label %256, label %258
-
-; <label>:256                                     ; preds = %247
-  %257 = fadd fast float %253, %20
-  br label %275
-
-; <label>:258                                     ; preds = %247
-  %259 = fsub fast float %22, %253
-  br label %275
-
-; <label>:260                                     ; preds = %244
-  %261 = fcmp fast ogt float %180, %22
-  br i1 %261, label %262, label %275
-
-; <label>:262                                     ; preds = %260
-  %263 = fsub fast float %180, %22
-  %264 = fdiv fast float %263, %245
-  %265 = fptoui float %264 to i32
-  %266 = uitofp i32 %265 to float
-  %267 = fmul fast float %266, %245
-  %268 = fsub fast float %263, %267
-  %269 = and i32 %265, 1
-  %270 = icmp eq i32 %269, 0
-  br i1 %270, label %271, label %273
-
-; <label>:271                                     ; preds = %262
-  %272 = fsub fast float %22, %268
-  br label %275
-
-; <label>:273                                     ; preds = %262
-  %274 = fadd fast float %268, %20
-  br label %275
-
-; <label>:275                                     ; preds = %273, %271, %260, %258, %256
-  %276 = phi float [ %257, %256 ], [ %259, %258 ], [ %272, %271 ], [ %274, %273 ], [ %180, %260 ]
-  %277 = fptoui float %276 to i32
-  %278 = fsub fast float %24, %20
-  %279 = fcmp fast olt float %181, %20
-  br i1 %279, label %280, label %293
-
-; <label>:280                                     ; preds = %275
-  %281 = fsub fast float %20, %181
-  %282 = fdiv fast float %281, %278
-  %283 = fptoui float %282 to i32
-  %284 = uitofp i32 %283 to float
-  %285 = fmul fast float %284, %278
-  %286 = fsub fast float %281, %285
-  %287 = and i32 %283, 1
-  %288 = icmp eq i32 %287, 0
-  br i1 %288, label %289, label %291
-
-; <label>:289                                     ; preds = %280
-  %290 = fadd fast float %286, %20
-  br label %308
-
-; <label>:291                                     ; preds = %280
-  %292 = fsub fast float %24, %286
-  br label %308
-
-; <label>:293                                     ; preds = %275
-  %294 = fcmp fast ogt float %181, %24
-  br i1 %294, label %295, label %308
-
-; <label>:295                                     ; preds = %293
-  %296 = fsub fast float %181, %24
-  %297 = fdiv fast float %296, %278
-  %298 = fptoui float %297 to i32
-  %299 = uitofp i32 %298 to float
-  %300 = fmul fast float %299, %278
-  %301 = fsub fast float %296, %300
-  %302 = and i32 %298, 1
-  %303 = icmp eq i32 %302, 0
-  br i1 %303, label %304, label %306
-
-; <label>:304                                     ; preds = %295
-  %305 = fsub fast float %24, %301
-  br label %308
-
-; <label>:306                                     ; preds = %295
-  %307 = fadd fast float %301, %20
-  br label %308
-
-; <label>:308                                     ; preds = %306, %304, %293, %291, %289
-  %309 = phi float [ %290, %289 ], [ %292, %291 ], [ %305, %304 ], [ %307, %306 ], [ %181, %293 ]
-  %310 = fptoui float %309 to i32
-  %311 = uitofp i32 %310 to float
-  %312 = uitofp i32 %277 to float
-  %313 = fptoui float %45 to i32
-  %314 = fptoui float %182 to i32
-  %315 = fptoui float %311 to i32
-  %316 = fptoui float %312 to i32
-  %317 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %318 = extractvalue %dx.types.CBufRet.i32 %317, 0
-  %319 = extractvalue %dx.types.CBufRet.i32 %317, 1
-  %320 = extractvalue %dx.types.CBufRet.i32 %317, 2
-  %321 = extractvalue %dx.types.CBufRet.i32 %317, 3
-  %322 = mul i32 %318, %313
-  %323 = call i32 @dx.op.tertiary.i32(i32 48, i32 %314, i32 %319, i32 %322)  ; IMad(a,b,c)
-  %324 = call i32 @dx.op.tertiary.i32(i32 48, i32 %315, i32 %320, i32 %323)  ; IMad(a,b,c)
-  %325 = call i32 @dx.op.tertiary.i32(i32 48, i32 %316, i32 %321, i32 %324)  ; IMad(a,b,c)
-  %326 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %325, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %327 = extractvalue %dx.types.ResRet.i16 %326, 0
-  %328 = uitofp i16 %327 to float
-  br label %329
-
-; <label>:329                                     ; preds = %308, %242, %213, %196, %186
-  %330 = phi float [ %210, %196 ], [ 0.000000e+00, %186 ], [ %241, %213 ], [ %328, %308 ], [ 0.000000e+00, %242 ]
-  %331 = fptoui float %330 to i16
-  call void @dx.op.rawBufferStore.i16(i32 140, %dx.types.Handle %1, i32 %8, i32 0, i16 %331, i16 undef, i16 undef, i16 undef, i8 1, i32 2)  ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment)
-  br label %3323
-
-; <label>:332                                     ; preds = %179
-  %333 = icmp eq i32 %91, 0
-  br i1 %333, label %334, label %933
-
-; <label>:334                                     ; preds = %332
-  %335 = call float @dx.op.unary.f32(i32 27, float %180)  ; Round_ni(value)
-  %336 = call float @dx.op.unary.f32(i32 27, float %181)  ; Round_ni(value)
-  %337 = fadd fast float %335, 1.000000e+00
-  %338 = fadd fast float %336, 1.000000e+00
-  %339 = extractvalue %dx.types.CBufRet.i32 %6, 3
-  %340 = icmp eq i32 %339, 0
-  br i1 %340, label %341, label %366
-
-; <label>:341                                     ; preds = %334
-  %342 = fcmp fast oge float %335, 0.000000e+00
-  %343 = fptoui float %335 to i32
-  %344 = icmp ult i32 %343, %13
-  %345 = and i1 %342, %344
-  %346 = fcmp fast oge float %336, 0.000000e+00
-  %347 = and i1 %346, %345
-  %348 = fptoui float %336 to i32
-  %349 = icmp ult i32 %348, %15
-  %350 = and i1 %349, %347
-  br i1 %350, label %351, label %484
-
-; <label>:351                                     ; preds = %341
-  %352 = fptoui float %45 to i32
-  %353 = fptoui float %182 to i32
-  %354 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %355 = extractvalue %dx.types.CBufRet.i32 %354, 0
-  %356 = extractvalue %dx.types.CBufRet.i32 %354, 1
-  %357 = extractvalue %dx.types.CBufRet.i32 %354, 2
-  %358 = extractvalue %dx.types.CBufRet.i32 %354, 3
-  %359 = mul i32 %355, %352
-  %360 = call i32 @dx.op.tertiary.i32(i32 48, i32 %353, i32 %356, i32 %359)  ; IMad(a,b,c)
-  %361 = call i32 @dx.op.tertiary.i32(i32 48, i32 %348, i32 %357, i32 %360)  ; IMad(a,b,c)
-  %362 = call i32 @dx.op.tertiary.i32(i32 48, i32 %343, i32 %358, i32 %361)  ; IMad(a,b,c)
-  %363 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %362, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %364 = extractvalue %dx.types.ResRet.i16 %363, 0
-  %365 = uitofp i16 %364 to float
-  br label %484
-
-; <label>:366                                     ; preds = %334
-  %367 = icmp eq i32 %339, 1
-  br i1 %367, label %368, label %397
-
-; <label>:368                                     ; preds = %366
-  %369 = add i32 %13, -1
-  %370 = uitofp i32 %369 to float
-  %371 = call float @dx.op.binary.f32(i32 35, float %335, float 0.000000e+00)  ; FMax(a,b)
-  %372 = call float @dx.op.binary.f32(i32 36, float %371, float %370)  ; FMin(a,b)
-  %373 = fptoui float %372 to i32
-  %374 = add i32 %15, -1
-  %375 = uitofp i32 %374 to float
-  %376 = call float @dx.op.binary.f32(i32 35, float %336, float 0.000000e+00)  ; FMax(a,b)
-  %377 = call float @dx.op.binary.f32(i32 36, float %376, float %375)  ; FMin(a,b)
-  %378 = fptoui float %377 to i32
-  %379 = uitofp i32 %378 to float
-  %380 = uitofp i32 %373 to float
-  %381 = fptoui float %45 to i32
-  %382 = fptoui float %182 to i32
-  %383 = fptoui float %379 to i32
-  %384 = fptoui float %380 to i32
-  %385 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %386 = extractvalue %dx.types.CBufRet.i32 %385, 0
-  %387 = extractvalue %dx.types.CBufRet.i32 %385, 1
-  %388 = extractvalue %dx.types.CBufRet.i32 %385, 2
-  %389 = extractvalue %dx.types.CBufRet.i32 %385, 3
-  %390 = mul i32 %386, %381
-  %391 = call i32 @dx.op.tertiary.i32(i32 48, i32 %382, i32 %387, i32 %390)  ; IMad(a,b,c)
-  %392 = call i32 @dx.op.tertiary.i32(i32 48, i32 %383, i32 %388, i32 %391)  ; IMad(a,b,c)
-  %393 = call i32 @dx.op.tertiary.i32(i32 48, i32 %384, i32 %389, i32 %392)  ; IMad(a,b,c)
-  %394 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %393, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %395 = extractvalue %dx.types.ResRet.i16 %394, 0
-  %396 = uitofp i16 %395 to float
-  br label %484
-
-; <label>:397                                     ; preds = %366
-  %398 = icmp eq i32 %339, 2
-  br i1 %398, label %399, label %484
-
-; <label>:399                                     ; preds = %397
-  %400 = fsub fast float %22, %20
-  %401 = fcmp fast olt float %335, %20
-  br i1 %401, label %402, label %415
-
-; <label>:402                                     ; preds = %399
-  %403 = fsub fast float %20, %335
-  %404 = fdiv fast float %403, %400
-  %405 = fptoui float %404 to i32
-  %406 = uitofp i32 %405 to float
-  %407 = fmul fast float %406, %400
-  %408 = fsub fast float %403, %407
-  %409 = and i32 %405, 1
-  %410 = icmp eq i32 %409, 0
-  br i1 %410, label %411, label %413
-
-; <label>:411                                     ; preds = %402
-  %412 = fadd fast float %408, %20
-  br label %430
-
-; <label>:413                                     ; preds = %402
-  %414 = fsub fast float %22, %408
-  br label %430
-
-; <label>:415                                     ; preds = %399
-  %416 = fcmp fast ogt float %335, %22
-  br i1 %416, label %417, label %430
-
-; <label>:417                                     ; preds = %415
-  %418 = fsub fast float %335, %22
-  %419 = fdiv fast float %418, %400
-  %420 = fptoui float %419 to i32
-  %421 = uitofp i32 %420 to float
-  %422 = fmul fast float %421, %400
-  %423 = fsub fast float %418, %422
-  %424 = and i32 %420, 1
-  %425 = icmp eq i32 %424, 0
-  br i1 %425, label %426, label %428
-
-; <label>:426                                     ; preds = %417
-  %427 = fsub fast float %22, %423
-  br label %430
-
-; <label>:428                                     ; preds = %417
-  %429 = fadd fast float %423, %20
-  br label %430
-
-; <label>:430                                     ; preds = %428, %426, %415, %413, %411
-  %431 = phi float [ %412, %411 ], [ %414, %413 ], [ %427, %426 ], [ %429, %428 ], [ %335, %415 ]
-  %432 = fptoui float %431 to i32
-  %433 = fsub fast float %24, %20
-  %434 = fcmp fast olt float %336, %20
-  br i1 %434, label %435, label %448
-
-; <label>:435                                     ; preds = %430
-  %436 = fsub fast float %20, %336
-  %437 = fdiv fast float %436, %433
-  %438 = fptoui float %437 to i32
-  %439 = uitofp i32 %438 to float
-  %440 = fmul fast float %439, %433
-  %441 = fsub fast float %436, %440
-  %442 = and i32 %438, 1
-  %443 = icmp eq i32 %442, 0
-  br i1 %443, label %444, label %446
-
-; <label>:444                                     ; preds = %435
-  %445 = fadd fast float %441, %20
-  br label %463
-
-; <label>:446                                     ; preds = %435
-  %447 = fsub fast float %24, %441
-  br label %463
-
-; <label>:448                                     ; preds = %430
-  %449 = fcmp fast ogt float %336, %24
-  br i1 %449, label %450, label %463
-
-; <label>:450                                     ; preds = %448
-  %451 = fsub fast float %336, %24
-  %452 = fdiv fast float %451, %433
-  %453 = fptoui float %452 to i32
-  %454 = uitofp i32 %453 to float
-  %455 = fmul fast float %454, %433
-  %456 = fsub fast float %451, %455
-  %457 = and i32 %453, 1
-  %458 = icmp eq i32 %457, 0
-  br i1 %458, label %459, label %461
-
-; <label>:459                                     ; preds = %450
-  %460 = fsub fast float %24, %456
-  br label %463
-
-; <label>:461                                     ; preds = %450
-  %462 = fadd fast float %456, %20
-  br label %463
-
-; <label>:463                                     ; preds = %461, %459, %448, %446, %444
-  %464 = phi float [ %445, %444 ], [ %447, %446 ], [ %460, %459 ], [ %462, %461 ], [ %336, %448 ]
-  %465 = fptoui float %464 to i32
-  %466 = uitofp i32 %465 to float
-  %467 = uitofp i32 %432 to float
-  %468 = fptoui float %45 to i32
-  %469 = fptoui float %182 to i32
-  %470 = fptoui float %466 to i32
-  %471 = fptoui float %467 to i32
-  %472 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %473 = extractvalue %dx.types.CBufRet.i32 %472, 0
-  %474 = extractvalue %dx.types.CBufRet.i32 %472, 1
-  %475 = extractvalue %dx.types.CBufRet.i32 %472, 2
-  %476 = extractvalue %dx.types.CBufRet.i32 %472, 3
-  %477 = mul i32 %473, %468
-  %478 = call i32 @dx.op.tertiary.i32(i32 48, i32 %469, i32 %474, i32 %477)  ; IMad(a,b,c)
-  %479 = call i32 @dx.op.tertiary.i32(i32 48, i32 %470, i32 %475, i32 %478)  ; IMad(a,b,c)
-  %480 = call i32 @dx.op.tertiary.i32(i32 48, i32 %471, i32 %476, i32 %479)  ; IMad(a,b,c)
-  %481 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %480, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %482 = extractvalue %dx.types.ResRet.i16 %481, 0
-  %483 = uitofp i16 %482 to float
-  br label %484
-
-; <label>:484                                     ; preds = %463, %397, %368, %351, %341
-  %485 = phi float [ %365, %351 ], [ 0.000000e+00, %341 ], [ %396, %368 ], [ %483, %463 ], [ 0.000000e+00, %397 ]
-  br i1 %340, label %486, label %511
-
-; <label>:486                                     ; preds = %484
-  %487 = fcmp fast oge float %337, 0.000000e+00
-  %488 = fptoui float %337 to i32
-  %489 = icmp ult i32 %488, %13
-  %490 = and i1 %487, %489
-  %491 = fcmp fast oge float %336, 0.000000e+00
-  %492 = and i1 %491, %490
-  %493 = fptoui float %336 to i32
-  %494 = icmp ult i32 %493, %15
-  %495 = and i1 %494, %492
-  br i1 %495, label %496, label %629
-
-; <label>:496                                     ; preds = %486
-  %497 = fptoui float %45 to i32
-  %498 = fptoui float %182 to i32
-  %499 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %500 = extractvalue %dx.types.CBufRet.i32 %499, 0
-  %501 = extractvalue %dx.types.CBufRet.i32 %499, 1
-  %502 = extractvalue %dx.types.CBufRet.i32 %499, 2
-  %503 = extractvalue %dx.types.CBufRet.i32 %499, 3
-  %504 = mul i32 %500, %497
-  %505 = call i32 @dx.op.tertiary.i32(i32 48, i32 %498, i32 %501, i32 %504)  ; IMad(a,b,c)
-  %506 = call i32 @dx.op.tertiary.i32(i32 48, i32 %493, i32 %502, i32 %505)  ; IMad(a,b,c)
-  %507 = call i32 @dx.op.tertiary.i32(i32 48, i32 %488, i32 %503, i32 %506)  ; IMad(a,b,c)
-  %508 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %507, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %509 = extractvalue %dx.types.ResRet.i16 %508, 0
-  %510 = uitofp i16 %509 to float
-  br label %629
-
-; <label>:511                                     ; preds = %484
-  %512 = icmp eq i32 %339, 1
-  br i1 %512, label %513, label %542
-
-; <label>:513                                     ; preds = %511
-  %514 = add i32 %13, -1
-  %515 = uitofp i32 %514 to float
-  %516 = call float @dx.op.binary.f32(i32 35, float %337, float 0.000000e+00)  ; FMax(a,b)
-  %517 = call float @dx.op.binary.f32(i32 36, float %516, float %515)  ; FMin(a,b)
-  %518 = fptoui float %517 to i32
-  %519 = add i32 %15, -1
-  %520 = uitofp i32 %519 to float
-  %521 = call float @dx.op.binary.f32(i32 35, float %336, float 0.000000e+00)  ; FMax(a,b)
-  %522 = call float @dx.op.binary.f32(i32 36, float %521, float %520)  ; FMin(a,b)
-  %523 = fptoui float %522 to i32
-  %524 = uitofp i32 %523 to float
-  %525 = uitofp i32 %518 to float
-  %526 = fptoui float %45 to i32
-  %527 = fptoui float %182 to i32
-  %528 = fptoui float %524 to i32
-  %529 = fptoui float %525 to i32
-  %530 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %531 = extractvalue %dx.types.CBufRet.i32 %530, 0
-  %532 = extractvalue %dx.types.CBufRet.i32 %530, 1
-  %533 = extractvalue %dx.types.CBufRet.i32 %530, 2
-  %534 = extractvalue %dx.types.CBufRet.i32 %530, 3
-  %535 = mul i32 %531, %526
-  %536 = call i32 @dx.op.tertiary.i32(i32 48, i32 %527, i32 %532, i32 %535)  ; IMad(a,b,c)
-  %537 = call i32 @dx.op.tertiary.i32(i32 48, i32 %528, i32 %533, i32 %536)  ; IMad(a,b,c)
-  %538 = call i32 @dx.op.tertiary.i32(i32 48, i32 %529, i32 %534, i32 %537)  ; IMad(a,b,c)
-  %539 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %538, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %540 = extractvalue %dx.types.ResRet.i16 %539, 0
-  %541 = uitofp i16 %540 to float
-  br label %629
-
-; <label>:542                                     ; preds = %511
-  %543 = icmp eq i32 %339, 2
-  br i1 %543, label %544, label %629
-
-; <label>:544                                     ; preds = %542
-  %545 = fsub fast float %22, %20
-  %546 = fcmp fast olt float %337, %20
-  br i1 %546, label %547, label %560
-
-; <label>:547                                     ; preds = %544
-  %548 = fsub fast float %20, %337
-  %549 = fdiv fast float %548, %545
-  %550 = fptoui float %549 to i32
-  %551 = uitofp i32 %550 to float
-  %552 = fmul fast float %551, %545
-  %553 = fsub fast float %548, %552
-  %554 = and i32 %550, 1
-  %555 = icmp eq i32 %554, 0
-  br i1 %555, label %556, label %558
-
-; <label>:556                                     ; preds = %547
-  %557 = fadd fast float %553, %20
-  br label %575
-
-; <label>:558                                     ; preds = %547
-  %559 = fsub fast float %22, %553
-  br label %575
-
-; <label>:560                                     ; preds = %544
-  %561 = fcmp fast ogt float %337, %22
-  br i1 %561, label %562, label %575
-
-; <label>:562                                     ; preds = %560
-  %563 = fsub fast float %337, %22
-  %564 = fdiv fast float %563, %545
-  %565 = fptoui float %564 to i32
-  %566 = uitofp i32 %565 to float
-  %567 = fmul fast float %566, %545
-  %568 = fsub fast float %563, %567
-  %569 = and i32 %565, 1
-  %570 = icmp eq i32 %569, 0
-  br i1 %570, label %571, label %573
-
-; <label>:571                                     ; preds = %562
-  %572 = fsub fast float %22, %568
-  br label %575
-
-; <label>:573                                     ; preds = %562
-  %574 = fadd fast float %568, %20
-  br label %575
-
-; <label>:575                                     ; preds = %573, %571, %560, %558, %556
-  %576 = phi float [ %557, %556 ], [ %559, %558 ], [ %572, %571 ], [ %574, %573 ], [ %337, %560 ]
-  %577 = fptoui float %576 to i32
-  %578 = fsub fast float %24, %20
-  %579 = fcmp fast olt float %336, %20
-  br i1 %579, label %580, label %593
-
-; <label>:580                                     ; preds = %575
-  %581 = fsub fast float %20, %336
-  %582 = fdiv fast float %581, %578
-  %583 = fptoui float %582 to i32
-  %584 = uitofp i32 %583 to float
-  %585 = fmul fast float %584, %578
-  %586 = fsub fast float %581, %585
-  %587 = and i32 %583, 1
-  %588 = icmp eq i32 %587, 0
-  br i1 %588, label %589, label %591
-
-; <label>:589                                     ; preds = %580
-  %590 = fadd fast float %586, %20
-  br label %608
-
-; <label>:591                                     ; preds = %580
-  %592 = fsub fast float %24, %586
-  br label %608
-
-; <label>:593                                     ; preds = %575
-  %594 = fcmp fast ogt float %336, %24
-  br i1 %594, label %595, label %608
-
-; <label>:595                                     ; preds = %593
-  %596 = fsub fast float %336, %24
-  %597 = fdiv fast float %596, %578
-  %598 = fptoui float %597 to i32
-  %599 = uitofp i32 %598 to float
-  %600 = fmul fast float %599, %578
-  %601 = fsub fast float %596, %600
-  %602 = and i32 %598, 1
-  %603 = icmp eq i32 %602, 0
-  br i1 %603, label %604, label %606
-
-; <label>:604                                     ; preds = %595
-  %605 = fsub fast float %24, %601
-  br label %608
-
-; <label>:606                                     ; preds = %595
-  %607 = fadd fast float %601, %20
-  br label %608
-
-; <label>:608                                     ; preds = %606, %604, %593, %591, %589
-  %609 = phi float [ %590, %589 ], [ %592, %591 ], [ %605, %604 ], [ %607, %606 ], [ %336, %593 ]
-  %610 = fptoui float %609 to i32
-  %611 = uitofp i32 %610 to float
-  %612 = uitofp i32 %577 to float
-  %613 = fptoui float %45 to i32
-  %614 = fptoui float %182 to i32
-  %615 = fptoui float %611 to i32
-  %616 = fptoui float %612 to i32
-  %617 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %618 = extractvalue %dx.types.CBufRet.i32 %617, 0
-  %619 = extractvalue %dx.types.CBufRet.i32 %617, 1
-  %620 = extractvalue %dx.types.CBufRet.i32 %617, 2
-  %621 = extractvalue %dx.types.CBufRet.i32 %617, 3
-  %622 = mul i32 %618, %613
-  %623 = call i32 @dx.op.tertiary.i32(i32 48, i32 %614, i32 %619, i32 %622)  ; IMad(a,b,c)
-  %624 = call i32 @dx.op.tertiary.i32(i32 48, i32 %615, i32 %620, i32 %623)  ; IMad(a,b,c)
-  %625 = call i32 @dx.op.tertiary.i32(i32 48, i32 %616, i32 %621, i32 %624)  ; IMad(a,b,c)
-  %626 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %625, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %627 = extractvalue %dx.types.ResRet.i16 %626, 0
-  %628 = uitofp i16 %627 to float
-  br label %629
-
-; <label>:629                                     ; preds = %608, %542, %513, %496, %486
-  %630 = phi float [ %510, %496 ], [ 0.000000e+00, %486 ], [ %541, %513 ], [ %628, %608 ], [ 0.000000e+00, %542 ]
-  br i1 %340, label %631, label %656
-
-; <label>:631                                     ; preds = %629
-  %632 = fcmp fast oge float %335, 0.000000e+00
-  %633 = fptoui float %335 to i32
-  %634 = icmp ult i32 %633, %13
-  %635 = and i1 %632, %634
-  %636 = fcmp fast oge float %338, 0.000000e+00
-  %637 = and i1 %636, %635
-  %638 = fptoui float %338 to i32
-  %639 = icmp ult i32 %638, %15
-  %640 = and i1 %639, %637
-  br i1 %640, label %641, label %774
-
-; <label>:641                                     ; preds = %631
-  %642 = fptoui float %45 to i32
-  %643 = fptoui float %182 to i32
-  %644 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %645 = extractvalue %dx.types.CBufRet.i32 %644, 0
-  %646 = extractvalue %dx.types.CBufRet.i32 %644, 1
-  %647 = extractvalue %dx.types.CBufRet.i32 %644, 2
-  %648 = extractvalue %dx.types.CBufRet.i32 %644, 3
-  %649 = mul i32 %645, %642
-  %650 = call i32 @dx.op.tertiary.i32(i32 48, i32 %643, i32 %646, i32 %649)  ; IMad(a,b,c)
-  %651 = call i32 @dx.op.tertiary.i32(i32 48, i32 %638, i32 %647, i32 %650)  ; IMad(a,b,c)
-  %652 = call i32 @dx.op.tertiary.i32(i32 48, i32 %633, i32 %648, i32 %651)  ; IMad(a,b,c)
-  %653 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %652, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %654 = extractvalue %dx.types.ResRet.i16 %653, 0
-  %655 = uitofp i16 %654 to float
-  br label %774
-
-; <label>:656                                     ; preds = %629
-  %657 = icmp eq i32 %339, 1
-  br i1 %657, label %658, label %687
-
-; <label>:658                                     ; preds = %656
-  %659 = add i32 %13, -1
-  %660 = uitofp i32 %659 to float
-  %661 = call float @dx.op.binary.f32(i32 35, float %335, float 0.000000e+00)  ; FMax(a,b)
-  %662 = call float @dx.op.binary.f32(i32 36, float %661, float %660)  ; FMin(a,b)
-  %663 = fptoui float %662 to i32
-  %664 = add i32 %15, -1
-  %665 = uitofp i32 %664 to float
-  %666 = call float @dx.op.binary.f32(i32 35, float %338, float 0.000000e+00)  ; FMax(a,b)
-  %667 = call float @dx.op.binary.f32(i32 36, float %666, float %665)  ; FMin(a,b)
-  %668 = fptoui float %667 to i32
-  %669 = uitofp i32 %668 to float
-  %670 = uitofp i32 %663 to float
-  %671 = fptoui float %45 to i32
-  %672 = fptoui float %182 to i32
-  %673 = fptoui float %669 to i32
-  %674 = fptoui float %670 to i32
-  %675 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %676 = extractvalue %dx.types.CBufRet.i32 %675, 0
-  %677 = extractvalue %dx.types.CBufRet.i32 %675, 1
-  %678 = extractvalue %dx.types.CBufRet.i32 %675, 2
-  %679 = extractvalue %dx.types.CBufRet.i32 %675, 3
-  %680 = mul i32 %676, %671
-  %681 = call i32 @dx.op.tertiary.i32(i32 48, i32 %672, i32 %677, i32 %680)  ; IMad(a,b,c)
-  %682 = call i32 @dx.op.tertiary.i32(i32 48, i32 %673, i32 %678, i32 %681)  ; IMad(a,b,c)
-  %683 = call i32 @dx.op.tertiary.i32(i32 48, i32 %674, i32 %679, i32 %682)  ; IMad(a,b,c)
-  %684 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %683, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %685 = extractvalue %dx.types.ResRet.i16 %684, 0
-  %686 = uitofp i16 %685 to float
-  br label %774
-
-; <label>:687                                     ; preds = %656
-  %688 = icmp eq i32 %339, 2
-  br i1 %688, label %689, label %774
-
-; <label>:689                                     ; preds = %687
-  %690 = fsub fast float %22, %20
-  %691 = fcmp fast olt float %335, %20
-  br i1 %691, label %692, label %705
-
-; <label>:692                                     ; preds = %689
-  %693 = fsub fast float %20, %335
-  %694 = fdiv fast float %693, %690
-  %695 = fptoui float %694 to i32
-  %696 = uitofp i32 %695 to float
-  %697 = fmul fast float %696, %690
-  %698 = fsub fast float %693, %697
-  %699 = and i32 %695, 1
-  %700 = icmp eq i32 %699, 0
-  br i1 %700, label %701, label %703
-
-; <label>:701                                     ; preds = %692
-  %702 = fadd fast float %698, %20
-  br label %720
-
-; <label>:703                                     ; preds = %692
-  %704 = fsub fast float %22, %698
-  br label %720
-
-; <label>:705                                     ; preds = %689
-  %706 = fcmp fast ogt float %335, %22
-  br i1 %706, label %707, label %720
-
-; <label>:707                                     ; preds = %705
-  %708 = fsub fast float %335, %22
-  %709 = fdiv fast float %708, %690
-  %710 = fptoui float %709 to i32
-  %711 = uitofp i32 %710 to float
-  %712 = fmul fast float %711, %690
-  %713 = fsub fast float %708, %712
-  %714 = and i32 %710, 1
-  %715 = icmp eq i32 %714, 0
-  br i1 %715, label %716, label %718
-
-; <label>:716                                     ; preds = %707
-  %717 = fsub fast float %22, %713
-  br label %720
-
-; <label>:718                                     ; preds = %707
-  %719 = fadd fast float %713, %20
-  br label %720
-
-; <label>:720                                     ; preds = %718, %716, %705, %703, %701
-  %721 = phi float [ %702, %701 ], [ %704, %703 ], [ %717, %716 ], [ %719, %718 ], [ %335, %705 ]
-  %722 = fptoui float %721 to i32
-  %723 = fsub fast float %24, %20
-  %724 = fcmp fast olt float %338, %20
-  br i1 %724, label %725, label %738
-
-; <label>:725                                     ; preds = %720
-  %726 = fsub fast float %20, %338
-  %727 = fdiv fast float %726, %723
-  %728 = fptoui float %727 to i32
-  %729 = uitofp i32 %728 to float
-  %730 = fmul fast float %729, %723
-  %731 = fsub fast float %726, %730
-  %732 = and i32 %728, 1
-  %733 = icmp eq i32 %732, 0
-  br i1 %733, label %734, label %736
-
-; <label>:734                                     ; preds = %725
-  %735 = fadd fast float %731, %20
-  br label %753
-
-; <label>:736                                     ; preds = %725
-  %737 = fsub fast float %24, %731
-  br label %753
-
-; <label>:738                                     ; preds = %720
-  %739 = fcmp fast ogt float %338, %24
-  br i1 %739, label %740, label %753
-
-; <label>:740                                     ; preds = %738
-  %741 = fsub fast float %338, %24
-  %742 = fdiv fast float %741, %723
-  %743 = fptoui float %742 to i32
-  %744 = uitofp i32 %743 to float
-  %745 = fmul fast float %744, %723
-  %746 = fsub fast float %741, %745
-  %747 = and i32 %743, 1
-  %748 = icmp eq i32 %747, 0
-  br i1 %748, label %749, label %751
-
-; <label>:749                                     ; preds = %740
-  %750 = fsub fast float %24, %746
-  br label %753
-
-; <label>:751                                     ; preds = %740
-  %752 = fadd fast float %746, %20
-  br label %753
-
-; <label>:753                                     ; preds = %751, %749, %738, %736, %734
-  %754 = phi float [ %735, %734 ], [ %737, %736 ], [ %750, %749 ], [ %752, %751 ], [ %338, %738 ]
-  %755 = fptoui float %754 to i32
-  %756 = uitofp i32 %755 to float
-  %757 = uitofp i32 %722 to float
-  %758 = fptoui float %45 to i32
-  %759 = fptoui float %182 to i32
-  %760 = fptoui float %756 to i32
-  %761 = fptoui float %757 to i32
-  %762 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %763 = extractvalue %dx.types.CBufRet.i32 %762, 0
-  %764 = extractvalue %dx.types.CBufRet.i32 %762, 1
-  %765 = extractvalue %dx.types.CBufRet.i32 %762, 2
-  %766 = extractvalue %dx.types.CBufRet.i32 %762, 3
-  %767 = mul i32 %763, %758
-  %768 = call i32 @dx.op.tertiary.i32(i32 48, i32 %759, i32 %764, i32 %767)  ; IMad(a,b,c)
-  %769 = call i32 @dx.op.tertiary.i32(i32 48, i32 %760, i32 %765, i32 %768)  ; IMad(a,b,c)
-  %770 = call i32 @dx.op.tertiary.i32(i32 48, i32 %761, i32 %766, i32 %769)  ; IMad(a,b,c)
-  %771 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %770, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %772 = extractvalue %dx.types.ResRet.i16 %771, 0
-  %773 = uitofp i16 %772 to float
-  br label %774
-
-; <label>:774                                     ; preds = %753, %687, %658, %641, %631
-  %775 = phi float [ %655, %641 ], [ 0.000000e+00, %631 ], [ %686, %658 ], [ %773, %753 ], [ 0.000000e+00, %687 ]
-  br i1 %340, label %776, label %801
-
-; <label>:776                                     ; preds = %774
-  %777 = fcmp fast oge float %337, 0.000000e+00
-  %778 = fptoui float %337 to i32
-  %779 = icmp ult i32 %778, %13
-  %780 = and i1 %777, %779
-  %781 = fcmp fast oge float %338, 0.000000e+00
-  %782 = and i1 %781, %780
-  %783 = fptoui float %338 to i32
-  %784 = icmp ult i32 %783, %15
-  %785 = and i1 %784, %782
-  br i1 %785, label %786, label %919
-
-; <label>:786                                     ; preds = %776
-  %787 = fptoui float %45 to i32
-  %788 = fptoui float %182 to i32
-  %789 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %790 = extractvalue %dx.types.CBufRet.i32 %789, 0
-  %791 = extractvalue %dx.types.CBufRet.i32 %789, 1
-  %792 = extractvalue %dx.types.CBufRet.i32 %789, 2
-  %793 = extractvalue %dx.types.CBufRet.i32 %789, 3
-  %794 = mul i32 %790, %787
-  %795 = call i32 @dx.op.tertiary.i32(i32 48, i32 %788, i32 %791, i32 %794)  ; IMad(a,b,c)
-  %796 = call i32 @dx.op.tertiary.i32(i32 48, i32 %783, i32 %792, i32 %795)  ; IMad(a,b,c)
-  %797 = call i32 @dx.op.tertiary.i32(i32 48, i32 %778, i32 %793, i32 %796)  ; IMad(a,b,c)
-  %798 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %797, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %799 = extractvalue %dx.types.ResRet.i16 %798, 0
-  %800 = uitofp i16 %799 to float
-  br label %919
-
-; <label>:801                                     ; preds = %774
-  %802 = icmp eq i32 %339, 1
-  br i1 %802, label %803, label %832
-
-; <label>:803                                     ; preds = %801
-  %804 = add i32 %13, -1
-  %805 = uitofp i32 %804 to float
-  %806 = call float @dx.op.binary.f32(i32 35, float %337, float 0.000000e+00)  ; FMax(a,b)
-  %807 = call float @dx.op.binary.f32(i32 36, float %806, float %805)  ; FMin(a,b)
-  %808 = fptoui float %807 to i32
-  %809 = add i32 %15, -1
-  %810 = uitofp i32 %809 to float
-  %811 = call float @dx.op.binary.f32(i32 35, float %338, float 0.000000e+00)  ; FMax(a,b)
-  %812 = call float @dx.op.binary.f32(i32 36, float %811, float %810)  ; FMin(a,b)
-  %813 = fptoui float %812 to i32
-  %814 = uitofp i32 %813 to float
-  %815 = uitofp i32 %808 to float
-  %816 = fptoui float %45 to i32
-  %817 = fptoui float %182 to i32
-  %818 = fptoui float %814 to i32
-  %819 = fptoui float %815 to i32
-  %820 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %821 = extractvalue %dx.types.CBufRet.i32 %820, 0
-  %822 = extractvalue %dx.types.CBufRet.i32 %820, 1
-  %823 = extractvalue %dx.types.CBufRet.i32 %820, 2
-  %824 = extractvalue %dx.types.CBufRet.i32 %820, 3
-  %825 = mul i32 %821, %816
-  %826 = call i32 @dx.op.tertiary.i32(i32 48, i32 %817, i32 %822, i32 %825)  ; IMad(a,b,c)
-  %827 = call i32 @dx.op.tertiary.i32(i32 48, i32 %818, i32 %823, i32 %826)  ; IMad(a,b,c)
-  %828 = call i32 @dx.op.tertiary.i32(i32 48, i32 %819, i32 %824, i32 %827)  ; IMad(a,b,c)
-  %829 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %828, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %830 = extractvalue %dx.types.ResRet.i16 %829, 0
-  %831 = uitofp i16 %830 to float
-  br label %919
-
-; <label>:832                                     ; preds = %801
-  %833 = icmp eq i32 %339, 2
-  br i1 %833, label %834, label %919
-
-; <label>:834                                     ; preds = %832
-  %835 = fsub fast float %22, %20
-  %836 = fcmp fast olt float %337, %20
-  br i1 %836, label %837, label %850
-
-; <label>:837                                     ; preds = %834
-  %838 = fsub fast float %20, %337
-  %839 = fdiv fast float %838, %835
-  %840 = fptoui float %839 to i32
-  %841 = uitofp i32 %840 to float
-  %842 = fmul fast float %841, %835
-  %843 = fsub fast float %838, %842
-  %844 = and i32 %840, 1
-  %845 = icmp eq i32 %844, 0
-  br i1 %845, label %846, label %848
-
-; <label>:846                                     ; preds = %837
-  %847 = fadd fast float %843, %20
-  br label %865
-
-; <label>:848                                     ; preds = %837
-  %849 = fsub fast float %22, %843
-  br label %865
-
-; <label>:850                                     ; preds = %834
-  %851 = fcmp fast ogt float %337, %22
-  br i1 %851, label %852, label %865
-
-; <label>:852                                     ; preds = %850
-  %853 = fsub fast float %337, %22
-  %854 = fdiv fast float %853, %835
-  %855 = fptoui float %854 to i32
-  %856 = uitofp i32 %855 to float
-  %857 = fmul fast float %856, %835
-  %858 = fsub fast float %853, %857
-  %859 = and i32 %855, 1
-  %860 = icmp eq i32 %859, 0
-  br i1 %860, label %861, label %863
-
-; <label>:861                                     ; preds = %852
-  %862 = fsub fast float %22, %858
-  br label %865
-
-; <label>:863                                     ; preds = %852
-  %864 = fadd fast float %858, %20
-  br label %865
-
-; <label>:865                                     ; preds = %863, %861, %850, %848, %846
-  %866 = phi float [ %847, %846 ], [ %849, %848 ], [ %862, %861 ], [ %864, %863 ], [ %337, %850 ]
-  %867 = fptoui float %866 to i32
-  %868 = fsub fast float %24, %20
-  %869 = fcmp fast olt float %338, %20
-  br i1 %869, label %870, label %883
-
-; <label>:870                                     ; preds = %865
-  %871 = fsub fast float %20, %338
-  %872 = fdiv fast float %871, %868
-  %873 = fptoui float %872 to i32
-  %874 = uitofp i32 %873 to float
-  %875 = fmul fast float %874, %868
-  %876 = fsub fast float %871, %875
-  %877 = and i32 %873, 1
-  %878 = icmp eq i32 %877, 0
-  br i1 %878, label %879, label %881
-
-; <label>:879                                     ; preds = %870
-  %880 = fadd fast float %876, %20
-  br label %898
-
-; <label>:881                                     ; preds = %870
-  %882 = fsub fast float %24, %876
-  br label %898
-
-; <label>:883                                     ; preds = %865
-  %884 = fcmp fast ogt float %338, %24
-  br i1 %884, label %885, label %898
-
-; <label>:885                                     ; preds = %883
-  %886 = fsub fast float %338, %24
-  %887 = fdiv fast float %886, %868
-  %888 = fptoui float %887 to i32
-  %889 = uitofp i32 %888 to float
-  %890 = fmul fast float %889, %868
-  %891 = fsub fast float %886, %890
-  %892 = and i32 %888, 1
-  %893 = icmp eq i32 %892, 0
-  br i1 %893, label %894, label %896
-
-; <label>:894                                     ; preds = %885
-  %895 = fsub fast float %24, %891
-  br label %898
-
-; <label>:896                                     ; preds = %885
-  %897 = fadd fast float %891, %20
-  br label %898
-
-; <label>:898                                     ; preds = %896, %894, %883, %881, %879
-  %899 = phi float [ %880, %879 ], [ %882, %881 ], [ %895, %894 ], [ %897, %896 ], [ %338, %883 ]
-  %900 = fptoui float %899 to i32
-  %901 = uitofp i32 %900 to float
-  %902 = uitofp i32 %867 to float
-  %903 = fptoui float %45 to i32
-  %904 = fptoui float %182 to i32
-  %905 = fptoui float %901 to i32
-  %906 = fptoui float %902 to i32
-  %907 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %908 = extractvalue %dx.types.CBufRet.i32 %907, 0
-  %909 = extractvalue %dx.types.CBufRet.i32 %907, 1
-  %910 = extractvalue %dx.types.CBufRet.i32 %907, 2
-  %911 = extractvalue %dx.types.CBufRet.i32 %907, 3
-  %912 = mul i32 %908, %903
-  %913 = call i32 @dx.op.tertiary.i32(i32 48, i32 %904, i32 %909, i32 %912)  ; IMad(a,b,c)
-  %914 = call i32 @dx.op.tertiary.i32(i32 48, i32 %905, i32 %910, i32 %913)  ; IMad(a,b,c)
-  %915 = call i32 @dx.op.tertiary.i32(i32 48, i32 %906, i32 %911, i32 %914)  ; IMad(a,b,c)
-  %916 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %915, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %917 = extractvalue %dx.types.ResRet.i16 %916, 0
-  %918 = uitofp i16 %917 to float
-  br label %919
-
-; <label>:919                                     ; preds = %898, %832, %803, %786, %776
-  %920 = phi float [ %800, %786 ], [ 0.000000e+00, %776 ], [ %831, %803 ], [ %918, %898 ], [ 0.000000e+00, %832 ]
-  %921 = call float @dx.op.unary.f32(i32 22, float %180)  ; Frc(value)
-  %922 = fsub fast float %630, %485
-  %923 = fmul fast float %921, %922
-  %924 = fadd fast float %923, %485
-  %925 = fsub fast float %920, %775
-  %926 = fmul fast float %921, %925
-  %927 = fadd fast float %926, %775
-  %928 = call float @dx.op.unary.f32(i32 22, float %181)  ; Frc(value)
-  %929 = fsub fast float %927, %924
-  %930 = fmul fast float %929, %928
-  %931 = fadd fast float %930, %924
-  %932 = fptoui float %931 to i16
-  call void @dx.op.rawBufferStore.i16(i32 140, %dx.types.Handle %1, i32 %8, i32 0, i16 %932, i16 undef, i16 undef, i16 undef, i8 1, i32 2)  ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment)
-  br label %3323
-
-; <label>:933                                     ; preds = %332
-  %934 = icmp eq i32 %91, 2
-  br i1 %934, label %935, label %3323
-
-; <label>:935                                     ; preds = %933
-  %936 = call float @dx.op.unary.f32(i32 27, float %180)  ; Round_ni(value)
-  %937 = fadd fast float %936, -1.000000e+00
-  %938 = call float @dx.op.unary.f32(i32 27, float %181)  ; Round_ni(value)
-  %939 = fadd fast float %938, -1.000000e+00
-  %940 = extractvalue %dx.types.CBufRet.i32 %6, 3
-  %941 = icmp eq i32 %940, 0
-  br i1 %941, label %942, label %967
-
-; <label>:942                                     ; preds = %935
-  %943 = fcmp fast oge float %937, 0.000000e+00
-  %944 = fptoui float %937 to i32
-  %945 = icmp ult i32 %944, %13
-  %946 = and i1 %943, %945
-  %947 = fcmp fast oge float %939, 0.000000e+00
-  %948 = and i1 %947, %946
-  %949 = fptoui float %939 to i32
-  %950 = icmp ult i32 %949, %15
-  %951 = and i1 %950, %948
-  br i1 %951, label %952, label %1085
-
-; <label>:952                                     ; preds = %942
-  %953 = fptoui float %45 to i32
-  %954 = fptoui float %182 to i32
-  %955 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %956 = extractvalue %dx.types.CBufRet.i32 %955, 0
-  %957 = extractvalue %dx.types.CBufRet.i32 %955, 1
-  %958 = extractvalue %dx.types.CBufRet.i32 %955, 2
-  %959 = extractvalue %dx.types.CBufRet.i32 %955, 3
-  %960 = mul i32 %956, %953
-  %961 = call i32 @dx.op.tertiary.i32(i32 48, i32 %954, i32 %957, i32 %960)  ; IMad(a,b,c)
-  %962 = call i32 @dx.op.tertiary.i32(i32 48, i32 %949, i32 %958, i32 %961)  ; IMad(a,b,c)
-  %963 = call i32 @dx.op.tertiary.i32(i32 48, i32 %944, i32 %959, i32 %962)  ; IMad(a,b,c)
-  %964 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %963, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %965 = extractvalue %dx.types.ResRet.i16 %964, 0
-  %966 = uitofp i16 %965 to float
-  br label %1085
-
-; <label>:967                                     ; preds = %935
-  %968 = icmp eq i32 %940, 1
-  br i1 %968, label %969, label %998
-
-; <label>:969                                     ; preds = %967
-  %970 = add i32 %13, -1
-  %971 = uitofp i32 %970 to float
-  %972 = call float @dx.op.binary.f32(i32 35, float %937, float 0.000000e+00)  ; FMax(a,b)
-  %973 = call float @dx.op.binary.f32(i32 36, float %972, float %971)  ; FMin(a,b)
-  %974 = fptoui float %973 to i32
-  %975 = add i32 %15, -1
-  %976 = uitofp i32 %975 to float
-  %977 = call float @dx.op.binary.f32(i32 35, float %939, float 0.000000e+00)  ; FMax(a,b)
-  %978 = call float @dx.op.binary.f32(i32 36, float %977, float %976)  ; FMin(a,b)
-  %979 = fptoui float %978 to i32
-  %980 = uitofp i32 %979 to float
-  %981 = uitofp i32 %974 to float
-  %982 = fptoui float %45 to i32
-  %983 = fptoui float %182 to i32
-  %984 = fptoui float %980 to i32
-  %985 = fptoui float %981 to i32
-  %986 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %987 = extractvalue %dx.types.CBufRet.i32 %986, 0
-  %988 = extractvalue %dx.types.CBufRet.i32 %986, 1
-  %989 = extractvalue %dx.types.CBufRet.i32 %986, 2
-  %990 = extractvalue %dx.types.CBufRet.i32 %986, 3
-  %991 = mul i32 %987, %982
-  %992 = call i32 @dx.op.tertiary.i32(i32 48, i32 %983, i32 %988, i32 %991)  ; IMad(a,b,c)
-  %993 = call i32 @dx.op.tertiary.i32(i32 48, i32 %984, i32 %989, i32 %992)  ; IMad(a,b,c)
-  %994 = call i32 @dx.op.tertiary.i32(i32 48, i32 %985, i32 %990, i32 %993)  ; IMad(a,b,c)
-  %995 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %994, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %996 = extractvalue %dx.types.ResRet.i16 %995, 0
-  %997 = uitofp i16 %996 to float
-  br label %1085
-
-; <label>:998                                     ; preds = %967
-  %999 = icmp eq i32 %940, 2
-  br i1 %999, label %1000, label %1085
-
-; <label>:1000                                    ; preds = %998
-  %1001 = fsub fast float %22, %20
-  %1002 = fcmp fast olt float %937, %20
-  br i1 %1002, label %1003, label %1016
-
-; <label>:1003                                    ; preds = %1000
-  %1004 = fsub fast float %20, %937
-  %1005 = fdiv fast float %1004, %1001
-  %1006 = fptoui float %1005 to i32
-  %1007 = uitofp i32 %1006 to float
-  %1008 = fmul fast float %1007, %1001
-  %1009 = fsub fast float %1004, %1008
-  %1010 = and i32 %1006, 1
-  %1011 = icmp eq i32 %1010, 0
-  br i1 %1011, label %1012, label %1014
-
-; <label>:1012                                    ; preds = %1003
-  %1013 = fadd fast float %1009, %20
-  br label %1031
-
-; <label>:1014                                    ; preds = %1003
-  %1015 = fsub fast float %22, %1009
-  br label %1031
-
-; <label>:1016                                    ; preds = %1000
-  %1017 = fcmp fast ogt float %937, %22
-  br i1 %1017, label %1018, label %1031
-
-; <label>:1018                                    ; preds = %1016
-  %1019 = fsub fast float %937, %22
-  %1020 = fdiv fast float %1019, %1001
-  %1021 = fptoui float %1020 to i32
-  %1022 = uitofp i32 %1021 to float
-  %1023 = fmul fast float %1022, %1001
-  %1024 = fsub fast float %1019, %1023
-  %1025 = and i32 %1021, 1
-  %1026 = icmp eq i32 %1025, 0
-  br i1 %1026, label %1027, label %1029
-
-; <label>:1027                                    ; preds = %1018
-  %1028 = fsub fast float %22, %1024
-  br label %1031
-
-; <label>:1029                                    ; preds = %1018
-  %1030 = fadd fast float %1024, %20
-  br label %1031
-
-; <label>:1031                                    ; preds = %1029, %1027, %1016, %1014, %1012
-  %1032 = phi float [ %1013, %1012 ], [ %1015, %1014 ], [ %1028, %1027 ], [ %1030, %1029 ], [ %937, %1016 ]
-  %1033 = fptoui float %1032 to i32
-  %1034 = fsub fast float %24, %20
-  %1035 = fcmp fast olt float %939, %20
-  br i1 %1035, label %1036, label %1049
-
-; <label>:1036                                    ; preds = %1031
-  %1037 = fsub fast float %20, %939
-  %1038 = fdiv fast float %1037, %1034
-  %1039 = fptoui float %1038 to i32
-  %1040 = uitofp i32 %1039 to float
-  %1041 = fmul fast float %1040, %1034
-  %1042 = fsub fast float %1037, %1041
-  %1043 = and i32 %1039, 1
-  %1044 = icmp eq i32 %1043, 0
-  br i1 %1044, label %1045, label %1047
-
-; <label>:1045                                    ; preds = %1036
-  %1046 = fadd fast float %1042, %20
-  br label %1064
-
-; <label>:1047                                    ; preds = %1036
-  %1048 = fsub fast float %24, %1042
-  br label %1064
-
-; <label>:1049                                    ; preds = %1031
-  %1050 = fcmp fast ogt float %939, %24
-  br i1 %1050, label %1051, label %1064
-
-; <label>:1051                                    ; preds = %1049
-  %1052 = fsub fast float %939, %24
-  %1053 = fdiv fast float %1052, %1034
-  %1054 = fptoui float %1053 to i32
-  %1055 = uitofp i32 %1054 to float
-  %1056 = fmul fast float %1055, %1034
-  %1057 = fsub fast float %1052, %1056
-  %1058 = and i32 %1054, 1
-  %1059 = icmp eq i32 %1058, 0
-  br i1 %1059, label %1060, label %1062
-
-; <label>:1060                                    ; preds = %1051
-  %1061 = fsub fast float %24, %1057
-  br label %1064
-
-; <label>:1062                                    ; preds = %1051
-  %1063 = fadd fast float %1057, %20
-  br label %1064
-
-; <label>:1064                                    ; preds = %1062, %1060, %1049, %1047, %1045
-  %1065 = phi float [ %1046, %1045 ], [ %1048, %1047 ], [ %1061, %1060 ], [ %1063, %1062 ], [ %939, %1049 ]
-  %1066 = fptoui float %1065 to i32
-  %1067 = uitofp i32 %1066 to float
-  %1068 = uitofp i32 %1033 to float
-  %1069 = fptoui float %45 to i32
-  %1070 = fptoui float %182 to i32
-  %1071 = fptoui float %1067 to i32
-  %1072 = fptoui float %1068 to i32
-  %1073 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1074 = extractvalue %dx.types.CBufRet.i32 %1073, 0
-  %1075 = extractvalue %dx.types.CBufRet.i32 %1073, 1
-  %1076 = extractvalue %dx.types.CBufRet.i32 %1073, 2
-  %1077 = extractvalue %dx.types.CBufRet.i32 %1073, 3
-  %1078 = mul i32 %1074, %1069
-  %1079 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1070, i32 %1075, i32 %1078)  ; IMad(a,b,c)
-  %1080 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1071, i32 %1076, i32 %1079)  ; IMad(a,b,c)
-  %1081 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1072, i32 %1077, i32 %1080)  ; IMad(a,b,c)
-  %1082 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %1081, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1083 = extractvalue %dx.types.ResRet.i16 %1082, 0
-  %1084 = uitofp i16 %1083 to float
-  br label %1085
-
-; <label>:1085                                    ; preds = %1064, %998, %969, %952, %942
-  %1086 = phi float [ %966, %952 ], [ 0.000000e+00, %942 ], [ %997, %969 ], [ %1084, %1064 ], [ 0.000000e+00, %998 ]
-  br i1 %941, label %1087, label %1112
-
-; <label>:1087                                    ; preds = %1085
-  %1088 = fcmp fast oge float %936, 0.000000e+00
-  %1089 = fptoui float %936 to i32
-  %1090 = icmp ult i32 %1089, %13
-  %1091 = and i1 %1088, %1090
-  %1092 = fcmp fast oge float %939, 0.000000e+00
-  %1093 = and i1 %1092, %1091
-  %1094 = fptoui float %939 to i32
-  %1095 = icmp ult i32 %1094, %15
-  %1096 = and i1 %1095, %1093
-  br i1 %1096, label %1097, label %1230
-
-; <label>:1097                                    ; preds = %1087
-  %1098 = fptoui float %45 to i32
-  %1099 = fptoui float %182 to i32
-  %1100 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1101 = extractvalue %dx.types.CBufRet.i32 %1100, 0
-  %1102 = extractvalue %dx.types.CBufRet.i32 %1100, 1
-  %1103 = extractvalue %dx.types.CBufRet.i32 %1100, 2
-  %1104 = extractvalue %dx.types.CBufRet.i32 %1100, 3
-  %1105 = mul i32 %1101, %1098
-  %1106 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1099, i32 %1102, i32 %1105)  ; IMad(a,b,c)
-  %1107 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1094, i32 %1103, i32 %1106)  ; IMad(a,b,c)
-  %1108 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1089, i32 %1104, i32 %1107)  ; IMad(a,b,c)
-  %1109 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %1108, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1110 = extractvalue %dx.types.ResRet.i16 %1109, 0
-  %1111 = uitofp i16 %1110 to float
-  br label %1230
-
-; <label>:1112                                    ; preds = %1085
-  %1113 = icmp eq i32 %940, 1
-  br i1 %1113, label %1114, label %1143
-
-; <label>:1114                                    ; preds = %1112
-  %1115 = add i32 %13, -1
-  %1116 = uitofp i32 %1115 to float
-  %1117 = call float @dx.op.binary.f32(i32 35, float %936, float 0.000000e+00)  ; FMax(a,b)
-  %1118 = call float @dx.op.binary.f32(i32 36, float %1117, float %1116)  ; FMin(a,b)
-  %1119 = fptoui float %1118 to i32
-  %1120 = add i32 %15, -1
-  %1121 = uitofp i32 %1120 to float
-  %1122 = call float @dx.op.binary.f32(i32 35, float %939, float 0.000000e+00)  ; FMax(a,b)
-  %1123 = call float @dx.op.binary.f32(i32 36, float %1122, float %1121)  ; FMin(a,b)
-  %1124 = fptoui float %1123 to i32
-  %1125 = uitofp i32 %1124 to float
-  %1126 = uitofp i32 %1119 to float
-  %1127 = fptoui float %45 to i32
-  %1128 = fptoui float %182 to i32
-  %1129 = fptoui float %1125 to i32
-  %1130 = fptoui float %1126 to i32
-  %1131 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1132 = extractvalue %dx.types.CBufRet.i32 %1131, 0
-  %1133 = extractvalue %dx.types.CBufRet.i32 %1131, 1
-  %1134 = extractvalue %dx.types.CBufRet.i32 %1131, 2
-  %1135 = extractvalue %dx.types.CBufRet.i32 %1131, 3
-  %1136 = mul i32 %1132, %1127
-  %1137 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1128, i32 %1133, i32 %1136)  ; IMad(a,b,c)
-  %1138 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1129, i32 %1134, i32 %1137)  ; IMad(a,b,c)
-  %1139 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1130, i32 %1135, i32 %1138)  ; IMad(a,b,c)
-  %1140 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %1139, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1141 = extractvalue %dx.types.ResRet.i16 %1140, 0
-  %1142 = uitofp i16 %1141 to float
-  br label %1230
-
-; <label>:1143                                    ; preds = %1112
-  %1144 = icmp eq i32 %940, 2
-  br i1 %1144, label %1145, label %1230
-
-; <label>:1145                                    ; preds = %1143
-  %1146 = fsub fast float %22, %20
-  %1147 = fcmp fast olt float %936, %20
-  br i1 %1147, label %1148, label %1161
-
-; <label>:1148                                    ; preds = %1145
-  %1149 = fsub fast float %20, %936
-  %1150 = fdiv fast float %1149, %1146
-  %1151 = fptoui float %1150 to i32
-  %1152 = uitofp i32 %1151 to float
-  %1153 = fmul fast float %1152, %1146
-  %1154 = fsub fast float %1149, %1153
-  %1155 = and i32 %1151, 1
-  %1156 = icmp eq i32 %1155, 0
-  br i1 %1156, label %1157, label %1159
-
-; <label>:1157                                    ; preds = %1148
-  %1158 = fadd fast float %1154, %20
-  br label %1176
-
-; <label>:1159                                    ; preds = %1148
-  %1160 = fsub fast float %22, %1154
-  br label %1176
-
-; <label>:1161                                    ; preds = %1145
-  %1162 = fcmp fast ogt float %936, %22
-  br i1 %1162, label %1163, label %1176
-
-; <label>:1163                                    ; preds = %1161
-  %1164 = fsub fast float %936, %22
-  %1165 = fdiv fast float %1164, %1146
-  %1166 = fptoui float %1165 to i32
-  %1167 = uitofp i32 %1166 to float
-  %1168 = fmul fast float %1167, %1146
-  %1169 = fsub fast float %1164, %1168
-  %1170 = and i32 %1166, 1
-  %1171 = icmp eq i32 %1170, 0
-  br i1 %1171, label %1172, label %1174
-
-; <label>:1172                                    ; preds = %1163
-  %1173 = fsub fast float %22, %1169
-  br label %1176
-
-; <label>:1174                                    ; preds = %1163
-  %1175 = fadd fast float %1169, %20
-  br label %1176
-
-; <label>:1176                                    ; preds = %1174, %1172, %1161, %1159, %1157
-  %1177 = phi float [ %1158, %1157 ], [ %1160, %1159 ], [ %1173, %1172 ], [ %1175, %1174 ], [ %936, %1161 ]
-  %1178 = fptoui float %1177 to i32
-  %1179 = fsub fast float %24, %20
-  %1180 = fcmp fast olt float %939, %20
-  br i1 %1180, label %1181, label %1194
-
-; <label>:1181                                    ; preds = %1176
-  %1182 = fsub fast float %20, %939
-  %1183 = fdiv fast float %1182, %1179
-  %1184 = fptoui float %1183 to i32
-  %1185 = uitofp i32 %1184 to float
-  %1186 = fmul fast float %1185, %1179
-  %1187 = fsub fast float %1182, %1186
-  %1188 = and i32 %1184, 1
-  %1189 = icmp eq i32 %1188, 0
-  br i1 %1189, label %1190, label %1192
-
-; <label>:1190                                    ; preds = %1181
-  %1191 = fadd fast float %1187, %20
-  br label %1209
-
-; <label>:1192                                    ; preds = %1181
-  %1193 = fsub fast float %24, %1187
-  br label %1209
-
-; <label>:1194                                    ; preds = %1176
-  %1195 = fcmp fast ogt float %939, %24
-  br i1 %1195, label %1196, label %1209
-
-; <label>:1196                                    ; preds = %1194
-  %1197 = fsub fast float %939, %24
-  %1198 = fdiv fast float %1197, %1179
-  %1199 = fptoui float %1198 to i32
-  %1200 = uitofp i32 %1199 to float
-  %1201 = fmul fast float %1200, %1179
-  %1202 = fsub fast float %1197, %1201
-  %1203 = and i32 %1199, 1
-  %1204 = icmp eq i32 %1203, 0
-  br i1 %1204, label %1205, label %1207
-
-; <label>:1205                                    ; preds = %1196
-  %1206 = fsub fast float %24, %1202
-  br label %1209
-
-; <label>:1207                                    ; preds = %1196
-  %1208 = fadd fast float %1202, %20
-  br label %1209
-
-; <label>:1209                                    ; preds = %1207, %1205, %1194, %1192, %1190
-  %1210 = phi float [ %1191, %1190 ], [ %1193, %1192 ], [ %1206, %1205 ], [ %1208, %1207 ], [ %939, %1194 ]
-  %1211 = fptoui float %1210 to i32
-  %1212 = uitofp i32 %1211 to float
-  %1213 = uitofp i32 %1178 to float
-  %1214 = fptoui float %45 to i32
-  %1215 = fptoui float %182 to i32
-  %1216 = fptoui float %1212 to i32
-  %1217 = fptoui float %1213 to i32
-  %1218 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1219 = extractvalue %dx.types.CBufRet.i32 %1218, 0
-  %1220 = extractvalue %dx.types.CBufRet.i32 %1218, 1
-  %1221 = extractvalue %dx.types.CBufRet.i32 %1218, 2
-  %1222 = extractvalue %dx.types.CBufRet.i32 %1218, 3
-  %1223 = mul i32 %1219, %1214
-  %1224 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1215, i32 %1220, i32 %1223)  ; IMad(a,b,c)
-  %1225 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1216, i32 %1221, i32 %1224)  ; IMad(a,b,c)
-  %1226 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1217, i32 %1222, i32 %1225)  ; IMad(a,b,c)
-  %1227 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %1226, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1228 = extractvalue %dx.types.ResRet.i16 %1227, 0
-  %1229 = uitofp i16 %1228 to float
-  br label %1230
-
-; <label>:1230                                    ; preds = %1209, %1143, %1114, %1097, %1087
-  %1231 = phi float [ %1111, %1097 ], [ 0.000000e+00, %1087 ], [ %1142, %1114 ], [ %1229, %1209 ], [ 0.000000e+00, %1143 ]
-  %1232 = fadd fast float %936, 1.000000e+00
-  br i1 %941, label %1233, label %1258
-
-; <label>:1233                                    ; preds = %1230
-  %1234 = fcmp fast oge float %1232, 0.000000e+00
-  %1235 = fptoui float %1232 to i32
-  %1236 = icmp ult i32 %1235, %13
-  %1237 = and i1 %1234, %1236
-  %1238 = fcmp fast oge float %939, 0.000000e+00
-  %1239 = and i1 %1238, %1237
-  %1240 = fptoui float %939 to i32
-  %1241 = icmp ult i32 %1240, %15
-  %1242 = and i1 %1241, %1239
-  br i1 %1242, label %1243, label %1376
-
-; <label>:1243                                    ; preds = %1233
-  %1244 = fptoui float %45 to i32
-  %1245 = fptoui float %182 to i32
-  %1246 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1247 = extractvalue %dx.types.CBufRet.i32 %1246, 0
-  %1248 = extractvalue %dx.types.CBufRet.i32 %1246, 1
-  %1249 = extractvalue %dx.types.CBufRet.i32 %1246, 2
-  %1250 = extractvalue %dx.types.CBufRet.i32 %1246, 3
-  %1251 = mul i32 %1247, %1244
-  %1252 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1245, i32 %1248, i32 %1251)  ; IMad(a,b,c)
-  %1253 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1240, i32 %1249, i32 %1252)  ; IMad(a,b,c)
-  %1254 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1235, i32 %1250, i32 %1253)  ; IMad(a,b,c)
-  %1255 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %1254, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1256 = extractvalue %dx.types.ResRet.i16 %1255, 0
-  %1257 = uitofp i16 %1256 to float
-  br label %1376
-
-; <label>:1258                                    ; preds = %1230
-  %1259 = icmp eq i32 %940, 1
-  br i1 %1259, label %1260, label %1289
-
-; <label>:1260                                    ; preds = %1258
-  %1261 = add i32 %13, -1
-  %1262 = uitofp i32 %1261 to float
-  %1263 = call float @dx.op.binary.f32(i32 35, float %1232, float 0.000000e+00)  ; FMax(a,b)
-  %1264 = call float @dx.op.binary.f32(i32 36, float %1263, float %1262)  ; FMin(a,b)
-  %1265 = fptoui float %1264 to i32
-  %1266 = add i32 %15, -1
-  %1267 = uitofp i32 %1266 to float
-  %1268 = call float @dx.op.binary.f32(i32 35, float %939, float 0.000000e+00)  ; FMax(a,b)
-  %1269 = call float @dx.op.binary.f32(i32 36, float %1268, float %1267)  ; FMin(a,b)
-  %1270 = fptoui float %1269 to i32
-  %1271 = uitofp i32 %1270 to float
-  %1272 = uitofp i32 %1265 to float
-  %1273 = fptoui float %45 to i32
-  %1274 = fptoui float %182 to i32
-  %1275 = fptoui float %1271 to i32
-  %1276 = fptoui float %1272 to i32
-  %1277 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1278 = extractvalue %dx.types.CBufRet.i32 %1277, 0
-  %1279 = extractvalue %dx.types.CBufRet.i32 %1277, 1
-  %1280 = extractvalue %dx.types.CBufRet.i32 %1277, 2
-  %1281 = extractvalue %dx.types.CBufRet.i32 %1277, 3
-  %1282 = mul i32 %1278, %1273
-  %1283 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1274, i32 %1279, i32 %1282)  ; IMad(a,b,c)
-  %1284 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1275, i32 %1280, i32 %1283)  ; IMad(a,b,c)
-  %1285 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1276, i32 %1281, i32 %1284)  ; IMad(a,b,c)
-  %1286 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %1285, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1287 = extractvalue %dx.types.ResRet.i16 %1286, 0
-  %1288 = uitofp i16 %1287 to float
-  br label %1376
-
-; <label>:1289                                    ; preds = %1258
-  %1290 = icmp eq i32 %940, 2
-  br i1 %1290, label %1291, label %1376
-
-; <label>:1291                                    ; preds = %1289
-  %1292 = fsub fast float %22, %20
-  %1293 = fcmp fast olt float %1232, %20
-  br i1 %1293, label %1294, label %1307
-
-; <label>:1294                                    ; preds = %1291
-  %1295 = fsub fast float %20, %1232
-  %1296 = fdiv fast float %1295, %1292
-  %1297 = fptoui float %1296 to i32
-  %1298 = uitofp i32 %1297 to float
-  %1299 = fmul fast float %1298, %1292
-  %1300 = fsub fast float %1295, %1299
-  %1301 = and i32 %1297, 1
-  %1302 = icmp eq i32 %1301, 0
-  br i1 %1302, label %1303, label %1305
-
-; <label>:1303                                    ; preds = %1294
-  %1304 = fadd fast float %1300, %20
-  br label %1322
-
-; <label>:1305                                    ; preds = %1294
-  %1306 = fsub fast float %22, %1300
-  br label %1322
-
-; <label>:1307                                    ; preds = %1291
-  %1308 = fcmp fast ogt float %1232, %22
-  br i1 %1308, label %1309, label %1322
-
-; <label>:1309                                    ; preds = %1307
-  %1310 = fsub fast float %1232, %22
-  %1311 = fdiv fast float %1310, %1292
-  %1312 = fptoui float %1311 to i32
-  %1313 = uitofp i32 %1312 to float
-  %1314 = fmul fast float %1313, %1292
-  %1315 = fsub fast float %1310, %1314
-  %1316 = and i32 %1312, 1
-  %1317 = icmp eq i32 %1316, 0
-  br i1 %1317, label %1318, label %1320
-
-; <label>:1318                                    ; preds = %1309
-  %1319 = fsub fast float %22, %1315
-  br label %1322
-
-; <label>:1320                                    ; preds = %1309
-  %1321 = fadd fast float %1315, %20
-  br label %1322
-
-; <label>:1322                                    ; preds = %1320, %1318, %1307, %1305, %1303
-  %1323 = phi float [ %1304, %1303 ], [ %1306, %1305 ], [ %1319, %1318 ], [ %1321, %1320 ], [ %1232, %1307 ]
-  %1324 = fptoui float %1323 to i32
-  %1325 = fsub fast float %24, %20
-  %1326 = fcmp fast olt float %939, %20
-  br i1 %1326, label %1327, label %1340
-
-; <label>:1327                                    ; preds = %1322
-  %1328 = fsub fast float %20, %939
-  %1329 = fdiv fast float %1328, %1325
-  %1330 = fptoui float %1329 to i32
-  %1331 = uitofp i32 %1330 to float
-  %1332 = fmul fast float %1331, %1325
-  %1333 = fsub fast float %1328, %1332
-  %1334 = and i32 %1330, 1
-  %1335 = icmp eq i32 %1334, 0
-  br i1 %1335, label %1336, label %1338
-
-; <label>:1336                                    ; preds = %1327
-  %1337 = fadd fast float %1333, %20
-  br label %1355
-
-; <label>:1338                                    ; preds = %1327
-  %1339 = fsub fast float %24, %1333
-  br label %1355
-
-; <label>:1340                                    ; preds = %1322
-  %1341 = fcmp fast ogt float %939, %24
-  br i1 %1341, label %1342, label %1355
-
-; <label>:1342                                    ; preds = %1340
-  %1343 = fsub fast float %939, %24
-  %1344 = fdiv fast float %1343, %1325
-  %1345 = fptoui float %1344 to i32
-  %1346 = uitofp i32 %1345 to float
-  %1347 = fmul fast float %1346, %1325
-  %1348 = fsub fast float %1343, %1347
-  %1349 = and i32 %1345, 1
-  %1350 = icmp eq i32 %1349, 0
-  br i1 %1350, label %1351, label %1353
-
-; <label>:1351                                    ; preds = %1342
-  %1352 = fsub fast float %24, %1348
-  br label %1355
-
-; <label>:1353                                    ; preds = %1342
-  %1354 = fadd fast float %1348, %20
-  br label %1355
-
-; <label>:1355                                    ; preds = %1353, %1351, %1340, %1338, %1336
-  %1356 = phi float [ %1337, %1336 ], [ %1339, %1338 ], [ %1352, %1351 ], [ %1354, %1353 ], [ %939, %1340 ]
-  %1357 = fptoui float %1356 to i32
-  %1358 = uitofp i32 %1357 to float
-  %1359 = uitofp i32 %1324 to float
-  %1360 = fptoui float %45 to i32
-  %1361 = fptoui float %182 to i32
-  %1362 = fptoui float %1358 to i32
-  %1363 = fptoui float %1359 to i32
-  %1364 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1365 = extractvalue %dx.types.CBufRet.i32 %1364, 0
-  %1366 = extractvalue %dx.types.CBufRet.i32 %1364, 1
-  %1367 = extractvalue %dx.types.CBufRet.i32 %1364, 2
-  %1368 = extractvalue %dx.types.CBufRet.i32 %1364, 3
-  %1369 = mul i32 %1365, %1360
-  %1370 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1361, i32 %1366, i32 %1369)  ; IMad(a,b,c)
-  %1371 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1362, i32 %1367, i32 %1370)  ; IMad(a,b,c)
-  %1372 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1363, i32 %1368, i32 %1371)  ; IMad(a,b,c)
-  %1373 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %1372, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1374 = extractvalue %dx.types.ResRet.i16 %1373, 0
-  %1375 = uitofp i16 %1374 to float
-  br label %1376
-
-; <label>:1376                                    ; preds = %1355, %1289, %1260, %1243, %1233
-  %1377 = phi float [ %1257, %1243 ], [ 0.000000e+00, %1233 ], [ %1288, %1260 ], [ %1375, %1355 ], [ 0.000000e+00, %1289 ]
-  %1378 = fadd fast float %936, 2.000000e+00
-  br i1 %941, label %1379, label %1404
-
-; <label>:1379                                    ; preds = %1376
-  %1380 = fcmp fast oge float %1378, 0.000000e+00
-  %1381 = fptoui float %1378 to i32
-  %1382 = icmp ult i32 %1381, %13
-  %1383 = and i1 %1380, %1382
-  %1384 = fcmp fast oge float %939, 0.000000e+00
-  %1385 = and i1 %1384, %1383
-  %1386 = fptoui float %939 to i32
-  %1387 = icmp ult i32 %1386, %15
-  %1388 = and i1 %1387, %1385
-  br i1 %1388, label %1389, label %1522
-
-; <label>:1389                                    ; preds = %1379
-  %1390 = fptoui float %45 to i32
-  %1391 = fptoui float %182 to i32
-  %1392 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1393 = extractvalue %dx.types.CBufRet.i32 %1392, 0
-  %1394 = extractvalue %dx.types.CBufRet.i32 %1392, 1
-  %1395 = extractvalue %dx.types.CBufRet.i32 %1392, 2
-  %1396 = extractvalue %dx.types.CBufRet.i32 %1392, 3
-  %1397 = mul i32 %1393, %1390
-  %1398 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1391, i32 %1394, i32 %1397)  ; IMad(a,b,c)
-  %1399 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1386, i32 %1395, i32 %1398)  ; IMad(a,b,c)
-  %1400 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1381, i32 %1396, i32 %1399)  ; IMad(a,b,c)
-  %1401 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %1400, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1402 = extractvalue %dx.types.ResRet.i16 %1401, 0
-  %1403 = uitofp i16 %1402 to float
-  br label %1522
-
-; <label>:1404                                    ; preds = %1376
-  %1405 = icmp eq i32 %940, 1
-  br i1 %1405, label %1406, label %1435
-
-; <label>:1406                                    ; preds = %1404
-  %1407 = add i32 %13, -1
-  %1408 = uitofp i32 %1407 to float
-  %1409 = call float @dx.op.binary.f32(i32 35, float %1378, float 0.000000e+00)  ; FMax(a,b)
-  %1410 = call float @dx.op.binary.f32(i32 36, float %1409, float %1408)  ; FMin(a,b)
-  %1411 = fptoui float %1410 to i32
-  %1412 = add i32 %15, -1
-  %1413 = uitofp i32 %1412 to float
-  %1414 = call float @dx.op.binary.f32(i32 35, float %939, float 0.000000e+00)  ; FMax(a,b)
-  %1415 = call float @dx.op.binary.f32(i32 36, float %1414, float %1413)  ; FMin(a,b)
-  %1416 = fptoui float %1415 to i32
-  %1417 = uitofp i32 %1416 to float
-  %1418 = uitofp i32 %1411 to float
-  %1419 = fptoui float %45 to i32
-  %1420 = fptoui float %182 to i32
-  %1421 = fptoui float %1417 to i32
-  %1422 = fptoui float %1418 to i32
-  %1423 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1424 = extractvalue %dx.types.CBufRet.i32 %1423, 0
-  %1425 = extractvalue %dx.types.CBufRet.i32 %1423, 1
-  %1426 = extractvalue %dx.types.CBufRet.i32 %1423, 2
-  %1427 = extractvalue %dx.types.CBufRet.i32 %1423, 3
-  %1428 = mul i32 %1424, %1419
-  %1429 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1420, i32 %1425, i32 %1428)  ; IMad(a,b,c)
-  %1430 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1421, i32 %1426, i32 %1429)  ; IMad(a,b,c)
-  %1431 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1422, i32 %1427, i32 %1430)  ; IMad(a,b,c)
-  %1432 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %1431, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1433 = extractvalue %dx.types.ResRet.i16 %1432, 0
-  %1434 = uitofp i16 %1433 to float
-  br label %1522
-
-; <label>:1435                                    ; preds = %1404
-  %1436 = icmp eq i32 %940, 2
-  br i1 %1436, label %1437, label %1522
-
-; <label>:1437                                    ; preds = %1435
-  %1438 = fsub fast float %22, %20
-  %1439 = fcmp fast olt float %1378, %20
-  br i1 %1439, label %1440, label %1453
-
-; <label>:1440                                    ; preds = %1437
-  %1441 = fsub fast float %20, %1378
-  %1442 = fdiv fast float %1441, %1438
-  %1443 = fptoui float %1442 to i32
-  %1444 = uitofp i32 %1443 to float
-  %1445 = fmul fast float %1444, %1438
-  %1446 = fsub fast float %1441, %1445
-  %1447 = and i32 %1443, 1
-  %1448 = icmp eq i32 %1447, 0
-  br i1 %1448, label %1449, label %1451
-
-; <label>:1449                                    ; preds = %1440
-  %1450 = fadd fast float %1446, %20
-  br label %1468
-
-; <label>:1451                                    ; preds = %1440
-  %1452 = fsub fast float %22, %1446
-  br label %1468
-
-; <label>:1453                                    ; preds = %1437
-  %1454 = fcmp fast ogt float %1378, %22
-  br i1 %1454, label %1455, label %1468
-
-; <label>:1455                                    ; preds = %1453
-  %1456 = fsub fast float %1378, %22
-  %1457 = fdiv fast float %1456, %1438
-  %1458 = fptoui float %1457 to i32
-  %1459 = uitofp i32 %1458 to float
-  %1460 = fmul fast float %1459, %1438
-  %1461 = fsub fast float %1456, %1460
-  %1462 = and i32 %1458, 1
-  %1463 = icmp eq i32 %1462, 0
-  br i1 %1463, label %1464, label %1466
-
-; <label>:1464                                    ; preds = %1455
-  %1465 = fsub fast float %22, %1461
-  br label %1468
-
-; <label>:1466                                    ; preds = %1455
-  %1467 = fadd fast float %1461, %20
-  br label %1468
-
-; <label>:1468                                    ; preds = %1466, %1464, %1453, %1451, %1449
-  %1469 = phi float [ %1450, %1449 ], [ %1452, %1451 ], [ %1465, %1464 ], [ %1467, %1466 ], [ %1378, %1453 ]
-  %1470 = fptoui float %1469 to i32
-  %1471 = fsub fast float %24, %20
-  %1472 = fcmp fast olt float %939, %20
-  br i1 %1472, label %1473, label %1486
-
-; <label>:1473                                    ; preds = %1468
-  %1474 = fsub fast float %20, %939
-  %1475 = fdiv fast float %1474, %1471
-  %1476 = fptoui float %1475 to i32
-  %1477 = uitofp i32 %1476 to float
-  %1478 = fmul fast float %1477, %1471
-  %1479 = fsub fast float %1474, %1478
-  %1480 = and i32 %1476, 1
-  %1481 = icmp eq i32 %1480, 0
-  br i1 %1481, label %1482, label %1484
-
-; <label>:1482                                    ; preds = %1473
-  %1483 = fadd fast float %1479, %20
-  br label %1501
-
-; <label>:1484                                    ; preds = %1473
-  %1485 = fsub fast float %24, %1479
-  br label %1501
-
-; <label>:1486                                    ; preds = %1468
-  %1487 = fcmp fast ogt float %939, %24
-  br i1 %1487, label %1488, label %1501
-
-; <label>:1488                                    ; preds = %1486
-  %1489 = fsub fast float %939, %24
-  %1490 = fdiv fast float %1489, %1471
-  %1491 = fptoui float %1490 to i32
-  %1492 = uitofp i32 %1491 to float
-  %1493 = fmul fast float %1492, %1471
-  %1494 = fsub fast float %1489, %1493
-  %1495 = and i32 %1491, 1
-  %1496 = icmp eq i32 %1495, 0
-  br i1 %1496, label %1497, label %1499
-
-; <label>:1497                                    ; preds = %1488
-  %1498 = fsub fast float %24, %1494
-  br label %1501
-
-; <label>:1499                                    ; preds = %1488
-  %1500 = fadd fast float %1494, %20
-  br label %1501
-
-; <label>:1501                                    ; preds = %1499, %1497, %1486, %1484, %1482
-  %1502 = phi float [ %1483, %1482 ], [ %1485, %1484 ], [ %1498, %1497 ], [ %1500, %1499 ], [ %939, %1486 ]
-  %1503 = fptoui float %1502 to i32
-  %1504 = uitofp i32 %1503 to float
-  %1505 = uitofp i32 %1470 to float
-  %1506 = fptoui float %45 to i32
-  %1507 = fptoui float %182 to i32
-  %1508 = fptoui float %1504 to i32
-  %1509 = fptoui float %1505 to i32
-  %1510 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1511 = extractvalue %dx.types.CBufRet.i32 %1510, 0
-  %1512 = extractvalue %dx.types.CBufRet.i32 %1510, 1
-  %1513 = extractvalue %dx.types.CBufRet.i32 %1510, 2
-  %1514 = extractvalue %dx.types.CBufRet.i32 %1510, 3
-  %1515 = mul i32 %1511, %1506
-  %1516 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1507, i32 %1512, i32 %1515)  ; IMad(a,b,c)
-  %1517 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1508, i32 %1513, i32 %1516)  ; IMad(a,b,c)
-  %1518 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1509, i32 %1514, i32 %1517)  ; IMad(a,b,c)
-  %1519 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %1518, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1520 = extractvalue %dx.types.ResRet.i16 %1519, 0
-  %1521 = uitofp i16 %1520 to float
-  br label %1522
-
-; <label>:1522                                    ; preds = %1501, %1435, %1406, %1389, %1379
-  %1523 = phi float [ %1403, %1389 ], [ 0.000000e+00, %1379 ], [ %1434, %1406 ], [ %1521, %1501 ], [ 0.000000e+00, %1435 ]
-  br i1 %941, label %1524, label %1549
-
-; <label>:1524                                    ; preds = %1522
-  %1525 = fcmp fast oge float %937, 0.000000e+00
-  %1526 = fptoui float %937 to i32
-  %1527 = icmp ult i32 %1526, %13
-  %1528 = and i1 %1525, %1527
-  %1529 = fcmp fast oge float %938, 0.000000e+00
-  %1530 = and i1 %1529, %1528
-  %1531 = fptoui float %938 to i32
-  %1532 = icmp ult i32 %1531, %15
-  %1533 = and i1 %1532, %1530
-  br i1 %1533, label %1534, label %1667
-
-; <label>:1534                                    ; preds = %1524
-  %1535 = fptoui float %45 to i32
-  %1536 = fptoui float %182 to i32
-  %1537 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1538 = extractvalue %dx.types.CBufRet.i32 %1537, 0
-  %1539 = extractvalue %dx.types.CBufRet.i32 %1537, 1
-  %1540 = extractvalue %dx.types.CBufRet.i32 %1537, 2
-  %1541 = extractvalue %dx.types.CBufRet.i32 %1537, 3
-  %1542 = mul i32 %1538, %1535
-  %1543 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1536, i32 %1539, i32 %1542)  ; IMad(a,b,c)
-  %1544 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1531, i32 %1540, i32 %1543)  ; IMad(a,b,c)
-  %1545 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1526, i32 %1541, i32 %1544)  ; IMad(a,b,c)
-  %1546 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %1545, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1547 = extractvalue %dx.types.ResRet.i16 %1546, 0
-  %1548 = uitofp i16 %1547 to float
-  br label %1667
-
-; <label>:1549                                    ; preds = %1522
-  %1550 = icmp eq i32 %940, 1
-  br i1 %1550, label %1551, label %1580
-
-; <label>:1551                                    ; preds = %1549
-  %1552 = add i32 %13, -1
-  %1553 = uitofp i32 %1552 to float
-  %1554 = call float @dx.op.binary.f32(i32 35, float %937, float 0.000000e+00)  ; FMax(a,b)
-  %1555 = call float @dx.op.binary.f32(i32 36, float %1554, float %1553)  ; FMin(a,b)
-  %1556 = fptoui float %1555 to i32
-  %1557 = add i32 %15, -1
-  %1558 = uitofp i32 %1557 to float
-  %1559 = call float @dx.op.binary.f32(i32 35, float %938, float 0.000000e+00)  ; FMax(a,b)
-  %1560 = call float @dx.op.binary.f32(i32 36, float %1559, float %1558)  ; FMin(a,b)
-  %1561 = fptoui float %1560 to i32
-  %1562 = uitofp i32 %1561 to float
-  %1563 = uitofp i32 %1556 to float
-  %1564 = fptoui float %45 to i32
-  %1565 = fptoui float %182 to i32
-  %1566 = fptoui float %1562 to i32
-  %1567 = fptoui float %1563 to i32
-  %1568 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1569 = extractvalue %dx.types.CBufRet.i32 %1568, 0
-  %1570 = extractvalue %dx.types.CBufRet.i32 %1568, 1
-  %1571 = extractvalue %dx.types.CBufRet.i32 %1568, 2
-  %1572 = extractvalue %dx.types.CBufRet.i32 %1568, 3
-  %1573 = mul i32 %1569, %1564
-  %1574 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1565, i32 %1570, i32 %1573)  ; IMad(a,b,c)
-  %1575 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1566, i32 %1571, i32 %1574)  ; IMad(a,b,c)
-  %1576 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1567, i32 %1572, i32 %1575)  ; IMad(a,b,c)
-  %1577 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %1576, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1578 = extractvalue %dx.types.ResRet.i16 %1577, 0
-  %1579 = uitofp i16 %1578 to float
-  br label %1667
-
-; <label>:1580                                    ; preds = %1549
-  %1581 = icmp eq i32 %940, 2
-  br i1 %1581, label %1582, label %1667
-
-; <label>:1582                                    ; preds = %1580
-  %1583 = fsub fast float %22, %20
-  %1584 = fcmp fast olt float %937, %20
-  br i1 %1584, label %1585, label %1598
-
-; <label>:1585                                    ; preds = %1582
-  %1586 = fsub fast float %20, %937
-  %1587 = fdiv fast float %1586, %1583
-  %1588 = fptoui float %1587 to i32
-  %1589 = uitofp i32 %1588 to float
-  %1590 = fmul fast float %1589, %1583
-  %1591 = fsub fast float %1586, %1590
-  %1592 = and i32 %1588, 1
-  %1593 = icmp eq i32 %1592, 0
-  br i1 %1593, label %1594, label %1596
-
-; <label>:1594                                    ; preds = %1585
-  %1595 = fadd fast float %1591, %20
-  br label %1613
-
-; <label>:1596                                    ; preds = %1585
-  %1597 = fsub fast float %22, %1591
-  br label %1613
-
-; <label>:1598                                    ; preds = %1582
-  %1599 = fcmp fast ogt float %937, %22
-  br i1 %1599, label %1600, label %1613
-
-; <label>:1600                                    ; preds = %1598
-  %1601 = fsub fast float %937, %22
-  %1602 = fdiv fast float %1601, %1583
-  %1603 = fptoui float %1602 to i32
-  %1604 = uitofp i32 %1603 to float
-  %1605 = fmul fast float %1604, %1583
-  %1606 = fsub fast float %1601, %1605
-  %1607 = and i32 %1603, 1
-  %1608 = icmp eq i32 %1607, 0
-  br i1 %1608, label %1609, label %1611
-
-; <label>:1609                                    ; preds = %1600
-  %1610 = fsub fast float %22, %1606
-  br label %1613
-
-; <label>:1611                                    ; preds = %1600
-  %1612 = fadd fast float %1606, %20
-  br label %1613
-
-; <label>:1613                                    ; preds = %1611, %1609, %1598, %1596, %1594
-  %1614 = phi float [ %1595, %1594 ], [ %1597, %1596 ], [ %1610, %1609 ], [ %1612, %1611 ], [ %937, %1598 ]
-  %1615 = fptoui float %1614 to i32
-  %1616 = fsub fast float %24, %20
-  %1617 = fcmp fast olt float %938, %20
-  br i1 %1617, label %1618, label %1631
-
-; <label>:1618                                    ; preds = %1613
-  %1619 = fsub fast float %20, %938
-  %1620 = fdiv fast float %1619, %1616
-  %1621 = fptoui float %1620 to i32
-  %1622 = uitofp i32 %1621 to float
-  %1623 = fmul fast float %1622, %1616
-  %1624 = fsub fast float %1619, %1623
-  %1625 = and i32 %1621, 1
-  %1626 = icmp eq i32 %1625, 0
-  br i1 %1626, label %1627, label %1629
-
-; <label>:1627                                    ; preds = %1618
-  %1628 = fadd fast float %1624, %20
-  br label %1646
-
-; <label>:1629                                    ; preds = %1618
-  %1630 = fsub fast float %24, %1624
-  br label %1646
-
-; <label>:1631                                    ; preds = %1613
-  %1632 = fcmp fast ogt float %938, %24
-  br i1 %1632, label %1633, label %1646
-
-; <label>:1633                                    ; preds = %1631
-  %1634 = fsub fast float %938, %24
-  %1635 = fdiv fast float %1634, %1616
-  %1636 = fptoui float %1635 to i32
-  %1637 = uitofp i32 %1636 to float
-  %1638 = fmul fast float %1637, %1616
-  %1639 = fsub fast float %1634, %1638
-  %1640 = and i32 %1636, 1
-  %1641 = icmp eq i32 %1640, 0
-  br i1 %1641, label %1642, label %1644
-
-; <label>:1642                                    ; preds = %1633
-  %1643 = fsub fast float %24, %1639
-  br label %1646
-
-; <label>:1644                                    ; preds = %1633
-  %1645 = fadd fast float %1639, %20
-  br label %1646
-
-; <label>:1646                                    ; preds = %1644, %1642, %1631, %1629, %1627
-  %1647 = phi float [ %1628, %1627 ], [ %1630, %1629 ], [ %1643, %1642 ], [ %1645, %1644 ], [ %938, %1631 ]
-  %1648 = fptoui float %1647 to i32
-  %1649 = uitofp i32 %1648 to float
-  %1650 = uitofp i32 %1615 to float
-  %1651 = fptoui float %45 to i32
-  %1652 = fptoui float %182 to i32
-  %1653 = fptoui float %1649 to i32
-  %1654 = fptoui float %1650 to i32
-  %1655 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1656 = extractvalue %dx.types.CBufRet.i32 %1655, 0
-  %1657 = extractvalue %dx.types.CBufRet.i32 %1655, 1
-  %1658 = extractvalue %dx.types.CBufRet.i32 %1655, 2
-  %1659 = extractvalue %dx.types.CBufRet.i32 %1655, 3
-  %1660 = mul i32 %1656, %1651
-  %1661 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1652, i32 %1657, i32 %1660)  ; IMad(a,b,c)
-  %1662 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1653, i32 %1658, i32 %1661)  ; IMad(a,b,c)
-  %1663 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1654, i32 %1659, i32 %1662)  ; IMad(a,b,c)
-  %1664 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %1663, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1665 = extractvalue %dx.types.ResRet.i16 %1664, 0
-  %1666 = uitofp i16 %1665 to float
-  br label %1667
-
-; <label>:1667                                    ; preds = %1646, %1580, %1551, %1534, %1524
-  %1668 = phi float [ %1548, %1534 ], [ 0.000000e+00, %1524 ], [ %1579, %1551 ], [ %1666, %1646 ], [ 0.000000e+00, %1580 ]
-  br i1 %941, label %1669, label %1694
-
-; <label>:1669                                    ; preds = %1667
-  %1670 = fcmp fast oge float %936, 0.000000e+00
-  %1671 = fptoui float %936 to i32
-  %1672 = icmp ult i32 %1671, %13
-  %1673 = and i1 %1670, %1672
-  %1674 = fcmp fast oge float %938, 0.000000e+00
-  %1675 = and i1 %1674, %1673
-  %1676 = fptoui float %938 to i32
-  %1677 = icmp ult i32 %1676, %15
-  %1678 = and i1 %1677, %1675
-  br i1 %1678, label %1679, label %1812
-
-; <label>:1679                                    ; preds = %1669
-  %1680 = fptoui float %45 to i32
-  %1681 = fptoui float %182 to i32
-  %1682 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1683 = extractvalue %dx.types.CBufRet.i32 %1682, 0
-  %1684 = extractvalue %dx.types.CBufRet.i32 %1682, 1
-  %1685 = extractvalue %dx.types.CBufRet.i32 %1682, 2
-  %1686 = extractvalue %dx.types.CBufRet.i32 %1682, 3
-  %1687 = mul i32 %1683, %1680
-  %1688 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1681, i32 %1684, i32 %1687)  ; IMad(a,b,c)
-  %1689 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1676, i32 %1685, i32 %1688)  ; IMad(a,b,c)
-  %1690 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1671, i32 %1686, i32 %1689)  ; IMad(a,b,c)
-  %1691 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %1690, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1692 = extractvalue %dx.types.ResRet.i16 %1691, 0
-  %1693 = uitofp i16 %1692 to float
-  br label %1812
-
-; <label>:1694                                    ; preds = %1667
-  %1695 = icmp eq i32 %940, 1
-  br i1 %1695, label %1696, label %1725
-
-; <label>:1696                                    ; preds = %1694
-  %1697 = add i32 %13, -1
-  %1698 = uitofp i32 %1697 to float
-  %1699 = call float @dx.op.binary.f32(i32 35, float %936, float 0.000000e+00)  ; FMax(a,b)
-  %1700 = call float @dx.op.binary.f32(i32 36, float %1699, float %1698)  ; FMin(a,b)
-  %1701 = fptoui float %1700 to i32
-  %1702 = add i32 %15, -1
-  %1703 = uitofp i32 %1702 to float
-  %1704 = call float @dx.op.binary.f32(i32 35, float %938, float 0.000000e+00)  ; FMax(a,b)
-  %1705 = call float @dx.op.binary.f32(i32 36, float %1704, float %1703)  ; FMin(a,b)
-  %1706 = fptoui float %1705 to i32
-  %1707 = uitofp i32 %1706 to float
-  %1708 = uitofp i32 %1701 to float
-  %1709 = fptoui float %45 to i32
-  %1710 = fptoui float %182 to i32
-  %1711 = fptoui float %1707 to i32
-  %1712 = fptoui float %1708 to i32
-  %1713 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1714 = extractvalue %dx.types.CBufRet.i32 %1713, 0
-  %1715 = extractvalue %dx.types.CBufRet.i32 %1713, 1
-  %1716 = extractvalue %dx.types.CBufRet.i32 %1713, 2
-  %1717 = extractvalue %dx.types.CBufRet.i32 %1713, 3
-  %1718 = mul i32 %1714, %1709
-  %1719 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1710, i32 %1715, i32 %1718)  ; IMad(a,b,c)
-  %1720 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1711, i32 %1716, i32 %1719)  ; IMad(a,b,c)
-  %1721 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1712, i32 %1717, i32 %1720)  ; IMad(a,b,c)
-  %1722 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %1721, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1723 = extractvalue %dx.types.ResRet.i16 %1722, 0
-  %1724 = uitofp i16 %1723 to float
-  br label %1812
-
-; <label>:1725                                    ; preds = %1694
-  %1726 = icmp eq i32 %940, 2
-  br i1 %1726, label %1727, label %1812
-
-; <label>:1727                                    ; preds = %1725
-  %1728 = fsub fast float %22, %20
-  %1729 = fcmp fast olt float %936, %20
-  br i1 %1729, label %1730, label %1743
-
-; <label>:1730                                    ; preds = %1727
-  %1731 = fsub fast float %20, %936
-  %1732 = fdiv fast float %1731, %1728
-  %1733 = fptoui float %1732 to i32
-  %1734 = uitofp i32 %1733 to float
-  %1735 = fmul fast float %1734, %1728
-  %1736 = fsub fast float %1731, %1735
-  %1737 = and i32 %1733, 1
-  %1738 = icmp eq i32 %1737, 0
-  br i1 %1738, label %1739, label %1741
-
-; <label>:1739                                    ; preds = %1730
-  %1740 = fadd fast float %1736, %20
-  br label %1758
-
-; <label>:1741                                    ; preds = %1730
-  %1742 = fsub fast float %22, %1736
-  br label %1758
-
-; <label>:1743                                    ; preds = %1727
-  %1744 = fcmp fast ogt float %936, %22
-  br i1 %1744, label %1745, label %1758
-
-; <label>:1745                                    ; preds = %1743
-  %1746 = fsub fast float %936, %22
-  %1747 = fdiv fast float %1746, %1728
-  %1748 = fptoui float %1747 to i32
-  %1749 = uitofp i32 %1748 to float
-  %1750 = fmul fast float %1749, %1728
-  %1751 = fsub fast float %1746, %1750
-  %1752 = and i32 %1748, 1
-  %1753 = icmp eq i32 %1752, 0
-  br i1 %1753, label %1754, label %1756
-
-; <label>:1754                                    ; preds = %1745
-  %1755 = fsub fast float %22, %1751
-  br label %1758
-
-; <label>:1756                                    ; preds = %1745
-  %1757 = fadd fast float %1751, %20
-  br label %1758
-
-; <label>:1758                                    ; preds = %1756, %1754, %1743, %1741, %1739
-  %1759 = phi float [ %1740, %1739 ], [ %1742, %1741 ], [ %1755, %1754 ], [ %1757, %1756 ], [ %936, %1743 ]
-  %1760 = fptoui float %1759 to i32
-  %1761 = fsub fast float %24, %20
-  %1762 = fcmp fast olt float %938, %20
-  br i1 %1762, label %1763, label %1776
-
-; <label>:1763                                    ; preds = %1758
-  %1764 = fsub fast float %20, %938
-  %1765 = fdiv fast float %1764, %1761
-  %1766 = fptoui float %1765 to i32
-  %1767 = uitofp i32 %1766 to float
-  %1768 = fmul fast float %1767, %1761
-  %1769 = fsub fast float %1764, %1768
-  %1770 = and i32 %1766, 1
-  %1771 = icmp eq i32 %1770, 0
-  br i1 %1771, label %1772, label %1774
-
-; <label>:1772                                    ; preds = %1763
-  %1773 = fadd fast float %1769, %20
-  br label %1791
-
-; <label>:1774                                    ; preds = %1763
-  %1775 = fsub fast float %24, %1769
-  br label %1791
-
-; <label>:1776                                    ; preds = %1758
-  %1777 = fcmp fast ogt float %938, %24
-  br i1 %1777, label %1778, label %1791
-
-; <label>:1778                                    ; preds = %1776
-  %1779 = fsub fast float %938, %24
-  %1780 = fdiv fast float %1779, %1761
-  %1781 = fptoui float %1780 to i32
-  %1782 = uitofp i32 %1781 to float
-  %1783 = fmul fast float %1782, %1761
-  %1784 = fsub fast float %1779, %1783
-  %1785 = and i32 %1781, 1
-  %1786 = icmp eq i32 %1785, 0
-  br i1 %1786, label %1787, label %1789
-
-; <label>:1787                                    ; preds = %1778
-  %1788 = fsub fast float %24, %1784
-  br label %1791
-
-; <label>:1789                                    ; preds = %1778
-  %1790 = fadd fast float %1784, %20
-  br label %1791
-
-; <label>:1791                                    ; preds = %1789, %1787, %1776, %1774, %1772
-  %1792 = phi float [ %1773, %1772 ], [ %1775, %1774 ], [ %1788, %1787 ], [ %1790, %1789 ], [ %938, %1776 ]
-  %1793 = fptoui float %1792 to i32
-  %1794 = uitofp i32 %1793 to float
-  %1795 = uitofp i32 %1760 to float
-  %1796 = fptoui float %45 to i32
-  %1797 = fptoui float %182 to i32
-  %1798 = fptoui float %1794 to i32
-  %1799 = fptoui float %1795 to i32
-  %1800 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1801 = extractvalue %dx.types.CBufRet.i32 %1800, 0
-  %1802 = extractvalue %dx.types.CBufRet.i32 %1800, 1
-  %1803 = extractvalue %dx.types.CBufRet.i32 %1800, 2
-  %1804 = extractvalue %dx.types.CBufRet.i32 %1800, 3
-  %1805 = mul i32 %1801, %1796
-  %1806 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1797, i32 %1802, i32 %1805)  ; IMad(a,b,c)
-  %1807 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1798, i32 %1803, i32 %1806)  ; IMad(a,b,c)
-  %1808 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1799, i32 %1804, i32 %1807)  ; IMad(a,b,c)
-  %1809 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %1808, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1810 = extractvalue %dx.types.ResRet.i16 %1809, 0
-  %1811 = uitofp i16 %1810 to float
-  br label %1812
-
-; <label>:1812                                    ; preds = %1791, %1725, %1696, %1679, %1669
-  %1813 = phi float [ %1693, %1679 ], [ 0.000000e+00, %1669 ], [ %1724, %1696 ], [ %1811, %1791 ], [ 0.000000e+00, %1725 ]
-  br i1 %941, label %1814, label %1839
-
-; <label>:1814                                    ; preds = %1812
-  %1815 = fcmp fast oge float %1232, 0.000000e+00
-  %1816 = fptoui float %1232 to i32
-  %1817 = icmp ult i32 %1816, %13
-  %1818 = and i1 %1815, %1817
-  %1819 = fcmp fast oge float %938, 0.000000e+00
-  %1820 = and i1 %1819, %1818
-  %1821 = fptoui float %938 to i32
-  %1822 = icmp ult i32 %1821, %15
-  %1823 = and i1 %1822, %1820
-  br i1 %1823, label %1824, label %1957
-
-; <label>:1824                                    ; preds = %1814
-  %1825 = fptoui float %45 to i32
-  %1826 = fptoui float %182 to i32
-  %1827 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1828 = extractvalue %dx.types.CBufRet.i32 %1827, 0
-  %1829 = extractvalue %dx.types.CBufRet.i32 %1827, 1
-  %1830 = extractvalue %dx.types.CBufRet.i32 %1827, 2
-  %1831 = extractvalue %dx.types.CBufRet.i32 %1827, 3
-  %1832 = mul i32 %1828, %1825
-  %1833 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1826, i32 %1829, i32 %1832)  ; IMad(a,b,c)
-  %1834 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1821, i32 %1830, i32 %1833)  ; IMad(a,b,c)
-  %1835 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1816, i32 %1831, i32 %1834)  ; IMad(a,b,c)
-  %1836 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %1835, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1837 = extractvalue %dx.types.ResRet.i16 %1836, 0
-  %1838 = uitofp i16 %1837 to float
-  br label %1957
-
-; <label>:1839                                    ; preds = %1812
-  %1840 = icmp eq i32 %940, 1
-  br i1 %1840, label %1841, label %1870
-
-; <label>:1841                                    ; preds = %1839
-  %1842 = add i32 %13, -1
-  %1843 = uitofp i32 %1842 to float
-  %1844 = call float @dx.op.binary.f32(i32 35, float %1232, float 0.000000e+00)  ; FMax(a,b)
-  %1845 = call float @dx.op.binary.f32(i32 36, float %1844, float %1843)  ; FMin(a,b)
-  %1846 = fptoui float %1845 to i32
-  %1847 = add i32 %15, -1
-  %1848 = uitofp i32 %1847 to float
-  %1849 = call float @dx.op.binary.f32(i32 35, float %938, float 0.000000e+00)  ; FMax(a,b)
-  %1850 = call float @dx.op.binary.f32(i32 36, float %1849, float %1848)  ; FMin(a,b)
-  %1851 = fptoui float %1850 to i32
-  %1852 = uitofp i32 %1851 to float
-  %1853 = uitofp i32 %1846 to float
-  %1854 = fptoui float %45 to i32
-  %1855 = fptoui float %182 to i32
-  %1856 = fptoui float %1852 to i32
-  %1857 = fptoui float %1853 to i32
-  %1858 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1859 = extractvalue %dx.types.CBufRet.i32 %1858, 0
-  %1860 = extractvalue %dx.types.CBufRet.i32 %1858, 1
-  %1861 = extractvalue %dx.types.CBufRet.i32 %1858, 2
-  %1862 = extractvalue %dx.types.CBufRet.i32 %1858, 3
-  %1863 = mul i32 %1859, %1854
-  %1864 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1855, i32 %1860, i32 %1863)  ; IMad(a,b,c)
-  %1865 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1856, i32 %1861, i32 %1864)  ; IMad(a,b,c)
-  %1866 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1857, i32 %1862, i32 %1865)  ; IMad(a,b,c)
-  %1867 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %1866, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1868 = extractvalue %dx.types.ResRet.i16 %1867, 0
-  %1869 = uitofp i16 %1868 to float
-  br label %1957
-
-; <label>:1870                                    ; preds = %1839
-  %1871 = icmp eq i32 %940, 2
-  br i1 %1871, label %1872, label %1957
-
-; <label>:1872                                    ; preds = %1870
-  %1873 = fsub fast float %22, %20
-  %1874 = fcmp fast olt float %1232, %20
-  br i1 %1874, label %1875, label %1888
-
-; <label>:1875                                    ; preds = %1872
-  %1876 = fsub fast float %20, %1232
-  %1877 = fdiv fast float %1876, %1873
-  %1878 = fptoui float %1877 to i32
-  %1879 = uitofp i32 %1878 to float
-  %1880 = fmul fast float %1879, %1873
-  %1881 = fsub fast float %1876, %1880
-  %1882 = and i32 %1878, 1
-  %1883 = icmp eq i32 %1882, 0
-  br i1 %1883, label %1884, label %1886
-
-; <label>:1884                                    ; preds = %1875
-  %1885 = fadd fast float %1881, %20
-  br label %1903
-
-; <label>:1886                                    ; preds = %1875
-  %1887 = fsub fast float %22, %1881
-  br label %1903
-
-; <label>:1888                                    ; preds = %1872
-  %1889 = fcmp fast ogt float %1232, %22
-  br i1 %1889, label %1890, label %1903
-
-; <label>:1890                                    ; preds = %1888
-  %1891 = fsub fast float %1232, %22
-  %1892 = fdiv fast float %1891, %1873
-  %1893 = fptoui float %1892 to i32
-  %1894 = uitofp i32 %1893 to float
-  %1895 = fmul fast float %1894, %1873
-  %1896 = fsub fast float %1891, %1895
-  %1897 = and i32 %1893, 1
-  %1898 = icmp eq i32 %1897, 0
-  br i1 %1898, label %1899, label %1901
-
-; <label>:1899                                    ; preds = %1890
-  %1900 = fsub fast float %22, %1896
-  br label %1903
-
-; <label>:1901                                    ; preds = %1890
-  %1902 = fadd fast float %1896, %20
-  br label %1903
-
-; <label>:1903                                    ; preds = %1901, %1899, %1888, %1886, %1884
-  %1904 = phi float [ %1885, %1884 ], [ %1887, %1886 ], [ %1900, %1899 ], [ %1902, %1901 ], [ %1232, %1888 ]
-  %1905 = fptoui float %1904 to i32
-  %1906 = fsub fast float %24, %20
-  %1907 = fcmp fast olt float %938, %20
-  br i1 %1907, label %1908, label %1921
-
-; <label>:1908                                    ; preds = %1903
-  %1909 = fsub fast float %20, %938
-  %1910 = fdiv fast float %1909, %1906
-  %1911 = fptoui float %1910 to i32
-  %1912 = uitofp i32 %1911 to float
-  %1913 = fmul fast float %1912, %1906
-  %1914 = fsub fast float %1909, %1913
-  %1915 = and i32 %1911, 1
-  %1916 = icmp eq i32 %1915, 0
-  br i1 %1916, label %1917, label %1919
-
-; <label>:1917                                    ; preds = %1908
-  %1918 = fadd fast float %1914, %20
-  br label %1936
-
-; <label>:1919                                    ; preds = %1908
-  %1920 = fsub fast float %24, %1914
-  br label %1936
-
-; <label>:1921                                    ; preds = %1903
-  %1922 = fcmp fast ogt float %938, %24
-  br i1 %1922, label %1923, label %1936
-
-; <label>:1923                                    ; preds = %1921
-  %1924 = fsub fast float %938, %24
-  %1925 = fdiv fast float %1924, %1906
-  %1926 = fptoui float %1925 to i32
-  %1927 = uitofp i32 %1926 to float
-  %1928 = fmul fast float %1927, %1906
-  %1929 = fsub fast float %1924, %1928
-  %1930 = and i32 %1926, 1
-  %1931 = icmp eq i32 %1930, 0
-  br i1 %1931, label %1932, label %1934
-
-; <label>:1932                                    ; preds = %1923
-  %1933 = fsub fast float %24, %1929
-  br label %1936
-
-; <label>:1934                                    ; preds = %1923
-  %1935 = fadd fast float %1929, %20
-  br label %1936
-
-; <label>:1936                                    ; preds = %1934, %1932, %1921, %1919, %1917
-  %1937 = phi float [ %1918, %1917 ], [ %1920, %1919 ], [ %1933, %1932 ], [ %1935, %1934 ], [ %938, %1921 ]
-  %1938 = fptoui float %1937 to i32
-  %1939 = uitofp i32 %1938 to float
-  %1940 = uitofp i32 %1905 to float
-  %1941 = fptoui float %45 to i32
-  %1942 = fptoui float %182 to i32
-  %1943 = fptoui float %1939 to i32
-  %1944 = fptoui float %1940 to i32
-  %1945 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1946 = extractvalue %dx.types.CBufRet.i32 %1945, 0
-  %1947 = extractvalue %dx.types.CBufRet.i32 %1945, 1
-  %1948 = extractvalue %dx.types.CBufRet.i32 %1945, 2
-  %1949 = extractvalue %dx.types.CBufRet.i32 %1945, 3
-  %1950 = mul i32 %1946, %1941
-  %1951 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1942, i32 %1947, i32 %1950)  ; IMad(a,b,c)
-  %1952 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1943, i32 %1948, i32 %1951)  ; IMad(a,b,c)
-  %1953 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1944, i32 %1949, i32 %1952)  ; IMad(a,b,c)
-  %1954 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %1953, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1955 = extractvalue %dx.types.ResRet.i16 %1954, 0
-  %1956 = uitofp i16 %1955 to float
-  br label %1957
-
-; <label>:1957                                    ; preds = %1936, %1870, %1841, %1824, %1814
-  %1958 = phi float [ %1838, %1824 ], [ 0.000000e+00, %1814 ], [ %1869, %1841 ], [ %1956, %1936 ], [ 0.000000e+00, %1870 ]
-  br i1 %941, label %1959, label %1984
-
-; <label>:1959                                    ; preds = %1957
-  %1960 = fcmp fast oge float %1378, 0.000000e+00
-  %1961 = fptoui float %1378 to i32
-  %1962 = icmp ult i32 %1961, %13
-  %1963 = and i1 %1960, %1962
-  %1964 = fcmp fast oge float %938, 0.000000e+00
-  %1965 = and i1 %1964, %1963
-  %1966 = fptoui float %938 to i32
-  %1967 = icmp ult i32 %1966, %15
-  %1968 = and i1 %1967, %1965
-  br i1 %1968, label %1969, label %2102
-
-; <label>:1969                                    ; preds = %1959
-  %1970 = fptoui float %45 to i32
-  %1971 = fptoui float %182 to i32
-  %1972 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1973 = extractvalue %dx.types.CBufRet.i32 %1972, 0
-  %1974 = extractvalue %dx.types.CBufRet.i32 %1972, 1
-  %1975 = extractvalue %dx.types.CBufRet.i32 %1972, 2
-  %1976 = extractvalue %dx.types.CBufRet.i32 %1972, 3
-  %1977 = mul i32 %1973, %1970
-  %1978 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1971, i32 %1974, i32 %1977)  ; IMad(a,b,c)
-  %1979 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1966, i32 %1975, i32 %1978)  ; IMad(a,b,c)
-  %1980 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1961, i32 %1976, i32 %1979)  ; IMad(a,b,c)
-  %1981 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %1980, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1982 = extractvalue %dx.types.ResRet.i16 %1981, 0
-  %1983 = uitofp i16 %1982 to float
-  br label %2102
-
-; <label>:1984                                    ; preds = %1957
-  %1985 = icmp eq i32 %940, 1
-  br i1 %1985, label %1986, label %2015
-
-; <label>:1986                                    ; preds = %1984
-  %1987 = add i32 %13, -1
-  %1988 = uitofp i32 %1987 to float
-  %1989 = call float @dx.op.binary.f32(i32 35, float %1378, float 0.000000e+00)  ; FMax(a,b)
-  %1990 = call float @dx.op.binary.f32(i32 36, float %1989, float %1988)  ; FMin(a,b)
-  %1991 = fptoui float %1990 to i32
-  %1992 = add i32 %15, -1
-  %1993 = uitofp i32 %1992 to float
-  %1994 = call float @dx.op.binary.f32(i32 35, float %938, float 0.000000e+00)  ; FMax(a,b)
-  %1995 = call float @dx.op.binary.f32(i32 36, float %1994, float %1993)  ; FMin(a,b)
-  %1996 = fptoui float %1995 to i32
-  %1997 = uitofp i32 %1996 to float
-  %1998 = uitofp i32 %1991 to float
-  %1999 = fptoui float %45 to i32
-  %2000 = fptoui float %182 to i32
-  %2001 = fptoui float %1997 to i32
-  %2002 = fptoui float %1998 to i32
-  %2003 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2004 = extractvalue %dx.types.CBufRet.i32 %2003, 0
-  %2005 = extractvalue %dx.types.CBufRet.i32 %2003, 1
-  %2006 = extractvalue %dx.types.CBufRet.i32 %2003, 2
-  %2007 = extractvalue %dx.types.CBufRet.i32 %2003, 3
-  %2008 = mul i32 %2004, %1999
-  %2009 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2000, i32 %2005, i32 %2008)  ; IMad(a,b,c)
-  %2010 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2001, i32 %2006, i32 %2009)  ; IMad(a,b,c)
-  %2011 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2002, i32 %2007, i32 %2010)  ; IMad(a,b,c)
-  %2012 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %2011, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2013 = extractvalue %dx.types.ResRet.i16 %2012, 0
-  %2014 = uitofp i16 %2013 to float
-  br label %2102
-
-; <label>:2015                                    ; preds = %1984
-  %2016 = icmp eq i32 %940, 2
-  br i1 %2016, label %2017, label %2102
-
-; <label>:2017                                    ; preds = %2015
-  %2018 = fsub fast float %22, %20
-  %2019 = fcmp fast olt float %1378, %20
-  br i1 %2019, label %2020, label %2033
-
-; <label>:2020                                    ; preds = %2017
-  %2021 = fsub fast float %20, %1378
-  %2022 = fdiv fast float %2021, %2018
-  %2023 = fptoui float %2022 to i32
-  %2024 = uitofp i32 %2023 to float
-  %2025 = fmul fast float %2024, %2018
-  %2026 = fsub fast float %2021, %2025
-  %2027 = and i32 %2023, 1
-  %2028 = icmp eq i32 %2027, 0
-  br i1 %2028, label %2029, label %2031
-
-; <label>:2029                                    ; preds = %2020
-  %2030 = fadd fast float %2026, %20
-  br label %2048
-
-; <label>:2031                                    ; preds = %2020
-  %2032 = fsub fast float %22, %2026
-  br label %2048
-
-; <label>:2033                                    ; preds = %2017
-  %2034 = fcmp fast ogt float %1378, %22
-  br i1 %2034, label %2035, label %2048
-
-; <label>:2035                                    ; preds = %2033
-  %2036 = fsub fast float %1378, %22
-  %2037 = fdiv fast float %2036, %2018
-  %2038 = fptoui float %2037 to i32
-  %2039 = uitofp i32 %2038 to float
-  %2040 = fmul fast float %2039, %2018
-  %2041 = fsub fast float %2036, %2040
-  %2042 = and i32 %2038, 1
-  %2043 = icmp eq i32 %2042, 0
-  br i1 %2043, label %2044, label %2046
-
-; <label>:2044                                    ; preds = %2035
-  %2045 = fsub fast float %22, %2041
-  br label %2048
-
-; <label>:2046                                    ; preds = %2035
-  %2047 = fadd fast float %2041, %20
-  br label %2048
-
-; <label>:2048                                    ; preds = %2046, %2044, %2033, %2031, %2029
-  %2049 = phi float [ %2030, %2029 ], [ %2032, %2031 ], [ %2045, %2044 ], [ %2047, %2046 ], [ %1378, %2033 ]
-  %2050 = fptoui float %2049 to i32
-  %2051 = fsub fast float %24, %20
-  %2052 = fcmp fast olt float %938, %20
-  br i1 %2052, label %2053, label %2066
-
-; <label>:2053                                    ; preds = %2048
-  %2054 = fsub fast float %20, %938
-  %2055 = fdiv fast float %2054, %2051
-  %2056 = fptoui float %2055 to i32
-  %2057 = uitofp i32 %2056 to float
-  %2058 = fmul fast float %2057, %2051
-  %2059 = fsub fast float %2054, %2058
-  %2060 = and i32 %2056, 1
-  %2061 = icmp eq i32 %2060, 0
-  br i1 %2061, label %2062, label %2064
-
-; <label>:2062                                    ; preds = %2053
-  %2063 = fadd fast float %2059, %20
-  br label %2081
-
-; <label>:2064                                    ; preds = %2053
-  %2065 = fsub fast float %24, %2059
-  br label %2081
-
-; <label>:2066                                    ; preds = %2048
-  %2067 = fcmp fast ogt float %938, %24
-  br i1 %2067, label %2068, label %2081
-
-; <label>:2068                                    ; preds = %2066
-  %2069 = fsub fast float %938, %24
-  %2070 = fdiv fast float %2069, %2051
-  %2071 = fptoui float %2070 to i32
-  %2072 = uitofp i32 %2071 to float
-  %2073 = fmul fast float %2072, %2051
-  %2074 = fsub fast float %2069, %2073
-  %2075 = and i32 %2071, 1
-  %2076 = icmp eq i32 %2075, 0
-  br i1 %2076, label %2077, label %2079
-
-; <label>:2077                                    ; preds = %2068
-  %2078 = fsub fast float %24, %2074
-  br label %2081
-
-; <label>:2079                                    ; preds = %2068
-  %2080 = fadd fast float %2074, %20
-  br label %2081
-
-; <label>:2081                                    ; preds = %2079, %2077, %2066, %2064, %2062
-  %2082 = phi float [ %2063, %2062 ], [ %2065, %2064 ], [ %2078, %2077 ], [ %2080, %2079 ], [ %938, %2066 ]
-  %2083 = fptoui float %2082 to i32
-  %2084 = uitofp i32 %2083 to float
-  %2085 = uitofp i32 %2050 to float
-  %2086 = fptoui float %45 to i32
-  %2087 = fptoui float %182 to i32
-  %2088 = fptoui float %2084 to i32
-  %2089 = fptoui float %2085 to i32
-  %2090 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2091 = extractvalue %dx.types.CBufRet.i32 %2090, 0
-  %2092 = extractvalue %dx.types.CBufRet.i32 %2090, 1
-  %2093 = extractvalue %dx.types.CBufRet.i32 %2090, 2
-  %2094 = extractvalue %dx.types.CBufRet.i32 %2090, 3
-  %2095 = mul i32 %2091, %2086
-  %2096 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2087, i32 %2092, i32 %2095)  ; IMad(a,b,c)
-  %2097 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2088, i32 %2093, i32 %2096)  ; IMad(a,b,c)
-  %2098 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2089, i32 %2094, i32 %2097)  ; IMad(a,b,c)
-  %2099 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %2098, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2100 = extractvalue %dx.types.ResRet.i16 %2099, 0
-  %2101 = uitofp i16 %2100 to float
-  br label %2102
-
-; <label>:2102                                    ; preds = %2081, %2015, %1986, %1969, %1959
-  %2103 = phi float [ %1983, %1969 ], [ 0.000000e+00, %1959 ], [ %2014, %1986 ], [ %2101, %2081 ], [ 0.000000e+00, %2015 ]
-  %2104 = fadd fast float %938, 1.000000e+00
-  br i1 %941, label %2105, label %2130
-
-; <label>:2105                                    ; preds = %2102
-  %2106 = fcmp fast oge float %937, 0.000000e+00
-  %2107 = fptoui float %937 to i32
-  %2108 = icmp ult i32 %2107, %13
-  %2109 = and i1 %2106, %2108
-  %2110 = fcmp fast oge float %2104, 0.000000e+00
-  %2111 = and i1 %2110, %2109
-  %2112 = fptoui float %2104 to i32
-  %2113 = icmp ult i32 %2112, %15
-  %2114 = and i1 %2113, %2111
-  br i1 %2114, label %2115, label %2248
-
-; <label>:2115                                    ; preds = %2105
-  %2116 = fptoui float %45 to i32
-  %2117 = fptoui float %182 to i32
-  %2118 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2119 = extractvalue %dx.types.CBufRet.i32 %2118, 0
-  %2120 = extractvalue %dx.types.CBufRet.i32 %2118, 1
-  %2121 = extractvalue %dx.types.CBufRet.i32 %2118, 2
-  %2122 = extractvalue %dx.types.CBufRet.i32 %2118, 3
-  %2123 = mul i32 %2119, %2116
-  %2124 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2117, i32 %2120, i32 %2123)  ; IMad(a,b,c)
-  %2125 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2112, i32 %2121, i32 %2124)  ; IMad(a,b,c)
-  %2126 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2107, i32 %2122, i32 %2125)  ; IMad(a,b,c)
-  %2127 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %2126, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2128 = extractvalue %dx.types.ResRet.i16 %2127, 0
-  %2129 = uitofp i16 %2128 to float
-  br label %2248
-
-; <label>:2130                                    ; preds = %2102
-  %2131 = icmp eq i32 %940, 1
-  br i1 %2131, label %2132, label %2161
-
-; <label>:2132                                    ; preds = %2130
-  %2133 = add i32 %13, -1
-  %2134 = uitofp i32 %2133 to float
-  %2135 = call float @dx.op.binary.f32(i32 35, float %937, float 0.000000e+00)  ; FMax(a,b)
-  %2136 = call float @dx.op.binary.f32(i32 36, float %2135, float %2134)  ; FMin(a,b)
-  %2137 = fptoui float %2136 to i32
-  %2138 = add i32 %15, -1
-  %2139 = uitofp i32 %2138 to float
-  %2140 = call float @dx.op.binary.f32(i32 35, float %2104, float 0.000000e+00)  ; FMax(a,b)
-  %2141 = call float @dx.op.binary.f32(i32 36, float %2140, float %2139)  ; FMin(a,b)
-  %2142 = fptoui float %2141 to i32
-  %2143 = uitofp i32 %2142 to float
-  %2144 = uitofp i32 %2137 to float
-  %2145 = fptoui float %45 to i32
-  %2146 = fptoui float %182 to i32
-  %2147 = fptoui float %2143 to i32
-  %2148 = fptoui float %2144 to i32
-  %2149 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2150 = extractvalue %dx.types.CBufRet.i32 %2149, 0
-  %2151 = extractvalue %dx.types.CBufRet.i32 %2149, 1
-  %2152 = extractvalue %dx.types.CBufRet.i32 %2149, 2
-  %2153 = extractvalue %dx.types.CBufRet.i32 %2149, 3
-  %2154 = mul i32 %2150, %2145
-  %2155 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2146, i32 %2151, i32 %2154)  ; IMad(a,b,c)
-  %2156 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2147, i32 %2152, i32 %2155)  ; IMad(a,b,c)
-  %2157 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2148, i32 %2153, i32 %2156)  ; IMad(a,b,c)
-  %2158 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %2157, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2159 = extractvalue %dx.types.ResRet.i16 %2158, 0
-  %2160 = uitofp i16 %2159 to float
-  br label %2248
-
-; <label>:2161                                    ; preds = %2130
-  %2162 = icmp eq i32 %940, 2
-  br i1 %2162, label %2163, label %2248
-
-; <label>:2163                                    ; preds = %2161
-  %2164 = fsub fast float %22, %20
-  %2165 = fcmp fast olt float %937, %20
-  br i1 %2165, label %2166, label %2179
-
-; <label>:2166                                    ; preds = %2163
-  %2167 = fsub fast float %20, %937
-  %2168 = fdiv fast float %2167, %2164
-  %2169 = fptoui float %2168 to i32
-  %2170 = uitofp i32 %2169 to float
-  %2171 = fmul fast float %2170, %2164
-  %2172 = fsub fast float %2167, %2171
-  %2173 = and i32 %2169, 1
-  %2174 = icmp eq i32 %2173, 0
-  br i1 %2174, label %2175, label %2177
-
-; <label>:2175                                    ; preds = %2166
-  %2176 = fadd fast float %2172, %20
-  br label %2194
-
-; <label>:2177                                    ; preds = %2166
-  %2178 = fsub fast float %22, %2172
-  br label %2194
-
-; <label>:2179                                    ; preds = %2163
-  %2180 = fcmp fast ogt float %937, %22
-  br i1 %2180, label %2181, label %2194
-
-; <label>:2181                                    ; preds = %2179
-  %2182 = fsub fast float %937, %22
-  %2183 = fdiv fast float %2182, %2164
-  %2184 = fptoui float %2183 to i32
-  %2185 = uitofp i32 %2184 to float
-  %2186 = fmul fast float %2185, %2164
-  %2187 = fsub fast float %2182, %2186
-  %2188 = and i32 %2184, 1
-  %2189 = icmp eq i32 %2188, 0
-  br i1 %2189, label %2190, label %2192
-
-; <label>:2190                                    ; preds = %2181
-  %2191 = fsub fast float %22, %2187
-  br label %2194
-
-; <label>:2192                                    ; preds = %2181
-  %2193 = fadd fast float %2187, %20
-  br label %2194
-
-; <label>:2194                                    ; preds = %2192, %2190, %2179, %2177, %2175
-  %2195 = phi float [ %2176, %2175 ], [ %2178, %2177 ], [ %2191, %2190 ], [ %2193, %2192 ], [ %937, %2179 ]
-  %2196 = fptoui float %2195 to i32
-  %2197 = fsub fast float %24, %20
-  %2198 = fcmp fast olt float %2104, %20
-  br i1 %2198, label %2199, label %2212
-
-; <label>:2199                                    ; preds = %2194
-  %2200 = fsub fast float %20, %2104
-  %2201 = fdiv fast float %2200, %2197
-  %2202 = fptoui float %2201 to i32
-  %2203 = uitofp i32 %2202 to float
-  %2204 = fmul fast float %2203, %2197
-  %2205 = fsub fast float %2200, %2204
-  %2206 = and i32 %2202, 1
-  %2207 = icmp eq i32 %2206, 0
-  br i1 %2207, label %2208, label %2210
-
-; <label>:2208                                    ; preds = %2199
-  %2209 = fadd fast float %2205, %20
-  br label %2227
-
-; <label>:2210                                    ; preds = %2199
-  %2211 = fsub fast float %24, %2205
-  br label %2227
-
-; <label>:2212                                    ; preds = %2194
-  %2213 = fcmp fast ogt float %2104, %24
-  br i1 %2213, label %2214, label %2227
-
-; <label>:2214                                    ; preds = %2212
-  %2215 = fsub fast float %2104, %24
-  %2216 = fdiv fast float %2215, %2197
-  %2217 = fptoui float %2216 to i32
-  %2218 = uitofp i32 %2217 to float
-  %2219 = fmul fast float %2218, %2197
-  %2220 = fsub fast float %2215, %2219
-  %2221 = and i32 %2217, 1
-  %2222 = icmp eq i32 %2221, 0
-  br i1 %2222, label %2223, label %2225
-
-; <label>:2223                                    ; preds = %2214
-  %2224 = fsub fast float %24, %2220
-  br label %2227
-
-; <label>:2225                                    ; preds = %2214
-  %2226 = fadd fast float %2220, %20
-  br label %2227
-
-; <label>:2227                                    ; preds = %2225, %2223, %2212, %2210, %2208
-  %2228 = phi float [ %2209, %2208 ], [ %2211, %2210 ], [ %2224, %2223 ], [ %2226, %2225 ], [ %2104, %2212 ]
-  %2229 = fptoui float %2228 to i32
-  %2230 = uitofp i32 %2229 to float
-  %2231 = uitofp i32 %2196 to float
-  %2232 = fptoui float %45 to i32
-  %2233 = fptoui float %182 to i32
-  %2234 = fptoui float %2230 to i32
-  %2235 = fptoui float %2231 to i32
-  %2236 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2237 = extractvalue %dx.types.CBufRet.i32 %2236, 0
-  %2238 = extractvalue %dx.types.CBufRet.i32 %2236, 1
-  %2239 = extractvalue %dx.types.CBufRet.i32 %2236, 2
-  %2240 = extractvalue %dx.types.CBufRet.i32 %2236, 3
-  %2241 = mul i32 %2237, %2232
-  %2242 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2233, i32 %2238, i32 %2241)  ; IMad(a,b,c)
-  %2243 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2234, i32 %2239, i32 %2242)  ; IMad(a,b,c)
-  %2244 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2235, i32 %2240, i32 %2243)  ; IMad(a,b,c)
-  %2245 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %2244, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2246 = extractvalue %dx.types.ResRet.i16 %2245, 0
-  %2247 = uitofp i16 %2246 to float
-  br label %2248
-
-; <label>:2248                                    ; preds = %2227, %2161, %2132, %2115, %2105
-  %2249 = phi float [ %2129, %2115 ], [ 0.000000e+00, %2105 ], [ %2160, %2132 ], [ %2247, %2227 ], [ 0.000000e+00, %2161 ]
-  br i1 %941, label %2250, label %2275
-
-; <label>:2250                                    ; preds = %2248
-  %2251 = fcmp fast oge float %936, 0.000000e+00
-  %2252 = fptoui float %936 to i32
-  %2253 = icmp ult i32 %2252, %13
-  %2254 = and i1 %2251, %2253
-  %2255 = fcmp fast oge float %2104, 0.000000e+00
-  %2256 = and i1 %2255, %2254
-  %2257 = fptoui float %2104 to i32
-  %2258 = icmp ult i32 %2257, %15
-  %2259 = and i1 %2258, %2256
-  br i1 %2259, label %2260, label %2393
-
-; <label>:2260                                    ; preds = %2250
-  %2261 = fptoui float %45 to i32
-  %2262 = fptoui float %182 to i32
-  %2263 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2264 = extractvalue %dx.types.CBufRet.i32 %2263, 0
-  %2265 = extractvalue %dx.types.CBufRet.i32 %2263, 1
-  %2266 = extractvalue %dx.types.CBufRet.i32 %2263, 2
-  %2267 = extractvalue %dx.types.CBufRet.i32 %2263, 3
-  %2268 = mul i32 %2264, %2261
-  %2269 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2262, i32 %2265, i32 %2268)  ; IMad(a,b,c)
-  %2270 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2257, i32 %2266, i32 %2269)  ; IMad(a,b,c)
-  %2271 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2252, i32 %2267, i32 %2270)  ; IMad(a,b,c)
-  %2272 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %2271, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2273 = extractvalue %dx.types.ResRet.i16 %2272, 0
-  %2274 = uitofp i16 %2273 to float
-  br label %2393
-
-; <label>:2275                                    ; preds = %2248
-  %2276 = icmp eq i32 %940, 1
-  br i1 %2276, label %2277, label %2306
-
-; <label>:2277                                    ; preds = %2275
-  %2278 = add i32 %13, -1
-  %2279 = uitofp i32 %2278 to float
-  %2280 = call float @dx.op.binary.f32(i32 35, float %936, float 0.000000e+00)  ; FMax(a,b)
-  %2281 = call float @dx.op.binary.f32(i32 36, float %2280, float %2279)  ; FMin(a,b)
-  %2282 = fptoui float %2281 to i32
-  %2283 = add i32 %15, -1
-  %2284 = uitofp i32 %2283 to float
-  %2285 = call float @dx.op.binary.f32(i32 35, float %2104, float 0.000000e+00)  ; FMax(a,b)
-  %2286 = call float @dx.op.binary.f32(i32 36, float %2285, float %2284)  ; FMin(a,b)
-  %2287 = fptoui float %2286 to i32
-  %2288 = uitofp i32 %2287 to float
-  %2289 = uitofp i32 %2282 to float
-  %2290 = fptoui float %45 to i32
-  %2291 = fptoui float %182 to i32
-  %2292 = fptoui float %2288 to i32
-  %2293 = fptoui float %2289 to i32
-  %2294 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2295 = extractvalue %dx.types.CBufRet.i32 %2294, 0
-  %2296 = extractvalue %dx.types.CBufRet.i32 %2294, 1
-  %2297 = extractvalue %dx.types.CBufRet.i32 %2294, 2
-  %2298 = extractvalue %dx.types.CBufRet.i32 %2294, 3
-  %2299 = mul i32 %2295, %2290
-  %2300 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2291, i32 %2296, i32 %2299)  ; IMad(a,b,c)
-  %2301 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2292, i32 %2297, i32 %2300)  ; IMad(a,b,c)
-  %2302 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2293, i32 %2298, i32 %2301)  ; IMad(a,b,c)
-  %2303 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %2302, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2304 = extractvalue %dx.types.ResRet.i16 %2303, 0
-  %2305 = uitofp i16 %2304 to float
-  br label %2393
-
-; <label>:2306                                    ; preds = %2275
-  %2307 = icmp eq i32 %940, 2
-  br i1 %2307, label %2308, label %2393
-
-; <label>:2308                                    ; preds = %2306
-  %2309 = fsub fast float %22, %20
-  %2310 = fcmp fast olt float %936, %20
-  br i1 %2310, label %2311, label %2324
-
-; <label>:2311                                    ; preds = %2308
-  %2312 = fsub fast float %20, %936
-  %2313 = fdiv fast float %2312, %2309
-  %2314 = fptoui float %2313 to i32
-  %2315 = uitofp i32 %2314 to float
-  %2316 = fmul fast float %2315, %2309
-  %2317 = fsub fast float %2312, %2316
-  %2318 = and i32 %2314, 1
-  %2319 = icmp eq i32 %2318, 0
-  br i1 %2319, label %2320, label %2322
-
-; <label>:2320                                    ; preds = %2311
-  %2321 = fadd fast float %2317, %20
-  br label %2339
-
-; <label>:2322                                    ; preds = %2311
-  %2323 = fsub fast float %22, %2317
-  br label %2339
-
-; <label>:2324                                    ; preds = %2308
-  %2325 = fcmp fast ogt float %936, %22
-  br i1 %2325, label %2326, label %2339
-
-; <label>:2326                                    ; preds = %2324
-  %2327 = fsub fast float %936, %22
-  %2328 = fdiv fast float %2327, %2309
-  %2329 = fptoui float %2328 to i32
-  %2330 = uitofp i32 %2329 to float
-  %2331 = fmul fast float %2330, %2309
-  %2332 = fsub fast float %2327, %2331
-  %2333 = and i32 %2329, 1
-  %2334 = icmp eq i32 %2333, 0
-  br i1 %2334, label %2335, label %2337
-
-; <label>:2335                                    ; preds = %2326
-  %2336 = fsub fast float %22, %2332
-  br label %2339
-
-; <label>:2337                                    ; preds = %2326
-  %2338 = fadd fast float %2332, %20
-  br label %2339
-
-; <label>:2339                                    ; preds = %2337, %2335, %2324, %2322, %2320
-  %2340 = phi float [ %2321, %2320 ], [ %2323, %2322 ], [ %2336, %2335 ], [ %2338, %2337 ], [ %936, %2324 ]
-  %2341 = fptoui float %2340 to i32
-  %2342 = fsub fast float %24, %20
-  %2343 = fcmp fast olt float %2104, %20
-  br i1 %2343, label %2344, label %2357
-
-; <label>:2344                                    ; preds = %2339
-  %2345 = fsub fast float %20, %2104
-  %2346 = fdiv fast float %2345, %2342
-  %2347 = fptoui float %2346 to i32
-  %2348 = uitofp i32 %2347 to float
-  %2349 = fmul fast float %2348, %2342
-  %2350 = fsub fast float %2345, %2349
-  %2351 = and i32 %2347, 1
-  %2352 = icmp eq i32 %2351, 0
-  br i1 %2352, label %2353, label %2355
-
-; <label>:2353                                    ; preds = %2344
-  %2354 = fadd fast float %2350, %20
-  br label %2372
-
-; <label>:2355                                    ; preds = %2344
-  %2356 = fsub fast float %24, %2350
-  br label %2372
-
-; <label>:2357                                    ; preds = %2339
-  %2358 = fcmp fast ogt float %2104, %24
-  br i1 %2358, label %2359, label %2372
-
-; <label>:2359                                    ; preds = %2357
-  %2360 = fsub fast float %2104, %24
-  %2361 = fdiv fast float %2360, %2342
-  %2362 = fptoui float %2361 to i32
-  %2363 = uitofp i32 %2362 to float
-  %2364 = fmul fast float %2363, %2342
-  %2365 = fsub fast float %2360, %2364
-  %2366 = and i32 %2362, 1
-  %2367 = icmp eq i32 %2366, 0
-  br i1 %2367, label %2368, label %2370
-
-; <label>:2368                                    ; preds = %2359
-  %2369 = fsub fast float %24, %2365
-  br label %2372
-
-; <label>:2370                                    ; preds = %2359
-  %2371 = fadd fast float %2365, %20
-  br label %2372
-
-; <label>:2372                                    ; preds = %2370, %2368, %2357, %2355, %2353
-  %2373 = phi float [ %2354, %2353 ], [ %2356, %2355 ], [ %2369, %2368 ], [ %2371, %2370 ], [ %2104, %2357 ]
-  %2374 = fptoui float %2373 to i32
-  %2375 = uitofp i32 %2374 to float
-  %2376 = uitofp i32 %2341 to float
-  %2377 = fptoui float %45 to i32
-  %2378 = fptoui float %182 to i32
-  %2379 = fptoui float %2375 to i32
-  %2380 = fptoui float %2376 to i32
-  %2381 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2382 = extractvalue %dx.types.CBufRet.i32 %2381, 0
-  %2383 = extractvalue %dx.types.CBufRet.i32 %2381, 1
-  %2384 = extractvalue %dx.types.CBufRet.i32 %2381, 2
-  %2385 = extractvalue %dx.types.CBufRet.i32 %2381, 3
-  %2386 = mul i32 %2382, %2377
-  %2387 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2378, i32 %2383, i32 %2386)  ; IMad(a,b,c)
-  %2388 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2379, i32 %2384, i32 %2387)  ; IMad(a,b,c)
-  %2389 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2380, i32 %2385, i32 %2388)  ; IMad(a,b,c)
-  %2390 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %2389, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2391 = extractvalue %dx.types.ResRet.i16 %2390, 0
-  %2392 = uitofp i16 %2391 to float
-  br label %2393
-
-; <label>:2393                                    ; preds = %2372, %2306, %2277, %2260, %2250
-  %2394 = phi float [ %2274, %2260 ], [ 0.000000e+00, %2250 ], [ %2305, %2277 ], [ %2392, %2372 ], [ 0.000000e+00, %2306 ]
-  br i1 %941, label %2395, label %2420
-
-; <label>:2395                                    ; preds = %2393
-  %2396 = fcmp fast oge float %1232, 0.000000e+00
-  %2397 = fptoui float %1232 to i32
-  %2398 = icmp ult i32 %2397, %13
-  %2399 = and i1 %2396, %2398
-  %2400 = fcmp fast oge float %2104, 0.000000e+00
-  %2401 = and i1 %2400, %2399
-  %2402 = fptoui float %2104 to i32
-  %2403 = icmp ult i32 %2402, %15
-  %2404 = and i1 %2403, %2401
-  br i1 %2404, label %2405, label %2538
-
-; <label>:2405                                    ; preds = %2395
-  %2406 = fptoui float %45 to i32
-  %2407 = fptoui float %182 to i32
-  %2408 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2409 = extractvalue %dx.types.CBufRet.i32 %2408, 0
-  %2410 = extractvalue %dx.types.CBufRet.i32 %2408, 1
-  %2411 = extractvalue %dx.types.CBufRet.i32 %2408, 2
-  %2412 = extractvalue %dx.types.CBufRet.i32 %2408, 3
-  %2413 = mul i32 %2409, %2406
-  %2414 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2407, i32 %2410, i32 %2413)  ; IMad(a,b,c)
-  %2415 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2402, i32 %2411, i32 %2414)  ; IMad(a,b,c)
-  %2416 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2397, i32 %2412, i32 %2415)  ; IMad(a,b,c)
-  %2417 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %2416, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2418 = extractvalue %dx.types.ResRet.i16 %2417, 0
-  %2419 = uitofp i16 %2418 to float
-  br label %2538
-
-; <label>:2420                                    ; preds = %2393
-  %2421 = icmp eq i32 %940, 1
-  br i1 %2421, label %2422, label %2451
-
-; <label>:2422                                    ; preds = %2420
-  %2423 = add i32 %13, -1
-  %2424 = uitofp i32 %2423 to float
-  %2425 = call float @dx.op.binary.f32(i32 35, float %1232, float 0.000000e+00)  ; FMax(a,b)
-  %2426 = call float @dx.op.binary.f32(i32 36, float %2425, float %2424)  ; FMin(a,b)
-  %2427 = fptoui float %2426 to i32
-  %2428 = add i32 %15, -1
-  %2429 = uitofp i32 %2428 to float
-  %2430 = call float @dx.op.binary.f32(i32 35, float %2104, float 0.000000e+00)  ; FMax(a,b)
-  %2431 = call float @dx.op.binary.f32(i32 36, float %2430, float %2429)  ; FMin(a,b)
-  %2432 = fptoui float %2431 to i32
-  %2433 = uitofp i32 %2432 to float
-  %2434 = uitofp i32 %2427 to float
-  %2435 = fptoui float %45 to i32
-  %2436 = fptoui float %182 to i32
-  %2437 = fptoui float %2433 to i32
-  %2438 = fptoui float %2434 to i32
-  %2439 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2440 = extractvalue %dx.types.CBufRet.i32 %2439, 0
-  %2441 = extractvalue %dx.types.CBufRet.i32 %2439, 1
-  %2442 = extractvalue %dx.types.CBufRet.i32 %2439, 2
-  %2443 = extractvalue %dx.types.CBufRet.i32 %2439, 3
-  %2444 = mul i32 %2440, %2435
-  %2445 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2436, i32 %2441, i32 %2444)  ; IMad(a,b,c)
-  %2446 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2437, i32 %2442, i32 %2445)  ; IMad(a,b,c)
-  %2447 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2438, i32 %2443, i32 %2446)  ; IMad(a,b,c)
-  %2448 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %2447, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2449 = extractvalue %dx.types.ResRet.i16 %2448, 0
-  %2450 = uitofp i16 %2449 to float
-  br label %2538
-
-; <label>:2451                                    ; preds = %2420
-  %2452 = icmp eq i32 %940, 2
-  br i1 %2452, label %2453, label %2538
-
-; <label>:2453                                    ; preds = %2451
-  %2454 = fsub fast float %22, %20
-  %2455 = fcmp fast olt float %1232, %20
-  br i1 %2455, label %2456, label %2469
-
-; <label>:2456                                    ; preds = %2453
-  %2457 = fsub fast float %20, %1232
-  %2458 = fdiv fast float %2457, %2454
-  %2459 = fptoui float %2458 to i32
-  %2460 = uitofp i32 %2459 to float
-  %2461 = fmul fast float %2460, %2454
-  %2462 = fsub fast float %2457, %2461
-  %2463 = and i32 %2459, 1
-  %2464 = icmp eq i32 %2463, 0
-  br i1 %2464, label %2465, label %2467
-
-; <label>:2465                                    ; preds = %2456
-  %2466 = fadd fast float %2462, %20
-  br label %2484
-
-; <label>:2467                                    ; preds = %2456
-  %2468 = fsub fast float %22, %2462
-  br label %2484
-
-; <label>:2469                                    ; preds = %2453
-  %2470 = fcmp fast ogt float %1232, %22
-  br i1 %2470, label %2471, label %2484
-
-; <label>:2471                                    ; preds = %2469
-  %2472 = fsub fast float %1232, %22
-  %2473 = fdiv fast float %2472, %2454
-  %2474 = fptoui float %2473 to i32
-  %2475 = uitofp i32 %2474 to float
-  %2476 = fmul fast float %2475, %2454
-  %2477 = fsub fast float %2472, %2476
-  %2478 = and i32 %2474, 1
-  %2479 = icmp eq i32 %2478, 0
-  br i1 %2479, label %2480, label %2482
-
-; <label>:2480                                    ; preds = %2471
-  %2481 = fsub fast float %22, %2477
-  br label %2484
-
-; <label>:2482                                    ; preds = %2471
-  %2483 = fadd fast float %2477, %20
-  br label %2484
-
-; <label>:2484                                    ; preds = %2482, %2480, %2469, %2467, %2465
-  %2485 = phi float [ %2466, %2465 ], [ %2468, %2467 ], [ %2481, %2480 ], [ %2483, %2482 ], [ %1232, %2469 ]
-  %2486 = fptoui float %2485 to i32
-  %2487 = fsub fast float %24, %20
-  %2488 = fcmp fast olt float %2104, %20
-  br i1 %2488, label %2489, label %2502
-
-; <label>:2489                                    ; preds = %2484
-  %2490 = fsub fast float %20, %2104
-  %2491 = fdiv fast float %2490, %2487
-  %2492 = fptoui float %2491 to i32
-  %2493 = uitofp i32 %2492 to float
-  %2494 = fmul fast float %2493, %2487
-  %2495 = fsub fast float %2490, %2494
-  %2496 = and i32 %2492, 1
-  %2497 = icmp eq i32 %2496, 0
-  br i1 %2497, label %2498, label %2500
-
-; <label>:2498                                    ; preds = %2489
-  %2499 = fadd fast float %2495, %20
-  br label %2517
-
-; <label>:2500                                    ; preds = %2489
-  %2501 = fsub fast float %24, %2495
-  br label %2517
-
-; <label>:2502                                    ; preds = %2484
-  %2503 = fcmp fast ogt float %2104, %24
-  br i1 %2503, label %2504, label %2517
-
-; <label>:2504                                    ; preds = %2502
-  %2505 = fsub fast float %2104, %24
-  %2506 = fdiv fast float %2505, %2487
-  %2507 = fptoui float %2506 to i32
-  %2508 = uitofp i32 %2507 to float
-  %2509 = fmul fast float %2508, %2487
-  %2510 = fsub fast float %2505, %2509
-  %2511 = and i32 %2507, 1
-  %2512 = icmp eq i32 %2511, 0
-  br i1 %2512, label %2513, label %2515
-
-; <label>:2513                                    ; preds = %2504
-  %2514 = fsub fast float %24, %2510
-  br label %2517
-
-; <label>:2515                                    ; preds = %2504
-  %2516 = fadd fast float %2510, %20
-  br label %2517
-
-; <label>:2517                                    ; preds = %2515, %2513, %2502, %2500, %2498
-  %2518 = phi float [ %2499, %2498 ], [ %2501, %2500 ], [ %2514, %2513 ], [ %2516, %2515 ], [ %2104, %2502 ]
-  %2519 = fptoui float %2518 to i32
-  %2520 = uitofp i32 %2519 to float
-  %2521 = uitofp i32 %2486 to float
-  %2522 = fptoui float %45 to i32
-  %2523 = fptoui float %182 to i32
-  %2524 = fptoui float %2520 to i32
-  %2525 = fptoui float %2521 to i32
-  %2526 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2527 = extractvalue %dx.types.CBufRet.i32 %2526, 0
-  %2528 = extractvalue %dx.types.CBufRet.i32 %2526, 1
-  %2529 = extractvalue %dx.types.CBufRet.i32 %2526, 2
-  %2530 = extractvalue %dx.types.CBufRet.i32 %2526, 3
-  %2531 = mul i32 %2527, %2522
-  %2532 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2523, i32 %2528, i32 %2531)  ; IMad(a,b,c)
-  %2533 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2524, i32 %2529, i32 %2532)  ; IMad(a,b,c)
-  %2534 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2525, i32 %2530, i32 %2533)  ; IMad(a,b,c)
-  %2535 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %2534, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2536 = extractvalue %dx.types.ResRet.i16 %2535, 0
-  %2537 = uitofp i16 %2536 to float
-  br label %2538
-
-; <label>:2538                                    ; preds = %2517, %2451, %2422, %2405, %2395
-  %2539 = phi float [ %2419, %2405 ], [ 0.000000e+00, %2395 ], [ %2450, %2422 ], [ %2537, %2517 ], [ 0.000000e+00, %2451 ]
-  br i1 %941, label %2540, label %2565
-
-; <label>:2540                                    ; preds = %2538
-  %2541 = fcmp fast oge float %1378, 0.000000e+00
-  %2542 = fptoui float %1378 to i32
-  %2543 = icmp ult i32 %2542, %13
-  %2544 = and i1 %2541, %2543
-  %2545 = fcmp fast oge float %2104, 0.000000e+00
-  %2546 = and i1 %2545, %2544
-  %2547 = fptoui float %2104 to i32
-  %2548 = icmp ult i32 %2547, %15
-  %2549 = and i1 %2548, %2546
-  br i1 %2549, label %2550, label %2683
-
-; <label>:2550                                    ; preds = %2540
-  %2551 = fptoui float %45 to i32
-  %2552 = fptoui float %182 to i32
-  %2553 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2554 = extractvalue %dx.types.CBufRet.i32 %2553, 0
-  %2555 = extractvalue %dx.types.CBufRet.i32 %2553, 1
-  %2556 = extractvalue %dx.types.CBufRet.i32 %2553, 2
-  %2557 = extractvalue %dx.types.CBufRet.i32 %2553, 3
-  %2558 = mul i32 %2554, %2551
-  %2559 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2552, i32 %2555, i32 %2558)  ; IMad(a,b,c)
-  %2560 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2547, i32 %2556, i32 %2559)  ; IMad(a,b,c)
-  %2561 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2542, i32 %2557, i32 %2560)  ; IMad(a,b,c)
-  %2562 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %2561, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2563 = extractvalue %dx.types.ResRet.i16 %2562, 0
-  %2564 = uitofp i16 %2563 to float
-  br label %2683
-
-; <label>:2565                                    ; preds = %2538
-  %2566 = icmp eq i32 %940, 1
-  br i1 %2566, label %2567, label %2596
-
-; <label>:2567                                    ; preds = %2565
-  %2568 = add i32 %13, -1
-  %2569 = uitofp i32 %2568 to float
-  %2570 = call float @dx.op.binary.f32(i32 35, float %1378, float 0.000000e+00)  ; FMax(a,b)
-  %2571 = call float @dx.op.binary.f32(i32 36, float %2570, float %2569)  ; FMin(a,b)
-  %2572 = fptoui float %2571 to i32
-  %2573 = add i32 %15, -1
-  %2574 = uitofp i32 %2573 to float
-  %2575 = call float @dx.op.binary.f32(i32 35, float %2104, float 0.000000e+00)  ; FMax(a,b)
-  %2576 = call float @dx.op.binary.f32(i32 36, float %2575, float %2574)  ; FMin(a,b)
-  %2577 = fptoui float %2576 to i32
-  %2578 = uitofp i32 %2577 to float
-  %2579 = uitofp i32 %2572 to float
-  %2580 = fptoui float %45 to i32
-  %2581 = fptoui float %182 to i32
-  %2582 = fptoui float %2578 to i32
-  %2583 = fptoui float %2579 to i32
-  %2584 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2585 = extractvalue %dx.types.CBufRet.i32 %2584, 0
-  %2586 = extractvalue %dx.types.CBufRet.i32 %2584, 1
-  %2587 = extractvalue %dx.types.CBufRet.i32 %2584, 2
-  %2588 = extractvalue %dx.types.CBufRet.i32 %2584, 3
-  %2589 = mul i32 %2585, %2580
-  %2590 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2581, i32 %2586, i32 %2589)  ; IMad(a,b,c)
-  %2591 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2582, i32 %2587, i32 %2590)  ; IMad(a,b,c)
-  %2592 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2583, i32 %2588, i32 %2591)  ; IMad(a,b,c)
-  %2593 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %2592, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2594 = extractvalue %dx.types.ResRet.i16 %2593, 0
-  %2595 = uitofp i16 %2594 to float
-  br label %2683
-
-; <label>:2596                                    ; preds = %2565
-  %2597 = icmp eq i32 %940, 2
-  br i1 %2597, label %2598, label %2683
-
-; <label>:2598                                    ; preds = %2596
-  %2599 = fsub fast float %22, %20
-  %2600 = fcmp fast olt float %1378, %20
-  br i1 %2600, label %2601, label %2614
-
-; <label>:2601                                    ; preds = %2598
-  %2602 = fsub fast float %20, %1378
-  %2603 = fdiv fast float %2602, %2599
-  %2604 = fptoui float %2603 to i32
-  %2605 = uitofp i32 %2604 to float
-  %2606 = fmul fast float %2605, %2599
-  %2607 = fsub fast float %2602, %2606
-  %2608 = and i32 %2604, 1
-  %2609 = icmp eq i32 %2608, 0
-  br i1 %2609, label %2610, label %2612
-
-; <label>:2610                                    ; preds = %2601
-  %2611 = fadd fast float %2607, %20
-  br label %2629
-
-; <label>:2612                                    ; preds = %2601
-  %2613 = fsub fast float %22, %2607
-  br label %2629
-
-; <label>:2614                                    ; preds = %2598
-  %2615 = fcmp fast ogt float %1378, %22
-  br i1 %2615, label %2616, label %2629
-
-; <label>:2616                                    ; preds = %2614
-  %2617 = fsub fast float %1378, %22
-  %2618 = fdiv fast float %2617, %2599
-  %2619 = fptoui float %2618 to i32
-  %2620 = uitofp i32 %2619 to float
-  %2621 = fmul fast float %2620, %2599
-  %2622 = fsub fast float %2617, %2621
-  %2623 = and i32 %2619, 1
-  %2624 = icmp eq i32 %2623, 0
-  br i1 %2624, label %2625, label %2627
-
-; <label>:2625                                    ; preds = %2616
-  %2626 = fsub fast float %22, %2622
-  br label %2629
-
-; <label>:2627                                    ; preds = %2616
-  %2628 = fadd fast float %2622, %20
-  br label %2629
-
-; <label>:2629                                    ; preds = %2627, %2625, %2614, %2612, %2610
-  %2630 = phi float [ %2611, %2610 ], [ %2613, %2612 ], [ %2626, %2625 ], [ %2628, %2627 ], [ %1378, %2614 ]
-  %2631 = fptoui float %2630 to i32
-  %2632 = fsub fast float %24, %20
-  %2633 = fcmp fast olt float %2104, %20
-  br i1 %2633, label %2634, label %2647
-
-; <label>:2634                                    ; preds = %2629
-  %2635 = fsub fast float %20, %2104
-  %2636 = fdiv fast float %2635, %2632
-  %2637 = fptoui float %2636 to i32
-  %2638 = uitofp i32 %2637 to float
-  %2639 = fmul fast float %2638, %2632
-  %2640 = fsub fast float %2635, %2639
-  %2641 = and i32 %2637, 1
-  %2642 = icmp eq i32 %2641, 0
-  br i1 %2642, label %2643, label %2645
-
-; <label>:2643                                    ; preds = %2634
-  %2644 = fadd fast float %2640, %20
-  br label %2662
-
-; <label>:2645                                    ; preds = %2634
-  %2646 = fsub fast float %24, %2640
-  br label %2662
-
-; <label>:2647                                    ; preds = %2629
-  %2648 = fcmp fast ogt float %2104, %24
-  br i1 %2648, label %2649, label %2662
-
-; <label>:2649                                    ; preds = %2647
-  %2650 = fsub fast float %2104, %24
-  %2651 = fdiv fast float %2650, %2632
-  %2652 = fptoui float %2651 to i32
-  %2653 = uitofp i32 %2652 to float
-  %2654 = fmul fast float %2653, %2632
-  %2655 = fsub fast float %2650, %2654
-  %2656 = and i32 %2652, 1
-  %2657 = icmp eq i32 %2656, 0
-  br i1 %2657, label %2658, label %2660
-
-; <label>:2658                                    ; preds = %2649
-  %2659 = fsub fast float %24, %2655
-  br label %2662
-
-; <label>:2660                                    ; preds = %2649
-  %2661 = fadd fast float %2655, %20
-  br label %2662
-
-; <label>:2662                                    ; preds = %2660, %2658, %2647, %2645, %2643
-  %2663 = phi float [ %2644, %2643 ], [ %2646, %2645 ], [ %2659, %2658 ], [ %2661, %2660 ], [ %2104, %2647 ]
-  %2664 = fptoui float %2663 to i32
-  %2665 = uitofp i32 %2664 to float
-  %2666 = uitofp i32 %2631 to float
-  %2667 = fptoui float %45 to i32
-  %2668 = fptoui float %182 to i32
-  %2669 = fptoui float %2665 to i32
-  %2670 = fptoui float %2666 to i32
-  %2671 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2672 = extractvalue %dx.types.CBufRet.i32 %2671, 0
-  %2673 = extractvalue %dx.types.CBufRet.i32 %2671, 1
-  %2674 = extractvalue %dx.types.CBufRet.i32 %2671, 2
-  %2675 = extractvalue %dx.types.CBufRet.i32 %2671, 3
-  %2676 = mul i32 %2672, %2667
-  %2677 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2668, i32 %2673, i32 %2676)  ; IMad(a,b,c)
-  %2678 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2669, i32 %2674, i32 %2677)  ; IMad(a,b,c)
-  %2679 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2670, i32 %2675, i32 %2678)  ; IMad(a,b,c)
-  %2680 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %2679, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2681 = extractvalue %dx.types.ResRet.i16 %2680, 0
-  %2682 = uitofp i16 %2681 to float
-  br label %2683
-
-; <label>:2683                                    ; preds = %2662, %2596, %2567, %2550, %2540
-  %2684 = phi float [ %2564, %2550 ], [ 0.000000e+00, %2540 ], [ %2595, %2567 ], [ %2682, %2662 ], [ 0.000000e+00, %2596 ]
-  %2685 = fadd fast float %938, 2.000000e+00
-  br i1 %941, label %2686, label %2711
-
-; <label>:2686                                    ; preds = %2683
-  %2687 = fcmp fast oge float %937, 0.000000e+00
-  %2688 = fptoui float %937 to i32
-  %2689 = icmp ult i32 %2688, %13
-  %2690 = and i1 %2687, %2689
-  %2691 = fcmp fast oge float %2685, 0.000000e+00
-  %2692 = and i1 %2691, %2690
-  %2693 = fptoui float %2685 to i32
-  %2694 = icmp ult i32 %2693, %15
-  %2695 = and i1 %2694, %2692
-  br i1 %2695, label %2696, label %2829
-
-; <label>:2696                                    ; preds = %2686
-  %2697 = fptoui float %45 to i32
-  %2698 = fptoui float %182 to i32
-  %2699 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2700 = extractvalue %dx.types.CBufRet.i32 %2699, 0
-  %2701 = extractvalue %dx.types.CBufRet.i32 %2699, 1
-  %2702 = extractvalue %dx.types.CBufRet.i32 %2699, 2
-  %2703 = extractvalue %dx.types.CBufRet.i32 %2699, 3
-  %2704 = mul i32 %2700, %2697
-  %2705 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2698, i32 %2701, i32 %2704)  ; IMad(a,b,c)
-  %2706 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2693, i32 %2702, i32 %2705)  ; IMad(a,b,c)
-  %2707 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2688, i32 %2703, i32 %2706)  ; IMad(a,b,c)
-  %2708 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %2707, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2709 = extractvalue %dx.types.ResRet.i16 %2708, 0
-  %2710 = uitofp i16 %2709 to float
-  br label %2829
-
-; <label>:2711                                    ; preds = %2683
-  %2712 = icmp eq i32 %940, 1
-  br i1 %2712, label %2713, label %2742
-
-; <label>:2713                                    ; preds = %2711
-  %2714 = add i32 %13, -1
-  %2715 = uitofp i32 %2714 to float
-  %2716 = call float @dx.op.binary.f32(i32 35, float %937, float 0.000000e+00)  ; FMax(a,b)
-  %2717 = call float @dx.op.binary.f32(i32 36, float %2716, float %2715)  ; FMin(a,b)
-  %2718 = fptoui float %2717 to i32
-  %2719 = add i32 %15, -1
-  %2720 = uitofp i32 %2719 to float
-  %2721 = call float @dx.op.binary.f32(i32 35, float %2685, float 0.000000e+00)  ; FMax(a,b)
-  %2722 = call float @dx.op.binary.f32(i32 36, float %2721, float %2720)  ; FMin(a,b)
-  %2723 = fptoui float %2722 to i32
-  %2724 = uitofp i32 %2723 to float
-  %2725 = uitofp i32 %2718 to float
-  %2726 = fptoui float %45 to i32
-  %2727 = fptoui float %182 to i32
-  %2728 = fptoui float %2724 to i32
-  %2729 = fptoui float %2725 to i32
-  %2730 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2731 = extractvalue %dx.types.CBufRet.i32 %2730, 0
-  %2732 = extractvalue %dx.types.CBufRet.i32 %2730, 1
-  %2733 = extractvalue %dx.types.CBufRet.i32 %2730, 2
-  %2734 = extractvalue %dx.types.CBufRet.i32 %2730, 3
-  %2735 = mul i32 %2731, %2726
-  %2736 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2727, i32 %2732, i32 %2735)  ; IMad(a,b,c)
-  %2737 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2728, i32 %2733, i32 %2736)  ; IMad(a,b,c)
-  %2738 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2729, i32 %2734, i32 %2737)  ; IMad(a,b,c)
-  %2739 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %2738, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2740 = extractvalue %dx.types.ResRet.i16 %2739, 0
-  %2741 = uitofp i16 %2740 to float
-  br label %2829
-
-; <label>:2742                                    ; preds = %2711
-  %2743 = icmp eq i32 %940, 2
-  br i1 %2743, label %2744, label %2829
-
-; <label>:2744                                    ; preds = %2742
-  %2745 = fsub fast float %22, %20
-  %2746 = fcmp fast olt float %937, %20
-  br i1 %2746, label %2747, label %2760
-
-; <label>:2747                                    ; preds = %2744
-  %2748 = fsub fast float %20, %937
-  %2749 = fdiv fast float %2748, %2745
-  %2750 = fptoui float %2749 to i32
-  %2751 = uitofp i32 %2750 to float
-  %2752 = fmul fast float %2751, %2745
-  %2753 = fsub fast float %2748, %2752
-  %2754 = and i32 %2750, 1
-  %2755 = icmp eq i32 %2754, 0
-  br i1 %2755, label %2756, label %2758
-
-; <label>:2756                                    ; preds = %2747
-  %2757 = fadd fast float %2753, %20
-  br label %2775
-
-; <label>:2758                                    ; preds = %2747
-  %2759 = fsub fast float %22, %2753
-  br label %2775
-
-; <label>:2760                                    ; preds = %2744
-  %2761 = fcmp fast ogt float %937, %22
-  br i1 %2761, label %2762, label %2775
-
-; <label>:2762                                    ; preds = %2760
-  %2763 = fsub fast float %937, %22
-  %2764 = fdiv fast float %2763, %2745
-  %2765 = fptoui float %2764 to i32
-  %2766 = uitofp i32 %2765 to float
-  %2767 = fmul fast float %2766, %2745
-  %2768 = fsub fast float %2763, %2767
-  %2769 = and i32 %2765, 1
-  %2770 = icmp eq i32 %2769, 0
-  br i1 %2770, label %2771, label %2773
-
-; <label>:2771                                    ; preds = %2762
-  %2772 = fsub fast float %22, %2768
-  br label %2775
-
-; <label>:2773                                    ; preds = %2762
-  %2774 = fadd fast float %2768, %20
-  br label %2775
-
-; <label>:2775                                    ; preds = %2773, %2771, %2760, %2758, %2756
-  %2776 = phi float [ %2757, %2756 ], [ %2759, %2758 ], [ %2772, %2771 ], [ %2774, %2773 ], [ %937, %2760 ]
-  %2777 = fptoui float %2776 to i32
-  %2778 = fsub fast float %24, %20
-  %2779 = fcmp fast olt float %2685, %20
-  br i1 %2779, label %2780, label %2793
-
-; <label>:2780                                    ; preds = %2775
-  %2781 = fsub fast float %20, %2685
-  %2782 = fdiv fast float %2781, %2778
-  %2783 = fptoui float %2782 to i32
-  %2784 = uitofp i32 %2783 to float
-  %2785 = fmul fast float %2784, %2778
-  %2786 = fsub fast float %2781, %2785
-  %2787 = and i32 %2783, 1
-  %2788 = icmp eq i32 %2787, 0
-  br i1 %2788, label %2789, label %2791
-
-; <label>:2789                                    ; preds = %2780
-  %2790 = fadd fast float %2786, %20
-  br label %2808
-
-; <label>:2791                                    ; preds = %2780
-  %2792 = fsub fast float %24, %2786
-  br label %2808
-
-; <label>:2793                                    ; preds = %2775
-  %2794 = fcmp fast ogt float %2685, %24
-  br i1 %2794, label %2795, label %2808
-
-; <label>:2795                                    ; preds = %2793
-  %2796 = fsub fast float %2685, %24
-  %2797 = fdiv fast float %2796, %2778
-  %2798 = fptoui float %2797 to i32
-  %2799 = uitofp i32 %2798 to float
-  %2800 = fmul fast float %2799, %2778
-  %2801 = fsub fast float %2796, %2800
-  %2802 = and i32 %2798, 1
-  %2803 = icmp eq i32 %2802, 0
-  br i1 %2803, label %2804, label %2806
-
-; <label>:2804                                    ; preds = %2795
-  %2805 = fsub fast float %24, %2801
-  br label %2808
-
-; <label>:2806                                    ; preds = %2795
-  %2807 = fadd fast float %2801, %20
-  br label %2808
-
-; <label>:2808                                    ; preds = %2806, %2804, %2793, %2791, %2789
-  %2809 = phi float [ %2790, %2789 ], [ %2792, %2791 ], [ %2805, %2804 ], [ %2807, %2806 ], [ %2685, %2793 ]
-  %2810 = fptoui float %2809 to i32
-  %2811 = uitofp i32 %2810 to float
-  %2812 = uitofp i32 %2777 to float
-  %2813 = fptoui float %45 to i32
-  %2814 = fptoui float %182 to i32
-  %2815 = fptoui float %2811 to i32
-  %2816 = fptoui float %2812 to i32
-  %2817 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2818 = extractvalue %dx.types.CBufRet.i32 %2817, 0
-  %2819 = extractvalue %dx.types.CBufRet.i32 %2817, 1
-  %2820 = extractvalue %dx.types.CBufRet.i32 %2817, 2
-  %2821 = extractvalue %dx.types.CBufRet.i32 %2817, 3
-  %2822 = mul i32 %2818, %2813
-  %2823 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2814, i32 %2819, i32 %2822)  ; IMad(a,b,c)
-  %2824 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2815, i32 %2820, i32 %2823)  ; IMad(a,b,c)
-  %2825 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2816, i32 %2821, i32 %2824)  ; IMad(a,b,c)
-  %2826 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %2825, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2827 = extractvalue %dx.types.ResRet.i16 %2826, 0
-  %2828 = uitofp i16 %2827 to float
-  br label %2829
-
-; <label>:2829                                    ; preds = %2808, %2742, %2713, %2696, %2686
-  %2830 = phi float [ %2710, %2696 ], [ 0.000000e+00, %2686 ], [ %2741, %2713 ], [ %2828, %2808 ], [ 0.000000e+00, %2742 ]
-  br i1 %941, label %2831, label %2856
-
-; <label>:2831                                    ; preds = %2829
-  %2832 = fcmp fast oge float %936, 0.000000e+00
-  %2833 = fptoui float %936 to i32
-  %2834 = icmp ult i32 %2833, %13
-  %2835 = and i1 %2832, %2834
-  %2836 = fcmp fast oge float %2685, 0.000000e+00
-  %2837 = and i1 %2836, %2835
-  %2838 = fptoui float %2685 to i32
-  %2839 = icmp ult i32 %2838, %15
-  %2840 = and i1 %2839, %2837
-  br i1 %2840, label %2841, label %2974
-
-; <label>:2841                                    ; preds = %2831
-  %2842 = fptoui float %45 to i32
-  %2843 = fptoui float %182 to i32
-  %2844 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2845 = extractvalue %dx.types.CBufRet.i32 %2844, 0
-  %2846 = extractvalue %dx.types.CBufRet.i32 %2844, 1
-  %2847 = extractvalue %dx.types.CBufRet.i32 %2844, 2
-  %2848 = extractvalue %dx.types.CBufRet.i32 %2844, 3
-  %2849 = mul i32 %2845, %2842
-  %2850 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2843, i32 %2846, i32 %2849)  ; IMad(a,b,c)
-  %2851 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2838, i32 %2847, i32 %2850)  ; IMad(a,b,c)
-  %2852 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2833, i32 %2848, i32 %2851)  ; IMad(a,b,c)
-  %2853 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %2852, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2854 = extractvalue %dx.types.ResRet.i16 %2853, 0
-  %2855 = uitofp i16 %2854 to float
-  br label %2974
-
-; <label>:2856                                    ; preds = %2829
-  %2857 = icmp eq i32 %940, 1
-  br i1 %2857, label %2858, label %2887
-
-; <label>:2858                                    ; preds = %2856
-  %2859 = add i32 %13, -1
-  %2860 = uitofp i32 %2859 to float
-  %2861 = call float @dx.op.binary.f32(i32 35, float %936, float 0.000000e+00)  ; FMax(a,b)
-  %2862 = call float @dx.op.binary.f32(i32 36, float %2861, float %2860)  ; FMin(a,b)
-  %2863 = fptoui float %2862 to i32
-  %2864 = add i32 %15, -1
-  %2865 = uitofp i32 %2864 to float
-  %2866 = call float @dx.op.binary.f32(i32 35, float %2685, float 0.000000e+00)  ; FMax(a,b)
-  %2867 = call float @dx.op.binary.f32(i32 36, float %2866, float %2865)  ; FMin(a,b)
-  %2868 = fptoui float %2867 to i32
-  %2869 = uitofp i32 %2868 to float
-  %2870 = uitofp i32 %2863 to float
-  %2871 = fptoui float %45 to i32
-  %2872 = fptoui float %182 to i32
-  %2873 = fptoui float %2869 to i32
-  %2874 = fptoui float %2870 to i32
-  %2875 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2876 = extractvalue %dx.types.CBufRet.i32 %2875, 0
-  %2877 = extractvalue %dx.types.CBufRet.i32 %2875, 1
-  %2878 = extractvalue %dx.types.CBufRet.i32 %2875, 2
-  %2879 = extractvalue %dx.types.CBufRet.i32 %2875, 3
-  %2880 = mul i32 %2876, %2871
-  %2881 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2872, i32 %2877, i32 %2880)  ; IMad(a,b,c)
-  %2882 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2873, i32 %2878, i32 %2881)  ; IMad(a,b,c)
-  %2883 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2874, i32 %2879, i32 %2882)  ; IMad(a,b,c)
-  %2884 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %2883, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2885 = extractvalue %dx.types.ResRet.i16 %2884, 0
-  %2886 = uitofp i16 %2885 to float
-  br label %2974
-
-; <label>:2887                                    ; preds = %2856
-  %2888 = icmp eq i32 %940, 2
-  br i1 %2888, label %2889, label %2974
-
-; <label>:2889                                    ; preds = %2887
-  %2890 = fsub fast float %22, %20
-  %2891 = fcmp fast olt float %936, %20
-  br i1 %2891, label %2892, label %2905
-
-; <label>:2892                                    ; preds = %2889
-  %2893 = fsub fast float %20, %936
-  %2894 = fdiv fast float %2893, %2890
-  %2895 = fptoui float %2894 to i32
-  %2896 = uitofp i32 %2895 to float
-  %2897 = fmul fast float %2896, %2890
-  %2898 = fsub fast float %2893, %2897
-  %2899 = and i32 %2895, 1
-  %2900 = icmp eq i32 %2899, 0
-  br i1 %2900, label %2901, label %2903
-
-; <label>:2901                                    ; preds = %2892
-  %2902 = fadd fast float %2898, %20
-  br label %2920
-
-; <label>:2903                                    ; preds = %2892
-  %2904 = fsub fast float %22, %2898
-  br label %2920
-
-; <label>:2905                                    ; preds = %2889
-  %2906 = fcmp fast ogt float %936, %22
-  br i1 %2906, label %2907, label %2920
-
-; <label>:2907                                    ; preds = %2905
-  %2908 = fsub fast float %936, %22
-  %2909 = fdiv fast float %2908, %2890
-  %2910 = fptoui float %2909 to i32
-  %2911 = uitofp i32 %2910 to float
-  %2912 = fmul fast float %2911, %2890
-  %2913 = fsub fast float %2908, %2912
-  %2914 = and i32 %2910, 1
-  %2915 = icmp eq i32 %2914, 0
-  br i1 %2915, label %2916, label %2918
-
-; <label>:2916                                    ; preds = %2907
-  %2917 = fsub fast float %22, %2913
-  br label %2920
-
-; <label>:2918                                    ; preds = %2907
-  %2919 = fadd fast float %2913, %20
-  br label %2920
-
-; <label>:2920                                    ; preds = %2918, %2916, %2905, %2903, %2901
-  %2921 = phi float [ %2902, %2901 ], [ %2904, %2903 ], [ %2917, %2916 ], [ %2919, %2918 ], [ %936, %2905 ]
-  %2922 = fptoui float %2921 to i32
-  %2923 = fsub fast float %24, %20
-  %2924 = fcmp fast olt float %2685, %20
-  br i1 %2924, label %2925, label %2938
-
-; <label>:2925                                    ; preds = %2920
-  %2926 = fsub fast float %20, %2685
-  %2927 = fdiv fast float %2926, %2923
-  %2928 = fptoui float %2927 to i32
-  %2929 = uitofp i32 %2928 to float
-  %2930 = fmul fast float %2929, %2923
-  %2931 = fsub fast float %2926, %2930
-  %2932 = and i32 %2928, 1
-  %2933 = icmp eq i32 %2932, 0
-  br i1 %2933, label %2934, label %2936
-
-; <label>:2934                                    ; preds = %2925
-  %2935 = fadd fast float %2931, %20
-  br label %2953
-
-; <label>:2936                                    ; preds = %2925
-  %2937 = fsub fast float %24, %2931
-  br label %2953
-
-; <label>:2938                                    ; preds = %2920
-  %2939 = fcmp fast ogt float %2685, %24
-  br i1 %2939, label %2940, label %2953
-
-; <label>:2940                                    ; preds = %2938
-  %2941 = fsub fast float %2685, %24
-  %2942 = fdiv fast float %2941, %2923
-  %2943 = fptoui float %2942 to i32
-  %2944 = uitofp i32 %2943 to float
-  %2945 = fmul fast float %2944, %2923
-  %2946 = fsub fast float %2941, %2945
-  %2947 = and i32 %2943, 1
-  %2948 = icmp eq i32 %2947, 0
-  br i1 %2948, label %2949, label %2951
-
-; <label>:2949                                    ; preds = %2940
-  %2950 = fsub fast float %24, %2946
-  br label %2953
-
-; <label>:2951                                    ; preds = %2940
-  %2952 = fadd fast float %2946, %20
-  br label %2953
-
-; <label>:2953                                    ; preds = %2951, %2949, %2938, %2936, %2934
-  %2954 = phi float [ %2935, %2934 ], [ %2937, %2936 ], [ %2950, %2949 ], [ %2952, %2951 ], [ %2685, %2938 ]
-  %2955 = fptoui float %2954 to i32
-  %2956 = uitofp i32 %2955 to float
-  %2957 = uitofp i32 %2922 to float
-  %2958 = fptoui float %45 to i32
-  %2959 = fptoui float %182 to i32
-  %2960 = fptoui float %2956 to i32
-  %2961 = fptoui float %2957 to i32
-  %2962 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2963 = extractvalue %dx.types.CBufRet.i32 %2962, 0
-  %2964 = extractvalue %dx.types.CBufRet.i32 %2962, 1
-  %2965 = extractvalue %dx.types.CBufRet.i32 %2962, 2
-  %2966 = extractvalue %dx.types.CBufRet.i32 %2962, 3
-  %2967 = mul i32 %2963, %2958
-  %2968 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2959, i32 %2964, i32 %2967)  ; IMad(a,b,c)
-  %2969 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2960, i32 %2965, i32 %2968)  ; IMad(a,b,c)
-  %2970 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2961, i32 %2966, i32 %2969)  ; IMad(a,b,c)
-  %2971 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %2970, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2972 = extractvalue %dx.types.ResRet.i16 %2971, 0
-  %2973 = uitofp i16 %2972 to float
-  br label %2974
-
-; <label>:2974                                    ; preds = %2953, %2887, %2858, %2841, %2831
-  %2975 = phi float [ %2855, %2841 ], [ 0.000000e+00, %2831 ], [ %2886, %2858 ], [ %2973, %2953 ], [ 0.000000e+00, %2887 ]
-  br i1 %941, label %2976, label %3001
-
-; <label>:2976                                    ; preds = %2974
-  %2977 = fcmp fast oge float %1232, 0.000000e+00
-  %2978 = fptoui float %1232 to i32
-  %2979 = icmp ult i32 %2978, %13
-  %2980 = and i1 %2977, %2979
-  %2981 = fcmp fast oge float %2685, 0.000000e+00
-  %2982 = and i1 %2981, %2980
-  %2983 = fptoui float %2685 to i32
-  %2984 = icmp ult i32 %2983, %15
-  %2985 = and i1 %2984, %2982
-  br i1 %2985, label %2986, label %3119
-
-; <label>:2986                                    ; preds = %2976
-  %2987 = fptoui float %45 to i32
-  %2988 = fptoui float %182 to i32
-  %2989 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2990 = extractvalue %dx.types.CBufRet.i32 %2989, 0
-  %2991 = extractvalue %dx.types.CBufRet.i32 %2989, 1
-  %2992 = extractvalue %dx.types.CBufRet.i32 %2989, 2
-  %2993 = extractvalue %dx.types.CBufRet.i32 %2989, 3
-  %2994 = mul i32 %2990, %2987
-  %2995 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2988, i32 %2991, i32 %2994)  ; IMad(a,b,c)
-  %2996 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2983, i32 %2992, i32 %2995)  ; IMad(a,b,c)
-  %2997 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2978, i32 %2993, i32 %2996)  ; IMad(a,b,c)
-  %2998 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %2997, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2999 = extractvalue %dx.types.ResRet.i16 %2998, 0
-  %3000 = uitofp i16 %2999 to float
-  br label %3119
-
-; <label>:3001                                    ; preds = %2974
-  %3002 = icmp eq i32 %940, 1
-  br i1 %3002, label %3003, label %3032
-
-; <label>:3003                                    ; preds = %3001
-  %3004 = add i32 %13, -1
-  %3005 = uitofp i32 %3004 to float
-  %3006 = call float @dx.op.binary.f32(i32 35, float %1232, float 0.000000e+00)  ; FMax(a,b)
-  %3007 = call float @dx.op.binary.f32(i32 36, float %3006, float %3005)  ; FMin(a,b)
-  %3008 = fptoui float %3007 to i32
-  %3009 = add i32 %15, -1
-  %3010 = uitofp i32 %3009 to float
-  %3011 = call float @dx.op.binary.f32(i32 35, float %2685, float 0.000000e+00)  ; FMax(a,b)
-  %3012 = call float @dx.op.binary.f32(i32 36, float %3011, float %3010)  ; FMin(a,b)
-  %3013 = fptoui float %3012 to i32
-  %3014 = uitofp i32 %3013 to float
-  %3015 = uitofp i32 %3008 to float
-  %3016 = fptoui float %45 to i32
-  %3017 = fptoui float %182 to i32
-  %3018 = fptoui float %3014 to i32
-  %3019 = fptoui float %3015 to i32
-  %3020 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3021 = extractvalue %dx.types.CBufRet.i32 %3020, 0
-  %3022 = extractvalue %dx.types.CBufRet.i32 %3020, 1
-  %3023 = extractvalue %dx.types.CBufRet.i32 %3020, 2
-  %3024 = extractvalue %dx.types.CBufRet.i32 %3020, 3
-  %3025 = mul i32 %3021, %3016
-  %3026 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3017, i32 %3022, i32 %3025)  ; IMad(a,b,c)
-  %3027 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3018, i32 %3023, i32 %3026)  ; IMad(a,b,c)
-  %3028 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3019, i32 %3024, i32 %3027)  ; IMad(a,b,c)
-  %3029 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %3028, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3030 = extractvalue %dx.types.ResRet.i16 %3029, 0
-  %3031 = uitofp i16 %3030 to float
-  br label %3119
-
-; <label>:3032                                    ; preds = %3001
-  %3033 = icmp eq i32 %940, 2
-  br i1 %3033, label %3034, label %3119
-
-; <label>:3034                                    ; preds = %3032
-  %3035 = fsub fast float %22, %20
-  %3036 = fcmp fast olt float %1232, %20
-  br i1 %3036, label %3037, label %3050
-
-; <label>:3037                                    ; preds = %3034
-  %3038 = fsub fast float %20, %1232
-  %3039 = fdiv fast float %3038, %3035
-  %3040 = fptoui float %3039 to i32
-  %3041 = uitofp i32 %3040 to float
-  %3042 = fmul fast float %3041, %3035
-  %3043 = fsub fast float %3038, %3042
-  %3044 = and i32 %3040, 1
-  %3045 = icmp eq i32 %3044, 0
-  br i1 %3045, label %3046, label %3048
-
-; <label>:3046                                    ; preds = %3037
-  %3047 = fadd fast float %3043, %20
-  br label %3065
-
-; <label>:3048                                    ; preds = %3037
-  %3049 = fsub fast float %22, %3043
-  br label %3065
-
-; <label>:3050                                    ; preds = %3034
-  %3051 = fcmp fast ogt float %1232, %22
-  br i1 %3051, label %3052, label %3065
-
-; <label>:3052                                    ; preds = %3050
-  %3053 = fsub fast float %1232, %22
-  %3054 = fdiv fast float %3053, %3035
-  %3055 = fptoui float %3054 to i32
-  %3056 = uitofp i32 %3055 to float
-  %3057 = fmul fast float %3056, %3035
-  %3058 = fsub fast float %3053, %3057
-  %3059 = and i32 %3055, 1
-  %3060 = icmp eq i32 %3059, 0
-  br i1 %3060, label %3061, label %3063
-
-; <label>:3061                                    ; preds = %3052
-  %3062 = fsub fast float %22, %3058
-  br label %3065
-
-; <label>:3063                                    ; preds = %3052
-  %3064 = fadd fast float %3058, %20
-  br label %3065
-
-; <label>:3065                                    ; preds = %3063, %3061, %3050, %3048, %3046
-  %3066 = phi float [ %3047, %3046 ], [ %3049, %3048 ], [ %3062, %3061 ], [ %3064, %3063 ], [ %1232, %3050 ]
-  %3067 = fptoui float %3066 to i32
-  %3068 = fsub fast float %24, %20
-  %3069 = fcmp fast olt float %2685, %20
-  br i1 %3069, label %3070, label %3083
-
-; <label>:3070                                    ; preds = %3065
-  %3071 = fsub fast float %20, %2685
-  %3072 = fdiv fast float %3071, %3068
-  %3073 = fptoui float %3072 to i32
-  %3074 = uitofp i32 %3073 to float
-  %3075 = fmul fast float %3074, %3068
-  %3076 = fsub fast float %3071, %3075
-  %3077 = and i32 %3073, 1
-  %3078 = icmp eq i32 %3077, 0
-  br i1 %3078, label %3079, label %3081
-
-; <label>:3079                                    ; preds = %3070
-  %3080 = fadd fast float %3076, %20
-  br label %3098
-
-; <label>:3081                                    ; preds = %3070
-  %3082 = fsub fast float %24, %3076
-  br label %3098
-
-; <label>:3083                                    ; preds = %3065
-  %3084 = fcmp fast ogt float %2685, %24
-  br i1 %3084, label %3085, label %3098
-
-; <label>:3085                                    ; preds = %3083
-  %3086 = fsub fast float %2685, %24
-  %3087 = fdiv fast float %3086, %3068
-  %3088 = fptoui float %3087 to i32
-  %3089 = uitofp i32 %3088 to float
-  %3090 = fmul fast float %3089, %3068
-  %3091 = fsub fast float %3086, %3090
-  %3092 = and i32 %3088, 1
-  %3093 = icmp eq i32 %3092, 0
-  br i1 %3093, label %3094, label %3096
-
-; <label>:3094                                    ; preds = %3085
-  %3095 = fsub fast float %24, %3091
-  br label %3098
-
-; <label>:3096                                    ; preds = %3085
-  %3097 = fadd fast float %3091, %20
-  br label %3098
-
-; <label>:3098                                    ; preds = %3096, %3094, %3083, %3081, %3079
-  %3099 = phi float [ %3080, %3079 ], [ %3082, %3081 ], [ %3095, %3094 ], [ %3097, %3096 ], [ %2685, %3083 ]
-  %3100 = fptoui float %3099 to i32
-  %3101 = uitofp i32 %3100 to float
-  %3102 = uitofp i32 %3067 to float
-  %3103 = fptoui float %45 to i32
-  %3104 = fptoui float %182 to i32
-  %3105 = fptoui float %3101 to i32
-  %3106 = fptoui float %3102 to i32
-  %3107 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3108 = extractvalue %dx.types.CBufRet.i32 %3107, 0
-  %3109 = extractvalue %dx.types.CBufRet.i32 %3107, 1
-  %3110 = extractvalue %dx.types.CBufRet.i32 %3107, 2
-  %3111 = extractvalue %dx.types.CBufRet.i32 %3107, 3
-  %3112 = mul i32 %3108, %3103
-  %3113 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3104, i32 %3109, i32 %3112)  ; IMad(a,b,c)
-  %3114 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3105, i32 %3110, i32 %3113)  ; IMad(a,b,c)
-  %3115 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3106, i32 %3111, i32 %3114)  ; IMad(a,b,c)
-  %3116 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %3115, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3117 = extractvalue %dx.types.ResRet.i16 %3116, 0
-  %3118 = uitofp i16 %3117 to float
-  br label %3119
-
-; <label>:3119                                    ; preds = %3098, %3032, %3003, %2986, %2976
-  %3120 = phi float [ %3000, %2986 ], [ 0.000000e+00, %2976 ], [ %3031, %3003 ], [ %3118, %3098 ], [ 0.000000e+00, %3032 ]
-  br i1 %941, label %3121, label %3146
-
-; <label>:3121                                    ; preds = %3119
-  %3122 = fcmp fast oge float %1378, 0.000000e+00
-  %3123 = fptoui float %1378 to i32
-  %3124 = icmp ult i32 %3123, %13
-  %3125 = and i1 %3122, %3124
-  %3126 = fcmp fast oge float %2685, 0.000000e+00
-  %3127 = and i1 %3126, %3125
-  %3128 = fptoui float %2685 to i32
-  %3129 = icmp ult i32 %3128, %15
-  %3130 = and i1 %3129, %3127
-  br i1 %3130, label %3131, label %3264
-
-; <label>:3131                                    ; preds = %3121
-  %3132 = fptoui float %45 to i32
-  %3133 = fptoui float %182 to i32
-  %3134 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3135 = extractvalue %dx.types.CBufRet.i32 %3134, 0
-  %3136 = extractvalue %dx.types.CBufRet.i32 %3134, 1
-  %3137 = extractvalue %dx.types.CBufRet.i32 %3134, 2
-  %3138 = extractvalue %dx.types.CBufRet.i32 %3134, 3
-  %3139 = mul i32 %3135, %3132
-  %3140 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3133, i32 %3136, i32 %3139)  ; IMad(a,b,c)
-  %3141 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3128, i32 %3137, i32 %3140)  ; IMad(a,b,c)
-  %3142 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3123, i32 %3138, i32 %3141)  ; IMad(a,b,c)
-  %3143 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %3142, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3144 = extractvalue %dx.types.ResRet.i16 %3143, 0
-  %3145 = uitofp i16 %3144 to float
-  br label %3264
-
-; <label>:3146                                    ; preds = %3119
-  %3147 = icmp eq i32 %940, 1
-  br i1 %3147, label %3148, label %3177
-
-; <label>:3148                                    ; preds = %3146
-  %3149 = add i32 %13, -1
-  %3150 = uitofp i32 %3149 to float
-  %3151 = call float @dx.op.binary.f32(i32 35, float %1378, float 0.000000e+00)  ; FMax(a,b)
-  %3152 = call float @dx.op.binary.f32(i32 36, float %3151, float %3150)  ; FMin(a,b)
-  %3153 = fptoui float %3152 to i32
-  %3154 = add i32 %15, -1
-  %3155 = uitofp i32 %3154 to float
-  %3156 = call float @dx.op.binary.f32(i32 35, float %2685, float 0.000000e+00)  ; FMax(a,b)
-  %3157 = call float @dx.op.binary.f32(i32 36, float %3156, float %3155)  ; FMin(a,b)
-  %3158 = fptoui float %3157 to i32
-  %3159 = uitofp i32 %3158 to float
-  %3160 = uitofp i32 %3153 to float
-  %3161 = fptoui float %45 to i32
-  %3162 = fptoui float %182 to i32
-  %3163 = fptoui float %3159 to i32
-  %3164 = fptoui float %3160 to i32
-  %3165 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3166 = extractvalue %dx.types.CBufRet.i32 %3165, 0
-  %3167 = extractvalue %dx.types.CBufRet.i32 %3165, 1
-  %3168 = extractvalue %dx.types.CBufRet.i32 %3165, 2
-  %3169 = extractvalue %dx.types.CBufRet.i32 %3165, 3
-  %3170 = mul i32 %3166, %3161
-  %3171 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3162, i32 %3167, i32 %3170)  ; IMad(a,b,c)
-  %3172 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3163, i32 %3168, i32 %3171)  ; IMad(a,b,c)
-  %3173 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3164, i32 %3169, i32 %3172)  ; IMad(a,b,c)
-  %3174 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %3173, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3175 = extractvalue %dx.types.ResRet.i16 %3174, 0
-  %3176 = uitofp i16 %3175 to float
-  br label %3264
-
-; <label>:3177                                    ; preds = %3146
-  %3178 = icmp eq i32 %940, 2
-  br i1 %3178, label %3179, label %3264
-
-; <label>:3179                                    ; preds = %3177
-  %3180 = fsub fast float %22, %20
-  %3181 = fcmp fast olt float %1378, %20
-  br i1 %3181, label %3182, label %3195
-
-; <label>:3182                                    ; preds = %3179
-  %3183 = fsub fast float %20, %1378
-  %3184 = fdiv fast float %3183, %3180
-  %3185 = fptoui float %3184 to i32
-  %3186 = uitofp i32 %3185 to float
-  %3187 = fmul fast float %3186, %3180
-  %3188 = fsub fast float %3183, %3187
-  %3189 = and i32 %3185, 1
-  %3190 = icmp eq i32 %3189, 0
-  br i1 %3190, label %3191, label %3193
-
-; <label>:3191                                    ; preds = %3182
-  %3192 = fadd fast float %3188, %20
-  br label %3210
-
-; <label>:3193                                    ; preds = %3182
-  %3194 = fsub fast float %22, %3188
-  br label %3210
-
-; <label>:3195                                    ; preds = %3179
-  %3196 = fcmp fast ogt float %1378, %22
-  br i1 %3196, label %3197, label %3210
-
-; <label>:3197                                    ; preds = %3195
-  %3198 = fsub fast float %1378, %22
-  %3199 = fdiv fast float %3198, %3180
-  %3200 = fptoui float %3199 to i32
-  %3201 = uitofp i32 %3200 to float
-  %3202 = fmul fast float %3201, %3180
-  %3203 = fsub fast float %3198, %3202
-  %3204 = and i32 %3200, 1
-  %3205 = icmp eq i32 %3204, 0
-  br i1 %3205, label %3206, label %3208
-
-; <label>:3206                                    ; preds = %3197
-  %3207 = fsub fast float %22, %3203
-  br label %3210
-
-; <label>:3208                                    ; preds = %3197
-  %3209 = fadd fast float %3203, %20
-  br label %3210
-
-; <label>:3210                                    ; preds = %3208, %3206, %3195, %3193, %3191
-  %3211 = phi float [ %3192, %3191 ], [ %3194, %3193 ], [ %3207, %3206 ], [ %3209, %3208 ], [ %1378, %3195 ]
-  %3212 = fptoui float %3211 to i32
-  %3213 = fsub fast float %24, %20
-  %3214 = fcmp fast olt float %2685, %20
-  br i1 %3214, label %3215, label %3228
-
-; <label>:3215                                    ; preds = %3210
-  %3216 = fsub fast float %20, %2685
-  %3217 = fdiv fast float %3216, %3213
-  %3218 = fptoui float %3217 to i32
-  %3219 = uitofp i32 %3218 to float
-  %3220 = fmul fast float %3219, %3213
-  %3221 = fsub fast float %3216, %3220
-  %3222 = and i32 %3218, 1
-  %3223 = icmp eq i32 %3222, 0
-  br i1 %3223, label %3224, label %3226
-
-; <label>:3224                                    ; preds = %3215
-  %3225 = fadd fast float %3221, %20
-  br label %3243
-
-; <label>:3226                                    ; preds = %3215
-  %3227 = fsub fast float %24, %3221
-  br label %3243
-
-; <label>:3228                                    ; preds = %3210
-  %3229 = fcmp fast ogt float %2685, %24
-  br i1 %3229, label %3230, label %3243
-
-; <label>:3230                                    ; preds = %3228
-  %3231 = fsub fast float %2685, %24
-  %3232 = fdiv fast float %3231, %3213
-  %3233 = fptoui float %3232 to i32
-  %3234 = uitofp i32 %3233 to float
-  %3235 = fmul fast float %3234, %3213
-  %3236 = fsub fast float %3231, %3235
-  %3237 = and i32 %3233, 1
-  %3238 = icmp eq i32 %3237, 0
-  br i1 %3238, label %3239, label %3241
-
-; <label>:3239                                    ; preds = %3230
-  %3240 = fsub fast float %24, %3236
-  br label %3243
-
-; <label>:3241                                    ; preds = %3230
-  %3242 = fadd fast float %3236, %20
-  br label %3243
-
-; <label>:3243                                    ; preds = %3241, %3239, %3228, %3226, %3224
-  %3244 = phi float [ %3225, %3224 ], [ %3227, %3226 ], [ %3240, %3239 ], [ %3242, %3241 ], [ %2685, %3228 ]
-  %3245 = fptoui float %3244 to i32
-  %3246 = uitofp i32 %3245 to float
-  %3247 = uitofp i32 %3212 to float
-  %3248 = fptoui float %45 to i32
-  %3249 = fptoui float %182 to i32
-  %3250 = fptoui float %3246 to i32
-  %3251 = fptoui float %3247 to i32
-  %3252 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3253 = extractvalue %dx.types.CBufRet.i32 %3252, 0
-  %3254 = extractvalue %dx.types.CBufRet.i32 %3252, 1
-  %3255 = extractvalue %dx.types.CBufRet.i32 %3252, 2
-  %3256 = extractvalue %dx.types.CBufRet.i32 %3252, 3
-  %3257 = mul i32 %3253, %3248
-  %3258 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3249, i32 %3254, i32 %3257)  ; IMad(a,b,c)
-  %3259 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3250, i32 %3255, i32 %3258)  ; IMad(a,b,c)
-  %3260 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3251, i32 %3256, i32 %3259)  ; IMad(a,b,c)
-  %3261 = call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %3, i32 %3260, i32 0, i8 1, i32 2)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3262 = extractvalue %dx.types.ResRet.i16 %3261, 0
-  %3263 = uitofp i16 %3262 to float
-  br label %3264
-
-; <label>:3264                                    ; preds = %3243, %3177, %3148, %3131, %3121
-  %3265 = phi float [ %3145, %3131 ], [ 0.000000e+00, %3121 ], [ %3176, %3148 ], [ %3263, %3243 ], [ 0.000000e+00, %3177 ]
-  %3266 = call float @dx.op.unary.f32(i32 22, float %180)  ; Frc(value)
-  %3267 = call float @dx.op.unary.f32(i32 22, float %181)  ; Frc(value)
-  %3268 = fmul fast float %3267, %3267
-  %3269 = fmul fast float %3268, %3267
-  %3270 = fmul fast float %1086, -7.500000e-01
-  %3271 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2249, float %3270)  ; FMad(a,b,c)
-  %3272 = fmul fast float %1086, 1.500000e+00
-  %3273 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %1668, float %3272)  ; FMad(a,b,c)
-  %3274 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %2249, float %3273)  ; FMad(a,b,c)
-  %3275 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %2830, float %3274)  ; FMad(a,b,c)
-  %3276 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %1668, float %3270)  ; FMad(a,b,c)
-  %3277 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %2249, float %3276)  ; FMad(a,b,c)
-  %3278 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2830, float %3277)  ; FMad(a,b,c)
-  %3279 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3267, float %3268, float %3269, float %1668, float %3271, float %3275, float %3278)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3280 = fmul fast float %1231, -7.500000e-01
-  %3281 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2394, float %3280)  ; FMad(a,b,c)
-  %3282 = fmul fast float %1231, 1.500000e+00
-  %3283 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %1813, float %3282)  ; FMad(a,b,c)
-  %3284 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %2394, float %3283)  ; FMad(a,b,c)
-  %3285 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %2975, float %3284)  ; FMad(a,b,c)
-  %3286 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %1813, float %3280)  ; FMad(a,b,c)
-  %3287 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %2394, float %3286)  ; FMad(a,b,c)
-  %3288 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2975, float %3287)  ; FMad(a,b,c)
-  %3289 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3267, float %3268, float %3269, float %1813, float %3281, float %3285, float %3288)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3290 = fmul fast float %1377, -7.500000e-01
-  %3291 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2539, float %3290)  ; FMad(a,b,c)
-  %3292 = fmul fast float %1377, 1.500000e+00
-  %3293 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %1958, float %3292)  ; FMad(a,b,c)
-  %3294 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %2539, float %3293)  ; FMad(a,b,c)
-  %3295 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %3120, float %3294)  ; FMad(a,b,c)
-  %3296 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %1958, float %3290)  ; FMad(a,b,c)
-  %3297 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %2539, float %3296)  ; FMad(a,b,c)
-  %3298 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3120, float %3297)  ; FMad(a,b,c)
-  %3299 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3267, float %3268, float %3269, float %1958, float %3291, float %3295, float %3298)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3300 = fmul fast float %1523, -7.500000e-01
-  %3301 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2684, float %3300)  ; FMad(a,b,c)
-  %3302 = fmul fast float %1523, 1.500000e+00
-  %3303 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %2103, float %3302)  ; FMad(a,b,c)
-  %3304 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %2684, float %3303)  ; FMad(a,b,c)
-  %3305 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %3265, float %3304)  ; FMad(a,b,c)
-  %3306 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %2103, float %3300)  ; FMad(a,b,c)
-  %3307 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %2684, float %3306)  ; FMad(a,b,c)
-  %3308 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3265, float %3307)  ; FMad(a,b,c)
-  %3309 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3267, float %3268, float %3269, float %2103, float %3301, float %3305, float %3308)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3310 = fmul fast float %3266, %3266
-  %3311 = fmul fast float %3310, %3266
-  %3312 = fmul fast float %3279, -7.500000e-01
-  %3313 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3299, float %3312)  ; FMad(a,b,c)
-  %3314 = fmul fast float %3279, 1.500000e+00
-  %3315 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %3289, float %3314)  ; FMad(a,b,c)
-  %3316 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %3299, float %3315)  ; FMad(a,b,c)
-  %3317 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %3309, float %3316)  ; FMad(a,b,c)
-  %3318 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %3289, float %3312)  ; FMad(a,b,c)
-  %3319 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %3299, float %3318)  ; FMad(a,b,c)
-  %3320 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3309, float %3319)  ; FMad(a,b,c)
-  %3321 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3266, float %3310, float %3311, float %3289, float %3313, float %3317, float %3320)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3322 = fptoui float %3321 to i16
-  call void @dx.op.rawBufferStore.i16(i32 140, %dx.types.Handle %1, i32 %8, i32 0, i16 %3322, i16 undef, i16 undef, i16 undef, i8 1, i32 2)  ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment)
-  br label %3323
-
-; <label>:3323                                    ; preds = %3264, %933, %919, %329, %0
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.threadId.i32(i32, i32) #0
-
-; Function Attrs: nounwind readonly
-declare %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32, %dx.types.Handle, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.rawBufferStore.i16(i32, %dx.types.Handle, i32, i32, i16, i16, i16, i16, i8, i32) #2
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.unary.f32(i32, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.tertiary.f32(i32, float, float, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.binary.f32(i32, float, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.dot4.f32(i32, float, float, float, float, float, float, float, float) #0
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.tertiary.i32(i32, i32, i32, i32) #0
-
-; Function Attrs: nounwind readonly
-declare %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32, %dx.types.Handle, i32) #1
-
-; Function Attrs: nounwind readonly
-declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #1
-
-; Function Attrs: nounwind readonly
-declare %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32, %dx.types.Handle, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind readnone
-declare double @dx.op.makeDouble.f64(i32, i32, i32) #0
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind readonly }
-attributes #2 = { nounwind }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.valver = !{!2}
-!dx.shaderModel = !{!3}
-!dx.resources = !{!4}
-!dx.entryPoints = !{!12}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 1, i32 2}
-!2 = !{i32 1, i32 6}
-!3 = !{!"cs", i32 6, i32 2}
-!4 = !{null, !5, !10, null}
-!5 = !{!6, !7, !9}
-!6 = !{i32 0, %"class.RWStructuredBuffer<unsigned short>"* undef, !"", i32 0, i32 0, i32 1, i32 12, i1 false, i1 false, i1 false, !1}
-!7 = !{i32 1, %"class.RWStructuredBuffer<double>"* undef, !"", i32 0, i32 1, i32 1, i32 12, i1 false, i1 false, i1 false, !8}
-!8 = !{i32 1, i32 8}
-!9 = !{i32 2, %"class.RWStructuredBuffer<unsigned short>"* undef, !"", i32 0, i32 2, i32 1, i32 12, i1 false, i1 false, i1 false, !1}
-!10 = !{!11}
-!11 = !{i32 0, %Constants* undef, !"", i32 0, i32 0, i32 1, i32 116, null}
-!12 = !{void ()* @GridSample, !"GridSample", null, !4, !13}
-!13 = !{i32 0, i64 8388660, i32 4, !14}
-!14 = !{i32 64, i32 1, i32 1}
-
-#endif
-
-const unsigned char g_GridSample[] = {
-  0x44, 0x58, 0x42, 0x43, 0xfe, 0xb4, 0x29, 0xe7, 0xf7, 0xf9, 0xe7, 0xfa,
-  0x76, 0x31, 0x17, 0xfe, 0x41, 0x54, 0x68, 0x3d, 0x01, 0x00, 0x00, 0x00,
-  0x94, 0x54, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00,
-  0x48, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00,
-  0x18, 0x01, 0x00, 0x00, 0x34, 0x01, 0x00, 0x00, 0x53, 0x46, 0x49, 0x30,
-  0x08, 0x00, 0x00, 0x00, 0x01, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x49, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x08, 0x00, 0x00, 0x00, 0x4f, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x50, 0x53, 0x56, 0x30,
-  0xa8, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x05, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00,
-  0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
-  0x18, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x48, 0x41, 0x53, 0x48, 0x14, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x97, 0x56, 0xb4, 0x47, 0x35, 0x1c, 0xc4, 0xd1,
-  0x51, 0x8f, 0xc6, 0x6a, 0x9e, 0xb7, 0xe5, 0xc7, 0x44, 0x58, 0x49, 0x4c,
-  0x58, 0x53, 0x00, 0x00, 0x62, 0x00, 0x05, 0x00, 0xd6, 0x14, 0x00, 0x00,
-  0x44, 0x58, 0x49, 0x4c, 0x02, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
-  0x40, 0x53, 0x00, 0x00, 0x42, 0x43, 0xc0, 0xde, 0x21, 0x0c, 0x00, 0x00,
-  0xcd, 0x14, 0x00, 0x00, 0x0b, 0x82, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00,
-  0x13, 0x00, 0x00, 0x00, 0x07, 0x81, 0x23, 0x91, 0x41, 0xc8, 0x04, 0x49,
-  0x06, 0x10, 0x32, 0x39, 0x92, 0x01, 0x84, 0x0c, 0x25, 0x05, 0x08, 0x19,
-  0x1e, 0x04, 0x8b, 0x62, 0x80, 0x18, 0x45, 0x02, 0x42, 0x92, 0x0b, 0x42,
-  0xc4, 0x10, 0x32, 0x14, 0x38, 0x08, 0x18, 0x4b, 0x0a, 0x32, 0x62, 0x88,
-  0x48, 0x90, 0x14, 0x20, 0x43, 0x46, 0x88, 0xa5, 0x00, 0x19, 0x32, 0x42,
-  0xe4, 0x48, 0x0e, 0x90, 0x11, 0x23, 0xc4, 0x50, 0x41, 0x51, 0x81, 0x8c,
-  0xe1, 0x83, 0xe5, 0x8a, 0x04, 0x31, 0x46, 0x06, 0x51, 0x18, 0x00, 0x00,
-  0x08, 0x00, 0x00, 0x00, 0x1b, 0x8c, 0xe0, 0xff, 0xff, 0xff, 0xff, 0x07,
-  0x40, 0x02, 0xa8, 0x0d, 0x86, 0xf0, 0xff, 0xff, 0xff, 0xff, 0x03, 0x20,
-  0x01, 0xd5, 0x06, 0x62, 0xf8, 0xff, 0xff, 0xff, 0xff, 0x01, 0x90, 0x00,
-  0x49, 0x18, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x13, 0x82, 0x60, 0x42,
-  0x20, 0x4c, 0x08, 0x06, 0x00, 0x00, 0x00, 0x00, 0x89, 0x20, 0x00, 0x00,
-  0x5b, 0x00, 0x00, 0x00, 0x32, 0x22, 0x88, 0x09, 0x20, 0x64, 0x85, 0x04,
-  0x13, 0x23, 0xa4, 0x84, 0x04, 0x13, 0x23, 0xe3, 0x84, 0xa1, 0x90, 0x14,
-  0x12, 0x4c, 0x8c, 0x8c, 0x0b, 0x84, 0xc4, 0x4c, 0x10, 0xc0, 0xc1, 0x08,
-  0x40, 0x09, 0x00, 0x0a, 0xe6, 0x08, 0xc0, 0xa0, 0x0c, 0xc3, 0x30, 0x10,
-  0x31, 0x47, 0x00, 0xdd, 0x34, 0x5c, 0xfe, 0x84, 0x3d, 0x84, 0xe4, 0xaf,
-  0x84, 0xb4, 0x12, 0x93, 0x8f, 0xd4, 0x3a, 0x2a, 0x0c, 0xc3, 0x30, 0x86,
-  0x39, 0x02, 0x84, 0x90, 0x7b, 0x86, 0xcb, 0x9f, 0xb0, 0x87, 0x90, 0xfc,
-  0x10, 0x68, 0x86, 0x85, 0x40, 0x41, 0x52, 0x8e, 0x63, 0x50, 0x86, 0x01,
-  0x19, 0x68, 0x29, 0x0b, 0x30, 0x28, 0xc3, 0x60, 0x18, 0x86, 0x81, 0x0c,
-  0xd4, 0xcc, 0x00, 0x94, 0xe1, 0x19, 0x1e, 0x82, 0x4a, 0xf1, 0x0c, 0xcf,
-  0xf3, 0x90, 0x54, 0x88, 0x67, 0x78, 0x1e, 0xa2, 0x8a, 0xf2, 0x0c, 0xcf,
-  0xf3, 0x3c, 0xcf, 0xf3, 0x3c, 0x64, 0x95, 0x62, 0x18, 0x86, 0x61, 0x20,
-  0xec, 0xa8, 0xe1, 0xf2, 0x27, 0xec, 0x21, 0x24, 0x9f, 0xdb, 0xa8, 0x62,
-  0x25, 0x26, 0x1f, 0xb9, 0x6d, 0x44, 0x0c, 0xc3, 0x30, 0x14, 0x42, 0x1b,
-  0x94, 0x81, 0xb6, 0x39, 0x82, 0xa0, 0x18, 0xca, 0x80, 0x0c, 0x43, 0x47,
-  0xde, 0x4d, 0xc3, 0xe5, 0x4f, 0xd8, 0x43, 0x48, 0xfe, 0x4a, 0x48, 0x2b,
-  0x31, 0xf9, 0xc8, 0x6d, 0xa3, 0x62, 0x18, 0x86, 0x61, 0x28, 0x07, 0x38,
-  0x28, 0xc3, 0x80, 0x0c, 0x14, 0x0e, 0x01, 0x14, 0x62, 0x1c, 0x86, 0x81,
-  0xc8, 0x81, 0x80, 0x99, 0xd0, 0x60, 0x1c, 0xd8, 0x21, 0x1c, 0xe6, 0x61,
-  0x1e, 0xdc, 0x40, 0x16, 0x6e, 0x61, 0x16, 0xe8, 0x41, 0x1e, 0xea, 0x61,
-  0x1c, 0xe8, 0xa1, 0x1e, 0xe4, 0xa1, 0x1c, 0xc8, 0x41, 0x14, 0xea, 0xc1,
-  0x1c, 0xcc, 0xa1, 0x1c, 0xe4, 0x81, 0x0f, 0xea, 0xc1, 0x1d, 0xe6, 0x21,
-  0x1d, 0xce, 0xc1, 0x1d, 0xca, 0x81, 0x1c, 0xc0, 0x60, 0x1e, 0xd0, 0xe1,
-  0x1d, 0xe4, 0x81, 0x1e, 0xfc, 0x00, 0x05, 0x06, 0x9d, 0x33, 0x81, 0xc1,
-  0x38, 0xb0, 0x43, 0x38, 0xcc, 0xc3, 0x3c, 0xb8, 0x81, 0x2c, 0xdc, 0xc2,
-  0x2c, 0xd0, 0x83, 0x3c, 0xd4, 0xc3, 0x38, 0xd0, 0x43, 0x3d, 0xc8, 0x43,
-  0x39, 0x90, 0x83, 0x28, 0xd4, 0x83, 0x39, 0x98, 0x43, 0x39, 0xc8, 0x03,
-  0x1f, 0x90, 0xc3, 0x3b, 0xd4, 0x83, 0x38, 0xb0, 0x43, 0x39, 0xf8, 0x01,
-  0x0a, 0x46, 0x4a, 0x87, 0x11, 0x88, 0xe1, 0x12, 0xce, 0x69, 0xa4, 0x09,
-  0x68, 0x26, 0x09, 0x2d, 0xc3, 0x30, 0x0c, 0xeb, 0xba, 0xae, 0xeb, 0x3a,
-  0x10, 0x3b, 0x47, 0x00, 0x0a, 0x53, 0x00, 0x00, 0x13, 0x14, 0x72, 0xc0,
-  0x87, 0x74, 0x60, 0x87, 0x36, 0x68, 0x87, 0x79, 0x68, 0x03, 0x72, 0xc0,
-  0x87, 0x0d, 0xae, 0x50, 0x0e, 0x6d, 0xd0, 0x0e, 0x7a, 0x50, 0x0e, 0x6d,
-  0x00, 0x0f, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d,
-  0x90, 0x0e, 0x71, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x78,
-  0xa0, 0x07, 0x78, 0xd0, 0x06, 0xe9, 0x10, 0x07, 0x76, 0xa0, 0x07, 0x71,
-  0x60, 0x07, 0x6d, 0x90, 0x0e, 0x73, 0x20, 0x07, 0x7a, 0x30, 0x07, 0x72,
-  0xd0, 0x06, 0xe9, 0x60, 0x07, 0x74, 0xa0, 0x07, 0x76, 0x40, 0x07, 0x6d,
-  0x60, 0x0e, 0x71, 0x60, 0x07, 0x7a, 0x10, 0x07, 0x76, 0xd0, 0x06, 0xe6,
-  0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x60, 0x0e, 0x76,
-  0x40, 0x07, 0x7a, 0x60, 0x07, 0x74, 0xd0, 0x06, 0xee, 0x80, 0x07, 0x7a,
-  0x10, 0x07, 0x76, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x7a, 0x60, 0x07, 0x74,
-  0x30, 0xe4, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x60, 0xc8, 0x43, 0x00, 0x01, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0xc0, 0x90, 0x67, 0x01, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x80, 0x21, 0x4f, 0x03, 0x04, 0xc0, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x43, 0x1e, 0x08, 0x08, 0x80, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0x3c, 0x12, 0x10, 0x00, 0x01,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x79, 0x28, 0x20, 0x00,
-  0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0xf2, 0x58, 0x40,
-  0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0xe4, 0xc1,
-  0x80, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0xc8,
-  0xb3, 0x01, 0x01, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0,
-  0x90, 0xc7, 0x03, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x80, 0x21, 0x4f, 0x18, 0x00, 0x01, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0xc0, 0x90, 0x87, 0x0c, 0x80, 0x00, 0x08, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x20, 0x0b, 0x04, 0x00, 0x0d, 0x00, 0x00, 0x00,
-  0x32, 0x1e, 0x98, 0x14, 0x19, 0x11, 0x4c, 0x90, 0x8c, 0x09, 0x26, 0x47,
-  0xc6, 0x04, 0x43, 0x1a, 0x4a, 0xa0, 0x08, 0x8a, 0x61, 0x04, 0xa0, 0x30,
-  0x0a, 0xa2, 0xd0, 0x03, 0x0a, 0xa1, 0x00, 0x03, 0xa8, 0x1b, 0x01, 0x20,
-  0xb7, 0xd0, 0x01, 0x01, 0x11, 0x48, 0x9d, 0x01, 0xa0, 0x76, 0x06, 0x80,
-  0xd0, 0x19, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00,
-  0x1a, 0x03, 0x4c, 0x90, 0x46, 0x02, 0x13, 0x44, 0x35, 0x18, 0x63, 0x0b,
-  0x73, 0x3b, 0x03, 0xb1, 0x2b, 0x93, 0x9b, 0x4b, 0x7b, 0x73, 0x03, 0x99,
-  0x71, 0xb9, 0x01, 0x41, 0xa1, 0x0b, 0x3b, 0x9b, 0x7b, 0x91, 0x2a, 0x62,
-  0x2a, 0x0a, 0x9a, 0x2a, 0xfa, 0x9a, 0xb9, 0x81, 0x79, 0x31, 0x4b, 0x73,
-  0x0b, 0x63, 0x4b, 0xd9, 0x10, 0x04, 0x13, 0x84, 0xa1, 0x99, 0x20, 0x0c,
-  0xce, 0x06, 0x61, 0x20, 0x26, 0x08, 0xc3, 0xb3, 0x41, 0x18, 0x0c, 0x0a,
-  0x63, 0x73, 0x1b, 0x06, 0xc4, 0x20, 0x26, 0x08, 0x03, 0x34, 0x41, 0x40,
-  0x03, 0x8d, 0xc0, 0x04, 0x61, 0x88, 0x26, 0x08, 0x9d, 0xb5, 0x61, 0x51,
-  0x16, 0x46, 0x51, 0x86, 0xc6, 0x71, 0x9c, 0x62, 0x82, 0xa0, 0x06, 0xd8,
-  0x04, 0x61, 0x90, 0x36, 0x08, 0x43, 0xb4, 0x61, 0x19, 0x20, 0x46, 0x19,
-  0x86, 0xc6, 0x71, 0x1c, 0x69, 0xc3, 0x42, 0x2c, 0x8c, 0x42, 0x0c, 0x8d,
-  0xe3, 0x38, 0xc5, 0x86, 0xe1, 0x99, 0xa8, 0x09, 0x42, 0x1b, 0x64, 0x13,
-  0x84, 0x61, 0xda, 0x80, 0x28, 0x16, 0xa3, 0x28, 0xc3, 0x05, 0x6c, 0x08,
-  0xb0, 0x0d, 0x04, 0x50, 0x65, 0xc0, 0x04, 0x41, 0x00, 0xa8, 0x1c, 0xc9,
-  0xa5, 0x91, 0x4d, 0x85, 0xb5, 0xc1, 0xb1, 0x95, 0x4d, 0x10, 0xdc, 0xe0,
-  0x9a, 0x20, 0x0c, 0xd4, 0x04, 0x61, 0xa8, 0x36, 0x0c, 0xdf, 0x30, 0x6c,
-  0x20, 0x94, 0xce, 0x03, 0x83, 0x0d, 0xc5, 0xc6, 0x01, 0x5a, 0x18, 0x54,
-  0x61, 0x63, 0xb3, 0x6b, 0x73, 0x49, 0x23, 0x2b, 0x73, 0xa3, 0x9b, 0x12,
-  0x04, 0x55, 0xc8, 0xf0, 0x5c, 0xec, 0xca, 0xe4, 0xe6, 0xd2, 0xde, 0xdc,
-  0xa6, 0x04, 0x44, 0x13, 0x32, 0x3c, 0x17, 0xbb, 0x30, 0x36, 0xbb, 0x32,
-  0xb9, 0x29, 0x81, 0x51, 0x87, 0x0c, 0xcf, 0x65, 0x0e, 0x2d, 0x8c, 0xac,
-  0x4c, 0xae, 0xe9, 0x8d, 0xac, 0x8c, 0x6d, 0x4a, 0x80, 0x94, 0x21, 0xc3,
-  0x73, 0x91, 0x2b, 0x9b, 0x7b, 0xab, 0x93, 0x1b, 0x2b, 0x9b, 0x9b, 0x12,
-  0x64, 0x75, 0xc8, 0xf0, 0x5c, 0xca, 0xdc, 0xe8, 0xe4, 0xf2, 0xa0, 0xde,
-  0xd2, 0xdc, 0xe8, 0xe6, 0xa6, 0x04, 0x61, 0x00, 0x79, 0x18, 0x00, 0x00,
-  0x51, 0x00, 0x00, 0x00, 0x33, 0x08, 0x80, 0x1c, 0xc4, 0xe1, 0x1c, 0x66,
-  0x14, 0x01, 0x3d, 0x88, 0x43, 0x38, 0x84, 0xc3, 0x8c, 0x42, 0x80, 0x07,
-  0x79, 0x78, 0x07, 0x73, 0x98, 0x71, 0x0c, 0xe6, 0x00, 0x0f, 0xed, 0x10,
-  0x0e, 0xf4, 0x80, 0x0e, 0x33, 0x0c, 0x42, 0x1e, 0xc2, 0xc1, 0x1d, 0xce,
-  0xa1, 0x1c, 0x66, 0x30, 0x05, 0x3d, 0x88, 0x43, 0x38, 0x84, 0x83, 0x1b,
-  0xcc, 0x03, 0x3d, 0xc8, 0x43, 0x3d, 0x8c, 0x03, 0x3d, 0xcc, 0x78, 0x8c,
-  0x74, 0x70, 0x07, 0x7b, 0x08, 0x07, 0x79, 0x48, 0x87, 0x70, 0x70, 0x07,
-  0x7a, 0x70, 0x03, 0x76, 0x78, 0x87, 0x70, 0x20, 0x87, 0x19, 0xcc, 0x11,
-  0x0e, 0xec, 0x90, 0x0e, 0xe1, 0x30, 0x0f, 0x6e, 0x30, 0x0f, 0xe3, 0xf0,
-  0x0e, 0xf0, 0x50, 0x0e, 0x33, 0x10, 0xc4, 0x1d, 0xde, 0x21, 0x1c, 0xd8,
-  0x21, 0x1d, 0xc2, 0x61, 0x1e, 0x66, 0x30, 0x89, 0x3b, 0xbc, 0x83, 0x3b,
-  0xd0, 0x43, 0x39, 0xb4, 0x03, 0x3c, 0xbc, 0x83, 0x3c, 0x84, 0x03, 0x3b,
-  0xcc, 0xf0, 0x14, 0x76, 0x60, 0x07, 0x7b, 0x68, 0x07, 0x37, 0x68, 0x87,
-  0x72, 0x68, 0x07, 0x37, 0x80, 0x87, 0x70, 0x90, 0x87, 0x70, 0x60, 0x07,
-  0x76, 0x28, 0x07, 0x76, 0xf8, 0x05, 0x76, 0x78, 0x87, 0x77, 0x80, 0x87,
-  0x5f, 0x08, 0x87, 0x71, 0x18, 0x87, 0x72, 0x98, 0x87, 0x79, 0x98, 0x81,
-  0x2c, 0xee, 0xf0, 0x0e, 0xee, 0xe0, 0x0e, 0xf5, 0xc0, 0x0e, 0xec, 0x30,
-  0x03, 0x62, 0xc8, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xcc, 0xa1, 0x1c, 0xe4,
-  0xa1, 0x1c, 0xdc, 0x61, 0x1c, 0xca, 0x21, 0x1c, 0xc4, 0x81, 0x1d, 0xca,
-  0x61, 0x06, 0xd6, 0x90, 0x43, 0x39, 0xc8, 0x43, 0x39, 0x98, 0x43, 0x39,
-  0xc8, 0x43, 0x39, 0xb8, 0xc3, 0x38, 0x94, 0x43, 0x38, 0x88, 0x03, 0x3b,
-  0x94, 0xc3, 0x2f, 0xbc, 0x83, 0x3c, 0xfc, 0x82, 0x3b, 0xd4, 0x03, 0x3b,
-  0xb0, 0xc3, 0x0c, 0xc4, 0x21, 0x07, 0x7c, 0x70, 0x03, 0x7a, 0x28, 0x87,
-  0x76, 0x80, 0x87, 0x19, 0xd1, 0x43, 0x0e, 0xf8, 0xe0, 0x06, 0xe4, 0x20,
-  0x0e, 0xe7, 0xe0, 0x06, 0xf6, 0x10, 0x0e, 0xf2, 0xc0, 0x0e, 0xe1, 0x90,
-  0x0f, 0xef, 0x50, 0x0f, 0xf4, 0x30, 0x83, 0x81, 0xc8, 0x01, 0x1f, 0xdc,
-  0x40, 0x1c, 0xe4, 0xa1, 0x1c, 0xc2, 0x61, 0x1d, 0xdc, 0x40, 0x1c, 0xe4,
-  0x01, 0x00, 0x00, 0x00, 0x71, 0x20, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00,
-  0x06, 0xa0, 0x80, 0x11, 0x32, 0xb0, 0x00, 0xf3, 0x2c, 0x84, 0x19, 0x40,
-  0xc3, 0xe5, 0x3b, 0x8f, 0x1f, 0x20, 0x0d, 0x10, 0x61, 0x7e, 0x71, 0xdb,
-  0x96, 0xb0, 0x0d, 0x97, 0xef, 0x3c, 0xbe, 0x10, 0x50, 0x45, 0x41, 0x44,
-  0xa5, 0x03, 0x0c, 0x25, 0x61, 0x00, 0x02, 0xe6, 0x23, 0xb7, 0x6d, 0x0a,
-  0xd2, 0x70, 0xf9, 0xce, 0xe3, 0x0b, 0x11, 0x01, 0x4c, 0x44, 0x08, 0x34,
-  0xc3, 0x42, 0xd8, 0x81, 0x33, 0x5c, 0xbe, 0xf3, 0xf8, 0x83, 0x33, 0xe1,
-  0x7e, 0x71, 0xdb, 0xc6, 0x40, 0x0d, 0x97, 0xef, 0x3c, 0x3e, 0x03, 0x28,
-  0x44, 0xe7, 0x50, 0xc1, 0x42, 0xf8, 0x85, 0x8e, 0x9b, 0xc0, 0x35, 0x5c,
-  0xbe, 0xf3, 0xf8, 0x11, 0x60, 0x6d, 0x54, 0x51, 0x10, 0x51, 0xe9, 0x00,
-  0x83, 0x8f, 0xd4, 0xba, 0x2d, 0x5c, 0xc3, 0xe5, 0x3b, 0x8f, 0x1f, 0x01,
-  0xd6, 0x46, 0x15, 0x05, 0x11, 0x95, 0x0e, 0x30, 0xf8, 0xc8, 0x6d, 0xdb,
-  0x00, 0x36, 0x5c, 0xbe, 0xf3, 0xf8, 0x11, 0x60, 0x6d, 0x54, 0x51, 0x10,
-  0x11, 0x3b, 0x39, 0x11, 0xe1, 0x23, 0xb5, 0x6e, 0x05, 0xd2, 0x70, 0xf9,
-  0xce, 0xe3, 0x4f, 0x44, 0x34, 0x21, 0x40, 0x84, 0xf9, 0xc5, 0x6d, 0x1b,
-  0x82, 0x34, 0x5c, 0xbe, 0xf3, 0xf8, 0x13, 0x11, 0x4d, 0x08, 0x10, 0x61,
-  0x3e, 0x72, 0xdb, 0x16, 0x20, 0x0d, 0x97, 0xef, 0x3c, 0xfe, 0x74, 0x44,
-  0x04, 0x30, 0x88, 0x83, 0x8f, 0xdc, 0xb6, 0x11, 0x3c, 0xc3, 0xe5, 0x3b,
-  0x8f, 0x4f, 0x35, 0x40, 0x84, 0xf9, 0xc5, 0x6d, 0x03, 0x00, 0x00, 0x00,
-  0x61, 0x20, 0x00, 0x00, 0x15, 0x13, 0x00, 0x00, 0x13, 0x04, 0x24, 0x14,
-  0x0b, 0x04, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x34, 0x14, 0x58, 0xd9,
-  0x95, 0xa5, 0x40, 0x0d, 0x94, 0x51, 0x21, 0x15, 0x57, 0xc1, 0x95, 0x5c,
-  0xd9, 0x14, 0x4b, 0x61, 0x0a, 0x94, 0x72, 0x40, 0xd1, 0x94, 0x6e, 0x40,
-  0x39, 0x94, 0x02, 0x19, 0x33, 0x00, 0x84, 0x94, 0x40, 0x19, 0x14, 0x01,
-  0x3d, 0x23, 0x00, 0x63, 0x04, 0x20, 0x08, 0x82, 0xf8, 0x37, 0x46, 0x00,
-  0x82, 0x20, 0x48, 0xff, 0xc2, 0x18, 0x01, 0x08, 0x82, 0x20, 0xfd, 0x8d,
-  0x11, 0x80, 0x20, 0x08, 0xf2, 0xdf, 0x18, 0x01, 0x08, 0x82, 0x20, 0xfe,
-  0x0b, 0x63, 0x04, 0x20, 0x08, 0x82, 0x21, 0x38, 0x8c, 0x11, 0x80, 0x20,
-  0x08, 0xea, 0xdf, 0x18, 0x01, 0x08, 0x82, 0xa0, 0xfe, 0x0b, 0x63, 0x04,
-  0x20, 0x08, 0x82, 0xf0, 0x37, 0x46, 0x00, 0x82, 0x20, 0x08, 0xff, 0xc2,
-  0x18, 0x01, 0x08, 0x82, 0x20, 0x08, 0x06, 0x00, 0x23, 0x06, 0x09, 0x00,
-  0x82, 0x60, 0xe0, 0xc9, 0x41, 0xf6, 0xb8, 0x81, 0x1b, 0x98, 0xc1, 0x88,
-  0x41, 0x02, 0x80, 0x20, 0x18, 0x78, 0x73, 0xa0, 0x41, 0x70, 0x00, 0x07,
-  0x67, 0x30, 0x62, 0x90, 0x00, 0x20, 0x08, 0x06, 0x1e, 0x1d, 0x6c, 0x91,
-  0x1b, 0xb8, 0x01, 0x1a, 0x8c, 0x18, 0x24, 0x00, 0x08, 0x82, 0x81, 0x57,
-  0x07, 0x1c, 0xf4, 0x06, 0x6f, 0x90, 0x06, 0x23, 0x06, 0x06, 0x00, 0x82,
-  0x60, 0x40, 0xfc, 0xc1, 0x05, 0x07, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60,
-  0xb0, 0xe1, 0x41, 0x19, 0x08, 0x71, 0x30, 0x9a, 0x10, 0x00, 0x15, 0x0c,
-  0x30, 0x9a, 0x30, 0x04, 0xc3, 0x0d, 0x42, 0x40, 0x06, 0xb3, 0x0c, 0xc1,
-  0x08, 0x05, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xb0, 0xf5, 0x81, 0x1a,
-  0x1c, 0x79, 0x30, 0x9a, 0x10, 0x0c, 0x17, 0x3c, 0x35, 0x9a, 0x30, 0x08,
-  0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x9b, 0x28, 0xbc,
-  0x01, 0x13, 0x06, 0xa3, 0x09, 0x01, 0x30, 0xdc, 0x10, 0xf4, 0x01, 0x18,
-  0x4c, 0x37, 0x50, 0x5e, 0x30, 0xdd, 0x50, 0x69, 0x42, 0x21, 0x01, 0x4c,
-  0x37, 0x5c, 0x1c, 0x51, 0x48, 0x00, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60,
-  0xb0, 0xa9, 0xc2, 0x1d, 0x50, 0x68, 0x30, 0x9a, 0x10, 0x04, 0xa3, 0x09,
-  0x82, 0x30, 0x9a, 0x30, 0x0c, 0x15, 0x08, 0x52, 0x03, 0x21, 0x15, 0x0c,
-  0x52, 0x57, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xb0, 0xc9, 0xc2,
-  0x1f, 0x70, 0xac, 0x30, 0x9a, 0x10, 0x00, 0x15, 0x0c, 0x52, 0x5b, 0x10,
-  0x15, 0x20, 0x33, 0x9a, 0x50, 0x04, 0x15, 0x08, 0x52, 0x44, 0x10, 0x15,
-  0x34, 0x33, 0x9a, 0x90, 0x08, 0x15, 0x08, 0x52, 0x44, 0x10, 0xd7, 0x3c,
-  0x75, 0xc5, 0x53, 0x37, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06,
-  0x5b, 0x38, 0xb8, 0xc2, 0x1a, 0xd8, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26,
-  0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0xc3, 0x11, 0x4f, 0x1d,
-  0xf1, 0xd4, 0x11, 0x4f, 0x1d, 0xf1, 0xd4, 0x88, 0x41, 0x03, 0x80, 0x20,
-  0x18, 0x58, 0xec, 0x10, 0x0b, 0xcc, 0xa2, 0x8c, 0x02, 0x31, 0x08, 0x81,
-  0x09, 0x01, 0x7c, 0x4e, 0x18, 0x66, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c,
-  0xc2, 0x60, 0x1d, 0x72, 0x21, 0x0f, 0x02, 0x73, 0x40, 0x05, 0x72, 0x18,
-  0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x31, 0x38, 0x00, 0x10, 0x04,
-  0x03, 0x32, 0x68, 0x07, 0x59, 0x10, 0x82, 0x0b, 0x9e, 0xbb, 0x63, 0x98,
-  0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x08, 0x83, 0x78, 0xf8, 0x85, 0x3f,
-  0x08, 0xd8, 0xc1, 0x15, 0xd4, 0x61, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84,
-  0x60, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xc8, 0x60, 0x1e, 0x70, 0x41,
-  0x08, 0x2e, 0x78, 0x6e, 0xb8, 0xa1, 0x0e, 0xe8, 0x01, 0x0c, 0x0c, 0x89,
-  0x05, 0xf8, 0xd8, 0x20, 0x0b, 0xf0, 0x99, 0x65, 0x10, 0x86, 0xc1, 0x84,
-  0x55, 0x90, 0x8f, 0x09, 0xac, 0x20, 0x1f, 0xf3, 0x83, 0x58, 0x80, 0x8f,
-  0xf5, 0x81, 0x2c, 0xc0, 0xc7, 0x08, 0x41, 0x3e, 0x46, 0x08, 0xf2, 0x99,
-  0x25, 0x20, 0x4c, 0x14, 0x10, 0xf9, 0x18, 0x12, 0x0a, 0xf2, 0x31, 0xe1,
-  0x16, 0xe0, 0x63, 0x02, 0x2e, 0xc0, 0xc7, 0x84, 0x5a, 0x90, 0x8f, 0x09,
-  0xb6, 0x20, 0x9f, 0x59, 0x02, 0x62, 0xa0, 0xe2, 0x81, 0x04, 0x62, 0x18,
-  0xa8, 0x78, 0x20, 0x81, 0x18, 0x46, 0x13, 0x62, 0x41, 0x18, 0x6e, 0x08,
-  0x4c, 0x02, 0x0c, 0x66, 0x19, 0x0a, 0x23, 0x18, 0x31, 0x30, 0x00, 0x10,
-  0x04, 0x03, 0x08, 0x26, 0xd8, 0x81, 0x18, 0x31, 0x30, 0x00, 0x10, 0x04,
-  0x03, 0x28, 0x26, 0xda, 0x81, 0x98, 0x25, 0x30, 0x06, 0x2a, 0x1e, 0xa2,
-  0x60, 0x88, 0x81, 0x8a, 0x87, 0x28, 0x18, 0x62, 0x38, 0x42, 0x50, 0x05,
-  0xe2, 0x1b, 0x8e, 0x18, 0x52, 0x41, 0xf8, 0x4a, 0x08, 0x76, 0x38, 0x82,
-  0x68, 0x05, 0xe2, 0x2b, 0x21, 0xd8, 0xe1, 0x08, 0x63, 0x15, 0x84, 0xaf,
-  0x02, 0x61, 0x67, 0x19, 0x0e, 0x2d, 0x18, 0x4d, 0xf0, 0x85, 0x61, 0xb8,
-  0x21, 0x98, 0x09, 0x30, 0x98, 0x65, 0x40, 0x92, 0xa0, 0x74, 0x61, 0x24,
-  0xe0, 0x82, 0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0xea, 0x09,
-  0x92, 0x68, 0xe6, 0x61, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x28, 0x9f,
-  0x20, 0x89, 0x40, 0x28, 0x5e, 0x38, 0x09, 0xb8, 0xe0, 0xa9, 0x11, 0x83,
-  0x03, 0x00, 0x41, 0x30, 0xa0, 0xc2, 0x02, 0x25, 0xa0, 0x7b, 0x18, 0x31,
-  0x38, 0x00, 0x10, 0x04, 0x03, 0x4a, 0x2c, 0x50, 0x22, 0x10, 0x66, 0x09,
-  0xb4, 0xe1, 0x06, 0x65, 0x27, 0xc0, 0x60, 0x96, 0x41, 0xd1, 0x02, 0xd3,
-  0x05, 0x5e, 0x88, 0xcf, 0x2c, 0xc3, 0xe2, 0x4c, 0xd6, 0x0b, 0x55, 0x7c,
-  0x2c, 0x10, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x41, 0x21, 0x1f,
-  0x2b, 0x82, 0xf8, 0x14, 0x41, 0x16, 0x3a, 0xdc, 0x10, 0x88, 0x05, 0x18,
-  0xcc, 0x32, 0x30, 0x4d, 0x60, 0x43, 0x39, 0xc0, 0x67, 0x96, 0x40, 0x32,
-  0x72, 0x20, 0xe2, 0x33, 0x4b, 0x20, 0xcd, 0x32, 0x3c, 0x12, 0x67, 0x5f,
-  0x39, 0xc4, 0xc7, 0x02, 0x86, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16,
-  0x3c, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x6e, 0xa1, 0xc3, 0x0d, 0x01,
-  0x5b, 0x80, 0xc1, 0x2c, 0x03, 0x14, 0x05, 0xd6, 0x0e, 0x43, 0x7c, 0x66,
-  0x09, 0x24, 0x23, 0xe0, 0x01, 0x3e, 0xb3, 0x04, 0xd2, 0x40, 0xcb, 0x83,
-  0x31, 0x56, 0x43, 0x40, 0x42, 0x24, 0x0b, 0x8e, 0xb9, 0x83, 0x3c, 0xc4,
-  0x67, 0x96, 0x61, 0xb2, 0xcc, 0xc0, 0xe6, 0x41, 0x0d, 0xe2, 0x63, 0x81,
-  0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x0a, 0xf9, 0x58, 0x11,
-  0xc4, 0xa7, 0x08, 0xbd, 0xd0, 0xe1, 0x86, 0x00, 0x2f, 0xc0, 0x60, 0x96,
-  0x81, 0xaa, 0x02, 0x1b, 0xf6, 0x01, 0x3e, 0xb3, 0x04, 0x9a, 0xe1, 0x03,
-  0x11, 0x9f, 0x59, 0x02, 0x6d, 0x96, 0xe1, 0xd2, 0xdc, 0xc0, 0xe8, 0x20,
-  0x1f, 0xe2, 0x63, 0x01, 0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b,
-  0x1e, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0xd2, 0xd0, 0xe1, 0x86, 0x40,
-  0x34, 0xc0, 0x60, 0x96, 0x01, 0xcb, 0x02, 0x0b, 0x89, 0x21, 0x3e, 0xb3,
-  0x04, 0x9a, 0x11, 0x26, 0x01, 0x9f, 0x59, 0x02, 0x6d, 0xa0, 0xe8, 0x11,
-  0x07, 0xc4, 0x1f, 0x12, 0x7f, 0x30, 0xd8, 0x20, 0x63, 0x03, 0x8c, 0x0d,
-  0x2c, 0x36, 0xa8, 0xd8, 0x80, 0x1a, 0x28, 0x7a, 0x78, 0x01, 0xf1, 0x87,
-  0xc4, 0x1f, 0x0c, 0x22, 0x33, 0x30, 0x7f, 0xb0, 0xb0, 0x4a, 0xa3, 0x0e,
-  0x1f, 0x9e, 0x9a, 0x65, 0xd8, 0xe6, 0xa0, 0x14, 0x46, 0x13, 0x6e, 0x62,
-  0x18, 0x6e, 0x08, 0x52, 0x03, 0x0c, 0x66, 0x19, 0x38, 0x2f, 0x18, 0x8e,
-  0x28, 0xd4, 0x62, 0xf8, 0xce, 0x18, 0x66, 0xb8, 0x21, 0xa8, 0x09, 0x32,
-  0xa8, 0x21, 0xd0, 0xe1, 0x08, 0xc4, 0x2d, 0x86, 0xaf, 0x02, 0x41, 0x4f,
-  0x19, 0x66, 0xb8, 0x21, 0xc0, 0x09, 0x32, 0xa8, 0x60, 0xd0, 0x59, 0x86,
-  0x4e, 0x0e, 0x82, 0xe3, 0x87, 0x61, 0xae, 0x19, 0x66, 0xc4, 0xe0, 0x00,
-  0x40, 0x10, 0x0c, 0x36, 0xde, 0x48, 0x0d, 0xb3, 0xb8, 0x8d, 0xd1, 0x84,
-  0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86,
-  0x22, 0x0e, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x6c, 0x3c, 0x60,
-  0xe3, 0x20, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xc0, 0xc8, 0x23,
-  0x36, 0x18, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xac, 0x3c,
-  0x64, 0x43, 0x22, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x58, 0xd8,
-  0x23, 0x36, 0xe0, 0x22, 0xf0, 0x8d, 0xd0, 0x00, 0x8f, 0xd1, 0x84, 0x00,
-  0xb8, 0xe0, 0xa9, 0x59, 0x02, 0x39, 0x18, 0x6e, 0xc8, 0xc8, 0x03, 0x0c,
-  0x66, 0x19, 0x3e, 0x30, 0x08, 0x6a, 0x2d, 0x68, 0x03, 0x2e, 0x78, 0x6a,
-  0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x28, 0xf7, 0xa8, 0x8d, 0x8f, 0x34,
-  0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x7a, 0x8f, 0xda, 0x08, 0x84,
-  0x0b, 0x86, 0x29, 0xb7, 0xc8, 0x0d, 0xb8, 0xe0, 0xa9, 0x11, 0x83, 0x03,
-  0x00, 0x41, 0x30, 0xa0, 0xe6, 0x43, 0x37, 0xc6, 0x20, 0x35, 0x46, 0x0c,
-  0x0e, 0x00, 0x04, 0xc1, 0x80, 0xa2, 0x0f, 0xdd, 0x08, 0x84, 0x0b, 0x86,
-  0xb9, 0xe0, 0xa9, 0x3b, 0x9e, 0xba, 0x9b, 0x18, 0xe6, 0xd0, 0x60, 0x98,
-  0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xd8,
-  0xf2, 0xc3, 0x3c, 0x46, 0x83, 0x3e, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41,
-  0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4,
-  0x00, 0x01, 0x40, 0x10, 0x0c, 0x30, 0x10, 0x69, 0x8f, 0x84, 0x08, 0x46,
-  0x0c, 0x10, 0x00, 0x04, 0xc1, 0x00, 0x0b, 0x11, 0xf7, 0x48, 0x88, 0x60,
-  0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x30, 0x11, 0x79, 0x8f, 0x84, 0x08,
-  0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0x49, 0x11, 0xf7, 0x68, 0x8d,
-  0x60, 0x3f, 0x7c, 0xa3, 0x3f, 0x46, 0x13, 0x02, 0xe0, 0x82, 0xa7, 0x66,
-  0x09, 0xe4, 0x60, 0xb8, 0xc1, 0x0e, 0x40, 0x04, 0x0c, 0x66, 0x19, 0xc2,
-  0x40, 0x0e, 0x02, 0xfb, 0x8b, 0xd0, 0x88, 0xcf, 0x70, 0xc4, 0x1e, 0x88,
-  0x06, 0xf1, 0xcd, 0x32, 0x88, 0x41, 0x19, 0x04, 0x36, 0x1a, 0x7c, 0x10,
-  0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x60, 0xc8,
-  0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xa8, 0x88, 0x0e, 0x37, 0x04, 0x28, 0x02,
-  0x06, 0xb3, 0x0c, 0x63, 0x40, 0x06, 0x81, 0x0d, 0xab, 0x01, 0x9f, 0x59,
-  0x82, 0x34, 0x30, 0xd5, 0x20, 0xe2, 0x33, 0x4b, 0x90, 0x06, 0xc3, 0x11,
-  0xa6, 0xb0, 0x1a, 0xc2, 0x37, 0xcb, 0x60, 0x06, 0x69, 0x10, 0xd8, 0x29,
-  0xb0, 0x46, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65,
-  0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x51, 0x23, 0x3a, 0xdc, 0x10,
-  0xcc, 0x08, 0x18, 0xcc, 0x32, 0x9c, 0x01, 0x1a, 0x04, 0x46, 0x1b, 0x43,
-  0x7c, 0x66, 0x09, 0xd2, 0xc0, 0x88, 0xdb, 0x80, 0xcf, 0x2c, 0x41, 0x1a,
-  0x0c, 0xb4, 0x3c, 0xda, 0x18, 0x60, 0x64, 0x40, 0x9c, 0x81, 0x80, 0x06,
-  0x62, 0x51, 0x06, 0x17, 0x0c, 0x63, 0xb6, 0xa1, 0x1b, 0xf1, 0x19, 0x8e,
-  0x98, 0x85, 0xdd, 0x20, 0xbe, 0x59, 0x06, 0x35, 0x68, 0x83, 0xc0, 0x78,
-  0x83, 0x16, 0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29,
-  0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0x31, 0xd1, 0xe1, 0x86,
-  0x20, 0x4c, 0xc0, 0x60, 0x96, 0x61, 0x0d, 0xd8, 0x20, 0xb0, 0x81, 0x3c,
-  0xe0, 0x33, 0x4b, 0x10, 0x07, 0x16, 0x1e, 0x44, 0x7c, 0x66, 0x09, 0xe2,
-  0x60, 0x38, 0xc2, 0x17, 0xc4, 0x43, 0xf8, 0x66, 0x19, 0xdc, 0x20, 0x0e,
-  0x02, 0xfb, 0x85, 0xf1, 0x88, 0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6,
-  0x82, 0xa7, 0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xdc, 0x44,
-  0x87, 0x1b, 0x02, 0x36, 0x01, 0x83, 0x59, 0x86, 0x37, 0x80, 0x83, 0xc0,
-  0xd6, 0x63, 0x88, 0xcf, 0x2c, 0x41, 0x1c, 0x18, 0x01, 0x1f, 0xf0, 0x99,
-  0x25, 0x88, 0x83, 0x81, 0x96, 0x47, 0x5b, 0x03, 0x8c, 0x0d, 0x88, 0x37,
-  0x10, 0xe0, 0x40, 0x36, 0xda, 0xe0, 0x82, 0x61, 0x2e, 0x78, 0xea, 0xb6,
-  0xa7, 0x8e, 0x37, 0x86, 0xb9, 0x76, 0x18, 0xe6, 0x88, 0x61, 0x8e, 0x18,
-  0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x36, 0x3f, 0x59, 0x13, 0x14,
-  0xc9, 0x93, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41,
-  0x18, 0x4d, 0x20, 0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04,
-  0x03, 0xac, 0x54, 0xe4, 0x24, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41,
-  0x30, 0xc0, 0x4c, 0x65, 0x4e, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10,
-  0x04, 0x03, 0xec, 0x54, 0xe8, 0x24, 0x21, 0x82, 0x11, 0x03, 0x05, 0x00,
-  0x41, 0x30, 0x58, 0x5c, 0x65, 0x4e, 0x64, 0x24, 0x00, 0x95, 0x31, 0x11,
-  0x95, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xa9, 0x59, 0x02, 0x39, 0x18, 0x68,
-  0x79, 0x4c, 0xa3, 0x33, 0x25, 0x8e, 0x25, 0x3e, 0x21, 0x0e, 0x4c, 0x09,
-  0x0c, 0x2e, 0x30, 0x66, 0xc4, 0xc0, 0x01, 0x40, 0x10, 0x0c, 0x1a, 0x59,
-  0x79, 0x93, 0x1c, 0x91, 0x91, 0x52, 0x09, 0xd2, 0x24, 0x4d, 0xd2, 0x04,
-  0x4d, 0x4e, 0x65, 0x96, 0x60, 0x84, 0x86, 0x1b, 0x46, 0xa3, 0x54, 0xc0,
-  0x60, 0x96, 0x81, 0x0e, 0x62, 0x22, 0x18, 0x31, 0x30, 0x00, 0x10, 0x04,
-  0x03, 0x48, 0x56, 0xe4, 0x24, 0x24, 0x46, 0x0c, 0x0c, 0x00, 0x04, 0xc1,
-  0x00, 0x9a, 0x95, 0x39, 0x09, 0x09, 0x13, 0xce, 0x04, 0x3e, 0x26, 0xa0,
-  0x09, 0x7c, 0x46, 0x13, 0x72, 0x64, 0x18, 0x6e, 0x08, 0x56, 0x05, 0x0c,
-  0x66, 0x19, 0xea, 0xe0, 0x0e, 0x82, 0xe1, 0x08, 0x83, 0x4d, 0x86, 0xef,
-  0x8e, 0x61, 0x86, 0x1b, 0x82, 0x1b, 0x21, 0x83, 0x1a, 0x02, 0x1d, 0x8e,
-  0x48, 0xe0, 0x64, 0xf8, 0x2a, 0x10, 0xf4, 0x96, 0x61, 0x86, 0x1b, 0x02,
-  0x1d, 0x21, 0x83, 0x0a, 0x06, 0x9d, 0x65, 0xb0, 0x83, 0x55, 0x08, 0xce,
-  0x3f, 0x86, 0xb9, 0x97, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c,
-  0x36, 0x5f, 0x59, 0x15, 0x34, 0xc9, 0x95, 0xd1, 0x84, 0x00, 0x18, 0x4d,
-  0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x0e, 0x19,
-  0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xac, 0x5c, 0x64, 0xe5, 0x20, 0x82,
-  0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xc0, 0xcc, 0x65, 0x56, 0x18, 0x22,
-  0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xec, 0x5c, 0x68, 0x45, 0x22,
-  0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x58, 0xdc, 0x65, 0x56, 0xe4,
-  0x24, 0x00, 0x97, 0x51, 0x11, 0x97, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xa9,
-  0x59, 0x82, 0x55, 0x18, 0x6e, 0xc8, 0xcc, 0x05, 0x0c, 0x66, 0x19, 0xf0,
-  0x20, 0x0f, 0x82, 0x6a, 0x13, 0x5b, 0x81, 0x0b, 0x9e, 0x1a, 0x31, 0x38,
-  0x00, 0x10, 0x04, 0x03, 0x0a, 0x5e, 0x6e, 0x05, 0x0c, 0x4c, 0x65, 0xc4,
-  0xe0, 0x00, 0x40, 0x10, 0x0c, 0xa8, 0x78, 0xb9, 0x95, 0x40, 0xb8, 0x60,
-  0x98, 0x82, 0x93, 0x5d, 0x81, 0x0b, 0x9e, 0x1a, 0x31, 0x38, 0x00, 0x10,
-  0x04, 0x03, 0xaa, 0x5e, 0x78, 0x85, 0x0c, 0x56, 0x65, 0xc4, 0xe0, 0x00,
-  0x40, 0x10, 0x0c, 0x28, 0x7b, 0xe1, 0x95, 0x40, 0xb8, 0x60, 0x98, 0x0b,
-  0x9e, 0xba, 0xe3, 0xa9, 0xcb, 0x91, 0x61, 0x4e, 0x2d, 0x86, 0x39, 0x62,
-  0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x6d, 0x5f,
-  0xd0, 0xa5, 0x54, 0xec, 0x65, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60,
-  0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10,
-  0x00, 0x04, 0xc1, 0x00, 0x13, 0x99, 0x77, 0x49, 0x88, 0x60, 0xc4, 0x00,
-  0x01, 0x40, 0x10, 0x0c, 0xb0, 0x91, 0x81, 0x97, 0x84, 0x08, 0x46, 0x0c,
-  0x10, 0x00, 0x04, 0xc1, 0x00, 0x23, 0x99, 0x78, 0x49, 0x88, 0x60, 0xc4,
-  0x40, 0x01, 0x40, 0x10, 0x0c, 0x96, 0x95, 0x81, 0x97, 0x57, 0x09, 0xfa,
-  0x05, 0x5c, 0xfe, 0x65, 0x34, 0x21, 0x00, 0x2e, 0x78, 0x6a, 0x96, 0x60,
-  0x15, 0x86, 0x1b, 0xec, 0x40, 0x64, 0xc0, 0x60, 0x96, 0x41, 0x0f, 0x56,
-  0x21, 0xb0, 0x50, 0x19, 0x95, 0xf8, 0x0c, 0x47, 0xf0, 0x01, 0xa9, 0x10,
-  0xdf, 0x2c, 0xc3, 0x1e, 0xf8, 0x41, 0x60, 0xa5, 0xd2, 0x07, 0xf1, 0xb1,
-  0x60, 0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x86, 0x7c, 0xac,
-  0x08, 0xe2, 0x53, 0x04, 0xcb, 0xe8, 0x70, 0x43, 0xa0, 0x32, 0x60, 0x30,
-  0xcb, 0xc0, 0x07, 0x7d, 0x10, 0xd8, 0xd0, 0x2a, 0xf0, 0x99, 0x25, 0x10,
-  0x05, 0x63, 0x15, 0x22, 0x3e, 0xb3, 0x04, 0xa2, 0x30, 0x1c, 0x71, 0x0a,
-  0xad, 0x22, 0x7c, 0xb3, 0x0c, 0x7f, 0x20, 0x0a, 0x81, 0xa1, 0x82, 0xab,
-  0xc4, 0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x44,
-  0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x37, 0xa3, 0xc3, 0x0d, 0x41, 0xcd,
-  0x80, 0xc1, 0x2c, 0x03, 0x28, 0x84, 0x42, 0x60, 0xb6, 0x32, 0xc4, 0x67,
-  0x96, 0x40, 0x14, 0x8c, 0xc8, 0x15, 0xf8, 0xcc, 0x12, 0x88, 0xc2, 0x40,
-  0xcb, 0xa3, 0xf1, 0x01, 0xd6, 0x07, 0x04, 0x28, 0x08, 0xa1, 0x40, 0x16,
-  0x7e, 0x70, 0xc1, 0x30, 0x86, 0x2b, 0xbc, 0x12, 0x9f, 0xe1, 0x08, 0x5a,
-  0xe8, 0x15, 0xe2, 0x9b, 0x65, 0x18, 0x05, 0x53, 0x08, 0xcc, 0x57, 0x6a,
-  0x21, 0x3e, 0x16, 0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0xc0,
-  0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x28, 0x1b, 0x1d, 0x6e, 0x08, 0xc6,
-  0x06, 0x0c, 0x66, 0x19, 0x48, 0xa1, 0x14, 0x02, 0x1b, 0xcc, 0x05, 0x3e,
-  0xb3, 0x04, 0xaa, 0x60, 0xe3, 0x42, 0xc4, 0x67, 0x96, 0x40, 0x15, 0x86,
-  0x23, 0x7e, 0x81, 0x5c, 0x84, 0x6f, 0x96, 0xe1, 0x14, 0x54, 0x21, 0x30,
-  0x70, 0x28, 0x97, 0xf8, 0x58, 0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78,
-  0xca, 0x82, 0x48, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x02, 0x6e, 0x74, 0xb8,
-  0x21, 0x70, 0x1b, 0x30, 0x98, 0x65, 0x40, 0x85, 0x54, 0x08, 0xac, 0x5d,
-  0x86, 0xf8, 0xcc, 0x12, 0xa8, 0x82, 0x11, 0xf2, 0x02, 0x9f, 0x59, 0x02,
-  0x55, 0x18, 0x68, 0x79, 0x34, 0x52, 0xc0, 0x4a, 0x81, 0x40, 0x05, 0x21,
-  0x15, 0x68, 0xc3, 0x14, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x6e, 0x7b, 0xea,
-  0x7c, 0x65, 0x98, 0x7b, 0x8f, 0x61, 0x8e, 0x18, 0xe6, 0x88, 0x61, 0x46,
-  0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0x03, 0x9d, 0xb6, 0x51, 0x99, 0xbd,
-  0x19, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1,
-  0x04, 0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xc0,
-  0x4e, 0x87, 0x6e, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03,
-  0x0c, 0x75, 0xea, 0x26, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30,
-  0xc0, 0x52, 0xc7, 0x6e, 0x12, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04,
-  0x83, 0x05, 0x76, 0xea, 0x86, 0x66, 0x02, 0xd1, 0x29, 0x1b, 0xd2, 0x19,
-  0x4d, 0x08, 0x80, 0x0b, 0x9e, 0x9a, 0x25, 0x58, 0x85, 0x81, 0x96, 0xc7,
-  0x34, 0xec, 0x40, 0xd5, 0xea, 0x80, 0x25, 0xf0, 0x40, 0x50, 0x05, 0x55,
-  0xcb, 0x83, 0x59, 0x06, 0x56, 0x70, 0x85, 0x7d, 0x18, 0x8e, 0xf0, 0x07,
-  0xb3, 0x19, 0xbe, 0xfb, 0x87, 0x61, 0x86, 0x1b, 0x82, 0x98, 0x21, 0x83,
-  0x1a, 0x02, 0x1d, 0x8e, 0x18, 0x09, 0xb5, 0x19, 0xbe, 0x0a, 0x04, 0xbd,
-  0x92, 0x18, 0x66, 0xb8, 0x21, 0xa0, 0x19, 0x32, 0xa8, 0x60, 0xd0, 0x59,
-  0x86, 0x56, 0x10, 0x87, 0xe0, 0xf0, 0x65, 0x98, 0x4b, 0x91, 0x61, 0x46,
-  0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0xc3, 0x9d, 0xd2, 0x11, 0x9b, 0xd9,
-  0x19, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1,
-  0x04, 0x62, 0x28, 0xe2, 0x90, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xc0,
-  0x7e, 0x87, 0x75, 0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03,
-  0x0c, 0x7c, 0x5a, 0x87, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30,
-  0xc0, 0xc2, 0xc7, 0x75, 0x24, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04,
-  0x83, 0x05, 0x7d, 0x5a, 0x87, 0x6d, 0x02, 0xdd, 0xe9, 0x1b, 0xde, 0x19,
-  0x4d, 0x08, 0x80, 0x0b, 0x9e, 0x9a, 0x25, 0x10, 0x87, 0xe1, 0x86, 0x99,
-  0x00, 0x1f, 0x30, 0x98, 0x65, 0x78, 0x05, 0x58, 0x08, 0xea, 0x6c, 0x60,
-  0x07, 0x2e, 0x78, 0x6a, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x28, 0xf5,
-  0x89, 0x1d, 0x9c, 0x00, 0x9d, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0,
-  0xd6, 0x27, 0x76, 0x02, 0xe1, 0x82, 0x61, 0x4a, 0x6d, 0x6a, 0x07, 0x2e,
-  0x78, 0x6a, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xa8, 0xf7, 0xb1, 0x1d,
-  0x9f, 0x28, 0x9d, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0xe0, 0xc7,
-  0x76, 0x02, 0xe1, 0x82, 0x61, 0x2e, 0x78, 0xea, 0x8e, 0xa7, 0x6e, 0x66,
-  0x86, 0x39, 0x32, 0x19, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0,
-  0x00, 0x40, 0x10, 0x0c, 0xb6, 0xfa, 0x11, 0x9f, 0xbf, 0x81, 0x9f, 0xd1,
-  0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20,
-  0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x8c, 0x7f,
-  0xd2, 0x27, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xc0, 0xfa,
-  0x47, 0x7d, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xcc,
-  0x7f, 0xd6, 0x27, 0x21, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x58,
-  0x4a, 0x48, 0x7d, 0x52, 0x27, 0xb8, 0x1f, 0xdd, 0xc9, 0x9f, 0xd1, 0x84,
-  0x00, 0xb8, 0xe0, 0xa9, 0x59, 0x02, 0x71, 0x18, 0x6e, 0x80, 0x0b, 0xfe,
-  0x01, 0x83, 0x59, 0x86, 0x58, 0x10, 0x87, 0xc0, 0xf6, 0xa6, 0x6f, 0xe2,
-  0x33, 0x1c, 0x41, 0x17, 0x7e, 0x43, 0x7c, 0xb3, 0x0c, 0xb2, 0x50, 0x0b,
-  0x81, 0xfd, 0x4d, 0x5d, 0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73,
-  0xc1, 0x53, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x26, 0xa4,
-  0xc3, 0x0d, 0x01, 0x09, 0x81, 0xc1, 0x2c, 0xc3, 0x2c, 0xd0, 0x42, 0x60,
-  0xc3, 0xe9, 0xc0, 0x67, 0x96, 0x20, 0x17, 0xcc, 0x74, 0x88, 0xf8, 0xcc,
-  0x12, 0xe4, 0xc2, 0x70, 0xc4, 0x5f, 0x9c, 0x8e, 0xf0, 0xcd, 0x32, 0xd8,
-  0x42, 0x2e, 0x04, 0x06, 0x1a, 0xa8, 0x13, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c,
-  0x30, 0xcc, 0x05, 0x4f, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45,
-  0xc4, 0x90, 0x0e, 0x37, 0x04, 0x2f, 0x04, 0x06, 0xb3, 0x0c, 0xb7, 0x80,
-  0x0b, 0x81, 0xc1, 0xce, 0x10, 0x9f, 0x59, 0x82, 0x5c, 0x30, 0x62, 0x76,
-  0xe0, 0x33, 0x4b, 0x90, 0x0b, 0x03, 0x2d, 0x8f, 0x36, 0x0b, 0x18, 0x2d,
-  0x10, 0xb7, 0x20, 0xe0, 0x02, 0xcd, 0xd4, 0xc2, 0x05, 0xc3, 0x98, 0xec,
-  0xd8, 0x4e, 0x7c, 0x86, 0x23, 0x5c, 0xe3, 0x76, 0x88, 0x6f, 0x96, 0x41,
-  0x17, 0x7a, 0x21, 0x30, 0xdc, 0x79, 0x8d, 0xf8, 0x58, 0x30, 0xd0, 0xe7,
-  0x82, 0x61, 0x2e, 0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29,
-  0xe2, 0x87, 0x74, 0xb8, 0x21, 0xe8, 0x21, 0x30, 0x98, 0x65, 0xd8, 0x05,
-  0x5e, 0x08, 0x6c, 0x00, 0x1f, 0xf8, 0xcc, 0x12, 0x84, 0x83, 0xf5, 0x0e,
-  0x11, 0x9f, 0x59, 0x82, 0x70, 0x18, 0x8e, 0xc8, 0x0d, 0xdf, 0x11, 0xbe,
-  0x59, 0x06, 0x5f, 0x08, 0x87, 0xc0, 0x74, 0xe3, 0x77, 0xe2, 0x63, 0x81,
-  0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x22, 0xf9, 0x58, 0x11,
-  0xc4, 0xa7, 0x08, 0x35, 0xd2, 0xe1, 0x86, 0x00, 0x8d, 0xc0, 0x60, 0x96,
-  0xe1, 0x17, 0xc0, 0x21, 0xb0, 0xf3, 0x19, 0xe2, 0x33, 0x4b, 0x10, 0x0e,
-  0x46, 0xb0, 0x0f, 0x7c, 0x66, 0x09, 0xc2, 0x61, 0xa0, 0xe5, 0xd1, 0x76,
-  0x01, 0xe3, 0x05, 0xe2, 0x17, 0x04, 0x70, 0x40, 0x9d, 0x5e, 0xb8, 0x60,
-  0x98, 0x0b, 0x9e, 0xba, 0xed, 0xa9, 0xc3, 0x9d, 0x61, 0x2e, 0x5d, 0x86,
-  0x39, 0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83,
-  0x4d, 0x8f, 0xce, 0x88, 0x84, 0xea, 0x68, 0x34, 0x21, 0x00, 0x46, 0x13,
-  0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46,
-  0x0c, 0x10, 0x00, 0x04, 0xc1, 0x00, 0x0b, 0x25, 0x37, 0x4a, 0x88, 0x60,
-  0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x30, 0x51, 0x7a, 0xa3, 0x84, 0x08,
-  0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x00, 0x1b, 0x25, 0x38, 0x4a, 0x88,
-  0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x16, 0x55, 0x7a, 0x23, 0x17,
-  0x0a, 0xf8, 0xe8, 0x87, 0xfc, 0x68, 0x34, 0x21, 0x00, 0x2e, 0x78, 0x6a,
-  0x96, 0x40, 0x1c, 0x06, 0x5a, 0x1e, 0xd3, 0x68, 0x05, 0x3f, 0x0c, 0x58,
-  0x81, 0x25, 0x5e, 0x41, 0x08, 0x07, 0x3f, 0x0c, 0x60, 0x61, 0x96, 0x61,
-  0x1c, 0xca, 0xa1, 0x3e, 0x86, 0x23, 0xf4, 0x03, 0x8c, 0x86, 0xef, 0xf6,
-  0x63, 0x98, 0xe1, 0x86, 0x60, 0x85, 0xc8, 0xa0, 0x86, 0x40, 0x87, 0x23,
-  0xf6, 0x83, 0x8c, 0x86, 0xaf, 0x02, 0x41, 0xaf, 0x3f, 0x86, 0x19, 0x6e,
-  0x08, 0x5c, 0x88, 0x0c, 0x2a, 0x18, 0x74, 0x96, 0x81, 0x1c, 0xf2, 0x21,
-  0x38, 0xf9, 0x19, 0xe6, 0x46, 0x66, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41,
-  0x30, 0xd8, 0x64, 0xe9, 0x8f, 0x78, 0xa8, 0x95, 0x46, 0x13, 0x02, 0x60,
-  0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x38,
-  0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb0, 0x5c, 0x32, 0xa5, 0x83,
-  0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x00, 0xd3, 0xa5, 0x53, 0x62,
-  0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb0, 0x5d, 0x42, 0x25,
-  0x89, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0x11, 0xa7, 0x53,
-  0x32, 0xa3, 0x80, 0x96, 0xee, 0xc8, 0x96, 0x46, 0x13, 0x02, 0xe0, 0x82,
-  0xa7, 0x66, 0x09, 0xf2, 0x61, 0xb8, 0xa1, 0x45, 0x74, 0x09, 0x0c, 0x66,
-  0x19, 0xcc, 0xe1, 0x1c, 0x82, 0x0a, 0x23, 0x55, 0x82, 0x0b, 0x9e, 0x1a,
-  0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x8a, 0x9c, 0x56, 0x89, 0x46, 0xf4,
-  0x68, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xa8, 0x72, 0x5a, 0xa5, 0x40,
-  0xb8, 0x60, 0x98, 0x22, 0xa3, 0x57, 0x82, 0x0b, 0x9e, 0x1a, 0x31, 0x38,
-  0x00, 0x10, 0x04, 0x03, 0x2a, 0x9d, 0x60, 0xc9, 0x46, 0xfe, 0x68, 0xc4,
-  0xe0, 0x00, 0x40, 0x10, 0x0c, 0x28, 0x75, 0x82, 0xa5, 0x40, 0xb8, 0x60,
-  0x98, 0x0b, 0x9e, 0xba, 0xe3, 0xa9, 0x6b, 0xa1, 0x61, 0xce, 0x67, 0x86,
-  0x39, 0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83,
-  0xed, 0x9d, 0x78, 0x29, 0x8f, 0xd4, 0x69, 0x34, 0x21, 0x00, 0x46, 0x13,
-  0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46,
-  0x0c, 0x10, 0x00, 0x04, 0xc1, 0x00, 0xb3, 0xa7, 0x71, 0x4a, 0x88, 0x60,
-  0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb0, 0x7b, 0x22, 0xa7, 0x84, 0x08,
-  0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x00, 0xc3, 0xa7, 0x72, 0x4a, 0x88,
-  0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x96, 0x7f, 0x22, 0xa7, 0x51,
-  0x0a, 0xe2, 0x89, 0x96, 0xe6, 0x69, 0x34, 0x21, 0x00, 0x2e, 0x78, 0x6a,
-  0x96, 0x20, 0x1f, 0x86, 0x1b, 0xd4, 0xc4, 0x9e, 0xc0, 0x60, 0x96, 0x01,
-  0x1d, 0xf2, 0x21, 0xb0, 0x3a, 0xba, 0xa3, 0xf8, 0x0c, 0x47, 0xc0, 0x09,
-  0x1e, 0x11, 0xdf, 0x2c, 0x43, 0x3a, 0xb0, 0x43, 0x60, 0x79, 0x14, 0x27,
-  0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x86,
-  0x7c, 0xac, 0x08, 0xe2, 0x53, 0x04, 0x48, 0xe9, 0x70, 0x43, 0xe0, 0x4f,
-  0x60, 0x30, 0xcb, 0xa0, 0x0e, 0xeb, 0x10, 0xd8, 0x10, 0x4a, 0xf0, 0x99,
-  0x25, 0x80, 0x07, 0x03, 0x25, 0x22, 0x3e, 0xb3, 0x04, 0xf0, 0x30, 0x1c,
-  0xb1, 0x27, 0xa1, 0x24, 0x7c, 0xb3, 0x0c, 0xed, 0x00, 0x0f, 0x81, 0xf1,
-  0x89, 0x28, 0xc5, 0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53,
-  0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x2b, 0xa5, 0xc3, 0x0d,
-  0x41, 0x4a, 0x81, 0xc1, 0x2c, 0x83, 0x3b, 0xbc, 0x43, 0x60, 0xaa, 0x34,
-  0xc4, 0x67, 0x96, 0x00, 0x1e, 0x8c, 0x68, 0x25, 0xf8, 0xcc, 0x12, 0xc0,
-  0xc3, 0x40, 0xcb, 0xa3, 0xa9, 0x03, 0xb6, 0x0e, 0x84, 0x3b, 0x08, 0xef,
-  0xc0, 0x52, 0xec, 0x70, 0xc1, 0x30, 0xc6, 0x4a, 0xb0, 0x14, 0x9f, 0xe1,
-  0x08, 0x53, 0x89, 0x25, 0xe2, 0x9b, 0x65, 0x88, 0x07, 0x7a, 0x08, 0x4c,
-  0x96, 0x4e, 0x25, 0x3e, 0x16, 0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e,
-  0xb2, 0xc0, 0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xc8, 0x29, 0x1d, 0x6e,
-  0x08, 0x6e, 0x0a, 0x0c, 0x66, 0x19, 0xe4, 0x61, 0x1e, 0x02, 0x1b, 0x74,
-  0x09, 0x3e, 0xb3, 0x04, 0xf8, 0x60, 0xb7, 0x44, 0xc4, 0x67, 0x96, 0x00,
-  0x1f, 0x86, 0x23, 0x62, 0x05, 0x97, 0x84, 0x6f, 0x96, 0xa1, 0x1e, 0xf0,
-  0x21, 0x30, 0x59, 0xc9, 0xa5, 0xf8, 0x58, 0xe0, 0xd0, 0xe7, 0x82, 0x61,
-  0x2e, 0x78, 0xca, 0x82, 0x48, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x82, 0xac,
-  0x74, 0xb8, 0x21, 0x10, 0x2b, 0x30, 0x98, 0x65, 0xb0, 0x87, 0x7b, 0x08,
-  0x2c, 0x9c, 0x86, 0xf8, 0xcc, 0x12, 0xe0, 0x83, 0x11, 0xe6, 0x04, 0x9f,
-  0x59, 0x02, 0x7c, 0x18, 0x68, 0x79, 0x34, 0x79, 0xc0, 0xe6, 0x81, 0xb0,
-  0x07, 0xe1, 0x1e, 0xf0, 0x8a, 0x1e, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x6e,
-  0x7b, 0xea, 0x64, 0x69, 0x98, 0x1b, 0x9f, 0x61, 0x8e, 0x18, 0xe6, 0x88,
-  0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0xa3, 0xab, 0xb0, 0xf2,
-  0xa7, 0xb7, 0x1a, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18,
-  0x84, 0xd1, 0x04, 0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00, 0x41,
-  0x30, 0xc0, 0xf6, 0x0a, 0xad, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10,
-  0x04, 0x03, 0x8c, 0xaf, 0xd2, 0x2a, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00,
-  0x41, 0x30, 0xc0, 0xfa, 0x4a, 0xad, 0x12, 0x22, 0x18, 0x31, 0x50, 0x00,
-  0x10, 0x04, 0x83, 0x85, 0xb4, 0xd2, 0x0a, 0xa5, 0x02, 0xbb, 0xca, 0x29,
-  0xbc, 0x1a, 0x4d, 0x08, 0x80, 0x0b, 0x9e, 0x9a, 0x25, 0xc8, 0x87, 0x81,
-  0x96, 0xc7, 0x34, 0xc8, 0x41, 0x4e, 0x83, 0x71, 0x60, 0x09, 0x73, 0x10,
-  0xf0, 0x41, 0x4e, 0x83, 0x73, 0x98, 0x65, 0xd0, 0x07, 0x7e, 0x78, 0x97,
-  0xe1, 0x08, 0x79, 0xd1, 0xa9, 0xe1, 0xbb, 0x79, 0x19, 0x66, 0xb8, 0x21,
-  0x28, 0x29, 0x32, 0xa8, 0x21, 0xd0, 0xe1, 0x88, 0x7a, 0xf1, 0xa9, 0xe1,
-  0xab, 0x40, 0xd0, 0xbb, 0x97, 0x61, 0x86, 0x1b, 0x02, 0x94, 0x22, 0x83,
-  0x0a, 0x06, 0x9d, 0x65, 0xd8, 0x07, 0x98, 0x08, 0x8e, 0x9d, 0x86, 0xb9,
-  0xfe, 0x19, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x36, 0xd6, 0xca,
-  0x2b, 0x9b, 0x3a, 0xad, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1,
-  0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x0e, 0x19, 0x31, 0x40, 0x00,
-  0x10, 0x04, 0x03, 0x6c, 0xb6, 0x40, 0xeb, 0x20, 0x82, 0x11, 0x03, 0x04,
-  0x00, 0x41, 0x30, 0xc0, 0x68, 0x2b, 0xb4, 0x18, 0x22, 0x18, 0x31, 0x40,
-  0x00, 0x10, 0x04, 0x03, 0xac, 0xb6, 0x44, 0x4b, 0x22, 0x82, 0x11, 0x03,
-  0x05, 0x00, 0x41, 0x30, 0x58, 0x78, 0x2b, 0xb4, 0xc0, 0x2a, 0x70, 0xad,
-  0xb8, 0x82, 0xad, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xa9, 0x59, 0x02, 0x98,
-  0x18, 0x6e, 0x38, 0x19, 0xda, 0x02, 0x83, 0x59, 0x86, 0x7e, 0xf0, 0x87,
-  0xa0, 0x76, 0x8a, 0xb4, 0xe0, 0x82, 0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04,
-  0xc1, 0x80, 0xf2, 0xad, 0xd2, 0x62, 0x19, 0xba, 0x1a, 0x31, 0x38, 0x00,
-  0x10, 0x04, 0x03, 0xea, 0xb7, 0x4a, 0x2b, 0x10, 0x2e, 0x18, 0xa6, 0x7c,
-  0x2a, 0xb5, 0xe0, 0x82, 0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80,
-  0x1a, 0x2f, 0xd5, 0x82, 0x99, 0xbc, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04,
-  0x03, 0x8a, 0xbc, 0x54, 0x2b, 0x10, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0xee,
-  0x78, 0xea, 0x4e, 0x6a, 0x98, 0xc3, 0xa1, 0x61, 0x8e, 0x18, 0xe6, 0x88,
-  0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0x4b, 0x2f, 0xdb, 0x9a,
-  0x2b, 0xf2, 0x1a, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18,
-  0x84, 0xd1, 0x04, 0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00, 0x41,
-  0x30, 0xc0, 0xe0, 0xab, 0xb7, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10,
-  0x04, 0x03, 0x2c, 0xbe, 0x7c, 0x2b, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00,
-  0x41, 0x30, 0xc0, 0xe4, 0xeb, 0xb7, 0x12, 0x22, 0x18, 0x31, 0x50, 0x00,
-  0x10, 0x04, 0x83, 0x25, 0xbf, 0x7c, 0xab, 0xaf, 0x82, 0xf5, 0x72, 0xad,
-  0xf6, 0x1a, 0x4d, 0x08, 0x80, 0x0b, 0x9e, 0x9a, 0x25, 0x80, 0x89, 0xe1,
-  0x06, 0xb2, 0x81, 0x2f, 0x30, 0x98, 0x65, 0xf8, 0x07, 0x98, 0x08, 0xec,
-  0xad, 0xe2, 0x2a, 0x3e, 0xc3, 0x11, 0x68, 0x23, 0x57, 0xc4, 0x37, 0xcb,
-  0x00, 0x12, 0x23, 0x11, 0xd8, 0x5c, 0xa5, 0x4d, 0x7c, 0x2c, 0x18, 0xe8,
-  0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8,
-  0x14, 0xa1, 0x5f, 0x3a, 0xdc, 0x10, 0xe0, 0x17, 0x18, 0xcc, 0x32, 0x84,
-  0x84, 0x48, 0x04, 0x36, 0xec, 0x15, 0x7c, 0x66, 0x09, 0x4e, 0xc2, 0xf4,
-  0x8a, 0x88, 0xcf, 0x2c, 0xc1, 0x49, 0x0c, 0x47, 0xcc, 0xcd, 0x5e, 0x09,
-  0xdf, 0x2c, 0x03, 0x49, 0x9c, 0x44, 0x60, 0x74, 0xc3, 0x57, 0xf1, 0xb1,
-  0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x91, 0x7c, 0xac,
-  0x08, 0xe2, 0x53, 0x44, 0x89, 0xe9, 0x70, 0x43, 0x30, 0x62, 0x60, 0x30,
-  0xcb, 0x50, 0x12, 0x26, 0x11, 0x18, 0x69, 0x0d, 0xf1, 0x99, 0x25, 0x38,
-  0x09, 0x23, 0x4e, 0x0b, 0x3e, 0xb3, 0x04, 0x27, 0x31, 0xd0, 0xf2, 0x68,
-  0x21, 0x81, 0x89, 0x04, 0x51, 0x12, 0x82, 0x49, 0xf0, 0xdb, 0x48, 0x5c,
-  0x30, 0x8c, 0x99, 0x96, 0x6a, 0xc5, 0x67, 0x38, 0x02, 0x74, 0x56, 0x8b,
-  0xf8, 0x66, 0x19, 0x50, 0x62, 0x25, 0x02, 0x63, 0xad, 0xd0, 0x89, 0x8f,
-  0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x30, 0xe4, 0x63,
-  0x45, 0x10, 0x9f, 0x22, 0x66, 0x4c, 0x87, 0x1b, 0x82, 0x18, 0x03, 0x83,
-  0x59, 0x86, 0x94, 0x50, 0x89, 0xc0, 0x06, 0xda, 0x82, 0xcf, 0x2c, 0xc1,
-  0x4b, 0x58, 0x6c, 0x11, 0xf1, 0x99, 0x25, 0x78, 0x89, 0xe1, 0x88, 0xd5,
-  0x91, 0x2d, 0xe1, 0x9b, 0x65, 0x60, 0x89, 0x97, 0x08, 0x8c, 0x75, 0x66,
-  0x2b, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20,
-  0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xf0, 0x31, 0x1d, 0x6e, 0x08, 0x78,
-  0x0c, 0x0c, 0x66, 0x19, 0x5a, 0xc2, 0x25, 0x02, 0xdb, 0xad, 0x21, 0x3e,
-  0xb3, 0x04, 0x2f, 0x61, 0x04, 0x78, 0xc1, 0x67, 0x96, 0xe0, 0x25, 0x06,
-  0x5a, 0x1e, 0x2d, 0x25, 0x30, 0x95, 0x20, 0x5a, 0x42, 0x70, 0x09, 0xb6,
-  0x5b, 0x89, 0x0b, 0x86, 0xb9, 0xe0, 0xa9, 0xdb, 0x9e, 0x3a, 0xd6, 0x1a,
-  0xe6, 0x7a, 0x69, 0x98, 0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03,
-  0x00, 0x41, 0x30, 0xd8, 0xdc, 0x6c, 0xc7, 0xf0, 0x2b, 0xcd, 0x46, 0x13,
-  0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18,
-  0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb0, 0x3a, 0x13,
-  0xb3, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x00, 0xb3, 0xb3,
-  0x31, 0x4b, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb0, 0x3b,
-  0x23, 0xb3, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0xf1,
-  0xb3, 0x31, 0x13, 0xb1, 0x00, 0xce, 0x66, 0x4c, 0xce, 0x46, 0x13, 0x02,
-  0xe0, 0x82, 0xa7, 0x66, 0x09, 0x60, 0x62, 0xa0, 0xe5, 0x31, 0x8d, 0x7d,
-  0x30, 0xe3, 0x40, 0x1f, 0x58, 0xa2, 0x1f, 0x84, 0x97, 0x30, 0xe3, 0xc0,
-  0x1f, 0x46, 0x0c, 0x0c, 0x00, 0x04, 0xc1, 0x00, 0x02, 0xb5, 0x1f, 0x7b,
-  0x27, 0xb3, 0x0f, 0x78, 0x89, 0x8f, 0x09, 0x81, 0x7c, 0x2c, 0x90, 0x17,
-  0xf8, 0x58, 0xf1, 0x0f, 0xf1, 0xb1, 0x22, 0x90, 0x8f, 0x05, 0x21, 0x01,
-  0x9f, 0x11, 0x03, 0x03, 0x00, 0x41, 0x30, 0x80, 0x4e, 0xcd, 0xcc, 0xea,
-  0xc9, 0x84, 0x22, 0x3e, 0x16, 0x08, 0xf2, 0xb1, 0xe0, 0x80, 0xcf, 0x05,
-  0xc6, 0x8c, 0x18, 0x38, 0x00, 0x08, 0x82, 0x41, 0xd3, 0x6a, 0x6a, 0x46,
-  0x63, 0x2d, 0x06, 0x6a, 0x01, 0x99, 0x91, 0x19, 0x99, 0x8d, 0x99, 0xa8,
-  0xcd, 0x12, 0x8c, 0xd0, 0x70, 0x83, 0x5f, 0x89, 0x1a, 0x18, 0xcc, 0x32,
-  0xc8, 0xc4, 0x08, 0x05, 0x23, 0x06, 0x06, 0x00, 0x82, 0x60, 0x00, 0xb5,
-  0x5a, 0x9b, 0xf1, 0x93, 0x05, 0x3d, 0x06, 0x9f, 0x11, 0x03, 0x03, 0x00,
-  0x41, 0x30, 0x80, 0x5e, 0xed, 0xcd, 0xfa, 0xc9, 0x82, 0x1f, 0x83, 0xcf,
-  0x68, 0x02, 0x8d, 0x0d, 0xc3, 0x0d, 0x81, 0xa9, 0x81, 0xc1, 0x2c, 0xc3,
-  0x4c, 0xd4, 0x44, 0x30, 0x1c, 0x51, 0x9c, 0xd9, 0xf0, 0x9d, 0x31, 0xcc,
-  0x70, 0x43, 0x20, 0x63, 0x64, 0x50, 0x43, 0xa0, 0xc3, 0x11, 0xc7, 0x9a,
-  0x0d, 0x5f, 0x05, 0x82, 0x5e, 0x32, 0xcc, 0x70, 0x43, 0x50, 0x63, 0x64,
-  0x50, 0xc1, 0xa0, 0xb3, 0x0c, 0x34, 0x91, 0x16, 0xc1, 0xe5, 0xd7, 0x30,
-  0xa7, 0x52, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x96, 0x6b,
-  0xa6, 0x36, 0x66, 0xb4, 0x36, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30,
-  0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0xc4, 0x21, 0x23, 0x06, 0x08,
-  0x00, 0x82, 0x60, 0x80, 0x81, 0x5b, 0xab, 0x1d, 0x44, 0x30, 0x62, 0x80,
-  0x00, 0x20, 0x08, 0x06, 0x58, 0xb8, 0xb9, 0x1a, 0x43, 0x04, 0x23, 0x06,
-  0x08, 0x00, 0x82, 0x60, 0x80, 0x89, 0xdb, 0xab, 0x49, 0x44, 0x30, 0x62,
-  0xa0, 0x00, 0x20, 0x08, 0x06, 0x4b, 0xba, 0xb9, 0x5a, 0x9b, 0x05, 0xbb,
-  0xe6, 0x67, 0xbd, 0x36, 0x9a, 0x10, 0x00, 0x17, 0x3c, 0x35, 0x4b, 0x90,
-  0x16, 0xc3, 0x0d, 0x59, 0xb8, 0x81, 0xc1, 0x2c, 0x83, 0x4d, 0xdc, 0x44,
-  0x50, 0x68, 0x16, 0x6b, 0x70, 0xc1, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82,
-  0x60, 0x40, 0xad, 0x9b, 0xac, 0x7d, 0xa1, 0x36, 0x62, 0x70, 0x00, 0x20,
-  0x08, 0x06, 0x14, 0xbb, 0xc9, 0x5a, 0x20, 0x5c, 0x30, 0x4c, 0xad, 0x99,
-  0xad, 0xc1, 0x05, 0x4f, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x01, 0x05,
-  0x6f, 0xb7, 0x26, 0x06, 0xa6, 0x36, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06,
-  0x54, 0xbc, 0xdd, 0x5a, 0x20, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0xdd, 0xf1,
-  0xd4, 0xd1, 0xd8, 0x30, 0x57, 0x56, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3,
-  0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x66, 0x6f, 0xe3, 0x06, 0x6a,
-  0xf1, 0x36, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08,
-  0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60,
-  0x80, 0xf5, 0x9b, 0xba, 0x25, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08,
-  0x06, 0x98, 0xbf, 0xad, 0x5b, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82,
-  0x60, 0x80, 0xfd, 0x1b, 0xbb, 0x25, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20,
-  0x08, 0x06, 0x8b, 0xc9, 0xad, 0x9b, 0xaa, 0x05, 0xf8, 0xb6, 0x6b, 0xfa,
-  0x36, 0x9a, 0x10, 0x00, 0x17, 0x3c, 0x35, 0x4b, 0x90, 0x16, 0xc3, 0x0d,
-  0x76, 0xd0, 0x6f, 0x60, 0x30, 0xcb, 0x80, 0x13, 0x69, 0x11, 0x18, 0x9f,
-  0xf9, 0x59, 0x7c, 0x86, 0x23, 0xf6, 0xe0, 0xcf, 0x88, 0x6f, 0x96, 0x21,
-  0x27, 0x78, 0x22, 0x30, 0x50, 0xe3, 0x83, 0xf8, 0x58, 0x30, 0xd0, 0xe7,
-  0x82, 0x61, 0x2e, 0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29,
-  0xe2, 0xe4, 0x74, 0xb8, 0x21, 0x28, 0x39, 0x30, 0x98, 0x65, 0xd0, 0x89,
-  0x9d, 0x08, 0x6c, 0x40, 0x35, 0xf8, 0xcc, 0x12, 0x80, 0x85, 0x9d, 0x1a,
-  0x11, 0x9f, 0x59, 0x02, 0xb0, 0x18, 0x8e, 0x30, 0x05, 0x54, 0x13, 0xbe,
-  0x59, 0x86, 0x9e, 0x00, 0x8b, 0xc0, 0x4e, 0x21, 0xd5, 0xe2, 0x63, 0x81,
-  0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x22, 0xf9, 0x58, 0x11,
-  0xc4, 0xa7, 0x08, 0x99, 0xd3, 0xe1, 0x86, 0x00, 0xe6, 0xc0, 0x60, 0x96,
-  0xc1, 0x27, 0x7e, 0x22, 0xb0, 0x58, 0x1b, 0xe2, 0x33, 0x4b, 0x00, 0x16,
-  0x46, 0xd0, 0x1a, 0x7c, 0x66, 0x09, 0xc0, 0x62, 0xa0, 0xe5, 0xd1, 0x74,
-  0x02, 0xdb, 0x09, 0xc2, 0x27, 0x84, 0x9f, 0x10, 0x0b, 0x9e, 0xb8, 0x60,
-  0x18, 0x9b, 0xb5, 0x5b, 0x8b, 0xcf, 0x70, 0x84, 0x2c, 0xe0, 0x1a, 0xf1,
-  0xcd, 0x32, 0x84, 0x05, 0x59, 0x04, 0x96, 0x6b, 0xb3, 0x10, 0x1f, 0x0b,
-  0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x60, 0xc8, 0xc7, 0x8a,
-  0x20, 0x3e, 0x45, 0x80, 0x9d, 0x0e, 0x37, 0x04, 0x3e, 0x07, 0x06, 0xb3,
-  0x0c, 0x62, 0x31, 0x16, 0x81, 0x0d, 0xe1, 0x06, 0x9f, 0x59, 0x02, 0xb4,
-  0x30, 0x5f, 0x23, 0xe2, 0x33, 0x4b, 0x80, 0x16, 0xc3, 0x11, 0xbd, 0xf0,
-  0x6b, 0xc2, 0x37, 0xcb, 0x50, 0x16, 0x68, 0x11, 0x98, 0x2f, 0x80, 0x5b,
-  0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x41, 0x24,
-  0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xb1, 0x76, 0x3a, 0xdc, 0x10, 0xa4, 0x1d,
-  0x18, 0xcc, 0x32, 0x98, 0xc5, 0x59, 0x04, 0x86, 0x6e, 0x43, 0x7c, 0x66,
-  0x09, 0xd0, 0xc2, 0x88, 0x76, 0x83, 0xcf, 0x2c, 0x01, 0x5a, 0x0c, 0xb4,
-  0x3c, 0x9a, 0x58, 0x60, 0x63, 0x41, 0x98, 0x85, 0x70, 0x16, 0xb0, 0x41,
-  0x16, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0xb7, 0x3d, 0x75, 0xb9, 0x36, 0xcc,
-  0xa9, 0xd7, 0x30, 0x47, 0x0c, 0x73, 0xc4, 0x30, 0x23, 0x06, 0x07, 0x00,
-  0x82, 0x60, 0xb0, 0xed, 0x1d, 0xda, 0x95, 0x9c, 0xdd, 0x8d, 0x26, 0x04,
-  0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14,
-  0x91, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x60, 0xa2, 0xf7, 0x76,
-  0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x36, 0x7a, 0x70,
-  0x97, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x60, 0xa4, 0x17,
-  0x77, 0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0xb2, 0x7a,
-  0x70, 0xf7, 0x72, 0x41, 0xdf, 0x81, 0xdd, 0xdf, 0x8d, 0x26, 0x04, 0xc0,
-  0x05, 0x4f, 0xcd, 0x12, 0xa4, 0xc5, 0x40, 0xcb, 0x63, 0x1a, 0x34, 0x01,
-  0xea, 0xc1, 0x4c, 0xb0, 0x84, 0x4d, 0x08, 0x68, 0x01, 0xea, 0xc1, 0x4d,
-  0xcc, 0x32, 0xa8, 0x05, 0x5b, 0xec, 0xc3, 0x70, 0x04, 0x48, 0x84, 0xdd,
-  0xf0, 0x5d, 0x48, 0x0c, 0x33, 0xdc, 0x10, 0xb0, 0x1c, 0x19, 0xd4, 0x10,
-  0xe8, 0x70, 0x44, 0x48, 0x94, 0xdd, 0xf0, 0x55, 0x20, 0xe8, 0x8d, 0xc4,
-  0x30, 0xc3, 0x0d, 0xc1, 0xcb, 0x91, 0x41, 0x05, 0x83, 0xce, 0x32, 0xac,
-  0x05, 0x68, 0x04, 0x37, 0x6f, 0xc3, 0x1c, 0x89, 0x0d, 0x33, 0x62, 0x70,
-  0x00, 0x20, 0x08, 0x06, 0xdb, 0xec, 0x81, 0x5e, 0xcf, 0xb9, 0xde, 0x68,
-  0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10,
-  0x43, 0x11, 0x87, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0xa6, 0x7b,
-  0xa7, 0x77, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x60, 0xbb,
-  0x87, 0x7a, 0x0c, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0xc6,
-  0x7b, 0xa9, 0x27, 0x11, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c,
-  0xe3, 0x87, 0x7a, 0x67, 0x17, 0xd4, 0x1e, 0xde, 0xdd, 0xde, 0x68, 0x42,
-  0x00, 0x5c, 0xf0, 0xd4, 0x2c, 0x01, 0x68, 0x0c, 0x37, 0xcc, 0xc4, 0xee,
-  0x81, 0xc1, 0x2c, 0x43, 0x5b, 0xb8, 0x45, 0x50, 0x62, 0xb7, 0x7a, 0x70,
-  0xc1, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0x95, 0x1f, 0xeb,
-  0xe9, 0xc4, 0xde, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x01, 0x65, 0x7e,
-  0xac, 0x17, 0x08, 0x17, 0x0c, 0x53, 0x65, 0x07, 0x7b, 0x70, 0xc1, 0x53,
-  0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0xa9, 0x5f, 0xec, 0xf1, 0x04,
-  0xe8, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x01, 0xb5, 0x7e, 0xb1, 0x17,
-  0x08, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x77, 0x3c, 0x75, 0x2e, 0x37, 0xcc,
-  0xfd, 0xd8, 0x30, 0x47, 0x0c, 0x73, 0xc4, 0x30, 0x23, 0x06, 0x07, 0x00,
-  0x82, 0x60, 0xb0, 0xc1, 0x5f, 0xef, 0xe9, 0xdd, 0xfa, 0x8d, 0x26, 0x04,
-  0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14,
-  0x91, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x60, 0xf7, 0x47, 0x7e,
-  0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x86, 0x7f, 0xe5,
-  0x97, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x60, 0xf9, 0x67,
-  0x7e, 0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0x02, 0x82,
-  0x41, 0xf9, 0x91, 0x5e, 0x20, 0x7f, 0xb5, 0x47, 0x7f, 0xa3, 0x09, 0x01,
-  0x70, 0xc1, 0x53, 0xb3, 0x04, 0xa0, 0x31, 0xdc, 0x00, 0x17, 0xf7, 0x07,
-  0x06, 0xb3, 0x0c, 0x6f, 0x01, 0x1a, 0x81, 0xd9, 0x1d, 0xde, 0xc5, 0x67,
-  0x38, 0xc2, 0x2e, 0xf2, 0x8e, 0xf8, 0x66, 0x19, 0xe0, 0x62, 0x2e, 0x02,
-  0xd3, 0xbb, 0xbb, 0x88, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x82,
-  0xa7, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0x42, 0x30, 0xd0,
-  0xe1, 0x86, 0xe0, 0xff, 0xc0, 0x60, 0x96, 0x21, 0x2e, 0xe4, 0x22, 0xb0,
-  0x41, 0xf4, 0xe0, 0x33, 0x4b, 0x70, 0x17, 0x16, 0x7a, 0x44, 0x7c, 0x66,
-  0x09, 0xee, 0x62, 0x38, 0x22, 0x34, 0x44, 0x4f, 0xf8, 0x66, 0x19, 0xe8,
-  0xe2, 0x2e, 0x02, 0x13, 0x8d, 0xd1, 0x8b, 0x8f, 0x05, 0x0e, 0x7d, 0x2e,
-  0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22,
-  0x58, 0x30, 0xd0, 0xe1, 0x86, 0x40, 0x05, 0x03, 0x30, 0x98, 0x65, 0xa8,
-  0x0b, 0xbb, 0x08, 0x6c, 0xf5, 0x86, 0xf8, 0xcc, 0x12, 0xdc, 0x85, 0x11,
-  0xae, 0x07, 0x9f, 0x59, 0x82, 0xbb, 0x18, 0x68, 0x79, 0xb4, 0xb8, 0xc0,
-  0xe4, 0x82, 0xa8, 0x0b, 0xc1, 0x2e, 0x70, 0x66, 0x2e, 0x2e, 0x18, 0xc6,
-  0x5a, 0x2f, 0xf6, 0xe2, 0x33, 0x1c, 0xc1, 0x1a, 0xb2, 0x47, 0x7c, 0xb3,
-  0x0c, 0x78, 0xb1, 0x17, 0x81, 0xcd, 0x5e, 0x6b, 0xc4, 0xc7, 0x82, 0x81,
-  0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88,
-  0x4f, 0x11, 0x3a, 0x18, 0xe8, 0x70, 0x43, 0x80, 0x83, 0x01, 0x18, 0xcc,
-  0x32, 0xe4, 0x85, 0x5e, 0x04, 0x36, 0xec, 0x1e, 0x7c, 0x66, 0x09, 0xfe,
-  0xc2, 0x70, 0x8f, 0x88, 0xcf, 0x2c, 0xc1, 0x5f, 0x0c, 0x47, 0xdc, 0x46,
-  0xee, 0x09, 0xdf, 0x2c, 0x03, 0x5f, 0xfc, 0x45, 0x60, 0xb8, 0xa1, 0x7b,
-  0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x91,
-  0x7c, 0xac, 0x08, 0xe2, 0x53, 0x44, 0x19, 0x06, 0x3a, 0xdc, 0x10, 0x8c,
-  0x61, 0x00, 0x06, 0xb3, 0x0c, 0x7d, 0xe1, 0x17, 0x81, 0x89, 0xdf, 0x10,
-  0x9f, 0x59, 0x82, 0xbf, 0x30, 0xe2, 0xfc, 0xe0, 0x33, 0x4b, 0xf0, 0x17,
-  0x03, 0x2d, 0x8f, 0x96, 0x17, 0x98, 0x5e, 0x10, 0x7d, 0x21, 0xf8, 0x05,
-  0xe9, 0xec, 0xc5, 0x05, 0xc3, 0x5c, 0xf0, 0xd4, 0x6d, 0x4f, 0xdd, 0xec,
-  0x0d, 0x73, 0xe4, 0x36, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1,
-  0x01, 0x80, 0x20, 0x18, 0x6c, 0x75, 0x18, 0x88, 0x61, 0xf0, 0x7f, 0x70,
-  0x18, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2,
-  0x68, 0x02, 0x31, 0x14, 0x91, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
-  0x60, 0x7c, 0x18, 0xa4, 0x61, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80,
-  0x20, 0x18, 0x60, 0x7d, 0x18, 0xa8, 0x61, 0x90, 0x10, 0xc1, 0x88, 0x01,
-  0x02, 0x80, 0x20, 0x18, 0x60, 0x7e, 0x18, 0xac, 0x61, 0x90, 0x10, 0xc1,
-  0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c, 0xa5, 0x18, 0xa8, 0x61, 0x90,
-  0x82, 0x41, 0x70, 0x87, 0x81, 0x0e, 0x06, 0x79, 0x18, 0x8c, 0x26, 0x04,
-  0xc0, 0x05, 0x4f, 0xcd, 0x12, 0x80, 0xc6, 0x40, 0xcb, 0x63, 0x1a, 0x6b,
-  0x41, 0x87, 0x82, 0x5a, 0xb0, 0x44, 0x5b, 0x08, 0x7f, 0x41, 0x87, 0x82,
-  0x5b, 0x98, 0x7e, 0xe8, 0x60, 0x00, 0x9f, 0x59, 0x86, 0xd0, 0x18, 0x0d,
-  0xfb, 0x18, 0x8e, 0x08, 0x78, 0x30, 0x18, 0xbe, 0x13, 0x86, 0x19, 0x6e,
-  0x08, 0x4e, 0x30, 0x20, 0x83, 0x1a, 0x02, 0x1d, 0x8e, 0xe0, 0x0f, 0x30,
-  0x0c, 0x86, 0xaf, 0x02, 0x41, 0xcf, 0x3f, 0x86, 0x19, 0x6e, 0x08, 0x54,
-  0x30, 0x20, 0x83, 0x0a, 0x06, 0x9d, 0x65, 0x10, 0x8d, 0xdb, 0x08, 0xce,
-  0xfd, 0x86, 0xb9, 0x7f, 0x1b, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c,
-  0x36, 0x57, 0x0c, 0xf6, 0x30, 0xc0, 0xc1, 0x20, 0x15, 0x83, 0xd1, 0x84,
-  0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86,
-  0x22, 0x0e, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xac, 0x16, 0x03,
-  0x51, 0x0c, 0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xcc,
-  0x16, 0x83, 0x51, 0x0c, 0x18, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04,
-  0x03, 0xec, 0x16, 0x03, 0x52, 0x0c, 0x24, 0x22, 0x18, 0x31, 0x50, 0x00,
-  0x10, 0x04, 0x83, 0xc5, 0x17, 0x83, 0x51, 0x0c, 0xc4, 0x30, 0x08, 0x60,
-  0x31, 0x98, 0xc3, 0x40, 0x16, 0x83, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xa9,
-  0x59, 0x82, 0xdb, 0x18, 0x6e, 0x70, 0x11, 0x5b, 0x0c, 0xc0, 0x60, 0x96,
-  0x81, 0x34, 0x4a, 0x23, 0xa8, 0x1e, 0x0c, 0x4c, 0x31, 0x80, 0x0b, 0x9e,
-  0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x0a, 0x1c, 0x83, 0x53, 0x0c,
-  0x36, 0x3b, 0x0c, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x0a, 0xc7,
-  0xe0, 0x14, 0x83, 0x40, 0xb8, 0x60, 0x98, 0x02, 0xc3, 0x60, 0x15, 0x03,
-  0xb8, 0xe0, 0xa9, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0xca, 0x31,
-  0x60, 0xc5, 0xe0, 0x46, 0xf6, 0x30, 0x18, 0x31, 0x38, 0x00, 0x10, 0x04,
-  0x03, 0xca, 0x1c, 0x03, 0x56, 0x0c, 0x02, 0xe1, 0x82, 0x61, 0x2e, 0x78,
-  0xea, 0x8e, 0xa7, 0x2e, 0x05, 0x83, 0x61, 0x4e, 0xe7, 0x86, 0x39, 0x62,
-  0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x6d, 0x1d,
-  0x03, 0x5c, 0x0c, 0xea, 0x30, 0x30, 0xc7, 0x60, 0x34, 0x21, 0x00, 0x46,
-  0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44,
-  0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x00, 0x93, 0xc7, 0xe0, 0x17, 0x83,
-  0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x00, 0x9b, 0xc7, 0x00,
-  0x1c, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x00, 0xa3,
-  0xc7, 0x20, 0x1c, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1,
-  0x60, 0xd9, 0xc7, 0x00, 0x1c, 0x83, 0x3f, 0x0c, 0x82, 0x76, 0x0c, 0x60,
-  0x31, 0x78, 0xc7, 0x60, 0x34, 0x21, 0x00, 0x2e, 0x78, 0x6a, 0x96, 0xe0,
-  0x36, 0x86, 0x1b, 0xd6, 0x44, 0x1e, 0x03, 0x30, 0x98, 0x65, 0x30, 0x8d,
-  0xdb, 0x08, 0x2c, 0x0e, 0x83, 0x39, 0x0c, 0xe2, 0x33, 0x1c, 0x71, 0x07,
-  0x74, 0x18, 0x10, 0xdf, 0x2c, 0xc3, 0x69, 0xa8, 0x46, 0x60, 0x75, 0x18,
-  0xe0, 0x41, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65,
-  0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xc1, 0x8f, 0x81, 0x0e, 0x37,
-  0x04, 0xfa, 0x18, 0x80, 0xc1, 0x2c, 0x03, 0x6a, 0xa4, 0x46, 0x60, 0x43,
-  0x1f, 0x06, 0xf0, 0x99, 0x25, 0x70, 0x0d, 0xe3, 0xc3, 0x80, 0x88, 0xcf,
-  0x2c, 0x81, 0x6b, 0x0c, 0x47, 0x88, 0x42, 0x1f, 0x06, 0xc2, 0x37, 0xcb,
-  0xb0, 0x1a, 0xae, 0x11, 0xd8, 0x28, 0xf8, 0x61, 0x10, 0x1f, 0x0b, 0x1c,
-  0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20,
-  0x3e, 0x45, 0x9c, 0x64, 0xa0, 0xc3, 0x0d, 0x41, 0x49, 0x06, 0x60, 0x30,
-  0xcb, 0xc0, 0x1a, 0xad, 0x11, 0x98, 0x29, 0x06, 0x43, 0x7c, 0x66, 0x09,
-  0x5c, 0xc3, 0x88, 0x54, 0x0c, 0xe0, 0x33, 0x4b, 0xe0, 0x1a, 0x03, 0x2d,
-  0x8f, 0x86, 0x1a, 0x58, 0x6a, 0x10, 0xac, 0x21, 0xb4, 0x86, 0x4e, 0xa8,
-  0xc6, 0x05, 0xc3, 0x18, 0x2a, 0x06, 0xac, 0x18, 0xc4, 0x67, 0x38, 0xe2,
-  0x54, 0x5a, 0x31, 0x20, 0xbe, 0x59, 0x86, 0xd7, 0x90, 0x8d, 0xc0, 0x5c,
-  0x31, 0x40, 0x95, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78,
-  0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0xa2, 0x26, 0x03, 0x1d,
-  0x6e, 0x08, 0x66, 0x32, 0x00, 0x83, 0x59, 0x06, 0xd8, 0x88, 0x8d, 0xc0,
-  0x06, 0x5b, 0x0c, 0xe0, 0x33, 0x4b, 0x60, 0x1b, 0x36, 0x8b, 0x01, 0x11,
-  0x9f, 0x59, 0x02, 0xdb, 0x18, 0x8e, 0x90, 0x15, 0x5a, 0x0c, 0x84, 0x6f,
-  0x96, 0x61, 0x36, 0x6c, 0x23, 0xb0, 0x59, 0xa9, 0xc5, 0x20, 0x3e, 0x16,
-  0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20, 0x92, 0x8f, 0x15,
-  0x41, 0x7c, 0x8a, 0x00, 0xcb, 0x40, 0x87, 0x1b, 0x02, 0x9f, 0x0c, 0xc0,
-  0x60, 0x96, 0x81, 0x36, 0x6a, 0x23, 0xb0, 0x5e, 0x0c, 0x86, 0xf8, 0xcc,
-  0x12, 0xd8, 0x86, 0x11, 0xe2, 0x18, 0xc0, 0x67, 0x96, 0xc0, 0x36, 0x06,
-  0x5a, 0x1e, 0x0d, 0x36, 0xb0, 0xd8, 0x20, 0x68, 0x43, 0xa8, 0x0d, 0xbd,
-  0x92, 0x8d, 0x0b, 0x86, 0xb9, 0xe0, 0xa9, 0xdb, 0x9e, 0x3a, 0x57, 0x0c,
-  0x86, 0xb9, 0xdf, 0x1b, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0,
-  0x00, 0x40, 0x10, 0x0c, 0x36, 0xb8, 0x0c, 0x7a, 0x32, 0xd0, 0xc7, 0x60,
-  0x2d, 0x83, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41,
-  0x18, 0x4d, 0x20, 0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04,
-  0x03, 0xec, 0x2e, 0x03, 0xb2, 0x0c, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00,
-  0x10, 0x04, 0x03, 0x0c, 0x2f, 0x83, 0xb2, 0x0c, 0x12, 0x22, 0x18, 0x31,
-  0x40, 0x00, 0x10, 0x04, 0x03, 0x2c, 0x2f, 0x03, 0xb3, 0x0c, 0x12, 0x22,
-  0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x05, 0x34, 0x83, 0xb2, 0x0c,
-  0x48, 0x32, 0x08, 0xe4, 0x32, 0xa8, 0xc9, 0x80, 0x2e, 0x83, 0xd1, 0x84,
-  0x00, 0xb8, 0xe0, 0xa9, 0x59, 0x82, 0xdb, 0x18, 0x68, 0x79, 0x4c, 0x43,
-  0x34, 0xd4, 0x54, 0x08, 0x0d, 0x96, 0x20, 0x0d, 0xc1, 0x36, 0xd4, 0x54,
-  0x28, 0x0d, 0xab, 0x97, 0x95, 0x0c, 0xe0, 0x33, 0xcb, 0x80, 0x1b, 0xba,
-  0x11, 0x2f, 0xc3, 0x11, 0xc1, 0x4d, 0x06, 0xc3, 0x77, 0xc2, 0x30, 0xc3,
-  0x0d, 0x81, 0x48, 0x06, 0x64, 0x50, 0x43, 0xa0, 0xc3, 0x11, 0xf7, 0xb2,
-  0x93, 0xc1, 0xf0, 0x55, 0x20, 0xe8, 0xe5, 0xcb, 0x30, 0xc3, 0x0d, 0x41,
-  0x49, 0x06, 0x64, 0x50, 0xc1, 0xa0, 0xb3, 0x0c, 0xb9, 0xe1, 0x1e, 0xc1,
-  0xa5, 0x63, 0x30, 0xcc, 0xe9, 0xdf, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82,
-  0x60, 0xb0, 0xa5, 0x66, 0x60, 0x97, 0xc1, 0x4c, 0x06, 0xa4, 0x19, 0x8c,
-  0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02,
-  0x31, 0x14, 0x71, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x60, 0xb0,
-  0x19, 0xf4, 0x65, 0x70, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
-  0x60, 0xb1, 0x19, 0xf8, 0x65, 0xc0, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80,
-  0x20, 0x18, 0x60, 0xb2, 0x19, 0xfc, 0x65, 0x20, 0x11, 0xc1, 0x88, 0x81,
-  0x02, 0x80, 0x20, 0x18, 0x2c, 0xb9, 0x19, 0xf8, 0x65, 0xd0, 0x93, 0x41,
-  0xb0, 0x9a, 0x81, 0x5b, 0x06, 0xad, 0x19, 0x8c, 0x26, 0x04, 0xc0, 0x05,
-  0x4f, 0xcd, 0x12, 0xb8, 0xc7, 0x70, 0x43, 0xca, 0xc4, 0x66, 0x00, 0x06,
-  0xb3, 0x0c, 0xbb, 0xc1, 0x1b, 0x41, 0xe1, 0x64, 0x10, 0x9a, 0x01, 0x5c,
-  0xf0, 0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50, 0xbb, 0x19, 0x88,
-  0x66, 0xb0, 0xc5, 0x65, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x14,
-  0x6f, 0x06, 0xa2, 0x19, 0x04, 0xc2, 0x05, 0xc3, 0xd4, 0x4e, 0x06, 0xa6,
-  0x19, 0xc0, 0x05, 0x4f, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x01, 0x05,
-  0x9e, 0xc1, 0x69, 0x06, 0x32, 0x63, 0x97, 0xc1, 0x88, 0xc1, 0x01, 0x80,
-  0x20, 0x18, 0x50, 0xe1, 0x19, 0x9c, 0x66, 0x10, 0x08, 0x17, 0x0c, 0x73,
-  0xc1, 0x53, 0x77, 0x3c, 0x75, 0x24, 0x19, 0x0c, 0x73, 0x35, 0x18, 0x0c,
-  0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06,
-  0x9b, 0x79, 0x06, 0xb3, 0x19, 0xc0, 0x65, 0x10, 0x9e, 0xc1, 0x68, 0x42,
-  0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43,
-  0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0xd6, 0x9e, 0x81,
-  0x6e, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0xe6,
-  0x9e, 0xc1, 0x6e, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
-  0x01, 0xf6, 0x9e, 0x01, 0x6f, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x28, 0x00,
-  0x08, 0x82, 0xc1, 0x62, 0x9f, 0xc1, 0x6e, 0x06, 0x7a, 0x19, 0x04, 0xe8,
-  0x19, 0xac, 0x66, 0xa0, 0x9e, 0xc1, 0x68, 0x42, 0x00, 0x5c, 0xf0, 0xd4,
-  0x2c, 0x81, 0x7b, 0x0c, 0x37, 0x98, 0x4d, 0x7b, 0x06, 0x60, 0x30, 0xcb,
-  0xd0, 0x1b, 0xee, 0x11, 0x18, 0x5b, 0x06, 0x6e, 0x19, 0xc4, 0x67, 0x38,
-  0xe2, 0x0e, 0xde, 0x32, 0x20, 0xbe, 0x59, 0x06, 0xdf, 0x08, 0x8f, 0xc0,
-  0xe0, 0x32, 0xc0, 0x83, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e,
-  0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0xe2, 0x3e, 0x03,
-  0x1d, 0x6e, 0x08, 0xea, 0x33, 0x00, 0x83, 0x59, 0x86, 0xdf, 0x00, 0x8f,
-  0xc0, 0x06, 0xbc, 0x0c, 0xe0, 0x33, 0x4b, 0x50, 0x1e, 0x76, 0x97, 0x01,
-  0x11, 0x9f, 0x59, 0x82, 0xf2, 0x18, 0x8e, 0x10, 0x05, 0xbc, 0x0c, 0x84,
-  0x6f, 0x96, 0x41, 0x3c, 0xca, 0x23, 0xb0, 0x51, 0xc8, 0xcb, 0x20, 0x3e,
-  0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20, 0x92, 0x8f,
-  0x15, 0x41, 0x7c, 0x8a, 0x10, 0xd1, 0x40, 0x87, 0x1b, 0x02, 0x10, 0x0d,
-  0xc0, 0x60, 0x96, 0x61, 0x3c, 0xc8, 0x23, 0xb0, 0xd0, 0x0c, 0x86, 0xf8,
-  0xcc, 0x12, 0x94, 0x87, 0x11, 0xa4, 0x19, 0xc0, 0x67, 0x96, 0xa0, 0x3c,
-  0x06, 0x5a, 0x1e, 0xed, 0x37, 0x30, 0xf0, 0x20, 0xc6, 0x43, 0x20, 0x0f,
-  0x9d, 0x08, 0x8f, 0x0b, 0x86, 0xb1, 0xd1, 0x0c, 0x4e, 0x33, 0x88, 0xcf,
-  0x70, 0x84, 0xe8, 0xa0, 0x66, 0x40, 0x7c, 0xb3, 0x0c, 0xe6, 0x91, 0x1e,
-  0x81, 0xa5, 0x66, 0x30, 0x3a, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3,
-  0x5c, 0xf0, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x04, 0x8c,
-  0x06, 0x3a, 0xdc, 0x10, 0xb8, 0x68, 0x00, 0x06, 0xb3, 0x0c, 0xe7, 0x81,
-  0x1e, 0x81, 0x0d, 0xb1, 0x19, 0xc0, 0x67, 0x96, 0xa0, 0x3d, 0xcc, 0x35,
-  0x03, 0x22, 0x3e, 0xb3, 0x04, 0xed, 0x31, 0x1c, 0xd1, 0x3a, 0xaf, 0x19,
-  0x08, 0xdf, 0x2c, 0x83, 0x7a, 0xb4, 0x47, 0x60, 0xae, 0x03, 0x9b, 0x41,
-  0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x41, 0x24,
-  0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xb1, 0xa3, 0x81, 0x0e, 0x37, 0x04, 0x39,
-  0x1a, 0x80, 0xc1, 0x2c, 0xc3, 0x7a, 0xb0, 0x47, 0x60, 0xb8, 0x19, 0x0c,
-  0xf1, 0x99, 0x25, 0x68, 0x0f, 0x23, 0x7a, 0x33, 0x80, 0xcf, 0x2c, 0x41,
-  0x7b, 0x0c, 0xb4, 0x3c, 0xda, 0x79, 0x60, 0xe8, 0x41, 0xac, 0x87, 0xc0,
-  0x1e, 0x70, 0x97, 0x1e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0xb7, 0x3d, 0x75,
-  0xa9, 0x19, 0x0c, 0x73, 0xba, 0x18, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c,
-  0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0xdb, 0x9a, 0x06, 0x38, 0x1a,
-  0xd4, 0x67, 0x60, 0xa6, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1,
-  0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20,
-  0x00, 0x08, 0x82, 0x01, 0x26, 0xa7, 0xc1, 0x8f, 0x06, 0x09, 0x11, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x36, 0xa7, 0x01, 0x98, 0x06, 0x09,
-  0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x46, 0xa7, 0x41, 0x98,
-  0x06, 0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0xb2, 0xa7,
-  0x01, 0x98, 0x06, 0xff, 0x19, 0x04, 0x6d, 0x1a, 0xc0, 0x68, 0xf0, 0xa6,
-  0xc1, 0x68, 0x42, 0x00, 0x5c, 0xf0, 0xd4, 0x2c, 0x81, 0x7b, 0x0c, 0xb4,
-  0x3c, 0xa6, 0x91, 0x1b, 0x60, 0x2c, 0xe0, 0x06, 0x4b, 0xec, 0x86, 0xd0,
-  0x1e, 0x60, 0x2c, 0xf0, 0xc6, 0x2c, 0xc3, 0x7b, 0xc4, 0xc7, 0xfa, 0x0c,
-  0x47, 0xbc, 0x4f, 0x8c, 0x06, 0xc3, 0x77, 0xf0, 0x33, 0xcc, 0x70, 0x43,
-  0xc0, 0x9f, 0x01, 0x19, 0xd4, 0x10, 0xe8, 0x70, 0x84, 0xfc, 0xd4, 0x68,
-  0x30, 0x7c, 0x15, 0x08, 0x7a, 0xf4, 0x33, 0xcc, 0x70, 0x43, 0xf0, 0x9f,
-  0x01, 0x19, 0x54, 0x30, 0xe8, 0x2c, 0x03, 0x7c, 0x94, 0x48, 0x70, 0xe3,
-  0x19, 0x0c, 0x73, 0xf4, 0x18, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08,
-  0x06, 0xdb, 0xa8, 0x06, 0x70, 0x1a, 0xb4, 0x68, 0xe0, 0xa7, 0xc1, 0x68,
-  0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10,
-  0x43, 0x11, 0x87, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0xa6, 0xaa,
-  0xc1, 0x9d, 0x06, 0x07, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01,
-  0xb6, 0xaa, 0x01, 0x9e, 0x06, 0x0c, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08,
-  0x82, 0x01, 0xc6, 0xaa, 0x41, 0x9e, 0x06, 0x12, 0x11, 0x8c, 0x18, 0x28,
-  0x00, 0x08, 0x82, 0xc1, 0x32, 0xab, 0x01, 0x9e, 0x06, 0x37, 0x1a, 0x04,
-  0xa5, 0x1a, 0xa0, 0x69, 0x70, 0xaa, 0xc1, 0x68, 0x42, 0x00, 0x5c, 0xf0,
-  0xd4, 0x2c, 0x41, 0x89, 0x0c, 0x37, 0x8c, 0xd0, 0xaa, 0x06, 0x60, 0x30,
-  0xcb, 0x20, 0x1f, 0xf3, 0x11, 0x94, 0x8c, 0x06, 0x7b, 0x1a, 0xc0, 0x05,
-  0x4f, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x01, 0x55, 0xab, 0x01, 0x9f,
-  0x06, 0x29, 0xb4, 0xa6, 0xc1, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50,
-  0xb6, 0x1a, 0xf0, 0x69, 0x10, 0x08, 0x17, 0x0c, 0x53, 0x35, 0x1a, 0x80,
-  0x6a, 0x00, 0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x94,
-  0xae, 0x06, 0xa1, 0x1a, 0xb4, 0x10, 0x9c, 0x06, 0x23, 0x06, 0x07, 0x00,
-  0x82, 0x60, 0x40, 0xed, 0x6a, 0x10, 0xaa, 0x41, 0x20, 0x5c, 0x30, 0xcc,
-  0x05, 0x4f, 0xdd, 0xf1, 0xd4, 0xf9, 0x67, 0x30, 0xcc, 0xbd, 0x64, 0x30,
-  0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18,
-  0x6c, 0xe0, 0x1a, 0xb4, 0x6a, 0xa0, 0xa6, 0xc1, 0xae, 0x06, 0xa3, 0x09,
-  0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c,
-  0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xd8, 0xb9, 0x06,
-  0xb4, 0x1a, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x18,
-  0xba, 0x06, 0xb5, 0x1a, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08,
-  0x06, 0x58, 0xba, 0x06, 0xb6, 0x1a, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00,
-  0x20, 0x08, 0x06, 0x0b, 0xbc, 0x06, 0xb5, 0x1a, 0xd0, 0x69, 0x10, 0x88,
-  0x6b, 0x50, 0xaa, 0x01, 0xb9, 0x06, 0xa3, 0x09, 0x01, 0x70, 0xc1, 0x53,
-  0xb3, 0x04, 0x25, 0x32, 0xdc, 0x00, 0x46, 0xe7, 0x1a, 0x80, 0xc1, 0x2c,
-  0x03, 0x7d, 0x94, 0x48, 0x60, 0x66, 0x1a, 0xa0, 0x69, 0x10, 0x9f, 0xe1,
-  0x88, 0x32, 0x4a, 0xd3, 0x80, 0xf8, 0x66, 0x19, 0xea, 0x03, 0x3f, 0x02,
-  0x53, 0xd3, 0xc0, 0x8c, 0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9,
-  0xe0, 0x29, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0x78, 0x0d,
-  0x74, 0xb8, 0x21, 0x78, 0xd7, 0x00, 0x0c, 0x66, 0x19, 0xec, 0xe3, 0x3e,
-  0x02, 0x1b, 0xe4, 0x34, 0x80, 0xcf, 0x2c, 0x01, 0x7f, 0x58, 0x9c, 0x06,
-  0x44, 0x7c, 0x66, 0x09, 0xf8, 0x63, 0x38, 0x02, 0x8e, 0xe4, 0x34, 0x10,
-  0xbe, 0x59, 0x86, 0xfc, 0xe0, 0x8f, 0xc0, 0xe2, 0x68, 0x4e, 0x83, 0xf8,
-  0x58, 0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca, 0x82, 0x48, 0x3e,
-  0x56, 0x04, 0xf1, 0x29, 0x82, 0x5f, 0x03, 0x1d, 0x6e, 0x08, 0xf4, 0x35,
-  0x00, 0x83, 0x59, 0x06, 0xfd, 0xd8, 0x8f, 0xc0, 0xf6, 0x34, 0x18, 0xe2,
-  0x33, 0x4b, 0xc0, 0x1f, 0x46, 0xf8, 0x69, 0x00, 0x9f, 0x59, 0x02, 0xfe,
-  0x18, 0x68, 0x79, 0x34, 0xfb, 0xc0, 0xee, 0x83, 0xd0, 0x0f, 0x61, 0x3f,
-  0xec, 0x31, 0xc0, 0x8f, 0x0b, 0x86, 0xb1, 0x3e, 0x0d, 0x42, 0x35, 0x88,
-  0xcf, 0x70, 0x44, 0x1f, 0x89, 0x6a, 0x40, 0x7c, 0xb3, 0x0c, 0xfd, 0x01,
-  0x22, 0x81, 0x8d, 0x6a, 0xe0, 0x47, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05,
-  0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x84,
-  0xca, 0x06, 0x3a, 0xdc, 0x10, 0xa0, 0x6c, 0x00, 0x06, 0xb3, 0x0c, 0xfe,
-  0xf1, 0x1f, 0x81, 0x0d, 0xab, 0x1a, 0xc0, 0x67, 0x96, 0x80, 0x44, 0x0c,
-  0x55, 0x03, 0x22, 0x3e, 0xb3, 0x04, 0x24, 0x32, 0x1c, 0x81, 0x4a, 0xa9,
-  0x1a, 0x08, 0xdf, 0x2c, 0x43, 0x88, 0x90, 0x48, 0x60, 0xa9, 0xa4, 0xaa,
-  0x41, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x41,
-  0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x51, 0xb3, 0x81, 0x0e, 0x37, 0x04,
-  0x33, 0x1b, 0x80, 0xc1, 0x2c, 0x83, 0x88, 0x8c, 0x48, 0x60, 0xb2, 0x1a,
-  0x0c, 0xf1, 0x99, 0x25, 0x20, 0x11, 0x23, 0x6e, 0x35, 0x80, 0xcf, 0x2c,
-  0x01, 0x89, 0x0c, 0xb4, 0x3c, 0x9a, 0x7f, 0x60, 0xff, 0x41, 0x88, 0x88,
-  0x30, 0x22, 0x66, 0x19, 0x80, 0xc8, 0x05, 0xc3, 0x5c, 0xf0, 0xd4, 0x6d,
-  0x4f, 0xdd, 0xa8, 0x06, 0xc3, 0x1c, 0x6d, 0x06, 0xc3, 0x1c, 0x31, 0xcc,
-  0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x56, 0xb6, 0x81,
-  0xcc, 0x06, 0xef, 0x1a, 0x80, 0x6d, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09,
-  0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23,
-  0x06, 0x08, 0x00, 0x82, 0x60, 0x80, 0xb1, 0x6d, 0x90, 0xb3, 0x41, 0x42,
-  0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x80, 0xb5, 0x6d, 0xa0, 0xb3,
-  0x41, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x80, 0xb9, 0x6d,
-  0xb0, 0xb3, 0x41, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0,
-  0xd4, 0x6d, 0xa0, 0xb3, 0x41, 0xbe, 0x06, 0xc1, 0xd9, 0x06, 0x2a, 0x1b,
-  0xa4, 0x6d, 0x30, 0x9a, 0x10, 0x00, 0x17, 0x3c, 0x35, 0x4b, 0x50, 0x22,
-  0x03, 0x2d, 0x8f, 0x69, 0xc0, 0x07, 0x8d, 0x0b, 0xef, 0xc1, 0x12, 0xf2,
-  0x21, 0x90, 0x08, 0x8d, 0x0b, 0xf3, 0x31, 0xcb, 0x60, 0x22, 0x28, 0x52,
-  0x4e, 0xc3, 0x11, 0xea, 0xb4, 0xb2, 0xc1, 0xf0, 0xdd, 0x3a, 0x0d, 0x33,
-  0xdc, 0x10, 0xd8, 0x6b, 0x40, 0x06, 0x35, 0x04, 0x3a, 0x1c, 0xc1, 0x4e,
-  0x2f, 0x1b, 0x0c, 0x5f, 0x05, 0x82, 0x9e, 0x3b, 0x0d, 0x33, 0xdc, 0x10,
-  0xe4, 0x6b, 0x40, 0x06, 0x15, 0x0c, 0x3a, 0xcb, 0x70, 0x22, 0x3c, 0x12,
-  0x5c, 0xaf, 0x06, 0xc3, 0x9c, 0x7b, 0x06, 0xc3, 0x8c, 0x18, 0x1c, 0x00,
-  0x08, 0x82, 0xc1, 0xd6, 0xb7, 0x81, 0xda, 0x06, 0x27, 0x1b, 0xe0, 0x6d,
-  0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3,
-  0x09, 0xc4, 0x50, 0xc4, 0x21, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x80,
-  0x91, 0x6e, 0x10, 0xb7, 0xc1, 0x41, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82,
-  0x60, 0x80, 0x95, 0x6e, 0x20, 0xb7, 0x01, 0x43, 0x04, 0x23, 0x06, 0x08,
-  0x00, 0x82, 0x60, 0x80, 0x99, 0x6e, 0x30, 0xb7, 0x81, 0x44, 0x04, 0x23,
-  0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0xb4, 0x6e, 0x20, 0xb7, 0x41, 0xcc,
-  0x06, 0xc1, 0xdf, 0x06, 0x62, 0x1b, 0x84, 0x6e, 0x30, 0x9a, 0x10, 0x00,
-  0x17, 0x3c, 0x35, 0x4b, 0xc0, 0x23, 0xc3, 0x0d, 0xfd, 0x54, 0xba, 0x01,
-  0x18, 0xcc, 0x32, 0xa4, 0x88, 0x8a, 0x04, 0xc5, 0xb2, 0x41, 0xdd, 0x06,
-  0x70, 0xc1, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0xbd, 0x6e,
-  0x60, 0xb7, 0x01, 0x49, 0x95, 0x6d, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08,
-  0x06, 0x14, 0xec, 0x06, 0x76, 0x1b, 0x04, 0xc2, 0x05, 0xc3, 0xd4, 0xcb,
-  0x06, 0x7a, 0x1b, 0xc0, 0x05, 0x4f, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82,
-  0x01, 0x45, 0xbb, 0xc1, 0xde, 0x06, 0x27, 0xa5, 0xb6, 0xc1, 0x88, 0xc1,
-  0x01, 0x80, 0x20, 0x18, 0x50, 0xb5, 0x1b, 0xec, 0x6d, 0x10, 0x08, 0x17,
-  0x0c, 0x73, 0xc1, 0x53, 0x77, 0x3c, 0x75, 0xf8, 0x1a, 0x0c, 0x73, 0x29,
-  0x1a, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20,
-  0x08, 0x06, 0x9b, 0xee, 0x06, 0xa7, 0x1b, 0x90, 0x6d, 0x50, 0xbb, 0xc1,
-  0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26,
-  0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x16,
-  0xbe, 0x81, 0xeb, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
-  0x01, 0x26, 0xbe, 0xc1, 0xeb, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00,
-  0x08, 0x82, 0x01, 0x36, 0xbe, 0x01, 0xec, 0x06, 0x09, 0x11, 0x8c, 0x18,
-  0x28, 0x00, 0x08, 0x82, 0xc1, 0xa2, 0xbe, 0xc1, 0xeb, 0x06, 0x6e, 0x1b,
-  0x04, 0xbc, 0x1b, 0xfc, 0x6d, 0xe0, 0xbb, 0xc1, 0x68, 0x42, 0x00, 0x5c,
-  0xf0, 0xd4, 0x2c, 0x01, 0x8f, 0x0c, 0x37, 0xe8, 0x54, 0xf8, 0x06, 0x60,
-  0x30, 0xcb, 0xb0, 0x22, 0x3c, 0x12, 0x18, 0xd8, 0x06, 0x62, 0x1b, 0xc4,
-  0x67, 0x38, 0x02, 0xac, 0xc6, 0x36, 0x20, 0xbe, 0x59, 0x06, 0x16, 0x79,
-  0x91, 0xc0, 0xc8, 0x36, 0x08, 0xab, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82,
-  0x61, 0x2e, 0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x62,
-  0x7d, 0x03, 0x1d, 0x6e, 0x08, 0xd2, 0x37, 0x00, 0x83, 0x59, 0x86, 0x16,
-  0x71, 0x91, 0xc0, 0x06, 0xb6, 0x0d, 0xe0, 0x33, 0x4b, 0x30, 0x23, 0xb6,
-  0xb6, 0x01, 0x11, 0x9f, 0x59, 0x82, 0x19, 0x19, 0x8e, 0x58, 0x2b, 0xb6,
-  0x0d, 0x84, 0x6f, 0x96, 0x01, 0x46, 0x66, 0x24, 0x30, 0xb6, 0x6a, 0xdb,
-  0x20, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20,
-  0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xb0, 0xdf, 0x40, 0x87, 0x1b, 0x02,
-  0xfa, 0x0d, 0xc0, 0x60, 0x96, 0x21, 0x46, 0x64, 0x24, 0xb0, 0xba, 0x0d,
-  0x86, 0xf8, 0xcc, 0x12, 0xcc, 0x88, 0x11, 0x78, 0x1b, 0xc0, 0x67, 0x96,
-  0x60, 0x46, 0x06, 0x5a, 0x1e, 0xad, 0x45, 0x30, 0x17, 0x21, 0x62, 0x44,
-  0x90, 0x11, 0x76, 0x0d, 0x5e, 0xe4, 0x82, 0x61, 0xec, 0x6e, 0x83, 0xbd,
-  0x0d, 0xe2, 0x33, 0x1c, 0x71, 0x57, 0x7c, 0x1b, 0x10, 0xdf, 0x2c, 0x03,
-  0x8d, 0xdc, 0x48, 0x60, 0x7d, 0x1b, 0xe0, 0x55, 0x7c, 0x2c, 0x18, 0xe8,
-  0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8,
-  0x14, 0x41, 0xc2, 0x81, 0x0e, 0x37, 0x04, 0x22, 0x1c, 0x80, 0xc1, 0x2c,
-  0x43, 0x8d, 0xd8, 0x48, 0x60, 0x43, 0xe9, 0x06, 0xf0, 0x99, 0x25, 0xd8,
-  0x11, 0x13, 0xdd, 0x80, 0x88, 0xcf, 0x2c, 0xc1, 0x8e, 0x0c, 0x47, 0x88,
-  0xd6, 0xe8, 0x06, 0xc2, 0x37, 0xcb, 0x80, 0x23, 0x3b, 0x12, 0xd8, 0x68,
-  0x91, 0x6e, 0x10, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f,
-  0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xbc, 0x70, 0xa0, 0xc3,
-  0x0d, 0x41, 0x0b, 0x07, 0x60, 0x30, 0xcb, 0x90, 0x23, 0x3a, 0x12, 0x18,
-  0xeb, 0x06, 0x43, 0x7c, 0x66, 0x09, 0x76, 0xc4, 0x88, 0xd8, 0x0d, 0xe0,
-  0x33, 0x4b, 0xb0, 0x23, 0x03, 0x2d, 0x8f, 0x56, 0x23, 0x98, 0x8d, 0x10,
-  0x39, 0x22, 0xe8, 0x88, 0xce, 0x06, 0x37, 0x72, 0xc1, 0x30, 0x17, 0x3c,
-  0x75, 0xdb, 0x53, 0xd7, 0xb7, 0xc1, 0x30, 0xe7, 0xaa, 0xc1, 0x30, 0x47,
-  0x0c, 0x73, 0xc4, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xb0, 0xfd,
-  0x70, 0xc0, 0xc2, 0x41, 0xfa, 0x06, 0x3a, 0x1c, 0x8c, 0x26, 0x04, 0xc0,
-  0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x91,
-  0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x60, 0x66, 0x1c, 0xcc, 0x70,
-  0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x60, 0x67, 0x1c,
-  0xd0, 0x70, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x60,
-  0x68, 0x1c, 0xd4, 0x70, 0x90, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20,
-  0x18, 0x2c, 0x6f, 0x1c, 0xd0, 0x70, 0x30, 0xbf, 0x41, 0x10, 0xc6, 0x01,
-  0x09, 0x07, 0x63, 0x1c, 0x8c, 0x26, 0x04, 0xc0, 0x05, 0x4f, 0xcd, 0x12,
-  0xf0, 0xc8, 0x40, 0xcb, 0x63, 0x1a, 0x27, 0x82, 0x82, 0x83, 0x89, 0xb0,
-  0x44, 0x8a, 0x08, 0x3b, 0x82, 0x82, 0x83, 0x8a, 0xcc, 0x32, 0xf4, 0xc8,
-  0x8f, 0xfc, 0xd6, 0x70, 0x84, 0xfa, 0x94, 0x70, 0x30, 0x7c, 0xb7, 0x3e,
-  0xc3, 0x0c, 0x37, 0x04, 0xf0, 0x1b, 0x90, 0x41, 0x0d, 0x81, 0x0e, 0x47,
-  0x98, 0x57, 0x0a, 0x07, 0xc3, 0x57, 0x81, 0xa0, 0x87, 0x5e, 0xc3, 0x0c,
-  0x37, 0x04, 0xf3, 0x1b, 0x90, 0x41, 0x05, 0x83, 0xce, 0x32, 0xf8, 0xc8,
-  0x9c, 0x04, 0x77, 0xbb, 0xc1, 0x30, 0x87, 0xae, 0xc1, 0x30, 0x23, 0x06,
-  0x07, 0x00, 0x82, 0x60, 0xb0, 0xdd, 0x71, 0x40, 0xc6, 0x41, 0x08, 0x07,
-  0x72, 0x1c, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c,
-  0xc2, 0x68, 0x02, 0x31, 0x14, 0x71, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20,
-  0x18, 0x60, 0x7e, 0x1c, 0xac, 0x71, 0x70, 0x10, 0xc1, 0x88, 0x01, 0x02,
-  0x80, 0x20, 0x18, 0x60, 0x7f, 0x1c, 0xb0, 0x71, 0xc0, 0x10, 0xc1, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x60, 0xa0, 0x1c, 0xb4, 0x71, 0x20, 0x11,
-  0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c, 0xa7, 0x1c, 0xb0, 0x71,
-  0xb0, 0xc2, 0x41, 0x90, 0xc7, 0x01, 0x0f, 0x07, 0x7b, 0x1c, 0x8c, 0x26,
-  0x04, 0xc0, 0x05, 0x4f, 0xcd, 0x12, 0xcc, 0xc9, 0x70, 0xc3, 0x7d, 0xfd,
-  0x71, 0x00, 0x06, 0xb3, 0x0c, 0x60, 0x12, 0x26, 0x41, 0x99, 0x70, 0xf0,
-  0xc6, 0x01, 0x5c, 0xf0, 0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50,
-  0xa9, 0x1c, 0xc0, 0x71, 0x40, 0x42, 0x3f, 0x1c, 0x8c, 0x18, 0x1c, 0x00,
-  0x08, 0x82, 0x01, 0xa5, 0xca, 0x01, 0x1c, 0x07, 0x81, 0x70, 0xc1, 0x30,
-  0x95, 0xc2, 0x01, 0x1d, 0x07, 0x70, 0xc1, 0x53, 0x23, 0x06, 0x07, 0x00,
-  0x82, 0x60, 0x40, 0xb9, 0x72, 0x50, 0xc7, 0x41, 0x88, 0x91, 0x71, 0x30,
-  0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0xd4, 0x2b, 0x07, 0x75, 0x1c, 0x04,
-  0xc2, 0x05, 0xc3, 0x5c, 0xf0, 0xd4, 0x1d, 0x4f, 0x9d, 0xfc, 0x06, 0xc3,
-  0xdc, 0xc8, 0x06, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c,
-  0x00, 0x08, 0x82, 0xc1, 0x46, 0xcb, 0x41, 0x28, 0x07, 0x3e, 0x1c, 0xbc,
-  0x72, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08,
-  0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60,
-  0x80, 0xed, 0x72, 0x80, 0xca, 0x41, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00,
-  0x82, 0x60, 0x80, 0xf1, 0x72, 0x90, 0xca, 0x41, 0x42, 0x04, 0x23, 0x06,
-  0x08, 0x00, 0x82, 0x60, 0x80, 0xf5, 0x72, 0xa0, 0xca, 0x41, 0x42, 0x04,
-  0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0x90, 0x73, 0x90, 0xca, 0x01,
-  0x1a, 0x07, 0x81, 0x2d, 0x07, 0x79, 0x1c, 0xe0, 0x72, 0x30, 0x9a, 0x10,
-  0x00, 0x17, 0x3c, 0x35, 0x4b, 0x30, 0x27, 0xc3, 0x0d, 0x34, 0xb6, 0xcb,
-  0x01, 0x18, 0xcc, 0x32, 0x88, 0xc9, 0x9c, 0x04, 0xa6, 0xc3, 0x01, 0x0f,
-  0x07, 0xf1, 0x19, 0x8e, 0x00, 0xa3, 0x1e, 0x0e, 0x88, 0x6f, 0x96, 0x61,
-  0x4c, 0xcc, 0x24, 0x30, 0x1f, 0x0e, 0xc2, 0x28, 0x3e, 0x16, 0x0c, 0xf4,
-  0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0xc0, 0x90, 0x8f, 0x15, 0x41, 0x7c,
-  0x8a, 0x28, 0xe7, 0x40, 0x87, 0x1b, 0x82, 0x71, 0x0e, 0xc0, 0x60, 0x96,
-  0x81, 0x4c, 0xca, 0x24, 0xb0, 0xc1, 0x8c, 0x03, 0xf8, 0xcc, 0x12, 0xa8,
-  0x89, 0x95, 0x71, 0x40, 0xc4, 0x67, 0x96, 0x40, 0x4d, 0x86, 0x23, 0xd6,
-  0xc8, 0x8c, 0x03, 0xe1, 0x9b, 0x65, 0x38, 0x13, 0x35, 0x09, 0x8c, 0x8d,
-  0xce, 0x38, 0x88, 0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7,
-  0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xe0, 0x39, 0xd0, 0xe1,
-  0x86, 0xc0, 0x9d, 0x03, 0x30, 0x98, 0x65, 0x40, 0x93, 0x34, 0x09, 0xec,
-  0x8d, 0x83, 0x21, 0x3e, 0xb3, 0x04, 0x6a, 0x62, 0x84, 0x1c, 0x07, 0xf0,
-  0x99, 0x25, 0x50, 0x93, 0x81, 0x96, 0x47, 0x23, 0x13, 0xac, 0x4c, 0x08,
-  0x34, 0x11, 0xd2, 0x84, 0x1d, 0x03, 0x33, 0xb9, 0x60, 0x18, 0x8b, 0xe3,
-  0xa0, 0x8e, 0x83, 0xf8, 0x0c, 0x47, 0xc4, 0x99, 0x1d, 0x07, 0xc4, 0x37,
-  0xcb, 0xb0, 0x26, 0x6e, 0x12, 0xd8, 0x1d, 0x07, 0x72, 0x16, 0x1f, 0x0b,
-  0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x60, 0xc8, 0xc7, 0x8a,
-  0x20, 0x3e, 0x45, 0xf8, 0x73, 0xa0, 0xc3, 0x0d, 0x01, 0x3f, 0x07, 0x60,
-  0x30, 0xcb, 0xc0, 0x26, 0x6d, 0x12, 0xd8, 0xf0, 0xc7, 0x01, 0x7c, 0x66,
-  0x09, 0xe4, 0xc4, 0xf8, 0x38, 0x20, 0xe2, 0x33, 0x4b, 0x20, 0x27, 0xc3,
-  0x11, 0x7c, 0xd6, 0xc7, 0x81, 0xf0, 0xcd, 0x32, 0xbc, 0x89, 0x9c, 0x04,
-  0xd6, 0x67, 0x7e, 0x1c, 0xc4, 0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73,
-  0xc1, 0x53, 0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x29, 0x1d,
-  0xe8, 0x70, 0x43, 0x70, 0xd2, 0x01, 0x18, 0xcc, 0x32, 0xc0, 0x49, 0x9c,
-  0x04, 0x66, 0xca, 0xc1, 0x10, 0x9f, 0x59, 0x02, 0x39, 0x31, 0x62, 0x95,
-  0x03, 0xf8, 0xcc, 0x12, 0xc8, 0xc9, 0x40, 0xcb, 0xa3, 0xb1, 0x09, 0xd6,
-  0x26, 0x04, 0x9c, 0x08, 0x71, 0xe2, 0xd2, 0x81, 0x9b, 0x5c, 0x30, 0xcc,
-  0x05, 0x4f, 0xdd, 0xf6, 0xd4, 0xdd, 0x71, 0x30, 0xcc, 0xa1, 0x6e, 0x30,
-  0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18,
-  0x6c, 0x39, 0x1d, 0x98, 0x74, 0x30, 0xce, 0x01, 0x4d, 0x07, 0xa3, 0x09,
-  0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c,
-  0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x18, 0x58, 0x07,
-  0x2d, 0x1d, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x58,
-  0x58, 0x07, 0x2e, 0x1d, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08,
-  0x06, 0x98, 0x58, 0x07, 0x2f, 0x1d, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00,
-  0x20, 0x08, 0x06, 0x4b, 0x5a, 0x07, 0x2e, 0x1d, 0xb4, 0x73, 0x10, 0xec,
-  0x74, 0xe0, 0xcf, 0x41, 0x4f, 0x07, 0xa3, 0x09, 0x01, 0x70, 0xc1, 0x53,
-  0xb3, 0x04, 0x73, 0x32, 0xd0, 0xf2, 0x98, 0x86, 0x8f, 0xf0, 0xe7, 0xd0,
-  0x23, 0x2c, 0x01, 0x26, 0x82, 0x9c, 0xf0, 0xe7, 0x10, 0x26, 0xb3, 0x0c,
-  0x74, 0x62, 0x27, 0xb9, 0x36, 0x1c, 0x91, 0x3e, 0xff, 0x1c, 0x0c, 0xdf,
-  0xa9, 0xcf, 0x30, 0xc3, 0x0d, 0x81, 0x3a, 0x07, 0x64, 0x50, 0x43, 0xa0,
-  0xc3, 0x11, 0xe0, 0x36, 0xd2, 0xc1, 0xf0, 0x55, 0x20, 0xe8, 0x89, 0xdb,
-  0x30, 0xc3, 0x0d, 0x41, 0x3b, 0x07, 0x64, 0x50, 0xc1, 0xa0, 0xb3, 0x0c,
-  0x75, 0xa2, 0x2a, 0xc1, 0xc5, 0x72, 0x30, 0xcc, 0x89, 0x6f, 0x30, 0xcc,
-  0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x6c, 0x71, 0x1d, 0xf8, 0x74, 0xb0,
-  0xcf, 0x01, 0x5b, 0x07, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3,
-  0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x1c, 0x32, 0x62, 0x80, 0x00,
-  0x20, 0x08, 0x06, 0x18, 0x5e, 0x07, 0x65, 0x1d, 0x1c, 0x44, 0x30, 0x62,
-  0x80, 0x00, 0x20, 0x08, 0x06, 0x58, 0x5e, 0x07, 0x66, 0x1d, 0x30, 0x44,
-  0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x98, 0x5e, 0x07, 0x67, 0x1d,
-  0x48, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x4b, 0x68, 0x07,
-  0x66, 0x1d, 0x94, 0x74, 0x10, 0xcc, 0x75, 0x60, 0xd3, 0x41, 0x5d, 0x07,
-  0xa3, 0x09, 0x01, 0x70, 0xc1, 0x53, 0xb3, 0x04, 0xaa, 0x32, 0xdc, 0x10,
-  0x6f, 0x79, 0x1d, 0x80, 0xc1, 0x2c, 0xc3, 0x9d, 0xe0, 0x49, 0x50, 0x20,
-  0x1d, 0xa4, 0x75, 0x00, 0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08,
-  0x06, 0xd4, 0x68, 0x07, 0x6a, 0x1d, 0x8c, 0x50, 0x4e, 0x07, 0x23, 0x06,
-  0x07, 0x00, 0x82, 0x60, 0x40, 0x91, 0x76, 0xa0, 0xd6, 0x41, 0x20, 0x5c,
-  0x30, 0x4c, 0x8d, 0x74, 0xe0, 0xd6, 0x01, 0x5c, 0xf0, 0xd4, 0x88, 0xc1,
-  0x01, 0x80, 0x20, 0x18, 0x50, 0xa8, 0x1d, 0xbc, 0x75, 0xb0, 0x6f, 0x3e,
-  0x1d, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x01, 0x95, 0xda, 0xc1, 0x5b,
-  0x07, 0x81, 0x70, 0xc1, 0x30, 0x17, 0x3c, 0x75, 0xc7, 0x53, 0xc7, 0xce,
-  0xc1, 0x30, 0xd7, 0xbf, 0xc1, 0x30, 0x47, 0x0c, 0x73, 0xc4, 0x30, 0x23,
-  0x06, 0x07, 0x00, 0x82, 0x60, 0xb0, 0xb9, 0x76, 0xb0, 0xd7, 0x01, 0x4e,
-  0x07, 0xa9, 0x1d, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26,
-  0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x91, 0xc8, 0x88, 0x01, 0x02, 0x80,
-  0x20, 0x18, 0x60, 0xb5, 0x1d, 0x88, 0x76, 0x90, 0x10, 0xc1, 0x88, 0x01,
-  0x02, 0x80, 0x20, 0x18, 0x60, 0xb6, 0x1d, 0x8c, 0x76, 0x90, 0x10, 0xc1,
-  0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x60, 0xb7, 0x1d, 0x90, 0x76, 0x90,
-  0x10, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c, 0xbe, 0x1d, 0x8c,
-  0x76, 0x20, 0xd6, 0x41, 0x00, 0xdb, 0xc1, 0x5c, 0x07, 0xb2, 0x1d, 0x8c,
-  0x26, 0x04, 0xc0, 0x05, 0x4f, 0xcd, 0x12, 0xa8, 0xca, 0x70, 0x83, 0xcb,
-  0xd5, 0x76, 0x00, 0x06, 0xb3, 0x0c, 0x79, 0xa2, 0x2a, 0x81, 0xd1, 0x74,
-  0x60, 0xd3, 0x41, 0x7c, 0x86, 0x23, 0x7e, 0xe8, 0xa6, 0x03, 0xe2, 0x9b,
-  0x65, 0xd0, 0x93, 0x3e, 0x09, 0x0c, 0xa7, 0x03, 0x30, 0x8a, 0x8f, 0x05,
-  0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x30, 0xe4, 0x63, 0x45,
-  0x10, 0x9f, 0x22, 0x7e, 0x3b, 0xd0, 0xe1, 0x86, 0xa0, 0xb7, 0x03, 0x30,
-  0x98, 0x65, 0xd8, 0x13, 0x3e, 0x09, 0x6c, 0x00, 0xeb, 0x00, 0x3e, 0xb3,
-  0x04, 0xa1, 0x62, 0x3f, 0x1d, 0x10, 0xf1, 0x99, 0x25, 0x08, 0x95, 0xe1,
-  0x08, 0x35, 0x02, 0xeb, 0x40, 0xf8, 0x66, 0x19, 0xfc, 0x24, 0x54, 0x02,
-  0x5b, 0xa3, 0xb0, 0x0e, 0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9,
-  0xe0, 0x29, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0xf5, 0x0e,
-  0x74, 0xb8, 0x21, 0x40, 0xef, 0x00, 0x0c, 0x66, 0x19, 0xfe, 0x04, 0x54,
-  0x02, 0x4b, 0xeb, 0x60, 0x88, 0xcf, 0x2c, 0x41, 0xa8, 0x18, 0xc1, 0xd6,
-  0x01, 0x7c, 0x66, 0x09, 0x42, 0x65, 0xa0, 0xe5, 0xd1, 0xf6, 0x04, 0xe3,
-  0x13, 0xe2, 0x4f, 0x04, 0x50, 0x51, 0xc7, 0xa0, 0x4f, 0x2e, 0x18, 0xc6,
-  0xd6, 0x3a, 0x78, 0xeb, 0x20, 0x3e, 0xc3, 0x11, 0x6b, 0x07, 0xd7, 0x01,
-  0xf1, 0xcd, 0x32, 0x88, 0x4a, 0xa9, 0x04, 0x16, 0xd7, 0x01, 0xdb, 0xc5,
-  0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x18, 0xf2,
-  0xb1, 0x22, 0x88, 0x4f, 0x11, 0xf8, 0x1d, 0xe8, 0x70, 0x43, 0x60, 0xdf,
-  0x01, 0x18, 0xcc, 0x32, 0x8c, 0x0a, 0xa9, 0x04, 0x36, 0xe4, 0x75, 0x00,
-  0x9f, 0x59, 0x82, 0x54, 0x31, 0xbb, 0x0e, 0x88, 0xf8, 0xcc, 0x12, 0xa4,
-  0xca, 0x70, 0x84, 0xdd, 0xdd, 0x75, 0x20, 0x7c, 0xb3, 0x0c, 0xa6, 0x92,
-  0x2a, 0x81, 0xdd, 0x1d, 0x5e, 0x07, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05,
-  0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0xc4,
-  0x88, 0x07, 0x3a, 0xdc, 0x10, 0x84, 0x78, 0x00, 0x06, 0xb3, 0x0c, 0xa7,
-  0x82, 0x2a, 0x81, 0x81, 0x76, 0x30, 0xc4, 0x67, 0x96, 0x20, 0x55, 0x8c,
-  0x28, 0xed, 0x00, 0x3e, 0xb3, 0x04, 0xa9, 0x32, 0xd0, 0xf2, 0x68, 0xa3,
-  0x82, 0x91, 0x0a, 0x71, 0x2a, 0x02, 0xaa, 0x88, 0x7c, 0x50, 0x2a, 0x17,
-  0x0c, 0x73, 0xc1, 0x53, 0xb7, 0x3d, 0x75, 0x71, 0x1d, 0x0c, 0x73, 0xa2,
-  0x1c, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20,
-  0x08, 0x06, 0xdb, 0x8c, 0x07, 0x20, 0x1e, 0xf4, 0x76, 0xe0, 0xe2, 0xc1,
-  0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26,
-  0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0xa6,
-  0xe3, 0xc1, 0x89, 0x07, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
-  0x01, 0xb6, 0xe3, 0x01, 0x8a, 0x07, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00,
-  0x08, 0x82, 0x01, 0xc6, 0xe3, 0x41, 0x8a, 0x07, 0x09, 0x11, 0x8c, 0x18,
-  0x28, 0x00, 0x08, 0x82, 0xc1, 0x32, 0xe6, 0x01, 0x8a, 0x07, 0xe7, 0x1d,
-  0x04, 0x35, 0x1e, 0xe0, 0x77, 0x70, 0xe3, 0xc1, 0x68, 0x42, 0x00, 0x5c,
-  0xf0, 0xd4, 0x2c, 0x81, 0xaa, 0x0c, 0xb4, 0x3c, 0xa6, 0x51, 0x27, 0xf0,
-  0x3b, 0xd0, 0x09, 0x4b, 0xdc, 0x89, 0x90, 0x2a, 0xf0, 0x3b, 0xe0, 0x89,
-  0xd9, 0x1e, 0x7e, 0x07, 0xf0, 0x99, 0x65, 0x58, 0x95, 0x56, 0xa1, 0xbd,
-  0xe1, 0x08, 0xdc, 0xd3, 0xef, 0x60, 0xf8, 0x2e, 0xf7, 0x86, 0x19, 0x6e,
-  0x08, 0xca, 0x3b, 0x20, 0x83, 0x1a, 0x02, 0x1d, 0x8e, 0x28, 0xfc, 0x3b,
-  0x18, 0xbe, 0x0a, 0x04, 0xbd, 0x63, 0x98, 0xe1, 0x86, 0x00, 0xbd, 0x03,
-  0x32, 0xa8, 0x60, 0xd0, 0x59, 0x06, 0x56, 0x09, 0x97, 0xe0, 0x58, 0x3b,
-  0x18, 0xe6, 0x7a, 0x39, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c,
-  0x36, 0x36, 0x0f, 0x72, 0x3c, 0xb0, 0xef, 0xe0, 0xcc, 0x83, 0xd1, 0x84,
-  0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86,
-  0x22, 0x0e, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x6c, 0xce, 0x03,
-  0x30, 0x0f, 0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x8c,
-  0xce, 0x83, 0x30, 0x0f, 0x18, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04,
-  0x03, 0xac, 0xce, 0x03, 0x31, 0x0f, 0x24, 0x22, 0x18, 0x31, 0x50, 0x00,
-  0x10, 0x04, 0x83, 0x85, 0xcf, 0x83, 0x30, 0x0f, 0x40, 0x3c, 0x08, 0xdc,
-  0x3c, 0x88, 0xf1, 0x00, 0xce, 0x83, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xa9,
-  0x59, 0x82, 0x70, 0x19, 0x6e, 0x60, 0x3f, 0x3a, 0x0f, 0xc0, 0x60, 0x96,
-  0xc1, 0x55, 0x5e, 0x25, 0xa8, 0xfd, 0x0e, 0xc8, 0x3c, 0x80, 0x0b, 0x9e,
-  0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0xca, 0xcf, 0x83, 0x32, 0x0f,
-  0xe4, 0x8f, 0xc6, 0x83, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0xfe,
-  0x3c, 0x28, 0xf3, 0x20, 0x10, 0x2e, 0x18, 0xa6, 0xfc, 0x3b, 0x48, 0xf3,
-  0x00, 0x2e, 0x78, 0x6a, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xa8, 0x51,
-  0x0f, 0xd4, 0x3c, 0x00, 0x83, 0x1c, 0x0f, 0x46, 0x0c, 0x0e, 0x00, 0x04,
-  0xc1, 0x80, 0x22, 0xf5, 0x40, 0xcd, 0x83, 0x40, 0xb8, 0x60, 0x98, 0x0b,
-  0x9e, 0xba, 0xe3, 0xa9, 0x3b, 0xef, 0x60, 0x98, 0xc3, 0xe7, 0x60, 0x98,
-  0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xd8,
-  0x52, 0x3d, 0xb0, 0xf3, 0x60, 0xc6, 0x03, 0x52, 0x0f, 0x46, 0x13, 0x02,
-  0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a,
-  0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x30, 0x58, 0x0f, 0xfa,
-  0x3c, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb0, 0x58,
-  0x0f, 0xfc, 0x3c, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c,
-  0x30, 0x59, 0x0f, 0xfe, 0x3c, 0x48, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40,
-  0x10, 0x0c, 0x96, 0x5c, 0x0f, 0xfc, 0x3c, 0xe8, 0xf1, 0x20, 0x58, 0xf5,
-  0xc0, 0xcd, 0x83, 0x56, 0x0f, 0x46, 0x13, 0x02, 0xe0, 0x82, 0xa7, 0x66,
-  0x09, 0xc2, 0x65, 0xb8, 0x21, 0x05, 0x03, 0x58, 0x0f, 0xc0, 0x60, 0x96,
-  0x01, 0x56, 0xc2, 0x25, 0xb0, 0x17, 0x0f, 0x62, 0x3c, 0x88, 0xcf, 0x70,
-  0x84, 0x0b, 0x06, 0x32, 0x1e, 0x10, 0xdf, 0x2c, 0x43, 0xac, 0xd0, 0x4a,
-  0x60, 0x33, 0x1e, 0xbc, 0x60, 0x10, 0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30,
-  0xcc, 0x05, 0x4f, 0x59, 0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xe8,
-  0x7a, 0xa0, 0xc3, 0x0d, 0x01, 0xae, 0x07, 0x60, 0x30, 0xcb, 0x20, 0x2b,
-  0xb3, 0x12, 0xd8, 0xb0, 0xe3, 0x01, 0x7c, 0x66, 0x09, 0x70, 0xc5, 0x74,
-  0x3c, 0x20, 0xe2, 0x33, 0x4b, 0x80, 0x2b, 0xc3, 0x11, 0x39, 0x18, 0xec,
-  0x78, 0x20, 0x7c, 0xb3, 0x0c, 0xb5, 0x82, 0x2b, 0x81, 0xe9, 0x60, 0xc0,
-  0xe3, 0x41, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65,
-  0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x51, 0xee, 0x81, 0x0e, 0x37,
-  0x04, 0xe3, 0x1e, 0x80, 0xc1, 0x2c, 0x83, 0xad, 0xdc, 0x4a, 0x60, 0x64,
-  0x1e, 0x0c, 0xf1, 0x99, 0x25, 0xc0, 0x15, 0x23, 0xce, 0x3c, 0x80, 0xcf,
-  0x2c, 0x01, 0xae, 0x0c, 0xb4, 0x3c, 0x9a, 0xac, 0x60, 0xb3, 0x42, 0xd8,
-  0x8a, 0x70, 0x2b, 0xa8, 0x28, 0xd0, 0xca, 0x05, 0xc3, 0x98, 0x99, 0x07,
-  0x6a, 0x1e, 0xc4, 0x67, 0x38, 0x02, 0x16, 0xd6, 0x3c, 0x20, 0xbe, 0x59,
-  0x86, 0x5c, 0xe1, 0x95, 0xc0, 0xd8, 0x3c, 0x88, 0x85, 0xf8, 0x58, 0x30,
-  0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04,
-  0xf1, 0x29, 0x62, 0xde, 0x03, 0x1d, 0x6e, 0x08, 0xe2, 0x3d, 0x00, 0x83,
-  0x59, 0x06, 0x5d, 0xd9, 0x95, 0xc0, 0x06, 0x3a, 0x0f, 0xe0, 0x33, 0x4b,
-  0x00, 0x2e, 0x16, 0xe7, 0x01, 0x11, 0x9f, 0x59, 0x02, 0x70, 0x19, 0x8e,
-  0xd8, 0x05, 0x39, 0x0f, 0x84, 0x6f, 0x96, 0xa1, 0x57, 0xc0, 0x25, 0x30,
-  0x5e, 0x98, 0xf3, 0x20, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b,
-  0x9e, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xf0, 0xf7, 0x40,
-  0x87, 0x1b, 0x02, 0x7e, 0x0f, 0xc0, 0x60, 0x96, 0xc1, 0x57, 0x7e, 0x25,
-  0xb0, 0x3d, 0x0f, 0x86, 0xf8, 0xcc, 0x12, 0x80, 0x8b, 0x11, 0xa0, 0x1e,
-  0xc0, 0x67, 0x96, 0x00, 0x5c, 0x06, 0x5a, 0x1e, 0x4d, 0x57, 0xb0, 0x5d,
-  0x21, 0x7c, 0x45, 0xf8, 0x15, 0xd6, 0xe0, 0x95, 0x0b, 0x86, 0xb9, 0xe0,
-  0xa9, 0xdb, 0x9e, 0x3a, 0x36, 0x0f, 0x86, 0xb9, 0xbe, 0x0e, 0x86, 0x39,
-  0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0xcd,
-  0xe5, 0x83, 0x7d, 0x0f, 0x70, 0x3d, 0x48, 0xf9, 0x60, 0x34, 0x21, 0x00,
-  0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88,
-  0x44, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x00, 0xab, 0xf9, 0x40, 0xe4,
-  0x83, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x00, 0xb3, 0xf9,
-  0x60, 0xe4, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x00,
-  0xbb, 0xf9, 0x80, 0xe4, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04,
-  0xc1, 0x60, 0xf1, 0xf9, 0x60, 0xe4, 0x03, 0x71, 0x0f, 0x02, 0x98, 0x0f,
-  0xe6, 0x3d, 0x90, 0xf9, 0x60, 0x34, 0x21, 0x00, 0x2e, 0x78, 0x6a, 0x96,
-  0x20, 0x5c, 0x06, 0x5a, 0x1e, 0xd3, 0x60, 0x15, 0xf3, 0x1e, 0x56, 0x85,
-  0x25, 0x5c, 0x45, 0x00, 0x17, 0xf3, 0x1e, 0x5e, 0x65, 0x96, 0x41, 0x5c,
-  0xc8, 0xc5, 0x15, 0x83, 0xe1, 0x88, 0x59, 0x0c, 0xe8, 0x3d, 0x18, 0xbe,
-  0xa3, 0xc5, 0x60, 0x98, 0xe1, 0x86, 0xe0, 0xd7, 0x03, 0x32, 0xa8, 0x21,
-  0xd0, 0xe1, 0x88, 0x7f, 0xc0, 0xf7, 0x60, 0xf8, 0x2a, 0x10, 0xf4, 0x42,
-  0x62, 0x98, 0xe1, 0x86, 0x40, 0xdc, 0x03, 0x32, 0xa8, 0x60, 0xd0, 0x59,
-  0x86, 0x71, 0xc1, 0x97, 0xe0, 0x4c, 0x3d, 0x18, 0xe6, 0x6e, 0x3b, 0x18,
-  0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x36, 0xb3, 0x0f, 0x66, 0x3e,
-  0x80, 0xf7, 0x20, 0xec, 0x83, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82,
-  0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x0e, 0x19, 0x31, 0x40,
-  0x00, 0x10, 0x04, 0x03, 0xac, 0xed, 0x03, 0x9d, 0x0f, 0x0e, 0x22, 0x18,
-  0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xcc, 0xed, 0x83, 0x9d, 0x0f, 0x18,
-  0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xec, 0xed, 0x03, 0x9e,
-  0x0f, 0x24, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0xc5, 0xee,
-  0x83, 0x9d, 0x0f, 0xf4, 0x3d, 0x08, 0xd0, 0x3e, 0x58, 0xf9, 0x40, 0xed,
-  0x83, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xa9, 0x59, 0x02, 0x7c, 0x19, 0x6e,
-  0x30, 0xc7, 0xc0, 0xed, 0x03, 0x30, 0x98, 0x65, 0x28, 0x17, 0x73, 0x09,
-  0xaa, 0xde, 0x03, 0x9f, 0x0f, 0xe0, 0x82, 0xa7, 0x46, 0x0c, 0x0e, 0x00,
-  0x04, 0xc1, 0x80, 0xc2, 0xfb, 0xe0, 0xe7, 0x83, 0x76, 0x0c, 0x5c, 0x3e,
-  0x18, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x2a, 0xef, 0x83, 0x9f, 0x0f,
-  0x02, 0xe1, 0x82, 0x61, 0x0a, 0xdf, 0x83, 0xb1, 0x0f, 0xe0, 0x82, 0xa7,
-  0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0xea, 0xfb, 0x80, 0xec, 0x03,
-  0x9d, 0x98, 0xf9, 0x60, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x28, 0xbf,
-  0x0f, 0xc8, 0x3e, 0x08, 0x84, 0x0b, 0x86, 0xb9, 0xe0, 0xa9, 0x3b, 0x9e,
-  0xba, 0x70, 0x0f, 0x86, 0x39, 0xf9, 0x0e, 0x86, 0x39, 0x62, 0x98, 0x23,
-  0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x6d, 0xf4, 0x03, 0xb8,
-  0x0f, 0x5a, 0x3e, 0xf0, 0xfb, 0x60, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84,
-  0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c,
-  0x10, 0x00, 0x04, 0xc1, 0x00, 0x53, 0xfd, 0xe0, 0xee, 0x83, 0x84, 0x08,
-  0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x00, 0x5b, 0xfd, 0x00, 0xef, 0x83,
-  0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x00, 0x63, 0xfd, 0x20,
-  0xef, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0x99,
-  0xfd, 0x00, 0xef, 0x83, 0x9b, 0x0f, 0x82, 0xd2, 0x0f, 0xd0, 0x3e, 0x38,
-  0xfd, 0x60, 0x34, 0x21, 0x00, 0x2e, 0x78, 0x6a, 0x96, 0x00, 0x5f, 0x86,
-  0x1b, 0x46, 0x32, 0x50, 0xfd, 0x00, 0x0c, 0x66, 0x19, 0xce, 0x05, 0x5f,
-  0x02, 0x4b, 0xf9, 0x60, 0xe5, 0x83, 0xf8, 0x0c, 0x47, 0xa4, 0x64, 0xc0,
-  0xf2, 0x01, 0xf1, 0xcd, 0x32, 0xa0, 0xcb, 0xba, 0x04, 0xd6, 0xf2, 0x81,
-  0x4a, 0x06, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94,
-  0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x04, 0xed, 0x07, 0x3a, 0xdc,
-  0x10, 0xc8, 0x7e, 0x00, 0x06, 0xb3, 0x0c, 0xe9, 0xa2, 0x2e, 0x81, 0x0d,
-  0x35, 0x1f, 0xc0, 0x67, 0x96, 0xe0, 0x5d, 0x8c, 0xe6, 0x03, 0x22, 0x3e,
-  0xb3, 0x04, 0xef, 0x32, 0x1c, 0x41, 0x93, 0x41, 0xcd, 0x07, 0xc2, 0x37,
-  0xcb, 0xc0, 0x2e, 0xef, 0x12, 0x58, 0x4d, 0x06, 0x36, 0x1f, 0xc4, 0xc7,
-  0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x44, 0xf2, 0xb1,
-  0x22, 0x88, 0x4f, 0x11, 0xbf, 0x1f, 0xe8, 0x70, 0x43, 0xd0, 0xfb, 0x01,
-  0x18, 0xcc, 0x32, 0xb4, 0x8b, 0xbb, 0x04, 0xe6, 0xf3, 0xc1, 0x10, 0x9f,
-  0x59, 0x82, 0x77, 0x31, 0x22, 0xec, 0x03, 0xf8, 0xcc, 0x12, 0xbc, 0xcb,
-  0x40, 0xcb, 0xa3, 0xa5, 0x0b, 0xa6, 0x2e, 0x44, 0xbb, 0x08, 0xee, 0xe2,
-  0xa7, 0xc2, 0xba, 0x5c, 0x30, 0x8c, 0x81, 0x7d, 0x40, 0xf6, 0x41, 0x7c,
-  0x86, 0x23, 0x54, 0xa3, 0xec, 0x03, 0xe2, 0x9b, 0x65, 0x80, 0x97, 0x79,
-  0x09, 0xcc, 0xec, 0x83, 0xd5, 0x88, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18,
-  0xe6, 0x82, 0xa7, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xda,
-  0x3f, 0xd0, 0xe1, 0x86, 0x60, 0xfd, 0x03, 0x30, 0x98, 0x65, 0x88, 0x17,
-  0x79, 0x09, 0x6c, 0x70, 0xfb, 0x00, 0x3e, 0xb3, 0x04, 0xf7, 0x62, 0x6b,
-  0x1f, 0x10, 0xf1, 0x99, 0x25, 0xb8, 0x97, 0xe1, 0x88, 0xda, 0x60, 0xfb,
-  0x40, 0xf8, 0x66, 0x19, 0xe8, 0xe5, 0x5e, 0x02, 0xb3, 0x8d, 0xb6, 0x0f,
-  0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x22,
-  0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0xfc, 0x0f, 0x74, 0xb8, 0x21, 0xb0,
-  0xff, 0x00, 0x0c, 0x66, 0x19, 0xea, 0xc5, 0x5e, 0x02, 0xab, 0xfb, 0x60,
-  0x88, 0xcf, 0x2c, 0xc1, 0xbd, 0x18, 0xa1, 0xf7, 0x01, 0x7c, 0x66, 0x09,
-  0xee, 0x65, 0xa0, 0xe5, 0xd1, 0xe2, 0x05, 0x93, 0x17, 0xa2, 0x5e, 0x04,
-  0x7b, 0x01, 0x9d, 0x79, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xba, 0xed, 0xa9,
-  0x33, 0xfb, 0x60, 0x98, 0xbb, 0xf3, 0x60, 0x98, 0x23, 0x86, 0x39, 0x62,
-  0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xd8, 0x50, 0x50, 0xa8, 0xff,
-  0x40, 0xf6, 0x83, 0x11, 0x14, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08,
-  0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00,
-  0x01, 0x40, 0x10, 0x0c, 0xb0, 0x17, 0x14, 0xf8, 0x3f, 0x48, 0x88, 0x60,
-  0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x30, 0x18, 0x14, 0xfa, 0x3f, 0x48,
-  0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb0, 0x18, 0x14, 0xfc,
-  0x3f, 0x48, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x16, 0x1c,
-  0x14, 0xfa, 0x3f, 0xe0, 0xfd, 0x20, 0x50, 0x41, 0xa1, 0xfd, 0x03, 0x16,
-  0x14, 0x46, 0x13, 0x02, 0xe0, 0x82, 0xa7, 0x66, 0x09, 0xf0, 0x65, 0xa0,
-  0xe5, 0x31, 0x8d, 0x71, 0xd1, 0xfd, 0x41, 0x5c, 0x58, 0xa2, 0x5c, 0x84,
-  0x7b, 0xd1, 0xfd, 0xc1, 0x5c, 0x66, 0x19, 0xf2, 0x65, 0x5f, 0x50, 0x33,
-  0x18, 0x8e, 0x98, 0x3d, 0xf7, 0x0f, 0x86, 0xef, 0x68, 0x6f, 0x98, 0xe1,
-  0x86, 0x20, 0xf7, 0x03, 0x32, 0xa8, 0x21, 0xd0, 0xe1, 0x88, 0xfc, 0x90,
-  0xff, 0x60, 0xf8, 0x2a, 0x10, 0xf4, 0xf6, 0x63, 0x98, 0xe1, 0x86, 0x80,
-  0xf7, 0x03, 0x32, 0xa8, 0x60, 0xd0, 0x59, 0x06, 0x7d, 0x79, 0x99, 0xe0,
-  0x40, 0x3f, 0x18, 0xe6, 0x62, 0x3d, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40,
-  0x10, 0x0c, 0x36, 0x30, 0x14, 0x5a, 0x50, 0x50, 0xff, 0x60, 0x07, 0x85,
-  0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d,
-  0x20, 0x86, 0x22, 0x0e, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xec,
-  0x0c, 0x05, 0x1a, 0x14, 0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04,
-  0x03, 0x0c, 0x0d, 0x85, 0x1a, 0x14, 0x18, 0x22, 0x18, 0x31, 0x40, 0x00,
-  0x10, 0x04, 0x03, 0x2c, 0x0d, 0x05, 0x1b, 0x14, 0x24, 0x22, 0x18, 0x31,
-  0x50, 0x00, 0x10, 0x04, 0x83, 0x05, 0x0e, 0x85, 0x1a, 0x14, 0xe8, 0x3f,
-  0x08, 0xc4, 0x50, 0x28, 0x41, 0x81, 0x0c, 0x85, 0xd1, 0x84, 0x00, 0xb8,
-  0xe0, 0xa9, 0x59, 0x82, 0x97, 0x19, 0x6e, 0x00, 0xcf, 0x00, 0x0d, 0x05,
-  0x30, 0x98, 0x65, 0xe0, 0x97, 0x7e, 0x09, 0xea, 0xfd, 0x03, 0x1c, 0x14,
-  0xe0, 0x82, 0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x92, 0x43,
-  0x21, 0x07, 0x85, 0xf6, 0x43, 0x41, 0x61, 0xc4, 0xe0, 0x00, 0x40, 0x10,
-  0x0c, 0xa8, 0x39, 0x14, 0x72, 0x50, 0x08, 0x84, 0x0b, 0x86, 0x29, 0xf9,
-  0x0f, 0x7a, 0x50, 0x80, 0x0b, 0x9e, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04,
-  0x03, 0xea, 0x0e, 0x05, 0x1f, 0x14, 0x68, 0xa4, 0x05, 0x85, 0x11, 0x83,
-  0x03, 0x00, 0x41, 0x30, 0xa0, 0xf0, 0x50, 0xf0, 0x41, 0x21, 0x10, 0x2e,
-  0x18, 0xe6, 0x82, 0xa7, 0xee, 0x78, 0xea, 0x76, 0x3f, 0x18, 0xe6, 0xd8,
-  0x3d, 0x18, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40,
-  0x10, 0x0c, 0xb6, 0x3e, 0x14, 0xd4, 0x50, 0x38, 0x41, 0x01, 0x0f, 0x85,
-  0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d,
-  0x20, 0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x8c,
-  0x14, 0x85, 0x38, 0x14, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04,
-  0x03, 0xac, 0x14, 0x05, 0x39, 0x14, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00,
-  0x10, 0x04, 0x03, 0xcc, 0x14, 0x85, 0x39, 0x14, 0x12, 0x22, 0x18, 0x31,
-  0x50, 0x00, 0x10, 0x04, 0x83, 0xa5, 0x15, 0x05, 0x39, 0x14, 0x62, 0x50,
-  0x08, 0xfe, 0x50, 0x10, 0x43, 0x21, 0x14, 0x85, 0xd1, 0x84, 0x00, 0xb8,
-  0xe0, 0xa9, 0x59, 0x82, 0x97, 0x19, 0x6e, 0xe8, 0xcf, 0x80, 0x14, 0x05,
-  0x30, 0x98, 0x65, 0xf0, 0x97, 0x97, 0x09, 0x6c, 0x04, 0x85, 0x12, 0x14,
-  0xe2, 0x33, 0x1c, 0x91, 0x82, 0x81, 0x09, 0x0a, 0xc4, 0x37, 0xcb, 0xf0,
-  0x2f, 0x22, 0x13, 0xd8, 0x09, 0x0a, 0x2a, 0x18, 0xc4, 0xc7, 0x82, 0x81,
-  0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88,
-  0x4f, 0x11, 0xae, 0x28, 0xe8, 0x70, 0x43, 0xc0, 0x8a, 0x02, 0x18, 0xcc,
-  0x32, 0x80, 0x4c, 0xc8, 0x04, 0x36, 0xbc, 0xa0, 0x00, 0x9f, 0x59, 0x02,
-  0x93, 0x31, 0x17, 0x14, 0x88, 0xf8, 0xcc, 0x12, 0x98, 0xcc, 0x70, 0x04,
-  0x0d, 0x06, 0x2f, 0x28, 0x08, 0xdf, 0x2c, 0xc3, 0xc8, 0x98, 0x4c, 0x60,
-  0x35, 0x18, 0xc0, 0xa0, 0x10, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc,
-  0x05, 0x4f, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xe4, 0xa2,
-  0xa0, 0xc3, 0x0d, 0xc1, 0x2d, 0x0a, 0x60, 0x30, 0xcb, 0x40, 0x32, 0x25,
-  0x13, 0x18, 0x0e, 0x0a, 0x43, 0x7c, 0x66, 0x09, 0x4c, 0xc6, 0x88, 0x1d,
-  0x14, 0xe0, 0x33, 0x4b, 0x60, 0x32, 0x03, 0x2d, 0x8f, 0x06, 0x32, 0x58,
-  0xc8, 0x10, 0x24, 0x23, 0x94, 0x8c, 0x1f, 0x0a, 0x22, 0x73, 0xc1, 0x30,
-  0xa6, 0x83, 0x82, 0x0f, 0x0a, 0xf1, 0x19, 0x8e, 0x20, 0x95, 0x1f, 0x14,
-  0x88, 0x6f, 0x96, 0xe1, 0x64, 0x54, 0x26, 0x30, 0x30, 0x14, 0x4a, 0x25,
-  0x3e, 0x16, 0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0xc0, 0x90,
-  0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x38, 0x47, 0x41, 0x87, 0x1b, 0x82, 0x72,
-  0x14, 0xc0, 0x60, 0x96, 0x01, 0x65, 0x52, 0x26, 0xb0, 0x01, 0x0d, 0x05,
-  0xf8, 0xcc, 0x12, 0xb8, 0x8c, 0x95, 0xa1, 0x40, 0xc4, 0x67, 0x96, 0xc0,
-  0x65, 0x86, 0x23, 0x5e, 0xc5, 0x0c, 0x05, 0xe1, 0x9b, 0x65, 0x58, 0x19,
-  0x97, 0x09, 0x0c, 0x56, 0xce, 0x50, 0x88, 0x8f, 0x05, 0x0e, 0x7d, 0x2e,
-  0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22,
-  0xe4, 0x51, 0xd0, 0xe1, 0x86, 0x00, 0x1e, 0x05, 0x30, 0x98, 0x65, 0x60,
-  0x99, 0x96, 0x09, 0xec, 0x0d, 0x85, 0x21, 0x3e, 0xb3, 0x04, 0x2e, 0x63,
-  0x04, 0x1d, 0x0a, 0xf0, 0x99, 0x25, 0x70, 0x99, 0x81, 0x96, 0x47, 0x43,
-  0x19, 0x2c, 0x65, 0x08, 0x96, 0x11, 0x5a, 0x86, 0xae, 0x54, 0xe6, 0x82,
-  0x61, 0x2e, 0x78, 0xea, 0xb6, 0xa7, 0x0e, 0x0c, 0x85, 0x61, 0x2e, 0xee,
-  0x83, 0x61, 0x8e, 0x18, 0xe6, 0x88, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04,
-  0xc1, 0x60, 0x13, 0x49, 0xe1, 0x1d, 0x05, 0x56, 0x14, 0xfa, 0x51, 0x18,
-  0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04,
-  0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xc0, 0x52,
-  0x52, 0xb0, 0x47, 0x21, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30,
-  0xc0, 0x54, 0x52, 0xb8, 0x47, 0x21, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00,
-  0x41, 0x30, 0xc0, 0x56, 0x52, 0xc0, 0x47, 0x21, 0x21, 0x82, 0x11, 0x03,
-  0x05, 0x00, 0x41, 0x30, 0x58, 0x64, 0x52, 0xb8, 0x47, 0xc1, 0x16, 0x85,
-  0x80, 0x24, 0x85, 0x73, 0x14, 0x4c, 0x52, 0x18, 0x4d, 0x08, 0x80, 0x0b,
-  0x9e, 0x9a, 0x25, 0x78, 0x99, 0x81, 0x96, 0xc7, 0x34, 0xf4, 0xc5, 0x35,
-  0x89, 0x7c, 0x61, 0x09, 0x7e, 0x11, 0x5c, 0xc6, 0x35, 0x89, 0x7e, 0x99,
-  0x65, 0x80, 0x19, 0x99, 0x11, 0xd5, 0x60, 0x38, 0x42, 0xf6, 0xd0, 0x51,
-  0x18, 0xbe, 0x9b, 0xbd, 0x61, 0x86, 0x1b, 0x82, 0x59, 0x14, 0xc8, 0xa0,
-  0x86, 0x40, 0x87, 0x23, 0xe6, 0x85, 0x1d, 0x85, 0xe1, 0xab, 0x40, 0xd0,
-  0xab, 0x97, 0x61, 0x86, 0x1b, 0x02, 0x5b, 0x14, 0xc8, 0xa0, 0x82, 0x41,
-  0x67, 0x19, 0x62, 0xc6, 0x6c, 0x82, 0xd3, 0x43, 0x61, 0x98, 0x5b, 0xfd,
-  0x60, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xd8, 0x74, 0x52, 0x38,
-  0x49, 0x81, 0x1c, 0x85, 0x9a, 0x14, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41,
-  0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x38, 0x64, 0xc4,
-  0x00, 0x01, 0x40, 0x10, 0x0c, 0xb0, 0xb0, 0x14, 0x5c, 0x52, 0x38, 0x88,
-  0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x30, 0xb1, 0x14, 0x5e, 0x52,
-  0x60, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb0, 0xb1, 0x14,
-  0x60, 0x52, 0x90, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x16,
-  0xb5, 0x14, 0x5e, 0x52, 0x70, 0x47, 0x21, 0xe0, 0x49, 0xe1, 0x1f, 0x05,
-  0x9f, 0x14, 0x46, 0x13, 0x02, 0xe0, 0x82, 0xa7, 0x66, 0x09, 0xcc, 0x66,
-  0xb8, 0x41, 0x57, 0x03, 0xb1, 0x14, 0xc0, 0x60, 0x96, 0x61, 0x66, 0x68,
-  0x26, 0xa8, 0x74, 0x14, 0x64, 0x52, 0x80, 0x0b, 0x9e, 0x1a, 0x31, 0x38,
-  0x00, 0x10, 0x04, 0x03, 0x8a, 0x2d, 0x85, 0x99, 0x14, 0xd8, 0x4f, 0x24,
-  0x85, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0xda, 0x52, 0x98, 0x49,
-  0x21, 0x10, 0x2e, 0x18, 0xa6, 0xd8, 0x51, 0xb8, 0x49, 0x01, 0x2e, 0x78,
-  0x6a, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xa8, 0xb8, 0x14, 0x70, 0x52,
-  0x70, 0x99, 0x93, 0x14, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x92,
-  0x4b, 0x01, 0x27, 0x85, 0x40, 0xb8, 0x60, 0x98, 0x0b, 0x9e, 0xba, 0xe3,
-  0xa9, 0xab, 0x45, 0x61, 0x98, 0x33, 0xff, 0x60, 0x98, 0x23, 0x86, 0x39,
-  0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xd8, 0xee, 0x52, 0x20,
-  0x4b, 0x21, 0x24, 0x05, 0xb9, 0x14, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41,
-  0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4,
-  0x00, 0x01, 0x40, 0x10, 0x0c, 0x30, 0xbf, 0x14, 0xd6, 0x52, 0x48, 0x88,
-  0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb0, 0xbf, 0x14, 0xd8, 0x52,
-  0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x30, 0xd0, 0x14,
-  0xda, 0x52, 0x48, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x96,
-  0xd3, 0x14, 0xd8, 0x52, 0x58, 0x49, 0x21, 0xc8, 0x4b, 0x81, 0x27, 0x85,
-  0xbd, 0x14, 0x46, 0x13, 0x02, 0xe0, 0x82, 0xa7, 0x66, 0x09, 0xcc, 0x66,
-  0xb8, 0xe1, 0x5e, 0x03, 0xbf, 0x14, 0xc0, 0x60, 0x96, 0xa1, 0x66, 0xcc,
-  0x26, 0xb0, 0x7e, 0x14, 0xfe, 0x51, 0x88, 0xcf, 0x70, 0x04, 0x0a, 0x06,
-  0x20, 0x29, 0x10, 0xdf, 0x2c, 0x83, 0xcd, 0xe4, 0x4c, 0x60, 0x21, 0x29,
-  0xa4, 0x60, 0x10, 0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f,
-  0x59, 0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xa0, 0xa6, 0xa0, 0xc3,
-  0x0d, 0x81, 0x69, 0x0a, 0x60, 0x30, 0xcb, 0x70, 0x33, 0x38, 0x13, 0xd8,
-  0x90, 0x92, 0x02, 0x7c, 0x66, 0x09, 0x7a, 0xc6, 0x50, 0x52, 0x20, 0xe2,
-  0x33, 0x4b, 0xd0, 0x33, 0xc3, 0x11, 0x33, 0x18, 0xa4, 0xa4, 0x20, 0x7c,
-  0xb3, 0x0c, 0x3a, 0xd3, 0x33, 0x81, 0xd1, 0x60, 0xa0, 0x92, 0x42, 0x7c,
-  0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x41, 0x24, 0x1f,
-  0x2b, 0x82, 0xf8, 0x14, 0x31, 0x9b, 0x82, 0x0e, 0x37, 0x04, 0xb1, 0x29,
-  0x80, 0xc1, 0x2c, 0xc3, 0xce, 0xf0, 0x4c, 0x60, 0x32, 0x29, 0x0c, 0xf1,
-  0x99, 0x25, 0xe8, 0x19, 0x23, 0x6a, 0x52, 0x80, 0xcf, 0x2c, 0x41, 0xcf,
-  0x0c, 0xb4, 0x3c, 0xda, 0xcd, 0x60, 0x38, 0x43, 0xec, 0x8c, 0xc0, 0x33,
-  0x7c, 0x28, 0xe4, 0xcc, 0x05, 0xc3, 0x18, 0x4d, 0x0a, 0x38, 0x29, 0xc4,
-  0x67, 0x38, 0xc2, 0x6f, 0x72, 0x52, 0x20, 0xbe, 0x59, 0x06, 0x9f, 0x09,
-  0x9b, 0xc0, 0x74, 0x52, 0xf8, 0x9b, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82,
-  0x61, 0x2e, 0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x22,
-  0x3c, 0x05, 0x1d, 0x6e, 0x08, 0x7e, 0x53, 0x00, 0x83, 0x59, 0x86, 0x9f,
-  0x01, 0x9b, 0xc0, 0x06, 0xb1, 0x14, 0xe0, 0x33, 0x4b, 0x50, 0x36, 0xf6,
-  0x93, 0x02, 0x11, 0x9f, 0x59, 0x82, 0xb2, 0x19, 0x8e, 0x48, 0x1d, 0xb0,
-  0x14, 0x84, 0x6f, 0x96, 0x41, 0x6c, 0xca, 0x26, 0x30, 0xd5, 0x09, 0x4b,
-  0x21, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20,
-  0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x60, 0x4f, 0x41, 0x87, 0x1b, 0x02,
-  0xf5, 0x14, 0xc0, 0x60, 0x96, 0x61, 0x6c, 0xc8, 0x26, 0xb0, 0xb4, 0x14,
-  0x86, 0xf8, 0xcc, 0x12, 0x94, 0x8d, 0x11, 0x6e, 0x29, 0xc0, 0x67, 0x96,
-  0xa0, 0x6c, 0x06, 0x5a, 0x1e, 0xed, 0x67, 0x30, 0xb0, 0x21, 0xc6, 0x46,
-  0x20, 0x1b, 0xb4, 0x0b, 0x9b, 0x0b, 0x86, 0xb9, 0xe0, 0xa9, 0xdb, 0x9e,
-  0x3a, 0x9d, 0x14, 0x86, 0xb9, 0x35, 0x14, 0x86, 0x39, 0x62, 0x98, 0x23,
-  0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x8d, 0x3f, 0x85, 0xf4,
-  0x14, 0x4c, 0x53, 0xb8, 0x4f, 0x61, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84,
-  0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c,
-  0x10, 0x00, 0x04, 0xc1, 0x00, 0x1b, 0x51, 0x01, 0x3e, 0x85, 0x84, 0x08,
-  0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x00, 0x23, 0x51, 0x21, 0x3e, 0x85,
-  0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x00, 0x2b, 0x51, 0x41,
-  0x3e, 0x85, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0x61,
-  0x51, 0x21, 0x3e, 0x05, 0xd8, 0x14, 0x02, 0xff, 0x14, 0xc2, 0x53, 0x00,
-  0x51, 0x61, 0x34, 0x21, 0x00, 0x2e, 0x78, 0x6a, 0x96, 0xc0, 0x6c, 0x06,
-  0x5a, 0x1e, 0xd3, 0x88, 0x19, 0xd1, 0x25, 0x60, 0x86, 0x25, 0x66, 0x46,
-  0x28, 0x1b, 0xd1, 0x25, 0x68, 0xc6, 0xfe, 0x36, 0xb8, 0x4d, 0x01, 0x3e,
-  0xb3, 0x0c, 0x67, 0x93, 0x36, 0x7d, 0x1b, 0x0c, 0x47, 0x84, 0x6e, 0x30,
-  0x9e, 0xc2, 0xf0, 0x9d, 0xe8, 0x06, 0xc3, 0x0c, 0x37, 0x04, 0xae, 0x29,
-  0x90, 0x41, 0x0d, 0x81, 0x0e, 0x47, 0x14, 0xe7, 0x29, 0x0c, 0x5f, 0x05,
-  0x82, 0xde, 0x31, 0xcc, 0x70, 0x43, 0x10, 0x9b, 0x02, 0x19, 0x54, 0x30,
-  0xe8, 0x2c, 0x03, 0xda, 0xf4, 0x4d, 0x70, 0x75, 0x29, 0x0c, 0x73, 0xa6,
-  0x28, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x5b, 0x8d, 0x0a,
-  0x22, 0x2a, 0xfc, 0xa6, 0x00, 0xa3, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26,
-  0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x87, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0xc6, 0xa3, 0x42, 0x8a, 0x0a, 0x07,
-  0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0xd6, 0xa3, 0x82, 0x8a,
-  0x0a, 0x0c, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0xe6, 0xa3,
-  0xc2, 0x8a, 0x0a, 0x12, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1,
-  0x52, 0xa6, 0x82, 0x8a, 0x0a, 0xe9, 0x29, 0x04, 0x37, 0x2a, 0xe8, 0xa7,
-  0x90, 0xa3, 0xc2, 0x68, 0x42, 0x00, 0x5c, 0xf0, 0xd4, 0x2c, 0x41, 0xdf,
-  0x0c, 0x37, 0xd4, 0x6e, 0xd0, 0xa3, 0x02, 0x18, 0xcc, 0x32, 0xa8, 0xcd,
-  0xda, 0x04, 0x45, 0x9e, 0x42, 0x8b, 0x0a, 0x70, 0xc1, 0x53, 0x23, 0x06,
-  0x07, 0x00, 0x82, 0x60, 0x40, 0x9d, 0xa9, 0xe0, 0xa2, 0xc2, 0xee, 0x06,
-  0xfd, 0x29, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x01, 0x85, 0xa6, 0x82,
-  0x8b, 0x0a, 0x81, 0x70, 0xc1, 0x30, 0x75, 0x9e, 0x82, 0x8c, 0x0a, 0x70,
-  0xc1, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0xb1, 0xa9, 0x30,
-  0xa3, 0x02, 0x18, 0x88, 0xa8, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06,
-  0x54, 0x9b, 0x0a, 0x33, 0x2a, 0x04, 0xc2, 0x05, 0xc3, 0x5c, 0xf0, 0xd4,
-  0x1d, 0x4f, 0x1d, 0x6c, 0x0a, 0xc3, 0x5c, 0x38, 0x0a, 0xc3, 0x1c, 0x31,
-  0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x26, 0xa7,
-  0xc2, 0x8f, 0x0a, 0xfc, 0x29, 0xb4, 0xa9, 0x30, 0x9a, 0x10, 0x00, 0xa3,
-  0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22,
-  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x80, 0xe5, 0xa9, 0x60, 0xa6, 0x42,
-  0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x80, 0xe9, 0xa9, 0x70,
-  0xa6, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x80, 0xed,
-  0xa9, 0x80, 0xa6, 0x42, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60,
-  0xb0, 0x88, 0xaa, 0x70, 0xa6, 0x82, 0x89, 0x0a, 0x01, 0x9d, 0x0a, 0x37,
-  0x2a, 0xd8, 0xa9, 0x30, 0x9a, 0x10, 0x00, 0x17, 0x3c, 0x35, 0x4b, 0xd0,
-  0x37, 0xc3, 0x0d, 0xf2, 0x1b, 0xe4, 0xa9, 0x00, 0x06, 0xb3, 0x0c, 0x6c,
-  0xd3, 0x37, 0x81, 0xe1, 0xa7, 0xa0, 0x9f, 0x42, 0x7c, 0x86, 0x23, 0xee,
-  0x37, 0xd8, 0x4f, 0x81, 0xf8, 0x66, 0x19, 0xda, 0x06, 0x6e, 0x02, 0xe3,
-  0x4f, 0x01, 0x7f, 0x83, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e,
-  0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x62, 0x54, 0x05,
-  0x1d, 0x6e, 0x08, 0x42, 0x55, 0x00, 0x83, 0x59, 0x06, 0xb7, 0x79, 0x9b,
-  0xc0, 0x06, 0x12, 0x15, 0xe0, 0x33, 0x4b, 0x40, 0x37, 0x36, 0xa2, 0x02,
-  0x11, 0x9f, 0x59, 0x02, 0xba, 0x19, 0x8e, 0x10, 0xe1, 0x80, 0x44, 0x05,
-  0xe1, 0x9b, 0x65, 0x88, 0x1b, 0xba, 0x09, 0x6c, 0x84, 0x83, 0x12, 0x15,
-  0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x22,
-  0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0x57, 0x15, 0x74, 0xb8, 0x21, 0x60,
-  0x55, 0x01, 0x0c, 0x66, 0x19, 0xe4, 0x66, 0x6e, 0x02, 0x6b, 0x51, 0x61,
-  0x88, 0xcf, 0x2c, 0x01, 0xdd, 0x18, 0x01, 0xa3, 0x02, 0x7c, 0x66, 0x09,
-  0xe8, 0x66, 0xa0, 0xe5, 0xd1, 0xdc, 0x06, 0x7b, 0x1b, 0x42, 0x6e, 0x84,
-  0xb9, 0xd1, 0xc1, 0x01, 0x6e, 0x2e, 0x18, 0xc6, 0x5e, 0x54, 0x98, 0x51,
-  0x21, 0x3e, 0xc3, 0x11, 0xb0, 0x40, 0xa3, 0x02, 0xf1, 0xcd, 0x32, 0xd4,
-  0x0d, 0xde, 0x04, 0x56, 0xa3, 0x42, 0x2c, 0xc4, 0xc7, 0x82, 0x81, 0x3e,
-  0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f,
-  0x11, 0xbc, 0x2a, 0xe8, 0x70, 0x43, 0xa0, 0xab, 0x02, 0x18, 0xcc, 0x32,
-  0xd8, 0xcd, 0xdd, 0x04, 0x36, 0xf4, 0xa8, 0x00, 0x9f, 0x59, 0x02, 0xbe,
-  0x31, 0x1d, 0x15, 0x88, 0xf8, 0xcc, 0x12, 0xf0, 0xcd, 0x70, 0xc4, 0x2e,
-  0xec, 0xa8, 0x20, 0x7c, 0xb3, 0x0c, 0x79, 0xc3, 0x37, 0x81, 0xf1, 0x02,
-  0x8f, 0x0a, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94,
-  0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0xc4, 0xb9, 0x0a, 0x3a, 0xdc,
-  0x10, 0x94, 0xab, 0x00, 0x06, 0xb3, 0x0c, 0x7a, 0xb3, 0x37, 0x81, 0x91,
-  0xa9, 0x30, 0xc4, 0x67, 0x96, 0x80, 0x6f, 0x8c, 0x48, 0x53, 0x01, 0x3e,
-  0xb3, 0x04, 0x7c, 0x33, 0xd0, 0xf2, 0x68, 0x76, 0x83, 0xdd, 0x0d, 0xa1,
-  0x37, 0xc2, 0xde, 0xb0, 0x06, 0xde, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0xdd,
-  0xf6, 0xd4, 0xd5, 0xa8, 0x30, 0xcc, 0x99, 0xa5, 0x30, 0xcc, 0x11, 0xc3,
-  0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x6c, 0xf7, 0x2a,
-  0x90, 0xab, 0x10, 0xaa, 0x82, 0xbc, 0x0a, 0xa3, 0x09, 0x01, 0x30, 0x9a,
-  0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32,
-  0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x98, 0xbf, 0x0a, 0xeb, 0x2a, 0x24,
-  0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xd8, 0xbf, 0x0a, 0xec,
-  0x2a, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x18, 0xc8,
-  0x0a, 0xed, 0x2a, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06,
-  0xcb, 0xc9, 0x0a, 0xec, 0x2a, 0xac, 0xaa, 0x10, 0xe4, 0xab, 0xc0, 0xab,
-  0xc2, 0xbe, 0x0a, 0xa3, 0x09, 0x01, 0x70, 0xc1, 0x53, 0xb3, 0x04, 0x7d,
-  0x33, 0xd0, 0xf2, 0x98, 0x06, 0xda, 0xe0, 0x35, 0x71, 0x36, 0x2c, 0xa1,
-  0x36, 0x02, 0xdf, 0xe0, 0x35, 0xb1, 0x36, 0xb3, 0x0c, 0x7e, 0x03, 0x3a,
-  0x77, 0x1c, 0x0c, 0x47, 0xf0, 0x71, 0xd0, 0xab, 0xc2, 0xf0, 0x5d, 0x1f,
-  0x07, 0xc3, 0x0c, 0x37, 0x04, 0xa8, 0x2a, 0x90, 0x41, 0x0d, 0x81, 0x0e,
-  0x47, 0xfc, 0x43, 0xb8, 0x0a, 0xc3, 0x57, 0x81, 0xa0, 0x17, 0x12, 0xc3,
-  0x0c, 0x37, 0x04, 0xab, 0x2a, 0x90, 0x41, 0x05, 0x83, 0xce, 0x32, 0xfc,
-  0x0d, 0xed, 0x04, 0xf7, 0xa6, 0xc2, 0x30, 0x07, 0x9a, 0xc2, 0x30, 0x23,
-  0x06, 0x07, 0x00, 0x82, 0x60, 0xb0, 0xbd, 0xac, 0xc0, 0xaf, 0x42, 0xae,
-  0x0a, 0x2a, 0x2b, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26,
-  0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x71, 0xc8, 0x88, 0x01, 0x02, 0x80,
-  0x20, 0x18, 0x60, 0x36, 0x2b, 0x8c, 0xac, 0x70, 0x10, 0xc1, 0x88, 0x01,
-  0x02, 0x80, 0x20, 0x18, 0x60, 0x37, 0x2b, 0x90, 0xac, 0xc0, 0x10, 0xc1,
-  0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x60, 0x38, 0x2b, 0x94, 0xac, 0x20,
-  0x11, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c, 0x3f, 0x2b, 0x90,
-  0xac, 0x30, 0xae, 0x42, 0x10, 0xb3, 0x02, 0xbd, 0x0a, 0x33, 0x2b, 0x8c,
-  0x26, 0x04, 0xc0, 0x05, 0x4f, 0xcd, 0x12, 0xd0, 0xce, 0x70, 0xc3, 0x2b,
-  0x07, 0x37, 0x2b, 0x80, 0xc1, 0x2c, 0x43, 0xe8, 0x88, 0x4e, 0x50, 0xbe,
-  0x2a, 0x9c, 0xac, 0x00, 0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08,
-  0x06, 0x54, 0xd8, 0x0a, 0x28, 0x2b, 0xd8, 0x72, 0x70, 0xaf, 0xc2, 0x88,
-  0xc1, 0x01, 0x80, 0x20, 0x18, 0x50, 0x62, 0x2b, 0xa0, 0xac, 0x10, 0x08,
-  0x17, 0x0c, 0x53, 0xe1, 0x2a, 0xb0, 0xac, 0x00, 0x17, 0x3c, 0x35, 0x62,
-  0x70, 0x00, 0x20, 0x08, 0x06, 0x94, 0xd9, 0x0a, 0x2d, 0x2b, 0xe8, 0x04,
-  0xbf, 0x0a, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0x9d, 0xad, 0xd0,
-  0xb2, 0x42, 0x20, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0xdd, 0xf1, 0xd4, 0xa9,
-  0xaa, 0x30, 0xcc, 0xed, 0xa6, 0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc,
-  0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x6c, 0x6c, 0x2b, 0xe4, 0xac, 0x60,
-  0xaf, 0xc2, 0xd9, 0x0a, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3,
-  0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00,
-  0x20, 0x08, 0x06, 0xd8, 0xdc, 0x0a, 0x60, 0x2b, 0x24, 0x44, 0x30, 0x62,
-  0x80, 0x00, 0x20, 0x08, 0x06, 0x18, 0xdd, 0x0a, 0x61, 0x2b, 0x24, 0x44,
-  0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x58, 0xdd, 0x0a, 0x62, 0x2b,
-  0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x0b, 0xdf, 0x0a,
-  0x61, 0x2b, 0x80, 0xac, 0x10, 0xb8, 0xad, 0x10, 0xb3, 0x02, 0xdc, 0x0a,
-  0xa3, 0x09, 0x01, 0x70, 0xc1, 0x53, 0xb3, 0x04, 0xb4, 0x33, 0xdc, 0xc0,
-  0xce, 0xc1, 0xdc, 0x0a, 0x60, 0x30, 0xcb, 0x30, 0x3a, 0xb4, 0x13, 0x98,
-  0xbc, 0x0a, 0xf4, 0x2a, 0xc4, 0x67, 0x38, 0x42, 0x9e, 0x83, 0x7a, 0x15,
-  0x88, 0x6f, 0x96, 0x81, 0x74, 0x4e, 0x27, 0x30, 0x7b, 0x15, 0xe6, 0x39,
-  0x88, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x30,
-  0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xfa, 0x56, 0xd0, 0xe1, 0x86, 0x60,
-  0x6f, 0x05, 0x30, 0x98, 0x65, 0x28, 0x1d, 0xd3, 0x09, 0x6c, 0xf0, 0x57,
-  0x01, 0x3e, 0xb3, 0x04, 0xab, 0x63, 0xfd, 0x2a, 0x10, 0xf1, 0x99, 0x25,
-  0x58, 0x9d, 0xe1, 0x88, 0x7e, 0x0e, 0xfc, 0x55, 0x10, 0xbe, 0x59, 0x06,
-  0xd4, 0x59, 0x9d, 0xc0, 0xfc, 0x39, 0xf8, 0x57, 0x21, 0x3e, 0x16, 0x38,
-  0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41,
-  0x7c, 0x8a, 0x40, 0x5d, 0x41, 0x87, 0x1b, 0x02, 0xd3, 0x15, 0xc0, 0x60,
-  0x96, 0x21, 0x75, 0x54, 0x27, 0xb0, 0x93, 0x15, 0x86, 0xf8, 0xcc, 0x12,
-  0xac, 0x8e, 0x11, 0x2a, 0x2b, 0xc0, 0x67, 0x96, 0x60, 0x75, 0x06, 0x5a,
-  0x1e, 0xad, 0x74, 0x30, 0xd3, 0x21, 0x52, 0x47, 0x50, 0x1d, 0x18, 0x1d,
-  0x4e, 0xe7, 0x82, 0x61, 0x2c, 0x65, 0x85, 0x96, 0x15, 0xe2, 0x33, 0x1c,
-  0xa1, 0x1a, 0x2e, 0x2b, 0x10, 0xdf, 0x2c, 0x03, 0xeb, 0xbc, 0x4e, 0x60,
-  0x2f, 0x2b, 0xac, 0x46, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17,
-  0x3c, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x61, 0xbb, 0x82,
-  0x0e, 0x37, 0x04, 0xb4, 0x2b, 0x80, 0xc1, 0x2c, 0x43, 0xeb, 0xb8, 0x4e,
-  0x60, 0xc3, 0xcd, 0x0a, 0xf0, 0x99, 0x25, 0x98, 0x1d, 0xa3, 0x59, 0x81,
-  0x88, 0xcf, 0x2c, 0xc1, 0xec, 0x0c, 0x47, 0xd4, 0x46, 0xcd, 0x0a, 0xc2,
-  0x37, 0xcb, 0x00, 0x3b, 0xb3, 0x13, 0x98, 0x6d, 0xd8, 0xac, 0x10, 0x1f,
-  0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x10, 0xc9, 0xc7,
-  0x8a, 0x20, 0x3e, 0x45, 0x84, 0xaf, 0xa0, 0xc3, 0x0d, 0xc1, 0xef, 0x0a,
-  0x60, 0x30, 0xcb, 0x10, 0x3b, 0xb2, 0x13, 0x98, 0xcf, 0x0a, 0x43, 0x7c,
-  0x66, 0x09, 0x66, 0xc7, 0x88, 0xb1, 0x15, 0xe0, 0x33, 0x4b, 0x30, 0x3b,
-  0x03, 0x2d, 0x8f, 0xd6, 0x3a, 0x98, 0xeb, 0x10, 0xb1, 0x23, 0xc8, 0x0e,
-  0xe8, 0xbc, 0xce, 0x05, 0xc3, 0x5c, 0xf0, 0xd4, 0x6d, 0x4f, 0xdd, 0xcb,
-  0x0a, 0xc3, 0x1c, 0x98, 0x0a, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c,
-  0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x16, 0xbf, 0x82, 0xef, 0x0a, 0x7b,
-  0x2b, 0xb0, 0xaf, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a,
-  0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00,
-  0x82, 0x60, 0x80, 0xe1, 0xaf, 0x50, 0xbe, 0x42, 0x42, 0x04, 0x23, 0x06,
-  0x08, 0x00, 0x82, 0x60, 0x80, 0xe5, 0xaf, 0x60, 0xbe, 0x42, 0x42, 0x04,
-  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x80, 0xe9, 0xaf, 0x70, 0xbe, 0x42,
-  0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0x84, 0xb0, 0x60,
-  0xbe, 0x42, 0xe9, 0x0a, 0xc1, 0xfc, 0x0a, 0xb6, 0x2b, 0xd4, 0xaf, 0x30,
-  0x9a, 0x10, 0x00, 0x17, 0x3c, 0x35, 0x4b, 0x40, 0x3b, 0x03, 0x2d, 0x8f,
-  0x69, 0xfc, 0x0d, 0xdb, 0x13, 0x7e, 0xc3, 0x12, 0xa1, 0x23, 0xcc, 0x0e,
-  0xdb, 0x13, 0xa2, 0x33, 0xcb, 0x50, 0x3b, 0xb7, 0x13, 0xd7, 0xc1, 0x70,
-  0x04, 0xdf, 0x06, 0xb7, 0x2b, 0x0c, 0xdf, 0xf5, 0x6d, 0x30, 0xcc, 0x70,
-  0x43, 0x20, 0xba, 0x02, 0x19, 0xd4, 0x10, 0xe8, 0x70, 0x44, 0x7e, 0xec,
-  0xae, 0x30, 0x7c, 0x15, 0x08, 0x7a, 0xfb, 0x31, 0xcc, 0x70, 0x43, 0x50,
-  0xba, 0x02, 0x19, 0x54, 0x30, 0xe8, 0x2c, 0x83, 0xed, 0xac, 0x4f, 0x70,
-  0x69, 0x2b, 0x0c, 0x73, 0x7a, 0x2a, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20,
-  0x08, 0x06, 0x5b, 0x0a, 0x0b, 0xf6, 0x2b, 0xcc, 0xae, 0x40, 0xc2, 0xc2,
-  0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26,
-  0x10, 0x43, 0x11, 0x87, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x06,
-  0xc3, 0x42, 0xff, 0x0a, 0x07, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
-  0x01, 0x16, 0xc3, 0x82, 0xff, 0x0a, 0x0c, 0x11, 0x8c, 0x18, 0x20, 0x00,
-  0x08, 0x82, 0x01, 0x26, 0xc3, 0xc2, 0xff, 0x0a, 0x12, 0x11, 0x8c, 0x18,
-  0x28, 0x00, 0x08, 0x82, 0xc1, 0x92, 0xc3, 0x82, 0xff, 0x0a, 0xbd, 0x2b,
-  0x04, 0x2b, 0x2c, 0xb8, 0xaf, 0xd0, 0xc2, 0xc2, 0x68, 0x42, 0x00, 0x5c,
-  0xf0, 0xd4, 0x2c, 0xc1, 0xfa, 0x0c, 0x37, 0xa4, 0x76, 0x10, 0xc3, 0x02,
-  0x18, 0xcc, 0x32, 0xe0, 0x4e, 0xee, 0x04, 0x85, 0xbb, 0x42, 0x08, 0x0b,
-  0x70, 0xc1, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0xed, 0xb0,
-  0x20, 0xc2, 0x82, 0xed, 0x06, 0xf1, 0x2b, 0x8c, 0x18, 0x1c, 0x00, 0x08,
-  0x82, 0x01, 0xc5, 0xc3, 0x82, 0x08, 0x0b, 0x81, 0x70, 0xc1, 0x30, 0xb5,
-  0xbb, 0x82, 0x09, 0x0b, 0x70, 0xc1, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82,
-  0x60, 0x40, 0x81, 0xb1, 0x70, 0xc2, 0x02, 0x8d, 0xd8, 0xaf, 0x30, 0x62,
-  0x70, 0x00, 0x20, 0x08, 0x06, 0x54, 0x18, 0x0b, 0x27, 0x2c, 0x04, 0xc2,
-  0x05, 0xc3, 0x5c, 0xf0, 0xd4, 0x1d, 0x4f, 0x1d, 0xe9, 0x0a, 0xc3, 0x5c,
-  0xad, 0x0a, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00,
-  0x08, 0x82, 0xc1, 0x66, 0xc6, 0xc2, 0x0c, 0x0b, 0xf0, 0x2b, 0x84, 0xb1,
-  0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3,
-  0x09, 0xc4, 0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x80,
-  0xb5, 0xb1, 0xa0, 0xc3, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82,
-  0x60, 0x80, 0xb9, 0xb1, 0xb0, 0xc3, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08,
-  0x00, 0x82, 0x60, 0x80, 0xbd, 0xb1, 0xc0, 0xc3, 0x42, 0x42, 0x04, 0x23,
-  0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0xd8, 0xb1, 0xb0, 0xc3, 0x82, 0xfe,
-  0x0a, 0x01, 0x1a, 0x0b, 0x2b, 0x2c, 0xa8, 0xb1, 0x30, 0x9a, 0x10, 0x00,
-  0x17, 0x3c, 0x35, 0x4b, 0xb0, 0x3e, 0xc3, 0x0d, 0xe6, 0x1d, 0xb4, 0xb1,
-  0x00, 0x06, 0xb3, 0x0c, 0xba, 0xb3, 0x3e, 0x81, 0xb1, 0xaf, 0xe0, 0xbe,
-  0x42, 0x7c, 0x86, 0x23, 0xe4, 0x37, 0x78, 0x5f, 0x81, 0xf8, 0x66, 0x19,
-  0x76, 0xc7, 0x77, 0x02, 0x83, 0x5f, 0x61, 0x7e, 0x83, 0xf8, 0x58, 0x30,
-  0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04,
-  0xf1, 0x29, 0xe2, 0x8e, 0x05, 0x1d, 0x6e, 0x08, 0xea, 0x58, 0x00, 0x83,
-  0x59, 0x06, 0xde, 0xe9, 0x9d, 0xc0, 0x06, 0xfc, 0x15, 0xe0, 0x33, 0x4b,
-  0x20, 0x3e, 0x76, 0xbf, 0x02, 0x11, 0x9f, 0x59, 0x02, 0xf1, 0x19, 0x8e,
-  0xe8, 0xdf, 0x00, 0x7f, 0x05, 0xe1, 0x9b, 0x65, 0xf8, 0x1d, 0xf1, 0x09,
-  0xcc, 0x7f, 0x83, 0xfc, 0x15, 0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86,
-  0xb9, 0xe0, 0x29, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0x51,
-  0x16, 0x74, 0xb8, 0x21, 0x00, 0x65, 0x01, 0x0c, 0x66, 0x19, 0xc0, 0x27,
-  0x7c, 0x02, 0x0b, 0x61, 0x61, 0x88, 0xcf, 0x2c, 0x81, 0xf8, 0x18, 0x41,
-  0xc2, 0x02, 0x7c, 0x66, 0x09, 0xc4, 0x67, 0xa0, 0xe5, 0xd1, 0x78, 0x07,
-  0xeb, 0x1d, 0x02, 0x7c, 0x84, 0xf0, 0x81, 0xc1, 0xc1, 0x77, 0x2e, 0x18,
-  0xc6, 0x46, 0x58, 0x38, 0x61, 0x21, 0x3e, 0xc3, 0x11, 0xa4, 0x82, 0xc2,
-  0x02, 0xf1, 0xcd, 0x32, 0x8c, 0x8f, 0xf9, 0x04, 0x96, 0xc2, 0x42, 0xa9,
-  0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x18,
-  0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0xb0, 0x2c, 0xe8, 0x70, 0x43, 0xe0,
-  0xca, 0x02, 0x18, 0xcc, 0x32, 0x90, 0x4f, 0xf9, 0x04, 0x36, 0xc4, 0xb0,
-  0x00, 0x9f, 0x59, 0x02, 0xf5, 0x31, 0x17, 0x16, 0x88, 0xf8, 0xcc, 0x12,
-  0xa8, 0xcf, 0x70, 0xc4, 0xab, 0xbc, 0xb0, 0x20, 0x7c, 0xb3, 0x0c, 0xe7,
-  0xa3, 0x3e, 0x81, 0xc1, 0x0a, 0x0c, 0x0b, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf,
-  0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53,
-  0xc4, 0x2e, 0x0b, 0x3a, 0xdc, 0x10, 0xe4, 0xb2, 0x00, 0x06, 0xb3, 0x0c,
-  0xe8, 0x93, 0x3e, 0x81, 0xe1, 0xb0, 0x30, 0xc4, 0x67, 0x96, 0x40, 0x7d,
-  0x8c, 0xe8, 0x61, 0x01, 0x3e, 0xb3, 0x04, 0xea, 0x33, 0xd0, 0xf2, 0x68,
-  0xe4, 0x83, 0x95, 0x0f, 0x81, 0x3e, 0x42, 0xfa, 0xd0, 0x95, 0xf9, 0x5c,
-  0x30, 0xcc, 0x05, 0x4f, 0xdd, 0xf6, 0xd4, 0xa5, 0xb0, 0x30, 0xcc, 0xe9,
-  0xac, 0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80,
-  0x20, 0x18, 0x6c, 0xeb, 0x2c, 0xe0, 0xb2, 0x50, 0xc7, 0x82, 0x39, 0x0b,
-  0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a,
-  0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x98,
-  0x3c, 0x0b, 0xbf, 0x2c, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08,
-  0x06, 0xd8, 0x3c, 0x0b, 0xe0, 0x2c, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00,
-  0x20, 0x08, 0x06, 0x18, 0x3d, 0x0b, 0xe1, 0x2c, 0x24, 0x44, 0x30, 0x62,
-  0xa0, 0x00, 0x20, 0x08, 0x06, 0xcb, 0x3e, 0x0b, 0xe0, 0x2c, 0xfc, 0xb1,
-  0x10, 0xb4, 0xb3, 0x00, 0xcb, 0xc2, 0x3b, 0x0b, 0xa3, 0x09, 0x01, 0x70,
-  0xc1, 0x53, 0xb3, 0x04, 0xeb, 0x33, 0xd0, 0xf2, 0x98, 0x86, 0xed, 0x80,
-  0x65, 0x51, 0x3b, 0x2c, 0x81, 0x3b, 0x82, 0xfa, 0x80, 0x65, 0x91, 0x3b,
-  0xb3, 0x0c, 0xec, 0xe3, 0x3e, 0x6b, 0x1e, 0x0c, 0x47, 0xec, 0x6d, 0x10,
-  0xcb, 0xc2, 0xf0, 0x1d, 0xdf, 0x06, 0xc3, 0x0c, 0x37, 0x04, 0x7c, 0x2c,
-  0x90, 0x41, 0x0d, 0x81, 0x0e, 0x47, 0xcc, 0x4b, 0x2d, 0x0b, 0xc3, 0x57,
-  0x81, 0xa0, 0x57, 0x2f, 0xc3, 0x0c, 0x37, 0x04, 0x7f, 0x2c, 0x90, 0x41,
-  0x05, 0x83, 0xce, 0x32, 0xb4, 0x8f, 0x08, 0x05, 0x37, 0xc6, 0xc2, 0x30,
-  0x47, 0xb7, 0xc2, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xb0, 0x8d,
-  0xb4, 0x00, 0xcf, 0x42, 0x2b, 0x0b, 0xfe, 0x2c, 0x8c, 0x26, 0x04, 0xc0,
-  0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x71,
-  0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x60, 0x2a, 0x2d, 0xdc, 0xb3,
-  0x70, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x60, 0x2b, 0x2d,
-  0xe0, 0xb3, 0xc0, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x60,
-  0x2c, 0x2d, 0xe4, 0xb3, 0x20, 0x11, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20,
-  0x18, 0x2c, 0x33, 0x2d, 0xe0, 0xb3, 0x70, 0xcb, 0x42, 0x50, 0xd2, 0x02,
-  0x3a, 0x0b, 0x27, 0x2d, 0x8c, 0x26, 0x04, 0xc0, 0x05, 0x4f, 0xcd, 0x12,
-  0x88, 0xd0, 0x70, 0xc3, 0xa8, 0x07, 0x2b, 0x2d, 0x80, 0xc1, 0x2c, 0xc3,
-  0xfb, 0xc0, 0x4f, 0x50, 0xb2, 0x2c, 0xec, 0xb3, 0x00, 0x17, 0x3c, 0x35,
-  0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x54, 0x4d, 0x0b, 0xfc, 0x2c, 0xd4,
-  0x6e, 0xb0, 0xce, 0xc2, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50, 0x36,
-  0x2d, 0xf0, 0xb3, 0x10, 0x08, 0x17, 0x0c, 0x53, 0xb5, 0x2c, 0x80, 0xb4,
-  0x00, 0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x94, 0x4e,
-  0x0b, 0x21, 0x2d, 0xb8, 0x0c, 0x3c, 0x0b, 0x23, 0x06, 0x07, 0x00, 0x82,
-  0x60, 0x40, 0xed, 0xb4, 0x10, 0xd2, 0x42, 0x20, 0x5c, 0x30, 0xcc, 0x05,
-  0x4f, 0xdd, 0xf1, 0xd4, 0xf9, 0xb1, 0x30, 0xcc, 0xbd, 0xae, 0x30, 0xcc,
-  0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x6c,
-  0x60, 0x2d, 0xb4, 0xb4, 0xa0, 0xce, 0xc2, 0x4e, 0x0b, 0xa3, 0x09, 0x01,
-  0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45,
-  0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xd8, 0x59, 0x0b, 0x34,
-  0x2d, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x18, 0x5a,
-  0x0b, 0x35, 0x2d, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06,
-  0x58, 0x5a, 0x0b, 0x36, 0x2d, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20,
-  0x08, 0x06, 0x0b, 0x5c, 0x0b, 0x35, 0x2d, 0xd0, 0xb3, 0x10, 0x88, 0xb5,
-  0x50, 0xd2, 0x02, 0x59, 0x0b, 0xa3, 0x09, 0x01, 0x70, 0xc1, 0x53, 0xb3,
-  0x04, 0x22, 0x34, 0xdc, 0x00, 0xee, 0xc1, 0x59, 0x0b, 0x60, 0x30, 0xcb,
-  0x10, 0x3f, 0x22, 0x14, 0x98, 0x39, 0x0b, 0xe8, 0x2c, 0xc4, 0x67, 0x38,
-  0x22, 0x7e, 0x83, 0x74, 0x16, 0x88, 0x6f, 0x96, 0x41, 0x7e, 0xea, 0x27,
-  0x30, 0x75, 0x16, 0xe4, 0x37, 0x88, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18,
-  0xe6, 0x82, 0xa7, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xe2,
-  0x5a, 0xd0, 0xe1, 0x86, 0xe0, 0xad, 0x05, 0x30, 0x98, 0x65, 0x98, 0x1f,
-  0xfa, 0x09, 0x6c, 0x90, 0x67, 0x01, 0x3e, 0xb3, 0x04, 0xf9, 0x63, 0xf1,
-  0x2c, 0x10, 0xf1, 0x99, 0x25, 0xc8, 0x9f, 0xe1, 0x08, 0xfe, 0x0d, 0xe4,
-  0x59, 0x10, 0xbe, 0x59, 0x06, 0xfb, 0xc9, 0x9f, 0xc0, 0xfa, 0x37, 0x98,
-  0x67, 0x21, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2,
-  0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xe0, 0x6b, 0x41, 0x87, 0x1b,
-  0x02, 0xbd, 0x16, 0xc0, 0x60, 0x96, 0xe1, 0x7e, 0xf0, 0x27, 0xb0, 0x7d,
-  0x16, 0x86, 0xf8, 0xcc, 0x12, 0xe4, 0x8f, 0x11, 0xfe, 0x2c, 0xc0, 0x67,
-  0x96, 0x20, 0x7f, 0x06, 0x5a, 0x1e, 0x6d, 0x7e, 0x30, 0xfa, 0x21, 0xee,
-  0x47, 0xc0, 0x1f, 0x17, 0x1c, 0xea, 0xe7, 0x82, 0x61, 0xac, 0x9f, 0x85,
-  0x90, 0x16, 0xe2, 0x33, 0x1c, 0xe1, 0x37, 0x22, 0x2d, 0x10, 0xdf, 0x2c,
-  0x83, 0xfe, 0xf4, 0x4f, 0x60, 0x23, 0x2d, 0xfc, 0x4d, 0x7c, 0x2c, 0x18,
-  0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82,
-  0xf8, 0x14, 0xa1, 0xda, 0x82, 0x0e, 0x37, 0x04, 0xa8, 0x2d, 0x80, 0xc1,
-  0x2c, 0xc3, 0xfe, 0xf0, 0x4f, 0x60, 0xc3, 0x4a, 0x0b, 0xf0, 0x99, 0x25,
-  0x08, 0x21, 0x43, 0x69, 0x81, 0x88, 0xcf, 0x2c, 0x41, 0x08, 0x0d, 0x47,
-  0xa4, 0x4e, 0x4a, 0x0b, 0xc2, 0x37, 0xcb, 0xe0, 0x3f, 0x21, 0x14, 0x98,
-  0xea, 0xa8, 0xb4, 0x10, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05,
-  0x4f, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xd4, 0xb6, 0xa0,
-  0xc3, 0x0d, 0xc1, 0x6c, 0x0b, 0x60, 0x30, 0xcb, 0xf0, 0x3f, 0x20, 0x14,
-  0x98, 0x4c, 0x0b, 0x43, 0x7c, 0x66, 0x09, 0x42, 0xc8, 0x88, 0x9b, 0x16,
-  0xe0, 0x33, 0x4b, 0x10, 0x42, 0x03, 0x2d, 0x8f, 0xb6, 0x3f, 0x18, 0xff,
-  0x10, 0xff, 0x23, 0x80, 0x10, 0xda, 0xf5, 0xcf, 0x05, 0xc3, 0x5c, 0xf0,
-  0xd4, 0x6d, 0x4f, 0xdd, 0x48, 0x0b, 0xc3, 0x1c, 0x0d, 0x0b, 0xc3, 0x1c,
-  0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x56,
-  0xde, 0x82, 0x6c, 0x0b, 0x6f, 0x2d, 0x80, 0xb7, 0x30, 0x9a, 0x10, 0x00,
-  0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44,
-  0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x80, 0xb1, 0xb7, 0x90, 0xdb,
-  0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x80, 0xb5, 0xb7,
-  0xa0, 0xdb, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x80,
-  0xb9, 0xb7, 0xb0, 0xdb, 0x42, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82,
-  0x60, 0xb0, 0xd4, 0xb7, 0xa0, 0xdb, 0x42, 0x5e, 0x0b, 0xc1, 0x79, 0x0b,
-  0xaa, 0x2d, 0xa4, 0xb7, 0x30, 0x9a, 0x10, 0x00, 0x17, 0x3c, 0x35, 0x4b,
-  0x20, 0x42, 0x03, 0x2d, 0x8f, 0x69, 0xb4, 0x0f, 0xcd, 0x16, 0xec, 0xc3,
-  0x12, 0xef, 0x23, 0x84, 0x10, 0xcd, 0x16, 0xf0, 0x33, 0x62, 0x60, 0x00,
-  0x20, 0x08, 0x06, 0xd0, 0x7d, 0x0b, 0xb6, 0x2d, 0x98, 0xb1, 0x30, 0x62,
-  0x60, 0x00, 0x20, 0x08, 0x06, 0x10, 0x7e, 0x0b, 0xb7, 0x2d, 0x98, 0xb1,
-  0x60, 0x41, 0x20, 0x1f, 0x0b, 0x04, 0xf9, 0xd8, 0x9b, 0x07, 0xad, 0x2d,
-  0xc8, 0x67, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xa4, 0xfd, 0x16, 0x7e,
-  0x5b, 0x68, 0x6d, 0xa1, 0xd7, 0x02, 0x8b, 0xf3, 0xa0, 0xb5, 0x05, 0xf9,
-  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0xd4, 0xdf, 0x42, 0x78, 0x0b,
-  0xac, 0x2d, 0xa0, 0x6a, 0x10, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81,
-  0xe4, 0xdf, 0x82, 0x78, 0x0b, 0xaf, 0x2d, 0x80, 0x5b, 0x30, 0x62, 0x80,
-  0x00, 0x20, 0x08, 0x06, 0xd2, 0x7f, 0x0b, 0xe3, 0x2d, 0xc8, 0xb6, 0x80,
-  0x2f, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x48, 0x20, 0x2e, 0x90,
-  0xb7, 0xe0, 0xda, 0xc2, 0xaa, 0x06, 0xc6, 0x88, 0x01, 0x02, 0x80, 0x20,
-  0x18, 0x48, 0x21, 0x2e, 0x94, 0xb7, 0xe0, 0xda, 0xc2, 0xb8, 0x05, 0x23,
-  0x06, 0x08, 0x00, 0x82, 0x60, 0x20, 0x89, 0xb8, 0x60, 0xde, 0x02, 0x6d,
-  0x0b, 0xfb, 0x12, 0x8c, 0x18, 0x34, 0x00, 0x08, 0x82, 0x81, 0x15, 0xe2,
-  0x82, 0x79, 0x0b, 0xb7, 0x2d, 0x30, 0x8b, 0xe2, 0xaa, 0x01, 0x42, 0x04,
-  0xf6, 0xd7, 0xc1, 0x6d, 0x0b, 0xf2, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04,
-  0x03, 0xa9, 0xc4, 0x85, 0xf4, 0x16, 0x6e, 0x5b, 0x68, 0xaf, 0xc0, 0x42,
-  0x3b, 0xb8, 0x6d, 0x41, 0x3e, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x20,
-  0x9d, 0xb8, 0xb0, 0xde, 0x82, 0x6d, 0x0b, 0xb8, 0x19, 0x04, 0x23, 0x06,
-  0x08, 0x00, 0x82, 0x60, 0x20, 0xa1, 0xb8, 0xc0, 0xde, 0x42, 0x6e, 0x0b,
-  0xf0, 0x15, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x94, 0xe2, 0x42,
-  0x7b, 0x0b, 0xbc, 0x2d, 0xa0, 0x48, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08,
-  0x06, 0x92, 0x8a, 0x0b, 0xee, 0x2d, 0xe0, 0xb6, 0xb0, 0x9b, 0x81, 0x31,
-  0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xd2, 0x8a, 0x0b, 0xef, 0x2d, 0xe0,
-  0xb6, 0x30, 0x5f, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x48, 0x2c,
-  0x2e, 0xc0, 0xb7, 0xe0, 0xdb, 0xc2, 0x8a, 0x04, 0x23, 0x06, 0x0d, 0x00,
-  0x82, 0x60, 0x60, 0xad, 0xb8, 0x00, 0xdf, 0x42, 0x78, 0x0b, 0x56, 0x45,
-  0xf9, 0x66, 0x80, 0x10, 0x81, 0xb9, 0x72, 0x10, 0xde, 0x82, 0x7c, 0x46,
-  0x0c, 0x10, 0x00, 0x04, 0xc1, 0x40, 0x7a, 0x71, 0x61, 0xbe, 0x85, 0xf0,
-  0x16, 0xfa, 0x29, 0x30, 0x58, 0x0e, 0xc2, 0x5b, 0x90, 0xcf, 0x88, 0x01,
-  0x02, 0x80, 0x20, 0x18, 0x48, 0x31, 0x2e, 0xd4, 0xb7, 0x00, 0xde, 0x02,
-  0x3a, 0x06, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x48, 0x32, 0x2e,
-  0xd8, 0xb7, 0x30, 0xde, 0x02, 0x48, 0x05, 0x23, 0x06, 0x08, 0x00, 0x82,
-  0x60, 0x20, 0xcd, 0xb8, 0x70, 0xdf, 0x82, 0x79, 0x0b, 0x38, 0x11, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x44, 0xe3, 0x02, 0x7e, 0x0b, 0xe2,
-  0x2d, 0xac, 0x63, 0x60, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x54,
-  0xe3, 0x42, 0x7e, 0x0b, 0xe2, 0x2d, 0x8c, 0x54, 0x30, 0x62, 0x80, 0x00,
-  0x20, 0x08, 0x06, 0x92, 0x8d, 0x0b, 0xfa, 0x2d, 0xa0, 0xb7, 0xb0, 0x13,
-  0xc1, 0x88, 0x41, 0x03, 0x80, 0x20, 0x18, 0x58, 0x35, 0x2e, 0xe8, 0xb7,
-  0xb0, 0xde, 0x02, 0x18, 0x7c, 0x9e, 0x3b, 0x06, 0x08, 0x11, 0x58, 0xef,
-  0x06, 0xeb, 0x2d, 0xc8, 0x67, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xa4,
-  0x1c, 0x17, 0xfa, 0x5b, 0x58, 0x6f, 0xa1, 0x85, 0x02, 0xfb, 0xdd, 0x60,
-  0xbd, 0x05, 0xf9, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0xb4, 0xe3,
-  0xc2, 0x7f, 0x0b, 0xea, 0x2d, 0xe0, 0x5f, 0x30, 0x62, 0x80, 0x00, 0x20,
-  0x08, 0x06, 0x12, 0x8f, 0x0b, 0x20, 0x2e, 0xb4, 0xb7, 0x00, 0x43, 0xc1,
-  0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x48, 0x3d, 0x2e, 0x84, 0xb8, 0x00,
-  0xdf, 0x02, 0x1a, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x20, 0xf9,
-  0xb8, 0x20, 0xe2, 0x02, 0x7b, 0x0b, 0xfb, 0x67, 0x8c, 0x18, 0x20, 0x00,
-  0x08, 0x82, 0x81, 0xf4, 0xe3, 0xc2, 0x88, 0x0b, 0xec, 0x2d, 0xcc, 0x50,
-  0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x12, 0x98, 0x0b, 0x24, 0x2e,
-  0xc8, 0xb7, 0xb0, 0x06, 0xc1, 0x88, 0x41, 0x03, 0x80, 0x20, 0x18, 0x58,
-  0x3f, 0x2e, 0x90, 0xb8, 0x50, 0xdf, 0x82, 0x1a, 0xa4, 0x01, 0x1a, 0xf8,
-  0x1f, 0x42, 0x04, 0xc6, 0x06, 0x6c, 0x20, 0x1f, 0x0b, 0xda, 0x40, 0x3e,
-  0x16, 0x06, 0xf7, 0x2d, 0xc8, 0x67, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c,
-  0xa4, 0x32, 0x17, 0x52, 0x5c, 0xb8, 0x6f, 0xc1, 0x09, 0x6c, 0x0c, 0xee,
-  0x5b, 0x90, 0xcf, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x48, 0x67, 0x2e,
-  0xac, 0xb8, 0x60, 0xdf, 0x82, 0x16, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
-  0x81, 0x84, 0xe6, 0x02, 0x8b, 0x0b, 0xf9, 0x2d, 0x44, 0xc1, 0x88, 0x01,
-  0x02, 0x80, 0x20, 0x18, 0x48, 0x69, 0x2e, 0xb4, 0xb8, 0xc0, 0xdf, 0x02,
-  0x12, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0xa4, 0xe6, 0x82, 0x8b,
-  0x0b, 0xf8, 0x2d, 0x74, 0xc6, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x48,
-  0x6b, 0x2e, 0xbc, 0xb8, 0x80, 0xdf, 0x02, 0x15, 0x8c, 0x18, 0x20, 0x00,
-  0x08, 0x82, 0x81, 0xc4, 0xe6, 0x02, 0x8c, 0x0b, 0xfe, 0x2d, 0x2c, 0xc1,
-  0x88, 0x41, 0x03, 0x80, 0x20, 0x18, 0x58, 0x6b, 0x2e, 0xc0, 0xb8, 0x10,
-  0xe2, 0xc2, 0x1d, 0x2c, 0x0a, 0x18, 0x20, 0x44, 0x70, 0x81, 0x31, 0x23,
-  0x06, 0x0e, 0x00, 0x82, 0x60, 0xd0, 0xc4, 0xb9, 0xe0, 0xe2, 0x02, 0x7e,
-  0x0b, 0xf1, 0x2d, 0x90, 0xb9, 0x10, 0xa0, 0xb8, 0x80, 0xe2, 0x02, 0x8a,
-  0x0b, 0x27, 0x2e, 0x98, 0xb9, 0x30, 0x4b, 0x30, 0x42, 0x08, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00
-};
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_uint16_float.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_uint16_float.h
index 76baba6b6fb6b..9477617935a76 100644
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_uint16_float.h
+++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_uint16_float.h
@@ -15,7 +15,7 @@
 ; Name                 Index   Mask Register SysValue  Format   Used
 ; -------------------- ----- ------ -------- -------- ------- ------
 ; no parameters
-; shader hash: ba9d0e133349aaafd23272810ce3ef5d
+; shader hash: 53261f05f9a353f9047267f2f0de0f6c
 ;
 ; Pipeline Runtime Information: 
 ;
@@ -68,7 +68,7 @@ target triple = "dxil-ms-dx"
 %dx.types.CBufRet.i32 = type { i32, i32, i32, i32 }
 %dx.types.ResRet.f32 = type { float, float, float, float, i32 }
 %dx.types.ResRet.i16 = type { i16, i16, i16, i16, i32 }
-%"class.RWStructuredBuffer<unsigned short>" = type { i16 }
+%"class.RWStructuredBuffer<uint16_t>" = type { i16 }
 %"class.RWStructuredBuffer<float>" = type { float }
 %Constants = type { i32, i32, i32, i32, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32 }
 
@@ -4476,10 +4476,10 @@ attributes #2 = { nounwind }
 !3 = !{!"cs", i32 6, i32 2}
 !4 = !{null, !5, !10, null}
 !5 = !{!6, !7, !9}
-!6 = !{i32 0, %"class.RWStructuredBuffer<unsigned short>"* undef, !"", i32 0, i32 0, i32 1, i32 12, i1 false, i1 false, i1 false, !1}
+!6 = !{i32 0, %"class.RWStructuredBuffer<uint16_t>"* undef, !"", i32 0, i32 0, i32 1, i32 12, i1 false, i1 false, i1 false, !1}
 !7 = !{i32 1, %"class.RWStructuredBuffer<float>"* undef, !"", i32 0, i32 1, i32 1, i32 12, i1 false, i1 false, i1 false, !8}
 !8 = !{i32 1, i32 4}
-!9 = !{i32 2, %"class.RWStructuredBuffer<unsigned short>"* undef, !"", i32 0, i32 2, i32 1, i32 12, i1 false, i1 false, i1 false, !1}
+!9 = !{i32 2, %"class.RWStructuredBuffer<uint16_t>"* undef, !"", i32 0, i32 2, i32 1, i32 12, i1 false, i1 false, i1 false, !1}
 !10 = !{!11}
 !11 = !{i32 0, %Constants* undef, !"", i32 0, i32 0, i32 1, i32 116, null}
 !12 = !{void ()* @GridSample, !"GridSample", null, !4, !13}
@@ -4489,9 +4489,9 @@ attributes #2 = { nounwind }
 #endif
 
 const unsigned char g_GridSample[] = {
-  0x44, 0x58, 0x42, 0x43, 0x88, 0x55, 0xa2, 0x66, 0xf8, 0x8d, 0x7c, 0x33,
-  0xdd, 0x62, 0x99, 0x2c, 0xca, 0xaa, 0x8b, 0x48, 0x01, 0x00, 0x00, 0x00,
-  0x44, 0x54, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00,
+  0x44, 0x58, 0x42, 0x43, 0xea, 0x8d, 0xff, 0xe8, 0xf4, 0xd2, 0x32, 0x30,
+  0x7c, 0xc4, 0x54, 0x3e, 0x03, 0x91, 0xd9, 0xd5, 0x01, 0x00, 0x00, 0x00,
+  0x3c, 0x54, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00,
   0x48, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00,
   0x18, 0x01, 0x00, 0x00, 0x34, 0x01, 0x00, 0x00, 0x53, 0x46, 0x49, 0x30,
   0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -4513,12 +4513,12 @@ const unsigned char g_GridSample[] = {
   0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
   0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
   0x00, 0x00, 0x00, 0x00, 0x48, 0x41, 0x53, 0x48, 0x14, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0xba, 0x9d, 0x0e, 0x13, 0x33, 0x49, 0xaa, 0xaf,
-  0xd2, 0x32, 0x72, 0x81, 0x0c, 0xe3, 0xef, 0x5d, 0x44, 0x58, 0x49, 0x4c,
-  0x08, 0x53, 0x00, 0x00, 0x62, 0x00, 0x05, 0x00, 0xc2, 0x14, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x53, 0x26, 0x1f, 0x05, 0xf9, 0xa3, 0x53, 0xf9,
+  0x04, 0x72, 0x67, 0xf2, 0xf0, 0xde, 0x0f, 0x6c, 0x44, 0x58, 0x49, 0x4c,
+  0x00, 0x53, 0x00, 0x00, 0x62, 0x00, 0x05, 0x00, 0xc0, 0x14, 0x00, 0x00,
   0x44, 0x58, 0x49, 0x4c, 0x02, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
-  0xf0, 0x52, 0x00, 0x00, 0x42, 0x43, 0xc0, 0xde, 0x21, 0x0c, 0x00, 0x00,
-  0xb9, 0x14, 0x00, 0x00, 0x0b, 0x82, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00,
+  0xe8, 0x52, 0x00, 0x00, 0x42, 0x43, 0xc0, 0xde, 0x21, 0x0c, 0x00, 0x00,
+  0xb7, 0x14, 0x00, 0x00, 0x0b, 0x82, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00,
   0x13, 0x00, 0x00, 0x00, 0x07, 0x81, 0x23, 0x91, 0x41, 0xc8, 0x04, 0x49,
   0x06, 0x10, 0x32, 0x39, 0x92, 0x01, 0x84, 0x0c, 0x25, 0x05, 0x08, 0x19,
   0x1e, 0x04, 0x8b, 0x62, 0x80, 0x18, 0x45, 0x02, 0x42, 0x92, 0x0b, 0x42,
@@ -4531,7 +4531,7 @@ const unsigned char g_GridSample[] = {
   0x01, 0xd5, 0x06, 0x62, 0xf8, 0xff, 0xff, 0xff, 0xff, 0x01, 0x90, 0x00,
   0x49, 0x18, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x13, 0x82, 0x60, 0x42,
   0x20, 0x4c, 0x08, 0x06, 0x00, 0x00, 0x00, 0x00, 0x89, 0x20, 0x00, 0x00,
-  0x59, 0x00, 0x00, 0x00, 0x32, 0x22, 0x88, 0x09, 0x20, 0x64, 0x85, 0x04,
+  0x57, 0x00, 0x00, 0x00, 0x32, 0x22, 0x88, 0x09, 0x20, 0x64, 0x85, 0x04,
   0x13, 0x23, 0xa4, 0x84, 0x04, 0x13, 0x23, 0xe3, 0x84, 0xa1, 0x90, 0x14,
   0x12, 0x4c, 0x8c, 0x8c, 0x0b, 0x84, 0xc4, 0x4c, 0x10, 0xb4, 0xc1, 0x08,
   0x40, 0x09, 0x00, 0x0a, 0xe6, 0x08, 0xc0, 0xa0, 0x0c, 0xc3, 0x30, 0x10,
@@ -4548,1743 +4548,1742 @@ const unsigned char g_GridSample[] = {
   0x86, 0x61, 0x18, 0x06, 0xda, 0x8e, 0x1a, 0x2e, 0x7f, 0xc2, 0x1e, 0x42,
   0xf2, 0xb9, 0x8d, 0x2a, 0x56, 0x62, 0xf2, 0x91, 0xdb, 0x46, 0xc4, 0x30,
   0x0c, 0x43, 0x21, 0xba, 0x41, 0x19, 0xc8, 0x9b, 0x23, 0x08, 0x8a, 0xa1,
-  0x0c, 0xc8, 0x30, 0x80, 0x14, 0x0e, 0x04, 0xcc, 0x84, 0x06, 0xe3, 0xc0,
+  0x0c, 0xc8, 0x30, 0x80, 0x14, 0x0e, 0x04, 0xcc, 0x24, 0x06, 0xe3, 0xc0,
   0x0e, 0xe1, 0x30, 0x0f, 0xf3, 0xe0, 0x06, 0xb2, 0x70, 0x0b, 0xb3, 0x40,
   0x0f, 0xf2, 0x50, 0x0f, 0xe3, 0x40, 0x0f, 0xf5, 0x20, 0x0f, 0xe5, 0x40,
   0x0e, 0xa2, 0x50, 0x0f, 0xe6, 0x60, 0x0e, 0xe5, 0x20, 0x0f, 0x7c, 0x50,
-  0x0f, 0xee, 0x30, 0x0f, 0xe9, 0x70, 0x0e, 0xee, 0x50, 0x0e, 0xe4, 0x00,
-  0x06, 0xf3, 0x80, 0x0e, 0xef, 0x20, 0x0f, 0xf4, 0xe0, 0x07, 0x28, 0x30,
-  0x88, 0x9c, 0xe9, 0x1b, 0x07, 0x76, 0x08, 0x87, 0x79, 0x98, 0x07, 0x37,
-  0x90, 0x85, 0x5b, 0x98, 0x05, 0x7a, 0x90, 0x87, 0x7a, 0x18, 0x07, 0x7a,
-  0xa8, 0x07, 0x79, 0x28, 0x07, 0x72, 0x10, 0x85, 0x7a, 0x30, 0x07, 0x73,
-  0x28, 0x07, 0x79, 0xe0, 0x03, 0x73, 0x60, 0x87, 0x77, 0x08, 0x07, 0x7a,
-  0xf0, 0x03, 0x14, 0x3c, 0x64, 0x0e, 0x23, 0x10, 0xc3, 0x25, 0x9c, 0xd3,
-  0x48, 0x13, 0xd0, 0x4c, 0x12, 0x5a, 0x86, 0x61, 0x18, 0x50, 0x14, 0x45,
-  0x51, 0x74, 0xa0, 0x74, 0x8e, 0x00, 0x14, 0xa6, 0x00, 0x00, 0x00, 0x00,
-  0x13, 0x14, 0x72, 0xc0, 0x87, 0x74, 0x60, 0x87, 0x36, 0x68, 0x87, 0x79,
-  0x68, 0x03, 0x72, 0xc0, 0x87, 0x0d, 0xae, 0x50, 0x0e, 0x6d, 0xd0, 0x0e,
-  0x7a, 0x50, 0x0e, 0x6d, 0x00, 0x0f, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07,
-  0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0xa0, 0x07, 0x73, 0x20, 0x07,
-  0x6d, 0x90, 0x0e, 0x78, 0xa0, 0x07, 0x78, 0xd0, 0x06, 0xe9, 0x10, 0x07,
-  0x76, 0xa0, 0x07, 0x71, 0x60, 0x07, 0x6d, 0x90, 0x0e, 0x73, 0x20, 0x07,
-  0x7a, 0x30, 0x07, 0x72, 0xd0, 0x06, 0xe9, 0x60, 0x07, 0x74, 0xa0, 0x07,
-  0x76, 0x40, 0x07, 0x6d, 0x60, 0x0e, 0x71, 0x60, 0x07, 0x7a, 0x10, 0x07,
-  0x76, 0xd0, 0x06, 0xe6, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07,
-  0x6d, 0x60, 0x0e, 0x76, 0x40, 0x07, 0x7a, 0x60, 0x07, 0x74, 0xd0, 0x06,
-  0xee, 0x80, 0x07, 0x7a, 0x10, 0x07, 0x76, 0xa0, 0x07, 0x73, 0x20, 0x07,
-  0x7a, 0x60, 0x07, 0x74, 0x30, 0xe4, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0xc8, 0x43, 0x00, 0x01, 0x10, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x90, 0x67, 0x01, 0x02, 0x40,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x21, 0x4f, 0x03, 0x04,
-  0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x43, 0x1e, 0x08,
-  0x08, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0x3c,
-  0x12, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c,
-  0x79, 0x28, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x18, 0xf2, 0x58, 0x40, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x30, 0xe4, 0xc9, 0x80, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x60, 0xc8, 0xb3, 0x01, 0x01, 0x10, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0xc0, 0x90, 0xc7, 0x03, 0x02, 0x40, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x80, 0x21, 0x4f, 0x18, 0x00, 0x01, 0x20, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x16, 0x08, 0x00, 0x00, 0x00,
-  0x0d, 0x00, 0x00, 0x00, 0x32, 0x1e, 0x98, 0x14, 0x19, 0x11, 0x4c, 0x90,
-  0x8c, 0x09, 0x26, 0x47, 0xc6, 0x04, 0x43, 0x1a, 0x4a, 0xa0, 0x08, 0x8a,
-  0x61, 0x04, 0xa0, 0x30, 0x0a, 0xa1, 0xd0, 0x03, 0x0a, 0x30, 0x80, 0xc0,
-  0x11, 0x00, 0x5a, 0x0b, 0x1c, 0x10, 0x10, 0x81, 0xce, 0x19, 0x00, 0x52,
-  0x67, 0x00, 0xa8, 0x9c, 0x01, 0x00, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00,
-  0x4a, 0x00, 0x00, 0x00, 0x1a, 0x03, 0x4c, 0x90, 0x46, 0x02, 0x13, 0x44,
-  0x35, 0x18, 0x63, 0x0b, 0x73, 0x3b, 0x03, 0xb1, 0x2b, 0x93, 0x9b, 0x4b,
-  0x7b, 0x73, 0x03, 0x99, 0x71, 0xb9, 0x01, 0x41, 0xa1, 0x0b, 0x3b, 0x9b,
-  0x7b, 0x91, 0x2a, 0x62, 0x2a, 0x0a, 0x9a, 0x2a, 0xfa, 0x9a, 0xb9, 0x81,
-  0x79, 0x31, 0x4b, 0x73, 0x0b, 0x63, 0x4b, 0xd9, 0x10, 0x04, 0x13, 0x84,
-  0x81, 0x99, 0x20, 0x0c, 0xcd, 0x06, 0x61, 0x20, 0x26, 0x08, 0x83, 0xb3,
-  0x41, 0x18, 0x0c, 0x0a, 0x63, 0x73, 0x1b, 0x06, 0xc4, 0x20, 0x26, 0x08,
-  0xc3, 0x33, 0x41, 0x28, 0x03, 0x8c, 0xc0, 0x04, 0x61, 0x80, 0x26, 0x08,
-  0x60, 0x40, 0x6d, 0x58, 0x94, 0x85, 0x51, 0x94, 0xa1, 0x71, 0x1c, 0xa7,
-  0x98, 0x20, 0x9c, 0x81, 0x35, 0x41, 0x18, 0xa2, 0x0d, 0xc2, 0x10, 0x6d,
-  0x58, 0x06, 0x88, 0x51, 0x86, 0xa1, 0x71, 0x1c, 0x47, 0xda, 0xb0, 0x10,
-  0x0b, 0xa3, 0x10, 0x43, 0xe3, 0x38, 0x4e, 0xb1, 0x61, 0x78, 0x26, 0x6a,
-  0x82, 0xa0, 0x06, 0xd7, 0x04, 0x61, 0x90, 0x36, 0x20, 0x8a, 0xc5, 0x28,
-  0xca, 0x70, 0x01, 0x1b, 0x02, 0x6c, 0x03, 0x01, 0x54, 0x19, 0x30, 0x41,
-  0x10, 0x00, 0x2a, 0x47, 0x72, 0x69, 0x64, 0x53, 0x61, 0x6d, 0x70, 0x6c,
-  0x65, 0x13, 0x84, 0x35, 0xa8, 0x26, 0x08, 0xc3, 0xb4, 0x61, 0xf0, 0x86,
-  0x61, 0x03, 0xa1, 0x74, 0xd1, 0xb7, 0xa1, 0xd8, 0x38, 0x40, 0x03, 0x83,
-  0x2a, 0x6c, 0x6c, 0x76, 0x6d, 0x2e, 0x69, 0x64, 0x65, 0x6e, 0x74, 0x53,
-  0x82, 0xa0, 0x0a, 0x19, 0x9e, 0x8b, 0x5d, 0x99, 0xdc, 0x5c, 0xda, 0x9b,
-  0xdb, 0x94, 0x80, 0x68, 0x42, 0x86, 0xe7, 0x62, 0x17, 0xc6, 0x66, 0x57,
-  0x26, 0x37, 0x25, 0x30, 0xea, 0x90, 0xe1, 0xb9, 0xcc, 0xa1, 0x85, 0x91,
-  0x95, 0xc9, 0x35, 0xbd, 0x91, 0x95, 0xb1, 0x4d, 0x09, 0x90, 0x32, 0x64,
-  0x78, 0x2e, 0x72, 0x65, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x73, 0x53,
-  0x82, 0xac, 0x0e, 0x19, 0x9e, 0x4b, 0x99, 0x1b, 0x9d, 0x5c, 0x1e, 0xd4,
-  0x5b, 0x9a, 0x1b, 0xdd, 0xdc, 0x94, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00,
-  0x79, 0x18, 0x00, 0x00, 0x51, 0x00, 0x00, 0x00, 0x33, 0x08, 0x80, 0x1c,
-  0xc4, 0xe1, 0x1c, 0x66, 0x14, 0x01, 0x3d, 0x88, 0x43, 0x38, 0x84, 0xc3,
-  0x8c, 0x42, 0x80, 0x07, 0x79, 0x78, 0x07, 0x73, 0x98, 0x71, 0x0c, 0xe6,
-  0x00, 0x0f, 0xed, 0x10, 0x0e, 0xf4, 0x80, 0x0e, 0x33, 0x0c, 0x42, 0x1e,
-  0xc2, 0xc1, 0x1d, 0xce, 0xa1, 0x1c, 0x66, 0x30, 0x05, 0x3d, 0x88, 0x43,
-  0x38, 0x84, 0x83, 0x1b, 0xcc, 0x03, 0x3d, 0xc8, 0x43, 0x3d, 0x8c, 0x03,
-  0x3d, 0xcc, 0x78, 0x8c, 0x74, 0x70, 0x07, 0x7b, 0x08, 0x07, 0x79, 0x48,
-  0x87, 0x70, 0x70, 0x07, 0x7a, 0x70, 0x03, 0x76, 0x78, 0x87, 0x70, 0x20,
-  0x87, 0x19, 0xcc, 0x11, 0x0e, 0xec, 0x90, 0x0e, 0xe1, 0x30, 0x0f, 0x6e,
-  0x30, 0x0f, 0xe3, 0xf0, 0x0e, 0xf0, 0x50, 0x0e, 0x33, 0x10, 0xc4, 0x1d,
-  0xde, 0x21, 0x1c, 0xd8, 0x21, 0x1d, 0xc2, 0x61, 0x1e, 0x66, 0x30, 0x89,
-  0x3b, 0xbc, 0x83, 0x3b, 0xd0, 0x43, 0x39, 0xb4, 0x03, 0x3c, 0xbc, 0x83,
-  0x3c, 0x84, 0x03, 0x3b, 0xcc, 0xf0, 0x14, 0x76, 0x60, 0x07, 0x7b, 0x68,
-  0x07, 0x37, 0x68, 0x87, 0x72, 0x68, 0x07, 0x37, 0x80, 0x87, 0x70, 0x90,
-  0x87, 0x70, 0x60, 0x07, 0x76, 0x28, 0x07, 0x76, 0xf8, 0x05, 0x76, 0x78,
-  0x87, 0x77, 0x80, 0x87, 0x5f, 0x08, 0x87, 0x71, 0x18, 0x87, 0x72, 0x98,
-  0x87, 0x79, 0x98, 0x81, 0x2c, 0xee, 0xf0, 0x0e, 0xee, 0xe0, 0x0e, 0xf5,
-  0xc0, 0x0e, 0xec, 0x30, 0x03, 0x62, 0xc8, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c,
-  0xcc, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xdc, 0x61, 0x1c, 0xca, 0x21, 0x1c,
-  0xc4, 0x81, 0x1d, 0xca, 0x61, 0x06, 0xd6, 0x90, 0x43, 0x39, 0xc8, 0x43,
-  0x39, 0x98, 0x43, 0x39, 0xc8, 0x43, 0x39, 0xb8, 0xc3, 0x38, 0x94, 0x43,
-  0x38, 0x88, 0x03, 0x3b, 0x94, 0xc3, 0x2f, 0xbc, 0x83, 0x3c, 0xfc, 0x82,
-  0x3b, 0xd4, 0x03, 0x3b, 0xb0, 0xc3, 0x0c, 0xc4, 0x21, 0x07, 0x7c, 0x70,
-  0x03, 0x7a, 0x28, 0x87, 0x76, 0x80, 0x87, 0x19, 0xd1, 0x43, 0x0e, 0xf8,
-  0xe0, 0x06, 0xe4, 0x20, 0x0e, 0xe7, 0xe0, 0x06, 0xf6, 0x10, 0x0e, 0xf2,
-  0xc0, 0x0e, 0xe1, 0x90, 0x0f, 0xef, 0x50, 0x0f, 0xf4, 0x30, 0x83, 0x81,
-  0xc8, 0x01, 0x1f, 0xdc, 0x40, 0x1c, 0xe4, 0xa1, 0x1c, 0xc2, 0x61, 0x1d,
-  0xdc, 0x40, 0x1c, 0xe4, 0x01, 0x00, 0x00, 0x00, 0x71, 0x20, 0x00, 0x00,
-  0x31, 0x00, 0x00, 0x00, 0x06, 0xa0, 0x80, 0x11, 0x32, 0xb0, 0x00, 0xf3,
-  0x2c, 0x84, 0x19, 0x40, 0xc3, 0xe5, 0x3b, 0x8f, 0x1f, 0x20, 0x0d, 0x10,
-  0x61, 0x7e, 0x71, 0xdb, 0xa6, 0xb0, 0x0d, 0x97, 0xef, 0x3c, 0xbe, 0x10,
-  0x50, 0x45, 0x41, 0x44, 0xa5, 0x03, 0x0c, 0x25, 0x61, 0x00, 0x02, 0xe6,
-  0x23, 0xb7, 0x6d, 0x0b, 0xd2, 0x70, 0xf9, 0xce, 0xe3, 0x0b, 0x11, 0x01,
-  0x4c, 0x44, 0x08, 0x34, 0xc3, 0x42, 0xd8, 0x81, 0x33, 0x5c, 0xbe, 0xf3,
-  0xf8, 0x83, 0x33, 0xe1, 0x7e, 0x71, 0xdb, 0x86, 0x70, 0x0d, 0x97, 0xef,
-  0x3c, 0x7e, 0x04, 0x58, 0x1b, 0x55, 0x14, 0x44, 0x54, 0x3a, 0xc0, 0xe0,
-  0x17, 0xb7, 0x6d, 0x02, 0xd7, 0x70, 0xf9, 0xce, 0xe3, 0x47, 0x80, 0xb5,
-  0x51, 0x45, 0x41, 0x44, 0xa5, 0x03, 0x0c, 0x3e, 0x52, 0xeb, 0x36, 0x80,
-  0x0d, 0x97, 0xef, 0x3c, 0x7e, 0x04, 0x58, 0x1b, 0x55, 0x14, 0x44, 0xc4,
-  0x4e, 0x4e, 0x44, 0xf8, 0x48, 0xad, 0x5b, 0x81, 0x34, 0x5c, 0xbe, 0xf3,
-  0xf8, 0x13, 0x11, 0x4d, 0x08, 0x10, 0x61, 0x7e, 0x71, 0xdb, 0x96, 0x20,
-  0x0d, 0x97, 0xef, 0x3c, 0xfe, 0x44, 0x44, 0x13, 0x02, 0x44, 0x98, 0x8f,
-  0xdc, 0xb6, 0x05, 0x48, 0xc3, 0xe5, 0x3b, 0x8f, 0x3f, 0x1d, 0x11, 0x01,
-  0x0c, 0xe2, 0xe0, 0x23, 0xb7, 0x6d, 0x04, 0xcf, 0x70, 0xf9, 0xce, 0xe3,
-  0x53, 0x0d, 0x10, 0x61, 0x7e, 0x71, 0xdb, 0x00, 0x61, 0x20, 0x00, 0x00,
-  0x0b, 0x13, 0x00, 0x00, 0x13, 0x04, 0x24, 0x14, 0x0b, 0x04, 0x00, 0x00,
-  0x1d, 0x00, 0x00, 0x00, 0x34, 0x14, 0x58, 0xd9, 0x95, 0xa5, 0x40, 0x0d,
-  0x94, 0x51, 0x21, 0x15, 0x57, 0xc1, 0x95, 0x5c, 0xd9, 0x14, 0x4b, 0x61,
-  0x0a, 0x14, 0x4d, 0xe9, 0x06, 0x94, 0x43, 0x29, 0x90, 0x31, 0x03, 0x40,
-  0x48, 0x09, 0x14, 0x01, 0x3d, 0x23, 0x00, 0x63, 0x04, 0x20, 0x08, 0x82,
-  0xf8, 0x37, 0x46, 0x00, 0x82, 0x20, 0x48, 0xff, 0xc2, 0x18, 0x01, 0x08,
-  0x82, 0x20, 0xfd, 0x8d, 0x11, 0x80, 0x20, 0x08, 0xf2, 0xdf, 0x18, 0x01,
-  0x08, 0x82, 0x20, 0xfe, 0x0b, 0x63, 0x04, 0x20, 0x08, 0x82, 0x21, 0x38,
-  0x8c, 0x11, 0x80, 0x20, 0x08, 0xea, 0xdf, 0x18, 0x01, 0x08, 0x82, 0xa0,
-  0xfe, 0x0b, 0x63, 0x04, 0x20, 0x08, 0x82, 0xf0, 0x37, 0x46, 0x00, 0x82,
-  0x20, 0x08, 0xff, 0xc2, 0x18, 0x01, 0x08, 0x82, 0x20, 0x08, 0x06, 0x00,
-  0x23, 0x06, 0x09, 0x00, 0x82, 0x60, 0x10, 0x06, 0x6d, 0x70, 0x39, 0x6b,
-  0xb0, 0x06, 0x64, 0x30, 0x62, 0x90, 0x00, 0x20, 0x08, 0x06, 0x61, 0xe0,
-  0x06, 0xd8, 0xd3, 0x06, 0x6d, 0x50, 0x06, 0x23, 0x06, 0x09, 0x00, 0x82,
-  0x60, 0x10, 0x06, 0x6f, 0x90, 0x41, 0x6b, 0xb0, 0x06, 0x66, 0x30, 0x62,
-  0x90, 0x00, 0x20, 0x08, 0x06, 0x61, 0x00, 0x07, 0x1a, 0xc4, 0x06, 0x6c,
-  0x70, 0x06, 0x23, 0x06, 0x06, 0x00, 0x82, 0x60, 0x40, 0xec, 0x81, 0xd5,
-  0x06, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xe0, 0xcd, 0xc1, 0x18, 0x08,
-  0x6e, 0x30, 0x9a, 0x10, 0x00, 0x15, 0x0c, 0x30, 0x9a, 0x30, 0x04, 0xc3,
-  0x0d, 0x42, 0x40, 0x06, 0xb3, 0x0c, 0xc1, 0x08, 0x05, 0x23, 0x06, 0x07,
-  0x00, 0x82, 0x60, 0xe0, 0xe1, 0x01, 0x1a, 0x1c, 0x76, 0x30, 0x9a, 0x10,
-  0x0c, 0x17, 0x3c, 0x35, 0x9a, 0x30, 0x08, 0x17, 0x3c, 0x35, 0x62, 0x70,
-  0x00, 0x20, 0x08, 0x06, 0x5e, 0x1f, 0xb4, 0x01, 0x03, 0x06, 0xa3, 0x09,
-  0x01, 0x30, 0xdc, 0x10, 0xe8, 0x01, 0x18, 0x4c, 0x37, 0x50, 0x5e, 0x30,
-  0xdd, 0x50, 0x69, 0x42, 0x21, 0x01, 0x4c, 0x37, 0x5c, 0x1c, 0x51, 0x48,
-  0x00, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xe0, 0x95, 0x42, 0x1d, 0x50,
-  0x67, 0x30, 0x9a, 0x10, 0x04, 0xa3, 0x09, 0x82, 0x30, 0x9a, 0x30, 0x0c,
-  0x15, 0x08, 0x52, 0x03, 0x21, 0x15, 0x0c, 0x52, 0x57, 0x30, 0x23, 0x06,
-  0x07, 0x00, 0x82, 0x60, 0xe0, 0xb5, 0x42, 0x1f, 0x70, 0xa9, 0x30, 0x9a,
-  0x10, 0x00, 0x15, 0x0c, 0x52, 0x5b, 0x10, 0x15, 0x20, 0x33, 0x9a, 0x50,
-  0x04, 0x15, 0x08, 0x52, 0x44, 0x10, 0x15, 0x34, 0x33, 0x9a, 0x90, 0x08,
-  0x15, 0x08, 0x52, 0x44, 0x10, 0xd7, 0x3c, 0x75, 0xc5, 0x53, 0x37, 0x3c,
-  0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x1e, 0x2f, 0xb0, 0xc2, 0x1a,
-  0xd4, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20,
-  0x8c, 0x26, 0x10, 0xc3, 0x11, 0x4f, 0x1d, 0xf1, 0xd4, 0x11, 0x4f, 0x1d,
-  0xf1, 0xd4, 0x88, 0x41, 0x03, 0x80, 0x20, 0x18, 0x58, 0xe8, 0xf0, 0x0a,
-  0xcc, 0xa2, 0x8c, 0x02, 0x31, 0x08, 0x81, 0x09, 0x01, 0x7c, 0x4e, 0x18,
-  0x66, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x32, 0x75, 0xb8, 0x85, 0x3c,
-  0x08, 0xc6, 0x01, 0x15, 0xc2, 0x61, 0x34, 0x21, 0x00, 0x8e, 0x18, 0x66,
-  0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0xb2, 0x76, 0xd0, 0x05, 0x3e, 0x08,
-  0xcc, 0x61, 0x15, 0xc8, 0x61, 0x34, 0x21, 0x00, 0x86, 0x1b, 0xde, 0x60,
-  0x1d, 0xc0, 0xc0, 0x8a, 0x55, 0x80, 0x8f, 0x0d, 0xac, 0x00, 0x9f, 0x59,
-  0x06, 0x61, 0x18, 0x4c, 0x28, 0x05, 0xf9, 0x98, 0x60, 0x0a, 0xf2, 0x31,
-  0x3c, 0x58, 0x05, 0xf8, 0xd8, 0x1d, 0xb0, 0x02, 0x7c, 0x8c, 0x10, 0xe4,
-  0x63, 0x84, 0x20, 0x9f, 0x59, 0x02, 0xc2, 0xf8, 0x00, 0x91, 0x8f, 0xed,
-  0x01, 0x22, 0x1f, 0x13, 0x62, 0x01, 0x3e, 0x26, 0xc8, 0x02, 0x7c, 0x4c,
-  0x78, 0x05, 0xf9, 0x98, 0x00, 0x0b, 0xf2, 0x99, 0x25, 0x20, 0x06, 0x2a,
-  0x1e, 0x48, 0x20, 0x86, 0x81, 0x8a, 0x07, 0x12, 0x88, 0x61, 0x34, 0x61,
-  0x15, 0x84, 0xe1, 0x86, 0xa0, 0x1f, 0xc0, 0x60, 0x96, 0xa1, 0x30, 0x82,
-  0x11, 0x03, 0x03, 0x00, 0x41, 0x30, 0x80, 0x4c, 0xa2, 0x1c, 0x88, 0x11,
-  0x03, 0x03, 0x00, 0x41, 0x30, 0x80, 0x4e, 0xc2, 0x1c, 0x88, 0x59, 0x02,
-  0x63, 0xa0, 0xe2, 0x21, 0x0a, 0x86, 0x18, 0xa8, 0x78, 0x88, 0x82, 0x21,
-  0x86, 0x23, 0x04, 0x52, 0x20, 0xbe, 0xe1, 0x88, 0x61, 0x14, 0x84, 0xaf,
-  0x84, 0x60, 0x87, 0x23, 0x88, 0x53, 0x20, 0xbe, 0x12, 0x82, 0x1d, 0x8e,
-  0x30, 0x4a, 0x41, 0xf8, 0x2a, 0x10, 0x76, 0x96, 0xe1, 0xd0, 0x82, 0xd1,
-  0x04, 0x5c, 0x18, 0x86, 0x1b, 0x02, 0x95, 0x00, 0x83, 0x59, 0x06, 0x24,
-  0x09, 0x8a, 0x16, 0xf6, 0x01, 0x2e, 0x78, 0x6a, 0xc4, 0xe0, 0x00, 0x40,
-  0x10, 0x0c, 0xa8, 0x99, 0xe0, 0x87, 0xa6, 0x1d, 0x46, 0x0c, 0x0e, 0x00,
-  0x04, 0xc1, 0x80, 0xa2, 0x09, 0x7e, 0x08, 0x84, 0xb2, 0x85, 0x7f, 0x80,
-  0x0b, 0x9e, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0xea, 0x26, 0x40,
-  0x02, 0x8a, 0x87, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0x70, 0x02,
-  0x24, 0x02, 0x61, 0x96, 0x40, 0x1b, 0x6e, 0x50, 0x64, 0x02, 0x0c, 0x66,
-  0x19, 0x14, 0x2d, 0x30, 0x5a, 0xb0, 0x85, 0xf8, 0xcc, 0x32, 0x2c, 0xce,
-  0x64, 0xb7, 0x50, 0xc5, 0xc7, 0x02, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0xc1,
-  0x53, 0x16, 0x14, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x3b, 0xa1, 0xc3,
-  0x0d, 0x41, 0x4e, 0x80, 0xc1, 0x2c, 0x03, 0xd3, 0x04, 0x36, 0xfc, 0x02,
-  0x7c, 0x66, 0x09, 0x24, 0xf3, 0x05, 0x22, 0x3e, 0xb3, 0x04, 0xd2, 0x2c,
-  0xc3, 0x23, 0x71, 0xf6, 0xfd, 0x42, 0x7c, 0x2c, 0x60, 0xe8, 0x73, 0xc1,
-  0x30, 0x17, 0x3c, 0x65, 0xc1, 0x23, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x51,
-  0x16, 0x3a, 0xdc, 0x10, 0x8c, 0x05, 0x18, 0xcc, 0x32, 0x40, 0x51, 0x60,
-  0xe7, 0x30, 0xc4, 0x67, 0x96, 0x40, 0x32, 0x42, 0x1d, 0xe0, 0x33, 0x4b,
-  0x20, 0x0d, 0xb4, 0x3c, 0x18, 0x63, 0x35, 0x04, 0x24, 0x44, 0xb2, 0xe0,
-  0x18, 0x3a, 0xb0, 0x43, 0x7c, 0x66, 0x19, 0x26, 0xcb, 0x0c, 0xac, 0x1d,
-  0xd4, 0x20, 0x3e, 0x16, 0x08, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2,
-  0xa0, 0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x88, 0x0b, 0x1d, 0x6e, 0x08,
-  0xde, 0x02, 0x0c, 0x66, 0x19, 0xa8, 0x2a, 0xb0, 0xa1, 0x1e, 0xe0, 0x33,
-  0x4b, 0xa0, 0x99, 0x3c, 0x10, 0xf1, 0x99, 0x25, 0xd0, 0x66, 0x19, 0x2e,
-  0xcd, 0x0d, 0x8c, 0x0e, 0xe6, 0x21, 0x3e, 0x16, 0x30, 0xf4, 0xb9, 0x60,
-  0x98, 0x0b, 0x9e, 0xb2, 0xe0, 0x91, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xd8,
-  0x0b, 0x1d, 0x6e, 0x08, 0xf2, 0x02, 0x0c, 0x66, 0x19, 0xb0, 0x2c, 0xb0,
-  0x7d, 0x18, 0xe2, 0x33, 0x4b, 0xa0, 0x19, 0x01, 0x12, 0xf0, 0x99, 0x25,
-  0xd0, 0x06, 0x8a, 0x1e, 0x71, 0x40, 0xfc, 0x21, 0xf1, 0x07, 0x83, 0x0d,
-  0x32, 0x36, 0xc0, 0xd8, 0xc0, 0x62, 0x83, 0x8a, 0x0d, 0xa8, 0x81, 0xa2,
-  0x87, 0x17, 0x10, 0x7f, 0x48, 0xfc, 0xc1, 0x20, 0x32, 0x03, 0xf3, 0x07,
-  0x0b, 0xab, 0x34, 0xea, 0xe4, 0xe1, 0xa9, 0x59, 0x86, 0x6d, 0x0e, 0x4a,
-  0x61, 0x34, 0x21, 0x26, 0x86, 0xe1, 0x86, 0x00, 0x34, 0xc0, 0x60, 0x96,
-  0x81, 0xf3, 0x82, 0xe1, 0x88, 0x82, 0x2c, 0x86, 0xef, 0x8c, 0x61, 0x86,
-  0x1b, 0x82, 0x97, 0x20, 0x83, 0x1a, 0x02, 0x1d, 0x8e, 0x40, 0xd0, 0x62,
-  0xf8, 0x2a, 0x10, 0xf4, 0x94, 0x61, 0x86, 0x1b, 0x02, 0x99, 0x20, 0x83,
-  0x0a, 0x06, 0x9d, 0x65, 0xe8, 0xe4, 0x20, 0x38, 0x7b, 0x18, 0xe6, 0x9a,
-  0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0xc0, 0x8b, 0x8d, 0xd0, 0x00,
-  0x0b, 0xd7, 0x18, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18,
-  0x84, 0xd1, 0x04, 0x62, 0x28, 0xe2, 0x90, 0x11, 0x03, 0x04, 0x00, 0x41,
-  0x30, 0xd8, 0x70, 0x03, 0x35, 0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10,
-  0x04, 0x83, 0x2d, 0x37, 0x52, 0x83, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00,
-  0x41, 0x30, 0xd8, 0x74, 0x43, 0x35, 0x24, 0x22, 0x18, 0x31, 0x50, 0x00,
-  0x10, 0x04, 0x83, 0x45, 0x3c, 0x52, 0x43, 0x2d, 0x82, 0xda, 0xd0, 0x8b,
-  0xdb, 0x18, 0x4d, 0x08, 0x80, 0x0b, 0x9e, 0x9a, 0x25, 0x90, 0x83, 0xe1,
-  0x86, 0x6c, 0x37, 0xc0, 0x60, 0x96, 0xe1, 0x03, 0x83, 0xa0, 0xca, 0x82,
-  0x35, 0xe0, 0x82, 0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x22,
-  0x8f, 0xd6, 0xf8, 0xfc, 0x62, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xa8,
-  0xf2, 0x68, 0x8d, 0x40, 0xb8, 0x60, 0x98, 0x42, 0x8b, 0xd8, 0x80, 0x0b,
-  0x9e, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x2a, 0x3d, 0x64, 0x63,
-  0x0c, 0x46, 0x63, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x28, 0xf5, 0x90,
-  0x8d, 0x40, 0xb8, 0x60, 0x98, 0x0b, 0x9e, 0xba, 0xe3, 0xa9, 0x8b, 0x89,
-  0x61, 0x0e, 0x0d, 0x86, 0x39, 0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38,
-  0x00, 0x10, 0x04, 0x03, 0xcf, 0x3d, 0x7c, 0xa3, 0x2f, 0xd6, 0x63, 0x34,
-  0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88,
-  0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0xab, 0x8f,
-  0xf2, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x36, 0xfb,
-  0x30, 0x8f, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0xbb,
-  0x8f, 0xf3, 0x48, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x96,
-  0xff, 0x30, 0x8f, 0xd3, 0x08, 0xe4, 0xe3, 0x36, 0xe8, 0x63, 0x34, 0x21,
-  0x00, 0x2e, 0x78, 0x6a, 0x96, 0x40, 0x0e, 0x86, 0x1b, 0xec, 0xe0, 0x3e,
-  0xc0, 0x60, 0x96, 0x21, 0x0c, 0xe4, 0x20, 0xb0, 0xbc, 0xd8, 0x8b, 0xf8,
-  0x0c, 0x47, 0xec, 0x01, 0x5f, 0x10, 0xdf, 0x2c, 0x83, 0x18, 0x94, 0x41,
-  0x60, 0x7d, 0xc1, 0x07, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3, 0x5c,
-  0xf0, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x44, 0x88, 0xe8,
-  0x70, 0x43, 0xf0, 0x1f, 0x60, 0x30, 0xcb, 0x30, 0x06, 0x64, 0x10, 0xd8,
-  0x50, 0x1a, 0xf0, 0x99, 0x25, 0x48, 0x03, 0x23, 0x0d, 0x22, 0x3e, 0xb3,
-  0x04, 0x69, 0x30, 0x1c, 0x61, 0x0a, 0xa5, 0x21, 0x7c, 0xb3, 0x0c, 0x66,
-  0x90, 0x06, 0x81, 0x9d, 0x82, 0x69, 0xc4, 0xc7, 0x02, 0x87, 0x3e, 0x17,
-  0x0c, 0x73, 0xc1, 0x53, 0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11,
-  0x2c, 0xa2, 0xc3, 0x0d, 0x81, 0x8a, 0x80, 0xc1, 0x2c, 0xc3, 0x19, 0xa0,
-  0x41, 0x60, 0xae, 0x31, 0xc4, 0x67, 0x96, 0x20, 0x0d, 0x8c, 0x88, 0x0d,
-  0xf8, 0xcc, 0x12, 0xa4, 0xc1, 0x40, 0xcb, 0xa3, 0x8d, 0x01, 0x46, 0x06,
-  0xc4, 0x19, 0x08, 0x68, 0x20, 0x16, 0x65, 0x70, 0xc1, 0x30, 0x06, 0x1b,
-  0xb4, 0x11, 0x9f, 0xe1, 0x88, 0x59, 0xa8, 0x0d, 0xe2, 0x9b, 0x65, 0x50,
-  0x83, 0x36, 0x08, 0xcc, 0x36, 0x68, 0x21, 0x3e, 0x16, 0x0c, 0xf4, 0xb9,
-  0x60, 0x98, 0x0b, 0x9e, 0xb2, 0xc0, 0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a,
-  0xd0, 0x11, 0x1d, 0x6e, 0x08, 0x70, 0x04, 0x0c, 0x66, 0x19, 0xd6, 0x80,
-  0x0d, 0x02, 0x1b, 0x7c, 0x03, 0x3e, 0xb3, 0x04, 0x71, 0x60, 0xbb, 0x41,
-  0xc4, 0x67, 0x96, 0x20, 0x0e, 0x86, 0x23, 0x7c, 0x81, 0x37, 0x84, 0x6f,
-  0x96, 0xc1, 0x0d, 0xe2, 0x20, 0xb0, 0x5f, 0xe8, 0x8d, 0xf8, 0x58, 0xe0,
-  0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca, 0x82, 0x48, 0x3e, 0x56, 0x04,
-  0xf1, 0x29, 0xa2, 0x4c, 0x74, 0xb8, 0x21, 0x18, 0x13, 0x30, 0x98, 0x65,
-  0x78, 0x03, 0x38, 0x08, 0xac, 0x3c, 0x86, 0xf8, 0xcc, 0x12, 0xc4, 0x81,
-  0x11, 0xea, 0x01, 0x9f, 0x59, 0x82, 0x38, 0x18, 0x68, 0x79, 0xb4, 0x35,
-  0xc0, 0xd8, 0x80, 0x78, 0x03, 0x01, 0x0e, 0x64, 0xa3, 0x0d, 0x2e, 0x18,
-  0xe6, 0x82, 0xa7, 0x6e, 0x7b, 0xea, 0x6c, 0x63, 0x98, 0x6b, 0x87, 0x61,
-  0x8e, 0x18, 0xe6, 0x88, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0xc0,
-  0x9b, 0x93, 0x31, 0x11, 0x11, 0x38, 0x19, 0x4d, 0x08, 0x80, 0xd1, 0x04,
-  0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0x22, 0x91, 0x11,
-  0x03, 0x04, 0x00, 0x41, 0x30, 0xd8, 0xf4, 0x44, 0x4d, 0x12, 0x22, 0x18,
-  0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x6d, 0x4f, 0xd6, 0x24, 0x21, 0x82,
-  0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xd8, 0xf8, 0x84, 0x4d, 0x12, 0x22,
-  0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x85, 0x54, 0xd6, 0x84, 0x45,
-  0x82, 0x3b, 0xe1, 0x91, 0x3c, 0x19, 0x4d, 0x08, 0x80, 0x0b, 0x9e, 0x9a,
-  0x25, 0x90, 0x83, 0x81, 0x96, 0xc7, 0x34, 0x3a, 0x3d, 0xe2, 0x58, 0xe2,
-  0x13, 0xe2, 0x40, 0x8f, 0xc0, 0xe0, 0x02, 0x63, 0x46, 0x0c, 0x1c, 0x00,
-  0x04, 0xc1, 0xa0, 0x41, 0x95, 0x33, 0x99, 0x11, 0x16, 0xe1, 0x93, 0x40,
-  0x4c, 0xc4, 0x44, 0x4c, 0xc2, 0xc4, 0x4f, 0x66, 0x09, 0x46, 0x68, 0xb8,
-  0x61, 0x34, 0xf8, 0x04, 0x0c, 0x66, 0x19, 0xe8, 0x20, 0x26, 0x82, 0x11,
-  0x03, 0x03, 0x00, 0x41, 0x30, 0x80, 0x50, 0x45, 0x4d, 0x42, 0x62, 0xc4,
-  0xc0, 0x00, 0x40, 0x10, 0x0c, 0xa0, 0x54, 0x59, 0x93, 0x90, 0x30, 0x21,
-  0x4c, 0xe0, 0x63, 0x82, 0x98, 0xc0, 0x67, 0x34, 0x61, 0x46, 0x86, 0xe1,
-  0x86, 0x40, 0x54, 0xc0, 0x60, 0x96, 0xa1, 0x0e, 0xee, 0x20, 0x18, 0x8e,
-  0x30, 0xcc, 0x64, 0xf8, 0xee, 0x18, 0x66, 0xb8, 0x21, 0x88, 0x11, 0x32,
-  0xa8, 0x21, 0xd0, 0xe1, 0x88, 0x44, 0x4d, 0x86, 0xaf, 0x02, 0x41, 0x6f,
-  0x19, 0x66, 0xb8, 0x21, 0xa0, 0x11, 0x32, 0xa8, 0x60, 0xd0, 0x59, 0x06,
-  0x3b, 0x58, 0x85, 0xe0, 0xf0, 0x63, 0x98, 0x7b, 0x89, 0x61, 0x46, 0x0c,
-  0x0e, 0x00, 0x04, 0xc1, 0xc0, 0x9b, 0x95, 0x51, 0x11, 0x13, 0x58, 0x19,
-  0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04,
-  0x62, 0x28, 0xe2, 0x90, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xd8, 0x74,
-  0x45, 0x55, 0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x6d,
-  0x57, 0x56, 0x85, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xd8,
-  0x78, 0x85, 0x55, 0x24, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83,
-  0x85, 0x5c, 0x56, 0x85, 0x4d, 0x82, 0x5b, 0xe1, 0x93, 0x5c, 0x19, 0x4d,
-  0x08, 0x80, 0x0b, 0x9e, 0x9a, 0x25, 0x58, 0x85, 0xe1, 0x86, 0xac, 0x57,
-  0xc0, 0x60, 0x96, 0x01, 0x0f, 0xf2, 0x20, 0xa8, 0x33, 0x71, 0x15, 0xb8,
-  0xe0, 0xa9, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0xcc, 0xe5, 0x55,
-  0xc0, 0x00, 0x54, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x3a, 0x97,
-  0x57, 0x09, 0x84, 0x0b, 0x86, 0x29, 0x35, 0x99, 0x15, 0xb8, 0xe0, 0xa9,
-  0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0xd6, 0x85, 0x56, 0xc8, 0xa0,
-  0x54, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x62, 0x17, 0x5a, 0x09,
-  0x84, 0x0b, 0x86, 0xb9, 0xe0, 0xa9, 0x3b, 0x9e, 0xba, 0x19, 0x19, 0xe6,
-  0xd4, 0x62, 0x98, 0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00,
-  0x41, 0x30, 0xf0, 0xe0, 0x05, 0x5c, 0xfe, 0xa4, 0x5d, 0x46, 0x13, 0x02,
-  0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a,
-  0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb6, 0x7b, 0x39, 0x97,
-  0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0xc3, 0x17, 0x74,
-  0x49, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb6, 0x7c, 0x49,
-  0x97, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0x09, 0x19,
-  0x74, 0x49, 0x95, 0x80, 0x5e, 0x72, 0xc5, 0x5e, 0x46, 0x13, 0x02, 0xe0,
-  0x82, 0xa7, 0x66, 0x09, 0x56, 0x61, 0xb8, 0xc1, 0x0e, 0xf2, 0x05, 0x0c,
-  0x66, 0x19, 0xf4, 0x60, 0x15, 0x02, 0xdb, 0x93, 0x3e, 0x89, 0xcf, 0x70,
-  0x04, 0x1f, 0xf8, 0x09, 0xf1, 0xcd, 0x32, 0xec, 0x81, 0x1f, 0x04, 0xf6,
-  0x27, 0x7d, 0x10, 0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f,
-  0x59, 0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0x8c, 0x8c, 0x0e, 0x37,
-  0x04, 0x21, 0x03, 0x06, 0xb3, 0x0c, 0x7c, 0xd0, 0x07, 0x81, 0x0d, 0xa7,
-  0x02, 0x9f, 0x59, 0x02, 0x51, 0x30, 0x53, 0x21, 0xe2, 0x33, 0x4b, 0x20,
-  0x0a, 0xc3, 0x11, 0xa7, 0x70, 0x2a, 0xc2, 0x37, 0xcb, 0xf0, 0x07, 0xa2,
-  0x10, 0x18, 0x2a, 0xa0, 0x4a, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30,
-  0x17, 0x3c, 0x65, 0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xe1, 0x32,
-  0x3a, 0xdc, 0x10, 0xb0, 0x0c, 0x18, 0xcc, 0x32, 0x80, 0x42, 0x28, 0x04,
-  0x06, 0x2b, 0x43, 0x7c, 0x66, 0x09, 0x44, 0xc1, 0x88, 0x59, 0x81, 0xcf,
-  0x2c, 0x81, 0x28, 0x0c, 0xb4, 0x3c, 0x1a, 0x1f, 0x60, 0x7d, 0x40, 0x80,
-  0x82, 0x10, 0x0a, 0x64, 0xe1, 0x07, 0x17, 0x0c, 0x63, 0xb2, 0x62, 0x2b,
-  0xf1, 0x19, 0x8e, 0xa0, 0x85, 0x5b, 0x21, 0xbe, 0x59, 0x86, 0x51, 0x30,
-  0x85, 0xc0, 0x70, 0xa5, 0x16, 0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86,
-  0xb9, 0xe0, 0x29, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0x9e,
-  0xd1, 0xe1, 0x86, 0x40, 0x67, 0xc0, 0x60, 0x96, 0x81, 0x14, 0x4a, 0x21,
-  0xb0, 0x01, 0x5c, 0xe0, 0x33, 0x4b, 0xa0, 0x0a, 0xd6, 0x2b, 0x44, 0x7c,
-  0x66, 0x09, 0x54, 0x61, 0x38, 0xe2, 0x17, 0x7c, 0x45, 0xf8, 0x66, 0x19,
-  0x4e, 0x41, 0x15, 0x02, 0x03, 0x87, 0x5f, 0x89, 0x8f, 0x05, 0x0e, 0x7d,
-  0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f,
-  0x22, 0xce, 0x46, 0x87, 0x1b, 0x82, 0xb2, 0x01, 0x83, 0x59, 0x06, 0x54,
-  0x48, 0x85, 0xc0, 0xce, 0x65, 0x88, 0xcf, 0x2c, 0x81, 0x2a, 0x18, 0xc1,
-  0x2e, 0xf0, 0x99, 0x25, 0x50, 0x85, 0x81, 0x96, 0x47, 0x23, 0x05, 0xac,
-  0x14, 0x08, 0x54, 0x10, 0x52, 0x81, 0x36, 0x4c, 0xe1, 0x82, 0x61, 0x2e,
-  0x78, 0xea, 0xb6, 0xa7, 0x0e, 0x57, 0x86, 0xb9, 0xf7, 0x18, 0xe6, 0x88,
-  0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xbc, 0xba,
-  0x29, 0x1b, 0x92, 0x91, 0x9b, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82,
-  0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x12, 0x19, 0x31, 0x40,
-  0x00, 0x10, 0x04, 0x83, 0x8d, 0x6f, 0xd8, 0x26, 0x21, 0x82, 0x11, 0x03,
-  0x04, 0x00, 0x41, 0x30, 0xd8, 0xfa, 0xa6, 0x6d, 0x12, 0x22, 0x18, 0x31,
-  0x40, 0x00, 0x10, 0x04, 0x83, 0xcd, 0x6f, 0xdc, 0x26, 0x21, 0x82, 0x11,
-  0x03, 0x05, 0x00, 0x41, 0x30, 0x58, 0x4c, 0xa7, 0x6d, 0x5c, 0x26, 0xc8,
-  0x1b, 0x9f, 0xd9, 0x9b, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xa9, 0x59, 0x82,
-  0x55, 0x18, 0x68, 0x79, 0x4c, 0xc3, 0x0e, 0xfc, 0xac, 0x0e, 0x58, 0x02,
-  0x0f, 0x04, 0x55, 0xf0, 0xb3, 0x3c, 0x98, 0x65, 0x60, 0x05, 0x57, 0xd8,
-  0x87, 0xe1, 0x08, 0x7f, 0x00, 0x9b, 0xe1, 0xbb, 0x7f, 0x18, 0x66, 0xb8,
-  0x21, 0x58, 0x19, 0x32, 0xa8, 0x21, 0xd0, 0xe1, 0x88, 0x91, 0x20, 0x9b,
-  0xe1, 0xab, 0x40, 0xd0, 0x2b, 0x89, 0x61, 0x86, 0x1b, 0x02, 0x97, 0x21,
-  0x83, 0x0a, 0x06, 0x9d, 0x65, 0x68, 0x05, 0x71, 0x08, 0x4e, 0x5e, 0x86,
-  0xb9, 0x14, 0x19, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xbc, 0xd6,
-  0xe9, 0x1b, 0x9e, 0x51, 0x9d, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82,
+  0x0f, 0xe9, 0xe0, 0x0e, 0xf4, 0x10, 0x07, 0x76, 0xf0, 0x0b, 0xf4, 0xe0,
+  0x07, 0x28, 0x30, 0x88, 0x9c, 0xe9, 0x1b, 0x07, 0x76, 0x08, 0x87, 0x79,
+  0x98, 0x07, 0x37, 0x90, 0x85, 0x5b, 0x98, 0x05, 0x7a, 0x90, 0x87, 0x7a,
+  0x18, 0x07, 0x7a, 0xa8, 0x07, 0x79, 0x28, 0x07, 0x72, 0x10, 0x85, 0x7a,
+  0x30, 0x07, 0x73, 0x28, 0x07, 0x79, 0xe0, 0x03, 0x73, 0x60, 0x87, 0x77,
+  0x08, 0x07, 0x7a, 0xf0, 0x03, 0x14, 0x3c, 0x64, 0x0e, 0x23, 0x10, 0xc3,
+  0x25, 0x9c, 0xd3, 0x48, 0x13, 0xd0, 0x4c, 0x12, 0x5a, 0x86, 0x61, 0x18,
+  0x50, 0x14, 0x45, 0x51, 0x74, 0xa0, 0x74, 0x8e, 0x00, 0x14, 0xa6, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x13, 0x14, 0x72, 0xc0, 0x87, 0x74, 0x60, 0x87,
+  0x36, 0x68, 0x87, 0x79, 0x68, 0x03, 0x72, 0xc0, 0x87, 0x0d, 0xae, 0x50,
+  0x0e, 0x6d, 0xd0, 0x0e, 0x7a, 0x50, 0x0e, 0x6d, 0x00, 0x0f, 0x7a, 0x30,
+  0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0xa0,
+  0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x78, 0xa0, 0x07, 0x78, 0xd0,
+  0x06, 0xe9, 0x10, 0x07, 0x76, 0xa0, 0x07, 0x71, 0x60, 0x07, 0x6d, 0x90,
+  0x0e, 0x73, 0x20, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xd0, 0x06, 0xe9, 0x60,
+  0x07, 0x74, 0xa0, 0x07, 0x76, 0x40, 0x07, 0x6d, 0x60, 0x0e, 0x71, 0x60,
+  0x07, 0x7a, 0x10, 0x07, 0x76, 0xd0, 0x06, 0xe6, 0x30, 0x07, 0x72, 0xa0,
+  0x07, 0x73, 0x20, 0x07, 0x6d, 0x60, 0x0e, 0x76, 0x40, 0x07, 0x7a, 0x60,
+  0x07, 0x74, 0xd0, 0x06, 0xee, 0x80, 0x07, 0x7a, 0x10, 0x07, 0x76, 0xa0,
+  0x07, 0x73, 0x20, 0x07, 0x7a, 0x60, 0x07, 0x74, 0x30, 0xe4, 0x09, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0xc8, 0x43,
+  0x00, 0x01, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x90,
+  0x67, 0x01, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80,
+  0x21, 0x4f, 0x03, 0x04, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x43, 0x1e, 0x08, 0x08, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x86, 0x3c, 0x12, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x0c, 0x79, 0x28, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x18, 0xf2, 0x58, 0x40, 0x00, 0x04, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0xe4, 0xc9, 0x80, 0x00, 0x10, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0xc8, 0xb3, 0x01, 0x01, 0x10,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x90, 0xc7, 0x03, 0x02,
+  0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x21, 0x4f, 0x18,
+  0x00, 0x01, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x16,
+  0x08, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x32, 0x1e, 0x98, 0x14,
+  0x19, 0x11, 0x4c, 0x90, 0x8c, 0x09, 0x26, 0x47, 0xc6, 0x04, 0x43, 0x1a,
+  0x4a, 0xa0, 0x08, 0x8a, 0x61, 0x04, 0xa0, 0x30, 0x0a, 0xa1, 0xd0, 0x03,
+  0x0a, 0x30, 0x80, 0xc0, 0x11, 0x00, 0x5a, 0x0b, 0x1c, 0x10, 0x10, 0x81,
+  0xce, 0x19, 0x00, 0x52, 0x67, 0x00, 0xa8, 0x9c, 0x01, 0x00, 0x00, 0x00,
+  0x79, 0x18, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00, 0x1a, 0x03, 0x4c, 0x90,
+  0x46, 0x02, 0x13, 0x44, 0x35, 0x18, 0x63, 0x0b, 0x73, 0x3b, 0x03, 0xb1,
+  0x2b, 0x93, 0x9b, 0x4b, 0x7b, 0x73, 0x03, 0x99, 0x71, 0xb9, 0x01, 0x41,
+  0xa1, 0x0b, 0x3b, 0x9b, 0x7b, 0x91, 0x2a, 0x62, 0x2a, 0x0a, 0x9a, 0x2a,
+  0xfa, 0x9a, 0xb9, 0x81, 0x79, 0x31, 0x4b, 0x73, 0x0b, 0x63, 0x4b, 0xd9,
+  0x10, 0x04, 0x13, 0x84, 0x81, 0x99, 0x20, 0x0c, 0xcd, 0x06, 0x61, 0x20,
+  0x26, 0x08, 0x83, 0xb3, 0x41, 0x18, 0x0c, 0x0a, 0x63, 0x73, 0x1b, 0x06,
+  0xc4, 0x20, 0x26, 0x08, 0xc3, 0x33, 0x41, 0x28, 0x03, 0x8c, 0xc0, 0x04,
+  0x61, 0x80, 0x26, 0x08, 0x60, 0x40, 0x6d, 0x58, 0x94, 0x85, 0x51, 0x94,
+  0xa1, 0x71, 0x1c, 0xa7, 0x98, 0x20, 0x9c, 0x81, 0x35, 0x41, 0x18, 0xa2,
+  0x0d, 0xc2, 0x10, 0x6d, 0x58, 0x06, 0x88, 0x51, 0x86, 0xa1, 0x71, 0x1c,
+  0x47, 0xda, 0xb0, 0x10, 0x0b, 0xa3, 0x10, 0x43, 0xe3, 0x38, 0x4e, 0xb1,
+  0x61, 0x78, 0x26, 0x6a, 0x82, 0xa0, 0x06, 0xd7, 0x04, 0x61, 0x90, 0x36,
+  0x20, 0x8a, 0xc5, 0x28, 0xca, 0x70, 0x01, 0x1b, 0x02, 0x6c, 0x03, 0x01,
+  0x54, 0x19, 0x30, 0x41, 0x10, 0x00, 0x2a, 0x47, 0x72, 0x69, 0x64, 0x53,
+  0x61, 0x6d, 0x70, 0x6c, 0x65, 0x13, 0x84, 0x35, 0xa8, 0x26, 0x08, 0xc3,
+  0xb4, 0x61, 0xf0, 0x86, 0x61, 0x03, 0xa1, 0x74, 0xd1, 0xb7, 0xa1, 0xd8,
+  0x38, 0x40, 0x03, 0x83, 0x2a, 0x6c, 0x6c, 0x76, 0x6d, 0x2e, 0x69, 0x64,
+  0x65, 0x6e, 0x74, 0x53, 0x82, 0xa0, 0x0a, 0x19, 0x9e, 0x8b, 0x5d, 0x99,
+  0xdc, 0x5c, 0xda, 0x9b, 0xdb, 0x94, 0x80, 0x68, 0x42, 0x86, 0xe7, 0x62,
+  0x17, 0xc6, 0x66, 0x57, 0x26, 0x37, 0x25, 0x30, 0xea, 0x90, 0xe1, 0xb9,
+  0xcc, 0xa1, 0x85, 0x91, 0x95, 0xc9, 0x35, 0xbd, 0x91, 0x95, 0xb1, 0x4d,
+  0x09, 0x90, 0x32, 0x64, 0x78, 0x2e, 0x72, 0x65, 0x73, 0x6f, 0x75, 0x72,
+  0x63, 0x65, 0x73, 0x53, 0x82, 0xac, 0x0e, 0x19, 0x9e, 0x4b, 0x99, 0x1b,
+  0x9d, 0x5c, 0x1e, 0xd4, 0x5b, 0x9a, 0x1b, 0xdd, 0xdc, 0x94, 0x00, 0x0c,
+  0x00, 0x00, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00, 0x51, 0x00, 0x00, 0x00,
+  0x33, 0x08, 0x80, 0x1c, 0xc4, 0xe1, 0x1c, 0x66, 0x14, 0x01, 0x3d, 0x88,
+  0x43, 0x38, 0x84, 0xc3, 0x8c, 0x42, 0x80, 0x07, 0x79, 0x78, 0x07, 0x73,
+  0x98, 0x71, 0x0c, 0xe6, 0x00, 0x0f, 0xed, 0x10, 0x0e, 0xf4, 0x80, 0x0e,
+  0x33, 0x0c, 0x42, 0x1e, 0xc2, 0xc1, 0x1d, 0xce, 0xa1, 0x1c, 0x66, 0x30,
+  0x05, 0x3d, 0x88, 0x43, 0x38, 0x84, 0x83, 0x1b, 0xcc, 0x03, 0x3d, 0xc8,
+  0x43, 0x3d, 0x8c, 0x03, 0x3d, 0xcc, 0x78, 0x8c, 0x74, 0x70, 0x07, 0x7b,
+  0x08, 0x07, 0x79, 0x48, 0x87, 0x70, 0x70, 0x07, 0x7a, 0x70, 0x03, 0x76,
+  0x78, 0x87, 0x70, 0x20, 0x87, 0x19, 0xcc, 0x11, 0x0e, 0xec, 0x90, 0x0e,
+  0xe1, 0x30, 0x0f, 0x6e, 0x30, 0x0f, 0xe3, 0xf0, 0x0e, 0xf0, 0x50, 0x0e,
+  0x33, 0x10, 0xc4, 0x1d, 0xde, 0x21, 0x1c, 0xd8, 0x21, 0x1d, 0xc2, 0x61,
+  0x1e, 0x66, 0x30, 0x89, 0x3b, 0xbc, 0x83, 0x3b, 0xd0, 0x43, 0x39, 0xb4,
+  0x03, 0x3c, 0xbc, 0x83, 0x3c, 0x84, 0x03, 0x3b, 0xcc, 0xf0, 0x14, 0x76,
+  0x60, 0x07, 0x7b, 0x68, 0x07, 0x37, 0x68, 0x87, 0x72, 0x68, 0x07, 0x37,
+  0x80, 0x87, 0x70, 0x90, 0x87, 0x70, 0x60, 0x07, 0x76, 0x28, 0x07, 0x76,
+  0xf8, 0x05, 0x76, 0x78, 0x87, 0x77, 0x80, 0x87, 0x5f, 0x08, 0x87, 0x71,
+  0x18, 0x87, 0x72, 0x98, 0x87, 0x79, 0x98, 0x81, 0x2c, 0xee, 0xf0, 0x0e,
+  0xee, 0xe0, 0x0e, 0xf5, 0xc0, 0x0e, 0xec, 0x30, 0x03, 0x62, 0xc8, 0xa1,
+  0x1c, 0xe4, 0xa1, 0x1c, 0xcc, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xdc, 0x61,
+  0x1c, 0xca, 0x21, 0x1c, 0xc4, 0x81, 0x1d, 0xca, 0x61, 0x06, 0xd6, 0x90,
+  0x43, 0x39, 0xc8, 0x43, 0x39, 0x98, 0x43, 0x39, 0xc8, 0x43, 0x39, 0xb8,
+  0xc3, 0x38, 0x94, 0x43, 0x38, 0x88, 0x03, 0x3b, 0x94, 0xc3, 0x2f, 0xbc,
+  0x83, 0x3c, 0xfc, 0x82, 0x3b, 0xd4, 0x03, 0x3b, 0xb0, 0xc3, 0x0c, 0xc4,
+  0x21, 0x07, 0x7c, 0x70, 0x03, 0x7a, 0x28, 0x87, 0x76, 0x80, 0x87, 0x19,
+  0xd1, 0x43, 0x0e, 0xf8, 0xe0, 0x06, 0xe4, 0x20, 0x0e, 0xe7, 0xe0, 0x06,
+  0xf6, 0x10, 0x0e, 0xf2, 0xc0, 0x0e, 0xe1, 0x90, 0x0f, 0xef, 0x50, 0x0f,
+  0xf4, 0x30, 0x83, 0x81, 0xc8, 0x01, 0x1f, 0xdc, 0x40, 0x1c, 0xe4, 0xa1,
+  0x1c, 0xc2, 0x61, 0x1d, 0xdc, 0x40, 0x1c, 0xe4, 0x01, 0x00, 0x00, 0x00,
+  0x71, 0x20, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x06, 0xa0, 0x80, 0x11,
+  0x32, 0xb0, 0x00, 0xf3, 0x2c, 0x84, 0x19, 0x40, 0xc3, 0xe5, 0x3b, 0x8f,
+  0x1f, 0x20, 0x0d, 0x10, 0x61, 0x7e, 0x71, 0xdb, 0xa6, 0xb0, 0x0d, 0x97,
+  0xef, 0x3c, 0xbe, 0x10, 0x50, 0x45, 0x41, 0x44, 0xa5, 0x03, 0x0c, 0x25,
+  0x61, 0x00, 0x02, 0xe6, 0x23, 0xb7, 0x6d, 0x0b, 0xd2, 0x70, 0xf9, 0xce,
+  0xe3, 0x0b, 0x11, 0x01, 0x4c, 0x44, 0x08, 0x34, 0xc3, 0x42, 0xd8, 0x81,
+  0x33, 0x5c, 0xbe, 0xf3, 0xf8, 0x83, 0x33, 0xe1, 0x7e, 0x71, 0xdb, 0x86,
+  0x70, 0x0d, 0x97, 0xef, 0x3c, 0x7e, 0x04, 0x58, 0x1b, 0x55, 0x14, 0x44,
+  0x54, 0x3a, 0xc0, 0xe0, 0x17, 0xb7, 0x6d, 0x02, 0xd7, 0x70, 0xf9, 0xce,
+  0xe3, 0x47, 0x80, 0xb5, 0x51, 0x45, 0x41, 0x44, 0xa5, 0x03, 0x0c, 0x3e,
+  0x52, 0xeb, 0x36, 0x80, 0x0d, 0x97, 0xef, 0x3c, 0x7e, 0x04, 0x58, 0x1b,
+  0x55, 0x14, 0x44, 0xc4, 0x4e, 0x4e, 0x44, 0xf8, 0x48, 0xad, 0x5b, 0x81,
+  0x34, 0x5c, 0xbe, 0xf3, 0xf8, 0x13, 0x11, 0x4d, 0x08, 0x10, 0x61, 0x7e,
+  0x71, 0xdb, 0x96, 0x20, 0x0d, 0x97, 0xef, 0x3c, 0xfe, 0x44, 0x44, 0x13,
+  0x02, 0x44, 0x98, 0x8f, 0xdc, 0xb6, 0x05, 0x48, 0xc3, 0xe5, 0x3b, 0x8f,
+  0x3f, 0x1d, 0x11, 0x01, 0x0c, 0xe2, 0xe0, 0x23, 0xb7, 0x6d, 0x04, 0xcf,
+  0x70, 0xf9, 0xce, 0xe3, 0x53, 0x0d, 0x10, 0x61, 0x7e, 0x71, 0xdb, 0x00,
+  0x61, 0x20, 0x00, 0x00, 0x0b, 0x13, 0x00, 0x00, 0x13, 0x04, 0x24, 0x14,
+  0x0b, 0x04, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, 0x34, 0x14, 0x58, 0xd9,
+  0x95, 0xa5, 0x40, 0x0d, 0x94, 0x51, 0x21, 0x15, 0x57, 0xc1, 0x95, 0x5c,
+  0xd9, 0x14, 0x4b, 0x61, 0x0a, 0x14, 0x4d, 0xe9, 0x06, 0x94, 0x43, 0x29,
+  0x90, 0x31, 0x03, 0x40, 0x48, 0x09, 0x14, 0x01, 0x3d, 0x23, 0x00, 0x63,
+  0x04, 0x20, 0x08, 0x82, 0xf8, 0x37, 0x46, 0x00, 0x82, 0x20, 0x48, 0xff,
+  0xc2, 0x18, 0x01, 0x08, 0x82, 0x20, 0xfd, 0x8d, 0x11, 0x80, 0x20, 0x08,
+  0xf2, 0xdf, 0x18, 0x01, 0x08, 0x82, 0x20, 0xfe, 0x0b, 0x63, 0x04, 0x20,
+  0x08, 0x82, 0x21, 0x38, 0x8c, 0x11, 0x80, 0x20, 0x08, 0xea, 0xdf, 0x18,
+  0x01, 0x08, 0x82, 0xa0, 0xfe, 0x0b, 0x63, 0x04, 0x20, 0x08, 0x82, 0xf0,
+  0x37, 0x46, 0x00, 0x82, 0x20, 0x08, 0xff, 0xc2, 0x18, 0x01, 0x08, 0x82,
+  0x20, 0x08, 0x06, 0x00, 0x23, 0x06, 0x09, 0x00, 0x82, 0x60, 0x10, 0x06,
+  0x6d, 0x70, 0x39, 0x6b, 0xb0, 0x06, 0x64, 0x30, 0x62, 0x90, 0x00, 0x20,
+  0x08, 0x06, 0x61, 0xe0, 0x06, 0xd8, 0xd3, 0x06, 0x6d, 0x50, 0x06, 0x23,
+  0x06, 0x09, 0x00, 0x82, 0x60, 0x10, 0x06, 0x6f, 0x90, 0x41, 0x6b, 0xb0,
+  0x06, 0x66, 0x30, 0x62, 0x90, 0x00, 0x20, 0x08, 0x06, 0x61, 0x00, 0x07,
+  0x1a, 0xc4, 0x06, 0x6c, 0x70, 0x06, 0x23, 0x06, 0x06, 0x00, 0x82, 0x60,
+  0x40, 0xec, 0x81, 0xd5, 0x06, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xe0,
+  0xcd, 0xc1, 0x18, 0x08, 0x6e, 0x30, 0x9a, 0x10, 0x00, 0x15, 0x0c, 0x30,
+  0x9a, 0x30, 0x04, 0xc3, 0x0d, 0x42, 0x40, 0x06, 0xb3, 0x0c, 0xc1, 0x08,
+  0x05, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xe0, 0xe1, 0x01, 0x1a, 0x1c,
+  0x76, 0x30, 0x9a, 0x10, 0x0c, 0x17, 0x3c, 0x35, 0x9a, 0x30, 0x08, 0x17,
+  0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x5e, 0x1f, 0xb4, 0x01,
+  0x03, 0x06, 0xa3, 0x09, 0x01, 0x30, 0xdc, 0x10, 0xe8, 0x01, 0x18, 0x4c,
+  0x37, 0x50, 0x5e, 0x30, 0xdd, 0x50, 0x69, 0x42, 0x21, 0x01, 0x4c, 0x37,
+  0x5c, 0x1c, 0x51, 0x48, 0x00, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xe0,
+  0x95, 0x42, 0x1d, 0x50, 0x67, 0x30, 0x9a, 0x10, 0x04, 0xa3, 0x09, 0x82,
+  0x30, 0x9a, 0x30, 0x0c, 0x15, 0x08, 0x52, 0x03, 0x21, 0x15, 0x0c, 0x52,
+  0x57, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xe0, 0xb5, 0x42, 0x1f,
+  0x70, 0xa9, 0x30, 0x9a, 0x10, 0x00, 0x15, 0x0c, 0x52, 0x5b, 0x10, 0x15,
+  0x20, 0x33, 0x9a, 0x50, 0x04, 0x15, 0x08, 0x52, 0x44, 0x10, 0x15, 0x34,
+  0x33, 0x9a, 0x90, 0x08, 0x15, 0x08, 0x52, 0x44, 0x10, 0xd7, 0x3c, 0x75,
+  0xc5, 0x53, 0x37, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x1e,
+  0x2f, 0xb0, 0xc2, 0x1a, 0xd4, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08,
+  0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0xc3, 0x11, 0x4f, 0x1d, 0xf1,
+  0xd4, 0x11, 0x4f, 0x1d, 0xf1, 0xd4, 0x88, 0x41, 0x03, 0x80, 0x20, 0x18,
+  0x58, 0xe8, 0xf0, 0x0a, 0xcc, 0xa2, 0x8c, 0x02, 0x31, 0x08, 0x81, 0x09,
+  0x01, 0x7c, 0x4e, 0x18, 0x66, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x32,
+  0x75, 0xb8, 0x85, 0x3c, 0x08, 0xc6, 0x01, 0x15, 0xc2, 0x61, 0x34, 0x21,
+  0x00, 0x8e, 0x18, 0x66, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0xb2, 0x76,
+  0xd0, 0x05, 0x3e, 0x08, 0xcc, 0x61, 0x15, 0xc8, 0x61, 0x34, 0x21, 0x00,
+  0x86, 0x1b, 0xde, 0x60, 0x1d, 0xc0, 0xc0, 0x8a, 0x55, 0x80, 0x8f, 0x0d,
+  0xac, 0x00, 0x9f, 0x59, 0x06, 0x61, 0x18, 0x4c, 0x28, 0x05, 0xf9, 0x98,
+  0x60, 0x0a, 0xf2, 0x31, 0x3c, 0x58, 0x05, 0xf8, 0xd8, 0x1d, 0xb0, 0x02,
+  0x7c, 0x8c, 0x10, 0xe4, 0x63, 0x84, 0x20, 0x9f, 0x59, 0x02, 0xc2, 0xf8,
+  0x00, 0x91, 0x8f, 0xed, 0x01, 0x22, 0x1f, 0x13, 0x62, 0x01, 0x3e, 0x26,
+  0xc8, 0x02, 0x7c, 0x4c, 0x78, 0x05, 0xf9, 0x98, 0x00, 0x0b, 0xf2, 0x99,
+  0x25, 0x20, 0x06, 0x2a, 0x1e, 0x48, 0x20, 0x86, 0x81, 0x8a, 0x07, 0x12,
+  0x88, 0x61, 0x34, 0x61, 0x15, 0x84, 0xe1, 0x86, 0xa0, 0x1f, 0xc0, 0x60,
+  0x96, 0xa1, 0x30, 0x82, 0x11, 0x03, 0x03, 0x00, 0x41, 0x30, 0x80, 0x4c,
+  0xa2, 0x1c, 0x88, 0x11, 0x03, 0x03, 0x00, 0x41, 0x30, 0x80, 0x4e, 0xc2,
+  0x1c, 0x88, 0x59, 0x02, 0x63, 0xa0, 0xe2, 0x21, 0x0a, 0x86, 0x18, 0xa8,
+  0x78, 0x88, 0x82, 0x21, 0x86, 0x23, 0x04, 0x52, 0x20, 0xbe, 0xe1, 0x88,
+  0x61, 0x14, 0x84, 0xaf, 0x84, 0x60, 0x87, 0x23, 0x88, 0x53, 0x20, 0xbe,
+  0x12, 0x82, 0x1d, 0x8e, 0x30, 0x4a, 0x41, 0xf8, 0x2a, 0x10, 0x76, 0x96,
+  0xe1, 0xd0, 0x82, 0xd1, 0x04, 0x5c, 0x18, 0x86, 0x1b, 0x02, 0x95, 0x00,
+  0x83, 0x59, 0x06, 0x24, 0x09, 0x8a, 0x16, 0xf6, 0x01, 0x2e, 0x78, 0x6a,
+  0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xa8, 0x99, 0xe0, 0x87, 0xa6, 0x1d,
+  0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0xa2, 0x09, 0x7e, 0x08, 0x84,
+  0xb2, 0x85, 0x7f, 0x80, 0x0b, 0x9e, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04,
+  0x03, 0xea, 0x26, 0x40, 0x02, 0x8a, 0x87, 0x11, 0x83, 0x03, 0x00, 0x41,
+  0x30, 0xa0, 0x70, 0x02, 0x24, 0x02, 0x61, 0x96, 0x40, 0x1b, 0x6e, 0x50,
+  0x64, 0x02, 0x0c, 0x66, 0x19, 0x14, 0x2d, 0x30, 0x5a, 0xb0, 0x85, 0xf8,
+  0xcc, 0x32, 0x2c, 0xce, 0x64, 0xb7, 0x50, 0xc5, 0xc7, 0x02, 0x81, 0x3e,
+  0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x14, 0xf2, 0xb1, 0x22, 0x88, 0x4f,
+  0x11, 0x3b, 0xa1, 0xc3, 0x0d, 0x41, 0x4e, 0x80, 0xc1, 0x2c, 0x03, 0xd3,
+  0x04, 0x36, 0xfc, 0x02, 0x7c, 0x66, 0x09, 0x24, 0xf3, 0x05, 0x22, 0x3e,
+  0xb3, 0x04, 0xd2, 0x2c, 0xc3, 0x23, 0x71, 0xf6, 0xfd, 0x42, 0x7c, 0x2c,
+  0x60, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0xc1, 0x23, 0x1f, 0x2b,
+  0x82, 0xf8, 0x14, 0x51, 0x16, 0x3a, 0xdc, 0x10, 0x8c, 0x05, 0x18, 0xcc,
+  0x32, 0x40, 0x51, 0x60, 0xe7, 0x30, 0xc4, 0x67, 0x96, 0x40, 0x32, 0x42,
+  0x1d, 0xe0, 0x33, 0x4b, 0x20, 0x0d, 0xb4, 0x3c, 0x18, 0x63, 0x35, 0x04,
+  0x24, 0x44, 0xb2, 0xe0, 0x18, 0x3a, 0xb0, 0x43, 0x7c, 0x66, 0x19, 0x26,
+  0xcb, 0x0c, 0xac, 0x1d, 0xd4, 0x20, 0x3e, 0x16, 0x08, 0xf4, 0xb9, 0x60,
+  0x98, 0x0b, 0x9e, 0xb2, 0xa0, 0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x88,
+  0x0b, 0x1d, 0x6e, 0x08, 0xde, 0x02, 0x0c, 0x66, 0x19, 0xa8, 0x2a, 0xb0,
+  0xa1, 0x1e, 0xe0, 0x33, 0x4b, 0xa0, 0x99, 0x3c, 0x10, 0xf1, 0x99, 0x25,
+  0xd0, 0x66, 0x19, 0x2e, 0xcd, 0x0d, 0x8c, 0x0e, 0xe6, 0x21, 0x3e, 0x16,
+  0x30, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0xe0, 0x91, 0x8f, 0x15,
+  0x41, 0x7c, 0x8a, 0xd8, 0x0b, 0x1d, 0x6e, 0x08, 0xf2, 0x02, 0x0c, 0x66,
+  0x19, 0xb0, 0x2c, 0xb0, 0x7d, 0x18, 0xe2, 0x33, 0x4b, 0xa0, 0x19, 0x01,
+  0x12, 0xf0, 0x99, 0x25, 0xd0, 0x06, 0x8a, 0x1e, 0x71, 0x40, 0xfc, 0x21,
+  0xf1, 0x07, 0x83, 0x0d, 0x32, 0x36, 0xc0, 0xd8, 0xc0, 0x62, 0x83, 0x8a,
+  0x0d, 0xa8, 0x81, 0xa2, 0x87, 0x17, 0x10, 0x7f, 0x48, 0xfc, 0xc1, 0x20,
+  0x32, 0x03, 0xf3, 0x07, 0x0b, 0xab, 0x34, 0xea, 0xe4, 0xe1, 0xa9, 0x59,
+  0x86, 0x6d, 0x0e, 0x4a, 0x61, 0x34, 0x21, 0x26, 0x86, 0xe1, 0x86, 0x00,
+  0x34, 0xc0, 0x60, 0x96, 0x81, 0xf3, 0x82, 0xe1, 0x88, 0x82, 0x2c, 0x86,
+  0xef, 0x8c, 0x61, 0x86, 0x1b, 0x82, 0x97, 0x20, 0x83, 0x1a, 0x02, 0x1d,
+  0x8e, 0x40, 0xd0, 0x62, 0xf8, 0x2a, 0x10, 0xf4, 0x94, 0x61, 0x86, 0x1b,
+  0x02, 0x99, 0x20, 0x83, 0x0a, 0x06, 0x9d, 0x65, 0xe8, 0xe4, 0x20, 0x38,
+  0x7b, 0x18, 0xe6, 0x9a, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0xc0,
+  0x8b, 0x8d, 0xd0, 0x00, 0x0b, 0xd7, 0x18, 0x4d, 0x08, 0x80, 0xd1, 0x04,
+  0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0xe2, 0x90, 0x11,
+  0x03, 0x04, 0x00, 0x41, 0x30, 0xd8, 0x70, 0x03, 0x35, 0x0e, 0x22, 0x18,
+  0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x2d, 0x37, 0x52, 0x83, 0x21, 0x82,
+  0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xd8, 0x74, 0x43, 0x35, 0x24, 0x22,
+  0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x45, 0x3c, 0x52, 0x43, 0x2d,
+  0x82, 0xda, 0xd0, 0x8b, 0xdb, 0x18, 0x4d, 0x08, 0x80, 0x0b, 0x9e, 0x9a,
+  0x25, 0x90, 0x83, 0xe1, 0x86, 0x6c, 0x37, 0xc0, 0x60, 0x96, 0xe1, 0x03,
+  0x83, 0xa0, 0xca, 0x82, 0x35, 0xe0, 0x82, 0xa7, 0x46, 0x0c, 0x0e, 0x00,
+  0x04, 0xc1, 0x80, 0x22, 0x8f, 0xd6, 0xf8, 0xfc, 0x62, 0xc4, 0xe0, 0x00,
+  0x40, 0x10, 0x0c, 0xa8, 0xf2, 0x68, 0x8d, 0x40, 0xb8, 0x60, 0x98, 0x42,
+  0x8b, 0xd8, 0x80, 0x0b, 0x9e, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03,
+  0x2a, 0x3d, 0x64, 0x63, 0x0c, 0x46, 0x63, 0xc4, 0xe0, 0x00, 0x40, 0x10,
+  0x0c, 0x28, 0xf5, 0x90, 0x8d, 0x40, 0xb8, 0x60, 0x98, 0x0b, 0x9e, 0xba,
+  0xe3, 0xa9, 0x8b, 0x89, 0x61, 0x0e, 0x0d, 0x86, 0x39, 0x62, 0x98, 0x23,
+  0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0xcf, 0x3d, 0x7c, 0xa3,
+  0x2f, 0xd6, 0x63, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61,
+  0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10, 0x00, 0x04,
+  0xc1, 0x60, 0xab, 0x8f, 0xf2, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40,
+  0x10, 0x0c, 0x36, 0xfb, 0x30, 0x8f, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00,
+  0x04, 0xc1, 0x60, 0xbb, 0x8f, 0xf3, 0x48, 0x88, 0x60, 0xc4, 0x40, 0x01,
+  0x40, 0x10, 0x0c, 0x96, 0xff, 0x30, 0x8f, 0xd3, 0x08, 0xe4, 0xe3, 0x36,
+  0xe8, 0x63, 0x34, 0x21, 0x00, 0x2e, 0x78, 0x6a, 0x96, 0x40, 0x0e, 0x86,
+  0x1b, 0xec, 0xe0, 0x3e, 0xc0, 0x60, 0x96, 0x21, 0x0c, 0xe4, 0x20, 0xb0,
+  0xbc, 0xd8, 0x8b, 0xf8, 0x0c, 0x47, 0xec, 0x01, 0x5f, 0x10, 0xdf, 0x2c,
+  0x83, 0x18, 0x94, 0x41, 0x60, 0x7d, 0xc1, 0x07, 0xf1, 0xb1, 0x60, 0xa0,
+  0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2,
+  0x53, 0x44, 0x88, 0xe8, 0x70, 0x43, 0xf0, 0x1f, 0x60, 0x30, 0xcb, 0x30,
+  0x06, 0x64, 0x10, 0xd8, 0x50, 0x1a, 0xf0, 0x99, 0x25, 0x48, 0x03, 0x23,
+  0x0d, 0x22, 0x3e, 0xb3, 0x04, 0x69, 0x30, 0x1c, 0x61, 0x0a, 0xa5, 0x21,
+  0x7c, 0xb3, 0x0c, 0x66, 0x90, 0x06, 0x81, 0x9d, 0x82, 0x69, 0xc4, 0xc7,
+  0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x44, 0xf2, 0xb1,
+  0x22, 0x88, 0x4f, 0x11, 0x2c, 0xa2, 0xc3, 0x0d, 0x81, 0x8a, 0x80, 0xc1,
+  0x2c, 0xc3, 0x19, 0xa0, 0x41, 0x60, 0xae, 0x31, 0xc4, 0x67, 0x96, 0x20,
+  0x0d, 0x8c, 0x88, 0x0d, 0xf8, 0xcc, 0x12, 0xa4, 0xc1, 0x40, 0xcb, 0xa3,
+  0x8d, 0x01, 0x46, 0x06, 0xc4, 0x19, 0x08, 0x68, 0x20, 0x16, 0x65, 0x70,
+  0xc1, 0x30, 0x06, 0x1b, 0xb4, 0x11, 0x9f, 0xe1, 0x88, 0x59, 0xa8, 0x0d,
+  0xe2, 0x9b, 0x65, 0x50, 0x83, 0x36, 0x08, 0xcc, 0x36, 0x68, 0x21, 0x3e,
+  0x16, 0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0xc0, 0x90, 0x8f,
+  0x15, 0x41, 0x7c, 0x8a, 0xd0, 0x11, 0x1d, 0x6e, 0x08, 0x70, 0x04, 0x0c,
+  0x66, 0x19, 0xd6, 0x80, 0x0d, 0x02, 0x1b, 0x7c, 0x03, 0x3e, 0xb3, 0x04,
+  0x71, 0x60, 0xbb, 0x41, 0xc4, 0x67, 0x96, 0x20, 0x0e, 0x86, 0x23, 0x7c,
+  0x81, 0x37, 0x84, 0x6f, 0x96, 0xc1, 0x0d, 0xe2, 0x20, 0xb0, 0x5f, 0xe8,
+  0x8d, 0xf8, 0x58, 0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca, 0x82,
+  0x48, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0xa2, 0x4c, 0x74, 0xb8, 0x21, 0x18,
+  0x13, 0x30, 0x98, 0x65, 0x78, 0x03, 0x38, 0x08, 0xac, 0x3c, 0x86, 0xf8,
+  0xcc, 0x12, 0xc4, 0x81, 0x11, 0xea, 0x01, 0x9f, 0x59, 0x82, 0x38, 0x18,
+  0x68, 0x79, 0xb4, 0x35, 0xc0, 0xd8, 0x80, 0x78, 0x03, 0x01, 0x0e, 0x64,
+  0xa3, 0x0d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x6e, 0x7b, 0xea, 0x6c, 0x63,
+  0x98, 0x6b, 0x87, 0x61, 0x8e, 0x18, 0xe6, 0x88, 0x61, 0x46, 0x0c, 0x0e,
+  0x00, 0x04, 0xc1, 0xc0, 0x9b, 0x93, 0x31, 0x11, 0x11, 0x38, 0x19, 0x4d,
+  0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62,
+  0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xd8, 0xf4, 0x44,
+  0x4d, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x6d, 0x4f,
+  0xd6, 0x24, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xd8, 0xf8,
+  0x84, 0x4d, 0x12, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x85,
+  0x54, 0xd6, 0x84, 0x45, 0x82, 0x3b, 0xe1, 0x91, 0x3c, 0x19, 0x4d, 0x08,
+  0x80, 0x0b, 0x9e, 0x9a, 0x25, 0x90, 0x83, 0x81, 0x96, 0xc7, 0x34, 0x3a,
+  0x3d, 0xe2, 0x58, 0xe2, 0x13, 0xe2, 0x40, 0x8f, 0xc0, 0xe0, 0x02, 0x63,
+  0x46, 0x0c, 0x1c, 0x00, 0x04, 0xc1, 0xa0, 0x41, 0x95, 0x33, 0x99, 0x11,
+  0x16, 0xe1, 0x93, 0x40, 0x4c, 0xc4, 0x44, 0x4c, 0xc2, 0xc4, 0x4f, 0x66,
+  0x09, 0x46, 0x68, 0xb8, 0x61, 0x34, 0xf8, 0x04, 0x0c, 0x66, 0x19, 0xe8,
+  0x20, 0x26, 0x82, 0x11, 0x03, 0x03, 0x00, 0x41, 0x30, 0x80, 0x50, 0x45,
+  0x4d, 0x42, 0x62, 0xc4, 0xc0, 0x00, 0x40, 0x10, 0x0c, 0xa0, 0x54, 0x59,
+  0x93, 0x90, 0x30, 0x21, 0x4c, 0xe0, 0x63, 0x82, 0x98, 0xc0, 0x67, 0x34,
+  0x61, 0x46, 0x86, 0xe1, 0x86, 0x40, 0x54, 0xc0, 0x60, 0x96, 0xa1, 0x0e,
+  0xee, 0x20, 0x18, 0x8e, 0x30, 0xcc, 0x64, 0xf8, 0xee, 0x18, 0x66, 0xb8,
+  0x21, 0x88, 0x11, 0x32, 0xa8, 0x21, 0xd0, 0xe1, 0x88, 0x44, 0x4d, 0x86,
+  0xaf, 0x02, 0x41, 0x6f, 0x19, 0x66, 0xb8, 0x21, 0xa0, 0x11, 0x32, 0xa8,
+  0x60, 0xd0, 0x59, 0x06, 0x3b, 0x58, 0x85, 0xe0, 0xf0, 0x63, 0x98, 0x7b,
+  0x89, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0xc0, 0x9b, 0x95, 0x51,
+  0x11, 0x13, 0x58, 0x19, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d,
+  0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0xe2, 0x90, 0x11, 0x03, 0x04, 0x00,
+  0x41, 0x30, 0xd8, 0x74, 0x45, 0x55, 0x0e, 0x22, 0x18, 0x31, 0x40, 0x00,
+  0x10, 0x04, 0x83, 0x6d, 0x57, 0x56, 0x85, 0x21, 0x82, 0x11, 0x03, 0x04,
+  0x00, 0x41, 0x30, 0xd8, 0x78, 0x85, 0x55, 0x24, 0x22, 0x18, 0x31, 0x50,
+  0x00, 0x10, 0x04, 0x83, 0x85, 0x5c, 0x56, 0x85, 0x4d, 0x82, 0x5b, 0xe1,
+  0x93, 0x5c, 0x19, 0x4d, 0x08, 0x80, 0x0b, 0x9e, 0x9a, 0x25, 0x58, 0x85,
+  0xe1, 0x86, 0xac, 0x57, 0xc0, 0x60, 0x96, 0x01, 0x0f, 0xf2, 0x20, 0xa8,
+  0x33, 0x71, 0x15, 0xb8, 0xe0, 0xa9, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30,
+  0xa0, 0xcc, 0xe5, 0x55, 0xc0, 0x00, 0x54, 0x46, 0x0c, 0x0e, 0x00, 0x04,
+  0xc1, 0x80, 0x3a, 0x97, 0x57, 0x09, 0x84, 0x0b, 0x86, 0x29, 0x35, 0x99,
+  0x15, 0xb8, 0xe0, 0xa9, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0xd6,
+  0x85, 0x56, 0xc8, 0xa0, 0x54, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80,
+  0x62, 0x17, 0x5a, 0x09, 0x84, 0x0b, 0x86, 0xb9, 0xe0, 0xa9, 0x3b, 0x9e,
+  0xba, 0x19, 0x19, 0xe6, 0xd4, 0x62, 0x98, 0x23, 0x86, 0x39, 0x62, 0x98,
+  0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xf0, 0xe0, 0x05, 0x5c, 0xfe, 0xa4,
+  0x5d, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61,
+  0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c,
+  0xb6, 0x7b, 0x39, 0x97, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1,
+  0x60, 0xc3, 0x17, 0x74, 0x49, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10,
+  0x0c, 0xb6, 0x7c, 0x49, 0x97, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04,
+  0xc1, 0x60, 0x09, 0x19, 0x74, 0x49, 0x95, 0x80, 0x5e, 0x72, 0xc5, 0x5e,
+  0x46, 0x13, 0x02, 0xe0, 0x82, 0xa7, 0x66, 0x09, 0x56, 0x61, 0xb8, 0xc1,
+  0x0e, 0xf2, 0x05, 0x0c, 0x66, 0x19, 0xf4, 0x60, 0x15, 0x02, 0xdb, 0x93,
+  0x3e, 0x89, 0xcf, 0x70, 0x04, 0x1f, 0xf8, 0x09, 0xf1, 0xcd, 0x32, 0xec,
+  0x81, 0x1f, 0x04, 0xf6, 0x27, 0x7d, 0x10, 0x1f, 0x0b, 0x06, 0xfa, 0x5c,
+  0x30, 0xcc, 0x05, 0x4f, 0x59, 0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45,
+  0x8c, 0x8c, 0x0e, 0x37, 0x04, 0x21, 0x03, 0x06, 0xb3, 0x0c, 0x7c, 0xd0,
+  0x07, 0x81, 0x0d, 0xa7, 0x02, 0x9f, 0x59, 0x02, 0x51, 0x30, 0x53, 0x21,
+  0xe2, 0x33, 0x4b, 0x20, 0x0a, 0xc3, 0x11, 0xa7, 0x70, 0x2a, 0xc2, 0x37,
+  0xcb, 0xf0, 0x07, 0xa2, 0x10, 0x18, 0x2a, 0xa0, 0x4a, 0x7c, 0x2c, 0x70,
+  0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x41, 0x24, 0x1f, 0x2b, 0x82,
+  0xf8, 0x14, 0xe1, 0x32, 0x3a, 0xdc, 0x10, 0xb0, 0x0c, 0x18, 0xcc, 0x32,
+  0x80, 0x42, 0x28, 0x04, 0x06, 0x2b, 0x43, 0x7c, 0x66, 0x09, 0x44, 0xc1,
+  0x88, 0x59, 0x81, 0xcf, 0x2c, 0x81, 0x28, 0x0c, 0xb4, 0x3c, 0x1a, 0x1f,
+  0x60, 0x7d, 0x40, 0x80, 0x82, 0x10, 0x0a, 0x64, 0xe1, 0x07, 0x17, 0x0c,
+  0x63, 0xb2, 0x62, 0x2b, 0xf1, 0x19, 0x8e, 0xa0, 0x85, 0x5b, 0x21, 0xbe,
+  0x59, 0x86, 0x51, 0x30, 0x85, 0xc0, 0x70, 0xa5, 0x16, 0xe2, 0x63, 0xc1,
+  0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x0c, 0xf9, 0x58, 0x11,
+  0xc4, 0xa7, 0x08, 0x9e, 0xd1, 0xe1, 0x86, 0x40, 0x67, 0xc0, 0x60, 0x96,
+  0x81, 0x14, 0x4a, 0x21, 0xb0, 0x01, 0x5c, 0xe0, 0x33, 0x4b, 0xa0, 0x0a,
+  0xd6, 0x2b, 0x44, 0x7c, 0x66, 0x09, 0x54, 0x61, 0x38, 0xe2, 0x17, 0x7c,
+  0x45, 0xf8, 0x66, 0x19, 0x4e, 0x41, 0x15, 0x02, 0x03, 0x87, 0x5f, 0x89,
+  0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x88, 0xe4,
+  0x63, 0x45, 0x10, 0x9f, 0x22, 0xce, 0x46, 0x87, 0x1b, 0x82, 0xb2, 0x01,
+  0x83, 0x59, 0x06, 0x54, 0x48, 0x85, 0xc0, 0xce, 0x65, 0x88, 0xcf, 0x2c,
+  0x81, 0x2a, 0x18, 0xc1, 0x2e, 0xf0, 0x99, 0x25, 0x50, 0x85, 0x81, 0x96,
+  0x47, 0x23, 0x05, 0xac, 0x14, 0x08, 0x54, 0x10, 0x52, 0x81, 0x36, 0x4c,
+  0xe1, 0x82, 0x61, 0x2e, 0x78, 0xea, 0xb6, 0xa7, 0x0e, 0x57, 0x86, 0xb9,
+  0xf7, 0x18, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40,
+  0x10, 0x0c, 0xbc, 0xba, 0x29, 0x1b, 0x92, 0x91, 0x9b, 0xd1, 0x84, 0x00,
+  0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22,
+  0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x8d, 0x6f, 0xd8, 0x26,
+  0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xd8, 0xfa, 0xa6, 0x6d,
+  0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0xcd, 0x6f, 0xdc,
+  0x26, 0x21, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x58, 0x4c, 0xa7,
+  0x6d, 0x5c, 0x26, 0xc8, 0x1b, 0x9f, 0xd9, 0x9b, 0xd1, 0x84, 0x00, 0xb8,
+  0xe0, 0xa9, 0x59, 0x82, 0x55, 0x18, 0x68, 0x79, 0x4c, 0xc3, 0x0e, 0xfc,
+  0xac, 0x0e, 0x58, 0x02, 0x0f, 0x04, 0x55, 0xf0, 0xb3, 0x3c, 0x98, 0x65,
+  0x60, 0x05, 0x57, 0xd8, 0x87, 0xe1, 0x08, 0x7f, 0x00, 0x9b, 0xe1, 0xbb,
+  0x7f, 0x18, 0x66, 0xb8, 0x21, 0x58, 0x19, 0x32, 0xa8, 0x21, 0xd0, 0xe1,
+  0x88, 0x91, 0x20, 0x9b, 0xe1, 0xab, 0x40, 0xd0, 0x2b, 0x89, 0x61, 0x86,
+  0x1b, 0x02, 0x97, 0x21, 0x83, 0x0a, 0x06, 0x9d, 0x65, 0x68, 0x05, 0x71,
+  0x08, 0x4e, 0x5e, 0x86, 0xb9, 0x14, 0x19, 0x66, 0xc4, 0xe0, 0x00, 0x40,
+  0x10, 0x0c, 0xbc, 0xd6, 0xe9, 0x1b, 0x9e, 0x51, 0x9d, 0xd1, 0x84, 0x00,
+  0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22,
+  0x0e, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x8d, 0x76, 0x48, 0xe7,
+  0x20, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xd8, 0x6a, 0xa7, 0x74,
+  0x18, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0xcd, 0x76, 0x4c,
+  0x47, 0x22, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x58, 0x7c, 0xa7,
+  0x74, 0xcc, 0x26, 0x88, 0x1d, 0xbb, 0x99, 0x9d, 0xd1, 0x84, 0x00, 0xb8,
+  0xe0, 0xa9, 0x59, 0x02, 0x71, 0x18, 0x6e, 0x98, 0x89, 0xdb, 0x01, 0x83,
+  0x59, 0x86, 0x57, 0x80, 0x85, 0xa0, 0xc2, 0x06, 0x75, 0xe0, 0x82, 0xa7,
+  0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x02, 0x9f, 0xd4, 0xc1, 0x09,
+  0xbd, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x2a, 0x7c, 0x52, 0x27,
+  0x10, 0x2e, 0x18, 0xa6, 0xc8, 0xa6, 0x75, 0xe0, 0x82, 0xa7, 0x46, 0x0c,
+  0x0e, 0x00, 0x04, 0xc1, 0x80, 0x2a, 0x1f, 0xd7, 0xf1, 0x89, 0xbf, 0x19,
+  0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0xca, 0x7c, 0x5c, 0x27, 0x10, 0x2e,
+  0x18, 0xe6, 0x82, 0xa7, 0xee, 0x78, 0xea, 0x5a, 0x66, 0x98, 0x23, 0x93,
+  0x61, 0x8e, 0x18, 0xe6, 0x88, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1,
+  0xc0, 0x53, 0x1f, 0xdd, 0xc9, 0x9b, 0xf3, 0x19, 0x4d, 0x08, 0x80, 0xd1,
+  0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0x22, 0x91,
+  0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xd8, 0xe2, 0x27, 0x7c, 0x12, 0x22,
+  0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x4d, 0x7e, 0xc4, 0x27, 0x21,
+  0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xd8, 0xe6, 0x67, 0x7c, 0x12,
+  0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x65, 0x7f, 0xc4, 0x67,
+  0x74, 0x02, 0xf7, 0x99, 0x1d, 0xf8, 0x19, 0x4d, 0x08, 0x80, 0x0b, 0x9e,
+  0x9a, 0x25, 0x10, 0x87, 0xe1, 0x06, 0xb8, 0x98, 0x1f, 0x30, 0x98, 0x65,
+  0x88, 0x05, 0x71, 0x08, 0xac, 0x6e, 0xee, 0x26, 0x3e, 0xc3, 0x11, 0x74,
+  0x81, 0x37, 0xc4, 0x37, 0xcb, 0x20, 0x0b, 0xb5, 0x10, 0x58, 0xde, 0xd4,
+  0x45, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x81,
+  0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xd1, 0x3f, 0x3a, 0xdc, 0x10, 0xec,
+  0x0f, 0x18, 0xcc, 0x32, 0xcc, 0x02, 0x2d, 0x04, 0x36, 0x84, 0x0e, 0x7c,
+  0x66, 0x09, 0x72, 0xc1, 0x40, 0x87, 0x88, 0xcf, 0x2c, 0x41, 0x2e, 0x0c,
+  0x47, 0xfc, 0x45, 0xe8, 0x08, 0xdf, 0x2c, 0x83, 0x2d, 0xe4, 0x42, 0x60,
+  0xa0, 0x21, 0x3a, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0xf0,
+  0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x04, 0x0a, 0xe9, 0x70,
+  0x43, 0x60, 0x42, 0x60, 0x30, 0xcb, 0x70, 0x0b, 0xb8, 0x10, 0x98, 0xea,
+  0x0c, 0xf1, 0x99, 0x25, 0xc8, 0x05, 0x23, 0x5a, 0x07, 0x3e, 0xb3, 0x04,
+  0xb9, 0x30, 0xd0, 0xf2, 0x68, 0xb3, 0x80, 0xd1, 0x02, 0x71, 0x0b, 0x02,
+  0x2e, 0xd0, 0x4c, 0x2d, 0x5c, 0x30, 0x8c, 0xb1, 0x0e, 0xec, 0xc4, 0x67,
+  0x38, 0xc2, 0x35, 0x62, 0x87, 0xf8, 0x66, 0x19, 0x74, 0xa1, 0x17, 0x02,
+  0x93, 0x9d, 0xd7, 0x88, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x82,
+  0xa7, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0x6c, 0x48, 0x87,
+  0x1b, 0x02, 0x1a, 0x02, 0x83, 0x59, 0x86, 0x5d, 0xe0, 0x85, 0xc0, 0x06,
+  0xdd, 0x81, 0xcf, 0x2c, 0x41, 0x38, 0xd8, 0xed, 0x10, 0xf1, 0x99, 0x25,
+  0x08, 0x87, 0xe1, 0x88, 0xdc, 0xc0, 0x1d, 0xe1, 0x9b, 0x65, 0xf0, 0x85,
+  0x70, 0x08, 0x4c, 0x37, 0x72, 0x27, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60,
+  0x98, 0x0b, 0x9e, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x08,
+  0x23, 0x1d, 0x6e, 0x08, 0x7e, 0x08, 0x0c, 0x66, 0x19, 0x7e, 0x01, 0x1c,
+  0x02, 0x0b, 0x9f, 0x21, 0x3e, 0xb3, 0x04, 0xe1, 0x60, 0x84, 0xf9, 0xc0,
+  0x67, 0x96, 0x20, 0x1c, 0x06, 0x5a, 0x1e, 0x6d, 0x17, 0x30, 0x5e, 0x20,
+  0x7e, 0x41, 0x00, 0x07, 0xd4, 0xe9, 0x85, 0x0b, 0x86, 0xb9, 0xe0, 0xa9,
+  0xdb, 0x9e, 0x3a, 0xd9, 0x19, 0xe6, 0xd2, 0x65, 0x98, 0x23, 0x86, 0x39,
+  0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xf0, 0xde, 0xe8, 0x87,
+  0xfc, 0x87, 0x8d, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13,
+  0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01, 0x40,
+  0x10, 0x0c, 0x36, 0x3b, 0x32, 0xa3, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00,
+  0x04, 0xc1, 0x60, 0xbb, 0xa3, 0x33, 0x4a, 0x88, 0x60, 0xc4, 0x00, 0x01,
+  0x40, 0x10, 0x0c, 0x36, 0x3c, 0x42, 0xa3, 0x84, 0x08, 0x46, 0x0c, 0x14,
+  0x00, 0x04, 0xc1, 0x60, 0x01, 0xa5, 0x33, 0x42, 0xa1, 0x60, 0x8e, 0x70,
+  0xa8, 0x8e, 0x46, 0x13, 0x02, 0xe0, 0x82, 0xa7, 0x66, 0x09, 0xc4, 0x61,
+  0xa0, 0xe5, 0x31, 0x8d, 0x56, 0x90, 0xc3, 0x80, 0x15, 0x58, 0xe2, 0x15,
+  0x84, 0x70, 0x90, 0xc3, 0x00, 0x16, 0x66, 0x19, 0xc6, 0xa1, 0x1c, 0xea,
+  0x63, 0x38, 0x42, 0x3f, 0x74, 0x68, 0xf8, 0x6e, 0x3f, 0x86, 0x19, 0x6e,
+  0x08, 0x4a, 0x88, 0x0c, 0x6a, 0x08, 0x74, 0x38, 0x62, 0x3f, 0x7c, 0x68,
+  0xf8, 0x2a, 0x10, 0xf4, 0xfa, 0x63, 0x98, 0xe1, 0x86, 0x00, 0x85, 0xc8,
+  0xa0, 0x82, 0x41, 0x67, 0x19, 0xc8, 0x21, 0x1f, 0x82, 0x63, 0x9f, 0x61,
+  0x6e, 0x64, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0xef, 0x94,
+  0xee, 0xc8, 0x86, 0x48, 0x69, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60,
+  0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x43, 0x46, 0x0c, 0x10,
+  0x00, 0x04, 0xc1, 0x60, 0x73, 0x25, 0x3f, 0x3a, 0x88, 0x60, 0xc4, 0x00,
+  0x01, 0x40, 0x10, 0x0c, 0xb6, 0x57, 0xfa, 0x23, 0x86, 0x08, 0x46, 0x0c,
+  0x10, 0x00, 0x04, 0xc1, 0x60, 0x83, 0x25, 0x50, 0x92, 0x88, 0x60, 0xc4,
+  0x40, 0x01, 0x40, 0x10, 0x0c, 0x16, 0x5c, 0xfa, 0x23, 0x30, 0x0a, 0x56,
+  0x09, 0x8e, 0x5a, 0x69, 0x34, 0x21, 0x00, 0x2e, 0x78, 0x6a, 0x96, 0x20,
+  0x1f, 0x86, 0x1b, 0x5a, 0x24, 0x96, 0xc0, 0x60, 0x96, 0xc1, 0x1c, 0xce,
+  0x21, 0xa8, 0x1d, 0x12, 0x25, 0xb8, 0xe0, 0xa9, 0x11, 0x83, 0x03, 0x00,
+  0x41, 0x30, 0xa0, 0x74, 0x69, 0x94, 0x68, 0x84, 0x8e, 0x46, 0x0c, 0x0e,
+  0x00, 0x04, 0xc1, 0x80, 0xda, 0xa5, 0x51, 0x0a, 0x84, 0x0b, 0x86, 0x29,
+  0x1f, 0x3a, 0x25, 0xb8, 0xe0, 0xa9, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30,
+  0xa0, 0x7e, 0x09, 0x95, 0x6c, 0x24, 0x8f, 0x46, 0x0c, 0x0e, 0x00, 0x04,
+  0xc1, 0x80, 0x02, 0x27, 0x54, 0x0a, 0x84, 0x0b, 0x86, 0xb9, 0xe0, 0xa9,
+  0x3b, 0x9e, 0xba, 0x13, 0x1a, 0xe6, 0x7c, 0x66, 0x98, 0x23, 0x86, 0x39,
+  0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xf0, 0xc8, 0x89, 0x96,
+  0xe6, 0x28, 0x9c, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13,
+  0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01, 0x40,
+  0x10, 0x0c, 0xb6, 0x75, 0xda, 0xa5, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00,
+  0x04, 0xc1, 0x60, 0x63, 0x27, 0x5e, 0x4a, 0x88, 0x60, 0xc4, 0x00, 0x01,
+  0x40, 0x10, 0x0c, 0xb6, 0x76, 0xea, 0xa5, 0x84, 0x08, 0x46, 0x0c, 0x14,
+  0x00, 0x04, 0xc1, 0x60, 0xa9, 0x27, 0x5e, 0xea, 0xa3, 0x00, 0x9d, 0x5a,
+  0x49, 0x9d, 0x46, 0x13, 0x02, 0xe0, 0x82, 0xa7, 0x66, 0x09, 0xf2, 0x61,
+  0xb8, 0x41, 0x4d, 0xda, 0x09, 0x0c, 0x66, 0x19, 0xd0, 0x21, 0x1f, 0x02,
+  0x7b, 0xa3, 0x38, 0x8a, 0xcf, 0x70, 0x04, 0x9c, 0xc8, 0x11, 0xf1, 0xcd,
+  0x32, 0xa4, 0x03, 0x3b, 0x04, 0x36, 0x47, 0x71, 0x12, 0x1f, 0x0b, 0x06,
+  0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x60, 0xc8, 0xc7, 0x8a, 0x20,
+  0x3e, 0x45, 0xdc, 0x93, 0x0e, 0x37, 0x04, 0xf5, 0x04, 0x06, 0xb3, 0x0c,
+  0xea, 0xb0, 0x0e, 0x81, 0x0d, 0x7b, 0x04, 0x9f, 0x59, 0x02, 0x78, 0x30,
+  0x3d, 0x22, 0xe2, 0x33, 0x4b, 0x00, 0x0f, 0xc3, 0x11, 0x7b, 0xb2, 0x47,
+  0xc2, 0x37, 0xcb, 0xd0, 0x0e, 0xf0, 0x10, 0x18, 0x9f, 0xf0, 0x51, 0x7c,
+  0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x41, 0x24, 0x1f,
+  0x2b, 0x82, 0xf8, 0x14, 0x21, 0x52, 0x3a, 0xdc, 0x10, 0x80, 0x14, 0x18,
+  0xcc, 0x32, 0xb8, 0xc3, 0x3b, 0x04, 0x46, 0x4a, 0x43, 0x7c, 0x66, 0x09,
+  0xe0, 0xc1, 0x88, 0x53, 0x82, 0xcf, 0x2c, 0x01, 0x3c, 0x0c, 0xb4, 0x3c,
+  0x9a, 0x3a, 0x60, 0xeb, 0x40, 0xb8, 0x83, 0xf0, 0x0e, 0x2c, 0xc5, 0x0e,
+  0x17, 0x0c, 0x63, 0xa6, 0xa4, 0x4a, 0xf1, 0x19, 0x8e, 0x30, 0x95, 0x55,
+  0x22, 0xbe, 0x59, 0x86, 0x78, 0xa0, 0x87, 0xc0, 0x58, 0xe9, 0x54, 0xe2,
+  0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x0c, 0xf9,
+  0x58, 0x11, 0xc4, 0xa7, 0x08, 0x98, 0xd2, 0xe1, 0x86, 0xc0, 0xa5, 0xc0,
+  0x60, 0x96, 0x41, 0x1e, 0xe6, 0x21, 0xb0, 0x81, 0x96, 0xe0, 0x33, 0x4b,
+  0x80, 0x0f, 0x16, 0x4b, 0x44, 0x7c, 0x66, 0x09, 0xf0, 0x61, 0x38, 0x22,
+  0x56, 0x64, 0x49, 0xf8, 0x66, 0x19, 0xea, 0x01, 0x1f, 0x02, 0x93, 0x95,
+  0x59, 0x8a, 0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c,
+  0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0x76, 0x4a, 0x87, 0x1b, 0x82,
+  0x9c, 0x02, 0x83, 0x59, 0x06, 0x7b, 0xb8, 0x87, 0xc0, 0x76, 0x69, 0x88,
+  0xcf, 0x2c, 0x01, 0x3e, 0x18, 0x01, 0x4e, 0xf0, 0x99, 0x25, 0xc0, 0x87,
+  0x81, 0x96, 0x47, 0x93, 0x07, 0x6c, 0x1e, 0x08, 0x7b, 0x10, 0xee, 0x01,
+  0xaf, 0xe8, 0xe1, 0x82, 0x61, 0x2e, 0x78, 0xea, 0xb6, 0xa7, 0x8e, 0x95,
+  0x86, 0xb9, 0xf1, 0x19, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0,
+  0x00, 0x40, 0x10, 0x0c, 0xbc, 0xb4, 0xca, 0x29, 0x7c, 0x32, 0xab, 0xd1,
+  0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20,
+  0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x0d, 0xae,
+  0xc0, 0x2a, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xd8, 0xe2,
+  0x2a, 0xac, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x4d,
+  0xae, 0xc4, 0x2a, 0x21, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x58,
+  0xf4, 0x2a, 0xac, 0x44, 0x2a, 0x68, 0x2b, 0x99, 0x7a, 0xab, 0xd1, 0x84,
+  0x00, 0xb8, 0xe0, 0xa9, 0x59, 0x82, 0x7c, 0x18, 0x68, 0x79, 0x4c, 0x83,
+  0x1c, 0xcc, 0x34, 0x18, 0x07, 0x96, 0x30, 0x07, 0x01, 0x1f, 0xcc, 0x34,
+  0x38, 0x87, 0x59, 0x06, 0x7d, 0xe0, 0x87, 0x77, 0x19, 0x8e, 0x90, 0x17,
+  0x9a, 0x1a, 0xbe, 0x9b, 0x97, 0x61, 0x86, 0x1b, 0x82, 0x7f, 0x22, 0x83,
+  0x1a, 0x02, 0x1d, 0x8e, 0xa8, 0x17, 0x9c, 0x1a, 0xbe, 0x0a, 0x04, 0xbd,
+  0x7b, 0x19, 0x66, 0xb8, 0x21, 0x10, 0x29, 0x32, 0xa8, 0x60, 0xd0, 0x59,
+  0x86, 0x7d, 0x80, 0x89, 0xe0, 0xcc, 0x69, 0x98, 0xeb, 0x9f, 0x61, 0x46,
+  0x0c, 0x0e, 0x00, 0x04, 0xc1, 0xc0, 0x0b, 0xad, 0xb8, 0x82, 0x29, 0xbf,
+  0x1a, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1,
+  0x04, 0x62, 0x28, 0xe2, 0x90, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xd8,
+  0x50, 0x0b, 0xaf, 0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83,
+  0x2d, 0xb5, 0xf2, 0x8a, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30,
+  0xd8, 0x54, 0x4b, 0xaf, 0x24, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04,
+  0x83, 0x45, 0xb6, 0xf2, 0x4a, 0xa7, 0x82, 0xd2, 0x52, 0xab, 0xd3, 0x1a,
+  0x4d, 0x08, 0x80, 0x0b, 0x9e, 0x9a, 0x25, 0x80, 0x89, 0xe1, 0x86, 0x93,
+  0x59, 0x2d, 0x30, 0x98, 0x65, 0xe8, 0x07, 0x7f, 0x08, 0xaa, 0xa6, 0xf8,
+  0x0a, 0x2e, 0x78, 0x6a, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x28, 0xda,
+  0xea, 0x2b, 0x96, 0x71, 0xab, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0,
+  0x6a, 0xab, 0xaf, 0x02, 0xe1, 0x82, 0x61, 0x0a, 0xa7, 0x42, 0x0b, 0x2e,
+  0x78, 0x6a, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xa8, 0xdc, 0x12, 0x2d,
+  0x98, 0x99, 0xab, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0x74, 0x4b,
+  0xb4, 0x02, 0xe1, 0x82, 0x61, 0x2e, 0x78, 0xea, 0x8e, 0xa7, 0x2e, 0xa4,
+  0x86, 0x39, 0x1c, 0x1a, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0,
+  0x00, 0x40, 0x10, 0x0c, 0x3c, 0xdf, 0x72, 0xad, 0xb6, 0xda, 0xad, 0xd1,
+  0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20,
+  0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0xad, 0xbc,
+  0x6a, 0x2b, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xd8, 0xcc,
+  0xcb, 0xb6, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0xed,
+  0xbc, 0x6e, 0x2b, 0x21, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x58,
+  0xde, 0xcb, 0xb6, 0xee, 0x2a, 0x10, 0xaf, 0xd3, 0x22, 0xaf, 0xd1, 0x84,
+  0x00, 0xb8, 0xe0, 0xa9, 0x59, 0x02, 0x98, 0x18, 0x6e, 0x20, 0x9b, 0xf3,
+  0x02, 0x83, 0x59, 0x86, 0x7f, 0x80, 0x89, 0xc0, 0xd2, 0x6a, 0xad, 0xe2,
+  0x33, 0x1c, 0x81, 0x36, 0x6c, 0x45, 0x7c, 0xb3, 0x0c, 0x20, 0x31, 0x12,
+  0x81, 0xb5, 0x55, 0xda, 0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73,
+  0xc1, 0x53, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0xf1, 0xa5,
+  0xc3, 0x0d, 0xc1, 0x7b, 0x81, 0xc1, 0x2c, 0x43, 0x48, 0x88, 0x44, 0x60,
+  0x43, 0x5d, 0xc1, 0x67, 0x96, 0xe0, 0x24, 0x8c, 0xae, 0x88, 0xf8, 0xcc,
+  0x12, 0x9c, 0xc4, 0x70, 0xc4, 0xdc, 0xd4, 0x95, 0xf0, 0xcd, 0x32, 0x90,
+  0xc4, 0x49, 0x04, 0x46, 0x37, 0x76, 0x15, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c,
+  0x30, 0xcc, 0x05, 0x4f, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45,
+  0xf0, 0x97, 0x0e, 0x37, 0x04, 0xfa, 0x05, 0x06, 0xb3, 0x0c, 0x25, 0x61,
+  0x12, 0x81, 0xf9, 0xd5, 0x10, 0x9f, 0x59, 0x82, 0x93, 0x30, 0x22, 0xb4,
+  0xe0, 0x33, 0x4b, 0x70, 0x12, 0x03, 0x2d, 0x8f, 0x16, 0x12, 0x98, 0x48,
+  0x10, 0x25, 0x21, 0x98, 0x04, 0xbf, 0x8d, 0xc4, 0x05, 0xc3, 0x18, 0x68,
+  0x91, 0x56, 0x7c, 0x86, 0x23, 0x40, 0xa7, 0xb4, 0x88, 0x6f, 0x96, 0x01,
+  0x25, 0x56, 0x22, 0x30, 0xd3, 0x0a, 0x9d, 0xf8, 0x58, 0x30, 0xd0, 0xe7,
+  0x82, 0x61, 0x2e, 0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29,
+  0x42, 0xc5, 0x74, 0xb8, 0x21, 0x40, 0x31, 0x30, 0x98, 0x65, 0x48, 0x09,
+  0x95, 0x08, 0x6c, 0x70, 0x2d, 0xf8, 0xcc, 0x12, 0xbc, 0x84, 0xad, 0x16,
+  0x11, 0x9f, 0x59, 0x82, 0x97, 0x18, 0x8e, 0x58, 0x1d, 0xd6, 0x12, 0xbe,
+  0x59, 0x06, 0x96, 0x78, 0x89, 0xc0, 0x58, 0xa7, 0xb5, 0xe2, 0x63, 0x81,
+  0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x22, 0xf9, 0x58, 0x11,
+  0xc4, 0xa7, 0x88, 0x1a, 0xd3, 0xe1, 0x86, 0x60, 0xc6, 0xc0, 0x60, 0x96,
+  0xa1, 0x25, 0x5c, 0x22, 0xb0, 0xda, 0x1a, 0xe2, 0x33, 0x4b, 0xf0, 0x12,
+  0x46, 0xe8, 0x16, 0x7c, 0x66, 0x09, 0x5e, 0x62, 0xa0, 0xe5, 0xd1, 0x52,
+  0x02, 0x53, 0x09, 0xa2, 0x25, 0x04, 0x97, 0x60, 0xbb, 0x95, 0xb8, 0x60,
+  0x98, 0x0b, 0x9e, 0xba, 0xed, 0xa9, 0x33, 0xad, 0x61, 0xae, 0x97, 0x86,
+  0x39, 0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03,
+  0x6f, 0xcc, 0x66, 0x4c, 0xbe, 0xc0, 0x6c, 0x34, 0x21, 0x00, 0x46, 0x13,
+  0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46,
+  0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0x53, 0x33, 0x1d, 0x4b, 0x88, 0x60,
+  0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb6, 0x35, 0xdb, 0xb1, 0x84, 0x08,
+  0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0x63, 0x33, 0x1e, 0x4b, 0x88,
+  0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x16, 0x3a, 0xdb, 0x31, 0xfe,
+  0x0a, 0xce, 0x8c, 0xc5, 0xd2, 0x6c, 0x34, 0x21, 0x00, 0x2e, 0x78, 0x6a,
+  0x96, 0x00, 0x26, 0x06, 0x5a, 0x1e, 0xd3, 0xd8, 0x07, 0x1d, 0x0e, 0xf4,
+  0x81, 0x25, 0xfa, 0x41, 0x78, 0x09, 0x1d, 0x0e, 0xfc, 0x61, 0xc4, 0xc0,
+  0x00, 0x40, 0x10, 0x0c, 0x20, 0x3b, 0xbb, 0xb1, 0x77, 0x32, 0xfb, 0x80,
+  0x97, 0xf8, 0x98, 0x10, 0xc8, 0xc7, 0x02, 0x79, 0x81, 0x8f, 0x15, 0xff,
+  0x10, 0x1f, 0x2b, 0x02, 0xf9, 0x58, 0x10, 0x12, 0xf0, 0x19, 0x31, 0x30,
+  0x00, 0x10, 0x04, 0x03, 0xa8, 0xcf, 0x7c, 0xac, 0x9e, 0x4c, 0x28, 0xe2,
+  0x63, 0x81, 0x20, 0x1f, 0x0b, 0x0e, 0xf8, 0x5c, 0x60, 0xcc, 0x88, 0x81,
+  0x03, 0x80, 0x20, 0x18, 0x34, 0xa3, 0x26, 0x66, 0x2e, 0x76, 0x62, 0x77,
+  0x16, 0xf4, 0x58, 0x8f, 0xf5, 0x18, 0x8f, 0xe5, 0xd9, 0x2c, 0xc1, 0x08,
+  0x0d, 0x37, 0xf8, 0x55, 0x9e, 0x81, 0xc1, 0x2c, 0x83, 0x4c, 0x8c, 0x50,
+  0x30, 0x62, 0x60, 0x00, 0x20, 0x08, 0x06, 0xd0, 0xa8, 0x95, 0x19, 0x3f,
+  0x59, 0x70, 0x63, 0xf0, 0x19, 0x31, 0x30, 0x00, 0x10, 0x04, 0x03, 0xa8,
+  0xd4, 0xce, 0xac, 0x9f, 0x2c, 0xc8, 0x31, 0xf8, 0x8c, 0x26, 0xb8, 0xd8,
+  0x30, 0xdc, 0x10, 0xf4, 0x19, 0x18, 0xcc, 0x32, 0xcc, 0x44, 0x4d, 0x04,
+  0xc3, 0x11, 0x45, 0x98, 0x0d, 0xdf, 0x19, 0xc3, 0x0c, 0x37, 0x04, 0x2c,
+  0x46, 0x06, 0x35, 0x04, 0x3a, 0x1c, 0x71, 0x94, 0xd9, 0xf0, 0x55, 0x20,
+  0xe8, 0x25, 0xc3, 0x0c, 0x37, 0x04, 0x2f, 0x46, 0x06, 0x15, 0x0c, 0x3a,
+  0xcb, 0x40, 0x13, 0x69, 0x11, 0xdc, 0x7c, 0x0d, 0x73, 0x2a, 0x35, 0xcc,
+  0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x78, 0xae, 0xe6, 0x67, 0x3d, 0xb6,
+  0x6a, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30,
+  0x9a, 0x40, 0x0c, 0x45, 0x1c, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06,
+  0x5b, 0xad, 0x95, 0xda, 0x41, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60,
+  0xb0, 0xd9, 0x9a, 0xa9, 0x31, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08,
+  0x06, 0xdb, 0xad, 0x9d, 0x9a, 0x44, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82,
+  0x60, 0xb0, 0xfc, 0x9a, 0xa9, 0x9d, 0x59, 0x20, 0x6b, 0x77, 0x46, 0x6b,
+  0xa3, 0x09, 0x01, 0x70, 0xc1, 0x53, 0xb3, 0x04, 0x69, 0x31, 0xdc, 0x90,
+  0xe1, 0x1a, 0x18, 0xcc, 0x32, 0xd8, 0xc4, 0x4d, 0x04, 0x25, 0x66, 0xa9,
+  0x06, 0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x54, 0xb8,
+  0xa9, 0xda, 0xb7, 0x67, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0x89,
+  0x9b, 0xaa, 0x05, 0xc2, 0x05, 0xc3, 0x54, 0x99, 0xb9, 0x1a, 0x5c, 0xf0,
+  0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50, 0xe6, 0xf6, 0x6a, 0x62,
+  0x00, 0x6a, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0x9d, 0xdb, 0xab,
+  0x05, 0xc2, 0x05, 0xc3, 0x5c, 0xf0, 0xd4, 0x1d, 0x4f, 0x9d, 0x8b, 0x0d,
+  0x73, 0x65, 0x35, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01,
+  0x80, 0x20, 0x18, 0x78, 0xeb, 0xb6, 0x6b, 0x7a, 0x86, 0x6e, 0xa3, 0x09,
+  0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c,
+  0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x9b, 0xbc, 0x89,
+  0x5b, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0xcd, 0xdb,
+  0xb8, 0x25, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x1b, 0xbd,
+  0x91, 0x5b, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0xf0,
+  0xdb, 0xb8, 0x91, 0x5a, 0xf0, 0x6e, 0xb4, 0x16, 0x6f, 0xa3, 0x09, 0x01,
+  0x70, 0xc1, 0x53, 0xb3, 0x04, 0x69, 0x31, 0xdc, 0x60, 0x07, 0xf4, 0x06,
+  0x06, 0xb3, 0x0c, 0x38, 0x91, 0x16, 0x81, 0xd9, 0x19, 0x9e, 0xc5, 0x67,
+  0x38, 0x62, 0x0f, 0xf2, 0x8c, 0xf8, 0x66, 0x19, 0x72, 0x82, 0x27, 0x02,
+  0xd3, 0x33, 0x3e, 0x88, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x82,
+  0xa7, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xfc, 0x4d, 0x87,
+  0x1b, 0x02, 0x7e, 0x03, 0x83, 0x59, 0x06, 0x9d, 0xd8, 0x89, 0xc0, 0x06,
+  0x51, 0x83, 0xcf, 0x2c, 0x01, 0x58, 0x58, 0xa8, 0x11, 0xf1, 0x99, 0x25,
+  0x00, 0x8b, 0xe1, 0x08, 0x53, 0x10, 0x35, 0xe1, 0x9b, 0x65, 0xe8, 0x09,
+  0xb0, 0x08, 0xec, 0x14, 0x46, 0x2d, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60,
+  0x98, 0x0b, 0x9e, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x48,
+  0x39, 0x1d, 0x6e, 0x08, 0x4e, 0x0e, 0x0c, 0x66, 0x19, 0x7c, 0xe2, 0x27,
+  0x02, 0x5b, 0xb5, 0x21, 0x3e, 0xb3, 0x04, 0x60, 0x61, 0x84, 0xab, 0xc1,
+  0x67, 0x96, 0x00, 0x2c, 0x06, 0x5a, 0x1e, 0x4d, 0x27, 0xb0, 0x9d, 0x20,
+  0x7c, 0x42, 0xf8, 0x09, 0xb1, 0xe0, 0x89, 0x0b, 0x86, 0xb1, 0x56, 0x8b,
+  0xb5, 0xf8, 0x0c, 0x47, 0xc8, 0x82, 0xac, 0x11, 0xdf, 0x2c, 0x43, 0x58,
+  0x90, 0x45, 0x60, 0xb3, 0x36, 0x0b, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05,
+  0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0xc4,
+  0xcd, 0xe9, 0x70, 0x43, 0x50, 0x73, 0x60, 0x30, 0xcb, 0x20, 0x16, 0x63,
+  0x11, 0xd8, 0xb0, 0x6b, 0xf0, 0x99, 0x25, 0x40, 0x0b, 0xc3, 0x35, 0x22,
+  0x3e, 0xb3, 0x04, 0x68, 0x31, 0x1c, 0xd1, 0x0b, 0xb9, 0x26, 0x7c, 0xb3,
+  0x0c, 0x65, 0x81, 0x16, 0x81, 0xf9, 0x82, 0xae, 0xc5, 0xc7, 0x02, 0x87,
+  0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88,
+  0x4f, 0x11, 0x62, 0xa7, 0xc3, 0x0d, 0x01, 0xd8, 0x81, 0xc1, 0x2c, 0x83,
+  0x59, 0x9c, 0x45, 0x60, 0xe2, 0x36, 0xc4, 0x67, 0x96, 0x00, 0x2d, 0x8c,
+  0x38, 0x37, 0xf8, 0xcc, 0x12, 0xa0, 0xc5, 0x40, 0xcb, 0xa3, 0x89, 0x05,
+  0x36, 0x16, 0x84, 0x59, 0x08, 0x67, 0x01, 0x1b, 0x64, 0x71, 0xc1, 0x30,
+  0x17, 0x3c, 0x75, 0xdb, 0x53, 0x37, 0x6b, 0xc3, 0x9c, 0x7a, 0x0d, 0x73,
+  0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x1e,
+  0xdc, 0x81, 0xdd, 0xbf, 0xb5, 0xdd, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08,
+  0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18,
+  0x20, 0x00, 0x08, 0x82, 0xc1, 0x76, 0x77, 0x67, 0x97, 0x10, 0xc1, 0x88,
+  0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0x78, 0x87, 0x76, 0x09, 0x11, 0x8c,
+  0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x96, 0x77, 0x69, 0x97, 0x10, 0xc1,
+  0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c, 0xa1, 0x87, 0x76, 0x29, 0x17,
+  0xd0, 0x5d, 0xce, 0xd9, 0xdd, 0x68, 0x42, 0x00, 0x5c, 0xf0, 0xd4, 0x2c,
+  0x41, 0x5a, 0x0c, 0xb4, 0x3c, 0xa6, 0x41, 0x13, 0x74, 0x1e, 0xcc, 0x04,
+  0x4b, 0xd8, 0x84, 0x80, 0x16, 0x74, 0x1e, 0xdc, 0xc4, 0x2c, 0x83, 0x5a,
+  0xb0, 0xc5, 0x3e, 0x0c, 0x47, 0x80, 0xc4, 0xce, 0x0d, 0xdf, 0x85, 0xc4,
+  0x30, 0xc3, 0x0d, 0x81, 0xc9, 0x91, 0x41, 0x0d, 0x81, 0x0e, 0x47, 0x84,
+  0xc4, 0xcf, 0x0d, 0x5f, 0x05, 0x82, 0xde, 0x48, 0x0c, 0x33, 0xdc, 0x10,
+  0xa4, 0x1c, 0x19, 0x54, 0x30, 0xe8, 0x2c, 0xc3, 0x5a, 0x80, 0x46, 0x70,
+  0xed, 0x36, 0xcc, 0x91, 0xd8, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60,
+  0xe0, 0xa1, 0x1e, 0xde, 0xdd, 0x5c, 0xe9, 0x8d, 0x26, 0x04, 0xc0, 0x68,
+  0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x71, 0xc8,
+  0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0xaf, 0xf7, 0x77, 0x07, 0x11,
+  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x06, 0x7b, 0xa0, 0xc7, 0x10,
+  0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0xb1, 0x17, 0x7a, 0x12,
+  0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0x92, 0x7b, 0xa0, 0x17,
+  0x76, 0x01, 0xeb, 0xc5, 0x9d, 0xeb, 0x8d, 0x26, 0x04, 0xc0, 0x05, 0x4f,
+  0xcd, 0x12, 0x80, 0xc6, 0x70, 0xc3, 0x4c, 0xc8, 0x1e, 0x18, 0xcc, 0x32,
+  0xb4, 0x85, 0x5b, 0x04, 0xc5, 0x73, 0xa3, 0x07, 0x17, 0x3c, 0x35, 0x62,
+  0x70, 0x00, 0x20, 0x08, 0x06, 0xd4, 0xee, 0x91, 0x9e, 0x4e, 0xd4, 0xdd,
+  0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50, 0xbc, 0x47, 0x7a, 0x81, 0x70,
+  0xc1, 0x30, 0xf5, 0x73, 0xa8, 0x07, 0x17, 0x3c, 0x35, 0x62, 0x70, 0x00,
+  0x20, 0x08, 0x06, 0x14, 0xf8, 0xa5, 0x1e, 0x4f, 0xe8, 0xdd, 0x88, 0xc1,
+  0x01, 0x80, 0x20, 0x18, 0x50, 0xe1, 0x97, 0x7a, 0x81, 0x70, 0xc1, 0x30,
+  0x17, 0x3c, 0x75, 0xc7, 0x53, 0x87, 0x72, 0xc3, 0xdc, 0x8f, 0x0d, 0x73,
+  0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x5e,
+  0xf9, 0xd5, 0x1e, 0xdd, 0x89, 0xdf, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08,
+  0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18,
+  0x20, 0x00, 0x08, 0x82, 0xc1, 0xc6, 0x7e, 0xbc, 0x97, 0x10, 0xc1, 0x88,
+  0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0xed, 0xd7, 0x7b, 0x09, 0x11, 0x8c,
+  0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xe6, 0x7e, 0xbe, 0x97, 0x10, 0xc1,
+  0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c, 0xf6, 0xd7, 0x7b, 0x7e, 0x17,
+  0xa4, 0x9f, 0xeb, 0xad, 0xdf, 0x68, 0x42, 0x00, 0x5c, 0xf0, 0xd4, 0x2c,
+  0x01, 0x68, 0x0c, 0x37, 0xc0, 0x85, 0xfb, 0x81, 0xc1, 0x2c, 0xc3, 0x5b,
+  0x80, 0x46, 0x60, 0x70, 0x27, 0x77, 0xf1, 0x19, 0x8e, 0xb0, 0x8b, 0xb9,
+  0x23, 0xbe, 0x59, 0x06, 0xb8, 0x98, 0x8b, 0xc0, 0xe8, 0xee, 0x2e, 0xe2,
+  0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x0c, 0xf9,
+  0x58, 0x11, 0xc4, 0xa7, 0x08, 0xfc, 0xd3, 0xe1, 0x86, 0xc0, 0xfe, 0xc0,
+  0x60, 0x96, 0x21, 0x2e, 0xe4, 0x22, 0xb0, 0x81, 0xef, 0xe0, 0x33, 0x4b,
+  0x70, 0x17, 0xb6, 0x77, 0x44, 0x7c, 0x66, 0x09, 0xee, 0x62, 0x38, 0x22,
+  0x34, 0xf8, 0x4e, 0xf8, 0x66, 0x19, 0xe8, 0xe2, 0x2e, 0x02, 0x13, 0x8d,
+  0xbe, 0x8b, 0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c,
+  0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0x46, 0x30, 0xd0, 0xe1, 0x86,
+  0x20, 0x04, 0x03, 0x30, 0x98, 0x65, 0xa8, 0x0b, 0xbb, 0x08, 0xac, 0xf4,
+  0x86, 0xf8, 0xcc, 0x12, 0xdc, 0x85, 0x11, 0xa8, 0x07, 0x9f, 0x59, 0x82,
+  0xbb, 0x18, 0x68, 0x79, 0xb4, 0xb8, 0xc0, 0xe4, 0x82, 0xa8, 0x0b, 0xc1,
+  0x2e, 0x70, 0x66, 0x2e, 0x2e, 0x18, 0xc6, 0x4e, 0x6f, 0xf5, 0xe2, 0x33,
+  0x1c, 0xc1, 0x1a, 0xac, 0x47, 0x7c, 0xb3, 0x0c, 0x78, 0xb1, 0x17, 0x81,
+  0xb5, 0x5e, 0x6b, 0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0xc1,
+  0x53, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x31, 0x18, 0xe8,
+  0x70, 0x43, 0xf0, 0x82, 0x01, 0x18, 0xcc, 0x32, 0xe4, 0x85, 0x5e, 0x04,
+  0x36, 0xd4, 0x1e, 0x7c, 0x66, 0x09, 0xfe, 0xc2, 0x64, 0x8f, 0x88, 0xcf,
+  0x2c, 0xc1, 0x5f, 0x0c, 0x47, 0xdc, 0xc6, 0xec, 0x09, 0xdf, 0x2c, 0x03,
+  0x5f, 0xfc, 0x45, 0x60, 0xb8, 0x41, 0x7b, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf,
+  0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53,
+  0x04, 0x0f, 0x06, 0x3a, 0xdc, 0x10, 0xe8, 0x60, 0x00, 0x06, 0xb3, 0x0c,
+  0x7d, 0xe1, 0x17, 0x81, 0xf1, 0xde, 0x10, 0x9f, 0x59, 0x82, 0xbf, 0x30,
+  0x22, 0xfc, 0xe0, 0x33, 0x4b, 0xf0, 0x17, 0x03, 0x2d, 0x8f, 0x96, 0x17,
+  0x98, 0x5e, 0x10, 0x7d, 0x21, 0xf8, 0x05, 0xe9, 0xec, 0xc5, 0x05, 0xc3,
+  0x5c, 0xf0, 0xd4, 0x6d, 0x4f, 0x5d, 0xeb, 0x0d, 0x73, 0xe4, 0x36, 0xcc,
+  0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x78,
+  0x6a, 0x18, 0xe8, 0x60, 0x90, 0x7f, 0x67, 0x18, 0x8c, 0x26, 0x04, 0xc0,
+  0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x91,
+  0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0x71, 0x18, 0x84, 0x61,
+  0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0x72, 0x18,
+  0x88, 0x61, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c,
+  0x73, 0x18, 0x8c, 0x61, 0x90, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20,
+  0x18, 0x2c, 0x7b, 0x18, 0x88, 0x61, 0x30, 0x82, 0x41, 0xe0, 0x86, 0xc1,
+  0x0c, 0x06, 0x70, 0x18, 0x8c, 0x26, 0x04, 0xc0, 0x05, 0x4f, 0xcd, 0x12,
+  0x80, 0xc6, 0x40, 0xcb, 0x63, 0x1a, 0x6b, 0x81, 0x86, 0x82, 0x5a, 0xb0,
+  0x44, 0x5b, 0x08, 0x7f, 0x81, 0x86, 0x82, 0x5b, 0x98, 0x7e, 0xd0, 0x60,
+  0x00, 0x9f, 0x59, 0x86, 0xd0, 0x18, 0x0d, 0xfb, 0x18, 0x8e, 0x08, 0x6c,
+  0x30, 0x18, 0xbe, 0x13, 0x86, 0x19, 0x6e, 0x08, 0x42, 0x30, 0x20, 0x83,
+  0x1a, 0x02, 0x1d, 0x8e, 0xe0, 0x0f, 0x1d, 0x0c, 0x86, 0xaf, 0x02, 0x41,
+  0xcf, 0x3f, 0x86, 0x19, 0x6e, 0x08, 0x48, 0x30, 0x20, 0x83, 0x0a, 0x06,
+  0x9d, 0x65, 0x10, 0x8d, 0xdb, 0x08, 0x0e, 0xfd, 0x86, 0xb9, 0x7f, 0x1b,
+  0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xbc, 0x51, 0x0c, 0xe6, 0x30,
+  0x90, 0xc1, 0x00, 0x14, 0x83, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82,
   0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x0e, 0x19, 0x31, 0x40,
-  0x00, 0x10, 0x04, 0x83, 0x8d, 0x76, 0x48, 0xe7, 0x20, 0x82, 0x11, 0x03,
-  0x04, 0x00, 0x41, 0x30, 0xd8, 0x6a, 0xa7, 0x74, 0x18, 0x22, 0x18, 0x31,
-  0x40, 0x00, 0x10, 0x04, 0x83, 0xcd, 0x76, 0x4c, 0x47, 0x22, 0x82, 0x11,
-  0x03, 0x05, 0x00, 0x41, 0x30, 0x58, 0x7c, 0xa7, 0x74, 0xcc, 0x26, 0x88,
-  0x1d, 0xbb, 0x99, 0x9d, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xa9, 0x59, 0x02,
-  0x71, 0x18, 0x6e, 0x98, 0x89, 0xdb, 0x01, 0x83, 0x59, 0x86, 0x57, 0x80,
-  0x85, 0xa0, 0xc2, 0x06, 0x75, 0xe0, 0x82, 0xa7, 0x46, 0x0c, 0x0e, 0x00,
-  0x04, 0xc1, 0x80, 0x02, 0x9f, 0xd4, 0xc1, 0x09, 0xbd, 0x19, 0x31, 0x38,
-  0x00, 0x10, 0x04, 0x03, 0x2a, 0x7c, 0x52, 0x27, 0x10, 0x2e, 0x18, 0xa6,
-  0xc8, 0xa6, 0x75, 0xe0, 0x82, 0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1,
-  0x80, 0x2a, 0x1f, 0xd7, 0xf1, 0x89, 0xbf, 0x19, 0x31, 0x38, 0x00, 0x10,
-  0x04, 0x03, 0xca, 0x7c, 0x5c, 0x27, 0x10, 0x2e, 0x18, 0xe6, 0x82, 0xa7,
-  0xee, 0x78, 0xea, 0x5a, 0x66, 0x98, 0x23, 0x93, 0x61, 0x8e, 0x18, 0xe6,
-  0x88, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0xc0, 0x53, 0x1f, 0xdd,
-  0xc9, 0x9b, 0xf3, 0x19, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d,
-  0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00,
-  0x41, 0x30, 0xd8, 0xe2, 0x27, 0x7c, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00,
-  0x10, 0x04, 0x83, 0x4d, 0x7e, 0xc4, 0x27, 0x21, 0x82, 0x11, 0x03, 0x04,
-  0x00, 0x41, 0x30, 0xd8, 0xe6, 0x67, 0x7c, 0x12, 0x22, 0x18, 0x31, 0x50,
-  0x00, 0x10, 0x04, 0x83, 0x65, 0x7f, 0xc4, 0x67, 0x74, 0x02, 0xf7, 0x99,
-  0x1d, 0xf8, 0x19, 0x4d, 0x08, 0x80, 0x0b, 0x9e, 0x9a, 0x25, 0x10, 0x87,
-  0xe1, 0x06, 0xb8, 0x98, 0x1f, 0x30, 0x98, 0x65, 0x88, 0x05, 0x71, 0x08,
-  0xac, 0x6e, 0xee, 0x26, 0x3e, 0xc3, 0x11, 0x74, 0x81, 0x37, 0xc4, 0x37,
-  0xcb, 0x20, 0x0b, 0xb5, 0x10, 0x58, 0xde, 0xd4, 0x45, 0x7c, 0x2c, 0x18,
+  0x00, 0x10, 0x04, 0x83, 0x4d, 0x15, 0x03, 0x3d, 0x0c, 0x0e, 0x22, 0x18,
+  0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x6d, 0x15, 0x83, 0x3d, 0x0c, 0x18,
+  0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x8d, 0x15, 0x03, 0x3e,
+  0x0c, 0x24, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x85, 0x16,
+  0x83, 0x3d, 0x0c, 0x78, 0x30, 0x08, 0x4e, 0x31, 0x60, 0xc3, 0x20, 0x15,
+  0x83, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xa9, 0x59, 0x82, 0xdb, 0x18, 0x6e,
+  0x70, 0x91, 0x56, 0x0c, 0xc0, 0x60, 0x96, 0x81, 0x34, 0x4a, 0x23, 0xa8,
+  0x1b, 0x0c, 0xfc, 0x30, 0x80, 0x0b, 0x9e, 0x1a, 0x31, 0x38, 0x00, 0x10,
+  0x04, 0x03, 0xca, 0x16, 0x83, 0x3f, 0x0c, 0x36, 0x38, 0x0c, 0x46, 0x0c,
+  0x0e, 0x00, 0x04, 0xc1, 0x80, 0xba, 0xc5, 0xe0, 0x0f, 0x83, 0x40, 0xb8,
+  0x60, 0x98, 0xd2, 0xc1, 0x60, 0x14, 0x03, 0xb8, 0xe0, 0xa9, 0x11, 0x83,
+  0x03, 0x00, 0x41, 0x30, 0xa0, 0x76, 0x31, 0x20, 0xc5, 0xe0, 0x46, 0xea,
+  0x30, 0x18, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x8a, 0x17, 0x03, 0x52,
+  0x0c, 0x02, 0xe1, 0x82, 0x61, 0x2e, 0x78, 0xea, 0x8e, 0xa7, 0x6e, 0x04,
+  0x83, 0x61, 0x4e, 0xe7, 0x86, 0x39, 0x62, 0x98, 0x23, 0x86, 0x19, 0x31,
+  0x38, 0x00, 0x10, 0x04, 0x03, 0x0f, 0x1c, 0x03, 0x58, 0x0c, 0xde, 0x30,
+  0xe8, 0xc5, 0x60, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61,
+  0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10, 0x00, 0x04,
+  0xc1, 0x60, 0x3b, 0xc7, 0xe0, 0x16, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x10,
+  0x00, 0x04, 0xc1, 0x60, 0x43, 0xc7, 0x00, 0x17, 0x83, 0x84, 0x08, 0x46,
+  0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0x4b, 0xc7, 0x20, 0x17, 0x83, 0x84,
+  0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0x89, 0xc7, 0x00, 0x17,
+  0x83, 0x3c, 0x0c, 0x02, 0x72, 0x0c, 0x52, 0x31, 0x30, 0xc7, 0x60, 0x34,
+  0x21, 0x00, 0x2e, 0x78, 0x6a, 0x96, 0xe0, 0x36, 0x86, 0x1b, 0xd6, 0x24,
+  0x1d, 0x03, 0x30, 0x98, 0x65, 0x30, 0x8d, 0xdb, 0x08, 0x6c, 0x0d, 0x83,
+  0x36, 0x0c, 0xe2, 0x33, 0x1c, 0x71, 0x07, 0x6e, 0x18, 0x10, 0xdf, 0x2c,
+  0xc3, 0x69, 0xa8, 0x46, 0x60, 0x6f, 0x18, 0xe0, 0x41, 0x7c, 0x2c, 0x18,
   0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82,
-  0xf8, 0x14, 0xd1, 0x3f, 0x3a, 0xdc, 0x10, 0xec, 0x0f, 0x18, 0xcc, 0x32,
-  0xcc, 0x02, 0x2d, 0x04, 0x36, 0x84, 0x0e, 0x7c, 0x66, 0x09, 0x72, 0xc1,
-  0x40, 0x87, 0x88, 0xcf, 0x2c, 0x41, 0x2e, 0x0c, 0x47, 0xfc, 0x45, 0xe8,
-  0x08, 0xdf, 0x2c, 0x83, 0x2d, 0xe4, 0x42, 0x60, 0xa0, 0x21, 0x3a, 0xf1,
-  0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x91, 0x7c,
-  0xac, 0x08, 0xe2, 0x53, 0x04, 0x0a, 0xe9, 0x70, 0x43, 0x60, 0x42, 0x60,
-  0x30, 0xcb, 0x70, 0x0b, 0xb8, 0x10, 0x98, 0xea, 0x0c, 0xf1, 0x99, 0x25,
-  0xc8, 0x05, 0x23, 0x5a, 0x07, 0x3e, 0xb3, 0x04, 0xb9, 0x30, 0xd0, 0xf2,
-  0x68, 0xb3, 0x80, 0xd1, 0x02, 0x71, 0x0b, 0x02, 0x2e, 0xd0, 0x4c, 0x2d,
-  0x5c, 0x30, 0x8c, 0xb1, 0x0e, 0xec, 0xc4, 0x67, 0x38, 0xc2, 0x35, 0x62,
-  0x87, 0xf8, 0x66, 0x19, 0x74, 0xa1, 0x17, 0x02, 0x93, 0x9d, 0xd7, 0x88,
-  0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x30, 0xe4,
-  0x63, 0x45, 0x10, 0x9f, 0x22, 0x6c, 0x48, 0x87, 0x1b, 0x02, 0x1a, 0x02,
-  0x83, 0x59, 0x86, 0x5d, 0xe0, 0x85, 0xc0, 0x06, 0xdd, 0x81, 0xcf, 0x2c,
-  0x41, 0x38, 0xd8, 0xed, 0x10, 0xf1, 0x99, 0x25, 0x08, 0x87, 0xe1, 0x88,
-  0xdc, 0xc0, 0x1d, 0xe1, 0x9b, 0x65, 0xf0, 0x85, 0x70, 0x08, 0x4c, 0x37,
-  0x72, 0x27, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2,
-  0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x08, 0x23, 0x1d, 0x6e, 0x08,
-  0x7e, 0x08, 0x0c, 0x66, 0x19, 0x7e, 0x01, 0x1c, 0x02, 0x0b, 0x9f, 0x21,
-  0x3e, 0xb3, 0x04, 0xe1, 0x60, 0x84, 0xf9, 0xc0, 0x67, 0x96, 0x20, 0x1c,
-  0x06, 0x5a, 0x1e, 0x6d, 0x17, 0x30, 0x5e, 0x20, 0x7e, 0x41, 0x00, 0x07,
-  0xd4, 0xe9, 0x85, 0x0b, 0x86, 0xb9, 0xe0, 0xa9, 0xdb, 0x9e, 0x3a, 0xd9,
-  0x19, 0xe6, 0xd2, 0x65, 0x98, 0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83,
-  0x03, 0x00, 0x41, 0x30, 0xf0, 0xde, 0xe8, 0x87, 0xfc, 0x87, 0x8d, 0x46,
-  0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81,
-  0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x36, 0x3b,
-  0x32, 0xa3, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0xbb,
-  0xa3, 0x33, 0x4a, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x36,
-  0x3c, 0x42, 0xa3, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60,
-  0x01, 0xa5, 0x33, 0x42, 0xa1, 0x60, 0x8e, 0x70, 0xa8, 0x8e, 0x46, 0x13,
-  0x02, 0xe0, 0x82, 0xa7, 0x66, 0x09, 0xc4, 0x61, 0xa0, 0xe5, 0x31, 0x8d,
-  0x56, 0x90, 0xc3, 0x80, 0x15, 0x58, 0xe2, 0x15, 0x84, 0x70, 0x90, 0xc3,
-  0x00, 0x16, 0x66, 0x19, 0xc6, 0xa1, 0x1c, 0xea, 0x63, 0x38, 0x42, 0x3f,
-  0x74, 0x68, 0xf8, 0x6e, 0x3f, 0x86, 0x19, 0x6e, 0x08, 0x4a, 0x88, 0x0c,
-  0x6a, 0x08, 0x74, 0x38, 0x62, 0x3f, 0x7c, 0x68, 0xf8, 0x2a, 0x10, 0xf4,
-  0xfa, 0x63, 0x98, 0xe1, 0x86, 0x00, 0x85, 0xc8, 0xa0, 0x82, 0x41, 0x67,
-  0x19, 0xc8, 0x21, 0x1f, 0x82, 0x63, 0x9f, 0x61, 0x6e, 0x64, 0x86, 0x19,
-  0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0xef, 0x94, 0xee, 0xc8, 0x86, 0x48,
-  0x69, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46,
-  0x13, 0x88, 0xa1, 0x88, 0x43, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60,
-  0x73, 0x25, 0x3f, 0x3a, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c,
-  0xb6, 0x57, 0xfa, 0x23, 0x86, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1,
-  0x60, 0x83, 0x25, 0x50, 0x92, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10,
-  0x0c, 0x16, 0x5c, 0xfa, 0x23, 0x30, 0x0a, 0x56, 0x09, 0x8e, 0x5a, 0x69,
-  0x34, 0x21, 0x00, 0x2e, 0x78, 0x6a, 0x96, 0x20, 0x1f, 0x86, 0x1b, 0x5a,
-  0x24, 0x96, 0xc0, 0x60, 0x96, 0xc1, 0x1c, 0xce, 0x21, 0xa8, 0x1d, 0x12,
-  0x25, 0xb8, 0xe0, 0xa9, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0x74,
-  0x69, 0x94, 0x68, 0x84, 0x8e, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80,
-  0xda, 0xa5, 0x51, 0x0a, 0x84, 0x0b, 0x86, 0x29, 0x1f, 0x3a, 0x25, 0xb8,
-  0xe0, 0xa9, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0x7e, 0x09, 0x95,
-  0x6c, 0x24, 0x8f, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x02, 0x27,
-  0x54, 0x0a, 0x84, 0x0b, 0x86, 0xb9, 0xe0, 0xa9, 0x3b, 0x9e, 0xba, 0x13,
-  0x1a, 0xe6, 0x7c, 0x66, 0x98, 0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83,
-  0x03, 0x00, 0x41, 0x30, 0xf0, 0xc8, 0x89, 0x96, 0xe6, 0x28, 0x9c, 0x46,
-  0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81,
-  0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb6, 0x75,
-  0xda, 0xa5, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0x63,
-  0x27, 0x5e, 0x4a, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb6,
-  0x76, 0xea, 0xa5, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60,
-  0xa9, 0x27, 0x5e, 0xea, 0xa3, 0x00, 0x9d, 0x5a, 0x49, 0x9d, 0x46, 0x13,
-  0x02, 0xe0, 0x82, 0xa7, 0x66, 0x09, 0xf2, 0x61, 0xb8, 0x41, 0x4d, 0xda,
-  0x09, 0x0c, 0x66, 0x19, 0xd0, 0x21, 0x1f, 0x02, 0x7b, 0xa3, 0x38, 0x8a,
-  0xcf, 0x70, 0x04, 0x9c, 0xc8, 0x11, 0xf1, 0xcd, 0x32, 0xa4, 0x03, 0x3b,
-  0x04, 0x36, 0x47, 0x71, 0x12, 0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30, 0xcc,
-  0x05, 0x4f, 0x59, 0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xdc, 0x93,
-  0x0e, 0x37, 0x04, 0xf5, 0x04, 0x06, 0xb3, 0x0c, 0xea, 0xb0, 0x0e, 0x81,
-  0x0d, 0x7b, 0x04, 0x9f, 0x59, 0x02, 0x78, 0x30, 0x3d, 0x22, 0xe2, 0x33,
-  0x4b, 0x00, 0x0f, 0xc3, 0x11, 0x7b, 0xb2, 0x47, 0xc2, 0x37, 0xcb, 0xd0,
-  0x0e, 0xf0, 0x10, 0x18, 0x9f, 0xf0, 0x51, 0x7c, 0x2c, 0x70, 0xe8, 0x73,
+  0xf8, 0x14, 0x31, 0x8f, 0x81, 0x0e, 0x37, 0x04, 0xf1, 0x18, 0x80, 0xc1,
+  0x2c, 0x03, 0x6a, 0xa4, 0x46, 0x60, 0xc3, 0x1d, 0x06, 0xf0, 0x99, 0x25,
+  0x70, 0x0d, 0xb3, 0xc3, 0x80, 0x88, 0xcf, 0x2c, 0x81, 0x6b, 0x0c, 0x47,
+  0x88, 0xc2, 0x1d, 0x06, 0xc2, 0x37, 0xcb, 0xb0, 0x1a, 0xae, 0x11, 0xd8,
+  0x28, 0xe0, 0x61, 0x10, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05,
+  0x4f, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xf8, 0x63, 0xa0,
+  0xc3, 0x0d, 0x01, 0x3f, 0x06, 0x60, 0x30, 0xcb, 0xc0, 0x1a, 0xad, 0x11,
+  0x18, 0x28, 0x06, 0x43, 0x7c, 0x66, 0x09, 0x5c, 0xc3, 0x88, 0x51, 0x0c,
+  0xe0, 0x33, 0x4b, 0xe0, 0x1a, 0x03, 0x2d, 0x8f, 0x86, 0x1a, 0x58, 0x6a,
+  0x10, 0xac, 0x21, 0xb4, 0x86, 0x4e, 0xa8, 0xc6, 0x05, 0xc3, 0x98, 0x28,
+  0x06, 0xa6, 0x18, 0xc4, 0x67, 0x38, 0xe2, 0x54, 0x4e, 0x31, 0x20, 0xbe,
+  0x59, 0x86, 0xd7, 0x90, 0x8d, 0xc0, 0x50, 0x31, 0x40, 0x95, 0xf8, 0x58,
+  0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca, 0x02, 0x43, 0x3e, 0x56,
+  0x04, 0xf1, 0x29, 0x82, 0x25, 0x03, 0x1d, 0x6e, 0x08, 0x54, 0x32, 0x00,
+  0x83, 0x59, 0x06, 0xd8, 0x88, 0x8d, 0xc0, 0x06, 0x58, 0x0c, 0xe0, 0x33,
+  0x4b, 0x60, 0x1b, 0xd6, 0x8a, 0x01, 0x11, 0x9f, 0x59, 0x02, 0xdb, 0x18,
+  0x8e, 0x90, 0x15, 0x57, 0x0c, 0x84, 0x6f, 0x96, 0x61, 0x36, 0x6c, 0x23,
+  0xb0, 0x59, 0x79, 0xc5, 0x20, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98,
+  0x0b, 0x9e, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xb8, 0xc9,
+  0x40, 0x87, 0x1b, 0x82, 0x9a, 0x0c, 0xc0, 0x60, 0x96, 0x81, 0x36, 0x6a,
+  0x23, 0xb0, 0x5b, 0x0c, 0x86, 0xf8, 0xcc, 0x12, 0xd8, 0x86, 0x11, 0xbc,
+  0x18, 0xc0, 0x67, 0x96, 0xc0, 0x36, 0x06, 0x5a, 0x1e, 0x0d, 0x36, 0xb0,
+  0xd8, 0x20, 0x68, 0x43, 0xa8, 0x0d, 0xbd, 0x92, 0x8d, 0x0b, 0x86, 0xb9,
+  0xe0, 0xa9, 0xdb, 0x9e, 0x3a, 0x54, 0x0c, 0x86, 0xb9, 0xdf, 0x1b, 0xe6,
+  0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xbc,
+  0xb2, 0x0c, 0x6a, 0x32, 0xa0, 0xc7, 0x40, 0x2c, 0x83, 0xd1, 0x84, 0x00,
+  0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22,
+  0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x8d, 0x2d, 0x03, 0x9e,
+  0x0c, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0xad, 0x2d,
+  0x83, 0x9e, 0x0c, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83,
+  0xcd, 0x2d, 0x03, 0x9f, 0x0c, 0x12, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10,
+  0x04, 0x83, 0xc5, 0x2e, 0x83, 0x9e, 0x0c, 0xfc, 0x31, 0x08, 0xd2, 0x32,
+  0x70, 0xc9, 0x60, 0x2d, 0x83, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xa9, 0x59,
+  0x82, 0xdb, 0x18, 0x68, 0x79, 0x4c, 0x43, 0x34, 0x7c, 0x54, 0x08, 0x0d,
+  0x96, 0x20, 0x0d, 0xc1, 0x36, 0x7c, 0x54, 0x28, 0x0d, 0xab, 0x97, 0x92,
+  0x0c, 0xe0, 0x33, 0xcb, 0x80, 0x1b, 0xba, 0x11, 0x2f, 0xc3, 0x11, 0x41,
+  0x4c, 0x06, 0xc3, 0x77, 0xc2, 0x30, 0xc3, 0x0d, 0x01, 0x3f, 0x06, 0x64,
+  0x50, 0x43, 0xa0, 0xc3, 0x11, 0xf7, 0x52, 0x93, 0xc1, 0xf0, 0x55, 0x20,
+  0xe8, 0xe5, 0xcb, 0x30, 0xc3, 0x0d, 0xc1, 0x3f, 0x06, 0x64, 0x50, 0xc1,
+  0xa0, 0xb3, 0x0c, 0xb9, 0xe1, 0x1e, 0xc1, 0x8d, 0x63, 0x30, 0xcc, 0xe9,
+  0xdf, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xe0, 0xf9, 0x65, 0xe0,
+  0x96, 0x41, 0x4b, 0x06, 0x7b, 0x19, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82,
+  0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x71, 0xc8, 0x88,
+  0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0xa5, 0x19, 0xd4, 0x65, 0x70, 0x10,
+  0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0xa6, 0x19, 0xd8, 0x65,
+  0xc0, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0xa7, 0x19,
+  0xdc, 0x65, 0x20, 0x11, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c,
+  0xaf, 0x19, 0xd8, 0x65, 0x70, 0x93, 0x41, 0x20, 0x9a, 0xc1, 0x59, 0x06,
+  0xa4, 0x19, 0x8c, 0x26, 0x04, 0xc0, 0x05, 0x4f, 0xcd, 0x12, 0xb8, 0xc7,
+  0x70, 0x43, 0xca, 0xa0, 0x66, 0x00, 0x06, 0xb3, 0x0c, 0xbb, 0xc1, 0x1b,
+  0x41, 0xc9, 0x64, 0x90, 0x97, 0x01, 0x5c, 0xf0, 0xd4, 0x88, 0xc1, 0x01,
+  0x80, 0x20, 0x18, 0x50, 0xb1, 0x19, 0xe8, 0x65, 0xb0, 0xad, 0x65, 0x30,
+  0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x94, 0x6c, 0x06, 0x7a, 0x19, 0x04,
+  0xc2, 0x05, 0xc3, 0x54, 0x4d, 0x06, 0x7e, 0x19, 0xc0, 0x05, 0x4f, 0x8d,
+  0x18, 0x1c, 0x00, 0x08, 0x82, 0x01, 0x65, 0x9b, 0xc1, 0x5f, 0x06, 0x32,
+  0x03, 0x97, 0xc1, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50, 0xb7, 0x19,
+  0xfc, 0x65, 0x10, 0x08, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x77, 0x3c, 0x75,
+  0xfe, 0x18, 0x0c, 0x73, 0x35, 0x18, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c,
+  0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0xde, 0x6e, 0x06, 0xab, 0x19,
+  0xa8, 0x65, 0x80, 0x9b, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1,
+  0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20,
+  0x00, 0x08, 0x82, 0xc1, 0x26, 0x9e, 0x81, 0x6c, 0x06, 0x09, 0x11, 0x8c,
+  0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x36, 0x9e, 0xc1, 0x6c, 0x06, 0x09,
+  0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x46, 0x9e, 0x01, 0x6d,
+  0x06, 0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0xc2, 0x9e,
+  0xc1, 0x6c, 0x06, 0x74, 0x19, 0x04, 0xbf, 0x19, 0x90, 0x66, 0x10, 0x9e,
+  0xc1, 0x68, 0x42, 0x00, 0x5c, 0xf0, 0xd4, 0x2c, 0x81, 0x7b, 0x0c, 0x37,
+  0x98, 0x0d, 0x79, 0x06, 0x60, 0x30, 0xcb, 0xd0, 0x1b, 0xee, 0x11, 0x98,
+  0x59, 0x06, 0x68, 0x19, 0xc4, 0x67, 0x38, 0xe2, 0x0e, 0xd2, 0x32, 0x20,
+  0xbe, 0x59, 0x06, 0xdf, 0x08, 0x8f, 0xc0, 0xd4, 0x32, 0xc0, 0x83, 0xf8,
+  0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca, 0x02, 0x43, 0x3e,
+  0x56, 0x04, 0xf1, 0x29, 0xc2, 0x3d, 0x03, 0x1d, 0x6e, 0x08, 0xd8, 0x33,
+  0x00, 0x83, 0x59, 0x86, 0xdf, 0x00, 0x8f, 0xc0, 0x06, 0xb9, 0x0c, 0xe0,
+  0x33, 0x4b, 0x50, 0x1e, 0x16, 0x97, 0x01, 0x11, 0x9f, 0x59, 0x82, 0xf2,
+  0x18, 0x8e, 0x10, 0x05, 0xb9, 0x0c, 0x84, 0x6f, 0x96, 0x41, 0x3c, 0xca,
+  0x23, 0xb0, 0x51, 0x98, 0xcb, 0x20, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60,
+  0x98, 0x0b, 0x9e, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xc8,
+  0xcf, 0x40, 0x87, 0x1b, 0x82, 0xfb, 0x0c, 0xc0, 0x60, 0x96, 0x61, 0x3c,
+  0xc8, 0x23, 0xb0, 0xbd, 0x0c, 0x86, 0xf8, 0xcc, 0x12, 0x94, 0x87, 0x11,
+  0x7e, 0x19, 0xc0, 0x67, 0x96, 0xa0, 0x3c, 0x06, 0x5a, 0x1e, 0xed, 0x37,
+  0x30, 0xf0, 0x20, 0xc6, 0x43, 0x20, 0x0f, 0x9d, 0x08, 0x8f, 0x0b, 0x86,
+  0xb1, 0xbe, 0x0c, 0x42, 0x33, 0x88, 0xcf, 0x70, 0x84, 0xe8, 0x88, 0x66,
+  0x40, 0x7c, 0xb3, 0x0c, 0xe6, 0x91, 0x1e, 0x81, 0x8d, 0x66, 0x30, 0x3a,
+  0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x86,
+  0x7c, 0xac, 0x08, 0xe2, 0x53, 0xc4, 0x89, 0x06, 0x3a, 0xdc, 0x10, 0x94,
+  0x68, 0x00, 0x06, 0xb3, 0x0c, 0xe7, 0x81, 0x1e, 0x81, 0x0d, 0xab, 0x19,
+  0xc0, 0x67, 0x96, 0xa0, 0x3d, 0x0c, 0x35, 0x03, 0x22, 0x3e, 0xb3, 0x04,
+  0xed, 0x31, 0x1c, 0xd1, 0x3a, 0xa9, 0x19, 0x08, 0xdf, 0x2c, 0x83, 0x7a,
+  0xb4, 0x47, 0x60, 0xae, 0xa3, 0x9a, 0x41, 0x7c, 0x2c, 0x70, 0xe8, 0x73,
   0xc1, 0x30, 0x17, 0x3c, 0x65, 0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14,
-  0x21, 0x52, 0x3a, 0xdc, 0x10, 0x80, 0x14, 0x18, 0xcc, 0x32, 0xb8, 0xc3,
-  0x3b, 0x04, 0x46, 0x4a, 0x43, 0x7c, 0x66, 0x09, 0xe0, 0xc1, 0x88, 0x53,
-  0x82, 0xcf, 0x2c, 0x01, 0x3c, 0x0c, 0xb4, 0x3c, 0x9a, 0x3a, 0x60, 0xeb,
-  0x40, 0xb8, 0x83, 0xf0, 0x0e, 0x2c, 0xc5, 0x0e, 0x17, 0x0c, 0x63, 0xa6,
-  0xa4, 0x4a, 0xf1, 0x19, 0x8e, 0x30, 0x95, 0x55, 0x22, 0xbe, 0x59, 0x86,
-  0x78, 0xa0, 0x87, 0xc0, 0x58, 0xe9, 0x54, 0xe2, 0x63, 0xc1, 0x40, 0x9f,
-  0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7,
-  0x08, 0x98, 0xd2, 0xe1, 0x86, 0xc0, 0xa5, 0xc0, 0x60, 0x96, 0x41, 0x1e,
-  0xe6, 0x21, 0xb0, 0x81, 0x96, 0xe0, 0x33, 0x4b, 0x80, 0x0f, 0x16, 0x4b,
-  0x44, 0x7c, 0x66, 0x09, 0xf0, 0x61, 0x38, 0x22, 0x56, 0x64, 0x49, 0xf8,
-  0x66, 0x19, 0xea, 0x01, 0x1f, 0x02, 0x93, 0x95, 0x59, 0x8a, 0x8f, 0x05,
+  0x21, 0xa3, 0x81, 0x0e, 0x37, 0x04, 0x30, 0x1a, 0x80, 0xc1, 0x2c, 0xc3,
+  0x7a, 0xb0, 0x47, 0x60, 0xb2, 0x19, 0x0c, 0xf1, 0x99, 0x25, 0x68, 0x0f,
+  0x23, 0x6e, 0x33, 0x80, 0xcf, 0x2c, 0x41, 0x7b, 0x0c, 0xb4, 0x3c, 0xda,
+  0x79, 0x60, 0xe8, 0x41, 0xac, 0x87, 0xc0, 0x1e, 0x70, 0x97, 0x1e, 0x17,
+  0x0c, 0x73, 0xc1, 0x53, 0xb7, 0x3d, 0x75, 0xa3, 0x19, 0x0c, 0x73, 0xba,
+  0x18, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20,
+  0x08, 0x06, 0x1e, 0x98, 0x06, 0x30, 0x1a, 0xbc, 0x67, 0xd0, 0xa3, 0xc1,
+  0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26,
+  0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x76,
+  0xa6, 0xc1, 0x8d, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
+  0xc1, 0x86, 0xa6, 0x01, 0x8e, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00,
+  0x08, 0x82, 0xc1, 0x96, 0xa6, 0x41, 0x8e, 0x06, 0x09, 0x11, 0x8c, 0x18,
+  0x28, 0x00, 0x08, 0x82, 0xc1, 0x12, 0xa7, 0x01, 0x8e, 0x06, 0xf9, 0x19,
+  0x04, 0x64, 0x1a, 0xa4, 0x68, 0x60, 0xa6, 0xc1, 0x68, 0x42, 0x00, 0x5c,
+  0xf0, 0xd4, 0x2c, 0x81, 0x7b, 0x0c, 0xb4, 0x3c, 0xa6, 0x91, 0x1b, 0x34,
+  0x2c, 0xe0, 0x06, 0x4b, 0xec, 0x86, 0xd0, 0x1e, 0x34, 0x2c, 0xf0, 0xc6,
+  0x2c, 0xc3, 0x7b, 0xc4, 0xc7, 0xfa, 0x0c, 0x47, 0xbc, 0xcf, 0x8a, 0x06,
+  0xc3, 0x77, 0xf0, 0x33, 0xcc, 0x70, 0x43, 0x60, 0x9f, 0x01, 0x19, 0xd4,
+  0x10, 0xe8, 0x70, 0x84, 0xfc, 0xbc, 0x68, 0x30, 0x7c, 0x15, 0x08, 0x7a,
+  0xf4, 0x33, 0xcc, 0x70, 0x43, 0x90, 0x9f, 0x01, 0x19, 0x54, 0x30, 0xe8,
+  0x2c, 0x03, 0x7c, 0x94, 0x48, 0x70, 0xbd, 0x19, 0x0c, 0x73, 0xf4, 0x18,
+  0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x1e, 0x9e, 0x06, 0x68,
+  0x1a, 0x9c, 0x68, 0x50, 0xa7, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08,
+  0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x87, 0x8c, 0x18,
+  0x20, 0x00, 0x08, 0x82, 0xc1, 0xf6, 0xa7, 0xc1, 0x9b, 0x06, 0x07, 0x11,
+  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x06, 0xaa, 0x01, 0x9c, 0x06,
+  0x0c, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x16, 0xaa, 0x41,
+  0x9c, 0x06, 0x12, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0x92,
+  0xaa, 0x01, 0x9c, 0x06, 0x31, 0x1a, 0x04, 0x7c, 0x1a, 0x84, 0x69, 0xe0,
+  0xa7, 0xc1, 0x68, 0x42, 0x00, 0x5c, 0xf0, 0xd4, 0x2c, 0x41, 0x89, 0x0c,
+  0x37, 0x8c, 0x90, 0xa8, 0x06, 0x60, 0x30, 0xcb, 0x20, 0x1f, 0xf3, 0x11,
+  0x14, 0x8b, 0x06, 0x73, 0x1a, 0xc0, 0x05, 0x4f, 0x8d, 0x18, 0x1c, 0x00,
+  0x08, 0x82, 0x01, 0xb5, 0xaa, 0x01, 0x9d, 0x06, 0x29, 0x54, 0xa6, 0xc1,
+  0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50, 0xac, 0x1a, 0xd0, 0x69, 0x10,
+  0x08, 0x17, 0x0c, 0x53, 0x2f, 0x1a, 0xe0, 0x69, 0x00, 0x17, 0x3c, 0x35,
+  0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x14, 0xac, 0x06, 0x79, 0x1a, 0xb4,
+  0x90, 0x9a, 0x06, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0xc5, 0x6a,
+  0x90, 0xa7, 0x41, 0x20, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0xdd, 0xf1, 0xd4,
+  0xe1, 0x67, 0x30, 0xcc, 0xbd, 0x64, 0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31,
+  0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x78, 0xb5, 0x1a, 0x94, 0x6a,
+  0x40, 0xa6, 0x81, 0xac, 0x06, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04,
+  0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80,
+  0x00, 0x20, 0x08, 0x06, 0x1b, 0xaf, 0x06, 0xac, 0x1a, 0x24, 0x44, 0x30,
+  0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x5b, 0xaf, 0x06, 0xad, 0x1a, 0x24,
+  0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x9b, 0xaf, 0x06, 0xae,
+  0x1a, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x8b, 0xb9,
+  0x06, 0xad, 0x1a, 0xb8, 0x69, 0x10, 0xe4, 0x6a, 0xe0, 0xa7, 0xc1, 0xae,
+  0x06, 0xa3, 0x09, 0x01, 0x70, 0xc1, 0x53, 0xb3, 0x04, 0x25, 0x32, 0xdc,
+  0x00, 0x46, 0xbe, 0x1a, 0x80, 0xc1, 0x2c, 0x03, 0x7d, 0x94, 0x48, 0x60,
+  0x60, 0x1a, 0x88, 0x69, 0x10, 0x9f, 0xe1, 0x88, 0x32, 0x1a, 0xd3, 0x80,
+  0xf8, 0x66, 0x19, 0xea, 0x03, 0x3f, 0x02, 0x23, 0xd3, 0xc0, 0x8c, 0xe2,
+  0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x0c, 0xf9,
+  0x58, 0x11, 0xc4, 0xa7, 0x08, 0x74, 0x0d, 0x74, 0xb8, 0x21, 0x30, 0xd7,
+  0x00, 0x0c, 0x66, 0x19, 0xec, 0xe3, 0x3e, 0x02, 0x1b, 0xd8, 0x34, 0x80,
+  0xcf, 0x2c, 0x01, 0x7f, 0xd8, 0x9a, 0x06, 0x44, 0x7c, 0x66, 0x09, 0xf8,
+  0x63, 0x38, 0x02, 0x8e, 0xd8, 0x34, 0x10, 0xbe, 0x59, 0x86, 0xfc, 0xe0,
+  0x8f, 0xc0, 0xe2, 0xa8, 0x4d, 0x83, 0xf8, 0x58, 0xe0, 0xd0, 0xe7, 0x82,
+  0x61, 0x2e, 0x78, 0xca, 0x82, 0x48, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x62,
+  0x5e, 0x03, 0x1d, 0x6e, 0x08, 0xe2, 0x35, 0x00, 0x83, 0x59, 0x06, 0xfd,
+  0xd8, 0x8f, 0xc0, 0xea, 0x34, 0x18, 0xe2, 0x33, 0x4b, 0xc0, 0x1f, 0x46,
+  0xe0, 0x69, 0x00, 0x9f, 0x59, 0x02, 0xfe, 0x18, 0x68, 0x79, 0x34, 0xfb,
+  0xc0, 0xee, 0x83, 0xd0, 0x0f, 0x61, 0x3f, 0xec, 0x31, 0xc0, 0x8f, 0x0b,
+  0x86, 0xb1, 0x3b, 0x0d, 0xf6, 0x34, 0x88, 0xcf, 0x70, 0x44, 0x1f, 0xf1,
+  0x69, 0x40, 0x7c, 0xb3, 0x0c, 0xfd, 0x01, 0x22, 0x81, 0xf5, 0x69, 0xe0,
+  0x47, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05,
+  0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x44, 0xc8, 0x06, 0x3a, 0xdc, 0x10,
+  0xfc, 0x6b, 0x00, 0x06, 0xb3, 0x0c, 0xfe, 0xf1, 0x1f, 0x81, 0x0d, 0xa5,
+  0x1a, 0xc0, 0x67, 0x96, 0x80, 0x44, 0x4c, 0x54, 0x03, 0x22, 0x3e, 0xb3,
+  0x04, 0x24, 0x32, 0x1c, 0x81, 0x4a, 0xa3, 0x1a, 0x08, 0xdf, 0x2c, 0x43,
+  0x88, 0x90, 0x48, 0x60, 0xa9, 0x44, 0xaa, 0x41, 0x7c, 0x2c, 0x70, 0xe8,
+  0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8,
+  0x14, 0xc1, 0xb2, 0x81, 0x0e, 0x37, 0x04, 0x2a, 0x1b, 0x80, 0xc1, 0x2c,
+  0x83, 0x88, 0x8c, 0x48, 0x60, 0xac, 0x1a, 0x0c, 0xf1, 0x99, 0x25, 0x20,
+  0x11, 0x23, 0x62, 0x35, 0x80, 0xcf, 0x2c, 0x01, 0x89, 0x0c, 0xb4, 0x3c,
+  0x9a, 0x7f, 0x60, 0xff, 0x41, 0x88, 0x88, 0x30, 0x22, 0x66, 0x19, 0x80,
+  0xc8, 0x05, 0xc3, 0x5c, 0xf0, 0xd4, 0x6d, 0x4f, 0x5d, 0x9f, 0x06, 0xc3,
+  0x1c, 0x6d, 0x06, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c,
+  0x00, 0x08, 0x82, 0x81, 0xa7, 0xb3, 0x81, 0xca, 0x06, 0xe9, 0x1a, 0xdc,
+  0x6c, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08,
+  0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60,
+  0xb0, 0x85, 0x6d, 0x10, 0xb3, 0x41, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00,
+  0x82, 0x60, 0xb0, 0x89, 0x6d, 0x20, 0xb3, 0x41, 0x42, 0x04, 0x23, 0x06,
+  0x08, 0x00, 0x82, 0x60, 0xb0, 0x8d, 0x6d, 0x30, 0xb3, 0x41, 0x42, 0x04,
+  0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0xac, 0x6d, 0x20, 0xb3, 0xc1,
+  0xbc, 0x06, 0x81, 0xcf, 0x06, 0x23, 0x1b, 0x80, 0x6d, 0x30, 0x9a, 0x10,
+  0x00, 0x17, 0x3c, 0x35, 0x4b, 0x50, 0x22, 0x03, 0x2d, 0x8f, 0x69, 0xc0,
+  0x07, 0x8a, 0x0b, 0xef, 0xc1, 0x12, 0xf2, 0x21, 0x90, 0x08, 0x8a, 0x0b,
+  0xf3, 0x31, 0xcb, 0x60, 0x22, 0x28, 0x52, 0x4e, 0xc3, 0x11, 0xea, 0x54,
+  0xb2, 0xc1, 0xf0, 0xdd, 0x3a, 0x0d, 0x33, 0xdc, 0x10, 0xc0, 0x6b, 0x40,
+  0x06, 0x35, 0x04, 0x3a, 0x1c, 0xc1, 0x4e, 0x29, 0x1b, 0x0c, 0x5f, 0x05,
+  0x82, 0x9e, 0x3b, 0x0d, 0x33, 0xdc, 0x10, 0xcc, 0x6b, 0x40, 0x06, 0x15,
+  0x0c, 0x3a, 0xcb, 0x70, 0x22, 0x3c, 0x12, 0xdc, 0xad, 0x06, 0xc3, 0x9c,
+  0x7b, 0x06, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x81, 0x27, 0xb7,
+  0x81, 0xd8, 0x06, 0x21, 0x1b, 0xbc, 0x6d, 0x30, 0x9a, 0x10, 0x00, 0xa3,
+  0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0xc4, 0x21,
+  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0xe5, 0x6d, 0x90, 0xb6, 0xc1,
+  0x41, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0xe9, 0x6d, 0xa0,
+  0xb6, 0x01, 0x43, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0xed,
+  0x6d, 0xb0, 0xb6, 0x81, 0x44, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60,
+  0xb0, 0x8c, 0x6e, 0xa0, 0xb6, 0xc1, 0xca, 0x06, 0x81, 0xdd, 0x06, 0x3b,
+  0x1b, 0xe0, 0x6d, 0x30, 0x9a, 0x10, 0x00, 0x17, 0x3c, 0x35, 0x4b, 0xc0,
+  0x23, 0xc3, 0x0d, 0xfd, 0xc4, 0xb7, 0x01, 0x18, 0xcc, 0x32, 0xa4, 0x88,
+  0x8a, 0x04, 0x65, 0xb2, 0x41, 0xdb, 0x06, 0x70, 0xc1, 0x53, 0x23, 0x06,
+  0x07, 0x00, 0x82, 0x60, 0x40, 0x95, 0x6e, 0xe0, 0xb6, 0x01, 0x49, 0xfd,
+  0x6c, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x94, 0xe9, 0x06, 0x6e,
+  0x1b, 0x04, 0xc2, 0x05, 0xc3, 0x54, 0xca, 0x06, 0x72, 0x1b, 0xc0, 0x05,
+  0x4f, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x01, 0xa5, 0xba, 0xc1, 0xdc,
+  0x06, 0x27, 0x45, 0xb6, 0xc1, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50,
+  0xab, 0x1b, 0xcc, 0x6d, 0x10, 0x08, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x77,
+  0x3c, 0x75, 0xf2, 0x1a, 0x0c, 0x73, 0x29, 0x1a, 0x0c, 0x73, 0xc4, 0x30,
+  0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0xde, 0xeb, 0x06,
+  0x7f, 0x1b, 0xf8, 0x6c, 0xc0, 0xba, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26,
+  0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c,
+  0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x66, 0xbb, 0x81, 0xe9, 0x06, 0x09,
+  0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x76, 0xbb, 0xc1, 0xe9,
+  0x06, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x86, 0xbb,
+  0x01, 0xea, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1,
+  0x02, 0xbe, 0xc1, 0xe9, 0x06, 0x68, 0x1b, 0x04, 0xb3, 0x1b, 0xe0, 0x6d,
+  0x50, 0xbb, 0xc1, 0x68, 0x42, 0x00, 0x5c, 0xf0, 0xd4, 0x2c, 0x01, 0x8f,
+  0x0c, 0x37, 0xe8, 0x14, 0xee, 0x06, 0x60, 0x30, 0xcb, 0xb0, 0x22, 0x3c,
+  0x12, 0x98, 0xce, 0x06, 0x3c, 0x1b, 0xc4, 0x67, 0x38, 0x02, 0xac, 0x7a,
+  0x36, 0x20, 0xbe, 0x59, 0x06, 0x16, 0x79, 0x91, 0xc0, 0x7c, 0x36, 0x08,
+  0xab, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca, 0x02,
+  0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x42, 0x7c, 0x03, 0x1d, 0x6e, 0x08,
+  0xc0, 0x37, 0x00, 0x83, 0x59, 0x86, 0x16, 0x71, 0x91, 0xc0, 0x06, 0xb3,
+  0x0d, 0xe0, 0x33, 0x4b, 0x30, 0x23, 0x56, 0xb6, 0x01, 0x11, 0x9f, 0x59,
+  0x82, 0x19, 0x19, 0x8e, 0x58, 0x2b, 0xb3, 0x0d, 0x84, 0x6f, 0x96, 0x01,
+  0x46, 0x66, 0x24, 0x30, 0xb6, 0x3a, 0xdb, 0x20, 0x3e, 0x16, 0x38, 0xf4,
+  0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c,
+  0x8a, 0x68, 0xdf, 0x40, 0x87, 0x1b, 0x82, 0xf5, 0x0d, 0xc0, 0x60, 0x96,
+  0x21, 0x46, 0x64, 0x24, 0xb0, 0xb7, 0x0d, 0x86, 0xf8, 0xcc, 0x12, 0xcc,
+  0x88, 0x11, 0x72, 0x1b, 0xc0, 0x67, 0x96, 0x60, 0x46, 0x06, 0x5a, 0x1e,
+  0xad, 0x45, 0x30, 0x17, 0x21, 0x62, 0x44, 0x90, 0x11, 0x76, 0x0d, 0x5e,
+  0xe4, 0x82, 0x61, 0x2c, 0x6e, 0x83, 0xba, 0x0d, 0xe2, 0x33, 0x1c, 0x71,
+  0x57, 0x76, 0x1b, 0x10, 0xdf, 0x2c, 0x03, 0x8d, 0xdc, 0x48, 0x60, 0x77,
+  0x1b, 0xe0, 0x55, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c,
+  0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xb1, 0xbf, 0x81, 0x0e,
+  0x37, 0x04, 0xf9, 0x1b, 0x80, 0xc1, 0x2c, 0x43, 0x8d, 0xd8, 0x48, 0x60,
+  0xc3, 0xdf, 0x06, 0xf0, 0x99, 0x25, 0xd8, 0x11, 0xe3, 0xdb, 0x80, 0x88,
+  0xcf, 0x2c, 0xc1, 0x8e, 0x0c, 0x47, 0x88, 0x56, 0xdf, 0x06, 0xc2, 0x37,
+  0xcb, 0x80, 0x23, 0x3b, 0x12, 0xd8, 0x68, 0xf9, 0x6d, 0x10, 0x1f, 0x0b,
+  0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x10, 0xc9, 0xc7, 0x8a,
+  0x20, 0x3e, 0x45, 0x98, 0x70, 0xa0, 0xc3, 0x0d, 0x01, 0x09, 0x07, 0x60,
+  0x30, 0xcb, 0x90, 0x23, 0x3a, 0x12, 0x98, 0xe9, 0x06, 0x43, 0x7c, 0x66,
+  0x09, 0x76, 0xc4, 0x88, 0xd5, 0x0d, 0xe0, 0x33, 0x4b, 0xb0, 0x23, 0x03,
+  0x2d, 0x8f, 0x56, 0x23, 0x98, 0x8d, 0x10, 0x39, 0x22, 0xe8, 0x88, 0xce,
+  0x06, 0x37, 0x72, 0xc1, 0x30, 0x17, 0x3c, 0x75, 0xdb, 0x53, 0x77, 0xb7,
+  0xc1, 0x30, 0xe7, 0xaa, 0xc1, 0x30, 0x47, 0x0c, 0x73, 0xc4, 0x30, 0x23,
+  0x06, 0x07, 0x00, 0x82, 0x60, 0xe0, 0xd1, 0x70, 0x40, 0xc2, 0xc1, 0xf8,
+  0x06, 0x31, 0x1c, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26,
+  0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x91, 0xc8, 0x88, 0x01, 0x02, 0x80,
+  0x20, 0x18, 0x6c, 0x3b, 0x1c, 0xac, 0x70, 0x90, 0x10, 0xc1, 0x88, 0x01,
+  0x02, 0x80, 0x20, 0x18, 0x6c, 0x3c, 0x1c, 0xb0, 0x70, 0x90, 0x10, 0xc1,
+  0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0x3d, 0x1c, 0xb4, 0x70, 0x90,
+  0x10, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c, 0x65, 0x1c, 0xb0,
+  0x70, 0xd0, 0xbe, 0x41, 0x80, 0xc3, 0x41, 0xff, 0x06, 0x3a, 0x1c, 0x8c,
+  0x26, 0x04, 0xc0, 0x05, 0x4f, 0xcd, 0x12, 0xf0, 0xc8, 0x40, 0xcb, 0x63,
+  0x1a, 0x27, 0xc2, 0xff, 0x82, 0x89, 0xb0, 0x44, 0x8a, 0x08, 0x3b, 0xc2,
+  0xff, 0x82, 0x8a, 0xcc, 0x32, 0xf4, 0xc8, 0x8f, 0xfc, 0xd6, 0x70, 0x84,
+  0xfa, 0xfc, 0x6f, 0x30, 0x7c, 0xb7, 0x3e, 0xc3, 0x0c, 0x37, 0x04, 0xea,
+  0x1b, 0x90, 0x41, 0x0d, 0x81, 0x0e, 0x47, 0x98, 0xd7, 0x08, 0x07, 0xc3,
+  0x57, 0x81, 0xa0, 0x87, 0x5e, 0xc3, 0x0c, 0x37, 0x04, 0xed, 0x1b, 0x90,
+  0x41, 0x05, 0x83, 0xce, 0x32, 0xf8, 0xc8, 0x9c, 0x04, 0x17, 0xbb, 0xc1,
+  0x30, 0x87, 0xae, 0xc1, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xe0,
+  0xb1, 0x71, 0xc0, 0xc3, 0xc1, 0xfe, 0x06, 0x69, 0x1c, 0x8c, 0x26, 0x04,
+  0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14,
+  0x71, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0x73, 0x1c, 0x8c,
+  0x71, 0x70, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0x74,
+  0x1c, 0x90, 0x71, 0xc0, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
+  0x6c, 0x75, 0x1c, 0x94, 0x71, 0x20, 0x11, 0xc1, 0x88, 0x81, 0x02, 0x80,
+  0x20, 0x18, 0x2c, 0x7d, 0x1c, 0x90, 0x71, 0x50, 0xc2, 0x41, 0x00, 0xc7,
+  0x41, 0x0d, 0x07, 0x72, 0x1c, 0x8c, 0x26, 0x04, 0xc0, 0x05, 0x4f, 0xcd,
+  0x12, 0xcc, 0xc9, 0x70, 0xc3, 0x7d, 0xd9, 0x71, 0x00, 0x06, 0xb3, 0x0c,
+  0x60, 0x12, 0x26, 0x41, 0x81, 0x70, 0x70, 0xc6, 0x01, 0x5c, 0xf0, 0xd4,
+  0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50, 0x7f, 0x1c, 0xa0, 0x71, 0x40,
+  0x42, 0x39, 0x1c, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x01, 0x05, 0xca,
+  0x01, 0x1a, 0x07, 0x81, 0x70, 0xc1, 0x30, 0x35, 0xc2, 0x01, 0x1b, 0x07,
+  0x70, 0xc1, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0x91, 0x72,
+  0xd0, 0xc6, 0x41, 0x88, 0xf9, 0x70, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08,
+  0x06, 0x54, 0x29, 0x07, 0x6d, 0x1c, 0x04, 0xc2, 0x05, 0xc3, 0x5c, 0xf0,
+  0xd4, 0x1d, 0x4f, 0x1d, 0xfb, 0x06, 0xc3, 0xdc, 0xc8, 0x06, 0xc3, 0x1c,
+  0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x81, 0x97,
+  0xca, 0x41, 0x1e, 0x07, 0x38, 0x1c, 0x98, 0x72, 0x30, 0x9a, 0x10, 0x00,
+  0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44,
+  0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0xc1, 0x72, 0x00, 0xca,
+  0x41, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0xc5, 0x72,
+  0x10, 0xca, 0x41, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0,
+  0xc9, 0x72, 0x20, 0xca, 0x41, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82,
+  0x60, 0xb0, 0xe8, 0x72, 0x10, 0xca, 0x81, 0x18, 0x07, 0x41, 0x2b, 0x07,
+  0x72, 0x1c, 0xbc, 0x72, 0x30, 0x9a, 0x10, 0x00, 0x17, 0x3c, 0x35, 0x4b,
+  0x30, 0x27, 0xc3, 0x0d, 0x34, 0x26, 0xcb, 0x01, 0x18, 0xcc, 0x32, 0x88,
+  0xc9, 0x9c, 0x04, 0x46, 0xc3, 0x81, 0x0d, 0x07, 0xf1, 0x19, 0x8e, 0x00,
+  0xa3, 0x1b, 0x0e, 0x88, 0x6f, 0x96, 0x61, 0x4c, 0xcc, 0x24, 0x30, 0x1c,
+  0x0e, 0xc2, 0x28, 0x3e, 0x16, 0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e,
+  0xb2, 0xc0, 0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xe0, 0xe5, 0x40, 0x87,
+  0x1b, 0x02, 0x5d, 0x0e, 0xc0, 0x60, 0x96, 0x81, 0x4c, 0xca, 0x24, 0xb0,
+  0x01, 0x8c, 0x03, 0xf8, 0xcc, 0x12, 0xa8, 0x89, 0xfd, 0x70, 0x40, 0xc4,
+  0x67, 0x96, 0x40, 0x4d, 0x86, 0x23, 0xd6, 0x08, 0x8c, 0x03, 0xe1, 0x9b,
+  0x65, 0x38, 0x13, 0x35, 0x09, 0x8c, 0x8d, 0xc2, 0x38, 0x88, 0x8f, 0x05,
   0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x88, 0xe4, 0x63, 0x45,
-  0x10, 0x9f, 0x22, 0x76, 0x4a, 0x87, 0x1b, 0x82, 0x9c, 0x02, 0x83, 0x59,
-  0x06, 0x7b, 0xb8, 0x87, 0xc0, 0x76, 0x69, 0x88, 0xcf, 0x2c, 0x01, 0x3e,
-  0x18, 0x01, 0x4e, 0xf0, 0x99, 0x25, 0xc0, 0x87, 0x81, 0x96, 0x47, 0x93,
-  0x07, 0x6c, 0x1e, 0x08, 0x7b, 0x10, 0xee, 0x01, 0xaf, 0xe8, 0xe1, 0x82,
-  0x61, 0x2e, 0x78, 0xea, 0xb6, 0xa7, 0x8e, 0x95, 0x86, 0xb9, 0xf1, 0x19,
-  0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c,
-  0xbc, 0xb4, 0xca, 0x29, 0x7c, 0x32, 0xab, 0xd1, 0x84, 0x00, 0x18, 0x4d,
-  0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x12, 0x19,
-  0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x0d, 0xae, 0xc0, 0x2a, 0x21, 0x82,
-  0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xd8, 0xe2, 0x2a, 0xac, 0x12, 0x22,
-  0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x4d, 0xae, 0xc4, 0x2a, 0x21,
-  0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x58, 0xf4, 0x2a, 0xac, 0x44,
-  0x2a, 0x68, 0x2b, 0x99, 0x7a, 0xab, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xa9,
-  0x59, 0x82, 0x7c, 0x18, 0x68, 0x79, 0x4c, 0x83, 0x1c, 0xcc, 0x34, 0x18,
-  0x07, 0x96, 0x30, 0x07, 0x01, 0x1f, 0xcc, 0x34, 0x38, 0x87, 0x59, 0x06,
-  0x7d, 0xe0, 0x87, 0x77, 0x19, 0x8e, 0x90, 0x17, 0x9a, 0x1a, 0xbe, 0x9b,
-  0x97, 0x61, 0x86, 0x1b, 0x82, 0x7f, 0x22, 0x83, 0x1a, 0x02, 0x1d, 0x8e,
-  0xa8, 0x17, 0x9c, 0x1a, 0xbe, 0x0a, 0x04, 0xbd, 0x7b, 0x19, 0x66, 0xb8,
-  0x21, 0x10, 0x29, 0x32, 0xa8, 0x60, 0xd0, 0x59, 0x86, 0x7d, 0x80, 0x89,
-  0xe0, 0xcc, 0x69, 0x98, 0xeb, 0x9f, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04,
-  0xc1, 0xc0, 0x0b, 0xad, 0xb8, 0x82, 0x29, 0xbf, 0x1a, 0x4d, 0x08, 0x80,
-  0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0xe2,
-  0x90, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xd8, 0x50, 0x0b, 0xaf, 0x0e,
-  0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x2d, 0xb5, 0xf2, 0x8a,
-  0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xd8, 0x54, 0x4b, 0xaf,
-  0x24, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x45, 0xb6, 0xf2,
-  0x4a, 0xa7, 0x82, 0xd2, 0x52, 0xab, 0xd3, 0x1a, 0x4d, 0x08, 0x80, 0x0b,
-  0x9e, 0x9a, 0x25, 0x80, 0x89, 0xe1, 0x86, 0x93, 0x59, 0x2d, 0x30, 0x98,
-  0x65, 0xe8, 0x07, 0x7f, 0x08, 0xaa, 0xa6, 0xf8, 0x0a, 0x2e, 0x78, 0x6a,
-  0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x28, 0xda, 0xea, 0x2b, 0x96, 0x71,
-  0xab, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0x6a, 0xab, 0xaf, 0x02,
-  0xe1, 0x82, 0x61, 0x0a, 0xa7, 0x42, 0x0b, 0x2e, 0x78, 0x6a, 0xc4, 0xe0,
-  0x00, 0x40, 0x10, 0x0c, 0xa8, 0xdc, 0x12, 0x2d, 0x98, 0x99, 0xab, 0x11,
-  0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0x74, 0x4b, 0xb4, 0x02, 0xe1, 0x82,
-  0x61, 0x2e, 0x78, 0xea, 0x8e, 0xa7, 0x2e, 0xa4, 0x86, 0x39, 0x1c, 0x1a,
-  0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c,
-  0x3c, 0xdf, 0x72, 0xad, 0xb6, 0xda, 0xad, 0xd1, 0x84, 0x00, 0x18, 0x4d,
+  0x10, 0x9f, 0x22, 0xce, 0x39, 0xd0, 0xe1, 0x86, 0xa0, 0x9c, 0x03, 0x30,
+  0x98, 0x65, 0x40, 0x93, 0x34, 0x09, 0x2c, 0x8d, 0x83, 0x21, 0x3e, 0xb3,
+  0x04, 0x6a, 0x62, 0x04, 0x1b, 0x07, 0xf0, 0x99, 0x25, 0x50, 0x93, 0x81,
+  0x96, 0x47, 0x23, 0x13, 0xac, 0x4c, 0x08, 0x34, 0x11, 0xd2, 0x84, 0x1d,
+  0x03, 0x33, 0xb9, 0x60, 0x18, 0x5b, 0xe3, 0xe0, 0x8d, 0x83, 0xf8, 0x0c,
+  0x47, 0xc4, 0x19, 0x1c, 0x07, 0xc4, 0x37, 0xcb, 0xb0, 0x26, 0x6e, 0x12,
+  0x58, 0x1c, 0x07, 0x72, 0x16, 0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30, 0xcc,
+  0x05, 0x4f, 0x59, 0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xd4, 0x73,
+  0xa0, 0xc3, 0x0d, 0xc1, 0x3c, 0x07, 0x60, 0x30, 0xcb, 0xc0, 0x26, 0x6d,
+  0x12, 0xd8, 0x90, 0xc7, 0x01, 0x7c, 0x66, 0x09, 0xe4, 0xc4, 0xec, 0x38,
+  0x20, 0xe2, 0x33, 0x4b, 0x20, 0x27, 0xc3, 0x11, 0x7c, 0x76, 0xc7, 0x81,
+  0xf0, 0xcd, 0x32, 0xbc, 0x89, 0x9c, 0x04, 0xd6, 0x67, 0x78, 0x1c, 0xc4,
+  0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x44, 0xf2,
+  0xb1, 0x22, 0x88, 0x4f, 0x11, 0x20, 0x1d, 0xe8, 0x70, 0x43, 0xe0, 0xcf,
+  0x01, 0x18, 0xcc, 0x32, 0xc0, 0x49, 0x9c, 0x04, 0x06, 0xca, 0xc1, 0x10,
+  0x9f, 0x59, 0x02, 0x39, 0x31, 0xa2, 0x94, 0x03, 0xf8, 0xcc, 0x12, 0xc8,
+  0xc9, 0x40, 0xcb, 0xa3, 0xb1, 0x09, 0xd6, 0x26, 0x04, 0x9c, 0x08, 0x71,
+  0xe2, 0xd2, 0x81, 0x9b, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0xdd, 0xf6, 0xd4,
+  0xc5, 0x71, 0x30, 0xcc, 0xa1, 0x6e, 0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31,
+  0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x78, 0x2e, 0x1d, 0xf8, 0x73,
+  0xd0, 0xcb, 0xc1, 0x4a, 0x07, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04,
+  0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80,
+  0x00, 0x20, 0x08, 0x06, 0x5b, 0x4d, 0x07, 0x25, 0x1d, 0x24, 0x44, 0x30,
+  0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x9b, 0x4d, 0x07, 0x26, 0x1d, 0x24,
+  0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xdb, 0x4d, 0x07, 0x27,
+  0x1d, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0xcb, 0x4f,
+  0x07, 0x26, 0x1d, 0x9c, 0x73, 0x10, 0xc8, 0x74, 0x70, 0xcf, 0x01, 0x4d,
+  0x07, 0xa3, 0x09, 0x01, 0x70, 0xc1, 0x53, 0xb3, 0x04, 0x73, 0x32, 0xd0,
+  0xf2, 0x98, 0x86, 0x8f, 0xc0, 0xe7, 0xd0, 0x23, 0x2c, 0x01, 0x26, 0x82,
+  0x9c, 0xc0, 0xe7, 0x10, 0x26, 0xb3, 0x0c, 0x74, 0x62, 0x27, 0xb9, 0x36,
+  0x1c, 0x91, 0x3e, 0xf9, 0x1c, 0x0c, 0xdf, 0xa9, 0xcf, 0x30, 0xc3, 0x0d,
+  0x01, 0x39, 0x07, 0x64, 0x50, 0x43, 0xa0, 0xc3, 0x11, 0xe0, 0xd6, 0xcf,
+  0xc1, 0xf0, 0x55, 0x20, 0xe8, 0x89, 0xdb, 0x30, 0xc3, 0x0d, 0xc1, 0x39,
+  0x07, 0x64, 0x50, 0xc1, 0xa0, 0xb3, 0x0c, 0x75, 0xa2, 0x2a, 0xc1, 0xad,
+  0x72, 0x30, 0xcc, 0x89, 0x6f, 0x30, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20,
+  0x18, 0x78, 0x66, 0x1d, 0xd8, 0x74, 0x50, 0xcf, 0xc1, 0x58, 0x07, 0xa3,
+  0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40,
+  0x0c, 0x45, 0x1c, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x5b, 0x5b,
+  0x07, 0x3d, 0x1d, 0x1c, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06,
+  0x9b, 0x5b, 0x07, 0x3e, 0x1d, 0x30, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20,
+  0x08, 0x06, 0xdb, 0x5b, 0x07, 0x3f, 0x1d, 0x48, 0x44, 0x30, 0x62, 0xa0,
+  0x00, 0x20, 0x08, 0x06, 0xcb, 0x5d, 0x07, 0x3e, 0x1d, 0xfc, 0x73, 0x10,
+  0xa8, 0x75, 0xf0, 0xd2, 0x01, 0x5b, 0x07, 0xa3, 0x09, 0x01, 0x70, 0xc1,
+  0x53, 0xb3, 0x04, 0xaa, 0x32, 0xdc, 0x10, 0x6f, 0x70, 0x1d, 0x80, 0xc1,
+  0x2c, 0xc3, 0x9d, 0xe0, 0x49, 0x50, 0xfa, 0x1c, 0x84, 0x75, 0x00, 0x17,
+  0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x54, 0x5e, 0x07, 0x62,
+  0x1d, 0x8c, 0xd0, 0x4c, 0x07, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40,
+  0xe9, 0x75, 0x20, 0xd6, 0x41, 0x20, 0x5c, 0x30, 0x4c, 0xf5, 0x73, 0x60,
+  0xd6, 0x01, 0x5c, 0xf0, 0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50,
+  0x7e, 0x1d, 0x9c, 0x75, 0xb0, 0x6f, 0x38, 0x1d, 0x8c, 0x18, 0x1c, 0x00,
+  0x08, 0x82, 0x01, 0xf5, 0xd7, 0xc1, 0x59, 0x07, 0x81, 0x70, 0xc1, 0x30,
+  0x17, 0x3c, 0x75, 0xc7, 0x53, 0x67, 0xce, 0xc1, 0x30, 0xd7, 0xbf, 0xc1,
+  0x30, 0x47, 0x0c, 0x73, 0xc4, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60,
+  0xe0, 0x8d, 0x76, 0x30, 0xd7, 0x81, 0x4c, 0x07, 0xa0, 0x1d, 0x8c, 0x26,
+  0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31,
+  0x14, 0x91, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0xaa, 0x1d,
+  0xe8, 0x75, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c,
+  0xab, 0x1d, 0xec, 0x75, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20,
+  0x18, 0x6c, 0xac, 0x1d, 0xf0, 0x75, 0x90, 0x10, 0xc1, 0x88, 0x81, 0x02,
+  0x80, 0x20, 0x18, 0x2c, 0xb4, 0x1d, 0xec, 0x75, 0xc0, 0xd3, 0x41, 0x70,
+  0xda, 0x01, 0x5b, 0x07, 0xa9, 0x1d, 0x8c, 0x26, 0x04, 0xc0, 0x05, 0x4f,
+  0xcd, 0x12, 0xa8, 0xca, 0x70, 0x83, 0xcb, 0xb1, 0x76, 0x00, 0x06, 0xb3,
+  0x0c, 0x79, 0xa2, 0x2a, 0x81, 0xb9, 0x74, 0x00, 0xd3, 0x41, 0x7c, 0x86,
+  0x23, 0x7e, 0x28, 0xa6, 0x03, 0xe2, 0x9b, 0x65, 0xd0, 0x93, 0x3e, 0x09,
+  0x4c, 0xa6, 0x03, 0x30, 0x8a, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6,
+  0x82, 0xa7, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0x6c, 0x3b,
+  0xd0, 0xe1, 0x86, 0x80, 0xb6, 0x03, 0x30, 0x98, 0x65, 0xd8, 0x13, 0x3e,
+  0x09, 0x6c, 0xd0, 0xe9, 0x00, 0x3e, 0xb3, 0x04, 0xa1, 0x62, 0x39, 0x1d,
+  0x10, 0xf1, 0x99, 0x25, 0x08, 0x95, 0xe1, 0x08, 0x35, 0xd2, 0xe9, 0x40,
+  0xf8, 0x66, 0x19, 0xfc, 0x24, 0x54, 0x02, 0x5b, 0xa3, 0x9d, 0x0e, 0xe2,
+  0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x22, 0xf9,
+  0x58, 0x11, 0xc4, 0xa7, 0x88, 0xf0, 0x0e, 0x74, 0xb8, 0x21, 0xf8, 0xed,
+  0x00, 0x0c, 0x66, 0x19, 0xfe, 0x04, 0x54, 0x02, 0x1b, 0xeb, 0x60, 0x88,
+  0xcf, 0x2c, 0x41, 0xa8, 0x18, 0x61, 0xd6, 0x01, 0x7c, 0x66, 0x09, 0x42,
+  0x65, 0xa0, 0xe5, 0xd1, 0xf6, 0x04, 0xe3, 0x13, 0xe2, 0x4f, 0x04, 0x50,
+  0x51, 0xc7, 0xa0, 0x4f, 0x2e, 0x18, 0xc6, 0xca, 0x3a, 0x48, 0xeb, 0x20,
+  0x3e, 0xc3, 0x11, 0x6b, 0xa7, 0xd6, 0x01, 0xf1, 0xcd, 0x32, 0x88, 0x4a,
+  0xa9, 0x04, 0xb6, 0xd6, 0x01, 0xdb, 0xc5, 0xc7, 0x82, 0x81, 0x3e, 0x17,
+  0x0c, 0x73, 0xc1, 0x53, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11,
+  0xef, 0x1d, 0xe8, 0x70, 0x43, 0xd0, 0xde, 0x01, 0x18, 0xcc, 0x32, 0x8c,
+  0x0a, 0xa9, 0x04, 0x36, 0xcc, 0x75, 0x00, 0x9f, 0x59, 0x82, 0x54, 0x31,
+  0xb8, 0x0e, 0x88, 0xf8, 0xcc, 0x12, 0xa4, 0xca, 0x70, 0x84, 0xdd, 0xc5,
+  0x75, 0x20, 0x7c, 0xb3, 0x0c, 0xa6, 0x92, 0x2a, 0x81, 0xdd, 0x9d, 0x5c,
+  0x07, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05,
+  0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x84, 0x7e, 0x07, 0x3a, 0xdc, 0x10,
+  0xe0, 0x77, 0x00, 0x06, 0xb3, 0x0c, 0xa7, 0x82, 0x2a, 0x81, 0xe9, 0x75,
+  0x30, 0xc4, 0x67, 0x96, 0x20, 0x55, 0x8c, 0xf8, 0xeb, 0x00, 0x3e, 0xb3,
+  0x04, 0xa9, 0x32, 0xd0, 0xf2, 0x68, 0xa3, 0x82, 0x91, 0x0a, 0x71, 0x2a,
+  0x02, 0xaa, 0x88, 0x7c, 0x50, 0x2a, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0xb7,
+  0x3d, 0x75, 0x6b, 0x1d, 0x0c, 0x73, 0xa2, 0x1c, 0x0c, 0x73, 0xc4, 0x30,
+  0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x1e, 0x8a, 0x07,
+  0xf8, 0x1d, 0xdc, 0x76, 0x50, 0xe2, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26,
+  0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c,
+  0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xf6, 0xe2, 0xc1, 0x7f, 0x07, 0x09,
+  0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x06, 0xe3, 0x01, 0x88,
+  0x07, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x16, 0xe3,
+  0x41, 0x88, 0x07, 0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1,
+  0x92, 0xe3, 0x01, 0x88, 0x07, 0xe1, 0x1d, 0x04, 0x2c, 0x1e, 0xc4, 0x77,
+  0xe0, 0xe2, 0xc1, 0x68, 0x42, 0x00, 0x5c, 0xf0, 0xd4, 0x2c, 0x81, 0xaa,
+  0x0c, 0xb4, 0x3c, 0xa6, 0x51, 0x27, 0xe4, 0x3b, 0xd0, 0x09, 0x4b, 0xdc,
+  0x89, 0x90, 0x2a, 0xe4, 0x3b, 0xe0, 0x89, 0xd9, 0x9e, 0x7c, 0x07, 0xf0,
+  0x99, 0x65, 0x58, 0x95, 0x56, 0xa1, 0xbd, 0xe1, 0x08, 0xdc, 0xa3, 0xef,
+  0x60, 0xf8, 0x2e, 0xf7, 0x86, 0x19, 0x6e, 0x08, 0x7e, 0x3b, 0x20, 0x83,
+  0x1a, 0x02, 0x1d, 0x8e, 0x28, 0xf0, 0x3b, 0x18, 0xbe, 0x0a, 0x04, 0xbd,
+  0x63, 0x98, 0xe1, 0x86, 0x40, 0xbc, 0x03, 0x32, 0xa8, 0x60, 0xd0, 0x59,
+  0x06, 0x56, 0x09, 0x97, 0xe0, 0x4c, 0x3b, 0x18, 0xe6, 0x7a, 0x39, 0x18,
+  0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xbc, 0x30, 0x0f, 0x62, 0x3c,
+  0x80, 0xef, 0xc0, 0xc7, 0x83, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82,
+  0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x0e, 0x19, 0x31, 0x40,
+  0x00, 0x10, 0x04, 0x83, 0x0d, 0xcd, 0x03, 0x1c, 0x0f, 0x0e, 0x22, 0x18,
+  0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x2d, 0xcd, 0x83, 0x1c, 0x0f, 0x18,
+  0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x4d, 0xcd, 0x03, 0x1d,
+  0x0f, 0x24, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x45, 0xce,
+  0x83, 0x1c, 0x0f, 0xf4, 0x3b, 0x08, 0xca, 0x3c, 0x50, 0xf1, 0xe0, 0xcc,
+  0x83, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xa9, 0x59, 0x82, 0x70, 0x19, 0x6e,
+  0x60, 0xbf, 0x35, 0x0f, 0xc0, 0x60, 0x96, 0xc1, 0x55, 0x5e, 0x25, 0xa8,
+  0xfa, 0x0e, 0x78, 0x3c, 0x80, 0x0b, 0x9e, 0x1a, 0x31, 0x38, 0x00, 0x10,
+  0x04, 0x03, 0x8a, 0xce, 0x83, 0x1e, 0x0f, 0xe4, 0xcf, 0xc5, 0x83, 0x11,
+  0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0xea, 0x3c, 0xe8, 0xf1, 0x20, 0x10,
+  0x2e, 0x18, 0xa6, 0xf0, 0x3b, 0x08, 0xf3, 0x00, 0x2e, 0x78, 0x6a, 0xc4,
+  0xe0, 0x00, 0x40, 0x10, 0x0c, 0xa8, 0x3c, 0x0f, 0xc4, 0x3c, 0x00, 0x83,
+  0x19, 0x0f, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0xd2, 0xf3, 0x40,
+  0xcc, 0x83, 0x40, 0xb8, 0x60, 0x98, 0x0b, 0x9e, 0xba, 0xe3, 0xa9, 0x0b,
+  0xef, 0x60, 0x98, 0xc3, 0xe7, 0x60, 0x98, 0x23, 0x86, 0x39, 0x62, 0x98,
+  0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xf0, 0xfc, 0x3c, 0x70, 0xf3, 0xa0,
+  0xc5, 0x83, 0x3d, 0x0f, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46,
+  0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01,
+  0x40, 0x10, 0x0c, 0xb6, 0x52, 0x0f, 0xea, 0x3c, 0x48, 0x88, 0x60, 0xc4,
+  0x00, 0x01, 0x40, 0x10, 0x0c, 0x36, 0x53, 0x0f, 0xec, 0x3c, 0x48, 0x88,
+  0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb6, 0x53, 0x0f, 0xee, 0x3c,
+  0x48, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x96, 0x57, 0x0f,
+  0xec, 0x3c, 0xb8, 0xf1, 0x20, 0x10, 0xf5, 0xe0, 0xcc, 0x03, 0x52, 0x0f,
+  0x46, 0x13, 0x02, 0xe0, 0x82, 0xa7, 0x66, 0x09, 0xc2, 0x65, 0xb8, 0x21,
+  0x05, 0x83, 0x53, 0x0f, 0xc0, 0x60, 0x96, 0x01, 0x56, 0xc2, 0x25, 0xb0,
+  0x14, 0x0f, 0x56, 0x3c, 0x88, 0xcf, 0x70, 0x84, 0x0b, 0x06, 0x2c, 0x1e,
+  0x10, 0xdf, 0x2c, 0x43, 0xac, 0xd0, 0x4a, 0x60, 0x2d, 0x1e, 0xbc, 0x60,
+  0x10, 0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x60,
+  0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xc4, 0x7a, 0xa0, 0xc3, 0x0d, 0xc1,
+  0xab, 0x07, 0x60, 0x30, 0xcb, 0x20, 0x2b, 0xb3, 0x12, 0xd8, 0x50, 0xe3,
+  0x01, 0x7c, 0x66, 0x09, 0x70, 0xc5, 0x68, 0x3c, 0x20, 0xe2, 0x33, 0x4b,
+  0x80, 0x2b, 0xc3, 0x11, 0x39, 0x18, 0xd4, 0x78, 0x20, 0x7c, 0xb3, 0x0c,
+  0xb5, 0x82, 0x2b, 0x81, 0xe9, 0x60, 0x60, 0xe3, 0x41, 0x7c, 0x2c, 0x70,
+  0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x41, 0x24, 0x1f, 0x2b, 0x82,
+  0xf8, 0x14, 0xc1, 0xeb, 0x81, 0x0e, 0x37, 0x04, 0xba, 0x1e, 0x80, 0xc1,
+  0x2c, 0x83, 0xad, 0xdc, 0x4a, 0x60, 0x3e, 0x1e, 0x0c, 0xf1, 0x99, 0x25,
+  0xc0, 0x15, 0x23, 0xc2, 0x3c, 0x80, 0xcf, 0x2c, 0x01, 0xae, 0x0c, 0xb4,
+  0x3c, 0x9a, 0xac, 0x60, 0xb3, 0x42, 0xd8, 0x8a, 0x70, 0x2b, 0xa8, 0x28,
+  0xd0, 0xca, 0x05, 0xc3, 0x18, 0x98, 0x07, 0x64, 0x1e, 0xc4, 0x67, 0x38,
+  0x02, 0x16, 0xca, 0x3c, 0x20, 0xbe, 0x59, 0x86, 0x5c, 0xe1, 0x95, 0xc0,
+  0xcc, 0x3c, 0x88, 0x85, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e,
+  0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x42, 0xdd, 0x03,
+  0x1d, 0x6e, 0x08, 0xd0, 0x3d, 0x00, 0x83, 0x59, 0x06, 0x5d, 0xd9, 0x95,
+  0xc0, 0x06, 0x37, 0x0f, 0xe0, 0x33, 0x4b, 0x00, 0x2e, 0xb6, 0xe6, 0x01,
+  0x11, 0x9f, 0x59, 0x02, 0x70, 0x19, 0x8e, 0xd8, 0x05, 0x36, 0x0f, 0x84,
+  0x6f, 0x96, 0xa1, 0x57, 0xc0, 0x25, 0x30, 0x5e, 0x68, 0xf3, 0x20, 0x3e,
+  0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20, 0x92, 0x8f,
+  0x15, 0x41, 0x7c, 0x8a, 0xa8, 0xf7, 0x40, 0x87, 0x1b, 0x82, 0x79, 0x0f,
+  0xc0, 0x60, 0x96, 0xc1, 0x57, 0x7e, 0x25, 0xb0, 0x3a, 0x0f, 0x86, 0xf8,
+  0xcc, 0x12, 0x80, 0x8b, 0x11, 0x7a, 0x1e, 0xc0, 0x67, 0x96, 0x00, 0x5c,
+  0x06, 0x5a, 0x1e, 0x4d, 0x57, 0xb0, 0x5d, 0x21, 0x7c, 0x45, 0xf8, 0x15,
+  0xd6, 0xe0, 0x95, 0x0b, 0x86, 0xb9, 0xe0, 0xa9, 0xdb, 0x9e, 0x3a, 0x33,
+  0x0f, 0x86, 0xb9, 0xbe, 0x0e, 0x86, 0x39, 0x62, 0x98, 0x23, 0x86, 0x19,
+  0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x6f, 0xe4, 0x83, 0x79, 0x0f, 0x64,
+  0x3d, 0x00, 0xf9, 0x60, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34,
+  0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10, 0x00,
+  0x04, 0xc1, 0x60, 0x53, 0xf9, 0x40, 0xdf, 0x83, 0x84, 0x08, 0x46, 0x0c,
+  0x10, 0x00, 0x04, 0xc1, 0x60, 0x5b, 0xf9, 0x60, 0xdf, 0x83, 0x84, 0x08,
+  0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0x63, 0xf9, 0x80, 0xdf, 0x83,
+  0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0xa1, 0xf9, 0x60,
+  0xdf, 0x03, 0x5e, 0x0f, 0x82, 0x93, 0x0f, 0xd8, 0x3d, 0x48, 0xf9, 0x60,
+  0x34, 0x21, 0x00, 0x2e, 0x78, 0x6a, 0x96, 0x20, 0x5c, 0x06, 0x5a, 0x1e,
+  0xd3, 0x60, 0x15, 0xdd, 0x1e, 0x56, 0x85, 0x25, 0x5c, 0x45, 0x00, 0x17,
+  0xdd, 0x1e, 0x5e, 0x65, 0x96, 0x41, 0x5c, 0xc8, 0xc5, 0x15, 0x83, 0xe1,
+  0x88, 0x59, 0x0c, 0xdc, 0x3d, 0x18, 0xbe, 0xa3, 0xc5, 0x60, 0x98, 0xe1,
+  0x86, 0x20, 0xd7, 0x03, 0x32, 0xa8, 0x21, 0xd0, 0xe1, 0x88, 0x7f, 0x90,
+  0xf7, 0x60, 0xf8, 0x2a, 0x10, 0xf4, 0x42, 0x62, 0x98, 0xe1, 0x86, 0x80,
+  0xd7, 0x03, 0x32, 0xa8, 0x60, 0xd0, 0x59, 0x86, 0x71, 0xc1, 0x97, 0xe0,
+  0x40, 0x3d, 0x18, 0xe6, 0x6e, 0x3b, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40,
+  0x10, 0x0c, 0xbc, 0x9d, 0x0f, 0x56, 0x3e, 0x50, 0xf7, 0x00, 0xe7, 0x83,
+  0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d,
+  0x20, 0x86, 0x22, 0x0e, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x4d,
+  0xec, 0x03, 0x99, 0x0f, 0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04,
+  0x83, 0x6d, 0xec, 0x83, 0x99, 0x0f, 0x18, 0x22, 0x18, 0x31, 0x40, 0x00,
+  0x10, 0x04, 0x83, 0x8d, 0xec, 0x03, 0x9a, 0x0f, 0x24, 0x22, 0x18, 0x31,
+  0x50, 0x00, 0x10, 0x04, 0x83, 0x85, 0xed, 0x83, 0x99, 0x0f, 0xe8, 0x3d,
+  0x08, 0x7e, 0x3e, 0x20, 0xf9, 0x20, 0xec, 0x83, 0xd1, 0x84, 0x00, 0xb8,
+  0xe0, 0xa9, 0x59, 0x02, 0x7c, 0x19, 0x6e, 0x30, 0xc7, 0xa0, 0xec, 0x03,
+  0x30, 0x98, 0x65, 0x28, 0x17, 0x73, 0x09, 0xea, 0xdd, 0x03, 0x9b, 0x0f,
+  0xe0, 0x82, 0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x72, 0xfb,
+  0xe0, 0xe6, 0x83, 0x76, 0x0c, 0x50, 0x3e, 0x18, 0x31, 0x38, 0x00, 0x10,
+  0x04, 0x03, 0xea, 0xed, 0x83, 0x9b, 0x0f, 0x02, 0xe1, 0x82, 0x61, 0x4a,
+  0xde, 0x83, 0x9d, 0x0f, 0xe0, 0x82, 0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04,
+  0xc1, 0x80, 0x9a, 0xfb, 0x80, 0xe7, 0x03, 0x9d, 0x68, 0xf9, 0x60, 0xc4,
+  0xe0, 0x00, 0x40, 0x10, 0x0c, 0x28, 0xba, 0x0f, 0x78, 0x3e, 0x08, 0x84,
+  0x0b, 0x86, 0xb9, 0xe0, 0xa9, 0x3b, 0x9e, 0xba, 0x5d, 0x0f, 0x86, 0x39,
+  0xf9, 0x0e, 0x86, 0x39, 0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00,
+  0x10, 0x04, 0x03, 0x0f, 0xef, 0x03, 0xb4, 0x0f, 0x4e, 0x3e, 0xa8, 0xfb,
+  0x60, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46,
+  0x13, 0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60,
+  0xfb, 0xfb, 0xe0, 0xed, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04,
+  0xc1, 0x60, 0x03, 0xfd, 0x00, 0xee, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x10,
+  0x00, 0x04, 0xc1, 0x60, 0x0b, 0xfd, 0x20, 0xee, 0x83, 0x84, 0x08, 0x46,
+  0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0x49, 0xfd, 0x00, 0xee, 0x83, 0x98,
+  0x0f, 0x02, 0xbe, 0x0f, 0xc2, 0x3e, 0xf0, 0xfb, 0x60, 0x34, 0x21, 0x00,
+  0x2e, 0x78, 0x6a, 0x96, 0x00, 0x5f, 0x86, 0x1b, 0x46, 0x32, 0x08, 0xfd,
+  0x00, 0x0c, 0x66, 0x19, 0xce, 0x05, 0x5f, 0x02, 0x1b, 0xf9, 0xa0, 0xe4,
+  0x83, 0xf8, 0x0c, 0x47, 0xa4, 0x64, 0x60, 0xf2, 0x01, 0xf1, 0xcd, 0x32,
+  0xa0, 0xcb, 0xba, 0x04, 0x76, 0xf2, 0x81, 0x4a, 0x06, 0xf1, 0xb1, 0x60,
+  0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08,
+  0xe2, 0x53, 0xc4, 0xea, 0x07, 0x3a, 0xdc, 0x10, 0xa4, 0x7e, 0x00, 0x06,
+  0xb3, 0x0c, 0xe9, 0xa2, 0x2e, 0x81, 0x0d, 0x2f, 0x1f, 0xc0, 0x67, 0x96,
+  0xe0, 0x5d, 0xcc, 0xe5, 0x03, 0x22, 0x3e, 0xb3, 0x04, 0xef, 0x32, 0x1c,
+  0x41, 0x93, 0xc1, 0xcb, 0x07, 0xc2, 0x37, 0xcb, 0xc0, 0x2e, 0xef, 0x12,
+  0x58, 0x4d, 0x06, 0x30, 0x1f, 0xc4, 0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c,
+  0x73, 0xc1, 0x53, 0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0xb6,
+  0x1f, 0xe8, 0x70, 0x43, 0x40, 0xfb, 0x01, 0x18, 0xcc, 0x32, 0xb4, 0x8b,
+  0xbb, 0x04, 0x86, 0xf3, 0xc1, 0x10, 0x9f, 0x59, 0x82, 0x77, 0x31, 0x62,
+  0xe7, 0x03, 0xf8, 0xcc, 0x12, 0xbc, 0xcb, 0x40, 0xcb, 0xa3, 0xa5, 0x0b,
+  0xa6, 0x2e, 0x44, 0xbb, 0x08, 0xee, 0xe2, 0xa7, 0xc2, 0xba, 0x5c, 0x30,
+  0x8c, 0xe9, 0x7c, 0xe0, 0xf3, 0x41, 0x7c, 0x86, 0x23, 0x54, 0xe3, 0xe7,
+  0x03, 0xe2, 0x9b, 0x65, 0x80, 0x97, 0x79, 0x09, 0x0c, 0xec, 0x83, 0xd5,
+  0x88, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x30,
+  0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xc8, 0x3f, 0xd0, 0xe1, 0x86, 0x40,
+  0xfc, 0x03, 0x30, 0x98, 0x65, 0x88, 0x17, 0x79, 0x09, 0x6c, 0x40, 0xfb,
+  0x00, 0x3e, 0xb3, 0x04, 0xf7, 0x62, 0x65, 0x1f, 0x10, 0xf1, 0x99, 0x25,
+  0xb8, 0x97, 0xe1, 0x88, 0xda, 0x30, 0xfb, 0x40, 0xf8, 0x66, 0x19, 0xe8,
+  0xe5, 0x5e, 0x02, 0xb3, 0x8d, 0xb3, 0x0f, 0xe2, 0x63, 0x81, 0x43, 0x9f,
+  0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4, 0xa7,
+  0x88, 0xf7, 0x0f, 0x74, 0xb8, 0x21, 0x68, 0xff, 0x00, 0x0c, 0x66, 0x19,
+  0xea, 0xc5, 0x5e, 0x02, 0x7b, 0xfb, 0x60, 0x88, 0xcf, 0x2c, 0xc1, 0xbd,
+  0x18, 0x41, 0xf7, 0x01, 0x7c, 0x66, 0x09, 0xee, 0x65, 0xa0, 0xe5, 0xd1,
+  0xe2, 0x05, 0x93, 0x17, 0xa2, 0x5e, 0x04, 0x7b, 0x01, 0x9d, 0x79, 0xb9,
+  0x60, 0x98, 0x0b, 0x9e, 0xba, 0xed, 0xa9, 0x03, 0xfb, 0x60, 0x98, 0xbb,
+  0xf3, 0x60, 0x98, 0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00,
+  0x41, 0x30, 0xf0, 0xfa, 0x3f, 0x68, 0xff, 0x80, 0xf5, 0x03, 0xfd, 0x0f,
+  0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34,
+  0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x36,
+  0x12, 0x14, 0xe8, 0x3f, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10,
+  0x0c, 0xb6, 0x12, 0x14, 0xea, 0x3f, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01,
+  0x40, 0x10, 0x0c, 0x36, 0x13, 0x14, 0xec, 0x3f, 0x48, 0x88, 0x60, 0xc4,
+  0x40, 0x01, 0x40, 0x10, 0x0c, 0x16, 0x17, 0x14, 0xea, 0x3f, 0xb0, 0xfd,
+  0x20, 0x08, 0x41, 0xc1, 0xfc, 0x83, 0x11, 0x14, 0x46, 0x13, 0x02, 0xe0,
+  0x82, 0xa7, 0x66, 0x09, 0xf0, 0x65, 0xa0, 0xe5, 0x31, 0x8d, 0x71, 0x71,
+  0xfd, 0x41, 0x5c, 0x58, 0xa2, 0x5c, 0x84, 0x7b, 0x71, 0xfd, 0xc1, 0x5c,
+  0x66, 0x19, 0xf2, 0x65, 0x5f, 0x50, 0x33, 0x18, 0x8e, 0x98, 0x3d, 0xf4,
+  0x0f, 0x86, 0xef, 0x68, 0x6f, 0x98, 0xe1, 0x86, 0x60, 0xf6, 0x03, 0x32,
+  0xa8, 0x21, 0xd0, 0xe1, 0x88, 0xfc, 0x60, 0xff, 0x60, 0xf8, 0x2a, 0x10,
+  0xf4, 0xf6, 0x63, 0x98, 0xe1, 0x86, 0xc0, 0xf6, 0x03, 0x32, 0xa8, 0x60,
+  0xd0, 0x59, 0x06, 0x7d, 0x79, 0x99, 0xe0, 0xf4, 0x3e, 0x18, 0xe6, 0x62,
+  0x3d, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xbc, 0x1a, 0x14,
+  0x4a, 0x50, 0x20, 0xff, 0x40, 0x06, 0x85, 0xd1, 0x84, 0x00, 0x18, 0x4d,
+  0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x0e, 0x19,
+  0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x8d, 0x07, 0x05, 0x16, 0x14, 0x0e,
+  0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0xad, 0x07, 0x85, 0x16,
+  0x14, 0x18, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0xcd, 0x07,
+  0x05, 0x17, 0x14, 0x24, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83,
+  0xc5, 0x0c, 0x85, 0x16, 0x14, 0xdc, 0x3f, 0x08, 0x72, 0x50, 0xf0, 0xff,
+  0x60, 0x07, 0x85, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xa9, 0x59, 0x82, 0x97,
+  0x19, 0x6e, 0x00, 0xcf, 0xe0, 0x07, 0x05, 0x30, 0x98, 0x65, 0xe0, 0x97,
+  0x7e, 0x09, 0x2a, 0xfd, 0x03, 0x18, 0x14, 0xe0, 0x82, 0xa7, 0x46, 0x0c,
+  0x0e, 0x00, 0x04, 0xc1, 0x80, 0x42, 0x43, 0x21, 0x06, 0x85, 0xf6, 0x13,
+  0x41, 0x61, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xa8, 0x34, 0x14, 0x62,
+  0x50, 0x08, 0x84, 0x0b, 0x86, 0x29, 0xf6, 0x0f, 0x6a, 0x50, 0x80, 0x0b,
+  0x9e, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0xaa, 0x0d, 0x05, 0x1b,
+  0x14, 0x68, 0xe4, 0x04, 0x85, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0,
+  0xdc, 0x50, 0xb0, 0x41, 0x21, 0x10, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0xee,
+  0x78, 0xea, 0x6a, 0x3f, 0x18, 0xe6, 0xd8, 0x3d, 0x18, 0xe6, 0x88, 0x61,
+  0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x3c, 0x39, 0x14,
+  0xc4, 0x50, 0x08, 0x41, 0xe1, 0x0d, 0x85, 0xd1, 0x84, 0x00, 0x18, 0x4d,
   0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x12, 0x19,
-  0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0xad, 0xbc, 0x6a, 0x2b, 0x21, 0x82,
-  0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xd8, 0xcc, 0xcb, 0xb6, 0x12, 0x22,
-  0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0xed, 0xbc, 0x6e, 0x2b, 0x21,
-  0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x58, 0xde, 0xcb, 0xb6, 0xee,
-  0x2a, 0x10, 0xaf, 0xd3, 0x22, 0xaf, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xa9,
-  0x59, 0x02, 0x98, 0x18, 0x6e, 0x20, 0x9b, 0xf3, 0x02, 0x83, 0x59, 0x86,
-  0x7f, 0x80, 0x89, 0xc0, 0xd2, 0x6a, 0xad, 0xe2, 0x33, 0x1c, 0x81, 0x36,
-  0x6c, 0x45, 0x7c, 0xb3, 0x0c, 0x20, 0x31, 0x12, 0x81, 0xb5, 0x55, 0xda,
-  0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x18,
-  0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0xf1, 0xa5, 0xc3, 0x0d, 0xc1, 0x7b,
-  0x81, 0xc1, 0x2c, 0x43, 0x48, 0x88, 0x44, 0x60, 0x43, 0x5d, 0xc1, 0x67,
-  0x96, 0xe0, 0x24, 0x8c, 0xae, 0x88, 0xf8, 0xcc, 0x12, 0x9c, 0xc4, 0x70,
-  0xc4, 0xdc, 0xd4, 0x95, 0xf0, 0xcd, 0x32, 0x90, 0xc4, 0x49, 0x04, 0x46,
-  0x37, 0x76, 0x15, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f,
-  0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xf0, 0x97, 0x0e, 0x37,
-  0x04, 0xfa, 0x05, 0x06, 0xb3, 0x0c, 0x25, 0x61, 0x12, 0x81, 0xf9, 0xd5,
-  0x10, 0x9f, 0x59, 0x82, 0x93, 0x30, 0x22, 0xb4, 0xe0, 0x33, 0x4b, 0x70,
-  0x12, 0x03, 0x2d, 0x8f, 0x16, 0x12, 0x98, 0x48, 0x10, 0x25, 0x21, 0x98,
-  0x04, 0xbf, 0x8d, 0xc4, 0x05, 0xc3, 0x18, 0x68, 0x91, 0x56, 0x7c, 0x86,
-  0x23, 0x40, 0xa7, 0xb4, 0x88, 0x6f, 0x96, 0x01, 0x25, 0x56, 0x22, 0x30,
-  0xd3, 0x0a, 0x9d, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78,
-  0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x42, 0xc5, 0x74, 0xb8,
-  0x21, 0x40, 0x31, 0x30, 0x98, 0x65, 0x48, 0x09, 0x95, 0x08, 0x6c, 0x70,
-  0x2d, 0xf8, 0xcc, 0x12, 0xbc, 0x84, 0xad, 0x16, 0x11, 0x9f, 0x59, 0x82,
-  0x97, 0x18, 0x8e, 0x58, 0x1d, 0xd6, 0x12, 0xbe, 0x59, 0x06, 0x96, 0x78,
-  0x89, 0xc0, 0x58, 0xa7, 0xb5, 0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86,
-  0xb9, 0xe0, 0x29, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0x1a,
-  0xd3, 0xe1, 0x86, 0x60, 0xc6, 0xc0, 0x60, 0x96, 0xa1, 0x25, 0x5c, 0x22,
-  0xb0, 0xda, 0x1a, 0xe2, 0x33, 0x4b, 0xf0, 0x12, 0x46, 0xe8, 0x16, 0x7c,
-  0x66, 0x09, 0x5e, 0x62, 0xa0, 0xe5, 0xd1, 0x52, 0x02, 0x53, 0x09, 0xa2,
-  0x25, 0x04, 0x97, 0x60, 0xbb, 0x95, 0xb8, 0x60, 0x98, 0x0b, 0x9e, 0xba,
-  0xed, 0xa9, 0x33, 0xad, 0x61, 0xae, 0x97, 0x86, 0x39, 0x62, 0x98, 0x23,
-  0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x6f, 0xcc, 0x66, 0x4c,
-  0xbe, 0xc0, 0x6c, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61,
-  0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10, 0x00, 0x04,
-  0xc1, 0x60, 0x53, 0x33, 0x1d, 0x4b, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40,
-  0x10, 0x0c, 0xb6, 0x35, 0xdb, 0xb1, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00,
-  0x04, 0xc1, 0x60, 0x63, 0x33, 0x1e, 0x4b, 0x88, 0x60, 0xc4, 0x40, 0x01,
-  0x40, 0x10, 0x0c, 0x16, 0x3a, 0xdb, 0x31, 0xfe, 0x0a, 0xce, 0x8c, 0xc5,
-  0xd2, 0x6c, 0x34, 0x21, 0x00, 0x2e, 0x78, 0x6a, 0x96, 0x00, 0x26, 0x06,
-  0x5a, 0x1e, 0xd3, 0xd8, 0x07, 0x1d, 0x0e, 0xf4, 0x81, 0x25, 0xfa, 0x41,
-  0x78, 0x09, 0x1d, 0x0e, 0xfc, 0x61, 0xc4, 0xc0, 0x00, 0x40, 0x10, 0x0c,
-  0x20, 0x3b, 0xbb, 0xb1, 0x77, 0x32, 0xfb, 0x80, 0x97, 0xf8, 0x98, 0x10,
-  0xc8, 0xc7, 0x02, 0x79, 0x81, 0x8f, 0x15, 0xff, 0x10, 0x1f, 0x2b, 0x02,
-  0xf9, 0x58, 0x10, 0x12, 0xf0, 0x19, 0x31, 0x30, 0x00, 0x10, 0x04, 0x03,
-  0xa8, 0xcf, 0x7c, 0xac, 0x9e, 0x4c, 0x28, 0xe2, 0x63, 0x81, 0x20, 0x1f,
-  0x0b, 0x0e, 0xf8, 0x5c, 0x60, 0xcc, 0x88, 0x81, 0x03, 0x80, 0x20, 0x18,
-  0x34, 0xa3, 0x26, 0x66, 0x2e, 0x76, 0x62, 0x77, 0x16, 0xf4, 0x58, 0x8f,
-  0xf5, 0x18, 0x8f, 0xe5, 0xd9, 0x2c, 0xc1, 0x08, 0x0d, 0x37, 0xf8, 0x55,
-  0x9e, 0x81, 0xc1, 0x2c, 0x83, 0x4c, 0x8c, 0x50, 0x30, 0x62, 0x60, 0x00,
-  0x20, 0x08, 0x06, 0xd0, 0xa8, 0x95, 0x19, 0x3f, 0x59, 0x70, 0x63, 0xf0,
-  0x19, 0x31, 0x30, 0x00, 0x10, 0x04, 0x03, 0xa8, 0xd4, 0xce, 0xac, 0x9f,
-  0x2c, 0xc8, 0x31, 0xf8, 0x8c, 0x26, 0xb8, 0xd8, 0x30, 0xdc, 0x10, 0xf4,
-  0x19, 0x18, 0xcc, 0x32, 0xcc, 0x44, 0x4d, 0x04, 0xc3, 0x11, 0x45, 0x98,
-  0x0d, 0xdf, 0x19, 0xc3, 0x0c, 0x37, 0x04, 0x2c, 0x46, 0x06, 0x35, 0x04,
-  0x3a, 0x1c, 0x71, 0x94, 0xd9, 0xf0, 0x55, 0x20, 0xe8, 0x25, 0xc3, 0x0c,
-  0x37, 0x04, 0x2f, 0x46, 0x06, 0x15, 0x0c, 0x3a, 0xcb, 0x40, 0x13, 0x69,
-  0x11, 0xdc, 0x7c, 0x0d, 0x73, 0x2a, 0x35, 0xcc, 0x88, 0xc1, 0x01, 0x80,
-  0x20, 0x18, 0x78, 0xae, 0xe6, 0x67, 0x3d, 0xb6, 0x6a, 0xa3, 0x09, 0x01,
-  0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45,
-  0x1c, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x5b, 0xad, 0x95, 0xda,
-  0x41, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0xd9, 0x9a, 0xa9,
-  0x31, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xdb, 0xad, 0x9d,
-  0x9a, 0x44, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0xfc, 0x9a,
-  0xa9, 0x9d, 0x59, 0x20, 0x6b, 0x77, 0x46, 0x6b, 0xa3, 0x09, 0x01, 0x70,
-  0xc1, 0x53, 0xb3, 0x04, 0x69, 0x31, 0xdc, 0x90, 0xe1, 0x1a, 0x18, 0xcc,
-  0x32, 0xd8, 0xc4, 0x4d, 0x04, 0x25, 0x66, 0xa9, 0x06, 0x17, 0x3c, 0x35,
-  0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x54, 0xb8, 0xa9, 0xda, 0xb7, 0x67,
-  0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0x89, 0x9b, 0xaa, 0x05, 0xc2,
-  0x05, 0xc3, 0x54, 0x99, 0xb9, 0x1a, 0x5c, 0xf0, 0xd4, 0x88, 0xc1, 0x01,
-  0x80, 0x20, 0x18, 0x50, 0xe6, 0xf6, 0x6a, 0x62, 0x00, 0x6a, 0x23, 0x06,
-  0x07, 0x00, 0x82, 0x60, 0x40, 0x9d, 0xdb, 0xab, 0x05, 0xc2, 0x05, 0xc3,
-  0x5c, 0xf0, 0xd4, 0x1d, 0x4f, 0x9d, 0x8b, 0x0d, 0x73, 0x65, 0x35, 0xcc,
-  0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x78,
-  0xeb, 0xb6, 0x6b, 0x7a, 0x86, 0x6e, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20,
-  0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62,
-  0x80, 0x00, 0x20, 0x08, 0x06, 0x9b, 0xbc, 0x89, 0x5b, 0x42, 0x04, 0x23,
-  0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0xcd, 0xdb, 0xb8, 0x25, 0x44, 0x30,
-  0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x1b, 0xbd, 0x91, 0x5b, 0x42, 0x04,
-  0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0xf0, 0xdb, 0xb8, 0x91, 0x5a,
-  0xf0, 0x6e, 0xb4, 0x16, 0x6f, 0xa3, 0x09, 0x01, 0x70, 0xc1, 0x53, 0xb3,
-  0x04, 0x69, 0x31, 0xdc, 0x60, 0x07, 0xf4, 0x06, 0x06, 0xb3, 0x0c, 0x38,
-  0x91, 0x16, 0x81, 0xd9, 0x19, 0x9e, 0xc5, 0x67, 0x38, 0x62, 0x0f, 0xf2,
-  0x8c, 0xf8, 0x66, 0x19, 0x72, 0x82, 0x27, 0x02, 0xd3, 0x33, 0x3e, 0x88,
-  0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x30, 0xe4,
-  0x63, 0x45, 0x10, 0x9f, 0x22, 0xfc, 0x4d, 0x87, 0x1b, 0x02, 0x7e, 0x03,
-  0x83, 0x59, 0x06, 0x9d, 0xd8, 0x89, 0xc0, 0x06, 0x51, 0x83, 0xcf, 0x2c,
-  0x01, 0x58, 0x58, 0xa8, 0x11, 0xf1, 0x99, 0x25, 0x00, 0x8b, 0xe1, 0x08,
-  0x53, 0x10, 0x35, 0xe1, 0x9b, 0x65, 0xe8, 0x09, 0xb0, 0x08, 0xec, 0x14,
-  0x46, 0x2d, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2,
-  0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x48, 0x39, 0x1d, 0x6e, 0x08,
-  0x4e, 0x0e, 0x0c, 0x66, 0x19, 0x7c, 0xe2, 0x27, 0x02, 0x5b, 0xb5, 0x21,
-  0x3e, 0xb3, 0x04, 0x60, 0x61, 0x84, 0xab, 0xc1, 0x67, 0x96, 0x00, 0x2c,
-  0x06, 0x5a, 0x1e, 0x4d, 0x27, 0xb0, 0x9d, 0x20, 0x7c, 0x42, 0xf8, 0x09,
-  0xb1, 0xe0, 0x89, 0x0b, 0x86, 0xb1, 0x56, 0x8b, 0xb5, 0xf8, 0x0c, 0x47,
-  0xc8, 0x82, 0xac, 0x11, 0xdf, 0x2c, 0x43, 0x58, 0x90, 0x45, 0x60, 0xb3,
-  0x36, 0x0b, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94,
-  0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0xc4, 0xcd, 0xe9, 0x70, 0x43,
-  0x50, 0x73, 0x60, 0x30, 0xcb, 0x20, 0x16, 0x63, 0x11, 0xd8, 0xb0, 0x6b,
-  0xf0, 0x99, 0x25, 0x40, 0x0b, 0xc3, 0x35, 0x22, 0x3e, 0xb3, 0x04, 0x68,
-  0x31, 0x1c, 0xd1, 0x0b, 0xb9, 0x26, 0x7c, 0xb3, 0x0c, 0x65, 0x81, 0x16,
-  0x81, 0xf9, 0x82, 0xae, 0xc5, 0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73,
-  0xc1, 0x53, 0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x62, 0xa7,
-  0xc3, 0x0d, 0x01, 0xd8, 0x81, 0xc1, 0x2c, 0x83, 0x59, 0x9c, 0x45, 0x60,
-  0xe2, 0x36, 0xc4, 0x67, 0x96, 0x00, 0x2d, 0x8c, 0x38, 0x37, 0xf8, 0xcc,
-  0x12, 0xa0, 0xc5, 0x40, 0xcb, 0xa3, 0x89, 0x05, 0x36, 0x16, 0x84, 0x59,
-  0x08, 0x67, 0x01, 0x1b, 0x64, 0x71, 0xc1, 0x30, 0x17, 0x3c, 0x75, 0xdb,
-  0x53, 0x37, 0x6b, 0xc3, 0x9c, 0x7a, 0x0d, 0x73, 0xc4, 0x30, 0x47, 0x0c,
-  0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x1e, 0xdc, 0x81, 0xdd, 0xbf,
-  0xb5, 0xdd, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20,
-  0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
-  0xc1, 0x76, 0x77, 0x67, 0x97, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20,
-  0x18, 0x6c, 0x78, 0x87, 0x76, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08,
-  0x82, 0xc1, 0x96, 0x77, 0x69, 0x97, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80,
-  0x20, 0x18, 0x2c, 0xa1, 0x87, 0x76, 0x29, 0x17, 0xd0, 0x5d, 0xce, 0xd9,
-  0xdd, 0x68, 0x42, 0x00, 0x5c, 0xf0, 0xd4, 0x2c, 0x41, 0x5a, 0x0c, 0xb4,
-  0x3c, 0xa6, 0x41, 0x13, 0x74, 0x1e, 0xcc, 0x04, 0x4b, 0xd8, 0x84, 0x80,
-  0x16, 0x74, 0x1e, 0xdc, 0xc4, 0x2c, 0x83, 0x5a, 0xb0, 0xc5, 0x3e, 0x0c,
-  0x47, 0x80, 0xc4, 0xce, 0x0d, 0xdf, 0x85, 0xc4, 0x30, 0xc3, 0x0d, 0x81,
-  0xc9, 0x91, 0x41, 0x0d, 0x81, 0x0e, 0x47, 0x84, 0xc4, 0xcf, 0x0d, 0x5f,
-  0x05, 0x82, 0xde, 0x48, 0x0c, 0x33, 0xdc, 0x10, 0xa4, 0x1c, 0x19, 0x54,
-  0x30, 0xe8, 0x2c, 0xc3, 0x5a, 0x80, 0x46, 0x70, 0xed, 0x36, 0xcc, 0x91,
-  0xd8, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xe0, 0xa1, 0x1e, 0xde,
-  0xdd, 0x5c, 0xe9, 0x8d, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26,
-  0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x71, 0xc8, 0x88, 0x01, 0x02, 0x80,
-  0x20, 0x18, 0x6c, 0xaf, 0xf7, 0x77, 0x07, 0x11, 0x8c, 0x18, 0x20, 0x00,
-  0x08, 0x82, 0xc1, 0x06, 0x7b, 0xa0, 0xc7, 0x10, 0xc1, 0x88, 0x01, 0x02,
-  0x80, 0x20, 0x18, 0x6c, 0xb1, 0x17, 0x7a, 0x12, 0x11, 0x8c, 0x18, 0x28,
-  0x00, 0x08, 0x82, 0xc1, 0x92, 0x7b, 0xa0, 0x17, 0x76, 0x01, 0xeb, 0xc5,
-  0x9d, 0xeb, 0x8d, 0x26, 0x04, 0xc0, 0x05, 0x4f, 0xcd, 0x12, 0x80, 0xc6,
-  0x70, 0xc3, 0x4c, 0xc8, 0x1e, 0x18, 0xcc, 0x32, 0xb4, 0x85, 0x5b, 0x04,
-  0xc5, 0x73, 0xa3, 0x07, 0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08,
-  0x06, 0xd4, 0xee, 0x91, 0x9e, 0x4e, 0xd4, 0xdd, 0x88, 0xc1, 0x01, 0x80,
-  0x20, 0x18, 0x50, 0xbc, 0x47, 0x7a, 0x81, 0x70, 0xc1, 0x30, 0xf5, 0x73,
-  0xa8, 0x07, 0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x14,
-  0xf8, 0xa5, 0x1e, 0x4f, 0xe8, 0xdd, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18,
-  0x50, 0xe1, 0x97, 0x7a, 0x81, 0x70, 0xc1, 0x30, 0x17, 0x3c, 0x75, 0xc7,
-  0x53, 0x87, 0x72, 0xc3, 0xdc, 0x8f, 0x0d, 0x73, 0xc4, 0x30, 0x47, 0x0c,
-  0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x5e, 0xf9, 0xd5, 0x1e, 0xdd,
-  0x89, 0xdf, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20,
-  0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
-  0xc1, 0xc6, 0x7e, 0xbc, 0x97, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20,
-  0x18, 0x6c, 0xed, 0xd7, 0x7b, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08,
-  0x82, 0xc1, 0xe6, 0x7e, 0xbe, 0x97, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80,
-  0x20, 0x18, 0x2c, 0xf6, 0xd7, 0x7b, 0x7e, 0x17, 0xa4, 0x9f, 0xeb, 0xad,
-  0xdf, 0x68, 0x42, 0x00, 0x5c, 0xf0, 0xd4, 0x2c, 0x01, 0x68, 0x0c, 0x37,
-  0xc0, 0x85, 0xfb, 0x81, 0xc1, 0x2c, 0xc3, 0x5b, 0x80, 0x46, 0x60, 0x70,
-  0x27, 0x77, 0xf1, 0x19, 0x8e, 0xb0, 0x8b, 0xb9, 0x23, 0xbe, 0x59, 0x06,
-  0xb8, 0x98, 0x8b, 0xc0, 0xe8, 0xee, 0x2e, 0xe2, 0x63, 0xc1, 0x40, 0x9f,
-  0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7,
-  0x08, 0xfc, 0xd3, 0xe1, 0x86, 0xc0, 0xfe, 0xc0, 0x60, 0x96, 0x21, 0x2e,
-  0xe4, 0x22, 0xb0, 0x81, 0xef, 0xe0, 0x33, 0x4b, 0x70, 0x17, 0xb6, 0x77,
-  0x44, 0x7c, 0x66, 0x09, 0xee, 0x62, 0x38, 0x22, 0x34, 0xf8, 0x4e, 0xf8,
-  0x66, 0x19, 0xe8, 0xe2, 0x2e, 0x02, 0x13, 0x8d, 0xbe, 0x8b, 0x8f, 0x05,
-  0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x88, 0xe4, 0x63, 0x45,
-  0x10, 0x9f, 0x22, 0x46, 0x30, 0xd0, 0xe1, 0x86, 0x20, 0x04, 0x03, 0x30,
-  0x98, 0x65, 0xa8, 0x0b, 0xbb, 0x08, 0xac, 0xf4, 0x86, 0xf8, 0xcc, 0x12,
-  0xdc, 0x85, 0x11, 0xa8, 0x07, 0x9f, 0x59, 0x82, 0xbb, 0x18, 0x68, 0x79,
-  0xb4, 0xb8, 0xc0, 0xe4, 0x82, 0xa8, 0x0b, 0xc1, 0x2e, 0x70, 0x66, 0x2e,
-  0x2e, 0x18, 0xc6, 0x4e, 0x6f, 0xf5, 0xe2, 0x33, 0x1c, 0xc1, 0x1a, 0xac,
-  0x47, 0x7c, 0xb3, 0x0c, 0x78, 0xb1, 0x17, 0x81, 0xb5, 0x5e, 0x6b, 0xc4,
-  0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x18, 0xf2,
-  0xb1, 0x22, 0x88, 0x4f, 0x11, 0x31, 0x18, 0xe8, 0x70, 0x43, 0xf0, 0x82,
-  0x01, 0x18, 0xcc, 0x32, 0xe4, 0x85, 0x5e, 0x04, 0x36, 0xd4, 0x1e, 0x7c,
-  0x66, 0x09, 0xfe, 0xc2, 0x64, 0x8f, 0x88, 0xcf, 0x2c, 0xc1, 0x5f, 0x0c,
-  0x47, 0xdc, 0xc6, 0xec, 0x09, 0xdf, 0x2c, 0x03, 0x5f, 0xfc, 0x45, 0x60,
-  0xb8, 0x41, 0x7b, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0xf0,
-  0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x04, 0x0f, 0x06, 0x3a,
-  0xdc, 0x10, 0xe8, 0x60, 0x00, 0x06, 0xb3, 0x0c, 0x7d, 0xe1, 0x17, 0x81,
-  0xf1, 0xde, 0x10, 0x9f, 0x59, 0x82, 0xbf, 0x30, 0x22, 0xfc, 0xe0, 0x33,
-  0x4b, 0xf0, 0x17, 0x03, 0x2d, 0x8f, 0x96, 0x17, 0x98, 0x5e, 0x10, 0x7d,
-  0x21, 0xf8, 0x05, 0xe9, 0xec, 0xc5, 0x05, 0xc3, 0x5c, 0xf0, 0xd4, 0x6d,
-  0x4f, 0x5d, 0xeb, 0x0d, 0x73, 0xe4, 0x36, 0xcc, 0x11, 0xc3, 0x1c, 0x31,
-  0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x78, 0x6a, 0x18, 0xe8, 0x60,
-  0x90, 0x7f, 0x67, 0x18, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c,
-  0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x91, 0xc8, 0x88, 0x01, 0x02,
-  0x80, 0x20, 0x18, 0x6c, 0x71, 0x18, 0x84, 0x61, 0x90, 0x10, 0xc1, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0x72, 0x18, 0x88, 0x61, 0x90, 0x10,
-  0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0x73, 0x18, 0x8c, 0x61,
-  0x90, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c, 0x7b, 0x18,
-  0x88, 0x61, 0x30, 0x82, 0x41, 0xe0, 0x86, 0xc1, 0x0c, 0x06, 0x70, 0x18,
-  0x8c, 0x26, 0x04, 0xc0, 0x05, 0x4f, 0xcd, 0x12, 0x80, 0xc6, 0x40, 0xcb,
-  0x63, 0x1a, 0x6b, 0x81, 0x86, 0x82, 0x5a, 0xb0, 0x44, 0x5b, 0x08, 0x7f,
-  0x81, 0x86, 0x82, 0x5b, 0x98, 0x7e, 0xd0, 0x60, 0x00, 0x9f, 0x59, 0x86,
-  0xd0, 0x18, 0x0d, 0xfb, 0x18, 0x8e, 0x08, 0x6c, 0x30, 0x18, 0xbe, 0x13,
-  0x86, 0x19, 0x6e, 0x08, 0x42, 0x30, 0x20, 0x83, 0x1a, 0x02, 0x1d, 0x8e,
-  0xe0, 0x0f, 0x1d, 0x0c, 0x86, 0xaf, 0x02, 0x41, 0xcf, 0x3f, 0x86, 0x19,
-  0x6e, 0x08, 0x48, 0x30, 0x20, 0x83, 0x0a, 0x06, 0x9d, 0x65, 0x10, 0x8d,
-  0xdb, 0x08, 0x0e, 0xfd, 0x86, 0xb9, 0x7f, 0x1b, 0x66, 0xc4, 0xe0, 0x00,
-  0x40, 0x10, 0x0c, 0xbc, 0x51, 0x0c, 0xe6, 0x30, 0x90, 0xc1, 0x00, 0x14,
-  0x83, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18,
-  0x4d, 0x20, 0x86, 0x22, 0x0e, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83,
-  0x4d, 0x15, 0x03, 0x3d, 0x0c, 0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10,
-  0x04, 0x83, 0x6d, 0x15, 0x83, 0x3d, 0x0c, 0x18, 0x22, 0x18, 0x31, 0x40,
-  0x00, 0x10, 0x04, 0x83, 0x8d, 0x15, 0x03, 0x3e, 0x0c, 0x24, 0x22, 0x18,
-  0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x85, 0x16, 0x83, 0x3d, 0x0c, 0x78,
-  0x30, 0x08, 0x4e, 0x31, 0x60, 0xc3, 0x20, 0x15, 0x83, 0xd1, 0x84, 0x00,
-  0xb8, 0xe0, 0xa9, 0x59, 0x82, 0xdb, 0x18, 0x6e, 0x70, 0x91, 0x56, 0x0c,
-  0xc0, 0x60, 0x96, 0x81, 0x34, 0x4a, 0x23, 0xa8, 0x1b, 0x0c, 0xfc, 0x30,
-  0x80, 0x0b, 0x9e, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0xca, 0x16,
-  0x83, 0x3f, 0x0c, 0x36, 0x38, 0x0c, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1,
-  0x80, 0xba, 0xc5, 0xe0, 0x0f, 0x83, 0x40, 0xb8, 0x60, 0x98, 0xd2, 0xc1,
-  0x60, 0x14, 0x03, 0xb8, 0xe0, 0xa9, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30,
-  0xa0, 0x76, 0x31, 0x20, 0xc5, 0xe0, 0x46, 0xea, 0x30, 0x18, 0x31, 0x38,
-  0x00, 0x10, 0x04, 0x03, 0x8a, 0x17, 0x03, 0x52, 0x0c, 0x02, 0xe1, 0x82,
-  0x61, 0x2e, 0x78, 0xea, 0x8e, 0xa7, 0x6e, 0x04, 0x83, 0x61, 0x4e, 0xe7,
-  0x86, 0x39, 0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04,
-  0x03, 0x0f, 0x1c, 0x03, 0x58, 0x0c, 0xde, 0x30, 0xe8, 0xc5, 0x60, 0x34,
-  0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88,
-  0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0x3b, 0xc7,
-  0xe0, 0x16, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60,
-  0x43, 0xc7, 0x00, 0x17, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04,
-  0xc1, 0x60, 0x4b, 0xc7, 0x20, 0x17, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x14,
-  0x00, 0x04, 0xc1, 0x60, 0x89, 0xc7, 0x00, 0x17, 0x83, 0x3c, 0x0c, 0x02,
-  0x72, 0x0c, 0x52, 0x31, 0x30, 0xc7, 0x60, 0x34, 0x21, 0x00, 0x2e, 0x78,
-  0x6a, 0x96, 0xe0, 0x36, 0x86, 0x1b, 0xd6, 0x24, 0x1d, 0x03, 0x30, 0x98,
-  0x65, 0x30, 0x8d, 0xdb, 0x08, 0x6c, 0x0d, 0x83, 0x36, 0x0c, 0xe2, 0x33,
-  0x1c, 0x71, 0x07, 0x6e, 0x18, 0x10, 0xdf, 0x2c, 0xc3, 0x69, 0xa8, 0x46,
-  0x60, 0x6f, 0x18, 0xe0, 0x41, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30,
-  0x17, 0x3c, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x31, 0x8f,
-  0x81, 0x0e, 0x37, 0x04, 0xf1, 0x18, 0x80, 0xc1, 0x2c, 0x03, 0x6a, 0xa4,
-  0x46, 0x60, 0xc3, 0x1d, 0x06, 0xf0, 0x99, 0x25, 0x70, 0x0d, 0xb3, 0xc3,
-  0x80, 0x88, 0xcf, 0x2c, 0x81, 0x6b, 0x0c, 0x47, 0x88, 0xc2, 0x1d, 0x06,
-  0xc2, 0x37, 0xcb, 0xb0, 0x1a, 0xae, 0x11, 0xd8, 0x28, 0xe0, 0x61, 0x10,
+  0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x2d, 0x0f, 0x85, 0x34, 0x14, 0x12,
+  0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x4d, 0x0f, 0x05, 0x35,
+  0x14, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x6d, 0x0f,
+  0x85, 0x35, 0x14, 0x12, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83,
+  0x65, 0x14, 0x05, 0x35, 0x14, 0x56, 0x50, 0x08, 0xec, 0x50, 0xd8, 0x41,
+  0x01, 0x0f, 0x85, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xa9, 0x59, 0x82, 0x97,
+  0x19, 0x6e, 0xe8, 0xcf, 0x60, 0x0f, 0x05, 0x30, 0x98, 0x65, 0xf0, 0x97,
+  0x97, 0x09, 0xac, 0xff, 0x83, 0xff, 0x0f, 0xe2, 0x33, 0x1c, 0x91, 0x82,
+  0x01, 0x08, 0x0a, 0xc4, 0x37, 0xcb, 0xf0, 0x2f, 0x22, 0x13, 0x58, 0x08,
+  0x0a, 0x2a, 0x18, 0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0xc1,
+  0x53, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0xa5, 0x28, 0xe8,
+  0x70, 0x43, 0x30, 0x8a, 0x02, 0x18, 0xcc, 0x32, 0x80, 0x4c, 0xc8, 0x04,
+  0x36, 0xa4, 0xa0, 0x00, 0x9f, 0x59, 0x02, 0x93, 0x31, 0x14, 0x14, 0x88,
+  0xf8, 0xcc, 0x12, 0x98, 0xcc, 0x70, 0x04, 0x0d, 0x06, 0x29, 0x28, 0x08,
+  0xdf, 0x2c, 0xc3, 0xc8, 0x98, 0x4c, 0x60, 0x35, 0x18, 0xa8, 0xa0, 0x10,
   0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x10, 0xc9,
-  0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xf8, 0x63, 0xa0, 0xc3, 0x0d, 0x01, 0x3f,
-  0x06, 0x60, 0x30, 0xcb, 0xc0, 0x1a, 0xad, 0x11, 0x18, 0x28, 0x06, 0x43,
-  0x7c, 0x66, 0x09, 0x5c, 0xc3, 0x88, 0x51, 0x0c, 0xe0, 0x33, 0x4b, 0xe0,
-  0x1a, 0x03, 0x2d, 0x8f, 0x86, 0x1a, 0x58, 0x6a, 0x10, 0xac, 0x21, 0xb4,
-  0x86, 0x4e, 0xa8, 0xc6, 0x05, 0xc3, 0x98, 0x28, 0x06, 0xa6, 0x18, 0xc4,
-  0x67, 0x38, 0xe2, 0x54, 0x4e, 0x31, 0x20, 0xbe, 0x59, 0x86, 0xd7, 0x90,
-  0x8d, 0xc0, 0x50, 0x31, 0x40, 0x95, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82,
-  0x61, 0x2e, 0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x82,
-  0x25, 0x03, 0x1d, 0x6e, 0x08, 0x54, 0x32, 0x00, 0x83, 0x59, 0x06, 0xd8,
-  0x88, 0x8d, 0xc0, 0x06, 0x58, 0x0c, 0xe0, 0x33, 0x4b, 0x60, 0x1b, 0xd6,
-  0x8a, 0x01, 0x11, 0x9f, 0x59, 0x02, 0xdb, 0x18, 0x8e, 0x90, 0x15, 0x57,
-  0x0c, 0x84, 0x6f, 0x96, 0x61, 0x36, 0x6c, 0x23, 0xb0, 0x59, 0x79, 0xc5,
-  0x20, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20,
-  0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xb8, 0xc9, 0x40, 0x87, 0x1b, 0x82,
-  0x9a, 0x0c, 0xc0, 0x60, 0x96, 0x81, 0x36, 0x6a, 0x23, 0xb0, 0x5b, 0x0c,
-  0x86, 0xf8, 0xcc, 0x12, 0xd8, 0x86, 0x11, 0xbc, 0x18, 0xc0, 0x67, 0x96,
-  0xc0, 0x36, 0x06, 0x5a, 0x1e, 0x0d, 0x36, 0xb0, 0xd8, 0x20, 0x68, 0x43,
-  0xa8, 0x0d, 0xbd, 0x92, 0x8d, 0x0b, 0x86, 0xb9, 0xe0, 0xa9, 0xdb, 0x9e,
-  0x3a, 0x54, 0x0c, 0x86, 0xb9, 0xdf, 0x1b, 0xe6, 0x88, 0x61, 0x8e, 0x18,
-  0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xbc, 0xb2, 0x0c, 0x6a, 0x32,
-  0xa0, 0xc7, 0x40, 0x2c, 0x83, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82,
-  0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x12, 0x19, 0x31, 0x40,
-  0x00, 0x10, 0x04, 0x83, 0x8d, 0x2d, 0x03, 0x9e, 0x0c, 0x12, 0x22, 0x18,
-  0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0xad, 0x2d, 0x83, 0x9e, 0x0c, 0x12,
-  0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0xcd, 0x2d, 0x03, 0x9f,
-  0x0c, 0x12, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0xc5, 0x2e,
-  0x83, 0x9e, 0x0c, 0xfc, 0x31, 0x08, 0xd2, 0x32, 0x70, 0xc9, 0x60, 0x2d,
-  0x83, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xa9, 0x59, 0x82, 0xdb, 0x18, 0x68,
-  0x79, 0x4c, 0x43, 0x34, 0x7c, 0x54, 0x08, 0x0d, 0x96, 0x20, 0x0d, 0xc1,
-  0x36, 0x7c, 0x54, 0x28, 0x0d, 0xab, 0x97, 0x92, 0x0c, 0xe0, 0x33, 0xcb,
-  0x80, 0x1b, 0xba, 0x11, 0x2f, 0xc3, 0x11, 0x41, 0x4c, 0x06, 0xc3, 0x77,
-  0xc2, 0x30, 0xc3, 0x0d, 0x01, 0x3f, 0x06, 0x64, 0x50, 0x43, 0xa0, 0xc3,
-  0x11, 0xf7, 0x52, 0x93, 0xc1, 0xf0, 0x55, 0x20, 0xe8, 0xe5, 0xcb, 0x30,
-  0xc3, 0x0d, 0xc1, 0x3f, 0x06, 0x64, 0x50, 0xc1, 0xa0, 0xb3, 0x0c, 0xb9,
-  0xe1, 0x1e, 0xc1, 0x8d, 0x63, 0x30, 0xcc, 0xe9, 0xdf, 0x30, 0x23, 0x06,
-  0x07, 0x00, 0x82, 0x60, 0xe0, 0xf9, 0x65, 0xe0, 0x96, 0x41, 0x4b, 0x06,
-  0x7b, 0x19, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c,
-  0xc2, 0x68, 0x02, 0x31, 0x14, 0x71, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20,
-  0x18, 0x6c, 0xa5, 0x19, 0xd4, 0x65, 0x70, 0x10, 0xc1, 0x88, 0x01, 0x02,
-  0x80, 0x20, 0x18, 0x6c, 0xa6, 0x19, 0xd8, 0x65, 0xc0, 0x10, 0xc1, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0xa7, 0x19, 0xdc, 0x65, 0x20, 0x11,
-  0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c, 0xaf, 0x19, 0xd8, 0x65,
-  0x70, 0x93, 0x41, 0x20, 0x9a, 0xc1, 0x59, 0x06, 0xa4, 0x19, 0x8c, 0x26,
-  0x04, 0xc0, 0x05, 0x4f, 0xcd, 0x12, 0xb8, 0xc7, 0x70, 0x43, 0xca, 0xa0,
-  0x66, 0x00, 0x06, 0xb3, 0x0c, 0xbb, 0xc1, 0x1b, 0x41, 0xc9, 0x64, 0x90,
-  0x97, 0x01, 0x5c, 0xf0, 0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50,
-  0xb1, 0x19, 0xe8, 0x65, 0xb0, 0xad, 0x65, 0x30, 0x62, 0x70, 0x00, 0x20,
-  0x08, 0x06, 0x94, 0x6c, 0x06, 0x7a, 0x19, 0x04, 0xc2, 0x05, 0xc3, 0x54,
-  0x4d, 0x06, 0x7e, 0x19, 0xc0, 0x05, 0x4f, 0x8d, 0x18, 0x1c, 0x00, 0x08,
-  0x82, 0x01, 0x65, 0x9b, 0xc1, 0x5f, 0x06, 0x32, 0x03, 0x97, 0xc1, 0x88,
-  0xc1, 0x01, 0x80, 0x20, 0x18, 0x50, 0xb7, 0x19, 0xfc, 0x65, 0x10, 0x08,
-  0x17, 0x0c, 0x73, 0xc1, 0x53, 0x77, 0x3c, 0x75, 0xfe, 0x18, 0x0c, 0x73,
-  0x35, 0x18, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00,
-  0x20, 0x08, 0x06, 0xde, 0x6e, 0x06, 0xab, 0x19, 0xa8, 0x65, 0x80, 0x9b,
-  0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c,
-  0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1,
-  0x26, 0x9e, 0x81, 0x6c, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08,
-  0x82, 0xc1, 0x36, 0x9e, 0xc1, 0x6c, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x20,
-  0x00, 0x08, 0x82, 0xc1, 0x46, 0x9e, 0x01, 0x6d, 0x06, 0x09, 0x11, 0x8c,
-  0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0xc2, 0x9e, 0xc1, 0x6c, 0x06, 0x74,
-  0x19, 0x04, 0xbf, 0x19, 0x90, 0x66, 0x10, 0x9e, 0xc1, 0x68, 0x42, 0x00,
-  0x5c, 0xf0, 0xd4, 0x2c, 0x81, 0x7b, 0x0c, 0x37, 0x98, 0x0d, 0x79, 0x06,
-  0x60, 0x30, 0xcb, 0xd0, 0x1b, 0xee, 0x11, 0x98, 0x59, 0x06, 0x68, 0x19,
-  0xc4, 0x67, 0x38, 0xe2, 0x0e, 0xd2, 0x32, 0x20, 0xbe, 0x59, 0x06, 0xdf,
-  0x08, 0x8f, 0xc0, 0xd4, 0x32, 0xc0, 0x83, 0xf8, 0x58, 0x30, 0xd0, 0xe7,
-  0x82, 0x61, 0x2e, 0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29,
-  0xc2, 0x3d, 0x03, 0x1d, 0x6e, 0x08, 0xd8, 0x33, 0x00, 0x83, 0x59, 0x86,
-  0xdf, 0x00, 0x8f, 0xc0, 0x06, 0xb9, 0x0c, 0xe0, 0x33, 0x4b, 0x50, 0x1e,
-  0x16, 0x97, 0x01, 0x11, 0x9f, 0x59, 0x82, 0xf2, 0x18, 0x8e, 0x10, 0x05,
-  0xb9, 0x0c, 0x84, 0x6f, 0x96, 0x41, 0x3c, 0xca, 0x23, 0xb0, 0x51, 0x98,
-  0xcb, 0x20, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2,
-  0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xc8, 0xcf, 0x40, 0x87, 0x1b,
-  0x82, 0xfb, 0x0c, 0xc0, 0x60, 0x96, 0x61, 0x3c, 0xc8, 0x23, 0xb0, 0xbd,
-  0x0c, 0x86, 0xf8, 0xcc, 0x12, 0x94, 0x87, 0x11, 0x7e, 0x19, 0xc0, 0x67,
-  0x96, 0xa0, 0x3c, 0x06, 0x5a, 0x1e, 0xed, 0x37, 0x30, 0xf0, 0x20, 0xc6,
-  0x43, 0x20, 0x0f, 0x9d, 0x08, 0x8f, 0x0b, 0x86, 0xb1, 0xbe, 0x0c, 0x42,
-  0x33, 0x88, 0xcf, 0x70, 0x84, 0xe8, 0x88, 0x66, 0x40, 0x7c, 0xb3, 0x0c,
-  0xe6, 0x91, 0x1e, 0x81, 0x8d, 0x66, 0x30, 0x3a, 0xf1, 0xb1, 0x60, 0xa0,
-  0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2,
-  0x53, 0xc4, 0x89, 0x06, 0x3a, 0xdc, 0x10, 0x94, 0x68, 0x00, 0x06, 0xb3,
-  0x0c, 0xe7, 0x81, 0x1e, 0x81, 0x0d, 0xab, 0x19, 0xc0, 0x67, 0x96, 0xa0,
-  0x3d, 0x0c, 0x35, 0x03, 0x22, 0x3e, 0xb3, 0x04, 0xed, 0x31, 0x1c, 0xd1,
-  0x3a, 0xa9, 0x19, 0x08, 0xdf, 0x2c, 0x83, 0x7a, 0xb4, 0x47, 0x60, 0xae,
-  0xa3, 0x9a, 0x41, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c,
-  0x65, 0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x21, 0xa3, 0x81, 0x0e,
-  0x37, 0x04, 0x30, 0x1a, 0x80, 0xc1, 0x2c, 0xc3, 0x7a, 0xb0, 0x47, 0x60,
-  0xb2, 0x19, 0x0c, 0xf1, 0x99, 0x25, 0x68, 0x0f, 0x23, 0x6e, 0x33, 0x80,
-  0xcf, 0x2c, 0x41, 0x7b, 0x0c, 0xb4, 0x3c, 0xda, 0x79, 0x60, 0xe8, 0x41,
-  0xac, 0x87, 0xc0, 0x1e, 0x70, 0x97, 0x1e, 0x17, 0x0c, 0x73, 0xc1, 0x53,
-  0xb7, 0x3d, 0x75, 0xa3, 0x19, 0x0c, 0x73, 0xba, 0x18, 0x0c, 0x73, 0xc4,
-  0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x1e, 0x98,
-  0x06, 0x30, 0x1a, 0xbc, 0x67, 0xd0, 0xa3, 0xc1, 0x68, 0x42, 0x00, 0x8c,
-  0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89,
-  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x76, 0xa6, 0xc1, 0x8d, 0x06,
-  0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x86, 0xa6, 0x01,
-  0x8e, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x96,
-  0xa6, 0x41, 0x8e, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82,
-  0xc1, 0x12, 0xa7, 0x01, 0x8e, 0x06, 0xf9, 0x19, 0x04, 0x64, 0x1a, 0xa4,
-  0x68, 0x60, 0xa6, 0xc1, 0x68, 0x42, 0x00, 0x5c, 0xf0, 0xd4, 0x2c, 0x81,
-  0x7b, 0x0c, 0xb4, 0x3c, 0xa6, 0x91, 0x1b, 0x34, 0x2c, 0xe0, 0x06, 0x4b,
-  0xec, 0x86, 0xd0, 0x1e, 0x34, 0x2c, 0xf0, 0xc6, 0x2c, 0xc3, 0x7b, 0xc4,
-  0xc7, 0xfa, 0x0c, 0x47, 0xbc, 0xcf, 0x8a, 0x06, 0xc3, 0x77, 0xf0, 0x33,
-  0xcc, 0x70, 0x43, 0x60, 0x9f, 0x01, 0x19, 0xd4, 0x10, 0xe8, 0x70, 0x84,
-  0xfc, 0xbc, 0x68, 0x30, 0x7c, 0x15, 0x08, 0x7a, 0xf4, 0x33, 0xcc, 0x70,
-  0x43, 0x90, 0x9f, 0x01, 0x19, 0x54, 0x30, 0xe8, 0x2c, 0x03, 0x7c, 0x94,
-  0x48, 0x70, 0xbd, 0x19, 0x0c, 0x73, 0xf4, 0x18, 0x0c, 0x33, 0x62, 0x70,
-  0x00, 0x20, 0x08, 0x06, 0x1e, 0x9e, 0x06, 0x68, 0x1a, 0x9c, 0x68, 0x50,
-  0xa7, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20,
+  0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xc0, 0xa2, 0xa0, 0xc3, 0x0d, 0x81, 0x2b,
+  0x0a, 0x60, 0x30, 0xcb, 0x40, 0x32, 0x25, 0x13, 0x98, 0x0c, 0x0a, 0x43,
+  0x7c, 0x66, 0x09, 0x4c, 0xc6, 0x88, 0x1a, 0x14, 0xe0, 0x33, 0x4b, 0x60,
+  0x32, 0x03, 0x2d, 0x8f, 0x06, 0x32, 0x58, 0xc8, 0x10, 0x24, 0x23, 0x94,
+  0x8c, 0x1f, 0x0a, 0x22, 0x73, 0xc1, 0x30, 0x46, 0x83, 0x02, 0x0e, 0x0a,
+  0xf1, 0x19, 0x8e, 0x20, 0x95, 0x1c, 0x14, 0x88, 0x6f, 0x96, 0xe1, 0x64,
+  0x54, 0x26, 0x30, 0x1d, 0x14, 0x4a, 0x25, 0x3e, 0x16, 0x0c, 0xf4, 0xb9,
+  0x60, 0x98, 0x0b, 0x9e, 0xb2, 0xc0, 0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a,
+  0xf0, 0x45, 0x41, 0x87, 0x1b, 0x02, 0x5e, 0x14, 0xc0, 0x60, 0x96, 0x01,
+  0x65, 0x52, 0x26, 0xb0, 0x41, 0x0c, 0x05, 0xf8, 0xcc, 0x12, 0xb8, 0x8c,
+  0xfd, 0xa0, 0x40, 0xc4, 0x67, 0x96, 0xc0, 0x65, 0x86, 0x23, 0x5e, 0x05,
+  0x0c, 0x05, 0xe1, 0x9b, 0x65, 0x58, 0x19, 0x97, 0x09, 0x0c, 0x56, 0xc2,
+  0x50, 0x88, 0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c,
+  0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xd2, 0x51, 0xd0, 0xe1, 0x86,
+  0xe0, 0x1c, 0x05, 0x30, 0x98, 0x65, 0x60, 0x99, 0x96, 0x09, 0x2c, 0x0d,
+  0x85, 0x21, 0x3e, 0xb3, 0x04, 0x2e, 0x63, 0x84, 0x1b, 0x0a, 0xf0, 0x99,
+  0x25, 0x70, 0x99, 0x81, 0x96, 0x47, 0x43, 0x19, 0x2c, 0x65, 0x08, 0x96,
+  0x11, 0x5a, 0x86, 0xae, 0x54, 0xe6, 0x82, 0x61, 0x2e, 0x78, 0xea, 0xb6,
+  0xa7, 0x4e, 0x07, 0x85, 0x61, 0x2e, 0xee, 0x83, 0x61, 0x8e, 0x18, 0xe6,
+  0x88, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0xc0, 0xbb, 0x47, 0xe1,
+  0x1c, 0x05, 0x53, 0x14, 0xe8, 0x51, 0x18, 0x4d, 0x08, 0x80, 0xd1, 0x04,
+  0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0x22, 0x91, 0x11,
+  0x03, 0x04, 0x00, 0x41, 0x30, 0xd8, 0xfc, 0x51, 0x70, 0x47, 0x21, 0x21,
+  0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xd8, 0xfe, 0x51, 0x78, 0x47,
+  0x21, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xd8, 0x40, 0x52,
+  0x80, 0x47, 0x21, 0x21, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x58,
+  0x50, 0x52, 0x78, 0x47, 0x01, 0x16, 0x85, 0x60, 0x1f, 0x05, 0x70, 0x14,
+  0xfa, 0x51, 0x18, 0x4d, 0x08, 0x80, 0x0b, 0x9e, 0x9a, 0x25, 0x78, 0x99,
+  0x81, 0x96, 0xc7, 0x34, 0xf4, 0x45, 0x34, 0x89, 0x7c, 0x61, 0x09, 0x7e,
+  0x11, 0x5c, 0x46, 0x34, 0x89, 0x7e, 0x99, 0x65, 0x80, 0x19, 0x99, 0x11,
+  0xd5, 0x60, 0x38, 0x42, 0xf6, 0xc4, 0x51, 0x18, 0xbe, 0x9b, 0xbd, 0x61,
+  0x86, 0x1b, 0x82, 0x56, 0x14, 0xc8, 0xa0, 0x86, 0x40, 0x87, 0x23, 0xe6,
+  0xc5, 0x1c, 0x85, 0xe1, 0xab, 0x40, 0xd0, 0xab, 0x97, 0x61, 0x86, 0x1b,
+  0x02, 0x58, 0x14, 0xc8, 0xa0, 0x82, 0x41, 0x67, 0x19, 0x62, 0xc6, 0x6c,
+  0x82, 0xa3, 0x43, 0x61, 0x98, 0x5b, 0xfd, 0x60, 0x98, 0x11, 0x83, 0x03,
+  0x00, 0x41, 0x30, 0xf0, 0x5e, 0x52, 0xf8, 0x47, 0xc1, 0x17, 0x05, 0x96,
+  0x14, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61,
+  0x34, 0x81, 0x18, 0x8a, 0x38, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c,
+  0x36, 0x9b, 0x14, 0x4c, 0x52, 0x38, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40,
+  0x10, 0x0c, 0xb6, 0x9b, 0x14, 0x4e, 0x52, 0x60, 0x88, 0x60, 0xc4, 0x00,
+  0x01, 0x40, 0x10, 0x0c, 0x36, 0x9c, 0x14, 0x50, 0x52, 0x90, 0x88, 0x60,
+  0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x16, 0xb0, 0x14, 0x4e, 0x52, 0x40,
+  0x47, 0x21, 0x98, 0x49, 0x01, 0x1f, 0x85, 0x9a, 0x14, 0x46, 0x13, 0x02,
+  0xe0, 0x82, 0xa7, 0x66, 0x09, 0xcc, 0x66, 0xb8, 0x41, 0x57, 0x83, 0x9c,
+  0x14, 0xc0, 0x60, 0x96, 0x61, 0x66, 0x68, 0x26, 0xa8, 0x71, 0x14, 0x54,
+  0x52, 0x80, 0x0b, 0x9e, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x4a,
+  0x2c, 0x85, 0x95, 0x14, 0xd8, 0x8f, 0x1f, 0x85, 0x11, 0x83, 0x03, 0x00,
+  0x41, 0x30, 0xa0, 0xc6, 0x52, 0x58, 0x49, 0x21, 0x10, 0x2e, 0x18, 0xa6,
+  0xcc, 0x51, 0x78, 0x49, 0x01, 0x2e, 0x78, 0x6a, 0xc4, 0xe0, 0x00, 0x40,
+  0x10, 0x0c, 0xa8, 0xb3, 0x14, 0x60, 0x52, 0x70, 0x99, 0x90, 0x14, 0x46,
+  0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x42, 0x4b, 0x01, 0x26, 0x85, 0x40,
+  0xb8, 0x60, 0x98, 0x0b, 0x9e, 0xba, 0xe3, 0xa9, 0x7b, 0x45, 0x61, 0x98,
+  0x33, 0xff, 0x60, 0x98, 0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03,
+  0x00, 0x41, 0x30, 0xf0, 0xd8, 0x52, 0xe0, 0x49, 0x61, 0x1f, 0x85, 0xb4,
+  0x14, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61,
+  0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c,
+  0xb6, 0xb9, 0x14, 0xc6, 0x52, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40,
+  0x10, 0x0c, 0x36, 0xba, 0x14, 0xc8, 0x52, 0x48, 0x88, 0x60, 0xc4, 0x00,
+  0x01, 0x40, 0x10, 0x0c, 0xb6, 0xba, 0x14, 0xca, 0x52, 0x48, 0x88, 0x60,
+  0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x96, 0xbe, 0x14, 0xc8, 0x52, 0x28,
+  0x49, 0x21, 0x80, 0x4b, 0xa1, 0x26, 0x05, 0xb9, 0x14, 0x46, 0x13, 0x02,
+  0xe0, 0x82, 0xa7, 0x66, 0x09, 0xcc, 0x66, 0xb8, 0xe1, 0x5e, 0x83, 0xba,
+  0x14, 0xc0, 0x60, 0x96, 0xa1, 0x66, 0xcc, 0x26, 0xb0, 0x7b, 0x14, 0xf2,
+  0x51, 0x88, 0xcf, 0x70, 0x04, 0x0a, 0x06, 0xfa, 0x28, 0x10, 0xdf, 0x2c,
+  0x83, 0xcd, 0xe4, 0x4c, 0x60, 0xfb, 0x28, 0xa4, 0x60, 0x10, 0x1f, 0x0b,
+  0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x60, 0xc8, 0xc7, 0x8a,
+  0x20, 0x3e, 0x45, 0xfc, 0xa5, 0xa0, 0xc3, 0x0d, 0x41, 0x5f, 0x0a, 0x60,
+  0x30, 0xcb, 0x70, 0x33, 0x38, 0x13, 0xd8, 0x30, 0x92, 0x02, 0x7c, 0x66,
+  0x09, 0x7a, 0xc6, 0x44, 0x52, 0x20, 0xe2, 0x33, 0x4b, 0xd0, 0x33, 0xc3,
+  0x11, 0x33, 0x18, 0x8c, 0xa4, 0x20, 0x7c, 0xb3, 0x0c, 0x3a, 0xd3, 0x33,
+  0x81, 0xd1, 0x60, 0x40, 0x92, 0x42, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1,
+  0x30, 0x17, 0x3c, 0x65, 0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xa1,
+  0x9a, 0x82, 0x0e, 0x37, 0x04, 0xa8, 0x29, 0x80, 0xc1, 0x2c, 0xc3, 0xce,
+  0xf0, 0x4c, 0x60, 0x2c, 0x29, 0x0c, 0xf1, 0x99, 0x25, 0xe8, 0x19, 0x23,
+  0x5e, 0x52, 0x80, 0xcf, 0x2c, 0x41, 0xcf, 0x0c, 0xb4, 0x3c, 0xda, 0xcd,
+  0x60, 0x38, 0x43, 0xec, 0x8c, 0xc0, 0x33, 0x7c, 0x28, 0xe4, 0xcc, 0x05,
+  0xc3, 0x98, 0x4b, 0x0a, 0x32, 0x29, 0xc4, 0x67, 0x38, 0xc2, 0x6f, 0x66,
+  0x52, 0x20, 0xbe, 0x59, 0x06, 0x9f, 0x09, 0x9b, 0xc0, 0x68, 0x52, 0xf8,
+  0x9b, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca, 0x02,
+  0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x02, 0x37, 0x05, 0x1d, 0x6e, 0x08,
+  0x6c, 0x53, 0x00, 0x83, 0x59, 0x86, 0x9f, 0x01, 0x9b, 0xc0, 0x06, 0x9e,
+  0x14, 0xe0, 0x33, 0x4b, 0x50, 0x36, 0x96, 0x93, 0x02, 0x11, 0x9f, 0x59,
+  0x82, 0xb2, 0x19, 0x8e, 0x48, 0x1d, 0x9d, 0x14, 0x84, 0x6f, 0x96, 0x41,
+  0x6c, 0xca, 0x26, 0x30, 0xd5, 0xd9, 0x49, 0x21, 0x3e, 0x16, 0x38, 0xf4,
+  0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c,
+  0x8a, 0x18, 0x4f, 0x41, 0x87, 0x1b, 0x82, 0xf0, 0x14, 0xc0, 0x60, 0x96,
+  0x61, 0x6c, 0xc8, 0x26, 0xb0, 0xb1, 0x14, 0x86, 0xf8, 0xcc, 0x12, 0x94,
+  0x8d, 0x11, 0x68, 0x29, 0xc0, 0x67, 0x96, 0xa0, 0x6c, 0x06, 0x5a, 0x1e,
+  0xed, 0x67, 0x30, 0xb0, 0x21, 0xc6, 0x46, 0x20, 0x1b, 0xb4, 0x0b, 0x9b,
+  0x0b, 0x86, 0xb9, 0xe0, 0xa9, 0xdb, 0x9e, 0x3a, 0x9a, 0x14, 0x86, 0xb9,
+  0x35, 0x14, 0x86, 0x39, 0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00,
+  0x10, 0x04, 0x03, 0x2f, 0x3e, 0x85, 0xf0, 0x14, 0x40, 0x53, 0x70, 0x4f,
+  0x61, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46,
+  0x13, 0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60,
+  0xc3, 0x4f, 0x01, 0x3d, 0x85, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04,
+  0xc1, 0x60, 0xcb, 0x4f, 0x21, 0x3d, 0x85, 0x84, 0x08, 0x46, 0x0c, 0x10,
+  0x00, 0x04, 0xc1, 0x60, 0xd3, 0x4f, 0x41, 0x3d, 0x85, 0x84, 0x08, 0x46,
+  0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0x11, 0x51, 0x21, 0x3d, 0x05, 0xd5,
+  0x14, 0x82, 0xfa, 0x14, 0x74, 0x53, 0xb8, 0x4f, 0x61, 0x34, 0x21, 0x00,
+  0x2e, 0x78, 0x6a, 0x96, 0xc0, 0x6c, 0x06, 0x5a, 0x1e, 0xd3, 0x88, 0x19,
+  0xbb, 0x25, 0x60, 0x86, 0x25, 0x66, 0x46, 0x28, 0x1b, 0xbb, 0x25, 0x68,
+  0xc6, 0xfe, 0x36, 0x88, 0x4d, 0x01, 0x3e, 0xb3, 0x0c, 0x67, 0x93, 0x36,
+  0x7d, 0x1b, 0x0c, 0x47, 0x84, 0x6e, 0xd0, 0x9b, 0xc2, 0xf0, 0x9d, 0xe8,
+  0x06, 0xc3, 0x0c, 0x37, 0x04, 0xa8, 0x29, 0x90, 0x41, 0x0d, 0x81, 0x0e,
+  0x47, 0x14, 0xe1, 0x29, 0x0c, 0x5f, 0x05, 0x82, 0xde, 0x31, 0xcc, 0x70,
+  0x43, 0xb0, 0x9a, 0x02, 0x19, 0x54, 0x30, 0xe8, 0x2c, 0x03, 0xda, 0xf4,
+  0x4d, 0x70, 0x6f, 0x29, 0x0c, 0x73, 0xa6, 0x28, 0x0c, 0x33, 0x62, 0x70,
+  0x00, 0x20, 0x08, 0x06, 0x9e, 0x8a, 0x0a, 0xfa, 0x29, 0xe4, 0xa6, 0x70,
+  0xa2, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20,
   0x8c, 0x26, 0x10, 0x43, 0x11, 0x87, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
-  0xc1, 0xf6, 0xa7, 0xc1, 0x9b, 0x06, 0x07, 0x11, 0x8c, 0x18, 0x20, 0x00,
-  0x08, 0x82, 0xc1, 0x06, 0xaa, 0x01, 0x9c, 0x06, 0x0c, 0x11, 0x8c, 0x18,
-  0x20, 0x00, 0x08, 0x82, 0xc1, 0x16, 0xaa, 0x41, 0x9c, 0x06, 0x12, 0x11,
-  0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0x92, 0xaa, 0x01, 0x9c, 0x06,
-  0x31, 0x1a, 0x04, 0x7c, 0x1a, 0x84, 0x69, 0xe0, 0xa7, 0xc1, 0x68, 0x42,
-  0x00, 0x5c, 0xf0, 0xd4, 0x2c, 0x41, 0x89, 0x0c, 0x37, 0x8c, 0x90, 0xa8,
-  0x06, 0x60, 0x30, 0xcb, 0x20, 0x1f, 0xf3, 0x11, 0x14, 0x8b, 0x06, 0x73,
-  0x1a, 0xc0, 0x05, 0x4f, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x01, 0xb5,
-  0xaa, 0x01, 0x9d, 0x06, 0x29, 0x54, 0xa6, 0xc1, 0x88, 0xc1, 0x01, 0x80,
-  0x20, 0x18, 0x50, 0xac, 0x1a, 0xd0, 0x69, 0x10, 0x08, 0x17, 0x0c, 0x53,
-  0x2f, 0x1a, 0xe0, 0x69, 0x00, 0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20,
-  0x08, 0x06, 0x14, 0xac, 0x06, 0x79, 0x1a, 0xb4, 0x90, 0x9a, 0x06, 0x23,
-  0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0xc5, 0x6a, 0x90, 0xa7, 0x41, 0x20,
-  0x5c, 0x30, 0xcc, 0x05, 0x4f, 0xdd, 0xf1, 0xd4, 0xe1, 0x67, 0x30, 0xcc,
-  0xbd, 0x64, 0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01,
-  0x80, 0x20, 0x18, 0x78, 0xb5, 0x1a, 0x94, 0x6a, 0x40, 0xa6, 0x81, 0xac,
-  0x06, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30,
-  0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06,
-  0x1b, 0xaf, 0x06, 0xac, 0x1a, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20,
-  0x08, 0x06, 0x5b, 0xaf, 0x06, 0xad, 0x1a, 0x24, 0x44, 0x30, 0x62, 0x80,
-  0x00, 0x20, 0x08, 0x06, 0x9b, 0xaf, 0x06, 0xae, 0x1a, 0x24, 0x44, 0x30,
-  0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x8b, 0xb9, 0x06, 0xad, 0x1a, 0xb8,
-  0x69, 0x10, 0xe4, 0x6a, 0xe0, 0xa7, 0xc1, 0xae, 0x06, 0xa3, 0x09, 0x01,
-  0x70, 0xc1, 0x53, 0xb3, 0x04, 0x25, 0x32, 0xdc, 0x00, 0x46, 0xbe, 0x1a,
-  0x80, 0xc1, 0x2c, 0x03, 0x7d, 0x94, 0x48, 0x60, 0x60, 0x1a, 0x88, 0x69,
-  0x10, 0x9f, 0xe1, 0x88, 0x32, 0x1a, 0xd3, 0x80, 0xf8, 0x66, 0x19, 0xea,
-  0x03, 0x3f, 0x02, 0x23, 0xd3, 0xc0, 0x8c, 0xe2, 0x63, 0xc1, 0x40, 0x9f,
-  0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7,
-  0x08, 0x74, 0x0d, 0x74, 0xb8, 0x21, 0x30, 0xd7, 0x00, 0x0c, 0x66, 0x19,
-  0xec, 0xe3, 0x3e, 0x02, 0x1b, 0xd8, 0x34, 0x80, 0xcf, 0x2c, 0x01, 0x7f,
-  0xd8, 0x9a, 0x06, 0x44, 0x7c, 0x66, 0x09, 0xf8, 0x63, 0x38, 0x02, 0x8e,
-  0xd8, 0x34, 0x10, 0xbe, 0x59, 0x86, 0xfc, 0xe0, 0x8f, 0xc0, 0xe2, 0xa8,
-  0x4d, 0x83, 0xf8, 0x58, 0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca,
-  0x82, 0x48, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x62, 0x5e, 0x03, 0x1d, 0x6e,
-  0x08, 0xe2, 0x35, 0x00, 0x83, 0x59, 0x06, 0xfd, 0xd8, 0x8f, 0xc0, 0xea,
-  0x34, 0x18, 0xe2, 0x33, 0x4b, 0xc0, 0x1f, 0x46, 0xe0, 0x69, 0x00, 0x9f,
-  0x59, 0x02, 0xfe, 0x18, 0x68, 0x79, 0x34, 0xfb, 0xc0, 0xee, 0x83, 0xd0,
-  0x0f, 0x61, 0x3f, 0xec, 0x31, 0xc0, 0x8f, 0x0b, 0x86, 0xb1, 0x3b, 0x0d,
-  0xf6, 0x34, 0x88, 0xcf, 0x70, 0x44, 0x1f, 0xf1, 0x69, 0x40, 0x7c, 0xb3,
-  0x0c, 0xfd, 0x01, 0x22, 0x81, 0xf5, 0x69, 0xe0, 0x47, 0xf1, 0xb1, 0x60,
-  0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08,
-  0xe2, 0x53, 0x44, 0xc8, 0x06, 0x3a, 0xdc, 0x10, 0xfc, 0x6b, 0x00, 0x06,
-  0xb3, 0x0c, 0xfe, 0xf1, 0x1f, 0x81, 0x0d, 0xa5, 0x1a, 0xc0, 0x67, 0x96,
-  0x80, 0x44, 0x4c, 0x54, 0x03, 0x22, 0x3e, 0xb3, 0x04, 0x24, 0x32, 0x1c,
-  0x81, 0x4a, 0xa3, 0x1a, 0x08, 0xdf, 0x2c, 0x43, 0x88, 0x90, 0x48, 0x60,
-  0xa9, 0x44, 0xaa, 0x41, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17,
-  0x3c, 0x65, 0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xc1, 0xb2, 0x81,
-  0x0e, 0x37, 0x04, 0x2a, 0x1b, 0x80, 0xc1, 0x2c, 0x83, 0x88, 0x8c, 0x48,
-  0x60, 0xac, 0x1a, 0x0c, 0xf1, 0x99, 0x25, 0x20, 0x11, 0x23, 0x62, 0x35,
-  0x80, 0xcf, 0x2c, 0x01, 0x89, 0x0c, 0xb4, 0x3c, 0x9a, 0x7f, 0x60, 0xff,
-  0x41, 0x88, 0x88, 0x30, 0x22, 0x66, 0x19, 0x80, 0xc8, 0x05, 0xc3, 0x5c,
-  0xf0, 0xd4, 0x6d, 0x4f, 0x5d, 0x9f, 0x06, 0xc3, 0x1c, 0x6d, 0x06, 0xc3,
-  0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x81,
-  0xa7, 0xb3, 0x81, 0xca, 0x06, 0xe9, 0x1a, 0xdc, 0x6c, 0x30, 0x9a, 0x10,
-  0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50,
-  0x44, 0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0x85, 0x6d, 0x10,
-  0xb3, 0x41, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0x89,
-  0x6d, 0x20, 0xb3, 0x41, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60,
-  0xb0, 0x8d, 0x6d, 0x30, 0xb3, 0x41, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00,
-  0x82, 0x60, 0xb0, 0xac, 0x6d, 0x20, 0xb3, 0xc1, 0xbc, 0x06, 0x81, 0xcf,
-  0x06, 0x23, 0x1b, 0x80, 0x6d, 0x30, 0x9a, 0x10, 0x00, 0x17, 0x3c, 0x35,
-  0x4b, 0x50, 0x22, 0x03, 0x2d, 0x8f, 0x69, 0xc0, 0x07, 0x8a, 0x0b, 0xef,
-  0xc1, 0x12, 0xf2, 0x21, 0x90, 0x08, 0x8a, 0x0b, 0xf3, 0x31, 0xcb, 0x60,
-  0x22, 0x28, 0x52, 0x4e, 0xc3, 0x11, 0xea, 0x54, 0xb2, 0xc1, 0xf0, 0xdd,
-  0x3a, 0x0d, 0x33, 0xdc, 0x10, 0xc0, 0x6b, 0x40, 0x06, 0x35, 0x04, 0x3a,
-  0x1c, 0xc1, 0x4e, 0x29, 0x1b, 0x0c, 0x5f, 0x05, 0x82, 0x9e, 0x3b, 0x0d,
-  0x33, 0xdc, 0x10, 0xcc, 0x6b, 0x40, 0x06, 0x15, 0x0c, 0x3a, 0xcb, 0x70,
-  0x22, 0x3c, 0x12, 0xdc, 0xad, 0x06, 0xc3, 0x9c, 0x7b, 0x06, 0xc3, 0x8c,
-  0x18, 0x1c, 0x00, 0x08, 0x82, 0x81, 0x27, 0xb7, 0x81, 0xd8, 0x06, 0x21,
-  0x1b, 0xbc, 0x6d, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a,
-  0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0xc4, 0x21, 0x23, 0x06, 0x08, 0x00,
-  0x82, 0x60, 0xb0, 0xe5, 0x6d, 0x90, 0xb6, 0xc1, 0x41, 0x04, 0x23, 0x06,
-  0x08, 0x00, 0x82, 0x60, 0xb0, 0xe9, 0x6d, 0xa0, 0xb6, 0x01, 0x43, 0x04,
-  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0xed, 0x6d, 0xb0, 0xb6, 0x81,
-  0x44, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0x8c, 0x6e, 0xa0,
-  0xb6, 0xc1, 0xca, 0x06, 0x81, 0xdd, 0x06, 0x3b, 0x1b, 0xe0, 0x6d, 0x30,
-  0x9a, 0x10, 0x00, 0x17, 0x3c, 0x35, 0x4b, 0xc0, 0x23, 0xc3, 0x0d, 0xfd,
-  0xc4, 0xb7, 0x01, 0x18, 0xcc, 0x32, 0xa4, 0x88, 0x8a, 0x04, 0x65, 0xb2,
-  0x41, 0xdb, 0x06, 0x70, 0xc1, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60,
-  0x40, 0x95, 0x6e, 0xe0, 0xb6, 0x01, 0x49, 0xfd, 0x6c, 0x30, 0x62, 0x70,
-  0x00, 0x20, 0x08, 0x06, 0x94, 0xe9, 0x06, 0x6e, 0x1b, 0x04, 0xc2, 0x05,
-  0xc3, 0x54, 0xca, 0x06, 0x72, 0x1b, 0xc0, 0x05, 0x4f, 0x8d, 0x18, 0x1c,
-  0x00, 0x08, 0x82, 0x01, 0xa5, 0xba, 0xc1, 0xdc, 0x06, 0x27, 0x45, 0xb6,
-  0xc1, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50, 0xab, 0x1b, 0xcc, 0x6d,
-  0x10, 0x08, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x77, 0x3c, 0x75, 0xf2, 0x1a,
-  0x0c, 0x73, 0x29, 0x1a, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62,
-  0x70, 0x00, 0x20, 0x08, 0x06, 0xde, 0xeb, 0x06, 0x7f, 0x1b, 0xf8, 0x6c,
-  0xc0, 0xba, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2,
-  0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08,
-  0x82, 0xc1, 0x66, 0xbb, 0x81, 0xe9, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x20,
-  0x00, 0x08, 0x82, 0xc1, 0x76, 0xbb, 0xc1, 0xe9, 0x06, 0x09, 0x11, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x86, 0xbb, 0x01, 0xea, 0x06, 0x09,
-  0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0x02, 0xbe, 0xc1, 0xe9,
-  0x06, 0x68, 0x1b, 0x04, 0xb3, 0x1b, 0xe0, 0x6d, 0x50, 0xbb, 0xc1, 0x68,
-  0x42, 0x00, 0x5c, 0xf0, 0xd4, 0x2c, 0x01, 0x8f, 0x0c, 0x37, 0xe8, 0x14,
-  0xee, 0x06, 0x60, 0x30, 0xcb, 0xb0, 0x22, 0x3c, 0x12, 0x98, 0xce, 0x06,
-  0x3c, 0x1b, 0xc4, 0x67, 0x38, 0x02, 0xac, 0x7a, 0x36, 0x20, 0xbe, 0x59,
-  0x06, 0x16, 0x79, 0x91, 0xc0, 0x7c, 0x36, 0x08, 0xab, 0xf8, 0x58, 0x30,
-  0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04,
-  0xf1, 0x29, 0x42, 0x7c, 0x03, 0x1d, 0x6e, 0x08, 0xc0, 0x37, 0x00, 0x83,
-  0x59, 0x86, 0x16, 0x71, 0x91, 0xc0, 0x06, 0xb3, 0x0d, 0xe0, 0x33, 0x4b,
-  0x30, 0x23, 0x56, 0xb6, 0x01, 0x11, 0x9f, 0x59, 0x82, 0x19, 0x19, 0x8e,
-  0x58, 0x2b, 0xb3, 0x0d, 0x84, 0x6f, 0x96, 0x01, 0x46, 0x66, 0x24, 0x30,
-  0xb6, 0x3a, 0xdb, 0x20, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b,
-  0x9e, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x68, 0xdf, 0x40,
-  0x87, 0x1b, 0x82, 0xf5, 0x0d, 0xc0, 0x60, 0x96, 0x21, 0x46, 0x64, 0x24,
-  0xb0, 0xb7, 0x0d, 0x86, 0xf8, 0xcc, 0x12, 0xcc, 0x88, 0x11, 0x72, 0x1b,
-  0xc0, 0x67, 0x96, 0x60, 0x46, 0x06, 0x5a, 0x1e, 0xad, 0x45, 0x30, 0x17,
-  0x21, 0x62, 0x44, 0x90, 0x11, 0x76, 0x0d, 0x5e, 0xe4, 0x82, 0x61, 0x2c,
-  0x6e, 0x83, 0xba, 0x0d, 0xe2, 0x33, 0x1c, 0x71, 0x57, 0x76, 0x1b, 0x10,
-  0xdf, 0x2c, 0x03, 0x8d, 0xdc, 0x48, 0x60, 0x77, 0x1b, 0xe0, 0x55, 0x7c,
-  0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x81, 0x21, 0x1f,
-  0x2b, 0x82, 0xf8, 0x14, 0xb1, 0xbf, 0x81, 0x0e, 0x37, 0x04, 0xf9, 0x1b,
-  0x80, 0xc1, 0x2c, 0x43, 0x8d, 0xd8, 0x48, 0x60, 0xc3, 0xdf, 0x06, 0xf0,
-  0x99, 0x25, 0xd8, 0x11, 0xe3, 0xdb, 0x80, 0x88, 0xcf, 0x2c, 0xc1, 0x8e,
-  0x0c, 0x47, 0x88, 0x56, 0xdf, 0x06, 0xc2, 0x37, 0xcb, 0x80, 0x23, 0x3b,
-  0x12, 0xd8, 0x68, 0xf9, 0x6d, 0x10, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30,
-  0xcc, 0x05, 0x4f, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0x98,
-  0x70, 0xa0, 0xc3, 0x0d, 0x01, 0x09, 0x07, 0x60, 0x30, 0xcb, 0x90, 0x23,
-  0x3a, 0x12, 0x98, 0xe9, 0x06, 0x43, 0x7c, 0x66, 0x09, 0x76, 0xc4, 0x88,
-  0xd5, 0x0d, 0xe0, 0x33, 0x4b, 0xb0, 0x23, 0x03, 0x2d, 0x8f, 0x56, 0x23,
-  0x98, 0x8d, 0x10, 0x39, 0x22, 0xe8, 0x88, 0xce, 0x06, 0x37, 0x72, 0xc1,
-  0x30, 0x17, 0x3c, 0x75, 0xdb, 0x53, 0x77, 0xb7, 0xc1, 0x30, 0xe7, 0xaa,
-  0xc1, 0x30, 0x47, 0x0c, 0x73, 0xc4, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82,
-  0x60, 0xe0, 0xd1, 0x70, 0x40, 0xc2, 0xc1, 0xf8, 0x06, 0x31, 0x1c, 0x8c,
-  0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02,
-  0x31, 0x14, 0x91, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0x3b,
-  0x1c, 0xac, 0x70, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
-  0x6c, 0x3c, 0x1c, 0xb0, 0x70, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80,
-  0x20, 0x18, 0x6c, 0x3d, 0x1c, 0xb4, 0x70, 0x90, 0x10, 0xc1, 0x88, 0x81,
-  0x02, 0x80, 0x20, 0x18, 0x2c, 0x65, 0x1c, 0xb0, 0x70, 0xd0, 0xbe, 0x41,
-  0x80, 0xc3, 0x41, 0xff, 0x06, 0x3a, 0x1c, 0x8c, 0x26, 0x04, 0xc0, 0x05,
-  0x4f, 0xcd, 0x12, 0xf0, 0xc8, 0x40, 0xcb, 0x63, 0x1a, 0x27, 0xc2, 0xff,
-  0x82, 0x89, 0xb0, 0x44, 0x8a, 0x08, 0x3b, 0xc2, 0xff, 0x82, 0x8a, 0xcc,
-  0x32, 0xf4, 0xc8, 0x8f, 0xfc, 0xd6, 0x70, 0x84, 0xfa, 0xfc, 0x6f, 0x30,
-  0x7c, 0xb7, 0x3e, 0xc3, 0x0c, 0x37, 0x04, 0xea, 0x1b, 0x90, 0x41, 0x0d,
-  0x81, 0x0e, 0x47, 0x98, 0xd7, 0x08, 0x07, 0xc3, 0x57, 0x81, 0xa0, 0x87,
-  0x5e, 0xc3, 0x0c, 0x37, 0x04, 0xed, 0x1b, 0x90, 0x41, 0x05, 0x83, 0xce,
-  0x32, 0xf8, 0xc8, 0x9c, 0x04, 0x17, 0xbb, 0xc1, 0x30, 0x87, 0xae, 0xc1,
-  0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xe0, 0xb1, 0x71, 0xc0, 0xc3,
-  0xc1, 0xfe, 0x06, 0x69, 0x1c, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10,
-  0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x71, 0xc8, 0x88, 0x01,
-  0x02, 0x80, 0x20, 0x18, 0x6c, 0x73, 0x1c, 0x8c, 0x71, 0x70, 0x10, 0xc1,
-  0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0x74, 0x1c, 0x90, 0x71, 0xc0,
-  0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0x75, 0x1c, 0x94,
-  0x71, 0x20, 0x11, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c, 0x7d,
-  0x1c, 0x90, 0x71, 0x50, 0xc2, 0x41, 0x00, 0xc7, 0x41, 0x0d, 0x07, 0x72,
-  0x1c, 0x8c, 0x26, 0x04, 0xc0, 0x05, 0x4f, 0xcd, 0x12, 0xcc, 0xc9, 0x70,
-  0xc3, 0x7d, 0xd9, 0x71, 0x00, 0x06, 0xb3, 0x0c, 0x60, 0x12, 0x26, 0x41,
-  0x81, 0x70, 0x70, 0xc6, 0x01, 0x5c, 0xf0, 0xd4, 0x88, 0xc1, 0x01, 0x80,
-  0x20, 0x18, 0x50, 0x7f, 0x1c, 0xa0, 0x71, 0x40, 0x42, 0x39, 0x1c, 0x8c,
-  0x18, 0x1c, 0x00, 0x08, 0x82, 0x01, 0x05, 0xca, 0x01, 0x1a, 0x07, 0x81,
-  0x70, 0xc1, 0x30, 0x35, 0xc2, 0x01, 0x1b, 0x07, 0x70, 0xc1, 0x53, 0x23,
-  0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0x91, 0x72, 0xd0, 0xc6, 0x41, 0x88,
-  0xf9, 0x70, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x54, 0x29, 0x07,
-  0x6d, 0x1c, 0x04, 0xc2, 0x05, 0xc3, 0x5c, 0xf0, 0xd4, 0x1d, 0x4f, 0x1d,
-  0xfb, 0x06, 0xc3, 0xdc, 0xc8, 0x06, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3,
-  0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x81, 0x97, 0xca, 0x41, 0x1e, 0x07,
-  0x38, 0x1c, 0x98, 0x72, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30,
-  0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23, 0x06, 0x08,
-  0x00, 0x82, 0x60, 0xb0, 0xc1, 0x72, 0x00, 0xca, 0x41, 0x42, 0x04, 0x23,
-  0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0xc5, 0x72, 0x10, 0xca, 0x41, 0x42,
-  0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0xc9, 0x72, 0x20, 0xca,
-  0x41, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0xe8, 0x72,
-  0x10, 0xca, 0x81, 0x18, 0x07, 0x41, 0x2b, 0x07, 0x72, 0x1c, 0xbc, 0x72,
-  0x30, 0x9a, 0x10, 0x00, 0x17, 0x3c, 0x35, 0x4b, 0x30, 0x27, 0xc3, 0x0d,
-  0x34, 0x26, 0xcb, 0x01, 0x18, 0xcc, 0x32, 0x88, 0xc9, 0x9c, 0x04, 0x46,
-  0xc3, 0x81, 0x0d, 0x07, 0xf1, 0x19, 0x8e, 0x00, 0xa3, 0x1b, 0x0e, 0x88,
-  0x6f, 0x96, 0x61, 0x4c, 0xcc, 0x24, 0x30, 0x1c, 0x0e, 0xc2, 0x28, 0x3e,
-  0x16, 0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0xc0, 0x90, 0x8f,
-  0x15, 0x41, 0x7c, 0x8a, 0xe0, 0xe5, 0x40, 0x87, 0x1b, 0x02, 0x5d, 0x0e,
-  0xc0, 0x60, 0x96, 0x81, 0x4c, 0xca, 0x24, 0xb0, 0x01, 0x8c, 0x03, 0xf8,
-  0xcc, 0x12, 0xa8, 0x89, 0xfd, 0x70, 0x40, 0xc4, 0x67, 0x96, 0x40, 0x4d,
-  0x86, 0x23, 0xd6, 0x08, 0x8c, 0x03, 0xe1, 0x9b, 0x65, 0x38, 0x13, 0x35,
-  0x09, 0x8c, 0x8d, 0xc2, 0x38, 0x88, 0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18,
-  0xe6, 0x82, 0xa7, 0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xce,
-  0x39, 0xd0, 0xe1, 0x86, 0xa0, 0x9c, 0x03, 0x30, 0x98, 0x65, 0x40, 0x93,
-  0x34, 0x09, 0x2c, 0x8d, 0x83, 0x21, 0x3e, 0xb3, 0x04, 0x6a, 0x62, 0x04,
-  0x1b, 0x07, 0xf0, 0x99, 0x25, 0x50, 0x93, 0x81, 0x96, 0x47, 0x23, 0x13,
-  0xac, 0x4c, 0x08, 0x34, 0x11, 0xd2, 0x84, 0x1d, 0x03, 0x33, 0xb9, 0x60,
-  0x18, 0x5b, 0xe3, 0xe0, 0x8d, 0x83, 0xf8, 0x0c, 0x47, 0xc4, 0x19, 0x1c,
-  0x07, 0xc4, 0x37, 0xcb, 0xb0, 0x26, 0x6e, 0x12, 0x58, 0x1c, 0x07, 0x72,
-  0x16, 0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x60,
-  0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xd4, 0x73, 0xa0, 0xc3, 0x0d, 0xc1,
-  0x3c, 0x07, 0x60, 0x30, 0xcb, 0xc0, 0x26, 0x6d, 0x12, 0xd8, 0x90, 0xc7,
-  0x01, 0x7c, 0x66, 0x09, 0xe4, 0xc4, 0xec, 0x38, 0x20, 0xe2, 0x33, 0x4b,
-  0x20, 0x27, 0xc3, 0x11, 0x7c, 0x76, 0xc7, 0x81, 0xf0, 0xcd, 0x32, 0xbc,
-  0x89, 0x9c, 0x04, 0xd6, 0x67, 0x78, 0x1c, 0xc4, 0xc7, 0x02, 0x87, 0x3e,
-  0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f,
-  0x11, 0x20, 0x1d, 0xe8, 0x70, 0x43, 0xe0, 0xcf, 0x01, 0x18, 0xcc, 0x32,
-  0xc0, 0x49, 0x9c, 0x04, 0x06, 0xca, 0xc1, 0x10, 0x9f, 0x59, 0x02, 0x39,
-  0x31, 0xa2, 0x94, 0x03, 0xf8, 0xcc, 0x12, 0xc8, 0xc9, 0x40, 0xcb, 0xa3,
-  0xb1, 0x09, 0xd6, 0x26, 0x04, 0x9c, 0x08, 0x71, 0xe2, 0xd2, 0x81, 0x9b,
-  0x5c, 0x30, 0xcc, 0x05, 0x4f, 0xdd, 0xf6, 0xd4, 0xc5, 0x71, 0x30, 0xcc,
-  0xa1, 0x6e, 0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01,
-  0x80, 0x20, 0x18, 0x78, 0x2e, 0x1d, 0xf8, 0x73, 0xd0, 0xcb, 0xc1, 0x4a,
-  0x07, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30,
-  0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06,
-  0x5b, 0x4d, 0x07, 0x25, 0x1d, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20,
-  0x08, 0x06, 0x9b, 0x4d, 0x07, 0x26, 0x1d, 0x24, 0x44, 0x30, 0x62, 0x80,
-  0x00, 0x20, 0x08, 0x06, 0xdb, 0x4d, 0x07, 0x27, 0x1d, 0x24, 0x44, 0x30,
-  0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0xcb, 0x4f, 0x07, 0x26, 0x1d, 0x9c,
-  0x73, 0x10, 0xc8, 0x74, 0x70, 0xcf, 0x01, 0x4d, 0x07, 0xa3, 0x09, 0x01,
-  0x70, 0xc1, 0x53, 0xb3, 0x04, 0x73, 0x32, 0xd0, 0xf2, 0x98, 0x86, 0x8f,
-  0xc0, 0xe7, 0xd0, 0x23, 0x2c, 0x01, 0x26, 0x82, 0x9c, 0xc0, 0xe7, 0x10,
-  0x26, 0xb3, 0x0c, 0x74, 0x62, 0x27, 0xb9, 0x36, 0x1c, 0x91, 0x3e, 0xf9,
-  0x1c, 0x0c, 0xdf, 0xa9, 0xcf, 0x30, 0xc3, 0x0d, 0x01, 0x39, 0x07, 0x64,
-  0x50, 0x43, 0xa0, 0xc3, 0x11, 0xe0, 0xd6, 0xcf, 0xc1, 0xf0, 0x55, 0x20,
-  0xe8, 0x89, 0xdb, 0x30, 0xc3, 0x0d, 0xc1, 0x39, 0x07, 0x64, 0x50, 0xc1,
-  0xa0, 0xb3, 0x0c, 0x75, 0xa2, 0x2a, 0xc1, 0xad, 0x72, 0x30, 0xcc, 0x89,
-  0x6f, 0x30, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x78, 0x66, 0x1d,
-  0xd8, 0x74, 0x50, 0xcf, 0xc1, 0x58, 0x07, 0xa3, 0x09, 0x01, 0x30, 0x9a,
-  0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x1c, 0x32,
-  0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x5b, 0x5b, 0x07, 0x3d, 0x1d, 0x1c,
-  0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x9b, 0x5b, 0x07, 0x3e,
-  0x1d, 0x30, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xdb, 0x5b,
-  0x07, 0x3f, 0x1d, 0x48, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06,
-  0xcb, 0x5d, 0x07, 0x3e, 0x1d, 0xfc, 0x73, 0x10, 0xa8, 0x75, 0xf0, 0xd2,
-  0x01, 0x5b, 0x07, 0xa3, 0x09, 0x01, 0x70, 0xc1, 0x53, 0xb3, 0x04, 0xaa,
-  0x32, 0xdc, 0x10, 0x6f, 0x70, 0x1d, 0x80, 0xc1, 0x2c, 0xc3, 0x9d, 0xe0,
-  0x49, 0x50, 0xfa, 0x1c, 0x84, 0x75, 0x00, 0x17, 0x3c, 0x35, 0x62, 0x70,
-  0x00, 0x20, 0x08, 0x06, 0x54, 0x5e, 0x07, 0x62, 0x1d, 0x8c, 0xd0, 0x4c,
-  0x07, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0xe9, 0x75, 0x20, 0xd6,
-  0x41, 0x20, 0x5c, 0x30, 0x4c, 0xf5, 0x73, 0x60, 0xd6, 0x01, 0x5c, 0xf0,
-  0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50, 0x7e, 0x1d, 0x9c, 0x75,
-  0xb0, 0x6f, 0x38, 0x1d, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x01, 0xf5,
-  0xd7, 0xc1, 0x59, 0x07, 0x81, 0x70, 0xc1, 0x30, 0x17, 0x3c, 0x75, 0xc7,
-  0x53, 0x67, 0xce, 0xc1, 0x30, 0xd7, 0xbf, 0xc1, 0x30, 0x47, 0x0c, 0x73,
-  0xc4, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xe0, 0x8d, 0x76, 0x30,
-  0xd7, 0x81, 0x4c, 0x07, 0xa0, 0x1d, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82,
-  0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x91, 0xc8, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0xaa, 0x1d, 0xe8, 0x75, 0x90, 0x10,
-  0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0xab, 0x1d, 0xec, 0x75,
-  0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0xac, 0x1d,
-  0xf0, 0x75, 0x90, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c,
-  0xb4, 0x1d, 0xec, 0x75, 0xc0, 0xd3, 0x41, 0x70, 0xda, 0x01, 0x5b, 0x07,
-  0xa9, 0x1d, 0x8c, 0x26, 0x04, 0xc0, 0x05, 0x4f, 0xcd, 0x12, 0xa8, 0xca,
-  0x70, 0x83, 0xcb, 0xb1, 0x76, 0x00, 0x06, 0xb3, 0x0c, 0x79, 0xa2, 0x2a,
-  0x81, 0xb9, 0x74, 0x00, 0xd3, 0x41, 0x7c, 0x86, 0x23, 0x7e, 0x28, 0xa6,
-  0x03, 0xe2, 0x9b, 0x65, 0xd0, 0x93, 0x3e, 0x09, 0x4c, 0xa6, 0x03, 0x30,
-  0x8a, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x30,
-  0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0x6c, 0x3b, 0xd0, 0xe1, 0x86, 0x80,
-  0xb6, 0x03, 0x30, 0x98, 0x65, 0xd8, 0x13, 0x3e, 0x09, 0x6c, 0xd0, 0xe9,
-  0x00, 0x3e, 0xb3, 0x04, 0xa1, 0x62, 0x39, 0x1d, 0x10, 0xf1, 0x99, 0x25,
-  0x08, 0x95, 0xe1, 0x08, 0x35, 0xd2, 0xe9, 0x40, 0xf8, 0x66, 0x19, 0xfc,
-  0x24, 0x54, 0x02, 0x5b, 0xa3, 0x9d, 0x0e, 0xe2, 0x63, 0x81, 0x43, 0x9f,
+  0xc1, 0x16, 0xa3, 0x42, 0x88, 0x0a, 0x07, 0x11, 0x8c, 0x18, 0x20, 0x00,
+  0x08, 0x82, 0xc1, 0x26, 0xa3, 0x82, 0x88, 0x0a, 0x0c, 0x11, 0x8c, 0x18,
+  0x20, 0x00, 0x08, 0x82, 0xc1, 0x36, 0xa3, 0xc2, 0x88, 0x0a, 0x12, 0x11,
+  0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0xb2, 0xa3, 0x82, 0x88, 0x0a,
+  0xe3, 0x29, 0x04, 0x2e, 0x2a, 0xcc, 0xa7, 0x00, 0xa3, 0xc2, 0x68, 0x42,
+  0x00, 0x5c, 0xf0, 0xd4, 0x2c, 0x41, 0xdf, 0x0c, 0x37, 0xd4, 0x6e, 0x40,
+  0xa3, 0x02, 0x18, 0xcc, 0x32, 0xa8, 0xcd, 0xda, 0x04, 0xe5, 0x9b, 0x42,
+  0x89, 0x0a, 0x70, 0xc1, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40,
+  0xf5, 0xa8, 0x60, 0xa2, 0xc2, 0xee, 0x06, 0xf7, 0x29, 0x8c, 0x18, 0x1c,
+  0x00, 0x08, 0x82, 0x01, 0xe5, 0xa3, 0x82, 0x89, 0x0a, 0x81, 0x70, 0xc1,
+  0x30, 0x15, 0x9e, 0x82, 0x8a, 0x0a, 0x70, 0xc1, 0x53, 0x23, 0x06, 0x07,
+  0x00, 0x82, 0x60, 0x40, 0x89, 0xa9, 0xb0, 0xa2, 0x02, 0x18, 0xf0, 0xa7,
+  0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0xd4, 0x98, 0x0a, 0x2b, 0x2a,
+  0x04, 0xc2, 0x05, 0xc3, 0x5c, 0xf0, 0xd4, 0x1d, 0x4f, 0x9d, 0x6a, 0x0a,
+  0xc3, 0x5c, 0x38, 0x0a, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18,
+  0x1c, 0x00, 0x08, 0x82, 0x81, 0x77, 0xa6, 0xc2, 0x8d, 0x0a, 0xf6, 0x29,
+  0x90, 0xa9, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30,
+  0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00, 0x82,
+  0x60, 0xb0, 0xb9, 0xa9, 0xe0, 0xa3, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08,
+  0x00, 0x82, 0x60, 0xb0, 0xbd, 0xa9, 0xf0, 0xa3, 0x42, 0x42, 0x04, 0x23,
+  0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0xc1, 0xa9, 0x00, 0xa6, 0x42, 0x42,
+  0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0xe0, 0xa9, 0xf0, 0xa3,
+  0x02, 0x88, 0x0a, 0xc1, 0x9a, 0x0a, 0x30, 0x2a, 0xb4, 0xa9, 0x30, 0x9a,
+  0x10, 0x00, 0x17, 0x3c, 0x35, 0x4b, 0xd0, 0x37, 0xc3, 0x0d, 0xf2, 0x1b,
+  0xc0, 0xa9, 0x00, 0x06, 0xb3, 0x0c, 0x6c, 0xd3, 0x37, 0x81, 0xc9, 0xa7,
+  0x40, 0x9f, 0x42, 0x7c, 0x86, 0x23, 0xee, 0x37, 0xa8, 0x4f, 0x81, 0xf8,
+  0x66, 0x19, 0xda, 0x06, 0x6e, 0x02, 0xb3, 0x4f, 0x01, 0x7f, 0x83, 0xf8,
+  0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca, 0x02, 0x43, 0x3e,
+  0x56, 0x04, 0xf1, 0x29, 0x42, 0x4f, 0x05, 0x1d, 0x6e, 0x08, 0xf0, 0x54,
+  0x00, 0x83, 0x59, 0x06, 0xb7, 0x79, 0x9b, 0xc0, 0x06, 0xff, 0x14, 0xe0,
+  0x33, 0x4b, 0x40, 0x37, 0xd6, 0x9f, 0x02, 0x11, 0x9f, 0x59, 0x02, 0xba,
+  0x19, 0x8e, 0x10, 0xe1, 0xc0, 0x3f, 0x05, 0xe1, 0x9b, 0x65, 0x88, 0x1b,
+  0xba, 0x09, 0x6c, 0x84, 0x83, 0xff, 0x14, 0xe2, 0x63, 0x81, 0x43, 0x9f,
   0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4, 0xa7,
-  0x88, 0xf0, 0x0e, 0x74, 0xb8, 0x21, 0xf8, 0xed, 0x00, 0x0c, 0x66, 0x19,
-  0xfe, 0x04, 0x54, 0x02, 0x1b, 0xeb, 0x60, 0x88, 0xcf, 0x2c, 0x41, 0xa8,
-  0x18, 0x61, 0xd6, 0x01, 0x7c, 0x66, 0x09, 0x42, 0x65, 0xa0, 0xe5, 0xd1,
-  0xf6, 0x04, 0xe3, 0x13, 0xe2, 0x4f, 0x04, 0x50, 0x51, 0xc7, 0xa0, 0x4f,
-  0x2e, 0x18, 0xc6, 0xca, 0x3a, 0x48, 0xeb, 0x20, 0x3e, 0xc3, 0x11, 0x6b,
-  0xa7, 0xd6, 0x01, 0xf1, 0xcd, 0x32, 0x88, 0x4a, 0xa9, 0x04, 0xb6, 0xd6,
-  0x01, 0xdb, 0xc5, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53,
-  0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0xef, 0x1d, 0xe8, 0x70,
-  0x43, 0xd0, 0xde, 0x01, 0x18, 0xcc, 0x32, 0x8c, 0x0a, 0xa9, 0x04, 0x36,
-  0xcc, 0x75, 0x00, 0x9f, 0x59, 0x82, 0x54, 0x31, 0xb8, 0x0e, 0x88, 0xf8,
-  0xcc, 0x12, 0xa4, 0xca, 0x70, 0x84, 0xdd, 0xc5, 0x75, 0x20, 0x7c, 0xb3,
-  0x0c, 0xa6, 0x92, 0x2a, 0x81, 0xdd, 0x9d, 0x5c, 0x07, 0xf1, 0xb1, 0xc0,
+  0x88, 0x52, 0x15, 0x74, 0xb8, 0x21, 0x18, 0x55, 0x01, 0x0c, 0x66, 0x19,
+  0xe4, 0x66, 0x6e, 0x02, 0x3b, 0x51, 0x61, 0x88, 0xcf, 0x2c, 0x01, 0xdd,
+  0x18, 0xa1, 0xa2, 0x02, 0x7c, 0x66, 0x09, 0xe8, 0x66, 0xa0, 0xe5, 0xd1,
+  0xdc, 0x06, 0x7b, 0x1b, 0x42, 0x6e, 0x84, 0xb9, 0xd1, 0xc1, 0x01, 0x6e,
+  0x2e, 0x18, 0xc6, 0x52, 0x54, 0x68, 0x51, 0x21, 0x3e, 0xc3, 0x11, 0xb0,
+  0xe0, 0xa2, 0x02, 0xf1, 0xcd, 0x32, 0xd4, 0x0d, 0xde, 0x04, 0xf6, 0xa2,
+  0x42, 0x2c, 0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53,
+  0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0xb3, 0x2a, 0xe8, 0x70,
+  0x43, 0x10, 0xab, 0x02, 0x18, 0xcc, 0x32, 0xd8, 0xcd, 0xdd, 0x04, 0x36,
+  0xdc, 0xa8, 0x00, 0x9f, 0x59, 0x02, 0xbe, 0x31, 0x1a, 0x15, 0x88, 0xf8,
+  0xcc, 0x12, 0xf0, 0xcd, 0x70, 0xc4, 0x2e, 0xd4, 0xa8, 0x20, 0x7c, 0xb3,
+  0x0c, 0x79, 0xc3, 0x37, 0x81, 0xf1, 0x82, 0x8d, 0x0a, 0xf1, 0xb1, 0xc0,
   0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08,
-  0xe2, 0x53, 0x84, 0x7e, 0x07, 0x3a, 0xdc, 0x10, 0xe0, 0x77, 0x00, 0x06,
-  0xb3, 0x0c, 0xa7, 0x82, 0x2a, 0x81, 0xe9, 0x75, 0x30, 0xc4, 0x67, 0x96,
-  0x20, 0x55, 0x8c, 0xf8, 0xeb, 0x00, 0x3e, 0xb3, 0x04, 0xa9, 0x32, 0xd0,
-  0xf2, 0x68, 0xa3, 0x82, 0x91, 0x0a, 0x71, 0x2a, 0x02, 0xaa, 0x88, 0x7c,
-  0x50, 0x2a, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0xb7, 0x3d, 0x75, 0x6b, 0x1d,
-  0x0c, 0x73, 0xa2, 0x1c, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62,
-  0x70, 0x00, 0x20, 0x08, 0x06, 0x1e, 0x8a, 0x07, 0xf8, 0x1d, 0xdc, 0x76,
-  0x50, 0xe2, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2,
-  0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08,
-  0x82, 0xc1, 0xf6, 0xe2, 0xc1, 0x7f, 0x07, 0x09, 0x11, 0x8c, 0x18, 0x20,
-  0x00, 0x08, 0x82, 0xc1, 0x06, 0xe3, 0x01, 0x88, 0x07, 0x09, 0x11, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x16, 0xe3, 0x41, 0x88, 0x07, 0x09,
-  0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0x92, 0xe3, 0x01, 0x88,
-  0x07, 0xe1, 0x1d, 0x04, 0x2c, 0x1e, 0xc4, 0x77, 0xe0, 0xe2, 0xc1, 0x68,
-  0x42, 0x00, 0x5c, 0xf0, 0xd4, 0x2c, 0x81, 0xaa, 0x0c, 0xb4, 0x3c, 0xa6,
-  0x51, 0x27, 0xe4, 0x3b, 0xd0, 0x09, 0x4b, 0xdc, 0x89, 0x90, 0x2a, 0xe4,
-  0x3b, 0xe0, 0x89, 0xd9, 0x9e, 0x7c, 0x07, 0xf0, 0x99, 0x65, 0x58, 0x95,
-  0x56, 0xa1, 0xbd, 0xe1, 0x08, 0xdc, 0xa3, 0xef, 0x60, 0xf8, 0x2e, 0xf7,
-  0x86, 0x19, 0x6e, 0x08, 0x7e, 0x3b, 0x20, 0x83, 0x1a, 0x02, 0x1d, 0x8e,
-  0x28, 0xf0, 0x3b, 0x18, 0xbe, 0x0a, 0x04, 0xbd, 0x63, 0x98, 0xe1, 0x86,
-  0x40, 0xbc, 0x03, 0x32, 0xa8, 0x60, 0xd0, 0x59, 0x06, 0x56, 0x09, 0x97,
-  0xe0, 0x4c, 0x3b, 0x18, 0xe6, 0x7a, 0x39, 0x18, 0x66, 0xc4, 0xe0, 0x00,
-  0x40, 0x10, 0x0c, 0xbc, 0x30, 0x0f, 0x62, 0x3c, 0x80, 0xef, 0xc0, 0xc7,
-  0x83, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18,
-  0x4d, 0x20, 0x86, 0x22, 0x0e, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83,
-  0x0d, 0xcd, 0x03, 0x1c, 0x0f, 0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10,
-  0x04, 0x83, 0x2d, 0xcd, 0x83, 0x1c, 0x0f, 0x18, 0x22, 0x18, 0x31, 0x40,
-  0x00, 0x10, 0x04, 0x83, 0x4d, 0xcd, 0x03, 0x1d, 0x0f, 0x24, 0x22, 0x18,
-  0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x45, 0xce, 0x83, 0x1c, 0x0f, 0xf4,
-  0x3b, 0x08, 0xca, 0x3c, 0x50, 0xf1, 0xe0, 0xcc, 0x83, 0xd1, 0x84, 0x00,
-  0xb8, 0xe0, 0xa9, 0x59, 0x82, 0x70, 0x19, 0x6e, 0x60, 0xbf, 0x35, 0x0f,
-  0xc0, 0x60, 0x96, 0xc1, 0x55, 0x5e, 0x25, 0xa8, 0xfa, 0x0e, 0x78, 0x3c,
-  0x80, 0x0b, 0x9e, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x8a, 0xce,
-  0x83, 0x1e, 0x0f, 0xe4, 0xcf, 0xc5, 0x83, 0x11, 0x83, 0x03, 0x00, 0x41,
-  0x30, 0xa0, 0xea, 0x3c, 0xe8, 0xf1, 0x20, 0x10, 0x2e, 0x18, 0xa6, 0xf0,
-  0x3b, 0x08, 0xf3, 0x00, 0x2e, 0x78, 0x6a, 0xc4, 0xe0, 0x00, 0x40, 0x10,
-  0x0c, 0xa8, 0x3c, 0x0f, 0xc4, 0x3c, 0x00, 0x83, 0x19, 0x0f, 0x46, 0x0c,
-  0x0e, 0x00, 0x04, 0xc1, 0x80, 0xd2, 0xf3, 0x40, 0xcc, 0x83, 0x40, 0xb8,
-  0x60, 0x98, 0x0b, 0x9e, 0xba, 0xe3, 0xa9, 0x0b, 0xef, 0x60, 0x98, 0xc3,
-  0xe7, 0x60, 0x98, 0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00,
-  0x41, 0x30, 0xf0, 0xfc, 0x3c, 0x70, 0xf3, 0xa0, 0xc5, 0x83, 0x3d, 0x0f,
-  0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34,
-  0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb6,
-  0x52, 0x0f, 0xea, 0x3c, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10,
-  0x0c, 0x36, 0x53, 0x0f, 0xec, 0x3c, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01,
-  0x40, 0x10, 0x0c, 0xb6, 0x53, 0x0f, 0xee, 0x3c, 0x48, 0x88, 0x60, 0xc4,
-  0x40, 0x01, 0x40, 0x10, 0x0c, 0x96, 0x57, 0x0f, 0xec, 0x3c, 0xb8, 0xf1,
-  0x20, 0x10, 0xf5, 0xe0, 0xcc, 0x03, 0x52, 0x0f, 0x46, 0x13, 0x02, 0xe0,
-  0x82, 0xa7, 0x66, 0x09, 0xc2, 0x65, 0xb8, 0x21, 0x05, 0x83, 0x53, 0x0f,
-  0xc0, 0x60, 0x96, 0x01, 0x56, 0xc2, 0x25, 0xb0, 0x14, 0x0f, 0x56, 0x3c,
-  0x88, 0xcf, 0x70, 0x84, 0x0b, 0x06, 0x2c, 0x1e, 0x10, 0xdf, 0x2c, 0x43,
-  0xac, 0xd0, 0x4a, 0x60, 0x2d, 0x1e, 0xbc, 0x60, 0x10, 0x1f, 0x0b, 0x06,
-  0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x60, 0xc8, 0xc7, 0x8a, 0x20,
-  0x3e, 0x45, 0xc4, 0x7a, 0xa0, 0xc3, 0x0d, 0xc1, 0xab, 0x07, 0x60, 0x30,
-  0xcb, 0x20, 0x2b, 0xb3, 0x12, 0xd8, 0x50, 0xe3, 0x01, 0x7c, 0x66, 0x09,
-  0x70, 0xc5, 0x68, 0x3c, 0x20, 0xe2, 0x33, 0x4b, 0x80, 0x2b, 0xc3, 0x11,
-  0x39, 0x18, 0xd4, 0x78, 0x20, 0x7c, 0xb3, 0x0c, 0xb5, 0x82, 0x2b, 0x81,
-  0xe9, 0x60, 0x60, 0xe3, 0x41, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30,
-  0x17, 0x3c, 0x65, 0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xc1, 0xeb,
-  0x81, 0x0e, 0x37, 0x04, 0xba, 0x1e, 0x80, 0xc1, 0x2c, 0x83, 0xad, 0xdc,
-  0x4a, 0x60, 0x3e, 0x1e, 0x0c, 0xf1, 0x99, 0x25, 0xc0, 0x15, 0x23, 0xc2,
-  0x3c, 0x80, 0xcf, 0x2c, 0x01, 0xae, 0x0c, 0xb4, 0x3c, 0x9a, 0xac, 0x60,
-  0xb3, 0x42, 0xd8, 0x8a, 0x70, 0x2b, 0xa8, 0x28, 0xd0, 0xca, 0x05, 0xc3,
-  0x18, 0x98, 0x07, 0x64, 0x1e, 0xc4, 0x67, 0x38, 0x02, 0x16, 0xca, 0x3c,
-  0x20, 0xbe, 0x59, 0x86, 0x5c, 0xe1, 0x95, 0xc0, 0xcc, 0x3c, 0x88, 0x85,
-  0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca, 0x02, 0x43,
-  0x3e, 0x56, 0x04, 0xf1, 0x29, 0x42, 0xdd, 0x03, 0x1d, 0x6e, 0x08, 0xd0,
-  0x3d, 0x00, 0x83, 0x59, 0x06, 0x5d, 0xd9, 0x95, 0xc0, 0x06, 0x37, 0x0f,
-  0xe0, 0x33, 0x4b, 0x00, 0x2e, 0xb6, 0xe6, 0x01, 0x11, 0x9f, 0x59, 0x02,
-  0x70, 0x19, 0x8e, 0xd8, 0x05, 0x36, 0x0f, 0x84, 0x6f, 0x96, 0xa1, 0x57,
-  0xc0, 0x25, 0x30, 0x5e, 0x68, 0xf3, 0x20, 0x3e, 0x16, 0x38, 0xf4, 0xb9,
-  0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a,
-  0xa8, 0xf7, 0x40, 0x87, 0x1b, 0x82, 0x79, 0x0f, 0xc0, 0x60, 0x96, 0xc1,
-  0x57, 0x7e, 0x25, 0xb0, 0x3a, 0x0f, 0x86, 0xf8, 0xcc, 0x12, 0x80, 0x8b,
-  0x11, 0x7a, 0x1e, 0xc0, 0x67, 0x96, 0x00, 0x5c, 0x06, 0x5a, 0x1e, 0x4d,
-  0x57, 0xb0, 0x5d, 0x21, 0x7c, 0x45, 0xf8, 0x15, 0xd6, 0xe0, 0x95, 0x0b,
-  0x86, 0xb9, 0xe0, 0xa9, 0xdb, 0x9e, 0x3a, 0x33, 0x0f, 0x86, 0xb9, 0xbe,
-  0x0e, 0x86, 0x39, 0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10,
-  0x04, 0x03, 0x6f, 0xe4, 0x83, 0x79, 0x0f, 0x64, 0x3d, 0x00, 0xf9, 0x60,
-  0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13,
-  0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0x53,
-  0xf9, 0x40, 0xdf, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1,
-  0x60, 0x5b, 0xf9, 0x60, 0xdf, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00,
-  0x04, 0xc1, 0x60, 0x63, 0xf9, 0x80, 0xdf, 0x83, 0x84, 0x08, 0x46, 0x0c,
-  0x14, 0x00, 0x04, 0xc1, 0x60, 0xa1, 0xf9, 0x60, 0xdf, 0x03, 0x5e, 0x0f,
-  0x82, 0x93, 0x0f, 0xd8, 0x3d, 0x48, 0xf9, 0x60, 0x34, 0x21, 0x00, 0x2e,
-  0x78, 0x6a, 0x96, 0x20, 0x5c, 0x06, 0x5a, 0x1e, 0xd3, 0x60, 0x15, 0xdd,
-  0x1e, 0x56, 0x85, 0x25, 0x5c, 0x45, 0x00, 0x17, 0xdd, 0x1e, 0x5e, 0x65,
-  0x96, 0x41, 0x5c, 0xc8, 0xc5, 0x15, 0x83, 0xe1, 0x88, 0x59, 0x0c, 0xdc,
-  0x3d, 0x18, 0xbe, 0xa3, 0xc5, 0x60, 0x98, 0xe1, 0x86, 0x20, 0xd7, 0x03,
-  0x32, 0xa8, 0x21, 0xd0, 0xe1, 0x88, 0x7f, 0x90, 0xf7, 0x60, 0xf8, 0x2a,
-  0x10, 0xf4, 0x42, 0x62, 0x98, 0xe1, 0x86, 0x80, 0xd7, 0x03, 0x32, 0xa8,
-  0x60, 0xd0, 0x59, 0x86, 0x71, 0xc1, 0x97, 0xe0, 0x40, 0x3d, 0x18, 0xe6,
-  0x6e, 0x3b, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xbc, 0x9d,
-  0x0f, 0x56, 0x3e, 0x50, 0xf7, 0x00, 0xe7, 0x83, 0xd1, 0x84, 0x00, 0x18,
-  0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x0e,
-  0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x4d, 0xec, 0x03, 0x99, 0x0f,
-  0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x6d, 0xec, 0x83,
-  0x99, 0x0f, 0x18, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x8d,
-  0xec, 0x03, 0x9a, 0x0f, 0x24, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04,
-  0x83, 0x85, 0xed, 0x83, 0x99, 0x0f, 0xe8, 0x3d, 0x08, 0x7e, 0x3e, 0x20,
-  0xf9, 0x20, 0xec, 0x83, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xa9, 0x59, 0x02,
-  0x7c, 0x19, 0x6e, 0x30, 0xc7, 0xa0, 0xec, 0x03, 0x30, 0x98, 0x65, 0x28,
-  0x17, 0x73, 0x09, 0xea, 0xdd, 0x03, 0x9b, 0x0f, 0xe0, 0x82, 0xa7, 0x46,
-  0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x72, 0xfb, 0xe0, 0xe6, 0x83, 0x76,
-  0x0c, 0x50, 0x3e, 0x18, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0xea, 0xed,
-  0x83, 0x9b, 0x0f, 0x02, 0xe1, 0x82, 0x61, 0x4a, 0xde, 0x83, 0x9d, 0x0f,
-  0xe0, 0x82, 0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x9a, 0xfb,
-  0x80, 0xe7, 0x03, 0x9d, 0x68, 0xf9, 0x60, 0xc4, 0xe0, 0x00, 0x40, 0x10,
-  0x0c, 0x28, 0xba, 0x0f, 0x78, 0x3e, 0x08, 0x84, 0x0b, 0x86, 0xb9, 0xe0,
-  0xa9, 0x3b, 0x9e, 0xba, 0x5d, 0x0f, 0x86, 0x39, 0xf9, 0x0e, 0x86, 0x39,
-  0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x0f,
-  0xef, 0x03, 0xb4, 0x0f, 0x4e, 0x3e, 0xa8, 0xfb, 0x60, 0x34, 0x21, 0x00,
-  0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88,
-  0x44, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0xfb, 0xfb, 0xe0, 0xed,
-  0x83, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0x03, 0xfd,
-  0x00, 0xee, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60,
-  0x0b, 0xfd, 0x20, 0xee, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04,
-  0xc1, 0x60, 0x49, 0xfd, 0x00, 0xee, 0x83, 0x98, 0x0f, 0x02, 0xbe, 0x0f,
-  0xc2, 0x3e, 0xf0, 0xfb, 0x60, 0x34, 0x21, 0x00, 0x2e, 0x78, 0x6a, 0x96,
-  0x00, 0x5f, 0x86, 0x1b, 0x46, 0x32, 0x08, 0xfd, 0x00, 0x0c, 0x66, 0x19,
-  0xce, 0x05, 0x5f, 0x02, 0x1b, 0xf9, 0xa0, 0xe4, 0x83, 0xf8, 0x0c, 0x47,
-  0xa4, 0x64, 0x60, 0xf2, 0x01, 0xf1, 0xcd, 0x32, 0xa0, 0xcb, 0xba, 0x04,
-  0x76, 0xf2, 0x81, 0x4a, 0x06, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3,
-  0x5c, 0xf0, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0xc4, 0xea,
-  0x07, 0x3a, 0xdc, 0x10, 0xa4, 0x7e, 0x00, 0x06, 0xb3, 0x0c, 0xe9, 0xa2,
-  0x2e, 0x81, 0x0d, 0x2f, 0x1f, 0xc0, 0x67, 0x96, 0xe0, 0x5d, 0xcc, 0xe5,
-  0x03, 0x22, 0x3e, 0xb3, 0x04, 0xef, 0x32, 0x1c, 0x41, 0x93, 0xc1, 0xcb,
-  0x07, 0xc2, 0x37, 0xcb, 0xc0, 0x2e, 0xef, 0x12, 0x58, 0x4d, 0x06, 0x30,
-  0x1f, 0xc4, 0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16,
-  0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0xb6, 0x1f, 0xe8, 0x70, 0x43,
-  0x40, 0xfb, 0x01, 0x18, 0xcc, 0x32, 0xb4, 0x8b, 0xbb, 0x04, 0x86, 0xf3,
-  0xc1, 0x10, 0x9f, 0x59, 0x82, 0x77, 0x31, 0x62, 0xe7, 0x03, 0xf8, 0xcc,
-  0x12, 0xbc, 0xcb, 0x40, 0xcb, 0xa3, 0xa5, 0x0b, 0xa6, 0x2e, 0x44, 0xbb,
-  0x08, 0xee, 0xe2, 0xa7, 0xc2, 0xba, 0x5c, 0x30, 0x8c, 0xe9, 0x7c, 0xe0,
-  0xf3, 0x41, 0x7c, 0x86, 0x23, 0x54, 0xe3, 0xe7, 0x03, 0xe2, 0x9b, 0x65,
-  0x80, 0x97, 0x79, 0x09, 0x0c, 0xec, 0x83, 0xd5, 0x88, 0x8f, 0x05, 0x03,
-  0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10,
-  0x9f, 0x22, 0xc8, 0x3f, 0xd0, 0xe1, 0x86, 0x40, 0xfc, 0x03, 0x30, 0x98,
-  0x65, 0x88, 0x17, 0x79, 0x09, 0x6c, 0x40, 0xfb, 0x00, 0x3e, 0xb3, 0x04,
-  0xf7, 0x62, 0x65, 0x1f, 0x10, 0xf1, 0x99, 0x25, 0xb8, 0x97, 0xe1, 0x88,
-  0xda, 0x30, 0xfb, 0x40, 0xf8, 0x66, 0x19, 0xe8, 0xe5, 0x5e, 0x02, 0xb3,
-  0x8d, 0xb3, 0x0f, 0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xe0,
-  0x29, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0xf7, 0x0f, 0x74,
-  0xb8, 0x21, 0x68, 0xff, 0x00, 0x0c, 0x66, 0x19, 0xea, 0xc5, 0x5e, 0x02,
-  0x7b, 0xfb, 0x60, 0x88, 0xcf, 0x2c, 0xc1, 0xbd, 0x18, 0x41, 0xf7, 0x01,
-  0x7c, 0x66, 0x09, 0xee, 0x65, 0xa0, 0xe5, 0xd1, 0xe2, 0x05, 0x93, 0x17,
-  0xa2, 0x5e, 0x04, 0x7b, 0x01, 0x9d, 0x79, 0xb9, 0x60, 0x98, 0x0b, 0x9e,
-  0xba, 0xed, 0xa9, 0x03, 0xfb, 0x60, 0x98, 0xbb, 0xf3, 0x60, 0x98, 0x23,
-  0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xf0, 0xfa,
-  0x3f, 0x68, 0xff, 0x80, 0xf5, 0x03, 0xfd, 0x0f, 0x46, 0x13, 0x02, 0x60,
-  0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48,
-  0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x36, 0x12, 0x14, 0xe8, 0x3f,
-  0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb6, 0x12, 0x14,
-  0xea, 0x3f, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x36,
-  0x13, 0x14, 0xec, 0x3f, 0x48, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10,
-  0x0c, 0x16, 0x17, 0x14, 0xea, 0x3f, 0xb0, 0xfd, 0x20, 0x08, 0x41, 0xc1,
-  0xfc, 0x83, 0x11, 0x14, 0x46, 0x13, 0x02, 0xe0, 0x82, 0xa7, 0x66, 0x09,
-  0xf0, 0x65, 0xa0, 0xe5, 0x31, 0x8d, 0x71, 0x71, 0xfd, 0x41, 0x5c, 0x58,
-  0xa2, 0x5c, 0x84, 0x7b, 0x71, 0xfd, 0xc1, 0x5c, 0x66, 0x19, 0xf2, 0x65,
-  0x5f, 0x50, 0x33, 0x18, 0x8e, 0x98, 0x3d, 0xf4, 0x0f, 0x86, 0xef, 0x68,
-  0x6f, 0x98, 0xe1, 0x86, 0x60, 0xf6, 0x03, 0x32, 0xa8, 0x21, 0xd0, 0xe1,
-  0x88, 0xfc, 0x60, 0xff, 0x60, 0xf8, 0x2a, 0x10, 0xf4, 0xf6, 0x63, 0x98,
-  0xe1, 0x86, 0xc0, 0xf6, 0x03, 0x32, 0xa8, 0x60, 0xd0, 0x59, 0x06, 0x7d,
-  0x79, 0x99, 0xe0, 0xf4, 0x3e, 0x18, 0xe6, 0x62, 0x3d, 0x18, 0x66, 0xc4,
-  0xe0, 0x00, 0x40, 0x10, 0x0c, 0xbc, 0x1a, 0x14, 0x4a, 0x50, 0x20, 0xff,
-  0x40, 0x06, 0x85, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84,
-  0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x0e, 0x19, 0x31, 0x40, 0x00, 0x10,
-  0x04, 0x83, 0x8d, 0x07, 0x05, 0x16, 0x14, 0x0e, 0x22, 0x18, 0x31, 0x40,
-  0x00, 0x10, 0x04, 0x83, 0xad, 0x07, 0x85, 0x16, 0x14, 0x18, 0x22, 0x18,
-  0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0xcd, 0x07, 0x05, 0x17, 0x14, 0x24,
-  0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0xc5, 0x0c, 0x85, 0x16,
-  0x14, 0xdc, 0x3f, 0x08, 0x72, 0x50, 0xf0, 0xff, 0x60, 0x07, 0x85, 0xd1,
-  0x84, 0x00, 0xb8, 0xe0, 0xa9, 0x59, 0x82, 0x97, 0x19, 0x6e, 0x00, 0xcf,
-  0xe0, 0x07, 0x05, 0x30, 0x98, 0x65, 0xe0, 0x97, 0x7e, 0x09, 0x2a, 0xfd,
-  0x03, 0x18, 0x14, 0xe0, 0x82, 0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1,
-  0x80, 0x42, 0x43, 0x21, 0x06, 0x85, 0xf6, 0x13, 0x41, 0x61, 0xc4, 0xe0,
-  0x00, 0x40, 0x10, 0x0c, 0xa8, 0x34, 0x14, 0x62, 0x50, 0x08, 0x84, 0x0b,
-  0x86, 0x29, 0xf6, 0x0f, 0x6a, 0x50, 0x80, 0x0b, 0x9e, 0x1a, 0x31, 0x38,
-  0x00, 0x10, 0x04, 0x03, 0xaa, 0x0d, 0x05, 0x1b, 0x14, 0x68, 0xe4, 0x04,
-  0x85, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0xdc, 0x50, 0xb0, 0x41,
-  0x21, 0x10, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0xee, 0x78, 0xea, 0x6a, 0x3f,
-  0x18, 0xe6, 0xd8, 0x3d, 0x18, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4,
-  0xe0, 0x00, 0x40, 0x10, 0x0c, 0x3c, 0x39, 0x14, 0xc4, 0x50, 0x08, 0x41,
-  0xe1, 0x0d, 0x85, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84,
-  0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10,
-  0x04, 0x83, 0x2d, 0x0f, 0x85, 0x34, 0x14, 0x12, 0x22, 0x18, 0x31, 0x40,
-  0x00, 0x10, 0x04, 0x83, 0x4d, 0x0f, 0x05, 0x35, 0x14, 0x12, 0x22, 0x18,
-  0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x6d, 0x0f, 0x85, 0x35, 0x14, 0x12,
-  0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x65, 0x14, 0x05, 0x35,
-  0x14, 0x56, 0x50, 0x08, 0xec, 0x50, 0xd8, 0x41, 0x01, 0x0f, 0x85, 0xd1,
-  0x84, 0x00, 0xb8, 0xe0, 0xa9, 0x59, 0x82, 0x97, 0x19, 0x6e, 0xe8, 0xcf,
-  0x60, 0x0f, 0x05, 0x30, 0x98, 0x65, 0xf0, 0x97, 0x97, 0x09, 0xac, 0xff,
-  0x83, 0xff, 0x0f, 0xe2, 0x33, 0x1c, 0x91, 0x82, 0x01, 0x08, 0x0a, 0xc4,
-  0x37, 0xcb, 0xf0, 0x2f, 0x22, 0x13, 0x58, 0x08, 0x0a, 0x2a, 0x18, 0xc4,
-  0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x18, 0xf2,
-  0xb1, 0x22, 0x88, 0x4f, 0x11, 0xa5, 0x28, 0xe8, 0x70, 0x43, 0x30, 0x8a,
-  0x02, 0x18, 0xcc, 0x32, 0x80, 0x4c, 0xc8, 0x04, 0x36, 0xa4, 0xa0, 0x00,
-  0x9f, 0x59, 0x02, 0x93, 0x31, 0x14, 0x14, 0x88, 0xf8, 0xcc, 0x12, 0x98,
-  0xcc, 0x70, 0x04, 0x0d, 0x06, 0x29, 0x28, 0x08, 0xdf, 0x2c, 0xc3, 0xc8,
-  0x98, 0x4c, 0x60, 0x35, 0x18, 0xa8, 0xa0, 0x10, 0x1f, 0x0b, 0x1c, 0xfa,
-  0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e,
-  0x45, 0xc0, 0xa2, 0xa0, 0xc3, 0x0d, 0x81, 0x2b, 0x0a, 0x60, 0x30, 0xcb,
-  0x40, 0x32, 0x25, 0x13, 0x98, 0x0c, 0x0a, 0x43, 0x7c, 0x66, 0x09, 0x4c,
-  0xc6, 0x88, 0x1a, 0x14, 0xe0, 0x33, 0x4b, 0x60, 0x32, 0x03, 0x2d, 0x8f,
-  0x06, 0x32, 0x58, 0xc8, 0x10, 0x24, 0x23, 0x94, 0x8c, 0x1f, 0x0a, 0x22,
-  0x73, 0xc1, 0x30, 0x46, 0x83, 0x02, 0x0e, 0x0a, 0xf1, 0x19, 0x8e, 0x20,
-  0x95, 0x1c, 0x14, 0x88, 0x6f, 0x96, 0xe1, 0x64, 0x54, 0x26, 0x30, 0x1d,
-  0x14, 0x4a, 0x25, 0x3e, 0x16, 0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e,
-  0xb2, 0xc0, 0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xf0, 0x45, 0x41, 0x87,
-  0x1b, 0x02, 0x5e, 0x14, 0xc0, 0x60, 0x96, 0x01, 0x65, 0x52, 0x26, 0xb0,
-  0x41, 0x0c, 0x05, 0xf8, 0xcc, 0x12, 0xb8, 0x8c, 0xfd, 0xa0, 0x40, 0xc4,
-  0x67, 0x96, 0xc0, 0x65, 0x86, 0x23, 0x5e, 0x05, 0x0c, 0x05, 0xe1, 0x9b,
-  0x65, 0x58, 0x19, 0x97, 0x09, 0x0c, 0x56, 0xc2, 0x50, 0x88, 0x8f, 0x05,
-  0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x88, 0xe4, 0x63, 0x45,
-  0x10, 0x9f, 0x22, 0xd2, 0x51, 0xd0, 0xe1, 0x86, 0xe0, 0x1c, 0x05, 0x30,
-  0x98, 0x65, 0x60, 0x99, 0x96, 0x09, 0x2c, 0x0d, 0x85, 0x21, 0x3e, 0xb3,
-  0x04, 0x2e, 0x63, 0x84, 0x1b, 0x0a, 0xf0, 0x99, 0x25, 0x70, 0x99, 0x81,
-  0x96, 0x47, 0x43, 0x19, 0x2c, 0x65, 0x08, 0x96, 0x11, 0x5a, 0x86, 0xae,
-  0x54, 0xe6, 0x82, 0x61, 0x2e, 0x78, 0xea, 0xb6, 0xa7, 0x4e, 0x07, 0x85,
-  0x61, 0x2e, 0xee, 0x83, 0x61, 0x8e, 0x18, 0xe6, 0x88, 0x61, 0x46, 0x0c,
-  0x0e, 0x00, 0x04, 0xc1, 0xc0, 0xbb, 0x47, 0xe1, 0x1c, 0x05, 0x53, 0x14,
-  0xe8, 0x51, 0x18, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18,
-  0x84, 0xd1, 0x04, 0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00, 0x41,
-  0x30, 0xd8, 0xfc, 0x51, 0x70, 0x47, 0x21, 0x21, 0x82, 0x11, 0x03, 0x04,
-  0x00, 0x41, 0x30, 0xd8, 0xfe, 0x51, 0x78, 0x47, 0x21, 0x21, 0x82, 0x11,
-  0x03, 0x04, 0x00, 0x41, 0x30, 0xd8, 0x40, 0x52, 0x80, 0x47, 0x21, 0x21,
-  0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x58, 0x50, 0x52, 0x78, 0x47,
-  0x01, 0x16, 0x85, 0x60, 0x1f, 0x05, 0x70, 0x14, 0xfa, 0x51, 0x18, 0x4d,
-  0x08, 0x80, 0x0b, 0x9e, 0x9a, 0x25, 0x78, 0x99, 0x81, 0x96, 0xc7, 0x34,
-  0xf4, 0x45, 0x34, 0x89, 0x7c, 0x61, 0x09, 0x7e, 0x11, 0x5c, 0x46, 0x34,
-  0x89, 0x7e, 0x99, 0x65, 0x80, 0x19, 0x99, 0x11, 0xd5, 0x60, 0x38, 0x42,
-  0xf6, 0xc4, 0x51, 0x18, 0xbe, 0x9b, 0xbd, 0x61, 0x86, 0x1b, 0x82, 0x56,
-  0x14, 0xc8, 0xa0, 0x86, 0x40, 0x87, 0x23, 0xe6, 0xc5, 0x1c, 0x85, 0xe1,
-  0xab, 0x40, 0xd0, 0xab, 0x97, 0x61, 0x86, 0x1b, 0x02, 0x58, 0x14, 0xc8,
-  0xa0, 0x82, 0x41, 0x67, 0x19, 0x62, 0xc6, 0x6c, 0x82, 0xa3, 0x43, 0x61,
-  0x98, 0x5b, 0xfd, 0x60, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xf0,
-  0x5e, 0x52, 0xf8, 0x47, 0xc1, 0x17, 0x05, 0x96, 0x14, 0x46, 0x13, 0x02,
-  0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a,
-  0x38, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x36, 0x9b, 0x14, 0x4c,
-  0x52, 0x38, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb6, 0x9b,
-  0x14, 0x4e, 0x52, 0x60, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c,
-  0x36, 0x9c, 0x14, 0x50, 0x52, 0x90, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40,
-  0x10, 0x0c, 0x16, 0xb0, 0x14, 0x4e, 0x52, 0x40, 0x47, 0x21, 0x98, 0x49,
-  0x01, 0x1f, 0x85, 0x9a, 0x14, 0x46, 0x13, 0x02, 0xe0, 0x82, 0xa7, 0x66,
-  0x09, 0xcc, 0x66, 0xb8, 0x41, 0x57, 0x83, 0x9c, 0x14, 0xc0, 0x60, 0x96,
-  0x61, 0x66, 0x68, 0x26, 0xa8, 0x71, 0x14, 0x54, 0x52, 0x80, 0x0b, 0x9e,
-  0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x4a, 0x2c, 0x85, 0x95, 0x14,
-  0xd8, 0x8f, 0x1f, 0x85, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0xc6,
-  0x52, 0x58, 0x49, 0x21, 0x10, 0x2e, 0x18, 0xa6, 0xcc, 0x51, 0x78, 0x49,
-  0x01, 0x2e, 0x78, 0x6a, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xa8, 0xb3,
-  0x14, 0x60, 0x52, 0x70, 0x99, 0x90, 0x14, 0x46, 0x0c, 0x0e, 0x00, 0x04,
-  0xc1, 0x80, 0x42, 0x4b, 0x01, 0x26, 0x85, 0x40, 0xb8, 0x60, 0x98, 0x0b,
-  0x9e, 0xba, 0xe3, 0xa9, 0x7b, 0x45, 0x61, 0x98, 0x33, 0xff, 0x60, 0x98,
-  0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xf0,
-  0xd8, 0x52, 0xe0, 0x49, 0x61, 0x1f, 0x85, 0xb4, 0x14, 0x46, 0x13, 0x02,
-  0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a,
-  0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb6, 0xb9, 0x14, 0xc6,
-  0x52, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x36, 0xba,
-  0x14, 0xc8, 0x52, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c,
-  0xb6, 0xba, 0x14, 0xca, 0x52, 0x48, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40,
-  0x10, 0x0c, 0x96, 0xbe, 0x14, 0xc8, 0x52, 0x28, 0x49, 0x21, 0x80, 0x4b,
-  0xa1, 0x26, 0x05, 0xb9, 0x14, 0x46, 0x13, 0x02, 0xe0, 0x82, 0xa7, 0x66,
-  0x09, 0xcc, 0x66, 0xb8, 0xe1, 0x5e, 0x83, 0xba, 0x14, 0xc0, 0x60, 0x96,
-  0xa1, 0x66, 0xcc, 0x26, 0xb0, 0x7b, 0x14, 0xf2, 0x51, 0x88, 0xcf, 0x70,
-  0x04, 0x0a, 0x06, 0xfa, 0x28, 0x10, 0xdf, 0x2c, 0x83, 0xcd, 0xe4, 0x4c,
-  0x60, 0xfb, 0x28, 0xa4, 0x60, 0x10, 0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30,
-  0xcc, 0x05, 0x4f, 0x59, 0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xfc,
-  0xa5, 0xa0, 0xc3, 0x0d, 0x41, 0x5f, 0x0a, 0x60, 0x30, 0xcb, 0x70, 0x33,
-  0x38, 0x13, 0xd8, 0x30, 0x92, 0x02, 0x7c, 0x66, 0x09, 0x7a, 0xc6, 0x44,
-  0x52, 0x20, 0xe2, 0x33, 0x4b, 0xd0, 0x33, 0xc3, 0x11, 0x33, 0x18, 0x8c,
-  0xa4, 0x20, 0x7c, 0xb3, 0x0c, 0x3a, 0xd3, 0x33, 0x81, 0xd1, 0x60, 0x40,
-  0x92, 0x42, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65,
-  0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xa1, 0x9a, 0x82, 0x0e, 0x37,
-  0x04, 0xa8, 0x29, 0x80, 0xc1, 0x2c, 0xc3, 0xce, 0xf0, 0x4c, 0x60, 0x2c,
-  0x29, 0x0c, 0xf1, 0x99, 0x25, 0xe8, 0x19, 0x23, 0x5e, 0x52, 0x80, 0xcf,
-  0x2c, 0x41, 0xcf, 0x0c, 0xb4, 0x3c, 0xda, 0xcd, 0x60, 0x38, 0x43, 0xec,
-  0x8c, 0xc0, 0x33, 0x7c, 0x28, 0xe4, 0xcc, 0x05, 0xc3, 0x98, 0x4b, 0x0a,
-  0x32, 0x29, 0xc4, 0x67, 0x38, 0xc2, 0x6f, 0x66, 0x52, 0x20, 0xbe, 0x59,
-  0x06, 0x9f, 0x09, 0x9b, 0xc0, 0x68, 0x52, 0xf8, 0x9b, 0xf8, 0x58, 0x30,
-  0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04,
-  0xf1, 0x29, 0x02, 0x37, 0x05, 0x1d, 0x6e, 0x08, 0x6c, 0x53, 0x00, 0x83,
-  0x59, 0x86, 0x9f, 0x01, 0x9b, 0xc0, 0x06, 0x9e, 0x14, 0xe0, 0x33, 0x4b,
-  0x50, 0x36, 0x96, 0x93, 0x02, 0x11, 0x9f, 0x59, 0x82, 0xb2, 0x19, 0x8e,
-  0x48, 0x1d, 0x9d, 0x14, 0x84, 0x6f, 0x96, 0x41, 0x6c, 0xca, 0x26, 0x30,
-  0xd5, 0xd9, 0x49, 0x21, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b,
-  0x9e, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x18, 0x4f, 0x41,
-  0x87, 0x1b, 0x82, 0xf0, 0x14, 0xc0, 0x60, 0x96, 0x61, 0x6c, 0xc8, 0x26,
-  0xb0, 0xb1, 0x14, 0x86, 0xf8, 0xcc, 0x12, 0x94, 0x8d, 0x11, 0x68, 0x29,
-  0xc0, 0x67, 0x96, 0xa0, 0x6c, 0x06, 0x5a, 0x1e, 0xed, 0x67, 0x30, 0xb0,
-  0x21, 0xc6, 0x46, 0x20, 0x1b, 0xb4, 0x0b, 0x9b, 0x0b, 0x86, 0xb9, 0xe0,
-  0xa9, 0xdb, 0x9e, 0x3a, 0x9a, 0x14, 0x86, 0xb9, 0x35, 0x14, 0x86, 0x39,
-  0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x2f,
-  0x3e, 0x85, 0xf0, 0x14, 0x40, 0x53, 0x70, 0x4f, 0x61, 0x34, 0x21, 0x00,
-  0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88,
-  0x44, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0xc3, 0x4f, 0x01, 0x3d,
-  0x85, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60, 0xcb, 0x4f,
-  0x21, 0x3d, 0x85, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x60,
-  0xd3, 0x4f, 0x41, 0x3d, 0x85, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04,
-  0xc1, 0x60, 0x11, 0x51, 0x21, 0x3d, 0x05, 0xd5, 0x14, 0x82, 0xfa, 0x14,
-  0x74, 0x53, 0xb8, 0x4f, 0x61, 0x34, 0x21, 0x00, 0x2e, 0x78, 0x6a, 0x96,
-  0xc0, 0x6c, 0x06, 0x5a, 0x1e, 0xd3, 0x88, 0x19, 0xbb, 0x25, 0x60, 0x86,
-  0x25, 0x66, 0x46, 0x28, 0x1b, 0xbb, 0x25, 0x68, 0xc6, 0xfe, 0x36, 0x88,
-  0x4d, 0x01, 0x3e, 0xb3, 0x0c, 0x67, 0x93, 0x36, 0x7d, 0x1b, 0x0c, 0x47,
-  0x84, 0x6e, 0xd0, 0x9b, 0xc2, 0xf0, 0x9d, 0xe8, 0x06, 0xc3, 0x0c, 0x37,
-  0x04, 0xa8, 0x29, 0x90, 0x41, 0x0d, 0x81, 0x0e, 0x47, 0x14, 0xe1, 0x29,
-  0x0c, 0x5f, 0x05, 0x82, 0xde, 0x31, 0xcc, 0x70, 0x43, 0xb0, 0x9a, 0x02,
-  0x19, 0x54, 0x30, 0xe8, 0x2c, 0x03, 0xda, 0xf4, 0x4d, 0x70, 0x6f, 0x29,
-  0x0c, 0x73, 0xa6, 0x28, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06,
-  0x9e, 0x8a, 0x0a, 0xfa, 0x29, 0xe4, 0xa6, 0x70, 0xa2, 0xc2, 0x68, 0x42,
-  0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43,
-  0x11, 0x87, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x16, 0xa3, 0x42,
-  0x88, 0x0a, 0x07, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x26,
-  0xa3, 0x82, 0x88, 0x0a, 0x0c, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
-  0xc1, 0x36, 0xa3, 0xc2, 0x88, 0x0a, 0x12, 0x11, 0x8c, 0x18, 0x28, 0x00,
-  0x08, 0x82, 0xc1, 0xb2, 0xa3, 0x82, 0x88, 0x0a, 0xe3, 0x29, 0x04, 0x2e,
-  0x2a, 0xcc, 0xa7, 0x00, 0xa3, 0xc2, 0x68, 0x42, 0x00, 0x5c, 0xf0, 0xd4,
-  0x2c, 0x41, 0xdf, 0x0c, 0x37, 0xd4, 0x6e, 0x40, 0xa3, 0x02, 0x18, 0xcc,
-  0x32, 0xa8, 0xcd, 0xda, 0x04, 0xe5, 0x9b, 0x42, 0x89, 0x0a, 0x70, 0xc1,
-  0x53, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0xf5, 0xa8, 0x60, 0xa2,
-  0xc2, 0xee, 0x06, 0xf7, 0x29, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x01,
-  0xe5, 0xa3, 0x82, 0x89, 0x0a, 0x81, 0x70, 0xc1, 0x30, 0x15, 0x9e, 0x82,
-  0x8a, 0x0a, 0x70, 0xc1, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40,
-  0x89, 0xa9, 0xb0, 0xa2, 0x02, 0x18, 0xf0, 0xa7, 0x30, 0x62, 0x70, 0x00,
-  0x20, 0x08, 0x06, 0xd4, 0x98, 0x0a, 0x2b, 0x2a, 0x04, 0xc2, 0x05, 0xc3,
-  0x5c, 0xf0, 0xd4, 0x1d, 0x4f, 0x9d, 0x6a, 0x0a, 0xc3, 0x5c, 0x38, 0x0a,
+  0xe2, 0x53, 0x84, 0xaf, 0x0a, 0x3a, 0xdc, 0x10, 0xf0, 0xaa, 0x00, 0x06,
+  0xb3, 0x0c, 0x7a, 0xb3, 0x37, 0x81, 0xf9, 0xa8, 0x30, 0xc4, 0x67, 0x96,
+  0x80, 0x6f, 0x8c, 0x18, 0x53, 0x01, 0x3e, 0xb3, 0x04, 0x7c, 0x33, 0xd0,
+  0xf2, 0x68, 0x76, 0x83, 0xdd, 0x0d, 0xa1, 0x37, 0xc2, 0xde, 0xb0, 0x06,
+  0xde, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0xdd, 0xf6, 0xd4, 0xbd, 0xa8, 0x30,
+  0xcc, 0x99, 0xa5, 0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1,
+  0x01, 0x80, 0x20, 0x18, 0x78, 0xec, 0x2a, 0xf0, 0xaa, 0xb0, 0xa7, 0x42,
+  0xba, 0x0a, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83,
+  0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08,
+  0x06, 0xdb, 0xbc, 0x0a, 0xe3, 0x2a, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00,
+  0x20, 0x08, 0x06, 0x1b, 0xbd, 0x0a, 0xe4, 0x2a, 0x24, 0x44, 0x30, 0x62,
+  0x80, 0x00, 0x20, 0x08, 0x06, 0x5b, 0xbd, 0x0a, 0xe5, 0x2a, 0x24, 0x44,
+  0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x4b, 0xbf, 0x0a, 0xe4, 0x2a,
+  0x94, 0xaa, 0x10, 0xc0, 0xab, 0x50, 0xab, 0x82, 0xbc, 0x0a, 0xa3, 0x09,
+  0x01, 0x70, 0xc1, 0x53, 0xb3, 0x04, 0x7d, 0x33, 0xd0, 0xf2, 0x98, 0x06,
+  0xda, 0xb0, 0x35, 0x71, 0x36, 0x2c, 0xa1, 0x36, 0x02, 0xdf, 0xb0, 0x35,
+  0xb1, 0x36, 0xb3, 0x0c, 0x7e, 0x03, 0x3a, 0x77, 0x1c, 0x0c, 0x47, 0xf0,
+  0x71, 0x70, 0xab, 0xc2, 0xf0, 0x5d, 0x1f, 0x07, 0xc3, 0x0c, 0x37, 0x04,
+  0xa2, 0x2a, 0x90, 0x41, 0x0d, 0x81, 0x0e, 0x47, 0xfc, 0xc3, 0xae, 0x0a,
+  0xc3, 0x57, 0x81, 0xa0, 0x17, 0x12, 0xc3, 0x0c, 0x37, 0x04, 0xa5, 0x2a,
+  0x90, 0x41, 0x05, 0x83, 0xce, 0x32, 0xfc, 0x0d, 0xed, 0x04, 0x97, 0xa6,
+  0xc2, 0x30, 0x07, 0x9a, 0xc2, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60,
+  0xe0, 0x91, 0xac, 0x40, 0xaf, 0xc2, 0xac, 0x0a, 0x21, 0x2b, 0x8c, 0x26,
+  0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31,
+  0x14, 0x71, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0x2b, 0x2b,
+  0xec, 0xab, 0x70, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c,
+  0x2c, 0x2b, 0xf0, 0xab, 0xc0, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20,
+  0x18, 0x6c, 0x2d, 0x2b, 0xf4, 0xab, 0x20, 0x11, 0xc1, 0x88, 0x81, 0x02,
+  0x80, 0x20, 0x18, 0x2c, 0x35, 0x2b, 0xf0, 0xab, 0xd0, 0xab, 0x42, 0x80,
+  0xb2, 0x42, 0xbb, 0x0a, 0x2a, 0x2b, 0x8c, 0x26, 0x04, 0xc0, 0x05, 0x4f,
+  0xcd, 0x12, 0xd0, 0xce, 0x70, 0xc3, 0x2b, 0x07, 0x2e, 0x2b, 0x80, 0xc1,
+  0x2c, 0x43, 0xe8, 0x88, 0x4e, 0x50, 0xb8, 0x2a, 0xfc, 0xab, 0x00, 0x17,
+  0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0xd4, 0xcd, 0x0a, 0x20,
+  0x2b, 0xd8, 0x72, 0x10, 0xaf, 0xc2, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18,
+  0x50, 0x38, 0x2b, 0x80, 0xac, 0x10, 0x08, 0x17, 0x0c, 0x53, 0xbb, 0x2a,
+  0x90, 0xac, 0x00, 0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06,
+  0x14, 0xcf, 0x0a, 0x25, 0x2b, 0xe8, 0x84, 0xbd, 0x0a, 0x23, 0x06, 0x07,
+  0x00, 0x82, 0x60, 0x40, 0xf5, 0xac, 0x50, 0xb2, 0x42, 0x20, 0x5c, 0x30,
+  0xcc, 0x05, 0x4f, 0xdd, 0xf1, 0xd4, 0x91, 0xaa, 0x30, 0xcc, 0xed, 0xa6,
+  0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20,
+  0x18, 0x78, 0x61, 0x2b, 0xc4, 0xac, 0x00, 0xaf, 0x82, 0xcf, 0x0a, 0xa3,
+  0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40,
+  0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x1b, 0xda,
+  0x0a, 0x38, 0x2b, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06,
+  0x5b, 0xda, 0x0a, 0x39, 0x2b, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20,
+  0x08, 0x06, 0x9b, 0xda, 0x0a, 0x3a, 0x2b, 0x24, 0x44, 0x30, 0x62, 0xa0,
+  0x00, 0x20, 0x08, 0x06, 0x8b, 0xdc, 0x0a, 0x39, 0x2b, 0xe8, 0xab, 0x10,
+  0x94, 0xad, 0xa0, 0xb2, 0xc2, 0xd9, 0x0a, 0xa3, 0x09, 0x01, 0x70, 0xc1,
+  0x53, 0xb3, 0x04, 0xb4, 0x33, 0xdc, 0xc0, 0xce, 0x81, 0xda, 0x0a, 0x60,
+  0x30, 0xcb, 0x30, 0x3a, 0xb4, 0x13, 0x18, 0xbb, 0x0a, 0xee, 0x2a, 0xc4,
+  0x67, 0x38, 0x42, 0x9e, 0x83, 0x77, 0x15, 0x88, 0x6f, 0x96, 0x81, 0x74,
+  0x4e, 0x27, 0x30, 0x78, 0x15, 0xe6, 0x39, 0x88, 0x8f, 0x05, 0x03, 0x7d,
+  0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f,
+  0x22, 0xe8, 0x56, 0xd0, 0xe1, 0x86, 0x40, 0x6e, 0x05, 0x30, 0x98, 0x65,
+  0x28, 0x1d, 0xd3, 0x09, 0x6c, 0xc0, 0x57, 0x01, 0x3e, 0xb3, 0x04, 0xab,
+  0x63, 0xf7, 0x2a, 0x10, 0xf1, 0x99, 0x25, 0x58, 0x9d, 0xe1, 0x88, 0x7e,
+  0x0e, 0xf0, 0x55, 0x10, 0xbe, 0x59, 0x06, 0xd4, 0x59, 0x9d, 0xc0, 0xfc,
+  0x39, 0xc8, 0x57, 0x21, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b,
+  0x9e, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xf8, 0x5b, 0x41,
+  0x87, 0x1b, 0x82, 0xbe, 0x15, 0xc0, 0x60, 0x96, 0x21, 0x75, 0x54, 0x27,
+  0xb0, 0x90, 0x15, 0x86, 0xf8, 0xcc, 0x12, 0xac, 0x8e, 0x11, 0x24, 0x2b,
+  0xc0, 0x67, 0x96, 0x60, 0x75, 0x06, 0x5a, 0x1e, 0xad, 0x74, 0x30, 0xd3,
+  0x21, 0x52, 0x47, 0x50, 0x1d, 0x18, 0x1d, 0x4e, 0xe7, 0x82, 0x61, 0x6c,
+  0x64, 0x85, 0x93, 0x15, 0xe2, 0x33, 0x1c, 0xa1, 0x1a, 0x28, 0x2b, 0x10,
+  0xdf, 0x2c, 0x03, 0xeb, 0xbc, 0x4e, 0x60, 0x29, 0x2b, 0xac, 0x46, 0x7c,
+  0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x81, 0x21, 0x1f,
+  0x2b, 0x82, 0xf8, 0x14, 0xd1, 0xba, 0x82, 0x0e, 0x37, 0x04, 0xab, 0x2b,
+  0x80, 0xc1, 0x2c, 0x43, 0xeb, 0xb8, 0x4e, 0x60, 0x43, 0xcc, 0x0a, 0xf0,
+  0x99, 0x25, 0x98, 0x1d, 0x73, 0x59, 0x81, 0x88, 0xcf, 0x2c, 0xc1, 0xec,
+  0x0c, 0x47, 0xd4, 0xc6, 0xcb, 0x0a, 0xc2, 0x37, 0xcb, 0x00, 0x3b, 0xb3,
+  0x13, 0x98, 0x6d, 0xc0, 0xac, 0x10, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30,
+  0xcc, 0x05, 0x4f, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xe0,
+  0xae, 0xa0, 0xc3, 0x0d, 0x81, 0xed, 0x0a, 0x60, 0x30, 0xcb, 0x10, 0x3b,
+  0xb2, 0x13, 0x18, 0xce, 0x0a, 0x43, 0x7c, 0x66, 0x09, 0x66, 0xc7, 0x88,
+  0x9e, 0x15, 0xe0, 0x33, 0x4b, 0x30, 0x3b, 0x03, 0x2d, 0x8f, 0xd6, 0x3a,
+  0x98, 0xeb, 0x10, 0xb1, 0x23, 0xc8, 0x0e, 0xe8, 0xbc, 0xce, 0x05, 0xc3,
+  0x5c, 0xf0, 0xd4, 0x6d, 0x4f, 0x5d, 0xca, 0x0a, 0xc3, 0x1c, 0x98, 0x0a,
   0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82,
-  0x81, 0x77, 0xa6, 0xc2, 0x8d, 0x0a, 0xf6, 0x29, 0x90, 0xa9, 0x30, 0x9a,
+  0x81, 0x67, 0xbe, 0x82, 0xed, 0x0a, 0x75, 0x2b, 0x8c, 0xaf, 0x30, 0x9a,
   0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4,
-  0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0xb9, 0xa9,
-  0xe0, 0xa3, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0,
-  0xbd, 0xa9, 0xf0, 0xa3, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82,
-  0x60, 0xb0, 0xc1, 0xa9, 0x00, 0xa6, 0x42, 0x42, 0x04, 0x23, 0x06, 0x0a,
-  0x00, 0x82, 0x60, 0xb0, 0xe0, 0xa9, 0xf0, 0xa3, 0x02, 0x88, 0x0a, 0xc1,
-  0x9a, 0x0a, 0x30, 0x2a, 0xb4, 0xa9, 0x30, 0x9a, 0x10, 0x00, 0x17, 0x3c,
-  0x35, 0x4b, 0xd0, 0x37, 0xc3, 0x0d, 0xf2, 0x1b, 0xc0, 0xa9, 0x00, 0x06,
-  0xb3, 0x0c, 0x6c, 0xd3, 0x37, 0x81, 0xc9, 0xa7, 0x40, 0x9f, 0x42, 0x7c,
-  0x86, 0x23, 0xee, 0x37, 0xa8, 0x4f, 0x81, 0xf8, 0x66, 0x19, 0xda, 0x06,
-  0x6e, 0x02, 0xb3, 0x4f, 0x01, 0x7f, 0x83, 0xf8, 0x58, 0x30, 0xd0, 0xe7,
-  0x82, 0x61, 0x2e, 0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29,
-  0x42, 0x4f, 0x05, 0x1d, 0x6e, 0x08, 0xf0, 0x54, 0x00, 0x83, 0x59, 0x06,
-  0xb7, 0x79, 0x9b, 0xc0, 0x06, 0xff, 0x14, 0xe0, 0x33, 0x4b, 0x40, 0x37,
-  0xd6, 0x9f, 0x02, 0x11, 0x9f, 0x59, 0x02, 0xba, 0x19, 0x8e, 0x10, 0xe1,
-  0xc0, 0x3f, 0x05, 0xe1, 0x9b, 0x65, 0x88, 0x1b, 0xba, 0x09, 0x6c, 0x84,
-  0x83, 0xff, 0x14, 0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xe0,
-  0x29, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0x52, 0x15, 0x74,
-  0xb8, 0x21, 0x18, 0x55, 0x01, 0x0c, 0x66, 0x19, 0xe4, 0x66, 0x6e, 0x02,
-  0x3b, 0x51, 0x61, 0x88, 0xcf, 0x2c, 0x01, 0xdd, 0x18, 0xa1, 0xa2, 0x02,
-  0x7c, 0x66, 0x09, 0xe8, 0x66, 0xa0, 0xe5, 0xd1, 0xdc, 0x06, 0x7b, 0x1b,
-  0x42, 0x6e, 0x84, 0xb9, 0xd1, 0xc1, 0x01, 0x6e, 0x2e, 0x18, 0xc6, 0x52,
-  0x54, 0x68, 0x51, 0x21, 0x3e, 0xc3, 0x11, 0xb0, 0xe0, 0xa2, 0x02, 0xf1,
-  0xcd, 0x32, 0xd4, 0x0d, 0xde, 0x04, 0xf6, 0xa2, 0x42, 0x2c, 0xc4, 0xc7,
-  0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x18, 0xf2, 0xb1,
-  0x22, 0x88, 0x4f, 0x11, 0xb3, 0x2a, 0xe8, 0x70, 0x43, 0x10, 0xab, 0x02,
-  0x18, 0xcc, 0x32, 0xd8, 0xcd, 0xdd, 0x04, 0x36, 0xdc, 0xa8, 0x00, 0x9f,
-  0x59, 0x02, 0xbe, 0x31, 0x1a, 0x15, 0x88, 0xf8, 0xcc, 0x12, 0xf0, 0xcd,
-  0x70, 0xc4, 0x2e, 0xd4, 0xa8, 0x20, 0x7c, 0xb3, 0x0c, 0x79, 0xc3, 0x37,
-  0x81, 0xf1, 0x82, 0x8d, 0x0a, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3,
-  0x5c, 0xf0, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x84, 0xaf,
-  0x0a, 0x3a, 0xdc, 0x10, 0xf0, 0xaa, 0x00, 0x06, 0xb3, 0x0c, 0x7a, 0xb3,
-  0x37, 0x81, 0xf9, 0xa8, 0x30, 0xc4, 0x67, 0x96, 0x80, 0x6f, 0x8c, 0x18,
-  0x53, 0x01, 0x3e, 0xb3, 0x04, 0x7c, 0x33, 0xd0, 0xf2, 0x68, 0x76, 0x83,
-  0xdd, 0x0d, 0xa1, 0x37, 0xc2, 0xde, 0xb0, 0x06, 0xde, 0x5c, 0x30, 0xcc,
-  0x05, 0x4f, 0xdd, 0xf6, 0xd4, 0xbd, 0xa8, 0x30, 0xcc, 0x99, 0xa5, 0x30,
-  0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18,
-  0x78, 0xec, 0x2a, 0xf0, 0xaa, 0xb0, 0xa7, 0x42, 0xba, 0x0a, 0xa3, 0x09,
-  0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c,
-  0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xdb, 0xbc, 0x0a,
-  0xe3, 0x2a, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x1b,
-  0xbd, 0x0a, 0xe4, 0x2a, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08,
-  0x06, 0x5b, 0xbd, 0x0a, 0xe5, 0x2a, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00,
-  0x20, 0x08, 0x06, 0x4b, 0xbf, 0x0a, 0xe4, 0x2a, 0x94, 0xaa, 0x10, 0xc0,
-  0xab, 0x50, 0xab, 0x82, 0xbc, 0x0a, 0xa3, 0x09, 0x01, 0x70, 0xc1, 0x53,
-  0xb3, 0x04, 0x7d, 0x33, 0xd0, 0xf2, 0x98, 0x06, 0xda, 0xb0, 0x35, 0x71,
-  0x36, 0x2c, 0xa1, 0x36, 0x02, 0xdf, 0xb0, 0x35, 0xb1, 0x36, 0xb3, 0x0c,
-  0x7e, 0x03, 0x3a, 0x77, 0x1c, 0x0c, 0x47, 0xf0, 0x71, 0x70, 0xab, 0xc2,
-  0xf0, 0x5d, 0x1f, 0x07, 0xc3, 0x0c, 0x37, 0x04, 0xa2, 0x2a, 0x90, 0x41,
-  0x0d, 0x81, 0x0e, 0x47, 0xfc, 0xc3, 0xae, 0x0a, 0xc3, 0x57, 0x81, 0xa0,
-  0x17, 0x12, 0xc3, 0x0c, 0x37, 0x04, 0xa5, 0x2a, 0x90, 0x41, 0x05, 0x83,
-  0xce, 0x32, 0xfc, 0x0d, 0xed, 0x04, 0x97, 0xa6, 0xc2, 0x30, 0x07, 0x9a,
-  0xc2, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xe0, 0x91, 0xac, 0x40,
-  0xaf, 0xc2, 0xac, 0x0a, 0x21, 0x2b, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82,
-  0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x71, 0xc8, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0x2b, 0x2b, 0xec, 0xab, 0x70, 0x10,
-  0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0x2c, 0x2b, 0xf0, 0xab,
-  0xc0, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0x2d, 0x2b,
-  0xf4, 0xab, 0x20, 0x11, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c,
-  0x35, 0x2b, 0xf0, 0xab, 0xd0, 0xab, 0x42, 0x80, 0xb2, 0x42, 0xbb, 0x0a,
-  0x2a, 0x2b, 0x8c, 0x26, 0x04, 0xc0, 0x05, 0x4f, 0xcd, 0x12, 0xd0, 0xce,
-  0x70, 0xc3, 0x2b, 0x07, 0x2e, 0x2b, 0x80, 0xc1, 0x2c, 0x43, 0xe8, 0x88,
-  0x4e, 0x50, 0xb8, 0x2a, 0xfc, 0xab, 0x00, 0x17, 0x3c, 0x35, 0x62, 0x70,
-  0x00, 0x20, 0x08, 0x06, 0xd4, 0xcd, 0x0a, 0x20, 0x2b, 0xd8, 0x72, 0x10,
-  0xaf, 0xc2, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50, 0x38, 0x2b, 0x80,
-  0xac, 0x10, 0x08, 0x17, 0x0c, 0x53, 0xbb, 0x2a, 0x90, 0xac, 0x00, 0x17,
-  0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x14, 0xcf, 0x0a, 0x25,
-  0x2b, 0xe8, 0x84, 0xbd, 0x0a, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40,
-  0xf5, 0xac, 0x50, 0xb2, 0x42, 0x20, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0xdd,
-  0xf1, 0xd4, 0x91, 0xaa, 0x30, 0xcc, 0xed, 0xa6, 0x30, 0xcc, 0x11, 0xc3,
-  0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x78, 0x61, 0x2b,
-  0xc4, 0xac, 0x00, 0xaf, 0x82, 0xcf, 0x0a, 0xa3, 0x09, 0x01, 0x30, 0x9a,
+  0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0xb5, 0xaf,
+  0xd0, 0xbb, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0,
+  0xb9, 0xaf, 0xe0, 0xbb, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82,
+  0x60, 0xb0, 0xbd, 0xaf, 0xf0, 0xbb, 0x42, 0x42, 0x04, 0x23, 0x06, 0x0a,
+  0x00, 0x82, 0x60, 0xb0, 0xdc, 0xaf, 0xe0, 0xbb, 0xc2, 0xdf, 0x0a, 0x81,
+  0xfa, 0x0a, 0xaf, 0x2b, 0xb0, 0xaf, 0x30, 0x9a, 0x10, 0x00, 0x17, 0x3c,
+  0x35, 0x4b, 0x40, 0x3b, 0x03, 0x2d, 0x8f, 0x69, 0xfc, 0x0d, 0xd8, 0x13,
+  0x7e, 0xc3, 0x12, 0xa1, 0x23, 0xcc, 0x0e, 0xd8, 0x13, 0xa2, 0x33, 0xcb,
+  0x50, 0x3b, 0xb7, 0x13, 0xd7, 0xc1, 0x70, 0x04, 0xdf, 0x06, 0xb1, 0x2b,
+  0x0c, 0xdf, 0xf5, 0x6d, 0x30, 0xcc, 0x70, 0x43, 0xc0, 0xb7, 0x02, 0x19,
+  0xd4, 0x10, 0xe8, 0x70, 0x44, 0x7e, 0xd4, 0xae, 0x30, 0x7c, 0x15, 0x08,
+  0x7a, 0xfb, 0x31, 0xcc, 0x70, 0x43, 0xf0, 0xb7, 0x02, 0x19, 0x54, 0x30,
+  0xe8, 0x2c, 0x83, 0xed, 0xac, 0x4f, 0x70, 0x63, 0x2b, 0x0c, 0x73, 0x7a,
+  0x2a, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x9e, 0xff, 0x0a,
+  0xee, 0x2b, 0xb4, 0xae, 0xb0, 0xbf, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26,
+  0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x87, 0x8c,
+  0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x56, 0xc2, 0x42, 0xfd, 0x0a, 0x07,
+  0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x66, 0xc2, 0x82, 0xfd,
+  0x0a, 0x0c, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x76, 0xc2,
+  0xc2, 0xfd, 0x0a, 0x12, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1,
+  0xf2, 0xc2, 0x82, 0xfd, 0x0a, 0xb7, 0x2b, 0x04, 0x22, 0x2c, 0x9c, 0xaf,
+  0x40, 0xc2, 0xc2, 0x68, 0x42, 0x00, 0x5c, 0xf0, 0xd4, 0x2c, 0xc1, 0xfa,
+  0x0c, 0x37, 0xa4, 0x76, 0x80, 0xc2, 0x02, 0x18, 0xcc, 0x32, 0xe0, 0x4e,
+  0xee, 0x04, 0x25, 0xbb, 0x42, 0xfe, 0x0a, 0x70, 0xc1, 0x53, 0x23, 0x06,
+  0x07, 0x00, 0x82, 0x60, 0x40, 0xc5, 0xb0, 0xa0, 0xbf, 0x82, 0xed, 0x06,
+  0xeb, 0x2b, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x01, 0x25, 0xc3, 0x82,
+  0xfe, 0x0a, 0x81, 0x70, 0xc1, 0x30, 0x55, 0xbb, 0x82, 0xff, 0x0a, 0x70,
+  0xc1, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0xd9, 0xb0, 0xf0,
+  0xbf, 0x02, 0x8d, 0xc0, 0xaf, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06,
+  0xd4, 0x0d, 0x0b, 0xff, 0x2b, 0x04, 0xc2, 0x05, 0xc3, 0x5c, 0xf0, 0xd4,
+  0x1d, 0x4f, 0x9d, 0xdf, 0x0a, 0xc3, 0x5c, 0xad, 0x0a, 0xc3, 0x1c, 0x31,
+  0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x81, 0xb7, 0xc3,
+  0xc2, 0x0a, 0x0b, 0xea, 0x2b, 0xe0, 0xb0, 0x30, 0x9a, 0x10, 0x00, 0xa3,
+  0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22,
+  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0x89, 0xb1, 0x20, 0xc3, 0x42,
+  0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0x8d, 0xb1, 0x30,
+  0xc3, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0x91,
+  0xb1, 0x40, 0xc3, 0x42, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60,
+  0xb0, 0xb0, 0xb1, 0x30, 0xc3, 0x02, 0xfd, 0x0a, 0xc1, 0x0f, 0x0b, 0x24,
+  0x2c, 0x84, 0xb1, 0x30, 0x9a, 0x10, 0x00, 0x17, 0x3c, 0x35, 0x4b, 0xb0,
+  0x3e, 0xc3, 0x0d, 0xe6, 0x1d, 0x90, 0xb1, 0x00, 0x06, 0xb3, 0x0c, 0xba,
+  0xb3, 0x3e, 0x81, 0x99, 0xaf, 0x80, 0xbe, 0x42, 0x7c, 0x86, 0x23, 0xe4,
+  0x37, 0x48, 0x5f, 0x81, 0xf8, 0x66, 0x19, 0x76, 0xc7, 0x77, 0x02, 0x53,
+  0x5f, 0x61, 0x7e, 0x83, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e,
+  0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0xc2, 0x8d, 0x05,
+  0x1d, 0x6e, 0x08, 0xd8, 0x58, 0x00, 0x83, 0x59, 0x06, 0xde, 0xe9, 0x9d,
+  0xc0, 0x06, 0xf9, 0x15, 0xe0, 0x33, 0x4b, 0x20, 0x3e, 0x16, 0xbf, 0x02,
+  0x11, 0x9f, 0x59, 0x02, 0xf1, 0x19, 0x8e, 0xe8, 0xdf, 0x40, 0x7e, 0x05,
+  0xe1, 0x9b, 0x65, 0xf8, 0x1d, 0xf1, 0x09, 0xcc, 0x7f, 0x83, 0xf9, 0x15,
+  0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x22,
+  0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0x3c, 0x16, 0x74, 0xb8, 0x21, 0xb8,
+  0x63, 0x01, 0x0c, 0x66, 0x19, 0xc0, 0x27, 0x7c, 0x02, 0xdb, 0x5f, 0x61,
+  0x88, 0xcf, 0x2c, 0x81, 0xf8, 0x18, 0xe1, 0xbf, 0x02, 0x7c, 0x66, 0x09,
+  0xc4, 0x67, 0xa0, 0xe5, 0xd1, 0x78, 0x07, 0xeb, 0x1d, 0x02, 0x7c, 0x84,
+  0xf0, 0x81, 0xc1, 0xc1, 0x77, 0x2e, 0x18, 0xc6, 0xfa, 0x57, 0x08, 0x61,
+  0x21, 0x3e, 0xc3, 0x11, 0xa4, 0x22, 0xc2, 0x02, 0xf1, 0xcd, 0x32, 0x8c,
+  0x8f, 0xf9, 0x04, 0x36, 0xc2, 0x42, 0xa9, 0xc4, 0xc7, 0x82, 0x81, 0x3e,
+  0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f,
+  0x11, 0xa7, 0x2c, 0xe8, 0x70, 0x43, 0x50, 0xca, 0x02, 0x18, 0xcc, 0x32,
+  0x90, 0x4f, 0xf9, 0x04, 0x36, 0xac, 0xb0, 0x00, 0x9f, 0x59, 0x02, 0xf5,
+  0x31, 0x14, 0x16, 0x88, 0xf8, 0xcc, 0x12, 0xa8, 0xcf, 0x70, 0xc4, 0xab,
+  0xa4, 0xb0, 0x20, 0x7c, 0xb3, 0x0c, 0xe7, 0xa3, 0x3e, 0x81, 0xc1, 0x8a,
+  0x0a, 0x0b, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94,
+  0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x84, 0x2c, 0x0b, 0x3a, 0xdc,
+  0x10, 0xc0, 0xb2, 0x00, 0x06, 0xb3, 0x0c, 0xe8, 0x93, 0x3e, 0x81, 0xc9,
+  0xb0, 0x30, 0xc4, 0x67, 0x96, 0x40, 0x7d, 0x8c, 0xb8, 0x61, 0x01, 0x3e,
+  0xb3, 0x04, 0xea, 0x33, 0xd0, 0xf2, 0x68, 0xe4, 0x83, 0x95, 0x0f, 0x81,
+  0x3e, 0x42, 0xfa, 0xd0, 0x95, 0xf9, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0xdd,
+  0xf6, 0xd4, 0x8d, 0xb0, 0x30, 0xcc, 0xe9, 0xac, 0x30, 0xcc, 0x11, 0xc3,
+  0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x78, 0xe0, 0x2c,
+  0xc0, 0xb2, 0xf0, 0xc6, 0x42, 0x2f, 0x0b, 0xa3, 0x09, 0x01, 0x30, 0x9a,
   0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32,
-  0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x1b, 0xda, 0x0a, 0x38, 0x2b, 0x24,
-  0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x5b, 0xda, 0x0a, 0x39,
-  0x2b, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x9b, 0xda,
-  0x0a, 0x3a, 0x2b, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06,
-  0x8b, 0xdc, 0x0a, 0x39, 0x2b, 0xe8, 0xab, 0x10, 0x94, 0xad, 0xa0, 0xb2,
-  0xc2, 0xd9, 0x0a, 0xa3, 0x09, 0x01, 0x70, 0xc1, 0x53, 0xb3, 0x04, 0xb4,
-  0x33, 0xdc, 0xc0, 0xce, 0x81, 0xda, 0x0a, 0x60, 0x30, 0xcb, 0x30, 0x3a,
-  0xb4, 0x13, 0x18, 0xbb, 0x0a, 0xee, 0x2a, 0xc4, 0x67, 0x38, 0x42, 0x9e,
-  0x83, 0x77, 0x15, 0x88, 0x6f, 0x96, 0x81, 0x74, 0x4e, 0x27, 0x30, 0x78,
-  0x15, 0xe6, 0x39, 0x88, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x82,
-  0xa7, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xe8, 0x56, 0xd0,
-  0xe1, 0x86, 0x40, 0x6e, 0x05, 0x30, 0x98, 0x65, 0x28, 0x1d, 0xd3, 0x09,
-  0x6c, 0xc0, 0x57, 0x01, 0x3e, 0xb3, 0x04, 0xab, 0x63, 0xf7, 0x2a, 0x10,
-  0xf1, 0x99, 0x25, 0x58, 0x9d, 0xe1, 0x88, 0x7e, 0x0e, 0xf0, 0x55, 0x10,
-  0xbe, 0x59, 0x06, 0xd4, 0x59, 0x9d, 0xc0, 0xfc, 0x39, 0xc8, 0x57, 0x21,
-  0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20, 0x92,
-  0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xf8, 0x5b, 0x41, 0x87, 0x1b, 0x82, 0xbe,
-  0x15, 0xc0, 0x60, 0x96, 0x21, 0x75, 0x54, 0x27, 0xb0, 0x90, 0x15, 0x86,
-  0xf8, 0xcc, 0x12, 0xac, 0x8e, 0x11, 0x24, 0x2b, 0xc0, 0x67, 0x96, 0x60,
-  0x75, 0x06, 0x5a, 0x1e, 0xad, 0x74, 0x30, 0xd3, 0x21, 0x52, 0x47, 0x50,
-  0x1d, 0x18, 0x1d, 0x4e, 0xe7, 0x82, 0x61, 0x6c, 0x64, 0x85, 0x93, 0x15,
-  0xe2, 0x33, 0x1c, 0xa1, 0x1a, 0x28, 0x2b, 0x10, 0xdf, 0x2c, 0x03, 0xeb,
-  0xbc, 0x4e, 0x60, 0x29, 0x2b, 0xac, 0x46, 0x7c, 0x2c, 0x18, 0xe8, 0x73,
-  0xc1, 0x30, 0x17, 0x3c, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14,
-  0xd1, 0xba, 0x82, 0x0e, 0x37, 0x04, 0xab, 0x2b, 0x80, 0xc1, 0x2c, 0x43,
-  0xeb, 0xb8, 0x4e, 0x60, 0x43, 0xcc, 0x0a, 0xf0, 0x99, 0x25, 0x98, 0x1d,
-  0x73, 0x59, 0x81, 0x88, 0xcf, 0x2c, 0xc1, 0xec, 0x0c, 0x47, 0xd4, 0xc6,
-  0xcb, 0x0a, 0xc2, 0x37, 0xcb, 0x00, 0x3b, 0xb3, 0x13, 0x98, 0x6d, 0xc0,
-  0xac, 0x10, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59,
-  0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xe0, 0xae, 0xa0, 0xc3, 0x0d,
-  0x81, 0xed, 0x0a, 0x60, 0x30, 0xcb, 0x10, 0x3b, 0xb2, 0x13, 0x18, 0xce,
-  0x0a, 0x43, 0x7c, 0x66, 0x09, 0x66, 0xc7, 0x88, 0x9e, 0x15, 0xe0, 0x33,
-  0x4b, 0x30, 0x3b, 0x03, 0x2d, 0x8f, 0xd6, 0x3a, 0x98, 0xeb, 0x10, 0xb1,
-  0x23, 0xc8, 0x0e, 0xe8, 0xbc, 0xce, 0x05, 0xc3, 0x5c, 0xf0, 0xd4, 0x6d,
-  0x4f, 0x5d, 0xca, 0x0a, 0xc3, 0x1c, 0x98, 0x0a, 0xc3, 0x1c, 0x31, 0xcc,
-  0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x81, 0x67, 0xbe, 0x82,
-  0xed, 0x0a, 0x75, 0x2b, 0x8c, 0xaf, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09,
-  0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23,
-  0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0xb5, 0xaf, 0xd0, 0xbb, 0x42, 0x42,
-  0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0xb9, 0xaf, 0xe0, 0xbb,
-  0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0xbd, 0xaf,
-  0xf0, 0xbb, 0x42, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0,
-  0xdc, 0xaf, 0xe0, 0xbb, 0xc2, 0xdf, 0x0a, 0x81, 0xfa, 0x0a, 0xaf, 0x2b,
-  0xb0, 0xaf, 0x30, 0x9a, 0x10, 0x00, 0x17, 0x3c, 0x35, 0x4b, 0x40, 0x3b,
-  0x03, 0x2d, 0x8f, 0x69, 0xfc, 0x0d, 0xd8, 0x13, 0x7e, 0xc3, 0x12, 0xa1,
-  0x23, 0xcc, 0x0e, 0xd8, 0x13, 0xa2, 0x33, 0xcb, 0x50, 0x3b, 0xb7, 0x13,
-  0xd7, 0xc1, 0x70, 0x04, 0xdf, 0x06, 0xb1, 0x2b, 0x0c, 0xdf, 0xf5, 0x6d,
-  0x30, 0xcc, 0x70, 0x43, 0xc0, 0xb7, 0x02, 0x19, 0xd4, 0x10, 0xe8, 0x70,
-  0x44, 0x7e, 0xd4, 0xae, 0x30, 0x7c, 0x15, 0x08, 0x7a, 0xfb, 0x31, 0xcc,
-  0x70, 0x43, 0xf0, 0xb7, 0x02, 0x19, 0x54, 0x30, 0xe8, 0x2c, 0x83, 0xed,
-  0xac, 0x4f, 0x70, 0x63, 0x2b, 0x0c, 0x73, 0x7a, 0x2a, 0x0c, 0x33, 0x62,
-  0x70, 0x00, 0x20, 0x08, 0x06, 0x9e, 0xff, 0x0a, 0xee, 0x2b, 0xb4, 0xae,
-  0xb0, 0xbf, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2,
-  0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x87, 0x8c, 0x18, 0x20, 0x00, 0x08,
-  0x82, 0xc1, 0x56, 0xc2, 0x42, 0xfd, 0x0a, 0x07, 0x11, 0x8c, 0x18, 0x20,
-  0x00, 0x08, 0x82, 0xc1, 0x66, 0xc2, 0x82, 0xfd, 0x0a, 0x0c, 0x11, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x76, 0xc2, 0xc2, 0xfd, 0x0a, 0x12,
-  0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0xf2, 0xc2, 0x82, 0xfd,
-  0x0a, 0xb7, 0x2b, 0x04, 0x22, 0x2c, 0x9c, 0xaf, 0x40, 0xc2, 0xc2, 0x68,
-  0x42, 0x00, 0x5c, 0xf0, 0xd4, 0x2c, 0xc1, 0xfa, 0x0c, 0x37, 0xa4, 0x76,
-  0x80, 0xc2, 0x02, 0x18, 0xcc, 0x32, 0xe0, 0x4e, 0xee, 0x04, 0x25, 0xbb,
-  0x42, 0xfe, 0x0a, 0x70, 0xc1, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60,
-  0x40, 0xc5, 0xb0, 0xa0, 0xbf, 0x82, 0xed, 0x06, 0xeb, 0x2b, 0x8c, 0x18,
-  0x1c, 0x00, 0x08, 0x82, 0x01, 0x25, 0xc3, 0x82, 0xfe, 0x0a, 0x81, 0x70,
-  0xc1, 0x30, 0x55, 0xbb, 0x82, 0xff, 0x0a, 0x70, 0xc1, 0x53, 0x23, 0x06,
-  0x07, 0x00, 0x82, 0x60, 0x40, 0xd9, 0xb0, 0xf0, 0xbf, 0x02, 0x8d, 0xc0,
-  0xaf, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0xd4, 0x0d, 0x0b, 0xff,
-  0x2b, 0x04, 0xc2, 0x05, 0xc3, 0x5c, 0xf0, 0xd4, 0x1d, 0x4f, 0x9d, 0xdf,
-  0x0a, 0xc3, 0x5c, 0xad, 0x0a, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c,
-  0x18, 0x1c, 0x00, 0x08, 0x82, 0x81, 0xb7, 0xc3, 0xc2, 0x0a, 0x0b, 0xea,
-  0x2b, 0xe0, 0xb0, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a,
+  0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xdb, 0x39, 0x0b, 0xb7, 0x2c, 0x24,
+  0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x1b, 0x3a, 0x0b, 0xb8,
+  0x2c, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x5b, 0x3a,
+  0x0b, 0xb9, 0x2c, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06,
+  0x4b, 0x3c, 0x0b, 0xb8, 0x2c, 0xe4, 0xb1, 0x10, 0x90, 0xb3, 0x90, 0xca,
+  0x82, 0x39, 0x0b, 0xa3, 0x09, 0x01, 0x70, 0xc1, 0x53, 0xb3, 0x04, 0xeb,
+  0x33, 0xd0, 0xf2, 0x98, 0x86, 0xed, 0xd0, 0x64, 0x51, 0x3b, 0x2c, 0x81,
+  0x3b, 0x82, 0xfa, 0xd0, 0x64, 0x91, 0x3b, 0xb3, 0x0c, 0xec, 0xe3, 0x3e,
+  0x6b, 0x1e, 0x0c, 0x47, 0xec, 0x6d, 0xb0, 0xca, 0xc2, 0xf0, 0x1d, 0xdf,
+  0x06, 0xc3, 0x0c, 0x37, 0x04, 0x76, 0x2c, 0x90, 0x41, 0x0d, 0x81, 0x0e,
+  0x47, 0xcc, 0xcb, 0x2b, 0x0b, 0xc3, 0x57, 0x81, 0xa0, 0x57, 0x2f, 0xc3,
+  0x0c, 0x37, 0x04, 0x79, 0x2c, 0x90, 0x41, 0x05, 0x83, 0xce, 0x32, 0xb4,
+  0x8f, 0x08, 0x05, 0xd7, 0xc3, 0xc2, 0x30, 0x47, 0xb7, 0xc2, 0x30, 0x23,
+  0x06, 0x07, 0x00, 0x82, 0x60, 0xe0, 0xe1, 0xb3, 0x80, 0xce, 0xc2, 0x29,
+  0x0b, 0xf5, 0x2c, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26,
+  0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x71, 0xc8, 0x88, 0x01, 0x02, 0x80,
+  0x20, 0x18, 0x6c, 0xff, 0x2c, 0xbc, 0xb3, 0x70, 0x10, 0xc1, 0x88, 0x01,
+  0x02, 0x80, 0x20, 0x18, 0x6c, 0x20, 0x2d, 0xc0, 0xb3, 0xc0, 0x10, 0xc1,
+  0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0x21, 0x2d, 0xc4, 0xb3, 0x20,
+  0x11, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c, 0x29, 0x2d, 0xc0,
+  0xb3, 0x10, 0xcb, 0x42, 0xc0, 0xcf, 0x42, 0x38, 0x0b, 0xfe, 0x2c, 0x8c,
+  0x26, 0x04, 0xc0, 0x05, 0x4f, 0xcd, 0x12, 0x88, 0xd0, 0x70, 0xc3, 0xa8,
+  0x07, 0x22, 0x2d, 0x80, 0xc1, 0x2c, 0xc3, 0xfb, 0xc0, 0x4f, 0x50, 0xac,
+  0x2c, 0xcc, 0xb3, 0x00, 0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08,
+  0x06, 0xd4, 0x4a, 0x0b, 0xf4, 0x2c, 0xd4, 0x6e, 0x50, 0xce, 0xc2, 0x88,
+  0xc1, 0x01, 0x80, 0x20, 0x18, 0x50, 0x2c, 0x2d, 0xd0, 0xb3, 0x10, 0x08,
+  0x17, 0x0c, 0x53, 0xaf, 0x2c, 0xe0, 0xb3, 0x00, 0x17, 0x3c, 0x35, 0x62,
+  0x70, 0x00, 0x20, 0x08, 0x06, 0x14, 0x4c, 0x0b, 0xf9, 0x2c, 0xb8, 0x8c,
+  0x3a, 0x0b, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0xc5, 0xb4, 0x90,
+  0xcf, 0x42, 0x20, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0xdd, 0xf1, 0xd4, 0xe1,
+  0xb1, 0x30, 0xcc, 0xbd, 0xae, 0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc,
+  0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x78, 0x35, 0x2d, 0x94, 0xb4, 0x40,
+  0xce, 0x82, 0x4c, 0x0b, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3,
+  0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00,
+  0x20, 0x08, 0x06, 0x1b, 0x4f, 0x0b, 0x2c, 0x2d, 0x24, 0x44, 0x30, 0x62,
+  0x80, 0x00, 0x20, 0x08, 0x06, 0x5b, 0x4f, 0x0b, 0x2d, 0x2d, 0x24, 0x44,
+  0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x9b, 0x4f, 0x0b, 0x2e, 0x2d,
+  0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x8b, 0x59, 0x0b,
+  0x2d, 0x2d, 0xb8, 0xb3, 0x10, 0xe4, 0xb4, 0xe0, 0xcf, 0xc2, 0x4e, 0x0b,
+  0xa3, 0x09, 0x01, 0x70, 0xc1, 0x53, 0xb3, 0x04, 0x22, 0x34, 0xdc, 0x00,
+  0xee, 0x81, 0x4f, 0x0b, 0x60, 0x30, 0xcb, 0x10, 0x3f, 0x22, 0x14, 0x18,
+  0x38, 0x0b, 0xe2, 0x2c, 0xc4, 0x67, 0x38, 0x22, 0x7e, 0x83, 0x71, 0x16,
+  0x88, 0x6f, 0x96, 0x41, 0x7e, 0xea, 0x27, 0x30, 0x72, 0x16, 0xe4, 0x37,
+  0x88, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x30,
+  0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xd0, 0x5a, 0xd0, 0xe1, 0x86, 0xc0,
+  0xac, 0x05, 0x30, 0x98, 0x65, 0x98, 0x1f, 0xfa, 0x09, 0x6c, 0x60, 0x67,
+  0x01, 0x3e, 0xb3, 0x04, 0xf9, 0x63, 0xeb, 0x2c, 0x10, 0xf1, 0x99, 0x25,
+  0xc8, 0x9f, 0xe1, 0x08, 0xfe, 0x0d, 0xd8, 0x59, 0x10, 0xbe, 0x59, 0x06,
+  0xfb, 0xc9, 0x9f, 0xc0, 0xfa, 0x37, 0x68, 0x67, 0x21, 0x3e, 0x16, 0x38,
+  0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41,
+  0x7c, 0x8a, 0x98, 0x6b, 0x41, 0x87, 0x1b, 0x82, 0xb8, 0x16, 0xc0, 0x60,
+  0x96, 0xe1, 0x7e, 0xf0, 0x27, 0xb0, 0x7a, 0x16, 0x86, 0xf8, 0xcc, 0x12,
+  0xe4, 0x8f, 0x11, 0xf8, 0x2c, 0xc0, 0x67, 0x96, 0x20, 0x7f, 0x06, 0x5a,
+  0x1e, 0x6d, 0x7e, 0x30, 0xfa, 0x21, 0xee, 0x47, 0xc0, 0x1f, 0x17, 0x1c,
+  0xea, 0xe7, 0x82, 0x61, 0xec, 0x9e, 0x85, 0x7d, 0x16, 0xe2, 0x33, 0x1c,
+  0xe1, 0x37, 0xfc, 0x2c, 0x10, 0xdf, 0x2c, 0x83, 0xfe, 0xf4, 0x4f, 0x60,
+  0xfd, 0x2c, 0xfc, 0x4d, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17,
+  0x3c, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x11, 0xda, 0x82,
+  0x0e, 0x37, 0x04, 0x7f, 0x2d, 0x80, 0xc1, 0x2c, 0xc3, 0xfe, 0xf0, 0x4f,
+  0x60, 0x43, 0x49, 0x0b, 0xf0, 0x99, 0x25, 0x08, 0x21, 0x13, 0x69, 0x81,
+  0x88, 0xcf, 0x2c, 0x41, 0x08, 0x0d, 0x47, 0xa4, 0xce, 0x48, 0x0b, 0xc2,
+  0x37, 0xcb, 0xe0, 0x3f, 0x21, 0x14, 0x98, 0xea, 0x90, 0xb4, 0x10, 0x1f,
+  0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x10, 0xc9, 0xc7,
+  0x8a, 0x20, 0x3e, 0x45, 0xb0, 0xb6, 0xa0, 0xc3, 0x0d, 0x81, 0x6a, 0x0b,
+  0x60, 0x30, 0xcb, 0xf0, 0x3f, 0x20, 0x14, 0x18, 0x4b, 0x0b, 0x43, 0x7c,
+  0x66, 0x09, 0x42, 0xc8, 0x88, 0x98, 0x16, 0xe0, 0x33, 0x4b, 0x10, 0x42,
+  0x03, 0x2d, 0x8f, 0xb6, 0x3f, 0x18, 0xff, 0x10, 0xff, 0x23, 0x80, 0x10,
+  0xda, 0xf5, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0xd4, 0x6d, 0x4f, 0x5d, 0x3f,
+  0x0b, 0xc3, 0x1c, 0x0d, 0x0b, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c,
+  0x18, 0x1c, 0x00, 0x08, 0x82, 0x81, 0xa7, 0xdb, 0x82, 0x6a, 0x0b, 0x69,
+  0x2d, 0xdc, 0xb6, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a,
   0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00,
-  0x82, 0x60, 0xb0, 0x89, 0xb1, 0x20, 0xc3, 0x42, 0x42, 0x04, 0x23, 0x06,
-  0x08, 0x00, 0x82, 0x60, 0xb0, 0x8d, 0xb1, 0x30, 0xc3, 0x42, 0x42, 0x04,
-  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0x91, 0xb1, 0x40, 0xc3, 0x42,
-  0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0xb0, 0xb1, 0x30,
-  0xc3, 0x02, 0xfd, 0x0a, 0xc1, 0x0f, 0x0b, 0x24, 0x2c, 0x84, 0xb1, 0x30,
-  0x9a, 0x10, 0x00, 0x17, 0x3c, 0x35, 0x4b, 0xb0, 0x3e, 0xc3, 0x0d, 0xe6,
-  0x1d, 0x90, 0xb1, 0x00, 0x06, 0xb3, 0x0c, 0xba, 0xb3, 0x3e, 0x81, 0x99,
-  0xaf, 0x80, 0xbe, 0x42, 0x7c, 0x86, 0x23, 0xe4, 0x37, 0x48, 0x5f, 0x81,
-  0xf8, 0x66, 0x19, 0x76, 0xc7, 0x77, 0x02, 0x53, 0x5f, 0x61, 0x7e, 0x83,
-  0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca, 0x02, 0x43,
-  0x3e, 0x56, 0x04, 0xf1, 0x29, 0xc2, 0x8d, 0x05, 0x1d, 0x6e, 0x08, 0xd8,
-  0x58, 0x00, 0x83, 0x59, 0x06, 0xde, 0xe9, 0x9d, 0xc0, 0x06, 0xf9, 0x15,
-  0xe0, 0x33, 0x4b, 0x20, 0x3e, 0x16, 0xbf, 0x02, 0x11, 0x9f, 0x59, 0x02,
-  0xf1, 0x19, 0x8e, 0xe8, 0xdf, 0x40, 0x7e, 0x05, 0xe1, 0x9b, 0x65, 0xf8,
-  0x1d, 0xf1, 0x09, 0xcc, 0x7f, 0x83, 0xf9, 0x15, 0xe2, 0x63, 0x81, 0x43,
-  0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4,
-  0xa7, 0x88, 0x3c, 0x16, 0x74, 0xb8, 0x21, 0xb8, 0x63, 0x01, 0x0c, 0x66,
-  0x19, 0xc0, 0x27, 0x7c, 0x02, 0xdb, 0x5f, 0x61, 0x88, 0xcf, 0x2c, 0x81,
-  0xf8, 0x18, 0xe1, 0xbf, 0x02, 0x7c, 0x66, 0x09, 0xc4, 0x67, 0xa0, 0xe5,
-  0xd1, 0x78, 0x07, 0xeb, 0x1d, 0x02, 0x7c, 0x84, 0xf0, 0x81, 0xc1, 0xc1,
-  0x77, 0x2e, 0x18, 0xc6, 0xfa, 0x57, 0x08, 0x61, 0x21, 0x3e, 0xc3, 0x11,
-  0xa4, 0x22, 0xc2, 0x02, 0xf1, 0xcd, 0x32, 0x8c, 0x8f, 0xf9, 0x04, 0x36,
-  0xc2, 0x42, 0xa9, 0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0xc1,
-  0x53, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0xa7, 0x2c, 0xe8,
-  0x70, 0x43, 0x50, 0xca, 0x02, 0x18, 0xcc, 0x32, 0x90, 0x4f, 0xf9, 0x04,
-  0x36, 0xac, 0xb0, 0x00, 0x9f, 0x59, 0x02, 0xf5, 0x31, 0x14, 0x16, 0x88,
-  0xf8, 0xcc, 0x12, 0xa8, 0xcf, 0x70, 0xc4, 0xab, 0xa4, 0xb0, 0x20, 0x7c,
-  0xb3, 0x0c, 0xe7, 0xa3, 0x3e, 0x81, 0xc1, 0x8a, 0x0a, 0x0b, 0xf1, 0xb1,
-  0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x91, 0x7c, 0xac,
-  0x08, 0xe2, 0x53, 0x84, 0x2c, 0x0b, 0x3a, 0xdc, 0x10, 0xc0, 0xb2, 0x00,
-  0x06, 0xb3, 0x0c, 0xe8, 0x93, 0x3e, 0x81, 0xc9, 0xb0, 0x30, 0xc4, 0x67,
-  0x96, 0x40, 0x7d, 0x8c, 0xb8, 0x61, 0x01, 0x3e, 0xb3, 0x04, 0xea, 0x33,
-  0xd0, 0xf2, 0x68, 0xe4, 0x83, 0x95, 0x0f, 0x81, 0x3e, 0x42, 0xfa, 0xd0,
-  0x95, 0xf9, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0xdd, 0xf6, 0xd4, 0x8d, 0xb0,
-  0x30, 0xcc, 0xe9, 0xac, 0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88,
-  0xc1, 0x01, 0x80, 0x20, 0x18, 0x78, 0xe0, 0x2c, 0xc0, 0xb2, 0xf0, 0xc6,
-  0x42, 0x2f, 0x0b, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09,
-  0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20,
-  0x08, 0x06, 0xdb, 0x39, 0x0b, 0xb7, 0x2c, 0x24, 0x44, 0x30, 0x62, 0x80,
-  0x00, 0x20, 0x08, 0x06, 0x1b, 0x3a, 0x0b, 0xb8, 0x2c, 0x24, 0x44, 0x30,
-  0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x5b, 0x3a, 0x0b, 0xb9, 0x2c, 0x24,
-  0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x4b, 0x3c, 0x0b, 0xb8,
-  0x2c, 0xe4, 0xb1, 0x10, 0x90, 0xb3, 0x90, 0xca, 0x82, 0x39, 0x0b, 0xa3,
-  0x09, 0x01, 0x70, 0xc1, 0x53, 0xb3, 0x04, 0xeb, 0x33, 0xd0, 0xf2, 0x98,
-  0x86, 0xed, 0xd0, 0x64, 0x51, 0x3b, 0x2c, 0x81, 0x3b, 0x82, 0xfa, 0xd0,
-  0x64, 0x91, 0x3b, 0xb3, 0x0c, 0xec, 0xe3, 0x3e, 0x6b, 0x1e, 0x0c, 0x47,
-  0xec, 0x6d, 0xb0, 0xca, 0xc2, 0xf0, 0x1d, 0xdf, 0x06, 0xc3, 0x0c, 0x37,
-  0x04, 0x76, 0x2c, 0x90, 0x41, 0x0d, 0x81, 0x0e, 0x47, 0xcc, 0xcb, 0x2b,
-  0x0b, 0xc3, 0x57, 0x81, 0xa0, 0x57, 0x2f, 0xc3, 0x0c, 0x37, 0x04, 0x79,
-  0x2c, 0x90, 0x41, 0x05, 0x83, 0xce, 0x32, 0xb4, 0x8f, 0x08, 0x05, 0xd7,
-  0xc3, 0xc2, 0x30, 0x47, 0xb7, 0xc2, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82,
-  0x60, 0xe0, 0xe1, 0xb3, 0x80, 0xce, 0xc2, 0x29, 0x0b, 0xf5, 0x2c, 0x8c,
-  0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02,
-  0x31, 0x14, 0x71, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x6c, 0xff,
-  0x2c, 0xbc, 0xb3, 0x70, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
-  0x6c, 0x20, 0x2d, 0xc0, 0xb3, 0xc0, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80,
-  0x20, 0x18, 0x6c, 0x21, 0x2d, 0xc4, 0xb3, 0x20, 0x11, 0xc1, 0x88, 0x81,
-  0x02, 0x80, 0x20, 0x18, 0x2c, 0x29, 0x2d, 0xc0, 0xb3, 0x10, 0xcb, 0x42,
-  0xc0, 0xcf, 0x42, 0x38, 0x0b, 0xfe, 0x2c, 0x8c, 0x26, 0x04, 0xc0, 0x05,
-  0x4f, 0xcd, 0x12, 0x88, 0xd0, 0x70, 0xc3, 0xa8, 0x07, 0x22, 0x2d, 0x80,
-  0xc1, 0x2c, 0xc3, 0xfb, 0xc0, 0x4f, 0x50, 0xac, 0x2c, 0xcc, 0xb3, 0x00,
-  0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0xd4, 0x4a, 0x0b,
-  0xf4, 0x2c, 0xd4, 0x6e, 0x50, 0xce, 0xc2, 0x88, 0xc1, 0x01, 0x80, 0x20,
-  0x18, 0x50, 0x2c, 0x2d, 0xd0, 0xb3, 0x10, 0x08, 0x17, 0x0c, 0x53, 0xaf,
-  0x2c, 0xe0, 0xb3, 0x00, 0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08,
-  0x06, 0x14, 0x4c, 0x0b, 0xf9, 0x2c, 0xb8, 0x8c, 0x3a, 0x0b, 0x23, 0x06,
-  0x07, 0x00, 0x82, 0x60, 0x40, 0xc5, 0xb4, 0x90, 0xcf, 0x42, 0x20, 0x5c,
-  0x30, 0xcc, 0x05, 0x4f, 0xdd, 0xf1, 0xd4, 0xe1, 0xb1, 0x30, 0xcc, 0xbd,
-  0xae, 0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80,
-  0x20, 0x18, 0x78, 0x35, 0x2d, 0x94, 0xb4, 0x40, 0xce, 0x82, 0x4c, 0x0b,
-  0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a,
-  0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x1b,
-  0x4f, 0x0b, 0x2c, 0x2d, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08,
-  0x06, 0x5b, 0x4f, 0x0b, 0x2d, 0x2d, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00,
-  0x20, 0x08, 0x06, 0x9b, 0x4f, 0x0b, 0x2e, 0x2d, 0x24, 0x44, 0x30, 0x62,
-  0xa0, 0x00, 0x20, 0x08, 0x06, 0x8b, 0x59, 0x0b, 0x2d, 0x2d, 0xb8, 0xb3,
-  0x10, 0xe4, 0xb4, 0xe0, 0xcf, 0xc2, 0x4e, 0x0b, 0xa3, 0x09, 0x01, 0x70,
-  0xc1, 0x53, 0xb3, 0x04, 0x22, 0x34, 0xdc, 0x00, 0xee, 0x81, 0x4f, 0x0b,
-  0x60, 0x30, 0xcb, 0x10, 0x3f, 0x22, 0x14, 0x18, 0x38, 0x0b, 0xe2, 0x2c,
-  0xc4, 0x67, 0x38, 0x22, 0x7e, 0x83, 0x71, 0x16, 0x88, 0x6f, 0x96, 0x41,
-  0x7e, 0xea, 0x27, 0x30, 0x72, 0x16, 0xe4, 0x37, 0x88, 0x8f, 0x05, 0x03,
-  0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10,
-  0x9f, 0x22, 0xd0, 0x5a, 0xd0, 0xe1, 0x86, 0xc0, 0xac, 0x05, 0x30, 0x98,
-  0x65, 0x98, 0x1f, 0xfa, 0x09, 0x6c, 0x60, 0x67, 0x01, 0x3e, 0xb3, 0x04,
-  0xf9, 0x63, 0xeb, 0x2c, 0x10, 0xf1, 0x99, 0x25, 0xc8, 0x9f, 0xe1, 0x08,
-  0xfe, 0x0d, 0xd8, 0x59, 0x10, 0xbe, 0x59, 0x06, 0xfb, 0xc9, 0x9f, 0xc0,
-  0xfa, 0x37, 0x68, 0x67, 0x21, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98,
-  0x0b, 0x9e, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x98, 0x6b,
-  0x41, 0x87, 0x1b, 0x82, 0xb8, 0x16, 0xc0, 0x60, 0x96, 0xe1, 0x7e, 0xf0,
-  0x27, 0xb0, 0x7a, 0x16, 0x86, 0xf8, 0xcc, 0x12, 0xe4, 0x8f, 0x11, 0xf8,
-  0x2c, 0xc0, 0x67, 0x96, 0x20, 0x7f, 0x06, 0x5a, 0x1e, 0x6d, 0x7e, 0x30,
-  0xfa, 0x21, 0xee, 0x47, 0xc0, 0x1f, 0x17, 0x1c, 0xea, 0xe7, 0x82, 0x61,
-  0xec, 0x9e, 0x85, 0x7d, 0x16, 0xe2, 0x33, 0x1c, 0xe1, 0x37, 0xfc, 0x2c,
-  0x10, 0xdf, 0x2c, 0x83, 0xfe, 0xf4, 0x4f, 0x60, 0xfd, 0x2c, 0xfc, 0x4d,
-  0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x81, 0x21,
-  0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x11, 0xda, 0x82, 0x0e, 0x37, 0x04, 0x7f,
-  0x2d, 0x80, 0xc1, 0x2c, 0xc3, 0xfe, 0xf0, 0x4f, 0x60, 0x43, 0x49, 0x0b,
-  0xf0, 0x99, 0x25, 0x08, 0x21, 0x13, 0x69, 0x81, 0x88, 0xcf, 0x2c, 0x41,
-  0x08, 0x0d, 0x47, 0xa4, 0xce, 0x48, 0x0b, 0xc2, 0x37, 0xcb, 0xe0, 0x3f,
-  0x21, 0x14, 0x98, 0xea, 0x90, 0xb4, 0x10, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c,
-  0x30, 0xcc, 0x05, 0x4f, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45,
-  0xb0, 0xb6, 0xa0, 0xc3, 0x0d, 0x81, 0x6a, 0x0b, 0x60, 0x30, 0xcb, 0xf0,
-  0x3f, 0x20, 0x14, 0x18, 0x4b, 0x0b, 0x43, 0x7c, 0x66, 0x09, 0x42, 0xc8,
-  0x88, 0x98, 0x16, 0xe0, 0x33, 0x4b, 0x10, 0x42, 0x03, 0x2d, 0x8f, 0xb6,
-  0x3f, 0x18, 0xff, 0x10, 0xff, 0x23, 0x80, 0x10, 0xda, 0xf5, 0xcf, 0x05,
-  0xc3, 0x5c, 0xf0, 0xd4, 0x6d, 0x4f, 0x5d, 0x3f, 0x0b, 0xc3, 0x1c, 0x0d,
-  0x0b, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08,
-  0x82, 0x81, 0xa7, 0xdb, 0x82, 0x6a, 0x0b, 0x69, 0x2d, 0xdc, 0xb6, 0x30,
-  0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09,
-  0xc4, 0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0x85,
-  0xb7, 0x10, 0xdb, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60,
-  0xb0, 0x89, 0xb7, 0x20, 0xdb, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00,
-  0x82, 0x60, 0xb0, 0x8d, 0xb7, 0x30, 0xdb, 0x42, 0x42, 0x04, 0x23, 0x06,
-  0x0a, 0x00, 0x82, 0x60, 0xb0, 0xac, 0xb7, 0x20, 0xdb, 0xc2, 0x5c, 0x0b,
-  0x81, 0x6f, 0x0b, 0xa3, 0x2d, 0x80, 0xb7, 0x30, 0x9a, 0x10, 0x00, 0x17,
-  0x3c, 0x35, 0x4b, 0x20, 0x42, 0x03, 0x2d, 0x8f, 0x69, 0xb4, 0x0f, 0xca,
-  0x16, 0xec, 0xc3, 0x12, 0xef, 0x23, 0x84, 0x10, 0xca, 0x16, 0xf0, 0x33,
-  0x62, 0x60, 0x00, 0x20, 0x08, 0x06, 0x50, 0x7b, 0x0b, 0xae, 0x2d, 0x98,
-  0xb1, 0x30, 0x62, 0x60, 0x00, 0x20, 0x08, 0x06, 0x90, 0x7b, 0x0b, 0xaf,
-  0x2d, 0x98, 0xb1, 0x60, 0x41, 0x20, 0x1f, 0x0b, 0x04, 0xf9, 0xd8, 0x9b,
-  0x07, 0xa7, 0x2d, 0xc8, 0x67, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xa4,
-  0xf8, 0x16, 0x6e, 0x5b, 0x38, 0x6d, 0xa1, 0xd7, 0x02, 0x8b, 0xf3, 0xe0,
-  0xb4, 0x05, 0xf9, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x34, 0xdf,
-  0x42, 0x6e, 0x0b, 0xa6, 0x2d, 0xa0, 0x6a, 0x10, 0x8c, 0x18, 0x20, 0x00,
-  0x08, 0x82, 0x81, 0x44, 0xdf, 0x82, 0x6e, 0x0b, 0xa9, 0x2d, 0x80, 0x5b,
-  0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x52, 0x7d, 0x0b, 0xbb, 0x2d,
-  0xb0, 0xb6, 0x80, 0x2f, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x48,
-  0xf6, 0x2d, 0xf0, 0xb6, 0x80, 0xda, 0xc2, 0xaa, 0x06, 0xc6, 0x88, 0x01,
-  0x02, 0x80, 0x20, 0x18, 0x48, 0xf7, 0x2d, 0xf4, 0xb6, 0x80, 0xda, 0xc2,
-  0xb8, 0x05, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x20, 0xe1, 0xb7, 0xe0,
-  0xdb, 0x82, 0x6b, 0x0b, 0xfb, 0x12, 0x8c, 0x18, 0x34, 0x00, 0x08, 0x82,
-  0x81, 0x75, 0xdf, 0x82, 0x6f, 0x0b, 0xb1, 0x2d, 0x30, 0x8b, 0xe2, 0xaa,
-  0x01, 0x42, 0x04, 0xf6, 0xd7, 0x41, 0x6c, 0x0b, 0xf2, 0x19, 0x31, 0x40,
-  0x00, 0x10, 0x04, 0x03, 0x69, 0xbf, 0x85, 0xf0, 0x16, 0x62, 0x5b, 0x68,
-  0xaf, 0xc0, 0x42, 0x3b, 0x88, 0x6d, 0x41, 0x3e, 0x23, 0x06, 0x08, 0x00,
-  0x82, 0x60, 0x20, 0xf5, 0xb7, 0x30, 0xde, 0x02, 0x6c, 0x0b, 0xb8, 0x19,
-  0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x20, 0xf9, 0xb7, 0x40, 0xde,
-  0xc2, 0x6c, 0x0b, 0xf0, 0x15, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81,
-  0xf4, 0xdf, 0x42, 0x79, 0x0b, 0xb6, 0x2d, 0xa0, 0x48, 0x30, 0x62, 0x80,
-  0x00, 0x20, 0x08, 0x06, 0x12, 0x88, 0x0b, 0xe6, 0x2d, 0xc8, 0xb6, 0xb0,
-  0x9b, 0x81, 0x31, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x52, 0x88, 0x0b,
-  0xe7, 0x2d, 0xc8, 0xb6, 0x30, 0x5f, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20,
-  0x18, 0x48, 0x22, 0x2e, 0xa0, 0xb7, 0x80, 0xdb, 0xc2, 0x8a, 0x04, 0x23,
-  0x06, 0x0d, 0x00, 0x82, 0x60, 0x60, 0x85, 0xb8, 0x80, 0xde, 0xc2, 0x6e,
-  0x0b, 0x56, 0x45, 0xf9, 0x66, 0x80, 0x10, 0x81, 0xb9, 0x72, 0xb0, 0xdb,
-  0x82, 0x7c, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x40, 0x2a, 0x71, 0x61,
-  0xbd, 0x85, 0xdd, 0x16, 0xfa, 0x29, 0x30, 0x58, 0x0e, 0x76, 0x5b, 0x90,
-  0xcf, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x48, 0x27, 0x2e, 0xb4, 0xb7,
-  0xa0, 0xdb, 0x02, 0x3a, 0x06, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
-  0x48, 0x28, 0x2e, 0xb8, 0xb7, 0xd0, 0xdb, 0x02, 0x48, 0x05, 0x23, 0x06,
-  0x08, 0x00, 0x82, 0x60, 0x20, 0xa5, 0xb8, 0xf0, 0xde, 0x02, 0x78, 0x0b,
-  0x38, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0xa4, 0xe2, 0x02,
-  0x7c, 0x0b, 0xbc, 0x2d, 0xac, 0x63, 0x60, 0x8c, 0x18, 0x20, 0x00, 0x08,
-  0x82, 0x81, 0xb4, 0xe2, 0x42, 0x7c, 0x0b, 0xbc, 0x2d, 0x8c, 0x54, 0x30,
-  0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x12, 0x8b, 0x0b, 0xf2, 0x2d, 0x88,
-  0xb7, 0xb0, 0x13, 0xc1, 0x88, 0x41, 0x03, 0x80, 0x20, 0x18, 0x58, 0x2b,
-  0x2e, 0xc8, 0xb7, 0x50, 0xde, 0x02, 0x18, 0x7c, 0x9e, 0x3b, 0x06, 0x08,
-  0x11, 0x58, 0xef, 0x06, 0xe5, 0x2d, 0xc8, 0x67, 0xc4, 0x00, 0x01, 0x40,
-  0x10, 0x0c, 0xa4, 0x17, 0x17, 0xea, 0x5b, 0x28, 0x6f, 0xa1, 0x85, 0x02,
-  0xfb, 0xdd, 0xa0, 0xbc, 0x05, 0xf9, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
-  0x81, 0x14, 0xe3, 0xc2, 0x7d, 0x0b, 0xe4, 0x2d, 0xe0, 0x5f, 0x30, 0x62,
-  0x80, 0x00, 0x20, 0x08, 0x06, 0x92, 0x8c, 0x0b, 0xf8, 0x2d, 0x9c, 0xb7,
-  0x00, 0x43, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x48, 0x33, 0x2e,
-  0xe4, 0xb7, 0xa0, 0xde, 0x02, 0x1a, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82,
-  0x60, 0x20, 0xd1, 0xb8, 0xa0, 0xdf, 0x82, 0x79, 0x0b, 0xfb, 0x67, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x54, 0xe3, 0xc2, 0x7e, 0x0b, 0xe6,
-  0x2d, 0xcc, 0x50, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x92, 0x8d,
-  0x0b, 0xfc, 0x2d, 0xb0, 0xb7, 0xb0, 0x06, 0xc1, 0x88, 0x41, 0x03, 0x80,
-  0x20, 0x18, 0x58, 0x35, 0x2e, 0xf0, 0xb7, 0xf0, 0xde, 0x82, 0x1a, 0xa4,
-  0x01, 0x1a, 0xf8, 0x1f, 0x42, 0x04, 0xc6, 0x06, 0x6c, 0x20, 0x1f, 0x0b,
-  0xda, 0x40, 0x3e, 0x16, 0x06, 0xf1, 0x2d, 0xc8, 0x67, 0xc4, 0x00, 0x01,
-  0x40, 0x10, 0x0c, 0xa4, 0x1d, 0x17, 0x42, 0x5c, 0x88, 0x6f, 0xc1, 0x09,
-  0x6c, 0x0c, 0xe2, 0x5b, 0x90, 0xcf, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
-  0x48, 0x3d, 0x2e, 0x8c, 0xb8, 0x00, 0xdf, 0x82, 0x16, 0x8c, 0x18, 0x20,
-  0x00, 0x08, 0x82, 0x81, 0xe4, 0xe3, 0x02, 0x89, 0x0b, 0xf3, 0x2d, 0x44,
-  0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x48, 0x3f, 0x2e, 0x94, 0xb8,
-  0x60, 0xdf, 0x02, 0x12, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x04,
-  0xe6, 0x82, 0x89, 0x0b, 0xf2, 0x2d, 0x74, 0xc6, 0x88, 0x01, 0x02, 0x80,
-  0x20, 0x18, 0x48, 0x61, 0x2e, 0x9c, 0xb8, 0x20, 0xdf, 0x02, 0x15, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x24, 0xe6, 0x02, 0x8a, 0x0b, 0xf8,
-  0x2d, 0x2c, 0xc1, 0x88, 0x41, 0x03, 0x80, 0x20, 0x18, 0x58, 0x61, 0x2e,
-  0xa0, 0xb8, 0xb0, 0xdf, 0xc2, 0x1d, 0x2c, 0x0a, 0x18, 0x20, 0x44, 0x70,
-  0x81, 0x31, 0x23, 0x06, 0x0e, 0x00, 0x82, 0x60, 0xd0, 0x9c, 0xb9, 0x60,
-  0xe2, 0x82, 0x7c, 0x0b, 0xeb, 0x2d, 0xec, 0xb8, 0x10, 0x84, 0xb8, 0x10,
-  0xe2, 0x42, 0x88, 0x0b, 0x20, 0x2e, 0xf4, 0xb8, 0x30, 0x4b, 0x30, 0x42,
-  0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+  0x82, 0x60, 0xb0, 0x85, 0xb7, 0x10, 0xdb, 0x42, 0x42, 0x04, 0x23, 0x06,
+  0x08, 0x00, 0x82, 0x60, 0xb0, 0x89, 0xb7, 0x20, 0xdb, 0x42, 0x42, 0x04,
+  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xb0, 0x8d, 0xb7, 0x30, 0xdb, 0x42,
+  0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0xac, 0xb7, 0x20,
+  0xdb, 0xc2, 0x5c, 0x0b, 0x81, 0x6f, 0x0b, 0xa3, 0x2d, 0x80, 0xb7, 0x30,
+  0x9a, 0x10, 0x00, 0x17, 0x3c, 0x35, 0x4b, 0x20, 0x42, 0x03, 0x2d, 0x8f,
+  0x69, 0xb4, 0x0f, 0xca, 0x16, 0xec, 0xc3, 0x12, 0xef, 0x23, 0x84, 0x10,
+  0xca, 0x16, 0xf0, 0x33, 0x62, 0x60, 0x00, 0x20, 0x08, 0x06, 0x50, 0x7b,
+  0x0b, 0xae, 0x2d, 0x98, 0xb1, 0x30, 0x62, 0x60, 0x00, 0x20, 0x08, 0x06,
+  0x90, 0x7b, 0x0b, 0xaf, 0x2d, 0x98, 0xb1, 0x60, 0x41, 0x20, 0x1f, 0x0b,
+  0x04, 0xf9, 0xd8, 0x9b, 0x07, 0xa7, 0x2d, 0xc8, 0x67, 0xc4, 0x00, 0x01,
+  0x40, 0x10, 0x0c, 0xa4, 0xf8, 0x16, 0x6e, 0x5b, 0x38, 0x6d, 0xa1, 0xd7,
+  0x02, 0x8b, 0xf3, 0xe0, 0xb4, 0x05, 0xf9, 0x8c, 0x18, 0x20, 0x00, 0x08,
+  0x82, 0x81, 0x34, 0xdf, 0x42, 0x6e, 0x0b, 0xa6, 0x2d, 0xa0, 0x6a, 0x10,
+  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x44, 0xdf, 0x82, 0x6e, 0x0b,
+  0xa9, 0x2d, 0x80, 0x5b, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x52,
+  0x7d, 0x0b, 0xbb, 0x2d, 0xb0, 0xb6, 0x80, 0x2f, 0xc1, 0x88, 0x01, 0x02,
+  0x80, 0x20, 0x18, 0x48, 0xf6, 0x2d, 0xf0, 0xb6, 0x80, 0xda, 0xc2, 0xaa,
+  0x06, 0xc6, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x48, 0xf7, 0x2d, 0xf4,
+  0xb6, 0x80, 0xda, 0xc2, 0xb8, 0x05, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60,
+  0x20, 0xe1, 0xb7, 0xe0, 0xdb, 0x82, 0x6b, 0x0b, 0xfb, 0x12, 0x8c, 0x18,
+  0x34, 0x00, 0x08, 0x82, 0x81, 0x75, 0xdf, 0x82, 0x6f, 0x0b, 0xb1, 0x2d,
+  0x30, 0x8b, 0xe2, 0xaa, 0x01, 0x42, 0x04, 0xf6, 0xd7, 0x41, 0x6c, 0x0b,
+  0xf2, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x69, 0xbf, 0x85, 0xf0,
+  0x16, 0x62, 0x5b, 0x68, 0xaf, 0xc0, 0x42, 0x3b, 0x88, 0x6d, 0x41, 0x3e,
+  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x20, 0xf5, 0xb7, 0x30, 0xde, 0x02,
+  0x6c, 0x0b, 0xb8, 0x19, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x20,
+  0xf9, 0xb7, 0x40, 0xde, 0xc2, 0x6c, 0x0b, 0xf0, 0x15, 0x8c, 0x18, 0x20,
+  0x00, 0x08, 0x82, 0x81, 0xf4, 0xdf, 0x42, 0x79, 0x0b, 0xb6, 0x2d, 0xa0,
+  0x48, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x12, 0x88, 0x0b, 0xe6,
+  0x2d, 0xc8, 0xb6, 0xb0, 0x9b, 0x81, 0x31, 0x62, 0x80, 0x00, 0x20, 0x08,
+  0x06, 0x52, 0x88, 0x0b, 0xe7, 0x2d, 0xc8, 0xb6, 0x30, 0x5f, 0xc1, 0x88,
+  0x01, 0x02, 0x80, 0x20, 0x18, 0x48, 0x22, 0x2e, 0xa0, 0xb7, 0x80, 0xdb,
+  0xc2, 0x8a, 0x04, 0x23, 0x06, 0x0d, 0x00, 0x82, 0x60, 0x60, 0x85, 0xb8,
+  0x80, 0xde, 0xc2, 0x6e, 0x0b, 0x56, 0x45, 0xf9, 0x66, 0x80, 0x10, 0x81,
+  0xb9, 0x72, 0xb0, 0xdb, 0x82, 0x7c, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1,
+  0x40, 0x2a, 0x71, 0x61, 0xbd, 0x85, 0xdd, 0x16, 0xfa, 0x29, 0x30, 0x58,
+  0x0e, 0x76, 0x5b, 0x90, 0xcf, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x48,
+  0x27, 0x2e, 0xb4, 0xb7, 0xa0, 0xdb, 0x02, 0x3a, 0x06, 0xc1, 0x88, 0x01,
+  0x02, 0x80, 0x20, 0x18, 0x48, 0x28, 0x2e, 0xb8, 0xb7, 0xd0, 0xdb, 0x02,
+  0x48, 0x05, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x20, 0xa5, 0xb8, 0xf0,
+  0xde, 0x02, 0x78, 0x0b, 0x38, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
+  0x81, 0xa4, 0xe2, 0x02, 0x7c, 0x0b, 0xbc, 0x2d, 0xac, 0x63, 0x60, 0x8c,
+  0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0xb4, 0xe2, 0x42, 0x7c, 0x0b, 0xbc,
+  0x2d, 0x8c, 0x54, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x12, 0x8b,
+  0x0b, 0xf2, 0x2d, 0x88, 0xb7, 0xb0, 0x13, 0xc1, 0x88, 0x41, 0x03, 0x80,
+  0x20, 0x18, 0x58, 0x2b, 0x2e, 0xc8, 0xb7, 0x50, 0xde, 0x02, 0x18, 0x7c,
+  0x9e, 0x3b, 0x06, 0x08, 0x11, 0x58, 0xef, 0x06, 0xe5, 0x2d, 0xc8, 0x67,
+  0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xa4, 0x17, 0x17, 0xea, 0x5b, 0x28,
+  0x6f, 0xa1, 0x85, 0x02, 0xfb, 0xdd, 0xa0, 0xbc, 0x05, 0xf9, 0x8c, 0x18,
+  0x20, 0x00, 0x08, 0x82, 0x81, 0x14, 0xe3, 0xc2, 0x7d, 0x0b, 0xe4, 0x2d,
+  0xe0, 0x5f, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x92, 0x8c, 0x0b,
+  0xf8, 0x2d, 0x9c, 0xb7, 0x00, 0x43, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20,
+  0x18, 0x48, 0x33, 0x2e, 0xe4, 0xb7, 0xa0, 0xde, 0x02, 0x1a, 0x04, 0x23,
+  0x06, 0x08, 0x00, 0x82, 0x60, 0x20, 0xd1, 0xb8, 0xa0, 0xdf, 0x82, 0x79,
+  0x0b, 0xfb, 0x67, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x54, 0xe3,
+  0xc2, 0x7e, 0x0b, 0xe6, 0x2d, 0xcc, 0x50, 0x30, 0x62, 0x80, 0x00, 0x20,
+  0x08, 0x06, 0x92, 0x8d, 0x0b, 0xfc, 0x2d, 0xb0, 0xb7, 0xb0, 0x06, 0xc1,
+  0x88, 0x41, 0x03, 0x80, 0x20, 0x18, 0x58, 0x35, 0x2e, 0xf0, 0xb7, 0xf0,
+  0xde, 0x82, 0x1a, 0xa4, 0x01, 0x1a, 0xf8, 0x1f, 0x42, 0x04, 0xc6, 0x06,
+  0x6c, 0x20, 0x1f, 0x0b, 0xda, 0x40, 0x3e, 0x16, 0x06, 0xf1, 0x2d, 0xc8,
+  0x67, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xa4, 0x1d, 0x17, 0x42, 0x5c,
+  0x88, 0x6f, 0xc1, 0x09, 0x6c, 0x0c, 0xe2, 0x5b, 0x90, 0xcf, 0x88, 0x01,
+  0x02, 0x80, 0x20, 0x18, 0x48, 0x3d, 0x2e, 0x8c, 0xb8, 0x00, 0xdf, 0x82,
+  0x16, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0xe4, 0xe3, 0x02, 0x89,
+  0x0b, 0xf3, 0x2d, 0x44, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x48,
+  0x3f, 0x2e, 0x94, 0xb8, 0x60, 0xdf, 0x02, 0x12, 0x8c, 0x18, 0x20, 0x00,
+  0x08, 0x82, 0x81, 0x04, 0xe6, 0x82, 0x89, 0x0b, 0xf2, 0x2d, 0x74, 0xc6,
+  0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x48, 0x61, 0x2e, 0x9c, 0xb8, 0x20,
+  0xdf, 0x02, 0x15, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x24, 0xe6,
+  0x02, 0x8a, 0x0b, 0xf8, 0x2d, 0x2c, 0xc1, 0x88, 0x41, 0x03, 0x80, 0x20,
+  0x18, 0x58, 0x61, 0x2e, 0xa0, 0xb8, 0xb0, 0xdf, 0xc2, 0x1d, 0x2c, 0x0a,
+  0x18, 0x20, 0x44, 0x70, 0x81, 0x31, 0x23, 0x06, 0x0e, 0x00, 0x82, 0x60,
+  0xd0, 0x9c, 0xb9, 0x60, 0xe2, 0x82, 0x7c, 0x0b, 0xeb, 0x2d, 0xec, 0xb8,
+  0x10, 0x84, 0xb8, 0x10, 0xe2, 0x42, 0x88, 0x0b, 0x20, 0x2e, 0xf4, 0xb8,
+  0x30, 0x4b, 0x30, 0x42, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
 };
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_uint16_fp16.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_uint16_fp16.h
index d83ec13d24e0f..4a9652f4cd01d 100644
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_uint16_fp16.h
+++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_uint16_fp16.h
@@ -15,7 +15,7 @@
 ; Name                 Index   Mask Register SysValue  Format   Used
 ; -------------------- ----- ------ -------- -------- ------- ------
 ; no parameters
-; shader hash: 5dcb15fd28f9fe728d74bc3e26040e9a
+; shader hash: 07d5436baa2b3c614d71486f0516b97e
 ;
 ; Pipeline Runtime Information: 
 ;
@@ -68,7 +68,7 @@ target triple = "dxil-ms-dx"
 %dx.types.CBufRet.i32 = type { i32, i32, i32, i32 }
 %dx.types.ResRet.f16 = type { half, half, half, half, i32 }
 %dx.types.ResRet.i16 = type { i16, i16, i16, i16, i32 }
-%"class.RWStructuredBuffer<unsigned short>" = type { i16 }
+%"class.RWStructuredBuffer<uint16_t>" = type { i16 }
 %"class.RWStructuredBuffer<half>" = type { half }
 %Constants = type { i32, i32, i32, i32, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32 }
 
@@ -4478,9 +4478,9 @@ attributes #2 = { nounwind }
 !3 = !{!"cs", i32 6, i32 2}
 !4 = !{null, !5, !9, null}
 !5 = !{!6, !7, !8}
-!6 = !{i32 0, %"class.RWStructuredBuffer<unsigned short>"* undef, !"", i32 0, i32 0, i32 1, i32 12, i1 false, i1 false, i1 false, !1}
+!6 = !{i32 0, %"class.RWStructuredBuffer<uint16_t>"* undef, !"", i32 0, i32 0, i32 1, i32 12, i1 false, i1 false, i1 false, !1}
 !7 = !{i32 1, %"class.RWStructuredBuffer<half>"* undef, !"", i32 0, i32 1, i32 1, i32 12, i1 false, i1 false, i1 false, !1}
-!8 = !{i32 2, %"class.RWStructuredBuffer<unsigned short>"* undef, !"", i32 0, i32 2, i32 1, i32 12, i1 false, i1 false, i1 false, !1}
+!8 = !{i32 2, %"class.RWStructuredBuffer<uint16_t>"* undef, !"", i32 0, i32 2, i32 1, i32 12, i1 false, i1 false, i1 false, !1}
 !9 = !{!10}
 !10 = !{i32 0, %Constants* undef, !"", i32 0, i32 0, i32 1, i32 116, null}
 !11 = !{void ()* @GridSample, !"GridSample", null, !4, !12}
@@ -4490,9 +4490,9 @@ attributes #2 = { nounwind }
 #endif
 
 const unsigned char g_GridSample[] = {
-  0x44, 0x58, 0x42, 0x43, 0xba, 0x7b, 0x00, 0x02, 0x46, 0x2a, 0x6a, 0xf2,
-  0x6c, 0x44, 0x77, 0xf6, 0x55, 0x80, 0xef, 0xa6, 0x01, 0x00, 0x00, 0x00,
-  0x40, 0x54, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00,
+  0x44, 0x58, 0x42, 0x43, 0x23, 0xc3, 0x11, 0x48, 0x17, 0xe5, 0x90, 0xac,
+  0xb3, 0xa0, 0x4d, 0x6f, 0xd2, 0x95, 0xce, 0xa2, 0x01, 0x00, 0x00, 0x00,
+  0x38, 0x54, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00,
   0x48, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00,
   0x18, 0x01, 0x00, 0x00, 0x34, 0x01, 0x00, 0x00, 0x53, 0x46, 0x49, 0x30,
   0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -4514,12 +4514,12 @@ const unsigned char g_GridSample[] = {
   0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
   0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
   0x00, 0x00, 0x00, 0x00, 0x48, 0x41, 0x53, 0x48, 0x14, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x5d, 0xcb, 0x15, 0xfd, 0x28, 0xf9, 0xfe, 0x72,
-  0x8d, 0x74, 0xbc, 0x3e, 0x26, 0x04, 0x0e, 0x9a, 0x44, 0x58, 0x49, 0x4c,
-  0x04, 0x53, 0x00, 0x00, 0x62, 0x00, 0x05, 0x00, 0xc1, 0x14, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x07, 0xd5, 0x43, 0x6b, 0xaa, 0x2b, 0x3c, 0x61,
+  0x4d, 0x71, 0x48, 0x6f, 0x05, 0x16, 0xb9, 0x7e, 0x44, 0x58, 0x49, 0x4c,
+  0xfc, 0x52, 0x00, 0x00, 0x62, 0x00, 0x05, 0x00, 0xbf, 0x14, 0x00, 0x00,
   0x44, 0x58, 0x49, 0x4c, 0x02, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
-  0xec, 0x52, 0x00, 0x00, 0x42, 0x43, 0xc0, 0xde, 0x21, 0x0c, 0x00, 0x00,
-  0xb8, 0x14, 0x00, 0x00, 0x0b, 0x82, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00,
+  0xe4, 0x52, 0x00, 0x00, 0x42, 0x43, 0xc0, 0xde, 0x21, 0x0c, 0x00, 0x00,
+  0xb6, 0x14, 0x00, 0x00, 0x0b, 0x82, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00,
   0x13, 0x00, 0x00, 0x00, 0x07, 0x81, 0x23, 0x91, 0x41, 0xc8, 0x04, 0x49,
   0x06, 0x10, 0x32, 0x39, 0x92, 0x01, 0x84, 0x0c, 0x25, 0x05, 0x08, 0x19,
   0x1e, 0x04, 0x8b, 0x62, 0x80, 0x18, 0x45, 0x02, 0x42, 0x92, 0x0b, 0x42,
@@ -4532,7 +4532,7 @@ const unsigned char g_GridSample[] = {
   0x01, 0xd5, 0x06, 0x62, 0xf8, 0xff, 0xff, 0xff, 0xff, 0x01, 0x90, 0x00,
   0x49, 0x18, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x13, 0x82, 0x60, 0x42,
   0x20, 0x4c, 0x08, 0x06, 0x00, 0x00, 0x00, 0x00, 0x89, 0x20, 0x00, 0x00,
-  0x59, 0x00, 0x00, 0x00, 0x32, 0x22, 0x88, 0x09, 0x20, 0x64, 0x85, 0x04,
+  0x57, 0x00, 0x00, 0x00, 0x32, 0x22, 0x88, 0x09, 0x20, 0x64, 0x85, 0x04,
   0x13, 0x23, 0xa4, 0x84, 0x04, 0x13, 0x23, 0xe3, 0x84, 0xa1, 0x90, 0x14,
   0x12, 0x4c, 0x8c, 0x8c, 0x0b, 0x84, 0xc4, 0x4c, 0x10, 0xb8, 0xc1, 0x08,
   0x40, 0x09, 0x00, 0x0a, 0xe6, 0x08, 0xc0, 0xa0, 0x0c, 0xc3, 0x30, 0x10,
@@ -4549,1743 +4549,1742 @@ const unsigned char g_GridSample[] = {
   0x5a, 0x29, 0x86, 0x61, 0x18, 0x06, 0xe2, 0x8e, 0x1a, 0x2e, 0x7f, 0xc2,
   0x1e, 0x42, 0xf2, 0xb9, 0x8d, 0x2a, 0x56, 0x62, 0xf2, 0x91, 0xdb, 0x46,
   0xc4, 0x30, 0x0c, 0x43, 0x21, 0xbc, 0x41, 0x19, 0xe8, 0x9b, 0x23, 0x08,
-  0x8a, 0xa1, 0x0c, 0xc8, 0x30, 0x84, 0x24, 0x0e, 0x04, 0xcc, 0x84, 0x06,
+  0x8a, 0xa1, 0x0c, 0xc8, 0x30, 0x84, 0x24, 0x0e, 0x04, 0xcc, 0x24, 0x06,
   0xe3, 0xc0, 0x0e, 0xe1, 0x30, 0x0f, 0xf3, 0xe0, 0x06, 0xb2, 0x70, 0x0b,
   0xb3, 0x40, 0x0f, 0xf2, 0x50, 0x0f, 0xe3, 0x40, 0x0f, 0xf5, 0x20, 0x0f,
   0xe5, 0x40, 0x0e, 0xa2, 0x50, 0x0f, 0xe6, 0x60, 0x0e, 0xe5, 0x20, 0x0f,
-  0x7c, 0x50, 0x0f, 0xee, 0x30, 0x0f, 0xe9, 0x70, 0x0e, 0xee, 0x50, 0x0e,
-  0xe4, 0x00, 0x06, 0xf3, 0x80, 0x0e, 0xef, 0x20, 0x0f, 0xf4, 0xe0, 0x07,
-  0x28, 0x30, 0xa8, 0x9c, 0xc9, 0x1b, 0x07, 0x76, 0x08, 0x87, 0x79, 0x98,
-  0x07, 0x37, 0x90, 0x85, 0x5b, 0x98, 0x05, 0x7a, 0x90, 0x87, 0x7a, 0x18,
-  0x07, 0x7a, 0xa8, 0x07, 0x79, 0x28, 0x07, 0x72, 0x10, 0x85, 0x7a, 0x30,
-  0x07, 0x73, 0x28, 0x07, 0x79, 0xe0, 0x03, 0x74, 0x08, 0x07, 0x76, 0x30,
-  0x07, 0x3f, 0x40, 0x01, 0x46, 0xe7, 0x30, 0x02, 0x31, 0x5c, 0xc2, 0x39,
-  0x8d, 0x34, 0x01, 0xcd, 0x24, 0xa1, 0x65, 0x18, 0x86, 0x21, 0x4d, 0xd3,
-  0x34, 0x4d, 0x07, 0x52, 0xe7, 0x08, 0x40, 0x61, 0x0a, 0x00, 0x00, 0x00,
-  0x13, 0x14, 0x72, 0xc0, 0x87, 0x74, 0x60, 0x87, 0x36, 0x68, 0x87, 0x79,
-  0x68, 0x03, 0x72, 0xc0, 0x87, 0x0d, 0xae, 0x50, 0x0e, 0x6d, 0xd0, 0x0e,
-  0x7a, 0x50, 0x0e, 0x6d, 0x00, 0x0f, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07,
-  0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0xa0, 0x07, 0x73, 0x20, 0x07,
-  0x6d, 0x90, 0x0e, 0x78, 0xa0, 0x07, 0x78, 0xd0, 0x06, 0xe9, 0x10, 0x07,
-  0x76, 0xa0, 0x07, 0x71, 0x60, 0x07, 0x6d, 0x90, 0x0e, 0x73, 0x20, 0x07,
-  0x7a, 0x30, 0x07, 0x72, 0xd0, 0x06, 0xe9, 0x60, 0x07, 0x74, 0xa0, 0x07,
-  0x76, 0x40, 0x07, 0x6d, 0x60, 0x0e, 0x71, 0x60, 0x07, 0x7a, 0x10, 0x07,
-  0x76, 0xd0, 0x06, 0xe6, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07,
-  0x6d, 0x60, 0x0e, 0x76, 0x40, 0x07, 0x7a, 0x60, 0x07, 0x74, 0xd0, 0x06,
-  0xee, 0x80, 0x07, 0x7a, 0x10, 0x07, 0x76, 0xa0, 0x07, 0x73, 0x20, 0x07,
-  0x7a, 0x60, 0x07, 0x74, 0x30, 0xe4, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0xc8, 0x43, 0x00, 0x01, 0x10, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x90, 0x67, 0x01, 0x02, 0x40,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x21, 0x4f, 0x03, 0x04,
-  0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x43, 0x1e, 0x08,
-  0x08, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0x3c,
-  0x12, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c,
-  0x79, 0x28, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x18, 0xf2, 0x58, 0x40, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x30, 0xe4, 0xd1, 0x80, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x60, 0xc8, 0xc3, 0x01, 0x01, 0x10, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0xc0, 0x90, 0xe7, 0x03, 0x02, 0x40, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x80, 0x21, 0x8f, 0x18, 0x00, 0x01, 0x20, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x16, 0x08, 0x00, 0x00, 0x00,
-  0x0d, 0x00, 0x00, 0x00, 0x32, 0x1e, 0x98, 0x14, 0x19, 0x11, 0x4c, 0x90,
-  0x8c, 0x09, 0x26, 0x47, 0xc6, 0x04, 0x43, 0x1a, 0x4a, 0xa0, 0x08, 0x8a,
-  0x61, 0x04, 0xa0, 0x30, 0x0a, 0x3d, 0xa0, 0x10, 0x0a, 0x30, 0x80, 0xc2,
-  0x11, 0x00, 0x62, 0x0b, 0x1c, 0x10, 0x10, 0x81, 0xd0, 0x19, 0x00, 0x5a,
-  0x67, 0x00, 0xc8, 0x9c, 0x01, 0x00, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00,
-  0x48, 0x00, 0x00, 0x00, 0x1a, 0x03, 0x4c, 0x90, 0x46, 0x02, 0x13, 0x44,
-  0x35, 0x18, 0x63, 0x0b, 0x73, 0x3b, 0x03, 0xb1, 0x2b, 0x93, 0x9b, 0x4b,
-  0x7b, 0x73, 0x03, 0x99, 0x71, 0xb9, 0x01, 0x41, 0xa1, 0x0b, 0x3b, 0x9b,
-  0x7b, 0x91, 0x2a, 0x62, 0x2a, 0x0a, 0x9a, 0x2a, 0xfa, 0x9a, 0xb9, 0x81,
-  0x79, 0x31, 0x4b, 0x73, 0x0b, 0x63, 0x4b, 0xd9, 0x10, 0x04, 0x13, 0x84,
-  0x81, 0x99, 0x20, 0x0c, 0xcd, 0x06, 0x61, 0x20, 0x26, 0x08, 0x83, 0xb3,
-  0x41, 0x18, 0x0c, 0x0a, 0x63, 0x73, 0x1b, 0x06, 0xc4, 0x20, 0x26, 0x08,
-  0xc3, 0x33, 0x41, 0x30, 0x03, 0x8c, 0xc0, 0x04, 0x61, 0x80, 0x26, 0x08,
-  0x61, 0x40, 0x6d, 0x58, 0x94, 0x85, 0x51, 0x94, 0xa1, 0x71, 0x1c, 0xa7,
-  0x98, 0x20, 0xa0, 0x81, 0xb5, 0x61, 0x19, 0x20, 0x46, 0x19, 0x86, 0xc6,
-  0x71, 0x9c, 0x62, 0xc3, 0x42, 0x2c, 0x8c, 0x42, 0x0c, 0x8d, 0xe3, 0x38,
-  0xc5, 0x86, 0xe1, 0x89, 0xa4, 0x09, 0xc2, 0x1a, 0x5c, 0x13, 0x84, 0x21,
-  0xda, 0x80, 0x28, 0x14, 0xa3, 0x28, 0x43, 0x05, 0x6c, 0x08, 0xac, 0x0d,
-  0x04, 0x30, 0x5d, 0xc0, 0x04, 0x41, 0x00, 0xa8, 0x1c, 0xc9, 0xa5, 0x91,
-  0x4d, 0x85, 0xb5, 0xc1, 0xb1, 0x95, 0x4d, 0x10, 0xd8, 0xa0, 0x9a, 0x20,
-  0x0c, 0xd2, 0x04, 0x61, 0x98, 0x36, 0x0c, 0xdd, 0x30, 0x6c, 0x20, 0x94,
-  0x8d, 0xf3, 0x36, 0x14, 0x99, 0x06, 0x60, 0x5f, 0x15, 0x36, 0x36, 0xbb,
-  0x36, 0x97, 0x34, 0xb2, 0x32, 0x37, 0xba, 0x29, 0x41, 0x50, 0x85, 0x0c,
-  0xcf, 0xc5, 0xae, 0x4c, 0x6e, 0x2e, 0xed, 0xcd, 0x6d, 0x4a, 0x40, 0x34,
-  0x21, 0xc3, 0x73, 0xb1, 0x0b, 0x63, 0xb3, 0x2b, 0x93, 0x9b, 0x12, 0x18,
-  0x75, 0xc8, 0xf0, 0x5c, 0xe6, 0xd0, 0xc2, 0xc8, 0xca, 0xe4, 0x9a, 0xde,
-  0xc8, 0xca, 0xd8, 0xa6, 0x04, 0x48, 0x19, 0x32, 0x3c, 0x17, 0xb9, 0xb2,
-  0xb9, 0xb7, 0x3a, 0xb9, 0xb1, 0xb2, 0xb9, 0x29, 0xc1, 0x55, 0x87, 0x0c,
-  0xcf, 0xa5, 0xcc, 0x8d, 0x4e, 0x2e, 0x0f, 0xea, 0x2d, 0xcd, 0x8d, 0x6e,
-  0x6e, 0x4a, 0xf0, 0x01, 0x79, 0x18, 0x00, 0x00, 0x51, 0x00, 0x00, 0x00,
-  0x33, 0x08, 0x80, 0x1c, 0xc4, 0xe1, 0x1c, 0x66, 0x14, 0x01, 0x3d, 0x88,
-  0x43, 0x38, 0x84, 0xc3, 0x8c, 0x42, 0x80, 0x07, 0x79, 0x78, 0x07, 0x73,
-  0x98, 0x71, 0x0c, 0xe6, 0x00, 0x0f, 0xed, 0x10, 0x0e, 0xf4, 0x80, 0x0e,
-  0x33, 0x0c, 0x42, 0x1e, 0xc2, 0xc1, 0x1d, 0xce, 0xa1, 0x1c, 0x66, 0x30,
-  0x05, 0x3d, 0x88, 0x43, 0x38, 0x84, 0x83, 0x1b, 0xcc, 0x03, 0x3d, 0xc8,
-  0x43, 0x3d, 0x8c, 0x03, 0x3d, 0xcc, 0x78, 0x8c, 0x74, 0x70, 0x07, 0x7b,
-  0x08, 0x07, 0x79, 0x48, 0x87, 0x70, 0x70, 0x07, 0x7a, 0x70, 0x03, 0x76,
-  0x78, 0x87, 0x70, 0x20, 0x87, 0x19, 0xcc, 0x11, 0x0e, 0xec, 0x90, 0x0e,
-  0xe1, 0x30, 0x0f, 0x6e, 0x30, 0x0f, 0xe3, 0xf0, 0x0e, 0xf0, 0x50, 0x0e,
-  0x33, 0x10, 0xc4, 0x1d, 0xde, 0x21, 0x1c, 0xd8, 0x21, 0x1d, 0xc2, 0x61,
-  0x1e, 0x66, 0x30, 0x89, 0x3b, 0xbc, 0x83, 0x3b, 0xd0, 0x43, 0x39, 0xb4,
-  0x03, 0x3c, 0xbc, 0x83, 0x3c, 0x84, 0x03, 0x3b, 0xcc, 0xf0, 0x14, 0x76,
-  0x60, 0x07, 0x7b, 0x68, 0x07, 0x37, 0x68, 0x87, 0x72, 0x68, 0x07, 0x37,
-  0x80, 0x87, 0x70, 0x90, 0x87, 0x70, 0x60, 0x07, 0x76, 0x28, 0x07, 0x76,
-  0xf8, 0x05, 0x76, 0x78, 0x87, 0x77, 0x80, 0x87, 0x5f, 0x08, 0x87, 0x71,
-  0x18, 0x87, 0x72, 0x98, 0x87, 0x79, 0x98, 0x81, 0x2c, 0xee, 0xf0, 0x0e,
-  0xee, 0xe0, 0x0e, 0xf5, 0xc0, 0x0e, 0xec, 0x30, 0x03, 0x62, 0xc8, 0xa1,
-  0x1c, 0xe4, 0xa1, 0x1c, 0xcc, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xdc, 0x61,
-  0x1c, 0xca, 0x21, 0x1c, 0xc4, 0x81, 0x1d, 0xca, 0x61, 0x06, 0xd6, 0x90,
-  0x43, 0x39, 0xc8, 0x43, 0x39, 0x98, 0x43, 0x39, 0xc8, 0x43, 0x39, 0xb8,
-  0xc3, 0x38, 0x94, 0x43, 0x38, 0x88, 0x03, 0x3b, 0x94, 0xc3, 0x2f, 0xbc,
-  0x83, 0x3c, 0xfc, 0x82, 0x3b, 0xd4, 0x03, 0x3b, 0xb0, 0xc3, 0x0c, 0xc4,
-  0x21, 0x07, 0x7c, 0x70, 0x03, 0x7a, 0x28, 0x87, 0x76, 0x80, 0x87, 0x19,
-  0xd1, 0x43, 0x0e, 0xf8, 0xe0, 0x06, 0xe4, 0x20, 0x0e, 0xe7, 0xe0, 0x06,
-  0xf6, 0x10, 0x0e, 0xf2, 0xc0, 0x0e, 0xe1, 0x90, 0x0f, 0xef, 0x50, 0x0f,
-  0xf4, 0x30, 0x83, 0x81, 0xc8, 0x01, 0x1f, 0xdc, 0x40, 0x1c, 0xe4, 0xa1,
-  0x1c, 0xc2, 0x61, 0x1d, 0xdc, 0x40, 0x1c, 0xe4, 0x01, 0x00, 0x00, 0x00,
-  0x71, 0x20, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x06, 0xa0, 0x80, 0x11,
-  0x32, 0xb0, 0x00, 0xf3, 0x2c, 0x84, 0x19, 0x40, 0xc3, 0xe5, 0x3b, 0x8f,
-  0x1f, 0x20, 0x0d, 0x10, 0x61, 0x7e, 0x71, 0xdb, 0xa6, 0xb0, 0x0d, 0x97,
-  0xef, 0x3c, 0xbe, 0x10, 0x50, 0x45, 0x41, 0x44, 0xa5, 0x03, 0x0c, 0x25,
-  0x61, 0x00, 0x02, 0xe6, 0x23, 0xb7, 0x6d, 0x0b, 0xd2, 0x70, 0xf9, 0xce,
-  0xe3, 0x0b, 0x11, 0x01, 0x4c, 0x44, 0x08, 0x34, 0xc3, 0x42, 0xd8, 0x81,
-  0x33, 0x5c, 0xbe, 0xf3, 0xf8, 0x83, 0x33, 0xe1, 0x7e, 0x71, 0xdb, 0x86,
-  0x70, 0x0d, 0x97, 0xef, 0x3c, 0x7e, 0x04, 0x58, 0x1b, 0x55, 0x14, 0x44,
-  0x54, 0x3a, 0xc0, 0xe0, 0x17, 0xb5, 0x6e, 0x02, 0xd7, 0x70, 0xf9, 0xce,
-  0xe3, 0x47, 0x80, 0xb5, 0x51, 0x45, 0x41, 0x44, 0xa5, 0x03, 0x0c, 0x3e,
-  0x52, 0xeb, 0x36, 0x80, 0x0d, 0x97, 0xef, 0x3c, 0x7e, 0x04, 0x58, 0x1b,
-  0x55, 0x14, 0x44, 0xc4, 0x4e, 0x4e, 0x44, 0xf8, 0x48, 0xad, 0x5b, 0x81,
-  0x34, 0x5c, 0xbe, 0xf3, 0xf8, 0x13, 0x11, 0x4d, 0x08, 0x10, 0x61, 0x7e,
-  0x71, 0xdb, 0x96, 0x20, 0x0d, 0x97, 0xef, 0x3c, 0xfe, 0x44, 0x44, 0x13,
-  0x02, 0x44, 0x98, 0x8f, 0xdc, 0xb6, 0x05, 0x48, 0xc3, 0xe5, 0x3b, 0x8f,
-  0x3f, 0x1d, 0x11, 0x01, 0x0c, 0xe2, 0xe0, 0x23, 0xb7, 0x6d, 0x04, 0xcf,
-  0x70, 0xf9, 0xce, 0xe3, 0x53, 0x0d, 0x10, 0x61, 0x7e, 0x71, 0xdb, 0x00,
-  0x61, 0x20, 0x00, 0x00, 0x0c, 0x13, 0x00, 0x00, 0x13, 0x04, 0x24, 0x14,
-  0x0b, 0x04, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, 0x34, 0x14, 0x58, 0xd9,
-  0x95, 0xa5, 0x40, 0x0d, 0x94, 0x51, 0x21, 0x15, 0x57, 0xc1, 0x95, 0x5c,
-  0xd9, 0x14, 0x4b, 0x61, 0x0a, 0x14, 0x4d, 0xe9, 0x06, 0x94, 0x43, 0x29,
-  0x90, 0x31, 0x03, 0x40, 0x48, 0x09, 0x14, 0x01, 0x3d, 0x23, 0x00, 0x63,
-  0x04, 0x20, 0x08, 0x82, 0xf8, 0x37, 0x46, 0x00, 0x82, 0x20, 0x48, 0xff,
-  0xc2, 0x18, 0x01, 0x08, 0x82, 0x20, 0xfd, 0x8d, 0x11, 0x80, 0x20, 0x08,
-  0xf2, 0xdf, 0x18, 0x01, 0x08, 0x82, 0x20, 0xfe, 0x0b, 0x63, 0x04, 0x20,
-  0x08, 0x82, 0x21, 0x38, 0x8c, 0x11, 0x80, 0x20, 0x08, 0xea, 0xdf, 0x18,
-  0x01, 0x08, 0x82, 0xa0, 0xfe, 0x0b, 0x63, 0x04, 0x20, 0x08, 0x82, 0xf0,
-  0x37, 0x46, 0x00, 0x82, 0x20, 0x08, 0xff, 0xc2, 0x18, 0x01, 0x08, 0x82,
-  0x20, 0x08, 0x06, 0x00, 0x23, 0x06, 0x09, 0x00, 0x82, 0x60, 0x20, 0x06,
-  0x6d, 0x70, 0x39, 0x6b, 0xb0, 0x06, 0x64, 0x30, 0x62, 0x90, 0x00, 0x20,
-  0x08, 0x06, 0x62, 0xe0, 0x06, 0xd8, 0xd3, 0x06, 0x6d, 0x50, 0x06, 0x23,
-  0x06, 0x09, 0x00, 0x82, 0x60, 0x20, 0x06, 0x6f, 0x90, 0x41, 0x6b, 0xb0,
-  0x06, 0x66, 0x30, 0x62, 0x90, 0x00, 0x20, 0x08, 0x06, 0x62, 0x00, 0x07,
-  0x1a, 0xc4, 0x06, 0x6c, 0x70, 0x06, 0x23, 0x06, 0x06, 0x00, 0x82, 0x60,
-  0x40, 0xec, 0x81, 0xd5, 0x06, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xf0,
-  0xcd, 0xc1, 0x18, 0x08, 0x6e, 0x30, 0x9a, 0x10, 0x00, 0x15, 0x0c, 0x30,
-  0x9a, 0x30, 0x04, 0xc3, 0x0d, 0x42, 0x40, 0x06, 0xb3, 0x0c, 0xc1, 0x08,
-  0x05, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xf0, 0xe1, 0x01, 0x1a, 0x1c,
-  0x76, 0x30, 0x9a, 0x10, 0x0c, 0x17, 0x3c, 0x35, 0x9a, 0x30, 0x08, 0x17,
-  0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x5f, 0x1f, 0xb4, 0x01,
-  0x03, 0x06, 0xa3, 0x09, 0x01, 0x30, 0xdc, 0x10, 0xe8, 0x01, 0x18, 0x4c,
-  0x37, 0x50, 0x5e, 0x30, 0xdd, 0x50, 0x69, 0x42, 0x21, 0x01, 0x4c, 0x37,
-  0x5c, 0x1c, 0x51, 0x48, 0x00, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xf0,
-  0x95, 0x42, 0x1d, 0x50, 0x67, 0x30, 0x9a, 0x10, 0x04, 0xa3, 0x09, 0x82,
-  0x30, 0x9a, 0x30, 0x0c, 0x15, 0x08, 0x52, 0x03, 0x21, 0x15, 0x0c, 0x52,
-  0x57, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xf0, 0xb5, 0x42, 0x1f,
-  0x70, 0xa9, 0x30, 0x9a, 0x10, 0x00, 0x15, 0x0c, 0x52, 0x5b, 0x10, 0x15,
-  0x20, 0x33, 0x9a, 0x50, 0x04, 0x15, 0x08, 0x52, 0x44, 0x10, 0x15, 0x34,
-  0x33, 0x9a, 0x90, 0x08, 0x15, 0x08, 0x52, 0x44, 0x10, 0xd7, 0x3c, 0x75,
-  0xc5, 0x53, 0x37, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x1f,
-  0x2f, 0xb0, 0xc2, 0x1a, 0xd0, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08,
-  0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0xc3, 0x11, 0x4f, 0x1d, 0xf1,
-  0xd4, 0x11, 0x4f, 0x1d, 0xf1, 0xd4, 0x88, 0x41, 0x03, 0x80, 0x20, 0x18,
-  0x58, 0xe8, 0xf0, 0x0a, 0xcc, 0xa2, 0x8c, 0x02, 0x31, 0x08, 0x81, 0x09,
-  0x01, 0x7c, 0x4e, 0x18, 0x66, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x34,
-  0x75, 0xb8, 0x85, 0x3c, 0x08, 0xc6, 0x01, 0x15, 0xca, 0x61, 0x34, 0x21,
-  0x00, 0x2e, 0x78, 0xf0, 0x8a, 0x61, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1,
-  0x40, 0x73, 0x87, 0x5d, 0xe8, 0x83, 0xe0, 0x1c, 0x58, 0x21, 0x1d, 0x46,
-  0x13, 0x02, 0xe0, 0x82, 0x07, 0x87, 0x1b, 0xe2, 0xa0, 0x1d, 0xc0, 0xc0,
-  0x8c, 0x56, 0x80, 0x8f, 0x0d, 0xae, 0x00, 0x9f, 0x59, 0x06, 0x61, 0x18,
-  0x4c, 0x38, 0x05, 0xf9, 0x98, 0x80, 0x0a, 0xf2, 0x31, 0x3d, 0x68, 0x05,
-  0xf8, 0x58, 0x1e, 0xb8, 0x02, 0x7c, 0x8c, 0x10, 0xe4, 0x63, 0x84, 0x20,
-  0x9f, 0x59, 0x02, 0xc2, 0xfc, 0x00, 0x91, 0x8f, 0x21, 0x7d, 0x20, 0x1f,
-  0x13, 0x66, 0x01, 0x3e, 0x26, 0xd0, 0x02, 0x7c, 0x4c, 0x88, 0x05, 0xf9,
-  0x98, 0x20, 0x0b, 0xf2, 0x99, 0x25, 0x20, 0x06, 0x2a, 0x1e, 0x48, 0x20,
-  0x86, 0x81, 0x8a, 0x07, 0x12, 0x88, 0x61, 0x34, 0xa1, 0x15, 0x84, 0xe1,
-  0x86, 0xe0, 0x1f, 0xc0, 0x60, 0x96, 0xa1, 0x30, 0x82, 0x11, 0x03, 0x03,
-  0x00, 0x41, 0x30, 0x80, 0x50, 0xe2, 0x1c, 0x88, 0x11, 0x03, 0x03, 0x00,
-  0x41, 0x30, 0x80, 0x52, 0x02, 0x1d, 0x88, 0x59, 0x02, 0x63, 0xa0, 0xe2,
-  0x21, 0x0a, 0x86, 0x18, 0xa8, 0x78, 0x88, 0x82, 0x21, 0x86, 0x23, 0x04,
-  0x53, 0x20, 0xbe, 0xe1, 0x88, 0xa1, 0x14, 0x84, 0xaf, 0x84, 0x60, 0x87,
-  0x23, 0x88, 0x54, 0x20, 0xbe, 0x12, 0x82, 0x1d, 0x8e, 0x30, 0x4e, 0x41,
-  0xf8, 0x2a, 0x10, 0x76, 0x96, 0xe1, 0xd0, 0x82, 0xd1, 0x04, 0x5d, 0x18,
-  0x86, 0x1b, 0x02, 0x96, 0x00, 0x83, 0x59, 0x06, 0x24, 0x09, 0xca, 0x16,
-  0xfa, 0x01, 0x2e, 0x78, 0x6a, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xa8,
-  0x9a, 0xf0, 0x87, 0xe6, 0x1d, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80,
-  0xb2, 0x09, 0x7f, 0x08, 0x84, 0xc2, 0x85, 0x90, 0x80, 0x0b, 0x9e, 0x1a,
-  0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x2a, 0x27, 0x44, 0x02, 0x9a, 0x87,
-  0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0x74, 0x42, 0x24, 0x02, 0x61,
-  0x96, 0x40, 0x1b, 0x6e, 0x50, 0x68, 0x02, 0x0c, 0x66, 0x19, 0x14, 0x2d,
-  0x30, 0x5b, 0xc0, 0x85, 0xf8, 0xcc, 0x32, 0x2c, 0xce, 0x64, 0xb9, 0x50,
-  0xc5, 0xc7, 0x02, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x14,
-  0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x3d, 0xa1, 0xc3, 0x0d, 0xc1, 0x4e,
-  0x80, 0xc1, 0x2c, 0x03, 0xd3, 0x04, 0x36, 0x84, 0x03, 0x7c, 0x66, 0x09,
-  0x24, 0x03, 0x07, 0x22, 0x3e, 0xb3, 0x04, 0xd2, 0x2c, 0xc3, 0x23, 0x71,
-  0xf6, 0x85, 0x43, 0x7c, 0x2c, 0x60, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c,
-  0x65, 0xc1, 0x23, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x71, 0x16, 0x3a, 0xdc,
-  0x10, 0x94, 0x05, 0x18, 0xcc, 0x32, 0x40, 0x51, 0x60, 0xe9, 0x30, 0xc4,
-  0x67, 0x96, 0x40, 0x32, 0x82, 0x1d, 0xe0, 0x33, 0x4b, 0x20, 0x0d, 0xb4,
-  0x3c, 0x18, 0x63, 0x35, 0x04, 0x24, 0x44, 0xb2, 0xe0, 0x98, 0x3a, 0xb8,
-  0x43, 0x7c, 0x66, 0x19, 0x26, 0xcb, 0x0c, 0xec, 0x1d, 0xd4, 0x20, 0x3e,
-  0x16, 0x08, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0xa0, 0x90, 0x8f,
-  0x15, 0x41, 0x7c, 0x8a, 0x98, 0x0b, 0x1d, 0x6e, 0x08, 0xe2, 0x02, 0x0c,
-  0x66, 0x19, 0xa8, 0x2a, 0xb0, 0xe1, 0x1e, 0xe0, 0x33, 0x4b, 0xa0, 0x19,
-  0x3d, 0x10, 0xf1, 0x99, 0x25, 0xd0, 0x66, 0x19, 0x2e, 0xcd, 0x0d, 0x8c,
-  0x0e, 0xea, 0x21, 0x3e, 0x16, 0x30, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e,
-  0xb2, 0xe0, 0x91, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xe8, 0x0b, 0x1d, 0x6e,
-  0x08, 0xf6, 0x02, 0x0c, 0x66, 0x19, 0xb0, 0x2c, 0xb0, 0x7e, 0x18, 0xe2,
-  0x33, 0x4b, 0xa0, 0x19, 0x21, 0x12, 0xf0, 0x99, 0x25, 0xd0, 0x06, 0x8a,
-  0x1e, 0x71, 0x40, 0xfc, 0x21, 0xf1, 0x07, 0x83, 0x0d, 0x32, 0x36, 0xc0,
-  0xd8, 0xc0, 0x62, 0x83, 0x8a, 0x0d, 0xa8, 0x81, 0xa2, 0x87, 0x17, 0x10,
-  0x7f, 0x48, 0xfc, 0xc1, 0x20, 0x32, 0x03, 0xf3, 0x07, 0x0b, 0xab, 0x34,
-  0xea, 0xe8, 0xe1, 0xa9, 0x59, 0x86, 0x6d, 0x0e, 0x4a, 0x61, 0x34, 0x61,
-  0x26, 0x86, 0xe1, 0x86, 0x40, 0x34, 0xc0, 0x60, 0x96, 0x81, 0xf3, 0x82,
-  0xe1, 0x88, 0xc2, 0x2c, 0x86, 0xef, 0x8c, 0x61, 0x86, 0x1b, 0x82, 0x98,
-  0x20, 0x83, 0x1a, 0x02, 0x1d, 0x8e, 0x40, 0xd4, 0x62, 0xf8, 0x2a, 0x10,
-  0xf4, 0x94, 0x61, 0x86, 0x1b, 0x02, 0x9a, 0x20, 0x83, 0x0a, 0x06, 0x9d,
-  0x65, 0xe8, 0xe4, 0x20, 0x38, 0x7c, 0x18, 0xe6, 0x9a, 0x61, 0x46, 0x0c,
-  0x0e, 0x00, 0x04, 0xc1, 0xe0, 0x9b, 0x8d, 0xd1, 0x10, 0x0b, 0xd8, 0x18,
-  0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04,
-  0x62, 0x28, 0xe2, 0x90, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xe0, 0x74,
-  0x43, 0x35, 0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x6e,
-  0x37, 0x56, 0x83, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xe0,
-  0x78, 0x83, 0x35, 0x24, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83,
-  0x85, 0x3c, 0x56, 0x83, 0x2d, 0x82, 0xdb, 0xe0, 0x8b, 0xdc, 0x18, 0x4d,
-  0x08, 0x80, 0x0b, 0x9e, 0x9a, 0x25, 0x90, 0x83, 0xe1, 0x86, 0xac, 0x37,
-  0xc0, 0x60, 0x96, 0xe1, 0x03, 0x83, 0xa0, 0xce, 0xc2, 0x35, 0xe0, 0x82,
-  0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x32, 0x8f, 0xd7, 0xf8,
-  0x40, 0x63, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xa8, 0xf3, 0x78, 0x8d,
-  0x40, 0xb8, 0x60, 0x98, 0x52, 0x8b, 0xd9, 0x80, 0x0b, 0x9e, 0x1a, 0x31,
-  0x38, 0x00, 0x10, 0x04, 0x03, 0x6a, 0x3d, 0x68, 0x63, 0x0c, 0x4a, 0x63,
-  0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x28, 0xf6, 0xa0, 0x8d, 0x40, 0xb8,
-  0x60, 0x98, 0x0b, 0x9e, 0xba, 0xe3, 0xa9, 0x9b, 0x89, 0x61, 0x0e, 0x0d,
-  0x86, 0x39, 0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04,
-  0x83, 0x0f, 0x3e, 0xc0, 0xe3, 0x2f, 0xda, 0x63, 0x34, 0x21, 0x00, 0x46,
-  0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44,
-  0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80, 0xbb, 0x8f, 0xf3, 0x48, 0x88,
-  0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x38, 0xfc, 0x40, 0x8f, 0x84,
-  0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80, 0xcb, 0x8f, 0xf4, 0x48,
-  0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x96, 0x10, 0x41, 0x8f,
-  0xd4, 0x08, 0xe8, 0x23, 0x37, 0xec, 0x63, 0x34, 0x21, 0x00, 0x2e, 0x78,
-  0x6a, 0x96, 0x40, 0x0e, 0x86, 0x1b, 0xec, 0x20, 0x3f, 0xc0, 0x60, 0x96,
-  0x21, 0x0c, 0xe4, 0x20, 0xb0, 0xbd, 0xe8, 0x8b, 0xf8, 0x0c, 0x47, 0xec,
-  0x81, 0x5f, 0x10, 0xdf, 0x2c, 0x83, 0x18, 0x94, 0x41, 0x60, 0x7f, 0xc1,
-  0x07, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05,
-  0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0xc4, 0x88, 0xe8, 0x70, 0x43, 0x10,
-  0x22, 0x60, 0x30, 0xcb, 0x30, 0x06, 0x64, 0x10, 0xd8, 0x70, 0x1a, 0xf0,
-  0x99, 0x25, 0x48, 0x03, 0x33, 0x0d, 0x22, 0x3e, 0xb3, 0x04, 0x69, 0x30,
-  0x1c, 0x61, 0x0a, 0xa7, 0x21, 0x7c, 0xb3, 0x0c, 0x66, 0x90, 0x06, 0x81,
-  0x9d, 0x02, 0x6a, 0xc4, 0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73, 0xc1,
-  0x53, 0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x2e, 0xa2, 0xc3,
-  0x0d, 0x01, 0x8b, 0x80, 0xc1, 0x2c, 0xc3, 0x19, 0xa0, 0x41, 0x60, 0xb0,
-  0x31, 0xc4, 0x67, 0x96, 0x20, 0x0d, 0x8c, 0x98, 0x0d, 0xf8, 0xcc, 0x12,
-  0xa4, 0xc1, 0x40, 0xcb, 0xa3, 0x8d, 0x01, 0x46, 0x06, 0xc4, 0x19, 0x08,
-  0x68, 0x20, 0x16, 0x65, 0x70, 0xc1, 0x30, 0x26, 0x1b, 0xb6, 0x11, 0x9f,
-  0xe1, 0x88, 0x59, 0xb8, 0x0d, 0xe2, 0x9b, 0x65, 0x50, 0x83, 0x36, 0x08,
-  0x0c, 0x37, 0x68, 0x21, 0x3e, 0x16, 0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b,
-  0x9e, 0xb2, 0xc0, 0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xe0, 0x11, 0x1d,
-  0x6e, 0x08, 0x74, 0x04, 0x0c, 0x66, 0x19, 0xd6, 0x80, 0x0d, 0x02, 0x1b,
-  0xc0, 0x03, 0x3e, 0xb3, 0x04, 0x71, 0x60, 0xbd, 0x41, 0xc4, 0x67, 0x96,
-  0x20, 0x0e, 0x86, 0x23, 0x7c, 0xc1, 0x37, 0x84, 0x6f, 0x96, 0xc1, 0x0d,
-  0xe2, 0x20, 0xb0, 0x5f, 0xf8, 0x8d, 0xf8, 0x58, 0xe0, 0xd0, 0xe7, 0x82,
-  0x61, 0x2e, 0x78, 0xca, 0x82, 0x48, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0xe2,
-  0x4c, 0x74, 0xb8, 0x21, 0x28, 0x13, 0x30, 0x98, 0x65, 0x78, 0x03, 0x38,
-  0x08, 0xec, 0x3c, 0x86, 0xf8, 0xcc, 0x12, 0xc4, 0x81, 0x11, 0xec, 0x01,
-  0x9f, 0x59, 0x82, 0x38, 0x18, 0x68, 0x79, 0xb4, 0x35, 0xc0, 0xd8, 0x80,
-  0x78, 0x03, 0x01, 0x0e, 0x64, 0xa3, 0x0d, 0x2e, 0x18, 0xe6, 0x82, 0xa7,
-  0x6e, 0x7b, 0xea, 0x70, 0x63, 0x98, 0x6b, 0x87, 0x61, 0x8e, 0x18, 0xe6,
-  0x88, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0xe0, 0xab, 0x93, 0x32,
-  0x21, 0x11, 0x39, 0x19, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d,
-  0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00,
-  0x41, 0x30, 0xe0, 0xf8, 0x84, 0x4d, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00,
-  0x10, 0x04, 0x03, 0xae, 0x4f, 0xda, 0x24, 0x21, 0x82, 0x11, 0x03, 0x04,
-  0x00, 0x41, 0x30, 0xe0, 0xfc, 0xc4, 0x4d, 0x12, 0x22, 0x18, 0x31, 0x50,
-  0x00, 0x10, 0x04, 0x83, 0xc5, 0x54, 0xda, 0xc4, 0x45, 0x82, 0x3c, 0xf1,
-  0x91, 0x3d, 0x19, 0x4d, 0x08, 0x80, 0x0b, 0x9e, 0x9a, 0x25, 0x90, 0x83,
-  0x81, 0x96, 0xc7, 0x34, 0x3a, 0x3f, 0xe2, 0x58, 0xe2, 0x13, 0xe2, 0xc0,
-  0x8f, 0xc0, 0xe0, 0x02, 0x63, 0x46, 0x0c, 0x1c, 0x00, 0x04, 0xc1, 0xa0,
-  0x51, 0x95, 0x34, 0xa9, 0x11, 0x17, 0xf1, 0x93, 0x80, 0x4c, 0xc8, 0x84,
-  0x4c, 0xc6, 0x04, 0x54, 0x66, 0x09, 0x46, 0x68, 0xb8, 0x61, 0x34, 0xfc,
-  0x04, 0x0c, 0x66, 0x19, 0xe8, 0x20, 0x26, 0x82, 0x11, 0x03, 0x03, 0x00,
-  0x41, 0x30, 0x80, 0x54, 0x85, 0x4d, 0x42, 0x62, 0xc4, 0xc0, 0x00, 0x40,
-  0x10, 0x0c, 0xa0, 0x55, 0x69, 0x93, 0x90, 0x30, 0x61, 0x4c, 0xe0, 0x63,
-  0x02, 0x99, 0xc0, 0x67, 0x34, 0xa1, 0x46, 0x86, 0xe1, 0x86, 0x80, 0x54,
-  0xc0, 0x60, 0x96, 0xa1, 0x0e, 0xee, 0x20, 0x18, 0x8e, 0x30, 0xd0, 0x64,
-  0xf8, 0xee, 0x18, 0x66, 0xb8, 0x21, 0x98, 0x11, 0x32, 0xa8, 0x21, 0xd0,
-  0xe1, 0x88, 0x84, 0x4d, 0x86, 0xaf, 0x02, 0x41, 0x6f, 0x19, 0x66, 0xb8,
-  0x21, 0xb0, 0x11, 0x32, 0xa8, 0x60, 0xd0, 0x59, 0x06, 0x3b, 0x58, 0x85,
-  0xe0, 0xf4, 0x63, 0x98, 0x7b, 0x89, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04,
-  0xc1, 0xe0, 0xab, 0x95, 0x52, 0x21, 0x13, 0x59, 0x19, 0x4d, 0x08, 0x80,
-  0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0xe2,
-  0x90, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xe0, 0x78, 0x85, 0x55, 0x0e,
-  0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xae, 0x57, 0x5a, 0x85,
-  0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xe0, 0x7c, 0xc5, 0x55,
-  0x24, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0xc5, 0x5c, 0x5a,
-  0xc5, 0x4d, 0x82, 0x5c, 0xf1, 0x93, 0x5d, 0x19, 0x4d, 0x08, 0x80, 0x0b,
-  0x9e, 0x9a, 0x25, 0x58, 0x85, 0xe1, 0x86, 0xec, 0x57, 0xc0, 0x60, 0x96,
-  0x01, 0x0f, 0xf2, 0x20, 0xa8, 0x34, 0x81, 0x15, 0xb8, 0xe0, 0xa9, 0x11,
-  0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0xd0, 0x25, 0x56, 0xc0, 0x40, 0x54,
-  0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x4a, 0x97, 0x58, 0x09, 0x84,
-  0x0b, 0x86, 0x29, 0x36, 0xa9, 0x15, 0xb8, 0xe0, 0xa9, 0x11, 0x83, 0x03,
-  0x00, 0x41, 0x30, 0xa0, 0xda, 0xc5, 0x56, 0xc8, 0xe0, 0x54, 0x46, 0x0c,
-  0x0e, 0x00, 0x04, 0xc1, 0x80, 0x72, 0x17, 0x5b, 0x09, 0x84, 0x0b, 0x86,
-  0xb9, 0xe0, 0xa9, 0x3b, 0x9e, 0xba, 0x1a, 0x19, 0xe6, 0xd4, 0x62, 0x98,
-  0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xf8,
-  0xe4, 0x45, 0x5c, 0x42, 0xe5, 0x5d, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41,
-  0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4,
-  0x00, 0x01, 0x40, 0x10, 0x0c, 0xb8, 0x7c, 0x49, 0x97, 0x84, 0x08, 0x46,
-  0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80, 0xd3, 0x17, 0x75, 0x49, 0x88, 0x60,
-  0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb8, 0x7d, 0x59, 0x97, 0x84, 0x08,
-  0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0x19, 0x19, 0x75, 0x59, 0x95,
-  0xc0, 0x5e, 0x76, 0x05, 0x5f, 0x46, 0x13, 0x02, 0xe0, 0x82, 0xa7, 0x66,
-  0x09, 0x56, 0x61, 0xb8, 0xc1, 0x0e, 0xf6, 0x05, 0x0c, 0x66, 0x19, 0xf4,
-  0x60, 0x15, 0x02, 0xeb, 0x93, 0x3f, 0x89, 0xcf, 0x70, 0x04, 0x1f, 0x80,
-  0x0a, 0xf1, 0xcd, 0x32, 0xec, 0x81, 0x1f, 0x04, 0x16, 0x2a, 0x7d, 0x10,
-  0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x60, 0xc8,
-  0xc7, 0x8a, 0x20, 0x3e, 0x45, 0x94, 0x8c, 0x0e, 0x37, 0x04, 0x23, 0x03,
-  0x06, 0xb3, 0x0c, 0x7c, 0xd0, 0x07, 0x81, 0x0d, 0xa9, 0x02, 0x9f, 0x59,
-  0x02, 0x51, 0x30, 0x54, 0x21, 0xe2, 0x33, 0x4b, 0x20, 0x0a, 0xc3, 0x11,
-  0xa7, 0x90, 0x2a, 0xc2, 0x37, 0xcb, 0xf0, 0x07, 0xa2, 0x10, 0x18, 0x2a,
-  0xa8, 0x4a, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65,
-  0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x01, 0x33, 0x3a, 0xdc, 0x10,
-  0xb8, 0x0c, 0x18, 0xcc, 0x32, 0x80, 0x42, 0x28, 0x04, 0x26, 0x2b, 0x43,
-  0x7c, 0x66, 0x09, 0x44, 0xc1, 0x88, 0x5a, 0x81, 0xcf, 0x2c, 0x81, 0x28,
-  0x0c, 0xb4, 0x3c, 0x1a, 0x1f, 0x60, 0x7d, 0x40, 0x80, 0x82, 0x10, 0x0a,
-  0x64, 0xe1, 0x07, 0x17, 0x0c, 0x63, 0xb4, 0x82, 0x2b, 0xf1, 0x19, 0x8e,
-  0xa0, 0x85, 0x5c, 0x21, 0xbe, 0x59, 0x86, 0x51, 0x30, 0x85, 0xc0, 0x74,
-  0xa5, 0x16, 0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29,
-  0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0x9f, 0xd1, 0xe1, 0x86,
-  0x80, 0x67, 0xc0, 0x60, 0x96, 0x81, 0x14, 0x4a, 0x21, 0xb0, 0x41, 0x5c,
-  0xe0, 0x33, 0x4b, 0xa0, 0x0a, 0xf6, 0x2b, 0x44, 0x7c, 0x66, 0x09, 0x54,
-  0x61, 0x38, 0xe2, 0x17, 0xc0, 0x45, 0xf8, 0x66, 0x19, 0x4e, 0x41, 0x15,
-  0x02, 0x03, 0x87, 0x70, 0x89, 0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6,
-  0x82, 0xa7, 0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xd2, 0x46,
-  0x87, 0x1b, 0x82, 0xb3, 0x01, 0x83, 0x59, 0x06, 0x54, 0x48, 0x85, 0xc0,
-  0xd2, 0x65, 0x88, 0xcf, 0x2c, 0x81, 0x2a, 0x18, 0xe1, 0x2e, 0xf0, 0x99,
-  0x25, 0x50, 0x85, 0x81, 0x96, 0x47, 0x23, 0x05, 0xac, 0x14, 0x08, 0x54,
-  0x10, 0x52, 0x81, 0x36, 0x4c, 0xe1, 0x82, 0x61, 0x2e, 0x78, 0xea, 0xb6,
-  0xa7, 0x4e, 0x57, 0x86, 0xb9, 0xf7, 0x18, 0xe6, 0x88, 0x61, 0x8e, 0x18,
-  0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xbe, 0xbb, 0x39, 0x1b, 0x93,
-  0xa1, 0x9b, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41,
-  0x18, 0x4d, 0x20, 0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04,
-  0x03, 0xce, 0x6f, 0xdc, 0x26, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41,
-  0x30, 0xe0, 0xfe, 0xe6, 0x6d, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10,
-  0x04, 0x03, 0x0e, 0x74, 0xe0, 0x26, 0x21, 0x82, 0x11, 0x03, 0x05, 0x00,
-  0x41, 0x30, 0x58, 0x50, 0xe7, 0x6d, 0x60, 0x26, 0xd8, 0x1b, 0xb0, 0xe9,
-  0x9b, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xa9, 0x59, 0x82, 0x55, 0x18, 0x68,
-  0x79, 0x4c, 0xc3, 0x0e, 0x44, 0xad, 0x0e, 0x58, 0x02, 0x0f, 0x04, 0x55,
-  0x10, 0xb5, 0x3c, 0x98, 0x65, 0x60, 0x05, 0x57, 0xd8, 0x87, 0xe1, 0x08,
-  0x7f, 0x10, 0x9b, 0xe1, 0xbb, 0x7f, 0x18, 0x66, 0xb8, 0x21, 0x68, 0x19,
-  0x32, 0xa8, 0x21, 0xd0, 0xe1, 0x88, 0x91, 0x30, 0x9b, 0xe1, 0xab, 0x40,
-  0xd0, 0x2b, 0x89, 0x61, 0x86, 0x1b, 0x02, 0x98, 0x21, 0x83, 0x0a, 0x06,
-  0x9d, 0x65, 0x68, 0x05, 0x71, 0x08, 0x8e, 0x5e, 0x86, 0xb9, 0x14, 0x19,
-  0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xbe, 0xd7, 0xf9, 0x1b, 0x9f,
-  0x61, 0x9d, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41,
-  0x18, 0x4d, 0x20, 0x86, 0x22, 0x0e, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04,
-  0x03, 0xce, 0x76, 0x4c, 0xe7, 0x20, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41,
-  0x30, 0xe0, 0x6e, 0xe7, 0x74, 0x18, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10,
-  0x04, 0x03, 0x0e, 0x77, 0x50, 0x47, 0x22, 0x82, 0x11, 0x03, 0x05, 0x00,
-  0x41, 0x30, 0x58, 0xc0, 0xe7, 0x74, 0xd0, 0x26, 0x98, 0x1d, 0xbc, 0xa9,
-  0x9d, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xa9, 0x59, 0x02, 0x71, 0x18, 0x6e,
-  0x98, 0x89, 0xdc, 0x01, 0x83, 0x59, 0x86, 0x57, 0x80, 0x85, 0xa0, 0xc6,
-  0x46, 0x75, 0xe0, 0x82, 0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80,
-  0x12, 0x9f, 0xd5, 0xc1, 0x09, 0xbe, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04,
-  0x03, 0x6a, 0x7c, 0x56, 0x27, 0x10, 0x2e, 0x18, 0xa6, 0xcc, 0xe6, 0x75,
-  0xe0, 0x82, 0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x3a, 0x1f,
-  0xd8, 0xf1, 0x89, 0xd0, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x0a,
-  0x7d, 0x60, 0x27, 0x10, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0xee, 0x78, 0xea,
-  0x5e, 0x66, 0x98, 0x23, 0x93, 0x61, 0x8e, 0x18, 0xe6, 0x88, 0x61, 0x46,
-  0x0c, 0x0e, 0x00, 0x04, 0xc1, 0xe0, 0x63, 0x1f, 0xde, 0xd9, 0x9b, 0xf4,
-  0x19, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1,
-  0x04, 0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xe0,
-  0xe6, 0x67, 0x7c, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03,
-  0x8e, 0x7e, 0xc8, 0x27, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30,
-  0xe0, 0xea, 0xa7, 0x7c, 0x12, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04,
-  0x83, 0xa5, 0x7f, 0xc8, 0xa7, 0x74, 0x02, 0xf8, 0xa9, 0x1d, 0xf9, 0x19,
-  0x4d, 0x08, 0x80, 0x0b, 0x9e, 0x9a, 0x25, 0x10, 0x87, 0xe1, 0x06, 0xb8,
-  0xa8, 0x1f, 0x30, 0x98, 0x65, 0x88, 0x05, 0x71, 0x08, 0xec, 0x6e, 0xf2,
-  0x26, 0x3e, 0xc3, 0x11, 0x74, 0xa1, 0x37, 0xc4, 0x37, 0xcb, 0x20, 0x0b,
-  0xb5, 0x10, 0xd8, 0xde, 0xd4, 0x45, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1,
-  0x30, 0x17, 0x3c, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xf1,
-  0x3f, 0x3a, 0xdc, 0x10, 0xf4, 0x0f, 0x18, 0xcc, 0x32, 0xcc, 0x02, 0x2d,
-  0x04, 0x36, 0x8c, 0x0e, 0x7c, 0x66, 0x09, 0x72, 0xc1, 0x44, 0x87, 0x88,
-  0xcf, 0x2c, 0x41, 0x2e, 0x0c, 0x47, 0xfc, 0xc5, 0xe8, 0x08, 0xdf, 0x2c,
-  0x83, 0x2d, 0xe4, 0x42, 0x60, 0xa0, 0x41, 0x3a, 0xf1, 0xb1, 0xc0, 0xa1,
-  0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2,
-  0x53, 0x84, 0x0a, 0xe9, 0x70, 0x43, 0x80, 0x42, 0x60, 0x30, 0xcb, 0x70,
-  0x0b, 0xb8, 0x10, 0x18, 0xeb, 0x0c, 0xf1, 0x99, 0x25, 0xc8, 0x05, 0x23,
-  0x5e, 0x07, 0x3e, 0xb3, 0x04, 0xb9, 0x30, 0xd0, 0xf2, 0x68, 0xb3, 0x80,
-  0xd1, 0x02, 0x71, 0x0b, 0x02, 0x2e, 0xd0, 0x4c, 0x2d, 0x5c, 0x30, 0x8c,
-  0xb9, 0x8e, 0xec, 0xc4, 0x67, 0x38, 0xc2, 0x35, 0x66, 0x87, 0xf8, 0x66,
-  0x19, 0x74, 0xa1, 0x17, 0x02, 0xa3, 0x9d, 0xd7, 0x88, 0x8f, 0x05, 0x03,
-  0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10,
-  0x9f, 0x22, 0x70, 0x48, 0x87, 0x1b, 0x02, 0x1b, 0x02, 0x83, 0x59, 0x86,
-  0x5d, 0xe0, 0x85, 0xc0, 0x06, 0xde, 0x81, 0xcf, 0x2c, 0x41, 0x38, 0x58,
-  0xee, 0x10, 0xf1, 0x99, 0x25, 0x08, 0x87, 0xe1, 0x88, 0xdc, 0xd0, 0x1d,
-  0xe1, 0x9b, 0x65, 0xf0, 0x85, 0x70, 0x08, 0x4c, 0x37, 0x76, 0x27, 0x3e,
-  0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20, 0x92, 0x8f,
-  0x15, 0x41, 0x7c, 0x8a, 0x18, 0x23, 0x1d, 0x6e, 0x08, 0xc2, 0x08, 0x0c,
-  0x66, 0x19, 0x7e, 0x01, 0x1c, 0x02, 0x1b, 0x9f, 0x21, 0x3e, 0xb3, 0x04,
-  0xe1, 0x60, 0x04, 0xfa, 0xc0, 0x67, 0x96, 0x20, 0x1c, 0x06, 0x5a, 0x1e,
-  0x6d, 0x17, 0x30, 0x5e, 0x20, 0x7e, 0x41, 0x00, 0x07, 0xd4, 0xe9, 0x85,
-  0x0b, 0x86, 0xb9, 0xe0, 0xa9, 0xdb, 0x9e, 0x3a, 0xda, 0x19, 0xe6, 0xd2,
-  0x65, 0x98, 0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41,
-  0x30, 0xf8, 0xe2, 0x28, 0x8c, 0x40, 0xc8, 0x8d, 0x46, 0x13, 0x02, 0x60,
-  0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48,
-  0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x38, 0x3c, 0x42, 0xa3, 0x84,
-  0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80, 0xcb, 0xa3, 0x34, 0x4a,
-  0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x38, 0x3d, 0x52, 0xa3,
-  0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0x11, 0xa5, 0x34,
-  0x52, 0xa1, 0xa0, 0x8e, 0x74, 0xe8, 0x8e, 0x46, 0x13, 0x02, 0xe0, 0x82,
-  0xa7, 0x66, 0x09, 0xc4, 0x61, 0xa0, 0xe5, 0x31, 0x8d, 0x56, 0xb0, 0xc3,
-  0x80, 0x15, 0x58, 0xe2, 0x15, 0x84, 0x70, 0xb0, 0xc3, 0x00, 0x16, 0x66,
-  0x19, 0xc6, 0xa1, 0x1c, 0xea, 0x63, 0x38, 0x42, 0x3f, 0x78, 0x68, 0xf8,
-  0x6e, 0x3f, 0x86, 0x19, 0x6e, 0x08, 0x4e, 0x88, 0x0c, 0x6a, 0x08, 0x74,
-  0x38, 0x62, 0x3f, 0xc0, 0x68, 0xf8, 0x2a, 0x10, 0xf4, 0xfa, 0x63, 0x98,
-  0xe1, 0x86, 0x40, 0x85, 0xc8, 0xa0, 0x82, 0x41, 0x67, 0x19, 0xc8, 0x21,
-  0x1f, 0x82, 0x73, 0x9f, 0x61, 0x6e, 0x64, 0x86, 0x19, 0x31, 0x38, 0x00,
-  0x10, 0x04, 0x83, 0x2f, 0x95, 0xf2, 0x08, 0x87, 0x4c, 0x69, 0x34, 0x21,
-  0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1,
-  0x88, 0x43, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80, 0x83, 0x25, 0x50,
-  0x3a, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb8, 0x58, 0x0a,
-  0x25, 0x86, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80, 0x93, 0x25,
-  0x51, 0x92, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x16, 0x5d,
-  0x0a, 0x25, 0x31, 0x0a, 0x5a, 0x49, 0x8e, 0x5e, 0x69, 0x34, 0x21, 0x00,
-  0x2e, 0x78, 0x6a, 0x96, 0x20, 0x1f, 0x86, 0x1b, 0x5a, 0x64, 0x96, 0xc0,
-  0x60, 0x96, 0xc1, 0x1c, 0xce, 0x21, 0xa8, 0x1e, 0x22, 0x25, 0xb8, 0xe0,
-  0xa9, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0x78, 0xa9, 0x94, 0x68,
-  0xc4, 0x8e, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0xea, 0xa5, 0x52,
-  0x0a, 0x84, 0x0b, 0x86, 0x29, 0x30, 0x4a, 0x25, 0xb8, 0xe0, 0xa9, 0x11,
-  0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0xc2, 0x49, 0x95, 0x6c, 0x64, 0x8f,
-  0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x12, 0x27, 0x55, 0x0a, 0x84,
-  0x0b, 0x86, 0xb9, 0xe0, 0xa9, 0x3b, 0x9e, 0xba, 0x14, 0x1a, 0xe6, 0x7c,
-  0x66, 0x98, 0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41,
-  0x30, 0xf8, 0xcc, 0xc9, 0x96, 0xea, 0x68, 0x9c, 0x46, 0x13, 0x02, 0x60,
-  0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48,
-  0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb8, 0x76, 0xea, 0xa5, 0x84,
-  0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80, 0x73, 0x27, 0x5f, 0x4a,
-  0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb8, 0x77, 0xfa, 0xa5,
-  0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0xb9, 0x27, 0x5f,
-  0xfa, 0xa3, 0x40, 0x9d, 0x5e, 0x89, 0x9d, 0x46, 0x13, 0x02, 0xe0, 0x82,
-  0xa7, 0x66, 0x09, 0xf2, 0x61, 0xb8, 0x41, 0x4d, 0xde, 0x09, 0x0c, 0x66,
-  0x19, 0xd0, 0x21, 0x1f, 0x02, 0x8b, 0xa3, 0x39, 0x8a, 0xcf, 0x70, 0x04,
-  0x9c, 0xd0, 0x11, 0xf1, 0xcd, 0x32, 0xa4, 0x03, 0x3b, 0x04, 0x56, 0x47,
-  0x71, 0x12, 0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59,
-  0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xe4, 0x93, 0x0e, 0x37, 0x04,
-  0xf7, 0x04, 0x06, 0xb3, 0x0c, 0xea, 0xb0, 0x0e, 0x81, 0x0d, 0x7d, 0x04,
-  0x9f, 0x59, 0x02, 0x78, 0x30, 0x3e, 0x22, 0xe2, 0x33, 0x4b, 0x00, 0x0f,
-  0xc3, 0x11, 0x7b, 0xd2, 0x47, 0xc2, 0x37, 0xcb, 0xd0, 0x0e, 0xf0, 0x10,
-  0x18, 0x9f, 0xf8, 0x51, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17,
-  0x3c, 0x65, 0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x41, 0x52, 0x3a,
-  0xdc, 0x10, 0x88, 0x14, 0x18, 0xcc, 0x32, 0xb8, 0xc3, 0x3b, 0x04, 0x66,
-  0x4a, 0x43, 0x7c, 0x66, 0x09, 0xe0, 0xc1, 0x88, 0x54, 0x82, 0xcf, 0x2c,
-  0x01, 0x3c, 0x0c, 0xb4, 0x3c, 0x9a, 0x3a, 0x60, 0xeb, 0x40, 0xb8, 0x83,
-  0xf0, 0x0e, 0x2c, 0xc5, 0x0e, 0x17, 0x0c, 0x63, 0xa8, 0xc4, 0x4a, 0xf1,
-  0x19, 0x8e, 0x30, 0x95, 0x56, 0x22, 0xbe, 0x59, 0x86, 0x78, 0xa0, 0x87,
-  0xc0, 0x5c, 0xe9, 0x54, 0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9,
-  0xe0, 0x29, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0x99, 0xd2,
-  0xe1, 0x86, 0x00, 0xa6, 0xc0, 0x60, 0x96, 0x41, 0x1e, 0xe6, 0x21, 0xb0,
-  0xc1, 0x96, 0xe0, 0x33, 0x4b, 0x80, 0x0f, 0x36, 0x4b, 0x44, 0x7c, 0x66,
-  0x09, 0xf0, 0x61, 0x38, 0x22, 0x56, 0x68, 0x49, 0xf8, 0x66, 0x19, 0xea,
-  0x01, 0x1f, 0x02, 0x93, 0x95, 0x5a, 0x8a, 0x8f, 0x05, 0x0e, 0x7d, 0x2e,
-  0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22,
-  0x7a, 0x4a, 0x87, 0x1b, 0x82, 0x9d, 0x02, 0x83, 0x59, 0x06, 0x7b, 0xb8,
-  0x87, 0xc0, 0x7a, 0x69, 0x88, 0xcf, 0x2c, 0x01, 0x3e, 0x18, 0x21, 0x4e,
-  0xf0, 0x99, 0x25, 0xc0, 0x87, 0x81, 0x96, 0x47, 0x93, 0x07, 0x6c, 0x1e,
-  0x08, 0x7b, 0x10, 0xee, 0x01, 0xaf, 0xe8, 0xe1, 0x82, 0x61, 0x2e, 0x78,
-  0xea, 0xb6, 0xa7, 0xce, 0x95, 0x86, 0xb9, 0xf1, 0x19, 0xe6, 0x88, 0x61,
-  0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xbe, 0xb5, 0xda,
-  0x29, 0x7d, 0x42, 0xab, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1,
-  0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00,
-  0x10, 0x04, 0x03, 0x4e, 0xae, 0xc4, 0x2a, 0x21, 0x82, 0x11, 0x03, 0x04,
-  0x00, 0x41, 0x30, 0xe0, 0xe6, 0x6a, 0xac, 0x12, 0x22, 0x18, 0x31, 0x40,
-  0x00, 0x10, 0x04, 0x03, 0x8e, 0xae, 0xc8, 0x2a, 0x21, 0x82, 0x11, 0x03,
-  0x05, 0x00, 0x41, 0x30, 0x58, 0xf8, 0x6a, 0xac, 0x48, 0x2a, 0x78, 0x2b,
-  0x9a, 0x8a, 0xab, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xa9, 0x59, 0x82, 0x7c,
-  0x18, 0x68, 0x79, 0x4c, 0x83, 0x1c, 0xd4, 0x34, 0x18, 0x07, 0x96, 0x30,
-  0x07, 0x01, 0x1f, 0xd4, 0x34, 0x38, 0x87, 0x59, 0x06, 0x7d, 0xe0, 0x87,
-  0x77, 0x19, 0x8e, 0x90, 0x17, 0x9b, 0x1a, 0xbe, 0x9b, 0x97, 0x61, 0x86,
-  0x1b, 0x82, 0x90, 0x22, 0x83, 0x1a, 0x02, 0x1d, 0x8e, 0xa8, 0x17, 0x9d,
-  0x1a, 0xbe, 0x0a, 0x04, 0xbd, 0x7b, 0x19, 0x66, 0xb8, 0x21, 0x20, 0x29,
-  0x32, 0xa8, 0x60, 0xd0, 0x59, 0x86, 0x7d, 0x80, 0x89, 0xe0, 0xd0, 0x69,
-  0x98, 0xeb, 0x9f, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0xe0, 0x1b,
-  0xad, 0xb9, 0x92, 0x29, 0xd0, 0x1a, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21,
-  0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0xe2, 0x90, 0x11, 0x03,
-  0x04, 0x00, 0x41, 0x30, 0xe0, 0x54, 0x4b, 0xaf, 0x0e, 0x22, 0x18, 0x31,
-  0x40, 0x00, 0x10, 0x04, 0x03, 0x6e, 0xb5, 0xf6, 0x8a, 0x21, 0x82, 0x11,
-  0x03, 0x04, 0x00, 0x41, 0x30, 0xe0, 0x58, 0x8b, 0xaf, 0x24, 0x22, 0x18,
-  0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x85, 0xb6, 0xf6, 0x8a, 0xa7, 0x82,
-  0xd3, 0x62, 0xab, 0xd4, 0x1a, 0x4d, 0x08, 0x80, 0x0b, 0x9e, 0x9a, 0x25,
-  0x80, 0x89, 0xe1, 0x86, 0x93, 0x69, 0x2d, 0x30, 0x98, 0x65, 0xe8, 0x07,
-  0x7f, 0x08, 0xea, 0xa6, 0xfc, 0x0a, 0x2e, 0x78, 0x6a, 0xc4, 0xe0, 0x00,
-  0x40, 0x10, 0x0c, 0x28, 0xdb, 0xfa, 0x2b, 0x96, 0x81, 0xab, 0x11, 0x83,
-  0x03, 0x00, 0x41, 0x30, 0xa0, 0x6e, 0xeb, 0xaf, 0x02, 0xe1, 0x82, 0x61,
-  0x4a, 0xa7, 0x46, 0x0b, 0x2e, 0x78, 0x6a, 0xc4, 0xe0, 0x00, 0x40, 0x10,
-  0x0c, 0xa8, 0xdd, 0x22, 0x2d, 0x98, 0xa9, 0xab, 0x11, 0x83, 0x03, 0x00,
-  0x41, 0x30, 0xa0, 0x78, 0x8b, 0xb4, 0x02, 0xe1, 0x82, 0x61, 0x2e, 0x78,
-  0xea, 0x8e, 0xa7, 0x6e, 0xa4, 0x86, 0x39, 0x1c, 0x1a, 0xe6, 0x88, 0x61,
-  0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x3e, 0xf0, 0x82,
-  0xad, 0xb7, 0xea, 0xad, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1,
-  0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00,
-  0x10, 0x04, 0x03, 0xee, 0xbc, 0x6e, 0x2b, 0x21, 0x82, 0x11, 0x03, 0x04,
-  0x00, 0x41, 0x30, 0xe0, 0xd0, 0x0b, 0xb7, 0x12, 0x22, 0x18, 0x31, 0x40,
-  0x00, 0x10, 0x04, 0x03, 0x2e, 0xbd, 0x72, 0x2b, 0x21, 0x82, 0x11, 0x03,
-  0x05, 0x00, 0x41, 0x30, 0x58, 0xe2, 0x0b, 0xb7, 0xf2, 0x2a, 0x20, 0xaf,
-  0xd4, 0x32, 0xaf, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xa9, 0x59, 0x02, 0x98,
-  0x18, 0x6e, 0x20, 0x9b, 0xf4, 0x02, 0x83, 0x59, 0x86, 0x7f, 0x80, 0x89,
-  0xc0, 0xd6, 0xaa, 0xad, 0xe2, 0x33, 0x1c, 0x81, 0x36, 0x6e, 0x45, 0x7c,
-  0xb3, 0x0c, 0x20, 0x31, 0x12, 0x81, 0xbd, 0x55, 0xda, 0xc4, 0xc7, 0x82,
-  0x81, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x18, 0xf2, 0xb1, 0x22,
-  0x88, 0x4f, 0x11, 0xf3, 0xa5, 0xc3, 0x0d, 0x41, 0x7c, 0x81, 0xc1, 0x2c,
-  0x43, 0x48, 0x88, 0x44, 0x60, 0xc3, 0x5d, 0xc1, 0x67, 0x96, 0xe0, 0x24,
-  0xcc, 0xae, 0x88, 0xf8, 0xcc, 0x12, 0x9c, 0xc4, 0x70, 0xc4, 0xdc, 0xdc,
-  0x95, 0xf0, 0xcd, 0x32, 0x90, 0xc4, 0x49, 0x04, 0x46, 0x37, 0x78, 0x15,
-  0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x10, 0xc9,
-  0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xf8, 0x97, 0x0e, 0x37, 0x04, 0xfc, 0x05,
-  0x06, 0xb3, 0x0c, 0x25, 0x61, 0x12, 0x81, 0x81, 0xd6, 0x10, 0x9f, 0x59,
-  0x82, 0x93, 0x30, 0x62, 0xb4, 0xe0, 0x33, 0x4b, 0x70, 0x12, 0x03, 0x2d,
-  0x8f, 0x16, 0x12, 0x98, 0x48, 0x10, 0x25, 0x21, 0x98, 0x04, 0xbf, 0x8d,
-  0xc4, 0x05, 0xc3, 0x98, 0x68, 0x99, 0x56, 0x7c, 0x86, 0x23, 0x40, 0xe7,
-  0xb4, 0x88, 0x6f, 0x96, 0x01, 0x25, 0x56, 0x22, 0x30, 0xd4, 0x0a, 0x9d,
-  0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca, 0x02, 0x43,
-  0x3e, 0x56, 0x04, 0xf1, 0x29, 0x82, 0xc5, 0x74, 0xb8, 0x21, 0x50, 0x31,
-  0x30, 0x98, 0x65, 0x48, 0x09, 0x95, 0x08, 0x6c, 0x80, 0x2d, 0xf8, 0xcc,
-  0x12, 0xbc, 0x84, 0xb5, 0x16, 0x11, 0x9f, 0x59, 0x82, 0x97, 0x18, 0x8e,
-  0x58, 0x1d, 0xd7, 0x12, 0xbe, 0x59, 0x06, 0x96, 0x78, 0x89, 0xc0, 0x58,
-  0xe7, 0xb5, 0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29,
-  0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0x1b, 0xd3, 0xe1, 0x86,
-  0xa0, 0xc6, 0xc0, 0x60, 0x96, 0xa1, 0x25, 0x5c, 0x22, 0xb0, 0xdb, 0x1a,
-  0xe2, 0x33, 0x4b, 0xf0, 0x12, 0x46, 0xf0, 0x16, 0x7c, 0x66, 0x09, 0x5e,
-  0x62, 0xa0, 0xe5, 0xd1, 0x52, 0x02, 0x53, 0x09, 0xa2, 0x25, 0x04, 0x97,
-  0x60, 0xbb, 0x95, 0xb8, 0x60, 0x98, 0x0b, 0x9e, 0xba, 0xed, 0xa9, 0x43,
-  0xad, 0x61, 0xae, 0x97, 0x86, 0x39, 0x62, 0x98, 0x23, 0x86, 0x19, 0x31,
-  0x38, 0x00, 0x10, 0x04, 0x83, 0xaf, 0xcc, 0x6a, 0x8c, 0xbe, 0xc4, 0x6c,
+  0x7c, 0x50, 0x0f, 0xe9, 0xe0, 0x0e, 0xf4, 0x10, 0x07, 0x76, 0xf0, 0x0b,
+  0xf4, 0xe0, 0x07, 0x28, 0x30, 0xa8, 0x9c, 0xc9, 0x1b, 0x07, 0x76, 0x08,
+  0x87, 0x79, 0x98, 0x07, 0x37, 0x90, 0x85, 0x5b, 0x98, 0x05, 0x7a, 0x90,
+  0x87, 0x7a, 0x18, 0x07, 0x7a, 0xa8, 0x07, 0x79, 0x28, 0x07, 0x72, 0x10,
+  0x85, 0x7a, 0x30, 0x07, 0x73, 0x28, 0x07, 0x79, 0xe0, 0x03, 0x74, 0x08,
+  0x07, 0x76, 0x30, 0x07, 0x3f, 0x40, 0x01, 0x46, 0xe7, 0x30, 0x02, 0x31,
+  0x5c, 0xc2, 0x39, 0x8d, 0x34, 0x01, 0xcd, 0x24, 0xa1, 0x65, 0x18, 0x86,
+  0x21, 0x4d, 0xd3, 0x34, 0x4d, 0x07, 0x52, 0xe7, 0x08, 0x40, 0x61, 0x0a,
+  0x00, 0x00, 0x00, 0x00, 0x13, 0x14, 0x72, 0xc0, 0x87, 0x74, 0x60, 0x87,
+  0x36, 0x68, 0x87, 0x79, 0x68, 0x03, 0x72, 0xc0, 0x87, 0x0d, 0xae, 0x50,
+  0x0e, 0x6d, 0xd0, 0x0e, 0x7a, 0x50, 0x0e, 0x6d, 0x00, 0x0f, 0x7a, 0x30,
+  0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0xa0,
+  0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x78, 0xa0, 0x07, 0x78, 0xd0,
+  0x06, 0xe9, 0x10, 0x07, 0x76, 0xa0, 0x07, 0x71, 0x60, 0x07, 0x6d, 0x90,
+  0x0e, 0x73, 0x20, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xd0, 0x06, 0xe9, 0x60,
+  0x07, 0x74, 0xa0, 0x07, 0x76, 0x40, 0x07, 0x6d, 0x60, 0x0e, 0x71, 0x60,
+  0x07, 0x7a, 0x10, 0x07, 0x76, 0xd0, 0x06, 0xe6, 0x30, 0x07, 0x72, 0xa0,
+  0x07, 0x73, 0x20, 0x07, 0x6d, 0x60, 0x0e, 0x76, 0x40, 0x07, 0x7a, 0x60,
+  0x07, 0x74, 0xd0, 0x06, 0xee, 0x80, 0x07, 0x7a, 0x10, 0x07, 0x76, 0xa0,
+  0x07, 0x73, 0x20, 0x07, 0x7a, 0x60, 0x07, 0x74, 0x30, 0xe4, 0x09, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0xc8, 0x43,
+  0x00, 0x01, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x90,
+  0x67, 0x01, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80,
+  0x21, 0x4f, 0x03, 0x04, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x43, 0x1e, 0x08, 0x08, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x86, 0x3c, 0x12, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x0c, 0x79, 0x28, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x18, 0xf2, 0x58, 0x40, 0x00, 0x04, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0xe4, 0xd1, 0x80, 0x00, 0x10, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0xc8, 0xc3, 0x01, 0x01, 0x10,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x90, 0xe7, 0x03, 0x02,
+  0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x21, 0x8f, 0x18,
+  0x00, 0x01, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x16,
+  0x08, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x32, 0x1e, 0x98, 0x14,
+  0x19, 0x11, 0x4c, 0x90, 0x8c, 0x09, 0x26, 0x47, 0xc6, 0x04, 0x43, 0x1a,
+  0x4a, 0xa0, 0x08, 0x8a, 0x61, 0x04, 0xa0, 0x30, 0x0a, 0x3d, 0xa0, 0x10,
+  0x0a, 0x30, 0x80, 0xc2, 0x11, 0x00, 0x62, 0x0b, 0x1c, 0x10, 0x10, 0x81,
+  0xd0, 0x19, 0x00, 0x5a, 0x67, 0x00, 0xc8, 0x9c, 0x01, 0x00, 0x00, 0x00,
+  0x79, 0x18, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x1a, 0x03, 0x4c, 0x90,
+  0x46, 0x02, 0x13, 0x44, 0x35, 0x18, 0x63, 0x0b, 0x73, 0x3b, 0x03, 0xb1,
+  0x2b, 0x93, 0x9b, 0x4b, 0x7b, 0x73, 0x03, 0x99, 0x71, 0xb9, 0x01, 0x41,
+  0xa1, 0x0b, 0x3b, 0x9b, 0x7b, 0x91, 0x2a, 0x62, 0x2a, 0x0a, 0x9a, 0x2a,
+  0xfa, 0x9a, 0xb9, 0x81, 0x79, 0x31, 0x4b, 0x73, 0x0b, 0x63, 0x4b, 0xd9,
+  0x10, 0x04, 0x13, 0x84, 0x81, 0x99, 0x20, 0x0c, 0xcd, 0x06, 0x61, 0x20,
+  0x26, 0x08, 0x83, 0xb3, 0x41, 0x18, 0x0c, 0x0a, 0x63, 0x73, 0x1b, 0x06,
+  0xc4, 0x20, 0x26, 0x08, 0xc3, 0x33, 0x41, 0x30, 0x03, 0x8c, 0xc0, 0x04,
+  0x61, 0x80, 0x26, 0x08, 0x61, 0x40, 0x6d, 0x58, 0x94, 0x85, 0x51, 0x94,
+  0xa1, 0x71, 0x1c, 0xa7, 0x98, 0x20, 0xa0, 0x81, 0xb5, 0x61, 0x19, 0x20,
+  0x46, 0x19, 0x86, 0xc6, 0x71, 0x9c, 0x62, 0xc3, 0x42, 0x2c, 0x8c, 0x42,
+  0x0c, 0x8d, 0xe3, 0x38, 0xc5, 0x86, 0xe1, 0x89, 0xa4, 0x09, 0xc2, 0x1a,
+  0x5c, 0x13, 0x84, 0x21, 0xda, 0x80, 0x28, 0x14, 0xa3, 0x28, 0x43, 0x05,
+  0x6c, 0x08, 0xac, 0x0d, 0x04, 0x30, 0x5d, 0xc0, 0x04, 0x41, 0x00, 0xa8,
+  0x1c, 0xc9, 0xa5, 0x91, 0x4d, 0x85, 0xb5, 0xc1, 0xb1, 0x95, 0x4d, 0x10,
+  0xd8, 0xa0, 0x9a, 0x20, 0x0c, 0xd2, 0x04, 0x61, 0x98, 0x36, 0x0c, 0xdd,
+  0x30, 0x6c, 0x20, 0x94, 0x8d, 0xf3, 0x36, 0x14, 0x99, 0x06, 0x60, 0x5f,
+  0x15, 0x36, 0x36, 0xbb, 0x36, 0x97, 0x34, 0xb2, 0x32, 0x37, 0xba, 0x29,
+  0x41, 0x50, 0x85, 0x0c, 0xcf, 0xc5, 0xae, 0x4c, 0x6e, 0x2e, 0xed, 0xcd,
+  0x6d, 0x4a, 0x40, 0x34, 0x21, 0xc3, 0x73, 0xb1, 0x0b, 0x63, 0xb3, 0x2b,
+  0x93, 0x9b, 0x12, 0x18, 0x75, 0xc8, 0xf0, 0x5c, 0xe6, 0xd0, 0xc2, 0xc8,
+  0xca, 0xe4, 0x9a, 0xde, 0xc8, 0xca, 0xd8, 0xa6, 0x04, 0x48, 0x19, 0x32,
+  0x3c, 0x17, 0xb9, 0xb2, 0xb9, 0xb7, 0x3a, 0xb9, 0xb1, 0xb2, 0xb9, 0x29,
+  0xc1, 0x55, 0x87, 0x0c, 0xcf, 0xa5, 0xcc, 0x8d, 0x4e, 0x2e, 0x0f, 0xea,
+  0x2d, 0xcd, 0x8d, 0x6e, 0x6e, 0x4a, 0xf0, 0x01, 0x79, 0x18, 0x00, 0x00,
+  0x51, 0x00, 0x00, 0x00, 0x33, 0x08, 0x80, 0x1c, 0xc4, 0xe1, 0x1c, 0x66,
+  0x14, 0x01, 0x3d, 0x88, 0x43, 0x38, 0x84, 0xc3, 0x8c, 0x42, 0x80, 0x07,
+  0x79, 0x78, 0x07, 0x73, 0x98, 0x71, 0x0c, 0xe6, 0x00, 0x0f, 0xed, 0x10,
+  0x0e, 0xf4, 0x80, 0x0e, 0x33, 0x0c, 0x42, 0x1e, 0xc2, 0xc1, 0x1d, 0xce,
+  0xa1, 0x1c, 0x66, 0x30, 0x05, 0x3d, 0x88, 0x43, 0x38, 0x84, 0x83, 0x1b,
+  0xcc, 0x03, 0x3d, 0xc8, 0x43, 0x3d, 0x8c, 0x03, 0x3d, 0xcc, 0x78, 0x8c,
+  0x74, 0x70, 0x07, 0x7b, 0x08, 0x07, 0x79, 0x48, 0x87, 0x70, 0x70, 0x07,
+  0x7a, 0x70, 0x03, 0x76, 0x78, 0x87, 0x70, 0x20, 0x87, 0x19, 0xcc, 0x11,
+  0x0e, 0xec, 0x90, 0x0e, 0xe1, 0x30, 0x0f, 0x6e, 0x30, 0x0f, 0xe3, 0xf0,
+  0x0e, 0xf0, 0x50, 0x0e, 0x33, 0x10, 0xc4, 0x1d, 0xde, 0x21, 0x1c, 0xd8,
+  0x21, 0x1d, 0xc2, 0x61, 0x1e, 0x66, 0x30, 0x89, 0x3b, 0xbc, 0x83, 0x3b,
+  0xd0, 0x43, 0x39, 0xb4, 0x03, 0x3c, 0xbc, 0x83, 0x3c, 0x84, 0x03, 0x3b,
+  0xcc, 0xf0, 0x14, 0x76, 0x60, 0x07, 0x7b, 0x68, 0x07, 0x37, 0x68, 0x87,
+  0x72, 0x68, 0x07, 0x37, 0x80, 0x87, 0x70, 0x90, 0x87, 0x70, 0x60, 0x07,
+  0x76, 0x28, 0x07, 0x76, 0xf8, 0x05, 0x76, 0x78, 0x87, 0x77, 0x80, 0x87,
+  0x5f, 0x08, 0x87, 0x71, 0x18, 0x87, 0x72, 0x98, 0x87, 0x79, 0x98, 0x81,
+  0x2c, 0xee, 0xf0, 0x0e, 0xee, 0xe0, 0x0e, 0xf5, 0xc0, 0x0e, 0xec, 0x30,
+  0x03, 0x62, 0xc8, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xcc, 0xa1, 0x1c, 0xe4,
+  0xa1, 0x1c, 0xdc, 0x61, 0x1c, 0xca, 0x21, 0x1c, 0xc4, 0x81, 0x1d, 0xca,
+  0x61, 0x06, 0xd6, 0x90, 0x43, 0x39, 0xc8, 0x43, 0x39, 0x98, 0x43, 0x39,
+  0xc8, 0x43, 0x39, 0xb8, 0xc3, 0x38, 0x94, 0x43, 0x38, 0x88, 0x03, 0x3b,
+  0x94, 0xc3, 0x2f, 0xbc, 0x83, 0x3c, 0xfc, 0x82, 0x3b, 0xd4, 0x03, 0x3b,
+  0xb0, 0xc3, 0x0c, 0xc4, 0x21, 0x07, 0x7c, 0x70, 0x03, 0x7a, 0x28, 0x87,
+  0x76, 0x80, 0x87, 0x19, 0xd1, 0x43, 0x0e, 0xf8, 0xe0, 0x06, 0xe4, 0x20,
+  0x0e, 0xe7, 0xe0, 0x06, 0xf6, 0x10, 0x0e, 0xf2, 0xc0, 0x0e, 0xe1, 0x90,
+  0x0f, 0xef, 0x50, 0x0f, 0xf4, 0x30, 0x83, 0x81, 0xc8, 0x01, 0x1f, 0xdc,
+  0x40, 0x1c, 0xe4, 0xa1, 0x1c, 0xc2, 0x61, 0x1d, 0xdc, 0x40, 0x1c, 0xe4,
+  0x01, 0x00, 0x00, 0x00, 0x71, 0x20, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00,
+  0x06, 0xa0, 0x80, 0x11, 0x32, 0xb0, 0x00, 0xf3, 0x2c, 0x84, 0x19, 0x40,
+  0xc3, 0xe5, 0x3b, 0x8f, 0x1f, 0x20, 0x0d, 0x10, 0x61, 0x7e, 0x71, 0xdb,
+  0xa6, 0xb0, 0x0d, 0x97, 0xef, 0x3c, 0xbe, 0x10, 0x50, 0x45, 0x41, 0x44,
+  0xa5, 0x03, 0x0c, 0x25, 0x61, 0x00, 0x02, 0xe6, 0x23, 0xb7, 0x6d, 0x0b,
+  0xd2, 0x70, 0xf9, 0xce, 0xe3, 0x0b, 0x11, 0x01, 0x4c, 0x44, 0x08, 0x34,
+  0xc3, 0x42, 0xd8, 0x81, 0x33, 0x5c, 0xbe, 0xf3, 0xf8, 0x83, 0x33, 0xe1,
+  0x7e, 0x71, 0xdb, 0x86, 0x70, 0x0d, 0x97, 0xef, 0x3c, 0x7e, 0x04, 0x58,
+  0x1b, 0x55, 0x14, 0x44, 0x54, 0x3a, 0xc0, 0xe0, 0x17, 0xb5, 0x6e, 0x02,
+  0xd7, 0x70, 0xf9, 0xce, 0xe3, 0x47, 0x80, 0xb5, 0x51, 0x45, 0x41, 0x44,
+  0xa5, 0x03, 0x0c, 0x3e, 0x52, 0xeb, 0x36, 0x80, 0x0d, 0x97, 0xef, 0x3c,
+  0x7e, 0x04, 0x58, 0x1b, 0x55, 0x14, 0x44, 0xc4, 0x4e, 0x4e, 0x44, 0xf8,
+  0x48, 0xad, 0x5b, 0x81, 0x34, 0x5c, 0xbe, 0xf3, 0xf8, 0x13, 0x11, 0x4d,
+  0x08, 0x10, 0x61, 0x7e, 0x71, 0xdb, 0x96, 0x20, 0x0d, 0x97, 0xef, 0x3c,
+  0xfe, 0x44, 0x44, 0x13, 0x02, 0x44, 0x98, 0x8f, 0xdc, 0xb6, 0x05, 0x48,
+  0xc3, 0xe5, 0x3b, 0x8f, 0x3f, 0x1d, 0x11, 0x01, 0x0c, 0xe2, 0xe0, 0x23,
+  0xb7, 0x6d, 0x04, 0xcf, 0x70, 0xf9, 0xce, 0xe3, 0x53, 0x0d, 0x10, 0x61,
+  0x7e, 0x71, 0xdb, 0x00, 0x61, 0x20, 0x00, 0x00, 0x0c, 0x13, 0x00, 0x00,
+  0x13, 0x04, 0x24, 0x14, 0x0b, 0x04, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00,
+  0x34, 0x14, 0x58, 0xd9, 0x95, 0xa5, 0x40, 0x0d, 0x94, 0x51, 0x21, 0x15,
+  0x57, 0xc1, 0x95, 0x5c, 0xd9, 0x14, 0x4b, 0x61, 0x0a, 0x14, 0x4d, 0xe9,
+  0x06, 0x94, 0x43, 0x29, 0x90, 0x31, 0x03, 0x40, 0x48, 0x09, 0x14, 0x01,
+  0x3d, 0x23, 0x00, 0x63, 0x04, 0x20, 0x08, 0x82, 0xf8, 0x37, 0x46, 0x00,
+  0x82, 0x20, 0x48, 0xff, 0xc2, 0x18, 0x01, 0x08, 0x82, 0x20, 0xfd, 0x8d,
+  0x11, 0x80, 0x20, 0x08, 0xf2, 0xdf, 0x18, 0x01, 0x08, 0x82, 0x20, 0xfe,
+  0x0b, 0x63, 0x04, 0x20, 0x08, 0x82, 0x21, 0x38, 0x8c, 0x11, 0x80, 0x20,
+  0x08, 0xea, 0xdf, 0x18, 0x01, 0x08, 0x82, 0xa0, 0xfe, 0x0b, 0x63, 0x04,
+  0x20, 0x08, 0x82, 0xf0, 0x37, 0x46, 0x00, 0x82, 0x20, 0x08, 0xff, 0xc2,
+  0x18, 0x01, 0x08, 0x82, 0x20, 0x08, 0x06, 0x00, 0x23, 0x06, 0x09, 0x00,
+  0x82, 0x60, 0x20, 0x06, 0x6d, 0x70, 0x39, 0x6b, 0xb0, 0x06, 0x64, 0x30,
+  0x62, 0x90, 0x00, 0x20, 0x08, 0x06, 0x62, 0xe0, 0x06, 0xd8, 0xd3, 0x06,
+  0x6d, 0x50, 0x06, 0x23, 0x06, 0x09, 0x00, 0x82, 0x60, 0x20, 0x06, 0x6f,
+  0x90, 0x41, 0x6b, 0xb0, 0x06, 0x66, 0x30, 0x62, 0x90, 0x00, 0x20, 0x08,
+  0x06, 0x62, 0x00, 0x07, 0x1a, 0xc4, 0x06, 0x6c, 0x70, 0x06, 0x23, 0x06,
+  0x06, 0x00, 0x82, 0x60, 0x40, 0xec, 0x81, 0xd5, 0x06, 0x23, 0x06, 0x07,
+  0x00, 0x82, 0x60, 0xf0, 0xcd, 0xc1, 0x18, 0x08, 0x6e, 0x30, 0x9a, 0x10,
+  0x00, 0x15, 0x0c, 0x30, 0x9a, 0x30, 0x04, 0xc3, 0x0d, 0x42, 0x40, 0x06,
+  0xb3, 0x0c, 0xc1, 0x08, 0x05, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xf0,
+  0xe1, 0x01, 0x1a, 0x1c, 0x76, 0x30, 0x9a, 0x10, 0x0c, 0x17, 0x3c, 0x35,
+  0x9a, 0x30, 0x08, 0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06,
+  0x5f, 0x1f, 0xb4, 0x01, 0x03, 0x06, 0xa3, 0x09, 0x01, 0x30, 0xdc, 0x10,
+  0xe8, 0x01, 0x18, 0x4c, 0x37, 0x50, 0x5e, 0x30, 0xdd, 0x50, 0x69, 0x42,
+  0x21, 0x01, 0x4c, 0x37, 0x5c, 0x1c, 0x51, 0x48, 0x00, 0x23, 0x06, 0x07,
+  0x00, 0x82, 0x60, 0xf0, 0x95, 0x42, 0x1d, 0x50, 0x67, 0x30, 0x9a, 0x10,
+  0x04, 0xa3, 0x09, 0x82, 0x30, 0x9a, 0x30, 0x0c, 0x15, 0x08, 0x52, 0x03,
+  0x21, 0x15, 0x0c, 0x52, 0x57, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60,
+  0xf0, 0xb5, 0x42, 0x1f, 0x70, 0xa9, 0x30, 0x9a, 0x10, 0x00, 0x15, 0x0c,
+  0x52, 0x5b, 0x10, 0x15, 0x20, 0x33, 0x9a, 0x50, 0x04, 0x15, 0x08, 0x52,
+  0x44, 0x10, 0x15, 0x34, 0x33, 0x9a, 0x90, 0x08, 0x15, 0x08, 0x52, 0x44,
+  0x10, 0xd7, 0x3c, 0x75, 0xc5, 0x53, 0x37, 0x3c, 0x35, 0x62, 0x70, 0x00,
+  0x20, 0x08, 0x06, 0x1f, 0x2f, 0xb0, 0xc2, 0x1a, 0xd0, 0xc2, 0x68, 0x42,
+  0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0xc3,
+  0x11, 0x4f, 0x1d, 0xf1, 0xd4, 0x11, 0x4f, 0x1d, 0xf1, 0xd4, 0x88, 0x41,
+  0x03, 0x80, 0x20, 0x18, 0x58, 0xe8, 0xf0, 0x0a, 0xcc, 0xa2, 0x8c, 0x02,
+  0x31, 0x08, 0x81, 0x09, 0x01, 0x7c, 0x4e, 0x18, 0x66, 0xc4, 0x40, 0x01,
+  0x40, 0x10, 0x0c, 0x34, 0x75, 0xb8, 0x85, 0x3c, 0x08, 0xc6, 0x01, 0x15,
+  0xca, 0x61, 0x34, 0x21, 0x00, 0x2e, 0x78, 0xf0, 0x8a, 0x61, 0x46, 0x0c,
+  0x14, 0x00, 0x04, 0xc1, 0x40, 0x73, 0x87, 0x5d, 0xe8, 0x83, 0xe0, 0x1c,
+  0x58, 0x21, 0x1d, 0x46, 0x13, 0x02, 0xe0, 0x82, 0x07, 0x87, 0x1b, 0xe2,
+  0xa0, 0x1d, 0xc0, 0xc0, 0x8c, 0x56, 0x80, 0x8f, 0x0d, 0xae, 0x00, 0x9f,
+  0x59, 0x06, 0x61, 0x18, 0x4c, 0x38, 0x05, 0xf9, 0x98, 0x80, 0x0a, 0xf2,
+  0x31, 0x3d, 0x68, 0x05, 0xf8, 0x58, 0x1e, 0xb8, 0x02, 0x7c, 0x8c, 0x10,
+  0xe4, 0x63, 0x84, 0x20, 0x9f, 0x59, 0x02, 0xc2, 0xfc, 0x00, 0x91, 0x8f,
+  0x21, 0x7d, 0x20, 0x1f, 0x13, 0x66, 0x01, 0x3e, 0x26, 0xd0, 0x02, 0x7c,
+  0x4c, 0x88, 0x05, 0xf9, 0x98, 0x20, 0x0b, 0xf2, 0x99, 0x25, 0x20, 0x06,
+  0x2a, 0x1e, 0x48, 0x20, 0x86, 0x81, 0x8a, 0x07, 0x12, 0x88, 0x61, 0x34,
+  0xa1, 0x15, 0x84, 0xe1, 0x86, 0xe0, 0x1f, 0xc0, 0x60, 0x96, 0xa1, 0x30,
+  0x82, 0x11, 0x03, 0x03, 0x00, 0x41, 0x30, 0x80, 0x50, 0xe2, 0x1c, 0x88,
+  0x11, 0x03, 0x03, 0x00, 0x41, 0x30, 0x80, 0x52, 0x02, 0x1d, 0x88, 0x59,
+  0x02, 0x63, 0xa0, 0xe2, 0x21, 0x0a, 0x86, 0x18, 0xa8, 0x78, 0x88, 0x82,
+  0x21, 0x86, 0x23, 0x04, 0x53, 0x20, 0xbe, 0xe1, 0x88, 0xa1, 0x14, 0x84,
+  0xaf, 0x84, 0x60, 0x87, 0x23, 0x88, 0x54, 0x20, 0xbe, 0x12, 0x82, 0x1d,
+  0x8e, 0x30, 0x4e, 0x41, 0xf8, 0x2a, 0x10, 0x76, 0x96, 0xe1, 0xd0, 0x82,
+  0xd1, 0x04, 0x5d, 0x18, 0x86, 0x1b, 0x02, 0x96, 0x00, 0x83, 0x59, 0x06,
+  0x24, 0x09, 0xca, 0x16, 0xfa, 0x01, 0x2e, 0x78, 0x6a, 0xc4, 0xe0, 0x00,
+  0x40, 0x10, 0x0c, 0xa8, 0x9a, 0xf0, 0x87, 0xe6, 0x1d, 0x46, 0x0c, 0x0e,
+  0x00, 0x04, 0xc1, 0x80, 0xb2, 0x09, 0x7f, 0x08, 0x84, 0xc2, 0x85, 0x90,
+  0x80, 0x0b, 0x9e, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x2a, 0x27,
+  0x44, 0x02, 0x9a, 0x87, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0x74,
+  0x42, 0x24, 0x02, 0x61, 0x96, 0x40, 0x1b, 0x6e, 0x50, 0x68, 0x02, 0x0c,
+  0x66, 0x19, 0x14, 0x2d, 0x30, 0x5b, 0xc0, 0x85, 0xf8, 0xcc, 0x32, 0x2c,
+  0xce, 0x64, 0xb9, 0x50, 0xc5, 0xc7, 0x02, 0x81, 0x3e, 0x17, 0x0c, 0x73,
+  0xc1, 0x53, 0x16, 0x14, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x3d, 0xa1,
+  0xc3, 0x0d, 0xc1, 0x4e, 0x80, 0xc1, 0x2c, 0x03, 0xd3, 0x04, 0x36, 0x84,
+  0x03, 0x7c, 0x66, 0x09, 0x24, 0x03, 0x07, 0x22, 0x3e, 0xb3, 0x04, 0xd2,
+  0x2c, 0xc3, 0x23, 0x71, 0xf6, 0x85, 0x43, 0x7c, 0x2c, 0x60, 0xe8, 0x73,
+  0xc1, 0x30, 0x17, 0x3c, 0x65, 0xc1, 0x23, 0x1f, 0x2b, 0x82, 0xf8, 0x14,
+  0x71, 0x16, 0x3a, 0xdc, 0x10, 0x94, 0x05, 0x18, 0xcc, 0x32, 0x40, 0x51,
+  0x60, 0xe9, 0x30, 0xc4, 0x67, 0x96, 0x40, 0x32, 0x82, 0x1d, 0xe0, 0x33,
+  0x4b, 0x20, 0x0d, 0xb4, 0x3c, 0x18, 0x63, 0x35, 0x04, 0x24, 0x44, 0xb2,
+  0xe0, 0x98, 0x3a, 0xb8, 0x43, 0x7c, 0x66, 0x19, 0x26, 0xcb, 0x0c, 0xec,
+  0x1d, 0xd4, 0x20, 0x3e, 0x16, 0x08, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e,
+  0xb2, 0xa0, 0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x98, 0x0b, 0x1d, 0x6e,
+  0x08, 0xe2, 0x02, 0x0c, 0x66, 0x19, 0xa8, 0x2a, 0xb0, 0xe1, 0x1e, 0xe0,
+  0x33, 0x4b, 0xa0, 0x19, 0x3d, 0x10, 0xf1, 0x99, 0x25, 0xd0, 0x66, 0x19,
+  0x2e, 0xcd, 0x0d, 0x8c, 0x0e, 0xea, 0x21, 0x3e, 0x16, 0x30, 0xf4, 0xb9,
+  0x60, 0x98, 0x0b, 0x9e, 0xb2, 0xe0, 0x91, 0x8f, 0x15, 0x41, 0x7c, 0x8a,
+  0xe8, 0x0b, 0x1d, 0x6e, 0x08, 0xf6, 0x02, 0x0c, 0x66, 0x19, 0xb0, 0x2c,
+  0xb0, 0x7e, 0x18, 0xe2, 0x33, 0x4b, 0xa0, 0x19, 0x21, 0x12, 0xf0, 0x99,
+  0x25, 0xd0, 0x06, 0x8a, 0x1e, 0x71, 0x40, 0xfc, 0x21, 0xf1, 0x07, 0x83,
+  0x0d, 0x32, 0x36, 0xc0, 0xd8, 0xc0, 0x62, 0x83, 0x8a, 0x0d, 0xa8, 0x81,
+  0xa2, 0x87, 0x17, 0x10, 0x7f, 0x48, 0xfc, 0xc1, 0x20, 0x32, 0x03, 0xf3,
+  0x07, 0x0b, 0xab, 0x34, 0xea, 0xe8, 0xe1, 0xa9, 0x59, 0x86, 0x6d, 0x0e,
+  0x4a, 0x61, 0x34, 0x61, 0x26, 0x86, 0xe1, 0x86, 0x40, 0x34, 0xc0, 0x60,
+  0x96, 0x81, 0xf3, 0x82, 0xe1, 0x88, 0xc2, 0x2c, 0x86, 0xef, 0x8c, 0x61,
+  0x86, 0x1b, 0x82, 0x98, 0x20, 0x83, 0x1a, 0x02, 0x1d, 0x8e, 0x40, 0xd4,
+  0x62, 0xf8, 0x2a, 0x10, 0xf4, 0x94, 0x61, 0x86, 0x1b, 0x02, 0x9a, 0x20,
+  0x83, 0x0a, 0x06, 0x9d, 0x65, 0xe8, 0xe4, 0x20, 0x38, 0x7c, 0x18, 0xe6,
+  0x9a, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0xe0, 0x9b, 0x8d, 0xd1,
+  0x10, 0x0b, 0xd8, 0x18, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d,
+  0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0xe2, 0x90, 0x11, 0x03, 0x04, 0x00,
+  0x41, 0x30, 0xe0, 0x74, 0x43, 0x35, 0x0e, 0x22, 0x18, 0x31, 0x40, 0x00,
+  0x10, 0x04, 0x03, 0x6e, 0x37, 0x56, 0x83, 0x21, 0x82, 0x11, 0x03, 0x04,
+  0x00, 0x41, 0x30, 0xe0, 0x78, 0x83, 0x35, 0x24, 0x22, 0x18, 0x31, 0x50,
+  0x00, 0x10, 0x04, 0x83, 0x85, 0x3c, 0x56, 0x83, 0x2d, 0x82, 0xdb, 0xe0,
+  0x8b, 0xdc, 0x18, 0x4d, 0x08, 0x80, 0x0b, 0x9e, 0x9a, 0x25, 0x90, 0x83,
+  0xe1, 0x86, 0xac, 0x37, 0xc0, 0x60, 0x96, 0xe1, 0x03, 0x83, 0xa0, 0xce,
+  0xc2, 0x35, 0xe0, 0x82, 0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80,
+  0x32, 0x8f, 0xd7, 0xf8, 0x40, 0x63, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c,
+  0xa8, 0xf3, 0x78, 0x8d, 0x40, 0xb8, 0x60, 0x98, 0x52, 0x8b, 0xd9, 0x80,
+  0x0b, 0x9e, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x6a, 0x3d, 0x68,
+  0x63, 0x0c, 0x4a, 0x63, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x28, 0xf6,
+  0xa0, 0x8d, 0x40, 0xb8, 0x60, 0x98, 0x0b, 0x9e, 0xba, 0xe3, 0xa9, 0x9b,
+  0x89, 0x61, 0x0e, 0x0d, 0x86, 0x39, 0x62, 0x98, 0x23, 0x86, 0x19, 0x31,
+  0x38, 0x00, 0x10, 0x04, 0x83, 0x0f, 0x3e, 0xc0, 0xe3, 0x2f, 0xda, 0x63,
   0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13,
-  0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80, 0x63,
-  0x33, 0x1e, 0x4b, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb8,
-  0x36, 0xeb, 0xb1, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80,
-  0x73, 0x33, 0x1f, 0x4b, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c,
-  0x16, 0x3b, 0xeb, 0x31, 0xff, 0x0a, 0xd2, 0xcc, 0xc5, 0xd6, 0x6c, 0x34,
-  0x21, 0x00, 0x2e, 0x78, 0x6a, 0x96, 0x00, 0x26, 0x06, 0x5a, 0x1e, 0xd3,
-  0xd8, 0x07, 0x1f, 0x0e, 0xf4, 0x81, 0x25, 0xfa, 0x41, 0x78, 0x09, 0x1f,
-  0x0e, 0xfc, 0x61, 0xc4, 0xc0, 0x00, 0x40, 0x10, 0x0c, 0x20, 0x3c, 0xcb,
-  0xb1, 0x77, 0x32, 0xfb, 0x80, 0x97, 0xf8, 0x98, 0x10, 0xc8, 0xc7, 0x02,
-  0x79, 0x81, 0x8f, 0x15, 0xff, 0x10, 0x1f, 0x2b, 0x02, 0xf9, 0x58, 0x10,
-  0x12, 0xf0, 0x19, 0x31, 0x30, 0x00, 0x10, 0x04, 0x03, 0xe8, 0xcf, 0xc0,
-  0xac, 0x9e, 0x4c, 0x28, 0xe2, 0x63, 0x81, 0x20, 0x1f, 0x0b, 0x0e, 0xf8,
-  0x5c, 0x60, 0xcc, 0x88, 0x81, 0x03, 0x80, 0x20, 0x18, 0x34, 0xa5, 0x46,
-  0x66, 0x30, 0x96, 0x62, 0x79, 0x16, 0xfc, 0xd8, 0x8f, 0xfd, 0x98, 0x8f,
-  0xed, 0xd9, 0x2c, 0xc1, 0x08, 0x0d, 0x37, 0xf8, 0xd5, 0x9e, 0x81, 0xc1,
-  0x2c, 0x83, 0x4c, 0x8c, 0x50, 0x30, 0x62, 0x60, 0x00, 0x20, 0x08, 0x06,
-  0x50, 0xa9, 0x9d, 0x19, 0x3f, 0x59, 0x90, 0x63, 0xf0, 0x19, 0x31, 0x30,
-  0x00, 0x10, 0x04, 0x03, 0xe8, 0xd4, 0xd2, 0xac, 0x9f, 0x2c, 0xd8, 0x31,
-  0xf8, 0x8c, 0x26, 0xc0, 0xd8, 0x30, 0xdc, 0x10, 0xfc, 0x19, 0x18, 0xcc,
-  0x32, 0xcc, 0x44, 0x4d, 0x04, 0xc3, 0x11, 0xc5, 0x98, 0x0d, 0xdf, 0x19,
-  0xc3, 0x0c, 0x37, 0x04, 0x2e, 0x46, 0x06, 0x35, 0x04, 0x3a, 0x1c, 0x71,
-  0x9c, 0xd9, 0xf0, 0x55, 0x20, 0xe8, 0x25, 0xc3, 0x0c, 0x37, 0x04, 0x31,
-  0x46, 0x06, 0x15, 0x0c, 0x3a, 0xcb, 0x40, 0x13, 0x69, 0x11, 0x5c, 0x7d,
-  0x0d, 0x73, 0x2a, 0x35, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x7c,
-  0xb0, 0x06, 0x6a, 0x3f, 0xd6, 0x6a, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20,
-  0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x1c, 0x32, 0x62,
-  0x80, 0x00, 0x20, 0x08, 0x06, 0xdc, 0xad, 0x9d, 0xda, 0x41, 0x04, 0x23,
-  0x06, 0x08, 0x00, 0x82, 0x60, 0xc0, 0xe1, 0x1a, 0xaa, 0x31, 0x44, 0x30,
-  0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x5c, 0xae, 0xa5, 0x9a, 0x44, 0x04,
-  0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0x84, 0x1b, 0xaa, 0xa5, 0x59,
-  0x40, 0x6b, 0x79, 0x66, 0x6b, 0xa3, 0x09, 0x01, 0x70, 0xc1, 0x53, 0xb3,
-  0x04, 0x69, 0x31, 0xdc, 0x90, 0xe9, 0x1a, 0x18, 0xcc, 0x32, 0xd8, 0xc4,
-  0x4d, 0x04, 0x45, 0x66, 0xab, 0x06, 0x17, 0x3c, 0x35, 0x62, 0x70, 0x00,
-  0x20, 0x08, 0x06, 0xd4, 0xb8, 0xb1, 0xda, 0xd7, 0x67, 0x23, 0x06, 0x07,
-  0x00, 0x82, 0x60, 0x40, 0x91, 0x1b, 0xab, 0x05, 0xc2, 0x05, 0xc3, 0xd4,
-  0x99, 0xc1, 0x1a, 0x5c, 0xf0, 0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18,
-  0x50, 0xe8, 0x16, 0x6b, 0x62, 0x20, 0x6a, 0x23, 0x06, 0x07, 0x00, 0x82,
-  0x60, 0x40, 0xa5, 0x5b, 0xac, 0x05, 0xc2, 0x05, 0xc3, 0x5c, 0xf0, 0xd4,
-  0x1d, 0x4f, 0x1d, 0x8c, 0x0d, 0x73, 0x65, 0x35, 0xcc, 0x11, 0xc3, 0x1c,
-  0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x7c, 0xed, 0xd6, 0x6b,
-  0x7c, 0xa6, 0x6e, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09,
-  0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20,
-  0x08, 0x06, 0x1c, 0xbd, 0x91, 0x5b, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00,
-  0x82, 0x60, 0xc0, 0xd5, 0x5b, 0xb9, 0x25, 0x44, 0x30, 0x62, 0x80, 0x00,
-  0x20, 0x08, 0x06, 0x9c, 0xbd, 0x99, 0x5b, 0x42, 0x04, 0x23, 0x06, 0x0a,
-  0x00, 0x82, 0x60, 0xb0, 0xf8, 0x5b, 0xb9, 0x99, 0x5a, 0x10, 0x6f, 0xb6,
-  0x36, 0x6f, 0xa3, 0x09, 0x01, 0x70, 0xc1, 0x53, 0xb3, 0x04, 0x69, 0x31,
-  0xdc, 0x60, 0x07, 0xf6, 0x06, 0x06, 0xb3, 0x0c, 0x38, 0x91, 0x16, 0x81,
-  0xe1, 0x99, 0x9e, 0xc5, 0x67, 0x38, 0x62, 0x0f, 0xf6, 0x8c, 0xf8, 0x66,
-  0x19, 0x72, 0x82, 0x27, 0x02, 0xe3, 0x33, 0x3e, 0x88, 0x8f, 0x05, 0x03,
-  0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10,
-  0x9f, 0x22, 0x40, 0x4e, 0x87, 0x1b, 0x02, 0x7f, 0x03, 0x83, 0x59, 0x06,
-  0x9d, 0xd8, 0x89, 0xc0, 0x06, 0x52, 0x83, 0xcf, 0x2c, 0x01, 0x58, 0xd8,
-  0xa8, 0x11, 0xf1, 0x99, 0x25, 0x00, 0x8b, 0xe1, 0x08, 0x53, 0x20, 0x35,
-  0xe1, 0x9b, 0x65, 0xe8, 0x09, 0xb0, 0x08, 0xec, 0x14, 0x4a, 0x2d, 0x3e,
-  0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20, 0x92, 0x8f,
-  0x15, 0x41, 0x7c, 0x8a, 0x58, 0x39, 0x1d, 0x6e, 0x08, 0x52, 0x0e, 0x0c,
-  0x66, 0x19, 0x7c, 0xe2, 0x27, 0x02, 0x6b, 0xb5, 0x21, 0x3e, 0xb3, 0x04,
-  0x60, 0x61, 0x04, 0xac, 0xc1, 0x67, 0x96, 0x00, 0x2c, 0x06, 0x5a, 0x1e,
-  0x4d, 0x27, 0xb0, 0x9d, 0x20, 0x7c, 0x42, 0xf8, 0x09, 0xb1, 0xe0, 0x89,
-  0x0b, 0x86, 0xb1, 0x57, 0x9b, 0xb5, 0xf8, 0x0c, 0x47, 0xc8, 0x02, 0xad,
-  0x11, 0xdf, 0x2c, 0x43, 0x58, 0x90, 0x45, 0x60, 0xb5, 0x36, 0x0b, 0xf1,
-  0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x86, 0x7c,
-  0xac, 0x08, 0xe2, 0x53, 0x44, 0xce, 0xe9, 0x70, 0x43, 0x70, 0x73, 0x60,
-  0x30, 0xcb, 0x20, 0x16, 0x63, 0x11, 0xd8, 0xd0, 0x6b, 0xf0, 0x99, 0x25,
-  0x40, 0x0b, 0xd3, 0x35, 0x22, 0x3e, 0xb3, 0x04, 0x68, 0x31, 0x1c, 0xd1,
-  0x0b, 0xbb, 0x26, 0x7c, 0xb3, 0x0c, 0x65, 0x81, 0x16, 0x81, 0xf9, 0x02,
-  0xaf, 0xc5, 0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16,
-  0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x64, 0xa7, 0xc3, 0x0d, 0x81,
-  0xd8, 0x81, 0xc1, 0x2c, 0x83, 0x59, 0x9c, 0x45, 0x60, 0xe4, 0x36, 0xc4,
-  0x67, 0x96, 0x00, 0x2d, 0x8c, 0x48, 0x37, 0xf8, 0xcc, 0x12, 0xa0, 0xc5,
-  0x40, 0xcb, 0xa3, 0x89, 0x05, 0x36, 0x16, 0x84, 0x59, 0x08, 0x67, 0x01,
-  0x1b, 0x64, 0x71, 0xc1, 0x30, 0x17, 0x3c, 0x75, 0xdb, 0x53, 0x57, 0x6b,
-  0xc3, 0x9c, 0x7a, 0x0d, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70,
-  0x00, 0x20, 0x08, 0x06, 0x9f, 0xdc, 0x89, 0x5d, 0xc8, 0xbd, 0xdd, 0x68,
-  0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10,
-  0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x97, 0x77,
-  0x69, 0x97, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70, 0x7a,
-  0xa7, 0x76, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0xb7,
-  0x77, 0x6b, 0x97, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c,
-  0xa3, 0xa7, 0x76, 0x2b, 0x17, 0xd8, 0xdd, 0xce, 0xe1, 0xdd, 0x68, 0x42,
-  0x00, 0x5c, 0xf0, 0xd4, 0x2c, 0x41, 0x5a, 0x0c, 0xb4, 0x3c, 0xa6, 0x41,
-  0x13, 0x78, 0x1e, 0xcc, 0x04, 0x4b, 0xd8, 0x84, 0x80, 0x16, 0x78, 0x1e,
-  0xdc, 0xc4, 0x2c, 0x83, 0x5a, 0xb0, 0xc5, 0x3e, 0x0c, 0x47, 0x80, 0x44,
-  0xcf, 0x0d, 0xdf, 0x85, 0xc4, 0x30, 0xc3, 0x0d, 0x01, 0xca, 0x91, 0x41,
-  0x0d, 0x81, 0x0e, 0x47, 0x84, 0x44, 0xd8, 0x0d, 0x5f, 0x05, 0x82, 0xde,
-  0x48, 0x0c, 0x33, 0xdc, 0x10, 0xac, 0x1c, 0x19, 0x54, 0x30, 0xe8, 0x2c,
-  0xc3, 0x5a, 0x80, 0x46, 0x70, 0xef, 0x36, 0xcc, 0x91, 0xd8, 0x30, 0x23,
-  0x06, 0x07, 0x00, 0x82, 0x60, 0xf0, 0xa9, 0x9e, 0xde, 0xe5, 0xdc, 0xe9,
-  0x8d, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68,
-  0x02, 0x31, 0x14, 0x71, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70,
-  0xb1, 0x17, 0x7a, 0x07, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01,
-  0x27, 0x7b, 0xa2, 0xc7, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
-  0x70, 0xb3, 0x37, 0x7a, 0x12, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82,
-  0xc1, 0xb2, 0x7b, 0xa2, 0x37, 0x76, 0x81, 0xeb, 0xcd, 0x1d, 0xec, 0x8d,
-  0x26, 0x04, 0xc0, 0x05, 0x4f, 0xcd, 0x12, 0x80, 0xc6, 0x70, 0xc3, 0x4c,
-  0xd0, 0x1e, 0x18, 0xcc, 0x32, 0xb4, 0x85, 0x5b, 0x04, 0xe5, 0x73, 0xa5,
-  0x07, 0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x54, 0xef,
-  0x99, 0x9e, 0x4e, 0xdc, 0xdd, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50,
-  0xbe, 0x67, 0x7a, 0x81, 0x70, 0xc1, 0x30, 0x15, 0x76, 0xaa, 0x07, 0x17,
-  0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x94, 0xf8, 0xad, 0x1e,
-  0x4f, 0xf0, 0xdd, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50, 0xe3, 0xb7,
-  0x7a, 0x81, 0x70, 0xc1, 0x30, 0x17, 0x3c, 0x75, 0xc7, 0x53, 0xa7, 0x72,
-  0xc3, 0xdc, 0x8f, 0x0d, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70,
-  0x00, 0x20, 0x08, 0x06, 0xdf, 0xf9, 0xdd, 0x9e, 0xdd, 0x91, 0xdf, 0x68,
-  0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10,
-  0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0xe7, 0x7e,
-  0xbe, 0x97, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70, 0xef,
-  0xf7, 0x7b, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x07,
-  0x7f, 0xe0, 0x97, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c,
-  0xf8, 0xf7, 0x7b, 0xa0, 0x17, 0xac, 0x1f, 0xec, 0xb5, 0xdf, 0x68, 0x42,
-  0x00, 0x5c, 0xf0, 0xd4, 0x2c, 0x01, 0x68, 0x0c, 0x37, 0xc0, 0x05, 0xfc,
-  0x81, 0xc1, 0x2c, 0xc3, 0x5b, 0x80, 0x46, 0x60, 0x72, 0x47, 0x77, 0xf1,
-  0x19, 0x8e, 0xb0, 0x8b, 0xba, 0x23, 0xbe, 0x59, 0x06, 0xb8, 0x98, 0x8b,
-  0xc0, 0xec, 0xee, 0x2e, 0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9,
-  0xe0, 0x29, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0xfd, 0xd3,
-  0xe1, 0x86, 0x00, 0xff, 0xc0, 0x60, 0x96, 0x21, 0x2e, 0xe4, 0x22, 0xb0,
-  0xc1, 0xef, 0xe0, 0x33, 0x4b, 0x70, 0x17, 0xd6, 0x77, 0x44, 0x7c, 0x66,
-  0x09, 0xee, 0x62, 0x38, 0x22, 0x34, 0xfc, 0x4e, 0xf8, 0x66, 0x19, 0xe8,
-  0xe2, 0x2e, 0x02, 0x13, 0x8d, 0xbf, 0x8b, 0x8f, 0x05, 0x0e, 0x7d, 0x2e,
-  0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22,
-  0x4a, 0x30, 0xd0, 0xe1, 0x86, 0x60, 0x04, 0x03, 0x30, 0x98, 0x65, 0xa8,
-  0x0b, 0xbb, 0x08, 0xec, 0xf4, 0x86, 0xf8, 0xcc, 0x12, 0xdc, 0x85, 0x11,
-  0xaa, 0x07, 0x9f, 0x59, 0x82, 0xbb, 0x18, 0x68, 0x79, 0xb4, 0xb8, 0xc0,
-  0xe4, 0x82, 0xa8, 0x0b, 0xc1, 0x2e, 0x70, 0x66, 0x2e, 0x2e, 0x18, 0xc6,
-  0x52, 0xaf, 0xf5, 0xe2, 0x33, 0x1c, 0xc1, 0x1a, 0xae, 0x47, 0x7c, 0xb3,
-  0x0c, 0x78, 0xb1, 0x17, 0x81, 0xbd, 0x5e, 0x6b, 0xc4, 0xc7, 0x82, 0x81,
-  0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88,
-  0x4f, 0x11, 0x33, 0x18, 0xe8, 0x70, 0x43, 0x10, 0x83, 0x01, 0x18, 0xcc,
-  0x32, 0xe4, 0x85, 0x5e, 0x04, 0x36, 0xdc, 0x1e, 0x7c, 0x66, 0x09, 0xfe,
-  0xc2, 0x68, 0x8f, 0x88, 0xcf, 0x2c, 0xc1, 0x5f, 0x0c, 0x47, 0xdc, 0x46,
-  0xed, 0x09, 0xdf, 0x2c, 0x03, 0x5f, 0xfc, 0x45, 0x60, 0xb8, 0x61, 0x7b,
+  0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80, 0xbb,
+  0x8f, 0xf3, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x38,
+  0xfc, 0x40, 0x8f, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80,
+  0xcb, 0x8f, 0xf4, 0x48, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c,
+  0x96, 0x10, 0x41, 0x8f, 0xd4, 0x08, 0xe8, 0x23, 0x37, 0xec, 0x63, 0x34,
+  0x21, 0x00, 0x2e, 0x78, 0x6a, 0x96, 0x40, 0x0e, 0x86, 0x1b, 0xec, 0x20,
+  0x3f, 0xc0, 0x60, 0x96, 0x21, 0x0c, 0xe4, 0x20, 0xb0, 0xbd, 0xe8, 0x8b,
+  0xf8, 0x0c, 0x47, 0xec, 0x81, 0x5f, 0x10, 0xdf, 0x2c, 0x83, 0x18, 0x94,
+  0x41, 0x60, 0x7f, 0xc1, 0x07, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3,
+  0x5c, 0xf0, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0xc4, 0x88,
+  0xe8, 0x70, 0x43, 0x10, 0x22, 0x60, 0x30, 0xcb, 0x30, 0x06, 0x64, 0x10,
+  0xd8, 0x70, 0x1a, 0xf0, 0x99, 0x25, 0x48, 0x03, 0x33, 0x0d, 0x22, 0x3e,
+  0xb3, 0x04, 0x69, 0x30, 0x1c, 0x61, 0x0a, 0xa7, 0x21, 0x7c, 0xb3, 0x0c,
+  0x66, 0x90, 0x06, 0x81, 0x9d, 0x02, 0x6a, 0xc4, 0xc7, 0x02, 0x87, 0x3e,
+  0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f,
+  0x11, 0x2e, 0xa2, 0xc3, 0x0d, 0x01, 0x8b, 0x80, 0xc1, 0x2c, 0xc3, 0x19,
+  0xa0, 0x41, 0x60, 0xb0, 0x31, 0xc4, 0x67, 0x96, 0x20, 0x0d, 0x8c, 0x98,
+  0x0d, 0xf8, 0xcc, 0x12, 0xa4, 0xc1, 0x40, 0xcb, 0xa3, 0x8d, 0x01, 0x46,
+  0x06, 0xc4, 0x19, 0x08, 0x68, 0x20, 0x16, 0x65, 0x70, 0xc1, 0x30, 0x26,
+  0x1b, 0xb6, 0x11, 0x9f, 0xe1, 0x88, 0x59, 0xb8, 0x0d, 0xe2, 0x9b, 0x65,
+  0x50, 0x83, 0x36, 0x08, 0x0c, 0x37, 0x68, 0x21, 0x3e, 0x16, 0x0c, 0xf4,
+  0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0xc0, 0x90, 0x8f, 0x15, 0x41, 0x7c,
+  0x8a, 0xe0, 0x11, 0x1d, 0x6e, 0x08, 0x74, 0x04, 0x0c, 0x66, 0x19, 0xd6,
+  0x80, 0x0d, 0x02, 0x1b, 0xc0, 0x03, 0x3e, 0xb3, 0x04, 0x71, 0x60, 0xbd,
+  0x41, 0xc4, 0x67, 0x96, 0x20, 0x0e, 0x86, 0x23, 0x7c, 0xc1, 0x37, 0x84,
+  0x6f, 0x96, 0xc1, 0x0d, 0xe2, 0x20, 0xb0, 0x5f, 0xf8, 0x8d, 0xf8, 0x58,
+  0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca, 0x82, 0x48, 0x3e, 0x56,
+  0x04, 0xf1, 0x29, 0xe2, 0x4c, 0x74, 0xb8, 0x21, 0x28, 0x13, 0x30, 0x98,
+  0x65, 0x78, 0x03, 0x38, 0x08, 0xec, 0x3c, 0x86, 0xf8, 0xcc, 0x12, 0xc4,
+  0x81, 0x11, 0xec, 0x01, 0x9f, 0x59, 0x82, 0x38, 0x18, 0x68, 0x79, 0xb4,
+  0x35, 0xc0, 0xd8, 0x80, 0x78, 0x03, 0x01, 0x0e, 0x64, 0xa3, 0x0d, 0x2e,
+  0x18, 0xe6, 0x82, 0xa7, 0x6e, 0x7b, 0xea, 0x70, 0x63, 0x98, 0x6b, 0x87,
+  0x61, 0x8e, 0x18, 0xe6, 0x88, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1,
+  0xe0, 0xab, 0x93, 0x32, 0x21, 0x11, 0x39, 0x19, 0x4d, 0x08, 0x80, 0xd1,
+  0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0x22, 0x91,
+  0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xe0, 0xf8, 0x84, 0x4d, 0x12, 0x22,
+  0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xae, 0x4f, 0xda, 0x24, 0x21,
+  0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xe0, 0xfc, 0xc4, 0x4d, 0x12,
+  0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0xc5, 0x54, 0xda, 0xc4,
+  0x45, 0x82, 0x3c, 0xf1, 0x91, 0x3d, 0x19, 0x4d, 0x08, 0x80, 0x0b, 0x9e,
+  0x9a, 0x25, 0x90, 0x83, 0x81, 0x96, 0xc7, 0x34, 0x3a, 0x3f, 0xe2, 0x58,
+  0xe2, 0x13, 0xe2, 0xc0, 0x8f, 0xc0, 0xe0, 0x02, 0x63, 0x46, 0x0c, 0x1c,
+  0x00, 0x04, 0xc1, 0xa0, 0x51, 0x95, 0x34, 0xa9, 0x11, 0x17, 0xf1, 0x93,
+  0x80, 0x4c, 0xc8, 0x84, 0x4c, 0xc6, 0x04, 0x54, 0x66, 0x09, 0x46, 0x68,
+  0xb8, 0x61, 0x34, 0xfc, 0x04, 0x0c, 0x66, 0x19, 0xe8, 0x20, 0x26, 0x82,
+  0x11, 0x03, 0x03, 0x00, 0x41, 0x30, 0x80, 0x54, 0x85, 0x4d, 0x42, 0x62,
+  0xc4, 0xc0, 0x00, 0x40, 0x10, 0x0c, 0xa0, 0x55, 0x69, 0x93, 0x90, 0x30,
+  0x61, 0x4c, 0xe0, 0x63, 0x02, 0x99, 0xc0, 0x67, 0x34, 0xa1, 0x46, 0x86,
+  0xe1, 0x86, 0x80, 0x54, 0xc0, 0x60, 0x96, 0xa1, 0x0e, 0xee, 0x20, 0x18,
+  0x8e, 0x30, 0xd0, 0x64, 0xf8, 0xee, 0x18, 0x66, 0xb8, 0x21, 0x98, 0x11,
+  0x32, 0xa8, 0x21, 0xd0, 0xe1, 0x88, 0x84, 0x4d, 0x86, 0xaf, 0x02, 0x41,
+  0x6f, 0x19, 0x66, 0xb8, 0x21, 0xb0, 0x11, 0x32, 0xa8, 0x60, 0xd0, 0x59,
+  0x06, 0x3b, 0x58, 0x85, 0xe0, 0xf4, 0x63, 0x98, 0x7b, 0x89, 0x61, 0x46,
+  0x0c, 0x0e, 0x00, 0x04, 0xc1, 0xe0, 0xab, 0x95, 0x52, 0x21, 0x13, 0x59,
+  0x19, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1,
+  0x04, 0x62, 0x28, 0xe2, 0x90, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xe0,
+  0x78, 0x85, 0x55, 0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03,
+  0xae, 0x57, 0x5a, 0x85, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30,
+  0xe0, 0x7c, 0xc5, 0x55, 0x24, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04,
+  0x83, 0xc5, 0x5c, 0x5a, 0xc5, 0x4d, 0x82, 0x5c, 0xf1, 0x93, 0x5d, 0x19,
+  0x4d, 0x08, 0x80, 0x0b, 0x9e, 0x9a, 0x25, 0x58, 0x85, 0xe1, 0x86, 0xec,
+  0x57, 0xc0, 0x60, 0x96, 0x01, 0x0f, 0xf2, 0x20, 0xa8, 0x34, 0x81, 0x15,
+  0xb8, 0xe0, 0xa9, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0xd0, 0x25,
+  0x56, 0xc0, 0x40, 0x54, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x4a,
+  0x97, 0x58, 0x09, 0x84, 0x0b, 0x86, 0x29, 0x36, 0xa9, 0x15, 0xb8, 0xe0,
+  0xa9, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0xda, 0xc5, 0x56, 0xc8,
+  0xe0, 0x54, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x72, 0x17, 0x5b,
+  0x09, 0x84, 0x0b, 0x86, 0xb9, 0xe0, 0xa9, 0x3b, 0x9e, 0xba, 0x1a, 0x19,
+  0xe6, 0xd4, 0x62, 0x98, 0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03,
+  0x00, 0x41, 0x30, 0xf8, 0xe4, 0x45, 0x5c, 0x42, 0xe5, 0x5d, 0x46, 0x13,
+  0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18,
+  0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb8, 0x7c, 0x49,
+  0x97, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80, 0xd3, 0x17,
+  0x75, 0x49, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb8, 0x7d,
+  0x59, 0x97, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0x19,
+  0x19, 0x75, 0x59, 0x95, 0xc0, 0x5e, 0x76, 0x05, 0x5f, 0x46, 0x13, 0x02,
+  0xe0, 0x82, 0xa7, 0x66, 0x09, 0x56, 0x61, 0xb8, 0xc1, 0x0e, 0xf6, 0x05,
+  0x0c, 0x66, 0x19, 0xf4, 0x60, 0x15, 0x02, 0xeb, 0x93, 0x3f, 0x89, 0xcf,
+  0x70, 0x04, 0x1f, 0x80, 0x0a, 0xf1, 0xcd, 0x32, 0xec, 0x81, 0x1f, 0x04,
+  0x16, 0x2a, 0x7d, 0x10, 0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05,
+  0x4f, 0x59, 0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0x94, 0x8c, 0x0e,
+  0x37, 0x04, 0x23, 0x03, 0x06, 0xb3, 0x0c, 0x7c, 0xd0, 0x07, 0x81, 0x0d,
+  0xa9, 0x02, 0x9f, 0x59, 0x02, 0x51, 0x30, 0x54, 0x21, 0xe2, 0x33, 0x4b,
+  0x20, 0x0a, 0xc3, 0x11, 0xa7, 0x90, 0x2a, 0xc2, 0x37, 0xcb, 0xf0, 0x07,
+  0xa2, 0x10, 0x18, 0x2a, 0xa8, 0x4a, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1,
+  0x30, 0x17, 0x3c, 0x65, 0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x01,
+  0x33, 0x3a, 0xdc, 0x10, 0xb8, 0x0c, 0x18, 0xcc, 0x32, 0x80, 0x42, 0x28,
+  0x04, 0x26, 0x2b, 0x43, 0x7c, 0x66, 0x09, 0x44, 0xc1, 0x88, 0x5a, 0x81,
+  0xcf, 0x2c, 0x81, 0x28, 0x0c, 0xb4, 0x3c, 0x1a, 0x1f, 0x60, 0x7d, 0x40,
+  0x80, 0x82, 0x10, 0x0a, 0x64, 0xe1, 0x07, 0x17, 0x0c, 0x63, 0xb4, 0x82,
+  0x2b, 0xf1, 0x19, 0x8e, 0xa0, 0x85, 0x5c, 0x21, 0xbe, 0x59, 0x86, 0x51,
+  0x30, 0x85, 0xc0, 0x74, 0xa5, 0x16, 0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b,
+  0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08,
+  0x9f, 0xd1, 0xe1, 0x86, 0x80, 0x67, 0xc0, 0x60, 0x96, 0x81, 0x14, 0x4a,
+  0x21, 0xb0, 0x41, 0x5c, 0xe0, 0x33, 0x4b, 0xa0, 0x0a, 0xf6, 0x2b, 0x44,
+  0x7c, 0x66, 0x09, 0x54, 0x61, 0x38, 0xe2, 0x17, 0xc0, 0x45, 0xf8, 0x66,
+  0x19, 0x4e, 0x41, 0x15, 0x02, 0x03, 0x87, 0x70, 0x89, 0x8f, 0x05, 0x0e,
+  0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10,
+  0x9f, 0x22, 0xd2, 0x46, 0x87, 0x1b, 0x82, 0xb3, 0x01, 0x83, 0x59, 0x06,
+  0x54, 0x48, 0x85, 0xc0, 0xd2, 0x65, 0x88, 0xcf, 0x2c, 0x81, 0x2a, 0x18,
+  0xe1, 0x2e, 0xf0, 0x99, 0x25, 0x50, 0x85, 0x81, 0x96, 0x47, 0x23, 0x05,
+  0xac, 0x14, 0x08, 0x54, 0x10, 0x52, 0x81, 0x36, 0x4c, 0xe1, 0x82, 0x61,
+  0x2e, 0x78, 0xea, 0xb6, 0xa7, 0x4e, 0x57, 0x86, 0xb9, 0xf7, 0x18, 0xe6,
+  0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xbe,
+  0xbb, 0x39, 0x1b, 0x93, 0xa1, 0x9b, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10,
+  0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x12, 0x19, 0x31,
+  0x40, 0x00, 0x10, 0x04, 0x03, 0xce, 0x6f, 0xdc, 0x26, 0x21, 0x82, 0x11,
+  0x03, 0x04, 0x00, 0x41, 0x30, 0xe0, 0xfe, 0xe6, 0x6d, 0x12, 0x22, 0x18,
+  0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x0e, 0x74, 0xe0, 0x26, 0x21, 0x82,
+  0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x58, 0x50, 0xe7, 0x6d, 0x60, 0x26,
+  0xd8, 0x1b, 0xb0, 0xe9, 0x9b, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xa9, 0x59,
+  0x82, 0x55, 0x18, 0x68, 0x79, 0x4c, 0xc3, 0x0e, 0x44, 0xad, 0x0e, 0x58,
+  0x02, 0x0f, 0x04, 0x55, 0x10, 0xb5, 0x3c, 0x98, 0x65, 0x60, 0x05, 0x57,
+  0xd8, 0x87, 0xe1, 0x08, 0x7f, 0x10, 0x9b, 0xe1, 0xbb, 0x7f, 0x18, 0x66,
+  0xb8, 0x21, 0x68, 0x19, 0x32, 0xa8, 0x21, 0xd0, 0xe1, 0x88, 0x91, 0x30,
+  0x9b, 0xe1, 0xab, 0x40, 0xd0, 0x2b, 0x89, 0x61, 0x86, 0x1b, 0x02, 0x98,
+  0x21, 0x83, 0x0a, 0x06, 0x9d, 0x65, 0x68, 0x05, 0x71, 0x08, 0x8e, 0x5e,
+  0x86, 0xb9, 0x14, 0x19, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xbe,
+  0xd7, 0xf9, 0x1b, 0x9f, 0x61, 0x9d, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10,
+  0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x0e, 0x19, 0x31,
+  0x40, 0x00, 0x10, 0x04, 0x03, 0xce, 0x76, 0x4c, 0xe7, 0x20, 0x82, 0x11,
+  0x03, 0x04, 0x00, 0x41, 0x30, 0xe0, 0x6e, 0xe7, 0x74, 0x18, 0x22, 0x18,
+  0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x0e, 0x77, 0x50, 0x47, 0x22, 0x82,
+  0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x58, 0xc0, 0xe7, 0x74, 0xd0, 0x26,
+  0x98, 0x1d, 0xbc, 0xa9, 0x9d, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xa9, 0x59,
+  0x02, 0x71, 0x18, 0x6e, 0x98, 0x89, 0xdc, 0x01, 0x83, 0x59, 0x86, 0x57,
+  0x80, 0x85, 0xa0, 0xc6, 0x46, 0x75, 0xe0, 0x82, 0xa7, 0x46, 0x0c, 0x0e,
+  0x00, 0x04, 0xc1, 0x80, 0x12, 0x9f, 0xd5, 0xc1, 0x09, 0xbe, 0x19, 0x31,
+  0x38, 0x00, 0x10, 0x04, 0x03, 0x6a, 0x7c, 0x56, 0x27, 0x10, 0x2e, 0x18,
+  0xa6, 0xcc, 0xe6, 0x75, 0xe0, 0x82, 0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04,
+  0xc1, 0x80, 0x3a, 0x1f, 0xd8, 0xf1, 0x89, 0xd0, 0x19, 0x31, 0x38, 0x00,
+  0x10, 0x04, 0x03, 0x0a, 0x7d, 0x60, 0x27, 0x10, 0x2e, 0x18, 0xe6, 0x82,
+  0xa7, 0xee, 0x78, 0xea, 0x5e, 0x66, 0x98, 0x23, 0x93, 0x61, 0x8e, 0x18,
+  0xe6, 0x88, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0xe0, 0x63, 0x1f,
+  0xde, 0xd9, 0x9b, 0xf4, 0x19, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18,
+  0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04,
+  0x00, 0x41, 0x30, 0xe0, 0xe6, 0x67, 0x7c, 0x12, 0x22, 0x18, 0x31, 0x40,
+  0x00, 0x10, 0x04, 0x03, 0x8e, 0x7e, 0xc8, 0x27, 0x21, 0x82, 0x11, 0x03,
+  0x04, 0x00, 0x41, 0x30, 0xe0, 0xea, 0xa7, 0x7c, 0x12, 0x22, 0x18, 0x31,
+  0x50, 0x00, 0x10, 0x04, 0x83, 0xa5, 0x7f, 0xc8, 0xa7, 0x74, 0x02, 0xf8,
+  0xa9, 0x1d, 0xf9, 0x19, 0x4d, 0x08, 0x80, 0x0b, 0x9e, 0x9a, 0x25, 0x10,
+  0x87, 0xe1, 0x06, 0xb8, 0xa8, 0x1f, 0x30, 0x98, 0x65, 0x88, 0x05, 0x71,
+  0x08, 0xec, 0x6e, 0xf2, 0x26, 0x3e, 0xc3, 0x11, 0x74, 0xa1, 0x37, 0xc4,
+  0x37, 0xcb, 0x20, 0x0b, 0xb5, 0x10, 0xd8, 0xde, 0xd4, 0x45, 0x7c, 0x2c,
+  0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x81, 0x21, 0x1f, 0x2b,
+  0x82, 0xf8, 0x14, 0xf1, 0x3f, 0x3a, 0xdc, 0x10, 0xf4, 0x0f, 0x18, 0xcc,
+  0x32, 0xcc, 0x02, 0x2d, 0x04, 0x36, 0x8c, 0x0e, 0x7c, 0x66, 0x09, 0x72,
+  0xc1, 0x44, 0x87, 0x88, 0xcf, 0x2c, 0x41, 0x2e, 0x0c, 0x47, 0xfc, 0xc5,
+  0xe8, 0x08, 0xdf, 0x2c, 0x83, 0x2d, 0xe4, 0x42, 0x60, 0xa0, 0x41, 0x3a,
   0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x91,
-  0x7c, 0xac, 0x08, 0xe2, 0x53, 0x84, 0x0f, 0x06, 0x3a, 0xdc, 0x10, 0xf0,
-  0x60, 0x00, 0x06, 0xb3, 0x0c, 0x7d, 0xe1, 0x17, 0x81, 0xf9, 0xde, 0x10,
-  0x9f, 0x59, 0x82, 0xbf, 0x30, 0x62, 0xfc, 0xe0, 0x33, 0x4b, 0xf0, 0x17,
-  0x03, 0x2d, 0x8f, 0x96, 0x17, 0x98, 0x5e, 0x10, 0x7d, 0x21, 0xf8, 0x05,
-  0xe9, 0xec, 0xc5, 0x05, 0xc3, 0x5c, 0xf0, 0xd4, 0x6d, 0x4f, 0xdd, 0xeb,
-  0x0d, 0x73, 0xe4, 0x36, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1,
-  0x01, 0x80, 0x20, 0x18, 0x7c, 0x6c, 0x18, 0xf0, 0x60, 0xb0, 0x7f, 0x69,
-  0x18, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2,
-  0x68, 0x02, 0x31, 0x14, 0x91, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
-  0x70, 0x73, 0x18, 0x8c, 0x61, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80,
-  0x20, 0x18, 0x70, 0x74, 0x18, 0x90, 0x61, 0x90, 0x10, 0xc1, 0x88, 0x01,
-  0x02, 0x80, 0x20, 0x18, 0x70, 0x75, 0x18, 0x94, 0x61, 0x90, 0x10, 0xc1,
-  0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c, 0x7d, 0x18, 0x90, 0x61, 0x50,
-  0x82, 0x41, 0x00, 0x87, 0x41, 0x0d, 0x06, 0x72, 0x18, 0x8c, 0x26, 0x04,
-  0xc0, 0x05, 0x4f, 0xcd, 0x12, 0x80, 0xc6, 0x40, 0xcb, 0x63, 0x1a, 0x6b,
-  0xc1, 0x86, 0x82, 0x5a, 0xb0, 0x44, 0x5b, 0x08, 0x7f, 0xc1, 0x86, 0x82,
-  0x5b, 0x98, 0x7e, 0xd8, 0x60, 0x00, 0x9f, 0x59, 0x86, 0xd0, 0x18, 0x0d,
-  0xfb, 0x18, 0x8e, 0x08, 0x70, 0x30, 0x18, 0xbe, 0x13, 0x86, 0x19, 0x6e,
-  0x08, 0x46, 0x30, 0x20, 0x83, 0x1a, 0x02, 0x1d, 0x8e, 0xe0, 0x0f, 0x1e,
-  0x0c, 0x86, 0xaf, 0x02, 0x41, 0xcf, 0x3f, 0x86, 0x19, 0x6e, 0x08, 0x4c,
-  0x30, 0x20, 0x83, 0x0a, 0x06, 0x9d, 0x65, 0x10, 0x8d, 0xdb, 0x08, 0x4e,
-  0xfd, 0x86, 0xb9, 0x7f, 0x1b, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c,
-  0xbe, 0x52, 0x0c, 0xea, 0x30, 0xa0, 0xc1, 0x40, 0x14, 0x83, 0xd1, 0x84,
-  0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86,
-  0x22, 0x0e, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x8e, 0x15, 0x03,
-  0x3e, 0x0c, 0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xae,
-  0x15, 0x83, 0x3e, 0x0c, 0x18, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04,
-  0x03, 0xce, 0x15, 0x03, 0x3f, 0x0c, 0x24, 0x22, 0x18, 0x31, 0x50, 0x00,
-  0x10, 0x04, 0x83, 0xc5, 0x16, 0x83, 0x3e, 0x0c, 0x7c, 0x30, 0x08, 0x52,
-  0x31, 0x70, 0xc3, 0x60, 0x15, 0x83, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xa9,
-  0x59, 0x82, 0xdb, 0x18, 0x6e, 0x70, 0x91, 0x57, 0x0c, 0xc0, 0x60, 0x96,
-  0x81, 0x34, 0x4a, 0x23, 0xa8, 0x1c, 0x0c, 0x40, 0x31, 0x80, 0x0b, 0x9e,
-  0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x0a, 0x17, 0x83, 0x50, 0x0c,
-  0x36, 0x39, 0x0c, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0xca, 0xc5,
-  0x20, 0x14, 0x83, 0x40, 0xb8, 0x60, 0x98, 0xe2, 0xc1, 0xa0, 0x14, 0x03,
-  0xb8, 0xe0, 0xa9, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0x7a, 0x31,
-  0x30, 0xc5, 0xe0, 0x46, 0xee, 0x30, 0x18, 0x31, 0x38, 0x00, 0x10, 0x04,
-  0x03, 0xca, 0x17, 0x03, 0x53, 0x0c, 0x02, 0xe1, 0x82, 0x61, 0x2e, 0x78,
-  0xea, 0x8e, 0xa7, 0xae, 0x04, 0x83, 0x61, 0x4e, 0xe7, 0x86, 0x39, 0x62,
-  0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x4f, 0x1c,
-  0x03, 0x59, 0x0c, 0xe2, 0x30, 0xf8, 0xc5, 0x60, 0x34, 0x21, 0x00, 0x46,
-  0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44,
-  0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80, 0x4b, 0xc7, 0x20, 0x17, 0x83,
-  0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80, 0x53, 0xc7, 0x40,
-  0x17, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80, 0x5b,
-  0xc7, 0x60, 0x17, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1,
-  0x60, 0x99, 0xc7, 0x40, 0x17, 0x83, 0x3d, 0x0c, 0x02, 0x73, 0x0c, 0x56,
-  0x31, 0x40, 0xc7, 0x60, 0x34, 0x21, 0x00, 0x2e, 0x78, 0x6a, 0x96, 0xe0,
-  0x36, 0x86, 0x1b, 0xd6, 0x64, 0x1d, 0x03, 0x30, 0x98, 0x65, 0x30, 0x8d,
-  0xdb, 0x08, 0xac, 0x0d, 0x83, 0x37, 0x0c, 0xe2, 0x33, 0x1c, 0x71, 0x07,
-  0x70, 0x18, 0x10, 0xdf, 0x2c, 0xc3, 0x69, 0xa8, 0x46, 0x60, 0x71, 0x18,
-  0xe0, 0x41, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65,
-  0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x51, 0x8f, 0x81, 0x0e, 0x37,
-  0x04, 0xf3, 0x18, 0x80, 0xc1, 0x2c, 0x03, 0x6a, 0xa4, 0x46, 0x60, 0x43,
-  0x1e, 0x06, 0xf0, 0x99, 0x25, 0x70, 0x0d, 0xc3, 0xc3, 0x80, 0x88, 0xcf,
-  0x2c, 0x81, 0x6b, 0x0c, 0x47, 0x88, 0x42, 0x1e, 0x06, 0xc2, 0x37, 0xcb,
-  0xb0, 0x1a, 0xae, 0x11, 0xd8, 0x28, 0xe8, 0x61, 0x10, 0x1f, 0x0b, 0x1c,
-  0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20,
-  0x3e, 0x45, 0x80, 0x64, 0xa0, 0xc3, 0x0d, 0x81, 0x3f, 0x06, 0x60, 0x30,
-  0xcb, 0xc0, 0x1a, 0xad, 0x11, 0x98, 0x28, 0x06, 0x43, 0x7c, 0x66, 0x09,
-  0x5c, 0xc3, 0x88, 0x52, 0x0c, 0xe0, 0x33, 0x4b, 0xe0, 0x1a, 0x03, 0x2d,
-  0x8f, 0x86, 0x1a, 0x58, 0x6a, 0x10, 0xac, 0x21, 0xb4, 0x86, 0x4e, 0xa8,
-  0xc6, 0x05, 0xc3, 0x18, 0x29, 0x06, 0xa8, 0x18, 0xc4, 0x67, 0x38, 0xe2,
-  0x54, 0x52, 0x31, 0x20, 0xbe, 0x59, 0x86, 0xd7, 0x90, 0x8d, 0xc0, 0x54,
-  0x31, 0x40, 0x95, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78,
-  0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0xc2, 0x25, 0x03, 0x1d,
-  0x6e, 0x08, 0x58, 0x32, 0x00, 0x83, 0x59, 0x06, 0xd8, 0x88, 0x8d, 0xc0,
-  0x06, 0x59, 0x0c, 0xe0, 0x33, 0x4b, 0x60, 0x1b, 0xf6, 0x8a, 0x01, 0x11,
-  0x9f, 0x59, 0x02, 0xdb, 0x18, 0x8e, 0x90, 0x15, 0x58, 0x0c, 0x84, 0x6f,
-  0x96, 0x61, 0x36, 0x6c, 0x23, 0xb0, 0x59, 0x89, 0xc5, 0x20, 0x3e, 0x16,
-  0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20, 0x92, 0x8f, 0x15,
-  0x41, 0x7c, 0x8a, 0xc8, 0xc9, 0x40, 0x87, 0x1b, 0x82, 0x9b, 0x0c, 0xc0,
-  0x60, 0x96, 0x81, 0x36, 0x6a, 0x23, 0xb0, 0x5c, 0x0c, 0x86, 0xf8, 0xcc,
-  0x12, 0xd8, 0x86, 0x11, 0xbe, 0x18, 0xc0, 0x67, 0x96, 0xc0, 0x36, 0x06,
-  0x5a, 0x1e, 0x0d, 0x36, 0xb0, 0xd8, 0x20, 0x68, 0x43, 0xa8, 0x0d, 0xbd,
-  0x92, 0x8d, 0x0b, 0x86, 0xb9, 0xe0, 0xa9, 0xdb, 0x9e, 0x3a, 0x55, 0x0c,
-  0x86, 0xb9, 0xdf, 0x1b, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0,
-  0x00, 0x40, 0x10, 0x0c, 0xbe, 0xb3, 0x0c, 0x6e, 0x32, 0xb0, 0xc7, 0x80,
-  0x2c, 0x83, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41,
-  0x18, 0x4d, 0x20, 0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04,
-  0x03, 0xce, 0x2d, 0x03, 0x9f, 0x0c, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00,
-  0x10, 0x04, 0x03, 0xee, 0x2d, 0x83, 0x9f, 0x0c, 0x12, 0x22, 0x18, 0x31,
-  0x40, 0x00, 0x10, 0x04, 0x03, 0x0e, 0x2e, 0x03, 0xb0, 0x0c, 0x12, 0x22,
-  0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x05, 0x2f, 0x83, 0x9f, 0x0c,
-  0x40, 0x32, 0x08, 0xd6, 0x32, 0x80, 0xc9, 0xa0, 0x2d, 0x83, 0xd1, 0x84,
-  0x00, 0xb8, 0xe0, 0xa9, 0x59, 0x82, 0xdb, 0x18, 0x68, 0x79, 0x4c, 0x43,
-  0x34, 0xc4, 0x54, 0x08, 0x0d, 0x96, 0x20, 0x0d, 0xc1, 0x36, 0xc4, 0x54,
-  0x28, 0x0d, 0xab, 0x97, 0x93, 0x0c, 0xe0, 0x33, 0xcb, 0x80, 0x1b, 0xba,
-  0x11, 0x2f, 0xc3, 0x11, 0xc1, 0x4c, 0x06, 0xc3, 0x77, 0xc2, 0x30, 0xc3,
-  0x0d, 0x81, 0x3f, 0x06, 0x64, 0x50, 0x43, 0xa0, 0xc3, 0x11, 0xf7, 0x72,
-  0x93, 0xc1, 0xf0, 0x55, 0x20, 0xe8, 0xe5, 0xcb, 0x30, 0xc3, 0x0d, 0x41,
-  0x48, 0x06, 0x64, 0x50, 0xc1, 0xa0, 0xb3, 0x0c, 0xb9, 0xe1, 0x1e, 0xc1,
-  0x95, 0x63, 0x30, 0xcc, 0xe9, 0xdf, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82,
-  0x60, 0xf0, 0x81, 0x66, 0x00, 0x97, 0xc1, 0x4b, 0x06, 0x7d, 0x19, 0x8c,
-  0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02,
-  0x31, 0x14, 0x71, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70, 0xa7,
-  0x19, 0xdc, 0x65, 0x70, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
-  0x70, 0xa8, 0x19, 0xe0, 0x65, 0xc0, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80,
-  0x20, 0x18, 0x70, 0xa9, 0x19, 0xe4, 0x65, 0x20, 0x11, 0xc1, 0x88, 0x81,
-  0x02, 0x80, 0x20, 0x18, 0x2c, 0xb1, 0x19, 0xe0, 0x65, 0x90, 0x93, 0x41,
-  0x40, 0x9a, 0x41, 0x5a, 0x06, 0xa6, 0x19, 0x8c, 0x26, 0x04, 0xc0, 0x05,
-  0x4f, 0xcd, 0x12, 0xb8, 0xc7, 0x70, 0x43, 0xca, 0xa8, 0x66, 0x00, 0x06,
-  0xb3, 0x0c, 0xbb, 0xc1, 0x1b, 0x41, 0xd1, 0x64, 0xb0, 0x97, 0x01, 0x5c,
-  0xf0, 0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50, 0xb3, 0x19, 0xf0,
-  0x65, 0xb0, 0xb5, 0x65, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x14,
-  0x6d, 0x06, 0x7c, 0x19, 0x04, 0xc2, 0x05, 0xc3, 0xd4, 0x4d, 0x06, 0xa0,
-  0x19, 0xc0, 0x05, 0x4f, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x01, 0x85,
-  0x9b, 0x41, 0x68, 0x06, 0x32, 0x23, 0x97, 0xc1, 0x88, 0xc1, 0x01, 0x80,
-  0x20, 0x18, 0x50, 0xb9, 0x19, 0x84, 0x66, 0x10, 0x08, 0x17, 0x0c, 0x73,
-  0xc1, 0x53, 0x77, 0x3c, 0x75, 0x20, 0x19, 0x0c, 0x73, 0x35, 0x18, 0x0c,
-  0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06,
-  0x5f, 0x6f, 0x06, 0xad, 0x19, 0xb0, 0x65, 0xa0, 0x9b, 0xc1, 0x68, 0x42,
-  0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43,
-  0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x47, 0x9e, 0x01,
-  0x6d, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x57,
-  0x9e, 0x41, 0x6d, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
-  0x01, 0x67, 0x9e, 0x81, 0x6d, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x28, 0x00,
-  0x08, 0x82, 0xc1, 0xe2, 0x9e, 0x41, 0x6d, 0x06, 0x76, 0x19, 0x04, 0xe1,
-  0x19, 0x98, 0x66, 0x30, 0x9e, 0xc1, 0x68, 0x42, 0x00, 0x5c, 0xf0, 0xd4,
-  0x2c, 0x81, 0x7b, 0x0c, 0x37, 0x98, 0x8d, 0x79, 0x06, 0x60, 0x30, 0xcb,
-  0xd0, 0x1b, 0xee, 0x11, 0x18, 0x5a, 0x06, 0x6a, 0x19, 0xc4, 0x67, 0x38,
-  0xe2, 0x0e, 0xd6, 0x32, 0x20, 0xbe, 0x59, 0x06, 0xdf, 0x08, 0x8f, 0xc0,
-  0xd8, 0x32, 0xc0, 0x83, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e,
-  0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x02, 0x3e, 0x03,
-  0x1d, 0x6e, 0x08, 0xdc, 0x33, 0x00, 0x83, 0x59, 0x86, 0xdf, 0x00, 0x8f,
-  0xc0, 0x06, 0xba, 0x0c, 0xe0, 0x33, 0x4b, 0x50, 0x1e, 0x36, 0x97, 0x01,
-  0x11, 0x9f, 0x59, 0x82, 0xf2, 0x18, 0x8e, 0x10, 0x05, 0xba, 0x0c, 0x84,
-  0x6f, 0x96, 0x41, 0x3c, 0xca, 0x23, 0xb0, 0x51, 0xa8, 0xcb, 0x20, 0x3e,
-  0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20, 0x92, 0x8f,
-  0x15, 0x41, 0x7c, 0x8a, 0xd8, 0xcf, 0x40, 0x87, 0x1b, 0x82, 0xfc, 0x0c,
-  0xc0, 0x60, 0x96, 0x61, 0x3c, 0xc8, 0x23, 0xb0, 0xbe, 0x0c, 0x86, 0xf8,
-  0xcc, 0x12, 0x94, 0x87, 0x11, 0xa0, 0x19, 0xc0, 0x67, 0x96, 0xa0, 0x3c,
-  0x06, 0x5a, 0x1e, 0xed, 0x37, 0x30, 0xf0, 0x20, 0xc6, 0x43, 0x20, 0x0f,
-  0x9d, 0x08, 0x8f, 0x0b, 0x86, 0xb1, 0xbf, 0x0c, 0x46, 0x33, 0x88, 0xcf,
-  0x70, 0x84, 0xe8, 0x90, 0x66, 0x40, 0x7c, 0xb3, 0x0c, 0xe6, 0x91, 0x1e,
-  0x81, 0x95, 0x66, 0x30, 0x3a, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3,
-  0x5c, 0xf0, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x44, 0x8a,
-  0x06, 0x3a, 0xdc, 0x10, 0x9c, 0x68, 0x00, 0x06, 0xb3, 0x0c, 0xe7, 0x81,
-  0x1e, 0x81, 0x0d, 0xad, 0x19, 0xc0, 0x67, 0x96, 0xa0, 0x3d, 0x4c, 0x35,
-  0x03, 0x22, 0x3e, 0xb3, 0x04, 0xed, 0x31, 0x1c, 0xd1, 0x3a, 0xab, 0x19,
-  0x08, 0xdf, 0x2c, 0x83, 0x7a, 0xb4, 0x47, 0x60, 0xae, 0xc3, 0x9a, 0x41,
-  0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x41, 0x24,
-  0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x41, 0xa3, 0x81, 0x0e, 0x37, 0x04, 0x32,
-  0x1a, 0x80, 0xc1, 0x2c, 0xc3, 0x7a, 0xb0, 0x47, 0x60, 0xb4, 0x19, 0x0c,
-  0xf1, 0x99, 0x25, 0x68, 0x0f, 0x23, 0x72, 0x33, 0x80, 0xcf, 0x2c, 0x41,
-  0x7b, 0x0c, 0xb4, 0x3c, 0xda, 0x79, 0x60, 0xe8, 0x41, 0xac, 0x87, 0xc0,
-  0x1e, 0x70, 0x97, 0x1e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0xb7, 0x3d, 0x75,
-  0xa5, 0x19, 0x0c, 0x73, 0xba, 0x18, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c,
-  0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x9f, 0x98, 0x06, 0x32, 0x1a,
-  0xc4, 0x67, 0xf0, 0xa3, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1,
-  0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20,
-  0x00, 0x08, 0x82, 0x01, 0x97, 0xa6, 0x41, 0x8e, 0x06, 0x09, 0x11, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0xa7, 0xa6, 0x81, 0x8e, 0x06, 0x09,
-  0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0xb7, 0xa6, 0xc1, 0x8e,
-  0x06, 0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0x32, 0xa7,
-  0x81, 0x8e, 0x06, 0xfb, 0x19, 0x04, 0x66, 0x1a, 0xac, 0x68, 0x80, 0xa6,
-  0xc1, 0x68, 0x42, 0x00, 0x5c, 0xf0, 0xd4, 0x2c, 0x81, 0x7b, 0x0c, 0xb4,
-  0x3c, 0xa6, 0x91, 0x1b, 0x38, 0x2c, 0xe0, 0x06, 0x4b, 0xec, 0x86, 0xd0,
-  0x1e, 0x38, 0x2c, 0xf0, 0xc6, 0x2c, 0xc3, 0x7b, 0xc4, 0xc7, 0xfa, 0x0c,
-  0x47, 0xbc, 0x4f, 0x8b, 0x06, 0xc3, 0x77, 0xf0, 0x33, 0xcc, 0x70, 0x43,
-  0x80, 0x9f, 0x01, 0x19, 0xd4, 0x10, 0xe8, 0x70, 0x84, 0xfc, 0xc4, 0x68,
-  0x30, 0x7c, 0x15, 0x08, 0x7a, 0xf4, 0x33, 0xcc, 0x70, 0x43, 0xb0, 0x9f,
-  0x01, 0x19, 0x54, 0x30, 0xe8, 0x2c, 0x03, 0x7c, 0x94, 0x48, 0x70, 0xbf,
-  0x19, 0x0c, 0x73, 0xf4, 0x18, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08,
-  0x06, 0x9f, 0x9e, 0x06, 0x6a, 0x1a, 0xa4, 0x68, 0x70, 0xa7, 0xc1, 0x68,
-  0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10,
-  0x43, 0x11, 0x87, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x17, 0xaa,
-  0x41, 0x9c, 0x06, 0x07, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01,
-  0x27, 0xaa, 0x81, 0x9c, 0x06, 0x0c, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08,
-  0x82, 0x01, 0x37, 0xaa, 0xc1, 0x9c, 0x06, 0x12, 0x11, 0x8c, 0x18, 0x28,
-  0x00, 0x08, 0x82, 0xc1, 0xb2, 0xaa, 0x81, 0x9c, 0x06, 0x33, 0x1a, 0x04,
-  0x7e, 0x1a, 0x8c, 0x69, 0x00, 0xaa, 0xc1, 0x68, 0x42, 0x00, 0x5c, 0xf0,
-  0xd4, 0x2c, 0x41, 0x89, 0x0c, 0x37, 0x8c, 0x10, 0xa9, 0x06, 0x60, 0x30,
-  0xcb, 0x20, 0x1f, 0xf3, 0x11, 0x94, 0x8b, 0x06, 0x75, 0x1a, 0xc0, 0x05,
-  0x4f, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x01, 0xd5, 0xaa, 0x81, 0x9d,
-  0x06, 0x29, 0x74, 0xa6, 0xc1, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50,
-  0xae, 0x1a, 0xd8, 0x69, 0x10, 0x08, 0x17, 0x0c, 0x53, 0x31, 0x1a, 0xe8,
-  0x69, 0x00, 0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x94,
-  0xac, 0x06, 0x7b, 0x1a, 0xb4, 0x10, 0x9b, 0x06, 0x23, 0x06, 0x07, 0x00,
-  0x82, 0x60, 0x40, 0xcd, 0x6a, 0xb0, 0xa7, 0x41, 0x20, 0x5c, 0x30, 0xcc,
-  0x05, 0x4f, 0xdd, 0xf1, 0xd4, 0xe9, 0x67, 0x30, 0xcc, 0xbd, 0x64, 0x30,
-  0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18,
-  0x7c, 0xb7, 0x1a, 0x9c, 0x6a, 0x60, 0xa6, 0x01, 0xad, 0x06, 0xa3, 0x09,
+  0x7c, 0xac, 0x08, 0xe2, 0x53, 0x84, 0x0a, 0xe9, 0x70, 0x43, 0x80, 0x42,
+  0x60, 0x30, 0xcb, 0x70, 0x0b, 0xb8, 0x10, 0x18, 0xeb, 0x0c, 0xf1, 0x99,
+  0x25, 0xc8, 0x05, 0x23, 0x5e, 0x07, 0x3e, 0xb3, 0x04, 0xb9, 0x30, 0xd0,
+  0xf2, 0x68, 0xb3, 0x80, 0xd1, 0x02, 0x71, 0x0b, 0x02, 0x2e, 0xd0, 0x4c,
+  0x2d, 0x5c, 0x30, 0x8c, 0xb9, 0x8e, 0xec, 0xc4, 0x67, 0x38, 0xc2, 0x35,
+  0x66, 0x87, 0xf8, 0x66, 0x19, 0x74, 0xa1, 0x17, 0x02, 0xa3, 0x9d, 0xd7,
+  0x88, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x30,
+  0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0x70, 0x48, 0x87, 0x1b, 0x02, 0x1b,
+  0x02, 0x83, 0x59, 0x86, 0x5d, 0xe0, 0x85, 0xc0, 0x06, 0xde, 0x81, 0xcf,
+  0x2c, 0x41, 0x38, 0x58, 0xee, 0x10, 0xf1, 0x99, 0x25, 0x08, 0x87, 0xe1,
+  0x88, 0xdc, 0xd0, 0x1d, 0xe1, 0x9b, 0x65, 0xf0, 0x85, 0x70, 0x08, 0x4c,
+  0x37, 0x76, 0x27, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e,
+  0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x18, 0x23, 0x1d, 0x6e,
+  0x08, 0xc2, 0x08, 0x0c, 0x66, 0x19, 0x7e, 0x01, 0x1c, 0x02, 0x1b, 0x9f,
+  0x21, 0x3e, 0xb3, 0x04, 0xe1, 0x60, 0x04, 0xfa, 0xc0, 0x67, 0x96, 0x20,
+  0x1c, 0x06, 0x5a, 0x1e, 0x6d, 0x17, 0x30, 0x5e, 0x20, 0x7e, 0x41, 0x00,
+  0x07, 0xd4, 0xe9, 0x85, 0x0b, 0x86, 0xb9, 0xe0, 0xa9, 0xdb, 0x9e, 0x3a,
+  0xda, 0x19, 0xe6, 0xd2, 0x65, 0x98, 0x23, 0x86, 0x39, 0x62, 0x98, 0x11,
+  0x83, 0x03, 0x00, 0x41, 0x30, 0xf8, 0xe2, 0x28, 0x8c, 0x40, 0xc8, 0x8d,
+  0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34,
+  0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x38,
+  0x3c, 0x42, 0xa3, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80,
+  0xcb, 0xa3, 0x34, 0x4a, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c,
+  0x38, 0x3d, 0x52, 0xa3, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1,
+  0x60, 0x11, 0xa5, 0x34, 0x52, 0xa1, 0xa0, 0x8e, 0x74, 0xe8, 0x8e, 0x46,
+  0x13, 0x02, 0xe0, 0x82, 0xa7, 0x66, 0x09, 0xc4, 0x61, 0xa0, 0xe5, 0x31,
+  0x8d, 0x56, 0xb0, 0xc3, 0x80, 0x15, 0x58, 0xe2, 0x15, 0x84, 0x70, 0xb0,
+  0xc3, 0x00, 0x16, 0x66, 0x19, 0xc6, 0xa1, 0x1c, 0xea, 0x63, 0x38, 0x42,
+  0x3f, 0x78, 0x68, 0xf8, 0x6e, 0x3f, 0x86, 0x19, 0x6e, 0x08, 0x4e, 0x88,
+  0x0c, 0x6a, 0x08, 0x74, 0x38, 0x62, 0x3f, 0xc0, 0x68, 0xf8, 0x2a, 0x10,
+  0xf4, 0xfa, 0x63, 0x98, 0xe1, 0x86, 0x40, 0x85, 0xc8, 0xa0, 0x82, 0x41,
+  0x67, 0x19, 0xc8, 0x21, 0x1f, 0x82, 0x73, 0x9f, 0x61, 0x6e, 0x64, 0x86,
+  0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x2f, 0x95, 0xf2, 0x08, 0x87,
+  0x4c, 0x69, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10,
+  0x46, 0x13, 0x88, 0xa1, 0x88, 0x43, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1,
+  0x80, 0x83, 0x25, 0x50, 0x3a, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10,
+  0x0c, 0xb8, 0x58, 0x0a, 0x25, 0x86, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04,
+  0xc1, 0x80, 0x93, 0x25, 0x51, 0x92, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40,
+  0x10, 0x0c, 0x16, 0x5d, 0x0a, 0x25, 0x31, 0x0a, 0x5a, 0x49, 0x8e, 0x5e,
+  0x69, 0x34, 0x21, 0x00, 0x2e, 0x78, 0x6a, 0x96, 0x20, 0x1f, 0x86, 0x1b,
+  0x5a, 0x64, 0x96, 0xc0, 0x60, 0x96, 0xc1, 0x1c, 0xce, 0x21, 0xa8, 0x1e,
+  0x22, 0x25, 0xb8, 0xe0, 0xa9, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0,
+  0x78, 0xa9, 0x94, 0x68, 0xc4, 0x8e, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1,
+  0x80, 0xea, 0xa5, 0x52, 0x0a, 0x84, 0x0b, 0x86, 0x29, 0x30, 0x4a, 0x25,
+  0xb8, 0xe0, 0xa9, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0xc2, 0x49,
+  0x95, 0x6c, 0x64, 0x8f, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x12,
+  0x27, 0x55, 0x0a, 0x84, 0x0b, 0x86, 0xb9, 0xe0, 0xa9, 0x3b, 0x9e, 0xba,
+  0x14, 0x1a, 0xe6, 0x7c, 0x66, 0x98, 0x23, 0x86, 0x39, 0x62, 0x98, 0x11,
+  0x83, 0x03, 0x00, 0x41, 0x30, 0xf8, 0xcc, 0xc9, 0x96, 0xea, 0x68, 0x9c,
+  0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34,
+  0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb8,
+  0x76, 0xea, 0xa5, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80,
+  0x73, 0x27, 0x5f, 0x4a, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c,
+  0xb8, 0x77, 0xfa, 0xa5, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1,
+  0x60, 0xb9, 0x27, 0x5f, 0xfa, 0xa3, 0x40, 0x9d, 0x5e, 0x89, 0x9d, 0x46,
+  0x13, 0x02, 0xe0, 0x82, 0xa7, 0x66, 0x09, 0xf2, 0x61, 0xb8, 0x41, 0x4d,
+  0xde, 0x09, 0x0c, 0x66, 0x19, 0xd0, 0x21, 0x1f, 0x02, 0x8b, 0xa3, 0x39,
+  0x8a, 0xcf, 0x70, 0x04, 0x9c, 0xd0, 0x11, 0xf1, 0xcd, 0x32, 0xa4, 0x03,
+  0x3b, 0x04, 0x56, 0x47, 0x71, 0x12, 0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30,
+  0xcc, 0x05, 0x4f, 0x59, 0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xe4,
+  0x93, 0x0e, 0x37, 0x04, 0xf7, 0x04, 0x06, 0xb3, 0x0c, 0xea, 0xb0, 0x0e,
+  0x81, 0x0d, 0x7d, 0x04, 0x9f, 0x59, 0x02, 0x78, 0x30, 0x3e, 0x22, 0xe2,
+  0x33, 0x4b, 0x00, 0x0f, 0xc3, 0x11, 0x7b, 0xd2, 0x47, 0xc2, 0x37, 0xcb,
+  0xd0, 0x0e, 0xf0, 0x10, 0x18, 0x9f, 0xf8, 0x51, 0x7c, 0x2c, 0x70, 0xe8,
+  0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8,
+  0x14, 0x41, 0x52, 0x3a, 0xdc, 0x10, 0x88, 0x14, 0x18, 0xcc, 0x32, 0xb8,
+  0xc3, 0x3b, 0x04, 0x66, 0x4a, 0x43, 0x7c, 0x66, 0x09, 0xe0, 0xc1, 0x88,
+  0x54, 0x82, 0xcf, 0x2c, 0x01, 0x3c, 0x0c, 0xb4, 0x3c, 0x9a, 0x3a, 0x60,
+  0xeb, 0x40, 0xb8, 0x83, 0xf0, 0x0e, 0x2c, 0xc5, 0x0e, 0x17, 0x0c, 0x63,
+  0xa8, 0xc4, 0x4a, 0xf1, 0x19, 0x8e, 0x30, 0x95, 0x56, 0x22, 0xbe, 0x59,
+  0x86, 0x78, 0xa0, 0x87, 0xc0, 0x5c, 0xe9, 0x54, 0xe2, 0x63, 0xc1, 0x40,
+  0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4,
+  0xa7, 0x08, 0x99, 0xd2, 0xe1, 0x86, 0x00, 0xa6, 0xc0, 0x60, 0x96, 0x41,
+  0x1e, 0xe6, 0x21, 0xb0, 0xc1, 0x96, 0xe0, 0x33, 0x4b, 0x80, 0x0f, 0x36,
+  0x4b, 0x44, 0x7c, 0x66, 0x09, 0xf0, 0x61, 0x38, 0x22, 0x56, 0x68, 0x49,
+  0xf8, 0x66, 0x19, 0xea, 0x01, 0x1f, 0x02, 0x93, 0x95, 0x5a, 0x8a, 0x8f,
+  0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x88, 0xe4, 0x63,
+  0x45, 0x10, 0x9f, 0x22, 0x7a, 0x4a, 0x87, 0x1b, 0x82, 0x9d, 0x02, 0x83,
+  0x59, 0x06, 0x7b, 0xb8, 0x87, 0xc0, 0x7a, 0x69, 0x88, 0xcf, 0x2c, 0x01,
+  0x3e, 0x18, 0x21, 0x4e, 0xf0, 0x99, 0x25, 0xc0, 0x87, 0x81, 0x96, 0x47,
+  0x93, 0x07, 0x6c, 0x1e, 0x08, 0x7b, 0x10, 0xee, 0x01, 0xaf, 0xe8, 0xe1,
+  0x82, 0x61, 0x2e, 0x78, 0xea, 0xb6, 0xa7, 0xce, 0x95, 0x86, 0xb9, 0xf1,
+  0x19, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10,
+  0x0c, 0xbe, 0xb5, 0xda, 0x29, 0x7d, 0x42, 0xab, 0xd1, 0x84, 0x00, 0x18,
+  0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x12,
+  0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x4e, 0xae, 0xc4, 0x2a, 0x21,
+  0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xe0, 0xe6, 0x6a, 0xac, 0x12,
+  0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x8e, 0xae, 0xc8, 0x2a,
+  0x21, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x58, 0xf8, 0x6a, 0xac,
+  0x48, 0x2a, 0x78, 0x2b, 0x9a, 0x8a, 0xab, 0xd1, 0x84, 0x00, 0xb8, 0xe0,
+  0xa9, 0x59, 0x82, 0x7c, 0x18, 0x68, 0x79, 0x4c, 0x83, 0x1c, 0xd4, 0x34,
+  0x18, 0x07, 0x96, 0x30, 0x07, 0x01, 0x1f, 0xd4, 0x34, 0x38, 0x87, 0x59,
+  0x06, 0x7d, 0xe0, 0x87, 0x77, 0x19, 0x8e, 0x90, 0x17, 0x9b, 0x1a, 0xbe,
+  0x9b, 0x97, 0x61, 0x86, 0x1b, 0x82, 0x90, 0x22, 0x83, 0x1a, 0x02, 0x1d,
+  0x8e, 0xa8, 0x17, 0x9d, 0x1a, 0xbe, 0x0a, 0x04, 0xbd, 0x7b, 0x19, 0x66,
+  0xb8, 0x21, 0x20, 0x29, 0x32, 0xa8, 0x60, 0xd0, 0x59, 0x86, 0x7d, 0x80,
+  0x89, 0xe0, 0xd0, 0x69, 0x98, 0xeb, 0x9f, 0x61, 0x46, 0x0c, 0x0e, 0x00,
+  0x04, 0xc1, 0xe0, 0x1b, 0xad, 0xb9, 0x92, 0x29, 0xd0, 0x1a, 0x4d, 0x08,
+  0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28,
+  0xe2, 0x90, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xe0, 0x54, 0x4b, 0xaf,
+  0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x6e, 0xb5, 0xf6,
+  0x8a, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xe0, 0x58, 0x8b,
+  0xaf, 0x24, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x85, 0xb6,
+  0xf6, 0x8a, 0xa7, 0x82, 0xd3, 0x62, 0xab, 0xd4, 0x1a, 0x4d, 0x08, 0x80,
+  0x0b, 0x9e, 0x9a, 0x25, 0x80, 0x89, 0xe1, 0x86, 0x93, 0x69, 0x2d, 0x30,
+  0x98, 0x65, 0xe8, 0x07, 0x7f, 0x08, 0xea, 0xa6, 0xfc, 0x0a, 0x2e, 0x78,
+  0x6a, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x28, 0xdb, 0xfa, 0x2b, 0x96,
+  0x81, 0xab, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0x6e, 0xeb, 0xaf,
+  0x02, 0xe1, 0x82, 0x61, 0x4a, 0xa7, 0x46, 0x0b, 0x2e, 0x78, 0x6a, 0xc4,
+  0xe0, 0x00, 0x40, 0x10, 0x0c, 0xa8, 0xdd, 0x22, 0x2d, 0x98, 0xa9, 0xab,
+  0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0x78, 0x8b, 0xb4, 0x02, 0xe1,
+  0x82, 0x61, 0x2e, 0x78, 0xea, 0x8e, 0xa7, 0x6e, 0xa4, 0x86, 0x39, 0x1c,
+  0x1a, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10,
+  0x0c, 0x3e, 0xf0, 0x82, 0xad, 0xb7, 0xea, 0xad, 0xd1, 0x84, 0x00, 0x18,
+  0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x12,
+  0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xee, 0xbc, 0x6e, 0x2b, 0x21,
+  0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xe0, 0xd0, 0x0b, 0xb7, 0x12,
+  0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x2e, 0xbd, 0x72, 0x2b,
+  0x21, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x58, 0xe2, 0x0b, 0xb7,
+  0xf2, 0x2a, 0x20, 0xaf, 0xd4, 0x32, 0xaf, 0xd1, 0x84, 0x00, 0xb8, 0xe0,
+  0xa9, 0x59, 0x02, 0x98, 0x18, 0x6e, 0x20, 0x9b, 0xf4, 0x02, 0x83, 0x59,
+  0x86, 0x7f, 0x80, 0x89, 0xc0, 0xd6, 0xaa, 0xad, 0xe2, 0x33, 0x1c, 0x81,
+  0x36, 0x6e, 0x45, 0x7c, 0xb3, 0x0c, 0x20, 0x31, 0x12, 0x81, 0xbd, 0x55,
+  0xda, 0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16,
+  0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0xf3, 0xa5, 0xc3, 0x0d, 0x41,
+  0x7c, 0x81, 0xc1, 0x2c, 0x43, 0x48, 0x88, 0x44, 0x60, 0xc3, 0x5d, 0xc1,
+  0x67, 0x96, 0xe0, 0x24, 0xcc, 0xae, 0x88, 0xf8, 0xcc, 0x12, 0x9c, 0xc4,
+  0x70, 0xc4, 0xdc, 0xdc, 0x95, 0xf0, 0xcd, 0x32, 0x90, 0xc4, 0x49, 0x04,
+  0x46, 0x37, 0x78, 0x15, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05,
+  0x4f, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xf8, 0x97, 0x0e,
+  0x37, 0x04, 0xfc, 0x05, 0x06, 0xb3, 0x0c, 0x25, 0x61, 0x12, 0x81, 0x81,
+  0xd6, 0x10, 0x9f, 0x59, 0x82, 0x93, 0x30, 0x62, 0xb4, 0xe0, 0x33, 0x4b,
+  0x70, 0x12, 0x03, 0x2d, 0x8f, 0x16, 0x12, 0x98, 0x48, 0x10, 0x25, 0x21,
+  0x98, 0x04, 0xbf, 0x8d, 0xc4, 0x05, 0xc3, 0x98, 0x68, 0x99, 0x56, 0x7c,
+  0x86, 0x23, 0x40, 0xe7, 0xb4, 0x88, 0x6f, 0x96, 0x01, 0x25, 0x56, 0x22,
+  0x30, 0xd4, 0x0a, 0x9d, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e,
+  0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x82, 0xc5, 0x74,
+  0xb8, 0x21, 0x50, 0x31, 0x30, 0x98, 0x65, 0x48, 0x09, 0x95, 0x08, 0x6c,
+  0x80, 0x2d, 0xf8, 0xcc, 0x12, 0xbc, 0x84, 0xb5, 0x16, 0x11, 0x9f, 0x59,
+  0x82, 0x97, 0x18, 0x8e, 0x58, 0x1d, 0xd7, 0x12, 0xbe, 0x59, 0x06, 0x96,
+  0x78, 0x89, 0xc0, 0x58, 0xe7, 0xb5, 0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b,
+  0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88,
+  0x1b, 0xd3, 0xe1, 0x86, 0xa0, 0xc6, 0xc0, 0x60, 0x96, 0xa1, 0x25, 0x5c,
+  0x22, 0xb0, 0xdb, 0x1a, 0xe2, 0x33, 0x4b, 0xf0, 0x12, 0x46, 0xf0, 0x16,
+  0x7c, 0x66, 0x09, 0x5e, 0x62, 0xa0, 0xe5, 0xd1, 0x52, 0x02, 0x53, 0x09,
+  0xa2, 0x25, 0x04, 0x97, 0x60, 0xbb, 0x95, 0xb8, 0x60, 0x98, 0x0b, 0x9e,
+  0xba, 0xed, 0xa9, 0x43, 0xad, 0x61, 0xae, 0x97, 0x86, 0x39, 0x62, 0x98,
+  0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0xaf, 0xcc, 0x6a,
+  0x8c, 0xbe, 0xc4, 0x6c, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34,
+  0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10, 0x00,
+  0x04, 0xc1, 0x80, 0x63, 0x33, 0x1e, 0x4b, 0x88, 0x60, 0xc4, 0x00, 0x01,
+  0x40, 0x10, 0x0c, 0xb8, 0x36, 0xeb, 0xb1, 0x84, 0x08, 0x46, 0x0c, 0x10,
+  0x00, 0x04, 0xc1, 0x80, 0x73, 0x33, 0x1f, 0x4b, 0x88, 0x60, 0xc4, 0x40,
+  0x01, 0x40, 0x10, 0x0c, 0x16, 0x3b, 0xeb, 0x31, 0xff, 0x0a, 0xd2, 0xcc,
+  0xc5, 0xd6, 0x6c, 0x34, 0x21, 0x00, 0x2e, 0x78, 0x6a, 0x96, 0x00, 0x26,
+  0x06, 0x5a, 0x1e, 0xd3, 0xd8, 0x07, 0x1f, 0x0e, 0xf4, 0x81, 0x25, 0xfa,
+  0x41, 0x78, 0x09, 0x1f, 0x0e, 0xfc, 0x61, 0xc4, 0xc0, 0x00, 0x40, 0x10,
+  0x0c, 0x20, 0x3c, 0xcb, 0xb1, 0x77, 0x32, 0xfb, 0x80, 0x97, 0xf8, 0x98,
+  0x10, 0xc8, 0xc7, 0x02, 0x79, 0x81, 0x8f, 0x15, 0xff, 0x10, 0x1f, 0x2b,
+  0x02, 0xf9, 0x58, 0x10, 0x12, 0xf0, 0x19, 0x31, 0x30, 0x00, 0x10, 0x04,
+  0x03, 0xe8, 0xcf, 0xc0, 0xac, 0x9e, 0x4c, 0x28, 0xe2, 0x63, 0x81, 0x20,
+  0x1f, 0x0b, 0x0e, 0xf8, 0x5c, 0x60, 0xcc, 0x88, 0x81, 0x03, 0x80, 0x20,
+  0x18, 0x34, 0xa5, 0x46, 0x66, 0x30, 0x96, 0x62, 0x79, 0x16, 0xfc, 0xd8,
+  0x8f, 0xfd, 0x98, 0x8f, 0xed, 0xd9, 0x2c, 0xc1, 0x08, 0x0d, 0x37, 0xf8,
+  0xd5, 0x9e, 0x81, 0xc1, 0x2c, 0x83, 0x4c, 0x8c, 0x50, 0x30, 0x62, 0x60,
+  0x00, 0x20, 0x08, 0x06, 0x50, 0xa9, 0x9d, 0x19, 0x3f, 0x59, 0x90, 0x63,
+  0xf0, 0x19, 0x31, 0x30, 0x00, 0x10, 0x04, 0x03, 0xe8, 0xd4, 0xd2, 0xac,
+  0x9f, 0x2c, 0xd8, 0x31, 0xf8, 0x8c, 0x26, 0xc0, 0xd8, 0x30, 0xdc, 0x10,
+  0xfc, 0x19, 0x18, 0xcc, 0x32, 0xcc, 0x44, 0x4d, 0x04, 0xc3, 0x11, 0xc5,
+  0x98, 0x0d, 0xdf, 0x19, 0xc3, 0x0c, 0x37, 0x04, 0x2e, 0x46, 0x06, 0x35,
+  0x04, 0x3a, 0x1c, 0x71, 0x9c, 0xd9, 0xf0, 0x55, 0x20, 0xe8, 0x25, 0xc3,
+  0x0c, 0x37, 0x04, 0x31, 0x46, 0x06, 0x15, 0x0c, 0x3a, 0xcb, 0x40, 0x13,
+  0x69, 0x11, 0x5c, 0x7d, 0x0d, 0x73, 0x2a, 0x35, 0xcc, 0x88, 0xc1, 0x01,
+  0x80, 0x20, 0x18, 0x7c, 0xb0, 0x06, 0x6a, 0x3f, 0xd6, 0x6a, 0xa3, 0x09,
   0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c,
-  0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x9c, 0xaf, 0x06,
-  0xae, 0x1a, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xdc,
-  0xaf, 0x06, 0xaf, 0x1a, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08,
-  0x06, 0x1c, 0xb8, 0x06, 0xb0, 0x1a, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00,
-  0x20, 0x08, 0x06, 0x0b, 0xba, 0x06, 0xaf, 0x1a, 0xc0, 0x69, 0x10, 0xec,
-  0x6a, 0x00, 0xaa, 0x41, 0xaf, 0x06, 0xa3, 0x09, 0x01, 0x70, 0xc1, 0x53,
-  0xb3, 0x04, 0x25, 0x32, 0xdc, 0x00, 0x46, 0xe0, 0x1a, 0x80, 0xc1, 0x2c,
-  0x03, 0x7d, 0x94, 0x48, 0x60, 0x62, 0x1a, 0x90, 0x69, 0x10, 0x9f, 0xe1,
-  0x88, 0x32, 0x2a, 0xd3, 0x80, 0xf8, 0x66, 0x19, 0xea, 0x03, 0x3f, 0x02,
-  0x33, 0xd3, 0xc0, 0x8c, 0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9,
-  0xe0, 0x29, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0x75, 0x0d,
-  0x74, 0xb8, 0x21, 0x40, 0xd7, 0x00, 0x0c, 0x66, 0x19, 0xec, 0xe3, 0x3e,
-  0x02, 0x1b, 0xdc, 0x34, 0x80, 0xcf, 0x2c, 0x01, 0x7f, 0x58, 0x9b, 0x06,
-  0x44, 0x7c, 0x66, 0x09, 0xf8, 0x63, 0x38, 0x02, 0x8e, 0xdc, 0x34, 0x10,
-  0xbe, 0x59, 0x86, 0xfc, 0xe0, 0x8f, 0xc0, 0xe2, 0xe8, 0x4d, 0x83, 0xf8,
-  0x58, 0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca, 0x82, 0x48, 0x3e,
-  0x56, 0x04, 0xf1, 0x29, 0xa2, 0x5e, 0x03, 0x1d, 0x6e, 0x08, 0xe6, 0x35,
-  0x00, 0x83, 0x59, 0x06, 0xfd, 0xd8, 0x8f, 0xc0, 0xee, 0x34, 0x18, 0xe2,
-  0x33, 0x4b, 0xc0, 0x1f, 0x46, 0xe8, 0x69, 0x00, 0x9f, 0x59, 0x02, 0xfe,
-  0x18, 0x68, 0x79, 0x34, 0xfb, 0xc0, 0xee, 0x83, 0xd0, 0x0f, 0x61, 0x3f,
-  0xec, 0x31, 0xc0, 0x8f, 0x0b, 0x86, 0xb1, 0x3c, 0x0d, 0xfa, 0x34, 0x88,
-  0xcf, 0x70, 0x44, 0x1f, 0xf9, 0x69, 0x40, 0x7c, 0xb3, 0x0c, 0xfd, 0x01,
-  0x22, 0x81, 0xfd, 0x69, 0xe0, 0x47, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05,
-  0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0xc4,
-  0xc8, 0x06, 0x3a, 0xdc, 0x10, 0x84, 0x6c, 0x00, 0x06, 0xb3, 0x0c, 0xfe,
-  0xf1, 0x1f, 0x81, 0x0d, 0xa7, 0x1a, 0xc0, 0x67, 0x96, 0x80, 0x44, 0x8c,
-  0x54, 0x03, 0x22, 0x3e, 0xb3, 0x04, 0x24, 0x32, 0x1c, 0x81, 0x4a, 0xa5,
-  0x1a, 0x08, 0xdf, 0x2c, 0x43, 0x88, 0x90, 0x48, 0x60, 0xa9, 0x64, 0xaa,
-  0x41, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x41,
-  0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xe1, 0xb2, 0x81, 0x0e, 0x37, 0x04,
-  0x2c, 0x1b, 0x80, 0xc1, 0x2c, 0x83, 0x88, 0x8c, 0x48, 0x60, 0xae, 0x1a,
-  0x0c, 0xf1, 0x99, 0x25, 0x20, 0x11, 0x23, 0x66, 0x35, 0x80, 0xcf, 0x2c,
-  0x01, 0x89, 0x0c, 0xb4, 0x3c, 0x9a, 0x7f, 0x60, 0xff, 0x41, 0x88, 0x88,
-  0x30, 0x22, 0x66, 0x19, 0x80, 0xc8, 0x05, 0xc3, 0x5c, 0xf0, 0xd4, 0x6d,
-  0x4f, 0xdd, 0x9f, 0x06, 0xc3, 0x1c, 0x6d, 0x06, 0xc3, 0x1c, 0x31, 0xcc,
-  0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0xc7, 0xb3, 0x01,
-  0xcb, 0x06, 0xeb, 0x1a, 0xe4, 0x6c, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09,
-  0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23,
-  0x06, 0x08, 0x00, 0x82, 0x60, 0xc0, 0x8d, 0x6d, 0x30, 0xb3, 0x41, 0x42,
-  0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xc0, 0x91, 0x6d, 0x40, 0xb3,
-  0x41, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xc0, 0x95, 0x6d,
-  0x50, 0xb3, 0x41, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0,
-  0xb4, 0x6d, 0x40, 0xb3, 0x41, 0xbd, 0x06, 0x01, 0xd8, 0x06, 0x25, 0x1b,
-  0x88, 0x6d, 0x30, 0x9a, 0x10, 0x00, 0x17, 0x3c, 0x35, 0x4b, 0x50, 0x22,
-  0x03, 0x2d, 0x8f, 0x69, 0xc0, 0x07, 0x8b, 0x0b, 0xef, 0xc1, 0x12, 0xf2,
-  0x21, 0x90, 0x08, 0x8b, 0x0b, 0xf3, 0x31, 0xcb, 0x60, 0x22, 0x28, 0x52,
-  0x4e, 0xc3, 0x11, 0xea, 0x74, 0xb2, 0xc1, 0xf0, 0xdd, 0x3a, 0x0d, 0x33,
-  0xdc, 0x10, 0xc8, 0x6b, 0x40, 0x06, 0x35, 0x04, 0x3a, 0x1c, 0xc1, 0x4e,
-  0x2b, 0x1b, 0x0c, 0x5f, 0x05, 0x82, 0x9e, 0x3b, 0x0d, 0x33, 0xdc, 0x10,
-  0xd4, 0x6b, 0x40, 0x06, 0x15, 0x0c, 0x3a, 0xcb, 0x70, 0x22, 0x3c, 0x12,
-  0x5c, 0xae, 0x06, 0xc3, 0x9c, 0x7b, 0x06, 0xc3, 0x8c, 0x18, 0x1c, 0x00,
-  0x08, 0x82, 0xc1, 0x47, 0xb7, 0x01, 0xd9, 0x06, 0x23, 0x1b, 0xc4, 0x6d,
-  0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3,
-  0x09, 0xc4, 0x50, 0xc4, 0x21, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xc0,
-  0xed, 0x6d, 0xb0, 0xb6, 0xc1, 0x41, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82,
-  0x60, 0xc0, 0xf1, 0x6d, 0xc0, 0xb6, 0x01, 0x43, 0x04, 0x23, 0x06, 0x08,
-  0x00, 0x82, 0x60, 0xc0, 0xf5, 0x6d, 0xd0, 0xb6, 0x81, 0x44, 0x04, 0x23,
-  0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0x94, 0x6e, 0xc0, 0xb6, 0x41, 0xcb,
-  0x06, 0x01, 0xde, 0x06, 0x3d, 0x1b, 0xe8, 0x6d, 0x30, 0x9a, 0x10, 0x00,
-  0x17, 0x3c, 0x35, 0x4b, 0xc0, 0x23, 0xc3, 0x0d, 0xfd, 0xe4, 0xb7, 0x01,
-  0x18, 0xcc, 0x32, 0xa4, 0x88, 0x8a, 0x04, 0x85, 0xb2, 0xc1, 0xdb, 0x06,
-  0x70, 0xc1, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0x9d, 0x6e,
-  0x00, 0xb7, 0x01, 0x49, 0x85, 0x6d, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08,
-  0x06, 0x14, 0xea, 0x06, 0x70, 0x1b, 0x04, 0xc2, 0x05, 0xc3, 0xd4, 0xca,
-  0x06, 0x74, 0x1b, 0xc0, 0x05, 0x4f, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82,
-  0x01, 0xc5, 0xba, 0x41, 0xdd, 0x06, 0x27, 0x65, 0xb6, 0xc1, 0x88, 0xc1,
-  0x01, 0x80, 0x20, 0x18, 0x50, 0xad, 0x1b, 0xd4, 0x6d, 0x10, 0x08, 0x17,
-  0x0c, 0x73, 0xc1, 0x53, 0x77, 0x3c, 0x75, 0xf4, 0x1a, 0x0c, 0x73, 0x29,
-  0x1a, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20,
-  0x08, 0x06, 0x5f, 0xec, 0x06, 0xa1, 0x1b, 0x80, 0x6d, 0xe0, 0xba, 0xc1,
-  0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26,
-  0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x87,
-  0xbb, 0x01, 0xea, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
-  0x01, 0x97, 0xbb, 0x41, 0xea, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00,
-  0x08, 0x82, 0x01, 0xa7, 0xbb, 0x81, 0xea, 0x06, 0x09, 0x11, 0x8c, 0x18,
-  0x28, 0x00, 0x08, 0x82, 0xc1, 0x22, 0xbe, 0x41, 0xea, 0x06, 0x6a, 0x1b,
-  0x04, 0xb5, 0x1b, 0xe8, 0x6d, 0x70, 0xbb, 0xc1, 0x68, 0x42, 0x00, 0x5c,
-  0xf0, 0xd4, 0x2c, 0x01, 0x8f, 0x0c, 0x37, 0xe8, 0x94, 0xee, 0x06, 0x60,
-  0x30, 0xcb, 0xb0, 0x22, 0x3c, 0x12, 0x18, 0xcf, 0x06, 0x3e, 0x1b, 0xc4,
-  0x67, 0x38, 0x02, 0xac, 0x7e, 0x36, 0x20, 0xbe, 0x59, 0x06, 0x16, 0x79,
-  0x91, 0xc0, 0xc0, 0x36, 0x08, 0xab, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82,
-  0x61, 0x2e, 0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x82,
-  0x7c, 0x03, 0x1d, 0x6e, 0x08, 0xc4, 0x37, 0x00, 0x83, 0x59, 0x86, 0x16,
-  0x71, 0x91, 0xc0, 0x06, 0xb4, 0x0d, 0xe0, 0x33, 0x4b, 0x30, 0x23, 0x76,
-  0xb6, 0x01, 0x11, 0x9f, 0x59, 0x82, 0x19, 0x19, 0x8e, 0x58, 0x2b, 0xb4,
-  0x0d, 0x84, 0x6f, 0x96, 0x01, 0x46, 0x66, 0x24, 0x30, 0xb6, 0x4a, 0xdb,
-  0x20, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20,
-  0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x78, 0xdf, 0x40, 0x87, 0x1b, 0x82,
-  0xf6, 0x0d, 0xc0, 0x60, 0x96, 0x21, 0x46, 0x64, 0x24, 0xb0, 0xb8, 0x0d,
-  0x86, 0xf8, 0xcc, 0x12, 0xcc, 0x88, 0x11, 0x74, 0x1b, 0xc0, 0x67, 0x96,
-  0x60, 0x46, 0x06, 0x5a, 0x1e, 0xad, 0x45, 0x30, 0x17, 0x21, 0x62, 0x44,
-  0x90, 0x11, 0x76, 0x0d, 0x5e, 0xe4, 0x82, 0x61, 0x6c, 0x6e, 0x83, 0xbb,
-  0x0d, 0xe2, 0x33, 0x1c, 0x71, 0x57, 0x78, 0x1b, 0x10, 0xdf, 0x2c, 0x03,
-  0x8d, 0xdc, 0x48, 0x60, 0x79, 0x1b, 0xe0, 0x55, 0x7c, 0x2c, 0x18, 0xe8,
-  0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8,
-  0x14, 0xd1, 0xbf, 0x81, 0x0e, 0x37, 0x04, 0xfb, 0x1b, 0x80, 0xc1, 0x2c,
-  0x43, 0x8d, 0xd8, 0x48, 0x60, 0x43, 0xe8, 0x06, 0xf0, 0x99, 0x25, 0xd8,
-  0x11, 0xf3, 0xdb, 0x80, 0x88, 0xcf, 0x2c, 0xc1, 0x8e, 0x0c, 0x47, 0x88,
-  0xd6, 0xdf, 0x06, 0xc2, 0x37, 0xcb, 0x80, 0x23, 0x3b, 0x12, 0xd8, 0x68,
-  0x81, 0x6e, 0x10, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f,
-  0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xa0, 0x70, 0xa0, 0xc3,
-  0x0d, 0x81, 0x09, 0x07, 0x60, 0x30, 0xcb, 0x90, 0x23, 0x3a, 0x12, 0x18,
-  0xea, 0x06, 0x43, 0x7c, 0x66, 0x09, 0x76, 0xc4, 0x88, 0xd6, 0x0d, 0xe0,
-  0x33, 0x4b, 0xb0, 0x23, 0x03, 0x2d, 0x8f, 0x56, 0x23, 0x98, 0x8d, 0x10,
-  0x39, 0x22, 0xe8, 0x88, 0xce, 0x06, 0x37, 0x72, 0xc1, 0x30, 0x17, 0x3c,
-  0x75, 0xdb, 0x53, 0x97, 0xb7, 0xc1, 0x30, 0xe7, 0xaa, 0xc1, 0x30, 0x47,
-  0x0c, 0x73, 0xc4, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xf0, 0xd9,
-  0x70, 0x60, 0xc2, 0x41, 0xf9, 0x06, 0x33, 0x1c, 0x8c, 0x26, 0x04, 0xc0,
-  0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x91,
-  0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70, 0x3d, 0x1c, 0xb4, 0x70,
-  0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70, 0x3e, 0x1c,
-  0xb8, 0x70, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70,
-  0x3f, 0x1c, 0xbc, 0x70, 0x90, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20,
-  0x18, 0x2c, 0x67, 0x1c, 0xb8, 0x70, 0xf0, 0xbe, 0x41, 0xa0, 0xc3, 0xc1,
-  0xff, 0x06, 0x3c, 0x1c, 0x8c, 0x26, 0x04, 0xc0, 0x05, 0x4f, 0xcd, 0x12,
-  0xf0, 0xc8, 0x40, 0xcb, 0x63, 0x1a, 0x27, 0x02, 0x82, 0x83, 0x89, 0xb0,
-  0x44, 0x8a, 0x08, 0x3b, 0x02, 0x82, 0x83, 0x8a, 0xcc, 0x32, 0xf4, 0xc8,
-  0x8f, 0xfc, 0xd6, 0x70, 0x84, 0xfa, 0x84, 0x70, 0x30, 0x7c, 0xb7, 0x3e,
-  0xc3, 0x0c, 0x37, 0x04, 0xec, 0x1b, 0x90, 0x41, 0x0d, 0x81, 0x0e, 0x47,
-  0x98, 0x57, 0x09, 0x07, 0xc3, 0x57, 0x81, 0xa0, 0x87, 0x5e, 0xc3, 0x0c,
-  0x37, 0x04, 0xef, 0x1b, 0x90, 0x41, 0x05, 0x83, 0xce, 0x32, 0xf8, 0xc8,
-  0x9c, 0x04, 0x37, 0xbb, 0xc1, 0x30, 0x87, 0xae, 0xc1, 0x30, 0x23, 0x06,
-  0x07, 0x00, 0x82, 0x60, 0xf0, 0xb9, 0x71, 0xe0, 0xc3, 0x41, 0xff, 0x06,
-  0x6b, 0x1c, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c,
-  0xc2, 0x68, 0x02, 0x31, 0x14, 0x71, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20,
-  0x18, 0x70, 0x75, 0x1c, 0x94, 0x71, 0x70, 0x10, 0xc1, 0x88, 0x01, 0x02,
-  0x80, 0x20, 0x18, 0x70, 0x76, 0x1c, 0x98, 0x71, 0xc0, 0x10, 0xc1, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x70, 0x77, 0x1c, 0x9c, 0x71, 0x20, 0x11,
-  0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c, 0x7f, 0x1c, 0x98, 0x71,
-  0x70, 0xc2, 0x41, 0x20, 0xc7, 0xc1, 0x0d, 0x07, 0x74, 0x1c, 0x8c, 0x26,
-  0x04, 0xc0, 0x05, 0x4f, 0xcd, 0x12, 0xcc, 0xc9, 0x70, 0xc3, 0x7d, 0xe1,
-  0x71, 0x00, 0x06, 0xb3, 0x0c, 0x60, 0x12, 0x26, 0x41, 0x89, 0x70, 0x90,
-  0xc6, 0x01, 0x5c, 0xf0, 0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50,
-  0xa1, 0x1c, 0xa8, 0x71, 0x40, 0x42, 0x3b, 0x1c, 0x8c, 0x18, 0x1c, 0x00,
-  0x08, 0x82, 0x01, 0x25, 0xca, 0x81, 0x1a, 0x07, 0x81, 0x70, 0xc1, 0x30,
-  0x55, 0xc2, 0x81, 0x1b, 0x07, 0x70, 0xc1, 0x53, 0x23, 0x06, 0x07, 0x00,
-  0x82, 0x60, 0x40, 0x99, 0x72, 0xf0, 0xc6, 0x41, 0x88, 0x81, 0x71, 0x30,
-  0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0xd4, 0x29, 0x07, 0x6f, 0x1c, 0x04,
-  0xc2, 0x05, 0xc3, 0x5c, 0xf0, 0xd4, 0x1d, 0x4f, 0x9d, 0xfb, 0x06, 0xc3,
-  0xdc, 0xc8, 0x06, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c,
-  0x00, 0x08, 0x82, 0xc1, 0xb7, 0xca, 0xc1, 0x1e, 0x07, 0x3a, 0x1c, 0xa0,
-  0x72, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08,
-  0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60,
-  0xc0, 0xc9, 0x72, 0x20, 0xca, 0x41, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00,
-  0x82, 0x60, 0xc0, 0xcd, 0x72, 0x30, 0xca, 0x41, 0x42, 0x04, 0x23, 0x06,
-  0x08, 0x00, 0x82, 0x60, 0xc0, 0xd1, 0x72, 0x40, 0xca, 0x41, 0x42, 0x04,
-  0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0xf0, 0x72, 0x30, 0xca, 0x01,
-  0x19, 0x07, 0xc1, 0x2b, 0x07, 0x74, 0x1c, 0xc4, 0x72, 0x30, 0x9a, 0x10,
-  0x00, 0x17, 0x3c, 0x35, 0x4b, 0x30, 0x27, 0xc3, 0x0d, 0x34, 0x46, 0xcb,
-  0x01, 0x18, 0xcc, 0x32, 0x88, 0xc9, 0x9c, 0x04, 0x66, 0xc3, 0x01, 0x0e,
-  0x07, 0xf1, 0x19, 0x8e, 0x00, 0xa3, 0x1c, 0x0e, 0x88, 0x6f, 0x96, 0x61,
-  0x4c, 0xcc, 0x24, 0x30, 0x1d, 0x0e, 0xc2, 0x28, 0x3e, 0x16, 0x0c, 0xf4,
-  0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0xc0, 0x90, 0x8f, 0x15, 0x41, 0x7c,
-  0x8a, 0xf0, 0xe5, 0x40, 0x87, 0x1b, 0x02, 0x5e, 0x0e, 0xc0, 0x60, 0x96,
-  0x81, 0x4c, 0xca, 0x24, 0xb0, 0x41, 0x8c, 0x03, 0xf8, 0xcc, 0x12, 0xa8,
-  0x89, 0x85, 0x71, 0x40, 0xc4, 0x67, 0x96, 0x40, 0x4d, 0x86, 0x23, 0xd6,
-  0x48, 0x8c, 0x03, 0xe1, 0x9b, 0x65, 0x38, 0x13, 0x35, 0x09, 0x8c, 0x8d,
-  0xc6, 0x38, 0x88, 0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7,
-  0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xd2, 0x39, 0xd0, 0xe1,
-  0x86, 0xe0, 0x9c, 0x03, 0x30, 0x98, 0x65, 0x40, 0x93, 0x34, 0x09, 0x6c,
-  0x8d, 0x83, 0x21, 0x3e, 0xb3, 0x04, 0x6a, 0x62, 0x84, 0x1b, 0x07, 0xf0,
-  0x99, 0x25, 0x50, 0x93, 0x81, 0x96, 0x47, 0x23, 0x13, 0xac, 0x4c, 0x08,
-  0x34, 0x11, 0xd2, 0x84, 0x1d, 0x03, 0x33, 0xb9, 0x60, 0x18, 0x6b, 0xe3,
-  0x20, 0x8e, 0x83, 0xf8, 0x0c, 0x47, 0xc4, 0x99, 0x1c, 0x07, 0xc4, 0x37,
-  0xcb, 0xb0, 0x26, 0x6e, 0x12, 0xd8, 0x1c, 0x07, 0x72, 0x16, 0x1f, 0x0b,
-  0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x60, 0xc8, 0xc7, 0x8a,
-  0x20, 0x3e, 0x45, 0xdc, 0x73, 0xa0, 0xc3, 0x0d, 0x41, 0x3d, 0x07, 0x60,
-  0x30, 0xcb, 0xc0, 0x26, 0x6d, 0x12, 0xd8, 0xb0, 0xc7, 0x01, 0x7c, 0x66,
-  0x09, 0xe4, 0xc4, 0xf0, 0x38, 0x20, 0xe2, 0x33, 0x4b, 0x20, 0x27, 0xc3,
-  0x11, 0x7c, 0x96, 0xc7, 0x81, 0xf0, 0xcd, 0x32, 0xbc, 0x89, 0x9c, 0x04,
-  0xd6, 0x67, 0x7a, 0x1c, 0xc4, 0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73,
-  0xc1, 0x53, 0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x22, 0x1d,
-  0xe8, 0x70, 0x43, 0x00, 0xd2, 0x01, 0x18, 0xcc, 0x32, 0xc0, 0x49, 0x9c,
-  0x04, 0x26, 0xca, 0xc1, 0x10, 0x9f, 0x59, 0x02, 0x39, 0x31, 0xe2, 0x94,
-  0x03, 0xf8, 0xcc, 0x12, 0xc8, 0xc9, 0x40, 0xcb, 0xa3, 0xb1, 0x09, 0xd6,
-  0x26, 0x04, 0x9c, 0x08, 0x71, 0xe2, 0xd2, 0x81, 0x9b, 0x5c, 0x30, 0xcc,
-  0x05, 0x4f, 0xdd, 0xf6, 0xd4, 0xcd, 0x71, 0x30, 0xcc, 0xa1, 0x6e, 0x30,
+  0x45, 0x1c, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xdc, 0xad, 0x9d,
+  0xda, 0x41, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xc0, 0xe1, 0x1a,
+  0xaa, 0x31, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x5c, 0xae,
+  0xa5, 0x9a, 0x44, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0x84,
+  0x1b, 0xaa, 0xa5, 0x59, 0x40, 0x6b, 0x79, 0x66, 0x6b, 0xa3, 0x09, 0x01,
+  0x70, 0xc1, 0x53, 0xb3, 0x04, 0x69, 0x31, 0xdc, 0x90, 0xe9, 0x1a, 0x18,
+  0xcc, 0x32, 0xd8, 0xc4, 0x4d, 0x04, 0x45, 0x66, 0xab, 0x06, 0x17, 0x3c,
+  0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0xd4, 0xb8, 0xb1, 0xda, 0xd7,
+  0x67, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0x91, 0x1b, 0xab, 0x05,
+  0xc2, 0x05, 0xc3, 0xd4, 0x99, 0xc1, 0x1a, 0x5c, 0xf0, 0xd4, 0x88, 0xc1,
+  0x01, 0x80, 0x20, 0x18, 0x50, 0xe8, 0x16, 0x6b, 0x62, 0x20, 0x6a, 0x23,
+  0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0xa5, 0x5b, 0xac, 0x05, 0xc2, 0x05,
+  0xc3, 0x5c, 0xf0, 0xd4, 0x1d, 0x4f, 0x1d, 0x8c, 0x0d, 0x73, 0x65, 0x35,
   0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18,
-  0x7c, 0x30, 0x1d, 0x80, 0x74, 0xf0, 0xcb, 0x41, 0x4b, 0x07, 0xa3, 0x09,
-  0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c,
-  0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xdc, 0x4d, 0x07,
-  0x27, 0x1d, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x1c,
-  0x4e, 0x07, 0x28, 0x1d, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08,
-  0x06, 0x5c, 0x4e, 0x07, 0x29, 0x1d, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00,
-  0x20, 0x08, 0x06, 0x4b, 0x58, 0x07, 0x28, 0x1d, 0xa4, 0x73, 0x10, 0xd0,
-  0x74, 0x90, 0xcf, 0x81, 0x4d, 0x07, 0xa3, 0x09, 0x01, 0x70, 0xc1, 0x53,
-  0xb3, 0x04, 0x73, 0x32, 0xd0, 0xf2, 0x98, 0x86, 0x8f, 0xd0, 0xe7, 0xd0,
-  0x23, 0x2c, 0x01, 0x26, 0x82, 0x9c, 0xd0, 0xe7, 0x10, 0x26, 0xb3, 0x0c,
-  0x74, 0x62, 0x27, 0xb9, 0x36, 0x1c, 0x91, 0x3e, 0xfb, 0x1c, 0x0c, 0xdf,
-  0xa9, 0xcf, 0x30, 0xc3, 0x0d, 0x81, 0x39, 0x07, 0x64, 0x50, 0x43, 0xa0,
-  0xc3, 0x11, 0xe0, 0xf6, 0xcf, 0xc1, 0xf0, 0x55, 0x20, 0xe8, 0x89, 0xdb,
-  0x30, 0xc3, 0x0d, 0x41, 0x3a, 0x07, 0x64, 0x50, 0xc1, 0xa0, 0xb3, 0x0c,
-  0x75, 0xa2, 0x2a, 0xc1, 0xb5, 0x72, 0x30, 0xcc, 0x89, 0x6f, 0x30, 0xcc,
-  0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x7c, 0x68, 0x1d, 0xe0, 0x74, 0x70,
-  0xcf, 0x41, 0x59, 0x07, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3,
-  0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x1c, 0x32, 0x62, 0x80, 0x00,
-  0x20, 0x08, 0x06, 0xdc, 0x5b, 0x07, 0x3f, 0x1d, 0x1c, 0x44, 0x30, 0x62,
-  0x80, 0x00, 0x20, 0x08, 0x06, 0x1c, 0x5c, 0x07, 0x60, 0x1d, 0x30, 0x44,
-  0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x5c, 0x5c, 0x07, 0x61, 0x1d,
-  0x48, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x4b, 0x5e, 0x07,
-  0x60, 0x1d, 0x84, 0x74, 0x10, 0xb0, 0x75, 0x10, 0xd3, 0x81, 0x5b, 0x07,
-  0xa3, 0x09, 0x01, 0x70, 0xc1, 0x53, 0xb3, 0x04, 0xaa, 0x32, 0xdc, 0x10,
-  0x6f, 0x72, 0x1d, 0x80, 0xc1, 0x2c, 0xc3, 0x9d, 0xe0, 0x49, 0x50, 0xfc,
-  0x1c, 0x8c, 0x75, 0x00, 0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08,
-  0x06, 0xd4, 0x5e, 0x07, 0x64, 0x1d, 0x8c, 0x50, 0x4d, 0x07, 0x23, 0x06,
-  0x07, 0x00, 0x82, 0x60, 0x40, 0xf1, 0x75, 0x40, 0xd6, 0x41, 0x20, 0x5c,
-  0x30, 0x4c, 0xfd, 0x73, 0x80, 0xd6, 0x01, 0x5c, 0xf0, 0xd4, 0x88, 0xc1,
-  0x01, 0x80, 0x20, 0x18, 0x50, 0xa0, 0x1d, 0xa4, 0x75, 0xb0, 0x6f, 0x3a,
-  0x1d, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x01, 0x15, 0xda, 0x41, 0x5a,
-  0x07, 0x81, 0x70, 0xc1, 0x30, 0x17, 0x3c, 0x75, 0xc7, 0x53, 0x87, 0xce,
-  0xc1, 0x30, 0xd7, 0xbf, 0xc1, 0x30, 0x47, 0x0c, 0x73, 0xc4, 0x30, 0x23,
-  0x06, 0x07, 0x00, 0x82, 0x60, 0xf0, 0x95, 0x76, 0x50, 0xd7, 0x01, 0x4d,
-  0x07, 0xa2, 0x1d, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26,
-  0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x91, 0xc8, 0x88, 0x01, 0x02, 0x80,
-  0x20, 0x18, 0x70, 0xac, 0x1d, 0xf0, 0x75, 0x90, 0x10, 0xc1, 0x88, 0x01,
-  0x02, 0x80, 0x20, 0x18, 0x70, 0xad, 0x1d, 0xf4, 0x75, 0x90, 0x10, 0xc1,
-  0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70, 0xae, 0x1d, 0xf8, 0x75, 0x90,
-  0x10, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c, 0xb6, 0x1d, 0xf4,
-  0x75, 0xe0, 0xd3, 0x41, 0x90, 0xda, 0x81, 0x5b, 0x07, 0xab, 0x1d, 0x8c,
-  0x26, 0x04, 0xc0, 0x05, 0x4f, 0xcd, 0x12, 0xa8, 0xca, 0x70, 0x83, 0xcb,
-  0xb9, 0x76, 0x00, 0x06, 0xb3, 0x0c, 0x79, 0xa2, 0x2a, 0x81, 0xc1, 0x74,
-  0x20, 0xd3, 0x41, 0x7c, 0x86, 0x23, 0x7e, 0x68, 0xa6, 0x03, 0xe2, 0x9b,
-  0x65, 0xd0, 0x93, 0x3e, 0x09, 0x8c, 0xa6, 0x03, 0x30, 0x8a, 0x8f, 0x05,
+  0x7c, 0xed, 0xd6, 0x6b, 0x7c, 0xa6, 0x6e, 0xa3, 0x09, 0x01, 0x30, 0x9a,
+  0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32,
+  0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x1c, 0xbd, 0x91, 0x5b, 0x42, 0x04,
+  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xc0, 0xd5, 0x5b, 0xb9, 0x25, 0x44,
+  0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x9c, 0xbd, 0x99, 0x5b, 0x42,
+  0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0xf8, 0x5b, 0xb9, 0x99,
+  0x5a, 0x10, 0x6f, 0xb6, 0x36, 0x6f, 0xa3, 0x09, 0x01, 0x70, 0xc1, 0x53,
+  0xb3, 0x04, 0x69, 0x31, 0xdc, 0x60, 0x07, 0xf6, 0x06, 0x06, 0xb3, 0x0c,
+  0x38, 0x91, 0x16, 0x81, 0xe1, 0x99, 0x9e, 0xc5, 0x67, 0x38, 0x62, 0x0f,
+  0xf6, 0x8c, 0xf8, 0x66, 0x19, 0x72, 0x82, 0x27, 0x02, 0xe3, 0x33, 0x3e,
+  0x88, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x30,
+  0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0x40, 0x4e, 0x87, 0x1b, 0x02, 0x7f,
+  0x03, 0x83, 0x59, 0x06, 0x9d, 0xd8, 0x89, 0xc0, 0x06, 0x52, 0x83, 0xcf,
+  0x2c, 0x01, 0x58, 0xd8, 0xa8, 0x11, 0xf1, 0x99, 0x25, 0x00, 0x8b, 0xe1,
+  0x08, 0x53, 0x20, 0x35, 0xe1, 0x9b, 0x65, 0xe8, 0x09, 0xb0, 0x08, 0xec,
+  0x14, 0x4a, 0x2d, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e,
+  0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x58, 0x39, 0x1d, 0x6e,
+  0x08, 0x52, 0x0e, 0x0c, 0x66, 0x19, 0x7c, 0xe2, 0x27, 0x02, 0x6b, 0xb5,
+  0x21, 0x3e, 0xb3, 0x04, 0x60, 0x61, 0x04, 0xac, 0xc1, 0x67, 0x96, 0x00,
+  0x2c, 0x06, 0x5a, 0x1e, 0x4d, 0x27, 0xb0, 0x9d, 0x20, 0x7c, 0x42, 0xf8,
+  0x09, 0xb1, 0xe0, 0x89, 0x0b, 0x86, 0xb1, 0x57, 0x9b, 0xb5, 0xf8, 0x0c,
+  0x47, 0xc8, 0x02, 0xad, 0x11, 0xdf, 0x2c, 0x43, 0x58, 0x90, 0x45, 0x60,
+  0xb5, 0x36, 0x0b, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0xf0,
+  0x94, 0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x44, 0xce, 0xe9, 0x70,
+  0x43, 0x70, 0x73, 0x60, 0x30, 0xcb, 0x20, 0x16, 0x63, 0x11, 0xd8, 0xd0,
+  0x6b, 0xf0, 0x99, 0x25, 0x40, 0x0b, 0xd3, 0x35, 0x22, 0x3e, 0xb3, 0x04,
+  0x68, 0x31, 0x1c, 0xd1, 0x0b, 0xbb, 0x26, 0x7c, 0xb3, 0x0c, 0x65, 0x81,
+  0x16, 0x81, 0xf9, 0x02, 0xaf, 0xc5, 0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c,
+  0x73, 0xc1, 0x53, 0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x64,
+  0xa7, 0xc3, 0x0d, 0x81, 0xd8, 0x81, 0xc1, 0x2c, 0x83, 0x59, 0x9c, 0x45,
+  0x60, 0xe4, 0x36, 0xc4, 0x67, 0x96, 0x00, 0x2d, 0x8c, 0x48, 0x37, 0xf8,
+  0xcc, 0x12, 0xa0, 0xc5, 0x40, 0xcb, 0xa3, 0x89, 0x05, 0x36, 0x16, 0x84,
+  0x59, 0x08, 0x67, 0x01, 0x1b, 0x64, 0x71, 0xc1, 0x30, 0x17, 0x3c, 0x75,
+  0xdb, 0x53, 0x57, 0x6b, 0xc3, 0x9c, 0x7a, 0x0d, 0x73, 0xc4, 0x30, 0x47,
+  0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x9f, 0xdc, 0x89, 0x5d,
+  0xc8, 0xbd, 0xdd, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2,
+  0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08,
+  0x82, 0x01, 0x97, 0x77, 0x69, 0x97, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80,
+  0x20, 0x18, 0x70, 0x7a, 0xa7, 0x76, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00,
+  0x08, 0x82, 0x01, 0xb7, 0x77, 0x6b, 0x97, 0x10, 0xc1, 0x88, 0x81, 0x02,
+  0x80, 0x20, 0x18, 0x2c, 0xa3, 0xa7, 0x76, 0x2b, 0x17, 0xd8, 0xdd, 0xce,
+  0xe1, 0xdd, 0x68, 0x42, 0x00, 0x5c, 0xf0, 0xd4, 0x2c, 0x41, 0x5a, 0x0c,
+  0xb4, 0x3c, 0xa6, 0x41, 0x13, 0x78, 0x1e, 0xcc, 0x04, 0x4b, 0xd8, 0x84,
+  0x80, 0x16, 0x78, 0x1e, 0xdc, 0xc4, 0x2c, 0x83, 0x5a, 0xb0, 0xc5, 0x3e,
+  0x0c, 0x47, 0x80, 0x44, 0xcf, 0x0d, 0xdf, 0x85, 0xc4, 0x30, 0xc3, 0x0d,
+  0x01, 0xca, 0x91, 0x41, 0x0d, 0x81, 0x0e, 0x47, 0x84, 0x44, 0xd8, 0x0d,
+  0x5f, 0x05, 0x82, 0xde, 0x48, 0x0c, 0x33, 0xdc, 0x10, 0xac, 0x1c, 0x19,
+  0x54, 0x30, 0xe8, 0x2c, 0xc3, 0x5a, 0x80, 0x46, 0x70, 0xef, 0x36, 0xcc,
+  0x91, 0xd8, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xf0, 0xa9, 0x9e,
+  0xde, 0xe5, 0xdc, 0xe9, 0x8d, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c,
+  0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x71, 0xc8, 0x88, 0x01, 0x02,
+  0x80, 0x20, 0x18, 0x70, 0xb1, 0x17, 0x7a, 0x07, 0x11, 0x8c, 0x18, 0x20,
+  0x00, 0x08, 0x82, 0x01, 0x27, 0x7b, 0xa2, 0xc7, 0x10, 0xc1, 0x88, 0x01,
+  0x02, 0x80, 0x20, 0x18, 0x70, 0xb3, 0x37, 0x7a, 0x12, 0x11, 0x8c, 0x18,
+  0x28, 0x00, 0x08, 0x82, 0xc1, 0xb2, 0x7b, 0xa2, 0x37, 0x76, 0x81, 0xeb,
+  0xcd, 0x1d, 0xec, 0x8d, 0x26, 0x04, 0xc0, 0x05, 0x4f, 0xcd, 0x12, 0x80,
+  0xc6, 0x70, 0xc3, 0x4c, 0xd0, 0x1e, 0x18, 0xcc, 0x32, 0xb4, 0x85, 0x5b,
+  0x04, 0xe5, 0x73, 0xa5, 0x07, 0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20,
+  0x08, 0x06, 0x54, 0xef, 0x99, 0x9e, 0x4e, 0xdc, 0xdd, 0x88, 0xc1, 0x01,
+  0x80, 0x20, 0x18, 0x50, 0xbe, 0x67, 0x7a, 0x81, 0x70, 0xc1, 0x30, 0x15,
+  0x76, 0xaa, 0x07, 0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06,
+  0x94, 0xf8, 0xad, 0x1e, 0x4f, 0xf0, 0xdd, 0x88, 0xc1, 0x01, 0x80, 0x20,
+  0x18, 0x50, 0xe3, 0xb7, 0x7a, 0x81, 0x70, 0xc1, 0x30, 0x17, 0x3c, 0x75,
+  0xc7, 0x53, 0xa7, 0x72, 0xc3, 0xdc, 0x8f, 0x0d, 0x73, 0xc4, 0x30, 0x47,
+  0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0xdf, 0xf9, 0xdd, 0x9e,
+  0xdd, 0x91, 0xdf, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2,
+  0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08,
+  0x82, 0x01, 0xe7, 0x7e, 0xbe, 0x97, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80,
+  0x20, 0x18, 0x70, 0xef, 0xf7, 0x7b, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00,
+  0x08, 0x82, 0x01, 0x07, 0x7f, 0xe0, 0x97, 0x10, 0xc1, 0x88, 0x81, 0x02,
+  0x80, 0x20, 0x18, 0x2c, 0xf8, 0xf7, 0x7b, 0xa0, 0x17, 0xac, 0x1f, 0xec,
+  0xb5, 0xdf, 0x68, 0x42, 0x00, 0x5c, 0xf0, 0xd4, 0x2c, 0x01, 0x68, 0x0c,
+  0x37, 0xc0, 0x05, 0xfc, 0x81, 0xc1, 0x2c, 0xc3, 0x5b, 0x80, 0x46, 0x60,
+  0x72, 0x47, 0x77, 0xf1, 0x19, 0x8e, 0xb0, 0x8b, 0xba, 0x23, 0xbe, 0x59,
+  0x06, 0xb8, 0x98, 0x8b, 0xc0, 0xec, 0xee, 0x2e, 0xe2, 0x63, 0xc1, 0x40,
+  0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4,
+  0xa7, 0x08, 0xfd, 0xd3, 0xe1, 0x86, 0x00, 0xff, 0xc0, 0x60, 0x96, 0x21,
+  0x2e, 0xe4, 0x22, 0xb0, 0xc1, 0xef, 0xe0, 0x33, 0x4b, 0x70, 0x17, 0xd6,
+  0x77, 0x44, 0x7c, 0x66, 0x09, 0xee, 0x62, 0x38, 0x22, 0x34, 0xfc, 0x4e,
+  0xf8, 0x66, 0x19, 0xe8, 0xe2, 0x2e, 0x02, 0x13, 0x8d, 0xbf, 0x8b, 0x8f,
+  0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x88, 0xe4, 0x63,
+  0x45, 0x10, 0x9f, 0x22, 0x4a, 0x30, 0xd0, 0xe1, 0x86, 0x60, 0x04, 0x03,
+  0x30, 0x98, 0x65, 0xa8, 0x0b, 0xbb, 0x08, 0xec, 0xf4, 0x86, 0xf8, 0xcc,
+  0x12, 0xdc, 0x85, 0x11, 0xaa, 0x07, 0x9f, 0x59, 0x82, 0xbb, 0x18, 0x68,
+  0x79, 0xb4, 0xb8, 0xc0, 0xe4, 0x82, 0xa8, 0x0b, 0xc1, 0x2e, 0x70, 0x66,
+  0x2e, 0x2e, 0x18, 0xc6, 0x52, 0xaf, 0xf5, 0xe2, 0x33, 0x1c, 0xc1, 0x1a,
+  0xae, 0x47, 0x7c, 0xb3, 0x0c, 0x78, 0xb1, 0x17, 0x81, 0xbd, 0x5e, 0x6b,
+  0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x18,
+  0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x33, 0x18, 0xe8, 0x70, 0x43, 0x10,
+  0x83, 0x01, 0x18, 0xcc, 0x32, 0xe4, 0x85, 0x5e, 0x04, 0x36, 0xdc, 0x1e,
+  0x7c, 0x66, 0x09, 0xfe, 0xc2, 0x68, 0x8f, 0x88, 0xcf, 0x2c, 0xc1, 0x5f,
+  0x0c, 0x47, 0xdc, 0x46, 0xed, 0x09, 0xdf, 0x2c, 0x03, 0x5f, 0xfc, 0x45,
+  0x60, 0xb8, 0x61, 0x7b, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c,
+  0xf0, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x84, 0x0f, 0x06,
+  0x3a, 0xdc, 0x10, 0xf0, 0x60, 0x00, 0x06, 0xb3, 0x0c, 0x7d, 0xe1, 0x17,
+  0x81, 0xf9, 0xde, 0x10, 0x9f, 0x59, 0x82, 0xbf, 0x30, 0x62, 0xfc, 0xe0,
+  0x33, 0x4b, 0xf0, 0x17, 0x03, 0x2d, 0x8f, 0x96, 0x17, 0x98, 0x5e, 0x10,
+  0x7d, 0x21, 0xf8, 0x05, 0xe9, 0xec, 0xc5, 0x05, 0xc3, 0x5c, 0xf0, 0xd4,
+  0x6d, 0x4f, 0xdd, 0xeb, 0x0d, 0x73, 0xe4, 0x36, 0xcc, 0x11, 0xc3, 0x1c,
+  0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x7c, 0x6c, 0x18, 0xf0,
+  0x60, 0xb0, 0x7f, 0x69, 0x18, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10,
+  0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x91, 0xc8, 0x88, 0x01,
+  0x02, 0x80, 0x20, 0x18, 0x70, 0x73, 0x18, 0x8c, 0x61, 0x90, 0x10, 0xc1,
+  0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70, 0x74, 0x18, 0x90, 0x61, 0x90,
+  0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70, 0x75, 0x18, 0x94,
+  0x61, 0x90, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c, 0x7d,
+  0x18, 0x90, 0x61, 0x50, 0x82, 0x41, 0x00, 0x87, 0x41, 0x0d, 0x06, 0x72,
+  0x18, 0x8c, 0x26, 0x04, 0xc0, 0x05, 0x4f, 0xcd, 0x12, 0x80, 0xc6, 0x40,
+  0xcb, 0x63, 0x1a, 0x6b, 0xc1, 0x86, 0x82, 0x5a, 0xb0, 0x44, 0x5b, 0x08,
+  0x7f, 0xc1, 0x86, 0x82, 0x5b, 0x98, 0x7e, 0xd8, 0x60, 0x00, 0x9f, 0x59,
+  0x86, 0xd0, 0x18, 0x0d, 0xfb, 0x18, 0x8e, 0x08, 0x70, 0x30, 0x18, 0xbe,
+  0x13, 0x86, 0x19, 0x6e, 0x08, 0x46, 0x30, 0x20, 0x83, 0x1a, 0x02, 0x1d,
+  0x8e, 0xe0, 0x0f, 0x1e, 0x0c, 0x86, 0xaf, 0x02, 0x41, 0xcf, 0x3f, 0x86,
+  0x19, 0x6e, 0x08, 0x4c, 0x30, 0x20, 0x83, 0x0a, 0x06, 0x9d, 0x65, 0x10,
+  0x8d, 0xdb, 0x08, 0x4e, 0xfd, 0x86, 0xb9, 0x7f, 0x1b, 0x66, 0xc4, 0xe0,
+  0x00, 0x40, 0x10, 0x0c, 0xbe, 0x52, 0x0c, 0xea, 0x30, 0xa0, 0xc1, 0x40,
+  0x14, 0x83, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41,
+  0x18, 0x4d, 0x20, 0x86, 0x22, 0x0e, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04,
+  0x03, 0x8e, 0x15, 0x03, 0x3e, 0x0c, 0x0e, 0x22, 0x18, 0x31, 0x40, 0x00,
+  0x10, 0x04, 0x03, 0xae, 0x15, 0x83, 0x3e, 0x0c, 0x18, 0x22, 0x18, 0x31,
+  0x40, 0x00, 0x10, 0x04, 0x03, 0xce, 0x15, 0x03, 0x3f, 0x0c, 0x24, 0x22,
+  0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0xc5, 0x16, 0x83, 0x3e, 0x0c,
+  0x7c, 0x30, 0x08, 0x52, 0x31, 0x70, 0xc3, 0x60, 0x15, 0x83, 0xd1, 0x84,
+  0x00, 0xb8, 0xe0, 0xa9, 0x59, 0x82, 0xdb, 0x18, 0x6e, 0x70, 0x91, 0x57,
+  0x0c, 0xc0, 0x60, 0x96, 0x81, 0x34, 0x4a, 0x23, 0xa8, 0x1c, 0x0c, 0x40,
+  0x31, 0x80, 0x0b, 0x9e, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x0a,
+  0x17, 0x83, 0x50, 0x0c, 0x36, 0x39, 0x0c, 0x46, 0x0c, 0x0e, 0x00, 0x04,
+  0xc1, 0x80, 0xca, 0xc5, 0x20, 0x14, 0x83, 0x40, 0xb8, 0x60, 0x98, 0xe2,
+  0xc1, 0xa0, 0x14, 0x03, 0xb8, 0xe0, 0xa9, 0x11, 0x83, 0x03, 0x00, 0x41,
+  0x30, 0xa0, 0x7a, 0x31, 0x30, 0xc5, 0xe0, 0x46, 0xee, 0x30, 0x18, 0x31,
+  0x38, 0x00, 0x10, 0x04, 0x03, 0xca, 0x17, 0x03, 0x53, 0x0c, 0x02, 0xe1,
+  0x82, 0x61, 0x2e, 0x78, 0xea, 0x8e, 0xa7, 0xae, 0x04, 0x83, 0x61, 0x4e,
+  0xe7, 0x86, 0x39, 0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10,
+  0x04, 0x83, 0x4f, 0x1c, 0x03, 0x59, 0x0c, 0xe2, 0x30, 0xf8, 0xc5, 0x60,
+  0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13,
+  0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80, 0x4b,
+  0xc7, 0x20, 0x17, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1,
+  0x80, 0x53, 0xc7, 0x40, 0x17, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00,
+  0x04, 0xc1, 0x80, 0x5b, 0xc7, 0x60, 0x17, 0x83, 0x84, 0x08, 0x46, 0x0c,
+  0x14, 0x00, 0x04, 0xc1, 0x60, 0x99, 0xc7, 0x40, 0x17, 0x83, 0x3d, 0x0c,
+  0x02, 0x73, 0x0c, 0x56, 0x31, 0x40, 0xc7, 0x60, 0x34, 0x21, 0x00, 0x2e,
+  0x78, 0x6a, 0x96, 0xe0, 0x36, 0x86, 0x1b, 0xd6, 0x64, 0x1d, 0x03, 0x30,
+  0x98, 0x65, 0x30, 0x8d, 0xdb, 0x08, 0xac, 0x0d, 0x83, 0x37, 0x0c, 0xe2,
+  0x33, 0x1c, 0x71, 0x07, 0x70, 0x18, 0x10, 0xdf, 0x2c, 0xc3, 0x69, 0xa8,
+  0x46, 0x60, 0x71, 0x18, 0xe0, 0x41, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1,
+  0x30, 0x17, 0x3c, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x51,
+  0x8f, 0x81, 0x0e, 0x37, 0x04, 0xf3, 0x18, 0x80, 0xc1, 0x2c, 0x03, 0x6a,
+  0xa4, 0x46, 0x60, 0x43, 0x1e, 0x06, 0xf0, 0x99, 0x25, 0x70, 0x0d, 0xc3,
+  0xc3, 0x80, 0x88, 0xcf, 0x2c, 0x81, 0x6b, 0x0c, 0x47, 0x88, 0x42, 0x1e,
+  0x06, 0xc2, 0x37, 0xcb, 0xb0, 0x1a, 0xae, 0x11, 0xd8, 0x28, 0xe8, 0x61,
+  0x10, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x10,
+  0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0x80, 0x64, 0xa0, 0xc3, 0x0d, 0x81,
+  0x3f, 0x06, 0x60, 0x30, 0xcb, 0xc0, 0x1a, 0xad, 0x11, 0x98, 0x28, 0x06,
+  0x43, 0x7c, 0x66, 0x09, 0x5c, 0xc3, 0x88, 0x52, 0x0c, 0xe0, 0x33, 0x4b,
+  0xe0, 0x1a, 0x03, 0x2d, 0x8f, 0x86, 0x1a, 0x58, 0x6a, 0x10, 0xac, 0x21,
+  0xb4, 0x86, 0x4e, 0xa8, 0xc6, 0x05, 0xc3, 0x18, 0x29, 0x06, 0xa8, 0x18,
+  0xc4, 0x67, 0x38, 0xe2, 0x54, 0x52, 0x31, 0x20, 0xbe, 0x59, 0x86, 0xd7,
+  0x90, 0x8d, 0xc0, 0x54, 0x31, 0x40, 0x95, 0xf8, 0x58, 0x30, 0xd0, 0xe7,
+  0x82, 0x61, 0x2e, 0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29,
+  0xc2, 0x25, 0x03, 0x1d, 0x6e, 0x08, 0x58, 0x32, 0x00, 0x83, 0x59, 0x06,
+  0xd8, 0x88, 0x8d, 0xc0, 0x06, 0x59, 0x0c, 0xe0, 0x33, 0x4b, 0x60, 0x1b,
+  0xf6, 0x8a, 0x01, 0x11, 0x9f, 0x59, 0x02, 0xdb, 0x18, 0x8e, 0x90, 0x15,
+  0x58, 0x0c, 0x84, 0x6f, 0x96, 0x61, 0x36, 0x6c, 0x23, 0xb0, 0x59, 0x89,
+  0xc5, 0x20, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2,
+  0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xc8, 0xc9, 0x40, 0x87, 0x1b,
+  0x82, 0x9b, 0x0c, 0xc0, 0x60, 0x96, 0x81, 0x36, 0x6a, 0x23, 0xb0, 0x5c,
+  0x0c, 0x86, 0xf8, 0xcc, 0x12, 0xd8, 0x86, 0x11, 0xbe, 0x18, 0xc0, 0x67,
+  0x96, 0xc0, 0x36, 0x06, 0x5a, 0x1e, 0x0d, 0x36, 0xb0, 0xd8, 0x20, 0x68,
+  0x43, 0xa8, 0x0d, 0xbd, 0x92, 0x8d, 0x0b, 0x86, 0xb9, 0xe0, 0xa9, 0xdb,
+  0x9e, 0x3a, 0x55, 0x0c, 0x86, 0xb9, 0xdf, 0x1b, 0xe6, 0x88, 0x61, 0x8e,
+  0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xbe, 0xb3, 0x0c, 0x6e,
+  0x32, 0xb0, 0xc7, 0x80, 0x2c, 0x83, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10,
+  0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x12, 0x19, 0x31,
+  0x40, 0x00, 0x10, 0x04, 0x03, 0xce, 0x2d, 0x03, 0x9f, 0x0c, 0x12, 0x22,
+  0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xee, 0x2d, 0x83, 0x9f, 0x0c,
+  0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x0e, 0x2e, 0x03,
+  0xb0, 0x0c, 0x12, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x05,
+  0x2f, 0x83, 0x9f, 0x0c, 0x40, 0x32, 0x08, 0xd6, 0x32, 0x80, 0xc9, 0xa0,
+  0x2d, 0x83, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xa9, 0x59, 0x82, 0xdb, 0x18,
+  0x68, 0x79, 0x4c, 0x43, 0x34, 0xc4, 0x54, 0x08, 0x0d, 0x96, 0x20, 0x0d,
+  0xc1, 0x36, 0xc4, 0x54, 0x28, 0x0d, 0xab, 0x97, 0x93, 0x0c, 0xe0, 0x33,
+  0xcb, 0x80, 0x1b, 0xba, 0x11, 0x2f, 0xc3, 0x11, 0xc1, 0x4c, 0x06, 0xc3,
+  0x77, 0xc2, 0x30, 0xc3, 0x0d, 0x81, 0x3f, 0x06, 0x64, 0x50, 0x43, 0xa0,
+  0xc3, 0x11, 0xf7, 0x72, 0x93, 0xc1, 0xf0, 0x55, 0x20, 0xe8, 0xe5, 0xcb,
+  0x30, 0xc3, 0x0d, 0x41, 0x48, 0x06, 0x64, 0x50, 0xc1, 0xa0, 0xb3, 0x0c,
+  0xb9, 0xe1, 0x1e, 0xc1, 0x95, 0x63, 0x30, 0xcc, 0xe9, 0xdf, 0x30, 0x23,
+  0x06, 0x07, 0x00, 0x82, 0x60, 0xf0, 0x81, 0x66, 0x00, 0x97, 0xc1, 0x4b,
+  0x06, 0x7d, 0x19, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26,
+  0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x71, 0xc8, 0x88, 0x01, 0x02, 0x80,
+  0x20, 0x18, 0x70, 0xa7, 0x19, 0xdc, 0x65, 0x70, 0x10, 0xc1, 0x88, 0x01,
+  0x02, 0x80, 0x20, 0x18, 0x70, 0xa8, 0x19, 0xe0, 0x65, 0xc0, 0x10, 0xc1,
+  0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70, 0xa9, 0x19, 0xe4, 0x65, 0x20,
+  0x11, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c, 0xb1, 0x19, 0xe0,
+  0x65, 0x90, 0x93, 0x41, 0x40, 0x9a, 0x41, 0x5a, 0x06, 0xa6, 0x19, 0x8c,
+  0x26, 0x04, 0xc0, 0x05, 0x4f, 0xcd, 0x12, 0xb8, 0xc7, 0x70, 0x43, 0xca,
+  0xa8, 0x66, 0x00, 0x06, 0xb3, 0x0c, 0xbb, 0xc1, 0x1b, 0x41, 0xd1, 0x64,
+  0xb0, 0x97, 0x01, 0x5c, 0xf0, 0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18,
+  0x50, 0xb3, 0x19, 0xf0, 0x65, 0xb0, 0xb5, 0x65, 0x30, 0x62, 0x70, 0x00,
+  0x20, 0x08, 0x06, 0x14, 0x6d, 0x06, 0x7c, 0x19, 0x04, 0xc2, 0x05, 0xc3,
+  0xd4, 0x4d, 0x06, 0xa0, 0x19, 0xc0, 0x05, 0x4f, 0x8d, 0x18, 0x1c, 0x00,
+  0x08, 0x82, 0x01, 0x85, 0x9b, 0x41, 0x68, 0x06, 0x32, 0x23, 0x97, 0xc1,
+  0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50, 0xb9, 0x19, 0x84, 0x66, 0x10,
+  0x08, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x77, 0x3c, 0x75, 0x20, 0x19, 0x0c,
+  0x73, 0x35, 0x18, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70,
+  0x00, 0x20, 0x08, 0x06, 0x5f, 0x6f, 0x06, 0xad, 0x19, 0xb0, 0x65, 0xa0,
+  0x9b, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20,
+  0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
+  0x01, 0x47, 0x9e, 0x01, 0x6d, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00,
+  0x08, 0x82, 0x01, 0x57, 0x9e, 0x41, 0x6d, 0x06, 0x09, 0x11, 0x8c, 0x18,
+  0x20, 0x00, 0x08, 0x82, 0x01, 0x67, 0x9e, 0x81, 0x6d, 0x06, 0x09, 0x11,
+  0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0xe2, 0x9e, 0x41, 0x6d, 0x06,
+  0x76, 0x19, 0x04, 0xe1, 0x19, 0x98, 0x66, 0x30, 0x9e, 0xc1, 0x68, 0x42,
+  0x00, 0x5c, 0xf0, 0xd4, 0x2c, 0x81, 0x7b, 0x0c, 0x37, 0x98, 0x8d, 0x79,
+  0x06, 0x60, 0x30, 0xcb, 0xd0, 0x1b, 0xee, 0x11, 0x18, 0x5a, 0x06, 0x6a,
+  0x19, 0xc4, 0x67, 0x38, 0xe2, 0x0e, 0xd6, 0x32, 0x20, 0xbe, 0x59, 0x06,
+  0xdf, 0x08, 0x8f, 0xc0, 0xd8, 0x32, 0xc0, 0x83, 0xf8, 0x58, 0x30, 0xd0,
+  0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1,
+  0x29, 0x02, 0x3e, 0x03, 0x1d, 0x6e, 0x08, 0xdc, 0x33, 0x00, 0x83, 0x59,
+  0x86, 0xdf, 0x00, 0x8f, 0xc0, 0x06, 0xba, 0x0c, 0xe0, 0x33, 0x4b, 0x50,
+  0x1e, 0x36, 0x97, 0x01, 0x11, 0x9f, 0x59, 0x82, 0xf2, 0x18, 0x8e, 0x10,
+  0x05, 0xba, 0x0c, 0x84, 0x6f, 0x96, 0x41, 0x3c, 0xca, 0x23, 0xb0, 0x51,
+  0xa8, 0xcb, 0x20, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e,
+  0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xd8, 0xcf, 0x40, 0x87,
+  0x1b, 0x82, 0xfc, 0x0c, 0xc0, 0x60, 0x96, 0x61, 0x3c, 0xc8, 0x23, 0xb0,
+  0xbe, 0x0c, 0x86, 0xf8, 0xcc, 0x12, 0x94, 0x87, 0x11, 0xa0, 0x19, 0xc0,
+  0x67, 0x96, 0xa0, 0x3c, 0x06, 0x5a, 0x1e, 0xed, 0x37, 0x30, 0xf0, 0x20,
+  0xc6, 0x43, 0x20, 0x0f, 0x9d, 0x08, 0x8f, 0x0b, 0x86, 0xb1, 0xbf, 0x0c,
+  0x46, 0x33, 0x88, 0xcf, 0x70, 0x84, 0xe8, 0x90, 0x66, 0x40, 0x7c, 0xb3,
+  0x0c, 0xe6, 0x91, 0x1e, 0x81, 0x95, 0x66, 0x30, 0x3a, 0xf1, 0xb1, 0x60,
+  0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08,
+  0xe2, 0x53, 0x44, 0x8a, 0x06, 0x3a, 0xdc, 0x10, 0x9c, 0x68, 0x00, 0x06,
+  0xb3, 0x0c, 0xe7, 0x81, 0x1e, 0x81, 0x0d, 0xad, 0x19, 0xc0, 0x67, 0x96,
+  0xa0, 0x3d, 0x4c, 0x35, 0x03, 0x22, 0x3e, 0xb3, 0x04, 0xed, 0x31, 0x1c,
+  0xd1, 0x3a, 0xab, 0x19, 0x08, 0xdf, 0x2c, 0x83, 0x7a, 0xb4, 0x47, 0x60,
+  0xae, 0xc3, 0x9a, 0x41, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17,
+  0x3c, 0x65, 0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x41, 0xa3, 0x81,
+  0x0e, 0x37, 0x04, 0x32, 0x1a, 0x80, 0xc1, 0x2c, 0xc3, 0x7a, 0xb0, 0x47,
+  0x60, 0xb4, 0x19, 0x0c, 0xf1, 0x99, 0x25, 0x68, 0x0f, 0x23, 0x72, 0x33,
+  0x80, 0xcf, 0x2c, 0x41, 0x7b, 0x0c, 0xb4, 0x3c, 0xda, 0x79, 0x60, 0xe8,
+  0x41, 0xac, 0x87, 0xc0, 0x1e, 0x70, 0x97, 0x1e, 0x17, 0x0c, 0x73, 0xc1,
+  0x53, 0xb7, 0x3d, 0x75, 0xa5, 0x19, 0x0c, 0x73, 0xba, 0x18, 0x0c, 0x73,
+  0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x9f,
+  0x98, 0x06, 0x32, 0x1a, 0xc4, 0x67, 0xf0, 0xa3, 0xc1, 0x68, 0x42, 0x00,
+  0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11,
+  0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x97, 0xa6, 0x41, 0x8e,
+  0x06, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0xa7, 0xa6,
+  0x81, 0x8e, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01,
+  0xb7, 0xa6, 0xc1, 0x8e, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08,
+  0x82, 0xc1, 0x32, 0xa7, 0x81, 0x8e, 0x06, 0xfb, 0x19, 0x04, 0x66, 0x1a,
+  0xac, 0x68, 0x80, 0xa6, 0xc1, 0x68, 0x42, 0x00, 0x5c, 0xf0, 0xd4, 0x2c,
+  0x81, 0x7b, 0x0c, 0xb4, 0x3c, 0xa6, 0x91, 0x1b, 0x38, 0x2c, 0xe0, 0x06,
+  0x4b, 0xec, 0x86, 0xd0, 0x1e, 0x38, 0x2c, 0xf0, 0xc6, 0x2c, 0xc3, 0x7b,
+  0xc4, 0xc7, 0xfa, 0x0c, 0x47, 0xbc, 0x4f, 0x8b, 0x06, 0xc3, 0x77, 0xf0,
+  0x33, 0xcc, 0x70, 0x43, 0x80, 0x9f, 0x01, 0x19, 0xd4, 0x10, 0xe8, 0x70,
+  0x84, 0xfc, 0xc4, 0x68, 0x30, 0x7c, 0x15, 0x08, 0x7a, 0xf4, 0x33, 0xcc,
+  0x70, 0x43, 0xb0, 0x9f, 0x01, 0x19, 0x54, 0x30, 0xe8, 0x2c, 0x03, 0x7c,
+  0x94, 0x48, 0x70, 0xbf, 0x19, 0x0c, 0x73, 0xf4, 0x18, 0x0c, 0x33, 0x62,
+  0x70, 0x00, 0x20, 0x08, 0x06, 0x9f, 0x9e, 0x06, 0x6a, 0x1a, 0xa4, 0x68,
+  0x70, 0xa7, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2,
+  0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x87, 0x8c, 0x18, 0x20, 0x00, 0x08,
+  0x82, 0x01, 0x17, 0xaa, 0x41, 0x9c, 0x06, 0x07, 0x11, 0x8c, 0x18, 0x20,
+  0x00, 0x08, 0x82, 0x01, 0x27, 0xaa, 0x81, 0x9c, 0x06, 0x0c, 0x11, 0x8c,
+  0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x37, 0xaa, 0xc1, 0x9c, 0x06, 0x12,
+  0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0xb2, 0xaa, 0x81, 0x9c,
+  0x06, 0x33, 0x1a, 0x04, 0x7e, 0x1a, 0x8c, 0x69, 0x00, 0xaa, 0xc1, 0x68,
+  0x42, 0x00, 0x5c, 0xf0, 0xd4, 0x2c, 0x41, 0x89, 0x0c, 0x37, 0x8c, 0x10,
+  0xa9, 0x06, 0x60, 0x30, 0xcb, 0x20, 0x1f, 0xf3, 0x11, 0x94, 0x8b, 0x06,
+  0x75, 0x1a, 0xc0, 0x05, 0x4f, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x01,
+  0xd5, 0xaa, 0x81, 0x9d, 0x06, 0x29, 0x74, 0xa6, 0xc1, 0x88, 0xc1, 0x01,
+  0x80, 0x20, 0x18, 0x50, 0xae, 0x1a, 0xd8, 0x69, 0x10, 0x08, 0x17, 0x0c,
+  0x53, 0x31, 0x1a, 0xe8, 0x69, 0x00, 0x17, 0x3c, 0x35, 0x62, 0x70, 0x00,
+  0x20, 0x08, 0x06, 0x94, 0xac, 0x06, 0x7b, 0x1a, 0xb4, 0x10, 0x9b, 0x06,
+  0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0xcd, 0x6a, 0xb0, 0xa7, 0x41,
+  0x20, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0xdd, 0xf1, 0xd4, 0xe9, 0x67, 0x30,
+  0xcc, 0xbd, 0x64, 0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1,
+  0x01, 0x80, 0x20, 0x18, 0x7c, 0xb7, 0x1a, 0x9c, 0x6a, 0x60, 0xa6, 0x01,
+  0xad, 0x06, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83,
+  0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08,
+  0x06, 0x9c, 0xaf, 0x06, 0xae, 0x1a, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00,
+  0x20, 0x08, 0x06, 0xdc, 0xaf, 0x06, 0xaf, 0x1a, 0x24, 0x44, 0x30, 0x62,
+  0x80, 0x00, 0x20, 0x08, 0x06, 0x1c, 0xb8, 0x06, 0xb0, 0x1a, 0x24, 0x44,
+  0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x0b, 0xba, 0x06, 0xaf, 0x1a,
+  0xc0, 0x69, 0x10, 0xec, 0x6a, 0x00, 0xaa, 0x41, 0xaf, 0x06, 0xa3, 0x09,
+  0x01, 0x70, 0xc1, 0x53, 0xb3, 0x04, 0x25, 0x32, 0xdc, 0x00, 0x46, 0xe0,
+  0x1a, 0x80, 0xc1, 0x2c, 0x03, 0x7d, 0x94, 0x48, 0x60, 0x62, 0x1a, 0x90,
+  0x69, 0x10, 0x9f, 0xe1, 0x88, 0x32, 0x2a, 0xd3, 0x80, 0xf8, 0x66, 0x19,
+  0xea, 0x03, 0x3f, 0x02, 0x33, 0xd3, 0xc0, 0x8c, 0xe2, 0x63, 0xc1, 0x40,
+  0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4,
+  0xa7, 0x08, 0x75, 0x0d, 0x74, 0xb8, 0x21, 0x40, 0xd7, 0x00, 0x0c, 0x66,
+  0x19, 0xec, 0xe3, 0x3e, 0x02, 0x1b, 0xdc, 0x34, 0x80, 0xcf, 0x2c, 0x01,
+  0x7f, 0x58, 0x9b, 0x06, 0x44, 0x7c, 0x66, 0x09, 0xf8, 0x63, 0x38, 0x02,
+  0x8e, 0xdc, 0x34, 0x10, 0xbe, 0x59, 0x86, 0xfc, 0xe0, 0x8f, 0xc0, 0xe2,
+  0xe8, 0x4d, 0x83, 0xf8, 0x58, 0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78,
+  0xca, 0x82, 0x48, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0xa2, 0x5e, 0x03, 0x1d,
+  0x6e, 0x08, 0xe6, 0x35, 0x00, 0x83, 0x59, 0x06, 0xfd, 0xd8, 0x8f, 0xc0,
+  0xee, 0x34, 0x18, 0xe2, 0x33, 0x4b, 0xc0, 0x1f, 0x46, 0xe8, 0x69, 0x00,
+  0x9f, 0x59, 0x02, 0xfe, 0x18, 0x68, 0x79, 0x34, 0xfb, 0xc0, 0xee, 0x83,
+  0xd0, 0x0f, 0x61, 0x3f, 0xec, 0x31, 0xc0, 0x8f, 0x0b, 0x86, 0xb1, 0x3c,
+  0x0d, 0xfa, 0x34, 0x88, 0xcf, 0x70, 0x44, 0x1f, 0xf9, 0x69, 0x40, 0x7c,
+  0xb3, 0x0c, 0xfd, 0x01, 0x22, 0x81, 0xfd, 0x69, 0xe0, 0x47, 0xf1, 0xb1,
+  0x60, 0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x86, 0x7c, 0xac,
+  0x08, 0xe2, 0x53, 0xc4, 0xc8, 0x06, 0x3a, 0xdc, 0x10, 0x84, 0x6c, 0x00,
+  0x06, 0xb3, 0x0c, 0xfe, 0xf1, 0x1f, 0x81, 0x0d, 0xa7, 0x1a, 0xc0, 0x67,
+  0x96, 0x80, 0x44, 0x8c, 0x54, 0x03, 0x22, 0x3e, 0xb3, 0x04, 0x24, 0x32,
+  0x1c, 0x81, 0x4a, 0xa5, 0x1a, 0x08, 0xdf, 0x2c, 0x43, 0x88, 0x90, 0x48,
+  0x60, 0xa9, 0x64, 0xaa, 0x41, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30,
+  0x17, 0x3c, 0x65, 0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xe1, 0xb2,
+  0x81, 0x0e, 0x37, 0x04, 0x2c, 0x1b, 0x80, 0xc1, 0x2c, 0x83, 0x88, 0x8c,
+  0x48, 0x60, 0xae, 0x1a, 0x0c, 0xf1, 0x99, 0x25, 0x20, 0x11, 0x23, 0x66,
+  0x35, 0x80, 0xcf, 0x2c, 0x01, 0x89, 0x0c, 0xb4, 0x3c, 0x9a, 0x7f, 0x60,
+  0xff, 0x41, 0x88, 0x88, 0x30, 0x22, 0x66, 0x19, 0x80, 0xc8, 0x05, 0xc3,
+  0x5c, 0xf0, 0xd4, 0x6d, 0x4f, 0xdd, 0x9f, 0x06, 0xc3, 0x1c, 0x6d, 0x06,
+  0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82,
+  0xc1, 0xc7, 0xb3, 0x01, 0xcb, 0x06, 0xeb, 0x1a, 0xe4, 0x6c, 0x30, 0x9a,
+  0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4,
+  0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xc0, 0x8d, 0x6d,
+  0x30, 0xb3, 0x41, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xc0,
+  0x91, 0x6d, 0x40, 0xb3, 0x41, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82,
+  0x60, 0xc0, 0x95, 0x6d, 0x50, 0xb3, 0x41, 0x42, 0x04, 0x23, 0x06, 0x0a,
+  0x00, 0x82, 0x60, 0xb0, 0xb4, 0x6d, 0x40, 0xb3, 0x41, 0xbd, 0x06, 0x01,
+  0xd8, 0x06, 0x25, 0x1b, 0x88, 0x6d, 0x30, 0x9a, 0x10, 0x00, 0x17, 0x3c,
+  0x35, 0x4b, 0x50, 0x22, 0x03, 0x2d, 0x8f, 0x69, 0xc0, 0x07, 0x8b, 0x0b,
+  0xef, 0xc1, 0x12, 0xf2, 0x21, 0x90, 0x08, 0x8b, 0x0b, 0xf3, 0x31, 0xcb,
+  0x60, 0x22, 0x28, 0x52, 0x4e, 0xc3, 0x11, 0xea, 0x74, 0xb2, 0xc1, 0xf0,
+  0xdd, 0x3a, 0x0d, 0x33, 0xdc, 0x10, 0xc8, 0x6b, 0x40, 0x06, 0x35, 0x04,
+  0x3a, 0x1c, 0xc1, 0x4e, 0x2b, 0x1b, 0x0c, 0x5f, 0x05, 0x82, 0x9e, 0x3b,
+  0x0d, 0x33, 0xdc, 0x10, 0xd4, 0x6b, 0x40, 0x06, 0x15, 0x0c, 0x3a, 0xcb,
+  0x70, 0x22, 0x3c, 0x12, 0x5c, 0xae, 0x06, 0xc3, 0x9c, 0x7b, 0x06, 0xc3,
+  0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x47, 0xb7, 0x01, 0xd9, 0x06,
+  0x23, 0x1b, 0xc4, 0x6d, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30,
+  0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0xc4, 0x21, 0x23, 0x06, 0x08,
+  0x00, 0x82, 0x60, 0xc0, 0xed, 0x6d, 0xb0, 0xb6, 0xc1, 0x41, 0x04, 0x23,
+  0x06, 0x08, 0x00, 0x82, 0x60, 0xc0, 0xf1, 0x6d, 0xc0, 0xb6, 0x01, 0x43,
+  0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xc0, 0xf5, 0x6d, 0xd0, 0xb6,
+  0x81, 0x44, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0x94, 0x6e,
+  0xc0, 0xb6, 0x41, 0xcb, 0x06, 0x01, 0xde, 0x06, 0x3d, 0x1b, 0xe8, 0x6d,
+  0x30, 0x9a, 0x10, 0x00, 0x17, 0x3c, 0x35, 0x4b, 0xc0, 0x23, 0xc3, 0x0d,
+  0xfd, 0xe4, 0xb7, 0x01, 0x18, 0xcc, 0x32, 0xa4, 0x88, 0x8a, 0x04, 0x85,
+  0xb2, 0xc1, 0xdb, 0x06, 0x70, 0xc1, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82,
+  0x60, 0x40, 0x9d, 0x6e, 0x00, 0xb7, 0x01, 0x49, 0x85, 0x6d, 0x30, 0x62,
+  0x70, 0x00, 0x20, 0x08, 0x06, 0x14, 0xea, 0x06, 0x70, 0x1b, 0x04, 0xc2,
+  0x05, 0xc3, 0xd4, 0xca, 0x06, 0x74, 0x1b, 0xc0, 0x05, 0x4f, 0x8d, 0x18,
+  0x1c, 0x00, 0x08, 0x82, 0x01, 0xc5, 0xba, 0x41, 0xdd, 0x06, 0x27, 0x65,
+  0xb6, 0xc1, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50, 0xad, 0x1b, 0xd4,
+  0x6d, 0x10, 0x08, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x77, 0x3c, 0x75, 0xf4,
+  0x1a, 0x0c, 0x73, 0x29, 0x1a, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33,
+  0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x5f, 0xec, 0x06, 0xa1, 0x1b, 0x80,
+  0x6d, 0xe0, 0xba, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68,
+  0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00,
+  0x08, 0x82, 0x01, 0x87, 0xbb, 0x01, 0xea, 0x06, 0x09, 0x11, 0x8c, 0x18,
+  0x20, 0x00, 0x08, 0x82, 0x01, 0x97, 0xbb, 0x41, 0xea, 0x06, 0x09, 0x11,
+  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0xa7, 0xbb, 0x81, 0xea, 0x06,
+  0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0x22, 0xbe, 0x41,
+  0xea, 0x06, 0x6a, 0x1b, 0x04, 0xb5, 0x1b, 0xe8, 0x6d, 0x70, 0xbb, 0xc1,
+  0x68, 0x42, 0x00, 0x5c, 0xf0, 0xd4, 0x2c, 0x01, 0x8f, 0x0c, 0x37, 0xe8,
+  0x94, 0xee, 0x06, 0x60, 0x30, 0xcb, 0xb0, 0x22, 0x3c, 0x12, 0x18, 0xcf,
+  0x06, 0x3e, 0x1b, 0xc4, 0x67, 0x38, 0x02, 0xac, 0x7e, 0x36, 0x20, 0xbe,
+  0x59, 0x06, 0x16, 0x79, 0x91, 0xc0, 0xc0, 0x36, 0x08, 0xab, 0xf8, 0x58,
+  0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca, 0x02, 0x43, 0x3e, 0x56,
+  0x04, 0xf1, 0x29, 0x82, 0x7c, 0x03, 0x1d, 0x6e, 0x08, 0xc4, 0x37, 0x00,
+  0x83, 0x59, 0x86, 0x16, 0x71, 0x91, 0xc0, 0x06, 0xb4, 0x0d, 0xe0, 0x33,
+  0x4b, 0x30, 0x23, 0x76, 0xb6, 0x01, 0x11, 0x9f, 0x59, 0x82, 0x19, 0x19,
+  0x8e, 0x58, 0x2b, 0xb4, 0x0d, 0x84, 0x6f, 0x96, 0x01, 0x46, 0x66, 0x24,
+  0x30, 0xb6, 0x4a, 0xdb, 0x20, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98,
+  0x0b, 0x9e, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x78, 0xdf,
+  0x40, 0x87, 0x1b, 0x82, 0xf6, 0x0d, 0xc0, 0x60, 0x96, 0x21, 0x46, 0x64,
+  0x24, 0xb0, 0xb8, 0x0d, 0x86, 0xf8, 0xcc, 0x12, 0xcc, 0x88, 0x11, 0x74,
+  0x1b, 0xc0, 0x67, 0x96, 0x60, 0x46, 0x06, 0x5a, 0x1e, 0xad, 0x45, 0x30,
+  0x17, 0x21, 0x62, 0x44, 0x90, 0x11, 0x76, 0x0d, 0x5e, 0xe4, 0x82, 0x61,
+  0x6c, 0x6e, 0x83, 0xbb, 0x0d, 0xe2, 0x33, 0x1c, 0x71, 0x57, 0x78, 0x1b,
+  0x10, 0xdf, 0x2c, 0x03, 0x8d, 0xdc, 0x48, 0x60, 0x79, 0x1b, 0xe0, 0x55,
+  0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x81, 0x21,
+  0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xd1, 0xbf, 0x81, 0x0e, 0x37, 0x04, 0xfb,
+  0x1b, 0x80, 0xc1, 0x2c, 0x43, 0x8d, 0xd8, 0x48, 0x60, 0x43, 0xe8, 0x06,
+  0xf0, 0x99, 0x25, 0xd8, 0x11, 0xf3, 0xdb, 0x80, 0x88, 0xcf, 0x2c, 0xc1,
+  0x8e, 0x0c, 0x47, 0x88, 0xd6, 0xdf, 0x06, 0xc2, 0x37, 0xcb, 0x80, 0x23,
+  0x3b, 0x12, 0xd8, 0x68, 0x81, 0x6e, 0x10, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c,
+  0x30, 0xcc, 0x05, 0x4f, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45,
+  0xa0, 0x70, 0xa0, 0xc3, 0x0d, 0x81, 0x09, 0x07, 0x60, 0x30, 0xcb, 0x90,
+  0x23, 0x3a, 0x12, 0x18, 0xea, 0x06, 0x43, 0x7c, 0x66, 0x09, 0x76, 0xc4,
+  0x88, 0xd6, 0x0d, 0xe0, 0x33, 0x4b, 0xb0, 0x23, 0x03, 0x2d, 0x8f, 0x56,
+  0x23, 0x98, 0x8d, 0x10, 0x39, 0x22, 0xe8, 0x88, 0xce, 0x06, 0x37, 0x72,
+  0xc1, 0x30, 0x17, 0x3c, 0x75, 0xdb, 0x53, 0x97, 0xb7, 0xc1, 0x30, 0xe7,
+  0xaa, 0xc1, 0x30, 0x47, 0x0c, 0x73, 0xc4, 0x30, 0x23, 0x06, 0x07, 0x00,
+  0x82, 0x60, 0xf0, 0xd9, 0x70, 0x60, 0xc2, 0x41, 0xf9, 0x06, 0x33, 0x1c,
+  0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68,
+  0x02, 0x31, 0x14, 0x91, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70,
+  0x3d, 0x1c, 0xb4, 0x70, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20,
+  0x18, 0x70, 0x3e, 0x1c, 0xb8, 0x70, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02,
+  0x80, 0x20, 0x18, 0x70, 0x3f, 0x1c, 0xbc, 0x70, 0x90, 0x10, 0xc1, 0x88,
+  0x81, 0x02, 0x80, 0x20, 0x18, 0x2c, 0x67, 0x1c, 0xb8, 0x70, 0xf0, 0xbe,
+  0x41, 0xa0, 0xc3, 0xc1, 0xff, 0x06, 0x3c, 0x1c, 0x8c, 0x26, 0x04, 0xc0,
+  0x05, 0x4f, 0xcd, 0x12, 0xf0, 0xc8, 0x40, 0xcb, 0x63, 0x1a, 0x27, 0x02,
+  0x82, 0x83, 0x89, 0xb0, 0x44, 0x8a, 0x08, 0x3b, 0x02, 0x82, 0x83, 0x8a,
+  0xcc, 0x32, 0xf4, 0xc8, 0x8f, 0xfc, 0xd6, 0x70, 0x84, 0xfa, 0x84, 0x70,
+  0x30, 0x7c, 0xb7, 0x3e, 0xc3, 0x0c, 0x37, 0x04, 0xec, 0x1b, 0x90, 0x41,
+  0x0d, 0x81, 0x0e, 0x47, 0x98, 0x57, 0x09, 0x07, 0xc3, 0x57, 0x81, 0xa0,
+  0x87, 0x5e, 0xc3, 0x0c, 0x37, 0x04, 0xef, 0x1b, 0x90, 0x41, 0x05, 0x83,
+  0xce, 0x32, 0xf8, 0xc8, 0x9c, 0x04, 0x37, 0xbb, 0xc1, 0x30, 0x87, 0xae,
+  0xc1, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xf0, 0xb9, 0x71, 0xe0,
+  0xc3, 0x41, 0xff, 0x06, 0x6b, 0x1c, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82,
+  0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x71, 0xc8, 0x88,
+  0x01, 0x02, 0x80, 0x20, 0x18, 0x70, 0x75, 0x1c, 0x94, 0x71, 0x70, 0x10,
+  0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70, 0x76, 0x1c, 0x98, 0x71,
+  0xc0, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70, 0x77, 0x1c,
+  0x9c, 0x71, 0x20, 0x11, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c,
+  0x7f, 0x1c, 0x98, 0x71, 0x70, 0xc2, 0x41, 0x20, 0xc7, 0xc1, 0x0d, 0x07,
+  0x74, 0x1c, 0x8c, 0x26, 0x04, 0xc0, 0x05, 0x4f, 0xcd, 0x12, 0xcc, 0xc9,
+  0x70, 0xc3, 0x7d, 0xe1, 0x71, 0x00, 0x06, 0xb3, 0x0c, 0x60, 0x12, 0x26,
+  0x41, 0x89, 0x70, 0x90, 0xc6, 0x01, 0x5c, 0xf0, 0xd4, 0x88, 0xc1, 0x01,
+  0x80, 0x20, 0x18, 0x50, 0xa1, 0x1c, 0xa8, 0x71, 0x40, 0x42, 0x3b, 0x1c,
+  0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x01, 0x25, 0xca, 0x81, 0x1a, 0x07,
+  0x81, 0x70, 0xc1, 0x30, 0x55, 0xc2, 0x81, 0x1b, 0x07, 0x70, 0xc1, 0x53,
+  0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0x99, 0x72, 0xf0, 0xc6, 0x41,
+  0x88, 0x81, 0x71, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0xd4, 0x29,
+  0x07, 0x6f, 0x1c, 0x04, 0xc2, 0x05, 0xc3, 0x5c, 0xf0, 0xd4, 0x1d, 0x4f,
+  0x9d, 0xfb, 0x06, 0xc3, 0xdc, 0xc8, 0x06, 0xc3, 0x1c, 0x31, 0xcc, 0x11,
+  0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0xb7, 0xca, 0xc1, 0x1e,
+  0x07, 0x3a, 0x1c, 0xa0, 0x72, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42,
+  0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23, 0x06,
+  0x08, 0x00, 0x82, 0x60, 0xc0, 0xc9, 0x72, 0x20, 0xca, 0x41, 0x42, 0x04,
+  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xc0, 0xcd, 0x72, 0x30, 0xca, 0x41,
+  0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xc0, 0xd1, 0x72, 0x40,
+  0xca, 0x41, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0xf0,
+  0x72, 0x30, 0xca, 0x01, 0x19, 0x07, 0xc1, 0x2b, 0x07, 0x74, 0x1c, 0xc4,
+  0x72, 0x30, 0x9a, 0x10, 0x00, 0x17, 0x3c, 0x35, 0x4b, 0x30, 0x27, 0xc3,
+  0x0d, 0x34, 0x46, 0xcb, 0x01, 0x18, 0xcc, 0x32, 0x88, 0xc9, 0x9c, 0x04,
+  0x66, 0xc3, 0x01, 0x0e, 0x07, 0xf1, 0x19, 0x8e, 0x00, 0xa3, 0x1c, 0x0e,
+  0x88, 0x6f, 0x96, 0x61, 0x4c, 0xcc, 0x24, 0x30, 0x1d, 0x0e, 0xc2, 0x28,
+  0x3e, 0x16, 0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0xc0, 0x90,
+  0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xf0, 0xe5, 0x40, 0x87, 0x1b, 0x02, 0x5e,
+  0x0e, 0xc0, 0x60, 0x96, 0x81, 0x4c, 0xca, 0x24, 0xb0, 0x41, 0x8c, 0x03,
+  0xf8, 0xcc, 0x12, 0xa8, 0x89, 0x85, 0x71, 0x40, 0xc4, 0x67, 0x96, 0x40,
+  0x4d, 0x86, 0x23, 0xd6, 0x48, 0x8c, 0x03, 0xe1, 0x9b, 0x65, 0x38, 0x13,
+  0x35, 0x09, 0x8c, 0x8d, 0xc6, 0x38, 0x88, 0x8f, 0x05, 0x0e, 0x7d, 0x2e,
+  0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22,
+  0xd2, 0x39, 0xd0, 0xe1, 0x86, 0xe0, 0x9c, 0x03, 0x30, 0x98, 0x65, 0x40,
+  0x93, 0x34, 0x09, 0x6c, 0x8d, 0x83, 0x21, 0x3e, 0xb3, 0x04, 0x6a, 0x62,
+  0x84, 0x1b, 0x07, 0xf0, 0x99, 0x25, 0x50, 0x93, 0x81, 0x96, 0x47, 0x23,
+  0x13, 0xac, 0x4c, 0x08, 0x34, 0x11, 0xd2, 0x84, 0x1d, 0x03, 0x33, 0xb9,
+  0x60, 0x18, 0x6b, 0xe3, 0x20, 0x8e, 0x83, 0xf8, 0x0c, 0x47, 0xc4, 0x99,
+  0x1c, 0x07, 0xc4, 0x37, 0xcb, 0xb0, 0x26, 0x6e, 0x12, 0xd8, 0x1c, 0x07,
+  0x72, 0x16, 0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59,
+  0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xdc, 0x73, 0xa0, 0xc3, 0x0d,
+  0x41, 0x3d, 0x07, 0x60, 0x30, 0xcb, 0xc0, 0x26, 0x6d, 0x12, 0xd8, 0xb0,
+  0xc7, 0x01, 0x7c, 0x66, 0x09, 0xe4, 0xc4, 0xf0, 0x38, 0x20, 0xe2, 0x33,
+  0x4b, 0x20, 0x27, 0xc3, 0x11, 0x7c, 0x96, 0xc7, 0x81, 0xf0, 0xcd, 0x32,
+  0xbc, 0x89, 0x9c, 0x04, 0xd6, 0x67, 0x7a, 0x1c, 0xc4, 0xc7, 0x02, 0x87,
+  0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88,
+  0x4f, 0x11, 0x22, 0x1d, 0xe8, 0x70, 0x43, 0x00, 0xd2, 0x01, 0x18, 0xcc,
+  0x32, 0xc0, 0x49, 0x9c, 0x04, 0x26, 0xca, 0xc1, 0x10, 0x9f, 0x59, 0x02,
+  0x39, 0x31, 0xe2, 0x94, 0x03, 0xf8, 0xcc, 0x12, 0xc8, 0xc9, 0x40, 0xcb,
+  0xa3, 0xb1, 0x09, 0xd6, 0x26, 0x04, 0x9c, 0x08, 0x71, 0xe2, 0xd2, 0x81,
+  0x9b, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0xdd, 0xf6, 0xd4, 0xcd, 0x71, 0x30,
+  0xcc, 0xa1, 0x6e, 0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1,
+  0x01, 0x80, 0x20, 0x18, 0x7c, 0x30, 0x1d, 0x80, 0x74, 0xf0, 0xcb, 0x41,
+  0x4b, 0x07, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83,
+  0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08,
+  0x06, 0xdc, 0x4d, 0x07, 0x27, 0x1d, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00,
+  0x20, 0x08, 0x06, 0x1c, 0x4e, 0x07, 0x28, 0x1d, 0x24, 0x44, 0x30, 0x62,
+  0x80, 0x00, 0x20, 0x08, 0x06, 0x5c, 0x4e, 0x07, 0x29, 0x1d, 0x24, 0x44,
+  0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x4b, 0x58, 0x07, 0x28, 0x1d,
+  0xa4, 0x73, 0x10, 0xd0, 0x74, 0x90, 0xcf, 0x81, 0x4d, 0x07, 0xa3, 0x09,
+  0x01, 0x70, 0xc1, 0x53, 0xb3, 0x04, 0x73, 0x32, 0xd0, 0xf2, 0x98, 0x86,
+  0x8f, 0xd0, 0xe7, 0xd0, 0x23, 0x2c, 0x01, 0x26, 0x82, 0x9c, 0xd0, 0xe7,
+  0x10, 0x26, 0xb3, 0x0c, 0x74, 0x62, 0x27, 0xb9, 0x36, 0x1c, 0x91, 0x3e,
+  0xfb, 0x1c, 0x0c, 0xdf, 0xa9, 0xcf, 0x30, 0xc3, 0x0d, 0x81, 0x39, 0x07,
+  0x64, 0x50, 0x43, 0xa0, 0xc3, 0x11, 0xe0, 0xf6, 0xcf, 0xc1, 0xf0, 0x55,
+  0x20, 0xe8, 0x89, 0xdb, 0x30, 0xc3, 0x0d, 0x41, 0x3a, 0x07, 0x64, 0x50,
+  0xc1, 0xa0, 0xb3, 0x0c, 0x75, 0xa2, 0x2a, 0xc1, 0xb5, 0x72, 0x30, 0xcc,
+  0x89, 0x6f, 0x30, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x7c, 0x68,
+  0x1d, 0xe0, 0x74, 0x70, 0xcf, 0x41, 0x59, 0x07, 0xa3, 0x09, 0x01, 0x30,
+  0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x1c,
+  0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xdc, 0x5b, 0x07, 0x3f, 0x1d,
+  0x1c, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x1c, 0x5c, 0x07,
+  0x60, 0x1d, 0x30, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x5c,
+  0x5c, 0x07, 0x61, 0x1d, 0x48, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08,
+  0x06, 0x4b, 0x5e, 0x07, 0x60, 0x1d, 0x84, 0x74, 0x10, 0xb0, 0x75, 0x10,
+  0xd3, 0x81, 0x5b, 0x07, 0xa3, 0x09, 0x01, 0x70, 0xc1, 0x53, 0xb3, 0x04,
+  0xaa, 0x32, 0xdc, 0x10, 0x6f, 0x72, 0x1d, 0x80, 0xc1, 0x2c, 0xc3, 0x9d,
+  0xe0, 0x49, 0x50, 0xfc, 0x1c, 0x8c, 0x75, 0x00, 0x17, 0x3c, 0x35, 0x62,
+  0x70, 0x00, 0x20, 0x08, 0x06, 0xd4, 0x5e, 0x07, 0x64, 0x1d, 0x8c, 0x50,
+  0x4d, 0x07, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0xf1, 0x75, 0x40,
+  0xd6, 0x41, 0x20, 0x5c, 0x30, 0x4c, 0xfd, 0x73, 0x80, 0xd6, 0x01, 0x5c,
+  0xf0, 0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50, 0xa0, 0x1d, 0xa4,
+  0x75, 0xb0, 0x6f, 0x3a, 0x1d, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x01,
+  0x15, 0xda, 0x41, 0x5a, 0x07, 0x81, 0x70, 0xc1, 0x30, 0x17, 0x3c, 0x75,
+  0xc7, 0x53, 0x87, 0xce, 0xc1, 0x30, 0xd7, 0xbf, 0xc1, 0x30, 0x47, 0x0c,
+  0x73, 0xc4, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xf0, 0x95, 0x76,
+  0x50, 0xd7, 0x01, 0x4d, 0x07, 0xa2, 0x1d, 0x8c, 0x26, 0x04, 0xc0, 0x68,
+  0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x91, 0xc8,
+  0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70, 0xac, 0x1d, 0xf0, 0x75, 0x90,
+  0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70, 0xad, 0x1d, 0xf4,
+  0x75, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70, 0xae,
+  0x1d, 0xf8, 0x75, 0x90, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18,
+  0x2c, 0xb6, 0x1d, 0xf4, 0x75, 0xe0, 0xd3, 0x41, 0x90, 0xda, 0x81, 0x5b,
+  0x07, 0xab, 0x1d, 0x8c, 0x26, 0x04, 0xc0, 0x05, 0x4f, 0xcd, 0x12, 0xa8,
+  0xca, 0x70, 0x83, 0xcb, 0xb9, 0x76, 0x00, 0x06, 0xb3, 0x0c, 0x79, 0xa2,
+  0x2a, 0x81, 0xc1, 0x74, 0x20, 0xd3, 0x41, 0x7c, 0x86, 0x23, 0x7e, 0x68,
+  0xa6, 0x03, 0xe2, 0x9b, 0x65, 0xd0, 0x93, 0x3e, 0x09, 0x8c, 0xa6, 0x03,
+  0x30, 0x8a, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c,
+  0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0x70, 0x3b, 0xd0, 0xe1, 0x86,
+  0xc0, 0xb6, 0x03, 0x30, 0x98, 0x65, 0xd8, 0x13, 0x3e, 0x09, 0x6c, 0xe0,
+  0xe9, 0x00, 0x3e, 0xb3, 0x04, 0xa1, 0x62, 0x3b, 0x1d, 0x10, 0xf1, 0x99,
+  0x25, 0x08, 0x95, 0xe1, 0x08, 0x35, 0xe2, 0xe9, 0x40, 0xf8, 0x66, 0x19,
+  0xfc, 0x24, 0x54, 0x02, 0x5b, 0xa3, 0x9e, 0x0e, 0xe2, 0x63, 0x81, 0x43,
+  0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4,
+  0xa7, 0x88, 0xf1, 0x0e, 0x74, 0xb8, 0x21, 0x08, 0xef, 0x00, 0x0c, 0x66,
+  0x19, 0xfe, 0x04, 0x54, 0x02, 0x2b, 0xeb, 0x60, 0x88, 0xcf, 0x2c, 0x41,
+  0xa8, 0x18, 0x81, 0xd6, 0x01, 0x7c, 0x66, 0x09, 0x42, 0x65, 0xa0, 0xe5,
+  0xd1, 0xf6, 0x04, 0xe3, 0x13, 0xe2, 0x4f, 0x04, 0x50, 0x51, 0xc7, 0xa0,
+  0x4f, 0x2e, 0x18, 0xc6, 0xce, 0x3a, 0x58, 0xeb, 0x20, 0x3e, 0xc3, 0x11,
+  0x6b, 0xc7, 0xd6, 0x01, 0xf1, 0xcd, 0x32, 0x88, 0x4a, 0xa9, 0x04, 0xd6,
+  0xd6, 0x01, 0xdb, 0xc5, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0xc1,
+  0x53, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0xf1, 0x1d, 0xe8,
+  0x70, 0x43, 0xf0, 0xde, 0x01, 0x18, 0xcc, 0x32, 0x8c, 0x0a, 0xa9, 0x04,
+  0x36, 0xd4, 0x75, 0x00, 0x9f, 0x59, 0x82, 0x54, 0x31, 0xb9, 0x0e, 0x88,
+  0xf8, 0xcc, 0x12, 0xa4, 0xca, 0x70, 0x84, 0xdd, 0xcd, 0x75, 0x20, 0x7c,
+  0xb3, 0x0c, 0xa6, 0x92, 0x2a, 0x81, 0xdd, 0x1d, 0x5d, 0x07, 0xf1, 0xb1,
+  0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x91, 0x7c, 0xac,
+  0x08, 0xe2, 0x53, 0x04, 0x7f, 0x07, 0x3a, 0xdc, 0x10, 0xe8, 0x77, 0x00,
+  0x06, 0xb3, 0x0c, 0xa7, 0x82, 0x2a, 0x81, 0xf1, 0x75, 0x30, 0xc4, 0x67,
+  0x96, 0x20, 0x55, 0x8c, 0x08, 0xed, 0x00, 0x3e, 0xb3, 0x04, 0xa9, 0x32,
+  0xd0, 0xf2, 0x68, 0xa3, 0x82, 0x91, 0x0a, 0x71, 0x2a, 0x02, 0xaa, 0x88,
+  0x7c, 0x50, 0x2a, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0xb7, 0x3d, 0x75, 0x6d,
+  0x1d, 0x0c, 0x73, 0xa2, 0x1c, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33,
+  0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x9f, 0x8a, 0x07, 0xfa, 0x1d, 0xe4,
+  0x76, 0x70, 0xe2, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68,
+  0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00,
+  0x08, 0x82, 0x01, 0x17, 0xe3, 0x41, 0x88, 0x07, 0x09, 0x11, 0x8c, 0x18,
+  0x20, 0x00, 0x08, 0x82, 0x01, 0x27, 0xe3, 0x81, 0x88, 0x07, 0x09, 0x11,
+  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x37, 0xe3, 0xc1, 0x88, 0x07,
+  0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0xb2, 0xe3, 0x81,
+  0x88, 0x07, 0xe3, 0x1d, 0x04, 0x2e, 0x1e, 0xcc, 0x77, 0x00, 0xe3, 0xc1,
+  0x68, 0x42, 0x00, 0x5c, 0xf0, 0xd4, 0x2c, 0x81, 0xaa, 0x0c, 0xb4, 0x3c,
+  0xa6, 0x51, 0x27, 0xe8, 0x3b, 0xd0, 0x09, 0x4b, 0xdc, 0x89, 0x90, 0x2a,
+  0xe8, 0x3b, 0xe0, 0x89, 0xd9, 0x1e, 0x7d, 0x07, 0xf0, 0x99, 0x65, 0x58,
+  0x95, 0x56, 0xa1, 0xbd, 0xe1, 0x08, 0xdc, 0xb3, 0xef, 0x60, 0xf8, 0x2e,
+  0xf7, 0x86, 0x19, 0x6e, 0x08, 0xc2, 0x3b, 0x20, 0x83, 0x1a, 0x02, 0x1d,
+  0x8e, 0x28, 0xf4, 0x3b, 0x18, 0xbe, 0x0a, 0x04, 0xbd, 0x63, 0x98, 0xe1,
+  0x86, 0x80, 0xbc, 0x03, 0x32, 0xa8, 0x60, 0xd0, 0x59, 0x06, 0x56, 0x09,
+  0x97, 0xe0, 0x50, 0x3b, 0x18, 0xe6, 0x7a, 0x39, 0x18, 0x66, 0xc4, 0xe0,
+  0x00, 0x40, 0x10, 0x0c, 0xbe, 0x31, 0x0f, 0x66, 0x3c, 0x90, 0xef, 0x00,
+  0xcc, 0x83, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41,
+  0x18, 0x4d, 0x20, 0x86, 0x22, 0x0e, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04,
+  0x03, 0x4e, 0xcd, 0x03, 0x1d, 0x0f, 0x0e, 0x22, 0x18, 0x31, 0x40, 0x00,
+  0x10, 0x04, 0x03, 0x6e, 0xcd, 0x83, 0x1d, 0x0f, 0x18, 0x22, 0x18, 0x31,
+  0x40, 0x00, 0x10, 0x04, 0x03, 0x8e, 0xcd, 0x03, 0x1e, 0x0f, 0x24, 0x22,
+  0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x85, 0xce, 0x83, 0x1d, 0x0f,
+  0xf8, 0x3b, 0x08, 0xce, 0x3c, 0x60, 0xf1, 0x20, 0xcd, 0x83, 0xd1, 0x84,
+  0x00, 0xb8, 0xe0, 0xa9, 0x59, 0x82, 0x70, 0x19, 0x6e, 0x60, 0xbf, 0x36,
+  0x0f, 0xc0, 0x60, 0x96, 0xc1, 0x55, 0x5e, 0x25, 0xa8, 0xfb, 0x0e, 0x7c,
+  0x3c, 0x80, 0x0b, 0x9e, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0xca,
+  0xce, 0x83, 0x1f, 0x0f, 0xe4, 0x0f, 0xc6, 0x83, 0x11, 0x83, 0x03, 0x00,
+  0x41, 0x30, 0xa0, 0xee, 0x3c, 0xf8, 0xf1, 0x20, 0x10, 0x2e, 0x18, 0xa6,
+  0xf4, 0x3b, 0x18, 0xf3, 0x00, 0x2e, 0x78, 0x6a, 0xc4, 0xe0, 0x00, 0x40,
+  0x10, 0x0c, 0xa8, 0x3d, 0x0f, 0xc8, 0x3c, 0x00, 0x83, 0x1a, 0x0f, 0x46,
+  0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0xe2, 0xf3, 0x80, 0xcc, 0x83, 0x40,
+  0xb8, 0x60, 0x98, 0x0b, 0x9e, 0xba, 0xe3, 0xa9, 0x1b, 0xef, 0x60, 0x98,
+  0xc3, 0xe7, 0x60, 0x98, 0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03,
+  0x00, 0x41, 0x30, 0xf8, 0x40, 0x3d, 0x80, 0xf3, 0xe0, 0xc5, 0x83, 0x3e,
+  0x0f, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61,
+  0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c,
+  0xb8, 0x53, 0x0f, 0xee, 0x3c, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40,
+  0x10, 0x0c, 0x38, 0x54, 0x0f, 0xf0, 0x3c, 0x48, 0x88, 0x60, 0xc4, 0x00,
+  0x01, 0x40, 0x10, 0x0c, 0xb8, 0x54, 0x0f, 0xf2, 0x3c, 0x48, 0x88, 0x60,
+  0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x96, 0x58, 0x0f, 0xf0, 0x3c, 0xc8,
+  0xf1, 0x20, 0x20, 0xf5, 0x20, 0xcd, 0x03, 0x53, 0x0f, 0x46, 0x13, 0x02,
+  0xe0, 0x82, 0xa7, 0x66, 0x09, 0xc2, 0x65, 0xb8, 0x21, 0x05, 0x83, 0x54,
+  0x0f, 0xc0, 0x60, 0x96, 0x01, 0x56, 0xc2, 0x25, 0xb0, 0x15, 0x0f, 0x5a,
+  0x3c, 0x88, 0xcf, 0x70, 0x84, 0x0b, 0x06, 0x2e, 0x1e, 0x10, 0xdf, 0x2c,
+  0x43, 0xac, 0xd0, 0x4a, 0x60, 0x2f, 0x1e, 0xbc, 0x60, 0x10, 0x1f, 0x0b,
+  0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x60, 0xc8, 0xc7, 0x8a,
+  0x20, 0x3e, 0x45, 0xcc, 0x7a, 0xa0, 0xc3, 0x0d, 0x41, 0xac, 0x07, 0x60,
+  0x30, 0xcb, 0x20, 0x2b, 0xb3, 0x12, 0xd8, 0x70, 0xe3, 0x01, 0x7c, 0x66,
+  0x09, 0x70, 0xc5, 0x6c, 0x3c, 0x20, 0xe2, 0x33, 0x4b, 0x80, 0x2b, 0xc3,
+  0x11, 0x39, 0x18, 0xdc, 0x78, 0x20, 0x7c, 0xb3, 0x0c, 0xb5, 0x82, 0x2b,
+  0x81, 0xe9, 0x60, 0x80, 0xe3, 0x41, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1,
+  0x30, 0x17, 0x3c, 0x65, 0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xe1,
+  0xeb, 0x81, 0x0e, 0x37, 0x04, 0xbc, 0x1e, 0x80, 0xc1, 0x2c, 0x83, 0xad,
+  0xdc, 0x4a, 0x60, 0x60, 0x1e, 0x0c, 0xf1, 0x99, 0x25, 0xc0, 0x15, 0x23,
+  0xc6, 0x3c, 0x80, 0xcf, 0x2c, 0x01, 0xae, 0x0c, 0xb4, 0x3c, 0x9a, 0xac,
+  0x60, 0xb3, 0x42, 0xd8, 0x8a, 0x70, 0x2b, 0xa8, 0x28, 0xd0, 0xca, 0x05,
+  0xc3, 0x98, 0x98, 0x07, 0x66, 0x1e, 0xc4, 0x67, 0x38, 0x02, 0x16, 0xce,
+  0x3c, 0x20, 0xbe, 0x59, 0x86, 0x5c, 0xe1, 0x95, 0xc0, 0xd0, 0x3c, 0x88,
+  0x85, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca, 0x02,
+  0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x82, 0xdd, 0x03, 0x1d, 0x6e, 0x08,
+  0xd4, 0x3d, 0x00, 0x83, 0x59, 0x06, 0x5d, 0xd9, 0x95, 0xc0, 0x06, 0x38,
+  0x0f, 0xe0, 0x33, 0x4b, 0x00, 0x2e, 0xd6, 0xe6, 0x01, 0x11, 0x9f, 0x59,
+  0x02, 0x70, 0x19, 0x8e, 0xd8, 0x05, 0x37, 0x0f, 0x84, 0x6f, 0x96, 0xa1,
+  0x57, 0xc0, 0x25, 0x30, 0x5e, 0x78, 0xf3, 0x20, 0x3e, 0x16, 0x38, 0xf4,
+  0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c,
+  0x8a, 0xb8, 0xf7, 0x40, 0x87, 0x1b, 0x82, 0x7a, 0x0f, 0xc0, 0x60, 0x96,
+  0xc1, 0x57, 0x7e, 0x25, 0xb0, 0x3b, 0x0f, 0x86, 0xf8, 0xcc, 0x12, 0x80,
+  0x8b, 0x11, 0x7c, 0x1e, 0xc0, 0x67, 0x96, 0x00, 0x5c, 0x06, 0x5a, 0x1e,
+  0x4d, 0x57, 0xb0, 0x5d, 0x21, 0x7c, 0x45, 0xf8, 0x15, 0xd6, 0xe0, 0x95,
+  0x0b, 0x86, 0xb9, 0xe0, 0xa9, 0xdb, 0x9e, 0x3a, 0x34, 0x0f, 0x86, 0xb9,
+  0xbe, 0x0e, 0x86, 0x39, 0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00,
+  0x10, 0x04, 0x83, 0xaf, 0xe4, 0x83, 0x7a, 0x0f, 0x68, 0x3d, 0x10, 0xf9,
+  0x60, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46,
+  0x13, 0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80,
+  0x63, 0xf9, 0x80, 0xdf, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04,
+  0xc1, 0x80, 0x6b, 0xf9, 0xa0, 0xdf, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x10,
+  0x00, 0x04, 0xc1, 0x80, 0x73, 0xf9, 0xc0, 0xdf, 0x83, 0x84, 0x08, 0x46,
+  0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0xb1, 0xf9, 0xa0, 0xdf, 0x03, 0x5f,
+  0x0f, 0x82, 0x94, 0x0f, 0xdc, 0x3d, 0x58, 0xf9, 0x60, 0x34, 0x21, 0x00,
+  0x2e, 0x78, 0x6a, 0x96, 0x20, 0x5c, 0x06, 0x5a, 0x1e, 0xd3, 0x60, 0x15,
+  0xdf, 0x1e, 0x56, 0x85, 0x25, 0x5c, 0x45, 0x00, 0x17, 0xdf, 0x1e, 0x5e,
+  0x65, 0x96, 0x41, 0x5c, 0xc8, 0xc5, 0x15, 0x83, 0xe1, 0x88, 0x59, 0x0c,
+  0xe0, 0x3d, 0x18, 0xbe, 0xa3, 0xc5, 0x60, 0x98, 0xe1, 0x86, 0x60, 0xd7,
+  0x03, 0x32, 0xa8, 0x21, 0xd0, 0xe1, 0x88, 0x7f, 0xa0, 0xf7, 0x60, 0xf8,
+  0x2a, 0x10, 0xf4, 0x42, 0x62, 0x98, 0xe1, 0x86, 0xc0, 0xd7, 0x03, 0x32,
+  0xa8, 0x60, 0xd0, 0x59, 0x86, 0x71, 0xc1, 0x97, 0xe0, 0x44, 0x3d, 0x18,
+  0xe6, 0x6e, 0x3b, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xbe,
+  0x9e, 0x0f, 0x5a, 0x3e, 0x60, 0xf7, 0x40, 0xe7, 0x83, 0xd1, 0x84, 0x00,
+  0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22,
+  0x0e, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x8e, 0xec, 0x03, 0x9a,
+  0x0f, 0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xae, 0xec,
+  0x83, 0x9a, 0x0f, 0x18, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03,
+  0xce, 0xec, 0x03, 0x9b, 0x0f, 0x24, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10,
+  0x04, 0x83, 0xc5, 0xed, 0x83, 0x9a, 0x0f, 0xec, 0x3d, 0x08, 0xc2, 0x3e,
+  0x30, 0xf9, 0x60, 0xec, 0x83, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xa9, 0x59,
+  0x02, 0x7c, 0x19, 0x6e, 0x30, 0xc7, 0xe0, 0xec, 0x03, 0x30, 0x98, 0x65,
+  0x28, 0x17, 0x73, 0x09, 0x2a, 0xde, 0x03, 0x9c, 0x0f, 0xe0, 0x82, 0xa7,
+  0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x82, 0xfb, 0x20, 0xe7, 0x83,
+  0x76, 0x0c, 0x54, 0x3e, 0x18, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x2a,
+  0xee, 0x83, 0x9c, 0x0f, 0x02, 0xe1, 0x82, 0x61, 0x8a, 0xde, 0x83, 0x9e,
+  0x0f, 0xe0, 0x82, 0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0xaa,
+  0xfb, 0xc0, 0xe7, 0x03, 0x9d, 0x78, 0xf9, 0x60, 0xc4, 0xe0, 0x00, 0x40,
+  0x10, 0x0c, 0x28, 0xbb, 0x0f, 0x7c, 0x3e, 0x08, 0x84, 0x0b, 0x86, 0xb9,
+  0xe0, 0xa9, 0x3b, 0x9e, 0xba, 0x5e, 0x0f, 0x86, 0x39, 0xf9, 0x0e, 0x86,
+  0x39, 0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83,
+  0x4f, 0xef, 0x03, 0xb5, 0x0f, 0x52, 0x3e, 0xb8, 0xfb, 0x60, 0x34, 0x21,
+  0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1,
+  0x88, 0x44, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80, 0x0b, 0xfd, 0x20,
+  0xee, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80, 0x13,
+  0xfd, 0x40, 0xee, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1,
+  0x80, 0x1b, 0xfd, 0x60, 0xee, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00,
+  0x04, 0xc1, 0x60, 0x59, 0xfd, 0x40, 0xee, 0x83, 0x99, 0x0f, 0x02, 0xbf,
+  0x0f, 0xc6, 0x3e, 0x00, 0xfd, 0x60, 0x34, 0x21, 0x00, 0x2e, 0x78, 0x6a,
+  0x96, 0x00, 0x5f, 0x86, 0x1b, 0x46, 0x32, 0x18, 0xfd, 0x00, 0x0c, 0x66,
+  0x19, 0xce, 0x05, 0x5f, 0x02, 0x2b, 0xf9, 0xe0, 0xe4, 0x83, 0xf8, 0x0c,
+  0x47, 0xa4, 0x64, 0x80, 0xf2, 0x01, 0xf1, 0xcd, 0x32, 0xa0, 0xcb, 0xba,
+  0x04, 0x96, 0xf2, 0x81, 0x4a, 0x06, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05,
+  0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x44,
+  0xeb, 0x07, 0x3a, 0xdc, 0x10, 0xac, 0x7e, 0x00, 0x06, 0xb3, 0x0c, 0xe9,
+  0xa2, 0x2e, 0x81, 0x0d, 0x31, 0x1f, 0xc0, 0x67, 0x96, 0xe0, 0x5d, 0x0c,
+  0xe6, 0x03, 0x22, 0x3e, 0xb3, 0x04, 0xef, 0x32, 0x1c, 0x41, 0x93, 0x41,
+  0xcc, 0x07, 0xc2, 0x37, 0xcb, 0xc0, 0x2e, 0xef, 0x12, 0x58, 0x4d, 0x06,
+  0x32, 0x1f, 0xc4, 0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53,
+  0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0xb8, 0x1f, 0xe8, 0x70,
+  0x43, 0x60, 0xfb, 0x01, 0x18, 0xcc, 0x32, 0xb4, 0x8b, 0xbb, 0x04, 0xa6,
+  0xf3, 0xc1, 0x10, 0x9f, 0x59, 0x82, 0x77, 0x31, 0xa2, 0xe7, 0x03, 0xf8,
+  0xcc, 0x12, 0xbc, 0xcb, 0x40, 0xcb, 0xa3, 0xa5, 0x0b, 0xa6, 0x2e, 0x44,
+  0xbb, 0x08, 0xee, 0xe2, 0xa7, 0xc2, 0xba, 0x5c, 0x30, 0x8c, 0xf1, 0x7c,
+  0x00, 0xf6, 0x41, 0x7c, 0x86, 0x23, 0x54, 0x23, 0xec, 0x03, 0xe2, 0x9b,
+  0x65, 0x80, 0x97, 0x79, 0x09, 0x4c, 0xec, 0x83, 0xd5, 0x88, 0x8f, 0x05,
   0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x30, 0xe4, 0x63, 0x45,
-  0x10, 0x9f, 0x22, 0x70, 0x3b, 0xd0, 0xe1, 0x86, 0xc0, 0xb6, 0x03, 0x30,
-  0x98, 0x65, 0xd8, 0x13, 0x3e, 0x09, 0x6c, 0xe0, 0xe9, 0x00, 0x3e, 0xb3,
-  0x04, 0xa1, 0x62, 0x3b, 0x1d, 0x10, 0xf1, 0x99, 0x25, 0x08, 0x95, 0xe1,
-  0x08, 0x35, 0xe2, 0xe9, 0x40, 0xf8, 0x66, 0x19, 0xfc, 0x24, 0x54, 0x02,
-  0x5b, 0xa3, 0x9e, 0x0e, 0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9,
-  0xe0, 0x29, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0xf1, 0x0e,
-  0x74, 0xb8, 0x21, 0x08, 0xef, 0x00, 0x0c, 0x66, 0x19, 0xfe, 0x04, 0x54,
-  0x02, 0x2b, 0xeb, 0x60, 0x88, 0xcf, 0x2c, 0x41, 0xa8, 0x18, 0x81, 0xd6,
-  0x01, 0x7c, 0x66, 0x09, 0x42, 0x65, 0xa0, 0xe5, 0xd1, 0xf6, 0x04, 0xe3,
-  0x13, 0xe2, 0x4f, 0x04, 0x50, 0x51, 0xc7, 0xa0, 0x4f, 0x2e, 0x18, 0xc6,
-  0xce, 0x3a, 0x58, 0xeb, 0x20, 0x3e, 0xc3, 0x11, 0x6b, 0xc7, 0xd6, 0x01,
-  0xf1, 0xcd, 0x32, 0x88, 0x4a, 0xa9, 0x04, 0xd6, 0xd6, 0x01, 0xdb, 0xc5,
-  0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x18, 0xf2,
-  0xb1, 0x22, 0x88, 0x4f, 0x11, 0xf1, 0x1d, 0xe8, 0x70, 0x43, 0xf0, 0xde,
-  0x01, 0x18, 0xcc, 0x32, 0x8c, 0x0a, 0xa9, 0x04, 0x36, 0xd4, 0x75, 0x00,
-  0x9f, 0x59, 0x82, 0x54, 0x31, 0xb9, 0x0e, 0x88, 0xf8, 0xcc, 0x12, 0xa4,
-  0xca, 0x70, 0x84, 0xdd, 0xcd, 0x75, 0x20, 0x7c, 0xb3, 0x0c, 0xa6, 0x92,
-  0x2a, 0x81, 0xdd, 0x1d, 0x5d, 0x07, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05,
-  0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x04,
-  0x7f, 0x07, 0x3a, 0xdc, 0x10, 0xe8, 0x77, 0x00, 0x06, 0xb3, 0x0c, 0xa7,
-  0x82, 0x2a, 0x81, 0xf1, 0x75, 0x30, 0xc4, 0x67, 0x96, 0x20, 0x55, 0x8c,
-  0x08, 0xed, 0x00, 0x3e, 0xb3, 0x04, 0xa9, 0x32, 0xd0, 0xf2, 0x68, 0xa3,
-  0x82, 0x91, 0x0a, 0x71, 0x2a, 0x02, 0xaa, 0x88, 0x7c, 0x50, 0x2a, 0x17,
-  0x0c, 0x73, 0xc1, 0x53, 0xb7, 0x3d, 0x75, 0x6d, 0x1d, 0x0c, 0x73, 0xa2,
-  0x1c, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20,
-  0x08, 0x06, 0x9f, 0x8a, 0x07, 0xfa, 0x1d, 0xe4, 0x76, 0x70, 0xe2, 0xc1,
-  0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26,
-  0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x17,
-  0xe3, 0x41, 0x88, 0x07, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
-  0x01, 0x27, 0xe3, 0x81, 0x88, 0x07, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00,
-  0x08, 0x82, 0x01, 0x37, 0xe3, 0xc1, 0x88, 0x07, 0x09, 0x11, 0x8c, 0x18,
-  0x28, 0x00, 0x08, 0x82, 0xc1, 0xb2, 0xe3, 0x81, 0x88, 0x07, 0xe3, 0x1d,
-  0x04, 0x2e, 0x1e, 0xcc, 0x77, 0x00, 0xe3, 0xc1, 0x68, 0x42, 0x00, 0x5c,
-  0xf0, 0xd4, 0x2c, 0x81, 0xaa, 0x0c, 0xb4, 0x3c, 0xa6, 0x51, 0x27, 0xe8,
-  0x3b, 0xd0, 0x09, 0x4b, 0xdc, 0x89, 0x90, 0x2a, 0xe8, 0x3b, 0xe0, 0x89,
-  0xd9, 0x1e, 0x7d, 0x07, 0xf0, 0x99, 0x65, 0x58, 0x95, 0x56, 0xa1, 0xbd,
-  0xe1, 0x08, 0xdc, 0xb3, 0xef, 0x60, 0xf8, 0x2e, 0xf7, 0x86, 0x19, 0x6e,
-  0x08, 0xc2, 0x3b, 0x20, 0x83, 0x1a, 0x02, 0x1d, 0x8e, 0x28, 0xf4, 0x3b,
-  0x18, 0xbe, 0x0a, 0x04, 0xbd, 0x63, 0x98, 0xe1, 0x86, 0x80, 0xbc, 0x03,
-  0x32, 0xa8, 0x60, 0xd0, 0x59, 0x06, 0x56, 0x09, 0x97, 0xe0, 0x50, 0x3b,
-  0x18, 0xe6, 0x7a, 0x39, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c,
-  0xbe, 0x31, 0x0f, 0x66, 0x3c, 0x90, 0xef, 0x00, 0xcc, 0x83, 0xd1, 0x84,
-  0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86,
-  0x22, 0x0e, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x4e, 0xcd, 0x03,
-  0x1d, 0x0f, 0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x6e,
-  0xcd, 0x83, 0x1d, 0x0f, 0x18, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04,
-  0x03, 0x8e, 0xcd, 0x03, 0x1e, 0x0f, 0x24, 0x22, 0x18, 0x31, 0x50, 0x00,
-  0x10, 0x04, 0x83, 0x85, 0xce, 0x83, 0x1d, 0x0f, 0xf8, 0x3b, 0x08, 0xce,
-  0x3c, 0x60, 0xf1, 0x20, 0xcd, 0x83, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xa9,
-  0x59, 0x82, 0x70, 0x19, 0x6e, 0x60, 0xbf, 0x36, 0x0f, 0xc0, 0x60, 0x96,
-  0xc1, 0x55, 0x5e, 0x25, 0xa8, 0xfb, 0x0e, 0x7c, 0x3c, 0x80, 0x0b, 0x9e,
-  0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0xca, 0xce, 0x83, 0x1f, 0x0f,
-  0xe4, 0x0f, 0xc6, 0x83, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0xee,
-  0x3c, 0xf8, 0xf1, 0x20, 0x10, 0x2e, 0x18, 0xa6, 0xf4, 0x3b, 0x18, 0xf3,
-  0x00, 0x2e, 0x78, 0x6a, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xa8, 0x3d,
-  0x0f, 0xc8, 0x3c, 0x00, 0x83, 0x1a, 0x0f, 0x46, 0x0c, 0x0e, 0x00, 0x04,
-  0xc1, 0x80, 0xe2, 0xf3, 0x80, 0xcc, 0x83, 0x40, 0xb8, 0x60, 0x98, 0x0b,
-  0x9e, 0xba, 0xe3, 0xa9, 0x1b, 0xef, 0x60, 0x98, 0xc3, 0xe7, 0x60, 0x98,
+  0x10, 0x9f, 0x22, 0xcc, 0x3f, 0xd0, 0xe1, 0x86, 0x80, 0xfc, 0x03, 0x30,
+  0x98, 0x65, 0x88, 0x17, 0x79, 0x09, 0x6c, 0x50, 0xfb, 0x00, 0x3e, 0xb3,
+  0x04, 0xf7, 0x62, 0x67, 0x1f, 0x10, 0xf1, 0x99, 0x25, 0xb8, 0x97, 0xe1,
+  0x88, 0xda, 0x40, 0xfb, 0x40, 0xf8, 0x66, 0x19, 0xe8, 0xe5, 0x5e, 0x02,
+  0xb3, 0x8d, 0xb4, 0x0f, 0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9,
+  0xe0, 0x29, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0xf8, 0x0f,
+  0x74, 0xb8, 0x21, 0x78, 0xff, 0x00, 0x0c, 0x66, 0x19, 0xea, 0xc5, 0x5e,
+  0x02, 0x8b, 0xfb, 0x60, 0x88, 0xcf, 0x2c, 0xc1, 0xbd, 0x18, 0x61, 0xf7,
+  0x01, 0x7c, 0x66, 0x09, 0xee, 0x65, 0xa0, 0xe5, 0xd1, 0xe2, 0x05, 0x93,
+  0x17, 0xa2, 0x5e, 0x04, 0x7b, 0x01, 0x9d, 0x79, 0xb9, 0x60, 0x98, 0x0b,
+  0x9e, 0xba, 0xed, 0xa9, 0x13, 0xfb, 0x60, 0x98, 0xbb, 0xf3, 0x60, 0x98,
   0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xf8,
-  0x40, 0x3d, 0x80, 0xf3, 0xe0, 0xc5, 0x83, 0x3e, 0x0f, 0x46, 0x13, 0x02,
+  0xfe, 0x3f, 0x78, 0xff, 0xc0, 0xf5, 0x03, 0xfe, 0x0f, 0x46, 0x13, 0x02,
   0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a,
-  0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb8, 0x53, 0x0f, 0xee,
-  0x3c, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x38, 0x54,
-  0x0f, 0xf0, 0x3c, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c,
-  0xb8, 0x54, 0x0f, 0xf2, 0x3c, 0x48, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40,
-  0x10, 0x0c, 0x96, 0x58, 0x0f, 0xf0, 0x3c, 0xc8, 0xf1, 0x20, 0x20, 0xf5,
-  0x20, 0xcd, 0x03, 0x53, 0x0f, 0x46, 0x13, 0x02, 0xe0, 0x82, 0xa7, 0x66,
-  0x09, 0xc2, 0x65, 0xb8, 0x21, 0x05, 0x83, 0x54, 0x0f, 0xc0, 0x60, 0x96,
-  0x01, 0x56, 0xc2, 0x25, 0xb0, 0x15, 0x0f, 0x5a, 0x3c, 0x88, 0xcf, 0x70,
-  0x84, 0x0b, 0x06, 0x2e, 0x1e, 0x10, 0xdf, 0x2c, 0x43, 0xac, 0xd0, 0x4a,
-  0x60, 0x2f, 0x1e, 0xbc, 0x60, 0x10, 0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30,
-  0xcc, 0x05, 0x4f, 0x59, 0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xcc,
-  0x7a, 0xa0, 0xc3, 0x0d, 0x41, 0xac, 0x07, 0x60, 0x30, 0xcb, 0x20, 0x2b,
-  0xb3, 0x12, 0xd8, 0x70, 0xe3, 0x01, 0x7c, 0x66, 0x09, 0x70, 0xc5, 0x6c,
-  0x3c, 0x20, 0xe2, 0x33, 0x4b, 0x80, 0x2b, 0xc3, 0x11, 0x39, 0x18, 0xdc,
-  0x78, 0x20, 0x7c, 0xb3, 0x0c, 0xb5, 0x82, 0x2b, 0x81, 0xe9, 0x60, 0x80,
-  0xe3, 0x41, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65,
-  0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xe1, 0xeb, 0x81, 0x0e, 0x37,
-  0x04, 0xbc, 0x1e, 0x80, 0xc1, 0x2c, 0x83, 0xad, 0xdc, 0x4a, 0x60, 0x60,
-  0x1e, 0x0c, 0xf1, 0x99, 0x25, 0xc0, 0x15, 0x23, 0xc6, 0x3c, 0x80, 0xcf,
-  0x2c, 0x01, 0xae, 0x0c, 0xb4, 0x3c, 0x9a, 0xac, 0x60, 0xb3, 0x42, 0xd8,
-  0x8a, 0x70, 0x2b, 0xa8, 0x28, 0xd0, 0xca, 0x05, 0xc3, 0x98, 0x98, 0x07,
-  0x66, 0x1e, 0xc4, 0x67, 0x38, 0x02, 0x16, 0xce, 0x3c, 0x20, 0xbe, 0x59,
-  0x86, 0x5c, 0xe1, 0x95, 0xc0, 0xd0, 0x3c, 0x88, 0x85, 0xf8, 0x58, 0x30,
-  0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04,
-  0xf1, 0x29, 0x82, 0xdd, 0x03, 0x1d, 0x6e, 0x08, 0xd4, 0x3d, 0x00, 0x83,
-  0x59, 0x06, 0x5d, 0xd9, 0x95, 0xc0, 0x06, 0x38, 0x0f, 0xe0, 0x33, 0x4b,
-  0x00, 0x2e, 0xd6, 0xe6, 0x01, 0x11, 0x9f, 0x59, 0x02, 0x70, 0x19, 0x8e,
-  0xd8, 0x05, 0x37, 0x0f, 0x84, 0x6f, 0x96, 0xa1, 0x57, 0xc0, 0x25, 0x30,
-  0x5e, 0x78, 0xf3, 0x20, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b,
-  0x9e, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xb8, 0xf7, 0x40,
-  0x87, 0x1b, 0x82, 0x7a, 0x0f, 0xc0, 0x60, 0x96, 0xc1, 0x57, 0x7e, 0x25,
-  0xb0, 0x3b, 0x0f, 0x86, 0xf8, 0xcc, 0x12, 0x80, 0x8b, 0x11, 0x7c, 0x1e,
-  0xc0, 0x67, 0x96, 0x00, 0x5c, 0x06, 0x5a, 0x1e, 0x4d, 0x57, 0xb0, 0x5d,
-  0x21, 0x7c, 0x45, 0xf8, 0x15, 0xd6, 0xe0, 0x95, 0x0b, 0x86, 0xb9, 0xe0,
-  0xa9, 0xdb, 0x9e, 0x3a, 0x34, 0x0f, 0x86, 0xb9, 0xbe, 0x0e, 0x86, 0x39,
-  0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0xaf,
-  0xe4, 0x83, 0x7a, 0x0f, 0x68, 0x3d, 0x10, 0xf9, 0x60, 0x34, 0x21, 0x00,
-  0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88,
-  0x44, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80, 0x63, 0xf9, 0x80, 0xdf,
-  0x83, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80, 0x6b, 0xf9,
-  0xa0, 0xdf, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80,
-  0x73, 0xf9, 0xc0, 0xdf, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04,
-  0xc1, 0x60, 0xb1, 0xf9, 0xa0, 0xdf, 0x03, 0x5f, 0x0f, 0x82, 0x94, 0x0f,
-  0xdc, 0x3d, 0x58, 0xf9, 0x60, 0x34, 0x21, 0x00, 0x2e, 0x78, 0x6a, 0x96,
-  0x20, 0x5c, 0x06, 0x5a, 0x1e, 0xd3, 0x60, 0x15, 0xdf, 0x1e, 0x56, 0x85,
-  0x25, 0x5c, 0x45, 0x00, 0x17, 0xdf, 0x1e, 0x5e, 0x65, 0x96, 0x41, 0x5c,
-  0xc8, 0xc5, 0x15, 0x83, 0xe1, 0x88, 0x59, 0x0c, 0xe0, 0x3d, 0x18, 0xbe,
-  0xa3, 0xc5, 0x60, 0x98, 0xe1, 0x86, 0x60, 0xd7, 0x03, 0x32, 0xa8, 0x21,
-  0xd0, 0xe1, 0x88, 0x7f, 0xa0, 0xf7, 0x60, 0xf8, 0x2a, 0x10, 0xf4, 0x42,
-  0x62, 0x98, 0xe1, 0x86, 0xc0, 0xd7, 0x03, 0x32, 0xa8, 0x60, 0xd0, 0x59,
-  0x86, 0x71, 0xc1, 0x97, 0xe0, 0x44, 0x3d, 0x18, 0xe6, 0x6e, 0x3b, 0x18,
-  0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xbe, 0x9e, 0x0f, 0x5a, 0x3e,
-  0x60, 0xf7, 0x40, 0xe7, 0x83, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82,
-  0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x0e, 0x19, 0x31, 0x40,
-  0x00, 0x10, 0x04, 0x03, 0x8e, 0xec, 0x03, 0x9a, 0x0f, 0x0e, 0x22, 0x18,
-  0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xae, 0xec, 0x83, 0x9a, 0x0f, 0x18,
-  0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xce, 0xec, 0x03, 0x9b,
-  0x0f, 0x24, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0xc5, 0xed,
-  0x83, 0x9a, 0x0f, 0xec, 0x3d, 0x08, 0xc2, 0x3e, 0x30, 0xf9, 0x60, 0xec,
-  0x83, 0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xa9, 0x59, 0x02, 0x7c, 0x19, 0x6e,
-  0x30, 0xc7, 0xe0, 0xec, 0x03, 0x30, 0x98, 0x65, 0x28, 0x17, 0x73, 0x09,
-  0x2a, 0xde, 0x03, 0x9c, 0x0f, 0xe0, 0x82, 0xa7, 0x46, 0x0c, 0x0e, 0x00,
-  0x04, 0xc1, 0x80, 0x82, 0xfb, 0x20, 0xe7, 0x83, 0x76, 0x0c, 0x54, 0x3e,
-  0x18, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x2a, 0xee, 0x83, 0x9c, 0x0f,
-  0x02, 0xe1, 0x82, 0x61, 0x8a, 0xde, 0x83, 0x9e, 0x0f, 0xe0, 0x82, 0xa7,
-  0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0xaa, 0xfb, 0xc0, 0xe7, 0x03,
-  0x9d, 0x78, 0xf9, 0x60, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x28, 0xbb,
-  0x0f, 0x7c, 0x3e, 0x08, 0x84, 0x0b, 0x86, 0xb9, 0xe0, 0xa9, 0x3b, 0x9e,
-  0xba, 0x5e, 0x0f, 0x86, 0x39, 0xf9, 0x0e, 0x86, 0x39, 0x62, 0x98, 0x23,
-  0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x4f, 0xef, 0x03, 0xb5,
-  0x0f, 0x52, 0x3e, 0xb8, 0xfb, 0x60, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84,
-  0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c,
-  0x10, 0x00, 0x04, 0xc1, 0x80, 0x0b, 0xfd, 0x20, 0xee, 0x83, 0x84, 0x08,
-  0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80, 0x13, 0xfd, 0x40, 0xee, 0x83,
-  0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80, 0x1b, 0xfd, 0x60,
-  0xee, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0x59,
-  0xfd, 0x40, 0xee, 0x83, 0x99, 0x0f, 0x02, 0xbf, 0x0f, 0xc6, 0x3e, 0x00,
-  0xfd, 0x60, 0x34, 0x21, 0x00, 0x2e, 0x78, 0x6a, 0x96, 0x00, 0x5f, 0x86,
-  0x1b, 0x46, 0x32, 0x18, 0xfd, 0x00, 0x0c, 0x66, 0x19, 0xce, 0x05, 0x5f,
-  0x02, 0x2b, 0xf9, 0xe0, 0xe4, 0x83, 0xf8, 0x0c, 0x47, 0xa4, 0x64, 0x80,
-  0xf2, 0x01, 0xf1, 0xcd, 0x32, 0xa0, 0xcb, 0xba, 0x04, 0x96, 0xf2, 0x81,
-  0x4a, 0x06, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94,
-  0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x44, 0xeb, 0x07, 0x3a, 0xdc,
-  0x10, 0xac, 0x7e, 0x00, 0x06, 0xb3, 0x0c, 0xe9, 0xa2, 0x2e, 0x81, 0x0d,
-  0x31, 0x1f, 0xc0, 0x67, 0x96, 0xe0, 0x5d, 0x0c, 0xe6, 0x03, 0x22, 0x3e,
-  0xb3, 0x04, 0xef, 0x32, 0x1c, 0x41, 0x93, 0x41, 0xcc, 0x07, 0xc2, 0x37,
-  0xcb, 0xc0, 0x2e, 0xef, 0x12, 0x58, 0x4d, 0x06, 0x32, 0x1f, 0xc4, 0xc7,
-  0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x44, 0xf2, 0xb1,
-  0x22, 0x88, 0x4f, 0x11, 0xb8, 0x1f, 0xe8, 0x70, 0x43, 0x60, 0xfb, 0x01,
-  0x18, 0xcc, 0x32, 0xb4, 0x8b, 0xbb, 0x04, 0xa6, 0xf3, 0xc1, 0x10, 0x9f,
-  0x59, 0x82, 0x77, 0x31, 0xa2, 0xe7, 0x03, 0xf8, 0xcc, 0x12, 0xbc, 0xcb,
-  0x40, 0xcb, 0xa3, 0xa5, 0x0b, 0xa6, 0x2e, 0x44, 0xbb, 0x08, 0xee, 0xe2,
-  0xa7, 0xc2, 0xba, 0x5c, 0x30, 0x8c, 0xf1, 0x7c, 0x00, 0xf6, 0x41, 0x7c,
-  0x86, 0x23, 0x54, 0x23, 0xec, 0x03, 0xe2, 0x9b, 0x65, 0x80, 0x97, 0x79,
-  0x09, 0x4c, 0xec, 0x83, 0xd5, 0x88, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18,
-  0xe6, 0x82, 0xa7, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xcc,
-  0x3f, 0xd0, 0xe1, 0x86, 0x80, 0xfc, 0x03, 0x30, 0x98, 0x65, 0x88, 0x17,
-  0x79, 0x09, 0x6c, 0x50, 0xfb, 0x00, 0x3e, 0xb3, 0x04, 0xf7, 0x62, 0x67,
-  0x1f, 0x10, 0xf1, 0x99, 0x25, 0xb8, 0x97, 0xe1, 0x88, 0xda, 0x40, 0xfb,
-  0x40, 0xf8, 0x66, 0x19, 0xe8, 0xe5, 0x5e, 0x02, 0xb3, 0x8d, 0xb4, 0x0f,
-  0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x22,
-  0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0xf8, 0x0f, 0x74, 0xb8, 0x21, 0x78,
-  0xff, 0x00, 0x0c, 0x66, 0x19, 0xea, 0xc5, 0x5e, 0x02, 0x8b, 0xfb, 0x60,
-  0x88, 0xcf, 0x2c, 0xc1, 0xbd, 0x18, 0x61, 0xf7, 0x01, 0x7c, 0x66, 0x09,
-  0xee, 0x65, 0xa0, 0xe5, 0xd1, 0xe2, 0x05, 0x93, 0x17, 0xa2, 0x5e, 0x04,
-  0x7b, 0x01, 0x9d, 0x79, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xba, 0xed, 0xa9,
-  0x13, 0xfb, 0x60, 0x98, 0xbb, 0xf3, 0x60, 0x98, 0x23, 0x86, 0x39, 0x62,
-  0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xf8, 0xfe, 0x3f, 0x78, 0xff,
-  0xc0, 0xf5, 0x03, 0xfe, 0x0f, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08,
-  0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00,
-  0x01, 0x40, 0x10, 0x0c, 0x38, 0x13, 0x14, 0xec, 0x3f, 0x48, 0x88, 0x60,
-  0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb8, 0x13, 0x14, 0xee, 0x3f, 0x48,
-  0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x38, 0x14, 0x14, 0xf0,
-  0x3f, 0x48, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x16, 0x18,
-  0x14, 0xee, 0x3f, 0xc0, 0xfd, 0x20, 0x18, 0x41, 0x01, 0xfd, 0x83, 0x12,
-  0x14, 0x46, 0x13, 0x02, 0xe0, 0x82, 0xa7, 0x66, 0x09, 0xf0, 0x65, 0xa0,
-  0xe5, 0x31, 0x8d, 0x71, 0x91, 0xfd, 0x41, 0x5c, 0x58, 0xa2, 0x5c, 0x84,
-  0x7b, 0x91, 0xfd, 0xc1, 0x5c, 0x66, 0x19, 0xf2, 0x65, 0x5f, 0x50, 0x33,
-  0x18, 0x8e, 0x98, 0x3d, 0xf5, 0x0f, 0x86, 0xef, 0x68, 0x6f, 0x98, 0xe1,
-  0x86, 0xa0, 0xf6, 0x03, 0x32, 0xa8, 0x21, 0xd0, 0xe1, 0x88, 0xfc, 0x70,
-  0xff, 0x60, 0xf8, 0x2a, 0x10, 0xf4, 0xf6, 0x63, 0x98, 0xe1, 0x86, 0x00,
-  0xf7, 0x03, 0x32, 0xa8, 0x60, 0xd0, 0x59, 0x06, 0x7d, 0x79, 0x99, 0xe0,
-  0xf8, 0x3e, 0x18, 0xe6, 0x62, 0x3d, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40,
-  0x10, 0x0c, 0xbe, 0x1b, 0x14, 0x4e, 0x50, 0x30, 0xff, 0x80, 0x06, 0x85,
-  0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d,
-  0x20, 0x86, 0x22, 0x0e, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xce,
-  0x07, 0x05, 0x17, 0x14, 0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04,
-  0x03, 0xee, 0x07, 0x85, 0x17, 0x14, 0x18, 0x22, 0x18, 0x31, 0x40, 0x00,
-  0x10, 0x04, 0x03, 0x0e, 0x0c, 0x05, 0x18, 0x14, 0x24, 0x22, 0x18, 0x31,
-  0x50, 0x00, 0x10, 0x04, 0x83, 0x05, 0x0d, 0x85, 0x17, 0x14, 0xe0, 0x3f,
-  0x08, 0x76, 0x50, 0x00, 0x41, 0xa1, 0x07, 0x85, 0xd1, 0x84, 0x00, 0xb8,
-  0xe0, 0xa9, 0x59, 0x82, 0x97, 0x19, 0x6e, 0x00, 0xcf, 0x20, 0x0c, 0x05,
-  0x30, 0x98, 0x65, 0xe0, 0x97, 0x7e, 0x09, 0x6a, 0xfd, 0x03, 0x19, 0x14,
-  0xe0, 0x82, 0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x52, 0x43,
-  0x61, 0x06, 0x85, 0xf6, 0x23, 0x41, 0x61, 0xc4, 0xe0, 0x00, 0x40, 0x10,
-  0x0c, 0xa8, 0x35, 0x14, 0x66, 0x50, 0x08, 0x84, 0x0b, 0x86, 0x29, 0xf7,
-  0x0f, 0x6e, 0x50, 0x80, 0x0b, 0x9e, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04,
-  0x03, 0xea, 0x0d, 0x05, 0x1c, 0x14, 0x68, 0x24, 0x05, 0x85, 0x11, 0x83,
-  0x03, 0x00, 0x41, 0x30, 0xa0, 0xe0, 0x50, 0xc0, 0x41, 0x21, 0x10, 0x2e,
-  0x18, 0xe6, 0x82, 0xa7, 0xee, 0x78, 0xea, 0x6e, 0x3f, 0x18, 0xe6, 0xd8,
-  0x3d, 0x18, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40,
-  0x10, 0x0c, 0x3e, 0x3a, 0x14, 0xc8, 0x50, 0x18, 0x41, 0x21, 0x0e, 0x85,
-  0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d,
-  0x20, 0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x6e,
-  0x0f, 0x85, 0x35, 0x14, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04,
-  0x03, 0x8e, 0x0f, 0x05, 0x36, 0x14, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00,
-  0x10, 0x04, 0x03, 0xae, 0x0f, 0x85, 0x36, 0x14, 0x12, 0x22, 0x18, 0x31,
-  0x50, 0x00, 0x10, 0x04, 0x83, 0xa5, 0x14, 0x05, 0x36, 0x14, 0x5a, 0x50,
-  0x08, 0xf0, 0x50, 0xe8, 0x41, 0x41, 0x0f, 0x85, 0xd1, 0x84, 0x00, 0xb8,
-  0xe0, 0xa9, 0x59, 0x82, 0x97, 0x19, 0x6e, 0xe8, 0xcf, 0xa0, 0x0f, 0x05,
-  0x30, 0x98, 0x65, 0xf0, 0x97, 0x97, 0x09, 0xec, 0xff, 0x83, 0x10, 0x14,
-  0xe2, 0x33, 0x1c, 0x91, 0x82, 0x81, 0x08, 0x0a, 0xc4, 0x37, 0xcb, 0xf0,
-  0x2f, 0x22, 0x13, 0xd8, 0x08, 0x0a, 0x2a, 0x18, 0xc4, 0xc7, 0x82, 0x81,
-  0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88,
-  0x4f, 0x11, 0xa7, 0x28, 0xe8, 0x70, 0x43, 0x50, 0x8a, 0x02, 0x18, 0xcc,
-  0x32, 0x80, 0x4c, 0xc8, 0x04, 0x36, 0xac, 0xa0, 0x00, 0x9f, 0x59, 0x02,
-  0x93, 0x31, 0x15, 0x14, 0x88, 0xf8, 0xcc, 0x12, 0x98, 0xcc, 0x70, 0x04,
-  0x0d, 0x06, 0x2b, 0x28, 0x08, 0xdf, 0x2c, 0xc3, 0xc8, 0x98, 0x4c, 0x60,
-  0x35, 0x18, 0xb0, 0xa0, 0x10, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc,
-  0x05, 0x4f, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xc8, 0xa2,
-  0xa0, 0xc3, 0x0d, 0x01, 0x2c, 0x0a, 0x60, 0x30, 0xcb, 0x40, 0x32, 0x25,
-  0x13, 0x18, 0x0d, 0x0a, 0x43, 0x7c, 0x66, 0x09, 0x4c, 0xc6, 0x88, 0x1b,
-  0x14, 0xe0, 0x33, 0x4b, 0x60, 0x32, 0x03, 0x2d, 0x8f, 0x06, 0x32, 0x58,
-  0xc8, 0x10, 0x24, 0x23, 0x94, 0x8c, 0x1f, 0x0a, 0x22, 0x73, 0xc1, 0x30,
-  0x66, 0x83, 0x82, 0x0e, 0x0a, 0xf1, 0x19, 0x8e, 0x20, 0x95, 0x1d, 0x14,
-  0x88, 0x6f, 0x96, 0xe1, 0x64, 0x54, 0x26, 0x30, 0x1e, 0x14, 0x4a, 0x25,
-  0x3e, 0x16, 0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0xc0, 0x90,
-  0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x00, 0x47, 0x41, 0x87, 0x1b, 0x02, 0x5f,
-  0x14, 0xc0, 0x60, 0x96, 0x01, 0x65, 0x52, 0x26, 0xb0, 0x81, 0x0c, 0x05,
-  0xf8, 0xcc, 0x12, 0xb8, 0x8c, 0x85, 0xa1, 0x40, 0xc4, 0x67, 0x96, 0xc0,
-  0x65, 0x86, 0x23, 0x5e, 0x45, 0x0c, 0x05, 0xe1, 0x9b, 0x65, 0x58, 0x19,
-  0x97, 0x09, 0x0c, 0x56, 0xc6, 0x50, 0x88, 0x8f, 0x05, 0x0e, 0x7d, 0x2e,
-  0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22,
-  0xd6, 0x51, 0xd0, 0xe1, 0x86, 0x20, 0x1d, 0x05, 0x30, 0x98, 0x65, 0x60,
-  0x99, 0x96, 0x09, 0x6c, 0x0d, 0x85, 0x21, 0x3e, 0xb3, 0x04, 0x2e, 0x63,
-  0x04, 0x1c, 0x0a, 0xf0, 0x99, 0x25, 0x70, 0x99, 0x81, 0x96, 0x47, 0x43,
-  0x19, 0x2c, 0x65, 0x08, 0x96, 0x11, 0x5a, 0x86, 0xae, 0x54, 0xe6, 0x82,
-  0x61, 0x2e, 0x78, 0xea, 0xb6, 0xa7, 0x8e, 0x07, 0x85, 0x61, 0x2e, 0xee,
-  0x83, 0x61, 0x8e, 0x18, 0xe6, 0x88, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04,
-  0xc1, 0xe0, 0xcb, 0x47, 0x21, 0x1d, 0x05, 0x54, 0x14, 0xec, 0x51, 0x18,
-  0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04,
-  0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xe0, 0x40,
-  0x52, 0x80, 0x47, 0x21, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30,
-  0xe0, 0x42, 0x52, 0x88, 0x47, 0x21, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00,
-  0x41, 0x30, 0xe0, 0x44, 0x52, 0x90, 0x47, 0x21, 0x21, 0x82, 0x11, 0x03,
-  0x05, 0x00, 0x41, 0x30, 0x58, 0x54, 0x52, 0x88, 0x47, 0x41, 0x16, 0x85,
-  0xa0, 0x1f, 0x05, 0x71, 0x14, 0xfe, 0x51, 0x18, 0x4d, 0x08, 0x80, 0x0b,
-  0x9e, 0x9a, 0x25, 0x78, 0x99, 0x81, 0x96, 0xc7, 0x34, 0xf4, 0xc5, 0x34,
-  0x89, 0x7c, 0x61, 0x09, 0x7e, 0x11, 0x5c, 0xc6, 0x34, 0x89, 0x7e, 0x99,
-  0x65, 0x80, 0x19, 0x99, 0x11, 0xd5, 0x60, 0x38, 0x42, 0xf6, 0xc8, 0x51,
-  0x18, 0xbe, 0x9b, 0xbd, 0x61, 0x86, 0x1b, 0x82, 0x57, 0x14, 0xc8, 0xa0,
-  0x86, 0x40, 0x87, 0x23, 0xe6, 0x05, 0x1d, 0x85, 0xe1, 0xab, 0x40, 0xd0,
-  0xab, 0x97, 0x61, 0x86, 0x1b, 0x02, 0x59, 0x14, 0xc8, 0xa0, 0x82, 0x41,
-  0x67, 0x19, 0x62, 0xc6, 0x6c, 0x82, 0xb3, 0x43, 0x61, 0x98, 0x5b, 0xfd,
-  0x60, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xf8, 0x62, 0x52, 0x08,
-  0x49, 0x01, 0x1c, 0x05, 0x97, 0x14, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41,
-  0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x38, 0x64, 0xc4,
-  0x00, 0x01, 0x40, 0x10, 0x0c, 0x38, 0x9c, 0x14, 0x50, 0x52, 0x38, 0x88,
-  0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb8, 0x9c, 0x14, 0x52, 0x52,
-  0x60, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x38, 0x9d, 0x14,
-  0x54, 0x52, 0x90, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x16,
-  0xb1, 0x14, 0x52, 0x52, 0x50, 0x47, 0x21, 0xa8, 0x49, 0x41, 0x1f, 0x85,
-  0x9b, 0x14, 0x46, 0x13, 0x02, 0xe0, 0x82, 0xa7, 0x66, 0x09, 0xcc, 0x66,
-  0xb8, 0x41, 0x57, 0x83, 0x9d, 0x14, 0xc0, 0x60, 0x96, 0x61, 0x66, 0x68,
-  0x26, 0xa8, 0x72, 0x14, 0x58, 0x52, 0x80, 0x0b, 0x9e, 0x1a, 0x31, 0x38,
-  0x00, 0x10, 0x04, 0x03, 0x8a, 0x2c, 0x85, 0x96, 0x14, 0xd8, 0xcf, 0x1f,
-  0x85, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0xca, 0x52, 0x68, 0x49,
-  0x21, 0x10, 0x2e, 0x18, 0xa6, 0xd0, 0x51, 0x88, 0x49, 0x01, 0x2e, 0x78,
-  0x6a, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xa8, 0xb4, 0x14, 0x64, 0x52,
-  0x70, 0x99, 0x91, 0x14, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x80, 0x52,
-  0x4b, 0x41, 0x26, 0x85, 0x40, 0xb8, 0x60, 0x98, 0x0b, 0x9e, 0xba, 0xe3,
-  0xa9, 0x8b, 0x45, 0x61, 0x98, 0x33, 0xff, 0x60, 0x98, 0x23, 0x86, 0x39,
-  0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xf8, 0xdc, 0x52, 0xf0,
-  0x49, 0xa1, 0x1f, 0x85, 0xb5, 0x14, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41,
-  0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4,
-  0x00, 0x01, 0x40, 0x10, 0x0c, 0xb8, 0xba, 0x14, 0xca, 0x52, 0x48, 0x88,
-  0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x38, 0xbb, 0x14, 0xcc, 0x52,
-  0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb8, 0xbb, 0x14,
-  0xce, 0x52, 0x48, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x96,
-  0xbf, 0x14, 0xcc, 0x52, 0x38, 0x49, 0x21, 0x90, 0x4b, 0xe1, 0x26, 0x05,
-  0xba, 0x14, 0x46, 0x13, 0x02, 0xe0, 0x82, 0xa7, 0x66, 0x09, 0xcc, 0x66,
-  0xb8, 0xe1, 0x5e, 0x83, 0xbb, 0x14, 0xc0, 0x60, 0x96, 0xa1, 0x66, 0xcc,
-  0x26, 0xb0, 0x7c, 0x14, 0xf6, 0x51, 0x88, 0xcf, 0x70, 0x04, 0x0a, 0x06,
-  0xfc, 0x28, 0x10, 0xdf, 0x2c, 0x83, 0xcd, 0xe4, 0x4c, 0x60, 0xfd, 0x28,
-  0xa4, 0x60, 0x10, 0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f,
-  0x59, 0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0x84, 0xa6, 0xa0, 0xc3,
-  0x0d, 0xc1, 0x5f, 0x0a, 0x60, 0x30, 0xcb, 0x70, 0x33, 0x38, 0x13, 0xd8,
-  0x50, 0x92, 0x02, 0x7c, 0x66, 0x09, 0x7a, 0xc6, 0x48, 0x52, 0x20, 0xe2,
-  0x33, 0x4b, 0xd0, 0x33, 0xc3, 0x11, 0x33, 0x18, 0x94, 0xa4, 0x20, 0x7c,
-  0xb3, 0x0c, 0x3a, 0xd3, 0x33, 0x81, 0xd1, 0x60, 0x60, 0x92, 0x42, 0x7c,
-  0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x41, 0x24, 0x1f,
-  0x2b, 0x82, 0xf8, 0x14, 0xc1, 0x9a, 0x82, 0x0e, 0x37, 0x04, 0xaa, 0x29,
-  0x80, 0xc1, 0x2c, 0xc3, 0xce, 0xf0, 0x4c, 0x60, 0x2e, 0x29, 0x0c, 0xf1,
-  0x99, 0x25, 0xe8, 0x19, 0x23, 0x62, 0x52, 0x80, 0xcf, 0x2c, 0x41, 0xcf,
-  0x0c, 0xb4, 0x3c, 0xda, 0xcd, 0x60, 0x38, 0x43, 0xec, 0x8c, 0xc0, 0x33,
-  0x7c, 0x28, 0xe4, 0xcc, 0x05, 0xc3, 0x18, 0x4c, 0x0a, 0x34, 0x29, 0xc4,
-  0x67, 0x38, 0xc2, 0x6f, 0x6a, 0x52, 0x20, 0xbe, 0x59, 0x06, 0x9f, 0x09,
-  0x9b, 0xc0, 0x6c, 0x52, 0xf8, 0x9b, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82,
-  0x61, 0x2e, 0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x42,
-  0x37, 0x05, 0x1d, 0x6e, 0x08, 0x70, 0x53, 0x00, 0x83, 0x59, 0x86, 0x9f,
-  0x01, 0x9b, 0xc0, 0x06, 0x9f, 0x14, 0xe0, 0x33, 0x4b, 0x50, 0x36, 0xb6,
-  0x93, 0x02, 0x11, 0x9f, 0x59, 0x82, 0xb2, 0x19, 0x8e, 0x48, 0x1d, 0x9e,
-  0x14, 0x84, 0x6f, 0x96, 0x41, 0x6c, 0xca, 0x26, 0x30, 0xd5, 0xe9, 0x49,
+  0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x38, 0x13, 0x14, 0xec,
+  0x3f, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb8, 0x13,
+  0x14, 0xee, 0x3f, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c,
+  0x38, 0x14, 0x14, 0xf0, 0x3f, 0x48, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40,
+  0x10, 0x0c, 0x16, 0x18, 0x14, 0xee, 0x3f, 0xc0, 0xfd, 0x20, 0x18, 0x41,
+  0x01, 0xfd, 0x83, 0x12, 0x14, 0x46, 0x13, 0x02, 0xe0, 0x82, 0xa7, 0x66,
+  0x09, 0xf0, 0x65, 0xa0, 0xe5, 0x31, 0x8d, 0x71, 0x91, 0xfd, 0x41, 0x5c,
+  0x58, 0xa2, 0x5c, 0x84, 0x7b, 0x91, 0xfd, 0xc1, 0x5c, 0x66, 0x19, 0xf2,
+  0x65, 0x5f, 0x50, 0x33, 0x18, 0x8e, 0x98, 0x3d, 0xf5, 0x0f, 0x86, 0xef,
+  0x68, 0x6f, 0x98, 0xe1, 0x86, 0xa0, 0xf6, 0x03, 0x32, 0xa8, 0x21, 0xd0,
+  0xe1, 0x88, 0xfc, 0x70, 0xff, 0x60, 0xf8, 0x2a, 0x10, 0xf4, 0xf6, 0x63,
+  0x98, 0xe1, 0x86, 0x00, 0xf7, 0x03, 0x32, 0xa8, 0x60, 0xd0, 0x59, 0x06,
+  0x7d, 0x79, 0x99, 0xe0, 0xf8, 0x3e, 0x18, 0xe6, 0x62, 0x3d, 0x18, 0x66,
+  0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xbe, 0x1b, 0x14, 0x4e, 0x50, 0x30,
+  0xff, 0x80, 0x06, 0x85, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1,
+  0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x0e, 0x19, 0x31, 0x40, 0x00,
+  0x10, 0x04, 0x03, 0xce, 0x07, 0x05, 0x17, 0x14, 0x0e, 0x22, 0x18, 0x31,
+  0x40, 0x00, 0x10, 0x04, 0x03, 0xee, 0x07, 0x85, 0x17, 0x14, 0x18, 0x22,
+  0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x0e, 0x0c, 0x05, 0x18, 0x14,
+  0x24, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x05, 0x0d, 0x85,
+  0x17, 0x14, 0xe0, 0x3f, 0x08, 0x76, 0x50, 0x00, 0x41, 0xa1, 0x07, 0x85,
+  0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xa9, 0x59, 0x82, 0x97, 0x19, 0x6e, 0x00,
+  0xcf, 0x20, 0x0c, 0x05, 0x30, 0x98, 0x65, 0xe0, 0x97, 0x7e, 0x09, 0x6a,
+  0xfd, 0x03, 0x19, 0x14, 0xe0, 0x82, 0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04,
+  0xc1, 0x80, 0x52, 0x43, 0x61, 0x06, 0x85, 0xf6, 0x23, 0x41, 0x61, 0xc4,
+  0xe0, 0x00, 0x40, 0x10, 0x0c, 0xa8, 0x35, 0x14, 0x66, 0x50, 0x08, 0x84,
+  0x0b, 0x86, 0x29, 0xf7, 0x0f, 0x6e, 0x50, 0x80, 0x0b, 0x9e, 0x1a, 0x31,
+  0x38, 0x00, 0x10, 0x04, 0x03, 0xea, 0x0d, 0x05, 0x1c, 0x14, 0x68, 0x24,
+  0x05, 0x85, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0, 0xe0, 0x50, 0xc0,
+  0x41, 0x21, 0x10, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0xee, 0x78, 0xea, 0x6e,
+  0x3f, 0x18, 0xe6, 0xd8, 0x3d, 0x18, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66,
+  0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x3e, 0x3a, 0x14, 0xc8, 0x50, 0x18,
+  0x41, 0x21, 0x0e, 0x85, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1,
+  0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00,
+  0x10, 0x04, 0x03, 0x6e, 0x0f, 0x85, 0x35, 0x14, 0x12, 0x22, 0x18, 0x31,
+  0x40, 0x00, 0x10, 0x04, 0x03, 0x8e, 0x0f, 0x05, 0x36, 0x14, 0x12, 0x22,
+  0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xae, 0x0f, 0x85, 0x36, 0x14,
+  0x12, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0xa5, 0x14, 0x05,
+  0x36, 0x14, 0x5a, 0x50, 0x08, 0xf0, 0x50, 0xe8, 0x41, 0x41, 0x0f, 0x85,
+  0xd1, 0x84, 0x00, 0xb8, 0xe0, 0xa9, 0x59, 0x82, 0x97, 0x19, 0x6e, 0xe8,
+  0xcf, 0xa0, 0x0f, 0x05, 0x30, 0x98, 0x65, 0xf0, 0x97, 0x97, 0x09, 0xec,
+  0xff, 0x83, 0x10, 0x14, 0xe2, 0x33, 0x1c, 0x91, 0x82, 0x81, 0x08, 0x0a,
+  0xc4, 0x37, 0xcb, 0xf0, 0x2f, 0x22, 0x13, 0xd8, 0x08, 0x0a, 0x2a, 0x18,
+  0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x18,
+  0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0xa7, 0x28, 0xe8, 0x70, 0x43, 0x50,
+  0x8a, 0x02, 0x18, 0xcc, 0x32, 0x80, 0x4c, 0xc8, 0x04, 0x36, 0xac, 0xa0,
+  0x00, 0x9f, 0x59, 0x02, 0x93, 0x31, 0x15, 0x14, 0x88, 0xf8, 0xcc, 0x12,
+  0x98, 0xcc, 0x70, 0x04, 0x0d, 0x06, 0x2b, 0x28, 0x08, 0xdf, 0x2c, 0xc3,
+  0xc8, 0x98, 0x4c, 0x60, 0x35, 0x18, 0xb0, 0xa0, 0x10, 0x1f, 0x0b, 0x1c,
+  0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20,
+  0x3e, 0x45, 0xc8, 0xa2, 0xa0, 0xc3, 0x0d, 0x01, 0x2c, 0x0a, 0x60, 0x30,
+  0xcb, 0x40, 0x32, 0x25, 0x13, 0x18, 0x0d, 0x0a, 0x43, 0x7c, 0x66, 0x09,
+  0x4c, 0xc6, 0x88, 0x1b, 0x14, 0xe0, 0x33, 0x4b, 0x60, 0x32, 0x03, 0x2d,
+  0x8f, 0x06, 0x32, 0x58, 0xc8, 0x10, 0x24, 0x23, 0x94, 0x8c, 0x1f, 0x0a,
+  0x22, 0x73, 0xc1, 0x30, 0x66, 0x83, 0x82, 0x0e, 0x0a, 0xf1, 0x19, 0x8e,
+  0x20, 0x95, 0x1d, 0x14, 0x88, 0x6f, 0x96, 0xe1, 0x64, 0x54, 0x26, 0x30,
+  0x1e, 0x14, 0x4a, 0x25, 0x3e, 0x16, 0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b,
+  0x9e, 0xb2, 0xc0, 0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x00, 0x47, 0x41,
+  0x87, 0x1b, 0x02, 0x5f, 0x14, 0xc0, 0x60, 0x96, 0x01, 0x65, 0x52, 0x26,
+  0xb0, 0x81, 0x0c, 0x05, 0xf8, 0xcc, 0x12, 0xb8, 0x8c, 0x85, 0xa1, 0x40,
+  0xc4, 0x67, 0x96, 0xc0, 0x65, 0x86, 0x23, 0x5e, 0x45, 0x0c, 0x05, 0xe1,
+  0x9b, 0x65, 0x58, 0x19, 0x97, 0x09, 0x0c, 0x56, 0xc6, 0x50, 0x88, 0x8f,
+  0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x88, 0xe4, 0x63,
+  0x45, 0x10, 0x9f, 0x22, 0xd6, 0x51, 0xd0, 0xe1, 0x86, 0x20, 0x1d, 0x05,
+  0x30, 0x98, 0x65, 0x60, 0x99, 0x96, 0x09, 0x6c, 0x0d, 0x85, 0x21, 0x3e,
+  0xb3, 0x04, 0x2e, 0x63, 0x04, 0x1c, 0x0a, 0xf0, 0x99, 0x25, 0x70, 0x99,
+  0x81, 0x96, 0x47, 0x43, 0x19, 0x2c, 0x65, 0x08, 0x96, 0x11, 0x5a, 0x86,
+  0xae, 0x54, 0xe6, 0x82, 0x61, 0x2e, 0x78, 0xea, 0xb6, 0xa7, 0x8e, 0x07,
+  0x85, 0x61, 0x2e, 0xee, 0x83, 0x61, 0x8e, 0x18, 0xe6, 0x88, 0x61, 0x46,
+  0x0c, 0x0e, 0x00, 0x04, 0xc1, 0xe0, 0xcb, 0x47, 0x21, 0x1d, 0x05, 0x54,
+  0x14, 0xec, 0x51, 0x18, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d,
+  0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00,
+  0x41, 0x30, 0xe0, 0x40, 0x52, 0x80, 0x47, 0x21, 0x21, 0x82, 0x11, 0x03,
+  0x04, 0x00, 0x41, 0x30, 0xe0, 0x42, 0x52, 0x88, 0x47, 0x21, 0x21, 0x82,
+  0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xe0, 0x44, 0x52, 0x90, 0x47, 0x21,
+  0x21, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x58, 0x54, 0x52, 0x88,
+  0x47, 0x41, 0x16, 0x85, 0xa0, 0x1f, 0x05, 0x71, 0x14, 0xfe, 0x51, 0x18,
+  0x4d, 0x08, 0x80, 0x0b, 0x9e, 0x9a, 0x25, 0x78, 0x99, 0x81, 0x96, 0xc7,
+  0x34, 0xf4, 0xc5, 0x34, 0x89, 0x7c, 0x61, 0x09, 0x7e, 0x11, 0x5c, 0xc6,
+  0x34, 0x89, 0x7e, 0x99, 0x65, 0x80, 0x19, 0x99, 0x11, 0xd5, 0x60, 0x38,
+  0x42, 0xf6, 0xc8, 0x51, 0x18, 0xbe, 0x9b, 0xbd, 0x61, 0x86, 0x1b, 0x82,
+  0x57, 0x14, 0xc8, 0xa0, 0x86, 0x40, 0x87, 0x23, 0xe6, 0x05, 0x1d, 0x85,
+  0xe1, 0xab, 0x40, 0xd0, 0xab, 0x97, 0x61, 0x86, 0x1b, 0x02, 0x59, 0x14,
+  0xc8, 0xa0, 0x82, 0x41, 0x67, 0x19, 0x62, 0xc6, 0x6c, 0x82, 0xb3, 0x43,
+  0x61, 0x98, 0x5b, 0xfd, 0x60, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30,
+  0xf8, 0x62, 0x52, 0x08, 0x49, 0x01, 0x1c, 0x05, 0x97, 0x14, 0x46, 0x13,
+  0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18,
+  0x8a, 0x38, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x38, 0x9c, 0x14,
+  0x50, 0x52, 0x38, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb8,
+  0x9c, 0x14, 0x52, 0x52, 0x60, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10,
+  0x0c, 0x38, 0x9d, 0x14, 0x54, 0x52, 0x90, 0x88, 0x60, 0xc4, 0x40, 0x01,
+  0x40, 0x10, 0x0c, 0x16, 0xb1, 0x14, 0x52, 0x52, 0x50, 0x47, 0x21, 0xa8,
+  0x49, 0x41, 0x1f, 0x85, 0x9b, 0x14, 0x46, 0x13, 0x02, 0xe0, 0x82, 0xa7,
+  0x66, 0x09, 0xcc, 0x66, 0xb8, 0x41, 0x57, 0x83, 0x9d, 0x14, 0xc0, 0x60,
+  0x96, 0x61, 0x66, 0x68, 0x26, 0xa8, 0x72, 0x14, 0x58, 0x52, 0x80, 0x0b,
+  0x9e, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x8a, 0x2c, 0x85, 0x96,
+  0x14, 0xd8, 0xcf, 0x1f, 0x85, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa0,
+  0xca, 0x52, 0x68, 0x49, 0x21, 0x10, 0x2e, 0x18, 0xa6, 0xd0, 0x51, 0x88,
+  0x49, 0x01, 0x2e, 0x78, 0x6a, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xa8,
+  0xb4, 0x14, 0x64, 0x52, 0x70, 0x99, 0x91, 0x14, 0x46, 0x0c, 0x0e, 0x00,
+  0x04, 0xc1, 0x80, 0x52, 0x4b, 0x41, 0x26, 0x85, 0x40, 0xb8, 0x60, 0x98,
+  0x0b, 0x9e, 0xba, 0xe3, 0xa9, 0x8b, 0x45, 0x61, 0x98, 0x33, 0xff, 0x60,
+  0x98, 0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30,
+  0xf8, 0xdc, 0x52, 0xf0, 0x49, 0xa1, 0x1f, 0x85, 0xb5, 0x14, 0x46, 0x13,
+  0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18,
+  0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xb8, 0xba, 0x14,
+  0xca, 0x52, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x38,
+  0xbb, 0x14, 0xcc, 0x52, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10,
+  0x0c, 0xb8, 0xbb, 0x14, 0xce, 0x52, 0x48, 0x88, 0x60, 0xc4, 0x40, 0x01,
+  0x40, 0x10, 0x0c, 0x96, 0xbf, 0x14, 0xcc, 0x52, 0x38, 0x49, 0x21, 0x90,
+  0x4b, 0xe1, 0x26, 0x05, 0xba, 0x14, 0x46, 0x13, 0x02, 0xe0, 0x82, 0xa7,
+  0x66, 0x09, 0xcc, 0x66, 0xb8, 0xe1, 0x5e, 0x83, 0xbb, 0x14, 0xc0, 0x60,
+  0x96, 0xa1, 0x66, 0xcc, 0x26, 0xb0, 0x7c, 0x14, 0xf6, 0x51, 0x88, 0xcf,
+  0x70, 0x04, 0x0a, 0x06, 0xfc, 0x28, 0x10, 0xdf, 0x2c, 0x83, 0xcd, 0xe4,
+  0x4c, 0x60, 0xfd, 0x28, 0xa4, 0x60, 0x10, 0x1f, 0x0b, 0x06, 0xfa, 0x5c,
+  0x30, 0xcc, 0x05, 0x4f, 0x59, 0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45,
+  0x84, 0xa6, 0xa0, 0xc3, 0x0d, 0xc1, 0x5f, 0x0a, 0x60, 0x30, 0xcb, 0x70,
+  0x33, 0x38, 0x13, 0xd8, 0x50, 0x92, 0x02, 0x7c, 0x66, 0x09, 0x7a, 0xc6,
+  0x48, 0x52, 0x20, 0xe2, 0x33, 0x4b, 0xd0, 0x33, 0xc3, 0x11, 0x33, 0x18,
+  0x94, 0xa4, 0x20, 0x7c, 0xb3, 0x0c, 0x3a, 0xd3, 0x33, 0x81, 0xd1, 0x60,
+  0x60, 0x92, 0x42, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c,
+  0x65, 0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xc1, 0x9a, 0x82, 0x0e,
+  0x37, 0x04, 0xaa, 0x29, 0x80, 0xc1, 0x2c, 0xc3, 0xce, 0xf0, 0x4c, 0x60,
+  0x2e, 0x29, 0x0c, 0xf1, 0x99, 0x25, 0xe8, 0x19, 0x23, 0x62, 0x52, 0x80,
+  0xcf, 0x2c, 0x41, 0xcf, 0x0c, 0xb4, 0x3c, 0xda, 0xcd, 0x60, 0x38, 0x43,
+  0xec, 0x8c, 0xc0, 0x33, 0x7c, 0x28, 0xe4, 0xcc, 0x05, 0xc3, 0x18, 0x4c,
+  0x0a, 0x34, 0x29, 0xc4, 0x67, 0x38, 0xc2, 0x6f, 0x6a, 0x52, 0x20, 0xbe,
+  0x59, 0x06, 0x9f, 0x09, 0x9b, 0xc0, 0x6c, 0x52, 0xf8, 0x9b, 0xf8, 0x58,
+  0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca, 0x02, 0x43, 0x3e, 0x56,
+  0x04, 0xf1, 0x29, 0x42, 0x37, 0x05, 0x1d, 0x6e, 0x08, 0x70, 0x53, 0x00,
+  0x83, 0x59, 0x86, 0x9f, 0x01, 0x9b, 0xc0, 0x06, 0x9f, 0x14, 0xe0, 0x33,
+  0x4b, 0x50, 0x36, 0xb6, 0x93, 0x02, 0x11, 0x9f, 0x59, 0x82, 0xb2, 0x19,
+  0x8e, 0x48, 0x1d, 0x9e, 0x14, 0x84, 0x6f, 0x96, 0x41, 0x6c, 0xca, 0x26,
+  0x30, 0xd5, 0xe9, 0x49, 0x21, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98,
+  0x0b, 0x9e, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x28, 0x4f,
+  0x41, 0x87, 0x1b, 0x82, 0xf1, 0x14, 0xc0, 0x60, 0x96, 0x61, 0x6c, 0xc8,
+  0x26, 0xb0, 0xb2, 0x14, 0x86, 0xf8, 0xcc, 0x12, 0x94, 0x8d, 0x11, 0x6a,
+  0x29, 0xc0, 0x67, 0x96, 0xa0, 0x6c, 0x06, 0x5a, 0x1e, 0xed, 0x67, 0x30,
+  0xb0, 0x21, 0xc6, 0x46, 0x20, 0x1b, 0xb4, 0x0b, 0x9b, 0x0b, 0x86, 0xb9,
+  0xe0, 0xa9, 0xdb, 0x9e, 0x3a, 0x9b, 0x14, 0x86, 0xb9, 0x35, 0x14, 0x86,
+  0x39, 0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83,
+  0x6f, 0x3e, 0x85, 0xf1, 0x14, 0x44, 0x53, 0x80, 0x4f, 0x61, 0x34, 0x21,
+  0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1,
+  0x88, 0x44, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80, 0xd3, 0x4f, 0x41,
+  0x3d, 0x85, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80, 0xdb,
+  0x4f, 0x61, 0x3d, 0x85, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1,
+  0x80, 0xe3, 0x4f, 0x81, 0x3d, 0x85, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00,
+  0x04, 0xc1, 0x60, 0x21, 0x51, 0x61, 0x3d, 0x05, 0xd6, 0x14, 0x82, 0xfb,
+  0x14, 0x78, 0x53, 0xc8, 0x4f, 0x61, 0x34, 0x21, 0x00, 0x2e, 0x78, 0x6a,
+  0x96, 0xc0, 0x6c, 0x06, 0x5a, 0x1e, 0xd3, 0x88, 0x19, 0xbd, 0x25, 0x60,
+  0x86, 0x25, 0x66, 0x46, 0x28, 0x1b, 0xbd, 0x25, 0x68, 0xc6, 0xfe, 0x36,
+  0x98, 0x4d, 0x01, 0x3e, 0xb3, 0x0c, 0x67, 0x93, 0x36, 0x7d, 0x1b, 0x0c,
+  0x47, 0x84, 0x6e, 0xf0, 0x9b, 0xc2, 0xf0, 0x9d, 0xe8, 0x06, 0xc3, 0x0c,
+  0x37, 0x04, 0xaa, 0x29, 0x90, 0x41, 0x0d, 0x81, 0x0e, 0x47, 0x14, 0xe3,
+  0x29, 0x0c, 0x5f, 0x05, 0x82, 0xde, 0x31, 0xcc, 0x70, 0x43, 0xd0, 0x9a,
+  0x02, 0x19, 0x54, 0x30, 0xe8, 0x2c, 0x03, 0xda, 0xf4, 0x4d, 0x70, 0x71,
+  0x29, 0x0c, 0x73, 0xa6, 0x28, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08,
+  0x06, 0x1f, 0x8b, 0x0a, 0xfc, 0x29, 0xec, 0xa6, 0x90, 0xa2, 0xc2, 0x68,
+  0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10,
+  0x43, 0x11, 0x87, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x37, 0xa3,
+  0xc2, 0x88, 0x0a, 0x07, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01,
+  0x47, 0xa3, 0x02, 0x89, 0x0a, 0x0c, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08,
+  0x82, 0x01, 0x57, 0xa3, 0x42, 0x89, 0x0a, 0x12, 0x11, 0x8c, 0x18, 0x28,
+  0x00, 0x08, 0x82, 0xc1, 0xd2, 0xa3, 0x02, 0x89, 0x0a, 0xe5, 0x29, 0x04,
+  0x30, 0x2a, 0xd4, 0xa7, 0x20, 0xa3, 0xc2, 0x68, 0x42, 0x00, 0x5c, 0xf0,
+  0xd4, 0x2c, 0x41, 0xdf, 0x0c, 0x37, 0xd4, 0x6e, 0x60, 0xa3, 0x02, 0x18,
+  0xcc, 0x32, 0xa8, 0xcd, 0xda, 0x04, 0x05, 0x9e, 0xc2, 0x89, 0x0a, 0x70,
+  0xc1, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0xfd, 0xa8, 0x80,
+  0xa2, 0xc2, 0xee, 0x06, 0xf9, 0x29, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82,
+  0x01, 0x05, 0xa6, 0x02, 0x8a, 0x0a, 0x81, 0x70, 0xc1, 0x30, 0x35, 0x9e,
+  0x02, 0x8b, 0x0a, 0x70, 0xc1, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60,
+  0x40, 0x91, 0xa9, 0xd0, 0xa2, 0x02, 0x18, 0xf8, 0xa7, 0x30, 0x62, 0x70,
+  0x00, 0x20, 0x08, 0x06, 0x54, 0x99, 0x0a, 0x2d, 0x2a, 0x04, 0xc2, 0x05,
+  0xc3, 0x5c, 0xf0, 0xd4, 0x1d, 0x4f, 0x1d, 0x6b, 0x0a, 0xc3, 0x5c, 0x38,
+  0x0a, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08,
+  0x82, 0xc1, 0x97, 0xa6, 0x42, 0x8e, 0x0a, 0xf8, 0x29, 0x98, 0xa9, 0x30,
+  0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09,
+  0xc4, 0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xc0, 0xc1,
+  0xa9, 0x00, 0xa6, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60,
+  0xc0, 0xc5, 0xa9, 0x10, 0xa6, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00,
+  0x82, 0x60, 0xc0, 0xc9, 0xa9, 0x20, 0xa6, 0x42, 0x42, 0x04, 0x23, 0x06,
+  0x0a, 0x00, 0x82, 0x60, 0xb0, 0xe8, 0xa9, 0x10, 0xa6, 0x82, 0x88, 0x0a,
+  0x41, 0x9b, 0x0a, 0x32, 0x2a, 0xbc, 0xa9, 0x30, 0x9a, 0x10, 0x00, 0x17,
+  0x3c, 0x35, 0x4b, 0xd0, 0x37, 0xc3, 0x0d, 0xf2, 0x1b, 0xc8, 0xa9, 0x00,
+  0x06, 0xb3, 0x0c, 0x6c, 0xd3, 0x37, 0x81, 0xd1, 0xa7, 0x60, 0x9f, 0x42,
+  0x7c, 0x86, 0x23, 0xee, 0x37, 0xb8, 0x4f, 0x81, 0xf8, 0x66, 0x19, 0xda,
+  0x06, 0x6e, 0x02, 0xc3, 0x4f, 0x01, 0x7f, 0x83, 0xf8, 0x58, 0x30, 0xd0,
+  0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1,
+  0x29, 0x82, 0x4f, 0x05, 0x1d, 0x6e, 0x08, 0xf4, 0x54, 0x00, 0x83, 0x59,
+  0x06, 0xb7, 0x79, 0x9b, 0xc0, 0x06, 0x10, 0x15, 0xe0, 0x33, 0x4b, 0x40,
+  0x37, 0xf6, 0x9f, 0x02, 0x11, 0x9f, 0x59, 0x02, 0xba, 0x19, 0x8e, 0x10,
+  0xe1, 0x00, 0x44, 0x05, 0xe1, 0x9b, 0x65, 0x88, 0x1b, 0xba, 0x09, 0x6c,
+  0x84, 0x83, 0x10, 0x15, 0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9,
+  0xe0, 0x29, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0x53, 0x15,
+  0x74, 0xb8, 0x21, 0x28, 0x55, 0x01, 0x0c, 0x66, 0x19, 0xe4, 0x66, 0x6e,
+  0x02, 0x4b, 0x51, 0x61, 0x88, 0xcf, 0x2c, 0x01, 0xdd, 0x18, 0xc1, 0xa2,
+  0x02, 0x7c, 0x66, 0x09, 0xe8, 0x66, 0xa0, 0xe5, 0xd1, 0xdc, 0x06, 0x7b,
+  0x1b, 0x42, 0x6e, 0x84, 0xb9, 0xd1, 0xc1, 0x01, 0x6e, 0x2e, 0x18, 0xc6,
+  0x56, 0x54, 0x78, 0x51, 0x21, 0x3e, 0xc3, 0x11, 0xb0, 0x00, 0xa3, 0x02,
+  0xf1, 0xcd, 0x32, 0xd4, 0x0d, 0xde, 0x04, 0x16, 0xa3, 0x42, 0x2c, 0xc4,
+  0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x18, 0xf2,
+  0xb1, 0x22, 0x88, 0x4f, 0x11, 0xb5, 0x2a, 0xe8, 0x70, 0x43, 0x30, 0xab,
+  0x02, 0x18, 0xcc, 0x32, 0xd8, 0xcd, 0xdd, 0x04, 0x36, 0xe4, 0xa8, 0x00,
+  0x9f, 0x59, 0x02, 0xbe, 0x31, 0x1b, 0x15, 0x88, 0xf8, 0xcc, 0x12, 0xf0,
+  0xcd, 0x70, 0xc4, 0x2e, 0xdc, 0xa8, 0x20, 0x7c, 0xb3, 0x0c, 0x79, 0xc3,
+  0x37, 0x81, 0xf1, 0x02, 0x8e, 0x0a, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05,
+  0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x04,
+  0xb8, 0x0a, 0x3a, 0xdc, 0x10, 0xf8, 0xaa, 0x00, 0x06, 0xb3, 0x0c, 0x7a,
+  0xb3, 0x37, 0x81, 0x81, 0xa9, 0x30, 0xc4, 0x67, 0x96, 0x80, 0x6f, 0x8c,
+  0x28, 0x53, 0x01, 0x3e, 0xb3, 0x04, 0x7c, 0x33, 0xd0, 0xf2, 0x68, 0x76,
+  0x83, 0xdd, 0x0d, 0xa1, 0x37, 0xc2, 0xde, 0xb0, 0x06, 0xde, 0x5c, 0x30,
+  0xcc, 0x05, 0x4f, 0xdd, 0xf6, 0xd4, 0xc5, 0xa8, 0x30, 0xcc, 0x99, 0xa5,
+  0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20,
+  0x18, 0x7c, 0xee, 0x2a, 0xf8, 0xaa, 0xd0, 0xa7, 0xc2, 0xba, 0x0a, 0xa3,
+  0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40,
+  0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x5c, 0xbd,
+  0x0a, 0xe5, 0x2a, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06,
+  0x9c, 0xbd, 0x0a, 0xe6, 0x2a, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20,
+  0x08, 0x06, 0xdc, 0xbd, 0x0a, 0xe7, 0x2a, 0x24, 0x44, 0x30, 0x62, 0xa0,
+  0x00, 0x20, 0x08, 0x06, 0xcb, 0xbf, 0x0a, 0xe6, 0x2a, 0x9c, 0xaa, 0x10,
+  0xc8, 0xab, 0x70, 0xab, 0x02, 0xbd, 0x0a, 0xa3, 0x09, 0x01, 0x70, 0xc1,
+  0x53, 0xb3, 0x04, 0x7d, 0x33, 0xd0, 0xf2, 0x98, 0x06, 0xda, 0xc0, 0x35,
+  0x71, 0x36, 0x2c, 0xa1, 0x36, 0x02, 0xdf, 0xc0, 0x35, 0xb1, 0x36, 0xb3,
+  0x0c, 0x7e, 0x03, 0x3a, 0x77, 0x1c, 0x0c, 0x47, 0xf0, 0x71, 0x90, 0xab,
+  0xc2, 0xf0, 0x5d, 0x1f, 0x07, 0xc3, 0x0c, 0x37, 0x04, 0xa4, 0x2a, 0x90,
+  0x41, 0x0d, 0x81, 0x0e, 0x47, 0xfc, 0x43, 0xaf, 0x0a, 0xc3, 0x57, 0x81,
+  0xa0, 0x17, 0x12, 0xc3, 0x0c, 0x37, 0x04, 0xa7, 0x2a, 0x90, 0x41, 0x05,
+  0x83, 0xce, 0x32, 0xfc, 0x0d, 0xed, 0x04, 0xb7, 0xa6, 0xc2, 0x30, 0x07,
+  0x9a, 0xc2, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xf0, 0x99, 0xac,
+  0x60, 0xaf, 0x42, 0xad, 0x0a, 0x23, 0x2b, 0x8c, 0x26, 0x04, 0xc0, 0x68,
+  0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x71, 0xc8,
+  0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70, 0x2d, 0x2b, 0xf4, 0xab, 0x70,
+  0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70, 0x2e, 0x2b, 0xf8,
+  0xab, 0xc0, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70, 0x2f,
+  0x2b, 0xfc, 0xab, 0x20, 0x11, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18,
+  0x2c, 0x37, 0x2b, 0xf8, 0xab, 0xf0, 0xab, 0x42, 0xa0, 0xb2, 0xc2, 0xbb,
+  0x0a, 0x2c, 0x2b, 0x8c, 0x26, 0x04, 0xc0, 0x05, 0x4f, 0xcd, 0x12, 0xd0,
+  0xce, 0x70, 0xc3, 0x2b, 0x07, 0x30, 0x2b, 0x80, 0xc1, 0x2c, 0x43, 0xe8,
+  0x88, 0x4e, 0x50, 0xba, 0x2a, 0x84, 0xac, 0x00, 0x17, 0x3c, 0x35, 0x62,
+  0x70, 0x00, 0x20, 0x08, 0x06, 0x54, 0xce, 0x0a, 0x22, 0x2b, 0xd8, 0x72,
+  0x30, 0xaf, 0xc2, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50, 0x3a, 0x2b,
+  0x88, 0xac, 0x10, 0x08, 0x17, 0x0c, 0x53, 0xbd, 0x2a, 0x98, 0xac, 0x00,
+  0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x94, 0xcf, 0x0a,
+  0x27, 0x2b, 0xe8, 0x04, 0xbe, 0x0a, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60,
+  0x40, 0xfd, 0xac, 0x70, 0xb2, 0x42, 0x20, 0x5c, 0x30, 0xcc, 0x05, 0x4f,
+  0xdd, 0xf1, 0xd4, 0x99, 0xaa, 0x30, 0xcc, 0xed, 0xa6, 0x30, 0xcc, 0x11,
+  0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x7c, 0x63,
+  0x2b, 0xcc, 0xac, 0x20, 0xaf, 0x02, 0xd8, 0x0a, 0xa3, 0x09, 0x01, 0x30,
+  0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24,
+  0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x9c, 0xda, 0x0a, 0x3a, 0x2b,
+  0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xdc, 0xda, 0x0a,
+  0x3b, 0x2b, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x1c,
+  0xdb, 0x0a, 0x3c, 0x2b, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08,
+  0x06, 0x0b, 0xdd, 0x0a, 0x3b, 0x2b, 0xf0, 0xab, 0x10, 0x9c, 0xad, 0xc0,
+  0xb2, 0x42, 0xda, 0x0a, 0xa3, 0x09, 0x01, 0x70, 0xc1, 0x53, 0xb3, 0x04,
+  0xb4, 0x33, 0xdc, 0xc0, 0xce, 0x01, 0xdb, 0x0a, 0x60, 0x30, 0xcb, 0x30,
+  0x3a, 0xb4, 0x13, 0x98, 0xbb, 0x0a, 0xf0, 0x2a, 0xc4, 0x67, 0x38, 0x42,
+  0x9e, 0x83, 0x78, 0x15, 0x88, 0x6f, 0x96, 0x81, 0x74, 0x4e, 0x27, 0x30,
+  0x79, 0x15, 0xe6, 0x39, 0x88, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6,
+  0x82, 0xa7, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xec, 0x56,
+  0xd0, 0xe1, 0x86, 0x80, 0x6e, 0x05, 0x30, 0x98, 0x65, 0x28, 0x1d, 0xd3,
+  0x09, 0x6c, 0xd0, 0x57, 0x01, 0x3e, 0xb3, 0x04, 0xab, 0x63, 0xf9, 0x2a,
+  0x10, 0xf1, 0x99, 0x25, 0x58, 0x9d, 0xe1, 0x88, 0x7e, 0x0e, 0xf4, 0x55,
+  0x10, 0xbe, 0x59, 0x06, 0xd4, 0x59, 0x9d, 0xc0, 0xfc, 0x39, 0xd8, 0x57,
   0x21, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20,
-  0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x28, 0x4f, 0x41, 0x87, 0x1b, 0x82,
-  0xf1, 0x14, 0xc0, 0x60, 0x96, 0x61, 0x6c, 0xc8, 0x26, 0xb0, 0xb2, 0x14,
-  0x86, 0xf8, 0xcc, 0x12, 0x94, 0x8d, 0x11, 0x6a, 0x29, 0xc0, 0x67, 0x96,
-  0xa0, 0x6c, 0x06, 0x5a, 0x1e, 0xed, 0x67, 0x30, 0xb0, 0x21, 0xc6, 0x46,
-  0x20, 0x1b, 0xb4, 0x0b, 0x9b, 0x0b, 0x86, 0xb9, 0xe0, 0xa9, 0xdb, 0x9e,
-  0x3a, 0x9b, 0x14, 0x86, 0xb9, 0x35, 0x14, 0x86, 0x39, 0x62, 0x98, 0x23,
-  0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x6f, 0x3e, 0x85, 0xf1,
-  0x14, 0x44, 0x53, 0x80, 0x4f, 0x61, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84,
-  0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c,
-  0x10, 0x00, 0x04, 0xc1, 0x80, 0xd3, 0x4f, 0x41, 0x3d, 0x85, 0x84, 0x08,
-  0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80, 0xdb, 0x4f, 0x61, 0x3d, 0x85,
-  0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80, 0xe3, 0x4f, 0x81,
-  0x3d, 0x85, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0x21,
-  0x51, 0x61, 0x3d, 0x05, 0xd6, 0x14, 0x82, 0xfb, 0x14, 0x78, 0x53, 0xc8,
-  0x4f, 0x61, 0x34, 0x21, 0x00, 0x2e, 0x78, 0x6a, 0x96, 0xc0, 0x6c, 0x06,
-  0x5a, 0x1e, 0xd3, 0x88, 0x19, 0xbd, 0x25, 0x60, 0x86, 0x25, 0x66, 0x46,
-  0x28, 0x1b, 0xbd, 0x25, 0x68, 0xc6, 0xfe, 0x36, 0x98, 0x4d, 0x01, 0x3e,
-  0xb3, 0x0c, 0x67, 0x93, 0x36, 0x7d, 0x1b, 0x0c, 0x47, 0x84, 0x6e, 0xf0,
-  0x9b, 0xc2, 0xf0, 0x9d, 0xe8, 0x06, 0xc3, 0x0c, 0x37, 0x04, 0xaa, 0x29,
-  0x90, 0x41, 0x0d, 0x81, 0x0e, 0x47, 0x14, 0xe3, 0x29, 0x0c, 0x5f, 0x05,
-  0x82, 0xde, 0x31, 0xcc, 0x70, 0x43, 0xd0, 0x9a, 0x02, 0x19, 0x54, 0x30,
-  0xe8, 0x2c, 0x03, 0xda, 0xf4, 0x4d, 0x70, 0x71, 0x29, 0x0c, 0x73, 0xa6,
-  0x28, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x1f, 0x8b, 0x0a,
-  0xfc, 0x29, 0xec, 0xa6, 0x90, 0xa2, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26,
-  0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x87, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x37, 0xa3, 0xc2, 0x88, 0x0a, 0x07,
-  0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x47, 0xa3, 0x02, 0x89,
-  0x0a, 0x0c, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x57, 0xa3,
-  0x42, 0x89, 0x0a, 0x12, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1,
-  0xd2, 0xa3, 0x02, 0x89, 0x0a, 0xe5, 0x29, 0x04, 0x30, 0x2a, 0xd4, 0xa7,
-  0x20, 0xa3, 0xc2, 0x68, 0x42, 0x00, 0x5c, 0xf0, 0xd4, 0x2c, 0x41, 0xdf,
-  0x0c, 0x37, 0xd4, 0x6e, 0x60, 0xa3, 0x02, 0x18, 0xcc, 0x32, 0xa8, 0xcd,
-  0xda, 0x04, 0x05, 0x9e, 0xc2, 0x89, 0x0a, 0x70, 0xc1, 0x53, 0x23, 0x06,
-  0x07, 0x00, 0x82, 0x60, 0x40, 0xfd, 0xa8, 0x80, 0xa2, 0xc2, 0xee, 0x06,
-  0xf9, 0x29, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x01, 0x05, 0xa6, 0x02,
-  0x8a, 0x0a, 0x81, 0x70, 0xc1, 0x30, 0x35, 0x9e, 0x02, 0x8b, 0x0a, 0x70,
-  0xc1, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0x91, 0xa9, 0xd0,
-  0xa2, 0x02, 0x18, 0xf8, 0xa7, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06,
-  0x54, 0x99, 0x0a, 0x2d, 0x2a, 0x04, 0xc2, 0x05, 0xc3, 0x5c, 0xf0, 0xd4,
-  0x1d, 0x4f, 0x1d, 0x6b, 0x0a, 0xc3, 0x5c, 0x38, 0x0a, 0xc3, 0x1c, 0x31,
-  0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x97, 0xa6,
-  0x42, 0x8e, 0x0a, 0xf8, 0x29, 0x98, 0xa9, 0x30, 0x9a, 0x10, 0x00, 0xa3,
+  0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x08, 0x5d, 0x41, 0x87, 0x1b, 0x82,
+  0xbf, 0x15, 0xc0, 0x60, 0x96, 0x21, 0x75, 0x54, 0x27, 0xb0, 0x91, 0x15,
+  0x86, 0xf8, 0xcc, 0x12, 0xac, 0x8e, 0x11, 0x26, 0x2b, 0xc0, 0x67, 0x96,
+  0x60, 0x75, 0x06, 0x5a, 0x1e, 0xad, 0x74, 0x30, 0xd3, 0x21, 0x52, 0x47,
+  0x50, 0x1d, 0x18, 0x1d, 0x4e, 0xe7, 0x82, 0x61, 0xac, 0x64, 0x85, 0x94,
+  0x15, 0xe2, 0x33, 0x1c, 0xa1, 0x1a, 0x2a, 0x2b, 0x10, 0xdf, 0x2c, 0x03,
+  0xeb, 0xbc, 0x4e, 0x60, 0x2b, 0x2b, 0xac, 0x46, 0x7c, 0x2c, 0x18, 0xe8,
+  0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8,
+  0x14, 0xf1, 0xba, 0x82, 0x0e, 0x37, 0x04, 0xad, 0x2b, 0x80, 0xc1, 0x2c,
+  0x43, 0xeb, 0xb8, 0x4e, 0x60, 0xc3, 0xcc, 0x0a, 0xf0, 0x99, 0x25, 0x98,
+  0x1d, 0x83, 0x59, 0x81, 0x88, 0xcf, 0x2c, 0xc1, 0xec, 0x0c, 0x47, 0xd4,
+  0x46, 0xcc, 0x0a, 0xc2, 0x37, 0xcb, 0x00, 0x3b, 0xb3, 0x13, 0x98, 0x6d,
+  0xc8, 0xac, 0x10, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f,
+  0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xe8, 0xae, 0xa0, 0xc3,
+  0x0d, 0x01, 0xee, 0x0a, 0x60, 0x30, 0xcb, 0x10, 0x3b, 0xb2, 0x13, 0x98,
+  0xce, 0x0a, 0x43, 0x7c, 0x66, 0x09, 0x66, 0xc7, 0x88, 0x9f, 0x15, 0xe0,
+  0x33, 0x4b, 0x30, 0x3b, 0x03, 0x2d, 0x8f, 0xd6, 0x3a, 0x98, 0xeb, 0x10,
+  0xb1, 0x23, 0xc8, 0x0e, 0xe8, 0xbc, 0xce, 0x05, 0xc3, 0x5c, 0xf0, 0xd4,
+  0x6d, 0x4f, 0xdd, 0xca, 0x0a, 0xc3, 0x1c, 0x98, 0x0a, 0xc3, 0x1c, 0x31,
+  0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x87, 0xbe,
+  0x02, 0xee, 0x0a, 0x77, 0x2b, 0x94, 0xaf, 0x30, 0x9a, 0x10, 0x00, 0xa3,
   0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22,
-  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xc0, 0xc1, 0xa9, 0x00, 0xa6, 0x42,
-  0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xc0, 0xc5, 0xa9, 0x10,
-  0xa6, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xc0, 0xc9,
-  0xa9, 0x20, 0xa6, 0x42, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60,
-  0xb0, 0xe8, 0xa9, 0x10, 0xa6, 0x82, 0x88, 0x0a, 0x41, 0x9b, 0x0a, 0x32,
-  0x2a, 0xbc, 0xa9, 0x30, 0x9a, 0x10, 0x00, 0x17, 0x3c, 0x35, 0x4b, 0xd0,
-  0x37, 0xc3, 0x0d, 0xf2, 0x1b, 0xc8, 0xa9, 0x00, 0x06, 0xb3, 0x0c, 0x6c,
-  0xd3, 0x37, 0x81, 0xd1, 0xa7, 0x60, 0x9f, 0x42, 0x7c, 0x86, 0x23, 0xee,
-  0x37, 0xb8, 0x4f, 0x81, 0xf8, 0x66, 0x19, 0xda, 0x06, 0x6e, 0x02, 0xc3,
-  0x4f, 0x01, 0x7f, 0x83, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e,
-  0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x82, 0x4f, 0x05,
-  0x1d, 0x6e, 0x08, 0xf4, 0x54, 0x00, 0x83, 0x59, 0x06, 0xb7, 0x79, 0x9b,
-  0xc0, 0x06, 0x10, 0x15, 0xe0, 0x33, 0x4b, 0x40, 0x37, 0xf6, 0x9f, 0x02,
-  0x11, 0x9f, 0x59, 0x02, 0xba, 0x19, 0x8e, 0x10, 0xe1, 0x00, 0x44, 0x05,
-  0xe1, 0x9b, 0x65, 0x88, 0x1b, 0xba, 0x09, 0x6c, 0x84, 0x83, 0x10, 0x15,
-  0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x22,
-  0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0x53, 0x15, 0x74, 0xb8, 0x21, 0x28,
-  0x55, 0x01, 0x0c, 0x66, 0x19, 0xe4, 0x66, 0x6e, 0x02, 0x4b, 0x51, 0x61,
-  0x88, 0xcf, 0x2c, 0x01, 0xdd, 0x18, 0xc1, 0xa2, 0x02, 0x7c, 0x66, 0x09,
-  0xe8, 0x66, 0xa0, 0xe5, 0xd1, 0xdc, 0x06, 0x7b, 0x1b, 0x42, 0x6e, 0x84,
-  0xb9, 0xd1, 0xc1, 0x01, 0x6e, 0x2e, 0x18, 0xc6, 0x56, 0x54, 0x78, 0x51,
-  0x21, 0x3e, 0xc3, 0x11, 0xb0, 0x00, 0xa3, 0x02, 0xf1, 0xcd, 0x32, 0xd4,
-  0x0d, 0xde, 0x04, 0x16, 0xa3, 0x42, 0x2c, 0xc4, 0xc7, 0x82, 0x81, 0x3e,
-  0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f,
-  0x11, 0xb5, 0x2a, 0xe8, 0x70, 0x43, 0x30, 0xab, 0x02, 0x18, 0xcc, 0x32,
-  0xd8, 0xcd, 0xdd, 0x04, 0x36, 0xe4, 0xa8, 0x00, 0x9f, 0x59, 0x02, 0xbe,
-  0x31, 0x1b, 0x15, 0x88, 0xf8, 0xcc, 0x12, 0xf0, 0xcd, 0x70, 0xc4, 0x2e,
-  0xdc, 0xa8, 0x20, 0x7c, 0xb3, 0x0c, 0x79, 0xc3, 0x37, 0x81, 0xf1, 0x02,
-  0x8e, 0x0a, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94,
-  0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x04, 0xb8, 0x0a, 0x3a, 0xdc,
-  0x10, 0xf8, 0xaa, 0x00, 0x06, 0xb3, 0x0c, 0x7a, 0xb3, 0x37, 0x81, 0x81,
-  0xa9, 0x30, 0xc4, 0x67, 0x96, 0x80, 0x6f, 0x8c, 0x28, 0x53, 0x01, 0x3e,
-  0xb3, 0x04, 0x7c, 0x33, 0xd0, 0xf2, 0x68, 0x76, 0x83, 0xdd, 0x0d, 0xa1,
-  0x37, 0xc2, 0xde, 0xb0, 0x06, 0xde, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0xdd,
-  0xf6, 0xd4, 0xc5, 0xa8, 0x30, 0xcc, 0x99, 0xa5, 0x30, 0xcc, 0x11, 0xc3,
-  0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x7c, 0xee, 0x2a,
-  0xf8, 0xaa, 0xd0, 0xa7, 0xc2, 0xba, 0x0a, 0xa3, 0x09, 0x01, 0x30, 0x9a,
-  0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32,
-  0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x5c, 0xbd, 0x0a, 0xe5, 0x2a, 0x24,
-  0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x9c, 0xbd, 0x0a, 0xe6,
-  0x2a, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xdc, 0xbd,
-  0x0a, 0xe7, 0x2a, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06,
-  0xcb, 0xbf, 0x0a, 0xe6, 0x2a, 0x9c, 0xaa, 0x10, 0xc8, 0xab, 0x70, 0xab,
-  0x02, 0xbd, 0x0a, 0xa3, 0x09, 0x01, 0x70, 0xc1, 0x53, 0xb3, 0x04, 0x7d,
-  0x33, 0xd0, 0xf2, 0x98, 0x06, 0xda, 0xc0, 0x35, 0x71, 0x36, 0x2c, 0xa1,
-  0x36, 0x02, 0xdf, 0xc0, 0x35, 0xb1, 0x36, 0xb3, 0x0c, 0x7e, 0x03, 0x3a,
-  0x77, 0x1c, 0x0c, 0x47, 0xf0, 0x71, 0x90, 0xab, 0xc2, 0xf0, 0x5d, 0x1f,
-  0x07, 0xc3, 0x0c, 0x37, 0x04, 0xa4, 0x2a, 0x90, 0x41, 0x0d, 0x81, 0x0e,
-  0x47, 0xfc, 0x43, 0xaf, 0x0a, 0xc3, 0x57, 0x81, 0xa0, 0x17, 0x12, 0xc3,
-  0x0c, 0x37, 0x04, 0xa7, 0x2a, 0x90, 0x41, 0x05, 0x83, 0xce, 0x32, 0xfc,
-  0x0d, 0xed, 0x04, 0xb7, 0xa6, 0xc2, 0x30, 0x07, 0x9a, 0xc2, 0x30, 0x23,
-  0x06, 0x07, 0x00, 0x82, 0x60, 0xf0, 0x99, 0xac, 0x60, 0xaf, 0x42, 0xad,
-  0x0a, 0x23, 0x2b, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26,
-  0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x71, 0xc8, 0x88, 0x01, 0x02, 0x80,
-  0x20, 0x18, 0x70, 0x2d, 0x2b, 0xf4, 0xab, 0x70, 0x10, 0xc1, 0x88, 0x01,
-  0x02, 0x80, 0x20, 0x18, 0x70, 0x2e, 0x2b, 0xf8, 0xab, 0xc0, 0x10, 0xc1,
-  0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70, 0x2f, 0x2b, 0xfc, 0xab, 0x20,
-  0x11, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x2c, 0x37, 0x2b, 0xf8,
-  0xab, 0xf0, 0xab, 0x42, 0xa0, 0xb2, 0xc2, 0xbb, 0x0a, 0x2c, 0x2b, 0x8c,
-  0x26, 0x04, 0xc0, 0x05, 0x4f, 0xcd, 0x12, 0xd0, 0xce, 0x70, 0xc3, 0x2b,
-  0x07, 0x30, 0x2b, 0x80, 0xc1, 0x2c, 0x43, 0xe8, 0x88, 0x4e, 0x50, 0xba,
-  0x2a, 0x84, 0xac, 0x00, 0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08,
-  0x06, 0x54, 0xce, 0x0a, 0x22, 0x2b, 0xd8, 0x72, 0x30, 0xaf, 0xc2, 0x88,
-  0xc1, 0x01, 0x80, 0x20, 0x18, 0x50, 0x3a, 0x2b, 0x88, 0xac, 0x10, 0x08,
-  0x17, 0x0c, 0x53, 0xbd, 0x2a, 0x98, 0xac, 0x00, 0x17, 0x3c, 0x35, 0x62,
-  0x70, 0x00, 0x20, 0x08, 0x06, 0x94, 0xcf, 0x0a, 0x27, 0x2b, 0xe8, 0x04,
-  0xbe, 0x0a, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0xfd, 0xac, 0x70,
-  0xb2, 0x42, 0x20, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0xdd, 0xf1, 0xd4, 0x99,
-  0xaa, 0x30, 0xcc, 0xed, 0xa6, 0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc,
-  0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x7c, 0x63, 0x2b, 0xcc, 0xac, 0x20,
-  0xaf, 0x02, 0xd8, 0x0a, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3,
+  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xc0, 0xbd, 0xaf, 0xf0, 0xbb, 0x42,
+  0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xc0, 0xc1, 0xaf, 0x00,
+  0xbe, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xc0, 0xc5,
+  0xaf, 0x10, 0xbe, 0x42, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60,
+  0xb0, 0xe4, 0xaf, 0x00, 0xbe, 0x42, 0xe8, 0x0a, 0x01, 0xfb, 0x0a, 0xb1,
+  0x2b, 0xb8, 0xaf, 0x30, 0x9a, 0x10, 0x00, 0x17, 0x3c, 0x35, 0x4b, 0x40,
+  0x3b, 0x03, 0x2d, 0x8f, 0x69, 0xfc, 0x0d, 0xd9, 0x13, 0x7e, 0xc3, 0x12,
+  0xa1, 0x23, 0xcc, 0x0e, 0xd9, 0x13, 0xa2, 0x33, 0xcb, 0x50, 0x3b, 0xb7,
+  0x13, 0xd7, 0xc1, 0x70, 0x04, 0xdf, 0x06, 0xb3, 0x2b, 0x0c, 0xdf, 0xf5,
+  0x6d, 0x30, 0xcc, 0x70, 0x43, 0xe0, 0xb7, 0x02, 0x19, 0xd4, 0x10, 0xe8,
+  0x70, 0x44, 0x7e, 0xdc, 0xae, 0x30, 0x7c, 0x15, 0x08, 0x7a, 0xfb, 0x31,
+  0xcc, 0x70, 0x43, 0x10, 0xba, 0x02, 0x19, 0x54, 0x30, 0xe8, 0x2c, 0x83,
+  0xed, 0xac, 0x4f, 0x70, 0x65, 0x2b, 0x0c, 0x73, 0x7a, 0x2a, 0x0c, 0x33,
+  0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x1f, 0x08, 0x0b, 0xf0, 0x2b, 0xbc,
+  0xae, 0xd0, 0xbf, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68,
+  0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x87, 0x8c, 0x18, 0x20, 0x00,
+  0x08, 0x82, 0x01, 0x77, 0xc2, 0xc2, 0xfd, 0x0a, 0x07, 0x11, 0x8c, 0x18,
+  0x20, 0x00, 0x08, 0x82, 0x01, 0x87, 0xc2, 0x02, 0xfe, 0x0a, 0x0c, 0x11,
+  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x97, 0xc2, 0x42, 0xfe, 0x0a,
+  0x12, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0x12, 0xc3, 0x02,
+  0xfe, 0x0a, 0xb9, 0x2b, 0x04, 0x24, 0x2c, 0xa4, 0xaf, 0x60, 0xc2, 0xc2,
+  0x68, 0x42, 0x00, 0x5c, 0xf0, 0xd4, 0x2c, 0xc1, 0xfa, 0x0c, 0x37, 0xa4,
+  0x76, 0xa0, 0xc2, 0x02, 0x18, 0xcc, 0x32, 0xe0, 0x4e, 0xee, 0x04, 0x45,
+  0xbb, 0xc2, 0xfe, 0x0a, 0x70, 0xc1, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82,
+  0x60, 0x40, 0xcd, 0xb0, 0xc0, 0xbf, 0x82, 0xed, 0x06, 0xed, 0x2b, 0x8c,
+  0x18, 0x1c, 0x00, 0x08, 0x82, 0x01, 0x45, 0xc3, 0x02, 0xff, 0x0a, 0x81,
+  0x70, 0xc1, 0x30, 0x75, 0xbb, 0x02, 0x08, 0x0b, 0x70, 0xc1, 0x53, 0x23,
+  0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0xe1, 0xb0, 0x10, 0xc2, 0x02, 0x8d,
+  0xc8, 0xaf, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x54, 0x0e, 0x0b,
+  0x21, 0x2c, 0x04, 0xc2, 0x05, 0xc3, 0x5c, 0xf0, 0xd4, 0x1d, 0x4f, 0x1d,
+  0xe8, 0x0a, 0xc3, 0x5c, 0xad, 0x0a, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3,
+  0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0xd7, 0xc3, 0x42, 0x0b, 0x0b,
+  0xec, 0x2b, 0xe8, 0xb0, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30,
+  0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23, 0x06, 0x08,
+  0x00, 0x82, 0x60, 0xc0, 0x91, 0xb1, 0x40, 0xc3, 0x42, 0x42, 0x04, 0x23,
+  0x06, 0x08, 0x00, 0x82, 0x60, 0xc0, 0x95, 0xb1, 0x50, 0xc3, 0x42, 0x42,
+  0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xc0, 0x99, 0xb1, 0x60, 0xc3,
+  0x42, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0xb8, 0xb1,
+  0x50, 0xc3, 0x82, 0xfd, 0x0a, 0x41, 0x18, 0x0b, 0x26, 0x2c, 0x8c, 0xb1,
+  0x30, 0x9a, 0x10, 0x00, 0x17, 0x3c, 0x35, 0x4b, 0xb0, 0x3e, 0xc3, 0x0d,
+  0xe6, 0x1d, 0x98, 0xb1, 0x00, 0x06, 0xb3, 0x0c, 0xba, 0xb3, 0x3e, 0x81,
+  0xa1, 0xaf, 0xa0, 0xbe, 0x42, 0x7c, 0x86, 0x23, 0xe4, 0x37, 0x58, 0x5f,
+  0x81, 0xf8, 0x66, 0x19, 0x76, 0xc7, 0x77, 0x02, 0x63, 0x5f, 0x61, 0x7e,
+  0x83, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca, 0x02,
+  0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x02, 0x8e, 0x05, 0x1d, 0x6e, 0x08,
+  0xdc, 0x58, 0x00, 0x83, 0x59, 0x06, 0xde, 0xe9, 0x9d, 0xc0, 0x06, 0xfa,
+  0x15, 0xe0, 0x33, 0x4b, 0x20, 0x3e, 0x36, 0xbf, 0x02, 0x11, 0x9f, 0x59,
+  0x02, 0xf1, 0x19, 0x8e, 0xe8, 0xdf, 0x80, 0x7e, 0x05, 0xe1, 0x9b, 0x65,
+  0xf8, 0x1d, 0xf1, 0x09, 0xcc, 0x7f, 0x83, 0xfa, 0x15, 0xe2, 0x63, 0x81,
+  0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xe0, 0x29, 0x0b, 0x22, 0xf9, 0x58, 0x11,
+  0xc4, 0xa7, 0x88, 0x3d, 0x16, 0x74, 0xb8, 0x21, 0xc8, 0x63, 0x01, 0x0c,
+  0x66, 0x19, 0xc0, 0x27, 0x7c, 0x02, 0xeb, 0x5f, 0x61, 0x88, 0xcf, 0x2c,
+  0x81, 0xf8, 0x18, 0x01, 0xc2, 0x02, 0x7c, 0x66, 0x09, 0xc4, 0x67, 0xa0,
+  0xe5, 0xd1, 0x78, 0x07, 0xeb, 0x1d, 0x02, 0x7c, 0x84, 0xf0, 0x81, 0xc1,
+  0xc1, 0x77, 0x2e, 0x18, 0xc6, 0xfe, 0x57, 0x18, 0x61, 0x21, 0x3e, 0xc3,
+  0x11, 0xa4, 0x42, 0xc2, 0x02, 0xf1, 0xcd, 0x32, 0x8c, 0x8f, 0xf9, 0x04,
+  0x56, 0xc2, 0x42, 0xa9, 0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73,
+  0xc1, 0x53, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0xa9, 0x2c,
+  0xe8, 0x70, 0x43, 0x70, 0xca, 0x02, 0x18, 0xcc, 0x32, 0x90, 0x4f, 0xf9,
+  0x04, 0x36, 0xb4, 0xb0, 0x00, 0x9f, 0x59, 0x02, 0xf5, 0x31, 0x15, 0x16,
+  0x88, 0xf8, 0xcc, 0x12, 0xa8, 0xcf, 0x70, 0xc4, 0xab, 0xac, 0xb0, 0x20,
+  0x7c, 0xb3, 0x0c, 0xe7, 0xa3, 0x3e, 0x81, 0xc1, 0x0a, 0x0b, 0x0b, 0xf1,
+  0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x91, 0x7c,
+  0xac, 0x08, 0xe2, 0x53, 0x04, 0x2d, 0x0b, 0x3a, 0xdc, 0x10, 0xc8, 0xb2,
+  0x00, 0x06, 0xb3, 0x0c, 0xe8, 0x93, 0x3e, 0x81, 0xd1, 0xb0, 0x30, 0xc4,
+  0x67, 0x96, 0x40, 0x7d, 0x8c, 0xc8, 0x61, 0x01, 0x3e, 0xb3, 0x04, 0xea,
+  0x33, 0xd0, 0xf2, 0x68, 0xe4, 0x83, 0x95, 0x0f, 0x81, 0x3e, 0x42, 0xfa,
+  0xd0, 0x95, 0xf9, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0xdd, 0xf6, 0xd4, 0x95,
+  0xb0, 0x30, 0xcc, 0xe9, 0xac, 0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc,
+  0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x7c, 0xe2, 0x2c, 0xc8, 0xb2, 0x10,
+  0xc7, 0xc2, 0x2f, 0x0b, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3,
   0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00,
-  0x20, 0x08, 0x06, 0x9c, 0xda, 0x0a, 0x3a, 0x2b, 0x24, 0x44, 0x30, 0x62,
-  0x80, 0x00, 0x20, 0x08, 0x06, 0xdc, 0xda, 0x0a, 0x3b, 0x2b, 0x24, 0x44,
-  0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x1c, 0xdb, 0x0a, 0x3c, 0x2b,
-  0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x0b, 0xdd, 0x0a,
-  0x3b, 0x2b, 0xf0, 0xab, 0x10, 0x9c, 0xad, 0xc0, 0xb2, 0x42, 0xda, 0x0a,
-  0xa3, 0x09, 0x01, 0x70, 0xc1, 0x53, 0xb3, 0x04, 0xb4, 0x33, 0xdc, 0xc0,
-  0xce, 0x01, 0xdb, 0x0a, 0x60, 0x30, 0xcb, 0x30, 0x3a, 0xb4, 0x13, 0x98,
-  0xbb, 0x0a, 0xf0, 0x2a, 0xc4, 0x67, 0x38, 0x42, 0x9e, 0x83, 0x78, 0x15,
-  0x88, 0x6f, 0x96, 0x81, 0x74, 0x4e, 0x27, 0x30, 0x79, 0x15, 0xe6, 0x39,
-  0x88, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x30,
-  0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xec, 0x56, 0xd0, 0xe1, 0x86, 0x80,
-  0x6e, 0x05, 0x30, 0x98, 0x65, 0x28, 0x1d, 0xd3, 0x09, 0x6c, 0xd0, 0x57,
-  0x01, 0x3e, 0xb3, 0x04, 0xab, 0x63, 0xf9, 0x2a, 0x10, 0xf1, 0x99, 0x25,
-  0x58, 0x9d, 0xe1, 0x88, 0x7e, 0x0e, 0xf4, 0x55, 0x10, 0xbe, 0x59, 0x06,
-  0xd4, 0x59, 0x9d, 0xc0, 0xfc, 0x39, 0xd8, 0x57, 0x21, 0x3e, 0x16, 0x38,
-  0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41,
-  0x7c, 0x8a, 0x08, 0x5d, 0x41, 0x87, 0x1b, 0x82, 0xbf, 0x15, 0xc0, 0x60,
-  0x96, 0x21, 0x75, 0x54, 0x27, 0xb0, 0x91, 0x15, 0x86, 0xf8, 0xcc, 0x12,
-  0xac, 0x8e, 0x11, 0x26, 0x2b, 0xc0, 0x67, 0x96, 0x60, 0x75, 0x06, 0x5a,
-  0x1e, 0xad, 0x74, 0x30, 0xd3, 0x21, 0x52, 0x47, 0x50, 0x1d, 0x18, 0x1d,
-  0x4e, 0xe7, 0x82, 0x61, 0xac, 0x64, 0x85, 0x94, 0x15, 0xe2, 0x33, 0x1c,
-  0xa1, 0x1a, 0x2a, 0x2b, 0x10, 0xdf, 0x2c, 0x03, 0xeb, 0xbc, 0x4e, 0x60,
-  0x2b, 0x2b, 0xac, 0x46, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17,
-  0x3c, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xf1, 0xba, 0x82,
-  0x0e, 0x37, 0x04, 0xad, 0x2b, 0x80, 0xc1, 0x2c, 0x43, 0xeb, 0xb8, 0x4e,
-  0x60, 0xc3, 0xcc, 0x0a, 0xf0, 0x99, 0x25, 0x98, 0x1d, 0x83, 0x59, 0x81,
-  0x88, 0xcf, 0x2c, 0xc1, 0xec, 0x0c, 0x47, 0xd4, 0x46, 0xcc, 0x0a, 0xc2,
-  0x37, 0xcb, 0x00, 0x3b, 0xb3, 0x13, 0x98, 0x6d, 0xc8, 0xac, 0x10, 0x1f,
-  0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x10, 0xc9, 0xc7,
-  0x8a, 0x20, 0x3e, 0x45, 0xe8, 0xae, 0xa0, 0xc3, 0x0d, 0x01, 0xee, 0x0a,
-  0x60, 0x30, 0xcb, 0x10, 0x3b, 0xb2, 0x13, 0x98, 0xce, 0x0a, 0x43, 0x7c,
-  0x66, 0x09, 0x66, 0xc7, 0x88, 0x9f, 0x15, 0xe0, 0x33, 0x4b, 0x30, 0x3b,
-  0x03, 0x2d, 0x8f, 0xd6, 0x3a, 0x98, 0xeb, 0x10, 0xb1, 0x23, 0xc8, 0x0e,
-  0xe8, 0xbc, 0xce, 0x05, 0xc3, 0x5c, 0xf0, 0xd4, 0x6d, 0x4f, 0xdd, 0xca,
-  0x0a, 0xc3, 0x1c, 0x98, 0x0a, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c,
-  0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x87, 0xbe, 0x02, 0xee, 0x0a, 0x77,
-  0x2b, 0x94, 0xaf, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a,
-  0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00,
-  0x82, 0x60, 0xc0, 0xbd, 0xaf, 0xf0, 0xbb, 0x42, 0x42, 0x04, 0x23, 0x06,
-  0x08, 0x00, 0x82, 0x60, 0xc0, 0xc1, 0xaf, 0x00, 0xbe, 0x42, 0x42, 0x04,
-  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xc0, 0xc5, 0xaf, 0x10, 0xbe, 0x42,
-  0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0xe4, 0xaf, 0x00,
-  0xbe, 0x42, 0xe8, 0x0a, 0x01, 0xfb, 0x0a, 0xb1, 0x2b, 0xb8, 0xaf, 0x30,
-  0x9a, 0x10, 0x00, 0x17, 0x3c, 0x35, 0x4b, 0x40, 0x3b, 0x03, 0x2d, 0x8f,
-  0x69, 0xfc, 0x0d, 0xd9, 0x13, 0x7e, 0xc3, 0x12, 0xa1, 0x23, 0xcc, 0x0e,
-  0xd9, 0x13, 0xa2, 0x33, 0xcb, 0x50, 0x3b, 0xb7, 0x13, 0xd7, 0xc1, 0x70,
-  0x04, 0xdf, 0x06, 0xb3, 0x2b, 0x0c, 0xdf, 0xf5, 0x6d, 0x30, 0xcc, 0x70,
-  0x43, 0xe0, 0xb7, 0x02, 0x19, 0xd4, 0x10, 0xe8, 0x70, 0x44, 0x7e, 0xdc,
-  0xae, 0x30, 0x7c, 0x15, 0x08, 0x7a, 0xfb, 0x31, 0xcc, 0x70, 0x43, 0x10,
-  0xba, 0x02, 0x19, 0x54, 0x30, 0xe8, 0x2c, 0x83, 0xed, 0xac, 0x4f, 0x70,
-  0x65, 0x2b, 0x0c, 0x73, 0x7a, 0x2a, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20,
-  0x08, 0x06, 0x1f, 0x08, 0x0b, 0xf0, 0x2b, 0xbc, 0xae, 0xd0, 0xbf, 0xc2,
-  0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26,
-  0x10, 0x43, 0x11, 0x87, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x77,
-  0xc2, 0xc2, 0xfd, 0x0a, 0x07, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
-  0x01, 0x87, 0xc2, 0x02, 0xfe, 0x0a, 0x0c, 0x11, 0x8c, 0x18, 0x20, 0x00,
-  0x08, 0x82, 0x01, 0x97, 0xc2, 0x42, 0xfe, 0x0a, 0x12, 0x11, 0x8c, 0x18,
-  0x28, 0x00, 0x08, 0x82, 0xc1, 0x12, 0xc3, 0x02, 0xfe, 0x0a, 0xb9, 0x2b,
-  0x04, 0x24, 0x2c, 0xa4, 0xaf, 0x60, 0xc2, 0xc2, 0x68, 0x42, 0x00, 0x5c,
-  0xf0, 0xd4, 0x2c, 0xc1, 0xfa, 0x0c, 0x37, 0xa4, 0x76, 0xa0, 0xc2, 0x02,
-  0x18, 0xcc, 0x32, 0xe0, 0x4e, 0xee, 0x04, 0x45, 0xbb, 0xc2, 0xfe, 0x0a,
-  0x70, 0xc1, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0xcd, 0xb0,
-  0xc0, 0xbf, 0x82, 0xed, 0x06, 0xed, 0x2b, 0x8c, 0x18, 0x1c, 0x00, 0x08,
-  0x82, 0x01, 0x45, 0xc3, 0x02, 0xff, 0x0a, 0x81, 0x70, 0xc1, 0x30, 0x75,
-  0xbb, 0x02, 0x08, 0x0b, 0x70, 0xc1, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82,
-  0x60, 0x40, 0xe1, 0xb0, 0x10, 0xc2, 0x02, 0x8d, 0xc8, 0xaf, 0x30, 0x62,
-  0x70, 0x00, 0x20, 0x08, 0x06, 0x54, 0x0e, 0x0b, 0x21, 0x2c, 0x04, 0xc2,
-  0x05, 0xc3, 0x5c, 0xf0, 0xd4, 0x1d, 0x4f, 0x1d, 0xe8, 0x0a, 0xc3, 0x5c,
-  0xad, 0x0a, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00,
-  0x08, 0x82, 0xc1, 0xd7, 0xc3, 0x42, 0x0b, 0x0b, 0xec, 0x2b, 0xe8, 0xb0,
+  0x20, 0x08, 0x06, 0x5c, 0x3a, 0x0b, 0xb9, 0x2c, 0x24, 0x44, 0x30, 0x62,
+  0x80, 0x00, 0x20, 0x08, 0x06, 0x9c, 0x3a, 0x0b, 0xba, 0x2c, 0x24, 0x44,
+  0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xdc, 0x3a, 0x0b, 0xbb, 0x2c,
+  0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0xcb, 0x3c, 0x0b,
+  0xba, 0x2c, 0xec, 0xb1, 0x10, 0x98, 0xb3, 0xb0, 0xca, 0x02, 0x3a, 0x0b,
+  0xa3, 0x09, 0x01, 0x70, 0xc1, 0x53, 0xb3, 0x04, 0xeb, 0x33, 0xd0, 0xf2,
+  0x98, 0x86, 0xed, 0xe0, 0x64, 0x51, 0x3b, 0x2c, 0x81, 0x3b, 0x82, 0xfa,
+  0xe0, 0x64, 0x91, 0x3b, 0xb3, 0x0c, 0xec, 0xe3, 0x3e, 0x6b, 0x1e, 0x0c,
+  0x47, 0xec, 0x6d, 0xd0, 0xca, 0xc2, 0xf0, 0x1d, 0xdf, 0x06, 0xc3, 0x0c,
+  0x37, 0x04, 0x78, 0x2c, 0x90, 0x41, 0x0d, 0x81, 0x0e, 0x47, 0xcc, 0x4b,
+  0x2c, 0x0b, 0xc3, 0x57, 0x81, 0xa0, 0x57, 0x2f, 0xc3, 0x0c, 0x37, 0x04,
+  0x7b, 0x2c, 0x90, 0x41, 0x05, 0x83, 0xce, 0x32, 0xb4, 0x8f, 0x08, 0x05,
+  0xf7, 0xc3, 0xc2, 0x30, 0x47, 0xb7, 0xc2, 0x30, 0x23, 0x06, 0x07, 0x00,
+  0x82, 0x60, 0xf0, 0xe9, 0xb3, 0xa0, 0xce, 0x42, 0x2a, 0x0b, 0xf7, 0x2c,
+  0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68,
+  0x02, 0x31, 0x14, 0x71, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70,
+  0x21, 0x2d, 0xc4, 0xb3, 0x70, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20,
+  0x18, 0x70, 0x22, 0x2d, 0xc8, 0xb3, 0xc0, 0x10, 0xc1, 0x88, 0x01, 0x02,
+  0x80, 0x20, 0x18, 0x70, 0x23, 0x2d, 0xcc, 0xb3, 0x20, 0x11, 0xc1, 0x88,
+  0x81, 0x02, 0x80, 0x20, 0x18, 0x2c, 0x2b, 0x2d, 0xc8, 0xb3, 0x30, 0xcb,
+  0x42, 0xe0, 0xcf, 0xc2, 0x38, 0x0b, 0x20, 0x2d, 0x8c, 0x26, 0x04, 0xc0,
+  0x05, 0x4f, 0xcd, 0x12, 0x88, 0xd0, 0x70, 0xc3, 0xa8, 0x07, 0x24, 0x2d,
+  0x80, 0xc1, 0x2c, 0xc3, 0xfb, 0xc0, 0x4f, 0x50, 0xae, 0x2c, 0xd4, 0xb3,
+  0x00, 0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x54, 0x4b,
+  0x0b, 0xf6, 0x2c, 0xd4, 0x6e, 0x70, 0xce, 0xc2, 0x88, 0xc1, 0x01, 0x80,
+  0x20, 0x18, 0x50, 0x2e, 0x2d, 0xd8, 0xb3, 0x10, 0x08, 0x17, 0x0c, 0x53,
+  0xb1, 0x2c, 0xe8, 0xb3, 0x00, 0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20,
+  0x08, 0x06, 0x94, 0x4c, 0x0b, 0xfb, 0x2c, 0xb8, 0x0c, 0x3b, 0x0b, 0x23,
+  0x06, 0x07, 0x00, 0x82, 0x60, 0x40, 0xcd, 0xb4, 0xb0, 0xcf, 0x42, 0x20,
+  0x5c, 0x30, 0xcc, 0x05, 0x4f, 0xdd, 0xf1, 0xd4, 0xe9, 0xb1, 0x30, 0xcc,
+  0xbd, 0xae, 0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01,
+  0x80, 0x20, 0x18, 0x7c, 0x37, 0x2d, 0x9c, 0xb4, 0x60, 0xce, 0x02, 0x4d,
+  0x0b, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30,
+  0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06,
+  0x9c, 0x4f, 0x0b, 0x2e, 0x2d, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20,
+  0x08, 0x06, 0xdc, 0x4f, 0x0b, 0x2f, 0x2d, 0x24, 0x44, 0x30, 0x62, 0x80,
+  0x00, 0x20, 0x08, 0x06, 0x1c, 0x58, 0x0b, 0x30, 0x2d, 0x24, 0x44, 0x30,
+  0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x0b, 0x5a, 0x0b, 0x2f, 0x2d, 0xc0,
+  0xb3, 0x10, 0xec, 0xb4, 0x00, 0xd2, 0x42, 0x4f, 0x0b, 0xa3, 0x09, 0x01,
+  0x70, 0xc1, 0x53, 0xb3, 0x04, 0x22, 0x34, 0xdc, 0x00, 0xee, 0x01, 0x58,
+  0x0b, 0x60, 0x30, 0xcb, 0x10, 0x3f, 0x22, 0x14, 0x98, 0x38, 0x0b, 0xe4,
+  0x2c, 0xc4, 0x67, 0x38, 0x22, 0x7e, 0x83, 0x72, 0x16, 0x88, 0x6f, 0x96,
+  0x41, 0x7e, 0xea, 0x27, 0x30, 0x73, 0x16, 0xe4, 0x37, 0x88, 0x8f, 0x05,
+  0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x82, 0xa7, 0x2c, 0x30, 0xe4, 0x63, 0x45,
+  0x10, 0x9f, 0x22, 0xd4, 0x5a, 0xd0, 0xe1, 0x86, 0x00, 0xad, 0x05, 0x30,
+  0x98, 0x65, 0x98, 0x1f, 0xfa, 0x09, 0x6c, 0x70, 0x67, 0x01, 0x3e, 0xb3,
+  0x04, 0xf9, 0x63, 0xed, 0x2c, 0x10, 0xf1, 0x99, 0x25, 0xc8, 0x9f, 0xe1,
+  0x08, 0xfe, 0x0d, 0xdc, 0x59, 0x10, 0xbe, 0x59, 0x06, 0xfb, 0xc9, 0x9f,
+  0xc0, 0xfa, 0x37, 0x78, 0x67, 0x21, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60,
+  0x98, 0x0b, 0x9e, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xa8,
+  0x6b, 0x41, 0x87, 0x1b, 0x82, 0xb9, 0x16, 0xc0, 0x60, 0x96, 0xe1, 0x7e,
+  0xf0, 0x27, 0xb0, 0x7b, 0x16, 0x86, 0xf8, 0xcc, 0x12, 0xe4, 0x8f, 0x11,
+  0xfa, 0x2c, 0xc0, 0x67, 0x96, 0x20, 0x7f, 0x06, 0x5a, 0x1e, 0x6d, 0x7e,
+  0x30, 0xfa, 0x21, 0xee, 0x47, 0xc0, 0x1f, 0x17, 0x1c, 0xea, 0xe7, 0x82,
+  0x61, 0x2c, 0x9f, 0x85, 0x7e, 0x16, 0xe2, 0x33, 0x1c, 0xe1, 0x37, 0xfe,
+  0x2c, 0x10, 0xdf, 0x2c, 0x83, 0xfe, 0xf4, 0x4f, 0x60, 0xff, 0x2c, 0xfc,
+  0x4d, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x81,
+  0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x31, 0xda, 0x82, 0x0e, 0x37, 0x04,
+  0xa1, 0x2d, 0x80, 0xc1, 0x2c, 0xc3, 0xfe, 0xf0, 0x4f, 0x60, 0xc3, 0x49,
+  0x0b, 0xf0, 0x99, 0x25, 0x08, 0x21, 0x23, 0x69, 0x81, 0x88, 0xcf, 0x2c,
+  0x41, 0x08, 0x0d, 0x47, 0xa4, 0x4e, 0x49, 0x0b, 0xc2, 0x37, 0xcb, 0xe0,
+  0x3f, 0x21, 0x14, 0x98, 0xea, 0x98, 0xb4, 0x10, 0x1f, 0x0b, 0x1c, 0xfa,
+  0x5c, 0x30, 0xcc, 0x05, 0x4f, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e,
+  0x45, 0xb8, 0xb6, 0xa0, 0xc3, 0x0d, 0x01, 0x6b, 0x0b, 0x60, 0x30, 0xcb,
+  0xf0, 0x3f, 0x20, 0x14, 0x98, 0x4b, 0x0b, 0x43, 0x7c, 0x66, 0x09, 0x42,
+  0xc8, 0x88, 0x99, 0x16, 0xe0, 0x33, 0x4b, 0x10, 0x42, 0x03, 0x2d, 0x8f,
+  0xb6, 0x3f, 0x18, 0xff, 0x10, 0xff, 0x23, 0x80, 0x10, 0xda, 0xf5, 0xcf,
+  0x05, 0xc3, 0x5c, 0xf0, 0xd4, 0x6d, 0x4f, 0xdd, 0x3f, 0x0b, 0xc3, 0x1c,
+  0x0d, 0x0b, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00,
+  0x08, 0x82, 0xc1, 0xc7, 0xdb, 0x02, 0x6b, 0x0b, 0x6b, 0x2d, 0xe4, 0xb6,
   0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3,
   0x09, 0xc4, 0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xc0,
-  0x91, 0xb1, 0x40, 0xc3, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82,
-  0x60, 0xc0, 0x95, 0xb1, 0x50, 0xc3, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08,
-  0x00, 0x82, 0x60, 0xc0, 0x99, 0xb1, 0x60, 0xc3, 0x42, 0x42, 0x04, 0x23,
-  0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0xb8, 0xb1, 0x50, 0xc3, 0x82, 0xfd,
-  0x0a, 0x41, 0x18, 0x0b, 0x26, 0x2c, 0x8c, 0xb1, 0x30, 0x9a, 0x10, 0x00,
-  0x17, 0x3c, 0x35, 0x4b, 0xb0, 0x3e, 0xc3, 0x0d, 0xe6, 0x1d, 0x98, 0xb1,
-  0x00, 0x06, 0xb3, 0x0c, 0xba, 0xb3, 0x3e, 0x81, 0xa1, 0xaf, 0xa0, 0xbe,
-  0x42, 0x7c, 0x86, 0x23, 0xe4, 0x37, 0x58, 0x5f, 0x81, 0xf8, 0x66, 0x19,
-  0x76, 0xc7, 0x77, 0x02, 0x63, 0x5f, 0x61, 0x7e, 0x83, 0xf8, 0x58, 0x30,
-  0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x78, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04,
-  0xf1, 0x29, 0x02, 0x8e, 0x05, 0x1d, 0x6e, 0x08, 0xdc, 0x58, 0x00, 0x83,
-  0x59, 0x06, 0xde, 0xe9, 0x9d, 0xc0, 0x06, 0xfa, 0x15, 0xe0, 0x33, 0x4b,
-  0x20, 0x3e, 0x36, 0xbf, 0x02, 0x11, 0x9f, 0x59, 0x02, 0xf1, 0x19, 0x8e,
-  0xe8, 0xdf, 0x80, 0x7e, 0x05, 0xe1, 0x9b, 0x65, 0xf8, 0x1d, 0xf1, 0x09,
-  0xcc, 0x7f, 0x83, 0xfa, 0x15, 0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86,
-  0xb9, 0xe0, 0x29, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0x3d,
-  0x16, 0x74, 0xb8, 0x21, 0xc8, 0x63, 0x01, 0x0c, 0x66, 0x19, 0xc0, 0x27,
-  0x7c, 0x02, 0xeb, 0x5f, 0x61, 0x88, 0xcf, 0x2c, 0x81, 0xf8, 0x18, 0x01,
-  0xc2, 0x02, 0x7c, 0x66, 0x09, 0xc4, 0x67, 0xa0, 0xe5, 0xd1, 0x78, 0x07,
-  0xeb, 0x1d, 0x02, 0x7c, 0x84, 0xf0, 0x81, 0xc1, 0xc1, 0x77, 0x2e, 0x18,
-  0xc6, 0xfe, 0x57, 0x18, 0x61, 0x21, 0x3e, 0xc3, 0x11, 0xa4, 0x42, 0xc2,
-  0x02, 0xf1, 0xcd, 0x32, 0x8c, 0x8f, 0xf9, 0x04, 0x56, 0xc2, 0x42, 0xa9,
-  0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0xc1, 0x53, 0x16, 0x18,
-  0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0xa9, 0x2c, 0xe8, 0x70, 0x43, 0x70,
-  0xca, 0x02, 0x18, 0xcc, 0x32, 0x90, 0x4f, 0xf9, 0x04, 0x36, 0xb4, 0xb0,
-  0x00, 0x9f, 0x59, 0x02, 0xf5, 0x31, 0x15, 0x16, 0x88, 0xf8, 0xcc, 0x12,
-  0xa8, 0xcf, 0x70, 0xc4, 0xab, 0xac, 0xb0, 0x20, 0x7c, 0xb3, 0x0c, 0xe7,
-  0xa3, 0x3e, 0x81, 0xc1, 0x0a, 0x0b, 0x0b, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf,
-  0x05, 0xc3, 0x5c, 0xf0, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53,
-  0x04, 0x2d, 0x0b, 0x3a, 0xdc, 0x10, 0xc8, 0xb2, 0x00, 0x06, 0xb3, 0x0c,
-  0xe8, 0x93, 0x3e, 0x81, 0xd1, 0xb0, 0x30, 0xc4, 0x67, 0x96, 0x40, 0x7d,
-  0x8c, 0xc8, 0x61, 0x01, 0x3e, 0xb3, 0x04, 0xea, 0x33, 0xd0, 0xf2, 0x68,
-  0xe4, 0x83, 0x95, 0x0f, 0x81, 0x3e, 0x42, 0xfa, 0xd0, 0x95, 0xf9, 0x5c,
-  0x30, 0xcc, 0x05, 0x4f, 0xdd, 0xf6, 0xd4, 0x95, 0xb0, 0x30, 0xcc, 0xe9,
-  0xac, 0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80,
-  0x20, 0x18, 0x7c, 0xe2, 0x2c, 0xc8, 0xb2, 0x10, 0xc7, 0xc2, 0x2f, 0x0b,
-  0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a,
-  0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x5c,
-  0x3a, 0x0b, 0xb9, 0x2c, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08,
-  0x06, 0x9c, 0x3a, 0x0b, 0xba, 0x2c, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00,
-  0x20, 0x08, 0x06, 0xdc, 0x3a, 0x0b, 0xbb, 0x2c, 0x24, 0x44, 0x30, 0x62,
-  0xa0, 0x00, 0x20, 0x08, 0x06, 0xcb, 0x3c, 0x0b, 0xba, 0x2c, 0xec, 0xb1,
-  0x10, 0x98, 0xb3, 0xb0, 0xca, 0x02, 0x3a, 0x0b, 0xa3, 0x09, 0x01, 0x70,
-  0xc1, 0x53, 0xb3, 0x04, 0xeb, 0x33, 0xd0, 0xf2, 0x98, 0x86, 0xed, 0xe0,
-  0x64, 0x51, 0x3b, 0x2c, 0x81, 0x3b, 0x82, 0xfa, 0xe0, 0x64, 0x91, 0x3b,
-  0xb3, 0x0c, 0xec, 0xe3, 0x3e, 0x6b, 0x1e, 0x0c, 0x47, 0xec, 0x6d, 0xd0,
-  0xca, 0xc2, 0xf0, 0x1d, 0xdf, 0x06, 0xc3, 0x0c, 0x37, 0x04, 0x78, 0x2c,
-  0x90, 0x41, 0x0d, 0x81, 0x0e, 0x47, 0xcc, 0x4b, 0x2c, 0x0b, 0xc3, 0x57,
-  0x81, 0xa0, 0x57, 0x2f, 0xc3, 0x0c, 0x37, 0x04, 0x7b, 0x2c, 0x90, 0x41,
-  0x05, 0x83, 0xce, 0x32, 0xb4, 0x8f, 0x08, 0x05, 0xf7, 0xc3, 0xc2, 0x30,
-  0x47, 0xb7, 0xc2, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xf0, 0xe9,
-  0xb3, 0xa0, 0xce, 0x42, 0x2a, 0x0b, 0xf7, 0x2c, 0x8c, 0x26, 0x04, 0xc0,
-  0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x71,
-  0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70, 0x21, 0x2d, 0xc4, 0xb3,
-  0x70, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70, 0x22, 0x2d,
-  0xc8, 0xb3, 0xc0, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x70,
-  0x23, 0x2d, 0xcc, 0xb3, 0x20, 0x11, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20,
-  0x18, 0x2c, 0x2b, 0x2d, 0xc8, 0xb3, 0x30, 0xcb, 0x42, 0xe0, 0xcf, 0xc2,
-  0x38, 0x0b, 0x20, 0x2d, 0x8c, 0x26, 0x04, 0xc0, 0x05, 0x4f, 0xcd, 0x12,
-  0x88, 0xd0, 0x70, 0xc3, 0xa8, 0x07, 0x24, 0x2d, 0x80, 0xc1, 0x2c, 0xc3,
-  0xfb, 0xc0, 0x4f, 0x50, 0xae, 0x2c, 0xd4, 0xb3, 0x00, 0x17, 0x3c, 0x35,
-  0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x54, 0x4b, 0x0b, 0xf6, 0x2c, 0xd4,
-  0x6e, 0x70, 0xce, 0xc2, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x50, 0x2e,
-  0x2d, 0xd8, 0xb3, 0x10, 0x08, 0x17, 0x0c, 0x53, 0xb1, 0x2c, 0xe8, 0xb3,
-  0x00, 0x17, 0x3c, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x94, 0x4c,
-  0x0b, 0xfb, 0x2c, 0xb8, 0x0c, 0x3b, 0x0b, 0x23, 0x06, 0x07, 0x00, 0x82,
-  0x60, 0x40, 0xcd, 0xb4, 0xb0, 0xcf, 0x42, 0x20, 0x5c, 0x30, 0xcc, 0x05,
-  0x4f, 0xdd, 0xf1, 0xd4, 0xe9, 0xb1, 0x30, 0xcc, 0xbd, 0xae, 0x30, 0xcc,
-  0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x7c,
-  0x37, 0x2d, 0x9c, 0xb4, 0x60, 0xce, 0x02, 0x4d, 0x0b, 0xa3, 0x09, 0x01,
-  0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45,
-  0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x9c, 0x4f, 0x0b, 0x2e,
-  0x2d, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xdc, 0x4f,
-  0x0b, 0x2f, 0x2d, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06,
-  0x1c, 0x58, 0x0b, 0x30, 0x2d, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20,
-  0x08, 0x06, 0x0b, 0x5a, 0x0b, 0x2f, 0x2d, 0xc0, 0xb3, 0x10, 0xec, 0xb4,
-  0x00, 0xd2, 0x42, 0x4f, 0x0b, 0xa3, 0x09, 0x01, 0x70, 0xc1, 0x53, 0xb3,
-  0x04, 0x22, 0x34, 0xdc, 0x00, 0xee, 0x01, 0x58, 0x0b, 0x60, 0x30, 0xcb,
-  0x10, 0x3f, 0x22, 0x14, 0x98, 0x38, 0x0b, 0xe4, 0x2c, 0xc4, 0x67, 0x38,
-  0x22, 0x7e, 0x83, 0x72, 0x16, 0x88, 0x6f, 0x96, 0x41, 0x7e, 0xea, 0x27,
-  0x30, 0x73, 0x16, 0xe4, 0x37, 0x88, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18,
-  0xe6, 0x82, 0xa7, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xd4,
-  0x5a, 0xd0, 0xe1, 0x86, 0x00, 0xad, 0x05, 0x30, 0x98, 0x65, 0x98, 0x1f,
-  0xfa, 0x09, 0x6c, 0x70, 0x67, 0x01, 0x3e, 0xb3, 0x04, 0xf9, 0x63, 0xed,
-  0x2c, 0x10, 0xf1, 0x99, 0x25, 0xc8, 0x9f, 0xe1, 0x08, 0xfe, 0x0d, 0xdc,
-  0x59, 0x10, 0xbe, 0x59, 0x06, 0xfb, 0xc9, 0x9f, 0xc0, 0xfa, 0x37, 0x78,
-  0x67, 0x21, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9e, 0xb2,
-  0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xa8, 0x6b, 0x41, 0x87, 0x1b,
-  0x82, 0xb9, 0x16, 0xc0, 0x60, 0x96, 0xe1, 0x7e, 0xf0, 0x27, 0xb0, 0x7b,
-  0x16, 0x86, 0xf8, 0xcc, 0x12, 0xe4, 0x8f, 0x11, 0xfa, 0x2c, 0xc0, 0x67,
-  0x96, 0x20, 0x7f, 0x06, 0x5a, 0x1e, 0x6d, 0x7e, 0x30, 0xfa, 0x21, 0xee,
-  0x47, 0xc0, 0x1f, 0x17, 0x1c, 0xea, 0xe7, 0x82, 0x61, 0x2c, 0x9f, 0x85,
-  0x7e, 0x16, 0xe2, 0x33, 0x1c, 0xe1, 0x37, 0xfe, 0x2c, 0x10, 0xdf, 0x2c,
-  0x83, 0xfe, 0xf4, 0x4f, 0x60, 0xff, 0x2c, 0xfc, 0x4d, 0x7c, 0x2c, 0x18,
-  0xe8, 0x73, 0xc1, 0x30, 0x17, 0x3c, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82,
-  0xf8, 0x14, 0x31, 0xda, 0x82, 0x0e, 0x37, 0x04, 0xa1, 0x2d, 0x80, 0xc1,
-  0x2c, 0xc3, 0xfe, 0xf0, 0x4f, 0x60, 0xc3, 0x49, 0x0b, 0xf0, 0x99, 0x25,
-  0x08, 0x21, 0x23, 0x69, 0x81, 0x88, 0xcf, 0x2c, 0x41, 0x08, 0x0d, 0x47,
-  0xa4, 0x4e, 0x49, 0x0b, 0xc2, 0x37, 0xcb, 0xe0, 0x3f, 0x21, 0x14, 0x98,
-  0xea, 0x98, 0xb4, 0x10, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05,
-  0x4f, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xb8, 0xb6, 0xa0,
-  0xc3, 0x0d, 0x01, 0x6b, 0x0b, 0x60, 0x30, 0xcb, 0xf0, 0x3f, 0x20, 0x14,
-  0x98, 0x4b, 0x0b, 0x43, 0x7c, 0x66, 0x09, 0x42, 0xc8, 0x88, 0x99, 0x16,
-  0xe0, 0x33, 0x4b, 0x10, 0x42, 0x03, 0x2d, 0x8f, 0xb6, 0x3f, 0x18, 0xff,
-  0x10, 0xff, 0x23, 0x80, 0x10, 0xda, 0xf5, 0xcf, 0x05, 0xc3, 0x5c, 0xf0,
-  0xd4, 0x6d, 0x4f, 0xdd, 0x3f, 0x0b, 0xc3, 0x1c, 0x0d, 0x0b, 0xc3, 0x1c,
-  0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0xc7,
-  0xdb, 0x02, 0x6b, 0x0b, 0x6b, 0x2d, 0xe4, 0xb6, 0x30, 0x9a, 0x10, 0x00,
-  0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44,
-  0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xc0, 0x8d, 0xb7, 0x30, 0xdb,
-  0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xc0, 0x91, 0xb7,
-  0x40, 0xdb, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xc0,
-  0x95, 0xb7, 0x50, 0xdb, 0x42, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82,
-  0x60, 0xb0, 0xb4, 0xb7, 0x40, 0xdb, 0x42, 0x5d, 0x0b, 0x01, 0x78, 0x0b,
-  0xa5, 0x2d, 0x88, 0xb7, 0x30, 0x9a, 0x10, 0x00, 0x17, 0x3c, 0x35, 0x4b,
-  0x20, 0x42, 0x03, 0x2d, 0x8f, 0x69, 0xb4, 0x0f, 0xcb, 0x16, 0xec, 0xc3,
-  0x12, 0xef, 0x23, 0x84, 0x10, 0xcb, 0x16, 0xf0, 0x33, 0x62, 0x60, 0x00,
-  0x20, 0x08, 0x06, 0xd0, 0x7b, 0x0b, 0xb0, 0x2d, 0x98, 0xb1, 0x30, 0x62,
-  0x60, 0x00, 0x20, 0x08, 0x06, 0x10, 0x7c, 0x0b, 0xb1, 0x2d, 0x98, 0xb1,
-  0x60, 0x41, 0x20, 0x1f, 0x0b, 0x04, 0xf9, 0xd8, 0x9b, 0x07, 0xa9, 0x2d,
-  0xc8, 0x67, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xa4, 0xf9, 0x16, 0x72,
-  0x5b, 0x48, 0x6d, 0xa1, 0xd7, 0x02, 0x8b, 0xf3, 0x20, 0xb5, 0x05, 0xf9,
-  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x54, 0xdf, 0xc2, 0x6e, 0x0b,
-  0xa8, 0x2d, 0xa0, 0x6a, 0x10, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81,
-  0x64, 0xdf, 0x02, 0x6f, 0x0b, 0xab, 0x2d, 0x80, 0x5b, 0x30, 0x62, 0x80,
-  0x00, 0x20, 0x08, 0x06, 0xd2, 0x7d, 0x0b, 0xbd, 0x2d, 0xb8, 0xb6, 0x80,
-  0x2f, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x48, 0xf8, 0x2d, 0xf8,
-  0xb6, 0xa0, 0xda, 0xc2, 0xaa, 0x06, 0xc6, 0x88, 0x01, 0x02, 0x80, 0x20,
-  0x18, 0x48, 0xf9, 0x2d, 0xfc, 0xb6, 0xa0, 0xda, 0xc2, 0xb8, 0x05, 0x23,
-  0x06, 0x08, 0x00, 0x82, 0x60, 0x20, 0xe9, 0xb7, 0x00, 0xde, 0x02, 0x6c,
-  0x0b, 0xfb, 0x12, 0x8c, 0x18, 0x34, 0x00, 0x08, 0x82, 0x81, 0x95, 0xdf,
-  0x02, 0x78, 0x0b, 0xb3, 0x2d, 0x30, 0x8b, 0xe2, 0xaa, 0x01, 0x42, 0x04,
-  0xf6, 0xd7, 0xc1, 0x6c, 0x0b, 0xf2, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04,
-  0x03, 0xa9, 0xbf, 0x85, 0xf1, 0x16, 0x66, 0x5b, 0x68, 0xaf, 0xc0, 0x42,
-  0x3b, 0x98, 0x6d, 0x41, 0x3e, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x20,
-  0xfd, 0xb7, 0x50, 0xde, 0x82, 0x6c, 0x0b, 0xb8, 0x19, 0x04, 0x23, 0x06,
-  0x08, 0x00, 0x82, 0x60, 0x20, 0x81, 0xb8, 0x60, 0xde, 0x42, 0x6d, 0x0b,
-  0xf0, 0x15, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x14, 0xe2, 0xc2,
-  0x79, 0x0b, 0xb8, 0x2d, 0xa0, 0x48, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08,
-  0x06, 0x92, 0x88, 0x0b, 0xe8, 0x2d, 0xd0, 0xb6, 0xb0, 0x9b, 0x81, 0x31,
-  0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xd2, 0x88, 0x0b, 0xe9, 0x2d, 0xd0,
-  0xb6, 0x30, 0x5f, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x48, 0x24,
-  0x2e, 0xa8, 0xb7, 0xa0, 0xdb, 0xc2, 0x8a, 0x04, 0x23, 0x06, 0x0d, 0x00,
-  0x82, 0x60, 0x60, 0x8d, 0xb8, 0xa0, 0xde, 0x42, 0x6f, 0x0b, 0x56, 0x45,
-  0xf9, 0x66, 0x80, 0x10, 0x81, 0xb9, 0x72, 0xd0, 0xdb, 0x82, 0x7c, 0x46,
-  0x0c, 0x10, 0x00, 0x04, 0xc1, 0x40, 0x3a, 0x71, 0xa1, 0xbd, 0x85, 0xde,
-  0x16, 0xfa, 0x29, 0x30, 0x58, 0x0e, 0x7a, 0x5b, 0x90, 0xcf, 0x88, 0x01,
-  0x02, 0x80, 0x20, 0x18, 0x48, 0x29, 0x2e, 0xbc, 0xb7, 0xc0, 0xdb, 0x02,
-  0x3a, 0x06, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x48, 0x2a, 0x2e,
-  0xc0, 0xb7, 0xf0, 0xdb, 0x02, 0x48, 0x05, 0x23, 0x06, 0x08, 0x00, 0x82,
-  0x60, 0x20, 0xad, 0xb8, 0x10, 0xdf, 0x82, 0x78, 0x0b, 0x38, 0x11, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0xc4, 0xe2, 0x82, 0x7c, 0x0b, 0xbe,
-  0x2d, 0xac, 0x63, 0x60, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0xd4,
-  0xe2, 0xc2, 0x7c, 0x0b, 0xbe, 0x2d, 0x8c, 0x54, 0x30, 0x62, 0x80, 0x00,
-  0x20, 0x08, 0x06, 0x92, 0x8b, 0x0b, 0xf4, 0x2d, 0x90, 0xb7, 0xb0, 0x13,
-  0xc1, 0x88, 0x41, 0x03, 0x80, 0x20, 0x18, 0x58, 0x2d, 0x2e, 0xd0, 0xb7,
-  0x70, 0xde, 0x02, 0x18, 0x7c, 0x9e, 0x3b, 0x06, 0x08, 0x11, 0x58, 0xef,
-  0x06, 0xe7, 0x2d, 0xc8, 0x67, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xa4,
-  0x18, 0x17, 0xee, 0x5b, 0x38, 0x6f, 0xa1, 0x85, 0x02, 0xfb, 0xdd, 0xe0,
-  0xbc, 0x05, 0xf9, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x34, 0xe3,
-  0x42, 0x7e, 0x0b, 0xe6, 0x2d, 0xe0, 0x5f, 0x30, 0x62, 0x80, 0x00, 0x20,
-  0x08, 0x06, 0x12, 0x8d, 0x0b, 0xfa, 0x2d, 0xa4, 0xb7, 0x00, 0x43, 0xc1,
-  0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x48, 0x35, 0x2e, 0xec, 0xb7, 0xc0,
-  0xde, 0x02, 0x1a, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x20, 0xd9,
-  0xb8, 0xc0, 0xdf, 0x02, 0x7a, 0x0b, 0xfb, 0x67, 0x8c, 0x18, 0x20, 0x00,
-  0x08, 0x82, 0x81, 0x74, 0xe3, 0x42, 0x7f, 0x0b, 0xe8, 0x2d, 0xcc, 0x50,
-  0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x12, 0x8e, 0x0b, 0xfe, 0x2d,
-  0xb8, 0xb7, 0xb0, 0x06, 0xc1, 0x88, 0x41, 0x03, 0x80, 0x20, 0x18, 0x58,
-  0x37, 0x2e, 0xf8, 0xb7, 0x10, 0xdf, 0x82, 0x1a, 0xa4, 0x01, 0x1a, 0xf8,
-  0x1f, 0x42, 0x04, 0xc6, 0x06, 0x6c, 0x20, 0x1f, 0x0b, 0xda, 0x40, 0x3e,
-  0x16, 0x06, 0xf3, 0x2d, 0xc8, 0x67, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c,
-  0xa4, 0x1e, 0x17, 0x46, 0x5c, 0x98, 0x6f, 0xc1, 0x09, 0x6c, 0x0c, 0xe6,
-  0x5b, 0x90, 0xcf, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x48, 0x3f, 0x2e,
-  0x94, 0xb8, 0x20, 0xdf, 0x82, 0x16, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
-  0x81, 0x04, 0xe6, 0x82, 0x89, 0x0b, 0xf5, 0x2d, 0x44, 0xc1, 0x88, 0x01,
-  0x02, 0x80, 0x20, 0x18, 0x48, 0x61, 0x2e, 0x9c, 0xb8, 0x80, 0xdf, 0x02,
-  0x12, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x24, 0xe6, 0x02, 0x8a,
-  0x0b, 0xf4, 0x2d, 0x74, 0xc6, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x48,
-  0x63, 0x2e, 0xa4, 0xb8, 0x40, 0xdf, 0x02, 0x15, 0x8c, 0x18, 0x20, 0x00,
-  0x08, 0x82, 0x81, 0x44, 0xe6, 0x82, 0x8a, 0x0b, 0xfa, 0x2d, 0x2c, 0xc1,
-  0x88, 0x41, 0x03, 0x80, 0x20, 0x18, 0x58, 0x63, 0x2e, 0xa8, 0xb8, 0xd0,
-  0xdf, 0xc2, 0x1d, 0x2c, 0x0a, 0x18, 0x20, 0x44, 0x70, 0x81, 0x31, 0x23,
-  0x06, 0x0e, 0x00, 0x82, 0x60, 0xd0, 0xa4, 0xb9, 0x80, 0xe2, 0x02, 0x7d,
-  0x0b, 0xed, 0x2d, 0xf4, 0xb8, 0x10, 0x8c, 0xb8, 0x30, 0xe2, 0xc2, 0x88,
-  0x0b, 0x22, 0x2e, 0xfc, 0xb8, 0x30, 0x4b, 0x30, 0x42, 0x08, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00
+  0x8d, 0xb7, 0x30, 0xdb, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82,
+  0x60, 0xc0, 0x91, 0xb7, 0x40, 0xdb, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08,
+  0x00, 0x82, 0x60, 0xc0, 0x95, 0xb7, 0x50, 0xdb, 0x42, 0x42, 0x04, 0x23,
+  0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0xb4, 0xb7, 0x40, 0xdb, 0x42, 0x5d,
+  0x0b, 0x01, 0x78, 0x0b, 0xa5, 0x2d, 0x88, 0xb7, 0x30, 0x9a, 0x10, 0x00,
+  0x17, 0x3c, 0x35, 0x4b, 0x20, 0x42, 0x03, 0x2d, 0x8f, 0x69, 0xb4, 0x0f,
+  0xcb, 0x16, 0xec, 0xc3, 0x12, 0xef, 0x23, 0x84, 0x10, 0xcb, 0x16, 0xf0,
+  0x33, 0x62, 0x60, 0x00, 0x20, 0x08, 0x06, 0xd0, 0x7b, 0x0b, 0xb0, 0x2d,
+  0x98, 0xb1, 0x30, 0x62, 0x60, 0x00, 0x20, 0x08, 0x06, 0x10, 0x7c, 0x0b,
+  0xb1, 0x2d, 0x98, 0xb1, 0x60, 0x41, 0x20, 0x1f, 0x0b, 0x04, 0xf9, 0xd8,
+  0x9b, 0x07, 0xa9, 0x2d, 0xc8, 0x67, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c,
+  0xa4, 0xf9, 0x16, 0x72, 0x5b, 0x48, 0x6d, 0xa1, 0xd7, 0x02, 0x8b, 0xf3,
+  0x20, 0xb5, 0x05, 0xf9, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x54,
+  0xdf, 0xc2, 0x6e, 0x0b, 0xa8, 0x2d, 0xa0, 0x6a, 0x10, 0x8c, 0x18, 0x20,
+  0x00, 0x08, 0x82, 0x81, 0x64, 0xdf, 0x02, 0x6f, 0x0b, 0xab, 0x2d, 0x80,
+  0x5b, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xd2, 0x7d, 0x0b, 0xbd,
+  0x2d, 0xb8, 0xb6, 0x80, 0x2f, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
+  0x48, 0xf8, 0x2d, 0xf8, 0xb6, 0xa0, 0xda, 0xc2, 0xaa, 0x06, 0xc6, 0x88,
+  0x01, 0x02, 0x80, 0x20, 0x18, 0x48, 0xf9, 0x2d, 0xfc, 0xb6, 0xa0, 0xda,
+  0xc2, 0xb8, 0x05, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x20, 0xe9, 0xb7,
+  0x00, 0xde, 0x02, 0x6c, 0x0b, 0xfb, 0x12, 0x8c, 0x18, 0x34, 0x00, 0x08,
+  0x82, 0x81, 0x95, 0xdf, 0x02, 0x78, 0x0b, 0xb3, 0x2d, 0x30, 0x8b, 0xe2,
+  0xaa, 0x01, 0x42, 0x04, 0xf6, 0xd7, 0xc1, 0x6c, 0x0b, 0xf2, 0x19, 0x31,
+  0x40, 0x00, 0x10, 0x04, 0x03, 0xa9, 0xbf, 0x85, 0xf1, 0x16, 0x66, 0x5b,
+  0x68, 0xaf, 0xc0, 0x42, 0x3b, 0x98, 0x6d, 0x41, 0x3e, 0x23, 0x06, 0x08,
+  0x00, 0x82, 0x60, 0x20, 0xfd, 0xb7, 0x50, 0xde, 0x82, 0x6c, 0x0b, 0xb8,
+  0x19, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x20, 0x81, 0xb8, 0x60,
+  0xde, 0x42, 0x6d, 0x0b, 0xf0, 0x15, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
+  0x81, 0x14, 0xe2, 0xc2, 0x79, 0x0b, 0xb8, 0x2d, 0xa0, 0x48, 0x30, 0x62,
+  0x80, 0x00, 0x20, 0x08, 0x06, 0x92, 0x88, 0x0b, 0xe8, 0x2d, 0xd0, 0xb6,
+  0xb0, 0x9b, 0x81, 0x31, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xd2, 0x88,
+  0x0b, 0xe9, 0x2d, 0xd0, 0xb6, 0x30, 0x5f, 0xc1, 0x88, 0x01, 0x02, 0x80,
+  0x20, 0x18, 0x48, 0x24, 0x2e, 0xa8, 0xb7, 0xa0, 0xdb, 0xc2, 0x8a, 0x04,
+  0x23, 0x06, 0x0d, 0x00, 0x82, 0x60, 0x60, 0x8d, 0xb8, 0xa0, 0xde, 0x42,
+  0x6f, 0x0b, 0x56, 0x45, 0xf9, 0x66, 0x80, 0x10, 0x81, 0xb9, 0x72, 0xd0,
+  0xdb, 0x82, 0x7c, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x40, 0x3a, 0x71,
+  0xa1, 0xbd, 0x85, 0xde, 0x16, 0xfa, 0x29, 0x30, 0x58, 0x0e, 0x7a, 0x5b,
+  0x90, 0xcf, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x48, 0x29, 0x2e, 0xbc,
+  0xb7, 0xc0, 0xdb, 0x02, 0x3a, 0x06, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20,
+  0x18, 0x48, 0x2a, 0x2e, 0xc0, 0xb7, 0xf0, 0xdb, 0x02, 0x48, 0x05, 0x23,
+  0x06, 0x08, 0x00, 0x82, 0x60, 0x20, 0xad, 0xb8, 0x10, 0xdf, 0x82, 0x78,
+  0x0b, 0x38, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0xc4, 0xe2,
+  0x82, 0x7c, 0x0b, 0xbe, 0x2d, 0xac, 0x63, 0x60, 0x8c, 0x18, 0x20, 0x00,
+  0x08, 0x82, 0x81, 0xd4, 0xe2, 0xc2, 0x7c, 0x0b, 0xbe, 0x2d, 0x8c, 0x54,
+  0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x92, 0x8b, 0x0b, 0xf4, 0x2d,
+  0x90, 0xb7, 0xb0, 0x13, 0xc1, 0x88, 0x41, 0x03, 0x80, 0x20, 0x18, 0x58,
+  0x2d, 0x2e, 0xd0, 0xb7, 0x70, 0xde, 0x02, 0x18, 0x7c, 0x9e, 0x3b, 0x06,
+  0x08, 0x11, 0x58, 0xef, 0x06, 0xe7, 0x2d, 0xc8, 0x67, 0xc4, 0x00, 0x01,
+  0x40, 0x10, 0x0c, 0xa4, 0x18, 0x17, 0xee, 0x5b, 0x38, 0x6f, 0xa1, 0x85,
+  0x02, 0xfb, 0xdd, 0xe0, 0xbc, 0x05, 0xf9, 0x8c, 0x18, 0x20, 0x00, 0x08,
+  0x82, 0x81, 0x34, 0xe3, 0x42, 0x7e, 0x0b, 0xe6, 0x2d, 0xe0, 0x5f, 0x30,
+  0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x12, 0x8d, 0x0b, 0xfa, 0x2d, 0xa4,
+  0xb7, 0x00, 0x43, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x48, 0x35,
+  0x2e, 0xec, 0xb7, 0xc0, 0xde, 0x02, 0x1a, 0x04, 0x23, 0x06, 0x08, 0x00,
+  0x82, 0x60, 0x20, 0xd9, 0xb8, 0xc0, 0xdf, 0x02, 0x7a, 0x0b, 0xfb, 0x67,
+  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x74, 0xe3, 0x42, 0x7f, 0x0b,
+  0xe8, 0x2d, 0xcc, 0x50, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x12,
+  0x8e, 0x0b, 0xfe, 0x2d, 0xb8, 0xb7, 0xb0, 0x06, 0xc1, 0x88, 0x41, 0x03,
+  0x80, 0x20, 0x18, 0x58, 0x37, 0x2e, 0xf8, 0xb7, 0x10, 0xdf, 0x82, 0x1a,
+  0xa4, 0x01, 0x1a, 0xf8, 0x1f, 0x42, 0x04, 0xc6, 0x06, 0x6c, 0x20, 0x1f,
+  0x0b, 0xda, 0x40, 0x3e, 0x16, 0x06, 0xf3, 0x2d, 0xc8, 0x67, 0xc4, 0x00,
+  0x01, 0x40, 0x10, 0x0c, 0xa4, 0x1e, 0x17, 0x46, 0x5c, 0x98, 0x6f, 0xc1,
+  0x09, 0x6c, 0x0c, 0xe6, 0x5b, 0x90, 0xcf, 0x88, 0x01, 0x02, 0x80, 0x20,
+  0x18, 0x48, 0x3f, 0x2e, 0x94, 0xb8, 0x20, 0xdf, 0x82, 0x16, 0x8c, 0x18,
+  0x20, 0x00, 0x08, 0x82, 0x81, 0x04, 0xe6, 0x82, 0x89, 0x0b, 0xf5, 0x2d,
+  0x44, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x48, 0x61, 0x2e, 0x9c,
+  0xb8, 0x80, 0xdf, 0x02, 0x12, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81,
+  0x24, 0xe6, 0x02, 0x8a, 0x0b, 0xf4, 0x2d, 0x74, 0xc6, 0x88, 0x01, 0x02,
+  0x80, 0x20, 0x18, 0x48, 0x63, 0x2e, 0xa4, 0xb8, 0x40, 0xdf, 0x02, 0x15,
+  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x44, 0xe6, 0x82, 0x8a, 0x0b,
+  0xfa, 0x2d, 0x2c, 0xc1, 0x88, 0x41, 0x03, 0x80, 0x20, 0x18, 0x58, 0x63,
+  0x2e, 0xa8, 0xb8, 0xd0, 0xdf, 0xc2, 0x1d, 0x2c, 0x0a, 0x18, 0x20, 0x44,
+  0x70, 0x81, 0x31, 0x23, 0x06, 0x0e, 0x00, 0x82, 0x60, 0xd0, 0xa4, 0xb9,
+  0x80, 0xe2, 0x02, 0x7d, 0x0b, 0xed, 0x2d, 0xf4, 0xb8, 0x10, 0x8c, 0xb8,
+  0x30, 0xe2, 0xc2, 0x88, 0x0b, 0x22, 0x2e, 0xfc, 0xb8, 0x30, 0x4b, 0x30,
+  0x42, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
 };
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_uint64_double.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_uint64_double.h
deleted file mode 100644
index a259ce07e318b..0000000000000
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_uint64_double.h
+++ /dev/null
@@ -1,6709 +0,0 @@
-#if 0
-;
-; Note: shader requires additional functionality:
-;       Double-precision floating point
-;       64-Bit integer
-;
-;
-; Input signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; no parameters
-;
-; Output signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; no parameters
-; shader hash: 1b7fa6acf02581c9f6da70587d3d5c58
-;
-; Pipeline Runtime Information: 
-;
-;
-;
-; Buffer Definitions:
-;
-; cbuffer 
-; {
-;
-;   [116 x i8] (type annotation not present)
-;
-; }
-;
-; Resource bind info for 
-; {
-;
-;   [8 x i8] (type annotation not present)
-;
-; }
-;
-; Resource bind info for 
-; {
-;
-;   [8 x i8] (type annotation not present)
-;
-; }
-;
-; Resource bind info for 
-; {
-;
-;   [8 x i8] (type annotation not present)
-;
-; }
-;
-;
-; Resource Bindings:
-;
-; Name                                 Type  Format         Dim      ID      HLSL Bind  Count
-; ------------------------------ ---------- ------- ----------- ------- -------------- ------
-;                                   cbuffer      NA          NA     CB0            cb0     1
-;                                       UAV  struct         r/w      U0             u0     1
-;                                       UAV  struct         r/w      U1             u1     1
-;                                       UAV  struct         r/w      U2             u2     1
-;
-target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
-target triple = "dxil-ms-dx"
-
-%dx.types.Handle = type { i8* }
-%dx.types.CBufRet.i32 = type { i32, i32, i32, i32 }
-%dx.types.ResRet.i32 = type { i32, i32, i32, i32, i32 }
-%"class.RWStructuredBuffer<unsigned long long>" = type { i64 }
-%"class.RWStructuredBuffer<double>" = type { double }
-%Constants = type { i32, i32, i32, i32, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32 }
-
-define void @GridSample() {
-  %1 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 2, i32 2, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %2 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 1, i32 1, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %3 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %4 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %5 = call i32 @dx.op.threadId.i32(i32 93, i32 0)  ; ThreadId(component)
-  %6 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
-  %7 = extractvalue %dx.types.CBufRet.i32 %6, 0
-  %8 = add i32 %7, %5
-  %9 = extractvalue %dx.types.CBufRet.i32 %6, 1
-  %10 = icmp ult i32 %8, %9
-  br i1 %10, label %11, label %3647
-
-; <label>:11                                      ; preds = %0
-  %12 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
-  %13 = extractvalue %dx.types.CBufRet.i32 %12, 3
-  %14 = uitofp i32 %13 to float
-  %15 = extractvalue %dx.types.CBufRet.i32 %12, 2
-  %16 = uitofp i32 %15 to float
-  %17 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 7)  ; CBufferLoadLegacy(handle,regIndex)
-  %18 = extractvalue %dx.types.CBufRet.i32 %17, 0
-  %19 = icmp eq i32 %18, 0
-  %20 = select i1 %19, float -5.000000e-01, float 0.000000e+00
-  %21 = select i1 %19, float -5.000000e-01, float -1.000000e+00
-  %22 = fadd float %14, %21
-  %23 = select i1 %19, float -5.000000e-01, float -1.000000e+00
-  %24 = fadd float %16, %23
-  %25 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
-  %26 = extractvalue %dx.types.CBufRet.i32 %25, 1
-  %27 = extractvalue %dx.types.CBufRet.i32 %25, 2
-  %28 = extractvalue %dx.types.CBufRet.i32 %25, 3
-  %29 = mul i32 %28, %27
-  %30 = mul i32 %27, %26
-  %31 = mul i32 %30, %28
-  %32 = udiv i32 %8, %31
-  %33 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 6)  ; CBufferLoadLegacy(handle,regIndex)
-  %34 = extractvalue %dx.types.CBufRet.i32 %33, 0
-  %35 = mul i32 %34, %32
-  %36 = sub i32 %8, %35
-  %37 = udiv i32 %36, %29
-  %38 = extractvalue %dx.types.CBufRet.i32 %33, 1
-  %39 = mul i32 %38, %37
-  %40 = sub i32 %36, %39
-  %41 = udiv i32 %40, %28
-  %42 = extractvalue %dx.types.CBufRet.i32 %33, 2
-  %43 = mul i32 %42, %41
-  %44 = sub i32 %40, %43
-  %45 = uitofp i32 %32 to float
-  %46 = uitofp i32 %41 to float
-  %47 = uitofp i32 %44 to float
-  %48 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
-  %49 = extractvalue %dx.types.CBufRet.i32 %48, 0
-  %50 = extractvalue %dx.types.CBufRet.i32 %48, 1
-  %51 = extractvalue %dx.types.CBufRet.i32 %48, 2
-  %52 = extractvalue %dx.types.CBufRet.i32 %48, 3
-  %53 = uitofp i32 %49 to float
-  %54 = uitofp i32 %50 to float
-  %55 = uitofp i32 %51 to float
-  %56 = uitofp i32 %52 to float
-  %57 = call float @dx.op.dot4.f32(i32 56, float %45, float %46, float %47, float 0.000000e+00, float %53, float %54, float %55, float %56)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %58 = fadd fast float %56, %57
-  %59 = fptoui float %57 to i32
-  %60 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %2, i32 %59, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %61 = extractvalue %dx.types.ResRet.i32 %60, 0
-  %62 = extractvalue %dx.types.ResRet.i32 %60, 1
-  %63 = call double @dx.op.makeDouble.f64(i32 101, i32 %61, i32 %62)  ; MakeDouble(lo,hi)
-  %64 = fptrunc double %63 to float
-  %65 = fptoui float %58 to i32
-  %66 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %2, i32 %65, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %67 = extractvalue %dx.types.ResRet.i32 %66, 0
-  %68 = extractvalue %dx.types.ResRet.i32 %66, 1
-  %69 = call double @dx.op.makeDouble.f64(i32 101, i32 %67, i32 %68)  ; MakeDouble(lo,hi)
-  %70 = fptrunc double %69 to float
-  %71 = icmp eq i32 %18, 1
-  %72 = fadd fast float %64, 1.000000e+00
-  %73 = fadd fast float %70, 1.000000e+00
-  br i1 %71, label %74, label %81
-
-; <label>:74                                      ; preds = %11
-  %75 = fmul fast float %72, 5.000000e-01
-  %76 = fmul fast float %73, 5.000000e-01
-  %77 = fadd fast float %14, -1.000000e+00
-  %78 = fadd fast float %16, -1.000000e+00
-  %79 = fmul fast float %75, %77
-  %80 = fmul fast float %76, %78
-  br label %88
-
-; <label>:81                                      ; preds = %11
-  %82 = fmul fast float %14, %72
-  %83 = fmul fast float %73, %16
-  %84 = fadd fast float %82, -1.000000e+00
-  %85 = fadd fast float %83, -1.000000e+00
-  %86 = fmul fast float %84, 5.000000e-01
-  %87 = fmul fast float %85, 5.000000e-01
-  br label %88
-
-; <label>:88                                      ; preds = %81, %74
-  %89 = phi float [ %79, %74 ], [ %86, %81 ]
-  %90 = phi float [ %80, %74 ], [ %87, %81 ]
-  %91 = extractvalue %dx.types.CBufRet.i32 %6, 2
-  %92 = icmp eq i32 %91, 1
-  br i1 %92, label %93, label %96
-
-; <label>:93                                      ; preds = %88
-  %94 = call float @dx.op.unary.f32(i32 26, float %89)  ; Round_ne(value)
-  %95 = call float @dx.op.unary.f32(i32 26, float %90)  ; Round_ne(value)
-  br label %96
-
-; <label>:96                                      ; preds = %93, %88
-  %97 = phi float [ %94, %93 ], [ %89, %88 ]
-  %98 = phi float [ %95, %93 ], [ %90, %88 ]
-  %99 = fcmp fast olt float %97, %20
-  %100 = fcmp fast ogt float %97, %22
-  %101 = or i1 %99, %100
-  %102 = fcmp fast olt float %98, %20
-  %103 = or i1 %101, %102
-  %104 = fcmp fast ogt float %98, %24
-  %105 = or i1 %104, %103
-  br i1 %105, label %106, label %179
-
-; <label>:106                                     ; preds = %96
-  %107 = extractvalue %dx.types.CBufRet.i32 %6, 3
-  %108 = icmp eq i32 %107, 1
-  br i1 %108, label %109, label %118
-
-; <label>:109                                     ; preds = %106
-  %110 = add i32 %13, -1
-  %111 = uitofp i32 %110 to float
-  %112 = call float @dx.op.binary.f32(i32 35, float %97, float 0.000000e+00)  ; FMax(a,b)
-  %113 = call float @dx.op.binary.f32(i32 36, float %112, float %111)  ; FMin(a,b)
-  %114 = add i32 %15, -1
-  %115 = uitofp i32 %114 to float
-  %116 = call float @dx.op.binary.f32(i32 35, float %98, float 0.000000e+00)  ; FMax(a,b)
-  %117 = call float @dx.op.binary.f32(i32 36, float %116, float %115)  ; FMin(a,b)
-  br label %179
-
-; <label>:118                                     ; preds = %106
-  %119 = icmp eq i32 %107, 2
-  br i1 %119, label %120, label %179
-
-; <label>:120                                     ; preds = %118
-  %121 = fsub fast float %22, %20
-  br i1 %99, label %122, label %135
-
-; <label>:122                                     ; preds = %120
-  %123 = fsub fast float %20, %97
-  %124 = fdiv fast float %123, %121
-  %125 = fptoui float %124 to i32
-  %126 = uitofp i32 %125 to float
-  %127 = fmul fast float %126, %121
-  %128 = fsub fast float %123, %127
-  %129 = and i32 %125, 1
-  %130 = icmp eq i32 %129, 0
-  br i1 %130, label %131, label %133
-
-; <label>:131                                     ; preds = %122
-  %132 = fadd fast float %128, %20
-  br label %149
-
-; <label>:133                                     ; preds = %122
-  %134 = fsub fast float %22, %128
-  br label %149
-
-; <label>:135                                     ; preds = %120
-  br i1 %100, label %136, label %149
-
-; <label>:136                                     ; preds = %135
-  %137 = fsub fast float %97, %22
-  %138 = fdiv fast float %137, %121
-  %139 = fptoui float %138 to i32
-  %140 = uitofp i32 %139 to float
-  %141 = fmul fast float %140, %121
-  %142 = fsub fast float %137, %141
-  %143 = and i32 %139, 1
-  %144 = icmp eq i32 %143, 0
-  br i1 %144, label %145, label %147
-
-; <label>:145                                     ; preds = %136
-  %146 = fsub fast float %22, %142
-  br label %149
-
-; <label>:147                                     ; preds = %136
-  %148 = fadd fast float %142, %20
-  br label %149
-
-; <label>:149                                     ; preds = %147, %145, %135, %133, %131
-  %150 = phi float [ %132, %131 ], [ %134, %133 ], [ %146, %145 ], [ %148, %147 ], [ %97, %135 ]
-  %151 = fsub fast float %24, %20
-  br i1 %102, label %152, label %165
-
-; <label>:152                                     ; preds = %149
-  %153 = fsub fast float %20, %98
-  %154 = fdiv fast float %153, %151
-  %155 = fptoui float %154 to i32
-  %156 = uitofp i32 %155 to float
-  %157 = fmul fast float %156, %151
-  %158 = fsub fast float %153, %157
-  %159 = and i32 %155, 1
-  %160 = icmp eq i32 %159, 0
-  br i1 %160, label %161, label %163
-
-; <label>:161                                     ; preds = %152
-  %162 = fadd fast float %158, %20
-  br label %179
-
-; <label>:163                                     ; preds = %152
-  %164 = fsub fast float %24, %158
-  br label %179
-
-; <label>:165                                     ; preds = %149
-  br i1 %104, label %166, label %179
-
-; <label>:166                                     ; preds = %165
-  %167 = fsub fast float %98, %24
-  %168 = fdiv fast float %167, %151
-  %169 = fptoui float %168 to i32
-  %170 = uitofp i32 %169 to float
-  %171 = fmul fast float %170, %151
-  %172 = fsub fast float %167, %171
-  %173 = and i32 %169, 1
-  %174 = icmp eq i32 %173, 0
-  br i1 %174, label %175, label %177
-
-; <label>:175                                     ; preds = %166
-  %176 = fsub fast float %24, %172
-  br label %179
-
-; <label>:177                                     ; preds = %166
-  %178 = fadd fast float %172, %20
-  br label %179
-
-; <label>:179                                     ; preds = %177, %175, %165, %163, %161, %118, %109, %96
-  %180 = phi float [ %113, %109 ], [ %97, %118 ], [ %97, %96 ], [ %150, %177 ], [ %150, %175 ], [ %150, %165 ], [ %150, %163 ], [ %150, %161 ]
-  %181 = phi float [ %117, %109 ], [ %98, %118 ], [ %98, %96 ], [ %178, %177 ], [ %176, %175 ], [ %98, %165 ], [ %164, %163 ], [ %162, %161 ]
-  %182 = uitofp i32 %37 to float
-  br i1 %92, label %183, label %350
-
-; <label>:183                                     ; preds = %179
-  %184 = extractvalue %dx.types.CBufRet.i32 %6, 3
-  %185 = icmp eq i32 %184, 0
-  br i1 %185, label %186, label %216
-
-; <label>:186                                     ; preds = %183
-  %187 = fcmp fast oge float %180, 0.000000e+00
-  %188 = fptoui float %180 to i32
-  %189 = icmp ult i32 %188, %13
-  %190 = and i1 %187, %189
-  %191 = fcmp fast oge float %181, 0.000000e+00
-  %192 = and i1 %191, %190
-  %193 = fptoui float %181 to i32
-  %194 = icmp ult i32 %193, %15
-  %195 = and i1 %194, %192
-  br i1 %195, label %196, label %344
-
-; <label>:196                                     ; preds = %186
-  %197 = fptoui float %45 to i32
-  %198 = fptoui float %182 to i32
-  %199 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %200 = extractvalue %dx.types.CBufRet.i32 %199, 0
-  %201 = extractvalue %dx.types.CBufRet.i32 %199, 1
-  %202 = extractvalue %dx.types.CBufRet.i32 %199, 2
-  %203 = extractvalue %dx.types.CBufRet.i32 %199, 3
-  %204 = mul i32 %200, %197
-  %205 = call i32 @dx.op.tertiary.i32(i32 48, i32 %198, i32 %201, i32 %204)  ; IMad(a,b,c)
-  %206 = call i32 @dx.op.tertiary.i32(i32 48, i32 %193, i32 %202, i32 %205)  ; IMad(a,b,c)
-  %207 = call i32 @dx.op.tertiary.i32(i32 48, i32 %188, i32 %203, i32 %206)  ; IMad(a,b,c)
-  %208 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %207, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %209 = extractvalue %dx.types.ResRet.i32 %208, 0
-  %210 = extractvalue %dx.types.ResRet.i32 %208, 1
-  %211 = zext i32 %209 to i64
-  %212 = zext i32 %210 to i64
-  %213 = shl i64 %212, 32
-  %214 = or i64 %211, %213
-  %215 = uitofp i64 %214 to float
-  br label %344
-
-; <label>:216                                     ; preds = %183
-  %217 = icmp eq i32 %184, 1
-  br i1 %217, label %218, label %252
-
-; <label>:218                                     ; preds = %216
-  %219 = add i32 %13, -1
-  %220 = uitofp i32 %219 to float
-  %221 = call float @dx.op.binary.f32(i32 35, float %180, float 0.000000e+00)  ; FMax(a,b)
-  %222 = call float @dx.op.binary.f32(i32 36, float %221, float %220)  ; FMin(a,b)
-  %223 = fptoui float %222 to i32
-  %224 = add i32 %15, -1
-  %225 = uitofp i32 %224 to float
-  %226 = call float @dx.op.binary.f32(i32 35, float %181, float 0.000000e+00)  ; FMax(a,b)
-  %227 = call float @dx.op.binary.f32(i32 36, float %226, float %225)  ; FMin(a,b)
-  %228 = fptoui float %227 to i32
-  %229 = uitofp i32 %228 to float
-  %230 = uitofp i32 %223 to float
-  %231 = fptoui float %45 to i32
-  %232 = fptoui float %182 to i32
-  %233 = fptoui float %229 to i32
-  %234 = fptoui float %230 to i32
-  %235 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %236 = extractvalue %dx.types.CBufRet.i32 %235, 0
-  %237 = extractvalue %dx.types.CBufRet.i32 %235, 1
-  %238 = extractvalue %dx.types.CBufRet.i32 %235, 2
-  %239 = extractvalue %dx.types.CBufRet.i32 %235, 3
-  %240 = mul i32 %236, %231
-  %241 = call i32 @dx.op.tertiary.i32(i32 48, i32 %232, i32 %237, i32 %240)  ; IMad(a,b,c)
-  %242 = call i32 @dx.op.tertiary.i32(i32 48, i32 %233, i32 %238, i32 %241)  ; IMad(a,b,c)
-  %243 = call i32 @dx.op.tertiary.i32(i32 48, i32 %234, i32 %239, i32 %242)  ; IMad(a,b,c)
-  %244 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %243, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %245 = extractvalue %dx.types.ResRet.i32 %244, 0
-  %246 = extractvalue %dx.types.ResRet.i32 %244, 1
-  %247 = zext i32 %245 to i64
-  %248 = zext i32 %246 to i64
-  %249 = shl i64 %248, 32
-  %250 = or i64 %247, %249
-  %251 = uitofp i64 %250 to float
-  br label %344
-
-; <label>:252                                     ; preds = %216
-  %253 = icmp eq i32 %184, 2
-  br i1 %253, label %254, label %344
-
-; <label>:254                                     ; preds = %252
-  %255 = fsub fast float %22, %20
-  %256 = fcmp fast olt float %180, %20
-  br i1 %256, label %257, label %270
-
-; <label>:257                                     ; preds = %254
-  %258 = fsub fast float %20, %180
-  %259 = fdiv fast float %258, %255
-  %260 = fptoui float %259 to i32
-  %261 = uitofp i32 %260 to float
-  %262 = fmul fast float %261, %255
-  %263 = fsub fast float %258, %262
-  %264 = and i32 %260, 1
-  %265 = icmp eq i32 %264, 0
-  br i1 %265, label %266, label %268
-
-; <label>:266                                     ; preds = %257
-  %267 = fadd fast float %263, %20
-  br label %285
-
-; <label>:268                                     ; preds = %257
-  %269 = fsub fast float %22, %263
-  br label %285
-
-; <label>:270                                     ; preds = %254
-  %271 = fcmp fast ogt float %180, %22
-  br i1 %271, label %272, label %285
-
-; <label>:272                                     ; preds = %270
-  %273 = fsub fast float %180, %22
-  %274 = fdiv fast float %273, %255
-  %275 = fptoui float %274 to i32
-  %276 = uitofp i32 %275 to float
-  %277 = fmul fast float %276, %255
-  %278 = fsub fast float %273, %277
-  %279 = and i32 %275, 1
-  %280 = icmp eq i32 %279, 0
-  br i1 %280, label %281, label %283
-
-; <label>:281                                     ; preds = %272
-  %282 = fsub fast float %22, %278
-  br label %285
-
-; <label>:283                                     ; preds = %272
-  %284 = fadd fast float %278, %20
-  br label %285
-
-; <label>:285                                     ; preds = %283, %281, %270, %268, %266
-  %286 = phi float [ %267, %266 ], [ %269, %268 ], [ %282, %281 ], [ %284, %283 ], [ %180, %270 ]
-  %287 = fptoui float %286 to i32
-  %288 = fsub fast float %24, %20
-  %289 = fcmp fast olt float %181, %20
-  br i1 %289, label %290, label %303
-
-; <label>:290                                     ; preds = %285
-  %291 = fsub fast float %20, %181
-  %292 = fdiv fast float %291, %288
-  %293 = fptoui float %292 to i32
-  %294 = uitofp i32 %293 to float
-  %295 = fmul fast float %294, %288
-  %296 = fsub fast float %291, %295
-  %297 = and i32 %293, 1
-  %298 = icmp eq i32 %297, 0
-  br i1 %298, label %299, label %301
-
-; <label>:299                                     ; preds = %290
-  %300 = fadd fast float %296, %20
-  br label %318
-
-; <label>:301                                     ; preds = %290
-  %302 = fsub fast float %24, %296
-  br label %318
-
-; <label>:303                                     ; preds = %285
-  %304 = fcmp fast ogt float %181, %24
-  br i1 %304, label %305, label %318
-
-; <label>:305                                     ; preds = %303
-  %306 = fsub fast float %181, %24
-  %307 = fdiv fast float %306, %288
-  %308 = fptoui float %307 to i32
-  %309 = uitofp i32 %308 to float
-  %310 = fmul fast float %309, %288
-  %311 = fsub fast float %306, %310
-  %312 = and i32 %308, 1
-  %313 = icmp eq i32 %312, 0
-  br i1 %313, label %314, label %316
-
-; <label>:314                                     ; preds = %305
-  %315 = fsub fast float %24, %311
-  br label %318
-
-; <label>:316                                     ; preds = %305
-  %317 = fadd fast float %311, %20
-  br label %318
-
-; <label>:318                                     ; preds = %316, %314, %303, %301, %299
-  %319 = phi float [ %300, %299 ], [ %302, %301 ], [ %315, %314 ], [ %317, %316 ], [ %181, %303 ]
-  %320 = fptoui float %319 to i32
-  %321 = uitofp i32 %320 to float
-  %322 = uitofp i32 %287 to float
-  %323 = fptoui float %45 to i32
-  %324 = fptoui float %182 to i32
-  %325 = fptoui float %321 to i32
-  %326 = fptoui float %322 to i32
-  %327 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %328 = extractvalue %dx.types.CBufRet.i32 %327, 0
-  %329 = extractvalue %dx.types.CBufRet.i32 %327, 1
-  %330 = extractvalue %dx.types.CBufRet.i32 %327, 2
-  %331 = extractvalue %dx.types.CBufRet.i32 %327, 3
-  %332 = mul i32 %328, %323
-  %333 = call i32 @dx.op.tertiary.i32(i32 48, i32 %324, i32 %329, i32 %332)  ; IMad(a,b,c)
-  %334 = call i32 @dx.op.tertiary.i32(i32 48, i32 %325, i32 %330, i32 %333)  ; IMad(a,b,c)
-  %335 = call i32 @dx.op.tertiary.i32(i32 48, i32 %326, i32 %331, i32 %334)  ; IMad(a,b,c)
-  %336 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %335, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %337 = extractvalue %dx.types.ResRet.i32 %336, 0
-  %338 = extractvalue %dx.types.ResRet.i32 %336, 1
-  %339 = zext i32 %337 to i64
-  %340 = zext i32 %338 to i64
-  %341 = shl i64 %340, 32
-  %342 = or i64 %339, %341
-  %343 = uitofp i64 %342 to float
-  br label %344
-
-; <label>:344                                     ; preds = %318, %252, %218, %196, %186
-  %345 = phi float [ %215, %196 ], [ 0.000000e+00, %186 ], [ %251, %218 ], [ %343, %318 ], [ 0.000000e+00, %252 ]
-  %346 = fptoui float %345 to i64
-  %347 = trunc i64 %346 to i32
-  %348 = lshr i64 %346, 32
-  %349 = trunc i64 %348 to i32
-  call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %1, i32 %8, i32 0, i32 %347, i32 %349, i32 undef, i32 undef, i8 3, i32 8)  ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment)
-  br label %3647
-
-; <label>:350                                     ; preds = %179
-  %351 = icmp eq i32 %91, 0
-  br i1 %351, label %352, label %1014
-
-; <label>:352                                     ; preds = %350
-  %353 = call float @dx.op.unary.f32(i32 27, float %180)  ; Round_ni(value)
-  %354 = call float @dx.op.unary.f32(i32 27, float %181)  ; Round_ni(value)
-  %355 = fadd fast float %353, 1.000000e+00
-  %356 = fadd fast float %354, 1.000000e+00
-  %357 = extractvalue %dx.types.CBufRet.i32 %6, 3
-  %358 = icmp eq i32 %357, 0
-  br i1 %358, label %359, label %389
-
-; <label>:359                                     ; preds = %352
-  %360 = fcmp fast oge float %353, 0.000000e+00
-  %361 = fptoui float %353 to i32
-  %362 = icmp ult i32 %361, %13
-  %363 = and i1 %360, %362
-  %364 = fcmp fast oge float %354, 0.000000e+00
-  %365 = and i1 %364, %363
-  %366 = fptoui float %354 to i32
-  %367 = icmp ult i32 %366, %15
-  %368 = and i1 %367, %365
-  br i1 %368, label %369, label %517
-
-; <label>:369                                     ; preds = %359
-  %370 = fptoui float %45 to i32
-  %371 = fptoui float %182 to i32
-  %372 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %373 = extractvalue %dx.types.CBufRet.i32 %372, 0
-  %374 = extractvalue %dx.types.CBufRet.i32 %372, 1
-  %375 = extractvalue %dx.types.CBufRet.i32 %372, 2
-  %376 = extractvalue %dx.types.CBufRet.i32 %372, 3
-  %377 = mul i32 %373, %370
-  %378 = call i32 @dx.op.tertiary.i32(i32 48, i32 %371, i32 %374, i32 %377)  ; IMad(a,b,c)
-  %379 = call i32 @dx.op.tertiary.i32(i32 48, i32 %366, i32 %375, i32 %378)  ; IMad(a,b,c)
-  %380 = call i32 @dx.op.tertiary.i32(i32 48, i32 %361, i32 %376, i32 %379)  ; IMad(a,b,c)
-  %381 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %380, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %382 = extractvalue %dx.types.ResRet.i32 %381, 0
-  %383 = extractvalue %dx.types.ResRet.i32 %381, 1
-  %384 = zext i32 %382 to i64
-  %385 = zext i32 %383 to i64
-  %386 = shl i64 %385, 32
-  %387 = or i64 %384, %386
-  %388 = uitofp i64 %387 to float
-  br label %517
-
-; <label>:389                                     ; preds = %352
-  %390 = icmp eq i32 %357, 1
-  br i1 %390, label %391, label %425
-
-; <label>:391                                     ; preds = %389
-  %392 = add i32 %13, -1
-  %393 = uitofp i32 %392 to float
-  %394 = call float @dx.op.binary.f32(i32 35, float %353, float 0.000000e+00)  ; FMax(a,b)
-  %395 = call float @dx.op.binary.f32(i32 36, float %394, float %393)  ; FMin(a,b)
-  %396 = fptoui float %395 to i32
-  %397 = add i32 %15, -1
-  %398 = uitofp i32 %397 to float
-  %399 = call float @dx.op.binary.f32(i32 35, float %354, float 0.000000e+00)  ; FMax(a,b)
-  %400 = call float @dx.op.binary.f32(i32 36, float %399, float %398)  ; FMin(a,b)
-  %401 = fptoui float %400 to i32
-  %402 = uitofp i32 %401 to float
-  %403 = uitofp i32 %396 to float
-  %404 = fptoui float %45 to i32
-  %405 = fptoui float %182 to i32
-  %406 = fptoui float %402 to i32
-  %407 = fptoui float %403 to i32
-  %408 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %409 = extractvalue %dx.types.CBufRet.i32 %408, 0
-  %410 = extractvalue %dx.types.CBufRet.i32 %408, 1
-  %411 = extractvalue %dx.types.CBufRet.i32 %408, 2
-  %412 = extractvalue %dx.types.CBufRet.i32 %408, 3
-  %413 = mul i32 %409, %404
-  %414 = call i32 @dx.op.tertiary.i32(i32 48, i32 %405, i32 %410, i32 %413)  ; IMad(a,b,c)
-  %415 = call i32 @dx.op.tertiary.i32(i32 48, i32 %406, i32 %411, i32 %414)  ; IMad(a,b,c)
-  %416 = call i32 @dx.op.tertiary.i32(i32 48, i32 %407, i32 %412, i32 %415)  ; IMad(a,b,c)
-  %417 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %416, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %418 = extractvalue %dx.types.ResRet.i32 %417, 0
-  %419 = extractvalue %dx.types.ResRet.i32 %417, 1
-  %420 = zext i32 %418 to i64
-  %421 = zext i32 %419 to i64
-  %422 = shl i64 %421, 32
-  %423 = or i64 %420, %422
-  %424 = uitofp i64 %423 to float
-  br label %517
-
-; <label>:425                                     ; preds = %389
-  %426 = icmp eq i32 %357, 2
-  br i1 %426, label %427, label %517
-
-; <label>:427                                     ; preds = %425
-  %428 = fsub fast float %22, %20
-  %429 = fcmp fast olt float %353, %20
-  br i1 %429, label %430, label %443
-
-; <label>:430                                     ; preds = %427
-  %431 = fsub fast float %20, %353
-  %432 = fdiv fast float %431, %428
-  %433 = fptoui float %432 to i32
-  %434 = uitofp i32 %433 to float
-  %435 = fmul fast float %434, %428
-  %436 = fsub fast float %431, %435
-  %437 = and i32 %433, 1
-  %438 = icmp eq i32 %437, 0
-  br i1 %438, label %439, label %441
-
-; <label>:439                                     ; preds = %430
-  %440 = fadd fast float %436, %20
-  br label %458
-
-; <label>:441                                     ; preds = %430
-  %442 = fsub fast float %22, %436
-  br label %458
-
-; <label>:443                                     ; preds = %427
-  %444 = fcmp fast ogt float %353, %22
-  br i1 %444, label %445, label %458
-
-; <label>:445                                     ; preds = %443
-  %446 = fsub fast float %353, %22
-  %447 = fdiv fast float %446, %428
-  %448 = fptoui float %447 to i32
-  %449 = uitofp i32 %448 to float
-  %450 = fmul fast float %449, %428
-  %451 = fsub fast float %446, %450
-  %452 = and i32 %448, 1
-  %453 = icmp eq i32 %452, 0
-  br i1 %453, label %454, label %456
-
-; <label>:454                                     ; preds = %445
-  %455 = fsub fast float %22, %451
-  br label %458
-
-; <label>:456                                     ; preds = %445
-  %457 = fadd fast float %451, %20
-  br label %458
-
-; <label>:458                                     ; preds = %456, %454, %443, %441, %439
-  %459 = phi float [ %440, %439 ], [ %442, %441 ], [ %455, %454 ], [ %457, %456 ], [ %353, %443 ]
-  %460 = fptoui float %459 to i32
-  %461 = fsub fast float %24, %20
-  %462 = fcmp fast olt float %354, %20
-  br i1 %462, label %463, label %476
-
-; <label>:463                                     ; preds = %458
-  %464 = fsub fast float %20, %354
-  %465 = fdiv fast float %464, %461
-  %466 = fptoui float %465 to i32
-  %467 = uitofp i32 %466 to float
-  %468 = fmul fast float %467, %461
-  %469 = fsub fast float %464, %468
-  %470 = and i32 %466, 1
-  %471 = icmp eq i32 %470, 0
-  br i1 %471, label %472, label %474
-
-; <label>:472                                     ; preds = %463
-  %473 = fadd fast float %469, %20
-  br label %491
-
-; <label>:474                                     ; preds = %463
-  %475 = fsub fast float %24, %469
-  br label %491
-
-; <label>:476                                     ; preds = %458
-  %477 = fcmp fast ogt float %354, %24
-  br i1 %477, label %478, label %491
-
-; <label>:478                                     ; preds = %476
-  %479 = fsub fast float %354, %24
-  %480 = fdiv fast float %479, %461
-  %481 = fptoui float %480 to i32
-  %482 = uitofp i32 %481 to float
-  %483 = fmul fast float %482, %461
-  %484 = fsub fast float %479, %483
-  %485 = and i32 %481, 1
-  %486 = icmp eq i32 %485, 0
-  br i1 %486, label %487, label %489
-
-; <label>:487                                     ; preds = %478
-  %488 = fsub fast float %24, %484
-  br label %491
-
-; <label>:489                                     ; preds = %478
-  %490 = fadd fast float %484, %20
-  br label %491
-
-; <label>:491                                     ; preds = %489, %487, %476, %474, %472
-  %492 = phi float [ %473, %472 ], [ %475, %474 ], [ %488, %487 ], [ %490, %489 ], [ %354, %476 ]
-  %493 = fptoui float %492 to i32
-  %494 = uitofp i32 %493 to float
-  %495 = uitofp i32 %460 to float
-  %496 = fptoui float %45 to i32
-  %497 = fptoui float %182 to i32
-  %498 = fptoui float %494 to i32
-  %499 = fptoui float %495 to i32
-  %500 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %501 = extractvalue %dx.types.CBufRet.i32 %500, 0
-  %502 = extractvalue %dx.types.CBufRet.i32 %500, 1
-  %503 = extractvalue %dx.types.CBufRet.i32 %500, 2
-  %504 = extractvalue %dx.types.CBufRet.i32 %500, 3
-  %505 = mul i32 %501, %496
-  %506 = call i32 @dx.op.tertiary.i32(i32 48, i32 %497, i32 %502, i32 %505)  ; IMad(a,b,c)
-  %507 = call i32 @dx.op.tertiary.i32(i32 48, i32 %498, i32 %503, i32 %506)  ; IMad(a,b,c)
-  %508 = call i32 @dx.op.tertiary.i32(i32 48, i32 %499, i32 %504, i32 %507)  ; IMad(a,b,c)
-  %509 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %508, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %510 = extractvalue %dx.types.ResRet.i32 %509, 0
-  %511 = extractvalue %dx.types.ResRet.i32 %509, 1
-  %512 = zext i32 %510 to i64
-  %513 = zext i32 %511 to i64
-  %514 = shl i64 %513, 32
-  %515 = or i64 %512, %514
-  %516 = uitofp i64 %515 to float
-  br label %517
-
-; <label>:517                                     ; preds = %491, %425, %391, %369, %359
-  %518 = phi float [ %388, %369 ], [ 0.000000e+00, %359 ], [ %424, %391 ], [ %516, %491 ], [ 0.000000e+00, %425 ]
-  br i1 %358, label %519, label %549
-
-; <label>:519                                     ; preds = %517
-  %520 = fcmp fast oge float %355, 0.000000e+00
-  %521 = fptoui float %355 to i32
-  %522 = icmp ult i32 %521, %13
-  %523 = and i1 %520, %522
-  %524 = fcmp fast oge float %354, 0.000000e+00
-  %525 = and i1 %524, %523
-  %526 = fptoui float %354 to i32
-  %527 = icmp ult i32 %526, %15
-  %528 = and i1 %527, %525
-  br i1 %528, label %529, label %677
-
-; <label>:529                                     ; preds = %519
-  %530 = fptoui float %45 to i32
-  %531 = fptoui float %182 to i32
-  %532 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %533 = extractvalue %dx.types.CBufRet.i32 %532, 0
-  %534 = extractvalue %dx.types.CBufRet.i32 %532, 1
-  %535 = extractvalue %dx.types.CBufRet.i32 %532, 2
-  %536 = extractvalue %dx.types.CBufRet.i32 %532, 3
-  %537 = mul i32 %533, %530
-  %538 = call i32 @dx.op.tertiary.i32(i32 48, i32 %531, i32 %534, i32 %537)  ; IMad(a,b,c)
-  %539 = call i32 @dx.op.tertiary.i32(i32 48, i32 %526, i32 %535, i32 %538)  ; IMad(a,b,c)
-  %540 = call i32 @dx.op.tertiary.i32(i32 48, i32 %521, i32 %536, i32 %539)  ; IMad(a,b,c)
-  %541 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %540, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %542 = extractvalue %dx.types.ResRet.i32 %541, 0
-  %543 = extractvalue %dx.types.ResRet.i32 %541, 1
-  %544 = zext i32 %542 to i64
-  %545 = zext i32 %543 to i64
-  %546 = shl i64 %545, 32
-  %547 = or i64 %544, %546
-  %548 = uitofp i64 %547 to float
-  br label %677
-
-; <label>:549                                     ; preds = %517
-  %550 = icmp eq i32 %357, 1
-  br i1 %550, label %551, label %585
-
-; <label>:551                                     ; preds = %549
-  %552 = add i32 %13, -1
-  %553 = uitofp i32 %552 to float
-  %554 = call float @dx.op.binary.f32(i32 35, float %355, float 0.000000e+00)  ; FMax(a,b)
-  %555 = call float @dx.op.binary.f32(i32 36, float %554, float %553)  ; FMin(a,b)
-  %556 = fptoui float %555 to i32
-  %557 = add i32 %15, -1
-  %558 = uitofp i32 %557 to float
-  %559 = call float @dx.op.binary.f32(i32 35, float %354, float 0.000000e+00)  ; FMax(a,b)
-  %560 = call float @dx.op.binary.f32(i32 36, float %559, float %558)  ; FMin(a,b)
-  %561 = fptoui float %560 to i32
-  %562 = uitofp i32 %561 to float
-  %563 = uitofp i32 %556 to float
-  %564 = fptoui float %45 to i32
-  %565 = fptoui float %182 to i32
-  %566 = fptoui float %562 to i32
-  %567 = fptoui float %563 to i32
-  %568 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %569 = extractvalue %dx.types.CBufRet.i32 %568, 0
-  %570 = extractvalue %dx.types.CBufRet.i32 %568, 1
-  %571 = extractvalue %dx.types.CBufRet.i32 %568, 2
-  %572 = extractvalue %dx.types.CBufRet.i32 %568, 3
-  %573 = mul i32 %569, %564
-  %574 = call i32 @dx.op.tertiary.i32(i32 48, i32 %565, i32 %570, i32 %573)  ; IMad(a,b,c)
-  %575 = call i32 @dx.op.tertiary.i32(i32 48, i32 %566, i32 %571, i32 %574)  ; IMad(a,b,c)
-  %576 = call i32 @dx.op.tertiary.i32(i32 48, i32 %567, i32 %572, i32 %575)  ; IMad(a,b,c)
-  %577 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %576, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %578 = extractvalue %dx.types.ResRet.i32 %577, 0
-  %579 = extractvalue %dx.types.ResRet.i32 %577, 1
-  %580 = zext i32 %578 to i64
-  %581 = zext i32 %579 to i64
-  %582 = shl i64 %581, 32
-  %583 = or i64 %580, %582
-  %584 = uitofp i64 %583 to float
-  br label %677
-
-; <label>:585                                     ; preds = %549
-  %586 = icmp eq i32 %357, 2
-  br i1 %586, label %587, label %677
-
-; <label>:587                                     ; preds = %585
-  %588 = fsub fast float %22, %20
-  %589 = fcmp fast olt float %355, %20
-  br i1 %589, label %590, label %603
-
-; <label>:590                                     ; preds = %587
-  %591 = fsub fast float %20, %355
-  %592 = fdiv fast float %591, %588
-  %593 = fptoui float %592 to i32
-  %594 = uitofp i32 %593 to float
-  %595 = fmul fast float %594, %588
-  %596 = fsub fast float %591, %595
-  %597 = and i32 %593, 1
-  %598 = icmp eq i32 %597, 0
-  br i1 %598, label %599, label %601
-
-; <label>:599                                     ; preds = %590
-  %600 = fadd fast float %596, %20
-  br label %618
-
-; <label>:601                                     ; preds = %590
-  %602 = fsub fast float %22, %596
-  br label %618
-
-; <label>:603                                     ; preds = %587
-  %604 = fcmp fast ogt float %355, %22
-  br i1 %604, label %605, label %618
-
-; <label>:605                                     ; preds = %603
-  %606 = fsub fast float %355, %22
-  %607 = fdiv fast float %606, %588
-  %608 = fptoui float %607 to i32
-  %609 = uitofp i32 %608 to float
-  %610 = fmul fast float %609, %588
-  %611 = fsub fast float %606, %610
-  %612 = and i32 %608, 1
-  %613 = icmp eq i32 %612, 0
-  br i1 %613, label %614, label %616
-
-; <label>:614                                     ; preds = %605
-  %615 = fsub fast float %22, %611
-  br label %618
-
-; <label>:616                                     ; preds = %605
-  %617 = fadd fast float %611, %20
-  br label %618
-
-; <label>:618                                     ; preds = %616, %614, %603, %601, %599
-  %619 = phi float [ %600, %599 ], [ %602, %601 ], [ %615, %614 ], [ %617, %616 ], [ %355, %603 ]
-  %620 = fptoui float %619 to i32
-  %621 = fsub fast float %24, %20
-  %622 = fcmp fast olt float %354, %20
-  br i1 %622, label %623, label %636
-
-; <label>:623                                     ; preds = %618
-  %624 = fsub fast float %20, %354
-  %625 = fdiv fast float %624, %621
-  %626 = fptoui float %625 to i32
-  %627 = uitofp i32 %626 to float
-  %628 = fmul fast float %627, %621
-  %629 = fsub fast float %624, %628
-  %630 = and i32 %626, 1
-  %631 = icmp eq i32 %630, 0
-  br i1 %631, label %632, label %634
-
-; <label>:632                                     ; preds = %623
-  %633 = fadd fast float %629, %20
-  br label %651
-
-; <label>:634                                     ; preds = %623
-  %635 = fsub fast float %24, %629
-  br label %651
-
-; <label>:636                                     ; preds = %618
-  %637 = fcmp fast ogt float %354, %24
-  br i1 %637, label %638, label %651
-
-; <label>:638                                     ; preds = %636
-  %639 = fsub fast float %354, %24
-  %640 = fdiv fast float %639, %621
-  %641 = fptoui float %640 to i32
-  %642 = uitofp i32 %641 to float
-  %643 = fmul fast float %642, %621
-  %644 = fsub fast float %639, %643
-  %645 = and i32 %641, 1
-  %646 = icmp eq i32 %645, 0
-  br i1 %646, label %647, label %649
-
-; <label>:647                                     ; preds = %638
-  %648 = fsub fast float %24, %644
-  br label %651
-
-; <label>:649                                     ; preds = %638
-  %650 = fadd fast float %644, %20
-  br label %651
-
-; <label>:651                                     ; preds = %649, %647, %636, %634, %632
-  %652 = phi float [ %633, %632 ], [ %635, %634 ], [ %648, %647 ], [ %650, %649 ], [ %354, %636 ]
-  %653 = fptoui float %652 to i32
-  %654 = uitofp i32 %653 to float
-  %655 = uitofp i32 %620 to float
-  %656 = fptoui float %45 to i32
-  %657 = fptoui float %182 to i32
-  %658 = fptoui float %654 to i32
-  %659 = fptoui float %655 to i32
-  %660 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %661 = extractvalue %dx.types.CBufRet.i32 %660, 0
-  %662 = extractvalue %dx.types.CBufRet.i32 %660, 1
-  %663 = extractvalue %dx.types.CBufRet.i32 %660, 2
-  %664 = extractvalue %dx.types.CBufRet.i32 %660, 3
-  %665 = mul i32 %661, %656
-  %666 = call i32 @dx.op.tertiary.i32(i32 48, i32 %657, i32 %662, i32 %665)  ; IMad(a,b,c)
-  %667 = call i32 @dx.op.tertiary.i32(i32 48, i32 %658, i32 %663, i32 %666)  ; IMad(a,b,c)
-  %668 = call i32 @dx.op.tertiary.i32(i32 48, i32 %659, i32 %664, i32 %667)  ; IMad(a,b,c)
-  %669 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %668, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %670 = extractvalue %dx.types.ResRet.i32 %669, 0
-  %671 = extractvalue %dx.types.ResRet.i32 %669, 1
-  %672 = zext i32 %670 to i64
-  %673 = zext i32 %671 to i64
-  %674 = shl i64 %673, 32
-  %675 = or i64 %672, %674
-  %676 = uitofp i64 %675 to float
-  br label %677
-
-; <label>:677                                     ; preds = %651, %585, %551, %529, %519
-  %678 = phi float [ %548, %529 ], [ 0.000000e+00, %519 ], [ %584, %551 ], [ %676, %651 ], [ 0.000000e+00, %585 ]
-  br i1 %358, label %679, label %709
-
-; <label>:679                                     ; preds = %677
-  %680 = fcmp fast oge float %353, 0.000000e+00
-  %681 = fptoui float %353 to i32
-  %682 = icmp ult i32 %681, %13
-  %683 = and i1 %680, %682
-  %684 = fcmp fast oge float %356, 0.000000e+00
-  %685 = and i1 %684, %683
-  %686 = fptoui float %356 to i32
-  %687 = icmp ult i32 %686, %15
-  %688 = and i1 %687, %685
-  br i1 %688, label %689, label %837
-
-; <label>:689                                     ; preds = %679
-  %690 = fptoui float %45 to i32
-  %691 = fptoui float %182 to i32
-  %692 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %693 = extractvalue %dx.types.CBufRet.i32 %692, 0
-  %694 = extractvalue %dx.types.CBufRet.i32 %692, 1
-  %695 = extractvalue %dx.types.CBufRet.i32 %692, 2
-  %696 = extractvalue %dx.types.CBufRet.i32 %692, 3
-  %697 = mul i32 %693, %690
-  %698 = call i32 @dx.op.tertiary.i32(i32 48, i32 %691, i32 %694, i32 %697)  ; IMad(a,b,c)
-  %699 = call i32 @dx.op.tertiary.i32(i32 48, i32 %686, i32 %695, i32 %698)  ; IMad(a,b,c)
-  %700 = call i32 @dx.op.tertiary.i32(i32 48, i32 %681, i32 %696, i32 %699)  ; IMad(a,b,c)
-  %701 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %700, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %702 = extractvalue %dx.types.ResRet.i32 %701, 0
-  %703 = extractvalue %dx.types.ResRet.i32 %701, 1
-  %704 = zext i32 %702 to i64
-  %705 = zext i32 %703 to i64
-  %706 = shl i64 %705, 32
-  %707 = or i64 %704, %706
-  %708 = uitofp i64 %707 to float
-  br label %837
-
-; <label>:709                                     ; preds = %677
-  %710 = icmp eq i32 %357, 1
-  br i1 %710, label %711, label %745
-
-; <label>:711                                     ; preds = %709
-  %712 = add i32 %13, -1
-  %713 = uitofp i32 %712 to float
-  %714 = call float @dx.op.binary.f32(i32 35, float %353, float 0.000000e+00)  ; FMax(a,b)
-  %715 = call float @dx.op.binary.f32(i32 36, float %714, float %713)  ; FMin(a,b)
-  %716 = fptoui float %715 to i32
-  %717 = add i32 %15, -1
-  %718 = uitofp i32 %717 to float
-  %719 = call float @dx.op.binary.f32(i32 35, float %356, float 0.000000e+00)  ; FMax(a,b)
-  %720 = call float @dx.op.binary.f32(i32 36, float %719, float %718)  ; FMin(a,b)
-  %721 = fptoui float %720 to i32
-  %722 = uitofp i32 %721 to float
-  %723 = uitofp i32 %716 to float
-  %724 = fptoui float %45 to i32
-  %725 = fptoui float %182 to i32
-  %726 = fptoui float %722 to i32
-  %727 = fptoui float %723 to i32
-  %728 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %729 = extractvalue %dx.types.CBufRet.i32 %728, 0
-  %730 = extractvalue %dx.types.CBufRet.i32 %728, 1
-  %731 = extractvalue %dx.types.CBufRet.i32 %728, 2
-  %732 = extractvalue %dx.types.CBufRet.i32 %728, 3
-  %733 = mul i32 %729, %724
-  %734 = call i32 @dx.op.tertiary.i32(i32 48, i32 %725, i32 %730, i32 %733)  ; IMad(a,b,c)
-  %735 = call i32 @dx.op.tertiary.i32(i32 48, i32 %726, i32 %731, i32 %734)  ; IMad(a,b,c)
-  %736 = call i32 @dx.op.tertiary.i32(i32 48, i32 %727, i32 %732, i32 %735)  ; IMad(a,b,c)
-  %737 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %736, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %738 = extractvalue %dx.types.ResRet.i32 %737, 0
-  %739 = extractvalue %dx.types.ResRet.i32 %737, 1
-  %740 = zext i32 %738 to i64
-  %741 = zext i32 %739 to i64
-  %742 = shl i64 %741, 32
-  %743 = or i64 %740, %742
-  %744 = uitofp i64 %743 to float
-  br label %837
-
-; <label>:745                                     ; preds = %709
-  %746 = icmp eq i32 %357, 2
-  br i1 %746, label %747, label %837
-
-; <label>:747                                     ; preds = %745
-  %748 = fsub fast float %22, %20
-  %749 = fcmp fast olt float %353, %20
-  br i1 %749, label %750, label %763
-
-; <label>:750                                     ; preds = %747
-  %751 = fsub fast float %20, %353
-  %752 = fdiv fast float %751, %748
-  %753 = fptoui float %752 to i32
-  %754 = uitofp i32 %753 to float
-  %755 = fmul fast float %754, %748
-  %756 = fsub fast float %751, %755
-  %757 = and i32 %753, 1
-  %758 = icmp eq i32 %757, 0
-  br i1 %758, label %759, label %761
-
-; <label>:759                                     ; preds = %750
-  %760 = fadd fast float %756, %20
-  br label %778
-
-; <label>:761                                     ; preds = %750
-  %762 = fsub fast float %22, %756
-  br label %778
-
-; <label>:763                                     ; preds = %747
-  %764 = fcmp fast ogt float %353, %22
-  br i1 %764, label %765, label %778
-
-; <label>:765                                     ; preds = %763
-  %766 = fsub fast float %353, %22
-  %767 = fdiv fast float %766, %748
-  %768 = fptoui float %767 to i32
-  %769 = uitofp i32 %768 to float
-  %770 = fmul fast float %769, %748
-  %771 = fsub fast float %766, %770
-  %772 = and i32 %768, 1
-  %773 = icmp eq i32 %772, 0
-  br i1 %773, label %774, label %776
-
-; <label>:774                                     ; preds = %765
-  %775 = fsub fast float %22, %771
-  br label %778
-
-; <label>:776                                     ; preds = %765
-  %777 = fadd fast float %771, %20
-  br label %778
-
-; <label>:778                                     ; preds = %776, %774, %763, %761, %759
-  %779 = phi float [ %760, %759 ], [ %762, %761 ], [ %775, %774 ], [ %777, %776 ], [ %353, %763 ]
-  %780 = fptoui float %779 to i32
-  %781 = fsub fast float %24, %20
-  %782 = fcmp fast olt float %356, %20
-  br i1 %782, label %783, label %796
-
-; <label>:783                                     ; preds = %778
-  %784 = fsub fast float %20, %356
-  %785 = fdiv fast float %784, %781
-  %786 = fptoui float %785 to i32
-  %787 = uitofp i32 %786 to float
-  %788 = fmul fast float %787, %781
-  %789 = fsub fast float %784, %788
-  %790 = and i32 %786, 1
-  %791 = icmp eq i32 %790, 0
-  br i1 %791, label %792, label %794
-
-; <label>:792                                     ; preds = %783
-  %793 = fadd fast float %789, %20
-  br label %811
-
-; <label>:794                                     ; preds = %783
-  %795 = fsub fast float %24, %789
-  br label %811
-
-; <label>:796                                     ; preds = %778
-  %797 = fcmp fast ogt float %356, %24
-  br i1 %797, label %798, label %811
-
-; <label>:798                                     ; preds = %796
-  %799 = fsub fast float %356, %24
-  %800 = fdiv fast float %799, %781
-  %801 = fptoui float %800 to i32
-  %802 = uitofp i32 %801 to float
-  %803 = fmul fast float %802, %781
-  %804 = fsub fast float %799, %803
-  %805 = and i32 %801, 1
-  %806 = icmp eq i32 %805, 0
-  br i1 %806, label %807, label %809
-
-; <label>:807                                     ; preds = %798
-  %808 = fsub fast float %24, %804
-  br label %811
-
-; <label>:809                                     ; preds = %798
-  %810 = fadd fast float %804, %20
-  br label %811
-
-; <label>:811                                     ; preds = %809, %807, %796, %794, %792
-  %812 = phi float [ %793, %792 ], [ %795, %794 ], [ %808, %807 ], [ %810, %809 ], [ %356, %796 ]
-  %813 = fptoui float %812 to i32
-  %814 = uitofp i32 %813 to float
-  %815 = uitofp i32 %780 to float
-  %816 = fptoui float %45 to i32
-  %817 = fptoui float %182 to i32
-  %818 = fptoui float %814 to i32
-  %819 = fptoui float %815 to i32
-  %820 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %821 = extractvalue %dx.types.CBufRet.i32 %820, 0
-  %822 = extractvalue %dx.types.CBufRet.i32 %820, 1
-  %823 = extractvalue %dx.types.CBufRet.i32 %820, 2
-  %824 = extractvalue %dx.types.CBufRet.i32 %820, 3
-  %825 = mul i32 %821, %816
-  %826 = call i32 @dx.op.tertiary.i32(i32 48, i32 %817, i32 %822, i32 %825)  ; IMad(a,b,c)
-  %827 = call i32 @dx.op.tertiary.i32(i32 48, i32 %818, i32 %823, i32 %826)  ; IMad(a,b,c)
-  %828 = call i32 @dx.op.tertiary.i32(i32 48, i32 %819, i32 %824, i32 %827)  ; IMad(a,b,c)
-  %829 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %828, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %830 = extractvalue %dx.types.ResRet.i32 %829, 0
-  %831 = extractvalue %dx.types.ResRet.i32 %829, 1
-  %832 = zext i32 %830 to i64
-  %833 = zext i32 %831 to i64
-  %834 = shl i64 %833, 32
-  %835 = or i64 %832, %834
-  %836 = uitofp i64 %835 to float
-  br label %837
-
-; <label>:837                                     ; preds = %811, %745, %711, %689, %679
-  %838 = phi float [ %708, %689 ], [ 0.000000e+00, %679 ], [ %744, %711 ], [ %836, %811 ], [ 0.000000e+00, %745 ]
-  br i1 %358, label %839, label %869
-
-; <label>:839                                     ; preds = %837
-  %840 = fcmp fast oge float %355, 0.000000e+00
-  %841 = fptoui float %355 to i32
-  %842 = icmp ult i32 %841, %13
-  %843 = and i1 %840, %842
-  %844 = fcmp fast oge float %356, 0.000000e+00
-  %845 = and i1 %844, %843
-  %846 = fptoui float %356 to i32
-  %847 = icmp ult i32 %846, %15
-  %848 = and i1 %847, %845
-  br i1 %848, label %849, label %997
-
-; <label>:849                                     ; preds = %839
-  %850 = fptoui float %45 to i32
-  %851 = fptoui float %182 to i32
-  %852 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %853 = extractvalue %dx.types.CBufRet.i32 %852, 0
-  %854 = extractvalue %dx.types.CBufRet.i32 %852, 1
-  %855 = extractvalue %dx.types.CBufRet.i32 %852, 2
-  %856 = extractvalue %dx.types.CBufRet.i32 %852, 3
-  %857 = mul i32 %853, %850
-  %858 = call i32 @dx.op.tertiary.i32(i32 48, i32 %851, i32 %854, i32 %857)  ; IMad(a,b,c)
-  %859 = call i32 @dx.op.tertiary.i32(i32 48, i32 %846, i32 %855, i32 %858)  ; IMad(a,b,c)
-  %860 = call i32 @dx.op.tertiary.i32(i32 48, i32 %841, i32 %856, i32 %859)  ; IMad(a,b,c)
-  %861 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %860, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %862 = extractvalue %dx.types.ResRet.i32 %861, 0
-  %863 = extractvalue %dx.types.ResRet.i32 %861, 1
-  %864 = zext i32 %862 to i64
-  %865 = zext i32 %863 to i64
-  %866 = shl i64 %865, 32
-  %867 = or i64 %864, %866
-  %868 = uitofp i64 %867 to float
-  br label %997
-
-; <label>:869                                     ; preds = %837
-  %870 = icmp eq i32 %357, 1
-  br i1 %870, label %871, label %905
-
-; <label>:871                                     ; preds = %869
-  %872 = add i32 %13, -1
-  %873 = uitofp i32 %872 to float
-  %874 = call float @dx.op.binary.f32(i32 35, float %355, float 0.000000e+00)  ; FMax(a,b)
-  %875 = call float @dx.op.binary.f32(i32 36, float %874, float %873)  ; FMin(a,b)
-  %876 = fptoui float %875 to i32
-  %877 = add i32 %15, -1
-  %878 = uitofp i32 %877 to float
-  %879 = call float @dx.op.binary.f32(i32 35, float %356, float 0.000000e+00)  ; FMax(a,b)
-  %880 = call float @dx.op.binary.f32(i32 36, float %879, float %878)  ; FMin(a,b)
-  %881 = fptoui float %880 to i32
-  %882 = uitofp i32 %881 to float
-  %883 = uitofp i32 %876 to float
-  %884 = fptoui float %45 to i32
-  %885 = fptoui float %182 to i32
-  %886 = fptoui float %882 to i32
-  %887 = fptoui float %883 to i32
-  %888 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %889 = extractvalue %dx.types.CBufRet.i32 %888, 0
-  %890 = extractvalue %dx.types.CBufRet.i32 %888, 1
-  %891 = extractvalue %dx.types.CBufRet.i32 %888, 2
-  %892 = extractvalue %dx.types.CBufRet.i32 %888, 3
-  %893 = mul i32 %889, %884
-  %894 = call i32 @dx.op.tertiary.i32(i32 48, i32 %885, i32 %890, i32 %893)  ; IMad(a,b,c)
-  %895 = call i32 @dx.op.tertiary.i32(i32 48, i32 %886, i32 %891, i32 %894)  ; IMad(a,b,c)
-  %896 = call i32 @dx.op.tertiary.i32(i32 48, i32 %887, i32 %892, i32 %895)  ; IMad(a,b,c)
-  %897 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %896, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %898 = extractvalue %dx.types.ResRet.i32 %897, 0
-  %899 = extractvalue %dx.types.ResRet.i32 %897, 1
-  %900 = zext i32 %898 to i64
-  %901 = zext i32 %899 to i64
-  %902 = shl i64 %901, 32
-  %903 = or i64 %900, %902
-  %904 = uitofp i64 %903 to float
-  br label %997
-
-; <label>:905                                     ; preds = %869
-  %906 = icmp eq i32 %357, 2
-  br i1 %906, label %907, label %997
-
-; <label>:907                                     ; preds = %905
-  %908 = fsub fast float %22, %20
-  %909 = fcmp fast olt float %355, %20
-  br i1 %909, label %910, label %923
-
-; <label>:910                                     ; preds = %907
-  %911 = fsub fast float %20, %355
-  %912 = fdiv fast float %911, %908
-  %913 = fptoui float %912 to i32
-  %914 = uitofp i32 %913 to float
-  %915 = fmul fast float %914, %908
-  %916 = fsub fast float %911, %915
-  %917 = and i32 %913, 1
-  %918 = icmp eq i32 %917, 0
-  br i1 %918, label %919, label %921
-
-; <label>:919                                     ; preds = %910
-  %920 = fadd fast float %916, %20
-  br label %938
-
-; <label>:921                                     ; preds = %910
-  %922 = fsub fast float %22, %916
-  br label %938
-
-; <label>:923                                     ; preds = %907
-  %924 = fcmp fast ogt float %355, %22
-  br i1 %924, label %925, label %938
-
-; <label>:925                                     ; preds = %923
-  %926 = fsub fast float %355, %22
-  %927 = fdiv fast float %926, %908
-  %928 = fptoui float %927 to i32
-  %929 = uitofp i32 %928 to float
-  %930 = fmul fast float %929, %908
-  %931 = fsub fast float %926, %930
-  %932 = and i32 %928, 1
-  %933 = icmp eq i32 %932, 0
-  br i1 %933, label %934, label %936
-
-; <label>:934                                     ; preds = %925
-  %935 = fsub fast float %22, %931
-  br label %938
-
-; <label>:936                                     ; preds = %925
-  %937 = fadd fast float %931, %20
-  br label %938
-
-; <label>:938                                     ; preds = %936, %934, %923, %921, %919
-  %939 = phi float [ %920, %919 ], [ %922, %921 ], [ %935, %934 ], [ %937, %936 ], [ %355, %923 ]
-  %940 = fptoui float %939 to i32
-  %941 = fsub fast float %24, %20
-  %942 = fcmp fast olt float %356, %20
-  br i1 %942, label %943, label %956
-
-; <label>:943                                     ; preds = %938
-  %944 = fsub fast float %20, %356
-  %945 = fdiv fast float %944, %941
-  %946 = fptoui float %945 to i32
-  %947 = uitofp i32 %946 to float
-  %948 = fmul fast float %947, %941
-  %949 = fsub fast float %944, %948
-  %950 = and i32 %946, 1
-  %951 = icmp eq i32 %950, 0
-  br i1 %951, label %952, label %954
-
-; <label>:952                                     ; preds = %943
-  %953 = fadd fast float %949, %20
-  br label %971
-
-; <label>:954                                     ; preds = %943
-  %955 = fsub fast float %24, %949
-  br label %971
-
-; <label>:956                                     ; preds = %938
-  %957 = fcmp fast ogt float %356, %24
-  br i1 %957, label %958, label %971
-
-; <label>:958                                     ; preds = %956
-  %959 = fsub fast float %356, %24
-  %960 = fdiv fast float %959, %941
-  %961 = fptoui float %960 to i32
-  %962 = uitofp i32 %961 to float
-  %963 = fmul fast float %962, %941
-  %964 = fsub fast float %959, %963
-  %965 = and i32 %961, 1
-  %966 = icmp eq i32 %965, 0
-  br i1 %966, label %967, label %969
-
-; <label>:967                                     ; preds = %958
-  %968 = fsub fast float %24, %964
-  br label %971
-
-; <label>:969                                     ; preds = %958
-  %970 = fadd fast float %964, %20
-  br label %971
-
-; <label>:971                                     ; preds = %969, %967, %956, %954, %952
-  %972 = phi float [ %953, %952 ], [ %955, %954 ], [ %968, %967 ], [ %970, %969 ], [ %356, %956 ]
-  %973 = fptoui float %972 to i32
-  %974 = uitofp i32 %973 to float
-  %975 = uitofp i32 %940 to float
-  %976 = fptoui float %45 to i32
-  %977 = fptoui float %182 to i32
-  %978 = fptoui float %974 to i32
-  %979 = fptoui float %975 to i32
-  %980 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %981 = extractvalue %dx.types.CBufRet.i32 %980, 0
-  %982 = extractvalue %dx.types.CBufRet.i32 %980, 1
-  %983 = extractvalue %dx.types.CBufRet.i32 %980, 2
-  %984 = extractvalue %dx.types.CBufRet.i32 %980, 3
-  %985 = mul i32 %981, %976
-  %986 = call i32 @dx.op.tertiary.i32(i32 48, i32 %977, i32 %982, i32 %985)  ; IMad(a,b,c)
-  %987 = call i32 @dx.op.tertiary.i32(i32 48, i32 %978, i32 %983, i32 %986)  ; IMad(a,b,c)
-  %988 = call i32 @dx.op.tertiary.i32(i32 48, i32 %979, i32 %984, i32 %987)  ; IMad(a,b,c)
-  %989 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %988, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %990 = extractvalue %dx.types.ResRet.i32 %989, 0
-  %991 = extractvalue %dx.types.ResRet.i32 %989, 1
-  %992 = zext i32 %990 to i64
-  %993 = zext i32 %991 to i64
-  %994 = shl i64 %993, 32
-  %995 = or i64 %992, %994
-  %996 = uitofp i64 %995 to float
-  br label %997
-
-; <label>:997                                     ; preds = %971, %905, %871, %849, %839
-  %998 = phi float [ %868, %849 ], [ 0.000000e+00, %839 ], [ %904, %871 ], [ %996, %971 ], [ 0.000000e+00, %905 ]
-  %999 = call float @dx.op.unary.f32(i32 22, float %180)  ; Frc(value)
-  %1000 = fsub fast float %678, %518
-  %1001 = fmul fast float %999, %1000
-  %1002 = fadd fast float %1001, %518
-  %1003 = fsub fast float %998, %838
-  %1004 = fmul fast float %999, %1003
-  %1005 = fadd fast float %1004, %838
-  %1006 = call float @dx.op.unary.f32(i32 22, float %181)  ; Frc(value)
-  %1007 = fsub fast float %1005, %1002
-  %1008 = fmul fast float %1007, %1006
-  %1009 = fadd fast float %1008, %1002
-  %1010 = fptoui float %1009 to i64
-  %1011 = trunc i64 %1010 to i32
-  %1012 = lshr i64 %1010, 32
-  %1013 = trunc i64 %1012 to i32
-  call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %1, i32 %8, i32 0, i32 %1011, i32 %1013, i32 undef, i32 undef, i8 3, i32 8)  ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment)
-  br label %3647
-
-; <label>:1014                                    ; preds = %350
-  %1015 = icmp eq i32 %91, 2
-  br i1 %1015, label %1016, label %3647
-
-; <label>:1016                                    ; preds = %1014
-  %1017 = call float @dx.op.unary.f32(i32 27, float %180)  ; Round_ni(value)
-  %1018 = fadd fast float %1017, -1.000000e+00
-  %1019 = call float @dx.op.unary.f32(i32 27, float %181)  ; Round_ni(value)
-  %1020 = fadd fast float %1019, -1.000000e+00
-  %1021 = extractvalue %dx.types.CBufRet.i32 %6, 3
-  %1022 = icmp eq i32 %1021, 0
-  br i1 %1022, label %1023, label %1053
-
-; <label>:1023                                    ; preds = %1016
-  %1024 = fcmp fast oge float %1018, 0.000000e+00
-  %1025 = fptoui float %1018 to i32
-  %1026 = icmp ult i32 %1025, %13
-  %1027 = and i1 %1024, %1026
-  %1028 = fcmp fast oge float %1020, 0.000000e+00
-  %1029 = and i1 %1028, %1027
-  %1030 = fptoui float %1020 to i32
-  %1031 = icmp ult i32 %1030, %15
-  %1032 = and i1 %1031, %1029
-  br i1 %1032, label %1033, label %1181
-
-; <label>:1033                                    ; preds = %1023
-  %1034 = fptoui float %45 to i32
-  %1035 = fptoui float %182 to i32
-  %1036 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1037 = extractvalue %dx.types.CBufRet.i32 %1036, 0
-  %1038 = extractvalue %dx.types.CBufRet.i32 %1036, 1
-  %1039 = extractvalue %dx.types.CBufRet.i32 %1036, 2
-  %1040 = extractvalue %dx.types.CBufRet.i32 %1036, 3
-  %1041 = mul i32 %1037, %1034
-  %1042 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1035, i32 %1038, i32 %1041)  ; IMad(a,b,c)
-  %1043 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1030, i32 %1039, i32 %1042)  ; IMad(a,b,c)
-  %1044 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1025, i32 %1040, i32 %1043)  ; IMad(a,b,c)
-  %1045 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1044, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1046 = extractvalue %dx.types.ResRet.i32 %1045, 0
-  %1047 = extractvalue %dx.types.ResRet.i32 %1045, 1
-  %1048 = zext i32 %1046 to i64
-  %1049 = zext i32 %1047 to i64
-  %1050 = shl i64 %1049, 32
-  %1051 = or i64 %1048, %1050
-  %1052 = uitofp i64 %1051 to float
-  br label %1181
-
-; <label>:1053                                    ; preds = %1016
-  %1054 = icmp eq i32 %1021, 1
-  br i1 %1054, label %1055, label %1089
-
-; <label>:1055                                    ; preds = %1053
-  %1056 = add i32 %13, -1
-  %1057 = uitofp i32 %1056 to float
-  %1058 = call float @dx.op.binary.f32(i32 35, float %1018, float 0.000000e+00)  ; FMax(a,b)
-  %1059 = call float @dx.op.binary.f32(i32 36, float %1058, float %1057)  ; FMin(a,b)
-  %1060 = fptoui float %1059 to i32
-  %1061 = add i32 %15, -1
-  %1062 = uitofp i32 %1061 to float
-  %1063 = call float @dx.op.binary.f32(i32 35, float %1020, float 0.000000e+00)  ; FMax(a,b)
-  %1064 = call float @dx.op.binary.f32(i32 36, float %1063, float %1062)  ; FMin(a,b)
-  %1065 = fptoui float %1064 to i32
-  %1066 = uitofp i32 %1065 to float
-  %1067 = uitofp i32 %1060 to float
-  %1068 = fptoui float %45 to i32
-  %1069 = fptoui float %182 to i32
-  %1070 = fptoui float %1066 to i32
-  %1071 = fptoui float %1067 to i32
-  %1072 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1073 = extractvalue %dx.types.CBufRet.i32 %1072, 0
-  %1074 = extractvalue %dx.types.CBufRet.i32 %1072, 1
-  %1075 = extractvalue %dx.types.CBufRet.i32 %1072, 2
-  %1076 = extractvalue %dx.types.CBufRet.i32 %1072, 3
-  %1077 = mul i32 %1073, %1068
-  %1078 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1069, i32 %1074, i32 %1077)  ; IMad(a,b,c)
-  %1079 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1070, i32 %1075, i32 %1078)  ; IMad(a,b,c)
-  %1080 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1071, i32 %1076, i32 %1079)  ; IMad(a,b,c)
-  %1081 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1080, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1082 = extractvalue %dx.types.ResRet.i32 %1081, 0
-  %1083 = extractvalue %dx.types.ResRet.i32 %1081, 1
-  %1084 = zext i32 %1082 to i64
-  %1085 = zext i32 %1083 to i64
-  %1086 = shl i64 %1085, 32
-  %1087 = or i64 %1084, %1086
-  %1088 = uitofp i64 %1087 to float
-  br label %1181
-
-; <label>:1089                                    ; preds = %1053
-  %1090 = icmp eq i32 %1021, 2
-  br i1 %1090, label %1091, label %1181
-
-; <label>:1091                                    ; preds = %1089
-  %1092 = fsub fast float %22, %20
-  %1093 = fcmp fast olt float %1018, %20
-  br i1 %1093, label %1094, label %1107
-
-; <label>:1094                                    ; preds = %1091
-  %1095 = fsub fast float %20, %1018
-  %1096 = fdiv fast float %1095, %1092
-  %1097 = fptoui float %1096 to i32
-  %1098 = uitofp i32 %1097 to float
-  %1099 = fmul fast float %1098, %1092
-  %1100 = fsub fast float %1095, %1099
-  %1101 = and i32 %1097, 1
-  %1102 = icmp eq i32 %1101, 0
-  br i1 %1102, label %1103, label %1105
-
-; <label>:1103                                    ; preds = %1094
-  %1104 = fadd fast float %1100, %20
-  br label %1122
-
-; <label>:1105                                    ; preds = %1094
-  %1106 = fsub fast float %22, %1100
-  br label %1122
-
-; <label>:1107                                    ; preds = %1091
-  %1108 = fcmp fast ogt float %1018, %22
-  br i1 %1108, label %1109, label %1122
-
-; <label>:1109                                    ; preds = %1107
-  %1110 = fsub fast float %1018, %22
-  %1111 = fdiv fast float %1110, %1092
-  %1112 = fptoui float %1111 to i32
-  %1113 = uitofp i32 %1112 to float
-  %1114 = fmul fast float %1113, %1092
-  %1115 = fsub fast float %1110, %1114
-  %1116 = and i32 %1112, 1
-  %1117 = icmp eq i32 %1116, 0
-  br i1 %1117, label %1118, label %1120
-
-; <label>:1118                                    ; preds = %1109
-  %1119 = fsub fast float %22, %1115
-  br label %1122
-
-; <label>:1120                                    ; preds = %1109
-  %1121 = fadd fast float %1115, %20
-  br label %1122
-
-; <label>:1122                                    ; preds = %1120, %1118, %1107, %1105, %1103
-  %1123 = phi float [ %1104, %1103 ], [ %1106, %1105 ], [ %1119, %1118 ], [ %1121, %1120 ], [ %1018, %1107 ]
-  %1124 = fptoui float %1123 to i32
-  %1125 = fsub fast float %24, %20
-  %1126 = fcmp fast olt float %1020, %20
-  br i1 %1126, label %1127, label %1140
-
-; <label>:1127                                    ; preds = %1122
-  %1128 = fsub fast float %20, %1020
-  %1129 = fdiv fast float %1128, %1125
-  %1130 = fptoui float %1129 to i32
-  %1131 = uitofp i32 %1130 to float
-  %1132 = fmul fast float %1131, %1125
-  %1133 = fsub fast float %1128, %1132
-  %1134 = and i32 %1130, 1
-  %1135 = icmp eq i32 %1134, 0
-  br i1 %1135, label %1136, label %1138
-
-; <label>:1136                                    ; preds = %1127
-  %1137 = fadd fast float %1133, %20
-  br label %1155
-
-; <label>:1138                                    ; preds = %1127
-  %1139 = fsub fast float %24, %1133
-  br label %1155
-
-; <label>:1140                                    ; preds = %1122
-  %1141 = fcmp fast ogt float %1020, %24
-  br i1 %1141, label %1142, label %1155
-
-; <label>:1142                                    ; preds = %1140
-  %1143 = fsub fast float %1020, %24
-  %1144 = fdiv fast float %1143, %1125
-  %1145 = fptoui float %1144 to i32
-  %1146 = uitofp i32 %1145 to float
-  %1147 = fmul fast float %1146, %1125
-  %1148 = fsub fast float %1143, %1147
-  %1149 = and i32 %1145, 1
-  %1150 = icmp eq i32 %1149, 0
-  br i1 %1150, label %1151, label %1153
-
-; <label>:1151                                    ; preds = %1142
-  %1152 = fsub fast float %24, %1148
-  br label %1155
-
-; <label>:1153                                    ; preds = %1142
-  %1154 = fadd fast float %1148, %20
-  br label %1155
-
-; <label>:1155                                    ; preds = %1153, %1151, %1140, %1138, %1136
-  %1156 = phi float [ %1137, %1136 ], [ %1139, %1138 ], [ %1152, %1151 ], [ %1154, %1153 ], [ %1020, %1140 ]
-  %1157 = fptoui float %1156 to i32
-  %1158 = uitofp i32 %1157 to float
-  %1159 = uitofp i32 %1124 to float
-  %1160 = fptoui float %45 to i32
-  %1161 = fptoui float %182 to i32
-  %1162 = fptoui float %1158 to i32
-  %1163 = fptoui float %1159 to i32
-  %1164 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1165 = extractvalue %dx.types.CBufRet.i32 %1164, 0
-  %1166 = extractvalue %dx.types.CBufRet.i32 %1164, 1
-  %1167 = extractvalue %dx.types.CBufRet.i32 %1164, 2
-  %1168 = extractvalue %dx.types.CBufRet.i32 %1164, 3
-  %1169 = mul i32 %1165, %1160
-  %1170 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1161, i32 %1166, i32 %1169)  ; IMad(a,b,c)
-  %1171 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1162, i32 %1167, i32 %1170)  ; IMad(a,b,c)
-  %1172 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1163, i32 %1168, i32 %1171)  ; IMad(a,b,c)
-  %1173 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1172, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1174 = extractvalue %dx.types.ResRet.i32 %1173, 0
-  %1175 = extractvalue %dx.types.ResRet.i32 %1173, 1
-  %1176 = zext i32 %1174 to i64
-  %1177 = zext i32 %1175 to i64
-  %1178 = shl i64 %1177, 32
-  %1179 = or i64 %1176, %1178
-  %1180 = uitofp i64 %1179 to float
-  br label %1181
-
-; <label>:1181                                    ; preds = %1155, %1089, %1055, %1033, %1023
-  %1182 = phi float [ %1052, %1033 ], [ 0.000000e+00, %1023 ], [ %1088, %1055 ], [ %1180, %1155 ], [ 0.000000e+00, %1089 ]
-  br i1 %1022, label %1183, label %1213
-
-; <label>:1183                                    ; preds = %1181
-  %1184 = fcmp fast oge float %1017, 0.000000e+00
-  %1185 = fptoui float %1017 to i32
-  %1186 = icmp ult i32 %1185, %13
-  %1187 = and i1 %1184, %1186
-  %1188 = fcmp fast oge float %1020, 0.000000e+00
-  %1189 = and i1 %1188, %1187
-  %1190 = fptoui float %1020 to i32
-  %1191 = icmp ult i32 %1190, %15
-  %1192 = and i1 %1191, %1189
-  br i1 %1192, label %1193, label %1341
-
-; <label>:1193                                    ; preds = %1183
-  %1194 = fptoui float %45 to i32
-  %1195 = fptoui float %182 to i32
-  %1196 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1197 = extractvalue %dx.types.CBufRet.i32 %1196, 0
-  %1198 = extractvalue %dx.types.CBufRet.i32 %1196, 1
-  %1199 = extractvalue %dx.types.CBufRet.i32 %1196, 2
-  %1200 = extractvalue %dx.types.CBufRet.i32 %1196, 3
-  %1201 = mul i32 %1197, %1194
-  %1202 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1195, i32 %1198, i32 %1201)  ; IMad(a,b,c)
-  %1203 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1190, i32 %1199, i32 %1202)  ; IMad(a,b,c)
-  %1204 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1185, i32 %1200, i32 %1203)  ; IMad(a,b,c)
-  %1205 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1204, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1206 = extractvalue %dx.types.ResRet.i32 %1205, 0
-  %1207 = extractvalue %dx.types.ResRet.i32 %1205, 1
-  %1208 = zext i32 %1206 to i64
-  %1209 = zext i32 %1207 to i64
-  %1210 = shl i64 %1209, 32
-  %1211 = or i64 %1208, %1210
-  %1212 = uitofp i64 %1211 to float
-  br label %1341
-
-; <label>:1213                                    ; preds = %1181
-  %1214 = icmp eq i32 %1021, 1
-  br i1 %1214, label %1215, label %1249
-
-; <label>:1215                                    ; preds = %1213
-  %1216 = add i32 %13, -1
-  %1217 = uitofp i32 %1216 to float
-  %1218 = call float @dx.op.binary.f32(i32 35, float %1017, float 0.000000e+00)  ; FMax(a,b)
-  %1219 = call float @dx.op.binary.f32(i32 36, float %1218, float %1217)  ; FMin(a,b)
-  %1220 = fptoui float %1219 to i32
-  %1221 = add i32 %15, -1
-  %1222 = uitofp i32 %1221 to float
-  %1223 = call float @dx.op.binary.f32(i32 35, float %1020, float 0.000000e+00)  ; FMax(a,b)
-  %1224 = call float @dx.op.binary.f32(i32 36, float %1223, float %1222)  ; FMin(a,b)
-  %1225 = fptoui float %1224 to i32
-  %1226 = uitofp i32 %1225 to float
-  %1227 = uitofp i32 %1220 to float
-  %1228 = fptoui float %45 to i32
-  %1229 = fptoui float %182 to i32
-  %1230 = fptoui float %1226 to i32
-  %1231 = fptoui float %1227 to i32
-  %1232 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1233 = extractvalue %dx.types.CBufRet.i32 %1232, 0
-  %1234 = extractvalue %dx.types.CBufRet.i32 %1232, 1
-  %1235 = extractvalue %dx.types.CBufRet.i32 %1232, 2
-  %1236 = extractvalue %dx.types.CBufRet.i32 %1232, 3
-  %1237 = mul i32 %1233, %1228
-  %1238 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1229, i32 %1234, i32 %1237)  ; IMad(a,b,c)
-  %1239 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1230, i32 %1235, i32 %1238)  ; IMad(a,b,c)
-  %1240 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1231, i32 %1236, i32 %1239)  ; IMad(a,b,c)
-  %1241 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1240, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1242 = extractvalue %dx.types.ResRet.i32 %1241, 0
-  %1243 = extractvalue %dx.types.ResRet.i32 %1241, 1
-  %1244 = zext i32 %1242 to i64
-  %1245 = zext i32 %1243 to i64
-  %1246 = shl i64 %1245, 32
-  %1247 = or i64 %1244, %1246
-  %1248 = uitofp i64 %1247 to float
-  br label %1341
-
-; <label>:1249                                    ; preds = %1213
-  %1250 = icmp eq i32 %1021, 2
-  br i1 %1250, label %1251, label %1341
-
-; <label>:1251                                    ; preds = %1249
-  %1252 = fsub fast float %22, %20
-  %1253 = fcmp fast olt float %1017, %20
-  br i1 %1253, label %1254, label %1267
-
-; <label>:1254                                    ; preds = %1251
-  %1255 = fsub fast float %20, %1017
-  %1256 = fdiv fast float %1255, %1252
-  %1257 = fptoui float %1256 to i32
-  %1258 = uitofp i32 %1257 to float
-  %1259 = fmul fast float %1258, %1252
-  %1260 = fsub fast float %1255, %1259
-  %1261 = and i32 %1257, 1
-  %1262 = icmp eq i32 %1261, 0
-  br i1 %1262, label %1263, label %1265
-
-; <label>:1263                                    ; preds = %1254
-  %1264 = fadd fast float %1260, %20
-  br label %1282
-
-; <label>:1265                                    ; preds = %1254
-  %1266 = fsub fast float %22, %1260
-  br label %1282
-
-; <label>:1267                                    ; preds = %1251
-  %1268 = fcmp fast ogt float %1017, %22
-  br i1 %1268, label %1269, label %1282
-
-; <label>:1269                                    ; preds = %1267
-  %1270 = fsub fast float %1017, %22
-  %1271 = fdiv fast float %1270, %1252
-  %1272 = fptoui float %1271 to i32
-  %1273 = uitofp i32 %1272 to float
-  %1274 = fmul fast float %1273, %1252
-  %1275 = fsub fast float %1270, %1274
-  %1276 = and i32 %1272, 1
-  %1277 = icmp eq i32 %1276, 0
-  br i1 %1277, label %1278, label %1280
-
-; <label>:1278                                    ; preds = %1269
-  %1279 = fsub fast float %22, %1275
-  br label %1282
-
-; <label>:1280                                    ; preds = %1269
-  %1281 = fadd fast float %1275, %20
-  br label %1282
-
-; <label>:1282                                    ; preds = %1280, %1278, %1267, %1265, %1263
-  %1283 = phi float [ %1264, %1263 ], [ %1266, %1265 ], [ %1279, %1278 ], [ %1281, %1280 ], [ %1017, %1267 ]
-  %1284 = fptoui float %1283 to i32
-  %1285 = fsub fast float %24, %20
-  %1286 = fcmp fast olt float %1020, %20
-  br i1 %1286, label %1287, label %1300
-
-; <label>:1287                                    ; preds = %1282
-  %1288 = fsub fast float %20, %1020
-  %1289 = fdiv fast float %1288, %1285
-  %1290 = fptoui float %1289 to i32
-  %1291 = uitofp i32 %1290 to float
-  %1292 = fmul fast float %1291, %1285
-  %1293 = fsub fast float %1288, %1292
-  %1294 = and i32 %1290, 1
-  %1295 = icmp eq i32 %1294, 0
-  br i1 %1295, label %1296, label %1298
-
-; <label>:1296                                    ; preds = %1287
-  %1297 = fadd fast float %1293, %20
-  br label %1315
-
-; <label>:1298                                    ; preds = %1287
-  %1299 = fsub fast float %24, %1293
-  br label %1315
-
-; <label>:1300                                    ; preds = %1282
-  %1301 = fcmp fast ogt float %1020, %24
-  br i1 %1301, label %1302, label %1315
-
-; <label>:1302                                    ; preds = %1300
-  %1303 = fsub fast float %1020, %24
-  %1304 = fdiv fast float %1303, %1285
-  %1305 = fptoui float %1304 to i32
-  %1306 = uitofp i32 %1305 to float
-  %1307 = fmul fast float %1306, %1285
-  %1308 = fsub fast float %1303, %1307
-  %1309 = and i32 %1305, 1
-  %1310 = icmp eq i32 %1309, 0
-  br i1 %1310, label %1311, label %1313
-
-; <label>:1311                                    ; preds = %1302
-  %1312 = fsub fast float %24, %1308
-  br label %1315
-
-; <label>:1313                                    ; preds = %1302
-  %1314 = fadd fast float %1308, %20
-  br label %1315
-
-; <label>:1315                                    ; preds = %1313, %1311, %1300, %1298, %1296
-  %1316 = phi float [ %1297, %1296 ], [ %1299, %1298 ], [ %1312, %1311 ], [ %1314, %1313 ], [ %1020, %1300 ]
-  %1317 = fptoui float %1316 to i32
-  %1318 = uitofp i32 %1317 to float
-  %1319 = uitofp i32 %1284 to float
-  %1320 = fptoui float %45 to i32
-  %1321 = fptoui float %182 to i32
-  %1322 = fptoui float %1318 to i32
-  %1323 = fptoui float %1319 to i32
-  %1324 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1325 = extractvalue %dx.types.CBufRet.i32 %1324, 0
-  %1326 = extractvalue %dx.types.CBufRet.i32 %1324, 1
-  %1327 = extractvalue %dx.types.CBufRet.i32 %1324, 2
-  %1328 = extractvalue %dx.types.CBufRet.i32 %1324, 3
-  %1329 = mul i32 %1325, %1320
-  %1330 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1321, i32 %1326, i32 %1329)  ; IMad(a,b,c)
-  %1331 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1322, i32 %1327, i32 %1330)  ; IMad(a,b,c)
-  %1332 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1323, i32 %1328, i32 %1331)  ; IMad(a,b,c)
-  %1333 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1332, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1334 = extractvalue %dx.types.ResRet.i32 %1333, 0
-  %1335 = extractvalue %dx.types.ResRet.i32 %1333, 1
-  %1336 = zext i32 %1334 to i64
-  %1337 = zext i32 %1335 to i64
-  %1338 = shl i64 %1337, 32
-  %1339 = or i64 %1336, %1338
-  %1340 = uitofp i64 %1339 to float
-  br label %1341
-
-; <label>:1341                                    ; preds = %1315, %1249, %1215, %1193, %1183
-  %1342 = phi float [ %1212, %1193 ], [ 0.000000e+00, %1183 ], [ %1248, %1215 ], [ %1340, %1315 ], [ 0.000000e+00, %1249 ]
-  %1343 = fadd fast float %1017, 1.000000e+00
-  br i1 %1022, label %1344, label %1374
-
-; <label>:1344                                    ; preds = %1341
-  %1345 = fcmp fast oge float %1343, 0.000000e+00
-  %1346 = fptoui float %1343 to i32
-  %1347 = icmp ult i32 %1346, %13
-  %1348 = and i1 %1345, %1347
-  %1349 = fcmp fast oge float %1020, 0.000000e+00
-  %1350 = and i1 %1349, %1348
-  %1351 = fptoui float %1020 to i32
-  %1352 = icmp ult i32 %1351, %15
-  %1353 = and i1 %1352, %1350
-  br i1 %1353, label %1354, label %1502
-
-; <label>:1354                                    ; preds = %1344
-  %1355 = fptoui float %45 to i32
-  %1356 = fptoui float %182 to i32
-  %1357 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1358 = extractvalue %dx.types.CBufRet.i32 %1357, 0
-  %1359 = extractvalue %dx.types.CBufRet.i32 %1357, 1
-  %1360 = extractvalue %dx.types.CBufRet.i32 %1357, 2
-  %1361 = extractvalue %dx.types.CBufRet.i32 %1357, 3
-  %1362 = mul i32 %1358, %1355
-  %1363 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1356, i32 %1359, i32 %1362)  ; IMad(a,b,c)
-  %1364 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1351, i32 %1360, i32 %1363)  ; IMad(a,b,c)
-  %1365 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1346, i32 %1361, i32 %1364)  ; IMad(a,b,c)
-  %1366 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1365, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1367 = extractvalue %dx.types.ResRet.i32 %1366, 0
-  %1368 = extractvalue %dx.types.ResRet.i32 %1366, 1
-  %1369 = zext i32 %1367 to i64
-  %1370 = zext i32 %1368 to i64
-  %1371 = shl i64 %1370, 32
-  %1372 = or i64 %1369, %1371
-  %1373 = uitofp i64 %1372 to float
-  br label %1502
-
-; <label>:1374                                    ; preds = %1341
-  %1375 = icmp eq i32 %1021, 1
-  br i1 %1375, label %1376, label %1410
-
-; <label>:1376                                    ; preds = %1374
-  %1377 = add i32 %13, -1
-  %1378 = uitofp i32 %1377 to float
-  %1379 = call float @dx.op.binary.f32(i32 35, float %1343, float 0.000000e+00)  ; FMax(a,b)
-  %1380 = call float @dx.op.binary.f32(i32 36, float %1379, float %1378)  ; FMin(a,b)
-  %1381 = fptoui float %1380 to i32
-  %1382 = add i32 %15, -1
-  %1383 = uitofp i32 %1382 to float
-  %1384 = call float @dx.op.binary.f32(i32 35, float %1020, float 0.000000e+00)  ; FMax(a,b)
-  %1385 = call float @dx.op.binary.f32(i32 36, float %1384, float %1383)  ; FMin(a,b)
-  %1386 = fptoui float %1385 to i32
-  %1387 = uitofp i32 %1386 to float
-  %1388 = uitofp i32 %1381 to float
-  %1389 = fptoui float %45 to i32
-  %1390 = fptoui float %182 to i32
-  %1391 = fptoui float %1387 to i32
-  %1392 = fptoui float %1388 to i32
-  %1393 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1394 = extractvalue %dx.types.CBufRet.i32 %1393, 0
-  %1395 = extractvalue %dx.types.CBufRet.i32 %1393, 1
-  %1396 = extractvalue %dx.types.CBufRet.i32 %1393, 2
-  %1397 = extractvalue %dx.types.CBufRet.i32 %1393, 3
-  %1398 = mul i32 %1394, %1389
-  %1399 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1390, i32 %1395, i32 %1398)  ; IMad(a,b,c)
-  %1400 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1391, i32 %1396, i32 %1399)  ; IMad(a,b,c)
-  %1401 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1392, i32 %1397, i32 %1400)  ; IMad(a,b,c)
-  %1402 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1401, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1403 = extractvalue %dx.types.ResRet.i32 %1402, 0
-  %1404 = extractvalue %dx.types.ResRet.i32 %1402, 1
-  %1405 = zext i32 %1403 to i64
-  %1406 = zext i32 %1404 to i64
-  %1407 = shl i64 %1406, 32
-  %1408 = or i64 %1405, %1407
-  %1409 = uitofp i64 %1408 to float
-  br label %1502
-
-; <label>:1410                                    ; preds = %1374
-  %1411 = icmp eq i32 %1021, 2
-  br i1 %1411, label %1412, label %1502
-
-; <label>:1412                                    ; preds = %1410
-  %1413 = fsub fast float %22, %20
-  %1414 = fcmp fast olt float %1343, %20
-  br i1 %1414, label %1415, label %1428
-
-; <label>:1415                                    ; preds = %1412
-  %1416 = fsub fast float %20, %1343
-  %1417 = fdiv fast float %1416, %1413
-  %1418 = fptoui float %1417 to i32
-  %1419 = uitofp i32 %1418 to float
-  %1420 = fmul fast float %1419, %1413
-  %1421 = fsub fast float %1416, %1420
-  %1422 = and i32 %1418, 1
-  %1423 = icmp eq i32 %1422, 0
-  br i1 %1423, label %1424, label %1426
-
-; <label>:1424                                    ; preds = %1415
-  %1425 = fadd fast float %1421, %20
-  br label %1443
-
-; <label>:1426                                    ; preds = %1415
-  %1427 = fsub fast float %22, %1421
-  br label %1443
-
-; <label>:1428                                    ; preds = %1412
-  %1429 = fcmp fast ogt float %1343, %22
-  br i1 %1429, label %1430, label %1443
-
-; <label>:1430                                    ; preds = %1428
-  %1431 = fsub fast float %1343, %22
-  %1432 = fdiv fast float %1431, %1413
-  %1433 = fptoui float %1432 to i32
-  %1434 = uitofp i32 %1433 to float
-  %1435 = fmul fast float %1434, %1413
-  %1436 = fsub fast float %1431, %1435
-  %1437 = and i32 %1433, 1
-  %1438 = icmp eq i32 %1437, 0
-  br i1 %1438, label %1439, label %1441
-
-; <label>:1439                                    ; preds = %1430
-  %1440 = fsub fast float %22, %1436
-  br label %1443
-
-; <label>:1441                                    ; preds = %1430
-  %1442 = fadd fast float %1436, %20
-  br label %1443
-
-; <label>:1443                                    ; preds = %1441, %1439, %1428, %1426, %1424
-  %1444 = phi float [ %1425, %1424 ], [ %1427, %1426 ], [ %1440, %1439 ], [ %1442, %1441 ], [ %1343, %1428 ]
-  %1445 = fptoui float %1444 to i32
-  %1446 = fsub fast float %24, %20
-  %1447 = fcmp fast olt float %1020, %20
-  br i1 %1447, label %1448, label %1461
-
-; <label>:1448                                    ; preds = %1443
-  %1449 = fsub fast float %20, %1020
-  %1450 = fdiv fast float %1449, %1446
-  %1451 = fptoui float %1450 to i32
-  %1452 = uitofp i32 %1451 to float
-  %1453 = fmul fast float %1452, %1446
-  %1454 = fsub fast float %1449, %1453
-  %1455 = and i32 %1451, 1
-  %1456 = icmp eq i32 %1455, 0
-  br i1 %1456, label %1457, label %1459
-
-; <label>:1457                                    ; preds = %1448
-  %1458 = fadd fast float %1454, %20
-  br label %1476
-
-; <label>:1459                                    ; preds = %1448
-  %1460 = fsub fast float %24, %1454
-  br label %1476
-
-; <label>:1461                                    ; preds = %1443
-  %1462 = fcmp fast ogt float %1020, %24
-  br i1 %1462, label %1463, label %1476
-
-; <label>:1463                                    ; preds = %1461
-  %1464 = fsub fast float %1020, %24
-  %1465 = fdiv fast float %1464, %1446
-  %1466 = fptoui float %1465 to i32
-  %1467 = uitofp i32 %1466 to float
-  %1468 = fmul fast float %1467, %1446
-  %1469 = fsub fast float %1464, %1468
-  %1470 = and i32 %1466, 1
-  %1471 = icmp eq i32 %1470, 0
-  br i1 %1471, label %1472, label %1474
-
-; <label>:1472                                    ; preds = %1463
-  %1473 = fsub fast float %24, %1469
-  br label %1476
-
-; <label>:1474                                    ; preds = %1463
-  %1475 = fadd fast float %1469, %20
-  br label %1476
-
-; <label>:1476                                    ; preds = %1474, %1472, %1461, %1459, %1457
-  %1477 = phi float [ %1458, %1457 ], [ %1460, %1459 ], [ %1473, %1472 ], [ %1475, %1474 ], [ %1020, %1461 ]
-  %1478 = fptoui float %1477 to i32
-  %1479 = uitofp i32 %1478 to float
-  %1480 = uitofp i32 %1445 to float
-  %1481 = fptoui float %45 to i32
-  %1482 = fptoui float %182 to i32
-  %1483 = fptoui float %1479 to i32
-  %1484 = fptoui float %1480 to i32
-  %1485 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1486 = extractvalue %dx.types.CBufRet.i32 %1485, 0
-  %1487 = extractvalue %dx.types.CBufRet.i32 %1485, 1
-  %1488 = extractvalue %dx.types.CBufRet.i32 %1485, 2
-  %1489 = extractvalue %dx.types.CBufRet.i32 %1485, 3
-  %1490 = mul i32 %1486, %1481
-  %1491 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1482, i32 %1487, i32 %1490)  ; IMad(a,b,c)
-  %1492 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1483, i32 %1488, i32 %1491)  ; IMad(a,b,c)
-  %1493 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1484, i32 %1489, i32 %1492)  ; IMad(a,b,c)
-  %1494 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1493, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1495 = extractvalue %dx.types.ResRet.i32 %1494, 0
-  %1496 = extractvalue %dx.types.ResRet.i32 %1494, 1
-  %1497 = zext i32 %1495 to i64
-  %1498 = zext i32 %1496 to i64
-  %1499 = shl i64 %1498, 32
-  %1500 = or i64 %1497, %1499
-  %1501 = uitofp i64 %1500 to float
-  br label %1502
-
-; <label>:1502                                    ; preds = %1476, %1410, %1376, %1354, %1344
-  %1503 = phi float [ %1373, %1354 ], [ 0.000000e+00, %1344 ], [ %1409, %1376 ], [ %1501, %1476 ], [ 0.000000e+00, %1410 ]
-  %1504 = fadd fast float %1017, 2.000000e+00
-  br i1 %1022, label %1505, label %1535
-
-; <label>:1505                                    ; preds = %1502
-  %1506 = fcmp fast oge float %1504, 0.000000e+00
-  %1507 = fptoui float %1504 to i32
-  %1508 = icmp ult i32 %1507, %13
-  %1509 = and i1 %1506, %1508
-  %1510 = fcmp fast oge float %1020, 0.000000e+00
-  %1511 = and i1 %1510, %1509
-  %1512 = fptoui float %1020 to i32
-  %1513 = icmp ult i32 %1512, %15
-  %1514 = and i1 %1513, %1511
-  br i1 %1514, label %1515, label %1663
-
-; <label>:1515                                    ; preds = %1505
-  %1516 = fptoui float %45 to i32
-  %1517 = fptoui float %182 to i32
-  %1518 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1519 = extractvalue %dx.types.CBufRet.i32 %1518, 0
-  %1520 = extractvalue %dx.types.CBufRet.i32 %1518, 1
-  %1521 = extractvalue %dx.types.CBufRet.i32 %1518, 2
-  %1522 = extractvalue %dx.types.CBufRet.i32 %1518, 3
-  %1523 = mul i32 %1519, %1516
-  %1524 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1517, i32 %1520, i32 %1523)  ; IMad(a,b,c)
-  %1525 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1512, i32 %1521, i32 %1524)  ; IMad(a,b,c)
-  %1526 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1507, i32 %1522, i32 %1525)  ; IMad(a,b,c)
-  %1527 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1526, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1528 = extractvalue %dx.types.ResRet.i32 %1527, 0
-  %1529 = extractvalue %dx.types.ResRet.i32 %1527, 1
-  %1530 = zext i32 %1528 to i64
-  %1531 = zext i32 %1529 to i64
-  %1532 = shl i64 %1531, 32
-  %1533 = or i64 %1530, %1532
-  %1534 = uitofp i64 %1533 to float
-  br label %1663
-
-; <label>:1535                                    ; preds = %1502
-  %1536 = icmp eq i32 %1021, 1
-  br i1 %1536, label %1537, label %1571
-
-; <label>:1537                                    ; preds = %1535
-  %1538 = add i32 %13, -1
-  %1539 = uitofp i32 %1538 to float
-  %1540 = call float @dx.op.binary.f32(i32 35, float %1504, float 0.000000e+00)  ; FMax(a,b)
-  %1541 = call float @dx.op.binary.f32(i32 36, float %1540, float %1539)  ; FMin(a,b)
-  %1542 = fptoui float %1541 to i32
-  %1543 = add i32 %15, -1
-  %1544 = uitofp i32 %1543 to float
-  %1545 = call float @dx.op.binary.f32(i32 35, float %1020, float 0.000000e+00)  ; FMax(a,b)
-  %1546 = call float @dx.op.binary.f32(i32 36, float %1545, float %1544)  ; FMin(a,b)
-  %1547 = fptoui float %1546 to i32
-  %1548 = uitofp i32 %1547 to float
-  %1549 = uitofp i32 %1542 to float
-  %1550 = fptoui float %45 to i32
-  %1551 = fptoui float %182 to i32
-  %1552 = fptoui float %1548 to i32
-  %1553 = fptoui float %1549 to i32
-  %1554 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1555 = extractvalue %dx.types.CBufRet.i32 %1554, 0
-  %1556 = extractvalue %dx.types.CBufRet.i32 %1554, 1
-  %1557 = extractvalue %dx.types.CBufRet.i32 %1554, 2
-  %1558 = extractvalue %dx.types.CBufRet.i32 %1554, 3
-  %1559 = mul i32 %1555, %1550
-  %1560 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1551, i32 %1556, i32 %1559)  ; IMad(a,b,c)
-  %1561 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1552, i32 %1557, i32 %1560)  ; IMad(a,b,c)
-  %1562 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1553, i32 %1558, i32 %1561)  ; IMad(a,b,c)
-  %1563 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1562, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1564 = extractvalue %dx.types.ResRet.i32 %1563, 0
-  %1565 = extractvalue %dx.types.ResRet.i32 %1563, 1
-  %1566 = zext i32 %1564 to i64
-  %1567 = zext i32 %1565 to i64
-  %1568 = shl i64 %1567, 32
-  %1569 = or i64 %1566, %1568
-  %1570 = uitofp i64 %1569 to float
-  br label %1663
-
-; <label>:1571                                    ; preds = %1535
-  %1572 = icmp eq i32 %1021, 2
-  br i1 %1572, label %1573, label %1663
-
-; <label>:1573                                    ; preds = %1571
-  %1574 = fsub fast float %22, %20
-  %1575 = fcmp fast olt float %1504, %20
-  br i1 %1575, label %1576, label %1589
-
-; <label>:1576                                    ; preds = %1573
-  %1577 = fsub fast float %20, %1504
-  %1578 = fdiv fast float %1577, %1574
-  %1579 = fptoui float %1578 to i32
-  %1580 = uitofp i32 %1579 to float
-  %1581 = fmul fast float %1580, %1574
-  %1582 = fsub fast float %1577, %1581
-  %1583 = and i32 %1579, 1
-  %1584 = icmp eq i32 %1583, 0
-  br i1 %1584, label %1585, label %1587
-
-; <label>:1585                                    ; preds = %1576
-  %1586 = fadd fast float %1582, %20
-  br label %1604
-
-; <label>:1587                                    ; preds = %1576
-  %1588 = fsub fast float %22, %1582
-  br label %1604
-
-; <label>:1589                                    ; preds = %1573
-  %1590 = fcmp fast ogt float %1504, %22
-  br i1 %1590, label %1591, label %1604
-
-; <label>:1591                                    ; preds = %1589
-  %1592 = fsub fast float %1504, %22
-  %1593 = fdiv fast float %1592, %1574
-  %1594 = fptoui float %1593 to i32
-  %1595 = uitofp i32 %1594 to float
-  %1596 = fmul fast float %1595, %1574
-  %1597 = fsub fast float %1592, %1596
-  %1598 = and i32 %1594, 1
-  %1599 = icmp eq i32 %1598, 0
-  br i1 %1599, label %1600, label %1602
-
-; <label>:1600                                    ; preds = %1591
-  %1601 = fsub fast float %22, %1597
-  br label %1604
-
-; <label>:1602                                    ; preds = %1591
-  %1603 = fadd fast float %1597, %20
-  br label %1604
-
-; <label>:1604                                    ; preds = %1602, %1600, %1589, %1587, %1585
-  %1605 = phi float [ %1586, %1585 ], [ %1588, %1587 ], [ %1601, %1600 ], [ %1603, %1602 ], [ %1504, %1589 ]
-  %1606 = fptoui float %1605 to i32
-  %1607 = fsub fast float %24, %20
-  %1608 = fcmp fast olt float %1020, %20
-  br i1 %1608, label %1609, label %1622
-
-; <label>:1609                                    ; preds = %1604
-  %1610 = fsub fast float %20, %1020
-  %1611 = fdiv fast float %1610, %1607
-  %1612 = fptoui float %1611 to i32
-  %1613 = uitofp i32 %1612 to float
-  %1614 = fmul fast float %1613, %1607
-  %1615 = fsub fast float %1610, %1614
-  %1616 = and i32 %1612, 1
-  %1617 = icmp eq i32 %1616, 0
-  br i1 %1617, label %1618, label %1620
-
-; <label>:1618                                    ; preds = %1609
-  %1619 = fadd fast float %1615, %20
-  br label %1637
-
-; <label>:1620                                    ; preds = %1609
-  %1621 = fsub fast float %24, %1615
-  br label %1637
-
-; <label>:1622                                    ; preds = %1604
-  %1623 = fcmp fast ogt float %1020, %24
-  br i1 %1623, label %1624, label %1637
-
-; <label>:1624                                    ; preds = %1622
-  %1625 = fsub fast float %1020, %24
-  %1626 = fdiv fast float %1625, %1607
-  %1627 = fptoui float %1626 to i32
-  %1628 = uitofp i32 %1627 to float
-  %1629 = fmul fast float %1628, %1607
-  %1630 = fsub fast float %1625, %1629
-  %1631 = and i32 %1627, 1
-  %1632 = icmp eq i32 %1631, 0
-  br i1 %1632, label %1633, label %1635
-
-; <label>:1633                                    ; preds = %1624
-  %1634 = fsub fast float %24, %1630
-  br label %1637
-
-; <label>:1635                                    ; preds = %1624
-  %1636 = fadd fast float %1630, %20
-  br label %1637
-
-; <label>:1637                                    ; preds = %1635, %1633, %1622, %1620, %1618
-  %1638 = phi float [ %1619, %1618 ], [ %1621, %1620 ], [ %1634, %1633 ], [ %1636, %1635 ], [ %1020, %1622 ]
-  %1639 = fptoui float %1638 to i32
-  %1640 = uitofp i32 %1639 to float
-  %1641 = uitofp i32 %1606 to float
-  %1642 = fptoui float %45 to i32
-  %1643 = fptoui float %182 to i32
-  %1644 = fptoui float %1640 to i32
-  %1645 = fptoui float %1641 to i32
-  %1646 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1647 = extractvalue %dx.types.CBufRet.i32 %1646, 0
-  %1648 = extractvalue %dx.types.CBufRet.i32 %1646, 1
-  %1649 = extractvalue %dx.types.CBufRet.i32 %1646, 2
-  %1650 = extractvalue %dx.types.CBufRet.i32 %1646, 3
-  %1651 = mul i32 %1647, %1642
-  %1652 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1643, i32 %1648, i32 %1651)  ; IMad(a,b,c)
-  %1653 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1644, i32 %1649, i32 %1652)  ; IMad(a,b,c)
-  %1654 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1645, i32 %1650, i32 %1653)  ; IMad(a,b,c)
-  %1655 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1654, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1656 = extractvalue %dx.types.ResRet.i32 %1655, 0
-  %1657 = extractvalue %dx.types.ResRet.i32 %1655, 1
-  %1658 = zext i32 %1656 to i64
-  %1659 = zext i32 %1657 to i64
-  %1660 = shl i64 %1659, 32
-  %1661 = or i64 %1658, %1660
-  %1662 = uitofp i64 %1661 to float
-  br label %1663
-
-; <label>:1663                                    ; preds = %1637, %1571, %1537, %1515, %1505
-  %1664 = phi float [ %1534, %1515 ], [ 0.000000e+00, %1505 ], [ %1570, %1537 ], [ %1662, %1637 ], [ 0.000000e+00, %1571 ]
-  br i1 %1022, label %1665, label %1695
-
-; <label>:1665                                    ; preds = %1663
-  %1666 = fcmp fast oge float %1018, 0.000000e+00
-  %1667 = fptoui float %1018 to i32
-  %1668 = icmp ult i32 %1667, %13
-  %1669 = and i1 %1666, %1668
-  %1670 = fcmp fast oge float %1019, 0.000000e+00
-  %1671 = and i1 %1670, %1669
-  %1672 = fptoui float %1019 to i32
-  %1673 = icmp ult i32 %1672, %15
-  %1674 = and i1 %1673, %1671
-  br i1 %1674, label %1675, label %1823
-
-; <label>:1675                                    ; preds = %1665
-  %1676 = fptoui float %45 to i32
-  %1677 = fptoui float %182 to i32
-  %1678 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1679 = extractvalue %dx.types.CBufRet.i32 %1678, 0
-  %1680 = extractvalue %dx.types.CBufRet.i32 %1678, 1
-  %1681 = extractvalue %dx.types.CBufRet.i32 %1678, 2
-  %1682 = extractvalue %dx.types.CBufRet.i32 %1678, 3
-  %1683 = mul i32 %1679, %1676
-  %1684 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1677, i32 %1680, i32 %1683)  ; IMad(a,b,c)
-  %1685 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1672, i32 %1681, i32 %1684)  ; IMad(a,b,c)
-  %1686 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1667, i32 %1682, i32 %1685)  ; IMad(a,b,c)
-  %1687 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1686, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1688 = extractvalue %dx.types.ResRet.i32 %1687, 0
-  %1689 = extractvalue %dx.types.ResRet.i32 %1687, 1
-  %1690 = zext i32 %1688 to i64
-  %1691 = zext i32 %1689 to i64
-  %1692 = shl i64 %1691, 32
-  %1693 = or i64 %1690, %1692
-  %1694 = uitofp i64 %1693 to float
-  br label %1823
-
-; <label>:1695                                    ; preds = %1663
-  %1696 = icmp eq i32 %1021, 1
-  br i1 %1696, label %1697, label %1731
-
-; <label>:1697                                    ; preds = %1695
-  %1698 = add i32 %13, -1
-  %1699 = uitofp i32 %1698 to float
-  %1700 = call float @dx.op.binary.f32(i32 35, float %1018, float 0.000000e+00)  ; FMax(a,b)
-  %1701 = call float @dx.op.binary.f32(i32 36, float %1700, float %1699)  ; FMin(a,b)
-  %1702 = fptoui float %1701 to i32
-  %1703 = add i32 %15, -1
-  %1704 = uitofp i32 %1703 to float
-  %1705 = call float @dx.op.binary.f32(i32 35, float %1019, float 0.000000e+00)  ; FMax(a,b)
-  %1706 = call float @dx.op.binary.f32(i32 36, float %1705, float %1704)  ; FMin(a,b)
-  %1707 = fptoui float %1706 to i32
-  %1708 = uitofp i32 %1707 to float
-  %1709 = uitofp i32 %1702 to float
-  %1710 = fptoui float %45 to i32
-  %1711 = fptoui float %182 to i32
-  %1712 = fptoui float %1708 to i32
-  %1713 = fptoui float %1709 to i32
-  %1714 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1715 = extractvalue %dx.types.CBufRet.i32 %1714, 0
-  %1716 = extractvalue %dx.types.CBufRet.i32 %1714, 1
-  %1717 = extractvalue %dx.types.CBufRet.i32 %1714, 2
-  %1718 = extractvalue %dx.types.CBufRet.i32 %1714, 3
-  %1719 = mul i32 %1715, %1710
-  %1720 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1711, i32 %1716, i32 %1719)  ; IMad(a,b,c)
-  %1721 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1712, i32 %1717, i32 %1720)  ; IMad(a,b,c)
-  %1722 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1713, i32 %1718, i32 %1721)  ; IMad(a,b,c)
-  %1723 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1722, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1724 = extractvalue %dx.types.ResRet.i32 %1723, 0
-  %1725 = extractvalue %dx.types.ResRet.i32 %1723, 1
-  %1726 = zext i32 %1724 to i64
-  %1727 = zext i32 %1725 to i64
-  %1728 = shl i64 %1727, 32
-  %1729 = or i64 %1726, %1728
-  %1730 = uitofp i64 %1729 to float
-  br label %1823
-
-; <label>:1731                                    ; preds = %1695
-  %1732 = icmp eq i32 %1021, 2
-  br i1 %1732, label %1733, label %1823
-
-; <label>:1733                                    ; preds = %1731
-  %1734 = fsub fast float %22, %20
-  %1735 = fcmp fast olt float %1018, %20
-  br i1 %1735, label %1736, label %1749
-
-; <label>:1736                                    ; preds = %1733
-  %1737 = fsub fast float %20, %1018
-  %1738 = fdiv fast float %1737, %1734
-  %1739 = fptoui float %1738 to i32
-  %1740 = uitofp i32 %1739 to float
-  %1741 = fmul fast float %1740, %1734
-  %1742 = fsub fast float %1737, %1741
-  %1743 = and i32 %1739, 1
-  %1744 = icmp eq i32 %1743, 0
-  br i1 %1744, label %1745, label %1747
-
-; <label>:1745                                    ; preds = %1736
-  %1746 = fadd fast float %1742, %20
-  br label %1764
-
-; <label>:1747                                    ; preds = %1736
-  %1748 = fsub fast float %22, %1742
-  br label %1764
-
-; <label>:1749                                    ; preds = %1733
-  %1750 = fcmp fast ogt float %1018, %22
-  br i1 %1750, label %1751, label %1764
-
-; <label>:1751                                    ; preds = %1749
-  %1752 = fsub fast float %1018, %22
-  %1753 = fdiv fast float %1752, %1734
-  %1754 = fptoui float %1753 to i32
-  %1755 = uitofp i32 %1754 to float
-  %1756 = fmul fast float %1755, %1734
-  %1757 = fsub fast float %1752, %1756
-  %1758 = and i32 %1754, 1
-  %1759 = icmp eq i32 %1758, 0
-  br i1 %1759, label %1760, label %1762
-
-; <label>:1760                                    ; preds = %1751
-  %1761 = fsub fast float %22, %1757
-  br label %1764
-
-; <label>:1762                                    ; preds = %1751
-  %1763 = fadd fast float %1757, %20
-  br label %1764
-
-; <label>:1764                                    ; preds = %1762, %1760, %1749, %1747, %1745
-  %1765 = phi float [ %1746, %1745 ], [ %1748, %1747 ], [ %1761, %1760 ], [ %1763, %1762 ], [ %1018, %1749 ]
-  %1766 = fptoui float %1765 to i32
-  %1767 = fsub fast float %24, %20
-  %1768 = fcmp fast olt float %1019, %20
-  br i1 %1768, label %1769, label %1782
-
-; <label>:1769                                    ; preds = %1764
-  %1770 = fsub fast float %20, %1019
-  %1771 = fdiv fast float %1770, %1767
-  %1772 = fptoui float %1771 to i32
-  %1773 = uitofp i32 %1772 to float
-  %1774 = fmul fast float %1773, %1767
-  %1775 = fsub fast float %1770, %1774
-  %1776 = and i32 %1772, 1
-  %1777 = icmp eq i32 %1776, 0
-  br i1 %1777, label %1778, label %1780
-
-; <label>:1778                                    ; preds = %1769
-  %1779 = fadd fast float %1775, %20
-  br label %1797
-
-; <label>:1780                                    ; preds = %1769
-  %1781 = fsub fast float %24, %1775
-  br label %1797
-
-; <label>:1782                                    ; preds = %1764
-  %1783 = fcmp fast ogt float %1019, %24
-  br i1 %1783, label %1784, label %1797
-
-; <label>:1784                                    ; preds = %1782
-  %1785 = fsub fast float %1019, %24
-  %1786 = fdiv fast float %1785, %1767
-  %1787 = fptoui float %1786 to i32
-  %1788 = uitofp i32 %1787 to float
-  %1789 = fmul fast float %1788, %1767
-  %1790 = fsub fast float %1785, %1789
-  %1791 = and i32 %1787, 1
-  %1792 = icmp eq i32 %1791, 0
-  br i1 %1792, label %1793, label %1795
-
-; <label>:1793                                    ; preds = %1784
-  %1794 = fsub fast float %24, %1790
-  br label %1797
-
-; <label>:1795                                    ; preds = %1784
-  %1796 = fadd fast float %1790, %20
-  br label %1797
-
-; <label>:1797                                    ; preds = %1795, %1793, %1782, %1780, %1778
-  %1798 = phi float [ %1779, %1778 ], [ %1781, %1780 ], [ %1794, %1793 ], [ %1796, %1795 ], [ %1019, %1782 ]
-  %1799 = fptoui float %1798 to i32
-  %1800 = uitofp i32 %1799 to float
-  %1801 = uitofp i32 %1766 to float
-  %1802 = fptoui float %45 to i32
-  %1803 = fptoui float %182 to i32
-  %1804 = fptoui float %1800 to i32
-  %1805 = fptoui float %1801 to i32
-  %1806 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1807 = extractvalue %dx.types.CBufRet.i32 %1806, 0
-  %1808 = extractvalue %dx.types.CBufRet.i32 %1806, 1
-  %1809 = extractvalue %dx.types.CBufRet.i32 %1806, 2
-  %1810 = extractvalue %dx.types.CBufRet.i32 %1806, 3
-  %1811 = mul i32 %1807, %1802
-  %1812 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1803, i32 %1808, i32 %1811)  ; IMad(a,b,c)
-  %1813 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1804, i32 %1809, i32 %1812)  ; IMad(a,b,c)
-  %1814 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1805, i32 %1810, i32 %1813)  ; IMad(a,b,c)
-  %1815 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1814, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1816 = extractvalue %dx.types.ResRet.i32 %1815, 0
-  %1817 = extractvalue %dx.types.ResRet.i32 %1815, 1
-  %1818 = zext i32 %1816 to i64
-  %1819 = zext i32 %1817 to i64
-  %1820 = shl i64 %1819, 32
-  %1821 = or i64 %1818, %1820
-  %1822 = uitofp i64 %1821 to float
-  br label %1823
-
-; <label>:1823                                    ; preds = %1797, %1731, %1697, %1675, %1665
-  %1824 = phi float [ %1694, %1675 ], [ 0.000000e+00, %1665 ], [ %1730, %1697 ], [ %1822, %1797 ], [ 0.000000e+00, %1731 ]
-  br i1 %1022, label %1825, label %1855
-
-; <label>:1825                                    ; preds = %1823
-  %1826 = fcmp fast oge float %1017, 0.000000e+00
-  %1827 = fptoui float %1017 to i32
-  %1828 = icmp ult i32 %1827, %13
-  %1829 = and i1 %1826, %1828
-  %1830 = fcmp fast oge float %1019, 0.000000e+00
-  %1831 = and i1 %1830, %1829
-  %1832 = fptoui float %1019 to i32
-  %1833 = icmp ult i32 %1832, %15
-  %1834 = and i1 %1833, %1831
-  br i1 %1834, label %1835, label %1983
-
-; <label>:1835                                    ; preds = %1825
-  %1836 = fptoui float %45 to i32
-  %1837 = fptoui float %182 to i32
-  %1838 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1839 = extractvalue %dx.types.CBufRet.i32 %1838, 0
-  %1840 = extractvalue %dx.types.CBufRet.i32 %1838, 1
-  %1841 = extractvalue %dx.types.CBufRet.i32 %1838, 2
-  %1842 = extractvalue %dx.types.CBufRet.i32 %1838, 3
-  %1843 = mul i32 %1839, %1836
-  %1844 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1837, i32 %1840, i32 %1843)  ; IMad(a,b,c)
-  %1845 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1832, i32 %1841, i32 %1844)  ; IMad(a,b,c)
-  %1846 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1827, i32 %1842, i32 %1845)  ; IMad(a,b,c)
-  %1847 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1846, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1848 = extractvalue %dx.types.ResRet.i32 %1847, 0
-  %1849 = extractvalue %dx.types.ResRet.i32 %1847, 1
-  %1850 = zext i32 %1848 to i64
-  %1851 = zext i32 %1849 to i64
-  %1852 = shl i64 %1851, 32
-  %1853 = or i64 %1850, %1852
-  %1854 = uitofp i64 %1853 to float
-  br label %1983
-
-; <label>:1855                                    ; preds = %1823
-  %1856 = icmp eq i32 %1021, 1
-  br i1 %1856, label %1857, label %1891
-
-; <label>:1857                                    ; preds = %1855
-  %1858 = add i32 %13, -1
-  %1859 = uitofp i32 %1858 to float
-  %1860 = call float @dx.op.binary.f32(i32 35, float %1017, float 0.000000e+00)  ; FMax(a,b)
-  %1861 = call float @dx.op.binary.f32(i32 36, float %1860, float %1859)  ; FMin(a,b)
-  %1862 = fptoui float %1861 to i32
-  %1863 = add i32 %15, -1
-  %1864 = uitofp i32 %1863 to float
-  %1865 = call float @dx.op.binary.f32(i32 35, float %1019, float 0.000000e+00)  ; FMax(a,b)
-  %1866 = call float @dx.op.binary.f32(i32 36, float %1865, float %1864)  ; FMin(a,b)
-  %1867 = fptoui float %1866 to i32
-  %1868 = uitofp i32 %1867 to float
-  %1869 = uitofp i32 %1862 to float
-  %1870 = fptoui float %45 to i32
-  %1871 = fptoui float %182 to i32
-  %1872 = fptoui float %1868 to i32
-  %1873 = fptoui float %1869 to i32
-  %1874 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1875 = extractvalue %dx.types.CBufRet.i32 %1874, 0
-  %1876 = extractvalue %dx.types.CBufRet.i32 %1874, 1
-  %1877 = extractvalue %dx.types.CBufRet.i32 %1874, 2
-  %1878 = extractvalue %dx.types.CBufRet.i32 %1874, 3
-  %1879 = mul i32 %1875, %1870
-  %1880 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1871, i32 %1876, i32 %1879)  ; IMad(a,b,c)
-  %1881 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1872, i32 %1877, i32 %1880)  ; IMad(a,b,c)
-  %1882 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1873, i32 %1878, i32 %1881)  ; IMad(a,b,c)
-  %1883 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1882, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1884 = extractvalue %dx.types.ResRet.i32 %1883, 0
-  %1885 = extractvalue %dx.types.ResRet.i32 %1883, 1
-  %1886 = zext i32 %1884 to i64
-  %1887 = zext i32 %1885 to i64
-  %1888 = shl i64 %1887, 32
-  %1889 = or i64 %1886, %1888
-  %1890 = uitofp i64 %1889 to float
-  br label %1983
-
-; <label>:1891                                    ; preds = %1855
-  %1892 = icmp eq i32 %1021, 2
-  br i1 %1892, label %1893, label %1983
-
-; <label>:1893                                    ; preds = %1891
-  %1894 = fsub fast float %22, %20
-  %1895 = fcmp fast olt float %1017, %20
-  br i1 %1895, label %1896, label %1909
-
-; <label>:1896                                    ; preds = %1893
-  %1897 = fsub fast float %20, %1017
-  %1898 = fdiv fast float %1897, %1894
-  %1899 = fptoui float %1898 to i32
-  %1900 = uitofp i32 %1899 to float
-  %1901 = fmul fast float %1900, %1894
-  %1902 = fsub fast float %1897, %1901
-  %1903 = and i32 %1899, 1
-  %1904 = icmp eq i32 %1903, 0
-  br i1 %1904, label %1905, label %1907
-
-; <label>:1905                                    ; preds = %1896
-  %1906 = fadd fast float %1902, %20
-  br label %1924
-
-; <label>:1907                                    ; preds = %1896
-  %1908 = fsub fast float %22, %1902
-  br label %1924
-
-; <label>:1909                                    ; preds = %1893
-  %1910 = fcmp fast ogt float %1017, %22
-  br i1 %1910, label %1911, label %1924
-
-; <label>:1911                                    ; preds = %1909
-  %1912 = fsub fast float %1017, %22
-  %1913 = fdiv fast float %1912, %1894
-  %1914 = fptoui float %1913 to i32
-  %1915 = uitofp i32 %1914 to float
-  %1916 = fmul fast float %1915, %1894
-  %1917 = fsub fast float %1912, %1916
-  %1918 = and i32 %1914, 1
-  %1919 = icmp eq i32 %1918, 0
-  br i1 %1919, label %1920, label %1922
-
-; <label>:1920                                    ; preds = %1911
-  %1921 = fsub fast float %22, %1917
-  br label %1924
-
-; <label>:1922                                    ; preds = %1911
-  %1923 = fadd fast float %1917, %20
-  br label %1924
-
-; <label>:1924                                    ; preds = %1922, %1920, %1909, %1907, %1905
-  %1925 = phi float [ %1906, %1905 ], [ %1908, %1907 ], [ %1921, %1920 ], [ %1923, %1922 ], [ %1017, %1909 ]
-  %1926 = fptoui float %1925 to i32
-  %1927 = fsub fast float %24, %20
-  %1928 = fcmp fast olt float %1019, %20
-  br i1 %1928, label %1929, label %1942
-
-; <label>:1929                                    ; preds = %1924
-  %1930 = fsub fast float %20, %1019
-  %1931 = fdiv fast float %1930, %1927
-  %1932 = fptoui float %1931 to i32
-  %1933 = uitofp i32 %1932 to float
-  %1934 = fmul fast float %1933, %1927
-  %1935 = fsub fast float %1930, %1934
-  %1936 = and i32 %1932, 1
-  %1937 = icmp eq i32 %1936, 0
-  br i1 %1937, label %1938, label %1940
-
-; <label>:1938                                    ; preds = %1929
-  %1939 = fadd fast float %1935, %20
-  br label %1957
-
-; <label>:1940                                    ; preds = %1929
-  %1941 = fsub fast float %24, %1935
-  br label %1957
-
-; <label>:1942                                    ; preds = %1924
-  %1943 = fcmp fast ogt float %1019, %24
-  br i1 %1943, label %1944, label %1957
-
-; <label>:1944                                    ; preds = %1942
-  %1945 = fsub fast float %1019, %24
-  %1946 = fdiv fast float %1945, %1927
-  %1947 = fptoui float %1946 to i32
-  %1948 = uitofp i32 %1947 to float
-  %1949 = fmul fast float %1948, %1927
-  %1950 = fsub fast float %1945, %1949
-  %1951 = and i32 %1947, 1
-  %1952 = icmp eq i32 %1951, 0
-  br i1 %1952, label %1953, label %1955
-
-; <label>:1953                                    ; preds = %1944
-  %1954 = fsub fast float %24, %1950
-  br label %1957
-
-; <label>:1955                                    ; preds = %1944
-  %1956 = fadd fast float %1950, %20
-  br label %1957
-
-; <label>:1957                                    ; preds = %1955, %1953, %1942, %1940, %1938
-  %1958 = phi float [ %1939, %1938 ], [ %1941, %1940 ], [ %1954, %1953 ], [ %1956, %1955 ], [ %1019, %1942 ]
-  %1959 = fptoui float %1958 to i32
-  %1960 = uitofp i32 %1959 to float
-  %1961 = uitofp i32 %1926 to float
-  %1962 = fptoui float %45 to i32
-  %1963 = fptoui float %182 to i32
-  %1964 = fptoui float %1960 to i32
-  %1965 = fptoui float %1961 to i32
-  %1966 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1967 = extractvalue %dx.types.CBufRet.i32 %1966, 0
-  %1968 = extractvalue %dx.types.CBufRet.i32 %1966, 1
-  %1969 = extractvalue %dx.types.CBufRet.i32 %1966, 2
-  %1970 = extractvalue %dx.types.CBufRet.i32 %1966, 3
-  %1971 = mul i32 %1967, %1962
-  %1972 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1963, i32 %1968, i32 %1971)  ; IMad(a,b,c)
-  %1973 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1964, i32 %1969, i32 %1972)  ; IMad(a,b,c)
-  %1974 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1965, i32 %1970, i32 %1973)  ; IMad(a,b,c)
-  %1975 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1974, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1976 = extractvalue %dx.types.ResRet.i32 %1975, 0
-  %1977 = extractvalue %dx.types.ResRet.i32 %1975, 1
-  %1978 = zext i32 %1976 to i64
-  %1979 = zext i32 %1977 to i64
-  %1980 = shl i64 %1979, 32
-  %1981 = or i64 %1978, %1980
-  %1982 = uitofp i64 %1981 to float
-  br label %1983
-
-; <label>:1983                                    ; preds = %1957, %1891, %1857, %1835, %1825
-  %1984 = phi float [ %1854, %1835 ], [ 0.000000e+00, %1825 ], [ %1890, %1857 ], [ %1982, %1957 ], [ 0.000000e+00, %1891 ]
-  br i1 %1022, label %1985, label %2015
-
-; <label>:1985                                    ; preds = %1983
-  %1986 = fcmp fast oge float %1343, 0.000000e+00
-  %1987 = fptoui float %1343 to i32
-  %1988 = icmp ult i32 %1987, %13
-  %1989 = and i1 %1986, %1988
-  %1990 = fcmp fast oge float %1019, 0.000000e+00
-  %1991 = and i1 %1990, %1989
-  %1992 = fptoui float %1019 to i32
-  %1993 = icmp ult i32 %1992, %15
-  %1994 = and i1 %1993, %1991
-  br i1 %1994, label %1995, label %2143
-
-; <label>:1995                                    ; preds = %1985
-  %1996 = fptoui float %45 to i32
-  %1997 = fptoui float %182 to i32
-  %1998 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1999 = extractvalue %dx.types.CBufRet.i32 %1998, 0
-  %2000 = extractvalue %dx.types.CBufRet.i32 %1998, 1
-  %2001 = extractvalue %dx.types.CBufRet.i32 %1998, 2
-  %2002 = extractvalue %dx.types.CBufRet.i32 %1998, 3
-  %2003 = mul i32 %1999, %1996
-  %2004 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1997, i32 %2000, i32 %2003)  ; IMad(a,b,c)
-  %2005 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1992, i32 %2001, i32 %2004)  ; IMad(a,b,c)
-  %2006 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1987, i32 %2002, i32 %2005)  ; IMad(a,b,c)
-  %2007 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2006, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2008 = extractvalue %dx.types.ResRet.i32 %2007, 0
-  %2009 = extractvalue %dx.types.ResRet.i32 %2007, 1
-  %2010 = zext i32 %2008 to i64
-  %2011 = zext i32 %2009 to i64
-  %2012 = shl i64 %2011, 32
-  %2013 = or i64 %2010, %2012
-  %2014 = uitofp i64 %2013 to float
-  br label %2143
-
-; <label>:2015                                    ; preds = %1983
-  %2016 = icmp eq i32 %1021, 1
-  br i1 %2016, label %2017, label %2051
-
-; <label>:2017                                    ; preds = %2015
-  %2018 = add i32 %13, -1
-  %2019 = uitofp i32 %2018 to float
-  %2020 = call float @dx.op.binary.f32(i32 35, float %1343, float 0.000000e+00)  ; FMax(a,b)
-  %2021 = call float @dx.op.binary.f32(i32 36, float %2020, float %2019)  ; FMin(a,b)
-  %2022 = fptoui float %2021 to i32
-  %2023 = add i32 %15, -1
-  %2024 = uitofp i32 %2023 to float
-  %2025 = call float @dx.op.binary.f32(i32 35, float %1019, float 0.000000e+00)  ; FMax(a,b)
-  %2026 = call float @dx.op.binary.f32(i32 36, float %2025, float %2024)  ; FMin(a,b)
-  %2027 = fptoui float %2026 to i32
-  %2028 = uitofp i32 %2027 to float
-  %2029 = uitofp i32 %2022 to float
-  %2030 = fptoui float %45 to i32
-  %2031 = fptoui float %182 to i32
-  %2032 = fptoui float %2028 to i32
-  %2033 = fptoui float %2029 to i32
-  %2034 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2035 = extractvalue %dx.types.CBufRet.i32 %2034, 0
-  %2036 = extractvalue %dx.types.CBufRet.i32 %2034, 1
-  %2037 = extractvalue %dx.types.CBufRet.i32 %2034, 2
-  %2038 = extractvalue %dx.types.CBufRet.i32 %2034, 3
-  %2039 = mul i32 %2035, %2030
-  %2040 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2031, i32 %2036, i32 %2039)  ; IMad(a,b,c)
-  %2041 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2032, i32 %2037, i32 %2040)  ; IMad(a,b,c)
-  %2042 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2033, i32 %2038, i32 %2041)  ; IMad(a,b,c)
-  %2043 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2042, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2044 = extractvalue %dx.types.ResRet.i32 %2043, 0
-  %2045 = extractvalue %dx.types.ResRet.i32 %2043, 1
-  %2046 = zext i32 %2044 to i64
-  %2047 = zext i32 %2045 to i64
-  %2048 = shl i64 %2047, 32
-  %2049 = or i64 %2046, %2048
-  %2050 = uitofp i64 %2049 to float
-  br label %2143
-
-; <label>:2051                                    ; preds = %2015
-  %2052 = icmp eq i32 %1021, 2
-  br i1 %2052, label %2053, label %2143
-
-; <label>:2053                                    ; preds = %2051
-  %2054 = fsub fast float %22, %20
-  %2055 = fcmp fast olt float %1343, %20
-  br i1 %2055, label %2056, label %2069
-
-; <label>:2056                                    ; preds = %2053
-  %2057 = fsub fast float %20, %1343
-  %2058 = fdiv fast float %2057, %2054
-  %2059 = fptoui float %2058 to i32
-  %2060 = uitofp i32 %2059 to float
-  %2061 = fmul fast float %2060, %2054
-  %2062 = fsub fast float %2057, %2061
-  %2063 = and i32 %2059, 1
-  %2064 = icmp eq i32 %2063, 0
-  br i1 %2064, label %2065, label %2067
-
-; <label>:2065                                    ; preds = %2056
-  %2066 = fadd fast float %2062, %20
-  br label %2084
-
-; <label>:2067                                    ; preds = %2056
-  %2068 = fsub fast float %22, %2062
-  br label %2084
-
-; <label>:2069                                    ; preds = %2053
-  %2070 = fcmp fast ogt float %1343, %22
-  br i1 %2070, label %2071, label %2084
-
-; <label>:2071                                    ; preds = %2069
-  %2072 = fsub fast float %1343, %22
-  %2073 = fdiv fast float %2072, %2054
-  %2074 = fptoui float %2073 to i32
-  %2075 = uitofp i32 %2074 to float
-  %2076 = fmul fast float %2075, %2054
-  %2077 = fsub fast float %2072, %2076
-  %2078 = and i32 %2074, 1
-  %2079 = icmp eq i32 %2078, 0
-  br i1 %2079, label %2080, label %2082
-
-; <label>:2080                                    ; preds = %2071
-  %2081 = fsub fast float %22, %2077
-  br label %2084
-
-; <label>:2082                                    ; preds = %2071
-  %2083 = fadd fast float %2077, %20
-  br label %2084
-
-; <label>:2084                                    ; preds = %2082, %2080, %2069, %2067, %2065
-  %2085 = phi float [ %2066, %2065 ], [ %2068, %2067 ], [ %2081, %2080 ], [ %2083, %2082 ], [ %1343, %2069 ]
-  %2086 = fptoui float %2085 to i32
-  %2087 = fsub fast float %24, %20
-  %2088 = fcmp fast olt float %1019, %20
-  br i1 %2088, label %2089, label %2102
-
-; <label>:2089                                    ; preds = %2084
-  %2090 = fsub fast float %20, %1019
-  %2091 = fdiv fast float %2090, %2087
-  %2092 = fptoui float %2091 to i32
-  %2093 = uitofp i32 %2092 to float
-  %2094 = fmul fast float %2093, %2087
-  %2095 = fsub fast float %2090, %2094
-  %2096 = and i32 %2092, 1
-  %2097 = icmp eq i32 %2096, 0
-  br i1 %2097, label %2098, label %2100
-
-; <label>:2098                                    ; preds = %2089
-  %2099 = fadd fast float %2095, %20
-  br label %2117
-
-; <label>:2100                                    ; preds = %2089
-  %2101 = fsub fast float %24, %2095
-  br label %2117
-
-; <label>:2102                                    ; preds = %2084
-  %2103 = fcmp fast ogt float %1019, %24
-  br i1 %2103, label %2104, label %2117
-
-; <label>:2104                                    ; preds = %2102
-  %2105 = fsub fast float %1019, %24
-  %2106 = fdiv fast float %2105, %2087
-  %2107 = fptoui float %2106 to i32
-  %2108 = uitofp i32 %2107 to float
-  %2109 = fmul fast float %2108, %2087
-  %2110 = fsub fast float %2105, %2109
-  %2111 = and i32 %2107, 1
-  %2112 = icmp eq i32 %2111, 0
-  br i1 %2112, label %2113, label %2115
-
-; <label>:2113                                    ; preds = %2104
-  %2114 = fsub fast float %24, %2110
-  br label %2117
-
-; <label>:2115                                    ; preds = %2104
-  %2116 = fadd fast float %2110, %20
-  br label %2117
-
-; <label>:2117                                    ; preds = %2115, %2113, %2102, %2100, %2098
-  %2118 = phi float [ %2099, %2098 ], [ %2101, %2100 ], [ %2114, %2113 ], [ %2116, %2115 ], [ %1019, %2102 ]
-  %2119 = fptoui float %2118 to i32
-  %2120 = uitofp i32 %2119 to float
-  %2121 = uitofp i32 %2086 to float
-  %2122 = fptoui float %45 to i32
-  %2123 = fptoui float %182 to i32
-  %2124 = fptoui float %2120 to i32
-  %2125 = fptoui float %2121 to i32
-  %2126 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2127 = extractvalue %dx.types.CBufRet.i32 %2126, 0
-  %2128 = extractvalue %dx.types.CBufRet.i32 %2126, 1
-  %2129 = extractvalue %dx.types.CBufRet.i32 %2126, 2
-  %2130 = extractvalue %dx.types.CBufRet.i32 %2126, 3
-  %2131 = mul i32 %2127, %2122
-  %2132 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2123, i32 %2128, i32 %2131)  ; IMad(a,b,c)
-  %2133 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2124, i32 %2129, i32 %2132)  ; IMad(a,b,c)
-  %2134 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2125, i32 %2130, i32 %2133)  ; IMad(a,b,c)
-  %2135 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2134, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2136 = extractvalue %dx.types.ResRet.i32 %2135, 0
-  %2137 = extractvalue %dx.types.ResRet.i32 %2135, 1
-  %2138 = zext i32 %2136 to i64
-  %2139 = zext i32 %2137 to i64
-  %2140 = shl i64 %2139, 32
-  %2141 = or i64 %2138, %2140
-  %2142 = uitofp i64 %2141 to float
-  br label %2143
-
-; <label>:2143                                    ; preds = %2117, %2051, %2017, %1995, %1985
-  %2144 = phi float [ %2014, %1995 ], [ 0.000000e+00, %1985 ], [ %2050, %2017 ], [ %2142, %2117 ], [ 0.000000e+00, %2051 ]
-  br i1 %1022, label %2145, label %2175
-
-; <label>:2145                                    ; preds = %2143
-  %2146 = fcmp fast oge float %1504, 0.000000e+00
-  %2147 = fptoui float %1504 to i32
-  %2148 = icmp ult i32 %2147, %13
-  %2149 = and i1 %2146, %2148
-  %2150 = fcmp fast oge float %1019, 0.000000e+00
-  %2151 = and i1 %2150, %2149
-  %2152 = fptoui float %1019 to i32
-  %2153 = icmp ult i32 %2152, %15
-  %2154 = and i1 %2153, %2151
-  br i1 %2154, label %2155, label %2303
-
-; <label>:2155                                    ; preds = %2145
-  %2156 = fptoui float %45 to i32
-  %2157 = fptoui float %182 to i32
-  %2158 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2159 = extractvalue %dx.types.CBufRet.i32 %2158, 0
-  %2160 = extractvalue %dx.types.CBufRet.i32 %2158, 1
-  %2161 = extractvalue %dx.types.CBufRet.i32 %2158, 2
-  %2162 = extractvalue %dx.types.CBufRet.i32 %2158, 3
-  %2163 = mul i32 %2159, %2156
-  %2164 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2157, i32 %2160, i32 %2163)  ; IMad(a,b,c)
-  %2165 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2152, i32 %2161, i32 %2164)  ; IMad(a,b,c)
-  %2166 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2147, i32 %2162, i32 %2165)  ; IMad(a,b,c)
-  %2167 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2166, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2168 = extractvalue %dx.types.ResRet.i32 %2167, 0
-  %2169 = extractvalue %dx.types.ResRet.i32 %2167, 1
-  %2170 = zext i32 %2168 to i64
-  %2171 = zext i32 %2169 to i64
-  %2172 = shl i64 %2171, 32
-  %2173 = or i64 %2170, %2172
-  %2174 = uitofp i64 %2173 to float
-  br label %2303
-
-; <label>:2175                                    ; preds = %2143
-  %2176 = icmp eq i32 %1021, 1
-  br i1 %2176, label %2177, label %2211
-
-; <label>:2177                                    ; preds = %2175
-  %2178 = add i32 %13, -1
-  %2179 = uitofp i32 %2178 to float
-  %2180 = call float @dx.op.binary.f32(i32 35, float %1504, float 0.000000e+00)  ; FMax(a,b)
-  %2181 = call float @dx.op.binary.f32(i32 36, float %2180, float %2179)  ; FMin(a,b)
-  %2182 = fptoui float %2181 to i32
-  %2183 = add i32 %15, -1
-  %2184 = uitofp i32 %2183 to float
-  %2185 = call float @dx.op.binary.f32(i32 35, float %1019, float 0.000000e+00)  ; FMax(a,b)
-  %2186 = call float @dx.op.binary.f32(i32 36, float %2185, float %2184)  ; FMin(a,b)
-  %2187 = fptoui float %2186 to i32
-  %2188 = uitofp i32 %2187 to float
-  %2189 = uitofp i32 %2182 to float
-  %2190 = fptoui float %45 to i32
-  %2191 = fptoui float %182 to i32
-  %2192 = fptoui float %2188 to i32
-  %2193 = fptoui float %2189 to i32
-  %2194 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2195 = extractvalue %dx.types.CBufRet.i32 %2194, 0
-  %2196 = extractvalue %dx.types.CBufRet.i32 %2194, 1
-  %2197 = extractvalue %dx.types.CBufRet.i32 %2194, 2
-  %2198 = extractvalue %dx.types.CBufRet.i32 %2194, 3
-  %2199 = mul i32 %2195, %2190
-  %2200 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2191, i32 %2196, i32 %2199)  ; IMad(a,b,c)
-  %2201 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2192, i32 %2197, i32 %2200)  ; IMad(a,b,c)
-  %2202 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2193, i32 %2198, i32 %2201)  ; IMad(a,b,c)
-  %2203 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2202, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2204 = extractvalue %dx.types.ResRet.i32 %2203, 0
-  %2205 = extractvalue %dx.types.ResRet.i32 %2203, 1
-  %2206 = zext i32 %2204 to i64
-  %2207 = zext i32 %2205 to i64
-  %2208 = shl i64 %2207, 32
-  %2209 = or i64 %2206, %2208
-  %2210 = uitofp i64 %2209 to float
-  br label %2303
-
-; <label>:2211                                    ; preds = %2175
-  %2212 = icmp eq i32 %1021, 2
-  br i1 %2212, label %2213, label %2303
-
-; <label>:2213                                    ; preds = %2211
-  %2214 = fsub fast float %22, %20
-  %2215 = fcmp fast olt float %1504, %20
-  br i1 %2215, label %2216, label %2229
-
-; <label>:2216                                    ; preds = %2213
-  %2217 = fsub fast float %20, %1504
-  %2218 = fdiv fast float %2217, %2214
-  %2219 = fptoui float %2218 to i32
-  %2220 = uitofp i32 %2219 to float
-  %2221 = fmul fast float %2220, %2214
-  %2222 = fsub fast float %2217, %2221
-  %2223 = and i32 %2219, 1
-  %2224 = icmp eq i32 %2223, 0
-  br i1 %2224, label %2225, label %2227
-
-; <label>:2225                                    ; preds = %2216
-  %2226 = fadd fast float %2222, %20
-  br label %2244
-
-; <label>:2227                                    ; preds = %2216
-  %2228 = fsub fast float %22, %2222
-  br label %2244
-
-; <label>:2229                                    ; preds = %2213
-  %2230 = fcmp fast ogt float %1504, %22
-  br i1 %2230, label %2231, label %2244
-
-; <label>:2231                                    ; preds = %2229
-  %2232 = fsub fast float %1504, %22
-  %2233 = fdiv fast float %2232, %2214
-  %2234 = fptoui float %2233 to i32
-  %2235 = uitofp i32 %2234 to float
-  %2236 = fmul fast float %2235, %2214
-  %2237 = fsub fast float %2232, %2236
-  %2238 = and i32 %2234, 1
-  %2239 = icmp eq i32 %2238, 0
-  br i1 %2239, label %2240, label %2242
-
-; <label>:2240                                    ; preds = %2231
-  %2241 = fsub fast float %22, %2237
-  br label %2244
-
-; <label>:2242                                    ; preds = %2231
-  %2243 = fadd fast float %2237, %20
-  br label %2244
-
-; <label>:2244                                    ; preds = %2242, %2240, %2229, %2227, %2225
-  %2245 = phi float [ %2226, %2225 ], [ %2228, %2227 ], [ %2241, %2240 ], [ %2243, %2242 ], [ %1504, %2229 ]
-  %2246 = fptoui float %2245 to i32
-  %2247 = fsub fast float %24, %20
-  %2248 = fcmp fast olt float %1019, %20
-  br i1 %2248, label %2249, label %2262
-
-; <label>:2249                                    ; preds = %2244
-  %2250 = fsub fast float %20, %1019
-  %2251 = fdiv fast float %2250, %2247
-  %2252 = fptoui float %2251 to i32
-  %2253 = uitofp i32 %2252 to float
-  %2254 = fmul fast float %2253, %2247
-  %2255 = fsub fast float %2250, %2254
-  %2256 = and i32 %2252, 1
-  %2257 = icmp eq i32 %2256, 0
-  br i1 %2257, label %2258, label %2260
-
-; <label>:2258                                    ; preds = %2249
-  %2259 = fadd fast float %2255, %20
-  br label %2277
-
-; <label>:2260                                    ; preds = %2249
-  %2261 = fsub fast float %24, %2255
-  br label %2277
-
-; <label>:2262                                    ; preds = %2244
-  %2263 = fcmp fast ogt float %1019, %24
-  br i1 %2263, label %2264, label %2277
-
-; <label>:2264                                    ; preds = %2262
-  %2265 = fsub fast float %1019, %24
-  %2266 = fdiv fast float %2265, %2247
-  %2267 = fptoui float %2266 to i32
-  %2268 = uitofp i32 %2267 to float
-  %2269 = fmul fast float %2268, %2247
-  %2270 = fsub fast float %2265, %2269
-  %2271 = and i32 %2267, 1
-  %2272 = icmp eq i32 %2271, 0
-  br i1 %2272, label %2273, label %2275
-
-; <label>:2273                                    ; preds = %2264
-  %2274 = fsub fast float %24, %2270
-  br label %2277
-
-; <label>:2275                                    ; preds = %2264
-  %2276 = fadd fast float %2270, %20
-  br label %2277
-
-; <label>:2277                                    ; preds = %2275, %2273, %2262, %2260, %2258
-  %2278 = phi float [ %2259, %2258 ], [ %2261, %2260 ], [ %2274, %2273 ], [ %2276, %2275 ], [ %1019, %2262 ]
-  %2279 = fptoui float %2278 to i32
-  %2280 = uitofp i32 %2279 to float
-  %2281 = uitofp i32 %2246 to float
-  %2282 = fptoui float %45 to i32
-  %2283 = fptoui float %182 to i32
-  %2284 = fptoui float %2280 to i32
-  %2285 = fptoui float %2281 to i32
-  %2286 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2287 = extractvalue %dx.types.CBufRet.i32 %2286, 0
-  %2288 = extractvalue %dx.types.CBufRet.i32 %2286, 1
-  %2289 = extractvalue %dx.types.CBufRet.i32 %2286, 2
-  %2290 = extractvalue %dx.types.CBufRet.i32 %2286, 3
-  %2291 = mul i32 %2287, %2282
-  %2292 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2283, i32 %2288, i32 %2291)  ; IMad(a,b,c)
-  %2293 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2284, i32 %2289, i32 %2292)  ; IMad(a,b,c)
-  %2294 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2285, i32 %2290, i32 %2293)  ; IMad(a,b,c)
-  %2295 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2294, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2296 = extractvalue %dx.types.ResRet.i32 %2295, 0
-  %2297 = extractvalue %dx.types.ResRet.i32 %2295, 1
-  %2298 = zext i32 %2296 to i64
-  %2299 = zext i32 %2297 to i64
-  %2300 = shl i64 %2299, 32
-  %2301 = or i64 %2298, %2300
-  %2302 = uitofp i64 %2301 to float
-  br label %2303
-
-; <label>:2303                                    ; preds = %2277, %2211, %2177, %2155, %2145
-  %2304 = phi float [ %2174, %2155 ], [ 0.000000e+00, %2145 ], [ %2210, %2177 ], [ %2302, %2277 ], [ 0.000000e+00, %2211 ]
-  %2305 = fadd fast float %1019, 1.000000e+00
-  br i1 %1022, label %2306, label %2336
-
-; <label>:2306                                    ; preds = %2303
-  %2307 = fcmp fast oge float %1018, 0.000000e+00
-  %2308 = fptoui float %1018 to i32
-  %2309 = icmp ult i32 %2308, %13
-  %2310 = and i1 %2307, %2309
-  %2311 = fcmp fast oge float %2305, 0.000000e+00
-  %2312 = and i1 %2311, %2310
-  %2313 = fptoui float %2305 to i32
-  %2314 = icmp ult i32 %2313, %15
-  %2315 = and i1 %2314, %2312
-  br i1 %2315, label %2316, label %2464
-
-; <label>:2316                                    ; preds = %2306
-  %2317 = fptoui float %45 to i32
-  %2318 = fptoui float %182 to i32
-  %2319 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2320 = extractvalue %dx.types.CBufRet.i32 %2319, 0
-  %2321 = extractvalue %dx.types.CBufRet.i32 %2319, 1
-  %2322 = extractvalue %dx.types.CBufRet.i32 %2319, 2
-  %2323 = extractvalue %dx.types.CBufRet.i32 %2319, 3
-  %2324 = mul i32 %2320, %2317
-  %2325 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2318, i32 %2321, i32 %2324)  ; IMad(a,b,c)
-  %2326 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2313, i32 %2322, i32 %2325)  ; IMad(a,b,c)
-  %2327 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2308, i32 %2323, i32 %2326)  ; IMad(a,b,c)
-  %2328 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2327, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2329 = extractvalue %dx.types.ResRet.i32 %2328, 0
-  %2330 = extractvalue %dx.types.ResRet.i32 %2328, 1
-  %2331 = zext i32 %2329 to i64
-  %2332 = zext i32 %2330 to i64
-  %2333 = shl i64 %2332, 32
-  %2334 = or i64 %2331, %2333
-  %2335 = uitofp i64 %2334 to float
-  br label %2464
-
-; <label>:2336                                    ; preds = %2303
-  %2337 = icmp eq i32 %1021, 1
-  br i1 %2337, label %2338, label %2372
-
-; <label>:2338                                    ; preds = %2336
-  %2339 = add i32 %13, -1
-  %2340 = uitofp i32 %2339 to float
-  %2341 = call float @dx.op.binary.f32(i32 35, float %1018, float 0.000000e+00)  ; FMax(a,b)
-  %2342 = call float @dx.op.binary.f32(i32 36, float %2341, float %2340)  ; FMin(a,b)
-  %2343 = fptoui float %2342 to i32
-  %2344 = add i32 %15, -1
-  %2345 = uitofp i32 %2344 to float
-  %2346 = call float @dx.op.binary.f32(i32 35, float %2305, float 0.000000e+00)  ; FMax(a,b)
-  %2347 = call float @dx.op.binary.f32(i32 36, float %2346, float %2345)  ; FMin(a,b)
-  %2348 = fptoui float %2347 to i32
-  %2349 = uitofp i32 %2348 to float
-  %2350 = uitofp i32 %2343 to float
-  %2351 = fptoui float %45 to i32
-  %2352 = fptoui float %182 to i32
-  %2353 = fptoui float %2349 to i32
-  %2354 = fptoui float %2350 to i32
-  %2355 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2356 = extractvalue %dx.types.CBufRet.i32 %2355, 0
-  %2357 = extractvalue %dx.types.CBufRet.i32 %2355, 1
-  %2358 = extractvalue %dx.types.CBufRet.i32 %2355, 2
-  %2359 = extractvalue %dx.types.CBufRet.i32 %2355, 3
-  %2360 = mul i32 %2356, %2351
-  %2361 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2352, i32 %2357, i32 %2360)  ; IMad(a,b,c)
-  %2362 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2353, i32 %2358, i32 %2361)  ; IMad(a,b,c)
-  %2363 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2354, i32 %2359, i32 %2362)  ; IMad(a,b,c)
-  %2364 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2363, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2365 = extractvalue %dx.types.ResRet.i32 %2364, 0
-  %2366 = extractvalue %dx.types.ResRet.i32 %2364, 1
-  %2367 = zext i32 %2365 to i64
-  %2368 = zext i32 %2366 to i64
-  %2369 = shl i64 %2368, 32
-  %2370 = or i64 %2367, %2369
-  %2371 = uitofp i64 %2370 to float
-  br label %2464
-
-; <label>:2372                                    ; preds = %2336
-  %2373 = icmp eq i32 %1021, 2
-  br i1 %2373, label %2374, label %2464
-
-; <label>:2374                                    ; preds = %2372
-  %2375 = fsub fast float %22, %20
-  %2376 = fcmp fast olt float %1018, %20
-  br i1 %2376, label %2377, label %2390
-
-; <label>:2377                                    ; preds = %2374
-  %2378 = fsub fast float %20, %1018
-  %2379 = fdiv fast float %2378, %2375
-  %2380 = fptoui float %2379 to i32
-  %2381 = uitofp i32 %2380 to float
-  %2382 = fmul fast float %2381, %2375
-  %2383 = fsub fast float %2378, %2382
-  %2384 = and i32 %2380, 1
-  %2385 = icmp eq i32 %2384, 0
-  br i1 %2385, label %2386, label %2388
-
-; <label>:2386                                    ; preds = %2377
-  %2387 = fadd fast float %2383, %20
-  br label %2405
-
-; <label>:2388                                    ; preds = %2377
-  %2389 = fsub fast float %22, %2383
-  br label %2405
-
-; <label>:2390                                    ; preds = %2374
-  %2391 = fcmp fast ogt float %1018, %22
-  br i1 %2391, label %2392, label %2405
-
-; <label>:2392                                    ; preds = %2390
-  %2393 = fsub fast float %1018, %22
-  %2394 = fdiv fast float %2393, %2375
-  %2395 = fptoui float %2394 to i32
-  %2396 = uitofp i32 %2395 to float
-  %2397 = fmul fast float %2396, %2375
-  %2398 = fsub fast float %2393, %2397
-  %2399 = and i32 %2395, 1
-  %2400 = icmp eq i32 %2399, 0
-  br i1 %2400, label %2401, label %2403
-
-; <label>:2401                                    ; preds = %2392
-  %2402 = fsub fast float %22, %2398
-  br label %2405
-
-; <label>:2403                                    ; preds = %2392
-  %2404 = fadd fast float %2398, %20
-  br label %2405
-
-; <label>:2405                                    ; preds = %2403, %2401, %2390, %2388, %2386
-  %2406 = phi float [ %2387, %2386 ], [ %2389, %2388 ], [ %2402, %2401 ], [ %2404, %2403 ], [ %1018, %2390 ]
-  %2407 = fptoui float %2406 to i32
-  %2408 = fsub fast float %24, %20
-  %2409 = fcmp fast olt float %2305, %20
-  br i1 %2409, label %2410, label %2423
-
-; <label>:2410                                    ; preds = %2405
-  %2411 = fsub fast float %20, %2305
-  %2412 = fdiv fast float %2411, %2408
-  %2413 = fptoui float %2412 to i32
-  %2414 = uitofp i32 %2413 to float
-  %2415 = fmul fast float %2414, %2408
-  %2416 = fsub fast float %2411, %2415
-  %2417 = and i32 %2413, 1
-  %2418 = icmp eq i32 %2417, 0
-  br i1 %2418, label %2419, label %2421
-
-; <label>:2419                                    ; preds = %2410
-  %2420 = fadd fast float %2416, %20
-  br label %2438
-
-; <label>:2421                                    ; preds = %2410
-  %2422 = fsub fast float %24, %2416
-  br label %2438
-
-; <label>:2423                                    ; preds = %2405
-  %2424 = fcmp fast ogt float %2305, %24
-  br i1 %2424, label %2425, label %2438
-
-; <label>:2425                                    ; preds = %2423
-  %2426 = fsub fast float %2305, %24
-  %2427 = fdiv fast float %2426, %2408
-  %2428 = fptoui float %2427 to i32
-  %2429 = uitofp i32 %2428 to float
-  %2430 = fmul fast float %2429, %2408
-  %2431 = fsub fast float %2426, %2430
-  %2432 = and i32 %2428, 1
-  %2433 = icmp eq i32 %2432, 0
-  br i1 %2433, label %2434, label %2436
-
-; <label>:2434                                    ; preds = %2425
-  %2435 = fsub fast float %24, %2431
-  br label %2438
-
-; <label>:2436                                    ; preds = %2425
-  %2437 = fadd fast float %2431, %20
-  br label %2438
-
-; <label>:2438                                    ; preds = %2436, %2434, %2423, %2421, %2419
-  %2439 = phi float [ %2420, %2419 ], [ %2422, %2421 ], [ %2435, %2434 ], [ %2437, %2436 ], [ %2305, %2423 ]
-  %2440 = fptoui float %2439 to i32
-  %2441 = uitofp i32 %2440 to float
-  %2442 = uitofp i32 %2407 to float
-  %2443 = fptoui float %45 to i32
-  %2444 = fptoui float %182 to i32
-  %2445 = fptoui float %2441 to i32
-  %2446 = fptoui float %2442 to i32
-  %2447 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2448 = extractvalue %dx.types.CBufRet.i32 %2447, 0
-  %2449 = extractvalue %dx.types.CBufRet.i32 %2447, 1
-  %2450 = extractvalue %dx.types.CBufRet.i32 %2447, 2
-  %2451 = extractvalue %dx.types.CBufRet.i32 %2447, 3
-  %2452 = mul i32 %2448, %2443
-  %2453 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2444, i32 %2449, i32 %2452)  ; IMad(a,b,c)
-  %2454 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2445, i32 %2450, i32 %2453)  ; IMad(a,b,c)
-  %2455 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2446, i32 %2451, i32 %2454)  ; IMad(a,b,c)
-  %2456 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2455, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2457 = extractvalue %dx.types.ResRet.i32 %2456, 0
-  %2458 = extractvalue %dx.types.ResRet.i32 %2456, 1
-  %2459 = zext i32 %2457 to i64
-  %2460 = zext i32 %2458 to i64
-  %2461 = shl i64 %2460, 32
-  %2462 = or i64 %2459, %2461
-  %2463 = uitofp i64 %2462 to float
-  br label %2464
-
-; <label>:2464                                    ; preds = %2438, %2372, %2338, %2316, %2306
-  %2465 = phi float [ %2335, %2316 ], [ 0.000000e+00, %2306 ], [ %2371, %2338 ], [ %2463, %2438 ], [ 0.000000e+00, %2372 ]
-  br i1 %1022, label %2466, label %2496
-
-; <label>:2466                                    ; preds = %2464
-  %2467 = fcmp fast oge float %1017, 0.000000e+00
-  %2468 = fptoui float %1017 to i32
-  %2469 = icmp ult i32 %2468, %13
-  %2470 = and i1 %2467, %2469
-  %2471 = fcmp fast oge float %2305, 0.000000e+00
-  %2472 = and i1 %2471, %2470
-  %2473 = fptoui float %2305 to i32
-  %2474 = icmp ult i32 %2473, %15
-  %2475 = and i1 %2474, %2472
-  br i1 %2475, label %2476, label %2624
-
-; <label>:2476                                    ; preds = %2466
-  %2477 = fptoui float %45 to i32
-  %2478 = fptoui float %182 to i32
-  %2479 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2480 = extractvalue %dx.types.CBufRet.i32 %2479, 0
-  %2481 = extractvalue %dx.types.CBufRet.i32 %2479, 1
-  %2482 = extractvalue %dx.types.CBufRet.i32 %2479, 2
-  %2483 = extractvalue %dx.types.CBufRet.i32 %2479, 3
-  %2484 = mul i32 %2480, %2477
-  %2485 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2478, i32 %2481, i32 %2484)  ; IMad(a,b,c)
-  %2486 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2473, i32 %2482, i32 %2485)  ; IMad(a,b,c)
-  %2487 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2468, i32 %2483, i32 %2486)  ; IMad(a,b,c)
-  %2488 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2487, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2489 = extractvalue %dx.types.ResRet.i32 %2488, 0
-  %2490 = extractvalue %dx.types.ResRet.i32 %2488, 1
-  %2491 = zext i32 %2489 to i64
-  %2492 = zext i32 %2490 to i64
-  %2493 = shl i64 %2492, 32
-  %2494 = or i64 %2491, %2493
-  %2495 = uitofp i64 %2494 to float
-  br label %2624
-
-; <label>:2496                                    ; preds = %2464
-  %2497 = icmp eq i32 %1021, 1
-  br i1 %2497, label %2498, label %2532
-
-; <label>:2498                                    ; preds = %2496
-  %2499 = add i32 %13, -1
-  %2500 = uitofp i32 %2499 to float
-  %2501 = call float @dx.op.binary.f32(i32 35, float %1017, float 0.000000e+00)  ; FMax(a,b)
-  %2502 = call float @dx.op.binary.f32(i32 36, float %2501, float %2500)  ; FMin(a,b)
-  %2503 = fptoui float %2502 to i32
-  %2504 = add i32 %15, -1
-  %2505 = uitofp i32 %2504 to float
-  %2506 = call float @dx.op.binary.f32(i32 35, float %2305, float 0.000000e+00)  ; FMax(a,b)
-  %2507 = call float @dx.op.binary.f32(i32 36, float %2506, float %2505)  ; FMin(a,b)
-  %2508 = fptoui float %2507 to i32
-  %2509 = uitofp i32 %2508 to float
-  %2510 = uitofp i32 %2503 to float
-  %2511 = fptoui float %45 to i32
-  %2512 = fptoui float %182 to i32
-  %2513 = fptoui float %2509 to i32
-  %2514 = fptoui float %2510 to i32
-  %2515 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2516 = extractvalue %dx.types.CBufRet.i32 %2515, 0
-  %2517 = extractvalue %dx.types.CBufRet.i32 %2515, 1
-  %2518 = extractvalue %dx.types.CBufRet.i32 %2515, 2
-  %2519 = extractvalue %dx.types.CBufRet.i32 %2515, 3
-  %2520 = mul i32 %2516, %2511
-  %2521 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2512, i32 %2517, i32 %2520)  ; IMad(a,b,c)
-  %2522 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2513, i32 %2518, i32 %2521)  ; IMad(a,b,c)
-  %2523 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2514, i32 %2519, i32 %2522)  ; IMad(a,b,c)
-  %2524 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2523, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2525 = extractvalue %dx.types.ResRet.i32 %2524, 0
-  %2526 = extractvalue %dx.types.ResRet.i32 %2524, 1
-  %2527 = zext i32 %2525 to i64
-  %2528 = zext i32 %2526 to i64
-  %2529 = shl i64 %2528, 32
-  %2530 = or i64 %2527, %2529
-  %2531 = uitofp i64 %2530 to float
-  br label %2624
-
-; <label>:2532                                    ; preds = %2496
-  %2533 = icmp eq i32 %1021, 2
-  br i1 %2533, label %2534, label %2624
-
-; <label>:2534                                    ; preds = %2532
-  %2535 = fsub fast float %22, %20
-  %2536 = fcmp fast olt float %1017, %20
-  br i1 %2536, label %2537, label %2550
-
-; <label>:2537                                    ; preds = %2534
-  %2538 = fsub fast float %20, %1017
-  %2539 = fdiv fast float %2538, %2535
-  %2540 = fptoui float %2539 to i32
-  %2541 = uitofp i32 %2540 to float
-  %2542 = fmul fast float %2541, %2535
-  %2543 = fsub fast float %2538, %2542
-  %2544 = and i32 %2540, 1
-  %2545 = icmp eq i32 %2544, 0
-  br i1 %2545, label %2546, label %2548
-
-; <label>:2546                                    ; preds = %2537
-  %2547 = fadd fast float %2543, %20
-  br label %2565
-
-; <label>:2548                                    ; preds = %2537
-  %2549 = fsub fast float %22, %2543
-  br label %2565
-
-; <label>:2550                                    ; preds = %2534
-  %2551 = fcmp fast ogt float %1017, %22
-  br i1 %2551, label %2552, label %2565
-
-; <label>:2552                                    ; preds = %2550
-  %2553 = fsub fast float %1017, %22
-  %2554 = fdiv fast float %2553, %2535
-  %2555 = fptoui float %2554 to i32
-  %2556 = uitofp i32 %2555 to float
-  %2557 = fmul fast float %2556, %2535
-  %2558 = fsub fast float %2553, %2557
-  %2559 = and i32 %2555, 1
-  %2560 = icmp eq i32 %2559, 0
-  br i1 %2560, label %2561, label %2563
-
-; <label>:2561                                    ; preds = %2552
-  %2562 = fsub fast float %22, %2558
-  br label %2565
-
-; <label>:2563                                    ; preds = %2552
-  %2564 = fadd fast float %2558, %20
-  br label %2565
-
-; <label>:2565                                    ; preds = %2563, %2561, %2550, %2548, %2546
-  %2566 = phi float [ %2547, %2546 ], [ %2549, %2548 ], [ %2562, %2561 ], [ %2564, %2563 ], [ %1017, %2550 ]
-  %2567 = fptoui float %2566 to i32
-  %2568 = fsub fast float %24, %20
-  %2569 = fcmp fast olt float %2305, %20
-  br i1 %2569, label %2570, label %2583
-
-; <label>:2570                                    ; preds = %2565
-  %2571 = fsub fast float %20, %2305
-  %2572 = fdiv fast float %2571, %2568
-  %2573 = fptoui float %2572 to i32
-  %2574 = uitofp i32 %2573 to float
-  %2575 = fmul fast float %2574, %2568
-  %2576 = fsub fast float %2571, %2575
-  %2577 = and i32 %2573, 1
-  %2578 = icmp eq i32 %2577, 0
-  br i1 %2578, label %2579, label %2581
-
-; <label>:2579                                    ; preds = %2570
-  %2580 = fadd fast float %2576, %20
-  br label %2598
-
-; <label>:2581                                    ; preds = %2570
-  %2582 = fsub fast float %24, %2576
-  br label %2598
-
-; <label>:2583                                    ; preds = %2565
-  %2584 = fcmp fast ogt float %2305, %24
-  br i1 %2584, label %2585, label %2598
-
-; <label>:2585                                    ; preds = %2583
-  %2586 = fsub fast float %2305, %24
-  %2587 = fdiv fast float %2586, %2568
-  %2588 = fptoui float %2587 to i32
-  %2589 = uitofp i32 %2588 to float
-  %2590 = fmul fast float %2589, %2568
-  %2591 = fsub fast float %2586, %2590
-  %2592 = and i32 %2588, 1
-  %2593 = icmp eq i32 %2592, 0
-  br i1 %2593, label %2594, label %2596
-
-; <label>:2594                                    ; preds = %2585
-  %2595 = fsub fast float %24, %2591
-  br label %2598
-
-; <label>:2596                                    ; preds = %2585
-  %2597 = fadd fast float %2591, %20
-  br label %2598
-
-; <label>:2598                                    ; preds = %2596, %2594, %2583, %2581, %2579
-  %2599 = phi float [ %2580, %2579 ], [ %2582, %2581 ], [ %2595, %2594 ], [ %2597, %2596 ], [ %2305, %2583 ]
-  %2600 = fptoui float %2599 to i32
-  %2601 = uitofp i32 %2600 to float
-  %2602 = uitofp i32 %2567 to float
-  %2603 = fptoui float %45 to i32
-  %2604 = fptoui float %182 to i32
-  %2605 = fptoui float %2601 to i32
-  %2606 = fptoui float %2602 to i32
-  %2607 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2608 = extractvalue %dx.types.CBufRet.i32 %2607, 0
-  %2609 = extractvalue %dx.types.CBufRet.i32 %2607, 1
-  %2610 = extractvalue %dx.types.CBufRet.i32 %2607, 2
-  %2611 = extractvalue %dx.types.CBufRet.i32 %2607, 3
-  %2612 = mul i32 %2608, %2603
-  %2613 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2604, i32 %2609, i32 %2612)  ; IMad(a,b,c)
-  %2614 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2605, i32 %2610, i32 %2613)  ; IMad(a,b,c)
-  %2615 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2606, i32 %2611, i32 %2614)  ; IMad(a,b,c)
-  %2616 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2615, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2617 = extractvalue %dx.types.ResRet.i32 %2616, 0
-  %2618 = extractvalue %dx.types.ResRet.i32 %2616, 1
-  %2619 = zext i32 %2617 to i64
-  %2620 = zext i32 %2618 to i64
-  %2621 = shl i64 %2620, 32
-  %2622 = or i64 %2619, %2621
-  %2623 = uitofp i64 %2622 to float
-  br label %2624
-
-; <label>:2624                                    ; preds = %2598, %2532, %2498, %2476, %2466
-  %2625 = phi float [ %2495, %2476 ], [ 0.000000e+00, %2466 ], [ %2531, %2498 ], [ %2623, %2598 ], [ 0.000000e+00, %2532 ]
-  br i1 %1022, label %2626, label %2656
-
-; <label>:2626                                    ; preds = %2624
-  %2627 = fcmp fast oge float %1343, 0.000000e+00
-  %2628 = fptoui float %1343 to i32
-  %2629 = icmp ult i32 %2628, %13
-  %2630 = and i1 %2627, %2629
-  %2631 = fcmp fast oge float %2305, 0.000000e+00
-  %2632 = and i1 %2631, %2630
-  %2633 = fptoui float %2305 to i32
-  %2634 = icmp ult i32 %2633, %15
-  %2635 = and i1 %2634, %2632
-  br i1 %2635, label %2636, label %2784
-
-; <label>:2636                                    ; preds = %2626
-  %2637 = fptoui float %45 to i32
-  %2638 = fptoui float %182 to i32
-  %2639 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2640 = extractvalue %dx.types.CBufRet.i32 %2639, 0
-  %2641 = extractvalue %dx.types.CBufRet.i32 %2639, 1
-  %2642 = extractvalue %dx.types.CBufRet.i32 %2639, 2
-  %2643 = extractvalue %dx.types.CBufRet.i32 %2639, 3
-  %2644 = mul i32 %2640, %2637
-  %2645 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2638, i32 %2641, i32 %2644)  ; IMad(a,b,c)
-  %2646 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2633, i32 %2642, i32 %2645)  ; IMad(a,b,c)
-  %2647 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2628, i32 %2643, i32 %2646)  ; IMad(a,b,c)
-  %2648 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2647, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2649 = extractvalue %dx.types.ResRet.i32 %2648, 0
-  %2650 = extractvalue %dx.types.ResRet.i32 %2648, 1
-  %2651 = zext i32 %2649 to i64
-  %2652 = zext i32 %2650 to i64
-  %2653 = shl i64 %2652, 32
-  %2654 = or i64 %2651, %2653
-  %2655 = uitofp i64 %2654 to float
-  br label %2784
-
-; <label>:2656                                    ; preds = %2624
-  %2657 = icmp eq i32 %1021, 1
-  br i1 %2657, label %2658, label %2692
-
-; <label>:2658                                    ; preds = %2656
-  %2659 = add i32 %13, -1
-  %2660 = uitofp i32 %2659 to float
-  %2661 = call float @dx.op.binary.f32(i32 35, float %1343, float 0.000000e+00)  ; FMax(a,b)
-  %2662 = call float @dx.op.binary.f32(i32 36, float %2661, float %2660)  ; FMin(a,b)
-  %2663 = fptoui float %2662 to i32
-  %2664 = add i32 %15, -1
-  %2665 = uitofp i32 %2664 to float
-  %2666 = call float @dx.op.binary.f32(i32 35, float %2305, float 0.000000e+00)  ; FMax(a,b)
-  %2667 = call float @dx.op.binary.f32(i32 36, float %2666, float %2665)  ; FMin(a,b)
-  %2668 = fptoui float %2667 to i32
-  %2669 = uitofp i32 %2668 to float
-  %2670 = uitofp i32 %2663 to float
-  %2671 = fptoui float %45 to i32
-  %2672 = fptoui float %182 to i32
-  %2673 = fptoui float %2669 to i32
-  %2674 = fptoui float %2670 to i32
-  %2675 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2676 = extractvalue %dx.types.CBufRet.i32 %2675, 0
-  %2677 = extractvalue %dx.types.CBufRet.i32 %2675, 1
-  %2678 = extractvalue %dx.types.CBufRet.i32 %2675, 2
-  %2679 = extractvalue %dx.types.CBufRet.i32 %2675, 3
-  %2680 = mul i32 %2676, %2671
-  %2681 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2672, i32 %2677, i32 %2680)  ; IMad(a,b,c)
-  %2682 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2673, i32 %2678, i32 %2681)  ; IMad(a,b,c)
-  %2683 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2674, i32 %2679, i32 %2682)  ; IMad(a,b,c)
-  %2684 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2683, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2685 = extractvalue %dx.types.ResRet.i32 %2684, 0
-  %2686 = extractvalue %dx.types.ResRet.i32 %2684, 1
-  %2687 = zext i32 %2685 to i64
-  %2688 = zext i32 %2686 to i64
-  %2689 = shl i64 %2688, 32
-  %2690 = or i64 %2687, %2689
-  %2691 = uitofp i64 %2690 to float
-  br label %2784
-
-; <label>:2692                                    ; preds = %2656
-  %2693 = icmp eq i32 %1021, 2
-  br i1 %2693, label %2694, label %2784
-
-; <label>:2694                                    ; preds = %2692
-  %2695 = fsub fast float %22, %20
-  %2696 = fcmp fast olt float %1343, %20
-  br i1 %2696, label %2697, label %2710
-
-; <label>:2697                                    ; preds = %2694
-  %2698 = fsub fast float %20, %1343
-  %2699 = fdiv fast float %2698, %2695
-  %2700 = fptoui float %2699 to i32
-  %2701 = uitofp i32 %2700 to float
-  %2702 = fmul fast float %2701, %2695
-  %2703 = fsub fast float %2698, %2702
-  %2704 = and i32 %2700, 1
-  %2705 = icmp eq i32 %2704, 0
-  br i1 %2705, label %2706, label %2708
-
-; <label>:2706                                    ; preds = %2697
-  %2707 = fadd fast float %2703, %20
-  br label %2725
-
-; <label>:2708                                    ; preds = %2697
-  %2709 = fsub fast float %22, %2703
-  br label %2725
-
-; <label>:2710                                    ; preds = %2694
-  %2711 = fcmp fast ogt float %1343, %22
-  br i1 %2711, label %2712, label %2725
-
-; <label>:2712                                    ; preds = %2710
-  %2713 = fsub fast float %1343, %22
-  %2714 = fdiv fast float %2713, %2695
-  %2715 = fptoui float %2714 to i32
-  %2716 = uitofp i32 %2715 to float
-  %2717 = fmul fast float %2716, %2695
-  %2718 = fsub fast float %2713, %2717
-  %2719 = and i32 %2715, 1
-  %2720 = icmp eq i32 %2719, 0
-  br i1 %2720, label %2721, label %2723
-
-; <label>:2721                                    ; preds = %2712
-  %2722 = fsub fast float %22, %2718
-  br label %2725
-
-; <label>:2723                                    ; preds = %2712
-  %2724 = fadd fast float %2718, %20
-  br label %2725
-
-; <label>:2725                                    ; preds = %2723, %2721, %2710, %2708, %2706
-  %2726 = phi float [ %2707, %2706 ], [ %2709, %2708 ], [ %2722, %2721 ], [ %2724, %2723 ], [ %1343, %2710 ]
-  %2727 = fptoui float %2726 to i32
-  %2728 = fsub fast float %24, %20
-  %2729 = fcmp fast olt float %2305, %20
-  br i1 %2729, label %2730, label %2743
-
-; <label>:2730                                    ; preds = %2725
-  %2731 = fsub fast float %20, %2305
-  %2732 = fdiv fast float %2731, %2728
-  %2733 = fptoui float %2732 to i32
-  %2734 = uitofp i32 %2733 to float
-  %2735 = fmul fast float %2734, %2728
-  %2736 = fsub fast float %2731, %2735
-  %2737 = and i32 %2733, 1
-  %2738 = icmp eq i32 %2737, 0
-  br i1 %2738, label %2739, label %2741
-
-; <label>:2739                                    ; preds = %2730
-  %2740 = fadd fast float %2736, %20
-  br label %2758
-
-; <label>:2741                                    ; preds = %2730
-  %2742 = fsub fast float %24, %2736
-  br label %2758
-
-; <label>:2743                                    ; preds = %2725
-  %2744 = fcmp fast ogt float %2305, %24
-  br i1 %2744, label %2745, label %2758
-
-; <label>:2745                                    ; preds = %2743
-  %2746 = fsub fast float %2305, %24
-  %2747 = fdiv fast float %2746, %2728
-  %2748 = fptoui float %2747 to i32
-  %2749 = uitofp i32 %2748 to float
-  %2750 = fmul fast float %2749, %2728
-  %2751 = fsub fast float %2746, %2750
-  %2752 = and i32 %2748, 1
-  %2753 = icmp eq i32 %2752, 0
-  br i1 %2753, label %2754, label %2756
-
-; <label>:2754                                    ; preds = %2745
-  %2755 = fsub fast float %24, %2751
-  br label %2758
-
-; <label>:2756                                    ; preds = %2745
-  %2757 = fadd fast float %2751, %20
-  br label %2758
-
-; <label>:2758                                    ; preds = %2756, %2754, %2743, %2741, %2739
-  %2759 = phi float [ %2740, %2739 ], [ %2742, %2741 ], [ %2755, %2754 ], [ %2757, %2756 ], [ %2305, %2743 ]
-  %2760 = fptoui float %2759 to i32
-  %2761 = uitofp i32 %2760 to float
-  %2762 = uitofp i32 %2727 to float
-  %2763 = fptoui float %45 to i32
-  %2764 = fptoui float %182 to i32
-  %2765 = fptoui float %2761 to i32
-  %2766 = fptoui float %2762 to i32
-  %2767 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2768 = extractvalue %dx.types.CBufRet.i32 %2767, 0
-  %2769 = extractvalue %dx.types.CBufRet.i32 %2767, 1
-  %2770 = extractvalue %dx.types.CBufRet.i32 %2767, 2
-  %2771 = extractvalue %dx.types.CBufRet.i32 %2767, 3
-  %2772 = mul i32 %2768, %2763
-  %2773 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2764, i32 %2769, i32 %2772)  ; IMad(a,b,c)
-  %2774 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2765, i32 %2770, i32 %2773)  ; IMad(a,b,c)
-  %2775 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2766, i32 %2771, i32 %2774)  ; IMad(a,b,c)
-  %2776 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2775, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2777 = extractvalue %dx.types.ResRet.i32 %2776, 0
-  %2778 = extractvalue %dx.types.ResRet.i32 %2776, 1
-  %2779 = zext i32 %2777 to i64
-  %2780 = zext i32 %2778 to i64
-  %2781 = shl i64 %2780, 32
-  %2782 = or i64 %2779, %2781
-  %2783 = uitofp i64 %2782 to float
-  br label %2784
-
-; <label>:2784                                    ; preds = %2758, %2692, %2658, %2636, %2626
-  %2785 = phi float [ %2655, %2636 ], [ 0.000000e+00, %2626 ], [ %2691, %2658 ], [ %2783, %2758 ], [ 0.000000e+00, %2692 ]
-  br i1 %1022, label %2786, label %2816
-
-; <label>:2786                                    ; preds = %2784
-  %2787 = fcmp fast oge float %1504, 0.000000e+00
-  %2788 = fptoui float %1504 to i32
-  %2789 = icmp ult i32 %2788, %13
-  %2790 = and i1 %2787, %2789
-  %2791 = fcmp fast oge float %2305, 0.000000e+00
-  %2792 = and i1 %2791, %2790
-  %2793 = fptoui float %2305 to i32
-  %2794 = icmp ult i32 %2793, %15
-  %2795 = and i1 %2794, %2792
-  br i1 %2795, label %2796, label %2944
-
-; <label>:2796                                    ; preds = %2786
-  %2797 = fptoui float %45 to i32
-  %2798 = fptoui float %182 to i32
-  %2799 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2800 = extractvalue %dx.types.CBufRet.i32 %2799, 0
-  %2801 = extractvalue %dx.types.CBufRet.i32 %2799, 1
-  %2802 = extractvalue %dx.types.CBufRet.i32 %2799, 2
-  %2803 = extractvalue %dx.types.CBufRet.i32 %2799, 3
-  %2804 = mul i32 %2800, %2797
-  %2805 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2798, i32 %2801, i32 %2804)  ; IMad(a,b,c)
-  %2806 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2793, i32 %2802, i32 %2805)  ; IMad(a,b,c)
-  %2807 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2788, i32 %2803, i32 %2806)  ; IMad(a,b,c)
-  %2808 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2807, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2809 = extractvalue %dx.types.ResRet.i32 %2808, 0
-  %2810 = extractvalue %dx.types.ResRet.i32 %2808, 1
-  %2811 = zext i32 %2809 to i64
-  %2812 = zext i32 %2810 to i64
-  %2813 = shl i64 %2812, 32
-  %2814 = or i64 %2811, %2813
-  %2815 = uitofp i64 %2814 to float
-  br label %2944
-
-; <label>:2816                                    ; preds = %2784
-  %2817 = icmp eq i32 %1021, 1
-  br i1 %2817, label %2818, label %2852
-
-; <label>:2818                                    ; preds = %2816
-  %2819 = add i32 %13, -1
-  %2820 = uitofp i32 %2819 to float
-  %2821 = call float @dx.op.binary.f32(i32 35, float %1504, float 0.000000e+00)  ; FMax(a,b)
-  %2822 = call float @dx.op.binary.f32(i32 36, float %2821, float %2820)  ; FMin(a,b)
-  %2823 = fptoui float %2822 to i32
-  %2824 = add i32 %15, -1
-  %2825 = uitofp i32 %2824 to float
-  %2826 = call float @dx.op.binary.f32(i32 35, float %2305, float 0.000000e+00)  ; FMax(a,b)
-  %2827 = call float @dx.op.binary.f32(i32 36, float %2826, float %2825)  ; FMin(a,b)
-  %2828 = fptoui float %2827 to i32
-  %2829 = uitofp i32 %2828 to float
-  %2830 = uitofp i32 %2823 to float
-  %2831 = fptoui float %45 to i32
-  %2832 = fptoui float %182 to i32
-  %2833 = fptoui float %2829 to i32
-  %2834 = fptoui float %2830 to i32
-  %2835 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2836 = extractvalue %dx.types.CBufRet.i32 %2835, 0
-  %2837 = extractvalue %dx.types.CBufRet.i32 %2835, 1
-  %2838 = extractvalue %dx.types.CBufRet.i32 %2835, 2
-  %2839 = extractvalue %dx.types.CBufRet.i32 %2835, 3
-  %2840 = mul i32 %2836, %2831
-  %2841 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2832, i32 %2837, i32 %2840)  ; IMad(a,b,c)
-  %2842 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2833, i32 %2838, i32 %2841)  ; IMad(a,b,c)
-  %2843 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2834, i32 %2839, i32 %2842)  ; IMad(a,b,c)
-  %2844 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2843, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2845 = extractvalue %dx.types.ResRet.i32 %2844, 0
-  %2846 = extractvalue %dx.types.ResRet.i32 %2844, 1
-  %2847 = zext i32 %2845 to i64
-  %2848 = zext i32 %2846 to i64
-  %2849 = shl i64 %2848, 32
-  %2850 = or i64 %2847, %2849
-  %2851 = uitofp i64 %2850 to float
-  br label %2944
-
-; <label>:2852                                    ; preds = %2816
-  %2853 = icmp eq i32 %1021, 2
-  br i1 %2853, label %2854, label %2944
-
-; <label>:2854                                    ; preds = %2852
-  %2855 = fsub fast float %22, %20
-  %2856 = fcmp fast olt float %1504, %20
-  br i1 %2856, label %2857, label %2870
-
-; <label>:2857                                    ; preds = %2854
-  %2858 = fsub fast float %20, %1504
-  %2859 = fdiv fast float %2858, %2855
-  %2860 = fptoui float %2859 to i32
-  %2861 = uitofp i32 %2860 to float
-  %2862 = fmul fast float %2861, %2855
-  %2863 = fsub fast float %2858, %2862
-  %2864 = and i32 %2860, 1
-  %2865 = icmp eq i32 %2864, 0
-  br i1 %2865, label %2866, label %2868
-
-; <label>:2866                                    ; preds = %2857
-  %2867 = fadd fast float %2863, %20
-  br label %2885
-
-; <label>:2868                                    ; preds = %2857
-  %2869 = fsub fast float %22, %2863
-  br label %2885
-
-; <label>:2870                                    ; preds = %2854
-  %2871 = fcmp fast ogt float %1504, %22
-  br i1 %2871, label %2872, label %2885
-
-; <label>:2872                                    ; preds = %2870
-  %2873 = fsub fast float %1504, %22
-  %2874 = fdiv fast float %2873, %2855
-  %2875 = fptoui float %2874 to i32
-  %2876 = uitofp i32 %2875 to float
-  %2877 = fmul fast float %2876, %2855
-  %2878 = fsub fast float %2873, %2877
-  %2879 = and i32 %2875, 1
-  %2880 = icmp eq i32 %2879, 0
-  br i1 %2880, label %2881, label %2883
-
-; <label>:2881                                    ; preds = %2872
-  %2882 = fsub fast float %22, %2878
-  br label %2885
-
-; <label>:2883                                    ; preds = %2872
-  %2884 = fadd fast float %2878, %20
-  br label %2885
-
-; <label>:2885                                    ; preds = %2883, %2881, %2870, %2868, %2866
-  %2886 = phi float [ %2867, %2866 ], [ %2869, %2868 ], [ %2882, %2881 ], [ %2884, %2883 ], [ %1504, %2870 ]
-  %2887 = fptoui float %2886 to i32
-  %2888 = fsub fast float %24, %20
-  %2889 = fcmp fast olt float %2305, %20
-  br i1 %2889, label %2890, label %2903
-
-; <label>:2890                                    ; preds = %2885
-  %2891 = fsub fast float %20, %2305
-  %2892 = fdiv fast float %2891, %2888
-  %2893 = fptoui float %2892 to i32
-  %2894 = uitofp i32 %2893 to float
-  %2895 = fmul fast float %2894, %2888
-  %2896 = fsub fast float %2891, %2895
-  %2897 = and i32 %2893, 1
-  %2898 = icmp eq i32 %2897, 0
-  br i1 %2898, label %2899, label %2901
-
-; <label>:2899                                    ; preds = %2890
-  %2900 = fadd fast float %2896, %20
-  br label %2918
-
-; <label>:2901                                    ; preds = %2890
-  %2902 = fsub fast float %24, %2896
-  br label %2918
-
-; <label>:2903                                    ; preds = %2885
-  %2904 = fcmp fast ogt float %2305, %24
-  br i1 %2904, label %2905, label %2918
-
-; <label>:2905                                    ; preds = %2903
-  %2906 = fsub fast float %2305, %24
-  %2907 = fdiv fast float %2906, %2888
-  %2908 = fptoui float %2907 to i32
-  %2909 = uitofp i32 %2908 to float
-  %2910 = fmul fast float %2909, %2888
-  %2911 = fsub fast float %2906, %2910
-  %2912 = and i32 %2908, 1
-  %2913 = icmp eq i32 %2912, 0
-  br i1 %2913, label %2914, label %2916
-
-; <label>:2914                                    ; preds = %2905
-  %2915 = fsub fast float %24, %2911
-  br label %2918
-
-; <label>:2916                                    ; preds = %2905
-  %2917 = fadd fast float %2911, %20
-  br label %2918
-
-; <label>:2918                                    ; preds = %2916, %2914, %2903, %2901, %2899
-  %2919 = phi float [ %2900, %2899 ], [ %2902, %2901 ], [ %2915, %2914 ], [ %2917, %2916 ], [ %2305, %2903 ]
-  %2920 = fptoui float %2919 to i32
-  %2921 = uitofp i32 %2920 to float
-  %2922 = uitofp i32 %2887 to float
-  %2923 = fptoui float %45 to i32
-  %2924 = fptoui float %182 to i32
-  %2925 = fptoui float %2921 to i32
-  %2926 = fptoui float %2922 to i32
-  %2927 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2928 = extractvalue %dx.types.CBufRet.i32 %2927, 0
-  %2929 = extractvalue %dx.types.CBufRet.i32 %2927, 1
-  %2930 = extractvalue %dx.types.CBufRet.i32 %2927, 2
-  %2931 = extractvalue %dx.types.CBufRet.i32 %2927, 3
-  %2932 = mul i32 %2928, %2923
-  %2933 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2924, i32 %2929, i32 %2932)  ; IMad(a,b,c)
-  %2934 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2925, i32 %2930, i32 %2933)  ; IMad(a,b,c)
-  %2935 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2926, i32 %2931, i32 %2934)  ; IMad(a,b,c)
-  %2936 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2935, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2937 = extractvalue %dx.types.ResRet.i32 %2936, 0
-  %2938 = extractvalue %dx.types.ResRet.i32 %2936, 1
-  %2939 = zext i32 %2937 to i64
-  %2940 = zext i32 %2938 to i64
-  %2941 = shl i64 %2940, 32
-  %2942 = or i64 %2939, %2941
-  %2943 = uitofp i64 %2942 to float
-  br label %2944
-
-; <label>:2944                                    ; preds = %2918, %2852, %2818, %2796, %2786
-  %2945 = phi float [ %2815, %2796 ], [ 0.000000e+00, %2786 ], [ %2851, %2818 ], [ %2943, %2918 ], [ 0.000000e+00, %2852 ]
-  %2946 = fadd fast float %1019, 2.000000e+00
-  br i1 %1022, label %2947, label %2977
-
-; <label>:2947                                    ; preds = %2944
-  %2948 = fcmp fast oge float %1018, 0.000000e+00
-  %2949 = fptoui float %1018 to i32
-  %2950 = icmp ult i32 %2949, %13
-  %2951 = and i1 %2948, %2950
-  %2952 = fcmp fast oge float %2946, 0.000000e+00
-  %2953 = and i1 %2952, %2951
-  %2954 = fptoui float %2946 to i32
-  %2955 = icmp ult i32 %2954, %15
-  %2956 = and i1 %2955, %2953
-  br i1 %2956, label %2957, label %3105
-
-; <label>:2957                                    ; preds = %2947
-  %2958 = fptoui float %45 to i32
-  %2959 = fptoui float %182 to i32
-  %2960 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2961 = extractvalue %dx.types.CBufRet.i32 %2960, 0
-  %2962 = extractvalue %dx.types.CBufRet.i32 %2960, 1
-  %2963 = extractvalue %dx.types.CBufRet.i32 %2960, 2
-  %2964 = extractvalue %dx.types.CBufRet.i32 %2960, 3
-  %2965 = mul i32 %2961, %2958
-  %2966 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2959, i32 %2962, i32 %2965)  ; IMad(a,b,c)
-  %2967 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2954, i32 %2963, i32 %2966)  ; IMad(a,b,c)
-  %2968 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2949, i32 %2964, i32 %2967)  ; IMad(a,b,c)
-  %2969 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2968, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2970 = extractvalue %dx.types.ResRet.i32 %2969, 0
-  %2971 = extractvalue %dx.types.ResRet.i32 %2969, 1
-  %2972 = zext i32 %2970 to i64
-  %2973 = zext i32 %2971 to i64
-  %2974 = shl i64 %2973, 32
-  %2975 = or i64 %2972, %2974
-  %2976 = uitofp i64 %2975 to float
-  br label %3105
-
-; <label>:2977                                    ; preds = %2944
-  %2978 = icmp eq i32 %1021, 1
-  br i1 %2978, label %2979, label %3013
-
-; <label>:2979                                    ; preds = %2977
-  %2980 = add i32 %13, -1
-  %2981 = uitofp i32 %2980 to float
-  %2982 = call float @dx.op.binary.f32(i32 35, float %1018, float 0.000000e+00)  ; FMax(a,b)
-  %2983 = call float @dx.op.binary.f32(i32 36, float %2982, float %2981)  ; FMin(a,b)
-  %2984 = fptoui float %2983 to i32
-  %2985 = add i32 %15, -1
-  %2986 = uitofp i32 %2985 to float
-  %2987 = call float @dx.op.binary.f32(i32 35, float %2946, float 0.000000e+00)  ; FMax(a,b)
-  %2988 = call float @dx.op.binary.f32(i32 36, float %2987, float %2986)  ; FMin(a,b)
-  %2989 = fptoui float %2988 to i32
-  %2990 = uitofp i32 %2989 to float
-  %2991 = uitofp i32 %2984 to float
-  %2992 = fptoui float %45 to i32
-  %2993 = fptoui float %182 to i32
-  %2994 = fptoui float %2990 to i32
-  %2995 = fptoui float %2991 to i32
-  %2996 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2997 = extractvalue %dx.types.CBufRet.i32 %2996, 0
-  %2998 = extractvalue %dx.types.CBufRet.i32 %2996, 1
-  %2999 = extractvalue %dx.types.CBufRet.i32 %2996, 2
-  %3000 = extractvalue %dx.types.CBufRet.i32 %2996, 3
-  %3001 = mul i32 %2997, %2992
-  %3002 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2993, i32 %2998, i32 %3001)  ; IMad(a,b,c)
-  %3003 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2994, i32 %2999, i32 %3002)  ; IMad(a,b,c)
-  %3004 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2995, i32 %3000, i32 %3003)  ; IMad(a,b,c)
-  %3005 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3004, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3006 = extractvalue %dx.types.ResRet.i32 %3005, 0
-  %3007 = extractvalue %dx.types.ResRet.i32 %3005, 1
-  %3008 = zext i32 %3006 to i64
-  %3009 = zext i32 %3007 to i64
-  %3010 = shl i64 %3009, 32
-  %3011 = or i64 %3008, %3010
-  %3012 = uitofp i64 %3011 to float
-  br label %3105
-
-; <label>:3013                                    ; preds = %2977
-  %3014 = icmp eq i32 %1021, 2
-  br i1 %3014, label %3015, label %3105
-
-; <label>:3015                                    ; preds = %3013
-  %3016 = fsub fast float %22, %20
-  %3017 = fcmp fast olt float %1018, %20
-  br i1 %3017, label %3018, label %3031
-
-; <label>:3018                                    ; preds = %3015
-  %3019 = fsub fast float %20, %1018
-  %3020 = fdiv fast float %3019, %3016
-  %3021 = fptoui float %3020 to i32
-  %3022 = uitofp i32 %3021 to float
-  %3023 = fmul fast float %3022, %3016
-  %3024 = fsub fast float %3019, %3023
-  %3025 = and i32 %3021, 1
-  %3026 = icmp eq i32 %3025, 0
-  br i1 %3026, label %3027, label %3029
-
-; <label>:3027                                    ; preds = %3018
-  %3028 = fadd fast float %3024, %20
-  br label %3046
-
-; <label>:3029                                    ; preds = %3018
-  %3030 = fsub fast float %22, %3024
-  br label %3046
-
-; <label>:3031                                    ; preds = %3015
-  %3032 = fcmp fast ogt float %1018, %22
-  br i1 %3032, label %3033, label %3046
-
-; <label>:3033                                    ; preds = %3031
-  %3034 = fsub fast float %1018, %22
-  %3035 = fdiv fast float %3034, %3016
-  %3036 = fptoui float %3035 to i32
-  %3037 = uitofp i32 %3036 to float
-  %3038 = fmul fast float %3037, %3016
-  %3039 = fsub fast float %3034, %3038
-  %3040 = and i32 %3036, 1
-  %3041 = icmp eq i32 %3040, 0
-  br i1 %3041, label %3042, label %3044
-
-; <label>:3042                                    ; preds = %3033
-  %3043 = fsub fast float %22, %3039
-  br label %3046
-
-; <label>:3044                                    ; preds = %3033
-  %3045 = fadd fast float %3039, %20
-  br label %3046
-
-; <label>:3046                                    ; preds = %3044, %3042, %3031, %3029, %3027
-  %3047 = phi float [ %3028, %3027 ], [ %3030, %3029 ], [ %3043, %3042 ], [ %3045, %3044 ], [ %1018, %3031 ]
-  %3048 = fptoui float %3047 to i32
-  %3049 = fsub fast float %24, %20
-  %3050 = fcmp fast olt float %2946, %20
-  br i1 %3050, label %3051, label %3064
-
-; <label>:3051                                    ; preds = %3046
-  %3052 = fsub fast float %20, %2946
-  %3053 = fdiv fast float %3052, %3049
-  %3054 = fptoui float %3053 to i32
-  %3055 = uitofp i32 %3054 to float
-  %3056 = fmul fast float %3055, %3049
-  %3057 = fsub fast float %3052, %3056
-  %3058 = and i32 %3054, 1
-  %3059 = icmp eq i32 %3058, 0
-  br i1 %3059, label %3060, label %3062
-
-; <label>:3060                                    ; preds = %3051
-  %3061 = fadd fast float %3057, %20
-  br label %3079
-
-; <label>:3062                                    ; preds = %3051
-  %3063 = fsub fast float %24, %3057
-  br label %3079
-
-; <label>:3064                                    ; preds = %3046
-  %3065 = fcmp fast ogt float %2946, %24
-  br i1 %3065, label %3066, label %3079
-
-; <label>:3066                                    ; preds = %3064
-  %3067 = fsub fast float %2946, %24
-  %3068 = fdiv fast float %3067, %3049
-  %3069 = fptoui float %3068 to i32
-  %3070 = uitofp i32 %3069 to float
-  %3071 = fmul fast float %3070, %3049
-  %3072 = fsub fast float %3067, %3071
-  %3073 = and i32 %3069, 1
-  %3074 = icmp eq i32 %3073, 0
-  br i1 %3074, label %3075, label %3077
-
-; <label>:3075                                    ; preds = %3066
-  %3076 = fsub fast float %24, %3072
-  br label %3079
-
-; <label>:3077                                    ; preds = %3066
-  %3078 = fadd fast float %3072, %20
-  br label %3079
-
-; <label>:3079                                    ; preds = %3077, %3075, %3064, %3062, %3060
-  %3080 = phi float [ %3061, %3060 ], [ %3063, %3062 ], [ %3076, %3075 ], [ %3078, %3077 ], [ %2946, %3064 ]
-  %3081 = fptoui float %3080 to i32
-  %3082 = uitofp i32 %3081 to float
-  %3083 = uitofp i32 %3048 to float
-  %3084 = fptoui float %45 to i32
-  %3085 = fptoui float %182 to i32
-  %3086 = fptoui float %3082 to i32
-  %3087 = fptoui float %3083 to i32
-  %3088 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3089 = extractvalue %dx.types.CBufRet.i32 %3088, 0
-  %3090 = extractvalue %dx.types.CBufRet.i32 %3088, 1
-  %3091 = extractvalue %dx.types.CBufRet.i32 %3088, 2
-  %3092 = extractvalue %dx.types.CBufRet.i32 %3088, 3
-  %3093 = mul i32 %3089, %3084
-  %3094 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3085, i32 %3090, i32 %3093)  ; IMad(a,b,c)
-  %3095 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3086, i32 %3091, i32 %3094)  ; IMad(a,b,c)
-  %3096 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3087, i32 %3092, i32 %3095)  ; IMad(a,b,c)
-  %3097 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3096, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3098 = extractvalue %dx.types.ResRet.i32 %3097, 0
-  %3099 = extractvalue %dx.types.ResRet.i32 %3097, 1
-  %3100 = zext i32 %3098 to i64
-  %3101 = zext i32 %3099 to i64
-  %3102 = shl i64 %3101, 32
-  %3103 = or i64 %3100, %3102
-  %3104 = uitofp i64 %3103 to float
-  br label %3105
-
-; <label>:3105                                    ; preds = %3079, %3013, %2979, %2957, %2947
-  %3106 = phi float [ %2976, %2957 ], [ 0.000000e+00, %2947 ], [ %3012, %2979 ], [ %3104, %3079 ], [ 0.000000e+00, %3013 ]
-  br i1 %1022, label %3107, label %3137
-
-; <label>:3107                                    ; preds = %3105
-  %3108 = fcmp fast oge float %1017, 0.000000e+00
-  %3109 = fptoui float %1017 to i32
-  %3110 = icmp ult i32 %3109, %13
-  %3111 = and i1 %3108, %3110
-  %3112 = fcmp fast oge float %2946, 0.000000e+00
-  %3113 = and i1 %3112, %3111
-  %3114 = fptoui float %2946 to i32
-  %3115 = icmp ult i32 %3114, %15
-  %3116 = and i1 %3115, %3113
-  br i1 %3116, label %3117, label %3265
-
-; <label>:3117                                    ; preds = %3107
-  %3118 = fptoui float %45 to i32
-  %3119 = fptoui float %182 to i32
-  %3120 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3121 = extractvalue %dx.types.CBufRet.i32 %3120, 0
-  %3122 = extractvalue %dx.types.CBufRet.i32 %3120, 1
-  %3123 = extractvalue %dx.types.CBufRet.i32 %3120, 2
-  %3124 = extractvalue %dx.types.CBufRet.i32 %3120, 3
-  %3125 = mul i32 %3121, %3118
-  %3126 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3119, i32 %3122, i32 %3125)  ; IMad(a,b,c)
-  %3127 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3114, i32 %3123, i32 %3126)  ; IMad(a,b,c)
-  %3128 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3109, i32 %3124, i32 %3127)  ; IMad(a,b,c)
-  %3129 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3128, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3130 = extractvalue %dx.types.ResRet.i32 %3129, 0
-  %3131 = extractvalue %dx.types.ResRet.i32 %3129, 1
-  %3132 = zext i32 %3130 to i64
-  %3133 = zext i32 %3131 to i64
-  %3134 = shl i64 %3133, 32
-  %3135 = or i64 %3132, %3134
-  %3136 = uitofp i64 %3135 to float
-  br label %3265
-
-; <label>:3137                                    ; preds = %3105
-  %3138 = icmp eq i32 %1021, 1
-  br i1 %3138, label %3139, label %3173
-
-; <label>:3139                                    ; preds = %3137
-  %3140 = add i32 %13, -1
-  %3141 = uitofp i32 %3140 to float
-  %3142 = call float @dx.op.binary.f32(i32 35, float %1017, float 0.000000e+00)  ; FMax(a,b)
-  %3143 = call float @dx.op.binary.f32(i32 36, float %3142, float %3141)  ; FMin(a,b)
-  %3144 = fptoui float %3143 to i32
-  %3145 = add i32 %15, -1
-  %3146 = uitofp i32 %3145 to float
-  %3147 = call float @dx.op.binary.f32(i32 35, float %2946, float 0.000000e+00)  ; FMax(a,b)
-  %3148 = call float @dx.op.binary.f32(i32 36, float %3147, float %3146)  ; FMin(a,b)
-  %3149 = fptoui float %3148 to i32
-  %3150 = uitofp i32 %3149 to float
-  %3151 = uitofp i32 %3144 to float
-  %3152 = fptoui float %45 to i32
-  %3153 = fptoui float %182 to i32
-  %3154 = fptoui float %3150 to i32
-  %3155 = fptoui float %3151 to i32
-  %3156 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3157 = extractvalue %dx.types.CBufRet.i32 %3156, 0
-  %3158 = extractvalue %dx.types.CBufRet.i32 %3156, 1
-  %3159 = extractvalue %dx.types.CBufRet.i32 %3156, 2
-  %3160 = extractvalue %dx.types.CBufRet.i32 %3156, 3
-  %3161 = mul i32 %3157, %3152
-  %3162 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3153, i32 %3158, i32 %3161)  ; IMad(a,b,c)
-  %3163 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3154, i32 %3159, i32 %3162)  ; IMad(a,b,c)
-  %3164 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3155, i32 %3160, i32 %3163)  ; IMad(a,b,c)
-  %3165 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3164, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3166 = extractvalue %dx.types.ResRet.i32 %3165, 0
-  %3167 = extractvalue %dx.types.ResRet.i32 %3165, 1
-  %3168 = zext i32 %3166 to i64
-  %3169 = zext i32 %3167 to i64
-  %3170 = shl i64 %3169, 32
-  %3171 = or i64 %3168, %3170
-  %3172 = uitofp i64 %3171 to float
-  br label %3265
-
-; <label>:3173                                    ; preds = %3137
-  %3174 = icmp eq i32 %1021, 2
-  br i1 %3174, label %3175, label %3265
-
-; <label>:3175                                    ; preds = %3173
-  %3176 = fsub fast float %22, %20
-  %3177 = fcmp fast olt float %1017, %20
-  br i1 %3177, label %3178, label %3191
-
-; <label>:3178                                    ; preds = %3175
-  %3179 = fsub fast float %20, %1017
-  %3180 = fdiv fast float %3179, %3176
-  %3181 = fptoui float %3180 to i32
-  %3182 = uitofp i32 %3181 to float
-  %3183 = fmul fast float %3182, %3176
-  %3184 = fsub fast float %3179, %3183
-  %3185 = and i32 %3181, 1
-  %3186 = icmp eq i32 %3185, 0
-  br i1 %3186, label %3187, label %3189
-
-; <label>:3187                                    ; preds = %3178
-  %3188 = fadd fast float %3184, %20
-  br label %3206
-
-; <label>:3189                                    ; preds = %3178
-  %3190 = fsub fast float %22, %3184
-  br label %3206
-
-; <label>:3191                                    ; preds = %3175
-  %3192 = fcmp fast ogt float %1017, %22
-  br i1 %3192, label %3193, label %3206
-
-; <label>:3193                                    ; preds = %3191
-  %3194 = fsub fast float %1017, %22
-  %3195 = fdiv fast float %3194, %3176
-  %3196 = fptoui float %3195 to i32
-  %3197 = uitofp i32 %3196 to float
-  %3198 = fmul fast float %3197, %3176
-  %3199 = fsub fast float %3194, %3198
-  %3200 = and i32 %3196, 1
-  %3201 = icmp eq i32 %3200, 0
-  br i1 %3201, label %3202, label %3204
-
-; <label>:3202                                    ; preds = %3193
-  %3203 = fsub fast float %22, %3199
-  br label %3206
-
-; <label>:3204                                    ; preds = %3193
-  %3205 = fadd fast float %3199, %20
-  br label %3206
-
-; <label>:3206                                    ; preds = %3204, %3202, %3191, %3189, %3187
-  %3207 = phi float [ %3188, %3187 ], [ %3190, %3189 ], [ %3203, %3202 ], [ %3205, %3204 ], [ %1017, %3191 ]
-  %3208 = fptoui float %3207 to i32
-  %3209 = fsub fast float %24, %20
-  %3210 = fcmp fast olt float %2946, %20
-  br i1 %3210, label %3211, label %3224
-
-; <label>:3211                                    ; preds = %3206
-  %3212 = fsub fast float %20, %2946
-  %3213 = fdiv fast float %3212, %3209
-  %3214 = fptoui float %3213 to i32
-  %3215 = uitofp i32 %3214 to float
-  %3216 = fmul fast float %3215, %3209
-  %3217 = fsub fast float %3212, %3216
-  %3218 = and i32 %3214, 1
-  %3219 = icmp eq i32 %3218, 0
-  br i1 %3219, label %3220, label %3222
-
-; <label>:3220                                    ; preds = %3211
-  %3221 = fadd fast float %3217, %20
-  br label %3239
-
-; <label>:3222                                    ; preds = %3211
-  %3223 = fsub fast float %24, %3217
-  br label %3239
-
-; <label>:3224                                    ; preds = %3206
-  %3225 = fcmp fast ogt float %2946, %24
-  br i1 %3225, label %3226, label %3239
-
-; <label>:3226                                    ; preds = %3224
-  %3227 = fsub fast float %2946, %24
-  %3228 = fdiv fast float %3227, %3209
-  %3229 = fptoui float %3228 to i32
-  %3230 = uitofp i32 %3229 to float
-  %3231 = fmul fast float %3230, %3209
-  %3232 = fsub fast float %3227, %3231
-  %3233 = and i32 %3229, 1
-  %3234 = icmp eq i32 %3233, 0
-  br i1 %3234, label %3235, label %3237
-
-; <label>:3235                                    ; preds = %3226
-  %3236 = fsub fast float %24, %3232
-  br label %3239
-
-; <label>:3237                                    ; preds = %3226
-  %3238 = fadd fast float %3232, %20
-  br label %3239
-
-; <label>:3239                                    ; preds = %3237, %3235, %3224, %3222, %3220
-  %3240 = phi float [ %3221, %3220 ], [ %3223, %3222 ], [ %3236, %3235 ], [ %3238, %3237 ], [ %2946, %3224 ]
-  %3241 = fptoui float %3240 to i32
-  %3242 = uitofp i32 %3241 to float
-  %3243 = uitofp i32 %3208 to float
-  %3244 = fptoui float %45 to i32
-  %3245 = fptoui float %182 to i32
-  %3246 = fptoui float %3242 to i32
-  %3247 = fptoui float %3243 to i32
-  %3248 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3249 = extractvalue %dx.types.CBufRet.i32 %3248, 0
-  %3250 = extractvalue %dx.types.CBufRet.i32 %3248, 1
-  %3251 = extractvalue %dx.types.CBufRet.i32 %3248, 2
-  %3252 = extractvalue %dx.types.CBufRet.i32 %3248, 3
-  %3253 = mul i32 %3249, %3244
-  %3254 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3245, i32 %3250, i32 %3253)  ; IMad(a,b,c)
-  %3255 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3246, i32 %3251, i32 %3254)  ; IMad(a,b,c)
-  %3256 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3247, i32 %3252, i32 %3255)  ; IMad(a,b,c)
-  %3257 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3256, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3258 = extractvalue %dx.types.ResRet.i32 %3257, 0
-  %3259 = extractvalue %dx.types.ResRet.i32 %3257, 1
-  %3260 = zext i32 %3258 to i64
-  %3261 = zext i32 %3259 to i64
-  %3262 = shl i64 %3261, 32
-  %3263 = or i64 %3260, %3262
-  %3264 = uitofp i64 %3263 to float
-  br label %3265
-
-; <label>:3265                                    ; preds = %3239, %3173, %3139, %3117, %3107
-  %3266 = phi float [ %3136, %3117 ], [ 0.000000e+00, %3107 ], [ %3172, %3139 ], [ %3264, %3239 ], [ 0.000000e+00, %3173 ]
-  br i1 %1022, label %3267, label %3297
-
-; <label>:3267                                    ; preds = %3265
-  %3268 = fcmp fast oge float %1343, 0.000000e+00
-  %3269 = fptoui float %1343 to i32
-  %3270 = icmp ult i32 %3269, %13
-  %3271 = and i1 %3268, %3270
-  %3272 = fcmp fast oge float %2946, 0.000000e+00
-  %3273 = and i1 %3272, %3271
-  %3274 = fptoui float %2946 to i32
-  %3275 = icmp ult i32 %3274, %15
-  %3276 = and i1 %3275, %3273
-  br i1 %3276, label %3277, label %3425
-
-; <label>:3277                                    ; preds = %3267
-  %3278 = fptoui float %45 to i32
-  %3279 = fptoui float %182 to i32
-  %3280 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3281 = extractvalue %dx.types.CBufRet.i32 %3280, 0
-  %3282 = extractvalue %dx.types.CBufRet.i32 %3280, 1
-  %3283 = extractvalue %dx.types.CBufRet.i32 %3280, 2
-  %3284 = extractvalue %dx.types.CBufRet.i32 %3280, 3
-  %3285 = mul i32 %3281, %3278
-  %3286 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3279, i32 %3282, i32 %3285)  ; IMad(a,b,c)
-  %3287 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3274, i32 %3283, i32 %3286)  ; IMad(a,b,c)
-  %3288 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3269, i32 %3284, i32 %3287)  ; IMad(a,b,c)
-  %3289 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3288, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3290 = extractvalue %dx.types.ResRet.i32 %3289, 0
-  %3291 = extractvalue %dx.types.ResRet.i32 %3289, 1
-  %3292 = zext i32 %3290 to i64
-  %3293 = zext i32 %3291 to i64
-  %3294 = shl i64 %3293, 32
-  %3295 = or i64 %3292, %3294
-  %3296 = uitofp i64 %3295 to float
-  br label %3425
-
-; <label>:3297                                    ; preds = %3265
-  %3298 = icmp eq i32 %1021, 1
-  br i1 %3298, label %3299, label %3333
-
-; <label>:3299                                    ; preds = %3297
-  %3300 = add i32 %13, -1
-  %3301 = uitofp i32 %3300 to float
-  %3302 = call float @dx.op.binary.f32(i32 35, float %1343, float 0.000000e+00)  ; FMax(a,b)
-  %3303 = call float @dx.op.binary.f32(i32 36, float %3302, float %3301)  ; FMin(a,b)
-  %3304 = fptoui float %3303 to i32
-  %3305 = add i32 %15, -1
-  %3306 = uitofp i32 %3305 to float
-  %3307 = call float @dx.op.binary.f32(i32 35, float %2946, float 0.000000e+00)  ; FMax(a,b)
-  %3308 = call float @dx.op.binary.f32(i32 36, float %3307, float %3306)  ; FMin(a,b)
-  %3309 = fptoui float %3308 to i32
-  %3310 = uitofp i32 %3309 to float
-  %3311 = uitofp i32 %3304 to float
-  %3312 = fptoui float %45 to i32
-  %3313 = fptoui float %182 to i32
-  %3314 = fptoui float %3310 to i32
-  %3315 = fptoui float %3311 to i32
-  %3316 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3317 = extractvalue %dx.types.CBufRet.i32 %3316, 0
-  %3318 = extractvalue %dx.types.CBufRet.i32 %3316, 1
-  %3319 = extractvalue %dx.types.CBufRet.i32 %3316, 2
-  %3320 = extractvalue %dx.types.CBufRet.i32 %3316, 3
-  %3321 = mul i32 %3317, %3312
-  %3322 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3313, i32 %3318, i32 %3321)  ; IMad(a,b,c)
-  %3323 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3314, i32 %3319, i32 %3322)  ; IMad(a,b,c)
-  %3324 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3315, i32 %3320, i32 %3323)  ; IMad(a,b,c)
-  %3325 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3324, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3326 = extractvalue %dx.types.ResRet.i32 %3325, 0
-  %3327 = extractvalue %dx.types.ResRet.i32 %3325, 1
-  %3328 = zext i32 %3326 to i64
-  %3329 = zext i32 %3327 to i64
-  %3330 = shl i64 %3329, 32
-  %3331 = or i64 %3328, %3330
-  %3332 = uitofp i64 %3331 to float
-  br label %3425
-
-; <label>:3333                                    ; preds = %3297
-  %3334 = icmp eq i32 %1021, 2
-  br i1 %3334, label %3335, label %3425
-
-; <label>:3335                                    ; preds = %3333
-  %3336 = fsub fast float %22, %20
-  %3337 = fcmp fast olt float %1343, %20
-  br i1 %3337, label %3338, label %3351
-
-; <label>:3338                                    ; preds = %3335
-  %3339 = fsub fast float %20, %1343
-  %3340 = fdiv fast float %3339, %3336
-  %3341 = fptoui float %3340 to i32
-  %3342 = uitofp i32 %3341 to float
-  %3343 = fmul fast float %3342, %3336
-  %3344 = fsub fast float %3339, %3343
-  %3345 = and i32 %3341, 1
-  %3346 = icmp eq i32 %3345, 0
-  br i1 %3346, label %3347, label %3349
-
-; <label>:3347                                    ; preds = %3338
-  %3348 = fadd fast float %3344, %20
-  br label %3366
-
-; <label>:3349                                    ; preds = %3338
-  %3350 = fsub fast float %22, %3344
-  br label %3366
-
-; <label>:3351                                    ; preds = %3335
-  %3352 = fcmp fast ogt float %1343, %22
-  br i1 %3352, label %3353, label %3366
-
-; <label>:3353                                    ; preds = %3351
-  %3354 = fsub fast float %1343, %22
-  %3355 = fdiv fast float %3354, %3336
-  %3356 = fptoui float %3355 to i32
-  %3357 = uitofp i32 %3356 to float
-  %3358 = fmul fast float %3357, %3336
-  %3359 = fsub fast float %3354, %3358
-  %3360 = and i32 %3356, 1
-  %3361 = icmp eq i32 %3360, 0
-  br i1 %3361, label %3362, label %3364
-
-; <label>:3362                                    ; preds = %3353
-  %3363 = fsub fast float %22, %3359
-  br label %3366
-
-; <label>:3364                                    ; preds = %3353
-  %3365 = fadd fast float %3359, %20
-  br label %3366
-
-; <label>:3366                                    ; preds = %3364, %3362, %3351, %3349, %3347
-  %3367 = phi float [ %3348, %3347 ], [ %3350, %3349 ], [ %3363, %3362 ], [ %3365, %3364 ], [ %1343, %3351 ]
-  %3368 = fptoui float %3367 to i32
-  %3369 = fsub fast float %24, %20
-  %3370 = fcmp fast olt float %2946, %20
-  br i1 %3370, label %3371, label %3384
-
-; <label>:3371                                    ; preds = %3366
-  %3372 = fsub fast float %20, %2946
-  %3373 = fdiv fast float %3372, %3369
-  %3374 = fptoui float %3373 to i32
-  %3375 = uitofp i32 %3374 to float
-  %3376 = fmul fast float %3375, %3369
-  %3377 = fsub fast float %3372, %3376
-  %3378 = and i32 %3374, 1
-  %3379 = icmp eq i32 %3378, 0
-  br i1 %3379, label %3380, label %3382
-
-; <label>:3380                                    ; preds = %3371
-  %3381 = fadd fast float %3377, %20
-  br label %3399
-
-; <label>:3382                                    ; preds = %3371
-  %3383 = fsub fast float %24, %3377
-  br label %3399
-
-; <label>:3384                                    ; preds = %3366
-  %3385 = fcmp fast ogt float %2946, %24
-  br i1 %3385, label %3386, label %3399
-
-; <label>:3386                                    ; preds = %3384
-  %3387 = fsub fast float %2946, %24
-  %3388 = fdiv fast float %3387, %3369
-  %3389 = fptoui float %3388 to i32
-  %3390 = uitofp i32 %3389 to float
-  %3391 = fmul fast float %3390, %3369
-  %3392 = fsub fast float %3387, %3391
-  %3393 = and i32 %3389, 1
-  %3394 = icmp eq i32 %3393, 0
-  br i1 %3394, label %3395, label %3397
-
-; <label>:3395                                    ; preds = %3386
-  %3396 = fsub fast float %24, %3392
-  br label %3399
-
-; <label>:3397                                    ; preds = %3386
-  %3398 = fadd fast float %3392, %20
-  br label %3399
-
-; <label>:3399                                    ; preds = %3397, %3395, %3384, %3382, %3380
-  %3400 = phi float [ %3381, %3380 ], [ %3383, %3382 ], [ %3396, %3395 ], [ %3398, %3397 ], [ %2946, %3384 ]
-  %3401 = fptoui float %3400 to i32
-  %3402 = uitofp i32 %3401 to float
-  %3403 = uitofp i32 %3368 to float
-  %3404 = fptoui float %45 to i32
-  %3405 = fptoui float %182 to i32
-  %3406 = fptoui float %3402 to i32
-  %3407 = fptoui float %3403 to i32
-  %3408 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3409 = extractvalue %dx.types.CBufRet.i32 %3408, 0
-  %3410 = extractvalue %dx.types.CBufRet.i32 %3408, 1
-  %3411 = extractvalue %dx.types.CBufRet.i32 %3408, 2
-  %3412 = extractvalue %dx.types.CBufRet.i32 %3408, 3
-  %3413 = mul i32 %3409, %3404
-  %3414 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3405, i32 %3410, i32 %3413)  ; IMad(a,b,c)
-  %3415 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3406, i32 %3411, i32 %3414)  ; IMad(a,b,c)
-  %3416 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3407, i32 %3412, i32 %3415)  ; IMad(a,b,c)
-  %3417 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3416, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3418 = extractvalue %dx.types.ResRet.i32 %3417, 0
-  %3419 = extractvalue %dx.types.ResRet.i32 %3417, 1
-  %3420 = zext i32 %3418 to i64
-  %3421 = zext i32 %3419 to i64
-  %3422 = shl i64 %3421, 32
-  %3423 = or i64 %3420, %3422
-  %3424 = uitofp i64 %3423 to float
-  br label %3425
-
-; <label>:3425                                    ; preds = %3399, %3333, %3299, %3277, %3267
-  %3426 = phi float [ %3296, %3277 ], [ 0.000000e+00, %3267 ], [ %3332, %3299 ], [ %3424, %3399 ], [ 0.000000e+00, %3333 ]
-  br i1 %1022, label %3427, label %3457
-
-; <label>:3427                                    ; preds = %3425
-  %3428 = fcmp fast oge float %1504, 0.000000e+00
-  %3429 = fptoui float %1504 to i32
-  %3430 = icmp ult i32 %3429, %13
-  %3431 = and i1 %3428, %3430
-  %3432 = fcmp fast oge float %2946, 0.000000e+00
-  %3433 = and i1 %3432, %3431
-  %3434 = fptoui float %2946 to i32
-  %3435 = icmp ult i32 %3434, %15
-  %3436 = and i1 %3435, %3433
-  br i1 %3436, label %3437, label %3585
-
-; <label>:3437                                    ; preds = %3427
-  %3438 = fptoui float %45 to i32
-  %3439 = fptoui float %182 to i32
-  %3440 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3441 = extractvalue %dx.types.CBufRet.i32 %3440, 0
-  %3442 = extractvalue %dx.types.CBufRet.i32 %3440, 1
-  %3443 = extractvalue %dx.types.CBufRet.i32 %3440, 2
-  %3444 = extractvalue %dx.types.CBufRet.i32 %3440, 3
-  %3445 = mul i32 %3441, %3438
-  %3446 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3439, i32 %3442, i32 %3445)  ; IMad(a,b,c)
-  %3447 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3434, i32 %3443, i32 %3446)  ; IMad(a,b,c)
-  %3448 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3429, i32 %3444, i32 %3447)  ; IMad(a,b,c)
-  %3449 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3448, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3450 = extractvalue %dx.types.ResRet.i32 %3449, 0
-  %3451 = extractvalue %dx.types.ResRet.i32 %3449, 1
-  %3452 = zext i32 %3450 to i64
-  %3453 = zext i32 %3451 to i64
-  %3454 = shl i64 %3453, 32
-  %3455 = or i64 %3452, %3454
-  %3456 = uitofp i64 %3455 to float
-  br label %3585
-
-; <label>:3457                                    ; preds = %3425
-  %3458 = icmp eq i32 %1021, 1
-  br i1 %3458, label %3459, label %3493
-
-; <label>:3459                                    ; preds = %3457
-  %3460 = add i32 %13, -1
-  %3461 = uitofp i32 %3460 to float
-  %3462 = call float @dx.op.binary.f32(i32 35, float %1504, float 0.000000e+00)  ; FMax(a,b)
-  %3463 = call float @dx.op.binary.f32(i32 36, float %3462, float %3461)  ; FMin(a,b)
-  %3464 = fptoui float %3463 to i32
-  %3465 = add i32 %15, -1
-  %3466 = uitofp i32 %3465 to float
-  %3467 = call float @dx.op.binary.f32(i32 35, float %2946, float 0.000000e+00)  ; FMax(a,b)
-  %3468 = call float @dx.op.binary.f32(i32 36, float %3467, float %3466)  ; FMin(a,b)
-  %3469 = fptoui float %3468 to i32
-  %3470 = uitofp i32 %3469 to float
-  %3471 = uitofp i32 %3464 to float
-  %3472 = fptoui float %45 to i32
-  %3473 = fptoui float %182 to i32
-  %3474 = fptoui float %3470 to i32
-  %3475 = fptoui float %3471 to i32
-  %3476 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3477 = extractvalue %dx.types.CBufRet.i32 %3476, 0
-  %3478 = extractvalue %dx.types.CBufRet.i32 %3476, 1
-  %3479 = extractvalue %dx.types.CBufRet.i32 %3476, 2
-  %3480 = extractvalue %dx.types.CBufRet.i32 %3476, 3
-  %3481 = mul i32 %3477, %3472
-  %3482 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3473, i32 %3478, i32 %3481)  ; IMad(a,b,c)
-  %3483 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3474, i32 %3479, i32 %3482)  ; IMad(a,b,c)
-  %3484 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3475, i32 %3480, i32 %3483)  ; IMad(a,b,c)
-  %3485 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3484, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3486 = extractvalue %dx.types.ResRet.i32 %3485, 0
-  %3487 = extractvalue %dx.types.ResRet.i32 %3485, 1
-  %3488 = zext i32 %3486 to i64
-  %3489 = zext i32 %3487 to i64
-  %3490 = shl i64 %3489, 32
-  %3491 = or i64 %3488, %3490
-  %3492 = uitofp i64 %3491 to float
-  br label %3585
-
-; <label>:3493                                    ; preds = %3457
-  %3494 = icmp eq i32 %1021, 2
-  br i1 %3494, label %3495, label %3585
-
-; <label>:3495                                    ; preds = %3493
-  %3496 = fsub fast float %22, %20
-  %3497 = fcmp fast olt float %1504, %20
-  br i1 %3497, label %3498, label %3511
-
-; <label>:3498                                    ; preds = %3495
-  %3499 = fsub fast float %20, %1504
-  %3500 = fdiv fast float %3499, %3496
-  %3501 = fptoui float %3500 to i32
-  %3502 = uitofp i32 %3501 to float
-  %3503 = fmul fast float %3502, %3496
-  %3504 = fsub fast float %3499, %3503
-  %3505 = and i32 %3501, 1
-  %3506 = icmp eq i32 %3505, 0
-  br i1 %3506, label %3507, label %3509
-
-; <label>:3507                                    ; preds = %3498
-  %3508 = fadd fast float %3504, %20
-  br label %3526
-
-; <label>:3509                                    ; preds = %3498
-  %3510 = fsub fast float %22, %3504
-  br label %3526
-
-; <label>:3511                                    ; preds = %3495
-  %3512 = fcmp fast ogt float %1504, %22
-  br i1 %3512, label %3513, label %3526
-
-; <label>:3513                                    ; preds = %3511
-  %3514 = fsub fast float %1504, %22
-  %3515 = fdiv fast float %3514, %3496
-  %3516 = fptoui float %3515 to i32
-  %3517 = uitofp i32 %3516 to float
-  %3518 = fmul fast float %3517, %3496
-  %3519 = fsub fast float %3514, %3518
-  %3520 = and i32 %3516, 1
-  %3521 = icmp eq i32 %3520, 0
-  br i1 %3521, label %3522, label %3524
-
-; <label>:3522                                    ; preds = %3513
-  %3523 = fsub fast float %22, %3519
-  br label %3526
-
-; <label>:3524                                    ; preds = %3513
-  %3525 = fadd fast float %3519, %20
-  br label %3526
-
-; <label>:3526                                    ; preds = %3524, %3522, %3511, %3509, %3507
-  %3527 = phi float [ %3508, %3507 ], [ %3510, %3509 ], [ %3523, %3522 ], [ %3525, %3524 ], [ %1504, %3511 ]
-  %3528 = fptoui float %3527 to i32
-  %3529 = fsub fast float %24, %20
-  %3530 = fcmp fast olt float %2946, %20
-  br i1 %3530, label %3531, label %3544
-
-; <label>:3531                                    ; preds = %3526
-  %3532 = fsub fast float %20, %2946
-  %3533 = fdiv fast float %3532, %3529
-  %3534 = fptoui float %3533 to i32
-  %3535 = uitofp i32 %3534 to float
-  %3536 = fmul fast float %3535, %3529
-  %3537 = fsub fast float %3532, %3536
-  %3538 = and i32 %3534, 1
-  %3539 = icmp eq i32 %3538, 0
-  br i1 %3539, label %3540, label %3542
-
-; <label>:3540                                    ; preds = %3531
-  %3541 = fadd fast float %3537, %20
-  br label %3559
-
-; <label>:3542                                    ; preds = %3531
-  %3543 = fsub fast float %24, %3537
-  br label %3559
-
-; <label>:3544                                    ; preds = %3526
-  %3545 = fcmp fast ogt float %2946, %24
-  br i1 %3545, label %3546, label %3559
-
-; <label>:3546                                    ; preds = %3544
-  %3547 = fsub fast float %2946, %24
-  %3548 = fdiv fast float %3547, %3529
-  %3549 = fptoui float %3548 to i32
-  %3550 = uitofp i32 %3549 to float
-  %3551 = fmul fast float %3550, %3529
-  %3552 = fsub fast float %3547, %3551
-  %3553 = and i32 %3549, 1
-  %3554 = icmp eq i32 %3553, 0
-  br i1 %3554, label %3555, label %3557
-
-; <label>:3555                                    ; preds = %3546
-  %3556 = fsub fast float %24, %3552
-  br label %3559
-
-; <label>:3557                                    ; preds = %3546
-  %3558 = fadd fast float %3552, %20
-  br label %3559
-
-; <label>:3559                                    ; preds = %3557, %3555, %3544, %3542, %3540
-  %3560 = phi float [ %3541, %3540 ], [ %3543, %3542 ], [ %3556, %3555 ], [ %3558, %3557 ], [ %2946, %3544 ]
-  %3561 = fptoui float %3560 to i32
-  %3562 = uitofp i32 %3561 to float
-  %3563 = uitofp i32 %3528 to float
-  %3564 = fptoui float %45 to i32
-  %3565 = fptoui float %182 to i32
-  %3566 = fptoui float %3562 to i32
-  %3567 = fptoui float %3563 to i32
-  %3568 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3569 = extractvalue %dx.types.CBufRet.i32 %3568, 0
-  %3570 = extractvalue %dx.types.CBufRet.i32 %3568, 1
-  %3571 = extractvalue %dx.types.CBufRet.i32 %3568, 2
-  %3572 = extractvalue %dx.types.CBufRet.i32 %3568, 3
-  %3573 = mul i32 %3569, %3564
-  %3574 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3565, i32 %3570, i32 %3573)  ; IMad(a,b,c)
-  %3575 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3566, i32 %3571, i32 %3574)  ; IMad(a,b,c)
-  %3576 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3567, i32 %3572, i32 %3575)  ; IMad(a,b,c)
-  %3577 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3576, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3578 = extractvalue %dx.types.ResRet.i32 %3577, 0
-  %3579 = extractvalue %dx.types.ResRet.i32 %3577, 1
-  %3580 = zext i32 %3578 to i64
-  %3581 = zext i32 %3579 to i64
-  %3582 = shl i64 %3581, 32
-  %3583 = or i64 %3580, %3582
-  %3584 = uitofp i64 %3583 to float
-  br label %3585
-
-; <label>:3585                                    ; preds = %3559, %3493, %3459, %3437, %3427
-  %3586 = phi float [ %3456, %3437 ], [ 0.000000e+00, %3427 ], [ %3492, %3459 ], [ %3584, %3559 ], [ 0.000000e+00, %3493 ]
-  %3587 = call float @dx.op.unary.f32(i32 22, float %180)  ; Frc(value)
-  %3588 = call float @dx.op.unary.f32(i32 22, float %181)  ; Frc(value)
-  %3589 = fmul fast float %3588, %3588
-  %3590 = fmul fast float %3589, %3588
-  %3591 = fmul fast float %1182, -7.500000e-01
-  %3592 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2465, float %3591)  ; FMad(a,b,c)
-  %3593 = fmul fast float %1182, 1.500000e+00
-  %3594 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %1824, float %3593)  ; FMad(a,b,c)
-  %3595 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %2465, float %3594)  ; FMad(a,b,c)
-  %3596 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %3106, float %3595)  ; FMad(a,b,c)
-  %3597 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %1824, float %3591)  ; FMad(a,b,c)
-  %3598 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %2465, float %3597)  ; FMad(a,b,c)
-  %3599 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3106, float %3598)  ; FMad(a,b,c)
-  %3600 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3588, float %3589, float %3590, float %1824, float %3592, float %3596, float %3599)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3601 = fmul fast float %1342, -7.500000e-01
-  %3602 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2625, float %3601)  ; FMad(a,b,c)
-  %3603 = fmul fast float %1342, 1.500000e+00
-  %3604 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %1984, float %3603)  ; FMad(a,b,c)
-  %3605 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %2625, float %3604)  ; FMad(a,b,c)
-  %3606 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %3266, float %3605)  ; FMad(a,b,c)
-  %3607 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %1984, float %3601)  ; FMad(a,b,c)
-  %3608 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %2625, float %3607)  ; FMad(a,b,c)
-  %3609 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3266, float %3608)  ; FMad(a,b,c)
-  %3610 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3588, float %3589, float %3590, float %1984, float %3602, float %3606, float %3609)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3611 = fmul fast float %1503, -7.500000e-01
-  %3612 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2785, float %3611)  ; FMad(a,b,c)
-  %3613 = fmul fast float %1503, 1.500000e+00
-  %3614 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %2144, float %3613)  ; FMad(a,b,c)
-  %3615 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %2785, float %3614)  ; FMad(a,b,c)
-  %3616 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %3426, float %3615)  ; FMad(a,b,c)
-  %3617 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %2144, float %3611)  ; FMad(a,b,c)
-  %3618 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %2785, float %3617)  ; FMad(a,b,c)
-  %3619 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3426, float %3618)  ; FMad(a,b,c)
-  %3620 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3588, float %3589, float %3590, float %2144, float %3612, float %3616, float %3619)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3621 = fmul fast float %1664, -7.500000e-01
-  %3622 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2945, float %3621)  ; FMad(a,b,c)
-  %3623 = fmul fast float %1664, 1.500000e+00
-  %3624 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %2304, float %3623)  ; FMad(a,b,c)
-  %3625 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %2945, float %3624)  ; FMad(a,b,c)
-  %3626 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %3586, float %3625)  ; FMad(a,b,c)
-  %3627 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %2304, float %3621)  ; FMad(a,b,c)
-  %3628 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %2945, float %3627)  ; FMad(a,b,c)
-  %3629 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3586, float %3628)  ; FMad(a,b,c)
-  %3630 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3588, float %3589, float %3590, float %2304, float %3622, float %3626, float %3629)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3631 = fmul fast float %3587, %3587
-  %3632 = fmul fast float %3631, %3587
-  %3633 = fmul fast float %3600, -7.500000e-01
-  %3634 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3620, float %3633)  ; FMad(a,b,c)
-  %3635 = fmul fast float %3600, 1.500000e+00
-  %3636 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %3610, float %3635)  ; FMad(a,b,c)
-  %3637 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %3620, float %3636)  ; FMad(a,b,c)
-  %3638 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %3630, float %3637)  ; FMad(a,b,c)
-  %3639 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %3610, float %3633)  ; FMad(a,b,c)
-  %3640 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %3620, float %3639)  ; FMad(a,b,c)
-  %3641 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3630, float %3640)  ; FMad(a,b,c)
-  %3642 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3587, float %3631, float %3632, float %3610, float %3634, float %3638, float %3641)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3643 = fptoui float %3642 to i64
-  %3644 = trunc i64 %3643 to i32
-  %3645 = lshr i64 %3643, 32
-  %3646 = trunc i64 %3645 to i32
-  call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %1, i32 %8, i32 0, i32 %3644, i32 %3646, i32 undef, i32 undef, i8 3, i32 8)  ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment)
-  br label %3647
-
-; <label>:3647                                    ; preds = %3585, %1014, %997, %344, %0
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.threadId.i32(i32, i32) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.unary.f32(i32, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.tertiary.f32(i32, float, float, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.binary.f32(i32, float, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.dot4.f32(i32, float, float, float, float, float, float, float, float) #0
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.tertiary.i32(i32, i32, i32, i32) #0
-
-; Function Attrs: nounwind readonly
-declare %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32, %dx.types.Handle, i32) #1
-
-; Function Attrs: nounwind readonly
-declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #1
-
-; Function Attrs: nounwind readonly
-declare %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32, %dx.types.Handle, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.rawBufferStore.i32(i32, %dx.types.Handle, i32, i32, i32, i32, i32, i32, i8, i32) #2
-
-; Function Attrs: nounwind readnone
-declare double @dx.op.makeDouble.f64(i32, i32, i32) #0
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind readonly }
-attributes #2 = { nounwind }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.valver = !{!2}
-!dx.shaderModel = !{!3}
-!dx.resources = !{!4}
-!dx.entryPoints = !{!12}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 1, i32 2}
-!2 = !{i32 1, i32 6}
-!3 = !{!"cs", i32 6, i32 2}
-!4 = !{null, !5, !10, null}
-!5 = !{!6, !8, !9}
-!6 = !{i32 0, %"class.RWStructuredBuffer<unsigned long long>"* undef, !"", i32 0, i32 0, i32 1, i32 12, i1 false, i1 false, i1 false, !7}
-!7 = !{i32 1, i32 8}
-!8 = !{i32 1, %"class.RWStructuredBuffer<double>"* undef, !"", i32 0, i32 1, i32 1, i32 12, i1 false, i1 false, i1 false, !7}
-!9 = !{i32 2, %"class.RWStructuredBuffer<unsigned long long>"* undef, !"", i32 0, i32 2, i32 1, i32 12, i1 false, i1 false, i1 false, !7}
-!10 = !{!11}
-!11 = !{i32 0, %Constants* undef, !"", i32 0, i32 0, i32 1, i32 116, null}
-!12 = !{void ()* @GridSample, !"GridSample", null, !4, !13}
-!13 = !{i32 0, i64 9437204, i32 4, !14}
-!14 = !{i32 64, i32 1, i32 1}
-
-#endif
-
-const unsigned char g_GridSample[] = {
-  0x44, 0x58, 0x42, 0x43, 0xc8, 0xb6, 0xef, 0xd2, 0xe1, 0x8c, 0x8f, 0x20,
-  0xc9, 0xf6, 0xe3, 0xcc, 0x6d, 0xd7, 0xf9, 0x1e, 0x01, 0x00, 0x00, 0x00,
-  0x70, 0x58, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00,
-  0x48, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00,
-  0x18, 0x01, 0x00, 0x00, 0x34, 0x01, 0x00, 0x00, 0x53, 0x46, 0x49, 0x30,
-  0x08, 0x00, 0x00, 0x00, 0x01, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x49, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x08, 0x00, 0x00, 0x00, 0x4f, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x50, 0x53, 0x56, 0x30,
-  0xa8, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x05, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00,
-  0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
-  0x18, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x48, 0x41, 0x53, 0x48, 0x14, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x1b, 0x7f, 0xa6, 0xac, 0xf0, 0x25, 0x81, 0xc9,
-  0xf6, 0xda, 0x70, 0x58, 0x7d, 0x3d, 0x5c, 0x58, 0x44, 0x58, 0x49, 0x4c,
-  0x34, 0x57, 0x00, 0x00, 0x62, 0x00, 0x05, 0x00, 0xcd, 0x15, 0x00, 0x00,
-  0x44, 0x58, 0x49, 0x4c, 0x02, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
-  0x1c, 0x57, 0x00, 0x00, 0x42, 0x43, 0xc0, 0xde, 0x21, 0x0c, 0x00, 0x00,
-  0xc4, 0x15, 0x00, 0x00, 0x0b, 0x82, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00,
-  0x13, 0x00, 0x00, 0x00, 0x07, 0x81, 0x23, 0x91, 0x41, 0xc8, 0x04, 0x49,
-  0x06, 0x10, 0x32, 0x39, 0x92, 0x01, 0x84, 0x0c, 0x25, 0x05, 0x08, 0x19,
-  0x1e, 0x04, 0x8b, 0x62, 0x80, 0x18, 0x45, 0x02, 0x42, 0x92, 0x0b, 0x42,
-  0xc4, 0x10, 0x32, 0x14, 0x38, 0x08, 0x18, 0x4b, 0x0a, 0x32, 0x62, 0x88,
-  0x48, 0x90, 0x14, 0x20, 0x43, 0x46, 0x88, 0xa5, 0x00, 0x19, 0x32, 0x42,
-  0xe4, 0x48, 0x0e, 0x90, 0x11, 0x23, 0xc4, 0x50, 0x41, 0x51, 0x81, 0x8c,
-  0xe1, 0x83, 0xe5, 0x8a, 0x04, 0x31, 0x46, 0x06, 0x51, 0x18, 0x00, 0x00,
-  0x08, 0x00, 0x00, 0x00, 0x1b, 0x8c, 0xe0, 0xff, 0xff, 0xff, 0xff, 0x07,
-  0x40, 0x02, 0xa8, 0x0d, 0x86, 0xf0, 0xff, 0xff, 0xff, 0xff, 0x03, 0x20,
-  0x01, 0xd5, 0x06, 0x62, 0xf8, 0xff, 0xff, 0xff, 0xff, 0x01, 0x90, 0x00,
-  0x49, 0x18, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x13, 0x82, 0x60, 0x42,
-  0x20, 0x4c, 0x08, 0x06, 0x00, 0x00, 0x00, 0x00, 0x89, 0x20, 0x00, 0x00,
-  0x55, 0x00, 0x00, 0x00, 0x32, 0x22, 0x88, 0x09, 0x20, 0x64, 0x85, 0x04,
-  0x13, 0x23, 0xa4, 0x84, 0x04, 0x13, 0x23, 0xe3, 0x84, 0xa1, 0x90, 0x14,
-  0x12, 0x4c, 0x8c, 0x8c, 0x0b, 0x84, 0xc4, 0x4c, 0x10, 0xb0, 0xc1, 0x08,
-  0x40, 0x09, 0x00, 0x0a, 0xe6, 0x08, 0xc0, 0xa0, 0x0c, 0xc3, 0x30, 0x10,
-  0x31, 0x03, 0x50, 0x06, 0x63, 0x30, 0xe8, 0x28, 0x85, 0x31, 0x18, 0x86,
-  0x41, 0x49, 0x21, 0x8c, 0xc1, 0x30, 0x68, 0x29, 0x8a, 0x31, 0x18, 0x86,
-  0x61, 0x18, 0x86, 0x61, 0x50, 0x53, 0x8a, 0x61, 0x18, 0x86, 0x81, 0x9e,
-  0xa3, 0x86, 0xcb, 0x9f, 0xb0, 0x87, 0x90, 0x7c, 0x6e, 0xa3, 0x8a, 0x95,
-  0x98, 0x7c, 0xe4, 0xb6, 0x11, 0x31, 0x0c, 0xc3, 0x30, 0x47, 0x80, 0x90,
-  0x74, 0xcf, 0x70, 0xf9, 0x13, 0xf6, 0x10, 0x92, 0x1f, 0x02, 0xcd, 0xb0,
-  0x10, 0x28, 0x98, 0x0a, 0x11, 0x0d, 0xd4, 0x40, 0xd5, 0x1c, 0x41, 0x50,
-  0x0c, 0x6a, 0x90, 0x86, 0xe1, 0x22, 0xec, 0xa6, 0xe1, 0xf2, 0x27, 0xec,
-  0x21, 0x24, 0x7f, 0x25, 0xa4, 0x95, 0x98, 0x7c, 0xe4, 0xb6, 0x51, 0x31,
-  0x0c, 0xc3, 0x30, 0x94, 0x43, 0x1b, 0xa8, 0x61, 0x90, 0x06, 0xda, 0xca,
-  0x02, 0x0c, 0xd4, 0x30, 0x0c, 0xc3, 0x30, 0x48, 0x03, 0x75, 0x43, 0x00,
-  0x85, 0xf8, 0x86, 0x61, 0x20, 0x70, 0x20, 0x60, 0x8e, 0x00, 0x14, 0x66,
-  0x62, 0x83, 0x71, 0x60, 0x87, 0x70, 0x98, 0x87, 0x79, 0x70, 0x03, 0x59,
-  0xb8, 0x85, 0x59, 0xa0, 0x07, 0x79, 0xa8, 0x87, 0x71, 0xa0, 0x87, 0x7a,
-  0x90, 0x87, 0x72, 0x20, 0x07, 0x51, 0xa8, 0x07, 0x73, 0x30, 0x87, 0x72,
-  0x90, 0x07, 0x3e, 0xa8, 0x07, 0x77, 0x98, 0x87, 0x74, 0x38, 0x07, 0x77,
-  0x28, 0x07, 0x72, 0x00, 0x03, 0x76, 0x78, 0x07, 0x77, 0x38, 0x07, 0x30,
-  0x60, 0x87, 0x77, 0x70, 0x87, 0x73, 0xf0, 0x03, 0x14, 0x8c, 0x44, 0xce,
-  0x04, 0x06, 0xe3, 0xc0, 0x0e, 0xe1, 0x30, 0x0f, 0xf3, 0xe0, 0x06, 0xb2,
-  0x70, 0x0b, 0xb3, 0x40, 0x0f, 0xf2, 0x50, 0x0f, 0xe3, 0x40, 0x0f, 0xf5,
-  0x20, 0x0f, 0xe5, 0x40, 0x0e, 0xa2, 0x50, 0x0f, 0xe6, 0x60, 0x0e, 0xe5,
-  0x20, 0x0f, 0x7c, 0x40, 0x0e, 0xef, 0x50, 0x0f, 0xe2, 0xc0, 0x0e, 0xe5,
-  0xe0, 0x07, 0x28, 0xf8, 0xc8, 0x1c, 0x46, 0x20, 0x86, 0x4b, 0x38, 0xa7,
-  0x91, 0x26, 0xa0, 0x99, 0x24, 0xb4, 0x0c, 0xc3, 0x30, 0xa0, 0x28, 0x8a,
-  0xa2, 0xe8, 0x40, 0xe9, 0x14, 0x00, 0x00, 0x00, 0x13, 0x14, 0x72, 0xc0,
-  0x87, 0x74, 0x60, 0x87, 0x36, 0x68, 0x87, 0x79, 0x68, 0x03, 0x72, 0xc0,
-  0x87, 0x0d, 0xae, 0x50, 0x0e, 0x6d, 0xd0, 0x0e, 0x7a, 0x50, 0x0e, 0x6d,
-  0x00, 0x0f, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d,
-  0x90, 0x0e, 0x71, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x78,
-  0xa0, 0x07, 0x78, 0xd0, 0x06, 0xe9, 0x10, 0x07, 0x76, 0xa0, 0x07, 0x71,
-  0x60, 0x07, 0x6d, 0x90, 0x0e, 0x73, 0x20, 0x07, 0x7a, 0x30, 0x07, 0x72,
-  0xd0, 0x06, 0xe9, 0x60, 0x07, 0x74, 0xa0, 0x07, 0x76, 0x40, 0x07, 0x6d,
-  0x60, 0x0e, 0x71, 0x60, 0x07, 0x7a, 0x10, 0x07, 0x76, 0xd0, 0x06, 0xe6,
-  0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x60, 0x0e, 0x76,
-  0x40, 0x07, 0x7a, 0x60, 0x07, 0x74, 0xd0, 0x06, 0xee, 0x80, 0x07, 0x7a,
-  0x10, 0x07, 0x76, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x7a, 0x60, 0x07, 0x74,
-  0x30, 0xe4, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x60, 0xc8, 0x43, 0x00, 0x01, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0xc0, 0x90, 0xe7, 0x00, 0x02, 0x20, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x80, 0x21, 0x4f, 0x02, 0x04, 0x40, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x43, 0x9e, 0x05, 0x08, 0x80, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0x3c, 0x0d, 0x10, 0x00, 0x01,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x79, 0x1e, 0x20, 0x00,
-  0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0xf2, 0x54, 0x40,
-  0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0xe4, 0xc1,
-  0x80, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0xc8,
-  0xb3, 0x01, 0x01, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0,
-  0x90, 0xa7, 0x03, 0x02, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x80, 0x21, 0x0f, 0x18, 0x00, 0x01, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x40, 0x16, 0x08, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00,
-  0x32, 0x1e, 0x98, 0x14, 0x19, 0x11, 0x4c, 0x90, 0x8c, 0x09, 0x26, 0x47,
-  0xc6, 0x04, 0x43, 0x1a, 0x4a, 0xa0, 0x08, 0x8a, 0x61, 0x04, 0xa0, 0x30,
-  0x0a, 0xa2, 0xd0, 0x03, 0x0a, 0xa1, 0x00, 0x03, 0xe8, 0x1a, 0x01, 0xa0,
-  0xb1, 0x50, 0x01, 0x01, 0x13, 0xa8, 0x9c, 0x01, 0xa0, 0x73, 0x06, 0x80,
-  0xd4, 0x19, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00,
-  0x1a, 0x03, 0x4c, 0x90, 0x46, 0x02, 0x13, 0x44, 0x35, 0x18, 0x63, 0x0b,
-  0x73, 0x3b, 0x03, 0xb1, 0x2b, 0x93, 0x9b, 0x4b, 0x7b, 0x73, 0x03, 0x99,
-  0x71, 0xb9, 0x01, 0x41, 0xa1, 0x0b, 0x3b, 0x9b, 0x7b, 0x91, 0x2a, 0x62,
-  0x2a, 0x0a, 0x9a, 0x2a, 0xfa, 0x9a, 0xb9, 0x81, 0x79, 0x31, 0x4b, 0x73,
-  0x0b, 0x63, 0x4b, 0xd9, 0x10, 0x04, 0x13, 0x84, 0x81, 0x99, 0x20, 0x0c,
-  0xcd, 0x06, 0x61, 0x20, 0x26, 0x08, 0x83, 0xb3, 0x41, 0x18, 0x0c, 0x0a,
-  0x63, 0x73, 0x1b, 0x06, 0xc4, 0x20, 0x26, 0x08, 0xc3, 0x33, 0x41, 0x28,
-  0x83, 0x8b, 0xc0, 0x04, 0x61, 0x80, 0x26, 0x08, 0x57, 0x35, 0x41, 0x18,
-  0xa2, 0x0d, 0xc2, 0xf0, 0x6c, 0x58, 0x94, 0x85, 0x51, 0x94, 0xa1, 0x71,
-  0x1c, 0x07, 0x9a, 0x20, 0x9c, 0x01, 0xb6, 0x61, 0x19, 0x24, 0x46, 0x19,
-  0x86, 0xc6, 0x71, 0x1c, 0x68, 0xc3, 0x42, 0x2c, 0x8c, 0x42, 0x0c, 0x8d,
-  0xe3, 0x38, 0xd0, 0x86, 0x21, 0x9a, 0xa8, 0x09, 0x82, 0x1a, 0x64, 0x13,
-  0x84, 0x41, 0xda, 0x80, 0x28, 0x16, 0xa3, 0x28, 0xc3, 0x05, 0x6c, 0x08,
-  0xb0, 0x0d, 0x04, 0x50, 0x65, 0xc0, 0x04, 0x41, 0x00, 0xa8, 0x1c, 0xc9,
-  0xa5, 0x91, 0x4d, 0x85, 0xb5, 0xc1, 0xb1, 0x95, 0x4d, 0x10, 0xc6, 0xc0,
-  0x9a, 0x20, 0x0c, 0xd3, 0x04, 0x61, 0xa0, 0x36, 0x0c, 0xdf, 0x30, 0x6c,
-  0x20, 0x94, 0xce, 0x03, 0x83, 0x0d, 0xc5, 0xc6, 0x01, 0x5a, 0x18, 0x54,
-  0x61, 0x63, 0xb3, 0x6b, 0x73, 0x49, 0x23, 0x2b, 0x73, 0xa3, 0x9b, 0x12,
-  0x04, 0x55, 0xc8, 0xf0, 0x5c, 0xec, 0xca, 0xe4, 0xe6, 0xd2, 0xde, 0xdc,
-  0xa6, 0x04, 0x44, 0x13, 0x32, 0x3c, 0x17, 0xbb, 0x30, 0x36, 0xbb, 0x32,
-  0xb9, 0x29, 0x81, 0x51, 0x87, 0x0c, 0xcf, 0x65, 0x0e, 0x2d, 0x8c, 0xac,
-  0x4c, 0xae, 0xe9, 0x8d, 0xac, 0x8c, 0x6d, 0x4a, 0x80, 0x94, 0x21, 0xc3,
-  0x73, 0x91, 0x2b, 0x9b, 0x7b, 0xab, 0x93, 0x1b, 0x2b, 0x9b, 0x9b, 0x12,
-  0x64, 0x75, 0xc8, 0xf0, 0x5c, 0xca, 0xdc, 0xe8, 0xe4, 0xf2, 0xa0, 0xde,
-  0xd2, 0xdc, 0xe8, 0xe6, 0xa6, 0x04, 0x61, 0x00, 0x79, 0x18, 0x00, 0x00,
-  0x51, 0x00, 0x00, 0x00, 0x33, 0x08, 0x80, 0x1c, 0xc4, 0xe1, 0x1c, 0x66,
-  0x14, 0x01, 0x3d, 0x88, 0x43, 0x38, 0x84, 0xc3, 0x8c, 0x42, 0x80, 0x07,
-  0x79, 0x78, 0x07, 0x73, 0x98, 0x71, 0x0c, 0xe6, 0x00, 0x0f, 0xed, 0x10,
-  0x0e, 0xf4, 0x80, 0x0e, 0x33, 0x0c, 0x42, 0x1e, 0xc2, 0xc1, 0x1d, 0xce,
-  0xa1, 0x1c, 0x66, 0x30, 0x05, 0x3d, 0x88, 0x43, 0x38, 0x84, 0x83, 0x1b,
-  0xcc, 0x03, 0x3d, 0xc8, 0x43, 0x3d, 0x8c, 0x03, 0x3d, 0xcc, 0x78, 0x8c,
-  0x74, 0x70, 0x07, 0x7b, 0x08, 0x07, 0x79, 0x48, 0x87, 0x70, 0x70, 0x07,
-  0x7a, 0x70, 0x03, 0x76, 0x78, 0x87, 0x70, 0x20, 0x87, 0x19, 0xcc, 0x11,
-  0x0e, 0xec, 0x90, 0x0e, 0xe1, 0x30, 0x0f, 0x6e, 0x30, 0x0f, 0xe3, 0xf0,
-  0x0e, 0xf0, 0x50, 0x0e, 0x33, 0x10, 0xc4, 0x1d, 0xde, 0x21, 0x1c, 0xd8,
-  0x21, 0x1d, 0xc2, 0x61, 0x1e, 0x66, 0x30, 0x89, 0x3b, 0xbc, 0x83, 0x3b,
-  0xd0, 0x43, 0x39, 0xb4, 0x03, 0x3c, 0xbc, 0x83, 0x3c, 0x84, 0x03, 0x3b,
-  0xcc, 0xf0, 0x14, 0x76, 0x60, 0x07, 0x7b, 0x68, 0x07, 0x37, 0x68, 0x87,
-  0x72, 0x68, 0x07, 0x37, 0x80, 0x87, 0x70, 0x90, 0x87, 0x70, 0x60, 0x07,
-  0x76, 0x28, 0x07, 0x76, 0xf8, 0x05, 0x76, 0x78, 0x87, 0x77, 0x80, 0x87,
-  0x5f, 0x08, 0x87, 0x71, 0x18, 0x87, 0x72, 0x98, 0x87, 0x79, 0x98, 0x81,
-  0x2c, 0xee, 0xf0, 0x0e, 0xee, 0xe0, 0x0e, 0xf5, 0xc0, 0x0e, 0xec, 0x30,
-  0x03, 0x62, 0xc8, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xcc, 0xa1, 0x1c, 0xe4,
-  0xa1, 0x1c, 0xdc, 0x61, 0x1c, 0xca, 0x21, 0x1c, 0xc4, 0x81, 0x1d, 0xca,
-  0x61, 0x06, 0xd6, 0x90, 0x43, 0x39, 0xc8, 0x43, 0x39, 0x98, 0x43, 0x39,
-  0xc8, 0x43, 0x39, 0xb8, 0xc3, 0x38, 0x94, 0x43, 0x38, 0x88, 0x03, 0x3b,
-  0x94, 0xc3, 0x2f, 0xbc, 0x83, 0x3c, 0xfc, 0x82, 0x3b, 0xd4, 0x03, 0x3b,
-  0xb0, 0xc3, 0x0c, 0xc4, 0x21, 0x07, 0x7c, 0x70, 0x03, 0x7a, 0x28, 0x87,
-  0x76, 0x80, 0x87, 0x19, 0xd1, 0x43, 0x0e, 0xf8, 0xe0, 0x06, 0xe4, 0x20,
-  0x0e, 0xe7, 0xe0, 0x06, 0xf6, 0x10, 0x0e, 0xf2, 0xc0, 0x0e, 0xe1, 0x90,
-  0x0f, 0xef, 0x50, 0x0f, 0xf4, 0x30, 0x83, 0x81, 0xc8, 0x01, 0x1f, 0xdc,
-  0x40, 0x1c, 0xe4, 0xa1, 0x1c, 0xc2, 0x61, 0x1d, 0xdc, 0x40, 0x1c, 0xe4,
-  0x01, 0x00, 0x00, 0x00, 0x71, 0x20, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00,
-  0x06, 0xa0, 0x80, 0x11, 0x32, 0xb0, 0x00, 0xf3, 0x2c, 0x84, 0x11, 0x40,
-  0xc3, 0xe5, 0x3b, 0x8f, 0x1f, 0x20, 0x0d, 0x10, 0x61, 0x7e, 0x71, 0xdb,
-  0x76, 0xb0, 0x0d, 0x97, 0xef, 0x3c, 0xbe, 0x10, 0x50, 0x45, 0x41, 0x44,
-  0xa5, 0x03, 0x0c, 0x25, 0x61, 0x00, 0x02, 0xe6, 0x23, 0xb7, 0x6d, 0x08,
-  0xd2, 0x70, 0xf9, 0xce, 0xe3, 0x0b, 0x11, 0x01, 0x4c, 0x44, 0x08, 0x34,
-  0xc3, 0x42, 0x58, 0x81, 0x33, 0x5c, 0xbe, 0xf3, 0xf8, 0x83, 0x33, 0xe1,
-  0x7e, 0x71, 0xdb, 0xb6, 0x40, 0x0d, 0x97, 0xef, 0x3c, 0x3e, 0x03, 0x28,
-  0x44, 0xe7, 0x50, 0xc1, 0x42, 0xf8, 0x85, 0x8e, 0x5b, 0xc2, 0x35, 0x5c,
-  0xbe, 0xf3, 0xf8, 0x11, 0x60, 0x6d, 0x54, 0x51, 0x10, 0x51, 0xe9, 0x00,
-  0x83, 0x8f, 0xdc, 0xb6, 0x29, 0x60, 0xc3, 0xe5, 0x3b, 0x8f, 0x1f, 0x01,
-  0xd6, 0x46, 0x15, 0x05, 0x11, 0xb1, 0x93, 0x13, 0x11, 0x3e, 0x72, 0xdb,
-  0x36, 0x20, 0x0d, 0x97, 0xef, 0x3c, 0xfe, 0x44, 0x44, 0x13, 0x02, 0x44,
-  0x98, 0x5f, 0xdc, 0xb6, 0x19, 0x48, 0xc3, 0xe5, 0x3b, 0x8f, 0x3f, 0x11,
-  0xd1, 0x84, 0x00, 0x11, 0xe6, 0x23, 0xb7, 0x6d, 0x01, 0xd2, 0x70, 0xf9,
-  0xce, 0xe3, 0x4f, 0x47, 0x44, 0x00, 0x83, 0x38, 0xf8, 0xc8, 0x6d, 0x9b,
-  0xc0, 0x33, 0x5c, 0xbe, 0xf3, 0xf8, 0x54, 0x03, 0x44, 0x98, 0x5f, 0xdc,
-  0x36, 0x00, 0x00, 0x00, 0x61, 0x20, 0x00, 0x00, 0x1a, 0x14, 0x00, 0x00,
-  0x13, 0x04, 0x24, 0x14, 0x0b, 0x04, 0x00, 0x00, 0x1f, 0x00, 0x00, 0x00,
-  0x34, 0x14, 0x58, 0xd9, 0x95, 0xa5, 0x40, 0x0d, 0x94, 0x51, 0x21, 0x15,
-  0x57, 0xc1, 0xcd, 0x00, 0x94, 0x5c, 0xd9, 0x14, 0x4b, 0x61, 0x0a, 0x94,
-  0x72, 0x40, 0xd1, 0x94, 0x6e, 0x40, 0x39, 0x94, 0x02, 0x8d, 0x05, 0x44,
-  0x52, 0x11, 0x94, 0x40, 0x19, 0x90, 0x31, 0x46, 0x00, 0x82, 0x20, 0xc8,
-  0x7f, 0x63, 0x04, 0x20, 0x08, 0x82, 0xf8, 0x2f, 0x8c, 0x11, 0x80, 0x20,
-  0x08, 0x86, 0xe0, 0x30, 0x46, 0x00, 0x82, 0x20, 0xa8, 0x7f, 0x63, 0x04,
-  0x20, 0x08, 0x82, 0xfa, 0x2f, 0x8c, 0x11, 0x80, 0x20, 0x08, 0xc2, 0xdf,
-  0x18, 0x01, 0x08, 0x82, 0x20, 0xfc, 0x0b, 0x63, 0x04, 0x20, 0x08, 0x82,
-  0x20, 0x18, 0x8c, 0x11, 0x80, 0x20, 0x08, 0xd2, 0xdf, 0x18, 0x01, 0x08,
-  0x82, 0x20, 0xfd, 0x0b, 0x63, 0x04, 0x20, 0x08, 0x82, 0xf8, 0x37, 0x02,
-  0x00, 0x00, 0x00, 0x00, 0x23, 0x06, 0x09, 0x00, 0x82, 0x60, 0x80, 0xd1,
-  0x41, 0xe6, 0xbc, 0xc1, 0x1b, 0x9c, 0xc1, 0x88, 0x41, 0x02, 0x80, 0x20,
-  0x18, 0x60, 0x75, 0xa0, 0x3d, 0x71, 0x10, 0x07, 0x68, 0x30, 0x62, 0x90,
-  0x00, 0x20, 0x08, 0x06, 0x98, 0x1d, 0x6c, 0xd0, 0x1b, 0xbc, 0x41, 0x1a,
-  0x8c, 0x18, 0x24, 0x00, 0x08, 0x82, 0x01, 0x76, 0x07, 0x9c, 0x04, 0x07,
-  0x70, 0xa0, 0x06, 0x23, 0x06, 0x06, 0x00, 0x82, 0x60, 0x40, 0xfc, 0xc1,
-  0x15, 0x07, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x50, 0xe9, 0x81, 0x19,
-  0x08, 0x72, 0x30, 0x9a, 0x10, 0x00, 0x15, 0x0c, 0x30, 0x9a, 0x30, 0x04,
-  0xc3, 0x0d, 0x42, 0x40, 0x06, 0xb3, 0x0c, 0xc1, 0x08, 0x05, 0x23, 0x06,
-  0x07, 0x00, 0x82, 0x60, 0x50, 0xfd, 0xc1, 0x1a, 0x1c, 0x7a, 0x30, 0x9a,
-  0x10, 0x0c, 0x17, 0x18, 0x35, 0x9a, 0x30, 0x08, 0x17, 0x18, 0x35, 0x62,
-  0x70, 0x00, 0x20, 0x08, 0x06, 0x15, 0x29, 0xc0, 0x01, 0x13, 0x06, 0xa3,
-  0x09, 0x01, 0x30, 0xdc, 0x10, 0xf8, 0x01, 0x18, 0x4c, 0x37, 0x60, 0x53,
-  0x30, 0xdd, 0x90, 0x79, 0x42, 0x21, 0x01, 0x4c, 0x37, 0x6c, 0x60, 0x40,
-  0x14, 0x12, 0xc0, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x54, 0xac, 0x80,
-  0x07, 0x14, 0x1a, 0x8c, 0x26, 0x04, 0xc1, 0x68, 0x82, 0x20, 0x8c, 0x26,
-  0x0c, 0x43, 0x05, 0x82, 0xd4, 0x40, 0x48, 0x05, 0x83, 0xd4, 0x15, 0xcc,
-  0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x54, 0xb4, 0x00, 0x0a, 0x5c, 0x2b,
-  0x8c, 0x26, 0x04, 0x40, 0x05, 0x83, 0xd4, 0x16, 0x44, 0x05, 0xc8, 0x8c,
-  0x26, 0x14, 0x41, 0x05, 0x82, 0x14, 0x11, 0x44, 0x05, 0xcd, 0x8c, 0x26,
-  0x24, 0x42, 0x05, 0x82, 0x14, 0x11, 0xc4, 0x35, 0x46, 0x5d, 0x61, 0xd4,
-  0x0d, 0x46, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x41, 0x35, 0x0e, 0xaf,
-  0xb0, 0x06, 0xb7, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a,
-  0x30, 0x08, 0xa3, 0x09, 0xc4, 0x70, 0x84, 0x51, 0x47, 0x18, 0x75, 0x84,
-  0x51, 0x47, 0x18, 0x35, 0x62, 0xd0, 0x00, 0x20, 0x08, 0x06, 0x8d, 0x3b,
-  0xc8, 0x02, 0xb3, 0x28, 0x78, 0x40, 0x0c, 0x42, 0x60, 0x42, 0x00, 0x9f,
-  0x13, 0x86, 0x19, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0xad, 0x1d, 0x74,
-  0x21, 0x0f, 0x82, 0x73, 0x38, 0x85, 0x72, 0x18, 0x4d, 0x08, 0x80, 0xd1,
-  0x04, 0x21, 0x18, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x30, 0x70, 0x07,
-  0x59, 0x10, 0x82, 0x0b, 0x8c, 0xbb, 0x63, 0x98, 0x11, 0x03, 0x05, 0x00,
-  0x41, 0x30, 0xd8, 0xe6, 0x01, 0x1c, 0xfe, 0x20, 0x68, 0x87, 0x56, 0x58,
-  0x87, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0x11, 0x83, 0x03, 0x00,
-  0x41, 0x30, 0x00, 0x03, 0x7a, 0xc0, 0x05, 0x21, 0xb8, 0xc0, 0xb8, 0xe1,
-  0x86, 0x3a, 0xa8, 0x07, 0x30, 0x30, 0x04, 0x15, 0xe0, 0x63, 0x43, 0x2a,
-  0xc0, 0x67, 0x96, 0x41, 0x18, 0x06, 0x13, 0x5e, 0x41, 0x3e, 0x26, 0xc0,
-  0x82, 0x7c, 0xcc, 0x0f, 0x6a, 0x01, 0x3e, 0xd6, 0x07, 0xb6, 0x00, 0x1f,
-  0x23, 0x04, 0xf9, 0x18, 0x21, 0xc8, 0x67, 0x96, 0x80, 0x30, 0x51, 0x40,
-  0xe4, 0x63, 0x48, 0x28, 0xc8, 0xc7, 0x84, 0x5d, 0x80, 0x8f, 0x09, 0xbc,
-  0x00, 0x1f, 0x13, 0x72, 0x41, 0x3e, 0x26, 0xe8, 0x82, 0x7c, 0x66, 0x09,
-  0x88, 0x81, 0x0a, 0x03, 0x12, 0x88, 0x61, 0xa0, 0xc2, 0x80, 0x04, 0x62,
-  0x18, 0x4d, 0x88, 0x05, 0x61, 0xb8, 0x21, 0x38, 0x09, 0x30, 0x98, 0x65,
-  0x28, 0x8c, 0x60, 0xc4, 0xc0, 0x00, 0x40, 0x10, 0x0c, 0x0e, 0x99, 0x60,
-  0x07, 0x62, 0xc4, 0xc0, 0x00, 0x40, 0x10, 0x0c, 0x8e, 0x99, 0x68, 0x07,
-  0x62, 0x96, 0xc0, 0x18, 0xa8, 0x30, 0x88, 0x82, 0x21, 0x06, 0x2a, 0x0c,
-  0xa2, 0x60, 0x88, 0xe1, 0x08, 0x41, 0x15, 0x88, 0x6f, 0x38, 0x62, 0x48,
-  0x05, 0xe1, 0x2b, 0x21, 0xd8, 0xe1, 0x08, 0xa2, 0x15, 0x88, 0xaf, 0x84,
-  0x60, 0x87, 0x23, 0x8c, 0x55, 0x10, 0xbe, 0x0a, 0x84, 0x9d, 0x65, 0x38,
-  0xb4, 0x60, 0x34, 0xc1, 0x17, 0x86, 0xe1, 0x86, 0x80, 0x26, 0xc0, 0x60,
-  0x96, 0x01, 0x49, 0x82, 0xd2, 0x05, 0x92, 0x80, 0x0b, 0x8c, 0x1a, 0x31,
-  0x38, 0x00, 0x10, 0x04, 0x83, 0xe5, 0x27, 0x4a, 0xa2, 0x41, 0x87, 0x11,
-  0x83, 0x03, 0x00, 0x41, 0x30, 0x58, 0xc0, 0xa2, 0x24, 0x02, 0xa1, 0x78,
-  0x01, 0x25, 0xe0, 0x02, 0xa3, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60,
-  0x19, 0x8b, 0x94, 0x80, 0xd8, 0x61, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c,
-  0x16, 0xb2, 0x48, 0x89, 0x40, 0x98, 0x25, 0xd0, 0x86, 0x1b, 0x14, 0x9e,
-  0x00, 0x83, 0x59, 0x06, 0x45, 0x0b, 0x4c, 0x17, 0x78, 0x21, 0x3e, 0xb3,
-  0x0c, 0x8b, 0x33, 0x59, 0x2f, 0x54, 0xf1, 0xb1, 0x40, 0xa0, 0xcf, 0x05,
-  0xc3, 0x5c, 0x60, 0x94, 0x05, 0x85, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x44,
-  0x59, 0xe8, 0x70, 0x43, 0x30, 0x16, 0x60, 0x30, 0xcb, 0xc0, 0x34, 0x81,
-  0x0d, 0xe5, 0x00, 0x9f, 0x59, 0x02, 0xc9, 0xc8, 0x81, 0x88, 0xcf, 0x2c,
-  0x81, 0x34, 0xcb, 0xf0, 0x48, 0x9c, 0x7d, 0xe5, 0x10, 0x1f, 0x0b, 0x18,
-  0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x46, 0x59, 0xf0, 0xc8, 0xc7, 0x8a, 0x20,
-  0x3e, 0x45, 0xbc, 0x85, 0x0e, 0x37, 0x04, 0x6d, 0x01, 0x06, 0xb3, 0x0c,
-  0x50, 0x14, 0x58, 0x3b, 0x0c, 0xf1, 0x99, 0x25, 0x90, 0x8c, 0x80, 0x07,
-  0xf8, 0xcc, 0x12, 0x48, 0x03, 0x2d, 0x06, 0xc6, 0x58, 0x0d, 0x01, 0x09,
-  0x91, 0x2c, 0x38, 0xe6, 0x0e, 0xf2, 0x10, 0x9f, 0x59, 0x86, 0xc9, 0x32,
-  0x03, 0x9b, 0x07, 0x35, 0x88, 0x8f, 0x05, 0x02, 0x7d, 0x2e, 0x18, 0xe6,
-  0x02, 0xa3, 0x2c, 0x28, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xf6, 0x42,
-  0x87, 0x1b, 0x82, 0xbc, 0x00, 0x83, 0x59, 0x06, 0xaa, 0x0a, 0x6c, 0xd8,
-  0x07, 0xf8, 0xcc, 0x12, 0x68, 0x86, 0x0f, 0x44, 0x7c, 0x66, 0x09, 0xb4,
-  0x59, 0x86, 0x4b, 0x73, 0x03, 0xa3, 0x83, 0x7c, 0x88, 0x8f, 0x05, 0x0c,
-  0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa3, 0x2c, 0x78, 0xe4, 0x63, 0x45, 0x10,
-  0x9f, 0x22, 0x4a, 0x43, 0x87, 0x1b, 0x82, 0xd1, 0x00, 0x83, 0x59, 0x06,
-  0x2c, 0x0b, 0x2c, 0x24, 0x86, 0xf8, 0xcc, 0x12, 0x68, 0x46, 0x98, 0x04,
-  0x7c, 0x66, 0x09, 0xb4, 0x81, 0x22, 0x43, 0x1c, 0x10, 0x7f, 0x48, 0xfc,
-  0xc1, 0x60, 0x83, 0x8c, 0x0d, 0x30, 0x36, 0xb0, 0xd8, 0xa0, 0x62, 0x03,
-  0x6a, 0xa0, 0xc8, 0xe0, 0x05, 0xc4, 0x1f, 0x12, 0x7f, 0x30, 0x88, 0xcc,
-  0xc0, 0xfc, 0xc1, 0xc2, 0x2a, 0x8d, 0x3a, 0x7c, 0x30, 0x6a, 0x96, 0x61,
-  0x9b, 0x83, 0x52, 0x18, 0x4d, 0xb8, 0x89, 0x61, 0xb8, 0x21, 0x50, 0x0d,
-  0x30, 0x98, 0x65, 0xe0, 0xbc, 0x60, 0x38, 0xa2, 0xf8, 0x89, 0xe1, 0x3b,
-  0x63, 0x98, 0xe1, 0x86, 0xa0, 0x26, 0xc8, 0xa0, 0x86, 0x40, 0x87, 0x23,
-  0x90, 0xb1, 0x18, 0xbe, 0x0a, 0x04, 0x3d, 0x65, 0x98, 0xe1, 0x86, 0x00,
-  0x27, 0xc8, 0xa0, 0x82, 0x41, 0x67, 0x19, 0x3a, 0x39, 0x08, 0x8e, 0x1f,
-  0x86, 0xb9, 0x66, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa8, 0x7c,
-  0x43, 0x35, 0xcc, 0x02, 0x37, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08,
-  0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x38, 0x64, 0xc4, 0x00,
-  0x01, 0x40, 0x10, 0x0c, 0x9e, 0xf2, 0x88, 0x8d, 0x83, 0x08, 0x46, 0x0c,
-  0x10, 0x00, 0x04, 0xc1, 0xe0, 0x31, 0x0f, 0xd9, 0x60, 0x88, 0x60, 0xc4,
-  0x00, 0x01, 0x40, 0x10, 0x0c, 0x9e, 0xf3, 0x98, 0x0d, 0x89, 0x08, 0x46,
-  0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0x2b, 0x0f, 0xd9, 0x80, 0x8b, 0xe0,
-  0x37, 0xfe, 0xa2, 0x37, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x4e,
-  0x18, 0xe3, 0x84, 0x31, 0x2a, 0x38, 0x8d, 0xab, 0x21, 0xd8, 0x0b, 0x8c,
-  0x9a, 0x25, 0x90, 0x83, 0xe1, 0x06, 0x4f, 0x3d, 0xc0, 0x60, 0x96, 0xe1,
-  0x03, 0x83, 0xa0, 0xe0, 0x42, 0x37, 0xe0, 0x02, 0xa3, 0x46, 0x0c, 0x0e,
-  0x00, 0x04, 0xc1, 0x60, 0xa9, 0x8f, 0xdd, 0x20, 0x03, 0xbf, 0x18, 0x31,
-  0x38, 0x00, 0x10, 0x04, 0x83, 0xc5, 0x3e, 0x76, 0x23, 0x10, 0x2e, 0x18,
-  0xa6, 0xe6, 0xe2, 0x37, 0xe0, 0x02, 0xa3, 0x46, 0x0c, 0x0e, 0x00, 0x04,
-  0xc1, 0x60, 0xd1, 0x0f, 0xf0, 0x40, 0x83, 0xd1, 0x18, 0x31, 0x38, 0x00,
-  0x10, 0x04, 0x83, 0x65, 0x3f, 0xc0, 0x23, 0x10, 0x2e, 0x18, 0xe6, 0x02,
-  0xa3, 0xee, 0x30, 0xea, 0x78, 0x62, 0x98, 0x6b, 0x83, 0x61, 0x8e, 0x18,
-  0xe6, 0x88, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0xa0, 0x02, 0x11,
-  0xf6, 0x40, 0x0d, 0xfd, 0x18, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18,
-  0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04,
-  0x00, 0x41, 0x30, 0x78, 0x4e, 0x64, 0x3e, 0x12, 0x22, 0x18, 0x31, 0x40,
-  0x00, 0x10, 0x04, 0x83, 0x07, 0x45, 0xe8, 0x23, 0x21, 0x82, 0x11, 0x03,
-  0x04, 0x00, 0x41, 0x30, 0x78, 0x52, 0xa4, 0x3e, 0x12, 0x22, 0x18, 0x31,
-  0x50, 0x00, 0x10, 0x04, 0x83, 0xed, 0x44, 0xe8, 0x43, 0x36, 0x82, 0x10,
-  0x09, 0x8f, 0xff, 0x18, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x38, 0x61,
-  0x8c, 0x13, 0xc6, 0xa8, 0x20, 0x3d, 0xae, 0x86, 0x60, 0x2f, 0x30, 0x6a,
-  0x96, 0x40, 0x0e, 0x86, 0x1b, 0x40, 0x61, 0x45, 0xc0, 0x60, 0x96, 0x21,
-  0x0c, 0xe4, 0x20, 0xb0, 0xd4, 0x58, 0x8d, 0xf8, 0x0c, 0x47, 0x94, 0x02,
-  0x6b, 0x10, 0xdf, 0x2c, 0x83, 0x18, 0x94, 0x41, 0x60, 0xad, 0x61, 0x0a,
-  0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0x60, 0x94, 0x05, 0x86,
-  0x7c, 0xac, 0x08, 0xe2, 0x53, 0x44, 0x8d, 0xe8, 0x70, 0x43, 0x30, 0x23,
-  0x60, 0x30, 0xcb, 0x30, 0x06, 0x64, 0x10, 0xd8, 0x50, 0x1b, 0xf0, 0x99,
-  0x25, 0x48, 0x03, 0xa3, 0x0d, 0x22, 0x3e, 0xb3, 0x04, 0x69, 0x30, 0x1c,
-  0x01, 0x0b, 0xb5, 0x21, 0x7c, 0xb3, 0x0c, 0x66, 0x90, 0x06, 0x81, 0xc5,
-  0x82, 0x6d, 0xc4, 0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x51,
-  0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x60, 0xa2, 0xc3, 0x0d,
-  0x81, 0x8f, 0x80, 0xc1, 0x2c, 0xc3, 0x19, 0xa0, 0x41, 0x60, 0xbe, 0x31,
-  0xc4, 0x67, 0x96, 0x20, 0x0d, 0x8c, 0x08, 0x0f, 0xf8, 0xcc, 0x12, 0xa4,
-  0xc1, 0x40, 0x8b, 0xa1, 0x8d, 0x01, 0x46, 0x06, 0xc4, 0x19, 0x08, 0x68,
-  0x60, 0x17, 0x65, 0x70, 0xc1, 0x30, 0x06, 0x1e, 0xe4, 0x11, 0x9f, 0xe1,
-  0x88, 0x5e, 0x28, 0x0f, 0xe2, 0x9b, 0x65, 0x50, 0x83, 0x36, 0x08, 0xcc,
-  0x3c, 0x7c, 0x21, 0x3e, 0x16, 0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x8c,
-  0xb2, 0xc0, 0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x70, 0x13, 0x1d, 0x6e,
-  0x08, 0xd8, 0x04, 0x0c, 0x66, 0x19, 0xd6, 0x80, 0x0d, 0x02, 0x1b, 0xdc,
-  0x03, 0x3e, 0xb3, 0x04, 0x71, 0x60, 0xeb, 0x41, 0xc4, 0x67, 0x96, 0x20,
-  0x0e, 0x86, 0x23, 0xd0, 0x81, 0x3d, 0x84, 0x6f, 0x96, 0xc1, 0x0d, 0xe2,
-  0x20, 0xb0, 0x74, 0x68, 0x8f, 0xf8, 0x58, 0xe0, 0xd0, 0xe7, 0x82, 0x61,
-  0x2e, 0x30, 0xca, 0x82, 0x48, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x22, 0x4f,
-  0x74, 0xb8, 0x21, 0xb8, 0x13, 0x30, 0x98, 0x65, 0x78, 0x03, 0x38, 0x08,
-  0xac, 0x3e, 0x86, 0xf8, 0xcc, 0x12, 0xc4, 0x81, 0x11, 0xfa, 0x01, 0x9f,
-  0x59, 0x82, 0x38, 0x18, 0x68, 0x31, 0xb4, 0x35, 0xc0, 0xd8, 0x80, 0x78,
-  0x03, 0x01, 0x0e, 0xcc, 0xa3, 0x0d, 0x2e, 0x18, 0xe6, 0x02, 0xa3, 0x6e,
-  0x33, 0xea, 0xcc, 0x63, 0x98, 0xbb, 0x87, 0x61, 0x8e, 0x18, 0xe6, 0x88,
-  0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0xa0, 0x52, 0x15, 0x3b, 0x91,
-  0x11, 0x52, 0x19, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18,
-  0x84, 0xd1, 0x04, 0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00, 0x41,
-  0x30, 0x78, 0x62, 0xa5, 0x4f, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10,
-  0x04, 0x83, 0x47, 0x56, 0xfc, 0x24, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00,
-  0x41, 0x30, 0x78, 0x66, 0xe5, 0x4f, 0x12, 0x22, 0x18, 0x31, 0x50, 0x00,
-  0x10, 0x04, 0x83, 0x2d, 0x56, 0xfc, 0x84, 0x47, 0x82, 0x55, 0x59, 0x93,
-  0x54, 0x19, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x38, 0x61, 0x8c, 0x13,
-  0xc6, 0xa8, 0x60, 0x4e, 0xae, 0x86, 0x60, 0x2f, 0x30, 0x6a, 0x96, 0x40,
-  0x0e, 0x06, 0x5a, 0x0c, 0xdd, 0xe8, 0x5c, 0x89, 0xb3, 0x89, 0x4f, 0x88,
-  0x03, 0x57, 0x02, 0x83, 0x0b, 0xc6, 0xb9, 0x60, 0x80, 0x12, 0xf2, 0x04,
-  0x2f, 0x18, 0x60, 0xc4, 0xc0, 0x01, 0x40, 0x10, 0x0c, 0xba, 0x5e, 0x09,
-  0x95, 0x35, 0x21, 0x13, 0x5c, 0x19, 0x82, 0x52, 0x29, 0x15, 0x3c, 0xb1,
-  0x95, 0x59, 0x82, 0x11, 0x1a, 0x6e, 0xa8, 0x0d, 0x5c, 0x01, 0x83, 0x59,
-  0x06, 0x3a, 0x88, 0x89, 0x60, 0xc4, 0xc0, 0x00, 0x40, 0x10, 0x0c, 0x0e,
-  0x73, 0x21, 0x95, 0x99, 0x18, 0x31, 0x30, 0x00, 0x10, 0x04, 0x83, 0xe3,
-  0x5c, 0x4a, 0x65, 0x26, 0x4c, 0x80, 0x13, 0xf8, 0x98, 0x10, 0x27, 0xf0,
-  0x19, 0x4d, 0x58, 0x93, 0x61, 0xb8, 0x21, 0xf0, 0x15, 0x30, 0x98, 0x65,
-  0xa8, 0x83, 0x3b, 0x08, 0x86, 0x23, 0x8c, 0x39, 0x19, 0xbe, 0x3b, 0x86,
-  0x19, 0x6e, 0x08, 0xd2, 0x84, 0x0c, 0x6a, 0x08, 0x74, 0x38, 0x22, 0xb9,
-  0x93, 0xe1, 0xab, 0x40, 0xd0, 0x5b, 0x86, 0x19, 0x6e, 0x08, 0xd8, 0x84,
-  0x0c, 0x2a, 0x18, 0x74, 0x96, 0xc1, 0x0e, 0x56, 0x21, 0x38, 0x18, 0x19,
-  0xe6, 0xc2, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa8, 0xe4,
-  0xc5, 0x57, 0xf4, 0x84, 0x5d, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08,
-  0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x38, 0x64, 0xc4, 0x00,
-  0x01, 0x40, 0x10, 0x0c, 0x9e, 0x7c, 0x29, 0x97, 0x83, 0x08, 0x46, 0x0c,
-  0x10, 0x00, 0x04, 0xc1, 0xe0, 0xd1, 0x17, 0x73, 0x61, 0x88, 0x60, 0xc4,
-  0x00, 0x01, 0x40, 0x10, 0x0c, 0x9e, 0x7d, 0x39, 0x17, 0x89, 0x08, 0x46,
-  0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0xcb, 0x17, 0x73, 0x21, 0x95, 0x60,
-  0x5e, 0x66, 0x25, 0x5e, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x4e,
-  0x18, 0xe3, 0x84, 0x31, 0x2a, 0xd8, 0x95, 0xab, 0x21, 0xd8, 0x0b, 0x8c,
-  0x9a, 0x25, 0x58, 0x85, 0xe1, 0x06, 0xcf, 0x5f, 0xc0, 0x60, 0x96, 0x01,
-  0x0f, 0xf2, 0x20, 0x28, 0x52, 0x71, 0x17, 0xb8, 0xc0, 0xa8, 0x11, 0x83,
-  0x03, 0x00, 0x41, 0x30, 0x58, 0x52, 0xe6, 0x5d, 0xca, 0x40, 0x56, 0x46,
-  0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0x51, 0x99, 0x77, 0x09, 0x84, 0x0b,
-  0x86, 0xa9, 0x53, 0x99, 0x17, 0xb8, 0xc0, 0xa8, 0x11, 0x83, 0x03, 0x00,
-  0x41, 0x30, 0x58, 0x5c, 0x86, 0x5e, 0xd2, 0xe0, 0x56, 0x46, 0x0c, 0x0e,
-  0x00, 0x04, 0xc1, 0x60, 0x79, 0x19, 0x7a, 0x09, 0x84, 0x0b, 0x86, 0xb9,
-  0xc0, 0xa8, 0x3b, 0x8c, 0x3a, 0x38, 0x19, 0xe6, 0x42, 0x63, 0x98, 0x23,
-  0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa8, 0x68,
-  0x06, 0x64, 0x78, 0xc5, 0x65, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08,
-  0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00,
-  0x01, 0x40, 0x10, 0x0c, 0x9e, 0x9d, 0x39, 0x99, 0x84, 0x08, 0x46, 0x0c,
-  0x10, 0x00, 0x04, 0xc1, 0xe0, 0xe1, 0x19, 0x94, 0x49, 0x88, 0x60, 0xc4,
-  0x00, 0x01, 0x40, 0x10, 0x0c, 0x9e, 0x9e, 0x49, 0x99, 0x84, 0x08, 0x46,
-  0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0xdb, 0x19, 0x94, 0x31, 0x97, 0xa0,
-  0x66, 0xea, 0x65, 0x66, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x4e,
-  0x18, 0xe3, 0x84, 0x31, 0x2a, 0xe8, 0x97, 0xab, 0x21, 0xd8, 0x0b, 0x8c,
-  0x9a, 0x25, 0x58, 0x85, 0xe1, 0x06, 0x50, 0xf8, 0x19, 0x30, 0x98, 0x65,
-  0xd0, 0x83, 0x55, 0x08, 0xac, 0x57, 0x7e, 0x25, 0x3e, 0xc3, 0x11, 0xa6,
-  0x00, 0x2e, 0xc4, 0x37, 0xcb, 0xb0, 0x07, 0x7e, 0x10, 0x58, 0xb8, 0x9c,
-  0x42, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x18, 0x65, 0x81,
-  0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x91, 0x36, 0x3a, 0xdc, 0x10, 0x9c,
-  0x0d, 0x18, 0xcc, 0x32, 0xf0, 0x41, 0x1f, 0x04, 0x36, 0xa4, 0x0b, 0x7c,
-  0x66, 0x09, 0x44, 0xc1, 0xd0, 0x85, 0x88, 0xcf, 0x2c, 0x81, 0x28, 0x0c,
-  0x47, 0xc4, 0x42, 0xba, 0x08, 0xdf, 0x2c, 0xc3, 0x1f, 0x88, 0x42, 0x60,
-  0xb2, 0xa0, 0x2e, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0x60,
-  0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x04, 0xdd, 0xe8, 0x70,
-  0x43, 0x20, 0x37, 0x60, 0x30, 0xcb, 0x00, 0x0a, 0xa1, 0x10, 0x98, 0xbc,
-  0x0c, 0xf1, 0x99, 0x25, 0x10, 0x05, 0x23, 0xea, 0x05, 0x3e, 0xb3, 0x04,
-  0xa2, 0x30, 0xd0, 0x62, 0x68, 0x7c, 0x80, 0xf5, 0x01, 0x01, 0x0a, 0x42,
-  0x28, 0xe0, 0x85, 0x1f, 0x5c, 0x30, 0x8c, 0xd1, 0x0b, 0xbe, 0xc4, 0x67,
-  0x38, 0xc2, 0x17, 0xf2, 0x85, 0xf8, 0x66, 0x19, 0x46, 0xc1, 0x14, 0x02,
-  0xd3, 0x97, 0x5f, 0x88, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x02,
-  0xa3, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0x44, 0x47, 0x87,
-  0x1b, 0x02, 0xd0, 0x01, 0x83, 0x59, 0x06, 0x52, 0x28, 0x85, 0xc0, 0x06,
-  0x91, 0x81, 0xcf, 0x2c, 0x81, 0x2a, 0xd8, 0xbf, 0x10, 0xf1, 0x99, 0x25,
-  0x50, 0x85, 0xe1, 0x88, 0x74, 0x00, 0x19, 0xe1, 0x9b, 0x65, 0x38, 0x05,
-  0x55, 0x08, 0x4c, 0x1d, 0x42, 0x26, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60,
-  0x98, 0x0b, 0x8c, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x68,
-  0x1d, 0x1d, 0x6e, 0x08, 0x56, 0x07, 0x0c, 0x66, 0x19, 0x50, 0x21, 0x15,
-  0x02, 0x4b, 0x99, 0x21, 0x3e, 0xb3, 0x04, 0xaa, 0x60, 0x84, 0xcb, 0xc0,
-  0x67, 0x96, 0x40, 0x15, 0x06, 0x5a, 0x0c, 0x8d, 0x14, 0xb0, 0x52, 0x20,
-  0x50, 0x41, 0x48, 0x05, 0xf4, 0x30, 0x85, 0x0b, 0x86, 0xb9, 0xc0, 0xa8,
-  0xdb, 0x8c, 0x3a, 0x7d, 0x19, 0xe6, 0x56, 0x64, 0x98, 0x23, 0x86, 0x39,
-  0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa8, 0x7c, 0x47, 0x75,
-  0xcc, 0x06, 0x77, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13,
-  0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01, 0x40,
-  0x10, 0x0c, 0x9e, 0xf2, 0x89, 0x9d, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00,
-  0x04, 0xc1, 0xe0, 0x31, 0x1f, 0xd9, 0x49, 0x88, 0x60, 0xc4, 0x00, 0x01,
-  0x40, 0x10, 0x0c, 0x9e, 0xf3, 0x99, 0x9d, 0x84, 0x08, 0x46, 0x0c, 0x14,
-  0x00, 0x04, 0xc1, 0x60, 0x2b, 0x1f, 0xd9, 0x81, 0x9b, 0xe0, 0x77, 0xfe,
-  0xa6, 0x77, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x4e, 0x18, 0xe3,
-  0x84, 0x31, 0x2a, 0x38, 0x9d, 0xab, 0x21, 0xd8, 0x0b, 0x8c, 0x9a, 0x25,
-  0x58, 0x85, 0x81, 0x16, 0x43, 0x37, 0xec, 0xc0, 0xde, 0xea, 0xc0, 0x26,
-  0xf0, 0x40, 0x50, 0x05, 0x7b, 0xcb, 0x83, 0x59, 0x06, 0x56, 0x70, 0x05,
-  0x95, 0x18, 0x8e, 0x68, 0x09, 0xbe, 0x19, 0xbe, 0x73, 0x89, 0x61, 0x86,
-  0x1b, 0x02, 0xb9, 0x21, 0x83, 0x1a, 0x02, 0x1d, 0x8e, 0x90, 0x09, 0xd0,
-  0x19, 0xbe, 0x0a, 0x04, 0x3d, 0x9a, 0x18, 0x66, 0xb8, 0x21, 0xa8, 0x1b,
-  0x32, 0xa8, 0x60, 0xd0, 0x59, 0x86, 0x56, 0x10, 0x87, 0xe0, 0x72, 0x66,
-  0x98, 0x53, 0x93, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0xa0, 0xda,
-  0x9f, 0xf3, 0x19, 0x9d, 0xfa, 0x19, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21,
-  0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0xe2, 0x90, 0x11, 0x03,
-  0x04, 0x00, 0x41, 0x30, 0x78, 0x44, 0xc8, 0x7d, 0x0e, 0x22, 0x18, 0x31,
-  0x40, 0x00, 0x10, 0x04, 0x83, 0x67, 0x84, 0xde, 0x87, 0x21, 0x82, 0x11,
-  0x03, 0x04, 0x00, 0x41, 0x30, 0x78, 0x48, 0x08, 0x7e, 0x24, 0x22, 0x18,
-  0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x4d, 0x84, 0xde, 0xa7, 0x75, 0x02,
-  0xfe, 0xe1, 0x1d, 0xfd, 0x19, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x38,
-  0x61, 0x8c, 0x13, 0xc6, 0xa8, 0x80, 0x7c, 0xae, 0x86, 0x60, 0x2f, 0x30,
-  0x6a, 0x96, 0x40, 0x1c, 0x86, 0x1b, 0xce, 0xe2, 0x84, 0xc0, 0x60, 0x96,
-  0xe1, 0x15, 0x60, 0x21, 0xa8, 0xd6, 0xb9, 0x1f, 0xb8, 0xc0, 0xa8, 0x11,
-  0x83, 0x03, 0x00, 0x41, 0x30, 0x58, 0x64, 0x08, 0x7f, 0xd8, 0x62, 0x77,
-  0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0x99, 0x21, 0xfc, 0x09, 0x84,
-  0x0b, 0x86, 0x29, 0xd8, 0xe1, 0x1f, 0xb8, 0xc0, 0xa8, 0x11, 0x83, 0x03,
-  0x00, 0x41, 0x30, 0x58, 0x6e, 0xa8, 0x7f, 0xe4, 0x02, 0x7c, 0x46, 0x0c,
-  0x0e, 0x00, 0x04, 0xc1, 0x60, 0xc1, 0xa1, 0xfe, 0x09, 0x84, 0x0b, 0x86,
-  0xb9, 0xc0, 0xa8, 0x3b, 0x8c, 0xba, 0xbc, 0x19, 0xe6, 0x54, 0x65, 0x98,
-  0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa8,
-  0x7a, 0x28, 0x85, 0xca, 0xe7, 0x86, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41,
-  0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4,
-  0x00, 0x01, 0x40, 0x10, 0x0c, 0x1e, 0x32, 0x82, 0xa1, 0x84, 0x08, 0x46,
-  0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x29, 0xa3, 0x18, 0x4a, 0x88, 0x60,
-  0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x1e, 0x33, 0x92, 0xa1, 0x84, 0x08,
-  0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0x23, 0xa3, 0x18, 0x7a, 0x9f,
-  0xc0, 0x87, 0xfc, 0x87, 0x87, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08,
-  0x4e, 0x18, 0xe3, 0x84, 0x31, 0x2a, 0x30, 0xa1, 0xab, 0x21, 0xd8, 0x0b,
-  0x8c, 0x9a, 0x25, 0x10, 0x87, 0xe1, 0x86, 0xd4, 0x40, 0x23, 0x30, 0x98,
-  0x65, 0x88, 0x05, 0x71, 0x08, 0xcc, 0x7c, 0xd0, 0x27, 0x3e, 0xc3, 0x11,
-  0xad, 0x91, 0x3e, 0xc4, 0x37, 0xcb, 0x20, 0x0b, 0xb5, 0x10, 0x98, 0xfa,
-  0xb8, 0x46, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x18, 0x65,
-  0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x21, 0x47, 0x3a, 0xdc, 0x10,
-  0xc0, 0x11, 0x18, 0xcc, 0x32, 0xcc, 0x02, 0x2d, 0x04, 0x36, 0xc8, 0x0f,
-  0x7c, 0x66, 0x09, 0x72, 0xc1, 0xe2, 0x87, 0x88, 0xcf, 0x2c, 0x41, 0x2e,
-  0x0c, 0x47, 0xe0, 0x86, 0xfc, 0x08, 0xdf, 0x2c, 0x83, 0x2d, 0xe4, 0x42,
-  0x60, 0xb9, 0x31, 0x3f, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c,
-  0x60, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x44, 0x1f, 0xe9,
-  0x70, 0x43, 0xb0, 0x47, 0x60, 0x30, 0xcb, 0x70, 0x0b, 0xb8, 0x10, 0xd8,
-  0xfe, 0x0c, 0xf1, 0x99, 0x25, 0xc8, 0x05, 0x23, 0xfc, 0x07, 0x3e, 0xb3,
-  0x04, 0xb9, 0x30, 0xd0, 0x62, 0x68, 0xb3, 0x80, 0xd1, 0x02, 0x71, 0x0b,
-  0x02, 0x2e, 0x98, 0x4e, 0x2d, 0x5c, 0x30, 0x8c, 0xf5, 0x4f, 0x08, 0xc5,
-  0x67, 0x38, 0xe2, 0x3c, 0x44, 0x88, 0xf8, 0x66, 0x19, 0x74, 0xa1, 0x17,
-  0x02, 0x1b, 0x21, 0xf4, 0x88, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6,
-  0x02, 0xa3, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0x56, 0x49,
-  0x87, 0x1b, 0x82, 0x54, 0x02, 0x83, 0x59, 0x86, 0x5d, 0xe0, 0x85, 0xc0,
-  0x86, 0x15, 0x82, 0xcf, 0x2c, 0x41, 0x38, 0x18, 0x0a, 0x11, 0xf1, 0x99,
-  0x25, 0x08, 0x87, 0xe1, 0x08, 0xf9, 0x48, 0x21, 0xe1, 0x9b, 0x65, 0xf0,
-  0x85, 0x70, 0x08, 0x6c, 0x3e, 0x54, 0x28, 0x3e, 0x16, 0x38, 0xf4, 0xb9,
-  0x60, 0x98, 0x0b, 0x8c, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a,
-  0xb0, 0x25, 0x1d, 0x6e, 0x08, 0x68, 0x09, 0x0c, 0x66, 0x19, 0x7e, 0x01,
-  0x1c, 0x02, 0x93, 0xa1, 0x21, 0x3e, 0xb3, 0x04, 0xe1, 0x60, 0xc4, 0x0d,
-  0xc1, 0x67, 0x96, 0x20, 0x1c, 0x06, 0x5a, 0x0c, 0x6d, 0x17, 0x30, 0x5e,
-  0x20, 0x7e, 0x41, 0x00, 0x07, 0xfd, 0xe9, 0x85, 0x0b, 0x86, 0xb9, 0xc0,
-  0xa8, 0xdb, 0x8c, 0xba, 0x11, 0x1a, 0xe6, 0x68, 0x66, 0x98, 0x23, 0x86,
-  0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa8, 0xce, 0x69,
-  0x96, 0xde, 0x28, 0x9c, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46,
-  0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01,
-  0x40, 0x10, 0x0c, 0x1e, 0x77, 0xd2, 0xa5, 0x84, 0x08, 0x46, 0x0c, 0x10,
-  0x00, 0x04, 0xc1, 0xe0, 0x79, 0xa7, 0x5d, 0x4a, 0x88, 0x60, 0xc4, 0x00,
-  0x01, 0x40, 0x10, 0x0c, 0x1e, 0x78, 0xe2, 0xa5, 0x84, 0x08, 0x46, 0x0c,
-  0x14, 0x00, 0x04, 0xc1, 0x60, 0x73, 0xa7, 0x5d, 0xca, 0xa3, 0x00, 0x9d,
-  0x50, 0xc9, 0x9c, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x4e, 0x18,
-  0xe3, 0x84, 0x31, 0x2a, 0x80, 0xa5, 0xab, 0x21, 0xd8, 0x0b, 0x8c, 0x9a,
-  0x25, 0x10, 0x87, 0x81, 0x16, 0x43, 0x37, 0x5a, 0x01, 0x25, 0x03, 0x56,
-  0xb0, 0x89, 0x57, 0x10, 0xc2, 0x01, 0x25, 0x03, 0x58, 0x98, 0x65, 0x18,
-  0x87, 0x72, 0x98, 0x91, 0xe1, 0x08, 0x1c, 0x29, 0xa5, 0xe1, 0xbb, 0x1c,
-  0x19, 0x66, 0xb8, 0x21, 0xd8, 0x23, 0x32, 0xa8, 0x21, 0xd0, 0xe1, 0x88,
-  0x1c, 0x49, 0xa5, 0xe1, 0xab, 0x40, 0xd0, 0xdb, 0x91, 0x61, 0x86, 0x1b,
-  0x02, 0x3f, 0x22, 0x83, 0x0a, 0x06, 0x9d, 0x65, 0x20, 0x87, 0x7c, 0x08,
-  0x4e, 0x8c, 0x86, 0xb9, 0xb9, 0x19, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10,
-  0x0c, 0x2a, 0x92, 0x82, 0x27, 0x56, 0xf2, 0xa7, 0xd1, 0x84, 0x00, 0x18,
-  0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x0e,
-  0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x67, 0xa5, 0xee, 0xe9, 0x20,
-  0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0x78, 0x58, 0x0a, 0x9f, 0x18,
-  0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0xa7, 0xa5, 0xf2, 0x49,
-  0x22, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0xd8, 0x56, 0x0a, 0x9f,
-  0x6c, 0x29, 0x28, 0xa9, 0x72, 0x1a, 0xa9, 0xd1, 0x84, 0x00, 0x18, 0x4d,
-  0x10, 0x82, 0x13, 0xc6, 0x38, 0x61, 0x8c, 0x0a, 0xda, 0xe9, 0x6a, 0x08,
-  0xf6, 0x02, 0xa3, 0x66, 0x09, 0xf2, 0x61, 0xb8, 0x01, 0x4e, 0x60, 0x0a,
-  0x0c, 0x66, 0x19, 0xcc, 0xe1, 0x1c, 0x82, 0xb2, 0x25, 0x90, 0x82, 0x0b,
-  0x8c, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x65, 0xa7, 0x42, 0xea,
-  0x4e, 0xc8, 0x69, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x16, 0x9e, 0x0a,
-  0xa9, 0x40, 0xb8, 0x60, 0x98, 0xca, 0xa5, 0x92, 0x82, 0x0b, 0x8c, 0x1a,
-  0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x05, 0xac, 0x4c, 0x2a, 0x4f, 0xd2,
-  0x69, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x96, 0xb0, 0x32, 0xa9, 0x40,
-  0xb8, 0x60, 0x98, 0x0b, 0x8c, 0xba, 0xc3, 0xa8, 0x13, 0xa5, 0x61, 0x6e,
-  0x76, 0x86, 0x39, 0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10,
-  0x04, 0x83, 0xca, 0xac, 0x64, 0xca, 0x9d, 0xc0, 0x6a, 0x34, 0x21, 0x00,
-  0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88,
-  0x44, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x69, 0xab, 0x9c, 0x4a,
-  0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x1e, 0xb7, 0xd2, 0xa9,
-  0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x79, 0xab, 0x9d,
-  0x4a, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0xb6, 0xb6, 0xd2,
-  0x29, 0x7c, 0x0a, 0xce, 0xea, 0xa4, 0xca, 0x6a, 0x34, 0x21, 0x00, 0x46,
-  0x13, 0x84, 0xe0, 0x84, 0x31, 0x4e, 0x18, 0xa3, 0x82, 0x97, 0xba, 0x1a,
-  0x82, 0xbd, 0xc0, 0xa8, 0x59, 0x82, 0x7c, 0x18, 0x6e, 0x90, 0x95, 0xb8,
-  0x02, 0x83, 0x59, 0x06, 0x74, 0xc8, 0x87, 0xc0, 0xde, 0x29, 0x9e, 0xe2,
-  0x33, 0x1c, 0x81, 0x2b, 0xf2, 0x44, 0x7c, 0xb3, 0x0c, 0xe9, 0xc0, 0x0e,
-  0x81, 0xcd, 0x53, 0xae, 0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73,
-  0x81, 0x51, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x7b, 0xa5,
-  0xc3, 0x0d, 0x41, 0x5e, 0x81, 0xc1, 0x2c, 0x83, 0x3a, 0xac, 0x43, 0x60,
-  0xc3, 0x3e, 0xc1, 0x67, 0x96, 0x00, 0x1e, 0x4c, 0x9f, 0x88, 0xf8, 0xcc,
-  0x12, 0xc0, 0xc3, 0x70, 0xc4, 0xb8, 0xec, 0x93, 0xf0, 0xcd, 0x32, 0xb4,
-  0x03, 0x3c, 0x04, 0x46, 0x2e, 0xfc, 0x14, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c,
-  0x30, 0xcc, 0x05, 0x46, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45,
-  0x98, 0x96, 0x0e, 0x37, 0x04, 0xa4, 0x05, 0x06, 0xb3, 0x0c, 0xee, 0xf0,
-  0x0e, 0x81, 0x91, 0xd4, 0x10, 0x9f, 0x59, 0x02, 0x78, 0x30, 0xe2, 0xa4,
-  0xe0, 0x33, 0x4b, 0x00, 0x0f, 0x03, 0x2d, 0x86, 0xa6, 0x0e, 0xd8, 0x3a,
-  0x10, 0xee, 0x20, 0xbc, 0x03, 0x6f, 0xb1, 0xc3, 0x05, 0xc3, 0x98, 0x49,
-  0xa9, 0x54, 0x7c, 0x86, 0x23, 0xdc, 0x65, 0xa5, 0x88, 0x6f, 0x96, 0x21,
-  0x1e, 0xe8, 0x21, 0x30, 0x96, 0x7a, 0x97, 0xf8, 0x58, 0x30, 0xd0, 0xe7,
-  0x82, 0x61, 0x2e, 0x30, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29,
-  0x82, 0xb6, 0x74, 0xb8, 0x21, 0x90, 0x2d, 0x30, 0x98, 0x65, 0x90, 0x87,
-  0x79, 0x08, 0x6c, 0xa0, 0x29, 0xf8, 0xcc, 0x12, 0xe0, 0x83, 0xc5, 0x14,
-  0x11, 0x9f, 0x59, 0x02, 0x7c, 0x18, 0x8e, 0xc8, 0x17, 0x99, 0x12, 0xbe,
-  0x59, 0x86, 0x7a, 0xc0, 0x87, 0xc0, 0xf4, 0x65, 0xa6, 0xe2, 0x63, 0x81,
-  0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x28, 0x0b, 0x22, 0xf9, 0x58, 0x11,
-  0xc4, 0xa7, 0x88, 0xdf, 0xd2, 0xe1, 0x86, 0xa0, 0xb7, 0xc0, 0x60, 0x96,
-  0xc1, 0x1e, 0xee, 0x21, 0xb0, 0x9d, 0x1a, 0xe2, 0x33, 0x4b, 0x80, 0x0f,
-  0x46, 0x80, 0x15, 0x7c, 0x66, 0x09, 0xf0, 0x61, 0xa0, 0xc5, 0xd0, 0xe4,
-  0x01, 0x9b, 0x07, 0xc2, 0x1e, 0x84, 0x7b, 0x40, 0x31, 0x7a, 0xb8, 0x60,
-  0x98, 0x0b, 0x8c, 0xba, 0xcd, 0xa8, 0x63, 0xa9, 0x61, 0xae, 0x87, 0x86,
-  0x39, 0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83,
-  0x0a, 0xbe, 0x78, 0x0b, 0xaf, 0xd4, 0x6b, 0x34, 0x21, 0x00, 0x46, 0x13,
-  0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46,
-  0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0xb9, 0xaf, 0xf1, 0x4a, 0x88, 0x60,
-  0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x1e, 0xfc, 0x22, 0xaf, 0x84, 0x08,
-  0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0xc9, 0xaf, 0xf2, 0x4a, 0x88,
-  0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0xb6, 0xfb, 0x22, 0x2f, 0xd1,
-  0x0a, 0xe2, 0x2b, 0xb6, 0xde, 0x6b, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84,
-  0xe0, 0x84, 0x31, 0x4e, 0x18, 0xa3, 0x82, 0xdc, 0xba, 0x1a, 0x82, 0xbd,
-  0xc0, 0xa8, 0x59, 0x82, 0x7c, 0x18, 0x68, 0x31, 0x74, 0x83, 0x1c, 0x74,
-  0x36, 0x18, 0x07, 0x9b, 0x30, 0x07, 0x01, 0x1f, 0x74, 0x36, 0x38, 0x87,
-  0x59, 0x06, 0x7d, 0xe0, 0x07, 0x9e, 0x19, 0x8e, 0xf8, 0x19, 0xd7, 0x1a,
-  0xbe, 0x03, 0x9b, 0x61, 0x86, 0x1b, 0x02, 0xd2, 0x22, 0x83, 0x1a, 0x02,
-  0x1d, 0x8e, 0x10, 0x1b, 0xd9, 0x1a, 0xbe, 0x0a, 0x04, 0x3d, 0xb2, 0x19,
-  0x66, 0xb8, 0x21, 0x38, 0x2d, 0x32, 0xa8, 0x60, 0xd0, 0x59, 0x86, 0x7d,
-  0x80, 0x89, 0xe0, 0xd6, 0x6a, 0x98, 0xe3, 0xa3, 0x61, 0x46, 0x0c, 0x0e,
-  0x00, 0x04, 0xc1, 0xa0, 0x6a, 0xb1, 0xfc, 0xaa, 0xad, 0x13, 0x1b, 0x4d,
-  0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62,
-  0x28, 0xe2, 0x90, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0x78, 0x68, 0x0c,
-  0xc4, 0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0xa7, 0xc6,
-  0x42, 0x8c, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0x78, 0x6c,
-  0x4c, 0xc4, 0x24, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x8d,
-  0xc6, 0x42, 0xec, 0xb7, 0x02, 0x17, 0x73, 0x2f, 0x16, 0x1b, 0x4d, 0x08,
-  0x80, 0xd1, 0x04, 0x21, 0x38, 0x61, 0x8c, 0x13, 0xc6, 0xa8, 0xc0, 0xbe,
-  0xae, 0x86, 0x60, 0x2f, 0x30, 0x6a, 0x96, 0x00, 0x26, 0x86, 0x1b, 0xf2,
-  0x26, 0xc7, 0xc0, 0x60, 0x96, 0xa1, 0x1f, 0xfc, 0x21, 0xa8, 0xdf, 0x4a,
-  0x31, 0xb8, 0xc0, 0xa8, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x58, 0xc8,
-  0x4c, 0xc5, 0xfc, 0xa6, 0xbd, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60,
-  0x29, 0x33, 0x15, 0x0b, 0x84, 0x0b, 0x86, 0x29, 0xf1, 0x72, 0x31, 0xb8,
-  0xc0, 0xa8, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x58, 0xd2, 0xec, 0xc5,
-  0x44, 0x47, 0xbe, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0x51, 0xb3,
-  0x17, 0x0b, 0x84, 0x0b, 0x86, 0xb9, 0xc0, 0xa8, 0x3b, 0x8c, 0xba, 0xd5,
-  0x1a, 0xe6, 0x78, 0x69, 0x98, 0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83,
-  0x03, 0x00, 0x41, 0x30, 0xa8, 0xde, 0x6c, 0xc7, 0xee, 0x2b, 0xcd, 0x46,
-  0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81,
-  0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x1e, 0x3b,
-  0x13, 0xb3, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0xb9,
-  0xb3, 0x31, 0x4b, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x1e,
-  0x3c, 0x23, 0xb3, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60,
-  0xb3, 0xb3, 0x31, 0x0b, 0xb1, 0x00, 0xce, 0x60, 0xcc, 0xcd, 0x46, 0x13,
-  0x02, 0x60, 0x34, 0x41, 0x08, 0x4e, 0x18, 0xe3, 0x84, 0x31, 0x2a, 0xc0,
-  0xb1, 0xab, 0x21, 0xd8, 0x0b, 0x8c, 0x9a, 0x25, 0x80, 0x89, 0xe1, 0x86,
-  0xdd, 0xd1, 0x33, 0x30, 0x98, 0x65, 0xf8, 0x07, 0x98, 0x08, 0x0c, 0xbf,
-  0xf4, 0x2b, 0x3e, 0xc3, 0x11, 0xbf, 0xb3, 0x5f, 0xc4, 0x37, 0xcb, 0x00,
-  0x12, 0x23, 0x11, 0x18, 0x7f, 0x81, 0x4f, 0x7c, 0x2c, 0x18, 0xe8, 0x73,
-  0xc1, 0x30, 0x17, 0x18, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14,
-  0x41, 0x6a, 0x3a, 0xdc, 0x10, 0x88, 0x1a, 0x18, 0xcc, 0x32, 0x84, 0x84,
-  0x48, 0x04, 0x36, 0x90, 0x18, 0x7c, 0x66, 0x09, 0x4e, 0xc2, 0x46, 0x8c,
-  0x88, 0xcf, 0x2c, 0xc1, 0x49, 0x0c, 0x47, 0xa8, 0x0f, 0x89, 0x09, 0xdf,
-  0x2c, 0x03, 0x49, 0x9c, 0x44, 0x60, 0xeb, 0x53, 0x62, 0xf1, 0xb1, 0xc0,
-  0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0x60, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08,
-  0xe2, 0x53, 0xc4, 0xab, 0xe9, 0x70, 0x43, 0xd0, 0x6a, 0x60, 0x30, 0xcb,
-  0x50, 0x12, 0x26, 0x11, 0x58, 0x8b, 0x0d, 0xf1, 0x99, 0x25, 0x38, 0x09,
-  0x23, 0x60, 0x0c, 0x3e, 0xb3, 0x04, 0x27, 0x31, 0xd0, 0x62, 0x68, 0x21,
-  0x81, 0x89, 0x04, 0x51, 0x12, 0x82, 0x49, 0xa8, 0xdf, 0x48, 0x5c, 0x30,
-  0x8c, 0xbd, 0xd8, 0x8c, 0xc5, 0x67, 0x38, 0xe2, 0x7e, 0x68, 0x8c, 0xf8,
-  0x66, 0x19, 0x50, 0x62, 0x25, 0x02, 0xab, 0x31, 0xfc, 0x89, 0x8f, 0x05,
-  0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa3, 0x2c, 0x30, 0xe4, 0x63, 0x45,
-  0x10, 0x9f, 0x22, 0x7a, 0x4d, 0x87, 0x1b, 0x82, 0x5d, 0x03, 0x83, 0x59,
-  0x86, 0x94, 0x50, 0x89, 0xc0, 0x86, 0x1e, 0x83, 0xcf, 0x2c, 0xc1, 0x4b,
-  0x98, 0x8e, 0x11, 0xf1, 0x99, 0x25, 0x78, 0x89, 0xe1, 0x08, 0x11, 0xda,
-  0x31, 0xe1, 0x9b, 0x65, 0x60, 0x89, 0x97, 0x08, 0x6c, 0x84, 0x78, 0x2c,
-  0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x8c, 0xb2, 0x20, 0x92,
-  0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x40, 0x37, 0x1d, 0x6e, 0x08, 0xcc, 0x0d,
-  0x0c, 0x66, 0x19, 0x5a, 0xc2, 0x25, 0x02, 0x23, 0xb3, 0x21, 0x3e, 0xb3,
-  0x04, 0x2f, 0x61, 0x44, 0x9a, 0xc1, 0x67, 0x96, 0xe0, 0x25, 0x06, 0x5a,
-  0x0c, 0x2d, 0x25, 0x30, 0x95, 0x20, 0x5a, 0x42, 0x70, 0x09, 0x1d, 0x0c,
-  0x56, 0xe2, 0x82, 0x61, 0x2e, 0x30, 0xea, 0x36, 0xa3, 0xae, 0xc6, 0x86,
-  0x39, 0xb3, 0x1a, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00,
-  0x40, 0x10, 0x0c, 0xaa, 0x7c, 0x2b, 0xb7, 0x50, 0x9b, 0xb7, 0xd1, 0x84,
-  0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86,
-  0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x07, 0xe4, 0xd8,
-  0x2d, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0x78, 0x42, 0xae,
-  0xdd, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x47, 0xe4,
-  0xdc, 0x2d, 0x21, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0xd8, 0x40,
-  0xae, 0xdd, 0x56, 0x2d, 0xd0, 0x37, 0x5d, 0xc3, 0xb7, 0xd1, 0x84, 0x00,
-  0x18, 0x4d, 0x10, 0x82, 0x13, 0xc6, 0x38, 0x61, 0x8c, 0x0a, 0xc4, 0xed,
-  0x6a, 0x08, 0xf6, 0x02, 0xa3, 0x66, 0x09, 0x60, 0x62, 0xa0, 0xc5, 0xd0,
-  0x8d, 0x7d, 0x60, 0xeb, 0x40, 0x1f, 0x6c, 0xa2, 0x1f, 0x84, 0x97, 0x60,
-  0xeb, 0xc0, 0x1f, 0x46, 0x0c, 0x0c, 0x00, 0x04, 0xc1, 0xe0, 0x80, 0xb9,
-  0x76, 0xeb, 0x2b, 0xa3, 0x91, 0x9e, 0x89, 0x8f, 0x09, 0x81, 0x7c, 0x2c,
-  0xf8, 0x19, 0xf8, 0x58, 0xe1, 0x12, 0xf1, 0xb1, 0x22, 0x90, 0x8f, 0x05,
-  0x30, 0x01, 0x9f, 0x11, 0x03, 0x03, 0x00, 0x41, 0x30, 0x38, 0x6e, 0x8e,
-  0xde, 0x46, 0xcb, 0x84, 0x22, 0x3e, 0x16, 0x08, 0xf2, 0xb1, 0xe0, 0x80,
-  0xcf, 0x05, 0xe3, 0x5c, 0x30, 0x40, 0x09, 0xf3, 0x86, 0x17, 0x0c, 0x30,
-  0x62, 0xe0, 0x00, 0x20, 0x08, 0x06, 0xdd, 0xcd, 0xed, 0x5b, 0xb9, 0xf9,
-  0x9a, 0xcc, 0x0d, 0xc1, 0xbf, 0xfd, 0x9b, 0xbc, 0xc1, 0xdc, 0x2c, 0xc1,
-  0x08, 0x0d, 0x37, 0xbc, 0x18, 0xcd, 0x81, 0xc1, 0x2c, 0x83, 0x4c, 0x8c,
-  0x50, 0x30, 0x62, 0x60, 0x00, 0x20, 0x08, 0x06, 0x07, 0xd8, 0xf9, 0x5b,
-  0x6b, 0x59, 0x20, 0x6f, 0xf0, 0x19, 0x31, 0x30, 0x00, 0x10, 0x04, 0x83,
-  0x43, 0xec, 0x40, 0xce, 0xb5, 0x2c, 0xa0, 0x37, 0xf8, 0x8c, 0x26, 0x94,
-  0xdb, 0x30, 0xdc, 0x10, 0xe0, 0x1c, 0x18, 0xcc, 0x32, 0xcc, 0x44, 0x4d,
-  0x04, 0xc3, 0x11, 0x45, 0xbb, 0x0d, 0xdf, 0x19, 0xc3, 0x0c, 0x37, 0x04,
-  0xe3, 0x46, 0x06, 0x35, 0x04, 0x3a, 0x1c, 0x71, 0xc4, 0xdb, 0xf0, 0x55,
-  0x20, 0xe8, 0x25, 0xc3, 0x0c, 0x37, 0x04, 0xe6, 0x46, 0x06, 0x15, 0x0c,
-  0x3a, 0xcb, 0x40, 0x13, 0x69, 0x11, 0x9c, 0xaa, 0x0d, 0x73, 0xbb, 0x35,
-  0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x54, 0x6c, 0x87, 0x73, 0xf4,
-  0x66, 0x76, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83,
-  0x30, 0x9a, 0x40, 0x0c, 0x45, 0x1c, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08,
-  0x06, 0xcf, 0xdc, 0xfd, 0xdc, 0x41, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82,
-  0x60, 0xf0, 0xd0, 0x1d, 0xd8, 0x31, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20,
-  0x08, 0x06, 0x4f, 0xdd, 0x85, 0x9d, 0x44, 0x04, 0x23, 0x06, 0x0a, 0x00,
-  0x82, 0x60, 0xb0, 0xcd, 0x1d, 0xd8, 0xf9, 0x5b, 0xd0, 0x76, 0x2d, 0xb7,
-  0x76, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0x27, 0x8c, 0x71, 0xc2,
-  0x18, 0x15, 0xd4, 0xdc, 0xd5, 0x10, 0xec, 0x05, 0x46, 0xcd, 0x12, 0xa4,
-  0xc5, 0x70, 0x83, 0x87, 0x77, 0x60, 0x30, 0xcb, 0x60, 0x13, 0x37, 0x11,
-  0x94, 0xbf, 0xa1, 0x1d, 0x5c, 0x60, 0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20,
-  0x18, 0x2c, 0xa3, 0x97, 0x76, 0x64, 0xc0, 0x72, 0x23, 0x06, 0x07, 0x00,
-  0x82, 0x60, 0xb0, 0x90, 0x5e, 0xda, 0x05, 0xc2, 0x05, 0xc3, 0x54, 0xc8,
-  0xb5, 0x1d, 0x5c, 0x60, 0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x2c,
-  0xa8, 0xe7, 0x76, 0x67, 0x10, 0x73, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60,
-  0xb0, 0xa4, 0x9e, 0xdb, 0x05, 0xc2, 0x05, 0xc3, 0x5c, 0x60, 0xd4, 0x1d,
-  0x46, 0x9d, 0xba, 0x0d, 0x73, 0xfb, 0x35, 0xcc, 0x11, 0xc3, 0x1c, 0x31,
-  0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x54, 0xae, 0xa7, 0x77, 0x36,
-  0x87, 0x7a, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83,
-  0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08,
-  0x06, 0x4f, 0xed, 0x85, 0x5e, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82,
-  0x60, 0xf0, 0xd8, 0x9e, 0xe8, 0x25, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20,
-  0x08, 0x06, 0xcf, 0xed, 0x8d, 0x5e, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00,
-  0x82, 0x60, 0xb0, 0xd5, 0x9e, 0xe8, 0x81, 0x5d, 0xf0, 0x7a, 0x6f, 0xd7,
-  0x7a, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0x27, 0x8c, 0x71, 0xc2,
-  0x18, 0x15, 0xdc, 0xdd, 0xd5, 0x10, 0xec, 0x05, 0x46, 0xcd, 0x12, 0xa4,
-  0xc5, 0x70, 0x03, 0x28, 0xe4, 0x1e, 0x18, 0xcc, 0x32, 0xe0, 0x44, 0x5a,
-  0x04, 0x76, 0x73, 0x39, 0x17, 0x9f, 0xe1, 0x88, 0x52, 0xd0, 0x39, 0xe2,
-  0x9b, 0x65, 0xc8, 0x09, 0x9e, 0x08, 0x6c, 0xe7, 0x4c, 0x21, 0x3e, 0x16,
-  0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x8c, 0xb2, 0xc0, 0x90, 0x8f, 0x15,
-  0x41, 0x7c, 0x8a, 0x18, 0x3f, 0x1d, 0x6e, 0x08, 0xc2, 0x0f, 0x0c, 0x66,
-  0x19, 0x74, 0x62, 0x27, 0x02, 0x1b, 0xc6, 0x0e, 0x3e, 0xb3, 0x04, 0x60,
-  0x61, 0x62, 0x47, 0xc4, 0x67, 0x96, 0x00, 0x2c, 0x86, 0x23, 0x60, 0x61,
-  0xec, 0x84, 0x6f, 0x96, 0xa1, 0x27, 0xc0, 0x22, 0xb0, 0x58, 0x20, 0xbb,
-  0xf8, 0x58, 0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x30, 0xca, 0x82, 0x48,
-  0x3e, 0x56, 0x04, 0xf1, 0x29, 0xc2, 0xfd, 0x74, 0xb8, 0x21, 0x60, 0x3f,
-  0x30, 0x98, 0x65, 0xf0, 0x89, 0x9f, 0x08, 0x8c, 0xed, 0x86, 0xf8, 0xcc,
-  0x12, 0x80, 0x85, 0x11, 0x6f, 0x07, 0x9f, 0x59, 0x02, 0xb0, 0x18, 0x68,
-  0x31, 0x34, 0x9d, 0xc0, 0x76, 0x82, 0xf0, 0x09, 0xe1, 0x27, 0xec, 0x82,
-  0x27, 0x2e, 0x18, 0xc6, 0xdc, 0x4e, 0xee, 0xe2, 0x33, 0x1c, 0xc1, 0x0b,
-  0x73, 0x47, 0x7c, 0xb3, 0x0c, 0x61, 0x41, 0x16, 0x81, 0xd1, 0x5d, 0x2f,
-  0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x51, 0x16, 0x18,
-  0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0xfc, 0xa7, 0xc3, 0x0d, 0x81, 0xfe,
-  0x81, 0xc1, 0x2c, 0x83, 0x58, 0x8c, 0x45, 0x60, 0x03, 0xdf, 0xc1, 0x67,
-  0x96, 0x00, 0x2d, 0x2c, 0xef, 0x88, 0xf8, 0xcc, 0x12, 0xa0, 0xc5, 0x70,
-  0xc4, 0x39, 0xe8, 0x9d, 0xf0, 0xcd, 0x32, 0x94, 0x05, 0x5a, 0x04, 0x86,
-  0x0e, 0x7b, 0x17, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x46,
-  0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0x9c, 0x60, 0xa0, 0xc3,
-  0x0d, 0x41, 0x09, 0x06, 0x60, 0x30, 0xcb, 0x60, 0x16, 0x67, 0x11, 0xd8,
-  0xe8, 0x0d, 0xf1, 0x99, 0x25, 0x40, 0x0b, 0x23, 0x50, 0x0f, 0x3e, 0xb3,
-  0x04, 0x68, 0x31, 0xd0, 0x62, 0x68, 0x62, 0x81, 0x8d, 0x05, 0x61, 0x16,
-  0xc2, 0x59, 0x90, 0x07, 0x59, 0x5c, 0x30, 0xcc, 0x05, 0x46, 0xdd, 0x66,
-  0xd4, 0xd1, 0xdd, 0x30, 0x57, 0x6a, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3,
-  0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x41, 0x85, 0x83, 0x01, 0x09, 0x06,
-  0xe0, 0x27, 0x83, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68,
-  0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00,
-  0x08, 0x82, 0xc1, 0xf3, 0x83, 0xc1, 0x0a, 0x06, 0x09, 0x11, 0x8c, 0x18,
-  0x20, 0x00, 0x08, 0x82, 0xc1, 0x03, 0x86, 0x01, 0x0b, 0x06, 0x09, 0x11,
-  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x13, 0x86, 0x41, 0x0b, 0x06,
-  0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0xf6, 0x83, 0x01,
-  0x0b, 0x06, 0xea, 0x17, 0xe4, 0x60, 0x90, 0x7f, 0x37, 0x18, 0x8c, 0x26,
-  0x04, 0xc0, 0x68, 0x82, 0x10, 0x9c, 0x30, 0xc6, 0x09, 0x63, 0x54, 0x10,
-  0x82, 0xc1, 0xd5, 0x10, 0xec, 0x05, 0x46, 0xcd, 0x12, 0xa4, 0xc5, 0x40,
-  0x8b, 0xa1, 0x1b, 0x34, 0xa1, 0xfe, 0xc1, 0x4c, 0xd8, 0x84, 0x4d, 0x08,
-  0x68, 0xa1, 0xfe, 0xc1, 0x4d, 0xcc, 0x32, 0xa8, 0x05, 0x5b, 0xa8, 0xc4,
-  0x70, 0xc4, 0x4b, 0xd8, 0xdf, 0xf0, 0x1d, 0x4c, 0x0c, 0x33, 0xdc, 0x10,
-  0xb0, 0x1f, 0x19, 0xd4, 0x10, 0xe8, 0x70, 0x04, 0x4c, 0xe8, 0xdf, 0xf0,
-  0x55, 0x20, 0xe8, 0xc9, 0xc4, 0x30, 0xc3, 0x0d, 0xc1, 0xfb, 0x91, 0x41,
-  0x05, 0x83, 0xce, 0x32, 0xac, 0x05, 0x68, 0x04, 0x37, 0x7b, 0xc3, 0x1c,
-  0xb9, 0x0d, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x55, 0x1d, 0x06,
-  0x61, 0x18, 0xf4, 0xdf, 0x1b, 0x06, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20,
-  0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x1c, 0x32, 0x62,
-  0x80, 0x00, 0x20, 0x08, 0x06, 0x0f, 0x1f, 0x06, 0x68, 0x18, 0x1c, 0x44,
-  0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x4f, 0x1f, 0x06, 0x69, 0x18,
-  0x30, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x8f, 0x1f, 0x06,
-  0x6a, 0x18, 0x48, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x1b,
-  0x1f, 0x06, 0x69, 0x18, 0x9c, 0x60, 0x10, 0xd8, 0x61, 0x60, 0x83, 0x01,
-  0x1d, 0x06, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0x27, 0x8c, 0x71,
-  0xc2, 0x18, 0x15, 0xf8, 0x60, 0x70, 0x35, 0x04, 0x7b, 0x81, 0x51, 0xb3,
-  0x04, 0xa0, 0x31, 0xdc, 0x70, 0x16, 0xa1, 0x18, 0x80, 0xc1, 0x2c, 0x43,
-  0x5b, 0xb8, 0x45, 0x50, 0x27, 0x18, 0xc4, 0x61, 0x00, 0x17, 0x18, 0x35,
-  0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x0b, 0x2b, 0x06, 0x72, 0x18, 0xb8,
-  0x45, 0x0d, 0x06, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xb0, 0xb4, 0x62,
-  0x20, 0x87, 0x41, 0x20, 0x5c, 0x30, 0x4c, 0xa9, 0x60, 0x60, 0x87, 0x01,
-  0x5c, 0x60, 0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x2c, 0xb1, 0x18,
-  0xdc, 0x61, 0x00, 0x17, 0x3a, 0x18, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82,
-  0xc1, 0x22, 0x8b, 0xc1, 0x1d, 0x06, 0x81, 0x70, 0xc1, 0x30, 0x17, 0x18,
-  0x75, 0x87, 0x51, 0x37, 0x7f, 0xc3, 0x1c, 0xc9, 0x0d, 0x73, 0xc4, 0x30,
-  0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0xd5, 0x2d, 0x06,
-  0xa3, 0x18, 0xfc, 0x60, 0x10, 0x8b, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26,
-  0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xe3, 0x8b, 0x81, 0x2a, 0x06, 0x09,
-  0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xf3, 0x8b, 0xc1, 0x2a,
-  0x06, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x03, 0x8e,
-  0x01, 0x2b, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1,
-  0xe6, 0x8b, 0xc1, 0x2a, 0x06, 0x69, 0x18, 0x04, 0xb8, 0x18, 0xe0, 0x61,
-  0x60, 0x8b, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x09, 0x63,
-  0x9c, 0x30, 0x46, 0x05, 0xa0, 0x18, 0x5c, 0x0d, 0xc1, 0x5e, 0x60, 0xd4,
-  0x2c, 0x01, 0x68, 0x0c, 0x37, 0xa4, 0x86, 0x38, 0x06, 0x60, 0x30, 0xcb,
-  0xf0, 0x16, 0xa0, 0x11, 0x18, 0x18, 0x06, 0x62, 0x18, 0xc4, 0x67, 0x38,
-  0xe2, 0x35, 0xc6, 0x30, 0x20, 0xbe, 0x59, 0x06, 0xb8, 0x98, 0x8b, 0xc0,
-  0xc8, 0x30, 0x80, 0x8d, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e,
-  0x30, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x82, 0x1d, 0x03,
-  0x1d, 0x6e, 0x08, 0xd4, 0x31, 0x00, 0x83, 0x59, 0x86, 0xb8, 0x90, 0x8b,
-  0xc0, 0x06, 0x36, 0x0c, 0xe0, 0x33, 0x4b, 0x70, 0x17, 0xb6, 0x86, 0x01,
-  0x11, 0x9f, 0x59, 0x82, 0xbb, 0x18, 0x8e, 0xd0, 0x0d, 0x36, 0x0c, 0x84,
-  0x6f, 0x96, 0x81, 0x2e, 0xee, 0x22, 0xb0, 0xdd, 0x68, 0xc3, 0x20, 0x3e,
-  0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x8c, 0xb2, 0x20, 0x92, 0x8f,
-  0x15, 0x41, 0x7c, 0x8a, 0xb8, 0xc7, 0x40, 0x87, 0x1b, 0x82, 0x7a, 0x0c,
-  0xc0, 0x60, 0x96, 0xa1, 0x2e, 0xec, 0x22, 0xb0, 0x3a, 0x0c, 0x86, 0xf8,
-  0xcc, 0x12, 0xdc, 0x85, 0x11, 0x78, 0x18, 0xc0, 0x67, 0x96, 0xe0, 0x2e,
-  0x06, 0x5a, 0x0c, 0x2d, 0x2e, 0x30, 0xb9, 0x20, 0xea, 0x42, 0xb0, 0x0b,
-  0xd5, 0x99, 0x8b, 0x0b, 0x86, 0xb1, 0x3b, 0x0c, 0xf6, 0x30, 0x88, 0xcf,
-  0x70, 0x44, 0x79, 0xf0, 0x61, 0x40, 0x7c, 0xb3, 0x0c, 0x78, 0xb1, 0x17,
-  0x81, 0xf5, 0x61, 0x60, 0x1e, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3,
-  0x5c, 0x60, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x44, 0x49,
-  0x06, 0x3a, 0xdc, 0x10, 0x8c, 0x64, 0x00, 0x06, 0xb3, 0x0c, 0x79, 0xa1,
-  0x17, 0x81, 0x0d, 0xa5, 0x18, 0xc0, 0x67, 0x96, 0xe0, 0x2f, 0x4c, 0x14,
-  0x03, 0x22, 0x3e, 0xb3, 0x04, 0x7f, 0x31, 0x1c, 0x01, 0x1f, 0xa3, 0x18,
-  0x08, 0xdf, 0x2c, 0x03, 0x5f, 0xfc, 0x45, 0x60, 0xf1, 0x41, 0x8a, 0x41,
-  0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x18, 0x65, 0x41, 0x24,
-  0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x01, 0x93, 0x81, 0x0e, 0x37, 0x04, 0x2e,
-  0x19, 0x80, 0xc1, 0x2c, 0x43, 0x5f, 0xf8, 0x45, 0x60, 0xac, 0x18, 0x0c,
-  0xf1, 0x99, 0x25, 0xf8, 0x0b, 0x23, 0x62, 0x31, 0x80, 0xcf, 0x2c, 0xc1,
-  0x5f, 0x0c, 0xb4, 0x18, 0x5a, 0x5e, 0x60, 0x7a, 0x41, 0xf4, 0x85, 0xe0,
-  0x17, 0xf6, 0xb3, 0x17, 0x17, 0x0c, 0x73, 0x81, 0x51, 0xb7, 0x19, 0x75,
-  0x7d, 0x18, 0x0c, 0x73, 0xae, 0x37, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc,
-  0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x54, 0x61, 0x19, 0xb4, 0x64, 0x90,
-  0x8e, 0xc1, 0x4e, 0x06, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3,
-  0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00,
-  0x20, 0x08, 0x06, 0x0f, 0x5a, 0x06, 0x34, 0x19, 0x24, 0x44, 0x30, 0x62,
-  0x80, 0x00, 0x20, 0x08, 0x06, 0x4f, 0x5a, 0x06, 0x35, 0x19, 0x24, 0x44,
-  0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x8f, 0x5a, 0x06, 0x36, 0x19,
-  0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x1b, 0x5a, 0x06,
-  0x35, 0x19, 0xcc, 0x63, 0x10, 0x88, 0x65, 0x20, 0x92, 0x01, 0x58, 0x06,
-  0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0x27, 0x8c, 0x71, 0xc2, 0x18,
-  0x15, 0xa8, 0x64, 0x70, 0x35, 0x04, 0x7b, 0x81, 0x51, 0xb3, 0x04, 0xa0,
-  0x31, 0xd0, 0x62, 0xe8, 0xc6, 0x5a, 0xf0, 0xa7, 0xa0, 0x16, 0x36, 0xd1,
-  0x16, 0xc2, 0x5f, 0xf0, 0xa7, 0xe0, 0x16, 0x86, 0x23, 0x20, 0x19, 0xc0,
-  0x67, 0x96, 0x21, 0x34, 0x46, 0x83, 0x46, 0x86, 0x23, 0x02, 0x90, 0x0c,
-  0x86, 0xef, 0x84, 0x61, 0x86, 0x1b, 0x02, 0x7b, 0x0c, 0xc8, 0xa0, 0x86,
-  0x40, 0x87, 0x23, 0x74, 0x84, 0x24, 0x83, 0xe1, 0xab, 0x40, 0xd0, 0xe3,
-  0x91, 0x61, 0x86, 0x1b, 0x82, 0x7c, 0x0c, 0xc8, 0xa0, 0x82, 0x41, 0x67,
-  0x19, 0x44, 0xe3, 0x36, 0x82, 0xeb, 0xc5, 0x60, 0x98, 0x73, 0xbf, 0x61,
-  0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0xa0, 0xfa, 0xcb, 0x60, 0x2d, 0x83,
-  0x93, 0x0c, 0xf2, 0x32, 0x18, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18,
-  0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0xe2, 0x90, 0x11, 0x03, 0x04,
-  0x00, 0x41, 0x30, 0x78, 0x4c, 0x33, 0x90, 0xcb, 0xe0, 0x20, 0x82, 0x11,
-  0x03, 0x04, 0x00, 0x41, 0x30, 0x78, 0x4e, 0x33, 0x98, 0xcb, 0x80, 0x21,
-  0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0x78, 0x50, 0x33, 0xa0, 0xcb,
-  0x40, 0x22, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0xd8, 0x4c, 0x33,
-  0x98, 0xcb, 0x20, 0x26, 0x83, 0x00, 0x34, 0x03, 0xb0, 0x0c, 0xfc, 0x32,
-  0x18, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x38, 0x61, 0x8c, 0x13, 0xc6,
-  0xa8, 0x00, 0x2d, 0x83, 0xab, 0x21, 0xd8, 0x0b, 0x8c, 0x9a, 0x25, 0xb8,
-  0x8d, 0xe1, 0x86, 0x38, 0x59, 0xcd, 0x00, 0x0c, 0x66, 0x19, 0x48, 0xa3,
-  0x34, 0x82, 0x8a, 0xc9, 0x60, 0x2f, 0x03, 0xb8, 0xc0, 0xa8, 0x11, 0x83,
-  0x03, 0x00, 0x41, 0x30, 0x58, 0x6c, 0x33, 0xe0, 0xcb, 0x00, 0x0c, 0x7e,
-  0x32, 0x18, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0xe5, 0x36, 0x03, 0xbe,
-  0x0c, 0x02, 0xe1, 0x82, 0x61, 0x8a, 0x26, 0x03, 0xd0, 0x0c, 0xe0, 0x02,
-  0xa3, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0xd9, 0xcd, 0x20, 0x34,
-  0x03, 0x3d, 0x21, 0xcb, 0x60, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x16,
-  0xde, 0x0c, 0x42, 0x33, 0x08, 0x84, 0x0b, 0x86, 0xb9, 0xc0, 0xa8, 0x3b,
-  0x8c, 0xba, 0x7e, 0x0c, 0x86, 0x39, 0x17, 0x0c, 0x86, 0x39, 0x62, 0x98,
-  0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x2a, 0x3c, 0x83,
-  0xd6, 0x0c, 0xd2, 0x32, 0xd8, 0xcd, 0x60, 0x34, 0x21, 0x00, 0x46, 0x13,
-  0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46,
-  0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x41, 0xcf, 0x80, 0x36, 0x83, 0x84,
-  0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x49, 0xcf, 0xa0, 0x36,
-  0x83, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x51, 0xcf,
-  0xc0, 0x36, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60,
-  0x43, 0xcf, 0xa0, 0x36, 0x83, 0xb9, 0x0c, 0x02, 0xf1, 0x0c, 0x44, 0x33,
-  0x00, 0xcf, 0x60, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0xe0, 0x84, 0x31,
-  0x4e, 0x18, 0xa3, 0x02, 0xd5, 0x0c, 0xae, 0x86, 0x60, 0x2f, 0x30, 0x6a,
-  0x96, 0xe0, 0x36, 0x86, 0x1b, 0x66, 0x85, 0x3d, 0x03, 0x30, 0x98, 0x65,
-  0x30, 0x8d, 0xdb, 0x08, 0x4c, 0x2d, 0x03, 0xb6, 0x0c, 0xe2, 0x33, 0x1c,
-  0x11, 0x0a, 0x6d, 0x19, 0x10, 0xdf, 0x2c, 0xc3, 0x69, 0xa8, 0x46, 0x60,
-  0x6e, 0x19, 0x88, 0x42, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17,
-  0x18, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x61, 0x9f, 0x81,
-  0x0e, 0x37, 0x04, 0xf4, 0x19, 0x80, 0xc1, 0x2c, 0x03, 0x6a, 0xa4, 0x46,
-  0x60, 0x83, 0x5d, 0x06, 0xf0, 0x99, 0x25, 0x70, 0x0d, 0xab, 0xcb, 0x80,
-  0x88, 0xcf, 0x2c, 0x81, 0x6b, 0x0c, 0x47, 0xb0, 0x82, 0x5d, 0x06, 0xc2,
-  0x37, 0xcb, 0xb0, 0x1a, 0xae, 0x11, 0x58, 0x2b, 0xdc, 0x65, 0x10, 0x1f,
-  0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x46, 0x59, 0x10, 0xc9, 0xc7,
-  0x8a, 0x20, 0x3e, 0x45, 0x84, 0x68, 0xa0, 0xc3, 0x0d, 0xc1, 0x7f, 0x06,
-  0x60, 0x30, 0xcb, 0xc0, 0x1a, 0xad, 0x11, 0xd8, 0x5f, 0x06, 0x43, 0x7c,
-  0x66, 0x09, 0x5c, 0xc3, 0x08, 0xd1, 0x0c, 0xe0, 0x33, 0x4b, 0xe0, 0x1a,
-  0x03, 0x2d, 0x86, 0x86, 0x1a, 0x58, 0x6a, 0x10, 0xac, 0x21, 0xb4, 0x86,
-  0x5b, 0xa8, 0xc6, 0x05, 0xc3, 0x58, 0x68, 0x06, 0xa5, 0x19, 0xc4, 0x67,
-  0x38, 0xe2, 0x5d, 0x4c, 0x33, 0x20, 0xbe, 0x59, 0x86, 0xd7, 0x90, 0x8d,
-  0xc0, 0x4e, 0x33, 0x80, 0x97, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61,
-  0x2e, 0x30, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0xe2, 0x45,
-  0x03, 0x1d, 0x6e, 0x08, 0x5a, 0x34, 0x00, 0x83, 0x59, 0x06, 0xd8, 0x88,
-  0x8d, 0xc0, 0x86, 0xd7, 0x0c, 0xe0, 0x33, 0x4b, 0x60, 0x1b, 0xc6, 0x9a,
-  0x01, 0x11, 0x9f, 0x59, 0x02, 0xdb, 0x18, 0x8e, 0xd0, 0x97, 0xd6, 0x0c,
-  0x84, 0x6f, 0x96, 0x61, 0x36, 0x6c, 0x23, 0xb0, 0x7d, 0x71, 0xcd, 0x20,
-  0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x8c, 0xb2, 0x20, 0x92,
-  0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xd0, 0xd1, 0x40, 0x87, 0x1b, 0x02, 0x1c,
-  0x0d, 0xc0, 0x60, 0x96, 0x81, 0x36, 0x6a, 0x23, 0x30, 0xdb, 0x0c, 0x86,
-  0xf8, 0xcc, 0x12, 0xd8, 0x86, 0x11, 0xbb, 0x19, 0xc0, 0x67, 0x96, 0xc0,
-  0x36, 0x06, 0x5a, 0x0c, 0x0d, 0x36, 0xb0, 0xd8, 0x20, 0x68, 0x43, 0xa8,
-  0x0d, 0x15, 0x93, 0x8d, 0x0b, 0x86, 0xb9, 0xc0, 0xa8, 0xdb, 0x8c, 0xba,
-  0xd3, 0x0c, 0x86, 0x39, 0x5c, 0x0c, 0x86, 0x39, 0x62, 0x98, 0x23, 0x86,
-  0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x6a, 0x4d, 0x83, 0x1b, 0x0d,
-  0xe6, 0x33, 0x28, 0xd3, 0x60, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60,
-  0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10,
-  0x00, 0x04, 0xc1, 0xe0, 0x91, 0xd3, 0xc0, 0x47, 0x83, 0x84, 0x08, 0x46,
-  0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x99, 0xd3, 0xe0, 0x47, 0x83, 0x84,
-  0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0xa1, 0xd3, 0x00, 0x4c,
-  0x83, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x60, 0x93, 0xd3,
-  0xe0, 0x47, 0x83, 0xfe, 0x0c, 0x02, 0x36, 0x0d, 0x58, 0x34, 0x50, 0xd3,
-  0x60, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0xe0, 0x84, 0x31, 0x4e, 0x18,
-  0xa3, 0x02, 0x1a, 0x0d, 0xae, 0x86, 0x60, 0x2f, 0x30, 0x6a, 0x96, 0xe0,
-  0x36, 0x06, 0x5a, 0x0c, 0xdd, 0x10, 0x0d, 0x18, 0x16, 0x42, 0xc3, 0x26,
-  0x48, 0x43, 0xb0, 0x0d, 0x18, 0x16, 0x4a, 0xc3, 0xc4, 0xa6, 0x45, 0x03,
-  0xf8, 0xcc, 0x32, 0xe0, 0x86, 0x6e, 0xf8, 0xcc, 0x70, 0x44, 0xa0, 0xa2,
-  0xc1, 0xf0, 0x9d, 0x30, 0xcc, 0x70, 0x43, 0x00, 0xa2, 0x01, 0x19, 0xd4,
-  0x10, 0xe8, 0x70, 0x04, 0xd9, 0xb8, 0x68, 0x30, 0x7c, 0x15, 0x08, 0x7a,
-  0x66, 0x33, 0xcc, 0x70, 0x43, 0x30, 0xa2, 0x01, 0x19, 0x54, 0x30, 0xe8,
-  0x2c, 0x43, 0x6e, 0xb8, 0x47, 0x70, 0xe7, 0x19, 0x0c, 0x73, 0xf8, 0x18,
-  0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x55, 0xaa, 0x06, 0x75,
-  0x1a, 0xc4, 0x68, 0x30, 0xaa, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08,
-  0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x87, 0x8c, 0x18,
-  0x20, 0x00, 0x08, 0x82, 0xc1, 0x03, 0xab, 0x01, 0x9f, 0x06, 0x07, 0x11,
-  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x13, 0xab, 0x41, 0x9f, 0x06,
-  0x0c, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x23, 0xab, 0x81,
-  0x9f, 0x06, 0x12, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0x06,
-  0xab, 0x41, 0x9f, 0x06, 0x3b, 0x1a, 0x04, 0xaa, 0x1a, 0xa8, 0x69, 0x80,
-  0xaa, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x09, 0x63, 0x9c,
-  0x30, 0x46, 0x05, 0x72, 0x1a, 0x5c, 0x0d, 0xc1, 0x5e, 0x60, 0xd4, 0x2c,
-  0x81, 0x7b, 0x0c, 0x37, 0xec, 0x4d, 0xad, 0x06, 0x60, 0x30, 0xcb, 0xb0,
-  0x1b, 0xbc, 0x11, 0xd4, 0x8e, 0x06, 0xa5, 0x1a, 0xc0, 0x05, 0x46, 0x8d,
-  0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x02, 0xae, 0x81, 0xa9, 0x06, 0x60,
-  0x90, 0xa6, 0xc1, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x2c, 0xe1, 0x1a,
-  0x98, 0x6a, 0x10, 0x08, 0x17, 0x0c, 0x53, 0x3e, 0x1a, 0xa8, 0x6a, 0x00,
-  0x17, 0x18, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x4b, 0xb9, 0x06,
-  0xab, 0x1a, 0x90, 0x8e, 0x9b, 0x06, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60,
-  0xb0, 0x98, 0x6b, 0xb0, 0xaa, 0x41, 0x20, 0x5c, 0x30, 0xcc, 0x05, 0x46,
-  0xdd, 0x61, 0xd4, 0x9d, 0x68, 0x30, 0xcc, 0xe1, 0x64, 0x30, 0xcc, 0x11,
-  0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x54, 0xeb,
-  0x1a, 0xdc, 0x6a, 0x30, 0xa7, 0x41, 0xb9, 0x06, 0xa3, 0x09, 0x01, 0x30,
-  0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24,
-  0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x8f, 0xbc, 0x06, 0xbe, 0x1a,
-  0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xcf, 0xbc, 0x06,
-  0xbf, 0x1a, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x0f,
-  0xbd, 0x06, 0xe0, 0x1a, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08,
-  0x06, 0x9b, 0xbc, 0x06, 0xbf, 0x1a, 0xf4, 0x69, 0x10, 0xb0, 0x6b, 0xc0,
-  0xaa, 0x81, 0xba, 0x06, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0x27,
-  0x8c, 0x71, 0xc2, 0x18, 0x15, 0xd0, 0x6a, 0x70, 0x35, 0x04, 0x7b, 0x81,
-  0x51, 0xb3, 0x04, 0xee, 0x31, 0xdc, 0xd0, 0x3b, 0xf6, 0x1a, 0x80, 0xc1,
-  0x2c, 0x43, 0x6f, 0xb8, 0x47, 0x60, 0x74, 0x1a, 0xd8, 0x69, 0x10, 0x9f,
-  0xe1, 0x88, 0x50, 0xb8, 0xd3, 0x80, 0xf8, 0x66, 0x19, 0x7c, 0x23, 0x3c,
-  0x02, 0xc3, 0xd3, 0x40, 0x14, 0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86,
-  0xb9, 0xc0, 0x28, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0x90,
-  0x0d, 0x74, 0xb8, 0x21, 0xf0, 0xd7, 0x00, 0x0c, 0x66, 0x19, 0x7e, 0x03,
-  0x3c, 0x02, 0x1b, 0x40, 0x35, 0x80, 0xcf, 0x2c, 0x41, 0x79, 0xd8, 0x9f,
-  0x06, 0x44, 0x7c, 0x66, 0x09, 0xca, 0x63, 0x38, 0x82, 0x15, 0x40, 0x35,
-  0x10, 0xbe, 0x59, 0x06, 0xf1, 0x28, 0x8f, 0xc0, 0x5a, 0x21, 0x54, 0x83,
-  0xf8, 0x58, 0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x30, 0xca, 0x82, 0x48,
-  0x3e, 0x56, 0x04, 0xf1, 0x29, 0x62, 0x65, 0x03, 0x1d, 0x6e, 0x08, 0x52,
-  0x36, 0x00, 0x83, 0x59, 0x86, 0xf1, 0x20, 0x8f, 0xc0, 0x52, 0x35, 0x18,
-  0xe2, 0x33, 0x4b, 0x50, 0x1e, 0x46, 0xb0, 0x6a, 0x00, 0x9f, 0x59, 0x82,
-  0xf2, 0x18, 0x68, 0x31, 0xb4, 0xdf, 0xc0, 0xc0, 0x83, 0x18, 0x0f, 0x81,
-  0x3c, 0xdc, 0x22, 0x3c, 0x2e, 0x18, 0xc6, 0x56, 0x35, 0x78, 0xd5, 0x20,
-  0x3e, 0xc3, 0x11, 0xf9, 0x03, 0xab, 0x01, 0xf1, 0xcd, 0x32, 0x98, 0x47,
-  0x7a, 0x04, 0x16, 0xab, 0x81, 0xfe, 0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17,
-  0x0c, 0x73, 0x81, 0x51, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11,
-  0x39, 0x1b, 0xe8, 0x70, 0x43, 0x70, 0xb3, 0x01, 0x18, 0xcc, 0x32, 0x9c,
-  0x07, 0x7a, 0x04, 0x36, 0xe4, 0x6a, 0x00, 0x9f, 0x59, 0x82, 0xf6, 0x30,
-  0x5b, 0x0d, 0x88, 0xf8, 0xcc, 0x12, 0xb4, 0xc7, 0x70, 0x04, 0x09, 0xdd,
-  0x6a, 0x20, 0x7c, 0xb3, 0x0c, 0xea, 0xd1, 0x1e, 0x81, 0x95, 0x10, 0xae,
-  0x06, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0x60, 0x94, 0x05,
-  0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x04, 0xd9, 0x06, 0x3a, 0xdc, 0x10,
-  0x88, 0x6d, 0x00, 0x06, 0xb3, 0x0c, 0xeb, 0xc1, 0x1e, 0x81, 0x81, 0x6b,
-  0x30, 0xc4, 0x67, 0x96, 0xa0, 0x3d, 0x8c, 0x28, 0xd7, 0x00, 0x3e, 0xb3,
-  0x04, 0xed, 0x31, 0xd0, 0x62, 0x68, 0xe7, 0x81, 0xa1, 0x07, 0xb1, 0x1e,
-  0x02, 0x7b, 0xf8, 0x60, 0x90, 0x1e, 0x17, 0x0c, 0x73, 0x81, 0x51, 0xb7,
-  0x19, 0x75, 0xb1, 0x1a, 0x0c, 0x73, 0xe2, 0x19, 0x0c, 0x73, 0xc4, 0x30,
-  0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x55, 0xdd, 0x06,
-  0x61, 0x1b, 0xf4, 0x6b, 0xf0, 0xb6, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26,
-  0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xc3, 0xb7, 0x01, 0xda, 0x06, 0x09,
-  0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xd3, 0xb7, 0x41, 0xda,
-  0x06, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xe3, 0xb7,
-  0x81, 0xda, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1,
-  0xc6, 0xb7, 0x41, 0xda, 0x06, 0x27, 0x1b, 0x04, 0x76, 0x1b, 0xd8, 0x6c,
-  0x40, 0xb7, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x09, 0x63,
-  0x9c, 0x30, 0x46, 0x05, 0x3e, 0x1b, 0x5c, 0x0d, 0xc1, 0x5e, 0x60, 0xd4,
-  0x2c, 0x81, 0x7b, 0x0c, 0xb4, 0x18, 0xba, 0x91, 0x1b, 0x64, 0x2e, 0xe0,
-  0x86, 0x4d, 0xec, 0x86, 0xd0, 0x1e, 0x64, 0x2e, 0xf0, 0xc6, 0x2c, 0xc3,
-  0x7b, 0xc4, 0xc7, 0x19, 0x0d, 0x47, 0xac, 0xd1, 0xcc, 0x06, 0xc3, 0x77,
-  0x6c, 0x34, 0xcc, 0x70, 0x43, 0x90, 0xb2, 0x01, 0x19, 0xd4, 0x10, 0xe8,
-  0x70, 0x84, 0x1b, 0xdd, 0x6c, 0x30, 0x7c, 0x15, 0x08, 0x7a, 0x70, 0x34,
-  0xcc, 0x70, 0x43, 0xc0, 0xb2, 0x01, 0x19, 0x54, 0x30, 0xe8, 0x2c, 0x03,
-  0x7c, 0x94, 0x48, 0x70, 0xf0, 0x1a, 0x0c, 0x73, 0x21, 0x1a, 0x0c, 0x33,
-  0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x95, 0xec, 0x06, 0x7e, 0x1b, 0xe8,
-  0x6c, 0xc0, 0xba, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68,
-  0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x87, 0x8c, 0x18, 0x20, 0x00,
-  0x08, 0x82, 0xc1, 0x93, 0xbb, 0x41, 0xe9, 0x06, 0x07, 0x11, 0x8c, 0x18,
-  0x20, 0x00, 0x08, 0x82, 0xc1, 0xa3, 0xbb, 0x81, 0xe9, 0x06, 0x0c, 0x11,
-  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xb3, 0xbb, 0xc1, 0xe9, 0x06,
-  0x12, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0x96, 0xbb, 0x81,
-  0xe9, 0x06, 0x64, 0x1b, 0x04, 0xb3, 0x1b, 0xcc, 0x6d, 0x10, 0xbb, 0xc1,
-  0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x09, 0x63, 0x9c, 0x30, 0x46,
-  0x05, 0x7b, 0x1b, 0x5c, 0x0d, 0xc1, 0x5e, 0x60, 0xd4, 0x2c, 0x41, 0x89,
-  0x0c, 0x37, 0x90, 0x92, 0xef, 0x06, 0x60, 0x30, 0xcb, 0x20, 0x1f, 0xf3,
-  0x11, 0x14, 0xd9, 0x06, 0xae, 0x1b, 0xc0, 0x05, 0x46, 0x8d, 0x18, 0x1c,
-  0x00, 0x08, 0x82, 0xc1, 0x92, 0xbe, 0xc1, 0xeb, 0x06, 0xaa, 0x24, 0xb7,
-  0xc1, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x2c, 0xea, 0x1b, 0xbc, 0x6e,
-  0x10, 0x08, 0x17, 0x0c, 0x53, 0x67, 0x1b, 0xcc, 0x6e, 0x00, 0x17, 0x18,
-  0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x8b, 0xfb, 0x06, 0xb4, 0x1b,
-  0xb8, 0xd2, 0xdd, 0x06, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xb0, 0xbc,
-  0x6f, 0x40, 0xbb, 0x41, 0x20, 0x5c, 0x30, 0xcc, 0x05, 0x46, 0xdd, 0x61,
-  0xd4, 0xc1, 0x6c, 0x30, 0xcc, 0x85, 0x69, 0x30, 0xcc, 0x11, 0xc3, 0x1c,
-  0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x54, 0xf4, 0x1b, 0x80,
-  0x6f, 0xc0, 0xb7, 0x81, 0xfb, 0x06, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20,
-  0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62,
-  0x80, 0x00, 0x20, 0x08, 0x06, 0xcf, 0xfe, 0x06, 0xe7, 0x1b, 0x24, 0x44,
-  0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x0f, 0xff, 0x06, 0xe8, 0x1b,
-  0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x4f, 0xff, 0x06,
-  0xe9, 0x1b, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0xdb,
-  0xfe, 0x06, 0xe8, 0x1b, 0x98, 0x6e, 0x10, 0xd4, 0x6f, 0x50, 0xbb, 0xc1,
-  0xfc, 0x06, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0x27, 0x8c, 0x71,
-  0xc2, 0x18, 0x15, 0xf4, 0x6e, 0x70, 0x35, 0x04, 0x7b, 0x81, 0x51, 0xb3,
-  0x04, 0x25, 0x32, 0xdc, 0x60, 0x4e, 0xff, 0x1b, 0x80, 0xc1, 0x2c, 0x03,
-  0x7d, 0x94, 0x48, 0x60, 0x7d, 0x1b, 0xfc, 0x6d, 0x10, 0x9f, 0xe1, 0x88,
-  0x75, 0x02, 0xdd, 0x80, 0xf8, 0x66, 0x19, 0xea, 0x03, 0x3f, 0x02, 0x0b,
-  0xdd, 0x80, 0x9d, 0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xc0,
-  0x28, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0x14, 0x0e, 0x74,
-  0xb8, 0x21, 0x38, 0xe1, 0x00, 0x0c, 0x66, 0x19, 0xec, 0xe3, 0x3e, 0x02,
-  0x1b, 0x52, 0x37, 0x80, 0xcf, 0x2c, 0x01, 0x7f, 0x18, 0xea, 0x06, 0x44,
-  0x7c, 0x66, 0x09, 0xf8, 0x63, 0x38, 0xc2, 0x9e, 0x52, 0x37, 0x10, 0xbe,
-  0x59, 0x86, 0xfc, 0xe0, 0x8f, 0xc0, 0xee, 0x49, 0x75, 0x83, 0xf8, 0x58,
-  0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x30, 0xca, 0x82, 0x48, 0x3e, 0x56,
-  0x04, 0xf1, 0x29, 0x82, 0x86, 0x03, 0x1d, 0x6e, 0x08, 0x64, 0x38, 0x00,
-  0x83, 0x59, 0x06, 0xfd, 0xd8, 0x8f, 0xc0, 0x64, 0x37, 0x18, 0xe2, 0x33,
-  0x4b, 0xc0, 0x1f, 0x46, 0xd4, 0x6e, 0x00, 0x9f, 0x59, 0x02, 0xfe, 0x18,
-  0x68, 0x31, 0x34, 0xfb, 0xc0, 0xee, 0x83, 0xd0, 0x0f, 0x61, 0x3f, 0x44,
-  0x34, 0xc0, 0x8f, 0x0b, 0x86, 0x31, 0xda, 0x0d, 0x70, 0x37, 0x88, 0xcf,
-  0x70, 0xc4, 0x48, 0xe5, 0x6e, 0x40, 0x7c, 0xb3, 0x0c, 0xfd, 0x01, 0x22,
-  0x81, 0xe9, 0x6e, 0x40, 0x52, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3,
-  0x5c, 0x60, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x84, 0x18,
-  0x07, 0x3a, 0xdc, 0x10, 0x80, 0x71, 0x00, 0x06, 0xb3, 0x0c, 0xfe, 0xf1,
-  0x1f, 0x81, 0x0d, 0xe2, 0x1b, 0xc0, 0x67, 0x96, 0x80, 0x44, 0xec, 0x77,
-  0x03, 0x22, 0x3e, 0xb3, 0x04, 0x24, 0x32, 0x1c, 0xe1, 0x52, 0xe0, 0x1b,
-  0x08, 0xdf, 0x2c, 0x43, 0x88, 0x90, 0x48, 0x60, 0x2f, 0x15, 0xbe, 0x41,
-  0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x18, 0x65, 0x41, 0x24,
-  0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xd1, 0xc6, 0x81, 0x0e, 0x37, 0x04, 0x6b,
-  0x1c, 0x80, 0xc1, 0x2c, 0x83, 0x88, 0x8c, 0x48, 0x60, 0xe9, 0x1b, 0x0c,
-  0xf1, 0x99, 0x25, 0x20, 0x11, 0x23, 0xdc, 0x37, 0x80, 0xcf, 0x2c, 0x01,
-  0x89, 0x0c, 0xb4, 0x18, 0x9a, 0x7f, 0x60, 0xff, 0x41, 0x88, 0x88, 0x30,
-  0x22, 0x72, 0x1a, 0x80, 0xc8, 0x05, 0xc3, 0x5c, 0x60, 0xd4, 0x6d, 0x46,
-  0x9d, 0xee, 0x06, 0xc3, 0xdc, 0xba, 0x06, 0xc3, 0x1c, 0x31, 0xcc, 0x11,
-  0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x41, 0xe5, 0xc7, 0x81, 0x1a,
-  0x07, 0x26, 0x1c, 0xe0, 0x71, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42,
-  0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23, 0x06,
-  0x08, 0x00, 0x82, 0x60, 0xf0, 0x94, 0x72, 0x10, 0xc7, 0x41, 0x42, 0x04,
-  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xf0, 0x98, 0x72, 0x20, 0xc7, 0x41,
-  0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xf0, 0x9c, 0x72, 0x30,
-  0xc7, 0x41, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0x95,
-  0x72, 0x20, 0xc7, 0x01, 0x0c, 0x07, 0xc1, 0x1f, 0x07, 0x3f, 0x1c, 0xf4,
-  0x71, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x70, 0xc2, 0x18, 0x27,
-  0x8c, 0x51, 0xc1, 0x19, 0x07, 0x57, 0x43, 0xb0, 0x17, 0x18, 0x35, 0x4b,
-  0x50, 0x22, 0x03, 0x2d, 0x86, 0x6e, 0xc0, 0x87, 0x1d, 0x0e, 0xef, 0x61,
-  0x13, 0xf2, 0x21, 0x90, 0x88, 0x1d, 0x0e, 0xf3, 0x31, 0xcb, 0x60, 0x22,
-  0x28, 0x02, 0x57, 0xc3, 0x11, 0x75, 0xc5, 0xc3, 0xc1, 0xf0, 0x9d, 0x5d,
-  0x0d, 0x33, 0xdc, 0x10, 0xc8, 0x70, 0x40, 0x06, 0x35, 0x04, 0x3a, 0x1c,
-  0x71, 0x57, 0x60, 0x1c, 0x0c, 0x5f, 0x05, 0x82, 0x5e, 0x5e, 0x0d, 0x33,
-  0xdc, 0x10, 0xd4, 0x70, 0x40, 0x06, 0x15, 0x0c, 0x3a, 0xcb, 0x70, 0x22,
-  0x3c, 0x12, 0x5c, 0xfe, 0x06, 0xc3, 0x9c, 0xca, 0x06, 0xc3, 0x8c, 0x18,
-  0x1c, 0x00, 0x08, 0x82, 0x41, 0xb5, 0xcb, 0xc1, 0x29, 0x07, 0x63, 0x1c,
-  0xd4, 0x72, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30,
-  0x08, 0xa3, 0x09, 0xc4, 0x50, 0xc4, 0x21, 0x23, 0x06, 0x08, 0x00, 0x82,
-  0x60, 0xf0, 0x88, 0x73, 0xe0, 0xca, 0xc1, 0x41, 0x04, 0x23, 0x06, 0x08,
-  0x00, 0x82, 0x60, 0xf0, 0x8c, 0x73, 0xf0, 0xca, 0x01, 0x43, 0x04, 0x23,
-  0x06, 0x08, 0x00, 0x82, 0x60, 0xf0, 0x90, 0x73, 0x00, 0xcb, 0x81, 0x44,
-  0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0x89, 0x73, 0xf0, 0xca,
-  0x41, 0x1b, 0x07, 0x01, 0x2f, 0x07, 0x7c, 0x1c, 0xe8, 0x72, 0x30, 0x9a,
-  0x10, 0x00, 0xa3, 0x09, 0x42, 0x70, 0xc2, 0x18, 0x27, 0x8c, 0x51, 0x01,
-  0x29, 0x07, 0x57, 0x43, 0xb0, 0x17, 0x18, 0x35, 0x4b, 0xc0, 0x23, 0xc3,
-  0x0d, 0xad, 0x75, 0xce, 0x01, 0x18, 0xcc, 0x32, 0xa4, 0x88, 0x8a, 0x04,
-  0xd5, 0xc6, 0xc1, 0x2d, 0x07, 0x70, 0x81, 0x51, 0x23, 0x06, 0x07, 0x00,
-  0x82, 0x60, 0xb0, 0xc8, 0x73, 0x80, 0xcb, 0x01, 0x6d, 0xed, 0x71, 0x30,
-  0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0xcb, 0x3c, 0x07, 0xb8, 0x1c, 0x04,
-  0xc2, 0x05, 0xc3, 0x14, 0x1c, 0x07, 0xbc, 0x1c, 0xc0, 0x05, 0x46, 0x8d,
-  0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x72, 0xcf, 0x41, 0x2f, 0x07, 0xb7,
-  0x05, 0xca, 0xc1, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x2c, 0xf8, 0x1c,
-  0xf4, 0x72, 0x10, 0x08, 0x17, 0x0c, 0x73, 0x81, 0x51, 0x77, 0x18, 0x75,
-  0x39, 0x1c, 0x0c, 0x73, 0x6a, 0x1b, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c,
-  0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x55, 0x3f, 0x07, 0xe9, 0x1c,
-  0x94, 0x72, 0x70, 0xcf, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1,
-  0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20,
-  0x00, 0x08, 0x82, 0xc1, 0x43, 0xd2, 0x01, 0x3c, 0x07, 0x09, 0x11, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x53, 0xd2, 0x41, 0x3c, 0x07, 0x09,
-  0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x63, 0xd2, 0x81, 0x3c,
-  0x07, 0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0x46, 0xd2,
-  0x41, 0x3c, 0x07, 0xaf, 0x1c, 0x04, 0xfe, 0x1c, 0xf8, 0x72, 0xc0, 0xcf,
-  0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x09, 0x63, 0x9c, 0x30,
-  0x46, 0x05, 0xe6, 0x1c, 0x5c, 0x0d, 0xc1, 0x5e, 0x60, 0xd4, 0x2c, 0x01,
-  0x8f, 0x0c, 0x37, 0xbc, 0x17, 0x4a, 0x07, 0x60, 0x30, 0xcb, 0xb0, 0x22,
-  0x3c, 0x12, 0x98, 0x29, 0x07, 0xa8, 0x1c, 0xc4, 0x67, 0x38, 0xa2, 0xbe,
-  0x52, 0x39, 0x20, 0xbe, 0x59, 0x06, 0x16, 0x79, 0x91, 0xc0, 0x54, 0x39,
-  0xb0, 0xaf, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x30, 0xca,
-  0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x42, 0xa6, 0x03, 0x1d, 0x6e,
-  0x08, 0x60, 0x3a, 0x00, 0x83, 0x59, 0x86, 0x16, 0x71, 0x91, 0xc0, 0x06,
-  0x59, 0x0e, 0xe0, 0x33, 0x4b, 0x30, 0x23, 0x16, 0xcb, 0x01, 0x11, 0x9f,
-  0x59, 0x82, 0x19, 0x19, 0x8e, 0x00, 0x31, 0x59, 0x0e, 0x84, 0x6f, 0x96,
-  0x01, 0x46, 0x66, 0x24, 0xb0, 0x10, 0x9b, 0xe5, 0x20, 0x3e, 0x16, 0x38,
-  0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x8c, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41,
-  0x7c, 0x8a, 0xe8, 0xe9, 0x40, 0x87, 0x1b, 0x82, 0x9d, 0x0e, 0xc0, 0x60,
-  0x96, 0x21, 0x46, 0x64, 0x24, 0xb0, 0x5d, 0x0e, 0x86, 0xf8, 0xcc, 0x12,
-  0xcc, 0x88, 0x11, 0xbe, 0x1c, 0xc0, 0x67, 0x96, 0x60, 0x46, 0x06, 0x5a,
-  0x0c, 0xad, 0x45, 0x30, 0x17, 0x21, 0x62, 0x44, 0x90, 0x11, 0x1b, 0x0e,
-  0x5e, 0xe4, 0x82, 0x61, 0xac, 0x97, 0x83, 0x70, 0x0e, 0xe2, 0x33, 0x1c,
-  0xc1, 0x62, 0xe2, 0x1c, 0x10, 0xdf, 0x2c, 0x03, 0x8d, 0xdc, 0x48, 0x60,
-  0xe3, 0x1c, 0xb4, 0x58, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17,
-  0x18, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xb1, 0xd6, 0x81,
-  0x0e, 0x37, 0x04, 0x69, 0x1d, 0x80, 0xc1, 0x2c, 0x43, 0x8d, 0xd8, 0x48,
-  0x60, 0xc3, 0x3a, 0x07, 0xf0, 0x99, 0x25, 0xd8, 0x11, 0x43, 0xe7, 0x80,
-  0x88, 0xcf, 0x2c, 0xc1, 0x8e, 0x0c, 0x47, 0xdc, 0x58, 0x3a, 0x07, 0xc2,
-  0x37, 0xcb, 0x80, 0x23, 0x3b, 0x12, 0x18, 0x8e, 0xa9, 0x73, 0x10, 0x1f,
-  0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x46, 0x59, 0x10, 0xc9, 0xc7,
-  0x8a, 0x20, 0x3e, 0x45, 0xd8, 0x75, 0xa0, 0xc3, 0x0d, 0x01, 0x5d, 0x07,
-  0x60, 0x30, 0xcb, 0x90, 0x23, 0x3a, 0x12, 0x98, 0x3c, 0x07, 0x43, 0x7c,
-  0x66, 0x09, 0x76, 0xc4, 0x88, 0x7b, 0x0e, 0xe0, 0x33, 0x4b, 0xb0, 0x23,
-  0x03, 0x2d, 0x86, 0x56, 0x23, 0x98, 0x8d, 0x10, 0x39, 0x22, 0xe8, 0x08,
-  0x29, 0x07, 0x37, 0x72, 0xc1, 0x30, 0x17, 0x18, 0x75, 0x9b, 0x51, 0x37,
-  0xce, 0xc1, 0x30, 0x47, 0xbf, 0xc1, 0x30, 0x47, 0x0c, 0x73, 0xc4, 0x30,
-  0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x50, 0x9d, 0x76, 0x30, 0xd7, 0xc1,
-  0x4b, 0x07, 0xa1, 0x1d, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c,
-  0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x91, 0xc8, 0x88, 0x01, 0x02,
-  0x80, 0x20, 0x18, 0x3c, 0xae, 0x1d, 0xe8, 0x75, 0x90, 0x10, 0xc1, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x3c, 0xaf, 0x1d, 0xec, 0x75, 0x90, 0x10,
-  0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x3c, 0xb0, 0x1d, 0xf0, 0x75,
-  0x90, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x6c, 0xae, 0x1d,
-  0xec, 0x75, 0x90, 0xd3, 0x41, 0x80, 0xda, 0x01, 0x5a, 0x07, 0xa6, 0x1d,
-  0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x9c, 0x30, 0xc6, 0x09, 0x63,
-  0x54, 0x00, 0xd7, 0xc1, 0xd5, 0x10, 0xec, 0x05, 0x46, 0xcd, 0x12, 0xf0,
-  0xc8, 0x40, 0x8b, 0xa1, 0x1b, 0x27, 0x82, 0xaa, 0x83, 0x89, 0xd8, 0x44,
-  0x8a, 0x08, 0x3b, 0x82, 0xaa, 0x83, 0x8a, 0xcc, 0x32, 0xf4, 0xc8, 0x8f,
-  0xe4, 0xd9, 0x70, 0x84, 0x19, 0x95, 0x75, 0x30, 0x7c, 0x77, 0x46, 0xc3,
-  0x0c, 0x37, 0x04, 0x3b, 0x1d, 0x90, 0x41, 0x0d, 0x81, 0x0e, 0x47, 0x80,
-  0x5a, 0x5a, 0x07, 0xc3, 0x57, 0x81, 0xa0, 0x27, 0x6a, 0xc3, 0x0c, 0x37,
-  0x04, 0x3e, 0x1d, 0x90, 0x41, 0x05, 0x83, 0xce, 0x32, 0xf8, 0xc8, 0x9c,
-  0x04, 0x27, 0xd2, 0xc1, 0x30, 0x37, 0xc3, 0xc1, 0x30, 0x23, 0x06, 0x07,
-  0x00, 0x82, 0x60, 0x50, 0x91, 0x77, 0x00, 0xdb, 0x01, 0x5b, 0x07, 0xbe,
-  0x1d, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2,
-  0x68, 0x02, 0x31, 0x14, 0x71, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
-  0x3c, 0xeb, 0x1d, 0xdc, 0x76, 0x70, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80,
-  0x20, 0x18, 0x3c, 0xec, 0x1d, 0xe0, 0x76, 0xc0, 0x10, 0xc1, 0x88, 0x01,
-  0x02, 0x80, 0x20, 0x18, 0x3c, 0xed, 0x1d, 0xe4, 0x76, 0x20, 0x11, 0xc1,
-  0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x6c, 0xeb, 0x1d, 0xe0, 0x76, 0x60,
-  0xd7, 0x41, 0x50, 0xde, 0x41, 0x69, 0x07, 0xe3, 0x1d, 0x8c, 0x26, 0x04,
-  0xc0, 0x68, 0x82, 0x10, 0x9c, 0x30, 0xc6, 0x09, 0x63, 0x54, 0xd0, 0xda,
-  0xc1, 0xd5, 0x10, 0xec, 0x05, 0x46, 0xcd, 0x12, 0xcc, 0xc9, 0x70, 0x83,
-  0xad, 0xc1, 0x77, 0x00, 0x06, 0xb3, 0x0c, 0x60, 0x12, 0x26, 0x41, 0xd9,
-  0x75, 0x00, 0xde, 0x01, 0x5c, 0x60, 0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20,
-  0x18, 0x2c, 0xfb, 0x1d, 0x84, 0x77, 0x50, 0x4a, 0xa4, 0x1d, 0x8c, 0x18,
-  0x1c, 0x00, 0x08, 0x82, 0xc1, 0xc2, 0xdf, 0x41, 0x78, 0x07, 0x81, 0x70,
-  0xc1, 0x30, 0x95, 0xd7, 0x41, 0x79, 0x07, 0x70, 0x81, 0x51, 0x23, 0x06,
-  0x07, 0x00, 0x82, 0x60, 0xb0, 0x80, 0x78, 0x60, 0xde, 0x01, 0xb8, 0xa5,
-  0x76, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x4b, 0x88, 0x07, 0xe6,
-  0x1d, 0x04, 0xc2, 0x05, 0xc3, 0x5c, 0x60, 0xd4, 0x1d, 0x46, 0x9d, 0x58,
-  0x07, 0xc3, 0xdc, 0x1c, 0x07, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c,
-  0x18, 0x1c, 0x00, 0x08, 0x82, 0x41, 0x65, 0xe2, 0x81, 0x7c, 0x07, 0xae,
-  0x1d, 0x80, 0x78, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a,
-  0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00,
-  0x82, 0x60, 0xf0, 0xb4, 0x78, 0x90, 0xdf, 0x41, 0x42, 0x04, 0x23, 0x06,
-  0x08, 0x00, 0x82, 0x60, 0xf0, 0xb8, 0x78, 0xa0, 0xdf, 0x41, 0x42, 0x04,
-  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xf0, 0xbc, 0x78, 0xb0, 0xdf, 0x41,
-  0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0xb5, 0x78, 0xa0,
-  0xdf, 0x01, 0x6e, 0x07, 0xc1, 0x89, 0x07, 0xe7, 0x1d, 0x94, 0x78, 0x30,
-  0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x70, 0xc2, 0x18, 0x27, 0x8c, 0x51,
-  0xc1, 0x7b, 0x07, 0x57, 0x43, 0xb0, 0x17, 0x18, 0x35, 0x4b, 0x30, 0x27,
-  0xc3, 0x0d, 0xf8, 0x16, 0xe3, 0x01, 0x18, 0xcc, 0x32, 0x88, 0xc9, 0x9c,
-  0x04, 0xf6, 0xda, 0x41, 0x6c, 0x07, 0xf1, 0x19, 0x8e, 0x30, 0x27, 0xd9,
-  0x0e, 0x88, 0x6f, 0x96, 0x61, 0x4c, 0xcc, 0x24, 0xb0, 0xd9, 0x0e, 0xce,
-  0x29, 0x3e, 0x16, 0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x8c, 0xb2, 0xc0,
-  0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xd8, 0xf1, 0x40, 0x87, 0x1b, 0x82,
-  0x1c, 0x0f, 0xc0, 0x60, 0x96, 0x81, 0x4c, 0xca, 0x24, 0xb0, 0x61, 0xb7,
-  0x03, 0xf8, 0xcc, 0x12, 0xa8, 0x89, 0xe9, 0x76, 0x40, 0xc4, 0x67, 0x96,
-  0x40, 0x4d, 0x86, 0x23, 0xe2, 0x69, 0xb7, 0x03, 0xe1, 0x9b, 0x65, 0x38,
-  0x13, 0x35, 0x09, 0x4c, 0x9e, 0x78, 0x3b, 0x88, 0x8f, 0x05, 0x0e, 0x7d,
-  0x2e, 0x18, 0xe6, 0x02, 0xa3, 0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f,
-  0x22, 0xcc, 0x3c, 0xd0, 0xe1, 0x86, 0x80, 0xcc, 0x03, 0x30, 0x98, 0x65,
-  0x40, 0x93, 0x34, 0x09, 0x8c, 0xbc, 0x83, 0x21, 0x3e, 0xb3, 0x04, 0x6a,
-  0x62, 0xc4, 0x79, 0x07, 0xf0, 0x99, 0x25, 0x50, 0x93, 0x81, 0x16, 0x43,
-  0x23, 0x13, 0xac, 0x4c, 0x08, 0x34, 0x11, 0xd2, 0x04, 0x3f, 0x03, 0x33,
-  0xb9, 0x60, 0x18, 0x33, 0xef, 0x40, 0xbd, 0x83, 0xf8, 0x0c, 0x47, 0xd4,
-  0xdc, 0x7a, 0x07, 0xc4, 0x37, 0xcb, 0xb0, 0x26, 0x6e, 0x12, 0x18, 0x7b,
-  0x07, 0x36, 0x17, 0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x46,
-  0x59, 0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xd0, 0x79, 0xa0, 0xc3,
-  0x0d, 0x81, 0x9c, 0x07, 0x60, 0x30, 0xcb, 0xc0, 0x26, 0x6d, 0x12, 0xd8,
-  0x40, 0xdf, 0x01, 0x7c, 0x66, 0x09, 0xe4, 0xc4, 0xe2, 0x3b, 0x20, 0xe2,
-  0x33, 0x4b, 0x20, 0x27, 0xc3, 0x11, 0x60, 0x27, 0xdf, 0x81, 0xf0, 0xcd,
-  0x32, 0xbc, 0x89, 0x9c, 0x04, 0x16, 0x76, 0xf3, 0x1d, 0xc4, 0xc7, 0x02,
-  0x87, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x51, 0x16, 0x44, 0xf2, 0xb1, 0x22,
-  0x88, 0x4f, 0x11, 0x7f, 0x1e, 0xe8, 0x70, 0x43, 0xd0, 0xe7, 0x01, 0x18,
-  0xcc, 0x32, 0xc0, 0x49, 0x9c, 0x04, 0xb6, 0xdf, 0xc1, 0x10, 0x9f, 0x59,
-  0x02, 0x39, 0x31, 0x02, 0xc4, 0x03, 0xf8, 0xcc, 0x12, 0xc8, 0xc9, 0x40,
-  0x8b, 0xa1, 0xb1, 0x09, 0xd6, 0x26, 0x04, 0x9c, 0x08, 0x71, 0x62, 0xeb,
-  0x81, 0x9b, 0x5c, 0x30, 0xcc, 0x05, 0x46, 0xdd, 0x66, 0xd4, 0xb1, 0x77,
-  0x30, 0xcc, 0xf5, 0x73, 0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88,
-  0xc1, 0x01, 0x80, 0x20, 0x18, 0x54, 0xb0, 0x1e, 0xf0, 0x79, 0x80, 0xe3,
-  0x81, 0xaa, 0x07, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09,
-  0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20,
-  0x08, 0x06, 0xcf, 0xad, 0x07, 0xa3, 0x1e, 0x24, 0x44, 0x30, 0x62, 0x80,
-  0x00, 0x20, 0x08, 0x06, 0x0f, 0xae, 0x07, 0xa4, 0x1e, 0x24, 0x44, 0x30,
-  0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x4f, 0xae, 0x07, 0xa5, 0x1e, 0x24,
-  0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0xdb, 0xad, 0x07, 0xa4,
-  0x1e, 0x88, 0x79, 0x10, 0xc4, 0x7a, 0x10, 0xe7, 0xc1, 0xab, 0x07, 0xa3,
-  0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0x27, 0x8c, 0x71, 0xc2, 0x18, 0x15,
-  0xe4, 0x79, 0x70, 0x35, 0x04, 0x7b, 0x81, 0x51, 0xb3, 0x04, 0x73, 0x32,
-  0xd0, 0x62, 0xe8, 0x86, 0x8f, 0xe8, 0xf2, 0xd0, 0x23, 0x36, 0x01, 0x26,
-  0x82, 0x9c, 0xe8, 0xf2, 0x10, 0x26, 0xb3, 0x0c, 0x74, 0x62, 0x27, 0xa2,
-  0x37, 0x1c, 0x51, 0x46, 0x6e, 0x1e, 0x0c, 0xdf, 0x99, 0xd1, 0x30, 0xc3,
-  0x0d, 0x01, 0x99, 0x07, 0x64, 0x50, 0x43, 0xa0, 0xc3, 0x11, 0xa9, 0x27,
-  0xe7, 0xc1, 0xf0, 0x55, 0x20, 0xe8, 0xad, 0xde, 0x30, 0xc3, 0x0d, 0xc1,
-  0x99, 0x07, 0x64, 0x50, 0xc1, 0xa0, 0xb3, 0x0c, 0x75, 0xa2, 0x2a, 0xc1,
-  0xad, 0x78, 0x30, 0xcc, 0xf1, 0x74, 0x30, 0xcc, 0x88, 0xc1, 0x01, 0x80,
-  0x20, 0x18, 0x54, 0xed, 0x1e, 0xe4, 0x7a, 0x50, 0xe7, 0xc1, 0xb9, 0x07,
-  0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a,
-  0x40, 0x0c, 0x45, 0x1c, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x0f,
-  0xbd, 0x07, 0xe0, 0x1e, 0x1c, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08,
-  0x06, 0x4f, 0xbd, 0x07, 0xe1, 0x1e, 0x30, 0x44, 0x30, 0x62, 0x80, 0x00,
-  0x20, 0x08, 0x06, 0x8f, 0xbd, 0x07, 0xe2, 0x1e, 0x48, 0x44, 0x30, 0x62,
-  0xa0, 0x00, 0x20, 0x08, 0x06, 0x1b, 0xbd, 0x07, 0xe1, 0x1e, 0xfc, 0x79,
-  0x10, 0xb8, 0x7b, 0xe0, 0xea, 0x01, 0xbb, 0x07, 0xa3, 0x09, 0x01, 0x30,
-  0x9a, 0x20, 0x04, 0x27, 0x8c, 0x71, 0xc2, 0x18, 0x15, 0xd8, 0x7a, 0x70,
-  0x35, 0x04, 0x7b, 0x81, 0x51, 0xb3, 0x04, 0xaa, 0x32, 0xdc, 0xf0, 0x7b,
-  0xf9, 0x1e, 0x80, 0xc1, 0x2c, 0xc3, 0x9d, 0xe0, 0x49, 0x50, 0x7f, 0x1e,
-  0xa4, 0x7b, 0x00, 0x17, 0x18, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06,
-  0x0b, 0xc9, 0x07, 0xea, 0x1e, 0x90, 0x52, 0xab, 0x07, 0x23, 0x06, 0x07,
-  0x00, 0x82, 0x60, 0xb0, 0x94, 0x7c, 0xa0, 0xee, 0x41, 0x20, 0x5c, 0x30,
-  0x4c, 0x89, 0x7a, 0xe0, 0xee, 0x01, 0x5c, 0x60, 0xd4, 0x88, 0xc1, 0x01,
-  0x80, 0x20, 0x18, 0x2c, 0x29, 0x1f, 0xbc, 0x7b, 0x90, 0x7e, 0xb2, 0x1e,
-  0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0xa2, 0xf2, 0xc1, 0xbb, 0x07,
-  0x81, 0x70, 0xc1, 0x30, 0x17, 0x18, 0x75, 0x87, 0x51, 0xb7, 0xe6, 0xc1,
-  0x30, 0xc7, 0xd7, 0xc1, 0x30, 0x47, 0x0c, 0x73, 0xc4, 0x30, 0x23, 0x06,
-  0x07, 0x00, 0x82, 0x60, 0x50, 0xbd, 0x7c, 0xb0, 0xef, 0xc1, 0xad, 0x07,
-  0x29, 0x1f, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c,
-  0xc2, 0x68, 0x02, 0x31, 0x14, 0x91, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20,
-  0x18, 0x3c, 0x36, 0x1f, 0x88, 0x7c, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02,
-  0x80, 0x20, 0x18, 0x3c, 0x37, 0x1f, 0x8c, 0x7c, 0x90, 0x10, 0xc1, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x3c, 0x38, 0x1f, 0x90, 0x7c, 0x90, 0x10,
-  0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x6c, 0x36, 0x1f, 0x8c, 0x7c,
-  0x10, 0xee, 0x41, 0x00, 0xf3, 0x01, 0xbc, 0x07, 0x2e, 0x1f, 0x8c, 0x26,
-  0x04, 0xc0, 0x68, 0x82, 0x10, 0x9c, 0x30, 0xc6, 0x09, 0x63, 0x54, 0x80,
-  0xef, 0xc1, 0xd5, 0x10, 0xec, 0x05, 0x46, 0xcd, 0x12, 0xa8, 0xca, 0x70,
-  0x43, 0x08, 0x06, 0x3a, 0x1f, 0x80, 0xc1, 0x2c, 0x43, 0x9e, 0xa8, 0x4a,
-  0x60, 0xb8, 0x1e, 0xe8, 0x7a, 0x10, 0x9f, 0xe1, 0x88, 0x72, 0xda, 0xf5,
-  0x80, 0xf8, 0x66, 0x19, 0xf4, 0xa4, 0x4f, 0x02, 0xe3, 0xf5, 0xc0, 0x9c,
-  0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x28, 0x0b, 0x0c,
-  0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0xb2, 0x0f, 0x74, 0xb8, 0x21, 0x10,
-  0xfb, 0x00, 0x0c, 0x66, 0x19, 0xf6, 0x84, 0x4f, 0x02, 0x1b, 0xc8, 0x3d,
-  0x80, 0xcf, 0x2c, 0x41, 0xa8, 0xd8, 0xb8, 0x07, 0x44, 0x7c, 0x66, 0x09,
-  0x42, 0x65, 0x38, 0x02, 0x9e, 0xc8, 0x3d, 0x10, 0xbe, 0x59, 0x06, 0x3f,
-  0x09, 0x95, 0xc0, 0xe2, 0xa9, 0xdc, 0x83, 0xf8, 0x58, 0xe0, 0xd0, 0xe7,
-  0x82, 0x61, 0x2e, 0x30, 0xca, 0x82, 0x48, 0x3e, 0x56, 0x04, 0xf1, 0x29,
-  0xe2, 0xed, 0x03, 0x1d, 0x6e, 0x08, 0xda, 0x3e, 0x00, 0x83, 0x59, 0x86,
-  0x3f, 0x01, 0x95, 0xc0, 0xda, 0x3d, 0x18, 0xe2, 0x33, 0x4b, 0x10, 0x2a,
-  0x46, 0xc0, 0x7b, 0x00, 0x9f, 0x59, 0x82, 0x50, 0x19, 0x68, 0x31, 0xb4,
-  0x3d, 0xc1, 0xf8, 0x84, 0xf8, 0x13, 0x01, 0x54, 0xec, 0x33, 0xe8, 0x93,
-  0x0b, 0x86, 0xb1, 0x77, 0x0f, 0xe6, 0x3d, 0x88, 0xcf, 0x70, 0x84, 0x0f,
-  0x06, 0xf4, 0x1e, 0x10, 0xdf, 0x2c, 0x83, 0xa8, 0x94, 0x4a, 0x60, 0xf5,
-  0x1e, 0xfc, 0x60, 0x10, 0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05,
-  0x46, 0x59, 0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xf4, 0x7d, 0xa0,
-  0xc3, 0x0d, 0xc1, 0xde, 0x07, 0x60, 0x30, 0xcb, 0x30, 0x2a, 0xa4, 0x12,
-  0xd8, 0xd0, 0xef, 0x01, 0x7c, 0x66, 0x09, 0x52, 0xc5, 0xf4, 0x3d, 0x20,
-  0xe2, 0x33, 0x4b, 0x90, 0x2a, 0xc3, 0x11, 0x69, 0x18, 0xec, 0x7b, 0x20,
-  0x7c, 0xb3, 0x0c, 0xa6, 0x92, 0x2a, 0x81, 0xa9, 0x61, 0xc0, 0xef, 0x41,
-  0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x18, 0x65, 0x41, 0x24,
-  0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x81, 0xfa, 0x81, 0x0e, 0x37, 0x04, 0xa6,
-  0x1f, 0x80, 0xc1, 0x2c, 0xc3, 0xa9, 0xa0, 0x4a, 0x60, 0x24, 0x1f, 0x0c,
-  0xf1, 0x99, 0x25, 0x48, 0x15, 0x23, 0x52, 0x3e, 0x80, 0xcf, 0x2c, 0x41,
-  0xaa, 0x0c, 0xb4, 0x18, 0xda, 0xa8, 0x60, 0xa4, 0x42, 0x9c, 0x8a, 0x80,
-  0x2a, 0xe8, 0x28, 0x94, 0xca, 0x05, 0xc3, 0x5c, 0x60, 0xd4, 0x6d, 0x46,
-  0x5d, 0xbd, 0x07, 0xc3, 0x9c, 0x89, 0x07, 0xc3, 0x1c, 0x31, 0xcc, 0x11,
-  0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x41, 0x95, 0xfb, 0x41, 0xe9,
-  0x07, 0x61, 0x1f, 0xcc, 0x7e, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42,
-  0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23, 0x06,
-  0x08, 0x00, 0x82, 0x60, 0xf0, 0x80, 0x7f, 0xc0, 0xfa, 0x41, 0x42, 0x04,
-  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xf0, 0x84, 0x7f, 0xd0, 0xfa, 0x41,
-  0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xf0, 0x88, 0x7f, 0xe0,
-  0xfa, 0x41, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0x81,
-  0x7f, 0xd0, 0xfa, 0xc1, 0xda, 0x07, 0x81, 0xee, 0x07, 0x7a, 0x1f, 0xe0,
-  0x7e, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x70, 0xc2, 0x18, 0x27,
-  0x8c, 0x51, 0x81, 0xe8, 0x07, 0x57, 0x43, 0xb0, 0x17, 0x18, 0x35, 0x4b,
-  0xa0, 0x2a, 0x03, 0x2d, 0x86, 0x6e, 0xd4, 0x09, 0xbb, 0x0f, 0x74, 0x62,
-  0x13, 0x77, 0x22, 0xa4, 0x0a, 0xbb, 0x0f, 0x78, 0x62, 0xae, 0x18, 0xe0,
-  0x7d, 0x00, 0x9f, 0x59, 0x86, 0x55, 0x69, 0x15, 0x56, 0x0c, 0x86, 0x23,
-  0x60, 0x31, 0xc0, 0xfb, 0x60, 0xf8, 0x2e, 0x16, 0x83, 0x61, 0x86, 0x1b,
-  0x02, 0xb7, 0x0f, 0xc8, 0xa0, 0x86, 0x40, 0x87, 0x23, 0x0a, 0xbe, 0x0f,
-  0x86, 0xaf, 0x02, 0x41, 0xef, 0x18, 0x66, 0xb8, 0x21, 0x88, 0xfb, 0x80,
-  0x0c, 0x2a, 0x18, 0x74, 0x96, 0x81, 0x55, 0xc2, 0x25, 0xb8, 0x9a, 0x0f,
-  0x86, 0x39, 0x33, 0x0f, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83,
-  0xea, 0xfe, 0x83, 0xf1, 0x0f, 0xfe, 0x3e, 0x88, 0xff, 0x60, 0x34, 0x21,
-  0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1,
-  0x88, 0x43, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0xf1, 0xff, 0x40,
-  0xfd, 0x83, 0x83, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0xf9,
-  0xff, 0x60, 0xfd, 0x03, 0x86, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1,
-  0xe0, 0x01, 0x41, 0x81, 0xfd, 0x03, 0x89, 0x08, 0x46, 0x0c, 0x14, 0x00,
-  0x04, 0xc1, 0x60, 0xf3, 0xff, 0x60, 0xfd, 0x83, 0xd4, 0x0f, 0x02, 0xfc,
-  0x0f, 0x70, 0x3f, 0xb0, 0xff, 0x60, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84,
-  0xe0, 0x84, 0x31, 0x4e, 0x18, 0xa3, 0x02, 0xf0, 0x0f, 0xae, 0x86, 0x60,
-  0x2f, 0x30, 0x6a, 0x96, 0x20, 0x5c, 0x86, 0x1b, 0xd2, 0x31, 0x18, 0x41,
-  0x01, 0x0c, 0x66, 0x19, 0x5c, 0xe5, 0x55, 0x82, 0x4a, 0xfd, 0x60, 0xfe,
-  0x03, 0xb8, 0xc0, 0xa8, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x58, 0x5c,
-  0x50, 0xa0, 0xff, 0xe0, 0x1d, 0x83, 0xdb, 0x0f, 0x46, 0x0c, 0x0e, 0x00,
-  0x04, 0xc1, 0x60, 0x79, 0x41, 0x81, 0xfe, 0x83, 0x40, 0xb8, 0x60, 0x98,
-  0x62, 0xfd, 0x00, 0xff, 0x03, 0xb8, 0xc0, 0xa8, 0x11, 0x83, 0x03, 0x00,
-  0x41, 0x30, 0x58, 0x66, 0x50, 0xc8, 0xff, 0xa0, 0x0c, 0x78, 0x3f, 0x18,
-  0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x85, 0x06, 0x85, 0xfc, 0x0f, 0x02,
-  0xe1, 0x82, 0x61, 0x2e, 0x30, 0xea, 0x0e, 0xa3, 0xae, 0xee, 0x83, 0x61,
-  0xce, 0xd4, 0x83, 0x61, 0x8e, 0x18, 0xe6, 0x88, 0x61, 0x46, 0x0c, 0x0e,
-  0x00, 0x04, 0xc1, 0xa0, 0xca, 0x41, 0xa1, 0x04, 0x85, 0xf0, 0x0f, 0x66,
-  0x50, 0x18, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84,
-  0xd1, 0x04, 0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30,
-  0x78, 0xc0, 0x50, 0x60, 0x41, 0x21, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00,
-  0x41, 0x30, 0x78, 0xc2, 0x50, 0x68, 0x41, 0x21, 0x21, 0x82, 0x11, 0x03,
-  0x04, 0x00, 0x41, 0x30, 0x78, 0xc4, 0x50, 0x70, 0x41, 0x21, 0x21, 0x82,
-  0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0xd8, 0xc0, 0x50, 0x68, 0x41, 0x61,
-  0xfd, 0x83, 0x40, 0x07, 0x05, 0xfd, 0x0f, 0x70, 0x50, 0x18, 0x4d, 0x08,
-  0x80, 0xd1, 0x04, 0x21, 0x38, 0x61, 0x8c, 0x13, 0xc6, 0xa8, 0x40, 0x04,
-  0x85, 0xab, 0x21, 0xd8, 0x0b, 0x8c, 0x9a, 0x25, 0x08, 0x97, 0xe1, 0x86,
-  0x95, 0x0c, 0xc8, 0x50, 0x00, 0x83, 0x59, 0x06, 0x58, 0x09, 0x97, 0xc0,
-  0xc4, 0x3f, 0x20, 0xff, 0x20, 0x3e, 0xc3, 0x11, 0x30, 0x19, 0x94, 0x7f,
-  0x40, 0x7c, 0xb3, 0x0c, 0xb1, 0x42, 0x2b, 0x81, 0x99, 0x7f, 0x10, 0x93,
-  0x41, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x18, 0x65, 0x81,
-  0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xe1, 0x86, 0x82, 0x0e, 0x37, 0x04,
-  0x6c, 0x28, 0x80, 0xc1, 0x2c, 0x83, 0xac, 0xcc, 0x4a, 0x60, 0x83, 0xfb,
-  0x07, 0xf0, 0x99, 0x25, 0xc0, 0x15, 0x6b, 0xff, 0x80, 0x88, 0xcf, 0x2c,
-  0x01, 0xae, 0x0c, 0x47, 0xec, 0x64, 0xe0, 0xfe, 0x81, 0xf0, 0xcd, 0x32,
-  0xd4, 0x0a, 0xae, 0x04, 0xc6, 0x93, 0xc1, 0xfb, 0x07, 0xf1, 0xb1, 0xc0,
-  0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0x60, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08,
-  0xe2, 0x53, 0x44, 0x1e, 0x0a, 0x3a, 0xdc, 0x10, 0xdc, 0xa1, 0x00, 0x06,
-  0xb3, 0x0c, 0xb6, 0x72, 0x2b, 0x81, 0xdd, 0x7f, 0x30, 0xc4, 0x67, 0x96,
-  0x00, 0x57, 0x8c, 0xd0, 0xff, 0x00, 0x3e, 0xb3, 0x04, 0xb8, 0x32, 0xd0,
-  0x62, 0x68, 0xb2, 0x82, 0xcd, 0x0a, 0x61, 0x2b, 0xc2, 0xad, 0xb0, 0xaa,
-  0x40, 0x2b, 0x17, 0x0c, 0x63, 0xf9, 0x1f, 0xf4, 0x7f, 0x10, 0x9f, 0xe1,
-  0x08, 0x5d, 0xf0, 0xff, 0x80, 0xf8, 0x66, 0x19, 0x72, 0x85, 0x57, 0x02,
-  0xfb, 0xff, 0x60, 0x17, 0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9,
-  0xc0, 0x28, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0x53, 0x14,
-  0x74, 0xb8, 0x21, 0x28, 0x45, 0x01, 0x0c, 0x66, 0x19, 0x74, 0x65, 0x57,
-  0x02, 0x1b, 0x4e, 0x50, 0x80, 0xcf, 0x2c, 0x01, 0xb8, 0x18, 0x09, 0x0a,
-  0x44, 0x7c, 0x66, 0x09, 0xc0, 0x65, 0x38, 0xa2, 0x1c, 0x4a, 0x50, 0x10,
-  0xbe, 0x59, 0x86, 0x5e, 0x01, 0x97, 0xc0, 0xcc, 0xc1, 0x04, 0x85, 0xf8,
-  0x58, 0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x30, 0xca, 0x82, 0x48, 0x3e,
-  0x56, 0x04, 0xf1, 0x29, 0x42, 0x16, 0x05, 0x1d, 0x6e, 0x08, 0x60, 0x51,
-  0x00, 0x83, 0x59, 0x06, 0x5f, 0xf9, 0x95, 0xc0, 0x5c, 0x50, 0x18, 0xe2,
-  0x33, 0x4b, 0x00, 0x2e, 0x46, 0xcc, 0xa0, 0x00, 0x9f, 0x59, 0x02, 0x70,
-  0x19, 0x68, 0x31, 0x34, 0x5d, 0xc1, 0x76, 0x85, 0xf0, 0x15, 0xe1, 0x57,
-  0xc0, 0x83, 0x57, 0x2e, 0x18, 0xe6, 0x02, 0xa3, 0x6e, 0x33, 0xea, 0xfe,
-  0x3f, 0x18, 0xe6, 0x60, 0x3e, 0x18, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66,
-  0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xaa, 0x71, 0x14, 0x5e, 0x51, 0x58,
-  0x43, 0xa1, 0x17, 0x85, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1,
-  0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00,
-  0x10, 0x04, 0x83, 0x47, 0x1d, 0x05, 0x5b, 0x14, 0x12, 0x22, 0x18, 0x31,
-  0x40, 0x00, 0x10, 0x04, 0x83, 0x67, 0x1d, 0x85, 0x5b, 0x14, 0x12, 0x22,
-  0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x87, 0x1d, 0x05, 0x5c, 0x14,
-  0x12, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x4d, 0x1d, 0x85,
-  0x5b, 0x14, 0xea, 0x50, 0x08, 0xc8, 0x51, 0x20, 0x45, 0x41, 0x1c, 0x85,
-  0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0x13, 0xc6, 0x38, 0x61, 0x8c,
-  0x0a, 0x58, 0x51, 0xb8, 0x1a, 0x82, 0xbd, 0xc0, 0xa8, 0x59, 0x82, 0x70,
-  0x19, 0x68, 0x31, 0x74, 0x83, 0x55, 0x40, 0x92, 0x58, 0x15, 0x9b, 0x70,
-  0x15, 0x01, 0x5c, 0x40, 0x92, 0x78, 0x95, 0x59, 0x06, 0x71, 0x21, 0x97,
-  0xda, 0x0c, 0x86, 0x23, 0x74, 0x33, 0x08, 0x45, 0x61, 0xf8, 0x6e, 0x37,
-  0x83, 0x61, 0x86, 0x1b, 0x82, 0x3b, 0x14, 0xc8, 0xa0, 0x86, 0x40, 0x87,
-  0x23, 0x5c, 0xa2, 0x14, 0x85, 0xe1, 0xab, 0x40, 0xd0, 0x83, 0x89, 0x61,
-  0x86, 0x1b, 0x02, 0x3d, 0x14, 0xc8, 0xa0, 0x82, 0x41, 0x67, 0x19, 0xc6,
-  0x05, 0x5f, 0x82, 0xf3, 0x41, 0x61, 0x98, 0x7b, 0xfb, 0x60, 0x98, 0x11,
-  0x83, 0x03, 0x00, 0x41, 0x30, 0xa8, 0x40, 0x52, 0x60, 0x47, 0x01, 0x15,
-  0x05, 0x7d, 0x14, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13,
-  0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x38, 0x64, 0xc4, 0x00, 0x01, 0x40,
-  0x10, 0x0c, 0x9e, 0x93, 0x14, 0xe6, 0x51, 0x38, 0x88, 0x60, 0xc4, 0x00,
-  0x01, 0x40, 0x10, 0x0c, 0x1e, 0x94, 0x14, 0xe8, 0x51, 0x60, 0x88, 0x60,
-  0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x9e, 0x94, 0x14, 0xea, 0x51, 0x90,
-  0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0xb6, 0x93, 0x14, 0xe8,
-  0x51, 0x90, 0x45, 0x21, 0x08, 0x49, 0x21, 0x1c, 0x85, 0x7f, 0x14, 0x46,
-  0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x4e, 0x18, 0xe3, 0x84, 0x31, 0x2a,
-  0x48, 0x47, 0xe1, 0x6a, 0x08, 0xf6, 0x02, 0xa3, 0x66, 0x09, 0xf0, 0x65,
-  0xb8, 0x41, 0x3e, 0x03, 0x96, 0x14, 0xc0, 0x60, 0x96, 0xa1, 0x5c, 0xcc,
-  0x25, 0x28, 0x59, 0x14, 0xf8, 0x51, 0x80, 0x0b, 0x8c, 0x1a, 0x31, 0x38,
-  0x00, 0x10, 0x04, 0x83, 0xe5, 0x26, 0x85, 0x7e, 0x14, 0xf2, 0x33, 0x00,
-  0x47, 0x61, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x16, 0x9c, 0x14, 0xfa,
-  0x51, 0x08, 0x84, 0x0b, 0x86, 0xa9, 0x5a, 0x14, 0x42, 0x52, 0x80, 0x0b,
-  0x8c, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x85, 0x27, 0x05, 0x91,
-  0x14, 0xdc, 0xa2, 0x1c, 0x85, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x58,
-  0x7a, 0x52, 0x10, 0x49, 0x21, 0x10, 0x2e, 0x18, 0xe6, 0x02, 0xa3, 0xee,
-  0x30, 0xea, 0xfc, 0x50, 0x18, 0xe6, 0x5e, 0x3f, 0x18, 0xe6, 0x88, 0x61,
-  0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x2a, 0xb1, 0x14,
-  0x5c, 0x52, 0x50, 0x47, 0x81, 0x27, 0x85, 0xd1, 0x84, 0x00, 0x18, 0x4d,
-  0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x12, 0x19,
-  0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x27, 0x2d, 0x85, 0x9a, 0x14, 0x12,
-  0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x47, 0x2d, 0x05, 0x9b,
-  0x14, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x67, 0x2d,
-  0x85, 0x9b, 0x14, 0x12, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83,
-  0x2d, 0x2d, 0x05, 0x9b, 0x14, 0xe8, 0x51, 0x08, 0xc6, 0x52, 0x18, 0x49,
-  0x21, 0x2c, 0x85, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0x13, 0xc6,
-  0x38, 0x61, 0x8c, 0x0a, 0x56, 0x52, 0xb8, 0x1a, 0x82, 0xbd, 0xc0, 0xa8,
-  0x59, 0x02, 0x7c, 0x19, 0x6e, 0xa0, 0xd1, 0xa0, 0x2d, 0x05, 0x30, 0x98,
-  0x65, 0x38, 0x17, 0x7c, 0x09, 0x6c, 0x1d, 0x85, 0x76, 0x14, 0xe2, 0x33,
-  0x1c, 0xa1, 0xa3, 0x81, 0x3b, 0x0a, 0xc4, 0x37, 0xcb, 0x80, 0x2e, 0xeb,
-  0x12, 0xd8, 0x3b, 0x0a, 0x3b, 0x1a, 0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17,
-  0x0c, 0x73, 0x81, 0x51, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11,
-  0x77, 0x29, 0xe8, 0x70, 0x43, 0x50, 0x97, 0x02, 0x18, 0xcc, 0x32, 0xa4,
-  0x8b, 0xba, 0x04, 0x36, 0xdc, 0xa3, 0x00, 0x9f, 0x59, 0x82, 0x77, 0x31,
-  0x7b, 0x14, 0x88, 0xf8, 0xcc, 0x12, 0xbc, 0xcb, 0x70, 0x44, 0x99, 0x06,
-  0xf7, 0x28, 0x08, 0xdf, 0x2c, 0x03, 0xbb, 0xbc, 0x4b, 0x60, 0x66, 0x1a,
-  0xe0, 0xa3, 0x10, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x46,
-  0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0x88, 0xa6, 0xa0, 0xc3,
-  0x0d, 0x01, 0x68, 0x0a, 0x60, 0x30, 0xcb, 0xd0, 0x2e, 0xee, 0x12, 0x18,
-  0x48, 0x0a, 0x43, 0x7c, 0x66, 0x09, 0xde, 0xc5, 0x88, 0x91, 0x14, 0xe0,
-  0x33, 0x4b, 0xf0, 0x2e, 0x03, 0x2d, 0x86, 0x96, 0x2e, 0x98, 0xba, 0x10,
-  0xed, 0x22, 0xb8, 0x0b, 0x38, 0x0b, 0xeb, 0x72, 0xc1, 0x30, 0x26, 0x92,
-  0x82, 0x49, 0x0a, 0xf1, 0x19, 0x8e, 0x18, 0x8f, 0x93, 0x14, 0x88, 0x6f,
-  0x96, 0x01, 0x5e, 0xe6, 0x25, 0x30, 0x94, 0x14, 0xc8, 0x23, 0x3e, 0x16,
-  0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x8c, 0xb2, 0xc0, 0x90, 0x8f, 0x15,
-  0x41, 0x7c, 0x8a, 0x80, 0x4d, 0x41, 0x87, 0x1b, 0x02, 0xd7, 0x14, 0xc0,
-  0x60, 0x96, 0x21, 0x5e, 0xe4, 0x25, 0xb0, 0x01, 0x26, 0x05, 0xf8, 0xcc,
-  0x12, 0xdc, 0x8b, 0xb5, 0xa4, 0x40, 0xc4, 0x67, 0x96, 0xe0, 0x5e, 0x86,
-  0x23, 0xdc, 0xc3, 0x25, 0x05, 0xe1, 0x9b, 0x65, 0xa0, 0x97, 0x7b, 0x09,
-  0xec, 0x3d, 0x5e, 0x52, 0x88, 0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6,
-  0x02, 0xa3, 0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0x76, 0x53,
-  0xd0, 0xe1, 0x86, 0x20, 0x37, 0x05, 0x30, 0x98, 0x65, 0xa8, 0x17, 0x7b,
-  0x09, 0xec, 0x26, 0x85, 0x21, 0x3e, 0xb3, 0x04, 0xf7, 0x62, 0x04, 0x4f,
-  0x0a, 0xf0, 0x99, 0x25, 0xb8, 0x97, 0x81, 0x16, 0x43, 0x8b, 0x17, 0x4c,
-  0x5e, 0x88, 0x7a, 0x11, 0xec, 0x45, 0x7e, 0xe6, 0xe5, 0x82, 0x61, 0x2e,
-  0x30, 0xea, 0x36, 0xa3, 0x0e, 0x25, 0x85, 0x61, 0x2e, 0x07, 0x85, 0x61,
-  0x8e, 0x18, 0xe6, 0x88, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0xa0,
-  0x62, 0x4f, 0x01, 0x37, 0x05, 0xba, 0x14, 0xcc, 0x53, 0x18, 0x4d, 0x08,
-  0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28,
-  0x22, 0x91, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0x78, 0xe6, 0x53, 0xf8,
-  0x4d, 0x21, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0x78, 0xe8,
-  0x53, 0x00, 0x4f, 0x21, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30,
-  0x78, 0xea, 0x53, 0x08, 0x4f, 0x21, 0x21, 0x82, 0x11, 0x03, 0x05, 0x00,
-  0x41, 0x30, 0xd8, 0xe6, 0x53, 0x00, 0x4f, 0xc1, 0x2f, 0x85, 0xa0, 0x3d,
-  0x85, 0xd6, 0x14, 0xd6, 0x53, 0x18, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21,
-  0x38, 0x61, 0x8c, 0x13, 0xc6, 0xa8, 0xa0, 0x36, 0x85, 0xab, 0x21, 0xd8,
-  0x0b, 0x8c, 0x9a, 0x25, 0xc0, 0x97, 0x81, 0x16, 0x43, 0x37, 0xc6, 0x45,
-  0x66, 0x09, 0x71, 0xb1, 0x89, 0x72, 0x11, 0xee, 0x45, 0x66, 0x09, 0x73,
-  0x99, 0x65, 0xc8, 0x97, 0x7d, 0xf1, 0xd5, 0x60, 0x38, 0x62, 0x15, 0x03,
-  0xd5, 0x14, 0x86, 0xef, 0x58, 0x31, 0x18, 0x66, 0xb8, 0x21, 0x00, 0x4d,
-  0x81, 0x0c, 0x6a, 0x08, 0x74, 0x38, 0xe2, 0x46, 0x5c, 0x53, 0x18, 0xbe,
-  0x0a, 0x04, 0xbd, 0x1c, 0x19, 0x66, 0xb8, 0x21, 0x18, 0x4d, 0x81, 0x0c,
-  0x2a, 0x18, 0x74, 0x96, 0x41, 0x5f, 0x5e, 0x26, 0xb8, 0xb3, 0x14, 0x86,
-  0x39, 0x3c, 0x14, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x2a,
-  0x45, 0x85, 0xfa, 0x14, 0x62, 0x53, 0x18, 0x51, 0x61, 0x34, 0x21, 0x00,
-  0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88,
-  0x43, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x81, 0x51, 0x81, 0x3f,
-  0x85, 0x83, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x89, 0x51,
-  0xa1, 0x3f, 0x05, 0x86, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0,
-  0x91, 0x51, 0xc1, 0x3f, 0x05, 0x89, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04,
-  0xc1, 0x60, 0x83, 0x51, 0xa1, 0x3f, 0x85, 0xdd, 0x14, 0x02, 0x15, 0x15,
-  0xd4, 0x53, 0x40, 0x51, 0x61, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0xe0,
-  0x84, 0x31, 0x4e, 0x18, 0xa3, 0x02, 0xf9, 0x14, 0xae, 0x86, 0x60, 0x2f,
-  0x30, 0x6a, 0x96, 0xe0, 0x65, 0x86, 0x1b, 0xf6, 0x35, 0xa8, 0x51, 0x01,
-  0x0c, 0x66, 0x19, 0xf8, 0xa5, 0x5f, 0x82, 0xda, 0x4d, 0xa1, 0x44, 0x05,
-  0xb8, 0xc0, 0xa8, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x58, 0xc0, 0x54,
-  0x30, 0x51, 0x41, 0x1d, 0x83, 0xf4, 0x14, 0x46, 0x0c, 0x0e, 0x00, 0x04,
-  0xc1, 0x60, 0x09, 0x53, 0xc1, 0x44, 0x85, 0x40, 0xb8, 0x60, 0x98, 0xf2,
-  0x4d, 0x41, 0x45, 0x05, 0xb8, 0xc0, 0xa8, 0x11, 0x83, 0x03, 0x00, 0x41,
-  0x30, 0x58, 0xca, 0x54, 0x58, 0x51, 0xe1, 0x4e, 0xdc, 0x53, 0x18, 0x31,
-  0x38, 0x00, 0x10, 0x04, 0x83, 0xc5, 0x4c, 0x85, 0x15, 0x15, 0x02, 0xe1,
-  0x82, 0x61, 0x2e, 0x30, 0xea, 0x0e, 0xa3, 0xee, 0x34, 0x85, 0x61, 0x0e,
-  0x17, 0x85, 0x61, 0x8e, 0x18, 0xe6, 0x88, 0x61, 0x46, 0x0c, 0x0e, 0x00,
-  0x04, 0xc1, 0xa0, 0x5a, 0x53, 0xe1, 0x46, 0x85, 0xf9, 0x14, 0xca, 0x54,
-  0x18, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1,
-  0x04, 0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0x78,
-  0xe4, 0x54, 0xf0, 0x51, 0x21, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41,
-  0x30, 0x78, 0xe6, 0x54, 0xf8, 0x51, 0x21, 0x21, 0x82, 0x11, 0x03, 0x04,
-  0x00, 0x41, 0x30, 0x78, 0xe8, 0x54, 0x00, 0x53, 0x21, 0x21, 0x82, 0x11,
-  0x03, 0x05, 0x00, 0x41, 0x30, 0xd8, 0xe4, 0x54, 0xf8, 0x51, 0xa1, 0x3f,
-  0x85, 0x80, 0x4d, 0x05, 0x16, 0x15, 0xd4, 0x54, 0x18, 0x4d, 0x08, 0x80,
-  0xd1, 0x04, 0x21, 0x38, 0x61, 0x8c, 0x13, 0xc6, 0xa8, 0x80, 0x46, 0x85,
-  0xab, 0x21, 0xd8, 0x0b, 0x8c, 0x9a, 0x25, 0x78, 0x99, 0xe1, 0x86, 0x9e,
-  0x0d, 0xec, 0x54, 0x00, 0x83, 0x59, 0x06, 0x7f, 0x79, 0x99, 0xc0, 0xe8,
-  0x53, 0xb0, 0x4f, 0x21, 0x3e, 0xc3, 0x11, 0x2b, 0x19, 0xdc, 0xa7, 0x40,
-  0x7c, 0xb3, 0x0c, 0xff, 0x22, 0x32, 0x81, 0xe1, 0xa7, 0xc0, 0x92, 0x41,
-  0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x18, 0x65, 0x81, 0x21,
-  0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x01, 0xaa, 0x82, 0x0e, 0x37, 0x04, 0x7e,
-  0x2a, 0x80, 0xc1, 0x2c, 0x03, 0xc8, 0x84, 0x4c, 0x60, 0x03, 0x88, 0x0a,
-  0xf0, 0x99, 0x25, 0x30, 0x19, 0xfb, 0x4f, 0x81, 0x88, 0xcf, 0x2c, 0x81,
-  0xc9, 0x0c, 0x47, 0xd8, 0x64, 0x00, 0xa2, 0x82, 0xf0, 0xcd, 0x32, 0x8c,
-  0x8c, 0xc9, 0x04, 0x76, 0x93, 0x41, 0x88, 0x0a, 0xf1, 0xb1, 0xc0, 0xa1,
-  0xcf, 0x05, 0xc3, 0x5c, 0x60, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2,
-  0x53, 0xc4, 0xaa, 0x0a, 0x3a, 0xdc, 0x10, 0xa4, 0xaa, 0x00, 0x06, 0xb3,
-  0x0c, 0x24, 0x53, 0x32, 0x81, 0xa5, 0xa8, 0x30, 0xc4, 0x67, 0x96, 0xc0,
-  0x64, 0x8c, 0x60, 0x51, 0x01, 0x3e, 0xb3, 0x04, 0x26, 0x33, 0xd0, 0x62,
-  0x68, 0x20, 0x83, 0x85, 0x0c, 0x41, 0x32, 0x42, 0xc9, 0x88, 0xaa, 0x20,
-  0x32, 0x17, 0x0c, 0x63, 0x2b, 0x2a, 0xbc, 0xa8, 0x10, 0x9f, 0xe1, 0x08,
-  0x76, 0x81, 0x51, 0x81, 0xf8, 0x66, 0x19, 0x4e, 0x46, 0x65, 0x02, 0x8b,
-  0x51, 0xa1, 0x5d, 0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xc0,
-  0x28, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0x5c, 0x15, 0x74,
-  0xb8, 0x21, 0xb8, 0x55, 0x01, 0x0c, 0x66, 0x19, 0x50, 0x26, 0x65, 0x02,
-  0x1b, 0x72, 0x54, 0x80, 0xcf, 0x2c, 0x81, 0xcb, 0x98, 0x8d, 0x0a, 0x44,
-  0x7c, 0x66, 0x09, 0x5c, 0x66, 0x38, 0xe2, 0x5e, 0x6e, 0x54, 0x10, 0xbe,
-  0x59, 0x86, 0x95, 0x71, 0x99, 0xc0, 0xf0, 0x05, 0x47, 0x85, 0xf8, 0x58,
-  0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x30, 0xca, 0x82, 0x48, 0x3e, 0x56,
-  0x04, 0xf1, 0x29, 0x82, 0x5c, 0x05, 0x1d, 0x6e, 0x08, 0xc4, 0x55, 0x00,
-  0x83, 0x59, 0x06, 0x96, 0x69, 0x99, 0xc0, 0xc0, 0x54, 0x18, 0xe2, 0x33,
-  0x4b, 0xe0, 0x32, 0x46, 0x94, 0xa9, 0x00, 0x9f, 0x59, 0x02, 0x97, 0x19,
-  0x68, 0x31, 0x34, 0x94, 0xc1, 0x52, 0x86, 0x60, 0x19, 0xa1, 0x65, 0x48,
-  0x4c, 0x65, 0x2e, 0x18, 0xe6, 0x02, 0xa3, 0x6e, 0x33, 0xea, 0x62, 0x54,
-  0x18, 0xe6, 0xc4, 0x52, 0x18, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4,
-  0xe0, 0x00, 0x40, 0x10, 0x0c, 0xaa, 0x7a, 0x15, 0xc2, 0x55, 0xe8, 0x53,
-  0xe1, 0x5d, 0x85, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84,
-  0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10,
-  0x04, 0x83, 0x87, 0x5f, 0x05, 0x74, 0x15, 0x12, 0x22, 0x18, 0x31, 0x40,
-  0x00, 0x10, 0x04, 0x83, 0xa7, 0x5f, 0x85, 0x74, 0x15, 0x12, 0x22, 0x18,
-  0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0xc7, 0x5f, 0x05, 0x75, 0x15, 0x12,
-  0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x8d, 0x5f, 0x85, 0x74,
-  0x15, 0x4e, 0x55, 0x08, 0xec, 0x55, 0xb0, 0x55, 0x81, 0x5e, 0x85, 0xd1,
-  0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0x13, 0xc6, 0x38, 0x61, 0x8c, 0x0a,
-  0x7c, 0x55, 0xb8, 0x1a, 0x82, 0xbd, 0xc0, 0xa8, 0x59, 0x82, 0x97, 0x19,
-  0x68, 0x31, 0x74, 0x43, 0x5f, 0xc8, 0x9a, 0xc8, 0x17, 0x9b, 0xe0, 0x17,
-  0xc1, 0x65, 0xc8, 0x9a, 0xe8, 0x97, 0x59, 0x06, 0x98, 0x91, 0x99, 0xf3,
-  0x0d, 0x86, 0x23, 0x54, 0x31, 0x98, 0x55, 0x61, 0xf8, 0x6e, 0x15, 0x83,
-  0x61, 0x86, 0x1b, 0x82, 0x54, 0x15, 0xc8, 0xa0, 0x86, 0x40, 0x87, 0x23,
-  0xc0, 0xe6, 0x56, 0x85, 0xe1, 0xab, 0x40, 0xd0, 0x13, 0x9b, 0x61, 0x86,
-  0x1b, 0x02, 0x56, 0x15, 0xc8, 0xa0, 0x82, 0x41, 0x67, 0x19, 0x62, 0xc6,
-  0x6c, 0x82, 0x83, 0x53, 0x61, 0x98, 0x0b, 0x4d, 0x61, 0x98, 0x11, 0x83,
-  0x03, 0x00, 0x41, 0x30, 0xa8, 0x64, 0x56, 0xf0, 0x57, 0x41, 0x57, 0x05,
-  0x96, 0x15, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06,
-  0x61, 0x34, 0x81, 0x18, 0x8a, 0x38, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10,
-  0x0c, 0x9e, 0x9c, 0x15, 0x4a, 0x56, 0x38, 0x88, 0x60, 0xc4, 0x00, 0x01,
-  0x40, 0x10, 0x0c, 0x1e, 0x9d, 0x15, 0x4c, 0x56, 0x60, 0x88, 0x60, 0xc4,
-  0x00, 0x01, 0x40, 0x10, 0x0c, 0x9e, 0x9d, 0x15, 0x4e, 0x56, 0x90, 0x88,
-  0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0xb6, 0x9c, 0x15, 0x4c, 0x56,
-  0x20, 0x57, 0x21, 0x98, 0x59, 0x61, 0x5e, 0x85, 0x98, 0x15, 0x46, 0x13,
-  0x02, 0x60, 0x34, 0x41, 0x08, 0x4e, 0x18, 0xe3, 0x84, 0x31, 0x2a, 0xd8,
-  0x57, 0xe1, 0x6a, 0x08, 0xf6, 0x02, 0xa3, 0x66, 0x09, 0xcc, 0x66, 0xb8,
-  0x81, 0x84, 0x03, 0x9f, 0x15, 0xc0, 0x60, 0x96, 0x61, 0x66, 0x68, 0x26,
-  0x28, 0x72, 0x15, 0x5c, 0x56, 0x80, 0x0b, 0x8c, 0x1a, 0x31, 0x38, 0x00,
-  0x10, 0x04, 0x83, 0x25, 0x6d, 0x85, 0x97, 0x15, 0xd2, 0x31, 0x90, 0x57,
-  0x61, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x16, 0xb5, 0x15, 0x5e, 0x56,
-  0x08, 0x84, 0x0b, 0x86, 0xa9, 0x73, 0x15, 0x66, 0x56, 0x80, 0x0b, 0x8c,
-  0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0xc5, 0x6d, 0x05, 0x9a, 0x15,
-  0x40, 0xe7, 0x5e, 0x85, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x58, 0xde,
-  0x56, 0xa0, 0x59, 0x21, 0x10, 0x2e, 0x18, 0xe6, 0x02, 0xa3, 0xee, 0x30,
-  0xea, 0x60, 0x55, 0x18, 0xe6, 0xc2, 0x53, 0x18, 0xe6, 0x88, 0x61, 0x8e,
-  0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x2a, 0xba, 0x15, 0xc0,
-  0x56, 0xe0, 0x57, 0xc1, 0x6d, 0x85, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10,
-  0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x12, 0x19, 0x31,
-  0x40, 0x00, 0x10, 0x04, 0x83, 0x67, 0x6f, 0x85, 0xb3, 0x15, 0x12, 0x22,
-  0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x87, 0x6f, 0x05, 0xb4, 0x15,
-  0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0xa7, 0x6f, 0x85,
-  0xb4, 0x15, 0x12, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x83, 0x6d,
-  0x6f, 0x05, 0xb4, 0x15, 0x4c, 0x56, 0x08, 0xea, 0x56, 0xa8, 0x59, 0x61,
-  0x6e, 0x85, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0x13, 0xc6, 0x38,
-  0x61, 0x8c, 0x0a, 0x7a, 0x56, 0xb8, 0x1a, 0x82, 0xbd, 0xc0, 0xa8, 0x59,
-  0x02, 0xb3, 0x19, 0x6e, 0x30, 0xe3, 0xe0, 0x6f, 0x05, 0x30, 0x98, 0x65,
-  0xa8, 0x19, 0xb3, 0x09, 0xac, 0x5f, 0x85, 0x7f, 0x15, 0xe2, 0x33, 0x1c,
-  0xa1, 0x92, 0x01, 0xc8, 0x0a, 0xc4, 0x37, 0xcb, 0x60, 0x33, 0x39, 0x13,
-  0x58, 0xc8, 0x0a, 0x2b, 0x19, 0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c,
-  0x73, 0x81, 0x51, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0xa9,
-  0x2b, 0xe8, 0x70, 0x43, 0x70, 0xba, 0x02, 0x18, 0xcc, 0x32, 0xdc, 0x0c,
-  0xce, 0x04, 0x36, 0xa4, 0xac, 0x00, 0x9f, 0x59, 0x82, 0x9e, 0x31, 0x94,
-  0x15, 0x88, 0xf8, 0xcc, 0x12, 0xf4, 0xcc, 0x70, 0x44, 0x4d, 0x06, 0x29,
-  0x2b, 0x08, 0xdf, 0x2c, 0x83, 0xce, 0xf4, 0x4c, 0x60, 0x36, 0x19, 0xa8,
-  0xac, 0x10, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x46, 0x59,
-  0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xd0, 0xae, 0xa0, 0xc3, 0x0d,
-  0x81, 0xec, 0x0a, 0x60, 0x30, 0xcb, 0xb0, 0x33, 0x3c, 0x13, 0x98, 0xcc,
-  0x0a, 0x43, 0x7c, 0x66, 0x09, 0x7a, 0xc6, 0x88, 0x9a, 0x15, 0xe0, 0x33,
-  0x4b, 0xd0, 0x33, 0x03, 0x2d, 0x86, 0x76, 0x33, 0x18, 0xce, 0x10, 0x3b,
-  0x23, 0xf0, 0x0c, 0xa8, 0x0a, 0x39, 0x73, 0xc1, 0x30, 0x46, 0xb3, 0x02,
-  0xce, 0x0a, 0xf1, 0x19, 0x8e, 0xa8, 0x9f, 0x9c, 0x15, 0x88, 0x6f, 0x96,
-  0xc1, 0x67, 0xc2, 0x26, 0x30, 0x9d, 0x15, 0xec, 0x27, 0x3e, 0x16, 0x0c,
-  0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x8c, 0xb2, 0xc0, 0x90, 0x8f, 0x15, 0x41,
-  0x7c, 0x8a, 0x10, 0x5f, 0x41, 0x87, 0x1b, 0x02, 0xf0, 0x15, 0xc0, 0x60,
-  0x96, 0xe1, 0x67, 0xc0, 0x26, 0xb0, 0x41, 0x6c, 0x05, 0xf8, 0xcc, 0x12,
-  0x94, 0x8d, 0xfd, 0xac, 0x40, 0xc4, 0x67, 0x96, 0xa0, 0x6c, 0x86, 0x23,
-  0x40, 0x08, 0x6c, 0x05, 0xe1, 0x9b, 0x65, 0x10, 0x9b, 0xb2, 0x09, 0x2c,
-  0x84, 0xc2, 0x56, 0x88, 0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x02,
-  0xa3, 0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xda, 0x57, 0xd0,
-  0xe1, 0x86, 0x60, 0x7d, 0x05, 0x30, 0x98, 0x65, 0x18, 0x1b, 0xb2, 0x09,
-  0x2c, 0x6d, 0x85, 0x21, 0x3e, 0xb3, 0x04, 0x65, 0x63, 0x84, 0xdb, 0x0a,
-  0xf0, 0x99, 0x25, 0x28, 0x9b, 0x81, 0x16, 0x43, 0xfb, 0x19, 0x0c, 0x6c,
-  0x88, 0xb1, 0x11, 0xc8, 0xc6, 0x06, 0x83, 0xb0, 0xb9, 0x60, 0x98, 0x0b,
-  0x8c, 0xba, 0xcd, 0xa8, 0xd3, 0x59, 0x61, 0x98, 0x5b, 0x53, 0x61, 0x98,
-  0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xa8,
-  0xfc, 0x57, 0x50, 0x5f, 0xc1, 0x74, 0x05, 0xfc, 0x15, 0x46, 0x13, 0x02,
-  0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a,
-  0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x9e, 0x12, 0x16, 0xe2,
-  0x57, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x1e, 0x13,
-  0x16, 0xe4, 0x57, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c,
-  0x9e, 0x13, 0x16, 0xe6, 0x57, 0x48, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40,
-  0x10, 0x0c, 0xb6, 0x12, 0x16, 0xe4, 0x57, 0x80, 0x5d, 0x21, 0xf8, 0x5f,
-  0xe1, 0x77, 0x85, 0xfe, 0x15, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08,
-  0x4e, 0x18, 0xe3, 0x84, 0x31, 0x2a, 0x38, 0x5f, 0xe1, 0x6a, 0x08, 0xf6,
-  0x02, 0xa3, 0x66, 0x09, 0xcc, 0x66, 0xa0, 0xc5, 0xd0, 0x8d, 0x98, 0xb1,
-  0x7b, 0x02, 0x66, 0x6c, 0x62, 0x66, 0x84, 0xb2, 0xb1, 0x7b, 0x82, 0x66,
-  0x6c, 0x9e, 0x03, 0xf0, 0x15, 0xe0, 0x33, 0xcb, 0x70, 0x36, 0x69, 0x13,
-  0xcf, 0xc1, 0x70, 0x44, 0x3d, 0x07, 0xbd, 0x2b, 0x0c, 0xdf, 0xd9, 0x73,
-  0x30, 0xcc, 0x70, 0x43, 0x30, 0xbb, 0x02, 0x19, 0xd4, 0x10, 0xe8, 0x70,
-  0x44, 0x11, 0xbe, 0xc2, 0xf0, 0x55, 0x20, 0xe8, 0x1d, 0xc3, 0x0c, 0x37,
-  0x04, 0xb6, 0x2b, 0x90, 0x41, 0x05, 0x83, 0xce, 0x32, 0xa0, 0x4d, 0xdf,
-  0x04, 0xa7, 0xb7, 0xc2, 0x30, 0xb7, 0xaa, 0xc2, 0x30, 0x23, 0x06, 0x07,
-  0x00, 0x82, 0x60, 0x50, 0xf1, 0xb0, 0x80, 0xc2, 0x02, 0xf9, 0x0a, 0x36,
-  0x2c, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2,
-  0x68, 0x02, 0x31, 0x14, 0x71, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
-  0x3c, 0x63, 0x2c, 0xbc, 0xb0, 0x70, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80,
-  0x20, 0x18, 0x3c, 0x64, 0x2c, 0xc0, 0xb0, 0xc0, 0x10, 0xc1, 0x88, 0x01,
-  0x02, 0x80, 0x20, 0x18, 0x3c, 0x65, 0x2c, 0xc4, 0xb0, 0x20, 0x11, 0xc1,
-  0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x6c, 0x63, 0x2c, 0xc0, 0xb0, 0xe0,
-  0xbe, 0x42, 0xd0, 0xc3, 0x42, 0xff, 0x0a, 0x3b, 0x2c, 0x8c, 0x26, 0x04,
-  0xc0, 0x68, 0x82, 0x10, 0x9c, 0x30, 0xc6, 0x09, 0x63, 0x54, 0x50, 0xc2,
-  0xc2, 0xd5, 0x10, 0xec, 0x05, 0x46, 0xcd, 0x12, 0xf4, 0xcd, 0x70, 0x83,
-  0x4b, 0x07, 0x68, 0x2c, 0x80, 0xc1, 0x2c, 0x83, 0xda, 0xac, 0x4d, 0x50,
-  0xee, 0x2b, 0xe0, 0xb0, 0x00, 0x17, 0x18, 0x35, 0x62, 0x70, 0x00, 0x20,
-  0x08, 0x06, 0xcb, 0x1c, 0x0b, 0x39, 0x2c, 0xd0, 0x74, 0xc0, 0xbf, 0xc2,
-  0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x2c, 0x74, 0x2c, 0xe4, 0xb0, 0x10,
-  0x08, 0x17, 0x0c, 0x53, 0xf1, 0x2b, 0xf4, 0xb0, 0x00, 0x17, 0x18, 0x35,
-  0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x0b, 0x1e, 0x0b, 0x3e, 0x2c, 0x94,
-  0x41, 0x08, 0x0b, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xb0, 0xe4, 0xb1,
-  0xe0, 0xc3, 0x42, 0x20, 0x5c, 0x30, 0xcc, 0x05, 0x46, 0xdd, 0x61, 0xd4,
-  0xe9, 0xae, 0x30, 0xcc, 0xad, 0xab, 0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31,
-  0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x54, 0x7e, 0x2c, 0xa8, 0xb1,
-  0x60, 0xc2, 0x02, 0x1e, 0x0b, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04,
-  0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80,
-  0x00, 0x20, 0x08, 0x06, 0x4f, 0x29, 0x0b, 0x71, 0x2c, 0x24, 0x44, 0x30,
-  0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x8f, 0x29, 0x0b, 0x72, 0x2c, 0x24,
-  0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xcf, 0x29, 0x0b, 0x73,
-  0x2c, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x5b, 0x29,
-  0x0b, 0x72, 0x2c, 0xc0, 0xb0, 0x10, 0xfc, 0xb1, 0xf0, 0xc3, 0x42, 0x1f,
-  0x0b, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0x27, 0x8c, 0x71, 0xc2,
-  0x18, 0x15, 0x9c, 0xb1, 0x70, 0x35, 0x04, 0x7b, 0x81, 0x51, 0xb3, 0x04,
-  0x7d, 0x33, 0xdc, 0x00, 0xd7, 0x41, 0x2a, 0x0b, 0x60, 0x30, 0xcb, 0xc0,
-  0x36, 0x7d, 0x13, 0xd8, 0x09, 0x0b, 0x29, 0x2c, 0xc4, 0x67, 0x38, 0xa2,
-  0xae, 0x03, 0x15, 0x16, 0x88, 0x6f, 0x96, 0xa1, 0x6d, 0xe0, 0x26, 0xb0,
-  0x15, 0x16, 0xec, 0x3a, 0x88, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6,
-  0x02, 0xa3, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0x66, 0x59,
-  0xd0, 0xe1, 0x86, 0x20, 0x96, 0x05, 0x30, 0x98, 0x65, 0x70, 0x9b, 0xb7,
-  0x09, 0x6c, 0x98, 0x61, 0x01, 0x3e, 0xb3, 0x04, 0x74, 0x63, 0x32, 0x2c,
-  0x10, 0xf1, 0x99, 0x25, 0xa0, 0x9b, 0xe1, 0x08, 0xd0, 0x0e, 0x66, 0x58,
-  0x10, 0xbe, 0x59, 0x86, 0xb8, 0xa1, 0x9b, 0xc0, 0x42, 0x3b, 0xa0, 0x61,
-  0x21, 0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x8c, 0xb2, 0x20,
-  0x92, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xf0, 0x65, 0x41, 0x87, 0x1b, 0x02,
-  0x5e, 0x16, 0xc0, 0x60, 0x96, 0x41, 0x6e, 0xe6, 0x26, 0x30, 0x1e, 0x16,
-  0x86, 0xf8, 0xcc, 0x12, 0xd0, 0x8d, 0x11, 0x3f, 0x2c, 0xc0, 0x67, 0x96,
-  0x80, 0x6e, 0x06, 0x5a, 0x0c, 0xcd, 0x6d, 0xb0, 0xb7, 0x21, 0xe4, 0x46,
-  0x98, 0x1b, 0x9b, 0x1d, 0xe0, 0xe6, 0x82, 0x61, 0xcc, 0x87, 0x05, 0x31,
-  0x16, 0xe2, 0x33, 0x1c, 0xa1, 0x0b, 0x63, 0x2c, 0x10, 0xdf, 0x2c, 0x43,
-  0xdd, 0xe0, 0x4d, 0x60, 0x64, 0x2c, 0xec, 0x42, 0x7c, 0x2c, 0x18, 0xe8,
-  0x73, 0xc1, 0x30, 0x17, 0x18, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8,
-  0x14, 0xc1, 0xce, 0x82, 0x0e, 0x37, 0x04, 0xea, 0x2c, 0x80, 0xc1, 0x2c,
-  0x83, 0xdd, 0xdc, 0x4d, 0x60, 0x03, 0x1b, 0x0b, 0xf0, 0x99, 0x25, 0xe0,
-  0x1b, 0x4b, 0x63, 0x81, 0x88, 0xcf, 0x2c, 0x01, 0xdf, 0x0c, 0x47, 0x94,
-  0x83, 0x1a, 0x0b, 0xc2, 0x37, 0xcb, 0x90, 0x37, 0x7c, 0x13, 0x98, 0x39,
-  0xac, 0xb1, 0x10, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x46,
-  0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xdc, 0xb3, 0xa0, 0xc3,
-  0x0d, 0x41, 0x3d, 0x0b, 0x60, 0x30, 0xcb, 0xa0, 0x37, 0x7b, 0x13, 0xd8,
-  0x1c, 0x0b, 0x43, 0x7c, 0x66, 0x09, 0xf8, 0xc6, 0x08, 0x3c, 0x16, 0xe0,
-  0x33, 0x4b, 0xc0, 0x37, 0x03, 0x2d, 0x86, 0x66, 0x37, 0xd8, 0xdd, 0x10,
-  0x7a, 0x23, 0xec, 0x0d, 0x78, 0xe0, 0xcd, 0x05, 0xc3, 0x5c, 0x60, 0xd4,
-  0x6d, 0x46, 0x1d, 0x19, 0x0b, 0xc3, 0x5c, 0xdd, 0x0a, 0xc3, 0x1c, 0x31,
-  0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x41, 0x85, 0xd2,
-  0x02, 0x3d, 0x0b, 0xb0, 0x2c, 0x88, 0xb4, 0x30, 0x9a, 0x10, 0x00, 0xa3,
-  0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22,
-  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xf0, 0xbc, 0xb4, 0xb0, 0xcf, 0x42,
-  0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xf0, 0xc0, 0xb4, 0xc0,
-  0xcf, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xf0, 0xc4,
-  0xb4, 0xd0, 0xcf, 0x42, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60,
-  0xb0, 0xbd, 0xb4, 0xc0, 0xcf, 0x82, 0x2e, 0x0b, 0x41, 0x4a, 0x0b, 0xe9,
-  0x2c, 0x9c, 0xb4, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x70, 0xc2,
-  0x18, 0x27, 0x8c, 0x51, 0x41, 0x3c, 0x0b, 0x57, 0x43, 0xb0, 0x17, 0x18,
-  0x35, 0x4b, 0xd0, 0x37, 0x03, 0x2d, 0x86, 0x6e, 0xa0, 0x8d, 0x6a, 0x16,
-  0x67, 0x63, 0x13, 0x6a, 0x23, 0xf0, 0x8d, 0x6a, 0x16, 0x6b, 0x33, 0xcb,
-  0xe0, 0x37, 0xa0, 0xa3, 0xdf, 0xc1, 0x70, 0xc4, 0x7f, 0x07, 0xe6, 0x2c,
-  0x0c, 0xdf, 0x81, 0x78, 0x30, 0xcc, 0x70, 0x43, 0xc0, 0xcb, 0x02, 0x19,
-  0xd4, 0x10, 0xe8, 0x70, 0x84, 0x4b, 0xa8, 0xb3, 0x30, 0x7c, 0x15, 0x08,
-  0x7a, 0x30, 0x31, 0xcc, 0x70, 0x43, 0xf0, 0xcb, 0x02, 0x19, 0x54, 0x30,
-  0xe8, 0x2c, 0xc3, 0xdf, 0xd0, 0x4e, 0x70, 0xa3, 0x2c, 0x0c, 0x73, 0xb4,
-  0x2b, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x55, 0x59, 0x0b,
-  0x31, 0x2d, 0xb4, 0xb3, 0xf0, 0xd3, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26,
-  0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x87, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xc3, 0xd6, 0x02, 0x4e, 0x0b, 0x07,
-  0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xd3, 0xd6, 0x42, 0x4e,
-  0x0b, 0x0c, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xe3, 0xd6,
-  0x82, 0x4e, 0x0b, 0x12, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1,
-  0xc6, 0xd6, 0x42, 0x4e, 0x0b, 0xf7, 0x2c, 0x04, 0x66, 0x2d, 0x98, 0xb4,
-  0x40, 0xd6, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x09, 0x63,
-  0x9c, 0x30, 0x46, 0x05, 0x2e, 0x2d, 0x5c, 0x0d, 0xc1, 0x5e, 0x60, 0xd4,
-  0x2c, 0x01, 0xed, 0x0c, 0x37, 0xdc, 0x78, 0x10, 0xd7, 0x02, 0x18, 0xcc,
-  0x32, 0x84, 0x8e, 0xe8, 0x04, 0x75, 0xcf, 0x42, 0x58, 0x0b, 0x70, 0x81,
-  0x51, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xb0, 0xf0, 0xb5, 0x20, 0xd6,
-  0x82, 0x8f, 0x07, 0x25, 0x2d, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1,
-  0xd2, 0xd7, 0x82, 0x58, 0x0b, 0x81, 0x70, 0xc1, 0x30, 0xa5, 0xcf, 0x82,
-  0x59, 0x0b, 0x70, 0x81, 0x51, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xb0,
-  0x84, 0xb6, 0x70, 0xd6, 0x82, 0x5b, 0xa8, 0xb4, 0x30, 0x62, 0x70, 0x00,
-  0x20, 0x08, 0x06, 0x8b, 0x68, 0x0b, 0x67, 0x2d, 0x04, 0xc2, 0x05, 0xc3,
-  0x5c, 0x60, 0xd4, 0x1d, 0x46, 0xdd, 0x38, 0x0b, 0xc3, 0x1c, 0xfd, 0x0a,
-  0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82,
-  0x41, 0x75, 0xda, 0xc2, 0x5c, 0x0b, 0x2f, 0x2d, 0x84, 0xb6, 0x30, 0x9a,
-  0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4,
-  0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xf0, 0xb8, 0xb6,
-  0xa0, 0xd7, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xf0,
-  0xbc, 0xb6, 0xb0, 0xd7, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82,
-  0x60, 0xf0, 0xc0, 0xb6, 0xc0, 0xd7, 0x42, 0x42, 0x04, 0x23, 0x06, 0x0a,
-  0x00, 0x82, 0x60, 0xb0, 0xb9, 0xb6, 0xb0, 0xd7, 0x42, 0x4e, 0x0b, 0x01,
-  0x6a, 0x0b, 0x68, 0x2d, 0x98, 0xb6, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09,
-  0x42, 0x70, 0xc2, 0x18, 0x27, 0x8c, 0x51, 0x01, 0x5c, 0x0b, 0x57, 0x43,
-  0xb0, 0x17, 0x18, 0x35, 0x4b, 0x40, 0x3b, 0xc3, 0x0d, 0x79, 0x1e, 0xc8,
-  0xb6, 0x00, 0x06, 0xb3, 0x0c, 0xa3, 0x43, 0x3b, 0x81, 0xc1, 0xb4, 0x20,
-  0xd3, 0x42, 0x7c, 0x86, 0x23, 0xfe, 0x3c, 0x98, 0x69, 0x81, 0xf8, 0x66,
-  0x19, 0x48, 0xe7, 0x74, 0x02, 0xa3, 0x69, 0x01, 0xd4, 0x83, 0xf8, 0x58,
-  0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x30, 0xca, 0x02, 0x43, 0x3e, 0x56,
-  0x04, 0xf1, 0x29, 0x82, 0xb7, 0x05, 0x1d, 0x6e, 0x08, 0x74, 0x5b, 0x00,
-  0x83, 0x59, 0x86, 0xd2, 0x31, 0x9d, 0xc0, 0x06, 0x9e, 0x16, 0xe0, 0x33,
-  0x4b, 0xb0, 0x3a, 0xb6, 0xd3, 0x02, 0x11, 0x9f, 0x59, 0x82, 0xd5, 0x19,
-  0x8e, 0x50, 0xf5, 0x80, 0xa7, 0x05, 0xe1, 0x9b, 0x65, 0x40, 0x9d, 0xd5,
-  0x09, 0x6c, 0xd5, 0x83, 0x9e, 0x16, 0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b,
-  0x86, 0xb9, 0xc0, 0x28, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88,
-  0xf3, 0x16, 0x74, 0xb8, 0x21, 0x28, 0x6f, 0x01, 0x0c, 0x66, 0x19, 0x52,
-  0x47, 0x75, 0x02, 0x2b, 0x6b, 0x61, 0x88, 0xcf, 0x2c, 0xc1, 0xea, 0x18,
-  0x81, 0xd6, 0x02, 0x7c, 0x66, 0x09, 0x56, 0x67, 0xa0, 0xc5, 0xd0, 0x4a,
-  0x07, 0x33, 0x1d, 0x22, 0x75, 0x04, 0xd5, 0x51, 0xeb, 0xe1, 0x74, 0x2e,
-  0x18, 0xc6, 0xce, 0x5a, 0x58, 0x6b, 0x21, 0x3e, 0xc3, 0x11, 0xe3, 0xc1,
-  0xd6, 0x02, 0xf1, 0xcd, 0x32, 0xb0, 0xce, 0xeb, 0x04, 0xd6, 0xd6, 0x02,
-  0x79, 0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x51, 0x16,
-  0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0xf5, 0x2d, 0xe8, 0x70, 0x43,
-  0x30, 0xdf, 0x02, 0x18, 0xcc, 0x32, 0xb4, 0x8e, 0xeb, 0x04, 0x36, 0xd4,
-  0xb5, 0x00, 0x9f, 0x59, 0x82, 0xd9, 0x31, 0xb9, 0x16, 0x88, 0xf8, 0xcc,
-  0x12, 0xcc, 0xce, 0x70, 0x84, 0x7b, 0xcc, 0xb5, 0x20, 0x7c, 0xb3, 0x0c,
-  0xb0, 0x33, 0x3b, 0x81, 0xbd, 0x07, 0x5d, 0x0b, 0xf1, 0xb1, 0xc0, 0xa1,
-  0xcf, 0x05, 0xc3, 0x5c, 0x60, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2,
-  0x53, 0x04, 0x88, 0x0b, 0x3a, 0xdc, 0x10, 0xf8, 0xb7, 0x00, 0x06, 0xb3,
-  0x0c, 0xb1, 0x23, 0x3b, 0x81, 0xf1, 0xb5, 0x30, 0xc4, 0x67, 0x96, 0x60,
-  0x76, 0x8c, 0x08, 0x6d, 0x01, 0x3e, 0xb3, 0x04, 0xb3, 0x33, 0xd0, 0x62,
-  0x68, 0xad, 0x83, 0xb9, 0x0e, 0x11, 0x3b, 0x82, 0xec, 0xc8, 0xcf, 0xeb,
-  0x5c, 0x30, 0xcc, 0x05, 0x46, 0xdd, 0x66, 0xd4, 0xb5, 0xb5, 0x30, 0xcc,
-  0xf9, 0xb1, 0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01,
-  0x80, 0x20, 0x18, 0x54, 0x31, 0x2e, 0xf4, 0xb7, 0x90, 0xdb, 0xc2, 0x8a,
-  0x0b, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30,
-  0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06,
-  0x0f, 0x8e, 0x0b, 0x24, 0x2e, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20,
-  0x08, 0x06, 0x4f, 0x8e, 0x0b, 0x25, 0x2e, 0x24, 0x44, 0x30, 0x62, 0x80,
-  0x00, 0x20, 0x08, 0x06, 0x8f, 0x8e, 0x0b, 0x26, 0x2e, 0x24, 0x44, 0x30,
-  0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x1b, 0x8e, 0x0b, 0x25, 0x2e, 0x8c,
-  0xb7, 0x10, 0xc8, 0xb8, 0x20, 0xdf, 0x02, 0x8c, 0x0b, 0xa3, 0x09, 0x01,
-  0x30, 0x9a, 0x20, 0x04, 0x27, 0x8c, 0x71, 0xc2, 0x18, 0x15, 0xe8, 0xb7,
-  0x70, 0x35, 0x04, 0x7b, 0x81, 0x51, 0xb3, 0x04, 0xb4, 0x33, 0xd0, 0x62,
-  0xe8, 0xc6, 0xdf, 0xf0, 0x6e, 0xe1, 0x37, 0x36, 0x11, 0x3a, 0xc2, 0xec,
-  0xf0, 0x6e, 0x21, 0x3a, 0xb3, 0x0c, 0xb5, 0x73, 0x3b, 0x23, 0x1f, 0x0c,
-  0x47, 0xc0, 0x73, 0xf0, 0xde, 0xc2, 0xf0, 0x5d, 0x3c, 0x07, 0xc3, 0x0c,
-  0x37, 0x04, 0xe5, 0x2d, 0x90, 0x41, 0x0d, 0x81, 0x0e, 0x47, 0xdc, 0xc8,
-  0x7c, 0x0b, 0xc3, 0x57, 0x81, 0xa0, 0x97, 0x23, 0xc3, 0x0c, 0x37, 0x04,
-  0xe8, 0x2d, 0x90, 0x41, 0x05, 0x83, 0xce, 0x32, 0xd8, 0xce, 0xfa, 0x04,
-  0xc7, 0xda, 0xc2, 0x30, 0xd7, 0xcb, 0xc2, 0x30, 0x23, 0x06, 0x07, 0x00,
-  0x82, 0x60, 0x50, 0xb9, 0xb9, 0xa0, 0xe3, 0x82, 0x7d, 0x0b, 0x68, 0x2e,
-  0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68,
-  0x02, 0x31, 0x14, 0x71, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x3c,
-  0x75, 0x2e, 0x84, 0xb9, 0x70, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20,
-  0x18, 0x3c, 0x76, 0x2e, 0x88, 0xb9, 0xc0, 0x10, 0xc1, 0x88, 0x01, 0x02,
-  0x80, 0x20, 0x18, 0x3c, 0x77, 0x2e, 0x8c, 0xb9, 0x20, 0x11, 0xc1, 0x88,
-  0x81, 0x02, 0x80, 0x20, 0x18, 0x6c, 0x75, 0x2e, 0x88, 0xb9, 0x00, 0xe2,
-  0x42, 0xf0, 0xe6, 0xc2, 0x8b, 0x0b, 0x6d, 0x2e, 0x8c, 0x26, 0x04, 0xc0,
-  0x68, 0x82, 0x10, 0x9c, 0x30, 0xc6, 0x09, 0x63, 0x54, 0x70, 0xe3, 0xc2,
-  0xd5, 0x10, 0xec, 0x05, 0x46, 0xcd, 0x12, 0xac, 0xcf, 0x70, 0x03, 0xd8,
-  0x07, 0x7a, 0x2e, 0x80, 0xc1, 0x2c, 0x03, 0xee, 0xe4, 0x4e, 0x50, 0x20,
-  0x2e, 0xa8, 0xb9, 0x00, 0x17, 0x18, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08,
-  0x06, 0x4b, 0xa9, 0x0b, 0x6b, 0x2e, 0xbc, 0x74, 0xe0, 0xe2, 0xc2, 0x88,
-  0xc1, 0x01, 0x80, 0x20, 0x18, 0x2c, 0xa6, 0x2e, 0xac, 0xb9, 0x10, 0x08,
-  0x17, 0x0c, 0x53, 0x23, 0x2e, 0xbc, 0xb9, 0x00, 0x17, 0x18, 0x35, 0x62,
-  0x70, 0x00, 0x20, 0x08, 0x06, 0x8b, 0xaa, 0x0b, 0x70, 0x2e, 0xdc, 0xc9,
-  0x8c, 0x0b, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xb0, 0xac, 0xba, 0x00,
-  0xe7, 0x42, 0x20, 0x5c, 0x30, 0xcc, 0x05, 0x46, 0xdd, 0x61, 0xd4, 0xb1,
-  0xb7, 0x30, 0xcc, 0xf5, 0xb3, 0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc,
-  0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x54, 0xb0, 0x2e, 0xf0, 0xb9, 0x80,
-  0xe3, 0x82, 0xaa, 0x0b, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3,
-  0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00,
-  0x20, 0x08, 0x06, 0xcf, 0xad, 0x0b, 0xa3, 0x2e, 0x24, 0x44, 0x30, 0x62,
-  0x80, 0x00, 0x20, 0x08, 0x06, 0x0f, 0xae, 0x0b, 0xa4, 0x2e, 0x24, 0x44,
-  0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x4f, 0xae, 0x0b, 0xa5, 0x2e,
-  0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0xdb, 0xad, 0x0b,
-  0xa4, 0x2e, 0x88, 0xb9, 0x10, 0xc4, 0xba, 0x10, 0xe7, 0xc2, 0xab, 0x0b,
-  0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0x27, 0x8c, 0x71, 0xc2, 0x18,
-  0x15, 0xe4, 0xb9, 0x70, 0x35, 0x04, 0x7b, 0x81, 0x51, 0xb3, 0x04, 0xeb,
-  0x33, 0xdc, 0x20, 0xfa, 0xc1, 0xae, 0x0b, 0x60, 0x30, 0xcb, 0xa0, 0x3b,
-  0xeb, 0x13, 0x58, 0x8e, 0x0b, 0x3b, 0x2e, 0xc4, 0x67, 0x38, 0x02, 0xae,
-  0x03, 0x1e, 0x17, 0x88, 0x6f, 0x96, 0x61, 0x77, 0x7c, 0x27, 0xb0, 0x1e,
-  0x17, 0xe2, 0x3a, 0x88, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x02,
-  0xa3, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xca, 0x5d, 0xd0,
-  0xe1, 0x86, 0x60, 0xdc, 0x05, 0x30, 0x98, 0x65, 0xe0, 0x9d, 0xde, 0x09,
-  0x6c, 0x28, 0x73, 0x01, 0x3e, 0xb3, 0x04, 0xe2, 0x63, 0x64, 0x2e, 0x10,
-  0xf1, 0x99, 0x25, 0x10, 0x9f, 0xe1, 0x88, 0xbd, 0x0e, 0xca, 0x5c, 0x10,
-  0xbe, 0x59, 0x86, 0xdf, 0x11, 0x9f, 0xc0, 0xf8, 0x3a, 0x30, 0x73, 0x21,
-  0x3e, 0x16, 0x38, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x8c, 0xb2, 0x20, 0x92,
-  0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x80, 0x77, 0x41, 0x87, 0x1b, 0x02, 0x77,
-  0x17, 0xc0, 0x60, 0x96, 0x01, 0x7c, 0xc2, 0x27, 0x30, 0x37, 0x17, 0x86,
-  0xf8, 0xcc, 0x12, 0x88, 0x8f, 0x11, 0x71, 0x2e, 0xc0, 0x67, 0x96, 0x40,
-  0x7c, 0x06, 0x5a, 0x0c, 0x8d, 0x77, 0xb0, 0xde, 0x21, 0xc0, 0x47, 0x08,
-  0x1f, 0x96, 0x1d, 0x7c, 0xe7, 0x82, 0x61, 0x0c, 0xce, 0x05, 0x3a, 0x17,
-  0xe2, 0x33, 0x1c, 0xc1, 0x2e, 0x75, 0x2e, 0x10, 0xdf, 0x2c, 0xc3, 0xf8,
-  0x98, 0x4f, 0x60, 0x76, 0x2e, 0xb4, 0x4b, 0x7c, 0x2c, 0x18, 0xe8, 0x73,
-  0xc1, 0x30, 0x17, 0x18, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14,
-  0xe1, 0xef, 0x82, 0x0e, 0x37, 0x04, 0xfc, 0x2e, 0x80, 0xc1, 0x2c, 0x03,
-  0xf9, 0x94, 0x4f, 0x60, 0x83, 0x9f, 0x0b, 0xf0, 0x99, 0x25, 0x50, 0x1f,
-  0xdb, 0x73, 0x81, 0x88, 0xcf, 0x2c, 0x81, 0xfa, 0x0c, 0x47, 0xdc, 0x0b,
-  0x9f, 0x0b, 0xc2, 0x37, 0xcb, 0x70, 0x3e, 0xea, 0x13, 0x18, 0xbe, 0xf4,
-  0xb9, 0x10, 0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x46, 0x59,
-  0x10, 0xc9, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xa4, 0xbc, 0xa0, 0xc3, 0x0d,
-  0xc1, 0xc9, 0x0b, 0x60, 0x30, 0xcb, 0x80, 0x3e, 0xe9, 0x13, 0x58, 0xa9,
-  0x0b, 0x43, 0x7c, 0x66, 0x09, 0xd4, 0xc7, 0x08, 0x55, 0x17, 0xe0, 0x33,
-  0x4b, 0xa0, 0x3e, 0x03, 0x2d, 0x86, 0x46, 0x3e, 0x58, 0xf9, 0x10, 0xe8,
-  0x23, 0xa4, 0x0f, 0x89, 0x99, 0xcf, 0x05, 0xc3, 0x5c, 0x60, 0xd4, 0x6d,
-  0x46, 0x9d, 0x9d, 0x0b, 0xc3, 0xdc, 0x69, 0x0b, 0xc3, 0x1c, 0x31, 0xcc,
-  0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x41, 0xa5, 0xf3, 0x82,
-  0xc9, 0x0b, 0xe2, 0x2e, 0xd0, 0xbc, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09,
-  0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23,
-  0x06, 0x08, 0x00, 0x82, 0x60, 0xf0, 0x84, 0xbd, 0xd0, 0xf2, 0x42, 0x42,
-  0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xf0, 0x88, 0xbd, 0xe0, 0xf2,
-  0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xf0, 0x8c, 0xbd,
-  0xf0, 0xf2, 0x42, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0,
-  0x85, 0xbd, 0xe0, 0xf2, 0x02, 0xbb, 0x0b, 0xc1, 0xce, 0x0b, 0xfb, 0x2e,
-  0xe4, 0xbc, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x70, 0xc2, 0x18,
-  0x27, 0x8c, 0x51, 0xc1, 0xc8, 0x0b, 0x57, 0x43, 0xb0, 0x17, 0x18, 0x35,
-  0x4b, 0xb0, 0x3e, 0x03, 0x2d, 0x86, 0x6e, 0xd8, 0x8e, 0x7b, 0x17, 0xb5,
-  0x63, 0x13, 0xb8, 0x23, 0xa8, 0x8f, 0x7b, 0x17, 0xb9, 0x33, 0xcb, 0xc0,
-  0x3e, 0xee, 0xc3, 0x82, 0xc2, 0x70, 0xc4, 0x3b, 0x07, 0xf8, 0x2e, 0x0c,
-  0xdf, 0xc1, 0x73, 0x30, 0xcc, 0x70, 0x43, 0xe0, 0xee, 0x02, 0x19, 0xd4,
-  0x10, 0xe8, 0x70, 0x04, 0xd8, 0xf0, 0xbb, 0x30, 0x7c, 0x15, 0x08, 0x7a,
-  0x62, 0x33, 0xcc, 0x70, 0x43, 0x10, 0xef, 0x02, 0x19, 0x54, 0x30, 0xe8,
-  0x2c, 0x43, 0xfb, 0x88, 0x50, 0x70, 0xb5, 0x2e, 0x0c, 0x73, 0xe6, 0x2d,
-  0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0xd5, 0xdd, 0x0b, 0x63,
-  0x2f, 0xfc, 0xbb, 0x10, 0xf7, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08,
-  0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x87, 0x8c, 0x18,
-  0x20, 0x00, 0x08, 0x82, 0xc1, 0xe3, 0xf7, 0x82, 0xda, 0x0b, 0x07, 0x11,
-  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xf3, 0xf7, 0xc2, 0xda, 0x0b,
-  0x0c, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x03, 0xfa, 0x02,
-  0xdb, 0x0b, 0x12, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0xe6,
-  0xf7, 0xc2, 0xda, 0x0b, 0x29, 0x2f, 0x04, 0x78, 0x2f, 0xe0, 0xbc, 0x60,
-  0xf7, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x09, 0x63, 0x9c,
-  0x30, 0x46, 0x05, 0x60, 0x2f, 0x5c, 0x0d, 0xc1, 0x5e, 0x60, 0xd4, 0x2c,
-  0x81, 0x08, 0x0d, 0x37, 0xa4, 0xa1, 0x30, 0xfa, 0x02, 0x18, 0xcc, 0x32,
-  0xbc, 0x0f, 0xfc, 0x04, 0x95, 0xf2, 0xc2, 0xdc, 0x0b, 0x70, 0x81, 0x51,
-  0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xb0, 0xb8, 0xbe, 0x40, 0xf7, 0x82,
-  0x4b, 0x07, 0x37, 0x2f, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0xf2,
-  0xfa, 0x02, 0xdd, 0x0b, 0x81, 0x70, 0xc1, 0x30, 0xc5, 0xf2, 0x02, 0xde,
-  0x0b, 0x70, 0x81, 0x51, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xb0, 0xcc,
-  0xbe, 0x90, 0xf7, 0x02, 0xe8, 0xf0, 0xbc, 0x30, 0x62, 0x70, 0x00, 0x20,
-  0x08, 0x06, 0x0b, 0xed, 0x0b, 0x79, 0x2f, 0x04, 0xc2, 0x05, 0xc3, 0x5c,
-  0x60, 0xd4, 0x1d, 0x46, 0x5d, 0xbd, 0x0b, 0xc3, 0x9c, 0x89, 0x0b, 0xc3,
-  0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x41,
-  0x95, 0xfb, 0x42, 0xe9, 0x0b, 0x61, 0x2f, 0xcc, 0xbe, 0x30, 0x9a, 0x10,
-  0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50,
-  0x44, 0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xf0, 0x80, 0xbf, 0xc0,
-  0xfa, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xf0, 0x84,
-  0xbf, 0xd0, 0xfa, 0x42, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60,
-  0xf0, 0x88, 0xbf, 0xe0, 0xfa, 0x42, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00,
-  0x82, 0x60, 0xb0, 0x81, 0xbf, 0xd0, 0xfa, 0xc2, 0xda, 0x0b, 0x81, 0xee,
-  0x0b, 0x7a, 0x2f, 0xe0, 0xbe, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42,
-  0x70, 0xc2, 0x18, 0x27, 0x8c, 0x51, 0x81, 0xe8, 0x0b, 0x57, 0x43, 0xb0,
-  0x17, 0x18, 0x35, 0x4b, 0x20, 0x42, 0xc3, 0x0d, 0xab, 0x28, 0x90, 0xbf,
-  0x00, 0x06, 0xb3, 0x0c, 0xf1, 0x23, 0x42, 0x81, 0x89, 0xbd, 0x40, 0xf6,
-  0x42, 0x7c, 0x86, 0x23, 0xde, 0x3a, 0x28, 0x7b, 0x81, 0xf8, 0x66, 0x19,
-  0xe4, 0xa7, 0x7e, 0x02, 0x33, 0x7b, 0x01, 0xae, 0x83, 0xf8, 0x58, 0x30,
-  0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x30, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04,
-  0xf1, 0x29, 0xc2, 0xfd, 0x05, 0x1d, 0x6e, 0x08, 0xd8, 0x5f, 0x00, 0x83,
-  0x59, 0x86, 0xf9, 0xa1, 0x9f, 0xc0, 0x06, 0xb7, 0x17, 0xe0, 0x33, 0x4b,
-  0x90, 0x3f, 0xd6, 0xf6, 0x02, 0x11, 0x9f, 0x59, 0x82, 0xfc, 0x19, 0x8e,
-  0xd0, 0xeb, 0xc0, 0xed, 0x05, 0xe1, 0x9b, 0x65, 0xb0, 0x9f, 0xfc, 0x09,
-  0x6c, 0xaf, 0x83, 0xb7, 0x17, 0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86,
-  0xb9, 0xc0, 0x28, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0xfc,
-  0x17, 0x74, 0xb8, 0x21, 0xb8, 0x7f, 0x01, 0x0c, 0x66, 0x19, 0xee, 0x07,
-  0x7f, 0x02, 0xbb, 0x7b, 0x61, 0x88, 0xcf, 0x2c, 0x41, 0xfe, 0x18, 0xa1,
-  0xf7, 0x02, 0x7c, 0x66, 0x09, 0xf2, 0x67, 0xa0, 0xc5, 0xd0, 0xe6, 0x07,
-  0xa3, 0x1f, 0xe2, 0x7e, 0x04, 0xfc, 0x51, 0xd9, 0xa1, 0x7e, 0x2e, 0x18,
-  0xc6, 0xf2, 0x5e, 0xe8, 0x7b, 0x21, 0x3e, 0xc3, 0x11, 0xf5, 0xe3, 0xf7,
-  0x02, 0xf1, 0xcd, 0x32, 0xe8, 0x4f, 0xff, 0x04, 0xf6, 0xf7, 0x82, 0xfd,
-  0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x51, 0x16, 0x18,
-  0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x27, 0x38, 0xe8, 0x70, 0x43, 0x50,
-  0x82, 0x03, 0x18, 0xcc, 0x32, 0xec, 0x0f, 0xff, 0x04, 0x36, 0x9c, 0xbe,
-  0x00, 0x9f, 0x59, 0x82, 0x10, 0x32, 0xd2, 0x17, 0x88, 0xf8, 0xcc, 0x12,
-  0x84, 0xd0, 0x70, 0x04, 0x08, 0x95, 0xbe, 0x20, 0x7c, 0xb3, 0x0c, 0xfe,
-  0x13, 0x42, 0x81, 0x85, 0x90, 0xe9, 0x0b, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf,
-  0x05, 0xc3, 0x5c, 0x60, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53,
-  0x84, 0x0c, 0x0e, 0x3a, 0xdc, 0x10, 0xc0, 0xe0, 0x00, 0x06, 0xb3, 0x0c,
-  0xff, 0x03, 0x42, 0x81, 0xb9, 0xbe, 0x30, 0xc4, 0x67, 0x96, 0x20, 0x84,
-  0x8c, 0x98, 0x7d, 0x01, 0x3e, 0xb3, 0x04, 0x21, 0x34, 0xd0, 0x62, 0x68,
-  0xfb, 0x83, 0xf1, 0x0f, 0xf1, 0x3f, 0x02, 0x08, 0xd9, 0x60, 0xd0, 0x3f,
-  0x17, 0x0c, 0x73, 0x81, 0x51, 0xb7, 0x19, 0x75, 0x7f, 0x2f, 0x0c, 0x73,
-  0xb0, 0x2e, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00,
-  0x20, 0x08, 0x06, 0xd5, 0x18, 0x0e, 0x2f, 0x38, 0xac, 0xbf, 0xd0, 0x83,
-  0xc3, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c,
-  0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1,
-  0xa3, 0x86, 0x83, 0x0d, 0x0e, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08,
-  0x82, 0xc1, 0xb3, 0x86, 0xc3, 0x0d, 0x0e, 0x09, 0x11, 0x8c, 0x18, 0x20,
-  0x00, 0x08, 0x82, 0xc1, 0xc3, 0x86, 0x03, 0x0e, 0x0e, 0x09, 0x11, 0x8c,
-  0x18, 0x28, 0x00, 0x08, 0x82, 0xc1, 0xa6, 0x86, 0xc3, 0x0d, 0x0e, 0xf5,
-  0x2f, 0x04, 0x64, 0x38, 0x90, 0xe0, 0x20, 0x86, 0xc3, 0x68, 0x42, 0x00,
-  0x8c, 0x26, 0x08, 0xc1, 0x09, 0x63, 0x9c, 0x30, 0x46, 0x05, 0x2c, 0x38,
-  0x5c, 0x0d, 0xc1, 0x5e, 0x60, 0xd4, 0x2c, 0x81, 0x08, 0x0d, 0xb4, 0x18,
-  0xba, 0xd1, 0x3e, 0x20, 0x68, 0xb0, 0x8f, 0x4d, 0xbc, 0x8f, 0x10, 0x42,
-  0x20, 0x68, 0xc0, 0xcf, 0x88, 0x81, 0x01, 0x80, 0x20, 0x18, 0x1c, 0x7a,
-  0x38, 0xdc, 0xe0, 0x70, 0xee, 0xc2, 0x88, 0x81, 0x01, 0x80, 0x20, 0x18,
-  0x1c, 0x7b, 0x38, 0xe0, 0xe0, 0x70, 0xee, 0x82, 0x05, 0x81, 0x7c, 0x2c,
-  0x10, 0xe4, 0x63, 0x30, 0x28, 0x9c, 0xe0, 0x20, 0x9f, 0x11, 0x03, 0x04,
-  0x00, 0x41, 0x30, 0x48, 0xfc, 0x70, 0x08, 0xc3, 0x21, 0x05, 0x07, 0xd3,
-  0x0b, 0x4c, 0x06, 0x05, 0x19, 0x1c, 0xe4, 0x33, 0x62, 0x80, 0x00, 0x20,
-  0x08, 0x06, 0x09, 0x28, 0x0e, 0x63, 0x38, 0xc4, 0xe0, 0xd0, 0xbe, 0x41,
-  0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x49, 0x28, 0x0e, 0x64, 0x38,
-  0xd0, 0xe0, 0x90, 0x7a, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x24,
-  0xa2, 0x38, 0x94, 0xe1, 0xc0, 0x82, 0x43, 0xd9, 0x04, 0x23, 0x06, 0x08,
-  0x00, 0x82, 0x60, 0x90, 0x8c, 0xe2, 0x60, 0x86, 0xc3, 0x0c, 0x0e, 0xf0,
-  0x1b, 0x18, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x90, 0x90, 0xe2, 0x70,
-  0x86, 0xc3, 0x0c, 0x0e, 0xac, 0x17, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
-  0x41, 0x52, 0x8a, 0x03, 0x1a, 0x0e, 0x30, 0x38, 0xa0, 0x4d, 0x30, 0x62,
-  0xd0, 0x00, 0x20, 0x08, 0x06, 0x0d, 0x29, 0x0e, 0x68, 0x38, 0xbc, 0xe0,
-  0xc0, 0x2c, 0xca, 0xfc, 0x06, 0x08, 0x11, 0x58, 0xcc, 0x07, 0x31, 0x38,
-  0xc8, 0x67, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x12, 0x54, 0x1c, 0xd6,
-  0x70, 0x98, 0xc1, 0xe1, 0xd4, 0x02, 0x9b, 0xf9, 0x80, 0x07, 0x07, 0xf9,
-  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x41, 0xa2, 0x8a, 0x43, 0x1b, 0x0e,
-  0x3b, 0x38, 0xb8, 0x6b, 0x10, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x41,
-  0xb2, 0x8a, 0x83, 0x1b, 0x0e, 0x3e, 0x38, 0xa8, 0x5a, 0x30, 0x62, 0x80,
-  0x00, 0x20, 0x08, 0x06, 0x09, 0x2b, 0x0e, 0x6f, 0x38, 0xd8, 0xe0, 0x60,
-  0x26, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x24, 0xad, 0x38, 0xc0,
-  0xe1, 0xd0, 0x83, 0x43, 0xbc, 0x06, 0xc6, 0x88, 0x01, 0x02, 0x80, 0x20,
-  0x18, 0x24, 0xae, 0x38, 0xc4, 0xe1, 0xd0, 0x83, 0x43, 0xab, 0x05, 0x23,
-  0x06, 0x08, 0x00, 0x82, 0x60, 0x90, 0xbc, 0xe2, 0x20, 0x87, 0x83, 0x0e,
-  0x0e, 0x69, 0x12, 0x8c, 0x18, 0x34, 0x00, 0x08, 0x82, 0x41, 0xe3, 0x8a,
-  0x83, 0x1c, 0x0e, 0x39, 0x38, 0x58, 0x15, 0x45, 0xaf, 0x01, 0x42, 0x04,
-  0x16, 0xe3, 0xc1, 0x0e, 0x0e, 0xf2, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04,
-  0x83, 0x44, 0x16, 0x87, 0x3a, 0x1c, 0x7a, 0x70, 0x40, 0xad, 0xc0, 0x66,
-  0x3c, 0x30, 0xc3, 0x41, 0x3e, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x90,
-  0xd0, 0xe2, 0x70, 0x87, 0x43, 0x19, 0x0e, 0xef, 0x19, 0x04, 0x23, 0x06,
-  0x08, 0x00, 0x82, 0x60, 0x90, 0xd4, 0xe2, 0x80, 0x87, 0x03, 0x1a, 0x0e,
-  0xab, 0x15, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x41, 0x62, 0x8b, 0x43,
-  0x1e, 0x0e, 0x60, 0x38, 0x9c, 0x45, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08,
-  0x06, 0xc9, 0x2d, 0x0e, 0x7a, 0x38, 0x9c, 0xe1, 0x20, 0x9f, 0x81, 0x31,
-  0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x09, 0x2e, 0x0e, 0x7b, 0x38, 0x9c,
-  0xe1, 0xe0, 0x5a, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x24, 0xb9,
-  0x38, 0xf0, 0xe1, 0x40, 0x86, 0x83, 0x5a, 0x04, 0x23, 0x06, 0x0d, 0x00,
-  0x82, 0x60, 0xd0, 0xe0, 0xe2, 0xc0, 0x87, 0xc3, 0x18, 0x0e, 0x60, 0xf0,
-  0x79, 0xf5, 0x19, 0x20, 0x44, 0x60, 0x31, 0x1d, 0x94, 0xe1, 0x20, 0x9f,
-  0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0x48, 0x78, 0x71, 0xf8, 0xc3, 0xe1,
-  0x0c, 0x87, 0x54, 0x0a, 0x6c, 0xa6, 0x03, 0x38, 0x1c, 0xe4, 0x33, 0x62,
-  0x80, 0x00, 0x20, 0x08, 0x06, 0x89, 0x2f, 0x0e, 0xa1, 0x38, 0xbc, 0xe1,
-  0x00, 0x8f, 0x41, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xc9, 0x2f,
-  0x0e, 0xa2, 0x38, 0xc8, 0xe1, 0xc0, 0x4a, 0xc1, 0x88, 0x01, 0x02, 0x80,
-  0x20, 0x18, 0x24, 0xe0, 0x38, 0x8c, 0xe2, 0xa0, 0x86, 0x03, 0x1a, 0x04,
-  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x90, 0x84, 0xe3, 0x40, 0x8a, 0x43,
-  0x1c, 0x0e, 0xf3, 0x18, 0x18, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x90,
-  0x88, 0xe3, 0x50, 0x8a, 0x43, 0x1c, 0x0e, 0xaf, 0x14, 0x8c, 0x18, 0x20,
-  0x00, 0x08, 0x82, 0x41, 0x32, 0x8e, 0x83, 0x29, 0x0e, 0x6e, 0x38, 0xac,
-  0x41, 0x30, 0x62, 0xd0, 0x00, 0x20, 0x08, 0x06, 0x8d, 0x38, 0x0e, 0xa6,
-  0x38, 0xb4, 0xe1, 0xa0, 0x06, 0x69, 0x80, 0x06, 0xf6, 0x18, 0x20, 0x44,
-  0x60, 0x6c, 0xc0, 0x06, 0xf2, 0xb1, 0xa0, 0x0d, 0xe4, 0x63, 0x61, 0x10,
-  0x87, 0x83, 0x7c, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x20, 0x41, 0xc7,
-  0x61, 0x15, 0x87, 0x39, 0x1c, 0x9c, 0xc0, 0xc6, 0x80, 0x0f, 0x07, 0xf9,
-  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x41, 0xa2, 0x8e, 0x43, 0x2b, 0x0e,
-  0x7b, 0x38, 0x68, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x24, 0xeb,
-  0x38, 0xb8, 0xe2, 0xe0, 0x87, 0x43, 0x14, 0x8c, 0x18, 0x20, 0x00, 0x08,
-  0x82, 0x41, 0xc2, 0x8e, 0xc3, 0x2b, 0x0e, 0x76, 0x38, 0x20, 0xc1, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x24, 0xed, 0x38, 0xc0, 0xe2, 0xd0, 0x87,
-  0x43, 0x67, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x41, 0xe2, 0x8e, 0x43,
-  0x2c, 0x0e, 0x7d, 0x38, 0x50, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
-  0x24, 0xef, 0x38, 0xc8, 0xe2, 0xa0, 0x87, 0xc3, 0x12, 0x8c, 0x18, 0x34,
-  0x00, 0x08, 0x82, 0x41, 0xe3, 0x8e, 0x83, 0x2c, 0x0e, 0x79, 0x38, 0xdc,
-  0xc1, 0xa2, 0x80, 0x01, 0x42, 0x04, 0x17, 0x8c, 0x73, 0xc1, 0x00, 0x25,
-  0xa8, 0xe2, 0x80, 0x17, 0x0c, 0x30, 0x62, 0xe0, 0x00, 0x20, 0x08, 0x06,
-  0x9d, 0x3b, 0x0e, 0xb2, 0x38, 0xf0, 0xe1, 0x50, 0x87, 0x43, 0x3a, 0x0e,
-  0x43, 0x60, 0x8b, 0x83, 0x2d, 0x0e, 0xa9, 0x38, 0x9c, 0xe3, 0x30, 0x4b,
-  0x30, 0x42, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00
-};
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_uint_double.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_uint_double.h
deleted file mode 100644
index e7bdcbc0d137f..0000000000000
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/GeneratedShaders/grid_sample_uint_double.h
+++ /dev/null
@@ -1,6297 +0,0 @@
-#if 0
-;
-; Note: shader requires additional functionality:
-;       Double-precision floating point
-;
-;
-; Input signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; no parameters
-;
-; Output signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; no parameters
-; shader hash: 63554f229ff785c1ff0ad230504c7cf1
-;
-; Pipeline Runtime Information: 
-;
-;
-;
-; Buffer Definitions:
-;
-; cbuffer 
-; {
-;
-;   [116 x i8] (type annotation not present)
-;
-; }
-;
-; Resource bind info for 
-; {
-;
-;   [4 x i8] (type annotation not present)
-;
-; }
-;
-; Resource bind info for 
-; {
-;
-;   [8 x i8] (type annotation not present)
-;
-; }
-;
-; Resource bind info for 
-; {
-;
-;   [4 x i8] (type annotation not present)
-;
-; }
-;
-;
-; Resource Bindings:
-;
-; Name                                 Type  Format         Dim      ID      HLSL Bind  Count
-; ------------------------------ ---------- ------- ----------- ------- -------------- ------
-;                                   cbuffer      NA          NA     CB0            cb0     1
-;                                       UAV  struct         r/w      U0             u0     1
-;                                       UAV  struct         r/w      U1             u1     1
-;                                       UAV  struct         r/w      U2             u2     1
-;
-target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
-target triple = "dxil-ms-dx"
-
-%dx.types.Handle = type { i8* }
-%dx.types.CBufRet.i32 = type { i32, i32, i32, i32 }
-%dx.types.ResRet.i32 = type { i32, i32, i32, i32, i32 }
-%"class.RWStructuredBuffer<unsigned int>" = type { i32 }
-%"class.RWStructuredBuffer<double>" = type { double }
-%Constants = type { i32, i32, i32, i32, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32 }
-
-define void @GridSample() {
-  %1 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 2, i32 2, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %2 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 1, i32 1, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %3 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %4 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %5 = call i32 @dx.op.threadId.i32(i32 93, i32 0)  ; ThreadId(component)
-  %6 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
-  %7 = extractvalue %dx.types.CBufRet.i32 %6, 0
-  %8 = add i32 %7, %5
-  %9 = extractvalue %dx.types.CBufRet.i32 %6, 1
-  %10 = icmp ult i32 %8, %9
-  br i1 %10, label %11, label %3323
-
-; <label>:11                                      ; preds = %0
-  %12 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
-  %13 = extractvalue %dx.types.CBufRet.i32 %12, 3
-  %14 = uitofp i32 %13 to float
-  %15 = extractvalue %dx.types.CBufRet.i32 %12, 2
-  %16 = uitofp i32 %15 to float
-  %17 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 7)  ; CBufferLoadLegacy(handle,regIndex)
-  %18 = extractvalue %dx.types.CBufRet.i32 %17, 0
-  %19 = icmp eq i32 %18, 0
-  %20 = select i1 %19, float -5.000000e-01, float 0.000000e+00
-  %21 = select i1 %19, float -5.000000e-01, float -1.000000e+00
-  %22 = fadd float %14, %21
-  %23 = select i1 %19, float -5.000000e-01, float -1.000000e+00
-  %24 = fadd float %16, %23
-  %25 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
-  %26 = extractvalue %dx.types.CBufRet.i32 %25, 1
-  %27 = extractvalue %dx.types.CBufRet.i32 %25, 2
-  %28 = extractvalue %dx.types.CBufRet.i32 %25, 3
-  %29 = mul i32 %28, %27
-  %30 = mul i32 %27, %26
-  %31 = mul i32 %30, %28
-  %32 = udiv i32 %8, %31
-  %33 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 6)  ; CBufferLoadLegacy(handle,regIndex)
-  %34 = extractvalue %dx.types.CBufRet.i32 %33, 0
-  %35 = mul i32 %34, %32
-  %36 = sub i32 %8, %35
-  %37 = udiv i32 %36, %29
-  %38 = extractvalue %dx.types.CBufRet.i32 %33, 1
-  %39 = mul i32 %38, %37
-  %40 = sub i32 %36, %39
-  %41 = udiv i32 %40, %28
-  %42 = extractvalue %dx.types.CBufRet.i32 %33, 2
-  %43 = mul i32 %42, %41
-  %44 = sub i32 %40, %43
-  %45 = uitofp i32 %32 to float
-  %46 = uitofp i32 %41 to float
-  %47 = uitofp i32 %44 to float
-  %48 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
-  %49 = extractvalue %dx.types.CBufRet.i32 %48, 0
-  %50 = extractvalue %dx.types.CBufRet.i32 %48, 1
-  %51 = extractvalue %dx.types.CBufRet.i32 %48, 2
-  %52 = extractvalue %dx.types.CBufRet.i32 %48, 3
-  %53 = uitofp i32 %49 to float
-  %54 = uitofp i32 %50 to float
-  %55 = uitofp i32 %51 to float
-  %56 = uitofp i32 %52 to float
-  %57 = call float @dx.op.dot4.f32(i32 56, float %45, float %46, float %47, float 0.000000e+00, float %53, float %54, float %55, float %56)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %58 = fadd fast float %56, %57
-  %59 = fptoui float %57 to i32
-  %60 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %2, i32 %59, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %61 = extractvalue %dx.types.ResRet.i32 %60, 0
-  %62 = extractvalue %dx.types.ResRet.i32 %60, 1
-  %63 = call double @dx.op.makeDouble.f64(i32 101, i32 %61, i32 %62)  ; MakeDouble(lo,hi)
-  %64 = fptrunc double %63 to float
-  %65 = fptoui float %58 to i32
-  %66 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %2, i32 %65, i32 0, i8 3, i32 8)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %67 = extractvalue %dx.types.ResRet.i32 %66, 0
-  %68 = extractvalue %dx.types.ResRet.i32 %66, 1
-  %69 = call double @dx.op.makeDouble.f64(i32 101, i32 %67, i32 %68)  ; MakeDouble(lo,hi)
-  %70 = fptrunc double %69 to float
-  %71 = icmp eq i32 %18, 1
-  %72 = fadd fast float %64, 1.000000e+00
-  %73 = fadd fast float %70, 1.000000e+00
-  br i1 %71, label %74, label %81
-
-; <label>:74                                      ; preds = %11
-  %75 = fmul fast float %72, 5.000000e-01
-  %76 = fmul fast float %73, 5.000000e-01
-  %77 = fadd fast float %14, -1.000000e+00
-  %78 = fadd fast float %16, -1.000000e+00
-  %79 = fmul fast float %75, %77
-  %80 = fmul fast float %76, %78
-  br label %88
-
-; <label>:81                                      ; preds = %11
-  %82 = fmul fast float %14, %72
-  %83 = fmul fast float %73, %16
-  %84 = fadd fast float %82, -1.000000e+00
-  %85 = fadd fast float %83, -1.000000e+00
-  %86 = fmul fast float %84, 5.000000e-01
-  %87 = fmul fast float %85, 5.000000e-01
-  br label %88
-
-; <label>:88                                      ; preds = %81, %74
-  %89 = phi float [ %79, %74 ], [ %86, %81 ]
-  %90 = phi float [ %80, %74 ], [ %87, %81 ]
-  %91 = extractvalue %dx.types.CBufRet.i32 %6, 2
-  %92 = icmp eq i32 %91, 1
-  br i1 %92, label %93, label %96
-
-; <label>:93                                      ; preds = %88
-  %94 = call float @dx.op.unary.f32(i32 26, float %89)  ; Round_ne(value)
-  %95 = call float @dx.op.unary.f32(i32 26, float %90)  ; Round_ne(value)
-  br label %96
-
-; <label>:96                                      ; preds = %93, %88
-  %97 = phi float [ %94, %93 ], [ %89, %88 ]
-  %98 = phi float [ %95, %93 ], [ %90, %88 ]
-  %99 = fcmp fast olt float %97, %20
-  %100 = fcmp fast ogt float %97, %22
-  %101 = or i1 %99, %100
-  %102 = fcmp fast olt float %98, %20
-  %103 = or i1 %101, %102
-  %104 = fcmp fast ogt float %98, %24
-  %105 = or i1 %104, %103
-  br i1 %105, label %106, label %179
-
-; <label>:106                                     ; preds = %96
-  %107 = extractvalue %dx.types.CBufRet.i32 %6, 3
-  %108 = icmp eq i32 %107, 1
-  br i1 %108, label %109, label %118
-
-; <label>:109                                     ; preds = %106
-  %110 = add i32 %13, -1
-  %111 = uitofp i32 %110 to float
-  %112 = call float @dx.op.binary.f32(i32 35, float %97, float 0.000000e+00)  ; FMax(a,b)
-  %113 = call float @dx.op.binary.f32(i32 36, float %112, float %111)  ; FMin(a,b)
-  %114 = add i32 %15, -1
-  %115 = uitofp i32 %114 to float
-  %116 = call float @dx.op.binary.f32(i32 35, float %98, float 0.000000e+00)  ; FMax(a,b)
-  %117 = call float @dx.op.binary.f32(i32 36, float %116, float %115)  ; FMin(a,b)
-  br label %179
-
-; <label>:118                                     ; preds = %106
-  %119 = icmp eq i32 %107, 2
-  br i1 %119, label %120, label %179
-
-; <label>:120                                     ; preds = %118
-  %121 = fsub fast float %22, %20
-  br i1 %99, label %122, label %135
-
-; <label>:122                                     ; preds = %120
-  %123 = fsub fast float %20, %97
-  %124 = fdiv fast float %123, %121
-  %125 = fptoui float %124 to i32
-  %126 = uitofp i32 %125 to float
-  %127 = fmul fast float %126, %121
-  %128 = fsub fast float %123, %127
-  %129 = and i32 %125, 1
-  %130 = icmp eq i32 %129, 0
-  br i1 %130, label %131, label %133
-
-; <label>:131                                     ; preds = %122
-  %132 = fadd fast float %128, %20
-  br label %149
-
-; <label>:133                                     ; preds = %122
-  %134 = fsub fast float %22, %128
-  br label %149
-
-; <label>:135                                     ; preds = %120
-  br i1 %100, label %136, label %149
-
-; <label>:136                                     ; preds = %135
-  %137 = fsub fast float %97, %22
-  %138 = fdiv fast float %137, %121
-  %139 = fptoui float %138 to i32
-  %140 = uitofp i32 %139 to float
-  %141 = fmul fast float %140, %121
-  %142 = fsub fast float %137, %141
-  %143 = and i32 %139, 1
-  %144 = icmp eq i32 %143, 0
-  br i1 %144, label %145, label %147
-
-; <label>:145                                     ; preds = %136
-  %146 = fsub fast float %22, %142
-  br label %149
-
-; <label>:147                                     ; preds = %136
-  %148 = fadd fast float %142, %20
-  br label %149
-
-; <label>:149                                     ; preds = %147, %145, %135, %133, %131
-  %150 = phi float [ %132, %131 ], [ %134, %133 ], [ %146, %145 ], [ %148, %147 ], [ %97, %135 ]
-  %151 = fsub fast float %24, %20
-  br i1 %102, label %152, label %165
-
-; <label>:152                                     ; preds = %149
-  %153 = fsub fast float %20, %98
-  %154 = fdiv fast float %153, %151
-  %155 = fptoui float %154 to i32
-  %156 = uitofp i32 %155 to float
-  %157 = fmul fast float %156, %151
-  %158 = fsub fast float %153, %157
-  %159 = and i32 %155, 1
-  %160 = icmp eq i32 %159, 0
-  br i1 %160, label %161, label %163
-
-; <label>:161                                     ; preds = %152
-  %162 = fadd fast float %158, %20
-  br label %179
-
-; <label>:163                                     ; preds = %152
-  %164 = fsub fast float %24, %158
-  br label %179
-
-; <label>:165                                     ; preds = %149
-  br i1 %104, label %166, label %179
-
-; <label>:166                                     ; preds = %165
-  %167 = fsub fast float %98, %24
-  %168 = fdiv fast float %167, %151
-  %169 = fptoui float %168 to i32
-  %170 = uitofp i32 %169 to float
-  %171 = fmul fast float %170, %151
-  %172 = fsub fast float %167, %171
-  %173 = and i32 %169, 1
-  %174 = icmp eq i32 %173, 0
-  br i1 %174, label %175, label %177
-
-; <label>:175                                     ; preds = %166
-  %176 = fsub fast float %24, %172
-  br label %179
-
-; <label>:177                                     ; preds = %166
-  %178 = fadd fast float %172, %20
-  br label %179
-
-; <label>:179                                     ; preds = %177, %175, %165, %163, %161, %118, %109, %96
-  %180 = phi float [ %113, %109 ], [ %97, %118 ], [ %97, %96 ], [ %150, %177 ], [ %150, %175 ], [ %150, %165 ], [ %150, %163 ], [ %150, %161 ]
-  %181 = phi float [ %117, %109 ], [ %98, %118 ], [ %98, %96 ], [ %178, %177 ], [ %176, %175 ], [ %98, %165 ], [ %164, %163 ], [ %162, %161 ]
-  %182 = uitofp i32 %37 to float
-  br i1 %92, label %183, label %332
-
-; <label>:183                                     ; preds = %179
-  %184 = extractvalue %dx.types.CBufRet.i32 %6, 3
-  %185 = icmp eq i32 %184, 0
-  br i1 %185, label %186, label %211
-
-; <label>:186                                     ; preds = %183
-  %187 = fcmp fast oge float %180, 0.000000e+00
-  %188 = fptoui float %180 to i32
-  %189 = icmp ult i32 %188, %13
-  %190 = and i1 %187, %189
-  %191 = fcmp fast oge float %181, 0.000000e+00
-  %192 = and i1 %191, %190
-  %193 = fptoui float %181 to i32
-  %194 = icmp ult i32 %193, %15
-  %195 = and i1 %194, %192
-  br i1 %195, label %196, label %329
-
-; <label>:196                                     ; preds = %186
-  %197 = fptoui float %45 to i32
-  %198 = fptoui float %182 to i32
-  %199 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %200 = extractvalue %dx.types.CBufRet.i32 %199, 0
-  %201 = extractvalue %dx.types.CBufRet.i32 %199, 1
-  %202 = extractvalue %dx.types.CBufRet.i32 %199, 2
-  %203 = extractvalue %dx.types.CBufRet.i32 %199, 3
-  %204 = mul i32 %200, %197
-  %205 = call i32 @dx.op.tertiary.i32(i32 48, i32 %198, i32 %201, i32 %204)  ; IMad(a,b,c)
-  %206 = call i32 @dx.op.tertiary.i32(i32 48, i32 %193, i32 %202, i32 %205)  ; IMad(a,b,c)
-  %207 = call i32 @dx.op.tertiary.i32(i32 48, i32 %188, i32 %203, i32 %206)  ; IMad(a,b,c)
-  %208 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %207, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %209 = extractvalue %dx.types.ResRet.i32 %208, 0
-  %210 = uitofp i32 %209 to float
-  br label %329
-
-; <label>:211                                     ; preds = %183
-  %212 = icmp eq i32 %184, 1
-  br i1 %212, label %213, label %242
-
-; <label>:213                                     ; preds = %211
-  %214 = add i32 %13, -1
-  %215 = uitofp i32 %214 to float
-  %216 = call float @dx.op.binary.f32(i32 35, float %180, float 0.000000e+00)  ; FMax(a,b)
-  %217 = call float @dx.op.binary.f32(i32 36, float %216, float %215)  ; FMin(a,b)
-  %218 = fptoui float %217 to i32
-  %219 = add i32 %15, -1
-  %220 = uitofp i32 %219 to float
-  %221 = call float @dx.op.binary.f32(i32 35, float %181, float 0.000000e+00)  ; FMax(a,b)
-  %222 = call float @dx.op.binary.f32(i32 36, float %221, float %220)  ; FMin(a,b)
-  %223 = fptoui float %222 to i32
-  %224 = uitofp i32 %223 to float
-  %225 = uitofp i32 %218 to float
-  %226 = fptoui float %45 to i32
-  %227 = fptoui float %182 to i32
-  %228 = fptoui float %224 to i32
-  %229 = fptoui float %225 to i32
-  %230 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %231 = extractvalue %dx.types.CBufRet.i32 %230, 0
-  %232 = extractvalue %dx.types.CBufRet.i32 %230, 1
-  %233 = extractvalue %dx.types.CBufRet.i32 %230, 2
-  %234 = extractvalue %dx.types.CBufRet.i32 %230, 3
-  %235 = mul i32 %231, %226
-  %236 = call i32 @dx.op.tertiary.i32(i32 48, i32 %227, i32 %232, i32 %235)  ; IMad(a,b,c)
-  %237 = call i32 @dx.op.tertiary.i32(i32 48, i32 %228, i32 %233, i32 %236)  ; IMad(a,b,c)
-  %238 = call i32 @dx.op.tertiary.i32(i32 48, i32 %229, i32 %234, i32 %237)  ; IMad(a,b,c)
-  %239 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %238, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %240 = extractvalue %dx.types.ResRet.i32 %239, 0
-  %241 = uitofp i32 %240 to float
-  br label %329
-
-; <label>:242                                     ; preds = %211
-  %243 = icmp eq i32 %184, 2
-  br i1 %243, label %244, label %329
-
-; <label>:244                                     ; preds = %242
-  %245 = fsub fast float %22, %20
-  %246 = fcmp fast olt float %180, %20
-  br i1 %246, label %247, label %260
-
-; <label>:247                                     ; preds = %244
-  %248 = fsub fast float %20, %180
-  %249 = fdiv fast float %248, %245
-  %250 = fptoui float %249 to i32
-  %251 = uitofp i32 %250 to float
-  %252 = fmul fast float %251, %245
-  %253 = fsub fast float %248, %252
-  %254 = and i32 %250, 1
-  %255 = icmp eq i32 %254, 0
-  br i1 %255, label %256, label %258
-
-; <label>:256                                     ; preds = %247
-  %257 = fadd fast float %253, %20
-  br label %275
-
-; <label>:258                                     ; preds = %247
-  %259 = fsub fast float %22, %253
-  br label %275
-
-; <label>:260                                     ; preds = %244
-  %261 = fcmp fast ogt float %180, %22
-  br i1 %261, label %262, label %275
-
-; <label>:262                                     ; preds = %260
-  %263 = fsub fast float %180, %22
-  %264 = fdiv fast float %263, %245
-  %265 = fptoui float %264 to i32
-  %266 = uitofp i32 %265 to float
-  %267 = fmul fast float %266, %245
-  %268 = fsub fast float %263, %267
-  %269 = and i32 %265, 1
-  %270 = icmp eq i32 %269, 0
-  br i1 %270, label %271, label %273
-
-; <label>:271                                     ; preds = %262
-  %272 = fsub fast float %22, %268
-  br label %275
-
-; <label>:273                                     ; preds = %262
-  %274 = fadd fast float %268, %20
-  br label %275
-
-; <label>:275                                     ; preds = %273, %271, %260, %258, %256
-  %276 = phi float [ %257, %256 ], [ %259, %258 ], [ %272, %271 ], [ %274, %273 ], [ %180, %260 ]
-  %277 = fptoui float %276 to i32
-  %278 = fsub fast float %24, %20
-  %279 = fcmp fast olt float %181, %20
-  br i1 %279, label %280, label %293
-
-; <label>:280                                     ; preds = %275
-  %281 = fsub fast float %20, %181
-  %282 = fdiv fast float %281, %278
-  %283 = fptoui float %282 to i32
-  %284 = uitofp i32 %283 to float
-  %285 = fmul fast float %284, %278
-  %286 = fsub fast float %281, %285
-  %287 = and i32 %283, 1
-  %288 = icmp eq i32 %287, 0
-  br i1 %288, label %289, label %291
-
-; <label>:289                                     ; preds = %280
-  %290 = fadd fast float %286, %20
-  br label %308
-
-; <label>:291                                     ; preds = %280
-  %292 = fsub fast float %24, %286
-  br label %308
-
-; <label>:293                                     ; preds = %275
-  %294 = fcmp fast ogt float %181, %24
-  br i1 %294, label %295, label %308
-
-; <label>:295                                     ; preds = %293
-  %296 = fsub fast float %181, %24
-  %297 = fdiv fast float %296, %278
-  %298 = fptoui float %297 to i32
-  %299 = uitofp i32 %298 to float
-  %300 = fmul fast float %299, %278
-  %301 = fsub fast float %296, %300
-  %302 = and i32 %298, 1
-  %303 = icmp eq i32 %302, 0
-  br i1 %303, label %304, label %306
-
-; <label>:304                                     ; preds = %295
-  %305 = fsub fast float %24, %301
-  br label %308
-
-; <label>:306                                     ; preds = %295
-  %307 = fadd fast float %301, %20
-  br label %308
-
-; <label>:308                                     ; preds = %306, %304, %293, %291, %289
-  %309 = phi float [ %290, %289 ], [ %292, %291 ], [ %305, %304 ], [ %307, %306 ], [ %181, %293 ]
-  %310 = fptoui float %309 to i32
-  %311 = uitofp i32 %310 to float
-  %312 = uitofp i32 %277 to float
-  %313 = fptoui float %45 to i32
-  %314 = fptoui float %182 to i32
-  %315 = fptoui float %311 to i32
-  %316 = fptoui float %312 to i32
-  %317 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %318 = extractvalue %dx.types.CBufRet.i32 %317, 0
-  %319 = extractvalue %dx.types.CBufRet.i32 %317, 1
-  %320 = extractvalue %dx.types.CBufRet.i32 %317, 2
-  %321 = extractvalue %dx.types.CBufRet.i32 %317, 3
-  %322 = mul i32 %318, %313
-  %323 = call i32 @dx.op.tertiary.i32(i32 48, i32 %314, i32 %319, i32 %322)  ; IMad(a,b,c)
-  %324 = call i32 @dx.op.tertiary.i32(i32 48, i32 %315, i32 %320, i32 %323)  ; IMad(a,b,c)
-  %325 = call i32 @dx.op.tertiary.i32(i32 48, i32 %316, i32 %321, i32 %324)  ; IMad(a,b,c)
-  %326 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %325, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %327 = extractvalue %dx.types.ResRet.i32 %326, 0
-  %328 = uitofp i32 %327 to float
-  br label %329
-
-; <label>:329                                     ; preds = %308, %242, %213, %196, %186
-  %330 = phi float [ %210, %196 ], [ 0.000000e+00, %186 ], [ %241, %213 ], [ %328, %308 ], [ 0.000000e+00, %242 ]
-  %331 = fptoui float %330 to i32
-  call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %1, i32 %8, i32 0, i32 %331, i32 undef, i32 undef, i32 undef, i8 1, i32 4)  ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment)
-  br label %3323
-
-; <label>:332                                     ; preds = %179
-  %333 = icmp eq i32 %91, 0
-  br i1 %333, label %334, label %933
-
-; <label>:334                                     ; preds = %332
-  %335 = call float @dx.op.unary.f32(i32 27, float %180)  ; Round_ni(value)
-  %336 = call float @dx.op.unary.f32(i32 27, float %181)  ; Round_ni(value)
-  %337 = fadd fast float %335, 1.000000e+00
-  %338 = fadd fast float %336, 1.000000e+00
-  %339 = extractvalue %dx.types.CBufRet.i32 %6, 3
-  %340 = icmp eq i32 %339, 0
-  br i1 %340, label %341, label %366
-
-; <label>:341                                     ; preds = %334
-  %342 = fcmp fast oge float %335, 0.000000e+00
-  %343 = fptoui float %335 to i32
-  %344 = icmp ult i32 %343, %13
-  %345 = and i1 %342, %344
-  %346 = fcmp fast oge float %336, 0.000000e+00
-  %347 = and i1 %346, %345
-  %348 = fptoui float %336 to i32
-  %349 = icmp ult i32 %348, %15
-  %350 = and i1 %349, %347
-  br i1 %350, label %351, label %484
-
-; <label>:351                                     ; preds = %341
-  %352 = fptoui float %45 to i32
-  %353 = fptoui float %182 to i32
-  %354 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %355 = extractvalue %dx.types.CBufRet.i32 %354, 0
-  %356 = extractvalue %dx.types.CBufRet.i32 %354, 1
-  %357 = extractvalue %dx.types.CBufRet.i32 %354, 2
-  %358 = extractvalue %dx.types.CBufRet.i32 %354, 3
-  %359 = mul i32 %355, %352
-  %360 = call i32 @dx.op.tertiary.i32(i32 48, i32 %353, i32 %356, i32 %359)  ; IMad(a,b,c)
-  %361 = call i32 @dx.op.tertiary.i32(i32 48, i32 %348, i32 %357, i32 %360)  ; IMad(a,b,c)
-  %362 = call i32 @dx.op.tertiary.i32(i32 48, i32 %343, i32 %358, i32 %361)  ; IMad(a,b,c)
-  %363 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %362, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %364 = extractvalue %dx.types.ResRet.i32 %363, 0
-  %365 = uitofp i32 %364 to float
-  br label %484
-
-; <label>:366                                     ; preds = %334
-  %367 = icmp eq i32 %339, 1
-  br i1 %367, label %368, label %397
-
-; <label>:368                                     ; preds = %366
-  %369 = add i32 %13, -1
-  %370 = uitofp i32 %369 to float
-  %371 = call float @dx.op.binary.f32(i32 35, float %335, float 0.000000e+00)  ; FMax(a,b)
-  %372 = call float @dx.op.binary.f32(i32 36, float %371, float %370)  ; FMin(a,b)
-  %373 = fptoui float %372 to i32
-  %374 = add i32 %15, -1
-  %375 = uitofp i32 %374 to float
-  %376 = call float @dx.op.binary.f32(i32 35, float %336, float 0.000000e+00)  ; FMax(a,b)
-  %377 = call float @dx.op.binary.f32(i32 36, float %376, float %375)  ; FMin(a,b)
-  %378 = fptoui float %377 to i32
-  %379 = uitofp i32 %378 to float
-  %380 = uitofp i32 %373 to float
-  %381 = fptoui float %45 to i32
-  %382 = fptoui float %182 to i32
-  %383 = fptoui float %379 to i32
-  %384 = fptoui float %380 to i32
-  %385 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %386 = extractvalue %dx.types.CBufRet.i32 %385, 0
-  %387 = extractvalue %dx.types.CBufRet.i32 %385, 1
-  %388 = extractvalue %dx.types.CBufRet.i32 %385, 2
-  %389 = extractvalue %dx.types.CBufRet.i32 %385, 3
-  %390 = mul i32 %386, %381
-  %391 = call i32 @dx.op.tertiary.i32(i32 48, i32 %382, i32 %387, i32 %390)  ; IMad(a,b,c)
-  %392 = call i32 @dx.op.tertiary.i32(i32 48, i32 %383, i32 %388, i32 %391)  ; IMad(a,b,c)
-  %393 = call i32 @dx.op.tertiary.i32(i32 48, i32 %384, i32 %389, i32 %392)  ; IMad(a,b,c)
-  %394 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %393, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %395 = extractvalue %dx.types.ResRet.i32 %394, 0
-  %396 = uitofp i32 %395 to float
-  br label %484
-
-; <label>:397                                     ; preds = %366
-  %398 = icmp eq i32 %339, 2
-  br i1 %398, label %399, label %484
-
-; <label>:399                                     ; preds = %397
-  %400 = fsub fast float %22, %20
-  %401 = fcmp fast olt float %335, %20
-  br i1 %401, label %402, label %415
-
-; <label>:402                                     ; preds = %399
-  %403 = fsub fast float %20, %335
-  %404 = fdiv fast float %403, %400
-  %405 = fptoui float %404 to i32
-  %406 = uitofp i32 %405 to float
-  %407 = fmul fast float %406, %400
-  %408 = fsub fast float %403, %407
-  %409 = and i32 %405, 1
-  %410 = icmp eq i32 %409, 0
-  br i1 %410, label %411, label %413
-
-; <label>:411                                     ; preds = %402
-  %412 = fadd fast float %408, %20
-  br label %430
-
-; <label>:413                                     ; preds = %402
-  %414 = fsub fast float %22, %408
-  br label %430
-
-; <label>:415                                     ; preds = %399
-  %416 = fcmp fast ogt float %335, %22
-  br i1 %416, label %417, label %430
-
-; <label>:417                                     ; preds = %415
-  %418 = fsub fast float %335, %22
-  %419 = fdiv fast float %418, %400
-  %420 = fptoui float %419 to i32
-  %421 = uitofp i32 %420 to float
-  %422 = fmul fast float %421, %400
-  %423 = fsub fast float %418, %422
-  %424 = and i32 %420, 1
-  %425 = icmp eq i32 %424, 0
-  br i1 %425, label %426, label %428
-
-; <label>:426                                     ; preds = %417
-  %427 = fsub fast float %22, %423
-  br label %430
-
-; <label>:428                                     ; preds = %417
-  %429 = fadd fast float %423, %20
-  br label %430
-
-; <label>:430                                     ; preds = %428, %426, %415, %413, %411
-  %431 = phi float [ %412, %411 ], [ %414, %413 ], [ %427, %426 ], [ %429, %428 ], [ %335, %415 ]
-  %432 = fptoui float %431 to i32
-  %433 = fsub fast float %24, %20
-  %434 = fcmp fast olt float %336, %20
-  br i1 %434, label %435, label %448
-
-; <label>:435                                     ; preds = %430
-  %436 = fsub fast float %20, %336
-  %437 = fdiv fast float %436, %433
-  %438 = fptoui float %437 to i32
-  %439 = uitofp i32 %438 to float
-  %440 = fmul fast float %439, %433
-  %441 = fsub fast float %436, %440
-  %442 = and i32 %438, 1
-  %443 = icmp eq i32 %442, 0
-  br i1 %443, label %444, label %446
-
-; <label>:444                                     ; preds = %435
-  %445 = fadd fast float %441, %20
-  br label %463
-
-; <label>:446                                     ; preds = %435
-  %447 = fsub fast float %24, %441
-  br label %463
-
-; <label>:448                                     ; preds = %430
-  %449 = fcmp fast ogt float %336, %24
-  br i1 %449, label %450, label %463
-
-; <label>:450                                     ; preds = %448
-  %451 = fsub fast float %336, %24
-  %452 = fdiv fast float %451, %433
-  %453 = fptoui float %452 to i32
-  %454 = uitofp i32 %453 to float
-  %455 = fmul fast float %454, %433
-  %456 = fsub fast float %451, %455
-  %457 = and i32 %453, 1
-  %458 = icmp eq i32 %457, 0
-  br i1 %458, label %459, label %461
-
-; <label>:459                                     ; preds = %450
-  %460 = fsub fast float %24, %456
-  br label %463
-
-; <label>:461                                     ; preds = %450
-  %462 = fadd fast float %456, %20
-  br label %463
-
-; <label>:463                                     ; preds = %461, %459, %448, %446, %444
-  %464 = phi float [ %445, %444 ], [ %447, %446 ], [ %460, %459 ], [ %462, %461 ], [ %336, %448 ]
-  %465 = fptoui float %464 to i32
-  %466 = uitofp i32 %465 to float
-  %467 = uitofp i32 %432 to float
-  %468 = fptoui float %45 to i32
-  %469 = fptoui float %182 to i32
-  %470 = fptoui float %466 to i32
-  %471 = fptoui float %467 to i32
-  %472 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %473 = extractvalue %dx.types.CBufRet.i32 %472, 0
-  %474 = extractvalue %dx.types.CBufRet.i32 %472, 1
-  %475 = extractvalue %dx.types.CBufRet.i32 %472, 2
-  %476 = extractvalue %dx.types.CBufRet.i32 %472, 3
-  %477 = mul i32 %473, %468
-  %478 = call i32 @dx.op.tertiary.i32(i32 48, i32 %469, i32 %474, i32 %477)  ; IMad(a,b,c)
-  %479 = call i32 @dx.op.tertiary.i32(i32 48, i32 %470, i32 %475, i32 %478)  ; IMad(a,b,c)
-  %480 = call i32 @dx.op.tertiary.i32(i32 48, i32 %471, i32 %476, i32 %479)  ; IMad(a,b,c)
-  %481 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %480, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %482 = extractvalue %dx.types.ResRet.i32 %481, 0
-  %483 = uitofp i32 %482 to float
-  br label %484
-
-; <label>:484                                     ; preds = %463, %397, %368, %351, %341
-  %485 = phi float [ %365, %351 ], [ 0.000000e+00, %341 ], [ %396, %368 ], [ %483, %463 ], [ 0.000000e+00, %397 ]
-  br i1 %340, label %486, label %511
-
-; <label>:486                                     ; preds = %484
-  %487 = fcmp fast oge float %337, 0.000000e+00
-  %488 = fptoui float %337 to i32
-  %489 = icmp ult i32 %488, %13
-  %490 = and i1 %487, %489
-  %491 = fcmp fast oge float %336, 0.000000e+00
-  %492 = and i1 %491, %490
-  %493 = fptoui float %336 to i32
-  %494 = icmp ult i32 %493, %15
-  %495 = and i1 %494, %492
-  br i1 %495, label %496, label %629
-
-; <label>:496                                     ; preds = %486
-  %497 = fptoui float %45 to i32
-  %498 = fptoui float %182 to i32
-  %499 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %500 = extractvalue %dx.types.CBufRet.i32 %499, 0
-  %501 = extractvalue %dx.types.CBufRet.i32 %499, 1
-  %502 = extractvalue %dx.types.CBufRet.i32 %499, 2
-  %503 = extractvalue %dx.types.CBufRet.i32 %499, 3
-  %504 = mul i32 %500, %497
-  %505 = call i32 @dx.op.tertiary.i32(i32 48, i32 %498, i32 %501, i32 %504)  ; IMad(a,b,c)
-  %506 = call i32 @dx.op.tertiary.i32(i32 48, i32 %493, i32 %502, i32 %505)  ; IMad(a,b,c)
-  %507 = call i32 @dx.op.tertiary.i32(i32 48, i32 %488, i32 %503, i32 %506)  ; IMad(a,b,c)
-  %508 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %507, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %509 = extractvalue %dx.types.ResRet.i32 %508, 0
-  %510 = uitofp i32 %509 to float
-  br label %629
-
-; <label>:511                                     ; preds = %484
-  %512 = icmp eq i32 %339, 1
-  br i1 %512, label %513, label %542
-
-; <label>:513                                     ; preds = %511
-  %514 = add i32 %13, -1
-  %515 = uitofp i32 %514 to float
-  %516 = call float @dx.op.binary.f32(i32 35, float %337, float 0.000000e+00)  ; FMax(a,b)
-  %517 = call float @dx.op.binary.f32(i32 36, float %516, float %515)  ; FMin(a,b)
-  %518 = fptoui float %517 to i32
-  %519 = add i32 %15, -1
-  %520 = uitofp i32 %519 to float
-  %521 = call float @dx.op.binary.f32(i32 35, float %336, float 0.000000e+00)  ; FMax(a,b)
-  %522 = call float @dx.op.binary.f32(i32 36, float %521, float %520)  ; FMin(a,b)
-  %523 = fptoui float %522 to i32
-  %524 = uitofp i32 %523 to float
-  %525 = uitofp i32 %518 to float
-  %526 = fptoui float %45 to i32
-  %527 = fptoui float %182 to i32
-  %528 = fptoui float %524 to i32
-  %529 = fptoui float %525 to i32
-  %530 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %531 = extractvalue %dx.types.CBufRet.i32 %530, 0
-  %532 = extractvalue %dx.types.CBufRet.i32 %530, 1
-  %533 = extractvalue %dx.types.CBufRet.i32 %530, 2
-  %534 = extractvalue %dx.types.CBufRet.i32 %530, 3
-  %535 = mul i32 %531, %526
-  %536 = call i32 @dx.op.tertiary.i32(i32 48, i32 %527, i32 %532, i32 %535)  ; IMad(a,b,c)
-  %537 = call i32 @dx.op.tertiary.i32(i32 48, i32 %528, i32 %533, i32 %536)  ; IMad(a,b,c)
-  %538 = call i32 @dx.op.tertiary.i32(i32 48, i32 %529, i32 %534, i32 %537)  ; IMad(a,b,c)
-  %539 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %538, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %540 = extractvalue %dx.types.ResRet.i32 %539, 0
-  %541 = uitofp i32 %540 to float
-  br label %629
-
-; <label>:542                                     ; preds = %511
-  %543 = icmp eq i32 %339, 2
-  br i1 %543, label %544, label %629
-
-; <label>:544                                     ; preds = %542
-  %545 = fsub fast float %22, %20
-  %546 = fcmp fast olt float %337, %20
-  br i1 %546, label %547, label %560
-
-; <label>:547                                     ; preds = %544
-  %548 = fsub fast float %20, %337
-  %549 = fdiv fast float %548, %545
-  %550 = fptoui float %549 to i32
-  %551 = uitofp i32 %550 to float
-  %552 = fmul fast float %551, %545
-  %553 = fsub fast float %548, %552
-  %554 = and i32 %550, 1
-  %555 = icmp eq i32 %554, 0
-  br i1 %555, label %556, label %558
-
-; <label>:556                                     ; preds = %547
-  %557 = fadd fast float %553, %20
-  br label %575
-
-; <label>:558                                     ; preds = %547
-  %559 = fsub fast float %22, %553
-  br label %575
-
-; <label>:560                                     ; preds = %544
-  %561 = fcmp fast ogt float %337, %22
-  br i1 %561, label %562, label %575
-
-; <label>:562                                     ; preds = %560
-  %563 = fsub fast float %337, %22
-  %564 = fdiv fast float %563, %545
-  %565 = fptoui float %564 to i32
-  %566 = uitofp i32 %565 to float
-  %567 = fmul fast float %566, %545
-  %568 = fsub fast float %563, %567
-  %569 = and i32 %565, 1
-  %570 = icmp eq i32 %569, 0
-  br i1 %570, label %571, label %573
-
-; <label>:571                                     ; preds = %562
-  %572 = fsub fast float %22, %568
-  br label %575
-
-; <label>:573                                     ; preds = %562
-  %574 = fadd fast float %568, %20
-  br label %575
-
-; <label>:575                                     ; preds = %573, %571, %560, %558, %556
-  %576 = phi float [ %557, %556 ], [ %559, %558 ], [ %572, %571 ], [ %574, %573 ], [ %337, %560 ]
-  %577 = fptoui float %576 to i32
-  %578 = fsub fast float %24, %20
-  %579 = fcmp fast olt float %336, %20
-  br i1 %579, label %580, label %593
-
-; <label>:580                                     ; preds = %575
-  %581 = fsub fast float %20, %336
-  %582 = fdiv fast float %581, %578
-  %583 = fptoui float %582 to i32
-  %584 = uitofp i32 %583 to float
-  %585 = fmul fast float %584, %578
-  %586 = fsub fast float %581, %585
-  %587 = and i32 %583, 1
-  %588 = icmp eq i32 %587, 0
-  br i1 %588, label %589, label %591
-
-; <label>:589                                     ; preds = %580
-  %590 = fadd fast float %586, %20
-  br label %608
-
-; <label>:591                                     ; preds = %580
-  %592 = fsub fast float %24, %586
-  br label %608
-
-; <label>:593                                     ; preds = %575
-  %594 = fcmp fast ogt float %336, %24
-  br i1 %594, label %595, label %608
-
-; <label>:595                                     ; preds = %593
-  %596 = fsub fast float %336, %24
-  %597 = fdiv fast float %596, %578
-  %598 = fptoui float %597 to i32
-  %599 = uitofp i32 %598 to float
-  %600 = fmul fast float %599, %578
-  %601 = fsub fast float %596, %600
-  %602 = and i32 %598, 1
-  %603 = icmp eq i32 %602, 0
-  br i1 %603, label %604, label %606
-
-; <label>:604                                     ; preds = %595
-  %605 = fsub fast float %24, %601
-  br label %608
-
-; <label>:606                                     ; preds = %595
-  %607 = fadd fast float %601, %20
-  br label %608
-
-; <label>:608                                     ; preds = %606, %604, %593, %591, %589
-  %609 = phi float [ %590, %589 ], [ %592, %591 ], [ %605, %604 ], [ %607, %606 ], [ %336, %593 ]
-  %610 = fptoui float %609 to i32
-  %611 = uitofp i32 %610 to float
-  %612 = uitofp i32 %577 to float
-  %613 = fptoui float %45 to i32
-  %614 = fptoui float %182 to i32
-  %615 = fptoui float %611 to i32
-  %616 = fptoui float %612 to i32
-  %617 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %618 = extractvalue %dx.types.CBufRet.i32 %617, 0
-  %619 = extractvalue %dx.types.CBufRet.i32 %617, 1
-  %620 = extractvalue %dx.types.CBufRet.i32 %617, 2
-  %621 = extractvalue %dx.types.CBufRet.i32 %617, 3
-  %622 = mul i32 %618, %613
-  %623 = call i32 @dx.op.tertiary.i32(i32 48, i32 %614, i32 %619, i32 %622)  ; IMad(a,b,c)
-  %624 = call i32 @dx.op.tertiary.i32(i32 48, i32 %615, i32 %620, i32 %623)  ; IMad(a,b,c)
-  %625 = call i32 @dx.op.tertiary.i32(i32 48, i32 %616, i32 %621, i32 %624)  ; IMad(a,b,c)
-  %626 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %625, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %627 = extractvalue %dx.types.ResRet.i32 %626, 0
-  %628 = uitofp i32 %627 to float
-  br label %629
-
-; <label>:629                                     ; preds = %608, %542, %513, %496, %486
-  %630 = phi float [ %510, %496 ], [ 0.000000e+00, %486 ], [ %541, %513 ], [ %628, %608 ], [ 0.000000e+00, %542 ]
-  br i1 %340, label %631, label %656
-
-; <label>:631                                     ; preds = %629
-  %632 = fcmp fast oge float %335, 0.000000e+00
-  %633 = fptoui float %335 to i32
-  %634 = icmp ult i32 %633, %13
-  %635 = and i1 %632, %634
-  %636 = fcmp fast oge float %338, 0.000000e+00
-  %637 = and i1 %636, %635
-  %638 = fptoui float %338 to i32
-  %639 = icmp ult i32 %638, %15
-  %640 = and i1 %639, %637
-  br i1 %640, label %641, label %774
-
-; <label>:641                                     ; preds = %631
-  %642 = fptoui float %45 to i32
-  %643 = fptoui float %182 to i32
-  %644 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %645 = extractvalue %dx.types.CBufRet.i32 %644, 0
-  %646 = extractvalue %dx.types.CBufRet.i32 %644, 1
-  %647 = extractvalue %dx.types.CBufRet.i32 %644, 2
-  %648 = extractvalue %dx.types.CBufRet.i32 %644, 3
-  %649 = mul i32 %645, %642
-  %650 = call i32 @dx.op.tertiary.i32(i32 48, i32 %643, i32 %646, i32 %649)  ; IMad(a,b,c)
-  %651 = call i32 @dx.op.tertiary.i32(i32 48, i32 %638, i32 %647, i32 %650)  ; IMad(a,b,c)
-  %652 = call i32 @dx.op.tertiary.i32(i32 48, i32 %633, i32 %648, i32 %651)  ; IMad(a,b,c)
-  %653 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %652, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %654 = extractvalue %dx.types.ResRet.i32 %653, 0
-  %655 = uitofp i32 %654 to float
-  br label %774
-
-; <label>:656                                     ; preds = %629
-  %657 = icmp eq i32 %339, 1
-  br i1 %657, label %658, label %687
-
-; <label>:658                                     ; preds = %656
-  %659 = add i32 %13, -1
-  %660 = uitofp i32 %659 to float
-  %661 = call float @dx.op.binary.f32(i32 35, float %335, float 0.000000e+00)  ; FMax(a,b)
-  %662 = call float @dx.op.binary.f32(i32 36, float %661, float %660)  ; FMin(a,b)
-  %663 = fptoui float %662 to i32
-  %664 = add i32 %15, -1
-  %665 = uitofp i32 %664 to float
-  %666 = call float @dx.op.binary.f32(i32 35, float %338, float 0.000000e+00)  ; FMax(a,b)
-  %667 = call float @dx.op.binary.f32(i32 36, float %666, float %665)  ; FMin(a,b)
-  %668 = fptoui float %667 to i32
-  %669 = uitofp i32 %668 to float
-  %670 = uitofp i32 %663 to float
-  %671 = fptoui float %45 to i32
-  %672 = fptoui float %182 to i32
-  %673 = fptoui float %669 to i32
-  %674 = fptoui float %670 to i32
-  %675 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %676 = extractvalue %dx.types.CBufRet.i32 %675, 0
-  %677 = extractvalue %dx.types.CBufRet.i32 %675, 1
-  %678 = extractvalue %dx.types.CBufRet.i32 %675, 2
-  %679 = extractvalue %dx.types.CBufRet.i32 %675, 3
-  %680 = mul i32 %676, %671
-  %681 = call i32 @dx.op.tertiary.i32(i32 48, i32 %672, i32 %677, i32 %680)  ; IMad(a,b,c)
-  %682 = call i32 @dx.op.tertiary.i32(i32 48, i32 %673, i32 %678, i32 %681)  ; IMad(a,b,c)
-  %683 = call i32 @dx.op.tertiary.i32(i32 48, i32 %674, i32 %679, i32 %682)  ; IMad(a,b,c)
-  %684 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %683, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %685 = extractvalue %dx.types.ResRet.i32 %684, 0
-  %686 = uitofp i32 %685 to float
-  br label %774
-
-; <label>:687                                     ; preds = %656
-  %688 = icmp eq i32 %339, 2
-  br i1 %688, label %689, label %774
-
-; <label>:689                                     ; preds = %687
-  %690 = fsub fast float %22, %20
-  %691 = fcmp fast olt float %335, %20
-  br i1 %691, label %692, label %705
-
-; <label>:692                                     ; preds = %689
-  %693 = fsub fast float %20, %335
-  %694 = fdiv fast float %693, %690
-  %695 = fptoui float %694 to i32
-  %696 = uitofp i32 %695 to float
-  %697 = fmul fast float %696, %690
-  %698 = fsub fast float %693, %697
-  %699 = and i32 %695, 1
-  %700 = icmp eq i32 %699, 0
-  br i1 %700, label %701, label %703
-
-; <label>:701                                     ; preds = %692
-  %702 = fadd fast float %698, %20
-  br label %720
-
-; <label>:703                                     ; preds = %692
-  %704 = fsub fast float %22, %698
-  br label %720
-
-; <label>:705                                     ; preds = %689
-  %706 = fcmp fast ogt float %335, %22
-  br i1 %706, label %707, label %720
-
-; <label>:707                                     ; preds = %705
-  %708 = fsub fast float %335, %22
-  %709 = fdiv fast float %708, %690
-  %710 = fptoui float %709 to i32
-  %711 = uitofp i32 %710 to float
-  %712 = fmul fast float %711, %690
-  %713 = fsub fast float %708, %712
-  %714 = and i32 %710, 1
-  %715 = icmp eq i32 %714, 0
-  br i1 %715, label %716, label %718
-
-; <label>:716                                     ; preds = %707
-  %717 = fsub fast float %22, %713
-  br label %720
-
-; <label>:718                                     ; preds = %707
-  %719 = fadd fast float %713, %20
-  br label %720
-
-; <label>:720                                     ; preds = %718, %716, %705, %703, %701
-  %721 = phi float [ %702, %701 ], [ %704, %703 ], [ %717, %716 ], [ %719, %718 ], [ %335, %705 ]
-  %722 = fptoui float %721 to i32
-  %723 = fsub fast float %24, %20
-  %724 = fcmp fast olt float %338, %20
-  br i1 %724, label %725, label %738
-
-; <label>:725                                     ; preds = %720
-  %726 = fsub fast float %20, %338
-  %727 = fdiv fast float %726, %723
-  %728 = fptoui float %727 to i32
-  %729 = uitofp i32 %728 to float
-  %730 = fmul fast float %729, %723
-  %731 = fsub fast float %726, %730
-  %732 = and i32 %728, 1
-  %733 = icmp eq i32 %732, 0
-  br i1 %733, label %734, label %736
-
-; <label>:734                                     ; preds = %725
-  %735 = fadd fast float %731, %20
-  br label %753
-
-; <label>:736                                     ; preds = %725
-  %737 = fsub fast float %24, %731
-  br label %753
-
-; <label>:738                                     ; preds = %720
-  %739 = fcmp fast ogt float %338, %24
-  br i1 %739, label %740, label %753
-
-; <label>:740                                     ; preds = %738
-  %741 = fsub fast float %338, %24
-  %742 = fdiv fast float %741, %723
-  %743 = fptoui float %742 to i32
-  %744 = uitofp i32 %743 to float
-  %745 = fmul fast float %744, %723
-  %746 = fsub fast float %741, %745
-  %747 = and i32 %743, 1
-  %748 = icmp eq i32 %747, 0
-  br i1 %748, label %749, label %751
-
-; <label>:749                                     ; preds = %740
-  %750 = fsub fast float %24, %746
-  br label %753
-
-; <label>:751                                     ; preds = %740
-  %752 = fadd fast float %746, %20
-  br label %753
-
-; <label>:753                                     ; preds = %751, %749, %738, %736, %734
-  %754 = phi float [ %735, %734 ], [ %737, %736 ], [ %750, %749 ], [ %752, %751 ], [ %338, %738 ]
-  %755 = fptoui float %754 to i32
-  %756 = uitofp i32 %755 to float
-  %757 = uitofp i32 %722 to float
-  %758 = fptoui float %45 to i32
-  %759 = fptoui float %182 to i32
-  %760 = fptoui float %756 to i32
-  %761 = fptoui float %757 to i32
-  %762 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %763 = extractvalue %dx.types.CBufRet.i32 %762, 0
-  %764 = extractvalue %dx.types.CBufRet.i32 %762, 1
-  %765 = extractvalue %dx.types.CBufRet.i32 %762, 2
-  %766 = extractvalue %dx.types.CBufRet.i32 %762, 3
-  %767 = mul i32 %763, %758
-  %768 = call i32 @dx.op.tertiary.i32(i32 48, i32 %759, i32 %764, i32 %767)  ; IMad(a,b,c)
-  %769 = call i32 @dx.op.tertiary.i32(i32 48, i32 %760, i32 %765, i32 %768)  ; IMad(a,b,c)
-  %770 = call i32 @dx.op.tertiary.i32(i32 48, i32 %761, i32 %766, i32 %769)  ; IMad(a,b,c)
-  %771 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %770, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %772 = extractvalue %dx.types.ResRet.i32 %771, 0
-  %773 = uitofp i32 %772 to float
-  br label %774
-
-; <label>:774                                     ; preds = %753, %687, %658, %641, %631
-  %775 = phi float [ %655, %641 ], [ 0.000000e+00, %631 ], [ %686, %658 ], [ %773, %753 ], [ 0.000000e+00, %687 ]
-  br i1 %340, label %776, label %801
-
-; <label>:776                                     ; preds = %774
-  %777 = fcmp fast oge float %337, 0.000000e+00
-  %778 = fptoui float %337 to i32
-  %779 = icmp ult i32 %778, %13
-  %780 = and i1 %777, %779
-  %781 = fcmp fast oge float %338, 0.000000e+00
-  %782 = and i1 %781, %780
-  %783 = fptoui float %338 to i32
-  %784 = icmp ult i32 %783, %15
-  %785 = and i1 %784, %782
-  br i1 %785, label %786, label %919
-
-; <label>:786                                     ; preds = %776
-  %787 = fptoui float %45 to i32
-  %788 = fptoui float %182 to i32
-  %789 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %790 = extractvalue %dx.types.CBufRet.i32 %789, 0
-  %791 = extractvalue %dx.types.CBufRet.i32 %789, 1
-  %792 = extractvalue %dx.types.CBufRet.i32 %789, 2
-  %793 = extractvalue %dx.types.CBufRet.i32 %789, 3
-  %794 = mul i32 %790, %787
-  %795 = call i32 @dx.op.tertiary.i32(i32 48, i32 %788, i32 %791, i32 %794)  ; IMad(a,b,c)
-  %796 = call i32 @dx.op.tertiary.i32(i32 48, i32 %783, i32 %792, i32 %795)  ; IMad(a,b,c)
-  %797 = call i32 @dx.op.tertiary.i32(i32 48, i32 %778, i32 %793, i32 %796)  ; IMad(a,b,c)
-  %798 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %797, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %799 = extractvalue %dx.types.ResRet.i32 %798, 0
-  %800 = uitofp i32 %799 to float
-  br label %919
-
-; <label>:801                                     ; preds = %774
-  %802 = icmp eq i32 %339, 1
-  br i1 %802, label %803, label %832
-
-; <label>:803                                     ; preds = %801
-  %804 = add i32 %13, -1
-  %805 = uitofp i32 %804 to float
-  %806 = call float @dx.op.binary.f32(i32 35, float %337, float 0.000000e+00)  ; FMax(a,b)
-  %807 = call float @dx.op.binary.f32(i32 36, float %806, float %805)  ; FMin(a,b)
-  %808 = fptoui float %807 to i32
-  %809 = add i32 %15, -1
-  %810 = uitofp i32 %809 to float
-  %811 = call float @dx.op.binary.f32(i32 35, float %338, float 0.000000e+00)  ; FMax(a,b)
-  %812 = call float @dx.op.binary.f32(i32 36, float %811, float %810)  ; FMin(a,b)
-  %813 = fptoui float %812 to i32
-  %814 = uitofp i32 %813 to float
-  %815 = uitofp i32 %808 to float
-  %816 = fptoui float %45 to i32
-  %817 = fptoui float %182 to i32
-  %818 = fptoui float %814 to i32
-  %819 = fptoui float %815 to i32
-  %820 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %821 = extractvalue %dx.types.CBufRet.i32 %820, 0
-  %822 = extractvalue %dx.types.CBufRet.i32 %820, 1
-  %823 = extractvalue %dx.types.CBufRet.i32 %820, 2
-  %824 = extractvalue %dx.types.CBufRet.i32 %820, 3
-  %825 = mul i32 %821, %816
-  %826 = call i32 @dx.op.tertiary.i32(i32 48, i32 %817, i32 %822, i32 %825)  ; IMad(a,b,c)
-  %827 = call i32 @dx.op.tertiary.i32(i32 48, i32 %818, i32 %823, i32 %826)  ; IMad(a,b,c)
-  %828 = call i32 @dx.op.tertiary.i32(i32 48, i32 %819, i32 %824, i32 %827)  ; IMad(a,b,c)
-  %829 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %828, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %830 = extractvalue %dx.types.ResRet.i32 %829, 0
-  %831 = uitofp i32 %830 to float
-  br label %919
-
-; <label>:832                                     ; preds = %801
-  %833 = icmp eq i32 %339, 2
-  br i1 %833, label %834, label %919
-
-; <label>:834                                     ; preds = %832
-  %835 = fsub fast float %22, %20
-  %836 = fcmp fast olt float %337, %20
-  br i1 %836, label %837, label %850
-
-; <label>:837                                     ; preds = %834
-  %838 = fsub fast float %20, %337
-  %839 = fdiv fast float %838, %835
-  %840 = fptoui float %839 to i32
-  %841 = uitofp i32 %840 to float
-  %842 = fmul fast float %841, %835
-  %843 = fsub fast float %838, %842
-  %844 = and i32 %840, 1
-  %845 = icmp eq i32 %844, 0
-  br i1 %845, label %846, label %848
-
-; <label>:846                                     ; preds = %837
-  %847 = fadd fast float %843, %20
-  br label %865
-
-; <label>:848                                     ; preds = %837
-  %849 = fsub fast float %22, %843
-  br label %865
-
-; <label>:850                                     ; preds = %834
-  %851 = fcmp fast ogt float %337, %22
-  br i1 %851, label %852, label %865
-
-; <label>:852                                     ; preds = %850
-  %853 = fsub fast float %337, %22
-  %854 = fdiv fast float %853, %835
-  %855 = fptoui float %854 to i32
-  %856 = uitofp i32 %855 to float
-  %857 = fmul fast float %856, %835
-  %858 = fsub fast float %853, %857
-  %859 = and i32 %855, 1
-  %860 = icmp eq i32 %859, 0
-  br i1 %860, label %861, label %863
-
-; <label>:861                                     ; preds = %852
-  %862 = fsub fast float %22, %858
-  br label %865
-
-; <label>:863                                     ; preds = %852
-  %864 = fadd fast float %858, %20
-  br label %865
-
-; <label>:865                                     ; preds = %863, %861, %850, %848, %846
-  %866 = phi float [ %847, %846 ], [ %849, %848 ], [ %862, %861 ], [ %864, %863 ], [ %337, %850 ]
-  %867 = fptoui float %866 to i32
-  %868 = fsub fast float %24, %20
-  %869 = fcmp fast olt float %338, %20
-  br i1 %869, label %870, label %883
-
-; <label>:870                                     ; preds = %865
-  %871 = fsub fast float %20, %338
-  %872 = fdiv fast float %871, %868
-  %873 = fptoui float %872 to i32
-  %874 = uitofp i32 %873 to float
-  %875 = fmul fast float %874, %868
-  %876 = fsub fast float %871, %875
-  %877 = and i32 %873, 1
-  %878 = icmp eq i32 %877, 0
-  br i1 %878, label %879, label %881
-
-; <label>:879                                     ; preds = %870
-  %880 = fadd fast float %876, %20
-  br label %898
-
-; <label>:881                                     ; preds = %870
-  %882 = fsub fast float %24, %876
-  br label %898
-
-; <label>:883                                     ; preds = %865
-  %884 = fcmp fast ogt float %338, %24
-  br i1 %884, label %885, label %898
-
-; <label>:885                                     ; preds = %883
-  %886 = fsub fast float %338, %24
-  %887 = fdiv fast float %886, %868
-  %888 = fptoui float %887 to i32
-  %889 = uitofp i32 %888 to float
-  %890 = fmul fast float %889, %868
-  %891 = fsub fast float %886, %890
-  %892 = and i32 %888, 1
-  %893 = icmp eq i32 %892, 0
-  br i1 %893, label %894, label %896
-
-; <label>:894                                     ; preds = %885
-  %895 = fsub fast float %24, %891
-  br label %898
-
-; <label>:896                                     ; preds = %885
-  %897 = fadd fast float %891, %20
-  br label %898
-
-; <label>:898                                     ; preds = %896, %894, %883, %881, %879
-  %899 = phi float [ %880, %879 ], [ %882, %881 ], [ %895, %894 ], [ %897, %896 ], [ %338, %883 ]
-  %900 = fptoui float %899 to i32
-  %901 = uitofp i32 %900 to float
-  %902 = uitofp i32 %867 to float
-  %903 = fptoui float %45 to i32
-  %904 = fptoui float %182 to i32
-  %905 = fptoui float %901 to i32
-  %906 = fptoui float %902 to i32
-  %907 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %908 = extractvalue %dx.types.CBufRet.i32 %907, 0
-  %909 = extractvalue %dx.types.CBufRet.i32 %907, 1
-  %910 = extractvalue %dx.types.CBufRet.i32 %907, 2
-  %911 = extractvalue %dx.types.CBufRet.i32 %907, 3
-  %912 = mul i32 %908, %903
-  %913 = call i32 @dx.op.tertiary.i32(i32 48, i32 %904, i32 %909, i32 %912)  ; IMad(a,b,c)
-  %914 = call i32 @dx.op.tertiary.i32(i32 48, i32 %905, i32 %910, i32 %913)  ; IMad(a,b,c)
-  %915 = call i32 @dx.op.tertiary.i32(i32 48, i32 %906, i32 %911, i32 %914)  ; IMad(a,b,c)
-  %916 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %915, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %917 = extractvalue %dx.types.ResRet.i32 %916, 0
-  %918 = uitofp i32 %917 to float
-  br label %919
-
-; <label>:919                                     ; preds = %898, %832, %803, %786, %776
-  %920 = phi float [ %800, %786 ], [ 0.000000e+00, %776 ], [ %831, %803 ], [ %918, %898 ], [ 0.000000e+00, %832 ]
-  %921 = call float @dx.op.unary.f32(i32 22, float %180)  ; Frc(value)
-  %922 = fsub fast float %630, %485
-  %923 = fmul fast float %921, %922
-  %924 = fadd fast float %923, %485
-  %925 = fsub fast float %920, %775
-  %926 = fmul fast float %921, %925
-  %927 = fadd fast float %926, %775
-  %928 = call float @dx.op.unary.f32(i32 22, float %181)  ; Frc(value)
-  %929 = fsub fast float %927, %924
-  %930 = fmul fast float %929, %928
-  %931 = fadd fast float %930, %924
-  %932 = fptoui float %931 to i32
-  call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %1, i32 %8, i32 0, i32 %932, i32 undef, i32 undef, i32 undef, i8 1, i32 4)  ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment)
-  br label %3323
-
-; <label>:933                                     ; preds = %332
-  %934 = icmp eq i32 %91, 2
-  br i1 %934, label %935, label %3323
-
-; <label>:935                                     ; preds = %933
-  %936 = call float @dx.op.unary.f32(i32 27, float %180)  ; Round_ni(value)
-  %937 = fadd fast float %936, -1.000000e+00
-  %938 = call float @dx.op.unary.f32(i32 27, float %181)  ; Round_ni(value)
-  %939 = fadd fast float %938, -1.000000e+00
-  %940 = extractvalue %dx.types.CBufRet.i32 %6, 3
-  %941 = icmp eq i32 %940, 0
-  br i1 %941, label %942, label %967
-
-; <label>:942                                     ; preds = %935
-  %943 = fcmp fast oge float %937, 0.000000e+00
-  %944 = fptoui float %937 to i32
-  %945 = icmp ult i32 %944, %13
-  %946 = and i1 %943, %945
-  %947 = fcmp fast oge float %939, 0.000000e+00
-  %948 = and i1 %947, %946
-  %949 = fptoui float %939 to i32
-  %950 = icmp ult i32 %949, %15
-  %951 = and i1 %950, %948
-  br i1 %951, label %952, label %1085
-
-; <label>:952                                     ; preds = %942
-  %953 = fptoui float %45 to i32
-  %954 = fptoui float %182 to i32
-  %955 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %956 = extractvalue %dx.types.CBufRet.i32 %955, 0
-  %957 = extractvalue %dx.types.CBufRet.i32 %955, 1
-  %958 = extractvalue %dx.types.CBufRet.i32 %955, 2
-  %959 = extractvalue %dx.types.CBufRet.i32 %955, 3
-  %960 = mul i32 %956, %953
-  %961 = call i32 @dx.op.tertiary.i32(i32 48, i32 %954, i32 %957, i32 %960)  ; IMad(a,b,c)
-  %962 = call i32 @dx.op.tertiary.i32(i32 48, i32 %949, i32 %958, i32 %961)  ; IMad(a,b,c)
-  %963 = call i32 @dx.op.tertiary.i32(i32 48, i32 %944, i32 %959, i32 %962)  ; IMad(a,b,c)
-  %964 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %963, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %965 = extractvalue %dx.types.ResRet.i32 %964, 0
-  %966 = uitofp i32 %965 to float
-  br label %1085
-
-; <label>:967                                     ; preds = %935
-  %968 = icmp eq i32 %940, 1
-  br i1 %968, label %969, label %998
-
-; <label>:969                                     ; preds = %967
-  %970 = add i32 %13, -1
-  %971 = uitofp i32 %970 to float
-  %972 = call float @dx.op.binary.f32(i32 35, float %937, float 0.000000e+00)  ; FMax(a,b)
-  %973 = call float @dx.op.binary.f32(i32 36, float %972, float %971)  ; FMin(a,b)
-  %974 = fptoui float %973 to i32
-  %975 = add i32 %15, -1
-  %976 = uitofp i32 %975 to float
-  %977 = call float @dx.op.binary.f32(i32 35, float %939, float 0.000000e+00)  ; FMax(a,b)
-  %978 = call float @dx.op.binary.f32(i32 36, float %977, float %976)  ; FMin(a,b)
-  %979 = fptoui float %978 to i32
-  %980 = uitofp i32 %979 to float
-  %981 = uitofp i32 %974 to float
-  %982 = fptoui float %45 to i32
-  %983 = fptoui float %182 to i32
-  %984 = fptoui float %980 to i32
-  %985 = fptoui float %981 to i32
-  %986 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %987 = extractvalue %dx.types.CBufRet.i32 %986, 0
-  %988 = extractvalue %dx.types.CBufRet.i32 %986, 1
-  %989 = extractvalue %dx.types.CBufRet.i32 %986, 2
-  %990 = extractvalue %dx.types.CBufRet.i32 %986, 3
-  %991 = mul i32 %987, %982
-  %992 = call i32 @dx.op.tertiary.i32(i32 48, i32 %983, i32 %988, i32 %991)  ; IMad(a,b,c)
-  %993 = call i32 @dx.op.tertiary.i32(i32 48, i32 %984, i32 %989, i32 %992)  ; IMad(a,b,c)
-  %994 = call i32 @dx.op.tertiary.i32(i32 48, i32 %985, i32 %990, i32 %993)  ; IMad(a,b,c)
-  %995 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %994, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %996 = extractvalue %dx.types.ResRet.i32 %995, 0
-  %997 = uitofp i32 %996 to float
-  br label %1085
-
-; <label>:998                                     ; preds = %967
-  %999 = icmp eq i32 %940, 2
-  br i1 %999, label %1000, label %1085
-
-; <label>:1000                                    ; preds = %998
-  %1001 = fsub fast float %22, %20
-  %1002 = fcmp fast olt float %937, %20
-  br i1 %1002, label %1003, label %1016
-
-; <label>:1003                                    ; preds = %1000
-  %1004 = fsub fast float %20, %937
-  %1005 = fdiv fast float %1004, %1001
-  %1006 = fptoui float %1005 to i32
-  %1007 = uitofp i32 %1006 to float
-  %1008 = fmul fast float %1007, %1001
-  %1009 = fsub fast float %1004, %1008
-  %1010 = and i32 %1006, 1
-  %1011 = icmp eq i32 %1010, 0
-  br i1 %1011, label %1012, label %1014
-
-; <label>:1012                                    ; preds = %1003
-  %1013 = fadd fast float %1009, %20
-  br label %1031
-
-; <label>:1014                                    ; preds = %1003
-  %1015 = fsub fast float %22, %1009
-  br label %1031
-
-; <label>:1016                                    ; preds = %1000
-  %1017 = fcmp fast ogt float %937, %22
-  br i1 %1017, label %1018, label %1031
-
-; <label>:1018                                    ; preds = %1016
-  %1019 = fsub fast float %937, %22
-  %1020 = fdiv fast float %1019, %1001
-  %1021 = fptoui float %1020 to i32
-  %1022 = uitofp i32 %1021 to float
-  %1023 = fmul fast float %1022, %1001
-  %1024 = fsub fast float %1019, %1023
-  %1025 = and i32 %1021, 1
-  %1026 = icmp eq i32 %1025, 0
-  br i1 %1026, label %1027, label %1029
-
-; <label>:1027                                    ; preds = %1018
-  %1028 = fsub fast float %22, %1024
-  br label %1031
-
-; <label>:1029                                    ; preds = %1018
-  %1030 = fadd fast float %1024, %20
-  br label %1031
-
-; <label>:1031                                    ; preds = %1029, %1027, %1016, %1014, %1012
-  %1032 = phi float [ %1013, %1012 ], [ %1015, %1014 ], [ %1028, %1027 ], [ %1030, %1029 ], [ %937, %1016 ]
-  %1033 = fptoui float %1032 to i32
-  %1034 = fsub fast float %24, %20
-  %1035 = fcmp fast olt float %939, %20
-  br i1 %1035, label %1036, label %1049
-
-; <label>:1036                                    ; preds = %1031
-  %1037 = fsub fast float %20, %939
-  %1038 = fdiv fast float %1037, %1034
-  %1039 = fptoui float %1038 to i32
-  %1040 = uitofp i32 %1039 to float
-  %1041 = fmul fast float %1040, %1034
-  %1042 = fsub fast float %1037, %1041
-  %1043 = and i32 %1039, 1
-  %1044 = icmp eq i32 %1043, 0
-  br i1 %1044, label %1045, label %1047
-
-; <label>:1045                                    ; preds = %1036
-  %1046 = fadd fast float %1042, %20
-  br label %1064
-
-; <label>:1047                                    ; preds = %1036
-  %1048 = fsub fast float %24, %1042
-  br label %1064
-
-; <label>:1049                                    ; preds = %1031
-  %1050 = fcmp fast ogt float %939, %24
-  br i1 %1050, label %1051, label %1064
-
-; <label>:1051                                    ; preds = %1049
-  %1052 = fsub fast float %939, %24
-  %1053 = fdiv fast float %1052, %1034
-  %1054 = fptoui float %1053 to i32
-  %1055 = uitofp i32 %1054 to float
-  %1056 = fmul fast float %1055, %1034
-  %1057 = fsub fast float %1052, %1056
-  %1058 = and i32 %1054, 1
-  %1059 = icmp eq i32 %1058, 0
-  br i1 %1059, label %1060, label %1062
-
-; <label>:1060                                    ; preds = %1051
-  %1061 = fsub fast float %24, %1057
-  br label %1064
-
-; <label>:1062                                    ; preds = %1051
-  %1063 = fadd fast float %1057, %20
-  br label %1064
-
-; <label>:1064                                    ; preds = %1062, %1060, %1049, %1047, %1045
-  %1065 = phi float [ %1046, %1045 ], [ %1048, %1047 ], [ %1061, %1060 ], [ %1063, %1062 ], [ %939, %1049 ]
-  %1066 = fptoui float %1065 to i32
-  %1067 = uitofp i32 %1066 to float
-  %1068 = uitofp i32 %1033 to float
-  %1069 = fptoui float %45 to i32
-  %1070 = fptoui float %182 to i32
-  %1071 = fptoui float %1067 to i32
-  %1072 = fptoui float %1068 to i32
-  %1073 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1074 = extractvalue %dx.types.CBufRet.i32 %1073, 0
-  %1075 = extractvalue %dx.types.CBufRet.i32 %1073, 1
-  %1076 = extractvalue %dx.types.CBufRet.i32 %1073, 2
-  %1077 = extractvalue %dx.types.CBufRet.i32 %1073, 3
-  %1078 = mul i32 %1074, %1069
-  %1079 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1070, i32 %1075, i32 %1078)  ; IMad(a,b,c)
-  %1080 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1071, i32 %1076, i32 %1079)  ; IMad(a,b,c)
-  %1081 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1072, i32 %1077, i32 %1080)  ; IMad(a,b,c)
-  %1082 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1081, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1083 = extractvalue %dx.types.ResRet.i32 %1082, 0
-  %1084 = uitofp i32 %1083 to float
-  br label %1085
-
-; <label>:1085                                    ; preds = %1064, %998, %969, %952, %942
-  %1086 = phi float [ %966, %952 ], [ 0.000000e+00, %942 ], [ %997, %969 ], [ %1084, %1064 ], [ 0.000000e+00, %998 ]
-  br i1 %941, label %1087, label %1112
-
-; <label>:1087                                    ; preds = %1085
-  %1088 = fcmp fast oge float %936, 0.000000e+00
-  %1089 = fptoui float %936 to i32
-  %1090 = icmp ult i32 %1089, %13
-  %1091 = and i1 %1088, %1090
-  %1092 = fcmp fast oge float %939, 0.000000e+00
-  %1093 = and i1 %1092, %1091
-  %1094 = fptoui float %939 to i32
-  %1095 = icmp ult i32 %1094, %15
-  %1096 = and i1 %1095, %1093
-  br i1 %1096, label %1097, label %1230
-
-; <label>:1097                                    ; preds = %1087
-  %1098 = fptoui float %45 to i32
-  %1099 = fptoui float %182 to i32
-  %1100 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1101 = extractvalue %dx.types.CBufRet.i32 %1100, 0
-  %1102 = extractvalue %dx.types.CBufRet.i32 %1100, 1
-  %1103 = extractvalue %dx.types.CBufRet.i32 %1100, 2
-  %1104 = extractvalue %dx.types.CBufRet.i32 %1100, 3
-  %1105 = mul i32 %1101, %1098
-  %1106 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1099, i32 %1102, i32 %1105)  ; IMad(a,b,c)
-  %1107 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1094, i32 %1103, i32 %1106)  ; IMad(a,b,c)
-  %1108 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1089, i32 %1104, i32 %1107)  ; IMad(a,b,c)
-  %1109 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1108, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1110 = extractvalue %dx.types.ResRet.i32 %1109, 0
-  %1111 = uitofp i32 %1110 to float
-  br label %1230
-
-; <label>:1112                                    ; preds = %1085
-  %1113 = icmp eq i32 %940, 1
-  br i1 %1113, label %1114, label %1143
-
-; <label>:1114                                    ; preds = %1112
-  %1115 = add i32 %13, -1
-  %1116 = uitofp i32 %1115 to float
-  %1117 = call float @dx.op.binary.f32(i32 35, float %936, float 0.000000e+00)  ; FMax(a,b)
-  %1118 = call float @dx.op.binary.f32(i32 36, float %1117, float %1116)  ; FMin(a,b)
-  %1119 = fptoui float %1118 to i32
-  %1120 = add i32 %15, -1
-  %1121 = uitofp i32 %1120 to float
-  %1122 = call float @dx.op.binary.f32(i32 35, float %939, float 0.000000e+00)  ; FMax(a,b)
-  %1123 = call float @dx.op.binary.f32(i32 36, float %1122, float %1121)  ; FMin(a,b)
-  %1124 = fptoui float %1123 to i32
-  %1125 = uitofp i32 %1124 to float
-  %1126 = uitofp i32 %1119 to float
-  %1127 = fptoui float %45 to i32
-  %1128 = fptoui float %182 to i32
-  %1129 = fptoui float %1125 to i32
-  %1130 = fptoui float %1126 to i32
-  %1131 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1132 = extractvalue %dx.types.CBufRet.i32 %1131, 0
-  %1133 = extractvalue %dx.types.CBufRet.i32 %1131, 1
-  %1134 = extractvalue %dx.types.CBufRet.i32 %1131, 2
-  %1135 = extractvalue %dx.types.CBufRet.i32 %1131, 3
-  %1136 = mul i32 %1132, %1127
-  %1137 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1128, i32 %1133, i32 %1136)  ; IMad(a,b,c)
-  %1138 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1129, i32 %1134, i32 %1137)  ; IMad(a,b,c)
-  %1139 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1130, i32 %1135, i32 %1138)  ; IMad(a,b,c)
-  %1140 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1139, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1141 = extractvalue %dx.types.ResRet.i32 %1140, 0
-  %1142 = uitofp i32 %1141 to float
-  br label %1230
-
-; <label>:1143                                    ; preds = %1112
-  %1144 = icmp eq i32 %940, 2
-  br i1 %1144, label %1145, label %1230
-
-; <label>:1145                                    ; preds = %1143
-  %1146 = fsub fast float %22, %20
-  %1147 = fcmp fast olt float %936, %20
-  br i1 %1147, label %1148, label %1161
-
-; <label>:1148                                    ; preds = %1145
-  %1149 = fsub fast float %20, %936
-  %1150 = fdiv fast float %1149, %1146
-  %1151 = fptoui float %1150 to i32
-  %1152 = uitofp i32 %1151 to float
-  %1153 = fmul fast float %1152, %1146
-  %1154 = fsub fast float %1149, %1153
-  %1155 = and i32 %1151, 1
-  %1156 = icmp eq i32 %1155, 0
-  br i1 %1156, label %1157, label %1159
-
-; <label>:1157                                    ; preds = %1148
-  %1158 = fadd fast float %1154, %20
-  br label %1176
-
-; <label>:1159                                    ; preds = %1148
-  %1160 = fsub fast float %22, %1154
-  br label %1176
-
-; <label>:1161                                    ; preds = %1145
-  %1162 = fcmp fast ogt float %936, %22
-  br i1 %1162, label %1163, label %1176
-
-; <label>:1163                                    ; preds = %1161
-  %1164 = fsub fast float %936, %22
-  %1165 = fdiv fast float %1164, %1146
-  %1166 = fptoui float %1165 to i32
-  %1167 = uitofp i32 %1166 to float
-  %1168 = fmul fast float %1167, %1146
-  %1169 = fsub fast float %1164, %1168
-  %1170 = and i32 %1166, 1
-  %1171 = icmp eq i32 %1170, 0
-  br i1 %1171, label %1172, label %1174
-
-; <label>:1172                                    ; preds = %1163
-  %1173 = fsub fast float %22, %1169
-  br label %1176
-
-; <label>:1174                                    ; preds = %1163
-  %1175 = fadd fast float %1169, %20
-  br label %1176
-
-; <label>:1176                                    ; preds = %1174, %1172, %1161, %1159, %1157
-  %1177 = phi float [ %1158, %1157 ], [ %1160, %1159 ], [ %1173, %1172 ], [ %1175, %1174 ], [ %936, %1161 ]
-  %1178 = fptoui float %1177 to i32
-  %1179 = fsub fast float %24, %20
-  %1180 = fcmp fast olt float %939, %20
-  br i1 %1180, label %1181, label %1194
-
-; <label>:1181                                    ; preds = %1176
-  %1182 = fsub fast float %20, %939
-  %1183 = fdiv fast float %1182, %1179
-  %1184 = fptoui float %1183 to i32
-  %1185 = uitofp i32 %1184 to float
-  %1186 = fmul fast float %1185, %1179
-  %1187 = fsub fast float %1182, %1186
-  %1188 = and i32 %1184, 1
-  %1189 = icmp eq i32 %1188, 0
-  br i1 %1189, label %1190, label %1192
-
-; <label>:1190                                    ; preds = %1181
-  %1191 = fadd fast float %1187, %20
-  br label %1209
-
-; <label>:1192                                    ; preds = %1181
-  %1193 = fsub fast float %24, %1187
-  br label %1209
-
-; <label>:1194                                    ; preds = %1176
-  %1195 = fcmp fast ogt float %939, %24
-  br i1 %1195, label %1196, label %1209
-
-; <label>:1196                                    ; preds = %1194
-  %1197 = fsub fast float %939, %24
-  %1198 = fdiv fast float %1197, %1179
-  %1199 = fptoui float %1198 to i32
-  %1200 = uitofp i32 %1199 to float
-  %1201 = fmul fast float %1200, %1179
-  %1202 = fsub fast float %1197, %1201
-  %1203 = and i32 %1199, 1
-  %1204 = icmp eq i32 %1203, 0
-  br i1 %1204, label %1205, label %1207
-
-; <label>:1205                                    ; preds = %1196
-  %1206 = fsub fast float %24, %1202
-  br label %1209
-
-; <label>:1207                                    ; preds = %1196
-  %1208 = fadd fast float %1202, %20
-  br label %1209
-
-; <label>:1209                                    ; preds = %1207, %1205, %1194, %1192, %1190
-  %1210 = phi float [ %1191, %1190 ], [ %1193, %1192 ], [ %1206, %1205 ], [ %1208, %1207 ], [ %939, %1194 ]
-  %1211 = fptoui float %1210 to i32
-  %1212 = uitofp i32 %1211 to float
-  %1213 = uitofp i32 %1178 to float
-  %1214 = fptoui float %45 to i32
-  %1215 = fptoui float %182 to i32
-  %1216 = fptoui float %1212 to i32
-  %1217 = fptoui float %1213 to i32
-  %1218 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1219 = extractvalue %dx.types.CBufRet.i32 %1218, 0
-  %1220 = extractvalue %dx.types.CBufRet.i32 %1218, 1
-  %1221 = extractvalue %dx.types.CBufRet.i32 %1218, 2
-  %1222 = extractvalue %dx.types.CBufRet.i32 %1218, 3
-  %1223 = mul i32 %1219, %1214
-  %1224 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1215, i32 %1220, i32 %1223)  ; IMad(a,b,c)
-  %1225 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1216, i32 %1221, i32 %1224)  ; IMad(a,b,c)
-  %1226 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1217, i32 %1222, i32 %1225)  ; IMad(a,b,c)
-  %1227 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1226, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1228 = extractvalue %dx.types.ResRet.i32 %1227, 0
-  %1229 = uitofp i32 %1228 to float
-  br label %1230
-
-; <label>:1230                                    ; preds = %1209, %1143, %1114, %1097, %1087
-  %1231 = phi float [ %1111, %1097 ], [ 0.000000e+00, %1087 ], [ %1142, %1114 ], [ %1229, %1209 ], [ 0.000000e+00, %1143 ]
-  %1232 = fadd fast float %936, 1.000000e+00
-  br i1 %941, label %1233, label %1258
-
-; <label>:1233                                    ; preds = %1230
-  %1234 = fcmp fast oge float %1232, 0.000000e+00
-  %1235 = fptoui float %1232 to i32
-  %1236 = icmp ult i32 %1235, %13
-  %1237 = and i1 %1234, %1236
-  %1238 = fcmp fast oge float %939, 0.000000e+00
-  %1239 = and i1 %1238, %1237
-  %1240 = fptoui float %939 to i32
-  %1241 = icmp ult i32 %1240, %15
-  %1242 = and i1 %1241, %1239
-  br i1 %1242, label %1243, label %1376
-
-; <label>:1243                                    ; preds = %1233
-  %1244 = fptoui float %45 to i32
-  %1245 = fptoui float %182 to i32
-  %1246 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1247 = extractvalue %dx.types.CBufRet.i32 %1246, 0
-  %1248 = extractvalue %dx.types.CBufRet.i32 %1246, 1
-  %1249 = extractvalue %dx.types.CBufRet.i32 %1246, 2
-  %1250 = extractvalue %dx.types.CBufRet.i32 %1246, 3
-  %1251 = mul i32 %1247, %1244
-  %1252 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1245, i32 %1248, i32 %1251)  ; IMad(a,b,c)
-  %1253 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1240, i32 %1249, i32 %1252)  ; IMad(a,b,c)
-  %1254 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1235, i32 %1250, i32 %1253)  ; IMad(a,b,c)
-  %1255 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1254, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1256 = extractvalue %dx.types.ResRet.i32 %1255, 0
-  %1257 = uitofp i32 %1256 to float
-  br label %1376
-
-; <label>:1258                                    ; preds = %1230
-  %1259 = icmp eq i32 %940, 1
-  br i1 %1259, label %1260, label %1289
-
-; <label>:1260                                    ; preds = %1258
-  %1261 = add i32 %13, -1
-  %1262 = uitofp i32 %1261 to float
-  %1263 = call float @dx.op.binary.f32(i32 35, float %1232, float 0.000000e+00)  ; FMax(a,b)
-  %1264 = call float @dx.op.binary.f32(i32 36, float %1263, float %1262)  ; FMin(a,b)
-  %1265 = fptoui float %1264 to i32
-  %1266 = add i32 %15, -1
-  %1267 = uitofp i32 %1266 to float
-  %1268 = call float @dx.op.binary.f32(i32 35, float %939, float 0.000000e+00)  ; FMax(a,b)
-  %1269 = call float @dx.op.binary.f32(i32 36, float %1268, float %1267)  ; FMin(a,b)
-  %1270 = fptoui float %1269 to i32
-  %1271 = uitofp i32 %1270 to float
-  %1272 = uitofp i32 %1265 to float
-  %1273 = fptoui float %45 to i32
-  %1274 = fptoui float %182 to i32
-  %1275 = fptoui float %1271 to i32
-  %1276 = fptoui float %1272 to i32
-  %1277 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1278 = extractvalue %dx.types.CBufRet.i32 %1277, 0
-  %1279 = extractvalue %dx.types.CBufRet.i32 %1277, 1
-  %1280 = extractvalue %dx.types.CBufRet.i32 %1277, 2
-  %1281 = extractvalue %dx.types.CBufRet.i32 %1277, 3
-  %1282 = mul i32 %1278, %1273
-  %1283 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1274, i32 %1279, i32 %1282)  ; IMad(a,b,c)
-  %1284 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1275, i32 %1280, i32 %1283)  ; IMad(a,b,c)
-  %1285 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1276, i32 %1281, i32 %1284)  ; IMad(a,b,c)
-  %1286 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1285, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1287 = extractvalue %dx.types.ResRet.i32 %1286, 0
-  %1288 = uitofp i32 %1287 to float
-  br label %1376
-
-; <label>:1289                                    ; preds = %1258
-  %1290 = icmp eq i32 %940, 2
-  br i1 %1290, label %1291, label %1376
-
-; <label>:1291                                    ; preds = %1289
-  %1292 = fsub fast float %22, %20
-  %1293 = fcmp fast olt float %1232, %20
-  br i1 %1293, label %1294, label %1307
-
-; <label>:1294                                    ; preds = %1291
-  %1295 = fsub fast float %20, %1232
-  %1296 = fdiv fast float %1295, %1292
-  %1297 = fptoui float %1296 to i32
-  %1298 = uitofp i32 %1297 to float
-  %1299 = fmul fast float %1298, %1292
-  %1300 = fsub fast float %1295, %1299
-  %1301 = and i32 %1297, 1
-  %1302 = icmp eq i32 %1301, 0
-  br i1 %1302, label %1303, label %1305
-
-; <label>:1303                                    ; preds = %1294
-  %1304 = fadd fast float %1300, %20
-  br label %1322
-
-; <label>:1305                                    ; preds = %1294
-  %1306 = fsub fast float %22, %1300
-  br label %1322
-
-; <label>:1307                                    ; preds = %1291
-  %1308 = fcmp fast ogt float %1232, %22
-  br i1 %1308, label %1309, label %1322
-
-; <label>:1309                                    ; preds = %1307
-  %1310 = fsub fast float %1232, %22
-  %1311 = fdiv fast float %1310, %1292
-  %1312 = fptoui float %1311 to i32
-  %1313 = uitofp i32 %1312 to float
-  %1314 = fmul fast float %1313, %1292
-  %1315 = fsub fast float %1310, %1314
-  %1316 = and i32 %1312, 1
-  %1317 = icmp eq i32 %1316, 0
-  br i1 %1317, label %1318, label %1320
-
-; <label>:1318                                    ; preds = %1309
-  %1319 = fsub fast float %22, %1315
-  br label %1322
-
-; <label>:1320                                    ; preds = %1309
-  %1321 = fadd fast float %1315, %20
-  br label %1322
-
-; <label>:1322                                    ; preds = %1320, %1318, %1307, %1305, %1303
-  %1323 = phi float [ %1304, %1303 ], [ %1306, %1305 ], [ %1319, %1318 ], [ %1321, %1320 ], [ %1232, %1307 ]
-  %1324 = fptoui float %1323 to i32
-  %1325 = fsub fast float %24, %20
-  %1326 = fcmp fast olt float %939, %20
-  br i1 %1326, label %1327, label %1340
-
-; <label>:1327                                    ; preds = %1322
-  %1328 = fsub fast float %20, %939
-  %1329 = fdiv fast float %1328, %1325
-  %1330 = fptoui float %1329 to i32
-  %1331 = uitofp i32 %1330 to float
-  %1332 = fmul fast float %1331, %1325
-  %1333 = fsub fast float %1328, %1332
-  %1334 = and i32 %1330, 1
-  %1335 = icmp eq i32 %1334, 0
-  br i1 %1335, label %1336, label %1338
-
-; <label>:1336                                    ; preds = %1327
-  %1337 = fadd fast float %1333, %20
-  br label %1355
-
-; <label>:1338                                    ; preds = %1327
-  %1339 = fsub fast float %24, %1333
-  br label %1355
-
-; <label>:1340                                    ; preds = %1322
-  %1341 = fcmp fast ogt float %939, %24
-  br i1 %1341, label %1342, label %1355
-
-; <label>:1342                                    ; preds = %1340
-  %1343 = fsub fast float %939, %24
-  %1344 = fdiv fast float %1343, %1325
-  %1345 = fptoui float %1344 to i32
-  %1346 = uitofp i32 %1345 to float
-  %1347 = fmul fast float %1346, %1325
-  %1348 = fsub fast float %1343, %1347
-  %1349 = and i32 %1345, 1
-  %1350 = icmp eq i32 %1349, 0
-  br i1 %1350, label %1351, label %1353
-
-; <label>:1351                                    ; preds = %1342
-  %1352 = fsub fast float %24, %1348
-  br label %1355
-
-; <label>:1353                                    ; preds = %1342
-  %1354 = fadd fast float %1348, %20
-  br label %1355
-
-; <label>:1355                                    ; preds = %1353, %1351, %1340, %1338, %1336
-  %1356 = phi float [ %1337, %1336 ], [ %1339, %1338 ], [ %1352, %1351 ], [ %1354, %1353 ], [ %939, %1340 ]
-  %1357 = fptoui float %1356 to i32
-  %1358 = uitofp i32 %1357 to float
-  %1359 = uitofp i32 %1324 to float
-  %1360 = fptoui float %45 to i32
-  %1361 = fptoui float %182 to i32
-  %1362 = fptoui float %1358 to i32
-  %1363 = fptoui float %1359 to i32
-  %1364 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1365 = extractvalue %dx.types.CBufRet.i32 %1364, 0
-  %1366 = extractvalue %dx.types.CBufRet.i32 %1364, 1
-  %1367 = extractvalue %dx.types.CBufRet.i32 %1364, 2
-  %1368 = extractvalue %dx.types.CBufRet.i32 %1364, 3
-  %1369 = mul i32 %1365, %1360
-  %1370 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1361, i32 %1366, i32 %1369)  ; IMad(a,b,c)
-  %1371 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1362, i32 %1367, i32 %1370)  ; IMad(a,b,c)
-  %1372 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1363, i32 %1368, i32 %1371)  ; IMad(a,b,c)
-  %1373 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1372, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1374 = extractvalue %dx.types.ResRet.i32 %1373, 0
-  %1375 = uitofp i32 %1374 to float
-  br label %1376
-
-; <label>:1376                                    ; preds = %1355, %1289, %1260, %1243, %1233
-  %1377 = phi float [ %1257, %1243 ], [ 0.000000e+00, %1233 ], [ %1288, %1260 ], [ %1375, %1355 ], [ 0.000000e+00, %1289 ]
-  %1378 = fadd fast float %936, 2.000000e+00
-  br i1 %941, label %1379, label %1404
-
-; <label>:1379                                    ; preds = %1376
-  %1380 = fcmp fast oge float %1378, 0.000000e+00
-  %1381 = fptoui float %1378 to i32
-  %1382 = icmp ult i32 %1381, %13
-  %1383 = and i1 %1380, %1382
-  %1384 = fcmp fast oge float %939, 0.000000e+00
-  %1385 = and i1 %1384, %1383
-  %1386 = fptoui float %939 to i32
-  %1387 = icmp ult i32 %1386, %15
-  %1388 = and i1 %1387, %1385
-  br i1 %1388, label %1389, label %1522
-
-; <label>:1389                                    ; preds = %1379
-  %1390 = fptoui float %45 to i32
-  %1391 = fptoui float %182 to i32
-  %1392 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1393 = extractvalue %dx.types.CBufRet.i32 %1392, 0
-  %1394 = extractvalue %dx.types.CBufRet.i32 %1392, 1
-  %1395 = extractvalue %dx.types.CBufRet.i32 %1392, 2
-  %1396 = extractvalue %dx.types.CBufRet.i32 %1392, 3
-  %1397 = mul i32 %1393, %1390
-  %1398 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1391, i32 %1394, i32 %1397)  ; IMad(a,b,c)
-  %1399 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1386, i32 %1395, i32 %1398)  ; IMad(a,b,c)
-  %1400 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1381, i32 %1396, i32 %1399)  ; IMad(a,b,c)
-  %1401 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1400, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1402 = extractvalue %dx.types.ResRet.i32 %1401, 0
-  %1403 = uitofp i32 %1402 to float
-  br label %1522
-
-; <label>:1404                                    ; preds = %1376
-  %1405 = icmp eq i32 %940, 1
-  br i1 %1405, label %1406, label %1435
-
-; <label>:1406                                    ; preds = %1404
-  %1407 = add i32 %13, -1
-  %1408 = uitofp i32 %1407 to float
-  %1409 = call float @dx.op.binary.f32(i32 35, float %1378, float 0.000000e+00)  ; FMax(a,b)
-  %1410 = call float @dx.op.binary.f32(i32 36, float %1409, float %1408)  ; FMin(a,b)
-  %1411 = fptoui float %1410 to i32
-  %1412 = add i32 %15, -1
-  %1413 = uitofp i32 %1412 to float
-  %1414 = call float @dx.op.binary.f32(i32 35, float %939, float 0.000000e+00)  ; FMax(a,b)
-  %1415 = call float @dx.op.binary.f32(i32 36, float %1414, float %1413)  ; FMin(a,b)
-  %1416 = fptoui float %1415 to i32
-  %1417 = uitofp i32 %1416 to float
-  %1418 = uitofp i32 %1411 to float
-  %1419 = fptoui float %45 to i32
-  %1420 = fptoui float %182 to i32
-  %1421 = fptoui float %1417 to i32
-  %1422 = fptoui float %1418 to i32
-  %1423 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1424 = extractvalue %dx.types.CBufRet.i32 %1423, 0
-  %1425 = extractvalue %dx.types.CBufRet.i32 %1423, 1
-  %1426 = extractvalue %dx.types.CBufRet.i32 %1423, 2
-  %1427 = extractvalue %dx.types.CBufRet.i32 %1423, 3
-  %1428 = mul i32 %1424, %1419
-  %1429 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1420, i32 %1425, i32 %1428)  ; IMad(a,b,c)
-  %1430 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1421, i32 %1426, i32 %1429)  ; IMad(a,b,c)
-  %1431 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1422, i32 %1427, i32 %1430)  ; IMad(a,b,c)
-  %1432 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1431, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1433 = extractvalue %dx.types.ResRet.i32 %1432, 0
-  %1434 = uitofp i32 %1433 to float
-  br label %1522
-
-; <label>:1435                                    ; preds = %1404
-  %1436 = icmp eq i32 %940, 2
-  br i1 %1436, label %1437, label %1522
-
-; <label>:1437                                    ; preds = %1435
-  %1438 = fsub fast float %22, %20
-  %1439 = fcmp fast olt float %1378, %20
-  br i1 %1439, label %1440, label %1453
-
-; <label>:1440                                    ; preds = %1437
-  %1441 = fsub fast float %20, %1378
-  %1442 = fdiv fast float %1441, %1438
-  %1443 = fptoui float %1442 to i32
-  %1444 = uitofp i32 %1443 to float
-  %1445 = fmul fast float %1444, %1438
-  %1446 = fsub fast float %1441, %1445
-  %1447 = and i32 %1443, 1
-  %1448 = icmp eq i32 %1447, 0
-  br i1 %1448, label %1449, label %1451
-
-; <label>:1449                                    ; preds = %1440
-  %1450 = fadd fast float %1446, %20
-  br label %1468
-
-; <label>:1451                                    ; preds = %1440
-  %1452 = fsub fast float %22, %1446
-  br label %1468
-
-; <label>:1453                                    ; preds = %1437
-  %1454 = fcmp fast ogt float %1378, %22
-  br i1 %1454, label %1455, label %1468
-
-; <label>:1455                                    ; preds = %1453
-  %1456 = fsub fast float %1378, %22
-  %1457 = fdiv fast float %1456, %1438
-  %1458 = fptoui float %1457 to i32
-  %1459 = uitofp i32 %1458 to float
-  %1460 = fmul fast float %1459, %1438
-  %1461 = fsub fast float %1456, %1460
-  %1462 = and i32 %1458, 1
-  %1463 = icmp eq i32 %1462, 0
-  br i1 %1463, label %1464, label %1466
-
-; <label>:1464                                    ; preds = %1455
-  %1465 = fsub fast float %22, %1461
-  br label %1468
-
-; <label>:1466                                    ; preds = %1455
-  %1467 = fadd fast float %1461, %20
-  br label %1468
-
-; <label>:1468                                    ; preds = %1466, %1464, %1453, %1451, %1449
-  %1469 = phi float [ %1450, %1449 ], [ %1452, %1451 ], [ %1465, %1464 ], [ %1467, %1466 ], [ %1378, %1453 ]
-  %1470 = fptoui float %1469 to i32
-  %1471 = fsub fast float %24, %20
-  %1472 = fcmp fast olt float %939, %20
-  br i1 %1472, label %1473, label %1486
-
-; <label>:1473                                    ; preds = %1468
-  %1474 = fsub fast float %20, %939
-  %1475 = fdiv fast float %1474, %1471
-  %1476 = fptoui float %1475 to i32
-  %1477 = uitofp i32 %1476 to float
-  %1478 = fmul fast float %1477, %1471
-  %1479 = fsub fast float %1474, %1478
-  %1480 = and i32 %1476, 1
-  %1481 = icmp eq i32 %1480, 0
-  br i1 %1481, label %1482, label %1484
-
-; <label>:1482                                    ; preds = %1473
-  %1483 = fadd fast float %1479, %20
-  br label %1501
-
-; <label>:1484                                    ; preds = %1473
-  %1485 = fsub fast float %24, %1479
-  br label %1501
-
-; <label>:1486                                    ; preds = %1468
-  %1487 = fcmp fast ogt float %939, %24
-  br i1 %1487, label %1488, label %1501
-
-; <label>:1488                                    ; preds = %1486
-  %1489 = fsub fast float %939, %24
-  %1490 = fdiv fast float %1489, %1471
-  %1491 = fptoui float %1490 to i32
-  %1492 = uitofp i32 %1491 to float
-  %1493 = fmul fast float %1492, %1471
-  %1494 = fsub fast float %1489, %1493
-  %1495 = and i32 %1491, 1
-  %1496 = icmp eq i32 %1495, 0
-  br i1 %1496, label %1497, label %1499
-
-; <label>:1497                                    ; preds = %1488
-  %1498 = fsub fast float %24, %1494
-  br label %1501
-
-; <label>:1499                                    ; preds = %1488
-  %1500 = fadd fast float %1494, %20
-  br label %1501
-
-; <label>:1501                                    ; preds = %1499, %1497, %1486, %1484, %1482
-  %1502 = phi float [ %1483, %1482 ], [ %1485, %1484 ], [ %1498, %1497 ], [ %1500, %1499 ], [ %939, %1486 ]
-  %1503 = fptoui float %1502 to i32
-  %1504 = uitofp i32 %1503 to float
-  %1505 = uitofp i32 %1470 to float
-  %1506 = fptoui float %45 to i32
-  %1507 = fptoui float %182 to i32
-  %1508 = fptoui float %1504 to i32
-  %1509 = fptoui float %1505 to i32
-  %1510 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1511 = extractvalue %dx.types.CBufRet.i32 %1510, 0
-  %1512 = extractvalue %dx.types.CBufRet.i32 %1510, 1
-  %1513 = extractvalue %dx.types.CBufRet.i32 %1510, 2
-  %1514 = extractvalue %dx.types.CBufRet.i32 %1510, 3
-  %1515 = mul i32 %1511, %1506
-  %1516 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1507, i32 %1512, i32 %1515)  ; IMad(a,b,c)
-  %1517 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1508, i32 %1513, i32 %1516)  ; IMad(a,b,c)
-  %1518 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1509, i32 %1514, i32 %1517)  ; IMad(a,b,c)
-  %1519 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1518, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1520 = extractvalue %dx.types.ResRet.i32 %1519, 0
-  %1521 = uitofp i32 %1520 to float
-  br label %1522
-
-; <label>:1522                                    ; preds = %1501, %1435, %1406, %1389, %1379
-  %1523 = phi float [ %1403, %1389 ], [ 0.000000e+00, %1379 ], [ %1434, %1406 ], [ %1521, %1501 ], [ 0.000000e+00, %1435 ]
-  br i1 %941, label %1524, label %1549
-
-; <label>:1524                                    ; preds = %1522
-  %1525 = fcmp fast oge float %937, 0.000000e+00
-  %1526 = fptoui float %937 to i32
-  %1527 = icmp ult i32 %1526, %13
-  %1528 = and i1 %1525, %1527
-  %1529 = fcmp fast oge float %938, 0.000000e+00
-  %1530 = and i1 %1529, %1528
-  %1531 = fptoui float %938 to i32
-  %1532 = icmp ult i32 %1531, %15
-  %1533 = and i1 %1532, %1530
-  br i1 %1533, label %1534, label %1667
-
-; <label>:1534                                    ; preds = %1524
-  %1535 = fptoui float %45 to i32
-  %1536 = fptoui float %182 to i32
-  %1537 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1538 = extractvalue %dx.types.CBufRet.i32 %1537, 0
-  %1539 = extractvalue %dx.types.CBufRet.i32 %1537, 1
-  %1540 = extractvalue %dx.types.CBufRet.i32 %1537, 2
-  %1541 = extractvalue %dx.types.CBufRet.i32 %1537, 3
-  %1542 = mul i32 %1538, %1535
-  %1543 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1536, i32 %1539, i32 %1542)  ; IMad(a,b,c)
-  %1544 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1531, i32 %1540, i32 %1543)  ; IMad(a,b,c)
-  %1545 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1526, i32 %1541, i32 %1544)  ; IMad(a,b,c)
-  %1546 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1545, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1547 = extractvalue %dx.types.ResRet.i32 %1546, 0
-  %1548 = uitofp i32 %1547 to float
-  br label %1667
-
-; <label>:1549                                    ; preds = %1522
-  %1550 = icmp eq i32 %940, 1
-  br i1 %1550, label %1551, label %1580
-
-; <label>:1551                                    ; preds = %1549
-  %1552 = add i32 %13, -1
-  %1553 = uitofp i32 %1552 to float
-  %1554 = call float @dx.op.binary.f32(i32 35, float %937, float 0.000000e+00)  ; FMax(a,b)
-  %1555 = call float @dx.op.binary.f32(i32 36, float %1554, float %1553)  ; FMin(a,b)
-  %1556 = fptoui float %1555 to i32
-  %1557 = add i32 %15, -1
-  %1558 = uitofp i32 %1557 to float
-  %1559 = call float @dx.op.binary.f32(i32 35, float %938, float 0.000000e+00)  ; FMax(a,b)
-  %1560 = call float @dx.op.binary.f32(i32 36, float %1559, float %1558)  ; FMin(a,b)
-  %1561 = fptoui float %1560 to i32
-  %1562 = uitofp i32 %1561 to float
-  %1563 = uitofp i32 %1556 to float
-  %1564 = fptoui float %45 to i32
-  %1565 = fptoui float %182 to i32
-  %1566 = fptoui float %1562 to i32
-  %1567 = fptoui float %1563 to i32
-  %1568 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1569 = extractvalue %dx.types.CBufRet.i32 %1568, 0
-  %1570 = extractvalue %dx.types.CBufRet.i32 %1568, 1
-  %1571 = extractvalue %dx.types.CBufRet.i32 %1568, 2
-  %1572 = extractvalue %dx.types.CBufRet.i32 %1568, 3
-  %1573 = mul i32 %1569, %1564
-  %1574 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1565, i32 %1570, i32 %1573)  ; IMad(a,b,c)
-  %1575 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1566, i32 %1571, i32 %1574)  ; IMad(a,b,c)
-  %1576 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1567, i32 %1572, i32 %1575)  ; IMad(a,b,c)
-  %1577 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1576, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1578 = extractvalue %dx.types.ResRet.i32 %1577, 0
-  %1579 = uitofp i32 %1578 to float
-  br label %1667
-
-; <label>:1580                                    ; preds = %1549
-  %1581 = icmp eq i32 %940, 2
-  br i1 %1581, label %1582, label %1667
-
-; <label>:1582                                    ; preds = %1580
-  %1583 = fsub fast float %22, %20
-  %1584 = fcmp fast olt float %937, %20
-  br i1 %1584, label %1585, label %1598
-
-; <label>:1585                                    ; preds = %1582
-  %1586 = fsub fast float %20, %937
-  %1587 = fdiv fast float %1586, %1583
-  %1588 = fptoui float %1587 to i32
-  %1589 = uitofp i32 %1588 to float
-  %1590 = fmul fast float %1589, %1583
-  %1591 = fsub fast float %1586, %1590
-  %1592 = and i32 %1588, 1
-  %1593 = icmp eq i32 %1592, 0
-  br i1 %1593, label %1594, label %1596
-
-; <label>:1594                                    ; preds = %1585
-  %1595 = fadd fast float %1591, %20
-  br label %1613
-
-; <label>:1596                                    ; preds = %1585
-  %1597 = fsub fast float %22, %1591
-  br label %1613
-
-; <label>:1598                                    ; preds = %1582
-  %1599 = fcmp fast ogt float %937, %22
-  br i1 %1599, label %1600, label %1613
-
-; <label>:1600                                    ; preds = %1598
-  %1601 = fsub fast float %937, %22
-  %1602 = fdiv fast float %1601, %1583
-  %1603 = fptoui float %1602 to i32
-  %1604 = uitofp i32 %1603 to float
-  %1605 = fmul fast float %1604, %1583
-  %1606 = fsub fast float %1601, %1605
-  %1607 = and i32 %1603, 1
-  %1608 = icmp eq i32 %1607, 0
-  br i1 %1608, label %1609, label %1611
-
-; <label>:1609                                    ; preds = %1600
-  %1610 = fsub fast float %22, %1606
-  br label %1613
-
-; <label>:1611                                    ; preds = %1600
-  %1612 = fadd fast float %1606, %20
-  br label %1613
-
-; <label>:1613                                    ; preds = %1611, %1609, %1598, %1596, %1594
-  %1614 = phi float [ %1595, %1594 ], [ %1597, %1596 ], [ %1610, %1609 ], [ %1612, %1611 ], [ %937, %1598 ]
-  %1615 = fptoui float %1614 to i32
-  %1616 = fsub fast float %24, %20
-  %1617 = fcmp fast olt float %938, %20
-  br i1 %1617, label %1618, label %1631
-
-; <label>:1618                                    ; preds = %1613
-  %1619 = fsub fast float %20, %938
-  %1620 = fdiv fast float %1619, %1616
-  %1621 = fptoui float %1620 to i32
-  %1622 = uitofp i32 %1621 to float
-  %1623 = fmul fast float %1622, %1616
-  %1624 = fsub fast float %1619, %1623
-  %1625 = and i32 %1621, 1
-  %1626 = icmp eq i32 %1625, 0
-  br i1 %1626, label %1627, label %1629
-
-; <label>:1627                                    ; preds = %1618
-  %1628 = fadd fast float %1624, %20
-  br label %1646
-
-; <label>:1629                                    ; preds = %1618
-  %1630 = fsub fast float %24, %1624
-  br label %1646
-
-; <label>:1631                                    ; preds = %1613
-  %1632 = fcmp fast ogt float %938, %24
-  br i1 %1632, label %1633, label %1646
-
-; <label>:1633                                    ; preds = %1631
-  %1634 = fsub fast float %938, %24
-  %1635 = fdiv fast float %1634, %1616
-  %1636 = fptoui float %1635 to i32
-  %1637 = uitofp i32 %1636 to float
-  %1638 = fmul fast float %1637, %1616
-  %1639 = fsub fast float %1634, %1638
-  %1640 = and i32 %1636, 1
-  %1641 = icmp eq i32 %1640, 0
-  br i1 %1641, label %1642, label %1644
-
-; <label>:1642                                    ; preds = %1633
-  %1643 = fsub fast float %24, %1639
-  br label %1646
-
-; <label>:1644                                    ; preds = %1633
-  %1645 = fadd fast float %1639, %20
-  br label %1646
-
-; <label>:1646                                    ; preds = %1644, %1642, %1631, %1629, %1627
-  %1647 = phi float [ %1628, %1627 ], [ %1630, %1629 ], [ %1643, %1642 ], [ %1645, %1644 ], [ %938, %1631 ]
-  %1648 = fptoui float %1647 to i32
-  %1649 = uitofp i32 %1648 to float
-  %1650 = uitofp i32 %1615 to float
-  %1651 = fptoui float %45 to i32
-  %1652 = fptoui float %182 to i32
-  %1653 = fptoui float %1649 to i32
-  %1654 = fptoui float %1650 to i32
-  %1655 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1656 = extractvalue %dx.types.CBufRet.i32 %1655, 0
-  %1657 = extractvalue %dx.types.CBufRet.i32 %1655, 1
-  %1658 = extractvalue %dx.types.CBufRet.i32 %1655, 2
-  %1659 = extractvalue %dx.types.CBufRet.i32 %1655, 3
-  %1660 = mul i32 %1656, %1651
-  %1661 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1652, i32 %1657, i32 %1660)  ; IMad(a,b,c)
-  %1662 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1653, i32 %1658, i32 %1661)  ; IMad(a,b,c)
-  %1663 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1654, i32 %1659, i32 %1662)  ; IMad(a,b,c)
-  %1664 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1663, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1665 = extractvalue %dx.types.ResRet.i32 %1664, 0
-  %1666 = uitofp i32 %1665 to float
-  br label %1667
-
-; <label>:1667                                    ; preds = %1646, %1580, %1551, %1534, %1524
-  %1668 = phi float [ %1548, %1534 ], [ 0.000000e+00, %1524 ], [ %1579, %1551 ], [ %1666, %1646 ], [ 0.000000e+00, %1580 ]
-  br i1 %941, label %1669, label %1694
-
-; <label>:1669                                    ; preds = %1667
-  %1670 = fcmp fast oge float %936, 0.000000e+00
-  %1671 = fptoui float %936 to i32
-  %1672 = icmp ult i32 %1671, %13
-  %1673 = and i1 %1670, %1672
-  %1674 = fcmp fast oge float %938, 0.000000e+00
-  %1675 = and i1 %1674, %1673
-  %1676 = fptoui float %938 to i32
-  %1677 = icmp ult i32 %1676, %15
-  %1678 = and i1 %1677, %1675
-  br i1 %1678, label %1679, label %1812
-
-; <label>:1679                                    ; preds = %1669
-  %1680 = fptoui float %45 to i32
-  %1681 = fptoui float %182 to i32
-  %1682 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1683 = extractvalue %dx.types.CBufRet.i32 %1682, 0
-  %1684 = extractvalue %dx.types.CBufRet.i32 %1682, 1
-  %1685 = extractvalue %dx.types.CBufRet.i32 %1682, 2
-  %1686 = extractvalue %dx.types.CBufRet.i32 %1682, 3
-  %1687 = mul i32 %1683, %1680
-  %1688 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1681, i32 %1684, i32 %1687)  ; IMad(a,b,c)
-  %1689 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1676, i32 %1685, i32 %1688)  ; IMad(a,b,c)
-  %1690 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1671, i32 %1686, i32 %1689)  ; IMad(a,b,c)
-  %1691 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1690, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1692 = extractvalue %dx.types.ResRet.i32 %1691, 0
-  %1693 = uitofp i32 %1692 to float
-  br label %1812
-
-; <label>:1694                                    ; preds = %1667
-  %1695 = icmp eq i32 %940, 1
-  br i1 %1695, label %1696, label %1725
-
-; <label>:1696                                    ; preds = %1694
-  %1697 = add i32 %13, -1
-  %1698 = uitofp i32 %1697 to float
-  %1699 = call float @dx.op.binary.f32(i32 35, float %936, float 0.000000e+00)  ; FMax(a,b)
-  %1700 = call float @dx.op.binary.f32(i32 36, float %1699, float %1698)  ; FMin(a,b)
-  %1701 = fptoui float %1700 to i32
-  %1702 = add i32 %15, -1
-  %1703 = uitofp i32 %1702 to float
-  %1704 = call float @dx.op.binary.f32(i32 35, float %938, float 0.000000e+00)  ; FMax(a,b)
-  %1705 = call float @dx.op.binary.f32(i32 36, float %1704, float %1703)  ; FMin(a,b)
-  %1706 = fptoui float %1705 to i32
-  %1707 = uitofp i32 %1706 to float
-  %1708 = uitofp i32 %1701 to float
-  %1709 = fptoui float %45 to i32
-  %1710 = fptoui float %182 to i32
-  %1711 = fptoui float %1707 to i32
-  %1712 = fptoui float %1708 to i32
-  %1713 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1714 = extractvalue %dx.types.CBufRet.i32 %1713, 0
-  %1715 = extractvalue %dx.types.CBufRet.i32 %1713, 1
-  %1716 = extractvalue %dx.types.CBufRet.i32 %1713, 2
-  %1717 = extractvalue %dx.types.CBufRet.i32 %1713, 3
-  %1718 = mul i32 %1714, %1709
-  %1719 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1710, i32 %1715, i32 %1718)  ; IMad(a,b,c)
-  %1720 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1711, i32 %1716, i32 %1719)  ; IMad(a,b,c)
-  %1721 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1712, i32 %1717, i32 %1720)  ; IMad(a,b,c)
-  %1722 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1721, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1723 = extractvalue %dx.types.ResRet.i32 %1722, 0
-  %1724 = uitofp i32 %1723 to float
-  br label %1812
-
-; <label>:1725                                    ; preds = %1694
-  %1726 = icmp eq i32 %940, 2
-  br i1 %1726, label %1727, label %1812
-
-; <label>:1727                                    ; preds = %1725
-  %1728 = fsub fast float %22, %20
-  %1729 = fcmp fast olt float %936, %20
-  br i1 %1729, label %1730, label %1743
-
-; <label>:1730                                    ; preds = %1727
-  %1731 = fsub fast float %20, %936
-  %1732 = fdiv fast float %1731, %1728
-  %1733 = fptoui float %1732 to i32
-  %1734 = uitofp i32 %1733 to float
-  %1735 = fmul fast float %1734, %1728
-  %1736 = fsub fast float %1731, %1735
-  %1737 = and i32 %1733, 1
-  %1738 = icmp eq i32 %1737, 0
-  br i1 %1738, label %1739, label %1741
-
-; <label>:1739                                    ; preds = %1730
-  %1740 = fadd fast float %1736, %20
-  br label %1758
-
-; <label>:1741                                    ; preds = %1730
-  %1742 = fsub fast float %22, %1736
-  br label %1758
-
-; <label>:1743                                    ; preds = %1727
-  %1744 = fcmp fast ogt float %936, %22
-  br i1 %1744, label %1745, label %1758
-
-; <label>:1745                                    ; preds = %1743
-  %1746 = fsub fast float %936, %22
-  %1747 = fdiv fast float %1746, %1728
-  %1748 = fptoui float %1747 to i32
-  %1749 = uitofp i32 %1748 to float
-  %1750 = fmul fast float %1749, %1728
-  %1751 = fsub fast float %1746, %1750
-  %1752 = and i32 %1748, 1
-  %1753 = icmp eq i32 %1752, 0
-  br i1 %1753, label %1754, label %1756
-
-; <label>:1754                                    ; preds = %1745
-  %1755 = fsub fast float %22, %1751
-  br label %1758
-
-; <label>:1756                                    ; preds = %1745
-  %1757 = fadd fast float %1751, %20
-  br label %1758
-
-; <label>:1758                                    ; preds = %1756, %1754, %1743, %1741, %1739
-  %1759 = phi float [ %1740, %1739 ], [ %1742, %1741 ], [ %1755, %1754 ], [ %1757, %1756 ], [ %936, %1743 ]
-  %1760 = fptoui float %1759 to i32
-  %1761 = fsub fast float %24, %20
-  %1762 = fcmp fast olt float %938, %20
-  br i1 %1762, label %1763, label %1776
-
-; <label>:1763                                    ; preds = %1758
-  %1764 = fsub fast float %20, %938
-  %1765 = fdiv fast float %1764, %1761
-  %1766 = fptoui float %1765 to i32
-  %1767 = uitofp i32 %1766 to float
-  %1768 = fmul fast float %1767, %1761
-  %1769 = fsub fast float %1764, %1768
-  %1770 = and i32 %1766, 1
-  %1771 = icmp eq i32 %1770, 0
-  br i1 %1771, label %1772, label %1774
-
-; <label>:1772                                    ; preds = %1763
-  %1773 = fadd fast float %1769, %20
-  br label %1791
-
-; <label>:1774                                    ; preds = %1763
-  %1775 = fsub fast float %24, %1769
-  br label %1791
-
-; <label>:1776                                    ; preds = %1758
-  %1777 = fcmp fast ogt float %938, %24
-  br i1 %1777, label %1778, label %1791
-
-; <label>:1778                                    ; preds = %1776
-  %1779 = fsub fast float %938, %24
-  %1780 = fdiv fast float %1779, %1761
-  %1781 = fptoui float %1780 to i32
-  %1782 = uitofp i32 %1781 to float
-  %1783 = fmul fast float %1782, %1761
-  %1784 = fsub fast float %1779, %1783
-  %1785 = and i32 %1781, 1
-  %1786 = icmp eq i32 %1785, 0
-  br i1 %1786, label %1787, label %1789
-
-; <label>:1787                                    ; preds = %1778
-  %1788 = fsub fast float %24, %1784
-  br label %1791
-
-; <label>:1789                                    ; preds = %1778
-  %1790 = fadd fast float %1784, %20
-  br label %1791
-
-; <label>:1791                                    ; preds = %1789, %1787, %1776, %1774, %1772
-  %1792 = phi float [ %1773, %1772 ], [ %1775, %1774 ], [ %1788, %1787 ], [ %1790, %1789 ], [ %938, %1776 ]
-  %1793 = fptoui float %1792 to i32
-  %1794 = uitofp i32 %1793 to float
-  %1795 = uitofp i32 %1760 to float
-  %1796 = fptoui float %45 to i32
-  %1797 = fptoui float %182 to i32
-  %1798 = fptoui float %1794 to i32
-  %1799 = fptoui float %1795 to i32
-  %1800 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1801 = extractvalue %dx.types.CBufRet.i32 %1800, 0
-  %1802 = extractvalue %dx.types.CBufRet.i32 %1800, 1
-  %1803 = extractvalue %dx.types.CBufRet.i32 %1800, 2
-  %1804 = extractvalue %dx.types.CBufRet.i32 %1800, 3
-  %1805 = mul i32 %1801, %1796
-  %1806 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1797, i32 %1802, i32 %1805)  ; IMad(a,b,c)
-  %1807 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1798, i32 %1803, i32 %1806)  ; IMad(a,b,c)
-  %1808 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1799, i32 %1804, i32 %1807)  ; IMad(a,b,c)
-  %1809 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1808, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1810 = extractvalue %dx.types.ResRet.i32 %1809, 0
-  %1811 = uitofp i32 %1810 to float
-  br label %1812
-
-; <label>:1812                                    ; preds = %1791, %1725, %1696, %1679, %1669
-  %1813 = phi float [ %1693, %1679 ], [ 0.000000e+00, %1669 ], [ %1724, %1696 ], [ %1811, %1791 ], [ 0.000000e+00, %1725 ]
-  br i1 %941, label %1814, label %1839
-
-; <label>:1814                                    ; preds = %1812
-  %1815 = fcmp fast oge float %1232, 0.000000e+00
-  %1816 = fptoui float %1232 to i32
-  %1817 = icmp ult i32 %1816, %13
-  %1818 = and i1 %1815, %1817
-  %1819 = fcmp fast oge float %938, 0.000000e+00
-  %1820 = and i1 %1819, %1818
-  %1821 = fptoui float %938 to i32
-  %1822 = icmp ult i32 %1821, %15
-  %1823 = and i1 %1822, %1820
-  br i1 %1823, label %1824, label %1957
-
-; <label>:1824                                    ; preds = %1814
-  %1825 = fptoui float %45 to i32
-  %1826 = fptoui float %182 to i32
-  %1827 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1828 = extractvalue %dx.types.CBufRet.i32 %1827, 0
-  %1829 = extractvalue %dx.types.CBufRet.i32 %1827, 1
-  %1830 = extractvalue %dx.types.CBufRet.i32 %1827, 2
-  %1831 = extractvalue %dx.types.CBufRet.i32 %1827, 3
-  %1832 = mul i32 %1828, %1825
-  %1833 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1826, i32 %1829, i32 %1832)  ; IMad(a,b,c)
-  %1834 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1821, i32 %1830, i32 %1833)  ; IMad(a,b,c)
-  %1835 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1816, i32 %1831, i32 %1834)  ; IMad(a,b,c)
-  %1836 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1835, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1837 = extractvalue %dx.types.ResRet.i32 %1836, 0
-  %1838 = uitofp i32 %1837 to float
-  br label %1957
-
-; <label>:1839                                    ; preds = %1812
-  %1840 = icmp eq i32 %940, 1
-  br i1 %1840, label %1841, label %1870
-
-; <label>:1841                                    ; preds = %1839
-  %1842 = add i32 %13, -1
-  %1843 = uitofp i32 %1842 to float
-  %1844 = call float @dx.op.binary.f32(i32 35, float %1232, float 0.000000e+00)  ; FMax(a,b)
-  %1845 = call float @dx.op.binary.f32(i32 36, float %1844, float %1843)  ; FMin(a,b)
-  %1846 = fptoui float %1845 to i32
-  %1847 = add i32 %15, -1
-  %1848 = uitofp i32 %1847 to float
-  %1849 = call float @dx.op.binary.f32(i32 35, float %938, float 0.000000e+00)  ; FMax(a,b)
-  %1850 = call float @dx.op.binary.f32(i32 36, float %1849, float %1848)  ; FMin(a,b)
-  %1851 = fptoui float %1850 to i32
-  %1852 = uitofp i32 %1851 to float
-  %1853 = uitofp i32 %1846 to float
-  %1854 = fptoui float %45 to i32
-  %1855 = fptoui float %182 to i32
-  %1856 = fptoui float %1852 to i32
-  %1857 = fptoui float %1853 to i32
-  %1858 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1859 = extractvalue %dx.types.CBufRet.i32 %1858, 0
-  %1860 = extractvalue %dx.types.CBufRet.i32 %1858, 1
-  %1861 = extractvalue %dx.types.CBufRet.i32 %1858, 2
-  %1862 = extractvalue %dx.types.CBufRet.i32 %1858, 3
-  %1863 = mul i32 %1859, %1854
-  %1864 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1855, i32 %1860, i32 %1863)  ; IMad(a,b,c)
-  %1865 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1856, i32 %1861, i32 %1864)  ; IMad(a,b,c)
-  %1866 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1857, i32 %1862, i32 %1865)  ; IMad(a,b,c)
-  %1867 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1866, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1868 = extractvalue %dx.types.ResRet.i32 %1867, 0
-  %1869 = uitofp i32 %1868 to float
-  br label %1957
-
-; <label>:1870                                    ; preds = %1839
-  %1871 = icmp eq i32 %940, 2
-  br i1 %1871, label %1872, label %1957
-
-; <label>:1872                                    ; preds = %1870
-  %1873 = fsub fast float %22, %20
-  %1874 = fcmp fast olt float %1232, %20
-  br i1 %1874, label %1875, label %1888
-
-; <label>:1875                                    ; preds = %1872
-  %1876 = fsub fast float %20, %1232
-  %1877 = fdiv fast float %1876, %1873
-  %1878 = fptoui float %1877 to i32
-  %1879 = uitofp i32 %1878 to float
-  %1880 = fmul fast float %1879, %1873
-  %1881 = fsub fast float %1876, %1880
-  %1882 = and i32 %1878, 1
-  %1883 = icmp eq i32 %1882, 0
-  br i1 %1883, label %1884, label %1886
-
-; <label>:1884                                    ; preds = %1875
-  %1885 = fadd fast float %1881, %20
-  br label %1903
-
-; <label>:1886                                    ; preds = %1875
-  %1887 = fsub fast float %22, %1881
-  br label %1903
-
-; <label>:1888                                    ; preds = %1872
-  %1889 = fcmp fast ogt float %1232, %22
-  br i1 %1889, label %1890, label %1903
-
-; <label>:1890                                    ; preds = %1888
-  %1891 = fsub fast float %1232, %22
-  %1892 = fdiv fast float %1891, %1873
-  %1893 = fptoui float %1892 to i32
-  %1894 = uitofp i32 %1893 to float
-  %1895 = fmul fast float %1894, %1873
-  %1896 = fsub fast float %1891, %1895
-  %1897 = and i32 %1893, 1
-  %1898 = icmp eq i32 %1897, 0
-  br i1 %1898, label %1899, label %1901
-
-; <label>:1899                                    ; preds = %1890
-  %1900 = fsub fast float %22, %1896
-  br label %1903
-
-; <label>:1901                                    ; preds = %1890
-  %1902 = fadd fast float %1896, %20
-  br label %1903
-
-; <label>:1903                                    ; preds = %1901, %1899, %1888, %1886, %1884
-  %1904 = phi float [ %1885, %1884 ], [ %1887, %1886 ], [ %1900, %1899 ], [ %1902, %1901 ], [ %1232, %1888 ]
-  %1905 = fptoui float %1904 to i32
-  %1906 = fsub fast float %24, %20
-  %1907 = fcmp fast olt float %938, %20
-  br i1 %1907, label %1908, label %1921
-
-; <label>:1908                                    ; preds = %1903
-  %1909 = fsub fast float %20, %938
-  %1910 = fdiv fast float %1909, %1906
-  %1911 = fptoui float %1910 to i32
-  %1912 = uitofp i32 %1911 to float
-  %1913 = fmul fast float %1912, %1906
-  %1914 = fsub fast float %1909, %1913
-  %1915 = and i32 %1911, 1
-  %1916 = icmp eq i32 %1915, 0
-  br i1 %1916, label %1917, label %1919
-
-; <label>:1917                                    ; preds = %1908
-  %1918 = fadd fast float %1914, %20
-  br label %1936
-
-; <label>:1919                                    ; preds = %1908
-  %1920 = fsub fast float %24, %1914
-  br label %1936
-
-; <label>:1921                                    ; preds = %1903
-  %1922 = fcmp fast ogt float %938, %24
-  br i1 %1922, label %1923, label %1936
-
-; <label>:1923                                    ; preds = %1921
-  %1924 = fsub fast float %938, %24
-  %1925 = fdiv fast float %1924, %1906
-  %1926 = fptoui float %1925 to i32
-  %1927 = uitofp i32 %1926 to float
-  %1928 = fmul fast float %1927, %1906
-  %1929 = fsub fast float %1924, %1928
-  %1930 = and i32 %1926, 1
-  %1931 = icmp eq i32 %1930, 0
-  br i1 %1931, label %1932, label %1934
-
-; <label>:1932                                    ; preds = %1923
-  %1933 = fsub fast float %24, %1929
-  br label %1936
-
-; <label>:1934                                    ; preds = %1923
-  %1935 = fadd fast float %1929, %20
-  br label %1936
-
-; <label>:1936                                    ; preds = %1934, %1932, %1921, %1919, %1917
-  %1937 = phi float [ %1918, %1917 ], [ %1920, %1919 ], [ %1933, %1932 ], [ %1935, %1934 ], [ %938, %1921 ]
-  %1938 = fptoui float %1937 to i32
-  %1939 = uitofp i32 %1938 to float
-  %1940 = uitofp i32 %1905 to float
-  %1941 = fptoui float %45 to i32
-  %1942 = fptoui float %182 to i32
-  %1943 = fptoui float %1939 to i32
-  %1944 = fptoui float %1940 to i32
-  %1945 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1946 = extractvalue %dx.types.CBufRet.i32 %1945, 0
-  %1947 = extractvalue %dx.types.CBufRet.i32 %1945, 1
-  %1948 = extractvalue %dx.types.CBufRet.i32 %1945, 2
-  %1949 = extractvalue %dx.types.CBufRet.i32 %1945, 3
-  %1950 = mul i32 %1946, %1941
-  %1951 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1942, i32 %1947, i32 %1950)  ; IMad(a,b,c)
-  %1952 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1943, i32 %1948, i32 %1951)  ; IMad(a,b,c)
-  %1953 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1944, i32 %1949, i32 %1952)  ; IMad(a,b,c)
-  %1954 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1953, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1955 = extractvalue %dx.types.ResRet.i32 %1954, 0
-  %1956 = uitofp i32 %1955 to float
-  br label %1957
-
-; <label>:1957                                    ; preds = %1936, %1870, %1841, %1824, %1814
-  %1958 = phi float [ %1838, %1824 ], [ 0.000000e+00, %1814 ], [ %1869, %1841 ], [ %1956, %1936 ], [ 0.000000e+00, %1870 ]
-  br i1 %941, label %1959, label %1984
-
-; <label>:1959                                    ; preds = %1957
-  %1960 = fcmp fast oge float %1378, 0.000000e+00
-  %1961 = fptoui float %1378 to i32
-  %1962 = icmp ult i32 %1961, %13
-  %1963 = and i1 %1960, %1962
-  %1964 = fcmp fast oge float %938, 0.000000e+00
-  %1965 = and i1 %1964, %1963
-  %1966 = fptoui float %938 to i32
-  %1967 = icmp ult i32 %1966, %15
-  %1968 = and i1 %1967, %1965
-  br i1 %1968, label %1969, label %2102
-
-; <label>:1969                                    ; preds = %1959
-  %1970 = fptoui float %45 to i32
-  %1971 = fptoui float %182 to i32
-  %1972 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %1973 = extractvalue %dx.types.CBufRet.i32 %1972, 0
-  %1974 = extractvalue %dx.types.CBufRet.i32 %1972, 1
-  %1975 = extractvalue %dx.types.CBufRet.i32 %1972, 2
-  %1976 = extractvalue %dx.types.CBufRet.i32 %1972, 3
-  %1977 = mul i32 %1973, %1970
-  %1978 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1971, i32 %1974, i32 %1977)  ; IMad(a,b,c)
-  %1979 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1966, i32 %1975, i32 %1978)  ; IMad(a,b,c)
-  %1980 = call i32 @dx.op.tertiary.i32(i32 48, i32 %1961, i32 %1976, i32 %1979)  ; IMad(a,b,c)
-  %1981 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %1980, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %1982 = extractvalue %dx.types.ResRet.i32 %1981, 0
-  %1983 = uitofp i32 %1982 to float
-  br label %2102
-
-; <label>:1984                                    ; preds = %1957
-  %1985 = icmp eq i32 %940, 1
-  br i1 %1985, label %1986, label %2015
-
-; <label>:1986                                    ; preds = %1984
-  %1987 = add i32 %13, -1
-  %1988 = uitofp i32 %1987 to float
-  %1989 = call float @dx.op.binary.f32(i32 35, float %1378, float 0.000000e+00)  ; FMax(a,b)
-  %1990 = call float @dx.op.binary.f32(i32 36, float %1989, float %1988)  ; FMin(a,b)
-  %1991 = fptoui float %1990 to i32
-  %1992 = add i32 %15, -1
-  %1993 = uitofp i32 %1992 to float
-  %1994 = call float @dx.op.binary.f32(i32 35, float %938, float 0.000000e+00)  ; FMax(a,b)
-  %1995 = call float @dx.op.binary.f32(i32 36, float %1994, float %1993)  ; FMin(a,b)
-  %1996 = fptoui float %1995 to i32
-  %1997 = uitofp i32 %1996 to float
-  %1998 = uitofp i32 %1991 to float
-  %1999 = fptoui float %45 to i32
-  %2000 = fptoui float %182 to i32
-  %2001 = fptoui float %1997 to i32
-  %2002 = fptoui float %1998 to i32
-  %2003 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2004 = extractvalue %dx.types.CBufRet.i32 %2003, 0
-  %2005 = extractvalue %dx.types.CBufRet.i32 %2003, 1
-  %2006 = extractvalue %dx.types.CBufRet.i32 %2003, 2
-  %2007 = extractvalue %dx.types.CBufRet.i32 %2003, 3
-  %2008 = mul i32 %2004, %1999
-  %2009 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2000, i32 %2005, i32 %2008)  ; IMad(a,b,c)
-  %2010 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2001, i32 %2006, i32 %2009)  ; IMad(a,b,c)
-  %2011 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2002, i32 %2007, i32 %2010)  ; IMad(a,b,c)
-  %2012 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2011, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2013 = extractvalue %dx.types.ResRet.i32 %2012, 0
-  %2014 = uitofp i32 %2013 to float
-  br label %2102
-
-; <label>:2015                                    ; preds = %1984
-  %2016 = icmp eq i32 %940, 2
-  br i1 %2016, label %2017, label %2102
-
-; <label>:2017                                    ; preds = %2015
-  %2018 = fsub fast float %22, %20
-  %2019 = fcmp fast olt float %1378, %20
-  br i1 %2019, label %2020, label %2033
-
-; <label>:2020                                    ; preds = %2017
-  %2021 = fsub fast float %20, %1378
-  %2022 = fdiv fast float %2021, %2018
-  %2023 = fptoui float %2022 to i32
-  %2024 = uitofp i32 %2023 to float
-  %2025 = fmul fast float %2024, %2018
-  %2026 = fsub fast float %2021, %2025
-  %2027 = and i32 %2023, 1
-  %2028 = icmp eq i32 %2027, 0
-  br i1 %2028, label %2029, label %2031
-
-; <label>:2029                                    ; preds = %2020
-  %2030 = fadd fast float %2026, %20
-  br label %2048
-
-; <label>:2031                                    ; preds = %2020
-  %2032 = fsub fast float %22, %2026
-  br label %2048
-
-; <label>:2033                                    ; preds = %2017
-  %2034 = fcmp fast ogt float %1378, %22
-  br i1 %2034, label %2035, label %2048
-
-; <label>:2035                                    ; preds = %2033
-  %2036 = fsub fast float %1378, %22
-  %2037 = fdiv fast float %2036, %2018
-  %2038 = fptoui float %2037 to i32
-  %2039 = uitofp i32 %2038 to float
-  %2040 = fmul fast float %2039, %2018
-  %2041 = fsub fast float %2036, %2040
-  %2042 = and i32 %2038, 1
-  %2043 = icmp eq i32 %2042, 0
-  br i1 %2043, label %2044, label %2046
-
-; <label>:2044                                    ; preds = %2035
-  %2045 = fsub fast float %22, %2041
-  br label %2048
-
-; <label>:2046                                    ; preds = %2035
-  %2047 = fadd fast float %2041, %20
-  br label %2048
-
-; <label>:2048                                    ; preds = %2046, %2044, %2033, %2031, %2029
-  %2049 = phi float [ %2030, %2029 ], [ %2032, %2031 ], [ %2045, %2044 ], [ %2047, %2046 ], [ %1378, %2033 ]
-  %2050 = fptoui float %2049 to i32
-  %2051 = fsub fast float %24, %20
-  %2052 = fcmp fast olt float %938, %20
-  br i1 %2052, label %2053, label %2066
-
-; <label>:2053                                    ; preds = %2048
-  %2054 = fsub fast float %20, %938
-  %2055 = fdiv fast float %2054, %2051
-  %2056 = fptoui float %2055 to i32
-  %2057 = uitofp i32 %2056 to float
-  %2058 = fmul fast float %2057, %2051
-  %2059 = fsub fast float %2054, %2058
-  %2060 = and i32 %2056, 1
-  %2061 = icmp eq i32 %2060, 0
-  br i1 %2061, label %2062, label %2064
-
-; <label>:2062                                    ; preds = %2053
-  %2063 = fadd fast float %2059, %20
-  br label %2081
-
-; <label>:2064                                    ; preds = %2053
-  %2065 = fsub fast float %24, %2059
-  br label %2081
-
-; <label>:2066                                    ; preds = %2048
-  %2067 = fcmp fast ogt float %938, %24
-  br i1 %2067, label %2068, label %2081
-
-; <label>:2068                                    ; preds = %2066
-  %2069 = fsub fast float %938, %24
-  %2070 = fdiv fast float %2069, %2051
-  %2071 = fptoui float %2070 to i32
-  %2072 = uitofp i32 %2071 to float
-  %2073 = fmul fast float %2072, %2051
-  %2074 = fsub fast float %2069, %2073
-  %2075 = and i32 %2071, 1
-  %2076 = icmp eq i32 %2075, 0
-  br i1 %2076, label %2077, label %2079
-
-; <label>:2077                                    ; preds = %2068
-  %2078 = fsub fast float %24, %2074
-  br label %2081
-
-; <label>:2079                                    ; preds = %2068
-  %2080 = fadd fast float %2074, %20
-  br label %2081
-
-; <label>:2081                                    ; preds = %2079, %2077, %2066, %2064, %2062
-  %2082 = phi float [ %2063, %2062 ], [ %2065, %2064 ], [ %2078, %2077 ], [ %2080, %2079 ], [ %938, %2066 ]
-  %2083 = fptoui float %2082 to i32
-  %2084 = uitofp i32 %2083 to float
-  %2085 = uitofp i32 %2050 to float
-  %2086 = fptoui float %45 to i32
-  %2087 = fptoui float %182 to i32
-  %2088 = fptoui float %2084 to i32
-  %2089 = fptoui float %2085 to i32
-  %2090 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2091 = extractvalue %dx.types.CBufRet.i32 %2090, 0
-  %2092 = extractvalue %dx.types.CBufRet.i32 %2090, 1
-  %2093 = extractvalue %dx.types.CBufRet.i32 %2090, 2
-  %2094 = extractvalue %dx.types.CBufRet.i32 %2090, 3
-  %2095 = mul i32 %2091, %2086
-  %2096 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2087, i32 %2092, i32 %2095)  ; IMad(a,b,c)
-  %2097 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2088, i32 %2093, i32 %2096)  ; IMad(a,b,c)
-  %2098 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2089, i32 %2094, i32 %2097)  ; IMad(a,b,c)
-  %2099 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2098, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2100 = extractvalue %dx.types.ResRet.i32 %2099, 0
-  %2101 = uitofp i32 %2100 to float
-  br label %2102
-
-; <label>:2102                                    ; preds = %2081, %2015, %1986, %1969, %1959
-  %2103 = phi float [ %1983, %1969 ], [ 0.000000e+00, %1959 ], [ %2014, %1986 ], [ %2101, %2081 ], [ 0.000000e+00, %2015 ]
-  %2104 = fadd fast float %938, 1.000000e+00
-  br i1 %941, label %2105, label %2130
-
-; <label>:2105                                    ; preds = %2102
-  %2106 = fcmp fast oge float %937, 0.000000e+00
-  %2107 = fptoui float %937 to i32
-  %2108 = icmp ult i32 %2107, %13
-  %2109 = and i1 %2106, %2108
-  %2110 = fcmp fast oge float %2104, 0.000000e+00
-  %2111 = and i1 %2110, %2109
-  %2112 = fptoui float %2104 to i32
-  %2113 = icmp ult i32 %2112, %15
-  %2114 = and i1 %2113, %2111
-  br i1 %2114, label %2115, label %2248
-
-; <label>:2115                                    ; preds = %2105
-  %2116 = fptoui float %45 to i32
-  %2117 = fptoui float %182 to i32
-  %2118 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2119 = extractvalue %dx.types.CBufRet.i32 %2118, 0
-  %2120 = extractvalue %dx.types.CBufRet.i32 %2118, 1
-  %2121 = extractvalue %dx.types.CBufRet.i32 %2118, 2
-  %2122 = extractvalue %dx.types.CBufRet.i32 %2118, 3
-  %2123 = mul i32 %2119, %2116
-  %2124 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2117, i32 %2120, i32 %2123)  ; IMad(a,b,c)
-  %2125 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2112, i32 %2121, i32 %2124)  ; IMad(a,b,c)
-  %2126 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2107, i32 %2122, i32 %2125)  ; IMad(a,b,c)
-  %2127 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2126, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2128 = extractvalue %dx.types.ResRet.i32 %2127, 0
-  %2129 = uitofp i32 %2128 to float
-  br label %2248
-
-; <label>:2130                                    ; preds = %2102
-  %2131 = icmp eq i32 %940, 1
-  br i1 %2131, label %2132, label %2161
-
-; <label>:2132                                    ; preds = %2130
-  %2133 = add i32 %13, -1
-  %2134 = uitofp i32 %2133 to float
-  %2135 = call float @dx.op.binary.f32(i32 35, float %937, float 0.000000e+00)  ; FMax(a,b)
-  %2136 = call float @dx.op.binary.f32(i32 36, float %2135, float %2134)  ; FMin(a,b)
-  %2137 = fptoui float %2136 to i32
-  %2138 = add i32 %15, -1
-  %2139 = uitofp i32 %2138 to float
-  %2140 = call float @dx.op.binary.f32(i32 35, float %2104, float 0.000000e+00)  ; FMax(a,b)
-  %2141 = call float @dx.op.binary.f32(i32 36, float %2140, float %2139)  ; FMin(a,b)
-  %2142 = fptoui float %2141 to i32
-  %2143 = uitofp i32 %2142 to float
-  %2144 = uitofp i32 %2137 to float
-  %2145 = fptoui float %45 to i32
-  %2146 = fptoui float %182 to i32
-  %2147 = fptoui float %2143 to i32
-  %2148 = fptoui float %2144 to i32
-  %2149 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2150 = extractvalue %dx.types.CBufRet.i32 %2149, 0
-  %2151 = extractvalue %dx.types.CBufRet.i32 %2149, 1
-  %2152 = extractvalue %dx.types.CBufRet.i32 %2149, 2
-  %2153 = extractvalue %dx.types.CBufRet.i32 %2149, 3
-  %2154 = mul i32 %2150, %2145
-  %2155 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2146, i32 %2151, i32 %2154)  ; IMad(a,b,c)
-  %2156 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2147, i32 %2152, i32 %2155)  ; IMad(a,b,c)
-  %2157 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2148, i32 %2153, i32 %2156)  ; IMad(a,b,c)
-  %2158 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2157, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2159 = extractvalue %dx.types.ResRet.i32 %2158, 0
-  %2160 = uitofp i32 %2159 to float
-  br label %2248
-
-; <label>:2161                                    ; preds = %2130
-  %2162 = icmp eq i32 %940, 2
-  br i1 %2162, label %2163, label %2248
-
-; <label>:2163                                    ; preds = %2161
-  %2164 = fsub fast float %22, %20
-  %2165 = fcmp fast olt float %937, %20
-  br i1 %2165, label %2166, label %2179
-
-; <label>:2166                                    ; preds = %2163
-  %2167 = fsub fast float %20, %937
-  %2168 = fdiv fast float %2167, %2164
-  %2169 = fptoui float %2168 to i32
-  %2170 = uitofp i32 %2169 to float
-  %2171 = fmul fast float %2170, %2164
-  %2172 = fsub fast float %2167, %2171
-  %2173 = and i32 %2169, 1
-  %2174 = icmp eq i32 %2173, 0
-  br i1 %2174, label %2175, label %2177
-
-; <label>:2175                                    ; preds = %2166
-  %2176 = fadd fast float %2172, %20
-  br label %2194
-
-; <label>:2177                                    ; preds = %2166
-  %2178 = fsub fast float %22, %2172
-  br label %2194
-
-; <label>:2179                                    ; preds = %2163
-  %2180 = fcmp fast ogt float %937, %22
-  br i1 %2180, label %2181, label %2194
-
-; <label>:2181                                    ; preds = %2179
-  %2182 = fsub fast float %937, %22
-  %2183 = fdiv fast float %2182, %2164
-  %2184 = fptoui float %2183 to i32
-  %2185 = uitofp i32 %2184 to float
-  %2186 = fmul fast float %2185, %2164
-  %2187 = fsub fast float %2182, %2186
-  %2188 = and i32 %2184, 1
-  %2189 = icmp eq i32 %2188, 0
-  br i1 %2189, label %2190, label %2192
-
-; <label>:2190                                    ; preds = %2181
-  %2191 = fsub fast float %22, %2187
-  br label %2194
-
-; <label>:2192                                    ; preds = %2181
-  %2193 = fadd fast float %2187, %20
-  br label %2194
-
-; <label>:2194                                    ; preds = %2192, %2190, %2179, %2177, %2175
-  %2195 = phi float [ %2176, %2175 ], [ %2178, %2177 ], [ %2191, %2190 ], [ %2193, %2192 ], [ %937, %2179 ]
-  %2196 = fptoui float %2195 to i32
-  %2197 = fsub fast float %24, %20
-  %2198 = fcmp fast olt float %2104, %20
-  br i1 %2198, label %2199, label %2212
-
-; <label>:2199                                    ; preds = %2194
-  %2200 = fsub fast float %20, %2104
-  %2201 = fdiv fast float %2200, %2197
-  %2202 = fptoui float %2201 to i32
-  %2203 = uitofp i32 %2202 to float
-  %2204 = fmul fast float %2203, %2197
-  %2205 = fsub fast float %2200, %2204
-  %2206 = and i32 %2202, 1
-  %2207 = icmp eq i32 %2206, 0
-  br i1 %2207, label %2208, label %2210
-
-; <label>:2208                                    ; preds = %2199
-  %2209 = fadd fast float %2205, %20
-  br label %2227
-
-; <label>:2210                                    ; preds = %2199
-  %2211 = fsub fast float %24, %2205
-  br label %2227
-
-; <label>:2212                                    ; preds = %2194
-  %2213 = fcmp fast ogt float %2104, %24
-  br i1 %2213, label %2214, label %2227
-
-; <label>:2214                                    ; preds = %2212
-  %2215 = fsub fast float %2104, %24
-  %2216 = fdiv fast float %2215, %2197
-  %2217 = fptoui float %2216 to i32
-  %2218 = uitofp i32 %2217 to float
-  %2219 = fmul fast float %2218, %2197
-  %2220 = fsub fast float %2215, %2219
-  %2221 = and i32 %2217, 1
-  %2222 = icmp eq i32 %2221, 0
-  br i1 %2222, label %2223, label %2225
-
-; <label>:2223                                    ; preds = %2214
-  %2224 = fsub fast float %24, %2220
-  br label %2227
-
-; <label>:2225                                    ; preds = %2214
-  %2226 = fadd fast float %2220, %20
-  br label %2227
-
-; <label>:2227                                    ; preds = %2225, %2223, %2212, %2210, %2208
-  %2228 = phi float [ %2209, %2208 ], [ %2211, %2210 ], [ %2224, %2223 ], [ %2226, %2225 ], [ %2104, %2212 ]
-  %2229 = fptoui float %2228 to i32
-  %2230 = uitofp i32 %2229 to float
-  %2231 = uitofp i32 %2196 to float
-  %2232 = fptoui float %45 to i32
-  %2233 = fptoui float %182 to i32
-  %2234 = fptoui float %2230 to i32
-  %2235 = fptoui float %2231 to i32
-  %2236 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2237 = extractvalue %dx.types.CBufRet.i32 %2236, 0
-  %2238 = extractvalue %dx.types.CBufRet.i32 %2236, 1
-  %2239 = extractvalue %dx.types.CBufRet.i32 %2236, 2
-  %2240 = extractvalue %dx.types.CBufRet.i32 %2236, 3
-  %2241 = mul i32 %2237, %2232
-  %2242 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2233, i32 %2238, i32 %2241)  ; IMad(a,b,c)
-  %2243 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2234, i32 %2239, i32 %2242)  ; IMad(a,b,c)
-  %2244 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2235, i32 %2240, i32 %2243)  ; IMad(a,b,c)
-  %2245 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2244, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2246 = extractvalue %dx.types.ResRet.i32 %2245, 0
-  %2247 = uitofp i32 %2246 to float
-  br label %2248
-
-; <label>:2248                                    ; preds = %2227, %2161, %2132, %2115, %2105
-  %2249 = phi float [ %2129, %2115 ], [ 0.000000e+00, %2105 ], [ %2160, %2132 ], [ %2247, %2227 ], [ 0.000000e+00, %2161 ]
-  br i1 %941, label %2250, label %2275
-
-; <label>:2250                                    ; preds = %2248
-  %2251 = fcmp fast oge float %936, 0.000000e+00
-  %2252 = fptoui float %936 to i32
-  %2253 = icmp ult i32 %2252, %13
-  %2254 = and i1 %2251, %2253
-  %2255 = fcmp fast oge float %2104, 0.000000e+00
-  %2256 = and i1 %2255, %2254
-  %2257 = fptoui float %2104 to i32
-  %2258 = icmp ult i32 %2257, %15
-  %2259 = and i1 %2258, %2256
-  br i1 %2259, label %2260, label %2393
-
-; <label>:2260                                    ; preds = %2250
-  %2261 = fptoui float %45 to i32
-  %2262 = fptoui float %182 to i32
-  %2263 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2264 = extractvalue %dx.types.CBufRet.i32 %2263, 0
-  %2265 = extractvalue %dx.types.CBufRet.i32 %2263, 1
-  %2266 = extractvalue %dx.types.CBufRet.i32 %2263, 2
-  %2267 = extractvalue %dx.types.CBufRet.i32 %2263, 3
-  %2268 = mul i32 %2264, %2261
-  %2269 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2262, i32 %2265, i32 %2268)  ; IMad(a,b,c)
-  %2270 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2257, i32 %2266, i32 %2269)  ; IMad(a,b,c)
-  %2271 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2252, i32 %2267, i32 %2270)  ; IMad(a,b,c)
-  %2272 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2271, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2273 = extractvalue %dx.types.ResRet.i32 %2272, 0
-  %2274 = uitofp i32 %2273 to float
-  br label %2393
-
-; <label>:2275                                    ; preds = %2248
-  %2276 = icmp eq i32 %940, 1
-  br i1 %2276, label %2277, label %2306
-
-; <label>:2277                                    ; preds = %2275
-  %2278 = add i32 %13, -1
-  %2279 = uitofp i32 %2278 to float
-  %2280 = call float @dx.op.binary.f32(i32 35, float %936, float 0.000000e+00)  ; FMax(a,b)
-  %2281 = call float @dx.op.binary.f32(i32 36, float %2280, float %2279)  ; FMin(a,b)
-  %2282 = fptoui float %2281 to i32
-  %2283 = add i32 %15, -1
-  %2284 = uitofp i32 %2283 to float
-  %2285 = call float @dx.op.binary.f32(i32 35, float %2104, float 0.000000e+00)  ; FMax(a,b)
-  %2286 = call float @dx.op.binary.f32(i32 36, float %2285, float %2284)  ; FMin(a,b)
-  %2287 = fptoui float %2286 to i32
-  %2288 = uitofp i32 %2287 to float
-  %2289 = uitofp i32 %2282 to float
-  %2290 = fptoui float %45 to i32
-  %2291 = fptoui float %182 to i32
-  %2292 = fptoui float %2288 to i32
-  %2293 = fptoui float %2289 to i32
-  %2294 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2295 = extractvalue %dx.types.CBufRet.i32 %2294, 0
-  %2296 = extractvalue %dx.types.CBufRet.i32 %2294, 1
-  %2297 = extractvalue %dx.types.CBufRet.i32 %2294, 2
-  %2298 = extractvalue %dx.types.CBufRet.i32 %2294, 3
-  %2299 = mul i32 %2295, %2290
-  %2300 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2291, i32 %2296, i32 %2299)  ; IMad(a,b,c)
-  %2301 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2292, i32 %2297, i32 %2300)  ; IMad(a,b,c)
-  %2302 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2293, i32 %2298, i32 %2301)  ; IMad(a,b,c)
-  %2303 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2302, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2304 = extractvalue %dx.types.ResRet.i32 %2303, 0
-  %2305 = uitofp i32 %2304 to float
-  br label %2393
-
-; <label>:2306                                    ; preds = %2275
-  %2307 = icmp eq i32 %940, 2
-  br i1 %2307, label %2308, label %2393
-
-; <label>:2308                                    ; preds = %2306
-  %2309 = fsub fast float %22, %20
-  %2310 = fcmp fast olt float %936, %20
-  br i1 %2310, label %2311, label %2324
-
-; <label>:2311                                    ; preds = %2308
-  %2312 = fsub fast float %20, %936
-  %2313 = fdiv fast float %2312, %2309
-  %2314 = fptoui float %2313 to i32
-  %2315 = uitofp i32 %2314 to float
-  %2316 = fmul fast float %2315, %2309
-  %2317 = fsub fast float %2312, %2316
-  %2318 = and i32 %2314, 1
-  %2319 = icmp eq i32 %2318, 0
-  br i1 %2319, label %2320, label %2322
-
-; <label>:2320                                    ; preds = %2311
-  %2321 = fadd fast float %2317, %20
-  br label %2339
-
-; <label>:2322                                    ; preds = %2311
-  %2323 = fsub fast float %22, %2317
-  br label %2339
-
-; <label>:2324                                    ; preds = %2308
-  %2325 = fcmp fast ogt float %936, %22
-  br i1 %2325, label %2326, label %2339
-
-; <label>:2326                                    ; preds = %2324
-  %2327 = fsub fast float %936, %22
-  %2328 = fdiv fast float %2327, %2309
-  %2329 = fptoui float %2328 to i32
-  %2330 = uitofp i32 %2329 to float
-  %2331 = fmul fast float %2330, %2309
-  %2332 = fsub fast float %2327, %2331
-  %2333 = and i32 %2329, 1
-  %2334 = icmp eq i32 %2333, 0
-  br i1 %2334, label %2335, label %2337
-
-; <label>:2335                                    ; preds = %2326
-  %2336 = fsub fast float %22, %2332
-  br label %2339
-
-; <label>:2337                                    ; preds = %2326
-  %2338 = fadd fast float %2332, %20
-  br label %2339
-
-; <label>:2339                                    ; preds = %2337, %2335, %2324, %2322, %2320
-  %2340 = phi float [ %2321, %2320 ], [ %2323, %2322 ], [ %2336, %2335 ], [ %2338, %2337 ], [ %936, %2324 ]
-  %2341 = fptoui float %2340 to i32
-  %2342 = fsub fast float %24, %20
-  %2343 = fcmp fast olt float %2104, %20
-  br i1 %2343, label %2344, label %2357
-
-; <label>:2344                                    ; preds = %2339
-  %2345 = fsub fast float %20, %2104
-  %2346 = fdiv fast float %2345, %2342
-  %2347 = fptoui float %2346 to i32
-  %2348 = uitofp i32 %2347 to float
-  %2349 = fmul fast float %2348, %2342
-  %2350 = fsub fast float %2345, %2349
-  %2351 = and i32 %2347, 1
-  %2352 = icmp eq i32 %2351, 0
-  br i1 %2352, label %2353, label %2355
-
-; <label>:2353                                    ; preds = %2344
-  %2354 = fadd fast float %2350, %20
-  br label %2372
-
-; <label>:2355                                    ; preds = %2344
-  %2356 = fsub fast float %24, %2350
-  br label %2372
-
-; <label>:2357                                    ; preds = %2339
-  %2358 = fcmp fast ogt float %2104, %24
-  br i1 %2358, label %2359, label %2372
-
-; <label>:2359                                    ; preds = %2357
-  %2360 = fsub fast float %2104, %24
-  %2361 = fdiv fast float %2360, %2342
-  %2362 = fptoui float %2361 to i32
-  %2363 = uitofp i32 %2362 to float
-  %2364 = fmul fast float %2363, %2342
-  %2365 = fsub fast float %2360, %2364
-  %2366 = and i32 %2362, 1
-  %2367 = icmp eq i32 %2366, 0
-  br i1 %2367, label %2368, label %2370
-
-; <label>:2368                                    ; preds = %2359
-  %2369 = fsub fast float %24, %2365
-  br label %2372
-
-; <label>:2370                                    ; preds = %2359
-  %2371 = fadd fast float %2365, %20
-  br label %2372
-
-; <label>:2372                                    ; preds = %2370, %2368, %2357, %2355, %2353
-  %2373 = phi float [ %2354, %2353 ], [ %2356, %2355 ], [ %2369, %2368 ], [ %2371, %2370 ], [ %2104, %2357 ]
-  %2374 = fptoui float %2373 to i32
-  %2375 = uitofp i32 %2374 to float
-  %2376 = uitofp i32 %2341 to float
-  %2377 = fptoui float %45 to i32
-  %2378 = fptoui float %182 to i32
-  %2379 = fptoui float %2375 to i32
-  %2380 = fptoui float %2376 to i32
-  %2381 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2382 = extractvalue %dx.types.CBufRet.i32 %2381, 0
-  %2383 = extractvalue %dx.types.CBufRet.i32 %2381, 1
-  %2384 = extractvalue %dx.types.CBufRet.i32 %2381, 2
-  %2385 = extractvalue %dx.types.CBufRet.i32 %2381, 3
-  %2386 = mul i32 %2382, %2377
-  %2387 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2378, i32 %2383, i32 %2386)  ; IMad(a,b,c)
-  %2388 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2379, i32 %2384, i32 %2387)  ; IMad(a,b,c)
-  %2389 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2380, i32 %2385, i32 %2388)  ; IMad(a,b,c)
-  %2390 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2389, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2391 = extractvalue %dx.types.ResRet.i32 %2390, 0
-  %2392 = uitofp i32 %2391 to float
-  br label %2393
-
-; <label>:2393                                    ; preds = %2372, %2306, %2277, %2260, %2250
-  %2394 = phi float [ %2274, %2260 ], [ 0.000000e+00, %2250 ], [ %2305, %2277 ], [ %2392, %2372 ], [ 0.000000e+00, %2306 ]
-  br i1 %941, label %2395, label %2420
-
-; <label>:2395                                    ; preds = %2393
-  %2396 = fcmp fast oge float %1232, 0.000000e+00
-  %2397 = fptoui float %1232 to i32
-  %2398 = icmp ult i32 %2397, %13
-  %2399 = and i1 %2396, %2398
-  %2400 = fcmp fast oge float %2104, 0.000000e+00
-  %2401 = and i1 %2400, %2399
-  %2402 = fptoui float %2104 to i32
-  %2403 = icmp ult i32 %2402, %15
-  %2404 = and i1 %2403, %2401
-  br i1 %2404, label %2405, label %2538
-
-; <label>:2405                                    ; preds = %2395
-  %2406 = fptoui float %45 to i32
-  %2407 = fptoui float %182 to i32
-  %2408 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2409 = extractvalue %dx.types.CBufRet.i32 %2408, 0
-  %2410 = extractvalue %dx.types.CBufRet.i32 %2408, 1
-  %2411 = extractvalue %dx.types.CBufRet.i32 %2408, 2
-  %2412 = extractvalue %dx.types.CBufRet.i32 %2408, 3
-  %2413 = mul i32 %2409, %2406
-  %2414 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2407, i32 %2410, i32 %2413)  ; IMad(a,b,c)
-  %2415 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2402, i32 %2411, i32 %2414)  ; IMad(a,b,c)
-  %2416 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2397, i32 %2412, i32 %2415)  ; IMad(a,b,c)
-  %2417 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2416, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2418 = extractvalue %dx.types.ResRet.i32 %2417, 0
-  %2419 = uitofp i32 %2418 to float
-  br label %2538
-
-; <label>:2420                                    ; preds = %2393
-  %2421 = icmp eq i32 %940, 1
-  br i1 %2421, label %2422, label %2451
-
-; <label>:2422                                    ; preds = %2420
-  %2423 = add i32 %13, -1
-  %2424 = uitofp i32 %2423 to float
-  %2425 = call float @dx.op.binary.f32(i32 35, float %1232, float 0.000000e+00)  ; FMax(a,b)
-  %2426 = call float @dx.op.binary.f32(i32 36, float %2425, float %2424)  ; FMin(a,b)
-  %2427 = fptoui float %2426 to i32
-  %2428 = add i32 %15, -1
-  %2429 = uitofp i32 %2428 to float
-  %2430 = call float @dx.op.binary.f32(i32 35, float %2104, float 0.000000e+00)  ; FMax(a,b)
-  %2431 = call float @dx.op.binary.f32(i32 36, float %2430, float %2429)  ; FMin(a,b)
-  %2432 = fptoui float %2431 to i32
-  %2433 = uitofp i32 %2432 to float
-  %2434 = uitofp i32 %2427 to float
-  %2435 = fptoui float %45 to i32
-  %2436 = fptoui float %182 to i32
-  %2437 = fptoui float %2433 to i32
-  %2438 = fptoui float %2434 to i32
-  %2439 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2440 = extractvalue %dx.types.CBufRet.i32 %2439, 0
-  %2441 = extractvalue %dx.types.CBufRet.i32 %2439, 1
-  %2442 = extractvalue %dx.types.CBufRet.i32 %2439, 2
-  %2443 = extractvalue %dx.types.CBufRet.i32 %2439, 3
-  %2444 = mul i32 %2440, %2435
-  %2445 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2436, i32 %2441, i32 %2444)  ; IMad(a,b,c)
-  %2446 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2437, i32 %2442, i32 %2445)  ; IMad(a,b,c)
-  %2447 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2438, i32 %2443, i32 %2446)  ; IMad(a,b,c)
-  %2448 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2447, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2449 = extractvalue %dx.types.ResRet.i32 %2448, 0
-  %2450 = uitofp i32 %2449 to float
-  br label %2538
-
-; <label>:2451                                    ; preds = %2420
-  %2452 = icmp eq i32 %940, 2
-  br i1 %2452, label %2453, label %2538
-
-; <label>:2453                                    ; preds = %2451
-  %2454 = fsub fast float %22, %20
-  %2455 = fcmp fast olt float %1232, %20
-  br i1 %2455, label %2456, label %2469
-
-; <label>:2456                                    ; preds = %2453
-  %2457 = fsub fast float %20, %1232
-  %2458 = fdiv fast float %2457, %2454
-  %2459 = fptoui float %2458 to i32
-  %2460 = uitofp i32 %2459 to float
-  %2461 = fmul fast float %2460, %2454
-  %2462 = fsub fast float %2457, %2461
-  %2463 = and i32 %2459, 1
-  %2464 = icmp eq i32 %2463, 0
-  br i1 %2464, label %2465, label %2467
-
-; <label>:2465                                    ; preds = %2456
-  %2466 = fadd fast float %2462, %20
-  br label %2484
-
-; <label>:2467                                    ; preds = %2456
-  %2468 = fsub fast float %22, %2462
-  br label %2484
-
-; <label>:2469                                    ; preds = %2453
-  %2470 = fcmp fast ogt float %1232, %22
-  br i1 %2470, label %2471, label %2484
-
-; <label>:2471                                    ; preds = %2469
-  %2472 = fsub fast float %1232, %22
-  %2473 = fdiv fast float %2472, %2454
-  %2474 = fptoui float %2473 to i32
-  %2475 = uitofp i32 %2474 to float
-  %2476 = fmul fast float %2475, %2454
-  %2477 = fsub fast float %2472, %2476
-  %2478 = and i32 %2474, 1
-  %2479 = icmp eq i32 %2478, 0
-  br i1 %2479, label %2480, label %2482
-
-; <label>:2480                                    ; preds = %2471
-  %2481 = fsub fast float %22, %2477
-  br label %2484
-
-; <label>:2482                                    ; preds = %2471
-  %2483 = fadd fast float %2477, %20
-  br label %2484
-
-; <label>:2484                                    ; preds = %2482, %2480, %2469, %2467, %2465
-  %2485 = phi float [ %2466, %2465 ], [ %2468, %2467 ], [ %2481, %2480 ], [ %2483, %2482 ], [ %1232, %2469 ]
-  %2486 = fptoui float %2485 to i32
-  %2487 = fsub fast float %24, %20
-  %2488 = fcmp fast olt float %2104, %20
-  br i1 %2488, label %2489, label %2502
-
-; <label>:2489                                    ; preds = %2484
-  %2490 = fsub fast float %20, %2104
-  %2491 = fdiv fast float %2490, %2487
-  %2492 = fptoui float %2491 to i32
-  %2493 = uitofp i32 %2492 to float
-  %2494 = fmul fast float %2493, %2487
-  %2495 = fsub fast float %2490, %2494
-  %2496 = and i32 %2492, 1
-  %2497 = icmp eq i32 %2496, 0
-  br i1 %2497, label %2498, label %2500
-
-; <label>:2498                                    ; preds = %2489
-  %2499 = fadd fast float %2495, %20
-  br label %2517
-
-; <label>:2500                                    ; preds = %2489
-  %2501 = fsub fast float %24, %2495
-  br label %2517
-
-; <label>:2502                                    ; preds = %2484
-  %2503 = fcmp fast ogt float %2104, %24
-  br i1 %2503, label %2504, label %2517
-
-; <label>:2504                                    ; preds = %2502
-  %2505 = fsub fast float %2104, %24
-  %2506 = fdiv fast float %2505, %2487
-  %2507 = fptoui float %2506 to i32
-  %2508 = uitofp i32 %2507 to float
-  %2509 = fmul fast float %2508, %2487
-  %2510 = fsub fast float %2505, %2509
-  %2511 = and i32 %2507, 1
-  %2512 = icmp eq i32 %2511, 0
-  br i1 %2512, label %2513, label %2515
-
-; <label>:2513                                    ; preds = %2504
-  %2514 = fsub fast float %24, %2510
-  br label %2517
-
-; <label>:2515                                    ; preds = %2504
-  %2516 = fadd fast float %2510, %20
-  br label %2517
-
-; <label>:2517                                    ; preds = %2515, %2513, %2502, %2500, %2498
-  %2518 = phi float [ %2499, %2498 ], [ %2501, %2500 ], [ %2514, %2513 ], [ %2516, %2515 ], [ %2104, %2502 ]
-  %2519 = fptoui float %2518 to i32
-  %2520 = uitofp i32 %2519 to float
-  %2521 = uitofp i32 %2486 to float
-  %2522 = fptoui float %45 to i32
-  %2523 = fptoui float %182 to i32
-  %2524 = fptoui float %2520 to i32
-  %2525 = fptoui float %2521 to i32
-  %2526 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2527 = extractvalue %dx.types.CBufRet.i32 %2526, 0
-  %2528 = extractvalue %dx.types.CBufRet.i32 %2526, 1
-  %2529 = extractvalue %dx.types.CBufRet.i32 %2526, 2
-  %2530 = extractvalue %dx.types.CBufRet.i32 %2526, 3
-  %2531 = mul i32 %2527, %2522
-  %2532 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2523, i32 %2528, i32 %2531)  ; IMad(a,b,c)
-  %2533 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2524, i32 %2529, i32 %2532)  ; IMad(a,b,c)
-  %2534 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2525, i32 %2530, i32 %2533)  ; IMad(a,b,c)
-  %2535 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2534, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2536 = extractvalue %dx.types.ResRet.i32 %2535, 0
-  %2537 = uitofp i32 %2536 to float
-  br label %2538
-
-; <label>:2538                                    ; preds = %2517, %2451, %2422, %2405, %2395
-  %2539 = phi float [ %2419, %2405 ], [ 0.000000e+00, %2395 ], [ %2450, %2422 ], [ %2537, %2517 ], [ 0.000000e+00, %2451 ]
-  br i1 %941, label %2540, label %2565
-
-; <label>:2540                                    ; preds = %2538
-  %2541 = fcmp fast oge float %1378, 0.000000e+00
-  %2542 = fptoui float %1378 to i32
-  %2543 = icmp ult i32 %2542, %13
-  %2544 = and i1 %2541, %2543
-  %2545 = fcmp fast oge float %2104, 0.000000e+00
-  %2546 = and i1 %2545, %2544
-  %2547 = fptoui float %2104 to i32
-  %2548 = icmp ult i32 %2547, %15
-  %2549 = and i1 %2548, %2546
-  br i1 %2549, label %2550, label %2683
-
-; <label>:2550                                    ; preds = %2540
-  %2551 = fptoui float %45 to i32
-  %2552 = fptoui float %182 to i32
-  %2553 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2554 = extractvalue %dx.types.CBufRet.i32 %2553, 0
-  %2555 = extractvalue %dx.types.CBufRet.i32 %2553, 1
-  %2556 = extractvalue %dx.types.CBufRet.i32 %2553, 2
-  %2557 = extractvalue %dx.types.CBufRet.i32 %2553, 3
-  %2558 = mul i32 %2554, %2551
-  %2559 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2552, i32 %2555, i32 %2558)  ; IMad(a,b,c)
-  %2560 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2547, i32 %2556, i32 %2559)  ; IMad(a,b,c)
-  %2561 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2542, i32 %2557, i32 %2560)  ; IMad(a,b,c)
-  %2562 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2561, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2563 = extractvalue %dx.types.ResRet.i32 %2562, 0
-  %2564 = uitofp i32 %2563 to float
-  br label %2683
-
-; <label>:2565                                    ; preds = %2538
-  %2566 = icmp eq i32 %940, 1
-  br i1 %2566, label %2567, label %2596
-
-; <label>:2567                                    ; preds = %2565
-  %2568 = add i32 %13, -1
-  %2569 = uitofp i32 %2568 to float
-  %2570 = call float @dx.op.binary.f32(i32 35, float %1378, float 0.000000e+00)  ; FMax(a,b)
-  %2571 = call float @dx.op.binary.f32(i32 36, float %2570, float %2569)  ; FMin(a,b)
-  %2572 = fptoui float %2571 to i32
-  %2573 = add i32 %15, -1
-  %2574 = uitofp i32 %2573 to float
-  %2575 = call float @dx.op.binary.f32(i32 35, float %2104, float 0.000000e+00)  ; FMax(a,b)
-  %2576 = call float @dx.op.binary.f32(i32 36, float %2575, float %2574)  ; FMin(a,b)
-  %2577 = fptoui float %2576 to i32
-  %2578 = uitofp i32 %2577 to float
-  %2579 = uitofp i32 %2572 to float
-  %2580 = fptoui float %45 to i32
-  %2581 = fptoui float %182 to i32
-  %2582 = fptoui float %2578 to i32
-  %2583 = fptoui float %2579 to i32
-  %2584 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2585 = extractvalue %dx.types.CBufRet.i32 %2584, 0
-  %2586 = extractvalue %dx.types.CBufRet.i32 %2584, 1
-  %2587 = extractvalue %dx.types.CBufRet.i32 %2584, 2
-  %2588 = extractvalue %dx.types.CBufRet.i32 %2584, 3
-  %2589 = mul i32 %2585, %2580
-  %2590 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2581, i32 %2586, i32 %2589)  ; IMad(a,b,c)
-  %2591 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2582, i32 %2587, i32 %2590)  ; IMad(a,b,c)
-  %2592 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2583, i32 %2588, i32 %2591)  ; IMad(a,b,c)
-  %2593 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2592, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2594 = extractvalue %dx.types.ResRet.i32 %2593, 0
-  %2595 = uitofp i32 %2594 to float
-  br label %2683
-
-; <label>:2596                                    ; preds = %2565
-  %2597 = icmp eq i32 %940, 2
-  br i1 %2597, label %2598, label %2683
-
-; <label>:2598                                    ; preds = %2596
-  %2599 = fsub fast float %22, %20
-  %2600 = fcmp fast olt float %1378, %20
-  br i1 %2600, label %2601, label %2614
-
-; <label>:2601                                    ; preds = %2598
-  %2602 = fsub fast float %20, %1378
-  %2603 = fdiv fast float %2602, %2599
-  %2604 = fptoui float %2603 to i32
-  %2605 = uitofp i32 %2604 to float
-  %2606 = fmul fast float %2605, %2599
-  %2607 = fsub fast float %2602, %2606
-  %2608 = and i32 %2604, 1
-  %2609 = icmp eq i32 %2608, 0
-  br i1 %2609, label %2610, label %2612
-
-; <label>:2610                                    ; preds = %2601
-  %2611 = fadd fast float %2607, %20
-  br label %2629
-
-; <label>:2612                                    ; preds = %2601
-  %2613 = fsub fast float %22, %2607
-  br label %2629
-
-; <label>:2614                                    ; preds = %2598
-  %2615 = fcmp fast ogt float %1378, %22
-  br i1 %2615, label %2616, label %2629
-
-; <label>:2616                                    ; preds = %2614
-  %2617 = fsub fast float %1378, %22
-  %2618 = fdiv fast float %2617, %2599
-  %2619 = fptoui float %2618 to i32
-  %2620 = uitofp i32 %2619 to float
-  %2621 = fmul fast float %2620, %2599
-  %2622 = fsub fast float %2617, %2621
-  %2623 = and i32 %2619, 1
-  %2624 = icmp eq i32 %2623, 0
-  br i1 %2624, label %2625, label %2627
-
-; <label>:2625                                    ; preds = %2616
-  %2626 = fsub fast float %22, %2622
-  br label %2629
-
-; <label>:2627                                    ; preds = %2616
-  %2628 = fadd fast float %2622, %20
-  br label %2629
-
-; <label>:2629                                    ; preds = %2627, %2625, %2614, %2612, %2610
-  %2630 = phi float [ %2611, %2610 ], [ %2613, %2612 ], [ %2626, %2625 ], [ %2628, %2627 ], [ %1378, %2614 ]
-  %2631 = fptoui float %2630 to i32
-  %2632 = fsub fast float %24, %20
-  %2633 = fcmp fast olt float %2104, %20
-  br i1 %2633, label %2634, label %2647
-
-; <label>:2634                                    ; preds = %2629
-  %2635 = fsub fast float %20, %2104
-  %2636 = fdiv fast float %2635, %2632
-  %2637 = fptoui float %2636 to i32
-  %2638 = uitofp i32 %2637 to float
-  %2639 = fmul fast float %2638, %2632
-  %2640 = fsub fast float %2635, %2639
-  %2641 = and i32 %2637, 1
-  %2642 = icmp eq i32 %2641, 0
-  br i1 %2642, label %2643, label %2645
-
-; <label>:2643                                    ; preds = %2634
-  %2644 = fadd fast float %2640, %20
-  br label %2662
-
-; <label>:2645                                    ; preds = %2634
-  %2646 = fsub fast float %24, %2640
-  br label %2662
-
-; <label>:2647                                    ; preds = %2629
-  %2648 = fcmp fast ogt float %2104, %24
-  br i1 %2648, label %2649, label %2662
-
-; <label>:2649                                    ; preds = %2647
-  %2650 = fsub fast float %2104, %24
-  %2651 = fdiv fast float %2650, %2632
-  %2652 = fptoui float %2651 to i32
-  %2653 = uitofp i32 %2652 to float
-  %2654 = fmul fast float %2653, %2632
-  %2655 = fsub fast float %2650, %2654
-  %2656 = and i32 %2652, 1
-  %2657 = icmp eq i32 %2656, 0
-  br i1 %2657, label %2658, label %2660
-
-; <label>:2658                                    ; preds = %2649
-  %2659 = fsub fast float %24, %2655
-  br label %2662
-
-; <label>:2660                                    ; preds = %2649
-  %2661 = fadd fast float %2655, %20
-  br label %2662
-
-; <label>:2662                                    ; preds = %2660, %2658, %2647, %2645, %2643
-  %2663 = phi float [ %2644, %2643 ], [ %2646, %2645 ], [ %2659, %2658 ], [ %2661, %2660 ], [ %2104, %2647 ]
-  %2664 = fptoui float %2663 to i32
-  %2665 = uitofp i32 %2664 to float
-  %2666 = uitofp i32 %2631 to float
-  %2667 = fptoui float %45 to i32
-  %2668 = fptoui float %182 to i32
-  %2669 = fptoui float %2665 to i32
-  %2670 = fptoui float %2666 to i32
-  %2671 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2672 = extractvalue %dx.types.CBufRet.i32 %2671, 0
-  %2673 = extractvalue %dx.types.CBufRet.i32 %2671, 1
-  %2674 = extractvalue %dx.types.CBufRet.i32 %2671, 2
-  %2675 = extractvalue %dx.types.CBufRet.i32 %2671, 3
-  %2676 = mul i32 %2672, %2667
-  %2677 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2668, i32 %2673, i32 %2676)  ; IMad(a,b,c)
-  %2678 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2669, i32 %2674, i32 %2677)  ; IMad(a,b,c)
-  %2679 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2670, i32 %2675, i32 %2678)  ; IMad(a,b,c)
-  %2680 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2679, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2681 = extractvalue %dx.types.ResRet.i32 %2680, 0
-  %2682 = uitofp i32 %2681 to float
-  br label %2683
-
-; <label>:2683                                    ; preds = %2662, %2596, %2567, %2550, %2540
-  %2684 = phi float [ %2564, %2550 ], [ 0.000000e+00, %2540 ], [ %2595, %2567 ], [ %2682, %2662 ], [ 0.000000e+00, %2596 ]
-  %2685 = fadd fast float %938, 2.000000e+00
-  br i1 %941, label %2686, label %2711
-
-; <label>:2686                                    ; preds = %2683
-  %2687 = fcmp fast oge float %937, 0.000000e+00
-  %2688 = fptoui float %937 to i32
-  %2689 = icmp ult i32 %2688, %13
-  %2690 = and i1 %2687, %2689
-  %2691 = fcmp fast oge float %2685, 0.000000e+00
-  %2692 = and i1 %2691, %2690
-  %2693 = fptoui float %2685 to i32
-  %2694 = icmp ult i32 %2693, %15
-  %2695 = and i1 %2694, %2692
-  br i1 %2695, label %2696, label %2829
-
-; <label>:2696                                    ; preds = %2686
-  %2697 = fptoui float %45 to i32
-  %2698 = fptoui float %182 to i32
-  %2699 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2700 = extractvalue %dx.types.CBufRet.i32 %2699, 0
-  %2701 = extractvalue %dx.types.CBufRet.i32 %2699, 1
-  %2702 = extractvalue %dx.types.CBufRet.i32 %2699, 2
-  %2703 = extractvalue %dx.types.CBufRet.i32 %2699, 3
-  %2704 = mul i32 %2700, %2697
-  %2705 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2698, i32 %2701, i32 %2704)  ; IMad(a,b,c)
-  %2706 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2693, i32 %2702, i32 %2705)  ; IMad(a,b,c)
-  %2707 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2688, i32 %2703, i32 %2706)  ; IMad(a,b,c)
-  %2708 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2707, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2709 = extractvalue %dx.types.ResRet.i32 %2708, 0
-  %2710 = uitofp i32 %2709 to float
-  br label %2829
-
-; <label>:2711                                    ; preds = %2683
-  %2712 = icmp eq i32 %940, 1
-  br i1 %2712, label %2713, label %2742
-
-; <label>:2713                                    ; preds = %2711
-  %2714 = add i32 %13, -1
-  %2715 = uitofp i32 %2714 to float
-  %2716 = call float @dx.op.binary.f32(i32 35, float %937, float 0.000000e+00)  ; FMax(a,b)
-  %2717 = call float @dx.op.binary.f32(i32 36, float %2716, float %2715)  ; FMin(a,b)
-  %2718 = fptoui float %2717 to i32
-  %2719 = add i32 %15, -1
-  %2720 = uitofp i32 %2719 to float
-  %2721 = call float @dx.op.binary.f32(i32 35, float %2685, float 0.000000e+00)  ; FMax(a,b)
-  %2722 = call float @dx.op.binary.f32(i32 36, float %2721, float %2720)  ; FMin(a,b)
-  %2723 = fptoui float %2722 to i32
-  %2724 = uitofp i32 %2723 to float
-  %2725 = uitofp i32 %2718 to float
-  %2726 = fptoui float %45 to i32
-  %2727 = fptoui float %182 to i32
-  %2728 = fptoui float %2724 to i32
-  %2729 = fptoui float %2725 to i32
-  %2730 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2731 = extractvalue %dx.types.CBufRet.i32 %2730, 0
-  %2732 = extractvalue %dx.types.CBufRet.i32 %2730, 1
-  %2733 = extractvalue %dx.types.CBufRet.i32 %2730, 2
-  %2734 = extractvalue %dx.types.CBufRet.i32 %2730, 3
-  %2735 = mul i32 %2731, %2726
-  %2736 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2727, i32 %2732, i32 %2735)  ; IMad(a,b,c)
-  %2737 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2728, i32 %2733, i32 %2736)  ; IMad(a,b,c)
-  %2738 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2729, i32 %2734, i32 %2737)  ; IMad(a,b,c)
-  %2739 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2738, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2740 = extractvalue %dx.types.ResRet.i32 %2739, 0
-  %2741 = uitofp i32 %2740 to float
-  br label %2829
-
-; <label>:2742                                    ; preds = %2711
-  %2743 = icmp eq i32 %940, 2
-  br i1 %2743, label %2744, label %2829
-
-; <label>:2744                                    ; preds = %2742
-  %2745 = fsub fast float %22, %20
-  %2746 = fcmp fast olt float %937, %20
-  br i1 %2746, label %2747, label %2760
-
-; <label>:2747                                    ; preds = %2744
-  %2748 = fsub fast float %20, %937
-  %2749 = fdiv fast float %2748, %2745
-  %2750 = fptoui float %2749 to i32
-  %2751 = uitofp i32 %2750 to float
-  %2752 = fmul fast float %2751, %2745
-  %2753 = fsub fast float %2748, %2752
-  %2754 = and i32 %2750, 1
-  %2755 = icmp eq i32 %2754, 0
-  br i1 %2755, label %2756, label %2758
-
-; <label>:2756                                    ; preds = %2747
-  %2757 = fadd fast float %2753, %20
-  br label %2775
-
-; <label>:2758                                    ; preds = %2747
-  %2759 = fsub fast float %22, %2753
-  br label %2775
-
-; <label>:2760                                    ; preds = %2744
-  %2761 = fcmp fast ogt float %937, %22
-  br i1 %2761, label %2762, label %2775
-
-; <label>:2762                                    ; preds = %2760
-  %2763 = fsub fast float %937, %22
-  %2764 = fdiv fast float %2763, %2745
-  %2765 = fptoui float %2764 to i32
-  %2766 = uitofp i32 %2765 to float
-  %2767 = fmul fast float %2766, %2745
-  %2768 = fsub fast float %2763, %2767
-  %2769 = and i32 %2765, 1
-  %2770 = icmp eq i32 %2769, 0
-  br i1 %2770, label %2771, label %2773
-
-; <label>:2771                                    ; preds = %2762
-  %2772 = fsub fast float %22, %2768
-  br label %2775
-
-; <label>:2773                                    ; preds = %2762
-  %2774 = fadd fast float %2768, %20
-  br label %2775
-
-; <label>:2775                                    ; preds = %2773, %2771, %2760, %2758, %2756
-  %2776 = phi float [ %2757, %2756 ], [ %2759, %2758 ], [ %2772, %2771 ], [ %2774, %2773 ], [ %937, %2760 ]
-  %2777 = fptoui float %2776 to i32
-  %2778 = fsub fast float %24, %20
-  %2779 = fcmp fast olt float %2685, %20
-  br i1 %2779, label %2780, label %2793
-
-; <label>:2780                                    ; preds = %2775
-  %2781 = fsub fast float %20, %2685
-  %2782 = fdiv fast float %2781, %2778
-  %2783 = fptoui float %2782 to i32
-  %2784 = uitofp i32 %2783 to float
-  %2785 = fmul fast float %2784, %2778
-  %2786 = fsub fast float %2781, %2785
-  %2787 = and i32 %2783, 1
-  %2788 = icmp eq i32 %2787, 0
-  br i1 %2788, label %2789, label %2791
-
-; <label>:2789                                    ; preds = %2780
-  %2790 = fadd fast float %2786, %20
-  br label %2808
-
-; <label>:2791                                    ; preds = %2780
-  %2792 = fsub fast float %24, %2786
-  br label %2808
-
-; <label>:2793                                    ; preds = %2775
-  %2794 = fcmp fast ogt float %2685, %24
-  br i1 %2794, label %2795, label %2808
-
-; <label>:2795                                    ; preds = %2793
-  %2796 = fsub fast float %2685, %24
-  %2797 = fdiv fast float %2796, %2778
-  %2798 = fptoui float %2797 to i32
-  %2799 = uitofp i32 %2798 to float
-  %2800 = fmul fast float %2799, %2778
-  %2801 = fsub fast float %2796, %2800
-  %2802 = and i32 %2798, 1
-  %2803 = icmp eq i32 %2802, 0
-  br i1 %2803, label %2804, label %2806
-
-; <label>:2804                                    ; preds = %2795
-  %2805 = fsub fast float %24, %2801
-  br label %2808
-
-; <label>:2806                                    ; preds = %2795
-  %2807 = fadd fast float %2801, %20
-  br label %2808
-
-; <label>:2808                                    ; preds = %2806, %2804, %2793, %2791, %2789
-  %2809 = phi float [ %2790, %2789 ], [ %2792, %2791 ], [ %2805, %2804 ], [ %2807, %2806 ], [ %2685, %2793 ]
-  %2810 = fptoui float %2809 to i32
-  %2811 = uitofp i32 %2810 to float
-  %2812 = uitofp i32 %2777 to float
-  %2813 = fptoui float %45 to i32
-  %2814 = fptoui float %182 to i32
-  %2815 = fptoui float %2811 to i32
-  %2816 = fptoui float %2812 to i32
-  %2817 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2818 = extractvalue %dx.types.CBufRet.i32 %2817, 0
-  %2819 = extractvalue %dx.types.CBufRet.i32 %2817, 1
-  %2820 = extractvalue %dx.types.CBufRet.i32 %2817, 2
-  %2821 = extractvalue %dx.types.CBufRet.i32 %2817, 3
-  %2822 = mul i32 %2818, %2813
-  %2823 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2814, i32 %2819, i32 %2822)  ; IMad(a,b,c)
-  %2824 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2815, i32 %2820, i32 %2823)  ; IMad(a,b,c)
-  %2825 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2816, i32 %2821, i32 %2824)  ; IMad(a,b,c)
-  %2826 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2825, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2827 = extractvalue %dx.types.ResRet.i32 %2826, 0
-  %2828 = uitofp i32 %2827 to float
-  br label %2829
-
-; <label>:2829                                    ; preds = %2808, %2742, %2713, %2696, %2686
-  %2830 = phi float [ %2710, %2696 ], [ 0.000000e+00, %2686 ], [ %2741, %2713 ], [ %2828, %2808 ], [ 0.000000e+00, %2742 ]
-  br i1 %941, label %2831, label %2856
-
-; <label>:2831                                    ; preds = %2829
-  %2832 = fcmp fast oge float %936, 0.000000e+00
-  %2833 = fptoui float %936 to i32
-  %2834 = icmp ult i32 %2833, %13
-  %2835 = and i1 %2832, %2834
-  %2836 = fcmp fast oge float %2685, 0.000000e+00
-  %2837 = and i1 %2836, %2835
-  %2838 = fptoui float %2685 to i32
-  %2839 = icmp ult i32 %2838, %15
-  %2840 = and i1 %2839, %2837
-  br i1 %2840, label %2841, label %2974
-
-; <label>:2841                                    ; preds = %2831
-  %2842 = fptoui float %45 to i32
-  %2843 = fptoui float %182 to i32
-  %2844 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2845 = extractvalue %dx.types.CBufRet.i32 %2844, 0
-  %2846 = extractvalue %dx.types.CBufRet.i32 %2844, 1
-  %2847 = extractvalue %dx.types.CBufRet.i32 %2844, 2
-  %2848 = extractvalue %dx.types.CBufRet.i32 %2844, 3
-  %2849 = mul i32 %2845, %2842
-  %2850 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2843, i32 %2846, i32 %2849)  ; IMad(a,b,c)
-  %2851 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2838, i32 %2847, i32 %2850)  ; IMad(a,b,c)
-  %2852 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2833, i32 %2848, i32 %2851)  ; IMad(a,b,c)
-  %2853 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2852, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2854 = extractvalue %dx.types.ResRet.i32 %2853, 0
-  %2855 = uitofp i32 %2854 to float
-  br label %2974
-
-; <label>:2856                                    ; preds = %2829
-  %2857 = icmp eq i32 %940, 1
-  br i1 %2857, label %2858, label %2887
-
-; <label>:2858                                    ; preds = %2856
-  %2859 = add i32 %13, -1
-  %2860 = uitofp i32 %2859 to float
-  %2861 = call float @dx.op.binary.f32(i32 35, float %936, float 0.000000e+00)  ; FMax(a,b)
-  %2862 = call float @dx.op.binary.f32(i32 36, float %2861, float %2860)  ; FMin(a,b)
-  %2863 = fptoui float %2862 to i32
-  %2864 = add i32 %15, -1
-  %2865 = uitofp i32 %2864 to float
-  %2866 = call float @dx.op.binary.f32(i32 35, float %2685, float 0.000000e+00)  ; FMax(a,b)
-  %2867 = call float @dx.op.binary.f32(i32 36, float %2866, float %2865)  ; FMin(a,b)
-  %2868 = fptoui float %2867 to i32
-  %2869 = uitofp i32 %2868 to float
-  %2870 = uitofp i32 %2863 to float
-  %2871 = fptoui float %45 to i32
-  %2872 = fptoui float %182 to i32
-  %2873 = fptoui float %2869 to i32
-  %2874 = fptoui float %2870 to i32
-  %2875 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2876 = extractvalue %dx.types.CBufRet.i32 %2875, 0
-  %2877 = extractvalue %dx.types.CBufRet.i32 %2875, 1
-  %2878 = extractvalue %dx.types.CBufRet.i32 %2875, 2
-  %2879 = extractvalue %dx.types.CBufRet.i32 %2875, 3
-  %2880 = mul i32 %2876, %2871
-  %2881 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2872, i32 %2877, i32 %2880)  ; IMad(a,b,c)
-  %2882 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2873, i32 %2878, i32 %2881)  ; IMad(a,b,c)
-  %2883 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2874, i32 %2879, i32 %2882)  ; IMad(a,b,c)
-  %2884 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2883, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2885 = extractvalue %dx.types.ResRet.i32 %2884, 0
-  %2886 = uitofp i32 %2885 to float
-  br label %2974
-
-; <label>:2887                                    ; preds = %2856
-  %2888 = icmp eq i32 %940, 2
-  br i1 %2888, label %2889, label %2974
-
-; <label>:2889                                    ; preds = %2887
-  %2890 = fsub fast float %22, %20
-  %2891 = fcmp fast olt float %936, %20
-  br i1 %2891, label %2892, label %2905
-
-; <label>:2892                                    ; preds = %2889
-  %2893 = fsub fast float %20, %936
-  %2894 = fdiv fast float %2893, %2890
-  %2895 = fptoui float %2894 to i32
-  %2896 = uitofp i32 %2895 to float
-  %2897 = fmul fast float %2896, %2890
-  %2898 = fsub fast float %2893, %2897
-  %2899 = and i32 %2895, 1
-  %2900 = icmp eq i32 %2899, 0
-  br i1 %2900, label %2901, label %2903
-
-; <label>:2901                                    ; preds = %2892
-  %2902 = fadd fast float %2898, %20
-  br label %2920
-
-; <label>:2903                                    ; preds = %2892
-  %2904 = fsub fast float %22, %2898
-  br label %2920
-
-; <label>:2905                                    ; preds = %2889
-  %2906 = fcmp fast ogt float %936, %22
-  br i1 %2906, label %2907, label %2920
-
-; <label>:2907                                    ; preds = %2905
-  %2908 = fsub fast float %936, %22
-  %2909 = fdiv fast float %2908, %2890
-  %2910 = fptoui float %2909 to i32
-  %2911 = uitofp i32 %2910 to float
-  %2912 = fmul fast float %2911, %2890
-  %2913 = fsub fast float %2908, %2912
-  %2914 = and i32 %2910, 1
-  %2915 = icmp eq i32 %2914, 0
-  br i1 %2915, label %2916, label %2918
-
-; <label>:2916                                    ; preds = %2907
-  %2917 = fsub fast float %22, %2913
-  br label %2920
-
-; <label>:2918                                    ; preds = %2907
-  %2919 = fadd fast float %2913, %20
-  br label %2920
-
-; <label>:2920                                    ; preds = %2918, %2916, %2905, %2903, %2901
-  %2921 = phi float [ %2902, %2901 ], [ %2904, %2903 ], [ %2917, %2916 ], [ %2919, %2918 ], [ %936, %2905 ]
-  %2922 = fptoui float %2921 to i32
-  %2923 = fsub fast float %24, %20
-  %2924 = fcmp fast olt float %2685, %20
-  br i1 %2924, label %2925, label %2938
-
-; <label>:2925                                    ; preds = %2920
-  %2926 = fsub fast float %20, %2685
-  %2927 = fdiv fast float %2926, %2923
-  %2928 = fptoui float %2927 to i32
-  %2929 = uitofp i32 %2928 to float
-  %2930 = fmul fast float %2929, %2923
-  %2931 = fsub fast float %2926, %2930
-  %2932 = and i32 %2928, 1
-  %2933 = icmp eq i32 %2932, 0
-  br i1 %2933, label %2934, label %2936
-
-; <label>:2934                                    ; preds = %2925
-  %2935 = fadd fast float %2931, %20
-  br label %2953
-
-; <label>:2936                                    ; preds = %2925
-  %2937 = fsub fast float %24, %2931
-  br label %2953
-
-; <label>:2938                                    ; preds = %2920
-  %2939 = fcmp fast ogt float %2685, %24
-  br i1 %2939, label %2940, label %2953
-
-; <label>:2940                                    ; preds = %2938
-  %2941 = fsub fast float %2685, %24
-  %2942 = fdiv fast float %2941, %2923
-  %2943 = fptoui float %2942 to i32
-  %2944 = uitofp i32 %2943 to float
-  %2945 = fmul fast float %2944, %2923
-  %2946 = fsub fast float %2941, %2945
-  %2947 = and i32 %2943, 1
-  %2948 = icmp eq i32 %2947, 0
-  br i1 %2948, label %2949, label %2951
-
-; <label>:2949                                    ; preds = %2940
-  %2950 = fsub fast float %24, %2946
-  br label %2953
-
-; <label>:2951                                    ; preds = %2940
-  %2952 = fadd fast float %2946, %20
-  br label %2953
-
-; <label>:2953                                    ; preds = %2951, %2949, %2938, %2936, %2934
-  %2954 = phi float [ %2935, %2934 ], [ %2937, %2936 ], [ %2950, %2949 ], [ %2952, %2951 ], [ %2685, %2938 ]
-  %2955 = fptoui float %2954 to i32
-  %2956 = uitofp i32 %2955 to float
-  %2957 = uitofp i32 %2922 to float
-  %2958 = fptoui float %45 to i32
-  %2959 = fptoui float %182 to i32
-  %2960 = fptoui float %2956 to i32
-  %2961 = fptoui float %2957 to i32
-  %2962 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2963 = extractvalue %dx.types.CBufRet.i32 %2962, 0
-  %2964 = extractvalue %dx.types.CBufRet.i32 %2962, 1
-  %2965 = extractvalue %dx.types.CBufRet.i32 %2962, 2
-  %2966 = extractvalue %dx.types.CBufRet.i32 %2962, 3
-  %2967 = mul i32 %2963, %2958
-  %2968 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2959, i32 %2964, i32 %2967)  ; IMad(a,b,c)
-  %2969 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2960, i32 %2965, i32 %2968)  ; IMad(a,b,c)
-  %2970 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2961, i32 %2966, i32 %2969)  ; IMad(a,b,c)
-  %2971 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2970, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2972 = extractvalue %dx.types.ResRet.i32 %2971, 0
-  %2973 = uitofp i32 %2972 to float
-  br label %2974
-
-; <label>:2974                                    ; preds = %2953, %2887, %2858, %2841, %2831
-  %2975 = phi float [ %2855, %2841 ], [ 0.000000e+00, %2831 ], [ %2886, %2858 ], [ %2973, %2953 ], [ 0.000000e+00, %2887 ]
-  br i1 %941, label %2976, label %3001
-
-; <label>:2976                                    ; preds = %2974
-  %2977 = fcmp fast oge float %1232, 0.000000e+00
-  %2978 = fptoui float %1232 to i32
-  %2979 = icmp ult i32 %2978, %13
-  %2980 = and i1 %2977, %2979
-  %2981 = fcmp fast oge float %2685, 0.000000e+00
-  %2982 = and i1 %2981, %2980
-  %2983 = fptoui float %2685 to i32
-  %2984 = icmp ult i32 %2983, %15
-  %2985 = and i1 %2984, %2982
-  br i1 %2985, label %2986, label %3119
-
-; <label>:2986                                    ; preds = %2976
-  %2987 = fptoui float %45 to i32
-  %2988 = fptoui float %182 to i32
-  %2989 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %2990 = extractvalue %dx.types.CBufRet.i32 %2989, 0
-  %2991 = extractvalue %dx.types.CBufRet.i32 %2989, 1
-  %2992 = extractvalue %dx.types.CBufRet.i32 %2989, 2
-  %2993 = extractvalue %dx.types.CBufRet.i32 %2989, 3
-  %2994 = mul i32 %2990, %2987
-  %2995 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2988, i32 %2991, i32 %2994)  ; IMad(a,b,c)
-  %2996 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2983, i32 %2992, i32 %2995)  ; IMad(a,b,c)
-  %2997 = call i32 @dx.op.tertiary.i32(i32 48, i32 %2978, i32 %2993, i32 %2996)  ; IMad(a,b,c)
-  %2998 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %2997, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %2999 = extractvalue %dx.types.ResRet.i32 %2998, 0
-  %3000 = uitofp i32 %2999 to float
-  br label %3119
-
-; <label>:3001                                    ; preds = %2974
-  %3002 = icmp eq i32 %940, 1
-  br i1 %3002, label %3003, label %3032
-
-; <label>:3003                                    ; preds = %3001
-  %3004 = add i32 %13, -1
-  %3005 = uitofp i32 %3004 to float
-  %3006 = call float @dx.op.binary.f32(i32 35, float %1232, float 0.000000e+00)  ; FMax(a,b)
-  %3007 = call float @dx.op.binary.f32(i32 36, float %3006, float %3005)  ; FMin(a,b)
-  %3008 = fptoui float %3007 to i32
-  %3009 = add i32 %15, -1
-  %3010 = uitofp i32 %3009 to float
-  %3011 = call float @dx.op.binary.f32(i32 35, float %2685, float 0.000000e+00)  ; FMax(a,b)
-  %3012 = call float @dx.op.binary.f32(i32 36, float %3011, float %3010)  ; FMin(a,b)
-  %3013 = fptoui float %3012 to i32
-  %3014 = uitofp i32 %3013 to float
-  %3015 = uitofp i32 %3008 to float
-  %3016 = fptoui float %45 to i32
-  %3017 = fptoui float %182 to i32
-  %3018 = fptoui float %3014 to i32
-  %3019 = fptoui float %3015 to i32
-  %3020 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3021 = extractvalue %dx.types.CBufRet.i32 %3020, 0
-  %3022 = extractvalue %dx.types.CBufRet.i32 %3020, 1
-  %3023 = extractvalue %dx.types.CBufRet.i32 %3020, 2
-  %3024 = extractvalue %dx.types.CBufRet.i32 %3020, 3
-  %3025 = mul i32 %3021, %3016
-  %3026 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3017, i32 %3022, i32 %3025)  ; IMad(a,b,c)
-  %3027 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3018, i32 %3023, i32 %3026)  ; IMad(a,b,c)
-  %3028 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3019, i32 %3024, i32 %3027)  ; IMad(a,b,c)
-  %3029 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3028, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3030 = extractvalue %dx.types.ResRet.i32 %3029, 0
-  %3031 = uitofp i32 %3030 to float
-  br label %3119
-
-; <label>:3032                                    ; preds = %3001
-  %3033 = icmp eq i32 %940, 2
-  br i1 %3033, label %3034, label %3119
-
-; <label>:3034                                    ; preds = %3032
-  %3035 = fsub fast float %22, %20
-  %3036 = fcmp fast olt float %1232, %20
-  br i1 %3036, label %3037, label %3050
-
-; <label>:3037                                    ; preds = %3034
-  %3038 = fsub fast float %20, %1232
-  %3039 = fdiv fast float %3038, %3035
-  %3040 = fptoui float %3039 to i32
-  %3041 = uitofp i32 %3040 to float
-  %3042 = fmul fast float %3041, %3035
-  %3043 = fsub fast float %3038, %3042
-  %3044 = and i32 %3040, 1
-  %3045 = icmp eq i32 %3044, 0
-  br i1 %3045, label %3046, label %3048
-
-; <label>:3046                                    ; preds = %3037
-  %3047 = fadd fast float %3043, %20
-  br label %3065
-
-; <label>:3048                                    ; preds = %3037
-  %3049 = fsub fast float %22, %3043
-  br label %3065
-
-; <label>:3050                                    ; preds = %3034
-  %3051 = fcmp fast ogt float %1232, %22
-  br i1 %3051, label %3052, label %3065
-
-; <label>:3052                                    ; preds = %3050
-  %3053 = fsub fast float %1232, %22
-  %3054 = fdiv fast float %3053, %3035
-  %3055 = fptoui float %3054 to i32
-  %3056 = uitofp i32 %3055 to float
-  %3057 = fmul fast float %3056, %3035
-  %3058 = fsub fast float %3053, %3057
-  %3059 = and i32 %3055, 1
-  %3060 = icmp eq i32 %3059, 0
-  br i1 %3060, label %3061, label %3063
-
-; <label>:3061                                    ; preds = %3052
-  %3062 = fsub fast float %22, %3058
-  br label %3065
-
-; <label>:3063                                    ; preds = %3052
-  %3064 = fadd fast float %3058, %20
-  br label %3065
-
-; <label>:3065                                    ; preds = %3063, %3061, %3050, %3048, %3046
-  %3066 = phi float [ %3047, %3046 ], [ %3049, %3048 ], [ %3062, %3061 ], [ %3064, %3063 ], [ %1232, %3050 ]
-  %3067 = fptoui float %3066 to i32
-  %3068 = fsub fast float %24, %20
-  %3069 = fcmp fast olt float %2685, %20
-  br i1 %3069, label %3070, label %3083
-
-; <label>:3070                                    ; preds = %3065
-  %3071 = fsub fast float %20, %2685
-  %3072 = fdiv fast float %3071, %3068
-  %3073 = fptoui float %3072 to i32
-  %3074 = uitofp i32 %3073 to float
-  %3075 = fmul fast float %3074, %3068
-  %3076 = fsub fast float %3071, %3075
-  %3077 = and i32 %3073, 1
-  %3078 = icmp eq i32 %3077, 0
-  br i1 %3078, label %3079, label %3081
-
-; <label>:3079                                    ; preds = %3070
-  %3080 = fadd fast float %3076, %20
-  br label %3098
-
-; <label>:3081                                    ; preds = %3070
-  %3082 = fsub fast float %24, %3076
-  br label %3098
-
-; <label>:3083                                    ; preds = %3065
-  %3084 = fcmp fast ogt float %2685, %24
-  br i1 %3084, label %3085, label %3098
-
-; <label>:3085                                    ; preds = %3083
-  %3086 = fsub fast float %2685, %24
-  %3087 = fdiv fast float %3086, %3068
-  %3088 = fptoui float %3087 to i32
-  %3089 = uitofp i32 %3088 to float
-  %3090 = fmul fast float %3089, %3068
-  %3091 = fsub fast float %3086, %3090
-  %3092 = and i32 %3088, 1
-  %3093 = icmp eq i32 %3092, 0
-  br i1 %3093, label %3094, label %3096
-
-; <label>:3094                                    ; preds = %3085
-  %3095 = fsub fast float %24, %3091
-  br label %3098
-
-; <label>:3096                                    ; preds = %3085
-  %3097 = fadd fast float %3091, %20
-  br label %3098
-
-; <label>:3098                                    ; preds = %3096, %3094, %3083, %3081, %3079
-  %3099 = phi float [ %3080, %3079 ], [ %3082, %3081 ], [ %3095, %3094 ], [ %3097, %3096 ], [ %2685, %3083 ]
-  %3100 = fptoui float %3099 to i32
-  %3101 = uitofp i32 %3100 to float
-  %3102 = uitofp i32 %3067 to float
-  %3103 = fptoui float %45 to i32
-  %3104 = fptoui float %182 to i32
-  %3105 = fptoui float %3101 to i32
-  %3106 = fptoui float %3102 to i32
-  %3107 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3108 = extractvalue %dx.types.CBufRet.i32 %3107, 0
-  %3109 = extractvalue %dx.types.CBufRet.i32 %3107, 1
-  %3110 = extractvalue %dx.types.CBufRet.i32 %3107, 2
-  %3111 = extractvalue %dx.types.CBufRet.i32 %3107, 3
-  %3112 = mul i32 %3108, %3103
-  %3113 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3104, i32 %3109, i32 %3112)  ; IMad(a,b,c)
-  %3114 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3105, i32 %3110, i32 %3113)  ; IMad(a,b,c)
-  %3115 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3106, i32 %3111, i32 %3114)  ; IMad(a,b,c)
-  %3116 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3115, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3117 = extractvalue %dx.types.ResRet.i32 %3116, 0
-  %3118 = uitofp i32 %3117 to float
-  br label %3119
-
-; <label>:3119                                    ; preds = %3098, %3032, %3003, %2986, %2976
-  %3120 = phi float [ %3000, %2986 ], [ 0.000000e+00, %2976 ], [ %3031, %3003 ], [ %3118, %3098 ], [ 0.000000e+00, %3032 ]
-  br i1 %941, label %3121, label %3146
-
-; <label>:3121                                    ; preds = %3119
-  %3122 = fcmp fast oge float %1378, 0.000000e+00
-  %3123 = fptoui float %1378 to i32
-  %3124 = icmp ult i32 %3123, %13
-  %3125 = and i1 %3122, %3124
-  %3126 = fcmp fast oge float %2685, 0.000000e+00
-  %3127 = and i1 %3126, %3125
-  %3128 = fptoui float %2685 to i32
-  %3129 = icmp ult i32 %3128, %15
-  %3130 = and i1 %3129, %3127
-  br i1 %3130, label %3131, label %3264
-
-; <label>:3131                                    ; preds = %3121
-  %3132 = fptoui float %45 to i32
-  %3133 = fptoui float %182 to i32
-  %3134 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3135 = extractvalue %dx.types.CBufRet.i32 %3134, 0
-  %3136 = extractvalue %dx.types.CBufRet.i32 %3134, 1
-  %3137 = extractvalue %dx.types.CBufRet.i32 %3134, 2
-  %3138 = extractvalue %dx.types.CBufRet.i32 %3134, 3
-  %3139 = mul i32 %3135, %3132
-  %3140 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3133, i32 %3136, i32 %3139)  ; IMad(a,b,c)
-  %3141 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3128, i32 %3137, i32 %3140)  ; IMad(a,b,c)
-  %3142 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3123, i32 %3138, i32 %3141)  ; IMad(a,b,c)
-  %3143 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3142, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3144 = extractvalue %dx.types.ResRet.i32 %3143, 0
-  %3145 = uitofp i32 %3144 to float
-  br label %3264
-
-; <label>:3146                                    ; preds = %3119
-  %3147 = icmp eq i32 %940, 1
-  br i1 %3147, label %3148, label %3177
-
-; <label>:3148                                    ; preds = %3146
-  %3149 = add i32 %13, -1
-  %3150 = uitofp i32 %3149 to float
-  %3151 = call float @dx.op.binary.f32(i32 35, float %1378, float 0.000000e+00)  ; FMax(a,b)
-  %3152 = call float @dx.op.binary.f32(i32 36, float %3151, float %3150)  ; FMin(a,b)
-  %3153 = fptoui float %3152 to i32
-  %3154 = add i32 %15, -1
-  %3155 = uitofp i32 %3154 to float
-  %3156 = call float @dx.op.binary.f32(i32 35, float %2685, float 0.000000e+00)  ; FMax(a,b)
-  %3157 = call float @dx.op.binary.f32(i32 36, float %3156, float %3155)  ; FMin(a,b)
-  %3158 = fptoui float %3157 to i32
-  %3159 = uitofp i32 %3158 to float
-  %3160 = uitofp i32 %3153 to float
-  %3161 = fptoui float %45 to i32
-  %3162 = fptoui float %182 to i32
-  %3163 = fptoui float %3159 to i32
-  %3164 = fptoui float %3160 to i32
-  %3165 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3166 = extractvalue %dx.types.CBufRet.i32 %3165, 0
-  %3167 = extractvalue %dx.types.CBufRet.i32 %3165, 1
-  %3168 = extractvalue %dx.types.CBufRet.i32 %3165, 2
-  %3169 = extractvalue %dx.types.CBufRet.i32 %3165, 3
-  %3170 = mul i32 %3166, %3161
-  %3171 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3162, i32 %3167, i32 %3170)  ; IMad(a,b,c)
-  %3172 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3163, i32 %3168, i32 %3171)  ; IMad(a,b,c)
-  %3173 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3164, i32 %3169, i32 %3172)  ; IMad(a,b,c)
-  %3174 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3173, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3175 = extractvalue %dx.types.ResRet.i32 %3174, 0
-  %3176 = uitofp i32 %3175 to float
-  br label %3264
-
-; <label>:3177                                    ; preds = %3146
-  %3178 = icmp eq i32 %940, 2
-  br i1 %3178, label %3179, label %3264
-
-; <label>:3179                                    ; preds = %3177
-  %3180 = fsub fast float %22, %20
-  %3181 = fcmp fast olt float %1378, %20
-  br i1 %3181, label %3182, label %3195
-
-; <label>:3182                                    ; preds = %3179
-  %3183 = fsub fast float %20, %1378
-  %3184 = fdiv fast float %3183, %3180
-  %3185 = fptoui float %3184 to i32
-  %3186 = uitofp i32 %3185 to float
-  %3187 = fmul fast float %3186, %3180
-  %3188 = fsub fast float %3183, %3187
-  %3189 = and i32 %3185, 1
-  %3190 = icmp eq i32 %3189, 0
-  br i1 %3190, label %3191, label %3193
-
-; <label>:3191                                    ; preds = %3182
-  %3192 = fadd fast float %3188, %20
-  br label %3210
-
-; <label>:3193                                    ; preds = %3182
-  %3194 = fsub fast float %22, %3188
-  br label %3210
-
-; <label>:3195                                    ; preds = %3179
-  %3196 = fcmp fast ogt float %1378, %22
-  br i1 %3196, label %3197, label %3210
-
-; <label>:3197                                    ; preds = %3195
-  %3198 = fsub fast float %1378, %22
-  %3199 = fdiv fast float %3198, %3180
-  %3200 = fptoui float %3199 to i32
-  %3201 = uitofp i32 %3200 to float
-  %3202 = fmul fast float %3201, %3180
-  %3203 = fsub fast float %3198, %3202
-  %3204 = and i32 %3200, 1
-  %3205 = icmp eq i32 %3204, 0
-  br i1 %3205, label %3206, label %3208
-
-; <label>:3206                                    ; preds = %3197
-  %3207 = fsub fast float %22, %3203
-  br label %3210
-
-; <label>:3208                                    ; preds = %3197
-  %3209 = fadd fast float %3203, %20
-  br label %3210
-
-; <label>:3210                                    ; preds = %3208, %3206, %3195, %3193, %3191
-  %3211 = phi float [ %3192, %3191 ], [ %3194, %3193 ], [ %3207, %3206 ], [ %3209, %3208 ], [ %1378, %3195 ]
-  %3212 = fptoui float %3211 to i32
-  %3213 = fsub fast float %24, %20
-  %3214 = fcmp fast olt float %2685, %20
-  br i1 %3214, label %3215, label %3228
-
-; <label>:3215                                    ; preds = %3210
-  %3216 = fsub fast float %20, %2685
-  %3217 = fdiv fast float %3216, %3213
-  %3218 = fptoui float %3217 to i32
-  %3219 = uitofp i32 %3218 to float
-  %3220 = fmul fast float %3219, %3213
-  %3221 = fsub fast float %3216, %3220
-  %3222 = and i32 %3218, 1
-  %3223 = icmp eq i32 %3222, 0
-  br i1 %3223, label %3224, label %3226
-
-; <label>:3224                                    ; preds = %3215
-  %3225 = fadd fast float %3221, %20
-  br label %3243
-
-; <label>:3226                                    ; preds = %3215
-  %3227 = fsub fast float %24, %3221
-  br label %3243
-
-; <label>:3228                                    ; preds = %3210
-  %3229 = fcmp fast ogt float %2685, %24
-  br i1 %3229, label %3230, label %3243
-
-; <label>:3230                                    ; preds = %3228
-  %3231 = fsub fast float %2685, %24
-  %3232 = fdiv fast float %3231, %3213
-  %3233 = fptoui float %3232 to i32
-  %3234 = uitofp i32 %3233 to float
-  %3235 = fmul fast float %3234, %3213
-  %3236 = fsub fast float %3231, %3235
-  %3237 = and i32 %3233, 1
-  %3238 = icmp eq i32 %3237, 0
-  br i1 %3238, label %3239, label %3241
-
-; <label>:3239                                    ; preds = %3230
-  %3240 = fsub fast float %24, %3236
-  br label %3243
-
-; <label>:3241                                    ; preds = %3230
-  %3242 = fadd fast float %3236, %20
-  br label %3243
-
-; <label>:3243                                    ; preds = %3241, %3239, %3228, %3226, %3224
-  %3244 = phi float [ %3225, %3224 ], [ %3227, %3226 ], [ %3240, %3239 ], [ %3242, %3241 ], [ %2685, %3228 ]
-  %3245 = fptoui float %3244 to i32
-  %3246 = uitofp i32 %3245 to float
-  %3247 = uitofp i32 %3212 to float
-  %3248 = fptoui float %45 to i32
-  %3249 = fptoui float %182 to i32
-  %3250 = fptoui float %3246 to i32
-  %3251 = fptoui float %3247 to i32
-  %3252 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %4, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
-  %3253 = extractvalue %dx.types.CBufRet.i32 %3252, 0
-  %3254 = extractvalue %dx.types.CBufRet.i32 %3252, 1
-  %3255 = extractvalue %dx.types.CBufRet.i32 %3252, 2
-  %3256 = extractvalue %dx.types.CBufRet.i32 %3252, 3
-  %3257 = mul i32 %3253, %3248
-  %3258 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3249, i32 %3254, i32 %3257)  ; IMad(a,b,c)
-  %3259 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3250, i32 %3255, i32 %3258)  ; IMad(a,b,c)
-  %3260 = call i32 @dx.op.tertiary.i32(i32 48, i32 %3251, i32 %3256, i32 %3259)  ; IMad(a,b,c)
-  %3261 = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %3, i32 %3260, i32 0, i8 1, i32 4)  ; RawBufferLoad(srv,index,elementOffset,mask,alignment)
-  %3262 = extractvalue %dx.types.ResRet.i32 %3261, 0
-  %3263 = uitofp i32 %3262 to float
-  br label %3264
-
-; <label>:3264                                    ; preds = %3243, %3177, %3148, %3131, %3121
-  %3265 = phi float [ %3145, %3131 ], [ 0.000000e+00, %3121 ], [ %3176, %3148 ], [ %3263, %3243 ], [ 0.000000e+00, %3177 ]
-  %3266 = call float @dx.op.unary.f32(i32 22, float %180)  ; Frc(value)
-  %3267 = call float @dx.op.unary.f32(i32 22, float %181)  ; Frc(value)
-  %3268 = fmul fast float %3267, %3267
-  %3269 = fmul fast float %3268, %3267
-  %3270 = fmul fast float %1086, -7.500000e-01
-  %3271 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2249, float %3270)  ; FMad(a,b,c)
-  %3272 = fmul fast float %1086, 1.500000e+00
-  %3273 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %1668, float %3272)  ; FMad(a,b,c)
-  %3274 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %2249, float %3273)  ; FMad(a,b,c)
-  %3275 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %2830, float %3274)  ; FMad(a,b,c)
-  %3276 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %1668, float %3270)  ; FMad(a,b,c)
-  %3277 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %2249, float %3276)  ; FMad(a,b,c)
-  %3278 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2830, float %3277)  ; FMad(a,b,c)
-  %3279 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3267, float %3268, float %3269, float %1668, float %3271, float %3275, float %3278)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3280 = fmul fast float %1231, -7.500000e-01
-  %3281 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2394, float %3280)  ; FMad(a,b,c)
-  %3282 = fmul fast float %1231, 1.500000e+00
-  %3283 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %1813, float %3282)  ; FMad(a,b,c)
-  %3284 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %2394, float %3283)  ; FMad(a,b,c)
-  %3285 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %2975, float %3284)  ; FMad(a,b,c)
-  %3286 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %1813, float %3280)  ; FMad(a,b,c)
-  %3287 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %2394, float %3286)  ; FMad(a,b,c)
-  %3288 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2975, float %3287)  ; FMad(a,b,c)
-  %3289 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3267, float %3268, float %3269, float %1813, float %3281, float %3285, float %3288)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3290 = fmul fast float %1377, -7.500000e-01
-  %3291 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2539, float %3290)  ; FMad(a,b,c)
-  %3292 = fmul fast float %1377, 1.500000e+00
-  %3293 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %1958, float %3292)  ; FMad(a,b,c)
-  %3294 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %2539, float %3293)  ; FMad(a,b,c)
-  %3295 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %3120, float %3294)  ; FMad(a,b,c)
-  %3296 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %1958, float %3290)  ; FMad(a,b,c)
-  %3297 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %2539, float %3296)  ; FMad(a,b,c)
-  %3298 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3120, float %3297)  ; FMad(a,b,c)
-  %3299 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3267, float %3268, float %3269, float %1958, float %3291, float %3295, float %3298)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3300 = fmul fast float %1523, -7.500000e-01
-  %3301 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %2684, float %3300)  ; FMad(a,b,c)
-  %3302 = fmul fast float %1523, 1.500000e+00
-  %3303 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %2103, float %3302)  ; FMad(a,b,c)
-  %3304 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %2684, float %3303)  ; FMad(a,b,c)
-  %3305 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %3265, float %3304)  ; FMad(a,b,c)
-  %3306 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %2103, float %3300)  ; FMad(a,b,c)
-  %3307 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %2684, float %3306)  ; FMad(a,b,c)
-  %3308 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3265, float %3307)  ; FMad(a,b,c)
-  %3309 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3267, float %3268, float %3269, float %2103, float %3301, float %3305, float %3308)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3310 = fmul fast float %3266, %3266
-  %3311 = fmul fast float %3310, %3266
-  %3312 = fmul fast float %3279, -7.500000e-01
-  %3313 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3299, float %3312)  ; FMad(a,b,c)
-  %3314 = fmul fast float %3279, 1.500000e+00
-  %3315 = call float @dx.op.tertiary.f32(i32 46, float -2.250000e+00, float %3289, float %3314)  ; FMad(a,b,c)
-  %3316 = call float @dx.op.tertiary.f32(i32 46, float 1.500000e+00, float %3299, float %3315)  ; FMad(a,b,c)
-  %3317 = call float @dx.op.tertiary.f32(i32 46, float -7.500000e-01, float %3309, float %3316)  ; FMad(a,b,c)
-  %3318 = call float @dx.op.tertiary.f32(i32 46, float 1.250000e+00, float %3289, float %3312)  ; FMad(a,b,c)
-  %3319 = call float @dx.op.tertiary.f32(i32 46, float -1.250000e+00, float %3299, float %3318)  ; FMad(a,b,c)
-  %3320 = call float @dx.op.tertiary.f32(i32 46, float 7.500000e-01, float %3309, float %3319)  ; FMad(a,b,c)
-  %3321 = call float @dx.op.dot4.f32(i32 56, float 1.000000e+00, float %3266, float %3310, float %3311, float %3289, float %3313, float %3317, float %3320)  ; Dot4(ax,ay,az,aw,bx,by,bz,bw)
-  %3322 = fptoui float %3321 to i32
-  call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %1, i32 %8, i32 0, i32 %3322, i32 undef, i32 undef, i32 undef, i8 1, i32 4)  ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment)
-  br label %3323
-
-; <label>:3323                                    ; preds = %3264, %933, %919, %329, %0
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.threadId.i32(i32, i32) #0
-
-; Function Attrs: nounwind readonly
-declare %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32, %dx.types.Handle, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.rawBufferStore.i32(i32, %dx.types.Handle, i32, i32, i32, i32, i32, i32, i8, i32) #2
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.unary.f32(i32, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.tertiary.f32(i32, float, float, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.binary.f32(i32, float, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.dot4.f32(i32, float, float, float, float, float, float, float, float) #0
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.tertiary.i32(i32, i32, i32, i32) #0
-
-; Function Attrs: nounwind readonly
-declare %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32, %dx.types.Handle, i32) #1
-
-; Function Attrs: nounwind readonly
-declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #1
-
-; Function Attrs: nounwind readnone
-declare double @dx.op.makeDouble.f64(i32, i32, i32) #0
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind readonly }
-attributes #2 = { nounwind }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.valver = !{!2}
-!dx.shaderModel = !{!3}
-!dx.resources = !{!4}
-!dx.entryPoints = !{!13}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 1, i32 2}
-!2 = !{i32 1, i32 6}
-!3 = !{!"cs", i32 6, i32 2}
-!4 = !{null, !5, !11, null}
-!5 = !{!6, !8, !10}
-!6 = !{i32 0, %"class.RWStructuredBuffer<unsigned int>"* undef, !"", i32 0, i32 0, i32 1, i32 12, i1 false, i1 false, i1 false, !7}
-!7 = !{i32 1, i32 4}
-!8 = !{i32 1, %"class.RWStructuredBuffer<double>"* undef, !"", i32 0, i32 1, i32 1, i32 12, i1 false, i1 false, i1 false, !9}
-!9 = !{i32 1, i32 8}
-!10 = !{i32 2, %"class.RWStructuredBuffer<unsigned int>"* undef, !"", i32 0, i32 2, i32 1, i32 12, i1 false, i1 false, i1 false, !7}
-!11 = !{!12}
-!12 = !{i32 0, %Constants* undef, !"", i32 0, i32 0, i32 1, i32 116, null}
-!13 = !{void ()* @GridSample, !"GridSample", null, !4, !14}
-!14 = !{i32 0, i64 8388628, i32 4, !15}
-!15 = !{i32 64, i32 1, i32 1}
-
-#endif
-
-const unsigned char g_GridSample[] = {
-  0x44, 0x58, 0x42, 0x43, 0x29, 0x14, 0x50, 0x00, 0x3e, 0xd0, 0x1c, 0x68,
-  0x91, 0x36, 0x14, 0xc0, 0xfc, 0x3e, 0x73, 0x65, 0x01, 0x00, 0x00, 0x00,
-  0x54, 0x54, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00,
-  0x48, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00,
-  0x18, 0x01, 0x00, 0x00, 0x34, 0x01, 0x00, 0x00, 0x53, 0x46, 0x49, 0x30,
-  0x08, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x49, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x08, 0x00, 0x00, 0x00, 0x4f, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x50, 0x53, 0x56, 0x30,
-  0xa8, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x05, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00,
-  0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
-  0x18, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x48, 0x41, 0x53, 0x48, 0x14, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x63, 0x55, 0x4f, 0x22, 0x9f, 0xf7, 0x85, 0xc1,
-  0xff, 0x0a, 0xd2, 0x30, 0x50, 0x4c, 0x7c, 0xf1, 0x44, 0x58, 0x49, 0x4c,
-  0x18, 0x53, 0x00, 0x00, 0x62, 0x00, 0x05, 0x00, 0xc6, 0x14, 0x00, 0x00,
-  0x44, 0x58, 0x49, 0x4c, 0x02, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
-  0x00, 0x53, 0x00, 0x00, 0x42, 0x43, 0xc0, 0xde, 0x21, 0x0c, 0x00, 0x00,
-  0xbd, 0x14, 0x00, 0x00, 0x0b, 0x82, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00,
-  0x13, 0x00, 0x00, 0x00, 0x07, 0x81, 0x23, 0x91, 0x41, 0xc8, 0x04, 0x49,
-  0x06, 0x10, 0x32, 0x39, 0x92, 0x01, 0x84, 0x0c, 0x25, 0x05, 0x08, 0x19,
-  0x1e, 0x04, 0x8b, 0x62, 0x80, 0x18, 0x45, 0x02, 0x42, 0x92, 0x0b, 0x42,
-  0xc4, 0x10, 0x32, 0x14, 0x38, 0x08, 0x18, 0x4b, 0x0a, 0x32, 0x62, 0x88,
-  0x48, 0x90, 0x14, 0x20, 0x43, 0x46, 0x88, 0xa5, 0x00, 0x19, 0x32, 0x42,
-  0xe4, 0x48, 0x0e, 0x90, 0x11, 0x23, 0xc4, 0x50, 0x41, 0x51, 0x81, 0x8c,
-  0xe1, 0x83, 0xe5, 0x8a, 0x04, 0x31, 0x46, 0x06, 0x51, 0x18, 0x00, 0x00,
-  0x08, 0x00, 0x00, 0x00, 0x1b, 0x8c, 0xe0, 0xff, 0xff, 0xff, 0xff, 0x07,
-  0x40, 0x02, 0xa8, 0x0d, 0x86, 0xf0, 0xff, 0xff, 0xff, 0xff, 0x03, 0x20,
-  0x01, 0xd5, 0x06, 0x62, 0xf8, 0xff, 0xff, 0xff, 0xff, 0x01, 0x90, 0x00,
-  0x49, 0x18, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x13, 0x82, 0x60, 0x42,
-  0x20, 0x4c, 0x08, 0x06, 0x00, 0x00, 0x00, 0x00, 0x89, 0x20, 0x00, 0x00,
-  0x53, 0x00, 0x00, 0x00, 0x32, 0x22, 0x88, 0x09, 0x20, 0x64, 0x85, 0x04,
-  0x13, 0x23, 0xa4, 0x84, 0x04, 0x13, 0x23, 0xe3, 0x84, 0xa1, 0x90, 0x14,
-  0x12, 0x4c, 0x8c, 0x8c, 0x0b, 0x84, 0xc4, 0x4c, 0x10, 0xb0, 0xc1, 0x08,
-  0x40, 0x09, 0x00, 0x0a, 0xe6, 0x08, 0xc0, 0xa0, 0x0c, 0xc3, 0x30, 0x10,
-  0x71, 0xd3, 0x70, 0xf9, 0x13, 0xf6, 0x10, 0x92, 0xbf, 0x12, 0xd2, 0x4a,
-  0x4c, 0x3e, 0x72, 0xdb, 0xa8, 0x18, 0x86, 0x61, 0x18, 0xe6, 0x08, 0x10,
-  0x3a, 0xee, 0x19, 0x2e, 0x7f, 0xc2, 0x1e, 0x42, 0xf2, 0x43, 0xa0, 0x19,
-  0x16, 0x02, 0x05, 0x48, 0x39, 0x8c, 0x21, 0x19, 0x86, 0x63, 0x20, 0xa5,
-  0x2c, 0xc0, 0x90, 0x0c, 0xc3, 0x30, 0x0c, 0xc3, 0x31, 0x10, 0x33, 0x03,
-  0x50, 0x06, 0x67, 0x70, 0xe8, 0x29, 0x85, 0x33, 0x38, 0x8e, 0x43, 0x51,
-  0x21, 0x9c, 0xc1, 0x71, 0x68, 0x2a, 0x8a, 0x33, 0x38, 0x8e, 0xe3, 0x38,
-  0x8e, 0xe3, 0x50, 0x55, 0x8a, 0x61, 0x18, 0x86, 0x81, 0xae, 0xa3, 0x86,
-  0xcb, 0x9f, 0xb0, 0x87, 0x90, 0x7c, 0x6e, 0xa3, 0x8a, 0x95, 0x98, 0x7c,
-  0xe4, 0xb6, 0x11, 0x31, 0x0c, 0xc3, 0x50, 0x88, 0x6c, 0x48, 0x06, 0xd2,
-  0xe6, 0x08, 0x82, 0x62, 0x24, 0xc3, 0x31, 0x0c, 0x1c, 0x75, 0x43, 0x00,
-  0x85, 0xf8, 0x86, 0x61, 0x20, 0x70, 0x20, 0x60, 0x26, 0x33, 0x18, 0x07,
-  0x76, 0x08, 0x87, 0x79, 0x98, 0x07, 0x37, 0x90, 0x85, 0x5b, 0x98, 0x05,
-  0x7a, 0x90, 0x87, 0x7a, 0x18, 0x07, 0x7a, 0xa8, 0x07, 0x79, 0x28, 0x07,
-  0x72, 0x10, 0x85, 0x7a, 0x30, 0x07, 0x73, 0x28, 0x07, 0x79, 0xe0, 0x83,
-  0x7a, 0x70, 0x87, 0x79, 0x48, 0x87, 0x73, 0x70, 0x87, 0x72, 0x20, 0x07,
-  0x30, 0x48, 0x07, 0x77, 0xa0, 0x07, 0x3f, 0x40, 0xc1, 0x40, 0xe3, 0x4c,
-  0x60, 0x30, 0x0e, 0xec, 0x10, 0x0e, 0xf3, 0x30, 0x0f, 0x6e, 0x20, 0x0b,
-  0xb7, 0x30, 0x0b, 0xf4, 0x20, 0x0f, 0xf5, 0x30, 0x0e, 0xf4, 0x50, 0x0f,
-  0xf2, 0x50, 0x0e, 0xe4, 0x20, 0x0a, 0xf5, 0x60, 0x0e, 0xe6, 0x50, 0x0e,
-  0xf2, 0xc0, 0x07, 0xe4, 0xf0, 0x0e, 0xf5, 0x20, 0x0e, 0xec, 0x50, 0x0e,
-  0x7e, 0x80, 0x82, 0x8f, 0xca, 0x61, 0x04, 0x62, 0xb8, 0x84, 0x73, 0x1a,
-  0x69, 0x02, 0x9a, 0x49, 0x42, 0xcb, 0x30, 0x0c, 0xc3, 0x79, 0x9e, 0xe7,
-  0x79, 0x0e, 0x84, 0xce, 0x11, 0x80, 0xc2, 0x14, 0x00, 0x00, 0x00, 0x00,
-  0x13, 0x14, 0x72, 0xc0, 0x87, 0x74, 0x60, 0x87, 0x36, 0x68, 0x87, 0x79,
-  0x68, 0x03, 0x72, 0xc0, 0x87, 0x0d, 0xae, 0x50, 0x0e, 0x6d, 0xd0, 0x0e,
-  0x7a, 0x50, 0x0e, 0x6d, 0x00, 0x0f, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07,
-  0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0xa0, 0x07, 0x73, 0x20, 0x07,
-  0x6d, 0x90, 0x0e, 0x78, 0xa0, 0x07, 0x78, 0xd0, 0x06, 0xe9, 0x10, 0x07,
-  0x76, 0xa0, 0x07, 0x71, 0x60, 0x07, 0x6d, 0x90, 0x0e, 0x73, 0x20, 0x07,
-  0x7a, 0x30, 0x07, 0x72, 0xd0, 0x06, 0xe9, 0x60, 0x07, 0x74, 0xa0, 0x07,
-  0x76, 0x40, 0x07, 0x6d, 0x60, 0x0e, 0x71, 0x60, 0x07, 0x7a, 0x10, 0x07,
-  0x76, 0xd0, 0x06, 0xe6, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07,
-  0x6d, 0x60, 0x0e, 0x76, 0x40, 0x07, 0x7a, 0x60, 0x07, 0x74, 0xd0, 0x06,
-  0xee, 0x80, 0x07, 0x7a, 0x10, 0x07, 0x76, 0xa0, 0x07, 0x73, 0x20, 0x07,
-  0x7a, 0x60, 0x07, 0x74, 0x30, 0xe4, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0xc8, 0x43, 0x00, 0x01, 0x10, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x90, 0x47, 0x01, 0x02, 0x40,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x21, 0x0f, 0x03, 0x04,
-  0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x43, 0x9e, 0x07,
-  0x08, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0x3c,
-  0x11, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c,
-  0x79, 0x26, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x18, 0xf2, 0x54, 0x40, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x30, 0xe4, 0xb9, 0x80, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x60, 0xc8, 0xa3, 0x01, 0x01, 0x20, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0xc0, 0x90, 0xa7, 0x03, 0x02, 0x40, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x80, 0x21, 0x0f, 0x18, 0x00, 0x01, 0x10, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x16, 0x08, 0x00, 0x00, 0x00,
-  0x0d, 0x00, 0x00, 0x00, 0x32, 0x1e, 0x98, 0x14, 0x19, 0x11, 0x4c, 0x90,
-  0x8c, 0x09, 0x26, 0x47, 0xc6, 0x04, 0x43, 0x1a, 0x4a, 0xa0, 0x08, 0x8a,
-  0x61, 0x04, 0xa0, 0x30, 0x0a, 0xa1, 0x20, 0x0a, 0x3d, 0xa0, 0x00, 0x03,
-  0x88, 0x1b, 0x01, 0x20, 0xb5, 0x50, 0x01, 0x01, 0x11, 0xc8, 0x9c, 0x01,
-  0xa0, 0x74, 0x06, 0x80, 0xc8, 0x19, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00,
-  0x4b, 0x00, 0x00, 0x00, 0x1a, 0x03, 0x4c, 0x90, 0x46, 0x02, 0x13, 0x44,
-  0x35, 0x18, 0x63, 0x0b, 0x73, 0x3b, 0x03, 0xb1, 0x2b, 0x93, 0x9b, 0x4b,
-  0x7b, 0x73, 0x03, 0x99, 0x71, 0xb9, 0x01, 0x41, 0xa1, 0x0b, 0x3b, 0x9b,
-  0x7b, 0x91, 0x2a, 0x62, 0x2a, 0x0a, 0x9a, 0x2a, 0xfa, 0x9a, 0xb9, 0x81,
-  0x79, 0x31, 0x4b, 0x73, 0x0b, 0x63, 0x4b, 0xd9, 0x10, 0x04, 0x13, 0x84,
-  0x81, 0x99, 0x20, 0x0c, 0xcd, 0x06, 0x61, 0x20, 0x26, 0x08, 0x83, 0xb3,
-  0x41, 0x18, 0x0c, 0x0a, 0x63, 0x73, 0x1b, 0x06, 0xc4, 0x20, 0x26, 0x08,
-  0xc3, 0x33, 0x41, 0x20, 0x83, 0x8c, 0xc0, 0x04, 0x61, 0x80, 0x26, 0x08,
-  0x5c, 0x35, 0x41, 0x18, 0xa2, 0x0d, 0xc2, 0xf0, 0x6c, 0x58, 0x94, 0x85,
-  0x51, 0x94, 0xa1, 0x71, 0x1c, 0x07, 0x9a, 0x20, 0x98, 0xc1, 0x35, 0x41,
-  0x18, 0xa4, 0x0d, 0xc2, 0x30, 0x6d, 0x58, 0x06, 0x89, 0x51, 0x86, 0xa1,
-  0x71, 0x1c, 0x87, 0xda, 0xb0, 0x10, 0x0b, 0xa3, 0x10, 0x43, 0xe3, 0x38,
-  0x0e, 0xb4, 0x61, 0x88, 0x2a, 0x6b, 0x82, 0x90, 0x06, 0xd8, 0x04, 0x61,
-  0x98, 0x36, 0x20, 0x0a, 0xc6, 0x28, 0xca, 0x90, 0x01, 0x1b, 0x02, 0x6d,
-  0x03, 0x01, 0x5c, 0x1b, 0x30, 0x41, 0x10, 0x00, 0x2a, 0x47, 0x72, 0x69,
-  0x64, 0x53, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x13, 0x04, 0x35, 0xb0, 0x26,
-  0x08, 0x03, 0xb5, 0x61, 0x00, 0x83, 0x61, 0xd8, 0x40, 0x28, 0xdf, 0x13,
-  0x06, 0x1b, 0x8a, 0xce, 0x03, 0x38, 0x31, 0xa8, 0xc2, 0xc6, 0x66, 0xd7,
-  0xe6, 0x92, 0x46, 0x56, 0xe6, 0x46, 0x37, 0x25, 0x08, 0xaa, 0x90, 0xe1,
-  0xb9, 0xd8, 0x95, 0xc9, 0xcd, 0xa5, 0xbd, 0xb9, 0x4d, 0x09, 0x88, 0x26,
-  0x64, 0x78, 0x2e, 0x76, 0x61, 0x6c, 0x76, 0x65, 0x72, 0x53, 0x02, 0xa3,
-  0x0e, 0x19, 0x9e, 0xcb, 0x1c, 0x5a, 0x18, 0x59, 0x99, 0x5c, 0xd3, 0x1b,
-  0x59, 0x19, 0xdb, 0x94, 0x00, 0x29, 0x43, 0x86, 0xe7, 0x22, 0x57, 0x36,
-  0xf7, 0x56, 0x27, 0x37, 0x56, 0x36, 0x37, 0x25, 0xd8, 0xea, 0x90, 0xe1,
-  0xb9, 0x94, 0xb9, 0xd1, 0xc9, 0xe5, 0x41, 0xbd, 0xa5, 0xb9, 0xd1, 0xcd,
-  0x4d, 0x09, 0xc4, 0x00, 0x79, 0x18, 0x00, 0x00, 0x51, 0x00, 0x00, 0x00,
-  0x33, 0x08, 0x80, 0x1c, 0xc4, 0xe1, 0x1c, 0x66, 0x14, 0x01, 0x3d, 0x88,
-  0x43, 0x38, 0x84, 0xc3, 0x8c, 0x42, 0x80, 0x07, 0x79, 0x78, 0x07, 0x73,
-  0x98, 0x71, 0x0c, 0xe6, 0x00, 0x0f, 0xed, 0x10, 0x0e, 0xf4, 0x80, 0x0e,
-  0x33, 0x0c, 0x42, 0x1e, 0xc2, 0xc1, 0x1d, 0xce, 0xa1, 0x1c, 0x66, 0x30,
-  0x05, 0x3d, 0x88, 0x43, 0x38, 0x84, 0x83, 0x1b, 0xcc, 0x03, 0x3d, 0xc8,
-  0x43, 0x3d, 0x8c, 0x03, 0x3d, 0xcc, 0x78, 0x8c, 0x74, 0x70, 0x07, 0x7b,
-  0x08, 0x07, 0x79, 0x48, 0x87, 0x70, 0x70, 0x07, 0x7a, 0x70, 0x03, 0x76,
-  0x78, 0x87, 0x70, 0x20, 0x87, 0x19, 0xcc, 0x11, 0x0e, 0xec, 0x90, 0x0e,
-  0xe1, 0x30, 0x0f, 0x6e, 0x30, 0x0f, 0xe3, 0xf0, 0x0e, 0xf0, 0x50, 0x0e,
-  0x33, 0x10, 0xc4, 0x1d, 0xde, 0x21, 0x1c, 0xd8, 0x21, 0x1d, 0xc2, 0x61,
-  0x1e, 0x66, 0x30, 0x89, 0x3b, 0xbc, 0x83, 0x3b, 0xd0, 0x43, 0x39, 0xb4,
-  0x03, 0x3c, 0xbc, 0x83, 0x3c, 0x84, 0x03, 0x3b, 0xcc, 0xf0, 0x14, 0x76,
-  0x60, 0x07, 0x7b, 0x68, 0x07, 0x37, 0x68, 0x87, 0x72, 0x68, 0x07, 0x37,
-  0x80, 0x87, 0x70, 0x90, 0x87, 0x70, 0x60, 0x07, 0x76, 0x28, 0x07, 0x76,
-  0xf8, 0x05, 0x76, 0x78, 0x87, 0x77, 0x80, 0x87, 0x5f, 0x08, 0x87, 0x71,
-  0x18, 0x87, 0x72, 0x98, 0x87, 0x79, 0x98, 0x81, 0x2c, 0xee, 0xf0, 0x0e,
-  0xee, 0xe0, 0x0e, 0xf5, 0xc0, 0x0e, 0xec, 0x30, 0x03, 0x62, 0xc8, 0xa1,
-  0x1c, 0xe4, 0xa1, 0x1c, 0xcc, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xdc, 0x61,
-  0x1c, 0xca, 0x21, 0x1c, 0xc4, 0x81, 0x1d, 0xca, 0x61, 0x06, 0xd6, 0x90,
-  0x43, 0x39, 0xc8, 0x43, 0x39, 0x98, 0x43, 0x39, 0xc8, 0x43, 0x39, 0xb8,
-  0xc3, 0x38, 0x94, 0x43, 0x38, 0x88, 0x03, 0x3b, 0x94, 0xc3, 0x2f, 0xbc,
-  0x83, 0x3c, 0xfc, 0x82, 0x3b, 0xd4, 0x03, 0x3b, 0xb0, 0xc3, 0x0c, 0xc4,
-  0x21, 0x07, 0x7c, 0x70, 0x03, 0x7a, 0x28, 0x87, 0x76, 0x80, 0x87, 0x19,
-  0xd1, 0x43, 0x0e, 0xf8, 0xe0, 0x06, 0xe4, 0x20, 0x0e, 0xe7, 0xe0, 0x06,
-  0xf6, 0x10, 0x0e, 0xf2, 0xc0, 0x0e, 0xe1, 0x90, 0x0f, 0xef, 0x50, 0x0f,
-  0xf4, 0x30, 0x83, 0x81, 0xc8, 0x01, 0x1f, 0xdc, 0x40, 0x1c, 0xe4, 0xa1,
-  0x1c, 0xc2, 0x61, 0x1d, 0xdc, 0x40, 0x1c, 0xe4, 0x01, 0x00, 0x00, 0x00,
-  0x71, 0x20, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x06, 0xa0, 0x80, 0x11,
-  0x32, 0xb0, 0x00, 0xf3, 0x2c, 0x84, 0x19, 0x40, 0xc3, 0xe5, 0x3b, 0x8f,
-  0x1f, 0x20, 0x0d, 0x10, 0x61, 0x7e, 0x71, 0xdb, 0x96, 0xb0, 0x0d, 0x97,
-  0xef, 0x3c, 0xbe, 0x10, 0x50, 0x45, 0x41, 0x44, 0xa5, 0x03, 0x0c, 0x25,
-  0x61, 0x00, 0x02, 0xe6, 0x23, 0xb7, 0x6d, 0x0a, 0xd2, 0x70, 0xf9, 0xce,
-  0xe3, 0x0b, 0x11, 0x01, 0x4c, 0x44, 0x08, 0x34, 0xc3, 0x42, 0xd8, 0x81,
-  0x33, 0x5c, 0xbe, 0xf3, 0xf8, 0x83, 0x33, 0xe1, 0x7e, 0x71, 0xdb, 0xb6,
-  0x40, 0x0d, 0x97, 0xef, 0x3c, 0x3e, 0x03, 0x28, 0x44, 0xe7, 0x50, 0xc1,
-  0x42, 0xf8, 0x85, 0x8e, 0x9b, 0xc0, 0x35, 0x5c, 0xbe, 0xf3, 0xf8, 0x11,
-  0x60, 0x6d, 0x54, 0x51, 0x10, 0x51, 0xe9, 0x00, 0x83, 0x8f, 0xdc, 0xb6,
-  0x0d, 0x60, 0xc3, 0xe5, 0x3b, 0x8f, 0x1f, 0x01, 0xd6, 0x46, 0x15, 0x05,
-  0x11, 0xb1, 0x93, 0x13, 0x11, 0x3e, 0x72, 0xdb, 0x56, 0x20, 0x0d, 0x97,
-  0xef, 0x3c, 0xfe, 0x44, 0x44, 0x13, 0x02, 0x44, 0x98, 0x5f, 0xdc, 0xb6,
-  0x21, 0x48, 0xc3, 0xe5, 0x3b, 0x8f, 0x3f, 0x11, 0xd1, 0x84, 0x00, 0x11,
-  0xe6, 0x23, 0xb7, 0x6d, 0x01, 0xd2, 0x70, 0xf9, 0xce, 0xe3, 0x4f, 0x47,
-  0x44, 0x00, 0x83, 0x38, 0xf8, 0xc8, 0x6d, 0x1b, 0xc1, 0x33, 0x5c, 0xbe,
-  0xf3, 0xf8, 0x54, 0x03, 0x44, 0x98, 0x5f, 0xdc, 0x36, 0x00, 0x00, 0x00,
-  0x61, 0x20, 0x00, 0x00, 0x14, 0x13, 0x00, 0x00, 0x13, 0x04, 0x24, 0x14,
-  0x0b, 0x04, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x34, 0x14, 0x58, 0xd9,
-  0x95, 0xa5, 0x40, 0x0d, 0x94, 0x51, 0x21, 0x15, 0xd7, 0x0c, 0x40, 0xc1,
-  0x95, 0x5c, 0xd9, 0x14, 0x4b, 0x61, 0x0a, 0x94, 0x72, 0x40, 0xd1, 0x94,
-  0x6e, 0x40, 0x39, 0x94, 0x02, 0x1d, 0x25, 0x50, 0x06, 0x45, 0x40, 0xce,
-  0x08, 0xc0, 0x18, 0x01, 0x08, 0x82, 0x20, 0xfe, 0x8d, 0x11, 0x80, 0x20,
-  0x08, 0xd2, 0xbf, 0x30, 0x46, 0x00, 0x82, 0x20, 0x48, 0x7f, 0x63, 0x04,
-  0x20, 0x08, 0x82, 0xfc, 0x37, 0x46, 0x00, 0x82, 0x20, 0x88, 0xff, 0xc2,
-  0x18, 0x01, 0x08, 0x82, 0x60, 0x08, 0x0e, 0x63, 0x04, 0x20, 0x08, 0x82,
-  0xfa, 0x37, 0x46, 0x00, 0x82, 0x20, 0xa8, 0xff, 0xc2, 0x18, 0x01, 0x08,
-  0x82, 0x20, 0xfc, 0x8d, 0x11, 0x80, 0x20, 0x08, 0xc2, 0xbf, 0x30, 0x46,
-  0x00, 0x82, 0x20, 0x08, 0x82, 0x01, 0x00, 0x00, 0x23, 0x06, 0x09, 0x00,
-  0x82, 0x60, 0xd0, 0xc5, 0x01, 0xf6, 0xb8, 0x81, 0x1b, 0x98, 0xc1, 0x88,
-  0x41, 0x02, 0x80, 0x20, 0x18, 0x74, 0x72, 0x90, 0x41, 0x70, 0x00, 0x07,
-  0x67, 0x30, 0x62, 0x90, 0x00, 0x20, 0x08, 0x06, 0xdd, 0x1c, 0x68, 0x91,
-  0x1b, 0xb8, 0x01, 0x1a, 0x8c, 0x18, 0x24, 0x00, 0x08, 0x82, 0x41, 0x47,
-  0x07, 0x1b, 0xf4, 0x06, 0x6f, 0x90, 0x06, 0x23, 0x06, 0x06, 0x00, 0x82,
-  0x60, 0x40, 0xf8, 0x81, 0x05, 0x07, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60,
-  0xa0, 0xdd, 0x41, 0x19, 0x08, 0x71, 0x30, 0x9a, 0x10, 0x00, 0x15, 0x0c,
-  0x30, 0x9a, 0x30, 0x04, 0xc3, 0x0d, 0x42, 0x40, 0x06, 0xb3, 0x0c, 0xc1,
-  0x08, 0x05, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xa0, 0xf1, 0x81, 0x1a,
-  0x1c, 0x79, 0x30, 0x9a, 0x10, 0x0c, 0x17, 0x38, 0x35, 0x9a, 0x30, 0x08,
-  0x17, 0x38, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x5a, 0x28, 0xbc,
-  0x01, 0x03, 0x06, 0xa3, 0x09, 0x01, 0x30, 0xdc, 0x10, 0xf4, 0x01, 0x18,
-  0x4c, 0x37, 0x50, 0x5e, 0x30, 0xdd, 0x50, 0x69, 0x42, 0x21, 0x01, 0x4c,
-  0x37, 0x5c, 0x1c, 0x51, 0x48, 0x00, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60,
-  0xa0, 0xa5, 0xc2, 0x1d, 0x50, 0x67, 0x30, 0x9a, 0x10, 0x04, 0xa3, 0x09,
-  0x82, 0x30, 0x9a, 0x30, 0x0c, 0x15, 0x08, 0x52, 0x03, 0x21, 0x15, 0x0c,
-  0x52, 0x57, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xa0, 0xc5, 0xc2,
-  0x1f, 0x70, 0xac, 0x30, 0x9a, 0x10, 0x00, 0x15, 0x0c, 0x52, 0x5b, 0x10,
-  0x15, 0x20, 0x33, 0x9a, 0x50, 0x04, 0x15, 0x08, 0x52, 0x44, 0x10, 0x15,
-  0x34, 0x33, 0x9a, 0x90, 0x08, 0x15, 0x08, 0x52, 0x44, 0x10, 0xd7, 0x38,
-  0x75, 0x85, 0x53, 0x37, 0x38, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06,
-  0x1a, 0x38, 0xb8, 0xc2, 0x1a, 0xe0, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26,
-  0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0xc3, 0x11, 0x4e, 0x1d,
-  0xe1, 0xd4, 0x11, 0x4e, 0x1d, 0xe1, 0xd4, 0x88, 0x41, 0x03, 0x80, 0x20,
-  0x18, 0x54, 0xeb, 0x00, 0x0b, 0xcc, 0xa2, 0x8c, 0x02, 0x31, 0x08, 0x81,
-  0x09, 0x01, 0x7c, 0x4e, 0x18, 0x66, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c,
-  0x94, 0x79, 0xc8, 0x85, 0x3c, 0x08, 0xcc, 0x01, 0x15, 0xc6, 0x61, 0x34,
-  0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c,
-  0xc0, 0xa0, 0x1d, 0x62, 0x41, 0x08, 0x2e, 0x70, 0xee, 0x8e, 0x61, 0x46,
-  0x0c, 0x14, 0x00, 0x04, 0xc1, 0x40, 0xc9, 0x87, 0x5f, 0xf8, 0x83, 0x80,
-  0x1d, 0x5c, 0x21, 0x1d, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46,
-  0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x00, 0x0c, 0xe6, 0xe1, 0x16, 0x84, 0xe0,
-  0x02, 0xe7, 0x86, 0x1b, 0xea, 0x80, 0x1e, 0xc0, 0xc0, 0x90, 0x58, 0x80,
-  0x8f, 0x0d, 0xb2, 0x00, 0x9f, 0x59, 0x06, 0x61, 0x18, 0x4c, 0x58, 0x05,
-  0xf9, 0x98, 0xc0, 0x0a, 0xf2, 0x31, 0x3f, 0x88, 0x05, 0xf8, 0x58, 0x1f,
-  0xc8, 0x02, 0x7c, 0x8c, 0x10, 0xe4, 0x63, 0x84, 0x20, 0x9f, 0x59, 0x02,
-  0xc2, 0x44, 0x01, 0x91, 0x8f, 0x21, 0xa1, 0x20, 0x1f, 0x13, 0x6e, 0x01,
-  0x3e, 0x26, 0xe0, 0x02, 0x7c, 0x4c, 0xa8, 0x05, 0xf9, 0x98, 0x60, 0x0b,
-  0xf2, 0x99, 0x25, 0x20, 0x06, 0x2a, 0x1c, 0x48, 0x20, 0x86, 0x81, 0x0a,
-  0x07, 0x12, 0x88, 0x61, 0x34, 0x21, 0x16, 0x84, 0xe1, 0x86, 0xc0, 0x24,
-  0xc0, 0x60, 0x96, 0xa1, 0x30, 0x82, 0x11, 0x03, 0x03, 0x00, 0x41, 0x30,
-  0x78, 0x5e, 0x62, 0x1d, 0x88, 0x11, 0x03, 0x03, 0x00, 0x41, 0x30, 0x78,
-  0x60, 0x82, 0x1d, 0x88, 0x59, 0x02, 0x63, 0xa0, 0xc2, 0x21, 0x0a, 0x86,
-  0x18, 0xa8, 0x70, 0x88, 0x82, 0x21, 0x86, 0x23, 0x04, 0x55, 0x20, 0xbe,
-  0xe1, 0x88, 0x21, 0x15, 0x84, 0xaf, 0x84, 0x60, 0x87, 0x23, 0x88, 0x56,
-  0x20, 0xbe, 0x12, 0x82, 0x1d, 0x8e, 0x30, 0x56, 0x41, 0xf8, 0x2a, 0x10,
-  0x76, 0x96, 0xe1, 0xd0, 0x82, 0xd1, 0x04, 0x5f, 0x18, 0x86, 0x1b, 0x82,
-  0x99, 0x00, 0x83, 0x59, 0x06, 0x24, 0x09, 0x4a, 0x17, 0x46, 0x02, 0x2e,
-  0x70, 0x6a, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x26, 0x9e, 0x20, 0x89,
-  0x66, 0x1e, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0xea, 0x09, 0x92,
-  0x08, 0x84, 0xe2, 0x85, 0x93, 0x80, 0x0b, 0x9c, 0x1a, 0x31, 0x38, 0x00,
-  0x10, 0x04, 0x83, 0x09, 0x2c, 0x50, 0x02, 0xba, 0x87, 0x11, 0x83, 0x03,
-  0x00, 0x41, 0x30, 0x98, 0xc2, 0x02, 0x25, 0x02, 0x61, 0x96, 0x40, 0x1b,
-  0x6e, 0x50, 0x76, 0x02, 0x0c, 0x66, 0x19, 0x14, 0x2d, 0x30, 0x5d, 0xe0,
-  0x85, 0xf8, 0xcc, 0x32, 0x2c, 0xce, 0x64, 0xbd, 0x50, 0xc5, 0xc7, 0x02,
-  0x81, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x53, 0x16, 0x14, 0xf2, 0xb1, 0x22,
-  0x88, 0x4f, 0x11, 0x64, 0xa1, 0xc3, 0x0d, 0x81, 0x58, 0x80, 0xc1, 0x2c,
-  0x03, 0xd3, 0x04, 0x36, 0x94, 0x03, 0x7c, 0x66, 0x09, 0x24, 0x23, 0x07,
-  0x22, 0x3e, 0xb3, 0x04, 0xd2, 0x2c, 0xc3, 0x23, 0x71, 0xf6, 0x95, 0x43,
-  0x7c, 0x2c, 0x60, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x38, 0x65, 0xc1, 0x23,
-  0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xe1, 0x16, 0x3a, 0xdc, 0x10, 0xb0, 0x05,
-  0x18, 0xcc, 0x32, 0x40, 0x51, 0x60, 0xed, 0x30, 0xc4, 0x67, 0x96, 0x40,
-  0x32, 0x02, 0x1e, 0xe0, 0x33, 0x4b, 0x20, 0x0d, 0xb4, 0x38, 0x18, 0x63,
-  0x35, 0x04, 0x24, 0x44, 0xb2, 0xe0, 0x98, 0x3b, 0xc8, 0x43, 0x7c, 0x66,
-  0x19, 0x26, 0xcb, 0x0c, 0x6c, 0x1e, 0xd4, 0x20, 0x3e, 0x16, 0x08, 0xf4,
-  0xb9, 0x60, 0x98, 0x0b, 0x9c, 0xb2, 0xa0, 0x90, 0x8f, 0x15, 0x41, 0x7c,
-  0x8a, 0xd0, 0x0b, 0x1d, 0x6e, 0x08, 0xf0, 0x02, 0x0c, 0x66, 0x19, 0xa8,
-  0x2a, 0xb0, 0x61, 0x1f, 0xe0, 0x33, 0x4b, 0xa0, 0x19, 0x3e, 0x10, 0xf1,
-  0x99, 0x25, 0xd0, 0x66, 0x19, 0x2e, 0xcd, 0x0d, 0x8c, 0x0e, 0xf2, 0x21,
-  0x3e, 0x16, 0x30, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9c, 0xb2, 0xe0, 0x91,
-  0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x20, 0x0d, 0x1d, 0x6e, 0x08, 0x44, 0x03,
-  0x0c, 0x66, 0x19, 0xb0, 0x2c, 0xb0, 0x90, 0x18, 0xe2, 0x33, 0x4b, 0xa0,
-  0x19, 0x61, 0x12, 0xf0, 0x99, 0x25, 0xd0, 0x06, 0x8a, 0x1c, 0x71, 0x40,
-  0xfc, 0x21, 0xf1, 0x07, 0x83, 0x0d, 0x32, 0x36, 0xc0, 0xd8, 0xc0, 0x62,
-  0x83, 0x8a, 0x0d, 0xa8, 0x81, 0x22, 0x87, 0x17, 0x10, 0x7f, 0x48, 0xfc,
-  0xc1, 0x20, 0x32, 0x03, 0xf3, 0x07, 0x0b, 0xab, 0x34, 0xea, 0xf0, 0xc1,
-  0xa9, 0x59, 0x86, 0x6d, 0x0e, 0x4a, 0x61, 0x34, 0xe1, 0x26, 0x86, 0xe1,
-  0x86, 0x20, 0x35, 0xc0, 0x60, 0x96, 0x81, 0xf3, 0x82, 0xe1, 0x88, 0x42,
-  0x2d, 0x86, 0xef, 0x8c, 0x61, 0x86, 0x1b, 0x82, 0x9a, 0x20, 0x83, 0x1a,
-  0x02, 0x1d, 0x8e, 0x40, 0xdc, 0x62, 0xf8, 0x2a, 0x10, 0xf4, 0x94, 0x61,
-  0x86, 0x1b, 0x02, 0x9c, 0x20, 0x83, 0x0a, 0x06, 0x9d, 0x65, 0xe8, 0xe4,
-  0x20, 0x38, 0x7e, 0x18, 0xe6, 0x9a, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04,
-  0xc1, 0x40, 0xdb, 0x8d, 0xd4, 0x30, 0x8b, 0xdb, 0x18, 0x4d, 0x08, 0x80,
-  0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0xe2,
-  0x90, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xb8, 0xc4, 0x03, 0x36, 0x0e,
-  0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x6b, 0x3c, 0x62, 0x83,
-  0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xb8, 0xc8, 0x43, 0x36,
-  0x24, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x03, 0x65, 0x3d, 0x62,
-  0x03, 0x2e, 0x02, 0xdf, 0x08, 0x0d, 0xde, 0x18, 0x4d, 0x08, 0x80, 0x0b,
-  0x9c, 0x9a, 0x25, 0x90, 0x83, 0xe1, 0x86, 0x8c, 0x3c, 0xc0, 0x60, 0x96,
-  0xe1, 0x03, 0x83, 0xa0, 0xd6, 0x82, 0x36, 0xe0, 0x02, 0xa7, 0x46, 0x0c,
-  0x0e, 0x00, 0x04, 0xc1, 0x60, 0x6a, 0x8f, 0xda, 0xf8, 0x48, 0x63, 0xc4,
-  0xe0, 0x00, 0x40, 0x10, 0x0c, 0x26, 0xf7, 0xa8, 0x8d, 0x40, 0xb8, 0x60,
-  0x98, 0x72, 0x8b, 0xdc, 0x80, 0x0b, 0x9c, 0x1a, 0x31, 0x38, 0x00, 0x10,
-  0x04, 0x83, 0x49, 0x3e, 0x74, 0x63, 0x0c, 0x52, 0x63, 0xc4, 0xe0, 0x00,
-  0x40, 0x10, 0x0c, 0xa6, 0xf9, 0xd0, 0x8d, 0x40, 0xb8, 0x60, 0x98, 0x0b,
-  0x9c, 0xba, 0xc3, 0xa9, 0xbb, 0x89, 0x61, 0x0e, 0x0d, 0x86, 0x39, 0x62,
-  0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x0d, 0x3f,
-  0xcc, 0x63, 0x34, 0xe8, 0x63, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60,
-  0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10,
-  0x00, 0x04, 0xc1, 0xe0, 0xfa, 0x8f, 0xf6, 0x48, 0x88, 0x60, 0xc4, 0x00,
-  0x01, 0x40, 0x10, 0x0c, 0x2e, 0x10, 0x71, 0x8f, 0x84, 0x08, 0x46, 0x0c,
-  0x10, 0x00, 0x04, 0xc1, 0xe0, 0x0a, 0x91, 0xf7, 0x48, 0x88, 0x60, 0xc4,
-  0x40, 0x01, 0x40, 0x10, 0x0c, 0x14, 0x14, 0x71, 0x8f, 0xd6, 0x08, 0xf6,
-  0xc3, 0x37, 0xf2, 0x63, 0x34, 0x21, 0x00, 0x2e, 0x70, 0x6a, 0x96, 0x40,
-  0x0e, 0x86, 0x1b, 0xec, 0x00, 0x44, 0xc0, 0x60, 0x96, 0x21, 0x0c, 0xe4,
-  0x20, 0xb0, 0xbf, 0x08, 0x8d, 0xf8, 0x0c, 0x47, 0xec, 0x81, 0x68, 0x10,
-  0xdf, 0x2c, 0x83, 0x18, 0x94, 0x41, 0x60, 0xa3, 0xc1, 0x07, 0xf1, 0xb1,
-  0x60, 0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0xe0, 0x94, 0x05, 0x86, 0x7c, 0xac,
-  0x08, 0xe2, 0x53, 0x84, 0x8a, 0xe8, 0x70, 0x43, 0x80, 0x22, 0x60, 0x30,
-  0xcb, 0x30, 0x06, 0x64, 0x10, 0xd8, 0xb0, 0x1a, 0xf0, 0x99, 0x25, 0x48,
-  0x03, 0x53, 0x0d, 0x22, 0x3e, 0xb3, 0x04, 0x69, 0x30, 0x1c, 0x61, 0x0a,
-  0xab, 0x21, 0x7c, 0xb3, 0x0c, 0x66, 0x90, 0x06, 0x81, 0x9d, 0x02, 0x6b,
-  0xc4, 0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x53, 0x16, 0x44,
-  0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x35, 0xa2, 0xc3, 0x0d, 0xc1, 0x8c,
-  0x80, 0xc1, 0x2c, 0xc3, 0x19, 0xa0, 0x41, 0x60, 0xb4, 0x31, 0xc4, 0x67,
-  0x96, 0x20, 0x0d, 0x8c, 0xb8, 0x0d, 0xf8, 0xcc, 0x12, 0xa4, 0xc1, 0x40,
-  0x8b, 0xa3, 0x8d, 0x01, 0x46, 0x06, 0xc4, 0x19, 0x08, 0x68, 0x20, 0x16,
-  0x65, 0x70, 0xc1, 0x30, 0x66, 0x1b, 0xba, 0x11, 0x9f, 0xe1, 0x88, 0x59,
-  0xd8, 0x0d, 0xe2, 0x9b, 0x65, 0x50, 0x83, 0x36, 0x08, 0x8c, 0x37, 0x68,
-  0x21, 0x3e, 0x16, 0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9c, 0xb2, 0xc0,
-  0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x18, 0x13, 0x1d, 0x6e, 0x08, 0xc2,
-  0x04, 0x0c, 0x66, 0x19, 0xd6, 0x80, 0x0d, 0x02, 0x1b, 0xc8, 0x03, 0x3e,
-  0xb3, 0x04, 0x71, 0x60, 0xe1, 0x41, 0xc4, 0x67, 0x96, 0x20, 0x0e, 0x86,
-  0x23, 0x7c, 0x41, 0x3c, 0x84, 0x6f, 0x96, 0xc1, 0x0d, 0xe2, 0x20, 0xb0,
-  0x5f, 0x18, 0x8f, 0xf8, 0x58, 0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x70,
-  0xca, 0x82, 0x48, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0xc2, 0x4d, 0x74, 0xb8,
-  0x21, 0x60, 0x13, 0x30, 0x98, 0x65, 0x78, 0x03, 0x38, 0x08, 0x6c, 0x3d,
-  0x86, 0xf8, 0xcc, 0x12, 0xc4, 0x81, 0x11, 0xf0, 0x01, 0x9f, 0x59, 0x82,
-  0x38, 0x18, 0x68, 0x71, 0xb4, 0x35, 0xc0, 0xd8, 0x80, 0x78, 0x03, 0x01,
-  0x0e, 0x64, 0xa3, 0x0d, 0x2e, 0x18, 0xe6, 0x02, 0xa7, 0x6e, 0x73, 0xea,
-  0x78, 0x63, 0x98, 0x6b, 0x87, 0x61, 0x8e, 0x18, 0xe6, 0x88, 0x61, 0x46,
-  0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x40, 0xeb, 0x93, 0x35, 0x41, 0x91, 0x3c,
-  0x19, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1,
-  0x04, 0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xb8,
-  0x48, 0x45, 0x4e, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83,
-  0xab, 0x54, 0xe6, 0x24, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30,
-  0xb8, 0x4c, 0x85, 0x4e, 0x12, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04,
-  0x03, 0xa5, 0x55, 0xe6, 0x44, 0x46, 0x02, 0x50, 0x19, 0x13, 0x3f, 0x19,
-  0x4d, 0x08, 0x80, 0x0b, 0x9c, 0x9a, 0x25, 0x90, 0x83, 0x81, 0x16, 0xc7,
-  0x34, 0x3a, 0x53, 0xe2, 0x58, 0xe2, 0x13, 0xe2, 0xc0, 0x94, 0xc0, 0xe0,
-  0x82, 0x61, 0x46, 0x0c, 0x1c, 0x00, 0x04, 0xc1, 0x80, 0x89, 0x15, 0x37,
-  0xc9, 0x11, 0x19, 0x29, 0x95, 0x60, 0x4e, 0xe6, 0x64, 0x4e, 0xd0, 0x64,
-  0x54, 0x66, 0x09, 0x46, 0x68, 0xb8, 0x61, 0x34, 0x4a, 0x05, 0x0c, 0x66,
-  0x19, 0xe8, 0x20, 0x26, 0x82, 0x11, 0x03, 0x03, 0x00, 0x41, 0x30, 0x78,
-  0x62, 0x25, 0x4e, 0x42, 0x62, 0xc4, 0xc0, 0x00, 0x40, 0x10, 0x0c, 0x1e,
-  0x59, 0x91, 0x93, 0x90, 0x30, 0xe1, 0x4c, 0xe0, 0x63, 0x02, 0x9a, 0xc0,
-  0x67, 0x34, 0x21, 0x47, 0x86, 0xe1, 0x86, 0x60, 0x55, 0xc0, 0x60, 0x96,
-  0xa1, 0x0e, 0xee, 0x20, 0x18, 0x8e, 0x30, 0xd8, 0x64, 0xf8, 0xee, 0x18,
-  0x66, 0xb8, 0x21, 0xb8, 0x11, 0x32, 0xa8, 0x21, 0xd0, 0xe1, 0x88, 0x04,
-  0x4e, 0x86, 0xaf, 0x02, 0x41, 0x6f, 0x19, 0x66, 0xb8, 0x21, 0xd0, 0x11,
-  0x32, 0xa8, 0x60, 0xd0, 0x59, 0x06, 0x3b, 0x58, 0x85, 0xe0, 0xfc, 0x63,
-  0x98, 0x7b, 0x89, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x40, 0xeb,
-  0x95, 0x55, 0x41, 0x93, 0x5c, 0x19, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21,
-  0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0xe2, 0x90, 0x11, 0x03,
-  0x04, 0x00, 0x41, 0x30, 0xb8, 0xc8, 0x45, 0x56, 0x0e, 0x22, 0x18, 0x31,
-  0x40, 0x00, 0x10, 0x04, 0x83, 0xab, 0x5c, 0x66, 0x85, 0x21, 0x82, 0x11,
-  0x03, 0x04, 0x00, 0x41, 0x30, 0xb8, 0xcc, 0x85, 0x56, 0x24, 0x22, 0x18,
-  0x31, 0x50, 0x00, 0x10, 0x04, 0x03, 0xa5, 0x5d, 0x66, 0x45, 0x4e, 0x02,
-  0x70, 0x19, 0x15, 0x5f, 0x19, 0x4d, 0x08, 0x80, 0x0b, 0x9c, 0x9a, 0x25,
-  0x58, 0x85, 0xe1, 0x86, 0xcc, 0x5c, 0xc0, 0x60, 0x96, 0x01, 0x0f, 0xf2,
-  0x20, 0xa8, 0x36, 0xb1, 0x15, 0xb8, 0xc0, 0xa9, 0x11, 0x83, 0x03, 0x00,
-  0x41, 0x30, 0x98, 0xde, 0xe5, 0x56, 0xc0, 0xc0, 0x54, 0x46, 0x0c, 0x0e,
-  0x00, 0x04, 0xc1, 0x60, 0x82, 0x97, 0x5b, 0x09, 0x84, 0x0b, 0x86, 0x29,
-  0x38, 0xd9, 0x15, 0xb8, 0xc0, 0xa9, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30,
-  0x98, 0xe8, 0x85, 0x57, 0xc8, 0x60, 0x55, 0x46, 0x0c, 0x0e, 0x00, 0x04,
-  0xc1, 0x60, 0xaa, 0x17, 0x5e, 0x09, 0x84, 0x0b, 0x86, 0xb9, 0xc0, 0xa9,
-  0x3b, 0x9c, 0xba, 0x1c, 0x19, 0xe6, 0xd4, 0x62, 0x98, 0x23, 0x86, 0x39,
-  0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xd0, 0xf4, 0x05, 0x5d,
-  0x4a, 0xc5, 0x5e, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08, 0x46, 0x13,
-  0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00, 0x01, 0x40,
-  0x10, 0x0c, 0xae, 0x90, 0x79, 0x97, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00,
-  0x04, 0xc1, 0xe0, 0x12, 0x19, 0x78, 0x49, 0x88, 0x60, 0xc4, 0x00, 0x01,
-  0x40, 0x10, 0x0c, 0xae, 0x91, 0x89, 0x97, 0x84, 0x08, 0x46, 0x0c, 0x14,
-  0x00, 0x04, 0xc1, 0x40, 0x51, 0x19, 0x78, 0x79, 0x95, 0xa0, 0x5f, 0xc0,
-  0x65, 0x5f, 0x46, 0x13, 0x02, 0xe0, 0x02, 0xa7, 0x66, 0x09, 0x56, 0x61,
-  0xb8, 0xc1, 0x0e, 0x44, 0x06, 0x0c, 0x66, 0x19, 0xf4, 0x60, 0x15, 0x02,
-  0x0b, 0x95, 0x51, 0x89, 0xcf, 0x70, 0x04, 0x1f, 0x90, 0x0a, 0xf1, 0xcd,
-  0x32, 0xec, 0x81, 0x1f, 0x04, 0x56, 0x2a, 0x7d, 0x10, 0x1f, 0x0b, 0x06,
-  0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4e, 0x59, 0x60, 0xc8, 0xc7, 0x8a, 0x20,
-  0x3e, 0x45, 0xb0, 0x8c, 0x0e, 0x37, 0x04, 0x2a, 0x03, 0x06, 0xb3, 0x0c,
-  0x7c, 0xd0, 0x07, 0x81, 0x0d, 0xad, 0x02, 0x9f, 0x59, 0x02, 0x51, 0x30,
-  0x56, 0x21, 0xe2, 0x33, 0x4b, 0x20, 0x0a, 0xc3, 0x11, 0xa7, 0xd0, 0x2a,
-  0xc2, 0x37, 0xcb, 0xf0, 0x07, 0xa2, 0x10, 0x18, 0x2a, 0xb8, 0x4a, 0x7c,
-  0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x38, 0x65, 0x41, 0x24, 0x1f,
-  0x2b, 0x82, 0xf8, 0x14, 0x71, 0x33, 0x3a, 0xdc, 0x10, 0xd4, 0x0c, 0x18,
-  0xcc, 0x32, 0x80, 0x42, 0x28, 0x04, 0x66, 0x2b, 0x43, 0x7c, 0x66, 0x09,
-  0x44, 0xc1, 0x88, 0x5c, 0x81, 0xcf, 0x2c, 0x81, 0x28, 0x0c, 0xb4, 0x38,
-  0x1a, 0x1f, 0x60, 0x7d, 0x40, 0x80, 0x82, 0x10, 0x0a, 0x64, 0xe1, 0x07,
-  0x17, 0x0c, 0x63, 0xb8, 0xc2, 0x2b, 0xf1, 0x19, 0x8e, 0xa0, 0x85, 0x5e,
-  0x21, 0xbe, 0x59, 0x86, 0x51, 0x30, 0x85, 0xc0, 0x7c, 0xa5, 0x16, 0xe2,
-  0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x29, 0x0b, 0x0c, 0xf9,
-  0x58, 0x11, 0xc4, 0xa7, 0x88, 0xb2, 0xd1, 0xe1, 0x86, 0x60, 0x6c, 0xc0,
-  0x60, 0x96, 0x81, 0x14, 0x4a, 0x21, 0xb0, 0xc1, 0x5c, 0xe0, 0x33, 0x4b,
-  0xa0, 0x0a, 0x36, 0x2e, 0x44, 0x7c, 0x66, 0x09, 0x54, 0x61, 0x38, 0xe2,
-  0x17, 0xc8, 0x45, 0xf8, 0x66, 0x19, 0x4e, 0x41, 0x15, 0x02, 0x03, 0x87,
-  0x72, 0x89, 0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa7, 0x2c,
-  0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xe0, 0x46, 0x87, 0x1b, 0x02,
-  0xb7, 0x01, 0x83, 0x59, 0x06, 0x54, 0x48, 0x85, 0xc0, 0xda, 0x65, 0x88,
-  0xcf, 0x2c, 0x81, 0x2a, 0x18, 0x21, 0x2f, 0xf0, 0x99, 0x25, 0x50, 0x85,
-  0x81, 0x16, 0x47, 0x23, 0x05, 0xac, 0x14, 0x08, 0x54, 0x10, 0x52, 0x81,
-  0x36, 0x4c, 0xe1, 0x82, 0x61, 0x2e, 0x70, 0xea, 0x36, 0xa7, 0xce, 0x57,
-  0x86, 0xb9, 0xf7, 0x18, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4, 0xe0,
-  0x00, 0x40, 0x10, 0x0c, 0xb4, 0xbf, 0x69, 0x1b, 0x95, 0xd9, 0x9b, 0xd1,
-  0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20,
-  0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0xcb, 0x74,
-  0xe8, 0x26, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xb8, 0x4e,
-  0xa7, 0x6e, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x0b,
-  0x75, 0xec, 0x26, 0x21, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x50,
-  0x5e, 0xa7, 0x6e, 0x68, 0x26, 0x10, 0x9d, 0xb2, 0x01, 0x9d, 0xd1, 0x84,
-  0x00, 0xb8, 0xc0, 0xa9, 0x59, 0x82, 0x55, 0x18, 0x68, 0x71, 0x4c, 0xc3,
-  0x0e, 0x54, 0xad, 0x0e, 0x58, 0x02, 0x0f, 0x04, 0x55, 0x50, 0xb5, 0x3c,
-  0x98, 0x65, 0x60, 0x05, 0x57, 0xd8, 0x87, 0xe1, 0x08, 0x7f, 0x30, 0x9b,
-  0xe1, 0xbb, 0x7f, 0x18, 0x66, 0xb8, 0x21, 0x88, 0x19, 0x32, 0xa8, 0x21,
-  0xd0, 0xe1, 0x88, 0x91, 0x50, 0x9b, 0xe1, 0xab, 0x40, 0xd0, 0x2b, 0x89,
-  0x61, 0x86, 0x1b, 0x02, 0x9a, 0x21, 0x83, 0x0a, 0x06, 0x9d, 0x65, 0x68,
-  0x05, 0x71, 0x08, 0x0e, 0x5f, 0x86, 0xb9, 0x14, 0x19, 0x66, 0xc4, 0xe0,
-  0x00, 0x40, 0x10, 0x0c, 0xb4, 0xdb, 0x29, 0x1d, 0xb1, 0x99, 0x9d, 0xd1,
-  0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20,
-  0x86, 0x22, 0x0e, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0xcb, 0x77,
-  0x58, 0xe7, 0x20, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xb8, 0x7e,
-  0xa7, 0x75, 0x18, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x0b,
-  0x7c, 0x5c, 0x47, 0x22, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x50,
-  0xce, 0xa7, 0x75, 0xd8, 0x26, 0xd0, 0x9d, 0xbe, 0xc1, 0x9d, 0xd1, 0x84,
-  0x00, 0xb8, 0xc0, 0xa9, 0x59, 0x02, 0x71, 0x18, 0x6e, 0x98, 0x09, 0xf0,
-  0x01, 0x83, 0x59, 0x86, 0x57, 0x80, 0x85, 0xa0, 0xce, 0x06, 0x76, 0xe0,
-  0x02, 0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0x4a, 0x9f, 0xd8,
-  0xc1, 0x09, 0xd0, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x49, 0x7d,
-  0x62, 0x27, 0x10, 0x2e, 0x18, 0xa6, 0xd4, 0xa6, 0x76, 0xe0, 0x02, 0xa7,
-  0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0x72, 0x1f, 0xdb, 0xf1, 0x89,
-  0xd2, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0xe9, 0x7d, 0x6c, 0x27,
-  0x10, 0x2e, 0x18, 0xe6, 0x02, 0xa7, 0xee, 0x70, 0xea, 0x66, 0x66, 0x98,
-  0x23, 0x93, 0x61, 0x8e, 0x18, 0xe6, 0x88, 0x61, 0x46, 0x0c, 0x0e, 0x00,
-  0x04, 0xc1, 0x40, 0xa3, 0x1f, 0xf1, 0xf9, 0x1b, 0xf8, 0x19, 0x4d, 0x08,
-  0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28,
-  0x22, 0x91, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xb8, 0xf6, 0x27, 0x7d,
-  0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x8b, 0x7f, 0xd4,
-  0x27, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xb8, 0xfa, 0x67,
-  0x7d, 0x12, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x03, 0x85, 0x84,
-  0xd4, 0x27, 0x75, 0x82, 0xfb, 0xd1, 0x9d, 0xfa, 0x19, 0x4d, 0x08, 0x80,
-  0x0b, 0x9c, 0x9a, 0x25, 0x10, 0x87, 0xe1, 0x06, 0xb8, 0xe0, 0x1f, 0x30,
-  0x98, 0x65, 0x88, 0x05, 0x71, 0x08, 0x6c, 0x6f, 0xfa, 0x26, 0x3e, 0xc3,
-  0x11, 0x74, 0xe1, 0x37, 0xc4, 0x37, 0xcb, 0x20, 0x0b, 0xb5, 0x10, 0xd8,
-  0xdf, 0xd4, 0x45, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x38,
-  0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x61, 0x42, 0x3a, 0xdc,
-  0x10, 0x90, 0x10, 0x18, 0xcc, 0x32, 0xcc, 0x02, 0x2d, 0x04, 0x36, 0x9c,
-  0x0e, 0x7c, 0x66, 0x09, 0x72, 0xc1, 0x4c, 0x87, 0x88, 0xcf, 0x2c, 0x41,
-  0x2e, 0x0c, 0x47, 0xfc, 0xc5, 0xe9, 0x08, 0xdf, 0x2c, 0x83, 0x2d, 0xe4,
-  0x42, 0x60, 0xa0, 0x81, 0x3a, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3,
-  0x5c, 0xe0, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x44, 0x0c,
-  0xe9, 0x70, 0x43, 0xf0, 0x42, 0x60, 0x30, 0xcb, 0x70, 0x0b, 0xb8, 0x10,
-  0x18, 0xec, 0x0c, 0xf1, 0x99, 0x25, 0xc8, 0x05, 0x23, 0x66, 0x07, 0x3e,
-  0xb3, 0x04, 0xb9, 0x30, 0xd0, 0xe2, 0x68, 0xb3, 0x80, 0xd1, 0x02, 0x71,
-  0x0b, 0x02, 0x2e, 0xd0, 0x4c, 0x2d, 0x5c, 0x30, 0x8c, 0xc9, 0x8e, 0xed,
-  0xc4, 0x67, 0x38, 0xc2, 0x35, 0x6e, 0x87, 0xf8, 0x66, 0x19, 0x74, 0xa1,
-  0x17, 0x02, 0xc3, 0x9d, 0xd7, 0x88, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18,
-  0xe6, 0x02, 0xa7, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0x7e,
-  0x48, 0x87, 0x1b, 0x82, 0x1e, 0x02, 0x83, 0x59, 0x86, 0x5d, 0xe0, 0x85,
-  0xc0, 0x06, 0xf0, 0x81, 0xcf, 0x2c, 0x41, 0x38, 0x58, 0xef, 0x10, 0xf1,
-  0x99, 0x25, 0x08, 0x87, 0xe1, 0x88, 0xdc, 0xf0, 0x1d, 0xe1, 0x9b, 0x65,
-  0xf0, 0x85, 0x70, 0x08, 0x4c, 0x37, 0x7e, 0x27, 0x3e, 0x16, 0x38, 0xf4,
-  0xb9, 0x60, 0x98, 0x0b, 0x9c, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c,
-  0x8a, 0x50, 0x23, 0x1d, 0x6e, 0x08, 0xd0, 0x08, 0x0c, 0x66, 0x19, 0x7e,
-  0x01, 0x1c, 0x02, 0x3b, 0x9f, 0x21, 0x3e, 0xb3, 0x04, 0xe1, 0x60, 0x04,
-  0xfb, 0xc0, 0x67, 0x96, 0x20, 0x1c, 0x06, 0x5a, 0x1c, 0x6d, 0x17, 0x30,
-  0x5e, 0x20, 0x7e, 0x41, 0x00, 0x07, 0xd4, 0xe9, 0x85, 0x0b, 0x86, 0xb9,
-  0xc0, 0xa9, 0xdb, 0x9c, 0x3a, 0xdc, 0x19, 0xe6, 0xd2, 0x65, 0x98, 0x23,
-  0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xd0, 0xf2,
-  0xe8, 0x8c, 0x48, 0xa8, 0x8e, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08,
-  0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00,
-  0x01, 0x40, 0x10, 0x0c, 0x2e, 0x50, 0x72, 0xa3, 0x84, 0x08, 0x46, 0x0c,
-  0x10, 0x00, 0x04, 0xc1, 0xe0, 0x0a, 0xa5, 0x37, 0x4a, 0x88, 0x60, 0xc4,
-  0x00, 0x01, 0x40, 0x10, 0x0c, 0x2e, 0x51, 0x82, 0xa3, 0x84, 0x08, 0x46,
-  0x0c, 0x14, 0x00, 0x04, 0xc1, 0x40, 0x49, 0xa5, 0x37, 0x72, 0xa1, 0x80,
-  0x8f, 0x7e, 0x48, 0x8f, 0x46, 0x13, 0x02, 0xe0, 0x02, 0xa7, 0x66, 0x09,
-  0xc4, 0x61, 0xa0, 0xc5, 0x31, 0x8d, 0x56, 0xf0, 0xc3, 0x80, 0x15, 0x58,
-  0xe2, 0x15, 0x84, 0x70, 0xf0, 0xc3, 0x00, 0x16, 0x66, 0x19, 0xc6, 0xa1,
-  0x1c, 0xea, 0x63, 0x38, 0x42, 0x3f, 0xc0, 0x68, 0xf8, 0x6e, 0x3f, 0x86,
-  0x19, 0x6e, 0x08, 0x56, 0x88, 0x0c, 0x6a, 0x08, 0x74, 0x38, 0x62, 0x3f,
-  0xc8, 0x68, 0xf8, 0x2a, 0x10, 0xf4, 0xfa, 0x63, 0x98, 0xe1, 0x86, 0xc0,
-  0x85, 0xc8, 0xa0, 0x82, 0x41, 0x67, 0x19, 0xc8, 0x21, 0x1f, 0x82, 0x93,
-  0x9f, 0x61, 0x6e, 0x64, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03,
-  0x2d, 0x96, 0xfe, 0x88, 0x87, 0x5a, 0x69, 0x34, 0x21, 0x00, 0x46, 0x13,
-  0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x43, 0x46,
-  0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0xc2, 0x25, 0x53, 0x3a, 0x88, 0x60,
-  0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xae, 0x5c, 0x3a, 0x25, 0x86, 0x08,
-  0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0xd2, 0x25, 0x54, 0x92, 0x88,
-  0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x94, 0x70, 0x3a, 0x25, 0x33,
-  0x0a, 0x68, 0xe9, 0x8e, 0x64, 0x69, 0x34, 0x21, 0x00, 0x2e, 0x70, 0x6a,
-  0x96, 0x20, 0x1f, 0x86, 0x1b, 0x5a, 0x44, 0x97, 0xc0, 0x60, 0x96, 0xc1,
-  0x1c, 0xce, 0x21, 0xa8, 0x30, 0x52, 0x25, 0xb8, 0xc0, 0xa9, 0x11, 0x83,
-  0x03, 0x00, 0x41, 0x30, 0x98, 0xc6, 0x69, 0x95, 0x68, 0x44, 0x8f, 0x46,
-  0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0x22, 0xa7, 0x55, 0x0a, 0x84, 0x0b,
-  0x86, 0x29, 0x32, 0x7a, 0x25, 0xb8, 0xc0, 0xa9, 0x11, 0x83, 0x03, 0x00,
-  0x41, 0x30, 0x98, 0xd0, 0x09, 0x96, 0x6c, 0xe4, 0x8f, 0x46, 0x0c, 0x0e,
-  0x00, 0x04, 0xc1, 0x60, 0x4a, 0x27, 0x58, 0x0a, 0x84, 0x0b, 0x86, 0xb9,
-  0xc0, 0xa9, 0x3b, 0x9c, 0xba, 0x16, 0x1a, 0xe6, 0x7c, 0x66, 0x98, 0x23,
-  0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xd0, 0xdc,
-  0x89, 0x97, 0xf2, 0x48, 0x9d, 0x46, 0x13, 0x02, 0x60, 0x34, 0x41, 0x08,
-  0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a, 0x48, 0x64, 0xc4, 0x00,
-  0x01, 0x40, 0x10, 0x0c, 0xae, 0x7a, 0x1a, 0xa7, 0x84, 0x08, 0x46, 0x0c,
-  0x10, 0x00, 0x04, 0xc1, 0xe0, 0xb2, 0x27, 0x72, 0x4a, 0x88, 0x60, 0xc4,
-  0x00, 0x01, 0x40, 0x10, 0x0c, 0xae, 0x7b, 0x2a, 0xa7, 0x84, 0x08, 0x46,
-  0x0c, 0x14, 0x00, 0x04, 0xc1, 0x40, 0xf1, 0x27, 0x72, 0x1a, 0xa5, 0x20,
-  0x9e, 0x68, 0xe9, 0x9d, 0x46, 0x13, 0x02, 0xe0, 0x02, 0xa7, 0x66, 0x09,
-  0xf2, 0x61, 0xb8, 0x41, 0x4d, 0xec, 0x09, 0x0c, 0x66, 0x19, 0xd0, 0x21,
-  0x1f, 0x02, 0xab, 0xa3, 0x3b, 0x8a, 0xcf, 0x70, 0x04, 0x9c, 0xe0, 0x11,
-  0xf1, 0xcd, 0x32, 0xa4, 0x03, 0x3b, 0x04, 0x96, 0x47, 0x71, 0x12, 0x1f,
-  0x0b, 0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4e, 0x59, 0x60, 0xc8, 0xc7,
-  0x8a, 0x20, 0x3e, 0x45, 0x80, 0x94, 0x0e, 0x37, 0x04, 0xfe, 0x04, 0x06,
-  0xb3, 0x0c, 0xea, 0xb0, 0x0e, 0x81, 0x0d, 0xa1, 0x04, 0x9f, 0x59, 0x02,
-  0x78, 0x30, 0x50, 0x22, 0xe2, 0x33, 0x4b, 0x00, 0x0f, 0xc3, 0x11, 0x7b,
-  0x12, 0x4a, 0xc2, 0x37, 0xcb, 0xd0, 0x0e, 0xf0, 0x10, 0x18, 0x9f, 0x88,
-  0x52, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x38, 0x65, 0x41,
-  0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xb1, 0x52, 0x3a, 0xdc, 0x10, 0xa4,
-  0x14, 0x18, 0xcc, 0x32, 0xb8, 0xc3, 0x3b, 0x04, 0xa6, 0x4a, 0x43, 0x7c,
-  0x66, 0x09, 0xe0, 0xc1, 0x88, 0x56, 0x82, 0xcf, 0x2c, 0x01, 0x3c, 0x0c,
-  0xb4, 0x38, 0x9a, 0x3a, 0x60, 0xeb, 0x40, 0xb8, 0x83, 0xf0, 0x0e, 0x2c,
-  0xc5, 0x0e, 0x17, 0x0c, 0x63, 0xac, 0x04, 0x4b, 0xf1, 0x19, 0x8e, 0x30,
-  0x95, 0x58, 0x22, 0xbe, 0x59, 0x86, 0x78, 0xa0, 0x87, 0xc0, 0x64, 0xe9,
-  0x54, 0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x29, 0x0b,
-  0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0x9c, 0xd2, 0xe1, 0x86, 0xe0,
-  0xa6, 0xc0, 0x60, 0x96, 0x41, 0x1e, 0xe6, 0x21, 0xb0, 0x41, 0x97, 0xe0,
-  0x33, 0x4b, 0x80, 0x0f, 0x76, 0x4b, 0x44, 0x7c, 0x66, 0x09, 0xf0, 0x61,
-  0x38, 0x22, 0x56, 0x70, 0x49, 0xf8, 0x66, 0x19, 0xea, 0x01, 0x1f, 0x02,
-  0x93, 0x95, 0x5c, 0x8a, 0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x02,
-  0xa7, 0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xc8, 0x4a, 0x87,
-  0x1b, 0x02, 0xb1, 0x02, 0x83, 0x59, 0x06, 0x7b, 0xb8, 0x87, 0xc0, 0xc2,
-  0x69, 0x88, 0xcf, 0x2c, 0x01, 0x3e, 0x18, 0x61, 0x4e, 0xf0, 0x99, 0x25,
-  0xc0, 0x87, 0x81, 0x16, 0x47, 0x93, 0x07, 0x6c, 0x1e, 0x08, 0x7b, 0x10,
-  0xee, 0x01, 0xaf, 0xe8, 0xe1, 0x82, 0x61, 0x2e, 0x70, 0xea, 0x36, 0xa7,
-  0x4e, 0x96, 0x86, 0xb9, 0xf1, 0x19, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66,
-  0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xb4, 0xb9, 0x0a, 0x2b, 0x7f, 0x7a,
-  0xab, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18,
-  0x4d, 0x20, 0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83,
-  0x4b, 0xaf, 0xd0, 0x2a, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30,
-  0xb8, 0xf6, 0x2a, 0xad, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04,
-  0x83, 0x8b, 0xaf, 0xd4, 0x2a, 0x21, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41,
-  0x30, 0x50, 0x46, 0x2b, 0xad, 0x50, 0x2a, 0xb0, 0xab, 0x9c, 0xa2, 0xab,
-  0xd1, 0x84, 0x00, 0xb8, 0xc0, 0xa9, 0x59, 0x82, 0x7c, 0x18, 0x68, 0x71,
-  0x4c, 0x83, 0x1c, 0xe4, 0x34, 0x18, 0x07, 0x96, 0x30, 0x07, 0x01, 0x1f,
-  0xe4, 0x34, 0x38, 0x87, 0x59, 0x06, 0x7d, 0xe0, 0x87, 0x77, 0x19, 0x8e,
-  0x90, 0x17, 0x9d, 0x1a, 0xbe, 0x9b, 0x97, 0x61, 0x86, 0x1b, 0x82, 0x92,
-  0x22, 0x83, 0x1a, 0x02, 0x1d, 0x8e, 0xa8, 0x17, 0x9f, 0x1a, 0xbe, 0x0a,
-  0x04, 0xbd, 0x7b, 0x19, 0x66, 0xb8, 0x21, 0x40, 0x29, 0x32, 0xa8, 0x60,
-  0xd0, 0x59, 0x86, 0x7d, 0x80, 0x89, 0xe0, 0xd8, 0x69, 0x98, 0xeb, 0x9f,
-  0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x40, 0x5b, 0xad, 0xbc, 0xb2,
-  0xa9, 0xd3, 0x1a, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d, 0x18,
-  0x84, 0xd1, 0x04, 0x62, 0x28, 0xe2, 0x90, 0x11, 0x03, 0x04, 0x00, 0x41,
-  0x30, 0xb8, 0x64, 0x0b, 0xb4, 0x0e, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10,
-  0x04, 0x83, 0x6b, 0xb6, 0x42, 0x8b, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00,
-  0x41, 0x30, 0xb8, 0x68, 0x4b, 0xb4, 0x24, 0x22, 0x18, 0x31, 0x50, 0x00,
-  0x10, 0x04, 0x03, 0x65, 0xb7, 0x42, 0x0b, 0xac, 0x02, 0xd7, 0x8a, 0x2b,
-  0xd6, 0x1a, 0x4d, 0x08, 0x80, 0x0b, 0x9c, 0x9a, 0x25, 0x80, 0x89, 0xe1,
-  0x86, 0x93, 0xa1, 0x2d, 0x30, 0x98, 0x65, 0xe8, 0x07, 0x7f, 0x08, 0x6a,
-  0xa7, 0x48, 0x0b, 0x2e, 0x70, 0x6a, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c,
-  0xa6, 0xde, 0x2a, 0x2d, 0x96, 0xa1, 0xab, 0x11, 0x83, 0x03, 0x00, 0x41,
-  0x30, 0x98, 0x7c, 0xab, 0xb4, 0x02, 0xe1, 0x82, 0x61, 0xca, 0xa7, 0x52,
-  0x0b, 0x2e, 0x70, 0x6a, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x26, 0xf1,
-  0x52, 0x2d, 0x98, 0xc9, 0xab, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x98,
-  0xc6, 0x4b, 0xb5, 0x02, 0xe1, 0x82, 0x61, 0x2e, 0x70, 0xea, 0x0e, 0xa7,
-  0xee, 0xa4, 0x86, 0x39, 0x1c, 0x1a, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66,
-  0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x34, 0xf4, 0xb2, 0xad, 0xb9, 0x22,
-  0xaf, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84, 0x41, 0x18,
-  0x4d, 0x20, 0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83,
-  0xeb, 0xbd, 0x7a, 0x2b, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30,
-  0xb8, 0xe0, 0xcb, 0xb7, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04,
-  0x83, 0x2b, 0xbe, 0x7e, 0x2b, 0x21, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41,
-  0x30, 0x50, 0xf0, 0xcb, 0xb7, 0xfa, 0x2a, 0x58, 0x2f, 0xd7, 0x4a, 0xaf,
-  0xd1, 0x84, 0x00, 0xb8, 0xc0, 0xa9, 0x59, 0x02, 0x98, 0x18, 0x6e, 0x20,
-  0x1b, 0xf8, 0x02, 0x83, 0x59, 0x86, 0x7f, 0x80, 0x89, 0xc0, 0xde, 0x2a,
-  0xae, 0xe2, 0x33, 0x1c, 0x81, 0x36, 0x72, 0x45, 0x7c, 0xb3, 0x0c, 0x20,
-  0x31, 0x12, 0x81, 0xcd, 0x55, 0xda, 0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17,
-  0x0c, 0x73, 0x81, 0x53, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11,
-  0xfa, 0xa5, 0xc3, 0x0d, 0x01, 0x7e, 0x81, 0xc1, 0x2c, 0x43, 0x48, 0x88,
-  0x44, 0x60, 0xc3, 0x5e, 0xc1, 0x67, 0x96, 0xe0, 0x24, 0x4c, 0xaf, 0x88,
-  0xf8, 0xcc, 0x12, 0x9c, 0xc4, 0x70, 0xc4, 0xdc, 0xec, 0x95, 0xf0, 0xcd,
-  0x32, 0x90, 0xc4, 0x49, 0x04, 0x46, 0x37, 0x7c, 0x15, 0x1f, 0x0b, 0x1c,
-  0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4e, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20,
-  0x3e, 0x45, 0x94, 0x98, 0x0e, 0x37, 0x04, 0x23, 0x06, 0x06, 0xb3, 0x0c,
-  0x25, 0x61, 0x12, 0x81, 0x91, 0xd6, 0x10, 0x9f, 0x59, 0x82, 0x93, 0x30,
-  0xe2, 0xb4, 0xe0, 0x33, 0x4b, 0x70, 0x12, 0x03, 0x2d, 0x8e, 0x16, 0x12,
-  0x98, 0x48, 0x10, 0x25, 0x21, 0x98, 0x04, 0xbf, 0x8d, 0xc4, 0x05, 0xc3,
-  0x98, 0x69, 0xa9, 0x56, 0x7c, 0x86, 0x23, 0x40, 0x67, 0xb5, 0x88, 0x6f,
-  0x96, 0x01, 0x25, 0x56, 0x22, 0x30, 0xd6, 0x0a, 0x9d, 0xf8, 0x58, 0x30,
-  0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x70, 0xca, 0x02, 0x43, 0x3e, 0x56, 0x04,
-  0xf1, 0x29, 0x62, 0xc6, 0x74, 0xb8, 0x21, 0x88, 0x31, 0x30, 0x98, 0x65,
-  0x48, 0x09, 0x95, 0x08, 0x6c, 0xa0, 0x2d, 0xf8, 0xcc, 0x12, 0xbc, 0x84,
-  0xc5, 0x16, 0x11, 0x9f, 0x59, 0x82, 0x97, 0x18, 0x8e, 0x58, 0x1d, 0xd9,
-  0x12, 0xbe, 0x59, 0x06, 0x96, 0x78, 0x89, 0xc0, 0x58, 0x67, 0xb6, 0xe2,
-  0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x29, 0x0b, 0x22, 0xf9,
-  0x58, 0x11, 0xc4, 0xa7, 0x08, 0x1f, 0xd3, 0xe1, 0x86, 0x80, 0xc7, 0xc0,
-  0x60, 0x96, 0xa1, 0x25, 0x5c, 0x22, 0xb0, 0xdd, 0x1a, 0xe2, 0x33, 0x4b,
-  0xf0, 0x12, 0x46, 0x80, 0x17, 0x7c, 0x66, 0x09, 0x5e, 0x62, 0xa0, 0xc5,
-  0xd1, 0x52, 0x02, 0x53, 0x09, 0xa2, 0x25, 0x04, 0x97, 0x60, 0xbb, 0x95,
-  0xb8, 0x60, 0x98, 0x0b, 0x9c, 0xba, 0xcd, 0xa9, 0x63, 0xad, 0x61, 0xae,
-  0x97, 0x86, 0x39, 0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10,
-  0x04, 0x03, 0xad, 0xcd, 0x76, 0x0c, 0xbf, 0xd2, 0x6c, 0x34, 0x21, 0x00,
-  0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88,
-  0x44, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0xa2, 0x33, 0x31, 0x4b,
-  0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xae, 0x3a, 0x1b, 0xb3,
-  0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0xb2, 0x33, 0x32,
-  0x4b, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40, 0x10, 0x0c, 0x94, 0x3e, 0x1b,
-  0x33, 0x11, 0x0b, 0xe0, 0x6c, 0xc6, 0xdc, 0x6c, 0x34, 0x21, 0x00, 0x2e,
-  0x70, 0x6a, 0x96, 0x00, 0x26, 0x06, 0x5a, 0x1c, 0xd3, 0xd8, 0x07, 0x33,
-  0x0e, 0xf4, 0x81, 0x25, 0xfa, 0x41, 0x78, 0x09, 0x33, 0x0e, 0xfc, 0x61,
-  0xc4, 0xc0, 0x00, 0x40, 0x10, 0x0c, 0x9e, 0x3f, 0xf3, 0xb1, 0x77, 0x32,
-  0xfb, 0x80, 0x97, 0xf8, 0x98, 0x10, 0xc8, 0xc7, 0x02, 0x79, 0x81, 0x8f,
-  0x15, 0xff, 0x10, 0x1f, 0x2b, 0x02, 0xf9, 0x58, 0x10, 0x12, 0xf0, 0x19,
-  0x31, 0x30, 0x00, 0x10, 0x04, 0x83, 0xc7, 0xd4, 0xca, 0xac, 0x9e, 0x4c,
-  0x28, 0xe2, 0x63, 0x81, 0x20, 0x1f, 0x0b, 0x0e, 0xf8, 0x5c, 0x30, 0xcc,
-  0x88, 0x81, 0x03, 0x80, 0x20, 0x18, 0x30, 0xac, 0x96, 0x66, 0x34, 0xd6,
-  0x62, 0xa0, 0x16, 0xb8, 0x99, 0x9b, 0xb9, 0xd9, 0x98, 0xf9, 0xd9, 0x2c,
-  0xc1, 0x08, 0x0d, 0x37, 0xf8, 0x95, 0xa8, 0x81, 0xc1, 0x2c, 0x83, 0x4c,
-  0x8c, 0x50, 0x30, 0x62, 0x60, 0x00, 0x20, 0x08, 0x06, 0x0f, 0xab, 0xb1,
-  0x19, 0x3f, 0x59, 0xd0, 0x63, 0xf0, 0x19, 0x31, 0x30, 0x00, 0x10, 0x04,
-  0x83, 0xc7, 0xd5, 0xdc, 0xac, 0x9f, 0x2c, 0xf8, 0x31, 0xf8, 0x8c, 0x26,
-  0xd0, 0xd8, 0x30, 0xdc, 0x10, 0x98, 0x1a, 0x18, 0xcc, 0x32, 0xcc, 0x44,
-  0x4d, 0x04, 0xc3, 0x11, 0xc5, 0x99, 0x0d, 0xdf, 0x19, 0xc3, 0x0c, 0x37,
-  0x04, 0x32, 0x46, 0x06, 0x35, 0x04, 0x3a, 0x1c, 0x71, 0xac, 0xd9, 0xf0,
-  0x55, 0x20, 0xe8, 0x25, 0xc3, 0x0c, 0x37, 0x04, 0x35, 0x46, 0x06, 0x15,
-  0x0c, 0x3a, 0xcb, 0x40, 0x13, 0x69, 0x11, 0x5c, 0x7e, 0x0d, 0x73, 0x2a,
-  0x35, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x68, 0xb8, 0x66, 0x6a,
-  0x63, 0x46, 0x6b, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09,
-  0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x1c, 0x32, 0x62, 0x80, 0x00, 0x20,
-  0x08, 0x06, 0xd7, 0xaf, 0xb5, 0xda, 0x41, 0x04, 0x23, 0x06, 0x08, 0x00,
-  0x82, 0x60, 0x70, 0x81, 0x9b, 0xab, 0x31, 0x44, 0x30, 0x62, 0x80, 0x00,
-  0x20, 0x08, 0x06, 0x57, 0xb8, 0xbd, 0x9a, 0x44, 0x04, 0x23, 0x06, 0x0a,
-  0x00, 0x82, 0x60, 0xa0, 0xa0, 0x9b, 0xab, 0xb5, 0x59, 0xb0, 0x6b, 0x7e,
-  0x96, 0x6b, 0xa3, 0x09, 0x01, 0x70, 0x81, 0x53, 0xb3, 0x04, 0x69, 0x31,
-  0xdc, 0x90, 0x85, 0x1b, 0x18, 0xcc, 0x32, 0xd8, 0xc4, 0x4d, 0x04, 0x85,
-  0x66, 0xb1, 0x06, 0x17, 0x38, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06,
-  0x93, 0xba, 0xc9, 0xda, 0x17, 0x6a, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60,
-  0x30, 0xad, 0x9b, 0xac, 0x05, 0xc2, 0x05, 0xc3, 0xd4, 0x9a, 0xd9, 0x1a,
-  0x5c, 0xe0, 0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x4c, 0xef, 0x76,
-  0x6b, 0x62, 0x60, 0x6a, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x30, 0xc1,
-  0xdb, 0xad, 0x05, 0xc2, 0x05, 0xc3, 0x5c, 0xe0, 0xd4, 0x1d, 0x4e, 0x1d,
-  0x8d, 0x0d, 0x73, 0x65, 0x35, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88,
-  0xc1, 0x01, 0x80, 0x20, 0x18, 0x68, 0xf5, 0x36, 0x6e, 0xa0, 0x16, 0x6f,
-  0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a,
-  0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x17,
-  0xbf, 0xa9, 0x5b, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70,
-  0xf5, 0xdb, 0xba, 0x25, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06,
-  0x97, 0xbf, 0xb1, 0x5b, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60,
-  0xa0, 0x94, 0xdc, 0xba, 0xa9, 0x5a, 0x80, 0x6f, 0xbb, 0x66, 0x6f, 0xa3,
-  0x09, 0x01, 0x70, 0x81, 0x53, 0xb3, 0x04, 0x69, 0x31, 0xdc, 0x60, 0x07,
-  0xfd, 0x06, 0x06, 0xb3, 0x0c, 0x38, 0x91, 0x16, 0x81, 0xf1, 0x99, 0x9f,
-  0xc5, 0x67, 0x38, 0x62, 0x0f, 0xfe, 0x8c, 0xf8, 0x66, 0x19, 0x72, 0x82,
-  0x27, 0x02, 0x03, 0x35, 0x3e, 0x88, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18,
-  0xe6, 0x02, 0xa7, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0x4e,
-  0x4e, 0x87, 0x1b, 0x82, 0x92, 0x03, 0x83, 0x59, 0x06, 0x9d, 0xd8, 0x89,
-  0xc0, 0x06, 0x54, 0x83, 0xcf, 0x2c, 0x01, 0x58, 0xd8, 0xa9, 0x11, 0xf1,
-  0x99, 0x25, 0x00, 0x8b, 0xe1, 0x08, 0x53, 0x40, 0x35, 0xe1, 0x9b, 0x65,
-  0xe8, 0x09, 0xb0, 0x08, 0xec, 0x14, 0x52, 0x2d, 0x3e, 0x16, 0x38, 0xf4,
-  0xb9, 0x60, 0x98, 0x0b, 0x9c, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41, 0x7c,
-  0x8a, 0x90, 0x39, 0x1d, 0x6e, 0x08, 0x60, 0x0e, 0x0c, 0x66, 0x19, 0x7c,
-  0xe2, 0x27, 0x02, 0x8b, 0xb5, 0x21, 0x3e, 0xb3, 0x04, 0x60, 0x61, 0x04,
-  0xad, 0xc1, 0x67, 0x96, 0x00, 0x2c, 0x06, 0x5a, 0x1c, 0x4d, 0x27, 0xb0,
-  0x9d, 0x20, 0x7c, 0x42, 0xf8, 0x09, 0xb1, 0xe0, 0x89, 0x0b, 0x86, 0xb1,
-  0x59, 0xbb, 0xb5, 0xf8, 0x0c, 0x47, 0xc8, 0x02, 0xae, 0x11, 0xdf, 0x2c,
-  0x43, 0x58, 0x90, 0x45, 0x60, 0xb9, 0x36, 0x0b, 0xf1, 0xb1, 0x60, 0xa0,
-  0xcf, 0x05, 0xc3, 0x5c, 0xe0, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2,
-  0x53, 0x04, 0xd8, 0xe9, 0x70, 0x43, 0xe0, 0x73, 0x60, 0x30, 0xcb, 0x20,
-  0x16, 0x63, 0x11, 0xd8, 0x10, 0x6e, 0xf0, 0x99, 0x25, 0x40, 0x0b, 0xf3,
-  0x35, 0x22, 0x3e, 0xb3, 0x04, 0x68, 0x31, 0x1c, 0xd1, 0x0b, 0xbf, 0x26,
-  0x7c, 0xb3, 0x0c, 0x65, 0x81, 0x16, 0x81, 0xf9, 0x02, 0xb8, 0xc5, 0xc7,
-  0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x53, 0x16, 0x44, 0xf2, 0xb1,
-  0x22, 0x88, 0x4f, 0x11, 0x6b, 0xa7, 0xc3, 0x0d, 0x41, 0xda, 0x81, 0xc1,
-  0x2c, 0x83, 0x59, 0x9c, 0x45, 0x60, 0xe8, 0x36, 0xc4, 0x67, 0x96, 0x00,
-  0x2d, 0x8c, 0x68, 0x37, 0xf8, 0xcc, 0x12, 0xa0, 0xc5, 0x40, 0x8b, 0xa3,
-  0x89, 0x05, 0x36, 0x16, 0x84, 0x59, 0x08, 0x67, 0x01, 0x1b, 0x64, 0x71,
-  0xc1, 0x30, 0x17, 0x38, 0x75, 0x9b, 0x53, 0x97, 0x6b, 0xc3, 0x9c, 0x7a,
-  0x0d, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08,
-  0x06, 0x9a, 0xde, 0xa1, 0x5d, 0xc9, 0xd9, 0xdd, 0x68, 0x42, 0x00, 0x8c,
-  0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89,
-  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x15, 0x7a, 0x6f, 0x97, 0x10,
-  0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0xa2, 0x07, 0x77, 0x09,
-  0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x35, 0x7a, 0x71, 0x97,
-  0x10, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x28, 0xaa, 0x07, 0x77,
-  0x2f, 0x17, 0xf4, 0x1d, 0xd8, 0xed, 0xdd, 0x68, 0x42, 0x00, 0x5c, 0xe0,
-  0xd4, 0x2c, 0x41, 0x5a, 0x0c, 0xb4, 0x38, 0xa6, 0x41, 0x13, 0xa0, 0x1e,
-  0xcc, 0x04, 0x4b, 0xd8, 0x84, 0x80, 0x16, 0xa0, 0x1e, 0xdc, 0xc4, 0x2c,
-  0x83, 0x5a, 0xb0, 0xc5, 0x3e, 0x0c, 0x47, 0x80, 0x44, 0xd8, 0x0d, 0xdf,
-  0x85, 0xc4, 0x30, 0xc3, 0x0d, 0x01, 0xcb, 0x91, 0x41, 0x0d, 0x81, 0x0e,
-  0x47, 0x84, 0x44, 0xd9, 0x0d, 0x5f, 0x05, 0x82, 0xde, 0x48, 0x0c, 0x33,
-  0xdc, 0x10, 0xbc, 0x1c, 0x19, 0x54, 0x30, 0xe8, 0x2c, 0xc3, 0x5a, 0x80,
-  0x46, 0x70, 0xf3, 0x36, 0xcc, 0x91, 0xd8, 0x30, 0x23, 0x06, 0x07, 0x00,
-  0x82, 0x60, 0xa0, 0xc9, 0x1e, 0xe8, 0xf5, 0x9c, 0xeb, 0x8d, 0x26, 0x04,
-  0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14,
-  0x71, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0xb9, 0x77, 0x7a,
-  0x07, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xa5, 0x7b, 0xa8,
-  0xc7, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0xbb, 0x97,
-  0x7a, 0x12, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0x81, 0x22, 0x7e,
-  0xa8, 0x77, 0x76, 0x41, 0xed, 0xe1, 0xdd, 0xec, 0x8d, 0x26, 0x04, 0xc0,
-  0x05, 0x4e, 0xcd, 0x12, 0x80, 0xc6, 0x70, 0xc3, 0x4c, 0xec, 0x1e, 0x18,
-  0xcc, 0x32, 0xb4, 0x85, 0x5b, 0x04, 0x25, 0x76, 0xab, 0x07, 0x17, 0x38,
-  0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x13, 0xf9, 0xb1, 0x9e, 0x4e,
-  0xec, 0xdd, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x4c, 0xe5, 0xc7, 0x7a,
-  0x81, 0x70, 0xc1, 0x30, 0x55, 0x76, 0xb0, 0x07, 0x17, 0x38, 0x35, 0x62,
-  0x70, 0x00, 0x20, 0x08, 0x06, 0x53, 0xfa, 0xc5, 0x1e, 0x4f, 0x80, 0xde,
-  0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x4c, 0xea, 0x17, 0x7b, 0x81, 0x70,
-  0xc1, 0x30, 0x17, 0x38, 0x75, 0x87, 0x53, 0xe7, 0x72, 0xc3, 0xdc, 0x8f,
-  0x0d, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08,
-  0x06, 0xda, 0xfb, 0xf5, 0x9e, 0xde, 0xad, 0xdf, 0x68, 0x42, 0x00, 0x8c,
-  0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89,
-  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x65, 0x7f, 0xe4, 0x97, 0x10,
-  0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0xf7, 0x57, 0x7e, 0x09,
-  0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x85, 0x7f, 0xe6, 0x97,
-  0x10, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x28, 0xff, 0x57, 0x7e,
-  0xa4, 0x17, 0xc8, 0x5f, 0xed, 0xc1, 0xdf, 0x68, 0x42, 0x00, 0x5c, 0xe0,
-  0xd4, 0x2c, 0x01, 0x68, 0x0c, 0x37, 0xc0, 0xc5, 0xfd, 0x81, 0xc1, 0x2c,
-  0xc3, 0x5b, 0x80, 0x46, 0x60, 0x76, 0x87, 0x77, 0xf1, 0x19, 0x8e, 0xb0,
-  0x8b, 0xbc, 0x23, 0xbe, 0x59, 0x06, 0xb8, 0x98, 0x8b, 0xc0, 0xf4, 0xee,
-  0x2e, 0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x29, 0x0b,
-  0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0x10, 0x0c, 0x74, 0xb8, 0x21,
-  0xf8, 0x3f, 0x30, 0x98, 0x65, 0x88, 0x0b, 0xb9, 0x08, 0x6c, 0x10, 0x3d,
-  0xf8, 0xcc, 0x12, 0xdc, 0x85, 0x85, 0x1e, 0x11, 0x9f, 0x59, 0x82, 0xbb,
-  0x18, 0x8e, 0x08, 0x0d, 0xd1, 0x13, 0xbe, 0x59, 0x06, 0xba, 0xb8, 0x8b,
-  0xc0, 0x44, 0x63, 0xf4, 0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9,
-  0xc0, 0x29, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0x16, 0x0c,
-  0x74, 0xb8, 0x21, 0x50, 0xc1, 0x00, 0x0c, 0x66, 0x19, 0xea, 0xc2, 0x2e,
-  0x02, 0x5b, 0xbd, 0x21, 0x3e, 0xb3, 0x04, 0x77, 0x61, 0x84, 0xeb, 0xc1,
-  0x67, 0x96, 0xe0, 0x2e, 0x06, 0x5a, 0x1c, 0x2d, 0x2e, 0x30, 0xb9, 0x20,
-  0xea, 0x42, 0xb0, 0x0b, 0x9c, 0x99, 0x8b, 0x0b, 0x86, 0xb1, 0xd6, 0x8b,
-  0xbd, 0xf8, 0x0c, 0x47, 0xb0, 0x86, 0xec, 0x11, 0xdf, 0x2c, 0x03, 0x5e,
-  0xec, 0x45, 0x60, 0xb3, 0xd7, 0x1a, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05,
-  0xc3, 0x5c, 0xe0, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x84,
-  0x0e, 0x06, 0x3a, 0xdc, 0x10, 0xe0, 0x60, 0x00, 0x06, 0xb3, 0x0c, 0x79,
-  0xa1, 0x17, 0x81, 0x0d, 0xbb, 0x07, 0x9f, 0x59, 0x82, 0xbf, 0x30, 0xdc,
-  0x23, 0xe2, 0x33, 0x4b, 0xf0, 0x17, 0xc3, 0x11, 0xb7, 0x91, 0x7b, 0xc2,
-  0x37, 0xcb, 0xc0, 0x17, 0x7f, 0x11, 0x18, 0x6e, 0xe8, 0x5e, 0x7c, 0x2c,
-  0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x38, 0x65, 0x41, 0x24, 0x1f, 0x2b,
-  0x82, 0xf8, 0x14, 0x51, 0x86, 0x81, 0x0e, 0x37, 0x04, 0x63, 0x18, 0x80,
-  0xc1, 0x2c, 0x43, 0x5f, 0xf8, 0x45, 0x60, 0xe2, 0x37, 0xc4, 0x67, 0x96,
-  0xe0, 0x2f, 0x8c, 0x38, 0x3f, 0xf8, 0xcc, 0x12, 0xfc, 0xc5, 0x40, 0x8b,
-  0xa3, 0xe5, 0x05, 0xa6, 0x17, 0x44, 0x5f, 0x08, 0x7e, 0x41, 0x3a, 0x7b,
-  0x71, 0xc1, 0x30, 0x17, 0x38, 0x75, 0x9b, 0x53, 0x37, 0x7b, 0xc3, 0x1c,
-  0xb9, 0x0d, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20,
-  0x08, 0x06, 0x1a, 0x1d, 0x06, 0x62, 0x18, 0xfc, 0x1f, 0x1c, 0x06, 0xa3,
-  0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40,
-  0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xd7, 0x1e,
-  0x06, 0x69, 0x18, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06,
-  0x17, 0x1f, 0x06, 0x6a, 0x18, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20,
-  0x08, 0x06, 0x57, 0x1f, 0x06, 0x6b, 0x18, 0x24, 0x44, 0x30, 0x62, 0xa0,
-  0x00, 0x20, 0x08, 0x06, 0x0a, 0x29, 0x06, 0x6a, 0x18, 0xa4, 0x60, 0x10,
-  0xdc, 0x61, 0xa0, 0x83, 0x41, 0x1d, 0x06, 0xa3, 0x09, 0x01, 0x70, 0x81,
-  0x53, 0xb3, 0x04, 0xa0, 0x31, 0xd0, 0xe2, 0x98, 0xc6, 0x5a, 0xd0, 0xa1,
-  0xa0, 0x16, 0x2c, 0xd1, 0x16, 0xc2, 0x5f, 0xd0, 0xa1, 0xe0, 0x16, 0xa6,
-  0x1f, 0x3a, 0x18, 0xc0, 0x67, 0x96, 0x21, 0x34, 0x46, 0xc3, 0x3e, 0x86,
-  0x23, 0x02, 0x1e, 0x0c, 0x86, 0xef, 0x84, 0x61, 0x86, 0x1b, 0x82, 0x13,
-  0x0c, 0xc8, 0xa0, 0x86, 0x40, 0x87, 0x23, 0xf8, 0x03, 0x0c, 0x83, 0xe1,
-  0xab, 0x40, 0xd0, 0xf3, 0x8f, 0x61, 0x86, 0x1b, 0x02, 0x15, 0x0c, 0xc8,
-  0xa0, 0x82, 0x41, 0x67, 0x19, 0x44, 0xe3, 0x36, 0x82, 0x73, 0xbf, 0x61,
-  0xee, 0xdf, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0xad, 0x15,
-  0x83, 0x3d, 0x0c, 0x70, 0x30, 0x48, 0xc5, 0x60, 0x34, 0x21, 0x00, 0x46,
-  0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x43,
-  0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0xa2, 0xc5, 0x40, 0x14, 0x83,
-  0x83, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0xaa, 0xc5, 0x60,
-  0x14, 0x03, 0x86, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0xb2,
-  0xc5, 0x80, 0x14, 0x03, 0x89, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1,
-  0x40, 0xe9, 0xc5, 0x60, 0x14, 0x03, 0x31, 0x0c, 0x02, 0x58, 0x0c, 0xe6,
-  0x30, 0x70, 0xc5, 0x60, 0x34, 0x21, 0x00, 0x2e, 0x70, 0x6a, 0x96, 0xe0,
-  0x36, 0x86, 0x1b, 0x5c, 0xc4, 0x16, 0x03, 0x30, 0x98, 0x65, 0x20, 0x8d,
-  0xd2, 0x08, 0xaa, 0x07, 0x03, 0x53, 0x0c, 0xe0, 0x02, 0xa7, 0x46, 0x0c,
-  0x0e, 0x00, 0x04, 0xc1, 0x60, 0xfa, 0xc5, 0xe0, 0x14, 0x83, 0xcd, 0x0e,
-  0x83, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x98, 0xc0, 0x31, 0x38, 0xc5,
-  0x20, 0x10, 0x2e, 0x18, 0xa6, 0xc0, 0x30, 0x58, 0xc5, 0x00, 0x2e, 0x70,
-  0x6a, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x26, 0x72, 0x0c, 0x58, 0x31,
-  0xb8, 0x91, 0x3d, 0x0c, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0x2a,
-  0xc7, 0x80, 0x15, 0x83, 0x40, 0xb8, 0x60, 0x98, 0x0b, 0x9c, 0xba, 0xc3,
-  0xa9, 0x4b, 0xc1, 0x60, 0x98, 0xd3, 0xb9, 0x61, 0x8e, 0x18, 0xe6, 0x88,
-  0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x40, 0x53, 0xc7, 0x00, 0x17,
-  0x83, 0x3a, 0x0c, 0xcc, 0x31, 0x18, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21,
-  0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0x22, 0x91, 0x11, 0x03,
-  0x04, 0x00, 0x41, 0x30, 0xb8, 0xe2, 0x31, 0xf8, 0xc5, 0x20, 0x21, 0x82,
-  0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xb8, 0xe4, 0x31, 0x00, 0xc7, 0x20,
-  0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xb8, 0xe6, 0x31, 0x08,
-  0xc7, 0x20, 0x21, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x50, 0xf4,
-  0x31, 0x00, 0xc7, 0xe0, 0x0f, 0x83, 0xa0, 0x1d, 0x03, 0x58, 0x0c, 0xd6,
-  0x31, 0x18, 0x4d, 0x08, 0x80, 0x0b, 0x9c, 0x9a, 0x25, 0xb8, 0x8d, 0xe1,
-  0x86, 0x35, 0x91, 0xc7, 0x00, 0x0c, 0x66, 0x19, 0x4c, 0xe3, 0x36, 0x02,
-  0x8b, 0xc3, 0x60, 0x0e, 0x83, 0xf8, 0x0c, 0x47, 0xdc, 0x01, 0x1d, 0x06,
-  0xc4, 0x37, 0xcb, 0x70, 0x1a, 0xaa, 0x11, 0x58, 0x1d, 0x06, 0x78, 0x10,
-  0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4e, 0x59, 0x60, 0xc8,
-  0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xf0, 0x63, 0xa0, 0xc3, 0x0d, 0x81, 0x3e,
-  0x06, 0x60, 0x30, 0xcb, 0x80, 0x1a, 0xa9, 0x11, 0xd8, 0xd0, 0x87, 0x01,
-  0x7c, 0x66, 0x09, 0x5c, 0xc3, 0xf8, 0x30, 0x20, 0xe2, 0x33, 0x4b, 0xe0,
-  0x1a, 0xc3, 0x11, 0xa2, 0xd0, 0x87, 0x81, 0xf0, 0xcd, 0x32, 0xac, 0x86,
-  0x6b, 0x04, 0x36, 0x0a, 0x7e, 0x18, 0xc4, 0xc7, 0x02, 0x87, 0x3e, 0x17,
-  0x0c, 0x73, 0x81, 0x53, 0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11,
-  0x27, 0x19, 0xe8, 0x70, 0x43, 0x50, 0x92, 0x01, 0x18, 0xcc, 0x32, 0xb0,
-  0x46, 0x6b, 0x04, 0x66, 0x8a, 0xc1, 0x10, 0x9f, 0x59, 0x02, 0xd7, 0x30,
-  0x22, 0x15, 0x03, 0xf8, 0xcc, 0x12, 0xb8, 0xc6, 0x40, 0x8b, 0xa3, 0xa1,
-  0x06, 0x96, 0x1a, 0x04, 0x6b, 0x08, 0xad, 0xa1, 0x13, 0xaa, 0x71, 0xc1,
-  0x30, 0x86, 0x8a, 0x01, 0x2b, 0x06, 0xf1, 0x19, 0x8e, 0x38, 0x95, 0x56,
-  0x0c, 0x88, 0x6f, 0x96, 0xe1, 0x35, 0x64, 0x23, 0x30, 0x57, 0x0c, 0x50,
-  0x25, 0x3e, 0x16, 0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9c, 0xb2, 0xc0,
-  0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xa8, 0xc9, 0x40, 0x87, 0x1b, 0x82,
-  0x99, 0x0c, 0xc0, 0x60, 0x96, 0x01, 0x36, 0x62, 0x23, 0xb0, 0xc1, 0x16,
-  0x03, 0xf8, 0xcc, 0x12, 0xd8, 0x86, 0xcd, 0x62, 0x40, 0xc4, 0x67, 0x96,
-  0xc0, 0x36, 0x86, 0x23, 0x64, 0x85, 0x16, 0x03, 0xe1, 0x9b, 0x65, 0x98,
-  0x0d, 0xdb, 0x08, 0x6c, 0x56, 0x6a, 0x31, 0x88, 0x8f, 0x05, 0x0e, 0x7d,
-  0x2e, 0x18, 0xe6, 0x02, 0xa7, 0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f,
-  0x22, 0xc0, 0x32, 0xd0, 0xe1, 0x86, 0xc0, 0x27, 0x03, 0x30, 0x98, 0x65,
-  0xa0, 0x8d, 0xda, 0x08, 0xac, 0x17, 0x83, 0x21, 0x3e, 0xb3, 0x04, 0xb6,
-  0x61, 0x84, 0x38, 0x06, 0xf0, 0x99, 0x25, 0xb0, 0x8d, 0x81, 0x16, 0x47,
-  0x83, 0x0d, 0x2c, 0x36, 0x08, 0xda, 0x10, 0x6a, 0x43, 0xaf, 0x64, 0xe3,
-  0x82, 0x61, 0x2e, 0x70, 0xea, 0x36, 0xa7, 0xce, 0x15, 0x83, 0x61, 0xee,
-  0xf7, 0x86, 0x39, 0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10,
-  0x04, 0x03, 0xed, 0x2d, 0x83, 0x9e, 0x0c, 0xf4, 0x31, 0x58, 0xcb, 0x60,
-  0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13,
-  0x88, 0xa1, 0x88, 0x44, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0xb2,
-  0xcb, 0x80, 0x2c, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1,
-  0xe0, 0xba, 0xcb, 0xa0, 0x2c, 0x83, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00,
-  0x04, 0xc1, 0xe0, 0xc2, 0xcb, 0xc0, 0x2c, 0x83, 0x84, 0x08, 0x46, 0x0c,
-  0x14, 0x00, 0x04, 0xc1, 0x40, 0xf9, 0xcb, 0xa0, 0x2c, 0x03, 0x92, 0x0c,
-  0x02, 0xb9, 0x0c, 0x6a, 0x32, 0x80, 0xcb, 0x60, 0x34, 0x21, 0x00, 0x2e,
-  0x70, 0x6a, 0x96, 0xe0, 0x36, 0x06, 0x5a, 0x1c, 0xd3, 0x10, 0x0d, 0x35,
-  0x15, 0x42, 0x83, 0x25, 0x48, 0x43, 0xb0, 0x0d, 0x35, 0x15, 0x4a, 0xc3,
-  0xea, 0x65, 0x25, 0x03, 0xf8, 0xcc, 0x32, 0xe0, 0x86, 0x6e, 0xc4, 0xcb,
-  0x70, 0x44, 0x70, 0x93, 0xc1, 0xf0, 0x9d, 0x30, 0xcc, 0x70, 0x43, 0x20,
-  0x92, 0x01, 0x19, 0xd4, 0x10, 0xe8, 0x70, 0xc4, 0xbd, 0xec, 0x64, 0x30,
-  0x7c, 0x15, 0x08, 0x7a, 0xf9, 0x32, 0xcc, 0x70, 0x43, 0x50, 0x92, 0x01,
-  0x19, 0x54, 0x30, 0xe8, 0x2c, 0x43, 0x6e, 0xb8, 0x47, 0x70, 0xe9, 0x18,
-  0x0c, 0x73, 0xfa, 0x37, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x68,
-  0xa8, 0x19, 0xd8, 0x65, 0x30, 0x93, 0x01, 0x69, 0x06, 0xa3, 0x09, 0x01,
-  0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45,
-  0x1c, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xd7, 0x6b, 0x06, 0x7d,
-  0x19, 0x1c, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x17, 0x6c,
-  0x06, 0x7e, 0x19, 0x30, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06,
-  0x57, 0x6c, 0x06, 0x7f, 0x19, 0x48, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20,
-  0x08, 0x06, 0x0a, 0x6e, 0x06, 0x7e, 0x19, 0xf4, 0x64, 0x10, 0xac, 0x66,
-  0xe0, 0x96, 0x41, 0x6a, 0x06, 0xa3, 0x09, 0x01, 0x70, 0x81, 0x53, 0xb3,
-  0x04, 0xee, 0x31, 0xdc, 0x90, 0x32, 0xb1, 0x19, 0x80, 0xc1, 0x2c, 0xc3,
-  0x6e, 0xf0, 0x46, 0x50, 0x38, 0x19, 0x84, 0x66, 0x00, 0x17, 0x38, 0x35,
-  0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x93, 0x6e, 0x06, 0xa2, 0x19, 0x6c,
-  0x71, 0x19, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0xb4, 0x9b, 0x81,
-  0x68, 0x06, 0x81, 0x70, 0xc1, 0x30, 0xb5, 0x93, 0x81, 0x69, 0x06, 0x70,
-  0x81, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x30, 0xfd, 0x66, 0x70,
-  0x9a, 0x81, 0xcc, 0xd8, 0x65, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06,
-  0x13, 0x78, 0x06, 0xa7, 0x19, 0x04, 0xc2, 0x05, 0xc3, 0x5c, 0xe0, 0xd4,
-  0x1d, 0x4e, 0x1d, 0x49, 0x06, 0xc3, 0x5c, 0x0d, 0x06, 0xc3, 0x1c, 0x31,
-  0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x81, 0x56, 0x9e,
-  0xc1, 0x6c, 0x06, 0x70, 0x19, 0x84, 0x67, 0x30, 0x9a, 0x10, 0x00, 0xa3,
-  0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22,
-  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0xb1, 0x67, 0xa0, 0x9b, 0x41,
-  0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0xb5, 0x67, 0xb0,
-  0x9b, 0x41, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0xb9,
-  0x67, 0xc0, 0x9b, 0x41, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60,
-  0xa0, 0xd4, 0x67, 0xb0, 0x9b, 0x81, 0x5e, 0x06, 0x01, 0x7a, 0x06, 0xab,
-  0x19, 0x98, 0x67, 0x30, 0x9a, 0x10, 0x00, 0x17, 0x38, 0x35, 0x4b, 0xe0,
-  0x1e, 0xc3, 0x0d, 0x66, 0xd3, 0x9e, 0x01, 0x18, 0xcc, 0x32, 0xf4, 0x86,
-  0x7b, 0x04, 0xc6, 0x96, 0x81, 0x5b, 0x06, 0xf1, 0x19, 0x8e, 0xb8, 0x83,
-  0xb7, 0x0c, 0x88, 0x6f, 0x96, 0xc1, 0x37, 0xc2, 0x23, 0x30, 0xb8, 0x0c,
-  0xf0, 0x20, 0x3e, 0x16, 0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9c, 0xb2,
-  0xc0, 0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0xb8, 0xcf, 0x40, 0x87, 0x1b,
-  0x82, 0xfa, 0x0c, 0xc0, 0x60, 0x96, 0xe1, 0x37, 0xc0, 0x23, 0xb0, 0x01,
-  0x2f, 0x03, 0xf8, 0xcc, 0x12, 0x94, 0x87, 0xdd, 0x65, 0x40, 0xc4, 0x67,
-  0x96, 0xa0, 0x3c, 0x86, 0x23, 0x44, 0x01, 0x2f, 0x03, 0xe1, 0x9b, 0x65,
-  0x10, 0x8f, 0xf2, 0x08, 0x6c, 0x14, 0xf2, 0x32, 0x88, 0x8f, 0x05, 0x0e,
-  0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa7, 0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10,
-  0x9f, 0x22, 0x44, 0x34, 0xd0, 0xe1, 0x86, 0x00, 0x44, 0x03, 0x30, 0x98,
-  0x65, 0x18, 0x0f, 0xf2, 0x08, 0x2c, 0x34, 0x83, 0x21, 0x3e, 0xb3, 0x04,
-  0xe5, 0x61, 0x04, 0x69, 0x06, 0xf0, 0x99, 0x25, 0x28, 0x8f, 0x81, 0x16,
-  0x47, 0xfb, 0x0d, 0x0c, 0x3c, 0x88, 0xf1, 0x10, 0xc8, 0x43, 0x27, 0xc2,
-  0xe3, 0x82, 0x61, 0x6c, 0x34, 0x83, 0xd3, 0x0c, 0xe2, 0x33, 0x1c, 0x21,
-  0x3a, 0xa8, 0x19, 0x10, 0xdf, 0x2c, 0x83, 0x79, 0xa4, 0x47, 0x60, 0xa9,
-  0x19, 0x8c, 0x4e, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x38,
-  0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x01, 0xa3, 0x81, 0x0e,
-  0x37, 0x04, 0x2e, 0x1a, 0x80, 0xc1, 0x2c, 0xc3, 0x79, 0xa0, 0x47, 0x60,
-  0x43, 0x6c, 0x06, 0xf0, 0x99, 0x25, 0x68, 0x0f, 0x73, 0xcd, 0x80, 0x88,
-  0xcf, 0x2c, 0x41, 0x7b, 0x0c, 0x47, 0xb4, 0xce, 0x6b, 0x06, 0xc2, 0x37,
-  0xcb, 0xa0, 0x1e, 0xed, 0x11, 0x98, 0xeb, 0xc0, 0x66, 0x10, 0x1f, 0x0b,
-  0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4e, 0x59, 0x10, 0xc9, 0xc7, 0x8a,
-  0x20, 0x3e, 0x45, 0xec, 0x68, 0xa0, 0xc3, 0x0d, 0x41, 0x8e, 0x06, 0x60,
-  0x30, 0xcb, 0xb0, 0x1e, 0xec, 0x11, 0x18, 0x6e, 0x06, 0x43, 0x7c, 0x66,
-  0x09, 0xda, 0xc3, 0x88, 0xde, 0x0c, 0xe0, 0x33, 0x4b, 0xd0, 0x1e, 0x03,
-  0x2d, 0x8e, 0x76, 0x1e, 0x18, 0x7a, 0x10, 0xeb, 0x21, 0xb0, 0x07, 0xdc,
-  0xa5, 0xc7, 0x05, 0xc3, 0x5c, 0xe0, 0xd4, 0x6d, 0x4e, 0x5d, 0x6a, 0x06,
-  0xc3, 0x9c, 0x2e, 0x06, 0xc3, 0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18,
-  0x1c, 0x00, 0x08, 0x82, 0x81, 0xa6, 0xa6, 0x01, 0x8e, 0x06, 0xf5, 0x19,
-  0x98, 0x69, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30,
-  0x08, 0xa3, 0x09, 0xc4, 0x50, 0x44, 0x22, 0x23, 0x06, 0x08, 0x00, 0x82,
-  0x60, 0x70, 0xc5, 0x69, 0xf0, 0xa3, 0x41, 0x42, 0x04, 0x23, 0x06, 0x08,
-  0x00, 0x82, 0x60, 0x70, 0xc9, 0x69, 0x00, 0xa6, 0x41, 0x42, 0x04, 0x23,
-  0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0xcd, 0x69, 0x10, 0xa6, 0x41, 0x42,
-  0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xa0, 0xe8, 0x69, 0x00, 0xa6,
-  0xc1, 0x7f, 0x06, 0x41, 0x9b, 0x06, 0x30, 0x1a, 0xac, 0x69, 0x30, 0x9a,
-  0x10, 0x00, 0x17, 0x38, 0x35, 0x4b, 0xe0, 0x1e, 0x03, 0x2d, 0x8e, 0x69,
-  0xe4, 0x06, 0x18, 0x0b, 0xb8, 0xc1, 0x12, 0xbb, 0x21, 0xb4, 0x07, 0x18,
-  0x0b, 0xbc, 0x31, 0xcb, 0xf0, 0x1e, 0xf1, 0xb1, 0x3e, 0xc3, 0x11, 0xef,
-  0x13, 0xa3, 0xc1, 0xf0, 0x1d, 0xfc, 0x0c, 0x33, 0xdc, 0x10, 0xf0, 0x67,
-  0x40, 0x06, 0x35, 0x04, 0x3a, 0x1c, 0x21, 0x3f, 0x35, 0x1a, 0x0c, 0x5f,
-  0x05, 0x82, 0x1e, 0xfd, 0x0c, 0x33, 0xdc, 0x10, 0xfc, 0x67, 0x40, 0x06,
-  0x15, 0x0c, 0x3a, 0xcb, 0x00, 0x1f, 0x25, 0x12, 0xdc, 0x78, 0x06, 0xc3,
-  0x1c, 0x3d, 0x06, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x81, 0x26,
-  0xaa, 0x01, 0x9c, 0x06, 0x2d, 0x1a, 0xf8, 0x69, 0x30, 0x9a, 0x10, 0x00,
-  0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0xc4,
-  0x21, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0xa5, 0x6a, 0x70, 0xa7,
-  0xc1, 0x41, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0xa9, 0x6a,
-  0x80, 0xa7, 0x01, 0x43, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70,
-  0xad, 0x6a, 0x90, 0xa7, 0x81, 0x44, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82,
-  0x60, 0xa0, 0xc8, 0x6a, 0x80, 0xa7, 0xc1, 0x8d, 0x06, 0x41, 0xa9, 0x06,
-  0x68, 0x1a, 0x8c, 0x6a, 0x30, 0x9a, 0x10, 0x00, 0x17, 0x38, 0x35, 0x4b,
-  0x50, 0x22, 0xc3, 0x0d, 0x23, 0xb4, 0xaa, 0x01, 0x18, 0xcc, 0x32, 0xc8,
-  0xc7, 0x7c, 0x04, 0x25, 0xa3, 0xc1, 0x9e, 0x06, 0x70, 0x81, 0x53, 0x23,
-  0x06, 0x07, 0x00, 0x82, 0x60, 0x30, 0xd1, 0x6a, 0xc0, 0xa7, 0x41, 0x0a,
-  0xad, 0x69, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x53, 0xad, 0x06,
-  0x7c, 0x1a, 0x04, 0xc2, 0x05, 0xc3, 0x54, 0x8d, 0x06, 0xa0, 0x1a, 0xc0,
-  0x05, 0x4e, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x94, 0xab, 0x41,
-  0xa8, 0x06, 0x2d, 0x04, 0xa7, 0xc1, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18,
-  0x4c, 0xba, 0x1a, 0x84, 0x6a, 0x10, 0x08, 0x17, 0x0c, 0x73, 0x81, 0x53,
-  0x77, 0x38, 0x75, 0xfe, 0x19, 0x0c, 0x73, 0x2f, 0x19, 0x0c, 0x73, 0xc4,
-  0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0xda, 0xaf,
-  0x06, 0xad, 0x1a, 0xa8, 0x69, 0xb0, 0xab, 0xc1, 0x68, 0x42, 0x00, 0x8c,
-  0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89,
-  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x65, 0xae, 0x01, 0xad, 0x06,
-  0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x75, 0xae, 0x41,
-  0xad, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x85,
-  0xae, 0x81, 0xad, 0x06, 0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82,
-  0x81, 0xf2, 0xae, 0x41, 0xad, 0x06, 0x74, 0x1a, 0x04, 0xe2, 0x1a, 0x94,
-  0x6a, 0x00, 0xae, 0xc1, 0x68, 0x42, 0x00, 0x5c, 0xe0, 0xd4, 0x2c, 0x41,
-  0x89, 0x0c, 0x37, 0x80, 0xd1, 0xb9, 0x06, 0x60, 0x30, 0xcb, 0x40, 0x1f,
-  0x25, 0x12, 0x98, 0x99, 0x06, 0x68, 0x1a, 0xc4, 0x67, 0x38, 0xa2, 0x8c,
-  0xd2, 0x34, 0x20, 0xbe, 0x59, 0x86, 0xfa, 0xc0, 0x8f, 0xc0, 0xd4, 0x34,
-  0x30, 0xa3, 0xf8, 0x58, 0x30, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x70, 0xca,
-  0x02, 0x43, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x22, 0x5e, 0x03, 0x1d, 0x6e,
-  0x08, 0xde, 0x35, 0x00, 0x83, 0x59, 0x06, 0xfb, 0xb8, 0x8f, 0xc0, 0x06,
-  0x39, 0x0d, 0xe0, 0x33, 0x4b, 0xc0, 0x1f, 0x16, 0xa7, 0x01, 0x11, 0x9f,
-  0x59, 0x02, 0xfe, 0x18, 0x8e, 0x80, 0x23, 0x39, 0x0d, 0x84, 0x6f, 0x96,
-  0x21, 0x3f, 0xf8, 0x23, 0xb0, 0x38, 0x9a, 0xd3, 0x20, 0x3e, 0x16, 0x38,
-  0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9c, 0xb2, 0x20, 0x92, 0x8f, 0x15, 0x41,
-  0x7c, 0x8a, 0xe0, 0xd7, 0x40, 0x87, 0x1b, 0x02, 0x7d, 0x0d, 0xc0, 0x60,
-  0x96, 0x41, 0x3f, 0xf6, 0x23, 0xb0, 0x3d, 0x0d, 0x86, 0xf8, 0xcc, 0x12,
-  0xf0, 0x87, 0x11, 0x7e, 0x1a, 0xc0, 0x67, 0x96, 0x80, 0x3f, 0x06, 0x5a,
-  0x1c, 0xcd, 0x3e, 0xb0, 0xfb, 0x20, 0xf4, 0x43, 0xd8, 0x0f, 0x7b, 0x0c,
-  0xf0, 0xe3, 0x82, 0x61, 0xac, 0x4f, 0x83, 0x50, 0x0d, 0xe2, 0x33, 0x1c,
-  0xd1, 0x47, 0xa2, 0x1a, 0x10, 0xdf, 0x2c, 0x43, 0x7f, 0x80, 0x48, 0x60,
-  0xa3, 0x1a, 0xf8, 0x51, 0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17,
-  0x38, 0x65, 0x81, 0x21, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xa1, 0xb2, 0x81,
-  0x0e, 0x37, 0x04, 0x28, 0x1b, 0x80, 0xc1, 0x2c, 0x83, 0x7f, 0xfc, 0x47,
-  0x60, 0xc3, 0xaa, 0x06, 0xf0, 0x99, 0x25, 0x20, 0x11, 0x43, 0xd5, 0x80,
-  0x88, 0xcf, 0x2c, 0x01, 0x89, 0x0c, 0x47, 0xa0, 0x52, 0xaa, 0x06, 0xc2,
-  0x37, 0xcb, 0x10, 0x22, 0x24, 0x12, 0x58, 0x2a, 0xa9, 0x6a, 0x10, 0x1f,
-  0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4e, 0x59, 0x10, 0xc9, 0xc7,
-  0x8a, 0x20, 0x3e, 0x45, 0xd4, 0x6c, 0xa0, 0xc3, 0x0d, 0xc1, 0xcc, 0x06,
-  0x60, 0x30, 0xcb, 0x20, 0x22, 0x23, 0x12, 0x98, 0xac, 0x06, 0x43, 0x7c,
-  0x66, 0x09, 0x48, 0xc4, 0x88, 0x5b, 0x0d, 0xe0, 0x33, 0x4b, 0x40, 0x22,
-  0x03, 0x2d, 0x8e, 0xe6, 0x1f, 0xd8, 0x7f, 0x10, 0x22, 0x22, 0x8c, 0x88,
-  0x59, 0x06, 0x20, 0x72, 0xc1, 0x30, 0x17, 0x38, 0x75, 0x9b, 0x53, 0x37,
-  0xaa, 0xc1, 0x30, 0x47, 0x9b, 0xc1, 0x30, 0x47, 0x0c, 0x73, 0xc4, 0x30,
-  0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xa0, 0x91, 0x6d, 0x20, 0xb3, 0xc1,
-  0xbb, 0x06, 0x60, 0x1b, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c,
-  0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x91, 0xc8, 0x88, 0x01, 0x02,
-  0x80, 0x20, 0x18, 0x5c, 0x6b, 0x1b, 0xe4, 0x6c, 0x90, 0x10, 0xc1, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0x6c, 0x1b, 0xe8, 0x6c, 0x90, 0x10,
-  0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0x6d, 0x1b, 0xec, 0x6c,
-  0x90, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x28, 0x74, 0x1b,
-  0xe8, 0x6c, 0x90, 0xaf, 0x41, 0x70, 0xb6, 0x81, 0xca, 0x06, 0x65, 0x1b,
-  0x8c, 0x26, 0x04, 0xc0, 0x05, 0x4e, 0xcd, 0x12, 0x94, 0xc8, 0x40, 0x8b,
-  0x63, 0x1a, 0xf0, 0x41, 0xe3, 0xc2, 0x7b, 0xb0, 0x84, 0x7c, 0x08, 0x24,
-  0x42, 0xe3, 0xc2, 0x7c, 0xcc, 0x32, 0x98, 0x08, 0x8a, 0x94, 0xd3, 0x70,
-  0x84, 0x3a, 0xad, 0x6c, 0x30, 0x7c, 0xb7, 0x4e, 0xc3, 0x0c, 0x37, 0x04,
-  0xf6, 0x1a, 0x90, 0x41, 0x0d, 0x81, 0x0e, 0x47, 0xb0, 0xd3, 0xcb, 0x06,
-  0xc3, 0x57, 0x81, 0xa0, 0xe7, 0x4e, 0xc3, 0x0c, 0x37, 0x04, 0xf9, 0x1a,
-  0x90, 0x41, 0x05, 0x83, 0xce, 0x32, 0x9c, 0x08, 0x8f, 0x04, 0xd7, 0xab,
-  0xc1, 0x30, 0xe7, 0x9e, 0xc1, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60,
-  0xa0, 0xf1, 0x6d, 0xa0, 0xb6, 0xc1, 0xc9, 0x06, 0x78, 0x1b, 0x8c, 0x26,
-  0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31,
-  0x14, 0x71, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0xa3, 0x1b,
-  0xc4, 0x6d, 0x70, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c,
-  0xa4, 0x1b, 0xc8, 0x6d, 0xc0, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20,
-  0x18, 0x5c, 0xa5, 0x1b, 0xcc, 0x6d, 0x20, 0x11, 0xc1, 0x88, 0x81, 0x02,
-  0x80, 0x20, 0x18, 0x28, 0xac, 0x1b, 0xc8, 0x6d, 0x10, 0xb3, 0x41, 0xf0,
-  0xb7, 0x81, 0xd8, 0x06, 0x7d, 0x1b, 0x8c, 0x26, 0x04, 0xc0, 0x05, 0x4e,
-  0xcd, 0x12, 0xf0, 0xc8, 0x70, 0x43, 0x3f, 0x95, 0x6e, 0x00, 0x06, 0xb3,
-  0x0c, 0x29, 0xa2, 0x22, 0x41, 0xb1, 0x6c, 0x50, 0xb7, 0x01, 0x5c, 0xe0,
-  0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x4c, 0xae, 0x1b, 0xd8, 0x6d,
-  0x40, 0x52, 0x65, 0x1b, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0xf4,
-  0xba, 0x81, 0xdd, 0x06, 0x81, 0x70, 0xc1, 0x30, 0xf5, 0xb2, 0x81, 0xde,
-  0x06, 0x70, 0x81, 0x53, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x30, 0xcd,
-  0x6e, 0xb0, 0xb7, 0xc1, 0x49, 0xa9, 0x6d, 0x30, 0x62, 0x70, 0x00, 0x20,
-  0x08, 0x06, 0x13, 0xed, 0x06, 0x7b, 0x1b, 0x04, 0xc2, 0x05, 0xc3, 0x5c,
-  0xe0, 0xd4, 0x1d, 0x4e, 0x1d, 0xbe, 0x06, 0xc3, 0x5c, 0x8a, 0x06, 0xc3,
-  0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x81,
-  0x96, 0xbb, 0xc1, 0xe9, 0x06, 0x64, 0x1b, 0xd4, 0x6e, 0x30, 0x9a, 0x10,
-  0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50,
-  0x44, 0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0x81, 0x6f, 0xe0,
-  0xba, 0x41, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0x85,
-  0x6f, 0xf0, 0xba, 0x41, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60,
-  0x70, 0x89, 0x6f, 0x00, 0xbb, 0x41, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00,
-  0x82, 0x60, 0xa0, 0xa4, 0x6f, 0xf0, 0xba, 0x81, 0xdb, 0x06, 0x01, 0xef,
-  0x06, 0x7f, 0x1b, 0xe8, 0x6e, 0x30, 0x9a, 0x10, 0x00, 0x17, 0x38, 0x35,
-  0x4b, 0xc0, 0x23, 0xc3, 0x0d, 0x3a, 0x15, 0xbe, 0x01, 0x18, 0xcc, 0x32,
-  0xac, 0x08, 0x8f, 0x04, 0x06, 0xb6, 0x81, 0xd8, 0x06, 0xf1, 0x19, 0x8e,
-  0x00, 0xab, 0xb1, 0x0d, 0x88, 0x6f, 0x96, 0x81, 0x45, 0x5e, 0x24, 0x30,
-  0xb2, 0x0d, 0xc2, 0x2a, 0x3e, 0x16, 0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b,
-  0x9c, 0xb2, 0xc0, 0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x58, 0xdf, 0x40,
-  0x87, 0x1b, 0x82, 0xf4, 0x0d, 0xc0, 0x60, 0x96, 0xa1, 0x45, 0x5c, 0x24,
-  0xb0, 0x81, 0x6d, 0x03, 0xf8, 0xcc, 0x12, 0xcc, 0x88, 0xad, 0x6d, 0x40,
-  0xc4, 0x67, 0x96, 0x60, 0x46, 0x86, 0x23, 0xd6, 0x8a, 0x6d, 0x03, 0xe1,
-  0x9b, 0x65, 0x80, 0x91, 0x19, 0x09, 0x8c, 0xad, 0xda, 0x36, 0x88, 0x8f,
-  0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa7, 0x2c, 0x88, 0xe4, 0x63,
-  0x45, 0x10, 0x9f, 0x22, 0xec, 0x37, 0xd0, 0xe1, 0x86, 0x80, 0x7e, 0x03,
-  0x30, 0x98, 0x65, 0x88, 0x11, 0x19, 0x09, 0xac, 0x6e, 0x83, 0x21, 0x3e,
-  0xb3, 0x04, 0x33, 0x62, 0x04, 0xde, 0x06, 0xf0, 0x99, 0x25, 0x98, 0x91,
-  0x81, 0x16, 0x47, 0x6b, 0x11, 0xcc, 0x45, 0x88, 0x18, 0x11, 0x64, 0x84,
-  0x5d, 0x83, 0x17, 0xb9, 0x60, 0x18, 0xbb, 0xdb, 0x60, 0x6f, 0x83, 0xf8,
-  0x0c, 0x47, 0xdc, 0x15, 0xdf, 0x06, 0xc4, 0x37, 0xcb, 0x40, 0x23, 0x37,
-  0x12, 0x58, 0xdf, 0x06, 0x78, 0x15, 0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30,
-  0xcc, 0x05, 0x4e, 0x59, 0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0x90,
-  0x70, 0xa0, 0xc3, 0x0d, 0x81, 0x08, 0x07, 0x60, 0x30, 0xcb, 0x50, 0x23,
-  0x36, 0x12, 0xd8, 0x50, 0xba, 0x01, 0x7c, 0x66, 0x09, 0x76, 0xc4, 0x44,
-  0x37, 0x20, 0xe2, 0x33, 0x4b, 0xb0, 0x23, 0xc3, 0x11, 0xa2, 0x35, 0xba,
-  0x81, 0xf0, 0xcd, 0x32, 0xe0, 0xc8, 0x8e, 0x04, 0x36, 0x5a, 0xa4, 0x1b,
-  0xc4, 0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x53, 0x16, 0x44,
-  0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0x2f, 0x1c, 0xe8, 0x70, 0x43, 0xd0,
-  0xc2, 0x01, 0x18, 0xcc, 0x32, 0xe4, 0x88, 0x8e, 0x04, 0xc6, 0xba, 0xc1,
-  0x10, 0x9f, 0x59, 0x82, 0x1d, 0x31, 0x22, 0x76, 0x03, 0xf8, 0xcc, 0x12,
-  0xec, 0xc8, 0x40, 0x8b, 0xa3, 0xd5, 0x08, 0x66, 0x23, 0x44, 0x8e, 0x08,
-  0x3a, 0xa2, 0xb3, 0xc1, 0x8d, 0x5c, 0x30, 0xcc, 0x05, 0x4e, 0xdd, 0xe6,
-  0xd4, 0xf5, 0x6d, 0x30, 0xcc, 0xb9, 0x6a, 0x30, 0xcc, 0x11, 0xc3, 0x1c,
-  0x31, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x68, 0x3e, 0x1c, 0xb0,
-  0x70, 0x90, 0xbe, 0x81, 0x0e, 0x07, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20,
-  0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62,
-  0x80, 0x00, 0x20, 0x08, 0x06, 0x57, 0x19, 0x07, 0x33, 0x1c, 0x24, 0x44,
-  0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x97, 0x19, 0x07, 0x34, 0x1c,
-  0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xd7, 0x19, 0x07,
-  0x35, 0x1c, 0x24, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x8a,
-  0x1b, 0x07, 0x34, 0x1c, 0xcc, 0x6f, 0x10, 0x84, 0x71, 0x40, 0xc2, 0xc1,
-  0x0f, 0x07, 0xa3, 0x09, 0x01, 0x70, 0x81, 0x53, 0xb3, 0x04, 0x3c, 0x32,
-  0xd0, 0xe2, 0x98, 0xc6, 0x89, 0xa0, 0xe0, 0x60, 0x22, 0x2c, 0x91, 0x22,
-  0xc2, 0x8e, 0xa0, 0xe0, 0xa0, 0x22, 0xb3, 0x0c, 0x3d, 0xf2, 0x23, 0xbf,
-  0x35, 0x1c, 0xa1, 0x3e, 0x25, 0x1c, 0x0c, 0xdf, 0xad, 0xcf, 0x30, 0xc3,
-  0x0d, 0x01, 0xfc, 0x06, 0x64, 0x50, 0x43, 0xa0, 0xc3, 0x11, 0xe6, 0x95,
-  0xc2, 0xc1, 0xf0, 0x55, 0x20, 0xe8, 0xa1, 0xd7, 0x30, 0xc3, 0x0d, 0xc1,
-  0xfc, 0x06, 0x64, 0x50, 0xc1, 0xa0, 0xb3, 0x0c, 0x3e, 0x32, 0x27, 0xc1,
-  0xdd, 0x6e, 0x30, 0xcc, 0xa1, 0x6b, 0x30, 0xcc, 0x88, 0xc1, 0x01, 0x80,
-  0x20, 0x18, 0x68, 0x76, 0x1c, 0x90, 0x71, 0x10, 0xc2, 0x81, 0x1c, 0x07,
-  0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a,
-  0x40, 0x0c, 0x45, 0x1c, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x57,
-  0x1f, 0x07, 0x6b, 0x1c, 0x1c, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08,
-  0x06, 0x97, 0x1f, 0x07, 0x6c, 0x1c, 0x30, 0x44, 0x30, 0x62, 0x80, 0x00,
-  0x20, 0x08, 0x06, 0xd7, 0x1f, 0x07, 0x6d, 0x1c, 0x48, 0x44, 0x30, 0x62,
-  0xa0, 0x00, 0x20, 0x08, 0x06, 0x8a, 0x29, 0x07, 0x6c, 0x1c, 0xac, 0x70,
-  0x10, 0xe4, 0x71, 0xc0, 0xc3, 0xc1, 0x1d, 0x07, 0xa3, 0x09, 0x01, 0x70,
-  0x81, 0x53, 0xb3, 0x04, 0x73, 0x32, 0xdc, 0x70, 0x5f, 0x7f, 0x1c, 0x80,
-  0xc1, 0x2c, 0x03, 0x98, 0x84, 0x49, 0x50, 0x26, 0x1c, 0xbc, 0x71, 0x00,
-  0x17, 0x38, 0x35, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x13, 0x2a, 0x07,
-  0x70, 0x1c, 0x90, 0xd0, 0x0f, 0x07, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60,
-  0x30, 0xa5, 0x72, 0x00, 0xc7, 0x41, 0x20, 0x5c, 0x30, 0x4c, 0xa5, 0x70,
-  0x40, 0xc7, 0x01, 0x5c, 0xe0, 0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18,
-  0x4c, 0xad, 0x1c, 0xd4, 0x71, 0x10, 0x62, 0x64, 0x1c, 0x8c, 0x18, 0x1c,
-  0x00, 0x08, 0x82, 0xc1, 0xe4, 0xca, 0x41, 0x1d, 0x07, 0x81, 0x70, 0xc1,
-  0x30, 0x17, 0x38, 0x75, 0x87, 0x53, 0x27, 0xbf, 0xc1, 0x30, 0x37, 0xb2,
-  0xc1, 0x30, 0x47, 0x0c, 0x73, 0xc4, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82,
-  0x60, 0xa0, 0xcd, 0x72, 0x10, 0xca, 0x81, 0x0f, 0x07, 0xaf, 0x1c, 0x8c,
-  0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02,
-  0x31, 0x14, 0x91, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0xba,
-  0x1c, 0xa0, 0x72, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
-  0x5c, 0xbb, 0x1c, 0xa4, 0x72, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80,
-  0x20, 0x18, 0x5c, 0xbc, 0x1c, 0xa8, 0x72, 0x90, 0x10, 0xc1, 0x88, 0x81,
-  0x02, 0x80, 0x20, 0x18, 0x28, 0xe3, 0x1c, 0xa4, 0x72, 0x80, 0xc6, 0x41,
-  0x60, 0xcb, 0x41, 0x1e, 0x07, 0xb4, 0x1c, 0x8c, 0x26, 0x04, 0xc0, 0x05,
-  0x4e, 0xcd, 0x12, 0xcc, 0xc9, 0x70, 0x03, 0x8d, 0xed, 0x72, 0x00, 0x06,
-  0xb3, 0x0c, 0x62, 0x32, 0x27, 0x81, 0xe9, 0x70, 0xc0, 0xc3, 0x41, 0x7c,
-  0x86, 0x23, 0xc0, 0xa8, 0x87, 0x03, 0xe2, 0x9b, 0x65, 0x18, 0x13, 0x33,
-  0x09, 0xcc, 0x87, 0x83, 0x30, 0x8a, 0x8f, 0x05, 0x03, 0x7d, 0x2e, 0x18,
-  0xe6, 0x02, 0xa7, 0x2c, 0x30, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xca,
-  0x39, 0xd0, 0xe1, 0x86, 0x60, 0x9c, 0x03, 0x30, 0x98, 0x65, 0x20, 0x93,
-  0x32, 0x09, 0x6c, 0x30, 0xe3, 0x00, 0x3e, 0xb3, 0x04, 0x6a, 0x62, 0x65,
-  0x1c, 0x10, 0xf1, 0x99, 0x25, 0x50, 0x93, 0xe1, 0x88, 0x35, 0x32, 0xe3,
-  0x40, 0xf8, 0x66, 0x19, 0xce, 0x44, 0x4d, 0x02, 0x63, 0xa3, 0x33, 0x0e,
-  0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x29, 0x0b, 0x22,
-  0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0x78, 0x0e, 0x74, 0xb8, 0x21, 0x70,
-  0xe7, 0x00, 0x0c, 0x66, 0x19, 0xd0, 0x24, 0x4d, 0x02, 0x7b, 0xe3, 0x60,
-  0x88, 0xcf, 0x2c, 0x81, 0x9a, 0x18, 0x21, 0xc7, 0x01, 0x7c, 0x66, 0x09,
-  0xd4, 0x64, 0xa0, 0xc5, 0xd1, 0xc8, 0x04, 0x2b, 0x13, 0x02, 0x4d, 0x84,
-  0x34, 0x61, 0xc7, 0xc0, 0x4c, 0x2e, 0x18, 0xc6, 0xe2, 0x38, 0xa8, 0xe3,
-  0x20, 0x3e, 0xc3, 0x11, 0x71, 0x66, 0xc7, 0x01, 0xf1, 0xcd, 0x32, 0xac,
-  0x89, 0x9b, 0x04, 0x76, 0xc7, 0x81, 0x9c, 0xc5, 0xc7, 0x82, 0x81, 0x3e,
-  0x17, 0x0c, 0x73, 0x81, 0x53, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f,
-  0x11, 0xfe, 0x1c, 0xe8, 0x70, 0x43, 0xc0, 0xcf, 0x01, 0x18, 0xcc, 0x32,
-  0xb0, 0x49, 0x9b, 0x04, 0x36, 0xfc, 0x71, 0x00, 0x9f, 0x59, 0x02, 0x39,
-  0x31, 0x3e, 0x0e, 0x88, 0xf8, 0xcc, 0x12, 0xc8, 0xc9, 0x70, 0x04, 0x9f,
-  0xf5, 0x71, 0x20, 0x7c, 0xb3, 0x0c, 0x6f, 0x22, 0x27, 0x81, 0xf5, 0x99,
-  0x1f, 0x07, 0xf1, 0xb1, 0xc0, 0xa1, 0xcf, 0x05, 0xc3, 0x5c, 0xe0, 0x94,
-  0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x44, 0x4a, 0x07, 0x3a, 0xdc,
-  0x10, 0x9c, 0x74, 0x00, 0x06, 0xb3, 0x0c, 0x70, 0x12, 0x27, 0x81, 0x99,
-  0x72, 0x30, 0xc4, 0x67, 0x96, 0x40, 0x4e, 0x8c, 0x58, 0xe5, 0x00, 0x3e,
-  0xb3, 0x04, 0x72, 0x32, 0xd0, 0xe2, 0x68, 0x6c, 0x82, 0xb5, 0x09, 0x01,
-  0x27, 0x42, 0x9c, 0xb8, 0x74, 0xe0, 0x26, 0x17, 0x0c, 0x73, 0x81, 0x53,
-  0xb7, 0x39, 0x75, 0x77, 0x1c, 0x0c, 0x73, 0xa8, 0x1b, 0x0c, 0x73, 0xc4,
-  0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x1a, 0x4e,
-  0x07, 0x26, 0x1d, 0x8c, 0x73, 0x40, 0xd3, 0xc1, 0x68, 0x42, 0x00, 0x8c,
-  0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89,
-  0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xf5, 0xd3, 0x41, 0x4b, 0x07,
-  0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x05, 0xd6, 0x81,
-  0x4b, 0x07, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x15,
-  0xd6, 0xc1, 0x4b, 0x07, 0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82,
-  0x81, 0x82, 0xd6, 0x81, 0x4b, 0x07, 0xed, 0x1c, 0x04, 0x3b, 0x1d, 0xf8,
-  0x73, 0x90, 0xd3, 0xc1, 0x68, 0x42, 0x00, 0x5c, 0xe0, 0xd4, 0x2c, 0xc1,
-  0x9c, 0x0c, 0xb4, 0x38, 0xa6, 0xe1, 0x23, 0xfc, 0x39, 0xf4, 0x08, 0x4b,
-  0x80, 0x89, 0x20, 0x27, 0xfc, 0x39, 0x84, 0xc9, 0x2c, 0x03, 0x9d, 0xd8,
-  0x49, 0xae, 0x0d, 0x47, 0xa4, 0xcf, 0x3f, 0x07, 0xc3, 0x77, 0xea, 0x33,
-  0xcc, 0x70, 0x43, 0xa0, 0xce, 0x01, 0x19, 0xd4, 0x10, 0xe8, 0x70, 0x04,
-  0xb8, 0x8d, 0x74, 0x30, 0x7c, 0x15, 0x08, 0x7a, 0xe2, 0x36, 0xcc, 0x70,
-  0x43, 0xd0, 0xce, 0x01, 0x19, 0x54, 0x30, 0xe8, 0x2c, 0x43, 0x9d, 0xa8,
-  0x4a, 0x70, 0xb1, 0x1c, 0x0c, 0x73, 0xe2, 0x1b, 0x0c, 0x33, 0x62, 0x70,
-  0x00, 0x20, 0x08, 0x06, 0x1a, 0x5c, 0x07, 0x3e, 0x1d, 0xec, 0x73, 0xc0,
-  0xd6, 0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20,
-  0x8c, 0x26, 0x10, 0x43, 0x11, 0x87, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
-  0xc1, 0x75, 0xd7, 0x41, 0x59, 0x07, 0x07, 0x11, 0x8c, 0x18, 0x20, 0x00,
-  0x08, 0x82, 0xc1, 0x85, 0xd7, 0x81, 0x59, 0x07, 0x0c, 0x11, 0x8c, 0x18,
-  0x20, 0x00, 0x08, 0x82, 0xc1, 0x95, 0xd7, 0xc1, 0x59, 0x07, 0x12, 0x11,
-  0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0x81, 0x02, 0xda, 0x81, 0x59, 0x07,
-  0x25, 0x1d, 0x04, 0x73, 0x1d, 0xd8, 0x74, 0x10, 0xd7, 0xc1, 0x68, 0x42,
-  0x00, 0x5c, 0xe0, 0xd4, 0x2c, 0x81, 0xaa, 0x0c, 0x37, 0xc4, 0x5b, 0x5e,
-  0x07, 0x60, 0x30, 0xcb, 0x70, 0x27, 0x78, 0x12, 0x14, 0x48, 0x07, 0x69,
-  0x1d, 0xc0, 0x05, 0x4e, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x24,
-  0xda, 0x81, 0x5a, 0x07, 0x23, 0x94, 0xd3, 0xc1, 0x88, 0xc1, 0x01, 0x80,
-  0x20, 0x18, 0x4c, 0xa3, 0x1d, 0xa8, 0x75, 0x10, 0x08, 0x17, 0x0c, 0x53,
-  0x23, 0x1d, 0xb8, 0x75, 0x00, 0x17, 0x38, 0x35, 0x62, 0x70, 0x00, 0x20,
-  0x08, 0x06, 0xd3, 0x69, 0x07, 0x6f, 0x1d, 0xec, 0x9b, 0x4f, 0x07, 0x23,
-  0x06, 0x07, 0x00, 0x82, 0x60, 0x30, 0xa1, 0x76, 0xf0, 0xd6, 0x41, 0x20,
-  0x5c, 0x30, 0xcc, 0x05, 0x4e, 0xdd, 0xe1, 0xd4, 0xb1, 0x73, 0x30, 0xcc,
-  0xf5, 0x6f, 0x30, 0xcc, 0x11, 0xc3, 0x1c, 0x31, 0xcc, 0x88, 0xc1, 0x01,
-  0x80, 0x20, 0x18, 0x68, 0xad, 0x1d, 0xec, 0x75, 0x80, 0xd3, 0x41, 0x6a,
-  0x07, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30,
-  0x9a, 0x40, 0x0c, 0x45, 0x24, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06,
-  0x17, 0x6d, 0x07, 0xa2, 0x1d, 0x24, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20,
-  0x08, 0x06, 0x57, 0x6d, 0x07, 0xa3, 0x1d, 0x24, 0x44, 0x30, 0x62, 0x80,
-  0x00, 0x20, 0x08, 0x06, 0x97, 0x6d, 0x07, 0xa4, 0x1d, 0x24, 0x44, 0x30,
-  0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x4a, 0x6f, 0x07, 0xa3, 0x1d, 0x88,
-  0x75, 0x10, 0xc0, 0x76, 0x30, 0xd7, 0x81, 0x6b, 0x07, 0xa3, 0x09, 0x01,
-  0x70, 0x81, 0x53, 0xb3, 0x04, 0xaa, 0x32, 0xdc, 0xe0, 0x72, 0xb5, 0x1d,
-  0x80, 0xc1, 0x2c, 0x43, 0x9e, 0xa8, 0x4a, 0x60, 0x34, 0x1d, 0xd8, 0x74,
-  0x10, 0x9f, 0xe1, 0x88, 0x1f, 0xba, 0xe9, 0x80, 0xf8, 0x66, 0x19, 0xf4,
-  0xa4, 0x4f, 0x02, 0xc3, 0xe9, 0x00, 0x8c, 0xe2, 0x63, 0xc1, 0x40, 0x9f,
-  0x0b, 0x86, 0xb9, 0xc0, 0x29, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7,
-  0x88, 0xdf, 0x0e, 0x74, 0xb8, 0x21, 0xe8, 0xed, 0x00, 0x0c, 0x66, 0x19,
-  0xf6, 0x84, 0x4f, 0x02, 0x1b, 0xc0, 0x3a, 0x80, 0xcf, 0x2c, 0x41, 0xa8,
-  0xd8, 0x4f, 0x07, 0x44, 0x7c, 0x66, 0x09, 0x42, 0x65, 0x38, 0x42, 0x8d,
-  0xc0, 0x3a, 0x10, 0xbe, 0x59, 0x06, 0x3f, 0x09, 0x95, 0xc0, 0xd6, 0x28,
-  0xac, 0x83, 0xf8, 0x58, 0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x70, 0xca,
-  0x82, 0x48, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x42, 0xbd, 0x03, 0x1d, 0x6e,
-  0x08, 0xd0, 0x3b, 0x00, 0x83, 0x59, 0x86, 0x3f, 0x01, 0x95, 0xc0, 0xd2,
-  0x3a, 0x18, 0xe2, 0x33, 0x4b, 0x10, 0x2a, 0x46, 0xb0, 0x75, 0x00, 0x9f,
-  0x59, 0x82, 0x50, 0x19, 0x68, 0x71, 0xb4, 0x3d, 0xc1, 0xf8, 0x84, 0xf8,
-  0x13, 0x01, 0x54, 0xd4, 0x31, 0xe8, 0x93, 0x0b, 0x86, 0xb1, 0xb5, 0x0e,
-  0xde, 0x3a, 0x88, 0xcf, 0x70, 0xc4, 0xda, 0xc1, 0x75, 0x40, 0x7c, 0xb3,
-  0x0c, 0xa2, 0x52, 0x2a, 0x81, 0xc5, 0x75, 0xc0, 0x76, 0xf1, 0xb1, 0x60,
-  0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0xe0, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08,
-  0xe2, 0x53, 0x04, 0x7e, 0x07, 0x3a, 0xdc, 0x10, 0xd8, 0x77, 0x00, 0x06,
-  0xb3, 0x0c, 0xa3, 0x42, 0x2a, 0x81, 0x0d, 0x79, 0x1d, 0xc0, 0x67, 0x96,
-  0x20, 0x55, 0xcc, 0xae, 0x03, 0x22, 0x3e, 0xb3, 0x04, 0xa9, 0x32, 0x1c,
-  0x61, 0x77, 0x77, 0x1d, 0x08, 0xdf, 0x2c, 0x83, 0xa9, 0xa4, 0x4a, 0x60,
-  0x77, 0x87, 0xd7, 0x41, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17,
-  0x38, 0x65, 0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x31, 0xe2, 0x81,
-  0x0e, 0x37, 0x04, 0x21, 0x1e, 0x80, 0xc1, 0x2c, 0xc3, 0xa9, 0xa0, 0x4a,
-  0x60, 0xa0, 0x1d, 0x0c, 0xf1, 0x99, 0x25, 0x48, 0x15, 0x23, 0x4a, 0x3b,
-  0x80, 0xcf, 0x2c, 0x41, 0xaa, 0x0c, 0xb4, 0x38, 0xda, 0xa8, 0x60, 0xa4,
-  0x42, 0x9c, 0x8a, 0x80, 0x2a, 0x22, 0x1f, 0x94, 0xca, 0x05, 0xc3, 0x5c,
-  0xe0, 0xd4, 0x6d, 0x4e, 0x5d, 0x5c, 0x07, 0xc3, 0x9c, 0x28, 0x07, 0xc3,
-  0x1c, 0x31, 0xcc, 0x11, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x81,
-  0x26, 0xe3, 0x01, 0x88, 0x07, 0xbd, 0x1d, 0xb8, 0x78, 0x30, 0x9a, 0x10,
-  0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50,
-  0x44, 0x22, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0xe5, 0x78, 0x70,
-  0xe2, 0x41, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0xe9,
-  0x78, 0x80, 0xe2, 0x41, 0x42, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60,
-  0x70, 0xed, 0x78, 0x90, 0xe2, 0x41, 0x42, 0x04, 0x23, 0x06, 0x0a, 0x00,
-  0x82, 0x60, 0xa0, 0x88, 0x79, 0x80, 0xe2, 0xc1, 0x79, 0x07, 0x41, 0x8d,
-  0x07, 0xf8, 0x1d, 0xcc, 0x78, 0x30, 0x9a, 0x10, 0x00, 0x17, 0x38, 0x35,
-  0x4b, 0xa0, 0x2a, 0x03, 0x2d, 0x8e, 0x69, 0xd4, 0x09, 0xfc, 0x0e, 0x74,
-  0xc2, 0x12, 0x77, 0x22, 0xa4, 0x0a, 0xfc, 0x0e, 0x78, 0x62, 0xb6, 0x87,
-  0xdf, 0x01, 0x7c, 0x66, 0x19, 0x56, 0xa5, 0x55, 0x68, 0x6f, 0x38, 0x02,
-  0xf7, 0xf4, 0x3b, 0x18, 0xbe, 0xcb, 0xbd, 0x61, 0x86, 0x1b, 0x82, 0xf2,
-  0x0e, 0xc8, 0xa0, 0x86, 0x40, 0x87, 0x23, 0x0a, 0xff, 0x0e, 0x86, 0xaf,
-  0x02, 0x41, 0xef, 0x18, 0x66, 0xb8, 0x21, 0x40, 0xef, 0x80, 0x0c, 0x2a,
-  0x18, 0x74, 0x96, 0x81, 0x55, 0xc2, 0x25, 0x38, 0xd6, 0x0e, 0x86, 0xb9,
-  0x5e, 0x0e, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03, 0x6d, 0xcd,
-  0x83, 0x1c, 0x0f, 0xec, 0x3b, 0x38, 0xf3, 0x60, 0x34, 0x21, 0x00, 0x46,
-  0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x43,
-  0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x92, 0xf3, 0x00, 0xcc, 0x83,
-  0x83, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x9a, 0xf3, 0x20,
-  0xcc, 0x03, 0x86, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0xa2,
-  0xf3, 0x40, 0xcc, 0x03, 0x89, 0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1,
-  0x40, 0xd9, 0xf3, 0x20, 0xcc, 0x03, 0x10, 0x0f, 0x02, 0x37, 0x0f, 0x62,
-  0x3c, 0x60, 0xf3, 0x60, 0x34, 0x21, 0x00, 0x2e, 0x70, 0x6a, 0x96, 0x20,
-  0x5c, 0x86, 0x1b, 0xd8, 0x8f, 0xce, 0x03, 0x30, 0x98, 0x65, 0x70, 0x95,
-  0x57, 0x09, 0x6a, 0xbf, 0x03, 0x32, 0x0f, 0xe0, 0x02, 0xa7, 0x46, 0x0c,
-  0x0e, 0x00, 0x04, 0xc1, 0x60, 0xea, 0xf3, 0xa0, 0xcc, 0x03, 0xf9, 0xa3,
-  0xf1, 0x60, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x26, 0x3f, 0x0f, 0xca,
-  0x3c, 0x08, 0x84, 0x0b, 0x86, 0x29, 0xff, 0x0e, 0xd2, 0x3c, 0x80, 0x0b,
-  0x9c, 0x1a, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x49, 0xd4, 0x03, 0x35,
-  0x0f, 0xc0, 0x20, 0xc7, 0x83, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x98,
-  0x46, 0x3d, 0x50, 0xf3, 0x20, 0x10, 0x2e, 0x18, 0xe6, 0x02, 0xa7, 0xee,
-  0x70, 0xea, 0xce, 0x3b, 0x18, 0xe6, 0xf0, 0x39, 0x18, 0xe6, 0x88, 0x61,
-  0x8e, 0x18, 0x66, 0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x34, 0x54, 0x0f,
-  0xec, 0x3c, 0x98, 0xf1, 0x80, 0xd4, 0x83, 0xd1, 0x84, 0x00, 0x18, 0x4d,
-  0x10, 0x82, 0xd1, 0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x12, 0x19,
-  0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0xeb, 0xd5, 0x83, 0x3e, 0x0f, 0x12,
-  0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x0b, 0xd6, 0x03, 0x3f,
-  0x0f, 0x12, 0x22, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x2b, 0xd6,
-  0x83, 0x3f, 0x0f, 0x12, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x03,
-  0x05, 0xd7, 0x03, 0x3f, 0x0f, 0x7a, 0x3c, 0x08, 0x56, 0x3d, 0x70, 0xf3,
-  0x20, 0xd5, 0x83, 0xd1, 0x84, 0x00, 0xb8, 0xc0, 0xa9, 0x59, 0x82, 0x70,
-  0x19, 0x6e, 0x48, 0xc1, 0x00, 0xd6, 0x03, 0x30, 0x98, 0x65, 0x80, 0x95,
-  0x70, 0x09, 0xec, 0xc5, 0x83, 0x18, 0x0f, 0xe2, 0x33, 0x1c, 0xe1, 0x82,
-  0x81, 0x8c, 0x07, 0xc4, 0x37, 0xcb, 0x10, 0x2b, 0xb4, 0x12, 0xd8, 0x8c,
-  0x07, 0x2f, 0x18, 0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0x81,
-  0x53, 0x16, 0x18, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0xba, 0x1e, 0xe8,
-  0x70, 0x43, 0x80, 0xeb, 0x01, 0x18, 0xcc, 0x32, 0xc8, 0xca, 0xac, 0x04,
-  0x36, 0xec, 0x78, 0x00, 0x9f, 0x59, 0x02, 0x5c, 0x31, 0x1d, 0x0f, 0x88,
-  0xf8, 0xcc, 0x12, 0xe0, 0xca, 0x70, 0x44, 0x0e, 0x06, 0x3b, 0x1e, 0x08,
-  0xdf, 0x2c, 0x43, 0xad, 0xe0, 0x4a, 0x60, 0x3a, 0x18, 0xf0, 0x78, 0x10,
-  0x1f, 0x0b, 0x1c, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4e, 0x59, 0x10, 0xc9,
-  0xc7, 0x8a, 0x20, 0x3e, 0x45, 0x94, 0x7b, 0xa0, 0xc3, 0x0d, 0xc1, 0xb8,
-  0x07, 0x60, 0x30, 0xcb, 0x60, 0x2b, 0xb7, 0x12, 0x18, 0x99, 0x07, 0x43,
-  0x7c, 0x66, 0x09, 0x70, 0xc5, 0x88, 0x33, 0x0f, 0xe0, 0x33, 0x4b, 0x80,
-  0x2b, 0x03, 0x2d, 0x8e, 0x26, 0x2b, 0xd8, 0xac, 0x10, 0xb6, 0x22, 0xdc,
-  0x0a, 0x2a, 0x0a, 0xb4, 0x72, 0xc1, 0x30, 0x66, 0xe6, 0x81, 0x9a, 0x07,
-  0xf1, 0x19, 0x8e, 0x80, 0x85, 0x35, 0x0f, 0x88, 0x6f, 0x96, 0x21, 0x57,
-  0x78, 0x25, 0x30, 0x36, 0x0f, 0x62, 0x21, 0x3e, 0x16, 0x0c, 0xf4, 0xb9,
-  0x60, 0x98, 0x0b, 0x9c, 0xb2, 0xc0, 0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a,
-  0x98, 0xf7, 0x40, 0x87, 0x1b, 0x82, 0x78, 0x0f, 0xc0, 0x60, 0x96, 0x41,
-  0x57, 0x76, 0x25, 0xb0, 0x81, 0xce, 0x03, 0xf8, 0xcc, 0x12, 0x80, 0x8b,
-  0xc5, 0x79, 0x40, 0xc4, 0x67, 0x96, 0x00, 0x5c, 0x86, 0x23, 0x76, 0x41,
-  0xce, 0x03, 0xe1, 0x9b, 0x65, 0xe8, 0x15, 0x70, 0x09, 0x8c, 0x17, 0xe6,
-  0x3c, 0x88, 0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa7, 0x2c,
-  0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22, 0xfc, 0x3d, 0xd0, 0xe1, 0x86,
-  0x80, 0xdf, 0x03, 0x30, 0x98, 0x65, 0xf0, 0x95, 0x5f, 0x09, 0x6c, 0xcf,
-  0x83, 0x21, 0x3e, 0xb3, 0x04, 0xe0, 0x62, 0x04, 0xa8, 0x07, 0xf0, 0x99,
-  0x25, 0x00, 0x97, 0x81, 0x16, 0x47, 0xd3, 0x15, 0x6c, 0x57, 0x08, 0x5f,
-  0x11, 0x7e, 0x85, 0x35, 0x78, 0xe5, 0x82, 0x61, 0x2e, 0x70, 0xea, 0x36,
-  0xa7, 0x8e, 0xcd, 0x83, 0x61, 0xae, 0xaf, 0x83, 0x61, 0x8e, 0x18, 0xe6,
-  0x88, 0x61, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x40, 0x6b, 0xf9, 0x60,
-  0xdf, 0x03, 0x5c, 0x0f, 0x52, 0x3e, 0x18, 0x4d, 0x08, 0x80, 0xd1, 0x04,
-  0x21, 0x18, 0x4d, 0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0x22, 0x91, 0x11,
-  0x03, 0x04, 0x00, 0x41, 0x30, 0xb8, 0x68, 0x3e, 0x10, 0xf9, 0x20, 0x21,
-  0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xb8, 0x6a, 0x3e, 0x18, 0xf9,
-  0x20, 0x21, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xb8, 0x6c, 0x3e,
-  0x20, 0xf9, 0x20, 0x21, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x50,
-  0x7a, 0x3e, 0x18, 0xf9, 0x40, 0xdc, 0x83, 0x00, 0xe6, 0x83, 0x79, 0x0f,
-  0x5c, 0x3e, 0x18, 0x4d, 0x08, 0x80, 0x0b, 0x9c, 0x9a, 0x25, 0x08, 0x97,
-  0x81, 0x16, 0xc7, 0x34, 0x58, 0xc5, 0xbc, 0x87, 0x55, 0x61, 0x09, 0x57,
-  0x11, 0xc0, 0xc5, 0xbc, 0x87, 0x57, 0x99, 0x65, 0x10, 0x17, 0x72, 0x71,
-  0xc5, 0x60, 0x38, 0x62, 0x16, 0x03, 0x7a, 0x0f, 0x86, 0xef, 0x68, 0x31,
-  0x18, 0x66, 0xb8, 0x21, 0xf8, 0xf5, 0x80, 0x0c, 0x6a, 0x08, 0x74, 0x38,
-  0xe2, 0x1f, 0xf0, 0x3d, 0x18, 0xbe, 0x0a, 0x04, 0xbd, 0x90, 0x18, 0x66,
-  0xb8, 0x21, 0x10, 0xf7, 0x80, 0x0c, 0x2a, 0x18, 0x74, 0x96, 0x61, 0x5c,
-  0xf0, 0x25, 0x38, 0x53, 0x0f, 0x86, 0xb9, 0xdb, 0x0e, 0x86, 0x19, 0x31,
-  0x38, 0x00, 0x10, 0x04, 0x03, 0xad, 0xec, 0x83, 0x99, 0x0f, 0xe0, 0x3d,
-  0x08, 0xfb, 0x60, 0x34, 0x21, 0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61,
-  0x10, 0x46, 0x13, 0x88, 0xa1, 0x88, 0x43, 0x46, 0x0c, 0x10, 0x00, 0x04,
-  0xc1, 0xe0, 0x62, 0xfb, 0x40, 0xe7, 0x83, 0x83, 0x08, 0x46, 0x0c, 0x10,
-  0x00, 0x04, 0xc1, 0xe0, 0x6a, 0xfb, 0x60, 0xe7, 0x03, 0x86, 0x08, 0x46,
-  0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x72, 0xfb, 0x80, 0xe7, 0x03, 0x89,
-  0x08, 0x46, 0x0c, 0x14, 0x00, 0x04, 0xc1, 0x40, 0xa9, 0xfb, 0x60, 0xe7,
-  0x03, 0x7d, 0x0f, 0x02, 0xb4, 0x0f, 0x56, 0x3e, 0x30, 0xfb, 0x60, 0x34,
-  0x21, 0x00, 0x2e, 0x70, 0x6a, 0x96, 0x00, 0x5f, 0x86, 0x1b, 0xcc, 0x31,
-  0x70, 0xfb, 0x00, 0x0c, 0x66, 0x19, 0xca, 0xc5, 0x5c, 0x82, 0xaa, 0xf7,
-  0xc0, 0xe7, 0x03, 0xb8, 0xc0, 0xa9, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30,
-  0x98, 0xee, 0x3e, 0xf8, 0xf9, 0xa0, 0x1d, 0x03, 0x97, 0x0f, 0x46, 0x0c,
-  0x0e, 0x00, 0x04, 0xc1, 0x60, 0xc2, 0xfb, 0xe0, 0xe7, 0x83, 0x40, 0xb8,
-  0x60, 0x98, 0xc2, 0xf7, 0x60, 0xec, 0x03, 0xb8, 0xc0, 0xa9, 0x11, 0x83,
-  0x03, 0x00, 0x41, 0x30, 0x98, 0xf8, 0x3e, 0x20, 0xfb, 0x40, 0x27, 0x66,
-  0x3e, 0x18, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0xa9, 0xef, 0x03, 0xb2,
-  0x0f, 0x02, 0xe1, 0x82, 0x61, 0x2e, 0x70, 0xea, 0x0e, 0xa7, 0x2e, 0xdc,
-  0x83, 0x61, 0x4e, 0xbe, 0x83, 0x61, 0x8e, 0x18, 0xe6, 0x88, 0x61, 0x46,
-  0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x40, 0x13, 0xfd, 0x00, 0xee, 0x83, 0x96,
-  0x0f, 0xfc, 0x3e, 0x18, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d,
-  0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00,
-  0x41, 0x30, 0xb8, 0x52, 0x3f, 0xb8, 0xfb, 0x20, 0x21, 0x82, 0x11, 0x03,
-  0x04, 0x00, 0x41, 0x30, 0xb8, 0x54, 0x3f, 0xc0, 0xfb, 0x20, 0x21, 0x82,
-  0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xb8, 0x56, 0x3f, 0xc8, 0xfb, 0x20,
-  0x21, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x50, 0x64, 0x3f, 0xc0,
-  0xfb, 0xe0, 0xe6, 0x83, 0xa0, 0xf4, 0x03, 0xb4, 0x0f, 0x46, 0x3f, 0x18,
-  0x4d, 0x08, 0x80, 0x0b, 0x9c, 0x9a, 0x25, 0xc0, 0x97, 0xe1, 0x86, 0x91,
-  0x0c, 0x54, 0x3f, 0x00, 0x83, 0x59, 0x86, 0x73, 0xc1, 0x97, 0xc0, 0x52,
-  0x3e, 0x58, 0xf9, 0x20, 0x3e, 0xc3, 0x11, 0x29, 0x19, 0xb0, 0x7c, 0x40,
-  0x7c, 0xb3, 0x0c, 0xe8, 0xb2, 0x2e, 0x81, 0xb5, 0x7c, 0xa0, 0x92, 0x41,
-  0x7c, 0x2c, 0x18, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x38, 0x65, 0x81, 0x21,
-  0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x41, 0xfb, 0x81, 0x0e, 0x37, 0x04, 0xb2,
-  0x1f, 0x80, 0xc1, 0x2c, 0x43, 0xba, 0xa8, 0x4b, 0x60, 0x43, 0xcd, 0x07,
-  0xf0, 0x99, 0x25, 0x78, 0x17, 0xa3, 0xf9, 0x80, 0x88, 0xcf, 0x2c, 0xc1,
-  0xbb, 0x0c, 0x47, 0xd0, 0x64, 0x50, 0xf3, 0x81, 0xf0, 0xcd, 0x32, 0xb0,
-  0xcb, 0xbb, 0x04, 0x56, 0x93, 0x81, 0xcd, 0x07, 0xf1, 0xb1, 0xc0, 0xa1,
-  0xcf, 0x05, 0xc3, 0x5c, 0xe0, 0x94, 0x05, 0x91, 0x7c, 0xac, 0x08, 0xe2,
-  0x53, 0xc4, 0xef, 0x07, 0x3a, 0xdc, 0x10, 0xf4, 0x7e, 0x00, 0x06, 0xb3,
-  0x0c, 0xed, 0xe2, 0x2e, 0x81, 0xf9, 0x7c, 0x30, 0xc4, 0x67, 0x96, 0xe0,
-  0x5d, 0x8c, 0x08, 0xfb, 0x00, 0x3e, 0xb3, 0x04, 0xef, 0x32, 0xd0, 0xe2,
-  0x68, 0xe9, 0x82, 0xa9, 0x0b, 0xd1, 0x2e, 0x82, 0xbb, 0xf8, 0xa9, 0xb0,
-  0x2e, 0x17, 0x0c, 0x63, 0x60, 0x1f, 0x90, 0x7d, 0x10, 0x9f, 0xe1, 0x08,
-  0xd5, 0x28, 0xfb, 0x80, 0xf8, 0x66, 0x19, 0xe0, 0x65, 0x5e, 0x02, 0x33,
-  0xfb, 0x60, 0x35, 0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xc0,
-  0x29, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0xf6, 0x0f, 0x74,
-  0xb8, 0x21, 0x58, 0xff, 0x00, 0x0c, 0x66, 0x19, 0xe2, 0x45, 0x5e, 0x02,
-  0x1b, 0xdc, 0x3e, 0x80, 0xcf, 0x2c, 0xc1, 0xbd, 0xd8, 0xda, 0x07, 0x44,
-  0x7c, 0x66, 0x09, 0xee, 0x65, 0x38, 0xa2, 0x36, 0xd8, 0x3e, 0x10, 0xbe,
-  0x59, 0x06, 0x7a, 0xb9, 0x97, 0xc0, 0x6c, 0xa3, 0xed, 0x83, 0xf8, 0x58,
-  0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x70, 0xca, 0x82, 0x48, 0x3e, 0x56,
-  0x04, 0xf1, 0x29, 0x02, 0xff, 0x03, 0x1d, 0x6e, 0x08, 0xec, 0x3f, 0x00,
-  0x83, 0x59, 0x86, 0x7a, 0xb1, 0x97, 0xc0, 0xea, 0x3e, 0x18, 0xe2, 0x33,
-  0x4b, 0x70, 0x2f, 0x46, 0xe8, 0x7d, 0x00, 0x9f, 0x59, 0x82, 0x7b, 0x19,
-  0x68, 0x71, 0xb4, 0x78, 0xc1, 0xe4, 0x85, 0xa8, 0x17, 0xc1, 0x5e, 0x40,
-  0x67, 0x5e, 0x2e, 0x18, 0xe6, 0x02, 0xa7, 0x6e, 0x73, 0xea, 0xcc, 0x3e,
-  0x18, 0xe6, 0xee, 0x3c, 0x18, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66, 0xc4,
-  0xe0, 0x00, 0x40, 0x10, 0x0c, 0xb4, 0x13, 0x14, 0xea, 0x3f, 0x90, 0xfd,
-  0x60, 0x04, 0x85, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1, 0x84,
-  0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00, 0x10,
-  0x04, 0x83, 0xcb, 0x05, 0x05, 0xfe, 0x0f, 0x12, 0x22, 0x18, 0x31, 0x40,
-  0x00, 0x10, 0x04, 0x83, 0xeb, 0x05, 0x85, 0xfe, 0x0f, 0x12, 0x22, 0x18,
-  0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x0b, 0x06, 0x05, 0xff, 0x0f, 0x12,
-  0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x03, 0xe5, 0x06, 0x85, 0xfe,
-  0x0f, 0x78, 0x3f, 0x08, 0x54, 0x50, 0x68, 0xff, 0x00, 0x05, 0x85, 0xd1,
-  0x84, 0x00, 0xb8, 0xc0, 0xa9, 0x59, 0x02, 0x7c, 0x19, 0x68, 0x71, 0x4c,
-  0x63, 0x5c, 0x74, 0x7f, 0x10, 0x17, 0x96, 0x28, 0x17, 0xe1, 0x5e, 0x74,
-  0x7f, 0x30, 0x97, 0x59, 0x86, 0x7c, 0xd9, 0x17, 0xd4, 0x0c, 0x86, 0x23,
-  0x66, 0xcf, 0xfd, 0x83, 0xe1, 0x3b, 0xda, 0x1b, 0x66, 0xb8, 0x21, 0xc8,
-  0xfd, 0x80, 0x0c, 0x6a, 0x08, 0x74, 0x38, 0x22, 0x3f, 0xe4, 0x3f, 0x18,
-  0xbe, 0x0a, 0x04, 0xbd, 0xfd, 0x18, 0x66, 0xb8, 0x21, 0xe0, 0xfd, 0x80,
-  0x0c, 0x2a, 0x18, 0x74, 0x96, 0x41, 0x5f, 0x5e, 0x26, 0x38, 0xd0, 0x0f,
-  0x86, 0xb9, 0x58, 0x0f, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03,
-  0xed, 0x07, 0x85, 0x16, 0x14, 0xd4, 0x3f, 0xd8, 0x41, 0x61, 0x34, 0x21,
-  0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1,
-  0x88, 0x43, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x32, 0x43, 0x81,
-  0x06, 0x85, 0x83, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x3a,
-  0x43, 0xa1, 0x06, 0x05, 0x86, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1,
-  0xe0, 0x42, 0x43, 0xc1, 0x06, 0x05, 0x89, 0x08, 0x46, 0x0c, 0x14, 0x00,
-  0x04, 0xc1, 0x40, 0x79, 0x43, 0xa1, 0x06, 0x05, 0xfa, 0x0f, 0x02, 0x31,
-  0x14, 0x4a, 0x50, 0x00, 0x43, 0x61, 0x34, 0x21, 0x00, 0x2e, 0x70, 0x6a,
-  0x96, 0xe0, 0x65, 0x86, 0x1b, 0xc0, 0x33, 0x40, 0x43, 0x01, 0x0c, 0x66,
-  0x19, 0xf8, 0xa5, 0x5f, 0x82, 0x7a, 0xff, 0x00, 0x07, 0x05, 0xb8, 0xc0,
-  0xa9, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x98, 0xe2, 0x50, 0xc8, 0x41,
-  0xa1, 0xfd, 0x50, 0x50, 0x18, 0x31, 0x38, 0x00, 0x10, 0x04, 0x83, 0x49,
-  0x0e, 0x85, 0x1c, 0x14, 0x02, 0xe1, 0x82, 0x61, 0x4a, 0xfe, 0x83, 0x1e,
-  0x14, 0xe0, 0x02, 0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x60, 0xb2,
-  0x43, 0xc1, 0x07, 0x05, 0x1a, 0x69, 0x41, 0x61, 0xc4, 0xe0, 0x00, 0x40,
-  0x10, 0x0c, 0xa6, 0x3b, 0x14, 0x7c, 0x50, 0x08, 0x84, 0x0b, 0x86, 0xb9,
-  0xc0, 0xa9, 0x3b, 0x9c, 0xba, 0xdd, 0x0f, 0x86, 0x39, 0x76, 0x0f, 0x86,
-  0x39, 0x62, 0x98, 0x23, 0x86, 0x19, 0x31, 0x38, 0x00, 0x10, 0x04, 0x03,
-  0x8d, 0x0f, 0x05, 0x35, 0x14, 0x4e, 0x50, 0xc0, 0x43, 0x61, 0x34, 0x21,
-  0x00, 0x46, 0x13, 0x84, 0x60, 0x34, 0x61, 0x10, 0x46, 0x13, 0x88, 0xa1,
-  0x88, 0x44, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x1a, 0x45, 0x21,
-  0x0e, 0x85, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0xe0, 0x22,
-  0x45, 0x41, 0x0e, 0x85, 0x84, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1,
-  0xe0, 0x2a, 0x45, 0x61, 0x0e, 0x85, 0x84, 0x08, 0x46, 0x0c, 0x14, 0x00,
-  0x04, 0xc1, 0x40, 0x61, 0x45, 0x41, 0x0e, 0x85, 0x18, 0x14, 0x82, 0x3f,
-  0x14, 0xc4, 0x50, 0xe8, 0x43, 0x61, 0x34, 0x21, 0x00, 0x2e, 0x70, 0x6a,
-  0x96, 0xe0, 0x65, 0x86, 0x1b, 0xfa, 0x33, 0x20, 0x45, 0x01, 0x0c, 0x66,
-  0x19, 0xfc, 0xe5, 0x65, 0x02, 0x1b, 0x41, 0xa1, 0x04, 0x85, 0xf8, 0x0c,
-  0x47, 0xa4, 0x60, 0x60, 0x82, 0x02, 0xf1, 0xcd, 0x32, 0xfc, 0x8b, 0xc8,
-  0x04, 0x76, 0x82, 0x82, 0x0a, 0x06, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05,
-  0xc3, 0x5c, 0xe0, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x84,
-  0x2b, 0x0a, 0x3a, 0xdc, 0x10, 0xb0, 0xa2, 0x00, 0x06, 0xb3, 0x0c, 0x20,
-  0x13, 0x32, 0x81, 0x0d, 0x2f, 0x28, 0xc0, 0x67, 0x96, 0xc0, 0x64, 0xcc,
-  0x05, 0x05, 0x22, 0x3e, 0xb3, 0x04, 0x26, 0x33, 0x1c, 0x41, 0x83, 0xc1,
-  0x0b, 0x0a, 0xc2, 0x37, 0xcb, 0x30, 0x32, 0x26, 0x13, 0x58, 0x0d, 0x06,
-  0x30, 0x28, 0xc4, 0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x53,
-  0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0xb9, 0x28, 0xe8, 0x70,
-  0x43, 0x70, 0x8b, 0x02, 0x18, 0xcc, 0x32, 0x90, 0x4c, 0xc9, 0x04, 0x86,
-  0x83, 0xc2, 0x10, 0x9f, 0x59, 0x02, 0x93, 0x31, 0x62, 0x07, 0x05, 0xf8,
-  0xcc, 0x12, 0x98, 0xcc, 0x40, 0x8b, 0xa3, 0x81, 0x0c, 0x16, 0x32, 0x04,
-  0xc9, 0x08, 0x25, 0xe3, 0x87, 0x82, 0xc8, 0x5c, 0x30, 0x8c, 0xe9, 0xa0,
-  0xe0, 0x83, 0x42, 0x7c, 0x86, 0x23, 0x48, 0xe5, 0x07, 0x05, 0xe2, 0x9b,
-  0x65, 0x38, 0x19, 0x95, 0x09, 0x0c, 0x0c, 0x85, 0x52, 0x89, 0x8f, 0x05,
-  0x03, 0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa7, 0x2c, 0x30, 0xe4, 0x63, 0x45,
-  0x10, 0x9f, 0x22, 0xce, 0x51, 0xd0, 0xe1, 0x86, 0xa0, 0x1c, 0x05, 0x30,
-  0x98, 0x65, 0x40, 0x99, 0x94, 0x09, 0x6c, 0x40, 0x43, 0x01, 0x3e, 0xb3,
-  0x04, 0x2e, 0x63, 0x65, 0x28, 0x10, 0xf1, 0x99, 0x25, 0x70, 0x99, 0xe1,
-  0x88, 0x57, 0x31, 0x43, 0x41, 0xf8, 0x66, 0x19, 0x56, 0xc6, 0x65, 0x02,
-  0x83, 0x95, 0x33, 0x14, 0xe2, 0x63, 0x81, 0x43, 0x9f, 0x0b, 0x86, 0xb9,
-  0xc0, 0x29, 0x0b, 0x22, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x08, 0x79, 0x14,
-  0x74, 0xb8, 0x21, 0x80, 0x47, 0x01, 0x0c, 0x66, 0x19, 0x58, 0xa6, 0x65,
-  0x02, 0x7b, 0x43, 0x61, 0x88, 0xcf, 0x2c, 0x81, 0xcb, 0x18, 0x41, 0x87,
-  0x02, 0x7c, 0x66, 0x09, 0x5c, 0x66, 0xa0, 0xc5, 0xd1, 0x50, 0x06, 0x4b,
-  0x19, 0x82, 0x65, 0x84, 0x96, 0xa1, 0x2b, 0x95, 0xb9, 0x60, 0x98, 0x0b,
-  0x9c, 0xba, 0xcd, 0xa9, 0x03, 0x43, 0x61, 0x98, 0x8b, 0xfb, 0x60, 0x98,
-  0x23, 0x86, 0x39, 0x62, 0x98, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0xd0,
-  0x42, 0x52, 0x78, 0x47, 0x81, 0x15, 0x85, 0x7e, 0x14, 0x46, 0x13, 0x02,
-  0x60, 0x34, 0x41, 0x08, 0x46, 0x13, 0x06, 0x61, 0x34, 0x81, 0x18, 0x8a,
-  0x48, 0x64, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x2e, 0x94, 0x14, 0xec,
-  0x51, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0xae, 0x94,
-  0x14, 0xee, 0x51, 0x48, 0x88, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c,
-  0x2e, 0x95, 0x14, 0xf0, 0x51, 0x48, 0x88, 0x60, 0xc4, 0x40, 0x01, 0x40,
-  0x10, 0x0c, 0x94, 0x98, 0x14, 0xee, 0x51, 0xb0, 0x45, 0x21, 0x20, 0x49,
-  0xe1, 0x1c, 0x05, 0x91, 0x14, 0x46, 0x13, 0x02, 0xe0, 0x02, 0xa7, 0x66,
-  0x09, 0x5e, 0x66, 0xa0, 0xc5, 0x31, 0x0d, 0x7d, 0x71, 0x4d, 0x22, 0x5f,
-  0x58, 0x82, 0x5f, 0x04, 0x97, 0x71, 0x4d, 0xa2, 0x5f, 0x66, 0x19, 0x60,
-  0x46, 0x66, 0x44, 0x35, 0x18, 0x8e, 0x90, 0x3d, 0x74, 0x14, 0x86, 0xef,
-  0x66, 0x6f, 0x98, 0xe1, 0x86, 0x60, 0x16, 0x05, 0x32, 0xa8, 0x21, 0xd0,
-  0xe1, 0x88, 0x79, 0x61, 0x47, 0x61, 0xf8, 0x2a, 0x10, 0xf4, 0xea, 0x65,
-  0x98, 0xe1, 0x86, 0xc0, 0x16, 0x05, 0x32, 0xa8, 0x60, 0xd0, 0x59, 0x86,
-  0x98, 0x31, 0x9b, 0xe0, 0xf4, 0x50, 0x18, 0xe6, 0x56, 0x3f, 0x18, 0x66,
-  0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0xb4, 0x9c, 0x14, 0x4e, 0x52, 0x20,
-  0x47, 0xa1, 0x26, 0x85, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1,
-  0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x0e, 0x19, 0x31, 0x40, 0x00,
-  0x10, 0x04, 0x83, 0x0b, 0x2c, 0x05, 0x97, 0x14, 0x0e, 0x22, 0x18, 0x31,
-  0x40, 0x00, 0x10, 0x04, 0x83, 0x2b, 0x2c, 0x85, 0x97, 0x14, 0x18, 0x22,
-  0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x4b, 0x2c, 0x05, 0x98, 0x14,
-  0x24, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x03, 0x25, 0x2d, 0x85,
-  0x97, 0x14, 0xdc, 0x51, 0x08, 0x78, 0x52, 0xf8, 0x47, 0x41, 0x27, 0x85,
-  0xd1, 0x84, 0x00, 0xb8, 0xc0, 0xa9, 0x59, 0x02, 0xb3, 0x19, 0x6e, 0xd0,
-  0xd5, 0x40, 0x2c, 0x05, 0x30, 0x98, 0x65, 0x98, 0x19, 0x9a, 0x09, 0x2a,
-  0x1d, 0x05, 0x99, 0x14, 0xe0, 0x02, 0xa7, 0x46, 0x0c, 0x0e, 0x00, 0x04,
-  0xc1, 0x60, 0x5a, 0x4b, 0x61, 0x26, 0x05, 0xf6, 0x13, 0x49, 0x61, 0xc4,
-  0xe0, 0x00, 0x40, 0x10, 0x0c, 0x26, 0xb6, 0x14, 0x66, 0x52, 0x08, 0x84,
-  0x0b, 0x86, 0x29, 0x76, 0x14, 0x6e, 0x52, 0x80, 0x0b, 0x9c, 0x1a, 0x31,
-  0x38, 0x00, 0x10, 0x04, 0x83, 0x09, 0x2e, 0x05, 0x9c, 0x14, 0x5c, 0xe6,
-  0x24, 0x85, 0x11, 0x83, 0x03, 0x00, 0x41, 0x30, 0x98, 0xe2, 0x52, 0xc0,
-  0x49, 0x21, 0x10, 0x2e, 0x18, 0xe6, 0x02, 0xa7, 0xee, 0x70, 0xea, 0x6a,
-  0x51, 0x18, 0xe6, 0xcc, 0x3f, 0x18, 0xe6, 0x88, 0x61, 0x8e, 0x18, 0x66,
-  0xc4, 0xe0, 0x00, 0x40, 0x10, 0x0c, 0x34, 0xbb, 0x14, 0xc8, 0x52, 0x08,
-  0x49, 0x41, 0x2e, 0x85, 0xd1, 0x84, 0x00, 0x18, 0x4d, 0x10, 0x82, 0xd1,
-  0x84, 0x41, 0x18, 0x4d, 0x20, 0x86, 0x22, 0x12, 0x19, 0x31, 0x40, 0x00,
-  0x10, 0x04, 0x83, 0xab, 0x2f, 0x85, 0xb5, 0x14, 0x12, 0x22, 0x18, 0x31,
-  0x40, 0x00, 0x10, 0x04, 0x83, 0xcb, 0x2f, 0x05, 0xb6, 0x14, 0x12, 0x22,
-  0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0xeb, 0x2f, 0x85, 0xb6, 0x14,
-  0x12, 0x22, 0x18, 0x31, 0x50, 0x00, 0x10, 0x04, 0x03, 0xc5, 0x34, 0x05,
-  0xb6, 0x14, 0x56, 0x52, 0x08, 0xf2, 0x52, 0xe0, 0x49, 0xe1, 0x2e, 0x85,
-  0xd1, 0x84, 0x00, 0xb8, 0xc0, 0xa9, 0x59, 0x02, 0xb3, 0x19, 0x6e, 0xb8,
-  0xd7, 0xc0, 0x2f, 0x05, 0x30, 0x98, 0x65, 0xa8, 0x19, 0xb3, 0x09, 0xac,
-  0x1f, 0x85, 0x7f, 0x14, 0xe2, 0x33, 0x1c, 0x81, 0x82, 0x01, 0x48, 0x0a,
-  0xc4, 0x37, 0xcb, 0x60, 0x33, 0x39, 0x13, 0x58, 0x48, 0x0a, 0x29, 0x18,
-  0xc4, 0xc7, 0x82, 0x81, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x53, 0x16, 0x18,
-  0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0xa8, 0x29, 0xe8, 0x70, 0x43, 0x60,
-  0x9a, 0x02, 0x18, 0xcc, 0x32, 0xdc, 0x0c, 0xce, 0x04, 0x36, 0xa4, 0xa4,
-  0x00, 0x9f, 0x59, 0x82, 0x9e, 0x31, 0x94, 0x14, 0x88, 0xf8, 0xcc, 0x12,
-  0xf4, 0xcc, 0x70, 0xc4, 0x0c, 0x06, 0x29, 0x29, 0x08, 0xdf, 0x2c, 0x83,
-  0xce, 0xf4, 0x4c, 0x60, 0x34, 0x18, 0xa8, 0xa4, 0x10, 0x1f, 0x0b, 0x1c,
-  0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4e, 0x59, 0x10, 0xc9, 0xc7, 0x8a, 0x20,
-  0x3e, 0x45, 0xcc, 0xa6, 0xa0, 0xc3, 0x0d, 0x41, 0x6c, 0x0a, 0x60, 0x30,
-  0xcb, 0xb0, 0x33, 0x3c, 0x13, 0x98, 0x4c, 0x0a, 0x43, 0x7c, 0x66, 0x09,
-  0x7a, 0xc6, 0x88, 0x9a, 0x14, 0xe0, 0x33, 0x4b, 0xd0, 0x33, 0x03, 0x2d,
-  0x8e, 0x76, 0x33, 0x18, 0xce, 0x10, 0x3b, 0x23, 0xf0, 0x0c, 0x1f, 0x0a,
-  0x39, 0x73, 0xc1, 0x30, 0x46, 0x93, 0x02, 0x4e, 0x0a, 0xf1, 0x19, 0x8e,
-  0xf0, 0x9b, 0x9c, 0x14, 0x88, 0x6f, 0x96, 0xc1, 0x67, 0xc2, 0x26, 0x30,
-  0x9d, 0x14, 0xfe, 0x26, 0x3e, 0x16, 0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b,
-  0x9c, 0xb2, 0xc0, 0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x08, 0x4f, 0x41,
-  0x87, 0x1b, 0x82, 0xdf, 0x14, 0xc0, 0x60, 0x96, 0xe1, 0x67, 0xc0, 0x26,
-  0xb0, 0x41, 0x2c, 0x05, 0xf8, 0xcc, 0x12, 0x94, 0x8d, 0xfd, 0xa4, 0x40,
-  0xc4, 0x67, 0x96, 0xa0, 0x6c, 0x86, 0x23, 0x52, 0x07, 0x2c, 0x05, 0xe1,
-  0x9b, 0x65, 0x10, 0x9b, 0xb2, 0x09, 0x4c, 0x75, 0xc2, 0x52, 0x88, 0x8f,
-  0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa7, 0x2c, 0x88, 0xe4, 0x63,
-  0x45, 0x10, 0x9f, 0x22, 0xd8, 0x53, 0xd0, 0xe1, 0x86, 0x40, 0x3d, 0x05,
-  0x30, 0x98, 0x65, 0x18, 0x1b, 0xb2, 0x09, 0x2c, 0x2d, 0x85, 0x21, 0x3e,
-  0xb3, 0x04, 0x65, 0x63, 0x84, 0x5b, 0x0a, 0xf0, 0x99, 0x25, 0x28, 0x9b,
-  0x81, 0x16, 0x47, 0xfb, 0x19, 0x0c, 0x6c, 0x88, 0xb1, 0x11, 0xc8, 0x06,
-  0xed, 0xc2, 0xe6, 0x82, 0x61, 0x2e, 0x70, 0xea, 0x36, 0xa7, 0x4e, 0x27,
-  0x85, 0x61, 0x6e, 0x0d, 0x85, 0x61, 0x8e, 0x18, 0xe6, 0x88, 0x61, 0x46,
-  0x0c, 0x0e, 0x00, 0x04, 0xc1, 0x40, 0xdb, 0x4f, 0x21, 0x3d, 0x05, 0xd3,
-  0x14, 0xee, 0x53, 0x18, 0x4d, 0x08, 0x80, 0xd1, 0x04, 0x21, 0x18, 0x4d,
-  0x18, 0x84, 0xd1, 0x04, 0x62, 0x28, 0x22, 0x91, 0x11, 0x03, 0x04, 0x00,
-  0x41, 0x30, 0xb8, 0x44, 0x54, 0x80, 0x4f, 0x21, 0x21, 0x82, 0x11, 0x03,
-  0x04, 0x00, 0x41, 0x30, 0xb8, 0x46, 0x54, 0x88, 0x4f, 0x21, 0x21, 0x82,
-  0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0xb8, 0x48, 0x54, 0x90, 0x4f, 0x21,
-  0x21, 0x82, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30, 0x50, 0x56, 0x54, 0x88,
-  0x4f, 0x01, 0x36, 0x85, 0xc0, 0x3f, 0x85, 0xf0, 0x14, 0xf8, 0x53, 0x18,
-  0x4d, 0x08, 0x80, 0x0b, 0x9c, 0x9a, 0x25, 0x30, 0x9b, 0x81, 0x16, 0xc7,
-  0x34, 0x62, 0x46, 0x74, 0x09, 0x98, 0x61, 0x89, 0x99, 0x11, 0xca, 0x46,
-  0x74, 0x09, 0x9a, 0xb1, 0xbf, 0x0d, 0x6e, 0x53, 0x80, 0xcf, 0x2c, 0xc3,
-  0xd9, 0xa4, 0x4d, 0xdf, 0x06, 0xc3, 0x11, 0xa1, 0x1b, 0x8c, 0xa7, 0x30,
-  0x7c, 0x27, 0xba, 0xc1, 0x30, 0xc3, 0x0d, 0x81, 0x6b, 0x0a, 0x64, 0x50,
-  0x43, 0xa0, 0xc3, 0x11, 0xc5, 0x79, 0x0a, 0xc3, 0x57, 0x81, 0xa0, 0x77,
-  0x0c, 0x33, 0xdc, 0x10, 0xc4, 0xa6, 0x40, 0x06, 0x15, 0x0c, 0x3a, 0xcb,
-  0x80, 0x36, 0x7d, 0x13, 0x5c, 0x5d, 0x0a, 0xc3, 0x9c, 0x29, 0x0a, 0xc3,
-  0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x81, 0x46, 0xa3, 0x82, 0x88, 0x0a,
-  0xbf, 0x29, 0xc0, 0xa8, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x30,
-  0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50, 0xc4, 0x21, 0x23, 0x06, 0x08,
-  0x00, 0x82, 0x60, 0x70, 0xed, 0xa8, 0x90, 0xa2, 0xc2, 0x41, 0x04, 0x23,
-  0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0xf1, 0xa8, 0xa0, 0xa2, 0x02, 0x43,
-  0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0xf5, 0xa8, 0xb0, 0xa2,
-  0x82, 0x44, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xa0, 0x90, 0xa9,
-  0xa0, 0xa2, 0x42, 0x7a, 0x0a, 0xc1, 0x8d, 0x0a, 0xfa, 0x29, 0xd4, 0xa8,
-  0x30, 0x9a, 0x10, 0x00, 0x17, 0x38, 0x35, 0x4b, 0xd0, 0x37, 0xc3, 0x0d,
-  0xb5, 0x1b, 0xf4, 0xa8, 0x00, 0x06, 0xb3, 0x0c, 0x6a, 0xb3, 0x36, 0x41,
-  0x91, 0xa7, 0xd0, 0xa2, 0x02, 0x5c, 0xe0, 0xd4, 0x88, 0xc1, 0x01, 0x80,
-  0x20, 0x18, 0x4c, 0x66, 0x2a, 0xb8, 0xa8, 0xb0, 0xbb, 0x41, 0x7f, 0x0a,
-  0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x30, 0x9d, 0xa9, 0xe0, 0xa2, 0x42,
-  0x20, 0x5c, 0x30, 0x4c, 0x9d, 0xa7, 0x20, 0xa3, 0x02, 0x5c, 0xe0, 0xd4,
-  0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x4c, 0x6b, 0x2a, 0xcc, 0xa8, 0x00,
-  0x06, 0x22, 0x2a, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0xc4, 0xa6,
-  0xc2, 0x8c, 0x0a, 0x81, 0x70, 0xc1, 0x30, 0x17, 0x38, 0x75, 0x87, 0x53,
-  0x07, 0x9b, 0xc2, 0x30, 0x17, 0x8e, 0xc2, 0x30, 0x47, 0x0c, 0x73, 0xc4,
-  0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xa0, 0xc5, 0xa9, 0xf0, 0xa3,
-  0x02, 0x7f, 0x0a, 0x6d, 0x2a, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10,
-  0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x91, 0xc8, 0x88, 0x01,
-  0x02, 0x80, 0x20, 0x18, 0x5c, 0x78, 0x2a, 0x98, 0xa9, 0x90, 0x10, 0xc1,
-  0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0x79, 0x2a, 0x9c, 0xa9, 0x90,
-  0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0x7a, 0x2a, 0xa0,
-  0xa9, 0x90, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x28, 0xa1,
-  0x2a, 0x9c, 0xa9, 0x60, 0xa2, 0x42, 0x40, 0xa7, 0xc2, 0x8d, 0x0a, 0x72,
-  0x2a, 0x8c, 0x26, 0x04, 0xc0, 0x05, 0x4e, 0xcd, 0x12, 0xf4, 0xcd, 0x70,
-  0x83, 0xfc, 0x06, 0x79, 0x2a, 0x80, 0xc1, 0x2c, 0x03, 0xdb, 0xf4, 0x4d,
-  0x60, 0xf8, 0x29, 0xe8, 0xa7, 0x10, 0x9f, 0xe1, 0x88, 0xfb, 0x0d, 0xf6,
-  0x53, 0x20, 0xbe, 0x59, 0x86, 0xb6, 0x81, 0x9b, 0xc0, 0xf8, 0x53, 0xc0,
-  0xdf, 0x20, 0x3e, 0x16, 0x0c, 0xf4, 0xb9, 0x60, 0x98, 0x0b, 0x9c, 0xb2,
-  0xc0, 0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a, 0x18, 0x55, 0x41, 0x87, 0x1b,
-  0x82, 0x50, 0x15, 0xc0, 0x60, 0x96, 0xc1, 0x6d, 0xde, 0x26, 0xb0, 0x81,
-  0x44, 0x05, 0xf8, 0xcc, 0x12, 0xd0, 0x8d, 0x8d, 0xa8, 0x40, 0xc4, 0x67,
-  0x96, 0x80, 0x6e, 0x86, 0x23, 0x44, 0x38, 0x20, 0x51, 0x41, 0xf8, 0x66,
-  0x19, 0xe2, 0x86, 0x6e, 0x02, 0x1b, 0xe1, 0xa0, 0x44, 0x85, 0xf8, 0x58,
-  0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x70, 0xca, 0x82, 0x48, 0x3e, 0x56,
-  0x04, 0xf1, 0x29, 0xc2, 0x55, 0x05, 0x1d, 0x6e, 0x08, 0x58, 0x55, 0x00,
-  0x83, 0x59, 0x06, 0xb9, 0x99, 0x9b, 0xc0, 0x5a, 0x54, 0x18, 0xe2, 0x33,
-  0x4b, 0x40, 0x37, 0x46, 0xc0, 0xa8, 0x00, 0x9f, 0x59, 0x02, 0xba, 0x19,
-  0x68, 0x71, 0x34, 0xb7, 0xc1, 0xde, 0x86, 0x90, 0x1b, 0x61, 0x6e, 0x74,
-  0x70, 0x80, 0x9b, 0x0b, 0x86, 0xb1, 0x17, 0x15, 0x66, 0x54, 0x88, 0xcf,
-  0x70, 0x04, 0x2c, 0xd0, 0xa8, 0x40, 0x7c, 0xb3, 0x0c, 0x75, 0x83, 0x37,
-  0x81, 0xd5, 0xa8, 0x10, 0x0b, 0xf1, 0xb1, 0x60, 0xa0, 0xcf, 0x05, 0xc3,
-  0x5c, 0xe0, 0x94, 0x05, 0x86, 0x7c, 0xac, 0x08, 0xe2, 0x53, 0x04, 0xaf,
-  0x0a, 0x3a, 0xdc, 0x10, 0xe8, 0xaa, 0x00, 0x06, 0xb3, 0x0c, 0x76, 0x73,
-  0x37, 0x81, 0x0d, 0x3d, 0x2a, 0xc0, 0x67, 0x96, 0x80, 0x6f, 0x4c, 0x47,
-  0x05, 0x22, 0x3e, 0xb3, 0x04, 0x7c, 0x33, 0x1c, 0xb1, 0x0b, 0x3b, 0x2a,
-  0x08, 0xdf, 0x2c, 0x43, 0xde, 0xf0, 0x4d, 0x60, 0xbc, 0xc0, 0xa3, 0x42,
-  0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30, 0x17, 0x38, 0x65, 0x41, 0x24,
-  0x1f, 0x2b, 0x82, 0xf8, 0x14, 0x71, 0xae, 0x82, 0x0e, 0x37, 0x04, 0xe5,
-  0x2a, 0x80, 0xc1, 0x2c, 0x83, 0xde, 0xec, 0x4d, 0x60, 0x64, 0x2a, 0x0c,
-  0xf1, 0x99, 0x25, 0xe0, 0x1b, 0x23, 0xd2, 0x54, 0x80, 0xcf, 0x2c, 0x01,
-  0xdf, 0x0c, 0xb4, 0x38, 0x9a, 0xdd, 0x60, 0x77, 0x43, 0xe8, 0x8d, 0xb0,
-  0x37, 0xac, 0x81, 0x37, 0x17, 0x0c, 0x73, 0x81, 0x53, 0xb7, 0x39, 0x75,
-  0x35, 0x2a, 0x0c, 0x73, 0x66, 0x29, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c,
-  0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x9a, 0xbd, 0x0a, 0xe4, 0x2a,
-  0x84, 0xaa, 0x20, 0xaf, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1,
-  0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20,
-  0x00, 0x08, 0x82, 0xc1, 0xd5, 0xaf, 0xc2, 0xba, 0x0a, 0x09, 0x11, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xe5, 0xaf, 0x02, 0xbb, 0x0a, 0x09,
-  0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xf5, 0xaf, 0x42, 0xbb,
-  0x0a, 0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0x81, 0x62, 0xb2,
-  0x02, 0xbb, 0x0a, 0xab, 0x2a, 0x04, 0xf9, 0x2a, 0xf0, 0xaa, 0x70, 0xaf,
-  0xc2, 0x68, 0x42, 0x00, 0x5c, 0xe0, 0xd4, 0x2c, 0x41, 0xdf, 0x0c, 0xb4,
-  0x38, 0xa6, 0x81, 0x36, 0x78, 0x4d, 0x9c, 0x0d, 0x4b, 0xa8, 0x8d, 0xc0,
-  0x37, 0x78, 0x4d, 0xac, 0xcd, 0x2c, 0x83, 0xdf, 0x80, 0xce, 0x1d, 0x07,
-  0xc3, 0x11, 0x7c, 0x1c, 0xf4, 0xaa, 0x30, 0x7c, 0xd7, 0xc7, 0xc1, 0x30,
-  0xc3, 0x0d, 0x01, 0xaa, 0x0a, 0x64, 0x50, 0x43, 0xa0, 0xc3, 0x11, 0xff,
-  0x10, 0xae, 0xc2, 0xf0, 0x55, 0x20, 0xe8, 0x85, 0xc4, 0x30, 0xc3, 0x0d,
-  0xc1, 0xaa, 0x0a, 0x64, 0x50, 0xc1, 0xa0, 0xb3, 0x0c, 0x7f, 0x43, 0x3b,
-  0xc1, 0xbd, 0xa9, 0x30, 0xcc, 0x81, 0xa6, 0x30, 0xcc, 0x88, 0xc1, 0x01,
-  0x80, 0x20, 0x18, 0x68, 0x2e, 0x2b, 0xf0, 0xab, 0x90, 0xab, 0x82, 0xca,
-  0x0a, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30,
-  0x9a, 0x40, 0x0c, 0x45, 0x1c, 0x32, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06,
-  0x57, 0xcd, 0x0a, 0x23, 0x2b, 0x1c, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20,
-  0x08, 0x06, 0x97, 0xcd, 0x0a, 0x24, 0x2b, 0x30, 0x44, 0x30, 0x62, 0x80,
-  0x00, 0x20, 0x08, 0x06, 0xd7, 0xcd, 0x0a, 0x25, 0x2b, 0x48, 0x44, 0x30,
-  0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x8a, 0xcf, 0x0a, 0x24, 0x2b, 0x8c,
-  0xab, 0x10, 0xc4, 0xac, 0x40, 0xaf, 0xc2, 0xcb, 0x0a, 0xa3, 0x09, 0x01,
-  0x70, 0x81, 0x53, 0xb3, 0x04, 0xb4, 0x33, 0xdc, 0xf0, 0xca, 0xc1, 0xcd,
-  0x0a, 0x60, 0x30, 0xcb, 0x10, 0x3a, 0xa2, 0x13, 0x94, 0xaf, 0x0a, 0x27,
-  0x2b, 0xc0, 0x05, 0x4e, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x04,
-  0xb6, 0x02, 0xca, 0x0a, 0xb6, 0x1c, 0xdc, 0xab, 0x30, 0x62, 0x70, 0x00,
-  0x20, 0x08, 0x06, 0x53, 0xd8, 0x0a, 0x28, 0x2b, 0x04, 0xc2, 0x05, 0xc3,
-  0x54, 0xb8, 0x0a, 0x2c, 0x2b, 0xc0, 0x05, 0x4e, 0x8d, 0x18, 0x1c, 0x00,
-  0x08, 0x82, 0xc1, 0x54, 0xb6, 0x42, 0xcb, 0x0a, 0x3a, 0xc1, 0xaf, 0xc2,
-  0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x4c, 0x66, 0x2b, 0xb4, 0xac, 0x10,
-  0x08, 0x17, 0x0c, 0x73, 0x81, 0x53, 0x77, 0x38, 0x75, 0xaa, 0x2a, 0x0c,
-  0x73, 0xbb, 0x29, 0x0c, 0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70,
-  0x00, 0x20, 0x08, 0x06, 0xda, 0xda, 0x0a, 0x39, 0x2b, 0xd8, 0xab, 0x70,
-  0xb6, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20,
-  0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
-  0xc1, 0x25, 0xb7, 0x02, 0xd8, 0x0a, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00,
-  0x08, 0x82, 0xc1, 0x35, 0xb7, 0x42, 0xd8, 0x0a, 0x09, 0x11, 0x8c, 0x18,
-  0x20, 0x00, 0x08, 0x82, 0xc1, 0x45, 0xb7, 0x82, 0xd8, 0x0a, 0x09, 0x11,
-  0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0x81, 0xb2, 0xb7, 0x42, 0xd8, 0x0a,
-  0x20, 0x2b, 0x04, 0x6e, 0x2b, 0xc4, 0xac, 0xc0, 0xb6, 0xc2, 0x68, 0x42,
-  0x00, 0x5c, 0xe0, 0xd4, 0x2c, 0x01, 0xed, 0x0c, 0x37, 0xb0, 0x73, 0x30,
-  0xb7, 0x02, 0x18, 0xcc, 0x32, 0x8c, 0x0e, 0xed, 0x04, 0x26, 0xaf, 0x02,
-  0xbd, 0x0a, 0xf1, 0x19, 0x8e, 0x90, 0xe7, 0xa0, 0x5e, 0x05, 0xe2, 0x9b,
-  0x65, 0x20, 0x9d, 0xd3, 0x09, 0xcc, 0x5e, 0x85, 0x79, 0x0e, 0xe2, 0x63,
-  0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xc0, 0x29, 0x0b, 0x0c, 0xf9, 0x58,
-  0x11, 0xc4, 0xa7, 0x88, 0xbe, 0x15, 0x74, 0xb8, 0x21, 0xd8, 0x5b, 0x01,
-  0x0c, 0x66, 0x19, 0x4a, 0xc7, 0x74, 0x02, 0x1b, 0xfc, 0x55, 0x80, 0xcf,
-  0x2c, 0xc1, 0xea, 0x58, 0xbf, 0x0a, 0x44, 0x7c, 0x66, 0x09, 0x56, 0x67,
-  0x38, 0xa2, 0x9f, 0x03, 0x7f, 0x15, 0x84, 0x6f, 0x96, 0x01, 0x75, 0x56,
-  0x27, 0x30, 0x7f, 0x0e, 0xfe, 0x55, 0x88, 0x8f, 0x05, 0x0e, 0x7d, 0x2e,
-  0x18, 0xe6, 0x02, 0xa7, 0x2c, 0x88, 0xe4, 0x63, 0x45, 0x10, 0x9f, 0x22,
-  0x50, 0x57, 0xd0, 0xe1, 0x86, 0xc0, 0x74, 0x05, 0x30, 0x98, 0x65, 0x48,
-  0x1d, 0xd5, 0x09, 0xec, 0x64, 0x85, 0x21, 0x3e, 0xb3, 0x04, 0xab, 0x63,
-  0x84, 0xca, 0x0a, 0xf0, 0x99, 0x25, 0x58, 0x9d, 0x81, 0x16, 0x47, 0x2b,
-  0x1d, 0xcc, 0x74, 0x88, 0xd4, 0x11, 0x54, 0x07, 0x46, 0x87, 0xd3, 0xb9,
-  0x60, 0x18, 0x4b, 0x59, 0xa1, 0x65, 0x85, 0xf8, 0x0c, 0x47, 0xa8, 0x86,
-  0xcb, 0x0a, 0xc4, 0x37, 0xcb, 0xc0, 0x3a, 0xaf, 0x13, 0xd8, 0xcb, 0x0a,
-  0xab, 0x11, 0x1f, 0x0b, 0x06, 0xfa, 0x5c, 0x30, 0xcc, 0x05, 0x4e, 0x59,
-  0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45, 0xd8, 0xae, 0xa0, 0xc3, 0x0d,
-  0x01, 0xed, 0x0a, 0x60, 0x30, 0xcb, 0xd0, 0x3a, 0xae, 0x13, 0xd8, 0x70,
-  0xb3, 0x02, 0x7c, 0x66, 0x09, 0x66, 0xc7, 0x68, 0x56, 0x20, 0xe2, 0x33,
-  0x4b, 0x30, 0x3b, 0xc3, 0x11, 0xb5, 0x51, 0xb3, 0x82, 0xf0, 0xcd, 0x32,
-  0xc0, 0xce, 0xec, 0x04, 0x66, 0x1b, 0x36, 0x2b, 0xc4, 0xc7, 0x02, 0x87,
-  0x3e, 0x17, 0x0c, 0x73, 0x81, 0x53, 0x16, 0x44, 0xf2, 0xb1, 0x22, 0x88,
-  0x4f, 0x11, 0xe1, 0x2b, 0xe8, 0x70, 0x43, 0xf0, 0xbb, 0x02, 0x18, 0xcc,
-  0x32, 0xc4, 0x8e, 0xec, 0x04, 0xe6, 0xb3, 0xc2, 0x10, 0x9f, 0x59, 0x82,
-  0xd9, 0x31, 0x62, 0x6c, 0x05, 0xf8, 0xcc, 0x12, 0xcc, 0xce, 0x40, 0x8b,
-  0xa3, 0xb5, 0x0e, 0xe6, 0x3a, 0x44, 0xec, 0x08, 0xb2, 0x03, 0x3a, 0xaf,
-  0x73, 0xc1, 0x30, 0x17, 0x38, 0x75, 0x9b, 0x53, 0xf7, 0xb2, 0xc2, 0x30,
-  0x07, 0xa6, 0xc2, 0x30, 0x47, 0x0c, 0x73, 0xc4, 0x30, 0x23, 0x06, 0x07,
-  0x00, 0x82, 0x60, 0xa0, 0xc1, 0xaf, 0xe0, 0xbb, 0xc2, 0xde, 0x0a, 0xec,
-  0x2b, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2,
-  0x68, 0x02, 0x31, 0x14, 0x91, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18,
-  0x5c, 0xf7, 0x2b, 0x94, 0xaf, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80,
-  0x20, 0x18, 0x5c, 0xf8, 0x2b, 0x98, 0xaf, 0x90, 0x10, 0xc1, 0x88, 0x01,
-  0x02, 0x80, 0x20, 0x18, 0x5c, 0xf9, 0x2b, 0x9c, 0xaf, 0x90, 0x10, 0xc1,
-  0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x28, 0x20, 0x2c, 0x98, 0xaf, 0x50,
-  0xba, 0x42, 0x30, 0xbf, 0x82, 0xed, 0x0a, 0xf1, 0x2b, 0x8c, 0x26, 0x04,
-  0xc0, 0x05, 0x4e, 0xcd, 0x12, 0xd0, 0xce, 0x40, 0x8b, 0x63, 0x1a, 0x7f,
-  0xc3, 0xf6, 0x84, 0xdf, 0xb0, 0x44, 0xe8, 0x08, 0xb3, 0xc3, 0xf6, 0x84,
-  0xe8, 0xcc, 0x32, 0xd4, 0xce, 0xed, 0xc4, 0x75, 0x30, 0x1c, 0xc1, 0xb7,
-  0xc1, 0xed, 0x0a, 0xc3, 0x77, 0x7d, 0x1b, 0x0c, 0x33, 0xdc, 0x10, 0x88,
-  0xae, 0x40, 0x06, 0x35, 0x04, 0x3a, 0x1c, 0x91, 0x1f, 0xbb, 0x2b, 0x0c,
-  0x5f, 0x05, 0x82, 0xde, 0x7e, 0x0c, 0x33, 0xdc, 0x10, 0x94, 0xae, 0x40,
-  0x06, 0x15, 0x0c, 0x3a, 0xcb, 0x60, 0x3b, 0xeb, 0x13, 0x5c, 0xda, 0x0a,
-  0xc3, 0x9c, 0x9e, 0x0a, 0xc3, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x81,
-  0x86, 0xc2, 0x82, 0xfd, 0x0a, 0xb3, 0x2b, 0x90, 0xb0, 0x30, 0x9a, 0x10,
-  0x00, 0xa3, 0x09, 0x42, 0x30, 0x9a, 0x30, 0x08, 0xa3, 0x09, 0xc4, 0x50,
-  0xc4, 0x21, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0xbd, 0xb0, 0xd0,
-  0xbf, 0xc2, 0x41, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0xc1,
-  0xb0, 0xe0, 0xbf, 0x02, 0x43, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60,
-  0x70, 0xc5, 0xb0, 0xf0, 0xbf, 0x82, 0x44, 0x04, 0x23, 0x06, 0x0a, 0x00,
-  0x82, 0x60, 0xa0, 0xe0, 0xb0, 0xe0, 0xbf, 0x42, 0xef, 0x0a, 0xc1, 0x0a,
-  0x0b, 0xee, 0x2b, 0xa4, 0xb0, 0x30, 0x9a, 0x10, 0x00, 0x17, 0x38, 0x35,
-  0x4b, 0xb0, 0x3e, 0xc3, 0x0d, 0xa9, 0x1d, 0xc4, 0xb0, 0x00, 0x06, 0xb3,
-  0x0c, 0xb8, 0x93, 0x3b, 0x41, 0xe1, 0xae, 0x10, 0xc2, 0x02, 0x5c, 0xe0,
-  0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x4c, 0x3a, 0x2c, 0x88, 0xb0,
-  0x60, 0xbb, 0x41, 0xfc, 0x0a, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0x30,
-  0xed, 0xb0, 0x20, 0xc2, 0x42, 0x20, 0x5c, 0x30, 0x4c, 0xed, 0xae, 0x60,
-  0xc2, 0x02, 0x5c, 0xe0, 0xd4, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x4c,
-  0x3f, 0x2c, 0x9c, 0xb0, 0x40, 0x23, 0xf6, 0x2b, 0x8c, 0x18, 0x1c, 0x00,
-  0x08, 0x82, 0xc1, 0x04, 0xc6, 0xc2, 0x09, 0x0b, 0x81, 0x70, 0xc1, 0x30,
-  0x17, 0x38, 0x75, 0x87, 0x53, 0x47, 0xba, 0xc2, 0x30, 0x57, 0xab, 0xc2,
-  0x30, 0x47, 0x0c, 0x73, 0xc4, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60,
-  0xa0, 0x95, 0xb1, 0x30, 0xc3, 0x02, 0xfc, 0x0a, 0x61, 0x2c, 0x8c, 0x26,
-  0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31,
-  0x14, 0x91, 0xc8, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0x6c, 0x2c,
-  0xe8, 0xb0, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c,
-  0x6d, 0x2c, 0xec, 0xb0, 0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20,
-  0x18, 0x5c, 0x6e, 0x2c, 0xf0, 0xb0, 0x90, 0x10, 0xc1, 0x88, 0x81, 0x02,
-  0x80, 0x20, 0x18, 0x28, 0x75, 0x2c, 0xec, 0xb0, 0xa0, 0xbf, 0x42, 0x80,
-  0xc6, 0xc2, 0x0a, 0x0b, 0x66, 0x2c, 0x8c, 0x26, 0x04, 0xc0, 0x05, 0x4e,
-  0xcd, 0x12, 0xac, 0xcf, 0x70, 0x83, 0x79, 0x07, 0x6d, 0x2c, 0x80, 0xc1,
-  0x2c, 0x83, 0xee, 0xac, 0x4f, 0x60, 0xec, 0x2b, 0xb8, 0xaf, 0x10, 0x9f,
-  0xe1, 0x08, 0xf9, 0x0d, 0xde, 0x57, 0x20, 0xbe, 0x59, 0x86, 0xdd, 0xf1,
-  0x9d, 0xc0, 0xe0, 0x57, 0x98, 0xdf, 0x20, 0x3e, 0x16, 0x0c, 0xf4, 0xb9,
-  0x60, 0x98, 0x0b, 0x9c, 0xb2, 0xc0, 0x90, 0x8f, 0x15, 0x41, 0x7c, 0x8a,
-  0xb8, 0x63, 0x41, 0x87, 0x1b, 0x82, 0x3a, 0x16, 0xc0, 0x60, 0x96, 0x81,
-  0x77, 0x7a, 0x27, 0xb0, 0x01, 0x7f, 0x05, 0xf8, 0xcc, 0x12, 0x88, 0x8f,
-  0xdd, 0xaf, 0x40, 0xc4, 0x67, 0x96, 0x40, 0x7c, 0x86, 0x23, 0xfa, 0x37,
-  0xc0, 0x5f, 0x41, 0xf8, 0x66, 0x19, 0x7e, 0x47, 0x7c, 0x02, 0xf3, 0xdf,
-  0x20, 0x7f, 0x85, 0xf8, 0x58, 0xe0, 0xd0, 0xe7, 0x82, 0x61, 0x2e, 0x70,
-  0xca, 0x82, 0x48, 0x3e, 0x56, 0x04, 0xf1, 0x29, 0x42, 0x94, 0x05, 0x1d,
-  0x6e, 0x08, 0x40, 0x59, 0x00, 0x83, 0x59, 0x06, 0xf0, 0x09, 0x9f, 0xc0,
-  0x42, 0x58, 0x18, 0xe2, 0x33, 0x4b, 0x20, 0x3e, 0x46, 0x90, 0xb0, 0x00,
-  0x9f, 0x59, 0x02, 0xf1, 0x19, 0x68, 0x71, 0x34, 0xde, 0xc1, 0x7a, 0x87,
-  0x00, 0x1f, 0x21, 0x7c, 0x60, 0x70, 0xf0, 0x9d, 0x0b, 0x86, 0xb1, 0x11,
-  0x16, 0x4e, 0x58, 0x88, 0xcf, 0x70, 0x04, 0xa9, 0xa0, 0xb0, 0x40, 0x7c,
-  0xb3, 0x0c, 0xe3, 0x63, 0x3e, 0x81, 0xa5, 0xb0, 0x50, 0x2a, 0xf1, 0xb1,
-  0x60, 0xa0, 0xcf, 0x05, 0xc3, 0x5c, 0xe0, 0x94, 0x05, 0x86, 0x7c, 0xac,
-  0x08, 0xe2, 0x53, 0x04, 0x2c, 0x0b, 0x3a, 0xdc, 0x10, 0xb8, 0xb2, 0x00,
-  0x06, 0xb3, 0x0c, 0xe4, 0x53, 0x3e, 0x81, 0x0d, 0x31, 0x2c, 0xc0, 0x67,
-  0x96, 0x40, 0x7d, 0xcc, 0x85, 0x05, 0x22, 0x3e, 0xb3, 0x04, 0xea, 0x33,
-  0x1c, 0xf1, 0x2a, 0x2f, 0x2c, 0x08, 0xdf, 0x2c, 0xc3, 0xf9, 0xa8, 0x4f,
-  0x60, 0xb0, 0x02, 0xc3, 0x42, 0x7c, 0x2c, 0x70, 0xe8, 0x73, 0xc1, 0x30,
-  0x17, 0x38, 0x65, 0x41, 0x24, 0x1f, 0x2b, 0x82, 0xf8, 0x14, 0xb1, 0xcb,
-  0x82, 0x0e, 0x37, 0x04, 0xb9, 0x2c, 0x80, 0xc1, 0x2c, 0x03, 0xfa, 0xa4,
-  0x4f, 0x60, 0x38, 0x2c, 0x0c, 0xf1, 0x99, 0x25, 0x50, 0x1f, 0x23, 0x7a,
-  0x58, 0x80, 0xcf, 0x2c, 0x81, 0xfa, 0x0c, 0xb4, 0x38, 0x1a, 0xf9, 0x60,
-  0xe5, 0x43, 0xa0, 0x8f, 0x90, 0x3e, 0x74, 0x65, 0x3e, 0x17, 0x0c, 0x73,
-  0x81, 0x53, 0xb7, 0x39, 0x75, 0x29, 0x2c, 0x0c, 0x73, 0x3a, 0x2b, 0x0c,
-  0x73, 0xc4, 0x30, 0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06,
-  0x9a, 0x3a, 0x0b, 0xb8, 0x2c, 0xd4, 0xb1, 0x60, 0xce, 0xc2, 0x68, 0x42,
-  0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43,
-  0x11, 0x89, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x15, 0xcf, 0xc2,
-  0x2f, 0x0b, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x25,
-  0xcf, 0x02, 0x38, 0x0b, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
-  0xc1, 0x35, 0xcf, 0x42, 0x38, 0x0b, 0x09, 0x11, 0x8c, 0x18, 0x28, 0x00,
-  0x08, 0x82, 0x81, 0xa2, 0xcf, 0x02, 0x38, 0x0b, 0x7f, 0x2c, 0x04, 0xed,
-  0x2c, 0xc0, 0xb2, 0xb0, 0xce, 0xc2, 0x68, 0x42, 0x00, 0x5c, 0xe0, 0xd4,
-  0x2c, 0xc1, 0xfa, 0x0c, 0xb4, 0x38, 0xa6, 0x61, 0x3b, 0x60, 0x59, 0xd4,
-  0x0e, 0x4b, 0xe0, 0x8e, 0xa0, 0x3e, 0x60, 0x59, 0xe4, 0xce, 0x2c, 0x03,
-  0xfb, 0xb8, 0xcf, 0x9a, 0x07, 0xc3, 0x11, 0x7b, 0x1b, 0xc4, 0xb2, 0x30,
-  0x7c, 0xc7, 0xb7, 0xc1, 0x30, 0xc3, 0x0d, 0x01, 0x1f, 0x0b, 0x64, 0x50,
-  0x43, 0xa0, 0xc3, 0x11, 0xf3, 0x52, 0xcb, 0xc2, 0xf0, 0x55, 0x20, 0xe8,
-  0xd5, 0xcb, 0x30, 0xc3, 0x0d, 0xc1, 0x1f, 0x0b, 0x64, 0x50, 0xc1, 0xa0,
-  0xb3, 0x0c, 0xed, 0x23, 0x42, 0xc1, 0x8d, 0xb1, 0x30, 0xcc, 0xd1, 0xad,
-  0x30, 0xcc, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x68, 0x22, 0x2d, 0xc0,
-  0xb3, 0xd0, 0xca, 0x82, 0x3f, 0x0b, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20,
-  0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x45, 0x1c, 0x32, 0x62,
-  0x80, 0x00, 0x20, 0x08, 0x06, 0x57, 0x4a, 0x0b, 0xf7, 0x2c, 0x1c, 0x44,
-  0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x97, 0x4a, 0x0b, 0xf8, 0x2c,
-  0x30, 0x44, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xd7, 0x4a, 0x0b,
-  0xf9, 0x2c, 0x48, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x8a,
-  0x4c, 0x0b, 0xf8, 0x2c, 0xdc, 0xb2, 0x10, 0x94, 0xb4, 0x80, 0xce, 0xc2,
-  0x48, 0x0b, 0xa3, 0x09, 0x01, 0x70, 0x81, 0x53, 0xb3, 0x04, 0x22, 0x34,
-  0xdc, 0x30, 0xea, 0xc1, 0x4a, 0x0b, 0x60, 0x30, 0xcb, 0xf0, 0x3e, 0xf0,
-  0x13, 0x94, 0x2c, 0x0b, 0xfb, 0x2c, 0xc0, 0x05, 0x4e, 0x8d, 0x18, 0x1c,
-  0x00, 0x08, 0x82, 0xc1, 0x44, 0xd3, 0x02, 0x3f, 0x0b, 0xb5, 0x1b, 0xac,
-  0xb3, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x53, 0x4d, 0x0b, 0xfc,
-  0x2c, 0x04, 0xc2, 0x05, 0xc3, 0x54, 0x2d, 0x0b, 0x20, 0x2d, 0xc0, 0x05,
-  0x4e, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0x94, 0xd3, 0x42, 0x48,
-  0x0b, 0x2e, 0x03, 0xcf, 0xc2, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x4c,
-  0x3a, 0x2d, 0x84, 0xb4, 0x10, 0x08, 0x17, 0x0c, 0x73, 0x81, 0x53, 0x77,
-  0x38, 0x75, 0x7e, 0x2c, 0x0c, 0x73, 0xaf, 0x2b, 0x0c, 0x73, 0xc4, 0x30,
-  0x47, 0x0c, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0xda, 0x4f, 0x0b,
-  0x2d, 0x2d, 0xa8, 0xb3, 0xb0, 0xd3, 0xc2, 0x68, 0x42, 0x00, 0x8c, 0x26,
-  0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x11, 0x89, 0x8c,
-  0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x65, 0xd6, 0x02, 0x4d, 0x0b, 0x09,
-  0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x75, 0xd6, 0x42, 0x4d,
-  0x0b, 0x09, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0x85, 0xd6,
-  0x82, 0x4d, 0x0b, 0x09, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0x81,
-  0xf2, 0xd6, 0x42, 0x4d, 0x0b, 0xf4, 0x2c, 0x04, 0x62, 0x2d, 0x94, 0xb4,
-  0x00, 0xd6, 0xc2, 0x68, 0x42, 0x00, 0x5c, 0xe0, 0xd4, 0x2c, 0x81, 0x08,
-  0x0d, 0x37, 0x80, 0x7b, 0x70, 0xd6, 0x02, 0x18, 0xcc, 0x32, 0xc4, 0x8f,
-  0x08, 0x05, 0x66, 0xce, 0x02, 0x3a, 0x0b, 0xf1, 0x19, 0x8e, 0x88, 0xdf,
-  0x20, 0x9d, 0x05, 0xe2, 0x9b, 0x65, 0x90, 0x9f, 0xfa, 0x09, 0x4c, 0x9d,
-  0x05, 0xf9, 0x0d, 0xe2, 0x63, 0xc1, 0x40, 0x9f, 0x0b, 0x86, 0xb9, 0xc0,
-  0x29, 0x0b, 0x0c, 0xf9, 0x58, 0x11, 0xc4, 0xa7, 0x88, 0xb8, 0x16, 0x74,
-  0xb8, 0x21, 0x78, 0x6b, 0x01, 0x0c, 0x66, 0x19, 0xe6, 0x87, 0x7e, 0x02,
-  0x1b, 0xe4, 0x59, 0x80, 0xcf, 0x2c, 0x41, 0xfe, 0x58, 0x3c, 0x0b, 0x44,
-  0x7c, 0x66, 0x09, 0xf2, 0x67, 0x38, 0x82, 0x7f, 0x03, 0x79, 0x16, 0x84,
-  0x6f, 0x96, 0xc1, 0x7e, 0xf2, 0x27, 0xb0, 0xfe, 0x0d, 0xe6, 0x59, 0x88,
-  0x8f, 0x05, 0x0e, 0x7d, 0x2e, 0x18, 0xe6, 0x02, 0xa7, 0x2c, 0x88, 0xe4,
-  0x63, 0x45, 0x10, 0x9f, 0x22, 0xf8, 0x5a, 0xd0, 0xe1, 0x86, 0x40, 0xaf,
-  0x05, 0x30, 0x98, 0x65, 0xb8, 0x1f, 0xfc, 0x09, 0x6c, 0x9f, 0x85, 0x21,
-  0x3e, 0xb3, 0x04, 0xf9, 0x63, 0x84, 0x3f, 0x0b, 0xf0, 0x99, 0x25, 0xc8,
-  0x9f, 0x81, 0x16, 0x47, 0x9b, 0x1f, 0x8c, 0x7e, 0x88, 0xfb, 0x11, 0xf0,
-  0xc7, 0x05, 0x87, 0xfa, 0xb9, 0x60, 0x18, 0xeb, 0x67, 0x21, 0xa4, 0x85,
-  0xf8, 0x0c, 0x47, 0xf8, 0x8d, 0x48, 0x0b, 0xc4, 0x37, 0xcb, 0xa0, 0x3f,
-  0xfd, 0x13, 0xd8, 0x48, 0x0b, 0x7f, 0x13, 0x1f, 0x0b, 0x06, 0xfa, 0x5c,
-  0x30, 0xcc, 0x05, 0x4e, 0x59, 0x60, 0xc8, 0xc7, 0x8a, 0x20, 0x3e, 0x45,
-  0xa8, 0xb6, 0xa0, 0xc3, 0x0d, 0x01, 0x6a, 0x0b, 0x60, 0x30, 0xcb, 0xb0,
-  0x3f, 0xfc, 0x13, 0xd8, 0xb0, 0xd2, 0x02, 0x7c, 0x66, 0x09, 0x42, 0xc8,
-  0x50, 0x5a, 0x20, 0xe2, 0x33, 0x4b, 0x10, 0x42, 0xc3, 0x11, 0xa9, 0x93,
-  0xd2, 0x82, 0xf0, 0xcd, 0x32, 0xf8, 0x4f, 0x08, 0x05, 0xa6, 0x3a, 0x2a,
-  0x2d, 0xc4, 0xc7, 0x02, 0x87, 0x3e, 0x17, 0x0c, 0x73, 0x81, 0x53, 0x16,
-  0x44, 0xf2, 0xb1, 0x22, 0x88, 0x4f, 0x11, 0xb5, 0x2d, 0xe8, 0x70, 0x43,
-  0x30, 0xdb, 0x02, 0x18, 0xcc, 0x32, 0xfc, 0x0f, 0x08, 0x05, 0x26, 0xd3,
-  0xc2, 0x10, 0x9f, 0x59, 0x82, 0x10, 0x32, 0xe2, 0xa6, 0x05, 0xf8, 0xcc,
-  0x12, 0x84, 0xd0, 0x40, 0x8b, 0xa3, 0xed, 0x0f, 0xc6, 0x3f, 0xc4, 0xff,
-  0x08, 0x20, 0x84, 0x76, 0xfd, 0x73, 0xc1, 0x30, 0x17, 0x38, 0x75, 0x9b,
-  0x53, 0x37, 0xd2, 0xc2, 0x30, 0x47, 0xc3, 0xc2, 0x30, 0x47, 0x0c, 0x73,
-  0xc4, 0x30, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xa0, 0x91, 0xb7, 0x20,
-  0xdb, 0xc2, 0x5b, 0x0b, 0xe0, 0x2d, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82,
-  0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x14, 0x91, 0xc8, 0x88,
-  0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0xeb, 0x2d, 0xe4, 0xb6, 0x90, 0x10,
-  0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0xec, 0x2d, 0xe8, 0xb6,
-  0x90, 0x10, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x5c, 0xed, 0x2d,
-  0xec, 0xb6, 0x90, 0x10, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x28,
-  0xf4, 0x2d, 0xe8, 0xb6, 0x90, 0xd7, 0x42, 0x70, 0xde, 0x82, 0x6a, 0x0b,
-  0xe5, 0x2d, 0x8c, 0x26, 0x04, 0xc0, 0x05, 0x4e, 0xcd, 0x12, 0x88, 0xd0,
-  0x40, 0x8b, 0x63, 0x1a, 0xed, 0x43, 0xb3, 0x05, 0xfb, 0xb0, 0xc4, 0xfb,
-  0x08, 0x21, 0x44, 0xb3, 0x05, 0xfc, 0x8c, 0x18, 0x18, 0x00, 0x08, 0x82,
-  0xc1, 0x63, 0xdf, 0x42, 0x6d, 0x0b, 0x66, 0x2c, 0x8c, 0x18, 0x18, 0x00,
-  0x08, 0x82, 0xc1, 0x73, 0xdf, 0x82, 0x6d, 0x0b, 0x66, 0x2c, 0x58, 0x10,
-  0xc8, 0xc7, 0x02, 0x41, 0x3e, 0xf6, 0xe6, 0x41, 0x6b, 0x0b, 0xf2, 0x19,
-  0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x48, 0xbf, 0x85, 0xdf, 0x16, 0x5a,
-  0x5b, 0xe8, 0xb5, 0xc0, 0xe2, 0x3c, 0x68, 0x6d, 0x41, 0x3e, 0x23, 0x06,
-  0x08, 0x00, 0x82, 0x60, 0x10, 0xf1, 0xb7, 0x10, 0xde, 0x02, 0x6b, 0x0b,
-  0xa8, 0x1a, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x10, 0xf5, 0xb7,
-  0x20, 0xde, 0xc2, 0x6b, 0x0b, 0xe0, 0x16, 0x8c, 0x18, 0x20, 0x00, 0x08,
-  0x82, 0x41, 0xe4, 0xdf, 0xc2, 0x78, 0x0b, 0xb2, 0x2d, 0xe0, 0x4b, 0x30,
-  0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xd1, 0x7f, 0x0b, 0xe4, 0x2d, 0xb8,
-  0xb6, 0xb0, 0xaa, 0x81, 0x31, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x11,
-  0x88, 0x0b, 0xe5, 0x2d, 0xb8, 0xb6, 0x30, 0x6e, 0xc1, 0x88, 0x01, 0x02,
-  0x80, 0x20, 0x18, 0x44, 0x21, 0x2e, 0x98, 0xb7, 0x40, 0xdb, 0xc2, 0xbe,
-  0x04, 0x23, 0x06, 0x0d, 0x00, 0x82, 0x60, 0x50, 0x81, 0xb8, 0x50, 0xde,
-  0xc2, 0x6d, 0x0b, 0xcc, 0xa2, 0xb8, 0x6a, 0x80, 0x10, 0x81, 0xfd, 0x75,
-  0x70, 0xdb, 0x82, 0x7c, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x20, 0x22,
-  0x71, 0x21, 0xbd, 0x85, 0xdb, 0x16, 0xda, 0x2b, 0xb0, 0xd0, 0x0e, 0x6e,
-  0x5b, 0x90, 0xcf, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x44, 0x26, 0x2e,
-  0xac, 0xb7, 0x60, 0xdb, 0x02, 0x6e, 0x06, 0xc1, 0x88, 0x01, 0x02, 0x80,
-  0x20, 0x18, 0x44, 0x27, 0x2e, 0xb0, 0xb7, 0x90, 0xdb, 0x02, 0x7c, 0x05,
-  0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x10, 0xa1, 0xb8, 0xd0, 0xde, 0x02,
-  0x6f, 0x0b, 0x28, 0x12, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x41, 0x94,
-  0xe2, 0x82, 0x7b, 0x0b, 0xb8, 0x2d, 0xec, 0x66, 0x60, 0x8c, 0x18, 0x20,
-  0x00, 0x08, 0x82, 0x41, 0xa4, 0xe2, 0xc2, 0x7b, 0x0b, 0xb8, 0x2d, 0xcc,
-  0x57, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0xd1, 0x8a, 0x0b, 0xf0,
-  0x2d, 0xf8, 0xb6, 0xb0, 0x22, 0xc1, 0x88, 0x41, 0x03, 0x80, 0x20, 0x18,
-  0x54, 0x2a, 0x2e, 0xbc, 0xb7, 0x10, 0xde, 0x82, 0x55, 0x51, 0xbe, 0x19,
-  0x20, 0x44, 0x60, 0xae, 0x1c, 0x84, 0xb7, 0x20, 0x9f, 0x11, 0x03, 0x04,
-  0x00, 0x41, 0x30, 0x88, 0x5c, 0x5c, 0x98, 0x6f, 0x21, 0xbc, 0x85, 0x7e,
-  0x0a, 0x0c, 0x96, 0x83, 0xf0, 0x16, 0xe4, 0x33, 0x62, 0x80, 0x00, 0x20,
-  0x08, 0x06, 0x11, 0x8c, 0x0b, 0xf5, 0x2d, 0x80, 0xb7, 0x80, 0x8e, 0x41,
-  0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x51, 0x8c, 0x0b, 0xf6, 0x2d,
-  0x8c, 0xb7, 0x00, 0x52, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x44,
-  0x32, 0x2e, 0xdc, 0xb7, 0x60, 0xde, 0x02, 0x4e, 0x04, 0x23, 0x06, 0x08,
-  0x00, 0x82, 0x60, 0x10, 0xcd, 0xb8, 0x80, 0xdf, 0x82, 0x78, 0x0b, 0xeb,
-  0x18, 0x18, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x10, 0xd1, 0xb8, 0x90,
-  0xdf, 0x82, 0x78, 0x0b, 0x23, 0x15, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82,
-  0x41, 0x54, 0xe3, 0x82, 0x7e, 0x0b, 0xe8, 0x2d, 0xec, 0x44, 0x30, 0x62,
-  0xd0, 0x00, 0x20, 0x08, 0x06, 0x15, 0x8d, 0x0b, 0xf9, 0x2d, 0xac, 0xb7,
-  0x00, 0x06, 0x9f, 0xe7, 0x8e, 0x01, 0x42, 0x04, 0xd6, 0xbb, 0xc1, 0x7a,
-  0x0b, 0xf2, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x08, 0xc7, 0x85,
-  0xfe, 0x16, 0xd6, 0x5b, 0x68, 0xa1, 0xc0, 0x7e, 0x37, 0x58, 0x6f, 0x41,
-  0x3e, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x10, 0xe9, 0xb8, 0xf0, 0xdf,
-  0x82, 0x7a, 0x0b, 0xf8, 0x17, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x41,
-  0xb4, 0xe3, 0x02, 0x88, 0x0b, 0xed, 0x2d, 0xc0, 0x50, 0x30, 0x62, 0x80,
-  0x00, 0x20, 0x08, 0x06, 0x11, 0x8f, 0x0b, 0x21, 0x2e, 0xc0, 0xb7, 0x80,
-  0x06, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x44, 0x3d, 0x2e, 0x88,
-  0xb8, 0xc0, 0xde, 0xc2, 0xfe, 0x19, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60,
-  0x10, 0xf9, 0xb8, 0x30, 0xe2, 0x02, 0x7b, 0x0b, 0x33, 0x14, 0x8c, 0x18,
-  0x20, 0x00, 0x08, 0x82, 0x41, 0xf4, 0xe3, 0x02, 0x89, 0x0b, 0xf2, 0x2d,
-  0xac, 0x41, 0x30, 0x62, 0xd0, 0x00, 0x20, 0x08, 0x06, 0x95, 0x8f, 0x0b,
-  0x23, 0x2e, 0xd4, 0xb7, 0xa0, 0x06, 0x69, 0x80, 0x06, 0xfe, 0x87, 0x10,
-  0x81, 0xb1, 0x01, 0x1b, 0xc8, 0xc7, 0x82, 0x36, 0x90, 0x8f, 0x85, 0xc1,
-  0x7d, 0x0b, 0xf2, 0x19, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x88, 0xcc,
-  0x85, 0x14, 0x17, 0xee, 0x5b, 0x70, 0x02, 0x1b, 0x83, 0xfb, 0x16, 0xe4,
-  0x33, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x91, 0x99, 0x0b, 0x2b, 0x2e,
-  0xd8, 0xb7, 0xa0, 0x05, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x10, 0x9d,
-  0xb9, 0xc0, 0xe2, 0x42, 0x7e, 0x0b, 0x51, 0x30, 0x62, 0x80, 0x00, 0x20,
-  0x08, 0x06, 0x11, 0x9a, 0x0b, 0x2d, 0x2e, 0xf0, 0xb7, 0x80, 0x04, 0x23,
-  0x06, 0x08, 0x00, 0x82, 0x60, 0x10, 0xa5, 0xb9, 0xe0, 0xe2, 0x02, 0x7e,
-  0x0b, 0x9d, 0x31, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x91, 0x9a, 0x0b,
-  0x2f, 0x2e, 0xe0, 0xb7, 0x40, 0x05, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60,
-  0x10, 0xad, 0xb9, 0x00, 0xe3, 0x82, 0x7f, 0x0b, 0x4b, 0x30, 0x62, 0xd0,
-  0x00, 0x20, 0x08, 0x06, 0x95, 0x9a, 0x0b, 0x2f, 0x2e, 0x84, 0xb8, 0x70,
-  0x07, 0x8b, 0x02, 0x06, 0x08, 0x11, 0x5c, 0x30, 0xcc, 0x88, 0x81, 0x03,
-  0x80, 0x20, 0x18, 0x30, 0x70, 0x2e, 0xb4, 0xb8, 0x80, 0xdf, 0x42, 0x7c,
-  0x0b, 0x64, 0x2e, 0x04, 0x32, 0x2e, 0xc8, 0xb8, 0x20, 0xe3, 0xc2, 0x89,
-  0x0b, 0x62, 0x2e, 0xcc, 0x12, 0x8c, 0x10, 0x02, 0x00, 0x00, 0x00, 0x00
-};
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/OperatorRegistration.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/OperatorRegistration.cpp
index 30bc6e5e275a0..28360f09bcba3 100644
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/OperatorRegistration.cpp
+++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/OperatorRegistration.cpp
@@ -510,6 +510,7 @@ DML_OP_EXTERN_CREATION_FUNCTION(BitwiseAnd);
 DML_OP_EXTERN_CREATION_FUNCTION(BitwiseOr);
 DML_OP_EXTERN_CREATION_FUNCTION(BitwiseXor);
 DML_OP_EXTERN_CREATION_FUNCTION(BitwiseNot);
+DML_OP_EXTERN_CREATION_FUNCTION(RotaryEmbedding);
 
 DML_OP_EXTERN_QUERY_FUNCTION(MaxPool);
 DML_OP_EXTERN_QUERY_FUNCTION(Slice);
@@ -527,6 +528,7 @@ DML_OP_EXTERN_QUERY_FUNCTION(Attention);
 constexpr static std::array<const char*, 1> typeNameListDefault = {"T"};
 constexpr static std::array<const char*, 1> typeNameListDefaultV = {"V"};
 constexpr static std::array<const char*, 2> typeNameListAttention = {"T", "M"};
+constexpr static std::array<const char*, 2> typeNameListRotaryEmbedding = {"T", "M"};
 constexpr static std::array<const char*, 2> typeNameListTwo = { "T1", "T2" };
 constexpr static std::array<const char*, 2> typeNameListLayerNorm = { "T", "U" };
 constexpr static std::array<const char*, 2> typeNameListLayerNormContrib = { "T", "V" };
@@ -597,6 +599,7 @@ constexpr static std::array<SupportedTensorDataTypes, 2> supportedTypeListShape
 constexpr static std::array<SupportedTensorDataTypes, 2> supportedTypeListSize = {SupportedTensorDataTypes::All, SupportedTensorDataTypes::Int64};
 constexpr static std::array<SupportedTensorDataTypes, 1> supportedTypeListQLinearSigmoid = {SupportedTensorDataTypes::UInt8 | SupportedTensorDataTypes::Int8};
 constexpr static std::array<SupportedTensorDataTypes, 2> supportedTypeListAttention = {SupportedTensorDataTypes::Float16to32, SupportedTensorDataTypes::Int32};
+constexpr static std::array<SupportedTensorDataTypes, 2> supportedTypeListRotaryEmbedding = {SupportedTensorDataTypes::Float16to32, SupportedTensorDataTypes::Int64};
 constexpr static std::array<SupportedTensorDataTypes, 2> supportedTypeListGroupNorm = {SupportedTensorDataTypes::Float16to32, SupportedTensorDataTypes::Float16to32};
 constexpr static std::array<SupportedTensorDataTypes, 1> supportedTypeListNonZero = {SupportedTensorDataTypes::Float16to32 | SupportedTensorDataTypes::Ints8Bit | SupportedTensorDataTypes::Ints16Bit | SupportedTensorDataTypes::Ints32Bit | SupportedTensorDataTypes::Bool};
 
@@ -1006,6 +1009,7 @@ constexpr static OperatorRegistrationInformation operatorRegistrationInformation
     {REG_INFO_MS(   1,  QLinearSigmoid,                     typeNameListDefault,            supportedTypeListQLinearSigmoid,        DmlGraphSupport::Supported, requiredConstantCpuInputs(), std::nullopt, QueryQLinearSigmoid)},
     {REG_INFO_MS(   1,  Attention,                          typeNameListAttention,          supportedTypeListAttention,             DmlGraphSupport::Supported, requiredConstantCpuInputs(), std::nullopt, QueryAttention)},
     {REG_INFO_MS(   1,  MultiHeadAttention,                 typeNameListAttention,          supportedTypeListAttention,             DmlGraphSupport::Supported)},
+    {REG_INFO_MS(   1,  RotaryEmbedding,                    typeNameListRotaryEmbedding,    supportedTypeListRotaryEmbedding,       DmlGraphSupport::Supported)},
 
     {REG_INFO(     10,  IsInf,                              typeNameListTwo,                supportedTypeListIsInf,                 DmlGraphSupport::Supported)},
     {REG_INFO(     10,  Mod,                                typeNameListDefault,            supportedTypeListNumericDefault,        DmlGraphSupport::Supported)},
diff --git a/onnxruntime/core/providers/dml/OperatorAuthorHelper/Attributes.h b/onnxruntime/core/providers/dml/OperatorAuthorHelper/Attributes.h
index dac128f92ae0c..e9591cfce6870 100644
--- a/onnxruntime/core/providers/dml/OperatorAuthorHelper/Attributes.h
+++ b/onnxruntime/core/providers/dml/OperatorAuthorHelper/Attributes.h
@@ -122,6 +122,7 @@ namespace AttrName
 
     static constexpr const char* GraphFusedActivation = "activation";
     static constexpr const char* GraphFusedAxis = "activation_axis";
+    static constexpr const char* Interleaved = "interleaved";
 
 } // namespace AttrName
 
diff --git a/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorHelper.h b/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorHelper.h
index 485e20c1dfe1e..f7e545d9d99a9 100644
--- a/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorHelper.h
+++ b/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorHelper.h
@@ -1584,6 +1584,7 @@ using ShapeInferenceHelper_DequantizeLinear = GetOutputShapeAsInputShapeHelper;
 using ShapeInferenceHelper_QLinearSigmoid = GetOutputShapeAsInputShapeHelper;
 using ShapeInferenceHelper_Attention = AttentionHelper;
 using ShapeInferenceHelper_MultiHeadAttention = MultiHeadAttentionHelper;
+using ShapeInferenceHelper_RotaryEmbedding = GetOutputShapeAsInputShapeHelper;
 using ShapeInferenceHelper_Sign = GetBroadcastedOutputShapeHelper;
 using ShapeInferenceHelper_IsNaN = GetBroadcastedOutputShapeHelper;
 using ShapeInferenceHelper_Erf = GetBroadcastedOutputShapeHelper;
diff --git a/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorVersions.h b/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorVersions.h
index c1e525400be1a..e18ba31def48a 100644
--- a/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorVersions.h
+++ b/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorVersions.h
@@ -437,6 +437,7 @@ namespace OperatorHelper
         static const int sc_sinceVer_BiasAdd = 1;
         static const int sc_sinceVer_QuickGelu = 1;
         static const int sc_sinceVer_GroupNorm = 1;
+        static const int sc_sinceVer_RotaryEmbedding = 1;
     } // namespace MsftOperatorSet1
 
 } // namespace OperatorHelper
diff --git a/onnxruntime/core/providers/dml/dml_provider_factory.cc b/onnxruntime/core/providers/dml/dml_provider_factory.cc
index fde61e73c2124..33f1f59e07f3f 100644
--- a/onnxruntime/core/providers/dml/dml_provider_factory.cc
+++ b/onnxruntime/core/providers/dml/dml_provider_factory.cc
@@ -1,6 +1,9 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
+#include <dxcore.h>
+#include <vector>
+
 #include <DirectML.h>
 #ifndef _GAMING_XBOX
 #include <dxgi1_4.h>
@@ -27,8 +30,12 @@ namespace onnxruntime {
 
 struct DMLProviderFactory : IExecutionProviderFactory {
   DMLProviderFactory(IDMLDevice* dml_device,
-                     ID3D12CommandQueue* cmd_queue) : dml_device_(dml_device),
-                                                      cmd_queue_(cmd_queue) {}
+                     ID3D12CommandQueue* cmd_queue,
+                     bool disable_metacommands,
+                     bool enable_dynamic_graph_fusion) : dml_device_(dml_device),
+                                                         cmd_queue_(cmd_queue),
+                                                         metacommands_enabled_(!disable_metacommands),
+                                                         dynamic_graph_fusion_enabled_(enable_dynamic_graph_fusion) {}
   ~DMLProviderFactory() override {}
 
   std::unique_ptr<IExecutionProvider> CreateProvider() override;
@@ -39,10 +46,11 @@ struct DMLProviderFactory : IExecutionProviderFactory {
   ComPtr<IDMLDevice> dml_device_{};
   ComPtr<ID3D12CommandQueue> cmd_queue_{};
   bool metacommands_enabled_ = true;
+  bool dynamic_graph_fusion_enabled_ = false;
 };
 
 std::unique_ptr<IExecutionProvider> DMLProviderFactory::CreateProvider() {
-  auto provider = Dml::CreateExecutionProvider(dml_device_.Get(), cmd_queue_.Get(), metacommands_enabled_);
+  auto provider = Dml::CreateExecutionProvider(dml_device_.Get(), cmd_queue_.Get(), metacommands_enabled_, dynamic_graph_fusion_enabled_);
   return provider;
 }
 
@@ -51,7 +59,9 @@ void DMLProviderFactory::SetMetacommandsEnabled(bool metacommands_enabled) {
 }
 
 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_DML(IDMLDevice* dml_device,
-                                                                              ID3D12CommandQueue* cmd_queue) {
+                                                                              ID3D12CommandQueue* cmd_queue,
+                                                                              bool disable_metacommands,
+                                                                              bool enable_dynamic_graph_fusion) {
 #ifndef _GAMING_XBOX
   // Validate that the D3D12 devices match between DML and the command queue. This specifically asks for IUnknown in
   // order to be able to compare the pointers for COM object identity.
@@ -70,7 +80,7 @@ std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_DML(ID
   const Env& env = Env::Default();
   auto luid = d3d12_device->GetAdapterLuid();
   env.GetTelemetryProvider().LogExecutionProviderEvent(&luid);
-  return std::make_shared<onnxruntime::DMLProviderFactory>(dml_device, cmd_queue);
+  return std::make_shared<onnxruntime::DMLProviderFactory>(dml_device, cmd_queue, disable_metacommands, enable_dynamic_graph_fusion);
 }
 
 void DmlConfigureProviderFactoryMetacommandsEnabled(IExecutionProviderFactory* factory, bool metacommandsEnabled) {
@@ -92,12 +102,325 @@ bool IsSoftwareAdapter(IDXGIAdapter1* adapter) {
     return isSoftwareAdapter || (isBasicRenderDriverVendorId && isBasicRenderDriverDeviceId);
 }
 
-std::shared_ptr<IExecutionProviderFactory> DMLProviderFactoryCreator::Create(int device_id) {
-  return Create(device_id, /*skip_software_device_check*/ false);
+static bool IsHardwareAdapter(IDXCoreAdapter* adapter) {
+  bool is_hardware = false;
+  THROW_IF_FAILED(adapter->GetProperty(
+    DXCoreAdapterProperty::IsHardware,
+    &is_hardware));
+  return is_hardware;
+}
+
+static bool IsGPU(IDXCoreAdapter* compute_adapter) {
+  // Only considering hardware adapters
+  if (!IsHardwareAdapter(compute_adapter)) {
+    return false;
+  }
+  return compute_adapter->IsAttributeSupported(DXCORE_ADAPTER_ATTRIBUTE_D3D12_GRAPHICS);
+}
+
+#ifdef ENABLE_NPU_ADAPTER_ENUMERATION
+static bool IsNPU(IDXCoreAdapter* compute_adapter) {
+  // Only considering hardware adapters
+  if (!IsHardwareAdapter(compute_adapter)) {
+    return false;
+  }
+  return !(compute_adapter->IsAttributeSupported(DXCORE_ADAPTER_ATTRIBUTE_D3D12_GRAPHICS));
+}
+#endif
+
+enum class DeviceType { GPU, NPU, BadDevice };
+
+static DeviceType FilterAdapterTypeQuery(IDXCoreAdapter* adapter, OrtDmlDeviceFilter filter) {
+  auto allow_gpus = (filter & OrtDmlDeviceFilter::Gpu) == OrtDmlDeviceFilter::Gpu;
+  if (IsGPU(adapter) && allow_gpus) {
+    return DeviceType::GPU;
+  }
+
+#ifdef ENABLE_NPU_ADAPTER_ENUMERATION
+  auto allow_npus = (filter & OrtDmlDeviceFilter::Npu) == OrtDmlDeviceFilter::Npu;
+  if (IsNPU(adapter) && allow_npus) {
+    return DeviceType::NPU;
+  }
+#endif
+
+  return DeviceType::BadDevice;
+}
+
+// Struct for holding each adapter
+struct AdapterInfo {
+  ComPtr<IDXCoreAdapter> Adapter;
+  DeviceType Type; // GPU or NPU
+};
+
+static ComPtr<IDXCoreAdapterList> EnumerateDXCoreAdapters(IDXCoreAdapterFactory* adapter_factory) {
+  ComPtr<IDXCoreAdapterList> adapter_list;
+
+  // TODO: use_dxcore_workload_enumeration should be determined by QI
+  // When DXCore APIs are available QI for relevant enumeration interfaces
+  constexpr bool use_dxcore_workload_enumeration = false;
+  if (!use_dxcore_workload_enumeration) {
+    // Get a list of all the adapters that support compute
+    GUID attributes[]{ DXCORE_ADAPTER_ATTRIBUTE_D3D12_CORE_COMPUTE };
+    ORT_THROW_IF_FAILED(
+      adapter_factory->CreateAdapterList(_countof(attributes),
+        attributes,
+        adapter_list.GetAddressOf()));
+  }
+
+  return adapter_list;
+}
+
+static void SortDXCoreAdaptersByPreference(
+  IDXCoreAdapterList* adapter_list,
+  OrtDmlPerformancePreference preference) {
+  if (adapter_list->GetAdapterCount() <= 1) {
+    return;
+  }
+
+  // DML prefers the HighPerformance adapter by default
+  std::array<DXCoreAdapterPreference, 1> adapter_list_preferences = {
+    DXCoreAdapterPreference::HighPerformance
+  };
+
+  // If callers specify minimum power change the DXCore sort policy
+  // NOTE DXCoreAdapterPrefernce does not apply to mixed adapter lists - only to GPU lists
+  if (preference == OrtDmlPerformancePreference::MinimumPower) {
+    adapter_list_preferences[0] = DXCoreAdapterPreference::MinimumPower;
+  }
+
+  ORT_THROW_IF_FAILED(adapter_list->Sort(
+    static_cast<uint32_t>(adapter_list_preferences.size()),
+    adapter_list_preferences.data()));
+}
+
+static std::vector<AdapterInfo> FilterDXCoreAdapters(
+  IDXCoreAdapterList* adapter_list,
+  OrtDmlDeviceFilter filter) {
+  auto adapter_infos = std::vector<AdapterInfo>();
+  const uint32_t count = adapter_list->GetAdapterCount();
+  for (uint32_t i = 0; i < count; ++i) {
+    ComPtr<IDXCoreAdapter> candidate_adapter;
+    ORT_THROW_IF_FAILED(adapter_list->GetAdapter(i, candidate_adapter.GetAddressOf()));
+
+    // Add the adapters that are valid based on the device filter (GPU, NPU, or Both)
+    auto adapter_type = FilterAdapterTypeQuery(candidate_adapter.Get(), filter);
+    if (adapter_type != DeviceType::BadDevice) {
+      adapter_infos.push_back(AdapterInfo{candidate_adapter, adapter_type});
+    }
+  }
+
+  return adapter_infos;
+}
+
+static void SortHeterogenousDXCoreAdapterList(
+  std::vector<AdapterInfo>& adapter_infos,
+  OrtDmlDeviceFilter filter,
+  OrtDmlPerformancePreference preference) {
+  if (adapter_infos.size() <= 1) {
+    return;
+  }
+
+#ifdef ENABLE_NPU_ADAPTER_ENUMERATION
+  // When considering both GPUs and NPUs sort them by performance preference
+  // of Default (Gpus first), HighPerformance (GPUs first), or LowPower (NPUs first)
+  auto keep_npus = (filter & OrtDmlDeviceFilter::Npu) == OrtDmlDeviceFilter::Npu;
+  auto only_npus =  filter == OrtDmlDeviceFilter::Npu;
+  if (!keep_npus || only_npus) {
+    return;
+  }
+#endif
+
+  struct SortingPolicy {
+    // default is false because GPUs are considered higher priority in
+    // a mixed adapter environment
+    bool npus_first_ = false;
+
+    SortingPolicy(bool npus_first = false) : npus_first_(npus_first) { }
+
+    bool operator()(const AdapterInfo& a, const AdapterInfo& b) {
+      return npus_first_ ? a.Type < b.Type : a.Type > b.Type;
+    }
+  };
+
+  auto npus_first = (preference == OrtDmlPerformancePreference::MinimumPower);
+  auto policy = SortingPolicy(npus_first);
+  std::sort(adapter_infos.begin(), adapter_infos.end(), policy);
 }
 
-Microsoft::WRL::ComPtr<ID3D12Device> DMLProviderFactoryCreator::CreateD3D12Device(int device_id, bool skip_software_device_check)
-{
+std::shared_ptr<IExecutionProviderFactory> DMLProviderFactoryCreator::CreateFromOptions(
+    OrtDmlDeviceOptions* device_options,
+    bool disable_metacommands,
+    bool enable_dynamic_graph_fusion) {
+  auto default_device_options = OrtDmlDeviceOptions { Default, Gpu };
+  if (device_options == nullptr) {
+    device_options = &default_device_options;
+  }
+
+  OrtDmlPerformancePreference preference = device_options->Preference;
+  OrtDmlDeviceFilter filter = device_options->Filter;
+
+  // Create DXCore Adapter Factory
+  ComPtr<IDXCoreAdapterFactory> adapter_factory;
+  ORT_THROW_IF_FAILED(::DXCoreCreateAdapterFactory(adapter_factory.GetAddressOf()));
+
+  // Get all DML compatible DXCore adapters
+  ComPtr<IDXCoreAdapterList> adapter_list;
+  adapter_list = EnumerateDXCoreAdapters(adapter_factory.Get());
+
+  if (adapter_list->GetAdapterCount() == 0) {
+    ORT_THROW("No GPUs or NPUs detected.");
+  }
+
+  // Sort the adapter list to honor DXCore hardware ordering
+  SortDXCoreAdaptersByPreference(adapter_list.Get(), preference);
+
+  // TODO: use_dxcore_workload_enumeration should be determined by QI
+  // When DXCore APIs are available QI for relevant enumeration interfaces
+  constexpr bool use_dxcore_workload_enumeration = false;
+
+  std::vector<AdapterInfo> adapter_infos;
+  if (!use_dxcore_workload_enumeration) {
+    // Filter all DXCore adapters to hardware type specified by the device filter
+    adapter_infos = FilterDXCoreAdapters(adapter_list.Get(), filter);
+    if (adapter_infos.size() == 0) {
+      ORT_THROW("No devices detected that match the filter criteria.");
+    }
+  }
+
+  // DXCore Sort ignores NPUs. When both GPUs and NPUs are present, manually sort them.
+  SortHeterogenousDXCoreAdapterList(adapter_infos, filter, preference);
+
+  // Extract just the adapters
+  auto adapters = std::vector<ComPtr<IDXCoreAdapter>>(adapter_infos.size());
+  std::transform(
+    adapter_infos.begin(), adapter_infos.end(),
+    adapters.begin(),
+    [](auto& a){ return a.Adapter; });
+
+  return onnxruntime::DMLProviderFactoryCreator::CreateFromAdapterList(std::move(adapters), disable_metacommands, enable_dynamic_graph_fusion);
+}
+
+static std::optional<OrtDmlPerformancePreference> ParsePerformancePreference(const ProviderOptions& provider_options) {
+  static const std::string PerformancePreference = "performance_preference";
+  static const std::string Default = "default";
+  static const std::string HighPerformance = "high_performance";
+  static const std::string MinimumPower = "minimum_power";
+
+  auto preference_it = provider_options.find(PerformancePreference);
+  if (preference_it != provider_options.end()) {
+    if (preference_it->second == Default) {
+      return OrtDmlPerformancePreference::Default;
+    }
+
+    if (preference_it->second == HighPerformance) {
+      return OrtDmlPerformancePreference::HighPerformance;
+    }
+
+    if (preference_it->second == MinimumPower) {
+      return OrtDmlPerformancePreference::MinimumPower;
+    }
+
+    ORT_THROW("Invalid PerformancePreference provided for DirectML EP device selection.");
+  }
+
+  return {};
+}
+
+static std::optional<OrtDmlDeviceFilter> ParseFilter(const ProviderOptions& provider_options) {
+  static const std::string Filter = "filter";
+  static const std::string Gpu = "gpu";
+#ifdef ENABLE_NPU_ADAPTER_ENUMERATION
+  static const std::string Any = "any";
+  static const std::string Npu = "npu";
+#endif
+
+  auto preference_it = provider_options.find(Filter);
+  if (preference_it != provider_options.end()) {
+    if (preference_it->second == Gpu) {
+      return OrtDmlDeviceFilter::Gpu;
+    }
+
+#ifdef ENABLE_NPU_ADAPTER_ENUMERATION
+    if (preference_it->second == Any) {
+      return OrtDmlDeviceFilter::Any;
+    }
+    if (preference_it->second == Npu) {
+      return OrtDmlDeviceFilter::Npu;
+    }
+#endif
+
+    ORT_THROW("Invalid Filter provided for DirectML EP device selection.");
+  }
+
+  return {};
+}
+
+static std::optional<int> ParseDeviceId(const ProviderOptions& provider_options) {
+  static const std::string DeviceId = "device_id";
+
+  auto preference_it = provider_options.find(DeviceId);
+  if (preference_it != provider_options.end()) {
+     if (!preference_it->second.empty()) {
+       return std::stoi(preference_it->second);
+     }
+  }
+
+  return {};
+}
+
+static bool ParseBoolean(const ProviderOptions& provider_options, const std::string& key) {
+  auto preference_it = provider_options.find(key);
+  if (preference_it != provider_options.end() && !preference_it->second.empty()) {
+      if (preference_it->second == "True" || preference_it->second == "true") {
+        return true;
+      } else if (preference_it->second == "False" || preference_it->second == "false") {
+        return false;
+      } else {
+        ORT_THROW("[ERROR] [DirectML] The value for the key '" + key + "' should be 'True' or 'False'. Default value is 'False'.\n");
+      }
+  }
+
+  return false;
+}
+
+std::shared_ptr<IExecutionProviderFactory> DMLProviderFactoryCreator::CreateFromProviderOptions(
+    const ProviderOptions& provider_options) {
+
+  bool disable_metacommands = ParseBoolean(provider_options, "disable_metacommands");
+  bool enable_dynamic_graph_fusion = ParseBoolean(provider_options, "enable_dynamic_graph_fusion");
+  bool skip_software_device_check = false;
+  auto device_id = ParseDeviceId(provider_options);
+
+  if (device_id.has_value())
+  {
+    return onnxruntime::DMLProviderFactoryCreator::Create(device_id.value(), skip_software_device_check, disable_metacommands, enable_dynamic_graph_fusion);
+  }
+
+  auto preference = ParsePerformancePreference(provider_options);
+  auto filter = ParseFilter(provider_options);
+
+  // If no preference/filters are specified then create with default preference/filters.
+  if (!preference.has_value() && !filter.has_value()) {
+    return onnxruntime::DMLProviderFactoryCreator::CreateFromOptions(nullptr, disable_metacommands, enable_dynamic_graph_fusion);
+  }
+
+  if (!preference.has_value()) {
+    preference = OrtDmlPerformancePreference::Default;
+  }
+
+  if (!filter.has_value()) {
+    filter = OrtDmlDeviceFilter::Gpu;
+  }
+
+  OrtDmlDeviceOptions device_options;
+  device_options.Preference = preference.value();
+  device_options.Filter = filter.value();
+  return onnxruntime::DMLProviderFactoryCreator::CreateFromOptions(&device_options, disable_metacommands, enable_dynamic_graph_fusion);
+}
+
+Microsoft::WRL::ComPtr<ID3D12Device> DMLProviderFactoryCreator::CreateD3D12Device(
+  int device_id,
+  bool skip_software_device_check) {
 #ifdef _GAMING_XBOX
     ComPtr<ID3D12Device> d3d12_device;
     D3D12XBOX_CREATE_DEVICE_PARAMETERS params = {};
@@ -128,8 +451,7 @@ Microsoft::WRL::ComPtr<ID3D12Device> DMLProviderFactoryCreator::CreateD3D12Devic
   return d3d12_device;
 }
 
-Microsoft::WRL::ComPtr<IDMLDevice> DMLProviderFactoryCreator::CreateDMLDevice(ID3D12Device* d3d12_device)
-{
+Microsoft::WRL::ComPtr<IDMLDevice> DMLProviderFactoryCreator::CreateDMLDevice(ID3D12Device* d3d12_device) {
   DML_CREATE_DEVICE_FLAGS flags = DML_CREATE_DEVICE_FLAG_NONE;
 
   // In debug builds, enable the DML debug layer if the D3D12 debug layer is also enabled
@@ -153,9 +475,10 @@ Microsoft::WRL::ComPtr<IDMLDevice> DMLProviderFactoryCreator::CreateDMLDevice(ID
   return dml_device;
 }
 
-std::shared_ptr<IExecutionProviderFactory> DMLProviderFactoryCreator::Create(int device_id, bool skip_software_device_check) {
-  ComPtr<ID3D12Device> d3d12_device = CreateD3D12Device(device_id, skip_software_device_check);
-
+std::shared_ptr<IExecutionProviderFactory> CreateDMLDeviceAndProviderFactory(
+    ID3D12Device* d3d12_device,
+    bool disable_metacommands,
+    bool enable_dynamic_graph_fusion) {
   D3D12_COMMAND_QUEUE_DESC cmd_queue_desc = {};
   cmd_queue_desc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT;
   cmd_queue_desc.Flags = D3D12_COMMAND_QUEUE_FLAG_DISABLE_GPU_TIMEOUT;
@@ -163,8 +486,31 @@ std::shared_ptr<IExecutionProviderFactory> DMLProviderFactoryCreator::Create(int
   ComPtr<ID3D12CommandQueue> cmd_queue;
   ORT_THROW_IF_FAILED(d3d12_device->CreateCommandQueue(&cmd_queue_desc, IID_GRAPHICS_PPV_ARGS(cmd_queue.ReleaseAndGetAddressOf())));
 
-  auto dml_device = CreateDMLDevice(d3d12_device.Get());
-  return CreateExecutionProviderFactory_DML(dml_device.Get(), cmd_queue.Get());
+  auto dml_device = onnxruntime::DMLProviderFactoryCreator::CreateDMLDevice(d3d12_device);
+  return CreateExecutionProviderFactory_DML(dml_device.Get(), cmd_queue.Get(), disable_metacommands, enable_dynamic_graph_fusion);
+}
+
+std::shared_ptr<IExecutionProviderFactory> DMLProviderFactoryCreator::Create(
+    int device_id,
+    bool skip_software_device_check,
+    bool disable_metacommands,
+    bool enable_dynamic_graph_fusion) {
+  ComPtr<ID3D12Device> d3d12_device = CreateD3D12Device(device_id, skip_software_device_check);
+  return CreateDMLDeviceAndProviderFactory(d3d12_device.Get(), disable_metacommands, enable_dynamic_graph_fusion);
+}
+
+std::shared_ptr<IExecutionProviderFactory> DMLProviderFactoryCreator::CreateFromAdapterList(
+    std::vector<ComPtr<IDXCoreAdapter>>&& dxcore_devices,
+    bool disable_metacommands,
+    bool enable_dynamic_graph_fusion) {
+  // Choose the first device from the list since it's the highest priority
+  auto dxcore_device = dxcore_devices[0];
+
+  // Create D3D12 Device from DXCore Adapter
+  ComPtr<ID3D12Device> d3d12_device;
+  ORT_THROW_IF_FAILED(D3D12CreateDevice(dxcore_device.Get(), D3D_FEATURE_LEVEL_11_0, IID_GRAPHICS_PPV_ARGS(d3d12_device.ReleaseAndGetAddressOf())));
+
+  return CreateDMLDeviceAndProviderFactory(d3d12_device.Get(), disable_metacommands, enable_dynamic_graph_fusion);
 }
 
 }  // namespace onnxruntime
@@ -174,7 +520,7 @@ std::shared_ptr<IExecutionProviderFactory> DMLProviderFactoryCreator::Create(int
 // The OrtSessionOptionsAppendExecutionProvider_DML export on the OrtDmlApi should be used instead.
 ORT_API_STATUS_IMPL(OrtSessionOptionsAppendExecutionProvider_DML, _In_ OrtSessionOptions* options, int device_id) {
 API_IMPL_BEGIN
-  options->provider_factories.push_back(onnxruntime::DMLProviderFactoryCreator::Create(device_id));
+  options->provider_factories.push_back(onnxruntime::DMLProviderFactoryCreator::Create(device_id, false, false, false));
 API_IMPL_END
   return nullptr;
 }
@@ -186,7 +532,9 @@ ORT_API_STATUS_IMPL(OrtSessionOptionsAppendExecutionProviderEx_DML, _In_ OrtSess
                     _In_ IDMLDevice* dml_device, _In_ ID3D12CommandQueue* cmd_queue) {
 API_IMPL_BEGIN
   options->provider_factories.push_back(onnxruntime::CreateExecutionProviderFactory_DML(dml_device,
-                                                                                        cmd_queue));
+                                                                                        cmd_queue,
+                                                                                        false,
+                                                                                        false));
 API_IMPL_END
   return nullptr;
 }
@@ -211,6 +559,17 @@ ORT_API_STATUS_IMPL(FreeGPUAllocation, _In_ void* ptr) {
   API_IMPL_END
 }
 
+ORT_API_STATUS_IMPL(OrtSessionOptionsAppendExecutionProvider_DML2, _In_ OrtSessionOptions* options, OrtDmlDeviceOptions* device_options) {
+API_IMPL_BEGIN
+#ifdef USE_DML
+  auto factory = onnxruntime::DMLProviderFactoryCreator::CreateFromOptions(device_options, false, false);
+  // return the create function for a dxcore device
+  options->provider_factories.push_back(factory);
+#endif  // USE_DML
+  return nullptr;
+  API_IMPL_END
+}
+
 ORT_API_STATUS_IMPL(GetD3D12ResourceFromAllocation, _In_ OrtAllocator* ort_allocator, _In_ void* allocation, _Out_ ID3D12Resource** d3d_resource) {
   API_IMPL_BEGIN
 #ifdef USE_DML
@@ -233,7 +592,8 @@ static constexpr OrtDmlApi ort_dml_api_10_to_x = {
   &OrtSessionOptionsAppendExecutionProviderEx_DML,
   &CreateGPUAllocationFromD3DResource,
   &FreeGPUAllocation,
-  &GetD3D12ResourceFromAllocation
+  &GetD3D12ResourceFromAllocation,
+  &OrtSessionOptionsAppendExecutionProvider_DML2,
 };
 
 const OrtDmlApi* GetOrtDmlApi(_In_ uint32_t /*version*/) NO_EXCEPTION {
diff --git a/onnxruntime/core/providers/dml/dml_provider_factory_creator.h b/onnxruntime/core/providers/dml/dml_provider_factory_creator.h
index 574f4410fe3e3..0fab9fe902526 100644
--- a/onnxruntime/core/providers/dml/dml_provider_factory_creator.h
+++ b/onnxruntime/core/providers/dml/dml_provider_factory_creator.h
@@ -7,14 +7,35 @@
 
 #include <wrl/client.h>
 #include <d3d12.h>
+#include "core/framework/provider_options.h"
 #include "core/providers/providers.h"
 #include "core/providers/dml/dml_provider_factory.h"
 
+#include <dxcore.h>
+#include <vector>
+
 namespace onnxruntime {
 
 struct DMLProviderFactoryCreator {
-  static std::shared_ptr<IExecutionProviderFactory> Create(int device_id);
-  static std::shared_ptr<IExecutionProviderFactory> Create(int device_id, bool skip_software_device_check);
+  static std::shared_ptr<IExecutionProviderFactory> Create(
+    int device_id,
+    bool skip_software_device_check,
+    bool disable_metacommands,
+    bool enable_dynamic_graph_fusion);
+
+  static std::shared_ptr<IExecutionProviderFactory> CreateFromProviderOptions(
+    const ProviderOptions& provider_options_map);
+
+  static std::shared_ptr<IExecutionProviderFactory> CreateFromOptions(
+    OrtDmlDeviceOptions* device_options,
+    bool disable_metacommands,
+    bool enable_dynamic_graph_fusion);
+
+  static std::shared_ptr<IExecutionProviderFactory> CreateFromAdapterList(
+    std::vector<Microsoft::WRL::ComPtr<IDXCoreAdapter>>&& dxcore_devices,
+    bool disable_metacommands,
+    bool enable_dynamic_graph_fusion);
+
   static Microsoft::WRL::ComPtr<ID3D12Device> CreateD3D12Device(int device_id, bool skip_software_device_check);
   static Microsoft::WRL::ComPtr<IDMLDevice> CreateDMLDevice(ID3D12Device* d3d12_device);
 };
diff --git a/onnxruntime/core/providers/dnnl/subgraph/dnnl_matmul.cc b/onnxruntime/core/providers/dnnl/subgraph/dnnl_matmul.cc
index c3eab9dd8e557..54528011850be 100644
--- a/onnxruntime/core/providers/dnnl/subgraph/dnnl_matmul.cc
+++ b/onnxruntime/core/providers/dnnl/subgraph/dnnl_matmul.cc
@@ -12,6 +12,25 @@
 namespace onnxruntime {
 namespace ort_dnnl {
 
+inline static dnnl::memory::format_tag get_default_format(const dnnl::memory::dims& tensor_dims) {
+  switch (tensor_dims.size()) {
+    case 1:
+      return dnnl::memory::format_tag::a;
+    case 2:
+      return dnnl::memory::format_tag::ab;
+    case 3:
+      return dnnl::memory::format_tag::abc;
+    case 4:
+      return dnnl::memory::format_tag::abcd;
+    case 5:
+      return dnnl::memory::format_tag::abcde;
+    case 6:
+      return dnnl::memory::format_tag::abcdef;
+    default:
+      return dnnl::memory::format_tag::undef;
+  }
+}
+
 DnnlMatMul::DnnlMatMul() {}
 
 // This handles ONNX defined "MatMul" as well as two other variations of MatMul
@@ -139,14 +158,14 @@ void DnnlMatMul::CreatePrimitive(DnnlSubgraphPrimitive& sp, DnnlNode& node) {
   if (transA || transBatchA) {
     src_md = transposedA_md;
   } else {
-    src_md = dnnl::memory::desc(src_dims, node.Input(IN_A).Type(), dnnl::memory::format_tag::any);
+    src_md = dnnl::memory::desc(src_dims, node.Input(IN_A).Type(), get_default_format(src_dims));
   }
 
   dnnl::memory::desc weights_md;
   if (transB || transBatchB) {
     weights_md = transposedB_md;
   } else {
-    weights_md = dnnl::memory::desc(weights_dims, node.Input(IN_B).Type(), dnnl::memory::format_tag::any);
+    weights_md = dnnl::memory::desc(weights_dims, node.Input(IN_B).Type(), get_default_format(weights_dims));
   }
 
   auto output_shape = src_dims;
@@ -241,7 +260,7 @@ void DnnlMatMul::CreatePrimitive(DnnlSubgraphPrimitive& sp, DnnlNode& node) {
     attr.set_scales_mask(DNNL_ARG_SRC, 0);
   }
 
-  auto dst_md = dnnl::memory::desc(output_shape, node.Output(OUT_Y).Type(), dnnl::memory::format_tag::any);
+  auto dst_md = dnnl::memory::desc(output_shape, node.Output(OUT_Y).Type(), get_default_format(output_shape));
 
   auto matmul_pd = dnnl::matmul::primitive_desc(eng, src_md, weights_md, dst_md, attr);
 
diff --git a/onnxruntime/core/providers/js/allocator.cc b/onnxruntime/core/providers/js/allocator.cc
index c1d0aa9abbf6b..574c507222a5c 100644
--- a/onnxruntime/core/providers/js/allocator.cc
+++ b/onnxruntime/core/providers/js/allocator.cc
@@ -10,6 +10,10 @@ namespace onnxruntime {
 namespace js {
 
 void* JsCustomAllocator::Alloc(size_t size) {
+  if (size == 0) {
+    return nullptr;
+  }
+
   void* p = EM_ASM_PTR({ return Module.jsepAlloc($0); }, size);
   stats_.num_allocs++;
   stats_.bytes_in_use += size;
@@ -17,8 +21,10 @@ void* JsCustomAllocator::Alloc(size_t size) {
 }
 
 void JsCustomAllocator::Free(void* p) {
-  size_t size = (size_t)(void*)EM_ASM_PTR({ return Module.jsepFree($0); }, p);
-  stats_.bytes_in_use -= size;
+  if (p != nullptr) {
+    size_t size = (size_t)(void*)EM_ASM_PTR({ return Module.jsepFree($0); }, p);
+    stats_.bytes_in_use -= size;
+  }
 }
 
 void JsCustomAllocator::GetStats(AllocatorStats* stats) {
diff --git a/onnxruntime/core/providers/js/data_transfer.cc b/onnxruntime/core/providers/js/data_transfer.cc
index c62362d90867f..ebea041b80128 100644
--- a/onnxruntime/core/providers/js/data_transfer.cc
+++ b/onnxruntime/core/providers/js/data_transfer.cc
@@ -20,23 +20,25 @@ bool DataTransfer::CanCopy(const OrtDevice& src_device, const OrtDevice& dst_dev
 
 common::Status DataTransfer::CopyTensor(const Tensor& src, Tensor& dst) const {
   size_t bytes = src.SizeInBytes();
-  const void* src_data = src.DataRaw();
-  void* dst_data = dst.MutableDataRaw();
-
-  auto& src_device = src.Location().device;
-  auto& dst_device = dst.Location().device;
-
-  if (dst_device.Type() == OrtDevice::GPU) {
-    if (src_device.Type() == OrtDevice::GPU) {
-      // copy from GPU to GPU
-      EM_ASM({ Module.jsepCopy($0, $1, $2, true); }, src_data, dst_data, bytes);
-    } else {
-      // copy from CPU to GPU
-      EM_ASM({ Module.jsepCopy($0, $1, $2); }, src_data, dst_data, bytes);
+  if (bytes > 0) {
+    const void* src_data = src.DataRaw();
+    void* dst_data = dst.MutableDataRaw();
+
+    auto& src_device = src.Location().device;
+    auto& dst_device = dst.Location().device;
+
+    if (dst_device.Type() == OrtDevice::GPU) {
+      if (src_device.Type() == OrtDevice::GPU) {
+        // copy from GPU to GPU
+        EM_ASM({ Module.jsepCopy($0, $1, $2, true); }, src_data, dst_data, bytes);
+      } else {
+        // copy from CPU to GPU
+        EM_ASM({ Module.jsepCopy($0, $1, $2); }, src_data, dst_data, bytes);
+      }
+    } else /* if (src_device.Type() == OrtDevice::GPU) */ {
+      // copy from GPU to CPU
+      jsepDownload(src_data, dst_data, bytes);
     }
-  } else /* if (src_device.Type() == OrtDevice::GPU) */ {
-    // copy from GPU to CPU
-    jsepDownload(src_data, dst_data, bytes);
   }
 
   return Status::OK();
diff --git a/onnxruntime/core/providers/js/js_execution_provider.cc b/onnxruntime/core/providers/js/js_execution_provider.cc
index 72e36a161e9aa..68ceafb1d4bf6 100644
--- a/onnxruntime/core/providers/js/js_execution_provider.cc
+++ b/onnxruntime/core/providers/js/js_execution_provider.cc
@@ -5,6 +5,7 @@
 #include <unordered_map>
 #include <unordered_set>
 #include <utility>
+#include <vector>
 
 #include "js_execution_provider.h"
 
@@ -13,9 +14,11 @@
 #endif
 
 #include "core/graph/function_utils.h"
+#include "core/graph/indexed_sub_graph.h"
 #include "core/framework/compute_capability.h"
 #include "core/framework/data_transfer_manager.h"
 #include "core/framework/kernel_registry.h"
+#include "core/framework/fallback_cpu_capability.h"
 #include "core/providers/shared/node_unit/node_unit.h"
 #include "allocator.h"
 #include "data_transfer.h"
@@ -229,21 +232,46 @@ class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomai
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 11, 12, Unsqueeze);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 13, Unsqueeze);
 
+class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 9, 15, Where);
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 16, Where);
+
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 1, 12, Transpose);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 13, Transpose);
 
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 11, float, Conv);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 11, float, ConvTranspose);
+class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 1, 10, Conv);
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 11, Conv);
+class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 1, 10, Conv);
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 11, Conv);
+
+class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 1, 10, ConvTranspose);
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 11, ConvTranspose);
+class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 1, 10, ConvTranspose);
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 11, ConvTranspose);
+
+class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 1, 7, MaxPool);
+class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 8, 9, MaxPool);
+class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 10, 10, MaxPool);
+class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 11, 11, MaxPool);
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 12, MaxPool);
+class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 1, 7, MaxPool);
+class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 8, 9, MaxPool);
+class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 10, 10, MaxPool);
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 11, 11, MaxPool);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 12, MaxPool);
+
+class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 7, 9, AveragePool);
+class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 10, 10, AveragePool);
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 11, AveragePool);
+class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 7, 9, AveragePool);
+class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 10, 10, AveragePool);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 11, AveragePool);
+
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 1, GlobalAveragePool);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 1, GlobalAveragePool);
+
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 1, GlobalMaxPool);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 1, GlobalMaxPool);
 
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 1, 10, float, Conv);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 11, float, Conv);
-class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 1, 10, float, ConvTranspose);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 11, float, ConvTranspose);
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 7, 8, Gemm);
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 9, 10, Gemm);
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 11, 12, Gemm);
@@ -251,17 +279,6 @@ class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 13, Gem
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 1, 12, MatMul);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 13, MatMul);
 
-class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 7, 9, AveragePool);
-class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 10, 10, AveragePool);
-class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 11, AveragePool);
-class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 1, GlobalAveragePool);
-class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 1, 7, MaxPool);
-class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 8, 9, MaxPool);
-class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 10, 10, MaxPool);
-class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 11, 11, MaxPool);
-class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 12, MaxPool);
-class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 1, GlobalMaxPool);
-
 class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 1, 10, float, ArgMax);
 class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 11, 12, float, ArgMax);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 13, float, ArgMax);
@@ -285,11 +302,17 @@ class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomai
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 18, Split);
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 8, 12, Expand);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 13, Expand);
+
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 10, 10, Resize);
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 11, 12, Resize);
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 13, 17, Resize);
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 18, 18, Resize);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 19, Resize);
+class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 10, 10, Resize);
+class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 11, 12, Resize);
+class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 13, 17, Resize);
+class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 18, 18, Resize);
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 19, Resize);
 
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 1, 10, Gather);
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 11, 12, Gather);
@@ -298,11 +321,6 @@ class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 13, Gat
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 11, 12, GatherElements);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 13, GatherElements);
 
-class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 11, 12, Resize);
-class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 13, 17, Resize);
-class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 18, 18, Resize);
-class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 19, Resize);
-
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 1, 9, Slice);
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 10, 10, Slice);
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 11, 12, Slice);
@@ -315,9 +333,13 @@ class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 13, Fla
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 6, 12, Tile);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 13, Tile);
 
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 17, float, LayerNormalization);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 6, float, InstanceNormalization);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 6, float, InstanceNormalization);
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 17, LayerNormalization);
+
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 6, InstanceNormalization);
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 6, InstanceNormalization);
+
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 11, Range);
+
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 12, float, Einsum);
 
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 2, 10, Pad);
@@ -331,6 +353,15 @@ class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomai
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 13, 18, If);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 19, If);
 
+class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 7, 8, BatchNormalization);
+class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 9, 13, BatchNormalization);
+class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 14, 14, BatchNormalization);
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 15, BatchNormalization);
+class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 7, 8, BatchNormalization);
+class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 9, 13, BatchNormalization);
+class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 14, 14, BatchNormalization);
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 15, BatchNormalization);
+
 std::unique_ptr<KernelRegistry> RegisterKernels() {
   auto kernel_registry = std::make_unique<onnxruntime::KernelRegistry>();
 
@@ -493,21 +524,46 @@ std::unique_ptr<KernelRegistry> RegisterKernels() {
       BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 13, 17, ReduceLogSumExp)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 18, ReduceLogSumExp)>,
 
+      KERNEL_CREATE_INFO_VERSIONED(9, 15, Where),
+      KERNEL_CREATE_INFO(16, Where),
+
       BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 1, 12, Transpose)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 13, Transpose)>,
 
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 11, float, Conv)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 11, float, ConvTranspose)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 1, 10, Conv)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 11, Conv)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 1, 10, Conv)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 11, Conv)>,
+
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 1, 10, ConvTranspose)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 11, ConvTranspose)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 1, 10, ConvTranspose)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 11, ConvTranspose)>,
+
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 1, 7, MaxPool)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 8, 9, MaxPool)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 10, 10, MaxPool)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 11, 11, MaxPool)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 12, MaxPool)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 1, 7, MaxPool)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 8, 9, MaxPool)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 10, 10, MaxPool)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 11, 11, MaxPool)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 12, MaxPool)>,
+
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 7, 9, AveragePool)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 10, 10, AveragePool)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 11, AveragePool)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 7, 9, AveragePool)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 10, 10, AveragePool)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 11, AveragePool)>,
+
+      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 1, GlobalAveragePool)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 1, GlobalAveragePool)>,
+
+      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 1, GlobalMaxPool)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 1, GlobalMaxPool)>,
 
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 1, 10, float, Conv)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 11, float, Conv)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 1, 10, float, ConvTranspose)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 11, float, ConvTranspose)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 7, 8, Gemm)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 9, 10, Gemm)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 11, 12, Gemm)>,
@@ -515,17 +571,6 @@ std::unique_ptr<KernelRegistry> RegisterKernels() {
       BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 1, 12, MatMul)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 13, MatMul)>,
 
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 7, 9, AveragePool)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 10, 10, AveragePool)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 11, AveragePool)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 1, GlobalAveragePool)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 1, 7, MaxPool)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 8, 9, MaxPool)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 10, 10, MaxPool)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 11, 11, MaxPool)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 12, MaxPool)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 1, GlobalMaxPool)>,
-
       BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 1, 10, float, ArgMax)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 11, 12, float, ArgMax)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 13, float, ArgMax)>,
@@ -563,7 +608,7 @@ std::unique_ptr<KernelRegistry> RegisterKernels() {
       BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 13, 17, Resize)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 18, 18, Resize)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 19, Resize)>,
-
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 10, 10, Resize)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 11, 12, Resize)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 13, 17, Resize)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 18, 18, Resize)>,
@@ -581,10 +626,15 @@ std::unique_ptr<KernelRegistry> RegisterKernels() {
       BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 6, 12, Tile)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 13, Tile)>,
 
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 17, float, LayerNormalization)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 6, float, InstanceNormalization)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 6, float, InstanceNormalization)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 17, LayerNormalization)>,
+
+      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 6, InstanceNormalization)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 6, InstanceNormalization)>,
+
+      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 11, Range)>,
+
       BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 12, float, Einsum)>,
+
       BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 2, 10, Pad)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 11, 12, Pad)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 13, 17, Pad)>,
@@ -595,6 +645,15 @@ std::unique_ptr<KernelRegistry> RegisterKernels() {
       BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 11, 12, If)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 13, 18, If)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 19, If)>,
+
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 7, 8, BatchNormalization)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 9, 13, BatchNormalization)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 14, 14, BatchNormalization)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 15, BatchNormalization)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 7, 8, BatchNormalization)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 9, 13, BatchNormalization)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 14, 14, BatchNormalization)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 15, BatchNormalization)>,
   };
 
   for (auto& function_table_entry : function_table) {
@@ -617,7 +676,8 @@ std::unique_ptr<KernelRegistry> RegisterKernels() {
 using namespace js;
 
 JsExecutionProvider::JsExecutionProvider(const JsExecutionProviderInfo& info)
-    : IExecutionProvider{kJsExecutionProvider, OrtDevice(OrtDevice::GPU, OrtDevice::MemType::DEFAULT, 0), true} {
+    : IExecutionProvider{kJsExecutionProvider, OrtDevice(OrtDevice::GPU, OrtDevice::MemType::DEFAULT, 0), true},
+      preferred_data_layout_{info.data_layout} {
 }
 
 std::vector<AllocatorPtr> JsExecutionProvider::CreatePreferredAllocators() {
@@ -631,7 +691,45 @@ std::vector<AllocatorPtr> JsExecutionProvider::CreatePreferredAllocators() {
 std::vector<std::unique_ptr<ComputeCapability>> JsExecutionProvider::GetCapability(
     const onnxruntime::GraphViewer& graph,
     const IKernelLookup& kernel_lookup) const {
-  return IExecutionProvider::GetCapability(graph, kernel_lookup);
+  InlinedVector<NodeIndex> candidates;
+  // `tenative_candidates` is a subset of `candidates`.
+  InlinedVector<NodeIndex> tenative_candidates;
+  for (auto& node_index : graph.GetNodesInTopologicalOrder()) {
+    const auto* p_node = graph.GetNode(node_index);
+    if (p_node == nullptr)
+      continue;
+
+    const auto& node = *p_node;
+    if (!node.GetExecutionProviderType().empty()) {
+      // If the node was added by layout transformer, do not move it to CPU
+      if (node.GetExecutionProviderType() == kJsExecutionProvider) {
+        candidates.push_back(node.Index());
+      }
+      continue;
+    }
+
+    const KernelCreateInfo* webgpu_kernel_def = kernel_lookup.LookUpKernel(node);
+    // none of the provided registries has a webgpu kernel for this node
+    if (webgpu_kernel_def == nullptr) {
+      LOGS(*GetLogger(), INFO) << "webgpu kernel not found in registries for Op type: "
+                               << node.OpType() << " node name: " << node.Name();
+      continue;
+    }
+    candidates.push_back(node.Index());
+    tenative_candidates.push_back(node.Index());
+  }
+  auto cpu_nodes = GetCpuPreferredNodes(graph, kernel_lookup, tenative_candidates);
+  std::vector<std::unique_ptr<ComputeCapability>> result;
+  for (auto& node_index : candidates) {
+    if (cpu_nodes.count(node_index) > 0) {
+      continue;
+    }
+
+    auto sub_graph = std::make_unique<IndexedSubGraph>();
+    sub_graph->nodes.push_back(node_index);
+    result.emplace_back(std::make_unique<ComputeCapability>(std::move(sub_graph)));
+  }
+  return result;
 }
 
 std::shared_ptr<KernelRegistry> JsExecutionProvider::GetKernelRegistry() const {
diff --git a/onnxruntime/core/providers/js/js_execution_provider.h b/onnxruntime/core/providers/js/js_execution_provider.h
index 091aa2904604a..39d43498c0717 100644
--- a/onnxruntime/core/providers/js/js_execution_provider.h
+++ b/onnxruntime/core/providers/js/js_execution_provider.h
@@ -19,12 +19,21 @@ KernelCreateInfo BuildKernelCreateInfo();
 
 }  // namespace js
 
-// placeholder for future use. no options currently
 struct JsExecutionProviderInfo {
-  JsExecutionProviderInfo() = default;
-
   JsExecutionProviderInfo(const ProviderOptions& po) {
+    auto it = po.find("preferred_layout");
+    if (it != po.end()) {
+      auto& value = it->second;
+      if (value == "NCHW") {
+        data_layout = DataLayout::NCHW;
+      } else if (value == "NHWC") {
+        data_layout = DataLayout::NHWC;
+      }
+    }
   }
+
+  // JSEP default preferred layout is NHWC
+  DataLayout data_layout = DataLayout::NHWC;
 };
 
 class JsExecutionProvider : public IExecutionProvider {
@@ -39,7 +48,7 @@ class JsExecutionProvider : public IExecutionProvider {
   std::shared_ptr<KernelRegistry> GetKernelRegistry() const override;
   std::unique_ptr<onnxruntime::IDataTransfer> GetDataTransfer() const override;
 
-  DataLayout GetPreferredLayout() const override { return DataLayout::NHWC; }
+  DataLayout GetPreferredLayout() const override { return preferred_data_layout_; }
 
   FusionStyle GetFusionStyle() const override { return FusionStyle::FilteredGraphViewer; }
 
@@ -48,6 +57,7 @@ class JsExecutionProvider : public IExecutionProvider {
   bool ConcurrentRunSupported() const override { return false; }
 
   std::vector<AllocatorPtr> CreatePreferredAllocators() override;
+  DataLayout preferred_data_layout_;
 };
 
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/js/js_kernel.h b/onnxruntime/core/providers/js/js_kernel.h
index 177c0a9e691ed..fdd5c7dee5bfc 100644
--- a/onnxruntime/core/providers/js/js_kernel.h
+++ b/onnxruntime/core/providers/js/js_kernel.h
@@ -196,7 +196,7 @@ class JsKernel : public OpKernel {
     }
 
     int status_code = EM_ASM_INT(
-        { return Module.jsepRunKernel($0, $1, Module.jsepSessionState); },
+        { return Module.jsepRunKernel($0, $1, Module.jsepSessionState.sessionHandle, Module.jsepSessionState.errors); },
         this, reinterpret_cast<int32_t>(p_serialized_kernel_context));
 
     LOGS_DEFAULT(VERBOSE) << "outputs = " << context->OutputCount() << ". Y.data="
diff --git a/onnxruntime/core/providers/js/operators/batch_norm.cc b/onnxruntime/core/providers/js/operators/batch_norm.cc
new file mode 100644
index 0000000000000..e18ad835792f7
--- /dev/null
+++ b/onnxruntime/core/providers/js/operators/batch_norm.cc
@@ -0,0 +1,32 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "batch_norm.h"
+
+namespace onnxruntime {
+namespace js {
+
+#define REGISTER_BATCHNORM_KERNEL(OP_TYPE, DOMAIN, KERNEL_CLASS)                         \
+  ONNX_OPERATOR_VERSIONED_KERNEL_EX(                                                     \
+      OP_TYPE, DOMAIN, 7, 8, kJsExecutionProvider,                                       \
+      KernelDefBuilder().TypeConstraint("T", JsepSupportedFloatTypes()), KERNEL_CLASS);  \
+  ONNX_OPERATOR_VERSIONED_KERNEL_EX(                                                     \
+      OP_TYPE, DOMAIN, 9, 13, kJsExecutionProvider,                                      \
+      KernelDefBuilder().TypeConstraint("T", JsepSupportedFloatTypes()), KERNEL_CLASS);  \
+  ONNX_OPERATOR_VERSIONED_KERNEL_EX(OP_TYPE, DOMAIN, 14, 14, kJsExecutionProvider,       \
+                                    KernelDefBuilder()                                   \
+                                        .TypeConstraint("T", JsepSupportedFloatTypes())  \
+                                        .TypeConstraint("U", JsepSupportedFloatTypes()), \
+                                    KERNEL_CLASS);                                       \
+  ONNX_OPERATOR_KERNEL_EX(OP_TYPE, DOMAIN, 15, kJsExecutionProvider,                     \
+                          KernelDefBuilder()                                             \
+                              .TypeConstraint("T", JsepSupportedFloatTypes())            \
+                              .TypeConstraint("T1", JsepSupportedFloatTypes())           \
+                              .TypeConstraint("T2", JsepSupportedFloatTypes()),          \
+                          KERNEL_CLASS);
+
+REGISTER_BATCHNORM_KERNEL(BatchNormalization, kMSInternalNHWCDomain, BatchNorm<true>);
+REGISTER_BATCHNORM_KERNEL(BatchNormalization, kOnnxDomain, BatchNorm<false>);
+
+}  // namespace js
+}  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/js/operators/batch_norm.h b/onnxruntime/core/providers/js/operators/batch_norm.h
new file mode 100644
index 0000000000000..bb987a8aeab44
--- /dev/null
+++ b/onnxruntime/core/providers/js/operators/batch_norm.h
@@ -0,0 +1,37 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+
+#include "core/providers/js/js_kernel.h"
+
+namespace onnxruntime {
+namespace js {
+
+template <bool is_channels_last>
+class BatchNorm final : public JsKernel {
+ public:
+  explicit BatchNorm(const OpKernelInfo& info) : JsKernel(info) {
+    float epsilon = info.GetAttrOrDefault<float>("epsilon", 1e-5);
+    float momentum = info.GetAttrOrDefault<float>("momentum", 0.9);
+    int64_t spatial = info.GetAttrOrDefault<int64_t>("spatial", 1);
+
+    const auto& node = info.node();
+    int opset = node.SinceVersion();
+    int64_t training_mode = opset <= 9 ? info.GetOutputCount() > 1 : info.GetAttrOrDefault<int64_t>("training_mode", 0);
+
+    JSEP_INIT_KERNEL_ATTRIBUTE(BatchNormalization, ({
+                                 "epsilon" : $1,
+                                 "momentum" : $2,
+                                 "spatial" : !!$4,
+                                 "trainingMode" : !!$3,
+                                 "format" : $5 ? "NHWC" : "NCHW",
+                               }),
+                               static_cast<float>(epsilon), static_cast<float>(momentum),
+                               static_cast<int32_t>(training_mode), static_cast<int32_t>(spatial),
+                               static_cast<int32_t>(is_channels_last));
+  }
+};
+
+}  // namespace js
+}  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/js/operators/cast.cc b/onnxruntime/core/providers/js/operators/cast.cc
index f05e1eac4329c..9b6ac6d7e253b 100644
--- a/onnxruntime/core/providers/js/operators/cast.cc
+++ b/onnxruntime/core/providers/js/operators/cast.cc
@@ -14,8 +14,7 @@ const std::vector<MLDataType>& CastOpTypeConstraints() {
   // https://gpuweb.github.io/gpuweb/wgsl/#plain-types-section
   //
   static std::vector<MLDataType> types{
-      // TODO(fs-eire): support f16 when it's ready
-      // DataTypeImpl::GetTensorType<MLFloat16>(),
+      DataTypeImpl::GetTensorType<MLFloat16>(),
       DataTypeImpl::GetTensorType<float>(),
       DataTypeImpl::GetTensorType<int32_t>(),
       DataTypeImpl::GetTensorType<uint32_t>(),
diff --git a/onnxruntime/core/providers/js/operators/concat.cc b/onnxruntime/core/providers/js/operators/concat.cc
index 3a6a7e1cafd7a..17c6b0466c3a5 100644
--- a/onnxruntime/core/providers/js/operators/concat.cc
+++ b/onnxruntime/core/providers/js/operators/concat.cc
@@ -12,7 +12,8 @@ ONNX_OPERATOR_VERSIONED_KERNEL_EX(
     1, 3,
     kJsExecutionProvider,
     (*KernelDefBuilder::Create())
-        .TypeConstraint("T", {DataTypeImpl::GetTensorType<float>(),
+        .TypeConstraint("T", {DataTypeImpl::GetTensorType<MLFloat16>(),
+                              DataTypeImpl::GetTensorType<float>(),
                               DataTypeImpl::GetTensorType<int32_t>()}),
     Concat);
 
@@ -22,7 +23,8 @@ ONNX_OPERATOR_VERSIONED_KERNEL_EX(
     4, 10,
     kJsExecutionProvider,
     (*KernelDefBuilder::Create())
-        .TypeConstraint("T", {DataTypeImpl::GetTensorType<float>(),
+        .TypeConstraint("T", {DataTypeImpl::GetTensorType<MLFloat16>(),
+                              DataTypeImpl::GetTensorType<float>(),
                               DataTypeImpl::GetTensorType<int32_t>()}),
     Concat);
 
@@ -32,7 +34,8 @@ ONNX_OPERATOR_VERSIONED_KERNEL_EX(
     11, 12,
     kJsExecutionProvider,
     (*KernelDefBuilder::Create())
-        .TypeConstraint("T", {DataTypeImpl::GetTensorType<float>(),
+        .TypeConstraint("T", {DataTypeImpl::GetTensorType<MLFloat16>(),
+                              DataTypeImpl::GetTensorType<float>(),
                               DataTypeImpl::GetTensorType<int32_t>()}),
     Concat);
 
@@ -42,7 +45,8 @@ ONNX_OPERATOR_KERNEL_EX(
     13,
     kJsExecutionProvider,
     (*KernelDefBuilder::Create())
-        .TypeConstraint("T", {DataTypeImpl::GetTensorType<float>(),
+        .TypeConstraint("T", {DataTypeImpl::GetTensorType<MLFloat16>(),
+                              DataTypeImpl::GetTensorType<float>(),
                               DataTypeImpl::GetTensorType<int32_t>()}),
     Concat);
 
diff --git a/onnxruntime/core/providers/js/operators/conv.cc b/onnxruntime/core/providers/js/operators/conv.cc
index c7c9f7f7c3f0e..474fd260880ce 100644
--- a/onnxruntime/core/providers/js/operators/conv.cc
+++ b/onnxruntime/core/providers/js/operators/conv.cc
@@ -9,33 +9,36 @@
 namespace onnxruntime {
 namespace js {
 
-#define REGISTER_KERNEL_TYPED(T)                                                           \
-  ONNX_OPERATOR_TYPED_KERNEL_EX(                                                           \
-      Conv,                                                                                \
-      kMSInternalNHWCDomain,                                                               \
-      11,                                                                                  \
-      T,                                                                                   \
-      kJsExecutionProvider,                                                                \
-      (*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), \
-      Conv<T, true>);                                                                      \
-  ONNX_OPERATOR_TYPED_KERNEL_EX(                                                           \
-      Conv,                                                                                \
-      kOnnxDomain,                                                                         \
-      11,                                                                                  \
-      T,                                                                                   \
-      kJsExecutionProvider,                                                                \
-      (*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), \
-      Conv<T, false>);                                                                     \
-  ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_EX(                                                 \
-      Conv,                                                                                \
-      kOnnxDomain,                                                                         \
-      1, 10,                                                                               \
-      T,                                                                                   \
-      kJsExecutionProvider,                                                                \
-      (*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), \
-      Conv<T, false>);
+ONNX_OPERATOR_KERNEL_EX(
+    Conv,
+    kMSInternalNHWCDomain,
+    11,
+    kJsExecutionProvider,
+    (*KernelDefBuilder::Create()).TypeConstraint("T", JsepSupportedFloatTypes()),
+    Conv<true>);
 
-REGISTER_KERNEL_TYPED(float)
+ONNX_OPERATOR_KERNEL_EX(
+    Conv,
+    kOnnxDomain,
+    11,
+    kJsExecutionProvider,
+    (*KernelDefBuilder::Create()).TypeConstraint("T", JsepSupportedFloatTypes()),
+    Conv<false>);
+
+ONNX_OPERATOR_VERSIONED_KERNEL_EX(
+    Conv,
+    kMSInternalNHWCDomain,
+    1, 10,
+    kJsExecutionProvider,
+    (*KernelDefBuilder::Create()).TypeConstraint("T", JsepSupportedFloatTypes()),
+    Conv<true>);
+ONNX_OPERATOR_VERSIONED_KERNEL_EX(
+    Conv,
+    kOnnxDomain,
+    1, 10,
+    kJsExecutionProvider,
+    (*KernelDefBuilder::Create()).TypeConstraint("T", JsepSupportedFloatTypes()),
+    Conv<false>);
 
 }  // namespace js
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/js/operators/conv.h b/onnxruntime/core/providers/js/operators/conv.h
index 22f7721276677..3a01a4aa46be4 100644
--- a/onnxruntime/core/providers/js/operators/conv.h
+++ b/onnxruntime/core/providers/js/operators/conv.h
@@ -3,23 +3,42 @@
 
 #pragma once
 
+#include <vector>
+#include <string>
+
 #include "core/providers/js/js_kernel.h"
 #include "core/providers/cpu/nn/conv_attributes.h"
 
 namespace onnxruntime {
 namespace js {
 
-template <typename T, bool is_channels_last>
-class Conv : public JsKernel {
+class ConvBase : public JsKernel {
  public:
-  Conv(const OpKernelInfo& info) : JsKernel(info), conv_attrs_(info), w_is_const_(false) {
+  ConvBase(const OpKernelInfo& info, bool is_channels_last, bool is_fused_conv) : JsKernel(info),
+                                                                                  conv_attrs_(info),
+                                                                                  w_is_const_(false) {
+    std::vector<float> activation_params;
     TensorShapeVector kernel_shape;
+    const size_t pads_vec_size = conv_attrs_.pads.size() == 0 ? 4 : conv_attrs_.pads.size();
+    std::vector<int32_t> local_pads(pads_vec_size, 0);
+    for (size_t i = 0; i < conv_attrs_.pads.size() && i < pads_vec_size; ++i) {
+      local_pads[i] = gsl::narrow_cast<int32_t>(conv_attrs_.pads[i]);
+    }
+
     if (conv_attrs_.kernel_shape_specified) {
       ORT_ENFORCE(info.GetAttrs("kernel_shape", kernel_shape).IsOK());
     }
-
+    if (is_fused_conv) {
+      ORT_THROW_IF_ERROR(info.GetAttr<std::string>("activation", &conv_attrs_.activation));
+      ORT_ENFORCE(info.GetAttrs<float>("activation_params", activation_params).IsOK());
+    } else {
+      conv_attrs_.activation = info.GetAttrOrDefault<std::string>("activation", "");
+      activation_params = info.GetAttrsOrDefault<float>("activation_params", activation_params);
+    }
+    const auto* activation_params_ptr = activation_params.size() > 0 ? activation_params.data() : nullptr;
     int64_t channels_last = is_channels_last ? 1 : info.GetAttrOrDefault<int64_t>("channels_last", 0);
-
+    auto kernel_shape_0 = conv_attrs_.kernel_shape_specified && kernel_shape.size() > 0 ? kernel_shape[0] : 0;
+    auto kernel_shape_1 = conv_attrs_.kernel_shape_specified && kernel_shape.size() > 1 ? kernel_shape[1] : 0;
     // currently only support Conv 1D/2D. TODO: support Conv3D and other
     if (conv_attrs_.dilations.size() == 1 ||
         (conv_attrs_.kernel_shape_specified && kernel_shape.size() == 1) ||
@@ -30,44 +49,52 @@ class Conv : public JsKernel {
                                    "dilations" : [$2],
                                    "group" : $3,
                                    "kernel_shape" : [$4],
-                                   "pads" : [ $5, $6 ],
+                                   "pads" : $5 ? Array.from(HEAP32.subarray($6, $6 + $5)) : [],
                                    "strides" : [$7],
-                                   "w_is_const" : () JS_ARROW(!!HEAP8[$9])
+                                   "w_is_const" : () JS_ARROW(!!HEAP8[$9]),
+                                   "activation" : UTF8ToString($10),
+                                   "activation_params" : $11 ? Array.from(HEAPF32.subarray($12, $12 + $11)) : []
                                  }),
                                  static_cast<int32_t>(conv_attrs_.auto_pad),
                                  static_cast<int32_t>(conv_attrs_.dilations.size() > 0 ? conv_attrs_.dilations[0] : 0),
                                  static_cast<int32_t>(conv_attrs_.group),
-                                 static_cast<int32_t>(conv_attrs_.kernel_shape_specified && kernel_shape.size() > 0 ? kernel_shape[0] : 0),
-                                 static_cast<int32_t>(conv_attrs_.pads.size() > 0 ? conv_attrs_.pads[0] : 0),
-                                 static_cast<int32_t>(conv_attrs_.pads.size() > 1 ? conv_attrs_.pads[1] : 0),
+                                 static_cast<int32_t>(kernel_shape_0),
+                                 static_cast<int32_t>(local_pads.size()),
+                                 reinterpret_cast<int32_t>(local_pads.size() > 0 ? local_pads.data() : nullptr) >> 2,
                                  static_cast<int32_t>(conv_attrs_.strides.size() > 0 ? conv_attrs_.strides[0] : 0),
                                  static_cast<int32_t>(channels_last),
-                                 reinterpret_cast<int32_t>(&w_is_const_));
+                                 reinterpret_cast<int32_t>(&w_is_const_),
+                                 conv_attrs_.activation.c_str(),
+                                 activation_params.size(),
+                                 reinterpret_cast<int32_t>(activation_params_ptr) >> 2);
     } else {
       JSEP_INIT_KERNEL_ATTRIBUTE(Conv, ({
-                                   "format" : $13 ? "NHWC" : "NCHW",
+                                   "format" : $11 ? "NHWC" : "NCHW",
                                    "auto_pad" : $1,
                                    "dilations" : [ $2, $3 ],
                                    "group" : $4,
                                    "kernel_shape" : [ $5, $6 ],
-                                   "pads" : [ $7, $8, $9, $10 ],
-                                   "strides" : [ $11, $12 ],
-                                   "w_is_const" : () JS_ARROW(!!HEAP8[$14])
+                                   "pads" : $7 ? Array.from(HEAP32.subarray($8, $8 + $7)) : [],
+                                   "strides" : [ $9, $10 ],
+                                   "w_is_const" : () JS_ARROW(!!HEAP8[$12]),
+                                   "activation" : UTF8ToString($13),
+                                   "activation_params" : $14 ? Array.from(HEAPF32.subarray($15, $15 + $14)) : []
                                  }),
                                  static_cast<int32_t>(conv_attrs_.auto_pad),
                                  static_cast<int32_t>(conv_attrs_.dilations.size() > 0 ? conv_attrs_.dilations[0] : 0),
                                  static_cast<int32_t>(conv_attrs_.dilations.size() > 1 ? conv_attrs_.dilations[1] : 0),
                                  static_cast<int32_t>(conv_attrs_.group),
-                                 static_cast<int32_t>(conv_attrs_.kernel_shape_specified && kernel_shape.size() > 0 ? kernel_shape[0] : 0),
-                                 static_cast<int32_t>(conv_attrs_.kernel_shape_specified && kernel_shape.size() > 1 ? kernel_shape[1] : 0),
-                                 static_cast<int32_t>(conv_attrs_.pads.size() > 0 ? conv_attrs_.pads[0] : 0),
-                                 static_cast<int32_t>(conv_attrs_.pads.size() > 1 ? conv_attrs_.pads[1] : 0),
-                                 static_cast<int32_t>(conv_attrs_.pads.size() > 2 ? conv_attrs_.pads[2] : 0),
-                                 static_cast<int32_t>(conv_attrs_.pads.size() > 3 ? conv_attrs_.pads[3] : 0),
+                                 static_cast<int32_t>(kernel_shape_0),
+                                 static_cast<int32_t>(kernel_shape_1),
+                                 static_cast<int32_t>(local_pads.size()),
+                                 reinterpret_cast<int32_t>(local_pads.size() > 0 ? local_pads.data() : nullptr) >> 2,
                                  static_cast<int32_t>(conv_attrs_.strides.size() > 0 ? conv_attrs_.strides[0] : 0),
                                  static_cast<int32_t>(conv_attrs_.strides.size() > 1 ? conv_attrs_.strides[1] : 0),
                                  static_cast<int32_t>(channels_last),
-                                 reinterpret_cast<int32_t>(&w_is_const_));
+                                 reinterpret_cast<int32_t>(&w_is_const_),
+                                 conv_attrs_.activation.c_str(),
+                                 activation_params.size(),
+                                 reinterpret_cast<int32_t>(activation_params_ptr) >> 2);
     }
   }
 
@@ -94,5 +121,12 @@ class Conv : public JsKernel {
   // Tensor w_transposed_;
 };
 
+template <bool is_channels_last, bool is_fused_conv = false>
+class Conv : public ConvBase {
+ public:
+  explicit Conv(const OpKernelInfo& info) : ConvBase(info, is_channels_last, is_fused_conv) {
+  }
+};
+
 }  // namespace js
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/js/operators/conv_transpose.cc b/onnxruntime/core/providers/js/operators/conv_transpose.cc
index 1a2fc99eada6a..2aaf438f30d4d 100644
--- a/onnxruntime/core/providers/js/operators/conv_transpose.cc
+++ b/onnxruntime/core/providers/js/operators/conv_transpose.cc
@@ -7,33 +7,37 @@
 #include "conv_transpose.h"
 namespace onnxruntime {
 namespace js {
-#define REGISTER_KERNEL_TYPED(T)                                                           \
-  ONNX_OPERATOR_TYPED_KERNEL_EX(                                                           \
-      ConvTranspose,                                                                       \
-      kMSInternalNHWCDomain,                                                               \
-      11,                                                                                  \
-      T,                                                                                   \
-      kJsExecutionProvider,                                                                \
-      (*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), \
-      ConvTranspose<T, true>);                                                             \
-  ONNX_OPERATOR_TYPED_KERNEL_EX(                                                           \
-      ConvTranspose,                                                                       \
-      kOnnxDomain,                                                                         \
-      11,                                                                                  \
-      T,                                                                                   \
-      kJsExecutionProvider,                                                                \
-      (*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), \
-      ConvTranspose<T, false>);                                                            \
-  ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_EX(                                                 \
-      ConvTranspose,                                                                       \
-      kOnnxDomain,                                                                         \
-      1, 10,                                                                               \
-      T,                                                                                   \
-      kJsExecutionProvider,                                                                \
-      (*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), \
-      ConvTranspose<T, false>);
 
-REGISTER_KERNEL_TYPED(float)
+ONNX_OPERATOR_KERNEL_EX(
+    ConvTranspose,
+    kMSInternalNHWCDomain,
+    11,
+    kJsExecutionProvider,
+    (*KernelDefBuilder::Create()).TypeConstraint("T", JsepSupportedFloatTypes()),
+    ConvTranspose<true>);
+
+ONNX_OPERATOR_KERNEL_EX(
+    ConvTranspose,
+    kOnnxDomain,
+    11,
+    kJsExecutionProvider,
+    (*KernelDefBuilder::Create()).TypeConstraint("T", JsepSupportedFloatTypes()),
+    ConvTranspose<false>);
+
+ONNX_OPERATOR_VERSIONED_KERNEL_EX(
+    ConvTranspose,
+    kMSInternalNHWCDomain,
+    1, 10,
+    kJsExecutionProvider,
+    (*KernelDefBuilder::Create()).TypeConstraint("T", JsepSupportedFloatTypes()),
+    ConvTranspose<true>);
+ONNX_OPERATOR_VERSIONED_KERNEL_EX(
+    ConvTranspose,
+    kOnnxDomain,
+    1, 10,
+    kJsExecutionProvider,
+    (*KernelDefBuilder::Create()).TypeConstraint("T", JsepSupportedFloatTypes()),
+    ConvTranspose<false>);
 
 }  // namespace js
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/js/operators/conv_transpose.h b/onnxruntime/core/providers/js/operators/conv_transpose.h
index a5aeae8646373..5d30dc851e00f 100644
--- a/onnxruntime/core/providers/js/operators/conv_transpose.h
+++ b/onnxruntime/core/providers/js/operators/conv_transpose.h
@@ -4,26 +4,45 @@
 #pragma once
 
 #include <algorithm>
+#include <string>
 #include "core/common/gsl.h"
 #include "core/providers/cpu/nn/conv_transpose_attributes.h"
 #include "core/providers/js/js_kernel.h"
 namespace onnxruntime {
 namespace js {
-template <typename T, bool is_channels_last>
+template <bool is_channels_last, bool is_fused_convtranspose = false>
 class ConvTranspose : public JsKernel {
  public:
   ConvTranspose(const OpKernelInfo& info) : JsKernel(info), conv_transpose_attrs_(info), w_is_const_(false) {
     TensorShapeVector kernel_shape;
+    if (is_fused_convtranspose) {
+      ORT_THROW_IF_ERROR(info.GetAttr<std::string>("activation", &conv_transpose_attrs_.activation));
+    } else {
+      conv_transpose_attrs_.activation = info.GetAttrOrDefault<std::string>("activation", "");
+    }
+
     if (conv_transpose_attrs_.kernel_shape_specified) {
       ORT_ENFORCE(info.GetAttrs("kernel_shape", kernel_shape).IsOK());
     }
-
     int64_t channels_last = is_channels_last ? 1 : info.GetAttrOrDefault<int64_t>("channels_last", 0);
+    std::vector<int32_t> local_output_shape(conv_transpose_attrs_.output_shape.begin(),
+                                            conv_transpose_attrs_.output_shape.end());
+    std::vector<int32_t> local_output_padding(conv_transpose_attrs_.output_padding.begin(),
+                                              conv_transpose_attrs_.output_padding.end());
+    const auto* local_output_padding_ptr =
+        local_output_padding.size() > 0 ? local_output_padding.data() : nullptr;
+    const auto* local_output_shape_ptr =
+        local_output_shape.size() > 0 ? local_output_shape.data() : nullptr;
 
     // currently only support Conv 1D/2D. TODO: support Conv3D and other
     if (conv_transpose_attrs_.dilations.size() == 1 ||
         (conv_transpose_attrs_.kernel_shape_specified && kernel_shape.size() == 1) ||
         conv_transpose_attrs_.strides.size() == 1) {
+      auto dilations = conv_transpose_attrs_.dilations.size() > 0 ? conv_transpose_attrs_.dilations[0] : 0;
+      auto kernel_shape_0 = conv_transpose_attrs_.kernel_shape_specified && kernel_shape.size() > 0 ? kernel_shape[0] : 0;
+      auto pads_0 = conv_transpose_attrs_.pads.size() > 0 ? conv_transpose_attrs_.pads[0] : 0;
+      auto pads_1 = conv_transpose_attrs_.pads.size() > 1 ? conv_transpose_attrs_.pads[1] : 0;
+      auto strides = conv_transpose_attrs_.strides.size() > 0 ? conv_transpose_attrs_.strides[0] : 0;
       JSEP_INIT_KERNEL_ATTRIBUTE(ConvTranspose, ({
                                    "format" : $8 ? "NHWC" : "NCHW",
                                    "autoPad" : $1,
@@ -34,21 +53,23 @@ class ConvTranspose : public JsKernel {
                                    "strides" : [$7],
                                    "wIsConst" : () JS_ARROW(!!HEAP8[$9]),
                                    "outputPadding" : $10 ? Array.from(HEAP32.subarray($11, $11 + $10)) : [],
-                                   "outputShape" : $12 ? Array.from(HEAP32.subarray($13, $13 + $12)) : []
+                                   "outputShape" : $12 ? Array.from(HEAP32.subarray($13, $13 + $12)) : [],
+                                   "activation" : UTF8ToString($14)
                                  }),
                                  static_cast<int32_t>(conv_transpose_attrs_.auto_pad),
-                                 static_cast<int32_t>(conv_transpose_attrs_.dilations.size() > 0 ? conv_transpose_attrs_.dilations[0] : 0),
+                                 static_cast<int32_t>(dilations),
                                  static_cast<int32_t>(conv_transpose_attrs_.group),
-                                 static_cast<int32_t>(conv_transpose_attrs_.kernel_shape_specified && kernel_shape.size() > 0) ? kernel_shape[0] : 0,
-                                 static_cast<int32_t>(conv_transpose_attrs_.pads.size()),
-                                 static_cast<int32_t>(conv_transpose_attrs_.pads.size() > 1) ? conv_transpose_attrs_.pads[1] : 0,
-                                 static_cast<int32_t>(conv_transpose_attrs_.strides.size() > 0) ? conv_transpose_attrs_.strides[0] : 0,
+                                 static_cast<int32_t>(kernel_shape_0),
+                                 static_cast<int32_t>(pads_0),
+                                 static_cast<int32_t>(pads_1),
+                                 static_cast<int32_t>(strides),
                                  static_cast<int32_t>(channels_last),
                                  reinterpret_cast<int32_t>(&w_is_const_),
-                                 gsl::narrow_cast<int32_t>(conv_transpose_attrs_.output_shape.size()),
-                                 reinterpret_cast<int32_t>(conv_transpose_attrs_.output_padding.size() > 0 ? conv_transpose_attrs_.output_padding.data() : nullptr) >> 2,
-                                 gsl::narrow_cast<int32_t>(conv_transpose_attrs_.output_shape.size()),
-                                 reinterpret_cast<int32_t>(conv_transpose_attrs_.output_shape.size() > 0 ? conv_transpose_attrs_.output_shape.data() : nullptr) >> 2);
+                                 gsl::narrow_cast<int32_t>(local_output_padding.size()),
+                                 reinterpret_cast<int32_t>(local_output_padding_ptr) >> 2,
+                                 gsl::narrow_cast<int32_t>(local_output_shape.size()),
+                                 reinterpret_cast<int32_t>(local_output_shape_ptr) >> 2,
+                                 conv_transpose_attrs_.activation.c_str());
     } else {
       constexpr size_t pads_vec_size = 4;
       constexpr size_t strides_vec_size = 2;
@@ -59,8 +80,6 @@ class ConvTranspose : public JsKernel {
       std::vector<int32_t> local_strides(strides_vec_size, 0);
       std::vector<int32_t> local_dilations(dialations_vec_size, 0);
       std::vector<int32_t> local_kernel_shape;
-      std::vector<int32_t> local_output_shape(conv_transpose_attrs_.output_shape.begin(), conv_transpose_attrs_.output_shape.end());
-      std::vector<int32_t> local_output_padding(conv_transpose_attrs_.output_padding.begin(), conv_transpose_attrs_.output_padding.end());
       if (conv_transpose_attrs_.kernel_shape_specified) {
         for (size_t i = 0; i < kernel_shape.size() && i < kernel_shape_vec_size; ++i) {
           local_kernel_shape.push_back(gsl::narrow_cast<int32_t>(kernel_shape[i]));
@@ -91,7 +110,8 @@ class ConvTranspose : public JsKernel {
                                    "strides" : Array.from(HEAP32.subarray($6, $6 + /* strides_vec_size */ 2)),
                                    "wIsConst" : () JS_ARROW(!!HEAP8[$8]),
                                    "outputPadding" : ($9 > 0) ? Array.from(HEAP32.subarray($10, $10 + $9)) : [],
-                                   "outputShape" : ($11 > 0) ? Array.from(HEAP32.subarray($12, $12 + $11)) : []
+                                   "outputShape" : ($11 > 0) ? Array.from(HEAP32.subarray($12, $12 + $11)) : [],
+                                   "activation" : UTF8ToString($13)
                                  }),
                                  static_cast<int32_t>(conv_transpose_attrs_.auto_pad),
                                  reinterpret_cast<int32_t>(local_dilations.data()) >> 2,
@@ -102,12 +122,30 @@ class ConvTranspose : public JsKernel {
                                  static_cast<int32_t>(channels_last),
                                  reinterpret_cast<int32_t>(&w_is_const_),
                                  gsl::narrow_cast<int32_t>(local_output_padding.size()),
-                                 reinterpret_cast<int32_t>(local_output_padding.size() > 0 ? local_output_padding.data() : nullptr) >> 2,
+                                 reinterpret_cast<int32_t>(local_output_padding_ptr) >> 2,
                                  gsl::narrow_cast<int32_t>(local_output_shape.size()),
-                                 reinterpret_cast<int32_t>(local_output_shape.size() > 0 ? local_output_shape.data() : nullptr) >> 2);
+                                 reinterpret_cast<int32_t>(local_output_shape_ptr) >> 2,
+                                 conv_transpose_attrs_.activation.c_str());
     }
   }
 
+  Status PrePack(const Tensor& tensor, int input_idx, AllocatorPtr alloc,
+                 /*out*/ bool& is_packed,
+                 /*out*/ PrePackedWeights* /* prepacked_weights */) override {
+    is_packed = false;
+
+    if (input_idx == 1) {
+      // Only handle the common case of conv2D
+      if (tensor.Shape().NumDimensions() != 4 || tensor.SizeInBytes() == 0) {
+        return Status::OK();
+      }
+
+      w_is_const_ = true;
+    }
+
+    return Status::OK();
+  }
+
  protected:
   ConvTransposeAttributes conv_transpose_attrs_;
   bool w_is_const_;
diff --git a/onnxruntime/core/providers/js/operators/instance_norm.cc b/onnxruntime/core/providers/js/operators/instance_norm.cc
index 9d674766a866d..b8e67a69b24d3 100644
--- a/onnxruntime/core/providers/js/operators/instance_norm.cc
+++ b/onnxruntime/core/providers/js/operators/instance_norm.cc
@@ -6,18 +6,17 @@
 namespace onnxruntime {
 namespace js {
 
-#define INSTANCE_NORM_KERNEL(op_name, domain, data_type, since_version, is_channels_last)          \
-  ONNX_OPERATOR_TYPED_KERNEL_EX(                                                                   \
-      op_name,                                                                                     \
-      domain,                                                                                      \
-      since_version,                                                                               \
-      data_type,                                                                                   \
-      kJsExecutionProvider,                                                                        \
-      (*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<data_type>()), \
+#define INSTANCE_NORM_KERNEL(op_name, domain, since_version, is_channels_last)      \
+  ONNX_OPERATOR_KERNEL_EX(                                                          \
+      op_name,                                                                      \
+      domain,                                                                       \
+      since_version,                                                                \
+      kJsExecutionProvider,                                                         \
+      (*KernelDefBuilder::Create()).TypeConstraint("T", JsepSupportedFloatTypes()), \
       InstanceNorm<is_channels_last>);
 
-INSTANCE_NORM_KERNEL(InstanceNormalization, kOnnxDomain, float, 6, false)
-INSTANCE_NORM_KERNEL(InstanceNormalization, kMSInternalNHWCDomain, float, 6, true)
+INSTANCE_NORM_KERNEL(InstanceNormalization, kOnnxDomain, 6, false)
+INSTANCE_NORM_KERNEL(InstanceNormalization, kMSInternalNHWCDomain, 6, true)
 
 }  // namespace js
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/js/operators/layer_norm.cc b/onnxruntime/core/providers/js/operators/layer_norm.cc
index 46baedf5ac7af..9ba379ed09247 100644
--- a/onnxruntime/core/providers/js/operators/layer_norm.cc
+++ b/onnxruntime/core/providers/js/operators/layer_norm.cc
@@ -8,21 +8,15 @@
 namespace onnxruntime {
 namespace js {
 
-#define REGISTER_KERNEL_TYPED(T)                                      \
-  ONNX_OPERATOR_TYPED_KERNEL_EX(                                      \
-      LayerNormalization,                                             \
-      kOnnxDomain,                                                    \
-      17,                                                             \
-      T,                                                              \
-      kJsExecutionProvider,                                           \
-      (*KernelDefBuilder::Create())                                   \
-          .TypeConstraint("T", DataTypeImpl::GetTensorType<T>())      \
-          .TypeConstraint("U", DataTypeImpl::GetTensorType<float>()), \
-      LayerNorm<T, float>);
-
-REGISTER_KERNEL_TYPED(float)
-// REGISTER_KERNEL_TYPED(double)
-// REGISTER_KERNEL_TYPED(MLFloat16)
+ONNX_OPERATOR_KERNEL_EX(
+    LayerNormalization,
+    kOnnxDomain,
+    17,
+    kJsExecutionProvider,
+    (*KernelDefBuilder::Create())
+        .TypeConstraint("T", JsepSupportedFloatTypes())
+        .TypeConstraint("U", JsepSupportedFloatTypes()),
+    LayerNorm);
 
 }  // namespace js
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/js/operators/layer_norm.h b/onnxruntime/core/providers/js/operators/layer_norm.h
index 040fb256ff6e2..791329f3e880d 100644
--- a/onnxruntime/core/providers/js/operators/layer_norm.h
+++ b/onnxruntime/core/providers/js/operators/layer_norm.h
@@ -8,7 +8,6 @@
 namespace onnxruntime {
 namespace js {
 
-template <typename T, typename U>
 class LayerNorm : public JsKernel {
  public:
   LayerNorm(const OpKernelInfo& info) : JsKernel(info) {
diff --git a/onnxruntime/core/providers/js/operators/matmul.cc b/onnxruntime/core/providers/js/operators/matmul.cc
index ddfbb454def07..6e6f906f7b42c 100644
--- a/onnxruntime/core/providers/js/operators/matmul.cc
+++ b/onnxruntime/core/providers/js/operators/matmul.cc
@@ -9,11 +9,11 @@ namespace js {
 JSEP_KERNEL_IMPL(MatMul, MatMul)
 
 ONNX_OPERATOR_VERSIONED_KERNEL_EX(MatMul, kOnnxDomain, 1, 12, kJsExecutionProvider,
-                                  KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
+                                  KernelDefBuilder().TypeConstraint("T", JsepSupportedFloatTypes()),
                                   MatMul);
 
 ONNX_OPERATOR_KERNEL_EX(MatMul, kOnnxDomain, 13, kJsExecutionProvider,
-                        KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
+                        KernelDefBuilder().TypeConstraint("T", JsepSupportedFloatTypes()),
                         MatMul);
 
 }  // namespace js
diff --git a/onnxruntime/core/providers/js/operators/pool.cc b/onnxruntime/core/providers/js/operators/pool.cc
index 7fdb4e5d114ea..7df1e483f52a1 100644
--- a/onnxruntime/core/providers/js/operators/pool.cc
+++ b/onnxruntime/core/providers/js/operators/pool.cc
@@ -52,15 +52,20 @@ namespace js {
       Pool<pool_type, is_channels_last>);
 
 POOLING_KERNEL_VERSIONED(AveragePool, kOnnxDomain, false, AveragePool, 7, 9)
+POOLING_KERNEL_VERSIONED(AveragePool, kMSInternalNHWCDomain, true, AveragePool, 7, 9)
 POOLING_KERNEL_VERSIONED(AveragePool, kOnnxDomain, false, AveragePool, 10, 10)
+POOLING_KERNEL_VERSIONED(AveragePool, kMSInternalNHWCDomain, true, AveragePool, 10, 10)
 POOLING_KERNEL(AveragePool, kOnnxDomain, false, AveragePool, 11)
 POOLING_KERNEL(AveragePool, kMSInternalNHWCDomain, true, AveragePool, 11)
 POOLING_KERNEL(GlobalAveragePool, kOnnxDomain, false, AveragePool, 1)
 POOLING_KERNEL(GlobalAveragePool, kMSInternalNHWCDomain, true, AveragePool, 1)
 
 POOLING_KERNEL_VERSIONED(MaxPool, kOnnxDomain, false, MaxPool<1>, 1, 7)
+POOLING_KERNEL_VERSIONED(MaxPool, kMSInternalNHWCDomain, true, MaxPool<1>, 1, 7)
 POOLING_KERNEL_VERSIONED_WITH_INDICES(MaxPool, kOnnxDomain, false, MaxPool<8>, 8, 9)
+POOLING_KERNEL_VERSIONED_WITH_INDICES(MaxPool, kMSInternalNHWCDomain, true, MaxPool<8>, 8, 9)
 POOLING_KERNEL_VERSIONED_WITH_INDICES(MaxPool, kOnnxDomain, false, MaxPool<8>, 10, 10)
+POOLING_KERNEL_VERSIONED_WITH_INDICES(MaxPool, kMSInternalNHWCDomain, true, MaxPool<8>, 10, 10)
 POOLING_KERNEL_VERSIONED_WITH_INDICES(MaxPool, kOnnxDomain, false, MaxPool<8>, 11, 11)
 POOLING_KERNEL_VERSIONED_WITH_INDICES(MaxPool, kMSInternalNHWCDomain, true, MaxPool<8>, 11, 11)
 POOLING_KERNEL_WITH_INDICES(MaxPool, kOnnxDomain, false, MaxPool<8>, 12)
diff --git a/onnxruntime/core/providers/js/operators/range.cc b/onnxruntime/core/providers/js/operators/range.cc
new file mode 100644
index 0000000000000..e15861f7f227a
--- /dev/null
+++ b/onnxruntime/core/providers/js/operators/range.cc
@@ -0,0 +1,22 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "core/providers/js/js_kernel.h"
+
+#include "range.h"
+
+namespace onnxruntime {
+namespace js {
+ONNX_OPERATOR_KERNEL_EX(
+    Range,
+    kOnnxDomain,
+    11,
+    kJsExecutionProvider,
+    KernelDefBuilder()
+        .TypeConstraint("T", {DataTypeImpl::GetTensorType<float>(), DataTypeImpl::GetTensorType<int32_t>()})
+        .InputMemoryType(OrtMemTypeCPU, 0)
+        .InputMemoryType(OrtMemTypeCPU, 1)
+        .InputMemoryType(OrtMemTypeCPU, 2),
+    Range);
+}  // namespace js
+}  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/rocm/rocm_fwd.h b/onnxruntime/core/providers/js/operators/range.h
similarity index 57%
rename from onnxruntime/core/providers/rocm/rocm_fwd.h
rename to onnxruntime/core/providers/js/operators/range.h
index b123446fa9be1..8b32bfc3d984b 100644
--- a/onnxruntime/core/providers/rocm/rocm_fwd.h
+++ b/onnxruntime/core/providers/js/operators/range.h
@@ -3,11 +3,12 @@
 
 #pragma once
 
-#include "core/framework/op_kernel.h"
+#include "core/providers/js/js_kernel.h"
 
 namespace onnxruntime {
-namespace rocm {
-template <typename T>
-KernelCreateInfo BuildKernelCreateInfo();
-}
+namespace js {
+
+JSEP_KERNEL_IMPL(Range, Range);
+
+}  // namespace js
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/js/operators/resize.cc b/onnxruntime/core/providers/js/operators/resize.cc
index 7619c33a477aa..2514ab75dff26 100644
--- a/onnxruntime/core/providers/js/operators/resize.cc
+++ b/onnxruntime/core/providers/js/operators/resize.cc
@@ -5,15 +5,15 @@
 
 namespace onnxruntime {
 namespace js {
-#define REGISTER_RESIZE_VERSIONED_10_10_KERNEL(domain)                \
-  ONNX_OPERATOR_VERSIONED_KERNEL_EX(                                  \
-      Resize,                                                         \
-      domain,                                                         \
-      10, 10,                                                         \
-      kJsExecutionProvider,                                           \
-      (*KernelDefBuilder::Create())                                   \
-          .InputMemoryType(OrtMemTypeCPUInput, 1)                     \
-          .TypeConstraint("T", DataTypeImpl::GetTensorType<float>()), \
+#define REGISTER_RESIZE_VERSIONED_10_10_KERNEL(domain)     \
+  ONNX_OPERATOR_VERSIONED_KERNEL_EX(                       \
+      Resize,                                              \
+      domain,                                              \
+      10, 10,                                              \
+      kJsExecutionProvider,                                \
+      (*KernelDefBuilder::Create())                        \
+          .InputMemoryType(OrtMemTypeCPUInput, 1)          \
+          .TypeConstraint("T", JsepSupportedFloatTypes()), \
       Resize);
 
 #define REGISTER_RESIZE_VERSIONED_KERNEL(domain, sinceVersion, endVerion) \
@@ -26,22 +26,22 @@ namespace js {
           .InputMemoryType(OrtMemTypeCPUInput, 1)                         \
           .InputMemoryType(OrtMemTypeCPUInput, 2)                         \
           .InputMemoryType(OrtMemTypeCPUInput, 3)                         \
-          .TypeConstraint("T1", DataTypeImpl::GetTensorType<float>())     \
-          .TypeConstraint("T2", DataTypeImpl::GetTensorType<float>()),    \
+          .TypeConstraint("T1", JsepSupportedFloatTypes())                \
+          .TypeConstraint("T2", JsepSupportedFloatTypes()),               \
       Resize);
 
-#define REGISTER_RESIZE_KERNEL(domain, sinceVersion)                   \
-  ONNX_OPERATOR_KERNEL_EX(                                             \
-      Resize,                                                          \
-      domain,                                                          \
-      sinceVersion,                                                    \
-      kJsExecutionProvider,                                            \
-      (*KernelDefBuilder::Create())                                    \
-          .InputMemoryType(OrtMemTypeCPUInput, 1)                      \
-          .InputMemoryType(OrtMemTypeCPUInput, 2)                      \
-          .InputMemoryType(OrtMemTypeCPUInput, 3)                      \
-          .TypeConstraint("T1", DataTypeImpl::GetTensorType<float>())  \
-          .TypeConstraint("T2", DataTypeImpl::GetTensorType<float>()), \
+#define REGISTER_RESIZE_KERNEL(domain, sinceVersion)        \
+  ONNX_OPERATOR_KERNEL_EX(                                  \
+      Resize,                                               \
+      domain,                                               \
+      sinceVersion,                                         \
+      kJsExecutionProvider,                                 \
+      (*KernelDefBuilder::Create())                         \
+          .InputMemoryType(OrtMemTypeCPUInput, 1)           \
+          .InputMemoryType(OrtMemTypeCPUInput, 2)           \
+          .InputMemoryType(OrtMemTypeCPUInput, 3)           \
+          .TypeConstraint("T1", JsepSupportedFloatTypes())  \
+          .TypeConstraint("T2", JsepSupportedFloatTypes()), \
       Resize);
 
 #define REGISTER_RESIZE_KERNEL_DOMAIN(domain)       \
@@ -51,6 +51,7 @@ namespace js {
   REGISTER_RESIZE_KERNEL(domain, 19);
 
 REGISTER_RESIZE_VERSIONED_10_10_KERNEL(kOnnxDomain);
+REGISTER_RESIZE_VERSIONED_10_10_KERNEL(kMSInternalNHWCDomain);
 REGISTER_RESIZE_KERNEL_DOMAIN(kOnnxDomain);
 REGISTER_RESIZE_KERNEL_DOMAIN(kMSInternalNHWCDomain);
 
diff --git a/onnxruntime/core/providers/js/operators/unary.cc b/onnxruntime/core/providers/js/operators/unary.cc
index e9bbfabcf86bd..78563d30b0136 100644
--- a/onnxruntime/core/providers/js/operators/unary.cc
+++ b/onnxruntime/core/providers/js/operators/unary.cc
@@ -123,7 +123,7 @@ JSEP_ELEMENTWISE_TYPED_KERNEL(Not, 1, bool, Not)
 
 // activation
 
-JSEP_CLASS_IMPL_ATTRIBUTE_FLOAT_2_DEFAULT(ClipV10, ClipV10, min, 3.402823e+38f, max, -3.402823e+38f)
+JSEP_CLASS_IMPL_ATTRIBUTE_FLOAT_2_DEFAULT(ClipV10, Clip, min, 3.402823e+38f, max, -3.402823e+38f)
 JSEP_ELEMENTWISE_VERSIONED_KERNEL(Clip, 6, 10, ClipV10)
 JSEP_KERNEL_IMPL(Clip, Clip)
 ONNX_OPERATOR_VERSIONED_KERNEL_EX(Clip, kOnnxDomain, 11, 11, kJsExecutionProvider,
diff --git a/onnxruntime/core/providers/js/operators/where.cc b/onnxruntime/core/providers/js/operators/where.cc
new file mode 100644
index 0000000000000..2f8f5e275aa98
--- /dev/null
+++ b/onnxruntime/core/providers/js/operators/where.cc
@@ -0,0 +1,41 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "core/providers/js/js_kernel.h"
+
+namespace onnxruntime {
+namespace js {
+
+#define REG_ELEMENTWISE_KERNEL(OP_TYPE, VERSION, KERNEL_CLASS)      \
+  ONNX_OPERATOR_KERNEL_EX(                                          \
+      OP_TYPE,                                                      \
+      kOnnxDomain,                                                  \
+      VERSION,                                                      \
+      kJsExecutionProvider,                                         \
+      KernelDefBuilder()                                            \
+          .TypeConstraint("T",                                      \
+                          {DataTypeImpl::GetTensorType<float>(),    \
+                           DataTypeImpl::GetTensorType<int32_t>(),  \
+                           DataTypeImpl::GetTensorType<uint32_t>(), \
+                           DataTypeImpl::GetTensorType<bool>()}),   \
+      KERNEL_CLASS);
+
+#define REG_ELEMENTWISE_VERSIONED_KERNEL(OP_TYPE, VERSION_FROM, VERSION_TO, KERNEL_CLASS) \
+  ONNX_OPERATOR_VERSIONED_KERNEL_EX(                                                      \
+      OP_TYPE,                                                                            \
+      kOnnxDomain,                                                                        \
+      VERSION_FROM, VERSION_TO,                                                           \
+      kJsExecutionProvider,                                                               \
+      KernelDefBuilder()                                                                  \
+          .TypeConstraint("T",                                                            \
+                          {DataTypeImpl::GetTensorType<float>(),                          \
+                           DataTypeImpl::GetTensorType<int32_t>(),                        \
+                           DataTypeImpl::GetTensorType<uint32_t>(),                       \
+                           DataTypeImpl::GetTensorType<bool>()}),                         \
+      KERNEL_CLASS);
+
+JSEP_KERNEL_IMPL(Where, Where)
+REG_ELEMENTWISE_VERSIONED_KERNEL(Where, 9, 15, Where);
+REG_ELEMENTWISE_KERNEL(Where, 16, Where);
+}  // namespace js
+}  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/migraphx/migraphx_call.cc b/onnxruntime/core/providers/migraphx/migraphx_call.cc
index cd947420b7615..5248ac2f39214 100644
--- a/onnxruntime/core/providers/migraphx/migraphx_call.cc
+++ b/onnxruntime/core/providers/migraphx/migraphx_call.cc
@@ -1,14 +1,14 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
-#include "core/providers/shared_library/provider_api.h"
 #include <unistd.h>
 #include <string.h>
 #include <miopen/miopen.h>
 #include <rocblas/rocblas.h>
-#include "migraphx_call.h"
 #include "core/common/common.h"
 #include "core/common/status.h"
+#include "core/providers/shared_library/provider_api.h"
+#include "core/providers/migraphx/migraphx_call.h"
 
 namespace onnxruntime {
 
diff --git a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc
index d2538544db60e..d1b3f19100942 100644
--- a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc
+++ b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc
@@ -1,5 +1,10 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License
+#include <fstream>
+#include <algorithm>
+#include <iterator>
+#include <unordered_map>
+#include <set>
 
 #include "core/providers/shared_library/provider_api.h"
 #define ORT_API_MANUAL_INIT
@@ -12,10 +17,6 @@
 #include "gpu_data_transfer.h"
 #include "migraphx_inc.h"
 
-#include <fstream>
-#include <algorithm>
-#include <iterator>
-
 // TODO: find a better way to share this
 #include "core/providers/rocm/rocm_stream_handle.h"
 
@@ -113,6 +114,45 @@ MIGraphXExecutionProvider::MIGraphXExecutionProvider(const MIGraphXExecutionProv
     fp16_enable_ = (std::stoi(fp16_enable_env) == 0 ? false : true);
   }
 
+  // whether int8 is enabled
+  const std::string int8_enable_env = onnxruntime::GetEnvironmentVar(migraphx_env_vars::kINT8Enable);
+  if (!int8_enable_env.empty()) {
+    int8_enable_ = (std::stoi(int8_enable_env) == 0 ? false : true);
+  }
+
+  if (int8_enable_) {
+    const std::string int8_calibration_cache_name_env =
+        onnxruntime::GetEnvironmentVar(migraphx_env_vars::kINT8CalibrationTableName);
+    if (!int8_calibration_cache_name_env.empty()) {
+      int8_calibration_cache_name_ = int8_calibration_cache_name_env;
+    }
+
+    const std::string cache_path = onnxruntime::GetEnvironmentVar(migraphx_env_vars::kCachePath);
+    if (!cache_path.empty()) {
+      calibration_cache_path_ = cache_path;
+    }
+
+    const std::string int8_use_native_migraphx_calibration_table_env =
+        onnxruntime::GetEnvironmentVar(migraphx_env_vars::kINT8UseNativeMIGraphXCalibrationTable);
+    if (!int8_use_native_migraphx_calibration_table_env.empty()) {
+      int8_use_native_migraphx_calibration_table_ =
+          (std::stoi(int8_use_native_migraphx_calibration_table_env) == 0 ? false : true);
+    }
+  }
+
+  if (int8_enable_) {
+    int8_calibration_cache_available_ = !int8_calibration_cache_name_.empty();
+  }
+
+  // Load INT8 calibration table
+  std::unordered_map<std::string, float> dynamic_range_map;
+  if (int8_enable_ && int8_calibration_cache_available_) {
+    const std::string calibration_cache_path = GetCachePath(calibration_cache_path_, int8_calibration_cache_name_);
+    if (!ReadDynamicRange(calibration_cache_path, int8_use_native_migraphx_calibration_table_, dynamic_range_map)) {
+      throw std::runtime_error("Failed to read INT8 calibration table " + calibration_cache_path);
+    }
+  }
+
   // dump unsupported ops
   const std::string dump_model_ops_env = onnxruntime::GetEnvironmentVar(migraphx_env_vars::dumpModelOps);
   if (!dump_model_ops_env.empty()) {
@@ -124,6 +164,15 @@ MIGraphXExecutionProvider::MIGraphXExecutionProvider(const MIGraphXExecutionProv
 
   MIOPEN_CALL_THROW(miopenCreate(&external_miopen_handle_));
   MIOPEN_CALL_THROW(miopenSetStream(external_miopen_handle_, stream_));
+
+  LOGS_DEFAULT(VERBOSE) << "[MIGraphX EP] MIGraphX provider options: "
+                        << "device_id: " << device_id_
+                        << ", migraphx_fp16_enable: " << fp16_enable_
+                        << ", migraphx_int8_enable: " << int8_enable_
+                        << ", dump_model_ops: " << dump_model_ops_
+                        << ", migraphx_int8_calibration_cache_name: " << int8_calibration_cache_name_
+                        << ", int8_calibration_cache_available: " << int8_calibration_cache_available_
+                        << ", use_native_migraphx_calibration_table: " << int8_use_native_migraphx_calibration_table_;
 }
 
 MIGraphXExecutionProvider::~MIGraphXExecutionProvider() {
@@ -467,7 +516,8 @@ static bool IsUnsupportedOpMode(const onnxruntime::GraphViewer& graph_viewer, co
   return false;
 }
 
-void SubgraphPostProcessing(const onnxruntime::GraphViewer& graph_viewer, std::vector<std::vector<NodeIndex>>& clusters, const logging::Logger& logger) {
+void SubgraphPostProcessing(const onnxruntime::GraphViewer& graph_viewer, std::vector<std::vector<NodeIndex>>& clusters,
+                            const logging::Logger& logger) {
   // Then check whether a subgraph should fallback to CPU
   // 1. Check whether a subgraph contains a RNN operator
   std::unordered_set<std::string> rnn_names = {"RNN", "GRU", "LSTM"};
@@ -642,7 +692,8 @@ std::unique_ptr<IndexedSubGraph> MIGraphXExecutionProvider::GetSubGraph(const st
             fused_inputs.erase(iter);
             erased.insert(output);
           } else if (erased.find(output) == erased.end()) {
-            if (std::find(graph_output_names.begin(), graph_output_names.end(), output->Name()) != graph_output_names.end()) {
+            if (std::find(graph_output_names.begin(),
+                          graph_output_names.end(), output->Name()) != graph_output_names.end()) {
               graph_outputs_to_add[output] = output_order;
             }
             fused_outputs[output] = output_order++;
@@ -660,7 +711,8 @@ std::unique_ptr<IndexedSubGraph> MIGraphXExecutionProvider::GetSubGraph(const st
         }
         // Only when output is neither in input list nor erased list, add the output to output list
         else if (erased.find(output) == erased.end()) {
-          if (std::find(graph_output_names.begin(), graph_output_names.end(), output->Name()) != graph_output_names.end()) {
+          if (std::find(graph_output_names.begin(),
+                        graph_output_names.end(), output->Name()) != graph_output_names.end()) {
             graph_outputs_to_add[output] = output_order;
           }
           fused_outputs[output] = output_order++;
@@ -733,31 +785,156 @@ static std::vector<NodeIndex>
 GetUnsupportedNodeIndices(const GraphViewer& graph_viewer,
                           /*out*/ std::unordered_set<std::string>& mgx_required_initializers,
                           const logging::Logger& logger) {
-  static std::set<std::string> mgx_supported_ops = {"Abs", "Acos", "Acosh", "Add", "And",
-                                                    "ArgMax", "ArgMin", "Asin", "Asinh", "Atan", "Atanh", "ATen", "AveragePool",
-                                                    "BatchNormalization", "Cast", "Ceil", "Celu", "Clip", "Concat", "Constant", "ConstantFill",
-                                                    "ConstantOfShape", "Conv", "ConvInteger", "ConvTranspose", "Cos", "Cosh", "CumSum",
-                                                    "DepthToSpace", "DequantizeLinear", "Div", "Dropout", "Elu", "Equal", "Erf", "Exp",
-                                                    "Expand", "EyeLike", "Flatten", "Floor", "GRU", "Gather", "GatherElements", "GatherND", "Gemm", "GlobalAveragePool",
-                                                    "GlobalMaxPool", "Greater", "GreaterOrEqual", "HardSigmoid", "HardSwish", "Identity",
-                                                    "If", "ImageScaler", "InstanceNormalization", "IsNan", "LeakyRelu", "Less", "LessOrEqual",
-                                                    "Log", "LogSoftmax", "Loop", "LpNormalization", "LRN", "LSTM", "MatMul", "MatMulInteger", "Max", "MaxPool",
-                                                    "Mean", "Min", "Mod", "Mul", "Multinomial", "Neg", "NonMaxSuppression", "NonZero", "Not",
-                                                    "OneHot", "Or", "Pad", "Pow", "PRelu", "QuantizeLinear", "RandomNormal", "RandomNormalLike",
-                                                    "RandomUniform", "RandomUniformLike", "Range", "Reciprocal", "ReduceL1", "ReduceL2",
-                                                    "ReduceLogSum", "ReduceLogSumExp", "ReduceMax", "ReduceMean", "ReduceMin", "ReduceProd",
-                                                    "ReduceSum", "ReduceSumSquare", "Relu", "Reshape", "Resize", "ReverseSequence", "RNN", "Roialign", "Round",
-                                                    "Scatter", "ScatterElements", "ScatterND", "Selu", "Shape", "Sigmoid", "Sign", "Sin", "Sinh", "Slice", "Softmax", "Softplus",
-                                                    "Softsign", "SpaceToDepth", "Split", "Sqrt", "Squeeze", "Sub", "Sum", "Tan", "Tanh",
-                                                    "ThresholdedRelu", "Tile", "TopK", "Transpose", "Trilu", "Unsqueeze", "Upsample", "Where", "Xor"};
+  static std::set<std::string> mgx_supported_ops = {"Abs",
+                                                    "Acos",
+                                                    "Acosh",
+                                                    "Add",
+                                                    "And",
+                                                    "ArgMax",
+                                                    "ArgMin",
+                                                    "Asin",
+                                                    "Asinh",
+                                                    "Atan",
+                                                    "Atanh",
+                                                    "ATen",
+                                                    "AveragePool",
+                                                    "BatchNormalization",
+                                                    "Cast",
+                                                    "Ceil",
+                                                    "Celu",
+                                                    "Clip",
+                                                    "Concat",
+                                                    "Constant",
+                                                    "ConstantFill",
+                                                    "ConstantOfShape",
+                                                    "Conv",
+                                                    "ConvInteger",
+                                                    "ConvTranspose",
+                                                    "Cos",
+                                                    "Cosh",
+                                                    "CumSum",
+                                                    "DepthToSpace",
+                                                    "DequantizeLinear",
+                                                    "Div",
+                                                    "Dropout",
+                                                    "Elu",
+                                                    "Equal",
+                                                    "Erf",
+                                                    "Exp",
+                                                    "Expand",
+                                                    "EyeLike",
+                                                    "Flatten",
+                                                    "Floor",
+                                                    "GRU",
+                                                    "Gather",
+                                                    "GatherElements",
+                                                    "GatherND",
+                                                    "Gemm",
+                                                    "GlobalAveragePool",
+                                                    "GlobalMaxPool",
+                                                    "Greater",
+                                                    "GreaterOrEqual",
+                                                    "HardSigmoid",
+                                                    "HardSwish",
+                                                    "Identity",
+                                                    "If",
+                                                    "ImageScaler",
+                                                    "InstanceNormalization",
+                                                    "IsNan",
+                                                    "LeakyRelu",
+                                                    "Less",
+                                                    "LessOrEqual",
+                                                    "Log",
+                                                    "LogSoftmax",
+                                                    "Loop",
+                                                    "LpNormalization",
+                                                    "LRN",
+                                                    "LSTM",
+                                                    "MatMul",
+                                                    "MatMulInteger",
+                                                    "Max",
+                                                    "MaxPool",
+                                                    "Mean",
+                                                    "Min",
+                                                    "Mod",
+                                                    "Mul",
+                                                    "Multinomial",
+                                                    "Neg",
+                                                    "NonMaxSuppression",
+                                                    "NonZero",
+                                                    "Not",
+                                                    "OneHot",
+                                                    "Or",
+                                                    "Pad",
+                                                    "Pow",
+                                                    "PRelu",
+                                                    "QLinearAdd",
+                                                    "QLinearConv",
+                                                    "QLinearMatMul",
+                                                    "QuantizeLinear",
+                                                    "RandomNormal",
+                                                    "RandomNormalLike",
+                                                    "RandomUniform",
+                                                    "RandomUniformLike",
+                                                    "Range",
+                                                    "Reciprocal",
+                                                    "ReduceL1",
+                                                    "ReduceL2",
+                                                    "ReduceLogSum",
+                                                    "ReduceLogSumExp",
+                                                    "ReduceMax",
+                                                    "ReduceMean",
+                                                    "ReduceMin",
+                                                    "ReduceProd",
+                                                    "ReduceSum",
+                                                    "ReduceSumSquare",
+                                                    "Relu",
+                                                    "Reshape",
+                                                    "Resize",
+                                                    "ReverseSequence",
+                                                    "RNN",
+                                                    "Roialign",
+                                                    "Round",
+                                                    "Scatter",
+                                                    "ScatterElements",
+                                                    "ScatterND",
+                                                    "Selu",
+                                                    "Shape",
+                                                    "Sigmoid",
+                                                    "Sign",
+                                                    "Sin",
+                                                    "Sinh",
+                                                    "Slice",
+                                                    "Softmax",
+                                                    "Softplus",
+                                                    "Softsign",
+                                                    "SpaceToDepth",
+                                                    "Split",
+                                                    "Sqrt",
+                                                    "Squeeze",
+                                                    "Sub",
+                                                    "Sum",
+                                                    "Tan",
+                                                    "Tanh",
+                                                    "ThresholdedRelu",
+                                                    "Tile",
+                                                    "TopK",
+                                                    "Transpose",
+                                                    "Trilu",
+                                                    "Unsqueeze",
+                                                    "Upsample",
+                                                    "Where",
+                                                    "Xor"};
   std::vector<NodeIndex> unsupported_nodes_idx;
   for (const auto& node_idx : graph_viewer.GetNodesInTopologicalOrder()) {
     if (IsNodeSupported(mgx_supported_ops, graph_viewer, node_idx, logger)) {
       // Collect inputs that are initializers
-      graph_viewer.GetNode(node_idx)->ForEachDef([&mgx_required_initializers, &graph_viewer](const onnxruntime::NodeArg& node_arg, bool is_input) {
+      graph_viewer.GetNode(node_idx)->ForEachDef([&mgx_required_initializers,
+                                                  &graph_viewer](const onnxruntime::NodeArg& node_arg, bool is_input) {
               if(is_input && graph_viewer.GetAllInitializedTensors().count(node_arg.Name())) {
                 mgx_required_initializers.insert(node_arg.Name());
-              } }, true);
+              } },
+                                                 true);
     } else {
       unsupported_nodes_idx.push_back(node_idx);
     }
@@ -770,7 +947,8 @@ GetUnsupportedNodeIndices(const GraphViewer& graph_viewer,
 // is split into 3 parts. supported_cluster + (UNsupported_node + rest_of_the_graph).
 // This functions returns vector of all supported_subgraphx by amdmigraphx
 static std::vector<std::vector<NodeIndex>>
-GetPartitionedSubgraphs(const std::vector<NodeIndex>& topological_order, const std::vector<NodeIndex>& unsupported_nodes) {
+GetPartitionedSubgraphs(const std::vector<NodeIndex>& topological_order,
+                        const std::vector<NodeIndex>& unsupported_nodes) {
   std::vector<std::vector<NodeIndex>> mgx_subgraphx;
 
   auto prev = topological_order.begin();
@@ -948,6 +1126,24 @@ Status MIGraphXExecutionProvider::Compile(const std::vector<FusedNodeAndGraph>&
         migraphx::quantize_fp16(prog);
       }
 
+      // Read in the calibration data and map it to an migraphx paramater map for the calibration ops
+      if (int8_enable_ && int8_calibration_cache_available_) {
+        migraphx::quantize_int8_options quant_opts;
+        migraphx::program_parameters quant_params;
+
+        auto param_shapes = prog.get_parameter_shapes();
+
+        for (auto&& name : param_shapes.names()) {
+          auto dynamic_range_i = dynamic_range_map.find(name);
+          if (dynamic_range_i != dynamic_range_map.end()) {
+            quant_params.add(name, migraphx::argument(param_shapes[name], &(dynamic_range_i->second)));
+          }
+        }
+
+        quant_opts.add_calibration_data(quant_params);
+        // perform static quantization on the programs
+        migraphx::quantize_int8(prog, t_, quant_opts);
+      }
       prog.compile(t_);
       auto prog_output_shapes = prog.get_output_shapes();
       for (std::size_t i = 0; i < output_names.size(); ++i) {
@@ -967,7 +1163,8 @@ Status MIGraphXExecutionProvider::Compile(const std::vector<FusedNodeAndGraph>&
       std::unique_ptr<MIGraphXFuncState> p = std::make_unique<MIGraphXFuncState>();
       *p = {context->allocate_func, context->release_func, context->allocator_handle, map_progs_[context->node_name],
             map_onnx_string_[context->node_name], options, t_, map_input_index_[context->node_name], &mgx_mu_,
-            map_no_input_shape_[context->node_name], fp16_enable_, dump_model_ops_};
+            map_no_input_shape_[context->node_name], fp16_enable_, int8_enable_,
+            int8_calibration_cache_available_, dynamic_range_map, dump_model_ops_};
       *state = p.release();
       return 0;
     };
@@ -982,12 +1179,15 @@ Status MIGraphXExecutionProvider::Compile(const std::vector<FusedNodeAndGraph>&
       MIGraphXFuncState* mgx_state = reinterpret_cast<MIGraphXFuncState*>(state);
 
       std::unordered_map<std::string, std::size_t>& map_input_name_index = mgx_state->input_name_indexes;
+      std::unordered_map<std::string, float>& map_dynamic_range = mgx_state->dynamic_range_map;
       migraphx::target t = mgx_state->t;
       migraphx::program& prog = mgx_state->prog;
       std::string& onnx_string = mgx_state->onnx_string;
       migraphx::onnx_options& cmp_options = mgx_state->options;
       bool& no_input_shape = mgx_state->no_input_shape;
       bool fp16_enable = mgx_state->fp16_enable;
+      bool int8_enable = mgx_state->int8_enable;
+      bool int8_calibration_cache_available = mgx_state->int8_calibration_cache_available;
 
       // mean no program at all, so need to get the input shape info
       // from input data
@@ -1043,6 +1243,25 @@ Status MIGraphXExecutionProvider::Compile(const std::vector<FusedNodeAndGraph>&
           migraphx::quantize_fp16(prog);
         }
 
+        // Read in the calibration data and map it to an migraphx paramater map for the calibration ops
+        if (int8_enable && int8_calibration_cache_available) {
+          migraphx::quantize_int8_options quant_opts;
+          migraphx::program_parameters quant_params;
+
+          auto param_shapes = prog.get_parameter_shapes();
+
+          for (auto&& name : param_shapes.names()) {
+            auto dynamic_range_i = map_dynamic_range.find(name);
+            if (dynamic_range_i != map_dynamic_range.end()) {
+              quant_params.add(name, migraphx::argument(param_shapes[name], &(dynamic_range_i->second)));
+            }
+          }
+
+          quant_opts.add_calibration_data(quant_params);
+          // perform static quantization on the programs
+          migraphx::quantize_int8(prog, t, quant_opts);
+        }
+
         prog.compile(t);
         mgx_state->prog = prog;
         param_shapes = prog.get_parameter_shapes();
@@ -1137,9 +1356,11 @@ Status MIGraphXExecutionProvider::Compile(const std::vector<FusedNodeAndGraph>&
   return Status::OK();
 }
 
-void MIGraphXExecutionProvider::RegisterStreamHandlers(IStreamCommandHandleRegistry& stream_handle_registry, AllocatorMap& allocators) const {
+void MIGraphXExecutionProvider::RegisterStreamHandlers(IStreamCommandHandleRegistry& stream_handle_registry,
+                                                       AllocatorMap& allocators) const {
   auto allocator = allocators[GetOrtDeviceByMemType(OrtMemTypeCPU)];
-  RegisterRocmStreamHandles(stream_handle_registry, OrtDevice::GPU, allocator, true, stream_, false /*TODO:external_stream_*/, external_miopen_handle_, external_rocblas_handle_);
+  RegisterRocmStreamHandles(stream_handle_registry, OrtDevice::GPU, allocator, true, stream_,
+                            false /*TODO:external_stream_*/, external_miopen_handle_, external_rocblas_handle_);
 }
 
 OrtDevice MIGraphXExecutionProvider::GetOrtDeviceByMemType(OrtMemType mem_type) const {
diff --git a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.h b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.h
index 1f591f9a1c0a5..c094be51012e4 100644
--- a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.h
+++ b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.h
@@ -3,23 +3,29 @@
 
 #pragma once
 
+#include <miopen/miopen.h>
+#include <rocblas/rocblas.h>
+
 #include "core/framework/arena_extend_strategy.h"
 #include "core/framework/execution_provider.h"
 #include "core/platform/ort_mutex.h"
-#include "migraphx_execution_provider_info.h"
+#include "core/providers/migraphx/migraphx_execution_provider_info.h"
+#include "core/providers/migraphx/migraphx_inc.h"
 
 #include <map>
-#include "migraphx_inc.h"
+#include <unordered_map>
 // TODO: find a better way to share this
 // #include "core/providers/cuda/rocm_stream_handle.h"
-#include <miopen/miopen.h>
-#include <rocblas/rocblas.h>
 
 namespace onnxruntime {
 
 namespace migraphx_env_vars {
-static const std::string kFP16Enable = "ORT_MIGRAPHX_FP16_ENABLE";
-static const std::string dumpModelOps = "ORT_MIGRAPHX_DUMP_MODEL_OPS";
+static const char kFP16Enable[] = "ORT_MIGRAPHX_FP16_ENABLE";
+static const char kINT8Enable[] = "ORT_MIGRAPHX_INT8_ENABLE";
+static const char dumpModelOps[] = "ORT_MIGRAPHX_DUMP_MODEL_OPS";
+static const char kINT8CalibrationTableName[] = "ORT_MIGRAPHX_INT8_CALIBRATION_TABLE_NAME";
+static const char kCachePath[] = "ORT_MIGRAPHX_CACHE_PATH";
+static const char kINT8UseNativeMIGraphXCalibrationTable[] = "ORT_MIGRAPHX_INT8_USE_NATIVE_CALIBRATION_TABLE";
 };  // namespace migraphx_env_vars
 
 // Information to construct kernel function state.
@@ -35,6 +41,9 @@ struct MIGraphXFuncState {
   OrtMutex* mgx_mu_ptr = nullptr;
   bool no_input_shape = false;
   bool fp16_enable = false;
+  bool int8_enable = false;
+  bool int8_calibration_cache_available = false;
+  std::unordered_map<std::string, float> dynamic_range_map;
   bool dump_model_ops = false;
 };
 
@@ -69,6 +78,12 @@ class MIGraphXExecutionProvider : public IExecutionProvider {
 
  private:
   bool fp16_enable_ = false;
+  bool int8_enable_ = false;
+  std::string int8_calibration_cache_name_;
+  bool int8_calibration_cache_available_ = false;
+  bool int8_use_native_migraphx_calibration_table_ = false;
+  std::string calibration_cache_path_;
+  std::unordered_map<std::string, float> dynamic_range_map;
   bool dump_model_ops_ = false;
   int device_id_;
   migraphx::target t_;
diff --git a/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.cc b/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.cc
index bdf8388e75c15..b7d7a77853df6 100644
--- a/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.cc
+++ b/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.cc
@@ -14,7 +14,10 @@ namespace migraphx {
 namespace provider_option_names {
 constexpr const char* kDeviceId = "device_id";
 constexpr const char* kFp16Enable = "trt_fp16_enable";
-constexpr const char* kInt8Enable = "trt_int8_enable";
+constexpr const char* kInt8Enable = "migx_int8_enable";
+constexpr const char* kInt8CalibTable = "migx_int8_calibration_table_name";
+constexpr const char* kInt8UseNativeCalibTable = "migx_int8_use_native_calibration_table";
+
 }  // namespace provider_option_names
 }  // namespace migraphx
 
@@ -45,7 +48,8 @@ ProviderOptions MIGraphXExecutionProviderInfo::ToProviderOptions(const MIGraphXE
   const ProviderOptions options{
       {migraphx::provider_option_names::kDeviceId, MakeStringWithClassicLocale(info.device_id)},
       {migraphx::provider_option_names::kFp16Enable, MakeStringWithClassicLocale(info.fp16_enable)},
-      {migraphx::provider_option_names::kInt8Enable, MakeStringWithClassicLocale(info.int8_enable)}};
+      {migraphx::provider_option_names::kInt8Enable, MakeStringWithClassicLocale(info.int8_enable)},
+  };
   return options;
 }
 
@@ -53,7 +57,8 @@ ProviderOptions MIGraphXExecutionProviderInfo::ToProviderOptions(const OrtMIGrap
   const ProviderOptions options{
       {migraphx::provider_option_names::kDeviceId, MakeStringWithClassicLocale(info.device_id)},
       {migraphx::provider_option_names::kFp16Enable, MakeStringWithClassicLocale(info.migraphx_fp16_enable)},
-      {migraphx::provider_option_names::kInt8Enable, MakeStringWithClassicLocale(info.migraphx_int8_enable)}};
+      {migraphx::provider_option_names::kInt8Enable, MakeStringWithClassicLocale(info.migraphx_int8_enable)},
+  };
   return options;
 }
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.h b/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.h
index 472d418c9099c..18ac30fdc1283 100644
--- a/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.h
+++ b/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.h
@@ -4,6 +4,7 @@
 #pragma once
 
 #include <limits>
+#include <string>
 
 #include "core/framework/ortdevice.h"
 #include "core/framework/provider_options.h"
@@ -16,6 +17,8 @@ struct MIGraphXExecutionProviderInfo {
   int device_id{0};
   bool fp16_enable{false};
   bool int8_enable{false};
+  std::string int8_calibration_table_name{""};
+  bool int8_use_native_calibration_table{false};
 
   static MIGraphXExecutionProviderInfo FromProviderOptions(const ProviderOptions& options);
   static ProviderOptions ToProviderOptions(const MIGraphXExecutionProviderInfo& info);
diff --git a/onnxruntime/core/providers/migraphx/migraphx_execution_provider_utils.h b/onnxruntime/core/providers/migraphx/migraphx_execution_provider_utils.h
index fb0be15986111..071070e92a209 100644
--- a/onnxruntime/core/providers/migraphx/migraphx_execution_provider_utils.h
+++ b/onnxruntime/core/providers/migraphx/migraphx_execution_provider_utils.h
@@ -2,8 +2,20 @@
 // Licensed under the MIT License
 
 #pragma once
+
+#include <fstream>
+#include <unordered_map>
+#include <string>
+#include <iostream>
+#include <filesystem>
+#include <memory>
+#include "flatbuffers/idl.h"
+#include "core/providers/migraphx/ort_trt_int8_cal_table.fbs.h"
 #include "core/session/onnxruntime_cxx_api.h"
 #include "core/framework/execution_provider.h"
+#include "core/common/path_string.h"
+
+namespace fs = std::filesystem;
 
 namespace onnxruntime {
 
@@ -101,7 +113,10 @@ bool canEvalShapeGeneral(const GraphViewer& graph, const Node* node, std::vector
   return true;
 }
 
-bool canEvalNodeArgument(const GraphViewer& graph, const Node* node, std::vector<std::size_t> indices, std::vector<NodeIndex>& input_nodes) {
+bool canEvalNodeArgument(const GraphViewer& graph,
+                         const Node* node,
+                         std::vector<std::size_t> indices,
+                         std::vector<NodeIndex>& input_nodes) {
   input_nodes.clear();
   std::vector<const Node*> in_nodes;
   for (auto nit = node->InputNodesBegin(); nit != node->InputNodesEnd(); ++nit) {
@@ -137,4 +152,102 @@ bool canEvalNodeArgument(const GraphViewer& graph, const Node* node, std::vector
   return true;
 }
 
+float ConvertSinglePrecisionIEEE754ToFloat(uint32_t input) {
+  int s = (input >> 31) & 0x01;
+  int e = ((input & 0x7f800000) >> 23) - 127;
+  int p = -1;
+  double m = 0.0;
+  for (int i = 0; i < 23; ++i) {
+    m += ((input >> (23 - i - 1)) & 0x01) * pow(2.0, p--);
+  }
+  return static_cast<float>((s ? -1 : 1) * pow(2.0, e) * (m + 1.0));
+}
+
+/*
+ * Read calibration table for INT8 quantization
+ * Two kind of calibration tables are supported,
+ * 1. ORT generated calibration table
+ * The table is pre-serialized by flatbuffers.
+ * Each entry in the table is a key-value pair,
+ * key: tensor name, value: maximum absolute value in floating point
+ * For example,
+ *   data_0 2.008338
+ *   ...
+ * 2. Native TensorRT generated calibration table
+ * Data format is defined by TensorRT as,
+ * tensor name : scale in 32-bit single precision IEEE754 format
+ * For example,
+ *   TRT-7103-EntropyCalibration2
+ *   data_0: 4000889d
+ *   ...
+ *
+ * Taken from the tensorRT EP to allow MIGraphX EP to reuse calibration tables for existing models
+ *
+ */
+bool ReadDynamicRange(const std::string file_name,
+                      const bool is_calibration_table,
+                      std::unordered_map<std::string,
+                                         float>& dynamic_range_map) {
+  std::ifstream infile(file_name, std::ios::binary | std::ios::in);
+  if (!infile) {
+    return false;
+  }
+
+  if (is_calibration_table) {
+    // Native TensorRT generated calibration table
+    std::string line;
+    char delim = ':';
+    if (std::getline(infile, line)) {
+      std::istringstream first_line(line);
+      std::string version;
+      std::getline(first_line, version, delim);
+      std::size_t found = version.find("TRT-");
+      if (found != std::string::npos) {
+        while (std::getline(infile, line)) {
+          std::istringstream in_line(line);
+          std::string str;
+          std::getline(in_line, str, delim);
+          std::string tensor_name = str;
+          std::getline(in_line, str, delim);
+          uint32_t scale_int = std::strtoul(str.c_str(), nullptr, 16);
+          float scale_float = ConvertSinglePrecisionIEEE754ToFloat(scale_int);
+          float dynamic_range = scale_float * 127.0f;
+          dynamic_range_map[tensor_name] = dynamic_range;
+        }
+      } else {
+        throw std::runtime_error("This is not a TensorRT generated calibration table " + file_name);
+      }
+    }
+  } else {
+    // ORT generated calibration table
+    infile.seekg(0, std::ios::end);
+    size_t length = infile.tellg();
+    infile.seekg(0, std::ios::beg);
+    std::unique_ptr<char[]> data{new char[length]};
+    infile.read(reinterpret_cast<char*>(data.get()), length);
+    infile.close();
+    auto flat_table = flatbuffers::GetRoot<CalTableFlatBuffers::TrtTable>(reinterpret_cast<char*>(data.get()));
+    auto flat_dict = flat_table->dict();
+    for (size_t i = 0, end = flat_dict->size(); i < end; ++i) {
+      flatbuffers::uoffset_t idx = static_cast<flatbuffers::uoffset_t>(i);
+      dynamic_range_map[flat_dict->Get(idx)->key()->str()] = std::stof(flat_dict->Get(idx)->value()->str());
+    }
+  }
+  return true;
+}
+
+/*
+ * Get cache by name
+ *
+ */
+std::string GetCachePath(const std::string& root, const std::string& name) {
+  if (root.empty()) {
+    return name;
+  } else {
+    fs::path path = root;
+    path.append(name);
+    return path.string();
+  }
+}
+
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/migraphx/migraphx_provider_factory.cc b/onnxruntime/core/providers/migraphx/migraphx_provider_factory.cc
index 8358ca5fcda95..f985682ddc735 100644
--- a/onnxruntime/core/providers/migraphx/migraphx_provider_factory.cc
+++ b/onnxruntime/core/providers/migraphx/migraphx_provider_factory.cc
@@ -1,5 +1,6 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License
+#include <atomic>
 
 #include "core/providers/shared_library/provider_api.h"
 #include "core/providers/migraphx/migraphx_provider_factory.h"
@@ -8,7 +9,6 @@
 #include "hip_allocator.h"
 #include "gpu_data_transfer.h"
 #include "core/framework/provider_options.h"
-#include <atomic>
 
 #include "core/session/onnxruntime_c_api.h"
 
@@ -48,15 +48,37 @@ struct MIGraphX_Provider : Provider {
     info.target_device = "gpu";
     info.fp16_enable = options.migraphx_fp16_enable;
     info.int8_enable = options.migraphx_int8_enable;
+    info.int8_calibration_table_name = "";
+    if (options.migraphx_int8_calibration_table_name != nullptr) {
+      info.int8_calibration_table_name = options.migraphx_int8_calibration_table_name;
+    }
+    info.int8_use_native_calibration_table = options.migraphx_use_native_calibration_table != 0;
     return std::make_shared<MIGraphXProviderFactory>(info);
   }
 
   void UpdateProviderOptions(void* provider_options, const ProviderOptions& options) override {
     auto internal_options = onnxruntime::MIGraphXExecutionProviderInfo::FromProviderOptions(options);
-    auto& trt_options = *reinterpret_cast<OrtMIGraphXProviderOptions*>(provider_options);
-    trt_options.device_id = internal_options.device_id;
-    trt_options.migraphx_fp16_enable = internal_options.fp16_enable;
-    trt_options.migraphx_int8_enable = internal_options.int8_enable;
+    auto& migx_options = *reinterpret_cast<OrtMIGraphXProviderOptions*>(provider_options);
+    migx_options.device_id = internal_options.device_id;
+    migx_options.migraphx_fp16_enable = internal_options.fp16_enable;
+    migx_options.migraphx_int8_enable = internal_options.int8_enable;
+
+    char* dest = nullptr;
+    auto str_size = internal_options.int8_calibration_table_name.size();
+    if (str_size == 0) {
+      migx_options.migraphx_int8_calibration_table_name = nullptr;
+    } else {
+      dest = new char[str_size + 1];
+#ifdef _MSC_VER
+      strncpy_s(dest, str_size + 1, internal_options.int8_calibration_table_name.c_str(), str_size);
+#else
+      strncpy(dest, internal_options.int8_calibration_table_name.c_str(), str_size);
+#endif
+      dest[str_size] = '\0';
+      migx_options.migraphx_int8_calibration_table_name = (const char*)dest;
+    }
+
+    migx_options.migraphx_use_native_calibration_table = internal_options.int8_use_native_calibration_table;
   }
 
   ProviderOptions GetProviderOptions(const void* provider_options) override {
diff --git a/onnxruntime/core/providers/migraphx/ort_trt_int8_cal_table.fbs.h b/onnxruntime/core/providers/migraphx/ort_trt_int8_cal_table.fbs.h
new file mode 100644
index 0000000000000..9639040f772da
--- /dev/null
+++ b/onnxruntime/core/providers/migraphx/ort_trt_int8_cal_table.fbs.h
@@ -0,0 +1,145 @@
+// automatically generated by the FlatBuffers compiler, do not modify
+
+#ifndef ONNXRUNTIME_CORE_PROVIDERS_MIGRAPHX_ORT_TRT_INT8_CAL_TABLE_FBS_H_
+#define ONNXRUNTIME_CORE_PROVIDERS_MIGRAPHX_ORT_TRT_INT8_CAL_TABLE_FBS_H_
+
+#include <vector>
+#include "flatbuffers/flatbuffers.h"
+
+namespace CalTableFlatBuffers {
+
+struct KeyValue;
+struct KeyValueBuilder;
+
+struct TrtTable;
+struct TrtTableBuilder;
+
+struct KeyValue FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef KeyValueBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+    VT_KEY = 4,
+    VT_VALUE = 6
+  };
+  const flatbuffers::String* key() const {
+    return GetPointer<const flatbuffers::String*>(VT_KEY);
+  }
+  bool KeyCompareLessThan(const KeyValue* o) const {
+    return *key() < *o->key();
+  }
+  int KeyCompareWithValue(const char* val) const {
+    return strcmp(key()->c_str(), val);
+  }
+  const flatbuffers::String* value() const {
+    return GetPointer<const flatbuffers::String*>(VT_VALUE);
+  }
+  bool Verify(flatbuffers::Verifier& verifier) const {
+    return VerifyTableStart(verifier) &&
+           VerifyOffsetRequired(verifier, VT_KEY) &&
+           verifier.VerifyString(key()) &&
+           VerifyOffset(verifier, VT_VALUE) &&
+           verifier.VerifyString(value()) &&
+           verifier.EndTable();
+  }
+};
+
+struct KeyValueBuilder {
+  typedef KeyValue Table;
+  flatbuffers::FlatBufferBuilder& fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_key(flatbuffers::Offset<flatbuffers::String> key) {
+    fbb_.AddOffset(KeyValue::VT_KEY, key);
+  }
+  void add_value(flatbuffers::Offset<flatbuffers::String> value) {
+    fbb_.AddOffset(KeyValue::VT_VALUE, value);
+  }
+  explicit KeyValueBuilder(flatbuffers::FlatBufferBuilder& _fbb)
+      : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  KeyValueBuilder& operator=(const KeyValueBuilder&);
+  flatbuffers::Offset<KeyValue> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<KeyValue>(end);
+    fbb_.Required(o, KeyValue::VT_KEY);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<KeyValue> CreateKeyValue(
+    flatbuffers::FlatBufferBuilder& _fbb,
+    flatbuffers::Offset<flatbuffers::String> key = 0,
+    flatbuffers::Offset<flatbuffers::String> value = 0) {
+  KeyValueBuilder builder_(_fbb);
+  builder_.add_value(value);
+  builder_.add_key(key);
+  return builder_.Finish();
+}
+
+inline flatbuffers::Offset<KeyValue> CreateKeyValueDirect(
+    flatbuffers::FlatBufferBuilder& _fbb,
+    const char* key = nullptr,
+    const char* value = nullptr) {
+  auto key__ = key ? _fbb.CreateString(key) : 0;
+  auto value__ = value ? _fbb.CreateString(value) : 0;
+  return CalTableFlatBuffers::CreateKeyValue(
+      _fbb,
+      key__,
+      value__);
+}
+
+struct TrtTable FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef TrtTableBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+    VT_DICT = 4
+  };
+  const flatbuffers::Vector<flatbuffers::Offset<CalTableFlatBuffers::KeyValue>>* dict() const {
+    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<CalTableFlatBuffers::KeyValue>>*>(VT_DICT);
+  }
+  bool Verify(flatbuffers::Verifier& verifier) const {
+    return VerifyTableStart(verifier) &&
+           VerifyOffset(verifier, VT_DICT) &&
+           verifier.VerifyVector(dict()) &&
+           verifier.VerifyVectorOfTables(dict()) &&
+           verifier.EndTable();
+  }
+};
+
+struct TrtTableBuilder {
+  typedef TrtTable Table;
+  flatbuffers::FlatBufferBuilder& fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_dict(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<CalTableFlatBuffers::KeyValue>>> dict) {
+    fbb_.AddOffset(TrtTable::VT_DICT, dict);
+  }
+  explicit TrtTableBuilder(flatbuffers::FlatBufferBuilder& _fbb)
+      : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  TrtTableBuilder& operator=(const TrtTableBuilder&);
+  flatbuffers::Offset<TrtTable> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<TrtTable>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<TrtTable> CreateTrtTable(
+    flatbuffers::FlatBufferBuilder& _fbb,
+    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<CalTableFlatBuffers::KeyValue>>> dict = 0) {
+  TrtTableBuilder builder_(_fbb);
+  builder_.add_dict(dict);
+  return builder_.Finish();
+}
+
+inline flatbuffers::Offset<TrtTable> CreateTrtTableDirect(
+    flatbuffers::FlatBufferBuilder& _fbb,
+    std::vector<flatbuffers::Offset<CalTableFlatBuffers::KeyValue>>* dict = nullptr) {
+  auto dict__ = dict ? _fbb.CreateVectorOfSortedTables<CalTableFlatBuffers::KeyValue>(dict) : 0;
+  return CalTableFlatBuffers::CreateTrtTable(
+      _fbb,
+      dict__);
+}
+
+}  // namespace CalTableFlatBuffers
+
+#endif  // ONNXRUNTIME_CORE_PROVIDERS_MIGRAPHX_ORT_TRT_INT8_CAL_TABLE_FBS_H_
diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc
index 78467b646b195..7e4c0dc8d7267 100644
--- a/onnxruntime/core/providers/openvino/backend_manager.cc
+++ b/onnxruntime/core/providers/openvino/backend_manager.cc
@@ -2,9 +2,7 @@
 // Licensed under the MIT License
 
 #include <fstream>
-#include <vector>
-#include <string>
-#include <memory>
+#include <utility>
 
 #include "core/providers/shared_library/provider_api.h"
 #include "contexts.h"
@@ -18,7 +16,8 @@ namespace openvino_ep {
 static std::unique_ptr<GlobalContext> g_global_context;
 
 GlobalContext& BackendManager::GetGlobalContext() {
-  // This is not thread safe to call for the first time, but it is first called on the main thread by the constructor so it is safe.
+  // This is not thread safe to call for the first time,
+  // but it is first called on the main thread by the constructor so it is safe.
   if (!g_global_context)
     g_global_context = std::make_unique<GlobalContext>();
   return *g_global_context;
@@ -88,7 +87,9 @@ BackendManager::BackendManager(const onnxruntime::Node& fused_node,
                          << "Backend created for graph " << subgraph_context_.subgraph_name;
     }
   } else {
-    LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has concrete input dims. Initializing backend for graph " << subgraph_context_.subgraph_name;
+    LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has concrete input dims. "
+                       << "Initializing backend for graph "
+                       << subgraph_context_.subgraph_name;
 
     subgraph_context_.has_dynamic_input_shape = false;
     try {
@@ -104,7 +105,7 @@ BackendManager::BackendManager(const onnxruntime::Node& fused_node,
 bool BackendManager::ModelHasBatchedInputs(const ONNX_NAMESPACE::ModelProto& model_proto) const {
   bool has_batched_inputs = true;
 
-  for (int i = 0; i < (int)subgraph_context_.input_indexes.size(); i++) {
+  for (int i = 0; i < static_cast<int>(subgraph_context_.input_indexes.size()); i++) {
     auto& input = model_proto.graph().input(subgraph_context_.input_indexes[i]);
 
     // Batch-process only raw image inputs (NCHW or NHWC layouts)
@@ -215,7 +216,10 @@ BackendManager::ReWriteInputShapeInfo(const ONNX_NAMESPACE::ModelProto& model_pr
   auto graph_proto = model_copy->mutable_graph();
 
   for (size_t i = 0, limit = input_shapes.size(); i < limit; i++) {
-    auto g_in_shape = graph_proto->mutable_input((int)i)->mutable_type()->mutable_tensor_type()->mutable_shape();
+    auto g_in_shape = graph_proto->mutable_input(static_cast<int>(i))
+                          ->mutable_type()
+                          ->mutable_tensor_type()
+                          ->mutable_shape();
     g_in_shape->clear_dim();
     const auto& shape = input_shapes[i];
     for (size_t dim = 0, end = shape.size(); dim < end; dim++) {
@@ -234,7 +238,11 @@ BackendManager::ReWriteBatchDimWithOne(const ONNX_NAMESPACE::ModelProto& model_p
   auto graph_proto = model_copy->mutable_graph();
 
   for (int i = 0; i < graph_proto->input_size(); i++) {
-    ONNX_NAMESPACE::TensorShapeProto* g_in_shape = graph_proto->mutable_input((int)i)->mutable_type()->mutable_tensor_type()->mutable_shape();
+    ONNX_NAMESPACE::TensorShapeProto* g_in_shape =
+        graph_proto->mutable_input(static_cast<int>(i))
+            ->mutable_type()
+            ->mutable_tensor_type()
+            ->mutable_shape();
     g_in_shape->mutable_dim(0)->clear_dim_value();
     g_in_shape->mutable_dim(0)->set_dim_value(1);
   }
diff --git a/onnxruntime/core/providers/openvino/backend_manager.h b/onnxruntime/core/providers/openvino/backend_manager.h
index c247ab60d3a6f..a177324b23f7d 100644
--- a/onnxruntime/core/providers/openvino/backend_manager.h
+++ b/onnxruntime/core/providers/openvino/backend_manager.h
@@ -3,6 +3,11 @@
 
 #pragma once
 
+#include <vector>
+#include <map>
+#include <memory>
+#include <string>
+
 #include "ov_interface.h"
 #include "contexts.h"
 #include "ibackend.h"
@@ -13,7 +18,9 @@ namespace openvino_ep {
 // Singleton class that manages all the backends
 class BackendManager {
  public:
-  BackendManager(const onnxruntime::Node& fused_node, const onnxruntime::GraphViewer& subgraph, const logging::Logger& logger);
+  BackendManager(const onnxruntime::Node& fused_node,
+                 const onnxruntime::GraphViewer& subgraph,
+                 const logging::Logger& logger);
   void Compute(OrtKernelContext* context);
   void ShutdownBackendManager();
   static GlobalContext& GetGlobalContext();
@@ -21,7 +28,9 @@ class BackendManager {
 
  private:
   std::unique_ptr<ONNX_NAMESPACE::ModelProto> GetModelProtoFromFusedNode(
-      const onnxruntime::Node& fused_node, const onnxruntime::GraphViewer& subgraph, const logging::Logger& logger) const;
+      const onnxruntime::Node& fused_node,
+      const onnxruntime::GraphViewer& subgraph,
+      const logging::Logger& logger) const;
   bool ModelHasSymbolicInputDims(const onnxruntime::GraphViewer& subgraph) const;
   bool ModelHasBatchedInputs(const ONNX_NAMESPACE::ModelProto& model_proto) const;
 
diff --git a/onnxruntime/core/providers/openvino/backend_utils.cc b/onnxruntime/core/providers/openvino/backend_utils.cc
index d49968cdb7f3d..d47c91dd46622 100644
--- a/onnxruntime/core/providers/openvino/backend_utils.cc
+++ b/onnxruntime/core/providers/openvino/backend_utils.cc
@@ -1,9 +1,7 @@
 // Copyright (C) 2019-2022 Intel Corporation
 // Licensed under the MIT License
 
-#include <map>
-#include <string>
-#include <memory>
+#include <algorithm>
 #include <sstream>
 #include <fstream>
 
@@ -58,7 +56,7 @@ CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext
   try {
     auto cnn_network = global_context.ie_core.ReadModel(model);
     if ((subgraph_context.precision == "FP16") &&
-        (global_context.device_type.find("VPUX") == std::string::npos)) {
+        (global_context.device_type.find("NPU") == std::string::npos)) {
       // FP16 transformations
       ov::pass::ConvertFP32ToFP16 pass_obj;
       pass_obj.run_on_model(cnn_network);
@@ -88,7 +86,8 @@ CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext
       size_t index = results.size() - 1;
 
       for (auto it = results.rbegin(); it != results.rend(); ++it) {
-        if (auto const_node = std::dynamic_pointer_cast<ov::op::v0::Constant>((*it)->input_value(0).get_node_shared_ptr())) {
+        if (auto const_node =
+                std::dynamic_pointer_cast<ov::op::v0::Constant>((*it)->input_value(0).get_node_shared_ptr())) {
           const_outputs_map[(*it)->get_friendly_name()] = const_node;
           results.erase(results.begin() + index);
         }
@@ -254,7 +253,7 @@ void FillOutputBlob(OVTensorPtr outputBlob, Ort::UnownedValue& output_tensor,
 
 void printPerformanceCounts(const std::vector<OVProfilingInfo>& performanceMap,
                             std::ostream& stream, std::string deviceName) {
-  long long totalTime = 0;
+  int64_t totalTime = 0;
   // Print performance counts
   stream << std::endl
          << "performance counts:" << std::endl
diff --git a/onnxruntime/core/providers/openvino/backend_utils.h b/onnxruntime/core/providers/openvino/backend_utils.h
index de78a150fe2dd..82b0351e87da5 100644
--- a/onnxruntime/core/providers/openvino/backend_utils.h
+++ b/onnxruntime/core/providers/openvino/backend_utils.h
@@ -4,9 +4,15 @@
 #pragma once
 
 #define ORT_API_MANUAL_INIT
+#include <iomanip>
+#include <unordered_map>
+#include <map>
+#include <memory>
+#include <vector>
+#include <string>
+
 #include "core/session/onnxruntime_cxx_api.h"
 #include "contexts.h"
-#include <iomanip>
 #include "ov_interface.h"
 #ifdef _WIN32
 #include <direct.h>
@@ -57,7 +63,9 @@ void FillOutputBlob(OVTensorPtr outputBlob, Ort::UnownedValue& output_tensor,
                     size_t batch_slice_idx);
 
 std::shared_ptr<OVNetwork>
-CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext& global_context, const SubGraphContext& subgraph_context,
+CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto,
+              const GlobalContext& global_context,
+              const SubGraphContext& subgraph_context,
               std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map);
 
 void printPerformanceCounts(const std::vector<OVProfilingInfo>& performanceMap,
diff --git a/onnxruntime/core/providers/openvino/backends/backend_factory.cc b/onnxruntime/core/providers/openvino/backends/backend_factory.cc
index c339f24e7022f..c586dd8b38af9 100644
--- a/onnxruntime/core/providers/openvino/backends/backend_factory.cc
+++ b/onnxruntime/core/providers/openvino/backends/backend_factory.cc
@@ -16,7 +16,7 @@ BackendFactory::MakeBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
                             const SubGraphContext& subgraph_context) {
   std::string type = global_context.device_type;
   if (type == "CPU" || type.find("GPU") != std::string::npos ||
-      type.find("VPUX") != std::string::npos ||
+      type.find("NPU") != std::string::npos ||
       type.find("HETERO") != std::string::npos ||
       type.find("MULTI") != std::string::npos ||
       type.find("AUTO") != std::string::npos) {
diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.cc b/onnxruntime/core/providers/openvino/backends/basic_backend.cc
index f9517d7942664..09e1322ff59fb 100644
--- a/onnxruntime/core/providers/openvino/backends/basic_backend.cc
+++ b/onnxruntime/core/providers/openvino/backends/basic_backend.cc
@@ -6,10 +6,10 @@
 #include <memory>
 #include <sstream>
 #include <fstream>
+#include <utility>
 
 #include "core/providers/shared_library/provider_api.h"
 #include "../backend_utils.h"
-// #include <ngraph/pass/constant_folding.hpp>
 #include "basic_backend.h"
 #include "../backend_manager.h"
 
@@ -57,33 +57,39 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
         cl_context ctx = static_cast<cl_context>(global_context_.context);
         remote_context_ = new ov::intel_gpu::ocl::ClContext(global_context_.ie_core.Get(), ctx);
         ie_cnn_network_ = CreateOVModel(model_proto, global_context_, subgraph_context_, const_outputs_map_);
-        exe_network_ = global_context_.ie_core.LoadNetwork(ie_cnn_network_, remote_context_, subgraph_context_.subgraph_name);
+        exe_network_ = global_context_.ie_core.LoadNetwork(
+            ie_cnn_network_, remote_context_, subgraph_context_.subgraph_name);
         LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
       } else {
         ie_cnn_network_ = CreateOVModel(model_proto, global_context_, subgraph_context_, const_outputs_map_);
-        exe_network_ = global_context_.ie_core.LoadNetwork(ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name);
+        exe_network_ = global_context_.ie_core.LoadNetwork(
+            ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name);
         LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
       }
 #else
 #if defined(OPENVINO_2023_0) || (OPENVINO_2023_1)
       if (!subgraph_context_.has_dynamic_input_shape && dev_prec != "CPU_FP16") {
         const std::string model = model_proto.SerializeAsString();
-        exe_network_ = global_context_.ie_core.LoadNetwork(model, hw_target, device_config, subgraph_context_.subgraph_name);
+        exe_network_ = global_context_.ie_core.LoadNetwork(
+            model, hw_target, device_config, subgraph_context_.subgraph_name);
         LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
       } else {
         ie_cnn_network_ = CreateOVModel(model_proto, global_context_, subgraph_context_, const_outputs_map_);
-        exe_network_ = global_context_.ie_core.LoadNetwork(ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name);
+        exe_network_ = global_context_.ie_core.LoadNetwork(
+            ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name);
         LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
       }
 #else
       ie_cnn_network_ = CreateOVModel(model_proto, global_context_, subgraph_context_, const_outputs_map_);
-      exe_network_ = global_context_.ie_core.LoadNetwork(ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name);
+      exe_network_ = global_context_.ie_core.LoadNetwork(
+          ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name);
       LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
 #endif
 #endif
     } else {
       ie_cnn_network_ = CreateOVModel(model_proto, global_context_, subgraph_context_, const_outputs_map_);
-      exe_network_ = global_context_.ie_core.LoadNetwork(ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name);
+      exe_network_ = global_context_.ie_core.LoadNetwork(
+          ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name);
       LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
     }
   } catch (const char* msg) {
@@ -127,10 +133,10 @@ void BasicBackend::PopulateConfigValue(ov::AnyMap& device_config) {
   }
 #endif
 #if defined(OPENVINO_2023_0) || (OPENVINO_2023_1)
-  if (global_context_.device_type.find("VPUX") != std::string::npos) {
+  if (global_context_.device_type.find("NPU") != std::string::npos) {
     std::pair<std::string, ov::Any> device_property;
-    device_property = std::make_pair("VPU_COMPILER_TYPE", "MLIR");
-    device_config.emplace(ov::device::properties("VPUX", device_property));
+    device_property = std::make_pair("NPU_COMPILER_TYPE", "DRIVER");
+    device_config.emplace(ov::device::properties("NPU", device_property));
   }
 #endif
 }
@@ -152,12 +158,12 @@ void BasicBackend::EnableCaching() {
 }
 
 void BasicBackend::EnableGPUThrottling(ov::AnyMap& device_config) {
-  if (global_context_.enable_opencl_throttling == true && global_context_.device_type.find("GPU") != std::string::npos) {
+  if (global_context_.enable_opencl_throttling == true &&
+      global_context_.device_type.find("GPU") != std::string::npos) {
     LOGS_DEFAULT(INFO) << log_tag << "Enabled OpenCL queue throttling for GPU device";
     std::pair<std::string, ov::Any> device_property;
     device_property = std::make_pair("PLUGIN_THROTTLE", "1");
     device_config.emplace(ov::device::properties("GPU_CONFIG_KEY", device_property));
-    // device_config[GPU_CONFIG_KEY(PLUGIN_THROTTLE)] = "1";
   }
 }
 
@@ -187,7 +193,9 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque
       if (input_names.find(onnx_input_name) != input_names.end()) {
         input_name = onnx_input_name;
       } else {
-        throw(log_tag + "Input names mismatch between OpenVINO and ONNX. " + onnx_input_name + " doesn't exist in the list of OpenVINO input tensor names");
+        throw(log_tag +
+              "Input names mismatch between OpenVINO and ONNX. " + onnx_input_name +
+              " doesn't exist in the list of OpenVINO input tensor names");
       }
       size_t batch_slice_idx = 0;
       if (subgraph_context_.has_dynamic_input_shape &&
@@ -197,6 +205,7 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque
         auto tensor_info = tensor.GetTensorTypeAndShapeInfo();
         auto tensor_shape = tensor_info.GetShape();
         auto tensor_size = tensor_shape.size();
+        const char* tensor_data = tensor.GetTensorData<char>();
         auto tensor_iter = 0;
         ov::Shape input_tensor_shape = ov::Shape(tensor_size, 0);
         for (auto i = tensor_shape.begin(); i != tensor_shape.end(); ++i) {
@@ -204,8 +213,16 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque
           tensor_iter += 1;
         }
         auto input = ie_cnn_network_->get_parameters().at(input_idx);
-        OVTensorPtr tensor_ptr = std::make_shared<ov::Tensor>(input->get_element_type(), input_tensor_shape);
-        FillInputBlob(tensor_ptr, batch_slice_idx, input_name, context, subgraph_context_);
+        OVTensorPtr tensor_ptr;
+        // avoid input copies on the CPU device
+        if (global_context_.device_type.find("CPU") != std::string::npos) {
+          tensor_ptr = std::make_shared<ov::Tensor>(input->get_element_type(), input_tensor_shape,
+                                                    (void*)tensor_data);
+        } else {
+          tensor_ptr = std::make_shared<ov::Tensor>(input->get_element_type(), input_tensor_shape);
+          FillInputBlob(tensor_ptr, batch_slice_idx, input_name, context, subgraph_context_);
+        }
+
         try {
           infer_request->SetTensor(input_name, tensor_ptr);
         } catch (const char* msg) {
@@ -251,7 +268,10 @@ void BasicBackend::StartRemoteAsyncInference(Ort::KernelContext& context, OVInfe
       if (input_names.find(onnx_input_name) != input_names.end()) {
         input_name = onnx_input_name;
       } else {
-        throw(log_tag + "Input names mismatch between OpenVINO and ONNX. " + onnx_input_name + " doesn't exist in the list of OpenVINO input tensor names");
+        throw(log_tag +
+              "Input names mismatch between OpenVINO and ONNX. " +
+              onnx_input_name +
+              " doesn't exist in the list of OpenVINO input tensor names");
       }
       input_idx++;
       // Kernel Context Input Buffer
@@ -264,9 +284,10 @@ void BasicBackend::StartRemoteAsyncInference(Ort::KernelContext& context, OVInfe
         const cl::Buffer* shared_buffer_const = static_cast<const cl::Buffer*>(tensor_data);
         // Create an Input Remote Blob
         auto input = ie_cnn_network_->get_parameters().at(0);
-        auto remote_blob = remote_context_->create_tensor(input->get_element_type(), input->get_shape(), *shared_buffer_const);
-        ov::Tensor tensor = static_cast<ov::Tensor>(remote_blob);
-        OVTensorPtr tensor_ptr = std::make_shared<ov::Tensor>(tensor);
+        auto remote_blob = remote_context_->create_tensor(
+            input->get_element_type(), input->get_shape(), *shared_buffer_const);
+        ov::Tensor tensor_remote = static_cast<ov::Tensor>(remote_blob);
+        OVTensorPtr tensor_ptr = std::make_shared<ov::Tensor>(tensor_remote);
         infer_request->SetTensor(input_name, tensor_ptr);
       } else {
         OVTensorPtr graph_input_blob;
@@ -295,7 +316,10 @@ void BasicBackend::StartRemoteAsyncInference(Ort::KernelContext& context, OVInfe
         }
       }
       if (!output_name_found) {
-        throw std::string(log_tag + "Output names mismatch between OpenVINO and ONNX. [ONNX Output: ] " + onnx_output_name + " doesn't exist in the list of OpenVINO output tensor names");
+        throw std::string(
+            log_tag +
+            "Output names mismatch between OpenVINO and ONNX. [ONNX Output: ] " +
+            onnx_output_name + " doesn't exist in the list of OpenVINO output tensor names");
       }
 
       size_t batch_size = 1;
@@ -307,9 +331,10 @@ void BasicBackend::StartRemoteAsyncInference(Ort::KernelContext& context, OVInfe
         const cl::Buffer* shared_buffer_const = static_cast<const cl::Buffer*>(tensor_data);
         // Create a shared Blob, set the Infer Request Output Blob
         auto output = ie_cnn_network_->get_results().at(0);
-        auto remote_tensor = remote_context_->create_tensor(output->get_element_type(), output->get_shape(), *shared_buffer_const);
-        ov::Tensor tensor = static_cast<ov::Tensor>(remote_tensor);
-        OVTensorPtr tensor_ptr = std::make_shared<ov::Tensor>(tensor);
+        auto remote_tensor =
+            remote_context_->create_tensor(output->get_element_type(), output->get_shape(), *shared_buffer_const);
+        ov::Tensor tensor_t = static_cast<ov::Tensor>(remote_tensor);
+        OVTensorPtr tensor_ptr = std::make_shared<ov::Tensor>(tensor_t);
         try {
           infer_request->SetTensor(output_name, tensor_ptr);
         } catch (const char* msg) {
@@ -364,7 +389,8 @@ void BasicBackend::CompleteAsyncInference(Ort::KernelContext& context, OVInferRe
         throw(msg);
       }
       size_t batch_size = 1;
-      auto output_tensor = GetOutputTensor(context, batch_size, infer_request, output_name, subgraph_context_.output_names);
+      auto output_tensor =
+          GetOutputTensor(context, batch_size, infer_request, output_name, subgraph_context_.output_names);
       auto mem_info = output_tensor.GetTensorMemoryInfo();
       if (mem_info.GetAllocatorName() == OpenVINO_GPU) {
         return;
@@ -465,7 +491,8 @@ void BasicBackend::Infer(OrtKernelContext* ctx) {
 #ifndef IO_BUFFER_ENABLED  // Printing performance counts is disabled when IO_BUFFER_ENABLED
     if (openvino_ep::backend_utils::IsDebugEnabled()) {
       inferRequestsQueue_->printstatus();  // Printing the elements of infer_requests_ vector pool only in debug mode
-      std::string& hw_target = (global_context_.device_id != "") ? global_context_.device_id : global_context_.device_type;
+      std::string& hw_target =
+          (global_context_.device_id != "") ? global_context_.device_id : global_context_.device_type;
       printPerformanceCounts(infer_request, std::cout, hw_target);
     }
 #endif
diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.h b/onnxruntime/core/providers/openvino/backends/basic_backend.h
index 2f1d603640809..6eda641451a72 100644
--- a/onnxruntime/core/providers/openvino/backends/basic_backend.h
+++ b/onnxruntime/core/providers/openvino/backends/basic_backend.h
@@ -6,16 +6,17 @@
 #include <memory>
 
 #define ORT_API_MANUAL_INIT
-#include "core/session/onnxruntime_cxx_api.h"
-#include "core/providers/openvino/contexts.h"
-#include "core/providers/openvino/ibackend.h"
-#include "core/providers/openvino/ov_interface.h"
 #include <vector>
 #include <iostream>
 #include <string>
 #include <condition_variable>
 #include <mutex>
 
+#include "core/session/onnxruntime_cxx_api.h"
+#include "core/providers/openvino/contexts.h"
+#include "core/providers/openvino/ibackend.h"
+#include "core/providers/openvino/ov_interface.h"
+
 namespace onnxruntime {
 namespace openvino_ep {
 
@@ -29,7 +30,7 @@ class BasicBackend : public IBackend {
   void Infer(OrtKernelContext* context) override;
 
  private:
-  bool ImportBlob(std::string hw_target, bool vpu_status);
+  bool ImportBlob(std::string hw_target, bool npu_status);
   void PopulateCompiledDirectory(std::string, std::string&, std::string&, bool&);
   bool ValidateSubgraph(std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map);
   void PopulateConfigValue(ov::AnyMap& device_config);
diff --git a/onnxruntime/core/providers/openvino/contexts.h b/onnxruntime/core/providers/openvino/contexts.h
index b61dcf8ca4922..29233e72c33b9 100644
--- a/onnxruntime/core/providers/openvino/contexts.h
+++ b/onnxruntime/core/providers/openvino/contexts.h
@@ -3,6 +3,9 @@
 
 #pragma once
 
+#include <vector>
+#include <unordered_map>
+#include <string>
 #include "ov_interface.h"
 
 namespace onnxruntime {
@@ -12,7 +15,7 @@ namespace openvino_ep {
 struct GlobalContext {
   OVCore ie_core;
   bool is_wholly_supported_graph = false;
-  bool enable_vpu_fast_compile = false;
+  bool enable_npu_fast_compile = false;
   bool enable_opencl_throttling = false;
   bool enable_dynamic_shapes = false;
   size_t num_of_threads;
@@ -34,7 +37,7 @@ struct GlobalContext {
 struct SubGraphContext {
   bool has_dynamic_input_shape = false;
   bool enable_batching = false;
-  bool set_vpu_config = false;
+  bool set_npu_config = false;
   bool is_constant = false;
   void* context = 0;
   std::string subgraph_name;
diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc
index 990809926299e..a4c6b0f851c04 100644
--- a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc
+++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc
@@ -17,17 +17,18 @@ OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProv
 
   openvino_ep::BackendManager::GetGlobalContext().device_type = info.device_type_;
   openvino_ep::BackendManager::GetGlobalContext().precision_str = info.precision_;
-  openvino_ep::BackendManager::GetGlobalContext().enable_vpu_fast_compile = info.enable_vpu_fast_compile_;
+  openvino_ep::BackendManager::GetGlobalContext().enable_npu_fast_compile = info.enable_npu_fast_compile_;
   openvino_ep::BackendManager::GetGlobalContext().cache_dir = info.cache_dir_;
   openvino_ep::BackendManager::GetGlobalContext().num_streams = info.num_streams_;
   openvino_ep::BackendManager::GetGlobalContext().context = info.context_;
   openvino_ep::BackendManager::GetGlobalContext().enable_opencl_throttling = info.enable_opencl_throttling_;
   openvino_ep::BackendManager::GetGlobalContext().enable_dynamic_shapes = info.enable_dynamic_shapes_;
 
-  if ((int)info.num_of_threads_ <= 0) {
+  if (static_cast<int>(info.num_of_threads_) <= 0) {
     openvino_ep::BackendManager::GetGlobalContext().num_of_threads = 8;
-  } else if ((int)info.num_of_threads_ > 8) {
-    std::string err_msg = std::string("\n [ERROR] num_of_threads configured during runtime is: ") + std::to_string(info.num_of_threads_) + "\nnum_of_threads configured should be >0 and <=8.\n";
+  } else if (static_cast<int>(info.num_of_threads_) > 8) {
+    std::string err_msg = std::string("\n [ERROR] num_of_threads configured during runtime is: ") +
+                          std::to_string(info.num_of_threads_) + "\nnum_of_threads configured should be >0 and <=8.\n";
     ORT_THROW(err_msg);
   } else {
     openvino_ep::BackendManager::GetGlobalContext().num_of_threads = info.num_of_threads_;
@@ -56,7 +57,8 @@ OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProv
               device_found = true;
               break;
             }
-            if (info.device_type_.find("VPUX") != std::string::npos && (info.precision_ == "FP16" || info.precision_ == "U8")) {
+            if ((info.device_type_.find("NPU") != std::string::npos) &&
+                (info.precision_ == "FP16" || info.precision_ == "U8")) {
               device_found = true;
               break;
             }
@@ -109,11 +111,14 @@ OpenVINOExecutionProvider::GetCapability(const GraphViewer& graph_viewer,
   openvino_ep::BackendManager::GetGlobalContext().onnx_model_name = graph_viewer.Name();
 #ifdef _WIN32
   std::wstring onnx_path = graph_viewer.ModelPath().ToPathString();
-  openvino_ep::BackendManager::GetGlobalContext().onnx_model_path_name = std::string(onnx_path.begin(), onnx_path.end());
+  openvino_ep::BackendManager::GetGlobalContext().onnx_model_path_name =
+      std::string(onnx_path.begin(), onnx_path.end());
 #else
-  openvino_ep::BackendManager::GetGlobalContext().onnx_model_path_name = graph_viewer.ModelPath().ToPathString();
+  openvino_ep::BackendManager::GetGlobalContext().onnx_model_path_name =
+      graph_viewer.ModelPath().ToPathString();
 #endif
-  openvino_ep::BackendManager::GetGlobalContext().onnx_opset_version = graph_viewer.DomainToVersionMap().at(kOnnxDomain);
+  openvino_ep::BackendManager::GetGlobalContext().onnx_opset_version =
+      graph_viewer.DomainToVersionMap().at(kOnnxDomain);
 
 #if defined(OPENVINO_2022_1)
   openvino_ep::GetCapability obj(graph_viewer,
@@ -151,7 +156,8 @@ common::Status OpenVINOExecutionProvider::Compile(
 
     openvino_ep::BackendManager::GetGlobalContext().use_api_2 = true;
 
-    std::shared_ptr<openvino_ep::BackendManager> backend_manager = std::make_shared<openvino_ep::BackendManager>(fused_node, graph_body_viewer, *GetLogger());
+    std::shared_ptr<openvino_ep::BackendManager> backend_manager =
+        std::make_shared<openvino_ep::BackendManager>(fused_node, graph_body_viewer, *GetLogger());
 
     compute_info.create_state_func =
         [backend_manager](ComputeContext* context, FunctionState* state) {
diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.h b/onnxruntime/core/providers/openvino/openvino_execution_provider.h
index a4fc09362fa23..3b56b54410e40 100644
--- a/onnxruntime/core/providers/openvino/openvino_execution_provider.h
+++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.h
@@ -3,19 +3,28 @@
 
 #pragma once
 
-#include "backend_manager.h"
 #include <map>
 #include <algorithm>
 #include <iostream>
+#include <string>
+#include <memory>
+#include <vector>
+
+#include "backend_manager.h"
 
 namespace onnxruntime {
 
 static void print_build_options() {
   std::cout << "[ERROR] INVALID DEVICE BUILD TYPE SPECIFIED" << std::endl;
-  std::cout << "Specify the keyword HETERO (or) MULTI (or) AUTO followed by the devices in the order of priority you want to build" << std::endl;
-  std::cout << "The different hardware devices that can be added with HETERO/MULTI/AUTO build ";
-  std::cout << "are ['CPU','GPU','VPUX']" << std::endl;
-  std::cout << "An example of how to specify the HETERO or MULTI or AUTO build type. Ex: HETERO:GPU,CPU  Ex: MULTI:GPU,CPU Ex: AUTO:GPU,CPU" << std::endl;
+  std::cout << "Specify the keyword HETERO (or) MULTI (or) AUTO followed by the devices in the order of priority "
+            << "you want to build"
+            << std::endl;
+  std::cout << "The different hardware devices that can be added with HETERO/MULTI/AUTO build "
+            << "are ['CPU','GPU']"
+            << std::endl;
+  std::cout << "An example of how to specify the HETERO or MULTI or AUTO build type. "
+            << "Ex: HETERO:GPU,CPU  Ex: MULTI:GPU,CPU Ex: AUTO:GPU,CPU"
+            << std::endl;
 }
 
 static std::vector<std::string> split(const std::string& s, char delim) {
@@ -39,7 +48,7 @@ static std::vector<std::string> parseDevices(const std::string& device_string) {
     print_build_options();
     ORT_THROW("Invalid device string: " + device_string);
   }
-  std::vector<std::string> dev_options = {"CPU", "GPU", "VPUX"};
+  std::vector<std::string> dev_options = {"CPU", "GPU"};
   for (std::string dev : devices) {
     if (!std::count(dev_options.begin(), dev_options.end(), dev)) {
       print_build_options();
@@ -53,7 +62,7 @@ static std::vector<std::string> parseDevices(const std::string& device_string) {
 struct OpenVINOExecutionProviderInfo {
   std::string device_type_;
   std::string precision_;
-  bool enable_vpu_fast_compile_;
+  bool enable_npu_fast_compile_;
   std::string device_id_;
   size_t num_of_threads_;
   std::string cache_dir_;
@@ -62,11 +71,18 @@ struct OpenVINOExecutionProviderInfo {
   bool enable_opencl_throttling_;
   bool enable_dynamic_shapes_;
 
-  explicit OpenVINOExecutionProviderInfo(std::string dev_type, bool enable_vpu_fast_compile, std::string dev_id,
+  explicit OpenVINOExecutionProviderInfo(std::string dev_type, bool enable_npu_fast_compile, std::string dev_id,
                                          size_t num_of_threads, std::string cache_dir, int num_streams,
                                          void* context, bool enable_opencl_throttling,
                                          bool enable_dynamic_shapes)
-      : enable_vpu_fast_compile_(enable_vpu_fast_compile), device_id_(dev_id), num_of_threads_(num_of_threads), cache_dir_(cache_dir), num_streams_(num_streams), context_(context), enable_opencl_throttling_(enable_opencl_throttling), enable_dynamic_shapes_(enable_dynamic_shapes) {
+      : enable_npu_fast_compile_(enable_npu_fast_compile),
+        device_id_(dev_id),
+        num_of_threads_(num_of_threads),
+        cache_dir_(cache_dir),
+        num_streams_(num_streams),
+        context_(context),
+        enable_opencl_throttling_(enable_opencl_throttling),
+        enable_dynamic_shapes_(enable_dynamic_shapes) {
     if (dev_type == "") {
       LOGS_DEFAULT(INFO) << "[OpenVINO-EP]"
                          << "No runtime device selection option provided.";
@@ -82,11 +98,11 @@ struct OpenVINOExecutionProviderInfo {
 #elif defined OPENVINO_CONFIG_GPU_FP16
       device_type_ = "GPU";
       precision_ = "FP16";
-#elif defined OPENVINO_CONFIG_VPUX_FP16
-      device_type_ = "VPUX";
+#elif defined OPENVINO_CONFIG_NPU_FP16
+      device_type_ = "NPU";
       precision_ = "FP16";
-#elif defined OPENVINO_CONFIG_VPUX_U8
-      device_type_ = "VPUX";
+#elif defined OPENVINO_CONFIG_NPU_U8
+      device_type_ = "NPU";
       precision_ = "U8";
 #elif defined OPENVINO_CONFIG_HETERO || defined OPENVINO_CONFIG_MULTI || defined OPENVINO_CONFIG_AUTO
 #ifdef DEVICE_NAME
@@ -126,11 +142,11 @@ struct OpenVINOExecutionProviderInfo {
     } else if (dev_type == "GPU.1_FP16") {
       device_type_ = "GPU.1";
       precision_ = "FP16";
-    } else if (dev_type == "VPUX_FP16") {
-      device_type_ = "VPUX";
+    } else if (dev_type == "NPU_FP16") {
+      device_type_ = "NPU";
       precision_ = "FP16";
-    } else if (dev_type == "VPUX_U8") {
-      device_type_ = "VPUX";
+    } else if (dev_type == "NPU_U8") {
+      device_type_ = "NPU";
       precision_ = "U8";
     } else if (dev_type.find("HETERO") == 0 || dev_type.find("MULTI") == 0) {
       std::vector<std::string> devices = parseDevices(dev_type);
diff --git a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc
index 95b39bcc05983..fbb89710c8008 100644
--- a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc
+++ b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc
@@ -8,11 +8,16 @@
 
 namespace onnxruntime {
 struct OpenVINOProviderFactory : IExecutionProviderFactory {
-  OpenVINOProviderFactory(const char* device_type, bool enable_vpu_fast_compile,
+  OpenVINOProviderFactory(const char* device_type, bool enable_npu_fast_compile,
                           const char* device_id, size_t num_of_threads,
                           const char* cache_dir, int num_streams, void* context,
                           bool enable_opencl_throttling, bool enable_dynamic_shapes)
-      : enable_vpu_fast_compile_(enable_vpu_fast_compile), num_of_threads_(num_of_threads), num_streams_(num_streams), context_(context), enable_opencl_throttling_(enable_opencl_throttling), enable_dynamic_shapes_(enable_dynamic_shapes) {
+      : enable_npu_fast_compile_(enable_npu_fast_compile),
+        num_of_threads_(num_of_threads),
+        num_streams_(num_streams),
+        context_(context),
+        enable_opencl_throttling_(enable_opencl_throttling),
+        enable_dynamic_shapes_(enable_dynamic_shapes) {
     device_type_ = (device_type == nullptr) ? "" : device_type;
     device_id_ = (device_id == nullptr) ? "" : device_id;
     cache_dir_ = (cache_dir == nullptr) ? "" : cache_dir;
@@ -24,7 +29,7 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory {
 
  private:
   std::string device_type_;
-  bool enable_vpu_fast_compile_;
+  bool enable_npu_fast_compile_;
   std::string device_id_;
   size_t num_of_threads_;
   std::string cache_dir_;
@@ -35,7 +40,7 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory {
 };
 
 std::unique_ptr<IExecutionProvider> OpenVINOProviderFactory::CreateProvider() {
-  OpenVINOExecutionProviderInfo info(device_type_, enable_vpu_fast_compile_, device_id_, num_of_threads_,
+  OpenVINOExecutionProviderInfo info(device_type_, enable_npu_fast_compile_, device_id_, num_of_threads_,
                                      cache_dir_, num_streams_, context_, enable_opencl_throttling_,
                                      enable_dynamic_shapes_);
   return std::make_unique<OpenVINOExecutionProvider>(info);
@@ -59,17 +64,18 @@ struct OpenVINO_Provider : Provider {
 
     std::string device_type = "";           // [device_type]: Overrides the accelerator hardware type and precision
                                             //   with these values at runtime.
-    bool enable_vpu_fast_compile = false;   // [enable_vpu_fast_compile]: Fast-compile may be optionally enabled to
-                                            // speeds up the model's compilation to VPU device specific format.
+    bool enable_npu_fast_compile = false;   // [enable_npu_fast_compile]: Fast-compile may be optionally enabled to
+                                            // speeds up the model's compilation to NPU device specific format.
     const char* device_id = "";             // [device_id]: Selects a particular hardware device for inference.
-    size_t num_of_threads = 8;              // [num_of_threads]: Overrides the accelerator default value of number of
+    int num_of_threads = 8;                 // [num_of_threads]: Overrides the accelerator default value of number of
                                             //  threads with this value at runtime.
     const char* cache_dir = "";             // [cache_dir]: specify the path to
                                             // dump and load the blobs for the model caching/kernel caching (GPU)
                                             // feature. If blob files are already present, it will be directly loaded.
     int num_streams = 1;                    // [num_streams]: Option that specifies the number of parallel inference
                                             // requests to be processed on a given `device_type`. Overrides the
-                                            // accelerator default value of number of streams with this value at runtime.
+                                            // accelerator default value of number of streams
+                                            // with this value at runtime.
     bool enable_opencl_throttling = false;  // [enable_opencl_throttling]: Enables OpenCL queue throttling for GPU
                                             // device (Reduces CPU Utilization when using GPU)
     bool enable_dynamic_shapes = false;     // [enable_dynamic_shapes]: Enables Dynamic Shapes feature for CPU device)
@@ -80,14 +86,15 @@ struct OpenVINO_Provider : Provider {
 
       std::set<std::string> ov_supported_device_types = {"CPU_FP32", "CPU_FP16", "GPU_FP32",
                                                          "GPU.0_FP32", "GPU.1_FP32", "GPU_FP16",
-                                                         "GPU.0_FP16", "GPU.1_FP16",
-                                                         "VPUX_FP16", "VPUX_U8"};
+                                                         "GPU.0_FP16", "GPU.1_FP16"};
       if (!((ov_supported_device_types.find(device_type) != ov_supported_device_types.end()) ||
-            (device_type.find("HETERO:") == 0) || (device_type.find("MULTI:") == 0) || (device_type.find("AUTO:") == 0))) {
+            (device_type.find("HETERO:") == 0) ||
+            (device_type.find("MULTI:") == 0) ||
+            (device_type.find("AUTO:") == 0))) {
         ORT_THROW(
             "[ERROR] [OpenVINO] You have selcted wrong configuration value for the key 'device_type'. "
             "Select from 'CPU_FP32', 'CPU_FP16', 'GPU_FP32', 'GPU.0_FP32', 'GPU.1_FP32', 'GPU_FP16', "
-            "'GPU.0_FP16', 'GPU.1_FP16', 'VPUX_FP16', 'VPUX_U8' or from"
+            "'GPU.0_FP16', 'GPU.1_FP16' or from"
             " HETERO/MULTI/AUTO options available. \n");
       }
     }
@@ -97,30 +104,37 @@ struct OpenVINO_Provider : Provider {
     if (provider_options_map.find("cache_dir") != provider_options_map.end()) {
       cache_dir = provider_options_map.at("cache_dir").c_str();
     }
+
     if (provider_options_map.find("context") != provider_options_map.end()) {
-      context = (void*)provider_options_map.at("context").c_str();
+      std::string str = provider_options_map.at("context");
+      uint64_t number = std::strtoull(str.c_str(), nullptr, 16);
+      context = reinterpret_cast<void*>(number);
     }
 
     if (provider_options_map.find("num_of_threads") != provider_options_map.end()) {
       num_of_threads = std::stoi(provider_options_map.at("num_of_threads"));
       if (num_of_threads <= 0) {
         num_of_threads = 1;
+        LOGS_DEFAULT(WARNING) << "[OpenVINO-EP] The value for the key 'num_threads' should be in the positive range.\n "
+                              << "Executing with num_threads=1";
       }
     }
 
     if (provider_options_map.find("num_streams") != provider_options_map.end()) {
       num_streams = std::stoi(provider_options_map.at("num_streams"));
-      if (num_streams <= 0 && num_streams > 8) {
-        ORT_THROW("[ERROR] [OpenVINO] The value for the key 'num_streams' should be in the range of 1-8 \n");
+      if (num_streams <= 0) {
+        num_streams = 1;
+        LOGS_DEFAULT(WARNING) << "[OpenVINO-EP] The value for the key 'num_streams' should be in the range of 1-8.\n "
+                              << "Executing with num_streams=1";
       }
     }
     std::string bool_flag = "";
-    if (provider_options_map.find("enable_vpu_fast_compile") != provider_options_map.end()) {
-      bool_flag = provider_options_map.at("enable_vpu_fast_compile");
+    if (provider_options_map.find("enable_npu_fast_compile") != provider_options_map.end()) {
+      bool_flag = provider_options_map.at("enable_npu_fast_compile");
       if (bool_flag == "true" || bool_flag == "True")
-        enable_vpu_fast_compile = true;
+        enable_npu_fast_compile = true;
       else if (bool_flag == "false" || bool_flag == "False")
-        enable_vpu_fast_compile = false;
+        enable_npu_fast_compile = false;
       bool_flag = "";
     }
 
@@ -141,7 +155,7 @@ struct OpenVINO_Provider : Provider {
         enable_dynamic_shapes = false;
     }
     return std::make_shared<OpenVINOProviderFactory>(const_cast<char*>(device_type.c_str()),
-                                                     enable_vpu_fast_compile,
+                                                     enable_npu_fast_compile,
                                                      device_id,
                                                      num_of_threads,
                                                      cache_dir,
@@ -157,7 +171,6 @@ struct OpenVINO_Provider : Provider {
   void Shutdown() override {
     openvino_ep::BackendManager::ReleaseGlobalContext();
   }
-
 } g_provider;
 
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/openvino/ov_interface.cc b/onnxruntime/core/providers/openvino/ov_interface.cc
index 3914488fc523b..d2ce378c97e02 100644
--- a/onnxruntime/core/providers/openvino/ov_interface.cc
+++ b/onnxruntime/core/providers/openvino/ov_interface.cc
@@ -29,7 +29,10 @@ std::shared_ptr<OVNetwork> OVCore::ReadModel(const std::string& model) const {
   }
 }
 
-OVExeNetwork OVCore::LoadNetwork(std::shared_ptr<OVNetwork>& ie_cnn_network, std::string& hw_target, ov::AnyMap& device_config, std::string name) {
+OVExeNetwork OVCore::LoadNetwork(std::shared_ptr<OVNetwork>& ie_cnn_network,
+                                 std::string& hw_target,
+                                 ov::AnyMap& device_config,
+                                 std::string name) {
   ov::CompiledModel obj;
   try {
     obj = oe.compile_model(ie_cnn_network, hw_target, device_config);
@@ -43,7 +46,10 @@ OVExeNetwork OVCore::LoadNetwork(std::shared_ptr<OVNetwork>& ie_cnn_network, std
 }
 
 #if defined(OPENVINO_2023_0) || (OPENVINO_2023_1)
-OVExeNetwork OVCore::LoadNetwork(const std::string& model, std::string& hw_target, ov::AnyMap& device_config, std::string name) {
+OVExeNetwork OVCore::LoadNetwork(const std::string& model,
+                                 std::string& hw_target,
+                                 ov::AnyMap& device_config,
+                                 std::string name) {
   ov::CompiledModel obj;
   try {
     obj = oe.compile_model(model, ov::Tensor(), hw_target, device_config);
diff --git a/onnxruntime/core/providers/openvino/ov_interface.h b/onnxruntime/core/providers/openvino/ov_interface.h
index ed9583033ab34..935ac8f68411d 100644
--- a/onnxruntime/core/providers/openvino/ov_interface.h
+++ b/onnxruntime/core/providers/openvino/ov_interface.h
@@ -4,6 +4,7 @@
 #pragma once
 
 #include <vector>
+#include <memory>
 
 #if defined(OPENVINO_2022_1) || (OPENVINO_2022_2) || (OPENVINO_2022_3) || (OPENVINO_2023_0) || (OPENVINO_2023_1)
 #define OV_API_20
@@ -43,9 +44,15 @@ class OVCore {
 
  public:
   std::shared_ptr<OVNetwork> ReadModel(const std::string& model_stream) const;
-  OVExeNetwork LoadNetwork(std::shared_ptr<OVNetwork>& ie_cnn_network, std::string& hw_target, ov::AnyMap& device_config, std::string name);
+  OVExeNetwork LoadNetwork(std::shared_ptr<OVNetwork>& ie_cnn_network,
+                           std::string& hw_target,
+                           ov::AnyMap& device_config,
+                           std::string name);
 #if defined(OPENVINO_2023_0) || (OPENVINO_2023_1)
-  OVExeNetwork LoadNetwork(const std::string& model_stream, std::string& hw_target, ov::AnyMap& device_config, std::string name);
+  OVExeNetwork LoadNetwork(const std::string& model_stream,
+                           std::string& hw_target,
+                           ov::AnyMap& device_config,
+                           std::string name);
 #endif
   void SetCache(std::string cache_dir_path);
 #ifdef IO_BUFFER_ENABLED
@@ -62,7 +69,7 @@ class OVExeNetwork {
   ov::CompiledModel obj;
 
  public:
-  OVExeNetwork(ov::CompiledModel md) { obj = md; }
+  explicit OVExeNetwork(ov::CompiledModel md) { obj = md; }
   OVExeNetwork() { obj = ov::CompiledModel(); }
   ov::CompiledModel& Get() { return obj; }
   OVInferRequest CreateInferRequest();
diff --git a/onnxruntime/core/providers/openvino/ov_versions/capabilities.h b/onnxruntime/core/providers/openvino/ov_versions/capabilities.h
index b76d1cf534c2a..5bcf9d68cd94e 100644
--- a/onnxruntime/core/providers/openvino/ov_versions/capabilities.h
+++ b/onnxruntime/core/providers/openvino/ov_versions/capabilities.h
@@ -3,6 +3,8 @@
 
 #pragma once
 #include <vector>
+#include <string>
+#include <memory>
 #include "data_ops.h"
 
 namespace onnxruntime {
diff --git a/onnxruntime/core/providers/openvino/ov_versions/capability.cc b/onnxruntime/core/providers/openvino/ov_versions/capability.cc
index 171dd45c508cc..454f3dd5eb3cc 100644
--- a/onnxruntime/core/providers/openvino/ov_versions/capability.cc
+++ b/onnxruntime/core/providers/openvino/ov_versions/capability.cc
@@ -24,7 +24,8 @@ namespace openvino_ep {
 
 // Constructor
 GetCapability::GetCapability(const GraphViewer& graph_viewer_param, std::string device_type_param,
-                             const std::string version_param) : graph_viewer_(graph_viewer_param), device_type_(device_type_param) {
+                             const std::string version_param)
+    : graph_viewer_(graph_viewer_param), device_type_(device_type_param) {
   if (version_param == "V_2022_1") {
     data_ops_ = new DataOps(graph_viewer_, V_2022_1, device_type_);
   } else if (version_param == "V_2022_2") {
@@ -114,11 +115,11 @@ std::vector<std::unique_ptr<ComputeCapability>> GetCapability::Execute() {
     }
     openvino_ep::BackendManager::GetGlobalContext().is_wholly_supported_graph = true;
 
-  } else {  // unsupported_nodes_idx.empty()
-
+  } else {                                     // unsupported_nodes_idx.empty()
 #if defined(OPENVINO_DISABLE_GRAPH_PARTITION)  // disables graph partition at build time
     LOGS_DEFAULT(INFO) << "[OpenVINO-EP] DISABLE_GRAPH_PARTITION option is set";
-    LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model is not fully supported by OpenVINO, so making the full model fall back to default CPU Execution Provider";
+    LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model is not fully supported by OpenVINO, "
+                       << "so making the full model fall back to default CPU Execution Provider";
     return result;
 #endif
 
@@ -145,21 +146,16 @@ std::vector<std::unique_ptr<ComputeCapability>> GetCapability::Execute() {
       // If subgraph has less then three, graph is considered trivial
       if (this_cluster.size() < 3) {
         continue;
-      } else {
-        // If subgraph only has Identity node, EyeLike or Dropout, OpenVINO EP doesn't support it.
-        if (this_cluster.size() == 1) {
-          const auto& node = graph_viewer_.GetNode(this_cluster[0]);
-          if (IsOpSupportedOnlyInModel(node->OpType()))
-            continue;
-          // If reshape is not an intermediate node, shape needs to be an initializer
-          if (data_ops_->SpecialConditionForClusterSizeOne(ng_required_initializers, node))
-            continue;
-        }
       }
 
-      std::vector<std::string> cluster_graph_inputs, cluster_inputs, const_inputs, cluster_outputs;
+      std::vector<std::string> cluster_graph_inputs, cluster_inputs, cluster_outputs;
 
-      GetInputsOutputsOfCluster(graph_viewer_, this_cluster, ng_required_initializers, cluster_graph_inputs, cluster_inputs, const_inputs, cluster_outputs);
+      GetInputsOutputsOfCluster(graph_viewer_,
+                                this_cluster,
+                                ng_required_initializers,
+                                cluster_graph_inputs,
+                                cluster_inputs,
+                                cluster_outputs);
 
       bool omit_subgraph = false;
       // Omitting zero dim subgraphs
diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
index 70118c94f9ff8..a5a0faa3a8f24 100644
--- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
+++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
@@ -2,11 +2,15 @@
 // Licensed under the MIT License
 
 #include <unordered_set>
+#include <string>
+#include <vector>
+#include <utility>
+#include <map>
+#include <set>
+
 #include "core/providers/shared_library/provider_api.h"
 #include "../backend_utils.h"
 #include "../backend_manager.h"
-#include <string>
-#include <vector>
 #include "data_ops.h"
 #include "capabilities.h"
 #include "utils.h"
@@ -72,269 +76,355 @@ std::set<std::string> ops_supported_as_function = {
 
 std::vector<SupportedOp> supported_op_mode = {
     {"Abs", V_2020_4, {"CPU", "GPU"}},
-    {"Abs", V_2023_0, {"VPUX"}},
+    {"Abs", V_2023_0, {"NPU"}},
     {"Acos", V_2020_4, {"CPU"}},
     {"Acos", V_2022_1, {"GPU"}},
+    {"Acos", V_2023_1, {"NPU"}},
     {"Acosh", V_2020_4, {"CPU"}},
     {"Acosh", V_2022_1, {"GPU"}},
+    {"Acosh", V_2023_1, {"NPU"}},
     {"Add", V_2020_4, {"CPU", "GPU"}},
-    {"Add", V_2023_0, {"VPUX"}},
+    {"Add", V_2023_0, {"NPU"}},
     {"And", V_2020_4, {"CPU", "GPU"}},
+    {"And", V_2023_1, {"NPU"}},
     {"ArgMax", V_2020_4, {"CPU"}},
     {"ArgMax", V_2021_1, {"GPU"}},
     {"ArgMin", V_2020_4, {"CPU"}},
     {"ArgMin", V_2022_1, {"GPU"}},
     {"Asin", V_2020_4, {"CPU", "GPU"}},
+    {"Asin", V_2023_1, {"NPU"}},
     {"Asinh", V_2020_4, {"CPU", "GPU"}},
+    {"Asinh", V_2023_1, {"NPU"}},
     {"Atan", V_2020_4, {"CPU", "GPU"}},
+    {"Atan", V_2023_1, {"NPU"}},
     {"Atanh", V_2020_4, {"CPU"}},
     {"Atanh", V_2022_1, {"GPU"}},
+    {"Atanh", V_2023_1, {"NPU"}},
     {"AveragePool", V_2020_4, {"CPU", "GPU"}},
-    {"AveragePool", V_2023_0, {"VPUX"}},
+    {"AveragePool", V_2023_0, {"NPU"}},
     {"BatchNormalization", V_2020_4, {"CPU", "GPU"}},
-    {"BatchNormalization", V_2023_0, {"VPUX"}},
+    {"BatchNormalization", V_2023_0, {"NPU"}},
     {"BitShift", V_2022_1, {"CPU"}},
+    {"BitShift", V_2023_1, {"NPU"}},
     {"Cast", V_2020_4, {"CPU", "GPU"}},
-    {"Cast", V_2023_0, {"VPUX"}},
+    {"Cast", V_2023_0, {"NPU"}},
+    {"CastLike", V_2023_1, {"CPU", "GPU", "NPU"}},
     {"Ceil", V_2020_4, {"GPU"}},
     {"Ceil", V_2021_4, {"CPU"}},
+    {"Ceil", V_2023_1, {"NPU"}},
     {"Celu", V_2022_1, {"CPU", "GPU"}},
     {"Clip", V_2020_4, {"CPU", "GPU"}},
-    {"Clip", V_2023_0, {"VPUX"}},
+    {"Clip", V_2023_0, {"NPU"}},
+    {"Compress", V_2023_1, {"CPU", "GPU"}},
     {"Concat", V_2020_4, {"CPU", "GPU"}},
-    {"Concat", V_2023_0, {"VPUX"}},
+    {"Concat", V_2023_0, {"NPU"}},
     {"Constant", V_2020_4, {"CPU", "GPU"}},
-    {"Constant", V_2023_0, {"VPUX"}},
+    {"Constant", V_2023_0, {"NPU"}},
     {"ConstantOfShape", V_2020_4, {"CPU", "GPU"}},
-    {"ConstantOfShape", V_2023_0, {"VPUX"}},  // Gets mapped to broadcast op in the plugin.
+    {"ConstantOfShape", V_2023_0, {"NPU"}},  // Gets mapped to broadcast op in the plugin.
     {"Conv", V_2020_4, {"CPU", "GPU"}},
-    {"Conv", V_2023_0, {"VPUX"}},
+    {"Conv", V_2023_0, {"NPU"}},
     {"ConvInteger", V_2022_1, {"CPU", "GPU"}},
+    {"ConvInteger", V_2023_1, {"NPU"}},
     {"ConvTranspose", V_2020_4, {"CPU", "GPU"}},
+    {"ConvTranspose", V_2023_1, {"NPU"}},
     {"Cos", V_2020_4, {"CPU"}},
     {"Cos", V_2022_1, {"GPU"}},
-    {"Cos", V_2023_0, {"VPUX"}},
+    {"Cos", V_2023_0, {"NPU"}},
     {"Cosh", V_2020_4, {"CPU"}},
     {"Cosh", V_2022_1, {"GPU"}},
+    {"Cosh", V_2023_1, {"NPU"}},
     {"CumSum", V_2022_1, {"CPU", "GPU"}},
-    {"CumSum", V_2023_0, {"VPUX"}},
+    {"CumSum", V_2023_0, {"NPU"}},
     {"DepthToSpace", V_2020_4, {"CPU", "GPU"}},
-    {"DepthToSpace", V_2023_0, {"VPUX"}},
+    {"DepthToSpace", V_2023_0, {"NPU"}},
     {"DequantizeLinear", V_2021_4, {"CPU", "GPU"}},
-    {"DequantizeLinear", V_2023_0, {"VPUX"}},
+    {"DequantizeLinear", V_2023_0, {"NPU"}},
     {"Div", V_2020_4, {"CPU", "GPU"}},
-    {"Div", V_2023_0, {"VPUX"}},
+    {"Div", V_2023_0, {"NPU"}},
     {"Dropout", V_2020_4, {"CPU", "GPU"}},
-    {"Dropout", V_2023_0, {"VPUX"}},
+    {"Dropout", V_2023_0, {"NPU"}},
     {"Elu", V_2020_4, {"CPU", "GPU"}},
-    {"Elu", V_2023_0, {"VPUX"}},
+    {"Elu", V_2023_0, {"NPU"}},
     // {"Einsum", V_2023_0, {"CPU", "GPU"}},
     {"Equal", V_2020_4, {"CPU", "GPU"}},
-    {"Equal", V_2023_0, {"VPUX"}},  // Added for whisper decoder model.
+    {"Equal", V_2023_0, {"NPU"}},  // Added for whisper decoder model.
     {"Erf", V_2020_4, {"CPU", "GPU"}},
-    {"Erf", V_2023_0, {"VPUX"}},
+    {"Erf", V_2023_0, {"NPU"}},
     {"Exp", V_2020_4, {"CPU", "GPU"}},
-    {"Exp", V_2023_0, {"VPUX"}},
+    {"Exp", V_2023_0, {"NPU"}},
     {"Expand", V_2022_1, {"CPU", "GPU"}},
-    {"Expand", V_2023_0, {"VPUX"}},  // Gets mapped to broadcast op and multiply op in the plugin.
+    {"Expand", V_2023_0, {"NPU"}},  // Gets mapped to broadcast op and multiply op in the plugin.
     {"EyeLike", V_2022_1, {"CPU"}},
-    {"EyeLike", V_2023_0, {"VPUX"}},  // NoOP
+    {"EyeLike", V_2023_0, {"NPU"}},  // NoOP
     {"Flatten", V_2020_4, {"CPU", "GPU"}},
-    {"Flatten", V_2023_0, {"VPUX"}},
+    {"Flatten", V_2023_0, {"NPU"}},
     {"Floor", V_2020_4, {"CPU", "GPU"}},
+    {"Floor", V_2023_1, {"NPU"}},
     {"Gather", V_2020_4, {"CPU", "GPU"}},
-    {"Gather", V_2023_0, {"VPUX"}},
+    {"Gather", V_2023_0, {"NPU"}},
     {"GatherElements", V_2022_2, {"CPU", "GPU"}},
+    {"GatherElements", V_2023_1, {"NPU"}},
     {"GatherND", V_2021_4, {"CPU", "GPU"}},
+    {"GatherND", V_2023_1, {"NPU"}},
     {"Gemm", V_2020_4, {"CPU", "GPU"}},
-    {"Gemm", V_2023_0, {"VPUX"}},
+    {"Gemm", V_2023_0, {"NPU"}},
     {"GlobalAveragePool", V_2020_4, {"CPU", "GPU"}},
-    {"GlobalAveragePool", V_2023_0, {"VPUX"}},
+    {"GlobalAveragePool", V_2023_0, {"NPU"}},
     {"GlobalLpPool", V_2020_4, {"CPU", "GPU"}},
+    {"GlobalLpPool", V_2023_1, {"NPU"}},
     {"GlobalMaxPool", V_2022_1, {"CPU", "GPU"}},
+    {"GlobalMaxPool", V_2023_1, {"NPU"}},
     {"Greater", V_2020_4, {"CPU", "GPU"}},
-    {"Greater", V_2023_0, {"VPUX"}},
+    {"Greater", V_2023_0, {"NPU"}},
     {"GreaterOrEqual", V_2022_1, {"CPU", "GPU"}},
-    {"GreaterOrEqual", V_2023_0, {"VPUX"}},
+    {"GreaterOrEqual", V_2023_0, {"NPU"}},
     {"GridSample", V_2022_3, {"CPU"}},
     {"GridSample", V_2023_0, {"GPU"}},
+    {"GridSample", V_2023_1, {"NPU"}},
+    {"HardMax", V_2023_1, {"CPU", "GPU", "NPU"}},
     {"Identity", V_2020_4, {"CPU", "GPU"}},
-    {"Identity", V_2023_0, {"VPUX"}},  // NoOP
+    {"Identity", V_2023_0, {"NPU"}},  // NoOP
     {"If", V_2022_3, {"CPU", "GPU"}},
+    {"If", V_2023_1, {"NPU"}},
     {"ImageScaler", V_2022_1, {"CPU", "GPU"}},
-    {"ImageScaler", V_2023_0, {"VPUX"}},
+    {"ImageScaler", V_2023_0, {"NPU"}},
     {"InstanceNormalization", V_2020_4, {"CPU", "GPU"}},
-    {"InstanceNormalization", V_2023_0, {"VPUX"}},
+    {"InstanceNormalization", V_2023_0, {"NPU"}},
     {"HardSigmoid", V_2020_4, {"CPU", "GPU"}},
+    {"HardSigmoid", V_2023_1, {"NPU"}},
     {"HardMax", V_2022_1, {"CPU", "GPU"}},
     {"LeakyRelu", V_2020_4, {"CPU", "GPU"}},
-    {"LeakyRelu", V_2023_0, {"VPUX"}},
+    {"LeakyRelu", V_2023_0, {"NPU"}},
     {"Less", V_2020_4, {"CPU", "GPU"}},
-    {"Less", V_2023_0, {"VPUX"}},  // Added for whisper decoder model.
+    {"Less", V_2023_0, {"NPU"}},  // Added for whisper decoder model.
     {"LessOrEqual", V_2022_1, {"CPU", "GPU"}},
-    {"LessOrEqual", V_2023_0, {"VPUX"}},
+    {"LessOrEqual", V_2023_0, {"NPU"}},
     {"Log", V_2020_4, {"CPU", "GPU"}},
-    {"Log", V_2023_0, {"VPUX"}},
+    {"Log", V_2023_0, {"NPU"}},
     {"LogSoftMax", V_2022_1, {"CPU", "GPU"}},
     {"Loop", V_2021_4, {"CPU", "GPU"}},
+    {"LpNormalization", V_2023_1, {"CPU", "GPU", "NPU"}},
+    {"LpPool", V_2023_1, {"CPU", "GPU", "NPU"}},
     {"LRN", V_2020_4, {"CPU", "GPU"}},
-    {"LRN", V_2023_0, {"VPUX"}},
+    {"LRN", V_2023_0, {"NPU"}},
     {"LSTM", V_2020_4, {"CPU", "GPU"}},
+    {"LSTM", V_2023_1, {"NPU"}},
     {"MatMul", V_2020_4, {"CPU", "GPU"}},
-    {"MatMul", V_2023_0, {"VPUX"}},
+    {"MatMul", V_2023_0, {"NPU"}},
     {"MatMulInteger", V_2022_1, {"CPU"}},
+    {"MatMulInteger", V_2023_1, {"NPU"}},
     {"Max", V_2020_4, {"CPU", "GPU"}},
-    {"Max", V_2023_0, {"VPUX"}},
+    {"Max", V_2023_0, {"NPU"}},
     {"MaxPool", V_2020_4, {"CPU", "GPU"}},
-    {"MaxPool", V_2023_0, {"VPUX"}},
+    {"MaxPool", V_2023_0, {"NPU"}},
     {"Mean", V_2020_4, {"CPU", "GPU"}},
-    {"Mean", V_2023_0, {"VPUX"}},
+    {"Mean", V_2023_0, {"NPU"}},
     {"MeanVarianceNormalization", V_2022_1, {"CPU", "GPU"}},
+    {"MeanVarianceNormalization", V_2023_1, {"NPU"}},
     {"Min", V_2020_4, {"CPU", "GPU"}},
-    {"Min", V_2023_0, {"VPUX"}},
+    {"Min", V_2023_0, {"NPU"}},
     {"Mod", V_2022_1, {"CPU", "GPU"}},
     {"Mul", V_2020_4, {"CPU", "GPU"}},
-    {"Mul", V_2023_0, {"VPUX"}},
+    {"Mul", V_2023_0, {"NPU"}},
     {"Neg", V_2020_4, {"CPU", "GPU"}},
-    {"Neg", V_2023_0, {"VPUX"}},
+    {"Neg", V_2023_0, {"NPU"}},
     {"NonMaxSuppression", V_2021_1, {"CPU", "GPU"}},
+    {"NonMaxSuppression", V_2023_1, {"NPU"}},
     {"NonZero", V_2021_1, {"CPU"}},
     {"NonZero", V_2023_0, {"GPU"}},
     {"Not", V_2021_1, {"CPU", "GPU"}},
     {"Not", V_2020_4, {"CPU", "GPU"}},
+    {"Not", V_2023_1, {"NPU"}},
     {"OneHot", V_2020_4, {"CPU", "GPU"}},
+    {"OneHot", V_2023_1, {"NPU"}},
     {"Or", V_2022_1, {"CPU", "GPU"}},
+    {"Or", V_2023_1, {"NPU"}},
     {"Pad", V_2020_4, {"CPU", "GPU"}},
-    {"Pad", V_2023_0, {"VPUX"}},
+    {"Pad", V_2023_0, {"NPU"}},
     {"Pow", V_2020_4, {"CPU", "GPU"}},
-    {"Pow", V_2023_0, {"VPUX"}},
+    {"Pow", V_2023_0, {"NPU"}},
     {"PRelu", V_2020_4, {"CPU", "GPU"}},
-    {"PRelu", V_2023_0, {"VPUX"}},
+    {"PRelu", V_2023_0, {"NPU"}},
     {"QLinearMatMul", V_2022_3, {"CPU"}},
+    // {"QLinearMatMul", V_2023_1, {"NPU"}},
     {"QuantizeLinear", V_2021_4, {"CPU", "GPU"}},
-    {"QuantizeLinear", V_2023_0, {"VPUX"}},
+    {"QuantizeLinear", V_2023_0, {"NPU"}},
+    {"RNN", V_2023_1, {"CPU", "GPU"}},
+    {"RandomNormalLike", V_2023_0, {"CPU", "GPU"}},
     {"RandomNormalLike", V_2023_0, {"CPU", "GPU"}},
+    {"RandomNormalLike", V_2023_1, {"NPU"}},
     {"RandomNormal", V_2023_0, {"CPU", "GPU"}},
+    {"RandomNormal", V_2023_1, {"NPU"}},
     {"Range", V_2022_1, {"CPU", "GPU"}},
-    {"Range", V_2023_0, {"VPUX"}},
+    {"Range", V_2023_0, {"NPU"}},
     {"Reciprocal", V_2020_4, {"CPU", "GPU"}},
-    {"Reciprocal", V_2023_0, {"VPUX"}},
+    {"Reciprocal", V_2023_0, {"NPU"}},
     {"ReduceL1", V_2022_1, {"CPU", "GPU"}},
+    {"ReduceL1", V_2023_1, {"NPU"}},
     {"ReduceL2", V_2022_1, {"CPU", "GPU"}},
+    {"ReduceL2", V_2023_1, {"NPU"}},
     {"ReduceLogSum", V_2020_4, {"CPU"}},
     {"ReduceLogSum", V_2022_1, {"CPU", "GPU"}},
+    {"ReduceLogSum", V_2023_1, {"NPU"}},
     {"ReduceLogSumExp", V_2022_1, {"CPU", "GPU"}},
+    {"ReduceLogSumExp", V_2023_1, {"NPU"}},
     {"ReduceMax", V_2020_4, {"CPU", "GPU"}},
+    {"ReduceMax", V_2023_1, {"NPU"}},
     {"ReduceMean", V_2020_4, {"CPU", "GPU"}},
-    {"ReduceMean", V_2023_0, {"VPUX"}},
+    {"ReduceMean", V_2023_0, {"NPU"}},
     {"ReduceMin", V_2020_4, {"CPU", "GPU"}},
+    {"ReduceMin", V_2023_1, {"NPU"}},
     {"ReduceProd", V_2020_4, {"CPU"}},
     {"ReduceProd", V_2022_1, {"GPU"}},
+    {"ReduceProd", V_2023_1, {"NPU"}},
     {"ReduceSum", V_2020_4, {"CPU", "GPU"}},
+    // {"ReduceSum", V_2023_1, {"NPU"}},
     {"ReduceSumSquare", V_2020_4, {"CPU"}},
     {"ReduceSumSquare", V_2022_1, {"CPU", "GPU"}},
+    {"ReduceSumSquare", V_2023_1, {"NPU"}},
     {"Relu", V_2020_4, {"CPU", "GPU"}},
-    {"Relu", V_2023_0, {"VPUX"}},
+    {"Relu", V_2023_0, {"NPU"}},
     {"Resize", V_2020_4, {"CPU"}},
     {"Resize", V_2022_1, {"GPU"}},
+    {"Resize", V_2023_1, {"NPU"}},
     {"Reshape", V_2020_4, {"CPU", "GPU"}},
-    {"Reshape", V_2023_0, {"VPUX"}},
+    {"Reshape", V_2023_0, {"NPU"}},
     {"ReverseSequence", V_2022_1, {"CPU", "GPU"}},
     {"RoiAlign", V_2021_1, {"CPU", "GPU"}},
+    {"RoiAlign", V_2023_1, {"NPU"}},
     {"Round", V_2021_4, {"CPU", "GPU"}},
+    {"Round", V_2023_1, {"NPU"}},
     {"Scatter", V_2022_1, {"CPU", "GPU"}},
+    {"Scatter", V_2023_1, {"NPU"}},
     {"ScatterElements", V_2022_1, {"CPU", "GPU"}},
+    {"ScatterElements", V_2023_1, {"NPU"}},
     {"ScatterND", V_2022_1, {"CPU", "GPU"}},
+    {"ScatterND", V_2023_1, {"NPU"}},
     {"Selu", V_2020_4, {"CPU", "GPU"}},
+    {"Selu", V_2023_1, {"NPU"}},
     {"Shape", V_2020_4, {"CPU", "GPU"}},
-    {"Shape", V_2023_0, {"VPUX"}},
+    {"Shape", V_2023_0, {"NPU"}},
     {"Shrink", V_2022_1, {"CPU", "GPU"}},
-    {"Shrink", V_2023_0, {"VPUX"}},
+    {"Shrink", V_2023_0, {"NPU"}},
     {"Sigmoid", V_2020_4, {"CPU", "GPU"}},
-    {"Sigmoid", V_2023_0, {"VPUX"}},
+    {"Sigmoid", V_2023_0, {"NPU"}},
     {"Sign", V_2020_4, {"CPU"}},
     {"Sign", V_2022_1, {"GPU"}},
-    {"Sign", V_2023_0, {"VPUX"}},
+    {"Sign", V_2023_0, {"NPU"}},
     {"Sin", V_2022_1, {"CPU", "GPU"}},
-    {"Sin", V_2023_0, {"VPUX"}},
+    {"Sin", V_2023_0, {"NPU"}},
     {"Sinh", V_2020_4, {"CPU"}},
+    {"Sinh", V_2023_1, {"NPU"}},
     {"Size", V_2022_1, {"CPU", "GPU"}},
+    {"Size", V_2023_1, {"NPU"}},
     {"Slice", V_2020_4, {"CPU", "GPU"}},
-    {"Slice", V_2023_0, {"VPUX"}},
+    {"Slice", V_2023_0, {"NPU"}},
     {"Softmax", V_2020_4, {"CPU", "GPU"}},
-    {"Softmax", V_2023_0, {"VPUX"}},
+    {"Softmax", V_2023_0, {"NPU"}},
     {"Softplus", V_2022_1, {"CPU", "GPU"}},
-    {"Softplus", V_2023_0, {"VPUX"}},
+    {"Softplus", V_2023_0, {"NPU"}},
     {"Softsign", V_2022_1, {"CPU", "GPU"}},
     {"SpaceToDepth", V_2020_4, {"CPU", "GPU"}},
-    {"SpaceToDepth", V_2023_0, {"VPUX"}},
+    {"SpaceToDepth", V_2023_0, {"NPU"}},
     {"Split", V_2020_4, {"CPU", "GPU"}},
-    {"Split", V_2023_0, {"VPUX"}},
+    {"Split", V_2023_0, {"NPU"}},
     {"Sqrt", V_2020_4, {"CPU", "GPU"}},
-    {"Sqrt", V_2023_0, {"VPUX"}},
+    {"Sqrt", V_2023_0, {"NPU"}},
     {"Squeeze", V_2020_4, {"CPU", "GPU"}},
-    {"Squeeze", V_2023_0, {"VPUX"}},
+    {"Squeeze", V_2023_0, {"NPU"}},
     {"Softsign", V_2020_4, {"CPU"}},
     {"Sub", V_2020_4, {"CPU", "GPU"}},
-    {"Sub", V_2023_0, {"VPUX"}},
+    {"Sub", V_2023_0, {"NPU"}},
     {"Sum", V_2020_4, {"CPU", "GPU"}},
-    {"Sum", V_2023_0, {"VPUX"}},
+    {"Sum", V_2023_0, {"NPU"}},
     {"Tan", V_2020_4, {"CPU", "GPU"}},
+    {"Tan", V_2023_1, {"NPU"}},
     {"Tanh", V_2020_4, {"CPU", "GPU"}},
-    {"Tanh", V_2023_0, {"VPUX"}},
+    {"Tanh", V_2023_0, {"NPU"}},
     {"ThresholdedRelu", V_2022_1, {"CPU", "GPU"}},
-    {"ThresholdedRelu", V_2023_0, {"VPUX"}},
+    {"ThresholdedRelu", V_2023_0, {"NPU"}},
     {"Tile", V_2021_3, {"CPU", "GPU"}},
-    {"Tile", V_2023_0, {"VPUX"}},
+    {"Tile", V_2023_0, {"NPU"}},
     {"Transpose", V_2020_4, {"CPU", "GPU"}},
-    {"Transpose", V_2023_0, {"VPUX"}},
+    {"Transpose", V_2023_0, {"NPU"}},
     {"Trilu", V_2023_0, {"CPU", "GPU"}},
+    {"Trilu", V_2023_1, {"NPU"}},
     {"TopK", V_2020_4, {"CPU", "GPU"}},
-    {"TopK", V_2023_0, {"VPUX"}},
+    {"TopK", V_2023_0, {"NPU"}},
+    {"Upsample", V_2020_4, {"CPU", "GPU"}},
     {"Unsqueeze", V_2020_4, {"CPU", "GPU"}},
-    {"Unsqueeze", V_2023_0, {"VPUX"}},
-    {"Upsample", V_2021_1, {"CPU"}},
-    {"Upsample", V_2021_4, {"GPU"}},
-    {"Upsample", V_2023_0, {"VPUX"}},
+    {"Unsqueeze", V_2023_0, {"NPU"}},
     {"Where", V_2022_1, {"CPU", "GPU"}},
-    {"Where", V_2023_0, {"VPUX"}},  // Added for whisper decoder model.
+    {"Where", V_2023_0, {"NPU"}},  // Added for whisper decoder model.
     {"Xor", V_2022_1, {"CPU", "GPU"}},
+    {"Xor", V_2023_1, {"NPU"}},
 };
 
 void DataOps::populate_types_supported() {
-  supported_types_initializer_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
-  supported_types_initializer_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT));
-  supported_types_initializer_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32));
-  supported_types_initializer_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64));
-  supported_types_initializer_.insert(std::make_pair(V_2021_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16));
-  supported_types_initializer_.insert(std::make_pair(V_2021_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8));
-  supported_types_initializer_.insert(std::make_pair(V_2021_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8));
+  supported_types_initializer_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
+  supported_types_initializer_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT));
+  supported_types_initializer_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32));
+  supported_types_initializer_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64));
+  supported_types_initializer_.insert(
+      std::make_pair(V_2021_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16));
+  supported_types_initializer_.insert(
+      std::make_pair(V_2021_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8));
+  supported_types_initializer_.insert(
+      std::make_pair(V_2021_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8));
 
-  supported_types_vpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
-  supported_types_vpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT));
-  supported_types_vpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8));
-  supported_types_vpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8));
-  supported_types_vpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT16));
-  supported_types_vpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32));
-  supported_types_vpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64));
-  supported_types_vpu_.insert(std::make_pair(V_2021_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16));
+  supported_types_npu_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
+  supported_types_npu_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT));
+  supported_types_npu_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8));
+  supported_types_npu_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8));
+  supported_types_npu_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT16));
+  supported_types_npu_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32));
+  supported_types_npu_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64));
+  supported_types_npu_.insert(
+      std::make_pair(V_2021_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16));
 
-  supported_types_cpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
-  supported_types_cpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT));
-  supported_types_cpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32));
-  supported_types_cpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT16));
-  supported_types_cpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8));
-  supported_types_cpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8));
-  supported_types_cpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64));
-  supported_types_cpu_.insert(std::make_pair(V_2022_2, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16));
+  supported_types_cpu_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
+  supported_types_cpu_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT));
+  supported_types_cpu_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32));
+  supported_types_cpu_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT16));
+  supported_types_cpu_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8));
+  supported_types_cpu_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8));
+  supported_types_cpu_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64));
+  supported_types_cpu_.insert(
+      std::make_pair(V_2022_2, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16));
 
-  supported_types_gpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT));
-  supported_types_gpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32));
-  supported_types_gpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64));
-  supported_types_gpu_.insert(std::make_pair(V_2021_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16));
-  supported_types_gpu_.insert(std::make_pair(V_2021_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8));
-  supported_types_gpu_.insert(std::make_pair(V_2021_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8));
-  supported_types_gpu_.insert(std::make_pair(V_2022_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
+  supported_types_gpu_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT));
+  supported_types_gpu_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32));
+  supported_types_gpu_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64));
+  supported_types_gpu_.insert(
+      std::make_pair(V_2021_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16));
+  supported_types_gpu_.insert(
+      std::make_pair(V_2021_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8));
+  supported_types_gpu_.insert(
+      std::make_pair(V_2021_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8));
+  supported_types_gpu_.insert(
+      std::make_pair(V_2022_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
 }
 
 void DataOps::populate_op_mode_supported() {
@@ -349,10 +439,10 @@ void DataOps::populate_op_mode_supported() {
   no_dimension_supported_.push_back({"Equal", V_2023_0, {"GPU"}});
   no_dimension_supported_.push_back({"Floor", V_2020_4, {"All"}});
   no_dimension_supported_.push_back({"Gather", V_2020_4, {"All"}});
-  no_dimension_supported_.push_back({"Greater", V_2023_0, {"VPUX"}});
+  no_dimension_supported_.push_back({"Greater", V_2023_0, {"NPU"}});
   no_dimension_supported_.push_back({"Less", V_2022_1, {"CPU"}});
   no_dimension_supported_.push_back({"Loop", V_2021_4, {"All"}});
-  no_dimension_supported_.push_back({"Max", V_2023_0, {"VPUX"}});
+  no_dimension_supported_.push_back({"Max", V_2023_0, {"NPU"}});
   no_dimension_supported_.push_back({"Min", V_2020_4, {"All"}});
   no_dimension_supported_.push_back({"Mul", V_2020_4, {"All"}});
   no_dimension_supported_.push_back({"QuantizeLinear", V_2021_4, {"All"}});
@@ -382,11 +472,14 @@ void DataOps::populate_op_mode_supported() {
   {
     UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3},
                              [this](const Node* node, const InitializedTensorSet&) {
-                               // Abs is not supproted with INT8 or INT32 as input data type on GPU
-                               if (device_id_.find("GPU") != std::string::npos) {
+                               // Abs is not supproted with INT8 or INT32 as input data type on GPU and NPU
+                               if ((device_id_.find("GPU") != std::string::npos) ||
+                                   (device_id_.find("NPU") != std::string::npos)) {
                                  for (size_t i = 0; i < node->InputDefs().size(); i++) {
-                                   if (node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8 ||
-                                       node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32)
+                                   if (node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() ==
+                                           ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8 ||
+                                       node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() ==
+                                           ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32)
                                      return true;
                                  }
                                }
@@ -399,11 +492,14 @@ void DataOps::populate_op_mode_supported() {
                              [this](const Node* node, const InitializedTensorSet&) {
                                // tensor type does not support select last index
                                auto& attributes = node->GetAttributes();
-                               auto last_index_arg = attributes.count("select_last_index") > 0 ? attributes.at("select_last_index").i() : 0;
+                               auto last_index_arg =
+                                   attributes.count("select_last_index") > 0 ? attributes.at("select_last_index").i()
+                                                                             : 0;
                                if (last_index_arg != 0)
                                  return true;
                                // tensor type supports float as input for argmax and argmin
-                               if (node->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type() != ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT)
+                               if (node->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type() !=
+                                   ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT)
                                  return true;
                                return false;
                              }};
@@ -415,7 +511,8 @@ void DataOps::populate_op_mode_supported() {
                              [this](const Node* node, const InitializedTensorSet&) {
                                if (device_id_.find("GPU") != std::string::npos) {
                                  // int64 data type is not supported on GPU
-                                 const bool data_is_int64 = node->InputDefs()[0]->Type()->find("int64") != std::string::npos;
+                                 const bool data_is_int64 =
+                                     node->InputDefs()[0]->Type()->find("int64") != std::string::npos;
                                  return data_is_int64;
                                }
                                return false;
@@ -506,9 +603,12 @@ void DataOps::populate_op_mode_supported() {
                                if (device_id_.find("GPU") != std::string::npos) {
                                  auto x_data_type = node->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
                                  auto y_data_type = node->InputDefs()[1]->TypeAsProto()->tensor_type().elem_type();
-                                 // currently both inputs with int32 are not supported and also both input datatypes should be same
-                                 const bool A_is_int32 = node->InputDefs()[0]->Type()->find("int32") != std::string::npos;
-                                 const bool B_is_int32 = node->InputDefs()[1]->Type()->find("int32") != std::string::npos;
+                                 // currently both inputs with int32 are not supported
+                                 // and also both input datatypes should be same
+                                 const bool A_is_int32 =
+                                     node->InputDefs()[0]->Type()->find("int32") != std::string::npos;
+                                 const bool B_is_int32 =
+                                     node->InputDefs()[1]->Type()->find("int32") != std::string::npos;
                                  if ((A_is_int32 && B_is_int32) || (x_data_type != y_data_type))
                                    return true;
                                }
@@ -589,11 +689,13 @@ void DataOps::populate_op_mode_supported() {
                                if (device_id_.find("GPU") != std::string::npos) {
                                  auto slope = node->InputDefs()[1];
                                  // PRelu slope has to be an initializer or needs to come from a constant node
-                                 if (initializers.count(slope->Name()))
+                                 if (initializers.count(slope->Name())) {
                                    return false;
-                                 else {
-                                   for (auto input_node = node->InputNodesBegin(); input_node != node->InputNodesEnd(); ++input_node) {
-                                     if (GetInputCount(this->graph_viewer_.GetNode((*input_node).Index()), initializers) == 0)
+                                 } else {
+                                   for (auto input_node = node->InputNodesBegin();
+                                        input_node != node->InputNodesEnd(); ++input_node) {
+                                     if (GetInputCount(
+                                             this->graph_viewer_.GetNode((*input_node).Index()), initializers) == 0)
                                        return false;
                                    }
                                  }
@@ -603,12 +705,12 @@ void DataOps::populate_op_mode_supported() {
     op_list_.insert({"PRelu", obj});
   }
   {
-    UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3, V_2023_0},
+    UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3, V_2023_0, V_2023_1},
                              [this](const Node* node, const InitializedTensorSet&) {
                                const auto& input_arg = node->InputDefs()[1];
                                auto shape = input_arg->Shape();
                                // Reshape op with empty dim is Rejected for Myriad
-                               //[TODO] Is this condition required anymore with Myriad removed?
+                               // [TODO] Is this condition required anymore with Myriad removed?
                                if (shape != nullptr) {
                                  for (const auto& dim : input_arg->Shape()->dim()) {
                                    if (utils::HasDimValue(dim) && dim.dim_value() == 0)
@@ -638,7 +740,8 @@ void DataOps::populate_op_mode_supported() {
                                if (device_id_.find("GPU") != std::string::npos) {
                                  // INT32 dataype is not supported as input
                                  for (size_t i = 0; i < node->InputDefs().size(); i++) {
-                                   if (node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32)
+                                   if (node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() ==
+                                       ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32)
                                      return true;
                                  }
                                }
@@ -650,9 +753,11 @@ void DataOps::populate_op_mode_supported() {
     UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3},
                              [this](const Node* node, const InitializedTensorSet&) {
                                if (device_id_.find("GPU") != std::string::npos) {
-                                 auto output_data_type = node->OutputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
+                                 auto output_data_type =
+                                     node->OutputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
                                  // If the output of ScatterND op is BOOL, it is rejected for GPU.
-                                 if (output_data_type == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL)
+                                 if (output_data_type ==
+                                     ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL)
                                    return true;
                                }
                                return false;
@@ -666,7 +771,8 @@ void DataOps::populate_op_mode_supported() {
                              [this](const Node* node, const InitializedTensorSet&) {
                                // If the Input of Shrink op is UINT8, it is rejected (Due to output mismatch)
                                for (size_t i = 0; i < node->InputDefs().size(); i++) {
-                                 if (node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8)
+                                 if (node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() ==
+                                     ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8)
                                    return true;
                                }
                                return false;
@@ -714,10 +820,11 @@ void DataOps::populate_op_mode_supported() {
     op_list_.insert({"Squeeze", obj});
   }
   {
-    UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3, V_2023_0},
+    UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3, V_2023_0, V_2023_1},
                              [this](const Node* node, const InitializedTensorSet&) {
                                // If the operator is unsqueeze
-                               // If axes is an input, then we cannot produce a static graph. Conversion fails in convert_function_to_cnn_network.
+                               // If axes is an input, then we cannot produce a static graph.
+                               // Conversion fails in convert_function_to_cnn_network.
                                for (size_t i = 0; i < node->InputDefs().size(); i++) {
                                  if (node->InputDefs()[i]->Name() == "axes") {
                                    return true;
@@ -728,14 +835,15 @@ void DataOps::populate_op_mode_supported() {
     op_list_.insert({"Unsqueeze", obj});
   }
   {
-    UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3, V_2023_0},
+    UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3, V_2023_0, V_2023_1},
                              [this](const Node* node, const InitializedTensorSet&) {
                                // check for attributes
                                auto& upsample_attr = node->GetAttributes();
                                if (upsample_attr.count("scales") > 0) {
                                  auto& upsample_arg = upsample_attr.at("scales");
                                  auto float_size = upsample_arg.floats_size();
-                                 if (float_size > 2 && (upsample_arg.floats(0) != 1.f || upsample_arg.floats(1) != 1.f)) {
+                                 if (float_size > 2 &&
+                                     (upsample_arg.floats(0) != 1.f || upsample_arg.floats(1) != 1.f)) {
                                    return true;
                                  }
                                }
@@ -750,9 +858,12 @@ void DataOps::populate_op_mode_supported() {
                                  }
                                }
                                // x_arg supports only float, int8 and float16 type
-                               if ((x_arg->TypeAsProto()->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT) ||
-                                   (x_arg->TypeAsProto()->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8) ||
-                                   (x_arg->TypeAsProto()->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16)) {
+                               if ((x_arg->TypeAsProto()->tensor_type().elem_type() ==
+                                    ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT) ||
+                                   (x_arg->TypeAsProto()->tensor_type().elem_type() ==
+                                    ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8) ||
+                                   (x_arg->TypeAsProto()->tensor_type().elem_type() ==
+                                    ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16)) {
                                  return false;
                                } else {
                                  return true;
@@ -849,9 +960,9 @@ bool DataOps::type_is_supported(const NodeArg* node_arg, bool is_initializer) {
   } else {
     auto dtype = type_proto->tensor_type().elem_type();
 
-    if (device_id_.find("VPUX") != std::string::npos || device_id_.find("HETERO") != std::string::npos ||
+    if (device_id_.find("NPU") != std::string::npos || device_id_.find("HETERO") != std::string::npos ||
         device_id_.find("MULTI") != std::string::npos || device_id_.find("AUTO") != std::string::npos) {
-      for (auto const& var : supported_types_vpu_) {
+      for (auto const& var : supported_types_npu_) {
         if ((var.first <= version_id_) &&
             (var.second == dtype)) {
           return true;
@@ -1079,7 +1190,9 @@ bool DataOps::node_is_supported(const std::map<std::string, std::set<std::string
   if (opset->second.find(optype) == opset->second.end() && op_fun == ops_supported_as_function.end()) {
 #ifndef NDEBUG
     if (openvino_ep::backend_utils::IsDebugEnabled()) {
-      std::cout << "The operator is not available in OpenVINO ngraph operators list nor the operator is a special ONNX function" << std::endl;
+      std::cout << "The operator is not available in OpenVINO ngraph operators list"
+                << "nor the operator is a special ONNX function"
+                << std::endl;
     }
 #endif
     return false;
@@ -1095,10 +1208,12 @@ std::vector<NodeIndex> DataOps::GetUnsupportedNodeIndices(std::unordered_set<std
   for (const auto& node_idx : graph_viewer_.GetNodesInTopologicalOrder()) {
     if (node_is_supported(ng_supported_ops, node_idx)) {
       // Collect inputs that are initializers
-      graph_viewer_.GetNode(node_idx)->ForEachDef([&ng_required_initializers, this](const NodeArg& node_arg, bool is_input) {
-            if(is_input && this->graph_viewer_.GetAllInitializedTensors().count(node_arg.Name())) {
+      graph_viewer_.GetNode(node_idx)->ForEachDef([&ng_required_initializers, this](const NodeArg& node_arg,
+                                                                                    bool is_input) {
+            if (is_input && this->graph_viewer_.GetAllInitializedTensors().count(node_arg.Name())) {
                 ng_required_initializers.insert(node_arg.Name());
-              } }, true);
+              } },
+                                                  true);
     } else {
       unsupported_nodes_idx.push_back(node_idx);
     }
@@ -1110,7 +1225,8 @@ bool DataOps::IsOpSupportedOnlyInModel(std::string name) {
   return ops_supported_only_in_model.find(name) != ops_supported_only_in_model.end();
 }
 
-bool DataOps::SpecialConditionForClusterSizeOne(std::unordered_set<std::string>& ng_required_initializers, const Node* node) {
+bool DataOps::SpecialConditionForClusterSizeOne(std::unordered_set<std::string>& ng_required_initializers,
+                                                const Node* node) {
   if (node->OpType() == "Reshape") {
     const auto& shape_arg = node->InputDefs()[1];
     if (ng_required_initializers.find(shape_arg->Name()) == ng_required_initializers.end()) {
@@ -1119,15 +1235,20 @@ bool DataOps::SpecialConditionForClusterSizeOne(std::unordered_set<std::string>&
   } else if (node->OpType() == "Expand") {
     // nGraph only supports constant shape input values
     const auto& output = node->OutputDefs()[0];
-    if (output->TypeAsProto()->tensor_type().elem_type() != ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16)
+    if (output->TypeAsProto()->tensor_type().elem_type() !=
+        ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16)
       return true;
   } else if (node->OpType() == "RoiAlign") {
     using onnx_dtype = ONNX_NAMESPACE::TensorProto_DataType;
 
-    onnx_dtype input_0_data_type = (ONNX_NAMESPACE::TensorProto_DataType)node->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
-    onnx_dtype input_1_data_type = (ONNX_NAMESPACE::TensorProto_DataType)node->InputDefs()[1]->TypeAsProto()->tensor_type().elem_type();
-    onnx_dtype input_2_data_type = (ONNX_NAMESPACE::TensorProto_DataType)node->InputDefs()[2]->TypeAsProto()->tensor_type().elem_type();
-    onnx_dtype output_data_type = (ONNX_NAMESPACE::TensorProto_DataType)node->OutputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
+    onnx_dtype input_0_data_type =
+        (ONNX_NAMESPACE::TensorProto_DataType)node->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
+    onnx_dtype input_1_data_type =
+        (ONNX_NAMESPACE::TensorProto_DataType)node->InputDefs()[1]->TypeAsProto()->tensor_type().elem_type();
+    onnx_dtype input_2_data_type =
+        (ONNX_NAMESPACE::TensorProto_DataType)node->InputDefs()[2]->TypeAsProto()->tensor_type().elem_type();
+    onnx_dtype output_data_type =
+        (ONNX_NAMESPACE::TensorProto_DataType)node->OutputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
 
     if ((input_0_data_type != onnx_dtype::TensorProto_DataType_FLOAT16) ||
         (input_1_data_type != onnx_dtype::TensorProto_DataType_FLOAT16) ||
diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.h b/onnxruntime/core/providers/openvino/ov_versions/data_ops.h
index cc968d02ea644..a5aa3f825602c 100644
--- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.h
+++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.h
@@ -3,6 +3,11 @@
 
 #pragma once
 #include <unordered_set>
+#include <utility>
+#include <map>
+#include <set>
+#include <vector>
+#include <string>
 
 namespace onnxruntime {
 namespace openvino_ep {
@@ -47,7 +52,7 @@ class DataOps {
   std::multimap<std::string, UnsupportedOpMode> op_list_;
   std::vector<SupportedOp> subgraph_supported_;
   std::vector<SupportedOp> no_dimension_supported_;
-  std::set<Pairs> supported_types_vpu_;
+  std::set<Pairs> supported_types_npu_;
   std::set<Pairs> supported_types_cpu_;
   std::set<Pairs> supported_types_gpu_;
   std::set<Pairs> supported_types_initializer_;
@@ -64,14 +69,16 @@ class DataOps {
                          const NodeIndex node_idx);
 
  public:
-  DataOps(const GraphViewer& graph_viewer_param, VersionNum ver, std::string dev_id) : graph_viewer_(graph_viewer_param), version_id_(ver), device_id_(dev_id) {
+  DataOps(const GraphViewer& graph_viewer_param, VersionNum ver, std::string dev_id)
+      : graph_viewer_(graph_viewer_param), version_id_(ver), device_id_(dev_id) {
     populate_op_mode_supported();
     populate_types_supported();
   }
 
   virtual std::vector<NodeIndex> GetUnsupportedNodeIndices(std::unordered_set<std::string>& ng_required_initializers);
   virtual bool IsOpSupportedOnlyInModel(std::string name);
-  virtual bool SpecialConditionForClusterSizeOne(std::unordered_set<std::string>& ng_required_initializers, const Node* node);
+  virtual bool SpecialConditionForClusterSizeOne(
+      std::unordered_set<std::string>& ng_required_initializers, const Node* node);
   virtual bool DoNotOmitSubGraph(const std::string& name);
   virtual bool InsertNode(const std::string& name);
   VersionNum GetVersion() const { return version_id_; }
diff --git a/onnxruntime/core/providers/openvino/ov_versions/utils.cc b/onnxruntime/core/providers/openvino/ov_versions/utils.cc
index be509b6743621..ee0bfddb7dc83 100644
--- a/onnxruntime/core/providers/openvino/ov_versions/utils.cc
+++ b/onnxruntime/core/providers/openvino/ov_versions/utils.cc
@@ -2,6 +2,7 @@
 // Licensed under the MIT License
 
 #include "core/providers/shared_library/provider_api.h"
+#include "utils.h"
 
 #if defined(_MSC_VER)
 #pragma warning(disable : 4244 4245 5208)
@@ -113,7 +114,8 @@ std::map<std::string, std::set<std::string>> GetNgSupportedOps(const int onnx_op
  * supported_cluster + (UNsupported_node + rest_of_the_graph). This functions returns vector of all supported_clusters by nGraph
  */
 std::vector<std::vector<NodeIndex>>
-GetPartitionedClusters(const std::vector<NodeIndex>& topological_order, const std::vector<NodeIndex>& unsupported_nodes) {
+GetPartitionedClusters(const std::vector<NodeIndex>& topological_order,
+                       const std::vector<NodeIndex>& unsupported_nodes) {
   std::vector<std::vector<NodeIndex>> ng_clusters;
 
   auto prev = topological_order.begin();
@@ -140,7 +142,10 @@ GetPartitionedClusters(const std::vector<NodeIndex>& topological_order, const st
   return ng_clusters;
 }
 
-void IdentifyConnectedNodes(const GraphViewer& graph_viewer, NodeIndex curr_node_index, std::vector<NodeIndex>& cluster, std::vector<NodeIndex>& sub_cluster) {
+void IdentifyConnectedNodes(const GraphViewer& graph_viewer,
+                            NodeIndex curr_node_index,
+                            std::vector<NodeIndex>& cluster,
+                            std::vector<NodeIndex>& sub_cluster) {
   if (std::find(cluster.begin(), cluster.end(), curr_node_index) == cluster.end())
     return;
 
@@ -175,12 +180,12 @@ void GetInputsOutputsOfCluster(const GraphViewer& graph_viewer,
                                const std::unordered_set<std::string>& ng_required_initializers,
                                /*out*/ std::vector<std::string>& cluster_graph_inputs,
                                /*out*/ std::vector<std::string>& cluster_inputs,
-                               /*out*/ std::vector<std::string>& constant_inputs,
                                /*out*/ std::vector<std::string>& cluster_outputs) {
   std::unordered_set<std::string> input_args;
   std::vector<std::string> ordered_input_args;
   std::unordered_set<std::string> output_args;
   std::unordered_set<std::string> external_output_args;
+  std::vector<std::string> constant_inputs;
 
   for (const auto& node_idx : cluster) {
     const auto& node = graph_viewer.GetNode(node_idx);
@@ -205,7 +210,8 @@ void GetInputsOutputsOfCluster(const GraphViewer& graph_viewer,
       const auto& ext_node = graph_viewer.GetNode((*it).Index());
 
       if (std::find(cluster.begin(), cluster.end(), ext_node->Index()) == cluster.end()) {
-        // Node is external to this_cluster. Search through its inputs to find the output that is generated by this_cluster.
+        // Node is external to this_cluster. Search through its inputs to
+        // find the output that is generated by this_cluster.
         std::set<std::string> ext_node_inputs;
         ext_node->ForEachDef(
             [&ext_node_inputs](const NodeArg& arg, bool is_input) {
diff --git a/onnxruntime/core/providers/openvino/ov_versions/utils.h b/onnxruntime/core/providers/openvino/ov_versions/utils.h
index 70f6954ea991c..b3edeef88dfec 100644
--- a/onnxruntime/core/providers/openvino/ov_versions/utils.h
+++ b/onnxruntime/core/providers/openvino/ov_versions/utils.h
@@ -1,5 +1,15 @@
 // Copyright (C) 2019-2022 Intel Corporation
 // Licensed under the MIT License
+#pragma once
+
+#include <memory>
+#include <map>
+#include <utility>
+#include <vector>
+#include <set>
+#include <algorithm>
+#include <string>
+#include <unordered_set>
 
 namespace onnxruntime {
 namespace openvino_ep {
@@ -18,9 +28,14 @@ int GetOnnxOpSet(const GraphViewer& graph_viewer);
 std::map<std::string, std::set<std::string>> GetNgSupportedOps(const int onnx_opset);
 
 std::vector<std::vector<NodeIndex>>
-GetPartitionedClusters(const std::vector<NodeIndex>& topological_order, const std::vector<NodeIndex>& unsupported_nodes);
+GetPartitionedClusters(
+    const std::vector<NodeIndex>& topological_order, const std::vector<NodeIndex>& unsupported_nodes);
 
-void IdentifyConnectedNodes(const GraphViewer& graph_viewer, NodeIndex curr_node_index, std::vector<NodeIndex>& cluster, std::vector<NodeIndex>& sub_cluster);
+void IdentifyConnectedNodes(
+    const GraphViewer& graph_viewer,
+    NodeIndex curr_node_index,
+    std::vector<NodeIndex>& cluster,
+    std::vector<NodeIndex>& sub_cluster);
 
 std::vector<std::vector<NodeIndex>>
 GetConnectedClusters(const GraphViewer& graph_viewer, const std::vector<std::vector<NodeIndex>>& clusters);
@@ -30,7 +45,6 @@ void GetInputsOutputsOfCluster(const GraphViewer& graph_viewer,
                                const std::unordered_set<std::string>& ng_required_initializers,
                                /*out*/ std::vector<std::string>& cluster_graph_inputs,
                                /*out*/ std::vector<std::string>& cluster_inputs,
-                               /*out*/ std::vector<std::string>& constant_inputs,
                                /*out*/ std::vector<std::string>& cluster_outputs);
 
 }  // namespace openvino_ep
diff --git a/onnxruntime/core/providers/qnn/builder/onnx_ctx_model_helper.cc b/onnxruntime/core/providers/qnn/builder/onnx_ctx_model_helper.cc
new file mode 100644
index 0000000000000..234b957816662
--- /dev/null
+++ b/onnxruntime/core/providers/qnn/builder/onnx_ctx_model_helper.cc
@@ -0,0 +1,282 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "core/providers/qnn/builder/onnx_ctx_model_helper.h"
+#include "core/graph/constants.h"
+#include "core/providers/qnn/builder/qnn_model.h"
+
+#include <iostream>
+#include <fstream>
+#include <filesystem>
+
+namespace onnxruntime {
+namespace qnn {
+
+Status IsFusedGraphHasCtxNode(const std::vector<IExecutionProvider::FusedNodeAndGraph>& fused_nodes_and_graphs,
+                              bool& is_qnn_ctx_model) {
+  is_qnn_ctx_model = false;
+  for (const auto& fused_node_graph : fused_nodes_and_graphs) {
+    const onnxruntime::GraphViewer& graph_viewer(fused_node_graph.filtered_graph);
+    // It's an Onnx model with Qnn context cache binary if it only has a node with EPContext type
+    int count = 0;
+    for (const auto& node : graph_viewer.Nodes()) {
+      if (EPCONTEXT_OP == node.OpType()) {
+        is_qnn_ctx_model = true;
+      }
+      ++count;
+    }
+    ORT_RETURN_IF(is_qnn_ctx_model && count > 1, "Fused graph should only has 1 single EPContext node.");
+  }
+  return Status::OK();
+}
+
+bool IsQnnCtxModel(const onnxruntime::GraphViewer& graph_viewer) {
+  // It's an Onnx model with Qnn context cache binary if it only has a node with EPContext type
+  for (const auto& node : graph_viewer.Nodes()) {
+    if (EPCONTEXT_OP == node.OpType()) {
+      return true;
+    }
+  }
+  return false;
+}
+
+Status CreateNodeArgs(const std::vector<std::string>& names,
+                      const std::unordered_map<std::string, OnnxTensorInfo>& tensor_info_table,
+                      std::vector<NodeArg*>& node_args,
+                      onnxruntime::Graph& graph) {
+  using namespace ONNX_NAMESPACE;
+  for (size_t i = 0; i < names.size(); ++i) {
+    std::string name = names[i];
+    ORT_RETURN_IF(tensor_info_table.find(name) == tensor_info_table.end(), "Tensor name: ", name, " not found in tensor_info_table");
+    const OnnxTensorInfo& tensor_info = tensor_info_table.at(name);
+    TypeProto tensor_type;
+    tensor_type.mutable_tensor_type()->set_elem_type(tensor_info.data_type_);
+    for (size_t j = 0; j < tensor_info.shape_.size(); ++j) {
+      tensor_type.mutable_tensor_type()->mutable_shape()->add_dim()->set_dim_value(tensor_info.shape_[j]);
+    }
+    auto& input_arg = graph.GetOrCreateNodeArg(name, &tensor_type);
+    node_args.push_back(&input_arg);
+  }
+  return Status::OK();
+}
+
+Status GetEpContextFromModel(const onnxruntime::PathString& ctx_onnx_model_path,
+                             QnnBackendManager* qnn_backend_manager,
+                             QnnModel& qnn_model,
+                             const logging::Logger& logger) {
+  using namespace onnxruntime;
+  std::shared_ptr<Model> model;
+  ORT_RETURN_IF_ERROR(Model::Load(ToPathString(ctx_onnx_model_path), model, {}, logger));
+  const auto& graph = model->MainGraph();
+  return GetEpContextFromGraph(GraphViewer(graph),
+                               ctx_onnx_model_path,
+                               qnn_backend_manager,
+                               qnn_model);
+}
+
+Status GetEpContextFromGraph(const onnxruntime::GraphViewer& graph_viewer,
+                             const onnxruntime::PathString& ctx_onnx_model_path,
+                             QnnBackendManager* qnn_backend_manager,
+                             QnnModel& qnn_model) {
+  const auto& node = graph_viewer.Nodes().begin();
+  NodeAttrHelper node_helper(*node);
+  bool is_embed_mode = node_helper.Get(EMBED_MODE, true);
+  if (is_embed_mode) {
+    const std::string& context_binary = node_helper.Get(EP_CACHE_CONTEXT, "");
+    return qnn_backend_manager->LoadCachedQnnContextFromBuffer(const_cast<char*>(context_binary.c_str()),
+                                                               static_cast<uint64_t>(context_binary.length()),
+                                                               qnn_model);
+  }
+
+  std::string external_qnn_context_binary_file_name = node_helper.Get(EP_CACHE_CONTEXT, "");
+  std::filesystem::path folder_path = std::filesystem::path(ctx_onnx_model_path).parent_path();
+  std::filesystem::path context_binary_path = folder_path.append(external_qnn_context_binary_file_name);
+
+  size_t buffer_size{0};
+  std::ifstream cache_file(context_binary_path.string().c_str(), std::ifstream::binary);
+  ORT_RETURN_IF(!cache_file || !cache_file.good(), "Failed to open cache file.");
+
+  cache_file.seekg(0, cache_file.end);
+  buffer_size = static_cast<size_t>(cache_file.tellg());
+  ORT_RETURN_IF(0 == buffer_size, "Empty cache file encountered.");
+
+  cache_file.seekg(0, cache_file.beg);
+  std::unique_ptr<char[]> buffer = std::make_unique<char[]>(buffer_size);
+  ORT_RETURN_IF(nullptr == buffer, "Failed to allocate memory for cache file.");
+  // Load file into buffer
+  const auto& read_result = cache_file.read(buffer.get(), buffer_size);
+  ORT_RETURN_IF(!read_result, "Failed to read contents from cached context file.");
+  cache_file.close();
+  return qnn_backend_manager->LoadCachedQnnContextFromBuffer(buffer.get(),
+                                                             static_cast<uint64_t>(buffer_size),
+                                                             qnn_model);
+}
+
+Status LoadQnnCtxFromOnnxModel(const onnxruntime::GraphViewer& graph_viewer,
+                               const onnxruntime::PathString& ctx_onnx_model_path,
+                               bool is_qnn_ctx_model,
+                               bool is_ctx_cache_file_exist,
+                               QnnBackendManager* qnn_backend_manager,
+                               QnnModel& qnn_model,
+                               const logging::Logger& logger) {
+  Status status;
+  if (is_qnn_ctx_model) {
+    status = GetEpContextFromGraph(graph_viewer, ctx_onnx_model_path, qnn_backend_manager, qnn_model);
+  } else if (is_ctx_cache_file_exist) {
+    status = GetEpContextFromModel(ctx_onnx_model_path, qnn_backend_manager, qnn_model, logger);
+  }
+
+  if (!status.IsOK()) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_GRAPH, "Failed to load from EpContextModel. ", status.ErrorMessage());
+  }
+
+  return Status::OK();
+}
+
+Status GetMetadataFromEpContextModel(const onnxruntime::PathString& ctx_onnx_model_path,
+                                     std::string& model_name,
+                                     std::string& model_description,
+                                     std::string& graph_partition_name,
+                                     std::string& cache_source,
+                                     const logging::Logger& logger) {
+  using namespace onnxruntime;
+  std::shared_ptr<Model> model;
+  ORT_RETURN_IF_ERROR(Model::Load(ctx_onnx_model_path, model, {}, logger));
+  const auto& graph = GraphViewer(model->MainGraph());
+  const auto& node = graph.Nodes().begin();
+  NodeAttrHelper node_helper(*node);
+  model_name = graph.Name();
+  model_description = graph.Description();
+  graph_partition_name = node_helper.Get(PARTITION_NAME, "");
+  cache_source = node_helper.Get(SOURCE, "");
+
+  return Status::OK();
+}
+
+bool IsContextCacheFileExists(const std::string& customer_context_cache_path,
+                              const onnxruntime::PathString& model_pathstring,
+                              onnxruntime::PathString& context_cache_path) {
+  // Use user provided context cache file path if exist, otherwise try model_file.onnx_ctx.onnx by default
+  if (!customer_context_cache_path.empty()) {
+    context_cache_path = ToPathString(customer_context_cache_path);
+  } else if (!model_pathstring.empty()) {
+    context_cache_path = model_pathstring + ToPathString("_qnn_ctx.onnx");
+  }
+
+  return std::filesystem::is_regular_file(context_cache_path) && std::filesystem::exists(context_cache_path);
+}
+
+Status ValidateWithContextFile(const onnxruntime::PathString& context_cache_path,
+                               const std::string& model_name,
+                               const std::string& model_description,
+                               const std::string& graph_partition_name,
+                               const logging::Logger& logger) {
+  std::string model_name_from_ctx_cache;
+  std::string model_description_from_ctx_cache;
+  std::string graph_partition_name_from_ctx_cache;
+  std::string cache_source;
+  auto status = GetMetadataFromEpContextModel(context_cache_path,
+                                              model_name_from_ctx_cache,
+                                              model_description_from_ctx_cache,
+                                              graph_partition_name_from_ctx_cache,
+                                              cache_source,
+                                              logger);
+  if (!status.IsOK()) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_GRAPH, "Failed to get metadata from EpContextModel.");
+  }
+
+  // The source attribute from the skeleton onnx file indicate whether it's generated from QNN toolchain or ORT
+  if (cache_source != kQnnExecutionProvider) {
+    LOGS(logger, VERBOSE) << "Context binary cache is not generated by Ort.";
+    return Status::OK();
+  }
+
+  if (model_name != model_name_from_ctx_cache ||
+      model_description != model_description_from_ctx_cache ||
+      graph_partition_name != graph_partition_name_from_ctx_cache) {
+    std::string message = onnxruntime::MakeString("Metadata mismatch. onnx: ",
+                                                  model_name, " ", model_description, " ", graph_partition_name,
+                                                  " vs epcontext: ",
+                                                  model_name_from_ctx_cache, " ",
+                                                  model_description_from_ctx_cache, " ",
+                                                  graph_partition_name_from_ctx_cache);
+    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_GRAPH, message);
+  }
+
+  return Status::OK();
+}
+
+Status GenerateCtxCacheOnnxModel(const std::string model_name,
+                                 const std::string model_description,
+                                 unsigned char* buffer,
+                                 uint64_t buffer_size,
+                                 const std::string& sdk_build_version,
+                                 const std::vector<IExecutionProvider::FusedNodeAndGraph>& fused_nodes_and_graphs,
+                                 const std::unordered_map<std::string, std::unique_ptr<QnnModel>>& qnn_models,
+                                 const onnxruntime::PathString& context_cache_path,
+                                 bool qnn_context_embed_mode,
+                                 const logging::Logger& logger) {
+  std::unordered_map<std::string, int> domain_to_version = {{kOnnxDomain, 11}, {kMSDomain, 1}};
+  Model model(model_name, false, ModelMetaData(), PathString(), IOnnxRuntimeOpSchemaRegistryList(),
+              domain_to_version, {}, logger);
+  auto& graph = model.MainGraph();
+  graph.SetDescription(model_description);
+
+  using namespace ONNX_NAMESPACE;
+  int index = 0;
+  // Still need more work to support multiple partition, it's out of EP's scope.
+  // Already have code to make sure it's single partition before this method get invoked.
+  for (const auto& fused_node_graph : fused_nodes_and_graphs) {
+    Node& fused_node = fused_node_graph.fused_node;
+    auto qnn_model_kv = qnn_models.find(fused_node.Name());
+    ORT_RETURN_IF(qnn_model_kv == qnn_models.end(), fused_node.Name(), " not exist in QnnModel table.");
+
+    auto qnn_model = qnn_model_kv->second.get();
+    std::vector<NodeArg*> inputs;
+    std::vector<NodeArg*> outputs;
+    ORT_RETURN_IF_ERROR(CreateNodeArgs(qnn_model->GetInputNames(), qnn_model->GetInputsInfo(), inputs, graph));
+    ORT_RETURN_IF_ERROR(CreateNodeArgs(qnn_model->GetOutputNames(), qnn_model->GetOutputsInfo(), outputs, graph));
+
+    const std::string& graph_name = fused_node.Name();
+    auto& ep_node = graph.AddNode(graph_name,
+                                  EPCONTEXT_OP,
+                                  "Onnx Qnn context binary cache for graph partition: " + graph_name,
+                                  inputs,
+                                  outputs,
+                                  nullptr,
+                                  kMSDomain);
+
+    // Only dump the context buffer once since all QNN graph are in one single context
+    if (0 == index) {
+      if (qnn_context_embed_mode) {
+        std::string cache_payload(buffer, buffer + buffer_size);
+        ep_node.AddAttribute(EP_CACHE_CONTEXT, cache_payload);
+      } else {
+        onnxruntime::PathString context_bin_path = context_cache_path + ToPathString("_" + graph_name + ".bin");
+        std::string context_cache_name(std::filesystem::path(context_bin_path).filename().string());
+        std::ofstream of_stream(context_bin_path.c_str(), std::ofstream::binary);
+        if (!of_stream) {
+          LOGS(logger, ERROR) << "Failed to open create context file.";
+          return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to open context cache file.");
+        }
+        of_stream.write(reinterpret_cast<char*>(buffer), buffer_size);
+        ep_node.AddAttribute(EP_CACHE_CONTEXT, context_cache_name);
+      }
+    } else {
+      ep_node.AddAttribute(MAIN_CONTEXT, static_cast<int64_t>(0));
+    }
+    int64_t embed_mode = qnn_context_embed_mode ? static_cast<int64_t>(1) : static_cast<int64_t>(0);
+    ep_node.AddAttribute(EMBED_MODE, embed_mode);
+    ep_node.AddAttribute(EP_SDK_VER, sdk_build_version);
+    ep_node.AddAttribute(PARTITION_NAME, graph_name);
+    ep_node.AddAttribute(SOURCE, kQnnExecutionProvider);
+    ++index;
+  }
+  ORT_RETURN_IF_ERROR(graph.Resolve());
+  ORT_RETURN_IF_ERROR(Model::Save(model, context_cache_path));
+
+  return Status::OK();
+}
+
+}  // namespace qnn
+}  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/qnn/builder/onnx_ctx_model_helper.h b/onnxruntime/core/providers/qnn/builder/onnx_ctx_model_helper.h
new file mode 100644
index 0000000000000..0011d0f43f5bc
--- /dev/null
+++ b/onnxruntime/core/providers/qnn/builder/onnx_ctx_model_helper.h
@@ -0,0 +1,87 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+
+#include <string>
+#include <vector>
+
+#include "qnn_def.h"
+#include "core/common/logging/logging.h"
+#include "core/graph/graph_viewer.h"
+#include "core/providers/shared/utils/utils.h"
+#include "core/graph/model.h"
+#include "core/framework/execution_provider.h"
+
+namespace onnxruntime {
+
+namespace qnn {
+
+class QnnModel;
+class QnnBackendManager;
+
+static const std::string EPCONTEXT_OP = "EPContext";
+static const std::string MAIN_CONTEXT = "main_context";
+static const std::string EMBED_MODE = "embed_mode";
+static const std::string EP_CACHE_CONTEXT = "ep_cache_context";
+static const std::string EP_SDK_VER = "ep_sdk_version";
+static const std::string PARTITION_NAME = "partition_name";
+static const std::string SOURCE = "source";
+
+Status IsFusedGraphHasCtxNode(const std::vector<IExecutionProvider::FusedNodeAndGraph>& fused_nodes_and_graphs,
+                              bool& is_qnn_ctx_model);
+
+bool IsQnnCtxModel(const onnxruntime::GraphViewer& graph_viewer);
+
+Status CreateNodeArgs(const std::vector<std::string>& names,
+                      const std::unordered_map<std::string, OnnxTensorInfo>& tensor_info_table,
+                      std::vector<NodeArg*>& node_args,
+                      onnxruntime::Graph& graph);
+
+bool IsContextCacheFileExists(const std::string& customer_context_cache_path,
+                              const onnxruntime::PathString& model_pathstring,
+                              onnxruntime::PathString& context_cache_path);
+
+Status GetEpContextFromModel(const onnxruntime::PathString& ctx_onnx_model_path,
+                             QnnBackendManager* qnn_backend_manager,
+                             QnnModel& qnn_model,
+                             const logging::Logger& logger);
+
+Status GetEpContextFromGraph(const onnxruntime::GraphViewer& graph_viewer,
+                             const onnxruntime::PathString& ctx_onnx_model_path,
+                             QnnBackendManager* qnn_backend_manager,
+                             QnnModel& qnn_model);
+
+Status LoadQnnCtxFromOnnxModel(const onnxruntime::GraphViewer& graph_viewer,
+                               const onnxruntime::PathString& ctx_onnx_model_path,
+                               bool is_qnn_ctx_model,
+                               bool is_ctx_cache_file_exist,
+                               QnnBackendManager* qnn_backend_manager,
+                               QnnModel& qnn_model,
+                               const logging::Logger& logger);
+
+Status ValidateWithContextFile(const onnxruntime::PathString& context_cache_path,
+                               const std::string& model_name,
+                               const std::string& model_description,
+                               const std::string& graph_partition_name,
+                               const logging::Logger& logger);
+
+Status GetMetadataFromEpContextModel(const onnxruntime::PathString& ctx_onnx_model_path,
+                                     std::string& model_name,
+                                     std::string& model_description,
+                                     std::string& graph_partition_name,
+                                     std::string& cache_source,
+                                     const logging::Logger& logger);
+
+Status GenerateCtxCacheOnnxModel(const std::string model_name,
+                                 const std::string model_description,
+                                 unsigned char* buffer,
+                                 uint64_t buffer_size,
+                                 const std::string& sdk_build_version,
+                                 const std::vector<IExecutionProvider::FusedNodeAndGraph>& fused_nodes_and_graphs,
+                                 const std::unordered_map<std::string, std::unique_ptr<QnnModel>>& qnn_models,
+                                 const onnxruntime::PathString& context_cache_path,
+                                 bool qnn_context_embed_mode,
+                                 const logging::Logger& logger);
+}  // namespace qnn
+}  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/qnn/builder/op_builder_factory.cc b/onnxruntime/core/providers/qnn/builder/op_builder_factory.cc
index fc8c2efc7a80f..f1a5d41a8a6ff 100644
--- a/onnxruntime/core/providers/qnn/builder/op_builder_factory.cc
+++ b/onnxruntime/core/providers/qnn/builder/op_builder_factory.cc
@@ -47,12 +47,10 @@ OpBuilderRegistrations::OpBuilderRegistrations() {
     CreateSimpleOpBuilder("Where", *this);
     CreateSimpleOpBuilder("Sigmoid", *this);
     CreateSimpleOpBuilder("Sin", *this);
-    CreateSimpleOpBuilder("Softmax", *this);
     CreateSimpleOpBuilder("Sqrt", *this);
     CreateSimpleOpBuilder("Sub", *this);
     CreateSimpleOpBuilder("Tanh", *this);
 
-    CreateSimpleOpBuilder("LogSoftmax", *this);
     CreateSimpleOpBuilder("MatMul", *this);
     CreateSimpleOpBuilder("Concat", *this);
 
@@ -65,6 +63,13 @@ OpBuilderRegistrations::OpBuilderRegistrations() {
     CreateSimpleOpBuilder("SpaceToDepth", *this);
 
     CreateSimpleOpBuilder("GridSample", *this);
+
+    CreateSimpleOpBuilder("LpNormalization", *this);
+  }
+
+  {
+    CreateSoftmaxOpBuilder("Softmax", *this);
+    CreateSoftmaxOpBuilder("LogSoftmax", *this);
   }
 
   {
@@ -158,6 +163,10 @@ OpBuilderRegistrations::OpBuilderRegistrations() {
   {
     CreatePadOpBuilder("Pad", *this);
   }
+
+  {
+    CreateExpandOpBuilder("Expand", *this);
+  }
 }
 
 const IOpBuilder* GetOpBuilder(const std::string& onnx_op_type) {
diff --git a/onnxruntime/core/providers/qnn/builder/op_builder_factory.h b/onnxruntime/core/providers/qnn/builder/op_builder_factory.h
index 5d59f4343d773..d95e2baa9457f 100644
--- a/onnxruntime/core/providers/qnn/builder/op_builder_factory.h
+++ b/onnxruntime/core/providers/qnn/builder/op_builder_factory.h
@@ -50,6 +50,8 @@ const IOpBuilder* GetOpBuilder(const std::string& onnx_op_type);
 
 void CreateSimpleOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
 
+void CreateSoftmaxOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
+
 void CreateCastOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
 
 void CreateConvOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
@@ -90,5 +92,7 @@ void CreateTransposeOpBuilder(const std::string& op_type, OpBuilderRegistrations
 
 void CreatePadOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
 
+void CreateExpandOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
+
 }  // namespace qnn
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.cc
index e0f060f758b2e..6d8c80bd2aaa1 100644
--- a/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.cc
+++ b/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.cc
@@ -56,8 +56,8 @@ Status BaseOpBuilder::ProcessInput(QnnModelWrapper& qnn_model_wrapper,
     return Status::OK();
   }
 
-  OnnxInputInfo input_info = {};
-  ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(input, input_info));
+  TensorInfo input_info = {};
+  ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(input, input_info));
 
   std::vector<uint8_t> unpacked_tensor;
   if (input_info.is_initializer) {
@@ -126,44 +126,38 @@ Status BaseOpBuilder::ProcessOutputs(QnnModelWrapper& qnn_model_wrapper,
   for (size_t output_i = 0; output_i < output_count; ++output_i) {
     const auto& output_name = outputs[output_i].node_arg.Name();
 
-    Qnn_QuantizeParams_t quantize_param = QNN_QUANTIZE_PARAMS_INIT;
-    bool is_quantized_tensor = outputs[output_i].quant_param.has_value();
-    utils::InitializeQuantizeParam(quantize_param, is_quantized_tensor);
-
-    const auto* type_proto = outputs[output_i].node_arg.TypeAsProto();
-    Qnn_DataType_t qnn_data_type = QNN_DATATYPE_UNDEFINED;
-    ORT_RETURN_IF_ERROR(utils::GetQnnDataType(is_quantized_tensor, type_proto, qnn_data_type));
-    ORT_RETURN_IF_NOT(qnn_model_wrapper.ProcessQuantizationParameter(outputs[output_i].quant_param,
-                                                                     quantize_param.scaleOffsetEncoding.scale,
-                                                                     quantize_param.scaleOffsetEncoding.offset),
-                      "Cannot get quantization parameter");
-    std::vector<uint32_t> output_shape;
-    ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(outputs[output_i].node_arg, output_shape),
-                      "Cannot get shape");
-    Qnn_DataType_t supported_qnn_data_type = GetSupportedOutputDataType(output_i, qnn_data_type);
+    TensorInfo output_info = {};
+    ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(outputs[output_i], output_info));
+
+    if (output_info.quant_param.encodingDefinition == QNN_DEFINITION_DEFINED) {
+      ORT_RETURN_IF_ERROR(OverrideOutputQuantParam(qnn_model_wrapper, node_unit, logger, input_names,
+                                                   output_i, output_info.qnn_data_type, output_info.quant_param));
+    }
+
+    Qnn_DataType_t supported_qnn_data_type = GetSupportedOutputDataType(output_i, output_info.qnn_data_type);
     bool is_graph_output = qnn_model_wrapper.IsGraphOutput(output_name);
-    if (supported_qnn_data_type != qnn_data_type && is_graph_output && !do_op_validation) {
+    if (supported_qnn_data_type != output_info.qnn_data_type && is_graph_output && !do_op_validation) {
       std::string cast_node_name = output_name + "_ort_qnn_ep_cast";
       std::string cast_input_name = output_name + "_ort_qnn_ep_aux";
-      std::vector<uint32_t> cast_output_shape = output_shape;
+      std::vector<uint32_t> cast_output_shape = output_info.shape;
       QnnTensorWrapper cast_input_tensorwrapper(cast_input_name,
                                                 QNN_TENSOR_TYPE_NATIVE,
                                                 supported_qnn_data_type,
-                                                quantize_param,
+                                                output_info.quant_param,
                                                 std::move(cast_output_shape));
       ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(cast_input_tensorwrapper)), "Failed to add tensor.");
       output_names.push_back(cast_input_name);
       cast_node_info_vec.push_back({cast_node_name, cast_input_name, output_name});
     } else {
-      qnn_data_type = supported_qnn_data_type;
+      output_info.qnn_data_type = supported_qnn_data_type;
       output_names.push_back(output_name);
     }
     Qnn_TensorType_t tensor_type = is_graph_output ? QNN_TENSOR_TYPE_APP_READ : QNN_TENSOR_TYPE_NATIVE;
     QnnTensorWrapper output_tensorwrapper(output_name,
                                           tensor_type,
-                                          qnn_data_type,
-                                          quantize_param,
-                                          std::move(output_shape));
+                                          output_info.qnn_data_type,
+                                          output_info.quant_param,
+                                          std::move(output_info.shape));
     ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(output_tensorwrapper)), "Failed to add tensor.");
   }
 
@@ -188,6 +182,46 @@ Status BaseOpBuilder::ProcessOutputs(QnnModelWrapper& qnn_model_wrapper,
   return Status::OK();
 }
 
+Status BaseOpBuilder::SetOutputQParamEqualToInputIfNearlyEqual(QnnModelWrapper& qnn_model_wrapper,
+                                                               const NodeUnit& node_unit,
+                                                               const logging::Logger& logger,
+                                                               const std::vector<std::string>& input_names,
+                                                               size_t input_index,
+                                                               size_t output_index,
+                                                               Qnn_DataType_t qnn_data_type,
+                                                               Qnn_QuantizeParams_t& quant_param) const {
+  const QnnTensorWrapper& input_tensor_wrapper = qnn_model_wrapper.GetQnnTensorWrapper(input_names[input_index]);
+  ORT_RETURN_IF_NOT(input_tensor_wrapper.GetTensorDataType() == qnn_data_type,
+                    "Input and output data types do not match");
+  Qnn_QuantizeParams_t input_quant_param = GetQnnTensorQParams(input_tensor_wrapper.GetQnnTensor());
+
+  float scale_diff = 0.0f;
+  int32_t offset_diff = 0;
+  ORT_RETURN_IF_ERROR(CompareQnnQuantParams(quant_param, input_quant_param, scale_diff, offset_diff));
+  constexpr float NEARLY_EQUAL_THRESHOLD = 1e-9f;
+  constexpr float WARN_THRESHOLD = 1e-6f;
+
+  if (scale_diff != 0.0f && offset_diff == 0) {
+    if (scale_diff <= NEARLY_EQUAL_THRESHOLD) {
+      // Quantization params are nearly equal, so make them equal. This may allow QNN backends to employ certain graph
+      // optimizations that improve inference latency.
+      LOGS(logger, WARNING) << "QNN EP will override the output quantization parameters for " << node_unit.OpType()
+                            << " operators to be equal to the input quantization parameters. Operator name: "
+                            << node_unit.Name() << ", input_index: " << input_index << ", output index: "
+                            << output_index << ".";
+      quant_param = input_quant_param;  // Copy input quantization params to the output.
+    } else if (scale_diff <= WARN_THRESHOLD) {
+      // Quantization params are just outside of the "nearly equal" threshold, so warn user of potential latency
+      // degradation.
+      LOGS(logger, WARNING) << "The quantization parameters for the " << node_unit.OpType() << " operator '"
+                            << node_unit.Name() << "' are not equal, which may result in latency degradation. "
+                            << "input_index: " << input_index << ", output index: " << output_index << ".";
+    }
+  }
+
+  return Status::OK();
+}
+
 Status BaseOpBuilder::TransposeInitializer(const QnnModelWrapper& qnn_model_wrapper,
                                            const onnx::TensorProto& initializer,
                                            const std::vector<size_t>& perm,
diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.h b/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.h
index 0431d605bc843..4eb599eb50175 100644
--- a/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.h
+++ b/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.h
@@ -38,6 +38,37 @@ class BaseOpBuilder : public IOpBuilder {
     return qnn_data_type;
   }
 
+  /**
+   * Allows operator builders that override this function to override output quantization parameters.
+   * Called by BaseOpBuilder::ProcessOutputs().
+   *
+   * \param qnn_model_wrapper The QNN model that is being built.
+   * \param node_unit The node unit for which to return output information.
+   * \param logger The logger.
+   * \param input_names Names of all inputs consumed by this QNN node.
+   * \param output_index The index in node_unit.Outputs() of the output for which to return information.
+   * \param qnn_data_type The output's data type.
+   * \param quant_param The quantization parameter object that is overridden.
+   * \return An onnxruntime::Status object indicating failure or success.
+   */
+  virtual Status OverrideOutputQuantParam(QnnModelWrapper& qnn_model_wrapper,
+                                          const NodeUnit& node_unit,
+                                          const logging::Logger& logger,
+                                          const std::vector<std::string>& input_names,
+                                          size_t output_index,
+                                          Qnn_DataType_t qnn_data_type,
+                                          Qnn_QuantizeParams_t& quant_param) const ORT_MUST_USE_RESULT {
+    // Do nothing by default. Op builders like Split implement this function to override output quant params.
+    ORT_UNUSED_PARAMETER(qnn_model_wrapper);
+    ORT_UNUSED_PARAMETER(node_unit);
+    ORT_UNUSED_PARAMETER(logger);
+    ORT_UNUSED_PARAMETER(input_names);
+    ORT_UNUSED_PARAMETER(output_index);
+    ORT_UNUSED_PARAMETER(qnn_data_type);
+    ORT_UNUSED_PARAMETER(quant_param);
+    return Status::OK();
+  }
+
   virtual Status ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
                                const NodeUnit& node_unit,
                                const logging::Logger& logger,
@@ -72,6 +103,15 @@ class BaseOpBuilder : public IOpBuilder {
     return node_name;
   }
 
+  Status SetOutputQParamEqualToInputIfNearlyEqual(QnnModelWrapper& qnn_model_wrapper,
+                                                  const NodeUnit& node_unit,
+                                                  const logging::Logger& logger,
+                                                  const std::vector<std::string>& input_names,
+                                                  size_t input_index,
+                                                  size_t output_index,
+                                                  Qnn_DataType_t qnn_data_type,
+                                                  Qnn_QuantizeParams_t& quant_param) const ORT_MUST_USE_RESULT;
+
   static const std::string& GetQnnOpType(const std::string& onnx_op_type) {
     // TODO: Use QNN operator names defined in "QnnOpDef.h"
     static const std::unordered_map<std::string, std::string> onnx_op_type_to_qnn_op_type = {
@@ -121,6 +161,7 @@ class BaseOpBuilder : public IOpBuilder {
         {"Tanh", QNN_OP_TANH},
         {"Transpose", QNN_OP_TRANSPOSE},
         {"GridSample", QNN_OP_GRID_SAMPLE},
+        {"LpNormalization", QNN_OP_L2_NORM},
 
         {"DequantizeLinear", QNN_OP_DEQUANTIZE},
         {"QuantizeLinear", QNN_OP_QUANTIZE},
@@ -164,7 +205,9 @@ class BaseOpBuilder : public IOpBuilder {
 
         {"LRN", QNN_OP_LRN},
 
-        {"Pad", QNN_OP_PAD}};
+        {"Pad", QNN_OP_PAD},
+
+        {"Expand", QNN_OP_ELEMENT_WISE_MULTIPLY}};
     auto it = onnx_op_type_to_qnn_op_type.find(onnx_op_type);
     ORT_ENFORCE(it != onnx_op_type_to_qnn_op_type.end());
     return it->second;
diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/batch_norm_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/batch_norm_op_builder.cc
index ccbc1acaa2f9e..294aa659872c4 100644
--- a/onnxruntime/core/providers/qnn/builder/opbuilder/batch_norm_op_builder.cc
+++ b/onnxruntime/core/providers/qnn/builder/opbuilder/batch_norm_op_builder.cc
@@ -1,16 +1,19 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
+#include <limits>
+#include <cmath>
+#include <utility>
+
 #include "core/providers/common.h"
 #include "core/providers/shared/utils/utils.h"
 #include "core/framework/tensorprotoutils.h"
 #include "core/providers/qnn/builder/qnn_model_wrapper.h"
+#include "core/providers/qnn/builder/qnn_utils.h"
 #include "core/providers/qnn/builder/op_builder_factory.h"
 
 #include "base_op_builder.h"
 
-#include <limits>
-
 namespace onnxruntime {
 namespace qnn {
 class BatchNormOpBuilder : public BaseOpBuilder {
@@ -18,9 +21,375 @@ class BatchNormOpBuilder : public BaseOpBuilder {
   BatchNormOpBuilder() : BaseOpBuilder("BatchNormOpBuilder") {}
   ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(BatchNormOpBuilder);
 
+  Status ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
+                       const NodeUnit& node_unit,
+                       const logging::Logger& logger,
+                       std::vector<std::string>& input_names,
+                       bool do_op_validation) const override ORT_MUST_USE_RESULT;
+
   Status IsOpSupported(QnnModelWrapper& qnn_model_wrapper,
                        const NodeUnit& node_unit,
                        const logging::Logger& logger) const override final ORT_MUST_USE_RESULT;
+
+  inline Status GetValueOnQnnDataType(const Qnn_DataType_t qnn_data_type,
+                                      const uint8_t* raw_ptr,
+                                      double& value,
+                                      int& offset) const {
+    switch (qnn_data_type) {
+      case QNN_DATATYPE_INT_8:
+      case QNN_DATATYPE_SFIXED_POINT_8: {
+        value = static_cast<double>(*reinterpret_cast<const int8_t*>(raw_ptr));
+        offset += sizeof(int8_t);
+        break;
+      }
+      case QNN_DATATYPE_INT_16:
+      case QNN_DATATYPE_SFIXED_POINT_16: {
+        value = static_cast<double>(*reinterpret_cast<const int16_t*>(raw_ptr));
+        offset += sizeof(int16_t);
+        break;
+      }
+      case QNN_DATATYPE_INT_32:
+      case QNN_DATATYPE_SFIXED_POINT_32: {
+        value = static_cast<double>(*reinterpret_cast<const int32_t*>(raw_ptr));
+        offset += sizeof(int32_t);
+        break;
+      }
+      case QNN_DATATYPE_INT_64: {
+        value = static_cast<double>(*reinterpret_cast<const int64_t*>(raw_ptr));
+        offset += sizeof(int64_t);
+        break;
+      }
+      case QNN_DATATYPE_UINT_8:
+      case QNN_DATATYPE_UFIXED_POINT_8: {
+        value = static_cast<double>(*reinterpret_cast<const uint8_t*>(raw_ptr));
+        offset += sizeof(uint8_t);
+        break;
+      }
+      case QNN_DATATYPE_UINT_16:
+      case QNN_DATATYPE_UFIXED_POINT_16: {
+        value = static_cast<double>(*reinterpret_cast<const uint16_t*>(raw_ptr));
+        offset += sizeof(uint16_t);
+        break;
+      }
+      case QNN_DATATYPE_UINT_32:
+      case QNN_DATATYPE_UFIXED_POINT_32: {
+        value = static_cast<double>(*reinterpret_cast<const uint32_t*>(raw_ptr));
+        offset += sizeof(uint32_t);
+        break;
+      }
+      case QNN_DATATYPE_UINT_64: {
+        value = static_cast<double>(*reinterpret_cast<const uint64_t*>(raw_ptr));
+        offset += sizeof(uint64_t);
+        break;
+      }
+      case QNN_DATATYPE_FLOAT_32: {
+        value = static_cast<double>(*reinterpret_cast<const float*>(raw_ptr));
+        offset += sizeof(float);
+        break;
+      }
+      case QNN_DATATYPE_BOOL_8:
+      case QNN_DATATYPE_STRING:
+      case QNN_DATATYPE_FLOAT_16:
+      default:
+        ORT_RETURN_IF(true, "Qnn Data Type: %d not supported yet.", qnn_data_type);
+    }
+    return Status::OK();
+  }
+
+  inline Status AssertUnpackedTensorSize(const Qnn_DataType_t qnn_data_type,
+                                         const uint32_t channel,
+                                         const size_t raw_ptr_length) const {
+    switch (qnn_data_type) {
+      case QNN_DATATYPE_INT_8:
+      case QNN_DATATYPE_SFIXED_POINT_8: {
+        ORT_ENFORCE(channel == static_cast<uint32_t>(raw_ptr_length / sizeof(int8_t)),
+                    "initializer size not match Qnn data type.");
+        break;
+      }
+      case QNN_DATATYPE_INT_16:
+      case QNN_DATATYPE_SFIXED_POINT_16: {
+        ORT_ENFORCE(channel == static_cast<uint32_t>(raw_ptr_length / sizeof(int16_t)),
+                    "initializer size not match Qnn data type.");
+        break;
+      }
+      case QNN_DATATYPE_INT_32:
+      case QNN_DATATYPE_SFIXED_POINT_32: {
+        ORT_ENFORCE(channel == static_cast<uint32_t>(raw_ptr_length / sizeof(int32_t)),
+                    "initializer size not match Qnn data type.");
+        break;
+      }
+      case QNN_DATATYPE_INT_64: {
+        ORT_ENFORCE(channel == static_cast<uint32_t>(raw_ptr_length / sizeof(int64_t)),
+                    "initializer size not match Qnn data type.");
+        break;
+      }
+      case QNN_DATATYPE_UINT_8:
+      case QNN_DATATYPE_UFIXED_POINT_8: {
+        ORT_ENFORCE(channel == static_cast<uint32_t>(raw_ptr_length / sizeof(uint8_t)),
+                    "initializer size not match Qnn data type.");
+        break;
+      }
+      case QNN_DATATYPE_UINT_16:
+      case QNN_DATATYPE_UFIXED_POINT_16: {
+        ORT_ENFORCE(channel == static_cast<uint32_t>(raw_ptr_length / sizeof(uint16_t)),
+                    "initializer size not match Qnn data type.");
+        break;
+      }
+      case QNN_DATATYPE_UINT_32:
+      case QNN_DATATYPE_UFIXED_POINT_32: {
+        ORT_ENFORCE(channel == static_cast<uint32_t>(raw_ptr_length / sizeof(uint32_t)),
+                    "initializer size not match Qnn data type.");
+        break;
+      }
+      case QNN_DATATYPE_UINT_64: {
+        ORT_ENFORCE(channel == static_cast<uint32_t>(raw_ptr_length / sizeof(uint64_t)),
+                    "initializer size not match Qnn data type.");
+        break;
+      }
+      case QNN_DATATYPE_FLOAT_32: {
+        ORT_ENFORCE(channel == static_cast<uint32_t>(raw_ptr_length / sizeof(float)),
+                    "initializer size not match Qnn data type.");
+        break;
+      }
+      case QNN_DATATYPE_BOOL_8:
+      case QNN_DATATYPE_STRING:
+      case QNN_DATATYPE_FLOAT_16:
+      default:
+        ORT_RETURN_IF(true, "Qnn Data Type: %d not supported yet.", qnn_data_type);
+    }
+    return Status::OK();
+  }
+
+  inline Status ConvertToRawOnQnnDataType(const Qnn_DataType_t qnn_data_type,
+                                          const std::vector<double>& double_tensor,
+                                          std::vector<uint8_t>& raw_tensor) const {
+    switch (qnn_data_type) {
+      case QNN_DATATYPE_INT_8: {
+        raw_tensor.resize(double_tensor.size() * sizeof(int8_t));
+        int8_t* raw_ptr = reinterpret_cast<int8_t*>(raw_tensor.data());
+        for (size_t i = 0; i < double_tensor.size(); ++i) {
+          raw_ptr[i] = static_cast<int8_t>(double_tensor[i]);
+        }
+        break;
+      }
+      case QNN_DATATYPE_INT_16: {
+        raw_tensor.resize(double_tensor.size() * sizeof(int16_t));
+        int16_t* raw_ptr = reinterpret_cast<int16_t*>(raw_tensor.data());
+        for (size_t i = 0; i < double_tensor.size(); ++i) {
+          raw_ptr[i] = static_cast<int16_t>(double_tensor[i]);
+        }
+        break;
+      }
+      case QNN_DATATYPE_INT_32: {
+        raw_tensor.resize(double_tensor.size() * sizeof(int32_t));
+        int32_t* raw_ptr = reinterpret_cast<int32_t*>(raw_tensor.data());
+        for (size_t i = 0; i < double_tensor.size(); ++i) {
+          raw_ptr[i] = static_cast<int32_t>(double_tensor[i]);
+        }
+        break;
+      }
+      case QNN_DATATYPE_INT_64: {
+        raw_tensor.resize(double_tensor.size() * sizeof(int64_t));
+        int64_t* raw_ptr = reinterpret_cast<int64_t*>(raw_tensor.data());
+        for (size_t i = 0; i < double_tensor.size(); ++i) {
+          raw_ptr[i] = static_cast<int64_t>(double_tensor[i]);
+        }
+        break;
+      }
+      case QNN_DATATYPE_UINT_8: {
+        raw_tensor.resize(double_tensor.size() * sizeof(uint8_t));
+        uint8_t* raw_ptr = reinterpret_cast<uint8_t*>(raw_tensor.data());
+        for (size_t i = 0; i < double_tensor.size(); ++i) {
+          raw_ptr[i] = static_cast<uint8_t>(double_tensor[i]);
+        }
+        break;
+      }
+      case QNN_DATATYPE_UINT_16: {
+        raw_tensor.resize(double_tensor.size() * sizeof(uint16_t));
+        uint16_t* raw_ptr = reinterpret_cast<uint16_t*>(raw_tensor.data());
+        for (size_t i = 0; i < double_tensor.size(); ++i) {
+          raw_ptr[i] = static_cast<uint16_t>(double_tensor[i]);
+        }
+        break;
+      }
+      case QNN_DATATYPE_UINT_32: {
+        raw_tensor.resize(double_tensor.size() * sizeof(uint32_t));
+        uint32_t* raw_ptr = reinterpret_cast<uint32_t*>(raw_tensor.data());
+        for (size_t i = 0; i < double_tensor.size(); ++i) {
+          raw_ptr[i] = static_cast<uint32_t>(double_tensor[i]);
+        }
+        break;
+      }
+      case QNN_DATATYPE_UINT_64: {
+        raw_tensor.resize(double_tensor.size() * sizeof(uint64_t));
+        uint64_t* raw_ptr = reinterpret_cast<uint64_t*>(raw_tensor.data());
+        for (size_t i = 0; i < double_tensor.size(); ++i) {
+          raw_ptr[i] = static_cast<uint64_t>(double_tensor[i]);
+        }
+        break;
+      }
+      case QNN_DATATYPE_FLOAT_32: {
+        raw_tensor.resize(double_tensor.size() * sizeof(float));
+        float* raw_ptr = reinterpret_cast<float*>(raw_tensor.data());
+        for (size_t i = 0; i < double_tensor.size(); ++i) {
+          raw_ptr[i] = static_cast<float>(double_tensor[i]);
+        }
+        break;
+      }
+      case QNN_DATATYPE_UFIXED_POINT_32:
+      case QNN_DATATYPE_UFIXED_POINT_16:
+      case QNN_DATATYPE_UFIXED_POINT_8:
+      case QNN_DATATYPE_SFIXED_POINT_32:
+      case QNN_DATATYPE_SFIXED_POINT_16:
+      case QNN_DATATYPE_SFIXED_POINT_8:
+      case QNN_DATATYPE_BOOL_8:
+      case QNN_DATATYPE_STRING:
+      case QNN_DATATYPE_FLOAT_16:
+      default:
+        ORT_RETURN_IF(true, "Qnn Data Type: %d not supported yet.", qnn_data_type);
+    }
+    return Status::OK();
+  }
+
+  Status PreprocessMean(const TensorInfo& mean_info,
+                        const bool is_npu_backend,
+                        const uint8_t* mean_raw_ptr,
+                        const size_t mean_raw_ptr_length,
+                        std::vector<double>& mean_out) const {
+    // tensor length (channel)
+    uint32_t channel = mean_info.shape[0];
+    mean_out.resize(channel);
+    ORT_RETURN_IF_ERROR(AssertUnpackedTensorSize(mean_info.qnn_data_type, channel, mean_raw_ptr_length));
+    int i = 0;
+    int offset = 0;
+    for (; i < static_cast<int>(channel); ++i) {
+      double mean_value = 0.0;
+      ORT_RETURN_IF_ERROR(GetValueOnQnnDataType(mean_info.qnn_data_type, mean_raw_ptr + offset, mean_value, offset));
+      mean_out[i] = (is_npu_backend) ? utils::Dequantize(mean_info.quant_param.scaleOffsetEncoding.offset,
+                                                         mean_info.quant_param.scaleOffsetEncoding.scale,
+                                                         mean_value)
+                                     : mean_value;
+    }
+    return Status::OK();
+  }
+
+  Status PreprocessStd(const TensorInfo& var_info,
+                       const bool is_npu_backend,
+                       const uint8_t* var_raw_ptr,
+                       const size_t var_raw_ptr_length,
+                       const float epsilon,
+                       std::vector<double>& std_out) const {
+    // tensor length (channel)
+    uint32_t channel = var_info.shape[0];
+    std_out.resize(channel);
+    ORT_RETURN_IF_ERROR(AssertUnpackedTensorSize(var_info.qnn_data_type, channel, var_raw_ptr_length));
+    int i = 0;
+    int offset = 0;
+    for (; i < static_cast<int>(channel); ++i) {
+      double var_value = 0.0;
+      ORT_RETURN_IF_ERROR(GetValueOnQnnDataType(var_info.qnn_data_type, var_raw_ptr + offset, var_value, offset));
+      std_out[i] = (is_npu_backend) ? utils::Dequantize(var_info.quant_param.scaleOffsetEncoding.offset,
+                                                        var_info.quant_param.scaleOffsetEncoding.scale,
+                                                        var_value)
+                                    : var_value;
+      std_out[i] = std::sqrt(std_out[i] + static_cast<double>(epsilon));
+    }
+    return Status::OK();
+  }
+
+  Status PreprocessScale(const TensorInfo& scale_info,
+                         const bool is_npu_backend,
+                         const uint8_t* scale_raw_ptr,
+                         const size_t scale_raw_ptr_length,
+                         const std::vector<double>& std_double_tensor,
+                         double& rmax,
+                         double& rmin,
+                         std::vector<double>& scale_out) const {
+    // tensor length (channel)
+    uint32_t channel = scale_info.shape[0];
+    scale_out.resize(channel);
+    ORT_RETURN_IF_ERROR(AssertUnpackedTensorSize(scale_info.qnn_data_type, channel, scale_raw_ptr_length));
+    int i = 0;
+    int offset = 0;
+    for (; i < static_cast<int>(channel); ++i) {
+      double scale_value = 0.0;
+      ORT_RETURN_IF_ERROR(GetValueOnQnnDataType(scale_info.qnn_data_type, scale_raw_ptr + offset, scale_value, offset));
+      scale_out[i] = (is_npu_backend) ? utils::Dequantize(scale_info.quant_param.scaleOffsetEncoding.offset,
+                                                          scale_info.quant_param.scaleOffsetEncoding.scale,
+                                                          scale_value)
+                                      : scale_value;
+      scale_out[i] = scale_out[i] / std_double_tensor[i];
+      rmax = std::max(rmax, scale_out[i]);
+      rmin = std::min(rmin, scale_out[i]);
+    }
+    return Status::OK();
+  }
+
+  Status PreprocessBias(const TensorInfo& bias_info,
+                        const bool is_npu_backend,
+                        const uint8_t* bias_raw_ptr,
+                        const size_t bias_raw_ptr_length,
+                        const std::vector<double>& scale_double_tensor,
+                        const std::vector<double>& mean_double_tensor,
+                        double& rmax,
+                        double& rmin,
+                        std::vector<double>& bias_out) const {
+    // tensor length (channel)
+    uint32_t channel = bias_info.shape[0];
+    bias_out.resize(channel);
+    ORT_RETURN_IF_ERROR(AssertUnpackedTensorSize(bias_info.qnn_data_type, channel, bias_raw_ptr_length));
+    int i = 0;
+    int offset = 0;
+    for (; i < static_cast<int>(channel); ++i) {
+      double bias_value = 0.0;
+      ORT_RETURN_IF_ERROR(GetValueOnQnnDataType(bias_info.qnn_data_type, bias_raw_ptr + offset, bias_value, offset));
+      bias_out[i] = (is_npu_backend) ? utils::Dequantize(bias_info.quant_param.scaleOffsetEncoding.offset,
+                                                         bias_info.quant_param.scaleOffsetEncoding.scale,
+                                                         bias_value)
+                                     : bias_value;
+      bias_out[i] = bias_out[i] - (mean_double_tensor[i] * scale_double_tensor[i]);
+      rmax = std::max(rmax, bias_out[i]);
+      rmin = std::min(rmin, bias_out[i]);
+    }
+    return Status::OK();
+  }
+
+  Status Postprocess(const TensorInfo& info,
+                     const bool is_npu_backend,
+                     const std::vector<double>& double_tensor,
+                     const double rmax,
+                     const double rmin,
+                     Qnn_QuantizeParams_t& quant_param,
+                     std::vector<uint8_t>& raw_tensor) const {
+    if (is_npu_backend) {
+      raw_tensor.resize(double_tensor.size());
+      float scale = 0.0f;
+      int zero_point = 0;
+      ORT_RETURN_IF_ERROR(utils::GetQuantParams(static_cast<float>(rmin),
+                                                static_cast<float>(rmax),
+                                                info.qnn_data_type,
+                                                scale,
+                                                zero_point));
+      quant_param = QNN_QUANTIZE_PARAMS_INIT;
+      utils::InitializeQuantizeParam(quant_param, true, scale, zero_point);
+      for (size_t i = 0; i < double_tensor.size(); ++i) {
+        // onnx only supports 8 bits quantization
+        int quant_value_int = 0;
+        ORT_RETURN_IF_ERROR(utils::Quantize(double_tensor[i], scale, zero_point, info.qnn_data_type, quant_value_int));
+        if (info.qnn_data_type == QNN_DATATYPE_UFIXED_POINT_8) {
+          raw_tensor[i] = static_cast<uint8_t>(quant_value_int);
+        } else if (info.qnn_data_type == QNN_DATATYPE_SFIXED_POINT_8) {
+          int8_t quant_value = static_cast<int8_t>(quant_value_int);
+          raw_tensor[i] = *reinterpret_cast<uint8_t*>(&quant_value);
+        } else {
+          ORT_RETURN_IF(true, "Qnn Data Type: %d not supported yet.", info.qnn_data_type);
+        }
+      }
+    } else {
+      ORT_RETURN_IF_ERROR(ConvertToRawOnQnnDataType(info.qnn_data_type, double_tensor, raw_tensor));
+    }
+    return Status::OK();
+  }
 };
 
 // BatchNorm is sensitive with data layout, no special validation so far
@@ -34,11 +403,6 @@ Status BatchNormOpBuilder::IsOpSupported(QnnModelWrapper& qnn_model_wrapper,
     // Still do it here so hopefully QNN Op validation API can tell us some details why it's not supported
     return AddToModelBuilder(qnn_model_wrapper, node_unit, logger, true);
   } else {
-    NodeAttrHelper node_helper(node_unit);
-    const float default_epsilon = 1e-05f;
-    const float epsilon = node_helper.Get("epsilon", 1e-05f);  // Default is 1e-05 according to ONNX spec.
-    ORT_RETURN_IF(abs(epsilon - default_epsilon) > default_epsilon, "QNN BatchNorm doesn't support epsilon.");
-
     const auto& inputs = node_unit.Inputs();
     ORT_ENFORCE(inputs.size() == 5, "5 input expected per BatchNorm Onnx Spec.");
 
@@ -56,11 +420,16 @@ Status BatchNormOpBuilder::IsOpSupported(QnnModelWrapper& qnn_model_wrapper,
 
     std::vector<uint32_t> scale_shape;
     ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(inputs[1].node_arg, scale_shape), "Cannot get shape of input 1 (scale).");
+    ORT_RETURN_IF_NOT(qnn_model_wrapper.IsInitializerInput(inputs[1].node_arg.Name()),
+                      "QNN BatchNorm doesn't support dynamic scale.");
     ORT_RETURN_IF(scale_shape.size() != 1 || scale_shape[0] != num_channels,
                   "QNN BatchNorm input 1 (scale) must have 1D shape [channel].");
 
     std::vector<uint32_t> bias_shape;
     ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(inputs[2].node_arg, bias_shape), "Cannot get shape of input 2 (bias).");
+    ORT_RETURN_IF_NOT(qnn_model_wrapper.IsInitializerInput(inputs[2].node_arg.Name()),
+                      "QNN BatchNorm doesn't support dynamic bias.");
+
     ORT_RETURN_IF(bias_shape.size() != 1 || bias_shape[0] != num_channels,
                   "QNN BatchNorm input 2 (bias) must have 1D shape [channel].");
 
@@ -68,13 +437,15 @@ Status BatchNormOpBuilder::IsOpSupported(QnnModelWrapper& qnn_model_wrapper,
     ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(inputs[3].node_arg, mean_shape), "Cannot get shape of input 3 (mean).");
     ORT_RETURN_IF(mean_shape.size() != 1 || mean_shape[0] != num_channels,
                   "QNN BatchNorm input 3 (mean) must have 1D shape [channel].");
-    ORT_RETURN_IF_NOT(qnn_model_wrapper.IsInitializerInput(inputs[3].node_arg.Name()), "QNN BatchNorm doesn't support dynamic mean.");
+    ORT_RETURN_IF_NOT(qnn_model_wrapper.IsInitializerInput(inputs[3].node_arg.Name()),
+                      "QNN BatchNorm doesn't support dynamic mean.");
 
     std::vector<uint32_t> var_shape;
     ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(inputs[4].node_arg, var_shape), "Cannot get shape of input 4 (var).");
     ORT_RETURN_IF(var_shape.size() != 1 || var_shape[0] != num_channels,
                   "QNN BatchNorm input 4 (var) must have 1D shape [channel].");
-    ORT_RETURN_IF_NOT(qnn_model_wrapper.IsInitializerInput(inputs[4].node_arg.Name()), "QNN BatchNorm doesn't support dynamic var.");
+    ORT_RETURN_IF_NOT(qnn_model_wrapper.IsInitializerInput(inputs[4].node_arg.Name()),
+                      "QNN BatchNorm doesn't support dynamic var.");
 
     ORT_RETURN_IF(node_unit.Outputs().size() > 1, "QNN BatchNorm only support 1 output.");
   }
@@ -82,6 +453,134 @@ Status BatchNormOpBuilder::IsOpSupported(QnnModelWrapper& qnn_model_wrapper,
   return Status::OK();
 }
 
+Status BatchNormOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
+                                         const NodeUnit& node_unit,
+                                         const logging::Logger& logger,
+                                         std::vector<std::string>& input_names,
+                                         bool do_op_validation) const {
+  ORT_UNUSED_PARAMETER(do_op_validation);
+  ORT_UNUSED_PARAMETER(logger);
+
+  const auto& inputs = node_unit.Inputs();
+  bool is_npu_backend = IsNpuBackend(qnn_model_wrapper.GetQnnBackendType());
+  //
+  // Input 0
+  //
+  ORT_RETURN_IF_ERROR(ProcessInput(qnn_model_wrapper, inputs[0], logger, input_names));
+
+  //
+  // Input 1: scale
+  // Input 2: bias
+  // QNN only accept 3 input. We need to first combine mean and variance into scale and bias.
+  //
+  {
+    const std::string& scale_name = inputs[1].node_arg.Name();
+    const std::string& bias_name = inputs[2].node_arg.Name();
+    TensorInfo var_info = {};
+    TensorInfo mean_info = {};
+    TensorInfo scale_info = {};
+    TensorInfo bias_info = {};
+    ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(inputs[1], scale_info));
+    ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(inputs[2], bias_info));
+    ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(inputs[3], mean_info));
+    ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(inputs[4], var_info));
+
+    // scale, bias, mean, and var must be initializers
+    ORT_RETURN_IF_NOT(scale_info.is_initializer, "scale must be initializers");
+    ORT_RETURN_IF_NOT(bias_info.is_initializer, "bias must be initializers");
+    ORT_RETURN_IF_NOT(mean_info.is_initializer, "mean must be initializers");
+    ORT_RETURN_IF_NOT(var_info.is_initializer, "var must be initializers");
+
+    std::vector<uint8_t> scale_unpacked_tensor;
+    std::vector<uint8_t> bias_unpacked_tensor;
+    std::vector<uint8_t> var_unpacked_tensor;
+    std::vector<uint8_t> mean_unpacked_tensor;
+    ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(*scale_info.initializer_tensor, scale_unpacked_tensor));
+    ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(*bias_info.initializer_tensor, bias_unpacked_tensor));
+    ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(*mean_info.initializer_tensor, mean_unpacked_tensor));
+    ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(*var_info.initializer_tensor, var_unpacked_tensor));
+
+    std::vector<double> mean_double_tensor;
+    std::vector<double> std_double_tensor;
+    std::vector<double> scale_double_tensor;
+    std::vector<double> bias_double_tensor;
+
+    NodeAttrHelper node_helper(node_unit);
+    const float epsilon = node_helper.Get("epsilon", 1e-05f);  // Default is 1e-05 according to ONNX spec.
+
+    double scale_rmax = std::numeric_limits<double>::min();
+    double scale_rmin = std::numeric_limits<double>::max();
+    double bias_rmax = std::numeric_limits<double>::min();
+    double bias_rmin = std::numeric_limits<double>::max();
+
+    // Calculate and convert new scale, new bias, mean and std to double array (may be dequantized)
+    ORT_RETURN_IF_ERROR(PreprocessMean(mean_info,
+                                       is_npu_backend,
+                                       mean_unpacked_tensor.data(),
+                                       mean_unpacked_tensor.size(),
+                                       mean_double_tensor));
+    ORT_RETURN_IF_ERROR(PreprocessStd(var_info,
+                                      is_npu_backend,
+                                      var_unpacked_tensor.data(),
+                                      var_unpacked_tensor.size(),
+                                      epsilon,
+                                      std_double_tensor));
+    ORT_RETURN_IF_ERROR(PreprocessScale(scale_info,
+                                        is_npu_backend,
+                                        scale_unpacked_tensor.data(),
+                                        scale_unpacked_tensor.size(),
+                                        std_double_tensor,
+                                        scale_rmax,
+                                        scale_rmin,
+                                        scale_double_tensor));
+    ORT_RETURN_IF_ERROR(PreprocessBias(bias_info,
+                                       is_npu_backend,
+                                       bias_unpacked_tensor.data(),
+                                       bias_unpacked_tensor.size(),
+                                       scale_double_tensor,
+                                       mean_double_tensor,
+                                       bias_rmax,
+                                       bias_rmin,
+                                       bias_double_tensor));
+
+    if (!qnn_model_wrapper.IsQnnTensorWrapperExist(scale_name)) {
+      std::vector<uint8_t> scale_raw_tensor;
+      Qnn_QuantizeParams_t scale_quant_param = scale_info.quant_param;
+      ORT_RETURN_IF_ERROR(Postprocess(scale_info,
+                                      is_npu_backend,
+                                      scale_double_tensor,
+                                      scale_rmax,
+                                      scale_rmin,
+                                      scale_quant_param,
+                                      scale_raw_tensor));
+      Qnn_TensorType_t scale_tensor_type = GetInputTensorType(qnn_model_wrapper, scale_name);
+      QnnTensorWrapper input_tensorwrapper(scale_name, scale_tensor_type, scale_info.qnn_data_type, scale_quant_param,
+                                           std::move(scale_info.shape), std::move(scale_raw_tensor));
+      ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(input_tensorwrapper)), "Failed to add tensor.");
+    }
+    input_names.push_back(scale_name);
+
+    if (!qnn_model_wrapper.IsQnnTensorWrapperExist(bias_name)) {
+      std::vector<uint8_t> bias_raw_tensor;
+      Qnn_QuantizeParams_t bias_quant_param = bias_info.quant_param;
+      ORT_RETURN_IF_ERROR(Postprocess(bias_info,
+                                      is_npu_backend,
+                                      bias_double_tensor,
+                                      bias_rmax,
+                                      bias_rmin,
+                                      bias_quant_param,
+                                      bias_raw_tensor));
+      Qnn_TensorType_t bias_tensor_type = GetInputTensorType(qnn_model_wrapper, bias_name);
+      QnnTensorWrapper input_tensorwrapper(bias_name, bias_tensor_type, bias_info.qnn_data_type, bias_quant_param,
+                                           std::move(bias_info.shape), std::move(bias_raw_tensor));
+      ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(input_tensorwrapper)), "Failed to add tensor.");
+    }
+    input_names.push_back(bias_name);
+  }
+
+  return Status::OK();
+}
+
 void CreateBatchNormOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations) {
   op_registrations.AddOpBuilder(op_type, std::make_unique<BatchNormOpBuilder>());
 }
diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/clip_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/clip_op_builder.cc
index df4c718949269..0a9f9889ad2d8 100644
--- a/onnxruntime/core/providers/qnn/builder/opbuilder/clip_op_builder.cc
+++ b/onnxruntime/core/providers/qnn/builder/opbuilder/clip_op_builder.cc
@@ -84,8 +84,8 @@ Status ClipOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wra
     std::vector<uint8_t> min_val_bytes;
 
     if (num_inputs > 1 && !inputs[1].node_arg.Name().empty()) {
-      OnnxInputInfo min_input_info = {};
-      ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(inputs[1], min_input_info));
+      TensorInfo min_input_info = {};
+      ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(inputs[1], min_input_info));
       ORT_RETURN_IF_NOT(min_input_info.qnn_data_type == qnn_data_type,
                         "QNN EP: The 'min' input of the Clip operator must be of type float32.");
       assert(min_input_info.is_initializer);  // Checked by ExplicitOpCheck().
@@ -106,8 +106,8 @@ Status ClipOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wra
     std::vector<uint8_t> max_val_bytes;
 
     if (num_inputs > 2 && !inputs[2].node_arg.Name().empty()) {
-      OnnxInputInfo max_input_info = {};
-      ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(inputs[2], max_input_info));
+      TensorInfo max_input_info = {};
+      ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(inputs[2], max_input_info));
       ORT_RETURN_IF_NOT(max_input_info.qnn_data_type == qnn_data_type,
                         "QNN EP: The 'max' input of the Clip operator must of type float32.");
       assert(max_input_info.is_initializer);  // Checked by ExplicitOpCheck().
diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/conv_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/conv_op_builder.cc
index e8c5b98129a1e..84b6cad9c41c1 100644
--- a/onnxruntime/core/providers/qnn/builder/opbuilder/conv_op_builder.cc
+++ b/onnxruntime/core/providers/qnn/builder/opbuilder/conv_op_builder.cc
@@ -175,8 +175,8 @@ Status ConvOpBuilder::ProcessConv2DInputs(QnnModelWrapper& qnn_model_wrapper,
   //
   {
     const std::string& input1_name = inputs[1].node_arg.Name();
-    OnnxInputInfo input_info = {};
-    ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(inputs[1], input_info));
+    TensorInfo input_info = {};
+    ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(inputs[1], input_info));
 
     std::string actual_name = input_info.is_initializer ? input1_name : input1_name + "_ort_qnn_ep_transpose";
     input_names.push_back(actual_name);
@@ -267,8 +267,8 @@ Status ConvOpBuilder::ProcessConv1DInputs(QnnModelWrapper& qnn_model_wrapper,
 
   {
     const std::string& input0_name = inputs[0].node_arg.Name();
-    OnnxInputInfo input0_info = {};
-    ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(inputs[0], input0_info));
+    TensorInfo input0_info = {};
+    ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(inputs[0], input0_info));
 
     const std::string conv_input0_name = input0_info.is_initializer ? input0_name
                                                                     : input0_name + "_ort_qnn_ep_reshape";
@@ -318,8 +318,8 @@ Status ConvOpBuilder::ProcessConv1DInputs(QnnModelWrapper& qnn_model_wrapper,
   //
   {
     const std::string& input1_name = inputs[1].node_arg.Name();
-    OnnxInputInfo input_info = {};
-    ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(inputs[1], input_info));
+    TensorInfo input_info = {};
+    ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(inputs[1], input_info));
 
     std::string conv_weight_input_name = input_info.is_initializer ? input1_name : input1_name + "_ort_qnn_ep_transpose";
     input_names.push_back(conv_weight_input_name);
diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/expand_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/expand_op_builder.cc
new file mode 100644
index 0000000000000..90e18e9fd0496
--- /dev/null
+++ b/onnxruntime/core/providers/qnn/builder/opbuilder/expand_op_builder.cc
@@ -0,0 +1,159 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "core/providers/common.h"
+#include "core/providers/shared/utils/utils.h"
+#include "core/providers/qnn/builder/qnn_model_wrapper.h"
+#include "core/providers/qnn/builder/op_builder_factory.h"
+#include "core/providers/qnn/builder/qnn_utils.h"
+#include "core/common/safeint.h"
+
+#include "base_op_builder.h"
+
+namespace onnxruntime {
+namespace qnn {
+
+class ExpandOpBuilder : public BaseOpBuilder {
+ public:
+  ExpandOpBuilder() : BaseOpBuilder("ExpandOpBuilder") {}
+  ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(ExpandOpBuilder);
+
+ protected:
+  Status ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
+                       const NodeUnit& node_unit,
+                       const logging::Logger& logger,
+                       std::vector<std::string>& input_names,
+                       bool do_op_validation) const override ORT_MUST_USE_RESULT;
+  Status OverrideOutputQuantParam(QnnModelWrapper& qnn_model_wrapper,
+                                  const NodeUnit& node_unit,
+                                  const logging::Logger& logger,
+                                  const std::vector<std::string>& input_names,
+                                  size_t output_index,
+                                  Qnn_DataType_t qnn_data_type,
+                                  Qnn_QuantizeParams_t& quant_param) const override ORT_MUST_USE_RESULT;
+};
+
+template <typename T>
+void FillShapeInputData(std::vector<uint8_t>& shape_data, int shape_size, T ini_value) {
+  shape_data.resize(shape_size * sizeof(T));
+  T* shape_data_float = reinterpret_cast<T*>(shape_data.data());
+  std::fill(shape_data_float, shape_data_float + shape_size, ini_value);
+}
+
+// Use ElementWiseMultiply to implement data broadcast
+// Get the shape data, and create a initializer input with value 1 and same shape
+// input[0] * input[1]
+Status ExpandOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
+                                      const NodeUnit& node_unit,
+                                      const logging::Logger& logger,
+                                      std::vector<std::string>& input_names,
+                                      bool do_op_validation) const {
+  ORT_UNUSED_PARAMETER(do_op_validation);
+  const auto& inputs = node_unit.Inputs();
+  ORT_RETURN_IF(inputs.size() != 2, "Expand should has 2 inputs!");
+
+  ORT_RETURN_IF_ERROR(ProcessInput(qnn_model_wrapper, inputs[0], logger, input_names));
+
+  // Process shape input
+  const auto& input_name = inputs[1].node_arg.Name();
+  bool is_initializer_input = qnn_model_wrapper.IsInitializerInput(input_name);
+  ORT_RETURN_IF_NOT(is_initializer_input, "QNN doesn't support dynamic shape.");
+
+  std::vector<uint32_t> shape;
+  ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(inputs[1].node_arg, shape), "Cannot get shape");
+  uint32_t shape_rank = shape[0];
+  std::vector<uint8_t> unpacked_tensor;
+  const auto& input_tensor = qnn_model_wrapper.GetInitializerTensors().at(input_name);
+  ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(*input_tensor, unpacked_tensor));
+  const int64_t* shape_data_int64 = reinterpret_cast<const int64_t*>(unpacked_tensor.data());
+  std::vector<uint32_t> input_shape(shape_rank, 0);
+  std::transform(shape_data_int64, shape_data_int64 + shape_rank, input_shape.begin(),
+                 [](int64_t item) { return SafeInt<uint32_t>(item); });
+  int shape_size = std::accumulate(input_shape.begin(), input_shape.end(), 1, std::multiplies<uint32_t>());
+
+  std::vector<uint8_t> shape_data;
+  bool is_quantized_tensor = inputs[0].quant_param.has_value();
+  Qnn_DataType_t qnn_data_type = QNN_DATATYPE_FLOAT_32;
+  const auto* type_proto = inputs[0].node_arg.TypeAsProto();
+  Qnn_QuantizeParams_t quantize_param = QNN_QUANTIZE_PARAMS_INIT;
+  if (is_quantized_tensor) {
+    ORT_RETURN_IF_ERROR(utils::GetQnnDataType(true, type_proto, qnn_data_type));
+    float scale = 0.0f;
+    int zero_point = 0;
+    float rmax = 1.0f;
+    float rmin = 1.0f;
+    ORT_RETURN_IF_ERROR(utils::GetQuantParams(rmin,
+                                              rmax,
+                                              qnn_data_type,
+                                              scale,
+                                              zero_point));
+    utils::InitializeQuantizeParam(quantize_param, true, scale, zero_point);
+    int quant_value_int = 0;
+    double ini_value = 1.0;
+    ORT_RETURN_IF_ERROR(utils::Quantize(ini_value, scale, zero_point, qnn_data_type, quant_value_int));
+    switch (qnn_data_type) {
+      case QNN_DATATYPE_SFIXED_POINT_8: {
+        FillShapeInputData(shape_data, shape_size, static_cast<int8_t>(quant_value_int));
+        break;
+      }
+      case QNN_DATATYPE_UFIXED_POINT_8: {
+        FillShapeInputData(shape_data, shape_size, static_cast<uint8_t>(quant_value_int));
+        break;
+      }
+      case QNN_DATATYPE_UFIXED_POINT_16: {
+        FillShapeInputData(shape_data, shape_size, static_cast<uint16_t>(quant_value_int));
+        break;
+      }
+      default:
+        return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Type not supported.");
+    }  // switch
+  } else {
+    ORT_RETURN_IF_ERROR(utils::GetQnnDataType(false, type_proto, qnn_data_type));
+    switch (qnn_data_type) {
+      case QNN_DATATYPE_FLOAT_32: {
+        FillShapeInputData(shape_data, shape_size, static_cast<float>(1.0));
+        break;
+      }
+      case QNN_DATATYPE_INT_32: {
+        FillShapeInputData(shape_data, shape_size, static_cast<int32_t>(1));
+        break;
+      }
+      case QNN_DATATYPE_UINT_32: {
+        FillShapeInputData(shape_data, shape_size, static_cast<uint32_t>(1));
+        break;
+      }
+      default:
+        return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Type not supported.");
+    }  // switch
+  }    // if-else
+
+  const std::string& output_name = node_unit.Outputs()[0].node_arg.Name();
+  std::string shape_input_name(input_name + "_" + output_name);
+  QnnTensorWrapper input_tensorwrapper(shape_input_name, QNN_TENSOR_TYPE_STATIC, qnn_data_type, quantize_param,
+                                       std::move(input_shape), std::move(shape_data));
+  ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(input_tensorwrapper)), "Failed to add tensor.");
+
+  input_names.push_back(shape_input_name);
+
+  return Status::OK();
+}
+
+Status ExpandOpBuilder::OverrideOutputQuantParam(QnnModelWrapper& qnn_model_wrapper,
+                                                 const NodeUnit& node_unit,
+                                                 const logging::Logger& logger,
+                                                 const std::vector<std::string>& input_names,
+                                                 size_t output_index,
+                                                 Qnn_DataType_t qnn_data_type,
+                                                 Qnn_QuantizeParams_t& quant_param) const {
+  // Force Expand output to use the same quantization parameters as the input if they are nearly equal.
+  // This enables the HTP backend to employ certain optimizations.
+  return SetOutputQParamEqualToInputIfNearlyEqual(qnn_model_wrapper, node_unit, logger, input_names,
+                                                  0 /*input_index*/, output_index, qnn_data_type, quant_param);
+}
+
+void CreateExpandOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations) {
+  op_registrations.AddOpBuilder(op_type, std::make_unique<ExpandOpBuilder>());
+}
+
+}  // namespace qnn
+}  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/gather_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/gather_op_builder.cc
index e203667576447..9f396a27369e7 100644
--- a/onnxruntime/core/providers/qnn/builder/opbuilder/gather_op_builder.cc
+++ b/onnxruntime/core/providers/qnn/builder/opbuilder/gather_op_builder.cc
@@ -37,7 +37,6 @@ Status GatherOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
                                       const logging::Logger& logger,
                                       std::vector<std::string>& input_names,
                                       bool do_op_validation) const {
-  ORT_UNUSED_PARAMETER(do_op_validation);
   const auto& inputs = node_unit.Inputs();
   ORT_RETURN_IF(inputs.size() != 2, "Gather should has 2 inputs at least!");
   ORT_RETURN_IF_ERROR(ProcessInput(qnn_model_wrapper, inputs[0], logger, input_names));
@@ -169,6 +168,13 @@ Status GatherOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_w
                                                                    quantize_param.scaleOffsetEncoding.scale,
                                                                    quantize_param.scaleOffsetEncoding.offset),
                     "Cannot get quantization parameter");
+  if (is_quantized_tensor) {
+    // Make sure the output quantization parameters are equal to the input.
+    ORT_RETURN_IF_ERROR(SetOutputQParamEqualToInputIfNearlyEqual(qnn_model_wrapper, node_unit, logger, input_names,
+                                                                 0 /*input_index*/, 0 /*output_index*/, qnn_data_type,
+                                                                 quantize_param));
+  }
+
   std::vector<uint32_t> target_output_shape;
   ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(gather_output.node_arg, target_output_shape),
                     "Cannot get shape");
diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/instance_norm_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/instance_norm_op_builder.cc
index e1ea22b886268..38172caa03768 100644
--- a/onnxruntime/core/providers/qnn/builder/opbuilder/instance_norm_op_builder.cc
+++ b/onnxruntime/core/providers/qnn/builder/opbuilder/instance_norm_op_builder.cc
@@ -24,6 +24,12 @@ class InstanceNormOpBuilder : public BaseOpBuilder {
                        const logging::Logger& logger) const override final ORT_MUST_USE_RESULT;
 
  protected:
+  Status ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
+                       const NodeUnit& node_unit,
+                       const logging::Logger& logger,
+                       std::vector<std::string>& input_names,
+                       bool do_op_validation) const override ORT_MUST_USE_RESULT;
+
   Status ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper,
                                      const NodeUnit& node_unit,
                                      std::vector<std::string>&& input_names,
@@ -81,6 +87,66 @@ Status InstanceNormOpBuilder::IsOpSupported(QnnModelWrapper& qnn_model_wrapper,
   return Status::OK();
 }
 
+Status InstanceNormOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
+                                            const NodeUnit& node_unit,
+                                            const logging::Logger& logger,
+                                            std::vector<std::string>& input_names,
+                                            bool do_op_validation) const {
+  const auto& inputs = node_unit.Inputs();
+
+  TensorInfo input0_info = {};
+  ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(inputs[0], input0_info));
+
+  // HTP backend can only handle rank 3 inputs if the batch size is 1. If the batch size is not 1,
+  // QNN EP must reshape the input and output to (N, 1, W, C) and process the InstanceNorm as rank 4.
+  if (IsNpuBackend(qnn_model_wrapper.GetQnnBackendType()) &&
+      input0_info.shape.size() == 3 && input0_info.shape[0] != 1) {
+    const std::string& orig_input0_name = inputs[0].node_arg.Name();
+    const std::string op_input0_name = input0_info.is_initializer ? orig_input0_name
+                                                                  : orig_input0_name + "_ort_qnn_ep_reshape";
+    input_names.push_back(op_input0_name);
+
+    std::vector<uint8_t> initializer_data;
+    if (input0_info.is_initializer) {
+      ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(*input0_info.initializer_tensor, initializer_data));
+    }
+
+    std::vector<uint32_t> op_shape = {
+        input0_info.shape[0],  // N
+        1,                     // Height == 1
+        input0_info.shape[1],  // Width
+        input0_info.shape[2]   // Channels
+    };
+
+    if (!input0_info.is_initializer) {
+      // Add Reshape node to transform 1D input to 2D (i.e., set height to 1).
+      // We don't need to do this for initializers, because the element layout does not change. We can just
+      // modify the shape dimensions.
+      bool is_graph_input = qnn_model_wrapper.IsGraphInput(orig_input0_name);
+      ORT_RETURN_IF_ERROR(qnn_model_wrapper.AddReshapeNode(orig_input0_name,
+                                                           op_input0_name,
+                                                           input0_info.shape,
+                                                           op_shape,
+                                                           input0_info.qnn_data_type,
+                                                           input0_info.quant_param,
+                                                           do_op_validation,
+                                                           is_graph_input));
+    }
+
+    Qnn_TensorType_t tensor_type = GetInputTensorType(qnn_model_wrapper, op_input0_name);
+    QnnTensorWrapper input_tensorwrapper(op_input0_name, tensor_type, input0_info.qnn_data_type, input0_info.quant_param,
+                                         std::move(op_shape), std::move(initializer_data));
+    ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(input_tensorwrapper)), "Failed to add tensor.");
+  } else {
+    ORT_RETURN_IF_ERROR(ProcessInput(qnn_model_wrapper, inputs[0], logger, input_names));  // Input 0
+  }
+
+  ORT_RETURN_IF_ERROR(ProcessInput(qnn_model_wrapper, inputs[1], logger, input_names));  // Scale
+  ORT_RETURN_IF_ERROR(ProcessInput(qnn_model_wrapper, inputs[2], logger, input_names));  // Bias
+
+  return Status::OK();
+}
+
 Status InstanceNormOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper,
                                                           const NodeUnit& node_unit,
                                                           std::vector<std::string>&& input_names,
@@ -100,11 +166,59 @@ Status InstanceNormOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_m
   param_tensor_names.push_back(epsilon_param_wrapper.GetParamTensorName());
   qnn_model_wrapper.AddParamWrapper(std::move(epsilon_param_wrapper));
 
-  ORT_RETURN_IF_ERROR(ProcessOutputs(qnn_model_wrapper, node_unit,
-                                     std::move(input_names),
-                                     std::move(param_tensor_names),
-                                     logger, do_op_validation, GetQnnOpType(node_unit.OpType())));
+  const auto& outputs = node_unit.Outputs();
+
+  TensorInfo output_info = {};
+  ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(outputs[0], output_info));
+
+  // HTP backend can only handle rank 3 inputs/outputs if the batch size is 1. If the batch size is not 1,
+  // QNN EP must reshape the input and output to (N, 1, W, C) and process the InstanceNorm as rank 4.
+  if (!IsNpuBackend(qnn_model_wrapper.GetQnnBackendType()) ||
+      output_info.shape.size() != 3 || output_info.shape[0] == 1) {
+    return ProcessOutputs(qnn_model_wrapper, node_unit,
+                          std::move(input_names),
+                          std::move(param_tensor_names),
+                          logger, do_op_validation, GetQnnOpType(node_unit.OpType()));
+  }
 
+  //
+  // The output is meant to be rank 3 with batch size != 1. Must create a QNN InstanceNorm op with a rank 4 output
+  // that is then reshaped to rank 3 again.
+  //
+
+  const std::string& orig_output_name = outputs[0].node_arg.Name();
+  std::string op_output_name = orig_output_name + "_ort_qnn_ep_reshape";
+
+  std::vector<uint32_t> op_output_shape = {
+      output_info.shape[0],  // N
+      1,                     // H == 1
+      output_info.shape[1],  // W
+      output_info.shape[2],  // C
+  };
+
+  QnnTensorWrapper output_tensorwrapper(op_output_name, QNN_TENSOR_TYPE_NATIVE, output_info.qnn_data_type,
+                                        output_info.quant_param, std::vector<uint32_t>(op_output_shape));
+  ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(output_tensorwrapper)), "Failed to add tensor.");
+  ORT_RETURN_IF_NOT(qnn_model_wrapper.CreateQnnNode(GetNodeName(node_unit),
+                                                    QNN_OP_PACKAGE_NAME_QTI_AISW,
+                                                    GetQnnOpType(node_unit.OpType()),
+                                                    std::move(input_names),
+                                                    {op_output_name},
+                                                    std::move(param_tensor_names)),
+                    "Failed to add node.");
+
+  const bool is_graph_output = qnn_model_wrapper.IsGraphOutput(orig_output_name);
+
+  // Add Reshape to convert QNN InstanceNorm output back to rank 3 (as expected by the rest of the ONNX graph).
+  ORT_RETURN_IF_ERROR(qnn_model_wrapper.AddReshapeNode(op_output_name,
+                                                       orig_output_name,
+                                                       op_output_shape,
+                                                       output_info.shape,
+                                                       output_info.qnn_data_type,
+                                                       output_info.quant_param,
+                                                       do_op_validation,
+                                                       false,
+                                                       is_graph_output));
   return Status::OK();
 }
 
diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/pad_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/pad_op_builder.cc
index 2dfdfffe5fa54..d6752f76ef478 100644
--- a/onnxruntime/core/providers/qnn/builder/opbuilder/pad_op_builder.cc
+++ b/onnxruntime/core/providers/qnn/builder/opbuilder/pad_op_builder.cc
@@ -10,6 +10,7 @@
 #include "core/common/safeint.h"
 
 #include "core/providers/qnn/builder/opbuilder/base_op_builder.h"
+#include "core/providers/qnn/builder/qnn_utils.h"
 
 namespace onnxruntime {
 namespace qnn {
@@ -62,17 +63,12 @@ Status PadOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
   return Status::OK();
 }
 
-template <typename T>
-float DequantizeValue(T value, int32_t offset, float scale) {
-  return static_cast<float>(static_cast<int32_t>(value) - offset) * scale;
-}
-
 Status ProcessConstantValue(QnnModelWrapper& qnn_model_wrapper,
                             std::vector<std::string>& param_tensor_names,
                             const NodeUnit& node_unit,
                             const NodeUnitIODef& input) {
-  OnnxInputInfo input_info = {};
-  ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(input, input_info));
+  TensorInfo input_info = {};
+  ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(input, input_info));
   std::vector<uint8_t> unpacked_tensor;
   // Already confirmed constant_value input is initializer in ProcessInputs()
   ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(*input_info.initializer_tensor, unpacked_tensor));
@@ -86,43 +82,43 @@ Status ProcessConstantValue(QnnModelWrapper& qnn_model_wrapper,
     switch (input_info.qnn_data_type) {
       case QNN_DATATYPE_SFIXED_POINT_8: {
         auto int8_span = ReinterpretAsSpan<const int8_t>(gsl::make_span(unpacked_tensor));
-        constant_value = DequantizeValue(int8_span.data()[0],
-                                         input_info.quant_param.scaleOffsetEncoding.offset,
-                                         input_info.quant_param.scaleOffsetEncoding.scale);
+        constant_value = static_cast<float>(utils::Dequantize(input_info.quant_param.scaleOffsetEncoding.offset,
+                                                              input_info.quant_param.scaleOffsetEncoding.scale,
+                                                              static_cast<double>(int8_span.data()[0])));
         break;
       }
       case QNN_DATATYPE_SFIXED_POINT_16: {
         auto int16_span = ReinterpretAsSpan<const int16_t>(gsl::make_span(unpacked_tensor));
-        constant_value = DequantizeValue(int16_span.data()[0],
-                                         input_info.quant_param.scaleOffsetEncoding.offset,
-                                         input_info.quant_param.scaleOffsetEncoding.scale);
+        constant_value = static_cast<float>(utils::Dequantize(input_info.quant_param.scaleOffsetEncoding.offset,
+                                                              input_info.quant_param.scaleOffsetEncoding.scale,
+                                                              static_cast<double>(int16_span.data()[0])));
         break;
       }
       case QNN_DATATYPE_SFIXED_POINT_32: {
         auto int32_span = ReinterpretAsSpan<const int32_t>(gsl::make_span(unpacked_tensor));
-        constant_value = DequantizeValue(int32_span.data()[0],
-                                         input_info.quant_param.scaleOffsetEncoding.offset,
-                                         input_info.quant_param.scaleOffsetEncoding.scale);
+        constant_value = static_cast<float>(utils::Dequantize(input_info.quant_param.scaleOffsetEncoding.offset,
+                                                              input_info.quant_param.scaleOffsetEncoding.scale,
+                                                              static_cast<double>(int32_span.data()[0])));
         break;
       }
       case QNN_DATATYPE_UFIXED_POINT_8: {
-        constant_value = DequantizeValue(unpacked_tensor.data()[0],
-                                         input_info.quant_param.scaleOffsetEncoding.offset,
-                                         input_info.quant_param.scaleOffsetEncoding.scale);
+        constant_value = static_cast<float>(utils::Dequantize(input_info.quant_param.scaleOffsetEncoding.offset,
+                                                              input_info.quant_param.scaleOffsetEncoding.scale,
+                                                              static_cast<double>(unpacked_tensor.data()[0])));
         break;
       }
       case QNN_DATATYPE_UFIXED_POINT_16: {
         auto uint16_span = ReinterpretAsSpan<const uint16_t>(gsl::make_span(unpacked_tensor));
-        constant_value = DequantizeValue(uint16_span.data()[0],
-                                         input_info.quant_param.scaleOffsetEncoding.offset,
-                                         input_info.quant_param.scaleOffsetEncoding.scale);
+        constant_value = static_cast<float>(utils::Dequantize(input_info.quant_param.scaleOffsetEncoding.offset,
+                                                              input_info.quant_param.scaleOffsetEncoding.scale,
+                                                              static_cast<double>(uint16_span.data()[0])));
         break;
       }
       case QNN_DATATYPE_UFIXED_POINT_32: {
         auto uint32_span = ReinterpretAsSpan<const uint32_t>(gsl::make_span(unpacked_tensor));
-        constant_value = DequantizeValue(uint32_span.data()[0],
-                                         input_info.quant_param.scaleOffsetEncoding.offset,
-                                         input_info.quant_param.scaleOffsetEncoding.scale);
+        constant_value = static_cast<float>(utils::Dequantize(input_info.quant_param.scaleOffsetEncoding.offset,
+                                                              input_info.quant_param.scaleOffsetEncoding.scale,
+                                                              static_cast<double>(uint32_span.data()[0])));
         break;
       }
       default:
@@ -202,16 +198,8 @@ Status PadOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrap
   // Qnn format is begin_0, end_0, begin_1, end_1, ...
   ReArranagePads(pad_amount);
 
-  std::vector<uint32_t> pad_amount_dim{static_cast<uint32_t>(pad_amount.size() / 2), static_cast<uint32_t>(2)};
-  QnnParamWrapper multiples_param(node_unit.Index(), node_unit.Name(), QNN_OP_PAD_PARAM_PAD_AMOUNT, std::move(pad_amount_dim),
-                                  std::move(pad_amount));
-  param_tensor_names.push_back(multiples_param.GetParamTensorName());
-  qnn_model_wrapper.AddParamWrapper(std::move(multiples_param));
-
-  // Process optional input constant_value
-  if (node_unit.Inputs().size() > 2) {
-    ORT_RETURN_IF_ERROR(ProcessConstantValue(qnn_model_wrapper, param_tensor_names, node_unit, inputs[2]));
-  }  // constant_value
+  std::vector<uint32_t> input_shape;
+  ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(inputs[0].node_arg, input_shape), "Cannot get shape of input 0.");
 
   NodeAttrHelper node_helper(node_unit);
   std::string mode = node_helper.Get("mode", "constant");
@@ -220,6 +208,10 @@ Status PadOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrap
   if ("constant" == mode) {
     mode_qnn_scalar.uint32Value = QNN_OP_PAD_SCHEME_CONSTANT;
   } else if ("reflect" == mode) {
+    for (size_t i = 0; i < input_shape.size(); i++) {
+      ORT_RETURN_IF(pad_amount[i * 2] > input_shape[i] - 1 || pad_amount[(i * 2) + 1] > input_shape[i] - 1,
+                    "Pad amount should not be greater than shape(input[0])[i] - 1");
+    }
     mode_qnn_scalar.uint32Value = QNN_OP_PAD_SCHEME_MIRROR_REFLECT;
   } else if ("edge" == mode) {
     mode_qnn_scalar.uint32Value = QNN_OP_PAD_SCHEME_EDGE;
@@ -227,10 +219,21 @@ Status PadOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrap
     return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Pad mode only support constant.");
   }
 
+  std::vector<uint32_t> pad_amount_dim{static_cast<uint32_t>(pad_amount.size() / 2), static_cast<uint32_t>(2)};
   QnnParamWrapper mode_param(node_unit.Index(), node_unit.Name(), QNN_OP_PAD_PARAM_SCHEME, mode_qnn_scalar);
   param_tensor_names.push_back(mode_param.GetParamTensorName());
   qnn_model_wrapper.AddParamWrapper(std::move(mode_param));
 
+  QnnParamWrapper multiples_param(node_unit.Index(), node_unit.Name(), QNN_OP_PAD_PARAM_PAD_AMOUNT,
+                                  std::move(pad_amount_dim), std::move(pad_amount));
+  param_tensor_names.push_back(multiples_param.GetParamTensorName());
+  qnn_model_wrapper.AddParamWrapper(std::move(multiples_param));
+
+  // Process optional input constant_value
+  if (node_unit.Inputs().size() > 2) {
+    ORT_RETURN_IF_ERROR(ProcessConstantValue(qnn_model_wrapper, param_tensor_names, node_unit, inputs[2]));
+  }  // constant_value
+
   ORT_RETURN_IF_ERROR(ProcessOutputs(qnn_model_wrapper, node_unit,
                                      std::move(input_names),
                                      std::move(param_tensor_names),
diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/pool_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/pool_op_builder.cc
index a44640b37ae36..872d9682b8355 100644
--- a/onnxruntime/core/providers/qnn/builder/opbuilder/pool_op_builder.cc
+++ b/onnxruntime/core/providers/qnn/builder/opbuilder/pool_op_builder.cc
@@ -29,6 +29,13 @@ class PoolOpBuilder : public BaseOpBuilder {
                                      std::vector<std::string>&& input_names,
                                      const logging::Logger& logger,
                                      bool do_op_validation) const override ORT_MUST_USE_RESULT;
+  Status OverrideOutputQuantParam(QnnModelWrapper& qnn_model_wrapper,
+                                  const NodeUnit& node_unit,
+                                  const logging::Logger& logger,
+                                  const std::vector<std::string>& input_names,
+                                  size_t output_index,
+                                  Qnn_DataType_t qnn_data_type,
+                                  Qnn_QuantizeParams_t& quant_param) const override ORT_MUST_USE_RESULT;
 
  private:
   Status SetCommonPoolParams(const NodeAttrHelper& node_helper, std::vector<uint32_t>& filter_size,
@@ -237,6 +244,23 @@ Status PoolOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wra
   return Status::OK();
 }
 
+Status PoolOpBuilder::OverrideOutputQuantParam(QnnModelWrapper& qnn_model_wrapper,
+                                               const NodeUnit& node_unit,
+                                               const logging::Logger& logger,
+                                               const std::vector<std::string>& input_names,
+                                               size_t output_index,
+                                               Qnn_DataType_t qnn_data_type,
+                                               Qnn_QuantizeParams_t& quant_param) const {
+  // Force MaxPool outputs to use the same quantization parameters as the input if they are nearly equal.
+  // This helps the HTP backend employ certain optimizations.
+  if (node_unit.OpType() == "MaxPool") {
+    return SetOutputQParamEqualToInputIfNearlyEqual(qnn_model_wrapper, node_unit, logger, input_names,
+                                                    0 /*input_index*/, output_index, qnn_data_type, quant_param);
+  }
+
+  return Status::OK();
+}
+
 void CreatePoolOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations) {
   op_registrations.AddOpBuilder(op_type, std::make_unique<PoolOpBuilder>());
 }
diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/reduce_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/reduce_op_builder.cc
index d8188f8ae0048..ca18f94d8e83d 100644
--- a/onnxruntime/core/providers/qnn/builder/opbuilder/reduce_op_builder.cc
+++ b/onnxruntime/core/providers/qnn/builder/opbuilder/reduce_op_builder.cc
@@ -188,13 +188,6 @@ Status ReduceOpBuilder::IsOpSupported(QnnModelWrapper& qnn_model_wrapper,
     return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "QNN EP: ReduceProd operator not supported by HTP backend.");
   }
 
-  if (is_npu_backend) {
-    std::vector<uint32_t> input_shape;
-    ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(node_unit.Inputs()[0].node_arg, input_shape),
-                      "QNN EP: Cannot get input shape for");
-    ORT_RETURN_IF(input_shape.size() > 4, "QNN EP: HTP backend does not support Reduce ops with rank > 4.");
-  }
-
   return AddToModelBuilder(qnn_model_wrapper, node_unit, logger, true);
 }
 
diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/reshape_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/reshape_op_builder.cc
index 73ac81bfc8aef..4b06df6a0e632 100644
--- a/onnxruntime/core/providers/qnn/builder/opbuilder/reshape_op_builder.cc
+++ b/onnxruntime/core/providers/qnn/builder/opbuilder/reshape_op_builder.cc
@@ -23,6 +23,13 @@ class ReshapeOpBuilder : public BaseOpBuilder {
                        const logging::Logger& logger,
                        std::vector<std::string>& input_names,
                        bool do_op_validation) const override ORT_MUST_USE_RESULT;
+  Status OverrideOutputQuantParam(QnnModelWrapper& qnn_model_wrapper,
+                                  const NodeUnit& node_unit,
+                                  const logging::Logger& logger,
+                                  const std::vector<std::string>& input_names,
+                                  size_t output_index,
+                                  Qnn_DataType_t qnn_data_type,
+                                  Qnn_QuantizeParams_t& quant_param) const override ORT_MUST_USE_RESULT;
 };
 
 Status ReshapeOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
@@ -44,6 +51,19 @@ Status ReshapeOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
   return Status::OK();
 }
 
+Status ReshapeOpBuilder::OverrideOutputQuantParam(QnnModelWrapper& qnn_model_wrapper,
+                                                  const NodeUnit& node_unit,
+                                                  const logging::Logger& logger,
+                                                  const std::vector<std::string>& input_names,
+                                                  size_t output_index,
+                                                  Qnn_DataType_t qnn_data_type,
+                                                  Qnn_QuantizeParams_t& quant_param) const {
+  // Force Reshape output to use the same quantization parameters as the input if nearly equal.
+  // This helps the HTP backend emply certain optimizations.
+  return SetOutputQParamEqualToInputIfNearlyEqual(qnn_model_wrapper, node_unit, logger, input_names,
+                                                  0 /*input_index*/, output_index, qnn_data_type, quant_param);
+}
+
 void CreateReshapeOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations) {
   op_registrations.AddOpBuilder(op_type, std::make_unique<ReshapeOpBuilder>());
 }
diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/resize_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/resize_op_builder.cc
index 511f2a5149f2e..cc620b7a86a18 100644
--- a/onnxruntime/core/providers/qnn/builder/opbuilder/resize_op_builder.cc
+++ b/onnxruntime/core/providers/qnn/builder/opbuilder/resize_op_builder.cc
@@ -2,7 +2,8 @@
 // Licensed under the MIT License.
 
 #include <array>
-#include <string_view>
+#include <cassert>
+#include <unordered_map>
 
 #include "core/providers/common.h"
 #include "core/providers/shared/utils/utils.h"
@@ -41,77 +42,15 @@ class ResizeOpBuilder : public BaseOpBuilder {
                                      const logging::Logger& logger,
                                      bool do_op_validation) const override ORT_MUST_USE_RESULT;
 
- private:
-  /**
-   * Returns the QNN integer value that corresponds to the given ONNX mode (string).
-   *
-   * /param onnx_modes Array of ONNX modes supported by QNN. The index of each mode corresponds to the QNN value.
-   * /param onnx_mode The ONNX mode for which to get the corresponding QNN value.
-   * /param onnx_model_label Mode label to print out in case of error (e.g., "nearest_mode").
-   * /param qnn_mode Output parameter that is set to the appropriate QNN value from the given ONNX mode.
-   *
-   * /returns A status indicating failure or success.
-   */
-  template <typename QnnValType, std::size_t N>
-  Status GetQnnModeFromString(const std::array<std::string_view, N>& onnx_modes, std::string_view onnx_mode,
-                              const char* onnx_mode_label, QnnValType& qnn_mode) const ORT_MUST_USE_RESULT;
-
-  /**
-   * Called by IsOpSupported to validate the op for non-quantized models.
-   *
-   * /param qnn_model_wrapper The QNN model wrapper instance.
-   * /param node_unit The node unit containing metadata for the ONNX Resize operator.
-   *
-   * /returns A status indicating failure or success.
-   */
-  Status ValidateOp(QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit) const ORT_MUST_USE_RESULT;
-
-  /**
-   * Called by IsOpSupported to validate the op for quantized models.
-   *
-   * /param qnn_model_wrapper The QNN model wrapper instance.
-   * /param node_unit The node unit containing metadata for the ONNX Resize operator and its Q/DQ nodes.
-   *
-   * /returns A status indicating failure or success.
-   */
-  Status ValidateQDQOp(QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit) const ORT_MUST_USE_RESULT;
-
-  /**
-   * Called by ProcessAttributesAndOutputs to process the op's attributes and outputs
-   * for non-quantized models.
-   *
-   * /param qnn_model_wrapper The QNN model wrapper instance.
-   * /param node_unit The node unit containing metadata for the ONNX Resize operator.
-   * /param input_names The operator's input names.
-   * /param logger A logger.
-   * /param do_op_validation Set to true if the op should be validated using QNN's validation API.
-   *
-   * /returns A status indicating failure or success.
-   */
-  Status ProcessOpAttrsAndOutputs(QnnModelWrapper& qnn_model_wrapper,
+  Status OverrideOutputQuantParam(QnnModelWrapper& qnn_model_wrapper,
                                   const NodeUnit& node_unit,
-                                  std::vector<std::string>&& input_names,
                                   const logging::Logger& logger,
-                                  bool do_op_validation) const ORT_MUST_USE_RESULT;
-
-  /**
-   * Called by ProcessAttributesAndOutputs to process the op's attributes and outputs
-   * for quantized models.
-   *
-   * /param qnn_model_wrapper The QNN model wrapper instance.
-   * /param node_unit The node unit containing metadata for the ONNX Resize operator and its Q/DQ nodes.
-   * /param input_names The operator's input names.
-   * /param logger A logger.
-   * /param do_op_validation Set to true if the op should be validated using QNN's validation API.
-   *
-   * /returns A status indicating failure or success.
-   */
-  Status ProcessQDQOpAttrsAndOutputs(QnnModelWrapper& qnn_model_wrapper,
-                                     const NodeUnit& node_unit,
-                                     std::vector<std::string>&& input_names,
-                                     const logging::Logger& logger,
-                                     bool do_op_validation) const ORT_MUST_USE_RESULT;
+                                  const std::vector<std::string>& input_names,
+                                  size_t output_index,
+                                  Qnn_DataType_t qnn_data_type,
+                                  Qnn_QuantizeParams_t& quant_param) const override ORT_MUST_USE_RESULT;
 
+ private:
   // Info for each ONNX attribute of interest (attribute name + default value)
   static const OnnxAttrInfo<std::string> onnx_mode_attr;
   static const OnnxAttrInfo<std::string> onnx_coord_transf_mode_attr;
@@ -119,21 +58,29 @@ class ResizeOpBuilder : public BaseOpBuilder {
   static const OnnxAttrInfo<int64_t> onnx_antialias_attr;
   static const OnnxAttrInfo<int64_t> onnx_exclude_outside_attr;
 
-  // Arrays of supported QNN modes for QNN's Resize op. The index of each mode is used as the corresponding
-  // QNN parameter value. Ex: The "nearest" mode is represented as the value 0 in QNN. Note, that
-  // not all modes are supported by every QNN backend.
+  // Tables that map an ONNX attribute value (string) to the corresponding integer (enum) QNN parameter value.
+  // Ex: The "half_pixel" coordinate_transformation_mode is represented as the value 0 in QNN.
+  // Only the modes supported by QNN Resize are mapped by these tables.
+  static const std::unordered_map<std::string, uint32_t> supported_modes;
+  static const std::unordered_map<std::string, uint32_t> supported_coord_transf_modes;
+  static const std::unordered_map<std::string, uint32_t> supported_nearest_modes;
+};
 
-  // QNN values: NEAREST = 0, LINEAR = 1
-  static constexpr std::array<std::string_view, 2> supported_modes = {"nearest", "linear"};
+const std::unordered_map<std::string, uint32_t> ResizeOpBuilder::supported_modes = {
+    {"nearest", QNN_OP_RESIZE_INTERPOLATION_MODE_NEAREST},
+    {"linear", QNN_OP_RESIZE_INTERPOLATION_MODE_LINEAR}};
 
-  // QNN values: HALF_PIXEL = 0, PYTORCH_HALF_PIXEL = 1, ALIGN_CORNERS = 2, ASYMMETRIC = 3
-  static constexpr std::array<std::string_view, 4> supported_coord_transf_modes = {"half_pixel", "pytorch_half_pixel",
-                                                                                   "align_corners", "asymmetric"};
+const std::unordered_map<std::string, uint32_t> ResizeOpBuilder::supported_coord_transf_modes = {
+    {"half_pixel", QNN_OP_RESIZE_TRANSFORMATION_MODE_HALF_PIXEL},
+    {"pytorch_half_pixel", QNN_OP_RESIZE_TRANSFORMATION_MODE_PYTORCH_HALF_PIXEL},
+    {"align_corners", QNN_OP_RESIZE_TRANSFORMATION_MODE_ALIGN_CORNERS},
+    {"asymmetric", QNN_OP_RESIZE_TRANSFORMATION_MODE_ASYMMETRIC}};
 
-  // QNN values: ROUND_PREFER_FLOOR = 0, ROUND_PREFER_CEIL = 1, FLOOR = 2, CEIL = 3
-  static constexpr std::array<std::string_view, 4> supported_nearest_modes = {"round_prefer_floor", "round_prefer_ceil",
-                                                                              "floor", "ceil"};
-};
+const std::unordered_map<std::string, uint32_t> ResizeOpBuilder::supported_nearest_modes = {
+    {"round_prefer_floor", QNN_OP_RESIZE_NEAREST_MODE_ROUND_PREFER_FLOOR},
+    {"round_prefer_ceil", QNN_OP_RESIZE_NEAREST_MODE_ROUND_PREFER_CEIL},
+    {"floor", QNN_OP_RESIZE_NEAREST_MODE_FLOOR},
+    {"ceil", QNN_OP_RESIZE_NEAREST_MODE_CEIL}};
 
 const OnnxAttrInfo<std::string> ResizeOpBuilder::onnx_mode_attr = {"mode", "nearest"};
 const OnnxAttrInfo<std::string> ResizeOpBuilder::onnx_coord_transf_mode_attr = {"coordinate_transformation_mode",
@@ -143,19 +90,26 @@ const OnnxAttrInfo<std::string> ResizeOpBuilder::onnx_nearest_mode_attr = {"near
 const OnnxAttrInfo<int64_t> ResizeOpBuilder::onnx_antialias_attr = {"antialias", 0};
 const OnnxAttrInfo<int64_t> ResizeOpBuilder::onnx_exclude_outside_attr = {"exclude_outside", 0};
 
-template <typename QnnValType, std::size_t N>
-Status ResizeOpBuilder::GetQnnModeFromString(const std::array<std::string_view, N>& onnx_modes,
-                                             std::string_view onnx_mode, const char* onnx_mode_label,
-                                             QnnValType& qnn_mode) const {
-  for (size_t i = 0; i < onnx_modes.size(); ++i) {
-    if (onnx_modes[i] == onnx_mode) {
-      qnn_mode = SafeInt<QnnValType>(i);
-      return Status::OK();
-    }
+// Returns the QNN parameter integer value that corresponds to the given ONNX attribute mode string value.
+static Status GetQnnModeValFromOnnxString(const std::unordered_map<std::string, uint32_t>& supported_qnn_modes,
+                                          const std::string& onnx_attr_value,
+                                          const char* onnx_attr_name,
+                                          uint32_t& qnn_mode_value) {
+  auto it = supported_qnn_modes.find(onnx_attr_value);
+  if (it != supported_qnn_modes.end()) {
+    qnn_mode_value = it->second;
+    return Status::OK();
   }
 
-  return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "QNN EP: Resize operator does not support ", onnx_mode_label,
-                         " ", std::string(onnx_mode));
+  return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "QNN EP: Resize operator does not support ", onnx_attr_name,
+                         " ", std::string(onnx_attr_value));
+}
+
+// Returns true if the given ONNX attribute mode value is generally supported on QNN. Note that
+// different QNN backends may support a smaller subset of modes.
+static bool IsOnnxAttrModeSupported(const std::unordered_map<std::string, uint32_t>& supported_qnn_modes,
+                                    const std::string& onnx_attr_value) {
+  return supported_qnn_modes.find(onnx_attr_value) != supported_qnn_modes.end();
 }
 
 // Resize ops are sensitive with data layout, no special validation so far
@@ -169,118 +123,95 @@ Status ResizeOpBuilder::IsOpSupported(QnnModelWrapper& qnn_model_wrapper,
     return AddToModelBuilder(qnn_model_wrapper, node_unit, logger, true);
   }
 
+  const bool is_npu_backend = IsNpuBackend(qnn_model_wrapper.GetQnnBackendType());
+  NodeAttrHelper node_helper(node_unit);
+
   // QNN doesn't support anti-aliasing (added in opset 18)
   if (node_unit.SinceVersion() >= 18) {
-    NodeAttrHelper node_helper(node_unit);
     const bool antialias = GetOnnxAttr(node_helper, onnx_antialias_attr) != 0;
     ORT_RETURN_IF(antialias, "QNN EP: Resize doesn't support anti-aliasing.");
   }
 
-  // The QNN Resize op does not currently work with the QNN cpu backend, but works with the HTP backend. Therefore, we
-  // currently use QNN's Resize op for quantized models and either ResizeBilinear or ResizeNearestNeighbor for
-  // non-quantized models. This requires separate validation for quantized models.
-  // TODO: Use only Resize once QNN's Resize op works in the QNN cpu backend.
-  bool is_npu_backend = IsNpuBackend(qnn_model_wrapper.GetQnnBackendType());
-  return is_npu_backend ? ValidateQDQOp(qnn_model_wrapper, node_unit) : ValidateOp(qnn_model_wrapper, node_unit);
-}
-
-Status ResizeOpBuilder::ValidateOp(QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit) const {
-  NodeAttrHelper node_helper(node_unit);
-  const std::string resize_mode = GetOnnxAttr(node_helper, onnx_mode_attr);
-  ORT_RETURN_IF((resize_mode != "nearest") && (resize_mode != "linear"),
-                "QNN EP: Resize doesn't support mode '", resize_mode.c_str(), "'.",
-                "Only 'nearest' and 'linear' are supported.");
-
-  const std::string coordinate_mode = GetOnnxAttr(node_helper, onnx_coord_transf_mode_attr);
-  ORT_RETURN_IF((coordinate_mode != "half_pixel") && (coordinate_mode != "align_corners"),
-                "QNN EP: coordinate transformation mode '", coordinate_mode.c_str(), "' not supported for Resize op.",
-                "Only 'align_corners' and 'half_pixel' are supported.");
-
-  // Check for a valid "nearest_mode" if the mode is "nearest".
-  if (resize_mode == "nearest") {
-    // NOTE: QNN's ResizeNearestNeighbor operator does not have a way to specify rounding (i.e., "nearest_mode").
-    // The output of the QNN ResizeNearestNeighbor operator is not always equivalent to ONNX's Resize
-    // operator with any single specific "nearest_mode".
-    //
-    // For some input/output shapes, QNN's ResizeNearestNeighbor is equivalent to ONNX's Resize with "round_prefer_floor".
-    // For other shapes, QNN's ResizeNearestNeighbor is equivalent to ONNX Resize with "round_prefer_ceil".
-    //
-    // From unit tests, I've found a relationship between input/output shapes and the equivalent ONNX "nearest_mode".
-    // If the new and old spatial dimensions are evenly divisible, the "nearest_mode" is "round_prefer_floor".
-    // Otherwise, the "nearest_mode" is "round_prefer_ceil".
-    //
-    // This relationship is probably incomplete/wrong.
-    //
-    // TODO: Ask Qualcomm what the correct "nearest_mode" should be,
-    // OR use QNN's own Resize operator once it works on QnnCpu.
-    const std::string& nearest_mode = GetOnnxAttr(node_helper, onnx_nearest_mode_attr);
-    ORT_RETURN_IF_NOT("floor" == nearest_mode, "QNN Resize only supports nearest_mode: floor!");  // This is wrong!
-  }
-
-  auto& input_0 = node_unit.Inputs()[0];
-  std::vector<uint32_t> input_shape;
-  ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(input_0.node_arg, input_shape),
-                    "QNN EP: Cannot get input shape for Resize op");
-
-  const auto& output_0 = node_unit.Outputs()[0];
-  std::vector<uint32_t> output_shape;
-  ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(output_0.node_arg, output_shape),
-                    "QNN EP: Cannot get output shape for Resize op");
-
-  ORT_RETURN_IF(input_shape.size() != 4 || output_shape.size() != 4, "QNN Resize only supports 4D!");
-
-  ONNX_NAMESPACE::DataType input_data_type = input_0.node_arg.Type();
-  ORT_RETURN_IF(input_data_type != ONNX_NAMESPACE::Utils::DataTypeUtils::ToType("float"),
-                "QNN EP: Data type ", input_data_type->c_str(),
-                " is not supported for Resize operator in CPU backend.");
-
-  return Status::OK();
-}
-
-Status ResizeOpBuilder::ValidateQDQOp(QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit) const {
-  NodeAttrHelper node_helper(node_unit);
-
-  using namespace onnxruntime::qnn::utils;
   // Check mode
   const std::string interp_mode = GetOnnxAttr(node_helper, onnx_mode_attr);
-  ORT_RETURN_IF_NOT(ArrayHasString(supported_modes, interp_mode), "QNN EP: Resize does not support mode ",
+  ORT_RETURN_IF_NOT(IsOnnxAttrModeSupported(supported_modes, interp_mode), "QNN EP: Resize does not support mode ",
                     interp_mode.c_str());
 
   // Check coordinate transformation mode
   const std::string transformation_mode = GetOnnxAttr(node_helper, onnx_coord_transf_mode_attr);
-  ORT_RETURN_IF_NOT(ArrayHasString(supported_coord_transf_modes, transformation_mode),
+  ORT_RETURN_IF_NOT(IsOnnxAttrModeSupported(supported_coord_transf_modes, transformation_mode),
                     "QNN EP: Resize does not support coordinate_transformation_mode ", transformation_mode.c_str());
 
-  // Check nearest mode
+  const auto& input_0 = node_unit.Inputs()[0];
+  std::vector<uint32_t> input_shape;
+  ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(input_0.node_arg, input_shape),
+                    "QNN EP: Cannot get shape for Resize input");
+  const size_t input_rank = input_shape.size();
+
+  // Validate Resize w/ "nearest" mode.
+  // Translation matrix of ONNX Resize w/ "nearest" mode on HTP backend.
+  // Table entries correspond to the QNN operator used for the given configuration
+  // (Resize = QNN Resize op, RNN = QNN ResizeNearestNeighbor op, X = Unsupported).
+  //
+  //                                                   nearest_mode:
+  // coordinate_transformation_mode: | round_prefer_floor  round_prefer_ceil  floor  ceil
+  // -----------------------------------------------------------------------------------------
+  //                      half_pixel |     Resize               X              RNN     X
+  //              pytorch_half_pixel |     Resize               X               X      X
+  //                   align_corners |     Resize               X              RNN     X
+  //                      asymmetric |     Resize               X              RNN     X
+
   if (interp_mode == "nearest") {
     const std::string nearest_mode = GetOnnxAttr(node_helper, onnx_nearest_mode_attr);
-    ORT_RETURN_IF_NOT(ArrayHasString(supported_nearest_modes, nearest_mode),
+    ORT_RETURN_IF_NOT(IsOnnxAttrModeSupported(supported_nearest_modes, nearest_mode),
                       "QNN EP: Resize does not support nearest_mode ", nearest_mode.c_str());
 
-    // TODO: Support 'asymmetric' transformation mode with nearest_mode != 'floor'.
-    //
-    // QNN's ONNX converter tool translates 'nearest' + 'asymmetric' (regardless of rounding mode)
-    // to QNN's ResizeNearestNeighbor with {align_corners: 0, half_pixel: 0}.
-    // This is only accurate if the rounding mode is "floor". Need to investigate how to handle
-    // other rounding modes with Qualcomm. Ideally, we would use QNN's Resize operator, but it doesn't support
-    // the "asymmetric" coordinate transformation mode on HTP.
-    ORT_RETURN_IF(transformation_mode == "asymmetric" && nearest_mode != "floor",
-                  "QNN EP: Resize with coordinate_transformation_mode 'asymmetric' and nearest_mode '", nearest_mode,
-                  "' is not currently supported on the HTP backend.");
+    if (is_npu_backend) {
+      // QNN only supports the following nearest_mode values on HTP:
+      // - "round_prefer_floor" via QNN's Resize operator
+      // - "floor" via QNN's ResizeNearestNeighbor operator
+      //
+      // QNN validation does not throw an error if unsupported nearest_mode values are used, so we have to
+      // catch them here. Otherwise, accuracy is significantly degraded.
+      ORT_RETURN_IF_NOT(nearest_mode == "round_prefer_floor" || nearest_mode == "floor",
+                        "QNN EP: Resize on the NPU does not support nearest_mode ", nearest_mode.c_str());
+
+      const bool use_resize_nn_op = nearest_mode == "floor";
+
+      // If HTP uses ResizeNearestNeighbor ("floor"), then the "pytorch_half_pixel" coordinate_transformation_mode
+      // is not supported.
+      ORT_RETURN_IF(use_resize_nn_op && transformation_mode == "pytorch_half_pixel",
+                    "QNN EP: Resize on the NPU does not support the combination of nearest_mode == 'floor' ",
+                    " and coordinate_transformation_mode == 'pytorch_half_pixel'.");
+
+      // QNN's ResizeNearestNeighbor requires rank 4 inputs.
+      ORT_RETURN_IF(use_resize_nn_op && input_rank != 4,
+                    "QNN EP: Resize on the NPU with nearest_mode == 'floor' requires an input with rank 4.");
+    }
   }
 
-  // Check that input shape has at least a rank of 3.
-  const auto& input_0 = node_unit.Inputs()[0];
-  std::vector<uint32_t> input_shape;
-  ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(input_0.node_arg, input_shape),
-                    "QNN EP: Cannot get shape for Resize input");
-  ORT_RETURN_IF(input_shape.size() < 3, "QNN EP: Resize input must have a rank >= 3.");
+  // Check that the input shape has at least a rank of 3 (and a max of 5 on HTP).
+  ORT_RETURN_IF(input_rank < 3 || (is_npu_backend && input_rank > 5),
+                "QNN EP: Resize input must have a rank >= 3. The maximum rank is 5 on the NPU.");
 
   const auto& output_0 = node_unit.Outputs()[0];
   std::vector<uint32_t> output_shape;
   ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(output_0.node_arg, output_shape),
                     "QNN EP: Cannot get shape for Resize output");
-  ORT_RETURN_IF(output_shape.size() < 3, "QNN EP: Resize output must have a rank >= 3.");
+
+  // Check that only the spatial dimensions (width, height) are resized. The batch_size (N) and channels (C) should
+  // be untouched. This code runs before layout transformation, so we know that the current layout is "channel first"
+  // (e.g., N, C, S1, S2, ..., SN), and that the minimum rank is 3.
+  assert(node_unit.Domain() != kMSInternalNHWCDomain);
+  ORT_RETURN_IF_NOT(input_shape[0] == output_shape[0] && input_shape[1] == output_shape[1],
+                    "QNN EP: Resize may only change the spatial dimensions.");
+
+  if (!is_npu_backend) {
+    ONNX_NAMESPACE::DataType input_data_type = input_0.node_arg.Type();
+    ORT_RETURN_IF(input_data_type != ONNX_NAMESPACE::Utils::DataTypeUtils::ToType("float"),
+                  "QNN EP: Data type ", input_data_type->c_str(),
+                  " is not supported for Resize operator in CPU backend.");
+  }
 
   return Status::OK();
 }
@@ -305,92 +236,34 @@ Status ResizeOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_w
                                                     std::vector<std::string>&& input_names,
                                                     const logging::Logger& logger,
                                                     bool do_op_validation) const {
-  // The QNN Resize op does not currently work with the QNN cpu backend, but works with the HTP backend. Therefore, we
-  // currently use QNN's Resize op for quantized models and either ResizeBilinear or ResizeNearestNeighbor for
-  // non-quantized models. This requires separate handling for quantized models.
-  // TODO: Use only Resize once QNN's Resize op works in the QNN cpu backend.
-  bool is_quantized_node = NodeUnit::Type::QDQGroup == node_unit.UnitType();
-  return is_quantized_node ? ProcessQDQOpAttrsAndOutputs(qnn_model_wrapper, node_unit, std::move(input_names), logger, do_op_validation) : ProcessOpAttrsAndOutputs(qnn_model_wrapper, node_unit, std::move(input_names), logger, do_op_validation);
-}
-
-Status ResizeOpBuilder::ProcessOpAttrsAndOutputs(QnnModelWrapper& qnn_model_wrapper,
-                                                 const NodeUnit& node_unit,
-                                                 std::vector<std::string>&& input_names,
-                                                 const logging::Logger& logger,
-                                                 bool do_op_validation) const {
-  ORT_UNUSED_PARAMETER(logger);
-  NodeAttrHelper node_helper(node_unit);
-  const std::string resize_mode = GetOnnxAttr(node_helper, onnx_mode_attr);
-  std::string qnn_node_type = "ResizeNearestNeighbor";
-  if ("linear" == resize_mode) {
-    qnn_node_type = "ResizeBilinear";
-  }
-
-  const std::string coordinate_mode = GetOnnxAttr(node_helper, onnx_coord_transf_mode_attr);
-
-  Qnn_Scalar_t qnn_align_corners = QNN_SCALAR_INIT;
-  qnn_align_corners.dataType = QNN_DATATYPE_BOOL_8;
-  qnn_align_corners.bool8Value = static_cast<uint8_t>(0);
-
-  Qnn_Scalar_t qnn_half_pixel = QNN_SCALAR_INIT;
-  qnn_half_pixel.dataType = QNN_DATATYPE_BOOL_8;
-  qnn_half_pixel.bool8Value = static_cast<uint8_t>(0);
-
-  if ("align_corners" == coordinate_mode) {
-    qnn_align_corners.bool8Value = static_cast<uint8_t>(1);
-  } else if ("half_pixel" == coordinate_mode) {
-    qnn_half_pixel.bool8Value = static_cast<uint8_t>(1);
-  }
-  QnnParamWrapper qnn_align_corners_param(node_unit.Index(), node_unit.Name(),
-                                          QNN_OP_RESIZE_BILINEAR_PARAM_ALIGN_CORNERS, qnn_align_corners);
-  QnnParamWrapper qnn_half_pixel_param(node_unit.Index(), node_unit.Name(),
-                                       QNN_OP_RESIZE_BILINEAR_PARAM_HALF_PIXEL_CENTERS, qnn_half_pixel);
-
-  std::vector<std::string> param_tensor_names;
-  param_tensor_names.push_back(qnn_align_corners_param.GetParamTensorName());
-  qnn_model_wrapper.AddParamWrapper(std::move(qnn_align_corners_param));
-  param_tensor_names.push_back(qnn_half_pixel_param.GetParamTensorName());
-  qnn_model_wrapper.AddParamWrapper(std::move(qnn_half_pixel_param));
-
-  return ProcessOutputs(qnn_model_wrapper, node_unit, std::move(input_names), std::move(param_tensor_names),
-                        logger, do_op_validation, qnn_node_type);
-}
-
-Status ResizeOpBuilder::ProcessQDQOpAttrsAndOutputs(QnnModelWrapper& qnn_model_wrapper,
-                                                    const NodeUnit& node_unit,
-                                                    std::vector<std::string>&& input_names,
-                                                    const logging::Logger& logger,
-                                                    bool do_op_validation) const {
   std::vector<std::string> param_tensor_names;
   NodeAttrHelper node_helper(node_unit);
 
   const std::string interp_mode = GetOnnxAttr(node_helper, onnx_mode_attr);
   const std::string transformation_mode = GetOnnxAttr(node_helper, onnx_coord_transf_mode_attr);
+  const std::string nearest_mode = GetOnnxAttr(node_helper, onnx_nearest_mode_attr);
+  const bool is_npu_backend = IsNpuBackend(qnn_model_wrapper.GetQnnBackendType());
   std::string qnn_op_type = "Resize";
 
-  // Handle Resize with {mode: "nearest", coordinate_transformation_mode: "asymmetric"} uniquely.
-  // QNN's ONNX converter tool translates this configuration (regardless of rounding mode)
-  // to QNN's ResizeNearestNeighbor with {align_corners: 0, half_pixel: 0}.
-  //
-  // NOTE: This is only accurate if the rounding mode is "floor". Need to investigate how to handle
-  // other rounding modes with Qualcomm. Ideally, we would use QNN's Resize operator, but it doesn't support
-  // the "asymmetric" coordinate transformation mode on HTP.
-  if (interp_mode == "nearest" && transformation_mode == "asymmetric") {
+  // Translate Resize with {mode: "nearest", nearest_mode: "floor", coordinate_transformation_mode: XXX} to
+  // QNN's ResizeNearestNeighbor operator on the HTP backend. This combination of parameters is not supported on HTP
+  // via QNN's Resize operator. Note that QNN's ResizeNearestNeighbor operator always uses "floor" rounding.
+  if (is_npu_backend && interp_mode == "nearest" && nearest_mode == "floor") {
     qnn_op_type = "ResizeNearestNeighbor";
 
-    // Set parameter 'align_corners' to 0
+    // Parameter 'align_corners'
     Qnn_Scalar_t qnn_align_corners = QNN_SCALAR_INIT;
     qnn_align_corners.dataType = QNN_DATATYPE_BOOL_8;
-    qnn_align_corners.bool8Value = static_cast<uint8_t>(0);
+    qnn_align_corners.bool8Value = static_cast<uint8_t>(transformation_mode == "align_corners");
     QnnParamWrapper qnn_align_corners_param(node_unit.Index(), node_unit.Name(),
                                             QNN_OP_RESIZE_BILINEAR_PARAM_ALIGN_CORNERS, qnn_align_corners);
     param_tensor_names.push_back(qnn_align_corners_param.GetParamTensorName());
     qnn_model_wrapper.AddParamWrapper(std::move(qnn_align_corners_param));
 
-    // Set parameter 'half_pixel_centers' to 0
+    // Parameter 'half_pixel_centers'
     Qnn_Scalar_t qnn_half_pixel = QNN_SCALAR_INIT;
     qnn_half_pixel.dataType = QNN_DATATYPE_BOOL_8;
-    qnn_half_pixel.bool8Value = static_cast<uint8_t>(0);
+    qnn_half_pixel.bool8Value = static_cast<uint8_t>(transformation_mode == "half_pixel");
     QnnParamWrapper qnn_half_pixel_param(node_unit.Index(), node_unit.Name(),
                                          QNN_OP_RESIZE_BILINEAR_PARAM_HALF_PIXEL_CENTERS, qnn_half_pixel);
     param_tensor_names.push_back(qnn_half_pixel_param.GetParamTensorName());
@@ -399,11 +272,12 @@ Status ResizeOpBuilder::ProcessQDQOpAttrsAndOutputs(QnnModelWrapper& qnn_model_w
     // Parameter 'transformation_mode'
     Qnn_Scalar_t qnn_transformation_mode = QNN_SCALAR_INIT;
     qnn_transformation_mode.dataType = QNN_DATATYPE_UINT_32;
-    ORT_RETURN_IF_ERROR(GetQnnModeFromString(supported_coord_transf_modes, transformation_mode,
-                                             "coordinate_transformation_mode", qnn_transformation_mode.uint32Value));
+    ORT_RETURN_IF_ERROR(GetQnnModeValFromOnnxString(supported_coord_transf_modes, transformation_mode,
+                                                    "coordinate_transformation_mode",
+                                                    qnn_transformation_mode.uint32Value));
 
-    QnnParamWrapper qnn_transformation_mode_param(node_unit.Index(), node_unit.Name(), QNN_OP_RESIZE_PARAM_TRANSFORMATION_MODE,
-                                                  qnn_transformation_mode);
+    QnnParamWrapper qnn_transformation_mode_param(node_unit.Index(), node_unit.Name(),
+                                                  QNN_OP_RESIZE_PARAM_TRANSFORMATION_MODE, qnn_transformation_mode);
     param_tensor_names.push_back(qnn_transformation_mode_param.GetParamTensorName());
     qnn_model_wrapper.AddParamWrapper(std::move(qnn_transformation_mode_param));
 
@@ -420,7 +294,7 @@ Status ResizeOpBuilder::ProcessQDQOpAttrsAndOutputs(QnnModelWrapper& qnn_model_w
     // Parameter 'interpolation_mode'
     Qnn_Scalar_t qnn_interp_mode = QNN_SCALAR_INIT;
     qnn_interp_mode.dataType = QNN_DATATYPE_UINT_32;
-    ORT_RETURN_IF_ERROR(GetQnnModeFromString(supported_modes, interp_mode, "mode", qnn_interp_mode.uint32Value));
+    ORT_RETURN_IF_ERROR(GetQnnModeValFromOnnxString(supported_modes, interp_mode, "mode", qnn_interp_mode.uint32Value));
 
     QnnParamWrapper qnn_interp_mode_param(node_unit.Index(), node_unit.Name(), QNN_OP_RESIZE_PARAM_INTERPOLATION_MODE,
                                           qnn_interp_mode);
@@ -429,11 +303,10 @@ Status ResizeOpBuilder::ProcessQDQOpAttrsAndOutputs(QnnModelWrapper& qnn_model_w
 
     // Parameter 'nearest_mode'. Processed only when 'interpolation_mode' is NEAREST(0).
     if (qnn_interp_mode.uint32Value == 0) {
-      const std::string nearest_mode = GetOnnxAttr(node_helper, onnx_nearest_mode_attr);
       Qnn_Scalar_t qnn_nearest_mode = QNN_SCALAR_INIT;
       qnn_nearest_mode.dataType = QNN_DATATYPE_UINT_32;
-      ORT_RETURN_IF_ERROR(GetQnnModeFromString(supported_nearest_modes, nearest_mode, "nearest_mode",
-                                               qnn_nearest_mode.uint32Value));
+      ORT_RETURN_IF_ERROR(GetQnnModeValFromOnnxString(supported_nearest_modes, nearest_mode, "nearest_mode",
+                                                      qnn_nearest_mode.uint32Value));
 
       QnnParamWrapper qnn_nearest_mode_param(node_unit.Index(), node_unit.Name(), QNN_OP_RESIZE_PARAM_NEAREST_MODE,
                                              qnn_nearest_mode);
@@ -446,6 +319,19 @@ Status ResizeOpBuilder::ProcessQDQOpAttrsAndOutputs(QnnModelWrapper& qnn_model_w
                         logger, do_op_validation, qnn_op_type);
 }
 
+Status ResizeOpBuilder::OverrideOutputQuantParam(QnnModelWrapper& qnn_model_wrapper,
+                                                 const NodeUnit& node_unit,
+                                                 const logging::Logger& logger,
+                                                 const std::vector<std::string>& input_names,
+                                                 size_t output_index,
+                                                 Qnn_DataType_t qnn_data_type,
+                                                 Qnn_QuantizeParams_t& quant_param) const {
+  // Force Resize op's output to use the same quantization parameters as the input if nearly equal.
+  // This helps the HTP backend employ certain optimizations.
+  return SetOutputQParamEqualToInputIfNearlyEqual(qnn_model_wrapper, node_unit, logger, input_names,
+                                                  0 /*input_index*/, output_index, qnn_data_type, quant_param);
+}
+
 void CreateResizeOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations) {
   op_registrations.AddOpBuilder(op_type, std::make_unique<ResizeOpBuilder>());
 }
diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc
index 8081033c35618..dd678ab5467ed 100644
--- a/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc
+++ b/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc
@@ -22,14 +22,26 @@ class SimpleOpBuilder : public BaseOpBuilder {
   ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(SimpleOpBuilder);
 
  protected:
+  Status ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
+                       const NodeUnit& node_unit,
+                       const logging::Logger& logger,
+                       std::vector<std::string>& input_names,
+                       bool do_op_validation) const override ORT_MUST_USE_RESULT;
   Status ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper,
                                      const NodeUnit& node_unit,
                                      std::vector<std::string>&& input_names,
                                      const logging::Logger& logger,
                                      bool do_op_validation) const override ORT_MUST_USE_RESULT;
+  Status OverrideOutputQuantParam(QnnModelWrapper& qnn_model_wrapper,
+                                  const NodeUnit& node_unit,
+                                  const logging::Logger& logger,
+                                  const std::vector<std::string>& input_names,
+                                  size_t output_index,
+                                  Qnn_DataType_t qnn_data_type,
+                                  Qnn_QuantizeParams_t& quant_param) const override ORT_MUST_USE_RESULT;
 
  private:
-  Status ExplicitOpCheck(const QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit) const;
+  Status ExplicitOpCheck(const NodeUnit& node_unit) const;
   Status ProcessSigmoidOrTanhOutput(QnnModelWrapper& qnn_model_wrapper,
                                     const NodeUnit& node_unit,
                                     std::vector<std::string>&& input_names,
@@ -41,30 +53,93 @@ class SimpleOpBuilder : public BaseOpBuilder {
   static constexpr std::array<std::string_view, 3> gridsample_supported_padding_modes = {"zeros", "border", "reflection"};
 };
 
-static int32_t GetDefaultAxisAttribute(const std::string& op_type, int opset_version) {
-  if (op_type == "Softmax" || op_type == "LogSoftmax") {
-    // Default axis changed from 1 to -1 in opset 13.
-    return opset_version < 13 ? 1 : -1;
-  }
-
-  return 0;
+// Move to qnn_utils if it's re-usable
+Status InsertConvertOp(QnnModelWrapper& qnn_model_wrapper,
+                       const std::string& convert_input_name,
+                       const std::string& convert_output_name,
+                       Qnn_DataType_t input_qnn_data_type,
+                       Qnn_DataType_t output_qnn_data_type,
+                       int32_t input_offset,
+                       float input_scale,
+                       const std::vector<uint32_t>& output_shape,
+                       bool do_op_validation) {
+  // Assume input is already handled.
+  float qmin = 0.0f;
+  float qmax = 255.0f;
+  ORT_RETURN_IF_ERROR(qnn::utils::GetQminQmax(input_qnn_data_type, qmin, qmax));
+  double value_min = qnn::utils::Dequantize(input_offset, input_scale, qmin);
+  double value_max = qnn::utils::Dequantize(input_offset, input_scale, qmax);
+
+  Qnn_QuantizeParams_t convert_output_quant_param = QNN_QUANTIZE_PARAMS_INIT;
+  convert_output_quant_param.encodingDefinition = QNN_DEFINITION_DEFINED;
+  convert_output_quant_param.quantizationEncoding = QNN_QUANTIZATION_ENCODING_SCALE_OFFSET;
+  ORT_RETURN_IF_ERROR(qnn::utils::GetQuantParams(static_cast<float>(value_min),
+                                                 static_cast<float>(value_max),
+                                                 output_qnn_data_type,
+                                                 convert_output_quant_param.scaleOffsetEncoding.scale,
+                                                 convert_output_quant_param.scaleOffsetEncoding.offset));
+
+  std::vector<uint32_t> output_shape_copy = output_shape;
+  QnnTensorWrapper convert_output_tensorwrapper(convert_output_name,
+                                                QNN_TENSOR_TYPE_NATIVE,
+                                                output_qnn_data_type,
+                                                convert_output_quant_param,
+                                                std::move(output_shape_copy));
+  ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(convert_output_tensorwrapper)), "Failed to add tensor.");
+
+  ORT_RETURN_IF_NOT(qnn_model_wrapper.CreateQnnNode(convert_output_name,
+                                                    QNN_OP_PACKAGE_NAME_QTI_AISW,
+                                                    "Convert",
+                                                    {convert_input_name},
+                                                    {convert_output_name},
+                                                    {},
+                                                    do_op_validation),
+                    "Failed to add node.");
+  return Status::OK();
 }
 
-Status SimpleOpBuilder::ExplicitOpCheck(const QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit) const {
+Status SimpleOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
+                                      const NodeUnit& node_unit,
+                                      const logging::Logger& logger,
+                                      std::vector<std::string>& input_names,
+                                      bool do_op_validation) const {
   const std::string& op_type = node_unit.OpType();
+  ORT_RETURN_IF_ERROR(BaseOpBuilder::ProcessInputs(qnn_model_wrapper, node_unit, logger, input_names, do_op_validation));
 
-  // QNN Softmax and LogSoftmax only support an axis value equal to input_rank - 1 (i.e., same as -1).
-  if (op_type == "Softmax" || op_type == "LogSoftmax") {
-    int32_t axis = GetDefaultAxisAttribute(op_type, node_unit.SinceVersion());
-    Qnn_Scalar_t axis_qnn_scalar = QNN_SCALAR_INIT;
-    ORT_RETURN_IF_ERROR(ProcessAxisAttribute(qnn_model_wrapper, node_unit, axis_qnn_scalar, axis));
-    std::vector<uint32_t> input_shape;
-    ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(node_unit.Inputs()[0].node_arg, input_shape),
-                      "QNN EP: Cannot get shape for Softmax input");
-    ORT_RETURN_IF(axis != static_cast<int32_t>(input_shape.size() - 1),
-                  "QNN ", op_type.c_str(), " only supports an `axis` attribute equal to input_rank-1 (or -1)");
+  if (op_type == "MatMul") {
+    const auto& inputs = node_unit.Inputs();
+    TensorInfo input0_info = {};
+    TensorInfo input1_info = {};
+    ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(inputs[0], input0_info));
+    ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(inputs[1], input1_info));
+    // Need to insert Convert op if both inputs are dynamic inputs and are ufixed_16
+    if (!input0_info.is_initializer && !input1_info.is_initializer &&
+        input0_info.qnn_data_type == input1_info.qnn_data_type &&
+        input0_info.qnn_data_type == QNN_DATATYPE_UFIXED_POINT_16) {
+      // insert Convert op after input1
+      std::string convert_input_name = input_names.back();
+      input_names.pop_back();
+      const std::string& matmul_output_name = node_unit.Outputs()[0].node_arg.Name();
+      std::string convert_output_name = convert_input_name + "_convert_" + matmul_output_name;
+      ORT_RETURN_IF_ERROR(InsertConvertOp(qnn_model_wrapper,
+                                          convert_input_name,
+                                          convert_output_name,
+                                          input1_info.qnn_data_type,
+                                          QNN_DATATYPE_UFIXED_POINT_8,
+                                          input1_info.quant_param.scaleOffsetEncoding.offset,
+                                          input1_info.quant_param.scaleOffsetEncoding.scale,
+                                          input1_info.shape,
+                                          do_op_validation));
+      input_names.push_back(convert_output_name);
+    }
   }
 
+  return Status::OK();
+}
+
+Status SimpleOpBuilder::ExplicitOpCheck(const NodeUnit& node_unit) const {
+  const std::string& op_type = node_unit.OpType();
+
   if (op_type == "GridSample") {
     NodeAttrHelper node_helper(node_unit);
     std::string mode = node_helper.Get("mode", "linear");
@@ -231,17 +306,28 @@ Status SimpleOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_w
   const std::string& op_type = node_unit.OpType();
 
   if (do_op_validation) {
-    ORT_RETURN_IF_ERROR(ExplicitOpCheck(qnn_model_wrapper, node_unit));
+    ORT_RETURN_IF_ERROR(ExplicitOpCheck(node_unit));
     // Skip the op validation for DepthToSpace & SpaceToDepth if it's not NHWC data layout
     if (node_unit.Domain() != kMSInternalNHWCDomain && (op_type == "DepthToSpace" || op_type == "SpaceToDepth" || op_type == "GridSample")) {
       return Status::OK();
     }
+
+    // Explicitly skip the Op validation for Q & DQ node with 5D because of QNN bug.
+    // TODO (hecli), remove once QNN v2.17 is ready
+    if (op_type == "QuantizeLinear" || op_type == "DequantizeLinear") {
+      std::vector<uint32_t> input_shape;
+      ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(node_unit.Inputs()[0].node_arg, input_shape),
+                        "QNN EP: Cannot get input shape");
+      if (input_shape.size() == 5) {
+        return Status::OK();
+      }
+    }
   }
 
   std::vector<std::string> param_tensor_names;
   // Add attribute
-  if (op_type == "LogSoftmax" || op_type == "Softmax" || op_type == "Concat") {
-    int32_t default_axis = GetDefaultAxisAttribute(op_type, node_unit.SinceVersion());
+  if (op_type == "Concat") {
+    int32_t default_axis = 0;
     Qnn_Scalar_t axis_qnn_scalar = QNN_SCALAR_INIT;
     ORT_RETURN_IF_ERROR(ProcessAxisAttribute(qnn_model_wrapper, node_unit, axis_qnn_scalar, default_axis));
     QnnParamWrapper axis_param(node_unit.Index(), node_unit.Name(), QNN_OP_SOFTMAX_PARAM_AXIS, axis_qnn_scalar);
@@ -249,6 +335,19 @@ Status SimpleOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_w
     qnn_model_wrapper.AddParamWrapper(std::move(axis_param));
   }
 
+  if (op_type == "LpNormalization") {
+    int32_t default_axis = -1;
+    Qnn_Scalar_t axis_qnn_scalar = QNN_SCALAR_INIT;
+    ORT_RETURN_IF_ERROR(ProcessAxisAttribute(qnn_model_wrapper, node_unit, axis_qnn_scalar, default_axis));
+    QnnParamWrapper axis_param(node_unit.Index(), node_unit.Name(), QNN_OP_L2_NORM_PARAM_AXIS, axis_qnn_scalar);
+    param_tensor_names.push_back(axis_param.GetParamTensorName());
+    qnn_model_wrapper.AddParamWrapper(std::move(axis_param));
+
+    NodeAttrHelper node_helper(node_unit);
+    int64_t norm_p_order = node_helper.Get("p", static_cast<int64_t>(2));
+    ORT_RETURN_IF(norm_p_order != 2, "QNN EP only supports LpNormalization with 'p' attribute equal to 2.");
+  }
+
   if (op_type == "MatMul") {
     Qnn_Scalar_t scalar_param = QNN_SCALAR_INIT;
     scalar_param.dataType = QNN_DATATYPE_BOOL_8;
@@ -285,16 +384,6 @@ Status SimpleOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_w
     ORT_RETURN_IF_ERROR(ProcessGridSampleAttributes(qnn_model_wrapper, node_unit, param_tensor_names));
   }
 
-  if (op_type == "Sigmoid" || op_type == "Tanh") {
-    // QNN requires 16-bit QDQ Sigmoid and Tanh to use specific output scale and zero-point values
-    // regardless of floating-point range.
-    return ProcessSigmoidOrTanhOutput(qnn_model_wrapper,
-                                      node_unit,
-                                      std::move(input_names),
-                                      std::move(param_tensor_names),
-                                      logger, do_op_validation);
-  }
-
   return ProcessOutputs(qnn_model_wrapper, node_unit,
                         std::move(input_names),
                         std::move(param_tensor_names),
@@ -348,57 +437,43 @@ static bool OverrideQuantParams(const std::string& op_type, Qnn_DataType_t qnn_d
   return quant_params.offset != orig_offset || quant_params.scale != orig_scale;
 }
 
-/**
- * Processes the output for Sigmoid or Tanh operators and creates the corresponding QNN operator.
- * These operator types are handled separately because QNN requires 16-bit QDQ Sigmoid and Tanh operators to use
- * specific scale and zero-point values regardless of floating-point range.
- *
- * \param qnn_model_wrapper The QNN model wrapper object.
- * \param node_unit The QDQ node unit for the Sigmoid or Tanh node.
- * \param input_names List of input names.
- * \param param_tensor_names List of param tensor names.
- * \param logger Logger used to report information.
- * \param do_op_validation True if the new QNN node should be validated.
- */
-Status SimpleOpBuilder::ProcessSigmoidOrTanhOutput(QnnModelWrapper& qnn_model_wrapper,
-                                                   const NodeUnit& node_unit,
-                                                   std::vector<std::string>&& input_names,
-                                                   std::vector<std::string>&& param_tensor_names,
-                                                   const logging::Logger& logger,
-                                                   bool do_op_validation) const {
+Status SimpleOpBuilder::OverrideOutputQuantParam(QnnModelWrapper& qnn_model_wrapper,
+                                                 const NodeUnit& node_unit,
+                                                 const logging::Logger& logger,
+                                                 const std::vector<std::string>& input_names,
+                                                 size_t output_index,
+                                                 Qnn_DataType_t qnn_data_type,
+                                                 Qnn_QuantizeParams_t& quant_param) const {
+  ORT_UNUSED_PARAMETER(input_names);
   const std::string& op_type = node_unit.OpType();
-  const auto& output = node_unit.Outputs()[0];
-  const std::string& output_name = output.node_arg.Name();
-
-  OnnxInputInfo output_info = {};
-
-  // TODO(adrianlizarraga): Rename GetOnnxInputInfo() since it can be used for outputs as well.
-  ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(output, output_info));
 
-  if (output_info.quant_param.quantizationEncoding == QNN_QUANTIZATION_ENCODING_SCALE_OFFSET) {
-    if (OverrideQuantParams(op_type, output_info.qnn_data_type, output_info.quant_param.scaleOffsetEncoding)) {
-      const int32_t offset = output_info.quant_param.scaleOffsetEncoding.offset;
-      const float scale = output_info.quant_param.scaleOffsetEncoding.scale;
-
-      LOGS(logger, VERBOSE) << "QNN requires that 16-bit quantized " << op_type << " operators use offset/scale values "
-                            << "of <" << offset << ", " << scale << ">. QNN EP will override the original values.";
+  // Override output quantization parameters for uint16 QDQ Sigmoid or Tanh.
+  // QNN requires 16-bit QDQ Sigmoid and Tanh to use specific output scale and zero-point values
+  // regardless of floating-point range.
+  if (op_type == "Sigmoid" || op_type == "Tanh") {
+    const auto& outputs = node_unit.Outputs();
+    ORT_RETURN_IF_NOT(output_index < outputs.size(),
+                      "Invalid output index in OverrideOutputQuantParam for op ", op_type.c_str());
+
+    const auto& output = node_unit.Outputs()[0];
+    const std::string& output_name = output.node_arg.Name();
+
+    if (quant_param.quantizationEncoding == QNN_QUANTIZATION_ENCODING_SCALE_OFFSET) {
+      if (OverrideQuantParams(op_type, qnn_data_type, quant_param.scaleOffsetEncoding)) {
+        const int32_t offset = quant_param.scaleOffsetEncoding.offset;
+        const float scale = quant_param.scaleOffsetEncoding.scale;
+
+        LOGS(logger, VERBOSE) << "QNN requires that 16-bit quantized " << op_type
+                              << " operators use offset/scale values "
+                              << "of <" << offset << ", " << scale
+                              << ">. QNN EP will override the original values for output " << output_name;
+        ORT_RETURN_IF(qnn_model_wrapper.IsQnnTensorWrapperExist(output_name),
+                      "QNN EP is unable to override output quantization parameters for ", op_type.c_str(),
+                      " operator. Node name: ", node_unit.Name().c_str(), ", output name: ", output_name.c_str());
+      }
     }
   }
 
-  Qnn_TensorType_t tensor_type = qnn_model_wrapper.IsGraphOutput(output_name) ? QNN_TENSOR_TYPE_APP_READ
-                                                                              : QNN_TENSOR_TYPE_NATIVE;
-  QnnTensorWrapper output_tensorwrapper(output_name, tensor_type, output_info.qnn_data_type, output_info.quant_param,
-                                        std::move(output_info.shape));
-  ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(output_tensorwrapper)), "Failed to add tensor.");
-  ORT_RETURN_IF_NOT(qnn_model_wrapper.CreateQnnNode(GetNodeName(node_unit),
-                                                    QNN_OP_PACKAGE_NAME_QTI_AISW,
-                                                    GetQnnOpType(op_type),
-                                                    std::move(input_names),
-                                                    {output_name},
-                                                    std::move(param_tensor_names),
-                                                    do_op_validation),
-                    "Failed to add node.");
-
   return Status::OK();
 }
 
diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc
index 15fc55b5b59b6..88c94581a8887 100644
--- a/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc
+++ b/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc
@@ -8,6 +8,8 @@
 #include "core/providers/qnn/builder/qnn_utils.h"
 #include "core/providers/cpu/tensor/slice_helper.h"
 
+#include "core/framework/tensorprotoutils.h"
+
 #include "base_op_builder.h"
 
 namespace onnxruntime {
@@ -37,16 +39,13 @@ class SliceOpBuilder : public BaseOpBuilder {
                             TensorShapeVector& raw_starts,
                             TensorShapeVector& raw_ends,
                             TensorShapeVector& raw_axes) const;
-  typedef struct {
-    int32_t begin, end, stride;
-  } Range;
-  mutable std::vector<Range> ranges_;
 };
 
 Status SliceOpBuilder::ExplictOpCheck(QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit) const {
   size_t input_count = node_unit.Inputs().size();
-  // Op set 9 only has 1 input with starts, ends, axes attribute
-  // Op set > 9, starts, ends, axes are from node input
+
+  // Opset < 10: Only has 1 data input. The starts, ends, and axes values are attributes.
+  // Opset >= 10: Everything is an input. The data, starts, and ends inputs are required.
   if (input_count > 1) {
     // Skip the first input. All other input need to be initializer
     for (size_t i = 1; i < input_count; i++) {
@@ -75,6 +74,46 @@ void SliceOpBuilder::GetDataFromAttribute(const NodeUnit& node_unit,
   }
 }
 
+// Gets the data from initializer inputs (e.g., starts, ends, axes, or steps) as a TensorShapeVector.
+static Status GetInitializerInputData(const NodeUnitIODef& input, const QnnModelWrapper& qnn_model_wrapper,
+                                      TensorShapeVector& output) {
+  const auto& input_name = input.node_arg.Name();
+  const bool is_initializer = qnn_model_wrapper.IsInitializerInput(input_name);
+  ORT_RETURN_IF_NOT(is_initializer, "Expected input ", input_name.c_str(), " to be an initializer.");
+  gsl::not_null<const ONNX_NAMESPACE::TensorProto*> initializer_proto = qnn_model_wrapper
+                                                                            .GetInitializerTensors()
+                                                                            .at(input_name);
+  ORT_RETURN_IF_NOT(initializer_proto->has_data_type(), "Expected initializer ", input_name.c_str(),
+                    " to have a proto data type.");
+
+  // Create empty Tensor.
+  const auto* dtype = DataTypeImpl::TensorTypeFromONNXEnum(initializer_proto->data_type())->GetElementType();
+  TensorShape shape = onnxruntime::utils::GetTensorShapeFromTensorProto(*initializer_proto);
+  Tensor tensor(dtype, shape, std::make_shared<CPUAllocator>());
+
+  // Deserialize initializer into Tensor.
+  onnxruntime::PathString model_path = qnn_model_wrapper.GetGraphViewer().ModelPath().ToPathString();
+  const ORTCHAR_T* model_path_str = model_path.empty() ? nullptr : model_path.c_str();
+  ORT_RETURN_IF_ERROR(onnxruntime::utils::TensorProtoToTensor(onnxruntime::Env::Default(), model_path_str,
+                                                              *initializer_proto, tensor));
+
+  Status status;
+
+  // Copy Tensor of int32_t or int64_t elems into output (int64_ts).
+  if (tensor.IsDataType<int64_t>()) {
+    gsl::span<const int64_t> tensor_elems = tensor.DataAsSpan<int64_t>();
+    output.insert(output.end(), tensor_elems.begin(), tensor_elems.end());
+  } else if (tensor.IsDataType<int32_t>()) {
+    gsl::span<const int32_t> tensor_elems = tensor.DataAsSpan<int32_t>();
+    output.insert(output.end(), tensor_elems.begin(), tensor_elems.end());
+  } else {
+    status = ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Data type ", DataTypeImpl::ToString(dtype),
+                             " is not supported for Slice initializer input ", input.node_arg.Name().c_str());
+  }
+
+  return status;
+}
+
 // Note: For ONNX Slice operation the expected number of inputs is between 3 and 5
 Status SliceOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
                                      const NodeUnit& node_unit,
@@ -84,123 +123,71 @@ Status SliceOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
   if (do_op_validation) {
     ORT_RETURN_IF_ERROR(ExplictOpCheck(qnn_model_wrapper, node_unit));
   }
-  Qnn_DataType_t qnn_data_type = QNN_DATATYPE_FLOAT_32;
+
+  // Only need to add input 0. The other inputs (if any) contain static data that is passed to QNN APIs
+  // as static parameters.
+  return ProcessInput(qnn_model_wrapper, node_unit.Inputs()[0], logger, input_names);
+}
+
+Status SliceOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper,
+                                                   const NodeUnit& node_unit,
+                                                   std::vector<std::string>&& input_names,
+                                                   const logging::Logger& logger,
+                                                   bool do_op_validation) const {
+  // Extract starts, ends, axes, and steps data from attributes (opset < 10) or initializer inputs (opset >= 10).
   TensorShapeVector raw_starts;
   TensorShapeVector raw_ends;
   TensorShapeVector raw_axes;
   TensorShapeVector raw_steps;
-  std::vector<uint32_t> input0_shape;
 
-  auto inputs = node_unit.Inputs();
-  auto input_count = inputs.size();
-  // Opset 9, only 1 input, starts, ends, axes are in attribute
-  if (1 == input_count) {
+  const auto& inputs = node_unit.Inputs();
+  const size_t input_count = inputs.size();
+
+  // Opset 9 only has 1 input. The starts, ends, axes values are attributes.
+  if (node_unit.SinceVersion() < 10) {
     GetDataFromAttribute(node_unit, raw_starts, raw_ends, raw_axes);
-  }
+  } else {
+    constexpr size_t starts_index = 1;
+    constexpr size_t ends_index = 2;
+    constexpr size_t axes_index = 3;
+    constexpr size_t steps_index = 4;
 
-  for (size_t input_i = 0; input_i < input_count; ++input_i) {
-    auto& input_name = inputs[input_i].node_arg.Name();
-    if (input_name.empty()) {
-      // Ignore unspecified/unused optional input
-      continue;
-    }
-    if (qnn_model_wrapper.IsQnnTensorWrapperExist(input_name)) {
-      LOGS(logger, VERBOSE) << "Tensor already added or the input is not named, skip it: " << input_name;
-      input_names.push_back(input_name);
-      ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(inputs[input_i].node_arg, input0_shape), "Cannot get shape");
-      continue;
+    // Starts input (required).
+    ORT_RETURN_IF_ERROR(GetInitializerInputData(inputs[starts_index], qnn_model_wrapper, raw_starts));
+
+    // Ends input (required).
+    ORT_RETURN_IF_ERROR(GetInitializerInputData(inputs[ends_index], qnn_model_wrapper, raw_ends));
+
+    // Axes input (optional).
+    if (input_count > axes_index && !inputs[axes_index].node_arg.Name().empty()) {
+      ORT_RETURN_IF_ERROR(GetInitializerInputData(inputs[axes_index], qnn_model_wrapper, raw_axes));
     }
 
-    bool is_quantized_tensor = inputs[input_i].quant_param.has_value();
-    const auto* type_proto = inputs[input_i].node_arg.TypeAsProto();
-    ORT_RETURN_IF_ERROR(utils::GetQnnDataType(is_quantized_tensor, type_proto, qnn_data_type));
-
-    std::vector<uint32_t> input_shape;
-    ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(inputs[input_i].node_arg, input_shape), "Cannot get shape");
-
-    Qnn_QuantizeParams_t quantize_param = QNN_QUANTIZE_PARAMS_INIT;
-    utils::InitializeQuantizeParam(quantize_param, is_quantized_tensor);
-    ORT_RETURN_IF_NOT(qnn_model_wrapper.ProcessQuantizationParameter(inputs[input_i].quant_param,
-                                                                     quantize_param.scaleOffsetEncoding.scale,
-                                                                     quantize_param.scaleOffsetEncoding.offset),
-                      "Cannot get quantization parameter");
-
-    std::vector<uint8_t> unpacked_tensor;
-    bool is_initializer_input = qnn_model_wrapper.IsInitializerInput(input_name);
-    if (is_initializer_input) {
-      const auto& input_tensor = qnn_model_wrapper.GetInitializerTensors().at(input_name);
-      ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(*input_tensor, unpacked_tensor));
-      size_t tensor_byte_size = unpacked_tensor.size();
-      const auto data_type = input_tensor->data_type();
-      TensorShapeVector data;
-      if (data_type == ONNX_NAMESPACE::TensorProto_DataType_INT64) {
-        const int64_t* tensor_data = reinterpret_cast<const int64_t*>(unpacked_tensor.data());
-        size_t size = tensor_byte_size / sizeof(int64_t);
-        data.insert(data.end(), tensor_data, tensor_data + size);
-      } else if (data_type == ONNX_NAMESPACE::TensorProto_DataType_INT32) {
-        const int32_t* tensor_data = reinterpret_cast<const int32_t*>(unpacked_tensor.data());
-        size_t size = tensor_byte_size / sizeof(int32_t);
-        data.insert(data.end(), tensor_data, tensor_data + size);
-      } else {
-        return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
-                               "Data type for starts and ends inputs' is not supported in this build. Got ",
-                               data_type);
-      }
-      if (input_i == 0) {
-        // Do nothing!
-      } else if (input_i == 1) {
-        // Starts
-        raw_starts = data;
-        continue;
-      } else if (input_i == 2) {
-        // Ends
-        raw_ends = data;
-        continue;
-      } else if (input_i == 3) {
-        // Axes
-        raw_axes = data;
-        continue;
-      } else if (input_i == 4) {
-        // Steps
-        raw_steps = data;
-        continue;
-      }
+    // Steps input (optional).
+    if (input_count > steps_index && !inputs[steps_index].node_arg.Name().empty()) {
+      ORT_RETURN_IF_ERROR(GetInitializerInputData(inputs[steps_index], qnn_model_wrapper, raw_steps));
     }
-    input0_shape = input_shape;
-
-    input_names.push_back(input_name);
-    Qnn_TensorType_t tensor_type = GetInputTensorType(qnn_model_wrapper, input_name);
-    Qnn_QuantizeParams_t quantize_params = QNN_QUANTIZE_PARAMS_INIT;
-    QnnTensorWrapper input_tensorwrapper(input_name, tensor_type, qnn_data_type, quantize_params,
-                                         std::move(input_shape), std::move(unpacked_tensor));
-    ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(input_tensorwrapper)), "Failed to add tensor.");
   }
+
+  std::vector<uint32_t> input0_shape;
+  ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(inputs[0].node_arg, input0_shape),
+                    "Cannot get shape for Slice input 0.");
+
   TensorShapeVector input_dimensions(input0_shape.cbegin(), input0_shape.cend());
   onnxruntime::SliceOp::PrepareForComputeMetadata compute_metadata(input_dimensions);
-  ORT_RETURN_IF_ERROR(
-      SliceOp::PrepareForComputeHelper(raw_starts, raw_ends, raw_axes, raw_steps, compute_metadata));
-  ranges_.clear();
-  for (size_t i = 0; i < input_dimensions.size(); i++) {
-    auto start = static_cast<int32_t>(compute_metadata.starts_[i]);
-    auto end = static_cast<int32_t>(compute_metadata.ends_[i]);
-    auto step = static_cast<int32_t>(compute_metadata.steps_[i]);
-    ranges_.push_back(Range({start, end, step}));
-  }
-  return Status::OK();
-}
+  ORT_RETURN_IF_ERROR(SliceOp::PrepareForComputeHelper(raw_starts, raw_ends, raw_axes, raw_steps, compute_metadata));
 
-Status SliceOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper,
-                                                   const NodeUnit& node_unit,
-                                                   std::vector<std::string>&& input_names,
-                                                   const logging::Logger& logger,
-                                                   bool do_op_validation) const {
-  std::vector<uint32_t> ranges_dims{static_cast<uint32_t>(ranges_.size()), 3};
+  const size_t input_rank = input_dimensions.size();
+  std::vector<uint32_t> ranges_dims{static_cast<uint32_t>(input_rank), 3};
   std::vector<uint32_t> ranges_data;
-  for (auto range : ranges_) {
-    ranges_data.push_back(static_cast<uint32_t>(range.begin));
-    ranges_data.push_back(static_cast<uint32_t>(range.end));
-    ranges_data.push_back(static_cast<uint32_t>(range.stride));
+  ranges_data.reserve(input_rank);
+
+  for (size_t i = 0; i < input_rank; i++) {
+    ranges_data.push_back(static_cast<uint32_t>(compute_metadata.starts_[i]));
+    ranges_data.push_back(static_cast<uint32_t>(compute_metadata.ends_[i]));
+    ranges_data.push_back(static_cast<uint32_t>(compute_metadata.steps_[i]));
   }
+
   QnnParamWrapper ranges_paramwrapper(node_unit.Index(),
                                       node_unit.Name(),
                                       QNN_OP_STRIDED_SLICE_PARAM_RANGES,
diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/softmax_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/softmax_op_builder.cc
new file mode 100644
index 0000000000000..9059f7459200a
--- /dev/null
+++ b/onnxruntime/core/providers/qnn/builder/opbuilder/softmax_op_builder.cc
@@ -0,0 +1,237 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "core/providers/common.h"
+#include "core/providers/shared/utils/utils.h"
+#include "core/framework/tensorprotoutils.h"
+#include "core/providers/qnn/builder/qnn_model_wrapper.h"
+#include "core/providers/qnn/builder/op_builder_factory.h"
+#include "core/common/safeint.h"
+
+#include "base_op_builder.h"
+
+namespace onnxruntime {
+namespace qnn {
+
+class SoftmaxOpBuilder : public BaseOpBuilder {
+ public:
+  SoftmaxOpBuilder() : BaseOpBuilder("SoftmaxOpBuilder") {}
+  ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(SoftmaxOpBuilder);
+
+  Status IsOpSupported(QnnModelWrapper& qnn_model_wrapper,
+                       const NodeUnit& node_unit,
+                       const logging::Logger& logger) const override final ORT_MUST_USE_RESULT;
+
+ protected:
+  Status ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
+                       const NodeUnit& node_unit,
+                       const logging::Logger& logger,
+                       std::vector<std::string>& input_names,
+                       bool do_op_validation) const override ORT_MUST_USE_RESULT;
+
+  Status ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper,
+                                     const NodeUnit& node_unit,
+                                     std::vector<std::string>&& input_names,
+                                     const logging::Logger& logger,
+                                     bool do_op_validation) const override ORT_MUST_USE_RESULT;
+};
+
+constexpr int32_t GetDefaultAxisAttribute(int opset_version) {
+  // Default axis changed from 1 to -1 in opset 13.
+  return opset_version < 13 ? 1 : -1;
+}
+
+Status SoftmaxOpBuilder::IsOpSupported(QnnModelWrapper& qnn_model_wrapper,
+                                       const NodeUnit& node_unit,
+                                       const logging::Logger& logger) const {
+  ORT_UNUSED_PARAMETER(logger);
+  const int opset_version = node_unit.SinceVersion();
+
+  // The QNN HTP backend only supports an `axis` attribute that refers to the last input dimension.
+  // QNN EP is able to support arbitrary axis attributes by wrapping the QNN operator with transposes.
+  // However, the exception is Softmax/LogSoftmax with opset < 13. For these older ONNX operators, only
+  // axis == input_rank - 1 is supported.
+  if (opset_version < 13) {
+    const std::string& op_type = node_unit.OpType();
+
+    int32_t axis = GetDefaultAxisAttribute(opset_version);
+    Qnn_Scalar_t axis_qnn_scalar = QNN_SCALAR_INIT;
+    ORT_RETURN_IF_ERROR(ProcessAxisAttribute(qnn_model_wrapper, node_unit, axis_qnn_scalar, axis));
+    std::vector<uint32_t> input_shape;
+    ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(node_unit.Inputs()[0].node_arg, input_shape),
+                      "QNN EP: Cannot get shape for Softmax input");
+    ORT_RETURN_IF(axis != static_cast<int32_t>(input_shape.size() - 1),
+                  "QNN ", op_type.c_str(),
+                  " only supports an `axis` attribute equal to input_rank-1 (or -1) for ONNX opset < 13");
+  }
+
+  return AddToModelBuilder(qnn_model_wrapper, node_unit, logger, true);
+}
+
+static std::vector<uint32_t> GetTransposePermToUseLastAxis(uint32_t input_rank, uint32_t axis) {
+  assert(axis < input_rank);
+  std::vector<uint32_t> transpose_perm;
+  transpose_perm.reserve(input_rank);
+
+  for (uint32_t dim = 0; dim < input_rank; dim++) {
+    transpose_perm.push_back(dim);
+  }
+
+  // Swap axis dim with last dim.
+  transpose_perm[axis] = input_rank - 1;
+  transpose_perm[input_rank - 1] = axis;
+
+  return transpose_perm;
+}
+
+Status SoftmaxOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
+                                       const NodeUnit& node_unit,
+                                       const logging::Logger& logger,
+                                       std::vector<std::string>& input_names,
+                                       bool do_op_validation) const {
+  const bool is_npu_backend = IsNpuBackend(qnn_model_wrapper.GetQnnBackendType());
+  const auto& inputs = node_unit.Inputs();
+  assert(inputs.size() == 1);
+
+  int32_t axis = GetDefaultAxisAttribute(node_unit.SinceVersion());
+  Qnn_Scalar_t axis_qnn_scalar = QNN_SCALAR_INIT;
+  ORT_RETURN_IF_ERROR(ProcessAxisAttribute(qnn_model_wrapper, node_unit, axis_qnn_scalar, axis));
+
+  TensorInfo input_info = {};
+  ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(inputs[0], input_info));
+  const size_t input_rank = input_info.shape.size();
+
+  // If the axis attribute refers to the last dimension, then process the input as normal.
+  if (!is_npu_backend || axis == static_cast<int32_t>(input_rank) - 1) {
+    return ProcessInput(qnn_model_wrapper, inputs[0], logger, input_names);
+  }
+
+  //
+  // The axis does **not** refer to the last input dimension. Must wrap transposes around the operator to be able to use
+  // QNN's Softmax operator, which always uses an axis value that refers to the last dimension.
+  //
+
+  std::vector<uint32_t> transpose_perm = GetTransposePermToUseLastAxis(static_cast<uint32_t>(input_rank),
+                                                                       static_cast<uint32_t>(axis));
+
+  const std::string& input_name = inputs[0].node_arg.Name();
+  std::string op_input_name = input_info.is_initializer ? input_name : input_name + "_ort_qnn_ep_transpose";
+  input_names.push_back(op_input_name);
+
+  std::vector<uint32_t> op_input_shape = input_info.shape;
+  op_input_shape[input_rank - 1] = input_info.shape[axis];
+  op_input_shape[axis] = input_info.shape[input_rank - 1];
+
+  ORT_RETURN_IF(input_info.is_initializer, "QNN EP does not support (Log)Softmax with an initializer input, ",
+                "which should be optimized away by the ORT optimizer");
+
+  // Input is dynamic, so add transpose node before input.
+  const bool is_graph_input = qnn_model_wrapper.IsGraphInput(input_name);
+
+  ORT_RETURN_IF_ERROR(qnn_model_wrapper.AddTransposeNode(node_unit.Index(),
+                                                         input_name,
+                                                         op_input_name,
+                                                         input_info.shape,
+                                                         transpose_perm,
+                                                         op_input_shape,
+                                                         input_info.qnn_data_type,
+                                                         input_info.quant_param,
+                                                         do_op_validation,
+                                                         is_graph_input));
+
+  Qnn_TensorType_t tensor_type = GetInputTensorType(qnn_model_wrapper, op_input_name);
+  QnnTensorWrapper input_tensorwrapper(op_input_name, tensor_type, input_info.qnn_data_type, input_info.quant_param,
+                                       std::move(op_input_shape), {});
+  ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(input_tensorwrapper)), "Failed to add tensor.");
+
+  return Status::OK();
+}
+
+Status SoftmaxOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper,
+                                                     const NodeUnit& node_unit,
+                                                     std::vector<std::string>&& input_names,
+                                                     const logging::Logger& logger,
+                                                     bool do_op_validation) const {
+  const bool is_npu_backend = IsNpuBackend(qnn_model_wrapper.GetQnnBackendType());
+  const std::string& op_type = node_unit.OpType();
+  const auto& outputs = node_unit.Outputs();
+  assert(outputs.size() == 1);
+
+  int32_t axis = GetDefaultAxisAttribute(node_unit.SinceVersion());
+  Qnn_Scalar_t axis_qnn_scalar = QNN_SCALAR_INIT;
+  ORT_RETURN_IF_ERROR(ProcessAxisAttribute(qnn_model_wrapper, node_unit, axis_qnn_scalar, axis));
+
+  TensorInfo output_info = {};
+  ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(outputs[0], output_info));
+  const size_t output_rank = output_info.shape.size();
+  const bool axis_is_last_dim = static_cast<size_t>(axis) == output_rank - 1;
+
+  // If axis refers to the last dimension, process outputs as usual.
+  if (!is_npu_backend || axis_is_last_dim) {
+    QnnParamWrapper axis_param(node_unit.Index(), node_unit.Name(), QNN_OP_SOFTMAX_PARAM_AXIS, axis_qnn_scalar);
+
+    std::vector<std::string> param_tensor_names;
+    param_tensor_names.push_back(axis_param.GetParamTensorName());
+    qnn_model_wrapper.AddParamWrapper(std::move(axis_param));
+
+    return ProcessOutputs(qnn_model_wrapper, node_unit,
+                          std::move(input_names),
+                          std::move(param_tensor_names),
+                          logger, do_op_validation, GetQnnOpType(op_type));
+  }
+
+  //
+  // The axis **does** not refer to the last dimension. Must wrap the operator with Transposes to be able to use
+  // QNN's Softmax operator, which only supports an axis that refers to the last dimension.
+  //
+
+  axis_qnn_scalar.uint32Value = static_cast<uint32_t>(output_rank - 1);  // NOTE: override axis.
+  QnnParamWrapper axis_param(node_unit.Index(), node_unit.Name(), QNN_OP_SOFTMAX_PARAM_AXIS, axis_qnn_scalar);
+
+  std::vector<std::string> param_tensor_names;
+  param_tensor_names.push_back(axis_param.GetParamTensorName());
+  qnn_model_wrapper.AddParamWrapper(std::move(axis_param));
+
+  const std::string& orig_output_name = outputs[0].node_arg.Name();
+  std::string op_output_name = orig_output_name + "_ort_qnn_ep_transpose";
+
+  std::vector<uint32_t> op_output_shape = output_info.shape;
+  op_output_shape[output_rank - 1] = output_info.shape[axis];
+  op_output_shape[axis] = output_info.shape[output_rank - 1];
+
+  QnnTensorWrapper output_tensorwrapper(op_output_name, QNN_TENSOR_TYPE_NATIVE, output_info.qnn_data_type, output_info.quant_param,
+                                        std::vector<uint32_t>(op_output_shape));
+  ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(output_tensorwrapper)), "Failed to add tensor.");
+  ORT_RETURN_IF_NOT(qnn_model_wrapper.CreateQnnNode(GetNodeName(node_unit),
+                                                    QNN_OP_PACKAGE_NAME_QTI_AISW,
+                                                    GetQnnOpType(node_unit.OpType()),
+                                                    std::move(input_names),
+                                                    {op_output_name},
+                                                    std::move(param_tensor_names)),
+                    "Failed to add node.");
+
+  const bool is_graph_output = qnn_model_wrapper.IsGraphOutput(orig_output_name);
+  std::vector<uint32_t> transpose_perm = GetTransposePermToUseLastAxis(static_cast<uint32_t>(output_rank),
+                                                                       static_cast<uint32_t>(axis));
+
+  ORT_RETURN_IF_ERROR(qnn_model_wrapper.AddTransposeNode(node_unit.Index(),
+                                                         op_output_name,
+                                                         orig_output_name,
+                                                         op_output_shape,
+                                                         transpose_perm,
+                                                         output_info.shape,
+                                                         output_info.qnn_data_type,
+                                                         output_info.quant_param,
+                                                         do_op_validation,
+                                                         false,
+                                                         is_graph_output));
+
+  return Status::OK();
+}
+
+void CreateSoftmaxOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations) {
+  op_registrations.AddOpBuilder(op_type, std::make_unique<SoftmaxOpBuilder>());
+}
+
+}  // namespace qnn
+}  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/split_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/split_op_builder.cc
index 6812c223f7c90..f4b0d1ff59175 100644
--- a/onnxruntime/core/providers/qnn/builder/opbuilder/split_op_builder.cc
+++ b/onnxruntime/core/providers/qnn/builder/opbuilder/split_op_builder.cc
@@ -30,6 +30,14 @@ class SplitOpBuilder : public BaseOpBuilder {
                                      std::vector<std::string>&& input_names,
                                      const logging::Logger& logger,
                                      bool do_op_validation) const override ORT_MUST_USE_RESULT;
+
+  Status OverrideOutputQuantParam(QnnModelWrapper& qnn_model_wrapper,
+                                  const NodeUnit& node_unit,
+                                  const logging::Logger& logger,
+                                  const std::vector<std::string>& input_names,
+                                  size_t output_index,
+                                  Qnn_DataType_t qnn_data_type,
+                                  Qnn_QuantizeParams_t& quant_param) const override ORT_MUST_USE_RESULT;
 };
 
 Status SplitOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
@@ -121,6 +129,23 @@ Status SplitOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wr
   return Status::OK();
 }
 
+Status SplitOpBuilder::OverrideOutputQuantParam(QnnModelWrapper& qnn_model_wrapper,
+                                                const NodeUnit& node_unit,
+                                                const logging::Logger& logger,
+                                                const std::vector<std::string>& input_names,
+                                                size_t output_index,
+                                                Qnn_DataType_t qnn_data_type,
+                                                Qnn_QuantizeParams_t& quant_param) const {
+  // Force Split outputs to use the same quantization parameters as the input if nearly equal.
+  // This helps the HTP backend employ certain optimizations.
+  //
+  // The quantization tool assigns equal qparams to the input and outputs.
+  // However, Sigmoid/Tanh may override their output qparams,
+  // which requires us to explicitly handle this in case a Split is consumer of a Sigmoid/Tanh node.
+  return SetOutputQParamEqualToInputIfNearlyEqual(qnn_model_wrapper, node_unit, logger, input_names,
+                                                  0 /*input_index*/, output_index, qnn_data_type, quant_param);
+}
+
 void CreateSplitOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations) {
   op_registrations.AddOpBuilder(op_type, std::make_unique<SplitOpBuilder>());
 }
diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/tile_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/tile_op_builder.cc
index bf194a3c71337..721db9dd2670e 100644
--- a/onnxruntime/core/providers/qnn/builder/opbuilder/tile_op_builder.cc
+++ b/onnxruntime/core/providers/qnn/builder/opbuilder/tile_op_builder.cc
@@ -30,6 +30,14 @@ class TileOpBuilder : public BaseOpBuilder {
                                      std::vector<std::string>&& input_names,
                                      const logging::Logger& logger,
                                      bool do_op_validation) const override ORT_MUST_USE_RESULT;
+
+  Status OverrideOutputQuantParam(QnnModelWrapper& qnn_model_wrapper,
+                                  const NodeUnit& node_unit,
+                                  const logging::Logger& logger,
+                                  const std::vector<std::string>& input_names,
+                                  size_t output_index,
+                                  Qnn_DataType_t qnn_data_type,
+                                  Qnn_QuantizeParams_t& quant_param) const override ORT_MUST_USE_RESULT;
 };
 
 Status TileOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
@@ -86,6 +94,19 @@ Status TileOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wra
   return Status::OK();
 }
 
+Status TileOpBuilder::OverrideOutputQuantParam(QnnModelWrapper& qnn_model_wrapper,
+                                               const NodeUnit& node_unit,
+                                               const logging::Logger& logger,
+                                               const std::vector<std::string>& input_names,
+                                               size_t output_index,
+                                               Qnn_DataType_t qnn_data_type,
+                                               Qnn_QuantizeParams_t& quant_param) const {
+  // Force the Tile operator output to use the same quantization parameters as the input if nearly equal.
+  // This helps the HTP backend employ certain optimizations.
+  return SetOutputQParamEqualToInputIfNearlyEqual(qnn_model_wrapper, node_unit, logger, input_names,
+                                                  0 /*input_index*/, output_index, qnn_data_type, quant_param);
+}
+
 void CreateTileOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations) {
   op_registrations.AddOpBuilder(op_type, std::make_unique<TileOpBuilder>());
 }
diff --git a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc
index 8e31124ce4c85..ab0ea042ea5e2 100644
--- a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc
+++ b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc
@@ -3,9 +3,9 @@
 
 #include "qnn_backend_manager.h"
 #include "qnn_model.h"
-#include <iostream>
-#include <fstream>
 #include <filesystem>
+#include <fstream>
+#include <string>
 #include "QnnOpDef.h"
 #include "HTP/QnnHtpPerfInfrastructure.h"
 #include "CPU/QnnCpuCommon.h"
@@ -16,6 +16,7 @@
 #include "core/common/gsl.h"
 #include "core/framework/endian_utils.h"
 #include "core/common/logging/capture.h"
+#include "core/providers/qnn/builder/onnx_ctx_model_helper.h"
 
 // Flag to determine if Backend should do node validation for each opNode added
 #define DO_GRAPH_NODE_VALIDATIONS 1
@@ -28,14 +29,20 @@ typedef Qnn_ErrorHandle_t (*QnnInterfaceGetProvidersFn_t)(const QnnInterface_t**
 typedef Qnn_ErrorHandle_t (*QnnSystemInterfaceGetProvidersFn_t)(const QnnSystemInterface_t*** providerList,
                                                                 uint32_t* numProviders);
 
-constexpr const char* QNN_PROVIDER = "ORTQNNEP";
+static Qnn_Version_t GetQnnInterfaceApiVersion(const QnnInterface_t* qnn_interface) {
+  return qnn_interface->apiVersion.coreApiVersion;
+}
+
+static Qnn_Version_t GetQnnInterfaceApiVersion(const QnnSystemInterface_t* qnn_interface) {
+  return qnn_interface->systemApiVersion;
+}
 
 template <typename F, class T>
-Status QnnBackendManager::GetQnnInterfaceProviders(const char* lib_path,
-                                                   const char* interface_provider_name,
-                                                   void** backend_lib_handle,
-                                                   T*** interface_providers,
-                                                   uint32_t& num_providers) {
+Status QnnBackendManager::GetQnnInterfaceProvider(const char* lib_path,
+                                                  const char* interface_provider_name,
+                                                  void** backend_lib_handle,
+                                                  Qnn_Version_t req_version,
+                                                  T** interface_provider) {
   std::string error_msg;
   *backend_lib_handle = LoadLib(lib_path,
                                 static_cast<int>(DlOpenFlag::DL_NOW) | static_cast<int>(DlOpenFlag::DL_LOCAL),
@@ -47,10 +54,36 @@ Status QnnBackendManager::GetQnnInterfaceProviders(const char* lib_path,
   GetInterfaceProviders = ResolveSymbol<F>(*backend_lib_handle, interface_provider_name, *logger_);
   ORT_RETURN_IF(nullptr == GetInterfaceProviders, "Failed to get QNN providers!");
 
-  auto result = GetInterfaceProviders((const T***)interface_providers, &num_providers);
+  T** interface_providers{nullptr};
+  uint32_t num_providers{0};
+
+  auto result = GetInterfaceProviders((const T***)&interface_providers, &num_providers);
   ORT_RETURN_IF((QNN_SUCCESS != result || nullptr == *interface_providers || 0 == num_providers),
                 "Failed to get QNN providers.");
 
+  bool found_valid_interface{false};
+  for (size_t pIdx = 0; pIdx < num_providers; pIdx++) {
+    Qnn_Version_t interface_version = GetQnnInterfaceApiVersion(interface_providers[pIdx]);
+
+    LOGS_DEFAULT(VERBOSE) << lib_path << " interface version: " << interface_version.major << "."
+                          << interface_version.minor << "." << interface_version.patch;
+
+    // Check the interface's API version against the required version.
+    // Major versions must match. The interface's minor version must be greater OR equal with a suitable patch version.
+    if (interface_version.major == req_version.major) {
+      bool minor_and_patch_version_ok = (interface_version.minor > req_version.minor) ||
+                                        (interface_version.minor == req_version.minor &&
+                                         interface_version.patch >= req_version.patch);
+      if (minor_and_patch_version_ok) {
+        found_valid_interface = true;
+        *interface_provider = interface_providers[pIdx];
+        break;
+      }
+    }
+  }
+
+  ORT_RETURN_IF_NOT(found_valid_interface, "Unable to find a valid interface for ", lib_path);
+
   return Status::OK();
 }
 
@@ -76,38 +109,89 @@ void QnnBackendManager::SetQnnBackendType(uint32_t backend_id) {
 }
 
 Status QnnBackendManager::LoadBackend() {
-  QnnInterface_t** interface_providers{nullptr};
-  uint32_t num_providers{0};
-  auto rt = GetQnnInterfaceProviders<QnnInterfaceGetProvidersFn_t,
-                                     QnnInterface_t>(backend_path_.c_str(),
-                                                     "QnnInterface_getProviders",
-                                                     &backend_lib_handle_,
-                                                     &interface_providers,
-                                                     num_providers);
+  QnnInterface_t* backend_interface_provider{nullptr};
+  auto rt = GetQnnInterfaceProvider<QnnInterfaceGetProvidersFn_t,
+                                    QnnInterface_t>(backend_path_.c_str(),
+                                                    "QnnInterface_getProviders",
+                                                    &backend_lib_handle_,
+                                                    {QNN_API_VERSION_MAJOR,
+                                                     QNN_API_VERSION_MINOR,
+                                                     QNN_API_VERSION_PATCH},
+                                                    &backend_interface_provider);
   ORT_RETURN_IF_ERROR(rt);
+  qnn_interface_ = backend_interface_provider->QNN_INTERFACE_VER_NAME;
+  auto backend_id = backend_interface_provider->backendId;
+  SetQnnBackendType(backend_id);
 
-  bool found_valid_interface{false};
-  LOGS_DEFAULT(VERBOSE) << "QNN_API_VERSION_MAJOR: " << QNN_API_VERSION_MAJOR
-                        << " QNN_API_VERSION_MINOR: " << QNN_API_VERSION_MINOR;
-  for (size_t pIdx = 0; pIdx < num_providers; pIdx++) {
-    LOGS_DEFAULT(VERBOSE) << "interface_providers major: " << interface_providers[pIdx]->apiVersion.coreApiVersion.major
-                          << " interface_providers minor: " << interface_providers[pIdx]->apiVersion.coreApiVersion.minor;
-    if (QNN_API_VERSION_MAJOR == interface_providers[pIdx]->apiVersion.coreApiVersion.major &&
-        QNN_API_VERSION_MINOR <= interface_providers[pIdx]->apiVersion.coreApiVersion.minor) {
-      found_valid_interface = true;
-      qnn_interface_ = interface_providers[pIdx]->QNN_INTERFACE_VER_NAME;
-      auto backend_id = interface_providers[pIdx]->backendId;
-      SetQnnBackendType(backend_id);
-
-      LOGS_DEFAULT(INFO) << "Found valid interface, version: " << QNN_API_VERSION_MAJOR
-                         << "." << QNN_API_VERSION_MINOR
-                         << " backend provider name: " << interface_providers[pIdx]->providerName
-                         << " backend id: " << backend_id;
-      break;
+  Qnn_Version_t backend_interface_version = GetQnnInterfaceApiVersion(backend_interface_provider);
+  LOGS_DEFAULT(INFO) << "Found valid interface, version: " << backend_interface_version.major
+                     << "." << backend_interface_version.minor << "." << backend_interface_version.patch
+                     << " backend provider name: " << backend_interface_provider->providerName
+                     << " backend id: " << backend_id;
+
+  return Status::OK();
+}
+
+// Loads the intended backend (e.g., HTP, CPU, etc) to get its type, and then
+// sets QNN Saver as the active backend. QNN op builders will still see the intended backend (e.g., HTP)
+// as the backend type to ensure they emit the expected QNN API calls.
+//
+// QNN Saver is a "debugging" backend that serializes all QNN API calls (and weights) into local files.
+// This information can be used to debug issues by replaying QNN API calls with another backend.
+Status QnnBackendManager::LoadQnnSaverBackend() {
+  void* backend_lib_handle = nullptr;
+
+  // Helper that unloads the intended backend library handle when the `unload_backend_lib` variable
+  // goes out of scope. Similar to `defer` in other languages.
+  auto unload_backend_lib = gsl::finally([&] {
+    if (backend_lib_handle != nullptr) {
+      auto result = UnloadLib(backend_lib_handle);
+      if (Status::OK() != result) {
+        ORT_THROW("Failed to unload backend library.");
+      }
     }
-  }
+  });
+
+  // Load the intended backend (e.g., HTP, CPU) to ensure it is valid and to get its type.
+  QnnInterface_t* backend_interface_provider{nullptr};
+  auto rt = GetQnnInterfaceProvider<QnnInterfaceGetProvidersFn_t,
+                                    QnnInterface_t>(backend_path_.c_str(),
+                                                    "QnnInterface_getProviders",
+                                                    &backend_lib_handle,
+                                                    {QNN_API_VERSION_MAJOR,
+                                                     QNN_API_VERSION_MINOR,
+                                                     QNN_API_VERSION_PATCH},
+                                                    &backend_interface_provider);
+  ORT_RETURN_IF_ERROR(rt);
 
-  ORT_RETURN_IF_NOT(found_valid_interface, "Unable to find a valid interface.");
+  // Set the "intended" backend type so that QNN builders still make the expected QNN API calls.
+  auto backend_id = backend_interface_provider->backendId;
+  SetQnnBackendType(backend_id);
+
+  // Load the QNN Saver backend and set it as the activate backend.
+  QnnInterface_t* saver_interface_provider{nullptr};
+  auto saver_rt = GetQnnInterfaceProvider<QnnInterfaceGetProvidersFn_t,
+                                          QnnInterface_t>(qnn_saver_path_.c_str(),
+                                                          "QnnInterface_getProviders",
+                                                          &backend_lib_handle_,  // NOTE: QNN Saver library handle is set
+                                                          {QNN_API_VERSION_MAJOR,
+                                                           QNN_API_VERSION_MINOR,
+                                                           QNN_API_VERSION_PATCH},
+                                                          &saver_interface_provider);
+  ORT_RETURN_IF_ERROR(saver_rt);
+  qnn_interface_ = saver_interface_provider->QNN_INTERFACE_VER_NAME;  // NOTE: QNN Saver will provide the interfaces
+
+  Qnn_Version_t backend_interface_version = GetQnnInterfaceApiVersion(backend_interface_provider);
+  Qnn_Version_t saver_interface_version = GetQnnInterfaceApiVersion(saver_interface_provider);
+
+  LOGS_DEFAULT(INFO) << "Using QNN Saver version: " << saver_interface_version.major << "."
+                     << saver_interface_version.minor << "." << saver_interface_version.patch
+                     << " provider name : " << saver_interface_provider->providerName;
+
+  LOGS_DEFAULT(INFO) << "Intended backend provider name: " << backend_interface_provider->providerName
+                     << " backend id: " << backend_id
+                     << " interface version: " << backend_interface_version.major
+                     << "." << backend_interface_version.minor << "." << backend_interface_version.patch;
 
   return Status::OK();
 }
@@ -120,34 +204,22 @@ Status QnnBackendManager::LoadQnnSystemLib() {
 #endif  // #ifdef _WIN32
   std::filesystem::path lib_file_path(backend_path_.c_str());
   std::string sys_file_path(lib_file_path.remove_filename().string() + system_lib_file);
-  QnnSystemInterface_t** system_interface_providers{nullptr};
-  uint32_t num_providers = 0;
-  auto rt = GetQnnInterfaceProviders<QnnSystemInterfaceGetProvidersFn_t,
-                                     QnnSystemInterface_t>(sys_file_path.c_str(),
-                                                           "QnnSystemInterface_getProviders",
-                                                           &system_lib_handle_,
-                                                           &system_interface_providers,
-                                                           num_providers);
+  QnnSystemInterface_t* system_interface_provider{nullptr};
+  auto rt = GetQnnInterfaceProvider<QnnSystemInterfaceGetProvidersFn_t,
+                                    QnnSystemInterface_t>(sys_file_path.c_str(),
+                                                          "QnnSystemInterface_getProviders",
+                                                          &system_lib_handle_,
+                                                          {QNN_SYSTEM_API_VERSION_MAJOR,
+                                                           QNN_SYSTEM_API_VERSION_MINOR,
+                                                           QNN_SYSTEM_API_VERSION_PATCH},
+                                                          &system_interface_provider);
   ORT_RETURN_IF_ERROR(rt);
+  Qnn_Version_t system_interface_version = GetQnnInterfaceApiVersion(system_interface_provider);
+  qnn_sys_interface_ = system_interface_provider->QNN_SYSTEM_INTERFACE_VER_NAME;
 
-  bool found_valid_interface{false};
-  for (size_t pIdx = 0; pIdx < num_providers; pIdx++) {
-    LOGS_DEFAULT(VERBOSE) << "system_interface_providers major: " << system_interface_providers[pIdx]->systemApiVersion.major
-                          << " system_interface_providers minor: " << system_interface_providers[pIdx]->systemApiVersion.minor;
-    int64_t systems_version_major = static_cast<int64_t>(system_interface_providers[pIdx]->systemApiVersion.major);
-    int64_t systems_version_minor = static_cast<int64_t>(system_interface_providers[pIdx]->systemApiVersion.minor);
-    if (systems_version_major == QNN_SYSTEM_API_VERSION_MAJOR &&
-        systems_version_minor >= QNN_SYSTEM_API_VERSION_MINOR) {
-      found_valid_interface = true;
-      qnn_sys_interface_ = system_interface_providers[pIdx]->QNN_SYSTEM_INTERFACE_VER_NAME;
-      LOGS_DEFAULT(INFO) << "Found valid system interface, version: " << QNN_API_VERSION_MAJOR
-                         << "." << QNN_API_VERSION_MINOR
-                         << " backend provider name: " << system_interface_providers[pIdx]->providerName;
-      break;
-    }
-  }
-
-  ORT_RETURN_IF_NOT(found_valid_interface, "Unable to find a valid system interface.");
+  LOGS_DEFAULT(INFO) << "Found valid system interface, version: " << system_interface_version.major
+                     << "." << system_interface_version.minor
+                     << " backend provider name: " << system_interface_provider->providerName;
 
   return Status::OK();
 }
@@ -202,7 +274,7 @@ void QnnBackendManager::InitializeQnnLog() {
 
 Status QnnBackendManager::InitializeBackend() {
   if (true == backend_initialized_) {
-    LOGS_DEFAULT(INFO) << "Backend intialized already.";
+    LOGS_DEFAULT(INFO) << "Backend initialized already.";
     return Status::OK();
   }
 
@@ -242,7 +314,7 @@ bool QnnBackendManager::IsDevicePropertySupported() {
 
 Status QnnBackendManager::CreateDevice() {
   if (true == device_created_) {
-    LOGS_DEFAULT(INFO) << "Device intialized already.";
+    LOGS_DEFAULT(INFO) << "Device initialized already.";
     return Status::OK();
   }
 
@@ -310,15 +382,48 @@ Status QnnBackendManager::ReleaseProfilehandle() {
   return Status::OK();
 }
 
+Status SetQnnContextConfig(ContextPriority context_priority, QnnContext_Config_t& qnn_context_config) {
+  qnn_context_config.option = QNN_CONTEXT_CONFIG_OPTION_PRIORITY;
+  switch (context_priority) {
+    case ContextPriority::LOW: {
+      qnn_context_config.priority = QNN_PRIORITY_LOW;
+      break;
+    }
+    case ContextPriority::NORMAL: {
+      qnn_context_config.priority = QNN_PRIORITY_NORMAL;
+      break;
+    }
+    case ContextPriority::NORMAL_HIGH: {
+      qnn_context_config.priority = QNN_PRIORITY_NORMAL_HIGH;
+      break;
+    }
+    case ContextPriority::HIGH: {
+      qnn_context_config.priority = QNN_PRIORITY_HIGH;
+      break;
+    }
+    case ContextPriority::UNDEFINED: {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Invalid Qnn context priority.");
+    }
+    default:
+      qnn_context_config.priority = QNN_PRIORITY_NORMAL;
+  }  // switch
+
+  return Status::OK();
+}
+
 Status QnnBackendManager::CreateContext() {
   if (true == context_created_) {
     LOGS_DEFAULT(INFO) << "Context created already.";
     return Status::OK();
   }
 
+  QnnContext_Config_t qnn_context_config = QNN_CONTEXT_CONFIG_INIT;
+  ORT_RETURN_IF_ERROR(SetQnnContextConfig(context_priority_, qnn_context_config));
+  const QnnContext_Config_t* context_configs[] = {&qnn_context_config, nullptr};
+
   auto result = qnn_interface_.contextCreate(backend_handle_,
                                              device_handle_,
-                                             (const QnnContext_Config_t**)&context_config_,
+                                             context_configs,
                                              &context_);
 
   ORT_RETURN_IF(QNN_CONTEXT_NO_ERROR != result, "Failed to create context.");
@@ -339,135 +444,51 @@ Status QnnBackendManager::ReleaseContext() {
   return Status::OK();
 }
 
-bool QnnBackendManager::IsContextCacheFileExists(const std::string& customer_context_cache_path,
-                                                 const std::string& model_description,
-                                                 const onnxruntime::PathString& model_pathstring) {
-  // Avoid duplicate work
-  if (!context_cache_path_.empty()) {
-    return ctx_file_exists_;
-  }
-  model_description_ = model_description;
-  // Use user provided context cache file path if exist, otherwise try model_file.onnx.bin by default
-  if (customer_context_cache_path.empty()) {
-    context_cache_path_ = PathToUTF8String(model_pathstring) + ".bin";
-  } else {
-    context_cache_path_ = customer_context_cache_path;
-  }
-
-  ctx_file_exists_ = std::filesystem::exists(context_cache_path_);
-
-  return ctx_file_exists_;
-}
-
-Status WriteInt16ToBinaryFile(std::ofstream& of_stream, uint16_t value) {
-  const std::vector<uint16_t> data{value};
-  std::vector<unsigned char> data_bytes(sizeof(uint16_t) / sizeof(unsigned char));
-  ORT_RETURN_IF_ERROR(onnxruntime::utils::WriteLittleEndian(gsl::make_span(data), gsl::make_span(data_bytes)));
-  of_stream.write(reinterpret_cast<char*>(data_bytes.data()), data_bytes.size());
-  return Status::OK();
-}
-
-Status QnnBackendManager::DumpQnnContext(const std::string& model_name, const std::string& graph_name) {
+std::unique_ptr<unsigned char[]> QnnBackendManager::GetContextBinaryBuffer(uint64_t& written_buffer_size) {
   if (nullptr == qnn_interface_.contextGetBinarySize ||
       nullptr == qnn_interface_.contextGetBinary) {
     LOGS(*logger_, ERROR) << "Failed to get valid function pointer.";
-    return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to get valid function pointer.");
+    return nullptr;
   }
 
   uint64_t required_buffer_size(0);
   Qnn_ErrorHandle_t rt = qnn_interface_.contextGetBinarySize(context_, &required_buffer_size);
   if (QNN_CONTEXT_NO_ERROR != rt) {
     LOGS(*logger_, ERROR) << "Failed to get QNN context binary size. Error code: " << rt;
-    return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to get QNN context binary size.");
+    return nullptr;
   }
 
   std::unique_ptr<unsigned char[]> context_buffer = std::make_unique<unsigned char[]>(required_buffer_size);
   if (nullptr == context_buffer) {
     LOGS(*logger_, ERROR) << "Failed to allocate buffer for context cache.";
-    return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to allocate buffer for context cache.");
+    return nullptr;
   }
 
-  uint64_t written_buffer_size(0);
   rt = qnn_interface_.contextGetBinary(context_,
                                        reinterpret_cast<void*>(context_buffer.get()),
                                        required_buffer_size,
                                        &written_buffer_size);
   if (QNN_CONTEXT_NO_ERROR != rt) {
     LOGS(*logger_, ERROR) << "Failed to get context binary.";
-    return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to get context binary.");
+    return nullptr;
   }
 
   if (required_buffer_size < written_buffer_size) {
     LOGS(*logger_, ERROR) << "Context written buffer size: " << written_buffer_size
                           << " exceeds allocated buffer size: " << required_buffer_size;
-    return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Context written buffer exceeds allocated buffer size.");
-  }
-
-  std::ofstream of_stream(context_cache_path_.c_str(), std::ofstream::binary);
-  if (!of_stream) {
-    LOGS(*logger_, ERROR) << "Failed to open cached context file.";
-    return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to open context cache file.");
+    return nullptr;
   }
 
-  // Write Ort metadata into context binary file
-  uint16_t model_name_length = static_cast<uint16_t>(model_name.length());
-  uint16_t graph_name_length = static_cast<uint16_t>(graph_name.length());
-  uint16_t model_description_length = static_cast<uint16_t>(model_description_.length());
-
-  // Header: uint16_t(totale_length)|uint16_t(model_name_length)|model_name|uint16_t(graph_name_length)|graph_name|uint16_t(model_description_length)|model_description
-  uint16_t header_length = 4 * sizeof(uint16_t) + model_name_length + graph_name_length + model_description_length;
-  uint16_t totale_length = header_length + static_cast<uint16_t>(strlen(QNN_PROVIDER));
-  of_stream.write(QNN_PROVIDER, strlen(QNN_PROVIDER));
-
-  ORT_RETURN_IF_ERROR(WriteInt16ToBinaryFile(of_stream, header_length));
-
-  ORT_RETURN_IF_ERROR(WriteInt16ToBinaryFile(of_stream, model_name_length));
-  of_stream.write(model_name.c_str(), model_name_length);
-
-  ORT_RETURN_IF_ERROR(WriteInt16ToBinaryFile(of_stream, graph_name_length));
-  of_stream.write(graph_name.c_str(), graph_name_length);
-
-  ORT_RETURN_IF_ERROR(WriteInt16ToBinaryFile(of_stream, model_description_length));
-  of_stream.write(model_description_.c_str(), model_description_length);
-  model_description_.clear();
-
-  LOGS(*logger_, VERBOSE) << "Dump metadata with length: " << totale_length;
-
-  of_stream.write(reinterpret_cast<char*>(context_buffer.get()), written_buffer_size);
-
-  LOGS(*logger_, VERBOSE) << "Dump QNN Context completed.";
-  return Status::OK();
+  LOGS(*logger_, VERBOSE) << "Get context binary buffer succeed.";
+  return context_buffer;
 }
 
-Status QnnBackendManager::LoadCachedQnnContext(QnnModel& qnn_model) {
+Status QnnBackendManager::LoadCachedQnnContextFromBuffer(char* buffer, uint64_t buffer_length, QnnModel& qnn_model) {
   bool result = nullptr == qnn_sys_interface_.systemContextCreate ||
                 nullptr == qnn_sys_interface_.systemContextGetBinaryInfo ||
                 nullptr == qnn_sys_interface_.systemContextFree;
   ORT_RETURN_IF(result, "Failed to get valid function pointer.");
 
-  ORT_RETURN_IF(!ctx_file_exists_, "Qnn context binary file not exist for some reason!");
-
-  uint64_t buffer_size{0};
-  std::ifstream cache_file(context_cache_path_.c_str(), std::ifstream::binary);
-  ORT_RETURN_IF(!cache_file || !cache_file.good(), "Failed to open cache file.");
-  cache_file.seekg(0, cache_file.end);
-  buffer_size = cache_file.tellg();
-  ORT_RETURN_IF(0 == buffer_size, "Empty cache file encountered.");
-  cache_file.seekg(0, cache_file.beg);
-  // Skip Ort generated metadata
-  if (ort_generated_ctx_cache_) {
-    cache_file.seekg(ort_ctx_metadata_length_);
-    buffer_size -= ort_ctx_metadata_length_;
-  }
-
-  std::unique_ptr<unsigned char[]> buffer = std::make_unique<unsigned char[]>(buffer_size);
-  ORT_RETURN_IF(nullptr == buffer, "Failed to allocate memory for cache file.");
-
-  // Load file into buffer
-  const auto& read_result = cache_file.read(reinterpret_cast<char*>(buffer.get()), buffer_size);
-  cache_file.close();
-  ORT_RETURN_IF(!read_result, "Failed to read contents from cached context file.");
-
   QnnSystemContext_Handle_t sys_ctx_handle = nullptr;
   auto rt = qnn_sys_interface_.systemContextCreate(&sys_ctx_handle);
   ORT_RETURN_IF(QNN_SUCCESS != rt, "Failed to create system handle.");
@@ -475,8 +496,8 @@ Status QnnBackendManager::LoadCachedQnnContext(QnnModel& qnn_model) {
   const QnnSystemContext_BinaryInfo_t* binary_info = nullptr;
   Qnn_ContextBinarySize_t binary_info_size{0};
   rt = qnn_sys_interface_.systemContextGetBinaryInfo(sys_ctx_handle,
-                                                     static_cast<void*>(buffer.get()),
-                                                     buffer_size,
+                                                     static_cast<void*>(buffer),
+                                                     buffer_length,
                                                      &binary_info,
                                                      &binary_info_size);
   ORT_RETURN_IF(QNN_SUCCESS != rt, "Failed to get context binary info.");
@@ -500,15 +521,22 @@ Status QnnBackendManager::LoadCachedQnnContext(QnnModel& qnn_model) {
 
   ORT_RETURN_IF(nullptr == qnn_interface_.contextCreateFromBinary,
                 "Invalid function pointer for contextCreateFromBinary.");
+
+  QnnContext_Config_t qnn_context_config = QNN_CONTEXT_CONFIG_INIT;
+  ORT_RETURN_IF_ERROR(SetQnnContextConfig(context_priority_, qnn_context_config));
+  const QnnContext_Config_t* context_configs[] = {&qnn_context_config, nullptr};
+
   rt = qnn_interface_.contextCreateFromBinary(backend_handle_,
                                               device_handle_,
-                                              (const QnnContext_Config_t**)&context_config_,
-                                              static_cast<void*>(buffer.get()),
-                                              buffer_size,
+                                              context_configs,
+                                              static_cast<void*>(buffer),
+                                              buffer_length,
                                               &context_,
                                               profile_backend_handle_);
   ORT_RETURN_IF(QNN_SUCCESS != rt, "Failed to create context from binary.");
 
+  // More work to support multiple partition, how to map the graph name in compile to qnn graph name
+  // Need the lower level framework to understand EPContext op and pass in the partition_name in fused_node during Compile
   ORT_RETURN_IF_ERROR(qnn_model.DeserializeGraphInfoFromBinaryInfo(graphs_info[0]));
 
   qnn_sys_interface_.systemContextFree(sys_ctx_handle);
@@ -517,141 +545,31 @@ Status QnnBackendManager::LoadCachedQnnContext(QnnModel& qnn_model) {
   ORT_RETURN_IF_ERROR(ExtractBackendProfilingInfo());
   context_created_ = true;
 
-  model_description_.clear();
-  model_description_from_ctx_cache_.clear();
   LOGS(*logger_, VERBOSE) << "Load from cached QNN Context completed.";
   return Status::OK();
 }
 
-/* \brief: Read string data from binary file with given length
- * \param[in] binary_file - file stream of the binary file
- * \param[out] result_str - string read from binary file
- * \param[out] length - length to read
- */
-Status ReadStringFromBinaryFile(std::ifstream& binary_file, std::string& result_str, size_t length) {
-  result_str.resize(length);
-  const auto& read_result = binary_file.read(result_str.data(), length);
-  ORT_RETURN_IF(!read_result, "Failed to read contents from cached context binary file.");
-
-  return Status::OK();
-}
-
-/* \brief: Read a uint16_t from binary file
- * \param[in] binary_file - file stream of the binary file
- * \param[out] value - uint16_t value
- */
-Status ReadInt16FromBinaryFile(std::ifstream& binary_file, uint16_t& value) {
-  std::unique_ptr<char[]> buffer = std::make_unique<char[]>(sizeof(uint16_t));
-  ORT_RETURN_IF(nullptr == buffer, "Failed to allocate memory for buffer.");
-  const auto& read_result = binary_file.read(buffer.get(), sizeof(uint16_t));
-  ORT_RETURN_IF(!read_result, "Failed to read contents from cached context binary file.");
-
-  auto src = gsl::make_span<const unsigned char>(reinterpret_cast<unsigned char*>(buffer.get()), sizeof(uint16_t));
-  std::vector<uint16_t> dst(1);
-  ORT_RETURN_IF_ERROR(onnxruntime::utils::ReadLittleEndian(src, gsl::make_span(dst)));
-  value = dst[0];
-
-  return Status::OK();
-}
-
-/* \brief: Try to get metadata from Ort generated context cache binary file.
- *  Cached context binary file generated by Ort has some metadata which can be used for validation with the model
- *  to avoid user choose a wrong context binary file which is not for this model
- *  It is treated as Qnn generated context binary file if no metadata found from the file
- */
-Status QnnBackendManager::GetMetadataFromOrtContextFile() {
-  // Only try parse meta data once
-  if (ctx_metadata_tried_) {
-    return Status::OK();
-  }
-  ctx_metadata_tried_ = true;
-
-  uint64_t buffer_size = 0;
-  std::ifstream cache_file(context_cache_path_.c_str(), std::ifstream::binary);
-  ORT_RETURN_IF(!cache_file || !cache_file.good(), "Failed to open context cache file.");
-  cache_file.seekg(0, cache_file.end);
-  buffer_size = cache_file.tellg();
-  ORT_RETURN_IF(0 == buffer_size, "Empty cache file encountered.");
-  cache_file.seekg(0, cache_file.beg);
-
-  // Read ort flag
-  std::string ort_flag("");
-  size_t ort_flag_length = strlen(QNN_PROVIDER);
-  ORT_RETURN_IF_ERROR(ReadStringFromBinaryFile(cache_file, ort_flag, ort_flag_length));
-
-  // It's not Ort generated context binary file
-  if (strncmp(ort_flag.c_str(), QNN_PROVIDER, ort_flag_length) != 0) {
-    return Status::OK();
-  }
-  ort_generated_ctx_cache_ = true;
-
-  uint16_t str_length = 0;
-  ORT_RETURN_IF_ERROR(ReadInt16FromBinaryFile(cache_file, str_length));
-  ort_ctx_metadata_length_ = str_length + static_cast<uint16_t>(ort_flag_length);
-
-  ORT_RETURN_IF_ERROR(ReadInt16FromBinaryFile(cache_file, str_length));
-  ORT_RETURN_IF_ERROR(ReadStringFromBinaryFile(cache_file, model_name_from_ctx_cache_, static_cast<size_t>(str_length)));
-
-  ORT_RETURN_IF_ERROR(ReadInt16FromBinaryFile(cache_file, str_length));
-  ORT_RETURN_IF_ERROR(ReadStringFromBinaryFile(cache_file, graph_name_from_ctx_cache_, static_cast<size_t>(str_length)));
-
-  ORT_RETURN_IF_ERROR(ReadInt16FromBinaryFile(cache_file, str_length));
-  ORT_RETURN_IF_ERROR(ReadStringFromBinaryFile(cache_file, model_description_from_ctx_cache_, static_cast<size_t>(str_length)));
-
-  return Status::OK();
-}
-
-/* \brief: Validate the model file name and graph name with Ort generated context cache metadata
- * \param[in] model_name - model file name
- * \param[in] graph_name - graph name, e.g Ort_QNN_[hash_id]_[id]. Since GetCapability is called twice,
- *                         [hash_id]_[id] changes even for same graph,
- *                          so only validate the graph name for 2nd call
- */
-Status QnnBackendManager::ValidateWithContextFile(const std::string& model_name, const std::string& graph_name) {
-  ORT_RETURN_IF(!ctx_file_exists_, "Qnn context binary file not exist for some reason!");
-
-  // Get metadata from cached context binary file
-  ORT_RETURN_IF_ERROR(GetMetadataFromOrtContextFile());
-
-  // The context binary file doesn't have ORT metadata, so it is generated from QNN toolchain not from ORT
-  if (!ort_generated_ctx_cache_) {
-    return Status::OK();
-  }
-
-  ORT_RETURN_IF(model_name != model_name_from_ctx_cache_,
-                "Model file name from context cache metadata: " + model_name_from_ctx_cache_ +
-                    " is different with target: " + model_name +
-                    ". Please make sure the context binary file matches the model.");
-
-  ORT_RETURN_IF(model_description_ != model_description_from_ctx_cache_,
-                "Model description from context cache metadata: " + model_description_from_ctx_cache_ +
-                    " is different with target: " + model_description_ +
-                    ". Please make sure the context binary file matches the model.");
-
-  ORT_RETURN_IF(graph_name != graph_name_from_ctx_cache_ && get_capability_round_2_,
-                "Graph name from context cache metadata: " + graph_name_from_ctx_cache_ +
-                    " is different with target: " + graph_name +
-                    ". You may need to re-generate the context binary file.");
-
-  get_capability_round_2_ = true;
-  return Status::OK();
-}
-
 Status QnnBackendManager::SetupBackend(const logging::Logger& logger, bool load_from_cached_context) {
   if (backend_setup_completed_) {
     LOGS(logger, VERBOSE) << "Backend setup already!";
     return Status::OK();
   }
 
-  ORT_RETURN_IF_ERROR(LoadBackend());
+  if (qnn_saver_path_.empty()) {
+    ORT_RETURN_IF_ERROR(LoadBackend());
+  } else {
+    ORT_RETURN_IF_ERROR(LoadQnnSaverBackend());
+  }
+
   LOGS(logger, VERBOSE) << "LoadBackend succeed.";
 
   if (load_from_cached_context) {
     ORT_RETURN_IF_ERROR(LoadQnnSystemLib());
   }
 
+  sdk_build_version_ = GetBackendBuildId();
   LOGS(logger, VERBOSE) << "Backend build version: "
-                        << GetBackendBuildId();
+                        << sdk_build_version_;
 
   SetLogger(&logger);
   LOGS(logger, VERBOSE) << "SetLogger succeed.";
@@ -692,8 +610,7 @@ Status QnnBackendManager::SetHtpPowerConfig() {
                 "HTP infra type = ", htp_infra->infraType, ", which is not perf infra type.");
   QnnHtpDevice_PerfInfrastructure_t& htp_perf_infra = htp_infra->perfInfra;
   // Get power client id
-  uint32_t powerconfig_client_id = 0;
-  status = htp_perf_infra.createPowerConfigId(/*device_id=*/0, /*core_id=*/0, &powerconfig_client_id);
+  status = htp_perf_infra.createPowerConfigId(/*device_id=*/0, /*core_id=*/0, &htp_power_config_client_id_);
   ORT_RETURN_IF(QNN_SUCCESS != status, "createPowerConfigId failed.");
 
   constexpr const int kNumConfigs = 1;
@@ -702,7 +619,7 @@ Status QnnBackendManager::SetHtpPowerConfig() {
   QnnHtpPerfInfrastructure_PowerConfig_t& dcvs_config = power_configs[0];
   dcvs_config.option = QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIGOPTION_DCVS_V3;
   QnnHtpPerfInfrastructure_DcvsV3_t& dcvs_v3 = dcvs_config.dcvsV3Config;
-  dcvs_v3.contextId = powerconfig_client_id;
+  dcvs_v3.contextId = htp_power_config_client_id_;
   dcvs_v3.setSleepDisable = 0;
   dcvs_v3.sleepDisable = 0;
   dcvs_v3.setDcvsEnable = 1;
@@ -800,7 +717,7 @@ Status QnnBackendManager::SetHtpPowerConfig() {
       break;
   }
   std::vector<const QnnHtpPerfInfrastructure_PowerConfig_t*> perf_power_configs_ptr_ = ObtainNullTermPtrVector(power_configs);
-  status = htp_perf_infra.setPowerConfig(powerconfig_client_id, perf_power_configs_ptr_.data());
+  status = htp_perf_infra.setPowerConfig(htp_power_config_client_id_, perf_power_configs_ptr_.data());
   ORT_RETURN_IF(QNN_SUCCESS != status, "setPowerConfig failed for HTP performance mode.");
 
   // Set rpc control latency here, but note that v68 doesn't support rpc polling mode.
@@ -814,7 +731,7 @@ Status QnnBackendManager::SetHtpPowerConfig() {
     rpc_polling_time.option = QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIGOPTION_RPC_POLLING_TIME;
     rpc_control_latency.rpcControlLatencyConfig = rpc_control_latency_;
     perf_power_configs_ptr_ = ObtainNullTermPtrVector(rpc_power_configs);
-    status = htp_perf_infra.setPowerConfig(powerconfig_client_id, perf_power_configs_ptr_.data());
+    status = htp_perf_infra.setPowerConfig(htp_power_config_client_id_, perf_power_configs_ptr_.data());
     ORT_RETURN_IF(QNN_SUCCESS != status, "setPowerConfig failed for RPC control latency.");
   }
 
@@ -835,12 +752,36 @@ void QnnBackendManager::Split(std::vector<std::string>& split_string,
   }
 }
 
+Status QnnBackendManager::DestroyHTPPowerConfigID() {
+  if (htp_performance_mode_ == HtpPerformanceMode::kHtpDefault) {
+    return Status::OK();
+  }
+
+  QnnDevice_Infrastructure_t qnn_device_infra = nullptr;
+  auto status = qnn_interface_.deviceGetInfrastructure(&qnn_device_infra);
+  ORT_RETURN_IF(QNN_SUCCESS != status, "backendGetPerfInfrastructure failed.");
+
+  auto* htp_infra = static_cast<QnnHtpDevice_Infrastructure_t*>(qnn_device_infra);
+  ORT_RETURN_IF(QNN_HTP_DEVICE_INFRASTRUCTURE_TYPE_PERF != htp_infra->infraType,
+                "HTP infra type = ", htp_infra->infraType, ", which is not perf infra type.");
+  QnnHtpDevice_PerfInfrastructure_t& htp_perf_infra = htp_infra->perfInfra;
+
+  Qnn_ErrorHandle_t destroy_ret = htp_perf_infra.destroyPowerConfigId(htp_power_config_client_id_);
+  ORT_RETURN_IF(QNN_SUCCESS != destroy_ret, "destroyPowerConfigId failed.");
+  return Status::OK();
+}
+
 void QnnBackendManager::ReleaseResources() {
   if (!backend_setup_completed_) {
     return;
   }
 
-  auto result = ReleaseContext();
+  auto result = DestroyHTPPowerConfigID();
+  if (Status::OK() != result) {
+    ORT_THROW("Failed to DestroyHTPPowerConfigID.");
+  }
+
+  result = ReleaseContext();
   if (Status::OK() != result) {
     ORT_THROW("Failed to ReleaseContext.");
   }
@@ -890,16 +831,49 @@ Status QnnBackendManager::ExtractBackendProfilingInfo() {
 
   if (num_events > 0) {
     LOGS(*logger_, VERBOSE) << "profile_events: " << profile_events << " num_events: " << num_events;
-  }
 
-  for (size_t event_idx = 0; event_idx < num_events; event_idx++) {
-    ORT_RETURN_IF_ERROR(ExtractProfilingEvent(*(profile_events + event_idx)));
-    ORT_RETURN_IF_ERROR(ExtractProfilingSubEvents(*(profile_events + event_idx)));
+    bool backendSupportsExtendedEventData = false;
+    Qnn_ErrorHandle_t resultPropertyHasCapability =
+        qnn_interface_.propertyHasCapability(QNN_PROPERTY_PROFILE_SUPPORTS_EXTENDED_EVENT);
+    uint16_t errorCodePropertyHasCapability = static_cast<uint16_t>(resultPropertyHasCapability & 0xFFFF);
+    if (errorCodePropertyHasCapability == QNN_PROFILE_NO_ERROR) {
+      LOGS(*logger_, VERBOSE) << "The QNN backend supports extended event data.";
+      backendSupportsExtendedEventData = true;
+    } else {
+      LOGS(*logger_, VERBOSE) << "The QNN backend does not support extended event data.";
+    }
+
+    // Write to CSV in append mode
+    const char* profilingCsvFilename = "qnn-profiling-data.csv";
+    std::ifstream infile(profilingCsvFilename);
+    bool exists = infile.good();
+    infile.close();
+
+    std::ofstream outfile(profilingCsvFilename, std::ios_base::app);
+    ORT_RETURN_IF(!outfile.is_open(), "Failed to open qnn-profiling-data.csv");
+    // If file didn't exist before, write the header
+    if (!exists) {
+      outfile << "Msg Timestamp,Message,Time,Unit of Measurement,Timing Source,Event Level,Event Identifier\n";
+    }
+
+    for (size_t event_idx = 0; event_idx < num_events; event_idx++) {
+      ORT_RETURN_IF_ERROR(
+          ExtractProfilingEvent(*(profile_events + event_idx), "ROOT", outfile, backendSupportsExtendedEventData));
+      ORT_RETURN_IF_ERROR(
+          ExtractProfilingSubEvents(*(profile_events + event_idx), outfile, backendSupportsExtendedEventData));
+    }
+
+    outfile.close();
+    LOGS(*logger_, INFO) << "Wrote QNN profiling events (" << num_events << ") to qnn-profiling-data.csv";
   }
+
   return Status::OK();
 }
 
-Status QnnBackendManager::ExtractProfilingSubEvents(QnnProfile_EventId_t profile_event_id) {
+Status QnnBackendManager::ExtractProfilingSubEvents(
+    QnnProfile_EventId_t profile_event_id,
+    std::ofstream& outfile,
+    bool useExtendedEventData) {
   const QnnProfile_EventId_t* profile_sub_events{nullptr};
   uint32_t num_sub_events{0};
   auto result = qnn_interface_.profileGetSubEvents(profile_event_id, &profile_sub_events, &num_sub_events);
@@ -907,28 +881,195 @@ Status QnnBackendManager::ExtractProfilingSubEvents(QnnProfile_EventId_t profile
 
   if (num_sub_events > 0) {
     LOGS(*logger_, VERBOSE) << "profile_sub_events: " << profile_sub_events << " num_sub_events: " << num_sub_events;
-  }
 
-  for (size_t sub_event_idx = 0; sub_event_idx < num_sub_events; sub_event_idx++) {
-    ORT_RETURN_IF_ERROR(ExtractProfilingEvent(*(profile_sub_events + sub_event_idx)));
-    ORT_RETURN_IF_ERROR(ExtractProfilingSubEvents(*(profile_sub_events + sub_event_idx)));
+    for (size_t sub_event_idx = 0; sub_event_idx < num_sub_events; sub_event_idx++) {
+      ORT_RETURN_IF_ERROR(
+          ExtractProfilingEvent(*(profile_sub_events + sub_event_idx), "SUB-EVENT", outfile, useExtendedEventData));
+      ORT_RETURN_IF_ERROR(
+          ExtractProfilingSubEvents(*(profile_sub_events + sub_event_idx), outfile, useExtendedEventData));
+    }
+
+    LOGS(*logger_, INFO) << "Wrote QNN profiling sub events (" << num_sub_events << ") to qnn-profiling-data.csv";
   }
+
   return Status::OK();
 }
 
-Status QnnBackendManager::ExtractProfilingEvent(QnnProfile_EventId_t profile_event_id) {
+Status QnnBackendManager::ExtractProfilingEvent(
+    QnnProfile_EventId_t profile_event_id,
+    const std::string& eventLevel,
+    std::ofstream& outfile,
+    bool useExtendedEventData) {
+  if (useExtendedEventData) {
+    return ExtractProfilingEventExtended(profile_event_id, eventLevel, outfile);
+  } else {
+    return ExtractProfilingEventBasic(profile_event_id, eventLevel, outfile);
+  }
+}
+
+Status QnnBackendManager::ExtractProfilingEventBasic(
+    QnnProfile_EventId_t profile_event_id,
+    const std::string& eventLevel,
+    std::ofstream& outfile) {
   QnnProfile_EventData_t event_data;
   auto result = qnn_interface_.profileGetEventData(profile_event_id, &event_data);
-  ORT_RETURN_IF(QNN_PROFILE_NO_ERROR != result, "Failed to get provile event data.");
+  QnnProfile_Error_t errorCode = static_cast<QnnProfile_Error_t>(result & 0xFFFF);
+  ORT_RETURN_IF(QNN_PROFILE_NO_ERROR != result, "Failed to get profile event data: " + std::string(QnnProfileErrorToString(errorCode)));
+
+  std::string message = GetEventTypeString(event_data.type);
+  std::string unit = GetUnitString(event_data.unit);
+
+  outfile << "UNKNOWN"
+          << ","
+          << message << ","
+          << event_data.value << ","
+          << unit << ","
+          << "BACKEND"
+          << ","
+          << eventLevel << ","
+          << (event_data.identifier ? event_data.identifier : "NULL") << "\n";
+
+  return Status::OK();
+}
 
-  LOGS(*logger_, VERBOSE) << "Profiling Event Info - Event Type: " << event_data.type
-                          << ", Event Value: " << event_data.value
-                          << ", Event Identifier: " << event_data.identifier
-                          << ", Event Unit: " << event_data.unit;
+Status QnnBackendManager::ExtractProfilingEventExtended(
+    QnnProfile_EventId_t profile_event_id,
+    const std::string& eventLevel,
+    std::ofstream& outfile) {
+  QnnProfile_ExtendedEventData_t event_data_extended;
+  auto resultGetExtendedEventData = qnn_interface_.profileGetExtendedEventData(profile_event_id, &event_data_extended);
+  QnnProfile_Error_t errorCode = static_cast<QnnProfile_Error_t>(resultGetExtendedEventData & 0xFFFF);
+  ORT_RETURN_IF(QNN_PROFILE_NO_ERROR != errorCode, "Failed to get profile event data: " + std::string(QnnProfileErrorToString(errorCode)));
+
+  std::string message = GetEventTypeString(event_data_extended.v1.type);
+  std::string unit = GetUnitString(event_data_extended.v1.unit);
+
+  if (event_data_extended.version == QNN_PROFILE_DATA_VERSION_1) {
+    outfile << event_data_extended.v1.timestamp << ","
+            << message << ","
+            << ExtractQnnScalarValue(event_data_extended.v1.value) << ","
+            << unit << ","
+            << "BACKEND"
+            << ","
+            << eventLevel << ","
+            << (event_data_extended.v1.identifier ? event_data_extended.v1.identifier : "NULL") << "\n";
+  }
 
   return Status::OK();
 }
 
+const std::string& QnnBackendManager::GetUnitString(QnnProfile_EventUnit_t unitType) {
+  const auto& unitStringMap = GetUnitStringMap();
+  auto it = unitStringMap.find(unitType);
+  if (it != unitStringMap.end()) {
+    return it->second;
+  }
+  static const std::string unknown = "UNKNOWN";
+  return unknown;
+}
+
+const std::unordered_map<QnnProfile_EventUnit_t, std::string>& QnnBackendManager::GetUnitStringMap() {
+  static const std::unordered_map<QnnProfile_EventUnit_t, std::string> unitStringMap = {
+      {QNN_PROFILE_EVENTUNIT_MICROSEC, "US"},
+      {QNN_PROFILE_EVENTUNIT_BYTES, "BYTES"},
+      {QNN_PROFILE_EVENTUNIT_CYCLES, "CYCLES"},
+      {QNN_PROFILE_EVENTUNIT_COUNT, "COUNT"},
+      {QNN_PROFILE_EVENTUNIT_OBJECT, "OBJECT"},
+      {QNN_PROFILE_EVENTUNIT_BACKEND, "BACKEND"}};
+  return unitStringMap;
+}
+
+const std::string QnnBackendManager::GetEventTypeString(QnnProfile_EventType_t eventType) {
+  // Interpret the event type
+  switch (eventType) {
+    case QNN_PROFILE_EVENTTYPE_INIT:
+      return "INIT";
+    case QNN_PROFILE_EVENTTYPE_FINALIZE:
+      return "FINALIZE";
+    case QNN_PROFILE_EVENTTYPE_EXECUTE:
+      return "EXECUTE";
+    case QNN_PROFILE_EVENTTYPE_NODE:
+      return "NODE";
+    case QNN_PROFILE_EVENTTYPE_EXECUTE_QUEUE_WAIT:
+      return "EXECUTE QUEUE WAIT";
+    case QNN_PROFILE_EVENTTYPE_EXECUTE_PREPROCESS:
+      return "EXECUTE PREPROCESS";
+    case QNN_PROFILE_EVENTTYPE_EXECUTE_DEVICE:
+      return "EXECUTE DEVICE";
+    case QNN_PROFILE_EVENTTYPE_EXECUTE_POSTPROCESS:
+      return "EXECUTE POSTPROCESS";
+    case QNN_PROFILE_EVENTTYPE_DEINIT:
+      return "DE-INIT";
+    case QNN_PROFILE_EVENTTYPE_BACKEND:
+      return "BACKEND";
+    default:
+      if (eventType > QNN_PROFILE_EVENTTYPE_BACKEND) {
+        return "BACKEND";
+      }
+      return "UNKNOWN";
+  }
+}
+
+const char* QnnBackendManager::QnnProfileErrorToString(QnnProfile_Error_t error) {
+  switch (error) {
+    case QNN_PROFILE_NO_ERROR:
+      return "QNN_PROFILE_NO_ERROR";
+    case QNN_PROFILE_ERROR_UNSUPPORTED:
+      return "QNN_PROFILE_ERROR_UNSUPPORTED";
+    case QNN_PROFILE_ERROR_INVALID_ARGUMENT:
+      return "QNN_PROFILE_ERROR_INVALID_ARGUMENT";
+    case QNN_PROFILE_ERROR_MEM_ALLOC:
+      return "QNN_PROFILE_ERROR_MEM_ALLOC";
+    case QNN_PROFILE_ERROR_INVALID_HANDLE:
+      return "QNN_PROFILE_ERROR_INVALID_HANDLE";
+    case QNN_PROFILE_ERROR_HANDLE_IN_USE:
+      return "QNN_PROFILE_ERROR_HANDLE_IN_USE";
+    case QNN_PROFILE_ERROR_INCOMPATIBLE_EVENT:
+      return "QNN_PROFILE_ERROR_INCOMPATIBLE_EVENT";
+    default:
+      return "UNKNOWN_ERROR";
+  }
+}
+
+const std::string QnnBackendManager::ExtractQnnScalarValue(const Qnn_Scalar_t& scalar) {
+  switch (scalar.dataType) {
+    case QNN_DATATYPE_INT_8:
+      return std::to_string(static_cast<int>(scalar.int8Value));
+    case QNN_DATATYPE_INT_16:
+      return std::to_string(scalar.int16Value);
+    case QNN_DATATYPE_INT_32:
+      return std::to_string(scalar.int32Value);
+    case QNN_DATATYPE_INT_64:
+      return std::to_string(scalar.int64Value);
+    case QNN_DATATYPE_UINT_8:
+      return std::to_string(static_cast<unsigned int>(scalar.uint8Value));
+    case QNN_DATATYPE_UINT_16:
+      return std::to_string(scalar.uint16Value);
+    case QNN_DATATYPE_UINT_32:
+      return std::to_string(scalar.uint32Value);
+    case QNN_DATATYPE_UINT_64:
+      return std::to_string(scalar.uint64Value);
+    case QNN_DATATYPE_FLOAT_16:
+      return std::to_string(scalar.floatValue);
+    case QNN_DATATYPE_FLOAT_32:
+      return std::to_string(scalar.floatValue);
+    case QNN_DATATYPE_SFIXED_POINT_8:
+    case QNN_DATATYPE_SFIXED_POINT_16:
+    case QNN_DATATYPE_SFIXED_POINT_32:
+      return std::to_string(scalar.int32Value);  // Assume using int types for signed fixed points.
+    case QNN_DATATYPE_UFIXED_POINT_8:
+    case QNN_DATATYPE_UFIXED_POINT_16:
+    case QNN_DATATYPE_UFIXED_POINT_32:
+      return std::to_string(scalar.uint32Value);  // Assume using unsigned int types for unsigned fixed points.
+    case QNN_DATATYPE_BOOL_8:
+      return scalar.bool8Value ? "true" : "false";
+    case QNN_DATATYPE_STRING:
+      return scalar.stringValue ? scalar.stringValue : "NULL";
+    default:
+      return "UNKNOWN";
+  }
+}
+
 QnnBackendManager::~QnnBackendManager() {
   ReleaseResources();
 }
diff --git a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.h b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.h
index 4ca63a042c103..bc05820da2f73 100644
--- a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.h
+++ b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.h
@@ -25,14 +25,18 @@ class QnnModel;
 
 class QnnBackendManager {
  public:
-  QnnBackendManager(std::string backend_path,
+  QnnBackendManager(std::string&& backend_path,
                     ProfilingLevel profiling_level,
                     uint32_t rpc_control_latency,
-                    HtpPerformanceMode htp_performance_mode)
+                    HtpPerformanceMode htp_performance_mode,
+                    ContextPriority context_priority,
+                    std::string&& qnn_saver_path)
       : backend_path_(backend_path),
         profiling_level_(profiling_level),
         rpc_control_latency_(rpc_control_latency),
-        htp_performance_mode_(htp_performance_mode) {
+        htp_performance_mode_(htp_performance_mode),
+        context_priority_(context_priority),
+        qnn_saver_path_(qnn_saver_path) {
   }
   ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(QnnBackendManager);
 
@@ -69,13 +73,9 @@ class QnnBackendManager {
     return CreateContext();
   }
 
-  Status DumpQnnContext(const std::string& model_name, const std::string& graph_name);
+  std::unique_ptr<unsigned char[]> GetContextBinaryBuffer(uint64_t& written_buffer_size);
 
-  Status LoadCachedQnnContext(QnnModel& qnn_model);
-
-  Status GetMetadataFromOrtContextFile();
-
-  Status ValidateWithContextFile(const std::string& model_name, const std::string& graph_name);
+  Status LoadCachedQnnContextFromBuffer(char* buffer, uint64_t buffer_length, QnnModel& qnn_model);
 
   Status SetupBackend(const logging::Logger& logger, bool load_from_cached_context);
 
@@ -89,14 +89,6 @@ class QnnBackendManager {
 
   const Qnn_ProfileHandle_t& GetQnnProfileHandle() { return profile_backend_handle_; }
 
-  std::string GetBackendBuildId() {
-    char* backend_build_id{nullptr};
-    if (QNN_SUCCESS != qnn_interface_.backendGetBuildId((const char**)&backend_build_id)) {
-      LOGS(*logger_, ERROR) << "Unable to get build Id from the backend.";
-    }
-    return (backend_build_id == nullptr ? std::string("") : std::string(backend_build_id));
-  }
-
   void SetLogger(const logging::Logger* logger) {
     if (logger_ == nullptr) {
       logger_ = logger;
@@ -125,23 +117,25 @@ class QnnBackendManager {
   void Split(std::vector<std::string>& split_string, const std::string& tokenized_string, const char separator);
 
   Status ExtractBackendProfilingInfo();
-  Status ExtractProfilingSubEvents(QnnProfile_EventId_t profile_event_id);
-  Status ExtractProfilingEvent(QnnProfile_EventId_t profile_event_id);
+  Status ExtractProfilingSubEvents(QnnProfile_EventId_t profile_event_id, std::ofstream& outfile, bool backendSupportsExtendedEventData);
+  Status ExtractProfilingEvent(QnnProfile_EventId_t profile_event_id, const std::string& eventLevel, std::ofstream& outfile, bool backendSupportsExtendedEventData);
 
   void SetQnnBackendType(uint32_t backend_id);
   QnnBackendType GetQnnBackendType() { return qnn_backend_type_; }
 
-  bool IsContextCacheFileExists(const std::string& customer_context_cache_path,
-                                const std::string& model_description,
-                                const onnxruntime::PathString& model_pathstring);
+  const std::string& GetSdkVersion() { return sdk_build_version_; }
 
  private:
   void* LoadLib(const char* file_name, int flags, std::string& error_msg);
 
   Status LoadQnnSystemLib();
 
+  Status LoadQnnSaverBackend();
+
   Status UnloadLib(void* handle);
 
+  Status DestroyHTPPowerConfigID();
+
   void* LibFunction(void* handle, const char* symbol, std::string& error_msg);
 
   template <class T>
@@ -155,11 +149,11 @@ class QnnBackendManager {
   }
 
   template <typename F, class T>
-  Status GetQnnInterfaceProviders(const char* lib_path,
-                                  const char* interface_provider_name,
-                                  void** backend_lib_handle,
-                                  T*** interface_providers,
-                                  uint32_t& num_providers);
+  Status GetQnnInterfaceProvider(const char* lib_path,
+                                 const char* interface_provider_name,
+                                 void** backend_lib_handle,
+                                 Qnn_Version_t req_version,
+                                 T** interface_provider);
 
   bool IsDevicePropertySupported();
 
@@ -173,6 +167,22 @@ class QnnBackendManager {
     return ret;
   }
 
+  std::string GetBackendBuildId() {
+    char* backend_build_id{nullptr};
+    if (QNN_SUCCESS != qnn_interface_.backendGetBuildId((const char**)&backend_build_id)) {
+      LOGS(*logger_, ERROR) << "Unable to get build Id from the backend.";
+    }
+    return (backend_build_id == nullptr ? std::string("") : std::string(backend_build_id));
+  }
+
+  Status ExtractProfilingEventBasic(QnnProfile_EventId_t profile_event_id, const std::string& eventLevel, std::ofstream& outfile);
+  Status ExtractProfilingEventExtended(QnnProfile_EventId_t profile_event_id, const std::string& eventLevel, std::ofstream& outfile);
+  static const std::string& GetUnitString(QnnProfile_EventUnit_t unitType);
+  static const std::unordered_map<QnnProfile_EventUnit_t, std::string>& GetUnitStringMap();
+  static const std::string GetEventTypeString(QnnProfile_EventType_t eventType);
+  static const std::string ExtractQnnScalarValue(const Qnn_Scalar_t& scalar);
+  const char* QnnProfileErrorToString(QnnProfile_Error_t error);
+
  private:
   const std::string backend_path_;
   const logging::Logger* logger_ = nullptr;
@@ -185,7 +195,6 @@ class QnnBackendManager {
   Qnn_LogHandle_t log_handle_ = nullptr;
   Qnn_DeviceHandle_t device_handle_ = nullptr;
   Qnn_ContextHandle_t context_ = nullptr;
-  QnnContext_Config_t** context_config_ = nullptr;
   ProfilingLevel profiling_level_;
   bool backend_initialized_ = false;
   bool device_created_ = false;
@@ -197,19 +206,13 @@ class QnnBackendManager {
   std::vector<std::string> op_package_paths_;
   uint32_t rpc_control_latency_ = 0;
   HtpPerformanceMode htp_performance_mode_;
-  std::string model_name_from_ctx_cache_ = "";
-  std::string graph_name_from_ctx_cache_ = "";
-  std::string model_description_from_ctx_cache_ = "";
-  std::string model_description_ = "";
-  std::string context_cache_path_ = "";
-  bool ctx_file_exists_ = false;
-  bool ctx_metadata_tried_ = false;
-  bool ort_generated_ctx_cache_ = false;
-  bool get_capability_round_2_ = false;
-  uint16_t ort_ctx_metadata_length_ = 0;
+  ContextPriority context_priority_;
+  std::string sdk_build_version_ = "";
 #ifdef _WIN32
   std::set<HMODULE> mod_handles_;
 #endif
+  const std::string qnn_saver_path_;
+  uint32_t htp_power_config_client_id_ = 0;
 };
 
 }  // namespace qnn
diff --git a/onnxruntime/core/providers/qnn/builder/qnn_def.cc b/onnxruntime/core/providers/qnn/builder/qnn_def.cc
index f4eb7b2a2b158..a77ac16cf624b 100644
--- a/onnxruntime/core/providers/qnn/builder/qnn_def.cc
+++ b/onnxruntime/core/providers/qnn/builder/qnn_def.cc
@@ -201,6 +201,30 @@ const Qnn_QuantizeParams_t& GetQnnTensorQParams(const Qnn_Tensor_t& qnn_tensor)
   }
 }
 
+Status CompareQnnQuantParams(const Qnn_QuantizeParams_t& qparam0, const Qnn_QuantizeParams_t& qparam1,
+                             float& scale_diff, int32_t& offset_diff) {
+  scale_diff = 0.0f;
+  offset_diff = 0;
+
+  ORT_RETURN_IF_NOT((qparam0.encodingDefinition == qparam1.encodingDefinition &&
+                     qparam0.quantizationEncoding == qparam1.quantizationEncoding),
+                    "Expected quantization parameters to be the same type.");
+
+  if (qparam0.encodingDefinition == QNN_DEFINITION_DEFINED) {
+    switch (qparam0.quantizationEncoding) {
+      case QNN_QUANTIZATION_ENCODING_SCALE_OFFSET: {
+        scale_diff = std::abs(qparam0.scaleOffsetEncoding.scale - qparam1.scaleOffsetEncoding.scale);
+        offset_diff = std::abs(qparam0.scaleOffsetEncoding.offset - qparam1.scaleOffsetEncoding.offset);
+        break;
+      }
+      default:
+        return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Unsupported quantization encoding: ", qparam0.quantizationEncoding);
+    }
+  }
+
+  return Status::OK();
+}
+
 bool CreateTensorInQnnGraph(const QNN_INTERFACE_VER_TYPE& qnn_interface,
                             const Qnn_GraphHandle_t& graph,
                             const std::string& node_name,
diff --git a/onnxruntime/core/providers/qnn/builder/qnn_def.h b/onnxruntime/core/providers/qnn/builder/qnn_def.h
index 8649db92be027..f6a3b1bd360ec 100644
--- a/onnxruntime/core/providers/qnn/builder/qnn_def.h
+++ b/onnxruntime/core/providers/qnn/builder/qnn_def.h
@@ -48,6 +48,22 @@ enum class HtpPerformanceMode : uint8_t {
   kHtpBalanced,
 };
 
+enum class ContextPriority : uint8_t {
+  LOW = 0,
+  NORMAL,
+  NORMAL_HIGH,
+  HIGH,
+  UNDEFINED
+};
+
+// Defines the graph optimization strategy used by the HTP backend.
+enum class HtpGraphFinalizationOptimizationMode : uint8_t {
+  kDefault = 0,
+  kMode1 = 1,  // Faster preparation time, less optimal graph
+  kMode2 = 2,  // Longer preparation time, more optimal graph
+  kMode3 = 3,  // Longest preparation time, most likely even more optimal graph.
+};
+
 enum class QnnBackendType : uint8_t {
   CPU = 0,
   GPU,
@@ -106,6 +122,20 @@ uint32_t* GetQnnTensorDims(const Qnn_Tensor_t& qnn_tensor);
 const Qnn_ClientBuffer_t& GetQnnTensorClientBuf(const Qnn_Tensor_t& qnn_tensor);
 const Qnn_QuantizeParams_t& GetQnnTensorQParams(const Qnn_Tensor_t& qnn_tensor);
 
+/**
+ * Compares two sets of quantization parameters. Sets the parameters `scale_diff` and `offset_diff`
+ * to the absolute differences. Returns an error status if the quantization parameters are not
+ * of the same type, or if the type is not supported.
+ *
+ * \param qparam0 The first set of quantization parameters.
+ * \param qparam1 The second set of quantization parameters.
+ * \param scale_diff Set to the absolute value of the difference in scale value.
+ * \param offset_diff Set to the absolute value of the difference in offset value.
+ * \return Status indicating success.
+ */
+Status CompareQnnQuantParams(const Qnn_QuantizeParams_t& qparam0, const Qnn_QuantizeParams_t& qparam1,
+                             float& max_scale_diff, int32_t& max_offset_diff);
+
 // TODO: split out separate files for Wrappers
 class QnnTensorWrapper {
  public:
diff --git a/onnxruntime/core/providers/qnn/builder/qnn_graph_configs_helper.cc b/onnxruntime/core/providers/qnn/builder/qnn_graph_configs_helper.cc
new file mode 100644
index 0000000000000..63aa01b48e7e2
--- /dev/null
+++ b/onnxruntime/core/providers/qnn/builder/qnn_graph_configs_helper.cc
@@ -0,0 +1,43 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "core/providers/qnn/builder/qnn_graph_configs_helper.h"
+
+#include "HTP/QnnHtpGraph.h"
+
+namespace onnxruntime {
+namespace qnn {
+
+const QnnGraph_Config_t** QnnGraphConfigsBuilder::GetQnnGraphConfigs() {
+  if (graph_config_ptrs_.empty()) {
+    return nullptr;
+  }
+
+  if (!IsNullTerminated()) {
+    graph_config_ptrs_.push_back(nullptr);
+  }
+
+  return graph_config_ptrs_.data();
+}
+
+QnnHtpGraph_CustomConfig_t& QnnGraphConfigsBuilder::PushHtpGraphCustomConfig() {
+  htp_custom_graph_configs_.push_back(QNN_HTP_GRAPH_CUSTOM_CONFIG_INIT);
+  return htp_custom_graph_configs_.back();
+}
+
+QnnGraph_Config_t& QnnGraphConfigsBuilder::PushGraphConfig() {
+  graph_configs_.push_back(QNN_GRAPH_CONFIG_INIT);
+  QnnGraph_Config_t& config = graph_configs_.back();
+
+  // Add pointer to this new graph config to the list of graph config pointers.
+  if (IsNullTerminated()) {
+    graph_config_ptrs_.back() = &config;  // Replace last nullptr entry.
+  } else {
+    graph_config_ptrs_.push_back(&config);
+  }
+
+  return config;
+}
+
+}  // namespace qnn
+}  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/qnn/builder/qnn_graph_configs_helper.h b/onnxruntime/core/providers/qnn/builder/qnn_graph_configs_helper.h
new file mode 100644
index 0000000000000..8c4928fdacbc4
--- /dev/null
+++ b/onnxruntime/core/providers/qnn/builder/qnn_graph_configs_helper.h
@@ -0,0 +1,56 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+
+#include <core/common/inlined_containers_fwd.h>
+
+#include "HTP/QnnHtpGraph.h"
+
+namespace onnxruntime {
+namespace qnn {
+
+/**
+ * Helper class for building a null-terminated list of QNN Graph configurations.
+ * A QNN configuration consists of multiple objects with references to each other. This
+ * class ensures that all configuration objects have the same lifetime, so that they remain valid
+ * across the call to graphCreate().
+ */
+class QnnGraphConfigsBuilder {
+ public:
+  /**
+   * Returns a pointer to the beginning of a null-terminated array of QNN Graph configurations.
+   * This result is passed QNN's graphCreate() API.
+   *
+   * \return Pointer to null-terminated QnnGraph_Config_t* array.
+   */
+  const QnnGraph_Config_t** GetQnnGraphConfigs();
+
+  /**
+   * Creates and returns a reference to a new HTP graph configuration object. The object is initialized to
+   * the QNN recommended default value. The caller is meant to override fields in this object.
+   *
+   * \return A reference to a default QnnHtpGraph_CustomConfig_t object.
+   */
+  QnnHtpGraph_CustomConfig_t& PushHtpGraphCustomConfig();
+
+  /**
+   * Creates and returns a reference to a new graph configuration object. The object is initialized to
+   * the QNN recommended default value. The caller is meant to override fields in this object.
+   *
+   * \return A reference to a default QnnGraph_Config_t object.
+   */
+  QnnGraph_Config_t& PushGraphConfig();
+
+ private:
+  bool IsNullTerminated() const {
+    return !graph_config_ptrs_.empty() && graph_config_ptrs_.back() == nullptr;
+  }
+
+  InlinedVector<QnnHtpGraph_CustomConfig_t> htp_custom_graph_configs_;
+  InlinedVector<QnnGraph_Config_t> graph_configs_;
+  InlinedVector<const QnnGraph_Config_t*> graph_config_ptrs_;
+};
+
+}  // namespace qnn
+}  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/qnn/builder/qnn_model.cc b/onnxruntime/core/providers/qnn/builder/qnn_model.cc
index db7196b4c2582..fd3a95b5f1f78 100644
--- a/onnxruntime/core/providers/qnn/builder/qnn_model.cc
+++ b/onnxruntime/core/providers/qnn/builder/qnn_model.cc
@@ -36,15 +36,16 @@ Status QnnModel::SetGraphInputOutputInfo(const GraphViewer& graph_viewer,
     initializer_inputs_.emplace(graph_ini.first);
   }
   auto input_defs = fused_node.InputDefs();
-  ORT_RETURN_IF_ERROR(ParseGraphInputOrOutput(input_defs, inputs_info_, model_input_index_map_, true));
+  ORT_RETURN_IF_ERROR(ParseGraphInputOrOutput(input_defs, input_names_, inputs_info_, model_input_index_map_, true));
 
   auto output_defs = fused_node.OutputDefs();
-  ORT_RETURN_IF_ERROR(ParseGraphInputOrOutput(output_defs, outputs_info_, model_output_index_map_));
+  ORT_RETURN_IF_ERROR(ParseGraphInputOrOutput(output_defs, output_names_, outputs_info_, model_output_index_map_));
 
   return Status::OK();
 }
 
 Status QnnModel::ParseGraphInputOrOutput(ConstPointerContainer<std::vector<NodeArg*>>& input_output_defs,
+                                         std::vector<std::string>& input_output_names,
                                          std::unordered_map<std::string, OnnxTensorInfo>& input_output_info_table,
                                          std::unordered_map<std::string, size_t>& input_output_index_map,
                                          bool is_input) {
@@ -72,6 +73,7 @@ Status QnnModel::ParseGraphInputOrOutput(ConstPointerContainer<std::vector<NodeA
     int32_t data_type = type_proto->tensor_type().elem_type();
     // use index i so that for graph input, it has initializers included
     input_output_info_table.emplace(std::piecewise_construct, std::forward_as_tuple(name), std::forward_as_tuple(i, data_type, std::move(shape)));
+    input_output_names.push_back(name);
   }
 
   return Status::OK();
@@ -85,7 +87,8 @@ const NodeUnit& QnnModel::GetNodeUnit(const Node* node,
 }
 
 Status QnnModel::ComposeGraph(const GraphViewer& graph_viewer,
-                              const onnxruntime::Node& fused_node) {
+                              const onnxruntime::Node& fused_node,
+                              const QnnGraph_Config_t** graph_configs) {
   LOGS(logger_, VERBOSE) << "ComposeGraph Graph name: " << graph_viewer.Name();
 
   // Holder for the NodeUnits in the graph, this will guarantee the NodeUnits is
@@ -105,7 +108,7 @@ Status QnnModel::ComposeGraph(const GraphViewer& graph_viewer,
                                                       initializer_inputs_,
                                                       qnn_backend_manager_->GetQnnBackendType());
   bool rt = true;
-  rt = qnn_model_wrapper.CreateQnnGraph(qnn_backend_manager_->GetQnnContext(), graph_name);
+  rt = qnn_model_wrapper.CreateQnnGraph(qnn_backend_manager_->GetQnnContext(), graph_name, graph_configs);
   if (!rt) {
     return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to initialize qnn_model_wrapper.");
   }
diff --git a/onnxruntime/core/providers/qnn/builder/qnn_model.h b/onnxruntime/core/providers/qnn/builder/qnn_model.h
index 934980f05fbf2..de4f872f73ccf 100644
--- a/onnxruntime/core/providers/qnn/builder/qnn_model.h
+++ b/onnxruntime/core/providers/qnn/builder/qnn_model.h
@@ -27,7 +27,8 @@ class QnnModel {
   ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(QnnModel);
 
   Status ComposeGraph(const GraphViewer& graph_viewer,
-                      const onnxruntime::Node& fused_node);
+                      const onnxruntime::Node& fused_node,
+                      const QnnGraph_Config_t** graph_configs = nullptr);
 
   Status FinalizeGraphs();
 
@@ -47,6 +48,7 @@ class QnnModel {
   Status SetGraphInputOutputInfo(const GraphViewer& graph_viewer,
                                  const onnxruntime::Node& fused_node);
   Status ParseGraphInputOrOutput(ConstPointerContainer<std::vector<NodeArg*>>& input_output_defs,
+                                 std::vector<std::string>& input_output_names,
                                  std::unordered_map<std::string, OnnxTensorInfo>& input_output_info_table,
                                  std::unordered_map<std::string, size_t>& input_output_index,
                                  bool is_input = false);
@@ -74,6 +76,24 @@ class QnnModel {
 
   Status DeserializeGraphInfoFromBinaryInfo(const QnnSystemContext_GraphInfo_t& qnn_sys_ctx_graph_info);
 
+  const std::vector<std::string>& GetInputNames() const {
+    return input_names_;
+  }
+
+  const std::vector<std::string>& GetOutputNames() const {
+    return output_names_;
+  }
+
+  const std::unordered_map<std::string, OnnxTensorInfo>& GetInputsInfo() const {
+    return inputs_info_;
+  }
+
+  const std::unordered_map<std::string, OnnxTensorInfo>& GetOutputsInfo() const {
+    return outputs_info_;
+  }
+
+  const std::string& Name() { return graph_info_->Name(); }
+
  private:
   const NodeUnit& GetNodeUnit(const Node* node,
                               const std::unordered_map<const Node*, const NodeUnit*>& node_unit_map) const;
@@ -87,13 +107,13 @@ class QnnModel {
 
   QnnBackendType GetQnnBackendType() { return qnn_backend_type_; }
 
- private:
   size_t GetInputOutputIndex(const std::string& name, const std::unordered_map<std::string, OnnxTensorInfo>& io_info) const {
     auto it = io_info.find(name);
     ORT_ENFORCE(it != io_info.end(), "Input/Output name not found.");
     return it->second.index_;
   }
 
+ private:
   const logging::Logger& logger_;
   std::unique_ptr<GraphInfo> graph_info_;
   QnnBackendManager* qnn_backend_manager_ = nullptr;
@@ -102,6 +122,8 @@ class QnnModel {
   std::unordered_map<std::string, size_t> model_output_index_map_;
   // TODO: remove initializer_inputs_, use QnnModelWrapper
   std::unordered_set<std::string> initializer_inputs_;
+  std::vector<std::string> input_names_;
+  std::vector<std::string> output_names_;
   std::unordered_map<std::string, OnnxTensorInfo> inputs_info_;
   std::unordered_map<std::string, OnnxTensorInfo> outputs_info_;
   std::vector<Qnn_Tensor_t> qnn_inputs_;
diff --git a/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc b/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc
index 9d339387b0a43..a422434205c68 100644
--- a/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc
+++ b/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc
@@ -365,33 +365,33 @@ bool QnnModelWrapper::ProcessQuantizationParameter(const std::optional<NodeUnitI
   return true;
 }
 
-Status QnnModelWrapper::GetOnnxInputInfo(const NodeUnitIODef& input,
-                                         OnnxInputInfo& input_info) const {
+Status QnnModelWrapper::GetTensorInfo(const NodeUnitIODef& input, TensorInfo& tensor_info) const {
   const std::string& name = input.node_arg.Name();
 
   // Fill in quantization param info.
-  input_info.quant_param = QNN_QUANTIZE_PARAMS_INIT;
+  tensor_info.quant_param = QNN_QUANTIZE_PARAMS_INIT;
   bool is_quantized_tensor = input.quant_param.has_value();
-  utils::InitializeQuantizeParam(input_info.quant_param, is_quantized_tensor);
+  utils::InitializeQuantizeParam(tensor_info.quant_param, is_quantized_tensor);
 
   if (is_quantized_tensor) {
     ORT_RETURN_IF_NOT(ProcessQuantizationParameter(input.quant_param,
-                                                   input_info.quant_param.scaleOffsetEncoding.scale,
-                                                   input_info.quant_param.scaleOffsetEncoding.offset),
+                                                   tensor_info.quant_param.scaleOffsetEncoding.scale,
+                                                   tensor_info.quant_param.scaleOffsetEncoding.offset),
                       "QNN EP: Cannot get quantization parameters for input ", name.c_str());
   }
 
   // Fill in QNN data type.
-  input_info.qnn_data_type = QNN_DATATYPE_FLOAT_32;
-  ORT_RETURN_IF_ERROR(utils::GetQnnDataType(is_quantized_tensor, input.node_arg.TypeAsProto(), input_info.qnn_data_type));
+  tensor_info.qnn_data_type = QNN_DATATYPE_FLOAT_32;
+  ORT_RETURN_IF_ERROR(utils::GetQnnDataType(is_quantized_tensor, input.node_arg.TypeAsProto(),
+                                            tensor_info.qnn_data_type));
 
   // Fill in shape.
-  ORT_RETURN_IF_NOT(GetOnnxShape(input.node_arg, input_info.shape), "Cannot get shape");
+  ORT_RETURN_IF_NOT(GetOnnxShape(input.node_arg, tensor_info.shape), "Cannot get shape");
 
   // Fill in initializer info.
-  input_info.is_initializer = IsInitializerInput(name);
-  if (input_info.is_initializer) {
-    input_info.initializer_tensor = GetInitializerTensors().at(name);
+  tensor_info.is_initializer = IsInitializerInput(name);
+  if (tensor_info.is_initializer) {
+    tensor_info.initializer_tensor = GetInitializerTensors().at(name);
   }
 
   return Status::OK();
diff --git a/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.h b/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.h
index 22f8d3a0eaa64..2765556243a25 100644
--- a/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.h
+++ b/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.h
@@ -18,9 +18,9 @@
 namespace onnxruntime {
 namespace qnn {
 
-// POD struct that stores information about an ONNX input.
-// Filled out by QnnModelWrapper::GetOnnxInputInfo()
-struct OnnxInputInfo {
+// Stores information about an ONNX input or output tensor.
+// Filled out by QnnModelWrapper::GetTensorInfo()
+struct TensorInfo {
   std::vector<uint32_t> shape;
   Qnn_DataType_t qnn_data_type;
   Qnn_QuantizeParams_t quant_param;
@@ -117,8 +117,7 @@ class QnnModelWrapper {
     return input_index_map_.find(tensor_name) != input_index_map_.end();
   }
 
-  // TODO(hecli) rename to GetTensorInfo
-  Status GetOnnxInputInfo(const NodeUnitIODef& input, OnnxInputInfo& input_info) const;
+  Status GetTensorInfo(const NodeUnitIODef& input, TensorInfo& input_info) const;
 
   Status AddReshapeNode(const std::string& input_name,
                         const std::string& output_name,
@@ -181,6 +180,8 @@ class QnnModelWrapper {
 
   QnnBackendType GetQnnBackendType() { return qnn_backend_type_; }
 
+  const GraphViewer& GetGraphViewer() const { return graph_viewer_; }
+
  private:
   bool CreateQnnInputOutputTensors(const std::string& qnn_node_name,
                                    const std::vector<std::string>& names,
diff --git a/onnxruntime/core/providers/qnn/builder/qnn_utils.cc b/onnxruntime/core/providers/qnn/builder/qnn_utils.cc
index dd202c87c0a77..e4074fa6fb60b 100644
--- a/onnxruntime/core/providers/qnn/builder/qnn_utils.cc
+++ b/onnxruntime/core/providers/qnn/builder/qnn_utils.cc
@@ -423,6 +423,75 @@ bool OnnxDataTypeToQnnDataType(const int32_t onnx_data_type, Qnn_DataType_t& qnn
   }
 }
 
+std::pair<float, float> CheckMinMax(float rmin, float rmax) {
+  // Ensure a minimum range of 0.0001 (required by QNN)
+  rmax = std::max(rmax, rmin + 0.0001f);
+
+  // Both QNN and ORT require the range to include 0.0f
+  rmin = std::min(rmin, 0.0f);
+  rmax = std::max(rmax, 0.0f);
+
+  return std::make_pair(rmin, rmax);
+}
+
+template <typename T>
+Status GetQminQmax(const Qnn_DataType_t qnn_data_type,
+                   T& qmin,
+                   T& qmax) {
+  if (qnn_data_type == QNN_DATATYPE_SFIXED_POINT_8) {
+    qmin = static_cast<T>(std::numeric_limits<int8_t>::min());
+    qmax = static_cast<T>(std::numeric_limits<int8_t>::max());
+  } else if (qnn_data_type == QNN_DATATYPE_UFIXED_POINT_8) {
+    qmin = static_cast<T>(std::numeric_limits<uint8_t>::min());
+    qmax = static_cast<T>(std::numeric_limits<uint8_t>::max());
+  } else if (qnn_data_type == QNN_DATATYPE_SFIXED_POINT_16) {
+    qmin = static_cast<T>(std::numeric_limits<int16_t>::min());
+    qmax = static_cast<T>(std::numeric_limits<int16_t>::max());
+  } else if (qnn_data_type == QNN_DATATYPE_UFIXED_POINT_16) {
+    qmin = static_cast<T>(std::numeric_limits<uint16_t>::min());
+    qmax = static_cast<T>(std::numeric_limits<uint16_t>::max());
+  } else {
+    ORT_RETURN_IF(true, "Qnn Data Type: %d not supported yet.", qnn_data_type);
+  }
+  return Status::OK();
+}
+
+Status GetQuantParams(float rmin,
+                      float rmax,
+                      const Qnn_DataType_t qnn_data_type,
+                      float& scale,
+                      int& zero_point) {
+  std::tie(rmin, rmax) = CheckMinMax(rmin, rmax);
+  float qmin = 0.0f;
+  float qmax = 255.0f;
+  ORT_RETURN_IF_ERROR(GetQminQmax(qnn_data_type, qmin, qmax));
+
+  scale = (rmax - rmin) / (qmax - qmin);
+  const float initial_zero_point = qmin - (rmin / scale);
+  zero_point = static_cast<int>(RoundHalfToEven(Saturate(qmax, qmin, initial_zero_point)));
+  // To match QNN quantization definition
+  zero_point = 0 - zero_point;
+  return Status::OK();
+}
+
+double Dequantize(int32_t offset, float scale, const double quant_value) {
+  double offset_d = static_cast<double>(offset);
+  double scale_d = static_cast<double>(scale);
+  return (quant_value + offset_d) * scale_d;
+}
+
+Status Quantize(const double double_value,
+                const float scale,
+                const int zero_point,
+                const Qnn_DataType_t qnn_data_type,
+                int& quant_value) {
+  int qmin = 0;
+  int qmax = 255;
+  ORT_RETURN_IF_ERROR(GetQminQmax(qnn_data_type, qmin, qmax));
+  quant_value = Saturate(qmax, qmin, static_cast<int>(std::round((double_value / scale) - zero_point)));
+  return Status::OK();
+}
+
 }  // namespace utils
 }  // namespace qnn
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/qnn/builder/qnn_utils.h b/onnxruntime/core/providers/qnn/builder/qnn_utils.h
index a54e0c8276e71..edbef7ae92ee0 100644
--- a/onnxruntime/core/providers/qnn/builder/qnn_utils.h
+++ b/onnxruntime/core/providers/qnn/builder/qnn_utils.h
@@ -9,6 +9,8 @@
 #include <vector>
 #include <string>
 
+#include "core/util/qmath.h"
+
 namespace onnxruntime {
 namespace qnn {
 class QnnOpConfigWrapper;
@@ -48,6 +50,38 @@ static bool ArrayHasString(const std::array<std::string_view, N>& strings, std::
   return false;
 }
 
+std::pair<float, float> CheckMinMax(float rmin, float rmax);
+
+template <typename T>
+Status GetQminQmax(const Qnn_DataType_t qnn_data_type, T& qmin, T& qmax);
+
+template <typename T>
+inline T Saturate(const T qmax,
+                  const T qmin,
+                  const T quant_value) {
+  if (quant_value > qmax) {
+    return qmax;
+  } else if (quant_value < qmin) {
+    return qmin;
+  } else {
+    return quant_value;
+  }
+}
+
+Status GetQuantParams(float rmin,
+                      float rmax,
+                      const Qnn_DataType_t qnn_data_type,
+                      float& scale,
+                      int& zero_point);
+
+double Dequantize(int32_t offset, float scale, const double quant_value);
+
+Status Quantize(const double double_value,
+                const float scale,
+                const int zero_point,
+                const Qnn_DataType_t qnn_data_type,
+                int& quant_value);
+
 }  // namespace utils
 }  // namespace qnn
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc
index 7bbfe807da0f2..c7b309ae471c9 100644
--- a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc
+++ b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc
@@ -16,20 +16,12 @@
 #include "core/providers/qnn/builder/qnn_model_wrapper.h"
 #include "core/providers/qnn/builder/op_builder_factory.h"
 #include "core/providers/qnn/builder/qnn_def.h"
+#include "core/providers/qnn/builder/onnx_ctx_model_helper.h"
 
 namespace onnxruntime {
 
 constexpr const char* QNN = "QNN";
 
-std::string GetFileNameFromModelPath(onnxruntime::Path model_path) {
-  auto model_path_components = model_path.GetComponents();
-  // There's no model path if model loaded from buffer stead of file
-  if (model_path_components.empty()) {
-    return "";
-  }
-  return PathToUTF8String(model_path_components.back());
-}
-
 void QNNExecutionProvider::ParseProfilingLevel(std::string profiling_level_string) {
   std::transform(profiling_level_string.begin(),
                  profiling_level_string.end(),
@@ -76,18 +68,55 @@ void QNNExecutionProvider::ParseHtpPerformanceMode(std::string htp_performance_m
   }
 }
 
+void QNNExecutionProvider::ParseQnnContextPriority(std::string context_priority_string) {
+  std::transform(context_priority_string.begin(),
+                 context_priority_string.end(),
+                 context_priority_string.begin(),
+                 [](unsigned char c) { return static_cast<unsigned char>(std::tolower(c)); });
+  LOGS_DEFAULT(VERBOSE) << "QNN context priority: " << context_priority_string;
+  if (context_priority_string == "low") {
+    context_priority_ = qnn::ContextPriority::LOW;
+  } else if (context_priority_string == "normal") {
+    context_priority_ = qnn::ContextPriority::NORMAL;
+  } else if (context_priority_string == "normal_high") {
+    context_priority_ = qnn::ContextPriority::NORMAL_HIGH;
+  } else if (context_priority_string == "high") {
+    context_priority_ = qnn::ContextPriority::HIGH;
+  } else {
+    context_priority_ = qnn::ContextPriority::UNDEFINED;
+    LOGS_DEFAULT(WARNING) << "QNN context priority: " << context_priority_string << " not valid, set to undefined.";
+  }
+}
+
+void QNNExecutionProvider::ParseHtpGraphFinalizationOptimizationMode(const std::string& htp_graph_finalization_opt_mode_string) {
+  LOGS_DEFAULT(VERBOSE) << "HTP graph finalization optimization mode: "
+                        << htp_graph_finalization_opt_mode_string;
+
+  if (htp_graph_finalization_opt_mode_string.empty() || htp_graph_finalization_opt_mode_string == "0") {
+    htp_graph_finalization_opt_mode_ = qnn::HtpGraphFinalizationOptimizationMode::kDefault;
+  } else if (htp_graph_finalization_opt_mode_string == "1") {
+    htp_graph_finalization_opt_mode_ = qnn::HtpGraphFinalizationOptimizationMode::kMode1;
+  } else if (htp_graph_finalization_opt_mode_string == "2") {
+    htp_graph_finalization_opt_mode_ = qnn::HtpGraphFinalizationOptimizationMode::kMode2;
+  } else if (htp_graph_finalization_opt_mode_string == "3") {
+    htp_graph_finalization_opt_mode_ = qnn::HtpGraphFinalizationOptimizationMode::kMode3;
+  } else {
+    LOGS_DEFAULT(WARNING) << "Invalid HTP graph finalization optimization mode: "
+                          << htp_graph_finalization_opt_mode_string;
+  }
+}
+
 QNNExecutionProvider::QNNExecutionProvider(const ProviderOptions& provider_options_map,
                                            const SessionOptions* session_options)
-    : IExecutionProvider{onnxruntime::kQnnExecutionProvider, true},
-      runtime_options_(provider_options_map) {
+    : IExecutionProvider{onnxruntime::kQnnExecutionProvider, true} {
   if (session_options) {
     disable_cpu_ep_fallback_ = session_options->config_options.GetConfigOrDefault(
                                    kOrtSessionOptionsDisableCPUEPFallback, "0") == "1";
   }
 
   static const std::string CONTEXT_CACHE_ENABLED = "qnn_context_cache_enable";
-  auto context_cache_enabled_pos = runtime_options_.find(CONTEXT_CACHE_ENABLED);
-  if (context_cache_enabled_pos != runtime_options_.end()) {
+  auto context_cache_enabled_pos = provider_options_map.find(CONTEXT_CACHE_ENABLED);
+  if (context_cache_enabled_pos != provider_options_map.end()) {
     if (context_cache_enabled_pos->second == "1") {
       context_cache_enabled_ = true;
       LOGS_DEFAULT(VERBOSE) << "Context cache enabled.";
@@ -95,46 +124,79 @@ QNNExecutionProvider::QNNExecutionProvider(const ProviderOptions& provider_optio
   }
 
   static const std::string CONTEXT_CACHE_PATH = "qnn_context_cache_path";
-  auto context_cache_path_pos = runtime_options_.find(CONTEXT_CACHE_PATH);
-  if (context_cache_path_pos != runtime_options_.end()) {
-    context_cache_path_ = context_cache_path_pos->second;
-    LOGS_DEFAULT(VERBOSE) << "User specified context cache path: " << context_cache_path_;
+  auto context_cache_path_pos = provider_options_map.find(CONTEXT_CACHE_PATH);
+  if (context_cache_path_pos != provider_options_map.end()) {
+    context_cache_path_cfg_ = context_cache_path_pos->second;
+    LOGS_DEFAULT(VERBOSE) << "User specified context cache path: " << context_cache_path_cfg_;
+  }
+
+  static const std::string CONTEXT_CACHE_EMBED_MODE = "qnn_context_embed_mode";
+  auto context_cache_embed_mode_pos = provider_options_map.find(CONTEXT_CACHE_EMBED_MODE);
+  if (context_cache_embed_mode_pos != provider_options_map.end()) {
+    qnn_context_embed_mode_ = context_cache_embed_mode_pos->second == "1";
+    LOGS_DEFAULT(VERBOSE) << "User specified context cache embed mode: " << qnn_context_embed_mode_;
   }
 
   static const std::string BACKEND_PATH = "backend_path";
-  auto backend_path_pos = runtime_options_.find(BACKEND_PATH);
+  auto backend_path_pos = provider_options_map.find(BACKEND_PATH);
 
-  if (backend_path_pos != runtime_options_.end()) {
-    backend_path_ = backend_path_pos->second;
-    LOGS_DEFAULT(VERBOSE) << "Backend path: " << backend_path_;
+  std::string backend_path;
+  if (backend_path_pos != provider_options_map.end()) {
+    backend_path = backend_path_pos->second;
+    LOGS_DEFAULT(VERBOSE) << "Backend path: " << backend_path;
   } else {
     LOGS_DEFAULT(ERROR) << "No backend path provided.";
   }
 
   static const std::string PROFILING_LEVEL = "profiling_level";
-  auto profiling_level_pos = runtime_options_.find(PROFILING_LEVEL);
-  if (profiling_level_pos != runtime_options_.end()) {
+  auto profiling_level_pos = provider_options_map.find(PROFILING_LEVEL);
+  if (profiling_level_pos != provider_options_map.end()) {
     ParseProfilingLevel(profiling_level_pos->second);
   }
 
   static const std::string RPC_CONTROL_LANTENCY = "rpc_control_latency";
-  auto latency_pos = runtime_options_.find(RPC_CONTROL_LANTENCY);
-  if (latency_pos != runtime_options_.end()) {
+  auto latency_pos = provider_options_map.find(RPC_CONTROL_LANTENCY);
+  if (latency_pos != provider_options_map.end()) {
     rpc_control_latency_ = static_cast<uint32_t>(std::stoul(latency_pos->second));
     LOGS_DEFAULT(VERBOSE) << "rpc_control_latency: " << rpc_control_latency_;
   }
 
   htp_performance_mode_ = qnn::HtpPerformanceMode::kHtpDefault;
   static const std::string HTP_PERFORMANCE_MODE = "htp_performance_mode";
-  auto htp_performance_mode_pos = runtime_options_.find(HTP_PERFORMANCE_MODE);
-  if (htp_performance_mode_pos != runtime_options_.end()) {
+  auto htp_performance_mode_pos = provider_options_map.find(HTP_PERFORMANCE_MODE);
+  if (htp_performance_mode_pos != provider_options_map.end()) {
     ParseHtpPerformanceMode(htp_performance_mode_pos->second);
   }
 
-  qnn_backend_manager_ = std::make_unique<qnn::QnnBackendManager>(backend_path_,
-                                                                  profiling_level_,
-                                                                  rpc_control_latency_,
-                                                                  htp_performance_mode_);
+  htp_graph_finalization_opt_mode_ = qnn::HtpGraphFinalizationOptimizationMode::kDefault;
+  static const std::string HTP_GRAPH_FINALIZATION_OPT_MODE = "htp_graph_finalization_optimization_mode";
+  auto htp_graph_finalization_opt_mode_pos = provider_options_map.find(HTP_GRAPH_FINALIZATION_OPT_MODE);
+  if (htp_graph_finalization_opt_mode_pos != provider_options_map.end()) {
+    ParseHtpGraphFinalizationOptimizationMode(htp_graph_finalization_opt_mode_pos->second);
+  }
+
+  // Enable use of QNN Saver if the user provides a path the QNN Saver backend library.
+  static const std::string QNN_SAVER_PATH_KEY = "qnn_saver_path";
+  std::string qnn_saver_path;
+  auto qnn_saver_path_pos = provider_options_map.find(QNN_SAVER_PATH_KEY);
+  if (qnn_saver_path_pos != provider_options_map.end()) {
+    qnn_saver_path = qnn_saver_path_pos->second;
+    LOGS_DEFAULT(VERBOSE) << "User specified QNN Saver path: " << qnn_saver_path;
+  }
+
+  static const std::string QNN_CONTEXT_PRIORITY = "qnn_context_priority";
+  auto qnn_context_priority_pos = provider_options_map.find(QNN_CONTEXT_PRIORITY);
+  if (qnn_context_priority_pos != provider_options_map.end()) {
+    ParseQnnContextPriority(qnn_context_priority_pos->second);
+  }
+
+  qnn_backend_manager_ = std::make_unique<qnn::QnnBackendManager>(
+      std::move(backend_path),
+      profiling_level_,
+      rpc_control_latency_,
+      htp_performance_mode_,
+      context_priority_,
+      std::move(qnn_saver_path));
 }
 
 bool QNNExecutionProvider::IsNodeSupported(qnn::QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit,
@@ -217,22 +279,36 @@ QNNExecutionProvider::GetSupportedNodes(const GraphViewer& graph_viewer,
                                                 initializer_input_lookup,
                                                 qnn_backend_manager_->GetQnnBackendType());
 
-  for (const auto& node : graph_viewer.Nodes()) {
-    const NodeUnit* node_unit = node_unit_map.at(&node);
+  const auto& node_indices = graph_viewer.GetNodesInTopologicalOrder();
+  for (size_t i = 0; i < node_indices.size(); i++) {
+    gsl::not_null<const onnxruntime::Node*> node(graph_viewer.GetNode(node_indices[i]));
+
+    // Get the node_unit associated with the node. Note that the node may not be the node_unit's target node.
+    const NodeUnit* node_unit = node_unit_map.at(node);
+
+    // Visiting 'nodes' in topological order does not guarantee that 'node_units' are
+    // also visited in topological order. Skip this node if it is not the node_unit's target node
+    // to ensure 'node_units' are visited in topological order.
+    if (node != &node_unit->GetNode()) {
+      continue;
+    }
     const bool supported = IsNodeSupported(qnn_model_wrapper,
                                            *node_unit,
                                            node_unit_supported_result,
                                            logger);
     LOGS(logger, VERBOSE) << "Node supported: [" << supported
-                          << "] index: [" << node.Index()
-                          << "] name: [" << node.Name()
-                          << "] Operator type: [" << node.OpType()
+                          << "] index: [" << node->Index()
+                          << "] name: [" << node->Name()
+                          << "] Operator type: [" << node->OpType()
                           << "] as part of the NodeUnit type: [" << node_unit->OpType()
                           << "] index: [" << node_unit->Index()
                           << "] name: [" << node_unit->Name()
                           << "]";
     if (supported) {
-      supported_nodes.insert(&node);
+      // If the node_unit is supported, add all of its nodes to the supported list.
+      for (const auto* node_in_group : node_unit->GetAllNodesInGroup()) {
+        supported_nodes.insert(node_in_group);
+      }
     }
   }
 
@@ -250,10 +326,17 @@ QNNExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_viewer
 
   const auto& logger = *GetLogger();
   bool load_from_cached_context = false;
-  if (context_cache_enabled_) {
-    load_from_cached_context = qnn_backend_manager_->IsContextCacheFileExists(context_cache_path_,
-                                                                              graph_viewer.Description(),
-                                                                              graph_viewer.ModelPath().ToPathString());
+  bool is_qnn_ctx_model = qnn::IsQnnCtxModel(graph_viewer);
+  if (is_qnn_ctx_model) {
+    load_from_cached_context = true;
+  }
+
+  // This is for case: QDQ model + Onnx Qnn context cache model
+  if (context_cache_enabled_ && !is_qnn_ctx_model) {
+    onnxruntime::PathString context_cache_path;
+    load_from_cached_context = qnn::IsContextCacheFileExists(context_cache_path_cfg_,
+                                                             graph_viewer.ModelPath().ToPathString(),
+                                                             context_cache_path);
   }
 
   // Load from cached context will load the QnnSystem lib and skip the Qnn context creation
@@ -263,7 +346,7 @@ QNNExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_viewer
     return result;
   }
 
-  if (context_cache_enabled_ && !IsNpuBackend(qnn_backend_manager_->GetQnnBackendType())) {
+  if ((context_cache_enabled_ || is_qnn_ctx_model) && !IsNpuBackend(qnn_backend_manager_->GetQnnBackendType())) {
     LOGS(logger, ERROR) << "Qnn context cache only works for HTP or DSP backend.";
     return result;
   }
@@ -352,20 +435,6 @@ QNNExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_viewer
   }
 
   const size_t num_of_partitions = result.size();
-
-  if (load_from_cached_context && 1 == num_of_partitions) {
-    rt = qnn_backend_manager_->ValidateWithContextFile(GetFileNameFromModelPath(graph_viewer.ModelPath()),
-                                                       result[0]->sub_graph->GetMetaDef()->name);
-    if (Status::OK() != rt) {
-      LOGS(logger, ERROR) << "QNN failed to validate context cache metadata: " << rt.ErrorMessage();
-      return result;
-    }
-  }
-
-  if (num_of_partitions > 1) {
-    ORT_ENFORCE(!context_cache_enabled_, "Only support single partition for context cache feature.");
-  }
-
   const auto summary_msg = MakeString("Number of partitions supported by QNN EP: ", num_of_partitions,
                                       ", number of nodes in the graph: ", num_nodes_in_graph,
                                       ", number of nodes supported by QNN: ", num_of_supported_nodes);
@@ -410,6 +479,20 @@ Status QNNExecutionProvider::CreateComputeFunc(std::vector<NodeComputeInfo>& nod
   return Status::OK();
 }
 
+void QNNExecutionProvider::InitQnnGraphConfigs(qnn::QnnGraphConfigsBuilder& configs_builder) const {
+  if (qnn_backend_manager_->GetQnnBackendType() == qnn::QnnBackendType::HTP &&
+      htp_graph_finalization_opt_mode_ != qnn::HtpGraphFinalizationOptimizationMode::kDefault) {
+    QnnHtpGraph_CustomConfig_t& htp_graph_opt_config = configs_builder.PushHtpGraphCustomConfig();
+    htp_graph_opt_config.option = QNN_HTP_GRAPH_CONFIG_OPTION_OPTIMIZATION;
+    htp_graph_opt_config.optimizationOption.type = QNN_HTP_GRAPH_OPTIMIZATION_TYPE_FINALIZE_OPTIMIZATION_FLAG;
+    htp_graph_opt_config.optimizationOption.floatValue = static_cast<float>(htp_graph_finalization_opt_mode_);
+
+    QnnGraph_Config_t& graph_opt_config = configs_builder.PushGraphConfig();
+    graph_opt_config.option = QNN_GRAPH_CONFIG_OPTION_CUSTOM;
+    graph_opt_config.customConfig = &htp_graph_opt_config;
+  }
+}
+
 Status QNNExecutionProvider::CompileFromOrtGraph(const std::vector<FusedNodeAndGraph>& fused_nodes_and_graphs,
                                                  std::vector<NodeComputeInfo>& node_compute_funcs,
                                                  const logging::Logger& logger) {
@@ -420,7 +503,10 @@ Status QNNExecutionProvider::CompileFromOrtGraph(const std::vector<FusedNodeAndG
     std::unique_ptr<qnn::QnnModel> qnn_model = std::make_unique<qnn::QnnModel>(logger,
                                                                                qnn_backend_manager_.get());
 
-    ORT_RETURN_IF_ERROR(qnn_model->ComposeGraph(graph_viewer, fused_node));
+    qnn::QnnGraphConfigsBuilder graph_configs_builder;
+    InitQnnGraphConfigs(graph_configs_builder);
+
+    ORT_RETURN_IF_ERROR(qnn_model->ComposeGraph(graph_viewer, fused_node, graph_configs_builder.GetQnnGraphConfigs()));
     ORT_RETURN_IF_ERROR(qnn_model->FinalizeGraphs());
     ORT_RETURN_IF_ERROR(qnn_model->SetupQnnInputOutput());
 
@@ -435,45 +521,65 @@ Status QNNExecutionProvider::CompileFromOrtGraph(const std::vector<FusedNodeAndG
 Status QNNExecutionProvider::Compile(const std::vector<FusedNodeAndGraph>& fused_nodes_and_graphs,
                                      std::vector<NodeComputeInfo>& node_compute_funcs) {
   const auto& logger = *GetLogger();
+  Node& fused_node = fused_nodes_and_graphs[0].fused_node;
+  const onnxruntime::GraphViewer& graph_viewer(fused_nodes_and_graphs[0].filtered_graph);
+
+  bool is_qnn_ctx_model = false;
+  ORT_RETURN_IF_ERROR(qnn::IsFusedGraphHasCtxNode(fused_nodes_and_graphs, is_qnn_ctx_model));
+
+  onnxruntime::PathString context_cache_path;
+  bool is_ctx_file_exist = qnn::IsContextCacheFileExists(context_cache_path_cfg_,
+                                                         graph_viewer.ModelPath().ToPathString(),
+                                                         context_cache_path);
+  const std::string& model_name = graph_viewer.GetGraph().Name();
+  const std::string& model_description = graph_viewer.GetGraph().Description();
+  const std::string& graph_meta_id = fused_node.Name();
+  if (fused_nodes_and_graphs.size() == 1 && !is_qnn_ctx_model && is_ctx_file_exist) {
+    ORT_RETURN_IF_ERROR(qnn::ValidateWithContextFile(context_cache_path,
+                                                     model_name,
+                                                     model_description,
+                                                     graph_meta_id,
+                                                     logger));
+  }
 
-  if (context_cache_enabled_) {
-    ORT_ENFORCE(fused_nodes_and_graphs.size() == 1, "Only support single partition for context cache feature.");
-    Node& fused_node = fused_nodes_and_graphs[0].fused_node;
-    const onnxruntime::GraphViewer& graph_viewer(fused_nodes_and_graphs[0].filtered_graph);
-    // The dumy_model_description won't be used since IsContextCacheFileExists call cached the result
-    // The graph_viewer.Description here is not same with original model
-    std::string dumy_model_description = "";
-    bool load_from_cached_context = qnn_backend_manager_->IsContextCacheFileExists(context_cache_path_,
-                                                                                   dumy_model_description,
-                                                                                   graph_viewer.ModelPath().ToPathString());
+  if (is_qnn_ctx_model || (context_cache_enabled_ && is_ctx_file_exist)) {
+    ORT_RETURN_IF(fused_nodes_and_graphs.size() != 1, "Only support single partition for context cache feature.");
+    std::unique_ptr<qnn::QnnModel> qnn_model = std::make_unique<qnn::QnnModel>(logger, qnn_backend_manager_.get());
     // Load and execute from cached context if exist
-    if (load_from_cached_context) {
-      std::unique_ptr<qnn::QnnModel> qnn_model = std::make_unique<qnn::QnnModel>(logger,
-                                                                                 qnn_backend_manager_.get());
-      ORT_RETURN_IF_ERROR(qnn_backend_manager_->LoadCachedQnnContext(*(qnn_model.get())));
-      ORT_RETURN_IF_ERROR(qnn_model->SetGraphInputOutputInfo(graph_viewer, fused_node));
-      ORT_RETURN_IF_ERROR(qnn_model->SetupQnnInputOutput());
-
-      // fused node name is QNNExecutionProvider_QNN_[hash_id]_[id]
-      // the name here should be same with context->node_name in compute_info
-      LOGS(logger, VERBOSE) << "fused node name: " << fused_node.Name();
-      qnn_models_.emplace(fused_node.Name(), std::move(qnn_model));
-
-      ORT_RETURN_IF_ERROR(CreateComputeFunc(node_compute_funcs, logger));
-      return Status::OK();
-    } else {
-      // Load and execute from Onnx model if not exit and dump the context
-      ORT_RETURN_IF_ERROR(CompileFromOrtGraph(fused_nodes_and_graphs, node_compute_funcs, logger));
-      // graph_viewer.Name() is generated in GetCapability, e.g QNN_[hash_id]_[id]
-      // dump graph_viewer.Name() as metadata in context cache binary file, so that we can validate it in GetCapability
-      ORT_RETURN_IF_ERROR(qnn_backend_manager_->DumpQnnContext(GetFileNameFromModelPath(graph_viewer.ModelPath()),
-                                                               graph_viewer.Name()));
-    }
+    ORT_RETURN_IF_ERROR(qnn::LoadQnnCtxFromOnnxModel(graph_viewer,
+                                                     context_cache_path,
+                                                     is_qnn_ctx_model,
+                                                     is_ctx_file_exist,
+                                                     qnn_backend_manager_.get(),
+                                                     *(qnn_model.get()),
+                                                     logger));
+    ORT_RETURN_IF_ERROR(qnn_model->SetGraphInputOutputInfo(graph_viewer, fused_node));
+    ORT_RETURN_IF_ERROR(qnn_model->SetupQnnInputOutput());
+
+    // fused node name is QNNExecutionProvider_QNN_[hash_id]_[id]
+    // the name here should be same with context->node_name in compute_info
+    qnn_models_.emplace(graph_meta_id, std::move(qnn_model));
+
+    ORT_RETURN_IF_ERROR(CreateComputeFunc(node_compute_funcs, logger));
     return Status::OK();
   }
 
   ORT_RETURN_IF_ERROR(CompileFromOrtGraph(fused_nodes_and_graphs, node_compute_funcs, logger));
-
+  if (context_cache_enabled_ && !is_qnn_ctx_model) {
+    ORT_RETURN_IF(fused_nodes_and_graphs.size() != 1, "Only support single partition for context cache feature.");
+    uint64_t buffer_size(0);
+    auto context_buffer = qnn_backend_manager_->GetContextBinaryBuffer(buffer_size);
+    ORT_RETURN_IF_ERROR(qnn::GenerateCtxCacheOnnxModel(model_name,
+                                                       model_description,
+                                                       context_buffer.get(),
+                                                       buffer_size,
+                                                       qnn_backend_manager_->GetSdkVersion(),
+                                                       fused_nodes_and_graphs,
+                                                       qnn_models_,
+                                                       context_cache_path,
+                                                       qnn_context_embed_mode_,
+                                                       logger));
+  }
   return Status::OK();
 }
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/qnn/qnn_execution_provider.h b/onnxruntime/core/providers/qnn/qnn_execution_provider.h
index 2fe507b70a6ab..8c99a916a6f69 100644
--- a/onnxruntime/core/providers/qnn/qnn_execution_provider.h
+++ b/onnxruntime/core/providers/qnn/qnn_execution_provider.h
@@ -8,6 +8,7 @@
 #include <string>
 #include "core/providers/qnn/builder/qnn_backend_manager.h"
 #include "core/providers/qnn/builder/qnn_model.h"
+#include "core/providers/qnn/builder/qnn_graph_configs_helper.h"
 
 namespace onnxruntime {
 
@@ -55,18 +56,24 @@ class QNNExecutionProvider : public IExecutionProvider {
                              const logging::Logger& logger);
 
   void ParseHtpPerformanceMode(std::string htp_performance_mode_string);
+  void ParseQnnContextPriority(std::string context_priority_string);
+
+  void ParseHtpGraphFinalizationOptimizationMode(const std::string& htp_graph_finalization_opt_mode_string);
+
+  void InitQnnGraphConfigs(qnn::QnnGraphConfigsBuilder& configs_holder) const;
 
  private:
-  ProviderOptions runtime_options_;
-  std::string backend_path_;
   qnn::ProfilingLevel profiling_level_ = qnn::ProfilingLevel::OFF;
   qnn::HtpPerformanceMode htp_performance_mode_ = qnn::HtpPerformanceMode::kHtpDefault;
+  qnn::HtpGraphFinalizationOptimizationMode htp_graph_finalization_opt_mode_ = qnn::HtpGraphFinalizationOptimizationMode::kDefault;
   std::unique_ptr<qnn::QnnBackendManager> qnn_backend_manager_;
   std::unordered_map<std::string, std::unique_ptr<qnn::QnnModel>> qnn_models_;
   uint32_t rpc_control_latency_ = 0;
   bool context_cache_enabled_ = false;
-  std::string context_cache_path_ = "";
+  std::string context_cache_path_cfg_ = "";
   bool disable_cpu_ep_fallback_ = false;  // True if CPU EP fallback has been disabled for this session.
+  qnn::ContextPriority context_priority_ = qnn::ContextPriority::NORMAL;
+  bool qnn_context_embed_mode_ = true;
 };
 
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/rocm/cu_inc/common.cuh b/onnxruntime/core/providers/rocm/cu_inc/common.cuh
index 429ceb1f7c699..5f966ac746fcb 100644
--- a/onnxruntime/core/providers/rocm/cu_inc/common.cuh
+++ b/onnxruntime/core/providers/rocm/cu_inc/common.cuh
@@ -2,8 +2,6 @@
 // Licensed under the MIT License.
 
 #pragma once
-#include <type_traits>
-#include <memory>
 #include <stdint.h>
 #include <vector>
 #include <mutex>
@@ -294,6 +292,14 @@ __device__ __inline__ T _Gelu(T a) {
   return a * _Normcdf(a);
 }
 
+template <>
+__device__ __inline__ half _Gelu(half a) {
+  const half kHalf = half(0.5);
+  const half kOne = half(1.0);
+  const half kAlpha = half(M_SQRT1_2);
+  return a * kHalf * (kOne + _Erf(kAlpha * a));
+}
+
 template <typename T>
 __device__ __inline__ T _Mod(T a, T b) {
   T r = a % b;
@@ -348,21 +354,19 @@ struct GridDim {
   };
 };
 
-// aligned vector generates vectorized load/store
+// aligned vector generates vectorized load/store on ROCM
 template <typename T, int vec_size>
 struct alignas(sizeof(T) * vec_size) aligned_vector {
   T val[vec_size];
 };
 
-#define CALCULATE_ELEMENTWISE_INDEX_OR_EXIT(id, N)     \
+#define CALCULATE_ELEMENTWISE_INDEX_OR_EXIT(id, N)      \
   HIP_LONG id = blockDim.x * blockIdx.x + threadIdx.x; \
-  if (id >= N)                                         \
+  if (id >= N)                                          \
     return;
 
 // HIP_KERNEL_ASSERT is a macro that wraps an assert() call inside rocm kernels.
-// TODO ROCM added support recently, should verify.
-#define HIP_KERNEL_ASSERT(...)
-// #define HIP_KERNEL_ASSERT(...) assert(__VA_ARGS__)
+#define HIP_KERNEL_ASSERT(...) assert(__VA_ARGS__)
 
 // WARP related definitions and functions
 constexpr int GPU_WARP_SIZE = warpSize;
diff --git a/onnxruntime/core/providers/rocm/fpgeneric.cu b/onnxruntime/core/providers/rocm/fpgeneric.cu
index 4df7e0b5a5e3b..d130758bec084 100644
--- a/onnxruntime/core/providers/rocm/fpgeneric.cu
+++ b/onnxruntime/core/providers/rocm/fpgeneric.cu
@@ -68,7 +68,7 @@ rocblas_status rocblasTransposeHelper(hipStream_t stream, rocblas_handle, rocbla
 rocblas_status rocblasCopyHelper(hipStream_t stream, rocblas_handle, int n, const half* x, int incx, half* y, int incy) {
   dim3 dimGrid((unsigned int)(n + COPY_BLOCK_DIM - 1) / COPY_BLOCK_DIM, 1, 1);
   dim3 dimBlock(COPY_BLOCK_DIM, 1, 1);
-  CopyVectorHalf<<<dim3(dimGrid), dim3(dimBlock), 0, stream>>>(x, incx, y, incy, n);
+  CopyVectorHalf<<<dimGrid, dimBlock, 0, stream>>>(x, incx, y, incy, n);
   return rocblas_status_success;
 }
 
@@ -76,6 +76,6 @@ rocblas_status rocblasCopyHelper(hipStream_t stream, rocblas_handle, int n, cons
                                 onnxruntime::BFloat16* y, int incy) {
   dim3 dimGrid((unsigned int)(n + COPY_BLOCK_DIM - 1) / COPY_BLOCK_DIM, 1, 1);
   dim3 dimBlock(COPY_BLOCK_DIM, 1, 1);
-  CopyVectorBFloat16<<<dim3(dimGrid), dim3(dimBlock), 0, stream>>>(x, incx, y, incy, n);
+  CopyVectorBFloat16<<<dimGrid, dimBlock, 0, stream>>>(x, incx, y, incy, n);
   return rocblas_status_success;
 }
diff --git a/onnxruntime/core/providers/rocm/gpu_data_transfer.cc b/onnxruntime/core/providers/rocm/gpu_data_transfer.cc
index fd45ad675ac3e..635a25480b646 100644
--- a/onnxruntime/core/providers/rocm/gpu_data_transfer.cc
+++ b/onnxruntime/core/providers/rocm/gpu_data_transfer.cc
@@ -2,14 +2,15 @@
 // Licensed under the MIT License.
 
 #include "core/providers/shared_library/provider_api.h"
-#include "core/providers/rocm/rocm_common.h"
+
 #include "core/providers/rocm/gpu_data_transfer.h"
+#include "core/providers/rocm/rocm_common.h"
 
-// use default stream for copy for now, to avoid racing in BFC arena as in issue #4829
-// note this may cause some models to run slower if there are ops running on CPU
-// so we leave it as optional, in case user need the previous behavior
-// a full fix to BFC arena is being looked at, and once it's in, we can revert this change
 namespace onnxruntime {
+GPUDataTransfer::GPUDataTransfer() {}
+
+GPUDataTransfer::~GPUDataTransfer() {}
+
 bool GPUDataTransfer::CanCopy(const OrtDevice& src_device, const OrtDevice& dst_device) const {
   return src_device.Type() == OrtDevice::GPU || src_device.MemType() == OrtDevice::MemType::HIP_PINNED ||
          dst_device.Type() == OrtDevice::GPU || dst_device.MemType() == OrtDevice::MemType::HIP_PINNED;
@@ -34,12 +35,12 @@ common::Status GPUDataTransfer::CopyTensor(const Tensor& src, Tensor& dst) const
     } else {
       // copy from other CPU memory to GPU, this is blocking
       HIP_RETURN_IF_ERROR(hipMemcpy(dst_data, src_data, bytes, hipMemcpyHostToDevice));
-      HIP_RETURN_IF_ERROR(hipStreamSynchronize(nullptr));  // TODO: still need stream sync? since already blocking
+      HIP_RETURN_IF_ERROR(hipStreamSynchronize(nullptr));
     }
   } else if (src_device.Type() == OrtDevice::GPU) {
     // copying from GPU to CPU memory, this is blocking
     HIP_RETURN_IF_ERROR(hipMemcpy(dst_data, src_data, bytes, hipMemcpyDeviceToHost));
-    HIP_RETURN_IF_ERROR(hipStreamSynchronize(nullptr));  // TODO: still need stream sync? since already blocking
+    HIP_RETURN_IF_ERROR(hipStreamSynchronize(nullptr));
   } else {
     // copying between cpu memory
     memcpy(dst_data, src_data, bytes);
@@ -57,34 +58,29 @@ common::Status GPUDataTransfer::CopyTensorAsync(const Tensor& src, Tensor& dst,
   auto& dst_device = dst.Location().device;
 
   if (dst_device.Type() == OrtDevice::GPU) {
-    if (src_device.Type() == OrtDevice::CPU && src_device.MemType() == OrtDevice::MemType::HIP_PINNED) {
+    if (src_device.Type() == OrtDevice::CPU) {
       // copy from pinned memory to GPU, this is non-blocking
       HIP_RETURN_IF_ERROR(hipMemcpyAsync(dst_data, src_data, bytes, hipMemcpyHostToDevice, static_cast<hipStream_t>(stream.GetHandle())));
     } else if (src_device.Type() == OrtDevice::GPU) {
       // copying between GPU, this is non-blocking
-      // Copy only if the two addresses are different.
       if (dst_data != src_data) {
         HIP_RETURN_IF_ERROR(hipMemcpyAsync(dst_data, src_data, bytes, hipMemcpyDeviceToDevice, static_cast<hipStream_t>(stream.GetHandle())));
       }
-    } else {
-      // copy from other CPU memory to GPU, this is blocking
-      HIP_RETURN_IF_ERROR(hipMemcpyAsync(dst_data, src_data, bytes, hipMemcpyHostToDevice, static_cast<hipStream_t>(stream.GetHandle())));
-      HIP_RETURN_IF_ERROR(hipStreamSynchronize(static_cast<hipStream_t>(stream.GetHandle())));
     }
   } else if (src_device.Type() == OrtDevice::GPU) {
-    if (dst_device.Type() == OrtDevice::CPU && dst_device.MemType() == OrtDevice::MemType::HIP_PINNED) {
+    if (dst_device.Type() == OrtDevice::CPU) {
       // copying from GPU to pinned memory, this is non-blocking
       HIP_RETURN_IF_ERROR(hipMemcpyAsync(dst_data, src_data, bytes, hipMemcpyDeviceToHost, static_cast<hipStream_t>(stream.GetHandle())));
-    } else {
-      // copying from GPU to CPU memory, this is blocking
-      HIP_RETURN_IF_ERROR(hipMemcpyAsync(dst_data, src_data, bytes, hipMemcpyDeviceToHost, static_cast<hipStream_t>(stream.GetHandle())));
-      HIP_RETURN_IF_ERROR(hipStreamSynchronize(static_cast<hipStream_t>(stream.GetHandle())));
     }
   } else {
-    // copying between cpu memory
+    if (src_device.MemType() == OrtDevice::MemType::CUDA_PINNED) {
+      // sync the stream first to make sure the data arrived
+      HIP_RETURN_IF_ERROR(hipStreamSynchronize(static_cast<hipStream_t>(stream.GetHandle())));
+    }
     memcpy(dst_data, src_data, bytes);
   }
 
   return Status::OK();
 }
+
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/rocm/gpu_data_transfer.h b/onnxruntime/core/providers/rocm/gpu_data_transfer.h
index 3d35ed52fff5c..3d297bdce4a93 100644
--- a/onnxruntime/core/providers/rocm/gpu_data_transfer.h
+++ b/onnxruntime/core/providers/rocm/gpu_data_transfer.h
@@ -10,8 +10,8 @@ namespace onnxruntime {
 
 class GPUDataTransfer : public IDataTransfer {
  public:
-  GPUDataTransfer() = default;
-  ~GPUDataTransfer() = default;
+  GPUDataTransfer();
+  ~GPUDataTransfer();
 
   bool CanCopy(const OrtDevice& src_device, const OrtDevice& dst_device) const override;
 
diff --git a/onnxruntime/core/providers/rocm/integer_gemm.cc b/onnxruntime/core/providers/rocm/integer_gemm.cc
index 3c82a436d74e0..9771f42fd3637 100644
--- a/onnxruntime/core/providers/rocm/integer_gemm.cc
+++ b/onnxruntime/core/providers/rocm/integer_gemm.cc
@@ -5,13 +5,14 @@
 #include <rocblas/rocblas.h>
 #include "core/providers/rocm/shared_inc/integer_gemm.h"
 
+#include "core/common/safeint.h"
 #include "core/providers/rocm/rocm_common.h"
 #include "core/providers/rocm/shared_inc/rocm_call.h"
 
 namespace onnxruntime {
 namespace rocm {
 
-inline int roundoff(int v, int d) {
+constexpr int roundoff(int v, int d) {
   return (v + d - 1) / d * d;
 }
 
@@ -21,20 +22,21 @@ Status GemmInt8(int m, int n, int k,
                 const RocmKernel* rocm_kernel, onnxruntime::Stream* ort_stream) {
   ORT_ENFORCE(a != nullptr && b != nullptr && c != nullptr, "input matrix should not be null");
   ORT_ENFORCE(rocm_kernel != nullptr, "kernel is null");
+  ORT_ENFORCE(ort_stream != nullptr, "Rocm kernel must have the stream instance");
 
-  hipStream_t stream = ort_stream ? static_cast<hipStream_t>(ort_stream->GetHandle()) : nullptr;
+  hipStream_t stream = static_cast<hipStream_t>(ort_stream->GetHandle());
 
   // pad A and B to make their leading dimension be multiples of 32
-  // because cublasGemmEx requires:
+  // because rocblas_gemm_ex requires:
   // 1. leading dimension is multiples of 4
   // 2. A, B is 32-bit aligned
 
-  const int mask = 0x1F;
+  constexpr int mask = 0x1F;
   int lda_aligned = lda;
   IAllocatorUniquePtr<int8_t> a_padded;
   if ((mask & lda_aligned) != 0) {
     lda_aligned = roundoff(lda, 32);
-    a_padded = rocm_kernel->GetScratchBuffer<int8_t>(m * lda_aligned, ort_stream);
+    a_padded = rocm_kernel->GetScratchBuffer<int8_t>(SafeInt<size_t>(m) * lda_aligned, ort_stream);
     HIP_RETURN_IF_ERROR(hipMemcpy2DAsync(a_padded.get(), lda_aligned, a, lda, k, m, hipMemcpyDeviceToDevice, stream));
   }
 
@@ -42,14 +44,15 @@ Status GemmInt8(int m, int n, int k,
   IAllocatorUniquePtr<int8_t> b_padded;
   if ((mask & ldb_aligned) != 0) {
     ldb_aligned = roundoff(ldb, 32);
-    b_padded = rocm_kernel->GetScratchBuffer<int8_t>(k * ldb_aligned, ort_stream);
+    b_padded = rocm_kernel->GetScratchBuffer<int8_t>(SafeInt<size_t>(k) * ldb_aligned, ort_stream);
     HIP_RETURN_IF_ERROR(hipMemcpy2DAsync(b_padded.get(), ldb_aligned, b, ldb, n, k, hipMemcpyDeviceToDevice, stream));
   }
 
-  RocmStream* ort_rocm_stream = static_cast<RocmStream*>(ort_stream);
-  auto handle = ort_rocm_stream->rocblas_handle_;
+  auto* ort_rocm_stream = dynamic_cast<RocmStream*>(ort_stream);
+  auto rocblas = ort_rocm_stream->rocblas_handle_;
+
   ROCBLAS_RETURN_IF_ERROR(rocblas_gemm_ex(
-      handle,
+      rocblas,
       rocblas_operation_none, rocblas_operation_none,
       n, m, k,
       &alpha,
diff --git a/onnxruntime/core/providers/rocm/math/einsum.h b/onnxruntime/core/providers/rocm/math/einsum.h
index a4adc3da98436..6be412348e6dd 100644
--- a/onnxruntime/core/providers/rocm/math/einsum.h
+++ b/onnxruntime/core/providers/rocm/math/einsum.h
@@ -17,8 +17,7 @@ class Einsum final : public onnxruntime::Einsum {
   Einsum(const OpKernelInfo& info) : onnxruntime::Einsum(info) {
     // We need to cast away the const as PerThreadRocblasHandle() is currently a non-const method
     // TODO: Clean up the ROCMExecutionProvider interface to avoid this
-    rocm_ep_ = const_cast<ROCMExecutionProvider*>(
-        static_cast<const ROCMExecutionProvider*>(info.GetExecutionProvider()));
+    rocm_ep_ = static_cast<const ROCMExecutionProvider*>(info.GetExecutionProvider());
   }
 
   Status Compute(OpKernelContext* context) const override;
@@ -32,7 +31,7 @@ class Einsum final : public onnxruntime::Einsum {
   using onnxruntime::Einsum::equation_;
 
   // We need to access to the ROCM EP instance to get the rocblas/miopen handles
-  ROCMExecutionProvider* rocm_ep_;
+  const ROCMExecutionProvider* rocm_ep_;
 };
 
 }  // namespace rocm
diff --git a/onnxruntime/core/providers/rocm/math/einsum_utils/einsum_auxiliary_ops.h b/onnxruntime/core/providers/rocm/math/einsum_utils/einsum_auxiliary_ops.h
index 623bb1d590a27..e1fc3f40ee9a5 100644
--- a/onnxruntime/core/providers/rocm/math/einsum_utils/einsum_auxiliary_ops.h
+++ b/onnxruntime/core/providers/rocm/math/einsum_utils/einsum_auxiliary_ops.h
@@ -21,19 +21,18 @@ namespace EinsumOp {
 // Holds ROCM assets required for ROCM ops that need to be executed as part of the Einsum flow
 struct EinsumRocmAssets {
   explicit EinsumRocmAssets(rocblas_handle rocblas_handle,
-                            ROCMExecutionProvider* rocm_ep,
-                            Stream* ort_stream,
-                            AllocatorPtr gpu_allocator) : rocblas_handle_(rocblas_handle),
-                                                          rocm_ep_(rocm_ep),
-                                                          ort_stream_(ort_stream),
-                                                          gpu_allocator_(gpu_allocator) {}
+                            const ROCMExecutionProvider* rocm_ep,
+                            Stream* ort_stream, AllocatorPtr gpu_allocator) : rocblas_handle_(rocblas_handle),
+                                                                              rocm_ep_(rocm_ep),
+                                                                              ort_stream_(ort_stream),
+                                                                              gpu_allocator_(gpu_allocator) {}
 
   hipStream_t GetRocmStream() {
     return ort_stream_ ? static_cast<hipStream_t>(ort_stream_->GetHandle()) : nullptr;
   }
 
   rocblas_handle rocblas_handle_;
-  ROCMExecutionProvider* rocm_ep_;
+  const ROCMExecutionProvider* rocm_ep_;
   Stream* ort_stream_;
   AllocatorPtr gpu_allocator_;
 };
diff --git a/onnxruntime/core/providers/rocm/math/softmax.cc b/onnxruntime/core/providers/rocm/math/softmax.cc
index 5a07737d92a02..8d922d0bb4db1 100644
--- a/onnxruntime/core/providers/rocm/math/softmax.cc
+++ b/onnxruntime/core/providers/rocm/math/softmax.cc
@@ -29,20 +29,23 @@ Status SoftMaxComputeHelper(
   auto X_data = reinterpret_cast<const HipT_IN*>(X);
 
   if (D <= 1024 && D * sizeof(T) <= 4096) {
-    return dispatch_warpwise_softmax_forward<HipT_IN, HipT_OUT, AccumulationType_t<HipT_ACCUM>, IsLogSoftmax>(
-        stream, Y_data, X_data, gsl::narrow_cast<int>(D),
-        gsl::narrow_cast<int>(D), gsl::narrow_cast<int>(N), tuning_ctx);
+    return dispatch_warpwise_softmax_forward<
+        HipT_IN, HipT_OUT, AccumulationType_t<HipT_ACCUM>, IsLogSoftmax>(
+        stream, Y_data, X_data, gsl::narrow_cast<int>(D), gsl::narrow_cast<int>(D), gsl::narrow_cast<int>(N), tuning_ctx);
   }
+
   return dispatch_blockwise_softmax_forward<HipT_IN, HipT_OUT, AccumulationType_t<HipT_ACCUM>, IsLogSoftmax>(
-      stream, Y_data, X_data, gsl::narrow_cast<int>(D), gsl::narrow_cast<int>(D),
-      gsl::narrow_cast<int>(D), gsl::narrow_cast<int>(N), tuning_ctx);
+      stream, Y_data, X_data, gsl::narrow_cast<int>(D), gsl::narrow_cast<int>(D), gsl::narrow_cast<int>(D),
+      gsl::narrow_cast<int>(N), tuning_ctx);
 }
 
-#define SPECIALIZED_SOFTMAX_HELPER_IMPL(T, TOut)                                                                           \
-  template Status SoftMaxComputeHelper<T, TOut, false>(Stream * stream, const T* input, const TensorShape& shape, TOut* Y, \
-                                                       int64_t axis, RocmTuningContext* tuning_ctx);                       \
-  template Status SoftMaxComputeHelper<T, TOut, true>(Stream * stream, const T* input, const TensorShape& shape, TOut* Y,  \
-                                                      int64_t axis, RocmTuningContext* tuning_ctx);
+#define SPECIALIZED_SOFTMAX_HELPER_IMPL(T, TOut)                                                        \
+  template Status SoftMaxComputeHelper<T, TOut, false>(Stream * stream, const T* input,                 \
+                                                       const TensorShape& shape, TOut* Y, int64_t axis, \
+                                                       RocmTuningContext* tuning_ctx);                  \
+  template Status SoftMaxComputeHelper<T, TOut, true>(Stream * stream, const T* input,                  \
+                                                      const TensorShape& shape, TOut* Y, int64_t axis,  \
+                                                      RocmTuningContext* tuning_ctx);
 
 SPECIALIZED_SOFTMAX_HELPER_IMPL(MLFloat16, float)
 SPECIALIZED_SOFTMAX_HELPER_IMPL(float, float)
diff --git a/onnxruntime/core/providers/rocm/math/softmax_ck.cuh b/onnxruntime/core/providers/rocm/math/softmax_ck.cuh
index 5830c9dd0bf27..f87b436d04a17 100644
--- a/onnxruntime/core/providers/rocm/math/softmax_ck.cuh
+++ b/onnxruntime/core/providers/rocm/math/softmax_ck.cuh
@@ -58,7 +58,7 @@ auto GetCKSoftmaxTypeStringAndOps() {
       auto arg = impl->MakeArgumentPointer(in_lengths, in_strides, reduce_dims, alpha, beta,
                                            params->input, params->output, nop, nop);
       TUNABLE_OP_RETURN_UNSUPPORTED_ARGUMENT_IF(!impl->IsSupportedArgument(arg.get()),
-                                                impl->GetTypeString(), " does not support ", params->Signature());
+                                                impl->GetTypeString(), " does not support the params");
       invoker->Run(arg.get(), StreamConfig{params->StreamHandle()});
       return Status::OK();
     };
diff --git a/onnxruntime/core/providers/rocm/math/softmax_triton.cuh b/onnxruntime/core/providers/rocm/math/softmax_triton.cuh
index 737e396855e35..cc0e0d70056cc 100644
--- a/onnxruntime/core/providers/rocm/math/softmax_triton.cuh
+++ b/onnxruntime/core/providers/rocm/math/softmax_triton.cuh
@@ -60,7 +60,7 @@ auto GetSoftmaxTritonOps() {
       } args = {(void*)params->output, (const void*)params->input, params->input_stride, params->output_stride, params->softmax_elements};
 
       // grid dim is (batch_count, 1, 1)
-      return LaunchTritonKernel(params->stream, i, params->batch_count, 1, 1, &args, sizeof(args));
+      return LaunchTritonKernel(params->StreamHandle(), i, params->batch_count, 1, 1, &args, sizeof(args));
     };
     ret.emplace_back(std::make_pair(metadata->name, std::move(impl)));
   }
diff --git a/onnxruntime/core/providers/rocm/miopen_common.cc b/onnxruntime/core/providers/rocm/miopen_common.cc
index e77bbab2d5750..6b01f02ae49b5 100644
--- a/onnxruntime/core/providers/rocm/miopen_common.cc
+++ b/onnxruntime/core/providers/rocm/miopen_common.cc
@@ -73,7 +73,12 @@ Status MiopenTensor::CreateTensorIfNeeded() {
   return Status::OK();
 }
 
-Status MiopenTensor::Set(gsl::span<const int64_t> input_dims, miopenDataType_t dataType) {
+Status MiopenTensor::Set(gsl::span<const int64_t> input_dims, miopenDataType_t dataType, bool is_nhwc) {
+  if (is_nhwc) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, NOT_IMPLEMENTED,
+                           "NHWC Tensor usage is not supported in AMD builds for now");
+  }
+
   ORT_RETURN_IF_ERROR(CreateTensorIfNeeded());
 
   int rank = gsl::narrow_cast<int>(input_dims.size());
diff --git a/onnxruntime/core/providers/rocm/miopen_common.h b/onnxruntime/core/providers/rocm/miopen_common.h
index 7d9cabb0951c6..eb4eb745b3692 100644
--- a/onnxruntime/core/providers/rocm/miopen_common.h
+++ b/onnxruntime/core/providers/rocm/miopen_common.h
@@ -33,7 +33,7 @@ class MiopenTensor final {
   ~MiopenTensor();
   ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(MiopenTensor);
 
-  Status Set(gsl::span<const int64_t> input_dims, miopenDataType_t dataType);
+  Status Set(gsl::span<const int64_t> input_dims, miopenDataType_t dataType, bool is_nhwc = false);
   Status Set(miopenDataType_t dataType, miopenTensorLayout_t tensor_layout, int n, int c, int h, int w);
   Status Set(const MiopenTensor& x_desc, miopenBatchNormMode_t mode);
 
diff --git a/onnxruntime/core/providers/rocm/nn/conv.cc b/onnxruntime/core/providers/rocm/nn/conv.cc
index 6846813c7cb48..6214ec7bc0ea3 100644
--- a/onnxruntime/core/providers/rocm/nn/conv.cc
+++ b/onnxruntime/core/providers/rocm/nn/conv.cc
@@ -44,14 +44,13 @@ const miopenConvFwdAlgorithm_t Conv<T, NHWC>::kAllAlgos[] = {
     miopenConvolutionFwdAlgoWinograd,
     miopenConvolutionFwdAlgoImplicitGEMM};
 
-miopenStatus_t GetWorkspaceSize(miopenHandle_t handle, const MiopenConvState<miopenConvAlgoPerf_t>& s,
-                                miopenConvFwdAlgorithm_t algo, size_t* sz) {
+miopenStatus_t GetWorkspaceSize(miopenHandle_t handle, const MiopenConvState<miopenConvAlgoPerf_t>& s, miopenConvFwdAlgorithm_t algo, size_t* sz) {
   return miopenConvolutionForwardGetWorkSpaceSize(handle, s.w_desc, s.x_tensor, s.conv_desc, s.y_tensor, sz);
 }
 
 size_t GetMaxWorkspaceSize(miopenHandle_t handle, const MiopenConvState<miopenConvAlgoPerf_t>& s,
                            const miopenConvFwdAlgorithm_t* algo, int n_algo) {
-  // TODO: get maximum available size from memory arean
+  // TODO: get maximum available size from memory arena
   size_t free, total;
   HIP_CALL_THROW(hipMemGetInfo(&free, &total));
   // Assuming 10% of fragmentation
@@ -68,8 +67,7 @@ size_t GetMaxWorkspaceSize(miopenHandle_t handle, const MiopenConvState<miopenCo
 }
 
 Status SliceOutUnwantedOutputSection(hipStream_t stream,
-                                     const void* input_data,
-                                     const gsl::span<const int64_t>& input_dims,
+                                     const void* input_data, gsl::span<const int64_t> input_dims,
                                      void* output_data,
                                      const gsl::span<const int64_t>& output_dims,
                                      const gsl::span<const int64_t>& starts,
@@ -103,8 +101,7 @@ Status Conv<T, NHWC>::UpdateState(OpKernelContext* context, bool bias_expected)
   // Make sure input and weight are 4D for NHWC since we set 4D descriptor for NHWC.
   constexpr bool channels_last = NHWC;
   if (channels_last && (x_shape.NumDimensions() != 4 || w_shape.NumDimensions() != 4)) {
-    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
-                           "Number of dimensions of X and W should be 4 for channels_last format (NHWC)");
+    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Number of dimensions of X and W should be 4 for channels_last format (NHWC)");
   }
 
   // set B
@@ -140,7 +137,7 @@ Status Conv<T, NHWC>::UpdateState(OpKernelContext* context, bool bias_expected)
 
     const size_t kernel_rank = kernel_shape.size();
 
-    ConvAttributes::ConvPadVector pads(conv_attrs_.pads);
+    ConvPadVector pads(conv_attrs_.pads);
     if (pads.empty()) {
       pads.resize(kernel_rank * 2, 0);
     }
@@ -174,7 +171,7 @@ Status Conv<T, NHWC>::UpdateState(OpKernelContext* context, bool bias_expected)
     TensorShapeVector slice_axes;
     slice_axes.reserve(kernel_rank);
 
-    const size_t spatial_dim_start = channels_last ? 1 : 2;
+    constexpr size_t spatial_dim_start = channels_last ? 1 : 2;
     const size_t spatial_dim_end = spatial_dim_start + kernel_rank;
     TensorShape spatial_shape = X->Shape().Slice(spatial_dim_start, spatial_dim_end);
 
@@ -183,7 +180,6 @@ Status Conv<T, NHWC>::UpdateState(OpKernelContext* context, bool bias_expected)
                                                                      strides, dilations, pads, y_dims, y_dims_with_adjusted_pads,
                                                                      post_slicing_required, slice_starts, slice_ends, slice_axes,
                                                                      channels_last));
-
     if (channels_last) {
       y_dims.push_back(M);
       y_dims_with_adjusted_pads.push_back(M);
@@ -198,9 +194,6 @@ Status Conv<T, NHWC>::UpdateState(OpKernelContext* context, bool bias_expected)
     s_.slice_axes = slice_axes;
 
     s_.Y = context->Output(0, TensorShape(s_.y_dims));
-    if (s_.Y->Shape().Size() == 0) {
-      return Status::OK();
-    }
     if (post_slicing_required) {
       // Post slicing needed. Create and fill in the Conv results in an intermediate buffer.
       s_.memory_for_miopen_conv_results = GetScratchBuffer<void>(TensorShape(y_dims_with_adjusted_pads).Size() * s_.element_size, context->GetComputeStream());
@@ -225,18 +218,23 @@ Status Conv<T, NHWC>::UpdateState(OpKernelContext* context, bool bias_expected)
     }
 
     if (w_dims_changed) {
-      if (channels_last) {
+      if (!channels_last) {
+        ORT_RETURN_IF_ERROR(s_.w_desc.Set(w_dims, MiopenTensor::GetDataType<HipT>()));
+      } else {
         ORT_RETURN_IF_ERROR(s_.w_desc.Set(MiopenTensor::GetDataType<HipT>(),
                                           miopenTensorNHWC,
                                           w_dims[0],
                                           w_dims[3],
                                           w_dims[1],
                                           w_dims[2]));
-      } else {
-        ORT_RETURN_IF_ERROR(s_.w_desc.Set(w_dims, MiopenTensor::GetDataType<HipT>()));
       }
     }
 
+    // We must delay returning early until here so that the weight dims have been cached properly
+    if (s_.Y->Shape().Size() == 0) {
+      return Status::OK();
+    }
+
     if (channels_last) {
       ORT_RETURN_IF_ERROR(s_.x_tensor.Set(MiopenTensor::GetDataType<HipT>(),
                                           miopenTensorNHWC,
@@ -357,7 +355,7 @@ Status Conv<T, NHWC>::ComputeInternal(OpKernelContext* context) const {
   // To deal with asymmetric padding, we may have over-padded on one or both sides of the spatial dimensions
   // This may have lead to extra results that are unnecessary and hence we slice that off here
   if (s_.post_slicing_required) {
-    ORT_RETURN_IF_ERROR(SliceOutUnwantedOutputSection(Stream(context), s_.y_data, s_.y_dims_with_adjusted_pads,
+    ORT_RETURN_IF_ERROR(SliceOutUnwantedOutputSection(Stream(context), s_.y_data, gsl::make_span(s_.y_dims_with_adjusted_pads),
                                                       s_.Y->MutableDataRaw(), s_.y_dims.GetDims(), s_.slice_starts,
                                                       s_.slice_ends, s_.slice_axes, s_.element_size));
   }
@@ -384,18 +382,18 @@ MiopenConvolutionDescriptor::~MiopenConvolutionDescriptor() {
 
 Status MiopenConvolutionDescriptor::Set(
     size_t rank,
-    gsl::span<const int64_t> pads,
-    gsl::span<const int64_t> strides,
-    gsl::span<const int64_t> dilations,
+    const gsl::span<const int64_t>& pads,
+    const gsl::span<const int64_t>& strides,
+    const gsl::span<const int64_t>& dilations,
     int groups,
     miopenConvolutionMode_t mode,
     miopenDataType_t data_type) {
   if (!desc_)
     MIOPEN_RETURN_IF_ERROR(miopenCreateConvolutionDescriptor(&desc_));
 
-  InlinedVector<int> pad_dims(rank);
-  InlinedVector<int> stride_dims(rank);
-  InlinedVector<int> dilation_dims(rank);
+  InlinedVector<int, kTensorShapeSmallBufferElementsSize> pad_dims(rank);
+  InlinedVector<int, kTensorShapeSmallBufferElementsSize> stride_dims(rank);
+  InlinedVector<int, kTensorShapeSmallBufferElementsSize> dilation_dims(rank);
   for (size_t i = 0; i < rank; i++) {
     pad_dims[i] = gsl::narrow_cast<int>(pads[i]);
     stride_dims[i] = gsl::narrow_cast<int>(strides[i]);
diff --git a/onnxruntime/core/providers/rocm/nn/conv.h b/onnxruntime/core/providers/rocm/nn/conv.h
index f4f2331e9197e..bc9846203e57d 100644
--- a/onnxruntime/core/providers/rocm/nn/conv.h
+++ b/onnxruntime/core/providers/rocm/nn/conv.h
@@ -10,6 +10,9 @@
 #include <list>
 
 namespace onnxruntime {
+
+using ConvPadVector = ConvAttributes::ConvPadVector;
+
 namespace rocm {
 
 class MiopenConvolutionDescriptor final {
@@ -18,9 +21,9 @@ class MiopenConvolutionDescriptor final {
   ~MiopenConvolutionDescriptor();
 
   Status Set(size_t rank,
-             gsl::span<const int64_t> pads,
-             gsl::span<const int64_t> strides,
-             gsl::span<const int64_t> dilations,
+             const gsl::span<const int64_t>& pads,
+             const gsl::span<const int64_t>& strides,
+             const gsl::span<const int64_t>& dilations,
              int groups,
              miopenConvolutionMode_t mode,
              miopenDataType_t data_type);
@@ -198,7 +201,7 @@ class Conv : public RocmKernel {
 
 Status SliceOutUnwantedOutputSection(hipStream_t stream,
                                      const void* input_data,
-                                     const gsl::span<const int64_t>& input_dims,
+                                     gsl::span<const int64_t> input_dims,
                                      void* output_data,
                                      const gsl::span<const int64_t>& output_dims,
                                      const gsl::span<const int64_t>& starts,
diff --git a/onnxruntime/core/providers/rocm/nn/conv_transpose.cc b/onnxruntime/core/providers/rocm/nn/conv_transpose.cc
index 475d26d2e306d..7447113fdf847 100644
--- a/onnxruntime/core/providers/rocm/nn/conv_transpose.cc
+++ b/onnxruntime/core/providers/rocm/nn/conv_transpose.cc
@@ -16,7 +16,7 @@ namespace rocm {
       T,                                                                                   \
       kRocmExecutionProvider,                                                              \
       (*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), \
-      ConvTranspose<T>);                                                                   \
+      ConvTranspose<T, false>);                                                            \
   ONNX_OPERATOR_TYPED_KERNEL_EX(                                                           \
       ConvTranspose,                                                                       \
       kOnnxDomain,                                                                         \
@@ -24,20 +24,20 @@ namespace rocm {
       T,                                                                                   \
       kRocmExecutionProvider,                                                              \
       (*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), \
-      ConvTranspose<T>);
+      ConvTranspose<T, false>);
 
 REGISTER_KERNEL_TYPED(float)
 // not yet supported in MIOpen
 // REGISTER_KERNEL_TYPED(double)
 REGISTER_KERNEL_TYPED(MLFloat16)
 
-template <typename T>
-Status ConvTranspose<T>::ComputeInternal(OpKernelContext* context) const {
+template <typename T, bool NHWC>
+Status ConvTranspose<T, NHWC>::ComputeInternal(OpKernelContext* context) const {
   return DoConvTranspose(context, false);
 }
 
-template <typename T>
-Status ConvTranspose<T>::DoConvTranspose(OpKernelContext* context, bool dynamic_padding) const {
+template <typename T, bool NHWC>
+Status ConvTranspose<T, NHWC>::DoConvTranspose(OpKernelContext* context, bool dynamic_padding) const {
   typedef typename ToHipType<T>::MappedType HipT;
 
   const Tensor* X = context->Input<Tensor>(0);
diff --git a/onnxruntime/core/providers/rocm/nn/conv_transpose.h b/onnxruntime/core/providers/rocm/nn/conv_transpose.h
index b4b80aeec9421..55a84cc59fe92 100644
--- a/onnxruntime/core/providers/rocm/nn/conv_transpose.h
+++ b/onnxruntime/core/providers/rocm/nn/conv_transpose.h
@@ -12,10 +12,12 @@
 namespace onnxruntime {
 namespace rocm {
 
-template <typename T>
+template <typename T, bool NHWC>
 class ConvTranspose : public RocmKernel {
  public:
-  ConvTranspose(const OpKernelInfo& info) : RocmKernel(info), conv_transpose_attrs_(info){};
+  ConvTranspose(const OpKernelInfo& info) : RocmKernel(info), conv_transpose_attrs_(info) {
+    static_assert(!NHWC, "AMD builds don't support usage of NHWC ops");
+  };
   Status ComputeInternal(OpKernelContext* context) const override;
   Status DoConvTranspose(OpKernelContext* context, bool dynamic_padding) const;
 
diff --git a/onnxruntime/core/providers/rocm/reduction/reduction_ops.cc b/onnxruntime/core/providers/rocm/reduction/reduction_ops.cc
index 4f726017d8b14..820745b22f614 100644
--- a/onnxruntime/core/providers/rocm/reduction/reduction_ops.cc
+++ b/onnxruntime/core/providers/rocm/reduction/reduction_ops.cc
@@ -8,6 +8,9 @@
 #include "core/providers/rocm/math/binary_elementwise_ops_impl.h"
 #include "core/providers/rocm/math/binary_elementwise_ops.h"
 #include "core/providers/rocm/math/unary_elementwise_ops_impl.h"
+#ifdef ENABLE_TRAINING
+#include "contrib_ops/cpu/aten_ops/aten_op.h"
+#endif
 
 using namespace onnxruntime::common;
 namespace onnxruntime {
@@ -100,8 +103,8 @@ namespace rocm {
       (*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), \
       name<T>);
 
-// ROCM ArgMax/ArgMin doesn't have OpSet12 implementation (with select_last_index attr), keep it in OpSet11 for now.
-#define REGISTER_KERNEL_TYPED_11(name, T)                                                  \
+// ROCM ArgMax/ArgMin doesn't have OpSet12+ implementation (with select_last_index attr) yet
+#define REGISTER_KERNEL_VERSIONED_TYPED_11(name, T)                                        \
   ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_EX(                                                 \
       name,                                                                                \
       kOnnxDomain,                                                                         \
@@ -110,10 +113,10 @@ namespace rocm {
       kRocmExecutionProvider,                                                              \
       (*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), \
       name<T>);                                                                            \
-  ONNX_OPERATOR_TYPED_KERNEL_EX(                                                           \
+  ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_EX(                                                 \
       name,                                                                                \
       kOnnxDomain,                                                                         \
-      11,                                                                                  \
+      11, 11,                                                                              \
       T,                                                                                   \
       kRocmExecutionProvider,                                                              \
       (*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), \
@@ -166,7 +169,6 @@ Status ReduceKernel<allow_multi_axes>::ReduceKernelShared(
   const auto rank = input_shape.NumDimensions();
 
   auto hip_stream = stream ? static_cast<hipStream_t>(stream->GetHandle()) : nullptr;
-
   // Block of fast matrix reduction.
   if (fast_reduction_) {
     int m{}, n{};
@@ -210,10 +212,8 @@ Status ReduceKernel<allow_multi_axes>::ReduceKernelShared(
     ORT_RETURN_IF_ERROR(reduce_desc.Set(miopen_reduce_op, MiopenTensor::GetDataType<float>(), ReduceTensorIndices));
   else
     ORT_RETURN_IF_ERROR(reduce_desc.Set(miopen_reduce_op, miopen_type_X, ReduceTensorIndices));
-
   const auto one = ReduceConsts<HipT>::One;
   const auto zero = ReduceConsts<HipT>::Zero;
-
   MiopenTensor input_tensor;
   MiopenTensor output_tensor;
   ORT_RETURN_IF_ERROR(input_tensor.Set(input_dims_miopen, miopen_type_X));
@@ -444,17 +444,18 @@ template <typename T, miopenReduceTensorIndices_t ReduceTensorIndices>
 Status ReduceComputeCore(const AllocatorPtr& gpu_allocator, const Tensor& input, PrepareReduceMetadata& prepare_reduce_metadata,
                          /*out*/ Tensor& output, miopenReduceTensorOp_t miopen_reduce_op,
                          gsl::span<const int64_t> axes,
-                         bool calculate_log, bool calculate_sqt, bool log_sum_exp, bool fast_reduction, Stream* ort_stream,
+                         bool calculate_log, bool calculate_sqt, bool log_sum_exp, bool fast_reduction,
+                         Stream* ort_stream,
                          const TensorShape* input_shape_override) {
   typedef typename ToHipType<T>::MappedType HipT;
   const TensorShape& input_shape = input_shape_override ? *input_shape_override : input.Shape();
+  hipStream_t stream = ort_stream ? static_cast<hipStream_t>(ort_stream->GetHandle()) : nullptr;
 
   int64_t input_count = prepare_reduce_metadata.input_count;
   int64_t output_count = prepare_reduce_metadata.output_count;
   auto& output_dims = prepare_reduce_metadata.output_dims;
   auto& input_dims_miopen = prepare_reduce_metadata.input_dims_miopen;
   auto& output_dims_miopen = prepare_reduce_metadata.output_dims_miopen;
-  hipStream_t stream = ort_stream ? static_cast<hipStream_t>(ort_stream->GetHandle()) : nullptr;
   // special case when there is a dim value of 0 in the shape.
   if (input_count == 0) {
     assert(output.Shape().Size() == 0);
@@ -540,7 +541,6 @@ Status ReduceComputeCore(const AllocatorPtr& gpu_allocator, const Tensor& input,
 
   const auto one = ReduceConsts<HipT>::One;
   const auto zero = ReduceConsts<HipT>::Zero;
-
   MiopenTensor input_tensor;
   MiopenTensor output_tensor;
   ORT_RETURN_IF_ERROR(input_tensor.Set(input_dims_miopen, miopen_type_X));
@@ -588,11 +588,12 @@ Status ReduceComputeCore(const AllocatorPtr& gpu_allocator, const Tensor& input,
         MIOPEN_RETURN_IF_ERROR(miopenGetReductionIndicesSize(RocmKernel::GetMiopenHandle(rocm_stream), reduce_max_desc,
                                                              input_tensor, output_tensor, &indices_bytes_max));
         auto indices_rocm_max = indices_bytes == 0 ? nullptr : IAllocator::MakeUniquePtr<uint32_t>(gpu_allocator, indices_bytes, false, ort_stream, WaitRocmNotificationOnDevice);
+        auto* p_output = reinterpret_cast<HipT*>(output.template MutableData<T>());
         MIOPEN_RETURN_IF_ERROR(miopenReduceTensor(
             RocmKernel::GetMiopenHandle(rocm_stream), reduce_max_desc, indices_rocm_max.get(), indices_bytes_max,
             workspace_rocm.get(), workspace_bytes,
             &one, input_tensor, reinterpret_cast<const HipT*>(input.Data<T>()),
-            &zero, output_tensor, reinterpret_cast<HipT*>(output.MutableData<T>())));
+            &zero, output_tensor, p_output));
       }
 
       // Exp(X-ReduceMax)
@@ -652,11 +653,12 @@ Status ReduceComputeCore(const AllocatorPtr& gpu_allocator, const Tensor& input,
       if (input_count == output_count) {
         HIP_RETURN_IF_ERROR(hipMemcpyAsync(reinterpret_cast<HipT*>(output.MutableData<T>()), input_data, input_count * sizeof(T), hipMemcpyDeviceToDevice, stream));
       } else {
+        auto* p_output = reinterpret_cast<HipT*>(output.template MutableData<T>());
         MIOPEN_RETURN_IF_ERROR(miopenReduceTensor(
             RocmKernel::GetMiopenHandle(rocm_stream), reduce_desc, indices_rocm.get(), indices_bytes,
             workspace_rocm.get(), workspace_bytes,
             &one, input_tensor, input_data,
-            &zero, output_tensor, reinterpret_cast<HipT*>(output.MutableData<T>())));
+            &zero, output_tensor, p_output));
       }
     } else {
       // miopenReduceTensor for ReduceSum has issue if input and output has same size, we just need to copy the data for this case
@@ -675,11 +677,12 @@ Status ReduceComputeCore(const AllocatorPtr& gpu_allocator, const Tensor& input,
 
           Impl_Cast<float, HipT>(stream, temp_output.get(), reinterpret_cast<HipT*>(output.MutableData<T>()), output_count);
         } else {
+          auto* p_output = reinterpret_cast<HipT*>(output.template MutableData<T>());
           MIOPEN_RETURN_IF_ERROR(miopenReduceTensor(
               RocmKernel::GetMiopenHandle(rocm_stream), reduce_desc, indices_rocm.get(), indices_bytes,
               workspace_rocm.get(), workspace_bytes,
               &one, input_tensor, reinterpret_cast<const HipT*>(input.Data<T>()),
-              &zero, output_tensor, reinterpret_cast<HipT*>(output.MutableData<T>())));
+              &zero, output_tensor, p_output));
         }
       }
     }
@@ -743,18 +746,29 @@ Status ReduceKernel<allow_multi_axes>::ComputeImpl(OpKernelContext* ctx, miopenR
   // empty axes and no-op
   if (axes.empty() && noop_with_empty_axes_) {
     auto* Y = ctx->Output(0, X->Shape());
-    HIP_RETURN_IF_ERROR(hipMemcpyAsync(Y->MutableData<T>(), X->Data<T>(), X->SizeInBytes(), hipMemcpyDeviceToDevice, Stream(ctx)));
+    HIP_RETURN_IF_ERROR(hipMemcpyAsync(Y->MutableData<T>(), X->Data<T>(), X->SizeInBytes(),
+                                       hipMemcpyDeviceToDevice, Stream(ctx)));
     return Status::OK();
   }
 
+#ifdef ENABLE_TRAINING
+  // Use ATen for ReduceSum if possible.
+  const TensorShape& input_shape = X->Shape();
+  if (contrib::IsATenOperatorExecutorInitialized() && miopen_reduce_op == MIOPEN_REDUCE_TENSOR_ADD && !calculate_log_ &&
+      !calculate_sqt_ && !log_sum_exp_ && input_shape.Size() > 0) {
+    if (axes.empty()) {
+      axes.resize(input_shape.NumDimensions());
+      std::iota(axes.begin(), axes.end(), 0);
+    }
+    ORT_RETURN_IF_ERROR(contrib::ExecuteReduceSumATen(ctx, axes, keepdims_));
+    return Status::OK();
+  }
+#endif
+
   PrepareReduceMetadata prepare_reduce_metadata;
-  ORT_RETURN_IF_ERROR(PrepareForReduce(X,
-                                       keepdims_,
-                                       axes,
-                                       prepare_reduce_metadata));
+  ORT_RETURN_IF_ERROR(PrepareForReduce(X, keepdims_, axes, prepare_reduce_metadata));
   Tensor* Y = ctx->Output(0, prepare_reduce_metadata.squeezed_output_dims);
   const bool fast_reduction = fast_reduction_ && !ctx->GetUseDeterministicCompute();
-
   return ReduceComputeCore<T, ReduceTensorIndices>(Info().GetAllocator(OrtMemType::OrtMemTypeDefault), *X, prepare_reduce_metadata, *Y, miopen_reduce_op, axes,
                                                    calculate_log_, calculate_sqt_, log_sum_exp_, fast_reduction, ctx->GetComputeStream());
 }
@@ -837,7 +851,6 @@ Status ReduceKernel<allow_multi_axes>::ComputeImpl(OpKernelContext* ctx, miopenR
     MIOPEN_RETURN_IF_ERROR(miopenReduceTensor(GetMiopenHandle(ctx), reduce_desc, indices_rocm.get(), indices_bytes,         \
                                               workspace_rocm.get(), workspace_bytes, &one, input_tensor, temp_X.get(),      \
                                               &zero, output_tensor, temp_Y.get()));                                         \
-                                                                                                                            \
     Impl_Cast<float, HipT>(Stream(ctx), temp_Y.get(), reinterpret_cast<HipT*>(Y->MutableData<T>()), output_count);          \
                                                                                                                             \
     return Status::OK();                                                                                                    \
@@ -909,13 +922,13 @@ template std::unique_ptr<Tensor> ReduceCompute<MLFloat16, MIOPEN_REDUCE_TENSOR_N
   REGISTER_KERNEL_TYPED(name, BFloat16)
 // REGISTER_KERNEL_TYPED(name, double)
 
-#define REGISTER_KERNEL_HFD_11(name)        \
-  REGISTER_KERNEL_TYPED_11(name, MLFloat16) \
-  REGISTER_KERNEL_TYPED_11(name, float)
-// REGISTER_KERNEL_TYPED_11(name, double)
+#define REGISTER_KERNEL_HFD_VERSIONED_11(name)        \
+  REGISTER_KERNEL_VERSIONED_TYPED_11(name, MLFloat16) \
+  REGISTER_KERNEL_VERSIONED_TYPED_11(name, float)
+// REGISTER_KERNEL_VERSIONED_TYPED_11(name, double)
 
-REGISTER_KERNEL_HFD_11(ArgMax)
-REGISTER_KERNEL_HFD_11(ArgMin)
+REGISTER_KERNEL_HFD_VERSIONED_11(ArgMax)
+REGISTER_KERNEL_HFD_VERSIONED_11(ArgMin)
 REGISTER_KERNEL_HFD(ReduceL1)
 REGISTER_KERNEL_HFD(ReduceL2)
 
diff --git a/onnxruntime/core/providers/rocm/rocm_allocator.cc b/onnxruntime/core/providers/rocm/rocm_allocator.cc
index 84337b66b9e3d..8645b791d4b0f 100644
--- a/onnxruntime/core/providers/rocm/rocm_allocator.cc
+++ b/onnxruntime/core/providers/rocm/rocm_allocator.cc
@@ -51,9 +51,8 @@ void* ROCMAllocator::Alloc(size_t size) {
 
 void ROCMAllocator::Free(void* p) {
   SetDevice(false);
-  CheckDevice(false);  // ignore ROCM failure when free
-  // do not throw error since it's OK for hipFree to fail during shutdown; void to silence nodiscard
-  (void)hipFree(p);
+  CheckDevice(false);                   // ignore ROCM failure when free
+  ORT_IGNORE_RETURN_VALUE(hipFree(p));  // do not throw error since it's OK for hipFree to fail during shutdown
 }
 
 void* ROCMExternalAllocator::Alloc(size_t size) {
diff --git a/onnxruntime/core/providers/rocm/rocm_allocator.h b/onnxruntime/core/providers/rocm/rocm_allocator.h
index bfd6400b37881..04de09ab9c00b 100644
--- a/onnxruntime/core/providers/rocm/rocm_allocator.h
+++ b/onnxruntime/core/providers/rocm/rocm_allocator.h
@@ -3,7 +3,6 @@
 
 #pragma once
 
-#include <unordered_set>
 #include "core/common/inlined_containers.h"
 #include "core/framework/allocator.h"
 #include "core/platform/ort_mutex.h"
@@ -56,7 +55,7 @@ class ROCMPinnedAllocator : public IAllocator {
   ROCMPinnedAllocator(const char* name)
       : IAllocator(
             OrtMemoryInfo(name, OrtAllocatorType::OrtDeviceAllocator,
-                          OrtDevice(OrtDevice::CPU, OrtDevice::MemType::HIP_PINNED, 0),
+                          OrtDevice(OrtDevice::CPU, OrtDevice::MemType::HIP_PINNED, 0 /*CPU device always with id 0*/),
                           0, OrtMemTypeCPUOutput)) {}
 
   void* Alloc(size_t size) override;
diff --git a/onnxruntime/core/providers/rocm/rocm_call.cc b/onnxruntime/core/providers/rocm/rocm_call.cc
index 730f55608c725..484e59f4de7d8 100644
--- a/onnxruntime/core/providers/rocm/rocm_call.cc
+++ b/onnxruntime/core/providers/rocm/rocm_call.cc
@@ -39,11 +39,11 @@ const char* RocmErrString<rocblas_status>(rocblas_status e) {
     CASE_ENUM_TO_STR(rocblas_status_invalid_handle);
     CASE_ENUM_TO_STR(rocblas_status_not_implemented);
     CASE_ENUM_TO_STR(rocblas_status_invalid_pointer);
+    CASE_ENUM_TO_STR(rocblas_status_size_query_mismatch);
     CASE_ENUM_TO_STR(rocblas_status_invalid_size);
     CASE_ENUM_TO_STR(rocblas_status_memory_error);
     CASE_ENUM_TO_STR(rocblas_status_internal_error);
     CASE_ENUM_TO_STR(rocblas_status_perf_degraded);
-    CASE_ENUM_TO_STR(rocblas_status_size_query_mismatch);
     CASE_ENUM_TO_STR(rocblas_status_size_increased);
     CASE_ENUM_TO_STR(rocblas_status_size_unchanged);
     CASE_ENUM_TO_STR(rocblas_status_invalid_value);
diff --git a/onnxruntime/core/providers/rocm/rocm_execution_provider.cc b/onnxruntime/core/providers/rocm/rocm_execution_provider.cc
index c9975d0bc76c0..d7c5098d9dbe4 100644
--- a/onnxruntime/core/providers/rocm/rocm_execution_provider.cc
+++ b/onnxruntime/core/providers/rocm/rocm_execution_provider.cc
@@ -1,6 +1,7 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
+#include "core/common/inlined_containers.h"
 #include "core/providers/shared_library/provider_api.h"
 #include "core/platform/env_var_utils.h"
 #include "core/providers/rocm/rocm_execution_provider.h"
@@ -9,7 +10,6 @@
 #include "core/providers/rocm/rocm_fwd.h"
 #include "core/providers/rocm/gpu_data_transfer.h"
 #include "core/providers/rocm/rocm_profiler.h"
-#include "core/providers/rocm/rocm_stream_handle.h"
 
 #ifndef DISABLE_CONTRIB_OPS
 #include "contrib_ops/rocm/rocm_contrib_kernels.h"
@@ -23,6 +23,8 @@
 #include "core/providers/rocm/triton_kernel.h"
 #endif
 
+#include "core/providers/rocm/rocm_stream_handle.h"
+
 using namespace onnxruntime::common;
 
 namespace onnxruntime {
@@ -38,42 +40,64 @@ class Memcpy final : public OpKernel {
       ORT_ENFORCE(X != nullptr, "Memcpy: Input tensor is nullptr.");
       Tensor* Y = ctx->Output(0, X->Shape());
       ORT_ENFORCE(Y != nullptr, "Memcpy: Failed to allocate output tensor.");
-      const IDataTransfer* gpu_data_transfer = Info().GetDataTransferManager().GetDataTransfer(X->Location().device, Y->Location().device);
-      return gpu_data_transfer->CopyTensorAsync(*X, *Y, *ctx->GetComputeStream());
-    } else if (X_type->IsSparseTensorType()) {
-      const auto* X = ctx->Input<SparseTensor>(0);
-      ORT_ENFORCE(X != nullptr, "Memcpy: Input tensor is nullptr.");
-      SparseTensor* Y = ctx->OutputSparse(0, X->DenseShape());
-      ORT_ENFORCE(Y != nullptr, "Memcpy: Failed to allocate output sparse tensor.");
-      return X->Copy(Info().GetDataTransferManager(), *Y);
-    } else if (X_type->IsTensorSequenceType()) {
-      const TensorSeq* X = ctx->Input<TensorSeq>(0);
-      ORT_ENFORCE(X != nullptr, "Memcpy: Input tensor sequence is nullptr.");
-      TensorSeq* Y = ctx->Output<TensorSeq>(0);
-      ORT_ENFORCE(Y != nullptr, "Memcpy: Failed to allocate output tensor sequence.");
-      auto X_dtype = X->DataType();
-      Y->SetType(X_dtype);
-      AllocatorPtr alloc;
-      auto status = ctx->GetTempSpaceAllocator(&alloc);
-      if (!status.IsOK()) {
-        return Status(common::ONNXRUNTIME, common::FAIL,
-                      "Memcpy rocm: unable to get an allocator.");
-      }
-      auto X_size = X->Size();
-      Y->Reserve(X_size);
-      for (size_t i = 0; i < X_size; ++i) {
-        const Tensor& source_tensor = X->Get(i);
-        std::unique_ptr<Tensor> target_tensor = Tensor::Create(source_tensor.DataType(), source_tensor.Shape(), alloc);
-        const IDataTransfer* gpu_data_transfer = Info().GetDataTransferManager().GetDataTransfer(source_tensor.Location().device, target_tensor->Location().device);
-        Status retval = gpu_data_transfer->CopyTensorAsync(source_tensor, *target_tensor, *ctx->GetComputeStream());
-        if (!retval.IsOK()) {
-          return retval;
+      // do we support async copy?
+      // The rocmMemCpyAsync will handle the pinned memory and non-pinned memory,
+      // so we don't need the check here.
+      auto* gpu_data_transfer = Info().GetDataTransferManager().GetDataTransfer(X->Location().device, Y->Location().device);
+      ORT_RETURN_IF_ERROR(gpu_data_transfer->CopyTensorAsync(*X, *Y, *ctx->GetComputeStream()));
+      return Status::OK();
+    } else {
+      if (X_type->IsSparseTensorType()) {
+        // TODO: support aysnc copy for sparse tensor
+        // sync the stream first, since it is a sync memory copy
+        HIP_CALL_THROW(hipStreamSynchronize(static_cast<hipStream_t>(ctx->GetComputeStream()->GetHandle())));
+        const auto* X = ctx->Input<SparseTensor>(0);
+        ORT_ENFORCE(X != nullptr, "Memcpy: Input tensor is nullptr.");
+        SparseTensor* Y = ctx->OutputSparse(0, X->DenseShape());
+        ORT_ENFORCE(Y != nullptr, "Memcpy: Failed to allocate output sparse tensor.");
+        return X->Copy(Info().GetDataTransferManager(), *Y);
+      } else if (X_type->IsTensorSequenceType()) {
+        const TensorSeq* X = ctx->Input<TensorSeq>(0);
+        ORT_ENFORCE(X != nullptr, "Memcpy: Input tensor sequence is nullptr.");
+        TensorSeq* Y = ctx->Output<TensorSeq>(0);
+        ORT_ENFORCE(Y != nullptr, "Memcpy: Failed to allocate output tensor sequence.");
+        auto X_dtype = X->DataType();
+        Y->SetType(X_dtype);
+        AllocatorPtr alloc;
+
+        // If we are copying contents to ROCM, the allocator to use
+        // to allocate the buffers of the new tensors in the sequence
+        // can be temp space allocator associated with the ROCM EP
+        if (Node().OpType() == "MemcpyFromHost") {
+          auto status = ctx->GetTempSpaceAllocator(&alloc);
+          if (!status.IsOK()) {
+            return Status(common::ONNXRUNTIME, common::FAIL,
+                          "Memcpy rocm: unable to get an allocator.");
+          }
+        } else {
+          // If we are copying contents to CPU (op type is "MemcpyToHost"),
+          // the allocator to use to allocate the buffers of the new tensors
+          // in the sequence will be the allocator from the CPU EP
+          auto status = ctx->GetTempSpaceCPUAllocator(&alloc);
+          if (!status.IsOK()) {
+            return Status(common::ONNXRUNTIME, common::FAIL,
+                          "Memcpy rocm: unable to get the CPU allocator.");
+          }
+        }
+        auto X_size = X->Size();
+        Y->Reserve(X_size);
+        for (size_t i = 0; i < X_size; ++i) {
+          const Tensor& source_tensor = X->Get(i);
+          std::unique_ptr<Tensor> target_tensor = Tensor::Create(source_tensor.DataType(), source_tensor.Shape(), alloc);
+          auto* gpu_data_transfer = Info().GetDataTransferManager().GetDataTransfer(source_tensor.Location().device,
+                                                                                    target_tensor->Location().device);
+          ORT_RETURN_IF_ERROR(gpu_data_transfer->CopyTensorAsync(source_tensor, *target_tensor, *ctx->GetComputeStream()));
+          Y->Add(std::move(*target_tensor));
         }
-        Y->Add(std::move(*target_tensor));
+        return Status::OK();
       }
-      return Status::OK();
+      return Status(common::ONNXRUNTIME, common::FAIL, "Memcpy: Unsupported input type.");
     }
-    return Status(common::ONNXRUNTIME, common::FAIL, "Memcpy: Unsupported input type.");
   }
 };
 
@@ -100,18 +124,23 @@ ONNX_OPERATOR_KERNEL_EX(
 
 }  // namespace rocm
 
-AllocatorPtr ROCMExecutionProvider::CreateRocmAllocator(OrtDevice::DeviceId device_id, size_t gpu_mem_limit, ArenaExtendStrategy arena_extend_strategy,
-                                                        ROCMExecutionProviderExternalAllocatorInfo external_allocator_info, OrtArenaCfg* default_memory_arena_cfg) {
+AllocatorPtr ROCMExecutionProvider::CreateRocmAllocator(OrtDevice::DeviceId device_id,
+                                                        size_t gpu_mem_limit,
+                                                        ArenaExtendStrategy arena_extend_strategy,
+                                                        ROCMExecutionProviderExternalAllocatorInfo external_allocator_info,
+                                                        const OrtArenaCfg* default_memory_arena_cfg) {
   if (external_allocator_info.UseExternalAllocator()) {
     AllocatorCreationInfo default_memory_info(
         [external_allocator_info](OrtDevice::DeviceId id) {
-          return std::make_unique<ROCMExternalAllocator>(id, HIP, external_allocator_info.alloc, external_allocator_info.free, external_allocator_info.empty_cache);
+          return std::make_unique<ROCMExternalAllocator>(id, HIP,
+                                                         external_allocator_info.alloc,
+                                                         external_allocator_info.free,
+                                                         external_allocator_info.empty_cache);
         },
         device_id,
         false);
 
     return CreateAllocator(default_memory_info);
-
   } else {
     AllocatorCreationInfo default_memory_info(
         [](OrtDevice::DeviceId id) {
@@ -120,12 +149,7 @@ AllocatorPtr ROCMExecutionProvider::CreateRocmAllocator(OrtDevice::DeviceId devi
         device_id,
         true,
         {default_memory_arena_cfg ? *default_memory_arena_cfg
-                                  : OrtArenaCfg(gpu_mem_limit,
-                                                static_cast<int>(arena_extend_strategy),
-                                                -1,
-                                                -1,
-                                                -1,
-                                                -1)},
+                                  : OrtArenaCfg(gpu_mem_limit, static_cast<int>(arena_extend_strategy), -1, -1, -1, -1L)},
         // make it stream aware
         true,
         // enable cross stream sharing?
@@ -149,20 +173,8 @@ ROCMExecutionProvider::PerThreadContext::PerThreadContext(OrtDevice::DeviceId de
 }
 
 ROCMExecutionProvider::PerThreadContext::~PerThreadContext() {
-  // dtor shouldn't throw. if something went wrong earlier (e.g. out of ROCM memory) the handles
-  // here may be bad, and the destroy calls can throw.
-  // https://isocpp.github.io/CppCoreGuidelines/CppCoreGuidelines#Rc-dtor-noexcept
-  try {
-    ROCBLAS_CALL_THROW(rocblas_destroy_handle(rocblas_handle_));
-  } catch (const std::exception& ex) {
-    LOGS_DEFAULT(ERROR) << "rocblas_destroy_handle threw:" << ex.what();
-  }
-
-  try {
-    MIOPEN_CALL_THROW(miopenDestroy(miopen_handle_));
-  } catch (const std::exception& ex) {
-    LOGS_DEFAULT(ERROR) << "miopenDestroy threw:" << ex.what();
-  }
+  ORT_IGNORE_RETURN_VALUE(ROCBLAS_CALL(rocblas_destroy_handle(rocblas_handle_)));
+  ORT_IGNORE_RETURN_VALUE(MIOPEN_CALL(miopenDestroy(miopen_handle_)));
 }
 
 void OverrideTunableOpInfoByEnv(ROCMExecutionProviderInfo& info) {
@@ -235,7 +247,7 @@ ROCMExecutionProvider::~ROCMExecutionProvider() {
   }
 
   if (!external_stream_ && stream_) {
-    HIP_CALL_THROW(hipStreamDestroy(stream_));
+    ORT_IGNORE_RETURN_VALUE(HIP_CALL(hipStreamDestroy(stream_)));
   }
 }
 
@@ -315,7 +327,7 @@ Status ROCMExecutionProvider::OnRunStart() {
 
 Status ROCMExecutionProvider::OnRunEnd(bool sync_stream) {
   if (sync_stream) {
-    HIP_RETURN_IF_ERROR(hipStreamSynchronize(stream_));
+    HIP_RETURN_IF_ERROR(hipStreamSynchronize(static_cast<hipStream_t>(stream_)));
   }
 
   // In extreme cases (e.g., 1-op graph and that op fallbacks to CPU),
@@ -716,12 +728,12 @@ class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDom
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, 12, Mod);
 
 // opset 11
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, float, ArgMax);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, double, ArgMax);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, MLFloat16, ArgMax);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, float, ArgMin);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, double, ArgMin);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, MLFloat16, ArgMin);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 11, float, ArgMax);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 11, double, ArgMax);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 11, MLFloat16, ArgMax);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 11, float, ArgMin);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 11, double, ArgMin);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 11, MLFloat16, ArgMin);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, Compress);
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, Concat);
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, Flatten);
@@ -774,7 +786,7 @@ class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kO
 class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, float, ReduceSumSquare);
 class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, double, ReduceSumSquare);
 class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, MLFloat16, ReduceSumSquare);
-class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 18, Scan);
+class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 15, Scan);
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, ScatterElements);
 class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, int32_t, Slice);
 class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, int64_t, Slice);
@@ -827,12 +839,10 @@ class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kO
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, float, Round);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, double, Round);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, MLFloat16, Round);
-
 class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, 12, int8_t, QuantizeLinear);
 class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, 12, uint8_t, QuantizeLinear);
 class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, 12, int8_t, DequantizeLinear);
 class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, 12, uint8_t, DequantizeLinear);
-
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 13, CumSum);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, int64_t_int64_t_int64_t, OneHot);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, int64_t_float_int64_t, OneHot);
@@ -1087,6 +1097,17 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, bool, Pad);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, SpaceToDepth);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, DepthToSpace);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int8_t, Sign);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int16_t, Sign);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int32_t, Sign);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int64_t, Sign);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, uint8_t, Sign);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, uint16_t, Sign);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, uint32_t, Sign);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, uint64_t, Sign);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, Sign);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, Sign);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, Sign);
 
 class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, BFloat16, Add);
 class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, BFloat16, Sub);
@@ -1105,17 +1126,6 @@ class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kO
 class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 18, uint8_t, QuantizeLinear);
 class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 18, int8_t, DequantizeLinear);
 class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 18, uint8_t, DequantizeLinear);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int8_t, Sign);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int16_t, Sign);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int32_t, Sign);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int64_t, Sign);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, uint8_t, Sign);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, uint16_t, Sign);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, uint32_t, Sign);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, uint64_t, Sign);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, Sign);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, Sign);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, Sign);
 
 // OpSet 14
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, CumSum);
@@ -1186,12 +1196,13 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 15, 18, Shape);
 
 // Opset 16
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, float, PRelu);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, double, PRelu);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, MLFloat16, PRelu);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, float, LeakyRelu);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, double, LeakyRelu);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, MLFloat16, LeakyRelu);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, float, PRelu);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, double, PRelu);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, MLFloat16, PRelu);
+class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, 18, Scan);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, MLFloat16, Where);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, float, Where);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, double_t, Where);
@@ -1258,979 +1269,932 @@ KernelCreateInfo BuildKernelCreateInfo<void>() {
   return {};
 }
 
+// clang-format off
 static Status RegisterRocmKernels(KernelRegistry& kernel_registry) {
   static const BuildKernelCreateInfoFn function_table[] = {
-      BuildKernelCreateInfo<void>,  // default entry to avoid the list become empty after ops-reducing
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, MemcpyFromHost)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, MemcpyToHost)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 4, 10, Concat)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, Unsqueeze)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 8, Flatten)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, Squeeze)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 12, Identity)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 9, Dropout)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, float, Cos)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, double, Cos)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, MLFloat16, Cos)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, float, Sin)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, double, Sin)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, MLFloat16, Sin)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, Gather)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 8, float, Gemm)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 8, double, Gemm)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 8, MLFloat16, Gemm)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 10, float, Gemm)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 10, double, Gemm)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 10, MLFloat16, Gemm)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 8, float, MatMul)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 8, double, MatMul)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 8, MLFloat16, MatMul)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, float, MatMul)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, double, MatMul)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, MLFloat16, MatMul)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, int8_t, MatMulInteger)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 10, float, Clip)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, float, Elu)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, double, Elu)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, MLFloat16, Elu)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, float, HardSigmoid)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, double, HardSigmoid)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, MLFloat16, HardSigmoid)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 15, float, LeakyRelu)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 15, double, LeakyRelu)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 15, MLFloat16, LeakyRelu)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, float, Relu)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, double, Relu)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, MLFloat16, Relu)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, float, Selu)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, double, Selu)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, MLFloat16, Selu)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, float, Sigmoid)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, double, Sigmoid)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, MLFloat16, Sigmoid)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, float, Softsign)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, double, Softsign)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, MLFloat16, Softsign)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, float, Tanh)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, double, Tanh)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, MLFloat16, Tanh)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, float, Softplus)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, double, Softplus)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, MLFloat16, Softplus)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, float, Softmax)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, double, Softmax)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, Softmax)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, float, LogSoftmax)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, double, LogSoftmax)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, LogSoftmax)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 11, float, Pow)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 11, double, Pow)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 11, MLFloat16, Pow)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 8, float, PRelu)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 8, double, PRelu)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 8, MLFloat16, PRelu)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 15, float, PRelu)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 15, double, PRelu)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 15, MLFloat16, PRelu)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, bool, And)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, bool, Or)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, bool, Xor)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 7, Sum)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 8, 12, Sum)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 11, Max)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 12, Max)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 11, Min)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 12, Min)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 8, float, Greater)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 8, double, Greater)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 8, MLFloat16, Greater)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 10, bool, Equal)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 10, int32_t, Equal)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 10, int64_t, Equal)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 8, 12, Expand)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, int32_t, Greater)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, int64_t, Greater)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, uint32_t, Greater)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, uint64_t, Greater)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, float, Greater)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, double, Greater)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, MLFloat16, Greater)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 15, int32_t, GreaterOrEqual)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 15, int64_t, GreaterOrEqual)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 15, uint32_t, GreaterOrEqual)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 15, uint64_t, GreaterOrEqual)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 15, float, GreaterOrEqual)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 15, double, GreaterOrEqual)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 15, MLFloat16, GreaterOrEqual)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 15, int32_t, LessOrEqual)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 15, int64_t, LessOrEqual)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 15, uint32_t, LessOrEqual)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 15, uint64_t, LessOrEqual)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 15, float, LessOrEqual)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 15, double, LessOrEqual)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 15, MLFloat16, LessOrEqual)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, int32_t, Add)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, int64_t, Add)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, uint32_t, Add)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, uint64_t, Add)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, float, Add)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, double, Add)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, MLFloat16, Add)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, int32_t, Sub)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, int64_t, Sub)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, uint32_t, Sub)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, uint64_t, Sub)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, float, Sub)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, double, Sub)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, MLFloat16, Sub)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, int32_t, Mul)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, int64_t, Mul)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, uint32_t, Mul)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, uint64_t, Mul)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, float, Mul)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, double, Mul)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, MLFloat16, Mul)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, int32_t, Div)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, int64_t, Div)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, uint32_t, Div)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, uint64_t, Div)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, float, Div)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, double, Div)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, MLFloat16, Div)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, int8_t, Abs)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, int16_t, Abs)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, int32_t, Abs)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, int64_t, Abs)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, uint8_t, Abs)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, uint16_t, Abs)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, uint32_t, Abs)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, uint64_t, Abs)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, float, Abs)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, double, Abs)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, MLFloat16, Abs)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, int8_t, Neg)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, int16_t, Neg)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, int32_t, Neg)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, int64_t, Neg)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, float, Neg)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, double, Neg)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, MLFloat16, Neg)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, float, Floor)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, double, Floor)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, MLFloat16, Floor)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, float, Ceil)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, double, Ceil)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, MLFloat16, Ceil)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, float, Reciprocal)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, double, Reciprocal)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, MLFloat16, Reciprocal)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, float, Sqrt)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, double, Sqrt)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, MLFloat16, Sqrt)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, float, Log)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, double, Log)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, MLFloat16, Log)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, float, Exp)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, double, Exp)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, MLFloat16, Exp)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, float, Erf)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, double, Erf)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, MLFloat16, Erf)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, bool, Not)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
-                                                                            7, 8, float, BatchNormalization)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 8, double, BatchNormalization)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
-                                                                            7, 8, MLFloat16, BatchNormalization)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
-                                                                            9, 13, float, BatchNormalization)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 13, double, BatchNormalization)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
-                                                                            9, 13, MLFloat16, BatchNormalization)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
-                                                                            1, 12, float, LRN)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
-                                                                            1, 12, double, LRN)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
-                                                                            1, 12, MLFloat16, LRN)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, float, Conv)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, double, Conv)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, Conv)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, float, ConvTranspose)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, double, ConvTranspose)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, ConvTranspose)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
-                                                                            7, 9, float, AveragePool)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
-                                                                            7, 9, double, AveragePool)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
-                                                                            7, 9, MLFloat16, AveragePool)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
-                                                                  1, float, GlobalAveragePool)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
-                                                                  1, double, GlobalAveragePool)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
-                                                                  1, MLFloat16, GlobalAveragePool)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
-                                                                            1, 7, float, MaxPool)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
-                                                                            1, 7, double, MaxPool)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
-                                                                            1, 7, MLFloat16, MaxPool)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
-                                                                            8, 9, float, MaxPool)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
-                                                                            8, 9, double, MaxPool)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
-                                                                            8, 9, MLFloat16, MaxPool)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
-                                                                  1, float, GlobalMaxPool)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
-                                                                  1, double, GlobalMaxPool)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
-                                                                  1, MLFloat16, GlobalMaxPool)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, float, ArgMax)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, double, ArgMax)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, ArgMax)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, float, ArgMin)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, double, ArgMin)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, ArgMin)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, float, ReduceL1)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, double, ReduceL1)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, ReduceL1)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, int32_t, ReduceL1)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, float, ReduceL2)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, double, ReduceL2)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, ReduceL2)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, int32_t, ReduceL2)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, float, ReduceMax)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, double, ReduceMax)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, ReduceMax)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, int32_t, ReduceMax)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, int64_t, ReduceMax)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, float, ReduceMean)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, double, ReduceMean)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, ReduceMean)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, int32_t, ReduceMean)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, float, ReduceMin)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, double, ReduceMin)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, ReduceMin)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, int32_t, ReduceMin)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, float, ReduceProd)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, double, ReduceProd)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, ReduceProd)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, int32_t, ReduceProd)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, float, ReduceSum)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, double, ReduceSum)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, ReduceSum)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, int32_t, ReduceSum)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, int64_t, ReduceSum)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, float, ReduceLogSum)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, double, ReduceLogSum)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, ReduceLogSum)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, float, ReduceSumSquare)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, double, ReduceSumSquare)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, ReduceSumSquare)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, float, ReduceLogSumExp)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, double, ReduceLogSumExp)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, ReduceLogSumExp)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 8, float, Cast)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 8, double, Cast)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 8, MLFloat16, Cast)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 8, int8_t, Cast)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 8, int16_t, Cast)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 8, int32_t, Cast)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 8, int64_t, Cast)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 8, uint8_t, Cast)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 8, uint16_t, Cast)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 8, uint32_t, Cast)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 8, uint64_t, Cast)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 8, bool, Cast)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, float, Cast)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, double, Cast)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, MLFloat16, Cast)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, int8_t, Cast)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, int16_t, Cast)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, int32_t, Cast)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, int64_t, Cast)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, uint8_t, Cast)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, uint16_t, Cast)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, uint32_t, Cast)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, uint64_t, Cast)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, bool, Cast)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 2, 10, float, Pad)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 2, 10, double, Pad)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 2, 10, MLFloat16, Pad)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 4, Reshape)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 5, 12, Reshape)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 12, Shape)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 12, Size)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, Tile)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, Tile)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 12, Transpose)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
-                                                                  6, float, InstanceNormalization)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
-                                                                  6, double, InstanceNormalization)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
-                                                                  6, MLFloat16, InstanceNormalization)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 13, float, RNN)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 13, double, RNN)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 13, MLFloat16, RNN)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 13, float, GRU)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 13, double, GRU)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 13, MLFloat16, GRU)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 13, float, LSTM)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 13, double, LSTM)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 13, MLFloat16, LSTM)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 9, int64_t, Slice)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 10, Compress)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 10, Flatten)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 8, float, Upsample)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 8, double, Upsample)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 8, MLFloat16, Upsample)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 8, int32_t, Upsample)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 8, uint8_t, Upsample)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 9, float, Upsample)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 9, double, Upsample)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 9, MLFloat16, Upsample)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 9, int32_t, Upsample)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 9, uint8_t, Upsample)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 2, 10, Split)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, ConstantOfShape)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, int8_t, Shrink)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, int16_t, Shrink)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, int32_t, Shrink)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, int64_t, Shrink)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, uint8_t, Shrink)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, uint16_t, Shrink)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, uint32_t, Shrink)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, uint64_t, Shrink)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, float, Shrink)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, double, Shrink)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, MLFloat16, Shrink)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 8, float, Less)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 8, double, Less)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 8, MLFloat16, Less)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, int32_t, Less)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, int64_t, Less)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, uint32_t, Less)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, uint64_t, Less)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, float, Less)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, double, Less)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, MLFloat16, Less)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, EyeLike)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 10, Scatter)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 15, MLFloat16, Where)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 15, float, Where)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 15, double_t, Where)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 15, int32_t, Where)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 15, int64_t, Where)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 15, uint8_t, Where)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, bool, NonZero)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, uint8_t, NonZero)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, int32_t, NonZero)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, int64_t, NonZero)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, float, NonZero)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, MLFloat16, NonZero)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 9, TopK)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 8, 8, Scan)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 10, Scan)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, Loop)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, DepthToSpace)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 12, SpaceToDepth)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, RandomNormal)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, RandomNormalLike)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, RandomUniform)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, RandomUniformLike)>,
-
-      // opset 10
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
-                                                                            10, 10, float, AveragePool)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
-                                                                            10, 10, double, AveragePool)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
-                                                                            10, 10, MLFloat16, AveragePool)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, 11, Dropout)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
-                                                                            10, 10, float, MaxPool)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
-                                                                            10, 10, double, MaxPool)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
-                                                                            10, 10, MLFloat16, MaxPool)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, 10, NonMaxSuppression)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, 10, float, Resize)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, 10, double, Resize)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, 10, MLFloat16, Resize)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, 10, int32_t, Resize)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, 10, uint8_t, Resize)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, ReverseSequence)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, float, RoiAlign)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, double, RoiAlign)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, 10, int32_t, Slice)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, 10, int64_t, Slice)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, float, ThresholdedRelu)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, double, ThresholdedRelu)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, MLFloat16, ThresholdedRelu)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, 10, TopK)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, If)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, 12, int8_t, QuantizeLinear)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, 12, uint8_t, QuantizeLinear)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, 12, int8_t, DequantizeLinear)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, 12, uint8_t, DequantizeLinear)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, 12, Mod)>,
-
-      // opset 11
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, float, ArgMax)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, double, ArgMax)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, MLFloat16, ArgMax)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, float, ArgMin)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, double, ArgMin)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, MLFloat16, ArgMin)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, Compress)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, Concat)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, Flatten)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, Gather)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, GatherElements)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 11, int64_t, GatherND)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, MLFloat16, Gemm)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, float, Gemm)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, double, Gemm)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, If)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, Loop)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, NonMaxSuppression)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, Range)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, float, ReduceL1)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, double, ReduceL1)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, MLFloat16, ReduceL1)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, int32_t, ReduceL1)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, float, ReduceL2)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, double, ReduceL2)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, MLFloat16, ReduceL2)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, int32_t, ReduceL2)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, float, ReduceLogSum)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, double, ReduceLogSum)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, MLFloat16, ReduceLogSum)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, float, ReduceLogSumExp)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, double, ReduceLogSumExp)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, MLFloat16, ReduceLogSumExp)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 11, float, ReduceMax)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 11, double, ReduceMax)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 11, MLFloat16, ReduceMax)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 11, int32_t, ReduceMax)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 11, int64_t, ReduceMax)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, float, ReduceMean)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, double, ReduceMean)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, MLFloat16, ReduceMean)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, int32_t, ReduceMean)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 11, float, ReduceMin)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 11, double, ReduceMin)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 11, MLFloat16, ReduceMin)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 11, int32_t, ReduceMin)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, float, ReduceProd)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, double, ReduceProd)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, MLFloat16, ReduceProd)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, int32_t, ReduceProd)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, float, ReduceSum)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, double, ReduceSum)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, MLFloat16, ReduceSum)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, int32_t, ReduceSum)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, int64_t, ReduceSum)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, float, ReduceSumSquare)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, double, ReduceSumSquare)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, MLFloat16, ReduceSumSquare)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, Scan)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, ScatterElements)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, int32_t, Slice)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, int64_t, Slice)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, float, Softmax)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, double, Softmax)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, MLFloat16, Softmax)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, float, LogSoftmax)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, double, LogSoftmax)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, MLFloat16, LogSoftmax)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, Split)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, Squeeze)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, TopK)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, SequenceAt)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, SequenceConstruct)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, SequenceEmpty)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, SequenceLength)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, ConcatFromSequence)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, SequenceErase)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, SequenceInsert)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, Unsqueeze)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, float, Conv)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, double, Conv)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, MLFloat16, Conv)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, float, ConvTranspose)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, double, ConvTranspose)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, MLFloat16, ConvTranspose)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
-                                                                  11, float, AveragePool)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
-                                                                  11, double, AveragePool)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
-                                                                  11, MLFloat16, AveragePool)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
-                                                                            11, 11, float, MaxPool)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
-                                                                            11, 11, double, MaxPool)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
-                                                                            11, 11, MLFloat16, MaxPool)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, float, Resize)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, double, Resize)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, MLFloat16, Resize)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, int32_t, Resize)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, uint8_t, Resize)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 11, Clip)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, float, Pad)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, double, Pad)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, MLFloat16, Pad)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, bool, Equal)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, int32_t, Equal)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, int64_t, Equal)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, uint32_t, Equal)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, uint64_t, Equal)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, float, Equal)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, double, Equal)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, MLFloat16, Equal)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, float, Round)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, double, Round)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, MLFloat16, Round)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 13, CumSum)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, int64_t_int64_t_int64_t, OneHot)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, int64_t_float_int64_t, OneHot)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, int32_t_float_int32_t, OneHot)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, int64_t_MLFloat16_int64_t, OneHot)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, int32_t_MLFloat16_int32_t, OneHot)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, ScatterND)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, DepthToSpace)>,
-
-      // OpSet 12
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 12, Clip)>,
-
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
-                                                                  12, float, MaxPool)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
-                                                                  12, double, MaxPool)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
-                                                                  12, MLFloat16, MaxPool)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
-                                                                  12, int8_t, MaxPool)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
-                                                                  12, uint8_t, MaxPool)>,
-
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 12, Pow)>,
-
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 12, float, ReduceMax)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 12, double, ReduceMax)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 12, MLFloat16, ReduceMax)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 12, int32_t, ReduceMax)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 12, int64_t, ReduceMax)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 12, int8_t, ReduceMax)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 12, uint8_t, ReduceMax)>,
-
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 12, float, ReduceMin)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 12, double, ReduceMin)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 12, MLFloat16, ReduceMin)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 12, int32_t, ReduceMin)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 12, int64_t, ReduceMin)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 12, int8_t, ReduceMin)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 12, uint8_t, ReduceMin)>,
-
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 12, int64_t, GatherND)>,
-
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 12, Dropout)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, Einsum)>,
-
-      // OpSet 13
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 14, Pow)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, int32_t, Add)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, int64_t, Add)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, uint32_t, Add)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, uint64_t, Add)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, float, Add)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, double, Add)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, Clip)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, MLFloat16, Add)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, int32_t, Sub)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, int64_t, Sub)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, uint32_t, Sub)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, uint64_t, Sub)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, float, Sub)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, double, Sub)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, MLFloat16, Sub)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, int32_t, Mul)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, int64_t, Mul)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, uint32_t, Mul)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, uint64_t, Mul)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, float, Mul)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, double, Mul)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, MLFloat16, Mul)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, int32_t, Div)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, int64_t, Div)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, uint32_t, Div)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, uint64_t, Div)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, float, Div)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, double, Div)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, MLFloat16, Div)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int8_t, Abs)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int16_t, Abs)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int32_t, Abs)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int64_t, Abs)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, uint8_t, Abs)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, uint16_t, Abs)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, uint32_t, Abs)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, uint64_t, Abs)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, Abs)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, Abs)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, Abs)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int8_t, Neg)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int16_t, Neg)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int32_t, Neg)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int64_t, Neg)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, Neg)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, Neg)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, Neg)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, Floor)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, Floor)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, Floor)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, Ceil)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, Ceil)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, Ceil)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, Reciprocal)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, Reciprocal)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, Reciprocal)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, Sqrt)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, Sqrt)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, Sqrt)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, Log)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, Log)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, Log)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, Exp)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, Exp)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, Exp)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, Erf)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, Erf)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, Erf)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, Expand)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, Sum)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, Max)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, Min)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, bool, Equal)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int32_t, Equal)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int64_t, Equal)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, uint32_t, Equal)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, uint64_t, Equal)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, Equal)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, Equal)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, Equal)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int32_t, Greater)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int64_t, Greater)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, uint32_t, Greater)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, uint64_t, Greater)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, Greater)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, Greater)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, Greater)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int32_t, Less)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int64_t, Less)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, uint32_t, Less)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, uint64_t, Less)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, Less)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, Less)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, Less)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, bool, NonZero)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, uint8_t, NonZero)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int32_t, NonZero)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int64_t, NonZero)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, NonZero)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, NonZero)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 18, float, Cast)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 18, double, Cast)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 18, MLFloat16, Cast)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 18, int8_t, Cast)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 18, int16_t, Cast)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 18, int32_t, Cast)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 18, int64_t, Cast)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 18, uint8_t, Cast)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 18, uint16_t, Cast)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 18, uint32_t, Cast)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 18, uint64_t, Cast)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 18, bool, Cast)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, Reshape)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 14, Shape)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, Size)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, Transpose)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, ScatterElements)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int32_t, Slice)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int64_t, Slice)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, Softmax)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, Softmax)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, Softmax)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, LogSoftmax)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, LogSoftmax)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, LogSoftmax)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 17, Split)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, Squeeze)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, Unsqueeze)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, Concat)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, Gather)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, GatherElements)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, MatMul)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, MatMul)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, MatMul)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, float, Relu)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, double, Relu)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, MLFloat16, Relu)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, Sigmoid)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, Sigmoid)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, Sigmoid)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, Tanh)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, Tanh)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, Tanh)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, Gemm)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, Gemm)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, Gemm)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, ReduceL1)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, ReduceL1)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, ReduceL1)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int32_t, ReduceL1)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, ReduceL2)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, ReduceL2)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, ReduceL2)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int32_t, ReduceL2)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, ReduceLogSum)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, ReduceLogSum)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, ReduceLogSum)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, ReduceLogSumExp)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, ReduceLogSumExp)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, ReduceLogSumExp)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, ReduceMax)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, ReduceMax)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, ReduceMax)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int32_t, ReduceMax)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int64_t, ReduceMax)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int8_t, ReduceMax)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, uint8_t, ReduceMax)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, ReduceMean)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, ReduceMean)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, ReduceMean)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int32_t, ReduceMean)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, float, ReduceMin)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, double, ReduceMin)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, MLFloat16, ReduceMin)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, int32_t, ReduceMin)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, int64_t, ReduceMin)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, int8_t, ReduceMin)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, uint8_t, ReduceMin)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, ReduceProd)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, ReduceProd)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, ReduceProd)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int32_t, ReduceProd)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, ReduceSum)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, ReduceSum)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, ReduceSum)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int32_t, ReduceSum)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int64_t, ReduceSum)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, ReduceSumSquare)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, ReduceSumSquare)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, ReduceSumSquare)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int64_t, GatherND)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, Dropout)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, Resize)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, Resize)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, Resize)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int32_t, Resize)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, uint8_t, Resize)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 18, If)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, Loop)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, Flatten)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
-                                                                  13, float, LRN)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
-                                                                  13, double, LRN)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
-                                                                  13, MLFloat16, LRN)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, Identity)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, ScatterND)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, Pad)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, Pad)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, Pad)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, bool, Pad)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, SpaceToDepth)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, DepthToSpace)>,
-
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, BFloat16, Add)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, BFloat16, Sub)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, BFloat16, Mul)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, BFloat16, Div)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 18, BFloat16, Cast)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, BFloat16, Softmax)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, BFloat16, MatMul)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, BFloat16, Relu)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, BFloat16, Sigmoid)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, BFloat16, Tanh)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, BFloat16, Gemm)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, BFloat16, ReduceSum)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, Mod)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 18, int8_t, QuantizeLinear)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 18, uint8_t, QuantizeLinear)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 18, int8_t, DequantizeLinear)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 18, uint8_t, DequantizeLinear)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int8_t, Sign)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int16_t, Sign)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int32_t, Sign)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int64_t, Sign)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, uint8_t, Sign)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, uint16_t, Sign)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, uint32_t, Sign)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, uint64_t, Sign)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, Sign)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, Sign)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, Sign)>,
-
-      // OpSet 14
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, CumSum)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, float, Relu)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, double, Relu)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, MLFloat16, Relu)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, int32_t, Add)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, int64_t, Add)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, uint32_t, Add)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, uint64_t, Add)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, float, Add)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, double, Add)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, MLFloat16, Add)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, int32_t, Sub)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, int64_t, Sub)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, uint32_t, Sub)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, uint64_t, Sub)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, float, Sub)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, double, Sub)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, MLFloat16, Sub)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, int32_t, Mul)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, int64_t, Mul)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, uint32_t, Mul)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, uint64_t, Mul)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, float, Mul)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, double, Mul)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, MLFloat16, Mul)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, int32_t, Div)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, int64_t, Div)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, uint32_t, Div)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, uint64_t, Div)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, float, Div)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, double, Div)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, MLFloat16, Div)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, 18, Identity)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, float, RNN)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, double, RNN)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, MLFloat16, RNN)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, float, GRU)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, double, GRU)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, MLFloat16, GRU)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, float, LSTM)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, double, LSTM)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, MLFloat16, LSTM)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, 18, Reshape)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
-                                                                            14, 14, float, BatchNormalization)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, 14, double, BatchNormalization)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
-                                                                            14, 14, MLFloat16, BatchNormalization)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, float, ReduceMin)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, double, ReduceMin)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, MLFloat16, ReduceMin)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, int32_t, ReduceMin)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, int8_t, ReduceMin)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, uint8_t, ReduceMin)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, int64_t, ReduceMin)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, BFloat16, Add)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, BFloat16, Sub)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, BFloat16, Mul)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, BFloat16, Div)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, BFloat16, Relu)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, Trilu)>,
-
-      // OpSet 15
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 15, Pow)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
-                                                                  15, float, BatchNormalization)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 15, double, BatchNormalization)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain,
-                                                                  15, MLFloat16, BatchNormalization)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 15, 18, Shape)>,
-
-      // Opset 16
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, float, PRelu)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, double, PRelu)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, MLFloat16, PRelu)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, float, LeakyRelu)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, double, LeakyRelu)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, MLFloat16, LeakyRelu)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, MLFloat16, Where)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, float, Where)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, double_t, Where)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, int32_t, Where)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, int64_t, Where)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, uint8_t, Where)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, int32_t, GreaterOrEqual)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, int64_t, GreaterOrEqual)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, uint32_t, GreaterOrEqual)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, uint64_t, GreaterOrEqual)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, float, GreaterOrEqual)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, double, GreaterOrEqual)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, MLFloat16, GreaterOrEqual)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, int32_t, LessOrEqual)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, int64_t, LessOrEqual)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, uint32_t, LessOrEqual)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, uint64_t, LessOrEqual)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, float, LessOrEqual)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, double, LessOrEqual)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, MLFloat16, LessOrEqual)>,
-
-      // Opset 17
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 17, float, LayerNormalization)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 17, double, LayerNormalization)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 17, BFloat16, LayerNormalization)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 17, MLFloat16, LayerNormalization)>,
-
-      // Opset 18
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 18, Split)>,
-
-      // Opset 19
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, float, Cast)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, double, Cast)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, MLFloat16, Cast)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, int8_t, Cast)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, int16_t, Cast)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, int32_t, Cast)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, int64_t, Cast)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, uint8_t, Cast)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, uint16_t, Cast)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, uint32_t, Cast)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, uint64_t, Cast)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, bool, Cast)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, BFloat16, Cast)>,
-
-      BuildKernelCreateInfo<ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, uint8_t, float, DequantizeLinear)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, int8_t, float, DequantizeLinear)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, uint8_t, MLFloat16, DequantizeLinear)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, int8_t, MLFloat16, DequantizeLinear)>,
-
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, Identity)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, If)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, Loop)>,
-
-      BuildKernelCreateInfo<ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, uint8_t, float, QuantizeLinear)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, int8_t, float, QuantizeLinear)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, uint8_t, MLFloat16, QuantizeLinear)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, int8_t, MLFloat16, QuantizeLinear)>,
-
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, Reshape)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, Scan)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, Shape)>,
+    BuildKernelCreateInfo<void>,  // default entry to avoid the list become empty after ops-reducing
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, MemcpyFromHost)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, MemcpyToHost)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 4, 10, Concat)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, Unsqueeze)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 8, Flatten)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, Squeeze)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 12, Identity)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 9, Dropout)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, float, Cos)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, double, Cos)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, MLFloat16, Cos)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, float, Sin)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, double, Sin)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, MLFloat16, Sin)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, Gather)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 8, float, Gemm)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 8, double, Gemm)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 8, MLFloat16, Gemm)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 10, float, Gemm)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 10, double, Gemm)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 10, MLFloat16, Gemm)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 8, float, MatMul)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 8, double, MatMul)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 8, MLFloat16, MatMul)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, float, MatMul)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, double, MatMul)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, MLFloat16, MatMul)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, int8_t, MatMulInteger)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 10, float, Clip)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, float, Elu)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, double, Elu)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, MLFloat16, Elu)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, float, HardSigmoid)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, double, HardSigmoid)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, MLFloat16, HardSigmoid)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 15, float, LeakyRelu)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 15, double, LeakyRelu)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 15, MLFloat16, LeakyRelu)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, float, Relu)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, double, Relu)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, MLFloat16, Relu)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, float, Selu)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, double, Selu)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, MLFloat16, Selu)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, float, Sigmoid)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, double, Sigmoid)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, MLFloat16, Sigmoid)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, float, Softsign)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, double, Softsign)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, MLFloat16, Softsign)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, float, Tanh)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, double, Tanh)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, MLFloat16, Tanh)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, float, Softplus)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, double, Softplus)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, MLFloat16, Softplus)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, float, Softmax)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, double, Softmax)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, Softmax)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, float, LogSoftmax)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, double, LogSoftmax)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, LogSoftmax)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 11, float, Pow)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 11, double, Pow)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 11, MLFloat16, Pow)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 8, float, PRelu)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 8, double, PRelu)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 8, MLFloat16, PRelu)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 15, float, PRelu)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 15, double, PRelu)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 15, MLFloat16, PRelu)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, bool, And)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, bool, Or)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, bool, Xor)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 7, Sum)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 8, 12, Sum)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 11, Max)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 12, Max)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 11, Min)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 12, Min)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 8, float, Greater)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 8, double, Greater)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 8, MLFloat16, Greater)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 10, bool, Equal)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 10, int32_t, Equal)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 10, int64_t, Equal)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 8, 12, Expand)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, int32_t, Greater)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, int64_t, Greater)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, uint32_t, Greater)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, uint64_t, Greater)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, float, Greater)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, double, Greater)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, MLFloat16, Greater)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 15, int32_t, GreaterOrEqual)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 15, int64_t, GreaterOrEqual)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 15, uint32_t, GreaterOrEqual)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 15, uint64_t, GreaterOrEqual)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 15, float, GreaterOrEqual)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 15, double, GreaterOrEqual)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 15, MLFloat16, GreaterOrEqual)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 15, int32_t, LessOrEqual)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 15, int64_t, LessOrEqual)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 15, uint32_t, LessOrEqual)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 15, uint64_t, LessOrEqual)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 15, float, LessOrEqual)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 15, double, LessOrEqual)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 15, MLFloat16, LessOrEqual)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, int32_t, Add)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, int64_t, Add)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, uint32_t, Add)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, uint64_t, Add)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, float, Add)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, double, Add)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, MLFloat16, Add)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, int32_t, Sub)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, int64_t, Sub)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, uint32_t, Sub)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, uint64_t, Sub)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, float, Sub)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, double, Sub)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, MLFloat16, Sub)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, int32_t, Mul)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, int64_t, Mul)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, uint32_t, Mul)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, uint64_t, Mul)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, float, Mul)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, double, Mul)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, MLFloat16, Mul)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, int32_t, Div)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, int64_t, Div)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, uint32_t, Div)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, uint64_t, Div)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, float, Div)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, double, Div)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 12, MLFloat16, Div)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, int8_t, Abs)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, int16_t, Abs)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, int32_t, Abs)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, int64_t, Abs)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, uint8_t, Abs)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, uint16_t, Abs)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, uint32_t, Abs)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, uint64_t, Abs)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, float, Abs)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, double, Abs)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, MLFloat16, Abs)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, int8_t, Neg)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, int16_t, Neg)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, int32_t, Neg)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, int64_t, Neg)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, float, Neg)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, double, Neg)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, MLFloat16, Neg)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, float, Floor)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, double, Floor)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, MLFloat16, Floor)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, float, Ceil)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, double, Ceil)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, MLFloat16, Ceil)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, float, Reciprocal)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, double, Reciprocal)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, MLFloat16, Reciprocal)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, float, Sqrt)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, double, Sqrt)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, MLFloat16, Sqrt)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, float, Log)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, double, Log)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, MLFloat16, Log)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, float, Exp)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, double, Exp)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, MLFloat16, Exp)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, float, Erf)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, double, Erf)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, MLFloat16, Erf)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, bool, Not)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 8, float, BatchNormalization)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 8, double, BatchNormalization)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 8, MLFloat16, BatchNormalization)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 13, float, BatchNormalization)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 13, double, BatchNormalization)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 13, MLFloat16, BatchNormalization)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 12, float, LRN)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 12, double, LRN)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 12, MLFloat16, LRN)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, float, Conv)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, double, Conv)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, Conv)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, float, ConvTranspose)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, double, ConvTranspose)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, ConvTranspose)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 9, float, AveragePool)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 9, double, AveragePool)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 9, MLFloat16, AveragePool)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, float, GlobalAveragePool)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, double, GlobalAveragePool)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, MLFloat16, GlobalAveragePool)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 7, float, MaxPool)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 7, double, MaxPool)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 7, MLFloat16, MaxPool)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 8, 9, float, MaxPool)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 8, 9, double, MaxPool)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 8, 9, MLFloat16, MaxPool)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, float, GlobalMaxPool)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, double, GlobalMaxPool)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, MLFloat16, GlobalMaxPool)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, float, ArgMax)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, double, ArgMax)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, ArgMax)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, float, ArgMin)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, double, ArgMin)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, ArgMin)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, float, ReduceL1)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, double, ReduceL1)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, ReduceL1)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, int32_t, ReduceL1)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, float, ReduceL2)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, double, ReduceL2)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, ReduceL2)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, int32_t, ReduceL2)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, float, ReduceMax)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, double, ReduceMax)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, ReduceMax)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, int32_t, ReduceMax)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, int64_t, ReduceMax)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, float, ReduceMean)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, double, ReduceMean)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, ReduceMean)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, int32_t, ReduceMean)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, float, ReduceMin)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, double, ReduceMin)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, ReduceMin)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, int32_t, ReduceMin)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, float, ReduceProd)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, double, ReduceProd)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, ReduceProd)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, int32_t, ReduceProd)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, float, ReduceSum)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, double, ReduceSum)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, ReduceSum)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, int32_t, ReduceSum)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, int64_t, ReduceSum)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, float, ReduceLogSum)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, double, ReduceLogSum)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, ReduceLogSum)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, float, ReduceSumSquare)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, double, ReduceSumSquare)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, ReduceSumSquare)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, float, ReduceLogSumExp)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, double, ReduceLogSumExp)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, MLFloat16, ReduceLogSumExp)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 8, float, Cast)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 8, double, Cast)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 8, MLFloat16, Cast)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 8, int8_t, Cast)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 8, int16_t, Cast)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 8, int32_t, Cast)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 8, int64_t, Cast)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 8, uint8_t, Cast)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 8, uint16_t, Cast)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 8, uint32_t, Cast)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 8, uint64_t, Cast)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 8, bool, Cast)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, float, Cast)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, double, Cast)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, MLFloat16, Cast)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, int8_t, Cast)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, int16_t, Cast)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, int32_t, Cast)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, int64_t, Cast)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, uint8_t, Cast)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, uint16_t, Cast)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, uint32_t, Cast)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, uint64_t, Cast)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, bool, Cast)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 2, 10, float, Pad)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 2, 10, double, Pad)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 2, 10, MLFloat16, Pad)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 4, Reshape)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 5, 12, Reshape)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 12, Shape)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 12, Size)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, 12, Tile)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, Tile)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 12, Transpose)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, float, InstanceNormalization)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, double, InstanceNormalization)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 6, MLFloat16, InstanceNormalization)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 13, float, RNN)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 13, double, RNN)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 13, MLFloat16, RNN)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 13, float, GRU)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 13, double, GRU)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 13, MLFloat16, GRU)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 13, float, LSTM)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 13, double, LSTM)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 13, MLFloat16, LSTM)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 9, int64_t, Slice)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 10, Compress)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 10, Flatten)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 8, float, Upsample)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 8, double, Upsample)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 8, MLFloat16, Upsample)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 8, int32_t, Upsample)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 8, uint8_t, Upsample)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 9, float, Upsample)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 9, double, Upsample)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 9, MLFloat16, Upsample)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 9, int32_t, Upsample)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 9, uint8_t, Upsample)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 2, 10, Split)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, ConstantOfShape)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, int8_t, Shrink)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, int16_t, Shrink)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, int32_t, Shrink)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, int64_t, Shrink)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, uint8_t, Shrink)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, uint16_t, Shrink)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, uint32_t, Shrink)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, uint64_t, Shrink)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, float, Shrink)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, double, Shrink)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, MLFloat16, Shrink)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 8, float, Less)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 8, double, Less)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 7, 8, MLFloat16, Less)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, int32_t, Less)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, int64_t, Less)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, uint32_t, Less)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, uint64_t, Less)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, float, Less)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, double, Less)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, MLFloat16, Less)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, EyeLike)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 10, Scatter)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 15, MLFloat16, Where)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 15, float, Where)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 15, double_t, Where)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 15, int32_t, Where)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 15, int64_t, Where)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 15, uint8_t, Where)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, bool, NonZero)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, uint8_t, NonZero)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, int32_t, NonZero)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, int64_t, NonZero)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, float, NonZero)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 12, MLFloat16, NonZero)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 9, TopK)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 8, 8, Scan)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 9, 10, Scan)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, Loop)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, DepthToSpace)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 12, SpaceToDepth)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, RandomNormal)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, RandomNormalLike)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, RandomUniform)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, RandomUniformLike)>,
+
+    // opset 10
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, 10, float, AveragePool)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, 10, double, AveragePool)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, 10, MLFloat16, AveragePool)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, 11, Dropout)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, 10, float, MaxPool)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, 10, double, MaxPool)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, 10, MLFloat16, MaxPool)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, 10, NonMaxSuppression)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, 10, float, Resize)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, 10, double, Resize)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, 10, MLFloat16, Resize)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, 10, int32_t, Resize)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, 10, uint8_t, Resize)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, ReverseSequence)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, float, RoiAlign)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, double, RoiAlign)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, 10, int32_t, Slice)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, 10, int64_t, Slice)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, float, ThresholdedRelu)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, double, ThresholdedRelu)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, MLFloat16, ThresholdedRelu)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, 10, TopK)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 1, 10, If)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, 12, int8_t, QuantizeLinear)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, 12, uint8_t, QuantizeLinear)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, 12, int8_t, DequantizeLinear)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, 12, uint8_t, DequantizeLinear)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 10, 12, Mod)>,
+
+    // opset 11
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 11, float, ArgMax)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 11, double, ArgMax)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 11, MLFloat16, ArgMax)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 11, float, ArgMin)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 11, double, ArgMin)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 11, MLFloat16, ArgMin)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, Compress)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, Concat)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, Flatten)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, Gather)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, GatherElements)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 11, int64_t, GatherND)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, MLFloat16, Gemm)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, float, Gemm)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, double, Gemm)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, If)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, Loop)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, NonMaxSuppression)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, Range)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, float, ReduceL1)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, double, ReduceL1)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, MLFloat16, ReduceL1)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, int32_t, ReduceL1)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, float, ReduceL2)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, double, ReduceL2)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, MLFloat16, ReduceL2)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, int32_t, ReduceL2)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, float, ReduceLogSum)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, double, ReduceLogSum)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, MLFloat16, ReduceLogSum)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, float, ReduceLogSumExp)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, double, ReduceLogSumExp)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, MLFloat16, ReduceLogSumExp)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 11, float, ReduceMax)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 11, double, ReduceMax)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 11, MLFloat16, ReduceMax)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 11, int32_t, ReduceMax)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 11, int64_t, ReduceMax)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, float, ReduceMean)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, double, ReduceMean)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, MLFloat16, ReduceMean)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, int32_t, ReduceMean)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 11, float, ReduceMin)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 11, double, ReduceMin)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 11, MLFloat16, ReduceMin)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 11, int32_t, ReduceMin)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, float, ReduceProd)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, double, ReduceProd)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, MLFloat16, ReduceProd)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, int32_t, ReduceProd)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, float, ReduceSum)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, double, ReduceSum)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, MLFloat16, ReduceSum)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, int32_t, ReduceSum)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, int64_t, ReduceSum)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, float, ReduceSumSquare)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, double, ReduceSumSquare)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, MLFloat16, ReduceSumSquare)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 15, Scan)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, ScatterElements)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, int32_t, Slice)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, int64_t, Slice)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, float, Softmax)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, double, Softmax)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, MLFloat16, Softmax)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, float, LogSoftmax)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, double, LogSoftmax)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, MLFloat16, LogSoftmax)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, Split)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, Squeeze)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, TopK)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, SequenceAt)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, SequenceConstruct)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, SequenceEmpty)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, SequenceLength)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, ConcatFromSequence)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, SequenceErase)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, SequenceInsert)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, Unsqueeze)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, float, Conv)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, double, Conv)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, MLFloat16, Conv)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, float, ConvTranspose)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, double, ConvTranspose)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, MLFloat16, ConvTranspose)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, float, AveragePool)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, double, AveragePool)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, MLFloat16, AveragePool)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 11, float, MaxPool)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 11, double, MaxPool)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 11, MLFloat16, MaxPool)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, float, Resize)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, double, Resize)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, MLFloat16, Resize)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, int32_t, Resize)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, uint8_t, Resize)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 11, Clip)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, float, Pad)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, double, Pad)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, MLFloat16, Pad)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, bool, Equal)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, int32_t, Equal)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, int64_t, Equal)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, uint32_t, Equal)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, uint64_t, Equal)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, float, Equal)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, double, Equal)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, MLFloat16, Equal)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, float, Round)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, double, Round)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, MLFloat16, Round)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 13, CumSum)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, int64_t_int64_t_int64_t, OneHot)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, int64_t_float_int64_t, OneHot)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, int32_t_float_int32_t, OneHot)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, int64_t_MLFloat16_int64_t, OneHot)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, int32_t_MLFloat16_int32_t, OneHot)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, ScatterND)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 11, 12, DepthToSpace)>,
+
+    // OpSet 12
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 12, Clip)>,
+
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, float, MaxPool)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, double, MaxPool)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, MLFloat16, MaxPool)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, int8_t, MaxPool)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, uint8_t, MaxPool)>,
+
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 12, Pow)>,
+
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 12, float, ReduceMax)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 12, double, ReduceMax)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 12, MLFloat16, ReduceMax)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 12, int32_t, ReduceMax)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 12, int64_t, ReduceMax)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 12, int8_t, ReduceMax)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 12, uint8_t, ReduceMax)>,
+
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 12, float, ReduceMin)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 12, double, ReduceMin)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 12, MLFloat16, ReduceMin)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 12, int32_t, ReduceMin)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 12, int64_t, ReduceMin)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 12, int8_t, ReduceMin)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 12, uint8_t, ReduceMin)>,
+
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 12, int64_t, GatherND)>,
+
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, 12, Dropout)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 12, Einsum)>,
+
+    // OpSet 13
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 14, Pow)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, int32_t, Add)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, int64_t, Add)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, uint32_t, Add)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, uint64_t, Add)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, float, Add)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, double, Add)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, Clip)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, MLFloat16, Add)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, int32_t, Sub)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, int64_t, Sub)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, uint32_t, Sub)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, uint64_t, Sub)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, float, Sub)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, double, Sub)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, MLFloat16, Sub)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, int32_t, Mul)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, int64_t, Mul)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, uint32_t, Mul)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, uint64_t, Mul)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, float, Mul)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, double, Mul)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, MLFloat16, Mul)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, int32_t, Div)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, int64_t, Div)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, uint32_t, Div)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, uint64_t, Div)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, float, Div)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, double, Div)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, MLFloat16, Div)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int8_t, Abs)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int16_t, Abs)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int32_t, Abs)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int64_t, Abs)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, uint8_t, Abs)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, uint16_t, Abs)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, uint32_t, Abs)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, uint64_t, Abs)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, Abs)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, Abs)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, Abs)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int8_t, Neg)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int16_t, Neg)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int32_t, Neg)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int64_t, Neg)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, Neg)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, Neg)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, Neg)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, Floor)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, Floor)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, Floor)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, Ceil)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, Ceil)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, Ceil)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, Reciprocal)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, Reciprocal)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, Reciprocal)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, Sqrt)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, Sqrt)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, Sqrt)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, Log)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, Log)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, Log)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, Exp)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, Exp)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, Exp)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, Erf)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, Erf)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, Erf)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, Expand)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, Sum)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, Max)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, Min)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, bool, Equal)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int32_t, Equal)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int64_t, Equal)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, uint32_t, Equal)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, uint64_t, Equal)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, Equal)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, Equal)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, Equal)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int32_t, Greater)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int64_t, Greater)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, uint32_t, Greater)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, uint64_t, Greater)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, Greater)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, Greater)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, Greater)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int32_t, Less)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int64_t, Less)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, uint32_t, Less)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, uint64_t, Less)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, Less)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, Less)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, Less)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, bool, NonZero)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, uint8_t, NonZero)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int32_t, NonZero)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int64_t, NonZero)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, NonZero)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, NonZero)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 18, float, Cast)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 18, double, Cast)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 18, MLFloat16, Cast)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 18, int8_t, Cast)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 18, int16_t, Cast)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 18, int32_t, Cast)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 18, int64_t, Cast)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 18, uint8_t, Cast)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 18, uint16_t, Cast)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 18, uint32_t, Cast)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 18, uint64_t, Cast)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 18, bool, Cast)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, Reshape)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 14, Shape)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, Size)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, Transpose)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, ScatterElements)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int32_t, Slice)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int64_t, Slice)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, Softmax)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, Softmax)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, Softmax)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, LogSoftmax)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, LogSoftmax)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, LogSoftmax)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 17, Split)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, Squeeze)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, Unsqueeze)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, Concat)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, Gather)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, GatherElements)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, MatMul)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, MatMul)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, MatMul)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, float, Relu)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, double, Relu)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, MLFloat16, Relu)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, Sigmoid)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, Sigmoid)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, Sigmoid)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, Tanh)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, Tanh)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, Tanh)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, Gemm)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, Gemm)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, Gemm)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, ReduceL1)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, ReduceL1)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, ReduceL1)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int32_t, ReduceL1)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, ReduceL2)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, ReduceL2)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, ReduceL2)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int32_t, ReduceL2)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, ReduceLogSum)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, ReduceLogSum)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, ReduceLogSum)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, ReduceLogSumExp)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, ReduceLogSumExp)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, ReduceLogSumExp)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, ReduceMax)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, ReduceMax)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, ReduceMax)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int32_t, ReduceMax)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int64_t, ReduceMax)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int8_t, ReduceMax)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, uint8_t, ReduceMax)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, ReduceMean)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, ReduceMean)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, ReduceMean)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int32_t, ReduceMean)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, float, ReduceMin)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, double, ReduceMin)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, MLFloat16, ReduceMin)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, int32_t, ReduceMin)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, int64_t, ReduceMin)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, int8_t, ReduceMin)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, uint8_t, ReduceMin)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, ReduceProd)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, ReduceProd)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, ReduceProd)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int32_t, ReduceProd)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, ReduceSum)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, ReduceSum)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, ReduceSum)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int32_t, ReduceSum)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int64_t, ReduceSum)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, ReduceSumSquare)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, ReduceSumSquare)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, ReduceSumSquare)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int64_t, GatherND)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, Dropout)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, Resize)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, Resize)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, Resize)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int32_t, Resize)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, uint8_t, Resize)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 18, If)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 18, Loop)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, Flatten)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, LRN)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, LRN)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, LRN)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, Identity)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, ScatterND)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, Pad)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, Pad)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, Pad)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, bool, Pad)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, SpaceToDepth)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, DepthToSpace)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int8_t, Sign)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int16_t, Sign)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int32_t, Sign)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, int64_t, Sign)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, uint8_t, Sign)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, uint16_t, Sign)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, uint32_t, Sign)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, uint64_t, Sign)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, float, Sign)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, double, Sign)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, MLFloat16, Sign)>,
+
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, BFloat16, Add)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, BFloat16, Sub)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, BFloat16, Mul)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, BFloat16, Div)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 18, BFloat16, Cast)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, BFloat16, Softmax)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, BFloat16, MatMul)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 13, BFloat16, Relu)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, BFloat16, Sigmoid)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, BFloat16, Tanh)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, BFloat16, Gemm)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, BFloat16, ReduceSum)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, Mod)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 18, int8_t, QuantizeLinear)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 18, uint8_t, QuantizeLinear)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 18, int8_t, DequantizeLinear)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 13, 18, uint8_t, DequantizeLinear)>,
+
+    // OpSet 14
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, CumSum)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, float, Relu)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, double, Relu)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, MLFloat16, Relu)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, int32_t, Add)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, int64_t, Add)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, uint32_t, Add)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, uint64_t, Add)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, float, Add)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, double, Add)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, MLFloat16, Add)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, int32_t, Sub)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, int64_t, Sub)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, uint32_t, Sub)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, uint64_t, Sub)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, float, Sub)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, double, Sub)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, MLFloat16, Sub)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, int32_t, Mul)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, int64_t, Mul)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, uint32_t, Mul)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, uint64_t, Mul)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, float, Mul)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, double, Mul)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, MLFloat16, Mul)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, int32_t, Div)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, int64_t, Div)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, uint32_t, Div)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, uint64_t, Div)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, float, Div)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, double, Div)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, MLFloat16, Div)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, 18, Identity)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, float, RNN)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, double, RNN)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, MLFloat16, RNN)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, float, GRU)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, double, GRU)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, MLFloat16, GRU)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, float, LSTM)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, double, LSTM)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, MLFloat16, LSTM)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, 18, Reshape)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, 14, float, BatchNormalization)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, 14, double, BatchNormalization)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, 14, MLFloat16, BatchNormalization)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, float, ReduceMin)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, double, ReduceMin)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, MLFloat16, ReduceMin)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, int32_t, ReduceMin)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, int8_t, ReduceMin)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, uint8_t, ReduceMin)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, int64_t, ReduceMin)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, BFloat16, Add)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, BFloat16, Sub)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, BFloat16, Mul)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, BFloat16, Div)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, BFloat16, Relu)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 14, Trilu)>,
+
+    // OpSet 15
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 15, Pow)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 15, float, BatchNormalization)>,
+    // BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 15, double, BatchNormalization)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 15, MLFloat16, BatchNormalization)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 15, 18, Shape)>,
+
+    // Opset 16
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, float, LeakyRelu)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, double, LeakyRelu)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, MLFloat16, LeakyRelu)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, float, PRelu)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, double, PRelu)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, MLFloat16, PRelu)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, 18, Scan)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, MLFloat16, Where)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, float, Where)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, double_t, Where)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, int32_t, Where)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, int64_t, Where)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, uint8_t, Where)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, int32_t, GreaterOrEqual)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, int64_t, GreaterOrEqual)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, uint32_t, GreaterOrEqual)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, uint64_t, GreaterOrEqual)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, float, GreaterOrEqual)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, double, GreaterOrEqual)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, MLFloat16, GreaterOrEqual)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, int32_t, LessOrEqual)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, int64_t, LessOrEqual)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, uint32_t, LessOrEqual)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, uint64_t, LessOrEqual)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, float, LessOrEqual)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, double, LessOrEqual)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 16, MLFloat16, LessOrEqual)>,
+
+    // Opset 17
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 17, float, LayerNormalization)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 17, double, LayerNormalization)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 17, BFloat16, LayerNormalization)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 17, MLFloat16, LayerNormalization)>,
+
+    // Opset 18
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 18, Split)>,
+
+    // Opset 19
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, float, Cast)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, double, Cast)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, MLFloat16, Cast)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, int8_t, Cast)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, int16_t, Cast)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, int32_t, Cast)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, int64_t, Cast)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, uint8_t, Cast)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, uint16_t, Cast)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, uint32_t, Cast)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, uint64_t, Cast)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, bool, Cast)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, BFloat16, Cast)>,
+
+    BuildKernelCreateInfo<ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, uint8_t, float, DequantizeLinear)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, int8_t, float, DequantizeLinear)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, uint8_t, MLFloat16, DequantizeLinear)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, int8_t, MLFloat16, DequantizeLinear)>,
+
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, Identity)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, If)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, Loop)>,
+
+    BuildKernelCreateInfo<ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, uint8_t, float, QuantizeLinear)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, int8_t, float, QuantizeLinear)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, uint8_t, MLFloat16, QuantizeLinear)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, int8_t, MLFloat16, QuantizeLinear)>,
+
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, Reshape)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, Scan)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kOnnxDomain, 19, Shape)>,
   };
 
   for (auto& function_table_entry : function_table) {
@@ -2250,6 +2214,7 @@ static Status RegisterRocmKernels(KernelRegistry& kernel_registry) {
 
   return Status::OK();
 }
+// clang-format on
 
 }  // namespace rocm
 
@@ -2336,7 +2301,6 @@ ROCMExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph,
   // These are usually shape related computation subgraphs
   // Following logic can be extended for other EPs
   auto cpu_nodes = GetCpuPreferredNodes(graph, kernel_lookup, candidates);
-
   std::vector<std::unique_ptr<ComputeCapability>> result;
   for (auto& node_index : candidates) {
     if (cpu_nodes.count(node_index) > 0)
@@ -2371,7 +2335,7 @@ OrtDevice ROCMExecutionProvider::GetOrtDeviceByMemType(OrtMemType mem_type) cons
 
 std::vector<AllocatorPtr> ROCMExecutionProvider::CreatePreferredAllocators() {
   AllocatorCreationInfo pinned_memory_info(
-      [](OrtDevice::DeviceId device_id) {
+      [](OrtDevice::DeviceId) {
         return std::make_unique<ROCMPinnedAllocator>(HIP_PINNED);
       },
       // TODO: should we use info_.device_id instead of DEFAULT_CPU_ALLOCATOR_DEVICE_ID?
@@ -2383,7 +2347,8 @@ std::vector<AllocatorPtr> ROCMExecutionProvider::CreatePreferredAllocators() {
   return std::vector<AllocatorPtr>{
       CreateRocmAllocator(info_.device_id, info_.gpu_mem_limit, info_.arena_extend_strategy,
                           info_.external_allocator_info, info_.default_memory_arena_cfg),
-      CreateAllocator(pinned_memory_info)};
+      CreateAllocator(pinned_memory_info),
+  };
 }
 
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/rocm/rocm_execution_provider.h b/onnxruntime/core/providers/rocm/rocm_execution_provider.h
index 3e86afb7d643c..c4945b9ac2481 100644
--- a/onnxruntime/core/providers/rocm/rocm_execution_provider.h
+++ b/onnxruntime/core/providers/rocm/rocm_execution_provider.h
@@ -36,11 +36,11 @@ class ROCMExecutionProvider : public IExecutionProvider {
     return nullptr;
   }
 
-  rocblas_handle PerThreadRocblasHandle() {
+  rocblas_handle PerThreadDefaultRocblasHandle() {
     return GetPerThreadContext().RocblasHandle();
   }
 
-  miopenHandle_t PerThreadMiopenHandle() {
+  miopenHandle_t PerThreadDefaultMiopenHandle() {
     return GetPerThreadContext().MiopenHandle();
   }
 
@@ -60,7 +60,6 @@ class ROCMExecutionProvider : public IExecutionProvider {
   const hipDeviceProp_t& GetDeviceProp() const { return device_prop_; };
   int GetMiopenConvExhaustiveSearch() const { return info_.miopen_conv_exhaustive_search; }
   bool DoCopyOnDefaultStream() const { return info_.do_copy_in_default_stream; }
-
   bool GetMiopenConvUseMaxWorkspace() const { return info_.miopen_conv_use_max_workspace; }
 
   ProviderOptions GetProviderOptions() const override {
@@ -68,15 +67,15 @@ class ROCMExecutionProvider : public IExecutionProvider {
   }
 
   static AllocatorPtr CreateRocmAllocator(OrtDevice::DeviceId device_id, size_t rocm_mem_limit, ArenaExtendStrategy arena_extend_strategy,
-                                          ROCMExecutionProviderExternalAllocatorInfo external_alloc_info, OrtArenaCfg* arena_cfg);
+                                          ROCMExecutionProviderExternalAllocatorInfo external_alloc_info, const OrtArenaCfg* arena_cfg);
 
   ITuningContext* GetTuningContext() const override;
 
   std::unique_ptr<profiling::EpProfiler> GetProfiler() override;
 
   void RegisterStreamHandlers(IStreamCommandHandleRegistry& stream_handle_registry, AllocatorMap& allocators) const override;
-  std::vector<AllocatorPtr> CreatePreferredAllocators() override;
   OrtDevice GetOrtDeviceByMemType(OrtMemType mem_type) const override;
+  std::vector<AllocatorPtr> CreatePreferredAllocators() override;
 
  private:
   ROCMExecutionProviderInfo info_;
@@ -105,21 +104,30 @@ class ROCMExecutionProvider : public IExecutionProvider {
 
     template <typename T>
     const T* GetConstOnes(size_t count, hipStream_t stream) {
-      if (std::is_same<T, float>::value) {
+      constexpr bool is_float = std::is_same<T, float>::value;
+      constexpr bool is_double = std::is_same<T, double>::value;
+      constexpr bool is_half = std::is_same<T, half>::value;
+      constexpr bool is_BFloat16 = std::is_same<T, BFloat16>::value;
+      if (is_float) {
         if (!constant_ones_float_) {
           constant_ones_float_ = rocm::CreateConstantOnes<float>();
         }
         return reinterpret_cast<const T*>(constant_ones_float_->GetBuffer(stream, count));
-      } else if (std::is_same<T, double>::value) {
+      } else if (is_double) {
         if (!constant_ones_double_) {
           constant_ones_double_ = rocm::CreateConstantOnes<double>();
         }
         return reinterpret_cast<const T*>(constant_ones_double_->GetBuffer(stream, count));
-      } else if (std::is_same<T, half>::value) {
+      } else if (is_half) {
         if (!constant_ones_half_) {
           constant_ones_half_ = rocm::CreateConstantOnes<half>();
         }
         return reinterpret_cast<const T*>(constant_ones_half_->GetBuffer(stream, count));
+      } else if (is_BFloat16) {
+        if (!constant_ones_bfloat16_) {
+          constant_ones_bfloat16_ = rocm::CreateConstantOnes<BFloat16>();
+        }
+        return reinterpret_cast<const T*>(constant_ones_bfloat16_->GetBuffer(stream, count));
       } else {
         return nullptr;
       }
@@ -132,6 +140,7 @@ class ROCMExecutionProvider : public IExecutionProvider {
     std::unique_ptr<rocm::IConstantBuffer<float>> constant_ones_float_;
     std::unique_ptr<rocm::IConstantBuffer<double>> constant_ones_double_;
     std::unique_ptr<rocm::IConstantBuffer<half>> constant_ones_half_;
+    std::unique_ptr<rocm::IConstantBuffer<BFloat16>> constant_ones_bfloat16_;
   };
 
   using PerThreadContextMap = std::unordered_map<const ROCMExecutionProvider*, std::weak_ptr<PerThreadContext>>;
diff --git a/onnxruntime/core/providers/rocm/rocm_execution_provider_info.cc b/onnxruntime/core/providers/rocm/rocm_execution_provider_info.cc
index 91e3aaaa4280f..650635c153640 100644
--- a/onnxruntime/core/providers/rocm/rocm_execution_provider_info.cc
+++ b/onnxruntime/core/providers/rocm/rocm_execution_provider_info.cc
@@ -27,12 +27,10 @@ constexpr const char* kTunableOpMaxTuningDurationMs = "tunable_op_max_tuning_dur
 }  // namespace provider_option_names
 }  // namespace rocm
 
-namespace {
 const EnumNameMapping<ArenaExtendStrategy> arena_extend_strategy_mapping{
     {ArenaExtendStrategy::kNextPowerOfTwo, "kNextPowerOfTwo"},
     {ArenaExtendStrategy::kSameAsRequested, "kSameAsRequested"},
 };
-}  // namespace
 
 ROCMExecutionProviderInfo ROCMExecutionProviderInfo::FromProviderOptions(const ProviderOptions& options) {
   ROCMExecutionProviderInfo info{};
@@ -81,7 +79,9 @@ ROCMExecutionProviderInfo ROCMExecutionProviderInfo::FromProviderOptions(const P
           .AddAssignmentToEnumReference(
               rocm::provider_option_names::kArenaExtendStrategy,
               arena_extend_strategy_mapping, info.arena_extend_strategy)
-          .AddAssignmentToReference(rocm::provider_option_names::kMiopenConvExhaustiveSearch, info.miopen_conv_exhaustive_search)
+          .AddAssignmentToReference(
+              rocm::provider_option_names::kMiopenConvExhaustiveSearch,
+              info.miopen_conv_exhaustive_search)
           .AddAssignmentToReference(rocm::provider_option_names::kDoCopyInDefaultStream, info.do_copy_in_default_stream)
           .AddAssignmentToReference(rocm::provider_option_names::kMiopenConvUseMaxWorkspace, info.miopen_conv_use_max_workspace)
           .AddValueParser(
diff --git a/onnxruntime/core/providers/rocm/rocm_kernel.h b/onnxruntime/core/providers/rocm/rocm_kernel.h
index 02f15fdad8b77..c0b7d4722d3e4 100644
--- a/onnxruntime/core/providers/rocm/rocm_kernel.h
+++ b/onnxruntime/core/providers/rocm/rocm_kernel.h
@@ -35,14 +35,12 @@ class RocmKernel : public OpKernel {
     // use this to precisely locate the node where ROCM failure comes from
     //  if (hipSuccess != hipDeviceSynchronize())
     //    __debugbreak();
-
     if (s.IsOK()) {
       auto err = hipGetLastError();
       if (err != hipSuccess) {
-        s = ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "HIP error ", hipGetErrorName(err), ":", hipGetErrorString(err));
+        return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "HIP error ", hipGetErrorName(err), ":", hipGetErrorString(err));
       }
     }
-
     return s;
   }
 
@@ -64,18 +62,18 @@ class RocmKernel : public OpKernel {
     return IAllocator::MakeUniquePtr<T>(Info().GetAllocator(OrtMemType::OrtMemTypeDefault), count_or_bytes, true);
   }
 
-  template <typename T>
-  inline IAllocatorUniquePtr<T> AllocateBufferOnCPUPinned(size_t count_or_bytes) const {
-    if (count_or_bytes == 0) return nullptr;
-    return IAllocator::MakeUniquePtr<T>(Info().GetAllocator(OrtMemType::OrtMemTypeCPU), count_or_bytes);
-  }
-
   inline void AddDeferredReleaseCPUPtr(void* p, onnxruntime::Stream* ort_stream) const {
     ORT_ENFORCE(ort_stream->GetDevice().Type() == OrtDevice::GPU);
     auto* rocm_ep_stream = static_cast<RocmStream*>(ort_stream);
     rocm_ep_stream->EnqueDeferredCPUBuffer(p);
   }
 
+  template <typename T>
+  inline IAllocatorUniquePtr<T> AllocateBufferOnCPUPinned(size_t count_or_bytes) const {
+    if (count_or_bytes == 0) return nullptr;
+    return IAllocator::MakeUniquePtr<T>(Info().GetAllocator(OrtMemType::OrtMemTypeCPU), count_or_bytes);
+  }
+
   const hipDeviceProp_t& GetDeviceProp() const { return provider_->GetDeviceProp(); }
 
   inline hipStream_t Stream(OpKernelContext* ctx) const {
@@ -83,6 +81,22 @@ class RocmKernel : public OpKernel {
     return stream ? static_cast<hipStream_t>(stream->GetHandle()) : nullptr;
   }
 
+  inline miopenHandle_t GetMiopenHandle(OpKernelContext* ctx) const {
+    return GetMiopenHandle(static_cast<RocmStream*>(ctx->GetComputeStream()));
+  }
+
+  static inline miopenHandle_t GetMiopenHandle(onnxruntime::RocmStream* stream) {
+    return stream->miopen_handle_;
+  }
+
+  inline rocblas_handle GetRocblasHandle(OpKernelContext* ctx) const {
+    return GetRocblasHandle(static_cast<RocmStream*>(ctx->GetComputeStream()));
+  }
+
+  static inline rocblas_handle GetRocblasHandle(onnxruntime::RocmStream* stream) {
+    return stream->rocblas_handle_;
+  }
+
   tunable::RocmTuningContext* GetTuningContext() const {
     return static_cast<tunable::RocmTuningContext*>(provider_->GetTuningContext());
   }
@@ -106,7 +120,7 @@ class RocmKernel : public OpKernel {
       }
     }
 
-    RocmAsyncBuffer(const RocmKernel* op_kernel, gsl::span<const T> vec) : RocmAsyncBuffer(op_kernel, vec.size()) {
+    RocmAsyncBuffer(const RocmKernel* op_kernel, gsl::span<T const> vec) : RocmAsyncBuffer(op_kernel, vec.size()) {
       memcpy(CpuPtr(), vec.data(), vec.size() * sizeof(T));
     }
 
@@ -151,28 +165,17 @@ class RocmKernel : public OpKernel {
     const RocmKernel* op_kernel_;
   };
 
-  inline rocblas_handle RocblasHandle() const {
-    return provider_->PerThreadRocblasHandle();
-  }
-
-  inline miopenHandle_t MiopenHandle() const {
-    return provider_->PerThreadMiopenHandle();
-  }
-
-  static inline rocblas_handle GetRocblasHandle(onnxruntime::RocmStream* stream) {
-    return stream->rocblas_handle_;
-  }
-
-  inline rocblas_handle GetRocblasHandle(OpKernelContext* ctx) const {
-    return GetRocblasHandle(static_cast<RocmStream*>(ctx->GetComputeStream()));
+  inline rocblas_handle DefaultRocblasHandle() const {
+    return provider_->PerThreadDefaultRocblasHandle();
   }
 
-  static inline miopenHandle_t GetMiopenHandle(onnxruntime::RocmStream* stream) {
-    return stream->miopen_handle_;
+  inline miopenHandle_t DefaultMiopenHandle() const {
+    return provider_->PerThreadDefaultMiopenHandle();
   }
 
-  inline miopenHandle_t GetMiopenHandle(OpKernelContext* ctx) const {
-    return GetMiopenHandle(static_cast<RocmStream*>(ctx->GetComputeStream()));
+  inline Status CopyTensor(const Tensor& src, Tensor& dst, onnxruntime::Stream& stream) const {
+    auto* gpu_data_transfer = Info().GetDataTransferManager().GetDataTransfer(src.Location().device, dst.Location().device);
+    return gpu_data_transfer->CopyTensorAsync(src, dst, stream);
   }
 
  protected:
@@ -181,11 +184,6 @@ class RocmKernel : public OpKernel {
     return provider_->template GetConstOnes<T>(count, stream);
   }
 
-  inline Status CopyTensor(const Tensor& src, Tensor& dst, onnxruntime::Stream& stream) const {
-    auto* gpu_data_transfer = Info().GetDataTransferManager().GetDataTransfer(src.Location().device, dst.Location().device);
-    return gpu_data_transfer->CopyTensorAsync(src, dst, stream);
-  }
-
   inline int GetDeviceId() const { return provider_->GetDeviceId(); }
 
  private:
diff --git a/onnxruntime/core/providers/rocm/rocm_provider_factory.cc b/onnxruntime/core/providers/rocm/rocm_provider_factory.cc
index e55b2edbad685..4d88c25469372 100644
--- a/onnxruntime/core/providers/rocm/rocm_provider_factory.cc
+++ b/onnxruntime/core/providers/rocm/rocm_provider_factory.cc
@@ -3,15 +3,13 @@
 
 #include "core/providers/shared_library/provider_api.h"
 #include "core/providers/rocm/rocm_provider_factory.h"
-
-#include <memory>
+#include "core/providers/rocm/rocm_provider_factory_creator.h"
 
 #include "core/common/gsl.h"
 
 #include "core/providers/rocm/rocm_execution_provider.h"
 #include "core/providers/rocm/rocm_execution_provider_info.h"
 #include "core/providers/rocm/rocm_allocator.h"
-#include "core/providers/rocm/rocm_provider_factory_creator.h"
 #include "core/providers/rocm/gpu_data_transfer.h"
 #include "core/providers/rocm/math/unary_elementwise_ops_impl.h"
 
@@ -47,7 +45,7 @@ std::unique_ptr<IExecutionProvider> ROCMProviderFactory::CreateProvider() {
   return std::make_unique<ROCMExecutionProvider>(info_);
 }
 
-struct ProviderInfo_ROCM_Impl : ProviderInfo_ROCM {
+struct ProviderInfo_ROCM_Impl final : ProviderInfo_ROCM {
   OrtStatus* SetCurrentGpuDeviceId(_In_ int device_id) override {
     int num_devices;
     auto hip_err = ::hipGetDeviceCount(&num_devices);
@@ -128,9 +126,24 @@ struct ProviderInfo_ROCM_Impl : ProviderInfo_ROCM {
   }
 
   // Used by slice_concatenate_test.cc and onnxruntime_pybind_state.cc
-  void rocmMemcpy_HostToDevice(void* dst, const void* src, size_t count) override { HIP_CALL_THROW(hipMemcpy(dst, src, count, hipMemcpyHostToDevice)); }
+
+  void rocmMemcpy_HostToDevice(void* dst, const void* src, size_t count) override {
+    // hipMemcpy() operates on the default stream
+    HIP_CALL_THROW(hipMemcpy(dst, src, count, hipMemcpyHostToDevice));
+
+    // To ensure that the copy has completed, invoke a stream sync for the default stream.
+    // For transfers from pageable host memory to device memory, a stream sync is performed before the copy is initiated.
+    // The function will return once the pageable buffer has been copied to the staging memory for DMA transfer
+    // to device memory, but the DMA to final destination may not have completed.
+
+    HIP_CALL_THROW(hipStreamSynchronize(0));
+  }
+
   // Used by onnxruntime_pybind_state.cc
-  void rocmMemcpy_DeviceToHost(void* dst, const void* src, size_t count) override { HIP_CALL_THROW(hipMemcpy(dst, src, count, hipMemcpyDeviceToHost)); }
+  void rocmMemcpy_DeviceToHost(void* dst, const void* src, size_t count) override {
+    // For transfers from device to either pageable or pinned host memory, the function returns only once the copy has completed.
+    HIP_CALL_THROW(hipMemcpy(dst, src, count, hipMemcpyDeviceToHost));
+  }
 
   int hipGetDeviceCount() override {
     int num_devices = 0;
@@ -152,10 +165,9 @@ struct ProviderInfo_ROCM_Impl : ProviderInfo_ROCM {
     return std::make_shared<ROCMProviderFactory>(info);
   }
 
-  std::shared_ptr<IAllocator> CreateRocmAllocator(int16_t device_id, size_t gpu_mem_limit, onnxruntime::ArenaExtendStrategy arena_extend_strategy, onnxruntime::ROCMExecutionProviderExternalAllocatorInfo& external_allocator_info, OrtArenaCfg* default_memory_arena_cfg) override {
+  std::shared_ptr<IAllocator> CreateRocmAllocator(int16_t device_id, size_t gpu_mem_limit, onnxruntime::ArenaExtendStrategy arena_extend_strategy, onnxruntime::ROCMExecutionProviderExternalAllocatorInfo& external_allocator_info, const OrtArenaCfg* default_memory_arena_cfg) override {
     return ROCMExecutionProvider::CreateRocmAllocator(device_id, gpu_mem_limit, arena_extend_strategy, external_allocator_info, default_memory_arena_cfg);
   }
-
 } g_info;
 
 struct ROCM_Provider : Provider {
@@ -169,8 +181,8 @@ struct ROCM_Provider : Provider {
     info.gpu_mem_limit = params->gpu_mem_limit;
     info.arena_extend_strategy = static_cast<onnxruntime::ArenaExtendStrategy>(params->arena_extend_strategy);
     info.miopen_conv_exhaustive_search = params->miopen_conv_exhaustive_search;
-    info.do_copy_in_default_stream = params->do_copy_in_default_stream;
-    info.has_user_compute_stream = params->has_user_compute_stream;
+    info.do_copy_in_default_stream = params->do_copy_in_default_stream != 0;
+    info.has_user_compute_stream = params->has_user_compute_stream != 0;
     info.user_compute_stream = params->user_compute_stream;
     info.default_memory_arena_cfg = params->default_memory_arena_cfg;
     info.tunable_op.enable = params->tunable_op_enable;
@@ -180,21 +192,32 @@ struct ROCM_Provider : Provider {
     return std::make_shared<ROCMProviderFactory>(info);
   }
 
+  /**
+   * This function will be called by the C API UpdateROCMProviderOptions().
+   *
+   * What this function does is equivalent to resetting the OrtROCMProviderOptions instance with
+   * default ROCMExecutionProviderInf instance first and then set up the provided provider options.
+   * See ROCMExecutionProviderInfo::FromProviderOptions() for more details.
+   */
   void UpdateProviderOptions(void* provider_options, const ProviderOptions& options) override {
-    auto info = onnxruntime::ROCMExecutionProviderInfo::FromProviderOptions(options);
+    auto internal_options = onnxruntime::ROCMExecutionProviderInfo::FromProviderOptions(options);
     auto& rocm_options = *reinterpret_cast<OrtROCMProviderOptions*>(provider_options);
 
-    rocm_options.device_id = info.device_id;
-    rocm_options.gpu_mem_limit = info.gpu_mem_limit;
-    rocm_options.arena_extend_strategy = static_cast<int>(info.arena_extend_strategy);
-    rocm_options.miopen_conv_exhaustive_search = info.miopen_conv_exhaustive_search;
-    rocm_options.do_copy_in_default_stream = info.do_copy_in_default_stream;
-    rocm_options.has_user_compute_stream = info.has_user_compute_stream;
-    rocm_options.user_compute_stream = info.user_compute_stream;
-    rocm_options.default_memory_arena_cfg = info.default_memory_arena_cfg;
-    rocm_options.tunable_op_enable = info.tunable_op.enable;
-    rocm_options.tunable_op_tuning_enable = info.tunable_op.tuning_enable;
-    rocm_options.tunable_op_max_tuning_duration_ms = info.tunable_op.max_tuning_duration_ms;
+    rocm_options.device_id = internal_options.device_id;
+    rocm_options.gpu_mem_limit = internal_options.gpu_mem_limit;
+    rocm_options.arena_extend_strategy = static_cast<int>(internal_options.arena_extend_strategy);
+    rocm_options.miopen_conv_exhaustive_search = internal_options.miopen_conv_exhaustive_search;
+    rocm_options.do_copy_in_default_stream = internal_options.do_copy_in_default_stream;
+    rocm_options.has_user_compute_stream = internal_options.has_user_compute_stream;
+    // The 'has_user_compute_stream' of the OrtROCMProviderOptions instance can be set by C API UpdateROCMProviderOptionsWithValue() as well.
+    // We only set the 'has_user_compute_stream' of the OrtROCMProviderOptions instance if it is provided in options
+    if (options.find("has_user_compute_stream") != options.end()) {
+      rocm_options.user_compute_stream = internal_options.user_compute_stream;
+    }
+    rocm_options.default_memory_arena_cfg = internal_options.default_memory_arena_cfg;
+    rocm_options.tunable_op_enable = internal_options.tunable_op.enable;
+    rocm_options.tunable_op_tuning_enable = internal_options.tunable_op.tuning_enable;
+    rocm_options.tunable_op_max_tuning_duration_ms = internal_options.tunable_op.max_tuning_duration_ms;
   }
 
   ProviderOptions GetProviderOptions(const void* provider_options) override {
diff --git a/onnxruntime/core/providers/rocm/rocm_provider_factory.h b/onnxruntime/core/providers/rocm/rocm_provider_factory.h
index 8cd7bd357330f..80b887af4eb75 100644
--- a/onnxruntime/core/providers/rocm/rocm_provider_factory.h
+++ b/onnxruntime/core/providers/rocm/rocm_provider_factory.h
@@ -3,6 +3,7 @@
 
 #include "onnxruntime_c_api.h"
 #include "core/framework/provider_options.h"
+#include "core/common/common.h"
 
 namespace onnxruntime {
 class IAllocator;
@@ -43,7 +44,16 @@ struct ProviderInfo_ROCM {
 #endif
 
   virtual std::shared_ptr<onnxruntime::IExecutionProviderFactory> CreateExecutionProviderFactory(const onnxruntime::ROCMExecutionProviderInfo& info) = 0;
-  virtual std::shared_ptr<onnxruntime::IAllocator> CreateRocmAllocator(int16_t device_id, size_t gpu_mem_limit, onnxruntime::ArenaExtendStrategy arena_extend_strategy, onnxruntime::ROCMExecutionProviderExternalAllocatorInfo& external_allocator_info, OrtArenaCfg* default_memory_arena_cfg) = 0;
+  virtual std::shared_ptr<onnxruntime::IAllocator> CreateRocmAllocator(int16_t device_id, size_t gpu_mem_limit, onnxruntime::ArenaExtendStrategy arena_extend_strategy, onnxruntime::ROCMExecutionProviderExternalAllocatorInfo& external_allocator_info, const OrtArenaCfg* default_memory_arena_cfg) = 0;
+
+  // This function is the entry point to ROCM EP's UT cases.
+  // All tests ared only called from onnxruntime_test_all.
+  virtual void TestAll() {
+    ORT_NOT_IMPLEMENTED(__FUNCTION__, " is only implements in test code path.");
+  }
+
+ protected:
+  ~ProviderInfo_ROCM() = default;  // Can only be destroyed through a subclass instance
 };
 
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/rocm/rocm_stream_handle.cc b/onnxruntime/core/providers/rocm/rocm_stream_handle.cc
index 0d9877e6b18e6..670aae91ca710 100644
--- a/onnxruntime/core/providers/rocm/rocm_stream_handle.cc
+++ b/onnxruntime/core/providers/rocm/rocm_stream_handle.cc
@@ -1,7 +1,9 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+#include "core/providers/rocm/rocm_resource.h"
 #include "core/providers/rocm/rocm_stream_handle.h"
 #include "core/providers/rocm/rocm_common.h"
 // #include "core/common/spin_pause.h"
-#include "core/providers/rocm/rocm_resource.h"
 
 namespace onnxruntime {
 
@@ -82,15 +84,29 @@ void RocmStream::EnqueDeferredCPUBuffer(void* cpu_buffer) {
   deferred_cpu_buffers_.push_back(cpu_buffer);
 }
 
-struct CpuBuffersInfo {  // TODO: should be moved to base class
+struct CpuBuffersInfo {
+  // This struct stores the information needed
+  // to release CPU buffers allocated for GPU kernels.
+  // It's used to enqueue their release after
+  // associated GPU kernels in a ROCM stream.
+
+  // This is a CPU allocator in ROCM EP.
+  // It must be the one used to allocate the
+  // following pointers.
   AllocatorPtr allocator;
+  // buffers[i] is the i-th pointer added by
+  // AddDeferredReleaseCPUPtr for a specific
+  // ROCM stream. For example, this fields
+  // should contain all values in
+  // deferred_release_buffer_pool_[my_stream]
+  // when release my_stream's buffers.
   std::unique_ptr<void*[]> buffers;
   // CPU buffer buffers[i].
   // Number of buffer points in "buffers".
   size_t n_buffers;
 };
 
-static void ReleaseCpuBufferCallback(hipStream_t /*stream*/, hipError_t /*status*/, void* raw_info) {  // TODO: should be moved to base class
+static void ReleaseCpuBufferCallback(void* raw_info) {
   std::unique_ptr<CpuBuffersInfo> info = std::make_unique<CpuBuffersInfo>();
   info.reset(reinterpret_cast<CpuBuffersInfo*>(raw_info));
   for (size_t i = 0; i < info->n_buffers; ++i) {
@@ -111,14 +127,7 @@ Status RocmStream::CleanUpOnRunEnd() {
       cpu_buffers_info->buffers[i] = deferred_cpu_buffers_.at(i);
     }
     cpu_buffers_info->n_buffers = deferred_cpu_buffers_.size();
-    // TODO(wechi): CUDA deprecates cudaStreamAddCallback and
-    // uses another API, cudaLaunchHostFunc(which can be
-    // captured in CUDA graph). Once AMD adds similar feature,
-    // we should replace the following line with
-    //  hipLaunchHostFunc(stream, ReleaseCpuBufferCallback, cpu_buffers_info);
-
-    // Release memory asynchronously to avoid blocking the compute stream.
-    HIP_RETURN_IF_ERROR(hipStreamAddCallback(static_cast<hipStream_t>(GetHandle()), ReleaseCpuBufferCallback, cpu_buffers_info.release(), 0));
+    HIP_RETURN_IF_ERROR(hipLaunchHostFunc(static_cast<hipStream_t>(GetHandle()), ReleaseCpuBufferCallback, cpu_buffers_info.release()));
   } else {
     HIP_RETURN_IF_ERROR(hipStreamSynchronize(static_cast<hipStream_t>(GetHandle())));
     for (auto* buffer : deferred_cpu_buffers_) {
@@ -130,10 +139,10 @@ Status RocmStream::CleanUpOnRunEnd() {
   return Status::OK();
 }
 
-void* RocmStream::GetResource(int version, int type) const {
+void* RocmStream::GetResource(int version, int id) const {
   ORT_ENFORCE(version <= ORT_ROCM_RESOUCE_VERSION, "resource version unsupported!");
   void* resource{};
-  switch (type) {
+  switch (id) {
     case RocmResource::hip_stream_t:
       return reinterpret_cast<void*>(GetHandle());
       break;
@@ -149,6 +158,7 @@ void* RocmStream::GetResource(int version, int type) const {
   return resource;
 }
 
+// CPU Stream command handles
 void WaitRocmNotificationOnDevice(Stream& stream, synchronize::Notification& notification) {
   static_cast<RocmNotification*>(&notification)->wait_on_device(stream);
 }
diff --git a/onnxruntime/core/providers/rocm/rocm_stream_handle.h b/onnxruntime/core/providers/rocm/rocm_stream_handle.h
index 865cff0abf85f..1f3e5b75548e7 100644
--- a/onnxruntime/core/providers/rocm/rocm_stream_handle.h
+++ b/onnxruntime/core/providers/rocm/rocm_stream_handle.h
@@ -1,3 +1,6 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
 #pragma once
 #include "core/providers/rocm/rocm_pch.h"
 // #include "core/providers/cuda/shared_inc/cuda_utils.h"
@@ -17,14 +20,12 @@ struct RocmStream : Stream {
 
   ~RocmStream();
 
-  std::unique_ptr<synchronize::Notification> CreateNotification(size_t num_consumers) override;
+  std::unique_ptr<synchronize::Notification> CreateNotification(size_t /*num_consumers*/) override;
 
   void Flush() override;
 
   Status CleanUpOnRunEnd() override;
 
-  void* GetResource(int version, int id) const override;
-
   void EnqueDeferredCPUBuffer(void* cpu_buffer);
 
   bool own_stream_{true};
@@ -33,6 +34,8 @@ struct RocmStream : Stream {
 
   rocblas_handle rocblas_handle_{};
 
+  void* GetResource(int version, int id) const override;
+
  private:
   std::vector<void*> deferred_cpu_buffers_;
   AllocatorPtr cpu_allocator_;
diff --git a/onnxruntime/core/providers/rocm/rocm_utils.cu b/onnxruntime/core/providers/rocm/rocm_utils.cu
index cbf410e78a4a9..b817e025cedf4 100644
--- a/onnxruntime/core/providers/rocm/rocm_utils.cu
+++ b/onnxruntime/core/providers/rocm/rocm_utils.cu
@@ -30,13 +30,14 @@ template <typename T>
 void Fill(hipStream_t stream, T* output, T value, int64_t count) {
   int blocksPerGrid = static_cast<int>(CeilDiv(count, GridDim::maxThreadsPerBlock * GridDim::maxElementsPerThread));
   HIP_LONG N = static_cast<HIP_LONG>(count);
-  _Fill<T, GridDim::maxThreadsPerBlock, GridDim::maxElementsPerThread><<<dim3(blocksPerGrid), dim3(GridDim::maxThreadsPerBlock), 0, stream>>>(output, value, N);
+  _Fill<T, GridDim::maxThreadsPerBlock, GridDim::maxElementsPerThread>
+      <<<blocksPerGrid, GridDim::maxThreadsPerBlock, 0, stream>>>(output, value, N);
 }
 template <typename T>
 class ConstantBufferImpl : public IConstantBuffer<T> {
  public:
-  ConstantBufferImpl(T val) : buffer_(nullptr), count_(0), val_(val) {}
-
+  ConstantBufferImpl(T val) : buffer_(nullptr), count_(0), val_(val) {
+  }
   ~ConstantBufferImpl() {
     if (buffer_)
       HIP_CALL_THROW(hipFree(buffer_));
@@ -70,6 +71,7 @@ std::unique_ptr<IConstantBuffer<T>> CreateConstantOnes() {
 template std::unique_ptr<IConstantBuffer<float>> CreateConstantOnes<float>();
 template std::unique_ptr<IConstantBuffer<double>> CreateConstantOnes<double>();
 template std::unique_ptr<IConstantBuffer<half>> CreateConstantOnes<half>();
+template std::unique_ptr<IConstantBuffer<BFloat16>> CreateConstantOnes<BFloat16>();
 
 #define SPECIALIZED_FILL(T) \
   template void Fill<T>(hipStream_t stream, T * output, T value, int64_t count);
@@ -81,6 +83,7 @@ SPECIALIZED_FILL(int64_t)
 SPECIALIZED_FILL(float)
 SPECIALIZED_FILL(double)
 SPECIALIZED_FILL(__half)
+SPECIALIZED_FILL(BFloat16)
 
 }  // namespace rocm
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/rocm/shared_inc/fast_divmod.h b/onnxruntime/core/providers/rocm/shared_inc/fast_divmod.h
deleted file mode 100644
index 83ca0a443c4fa..0000000000000
--- a/onnxruntime/core/providers/rocm/shared_inc/fast_divmod.h
+++ /dev/null
@@ -1,90 +0,0 @@
-//
-// Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved
-// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
-//
-
-#pragma once
-
-#include <iostream>
-#include <limits>
-#include <hip/hip_runtime.h>
-#include <cmath>
-#include "core/common/common.h"
-
-namespace onnxruntime {
-namespace rocm {
-
-// DivMod is a helper class for integer division and modulo operation.
-// There is a fast version for int type and a slow version for other type.
-template <typename T>
-struct DivMod {
-  DivMod(T d = 1) {
-    d_ = d == 0 ? 1 : d;
-    ORT_ENFORCE(d_ >= 1 && d_ <= std::numeric_limits<T>::max());
-  }
-
-  __host__ __device__ inline T div(T n) const {
-    return n / d_;
-  }
-
-  __host__ __device__ inline T mod(T n) const {
-    return n % d_;
-  }
-
-  __host__ __device__ inline void divmod(T n, T& q, T& r) const {
-    q = div(n);
-    r = n - q * d_;
-  }
-
-  T d_;  // divisor
-};
-
-// The code below is based on section 4 Unsigned division of paper https://gmplib.org/~tege/divcnst-pldi94.pdf
-// In current ORT, fast_divmod is used for calculating the position of a element in tensor,
-// so unsigned integer division from the paper is good enough for ORT. The advantage is that div is very simple,
-// then GPU compiler can do loop unroll easilly when divmod is called in a loop.
-template <>
-struct DivMod<int> {
-  DivMod(int d = 1) {
-    d_ = d == 0 ? 1 : d;
-    ORT_ENFORCE(d_ >= 1 && d_ <= static_cast<uint32_t>(std::numeric_limits<int>::max()));
-
-    for (l_ = 0; l_ < 32; l_++)
-      if ((1U << l_) >= d_) break;
-
-    uint64_t one = 1;
-    uint64_t m = ((one << 32) * ((one << l_) - d_)) / d_ + 1;
-    M_ = static_cast<uint32_t>(m);
-    // according to paper, the value of m' should fit in a unsigned integer.
-    ORT_ENFORCE(M_ > 0 && M_ == m);
-  }
-
-  __host__ __device__ inline int div(int n) const {
-#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)
-    uint32_t t = __umulhi(M_, n);
-    return (t + n) >> l_;
-#else
-    // Using uint64_t for t, then t + n won't overflow.
-    uint64_t t = ((uint64_t)M_ * n) >> 32;
-    return static_cast<int>((t + n) >> l_);
-#endif
-  }
-
-  __host__ __device__ inline int mod(int n) const {
-    return n - div(n) * d_;
-  }
-
-  __host__ __device__ inline void divmod(int n, int& q, int& r) const {
-    q = div(n);
-    r = n - q * d_;
-  }
-
-  uint32_t d_;  // divisor
-  uint32_t M_;  // m' in the paper.
-  uint32_t l_;  // l_ = ceil(log2(d_))
-};
-
-using fast_divmod = DivMod<int>;  // Keep the old name for backward compatibility.
-
-}  // namespace rocm
-}  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/rocm/shared_inc/rocm_call.h b/onnxruntime/core/providers/rocm/shared_inc/rocm_call.h
index d6623ef63f0fd..b6b40666b8bd0 100644
--- a/onnxruntime/core/providers/rocm/shared_inc/rocm_call.h
+++ b/onnxruntime/core/providers/rocm/shared_inc/rocm_call.h
@@ -17,16 +17,20 @@ std::conditional_t<THRW, void, Status> RocmCall(
 
 #define HIP_CALL(expr) (RocmCall<hipError_t, false>((expr), #expr, "HIP", hipSuccess, "", __FILE__, __LINE__))
 #define ROCBLAS_CALL(expr) (RocmCall<rocblas_status, false>((expr), #expr, "ROCBLAS", rocblas_status_success, "", __FILE__, __LINE__))
+
 #define HIPSPARSE_CALL(expr) (RocmCall<hipsparseStatus_t, false>((expr), #expr, "HIPSPARSE", HIPSPARSE_STATUS_SUCCESS, "", __FILE__, __LINE__))
 #define HIPRAND_CALL(expr) (RocmCall<hiprandStatus_t, false>((expr), #expr, "HIPRAND", HIPRAND_STATUS_SUCCESS, "", __FILE__, __LINE__))
 #define MIOPEN_CALL(expr) (RocmCall<miopenStatus_t, false>((expr), #expr, "MIOPEN", miopenStatusSuccess, "", __FILE__, __LINE__))
 #define MIOPEN_CALL2(expr, m) (RocmCall<miopenStatus_t, false>((expr), #expr, "MIOPEN", miopenStatusSuccess, m, __FILE__, __LINE__))
+
 #define HIPFFT_CALL(expr) (RocmCall<hipfftResult, false>((expr), #expr, "HIPFFT", HIPFFT_SUCCESS, "", __FILE__, __LINE__))
 
 #define HIP_CALL_THROW(expr) (RocmCall<hipError_t, true>((expr), #expr, "HIP", hipSuccess, "", __FILE__, __LINE__))
 #define ROCBLAS_CALL_THROW(expr) (RocmCall<rocblas_status, true>((expr), #expr, "ROCBLAS", rocblas_status_success, "", __FILE__, __LINE__))
+
 #define HIPSPARSE_CALL_THROW(expr) (RocmCall<hipsparseStatus_t, true>((expr), #expr, "HIPSPARSE", HIPSPARSE_STATUS_SUCCESS, "", __FILE__, __LINE__))
 #define HIPRAND_CALL_THROW(expr) (RocmCall<hiprandStatus_t, true>((expr), #expr, "HIPRAND", HIPRAND_STATUS_SUCCESS, "", __FILE__, __LINE__))
+
 #define MIOPEN_CALL_THROW(expr) (RocmCall<miopenStatus_t, true>((expr), #expr, "MIOPEN", miopenStatusSuccess, "", __FILE__, __LINE__))
 #define MIOPEN_CALL_THROW2(expr, m) (RocmCall<miopenStatus_t, true>((expr), #expr, "MIOPEN", miopenStatusSuccess, m, __FILE__, __LINE__))
 #define HIPFFT_CALL_THROW(expr) (RocmCall<hipfftResult, true>((expr), #expr, "HIPFFT", HIPFFT_SUCCESS, "", __FILE__, __LINE__))
diff --git a/onnxruntime/core/providers/rocm/tunable/gemm_ck.cuh b/onnxruntime/core/providers/rocm/tunable/gemm_ck.cuh
index 86d023886cfaf..2518f45e0995e 100644
--- a/onnxruntime/core/providers/rocm/tunable/gemm_ck.cuh
+++ b/onnxruntime/core/providers/rocm/tunable/gemm_ck.cuh
@@ -61,7 +61,7 @@ auto GetCKGemmTypeStringAndOps() {
                                            params->lda, params->ldb, params->ldc,
                                            nop, nop, nop);
       TUNABLE_OP_RETURN_UNSUPPORTED_ARGUMENT_IF(!impl->IsSupportedArgument(arg.get()),
-                                                impl->GetTypeString(), " does not support ", params->Signature());
+                                                impl->GetTypeString(), " does not support the params");
       invoker->Run(arg.get(), StreamConfig{params->StreamHandle()});
       return Status::OK();
     };
@@ -164,7 +164,7 @@ auto GetCKStridedBatchedGemmTypeStringAndOps() {
       auto zero = ToHipType<T>::FromFloat(0.0f);
       TUNABLE_OP_RETURN_UNSUPPORTED_ARGUMENT_IF(
           params->alpha != one || params->beta != zero,
-          impl->GetTypeString(), " only supports alpha == 1 and beta == 0", params->Signature());
+          impl->GetTypeString(), " only supports alpha == 1 and beta == 0");
 
       auto nop = Nop{};
       auto arg = impl->MakeArgumentPointer(params->a, params->b, params->c,
@@ -174,7 +174,7 @@ auto GetCKStridedBatchedGemmTypeStringAndOps() {
                                            params->batch,
                                            nop, nop, nop);
       TUNABLE_OP_RETURN_UNSUPPORTED_ARGUMENT_IF(!impl->IsSupportedArgument(arg.get()),
-                                                impl->GetTypeString(), " does not support ", params->Signature());
+                                                impl->GetTypeString(), " does not support the params");
       invoker->Run(arg.get(), StreamConfig{params->StreamHandle()});
       return Status::OK();
     };
diff --git a/onnxruntime/core/providers/rocm/tunable/gemm_hipblaslt.h b/onnxruntime/core/providers/rocm/tunable/gemm_hipblaslt.h
index d5f9de26ada22..776dabd757af4 100644
--- a/onnxruntime/core/providers/rocm/tunable/gemm_hipblaslt.h
+++ b/onnxruntime/core/providers/rocm/tunable/gemm_hipblaslt.h
@@ -26,6 +26,10 @@ using onnxruntime::contrib::rocm::blas::GemmFastGeluParams;
 
 #ifdef USE_HIPBLASLT
 
+// For large K and small M/N, K dim will be split to multiple workgroups and buffers,
+// which will require additional workspace. Here we set the max workspace size to 32MB.
+constexpr const size_t kHipBlasLtMaxWorkSpaceSizeInBytes = 32 * 1024 * 1024;
+
 enum ActivationType {
   NONE = 0,
   RELU = 1,
@@ -221,10 +225,13 @@ auto GetHipBlasLtTypeStringAndOps(ActivationType activation_type = ActivationTyp
 
       TUNABLE_OP_RETURN_UNSUPPORTED_ARGUMENT_IF(
           status != HIPBLAS_STATUS_SUCCESS,
-          "[hipBLASLt] Solution #", i, " failed: algo ", algo_index, " not supported (", params->Signature(), ")");
+          "[hipBLASLt] Solution #", i, " failed: algo ", algo_index, " not supported");
 
       IAllocatorUniquePtr<void> workspace_buffer;
       if (workspace_size > 0) {
+        TUNABLE_OP_RETURN_UNSUPPORTED_ARGUMENT_IF(workspace_size > kHipBlasLtMaxWorkSpaceSizeInBytes,
+                                                  "Workspace size exceeds limit (32M): ", workspace_size);
+        workspace_size = kHipBlasLtMaxWorkSpaceSizeInBytes;
         workspace_buffer = params->tuning_ctx->GetScratchBuffer(workspace_size, params->stream);
       }
 
diff --git a/onnxruntime/core/providers/rocm/tunable/gemm_rocblas.h b/onnxruntime/core/providers/rocm/tunable/gemm_rocblas.h
index 8e894e63c5de1..a391d1af8868c 100644
--- a/onnxruntime/core/providers/rocm/tunable/gemm_rocblas.h
+++ b/onnxruntime/core/providers/rocm/tunable/gemm_rocblas.h
@@ -168,8 +168,7 @@ auto GetRocBlasGemmTypeStringAndOps() {
 
       TUNABLE_OP_RETURN_UNSUPPORTED_ARGUMENT_IF(
           status != rocblas_status_success,
-          "[rocBLAS] Solution #", i, " (original ", solution, ") failed: ", rocblas_status_to_string(status),
-          " (", params->Signature(), ")");
+          "[rocBLAS] Solution #", i, " (original ", solution, ") failed: ", rocblas_status_to_string(status));
 
       return Status::OK();
     };
@@ -238,8 +237,7 @@ auto GetRocBlasBatchedGemmTypeStringAndOps() {
 
       TUNABLE_OP_RETURN_UNSUPPORTED_ARGUMENT_IF(
           status != rocblas_status_success,
-          "[rocBLAS] Solution #", i, " (original ", solution, ") failed: ", rocblas_status_to_string(status),
-          " (", params->Signature(), ")");
+          "[rocBLAS] Solution #", i, " (original ", solution, ") failed: ", rocblas_status_to_string(status));
 
       return Status::OK();
     };
@@ -308,8 +306,7 @@ auto GetRocBlasStridedBatchedGemmTypeStringAndOps() {
 
       TUNABLE_OP_RETURN_UNSUPPORTED_ARGUMENT_IF(
           status != rocblas_status_success,
-          "[rocBLAS] Solution #", i, " (original ", solution, ") failed: ", rocblas_status_to_string(status),
-          " (", params->Signature(), ")");
+          "[rocBLAS] Solution #", i, " (original ", solution, ") failed: ", rocblas_status_to_string(status));
 
       return Status::OK();
     };
diff --git a/onnxruntime/core/providers/shared/utils/utils.cc b/onnxruntime/core/providers/shared/utils/utils.cc
index 0eff00a53271e..39ea4dd8412bb 100644
--- a/onnxruntime/core/providers/shared/utils/utils.cc
+++ b/onnxruntime/core/providers/shared/utils/utils.cc
@@ -32,38 +32,53 @@ bool GetClipMinMax(const InitializedTensorSet& initializers, const Node& node,
   if (!GetType(*node.InputDefs()[0], input_type, logger))
     return false;
 
-  if (input_type != ONNX_NAMESPACE::TensorProto_DataType_FLOAT) {
-    LOGS(logger, VERBOSE) << "GetClipMinMax() only support Clip node with float inputs for now. "
-                          << "The node [" << node_name << "] has input 0 type: " << input_type;
-    return false;
-  }
-
   min = std::numeric_limits<float>::lowest();
   max = std::numeric_limits<float>::max();
 
   if (node.SinceVersion() < 11) {  // Clip opset 1, 6 is using attributes for min/max
     NodeAttrHelper helper(node);
+    // attributes will be always float
     min = helper.Get("min", std::numeric_limits<float>::lowest());
     max = helper.Get("max", std::numeric_limits<float>::max());
   } else {
-    if (node.InputDefs().size() > 1) {  // we have input min
+    if (node.InputDefs().size() > 1) {
+      // we have input min
       const auto& min_name = node.InputDefs()[1]->Name();
       if (!Contains(initializers, min_name)) {
         LOGS(logger, VERBOSE) << "Input min of Clip must be known";
         return false;
       }
-      Initializer unpacked_tensor(*initializers.at(min_name));
-      min = unpacked_tensor.DataAsSpan<float>()[0];
-    }
+      Initializer unpacked_tensor_min(*initializers.at(min_name));
+      switch (input_type) {
+        case ONNX_NAMESPACE::TensorProto_DataType_FLOAT:
+          min = unpacked_tensor_min.DataAsSpan<float>()[0];
+          break;
+        case ONNX_NAMESPACE::TensorProto_DataType_FLOAT16:
+          min = (unpacked_tensor_min.DataAsSpan<MLFloat16>()[0]).ToFloat();
+          break;
+        default:
+          LOGS(logger, VERBOSE) << "GetClipMinMax() only support Clip node with float inputs for now. "
+                                << "The node [" << node_name << "] has input 0 type: " << input_type;
+          return false;
+      }
 
-    if (node.InputDefs().size() > 2) {  // we have input max
-      const auto& max_name = node.InputDefs()[2]->Name();
-      if (!Contains(initializers, max_name)) {
-        LOGS(logger, VERBOSE) << "Input max of Clip must be known";
-        return false;
+      if (node.InputDefs().size() > 2) {
+        // we have input max
+        const auto& max_name = node.InputDefs()[2]->Name();
+        if (!Contains(initializers, max_name)) {
+          LOGS(logger, VERBOSE) << "Input max of Clip must be known";
+          return false;
+        }
+        Initializer unpacked_tensor_max(*initializers.at(max_name));
+        switch (input_type) {
+          case ONNX_NAMESPACE::TensorProto_DataType_FLOAT:
+            max = unpacked_tensor_max.DataAsSpan<float>()[0];
+            break;
+          case ONNX_NAMESPACE::TensorProto_DataType_FLOAT16:
+            max = (unpacked_tensor_max.DataAsSpan<MLFloat16>()[0]).ToFloat();
+            break;
+        }
       }
-      Initializer unpacked_tensor(*initializers.at(max_name));
-      max = unpacked_tensor.DataAsSpan<float>()[0];
     }
   }
 
@@ -104,7 +119,7 @@ int64_t NodeAttrHelper::Get(const std::string& key, int64_t def_val) const {
   return node_attributes_.at(key).i();
 }
 
-std::string NodeAttrHelper::Get(const std::string& key, const std::string& def_val) const {
+const std::string& NodeAttrHelper::Get(const std::string& key, const std::string& def_val) const {
   if (!HasAttr(key))
     return def_val;
 
@@ -151,6 +166,12 @@ std::vector<float> NodeAttrHelper::Get(const std::string& key, const std::vector
   return std::vector<float>{source.cbegin(), source.cend()};
 }
 
+std::optional<int64_t> NodeAttrHelper::GetInt(const std::string& key) const {
+  if (!HasAttr(key))
+    return std::nullopt;
+  return node_attributes_.at(key).i();
+}
+
 bool NodeAttrHelper::HasAttr(const std::string& key) const {
   return Contains(node_attributes_, key);
 }
diff --git a/onnxruntime/core/providers/shared/utils/utils.h b/onnxruntime/core/providers/shared/utils/utils.h
index 744c8779c47cc..1e93f040711df 100644
--- a/onnxruntime/core/providers/shared/utils/utils.h
+++ b/onnxruntime/core/providers/shared/utils/utils.h
@@ -6,6 +6,7 @@
 #include <cstdint>
 #include <string>
 #include <vector>
+#include <optional>
 
 #include "core/graph/basic_types.h"
 
@@ -44,7 +45,7 @@ class NodeAttrHelper {
 
   int64_t Get(const std::string& key, int64_t def_val) const;
 
-  std::string Get(const std::string& key, const std::string& def_val) const;
+  const std::string& Get(const std::string& key, const std::string& def_val) const;
 
   std::vector<int64_t> Get(const std::string& key, const std::vector<int64_t>& def_val) const;
   std::vector<float> Get(const std::string& key, const std::vector<float>& def_val) const;
@@ -57,6 +58,8 @@ class NodeAttrHelper {
   uint32_t Get(const std::string& key, uint32_t def_val) const;
   std::vector<uint32_t> Get(const std::string& key, const std::vector<uint32_t>& def_val) const;
 
+  std::optional<int64_t> GetInt(const std::string& key) const;
+
   bool HasAttr(const std::string& key) const;
 
  private:
diff --git a/onnxruntime/core/providers/shared_library/provider_api.h b/onnxruntime/core/providers/shared_library/provider_api.h
index 0d7da46142170..76533a0061702 100644
--- a/onnxruntime/core/providers/shared_library/provider_api.h
+++ b/onnxruntime/core/providers/shared_library/provider_api.h
@@ -240,6 +240,7 @@ struct DeleteOnUnloadPtr {
 
 constexpr const char* kOnnxDomain = "";
 constexpr const char* kMSDomain = "com.microsoft";
+constexpr const char* kMSInternalNHWCDomain = "com.ms.internal.nhwc";
 constexpr const char* kPytorchAtenDomain = "org.pytorch.aten";
 constexpr const char* kNGraphDomain = "com.intel.ai";
 constexpr const char* kCudaExecutionProvider = "CUDAExecutionProvider";
@@ -350,6 +351,9 @@ void InitProviderOrtApi();
   if ((logger).OutputIsEnabled(::onnxruntime::logging::Severity::k##severity, ::onnxruntime::logging::DataType::SYSTEM)) \
   CREATE_MESSAGE(logger, severity, category, ::onnxruntime::logging::DataType::SYSTEM)->Stream()
 
+#define LOGS(logger, severity) \
+  LOGS_CATEGORY(logger, severity, ::onnxruntime::logging::Category::onnxruntime)
+
 #define LOGS_DEFAULT_CATEGORY(severity, category) \
   LOGS_CATEGORY(::onnxruntime::logging::LoggingManager::DefaultLogger(), severity, category)
 
diff --git a/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc b/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc
index d6546ccdd9d5d..a3155fe6b86cf 100644
--- a/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc
+++ b/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc
@@ -510,6 +510,13 @@ bool TileOp::IsTileMemcpy(const TensorShape& input_shape, const int64_t* repeats
   return g_host_cpu.TileOp__IsTileMemcpy(input_shape, repeats, rank, is_batched_memcpy, num_of_elements_per_batch, num_of_copies_per_batch, num_of_batch_copies);
 }
 
+Status SliceBase::FlattenOutputDims(gsl::span<const int64_t> input_dimensions, gsl::span<const int64_t> output_dims,
+                                    TensorShapeVector& starts, TensorShapeVector& ends, TensorShapeVector& steps,
+                                    TensorShapeVector*& p_flattened_input_dims, TensorShapeVector*& p_flattened_output_dims) {
+  return g_host_cpu.SliceBase__FlattenOutputDims(
+      input_dimensions, output_dims, starts, ends, steps, p_flattened_input_dims, p_flattened_output_dims);
+}
+
 Status SliceBase::PrepareForCompute(gsl::span<const int64_t> raw_starts,
                                     gsl::span<const int64_t> raw_ends,
                                     gsl::span<const int64_t> raw_axes,
@@ -606,6 +613,16 @@ Status BeamSearch::SetupSubgraphExecutionInfo(const SessionState& session_state,
   return g_host_cpu.BeamSearch__SetupSubgraphExecutionInfo(this, session_state, attribute_name, subgraph_session_state);
 }
 
+Status WhisperBeamSearch::Compute(OpKernelContext* ctx) const { return g_host_cpu.WhisperBeamSearch__Compute(this, ctx); }
+
+void BeamSearchParameters::ParseFromAttributes(const OpKernelInfo& info) { g_host_cpu.BeamSearchParameters__ParseFromAttributes(this, info); }
+
+void GreedySearchParameters::ParseFromAttributes(const OpKernelInfo& info) { g_host_cpu.GreedySearchParameters__ParseFromAttributes(this, info); }
+
+void SamplingParameters::ParseFromAttributes(const OpKernelInfo& info) { g_host_cpu.SamplingParameters__ParseFromAttributes(this, info); }
+
+void WhisperBeamSearchParameters::ParseFromAttributes(const OpKernelInfo& info) { g_host_cpu.WhisperBeamSearchParameters__ParseFromAttributes(this, info); }
+
 void GreedySearch::Init(const OpKernelInfo& info) { g_host_cpu.GreedySearch__Init(this, info); }
 
 Status GreedySearch::Compute(OpKernelContext* ctx) const { return g_host_cpu.GreedySearch__Compute(this, ctx); }
diff --git a/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h b/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h
index f0ab7869b7d50..c0b282b202ef6 100644
--- a/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h
+++ b/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h
@@ -1,6 +1,7 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
+#pragma once
 namespace onnxruntime {
 
 extern ProviderHost* g_host;
diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
index 96893f63b4540..79f84864a5788 100644
--- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
+++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
@@ -365,6 +365,46 @@ std::unique_lock<OrtMutex> TensorrtExecutionProvider::GetApiLock() const {
   return std::unique_lock<OrtMutex>(singleton);
 }
 
+Status GetShapeOfShapeTensor(Ort::ConstValue& input_tensor,
+                             std::vector<int32_t>& shape_values,
+                             nvinfer1::ICudaEngine* trt_engine,
+                             int binding_index,
+                             cudaStream_t stream) {
+  auto tensor_info = input_tensor.GetTensorTypeAndShapeInfo();
+  const auto tensor_shapes = tensor_info.GetShape();
+  const auto tensor_type = tensor_info.GetElementType();
+  nvinfer1::Dims dims = trt_engine->getBindingDimensions(static_cast<int>(binding_index));
+  int nb_dims = dims.nbDims;
+  int shape_size = nb_dims == 0 ? 1 : static_cast<int>(tensor_shapes[0]);  // The shape of the "shape tensor" is either zero dimension (scalar) or 1-dimension
+  shape_values.resize(shape_size, 1);
+
+  switch (tensor_type) {
+    case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32: {
+      auto input = std::make_unique<int32_t[]>(shape_size);
+      CUDA_RETURN_IF_ERROR(cudaMemcpyAsync(input.get(), input_tensor.GetTensorData<int32_t>(), shape_size * sizeof(int32_t), cudaMemcpyDeviceToHost, stream));
+      CUDA_RETURN_IF_ERROR(cudaStreamSynchronize(stream));
+      for (int j = 0; j < shape_size; ++j) {
+        shape_values[j] = input[j];
+      }
+      break;
+    }
+    case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64: {
+      auto input = std::make_unique<int64_t[]>(shape_size);
+      CUDA_RETURN_IF_ERROR(cudaMemcpyAsync(input.get(), input_tensor.GetTensorData<int64_t>(), shape_size * sizeof(int64_t), cudaMemcpyDeviceToHost, stream));
+      CUDA_RETURN_IF_ERROR(cudaStreamSynchronize(stream));
+      for (int j = 0; j < shape_size; ++j) {
+        shape_values[j] = static_cast<int32_t>(input[j]);
+      }
+      break;
+    }
+    default: {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL,
+                             "TensorRT shape tensor data type: " + std::to_string(tensor_type) + " not supported.");
+    }
+  }
+  return Status::OK();
+}
+
 /*
  * Apply TensorRT optimization profile shapes from provider options.
  *
@@ -404,7 +444,7 @@ bool ApplyProfileShapesFromProviderOptions(std::vector<nvinfer1::IOptimizationPr
 
     // Shape tensor
     if (input->isShapeTensor()) {
-      auto shape_size = nb_dims;
+      int shape_size = nb_dims == 0 ? 1 : static_cast<int>(profile_min_shapes[input_name][i].size());
       std::vector<int32_t> shapes_min(shape_size), shapes_opt(shape_size), shapes_max(shape_size);
 
       LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] shape size of this shape tensor is " << shape_size;
@@ -793,6 +833,10 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv
   if (info.has_user_compute_stream) {
     external_stream_ = true;
     stream_ = static_cast<cudaStream_t>(info.user_compute_stream);
+    ORT_IGNORE_RETURN_VALUE(CUBLAS_CALL(cublasCreate(&external_cublas_handle_)));
+    ORT_IGNORE_RETURN_VALUE(CUBLAS_CALL(cublasSetStream(external_cublas_handle_, stream_)));
+    ORT_IGNORE_RETURN_VALUE(CUDNN_CALL(cudnnCreate(&external_cudnn_handle_)));
+    ORT_IGNORE_RETURN_VALUE(CUDNN_CALL(cudnnSetStream(external_cudnn_handle_, stream_)));
   }
 
   std::string profile_min_shapes, profile_max_shapes, profile_opt_shapes;
@@ -820,6 +864,14 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv
     if (engine_cache_enable_ || int8_enable_ || timing_cache_enable_) {
       cache_path_ = info.engine_cache_path;
     }
+    // use a more global cache if given
+    if (timing_cache_enable_) {
+      if (!info.timing_cache_path.empty()) {
+        global_cache_path_ = info.timing_cache_path;
+      } else {
+        global_cache_path_ = cache_path_;
+      }
+    }
     engine_decryption_enable_ = info.engine_decryption_enable;
     if (engine_decryption_enable_) {
       engine_decryption_lib_path_ = info.engine_decryption_lib_path;
@@ -924,6 +976,15 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv
           LOGS_DEFAULT(WARNING) << "[TensorRT EP] ORT_TENSORRT_ENGINE_CACHE_PATH is deprecated! Please use ORT_TENSORRT_CACHE_PATH to specify engine cache path";
         }
       }
+      if (timing_cache_enable_) {
+        std::string timing_cache_path = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kTimingCachePath);
+        // use a more global cache if given
+        if (!timing_cache_path.empty()) {
+          global_cache_path_ = timing_cache_path;
+        } else {
+          global_cache_path_ = cache_path_;
+        }
+      }
 
       const std::string engine_decryption_enable_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kDecryptionEnable);
       if (!engine_decryption_enable_env.empty()) {
@@ -1015,6 +1076,11 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv
         throw std::runtime_error("Failed to create directory " + cache_path_);
       }
     }
+    if (!global_cache_path_.empty() && !fs::is_directory(global_cache_path_)) {
+      if (!fs::create_directory(global_cache_path_)) {
+        throw std::runtime_error("Failed to create directory " + global_cache_path_);
+      }
+    }
     {
       auto lock = GetApiLock();
       runtime_ = std::unique_ptr<nvinfer1::IRuntime>(nvinfer1::createInferRuntime(GetTensorrtLogger()));
@@ -1100,6 +1166,7 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv
                         << ", trt_dump_subgraphs: " << dump_subgraphs_
                         << ", trt_engine_cache_enable: " << engine_cache_enable_
                         << ", trt_cache_path: " << cache_path_
+                        << ", trt_global_cache_path: " << global_cache_path_
                         << ", trt_engine_decryption_enable: " << engine_decryption_enable_
                         << ", trt_engine_decryption_lib_path: " << engine_decryption_lib_path_
                         << ", trt_force_sequential_engine_build: " << force_sequential_engine_build_
@@ -1127,6 +1194,11 @@ TensorrtExecutionProvider::~TensorrtExecutionProvider() {
     }
   }
 
+  if (external_stream_) {
+    ORT_IGNORE_RETURN_VALUE(CUBLAS_CALL(cublasDestroy(external_cublas_handle_)));
+    ORT_IGNORE_RETURN_VALUE(CUDNN_CALL(cudnnDestroy(external_cudnn_handle_)));
+  }
+
   if (!external_stream_ && stream_) {
     ORT_IGNORE_RETURN_VALUE(CUDA_CALL(cudaStreamDestroy(stream_)));
   }
@@ -1143,46 +1215,35 @@ bool TensorrtExecutionProvider::IsGraphCaptureEnabled() const {
   return cuda_graph_enable_;
 }
 
-bool TensorrtExecutionProvider::IsGraphCaptured() const {
-  return GetPerThreadContext().IsGraphCaptured();
-}
-
-Status TensorrtExecutionProvider::ReplayGraph() {
-  return GetPerThreadContext().ReplayGraph();
-}
-
-void TensorrtExecutionProvider::PerThreadContext::SetGraphStream(cudaStream_t stream) {
-  cuda_graph_.SetStream(stream);
-}
-
-bool TensorrtExecutionProvider::PerThreadContext::IsGraphCaptureAllowed() const {
+bool TensorrtExecutionProvider::IsGraphCaptureAllowed() const {
   return regular_run_count_before_graph_capture_ >= min_num_runs_before_cuda_graph_capture_;
 }
 
-void TensorrtExecutionProvider::PerThreadContext::CaptureBegin() {
+void TensorrtExecutionProvider::CaptureBegin() {
   cuda_graph_.Reset();
   cuda_graph_.CaptureBegin();
 }
 
-void TensorrtExecutionProvider::PerThreadContext::CaptureEnd() {
+void TensorrtExecutionProvider::CaptureEnd() {
   cuda_graph_.CaptureEnd();
   is_graph_captured_ = true;
 }
 
-bool TensorrtExecutionProvider::PerThreadContext::IsGraphCaptured() const {
+bool TensorrtExecutionProvider::IsGraphCaptured() const {
   return is_graph_captured_;
 }
 
-Status TensorrtExecutionProvider::PerThreadContext::ReplayGraph() {
+Status TensorrtExecutionProvider::ReplayGraph() {
   ORT_ENFORCE(IsGraphCaptured());
   // Please note that CUDAGraph::Replay() is not thread safe.
-  // The cuda graph object is maintained by a per thread basis,
+  // ORT TRT calls ReplayGraph() in compute_func() where synchromization is enforced due to lock_guard(),
   // therefore calling CUDAGraph::Replay() here is guaranteed to be thread safe.
   return cuda_graph_.Replay();
 }
 
-void TensorrtExecutionProvider::PerThreadContext::IncrementRegularRunCountBeforeGraphCapture() {
-  // The cuda graph object is maintained by a per thread basis,
+void TensorrtExecutionProvider::IncrementRegularRunCountBeforeGraphCapture() {
+  // Please note that this function is not thread safe.
+  // ORT TRT calls this function in compute_func() where synchronization is enforced due to lock_guard(),
   // therefore following increment is guaranteed to be thread safe.
   ++regular_run_count_before_graph_capture_;
 }
@@ -1213,22 +1274,30 @@ Status TensorrtExecutionProvider::OnRunEnd(bool sync_stream) {
   if (sync_stream && external_stream_) {
     CUDA_RETURN_IF_ERROR(cudaStreamSynchronize(stream_));
   }
+  return Status::OK();
+}
 
-  // The reason of !IsGraphCaptureEnabled():
-  //  If cuda graph is enabled, the per thread context will not be released
-  //  because the per thread cuda graph needs to be maintained and replayed for
-  //  the next run.
-  // The reason of PerThreadContextCache()->find(this) != PerThreadContextCache()->end():
-  //  In extreme cases (e.g., 1-op graph and that op fallbacks to CPU),
-  //  PerThreadContext won't be created and there is nothing to release.
-  if (!IsGraphCaptureEnabled() &&
-      PerThreadContextCache()->find(this) != PerThreadContextCache()->end()) {
-    ReleasePerThreadContext();
+// Get the pointer to the IBuilder instance.
+// Note: This function is not thread safe. Calls to this function from different threads must be serialized
+// even though it doesn't make sense to have multiple threads initializing the same inference session.
+nvinfer1::IBuilder* TensorrtExecutionProvider::GetBuilder() const {
+  if (!builder_) {
+    TensorrtLogger& trt_logger = GetTensorrtLogger();
+    {
+      auto lock = GetApiLock();
+      builder_ = std::unique_ptr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(trt_logger));
+    }
   }
-  return Status::OK();
+  return builder_.get();
 }
 
 void TensorrtExecutionProvider::GetCustomOpDomainList(std::vector<OrtCustomOpDomain*>& custom_op_domain_list) const {
+  if (info_.custom_op_domain_list.empty()) {
+    common::Status status = CreateTensorRTCustomOpDomainList(info_);
+    if (!status.IsOK()) {
+      LOGS_DEFAULT(WARNING) << "[TensorRT EP] Failed to get TRT plugins from TRT plugin registration.";
+    }
+  }
   custom_op_domain_list = info_.custom_op_domain_list;
 }
 
@@ -1512,7 +1581,9 @@ SubGraphCollection_t TensorrtExecutionProvider::GetSupportedList(SubGraphCollect
           }
         }
 
-        if (has_control_flow_op) {
+        // Only if the newly built graph has control flow op as well as it has parent node,
+        // it needs to handle outer scope values before calling graph.Resolve().
+        if (has_control_flow_op && graph.ParentNode()) {
           LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Handle outer scope values for the subgraph " << graph_build.Name();
           BuildSubGraphContext(graph_build);
           SetGraphOuterScopeValuesAndInputs(graph_build, graph.GetGraph());
@@ -1581,7 +1652,7 @@ SubGraphCollection_t TensorrtExecutionProvider::GetSupportedList(SubGraphCollect
         // Get supported node list recursively
         SubGraphCollection_t parser_nodes_list;
         TensorrtLogger& trt_logger = GetTensorrtLogger();
-        auto trt_builder = std::unique_ptr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(trt_logger));
+        auto trt_builder = GetBuilder();
         const auto explicitBatch = 1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
         auto trt_network = std::unique_ptr<nvinfer1::INetworkDefinition>(trt_builder->createNetworkV2(explicitBatch));
 
@@ -1758,6 +1829,10 @@ TensorrtExecutionProvider::GetCapability(const GraphViewer& graph,
       if (sub_graphs.size() != 0) {
         bool all_subgraphs_are_supported = true;
         for (auto sub_graph : sub_graphs) {
+          // TRT EP should consider the empty subgraph is fully supported by TRT.
+          if (sub_graph->CreateGraphViewer()->NumberOfNodes() == 0) {
+            continue;
+          }
           if (!AllNodesAssignedToSpecificEP(*(sub_graph->CreateGraphViewer()), kTensorrtExecutionProvider)) {
             all_subgraphs_are_supported = false;
             break;
@@ -1825,27 +1900,33 @@ TensorrtExecutionProvider::GetCapability(const GraphViewer& graph,
       auto sub_graphs = graph.ParentNode()->GetSubgraphs();
       for (auto sub_graph : sub_graphs) {
         if (sub_graph.get() != &graph.GetGraph()) {
-          auto sub_graph_veiwer = sub_graph->CreateGraphViewer();
-          const int number_of_ort_subgraph_nodes = sub_graph_veiwer->NumberOfNodes();
+          auto sub_graph_viewer = sub_graph->CreateGraphViewer();
+          const int number_of_ort_subgraph_nodes = sub_graph_viewer->NumberOfNodes();
           std::vector<size_t> subgraph_nodes_vector(number_of_ort_subgraph_nodes);
           std::iota(std::begin(subgraph_nodes_vector), std::end(subgraph_nodes_vector), 0);
           SubGraphCollection_t parser_subgraph_nodes_vector = {{subgraph_nodes_vector, false}};
           bool subgraph_early_termination = false;
 
-          // Another subgraph of "If" control flow has been parsed by GetCapability before and all subgraph's nodes assigned to TRT EP.
-          if (AllNodesAssignedToSpecificEP(*sub_graph_veiwer, kTensorrtExecutionProvider)) {
+          // Another subgraph of "If" control flow op has no nodes.
+          // In this case, TRT EP should consider this empty subgraph is fully supported by TRT.
+          if (sub_graph_viewer->NumberOfNodes() == 0) {
+            all_subgraphs_are_supported = true;
+            break;
+          }
+          // Another subgraph of "If" control flow op has been parsed by GetCapability before and all subgraph's nodes assigned to TRT EP.
+          else if (AllNodesAssignedToSpecificEP(*sub_graph_viewer, kTensorrtExecutionProvider)) {
             all_subgraphs_are_supported = true;
             break;
           }
           // Another subgraph of "If" control flow has been parsed by GetCapability and not all subgraph's nodes assigned to TRT EP.
           // (Note: GetExecutionProviderType() returns "" meaning node has not yet been assigned to any EPs)
-          else if (!AllNodesAssignedToSpecificEP(*sub_graph_veiwer, "")) {
+          else if (!AllNodesAssignedToSpecificEP(*sub_graph_viewer, "")) {
             all_subgraphs_are_supported = false;
             break;
           }
 
           // Another subgraph of "If" control flow has not yet been parsed by GetCapability.
-          subgraph_supported_nodes_vector = GetSupportedList(parser_subgraph_nodes_vector, 0, max_partition_iterations_, *sub_graph_veiwer, &subgraph_early_termination);
+          subgraph_supported_nodes_vector = GetSupportedList(parser_subgraph_nodes_vector, 0, max_partition_iterations_, *sub_graph_viewer, &subgraph_early_termination);
           all_subgraphs_are_supported = IsSubGraphFullySupported(subgraph_supported_nodes_vector, number_of_ort_subgraph_nodes);
           break;
         }
@@ -1888,6 +1969,7 @@ TensorrtExecutionProvider::GetCapability(const GraphViewer& graph,
   } else if (number_of_trt_nodes == number_of_ort_nodes) {
     LOGS_DEFAULT(INFO) << "[TensorRT EP] Whole graph will run on TensorRT execution provider";
   } else {
+    sync_stream_after_enqueue_ = true;
     LOGS_DEFAULT(INFO) << "[TensorRT EP] Graph is partitioned and number of subgraphs running on TensorRT execution provider is " << number_of_subgraphs;
   }
 
@@ -1932,7 +2014,7 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<FusedNodeAnd
     }
 
     TensorrtLogger& trt_logger = GetTensorrtLogger();
-    auto trt_builder = std::unique_ptr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(trt_logger));
+    auto trt_builder = GetBuilder();
     const auto explicitBatch = 1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
     auto trt_network = std::unique_ptr<nvinfer1::INetworkDefinition>(trt_builder->createNetworkV2(explicitBatch));
     auto trt_config = std::unique_ptr<nvinfer1::IBuilderConfig>(trt_builder->createBuilderConfig());
@@ -2211,7 +2293,7 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<FusedNodeAnd
       std::string timing_cache_path = "";
       bool engine_update = false;
       if (timing_cache_enable_) {
-        timing_cache_path = GetTimingCachePath(cache_path_, prop);
+        timing_cache_path = GetTimingCachePath(global_cache_path_, prop);
       }
       {
         // ifstream file check, engine serialization/deserialization and engine build are in critical section. It needs lock protection to prevent race condition when inferencing with multithreading.
@@ -2384,7 +2466,7 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<FusedNodeAnd
     // Save TRT engine, other TRT objects and input/output info to map
     parsers_.emplace(fused_node.Name(), std::move(trt_parser));
     engines_.emplace(fused_node.Name(), std::move(trt_engine));
-    builders_.emplace(fused_node.Name(), std::move(trt_builder));
+    contexts_.emplace(fused_node.Name(), std::move(trt_context));
     networks_.emplace(fused_node.Name(), std::move(trt_network));
     input_info_[fused_node.Name()].push_back(input_indexes);
     output_info_[fused_node.Name()].push_back(output_indexes);
@@ -2392,14 +2474,6 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<FusedNodeAnd
     input_shape_ranges_[fused_node.Name()] = input_implicit_shape_ranges;
     profiles_.emplace(fused_node.Name(), std::move(trt_profiles));
 
-    // Save TRT context to PerThreadContext map since maintaining execution context in a per thread basis is suggested by TRT doc to avoid synchronization issue
-    if (trt_context) {
-      auto context_status = GetPerThreadContext().UpdateTensorRTContext(fused_node.Name(), std::move(trt_context));
-      if (!context_status) {
-        return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, "TensorRT EP failed to create context.");
-      }
-    }
-
     // Create function state
     // TODO: remove default capture
     NodeComputeInfo compute_info;
@@ -2410,14 +2484,14 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<FusedNodeAnd
       if (!tactic_sources_.empty()) {
         tactics = GetTacticSourceFromString(tactic_sources_);
       }
-      *p = {context->allocate_func, context->release_func, context->allocator_handle, context->node_name,
-            &parsers_[context->node_name], &engines_[context->node_name], &builders_[context->node_name],
+      *p = {context->allocate_func, context->release_func, context->allocator_handle, context->node_name, builder_.get(),
+            &parsers_[context->node_name], &engines_[context->node_name], &contexts_[context->node_name],
             &networks_[context->node_name], input_info_[context->node_name], output_info_[context->node_name],
-            input_shape_ranges_[context->node_name], &tensorrt_mu_, fp16_enable_, int8_enable_, int8_calibration_cache_available_,
+            input_shape_ranges_[context->node_name], sync_stream_after_enqueue_, &tensorrt_mu_, fp16_enable_, int8_enable_, int8_calibration_cache_available_,
             dla_enable_, dla_core_, &max_workspace_size_, trt_node_name_with_precision, engine_cache_enable_, cache_path_,
             runtime_.get(), profiles_[context->node_name], context_memory_sharing_enable_, &max_ctx_mem_size_,
             dynamic_range_map, engine_decryption_enable_, engine_decryption_, engine_encryption_, timing_cache_enable_,
-            force_timing_cache_match_, detailed_build_log_, build_heuristics_enable_, sparsity_enable_,
+            global_cache_path_, force_timing_cache_match_, detailed_build_log_, build_heuristics_enable_, sparsity_enable_,
             builder_optimization_level_, auxiliary_streams_, !tactic_sources_.empty(), tactics};
       *state = p.release();
       return 0;
@@ -2441,10 +2515,12 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<FusedNodeAnd
       const std::unordered_map<std::string, size_t>& input_indexes = (trt_state->input_info)[0];
       const std::unordered_map<std::string, size_t>& output_indexes = (trt_state->output_info)[0];
       const std::unordered_map<std::string, size_t>& output_types = (trt_state->output_info)[1];
+      bool sync_stream_after_enqueue = trt_state->sync_stream_after_enqueue;
       auto fused_node_name = trt_state->fused_node_name;
       auto& shape_ranges = trt_state->input_shape_ranges;
-      auto trt_builder = trt_state->builder->get();
+      auto trt_builder = trt_state->builder;
       auto trt_engine = trt_state->engine->get();
+      auto trt_context = trt_state->context->get();
       auto trt_profiles = trt_state->profiles;
       auto max_context_mem_size_ptr = trt_state->max_context_mem_size_ptr;
       int num_inputs = static_cast<int>(input_indexes.size());
@@ -2477,7 +2553,7 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<FusedNodeAnd
       const std::string profile_cache_path = cache_path + "_sm" + compute_capability + ".profile";
       std::string timing_cache_path = "";
       if (timing_cache_enable_) {
-        timing_cache_path = GetTimingCachePath(cache_path_, prop);
+        timing_cache_path = GetTimingCachePath(global_cache_path_, prop);
       }
 
       // Load serialized engine
@@ -2502,7 +2578,7 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<FusedNodeAnd
           trt_state->engine->reset();
           *(trt_state->engine) = std::unique_ptr<nvinfer1::ICudaEngine>(
               trt_state->runtime->deserializeCudaEngine(engine_buf.get(), engine_size, nullptr));
-          if (*(trt_state->engine) == nullptr) {
+          if (!(*(trt_state->engine))) {
             return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, "TensorRT EP Failed to Build Engine.");
           }
           LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] DeSerialized " + engine_cache_path;
@@ -2527,7 +2603,7 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<FusedNodeAnd
           // https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#threading
           trt_state->engine->reset();
           *(trt_state->engine) = std::unique_ptr<nvinfer1::ICudaEngine>(trt_state->runtime->deserializeCudaEngine(engine_buf.get(), engine_size, nullptr));
-          if (*(trt_state->engine) == nullptr) {
+          if (!(*(trt_state->engine))) {
             return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL,
                                    "TensorRT EP could not deserialize engine from encrypted cache: " + encrypted_engine_cache_path);
           }
@@ -2556,10 +2632,7 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<FusedNodeAnd
       // Regenerate engine
       if (engine_update) {
         // Destroy the IExecutionContext objects before destroying an engine object, otherwise it will lead to undefined behavior.
-        if (GetPerThreadContext().IsTensorRTContextInMap(fused_node_name)) {
-          GetPerThreadContext().ResetTensorRTContext(fused_node_name);
-        }
-
+        trt_state->context->reset();
         trt_state->engine->reset();
         auto trt_config = std::unique_ptr<nvinfer1::IBuilderConfig>(trt_builder->createBuilderConfig());
         trt_config->setMaxWorkspaceSize(*(trt_state->max_workspace_size_ptr));
@@ -2660,7 +2733,7 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<FusedNodeAnd
             LOGS_DEFAULT(INFO) << "TensorRT engine build for " << trt_state->trt_node_name_with_precision << " took: " << std::chrono::duration_cast<std::chrono::milliseconds>(engine_build_stop - engine_build_start).count() << "ms" << std::endl;
           }
         }
-        if (*(trt_state->engine) == nullptr) {
+        if (!(*(trt_state->engine))) {
           return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, "TensorRT EP Failed to Build Engine.");
         }
         trt_engine = trt_state->engine->get();
@@ -2706,32 +2779,20 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<FusedNodeAnd
         context_update = true;
       }
 
-      // Build execution context if either of the following conditions is true:
-      // (1) The engine is built or updated by this thread.
-      // (2) The first inference run for this thread where there is no IExecutionContext object yet.
-      // (3) The engine is updated by another thread. (We compare the profile shapes maintained by the PerThreadContext to the profile shapes maintained by TRT EP)
-      //
-      // Note: Creating an execution context from an engine is thread safe per TRT doc
-      // https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#threading
-      if (context_update ||
-          !GetPerThreadContext().IsTensorRTContextInMap(fused_node_name) ||
-          GetPerThreadContext().CompareProfileShapes(fused_node_name, shape_ranges)) {
-        std::unique_ptr<nvinfer1::IExecutionContext> new_context;
+      if (context_update) {
         if (trt_state->context_memory_sharing_enable) {
-          new_context.reset(trt_state->engine->get()->createExecutionContextWithoutDeviceMemory());
+          *(trt_state->context) = std::unique_ptr<nvinfer1::IExecutionContext>(
+              trt_state->engine->get()->createExecutionContextWithoutDeviceMemory());
         } else {
-          new_context.reset(trt_state->engine->get()->createExecutionContext());
+          *(trt_state->context) = std::unique_ptr<nvinfer1::IExecutionContext>(
+              trt_state->engine->get()->createExecutionContext());
         }
-        auto context_status = GetPerThreadContext().UpdateTensorRTContext(fused_node_name, std::move(new_context));
-        if (!context_status) {
+        if (!(*(trt_state->context))) {
           return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, "TensorRT EP failed to create context.");
         }
-        GetPerThreadContext().UpdateProfileShapes(fused_node_name, shape_ranges);
+        trt_context = trt_state->context->get();
       }
 
-      // Get the reference to the IExecutionContext object that is maintained on a per thread basis.
-      nvinfer1::IExecutionContext& trt_context = GetPerThreadContext().GetTensorRTContext(fused_node_name);
-
       // Get input and output binding names
       int total_bindings = trt_engine->getNbBindings();
       std::vector<void*> buffers(total_bindings);
@@ -2767,12 +2828,22 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<FusedNodeAnd
         int nb_dims = dimensions.nbDims;
         if (input_names.count(input_name) == 1) {
           if (trt_engine->isShapeBinding(binding_index)) {
-            trt_context.setInputShapeBinding(binding_index, &tensor_shape_values[input_name][0]);
+            // Get shape of the shape tensor
+            std::vector<int32_t> shape_values;
+            if (!tensor_shape_values[input_name].empty()) {
+              shape_values = tensor_shape_values[input_name];
+            } else {
+              auto status = GetShapeOfShapeTensor(input_tensor, shape_values, trt_engine, binding_index, stream);
+              if (status != Status::OK()) {
+                return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, status.ErrorMessage());
+              }
+            }
+            trt_context->setInputShapeBinding(binding_index, &shape_values[0]);
           } else {
             for (int j = 0, end = nb_dims; j < end; ++j) {
               dimensions.d[j] = static_cast<int32_t>(tensor_shapes[j]);
             }
-            const bool status = trt_context.setBindingDimensions(binding_index, dimensions);
+            const bool status = trt_context->setBindingDimensions(binding_index, dimensions);
             if (!status) {
               ORT_THROW_IF_ERROR(ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL,
                                                  "TensorRT EP cannot set the dynamic dimensions of a binding"));
@@ -2911,7 +2982,7 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<FusedNodeAnd
         if (index_iter != output_indexes.end()) {
           output_index = index_iter->second;
         }
-        nvinfer1::Dims dimensions = trt_context.getBindingDimensions(static_cast<int>(binding_index));
+        nvinfer1::Dims dimensions = trt_context->getBindingDimensions(static_cast<int>(binding_index));
         int nb_dims = dimensions.nbDims;
         std::vector<int64_t> output_shapes(nb_dims);
         for (int j = 0, end = nb_dims; j < end; ++j) {
@@ -3045,23 +3116,27 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<FusedNodeAnd
         if (mem_size > *max_context_mem_size_ptr) {
           *max_context_mem_size_ptr = mem_size;
         }
-        trt_context.setDeviceMemory(IAllocator::MakeUniquePtrFromOrtAllocator<void>(alloc, *max_context_mem_size_ptr).get());
+        trt_context->setDeviceMemory(IAllocator::MakeUniquePtrFromOrtAllocator<void>(alloc, *max_context_mem_size_ptr).get());
       }
 
       // Start CUDA graph capture.
       // Note: The reason we don't put graph capture in OnRunStart() like CUDA EP does is because
       // current ORT TRT doesn't get cuda stream until compute time and graph capture requires cuda stream.
-      if (cuda_graph_enable_ && GetPerThreadContext().IsGraphCaptureAllowed() && !GetPerThreadContext().IsGraphCaptured()) {
+      if (cuda_graph_enable_ && IsGraphCaptureAllowed() && !IsGraphCaptured()) {
         LOGS_DEFAULT(INFO) << "Capturing the cuda graph for this model";
-        GetPerThreadContext().SetGraphStream(stream);
-        GetPerThreadContext().CaptureBegin();
+        cuda_graph_.SetStream(stream);
+        CaptureBegin();
       }
 
       // Run TRT inference
-      if (!trt_context.enqueueV2(&buffers[0], stream, nullptr)) {
+      if (!trt_context->enqueueV2(&buffers[0], stream, nullptr)) {
         return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "TensorRT EP execution context enqueue failed.");
       }
 
+      if (sync_stream_after_enqueue) {
+        cudaStreamSynchronize(stream);
+      }
+
       // Cast INT64 input to INT32 because TensorRT doesn't fully support INT64
       for (size_t i = 0, end = output_binding_names.size(); i < end; ++i) {
         const std::string& output_name = output_binding_names[i];
@@ -3089,14 +3164,14 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<FusedNodeAnd
       // Note: One reason we don't put end of graph capture in OnRunEnd() like CUDA EP does is because of cuda stream mentioned in graph capture
       // above, another reason is because OnRunEnd() is not synchronized with OnRunStart() and ExecuteGraph() per inference_session.cc.
       // It's safe to start/end CUDA graph capture in compute_func() here since cuda graph object is maintained by a per thread basis.
-      if (cuda_graph_enable_ && !GetPerThreadContext().IsGraphCaptured()) {
-        if (GetPerThreadContext().IsGraphCaptureAllowed()) {
-          GetPerThreadContext().CaptureEnd();
+      if (cuda_graph_enable_ && !IsGraphCaptured()) {
+        if (IsGraphCaptureAllowed()) {
+          CaptureEnd();
           // CUDA work issued to a capturing stream doesn’t actually run on the GPU,
           // so run the captured graph here to actually execute the work.
-          ORT_RETURN_IF_ERROR(GetPerThreadContext().ReplayGraph());
+          ORT_RETURN_IF_ERROR(ReplayGraph());
         } else {
-          GetPerThreadContext().IncrementRegularRunCountBeforeGraphCapture();
+          IncrementRegularRunCountBeforeGraphCapture();
         }
       }
 
@@ -3116,8 +3191,8 @@ void TensorrtExecutionProvider::RegisterStreamHandlers(IStreamCommandHandleRegis
                             true /* release_cpu_buffer_on_cuda_stream */,
                             stream_,
                             external_stream_ /* use_existing_stream */,
-                            GetPerThreadContext().CudnnHandle(),
-                            GetPerThreadContext().CublasHandle());
+                            external_cudnn_handle_,
+                            external_cublas_handle_);
 }
 
 OrtDevice TensorrtExecutionProvider::GetOrtDeviceByMemType(OrtMemType mem_type) const {
diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h
index e00e5df581e67..a945d219088aa 100644
--- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h
+++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h
@@ -26,6 +26,8 @@ static const std::string kDLACore = "ORT_TENSORRT_DLA_CORE";
 static const std::string kDumpSubgraphs = "ORT_TENSORRT_DUMP_SUBGRAPHS";
 static const std::string kEngineCacheEnable = "ORT_TENSORRT_ENGINE_CACHE_ENABLE";
 static const std::string kCachePath = "ORT_TENSORRT_CACHE_PATH";
+// As a timing cache can be used across multiple ONNX files it makes sense to have a seperate cache path
+static const std::string kTimingCachePath = "ORT_TENSORRT_GLOBAL_CACHE_PATH";
 static const std::string kDecryptionEnable = "ORT_TENSORRT_ENGINE_DECRYPTION_ENABLE";
 static const std::string kDecryptionLibPath = "ORT_TENSORRT_ENGINE_DECRYPTION_LIB_PATH";
 static const std::string kForceSequentialEngineBuild = "ORT_TENSORRT_FORCE_SEQUENTIAL_ENGINE_BUILD";
@@ -103,13 +105,15 @@ struct TensorrtFuncState {
   DestroyFunc test_release_func = nullptr;
   AllocatorHandle allocator = nullptr;
   std::string fused_node_name;
+  nvinfer1::IBuilder* builder;
   tensorrt_ptr::unique_pointer<nvonnxparser::IParser>* parser = nullptr;
   std::unique_ptr<nvinfer1::ICudaEngine>* engine = nullptr;
-  std::unique_ptr<nvinfer1::IBuilder>* builder = nullptr;
+  std::unique_ptr<nvinfer1::IExecutionContext>* context = nullptr;
   std::unique_ptr<nvinfer1::INetworkDefinition>* network = nullptr;
   std::vector<std::unordered_map<std::string, size_t>> input_info;
   std::vector<std::unordered_map<std::string, size_t>> output_info;
   std::unordered_map<std::string, std::unordered_map<size_t, std::vector<std::vector<int64_t>>>> input_shape_ranges;
+  bool sync_stream_after_enqueue = false;
   OrtMutex* tensorrt_mu_ptr = nullptr;
   bool fp16_enable = false;
   bool int8_enable = false;
@@ -129,6 +133,7 @@ struct TensorrtFuncState {
   int (*engine_decryption)(const char*, char*, size_t*) = nullptr;
   int (*engine_encryption)(const char*, char*, size_t) = nullptr;
   bool timing_cache_enable = true;
+  std::string timing_cache_path;
   bool force_timing_cache = false;
   bool detailed_build_log = false;
   bool build_heuristics_enable = false;
@@ -195,7 +200,7 @@ class TensorrtExecutionProvider : public IExecutionProvider {
   Status ReplayGraph() override;
 
  private:
-  TensorrtExecutionProviderInfo info_;
+  mutable TensorrtExecutionProviderInfo info_;
   bool external_stream_ = false;
   cudaStream_t stream_ = nullptr;
   int max_partition_iterations_ = 1000;
@@ -216,7 +221,7 @@ class TensorrtExecutionProvider : public IExecutionProvider {
   int builder_optimization_level_ = 3;
   int auxiliary_streams_ = -1;
   std::string tactic_sources_;
-  std::string cache_path_, engine_decryption_lib_path_;
+  std::string global_cache_path_, cache_path_, engine_decryption_lib_path_;
   std::unique_ptr<nvinfer1::IRuntime> runtime_ = nullptr;
   OrtMutex tensorrt_mu_;
   int device_id_;
@@ -240,12 +245,15 @@ class TensorrtExecutionProvider : public IExecutionProvider {
   std::unordered_set<std::string> control_flow_op_set_ = {"If", "Loop", "Scan"};
   mutable std::unordered_map<std::string, std::unique_ptr<SubGraphContext>> subgraph_context_map_;
 
+  mutable std::unique_ptr<nvinfer1::IBuilder> builder_;
+
   // Following maps that hold TRT objects will be accessible by different threads if ORT is using multithreading.
   // In general, TensorRT objects are not thread safe; accesses to an object from different threads must be serialized by the client.
   // But there are still some thread safe operations, please see here https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#threading
   // For those non thread safe operations, TRT EP uses (1) lock_guard or (2) PerThreadContext to make sure synchronization.
   std::unordered_map<std::string, tensorrt_ptr::unique_pointer<nvonnxparser::IParser>> parsers_;
   std::unordered_map<std::string, std::unique_ptr<nvinfer1::ICudaEngine>> engines_;
+  std::unordered_map<std::string, std::unique_ptr<nvinfer1::IExecutionContext>> contexts_;
   std::unordered_map<std::string, std::unique_ptr<nvinfer1::IBuilder>> builders_;
   std::unordered_map<std::string, std::unique_ptr<nvinfer1::INetworkDefinition>> networks_;
   std::unordered_map<std::string, std::vector<std::unordered_map<std::string, size_t>>> input_info_;
@@ -256,6 +264,24 @@ class TensorrtExecutionProvider : public IExecutionProvider {
   std::unordered_map<std::string, ShapeRangesMap> input_shape_ranges_;  // The profile shape ranges that the engine is built with
   std::unordered_map<std::string, std::vector<nvinfer1::IOptimizationProfile*>> profiles_;
 
+  // for external stream, we need to create its cudnn/cublass handle before cuda EP enable cuda graph capture
+  cudnnHandle_t external_cudnn_handle_ = nullptr;
+  cublasHandle_t external_cublas_handle_ = nullptr;
+
+  // Call cudaStreamSynchronize() after TRT enqueueV2()/enqueueV3()
+  mutable bool sync_stream_after_enqueue_ = false;
+
+  CUDAGraph cuda_graph_;
+  bool is_graph_captured_ = false;
+  int regular_run_count_before_graph_capture_ = 0;
+  // There is chance (currently only happens in CUDA EP) that the second regular run allocates GPU memory for causes like:
+  // (1) memory pattern is enabled. (2) arena allocation for stream.
+  // Since no GPU memory allocation is allowed during graph capturing, we need at least two regular runs
+  // to allocate enough memory in Arena before graph capturing.
+  const int min_num_runs_before_cuda_graph_capture_ = 1;  // required min regular runs before graph capture for the necessary memory allocations.
+
+  // [Note] We don't use PerThreadContext for now since it has issue with multithreading
+  //
   // TRT or CUDA objects that must be maintained on a per thread basis will be put under this PerThreadContext data structure.
   // For example, TensorRT execution context and CUDA graph are the ones to be put here.
   class PerThreadContext final {
@@ -432,5 +458,11 @@ class TensorrtExecutionProvider : public IExecutionProvider {
   void CaptureBegin();
   void CaptureEnd();
   void IncrementRegularRunCountBeforeGraphCapture();
+
+  /**
+   * Get the pointer to the IBuilder instance.
+   * This function only creates the instance at the first time it's being called."
+   */
+  nvinfer1::IBuilder* GetBuilder() const;
 };
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_custom_ops.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_custom_ops.cc
index 54a4d16e4eaf7..4e466a5d568a6 100644
--- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_custom_ops.cc
+++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_custom_ops.cc
@@ -26,27 +26,16 @@ extern TensorrtLogger& GetTensorrtLogger();
  * Note: Current TRT plugin doesn't have APIs to get number of inputs/outputs of the plugin.
  * So, TensorRTCustomOp uses variadic inputs/outputs to pass ONNX graph validation.
  */
-common::Status CreateTensorRTCustomOpDomainList(TensorrtExecutionProviderInfo& info) {
+common::Status CreateTensorRTCustomOpDomainList(std::vector<OrtCustomOpDomain*>& domain_list, const std::string extra_plugin_lib_paths) {
   std::unique_ptr<OrtCustomOpDomain> custom_op_domain = std::make_unique<OrtCustomOpDomain>();
   custom_op_domain->domain_ = "trt.plugins";
 
   // Load any extra TRT plugin library if any.
   // When the TRT plugin library is loaded, the global static object is created and the plugin is registered to TRT registry.
   // This is done through macro, for example, REGISTER_TENSORRT_PLUGIN(VisionTransformerPluginCreator).
-  std::string extra_plugin_lib_paths{""};
-  if (info.has_trt_options) {
-    if (!info.extra_plugin_lib_paths.empty()) {
-      extra_plugin_lib_paths = info.extra_plugin_lib_paths;
-    }
-  } else {
-    const std::string extra_plugin_lib_paths_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kExtraPluginLibPaths);
-    if (!extra_plugin_lib_paths_env.empty()) {
-      extra_plugin_lib_paths = extra_plugin_lib_paths_env;
-    }
-  }
-
   // extra_plugin_lib_paths has the format of "path_1;path_2....;path_n"
-  if (!extra_plugin_lib_paths.empty()) {
+  static bool is_loaded = false;
+  if (!extra_plugin_lib_paths.empty() && !is_loaded) {
     std::stringstream extra_plugin_libs(extra_plugin_lib_paths);
     std::string lib;
     while (std::getline(extra_plugin_libs, lib, ';')) {
@@ -57,35 +46,59 @@ common::Status CreateTensorRTCustomOpDomainList(TensorrtExecutionProviderInfo& i
         LOGS_DEFAULT(WARNING) << "[TensorRT EP]" << status.ToString();
       }
     }
+    is_loaded = true;
   }
 
-  // Get all registered TRT plugins from registry
-  LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Getting all registered TRT plugins from TRT plugin registry ...";
-  TensorrtLogger trt_logger = GetTensorrtLogger();
-  initLibNvInferPlugins(&trt_logger, "");
+  try {
+    // Get all registered TRT plugins from registry
+    LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Getting all registered TRT plugins from TRT plugin registry ...";
+    TensorrtLogger trt_logger = GetTensorrtLogger();
+    initLibNvInferPlugins(&trt_logger, "");
 
-  int num_plugin_creator = 0;
-  auto plugin_creators = getPluginRegistry()->getPluginCreatorList(&num_plugin_creator);
-  std::unordered_set<std::string> registered_plugin_names;
+    int num_plugin_creator = 0;
+    auto plugin_creators = getPluginRegistry()->getPluginCreatorList(&num_plugin_creator);
+    std::unordered_set<std::string> registered_plugin_names;
 
-  for (int i = 0; i < num_plugin_creator; i++) {
-    auto plugin_creator = plugin_creators[i];
-    std::string plugin_name(plugin_creator->getPluginName());
-    LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] " << plugin_name << ", version : " << plugin_creator->getPluginVersion();
+    for (int i = 0; i < num_plugin_creator; i++) {
+      auto plugin_creator = plugin_creators[i];
+      std::string plugin_name(plugin_creator->getPluginName());
+      LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] " << plugin_name << ", version : " << plugin_creator->getPluginVersion();
 
-    // plugin has different versions and we only register once
-    if (registered_plugin_names.find(plugin_name) != registered_plugin_names.end()) {
-      continue;
-    }
+      // plugin has different versions and we only register once
+      if (registered_plugin_names.find(plugin_name) != registered_plugin_names.end()) {
+        continue;
+      }
 
-    std::unique_ptr<TensorRTCustomOp> trt_custom_op = std::make_unique<TensorRTCustomOp>(onnxruntime::kTensorrtExecutionProvider, nullptr);
-    trt_custom_op->SetName(plugin_creator->getPluginName());
-    custom_op_domain->custom_ops_.push_back(trt_custom_op.release());
-    registered_plugin_names.insert(plugin_name);
+      std::unique_ptr<TensorRTCustomOp> trt_custom_op = std::make_unique<TensorRTCustomOp>(onnxruntime::kTensorrtExecutionProvider, nullptr);
+      trt_custom_op->SetName(plugin_creator->getPluginName());
+      custom_op_domain->custom_ops_.push_back(trt_custom_op.release());
+      registered_plugin_names.insert(plugin_name);
+    }
+    domain_list.push_back(custom_op_domain.release());
+  } catch (const std::exception&) {
+    LOGS_DEFAULT(WARNING) << "[TensorRT EP] Failed to get TRT plugins from TRT plugin registration. Therefore, TRT EP can't create custom ops for TRT plugins";
   }
-  info.custom_op_domain_list.push_back(custom_op_domain.release());
+  return Status::OK();
+}
 
-  return common::Status::OK();
+common::Status CreateTensorRTCustomOpDomainList(TensorrtExecutionProviderInfo& info) {
+  std::vector<OrtCustomOpDomain*> domain_list;
+  std::string extra_plugin_lib_paths{""};
+  if (info.has_trt_options) {
+    if (!info.extra_plugin_lib_paths.empty()) {
+      extra_plugin_lib_paths = info.extra_plugin_lib_paths;
+    }
+  } else {
+    const std::string extra_plugin_lib_paths_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kExtraPluginLibPaths);
+    if (!extra_plugin_lib_paths_env.empty()) {
+      extra_plugin_lib_paths = extra_plugin_lib_paths_env;
+    }
+  }
+  auto status = CreateTensorRTCustomOpDomainList(domain_list, extra_plugin_lib_paths);
+  if (!domain_list.empty()) {
+    info.custom_op_domain_list = domain_list;
+  }
+  return Status::OK();
 }
 
 void ReleaseTensorRTCustomOpDomain(OrtCustomOpDomain* domain) {
diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_custom_ops.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_custom_ops.h
index 98ac3220abffd..35bd38d818979 100644
--- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_custom_ops.h
+++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_custom_ops.h
@@ -13,6 +13,7 @@ using namespace onnxruntime;
 namespace onnxruntime {
 
 common::Status LoadDynamicLibrary(onnxruntime::PathString library_name);
+common::Status CreateTensorRTCustomOpDomainList(std::vector<OrtCustomOpDomain*>& domain_list, const std::string extra_plugin_lib_paths);
 common::Status CreateTensorRTCustomOpDomainList(TensorrtExecutionProviderInfo& info);
 void ReleaseTensorRTCustomOpDomain(OrtCustomOpDomain* domain);
 void ReleaseTensorRTCustomOpDomainList(std::vector<OrtCustomOpDomain*>& custom_op_domain_list);
diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_helper.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_helper.cc
index ecc72b1c65476..92fa101118506 100644
--- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_helper.cc
+++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_helper.cc
@@ -3,10 +3,36 @@
 
 #include "core/providers/shared_library/provider_api.h"
 #include "tensorrt_execution_provider.h"
+#include "core/framework/murmurhash3.h"
 #include <iostream>
 
 namespace onnxruntime {
 
+namespace {
+// Get unique graph name based on graph's name and all nodes' name
+std::string GetUniqueGraphName(const Graph& graph) {
+  HashValue model_hash = 0;
+  uint32_t hash[4] = {0, 0, 0, 0};
+
+  auto hash_str = [&hash](const std::string& str) {
+    MurmurHash3::x86_128(str.data(), gsl::narrow_cast<int32_t>(str.size()), hash[0], &hash);
+  };
+
+  // Hash all nodes' name
+  for (int i = 0; i < graph.MaxNodeIndex(); ++i) {
+    auto node = graph.GetNode(i);
+    if (node == nullptr) {
+      continue;
+    }
+    hash_str(node->Name());
+  }
+
+  model_hash = hash[0] | (uint64_t(hash[1]) << 32);
+
+  return graph.Name() + "_" + std::to_string(model_hash);
+}
+}  // namespace
+
 // The newly-built graph has not yet being resolved by Graph::Resolve(), so we can't leverage
 // Graph::ResolveContext::IsInputInitializerOrOutput(). We have to implement this fuction again.
 bool TensorrtExecutionProvider::IsInputInitializerOrOutput(const Graph& graph,
@@ -31,10 +57,11 @@ bool TensorrtExecutionProvider::IsOuterScopeValue(const Graph& graph,
 // Graph::ResolveContext::IsLocalValue(). We have to implement this function again.
 bool TensorrtExecutionProvider::IsLocalValue(const Graph& graph,
                                              const std::string& name) const {
-  if (subgraph_context_map_.find(graph.Name()) == subgraph_context_map_.end()) {
+  std::string unique_graph_name = GetUniqueGraphName(graph);
+  if (subgraph_context_map_.find(unique_graph_name) == subgraph_context_map_.end()) {
     return false;
   }
-  SubGraphContext* context = subgraph_context_map_.at(graph.Name()).get();
+  SubGraphContext* context = subgraph_context_map_.at(unique_graph_name).get();
   return context->output_args.find(name) != context->output_args.cend() ||
          context->inputs_and_initializers.find(name) != context->inputs_and_initializers.cend();
 }
@@ -59,13 +86,15 @@ void TensorrtExecutionProvider::BuildSubGraphContext(const Graph& graph) const {
     }
   }
 
+  std::string unique_graph_name = GetUniqueGraphName(graph);
+
   // Subgraph context has been built before, no need to do it again
-  if (subgraph_context_map_.find(graph.Name()) != subgraph_context_map_.end()) {
+  if (subgraph_context_map_.find(unique_graph_name) != subgraph_context_map_.end()) {
     return;
   }
 
-  subgraph_context_map_.emplace(graph.Name(), std::make_unique<SubGraphContext>());
-  SubGraphContext* context = subgraph_context_map_.at(graph.Name()).get();
+  subgraph_context_map_.emplace(unique_graph_name, std::make_unique<SubGraphContext>());
+  SubGraphContext* context = subgraph_context_map_.at(unique_graph_name).get();
 
   // Collect all nodes' outputs and nodes' name
   for (int i = 0; i < graph.MaxNodeIndex(); ++i) {
@@ -138,13 +167,14 @@ void TensorrtExecutionProvider::SetGraphOuterScopeValuesAndInputs(Graph& graph_b
     while (top_level_graph->MutableParentGraph()) {
       top_level_graph = top_level_graph->MutableParentGraph();
     }
-    if (subgraph_context_map_.find(top_level_graph->Name()) == subgraph_context_map_.end()) {
+    std::string unique_graph_name = GetUniqueGraphName(*top_level_graph);
+    if (subgraph_context_map_.find(unique_graph_name) == subgraph_context_map_.end()) {
       LOGS_DEFAULT(ERROR) << "[TensorRT EP] Can't find top-level graph context. \
                               Please check BuildSubGraphContext() has built the graph context correctly.";
       return;
     }
 
-    SubGraphContext* context = subgraph_context_map_.at(top_level_graph->Name()).get();
+    SubGraphContext* context = subgraph_context_map_.at(unique_graph_name).get();
 
     LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Subgraph name is " << graph_build.Name();
     LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Its parent node is " << graph.ParentNode()->Name();
@@ -197,12 +227,13 @@ void TensorrtExecutionProvider::SetGraphOuterScopeValuesAndInputs(Graph& graph_b
 void TensorrtExecutionProvider::SetAllGraphInputs(Graph& graph) const {
   // If ORT TRT doesn't manully set graph input in TensorrtExecutionProvider::SetGraphOuterScopeValuesAndInputs(),
   // Graph::Resolve() will help set graph inputs in Graph::SetGraphInputsOutputs(), so no need to set graph inputs here.
-  if (subgraph_context_map_.find(graph.Name()) == subgraph_context_map_.end() ||
-      subgraph_context_map_[graph.Name()].get()->manually_added_graph_inputs.size() == 0) {
+  std::string unique_graph_name = GetUniqueGraphName(graph);
+  if (subgraph_context_map_.find(unique_graph_name) == subgraph_context_map_.end() ||
+      subgraph_context_map_[unique_graph_name].get()->manually_added_graph_inputs.size() == 0) {
     return;
   }
 
-  SubGraphContext* context = subgraph_context_map_[graph.Name()].get();
+  SubGraphContext* context = subgraph_context_map_[unique_graph_name].get();
   std::vector<const NodeArg*> graph_inputs_including_initializers;
   std::unordered_set<std::string> graph_inputs_including_initializers_set;
 
diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc
index 515fc1c62cff1..3ead33f9131d9 100644
--- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc
+++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc
@@ -25,7 +25,7 @@ constexpr const char* kDLAEnable = "trt_dla_enable";
 constexpr const char* kDLACore = "trt_dla_core";
 constexpr const char* kDumpSubgraphs = "trt_dump_subgraphs";
 constexpr const char* kEngineCacheEnable = "trt_engine_cache_enable";
-constexpr const char* kCachePath = "trt_engine_cache_path";
+constexpr const char* kEngineCachePath = "trt_engine_cache_path";
 constexpr const char* kDecryptionEnable = "trt_engine_decryption_enable";
 constexpr const char* kDecryptionLibPath = "trt_engine_decryption_lib_path";
 constexpr const char* kForceSequentialEngineBuild = "trt_force_sequential_engine_build";
@@ -33,7 +33,8 @@ constexpr const char* kForceSequentialEngineBuild = "trt_force_sequential_engine
 constexpr const char* kContextMemorySharingEnable = "trt_context_memory_sharing_enable";
 constexpr const char* kLayerNormFP32Fallback = "trt_layer_norm_fp32_fallback";
 constexpr const char* kTimingCacheEnable = "trt_timing_cache_enable";
-constexpr const char* kForceTimingCacheMatch = "trt_force_timing_cache_match";
+constexpr const char* kTimingCachePath = "trt_timing_cache_path";
+constexpr const char* kForceTimingCacheMatch = "trt_force_timing_cache";
 constexpr const char* kDetailedBuildLog = "trt_detailed_build_log";
 constexpr const char* kBuildHeuristics = "trt_build_heuristics_enable";
 constexpr const char* kSparsityEnable = "trt_sparsity_enable";
@@ -76,13 +77,14 @@ TensorrtExecutionProviderInfo TensorrtExecutionProviderInfo::FromProviderOptions
           .AddAssignmentToReference(tensorrt::provider_option_names::kDLACore, info.dla_core)
           .AddAssignmentToReference(tensorrt::provider_option_names::kDumpSubgraphs, info.dump_subgraphs)
           .AddAssignmentToReference(tensorrt::provider_option_names::kEngineCacheEnable, info.engine_cache_enable)
-          .AddAssignmentToReference(tensorrt::provider_option_names::kCachePath, info.engine_cache_path)
+          .AddAssignmentToReference(tensorrt::provider_option_names::kEngineCachePath, info.engine_cache_path)
           .AddAssignmentToReference(tensorrt::provider_option_names::kDecryptionEnable, info.engine_decryption_enable)
           .AddAssignmentToReference(tensorrt::provider_option_names::kDecryptionLibPath, info.engine_decryption_lib_path)
           .AddAssignmentToReference(tensorrt::provider_option_names::kForceSequentialEngineBuild, info.force_sequential_engine_build)
           .AddAssignmentToReference(tensorrt::provider_option_names::kContextMemorySharingEnable, info.context_memory_sharing_enable)
           .AddAssignmentToReference(tensorrt::provider_option_names::kLayerNormFP32Fallback, info.layer_norm_fp32_fallback)
           .AddAssignmentToReference(tensorrt::provider_option_names::kTimingCacheEnable, info.timing_cache_enable)
+          .AddAssignmentToReference(tensorrt::provider_option_names::kTimingCachePath, info.timing_cache_path)
           .AddAssignmentToReference(tensorrt::provider_option_names::kForceTimingCacheMatch, info.force_timing_cache)
           .AddAssignmentToReference(tensorrt::provider_option_names::kDetailedBuildLog, info.detailed_build_log)
           .AddAssignmentToReference(tensorrt::provider_option_names::kBuildHeuristics, info.build_heuristics_enable)
@@ -115,7 +117,7 @@ ProviderOptions TensorrtExecutionProviderInfo::ToProviderOptions(const TensorrtE
       {tensorrt::provider_option_names::kDLACore, MakeStringWithClassicLocale(info.dla_core)},
       {tensorrt::provider_option_names::kDumpSubgraphs, MakeStringWithClassicLocale(info.dump_subgraphs)},
       {tensorrt::provider_option_names::kEngineCacheEnable, MakeStringWithClassicLocale(info.engine_cache_enable)},
-      {tensorrt::provider_option_names::kCachePath, MakeStringWithClassicLocale(info.engine_cache_path)},
+      {tensorrt::provider_option_names::kEngineCachePath, MakeStringWithClassicLocale(info.engine_cache_path)},
       {tensorrt::provider_option_names::kDecryptionEnable, MakeStringWithClassicLocale(info.engine_decryption_enable)},
       {tensorrt::provider_option_names::kDecryptionLibPath, MakeStringWithClassicLocale(info.engine_decryption_lib_path)},
       {tensorrt::provider_option_names::kForceSequentialEngineBuild, MakeStringWithClassicLocale(info.force_sequential_engine_build)},
@@ -123,6 +125,7 @@ ProviderOptions TensorrtExecutionProviderInfo::ToProviderOptions(const TensorrtE
       {tensorrt::provider_option_names::kContextMemorySharingEnable, MakeStringWithClassicLocale(info.context_memory_sharing_enable)},
       {tensorrt::provider_option_names::kLayerNormFP32Fallback, MakeStringWithClassicLocale(info.layer_norm_fp32_fallback)},
       {tensorrt::provider_option_names::kTimingCacheEnable, MakeStringWithClassicLocale(info.timing_cache_enable)},
+      {tensorrt::provider_option_names::kTimingCachePath, MakeStringWithClassicLocale(info.timing_cache_path)},
       {tensorrt::provider_option_names::kForceTimingCacheMatch, MakeStringWithClassicLocale(info.force_timing_cache)},
       {tensorrt::provider_option_names::kDetailedBuildLog, MakeStringWithClassicLocale(info.detailed_build_log)},
       {tensorrt::provider_option_names::kBuildHeuristics, MakeStringWithClassicLocale(info.build_heuristics_enable)},
@@ -142,7 +145,8 @@ ProviderOptions TensorrtExecutionProviderInfo::ToProviderOptions(const TensorrtE
 ProviderOptions TensorrtExecutionProviderInfo::ToProviderOptions(const OrtTensorRTProviderOptionsV2& info) {
   auto empty_if_null = [](const char* s) { return s != nullptr ? std::string{s} : std::string{}; };
   const std::string kInt8CalibTable_ = empty_if_null(info.trt_int8_calibration_table_name);
-  const std::string kCachePath_ = empty_if_null(info.trt_engine_cache_path);
+  const std::string kEngineCachePath_ = empty_if_null(info.trt_engine_cache_path);
+  const std::string kTimingCachePath_ = empty_if_null(info.trt_timing_cache_path);
   const std::string kTacticSources_ = empty_if_null(info.trt_tactic_sources);
   const std::string kDecryptionLibPath_ = empty_if_null(info.trt_engine_decryption_lib_path);
   const std::string kExtraPluginLibPaths_ = empty_if_null(info.trt_extra_plugin_lib_paths);
@@ -164,13 +168,14 @@ ProviderOptions TensorrtExecutionProviderInfo::ToProviderOptions(const OrtTensor
       {tensorrt::provider_option_names::kDLACore, MakeStringWithClassicLocale(info.trt_dla_core)},
       {tensorrt::provider_option_names::kDumpSubgraphs, MakeStringWithClassicLocale(info.trt_dump_subgraphs)},
       {tensorrt::provider_option_names::kEngineCacheEnable, MakeStringWithClassicLocale(info.trt_engine_cache_enable)},
-      {tensorrt::provider_option_names::kCachePath, kCachePath_},
+      {tensorrt::provider_option_names::kEngineCachePath, kEngineCachePath_},
       {tensorrt::provider_option_names::kDecryptionEnable, MakeStringWithClassicLocale(info.trt_engine_decryption_enable)},
       {tensorrt::provider_option_names::kDecryptionLibPath, kDecryptionLibPath_},
       {tensorrt::provider_option_names::kForceSequentialEngineBuild, MakeStringWithClassicLocale(info.trt_force_sequential_engine_build)},
       {tensorrt::provider_option_names::kContextMemorySharingEnable, MakeStringWithClassicLocale(info.trt_context_memory_sharing_enable)},
       {tensorrt::provider_option_names::kLayerNormFP32Fallback, MakeStringWithClassicLocale(info.trt_layer_norm_fp32_fallback)},
       {tensorrt::provider_option_names::kTimingCacheEnable, MakeStringWithClassicLocale(info.trt_timing_cache_enable)},
+      {tensorrt::provider_option_names::kTimingCachePath, kTimingCachePath_},
       {tensorrt::provider_option_names::kForceTimingCacheMatch, MakeStringWithClassicLocale(info.trt_force_timing_cache)},
       {tensorrt::provider_option_names::kDetailedBuildLog, MakeStringWithClassicLocale(info.trt_detailed_build_log)},
       {tensorrt::provider_option_names::kBuildHeuristics, MakeStringWithClassicLocale(info.trt_build_heuristics_enable)},
@@ -186,4 +191,93 @@ ProviderOptions TensorrtExecutionProviderInfo::ToProviderOptions(const OrtTensor
   };
   return options;
 }
+
+/**
+ * Update OrtTensorRTProviderOptionsV2 instance with ProviderOptions (map of string-based key-value pairs)
+ *
+ * Please note that it will reset the OrtTensorRTProviderOptionsV2 instance first and then set up the provided provider options
+ * See TensorrtExecutionProviderInfo::FromProviderOptions() for more details. This function will be called by the C API UpdateTensorRTProviderOptions() also.
+ *
+ * \param provider_options - a pointer to OrtTensorRTProviderOptionsV2 instance
+ * \param options - a reference to ProviderOptions instance
+ * \param string_copy - if it's true, it uses strncpy() to copy 'provider option' string from ProviderOptions instance to where the 'provider option' const char pointer in OrtTensorRTProviderOptionsV2 instance points to.
+ *                      it it's false, it only saves the pointer and no strncpy().
+ *
+ * Note: If there is strncpy involved, please remember to deallocate or simply call C API ReleaseTensorRTProviderOptions.
+ */
+void TensorrtExecutionProviderInfo::UpdateProviderOptions(void* provider_options, const ProviderOptions& options, bool string_copy) {
+  if (provider_options == nullptr) {
+    return;
+  }
+  auto copy_string_if_needed = [&](std::string& s_in) {
+    if (string_copy) {
+      char* dest = nullptr;
+      auto str_size = s_in.size();
+      if (str_size == 0) {
+        return (const char*)nullptr;
+      } else {
+        dest = new char[str_size + 1];
+#ifdef _MSC_VER
+        strncpy_s(dest, str_size + 1, s_in.c_str(), str_size);
+#else
+        strncpy(dest, s_in.c_str(), str_size);
+#endif
+        dest[str_size] = '\0';
+        return (const char*)dest;
+      }
+    } else {
+      return s_in.c_str();
+    }
+  };
+
+  TensorrtExecutionProviderInfo internal_options = onnxruntime::TensorrtExecutionProviderInfo::FromProviderOptions(options);
+  auto& trt_provider_options_v2 = *reinterpret_cast<OrtTensorRTProviderOptionsV2*>(provider_options);
+  trt_provider_options_v2.device_id = internal_options.device_id;
+
+  // The 'has_user_compute_stream' of the OrtTensorRTProviderOptionsV2 instance can be set by C API UpdateTensorRTProviderOptionsWithValue() as well
+  // We only set the 'has_user_compute_stream' of the OrtTensorRTProviderOptionsV2 instance if it is provided in options
+  if (options.find("has_user_compute_stream") != options.end()) {
+    trt_provider_options_v2.has_user_compute_stream = internal_options.has_user_compute_stream;
+  }
+
+  trt_provider_options_v2.trt_max_partition_iterations = internal_options.max_partition_iterations;
+  trt_provider_options_v2.trt_min_subgraph_size = internal_options.min_subgraph_size;
+  trt_provider_options_v2.trt_max_workspace_size = internal_options.max_workspace_size;
+  trt_provider_options_v2.trt_fp16_enable = internal_options.fp16_enable;
+  trt_provider_options_v2.trt_int8_enable = internal_options.int8_enable;
+
+  trt_provider_options_v2.trt_int8_calibration_table_name = copy_string_if_needed(internal_options.int8_calibration_table_name);
+
+  trt_provider_options_v2.trt_int8_use_native_calibration_table = internal_options.int8_use_native_calibration_table;
+  trt_provider_options_v2.trt_dla_enable = internal_options.dla_enable;
+  trt_provider_options_v2.trt_dla_core = internal_options.dla_core;
+  trt_provider_options_v2.trt_dump_subgraphs = internal_options.dump_subgraphs;
+  trt_provider_options_v2.trt_engine_cache_enable = internal_options.engine_cache_enable;
+
+  trt_provider_options_v2.trt_engine_cache_path = copy_string_if_needed(internal_options.engine_cache_path);
+  trt_provider_options_v2.trt_timing_cache_path = copy_string_if_needed(internal_options.timing_cache_path);
+
+  trt_provider_options_v2.trt_engine_decryption_enable = internal_options.engine_decryption_enable;
+
+  trt_provider_options_v2.trt_engine_decryption_lib_path = copy_string_if_needed(internal_options.engine_decryption_lib_path);
+
+  trt_provider_options_v2.trt_force_sequential_engine_build = internal_options.force_sequential_engine_build;
+  trt_provider_options_v2.trt_context_memory_sharing_enable = internal_options.context_memory_sharing_enable;
+  trt_provider_options_v2.trt_layer_norm_fp32_fallback = internal_options.layer_norm_fp32_fallback;
+  trt_provider_options_v2.trt_timing_cache_enable = internal_options.timing_cache_enable;
+  trt_provider_options_v2.trt_force_timing_cache = internal_options.force_timing_cache;
+  trt_provider_options_v2.trt_detailed_build_log = internal_options.detailed_build_log;
+  trt_provider_options_v2.trt_build_heuristics_enable = internal_options.build_heuristics_enable;
+  trt_provider_options_v2.trt_sparsity_enable = internal_options.sparsity_enable;
+  trt_provider_options_v2.trt_builder_optimization_level = internal_options.builder_optimization_level;
+  trt_provider_options_v2.trt_auxiliary_streams = internal_options.auxiliary_streams;
+
+  trt_provider_options_v2.trt_tactic_sources = copy_string_if_needed(internal_options.tactic_sources);
+  trt_provider_options_v2.trt_extra_plugin_lib_paths = copy_string_if_needed(internal_options.extra_plugin_lib_paths);
+  trt_provider_options_v2.trt_profile_min_shapes = copy_string_if_needed(internal_options.profile_min_shapes);
+  trt_provider_options_v2.trt_profile_max_shapes = copy_string_if_needed(internal_options.profile_max_shapes);
+  trt_provider_options_v2.trt_profile_opt_shapes = copy_string_if_needed(internal_options.profile_opt_shapes);
+
+  trt_provider_options_v2.trt_cuda_graph_enable = internal_options.cuda_graph_enable;
+}
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h
index 4fb9837e1c040..b16543aa3d7dd 100644
--- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h
+++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h
@@ -38,6 +38,7 @@ struct TensorrtExecutionProviderInfo {
   bool context_memory_sharing_enable{false};
   bool layer_norm_fp32_fallback{false};
   bool timing_cache_enable{false};
+  std::string timing_cache_path{""};
   bool force_timing_cache{false};
   bool detailed_build_log{false};
   bool build_heuristics_enable{false};
@@ -54,6 +55,7 @@ struct TensorrtExecutionProviderInfo {
   static TensorrtExecutionProviderInfo FromProviderOptions(const ProviderOptions& options);
   static ProviderOptions ToProviderOptions(const TensorrtExecutionProviderInfo& info);
   static ProviderOptions ToProviderOptions(const OrtTensorRTProviderOptionsV2& info);
+  static void UpdateProviderOptions(void* provider_options, const ProviderOptions& options, bool string_copy);
 
   std::vector<OrtCustomOpDomain*> custom_op_domain_list;
 };
diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc
index 18ec113734b97..426584553f349 100644
--- a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc
+++ b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc
@@ -2,7 +2,7 @@
 // Licensed under the MIT License.
 
 #include "core/providers/shared_library/provider_api.h"
-#include "core/providers/tensorrt/tensorrt_provider_factory.h"
+#include "tensorrt_provider_factory.h"
 #include <atomic>
 #include "tensorrt_execution_provider.h"
 #include "tensorrt_provider_factory_creator.h"
@@ -18,22 +18,45 @@ namespace onnxruntime {
 void InitializeRegistry();
 void DeleteRegistry();
 
+struct ProviderInfo_TensorRT_Impl final : ProviderInfo_TensorRT {
+  OrtStatus* GetCurrentGpuDeviceId(_In_ int* device_id) override {
+    auto cuda_err = cudaGetDevice(device_id);
+    if (cuda_err != cudaSuccess) {
+      return CreateStatus(ORT_FAIL, "Failed to get device id.");
+    }
+    return nullptr;
+  }
+
+  OrtStatus* UpdateProviderOptions(void* provider_options, const ProviderOptions& options, bool string_copy) override {
+    TensorrtExecutionProviderInfo::UpdateProviderOptions(provider_options, options, string_copy);
+    return nullptr;
+  }
+
+  OrtStatus* GetTensorRTCustomOpDomainList(std::vector<OrtCustomOpDomain*>& domain_list, const std::string extra_plugin_lib_paths) override {
+    common::Status status = CreateTensorRTCustomOpDomainList(domain_list, extra_plugin_lib_paths);
+    if (!status.IsOK()) {
+      return CreateStatus(ORT_FAIL, "[TensorRT EP] Can't create custom ops for TRT plugins.");
+    }
+    return nullptr;
+  }
+
+  OrtStatus* ReleaseCustomOpDomainList(std::vector<OrtCustomOpDomain*>& domain_list) override {
+    ReleaseTensorRTCustomOpDomainList(domain_list);
+    return nullptr;
+  }
+
+} g_info;
+
 struct TensorrtProviderFactory : IExecutionProviderFactory {
   TensorrtProviderFactory(const TensorrtExecutionProviderInfo& info) : info_{info} {}
   ~TensorrtProviderFactory() override {}
 
   std::unique_ptr<IExecutionProvider> CreateProvider() override;
 
-  void GetCustomOpDomainList(std::vector<OrtCustomOpDomain*>& custom_op_domain_list);
-
  private:
   TensorrtExecutionProviderInfo info_;
 };
 
-void TensorrtProviderFactory::GetCustomOpDomainList(std::vector<OrtCustomOpDomain*>& custom_op_domain_list) {
-  custom_op_domain_list = info_.custom_op_domain_list;
-}
-
 std::unique_ptr<IExecutionProvider> TensorrtProviderFactory::CreateProvider() {
   return std::make_unique<TensorrtExecutionProvider>(info_);
 }
@@ -46,15 +69,12 @@ std::shared_ptr<IExecutionProviderFactory> TensorrtProviderFactoryCreator::Creat
 }
 
 struct Tensorrt_Provider : Provider {
+  void* GetInfo() override { return &g_info; }
   std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory(int device_id) override {
     TensorrtExecutionProviderInfo info;
     info.device_id = device_id;
     info.has_trt_options = false;
 
-    common::Status status = CreateTensorRTCustomOpDomainList(info);
-    if (!status.IsOK()) {
-      LOGS_DEFAULT(WARNING) << "[TensorRT EP] Failed to get TRT plugins from TRT plugin registration.";
-    }
     return std::make_shared<TensorrtProviderFactory>(info);
   }
 
@@ -83,6 +103,7 @@ struct Tensorrt_Provider : Provider {
     info.context_memory_sharing_enable = options.trt_context_memory_sharing_enable != 0;
     info.layer_norm_fp32_fallback = options.trt_layer_norm_fp32_fallback != 0;
     info.timing_cache_enable = options.trt_timing_cache_enable != 0;
+    info.timing_cache_path = options.trt_timing_cache_path == nullptr ? "" : options.trt_timing_cache_path;
     info.force_timing_cache = options.trt_force_timing_cache != 0;
     info.detailed_build_log = options.trt_detailed_build_log != 0;
     info.build_heuristics_enable = options.trt_build_heuristics_enable != 0;
@@ -96,169 +117,11 @@ struct Tensorrt_Provider : Provider {
     info.profile_opt_shapes = options.trt_profile_opt_shapes == nullptr ? "" : options.trt_profile_opt_shapes;
     info.cuda_graph_enable = options.trt_cuda_graph_enable != 0;
 
-    common::Status status = CreateTensorRTCustomOpDomainList(info);
-    if (!status.IsOK()) {
-      LOGS_DEFAULT(WARNING) << "[TensorRT EP] Failed to get TRT plugins from TRT plugin registration.";
-    }
-
     return std::make_shared<TensorrtProviderFactory>(info);
   }
 
-  /**
-   * This function will be called by the C API UpdateTensorRTProviderOptions().
-   *
-   * Please note that it will reset the OrtProviderOptionsV2 instance first and then set up the provided provider options
-   * See TensorrtExecutionProviderInfo::FromProviderOptions() for more details
-   */
   void UpdateProviderOptions(void* provider_options, const ProviderOptions& options) override {
-    auto internal_options = onnxruntime::TensorrtExecutionProviderInfo::FromProviderOptions(options);
-    auto& trt_options = *reinterpret_cast<OrtTensorRTProviderOptionsV2*>(provider_options);
-    trt_options.device_id = internal_options.device_id;
-
-    // The 'has_user_compute_stream' of the OrtTensorRTProviderOptionsV2 instance can be set by C API UpdateTensorRTProviderOptionsWithValue() as well
-    // We only set the 'has_user_compute_stream' of the OrtTensorRTProviderOptionsV2 instance if it is provided in options
-    if (options.find("has_user_compute_stream") != options.end()) {
-      trt_options.has_user_compute_stream = internal_options.has_user_compute_stream;
-    }
-
-    trt_options.trt_max_partition_iterations = internal_options.max_partition_iterations;
-    trt_options.trt_min_subgraph_size = internal_options.min_subgraph_size;
-    trt_options.trt_max_workspace_size = internal_options.max_workspace_size;
-    trt_options.trt_fp16_enable = internal_options.fp16_enable;
-    trt_options.trt_int8_enable = internal_options.int8_enable;
-
-    char* dest = nullptr;
-    auto str_size = internal_options.int8_calibration_table_name.size();
-    if (str_size == 0) {
-      trt_options.trt_int8_calibration_table_name = nullptr;
-    } else {
-      dest = new char[str_size + 1];
-#ifdef _MSC_VER
-      strncpy_s(dest, str_size + 1, internal_options.int8_calibration_table_name.c_str(), str_size);
-#else
-      strncpy(dest, internal_options.int8_calibration_table_name.c_str(), str_size);
-#endif
-      dest[str_size] = '\0';
-      trt_options.trt_int8_calibration_table_name = (const char*)dest;
-    }
-
-    trt_options.trt_int8_use_native_calibration_table = internal_options.int8_use_native_calibration_table;
-    trt_options.trt_dla_enable = internal_options.dla_enable;
-    trt_options.trt_dla_core = internal_options.dla_core;
-    trt_options.trt_dump_subgraphs = internal_options.dump_subgraphs;
-    trt_options.trt_engine_cache_enable = internal_options.engine_cache_enable;
-
-    str_size = internal_options.engine_cache_path.size();
-    if (str_size == 0) {
-      trt_options.trt_engine_cache_path = nullptr;
-    } else {
-      dest = new char[str_size + 1];
-#ifdef _MSC_VER
-      strncpy_s(dest, str_size + 1, internal_options.engine_cache_path.c_str(), str_size);
-#else
-      strncpy(dest, internal_options.engine_cache_path.c_str(), str_size);
-#endif
-      dest[str_size] = '\0';
-      trt_options.trt_engine_cache_path = (const char*)dest;
-    }
-
-    trt_options.trt_engine_decryption_enable = internal_options.engine_decryption_enable;
-
-    str_size = internal_options.engine_decryption_lib_path.size();
-    if (str_size == 0) {
-      trt_options.trt_engine_decryption_lib_path = nullptr;
-    } else {
-      dest = new char[str_size + 1];
-#ifdef _MSC_VER
-      strncpy_s(dest, str_size + 1, internal_options.engine_decryption_lib_path.c_str(), str_size);
-#else
-      strncpy(dest, internal_options.engine_decryption_lib_path.c_str(), str_size);
-#endif
-      dest[str_size] = '\0';
-      trt_options.trt_engine_decryption_lib_path = (const char*)dest;
-    }
-
-    trt_options.trt_force_sequential_engine_build = internal_options.force_sequential_engine_build;
-    trt_options.trt_context_memory_sharing_enable = internal_options.context_memory_sharing_enable;
-    trt_options.trt_layer_norm_fp32_fallback = internal_options.layer_norm_fp32_fallback;
-    trt_options.trt_timing_cache_enable = internal_options.timing_cache_enable;
-    trt_options.trt_force_timing_cache = internal_options.force_timing_cache;
-    trt_options.trt_detailed_build_log = internal_options.detailed_build_log;
-    trt_options.trt_build_heuristics_enable = internal_options.build_heuristics_enable;
-    trt_options.trt_sparsity_enable = internal_options.sparsity_enable;
-    trt_options.trt_builder_optimization_level = internal_options.builder_optimization_level;
-    trt_options.trt_auxiliary_streams = internal_options.auxiliary_streams;
-    str_size = internal_options.tactic_sources.size();
-    if (str_size == 0) {
-      trt_options.trt_tactic_sources = nullptr;
-    } else {
-      dest = new char[str_size + 1];
-#ifdef _MSC_VER
-      strncpy_s(dest, str_size + 1, internal_options.tactic_sources.c_str(), str_size);
-#else
-      strncpy(dest, internal_options.tactic_sources.c_str(), str_size);
-#endif
-      dest[str_size] = '\0';
-      trt_options.trt_tactic_sources = (const char*)dest;
-    }
-
-    str_size = internal_options.extra_plugin_lib_paths.size();
-    if (str_size == 0) {
-      trt_options.trt_extra_plugin_lib_paths = nullptr;
-    } else {
-      dest = new char[str_size + 1];
-#ifdef _MSC_VER
-      strncpy_s(dest, str_size + 1, internal_options.extra_plugin_lib_paths.c_str(), str_size);
-#else
-      strncpy(dest, internal_options.extra_plugin_lib_paths.c_str(), str_size);
-#endif
-      dest[str_size] = '\0';
-      trt_options.trt_extra_plugin_lib_paths = (const char*)dest;
-    }
-
-    str_size = internal_options.profile_min_shapes.size();
-    if (str_size == 0) {
-      trt_options.trt_profile_min_shapes = nullptr;
-    } else {
-      dest = new char[str_size + 1];
-#ifdef _MSC_VER
-      strncpy_s(dest, str_size + 1, internal_options.profile_min_shapes.c_str(), str_size);
-#else
-      strncpy(dest, internal_options.profile_min_shapes.c_str(), str_size);
-#endif
-      dest[str_size] = '\0';
-      trt_options.trt_profile_min_shapes = (const char*)dest;
-    }
-
-    str_size = internal_options.profile_max_shapes.size();
-    if (str_size == 0) {
-      trt_options.trt_profile_max_shapes = nullptr;
-    } else {
-      dest = new char[str_size + 1];
-#ifdef _MSC_VER
-      strncpy_s(dest, str_size + 1, internal_options.profile_max_shapes.c_str(), str_size);
-#else
-      strncpy(dest, internal_options.profile_max_shapes.c_str(), str_size);
-#endif
-      dest[str_size] = '\0';
-      trt_options.trt_profile_max_shapes = (const char*)dest;
-    }
-
-    str_size = internal_options.profile_opt_shapes.size();
-    if (str_size == 0) {
-      trt_options.trt_profile_opt_shapes = nullptr;
-    } else {
-      dest = new char[str_size + 1];
-#ifdef _MSC_VER
-      strncpy_s(dest, str_size + 1, internal_options.profile_opt_shapes.c_str(), str_size);
-#else
-      strncpy(dest, internal_options.profile_opt_shapes.c_str(), str_size);
-#endif
-      dest[str_size] = '\0';
-      trt_options.trt_profile_opt_shapes = (const char*)dest;
-    }
-
-    trt_options.trt_cuda_graph_enable = internal_options.cuda_graph_enable;
+    TensorrtExecutionProviderInfo::UpdateProviderOptions(provider_options, options, true);
   }
 
   ProviderOptions GetProviderOptions(const void* provider_options) override {
@@ -266,11 +129,6 @@ struct Tensorrt_Provider : Provider {
     return onnxruntime::TensorrtExecutionProviderInfo::ToProviderOptions(options);
   }
 
-  void GetCustomOpDomainList(IExecutionProviderFactory* factory, std::vector<OrtCustomOpDomain*>& custom_op_domains_ptr) override {
-    TensorrtProviderFactory* trt_factory = reinterpret_cast<TensorrtProviderFactory*>(factory);
-    trt_factory->GetCustomOpDomainList(custom_op_domains_ptr);
-  }
-
   void Initialize() override {
     InitializeRegistry();
   }
diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h
new file mode 100644
index 0000000000000..231e14e5c95f2
--- /dev/null
+++ b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h
@@ -0,0 +1,17 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "onnxruntime_c_api.h"
+#include "core/framework/provider_options.h"
+
+namespace onnxruntime {
+struct ProviderInfo_TensorRT {
+  virtual OrtStatus* GetCurrentGpuDeviceId(_In_ int* device_id) = 0;
+  virtual OrtStatus* UpdateProviderOptions(void* provider_options, const ProviderOptions& options, bool string_copy) = 0;
+  virtual OrtStatus* GetTensorRTCustomOpDomainList(std::vector<OrtCustomOpDomain*>& domain_list, const std::string extra_plugin_lib_paths) = 0;
+  virtual OrtStatus* ReleaseCustomOpDomainList(std::vector<OrtCustomOpDomain*>& domain_list) = 0;
+
+ protected:
+  ~ProviderInfo_TensorRT() = default;  // Can only be destroyed through a subclass instance
+};
+}  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/vitisai/imp/tensor_proto.cc b/onnxruntime/core/providers/vitisai/imp/tensor_proto.cc
index 6ee148bc6a068..db03354bf4c44 100644
--- a/onnxruntime/core/providers/vitisai/imp/tensor_proto.cc
+++ b/onnxruntime/core/providers/vitisai/imp/tensor_proto.cc
@@ -16,6 +16,12 @@ gsl::span<const char> tensor_proto_as_raw(
     std::vector<uint8_t> unpacked_tensor;
     auto s = onnxruntime::utils::UnpackInitializerData(tensor, onnxruntime::Path(), unpacked_tensor);
     mut_tensor.mutable_raw_data()->resize(unpacked_tensor.size());
+    mut_tensor.clear_float_data();
+    mut_tensor.clear_int32_data();
+    mut_tensor.clear_string_data();
+    mut_tensor.clear_int64_data();
+    mut_tensor.clear_double_data();
+    mut_tensor.clear_uint64_data();
     memcpy(mut_tensor.mutable_raw_data()->data(), unpacked_tensor.data(), unpacked_tensor.size());
   }
   return gsl::span<const char>(tensor.raw_data().data(), tensor.raw_data().size());
diff --git a/onnxruntime/core/providers/webnn/builders/helper.cc b/onnxruntime/core/providers/webnn/builders/helper.cc
index 31453e005272e..d34cb7e362446 100644
--- a/onnxruntime/core/providers/webnn/builders/helper.cc
+++ b/onnxruntime/core/providers/webnn/builders/helper.cc
@@ -53,9 +53,12 @@ bool IsInputSupported(const NodeArg& input, const std::string& parent_name, cons
   }
 
   for (const auto& dim : shape_proto->dim()) {
-    // For now we workaround dynamic shape support by assuming 1.
+    // WebNN doesn't support dynamic shape - use sessionOptions.freeDimensionOverrides to fix the shape.
     if (!dim.has_dim_value()) {
-      LOGS(logger, VERBOSE) << "Dynamic shape is not supported for now, assume to be 1, for input:" << input_name;
+      LOGS(logger, VERBOSE) << "Dynamic shape is not supported, "
+                            << "use sessionOptions.FreeDimensionOverrides to set a fixed shape for input: "
+                            << input_name;
+      return false;
     }
   }
 
@@ -82,7 +85,7 @@ std::vector<std::vector<NodeIndex>> GetSupportedNodes(const GraphViewer& graph_v
     const auto* node(graph_viewer.GetNode(node_idx));
     bool supported = false;
     // Firstly check if platform supports the WebNN op.
-    if (CheckSingleOp(node->OpType(), wnn_builder_)) {
+    if (CheckSingleOp(node->OpType(), wnn_builder_, device_type)) {
       LOGS(logger, VERBOSE) << "Operator type: [" << node->OpType() << "] is supported by browser";
       supported = IsNodeSupported(*node, graph_viewer, device_type, logger);
     }
@@ -139,5 +142,43 @@ bool IsValidMultidirectionalBroadcast(std::vector<int64_t>& shape_a,
   return true;
 }
 
+bool SetWebnnDataType(emscripten::val& desc, const int32_t data_type) {
+  // WebNN changed the name of the MLOperandDescriptor's data type from "type" to "dataType",
+  // use a duplicate entry temporarily to workaround this API breaking issue.
+  // TODO: Remove legacy "type" once all browsers implement the new "dataType".
+  switch (data_type) {
+    case ONNX_NAMESPACE::TensorProto_DataType_BOOL:
+      desc.set("type", emscripten::val("uint8"));
+      desc.set("dataType", emscripten::val("uint8"));
+      return true;
+    case ONNX_NAMESPACE::TensorProto_DataType_FLOAT16:
+      desc.set("type", emscripten::val("float16"));
+      desc.set("dataType", emscripten::val("float16"));
+      return true;
+    case ONNX_NAMESPACE::TensorProto_DataType_FLOAT:
+      desc.set("type", emscripten::val("float32"));
+      desc.set("dataType", emscripten::val("float32"));
+      return true;
+    case ONNX_NAMESPACE::TensorProto_DataType_INT32:
+      desc.set("type", emscripten::val("int32"));
+      desc.set("dataType", emscripten::val("int32"));
+      return true;
+    case ONNX_NAMESPACE::TensorProto_DataType_INT64:
+      desc.set("type", emscripten::val("int64"));
+      desc.set("dataType", emscripten::val("int64"));
+      return true;
+    case ONNX_NAMESPACE::TensorProto_DataType_UINT32:
+      desc.set("type", emscripten::val("uint32"));
+      desc.set("dataType", emscripten::val("uint32"));
+      return true;
+    case ONNX_NAMESPACE::TensorProto_DataType_UINT64:
+      desc.set("type", emscripten::val("uint64"));
+      desc.set("dataType", emscripten::val("uint64"));
+      return true;
+    default:
+      return false;
+  }
+}
+
 }  // namespace webnn
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/webnn/builders/helper.h b/onnxruntime/core/providers/webnn/builders/helper.h
index cdad9b22a8ab8..617108c57d8a2 100644
--- a/onnxruntime/core/providers/webnn/builders/helper.h
+++ b/onnxruntime/core/providers/webnn/builders/helper.h
@@ -30,6 +30,11 @@ enum class WebnnDeviceType {
   GPU,
 };
 
+typedef struct {
+  std::string opName;
+  bool isCpuSupported;  // The WebNN CPU backend XNNPack supports it (not about the CPU EP).
+} WebnnOpInfo;
+
 bool GetShape(const NodeArg& node_arg, std::vector<int64_t>& shape, const logging::Logger& logger);
 
 template <typename T>
@@ -128,88 +133,107 @@ std::vector<std::vector<NodeIndex>> GetSupportedNodes(const GraphViewer& graph_v
                                                       const emscripten::val& wnn_builder_,
                                                       const WebnnDeviceType device_type,
                                                       const logging::Logger& logger);
-static const InlinedHashMap<std::string, std::string> op_map = {
-    {"Abs", "abs"},
-    {"Add", "add"},
-    {"ArgMax", "argMax"},
-    {"ArgMin", "argMin"},
-    {"AveragePool", "averagePool2d"},
-    {"BatchNormalization", "meanVarianceNormalization"},
-    {"Cast", "cast"},
-    {"Ceil", "ceil"},
-    {"Clip", "clamp"},
-    {"Concat", "concat"},
-    {"Conv", "conv2d"},
-    {"ConvTranspose", "convTranspose2d"},
-    {"Cos", "cos"},
-    {"Div", "div"},
-    {"Elu", "elu"},
-    {"Equal", "equal"},
-    {"Erf", "erf"},
-    {"Exp", "exp"},
-    {"Expand", "expand"},
-    {"Flatten", "flattenTo2d"},
-    {"Floor", "floor"},
-    {"Gather", "gather"},
-    {"Gemm", "gemm"},
-    {"GlobalAveragePool", "averagePool2d"},
-    {"GlobalMaxPool", "maxPool2d"},
-    {"GlobalLpPool", "l2Pool2d"},
-    {"Greater", "greater"},
-    {"GroupNormalization", "meanVarianceNormalization"},
-    {"HardSigmoid", "hardSigmoid"},
-    {"HardSwish", "hardSwish"},
-    {"Identity", "identity"},
-    {"InstanceNormalization", "meanVarianceNormalization"},
-    {"LayerNormalization", "meanVarianceNormalization"},
-    {"LeakyRelu", "leakyRelu"},
-    {"Less", "lesser"},
-    {"Log", "log"},
-    {"LpPool", "l2Pool2d"},
-    {"MatMul", "matmul"},
-    {"Max", "max"},
-    {"MaxPool", "maxPool2d"},
-    {"Min", "min"},
-    {"Mul", "mul"},
-    {"Neg", "neg"},
-    {"Not", "logicalNot"},
-    {"Pad", "pad"},
-    {"Pow", "pow"},
-    {"PRelu", "prelu"},
-    {"Reciprocal", "reciprocal"},
-    {"ReduceL1", "reduceL1"},
-    {"ReduceL2", "reduceL2"},
-    {"ReduceLogSum", "reduceLogSum"},
-    {"ReduceLogSumExp", "reduceLogSumExp"},
-    {"ReduceMax", "reduceMax"},
-    {"ReduceMean", "reduceMean"},
-    {"ReduceMin", "reduceMin"},
-    {"ReduceProd", "reduceProduct"},
-    {"ReduceSum", "reduceSum"},
-    {"ReduceSumSquare", "reduceSumSquare"},
-    {"Relu", "relu"},
-    {"Reshape", "reshape"},
-    {"Resize", "resample2d"},
-    {"Shape", "slice"},
-    {"Sigmoid", "sigmoid"},
-    {"Softplus", "softplus"},
-    {"Softsign", "softsign"},
-    {"Sin", "sin"},
-    {"Slice", "slice"},
-    {"Softmax", "softmax"},
-    {"Split", "split"},
-    {"Sqrt", "sqrt"},
-    {"Squeeze", "squeeze"},
-    {"Sub", "sub"},
-    {"Tan", "tan"},
-    {"Tanh", "tanh"},
-    {"Transpose", "transpose"},
-    {"Unsqueeze", "unsqueeze"},
-    {"Where", "elementwiseIf"},
+static const InlinedHashMap<std::string, WebnnOpInfo> op_map = {
+    {"Abs", {"abs", true}},
+    {"Add", {"add", true}},
+    {"ArgMax", {"argMax", false}},
+    {"ArgMin", {"argMin", false}},
+    {"AveragePool", {"averagePool2d", true}},
+    {"BatchNormalization", {"meanVarianceNormalization", false}},
+    {"Cast", {"cast", false}},
+    {"Ceil", {"ceil", true}},
+    {"Clip", {"clamp", true}},
+    {"Concat", {"concat", true}},
+    {"Conv", {"conv2d", true}},
+    {"ConvTranspose", {"convTranspose2d", true}},
+    {"Cos", {"cos", false}},
+    {"Div", {"div", true}},
+    {"Elu", {"elu", true}},
+    {"Equal", {"equal", false}},
+    {"Erf", {"erf", false}},
+    {"Exp", {"exp", false}},
+    {"Expand", {"expand", false}},
+    {"Flatten", {"reshape", true}},
+    {"Floor", {"floor", true}},
+    {"Gather", {"gather", false}},
+    {"Gemm", {"gemm", true}},
+    {"GlobalAveragePool", {"averagePool2d", true}},
+    {"GlobalMaxPool", {"maxPool2d", true}},
+    {"GlobalLpPool", {"l2Pool2d", false}},
+    {"Greater", {"greater", false}},
+    {"GreaterOrEqual", {"greaterOrEqual", false}},
+    {"GroupNormalization", {"meanVarianceNormalization", false}},
+    {"HardSigmoid", {"hardSigmoid", false}},
+    {"HardSwish", {"hardSwish", true}},
+    {"Identity", {"identity", false}},
+    {"InstanceNormalization", {"meanVarianceNormalization", false}},
+    {"LayerNormalization", {"meanVarianceNormalization", false}},
+    {"LeakyRelu", {"leakyRelu", true}},
+    {"Less", {"lesser", false}},
+    {"LessOrEqual", {"lesserOrEqual", false}},
+    {"Log", {"log", false}},
+    {"LpPool", {"l2Pool2d", false}},
+    {"MatMul", {"matmul", false}},
+    {"Max", {"max", true}},
+    {"MaxPool", {"maxPool2d", true}},
+    {"Min", {"min", true}},
+    {"Mul", {"mul", true}},
+    {"Neg", {"neg", true}},
+    {"Not", {"logicalNot", false}},
+    {"Pad", {"pad", true}},
+    {"Pow", {"pow", true}},
+    {"PRelu", {"prelu", true}},
+    {"Reciprocal", {"reciprocal", false}},
+    {"ReduceL1", {"reduceL1", false}},
+    {"ReduceL2", {"reduceL2", false}},
+    {"ReduceLogSum", {"reduceLogSum", false}},
+    {"ReduceLogSumExp", {"reduceLogSumExp", false}},
+    {"ReduceMax", {"reduceMax", false}},
+    {"ReduceMean", {"reduceMean", true}},
+    {"ReduceMin", {"reduceMin", false}},
+    {"ReduceProd", {"reduceProduct", false}},
+    {"ReduceSum", {"reduceSum", true}},
+    {"ReduceSumSquare", {"reduceSumSquare", false}},
+    {"Relu", {"relu", true}},
+    {"Reshape", {"reshape", true}},
+    {"Resize", {"resample2d", true}},
+    {"Shape", {"slice", true}},
+    {"Sigmoid", {"sigmoid", true}},
+    {"Softplus", {"softplus", false}},
+    {"Softsign", {"softsign", false}},
+    {"Sin", {"sin", false}},
+    {"Slice", {"slice", true}},
+    {"Softmax", {"softmax", true}},
+    {"Split", {"split", true}},
+    {"Sqrt", {"sqrt", false}},
+    {"Squeeze", {"reshape", true}},
+    {"Sub", {"sub", true}},
+    {"Tan", {"tan", false}},
+    {"Tanh", {"tanh", true}},
+    {"Transpose", {"transpose", true}},
+    {"Unsqueeze", {"reshape", true}},
+    {"Where", {"elementwiseIf", false}},
 };
 
-inline bool CheckSingleOp(const std::string& op_type, const emscripten::val& wnn_builder_) {
-  return op_map.find(op_type) != op_map.end() && wnn_builder_[op_map.find(op_type)->second].as<bool>();
+inline bool CheckSingleOp(const std::string& op_type, const emscripten::val& wnn_builder_,
+                          const WebnnDeviceType device_type) {
+  // Returns false if the op_type is not listed in the op_map.
+  if (op_map.find(op_type) == op_map.end()) {
+    return false;
+  }
+  // Returns false if the WebNN op has not been implemented in MLGraphBuilder in current browser.
+  if (!wnn_builder_[op_map.find(op_type)->second.opName].as<bool>()) {
+    return false;
+  }
+  // The current WebNN CPU (XNNPack) backend supports a limited op list, and we'd rather
+  // fall back early to the ORT CPU EP rather than fail in the WebNN "cpu" deviceType.
+  // This is a workaround because the op may be included in MLGraphBuilder for DirectML
+  // backend but without XNNPack implementation in Chromium.
+  if (!op_map.find(op_type)->second.isCpuSupported) {
+    return false;
+  }
+
+  return true;
 }
 
 constexpr std::array<ONNX_NAMESPACE::TensorProto_DataType, 1> supported_cpu_data_types = {
@@ -231,5 +255,8 @@ bool IsSupportedDataType(const int32_t data_type, const WebnnDeviceType device_t
 bool IsValidMultidirectionalBroadcast(std::vector<int64_t>& shape_a,
                                       std::vector<int64_t>& shape_b,
                                       const logging::Logger& logger);
+
+bool SetWebnnDataType(emscripten::val& desc, const int32_t data_type);
+
 }  // namespace webnn
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/webnn/builders/impl/base_op_builder.h b/onnxruntime/core/providers/webnn/builders/impl/base_op_builder.h
index 301927d9c658f..01e4a3c60281f 100644
--- a/onnxruntime/core/providers/webnn/builders/impl/base_op_builder.h
+++ b/onnxruntime/core/providers/webnn/builders/impl/base_op_builder.h
@@ -46,7 +46,7 @@ class BaseOpBuilder : public IOpBuilder {
   // We still set the mininal supported opset to 1 as we couldn't
   // get the model opset version at this stage.
   virtual int GetMinSupportedOpSet(const Node& /* node */) const { return 1; }
-  virtual int GetMaxSupportedOpSet(const Node& /* node */) const { return 19; }
+  virtual int GetMaxSupportedOpSet(const Node& /* node */) const { return 20; }
 
  private:
   bool HasSupportedOpSet(const Node& node, const logging::Logger& logger) const;
diff --git a/onnxruntime/core/providers/webnn/builders/impl/conv_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/conv_op_builder.cc
index 1e0af51567ca0..af3293dd3d92c 100644
--- a/onnxruntime/core/providers/webnn/builders/impl/conv_op_builder.cc
+++ b/onnxruntime/core/providers/webnn/builders/impl/conv_op_builder.cc
@@ -49,9 +49,8 @@ common::Status SetConvBaseOptions(ModelBuilder& model_builder,
   NodeAttrHelper helper(node);
   const auto group = helper.Get("group", static_cast<int32_t>(1));
   const auto& input_defs = node.InputDefs();
-  const auto& weight_tensor = *model_builder.GetInitializerTensors().at(input_defs[1]->Name());
-  const auto& weight_shape = weight_tensor.dims();
-
+  std::vector<int64_t> weight_shape;
+  ORT_RETURN_IF_NOT(GetShape(*input_defs[1], weight_shape, logger), "Cannot get weight shape");
   options.set("strides", emscripten::val::array(strides));
   options.set("dilations", emscripten::val::array(dilations));
   options.set("groups", group);
@@ -278,25 +277,28 @@ Status ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N
 
 bool ConvOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& initializers,
                                       const Node& node,
-                                      const WebnnDeviceType /* device_type */,
+                                      const WebnnDeviceType device_type,
                                       const logging::Logger& logger) const {
   const auto& name = node.Name();
   const auto& op_type = node.OpType();
   const auto& input_defs = node.InputDefs();
 
   const auto& weight_name = input_defs[1]->Name();
-  if (Contains(initializers, weight_name)) {
-    const auto& tensor = *initializers.at(weight_name);
-    if (tensor.dims().size() != 4) {
-      LOGS(logger, VERBOSE) << op_type << " [" << name << "] dimension: " << tensor.dims().size()
-                            << " Only conv 2d is supported.";
+  // WebNN CPU backend (XNNPACK) requires the filter operand to be a constant.
+  // https://github.com/google/XNNPACK/blob/master/src/subgraph/convolution-2d.c#L739
+  if (device_type == WebnnDeviceType::CPU) {
+    if (Contains(initializers, weight_name)) {
+      const auto& tensor = *initializers.at(weight_name);
+      if (tensor.dims().size() != 4) {
+        LOGS(logger, VERBOSE) << op_type << " [" << name << "] dimension: " << tensor.dims().size()
+                              << " Only conv 2d is supported.";
+        return false;
+      }
+    } else {
+      LOGS(logger, VERBOSE) << "The weight of " << op_type << " [" << name << "] must be known";
       return false;
     }
-  } else {
-    LOGS(logger, VERBOSE) << "The weight of " << op_type << " [" << name << "] must be known";
-    return false;
   }
-
   return true;
 }
 
diff --git a/onnxruntime/core/providers/webnn/builders/impl/flatten_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/flatten_op_builder.cc
index 6c59ca451f333..f0df27b523dfc 100644
--- a/onnxruntime/core/providers/webnn/builders/impl/flatten_op_builder.cc
+++ b/onnxruntime/core/providers/webnn/builders/impl/flatten_op_builder.cc
@@ -36,14 +36,20 @@ Status FlattenOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,
   int64_t rank = input_shape.size();
   NodeAttrHelper helper(node);
   int64_t axis = helper.Get("axis", 1);
-  ORT_ENFORCE(axis >= -rank && axis <= rank, "axis ", axis,
-              " is not in valid range [-", rank, ",", rank, "]");
-  if (axis < 0) {
-    axis += rank;
-  }
+  axis = HandleNegativeAxis(axis, rank);
+
+  // Use WebNN's reshape to implement Flatten.
+  int64_t num_pre_axis_elements = std::accumulate(
+      input_shape.begin(), input_shape.begin() + static_cast<int32_t>(axis), 1, std::multiplies<int64_t>());
+  int64_t num_post_axis_elements = std::accumulate(
+      input_shape.begin() + static_cast<int32_t>(axis), input_shape.end(), 1, std::multiplies<int64_t>());
+
+  std::vector<uint32_t> new_shape = {SafeInt<uint32_t>(num_pre_axis_elements),
+                                     SafeInt<uint32_t>(num_post_axis_elements)};
+
   emscripten::val inputs = model_builder.GetOperand(input_defs[0]->Name());
-  emscripten::val output = model_builder.GetBuilder().call<emscripten::val>("flattenTo2d", inputs,
-                                                                            static_cast<int32_t>(axis));
+  emscripten::val output = model_builder.GetBuilder().call<emscripten::val>(
+      "reshape", inputs, emscripten::val::array(new_shape));
 
   model_builder.AddOperand(node.OutputDefs()[0]->Name(), std::move(output));
   return Status::OK();
diff --git a/onnxruntime/core/providers/webnn/builders/impl/logical_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/logical_op_builder.cc
index 4cb49d8f8cd3a..c8f58fa98635f 100644
--- a/onnxruntime/core/providers/webnn/builders/impl/logical_op_builder.cc
+++ b/onnxruntime/core/providers/webnn/builders/impl/logical_op_builder.cc
@@ -35,8 +35,12 @@ Status LogicalOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, cons
     output = model_builder.GetBuilder().call<emscripten::val>("equal", input0, input1);
   } else if (op_type == "Greater") {
     output = model_builder.GetBuilder().call<emscripten::val>("greater", input0, input1);
+  } else if (op_type == "GreaterOrEqual") {
+    output = model_builder.GetBuilder().call<emscripten::val>("greaterOrEqual", input0, input1);
   } else if (op_type == "Less") {
     output = model_builder.GetBuilder().call<emscripten::val>("lesser", input0, input1);
+  } else if (op_type == "LessOrEqual") {
+    output = model_builder.GetBuilder().call<emscripten::val>("lesserOrEqual", input0, input1);
   } else {
     return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
                            "LogicalOpBuilder::AddToModelBuilderImpl, unknown op: ", op_type);
@@ -54,7 +58,9 @@ void CreateLogicalOpBuilder(const std::string& op_type, OpBuilderRegistrations&
       {
           "Equal",
           "Greater",
+          "GreaterOrEqual",
           "Less",
+          "LessOrEqual",
       };
 
   op_registrations.builders.push_back(std::make_unique<LogicalOpBuilder>());
diff --git a/onnxruntime/core/providers/webnn/builders/impl/shape_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/shape_op_builder.cc
index 04e6d2b548aba..12c2cf6dd0a62 100644
--- a/onnxruntime/core/providers/webnn/builders/impl/shape_op_builder.cc
+++ b/onnxruntime/core/providers/webnn/builders/impl/shape_op_builder.cc
@@ -34,7 +34,7 @@ Status ShapeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,
   const auto rank = static_cast<int32_t>(input_shape.size());
 
   emscripten::val desc = emscripten::val::object();
-  desc.set("type", emscripten::val("int64"));
+  ORT_RETURN_IF_NOT(SetWebnnDataType(desc, ONNX_NAMESPACE::TensorProto_DataType_INT64), "Unsupported data type");
   emscripten::val dims = emscripten::val::array();
   dims.call<void>("push", rank);
   desc.set("dimensions", dims);
diff --git a/onnxruntime/core/providers/webnn/builders/impl/slice_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/slice_op_builder.cc
index 8778bb2414108..e48cf35012652 100644
--- a/onnxruntime/core/providers/webnn/builders/impl/slice_op_builder.cc
+++ b/onnxruntime/core/providers/webnn/builders/impl/slice_op_builder.cc
@@ -114,6 +114,22 @@ bool SliceOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& initializers,
   if (!GetShape(*input_defs[0], input_shape, logger)) {
     return false;
   }
+
+  if (input_defs.size() < 3) {
+    LOGS(logger, VERBOSE) << op_type << " [" << name << "] requires at least 3 inputs (data, starts, ends) but got "
+                          << input_defs.size();
+    return false;
+  }
+
+  // Inputs: starts, ends, axes, and steps must be constant initializers if present.
+  for (size_t i = 1; i < input_defs.size(); i++) {
+    if (!Contains(initializers, input_defs[i]->Name())) {
+      LOGS(logger, VERBOSE) << "Input [" << input_defs[i]->Name() << "] of " << op_type
+                            << " [" << name << "] must be known as initializer";
+      return false;
+    }
+  }
+
   if (input_defs.size() == 5) {  // Check steps.
     const auto& steps_tensor = *initializers.at(input_defs[4]->Name());
     std::vector<uint8_t> unpacked_tensor;
@@ -140,18 +156,6 @@ bool SliceOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& initializers,
     }
   }
 
-  if (input_defs.size() < 3) {
-    LOGS(logger, VERBOSE) << op_type << " [" << name << "] requires at least 3 inputs (data starts and ends) but got "
-                          << input_defs.size();
-    return false;
-  }
-
-  const auto& starts_name = input_defs[1]->Name();
-  const auto& ends_name = input_defs[2]->Name();
-  if (!Contains(initializers, starts_name) || !Contains(initializers, ends_name)) {
-    LOGS(logger, VERBOSE) << op_type << " [" << name << "] need starts and ends as initializer.";
-    return false;
-  }
   return true;
 }
 
diff --git a/onnxruntime/core/providers/webnn/builders/impl/softmax_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/softmax_op_builder.cc
index e7e3cee21c956..beee8b1d77cee 100644
--- a/onnxruntime/core/providers/webnn/builders/impl/softmax_op_builder.cc
+++ b/onnxruntime/core/providers/webnn/builders/impl/softmax_op_builder.cc
@@ -35,25 +35,77 @@ Status SoftmaxOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,
   std::vector<int64_t> input_shape;
   ORT_RETURN_IF_NOT(GetShape(*input_defs[0], input_shape, logger), "Cannot get shape");
   const auto input_size = input_shape.size();
-  // WebNN Softmax only support 2d input shape, reshape input to 2d.
-  if (input_size != 2) {
-    int32_t new_shape_0 = SafeInt<int32_t>(input_shape.data()[0]);
-    for (size_t i = 1; i < input_size - 1; i++) {
-      new_shape_0 *= input_shape.data()[i];
+  NodeAttrHelper helper(node);
+  if (node.SinceVersion() < 13) {
+    int32_t axis = helper.Get("axis", 1);
+    axis = static_cast<int32_t>(HandleNegativeAxis(axis, input_size));
+    //  Coerce the input into a 2-dimensional tensor with dimensions [a_0 * ... * a_{k-1}, a_k * ... * a_{n-1}].
+    if (input_size != 2) {
+      int32_t first_dim = static_cast<int32_t>(std::reduce(input_shape.begin(), input_shape.begin() + axis,
+                                                           1, std::multiplies<int64_t>()));
+      int32_t second_dim = static_cast<int32_t>(std::reduce(input_shape.begin() + axis, input_shape.end(),
+                                                            1, std::multiplies<int64_t>()));
+      emscripten::val new_shape = emscripten::val::array(std::vector<int32_t>{first_dim, second_dim});
+      input = model_builder.GetBuilder().call<emscripten::val>("reshape", input, new_shape);
     }
-    emscripten::val new_shape = emscripten::val::array();
-    new_shape.call<void>("push", new_shape_0);
-    new_shape.call<void>("push", static_cast<int32_t>(input_shape.back()));
-    input = model_builder.GetBuilder().call<emscripten::val>("reshape", input, new_shape);
-  }
-  output = model_builder.GetBuilder().call<emscripten::val>("softmax", input);
-  // Reshape output to the same shape of input.
-  if (input_size != 2) {
-    emscripten::val new_shape = emscripten::val::array();
-    for (size_t i = 0; i < input_size; i++) {
-      new_shape.call<void>("push", static_cast<int32_t>(input_shape[i]));
+
+    output = model_builder.GetBuilder().call<emscripten::val>("softmax", input);
+
+    // Reshape output to the same shape of input.
+    if (input_size != 2) {
+      emscripten::val new_shape = emscripten::val::array();
+      for (size_t i = 0; i < input_size; i++) {
+        new_shape.call<void>("push", static_cast<int32_t>(input_shape[i]));
+      }
+      output = model_builder.GetBuilder().call<emscripten::val>("reshape", output, new_shape);
+    }
+  } else {
+    int32_t axis = helper.Get("axis", -1);
+    axis = static_cast<int32_t>(HandleNegativeAxis(axis, input_size));
+    // Wraparound for transpose the target axis to the last.
+    // WebNN compute the softmax values of the 2-D input tensor along axis 1.
+    // https://www.w3.org/TR/webnn/#api-mlgraphbuilder-softmax-method
+    if (axis != static_cast<int>(input_shape.size() - 1)) {
+      emscripten::val options = emscripten::val::object();
+      std::vector<uint32_t> permutation(input_shape.size());
+      std::iota(permutation.begin(), permutation.end(), 0);
+      std::rotate(permutation.begin() + axis, permutation.begin() + axis + 1, permutation.end());
+      options.set("permutation", emscripten::val::array(permutation));
+      input = model_builder.GetBuilder().call<emscripten::val>("transpose", input, options);
+    }
+    // Wraparound for reshape input tensor to 2-D.
+    if (input_shape.size() != 2) {
+      uint32_t first_dim = static_cast<uint32_t>(std::reduce(input_shape.begin(), input_shape.begin() + axis,
+                                                             1, std::multiplies<int64_t>()));
+      first_dim *= static_cast<uint32_t>(std::reduce(input_shape.begin() + axis + 1, input_shape.end(),
+                                                     1, std::multiplies<int64_t>()));
+      uint32_t second_dim = static_cast<uint32_t>(input_shape[axis]);
+      emscripten::val new_shape = emscripten::val::array(std::vector<uint32_t>{first_dim, second_dim});
+      input = model_builder.GetBuilder().call<emscripten::val>("reshape", input, new_shape);
+    }
+
+    output = model_builder.GetBuilder().call<emscripten::val>("softmax", input);
+
+    // Restore from 2-D to the original shape.
+    if (input_shape.size() != 2) {
+      std::vector<uint32_t> new_shape;
+      std::transform(input_shape.begin(), input_shape.begin() + axis, std::back_inserter(new_shape),
+                     [](int64_t dim) -> uint32_t { return static_cast<uint32_t>(dim); });
+      std::transform(input_shape.begin() + axis + 1, input_shape.end(), std::back_inserter(new_shape),
+                     [](int64_t dim) -> uint32_t { return static_cast<uint32_t>(dim); });
+      new_shape.push_back(static_cast<int32_t>(input_shape[axis]));
+      output = model_builder.GetBuilder().call<emscripten::val>("reshape",
+                                                                output, emscripten::val::array(new_shape));
+    }
+    // Restore the corresponding axis back to the initial position from the last position.
+    if (axis != static_cast<int>(input_shape.size() - 1)) {
+      emscripten::val options = emscripten::val::object();
+      std::vector<uint32_t> permutation(input_shape.size());
+      std::iota(permutation.begin(), permutation.end(), 0);
+      std::rotate(permutation.rbegin(), permutation.rbegin() + 1, permutation.rend() - axis);
+      options.set("permutation", emscripten::val::array(permutation));
+      output = model_builder.GetBuilder().call<emscripten::val>("transpose", output, options);
     }
-    output = model_builder.GetBuilder().call<emscripten::val>("reshape", output, new_shape);
   }
   model_builder.AddOperand(node.OutputDefs()[0]->Name(), std::move(output));
   return Status::OK();
@@ -75,13 +127,6 @@ bool SoftmaxOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& /* initiali
                           << input_size << "d shape";
     return false;
   }
-  NodeAttrHelper helper(node);
-  const int32_t axis = helper.Get("axis", 1);
-  // WebNN softmax only support input axis 1
-  if (axis != 1 && axis != -1) {
-    LOGS(logger, VERBOSE) << "SoftMax only support axis 1 or -1, input axis: " << axis;
-    return false;
-  }
 
   return true;
 }
diff --git a/onnxruntime/core/providers/webnn/builders/impl/squeeze_unsqueeze_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/squeeze_unsqueeze_op_builder.cc
index 1c0258944dbe9..2a1672c001b0e 100644
--- a/onnxruntime/core/providers/webnn/builders/impl/squeeze_unsqueeze_op_builder.cc
+++ b/onnxruntime/core/providers/webnn/builders/impl/squeeze_unsqueeze_op_builder.cc
@@ -56,6 +56,7 @@ Status SqueezeUnsqueezeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_buil
 
   emscripten::val options = emscripten::val::object();
   std::vector<int32_t> axes_data;
+  auto rank = input_rank;
 
   if (node.SinceVersion() >= 13 && input_defs.size() > 1) {
     // Input axes is provided, use axes initializer data.
@@ -63,35 +64,57 @@ Status SqueezeUnsqueezeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_buil
     const auto& axes_tensor = *initializers.at(input_defs[1]->Name());
     Initializer axes_initializer(axes_tensor);
     const auto axes_data_span = axes_initializer.DataAsSpan<int64_t>();
-    const auto output_rank = input_rank + axes_data_span.size();
+    if (op_type == "Unsqueeze") {
+      // Unsqueeze should check the expanded rank.
+      rank = input_rank + axes_data_span.size();
+    }
     std::transform(
         axes_data_span.begin(), axes_data_span.end(), std::back_inserter(axes_data),
-        [output_rank](int64_t axis) -> int32_t { return SafeInt<int32_t>(HandleNegativeAxis(axis, output_rank)); });
+        [rank](int64_t axis) -> int32_t { return SafeInt<int32_t>(HandleNegativeAxis(axis, rank)); });
   } else {
     NodeAttrHelper helper(node);
     if (helper.HasAttr("axes")) {
       auto axes = helper.Get("axes", std::vector<int64_t>{});
-      const auto output_rank = input_rank + axes.size();
+      if (op_type == "Unsqueeze") {
+        // Unsqueeze should check the expanded rank.
+        rank = input_rank + axes.size();
+      }
       std::transform(
           axes.begin(), axes.end(), std::back_inserter(axes_data),
-          [output_rank](int64_t axis) -> int32_t { return SafeInt<int32_t>(HandleNegativeAxis(axis, output_rank)); });
+          [rank](int64_t axis) -> int32_t { return SafeInt<int32_t>(HandleNegativeAxis(axis, rank)); });
     }
   }
 
-  if (axes_data.size() > 0) {
-    options.set("axes", emscripten::val::array(axes_data));
-  }
-
   emscripten::val output = emscripten::val::undefined();
+  // Use WebNN's reshape to implement Squeeze/Unsqueeze.
+  std::vector<uint32_t> new_shape;
+  std::transform(
+      input_shape.begin(), input_shape.end(), std::back_inserter(new_shape),
+      [](int64_t data) -> uint32_t { return SafeInt<uint32_t>(data); });
+  // Sort axes_data in ascending order.
+  std::sort(axes_data.begin(), axes_data.end());
   if (op_type == "Squeeze") {
-    output = model_builder.GetBuilder().call<emscripten::val>("squeeze", input, options);
+    if (!axes_data.empty()) {
+      for (auto axis = axes_data.rbegin(); axis != axes_data.rend(); ++axis) {
+        size_t index = *axis;
+        new_shape.erase(new_shape.begin() + index);
+      }
+    } else {
+      // Remove all the single dimensions.
+      new_shape.erase(
+          std::remove_if(new_shape.begin(), new_shape.end(), [](uint32_t axis) { return axis == 1; }), new_shape.end());
+    }
   } else if (op_type == "Unsqueeze") {
-    output = model_builder.GetBuilder().call<emscripten::val>("unsqueeze", input, options);
+    // Expand new_shape according to axes_data.
+    for (const int32_t& axis : axes_data) {
+      new_shape.insert(new_shape.begin() + axis, 1);
+    }
   } else {
     return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
                            "SqueezeUnsqueezeOpBuilder::AddToModelBuilderImpl, unknown op: ", op_type);
   }
 
+  output = model_builder.GetBuilder().call<emscripten::val>("reshape", input, emscripten::val::array(new_shape));
   model_builder.AddOperand(node.OutputDefs()[0]->Name(), std::move(output));
   return Status::OK();
 }
diff --git a/onnxruntime/core/providers/webnn/builders/model_builder.cc b/onnxruntime/core/providers/webnn/builders/model_builder.cc
index 14ca4f1a1e674..b6631263dfb93 100644
--- a/onnxruntime/core/providers/webnn/builders/model_builder.cc
+++ b/onnxruntime/core/providers/webnn/builders/model_builder.cc
@@ -64,12 +64,15 @@ void ModelBuilder::PreprocessActivations() {
     const auto& op_type(node->OpType());
 
     if (op_type == "Clip") {
-      float minValue, maxValue;
-      GetClipMinMax(GetInitializerTensors(), *node, minValue, maxValue, logger_);
-      emscripten::val options = emscripten::val::object();
-      options.set("minValue", minValue);
-      options.set("maxValue", maxValue);
-      activation_nodes_.emplace(node->Index(), wnn_builder_.call<emscripten::val>("clamp", options));
+      // Temporarily disable clamp fusion for WebNN GPU as which is not supported yet.
+      if (wnn_device_type_ == WebnnDeviceType::CPU) {
+        float minValue, maxValue;
+        GetClipMinMax(GetInitializerTensors(), *node, minValue, maxValue, logger_);
+        emscripten::val options = emscripten::val::object();
+        options.set("minValue", minValue);
+        options.set("maxValue", maxValue);
+        activation_nodes_.emplace(node->Index(), wnn_builder_.call<emscripten::val>("clamp", options));
+      }
     } else if (op_type == "Elu") {
       NodeAttrHelper helper(*node);
       emscripten::val options = emscripten::val::object();
@@ -122,46 +125,46 @@ Status ModelBuilder::RegisterInitializers() {
     auto data_type = tensor.data_type();
     emscripten::val operand = emscripten::val::object();
     if (IsSupportedDataType(data_type, wnn_device_type_)) {
-      unpacked_tensors_.push_back({});
-      std::vector<uint8_t>& unpacked_tensor = unpacked_tensors_.back();
-      ORT_RETURN_IF_ERROR(onnxruntime::utils::UnpackInitializerData(tensor, unpacked_tensor));
+      ORT_RETURN_IF_NOT(SetWebnnDataType(desc, data_type), "Unsupported data type");
       auto num_elements = SafeInt<size_t>(Product(tensor.dims()));
       emscripten::val view = emscripten::val::undefined();
+      std::byte* tensor_ptr = nullptr;
+      if (tensor.has_raw_data()) {
+        tensor_ptr = reinterpret_cast<std::byte*>(const_cast<char*>(tensor.raw_data().c_str()));
+      } else {
+        unpacked_tensors_.push_back({});
+        std::vector<uint8_t>& unpacked_tensor = unpacked_tensors_.back();
+        ORT_RETURN_IF_ERROR(onnxruntime::utils::UnpackInitializerData(tensor, unpacked_tensor));
+        tensor_ptr = reinterpret_cast<std::byte*>(unpacked_tensor.data());
+      }
       switch (data_type) {
         case ONNX_NAMESPACE::TensorProto_DataType_BOOL:
-          desc.set("type", emscripten::val("uint8"));
           view = emscripten::val{emscripten::typed_memory_view(num_elements,
-                                                               reinterpret_cast<uint8_t*>(unpacked_tensor.data()))};
+                                                               reinterpret_cast<uint8_t*>(tensor_ptr))};
           break;
         case ONNX_NAMESPACE::TensorProto_DataType_FLOAT16:
-          desc.set("type", emscripten::val("float16"));
           view = emscripten::val{emscripten::typed_memory_view(num_elements,
-                                                               reinterpret_cast<uint16_t*>(unpacked_tensor.data()))};
+                                                               reinterpret_cast<uint16_t*>(tensor_ptr))};
           break;
         case ONNX_NAMESPACE::TensorProto_DataType_FLOAT:
-          desc.set("type", emscripten::val("float32"));
           view = emscripten::val{emscripten::typed_memory_view(num_elements,
-                                                               reinterpret_cast<float*>(unpacked_tensor.data()))};
+                                                               reinterpret_cast<float*>(tensor_ptr))};
           break;
         case ONNX_NAMESPACE::TensorProto_DataType_INT32:
-          desc.set("type", emscripten::val("int32"));
           view = emscripten::val{emscripten::typed_memory_view(num_elements,
-                                                               reinterpret_cast<int32_t*>(unpacked_tensor.data()))};
+                                                               reinterpret_cast<int32_t*>(tensor_ptr))};
           break;
         case ONNX_NAMESPACE::TensorProto_DataType_INT64:
-          desc.set("type", emscripten::val("int64"));
           view = emscripten::val{emscripten::typed_memory_view(num_elements,
-                                                               reinterpret_cast<int64_t*>(unpacked_tensor.data()))};
+                                                               reinterpret_cast<int64_t*>(tensor_ptr))};
           break;
         case ONNX_NAMESPACE::TensorProto_DataType_UINT32:
-          desc.set("type", emscripten::val("uint32"));
           view = emscripten::val{emscripten::typed_memory_view(num_elements,
-                                                               reinterpret_cast<uint32_t*>(unpacked_tensor.data()))};
+                                                               reinterpret_cast<uint32_t*>(tensor_ptr))};
           break;
         case ONNX_NAMESPACE::TensorProto_DataType_UINT64:
-          desc.set("type", emscripten::val("uint64"));
           view = emscripten::val{emscripten::typed_memory_view(num_elements,
-                                                               reinterpret_cast<uint64_t*>(unpacked_tensor.data()))};
+                                                               reinterpret_cast<uint64_t*>(tensor_ptr))};
           break;
         default:
           break;
@@ -218,12 +221,9 @@ Status ModelBuilder::RegisterModelInputOutput(const NodeArg& node_arg, bool is_i
     } else {
       dims.reserve(shape.size());
       for (const auto& dim : shape) {
-        if (!dim.has_dim_value()) {
-          // FIXME: support dyanmic shape.
-          dims.push_back(1);
-        } else {
-          dims.push_back(SafeInt<int32_t>(dim.dim_value()));
-        }
+        // dim_param free dimensions should have already been excluded by IsInputSupported().
+        assert(dim.has_dim_value());
+        dims.push_back(SafeInt<int32_t>(dim.dim_value()));
       }
     }
   }
@@ -241,35 +241,7 @@ Status ModelBuilder::RegisterModelInputOutput(const NodeArg& node_arg, bool is_i
     }
 
     data_type = type_proto->tensor_type().elem_type();
-    switch (data_type) {
-      case ONNX_NAMESPACE::TensorProto_DataType_BOOL:
-        desc.set("type", emscripten::val("uint8"));
-        break;
-      case ONNX_NAMESPACE::TensorProto_DataType_FLOAT16:
-        desc.set("type", emscripten::val("float16"));
-        break;
-      case ONNX_NAMESPACE::TensorProto_DataType_FLOAT:
-        desc.set("type", emscripten::val("float32"));
-        break;
-      case ONNX_NAMESPACE::TensorProto_DataType_INT32:
-        desc.set("type", emscripten::val("int32"));
-        break;
-      case ONNX_NAMESPACE::TensorProto_DataType_INT64:
-        desc.set("type", emscripten::val("int64"));
-        break;
-      case ONNX_NAMESPACE::TensorProto_DataType_UINT32:
-        desc.set("type", emscripten::val("uint32"));
-        break;
-      case ONNX_NAMESPACE::TensorProto_DataType_UINT64:
-        desc.set("type", emscripten::val("uint64"));
-        break;
-      default: {
-        // TODO: support other type.
-        return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
-                               "The ", input_output_type, " of graph doesn't have valid type, name: ", name,
-                               " type: ", type_proto->tensor_type().elem_type());
-      }
-    }
+    ORT_RETURN_IF_NOT(SetWebnnDataType(desc, data_type), "Unsupported data type");
   }
 
   if (is_input) {
@@ -319,41 +291,35 @@ Status ModelBuilder::AddOperandFromPersistMemoryBuffer(
   memcpy(dest, buffer, size);
   emscripten::val view = emscripten::val::undefined();
   emscripten::val desc = emscripten::val::object();
+  ORT_RETURN_IF_NOT(SetWebnnDataType(desc, data_type), "Unsupported data type");
   switch (data_type) {
     case ONNX_NAMESPACE::TensorProto_DataType_BOOL:
       view = emscripten::val{emscripten::typed_memory_view(size / sizeof(uint8_t),
                                                            reinterpret_cast<const uint8_t*>(dest))};
-      desc.set("type", emscripten::val("uint8"));
       break;
     case ONNX_NAMESPACE::TensorProto_DataType_FLOAT16:
       view = emscripten::val{emscripten::typed_memory_view(size / sizeof(uint16_t),
                                                            reinterpret_cast<const uint16_t*>(dest))};
-      desc.set("type", emscripten::val("float16"));
       break;
     case ONNX_NAMESPACE::TensorProto_DataType_FLOAT:
       view = emscripten::val{emscripten::typed_memory_view(size / sizeof(float),
                                                            reinterpret_cast<const float*>(dest))};
-      desc.set("type", emscripten::val("float32"));
       break;
     case ONNX_NAMESPACE::TensorProto_DataType_INT32:
       view = emscripten::val{emscripten::typed_memory_view(size / sizeof(int32_t),
                                                            reinterpret_cast<const int32_t*>(dest))};
-      desc.set("type", emscripten::val("int32"));
       break;
     case ONNX_NAMESPACE::TensorProto_DataType_INT64:
       view = emscripten::val{emscripten::typed_memory_view(size / sizeof(int64_t),
                                                            reinterpret_cast<const int64_t*>(dest))};
-      desc.set("type", emscripten::val("int64"));
       break;
     case ONNX_NAMESPACE::TensorProto_DataType_UINT32:
       view = emscripten::val{emscripten::typed_memory_view(size / sizeof(uint32_t),
                                                            reinterpret_cast<const uint32_t*>(dest))};
-      desc.set("type", emscripten::val("uint32"));
       break;
     case ONNX_NAMESPACE::TensorProto_DataType_UINT64:
       view = emscripten::val{emscripten::typed_memory_view(size / sizeof(uint64_t),
                                                            reinterpret_cast<const uint64_t*>(dest))};
-      desc.set("type", emscripten::val("uint64"));
       break;
     default:
       break;
diff --git a/onnxruntime/core/providers/webnn/builders/op_builder_factory.cc b/onnxruntime/core/providers/webnn/builders/op_builder_factory.cc
index 65dc8ddbeaf90..463317a4dafda 100644
--- a/onnxruntime/core/providers/webnn/builders/op_builder_factory.cc
+++ b/onnxruntime/core/providers/webnn/builders/op_builder_factory.cc
@@ -99,7 +99,9 @@ static OpBuilderRegistrations CreateOpBuilderRegistrations() {
   {  // Logical
     CreateLogicalOpBuilder("Equal", op_registrations);
     CreateLogicalOpBuilder("Greater", op_registrations);
+    CreateLogicalOpBuilder("GreaterOrEqual", op_registrations);
     CreateLogicalOpBuilder("Less", op_registrations);
+    CreateLogicalOpBuilder("LessOrEqual", op_registrations);
   }
 
   {  // Max/Min
diff --git a/onnxruntime/core/providers/webnn/webnn_execution_provider.cc b/onnxruntime/core/providers/webnn/webnn_execution_provider.cc
index 26c739e9a1ce1..4da54aaad3a33 100644
--- a/onnxruntime/core/providers/webnn/webnn_execution_provider.cc
+++ b/onnxruntime/core/providers/webnn/webnn_execution_provider.cc
@@ -17,8 +17,8 @@
 
 namespace onnxruntime {
 
-WebNNExecutionProvider::WebNNExecutionProvider(
-    const std::string& webnn_device_flags, const std::string& webnn_power_flags)
+WebNNExecutionProvider::WebNNExecutionProvider(const std::string& webnn_device_flags,
+                                               const std::string& webnn_threads_number, const std::string& webnn_power_flags)
     : IExecutionProvider{onnxruntime::kWebNNExecutionProvider, true} {
   // Create WebNN context and graph builder.
   const emscripten::val ml = emscripten::val::global("navigator")["ml"];
@@ -26,15 +26,15 @@ WebNNExecutionProvider::WebNNExecutionProvider(
     ORT_THROW("Failed to get ml from navigator.");
   }
   emscripten::val context_options = emscripten::val::object();
-  // Currently WebNN implementation in Chromium temporarily reuses the MLContextOptions
-  // defined in Model Loader API, which uses MLDevicePreference instead of MLDeviceType
-  // defined in WebNN. Because there's an ongoing spec discussion to simplify this API at
-  // https://github.com/webmachinelearning/webnn/issues/302.
-  context_options.set("devicePreference", emscripten::val(webnn_device_flags));
+  context_options.set("deviceType", emscripten::val(webnn_device_flags));
   // WebNN EP uses NHWC layout for CPU XNNPACK backend and NCHW for GPU DML backend.
   if (webnn_device_flags.compare("cpu") == 0) {
     preferred_layout_ = DataLayout::NHWC;
     wnn_device_type_ = webnn::WebnnDeviceType::CPU;
+    // Set "numThreads" if it's not default 0.
+    if (webnn_threads_number.compare("0") != 0) {
+      context_options.set("numThreads", stoi(webnn_threads_number));
+    }
   } else {
     preferred_layout_ = DataLayout::NCHW;
     wnn_device_type_ = webnn::WebnnDeviceType::GPU;
diff --git a/onnxruntime/core/providers/webnn/webnn_execution_provider.h b/onnxruntime/core/providers/webnn/webnn_execution_provider.h
index f8d9a1c33f6c8..13a475327dc0c 100644
--- a/onnxruntime/core/providers/webnn/webnn_execution_provider.h
+++ b/onnxruntime/core/providers/webnn/webnn_execution_provider.h
@@ -18,7 +18,8 @@ class Model;
 
 class WebNNExecutionProvider : public IExecutionProvider {
  public:
-  WebNNExecutionProvider(const std::string& webnn_device_flags, const std::string& webnn_power_flags);
+  WebNNExecutionProvider(const std::string& webnn_device_flags, const std::string& webnn_threads_number,
+                         const std::string& webnn_power_flags);
   virtual ~WebNNExecutionProvider();
 
   std::vector<std::unique_ptr<ComputeCapability>>
diff --git a/onnxruntime/core/providers/webnn/webnn_provider_factory.cc b/onnxruntime/core/providers/webnn/webnn_provider_factory.cc
index 4d6b04c8e76d8..11acec8b1f354 100644
--- a/onnxruntime/core/providers/webnn/webnn_provider_factory.cc
+++ b/onnxruntime/core/providers/webnn/webnn_provider_factory.cc
@@ -10,23 +10,26 @@ using namespace onnxruntime;
 
 namespace onnxruntime {
 struct WebNNProviderFactory : IExecutionProviderFactory {
-  WebNNProviderFactory(const std::string& webnn_device_flags, const std::string& webnn_power_flags)
-      : webnn_device_flags_(webnn_device_flags), webnn_power_flags_(webnn_power_flags) {}
+  WebNNProviderFactory(const std::string& webnn_device_flags, const std::string& webnn_threads_number,
+                       const std::string& webnn_power_flags)
+      : webnn_device_flags_(webnn_device_flags), webnn_threads_number_(webnn_threads_number), webnn_power_flags_(webnn_power_flags) {}
   ~WebNNProviderFactory() override {}
 
   std::unique_ptr<IExecutionProvider> CreateProvider() override;
 
   std::string webnn_device_flags_;
+  std::string webnn_threads_number_;
   std::string webnn_power_flags_;
 };
 
 std::unique_ptr<IExecutionProvider> WebNNProviderFactory::CreateProvider() {
-  return std::make_unique<WebNNExecutionProvider>(webnn_device_flags_, webnn_power_flags_);
+  return std::make_unique<WebNNExecutionProvider>(webnn_device_flags_, webnn_threads_number_, webnn_power_flags_);
 }
 
 std::shared_ptr<IExecutionProviderFactory> WebNNProviderFactoryCreator::Create(
     const ProviderOptions& provider_options) {
   return std::make_shared<onnxruntime::WebNNProviderFactory>(provider_options.at("deviceType"),
+                                                             provider_options.at("numThreads"),
                                                              provider_options.at("powerPreference"));
 }
 
diff --git a/onnxruntime/core/providers/xnnpack/detail/node_support_checker.cc b/onnxruntime/core/providers/xnnpack/detail/node_support_checker.cc
index ea5c75a955cc4..8e7e228f974e6 100644
--- a/onnxruntime/core/providers/xnnpack/detail/node_support_checker.cc
+++ b/onnxruntime/core/providers/xnnpack/detail/node_support_checker.cc
@@ -7,22 +7,22 @@
 
 #include "core/common/common.h"
 #include "core/framework/op_node_proto_helper.h"
-#include "core/graph/graph_viewer.h"
 #include "core/graph/graph_utils.h"
+#include "core/graph/graph_viewer.h"
 #include "core/providers/common.h"
 #include "core/providers/cpu/nn/pool_attributes.h"
-#include "core/providers/xnnpack/detail/utils.h"
 #include "core/providers/shared/node_unit/node_unit.h"
+#include "core/providers/xnnpack/detail/utils.h"
 
 // each operator provides a helper to check if supported
-#include "core/providers/xnnpack/nn/conv.h"
-#include "core/providers/xnnpack/nn/conv_transpose.h"
-#include "core/providers/xnnpack/nn/max_pool.h"
 #include "core/providers/xnnpack/math/gemm.h"
 #include "core/providers/xnnpack/math/matmul.h"
+#include "core/providers/xnnpack/math/softmax.h"
 #include "core/providers/xnnpack/nn/average_pool.h"
-#include "core/providers/xnnpack/nn/resize.h"
-#include "core/providers/xnnpack/nn/softmax.h"
+#include "core/providers/xnnpack/nn/conv.h"
+#include "core/providers/xnnpack/nn/conv_transpose.h"
+#include "core/providers/xnnpack/nn/max_pool.h"
+#include "core/providers/xnnpack/tensor/resize.h"
 
 namespace onnxruntime {
 namespace xnnpack {
diff --git a/onnxruntime/core/providers/xnnpack/detail/utils.cc b/onnxruntime/core/providers/xnnpack/detail/utils.cc
index baca4eef537d7..1a32612981120 100644
--- a/onnxruntime/core/providers/xnnpack/detail/utils.cc
+++ b/onnxruntime/core/providers/xnnpack/detail/utils.cc
@@ -25,7 +25,7 @@ const char* OpTypeToString(OpComputeType opCtype) {
     case op_compute_type_fp16:
       return "fp16";
     case op_compute_type_qs8_per_channel:
-      return "qc8";
+      return "qs8_qc8w";
     case op_compute_type_qs8:
       return "qs8";
     case op_compute_type_qu8:
diff --git a/onnxruntime/core/providers/xnnpack/math/gemm.cc b/onnxruntime/core/providers/xnnpack/math/gemm.cc
index 24c233e2415ca..f7b736b0ff903 100644
--- a/onnxruntime/core/providers/xnnpack/math/gemm.cc
+++ b/onnxruntime/core/providers/xnnpack/math/gemm.cc
@@ -78,7 +78,7 @@ bool Gemm::IsOnnxNodeSupported(const NodeUnit& node_unit, const GraphViewer& gra
   return supported;
 }
 
-Gemm::Gemm(const OpKernelInfo& info) : GemmBase(info), XnnpackKernel(info) {
+Gemm::Gemm(const OpKernelInfo& info) : GemmBase(info), XnnpackKernel(info, /*enable_caches*/ true) {
   const auto& node{Node()};
 
   info.GetAttrOrDefault<float>("alpha", &alpha_, 1.f);
@@ -146,14 +146,9 @@ Status Gemm::PrePack(const Tensor& tensor, int input_idx, AllocatorPtr,
       trans_B_ == CblasNoTrans ? B_->Shape()[1] : B_->Shape()[0],  // size_t output_stride,
       B_->Data<float>(),                                           // const float* kernel,
       bias_Data,                                                   // const float* bias,
-      output_min,
-      output_max,
+      output_min, output_max,
       flags,
-#ifdef XNN_CACHE_ENABLE
-      &xnn_caches_,
-#else
-      0,
-#endif
+      GetCodeCache(), GetWeightsCache(),
       &p);
 
   if (status != xnn_status_success) {
@@ -165,20 +160,25 @@ Status Gemm::PrePack(const Tensor& tensor, int input_idx, AllocatorPtr,
 }
 
 Status Gemm::Compute(OpKernelContext* context) const {
-  pthreadpool_t t_pool = GetThreadPool();
+  pthreadpool_t threadpool = GetThreadPool();
   const auto* A = context->Input<Tensor>(0);
   auto Y = context->Output(0, {M_, N_});
 
   // if input is empty tensor, return as nothing need to be calculated and we've set the shape for the output
-  if (M_ == 0 || N_ == 0)
+  if (M_ == 0 || N_ == 0) {
     return Status::OK();
+  }
+
+  xnn_status status = xnn_reshape_fully_connected_nc_f32(op0_.get(),
+                                                         // Number of rows to multiply
+                                                         trans_A_ == CblasNoTrans ? M_ : K_,
+                                                         threadpool);
+
+  if (status != xnn_status_success) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "xnn_reshape_fully_connected_nc_f32 returned ", status);
+  }
 
-  xnn_status status = xnn_setup_fully_connected_nc_f32(
-      op0_.get(),
-      trans_A_ == CblasNoTrans ? M_ : K_,  // Number of rows to multiply
-      A->Data<float>(),
-      Y->MutableData<float>(),
-      t_pool);
+  status = xnn_setup_fully_connected_nc_f32(op0_.get(), A->Data<float>(), Y->MutableData<float>());
 
   if (status != xnn_status_success) {
     return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "xnn_setup_fully_connected_nc_f32 returned ", status);
@@ -192,7 +192,15 @@ Status Gemm::Compute(OpKernelContext* context) const {
   return Status::OK();
 }
 
-ONNX_OPERATOR_VERSIONED_KERNEL_EX(Gemm, kOnnxDomain, 7, 12, kXnnpackExecutionProvider,
+ONNX_OPERATOR_VERSIONED_KERNEL_EX(Gemm, kOnnxDomain, 7, 8, kXnnpackExecutionProvider,
+                                  KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
+                                  Gemm);
+
+ONNX_OPERATOR_VERSIONED_KERNEL_EX(Gemm, kOnnxDomain, 9, 10, kXnnpackExecutionProvider,
+                                  KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
+                                  Gemm);
+
+ONNX_OPERATOR_VERSIONED_KERNEL_EX(Gemm, kOnnxDomain, 11, 12, kXnnpackExecutionProvider,
                                   KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
                                   Gemm);
 
diff --git a/onnxruntime/core/providers/xnnpack/math/gemm.h b/onnxruntime/core/providers/xnnpack/math/gemm.h
index 9191ba204bc25..6d11a8531c20f 100644
--- a/onnxruntime/core/providers/xnnpack/math/gemm.h
+++ b/onnxruntime/core/providers/xnnpack/math/gemm.h
@@ -41,14 +41,6 @@ class Gemm : protected GemmBase, public XnnpackKernel {
 
   float alpha_;
   float beta_;
-
-#ifdef XNN_CACHE_ENABLE
-#if XNN_PLATFORM_JIT
-  xnn_code_cache code_cache_;
-#endif
-  xnn_caches xnn_caches_ = {0, 0};
-  xnn_weights_cache weights_cache_;
-#endif
 };
 
 }  // namespace xnnpack
diff --git a/onnxruntime/core/providers/xnnpack/math/matmul.cc b/onnxruntime/core/providers/xnnpack/math/matmul.cc
index fc7335c79b603..e90aa11c9d087 100644
--- a/onnxruntime/core/providers/xnnpack/math/matmul.cc
+++ b/onnxruntime/core/providers/xnnpack/math/matmul.cc
@@ -62,7 +62,7 @@ bool MatMul::IsOnnxNodeSupported(const NodeUnit& node_unit, const GraphViewer& g
   return supported;
 }
 
-MatMul::MatMul(const OpKernelInfo& info) : XnnpackKernel(info) {}
+MatMul::MatMul(const OpKernelInfo& info) : XnnpackKernel(info, /*enable_caches*/ true) {}
 
 Status MatMul::PrePack(const Tensor& tensor, int input_idx, AllocatorPtr alloc,
                        /*out*/ bool& is_packed,
@@ -99,9 +99,11 @@ Status MatMul::PrePack(const Tensor& tensor, int input_idx, AllocatorPtr alloc,
       output_max,
       flags,
 #ifdef XNN_CACHE_ENABLE
-      &xnn_caches_,
+      GetCodeCache(),
+      GetWeightsCache(),
 #else
-      0,
+      nullptr,
+      nullptr,
 #endif
       &p);
 
@@ -116,7 +118,7 @@ Status MatMul::PrePack(const Tensor& tensor, int input_idx, AllocatorPtr alloc,
 
 Status MatMul::Compute(OpKernelContext* ctx) const {
   const Tensor* a = ctx->Input<Tensor>(0);
-  pthreadpool_t t_pool = GetThreadPool();
+  pthreadpool_t threadpool = GetThreadPool();
   MatMulComputeHelper helper;
   ORT_RETURN_IF_ERROR(helper.Compute(a->Shape(), b_shape_));
   Tensor* y = ctx->Output(0, helper.OutputShape());
@@ -126,13 +128,12 @@ Status MatMul::Compute(OpKernelContext* ctx) const {
 
   auto* y_data = y->MutableData<float>();
 
-  xnn_status status = xnn_setup_fully_connected_nc_f32(
-      op0_.get(),
-      a->Shape()[0],
-      a->Data<float>(),
-      y_data,
-      t_pool);
+  xnn_status status = xnn_reshape_fully_connected_nc_f32(op0_.get(), a->Shape()[0], threadpool);
+  if (status != xnn_status_success) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "xnn_reshape_fully_connected_nc_f32 returned ", status);
+  }
 
+  status = xnn_setup_fully_connected_nc_f32(op0_.get(), a->Data<float>(), y_data);
   if (status != xnn_status_success) {
     return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "xnn_setup_fully_connected_nc_f32 returned ", status);
   }
@@ -144,7 +145,11 @@ Status MatMul::Compute(OpKernelContext* ctx) const {
   return Status::OK();
 }
 
-ONNX_OPERATOR_VERSIONED_KERNEL_EX(MatMul, kOnnxDomain, 1, 12, kXnnpackExecutionProvider,
+ONNX_OPERATOR_VERSIONED_KERNEL_EX(MatMul, kOnnxDomain, 1, 8, kXnnpackExecutionProvider,
+                                  KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
+                                  MatMul);
+
+ONNX_OPERATOR_VERSIONED_KERNEL_EX(MatMul, kOnnxDomain, 9, 12, kXnnpackExecutionProvider,
                                   KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
                                   MatMul);
 
diff --git a/onnxruntime/core/providers/xnnpack/math/matmul.h b/onnxruntime/core/providers/xnnpack/math/matmul.h
index f4ed92c6146fb..b76e42c4d3729 100644
--- a/onnxruntime/core/providers/xnnpack/math/matmul.h
+++ b/onnxruntime/core/providers/xnnpack/math/matmul.h
@@ -32,14 +32,6 @@ class MatMul : public XnnpackKernel {
   AllocatorPtr myAlloc;
 
   XnnpackOperator op0_ = nullptr;
-
-#ifdef XNN_CACHE_ENABLE
-#if XNN_PLATFORM_JIT
-  xnn_code_cache code_cache_;
-#endif
-  xnn_caches xnn_caches_ = {0, 0};
-  xnn_weights_cache weights_cache_;
-#endif
 };
 
 }  // namespace xnnpack
diff --git a/onnxruntime/core/providers/xnnpack/nn/softmax.cc b/onnxruntime/core/providers/xnnpack/math/softmax.cc
similarity index 80%
rename from onnxruntime/core/providers/xnnpack/nn/softmax.cc
rename to onnxruntime/core/providers/xnnpack/math/softmax.cc
index bca84317ad891..87440b7814176 100644
--- a/onnxruntime/core/providers/xnnpack/nn/softmax.cc
+++ b/onnxruntime/core/providers/xnnpack/math/softmax.cc
@@ -1,7 +1,7 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
-#include "core/providers/xnnpack/nn/softmax.h"
+#include "core/providers/xnnpack/math/softmax.h"
 
 #include <utility>
 
@@ -25,6 +25,7 @@ bool IsQuantSoftmaxSupported(const NodeUnit& node_unit, const GraphViewer& graph
         output_type != TensorTypeUint8) {
       break;
     }
+
     // to ensure its output scale and zp are 1/256 and 0, otherwise xnnpack EP has to do extra requantization
     // idealy, QlinearSoftmax or QDQSoftmax will keep this output scale and zp, but we have to handle some
     // qdq models converted from other framework
@@ -33,6 +34,7 @@ bool IsQuantSoftmaxSupported(const NodeUnit& node_unit, const GraphViewer& graph
     if (fabs(q_scale.DataAsSpan<float>()[0] - 1.0f / 256.0f) > 0.0001f) {
       break;
     }
+
     if (zero_tensor) {
       Initializer q_zp(*zero_tensor, node_unit.ModelPath());
       if (q_zp.DataAsSpan<uint8_t>()[0] != 0) {
@@ -57,6 +59,7 @@ bool Softmax::IsOnnxNodeSupported(const NodeUnit& node_unit,
       IsQuantSoftmaxSupported(node_unit, graph) == false) {
     return false;
   }
+
   // use do {} while(false) so it's easier to set a breakpoint on the return
   do {
     // SoftMax has 1 input.
@@ -133,6 +136,7 @@ Softmax::Softmax(const OpKernelInfo& info) : XnnpackKernel{info} {
     ORT_ENFORCE(status.IsOK(), "opset must be existed in attributes of QlinearSoftmax");
     opset_ = gsl::narrow_cast<int>(opset);
   }
+
   int64_t axis = -1;
   Status status = info.GetAttr<int64_t>("axis", &axis);
   // our op checker function has ensured that axis must be the last dim
@@ -162,23 +166,22 @@ Softmax::Softmax(const OpKernelInfo& info) : XnnpackKernel{info} {
   if (op_type_ == OpComputeType::op_compute_type_qu8) {
     // the order of input tensor, x,x_scale, x_zp, y_scale, y_zp
     OpQuantParam quant_param = ParseQuantParamForOp(info, x_dtype, 1);
-    xstatus = xnn_create_softmax_nc_qu8(
-        channels,
-        channels,
-        channels,
-        quant_param[0].first[0],  // x_scale
-        quant_param[1].second,    // y_zp
-        quant_param[1].first[0],  // y_scale
-        0,                        // flags,
-        &p);
+    xstatus = xnn_create_softmax_nc_qu8(channels,
+                                        channels,
+                                        channels,
+                                        quant_param[0].first[0],  // x_scale
+                                        quant_param[1].second,    // y_zp
+                                        quant_param[1].first[0],  // y_scale
+                                        0,                        // flags,
+                                        &p);
   } else if (op_type_ == OpComputeType::op_compute_type_fp32) {
-    xstatus = xnn_create_softmax_nc_f32(
-        channels,
-        channels,
-        channels,
-        0,  // flags,
-        &p);
+    xstatus = xnn_create_softmax_nc_f32(channels,
+                                        channels,
+                                        channels,
+                                        0,  // flags,
+                                        &p);
   }
+
   ORT_ENFORCE(xstatus == xnn_status_success, "xnn_create_softmax_nc_",
               OpTypeToString(op_type_), " failed. Status:", xstatus);
   op0_.reset(p);
@@ -194,39 +197,48 @@ Status Softmax::Compute(OpKernelContext* ctx) const {
   if (X_shape.Size() == 0) {
     return Status::OK();
   }
-  pthreadpool_t t_pool = GetThreadPool();
+
+  pthreadpool_t threadpool = GetThreadPool();
   const size_t N = X_shape.SizeToDimension(axis_);
   // const size_t D = X_shape.SizeFromDimension(axis_); // the step D is 1
   xnn_status status = xnn_status_invalid_state;
+
+  auto reshape_fn = op_type_ == OpComputeType::op_compute_type_qu8 ? xnn_reshape_softmax_nc_qu8
+                                                                   : xnn_reshape_softmax_nc_f32;
+  status = reshape_fn(op0_.get(), N, threadpool);
+
+  if (status != xnn_status_success) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "xnn_reshape_softmax_nc_", OpTypeToString(op_type_),
+                           " returned ", status);
+  }
+
   if (op_type_ == OpComputeType::op_compute_type_qu8) {
-    status = xnn_setup_softmax_nc_qu8(
-        op0_.get(),
-        N,
-        X->Data<uint8_t>(),
-        Y->MutableData<uint8_t>(),
-        t_pool);
+    status = xnn_setup_softmax_nc_qu8(op0_.get(), X->Data<uint8_t>(), Y->MutableData<uint8_t>());
   } else {
-    status = xnn_setup_softmax_nc_f32(
-        op0_.get(),
-        N,
-        X->Data<float>(),
-        Y->MutableData<float>(),
-        t_pool);
+    status = xnn_setup_softmax_nc_f32(op0_.get(), X->Data<float>(), Y->MutableData<float>());
   }
+
   if (status != xnn_status_success) {
-    return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "xnn_setup_softmax_nc_",
-                           OpTypeToString(op_type_), " returned ", status);
+    return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "xnn_setup_softmax_nc_", OpTypeToString(op_type_),
+                           " returned ", status);
   }
-  status = xnn_run_operator(op0_.get(), t_pool);
+
+  status = xnn_run_operator(op0_.get(), threadpool);
   if (status != xnn_status_success) {
     return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "xnn_run_operator returned ", status);
   }
+
   return Status::OK();
 }
 
-ONNX_OPERATOR_VERSIONED_KERNEL_EX(Softmax, kOnnxDomain, 1, 12, kXnnpackExecutionProvider,
+ONNX_OPERATOR_VERSIONED_KERNEL_EX(Softmax, kOnnxDomain, 1, 10, kXnnpackExecutionProvider,
                                   KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
                                   Softmax);
+
+ONNX_OPERATOR_VERSIONED_KERNEL_EX(Softmax, kOnnxDomain, 11, 12, kXnnpackExecutionProvider,
+                                  KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
+                                  Softmax);
+
 ONNX_OPERATOR_KERNEL_EX(Softmax, kOnnxDomain, 13, kXnnpackExecutionProvider,
                         KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
                         Softmax);
diff --git a/onnxruntime/core/providers/xnnpack/nn/softmax.h b/onnxruntime/core/providers/xnnpack/math/softmax.h
similarity index 100%
rename from onnxruntime/core/providers/xnnpack/nn/softmax.h
rename to onnxruntime/core/providers/xnnpack/math/softmax.h
diff --git a/onnxruntime/core/providers/xnnpack/nn/average_pool.cc b/onnxruntime/core/providers/xnnpack/nn/average_pool.cc
index 767218fbfd20b..58c209a13cd0c 100644
--- a/onnxruntime/core/providers/xnnpack/nn/average_pool.cc
+++ b/onnxruntime/core/providers/xnnpack/nn/average_pool.cc
@@ -2,10 +2,13 @@
 // Licensed under the MIT License.
 #include "core/providers/xnnpack/nn/average_pool.h"
 
+#include <memory>
+
 #include "core/common/status.h"
 #include "core/graph/graph.h"
 #include "core/providers/utils.h"
 #include "core/framework/tensorprotoutils.h"
+#include "core/providers/xnnpack/xnnpack_init.h"
 #include "core/providers/xnnpack/detail/utils.h"
 
 namespace onnxruntime {
@@ -90,6 +93,10 @@ bool AveragePool::IsOnnxNodeSupported(const NodeUnit& node_unit,
   const auto& inputs = node_unit.Inputs();
   // use do {} while(false) so it's easier to set a breakpoint on the return
   do {
+    if (node_unit.SinceVersion() < 7) {
+      break;
+    }
+
     // AveragePool has 1 input.
     const auto& x_arg = inputs[0].node_arg;
 
@@ -141,6 +148,11 @@ bool AveragePool::IsOnnxNodeSupported(const NodeUnit& node_unit,
       break;
     }
 
+    // need dilations to all be 1
+    if (!pool_attrs.default_dilations) {
+      break;
+    }
+
     supported = true;
   } while (false);
 
@@ -221,24 +233,47 @@ Status AveragePool::Compute(OpKernelContext* context) const {
     return Status::OK();
   }
 
-  pthreadpool_t t_pool = GetThreadPool();
-  xnn_status status = xnn_status_invalid_state;
+  pthreadpool_t threadpool = GetThreadPool();
+
+  // setup allocator/automated dellocate for workspace
+  size_t workspace_size = 0;
+  size_t workspace_alignment = 0;
+  xnn_allocator* allocator = GetStoredAllocator().second;
+  auto deallocator = [allocator](void* ptr) { allocator->aligned_deallocate(allocator->context, ptr); };
+
+  std::unique_ptr<void, decltype(deallocator)> workspace(nullptr, deallocator);
+
+  auto reshape_fn = (avgpool_type_ == OpComputeType::op_compute_type_fp32)
+                        ? xnn_reshape_average_pooling2d_nhwc_f32
+                        : xnn_reshape_average_pooling2d_nhwc_qu8;
+
+  auto status = reshape_fn(op0_.get(), N, H, W,
+                           &workspace_size, &workspace_alignment,
+                           /*output_height_out=*/nullptr, /*output_width_out=*/nullptr,
+                           threadpool);
+
+  if (status != xnn_status_success) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "xnn_reshape_average_pooling2d_nhwc_", OpTypeToString(avgpool_type_),
+                           " returned ", status);
+  }
+
+  workspace.reset(allocator->aligned_allocate(allocator->context, XNN_ALLOCATION_ALIGNMENT, workspace_size));
+
   if (avgpool_type_ == OpComputeType::op_compute_type_fp32) {
-    status = xnn_setup_average_pooling2d_nhwc_f32(op0_.get(), N, H, W,
-                                                  X.Data<float>(), Y.MutableData<float>(),
-                                                  t_pool /*threadpool */);
+    status = xnn_setup_average_pooling2d_nhwc_f32(op0_.get(), workspace.get(),
+                                                  X.Data<float>(), Y.MutableData<float>());
+
   } else if (avgpool_type_ == OpComputeType::op_compute_type_qu8) {
-    status = xnn_setup_average_pooling2d_nhwc_qu8(op0_.get(), N, H, W,
-                                                  X.Data<uint8_t>(), Y.MutableData<uint8_t>(),
-                                                  t_pool /*threadpool */);
+    status = xnn_setup_average_pooling2d_nhwc_qu8(op0_.get(), workspace.get(),
+                                                  X.Data<uint8_t>(), Y.MutableData<uint8_t>());
   }
 
   if (status != xnn_status_success) {
-    return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "xnn_setup_average_pooling2d_nhwc_",
-                           OpTypeToString(avgpool_type_), " returned ", status);
+    return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "xnn_setup_average_pooling2d_nhwc_", OpTypeToString(avgpool_type_),
+                           " returned ", status);
   }
 
-  status = xnn_run_operator(op0_.get(), t_pool);
+  status = xnn_run_operator(op0_.get(), threadpool);
   if (status != xnn_status_success) {
     return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "xnn_run_operator returned ", status);
   }
@@ -246,8 +281,26 @@ Status AveragePool::Compute(OpKernelContext* context) const {
   return Status::OK();
 }
 
+ONNX_OPERATOR_VERSIONED_KERNEL_EX(
+    AveragePool, kMSInternalNHWCDomain, 7, 9,
+    kXnnpackExecutionProvider,
+    KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
+    AveragePool);
+
+ONNX_OPERATOR_VERSIONED_KERNEL_EX(
+    AveragePool, kMSInternalNHWCDomain, 10, 10,
+    kXnnpackExecutionProvider,
+    KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
+    AveragePool);
+
+ONNX_OPERATOR_VERSIONED_KERNEL_EX(
+    AveragePool, kMSInternalNHWCDomain, 11, 18,
+    kXnnpackExecutionProvider,
+    KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
+    AveragePool);
+
 ONNX_OPERATOR_KERNEL_EX(
-    AveragePool, kMSInternalNHWCDomain, 11,
+    AveragePool, kMSInternalNHWCDomain, 19,
     kXnnpackExecutionProvider,
     KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
     AveragePool);
diff --git a/onnxruntime/core/providers/xnnpack/nn/conv.cc b/onnxruntime/core/providers/xnnpack/nn/conv.cc
index 0772dec59e30e..0cdb9c840aa2d 100644
--- a/onnxruntime/core/providers/xnnpack/nn/conv.cc
+++ b/onnxruntime/core/providers/xnnpack/nn/conv.cc
@@ -3,12 +3,13 @@
 
 #include "conv.h"
 
+#include "core/common/gsl.h"
 #include "core/common/inlined_containers_fwd.h"
+#include "core/framework/tensorprotoutils.h"
 #include "core/framework/transpose_helper.h"
 #include "core/providers/utils.h"
+#include "core/providers/xnnpack/xnnpack_init.h"
 #include "core/providers/xnnpack/detail/utils.h"
-#include "core/framework/tensorprotoutils.h"
-#include "core/common/gsl.h"
 
 namespace onnxruntime {
 namespace xnnpack {
@@ -64,21 +65,48 @@ Status Conv::Compute(OpKernelContext* context) const {
   if (Y->Shape().Size() == 0) {
     return Status::OK();
   }
-  pthreadpool_t t_pool = GetThreadPool();
 
-  xnn_status status = xnn_status_invalid_state;
+  pthreadpool_t threadpool = GetThreadPool();
+
+  // setup allocator/automated dellocate for workspace
+  size_t workspace_size = 0;
+  size_t workspace_alignment = 0;
+  xnn_allocator* allocator = GetStoredAllocator().second;
+  auto deallocator = [allocator](void* ptr) { allocator->aligned_deallocate(allocator->context, ptr); };
+  std::unique_ptr<void, decltype(deallocator)> workspace(nullptr, deallocator);
+
+  auto reshape_fn = xnn_reshape_convolution2d_nhwc_f32;
+  if (conv_type_ == OpComputeType::op_compute_type_qs8) {
+    reshape_fn = xnn_reshape_convolution2d_nhwc_qs8;
+  } else if (conv_type_ == OpComputeType::op_compute_type_qu8) {
+    reshape_fn = xnn_reshape_convolution2d_nhwc_qu8;
+  } else if (conv_type_ == OpComputeType::op_compute_type_qs8_per_channel) {
+    reshape_fn = xnn_reshape_convolution2d_nhwc_qs8_qc8w;
+  }
+
+  auto status = reshape_fn(op0_.get(), N, H, W,
+                           &workspace_size, &workspace_alignment,
+                           /*output_height_out=*/nullptr, /*output_width_out=*/nullptr,
+                           threadpool);
+  if (status != xnn_status_success) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "xnn_reshape_convolution2d_nhwc_", OpTypeToString(conv_type_),
+                           "returned ", status);
+  }
+
+  workspace.reset(allocator->aligned_allocate(allocator->context, XNN_ALLOCATION_ALIGNMENT, workspace_size));
+
   if (conv_type_ == OpComputeType::op_compute_type_fp32) {
-    status = xnn_setup_convolution2d_nhwc_f32(op0_.get(), N, H, W, X.Data<float>(), Y->MutableData<float>(),
-                                              t_pool /*threadpool*/);
+    status = xnn_setup_convolution2d_nhwc_f32(op0_.get(), workspace.get(), X.Data<float>(),
+                                              Y->MutableData<float>());
   } else if (conv_type_ == OpComputeType::op_compute_type_qs8) {
-    status = xnn_setup_convolution2d_nhwc_qs8(op0_.get(), N, H, W, X.Data<int8_t>(), Y->MutableData<int8_t>(),
-                                              t_pool /*threadpool*/);
+    status = xnn_setup_convolution2d_nhwc_qs8(op0_.get(), workspace.get(), X.Data<int8_t>(),
+                                              Y->MutableData<int8_t>());
   } else if (conv_type_ == OpComputeType::op_compute_type_qu8) {
-    status = xnn_setup_convolution2d_nhwc_qu8(op0_.get(), N, H, W, X.Data<uint8_t>(), Y->MutableData<uint8_t>(),
-                                              t_pool /*threadpool*/);
+    status = xnn_setup_convolution2d_nhwc_qu8(op0_.get(), workspace.get(), X.Data<uint8_t>(),
+                                              Y->MutableData<uint8_t>());
   } else if (conv_type_ == OpComputeType::op_compute_type_qs8_per_channel) {
-    status = xnn_setup_convolution2d_nhwc_qc8(op0_.get(), N, H, W, X.Data<int8_t>(), Y->MutableData<int8_t>(),
-                                              t_pool /*threadpool*/);
+    status = xnn_setup_convolution2d_nhwc_qs8_qc8w(op0_.get(), workspace.get(), X.Data<int8_t>(),
+                                                   Y->MutableData<int8_t>());
   }
 
   if (status != xnn_status_success) {
@@ -86,7 +114,7 @@ Status Conv::Compute(OpKernelContext* context) const {
                            OpTypeToString(conv_type_), "returned ", status);
   }
 
-  status = xnn_run_operator(op0_.get(), t_pool);
+  status = xnn_run_operator(op0_.get(), threadpool);
   if (status != xnn_status_success) {
     return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "xnn_run_operator returned ", status);
   }
@@ -94,6 +122,10 @@ Status Conv::Compute(OpKernelContext* context) const {
   return Status::OK();
 }
 
+ONNX_OPERATOR_VERSIONED_KERNEL_EX(Conv, kMSInternalNHWCDomain, 1, 10, kXnnpackExecutionProvider,
+                                  KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
+                                  Conv);
+
 ONNX_OPERATOR_KERNEL_EX(Conv, kMSInternalNHWCDomain, 11, kXnnpackExecutionProvider,
                         KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
                         Conv);
diff --git a/onnxruntime/core/providers/xnnpack/nn/conv_base.cc b/onnxruntime/core/providers/xnnpack/nn/conv_base.cc
index b692f373ff4ce..d21014569234e 100644
--- a/onnxruntime/core/providers/xnnpack/nn/conv_base.cc
+++ b/onnxruntime/core/providers/xnnpack/nn/conv_base.cc
@@ -23,7 +23,8 @@ Status CreateXnnpackKernel(const ConvAttributes* conv_attrs_ptr,
                            const std::optional<std::pair<float, float>>& clip_min_max,
                            const Tensor& Weight, const Tensor* Bias,
                            XnnpackOperator& op_uptr,
-                           xnn_caches_t caches_t,
+                           xnn_code_cache_t code_cache,
+                           xnn_weights_cache_t weights_cache,
                            const OpQuantParam& quant_param,
                            OpComputeType conv_type,
                            bool is_transpose = false) {
@@ -75,7 +76,7 @@ Status CreateXnnpackKernel(const ConvAttributes* conv_attrs_ptr,
         C, M,                                         // input channel stride, output channel stride
         Weight.Data<float>(), B_data,
         foutput_min, foutput_max, flags,
-        caches_t,
+        code_cache, weights_cache,
         &p);
   } else if (conv_type == OpComputeType::op_compute_type_qs8) {
     const float output_scale = quant_param[2].first[0];
@@ -99,7 +100,7 @@ Status CreateXnnpackKernel(const ConvAttributes* conv_attrs_ptr,
         quant_param[2].second, quant_param[2].first[0],
         output_min, output_max,
         flags,
-        caches_t,
+        code_cache, weights_cache,
         &p);
   } else if (conv_type == OpComputeType::op_compute_type_qs8_per_channel) {
     auto* B_data = Bias ? Bias->Data<int32_t>() : nullptr;
@@ -107,7 +108,7 @@ Status CreateXnnpackKernel(const ConvAttributes* conv_attrs_ptr,
     const int8_t output_zero_point = quant_param[2].second;
     const int8_t output_min = xnn_u8s8_quantize<int8_t>(foutput_min, output_scale, output_zero_point);
     const int8_t output_max = xnn_u8s8_quantize<int8_t>(foutput_max, output_scale, output_zero_point);
-    status = xnn_create_convolution2d_nhwc_qc8(
+    status = xnn_create_convolution2d_nhwc_qs8_qc8w(
         input_padding_top, input_padding_right, input_padding_bottom, input_padding_left,
         kernel_height, kernel_width,
         subsampling_height, subsampling_width,
@@ -123,7 +124,7 @@ Status CreateXnnpackKernel(const ConvAttributes* conv_attrs_ptr,
         quant_param[2].second, quant_param[2].first[0],
         output_min, output_max,
         flags,
-        caches_t,
+        code_cache, weights_cache,
         &p);
   } else if (conv_type == OpComputeType::op_compute_type_qu8) {
     const auto* B_data = Bias ? Bias->Data<int32_t>() : nullptr;
@@ -148,15 +149,17 @@ Status CreateXnnpackKernel(const ConvAttributes* conv_attrs_ptr,
         quant_param[2].second, quant_param[2].first[0],
         output_min, output_max,
         flags,
-        caches_t,
+        code_cache, weights_cache,
         &p);
   }
+
   if (status != xnn_status_success) {
     return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
                            "Failed to create xnnpack kernel. xnn_create_",
                            is_transpose ? "deconvolution2d" : "convolution2d", "_nhwc_",
                            OpTypeToString(conv_type), " returned ", status);
   }
+
   op_uptr.reset(p);
   return Status::OK();
 }
@@ -296,6 +299,11 @@ bool ConvBase::IsOnnxNodeSupported(const NodeUnit& node_unit, const GraphViewer&
   const onnxruntime::Node& node = node_unit.GetNode();
   // use do {} while(false) so it's easier to set a breakpoint on the return
   do {
+    // Internal NHWC domain starts at opset 11
+    if (node_unit.SinceVersion() < 11) {
+      break;
+    }
+
     // Conv has at least 2 inputs.
     const auto& inputs = node_unit.Inputs();
     const auto& x_arg = inputs[0].node_arg;
@@ -367,7 +375,7 @@ bool ConvBase::IsOnnxNodeSupported(const NodeUnit& node_unit, const GraphViewer&
 }
 
 ConvBase::ConvBase(const OpKernelInfo& info, bool is_transpose)
-    : XnnpackKernel(info),
+    : XnnpackKernel(info, /*enable_caches*/ true),
       conv_attrs_(info),
       conv_transpose_attrs_(info),
       convbase_attrs_ref_(is_transpose ? conv_transpose_attrs_ : conv_attrs_),
@@ -383,16 +391,7 @@ ConvBase::ConvBase(const OpKernelInfo& info, bool is_transpose)
       }
     }
   }
-  // xnnpack cache_code, unfortunately these definitions are only available in xnnpack/cache.h,
-#ifdef XNN_CACHE_ENABLE
-#if XNN_PLATFORM_JIT
-  xnn_init_code_cache(&code_cache_);
-  xnn_caches_.code_cache = &code_cache_;
-#endif
-  // TODO(Jicwen) enable weight-cache and code-cache
-  xnn_init_weights_cache(&weights_cache_);
-  xnn_caches_.weights_cache = &weights_cache_;
-#endif
+
   const auto& node{Node()};
   const auto& input_defs = node.InputDefs();
   const NodeArg& X = *input_defs[0];
@@ -477,11 +476,7 @@ ConvBase::ConvBase(const OpKernelInfo& info, bool is_transpose)
 Status ConvBase::CreateKernel() {
   auto ret = CreateXnnpackKernel(&convbase_attrs_ref_, C_, M_, kernel_shape_, clip_min_max_, packed_w_,
                                  B_, op0_,
-#ifdef XNN_CACHE_ENABLE
-                                 &xnn_caches_,
-#else
-                                 0,
-#endif
+                                 GetCodeCache(), GetWeightsCache(),
                                  quant_param_, conv_type_, is_transpose_);
   return ret;
 }
diff --git a/onnxruntime/core/providers/xnnpack/nn/conv_base.h b/onnxruntime/core/providers/xnnpack/nn/conv_base.h
index d3501a56ea24c..53ad51378c6be 100644
--- a/onnxruntime/core/providers/xnnpack/nn/conv_base.h
+++ b/onnxruntime/core/providers/xnnpack/nn/conv_base.h
@@ -39,14 +39,6 @@ class ConvBase : public XnnpackKernel {
   std::optional<std::pair<float, float>> clip_min_max_;
 
   XnnpackOperator op0_ = nullptr;
-  // we can't have the definition here because we can't import xnnpack/cache.h
-#ifdef XNN_CACHE_ENABLE
-#if XNN_PLATFORM_JIT
-  xnn_code_cache code_cache_;
-#endif
-  xnn_caches xnn_caches_ = {0, 0};
-  xnn_weights_cache weights_cache_;
-#endif
   OpQuantParam quant_param_;
   OpComputeType conv_type_ = OpComputeType::op_compute_type_invalid;
 };
diff --git a/onnxruntime/core/providers/xnnpack/nn/conv_transpose.cc b/onnxruntime/core/providers/xnnpack/nn/conv_transpose.cc
index 61d8f7f488547..8698c0739509d 100644
--- a/onnxruntime/core/providers/xnnpack/nn/conv_transpose.cc
+++ b/onnxruntime/core/providers/xnnpack/nn/conv_transpose.cc
@@ -81,29 +81,34 @@ Status ConvTranspose::Compute(OpKernelContext* context) const {
   if (Y->Shape().Size() == 0) {
     return Status::OK();
   }
-  pthreadpool_t t_pool = GetThreadPool();
+  pthreadpool_t threadpool = GetThreadPool();
 
   auto output_pad_0 = gsl::narrow_cast<uint32_t>(conv_transpose_attrs_.output_padding[0]);
   auto output_pad_1 = gsl::narrow_cast<uint32_t>(conv_transpose_attrs_.output_padding[1]);
   xnn_status status = xnn_status_invalid_state;
+
+  auto reshape_fn = xnn_reshape_deconvolution2d_nhwc_f32;
+  if (conv_type_ == OpComputeType::op_compute_type_qs8) {
+    reshape_fn = xnn_reshape_deconvolution2d_nhwc_qs8;
+  } else if (conv_type_ == OpComputeType::op_compute_type_qu8) {
+    reshape_fn = xnn_reshape_deconvolution2d_nhwc_qu8;
+  }
+
+  status = reshape_fn(op0_.get(), N, H, W, output_pad_0, output_pad_1,
+                      /*output_height_out=*/nullptr, /*output_width_out=*/nullptr,
+                      threadpool);
+
+  if (status != xnn_status_success) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "xnn_reshape_deconvolution2d_nhwc_",
+                           OpTypeToString(conv_type_), " returned ", status);
+  }
+
   if (conv_type_ == OpComputeType::op_compute_type_fp32) {
-    status = xnn_setup_deconvolution2d_nhwc_f32(
-        op0_.get(), N, H, W,
-        output_pad_0,
-        output_pad_1, X.Data<float>(), Y->MutableData<float>(),
-        t_pool /*threadpool*/);
+    status = xnn_setup_deconvolution2d_nhwc_f32(op0_.get(), X.Data<float>(), Y->MutableData<float>());
   } else if (conv_type_ == OpComputeType::op_compute_type_qs8) {
-    status = xnn_setup_deconvolution2d_nhwc_qs8(
-        op0_.get(), N, H, W,
-        output_pad_0,
-        output_pad_1, X.Data<int8_t>(), Y->MutableData<int8_t>(),
-        t_pool /*threadpool*/);
+    status = xnn_setup_deconvolution2d_nhwc_qs8(op0_.get(), X.Data<int8_t>(), Y->MutableData<int8_t>());
   } else if (conv_type_ == OpComputeType::op_compute_type_qu8) {
-    status = xnn_setup_deconvolution2d_nhwc_qu8(
-        op0_.get(), N, H, W,
-        output_pad_0,
-        output_pad_1, X.Data<uint8_t>(), Y->MutableData<uint8_t>(),
-        t_pool /*threadpool*/);
+    status = xnn_setup_deconvolution2d_nhwc_qu8(op0_.get(), X.Data<uint8_t>(), Y->MutableData<uint8_t>());
   }
 
   if (status != xnn_status_success) {
@@ -111,7 +116,7 @@ Status ConvTranspose::Compute(OpKernelContext* context) const {
                            OpTypeToString(conv_type_), " returned ", status);
   }
 
-  status = xnn_run_operator(op0_.get(), t_pool);
+  status = xnn_run_operator(op0_.get(), threadpool);
   if (status != xnn_status_success) {
     return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "xnn_run_operator returned ", status);
   }
diff --git a/onnxruntime/core/providers/xnnpack/nn/max_pool.cc b/onnxruntime/core/providers/xnnpack/nn/max_pool.cc
index de6dd68bba9c3..2ef9f97f77b14 100644
--- a/onnxruntime/core/providers/xnnpack/nn/max_pool.cc
+++ b/onnxruntime/core/providers/xnnpack/nn/max_pool.cc
@@ -41,6 +41,10 @@ bool MaxPool::IsOnnxNodeSupported(const NodeUnit& node_unit,
   const onnxruntime::Node& node = node_unit.GetNode();
   // use do {} while(false) so it's easier to set a breakpoint on the return
   do {
+    if (node_unit.SinceVersion() < 8) {
+      break;
+    }
+
     // MaxPool has 1 input.
     auto input_defs = node.InputDefs();
     const auto& x_arg = *input_defs[0];
@@ -220,20 +224,29 @@ Status MaxPool::Compute(OpKernelContext* context) const {
     return Status::OK();
   }
 
-  pthreadpool_t t_pool = GetThreadPool();
-  xnn_status status = xnn_status_invalid_state;
+  pthreadpool_t threadpool = GetThreadPool();
+
+  auto reshape_fn = xnn_reshape_max_pooling2d_nhwc_f32;
+  if (maxpool_type_ == OpComputeType::op_compute_type_qu8)
+    reshape_fn = xnn_reshape_max_pooling2d_nhwc_u8;
+  else if (maxpool_type_ == OpComputeType::op_compute_type_qs8) {
+    reshape_fn = xnn_reshape_max_pooling2d_nhwc_s8;
+  }
+
+  auto status = reshape_fn(op0_.get(), N, H, W,
+                           /*output_height_out=*/nullptr, /*output_width_out=*/nullptr,
+                           threadpool);
+  if (status != xnn_status_success) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "xnn_reshape_max_pooling2d_nhwc_",
+                           OpTypeToString(maxpool_type_), " returned ", status);
+  }
+
   if (maxpool_type_ == OpComputeType::op_compute_type_fp32) {
-    status = xnn_setup_max_pooling2d_nhwc_f32(op0_.get(), N, H, W,
-                                              X.Data<float>(), Y->MutableData<float>(),
-                                              t_pool /*threadpool */);
+    status = xnn_setup_max_pooling2d_nhwc_f32(op0_.get(), X.Data<float>(), Y->MutableData<float>());
   } else if (maxpool_type_ == OpComputeType::op_compute_type_qu8) {
-    status = xnn_setup_max_pooling2d_nhwc_u8(op0_.get(), N, H, W,
-                                             X.Data<uint8_t>(), Y->MutableData<uint8_t>(),
-                                             t_pool /*threadpool */);
+    status = xnn_setup_max_pooling2d_nhwc_u8(op0_.get(), X.Data<uint8_t>(), Y->MutableData<uint8_t>());
   } else {
-    status = xnn_setup_max_pooling2d_nhwc_s8(op0_.get(), N, H, W,
-                                             X.Data<int8_t>(), Y->MutableData<int8_t>(),
-                                             t_pool /*threadpool */);
+    status = xnn_setup_max_pooling2d_nhwc_s8(op0_.get(), X.Data<int8_t>(), Y->MutableData<int8_t>());
   }
 
   if (status != xnn_status_success) {
@@ -241,7 +254,7 @@ Status MaxPool::Compute(OpKernelContext* context) const {
                            OpTypeToString(maxpool_type_), " returned ", status);
   }
 
-  status = xnn_run_operator(op0_.get(), t_pool);
+  status = xnn_run_operator(op0_.get(), threadpool);
   if (status != xnn_status_success) {
     return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "xnn_run_operator returned ", status);
   }
@@ -249,12 +262,24 @@ Status MaxPool::Compute(OpKernelContext* context) const {
   return Status::OK();
 }
 
-ONNX_OPERATOR_VERSIONED_KERNEL_EX(
-    MaxPool, kMSInternalNHWCDomain, 11, 11, kXnnpackExecutionProvider,
-    KernelDefBuilder().TypeConstraint("T", {DataTypeImpl::GetTensorType<float>(),
-                                            DataTypeImpl::GetTensorType<uint8_t>(),
-                                            DataTypeImpl::GetTensorType<int8_t>()}),
-    MaxPool);
+ONNX_OPERATOR_VERSIONED_KERNEL_EX(MaxPool, kMSInternalNHWCDomain, 8, 9, kXnnpackExecutionProvider,
+                                  KernelDefBuilder().TypeConstraint("T", {DataTypeImpl::GetTensorType<float>(),
+                                                                          DataTypeImpl::GetTensorType<uint8_t>(),
+                                                                          DataTypeImpl::GetTensorType<int8_t>()}),
+                                  MaxPool);
+
+ONNX_OPERATOR_VERSIONED_KERNEL_EX(MaxPool, kMSInternalNHWCDomain, 10, 10, kXnnpackExecutionProvider,
+                                  KernelDefBuilder().TypeConstraint("T", {DataTypeImpl::GetTensorType<float>(),
+                                                                          DataTypeImpl::GetTensorType<uint8_t>(),
+                                                                          DataTypeImpl::GetTensorType<int8_t>()}),
+                                  MaxPool);
+
+ONNX_OPERATOR_VERSIONED_KERNEL_EX(MaxPool, kMSInternalNHWCDomain, 11, 11, kXnnpackExecutionProvider,
+                                  KernelDefBuilder().TypeConstraint("T", {DataTypeImpl::GetTensorType<float>(),
+                                                                          DataTypeImpl::GetTensorType<uint8_t>(),
+                                                                          DataTypeImpl::GetTensorType<int8_t>()}),
+                                  MaxPool);
+
 ONNX_OPERATOR_KERNEL_EX(MaxPool, kMSInternalNHWCDomain, 12, kXnnpackExecutionProvider,
                         KernelDefBuilder()
                             .TypeConstraint("T", {DataTypeImpl::GetTensorType<float>(),
diff --git a/onnxruntime/core/providers/xnnpack/nn/resize.cc b/onnxruntime/core/providers/xnnpack/tensor/resize.cc
similarity index 65%
rename from onnxruntime/core/providers/xnnpack/nn/resize.cc
rename to onnxruntime/core/providers/xnnpack/tensor/resize.cc
index 672b2597279db..0c9e2e9fc17a2 100644
--- a/onnxruntime/core/providers/xnnpack/nn/resize.cc
+++ b/onnxruntime/core/providers/xnnpack/tensor/resize.cc
@@ -1,7 +1,7 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
-#include "core/providers/xnnpack/nn/resize.h"
+#include "core/providers/xnnpack/tensor/resize.h"
 
 #include <algorithm>
 #include <utility>
@@ -10,6 +10,7 @@
 #include "core/common/inlined_containers_fwd.h"
 #include "core/framework/op_kernel.h"
 #include "core/optimizer/initializer.h"
+#include "core/providers/xnnpack/xnnpack_init.h"
 
 namespace onnxruntime {
 namespace xnnpack {
@@ -18,26 +19,67 @@ bool Resize::IsOnnxNodeSupported(const NodeUnit& node_unit,
                                  const GraphViewer& graph_viewer) {
   bool supported = false;
   do {
+    if (node_unit.SinceVersion() < 10) {
+      break;
+    }
+
     // Resize has 1-4 input.
     const auto& inputs = node_unit.Inputs();
     const auto& x_arg = inputs[0].node_arg;
 
     const auto* x_type = x_arg.TypeAsProto();
-    if (x_type == nullptr ||
-        (x_type->tensor_type().elem_type() != ONNX_NAMESPACE::TensorProto_DataType_FLOAT &&
-         x_type->tensor_type().elem_type() != ONNX_NAMESPACE::TensorProto_DataType_UINT8 &&
-         x_type->tensor_type().elem_type() != ONNX_NAMESPACE::TensorProto_DataType_INT8)) {
+    if (x_type == nullptr || (x_type->tensor_type().elem_type() != ONNX_NAMESPACE::TensorProto_DataType_FLOAT &&
+                              x_type->tensor_type().elem_type() != ONNX_NAMESPACE::TensorProto_DataType_UINT8 &&
+                              x_type->tensor_type().elem_type() != ONNX_NAMESPACE::TensorProto_DataType_INT8)) {
       break;
     }
 
     const auto* x_shape = x_arg.Shape();
-    //'bilinear' == 2-D input or 4-D input with outermost 2 scales as 1 (NCHW) or
-    // 4-D input with outermost and innermost scales as 1 (NHWC)
-    // but we just support 4-d tensor for now, and the channel must be known.
+
+    // 'bilinear' == 2-D input or 4-D input with outermost 2 scales as 1 (NCHW) can be supported.
+    // we only support 4-d tensor for now, and the channel must be known.
+    // we assume the input in NCHW for this test.
     if (!x_shape || x_shape->dim_size() != 4 || x_shape->dim(1).dim_value() <= 0) {
       break;
     }
 
+    // validate it is in fact NCHW
+    //
+    // opset 10 had `scales` as input 1 and no sizes. later opsets added roi as input 1 followed by scales and sizes.
+    auto opset_version = node_unit.SinceVersion();
+    size_t scale_idx = opset_version == 10 ? 1 : 2;
+    size_t size_idx = 3;
+
+    // onnx shape inferencing validates that one and not both of sizes and scales are provided
+    const auto* scale_tensor = inputs.size() >= scale_idx + 1
+                                   ? graph_viewer.GetConstantInitializer(inputs[scale_idx].node_arg.Name(), true)
+                                   : nullptr;
+    const auto* size_tensor = opset_version > 10 && inputs.size() >= size_idx + 1
+                                  ? graph_viewer.GetConstantInitializer(inputs[size_idx].node_arg.Name(), true)
+                                  : nullptr;
+
+    // if both scales and sizes are nullptr the one that was provided was not a constant initializer
+    if (!scale_tensor && !size_tensor) {
+      break;
+    }
+
+    // check the scale for the second dim is 1 or the size of the second dim matches the input shape.
+    // if not, it is not the C dim as a Resize will not change the number of channels.
+    InlinedVector<float> scale(4, 1.0F);
+    if (scale_tensor) {
+      const Initializer scale_val(*scale_tensor, node_unit.ModelPath());
+      if (scale_val.DataAsSpan<float>()[1] != 1.0F) {
+        break;
+      }
+    }
+
+    if (size_tensor) {
+      const Initializer size_val(*size_tensor, node_unit.ModelPath());
+      if (size_val.DataAsSpan<int64_t>()[1] != x_shape->dim(1).dim_value()) {
+        break;
+      }
+    }
+
     const auto* output_shape = node_unit.Outputs()[0].node_arg.Shape();
     bool length_resized_compatible_pytorch_half_pixel = true;
     // when length_resized > 1, there is no difference between pytorch_half_pixel and half_pixel
@@ -48,18 +90,11 @@ bool Resize::IsOnnxNodeSupported(const NodeUnit& node_unit,
     // if coordinate_transformation_mode is "pytorch_half_pixel",
     // x_original = length_resized > 1 ? (x_resized + 0.5) / scale - 0.5 : 0
     //
-    if (output_shape->dim(2).dim_value() <= 1 || output_shape->dim(1).dim_value() <= 1) {
+    if (output_shape->dim(2).dim_value() <= 1 || output_shape->dim(3).dim_value() <= 1) {
+      // we don't know the output H or W so we don't know if it will be compatible
       length_resized_compatible_pytorch_half_pixel = false;
     }
 
-    // Refer to onnxruntime/core/providers/cpu/tensor/upsamplebase.h,
-    size_t scale_idx = 2;
-    size_t size_idx = 3;
-    auto opset_version = node_unit.SinceVersion();
-    if (opset_version == 10) {
-      scale_idx = 1;
-    }
-
     ProtoHelperNodeContext nc(node_unit.GetNode());
     OpNodeProtoHelper info(&nc);
 
@@ -78,6 +113,7 @@ bool Resize::IsOnnxNodeSupported(const NodeUnit& node_unit,
 
     std::vector<int64_t> axes;
     if (info.GetAttrs<int64_t>("axes", axes).IsOK() && axes.size() > 0) {
+      // TODO: We should be able to handle this if required
       break;
     }
 
@@ -95,9 +131,10 @@ bool Resize::IsOnnxNodeSupported(const NodeUnit& node_unit,
     // Coordinate transformation mode attr was introduced in version 11.
     // before that asymmetric mode was the only available transformation mode
     std::string coordinate_transform_mode_name =
-        opset_version > 10
-            ? info.GetAttrOrDefault<std::string>("coordinate_transformation_mode", "half_pixel")
-            : "asymmetric";
+        opset_version > 10 ? info.GetAttrOrDefault<std::string>("coordinate_transformation_mode", "half_pixel")
+                           : "asymmetric";
+
+    // TODO: Opset 19 added half_pixel_symmetric. Need to see if that can be supported.
 
     if (coordinate_transform_mode_name != "asymmetric" &&
         coordinate_transform_mode_name != "half_pixel" &&
@@ -106,59 +143,7 @@ bool Resize::IsOnnxNodeSupported(const NodeUnit& node_unit,
       break;
     }
 
-    auto exclude_outside = info.GetAttrOrDefault<int64_t>("exclude_outside", 0) == 0 ? false : true;
-    if (exclude_outside) {
-      break;
-    }
-
-    // roi  only takes effect when coordinate_transformation_mode is "tf_crop_and_resize"
-
-    // size or scales shouldnt't be provided in the same time but should at least be provided one of them
-    const auto* scale_tensor = inputs.size() >= scale_idx + 1
-                                   ? graph_viewer.GetConstantInitializer(inputs[scale_idx].node_arg.Name(), true)
-                                   : nullptr;
-    const auto* size_tensor = inputs.size() >= size_idx + 1
-                                  ? graph_viewer.GetConstantInitializer(inputs[size_idx].node_arg.Name(), true)
-                                  : nullptr;
-
-    bool has_size = false;
-    bool has_scale = false;
-    InlinedVector<float> scale(4, 1.0F);
-    if (scale_tensor) {
-      const Initializer scale_val(*scale_tensor, node_unit.ModelPath());
-      auto scale_span = scale_val.DataAsSpan<float>();
-      if (scale_span.size() == 4) {
-        has_scale = true;
-        std::copy(scale_span.begin(), scale_span.end(), scale.begin());
-      }
-    }
-
-    if (size_tensor) {
-      auto input_shape = utils::GetTensorShapeFromTensorShapeProto(*x_shape);
-      const Initializer size_val(*size_tensor, node_unit.ModelPath());
-
-      auto size_span = size_val.DataAsSpan<int64_t>();
-      if (size_span.size() == 4) {
-        has_size = true;
-        scale = {size_span[0] / static_cast<float>(input_shape[0]),
-                 size_span[1] / static_cast<float>(input_shape[1]),
-                 size_span[2] / static_cast<float>(input_shape[2]),
-                 size_span[3] / static_cast<float>(input_shape[3])};
-      }
-    }
-
-    if ((has_size && has_scale) || (!has_size && !has_scale)) {
-      break;
-    }
-
-    if (scale[0] != 1.0F || (scale[1] != 1.0F && scale[3] != 1.0F)) {
-      break;
-    }
-
-    // only support xnn_create_resize_bilinear2d_nchw_f32
-    const bool is_NHWC = scale[3] == 1.0F;
-    if (!is_NHWC && (x_type->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType_UINT8 ||
-                     x_type->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType_INT8)) {
+    if (info.GetAttrOrDefault<int64_t>("exclude_outside", 0) != 0) {
       break;
     }
 
@@ -210,8 +195,7 @@ Resize::Resize(const OpKernelInfo& info) : UpsampleBase(info), XnnpackKernel{inf
     }
   }
 
-  is_NHWC_ = scales_[3] == 1.0F;
-  int64_t channels = x_shape->dim(is_NHWC_ ? 3 : 1).dim_value();
+  int64_t channels = x_shape->dim(3).dim_value();
 
   uint32_t flags = 0;
   ORT_ENFORCE(mode_ == UpsampleMode::LINEAR, "only support bilinear resize");
@@ -225,18 +209,16 @@ Resize::Resize(const OpKernelInfo& info) : UpsampleBase(info), XnnpackKernel{inf
   xnn_status xstatus = xnn_status_invalid_state;
   struct xnn_operator* p = nullptr;
   if (op_type_ == OpComputeType::op_compute_type_fp32) {
-    auto create_func = is_NHWC_ ? xnn_create_resize_bilinear2d_nhwc_f32 : xnn_create_resize_bilinear2d_nchw_f32;
-    xstatus = create_func(
-        channels, channels, channels, flags, &p);
+    xstatus = xnn_create_resize_bilinear2d_nhwc_f32(channels, channels, channels, flags, &p);
   } else if (op_type_ == OpComputeType::op_compute_type_qu8) {
-    xstatus = xnn_create_resize_bilinear2d_nhwc_u8(
-        channels, channels, channels, flags, &p);
+    xstatus = xnn_create_resize_bilinear2d_nhwc_u8(channels, channels, channels, flags, &p);
   } else {
-    xstatus = xnn_create_resize_bilinear2d_nhwc_s8(
-        channels, channels, channels, flags, &p);
+    xstatus = xnn_create_resize_bilinear2d_nhwc_s8(channels, channels, channels, flags, &p);
   }
-  ORT_ENFORCE(xstatus == xnn_status_success, "xnn_create_resize_bilinear2d_nhwc_",
-              OpTypeToString(op_type_), " failed. Status:", xstatus);
+
+  ORT_ENFORCE(xstatus == xnn_status_success, "xnn_create_resize_bilinear2d_nhwc_", OpTypeToString(op_type_), " failed. Status:",
+              xstatus);
+
   op0_.reset(p);
 }
 
@@ -245,48 +227,56 @@ Status Resize::ComputeInternal(OpKernelContext* ctx, const Tensor* input,
                                const TensorShapeVector& output_dims) const {
   const auto& X_shape = input->Shape();
   auto N = X_shape[0];
-  auto H = is_NHWC_ ? X_shape[1] : X_shape[2];
-  auto W = is_NHWC_ ? X_shape[2] : X_shape[3];
+  auto H = X_shape[1];
+  auto W = X_shape[2];
   Tensor* output = ctx->Output(0, TensorShape(output_dims));
 
-  pthreadpool_t t_pool = GetThreadPool();
-  xnn_status status = xnn_status_invalid_state;
+  pthreadpool_t threadpool = GetThreadPool();
+
+  // setup allocator/automated dellocate for workspace
+  size_t workspace_size = 0;
+  size_t workspace_alignment = 0;
+  xnn_allocator* allocator = GetStoredAllocator().second;
+  auto deallocator = [allocator](void* ptr) { allocator->aligned_deallocate(allocator->context, ptr); };
+  std::unique_ptr<void, decltype(deallocator)> workspace(nullptr, deallocator);
+
+  auto reshape_fn = xnn_reshape_resize_bilinear2d_nhwc_f32;
+  if (op_type_ == OpComputeType::op_compute_type_qu8) {
+    reshape_fn = xnn_reshape_resize_bilinear2d_nhwc_u8;
+  } else if (op_type_ == OpComputeType::op_compute_type_qs8) {
+    reshape_fn = xnn_reshape_resize_bilinear2d_nhwc_s8;
+  }
+
+  auto status = reshape_fn(op0_.get(), N, H, W, output_dims[1], output_dims[2],
+                           &workspace_size, &workspace_alignment, threadpool);
+  if (status != xnn_status_success) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "xnn_reshape_resize_bilinear2d_nhwc_", OpTypeToString(op_type_),
+                           " returned ", status);
+  }
+
+  workspace.reset(allocator->aligned_allocate(allocator->context, XNN_ALLOCATION_ALIGNMENT, workspace_size));
+
   if (op_type_ == OpComputeType::op_compute_type_fp32) {
-    auto oH = is_NHWC_ ? output_dims[1] : output_dims[2];
-    auto oW = is_NHWC_ ? output_dims[2] : output_dims[3];
-    auto setup_func = is_NHWC_ ? xnn_setup_resize_bilinear2d_nhwc_f32 : xnn_setup_resize_bilinear2d_nchw_f32;
-    status = setup_func(
-        op0_.get(),
-        N,
-        H, W, oH, oW,
-        input->Data<float>(),
-        output->MutableData<float>(),
-        t_pool);
+    status = xnn_setup_resize_bilinear2d_nhwc_f32(op0_.get(), workspace.get(), input->Data<float>(),
+                                                  output->MutableData<float>());
   } else if (op_type_ == OpComputeType::op_compute_type_qu8) {
-    status = xnn_setup_resize_bilinear2d_nhwc_u8(
-        op0_.get(),
-        N,
-        H, W, output_dims[1], output_dims[2],
-        input->Data<uint8_t>(),
-        output->MutableData<uint8_t>(),
-        t_pool);
+    status = xnn_setup_resize_bilinear2d_nhwc_u8(op0_.get(), workspace.get(), input->Data<uint8_t>(),
+                                                 output->MutableData<uint8_t>());
   } else {
-    status = xnn_setup_resize_bilinear2d_nhwc_s8(
-        op0_.get(),
-        N,
-        H, W, output_dims[1], output_dims[2],
-        input->Data<int8_t>(),
-        output->MutableData<int8_t>(),
-        t_pool);
+    status = xnn_setup_resize_bilinear2d_nhwc_s8(op0_.get(), workspace.get(), input->Data<int8_t>(),
+                                                 output->MutableData<int8_t>());
   }
+
   if (status != xnn_status_success) {
     return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "xnn_setup_resize_bilinear2d_nhwc_",
                            OpTypeToString(op_type_), " returned ", status);
   }
-  status = xnn_run_operator(op0_.get(), t_pool);
+
+  status = xnn_run_operator(op0_.get(), threadpool);
   if (status != xnn_status_success) {
     return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "xnn_run_operator returned ", status);
   }
+
   return Status::OK();
 }
 
@@ -315,23 +305,29 @@ Status Resize::Compute(OpKernelContext* ctx) const {
   return ComputeInternal(ctx, X, output_shape);
 }
 
-ONNX_OPERATOR_VERSIONED_KERNEL_EX(Resize, kOnnxDomain, 10, 10, kXnnpackExecutionProvider,
+ONNX_OPERATOR_VERSIONED_KERNEL_EX(Resize, kMSInternalNHWCDomain, 10, 10, kXnnpackExecutionProvider,
                                   KernelDefBuilder().TypeConstraint("T", {DataTypeImpl::GetTensorType<float>(),
                                                                           DataTypeImpl::GetTensorType<uint8_t>(),
                                                                           DataTypeImpl::GetTensorType<int8_t>()}),
                                   Resize);
-ONNX_OPERATOR_VERSIONED_KERNEL_EX(Resize, kOnnxDomain, 11, 12, kXnnpackExecutionProvider,
+ONNX_OPERATOR_VERSIONED_KERNEL_EX(Resize, kMSInternalNHWCDomain, 11, 12, kXnnpackExecutionProvider,
                                   KernelDefBuilder().TypeConstraint("T1", {DataTypeImpl::GetTensorType<float>(),
                                                                            DataTypeImpl::GetTensorType<uint8_t>(),
                                                                            DataTypeImpl::GetTensorType<int8_t>()}),
                                   Resize);
-ONNX_OPERATOR_VERSIONED_KERNEL_EX(Resize, kOnnxDomain, 13, 17, kXnnpackExecutionProvider,
+ONNX_OPERATOR_VERSIONED_KERNEL_EX(Resize, kMSInternalNHWCDomain, 13, 17, kXnnpackExecutionProvider,
+                                  KernelDefBuilder().TypeConstraint("T1", {DataTypeImpl::GetTensorType<float>(),
+                                                                           DataTypeImpl::GetTensorType<uint8_t>(),
+                                                                           DataTypeImpl::GetTensorType<int8_t>()}),
+                                  Resize);
+
+ONNX_OPERATOR_VERSIONED_KERNEL_EX(Resize, kMSInternalNHWCDomain, 18, 18, kXnnpackExecutionProvider,
                                   KernelDefBuilder().TypeConstraint("T1", {DataTypeImpl::GetTensorType<float>(),
                                                                            DataTypeImpl::GetTensorType<uint8_t>(),
                                                                            DataTypeImpl::GetTensorType<int8_t>()}),
                                   Resize);
 
-ONNX_OPERATOR_KERNEL_EX(Resize, kOnnxDomain, 18, kXnnpackExecutionProvider,
+ONNX_OPERATOR_KERNEL_EX(Resize, kMSInternalNHWCDomain, 19, kXnnpackExecutionProvider,
                         KernelDefBuilder().TypeConstraint("T1", {DataTypeImpl::GetTensorType<float>(),
                                                                  DataTypeImpl::GetTensorType<uint8_t>(),
                                                                  DataTypeImpl::GetTensorType<int8_t>()}),
diff --git a/onnxruntime/core/providers/xnnpack/nn/resize.h b/onnxruntime/core/providers/xnnpack/tensor/resize.h
similarity index 98%
rename from onnxruntime/core/providers/xnnpack/nn/resize.h
rename to onnxruntime/core/providers/xnnpack/tensor/resize.h
index 4975510ee7db4..06ff1bdb61f59 100644
--- a/onnxruntime/core/providers/xnnpack/nn/resize.h
+++ b/onnxruntime/core/providers/xnnpack/tensor/resize.h
@@ -31,7 +31,6 @@ class Resize : public UpsampleBase, public XnnpackKernel {
                          const TensorShapeVector& output_dims) const;
 
  private:
-  bool is_NHWC_;
   XnnpackOperator op0_;
   TensorShapeVector output_dims_;
   OpComputeType op_type_ = OpComputeType::op_compute_type_invalid;
diff --git a/onnxruntime/core/providers/xnnpack/xnnpack_execution_provider.cc b/onnxruntime/core/providers/xnnpack/xnnpack_execution_provider.cc
index ba577ac38d48c..a2a776df439e4 100644
--- a/onnxruntime/core/providers/xnnpack/xnnpack_execution_provider.cc
+++ b/onnxruntime/core/providers/xnnpack/xnnpack_execution_provider.cc
@@ -27,85 +27,117 @@ KernelCreateInfo BuildKernelCreateInfo<void>() {
   return info;
 }
 
-#define KERNEL_CREATE_INFO_VERSIONED(Start, End, Op) \
-  BuildKernelCreateInfo<                             \
-      ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kMSInternalNHWCDomain, Start, End, Op)>
+#define KERNEL_CREATE_INFO_VERSIONED(Start, End, Op, Domain) \
+  BuildKernelCreateInfo<                                     \
+      ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, Domain, Start, End, Op)>
 
-#define KERNEL_CREATE_INFO(Start, Op) \
-  BuildKernelCreateInfo<              \
-      ONNX_OPERATOR_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kMSInternalNHWCDomain, Start, Op)>
+#define KERNEL_CREATE_INFO(Start, Op, Domain) \
+  BuildKernelCreateInfo<                      \
+      ONNX_OPERATOR_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, Domain, Start, Op)>
 
-#define KERNEL_CREATE_INFO_TYPED(Start, type, Op) \
-  BuildKernelCreateInfo<                          \
-      ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kMSInternalNHWCDomain, Start, type, Op)>
+#define KERNEL_CREATE_INFO_TYPED(Start, Type, Op, Domain) \
+  BuildKernelCreateInfo<                                  \
+      ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, Domain, Start, Type, Op)>
 
+// Layout sensitive operators in NHWC domain
+class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kMSInternalNHWCDomain, 7, 9, AveragePool);
+class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kMSInternalNHWCDomain, 10, 10, AveragePool);
+class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kMSInternalNHWCDomain, 11, 18, AveragePool);
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kMSInternalNHWCDomain, 19, AveragePool);
+
+class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kMSInternalNHWCDomain, 1, 10, Conv);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kMSInternalNHWCDomain, 11, Conv);
-class ONNX_OPERATOR_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kMSInternalNHWCDomain, 11, ConvTranspose);
+
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kMSInternalNHWCDomain, 1, 10, ConvTranspose);
-class ONNX_OPERATOR_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kMSInternalNHWCDomain, 1, QLinearConvTranspose);
-class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kOnnxDomain, 10, 10, Resize);
-class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kOnnxDomain, 11, 12, Resize);
-class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kOnnxDomain, 13, 17, Resize);
-class ONNX_OPERATOR_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kOnnxDomain, 18, Resize);
-class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kMSInternalNHWCDomain, 11, 11, MaxPool);
-class ONNX_OPERATOR_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kMSInternalNHWCDomain, 12, MaxPool);
-class ONNX_OPERATOR_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kMSInternalNHWCDomain, 11, AveragePool);
-class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kOnnxDomain, 1, 12, Softmax);
-class ONNX_OPERATOR_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kOnnxDomain, 13, Softmax);
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kMSInternalNHWCDomain, 11, ConvTranspose);
 
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kMSInternalNHWCDomain, 10, uint8_t, QLinearConv);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kMSInternalNHWCDomain, 10, int8_t, QLinearConv);
+
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kMSInternalNHWCDomain, 1, QLinearConvTranspose);
+
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kMSInternalNHWCDomain, 1, QLinearAveragePool);
-class ONNX_OPERATOR_KERNEL_CLASS_NAME(kXnnpackExecutionProvider,
-                                      kDynamicDomainByCreate, 1, QLinearSoftmax);
 
-class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kOnnxDomain, 7, 12, Gemm);
+class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kMSInternalNHWCDomain, 10, 10, Resize);
+class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kMSInternalNHWCDomain, 11, 12, Resize);
+class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kMSInternalNHWCDomain, 13, 17, Resize);
+class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kMSInternalNHWCDomain, 18, 18, Resize);
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kMSInternalNHWCDomain, 19, Resize);
+
+class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kMSInternalNHWCDomain, 8, 9, MaxPool);
+class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kMSInternalNHWCDomain, 10, 10, MaxPool);
+class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kMSInternalNHWCDomain, 11, 11, MaxPool);
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kMSInternalNHWCDomain, 12, MaxPool);
+
+// ONNX operators
+class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kOnnxDomain, 7, 8, Gemm);
+class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kOnnxDomain, 9, 10, Gemm);
+class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kOnnxDomain, 11, 12, Gemm);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kOnnxDomain, 13, Gemm);
 
-class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kOnnxDomain, 1, 12, MatMul);
+class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kOnnxDomain, 1, 8, MatMul);
+class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kOnnxDomain, 9, 12, MatMul);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kOnnxDomain, 13, MatMul);
 
+class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kOnnxDomain, 1, 10, Softmax);
+class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kOnnxDomain, 11, 12, Softmax);
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kOnnxDomain, 13, Softmax);
+
+// Internal domain
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kDynamicDomainByCreate, 1, QLinearSoftmax);
+
 std::unique_ptr<KernelRegistry> RegisterKernels() {
   auto kernel_registry = std::make_unique<onnxruntime::KernelRegistry>();
 
   static const BuildKernelCreateInfoFn function_table[] = {
       BuildKernelCreateInfo<void>,  // default entry to avoid the list becoming empty after ops-reducing
 
-      KERNEL_CREATE_INFO(11, Conv),
-      KERNEL_CREATE_INFO(11, ConvTranspose),
-      KERNEL_CREATE_INFO_VERSIONED(1, 10, ConvTranspose),
-      KERNEL_CREATE_INFO(1, QLinearConvTranspose),
-      KERNEL_CREATE_INFO_VERSIONED(11, 11, MaxPool),
-      KERNEL_CREATE_INFO(12, MaxPool),
-      KERNEL_CREATE_INFO(11, AveragePool),
+      // layout sensitive. nodes will be moved to kMSInternalNHWCDomain by layout transformation
+      KERNEL_CREATE_INFO_VERSIONED(7, 9, AveragePool, kMSInternalNHWCDomain),
+      KERNEL_CREATE_INFO_VERSIONED(10, 10, AveragePool, kMSInternalNHWCDomain),
+      KERNEL_CREATE_INFO_VERSIONED(11, 18, AveragePool, kMSInternalNHWCDomain),
+      KERNEL_CREATE_INFO(19, AveragePool, kMSInternalNHWCDomain),
+
+      KERNEL_CREATE_INFO_VERSIONED(1, 10, Conv, kMSInternalNHWCDomain),
+      KERNEL_CREATE_INFO(11, Conv, kMSInternalNHWCDomain),
+
+      KERNEL_CREATE_INFO_VERSIONED(1, 10, ConvTranspose, kMSInternalNHWCDomain),
+      KERNEL_CREATE_INFO(11, ConvTranspose, kMSInternalNHWCDomain),
+
+      KERNEL_CREATE_INFO_VERSIONED(8, 9, MaxPool, kMSInternalNHWCDomain),
+      KERNEL_CREATE_INFO_VERSIONED(10, 10, MaxPool, kMSInternalNHWCDomain),
+      KERNEL_CREATE_INFO_VERSIONED(11, 11, MaxPool, kMSInternalNHWCDomain),
+      KERNEL_CREATE_INFO(12, MaxPool, kMSInternalNHWCDomain),
+
+      KERNEL_CREATE_INFO(1, QLinearConvTranspose, kMSInternalNHWCDomain),
+
+      KERNEL_CREATE_INFO_VERSIONED(10, 10, Resize, kMSInternalNHWCDomain),
+      KERNEL_CREATE_INFO_VERSIONED(11, 12, Resize, kMSInternalNHWCDomain),
+      KERNEL_CREATE_INFO_VERSIONED(13, 17, Resize, kMSInternalNHWCDomain),
+      KERNEL_CREATE_INFO_VERSIONED(18, 18, Resize, kMSInternalNHWCDomain),
+      KERNEL_CREATE_INFO(19, Resize, kMSInternalNHWCDomain),
+
       // layout insensitive, use ONNX-domain directly
-      BuildKernelCreateInfo<
-          ONNX_OPERATOR_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kOnnxDomain, 13, Softmax)>,
-      BuildKernelCreateInfo<
-          ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kOnnxDomain, 1, 12, Softmax)>,
-      BuildKernelCreateInfo<
-          ONNX_OPERATOR_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kOnnxDomain, 18, Resize)>,
-      BuildKernelCreateInfo<
-          ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kOnnxDomain, 13, 17, Resize)>,
-      BuildKernelCreateInfo<
-          ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kOnnxDomain, 11, 12, Resize)>,
-      BuildKernelCreateInfo<
-          ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kOnnxDomain, 10, 10, Resize)>,
-      BuildKernelCreateInfo<
-          ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kOnnxDomain, 7, 12, Gemm)>,
-      BuildKernelCreateInfo<
-          ONNX_OPERATOR_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kOnnxDomain, 13, Gemm)>,
-      BuildKernelCreateInfo<
-          ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kOnnxDomain, 1, 12, MatMul)>,
-      BuildKernelCreateInfo<
-          ONNX_OPERATOR_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kOnnxDomain, 13, MatMul)>,
+      KERNEL_CREATE_INFO_VERSIONED(1, 10, Softmax, kOnnxDomain),
+      KERNEL_CREATE_INFO_VERSIONED(11, 12, Softmax, kOnnxDomain),
+      KERNEL_CREATE_INFO(13, Softmax, kOnnxDomain),
+
+      KERNEL_CREATE_INFO_VERSIONED(7, 8, Gemm, kOnnxDomain),
+      KERNEL_CREATE_INFO_VERSIONED(9, 10, Gemm, kOnnxDomain),
+      KERNEL_CREATE_INFO_VERSIONED(11, 12, Gemm, kOnnxDomain),
+      KERNEL_CREATE_INFO(13, Gemm, kOnnxDomain),
+
+      KERNEL_CREATE_INFO_VERSIONED(1, 8, MatMul, kOnnxDomain),
+      KERNEL_CREATE_INFO_VERSIONED(9, 12, MatMul, kOnnxDomain),
+      KERNEL_CREATE_INFO(13, MatMul, kOnnxDomain),
 
       //  quantization op
-      KERNEL_CREATE_INFO_TYPED(10, uint8_t, QLinearConv),
-      KERNEL_CREATE_INFO_TYPED(10, int8_t, QLinearConv),
-      KERNEL_CREATE_INFO(1, QLinearAveragePool),
-      BuildKernelCreateInfo<
-          ONNX_OPERATOR_KERNEL_CLASS_NAME(kXnnpackExecutionProvider, kDynamicDomainByCreate, 1, QLinearSoftmax)>,
+      KERNEL_CREATE_INFO(1, QLinearAveragePool, kMSInternalNHWCDomain),
+
+      KERNEL_CREATE_INFO_TYPED(10, uint8_t, QLinearConv, kMSInternalNHWCDomain),
+      KERNEL_CREATE_INFO_TYPED(10, int8_t, QLinearConv, kMSInternalNHWCDomain),
+
+      KERNEL_CREATE_INFO(1, QLinearSoftmax, kDynamicDomainByCreate),
   };
 
   for (auto& function_table_entry : function_table) {
diff --git a/onnxruntime/core/providers/xnnpack/xnnpack_init.cc b/onnxruntime/core/providers/xnnpack/xnnpack_init.cc
index 27634a8b7090c..c3aa1d987c194 100644
--- a/onnxruntime/core/providers/xnnpack/xnnpack_init.cc
+++ b/onnxruntime/core/providers/xnnpack/xnnpack_init.cc
@@ -26,13 +26,15 @@ void xnn_deallocate(void* context, void* pointer) {
 }
 
 void* xnn_aligned_allocate(void* context, size_t alignment, size_t size) {
+  if (size == 0)
+    return nullptr;
+
 #if defined(__wasm__) && !defined(__wasm_relaxed_simd__) && !defined(__wasm_simd128__)
   ORT_ENFORCE(alignment <= 2 * sizeof(void*));
   return xnn_allocate(context, size);
 #else
   void* ptr = xnn_allocate(context, size);
-  ORT_ENFORCE((int64_t(ptr) & (alignment - 1)) == 0,
-              " xnnpack wants to allocate a space with ", alignment, "bytes aligned. But it's not satisfied");
+  ORT_ENFORCE((int64_t(ptr) & (alignment - 1)) == 0, "xnnpack allocation was not aligned to ", alignment, " bytes.");
   // if ptr is not aligned, we have to find a way to return a aligned ptr and store the original ptr
   return ptr;
 #endif
diff --git a/onnxruntime/core/providers/xnnpack/xnnpack_init.h b/onnxruntime/core/providers/xnnpack/xnnpack_init.h
index d309edd0c3a4e..a1e64bf6046b2 100644
--- a/onnxruntime/core/providers/xnnpack/xnnpack_init.h
+++ b/onnxruntime/core/providers/xnnpack/xnnpack_init.h
@@ -5,6 +5,47 @@ struct xnn_allocator;
 namespace onnxruntime {
 namespace xnnpack {
 
+// copy #define logic from XNNPACK src/xnnpack/common.h to determine workspace alignment
+#if defined(__APPLE__)
+#include <TargetConditionals.h>
+#endif
+
+#if defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || defined(_M_IX86)
+#define XNN_ARCH_X86 1
+#else
+#define XNN_ARCH_X86 0
+#endif
+
+#if defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) && !defined(_M_ARM64EC)
+#define XNN_ARCH_X86_64 1
+#else
+#define XNN_ARCH_X86_64 0
+#endif
+
+#if defined(__wasm__) && !defined(__wasm_relaxed_simd__) && !defined(__wasm_simd128__)
+#define XNN_ARCH_WASM 1
+#else
+#define XNN_ARCH_WASM 0
+#endif
+
+#if defined(__ANDROID__) || (defined(__APPLE__) && TARGET_OS_IPHONE)
+#define XNN_PLATFORM_MOBILE 1
+#else
+#define XNN_PLATFORM_MOBILE 0
+#endif
+
+#if XNN_ARCH_WASM
+#define XNN_ALLOCATION_ALIGNMENT 4
+#elif XNN_ARCH_X86 || XNN_ARCH_X86_64
+#if XNN_PLATFORM_MOBILE
+#define XNN_ALLOCATION_ALIGNMENT 32
+#else
+#define XNN_ALLOCATION_ALIGNMENT 64
+#endif
+#else
+#define XNN_ALLOCATION_ALIGNMENT 16
+#endif
+
 std::pair<AllocatorPtr&, xnn_allocator*> GetStoredAllocator();
 
 }  // namespace xnnpack
diff --git a/onnxruntime/core/providers/xnnpack/xnnpack_kernel.h b/onnxruntime/core/providers/xnnpack/xnnpack_kernel.h
index ada39c767f7c6..0978a88288114 100644
--- a/onnxruntime/core/providers/xnnpack/xnnpack_kernel.h
+++ b/onnxruntime/core/providers/xnnpack/xnnpack_kernel.h
@@ -4,6 +4,7 @@
 #pragma once
 #include "core/framework/op_kernel.h"
 #include "core/providers/xnnpack/xnnpack_execution_provider.h"
+#include "xnnpack.h"
 
 struct pthreadpool;
 
@@ -12,18 +13,59 @@ namespace xnnpack {
 
 class XnnpackKernel : public OpKernel {
  public:
-  explicit XnnpackKernel(const OpKernelInfo& info)
-      : OpKernel(info),
-        xnnpack_threadpool_(
-            static_cast<const XnnpackExecutionProvider*>(info.GetExecutionProvider())
-                ->GetPrivateThreadPool()) {
+  explicit XnnpackKernel(const OpKernelInfo& info, bool enable_caches = false)
+      : OpKernel{info},
+        xnnpack_threadpool_{
+            static_cast<const XnnpackExecutionProvider*>(info.GetExecutionProvider())->GetPrivateThreadPool()},
+        caches_{enable_caches} {
   }
   [[nodiscard]] pthreadpool* GetThreadPool() const {
     return xnnpack_threadpool_;
   }
 
+  // see comment below about enabling code cache
+  // xnn_code_cache_t GetCodeCache() { return caches_.auto_code_cache.get();}
+  xnn_code_cache_t GetCodeCache() { return nullptr; }
+  xnn_weights_cache_t GetWeightsCache() { return caches_.auto_weights_cache.get(); }
+
  private:
   pthreadpool* xnnpack_threadpool_;
+
+  // Helper class to wrap usage of the XNNPACK weights and code caches.
+  // NOTE: Currently creating/freeing the code cache is not exposed via the public xnnpack.h header so usage is
+  // commented out. If we need to use it, we'll need to add the 'src' directory of XNNPACK to the include path
+  // and #include "xnnpack/cache.h"
+  struct Caches {
+    Caches(bool enable)
+        :  // auto_code_cache(nullptr, xnn_release_code_cache),
+          auto_weights_cache(nullptr, xnn_delete_weights_cache) {
+      if (enable) {
+#ifdef XNN_CACHE_ENABLE
+        xnn_status status = xnn_status_success;
+#if XNN_PLATFORM_JIT
+        // status = xnn_init_code_cache(&code_cache_);
+        // ORT_ENFORCE(status == xnn_status_success, "Failed to initialize XNNPACK code cache");)
+        // auto_code_cache.reset(&code_cache_);
+#endif
+        // status = xnn_init_weights_cache(&weights_cache_);
+        xnn_weights_cache_t weights_cache = nullptr;
+        status = xnn_create_weights_cache(&weights_cache, 0);
+        ORT_ENFORCE(status == xnn_status_success, "Failed to create XNNPACK weights cache");
+        auto_weights_cache.reset(weights_cache);
+#endif
+      }
+    }
+
+    // std::unique_ptr<xnn_code_cache, decltype(&xnn_release_code_cache)> auto_code_cache;
+    std::unique_ptr<xnn_weights_cache, decltype(&xnn_delete_weights_cache)> auto_weights_cache;
+
+    // private:
+    // #if defined(XNN_CACHE_ENABLE) && XNN_PLATFORM_JIT
+    //   xnn_code_cache code_cache_;
+    // #endif
+  };
+
+  Caches caches_;
 };
 }  // namespace xnnpack
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/session/abi_session_options.cc b/onnxruntime/core/session/abi_session_options.cc
index 4fcc6de561f8c..fb314b161f1ad 100644
--- a/onnxruntime/core/session/abi_session_options.cc
+++ b/onnxruntime/core/session/abi_session_options.cc
@@ -143,6 +143,14 @@ ORT_API_STATUS_IMPL(OrtApis::SetSessionLogId, _In_ OrtSessionOptions* options, c
   return nullptr;
 }
 
+///< logging function and optional logging param to use for session output
+ORT_API_STATUS_IMPL(OrtApis::SetUserLoggingFunction, _In_ OrtSessionOptions* options,
+                    _In_ OrtLoggingFunction user_logging_function, _In_opt_ void* user_logging_param) {
+  options->value.user_logging_function = user_logging_function;
+  options->value.user_logging_param = user_logging_param;
+  return nullptr;
+}
+
 ///< applies to session load, initialization, etc
 ORT_API_STATUS_IMPL(OrtApis::SetSessionLogVerbosityLevel, _In_ OrtSessionOptions* options, int session_log_verbosity_level) {
   options->value.session_log_verbosity_level = session_log_verbosity_level;
diff --git a/onnxruntime/core/session/custom_ops.cc b/onnxruntime/core/session/custom_ops.cc
index 14c284d7bbdec..b827c28f129b1 100644
--- a/onnxruntime/core/session/custom_ops.cc
+++ b/onnxruntime/core/session/custom_ops.cc
@@ -25,6 +25,12 @@
 #if !defined(ORT_MINIMAL_BUILD)
 static constexpr uint32_t min_ort_version_with_optional_io_support = 8;
 static constexpr uint32_t min_ort_version_with_variadic_io_support = 14;
+static constexpr uint32_t min_ort_version_with_custom_version = 17;
+#endif
+
+#if !defined(ORT_MINIMAL_BUILD) || defined(ORT_MINIMAL_BUILD_CUSTOM_OPS)
+static constexpr uint32_t min_ort_version_with_compute_v2_support = 16;
+static constexpr uint32_t min_ort_version_with_shape_inference = 17;
 #endif
 
 #if !defined(DISABLE_FLOAT8_TYPES)
@@ -33,6 +39,231 @@ static constexpr uint32_t min_ort_version_with_variadic_io_support = 14;
 #define SUPPORTED_TENSOR_TYPES DataTypeImpl::AllTensorTypesIRv4()
 #endif
 
+#if defined(ORT_MINIMAL_BUILD)
+struct OrtShapeInferContext {
+  size_t GetInputCount() const { return 0; }
+  OrtTensorTypeAndShapeInfo* GetInputTypeShape(size_t) const { return {}; }
+  onnxruntime::Status SetOutputTypeShape(size_t, const OrtTensorTypeAndShapeInfo*) const {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, NOT_IMPLEMENTED, "OrtShapeInferContext::SetOutputTypeShape not implemented for minimal build");
+  }
+  const ONNX_NAMESPACE::AttributeProto* GetAttr(const char*) const { return {}; }
+};
+#else
+struct OrtShapeInferContext {
+  OrtShapeInferContext(ONNX_NAMESPACE::InferenceContext& ctx) : ctx_(ctx) {
+    auto num_inputs = ctx_.getNumInputs();
+    for (size_t ith_input = 0; ith_input < num_inputs; ++ith_input) {
+      const auto* input_type = ctx_.getInputType(ith_input);
+      const auto& value_case = input_type->value_case();
+      ORT_ENFORCE(value_case == ONNX_NAMESPACE::TypeProto::kTensorType, "shape inference not yet supported for non-tensor types");
+      const auto& shape_proto = input_type->tensor_type().shape();
+      const auto& type_proto = input_type->tensor_type();
+      auto elem_type = ::onnxruntime::utils::CApiElementTypeFromProtoType(type_proto.elem_type());
+      auto tensor_shape = ::onnxruntime::utils::GetTensorShapeFromTensorShapeProto(shape_proto);
+      auto symbolic_dims = GetSymbolicDims(shape_proto);
+      input_type_shapes_.emplace_back(OrtTensorTypeAndShapeInfo::GetTensorShapeAndTypeHelper(elem_type, tensor_shape, &symbolic_dims).release());
+    }
+  }
+
+  ~OrtShapeInferContext() = default;
+  size_t GetInputCount() const { return input_type_shapes_.size(); }
+
+  OrtTensorTypeAndShapeInfo* GetInputTypeShape(size_t idx) const {
+    return input_type_shapes_.at(idx).get();
+  }
+
+  onnxruntime::Status SetOutputTypeShape(size_t index, const OrtTensorTypeAndShapeInfo* info) const {
+    ORT_RETURN_IF_NOT(info, "Invalid shape info");
+    ONNX_NAMESPACE::TensorShapeProto shape_proto;
+    const auto& symbolic_dims = info->dim_params;
+    const auto& integer_dims = info->shape.GetDims();
+    ORT_RETURN_IF_NOT(symbolic_dims.size() == integer_dims.size(), "symbolic and integer dims mismatch!");
+    for (size_t ith = 0; ith < symbolic_dims.size(); ith++) {
+      auto* dim_proto = shape_proto.add_dim();
+      if (symbolic_dims[ith].size() > 0) {
+        dim_proto->set_dim_param(symbolic_dims[ith]);
+      } else {
+        dim_proto->set_dim_value(integer_dims[ith]);
+      }
+    }
+    ONNX_NAMESPACE::updateOutputShape(ctx_, index, shape_proto);
+    return onnxruntime::Status::OK();
+  }
+
+  const ONNX_NAMESPACE::AttributeProto* GetAttr(const char* attr_name) const {
+    return ctx_.getAttribute(attr_name);
+  }
+
+ private:
+  static std::vector<std::string> GetSymbolicDims(const ONNX_NAMESPACE::TensorShapeProto& shape_proto) {
+    std::vector<std::string> symblic_dims;
+    for (int ith = 0; ith < shape_proto.dim_size(); ith++) {
+      const auto& dim = shape_proto.dim(ith);
+      if (::onnxruntime::utils::HasDimValue(dim)) {
+        symblic_dims.emplace_back();
+      } else {
+        symblic_dims.emplace_back(dim.dim_param());
+      }
+    }
+    return symblic_dims;
+  }
+  ONNX_NAMESPACE::InferenceContext& ctx_;
+  using TypeShapePtr = std::unique_ptr<OrtTensorTypeAndShapeInfo>;
+  onnxruntime::InlinedVector<TypeShapePtr> input_type_shapes_;
+};
+#endif
+
+ORT_API_STATUS_IMPL(OrtApis::ShapeInferContext_GetInputCount, _In_ const OrtShapeInferContext* context, _Out_ size_t* out) {
+  API_IMPL_BEGIN
+  *out = context->GetInputCount();
+  return nullptr;
+  API_IMPL_END
+}
+
+ORT_API_STATUS_IMPL(OrtApis::ShapeInferContext_GetInputTypeShape, _In_ const OrtShapeInferContext* context, _In_ size_t index, _Outptr_ OrtTensorTypeAndShapeInfo** info) {
+  API_IMPL_BEGIN
+  *info = context->GetInputTypeShape(index);
+  if (*info) {
+    return nullptr;
+  } else {
+    return OrtApis::CreateStatus(OrtErrorCode::ORT_INVALID_ARGUMENT, "Failed to fetch type shape info for the index.");
+  }
+  API_IMPL_END
+}
+
+ORT_API_STATUS_IMPL(OrtApis::ShapeInferContext_GetAttribute, _In_ const OrtShapeInferContext* context, _In_ const char* attr_name, _Outptr_ const OrtOpAttr** attr) {
+  API_IMPL_BEGIN
+  *attr = reinterpret_cast<const OrtOpAttr*>(context->GetAttr(attr_name));
+  if (*attr) {
+    return nullptr;
+  } else {
+    return OrtApis::CreateStatus(OrtErrorCode::ORT_INVALID_ARGUMENT, "Attribute does not exist.");
+  }
+  API_IMPL_END
+}
+
+ORT_API_STATUS_IMPL(OrtApis::ReadOpAttr,
+                    _In_ const OrtOpAttr* op_attr,
+                    _In_ OrtOpAttrType type,
+                    _Inout_ void* data,
+                    _In_ size_t len,
+                    _Out_ size_t* out) {
+  API_IMPL_BEGIN
+
+  if (!op_attr) {
+    return OrtApis::CreateStatus(OrtErrorCode::ORT_INVALID_ARGUMENT, "Invalid attribute.");
+  }
+
+  auto attr = reinterpret_cast<const ONNX_NAMESPACE::AttributeProto*>(op_attr);
+  OrtStatusPtr ret = nullptr;
+  *out = 0;
+
+  if (type == OrtOpAttrType::ORT_OP_ATTR_FLOAT) {
+    if (len < sizeof(float)) {
+      ret = OrtApis::CreateStatus(OrtErrorCode::ORT_INVALID_ARGUMENT, "Size of data not large enough to hold a float.");
+    } else {
+      if (attr->has_f()) {
+        auto output_f = reinterpret_cast<float*>(data);
+        *output_f = attr->f();
+      } else {
+        ret = OrtApis::CreateStatus(OrtErrorCode::ORT_INVALID_ARGUMENT, "Attribute has no float value.");
+      }
+    }
+    *out = sizeof(float);
+
+  } else if (type == OrtOpAttrType::ORT_OP_ATTR_FLOATS) {
+    const auto& floats = attr->floats();
+    auto num_floats = floats.size();
+
+    if (len < sizeof(float) * num_floats) {
+      ret = OrtApis::CreateStatus(OrtErrorCode::ORT_INVALID_ARGUMENT, "Size of data not large enough to hold the array of floats.");
+    } else {
+      auto output_f = reinterpret_cast<float*>(data);
+      for (auto f : floats) {
+        *output_f = f;
+        output_f++;
+      }
+    }
+    *out = num_floats * sizeof(float);
+
+  } else if (type == OrtOpAttrType::ORT_OP_ATTR_INT) {
+    if (len < sizeof(int)) {
+      ret = OrtApis::CreateStatus(OrtErrorCode::ORT_INVALID_ARGUMENT, "Size of data not large enough to hold an int64.");
+    } else {
+      if (attr->has_i()) {
+        auto output_i = reinterpret_cast<int64_t*>(data);
+        *output_i = attr->i();
+      } else {
+        ret = OrtApis::CreateStatus(OrtErrorCode::ORT_INVALID_ARGUMENT, "Attribute has no int64 value.");
+      }
+    }
+    *out = sizeof(int64_t);
+
+  } else if (type == OrtOpAttrType::ORT_OP_ATTR_INTS) {
+    const auto& ints = attr->ints();
+    auto num_ints = ints.size();
+
+    if (len < sizeof(int64_t) * num_ints) {
+      ret = OrtApis::CreateStatus(OrtErrorCode::ORT_INVALID_ARGUMENT, "Size of data not large enough to hold the array of int64.");
+    } else {
+      auto output_i = reinterpret_cast<int64_t*>(data);
+      for (auto i : ints) {
+        *output_i = i;
+        output_i++;
+      }
+    }
+    *out = num_ints * sizeof(int64_t);
+
+  } else if (type == OrtOpAttrType::ORT_OP_ATTR_STRING) {
+    const auto& s = attr->s();
+    if (len < s.size() + 1) {
+      ret = OrtApis::CreateStatus(OrtErrorCode::ORT_INVALID_ARGUMENT, "Size of data not large enough to hold the string.");
+    } else {
+      char* output_c = reinterpret_cast<char*>(data);
+      for (char c : s) {
+        *output_c++ = c;
+      }
+      *output_c = '\0';
+    }
+    *out = s.size() + 1;
+
+  } else if (type == OrtOpAttrType::ORT_OP_ATTR_STRINGS) {
+    const auto& ss = attr->strings();
+    size_t num_bytes = 0;
+    for_each(ss.begin(), ss.end(), [&num_bytes](const std::string& s) { num_bytes += s.size() + 1; });
+
+    if (len < num_bytes) {
+      ret = OrtApis::CreateStatus(OrtErrorCode::ORT_INVALID_ARGUMENT, "Size of data not large enough to hold the array of strings.");
+    } else {
+      char* output_c = reinterpret_cast<char*>(data);
+      for (const auto& s : ss) {
+        for (char c : s) {
+          *output_c++ = c;
+        }
+        *output_c++ = '\0';
+      }
+    }
+    *out = num_bytes;
+
+  } else {
+    ret = OrtApis::CreateStatus(OrtErrorCode::ORT_INVALID_ARGUMENT, "Unknown attribute type.");
+  }
+
+  return ret;
+  API_IMPL_END
+}
+
+ORT_API_STATUS_IMPL(OrtApis::ShapeInferContext_SetOutputTypeShape, _In_ const OrtShapeInferContext* context, _In_ size_t index, _In_ const OrtTensorTypeAndShapeInfo* info) {
+  API_IMPL_BEGIN
+  auto status = context->SetOutputTypeShape(index, info);
+  if (status.IsOK()) {
+    return nullptr;
+  } else {
+    return OrtApis::CreateStatus(static_cast<OrtErrorCode>(status.Code()), status.ErrorMessage().c_str());
+  }
+  API_IMPL_END
+}
+
 ORT_API_STATUS_IMPL(OrtApis::KernelInfoGetAttribute_float, _In_ const OrtKernelInfo* info, _In_ const char* name, _Out_ float* out) {
   API_IMPL_BEGIN
   auto status = reinterpret_cast<const onnxruntime::OpKernelInfo*>(info)->GetAttr<float>(name, out);
@@ -424,7 +655,8 @@ struct CustomOpKernel : OpKernel {
       ORT_THROW("Unsupported version '" + std::to_string(op_.version) + "' in custom op '" + op.GetName(&op));
     }
 
-    if (op_.version > 15 && op_.KernelCompute == 0) {
+    if (op_.version >= min_ort_version_with_compute_v2_support &&
+        op_.CreateKernelV2) {
       op_kernel_ = nullptr;
       Ort::ThrowOnError(
           op_.CreateKernelV2(
@@ -443,13 +675,14 @@ struct CustomOpKernel : OpKernel {
   }
 
   Status Compute(OpKernelContext* ctx) const override {
-    if (op_.version > 15 && op_.KernelCompute == 0) {
+    if (op_.version >= min_ort_version_with_compute_v2_support &&
+        op_.KernelComputeV2) {
       auto status_ptr = op_.KernelComputeV2(op_kernel_, reinterpret_cast<OrtKernelContext*>(ctx));
       return ToStatus(status_ptr);
+    } else {
+      op_.KernelCompute(op_kernel_, reinterpret_cast<OrtKernelContext*>(ctx));
+      return Status::OK();
     }
-
-    op_.KernelCompute(op_kernel_, reinterpret_cast<OrtKernelContext*>(ctx));
-    return Status::OK();
   }
 
  private:
@@ -466,8 +699,19 @@ KernelCreateInfo CreateKernelCreateInfo(const std::string& domain, const OrtCust
 
   KernelDefBuilder def_builder;
   def_builder.SetName(op->GetName(op))
-      .SetDomain(domain)
-      .SinceVersion(1);
+      .SetDomain(domain);
+
+  if (op->version >= min_ort_version_with_custom_version) {
+    if (op->GetStartVersion && op->GetEndVersion) {
+      def_builder.SinceVersion(op->GetStartVersion(op), op->GetEndVersion(op));
+    } else if (op->GetStartVersion) {
+      def_builder.SinceVersion(op->GetStartVersion(op));
+    } else {
+      def_builder.SinceVersion(1);
+    }
+  } else {
+    def_builder.SinceVersion(1);
+  }
 
   // GetInputMemoryType was introduced in ver 13. This check allows custom ops compiled using older versions
   // to work with newer versions (> 12) of the ORT binary.
@@ -588,8 +832,19 @@ ONNX_NAMESPACE::OpSchema CreateSchema(const std::string& domain, const OrtCustom
     schema.TypeConstraint(output_name, DataTypeImpl::ToString(SUPPORTED_TENSOR_TYPES), "all types");
   }
   schema.SetDomain(domain);
-  schema.SinceVersion(1);
+  if (op->version >= min_ort_version_with_custom_version && op->GetStartVersion) {
+    schema.SinceVersion(op->GetStartVersion(op));
+  } else {
+    schema.SinceVersion(1);
+  }
   schema.AllowUncheckedAttributes();
+
+  if (op->version >= min_ort_version_with_shape_inference && op->InferOutputShapeFn) {
+    schema.TypeAndShapeInferenceFunction([op](ONNX_NAMESPACE::InferenceContext& infer_ctx) {
+      OrtShapeInferContext ctx(infer_ctx);
+      op->InferOutputShapeFn(op, &ctx);
+    });
+  }
   return schema;
 }
 
@@ -758,10 +1013,14 @@ common::Status CreateCustomRegistry(gsl::span<OrtCustomOpDomain* const> op_domai
     for (auto schema_iter : schema_map) {
       schemas.push_back(schema_iter.second);
       InlinedVector<const KernelDef*> kernel_defs = std::move(kernel_def_map[schema_iter.first]);
-      ONNX_NAMESPACE::InferenceFunction infer_fn = [kernel_defs](ONNX_NAMESPACE::InferenceContext& infer_ctx) {
+      auto infer_fn = schemas.back().GetTypeAndShapeInferenceFunction();
+      ONNX_NAMESPACE::InferenceFunction extended_infer_fn = [infer_fn, kernel_defs](ONNX_NAMESPACE::InferenceContext& infer_ctx) {
         InferOutputTypes(kernel_defs, infer_ctx);
+        if (infer_fn) {
+          infer_fn(infer_ctx);
+        }
       };
-      schemas.back().TypeAndShapeInferenceFunction(infer_fn);
+      schemas.back().TypeAndShapeInferenceFunction(extended_infer_fn);
     }
 
     ORT_RETURN_IF_ERROR(output->RegisterOpSet(schemas,
@@ -789,7 +1048,6 @@ common::Status CreateCustomRegistry(gsl::span<OrtCustomOpDomain* const> op_domai
       // GetInputMemoryType was introduced in ver 13. This check allows custom ops compiled using older versions
       // to work with newer versions (> 12) of the ORT binary.
       if (op->version > 12) {
-        auto input_count = op->GetInputTypeCount(op);
         for (size_t i = 0; i < input_count; i++) {
           def_builder.InputMemoryType(op->GetInputMemoryType(op, i), i);
         }
diff --git a/onnxruntime/core/session/inference_session.cc b/onnxruntime/core/session/inference_session.cc
index 5a2a6efb6df4b..75be72658f98f 100644
--- a/onnxruntime/core/session/inference_session.cc
+++ b/onnxruntime/core/session/inference_session.cc
@@ -52,10 +52,13 @@
 #include "core/providers/cpu/cpu_execution_provider.h"
 #ifdef USE_DML  // TODO: This is necessary for the workaround in TransformGraph
 #include "core/providers/dml/DmlExecutionProvider/src/DmlGraphFusionTransformer.h"
+#include "core/providers/dml/DmlExecutionProvider/src/DmlRuntimeGraphFusionTransformer.h"
 #include "core/providers/dml/DmlExecutionProvider/src/GraphTransformer.h"
 #include "core/providers/dml/dml_session_options_config_keys.h"
+#include "core/providers/dml/DmlExecutionProvider/src/ExecutionProvider.h"
 #endif
 #include "core/session/environment.h"
+#include "core/session/user_logging_sink.h"
 #include "core/session/IOBinding.h"
 #include "core/session/inference_session_utils.h"
 #include "core/session/onnxruntime_session_options_config_keys.h"
@@ -71,6 +74,7 @@
 #ifdef ENABLE_TRAINING
 #include "core/framework/partial_graph_execution_state.h"
 #include "core/framework/stream_execution_context.h"
+#include "orttraining/core/optimizer/memory_optimizer.h"
 #endif
 
 using namespace ONNX_NAMESPACE;
@@ -298,6 +302,35 @@ static Status FinalizeSessionOptions(const SessionOptions& user_provided_session
   return Status::OK();
 }
 
+logging::Severity GetSeverity(const SessionOptions& session_options) {
+  logging::Severity severity = logging::Severity::kWARNING;
+  if (session_options.session_log_severity_level == -1) {
+    severity = logging::LoggingManager::DefaultLogger().GetSeverity();
+  } else {
+    ORT_ENFORCE(session_options.session_log_severity_level >= 0 &&
+                    session_options.session_log_severity_level <= static_cast<int>(logging::Severity::kFATAL),
+                "Invalid session log severity level. Not a valid onnxruntime::logging::Severity value: ",
+                session_options.session_log_severity_level);
+    severity = static_cast<logging::Severity>(session_options.session_log_severity_level);
+  }
+  return severity;
+}
+
+void InferenceSession::SetLoggingManager(const SessionOptions& session_options,
+                                         const Environment& session_env) {
+  logging_manager_ = session_env.GetLoggingManager();
+  if (session_options.user_logging_function) {
+    std::unique_ptr<logging::ISink> user_sink = std::make_unique<UserLoggingSink>(session_options.user_logging_function,
+                                                                                  session_options.user_logging_param);
+    user_logging_manager_ = std::make_unique<logging::LoggingManager>(std::move(user_sink),
+                                                                      GetSeverity(session_options),
+                                                                      false,
+                                                                      logging::LoggingManager::InstanceType::Temporal,
+                                                                      &session_options.session_logid);
+    logging_manager_ = user_logging_manager_.get();
+  }
+}
+
 void InferenceSession::ConstructorCommon(const SessionOptions& session_options,
                                          const Environment& session_env) {
   auto status = FinalizeSessionOptions(session_options, model_proto_, is_model_proto_parsed_, session_options_);
@@ -306,6 +339,8 @@ void InferenceSession::ConstructorCommon(const SessionOptions& session_options,
   ORT_ENFORCE(status.IsOK(), "Could not finalize session options while constructing the inference session. Error Message: ",
               status.ErrorMessage());
 
+  SetLoggingManager(session_options, session_env);
+
   // The call to InitLogger depends on the final state of session_options_. Hence it should be invoked
   // after the invocation of FinalizeSessionOptions.
   InitLogger(logging_manager_);  // this sets session_logger_ so that it can be used for logging after this point.
@@ -427,7 +462,6 @@ InferenceSession::InferenceSession(const SessionOptions& session_options, const
 #if !defined(ORT_MINIMAL_BUILD)
       graph_transformer_mgr_(session_options.max_num_graph_transformation_steps),
 #endif
-      logging_manager_(session_env.GetLoggingManager()),
       environment_(session_env) {
   // Initialize assets of this session instance
   ConstructorCommon(session_options, session_env);
@@ -441,7 +475,6 @@ InferenceSession::InferenceSession(const SessionOptions& session_options,
 #if !defined(ORT_MINIMAL_BUILD)
       graph_transformer_mgr_(session_options.max_num_graph_transformation_steps),
 #endif
-      logging_manager_(session_env.GetLoggingManager()),
       external_intra_op_thread_pool_(external_intra_op_thread_pool),
       external_inter_op_thread_pool_(external_inter_op_thread_pool),
       environment_(session_env) {
@@ -454,7 +487,6 @@ InferenceSession::InferenceSession(const SessionOptions& session_options, const
                                    const PathString& model_uri)
     : model_location_(model_uri),
       graph_transformer_mgr_(session_options.max_num_graph_transformation_steps),
-      logging_manager_(session_env.GetLoggingManager()),
       environment_(session_env) {
   auto status = Model::Load(model_location_, model_proto_);
   ORT_ENFORCE(status.IsOK(), "Given model could not be parsed while creating inference session. Error message: ",
@@ -475,7 +507,6 @@ InferenceSession::InferenceSession(const SessionOptions& session_options,
 InferenceSession::InferenceSession(const SessionOptions& session_options, const Environment& session_env,
                                    std::istream& model_istream)
     : graph_transformer_mgr_(session_options.max_num_graph_transformation_steps),
-      logging_manager_(session_env.GetLoggingManager()),
       environment_(session_env) {
   Status st = Model::Load(model_istream, &model_proto_);
   ORT_ENFORCE(st.IsOK(), "Could not parse model successfully while constructing the inference session");
@@ -487,7 +518,6 @@ InferenceSession::InferenceSession(const SessionOptions& session_options, const
 InferenceSession::InferenceSession(const SessionOptions& session_options, const Environment& session_env,
                                    const void* model_data, int model_data_len)
     : graph_transformer_mgr_(session_options.max_num_graph_transformation_steps),
-      logging_manager_(session_env.GetLoggingManager()),
       environment_(session_env) {
   const bool result = model_proto_.ParseFromArray(model_data, model_data_len);
   ORT_ENFORCE(result, "Could not parse model successfully while constructing the inference session");
@@ -586,9 +616,35 @@ common::Status InferenceSession::RegisterExecutionProvider(const std::shared_ptr
   }
 
 #if !defined(ORT_MINIMAL_BUILD) || defined(ORT_MINIMAL_BUILD_CUSTOM_OPS)
-  // Create Custom Op if EP requests it
+  // Register Custom Op if EP requests it
   std::vector<OrtCustomOpDomain*> custom_op_domains;
-  p_exec_provider->GetCustomOpDomainList(custom_op_domains);
+  std::vector<OrtCustomOpDomain*> candidate_custom_op_domains;
+  p_exec_provider->GetCustomOpDomainList(candidate_custom_op_domains);
+
+  auto registry_kernels = kernel_registry_manager_.GetKernelRegistriesByProviderType(p_exec_provider->Type());
+
+  // Register the custom op domain only if it has not been registered before
+  if (registry_kernels.empty()) {
+    custom_op_domains = candidate_custom_op_domains;
+  } else {
+    for (auto candidate_custom_op_domain : candidate_custom_op_domains) {
+      for (auto registry_kernel : registry_kernels) {
+        const auto& kernel_map = registry_kernel->GetKernelCreateMap();
+        bool need_register = true;
+        // If the kernel registry is the ep's custom op registry, we only need to check the first kernel,
+        // because all kernels in one kernel registry should have the same domain name.
+        for (auto iter = kernel_map.begin(); iter != kernel_map.end(); iter++) {
+          if (iter->second.kernel_def->Domain() == candidate_custom_op_domain->domain_) {
+            need_register = false;
+            break;
+          }
+        }
+        if (need_register) {
+          custom_op_domains.push_back(candidate_custom_op_domain);
+        }
+      }
+    }
+  }
 
   if (!custom_op_domains.empty()) {
     if (AddCustomOpDomains(custom_op_domains) != Status::OK()) {
@@ -957,14 +1013,27 @@ common::Status InferenceSession::Load() {
 
 common::Status InferenceSession::TransformGraph(onnxruntime::Graph& graph, bool saving_model_in_ort_format) {
   // The transformer order:
-  // 1. ensure potential QDQ node units have unique DQ nodes (required transformer).
+  // 1. Ensure we inline as many functions as possible. We refer to it as Ahead Of Time (AOT) function inlining.
+  // 2. ensure potential QDQ node units have unique DQ nodes (required transformer).
   //    - This is a required transformer as the ORT code has a hard requirement there are no overlapping QDQ node units.
   //    - We run it here in case optimizers are disabled.
-  // 2. run level 1 optimizations. these only use ONNX operators.
-  // 3. partition nodes based on EP capabilities. EPs may fuse nodes during this process.
-  // 4. run level 2+ optimizations. level 2 and 3 optimizations use contrib ops.
-  // 5. insert cast nodes (required transformer).
-  // 6. insert copy nodes (required transformer).
+  // 3. run level 1 optimizations. these only use ONNX operators.
+  // 4. partition nodes based on EP capabilities. EPs may fuse nodes during this process.
+  // 5. run level 2+ optimizations. level 2 and 3 optimizations use contrib ops.
+  // 6. insert cast nodes (required transformer).
+  // 7. insert copy nodes (required transformer).
+
+  // Run Ahead Of time function inlining
+  GraphPartitioner partitioner(kernel_registry_manager_, execution_providers_);
+  if (const bool disable_aot_function_inlining =
+          session_options_.config_options.GetConfigOrDefault(
+              kOrtSessionOptionsDisableAheadOfTimeFunctionInlining, "0") == "1";
+      !disable_aot_function_inlining) {
+    ORT_RETURN_IF_ERROR_SESSIONID_(partitioner.InlineFunctionsAOT(*model_,
+                                                                  execution_providers_,
+                                                                  kernel_registry_manager_,
+                                                                  *session_logger_));
+  }
 
   auto apply_transformer_once = [](const GraphTransformer& transformer, const logging::Logger& logger,
                                    Graph& graph) {
@@ -1005,18 +1074,22 @@ common::Status InferenceSession::TransformGraph(onnxruntime::Graph& graph, bool
           layout_transformation::TransformLayoutForEP(graph_to_transform, modified, execution_provider,
                                                       std::move(cpu_allocator), debug_graph_fn));
 
-      if (modified) {
-        ORT_RETURN_IF_ERROR_SESSIONID_(
-            graph_transformer_mgr_.ApplyTransformers(graph_to_transform, TransformerLevel::Level1, *session_logger_));
-
-        // debug the graph after the L1 transformers have run against any layout transformation changes.
-        // this is prior to GraphPartitioner::GetCapabilityForEP calling IExecutionProvider::GetCapability the second
-        // time to validate the EP that requested the layout transformation can take all nodes using the new layout.
-        // if that fails, this allows debugging the graph used in that GetCapability call.
-        if (debug_graph_fn) {
-          debug_graph_fn(graph_to_transform);
-        }
-      }
+      // Previously we ran the L1 transformers to handle constant folding of any initializers that were transposed in
+      // a QDQ format model. The transpose optimizer can now look past DQ nodes to directly update initializers which
+      // takes care of most models without needing this.
+      //
+      // if (modified) {
+      //  ORT_RETURN_IF_ERROR_SESSIONID_(
+      //      graph_transformer_mgr_.ApplyTransformers(graph_to_transform, TransformerLevel::Level1, *session_logger_));
+      //
+      // debug the graph after the L1 transformers have run against any layout transformation changes.
+      // this is prior to GraphPartitioner::GetCapabilityForEP calling IExecutionProvider::GetCapability the second
+      // time to validate the EP that requested the layout transformation can take all nodes using the new layout.
+      // if that fails, this allows debugging the graph used in that GetCapability call.
+      // if (debug_graph_fn) {
+      //  debug_graph_fn(graph_to_transform);
+      //}
+      //}
 
       return Status::OK();
     };
@@ -1044,7 +1117,6 @@ common::Status InferenceSession::TransformGraph(onnxruntime::Graph& graph, bool
   }
 
   // Do partitioning based on execution providers' capabilities.
-  GraphPartitioner partitioner(kernel_registry_manager_, execution_providers_);
   ORT_RETURN_IF_ERROR_SESSIONID_(partitioner.Partition(graph, session_state_->GetMutableFuncMgr(), transform_layout_fn,
                                                        mode, debug_graph_fn));
 
@@ -1078,6 +1150,20 @@ common::Status InferenceSession::TransformGraph(onnxruntime::Graph& graph, bool
     ORT_RETURN_IF_ERROR_SESSIONID_(apply_transformer_once(copy_transformer, *session_logger_, graph));
   }
 
+#ifdef ENABLE_TRAINING
+  // Enable memory optimizations (mainly insert recomputation nodes with priority).
+  // Only applicable for training scenarios.
+  {
+    const std::string memory_optimizer_config =
+        session_options_.config_options.GetConfigOrDefault(kOrtSessionOptionsMemoryOptimizerEnabler, "");
+    const std::string probe_level =
+        session_options_.config_options.GetConfigOrDefault(kOrtSessionOptionsMemoryOptimizerProbeLevel, "0");
+
+    MemoryOptimizer mem_transformer{memory_optimizer_config, probe_level};
+    ORT_RETURN_IF_ERROR_SESSIONID_(apply_transformer_once(mem_transformer, *session_logger_, graph));
+  }
+#endif
+
   return Status::OK();
 }
 #endif  // !defined(ORT_MINIMAL_BUILD)
@@ -1531,7 +1617,9 @@ common::Status InferenceSession::Initialize() {
                                                                record_runtime_optimization_produced_op_schema));
 
 #ifdef USE_DML
-      if (execution_providers_.Get(kDmlExecutionProvider)) {
+      const IExecutionProvider* dmlExecutionProvider = execution_providers_.Get(kDmlExecutionProvider);
+
+      if (dmlExecutionProvider) {
         // DML graph fusion is an important runtime optimization that cannot be done ahead of time; it must be disabled
         // when running in "offline mode" and saving an optimized model to disk. To support users that want to optimize
         // models offline, and then disable graph optimizations when running "online", this transformer ignores the ORT
@@ -1541,11 +1629,20 @@ common::Status InferenceSession::Initialize() {
 
         if (dml_graph_fusion_enabled) {
           std::unique_ptr<onnxruntime::GraphTransformer> dmlGraphFusionTransformer = std::make_unique<Dml::DmlGraphFusionTransformer>("DmlGraphFusionTransformer",
-                                                                                                                                      execution_providers_.Get(kDmlExecutionProvider));
+                                                                                                                                      dmlExecutionProvider);
           if (dmlGraphFusionTransformer == nullptr) {
             return Status(common::ONNXRUNTIME, common::FAIL, "DmlGraphFusionTransformer is nullptr");
           }
           ORT_RETURN_IF_ERROR_SESSIONID_(graph_transformer_mgr_.Register(std::move(dmlGraphFusionTransformer), onnxruntime::TransformerLevel::Level3));
+
+          if (static_cast<const Dml::ExecutionProvider*>(dmlExecutionProvider)->DynamicGraphFusionEnabled()) {
+            std::unique_ptr<onnxruntime::GraphTransformer> dmlRuntimeGraphFusionTransformer = std::make_unique<Dml::DmlRuntimeGraphFusionTransformer>("DmlRuntimeGraphFusionTransformer",
+                                                                                                                                                      dmlExecutionProvider);
+            if (dmlRuntimeGraphFusionTransformer == nullptr) {
+              return Status(common::ONNXRUNTIME, common::FAIL, "DmlRuntimeGraphFusionTransformer is nullptr");
+            }
+            ORT_RETURN_IF_ERROR_SESSIONID_(graph_transformer_mgr_.Register(std::move(dmlRuntimeGraphFusionTransformer), onnxruntime::TransformerLevel::Level3));
+          }
         }
 
         // This transformer applies DML-specific fusions that go beyond what ORT offers by default
@@ -1943,9 +2040,10 @@ common::Status InferenceSession::ValidateInputsOutputs(gsl::span<const std::stri
                                                 expected_element_type, "tensor", input_output_moniker));
 
       // check for shape
-      if (iter->second.tensor_shape.has_value()) {
+      const auto& opt_shape = iter->second.tensor_shape;
+      if (opt_shape.has_value() && !opt_shape->GetDims().empty()) {
         ORT_RETURN_IF_ERROR_SESSIONID_(CheckShapes(name, input_output_tensor.Shape(),
-                                                   *iter->second.tensor_shape, input_output_moniker));
+                                                   *opt_shape, input_output_moniker));
       }
     } else if (input_output_ml_value.IsSparseTensor()) {
 #if !defined(DISABLE_SPARSE_TENSORS)
@@ -1956,9 +2054,10 @@ common::Status InferenceSession::ValidateInputsOutputs(gsl::span<const std::stri
         ORT_RETURN_IF_ERROR_SESSIONID_(CheckTypes(sparse_tensor.DataType(), expected_element_type,
                                                   "sparse_tensor", input_output_moniker));
         // Check shape
-        if (iter->second.tensor_shape.has_value()) {
+        const auto& opt_shape = iter->second.tensor_shape;
+        if (opt_shape.has_value() && !opt_shape->GetDims().empty()) {
           ORT_RETURN_IF_ERROR_SESSIONID_(CheckShapes(name, sparse_tensor.DenseShape(),
-                                                     *iter->second.tensor_shape, input_output_moniker));
+                                                     *opt_shape, input_output_moniker));
         }
       } else if (is_sparse_initializer(name) &&
                  expected_type->IsTensorType()) {
@@ -1967,9 +2066,10 @@ common::Status InferenceSession::ValidateInputsOutputs(gsl::span<const std::stri
         ORT_RETURN_IF_ERROR_SESSIONID_(CheckTypes(sparse_tensor.DataType(), expected_element_type,
                                                   "sparse_tensor", input_output_moniker));
         // Check shape
-        if (iter->second.tensor_shape.has_value()) {
+        const auto& opt_shape = iter->second.tensor_shape;
+        if (opt_shape.has_value() && !opt_shape->GetDims().empty()) {
           ORT_RETURN_IF_ERROR_SESSIONID_(CheckShapes(name, sparse_tensor.DenseShape(),
-                                                     *iter->second.tensor_shape, input_output_moniker));
+                                                     *opt_shape, input_output_moniker));
         }
       } else {
         return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, input_output_moniker, " with name: '", name,
@@ -1979,7 +2079,6 @@ common::Status InferenceSession::ValidateInputsOutputs(gsl::span<const std::stri
       return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, input_output_moniker, " with name ", name,
                              " is a sparse tensor, which is not supported in this build.");
 #endif
-
     } else if (input_output_ml_value.IsTensorSequence()) {
       if (!expected_type->IsTensorSequenceType()
 #if !defined(DISABLE_OPTIONAL_TYPE)
@@ -2811,17 +2910,7 @@ const logging::Logger& InferenceSession::CreateLoggerForRun(const RunOptions& ru
 void InferenceSession::InitLogger(logging::LoggingManager* logging_manager) {
   // create logger for session, using provided logging manager if possible
   if (logging_manager != nullptr) {
-    logging::Severity severity = logging::Severity::kWARNING;
-    if (session_options_.session_log_severity_level == -1) {
-      severity = logging::LoggingManager::DefaultLogger().GetSeverity();
-    } else {
-      ORT_ENFORCE(session_options_.session_log_severity_level >= 0 &&
-                      session_options_.session_log_severity_level <= static_cast<int>(logging::Severity::kFATAL),
-                  "Invalid session log severity level. Not a valid onnxruntime::logging::Severity value: ",
-                  session_options_.session_log_severity_level);
-      severity = static_cast<logging::Severity>(session_options_.session_log_severity_level);
-    }
-
+    logging::Severity severity = GetSeverity(session_options_);
     owned_session_logger_ = logging_manager_->CreateLogger(session_options_.session_logid, severity, false,
                                                            session_options_.session_log_verbosity_level);
     session_logger_ = owned_session_logger_.get();
diff --git a/onnxruntime/core/session/inference_session.h b/onnxruntime/core/session/inference_session.h
index 9259e014b9860..4db436f132d11 100644
--- a/onnxruntime/core/session/inference_session.h
+++ b/onnxruntime/core/session/inference_session.h
@@ -595,7 +595,8 @@ class InferenceSession {
 
  private:
   ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(InferenceSession);
-
+  void SetLoggingManager(const SessionOptions& session_options,
+                         const Environment& session_env);
   void ConstructorCommon(const SessionOptions& session_options,
                          const Environment& session_env);
 
@@ -698,7 +699,10 @@ class InferenceSession {
   SessionOptions session_options_;
 
   /// Logging manager if provided.
-  logging::LoggingManager* const logging_manager_;
+  logging::LoggingManager* logging_manager_;
+
+  /// User specified logging mgr; logging_manager_ is simply the ptr in this unique_ptr when available
+  std::unique_ptr<logging::LoggingManager> user_logging_manager_;
 
   /// Logger for this session. WARNING: Will contain nullptr if logging_manager_ is nullptr.
   std::unique_ptr<logging::Logger> owned_session_logger_ = nullptr;
diff --git a/onnxruntime/core/session/onnxruntime_c_api.cc b/onnxruntime/core/session/onnxruntime_c_api.cc
index 60b6296f7f539..9f8786b727ac1 100644
--- a/onnxruntime/core/session/onnxruntime_c_api.cc
+++ b/onnxruntime/core/session/onnxruntime_c_api.cc
@@ -2713,6 +2713,14 @@ static constexpr OrtApi ort_api_1_to_17 = {
     &OrtApis::GetCUDAProviderOptionsByName,
     &OrtApis::KernelContext_GetResource,
     // End of Version 16 - DO NOT MODIFY ABOVE (see above text for more information)
+
+    &OrtApis::SetUserLoggingFunction,
+    &OrtApis::ShapeInferContext_GetInputCount,
+    &OrtApis::ShapeInferContext_GetInputTypeShape,
+    &OrtApis::ShapeInferContext_GetAttribute,
+    &OrtApis::ShapeInferContext_SetOutputTypeShape,
+    &OrtApis::SetSymbolicDimensions,
+    &OrtApis::ReadOpAttr,
 };
 
 // OrtApiBase can never change as there is no way to know what version of OrtApiBase is returned by OrtGetApiBase.
@@ -2742,6 +2750,7 @@ static_assert(offsetof(OrtApi, ReleaseCANNProviderOptions) / sizeof(void*) == 22
 static_assert(offsetof(OrtApi, GetSessionConfigEntry) / sizeof(void*) == 238, "Size of version 14 API cannot change");
 static_assert(offsetof(OrtApi, GetBuildInfoString) / sizeof(void*) == 254, "Size of version 15 API cannot change");
 static_assert(offsetof(OrtApi, KernelContext_GetResource) / sizeof(void*) == 265, "Size of version 16 API cannot change");
+static_assert(offsetof(OrtApi, SetUserLoggingFunction) / sizeof(void*) == 266, "Size of version 17 API cannot change");
 
 // So that nobody forgets to finish an API version, this check will serve as a reminder:
 static_assert(std::string_view(ORT_VERSION) == "1.17.0",
diff --git a/onnxruntime/core/session/ort_apis.h b/onnxruntime/core/session/ort_apis.h
index 47da2fa524588..09c83219ad2c8 100644
--- a/onnxruntime/core/session/ort_apis.h
+++ b/onnxruntime/core/session/ort_apis.h
@@ -491,4 +491,14 @@ ORT_API_STATUS_IMPL(GetTensorRTProviderOptionsByName, _In_ const OrtTensorRTProv
 ORT_API_STATUS_IMPL(UpdateCUDAProviderOptionsWithValue, _Inout_ OrtCUDAProviderOptionsV2* cuda_options, _In_ const char* key, _In_ void* value);
 ORT_API_STATUS_IMPL(GetCUDAProviderOptionsByName, _In_ const OrtCUDAProviderOptionsV2* cuda_options, _In_ const char* key, _Outptr_ void** ptr);
 ORT_API_STATUS_IMPL(KernelContext_GetResource, _In_ const OrtKernelContext* context, _In_ int resource_version, _In_ int resource_id, _Outptr_ void** stream);
+
+ORT_API_STATUS_IMPL(SetUserLoggingFunction, _Inout_ OrtSessionOptions* options,
+                    _In_ OrtLoggingFunction user_logging_function, _In_opt_ void* user_logging_param);
+ORT_API_STATUS_IMPL(ShapeInferContext_GetInputCount, _In_ const OrtShapeInferContext* context, _Out_ size_t* out);
+ORT_API_STATUS_IMPL(ShapeInferContext_GetInputTypeShape, _In_ const OrtShapeInferContext* context, _In_ size_t index, _Outptr_ OrtTensorTypeAndShapeInfo** info);
+ORT_API_STATUS_IMPL(ShapeInferContext_GetAttribute, _In_ const OrtShapeInferContext* context, _In_ const char* attr_name, _Outptr_ const OrtOpAttr** attr);
+ORT_API_STATUS_IMPL(ShapeInferContext_SetOutputTypeShape, _In_ const OrtShapeInferContext* context, _In_ size_t index, _In_ const OrtTensorTypeAndShapeInfo* info);
+ORT_API_STATUS_IMPL(SetSymbolicDimensions, _In_ OrtTensorTypeAndShapeInfo* info, _In_ const char* dim_params[], _In_ size_t dim_params_length);
+ORT_API_STATUS_IMPL(ReadOpAttr, _In_ const OrtOpAttr* op_attr, _In_ OrtOpAttrType type, _Inout_ void* data, _In_ size_t len, _Out_ size_t* out);
+
 }  // namespace OrtApis
diff --git a/onnxruntime/core/session/ort_env.cc b/onnxruntime/core/session/ort_env.cc
index eb78d5d799a55..e3957baa990f8 100644
--- a/onnxruntime/core/session/ort_env.cc
+++ b/onnxruntime/core/session/ort_env.cc
@@ -9,6 +9,7 @@
 #include "core/session/ort_apis.h"
 #include "core/session/environment.h"
 #include "core/session/allocator_adapters.h"
+#include "core/session/user_logging_sink.h"
 #include "core/common/logging/logging.h"
 #include "core/framework/provider_shutdown.h"
 #include "core/platform/logging/make_platform_default_log_sink.h"
@@ -20,17 +21,6 @@ std::unique_ptr<OrtEnv> OrtEnv::p_instance_;
 int OrtEnv::ref_count_ = 0;
 onnxruntime::OrtMutex OrtEnv::m_;
 
-LoggingWrapper::LoggingWrapper(OrtLoggingFunction logging_function, void* logger_param)
-    : logging_function_(logging_function), logger_param_(logger_param) {
-}
-
-void LoggingWrapper::SendImpl(const onnxruntime::logging::Timestamp& /*timestamp*/, const std::string& logger_id,
-                              const onnxruntime::logging::Capture& message) {
-  std::string s = message.Location().ToString();
-  logging_function_(logger_param_, static_cast<OrtLoggingLevel>(message.Severity()), message.Category(),
-                    logger_id.c_str(), s.c_str(), message.Message().c_str());
-}
-
 OrtEnv::OrtEnv(std::unique_ptr<onnxruntime::Environment> value1)
     : value_(std::move(value1)) {
 }
@@ -50,8 +40,8 @@ OrtEnv* OrtEnv::GetInstance(const OrtEnv::LoggingManagerConstructionInfo& lm_inf
     std::unique_ptr<LoggingManager> lmgr;
     std::string name = lm_info.logid;
     if (lm_info.logging_function) {
-      std::unique_ptr<ISink> logger = std::make_unique<LoggingWrapper>(lm_info.logging_function,
-                                                                       lm_info.logger_param);
+      std::unique_ptr<ISink> logger = std::make_unique<UserLoggingSink>(lm_info.logging_function,
+                                                                        lm_info.logger_param);
       lmgr = std::make_unique<LoggingManager>(std::move(logger),
                                               static_cast<Severity>(lm_info.default_warning_level),
                                               false,
diff --git a/onnxruntime/core/session/ort_env.h b/onnxruntime/core/session/ort_env.h
index 7d609acb2db5d..444134d0612e9 100644
--- a/onnxruntime/core/session/ort_env.h
+++ b/onnxruntime/core/session/ort_env.h
@@ -5,27 +5,15 @@
 #include <atomic>
 #include <string>
 #include "core/session/onnxruntime_c_api.h"
-#include "core/common/logging/isink.h"
 #include "core/platform/ort_mutex.h"
 #include "core/common/status.h"
+#include "core/common/logging/logging.h"
 #include "core/framework/allocator.h"
 
 namespace onnxruntime {
 class Environment;
 }
 
-class LoggingWrapper : public onnxruntime::logging::ISink {
- public:
-  LoggingWrapper(OrtLoggingFunction logging_function, void* logger_param);
-
-  void SendImpl(const onnxruntime::logging::Timestamp& /*timestamp*/, const std::string& logger_id,
-                const onnxruntime::logging::Capture& message) override;
-
- private:
-  OrtLoggingFunction logging_function_;
-  void* logger_param_;
-};
-
 struct OrtEnv {
  public:
   struct LoggingManagerConstructionInfo {
diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc
index bf7a3bbd9d380..df4dd55417755 100644
--- a/onnxruntime/core/session/provider_bridge_ort.cc
+++ b/onnxruntime/core/session/provider_bridge_ort.cc
@@ -108,6 +108,8 @@ namespace onnxruntime {
 
 ProviderInfo_CUDA* TryGetProviderInfo_CUDA();
 ProviderInfo_CUDA& GetProviderInfo_CUDA();
+ProviderInfo_TensorRT* TryGetProviderInfo_TensorRT();
+ProviderInfo_TensorRT& GetProviderInfo_TensorRT();
 ProviderInfo_CANN* TryGetProviderInfo_CANN();
 ProviderInfo_CANN& GetProviderInfo_CANN();
 ProviderInfo_Dnnl* TryGetProviderInfo_Dnnl();
@@ -1330,8 +1332,10 @@ OrtCUDAProviderOptionsV2 OrtCUDAProviderOptionsToOrtCUDAProviderOptionsV2(const
   // Use default value as this field is not available in OrtCUDAProviderOptions
   cuda_options_converted.cudnn_conv_use_max_workspace = 1;
   cuda_options_converted.enable_cuda_graph = 0;
+  cuda_options_converted.prefer_nhwc = 0;
   cuda_options_converted.cudnn_conv1d_pad_to_nc1d = 0;
   cuda_options_converted.enable_skip_layer_norm_strict_mode = 0;
+  cuda_options_converted.use_ep_level_unified_stream = 0;
 
   return cuda_options_converted;
 }
@@ -1418,10 +1422,6 @@ std::shared_ptr<IExecutionProviderFactory> TensorrtProviderFactoryCreator::Creat
   return s_library_tensorrt.Get().CreateExecutionProviderFactory(provider_options);
 }
 
-void TensorrtProviderGetCustomOpDomainList(IExecutionProviderFactory* factory, std::vector<OrtCustomOpDomain*>& custom_op_domains_ptr) {
-  s_library_tensorrt.Get().GetCustomOpDomainList(factory, custom_op_domains_ptr);
-}
-
 std::shared_ptr<IExecutionProviderFactory> MIGraphXProviderFactoryCreator::Create(const OrtMIGraphXProviderOptions* provider_options) {
   return s_library_migraphx.Get().CreateExecutionProviderFactory(provider_options);
 }
@@ -1432,7 +1432,7 @@ ProviderOptions OrtOpenVINOProviderOptionsToOrtOpenVINOProviderOptionsV2(const O
   if (legacy_ov_options->device_type != nullptr)
     ov_options_converted_map["device_type"] = legacy_ov_options->device_type;
 
-  ov_options_converted_map["enable_vpu_fast_compile"] = legacy_ov_options->enable_vpu_fast_compile;
+  ov_options_converted_map["enable_npu_fast_compile"] = legacy_ov_options->enable_npu_fast_compile;
 
   if (legacy_ov_options->device_id != nullptr)
     ov_options_converted_map["device_id"] = legacy_ov_options->device_id;
@@ -1474,6 +1474,20 @@ ProviderInfo_OpenVINO* GetProviderInfo_OpenVINO() {
   return reinterpret_cast<ProviderInfo_OpenVINO*>(s_library_openvino.Get().GetInfo());
 }
 
+ProviderInfo_TensorRT* TryGetProviderInfo_TensorRT() try {
+  return reinterpret_cast<ProviderInfo_TensorRT*>(s_library_tensorrt.Get().GetInfo());
+} catch (const std::exception& exception) {
+  LOGS_DEFAULT(ERROR) << exception.what();
+  return nullptr;
+}
+
+ProviderInfo_TensorRT& GetProviderInfo_TensorRT() {
+  if (auto* info = TryGetProviderInfo_TensorRT())
+    return *info;
+
+  ORT_THROW("TensorRT Provider not available, can't get interface for it");
+}
+
 ProviderInfo_CUDA* TryGetProviderInfo_CUDA() try {
   return reinterpret_cast<ProviderInfo_CUDA*>(s_library_cuda.Get().GetInfo());
 } catch (const std::exception& exception) {
@@ -1611,6 +1625,28 @@ ProviderOptions GetProviderInfo_Cuda(const OrtCUDAProviderOptionsV2* provider_op
 
 }  // namespace onnxruntime
 
+void AddTensorRTCustomOpDomainToSessionOption(OrtSessionOptions* options, std::string extra_plugin_lib_paths) {
+  auto is_already_in_domains = [&](std::string& domain_name, std::vector<OrtCustomOpDomain*>& domains) {
+    for (auto ptr : domains) {
+      if (domain_name == ptr->domain_) {
+        return true;
+      }
+    }
+    return false;
+  };
+
+  std::vector<OrtCustomOpDomain*> custom_op_domains;
+  onnxruntime::ProviderInfo_TensorRT& provider_info = onnxruntime::GetProviderInfo_TensorRT();
+  provider_info.GetTensorRTCustomOpDomainList(custom_op_domains, extra_plugin_lib_paths);
+  for (auto ptr : custom_op_domains) {
+    if (!is_already_in_domains(ptr->domain_, options->custom_op_domains_)) {
+      options->custom_op_domains_.push_back(ptr);
+    } else {
+      LOGS_DEFAULT(WARNING) << "The custom op domain name " << ptr->domain_ << " is already in session option.";
+    }
+  }
+}
+
 ORT_API_STATUS_IMPL(OrtSessionOptionsAppendExecutionProvider_Dnnl, _In_ OrtSessionOptions* options, int use_arena) {
   API_IMPL_BEGIN
   auto factory = onnxruntime::DnnlProviderFactoryCreator::Create(use_arena);
@@ -1632,11 +1668,8 @@ ORT_API_STATUS_IMPL(OrtSessionOptionsAppendExecutionProvider_Tensorrt, _In_ OrtS
 
   options->provider_factories.push_back(factory);
 
-  std::vector<OrtCustomOpDomain*> custom_op_domains;
-  TensorrtProviderGetCustomOpDomainList(factory.get(), custom_op_domains);
-  for (auto ptr : custom_op_domains) {
-    options->custom_op_domains_.push_back(ptr);
-  }
+  std::string extra_plugin_lib_paths = onnxruntime::Env::Default().GetEnvironmentVar("trt_extra_plugin_lib_paths");
+  AddTensorRTCustomOpDomainToSessionOption(options, extra_plugin_lib_paths);
 
   return nullptr;
   API_IMPL_END
@@ -1663,11 +1696,7 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT, _In
 
   options->provider_factories.push_back(factory);
 
-  std::vector<OrtCustomOpDomain*> custom_op_domains;
-  TensorrtProviderGetCustomOpDomainList(factory.get(), custom_op_domains);
-  for (auto ptr : custom_op_domains) {
-    options->custom_op_domains_.push_back(ptr);
-  }
+  AddTensorRTCustomOpDomainToSessionOption(options, "");
 
   return nullptr;
   API_IMPL_END
@@ -1771,11 +1800,9 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT_V2,
 
   options->provider_factories.push_back(factory);
 
-  std::vector<OrtCustomOpDomain*> custom_op_domains;
-  TensorrtProviderGetCustomOpDomainList(factory.get(), custom_op_domains);
-  for (auto ptr : custom_op_domains) {
-    options->custom_op_domains_.push_back(ptr);
-  }
+  std::string extra_plugin_lib_paths = (tensorrt_options == nullptr || tensorrt_options->trt_extra_plugin_lib_paths == nullptr) ? "" : tensorrt_options->trt_extra_plugin_lib_paths;
+  AddTensorRTCustomOpDomainToSessionOption(options, extra_plugin_lib_paths);
+
   return nullptr;
   API_IMPL_END
 }
@@ -1784,34 +1811,6 @@ ORT_API_STATUS_IMPL(OrtApis::CreateTensorRTProviderOptions, _Outptr_ OrtTensorRT
   API_IMPL_BEGIN
 #ifdef USE_TENSORRT
   auto options = std::make_unique<OrtTensorRTProviderOptionsV2>();
-  options->device_id = 0;
-  options->has_user_compute_stream = 0;
-  options->user_compute_stream = nullptr;
-  options->trt_max_partition_iterations = 1000;
-  options->trt_min_subgraph_size = 1;
-  options->trt_max_workspace_size = 1 << 30;
-  options->trt_fp16_enable = false;
-  options->trt_int8_enable = false;
-  options->trt_int8_calibration_table_name = nullptr;
-  options->trt_int8_use_native_calibration_table = false;
-  options->trt_dla_enable = false;
-  options->trt_dla_core = false;
-  options->trt_dump_subgraphs = false;
-  options->trt_engine_cache_enable = false;
-  options->trt_engine_cache_path = nullptr;
-  options->trt_engine_decryption_enable = false;
-  options->trt_engine_decryption_lib_path = nullptr;
-  options->trt_force_sequential_engine_build = false;
-  options->trt_context_memory_sharing_enable = false;
-  options->trt_layer_norm_fp32_fallback = false;
-  options->trt_timing_cache_enable = false;
-  options->trt_force_timing_cache = false;
-  options->trt_detailed_build_log = false;
-  options->trt_extra_plugin_lib_paths = nullptr;
-  options->trt_profile_min_shapes = nullptr;
-  options->trt_profile_max_shapes = nullptr;
-  options->trt_profile_opt_shapes = nullptr;
-  options->trt_cuda_graph_enable = false;
   *out = options.release();
   return nullptr;
 #else
@@ -1932,6 +1931,7 @@ ORT_API(void, OrtApis::ReleaseTensorRTProviderOptions, _Frees_ptr_opt_ OrtTensor
   if (ptr != nullptr) {
     delete[] ptr->trt_int8_calibration_table_name;
     delete[] ptr->trt_engine_cache_path;
+    delete[] ptr->trt_timing_cache_path;
     delete[] ptr->trt_engine_decryption_lib_path;
     delete[] ptr->trt_tactic_sources;
     delete[] ptr->trt_extra_plugin_lib_paths;
diff --git a/onnxruntime/core/session/provider_registration.cc b/onnxruntime/core/session/provider_registration.cc
index 9326c6eaff240..cb51a0c460d9a 100644
--- a/onnxruntime/core/session/provider_registration.cc
+++ b/onnxruntime/core/session/provider_registration.cc
@@ -12,6 +12,10 @@
 #include "core/session/ort_apis.h"
 #include "core/providers/openvino/openvino_provider_factory_creator.h"
 
+#if defined(USE_DML)
+#include "core/providers/dml/dml_provider_factory_creator.h"
+#endif
+
 using namespace onnxruntime;
 
 namespace {
@@ -67,7 +71,13 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider,
                                  (std::string(provider_name) + " execution provider is not supported in this build. ").c_str());
   };
 
-  if (strcmp(provider_name, "QNN") == 0) {
+  if (strcmp(provider_name, "DML") == 0) {
+#if defined(USE_DML)
+    options->provider_factories.push_back(DMLProviderFactoryCreator::CreateFromProviderOptions(provider_options));
+#else
+    status = create_not_supported_status();
+#endif
+  } else if (strcmp(provider_name, "QNN") == 0) {
 #if defined(USE_QNN)
     options->provider_factories.push_back(QNNProviderFactoryCreator::Create(provider_options, &(options->value)));
 #else
@@ -94,8 +104,10 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider,
   } else if (strcmp(provider_name, "WEBNN") == 0) {
 #if defined(USE_WEBNN)
     std::string deviceType = options->value.config_options.GetConfigOrDefault("deviceType", "cpu");
+    std::string numThreads = options->value.config_options.GetConfigOrDefault("numThreads", "0");
     std::string powerPreference = options->value.config_options.GetConfigOrDefault("powerPreference", "default");
     provider_options["deviceType"] = deviceType;
+    provider_options["numThreads"] = numThreads;
     provider_options["powerPreference"] = powerPreference;
     options->provider_factories.push_back(WebNNProviderFactoryCreator::Create(provider_options));
 #else
@@ -109,6 +121,10 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider,
 #endif
   } else if (strcmp(provider_name, "JS") == 0) {
 #if defined(USE_JSEP)
+    std::string preferred_layout;
+    if (options->value.config_options.TryGetConfigEntry("preferredLayout", preferred_layout)) {
+      provider_options["preferred_layout"] = preferred_layout;
+    }
     options->provider_factories.push_back(JsProviderFactoryCreator::Create(provider_options));
 #else
     status = create_not_supported_status();
diff --git a/onnxruntime/core/session/user_logging_sink.h b/onnxruntime/core/session/user_logging_sink.h
new file mode 100644
index 0000000000000..5a9ceb21d6500
--- /dev/null
+++ b/onnxruntime/core/session/user_logging_sink.h
@@ -0,0 +1,28 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+#include <string>
+
+#include "core/session/onnxruntime_c_api.h"
+#include "core/common/logging/isink.h"
+
+namespace onnxruntime {
+class UserLoggingSink : public onnxruntime::logging::ISink {
+ public:
+  UserLoggingSink(OrtLoggingFunction logging_function, void* logger_param)
+      : logging_function_(logging_function), logger_param_(logger_param) {
+  }
+
+  void SendImpl(const onnxruntime::logging::Timestamp& /*timestamp*/, const std::string& logger_id,
+                const onnxruntime::logging::Capture& message) override {
+    std::string s = message.Location().ToString();
+    logging_function_(logger_param_, static_cast<OrtLoggingLevel>(message.Severity()), message.Category(),
+                      logger_id.c_str(), s.c_str(), message.Message().c_str());
+  }
+
+ private:
+  OrtLoggingFunction logging_function_{};
+  void* logger_param_{};
+};
+}  // namespace onnxruntime
diff --git a/onnxruntime/core/util/matrix_layout.h b/onnxruntime/core/util/matrix_layout.h
new file mode 100644
index 0000000000000..a0405e32034ae
--- /dev/null
+++ b/onnxruntime/core/util/matrix_layout.h
@@ -0,0 +1,475 @@
+/**
+ * Copyright (c) Microsoft Corporation. All rights reserved.
+ * Licensed under the MIT License.
+ *
+ * Module Name:
+ *    matrix_layout.h
+ *
+ * Abstract:
+ *   Utils for simplifying positioning and striding in tensors. Inspired
+ *   by CUTLASS, striving for 0 runtime cost while promote safety.
+ *
+ *   Only supports 2D tensors (matrix) for now.
+ */
+
+#pragma once
+
+#include <cstdint>
+#include "core/common/gsl.h"
+
+// TODO!! Already have this in cuda, what about cpu code though?
+#if defined(_MSC_VER)
+#define ORT_FORCEINLINE __forceinline
+#else
+#define ORT_FORCEINLINE __attribute__((always_inline)) inline
+#endif
+
+namespace onnxruntime {
+
+//
+// Clang-format doesn't handle force inline decorator well, it insists on
+// adding extra indentation to the next line, making it very confusing
+// to read. So we turn it off for this file.
+// clang-format off
+//
+
+/**
+ * @brief A tuple of integers to represent tensor coordinates
+ */
+template <
+    int Rank_,                     ///< Logical rank of coordinate
+    typename Index_ = int,         ///< Index type used for each dimension
+    typename LongIndex_ = int64_t  ///< Long index type used for linear offsets
+    >
+struct Position {
+ public:
+  /// Number of elements in Position
+  static int const kRank = Rank_;
+
+  /// Index type used to store elements
+  using Index = Index_;
+
+  /// Type used to represent linear offsets
+  using LongIndex = LongIndex_;
+
+ private:
+  Index idx[kRank];
+
+ public:
+  ORT_FORCEINLINE explicit Position(Index value = Index(0)) {
+    for (int i = 0; i < kRank; ++i) {
+      idx[i] = value;
+    }
+  }
+
+  /// Constructs from an array of integers
+  ORT_FORCEINLINE
+  Position(Index const (&_idx)[kRank]) {
+    for (int i = 0; i < kRank; ++i) {
+      idx[i] = _idx[i];
+    }
+  }
+
+  template <int R, typename I, typename L>
+  ORT_FORCEINLINE
+  Position(Position<R, I, L> other) {
+    for (int i = 0; i < kRank; ++i) {
+      idx[i] = other[i];
+    }
+  }
+
+  ORT_FORCEINLINE
+  Position operator+(Position const& b) const {
+    Position c;
+    for (int i = 0; i < kRank; ++i) {
+      c.idx[i] = idx[i] + b.idx[i];
+    }
+    return c;
+  }
+
+  ORT_FORCEINLINE
+  Position operator-(Position const& b) const {
+    Position c;
+    for (int i = 0; i < kRank; ++i) {
+      c.idx[i] = idx[i] - b.idx[i];
+    }
+    return c;
+  }
+
+  ORT_FORCEINLINE
+  Position operator*(Position const& b) const {
+    Position c;
+    for (int i = 0; i < kRank; ++i) {
+      c.idx[i] = idx[i] * b.idx[i];
+    }
+    return c;
+  }
+
+  ORT_FORCEINLINE
+  Position operator/(Position const& b) const {
+    Position c;
+    for (int i = 0; i < kRank; ++i) {
+      c.idx[i] = idx[i] / b.idx[i];
+    }
+    return c;
+  }
+
+  ORT_FORCEINLINE
+  Position& operator+=(Position const& b) {
+    for (int i = 0; i < kRank; ++i) {
+      idx[i] += b.idx[i];
+    }
+    return *this;
+  }
+
+  ORT_FORCEINLINE
+  Position& operator-=(Position const& b) {
+    for (int i = 0; i < kRank; ++i) {
+      idx[i] -= b.idx[i];
+    }
+    return *this;
+  }
+
+  ORT_FORCEINLINE
+  Position& operator*=(Position const& b) {
+    for (int i = 0; i < kRank; ++i) {
+      idx[i] *= b.idx[i];
+    }
+    return *this;
+  }
+
+  ORT_FORCEINLINE
+  Position& operator/=(Position const& b) {
+    for (int i = 0; i < kRank; ++i) {
+      idx[i] /= b.idx[i];
+    }
+    return *this;
+  }
+
+  ORT_FORCEINLINE Index& operator[](int dim) { return idx[dim]; }
+
+  ORT_FORCEINLINE Index const& operator[](int dim) const { return idx[dim]; }
+
+  ORT_FORCEINLINE bool operator==(Position const& b) const {
+    bool equal = true;
+    for (int i = 0; equal && i < kRank; ++i) {
+      equal = (idx[i] == b.idx[i]);
+    }
+    return equal;
+  }
+
+  ORT_FORCEINLINE bool operator!=(Position const& b) const { return !(*this == b); }
+
+  ORT_FORCEINLINE
+  Position& clamp(Position const& max, Position const& min = Position()) {
+    for (int i = 0; i < kRank; ++i) {
+      idx[i] = std::max(std::min(idx[i], max.idx[i]), min.idx[i]);
+    }
+    return *this;
+  }
+
+  ORT_FORCEINLINE
+  Index sum() const {
+    Index sum_(idx[0]);
+    for (int i = 1; i < kRank; ++i) {
+      sum_ += idx[i];
+    }
+    return sum_;
+  }
+
+  ORT_FORCEINLINE
+  LongIndex product() const {
+    LongIndex product_(idx[0]);
+    for (int i = 1; i < kRank; ++i) {
+      product_ *= idx[i];
+    }
+    return product_;
+  }
+};
+
+template <typename T, typename L = int64_t>
+Position<2, T, L> make_Position(T _0, T _1) {
+  T values[2] = {_0, _1};
+  return Position<2, T, L>(values);
+}
+
+template <typename T, typename L = int64_t>
+Position<3, T, L> make_Position(T _0, T _1, T _2) {
+  T values[3] = {_0, _1, _2};
+  return Position<2, T, L>(values);
+}
+
+/// Describes the size of a matrix tile
+template <
+    int Row_,    ///< rows of a matrix
+    int Column_  ///< columns of a matrix
+    >
+struct MatrixShape {
+  static int const kRow = Row_;              ///< rows of a matrix
+  static int const kColumn = Column_;        ///< columns of a matrix
+  static int const kCount = Row_ * Column_;  ///< total number of elements in a matrix
+
+  ORT_FORCEINLINE static Position<2> toCoord() {
+    return make_Position(kRow, kColumn);
+  }
+};
+
+/**
+ * @brief Defines a mapping from logical coordinate to linear memory
+ * offsets in a row major layout matrix
+ */
+class RowMajorLayout {
+ public:
+  /// Index type used for coordinates
+  using Index = int;
+
+  /// Long index type used for offsets
+  using LongIndex = int64_t;
+
+  /// Logical coordinate
+  using MatCoord = Position<2, Index, LongIndex>;
+
+ private:
+  Index stride_;
+
+ public:
+  ORT_FORCEINLINE
+  RowMajorLayout(Index ldm = 0) : stride_(ldm) {}
+
+  ORT_FORCEINLINE static RowMajorLayout packed(MatCoord const& extent) {
+    return RowMajorLayout(extent[1]);
+  }
+
+  /// Returns the offset of a coordinate in linear memory.
+  /// Assumes coordinate has convention (row, column)
+  ORT_FORCEINLINE
+  LongIndex operator()(MatCoord const& coord) const {
+    return LongIndex(coord[0]) * stride_ + coord[1];
+  }
+
+  /// Inverse of layout function, mapping linear offset to logical coordinate
+  ORT_FORCEINLINE
+  MatCoord inverse(LongIndex offset) const {
+    return make_Position(Index(offset / stride_), Index(offset % stride_));
+  }
+
+  ORT_FORCEINLINE
+  Index stride() const {
+    return stride_;
+  }
+};
+
+class ColumnMajorLayout {
+ public:
+  /// Index type used for coordinates
+  using Index = int;
+
+  /// Long index type used for offsets
+  using LongIndex = int64_t;
+
+  /// Logical coordinate
+  using MatCoord = Position<2, Index, LongIndex>;
+
+ private:
+  Index stride_;
+
+ public:
+  ORT_FORCEINLINE
+  ColumnMajorLayout(Index ldm = 0) : stride_(ldm) {}
+
+  ORT_FORCEINLINE static ColumnMajorLayout packed(MatCoord const& extent) {
+    return ColumnMajorLayout(extent[0]);
+  }
+
+  /// Returns the offset of a coordinate in linear memory.
+  /// Assumes coordinate has convention (row, column)
+  ORT_FORCEINLINE
+  LongIndex operator()(MatCoord const& coord) const {
+    return LongIndex(coord[1]) * LongIndex(stride_) + coord[0];
+  }
+
+  /// Inverse of layout function, mapping linear offset to logical coordinate
+  ORT_FORCEINLINE
+  MatCoord inverse(LongIndex offset) const {
+    return make_Position(Index(offset % stride_), Index(offset / stride_));
+  }
+
+  ORT_FORCEINLINE
+  Index stride() const {
+    return stride_;
+  }
+};
+
+/**
+ * @brief A reference to a tensor, with a layout object to map logical
+ * coordinates to linear offsets.
+ */
+template <
+    /// Data type of element stored within tensor, must be numerical types
+    typename Element_,
+    /// Defines a mapping from logical coordinate to linear memory offsets
+    typename Layout_,
+    /// If true, extra bounds checking is performed on all accesses
+    bool ExtraBoundsCheck_ = false>
+class MatrixRef {
+ public:
+  /// Data type of individual access
+  using Element = Element_;
+
+  using Reference = Element&;
+
+  /// Mapping function from logical coordinate to linear memory
+  using Layout = Layout_;
+
+  /// Index type
+  using Index = typename Layout::Index;
+
+  /// Long index used for pointer offsets
+  using LongIndex = typename Layout::LongIndex;
+
+  /// Coordinate in logical tensor space
+  using MatCoord = typename Layout::MatCoord;
+
+  /// MatrixRef to constant data
+  using ConstMatrixRef = MatrixRef<
+      typename std::remove_const<Element>::type const,
+      Layout, ExtraBoundsCheck_>;
+
+  /// MatrixRef to non-constant data
+  using NonConstMatrixRef = MatrixRef<
+      typename std::remove_const<Element>::type,
+      Layout, ExtraBoundsCheck_>;
+
+  static constexpr bool IsNonConstRef = std::is_same<NonConstMatrixRef, MatrixRef<Element_, Layout_>>::value;
+
+ private:
+  /// Pointer to data
+  gsl::span<Element> data_;
+
+  /// Shape of matrix
+  MatCoord shape_;
+
+  /// Layout object maps logical coordinates to linear offsets
+  Layout layout_;
+
+ public:
+  ORT_FORCEINLINE
+  MatrixRef() : data_() {}
+
+  ORT_FORCEINLINE
+  MatrixRef(
+      gsl::span<Element> const& data,  ///< pointer to start of tensor
+      MatCoord const& shape            ///< shape of tensor
+      ) : data_(data), shape_(shape), layout_(Layout::packed(shape)) {
+    Expects(data_.size() >= size_t(shape_.product()));
+  }
+
+  ORT_FORCEINLINE
+  MatrixRef(
+      Element* ptr,          ///< pointer to start of tensor
+      LongIndex size,        ///< size of tensor in elements
+      MatCoord const& shape  ///< shape of tensor
+      ) : data_(ptr, size), shape_(shape), layout_(Layout::packed(shape)) {
+    Expects(data_.size() >= shape_.product());
+  }
+
+  /// Converting constructor from MatrixRef to non-constant data.
+  template <typename _Magic = int>
+  ORT_FORCEINLINE
+  MatrixRef(
+      NonConstMatrixRef const& ref,  ///< MatrixRef to non-const data
+      /// SFINAE trick to avoid creating a copy-constructor when Element_ is already non-const
+      _Magic magic = (typename std::enable_if<!IsNonConstRef, _Magic>::type)0
+      ) : data_(ref.data()), shape_(ref.shape()), layout_(Layout::packed(ref.shape())) {}
+
+  ORT_FORCEINLINE
+  ConstMatrixRef const_ref() const {
+    return ConstMatrixRef(data_, shape_);
+  }
+
+  ORT_FORCEINLINE
+  NonConstMatrixRef non_const_ref() {
+    return NonConstMatrixRef(
+        const_cast<typename std::remove_const<Element>::type*>(data_.data()),
+        data_.size(), shape_);
+  }
+
+  /// Returns true if the MatrixRef is non-null
+  ORT_FORCEINLINE
+  bool good() const { return !data_.empty(); }
+
+  ORT_FORCEINLINE
+  gsl::span<Element> const& data() const { return data_; }
+
+  ORT_FORCEINLINE
+  MatCoord const& shape() const { return shape_; }
+
+  ORT_FORCEINLINE
+  Layout& layout() { return layout_; }
+
+  ORT_FORCEINLINE
+  Layout layout() const { return layout_; }
+
+  ORT_FORCEINLINE
+  Index stride() const { return layout_.stride(); }
+
+  ORT_FORCEINLINE
+  Index& stride() { return layout_.stride(); }
+
+  /// Computes the offset of an index from the origin of the tensor
+  ORT_FORCEINLINE
+  LongIndex offset(MatCoord const& coord) const {
+    if constexpr (ExtraBoundsCheck_) {
+      Expects(coord[0] >= 0 && coord[0] < shape_[0]);
+      Expects(coord[1] >= 0 && coord[1] < shape_[1]);
+    }
+    return layout_(coord);
+  }
+
+  /// Returns a reference to the element at a given Coord
+  ORT_FORCEINLINE
+  Reference at(MatCoord const& coord) const {
+    return data_[offset(coord)];
+  }
+
+  ORT_FORCEINLINE
+  Reference at(int row, int col) const {
+    return data_[offset(make_Position(row, col))];
+  }
+
+  /// Returns a reference to the element at a given Coord
+  ORT_FORCEINLINE
+  Reference operator[](MatCoord const& coord) const {
+    return data_[offset(coord)];
+  }
+};
+
+/// Constructs a MatrixRef, deducing types from arguments.
+template <
+    typename Element,
+    typename Layout = RowMajorLayout,
+    bool ExtraBoundsCheck = false>
+ORT_FORCEINLINE
+MatrixRef<Element, Layout, ExtraBoundsCheck>
+make_MatrixRef(
+    Element* ptr,
+    int64_t size,
+    typename Layout::MatCoord const& shape) {
+  return MatrixRef<Element, Layout, ExtraBoundsCheck>(ptr, size, shape);
+}
+
+template <
+    typename Element,
+    typename Layout = RowMajorLayout,
+    bool ExtraBoundsCheck = false>
+ORT_FORCEINLINE
+MatrixRef<Element, Layout, ExtraBoundsCheck>
+make_MatrixRef(
+    const gsl::span<Element>& span,
+    typename Layout::MatCoord const& shape) {
+  return MatrixRef<Element, Layout, ExtraBoundsCheck>(span, shape);
+}
+
+// clang-format off
+
+}  // namespace onnxruntime
diff --git a/onnxruntime/python/backend/backend.py b/onnxruntime/python/backend/backend.py
index 1edae383e93e6..97b7358f2a223 100644
--- a/onnxruntime/python/backend/backend.py
+++ b/onnxruntime/python/backend/backend.py
@@ -63,7 +63,7 @@ def is_opset_supported(cls, model):
                         error_message = (
                             "Skipping this test as only released onnx opsets are supported."
                             "To run this test set env variable ALLOW_RELEASED_ONNX_OPSET_ONLY to 0."
-                            " Got Domain '{}' version '{}'.".format(domain, opset.version)
+                            f" Got Domain '{domain}' version '{opset.version}'."
                         )
                         return False, error_message
                 except AttributeError:
@@ -74,7 +74,7 @@ def is_opset_supported(cls, model):
                         error_message = (
                             "Skipping this test as only released onnx opsets are supported."
                             "To run this test set env variable ALLOW_RELEASED_ONNX_OPSET_ONLY to 0."
-                            " Got Domain '{}' version '{}'.".format(domain, opset.version)
+                            f" Got Domain '{domain}' version '{opset.version}'."
                         )
                         return False, error_message
         return True, ""
diff --git a/onnxruntime/python/onnxruntime_inference_collection.py b/onnxruntime/python/onnxruntime_inference_collection.py
index 4124822adef1f..1a3e22142f80e 100644
--- a/onnxruntime/python/onnxruntime_inference_collection.py
+++ b/onnxruntime/python/onnxruntime_inference_collection.py
@@ -438,7 +438,7 @@ def _create_inference_session(self, providers, provider_options, disabled_optimi
 
         # Tensorrt can fall back to CUDA if it's explicitly assigned. All others fall back to CPU.
         if "TensorrtExecutionProvider" in available_providers:
-            if any(
+            if providers and any(
                 provider == "CUDAExecutionProvider"
                 or (isinstance(provider, tuple) and provider[0] == "CUDAExecutionProvider")
                 for provider in providers
@@ -448,7 +448,7 @@ def _create_inference_session(self, providers, provider_options, disabled_optimi
                 self._fallback_providers = ["CPUExecutionProvider"]
         # MIGraphX can fall back to ROCM if it's explicitly assigned. All others fall back to CPU.
         elif "MIGraphXExecutionProvider" in available_providers:
-            if any(
+            if providers and any(
                 provider == "ROCMExecutionProvider"
                 or (isinstance(provider, tuple) and provider[0] == "ROCMExecutionProvider")
                 for provider in providers
@@ -463,16 +463,11 @@ def _create_inference_session(self, providers, provider_options, disabled_optimi
         providers, provider_options = check_and_normalize_provider_args(
             providers, provider_options, available_providers
         )
-        if not providers and len(available_providers) > 1:
-            self.disable_fallback()
-            raise ValueError(
-                f"This ORT build has {available_providers} enabled. "
-                "Since ORT 1.9, you are required to explicitly set "
-                "the providers parameter when instantiating InferenceSession. For example, "
-                f"onnxruntime.InferenceSession(..., providers={available_providers}, ...)"
-            )
 
         session_options = self._sess_options if self._sess_options else C.get_default_session_options()
+
+        self._register_ep_custom_ops(session_options, providers, provider_options)
+
         if self._model_path:
             sess = C.InferenceSession(session_options, self._model_path, True, self._read_config_from_model)
         else:
@@ -515,6 +510,13 @@ def _reset_session(self, providers, provider_options):
         self._sess_options = self._sess_options_initial
         self._create_inference_session(providers, provider_options)
 
+    def _register_ep_custom_ops(self, session_options, providers, provider_options):
+        for i in range(len(providers)):
+            if providers[i] == "TensorrtExecutionProvider":
+                C.register_tensorrt_plugins_as_custom_ops(session_options, provider_options[i])
+            elif isinstance(providers[i], tuple) and providers[i][0] == "TensorrtExecutionProvider":
+                C.register_tensorrt_plugins_as_custom_ops(session_options, providers[i][1])
+
 
 class IOBinding:
     """
diff --git a/onnxruntime/python/onnxruntime_pybind_iobinding.cc b/onnxruntime/python/onnxruntime_pybind_iobinding.cc
index 7638a12bb820c..59d5a77bfbea3 100644
--- a/onnxruntime/python/onnxruntime_pybind_iobinding.cc
+++ b/onnxruntime/python/onnxruntime_pybind_iobinding.cc
@@ -60,8 +60,6 @@ void addIoBindingMethods(pybind11::module& m) {
       })
       // This binds input as a Tensor that wraps memory pointer along with the OrtMemoryInfo
       .def("bind_input", [](SessionIOBinding* io_binding, const std::string& name, const OrtDevice& device, py::object& element_type, const std::vector<int64_t>& shape, int64_t data_ptr) -> void {
-        ORT_ENFORCE(data_ptr != 0, "Pointer to data memory is not valid");
-
         PyArray_Descr* dtype;
         if (!PyArray_DescrConverter(element_type.ptr(), &dtype)) {
           throw std::runtime_error("Not a valid numpy type");
diff --git a/onnxruntime/python/onnxruntime_pybind_module.cc b/onnxruntime/python/onnxruntime_pybind_module.cc
index f320707697c9e..1d8ca195ab82b 100644
--- a/onnxruntime/python/onnxruntime_pybind_module.cc
+++ b/onnxruntime/python/onnxruntime_pybind_module.cc
@@ -10,7 +10,14 @@ namespace onnxruntime {
 namespace python {
 namespace py = pybind11;
 
+#if defined(USE_MPI) && defined(ORT_USE_NCCL)
+static constexpr bool HAS_COLLECTIVE_OPS = true;
+#else
+static constexpr bool HAS_COLLECTIVE_OPS = false;
+#endif
+
 void CreateInferencePybindStateModule(py::module& m);
+void CreateQuantPybindModule(py::module& m);
 
 PYBIND11_MODULE(onnxruntime_pybind11_state, m) {
   CreateInferencePybindStateModule(m);
@@ -23,6 +30,8 @@ PYBIND11_MODULE(onnxruntime_pybind11_state, m) {
 
   m.def("get_version_string", []() -> std::string { return ORT_VERSION; });
   m.def("get_build_info", []() -> std::string { return ORT_BUILD_INFO; });
+  m.def("has_collective_ops", []() -> bool { return HAS_COLLECTIVE_OPS; });
+  CreateQuantPybindModule(m);
 }
 }  // namespace python
 }  // namespace onnxruntime
diff --git a/onnxruntime/python/onnxruntime_pybind_quant.cc b/onnxruntime/python/onnxruntime_pybind_quant.cc
new file mode 100644
index 0000000000000..ff76887e917cd
--- /dev/null
+++ b/onnxruntime/python/onnxruntime_pybind_quant.cc
@@ -0,0 +1,105 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include <pybind11/pybind11.h>
+#include <pybind11/numpy.h>
+#include <pybind11/functional.h>
+
+#include "core/mlas/inc/mlas_q4.h"
+#include "contrib_ops/cpu/quantization/dequantize_blockwise_bnb4.h"
+#include "core/util/thread_utils.h"
+
+namespace pybind11 {
+namespace detail {
+// python3 -c 'import numpy as np; print(np.dtype(np.float16).num)'
+constexpr int NPY_FLOAT16 = 23;
+template <>
+struct npy_format_descriptor<onnxruntime::MLFloat16> {
+  static constexpr auto name = _("float16");
+  static pybind11::dtype dtype() {
+    handle ptr = npy_api::get().PyArray_DescrFromType_(NPY_FLOAT16);
+    return reinterpret_borrow<pybind11::dtype>(ptr);
+  }
+  static std::string format() {
+    // following: https://docs.python.org/3/library/struct.html#format-characters
+    return "e";
+  }
+};
+}  // namespace detail
+}  // namespace pybind11
+
+namespace onnxruntime {
+namespace python {
+
+namespace py = pybind11;
+using namespace onnxruntime;
+
+template <typename T>
+void QuantizeMatMul4BitsBlockwise(
+    py::array_t<uint8_t> dst,          // shape: [ N, block_per_K, block_blob_size ]
+    py::array_t<T> src,                // shape: [K, N]
+    py::array_t<T> scale,              // shape: [N, block_per_K]
+    py::array_t<uint8_t> zero_points,  // shape: [N, block_per_K] if bits > 4 else [N, (block_per_K + 1) / 2]
+    int32_t block_size,
+    int32_t N,
+    int32_t K,
+    bool is_symmetric) {
+  OrtThreadPoolParams to;
+  auto tp = concurrency::CreateThreadPool(&onnxruntime::Env::Default(), to,
+                                          concurrency::ThreadPoolType::INTRA_OP);
+
+  py::buffer_info dst_buf = dst.request();
+  py::buffer_info src_buf = src.request();
+  py::buffer_info scale_buf = scale.request();
+  py::buffer_info zp_buf = zero_points.request();
+
+  MlasQuantizeBlockwise<T, 4>(
+      reinterpret_cast<uint8_t*>(dst_buf.ptr),
+      reinterpret_cast<T*>(scale_buf.ptr),
+      is_symmetric ? nullptr : reinterpret_cast<uint8_t*>(zp_buf.ptr),
+      reinterpret_cast<const T*>(src_buf.ptr),
+      block_size,
+      true,
+      K,
+      N,
+      N,
+      tp.get());
+}
+
+template <typename T>
+void QuantizeMatMulBnb4Blockwise(
+    py::array_t<uint8_t> dst,
+    py::array_t<T> src,
+    py::array_t<T> absmax,
+    int32_t block_size,
+    int32_t quant_type,
+    int32_t N,
+    int32_t K) {
+  OrtThreadPoolParams to;
+  auto tp = concurrency::CreateThreadPool(&onnxruntime::Env::Default(), to,
+                                          concurrency::ThreadPoolType::INTRA_OP);
+
+  py::buffer_info dst_buf = dst.request();
+  py::buffer_info src_buf = src.request();
+  py::buffer_info absmax_buf = absmax.request();
+
+  contrib::QuantizeBlockwiseBnb4<T>(
+      static_cast<uint8_t*>(dst_buf.ptr),
+      static_cast<const T*>(src_buf.ptr),
+      static_cast<T*>(absmax_buf.ptr),
+      block_size,
+      quant_type,
+      N,
+      K,
+      tp.get());
+}
+
+void CreateQuantPybindModule(py::module& m) {
+  m.def("quantize_matmul_4bits", &QuantizeMatMul4BitsBlockwise<float>);
+  m.def("quantize_matmul_4bits", &QuantizeMatMul4BitsBlockwise<MLFloat16>);
+  m.def("quantize_matmul_bnb4", &QuantizeMatMulBnb4Blockwise<float>);
+  m.def("quantize_matmul_bnb4", &QuantizeMatMulBnb4Blockwise<MLFloat16>);
+}
+
+}  // namespace python
+}  // namespace onnxruntime
diff --git a/onnxruntime/python/onnxruntime_pybind_schema.cc b/onnxruntime/python/onnxruntime_pybind_schema.cc
index a8c217b0ff1f6..3a977772873f3 100644
--- a/onnxruntime/python/onnxruntime_pybind_schema.cc
+++ b/onnxruntime/python/onnxruntime_pybind_schema.cc
@@ -59,7 +59,7 @@ void addGlobalSchemaFunctions(pybind11::module& m) {
             onnxruntime::ArmNNProviderFactoryCreator::Create(0),
 #endif
 #ifdef USE_DML
-            onnxruntime::DMLProviderFactoryCreator::Create(0, /*skip_software_device_check*/ true),
+            onnxruntime::DMLProviderFactoryCreator::Create(0, false, false, false),
 #endif
 #ifdef USE_NNAPI
             onnxruntime::NnapiProviderFactoryCreator::Create(0, std::optional<std::string>()),
diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc
index 907ea0ec41e23..56312898b0d16 100644
--- a/onnxruntime/python/onnxruntime_pybind_state.cc
+++ b/onnxruntime/python/onnxruntime_pybind_state.cc
@@ -430,6 +430,38 @@ const ROCMExecutionProviderInfo GetRocmExecutionProviderInfo(ProviderInfo_ROCM*
 }
 #endif
 
+#ifdef USE_TENSORRT
+void RegisterTensorRTPluginsAsCustomOps(PySessionOptions& so, const ProviderOptions& options) {
+  if (auto* tensorrt_provider_info = TryGetProviderInfo_TensorRT()) {
+    auto is_already_in_domains = [&](std::string& domain_name, std::vector<OrtCustomOpDomain*>& domains) {
+      for (auto ptr : domains) {
+        if (domain_name == ptr->domain_) {
+          return true;
+        }
+      }
+      return false;
+    };
+
+    std::string trt_extra_plugin_lib_paths = "";
+    const auto it = options.find("trt_extra_plugin_lib_paths");
+    if (it != options.end()) {
+      trt_extra_plugin_lib_paths = it->second;
+    }
+    std::vector<OrtCustomOpDomain*> domain_list;
+    tensorrt_provider_info->GetTensorRTCustomOpDomainList(domain_list, trt_extra_plugin_lib_paths);
+    for (auto ptr : domain_list) {
+      if (!is_already_in_domains(ptr->domain_, so.custom_op_domains_)) {
+        so.custom_op_domains_.push_back(ptr);
+      } else {
+        LOGS_DEFAULT(WARNING) << "The custom op domain name " << ptr->domain_ << " is already in session option.";
+      }
+    }
+  } else {
+    ORT_THROW("Please install TensorRT libraries as mentioned in the GPU requirements page, make sure they're in the PATH or LD_LIBRARY_PATH, and that your GPU is supported.");
+  }
+}
+#endif
+
 std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance(
     const SessionOptions& session_options,
     const std::string& type,
@@ -443,43 +475,14 @@ std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance(
     // If the environment variable 'ORT_TENSORRT_UNAVAILABLE' exists, then we do not load TensorRT. This is set by _ld_preload for the manylinux case
     // as in that case, trying to load the library itself will result in a crash due to the way that auditwheel strips dependencies.
     if (Env::Default().GetEnvironmentVar("ORT_TENSORRT_UNAVAILABLE").empty()) {
-      std::string calibration_table, cache_path, lib_path, min_profile, max_profile, opt_profile;
+      // provider_options_map is just a reference to the ProviderOptionsMap instance, so it can be released anytime from application.
+      // So we need these std::string variables defined here as they will be kept alive for the lifetime of TRT EP and we can still access them from OrtTensorRTProviderOptionsV2 instance.
+      // (The reason is string copy is involved, for example params.trt_engine_cache_path = cache_path.c_str() and those std::string variable is referenced by OrtTensorRTProviderOptionsV2 instance
+      // and TRT EP instance, so it won't be released.)
+      std::string calibration_table, cache_path, timing_cache_path, lib_path, trt_tactic_sources, trt_extra_plugin_lib_paths, min_profile, max_profile, opt_profile;
       auto it = provider_options_map.find(type);
       if (it != provider_options_map.end()) {
-        OrtTensorRTProviderOptionsV2 params{
-            0,
-            0,
-            nullptr,
-            1000,
-            1,
-            1 << 30,
-            0,
-            0,
-            nullptr,
-            0,
-            0,
-            0,
-            0,
-            0,
-            nullptr,
-            0,
-            nullptr,
-            0,
-            0,
-            0,
-            0,
-            0,
-            0,
-            0,
-            0,
-            2,
-            -1,
-            nullptr,
-            nullptr,
-            nullptr,
-            nullptr,
-            nullptr,
-            0};
+        OrtTensorRTProviderOptionsV2 params;
         for (auto option : it->second) {
           if (option.first == "device_id") {
             if (!option.second.empty()) {
@@ -620,6 +623,13 @@ std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance(
             } else {
               ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_timing_cache_enable' should be 'True' or 'False'. Default value is 'False'.\n");
             }
+          } else if (option.first == "trt_timing_cache_path") {
+            if (!option.second.empty()) {
+              timing_cache_path = option.second;
+              params.trt_timing_cache_path = timing_cache_path.c_str();
+            } else {
+              ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_timing_cache_path' should be a path string i.e. 'cache_folder/'.\n");
+            }
           } else if (option.first == "trt_force_timing_cache") {
             if (option.second == "True" || option.second == "true") {
               params.trt_force_timing_cache = true;
@@ -666,13 +676,15 @@ std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance(
             }
           } else if (option.first == "trt_tactic_sources") {
             if (!option.second.empty()) {
-              params.trt_tactic_sources = option.second.c_str();
+              trt_tactic_sources = option.second;
+              params.trt_tactic_sources = trt_tactic_sources.c_str();
             } else {
               ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_tactic_sources' should be a string. e.g. \"-CUDNN,+CUBLAS\" available keys: \"CUBLAS\"|\"CUBLAS_LT\"|\"CUDNN\"|\"EDGE_MASK_CONVOLUTIONS\".\n");
             }
           } else if (option.first == "trt_extra_plugin_lib_paths") {
             if (!option.second.empty()) {
-              params.trt_extra_plugin_lib_paths = option.second.c_str();
+              trt_extra_plugin_lib_paths = option.second;
+              params.trt_extra_plugin_lib_paths = trt_extra_plugin_lib_paths.c_str();
             } else {
               ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_extra_plugin_lib_paths' should be a path string.\n");
             }
@@ -718,33 +730,115 @@ std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance(
         }
       }
     }
-    LOGS_DEFAULT(WARNING) << "Failed to create " << type << ". Please reference https://onnxruntime.ai/docs/execution-providers/TensorRT-ExecutionProvider.html#requirements to ensure all dependencies are met.";
+    LOGS_DEFAULT(WARNING) << "Failed to create "
+                          << type
+                          << ". Please reference "
+                          << "https://onnxruntime.ai/docs/execution-providers/"
+                          << "TensorRT-ExecutionProvider.html#requirements to ensure all dependencies are met.";
 #endif
   } else if (type == kMIGraphXExecutionProvider) {
 #ifdef USE_MIGRAPHX
-    return onnxruntime::MIGraphXProviderFactoryCreator::Create(0)->CreateProvider();
+    std::string calibration_table;
+    auto it = provider_options_map.find(type);
+    if (it != provider_options_map.end()) {
+      OrtMIGraphXProviderOptions params{
+          0,
+          0,
+          0,
+          0,
+          nullptr};
+      for (auto option : it->second) {
+        if (option.first == "device_id") {
+          if (!option.second.empty()) {
+            params.device_id = std::stoi(option.second);
+          } else {
+            ORT_THROW("[ERROR] [MIGraphX] The value for the key 'device_id' should be a number i.e. '0'.\n");
+          }
+        } else if (option.first == "migraphx_fp16_enable") {
+          if (option.second == "True" || option.second == "true") {
+            params.migraphx_fp16_enable = true;
+          } else if (option.second == "False" || option.second == "false") {
+            params.migraphx_fp16_enable = false;
+          } else {
+            ORT_THROW(
+                "[ERROR] [MIGraphX] The value for the key 'trt_fp16_enable' should be"
+                " 'True' or 'False'. Default value is 'False'.\n");
+          }
+        } else if (option.first == "migraphx_int8_enable") {
+          if (option.second == "True" || option.second == "true") {
+            params.migraphx_int8_enable = true;
+          } else if (option.second == "False" || option.second == "false") {
+            params.migraphx_int8_enable = false;
+          } else {
+            ORT_THROW(
+                "[ERROR] [MIGraphX] The value for the key 'migx_int8_enable' should be"
+                " 'True' or 'False'. Default value is 'False'.\n");
+          }
+        } else if (option.first == "migraphx_int8_calibration_table_name") {
+          if (!option.second.empty()) {
+            calibration_table = option.second;
+            params.migraphx_int8_calibration_table_name = calibration_table.c_str();
+          } else {
+            ORT_THROW(
+                "[ERROR] [MIGraphX] The value for the key 'migx_int8_calibration_table_name' should be a "
+                "file name i.e. 'cal_table'.\n");
+          }
+        } else if (option.first == "migraphx_use_native_calibration_table") {
+          if (option.second == "True" || option.second == "true") {
+            params.migraphx_use_native_calibration_table = true;
+          } else if (option.second == "False" || option.second == "false") {
+            params.migraphx_use_native_calibration_table = false;
+          } else {
+            ORT_THROW(
+                "[ERROR] [MIGraphX] The value for the key 'migx_int8_use_native_calibration_table' should be"
+                " 'True' or 'False'. Default value is 'False'.\n");
+          }
+        } else {
+          ORT_THROW("Invalid MIGraphX EP option: ", option.first);
+        }
+      }
+      if (std::shared_ptr<IExecutionProviderFactory> migraphx_provider_factory =
+              onnxruntime::MIGraphXProviderFactoryCreator::Create(&params)) {
+        return migraphx_provider_factory->CreateProvider();
+      }
+    } else {
+      if (std::shared_ptr<IExecutionProviderFactory> migraphx_provider_factory =
+              onnxruntime::MIGraphXProviderFactoryCreator::Create(cuda_device_id)) {
+        return migraphx_provider_factory->CreateProvider();
+      }
+    }
 #endif
   } else if (type == kCudaExecutionProvider) {
 #ifdef USE_CUDA
-    // If the environment variable 'CUDA_UNAVAILABLE' exists, then we do not load cuda. This is set by _ld_preload for the manylinux case
-    // as in that case, trying to load the library itself will result in a crash due to the way that auditwheel strips dependencies.
+    // If the environment variable 'CUDA_UNAVAILABLE' exists, then we do not load cuda.
+    // This is set by _ld_preload for the manylinux case as in that case,
+    // trying to load the library itself will result in a crash due to the way that auditwheel strips dependencies.
     if (Env::Default().GetEnvironmentVar("ORT_CUDA_UNAVAILABLE").empty()) {
       if (auto* cuda_provider_info = TryGetProviderInfo_CUDA()) {
         const CUDAExecutionProviderInfo info = GetCudaExecutionProviderInfo(cuda_provider_info,
                                                                             provider_options_map);
 
-        // This variable is never initialized because the APIs by which it should be initialized are deprecated, however they still
-        // exist are are in-use. Neverthless, it is used to return CUDAAllocator, hence we must try to initialize it here if we can
-        // since FromProviderOptions might contain external CUDA allocator.
+        // This variable is never initialized because the APIs by which it should be initialized are deprecated,
+        // however they still exist are are in-use. Neverthless, it is used to return CUDAAllocator,
+        // hence we must try to initialize it here if we can since FromProviderOptions might contain
+        // external CUDA allocator.
         external_allocator_info = info.external_allocator_info;
         return cuda_provider_info->CreateExecutionProviderFactory(info)->CreateProvider();
       } else {
         if (!Env::Default().GetEnvironmentVar("CUDA_PATH").empty()) {
-          ORT_THROW("CUDA_PATH is set but CUDA wasn't able to be loaded. Please install the correct version of CUDA and cuDNN as mentioned in the GPU requirements page (https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirements), make sure they're in the PATH, and that your GPU is supported.");
+          ORT_THROW(
+              "CUDA_PATH is set but CUDA wasnt able to be loaded. Please install the correct version of CUDA and"
+              "cuDNN as mentioned in the GPU requirements page "
+              " (https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirements), "
+              " make sure they're in the PATH, and that your GPU is supported.");
         }
       }
     }
-    LOGS_DEFAULT(WARNING) << "Failed to create " << type << ". Please reference https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirements to ensure all dependencies are met.";
+    LOGS_DEFAULT(WARNING) << "Failed to create "
+                          << type
+                          << ". Please reference "
+                          << "https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirements"
+                          << "to ensure all dependencies are met.";
 #endif
   } else if (type == kRocmExecutionProvider) {
 #ifdef USE_ROCM
@@ -801,10 +895,10 @@ std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance(
         if (option.first == "device_type") {
           OV_provider_options_map[option.first] = option.second;
           continue;
-        } else if (option.first == "enable_vpu_fast_compile") {
+        } else if (option.first == "enable_npu_fast_compile") {
           if (!(option.second == "True" || option.second == "true" ||
                 option.second == "False" || option.second == "false")) {
-            ORT_THROW("Invalid value passed for enable_vpu_fast_compile: ", option.second);
+            ORT_THROW("Invalid value passed for enable_npu_fast_compile: ", option.second);
           }
           OV_provider_options_map[option.first] = option.second;
         } else if (option.first == "enable_opencl_throttling") {
@@ -887,18 +981,10 @@ std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance(
 #endif
   } else if (type == kDmlExecutionProvider) {
 #ifdef USE_DML
-    int device_id = 0;
-    auto it = provider_options_map.find(type);
-    if (it != provider_options_map.end()) {
-      for (auto option : it->second) {
-        if (option.first == "device_id") {
-          if (!option.second.empty()) {
-            device_id = std::stoi(option.second);
-          }
-        }
-      }
-    }
-    return onnxruntime::DMLProviderFactoryCreator::Create(device_id)->CreateProvider();
+    auto cit = provider_options_map.find(type);
+    return onnxruntime::DMLProviderFactoryCreator::CreateFromProviderOptions(
+               cit == provider_options_map.end() ? ProviderOptions{} : cit->second)
+        ->CreateProvider();
 #endif
   } else if (type == kNnapiExecutionProvider) {
 #if defined(USE_NNAPI)
@@ -1209,16 +1295,22 @@ void addGlobalMethods(py::module& m) {
   });
 #endif
 
+#ifdef USE_TENSORRT
+  m.def(
+      "register_tensorrt_plugins_as_custom_ops", [](PySessionOptions& so, const ProviderOptions& options) { RegisterTensorRTPluginsAsCustomOps(so, options); },
+      "Register TensorRT plugins as custom ops.");
+#endif
+
 #ifdef ENABLE_ATEN
   m.def("register_aten_op_executor",
-        [](const std::string& is_tensor_argument_address_str, const std::string& aten_op_executor_address_str) -> void {
-          size_t is_tensor_argument_address_int, aten_op_executor_address_int;
+        [](const std::string& is_cpu_argument_address_str, const std::string& aten_op_executor_address_str) -> void {
+          size_t is_cpu_argument_address_int, aten_op_executor_address_int;
           ORT_THROW_IF_ERROR(
-              ParseStringWithClassicLocale(is_tensor_argument_address_str, is_tensor_argument_address_int));
+              ParseStringWithClassicLocale(is_cpu_argument_address_str, is_cpu_argument_address_int));
           ORT_THROW_IF_ERROR(ParseStringWithClassicLocale(aten_op_executor_address_str, aten_op_executor_address_int));
-          void* p_is_tensor_argument = reinterpret_cast<void*>(is_tensor_argument_address_int);
+          void* p_is_cpu_argument = reinterpret_cast<void*>(is_cpu_argument_address_int);
           void* p_aten_op_executor = reinterpret_cast<void*>(aten_op_executor_address_int);
-          contrib::aten_ops::ATenOperatorExecutor::Instance().Initialize(p_is_tensor_argument, p_aten_op_executor);
+          contrib::aten_ops::ATenOperatorExecutor::Instance().Initialize(p_is_cpu_argument, p_aten_op_executor);
         });
 #endif
 }
diff --git a/onnxruntime/python/onnxruntime_pybind_state_common.h b/onnxruntime/python/onnxruntime_pybind_state_common.h
index 18a9079b5c4f2..a5bcbce89bac6 100644
--- a/onnxruntime/python/onnxruntime_pybind_state_common.h
+++ b/onnxruntime/python/onnxruntime_pybind_state_common.h
@@ -60,11 +60,11 @@ struct OrtStatus {
 #elif OPENVINO_CONFIG_GPU_FP16
 #define BACKEND_OPENVINO "-OPENVINO_GPU_FP16"
 
-#elif OPENVINO_CONFIG_VPUX_FP16
-#define BACKEND_OPENVINO "-OPENVINO_VPUX_FP16"
+#elif OPENVINO_CONFIG_NPU_FP16
+#define BACKEND_OPENVINO "-OPENVINO_NPU_FP16"
 
-#elif OPENVINO_CONFIG_VPUX_U8
-#define BACKEND_OPENVINO "-OPENVINO_VPUX_U8"
+#elif OPENVINO_CONFIG_NPU_U8
+#define BACKEND_OPENVINO "-OPENVINO_NPU_U8"
 
 #elif OPENVINO_CONFIG_MULTI
 #define BACKEND_OPENVINO "-OPENVINO_MULTI"
@@ -180,6 +180,13 @@ extern onnxruntime::ArenaExtendStrategy arena_extend_strategy;
 }  // namespace onnxruntime
 #endif
 
+#ifdef USE_TENSORRT
+namespace onnxruntime {
+ProviderInfo_TensorRT* TryGetProviderInfo_TensorRT();
+ProviderInfo_TensorRT& GetProviderInfo_TensorRT();
+}  // namespace onnxruntime
+#endif
+
 #ifdef USE_CANN
 namespace onnxruntime {
 ProviderInfo_CANN* TryGetProviderInfo_CANN();
diff --git a/onnxruntime/python/tools/kernel_explorer/device_array.h b/onnxruntime/python/tools/kernel_explorer/device_array.h
index bb868c2b7a59a..12c526fa0c813 100644
--- a/onnxruntime/python/tools/kernel_explorer/device_array.h
+++ b/onnxruntime/python/tools/kernel_explorer/device_array.h
@@ -62,8 +62,8 @@ class DeviceArray {
  private:
   std::shared_ptr<void> device_;
   void* host_;
-  ssize_t size_;
-  ssize_t itemsize_;
+  py::ssize_t size_;
+  py::ssize_t itemsize_;
 };
 
 }  // namespace onnxruntime
diff --git a/onnxruntime/python/tools/kernel_explorer/kernels/cuda/dequant_blockwise_bnb4.cu b/onnxruntime/python/tools/kernel_explorer/kernels/cuda/dequant_blockwise_bnb4.cu
new file mode 100644
index 0000000000000..3504ce1bebe8c
--- /dev/null
+++ b/onnxruntime/python/tools/kernel_explorer/kernels/cuda/dequant_blockwise_bnb4.cu
@@ -0,0 +1,89 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+// This file serve as a simple example for adding a tunable op to onnxruntime.
+
+#include <cuda_runtime_api.h>
+#include <cuda_fp16.h>
+#include <pybind11/pybind11.h>
+
+#include <string>
+
+#include "core/providers/cuda/tunable/cuda_tunable.h"
+#include "python/tools/kernel_explorer/kernel_explorer_interface.h"
+#include "python/tools/kernel_explorer/device_array.h"
+#include "contrib_ops/cuda/quantization/dequantize_blockwise_bnb4.cuh"
+
+namespace py = pybind11;
+
+namespace onnxruntime {
+
+// Extend the OpParams so that all specializations have the same parameter passing interface
+template <typename T>
+struct DequantizeBnb4Params : cuda::tunable::OpParams {
+  std::string Signature() const override { return std::to_string(n_); }
+
+  int quant_type_;
+  T* output_;
+  const uint8_t* quant_;
+  const T* absmax_;
+  T* quant_map_buffer_;
+  int n_;
+  int k_;
+};
+
+template <typename T>
+class DequantizeBnb4 : public IKernelExplorer {
+ public:
+  DequantizeBnb4(
+      int quant_type,
+      DeviceArray& output,
+      DeviceArray& quant,
+      DeviceArray& absmax,
+      DeviceArray& quant_map_buffer,
+      int n, int k) {
+    params_.tuning_ctx = TuningContext();
+    params_.stream = Stream();
+    params_.quant_type_ = quant_type;
+    params_.output_ = static_cast<T*>(output.ptr());
+    params_.quant_ = static_cast<uint8_t*>(quant.ptr());
+    params_.absmax_ = static_cast<T*>(absmax.ptr());
+    params_.quant_map_buffer_ = static_cast<T*>(quant_map_buffer.ptr());
+    params_.n_ = n;
+    params_.k_ = k;
+  }
+
+  void Run() override {
+    ORT_THROW_IF_ERROR(contrib::cuda::SetBnbQuantMap(
+        params_.quant_type_,
+        params_.quant_map_buffer_,
+        params_.StreamHandle()));
+    ORT_THROW_IF_ERROR(contrib::cuda::DequantizeBnb4(
+      params_.quant_map_buffer_,
+        params_.output_,
+        params_.quant_,
+        params_.absmax_,
+        64,
+        params_.n_ * params_.k_,
+        params_.StreamHandle()));
+  }
+
+ private:
+  // A VectorAddOp<T> is a callable that can process const VectorAddParams<T>*
+  using ParamsT = DequantizeBnb4Params<T>;
+  ParamsT params_{};
+};
+
+#define REGISTER_OP(name, type)                                                               \
+  py::class_<name<type>>(m, #name "_" #type)                                                  \
+      .def(py::init<int, DeviceArray&, DeviceArray&, DeviceArray&, DeviceArray&, int, int>()) \
+      .def("SetRepeats", &name<type>::SetRepeats)                                             \
+      .def("Profile", &name<type>::Profile)                                                   \
+      .def("Run", &name<type>::Run);
+
+KE_REGISTER(m) {
+  REGISTER_OP(DequantizeBnb4, half);
+  REGISTER_OP(DequantizeBnb4, float);
+}
+
+}  // namespace onnxruntime
diff --git a/onnxruntime/python/tools/kernel_explorer/kernels/cuda/dequant_blockwise_int4.cu b/onnxruntime/python/tools/kernel_explorer/kernels/cuda/dequant_blockwise_int4.cu
new file mode 100644
index 0000000000000..9b5e4079a7e31
--- /dev/null
+++ b/onnxruntime/python/tools/kernel_explorer/kernels/cuda/dequant_blockwise_int4.cu
@@ -0,0 +1,78 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+// This file serve as a simple example for adding a tunable op to onnxruntime.
+
+#include <cuda_runtime_api.h>
+#include <cuda_fp16.h>
+#include <pybind11/pybind11.h>
+
+#include <string>
+
+#include "core/providers/cuda/tunable/cuda_tunable.h"
+#include "python/tools/kernel_explorer/kernel_explorer_interface.h"
+#include "python/tools/kernel_explorer/device_array.h"
+#include "contrib_ops/cuda/quantization/dequantize_blockwise.cuh"
+
+namespace py = pybind11;
+
+namespace onnxruntime {
+
+// Extend the OpParams so that all specializations have the same parameter passing interface
+template <typename T>
+struct DequantizeInt4Params : cuda::tunable::OpParams {
+  std::string Signature() const override { return std::to_string(n_); }
+
+  T* output_;
+  const uint8_t* quant_;
+  const T* scales_;
+  const uint8_t* zero_points_;
+  int n_;
+  int k_;
+};
+
+template <typename T>
+class DequantizeInt4 : public IKernelExplorer {
+ public:
+  DequantizeInt4(DeviceArray& output, DeviceArray& quant, DeviceArray& scales, int n, int k) {
+    params_.tuning_ctx = TuningContext();
+    params_.stream = Stream();
+    params_.output_ = static_cast<T*>(output.ptr());
+    params_.quant_ = static_cast<uint8_t*>(quant.ptr());
+    params_.scales_ = static_cast<T*>(scales.ptr());
+    params_.zero_points_ = nullptr;
+    params_.n_ = n;
+    params_.k_ = k;
+  }
+
+  void Run() override {
+    ORT_THROW_IF_ERROR(contrib::cuda::Dequantize4Bits(
+        params_.output_,
+        params_.quant_,
+        params_.scales_,
+        params_.zero_points_,
+        params_.k_,
+        params_.n_,
+        32,
+        params_.StreamHandle()));
+  }
+
+ private:
+  // A VectorAddOp<T> is a callable that can process const VectorAddParams<T>*
+  using ParamsT = DequantizeInt4Params<T>;
+  ParamsT params_{};
+};
+
+#define REGISTER_OP(name, type)                                            \
+  py::class_<name<type>>(m, #name "_" #type)                               \
+      .def(py::init<DeviceArray&, DeviceArray&, DeviceArray&, int, int>()) \
+      .def("SetRepeats", &name<type>::SetRepeats)                          \
+      .def("Profile", &name<type>::Profile)                                \
+      .def("Run", &name<type>::Run);
+
+KE_REGISTER(m) {
+  REGISTER_OP(DequantizeInt4, half);
+  REGISTER_OP(DequantizeInt4, float);
+}
+
+}  // namespace onnxruntime
diff --git a/onnxruntime/python/tools/kernel_explorer/kernels/cuda/gemm.cu b/onnxruntime/python/tools/kernel_explorer/kernels/cuda/gemm.cu
new file mode 100644
index 0000000000000..fd9e9c4fd1612
--- /dev/null
+++ b/onnxruntime/python/tools/kernel_explorer/kernels/cuda/gemm.cu
@@ -0,0 +1,94 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+// This file serve as a simple example for adding a tunable op to onnxruntime.
+
+#include <cuda_runtime_api.h>
+#include <cuda_fp16.h>
+
+#include <pybind11/pybind11.h>
+
+#include <string>
+
+#include "core/providers/cuda/tunable/cuda_tunable.h"
+#include "core/providers/cuda/shared_inc/fpgeneric.h"
+#include "core/providers/cuda/cuda_stream_handle.h"
+#include "python/tools/kernel_explorer/kernel_explorer_interface.h"
+#include "python/tools/kernel_explorer/kernels/vector_add_kernel.cuh"
+#include "contrib_ops/cuda/quantization/matmul_nbits.cuh"
+
+namespace py = pybind11;
+
+namespace onnxruntime {
+
+// Extend the OpParams so that all specializations have the same parameter passing interface
+template <typename T>
+struct GemmBenchmarkParams : cuda::tunable::OpParams {
+  std::string Signature() const override { return std::to_string(n_); }
+
+  T* output_;
+  const T* a_;
+  const T* b_;
+  int m_;
+  int n_;
+  int k_;
+  cublasHandle_t cublas_handle;
+};
+
+template <typename T>
+class GemmBenchmark : public IKernelExplorer {
+ public:
+  GemmBenchmark(DeviceArray& output, DeviceArray& a, DeviceArray& b, int m, int n, int k) {
+    params_.tuning_ctx = TuningContext();
+    params_.stream = Stream();
+    params_.output_ = static_cast<T*>(output.ptr());
+    params_.a_ = static_cast<T*>(a.ptr());
+    params_.b_ = static_cast<T*>(b.ptr());
+    params_.m_ = m;
+    params_.n_ = n;
+    params_.k_ = k;
+
+    CUBLAS_CALL_THROW(cublasCreate(&(params_.cublas_handle)));
+    CUDA_CALL_THROW(cudaGetDeviceProperties(&device_prop_, 0));
+  }
+
+  void Run() override {
+    typedef typename ToCudaType<T>::MappedType CudaT;
+    CudaT one = ToCudaType<T>::FromFloat(1.0f);
+    CudaT zero = ToCudaType<T>::FromFloat(0.0f);
+    CUBLAS_CALL_THROW(cublasGemmHelper(
+        params_.cublas_handle,
+        CUBLAS_OP_N,
+        CUBLAS_OP_N,
+        params_.n_, params_.m_, params_.k_,
+        &one,
+        reinterpret_cast<const CudaT*>(params_.b_),
+        params_.n_,
+        reinterpret_cast<const CudaT*>(params_.a_),
+        params_.k_,
+        &zero,
+        params_.output_,
+        params_.n_,
+        device_prop_));
+  }
+
+ private:
+  // A VectorAddOp<T> is a callable that can process const VectorAddParams<T>*
+  using ParamsT = GemmBenchmarkParams<T>;
+  ParamsT params_{};
+  cudaDeviceProp device_prop_;
+};
+
+#define REGISTER_OP(name, type)                                                               \
+  py::class_<name<type>>(m, #name "_" #type)                                                  \
+      .def(py::init<DeviceArray&, DeviceArray&, DeviceArray&, int, int, int>()) \
+      .def("SetRepeats", &name<type>::SetRepeats)                                             \
+      .def("Profile", &name<type>::Profile)                                                   \
+      .def("Run", &name<type>::Run);
+
+KE_REGISTER(m) {
+  REGISTER_OP(GemmBenchmark, half);
+  REGISTER_OP(GemmBenchmark, float);
+}
+
+}  // namespace onnxruntime
diff --git a/onnxruntime/python/tools/kernel_explorer/kernels/cuda/matmul_4bits.cu b/onnxruntime/python/tools/kernel_explorer/kernels/cuda/matmul_4bits.cu
new file mode 100644
index 0000000000000..9e8c4cd7be36e
--- /dev/null
+++ b/onnxruntime/python/tools/kernel_explorer/kernels/cuda/matmul_4bits.cu
@@ -0,0 +1,102 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+// This file serve as a simple example for adding a tunable op to onnxruntime.
+
+#include <cuda_runtime_api.h>
+#include <cuda_fp16.h>
+#include <pybind11/pybind11.h>
+
+#include <string>
+
+#include "core/providers/cuda/tunable/cuda_tunable.h"
+#include "python/tools/kernel_explorer/kernel_explorer_interface.h"
+#include "python/tools/kernel_explorer/kernels/vector_add_kernel.cuh"
+#include "contrib_ops/cuda/quantization/matmul_nbits.cuh"
+
+namespace py = pybind11;
+
+namespace onnxruntime {
+
+// Extend the OpParams so that all specializations have the same parameter passing interface
+template <typename T>
+struct MatrixFloatInt4Params : cuda::tunable::OpParams {
+  std::string Signature() const override { return std::to_string(n_); }
+
+  T* output_;
+  const T* a_;
+  const uint8_t* b_;
+  const T* scales_;
+  const uint8_t* zero_points_;
+  int m_;
+  int n_;
+  int k_;
+};
+
+template <typename T>
+class MatrixFloatInt4 : public IKernelExplorer {
+ public:
+  MatrixFloatInt4(DeviceArray& output,
+                  DeviceArray& a,
+                  DeviceArray& b,
+                  DeviceArray& scales,
+                  int m, int n, int k) {
+    params_.tuning_ctx = TuningContext();
+    params_.stream = Stream();
+    params_.output_ = static_cast<T*>(output.ptr());
+    params_.a_ = static_cast<T*>(a.ptr());
+    params_.b_ = static_cast<uint8_t*>(b.ptr());
+    params_.scales_ = static_cast<T*>(scales.ptr());
+    params_.zero_points_ = nullptr;
+    params_.m_ = m;
+    params_.n_ = n;
+    params_.k_ = k;
+
+    CUDA_CALL_THROW(cudaGetDeviceProperties(&device_prop_, 0));
+  }
+
+  MatrixFloatInt4(DeviceArray& output,
+                  DeviceArray& a,
+                  DeviceArray& b,
+                  DeviceArray& scales,
+                  DeviceArray& zeropoints,
+                  int m, int n, int k) : MatrixFloatInt4(output, a, b, scales, m, n, k) {
+    params_.zero_points_ = static_cast<uint8_t*>(zeropoints.ptr());
+  }
+
+  void Run() override {
+    contrib::cuda::TryMatMul4Bits<T>(
+        params_.output_,
+        params_.a_,
+        params_.b_,
+        params_.scales_,
+        params_.zero_points_,
+        params_.m_,
+        params_.n_,
+        params_.k_,
+        32,
+        static_cast<int>(device_prop_.sharedMemPerBlock),
+        params_.StreamHandle());
+  }
+
+ private:
+  // A VectorAddOp<T> is a callable that can process const VectorAddParams<T>*
+  using ParamsT = MatrixFloatInt4Params<T>;
+  ParamsT params_{};
+  cudaDeviceProp device_prop_;
+};
+
+#define REGISTER_OP(name, type)                                                                             \
+  py::class_<name<type>>(m, #name "_" #type)                                                                \
+      .def(py::init<DeviceArray&, DeviceArray&, DeviceArray&, DeviceArray&, int, int, int>())               \
+      .def(py::init<DeviceArray&, DeviceArray&, DeviceArray&, DeviceArray&, DeviceArray&, int, int, int>()) \
+      .def("SetRepeats", &name<type>::SetRepeats)                                                           \
+      .def("Profile", &name<type>::Profile)                                                                 \
+      .def("Run", &name<type>::Run);
+
+KE_REGISTER(m) {
+  REGISTER_OP(MatrixFloatInt4, half);
+  REGISTER_OP(MatrixFloatInt4, float);
+}
+
+}  // namespace onnxruntime
diff --git a/onnxruntime/python/tools/kernel_explorer/kernels/cuda/matmul_bnb4.cu b/onnxruntime/python/tools/kernel_explorer/kernels/cuda/matmul_bnb4.cu
new file mode 100644
index 0000000000000..e4cd83565357a
--- /dev/null
+++ b/onnxruntime/python/tools/kernel_explorer/kernels/cuda/matmul_bnb4.cu
@@ -0,0 +1,96 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+// This file serve as a simple example for adding a tunable op to onnxruntime.
+
+#include <cuda_runtime_api.h>
+#include <cuda_fp16.h>
+#include <pybind11/pybind11.h>
+
+#include <string>
+
+#include "core/providers/cuda/tunable/cuda_tunable.h"
+#include "python/tools/kernel_explorer/kernel_explorer_interface.h"
+#include "python/tools/kernel_explorer/kernels/vector_add_kernel.cuh"
+#include "contrib_ops/cuda/quantization/dequantize_blockwise_bnb4.cuh"
+#include "contrib_ops/cuda/quantization/matmul_bnb4.cuh"
+
+namespace py = pybind11;
+
+namespace onnxruntime {
+
+// Extend the OpParams so that all specializations have the same parameter passing interface
+template <typename T>
+struct MatrixFloatBnb4Params : cuda::tunable::OpParams {
+  std::string Signature() const override { return std::to_string(n_); }
+
+  int quant_type_;
+  T* output_;
+  const T* a_;
+  const uint8_t* b_;
+  const T* absmax_;
+  T* quant_map_buffer_;
+  int m_;
+  int n_;
+  int k_;
+};
+
+template <typename T>
+class MatrixFloatBnb4 : public IKernelExplorer {
+ public:
+  MatrixFloatBnb4(DeviceArray& output,
+                  DeviceArray& a,
+                  DeviceArray& b,
+                  DeviceArray& absmax,
+                  DeviceArray& quant_map_buffer,
+                  int quant_type, int m, int n, int k) {
+    params_.tuning_ctx = TuningContext();
+    params_.stream = Stream();
+    params_.output_ = static_cast<T*>(output.ptr());
+    params_.a_ = static_cast<T*>(a.ptr());
+    params_.b_ = static_cast<uint8_t*>(b.ptr());
+    params_.absmax_ = static_cast<T*>(absmax.ptr());
+    params_.quant_map_buffer_ = static_cast<T*>(quant_map_buffer.ptr());
+    params_.quant_type_ = quant_type;
+    params_.m_ = m;
+    params_.n_ = n;
+    params_.k_ = k;
+  }
+
+  void Run() override {
+    ORT_THROW_IF_ERROR(contrib::cuda::SetBnbQuantMap(
+        params_.quant_type_,
+        params_.quant_map_buffer_,
+        params_.StreamHandle()));
+    contrib::cuda::TryMatMulBnb4(
+        params_.quant_map_buffer_,
+        params_.output_,
+        params_.a_,
+        params_.b_,
+        params_.absmax_,
+        params_.m_,
+        params_.n_,
+        params_.k_,
+        64,
+        params_.StreamHandle());
+  }
+
+ private:
+  // A VectorAddOp<T> is a callable that can process const VectorAddParams<T>*
+  using ParamsT = MatrixFloatBnb4Params<T>;
+  ParamsT params_{};
+};
+
+#define REGISTER_OP(name, type)                                                                                  \
+  py::class_<name<type>>(m, #name "_" #type)                                                                     \
+      .def(py::init<DeviceArray&, DeviceArray&, DeviceArray&, DeviceArray&, DeviceArray&, int, int, int, int>()) \
+      .def("SetRepeats", &name<type>::SetRepeats)                                                                \
+      .def("Profile", &name<type>::Profile)                                                                      \
+      .def("Run", &name<type>::Run);
+
+KE_REGISTER(m) {
+  REGISTER_OP(MatrixFloatBnb4, half);
+  REGISTER_OP(MatrixFloatBnb4, float);
+}
+
+}  // namespace onnxruntime
diff --git a/onnxruntime/python/tools/kernel_explorer/kernels/dequantize_blockwise_bnb4.py b/onnxruntime/python/tools/kernel_explorer/kernels/dequantize_blockwise_bnb4.py
new file mode 100644
index 0000000000000..140151aadcc0f
--- /dev/null
+++ b/onnxruntime/python/tools/kernel_explorer/kernels/dequantize_blockwise_bnb4.py
@@ -0,0 +1,92 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+
+import sys
+from dataclasses import dataclass
+
+import kernel_explorer as ke
+import numpy as np
+from utils import dtype_to_bytes
+
+
+def dtype_to_funcs(dtype):
+    type_map = {
+        "float16": list(filter(lambda x: "DequantizeBnb4_half" in x, dir(ke))),
+        "float32": list(filter(lambda x: "DequantizeBnb4_float" in x, dir(ke))),
+    }
+    return type_map[dtype]
+
+
+quant_enums = {"FP4": 0, "NF4": 1}
+
+
+dtypes = ["float16", "float32"]
+quant_types = ["FP4", "NF4"]
+
+
+@dataclass
+class DequantizeBnb4Metric(ke.BandwidthMetric):
+    quant_type: str
+    n: int
+    k: int
+
+    def report(self):
+        return (
+            f"{self.duration:6.2f} us {self.gbps:5.2f} GB/s"
+            f" {self.quant_type} {self.dtype} n={self.n} k={self.k} {self.name}"
+        )
+
+
+def profile_dequantize_int4_func(qt, n, k, dtype, func):
+    np.random.seed(0)
+    block_size = 64
+    numel = n * k
+    output = np.random.rand(n, k).astype(dtype)
+    quant = np.random.randint(low=0, high=255, size=(numel + 1) // 2).astype("uint8")
+    absmax = np.random.rand((numel + block_size - 1) // block_size).astype(dtype)
+    quant_map_buffer = np.zeros(16).astype(dtype)
+
+    output_d = ke.DeviceArray(output)
+    quant_d = ke.DeviceArray(quant)
+    absmax_d = ke.DeviceArray(absmax)
+    quant_map_buffer_d = ke.DeviceArray(quant_map_buffer)
+    f = getattr(ke, func)
+    my_op = f(quant_enums[qt], output_d, quant_d, absmax_d, quant_map_buffer_d, n, k)
+    duration_ms = my_op.Profile()
+    total_bytes = numel / 2 + (numel + numel / block_size) * dtype_to_bytes(dtype)
+
+    ke.report(DequantizeBnb4Metric(func, dtype, duration_ms, total_bytes, qt, n, k))
+
+
+def profile_with_args(qt, n, k, dtype, sort):
+    with ke.benchmark(sort):
+        for func in dtype_to_funcs(dtype):
+            profile_dequantize_int4_func(qt, n, k, dtype, func)
+
+
+def profile():
+    for qt in quant_types:
+        for dt in dtypes:
+            for n, k in ((4096, 4096), (4096, 12288), (12288, 4096)):
+                profile_with_args(qt, n, k, dt, True)
+                print()
+
+
+if __name__ == "__main__":
+    import argparse
+
+    parser = argparse.ArgumentParser()
+    group = parser.add_argument_group("profile with args")
+    group.add_argument("n", type=int)
+    group.add_argument("k", type=int)
+    group.add_argument("quant_type", choices=quant_types)
+    group.add_argument("dtype", choices=dtypes)
+    group.add_argument("--sort", action="store_true")
+
+    if len(sys.argv) == 1:
+        profile()
+    else:
+        args = parser.parse_args()
+        profile_with_args(args.quant_type, args.n, args.k, args.dtype, args.sort)
diff --git a/onnxruntime/python/tools/kernel_explorer/kernels/dequantize_blockwise_int4.py b/onnxruntime/python/tools/kernel_explorer/kernels/dequantize_blockwise_int4.py
new file mode 100644
index 0000000000000..7088039f9e531
--- /dev/null
+++ b/onnxruntime/python/tools/kernel_explorer/kernels/dequantize_blockwise_int4.py
@@ -0,0 +1,78 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+
+import sys
+from dataclasses import dataclass
+
+import kernel_explorer as ke
+import numpy as np
+from utils import dtype_to_bytes
+
+
+def dtype_to_funcs(dtype):
+    type_map = {
+        "float16": list(filter(lambda x: "DequantizeInt4_half" in x, dir(ke))),
+        "float32": list(filter(lambda x: "DequantizeInt4_float" in x, dir(ke))),
+    }
+    return type_map[dtype]
+
+
+dtypes = ["float16", "float32"]
+
+
+@dataclass
+class DequantizeInt4Metric(ke.BandwidthMetric):
+    n: int
+    k: int
+
+    def report(self):
+        return f"{self.duration:6.2f} us {self.gbps:5.2f} GB/s {self.dtype} n={self.n} k={self.k} {self.name}"
+
+
+def profile_dequantize_int4_func(n, k, dtype, func):
+    np.random.seed(0)
+    output = np.random.rand(n, k).astype(dtype)
+    quant = np.random.randint(low=0, high=127, size=(n, (k + 31) // 32, 16)).astype("uint8")
+    scales = np.random.rand(n, (k + 31) // 32).astype(dtype)
+
+    output_d = ke.DeviceArray(output)
+    quant_d = ke.DeviceArray(quant)
+    scales_d = ke.DeviceArray(scales)
+    f = getattr(ke, func)
+    my_op = f(output_d, quant_d, scales_d, n, k)
+    duration_ms = my_op.Profile()
+    total_bytes = (n * k) / 2 + (n * k + n * k / 32) * dtype_to_bytes(dtype)
+
+    ke.report(DequantizeInt4Metric(func, dtype, duration_ms, total_bytes, n, k))
+
+
+def profile_with_args(n, k, dtype, sort):
+    with ke.benchmark(sort):
+        for func in dtype_to_funcs(dtype):
+            profile_dequantize_int4_func(n, k, dtype, func)
+
+
+def profile():
+    for dt in dtypes:
+        for n, k in ((4096, 4096), (4096, 12288), (12288, 4096)):
+            profile_with_args(n, k, dt, True)
+            print()
+
+
+if __name__ == "__main__":
+    import argparse
+
+    parser = argparse.ArgumentParser()
+    group = parser.add_argument_group("profile with args")
+    group.add_argument("n", type=int)
+    group.add_argument("k", type=int)
+    group.add_argument("dtype", choices=dtypes)
+    group.add_argument("--sort", action="store_true")
+
+    if len(sys.argv) == 1:
+        profile()
+    else:
+        args = parser.parse_args()
+        profile_with_args(args.n, args.k, args.dtype, args.sort)
diff --git a/onnxruntime/python/tools/kernel_explorer/kernels/gemm_test.py b/onnxruntime/python/tools/kernel_explorer/kernels/gemm_test.py
index e378f3e1cc198..8182cdb17567c 100644
--- a/onnxruntime/python/tools/kernel_explorer/kernels/gemm_test.py
+++ b/onnxruntime/python/tools/kernel_explorer/kernels/gemm_test.py
@@ -179,6 +179,7 @@ def profile_with_args(dtype, transa, transb, m, n, k, sort):
         profile_gemm_func(getattr(ke, "RocBlasGemm" + dtype_suffix), dtype, transa, transb, m, n, k)
         profile_gemm_func(getattr(ke, "CKGemm" + dtype_suffix + transab_suffix), dtype, transa, transb, m, n, k)
         profile_gemm_func(getattr(ke, "GemmTunable" + dtype_suffix + transab_suffix), dtype, transa, transb, m, n, k)
+        profile_gemm_func(getattr(ke, "GemmBenchmark" + dtype_suffix), dtype, transa, transb, m, n, k)
         if ke.is_hipblaslt_available():
             profile_gemm_func(
                 getattr(ke, "GemmHipBlasLt" + dtype_suffix + transab_suffix), dtype, transa, transb, m, n, k
diff --git a/onnxruntime/python/tools/kernel_explorer/kernels/matmul_4bits.py b/onnxruntime/python/tools/kernel_explorer/kernels/matmul_4bits.py
new file mode 100644
index 0000000000000..111e156cd6d01
--- /dev/null
+++ b/onnxruntime/python/tools/kernel_explorer/kernels/matmul_4bits.py
@@ -0,0 +1,132 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+
+import sys
+from dataclasses import dataclass
+
+import kernel_explorer as ke
+import numpy as np
+from utils import dtype_to_bytes
+
+
+def dtype_to_funcs(dtype):
+    type_map = {
+        "float16": list(filter(lambda x: "MatrixFloatInt4_half" in x, dir(ke))),
+        "float32": list(filter(lambda x: "MatrixFloatInt4_float" in x, dir(ke))),
+    }
+    return type_map[dtype]
+
+
+def dtype_to_funcs_cublas(dtype):
+    type_map = {
+        "float16": list(filter(lambda x: "GemmBenchmark_half" in x, dir(ke))),
+        "float32": list(filter(lambda x: "GemmBenchmark_float" in x, dir(ke))),
+    }
+    return type_map[dtype]
+
+
+dtypes = ["float16", "float32"]
+
+
+@dataclass
+class MatrixMulMetric(ke.BandwidthMetric):
+    m: int
+    n: int
+    k: int
+
+    def report(self):
+        return (
+            f"{self.duration:6.2f} us {self.gbps:5.2f} GB/s {self.dtype} m={self.m} n={self.n} k={self.k} {self.name}"
+        )
+
+
+@dataclass
+class MatrixFpInt4Metric(MatrixMulMetric):
+    is_symmetric: bool
+
+    def report(self):
+        return f"{self.duration:6.2f} us {self.gbps:5.2f} GB/s {self.dtype} m={self.m} n={self.n} k={self.k} is_symmetric={self.is_symmetric} {self.name}"
+
+
+def profile_matmul_fp_int4_func(m, n, k, dtype, func, is_symmetric):
+    np.random.seed(0)
+    output = np.random.rand(m, n).astype(dtype)
+    a = np.random.rand(m, k).astype(dtype)
+    b = np.random.randint(low=0, high=127, size=(n, (k + 31) // 32, 16)).astype("uint8")
+    scales = np.random.rand(n * ((k + 31) // 32)).astype(dtype)
+    zeropoints = np.random.rand(n * (((k + 31) // 32 + 1) // 2)).astype(dtype)
+
+    output_d = ke.DeviceArray(output)
+    a_d = ke.DeviceArray(a)
+    b_d = ke.DeviceArray(b)
+    scales_d = ke.DeviceArray(scales)
+    zeropoints_d = ke.DeviceArray(zeropoints)
+    f = getattr(ke, func)
+
+    my_op = (
+        f(output_d, a_d, b_d, scales_d, m, n, k)
+        if is_symmetric
+        else f(output_d, a_d, b_d, scales_d, zeropoints_d, m, n, k)
+    )
+    duration_ms = my_op.Profile()
+    total_bytes = (m * k + n * k + m * n) * (dtype_to_bytes(dtype))
+
+    ke.report(MatrixFpInt4Metric(func, dtype, duration_ms, total_bytes, m, n, k, is_symmetric))
+
+
+def profile_gemm_func(m, n, k, dtype, func):
+    np.random.seed(0)
+    output = np.random.rand(m, n).astype(dtype)
+    a = np.random.rand(m, k).astype(dtype)
+    b = np.random.rand(k, n).astype(dtype)
+
+    output_d = ke.DeviceArray(output)
+    a_d = ke.DeviceArray(a)
+    b_d = ke.DeviceArray(b)
+    f = getattr(ke, func)
+    my_op = f(output_d, a_d, b_d, m, n, k)
+    duration_ms = my_op.Profile()
+    total_bytes = (m * k + n * k + m * n) * (dtype_to_bytes(dtype))
+
+    ke.report(MatrixMulMetric(func, dtype, duration_ms, total_bytes, m, n, k))
+
+
+def profile_with_args(m, n, k, dtype, sort):
+    with ke.benchmark(sort):
+        for func in dtype_to_funcs(dtype):
+            profile_matmul_fp_int4_func(m, n, k, dtype, func, True)
+
+        for func in dtype_to_funcs(dtype):
+            profile_matmul_fp_int4_func(m, n, k, dtype, func, False)
+
+        for func in dtype_to_funcs_cublas(dtype):
+            profile_gemm_func(m, n, k, dtype, func)
+
+
+def profile():
+    dims_m = [1]
+    for dt in dtypes:
+        for m in dims_m:
+            for n, k in ((4096, 4096), (4096, 12288), (12288, 4096)):
+                profile_with_args(m, n, k, dt, False)
+                print()
+
+
+if __name__ == "__main__":
+    import argparse
+
+    parser = argparse.ArgumentParser()
+    group = parser.add_argument_group("profile with args")
+    group.add_argument("m", type=int)
+    group.add_argument("n", type=int)
+    group.add_argument("k", type=int)
+    group.add_argument("dtype", choices=dtypes)
+    group.add_argument("--sort", action="store_true")
+
+    if len(sys.argv) == 1:
+        profile()
+    else:
+        args = parser.parse_args()
+        profile_with_args(args.m, args.n, args.k, args.dtype, args.sort)
diff --git a/onnxruntime/python/tools/kernel_explorer/kernels/matmul_bnb4.py b/onnxruntime/python/tools/kernel_explorer/kernels/matmul_bnb4.py
new file mode 100644
index 0000000000000..4a9489050fd61
--- /dev/null
+++ b/onnxruntime/python/tools/kernel_explorer/kernels/matmul_bnb4.py
@@ -0,0 +1,136 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+
+import sys
+from dataclasses import dataclass
+
+import kernel_explorer as ke
+import numpy as np
+from utils import dtype_to_bytes
+
+
+def dtype_to_funcs(dtype):
+    type_map = {
+        "float16": list(filter(lambda x: "MatrixFloatBnb4_half" in x, dir(ke))),
+        "float32": list(filter(lambda x: "MatrixFloatBnb4_float" in x, dir(ke))),
+    }
+    return type_map[dtype]
+
+
+def dtype_to_funcs_cublas(dtype):
+    type_map = {
+        "float16": list(filter(lambda x: "GemmBenchmark_half" in x, dir(ke))),
+        "float32": list(filter(lambda x: "GemmBenchmark_float" in x, dir(ke))),
+    }
+    return type_map[dtype]
+
+
+quant_enums = {"FP4": 0, "NF4": 1}
+
+
+dtypes = ["float16", "float32"]
+quant_types = ["FP4", "NF4"]
+
+
+@dataclass
+class MatrixMulMetric(ke.BandwidthMetric):
+    m: int
+    n: int
+    k: int
+
+    def report(self):
+        return (
+            f"{self.duration:6.2f} us {self.gbps:5.2f} GB/s {self.dtype} m={self.m} n={self.n} k={self.k} {self.name}"
+        )
+
+
+@dataclass
+class MatrixFpBnb4Metric(MatrixMulMetric):
+    quant_type: str
+
+    def report(self):
+        return (
+            f"{self.duration:6.2f} us {self.gbps:5.2f} GB/s"
+            f" {self.quant_type} {self.dtype} m={self.m} n={self.n} k={self.k} {self.name}"
+        )
+
+
+def profile_matmul_fp_bnb4_func(qt, m, n, k, dtype, func):
+    np.random.seed(0)
+    block_size = 64
+    numel = n * k
+    output = np.random.rand(m, n).astype(dtype)
+    a = np.random.rand(m, k).astype(dtype)
+    b = np.random.randint(low=0, high=255, size=(numel + 1) // 2).astype("uint8")
+    absmax = np.random.rand((numel + block_size - 1) // block_size).astype(dtype)
+    quant_map_buffer = np.zeros(16).astype(dtype)
+
+    output_d = ke.DeviceArray(output)
+    a_d = ke.DeviceArray(a)
+    b_d = ke.DeviceArray(b)
+    absmax_d = ke.DeviceArray(absmax)
+    quant_map_buffer_d = ke.DeviceArray(quant_map_buffer)
+    f = getattr(ke, func)
+
+    my_op = f(output_d, a_d, b_d, absmax_d, quant_map_buffer_d, quant_enums[qt], m, n, k)
+    duration_ms = my_op.Profile()
+    total_bytes = (m * k + n * k + m * n) * (dtype_to_bytes(dtype))
+
+    ke.report(MatrixFpBnb4Metric(func, dtype, duration_ms, total_bytes, m, n, k, qt))
+
+
+def profile_gemm_func(m, n, k, dtype, func):
+    np.random.seed(0)
+    output = np.random.rand(m, n).astype(dtype)
+    a = np.random.rand(m, k).astype(dtype)
+    b = np.random.rand(k, n).astype(dtype)
+
+    output_d = ke.DeviceArray(output)
+    a_d = ke.DeviceArray(a)
+    b_d = ke.DeviceArray(b)
+    f = getattr(ke, func)
+    my_op = f(output_d, a_d, b_d, m, n, k)
+    duration_ms = my_op.Profile()
+    total_bytes = (m * k + n * k + m * n) * (dtype_to_bytes(dtype))
+
+    ke.report(MatrixMulMetric(func, dtype, duration_ms, total_bytes, m, n, k))
+
+
+def profile_with_args(qt, m, n, k, dtype, sort):
+    with ke.benchmark(sort):
+        for func in dtype_to_funcs(dtype):
+            profile_matmul_fp_bnb4_func(qt, m, n, k, dtype, func)
+
+        for func in dtype_to_funcs_cublas(dtype):
+            profile_gemm_func(m, n, k, dtype, func)
+
+
+def profile():
+    dims_m = [1]
+    for qt in quant_types:
+        for dt in dtypes:
+            for m in dims_m:
+                for n, k in ((4096, 4096), (4096, 12288), (12288, 4096)):
+                    profile_with_args(qt, m, n, k, dt, False)
+                    print()
+
+
+if __name__ == "__main__":
+    import argparse
+
+    parser = argparse.ArgumentParser()
+    group = parser.add_argument_group("profile with args")
+    group.add_argument("m", type=int)
+    group.add_argument("n", type=int)
+    group.add_argument("k", type=int)
+    group.add_argument("quant_type", choices=quant_types)
+    group.add_argument("dtype", choices=dtypes)
+    group.add_argument("--sort", action="store_true")
+
+    if len(sys.argv) == 1:
+        profile()
+    else:
+        args = parser.parse_args()
+        profile_with_args(args.quant_type, args.m, args.n, args.k, args.dtype, args.sort)
diff --git a/onnxruntime/python/tools/onnxruntime_test.py b/onnxruntime/python/tools/onnxruntime_test.py
index c20e055d72720..5605568edaccc 100644
--- a/onnxruntime/python/tools/onnxruntime_test.py
+++ b/onnxruntime/python/tools/onnxruntime_test.py
@@ -40,7 +40,7 @@ def generate_feeds(sess, symbolic_dims: dict | None = None):
             if not dim:
                 # unknown dim
                 shape.append(1)
-            elif type(dim) == str:
+            elif isinstance(dim, str):
                 # symbolic dim. see if we have a value otherwise use 1
                 if dim in symbolic_dims:
                     shape.append(int(symbolic_dims[dim]))
diff --git a/onnxruntime/python/tools/profile_explorer/profile_explorer.py b/onnxruntime/python/tools/profile_explorer/profile_explorer.py
index 78f8805a89076..6e0747883989f 100644
--- a/onnxruntime/python/tools/profile_explorer/profile_explorer.py
+++ b/onnxruntime/python/tools/profile_explorer/profile_explorer.py
@@ -82,8 +82,8 @@ def _shape_to_string(shape):
     for dict_obj in shape:
         if len(dict_obj) > 1:
             raise ValueError("Unhandled type in _shape_to_string()")
-        key = list(dict_obj.keys())[0]
-        value = list(dict_obj.values())[0]
+        key = next(iter(dict_obj.keys()))
+        value = next(iter(dict_obj.values()))
         if len(res) != 0:
             res += ","
         res += f'{key}({"x".join(str(v) for v in value)})'
diff --git a/onnxruntime/python/tools/qnn/gen_qnn_ctx_onnx_model.py b/onnxruntime/python/tools/qnn/gen_qnn_ctx_onnx_model.py
new file mode 100644
index 0000000000000..1cd8793ab14ec
--- /dev/null
+++ b/onnxruntime/python/tools/qnn/gen_qnn_ctx_onnx_model.py
@@ -0,0 +1,150 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+
+import json
+from argparse import ArgumentParser
+
+import onnx
+from onnx import TensorProto, helper
+
+
+class QnnTensorStruct:
+    def __init__(self):
+        self.name = ""
+        self.onnx_data_type = TensorProto.FLOAT
+        self.dim = []
+
+
+def qnn_data_type_to_onnx_data_type(qnn_data_type):
+    # QNN_DATATYPE_UFIXED_POINT_8 QNN_DATATYPE_UINT_8
+    if qnn_data_type == 0x0408 or qnn_data_type == 0x0108:
+        return TensorProto.UINT8
+    # QNN_DATATYPE_UFIXED_POINT_16 QNN_DATATYPE_UINT_16
+    elif qnn_data_type == 0x0416 or qnn_data_type == 0x0116:
+        return TensorProto.UINT16
+    # QNN_DATATYPE_UFIXED_POINT_32 QNN_DATATYPE_UINT_32
+    elif qnn_data_type == 0x0432 or qnn_data_type == 0x0132:
+        return TensorProto.UINT32
+    # QNN_DATATYPE_UINT_64
+    elif qnn_data_type == 0x0164:
+        return TensorProto.UINT64
+    # QNN_DATATYPE_FIXED_POINT_8 QNN_DATATYPE_INT_8
+    elif qnn_data_type == 0x0308 or qnn_data_type == 0x0008:
+        return TensorProto.INT8
+    # QNN_DATATYPE_FIXED_POINT_16 QNN_DATATYPE_INT_16
+    elif qnn_data_type == 0x0316 or qnn_data_type == 0x0016:
+        return TensorProto.INT16
+    # QNN_DATATYPE_FIXED_POINT_32 QNN_DATATYPE_INT_32
+    elif qnn_data_type == 0x0332 or qnn_data_type == 0x0032:
+        return TensorProto.INT32
+    # QNN_DATATYPE_INT_64
+    elif qnn_data_type == 0x0064:
+        return TensorProto.INT64
+    # QNN_DATATYPE_FLOAT_16
+    elif qnn_data_type == 0x0216:
+        return TensorProto.FLOAT16
+    # QNN_DATATYPE_FLOAT_32
+    elif qnn_data_type == 0x0232:
+        return TensorProto.FLOAT
+    # QNN_DATATYPE_BOOL_8
+    elif qnn_data_type == 0x0508:
+        return TensorProto.BOOL
+    else:
+        return TensorProto.UNDEFINED
+
+
+def parse_qnn_json_file(qnn_json_file_path, qnn_input_tensor_dic, qnn_output_tensor_dic):
+    with open(qnn_json_file_path) as qnn_json_file:
+        qnn_json = json.load(qnn_json_file)
+        assert "graph" in qnn_json, "QNN converted json file not valid. Can't find graph."
+        assert "tensors" in qnn_json["graph"], "QNN converted json file not valid. Can't find tensors."
+        for qnn_tensor_name, qnn_tensor_attribute in qnn_json["graph"]["tensors"].items():
+            # type:0 - QNN input tensor, type:1 - QNN output tensor
+            assert (
+                "type" in qnn_tensor_attribute
+                and "data_type" in qnn_tensor_attribute
+                and "dims" in qnn_tensor_attribute
+            ), "QNN converted json file not valid. Can't find some keys from tensors"
+
+            # Get all graph inputs
+            if qnn_tensor_attribute["type"] == 0:
+                qnn_tensor = QnnTensorStruct()
+                qnn_tensor.name = qnn_tensor_name
+                qnn_tensor.onnx_data_type = qnn_data_type_to_onnx_data_type(qnn_tensor_attribute["data_type"])
+                qnn_tensor.dim = qnn_tensor_attribute["dims"]
+                qnn_input_tensor_dic[qnn_tensor_name] = qnn_tensor
+
+            # Get all graph outputs
+            if qnn_tensor_attribute["type"] == 1:
+                qnn_tensor = QnnTensorStruct()
+                qnn_tensor.name = qnn_tensor_name
+                qnn_tensor.onnx_data_type = qnn_data_type_to_onnx_data_type(qnn_tensor_attribute["data_type"])
+                qnn_tensor.dim = qnn_tensor_attribute["dims"]
+                qnn_output_tensor_dic[qnn_tensor_name] = qnn_tensor
+
+    assert (
+        len(qnn_input_tensor_dic) >= 1 and len(qnn_output_tensor_dic) >= 1
+    ), "Converted QNN model not valid. It should have at least 1 input & 1 output."
+
+
+# Onnxruntime QNN EP can support context binary file generated by QNN tool chain. However QNN generated context binary file
+# uses channel last data layout and 8 bits or 16 bits for input and output.
+# This script gets the QNN model input & output information from QNN converted model_net.json file, compare them with Onnx model
+# and inserts Cast, Transpose nodes to Onnx model if required
+def main():
+    parser = ArgumentParser("Generate Onnx model which includes the QNN context binary.")
+    parser.add_argument("-b", "--qnn_bin", help="Required. Path to Qnn context binary file.", required=True, type=str)
+    parser.add_argument(
+        "-q", "--qnn_json", help="Required. Path to Qnn converted model_net.json file.", required=True, type=str
+    )
+    parser.add_argument(
+        "--disable_embed_mode",
+        action="store_true",
+        default=False,
+        help="Set embed_mode=1 which mean embed Qnn context binary into the onnx model. Otherwise, set context binary file path in the onnx model",
+    )
+    args = parser.parse_args()
+
+    # Parse Qnn model_net.json file to get the graph input output information
+    qnn_input_tensor_dic = {}
+    qnn_output_tensor_dic = {}
+    parse_qnn_json_file(args.qnn_json, qnn_input_tensor_dic, qnn_output_tensor_dic)
+
+    if args.disable_embed_mode:
+        ep_cache_context_content = args.qnn_bin
+        ctx_embed_mode = 0
+    else:
+        with open(args.qnn_bin, "rb") as file:
+            ep_cache_context_content = file.read()
+        ctx_embed_mode = 1
+
+    qnn_inputs = []
+    for qnn_input in qnn_input_tensor_dic.values():
+        qnn_inputs.append(helper.make_tensor_value_info(qnn_input.name, qnn_input.onnx_data_type, qnn_input.dim))
+
+    qnn_outputs = []
+    for qnn_output in qnn_output_tensor_dic.values():
+        qnn_outputs.append(helper.make_tensor_value_info(qnn_output.name, qnn_output.onnx_data_type, qnn_output.dim))
+
+    qnn_ep_context_node = helper.make_node(
+        "EPContext",
+        name="QnnContext",
+        inputs=qnn_input_tensor_dic.keys(),
+        outputs=qnn_output_tensor_dic.keys(),
+        ep_cache_context=ep_cache_context_content,
+        embed_mode=ctx_embed_mode,
+        source="Qnn",
+        domain="com.microsoft",
+    )
+
+    graph_def = helper.make_graph([qnn_ep_context_node], "qnn-onnx-model", qnn_inputs, qnn_outputs)
+
+    model_def = helper.make_model(graph_def, producer_name="MS")
+
+    onnx.save(model_def, args.qnn_json.replace(".json", "_qnn_ctx.onnx"))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/onnxruntime/python/tools/quantization/calibrate.py b/onnxruntime/python/tools/quantization/calibrate.py
index bdf00f21100bf..f934b55bdc30d 100644
--- a/onnxruntime/python/tools/quantization/calibrate.py
+++ b/onnxruntime/python/tools/quantization/calibrate.py
@@ -22,7 +22,7 @@
 
 
 class TensorData:
-    _allowed = frozenset(["avg", "std", "lowest", "highest", "hist", "hist_edges"])
+    _allowed = frozenset(["avg", "std", "lowest", "highest", "hist", "hist_edges", "bins"])
 
     def __init__(self, **kwargs):
         for k, v in kwargs.items():
@@ -55,7 +55,7 @@ def __init__(self, calibration_method, data: Dict[str, Union[TensorData, Tuple]]
                     self.data[k] = TensorData(lowest=v[0], highest=v[1])
                     continue
                 if len(v) == 4:
-                    self.data[k] = TensorData(lowest=v[0], highest=v[1], histogram=v[2], bins=v[3])
+                    self.data[k] = TensorData(lowest=v[0], highest=v[1], hist=v[2], bins=v[3])
                     continue
                 raise TypeError(f"Unexpected tuple for {k:r}, it has {len(v)} elements: {v}.")
             if not isinstance(v, TensorData):
@@ -224,6 +224,7 @@ def __init__(
         use_external_data_format=False,
         moving_average=False,
         averaging_constant=0.01,
+        max_intermediate_outputs=None,
     ):
         """
         :param model_path: ONNX model to calibrate. It is a model path
@@ -233,6 +234,7 @@ def __init__(
         :param use_external_data_format: use external data format to store model which size is >= 2Gb
         :param moving_average: compute the moving average of the minimum and maximum values instead of the global minimum and maximum.
         :param averaging_constant: constant smoothing factor to use when computing the moving average.
+        :param max_intermediate_outputs: maximum number of intermediate outputs before an intermediate range is computed.
         """
         super().__init__(
             model_path,
@@ -249,6 +251,7 @@ def __init__(
         if moving_average and (averaging_constant < 0 or averaging_constant > 1):
             raise ValueError("Invalid averaging constant, which should not be < 0 or > 1.")
         self.averaging_constant = averaging_constant
+        self.max_intermediate_outputs = max_intermediate_outputs
 
     def augment_graph(self):
         """
@@ -302,8 +305,14 @@ def collect_data(self, data_reader: CalibrationDataReader):
             if not inputs:
                 break
             self.intermediate_outputs.append(self.infer_session.run(None, inputs))
-
-        if len(self.intermediate_outputs) == 0:
+            if (
+                self.max_intermediate_outputs is not None
+                and len(self.intermediate_outputs) == self.max_intermediate_outputs
+            ):
+                self.compute_range()
+                self.clear_collected_data()
+
+        if len(self.intermediate_outputs) == 0 and self.calibrate_tensors_range is None:
             raise ValueError("No data is collected.")
 
         t = self.compute_data()
@@ -363,9 +372,9 @@ def compute_data(self) -> TensorsData:
             else:
                 min_value_array = min(merged_added_output_dict[added_output_names[i]])
                 max_value_array = max(merged_added_output_dict[added_output_names[i + 1]])
-            if type(min_value_array) == int or min_value_array.size > 0:
+            if isinstance(min_value_array, int) or min_value_array.size > 0:
                 min_value = float(min_value_array)
-            if type(max_value_array) == int or max_value_array.size > 0:
+            if isinstance(max_value_array, int) or max_value_array.size > 0:
                 max_value = float(max_value_array)
 
             if self.symmetric:
@@ -1011,6 +1020,9 @@ def create_calibrator(
         symmetric = False if "symmetric" not in extra_options else extra_options["symmetric"]
         moving_average = False if "moving_average" not in extra_options else extra_options["moving_average"]
         averaging_constant = 0.01 if "averaging_constant" not in extra_options else extra_options["averaging_constant"]
+        max_intermediate_outputs = (
+            None if "max_intermediate_outputs" not in extra_options else extra_options["max_intermediate_outputs"]
+        )
         calibrator = MinMaxCalibrater(
             model,
             op_types_to_calibrate,
@@ -1019,6 +1031,7 @@ def create_calibrator(
             symmetric=symmetric,
             moving_average=moving_average,
             averaging_constant=averaging_constant,
+            max_intermediate_outputs=max_intermediate_outputs,
         )
     elif calibrate_method == CalibrationMethod.Entropy:
         # default settings for entropy algorithm
diff --git a/onnxruntime/python/tools/quantization/matmul_4bits_quantizer.py b/onnxruntime/python/tools/quantization/matmul_4bits_quantizer.py
new file mode 100644
index 0000000000000..1c3c212b54fa4
--- /dev/null
+++ b/onnxruntime/python/tools/quantization/matmul_4bits_quantizer.py
@@ -0,0 +1,229 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for
+# license information.
+# --------------------------------------------------------------------------
+
+import argparse
+import logging
+import os
+from typing import List, Tuple
+
+import numpy as np
+import numpy.typing as npt
+import onnx
+from onnx.onnx_pb import GraphProto, ModelProto, NodeProto, TensorProto
+
+from onnxruntime.capi._pybind_state import quantize_matmul_4bits
+
+from .onnx_model import ONNXModel
+from .quant_utils import attribute_to_kwarg
+
+logging.basicConfig(format="%(asctime)s %(name)s [%(levelname)s] - %(message)s", level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+
+class MatMul4BitsQuantizer:
+    """Perform 4b quantization of constant MatMul weights"""
+
+    def __init__(self, model: ModelProto, block_size: int, is_symmetric: bool, nodes_to_exclude=None):
+        if nodes_to_exclude is None:
+            nodes_to_exclude = []
+        self.model = ONNXModel(model)
+        self.block_size = block_size
+        self.is_symmetric = is_symmetric
+        self.nodes_to_exclude = set(nodes_to_exclude)
+
+    @staticmethod
+    def __get_initializer(name, graph_path: List[GraphProto]) -> Tuple[TensorProto, GraphProto]:
+        for gid in range(len(graph_path) - 1, -1, -1):
+            graph = graph_path[gid]
+            for tensor in graph.initializer:
+                if tensor.name == name:
+                    return tensor, graph
+        return None, None
+
+    def int4_block_quant(self, fp32weight: npt.ArrayLike) -> np.ndarray:
+        """4b quantize fp32 weight to a blob"""
+
+        if len(fp32weight.shape) != 2:
+            raise ValueError("Current int4 block quantization only supports 2D tensors!")
+        rows, cols = fp32weight.shape
+
+        block_size = self.block_size
+        blob_size = block_size // 2
+        k_blocks = (rows + block_size - 1) // block_size
+        padded_rows = k_blocks * block_size
+        pad_len = padded_rows - rows
+        if pad_len > 0:
+            fp32weight = np.pad(fp32weight, ((0, pad_len), (0, 0)), "constant")
+
+        # block wise quantization, each block comes from a single column
+        packed = np.zeros((cols, k_blocks, blob_size), dtype="uint8")
+        scales = np.zeros((cols * k_blocks), dtype=fp32weight.dtype)
+        zero_point = np.zeros(cols * ((k_blocks + 1) // 2), dtype="uint8")
+        quantize_matmul_4bits(packed, fp32weight, scales, zero_point, block_size, cols, rows, self.is_symmetric)
+
+        return (packed, scales, zero_point)
+
+    def _q4_matmul_node_weight(self, node: NodeProto, graph_stack: List[GraphProto]) -> NodeProto:
+        """If the node is MatMul with fp32 const weight, quantize the weight with int4, and return the new node"""
+
+        if node.op_type != "MatMul":
+            return node  # only care about MatMul for now
+
+        logger.info(f"start to quantize {node.name} ...")
+        if node.name in self.nodes_to_exclude:
+            logger.info(f"exclude to quantize {node.name} as specified by nodes_to_exclude...")
+            return node
+
+        inputB = node.input[1]  # noqa: N806
+        B, Bs_graph = MatMul4BitsQuantizer.__get_initializer(inputB, graph_stack)  # noqa: N806
+        if B is None:
+            logger.info("MatMul doesn't have const weight. Skip to quantize")
+            return node  # only care about constant weight
+
+        B_array = onnx.numpy_helper.to_array(B)  # noqa: N806
+        if len(B_array.shape) != 2:
+            logger.info("MatMul weight is not 2D. Skip to quantize")
+            return node  # can only process 2-D matrix
+
+        packed, scales, zero_points = self.int4_block_quant(B_array)
+        B_quant = onnx.numpy_helper.from_array(packed)  # noqa: N806
+        B_quant.name = B.name + "_Q4"
+        for input in Bs_graph.input:
+            if input.name == inputB:
+                Bs_graph.input.remove(input)
+                break
+
+        scales_tensor = onnx.numpy_helper.from_array(scales)
+        scales_tensor.name = B.name + "_scales"
+        Bs_graph.initializer.extend([B_quant, scales_tensor])
+
+        input_names = [node.input[0], B_quant.name, scales_tensor.name]
+        if not self.is_symmetric:
+            zp_tensor = onnx.numpy_helper.from_array(zero_points)
+            zp_tensor.name = B.name + "_zero_points"
+            Bs_graph.initializer.extend([zp_tensor])
+            input_names.append(zp_tensor.name)
+
+        kwargs = {}
+        rows, cols = B_array.shape
+        kwargs["K"] = rows
+        kwargs["N"] = cols
+        kwargs["bits"] = 4
+        kwargs["block_size"] = self.block_size
+
+        matmul_q4_node = onnx.helper.make_node(
+            "MatMulNBits",
+            inputs=input_names,
+            outputs=[node.output[0]],
+            name=node.name + "_Q4" if node.name else "",
+            domain="com.microsoft",
+            **kwargs,
+        )
+
+        logger.info(f"complete quantization of {node.name} ...")
+
+        return matmul_q4_node
+
+    def _process_subgraph(self, graph_stack: List[GraphProto]):
+        new_nodes = []
+        graph = graph_stack[-1]
+
+        for node in graph.node:
+            graph_attrs = [
+                attr
+                for attr in node.attribute
+                if attr.type == onnx.AttributeProto.GRAPH or attr.type == onnx.AttributeProto.GRAPHS
+            ]
+            if len(graph_attrs):
+                kwargs = {}
+                for attr in node.attribute:
+                    if attr.type == onnx.AttributeProto.GRAPH:
+                        # recursive call to take care of sub-graph
+                        graph_stack.append(attr.g)
+                        kv = {attr.name: self._process_subgraph(graph_stack)}
+                    elif attr.type == onnx.AttributeProto.GRAPHS:
+                        value = []
+                        for subgraph in attr.graphs:
+                            # recursive call to take care of sub-graph
+                            graph_stack.append(subgraph)
+                            value.extend([self._process_subgraph(graph_stack)])
+                        kv = {attr.name: value}
+                    else:
+                        kv = attribute_to_kwarg(attr)
+                    kwargs.update(kv)
+                node = onnx.helper.make_node(  # noqa: PLW2901
+                    node.op_type, node.input, node.output, name=node.name, **kwargs
+                )
+
+            new_nodes.append(self._q4_matmul_node_weight(node, graph_stack))
+
+        graph.ClearField("node")
+        graph.node.extend(new_nodes)
+        graph_stack.pop()
+        return graph
+
+    def process(self):
+        # use a stack to keep track of sub-graphs
+        graph_stack = [self.model.graph()]
+        opset_import = self.model.opset_import()
+
+        has_ms_domain = False
+        for opset in opset_import:
+            if opset.domain == "com.microsoft":
+                has_ms_domain = True
+        if not has_ms_domain:
+            opset_import.extend([onnx.helper.make_opsetid("com.microsoft", 1)])
+
+        self._process_subgraph(graph_stack)
+        self.model.clean_initializers()
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(
+        description="""Blockwise int4 quantization for MatMul 2D weight matrices.
+
+A weight matrix is partitioned into into blocks, where each block is a
+continguous subset inside each column. Each block is quantized into a
+set of 4b integers with a scaling factor and an optional offset.
+"""
+    )
+
+    parser.add_argument("--input_model", required=True, help="Path to the input model file")
+    parser.add_argument("--output_model", required=True, help="Path to the output model file")
+    parser.add_argument("--block_size", required=False, default=32)
+    parser.add_argument(
+        "--symmetric", required=False, default=True, help="Indicate whether to quantize the model symmetrically"
+    )
+    parser.add_argument("-v", "--verbose", required=False, action="store_true")
+    parser.set_defaults(verbose=False)
+    parser.add_argument(
+        "--nodes_to_exclude",
+        nargs="+",
+        type=str,
+        required=False,
+        default=[],
+        help="Specify the nodes to be excluded from quantization with node names",
+    )
+
+    return parser.parse_args()
+
+
+if __name__ == "__main__":
+    args = parse_args()
+    if args.verbose:
+        logger.setLevel(logging.DEBUG)
+
+    input_model_path = args.input_model
+    output_model_path = args.output_model
+
+    if os.path.exists(output_model_path):
+        logger.error(f"file {output_model_path} already exists")
+        raise Exception(f"file {output_model_path} already exists")
+
+    model = onnx.load(input_model_path)
+    quant = MatMul4BitsQuantizer(model, args.block_size, args.symmetric, nodes_to_exclude=args.nodes_to_exclude)
+    quant.process()
+    quant.model.save_model_to_file(output_model_path, True)
diff --git a/onnxruntime/python/tools/quantization/matmul_bnb4_quantizer.py b/onnxruntime/python/tools/quantization/matmul_bnb4_quantizer.py
new file mode 100644
index 0000000000000..951746a089305
--- /dev/null
+++ b/onnxruntime/python/tools/quantization/matmul_bnb4_quantizer.py
@@ -0,0 +1,240 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for
+# license information.
+# --------------------------------------------------------------------------
+
+import argparse
+import logging
+import os
+from typing import List, Tuple
+
+import numpy as np
+import numpy.typing as npt
+import onnx
+from onnx.onnx_pb import GraphProto, ModelProto, NodeProto, TensorProto
+
+from onnxruntime.capi._pybind_state import quantize_matmul_bnb4
+
+from .onnx_model import ONNXModel
+from .quant_utils import attribute_to_kwarg
+
+logger = logging.getLogger(__name__)
+
+
+class MatMulBnb4Quantizer:
+    """Perform 4b quantization of constant MatMul weights using FP4 or NF4 data type"""
+
+    ##################
+    # quantization types, must be consistent with native code type
+    # Bnb_DataType_t defined in blockwise_quant_block_bnb4.h
+
+    # 4b floating point with bias of 3
+    FP4 = 0
+
+    # 4b NormalFloat
+    NF4 = 1
+
+    def __init__(self, model: ModelProto, quant_type: int, block_size: int, nodes_to_exclude=None):
+        nodes_to_exclude = nodes_to_exclude or []
+        assert quant_type in [MatMulBnb4Quantizer.FP4, MatMulBnb4Quantizer.NF4]
+        self.model = ONNXModel(model)
+        self.quant_type = quant_type
+        self.block_size = block_size
+        self.nodes_to_exclude = set(nodes_to_exclude)
+
+    @staticmethod
+    def __get_initializer(name, graph_path: List[GraphProto]) -> Tuple[TensorProto, GraphProto]:
+        for gid in range(len(graph_path) - 1, -1, -1):
+            graph = graph_path[gid]
+            for tensor in graph.initializer:
+                if tensor.name == name:
+                    return tensor, graph
+        return None, None
+
+    def bnb4_block_quant(self, fpweight: npt.ArrayLike) -> np.ndarray:
+        """4b quantize fp32/fp16 weight"""
+
+        if len(fpweight.shape) != 2:
+            raise ValueError("Current bnb4 block quantization only supports 2D tensors!")
+        # need to copy since the transposed weight still has the original memory layout
+        # Linear4bit quantizes its weight data which is the transposed weight
+        fpweight_t = fpweight.transpose().copy()
+
+        rows, cols = fpweight.shape
+        numel = rows * cols
+        block_size = self.block_size
+        num_blocks = (numel + block_size - 1) // block_size
+        quantized_numel = (numel + 1) // 2
+
+        packed = np.zeros(quantized_numel, dtype="uint8")
+        absmax = np.zeros(num_blocks, dtype=fpweight.dtype)
+        # block wise quantization, fpweight_t is flattened and divided into blocks
+        quantize_matmul_bnb4(packed, fpweight_t, absmax, block_size, self.quant_type, cols, rows)
+
+        return (packed, absmax)
+
+    def _bnb4_matmul_node_weight(self, node: NodeProto, graph_stack: List[GraphProto]) -> NodeProto:
+        """If the node is MatMul with fp32 const weight, quantize the weight with int4, and return the new node"""
+
+        if node.op_type != "MatMul":
+            return node  # only care about MatMul for now
+
+        logger.debug(f"start to quantize {node.name} ...")
+        if node.name in self.nodes_to_exclude:
+            logger.debug(f"exclude to quantize {node.name} as specified by nodes_to_exclude...")
+            return node
+
+        inputB = node.input[1]  # noqa: N806
+        B, Bs_graph = MatMulBnb4Quantizer.__get_initializer(inputB, graph_stack)  # noqa: N806
+        if B is None:
+            logger.debug("MatMul doesn't have const weight. Skip to quantize")
+            return node  # only care about constant weight
+
+        B_array = onnx.numpy_helper.to_array(B)  # noqa: N806
+        if len(B_array.shape) != 2:
+            logger.debug("MatMul weight is not 2D. Skip to quantize")
+            return node  # can only process 2-D matrix
+
+        packed, absmax = self.bnb4_block_quant(B_array)
+        B_quant = onnx.numpy_helper.from_array(packed)  # noqa: N806
+        B_quant.name = B.name + "_Bnb4"
+        for input in Bs_graph.input:
+            if input.name == inputB:
+                Bs_graph.input.remove(input)
+                break
+
+        absmax_tensor = onnx.numpy_helper.from_array(absmax)
+        absmax_tensor.name = B.name + "_absmax"
+
+        Bs_graph.initializer.extend([B_quant, absmax_tensor])
+
+        kwargs = {}
+        rows, cols = B_array.shape
+        kwargs["K"] = rows
+        kwargs["N"] = cols
+        kwargs["block_size"] = self.block_size
+        kwargs["quant_type"] = self.quant_type
+
+        matmul_bnb4_node = onnx.helper.make_node(
+            "MatMulBnb4",
+            inputs=[node.input[0], B_quant.name, absmax_tensor.name],
+            outputs=[node.output[0]],
+            name=node.name + "_Bnb4" if node.name else "",
+            domain="com.microsoft",
+            **kwargs,
+        )
+
+        logger.debug(f"complete quantization of {node.name} ...")
+
+        return matmul_bnb4_node
+
+    def _process_subgraph(self, graph_stack: List[GraphProto]):
+        new_nodes = []
+        graph = graph_stack[-1]
+
+        for node in graph.node:
+            graph_attrs = [
+                attr
+                for attr in node.attribute
+                if attr.type == onnx.AttributeProto.GRAPH or attr.type == onnx.AttributeProto.GRAPHS
+            ]
+            if len(graph_attrs):
+                kwargs = {}
+                for attr in node.attribute:
+                    if attr.type == onnx.AttributeProto.GRAPH:
+                        # recursive call to take care of sub-graph
+                        graph_stack.append(attr.g)
+                        kv = {attr.name: self._process_subgraph(graph_stack)}
+                    elif attr.type == onnx.AttributeProto.GRAPHS:
+                        value = []
+                        for subgraph in attr.graphs:
+                            # recursive call to take care of sub-graph
+                            graph_stack.append(subgraph)
+                            value.extend([self._process_subgraph(graph_stack)])
+                        kv = {attr.name: value}
+                    else:
+                        kv = attribute_to_kwarg(attr)
+                    kwargs.update(kv)
+                node = onnx.helper.make_node(  # noqa: PLW2901
+                    node.op_type, node.input, node.output, name=node.name, **kwargs
+                )
+
+            new_nodes.append(self._bnb4_matmul_node_weight(node, graph_stack))
+
+        graph.ClearField("node")
+        graph.node.extend(new_nodes)
+        graph_stack.pop()
+        return graph
+
+    def process(self):
+        # use a stack to keep track of sub-graphs
+        graph_stack = [self.model.graph()]
+        opset_import = self.model.opset_import()
+
+        has_ms_domain = False
+        for opset in opset_import:
+            if opset.domain == "com.microsoft":
+                has_ms_domain = True
+        if not has_ms_domain:
+            opset_import.extend([onnx.helper.make_opsetid("com.microsoft", 1)])
+
+        self._process_subgraph(graph_stack)
+        self.model.clean_initializers()
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(
+        description="""Blockwise FP4/NF4 quantization for MatMul 2D weight matrices.
+
+A weight matrix is partitioned into blocks, where each block is a contiguous
+subset inside the flattened transposed weight matrix. Each block is quantized
+into a set of 4b integers with an absolute value scaling factor.
+"""
+    )
+
+    parser.add_argument("--input_model", required=True, help="Path to the input model file")
+    parser.add_argument("--output_model", required=True, help="Path to the output model file")
+    parser.add_argument(
+        "--quant_type",
+        required=False,
+        default=1,
+        options=[MatMulBnb4Quantizer.FP4, MatMulBnb4Quantizer.NF4],
+        help="Quantization data type. 0: FP4, 1: NF4",
+    )
+    parser.add_argument(
+        "--block_size",
+        required=False,
+        default=64,
+        description="Block size for blockwise quantization. Note: bnb.nn.Linear4bit only uses block_size=64",
+    )
+    parser.add_argument("-v", "--verbose", required=False, action="store_true")
+    parser.set_defaults(verbose=False)
+    parser.add_argument(
+        "--nodes_to_exclude",
+        nargs="+",
+        type=str,
+        required=False,
+        default=[],
+        help="Specify the nodes to be excluded from quantization with node names",
+    )
+
+    return parser.parse_args()
+
+
+if __name__ == "__main__":
+    args = parse_args()
+    if args.verbose:
+        logger.setLevel(logging.DEBUG)
+
+    input_model_path = args.input_model
+    output_model_path = args.output_model
+
+    if os.path.exists(output_model_path):
+        logger.error(f"file {output_model_path} already exists")
+        raise Exception(f"file {output_model_path} already exists")
+
+    model = onnx.load(input_model_path)
+    quant = MatMulBnb4Quantizer(model, args.quant_type, args.block_size, nodes_to_exclude=args.nodes_to_exclude)
+    quant.process()
+    quant.model.save_model_to_file(output_model_path, True)
diff --git a/onnxruntime/python/tools/quantization/onnx_model.py b/onnxruntime/python/tools/quantization/onnx_model.py
index 7d22e2c217dd1..e4342908f68ea 100644
--- a/onnxruntime/python/tools/quantization/onnx_model.py
+++ b/onnxruntime/python/tools/quantization/onnx_model.py
@@ -354,6 +354,7 @@ def save_model_to_file(self, output_path, use_external_data_format=False):
                 self.model,
                 all_tensors_to_one_file=True,
                 location=Path(output_path).name + ".data",
+                convert_attribute=True,
             )
         for init in self.model.graph.initializer:
             self._check_init(init, "end")
diff --git a/onnxruntime/python/tools/quantization/onnx_quantizer.py b/onnxruntime/python/tools/quantization/onnx_quantizer.py
index 2d1e418f9d2b4..c1c2248bc82d6 100644
--- a/onnxruntime/python/tools/quantization/onnx_quantizer.py
+++ b/onnxruntime/python/tools/quantization/onnx_quantizer.py
@@ -111,9 +111,10 @@ def __init__(
         self.is_activation_symmetric = (
             False if "ActivationSymmetric" not in self.extra_options else self.extra_options["ActivationSymmetric"]
         )
+        self.min_real_range = self.extra_options.get("MinimumRealRange")
 
-        self.activation_qType = activation_qType.tensor_type
-        self.weight_qType = weight_qType.tensor_type
+        self.activation_qType = getattr(activation_qType, "tensor_type", activation_qType)
+        self.weight_qType = getattr(weight_qType, "tensor_type", weight_qType)
         """
             Dictionary specifying the min and max values for tensors. It has following format:
                 {
@@ -597,7 +598,7 @@ def _get_quantization_params(self, param_name, use_scale=None, use_zeropoint=Non
             if params is None or len(params) != 2:
                 raise ValueError(
                     "Quantization parameters should contain zero point and scale. "
-                    "Specified values for output {}: {}".format(param_name, params)
+                    f"Specified values for output {param_name}: {params}"
                 )
 
             zero_point_values = [params["zero_point"]]
@@ -645,6 +646,7 @@ def _get_quantize_input_nodes(self, node, input_index, qType, given_scale_name=N
         :return: List of newly created nodes in NodeProto format.
         """
         input_name = node.input[input_index]
+        assert input_name != "", "Cannot access undefined variable in graph."
         output_name = input_name + TENSOR_NAME_QUANT_SUFFIX
         ql_node_name = input_name + "_QuantizeLinear"
 
@@ -997,6 +999,7 @@ def quantize_initializer(self, weight, qType, reduce_range=False, keep_float_wei
             qType,
             self.is_weight_symmetric,
             self.reduce_range and reduce_range,
+            self.min_real_range,
         )
 
         if qType in {
@@ -1086,6 +1089,7 @@ def quantize_weight_per_channel(
                 self.is_weight_symmetric
                 or weight_qType in (onnx_proto.TensorProto.INT8, onnx_proto.TensorProto.FLOAT8E4M3FN),
                 self.reduce_range and reduce_range,
+                self.min_real_range,
             )
             rmin_list.append(rmin)
             rmax_list.append(rmax)
@@ -1207,7 +1211,9 @@ def calculate_quantization_params(self):
                 rmin, rmax = td.range_value
                 qmin, qmax = get_qmin_qmax_for_qType(self.activation_qType, symmetric=self.is_activation_symmetric)
 
-                zero, scale = compute_scale_zp(rmin, rmax, qmin, qmax, self.is_activation_symmetric)
+                zero, scale = compute_scale_zp(
+                    rmin, rmax, qmin, qmax, self.is_activation_symmetric, self.min_real_range
+                )
             quantization_params[tensor_name] = QuantizationParams(zero_point=zero, scale=scale)
 
         return quantization_params
diff --git a/onnxruntime/python/tools/quantization/operators/conv.py b/onnxruntime/python/tools/quantization/operators/conv.py
index d23459b478e6a..23f9eaf4b0e0b 100644
--- a/onnxruntime/python/tools/quantization/operators/conv.py
+++ b/onnxruntime/python/tools/quantization/operators/conv.py
@@ -157,7 +157,7 @@ def quantize(self):
                 nodes,
             ) = self.quantizer.quantize_activation(node, [0])
             quant_weight_tuple = self.quantizer.quantize_weight_per_channel(
-                node.input[1], onnx_proto.TensorProto.INT8, 0
+                node.input[1], onnx_proto.TensorProto.INT8, 0  # self.quantizer.weight_qType?
             )
             quantized_input_names.append(quant_weight_tuple[0])
             zero_point_names.append(quant_weight_tuple[1])
diff --git a/onnxruntime/python/tools/quantization/operators/lstm.py b/onnxruntime/python/tools/quantization/operators/lstm.py
index 7e91f9b76ca36..90a52cb528b32 100644
--- a/onnxruntime/python/tools/quantization/operators/lstm.py
+++ b/onnxruntime/python/tools/quantization/operators/lstm.py
@@ -47,10 +47,10 @@ def quantize(self):
             R.dims[0] = R_num_dir * R_4_hidden_size
 
         quant_input_weight_tuple = self.quantizer.quantize_weight_per_channel(
-            node.input[1], onnx_proto.TensorProto.INT8, 0
+            node.input[1], onnx_proto.TensorProto.INT8, 0  # self.quantizer.weight_qType?
         )
         quant_recurrent_weight_tuple = self.quantizer.quantize_weight_per_channel(
-            node.input[2], onnx_proto.TensorProto.INT8, 0
+            node.input[2], onnx_proto.TensorProto.INT8, 0  # self.quantizer.weight_qType?
         )
 
         W_quant_weight = model.get_initializer(quant_input_weight_tuple[0])  # noqa: N806
diff --git a/onnxruntime/python/tools/quantization/operators/pad.py b/onnxruntime/python/tools/quantization/operators/pad.py
index 2d1690e545263..25818de1b76bd 100644
--- a/onnxruntime/python/tools/quantization/operators/pad.py
+++ b/onnxruntime/python/tools/quantization/operators/pad.py
@@ -31,7 +31,7 @@ def quantize(self):
             kwargs.update(kv)
 
         if "mode" not in kwargs or kwargs["mode"] == b"constant":
-            if len(node.input) > 2:  # There is 3rd input 'constant_value'
+            if len(node.input) > 2 and node.input[2] != "":  # There is 3rd input 'constant_value'
                 zp_tensor = self.quantizer.model.get_initializer(quantized_input_value.zp_name)
                 scale_tensor = self.quantizer.model.get_initializer(quantized_input_value.scale_name)
                 if zp_tensor is None or scale_tensor is None:
@@ -72,7 +72,17 @@ def quantize(self):
                     self.quantizer.new_nodes.extend(pad_value_qnodes)
                     node.input[2] = pad_value_qnodes[0].output[0]
             else:
-                node.input.extend([quantized_input_value.zp_name])  # pad zero_point for original zero
+                # In quantized format, the `zero` before quantization is mapped
+                # to quantized_input_value.zp_name. Thus, padding 0 to
+                # original tensor should become padding zero point to quantized
+                # tensor.
+                if len(node.input) == 2:
+                    # Feed quantization's zero point to padding node.
+                    node.input.append(quantized_input_value.zp_name)
+                else:
+                    # Assign quantization's zero point to padding node.
+                    assert node.input[2] == ""
+                    node.input[2] = quantized_input_value.zp_name
 
         # Create an entry for output quantized value
         quantized_output_value = QuantizedValue(
diff --git a/onnxruntime/python/tools/quantization/operators/softmax.py b/onnxruntime/python/tools/quantization/operators/softmax.py
index 1e380d7764952..bd09b05ddd9ff 100644
--- a/onnxruntime/python/tools/quantization/operators/softmax.py
+++ b/onnxruntime/python/tools/quantization/operators/softmax.py
@@ -1,6 +1,14 @@
 import onnx
 
-from ..quant_utils import TENSOR_NAME_QUANT_SUFFIX, QuantizedValue, QuantizedValueType, attribute_to_kwarg, ms_domain
+from ..quant_utils import (
+    TENSOR_NAME_QUANT_SUFFIX,
+    QuantizedValue,
+    QuantizedValueType,
+    attribute_to_kwarg,
+    compute_scale_zp,
+    get_qmin_qmax_for_qType,
+    ms_domain,
+)
 from .base_operator import QuantOperatorBase
 from .qdq_base_operator import QDQOperatorBase
 
@@ -77,15 +85,11 @@ def quantize(self):
 class QDQSoftmax(QDQOperatorBase):
     def quantize(self):
         super().quantize()
-        if self.quantizer.activation_qType == onnx.onnx_pb.TensorProto.UINT8:
-            out_scale = 1 / 256.0
-            out_zero_point = 0
-        elif self.quantizer.is_activation_symmetric:
-            # results are all greater or equal to 0, so we can only use
-            # half of the range
-            out_scale = 1 / 127.0
-            out_zero_point = 0
-        else:
-            out_scale = 1 / 256.0
-            out_zero_point = -128
+        symmetric = self.quantizer.is_activation_symmetric
+
+        # Enforce Softmax range: 0.0 to 1.0
+        rmin, rmax = 0.0, 1.0
+        qmin, qmax = get_qmin_qmax_for_qType(self.quantizer.activation_qType, symmetric=symmetric)
+        out_zero_point, out_scale = compute_scale_zp(rmin, rmax, qmin, qmax, symmetric=symmetric)
+
         self.quantizer.set_quant_scale_zp(self.node.output[0], (out_scale, out_zero_point))
diff --git a/onnxruntime/python/tools/quantization/qdq_quantizer.py b/onnxruntime/python/tools/quantization/qdq_quantizer.py
index e595b580b20df..5c97dd20cf507 100644
--- a/onnxruntime/python/tools/quantization/qdq_quantizer.py
+++ b/onnxruntime/python/tools/quantization/qdq_quantizer.py
@@ -283,7 +283,13 @@ def _add_qdq_pair_for_initializer(self, weight_proto, tensor_type, axis=None):
                 raise ValueError("Per-Channel support with QDQ format requires onnx opset version 13 or above.")
             q_weight_name, zp_name, scale_name = self.quantize_weight_per_channel(
                 weight_name,
-                self.weight_qType if tensor_type is QDQQuantTensorType.WEIGHT else self.activation_qType,
+                # Quantization type is forced to be TensorProto.INT8.
+                # when the expected value would be (see below)
+                # self.weight_qType if tensor_type is QDQQuantTensorType.WEIGHT else self.activation_qType.
+                # QLinearConv expects to have a unique value for all channels.
+                # This code does not enforce that but it is necessarily the case when the
+                # quantization is symmetric (as for INT8).
+                onnx_proto.TensorProto.INT8,
                 axis,
                 keep_float_weight=self.add_qdq_pair_to_weight,
             )
diff --git a/onnxruntime/python/tools/quantization/quant_utils.py b/onnxruntime/python/tools/quantization/quant_utils.py
index 74e54c3f1fa37..8825d789933fb 100644
--- a/onnxruntime/python/tools/quantization/quant_utils.py
+++ b/onnxruntime/python/tools/quantization/quant_utils.py
@@ -184,7 +184,7 @@ def quantize_nparray(qType, arr, scale, zero_point, low=None, high=None):
         return arr_fp32.astype(dtype)
 
 
-def compute_scale_zp(rmin, rmax, qmin, qmax, symmetric=False):
+def compute_scale_zp(rmin, rmax, qmin, qmax, symmetric=False, min_real_range=None):
     """Calculate the scale s and zero point z for the quantization relation
     r = s(q-z), where r are the original values and q are the corresponding
     quantized values.
@@ -199,6 +199,8 @@ def compute_scale_zp(rmin, rmax, qmin, qmax, symmetric=False):
     :parameter rmax: maximum value of r
     :parameter qmin: minimum value representable by the target quantization data type
     :parameter qmax: maximum value representable by the target quantization data type
+    :parameter symmetric: True if the floating-point range should be made symmetric. Defaults to False.
+    :parameter min_real_range: Minimum floating-point range (i.e., rmax - rmin) to enforce. Defaults to None.
     :return: zero and scale [z, s]
 
     """
@@ -211,6 +213,10 @@ def compute_scale_zp(rmin, rmax, qmin, qmax, symmetric=False):
     rmin = min(rmin, 0)
     rmax = max(rmax, 0)
 
+    # Ensure a minimum float-point range if specified.
+    if min_real_range is not None:
+        rmax = max(rmax, rmin + min_real_range)
+
     if symmetric:
         absmax = max(abs(rmin), abs(rmax))
         rmin = -absmax
@@ -254,11 +260,13 @@ def compute_scale_zp_float8(element_type, std):
     return [zero, scale]
 
 
-def quantize_data(data, qType, symmetric, reduce_range=False):
+def quantize_data(data, qType, symmetric, reduce_range=False, min_real_range=None):
     """
     :param data: data to quantize
     :param qType: data type to quantize to. Supported types UINT8 and INT8
     :param symmetric: whether symmetric quantization is used or not. This is applied to INT8.
+    :parameter reduce_range: True if the quantization range should be reduced. Defaults to False.
+    :parameter min_real_range: Minimum floating-point range (i.e., rmax - rmin) to enforce. Defaults to None.
     :return: minimum, maximum, zero point, scale, and quantized weights
 
     To pack weights, we compute a linear transformation
@@ -301,7 +309,7 @@ def quantize_data(data, qType, symmetric, reduce_range=False):
     if qType in (TensorProto.INT8, TensorProto.UINT8, TensorProto.INT16, TensorProto.UINT16):
         if len(data):
             qmin, qmax = get_qmin_qmax_for_qType(qType, reduce_range, symmetric=symmetric)
-            zero_point, scale = compute_scale_zp(rmin, rmax, qmin, qmax, symmetric)
+            zero_point, scale = compute_scale_zp(rmin, rmax, qmin, qmax, symmetric, min_real_range)
         quantized_data = quantize_nparray(qType, numpy.asarray(data), scale, zero_point)
         return rmin, rmax, zero_point, scale, quantized_data
 
@@ -505,7 +513,7 @@ def apply_plot(hist, hist_edges):
     plt.show()
 
 
-def write_calibration_table(calibration_cache):
+def write_calibration_table(calibration_cache, dir="."):
     """
     Helper function to write calibration table to files.
     """
@@ -519,7 +527,7 @@ def write_calibration_table(calibration_cache):
 
     logging.info(f"calibration cache: {calibration_cache}")
 
-    with open("calibration.json", "w") as file:
+    with open(os.path.join(dir, "calibration.json"), "w") as file:
         file.write(json.dumps(calibration_cache))  # use `json.loads` to do the reverse
 
     # Serialize data using FlatBuffers
@@ -551,7 +559,7 @@ def write_calibration_table(calibration_cache):
     builder.Finish(cal_table)
     buf = builder.Output()
 
-    with open("calibration.flatbuffers", "wb") as file:
+    with open(os.path.join(dir, "calibration.flatbuffers"), "wb") as file:
         file.write(buf)
 
     # Deserialize data (for validation)
@@ -564,7 +572,7 @@ def write_calibration_table(calibration_cache):
             logging.info(key_value.Value())
 
     # write plain text
-    with open("calibration.cache", "w") as file:
+    with open(os.path.join(dir, "calibration.cache"), "w") as file:
         for key in sorted(calibration_cache.keys()):
             value = calibration_cache[key]
             s = key + " " + str(max(abs(value[0]), abs(value[1])))
diff --git a/onnxruntime/python/tools/quantization/quantize.py b/onnxruntime/python/tools/quantization/quantize.py
index 706047fe32400..c9e9a92e2af50 100644
--- a/onnxruntime/python/tools/quantization/quantize.py
+++ b/onnxruntime/python/tools/quantization/quantize.py
@@ -351,6 +351,10 @@ def quantize_static(
                     Default is 0.01. Constant smoothing factor to use when computing the moving average of the
                     minimum and maximum values. Effective only when the calibration method selected is MinMax and
                     when CalibMovingAverage is set to True.
+                CalibMaxIntermediateOutputs = Optional[int] :
+                    Default is None. If set to an integer, during calculation of the min-max range of the tensors
+                    it will load at max value number of outputs before computing and merging the range. This will
+                    produce the same result as all computing with None, but is more memory efficient.
                 SmoothQuant = True/False :
                     Default is False. If enabled, SmoothQuant algorithm will be applied before quantization to do
                     fake input channel quantization.
@@ -366,6 +370,12 @@ def quantize_static(
                     `com.microsoft` domain, which forces use of ONNX Runtime's QuantizeLinear and DequantizeLinear
                     contrib op implementations. The contrib op implementations may support features not standardized
                     into the ONNX specification (e.g., 16-bit quantization types).
+                MinimumRealRange = float|None :
+                    Default is None. If set to a floating-point value, the calculation of the quantization parameters
+                    (i.e., scale and zero point) will enforce a minimum range between rmin and rmax. If (rmax - rmin)
+                    is less than the specified minimum range, rmax will be set to rmin + MinimumRealRange. This is
+                    necessary for EPs like QNN that require a minimum floating-point range when determining
+                    quantization parameters.
     """
     if activation_type == QuantType.QFLOAT8E4M3FN or weight_type == QuantType.QFLOAT8E4M3FN:
         if calibrate_method != CalibrationMethod.Distribution:
@@ -396,6 +406,7 @@ def quantize_static(
         ("CalibTensorRangeSymmetric", "symmetric"),
         ("CalibMovingAverage", "moving_average"),
         ("CalibMovingAverageConstant", "averaging_constant"),
+        ("CalibMaxIntermediateOutputs", "max_intermediate_outputs"),
     ]
     calib_extra_options = {
         key: extra_options.get(name) for (name, key) in calib_extra_options_keys if name in extra_options
diff --git a/onnxruntime/python/tools/quantization/shape_inference.py b/onnxruntime/python/tools/quantization/shape_inference.py
index eff3dc0bcdc35..b7d4726610387 100644
--- a/onnxruntime/python/tools/quantization/shape_inference.py
+++ b/onnxruntime/python/tools/quantization/shape_inference.py
@@ -99,7 +99,10 @@ def quant_pre_process(
                 sess_option = onnxruntime.SessionOptions()
                 sess_option.optimized_model_filepath = opt_model_path
                 sess_option.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_BASIC
-                _ = onnxruntime.InferenceSession(input_model_path, sess_option, providers=["CPUExecutionProvider"])
+                sess = onnxruntime.InferenceSession(input_model_path, sess_option, providers=["CPUExecutionProvider"])
+                # Close the session to avoid the cleanup error on Windows for temp folders
+                # https://github.com/microsoft/onnxruntime/issues/17627
+                del sess
             except Exception:
                 logger.error(
                     "ONNX Runtime Model Optimization Failed! Consider rerun with option `--skip_optimization'."
diff --git a/onnxruntime/python/tools/symbolic_shape_infer.py b/onnxruntime/python/tools/symbolic_shape_infer.py
index f1ae93cfc1b90..a9cbef98d9165 100755
--- a/onnxruntime/python/tools/symbolic_shape_infer.py
+++ b/onnxruntime/python/tools/symbolic_shape_infer.py
@@ -24,7 +24,7 @@ def get_attribute(node, attr_name, default_value=None):
 
 
 def get_dim_from_proto(dim):
-    return getattr(dim, dim.WhichOneof("value")) if type(dim.WhichOneof("value")) == str else None
+    return getattr(dim, dim.WhichOneof("value")) if type(dim.WhichOneof("value")) is str else None  # noqa: E721
 
 
 def is_sequence(type_proto):
@@ -82,7 +82,7 @@ def handle_negative_axis(axis, rank):
 
 def get_opset(mp, domain=None):
     domain = domain or ["", "onnx", "ai.onnx"]
-    if type(domain) != list:
+    if type(domain) != list:  # noqa: E721
         domain = [domain]
     for opset in mp.opset_import:
         if opset.domain in domain:
@@ -92,7 +92,7 @@ def get_opset(mp, domain=None):
 
 
 def as_scalar(x):
-    if type(x) == list:
+    if type(x) == list:  # noqa: E721
         assert len(x) == 1
         return x[0]
     elif type(x) == np.ndarray:
@@ -102,7 +102,7 @@ def as_scalar(x):
 
 
 def as_list(x, keep_none):
-    if type(x) == list:
+    if type(x) == list:  # noqa: E721
         return x
     elif type(x) == np.ndarray:
         return list(x)
@@ -113,7 +113,7 @@ def as_list(x, keep_none):
 
 
 def sympy_reduce_product(x):
-    if type(x) == list:
+    if type(x) == list:  # noqa: E721
         value = sympy.Integer(1)
         for v in x:
             value = value * v
@@ -147,13 +147,17 @@ def __init__(self, int_max, auto_merge, guess_output_rank, verbose, prefix=""):
             "GatherElements": self._infer_GatherElements,
             "GatherND": self._infer_GatherND,
             "Identity": self._pass_on_shape_and_type,
+            "AllReduce": self._pass_on_shape_and_type,
             "If": self._infer_If,
             "Loop": self._infer_Loop,
             "MatMul": self._infer_MatMul,
             "MatMulInteger16": self._infer_MatMulInteger,
             "MaxPool": self._infer_Pool,
             "Max": self._infer_symbolic_compute_ops,
+            "MemcpyFromHost": self._pass_on_shape_and_type,
+            "MemcpyToHost": self._pass_on_shape_and_type,
             "Min": self._infer_symbolic_compute_ops,
+            "MoE": self._pass_on_shape_and_type,
             "Mul": self._infer_symbolic_compute_ops,
             "NonMaxSuppression": self._infer_NonMaxSuppression,
             "NonZero": self._infer_NonZero,
@@ -198,7 +202,9 @@ def __init__(self, int_max, auto_merge, guess_output_rank, verbose, prefix=""):
             "GatedRelativePositionBias": self._infer_GatedRelativePositionBias,
             "Gelu": self._infer_Gelu,
             "GemmFastGelu": self._infer_GemmFastGelu,
+            "GemmFloat8": self._infer_GemmFloat8,
             "GroupNorm": self._infer_GroupNorm,
+            "SkipGroupNorm": self._infer_SkipGroupNorm,
             "LayerNormalization": self._infer_LayerNormalization,
             "LongformerAttention": self._infer_LongformerAttention,
             "MultiHeadAttention": self._infer_MultiHeadAttention,
@@ -206,9 +212,11 @@ def __init__(self, int_max, auto_merge, guess_output_rank, verbose, prefix=""):
             "PackedAttention": self._infer_PackedAttention,
             "PackedMultiHeadAttention": self._infer_PackedMultiHeadAttention,
             "PythonOp": self._infer_PythonOp,
+            "QuickGelu": self._infer_FastGelu,
             "RelativePositionBias": self._infer_RelativePositionBias,
             "RemovePadding": self._infer_RemovePadding,
             "RestorePadding": self._infer_RestorePadding,
+            "RotaryEmbedding": self._infer_RotaryEmbedding,
             "SimplifiedLayerNormalization": self._infer_LayerNormalization,
             "SkipLayerNormalization": self._infer_SkipLayerNormalization,
             "SkipSimplifiedLayerNormalization": self._infer_SkipLayerNormalization,
@@ -230,7 +238,6 @@ def __init__(self, int_max, auto_merge, guess_output_rank, verbose, prefix=""):
             "upsample_nearest1d": self._infer_aten_upsample,
             "upsample_nearest2d": self._infer_aten_upsample,
             "upsample_nearest3d": self._infer_aten_upsample,
-            "upsample_bilinear2d": self._infer_aten_upsample,
         }
         self.run_ = True
         self.suggested_merge_ = {}
@@ -244,7 +251,7 @@ def __init__(self, int_max, auto_merge, guess_output_rank, verbose, prefix=""):
         self.prefix_ = prefix
 
     def _add_suggested_merge(self, symbols, apply=False):
-        assert all([(type(s) == str and s in self.symbolic_dims_) or is_literal(s) for s in symbols])
+        assert all([(type(s) == str and s in self.symbolic_dims_) or is_literal(s) for s in symbols])  # noqa: E721
         symbols = set(symbols)
         for k, v in self.suggested_merge_.items():
             if k in symbols:
@@ -314,7 +321,7 @@ def _preprocess(self, in_mp):
         )
 
     def _merge_symbols(self, dims):
-        if not all([type(d) == str for d in dims]):
+        if not all([type(d) == str for d in dims]):  # noqa: E721
             if self.auto_merge_:
                 unique_dims = list(set(dims))
                 is_int = [is_literal(d) for d in unique_dims]
@@ -397,7 +404,7 @@ def _get_shape_rank(self, node, idx):
     def _get_sympy_shape(self, node, idx):
         sympy_shape = []
         for d in self._get_shape(node, idx):
-            if type(d) == str:
+            if type(d) == str:  # noqa: E721
                 sympy_shape.append(
                     self.symbolic_dims_[d]
                     if d in self.symbolic_dims_
@@ -423,7 +430,7 @@ def _try_get_value(self, node, idx):
 
     def _update_computed_dims(self, new_sympy_shape):
         for i, new_dim in enumerate(new_sympy_shape):
-            if not is_literal(new_dim) and type(new_dim) != str:
+            if not is_literal(new_dim) and type(new_dim) != str:  # noqa: E721
                 str_dim = str(new_dim)
                 if str_dim in self.suggested_merge_:
                     if is_literal(self.suggested_merge_[str_dim]):
@@ -463,6 +470,8 @@ def _onnx_infer_single_node(self, node):
             "BiasSplitGelu",
             "BiasAdd",
             "NhwcConv",
+            "QuickGelu",
+            "RotaryEmbedding",
         ]
 
         if not skip_infer:
@@ -549,7 +558,7 @@ def _onnx_infer_subgraph(self, node, subgraph, use_node_input=True, inc_subgraph
         # for new symbolic dims from subgraph output, add to main graph symbolic dims
         subgraph_shapes = [get_shape_from_value_info(o) for o in symbolic_shape_inference.out_mp_.graph.output]
         subgraph_new_symbolic_dims = {
-            d for s in subgraph_shapes if s for d in s if type(d) == str and d not in self.symbolic_dims_
+            d for s in subgraph_shapes if s for d in s if type(d) == str and d not in self.symbolic_dims_  # noqa: E721
         }
         new_dims = {}
         for d in subgraph_new_symbolic_dims:
@@ -579,14 +588,14 @@ def int_or_float(value, allow_float_values):
                     assert len(v.shape) == 1
                     new_v = [int_or_float(vv, allow_float_values) for vv in v]
                 values[i] = new_v
-        values_len = [len(v) if type(v) == list else 0 for v in values]
+        values_len = [len(v) if isinstance(v, list) else 0 for v in values]
         max_len = max(values_len)
         if max_len >= 1 and broadcast:
             # broadcast
             for i, v in enumerate(values):
                 if v is None:
                     continue  # don't broadcast if value is unknown
-                if type(v) == list:
+                if isinstance(v, list):
                     if len(v) < max_len:
                         values[i] = v * max_len
                     else:
@@ -607,7 +616,7 @@ def _compute_on_sympy_data(self, node, op_func):
             values = self._get_int_or_float_values(node, broadcast=True)
 
         if all([v is not None for v in values]):
-            is_list = [type(v) == list for v in values]
+            is_list = [isinstance(v, list) for v in values]
             as_list = any(is_list)
             if as_list:
                 self.sympy_data_[node.output[0]] = [op_func(vs) for vs in zip(*values)]
@@ -864,7 +873,7 @@ def _infer_Concat(self, node):  # noqa: N802
                 self.sympy_data_[node.output[0]] = []
                 for i in range(len(node.input)):
                     value = values[i]
-                    if type(value) == list:
+                    if isinstance(value, list):
                         self.sympy_data_[node.output[0]].extend(value)
                     else:
                         self.sympy_data_[node.output[0]].append(value)
@@ -884,7 +893,7 @@ def _infer_Concat(self, node):  # noqa: N802
             if all([d == dims[0] for d in dims]):
                 continue
             merged = self._merge_symbols(dims)
-            if type(merged) == str:
+            if type(merged) == str:  # noqa: E721
                 sympy_shape[d] = self.symbolic_dims_[merged] if merged else None
             else:
                 sympy_shape[d] = merged
@@ -924,7 +933,7 @@ def _infer_ConstantOfShape(self, node):  # noqa: N802
         sympy_shape = self._get_int_or_float_values(node)[0]
         vi = self.known_vi_[node.output[0]]
         if sympy_shape is not None:
-            if type(sympy_shape) != list:
+            if type(sympy_shape) != list:  # noqa: E721
                 sympy_shape = [sympy_shape]
             self._update_computed_dims(sympy_shape)
             # update sympy data if output type is int, and shape is known
@@ -995,7 +1004,7 @@ def _infer_Einsum(self, node):  # noqa: N802
                 letter = term[-i]
                 if letter != 46:  # letter != b'.'
                     dim = shape[-i]
-                    if letter not in letter_to_dim.keys():
+                    if letter not in letter_to_dim:
                         letter_to_dim[letter] = dim
                     elif type(dim) != sympy.Symbol:
                         letter_to_dim[letter] = dim
@@ -1064,7 +1073,7 @@ def _infer_Gather(self, node):  # noqa: N802
             idx = self._try_get_value(node, 1)
             if idx is not None:
                 data = self.sympy_data_[node.input[0]]
-                if type(data) == list:
+                if type(data) == list:  # noqa: E721
                     if type(idx) == np.ndarray and len(idx.shape) == 1:
                         self.sympy_data_[node.output[0]] = [data[int(i)] for i in idx]
                     else:
@@ -1556,12 +1565,12 @@ def _infer_Reshape(self, node):  # noqa: N802
             )
         else:
             input_sympy_shape = self._get_sympy_shape(node, 0)
-            total = int(1)
+            total = 1
             for d in input_sympy_shape:
                 total = total * d
             new_sympy_shape = []
             deferred_dim_idx = -1
-            non_deferred_size = int(1)
+            non_deferred_size = 1
             for i, d in enumerate(shape_value):
                 if type(d) == sympy.Symbol:
                     new_sympy_shape.append(d)
@@ -1867,7 +1876,7 @@ def handle_negative_index(index, bound):
             and len(steps) == 1
         ):
             input_sympy_data = self.sympy_data_[node.input[0]]
-            if type(input_sympy_data) == list or (
+            if type(input_sympy_data) == list or (  # noqa: E721
                 type(input_sympy_data) == np.array and len(input_sympy_data.shape) == 1
             ):
                 self.sympy_data_[node.output[0]] = input_sympy_data[starts[0] : ends[0] : steps[0]]
@@ -1935,7 +1944,7 @@ def _infer_Squeeze(self, node):  # noqa: N802
             # For symbolic dimensions we guess they are !=1.
             output_shape = [s for s in input_shape if s != 1]
             if self.verbose_ > 0:
-                symbolic_dimensions = [s for s in input_shape if type(s) != int]
+                symbolic_dimensions = [s for s in input_shape if type(s) != int]  # noqa: E721
                 if len(symbolic_dimensions) > 0:
                     logger.debug(
                         f"Symbolic dimensions in input shape of op: '{node.op_type}' node: '{node.name}'. "
@@ -1948,8 +1957,8 @@ def _infer_Squeeze(self, node):  # noqa: N802
                 if i not in axes:
                     output_shape.append(input_shape[i])
                 else:
-                    assert input_shape[i] == 1 or type(input_shape[i]) != int
-                    if self.verbose_ > 0 and type(input_shape[i]) != int:
+                    assert input_shape[i] == 1 or type(input_shape[i]) != int  # noqa: E721
+                    if self.verbose_ > 0 and type(input_shape[i]) != int:  # noqa: E721
                         logger.debug(
                             f"Symbolic dimensions in input shape of op: '{node.op_type}' node: '{node.name}'. "
                             f"Assuming the dimension '{input_shape[i]}' at index {i} of the input to be equal to 1."
@@ -2308,9 +2317,15 @@ def _infer_FastGelu(self, node):  # noqa: N802
     def _infer_Gelu(self, node):  # noqa: N802
         self._propagate_shape_and_type(node)
 
+    def _infer_QuickGelu(self, node):  # noqa: N802
+        self._propagate_shape_and_type(node)
+
     def _infer_GemmFastGelu(self, node):  # noqa: N802
         self._compute_matmul_shape(node)
 
+    def _infer_GemmFloat8(self, node):  # noqa: N802
+        self._compute_matmul_shape(node)
+
     def _infer_LayerNormalization(self, node):  # noqa: N802
         self._propagate_shape_and_type(node)
         if len(node.output) > 1:
@@ -2366,6 +2381,11 @@ def _infer_SkipLayerNormalization(self, node):  # noqa: N802
     def _infer_GroupNorm(self, node):  # noqa: N802
         self._propagate_shape_and_type(node)
 
+    def _infer_SkipGroupNorm(self, node):  # noqa: N802
+        self._propagate_shape_and_type(node, 0, 0)
+        if len(node.output) > 1:
+            self._propagate_shape_and_type(node, 0, 1)
+
     def _infer_BiasSplitGelu(self, node):  # noqa: N802
         input_shape = self._get_shape(node, 0)
         bias_shape = self._get_shape(node, 1)
@@ -2379,16 +2399,29 @@ def _infer_BiasSplitGelu(self, node):  # noqa: N802
     def _infer_BiasAdd(self, node):  # noqa: N802
         self._propagate_shape_and_type(node)
 
+    def _infer_RotaryEmbedding(self, node):  # noqa: N802
+        if len(node.output) == 1:
+            self._propagate_shape_and_type(node)
+        elif len(node.output) == 2:
+            # Extraneous constant nodes outputted by RotaryEmbedding function made with `export_modules_as_functions`
+            self._propagate_shape_and_type(node, input_index=1, output_index=0)
+            self._propagate_shape_and_type(node, input_index=0, output_index=1)  # true output
+        elif len(node.output) == 3:
+            # Extraneous constant nodes outputted by RotaryEmbedding function made with `export_modules_as_functions`
+            self._propagate_shape_and_type(node, input_index=1, output_index=0)
+            self._propagate_shape_and_type(node, input_index=1, output_index=1)
+            self._propagate_shape_and_type(node, input_index=0, output_index=2)  # true output
+
     def _infer_PythonOp(self, node):  # noqa: N802
         output_tensor_types = get_attribute(node, "output_tensor_types")
         assert output_tensor_types
         output_tensor_ranks = get_attribute(node, "output_tensor_ranks")
         assert output_tensor_ranks
 
-        from onnxruntime.training.ortmodule._custom_autograd_function_exporter import PythonOpShapeInferStore
+        from onnxruntime.capi._pybind_state import get_shape_inference_function
 
         func_name = get_attribute(node, "func_name").decode()
-        shape_inferer = PythonOpShapeInferStore.get_shape_infer(func_name)
+        shape_inferer = get_shape_inference_function(func_name)
 
         # Set the context output separately.
         # The first output is torch.autograd.Function''s context.
@@ -2430,7 +2463,7 @@ def _propagate_shape_and_type(self, node, input_index=0, output_index=0):
         vi.CopyFrom(helper.make_tensor_value_info(node.output[output_index], output_dtype, shape))
 
     def _is_none_dim(self, dim_value):
-        if type(dim_value) != str:
+        if type(dim_value) != str:  # noqa: E721
             return False
         if "unk__" not in dim_value:
             return False
@@ -2464,7 +2497,7 @@ def _infer_impl(self, start_sympy_data=None):
                     # some models use None for symbolic dim in input, replace it with a string
                     input_dims[i_dim].dim_param = str(self._new_symbolic_dim(i.name, i_dim))
 
-            self.input_symbols_.update([d for d in input_shape if type(d) == str])
+            self.input_symbols_.update([d for d in input_shape if type(d) == str])  # noqa: E721
 
         for s in self.input_symbols_:
             if s in self.suggested_merge_:
@@ -2584,12 +2617,19 @@ def get_prereq(node):
                         self._check_merged_dims(in_dims, allow_broadcast=True)
 
             for i_o in range(len(node.output)):
-                # Special case: We do not care about the training related
-                # outputs of SkipLayerNormalization
+                # Special cases:
+                # 1) We do not care about the training related outputs of SkipLayerNormalization
+                # 2) We do not care about the extraneous constant outputs in RotaryEmbedding because
+                # the RotaryEmbedding op created during export can be replaced by the RotaryEmbedding
+                # contrib op
                 if (
                     node.op_type == "SkipLayerNormalization" or node.op_type == "SkipSimplifiedLayerNormalization"
                 ) and i_o in [1, 2]:
                     continue
+                if node.op_type == "RotaryEmbedding" and len(node.output) > 1:
+                    # Skip symbolic shape inference for RotaryEmbedding functions that have extraneous outputs
+                    # generated by `export_modules_as_functions`
+                    continue
 
                 vi = self.known_vi_[node.output[i_o]]
                 out_type = vi.type
@@ -2751,13 +2791,13 @@ def get_prereq(node):
                             if i in self.known_vi_:
                                 logger.debug(self.known_vi_[i])
                             else:
-                                logger.debug(f"not in knwon_vi_ for {i}")
+                                logger.debug(f"not in known_vi_ for {i}")
                         logger.debug("node outputs:")
                         for o in node.output:
                             if o in self.known_vi_:
                                 logger.debug(self.known_vi_[o])
                             else:
-                                logger.debug(f"not in knwon_vi_ for {o}")
+                                logger.debug(f"not in known_vi_ for {o}")
                         if self.auto_merge_ and not out_type_undefined:
                             logger.debug("Merging: " + str(self.suggested_merge_))
                     return False
diff --git a/onnxruntime/python/tools/tensorrt/perf/benchmark.py b/onnxruntime/python/tools/tensorrt/perf/benchmark.py
index d440cafb23236..0f06676641a96 100644
--- a/onnxruntime/python/tools/tensorrt/perf/benchmark.py
+++ b/onnxruntime/python/tools/tensorrt/perf/benchmark.py
@@ -89,7 +89,13 @@
 
 def split_and_sort_output(string_list):
     string_list = string_list.split("\n")
-    string_list.sort()
+
+    def custom_sort(item):
+        # Parse digits
+        numbers = re.findall(r"\d+", item)
+        return int(numbers[0]) if numbers else float("inf")
+
+    string_list.sort(key=custom_sort)
     return string_list
 
 
@@ -812,7 +818,7 @@ def write_map_to_file(result, file_name):
     if os.path.exists(file_name):
         existed_result = read_map_from_file(file_name)
 
-    for model, _ep_list in result.items():
+    for model in result:
         if model in existed_result:
             existed_result[model] = {**existed_result[model], **result[model]}
         else:
@@ -1122,7 +1128,7 @@ def calculate_gain(value, ep1, ep2):
 
 
 def add_improvement_information(model_to_latency):
-    for _key, value in model_to_latency.items():
+    for value in model_to_latency.values():
         if trt in value and cuda in value:
             gain = calculate_gain(value, trt, cuda)
             value[trt_cuda_gain] = f"{gain:.2f} %"
@@ -1209,13 +1215,13 @@ def add_status_dict(status_dict, model_name, ep, status):
 def build_status(status_dict, results, is_fail):
     if is_fail:
         for model, model_info in results.items():
-            for ep, _ep_info in model_info.items():
+            for ep in model_info:
                 model_name = model
                 status = "Fail"
                 add_status_dict(status_dict, model_name, ep, status)
     else:
         for model, value in results.items():
-            for ep, _ep_info in value.items():
+            for ep in value:
                 model_name = model
                 status = "Pass"
                 add_status_dict(status_dict, model_name, ep, status)
@@ -2176,7 +2182,7 @@ def parse_arguments():
         required=False,
         default=True,
         action="store_true",
-        help="Inlcude Float16 into benchmarking.",
+        help="Include Float16 into benchmarking.",
     )
 
     parser.add_argument("--trtexec", required=False, default=None, help="trtexec executable path.")
@@ -2270,7 +2276,7 @@ def main():
     logger.info(f"\nTotal models: {len(models)}")
 
     fail_model_cnt = 0
-    for key, _value in models.items():
+    for key in models:
         if key in model_to_fail_ep:
             fail_model_cnt += 1
     logger.info(f"Fail models: {fail_model_cnt}")
diff --git a/onnxruntime/python/tools/tensorrt/perf/perf_utils.py b/onnxruntime/python/tools/tensorrt/perf/perf_utils.py
index c46cadc2c1752..c639c6c73c82b 100644
--- a/onnxruntime/python/tools/tensorrt/perf/perf_utils.py
+++ b/onnxruntime/python/tools/tensorrt/perf/perf_utils.py
@@ -279,7 +279,7 @@ def calculate_trt_latency_percentage(trt_op_map):
             op_map = trt_op_map[ep]
 
             total_time = 0
-            for _key, value in op_map.items():
+            for value in op_map.values():
                 total_time += int(value)
 
             if ep == "TensorrtExecutionProvider":
diff --git a/onnxruntime/python/tools/transformers/benchmark.py b/onnxruntime/python/tools/transformers/benchmark.py
index 97330295e17ed..f506516442b1e 100644
--- a/onnxruntime/python/tools/transformers/benchmark.py
+++ b/onnxruntime/python/tools/transformers/benchmark.py
@@ -779,7 +779,7 @@ def main():
         logger.error("fp16 is for GPU only")
         return
 
-    if args.precision == Precision.INT8 and args.use_gpu:
+    if args.precision == Precision.INT8 and args.use_gpu and args.provider != "migraphx":
         logger.error("int8 is for CPU only")
         return
 
diff --git a/onnxruntime/python/tools/transformers/benchmark_helper.py b/onnxruntime/python/tools/transformers/benchmark_helper.py
index 67d3c95922a87..b6f7a44450c62 100644
--- a/onnxruntime/python/tools/transformers/benchmark_helper.py
+++ b/onnxruntime/python/tools/transformers/benchmark_helper.py
@@ -33,6 +33,7 @@ class Precision(Enum):
     FLOAT32 = "fp32"
     FLOAT16 = "fp16"
     INT8 = "int8"
+    INT4 = "int4"
 
     def __str__(self):
         return self.value
@@ -542,7 +543,7 @@ def measure_gpu_usage(self):
         while True:
             for i in range(device_count):
                 max_gpu_usage[i] = max(max_gpu_usage[i], self.get_used_memory(i))
-            time.sleep(0.005)  # 2ms
+            time.sleep(0.005)  # 5ms
             if not self.keep_measuring:
                 break
         return [
@@ -555,7 +556,7 @@ def measure_gpu_usage(self):
         ]
 
 
-def measure_memory(is_gpu, func, monitor_type="cuda"):
+def measure_memory(is_gpu, func, monitor_type="cuda", start_memory=None):
     memory_monitor_type = None
     if monitor_type == "rocm":
         memory_monitor_type = RocmMemoryMonitor
@@ -565,10 +566,16 @@ def measure_memory(is_gpu, func, monitor_type="cuda"):
     monitor = memory_monitor_type(False)
 
     if is_gpu:
-        memory_before_test = monitor.measure_gpu_usage()
+        if start_memory is not None:
+            memory_before_test = start_memory
+        else:
+            memory_before_test = monitor.measure_gpu_usage()
         if memory_before_test is None:
             return None
 
+        if func is None:
+            return memory_before_test
+
         with ThreadPoolExecutor() as executor:
             monitor = memory_monitor_type()
             mem_thread = executor.submit(monitor.measure_gpu_usage)
@@ -595,10 +602,16 @@ def measure_memory(is_gpu, func, monitor_type="cuda"):
         return None
 
     # CPU memory
-    memory_before_test = monitor.measure_cpu_usage()
+    if start_memory is not None:
+        memory_before_test = start_memory
+    else:
+        memory_before_test = monitor.measure_cpu_usage()
+
+    if func is None:
+        return memory_before_test
 
     with ThreadPoolExecutor() as executor:
-        monitor = MemoryMonitor()
+        monitor = memory_monitor_type()
         mem_thread = executor.submit(monitor.measure_cpu_usage)
         try:
             fn_thread = executor.submit(func)
diff --git a/onnxruntime/python/tools/transformers/convert_generation.py b/onnxruntime/python/tools/transformers/convert_generation.py
index c0cabbb5e9759..b59af41c49df7 100644
--- a/onnxruntime/python/tools/transformers/convert_generation.py
+++ b/onnxruntime/python/tools/transformers/convert_generation.py
@@ -1272,7 +1272,139 @@ def find_past_seq_len_usage(subg: GraphProto):
     return tensor_names_to_rename, nodes_to_remove
 
 
-def update_decoder_subgraph_share_buffer_and_use_decoder_masked_mha(subg: GraphProto):
+def replace_mha_with_gqa(model: OnnxModel, attn_mask: str, kv_num_heads: int = 0, world_size: int = 1):
+    # Insert attention_mask subgraph to calculate shared inputs for all GroupQueryAttention nodes
+    #
+    #                attention_mask
+    #               /              \
+    #          ReduceSum          Shape
+    #              |                |
+    #             Sub             Gather
+    #              |                |
+    #          seqlens_k   total_sequence_length
+    #              |                |
+    #        Cast to int32    Cast to int32
+
+    model.add_initializer(
+        onnx.helper.make_tensor(
+            name="one",
+            data_type=TensorProto.INT64,
+            dims=[1],
+            vals=[1],
+        )
+    )
+    reduce_sum_node = onnx.helper.make_node(
+        "ReduceSum",
+        inputs=[attn_mask, "one"],
+        outputs=[attn_mask + "_row_sums"],
+        name=model.create_node_name("ReduceSum"),
+    )
+    sub_node = onnx.helper.make_node(
+        "Sub",
+        inputs=[attn_mask + "_row_sums", "one"],
+        outputs=["seqlens_k_int64"],
+        name=model.create_node_name("Sub"),
+    )
+    seqlen_k_cast_node = onnx.helper.make_node(
+        "Cast",
+        inputs=["seqlens_k_int64"],
+        outputs=["seqlens_k"],
+        name=model.create_node_name("Cast"),
+        to=TensorProto.INT32,
+    )
+    shape_node = onnx.helper.make_node(
+        "Shape",
+        inputs=[attn_mask],
+        outputs=[attn_mask + "_shape"],
+        name=model.create_node_name("Shape"),
+    )
+    gather_node = onnx.helper.make_node(
+        "Gather",
+        inputs=[attn_mask + "_shape", "one"],
+        outputs=["total_seq_len_int64"],
+        name=model.create_node_name("Gather"),
+        axis=0,
+    )
+    total_seqlen_cast_node = onnx.helper.make_node(
+        "Cast",
+        inputs=["total_seq_len_int64"],
+        outputs=["total_seq_len"],
+        name=model.create_node_name("Cast"),
+        to=TensorProto.INT32,
+    )
+    model.model.graph.node.extend(
+        [reduce_sum_node, sub_node, seqlen_k_cast_node, shape_node, gather_node, total_seqlen_cast_node]
+    )
+
+    # Replace MultiHeadAttention with GroupQueryAttention
+    mha_nodes = list(filter(lambda node: node.op_type == "MultiHeadAttention", model.model.graph.node))
+    for node in mha_nodes:
+        num_heads_mha = 0
+        for att in node.attribute:
+            if att.name == "num_heads":
+                num_heads_mha = att.i
+        gqa_node = onnx.helper.make_node(
+            "GroupQueryAttention",
+            inputs=[
+                node.input[0],  # query
+                node.input[1],  # key
+                node.input[2],  # value
+                node.input[6],  # past_key
+                node.input[7],  # past_value
+                "seqlens_k",  # seqlens_k (for attention_mask)
+                "total_seq_len",  # total_seq_len (for attention_mask)
+            ],
+            outputs=node.output,
+            name=node.name.replace("MultiHeadAttention", "GroupQueryAttention"),
+            domain="com.microsoft",
+            num_heads=num_heads_mha // world_size,
+            kv_num_heads=num_heads_mha // world_size if kv_num_heads == 0 else kv_num_heads // world_size,
+        )
+        model.model.graph.node.remove(node)
+        model.model.graph.node.extend([gqa_node])
+    return model
+
+
+def update_decoder_subgraph_output_cross_attention(subg: GraphProto):
+    input_self_past_0 = 1
+    # w/wo attention mask, w/wo hidden_state
+    graph_input_names = [gi.name for gi in subg.input]
+    while input_self_past_0 < 3 and not graph_input_names[input_self_past_0].startswith("past"):
+        input_self_past_0 += 1
+    output_self_present_0 = 1
+
+    num_layers = (len(subg.output) - output_self_present_0) // 2
+    input_cross_past_0 = 2 * num_layers + input_self_past_0
+    past_key_cross_inputs = {subg.input[layer * 2 + input_cross_past_0].name: layer for layer in range(num_layers)}
+    print(f"    --past_key_cross_inputs={past_key_cross_inputs}")
+
+    input_past_key_cross_0_shape = shape_of(subg.input[input_cross_past_0])
+    print(f"past_key_cross_0_shape is {input_past_key_cross_0_shape}")
+    batch_size_dim = input_past_key_cross_0_shape[0]
+    num_heads_dim = input_past_key_cross_0_shape[1]
+    cross_seq_len_dim = input_past_key_cross_0_shape[2]
+
+    num_layer_output_qk = 0
+    for node in subg.node:
+        if (node.op_type == "DecoderMaskedMultiHeadAttention") and (node.input[1] in past_key_cross_inputs):
+            print(f"    -- add cross QK output from: node: {node.name} with output: {node.output}")
+            num_layer_output_qk += 1
+            layer = past_key_cross_inputs[node.input[1]]
+            cross_attention_out_name = f"output_cross_qk_{layer}"
+            appended_names = [""] * (3 - len(node.output))
+            appended_names.append(cross_attention_out_name)
+            node.output.extend(appended_names)
+            node.attribute.extend([onnx.helper.make_attribute("output_qk", 1)])
+
+            cross_attention = onnx.helper.make_tensor_value_info(
+                cross_attention_out_name, TensorProto.FLOAT, [batch_size_dim, num_heads_dim, 1, cross_seq_len_dim]
+            )
+            subg.output.extend([cross_attention])
+    if num_layer_output_qk != num_layers:
+        raise ValueError(f"Did not add cross QK for all layers{num_layers} vs {num_layer_output_qk}")
+
+
+def update_decoder_subgraph_share_buffer_and_use_decoder_masked_mha(subg: ModelProto):
     input_self_past_0 = 1
     # w/wo attention mask, w/wo hidden_state
     graph_input_names = [gi.name for gi in subg.input]
diff --git a/onnxruntime/python/tools/transformers/convert_to_packing_mode.py b/onnxruntime/python/tools/transformers/convert_to_packing_mode.py
index 0b8dbdcdd9638..4da97f0de7bed 100644
--- a/onnxruntime/python/tools/transformers/convert_to_packing_mode.py
+++ b/onnxruntime/python/tools/transformers/convert_to_packing_mode.py
@@ -67,7 +67,7 @@ def _try_getting_last_layernorm(self) -> Union[NodeProto, None]:
                 last_layernorm_node = node
         return last_layernorm_node
 
-    def _are_attentions_supportted(self) -> bool:
+    def _are_attentions_supported(self) -> bool:
         raise NotImplementedError()
 
     def _insert_removepadding_node(self, inputs: List[str], outputs: List[str]) -> None:
@@ -105,7 +105,7 @@ def _get_input_to_remove_padding(self, first_attention_node) -> Union[str, None]
     def convert(self, use_symbolic_shape_infer: bool = True) -> None:
         logger.debug("start converting to packing model...")
 
-        if not self._are_attentions_supportted():
+        if not self._are_attentions_supported():
             return
 
         attention_mask = self._try_getting_attention_mask()
@@ -164,7 +164,7 @@ class PackingAttention(PackingAttentionBase):
     def __init__(self, model: OnnxModel):
         super().__init__(model, Operators.ATTENTION)
 
-    def _are_attentions_supportted(self) -> bool:
+    def _are_attentions_supported(self) -> bool:
         for node in self.attention_nodes:
             if OnnxModel.get_node_attribute(node, "past_present_share_buffer") is not None:
                 return False
@@ -237,7 +237,7 @@ def _check_empty_output(self, node, index: int, name: str):
                 return False
         return True
 
-    def _are_attentions_supportted(self) -> bool:
+    def _are_attentions_supported(self) -> bool:
         for node in self.attention_nodes:
             for attr in node.attribute:
                 if attr.name not in ["num_heads", "mask_filter_value", "scale"]:
diff --git a/onnxruntime/python/tools/transformers/float16.py b/onnxruntime/python/tools/transformers/float16.py
index 222f5f5e27d98..f680a15fc2c1b 100644
--- a/onnxruntime/python/tools/transformers/float16.py
+++ b/onnxruntime/python/tools/transformers/float16.py
@@ -145,7 +145,8 @@ def make_value_info_from_tensor(tensor):
 
 
 # Some operators has data type fixed as float for some inputs. Key is op_type, value is list of input indices
-ALWAYS_FLOAT_INPUTS = {"Resize": [2], "GroupNorm": [1, 2]}
+# Note that DirectML allows float16 gamma and beta in GroupNorm. Use force_fp16_inputs parameter could overwrite this.
+ALWAYS_FLOAT_INPUTS = {"Resize": [2], "GroupNorm": [1, 2], "SkipGroupNorm": [1, 2]}
 
 
 class InitializerTracker:
@@ -402,7 +403,7 @@ def convert_float_to_float16(
 
         queue = next_level
 
-    for _key, value in fp32_initializers.items():
+    for value in fp32_initializers.values():
         # By default, to avoid precision loss, do not convert an initializer to fp16 when it is used only by fp32 nodes.
         if force_fp16_initializers or value.fp16_nodes:
             value.initializer = convert_tensor_float_to_float16(value.initializer, min_positive_val, max_finite_val)
diff --git a/onnxruntime/python/tools/transformers/fusion_attention.py b/onnxruntime/python/tools/transformers/fusion_attention.py
index 40f2aee875382..d11cb91d98b0c 100644
--- a/onnxruntime/python/tools/transformers/fusion_attention.py
+++ b/onnxruntime/python/tools/transformers/fusion_attention.py
@@ -78,7 +78,15 @@ def process_mask(self, input: str) -> str:
             # ReduceSum-13: axes is moved from attribute to input
             axes_name = "ort_const_1_reduce_sum_axes"
             if self.model.get_initializer(axes_name) is None:
-                self.add_initializer(name=axes_name, data_type=TensorProto.INT64, dims=[1], vals=[1], raw=False)
+                self.model.add_initializer(
+                    helper.make_tensor(
+                        name=axes_name,
+                        data_type=TensorProto.INT64,
+                        dims=[1],
+                        vals=[1],
+                        raw=False,
+                    )
+                )
             mask_index_node = helper.make_node(
                 "ReduceSum",
                 inputs=[input_name, axes_name],
@@ -103,7 +111,7 @@ def __init__(
         model: OnnxModel,
         hidden_size: int,
         num_heads: int,
-        attention_mask: AttentionMask,
+        attention_mask: Optional[AttentionMask] = None,
         use_multi_head_attention: bool = False,
         disable_multi_head_attention_bias: bool = False,
         search_op_types: List[str] = ["SkipLayerNormalization", "LayerNormalization"],  # noqa: B006
@@ -112,7 +120,7 @@ def __init__(
         super().__init__(model, attention_op_name, search_op_types)
         self.hidden_size = hidden_size
         self.num_heads = num_heads
-        self.attention_mask = attention_mask
+        self.attention_mask = attention_mask if attention_mask else AttentionMask(model)
         self.use_multi_head_attention = use_multi_head_attention
         self.disable_multi_head_attention_bias = disable_multi_head_attention_bias
         self.mask_filter_value = None
@@ -211,6 +219,31 @@ def get_add_qk_str(self, add_qk: NodeProto):
 
         return add_qk.input[1]
 
+    def reshape_add_qk(self, add_qk: str):
+        # Convert 4D mask from (B,1,S,T) to (B,N,S,T)
+        # B = batch size, N = num heads, S = source sequence length, T = target sequence length
+        mask_output_name = add_qk + "_mask"
+
+        # Check if concat node for (B,1,S,T) --> (B,N,S,T) already exists
+        concat_node = list(filter(lambda node: node.output[0] == mask_output_name, self.nodes_to_add))
+        if len(concat_node) == 1:
+            return mask_output_name
+
+        assert len(concat_node) == 0
+        concat_node_name = self.model.create_node_name("Concat")
+        concat_add_qk_fp32 = helper.make_node(
+            "Concat",
+            inputs=[add_qk for _ in range(self.num_heads)],
+            outputs=[mask_output_name],
+            name=concat_node_name,
+            axis=1,
+        )
+        # Add new node to graph
+        self.nodes_to_add.append(concat_add_qk_fp32)
+        self.node_name_to_graph_name[concat_node_name] = self.this_graph_name
+
+        return mask_output_name
+
     def concat_kv(self, past_k: str, past_v: str) -> str:
         """Concatenate past_k and past_v inputs to create past_kv input.
 
@@ -624,7 +657,6 @@ def create_multihead_attention_node(
             return None
 
         graph_input_names = set([node.name for node in self.model.graph().input])
-        graph_output_names = set([node.name for node in self.model.graph().output])
         mha_node_name = self.model.create_node_name("Attention")
 
         # Add initial Q/K/V inputs for MHA
@@ -640,8 +672,8 @@ def create_multihead_attention_node(
             else:
                 mha_inputs.extend([q_matmul.output[0], k_matmul.output[0], v_matmul.output[0]])
         elif (
-            type(k_matmul) == str
-            and type(v_matmul) == str
+            type(k_matmul) == str  # noqa: E721
+            and type(v_matmul) == str  # noqa: E721
             and k_matmul in graph_input_names
             and v_matmul in graph_input_names
         ):
@@ -660,12 +692,15 @@ def create_multihead_attention_node(
             mha_inputs.append("")
 
         # Add optional inputs for MHA
-        if past_k and past_v and past_k in graph_input_names and past_v in graph_input_names:
+
+        if past_k and past_v:
             mha_inputs.extend([key_padding_mask, add_qk, past_k, past_v])
+        elif key_padding_mask or add_qk:
+            mha_inputs.extend([key_padding_mask, add_qk])
 
         # Add outputs for MHA
         mha_outputs = [output]
-        if present_k and present_v and present_k in graph_output_names and present_v in graph_output_names:
+        if present_k and present_v:
             mha_outputs.extend([present_k, present_v])
 
         mha_node = helper.make_node(
@@ -867,21 +902,8 @@ def create_attention_node(
                 past_kv = self.concat_kv(past_k, past_v)
                 attention_inputs.append(past_kv)
 
-            if add_qk_str:
-                # Convert 4d mask from (B,1,M,M) to (B,N,M,M)
-                # B = batch size, M = max sequence length, N = num heads
-                concat_node_name = self.model.create_node_name("Concat")
-                mask_output_name = add_qk_str + "_mask"
-                concat_add_qk_fp32 = helper.make_node(
-                    "Concat",
-                    inputs=[add_qk_str for _ in range(num_heads)],
-                    outputs=[mask_output_name],
-                    name=concat_node_name,
-                    axis=1,
-                )
-                # Add new nodes to graph
-                self.nodes_to_add.append(concat_add_qk_fp32)
-                self.node_name_to_graph_name[concat_node_name] = self.this_graph_name
+            if add_qk_str is not None:
+                mask_output_name = self.reshape_add_qk(add_qk_str)
 
                 # Add attention mask to attention node
                 if not past_exists:
diff --git a/onnxruntime/python/tools/transformers/fusion_base.py b/onnxruntime/python/tools/transformers/fusion_base.py
index 117468be412fa..67f4f0b55cff8 100644
--- a/onnxruntime/python/tools/transformers/fusion_base.py
+++ b/onnxruntime/python/tools/transformers/fusion_base.py
@@ -113,3 +113,25 @@ def add_initializer(self, name: str, data_type: int, dims: Sequence[int], vals:
 
         self.model.add_initializer(tensor, self.this_graph_name)
         return tensor
+
+    def add_nodes_to_remove(self, nodes: List[NodeProto]):
+        # Some nodes are shared between paths (e.g. rotary embedding nodes in the Q and K paths).
+        # When path A is fused, its shared nodes are added to `self.nodes_to_remove`. But when path B
+        # is fused, its shared nodes are also added to `self.nodes_to_remove`. When the nodes are
+        # iteratively removed from `self.nodes_to_remove`, path A's shared nodes are removed first.
+        # Since path A's shared nodes are removed, path B's shared nodes are not removed because they
+        # were previously removed for path A. This causes an error to print in remove_node that a node
+        # has failed to be removed.
+        #
+        # To avoid this error, we pre-emptively check if the shared nodes are already in `self.nodes_to_remove`.
+        # We could alternatively convert `self.nodes_to_remove` to a set to avoid this issue, but there could
+        # be scenarios where the nodes need to be removed in a specific order and converting to a set would
+        # lose this order.
+        for node in nodes:
+            if node not in self.nodes_to_remove:
+                self.nodes_to_remove.append(node)
+
+    def add_nodes_to_remove_with_nodes_to_keep(self, nodes: List[NodeProto], nodes_to_keep: List[NodeProto]):
+        for node in nodes:
+            if node not in self.nodes_to_remove and node not in nodes_to_keep:
+                self.nodes_to_remove.append(node)
diff --git a/onnxruntime/python/tools/transformers/fusion_conformer_attention.py b/onnxruntime/python/tools/transformers/fusion_conformer_attention.py
new file mode 100644
index 0000000000000..6bc681c57444e
--- /dev/null
+++ b/onnxruntime/python/tools/transformers/fusion_conformer_attention.py
@@ -0,0 +1,143 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation.  All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+import logging
+
+from fusion_attention import AttentionMask, FusionAttention
+from onnx_model import OnnxModel
+
+logger = logging.getLogger(__name__)
+
+
+class FusionConformerAttention(FusionAttention):
+    """
+    Fuse Conformer Attention subgraph into one MultiHeadAttention node.
+    """
+
+    def __init__(
+        self,
+        model: OnnxModel,
+        hidden_size: int,
+        num_heads: int,
+        attention_mask: AttentionMask,
+    ):
+        super().__init__(model, hidden_size, num_heads, attention_mask)
+
+    def fuse(self, normalize_node, input_name_to_nodes, output_name_to_node):
+        # SkipLayerNormalization has two inputs, and one of them is the root input for attention.
+        qkv_nodes = self.model.match_parent_path(
+            normalize_node,
+            ["Add", "MatMul", "Reshape", "Transpose", "MatMul"],
+            [1, 1, 0, 0, 0],
+        )
+        if qkv_nodes is not None:
+            (
+                _,
+                _,
+                reshape_qkv,
+                transpose_qkv,
+                matmul_qkv,
+            ) = qkv_nodes
+        else:
+            logger.debug("fuse_conformer_attention: failed to match qkv path")
+            return
+
+        v_nodes = self.model.match_parent_path(
+            matmul_qkv,
+            ["Concat", "Transpose", "Reshape", "Add", "MatMul"],
+            [1, 1, 0, 0, 1],
+        )
+
+        add_v = None
+        if v_nodes is not None:
+            (concat_v, _, _, add_v, matmul_v) = v_nodes
+            concat_parent = self.model.get_parent(concat_v, 0, None)
+            present_v = concat_v.output[0]
+            past_v = concat_parent.output[0]
+        else:
+            logger.debug("fuse_conformer_attention: failed to match v path")
+            return
+
+        qk_nodes = self.model.match_parent_path(matmul_qkv, ["Softmax", "Add", "MatMul"], [0, 0, 0])
+
+        if qk_nodes is not None:
+            _, add_qk, matmul_qk = qk_nodes
+        else:
+            logger.debug("fuse_conformer_attention: failed to match qk path")
+            return
+
+        q_nodes = self.model.match_parent_path(
+            matmul_qk,
+            ["Div", "Transpose", "Reshape", "Add", "MatMul"],
+            [0, 0, 0, 0, 1],
+        )
+        if q_nodes is not None:
+            _, _, reshape_q, add_q, matmul_q = q_nodes
+        else:
+            logger.debug("fuse_conformer_attention: failed to match q path")
+            return
+
+        k_nodes = self.model.match_parent_path(
+            matmul_qk,
+            ["Transpose", "Concat", "Transpose", "Reshape", "Add", "MatMul"],
+            [1, 0, 1, 0, 0, 1],
+        )
+
+        matmul_k = None
+        if k_nodes is not None:
+            _, concat_k, _, _, add_k, matmul_k = k_nodes
+            concat_parent = self.model.get_parent(concat_k, 0, None)
+            past_k = concat_parent.output[0]
+            present_k = concat_k.output[0]
+        else:
+            logger.debug("fuse_conformer_attention: failed to match k path")
+            return
+
+        attention_last_node = reshape_qkv
+        num_heads, hidden_size = self.get_num_heads_and_hidden_size(reshape_q)
+
+        if num_heads <= 0 or hidden_size <= 0 or (hidden_size % num_heads) != 0:
+            logger.debug("fuse_conformer_attention: failed to detect num_heads or hidden_size")
+            return
+
+        new_node = self.create_multihead_attention_node(
+            matmul_q,
+            matmul_k,
+            matmul_v,
+            add_q,
+            add_k,
+            add_v,
+            num_heads,
+            hidden_size,
+            attention_last_node.output[0],
+            add_qk=add_qk.input[1],
+            past_k=past_k,
+            past_v=past_v,
+            present_k=present_k,
+            present_v=present_v,
+        )
+
+        if new_node is None:
+            logger.debug("fuse_conformer_attention: MultiHeadAttention node creation failed")
+            return
+
+        self.nodes_to_add.append(new_node)
+        self.node_name_to_graph_name[new_node.name] = self.this_graph_name
+
+        self.nodes_to_remove.extend([attention_last_node, transpose_qkv, matmul_qkv])
+        self.nodes_to_remove.extend(qk_nodes)
+
+        # When using multihead attention, keep MatMul nodes in original graph
+        if q_nodes[-1].op_type == "MatMul":
+            q_nodes.pop()
+        if k_nodes[-1].op_type == "MatMul":
+            k_nodes.pop()
+        if v_nodes[-1].op_type == "MatMul":
+            v_nodes.pop()
+
+        self.nodes_to_remove.extend(k_nodes)
+        self.nodes_to_remove.extend(v_nodes)
+
+        # Use prune graph to remove mask nodes since they are shared by all attention nodes.
+        self.prune_graph = True
diff --git a/onnxruntime/python/tools/transformers/fusion_group_norm.py b/onnxruntime/python/tools/transformers/fusion_group_norm.py
index a4491d29b3698..c718d2c27e015 100644
--- a/onnxruntime/python/tools/transformers/fusion_group_norm.py
+++ b/onnxruntime/python/tools/transformers/fusion_group_norm.py
@@ -82,19 +82,11 @@ def fuse(self, add_node, input_name_to_nodes: Dict, output_name_to_node: Dict):
             return
 
         instance_norm_scale = self.model.get_constant_value(instance_norm.input[1])
-        if instance_norm_scale is None:
-            return
-        instance_norm_bias = self.model.get_constant_value(instance_norm.input[2])
-        if instance_norm_bias is None:
+        if instance_norm_scale is None or len(instance_norm_scale.shape) != 1:
             return
 
-        if not (
-            len(instance_norm_scale.shape) == 1
-            and len(instance_norm_bias.shape) == 1
-            and instance_norm_scale.shape == instance_norm_bias.shape
-            and instance_norm_scale.shape[0] == 32
-        ):
-            logger.info("InstanceNormalization groups=%d", instance_norm_scale.shape[0])
+        instance_norm_bias = self.model.get_constant_value(instance_norm.input[2])
+        if instance_norm_bias is None or instance_norm_scale.shape != instance_norm_scale.shape:
             return
 
         if not np.allclose(np.ones_like(instance_norm_scale), instance_norm_scale):
@@ -104,9 +96,6 @@ def fuse(self, add_node, input_name_to_nodes: Dict, output_name_to_node: Dict):
 
         group_norm_name = self.model.create_node_name("GroupNorm", name_prefix="GroupNorm")
 
-        if weight_elements not in [320, 640, 960, 1280, 1920, 2560, 128, 256, 512]:
-            logger.info("GroupNorm channels=%d", weight_elements)
-
         self.add_initializer(
             name=group_norm_name + "_gamma",
             data_type=TensorProto.FLOAT,
diff --git a/onnxruntime/python/tools/transformers/fusion_options.py b/onnxruntime/python/tools/transformers/fusion_options.py
index 69b5cd26f4525..b9b92d2fe8a00 100644
--- a/onnxruntime/python/tools/transformers/fusion_options.py
+++ b/onnxruntime/python/tools/transformers/fusion_options.py
@@ -26,6 +26,7 @@ def __init__(self, model_type):
         self.enable_gelu = True
         self.enable_layer_norm = True
         self.enable_attention = True
+        self.enable_rotary_embeddings = True
 
         # Use MultiHeadAttention instead of Attention operator. The difference:
         # (1) Attention has merged weights for Q/K/V projection, which might be faster in some cases since 3 MatMul is
@@ -60,6 +61,7 @@ def __init__(self, model_type):
         if model_type in ["unet", "vae", "clip"]:
             self.enable_nhwc_conv = True
             self.enable_group_norm = True
+            self.enable_skip_group_norm = True
             self.enable_bias_splitgelu = True
             self.enable_packed_qkv = True
             self.enable_packed_kv = True
@@ -81,6 +83,8 @@ def parse(args):
             options.enable_gelu = False
         if args.disable_layer_norm:
             options.enable_layer_norm = False
+        if args.disable_rotary_embeddings:
+            options.enable_rotary_embeddings = False
         if args.disable_attention:
             options.enable_attention = False
         if args.use_multi_head_attention:
@@ -113,6 +117,8 @@ def parse(args):
                 options.enable_nhwc_conv = False
             if args.disable_group_norm:
                 options.enable_group_norm = False
+            if args.disable_skip_group_norm:
+                options.enable_skip_group_norm = False
             if args.disable_bias_splitgelu:
                 options.enable_bias_splitgelu = False
             if args.disable_packed_qkv:
@@ -247,6 +253,14 @@ def add_arguments(parser: ArgumentParser):
         )
         parser.set_defaults(disable_group_norm=False)
 
+        parser.add_argument(
+            "--disable_skip_group_norm",
+            required=False,
+            action="store_true",
+            help="not fuse Add + GroupNorm to SkipGroupNorm. Only works for model_type=unet or vae",
+        )
+        parser.set_defaults(disable_skip_group_norm=False)
+
         parser.add_argument(
             "--disable_packed_kv",
             required=False,
@@ -294,3 +308,10 @@ def add_arguments(parser: ArgumentParser):
             help="Use channels_first (NCHW) instead of channels_last (NHWC) for GroupNorm. Only works for model_type=unet or vae",
         )
         parser.set_defaults(use_group_norm_channels_first=False)
+
+        parser.add_argument(
+            "--disable_rotary_embeddings",
+            required=False,
+            action="store_true",
+            help="Do not fuse rotary embeddings into RotaryEmbedding op",
+        )
diff --git a/onnxruntime/python/tools/transformers/fusion_rotary_attention.py b/onnxruntime/python/tools/transformers/fusion_rotary_attention.py
new file mode 100644
index 0000000000000..de89b35366a23
--- /dev/null
+++ b/onnxruntime/python/tools/transformers/fusion_rotary_attention.py
@@ -0,0 +1,1382 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation.  All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+import logging
+from typing import Optional, Union
+
+from fusion_attention import FusionAttention
+from fusion_base import Fusion
+from onnx import FunctionProto, NodeProto, TensorProto, helper, numpy_helper
+from onnx_model import OnnxModel
+
+logger = logging.getLogger(__name__)
+
+
+class FusionRotaryAttention(FusionAttention):
+    """
+    Fuse Attention subgraph with rotary positional embeddings into one MultiHeadAttention node.
+    """
+
+    def __init__(
+        self,
+        model: OnnxModel,
+        hidden_size: int,
+        num_heads: int,
+    ):
+        super().__init__(
+            model,
+            hidden_size,
+            num_heads,
+            use_multi_head_attention=True,
+            search_op_types=[
+                "SimplifiedLayerNormalization",
+                "SkipSimplifiedLayerNormalization",
+                "LayerNormalization",
+                "SkipLayerNormalization",
+                "Add",
+            ],
+        )
+
+    def create_mha_node(
+        self,
+        input: str,
+        output: str,
+        q_rotary: NodeProto,
+        k_rotary: NodeProto,
+        v_matmul: NodeProto,
+        attn_mask: str = "",
+        add_qk: str = "",
+        past_k: str = "",
+        past_v: str = "",
+        present_k: str = "",
+        present_v: str = "",
+        scale: Optional[float] = None,
+    ) -> Union[NodeProto, None]:
+        assert self.num_heads > 0
+
+        if self.hidden_size > 0 and (self.hidden_size % self.num_heads) != 0:
+            logger.debug(
+                f"fuse_rotary_attention: input hidden size {self.hidden_size} is not a multiple of num of heads {self.num_heads}"
+            )
+            return None
+
+        mha_node_name = self.model.create_node_name("MultiHeadAttention")
+        mha_inputs = [
+            q_rotary.output[0],
+            k_rotary.output[0],
+            v_matmul.output[0],
+            "",  # bias
+            attn_mask,  # key_padding_mask
+            add_qk,  # relative_position_bias
+            past_k,
+            past_v,
+        ]
+
+        mha_outputs = [output]
+        if present_k and present_v:
+            mha_outputs.extend([present_k, present_v])
+
+        mha_node = helper.make_node(
+            "MultiHeadAttention",
+            inputs=mha_inputs,
+            outputs=mha_outputs,
+            name=mha_node_name,
+        )
+
+        mha_node.domain = "com.microsoft"
+        mha_node.attribute.extend([helper.make_attribute("num_heads", self.num_heads)])
+        if scale is not None:
+            mha_node.attribute.extend([helper.make_attribute("scale", scale)])
+        if self.mask_filter_value is not None:
+            mha_node.attribute.extend([helper.make_attribute("mask_filter_value", float(self.mask_filter_value))])
+
+        self.increase_counter("MultiHeadAttention")
+        return mha_node
+
+    def check_runtime_shape_paths_for_function(
+        self,
+        reshape_qkv_2,  # Reshape after Transpose
+        reshape_qkv_1,  # Reshape before Transpose
+        reshape_q_2,  # Reshape after RotaryEmbedding
+        reshape_k_2,  # Reshape after RotaryEmbedding
+        reshape_v_2,  # Reshape after Transpose
+        reshape_v_1,  # Reshape before Transpose
+        add_qk,  # Add before Softmax
+        root_input,  # Root input to attention subgraph
+    ):
+        # Check #1: check paths for qkv nodes
+        concat_qkv_2_path = self.model.match_parent_path(reshape_qkv_2, ["Concat"], [1])
+        concat_qkv_1_path = self.model.match_parent_path(reshape_qkv_1, ["Concat"], [1])
+        if concat_qkv_2_path is None or concat_qkv_1_path is None:
+            return False
+        concat_qkv_2, concat_qkv_1 = concat_qkv_2_path[0], concat_qkv_1_path[0]
+
+        reshape_qkv_2_path_1 = self.model.match_parent_path(concat_qkv_2, ["Unsqueeze", "Gather", "Shape"], [0, 0, 0])
+        reshape_qkv_2_path_2 = self.model.match_parent_path(concat_qkv_2, ["Unsqueeze", "Gather", "Shape"], [1, 0, 0])
+        reshape_qkv_1_path_1 = self.model.match_parent_path(concat_qkv_1, ["Unsqueeze", "Gather", "Shape"], [0, 0, 0])
+        reshape_qkv_1_path_2 = self.model.match_parent_path(concat_qkv_1, ["Unsqueeze", "Gather", "Shape"], [2, 0, 0])
+        if (
+            reshape_qkv_2_path_1 is None
+            or reshape_qkv_2_path_2 is None
+            or reshape_qkv_1_path_1 is None
+            or reshape_qkv_1_path_2 is None
+        ):
+            return False
+
+        _, gather_1, shape_1 = reshape_qkv_2_path_1
+        _, gather_2, shape_2 = reshape_qkv_2_path_2
+
+        # Check root_input --> Shape --> Gather connection
+        if shape_1.input[0] != root_input or shape_2.input[0] != root_input:
+            return False
+
+        # Check Gather --> Unsqueeze --> Concat --> Reshape connection for reshape_qkv_1_path_1 and reshape_qkv_1_path_2
+        if reshape_qkv_1_path_1[1].name != gather_1.name or reshape_qkv_1_path_2[1].name != gather_2.name:
+            return False
+
+        # Check #2: check paths for v nodes
+        concat_v_2_path = self.model.match_parent_path(reshape_v_2, ["Concat"], [1])
+        concat_v_1_path = self.model.match_parent_path(reshape_v_1, ["Concat"], [1])
+        if concat_v_2_path is None or concat_v_1_path is None:
+            return False
+        concat_v_2, concat_v_1 = concat_v_2_path[0], concat_v_1_path[0]
+
+        reshape_v_2_path_1 = self.model.match_parent_path(
+            concat_v_2, ["Unsqueeze", "Mul", "Gather", "Shape"], [0, 0, 0, 0]
+        )
+        reshape_v_2_path_2 = self.model.match_parent_path(
+            concat_v_2, ["Unsqueeze", "Add", "Gather", "Shape"], [1, 0, 0, 0]
+        )
+        reshape_v_1_path_1 = self.model.match_parent_path(concat_v_1, ["Unsqueeze", "Gather", "Shape"], [0, 0, 0])
+        reshape_v_1_path_2 = self.model.match_parent_path(concat_v_1, ["Unsqueeze", "Gather", "Shape"], [1, 0, 0])
+        if (
+            reshape_v_2_path_1 is None
+            or reshape_v_2_path_2 is None
+            or reshape_v_1_path_1 is None
+            or reshape_v_1_path_2 is None
+        ):
+            return False
+
+        # Check Gather --> Mul --> Unsqueeze --> Concat --> Reshape connection for reshape_v_2_path_1
+        # Check Gather --> Add --> Unsqueeze --> Concat --> Reshape connection for reshape_v_2_path_2
+        # Check Gather --> Unsqueeze --> Concat --> Reshape connection for reshape_v_1_path_1 and reshape_v_1_path_2
+        if (
+            reshape_v_2_path_1[2].name != gather_1.name
+            or reshape_v_2_path_2[2].name != gather_2.name
+            or reshape_v_1_path_1[1].name != gather_1.name
+            or reshape_v_1_path_2[1].name != gather_2.name
+        ):
+            return False
+
+        # Check #3: check paths for k nodes
+        concat_k_2_path = self.model.match_parent_path(reshape_k_2, ["Concat"], [1])
+        if concat_k_2_path is None:
+            return False
+        concat_k_2 = concat_k_2_path[0]
+
+        reshape_k_2_path_1 = self.model.match_parent_path(
+            concat_k_2, ["Unsqueeze", "Mul", "Gather", "Shape"], [0, 0, 0, 0]
+        )
+        reshape_k_2_path_2 = self.model.match_parent_path(
+            concat_k_2, ["Unsqueeze", "Add", "Gather", "Shape"], [2, 0, 0, 0]
+        )
+        if reshape_k_2_path_1 is None or reshape_k_2_path_2 is None:
+            return False
+
+        # Check Gather --> Mul --> Unsqueeze --> Concat --> Reshape connection for reshape_k_2_path_1
+        # Check Gather --> Add --> Unsqueeze --> Concat --> Reshape connection for reshape_k_2_path_2
+        if reshape_k_2_path_1[2].name != gather_1.name or reshape_k_2_path_2[2].name != gather_2.name:
+            return False
+
+        # Check #4: check paths for q nodes
+        concat_q_2_path = self.model.match_parent_path(reshape_q_2, ["Concat"], [1])
+        if concat_q_2_path is None:
+            return False
+        concat_q_2 = concat_q_2_path[0]
+
+        reshape_q_2_path_1 = self.model.match_parent_path(
+            concat_q_2, ["Unsqueeze", "Mul", "Gather", "Shape"], [0, 0, 0, 0]
+        )
+        reshape_q_2_path_2 = self.model.match_parent_path(concat_q_2, ["Unsqueeze", "Gather", "Shape"], [1, 0, 0])
+        if reshape_q_2_path_1 is None or reshape_q_2_path_2 is None:
+            return False
+
+        # Check Gather --> Mul --> Unsqueeze --> Concat --> Reshape connection for reshape_q_2_path_1
+        # Check Gather --> Unsqueeze --> Concat --> Reshape connection for reshape_q_2_path_2
+        if reshape_q_2_path_1[2].name != gather_1.name or reshape_q_2_path_2[1].name != gather_2.name:
+            return False
+
+        # Check #5: check Mul nodes are the same for q, k, v
+        mul_q = reshape_q_2_path_1[1]
+        mul_k = reshape_k_2_path_1[1]
+        mul_v = reshape_v_2_path_1[1]
+        gather_1_out = gather_1.output[0]
+        if mul_q.input[0] != gather_1_out or mul_k.input[0] != gather_1_out or mul_v.input[0] != gather_1_out:
+            return False
+
+        # Check #6: check paths for attention mask nodes
+        attn_mask_path_1 = self.model.match_parent_path(add_qk, ["Concat", "Slice", "Slice"], [1, 0, 0])
+        attn_mask_path_2 = self.model.match_parent_path(add_qk, ["Cast", "Concat", "Slice", "Slice"], [1, 0, 0, 0])
+        if attn_mask_path_1 is not None:
+            _, slice_qk_2, slice_qk_1 = attn_mask_path_1
+        elif attn_mask_path_2 is not None:
+            _, _, slice_qk_2, slice_qk_1 = attn_mask_path_2
+        else:
+            return False
+        # Check first input to Slice #1 is 3D attention mask of shape (B,S,T)
+        if slice_qk_1.input[0] not in {"attn_mask", "attention_mask"}:
+            return False
+
+        slice_qk_2_path = self.model.match_parent_path(
+            slice_qk_2, ["Unsqueeze", "Add", "Gather", "Shape"], [2, 0, 1, 0]
+        )
+        slice_qk_1_path_1 = self.model.match_parent_path(
+            slice_qk_1, ["Unsqueeze", "Add", "Gather", "Shape"], [2, 0, 1, 0]
+        )
+        slice_qk_1_path_2 = self.model.match_parent_path(slice_qk_1, ["Unsqueeze"], [1])
+        if slice_qk_2_path is None or slice_qk_1_path_1 is None or slice_qk_1_path_2 is None:
+            return False
+
+        # Check Gather --> Add --> Unsqueeze #3 --> Slice #2 connection for slice_qk_2_path
+        # Check Gather --> Add --> Unsqueeze #2 --> Slice #1 connection for slice_qk_1_path_1
+        if slice_qk_2_path[1].name != slice_qk_1_path_1[1].name or slice_qk_2_path[2].name != slice_qk_1_path_1[2].name:
+            return False
+
+        # Check Unsqueeze #1 --> Slice #1 connection for slice_qk_1_path_2
+        # Check if first input to Add and Unsqueeze #1 is position ids
+        if slice_qk_1_path_1[1].input[0] != slice_qk_1_path_2[0].input[0]:
+            return False
+
+        return True
+
+    def check_runtime_shape_paths_for_nodes(
+        self,
+        reshape_qkv,  # Final reshape before o_proj MatMul
+        reshape_q,  # Reshape before q_proj MatMul
+        reshape_k,  # Reshape before k_proj MatMul
+        reshape_v,  # Reshape before v_proj MatMul
+        root_input,  # Root input to attention subgraph
+    ):
+        # Check #1: check paths for qkv nodes
+        concat_qkv_path = self.model.match_parent_path(reshape_qkv, ["Concat"], [1])
+        if concat_qkv_path is None:
+            return False
+        concat_qkv = concat_qkv_path[0]
+
+        reshape_qkv_path_1 = self.model.match_parent_path(concat_qkv, ["Unsqueeze", "Gather", "Shape"], [0, 0, 0])
+        reshape_qkv_path_2 = self.model.match_parent_path(concat_qkv, ["Unsqueeze", "Gather", "Shape"], [1, 0, 0])
+        if reshape_qkv_path_1 is None or reshape_qkv_path_2 is None:
+            return False
+
+        _, gather_1, shape_1 = reshape_qkv_path_1
+        _, gather_2, shape_2 = reshape_qkv_path_2
+
+        # Check root_input --> Shape --> Gather connection
+        if shape_1.input[0] != root_input or shape_2.input[0] != root_input:
+            return False
+
+        # Check #2: check paths for v nodes
+        concat_v_path = self.model.match_parent_path(reshape_v, ["Concat"], [1])
+        if concat_v_path is None:
+            return False
+        concat_v = concat_v_path[0]
+
+        reshape_v_path_1 = self.model.match_parent_path(concat_v, ["Unsqueeze", "Gather", "Shape"], [0, 0, 0])
+        reshape_v_path_2 = self.model.match_parent_path(concat_v, ["Unsqueeze", "Gather", "Shape"], [1, 0, 0])
+        if reshape_v_path_1 is None or reshape_v_path_2 is None:
+            return False
+
+        # Check Gather --> Unsqueeze --> Concat --> Reshape connection
+        if reshape_v_path_1[1].name != gather_1.name or reshape_v_path_2[1].name != gather_2.name:
+            return False
+
+        # Check #3: check paths for k nodes
+        concat_k_path = self.model.match_parent_path(reshape_k, ["Concat"], [1])
+        if concat_k_path is None:
+            return False
+        concat_k = concat_k_path[0]
+
+        reshape_k_path_1 = self.model.match_parent_path(concat_k, ["Unsqueeze", "Gather", "Shape"], [0, 0, 0])
+        reshape_k_path_2 = self.model.match_parent_path(concat_k, ["Unsqueeze", "Gather", "Shape"], [1, 0, 0])
+        if reshape_k_path_1 is None or reshape_k_path_2 is None:
+            return False
+
+        # Check Gather --> Unsqueeze --> Concat --> Reshape connection
+        if reshape_k_path_1[1].name != gather_1.name or reshape_k_path_2[1].name != gather_2.name:
+            return False
+
+        # Check #4: check paths for q nodes
+        concat_q_path = self.model.match_parent_path(reshape_q, ["Concat"], [1])
+        if concat_q_path is None:
+            return False
+        concat_q = concat_q_path[0]
+
+        reshape_q_path_1 = self.model.match_parent_path(concat_q, ["Unsqueeze", "Gather", "Shape"], [0, 0, 0])
+        reshape_q_path_2 = self.model.match_parent_path(concat_q, ["Unsqueeze", "Gather", "Shape"], [1, 0, 0])
+        if reshape_q_path_1 is None or reshape_q_path_2 is None:
+            return False
+
+        # Check Gather --> Unsqueeze --> Concat --> Reshape connection
+        if reshape_q_path_1[1].name != gather_1.name or reshape_q_path_2[1].name != gather_2.name:
+            return False
+
+        return True
+
+    def fuse(self, normalize_node, input_name_to_nodes, output_name_to_node):
+        if normalize_node.op_type not in {"SkipSimplifiedLayerNormalization", "SkipLayerNormalization", "Add"}:
+            return
+
+        # qkv_nodes_1 is for LLaMA-2 Microsoft
+        # qkv_nodes_2 is for LLaMA-2 Hugging Face
+        # qkv_nodes_3 is for LLaMA-2 distribute Hugging Face model
+        qkv_nodes = None
+        qkv_nodes_1 = self.model.match_parent_path(
+            normalize_node,
+            ["MatMul", "Reshape", "Transpose", "Reshape", "MatMul"],
+            [1, 0, 0, 0, 0],
+        )
+        qkv_nodes_2 = self.model.match_parent_path(
+            normalize_node,
+            ["MatMul", "Reshape", "Transpose", "MatMul"],
+            [1, 0, 0, 0],
+        )
+        qkv_nodes_3 = self.model.match_parent_path(
+            normalize_node,
+            ["AllReduce", "MatMul", "Reshape", "Transpose", "MatMul"],
+            [1, 0, 0, 0, 0],
+        )
+        if qkv_nodes_1 is not None:
+            _, reshape_qkv_2, _, reshape_qkv_1, matmul_qkv = qkv_nodes_1
+            qkv_nodes = qkv_nodes_1
+        elif qkv_nodes_2 is not None:
+            _, reshape_qkv, _, matmul_qkv = qkv_nodes_2
+            qkv_nodes = qkv_nodes_2
+        elif qkv_nodes_3 is not None:
+            _, _, reshape_qkv, _, matmul_qkv = qkv_nodes_3
+            qkv_nodes = qkv_nodes_3
+        else:
+            logger.debug("fuse_rotary_attention: failed to match qkv nodes")
+            return
+
+        # v_nodes_1 is for LLaMA-2 Microsoft
+        # v_nodes_3 is for LLaMA-2 Hugging Face
+        # v_nodes_4 is for LLaMA-2 70B model
+        past_v, present_v, past_seq_len = "", "", ""
+        v_nodes = None
+        v_nodes_1 = self.model.match_parent_path(
+            matmul_qkv,
+            ["Reshape", "Transpose", "Concat", "Transpose", "Reshape", "MatMul"],
+            [1, 0, 0, 1, 0, 0],
+        )
+        v_nodes_2 = self.model.match_parent_path(
+            matmul_qkv,
+            ["Concat", "Transpose", "Reshape", "MatMul"],
+            [1, 1, 0, 0],
+        )
+        v_nodes_3 = self.model.match_parent_path(
+            matmul_qkv,
+            ["Transpose", "Reshape", "MatMul"],
+            [1, 0, 0],
+        )
+        _, v_nodes_4, _ = self.model.match_parent_paths_all(
+            matmul_qkv,
+            [
+                (
+                    ["Reshape", "Expand", "Unsqueeze", "Concat", "Transpose", "Reshape", "MatMul"],
+                    [1, 0, 0, 0, 1, 0, 0],
+                ),
+                (
+                    [
+                        "Reshape",
+                        "Expand",
+                        "Where",
+                        "Equal",
+                        "Reshape",
+                        "Concat",
+                        "Unsqueeze",
+                        "Gather",
+                        "Shape",
+                        "Concat",
+                        "Transpose",
+                        "Reshape",
+                        "MatMul",
+                    ],
+                    [1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
+                ),
+                (
+                    [
+                        "Reshape",
+                        "Expand",
+                        "Where",
+                        "Equal",
+                        "Mul",
+                        "ConstantOfShape",
+                        "Shape",
+                        "Reshape",
+                        "Concat",
+                        "Unsqueeze",
+                        "Gather",
+                        "Shape",
+                        "Concat",
+                        "Transpose",
+                        "Reshape",
+                        "MatMul",
+                    ],
+                    [1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0],
+                ),
+                (
+                    [
+                        "Reshape",
+                        "Expand",
+                        "Where",
+                        "ConstantOfShape",
+                        "Shape",
+                        "Reshape",
+                        "Concat",
+                        "Unsqueeze",
+                        "Gather",
+                        "Shape",
+                        "Concat",
+                        "Transpose",
+                        "Reshape",
+                        "MatMul",
+                    ],
+                    [1, 0, 1, 1, 0, 0, 0, 3, 0, 0, 0, 1, 0, 0],
+                ),
+                (
+                    [
+                        "Reshape",
+                        "Expand",
+                        "Where",
+                        "Reshape",
+                        "Concat",
+                        "Unsqueeze",
+                        "Gather",
+                        "Shape",
+                        "Concat",
+                        "Transpose",
+                        "Reshape",
+                        "MatMul",
+                    ],
+                    [1, 0, 1, 2, 0, 4, 0, 0, 0, 1, 0, 0],
+                ),
+                (
+                    ["Reshape", "Concat", "Unsqueeze", "Gather", "Shape", "Concat", "Transpose", "Reshape", "MatMul"],
+                    [1, 1, 0, 0, 0, 0, 1, 0, 0],
+                ),
+                (
+                    [
+                        "Reshape",
+                        "Concat",
+                        "Unsqueeze",
+                        "Mul",
+                        "Gather",
+                        "Shape",
+                        "Concat",
+                        "Transpose",
+                        "Reshape",
+                        "MatMul",
+                    ],
+                    [1, 1, 1, 0, 0, 0, 0, 1, 0, 0],
+                ),
+                (
+                    ["Reshape", "Concat", "Unsqueeze", "Gather", "Shape", "Concat", "Transpose", "Reshape", "MatMul"],
+                    [1, 1, 2, 0, 0, 0, 1, 0, 0],
+                ),
+                (
+                    ["Reshape", "Concat", "Unsqueeze", "Gather", "Shape", "Concat", "Transpose", "Reshape", "MatMul"],
+                    [1, 1, 3, 0, 0, 0, 1, 0, 0],
+                ),
+            ],
+            output_name_to_node=None,
+        )
+        if v_nodes_1 is not None:
+            reshape_v_2, _, concat_v, _, reshape_v_1, matmul_v = v_nodes_1
+            v_nodes = v_nodes_1
+
+            concat_v_path = self.model.match_parent_path(
+                concat_v,
+                ["Slice", "Unsqueeze"],
+                [0, 2],
+            )
+            if concat_v_path is None:
+                logger.debug("fuse_rotary_attention: failed to match past/present concat in v path")
+                return
+
+            past_v = concat_v_path[0].input[0]
+            past_seq_len = concat_v_path[-1].input[0]
+            present_v = concat_v.output[0]
+        elif v_nodes_2 is not None:
+            concat_v, transpose_v, reshape_v, matmul_v = v_nodes_2
+            v_nodes = v_nodes_2
+            past_v = concat_v.input[0]
+            present_v = concat_v.output[0]
+        elif v_nodes_3 is not None:
+            transpose_v, reshape_v, matmul_v = v_nodes_3
+            v_nodes = v_nodes_3
+            present_v = transpose_v.output[0]
+        elif v_nodes_4 is not None and len(v_nodes_4) == 9:
+            concat_v, transpose_v, reshape_v, matmul_v = v_nodes_4[0][-4:]
+            v_nodes = v_nodes_4
+            past_v = concat_v.input[0]
+            present_v = concat_v.output[0]
+        else:
+            logger.debug("fuse_rotary_attention: failed to match v path")
+            return
+
+        qk_nodes = self.model.match_parent_path(
+            matmul_qkv,
+            ["Softmax", "Add", "Div", "MatMul"],
+            [0, 0, 0, 0],
+        )
+        add_qk, matmul_qk = None, None
+        if qk_nodes is not None:
+            _, add_qk, _, matmul_qk = qk_nodes
+        else:
+            logger.debug("fuse_rotary_attention: failed to match qk nodes")
+            return
+
+        # attn_mask_nodes_1, attn_mask_nodes_2 are for LLaMA-2 Microsoft's 3D attention mask
+        # attn_mask_nodes_3, attn_mask_nodes_4 are for LLaMA-2 Hugging Face's 2D attention mask
+        attn_mask, add_qk_str = "", ""
+        attn_mask_nodes_1 = self.model.match_parent_path(
+            add_qk,
+            ["Concat", "Slice", "Slice"],
+            [1, 0, 0],
+        )
+        attn_mask_nodes_2 = self.model.match_parent_path(
+            add_qk,
+            ["Cast", "Concat", "Slice", "Slice"],
+            [1, 0, 0, 0],
+        )
+        attn_mask_nodes_3 = self.model.match_parent_path(
+            add_qk,
+            ["Add", "Where", "Sub", "Cast", "Expand", "Unsqueeze", "Unsqueeze"],
+            [1, 0, 2, 1, 0, 0, 0],
+        )
+        attn_mask_nodes_4 = self.model.match_parent_path(
+            add_qk,
+            ["Where", "Sub", "Cast", "Expand", "Unsqueeze", "Unsqueeze"],
+            [1, 2, 1, 0, 0, 0],
+        )
+        attn_mask_nodes_5 = self.model.match_parent_path(
+            add_qk,
+            ["Expand", "Add", "Where", "Sub", "Cast", "Expand", "Unsqueeze", "Unsqueeze"],
+            [1, 0, 0, 2, 1, 0, 0, 0],
+        )
+        attn_mask_nodes_6 = self.model.match_parent_path(
+            add_qk,
+            ["Expand", "Where", "Sub", "Cast", "Expand", "Unsqueeze", "Unsqueeze"],
+            [1, 0, 2, 1, 0, 0, 0],
+        )
+        if attn_mask_nodes_1 is not None:
+            _, slice_mask_1, slice_mask_2 = attn_mask_nodes_1
+            attn_mask = slice_mask_1.output[0]
+        elif attn_mask_nodes_2 is not None:
+            _, _, slice_mask_1, slice_mask_2 = attn_mask_nodes_2
+            attn_mask = slice_mask_1.output[0]
+        elif attn_mask_nodes_3 is not None:
+            # Reshape from (B,1,S,T) to (B,N,S,T)
+            add_qk_str = self.reshape_add_qk(attn_mask_nodes_3[0].output[0])
+        elif attn_mask_nodes_4 is not None:
+            # Reshape from (B,1,S,T) to (B,N,S,T)
+            add_qk_str = self.reshape_add_qk(attn_mask_nodes_4[0].output[0])
+        elif attn_mask_nodes_5 is not None:
+            # The mask has already been reshaped to (B,N,S,T)
+            add_qk_str = attn_mask_nodes_5[0].output[0]
+        elif attn_mask_nodes_6 is not None:
+            # The mask has already been reshaped to (B,N,S,T)
+            add_qk_str = attn_mask_nodes_6[0].output[0]
+        else:
+            logger.debug("fuse_rotary_attention: failed to match attention mask nodes")
+            return
+
+        # k_nodes_1 is for LLaMA-2 Microsoft
+        # k_nodes_2 is for LLaMA-2 Hugging Face
+        # k_nodes_4 is for LLaMA-2 70B Hugging Face
+        past_k, present_k = "", ""
+        k_nodes = None
+        k_nodes_1 = self.model.match_parent_path(
+            matmul_qk,
+            ["Reshape", "Transpose", "Concat", "Transpose", "RotaryEmbedding", "MatMul"],
+            [1, 0, 0, 1, 0, 0],
+        )
+        k_nodes_2 = self.model.match_parent_path(
+            matmul_qk,
+            ["Transpose", "RotaryEmbedding", "Transpose", "Reshape", "MatMul"],
+            [1, 0, 0, 0, 0],
+        )
+        k_nodes_3 = self.model.match_parent_path(
+            matmul_qk,
+            ["Transpose", "Concat", "RotaryEmbedding", "Transpose", "Reshape", "MatMul"],
+            [1, 0, 1, 0, 0, 0],
+        )
+        _, k_nodes_4, _ = self.model.match_parent_paths_all(
+            matmul_qk,
+            [
+                (
+                    [
+                        "Transpose",
+                        "Reshape",
+                        "Expand",
+                        "Unsqueeze",
+                        "Concat",
+                        "RotaryEmbedding",
+                        "Transpose",
+                        "Reshape",
+                        "MatMul",
+                    ],
+                    [1, 0, 0, 0, 0, 1, 0, 0, 0],
+                ),
+                (
+                    [
+                        "Transpose",
+                        "Reshape",
+                        "Expand",
+                        "Where",
+                        "Equal",
+                        "Reshape",
+                        "Concat",
+                        "Unsqueeze",
+                        "Gather",
+                        "Shape",
+                        "Concat",
+                        "RotaryEmbedding",
+                        "Transpose",
+                        "Reshape",
+                        "MatMul",
+                    ],
+                    [1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
+                ),
+                (
+                    [
+                        "Transpose",
+                        "Reshape",
+                        "Expand",
+                        "Where",
+                        "Equal",
+                        "Mul",
+                        "ConstantOfShape",
+                        "Shape",
+                        "Reshape",
+                        "Concat",
+                        "Unsqueeze",
+                        "Gather",
+                        "Shape",
+                        "Concat",
+                        "RotaryEmbedding",
+                        "Transpose",
+                        "Reshape",
+                        "MatMul",
+                    ],
+                    [1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0],
+                ),
+                (
+                    [
+                        "Transpose",
+                        "Reshape",
+                        "Expand",
+                        "Where",
+                        "ConstantOfShape",
+                        "Shape",
+                        "Reshape",
+                        "Concat",
+                        "Unsqueeze",
+                        "Gather",
+                        "Shape",
+                        "Concat",
+                        "RotaryEmbedding",
+                        "Transpose",
+                        "Reshape",
+                        "MatMul",
+                    ],
+                    [1, 0, 0, 1, 1, 0, 0, 0, 3, 0, 0, 0, 1, 0, 0, 0],
+                ),
+                (
+                    [
+                        "Transpose",
+                        "Reshape",
+                        "Expand",
+                        "Where",
+                        "Reshape",
+                        "Concat",
+                        "Unsqueeze",
+                        "Gather",
+                        "Shape",
+                        "Concat",
+                        "RotaryEmbedding",
+                        "Transpose",
+                        "Reshape",
+                        "MatMul",
+                    ],
+                    [1, 0, 0, 1, 2, 0, 4, 0, 0, 0, 1, 0, 0, 0],
+                ),
+                (
+                    [
+                        "Transpose",
+                        "Reshape",
+                        "Concat",
+                        "Unsqueeze",
+                        "Gather",
+                        "Shape",
+                        "Concat",
+                        "RotaryEmbedding",
+                        "Transpose",
+                        "Reshape",
+                        "MatMul",
+                    ],
+                    [1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0],
+                ),
+                (
+                    [
+                        "Transpose",
+                        "Reshape",
+                        "Concat",
+                        "Unsqueeze",
+                        "Mul",
+                        "Gather",
+                        "Shape",
+                        "Concat",
+                        "RotaryEmbedding",
+                        "Transpose",
+                        "Reshape",
+                        "MatMul",
+                    ],
+                    [1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0],
+                ),
+                (
+                    [
+                        "Transpose",
+                        "Reshape",
+                        "Concat",
+                        "Unsqueeze",
+                        "Gather",
+                        "Shape",
+                        "Concat",
+                        "RotaryEmbedding",
+                        "Transpose",
+                        "Reshape",
+                        "MatMul",
+                    ],
+                    [1, 0, 1, 2, 0, 0, 0, 1, 0, 0, 0],
+                ),
+                (
+                    [
+                        "Transpose",
+                        "Reshape",
+                        "Concat",
+                        "Unsqueeze",
+                        "Gather",
+                        "Shape",
+                        "Concat",
+                        "RotaryEmbedding",
+                        "Transpose",
+                        "Reshape",
+                        "MatMul",
+                    ],
+                    [1, 0, 1, 3, 0, 0, 0, 1, 0, 0, 0],
+                ),
+            ],
+            output_name_to_node=None,
+        )
+        if k_nodes_1 is not None:
+            reshape_k_2, _, concat_k, _, rotary_k, matmul_k = k_nodes_1
+            k_nodes = k_nodes_1
+
+            concat_k_path = self.model.match_parent_path(
+                concat_k,
+                ["Slice", "Unsqueeze"],
+                [0, 2],
+            )
+            if concat_k_path is None:
+                logger.debug("fuse_rotary_attention: failed to match past/present concat in k path")
+                return
+
+            past_k = concat_k_path[0].input[0]
+            shared_past_seq_len = concat_k_path[-1].input[0]
+            present_k = concat_k.output[0]
+
+            assert past_seq_len == shared_past_seq_len
+        elif k_nodes_2 is not None:
+            _, rotary_k, _, reshape_k, matmul_k = k_nodes_2
+            k_nodes = k_nodes_2
+            present_k = rotary_k.output[0]
+        elif k_nodes_3 is not None:
+            _, concat_k, rotary_k, _, reshape_k, matmul_k = k_nodes_3
+            k_nodes = k_nodes_3
+            past_k = concat_k.input[0]
+            present_k = concat_k.output[0]
+        elif k_nodes_4 is not None and len(k_nodes_4) == 9:
+            reshape_k, matmul_k = k_nodes_4[0][-2:]
+            concat_k, rotary_k = k_nodes_4[0][-5:-3]
+            k_nodes = k_nodes_4
+            past_k = concat_k.input[0]
+            present_k = concat_k.output[0]
+        else:
+            logger.debug("fuse_rotary_attention: failed to match k nodes")
+            return
+
+        # q_nodes_1 is for LLaMA-2 Microsoft
+        # q_nodes_2 is for LLaMA-2 Hugging Face
+        q_nodes = None
+        q_nodes_1 = self.model.match_parent_path(
+            matmul_qk,
+            ["Reshape", "Transpose", "RotaryEmbedding", "MatMul"],
+            [0, 0, 0, 0],
+        )
+        q_nodes_2 = self.model.match_parent_path(
+            matmul_qk,
+            ["RotaryEmbedding", "Transpose", "Reshape", "MatMul"],
+            [0, 0, 0, 0],
+        )
+        if q_nodes_1 is not None:
+            reshape_q_2, _, rotary_q, matmul_q = q_nodes_1
+            q_nodes = q_nodes_1
+        elif q_nodes_2 is not None:
+            rotary_q, _, reshape_q, matmul_q = q_nodes_2
+            q_nodes = q_nodes_2
+        else:
+            logger.debug("fuse_rotary_attention: failed to match q nodes")
+            return
+
+        if matmul_q.input[0] != matmul_k.input[0] and matmul_k.input[0] != matmul_v.input[0]:
+            logger.debug("fuse_rotary_attention: failed to find the same root_input for q, k, v paths")
+            return
+
+        root_output = ""
+        if qkv_nodes == qkv_nodes_1:
+            if not self.check_runtime_shape_paths_for_function(
+                reshape_qkv_2,
+                reshape_qkv_1,
+                reshape_q_2,
+                reshape_k_2,
+                reshape_v_2,
+                reshape_v_1,
+                add_qk,
+                matmul_q.input[0],
+            ):
+                logger.debug("fuse_rotary_attention: failed to verify runtime shape paths")
+                return
+            root_output = reshape_qkv_2.output[0]
+
+        elif qkv_nodes in (qkv_nodes_2, qkv_nodes_3):
+            if not self.check_runtime_shape_paths_for_nodes(
+                reshape_qkv,
+                reshape_q,
+                reshape_k,
+                reshape_v,
+                matmul_q.input[0],
+            ):
+                logger.debug("fuse_rotary_attention: failed to verify runtime shape paths")
+                return
+            root_output = reshape_qkv.output[0]
+
+            # Rename inputs of rotary_q/k so it connects with output of matmul_q/k
+            # Before: MatMul --> Reshape --> Transpose --> RotaryEmbedding
+            # After: MatMul --> RotaryEmbedding
+            rotary_q.input[0] = matmul_q.output[0]
+            rotary_k.input[0] = matmul_k.output[0]
+
+            # Rename current output of rotary_k (present_key) so it doesn't match output of MHA (present_key)
+            rotary_k.output[0] = rotary_k.name + "_output_0"
+
+            if qkv_nodes == qkv_nodes_3:
+                qkv_nodes = qkv_nodes[1:]
+
+        new_node = self.create_mha_node(
+            matmul_q.input[0],
+            root_output,
+            rotary_q,
+            rotary_k,
+            matmul_v,
+            attn_mask,
+            add_qk_str,
+            past_k,
+            past_v,
+            present_k,
+            present_v,
+        )
+        if new_node is None:
+            logger.debug("fuse_rotary_attention: failed to create multi-head attention with rotary embeddings")
+            return
+
+        self.nodes_to_add.append(new_node)
+        self.node_name_to_graph_name[new_node.name] = self.this_graph_name
+
+        self.nodes_to_remove.extend(qkv_nodes[1:])
+
+        if v_nodes != v_nodes_4:
+            self.nodes_to_remove.extend(v_nodes[:-1])
+        else:
+            nodes_to_keep = [v_nodes[0][-1]]
+            for temp_path in v_nodes:
+                self.add_nodes_to_remove_with_nodes_to_keep(temp_path, nodes_to_keep)
+
+        self.nodes_to_remove.extend(qk_nodes)
+
+        if k_nodes == k_nodes_1:
+            self.nodes_to_remove.extend(k_nodes[:-2])
+        elif k_nodes == k_nodes_2:
+            self.nodes_to_remove.append(k_nodes[0])
+            self.nodes_to_remove.append(k_nodes[2])
+            self.nodes_to_remove.append(k_nodes[3])
+        elif k_nodes == k_nodes_3:
+            self.nodes_to_remove.append(k_nodes[0])
+            self.nodes_to_remove.append(k_nodes[1])
+            self.nodes_to_remove.append(k_nodes[3])
+            self.nodes_to_remove.append(k_nodes[4])
+        elif k_nodes == k_nodes_4:
+            nodes_to_keep = [k_nodes[0][-1], k_nodes[0][-4]]
+            for temp_path in k_nodes:
+                self.add_nodes_to_remove_with_nodes_to_keep(temp_path, nodes_to_keep)
+
+        if q_nodes == q_nodes_1:
+            self.nodes_to_remove.extend(q_nodes[:-2])
+        elif q_nodes == q_nodes_2:
+            self.nodes_to_remove.append(q_nodes[1])
+            self.nodes_to_remove.append(q_nodes[2])
+
+        self.prune_graph = True
+
+
+class FusionRotaryEmbeddings(Fusion):
+    def __init__(self, model: OnnxModel):
+        self.base_name = "RotaryEmbedding"
+        super().__init__(model, self.base_name, [self.base_name, self.base_name + ".1", "Add"])
+
+    # The RotaryEmbedding function can have multiple extraneous constant outputs even though the function is supposed to produce only one output.
+    # This is a byproduct of a potential CSE bug when using `export_modules_as_functions` in the TorchScript exporter.
+    # To work around this issue, we set the extraneous constant values from the RotaryEmbedding function as initializers in the locations where they are actually used.
+    def reassign_extra_outputs(self, rot_emb_node: NodeProto, function: FunctionProto):
+        # Find extra outputs and Constant nodes attached to those outputs
+        extra_constants, extra_outputs = [], []
+        for fn_node in function.node:
+            if fn_node.op_type == "Constant" and fn_node.input == [] and fn_node.output[0] in function.output:
+                extra_constants.append(fn_node)
+                output_index = list(function.output).index(fn_node.output[0])
+                extra_outputs.append(rot_emb_node.output[output_index])
+
+        # Set extra Constant node outputs as initializers
+        extra_initializers = []
+        for extra_constant in extra_constants:
+            constant_tensorproto = extra_constant.attribute[0].t
+            constant_tensorproto.name = self.model.create_node_name("Constant")
+            self.model.add_initializer(constant_tensorproto)
+            extra_initializers.append(constant_tensorproto.name)
+
+        # Update references of Constant node outputs to initializer references
+        for extra_output, extra_initializer in zip(extra_outputs, extra_initializers):
+            nodes_to_update = list(filter(lambda entry: extra_output in entry.input, self.model.model.graph.node))
+            for node_to_update in nodes_to_update:
+                OnnxModel.replace_node_input(node_to_update, extra_output, extra_initializer)
+
+        return extra_outputs
+
+    def create_rotary_embeddings_from_function(self, node: NodeProto):
+        rotary_emb_node_name = self.model.create_node_name(self.base_name)
+
+        matmul_path = self.model.match_parent_path(
+            node,
+            ["Reshape", "MatMul"],
+            [0, 0],
+        )
+        if matmul_path is not None:
+            reshape_node, matmul_node = matmul_path
+        else:
+            logger.debug("fuse_rotary_embeddings: failed to match MatMul")
+            return
+
+        rotary_emb_inputs = [
+            matmul_node.output[0],  # x is of shape (B,S,D) instead of (B,S,N,H)
+            node.input[1],  # position_ids
+        ]
+
+        # Convert cos_cache and sin_cache from node attributes to model initializers
+        cos_cache_node = list(filter(lambda constant: constant.output[0] == node.input[2], self.model.model.graph.node))
+        sin_cache_node = list(filter(lambda constant: constant.output[0] == node.input[3], self.model.model.graph.node))
+        cos_cache_name, sin_cache_name = "cos_cache", "sin_cache"
+
+        if (
+            len(cos_cache_node) == 1
+            and len(sin_cache_node) == 1
+            and self.model.get_initializer(cos_cache_name) is None
+            and self.model.get_initializer(sin_cache_name) is None
+        ):
+            cos_cache = numpy_helper.to_array(cos_cache_node[0].attribute[0].t).squeeze()
+            sin_cache = numpy_helper.to_array(sin_cache_node[0].attribute[0].t).squeeze()
+
+            cos_cache_tensor = helper.make_tensor(
+                name=cos_cache_name,
+                data_type=TensorProto.FLOAT,
+                dims=list(cos_cache.shape),
+                vals=cos_cache.flatten().tolist(),
+            )
+            self.model.add_initializer(cos_cache_tensor, self.this_graph_name)
+            sin_cache_tensor = helper.make_tensor(
+                name=sin_cache_name,
+                data_type=TensorProto.FLOAT,
+                dims=list(sin_cache.shape),
+                vals=sin_cache.flatten().tolist(),
+            )
+            self.model.add_initializer(sin_cache_tensor, self.this_graph_name)
+
+            self.nodes_to_remove.extend([cos_cache_node[0], sin_cache_node[0]])
+
+        rotary_emb_inputs.extend([cos_cache_name, sin_cache_name])
+
+        rotary_emb_outputs = node.output
+        if len(rotary_emb_outputs) > 1:
+            # Re-assign extraneous constant outputs in RotaryEmbedding functions as initializers
+            func = list(filter(lambda fn: fn.name == node.op_type, self.model.model.functions))
+            assert len(func) == 1
+            extra_outputs = self.reassign_extra_outputs(node, func[0])
+            rotary_emb_outputs = list(filter(lambda output_name: output_name not in extra_outputs, rotary_emb_outputs))
+            assert len(rotary_emb_outputs) == 1
+
+        rotary_emb_node = helper.make_node(
+            self.base_name,
+            inputs=rotary_emb_inputs,
+            outputs=rotary_emb_outputs,
+            name=rotary_emb_node_name,
+            interleaved=1,
+        )
+        rotary_emb_node.domain = "com.microsoft"
+
+        self.nodes_to_remove.append(reshape_node)
+
+        return rotary_emb_node
+
+    def create_rotary_embeddings_from_nodes(
+        self,
+        root_input: str,
+        position_ids: str,
+        cos_slice: str,
+        sin_slice: str,
+        output: str,
+    ):
+        rotary_emb_node_name = self.model.create_node_name(self.base_name)
+
+        # Convert cos_cache and sin_cache from node attributes to model initializers
+        cos_cache_node = list(filter(lambda constant: constant.output[0] == cos_slice, self.model.model.graph.node))
+        sin_cache_node = list(filter(lambda constant: constant.output[0] == sin_slice, self.model.model.graph.node))
+        cos_cache_name, sin_cache_name = "cos_cache", "sin_cache"
+
+        if (
+            len(cos_cache_node) == 1
+            and len(sin_cache_node) == 1
+            and self.model.get_initializer(cos_cache_name) is None
+            and self.model.get_initializer(sin_cache_name) is None
+        ):
+            cos_cache = numpy_helper.to_array(cos_cache_node[0].attribute[0].t).squeeze()
+            sin_cache = numpy_helper.to_array(sin_cache_node[0].attribute[0].t).squeeze()
+
+            # Reshape cos/sin cache from (M, H) to (M, H/2)
+            head_size = cos_cache.shape[1]
+            cos_cache = cos_cache[:, : (head_size // 2)]
+            sin_cache = sin_cache[:, : (head_size // 2)]
+
+            cos_cache_tensor = helper.make_tensor(
+                name=cos_cache_name,
+                data_type=TensorProto.FLOAT,
+                dims=list(cos_cache.shape),
+                vals=cos_cache.flatten().tolist(),
+            )
+            self.model.add_initializer(cos_cache_tensor, self.this_graph_name)
+            sin_cache_tensor = helper.make_tensor(
+                name=sin_cache_name,
+                data_type=TensorProto.FLOAT,
+                dims=list(sin_cache.shape),
+                vals=sin_cache.flatten().tolist(),
+            )
+            self.model.add_initializer(sin_cache_tensor, self.this_graph_name)
+
+            self.nodes_to_remove.extend([cos_cache_node[0], sin_cache_node[0]])
+
+        rotary_emb_node = helper.make_node(
+            self.base_name,
+            inputs=[root_input, position_ids, cos_cache_name, sin_cache_name],
+            outputs=[output],
+            name=rotary_emb_node_name,
+            interleaved=0,
+        )
+        rotary_emb_node.domain = "com.microsoft"
+        return rotary_emb_node
+
+    def fuse(self, node, input_name_to_nodes, output_name_to_node):
+        # Node is either RotaryEmbedding function or Add
+        if self.base_name not in node.op_type and node.op_type != "Add":
+            return
+
+        # Check if node is "RotaryEmbedding nn.Module" exported as a function
+        # (e.g. export_modules_as_functions={RotaryEmbedding} in torch.onnx.export)
+        rotary_emb_node = None
+        if node.op_type != "Add":
+            # Verify that function has the correct inputs
+            if len(node.input) not in {4, 5} or node.input[1] not in {
+                "pos",
+                "pos_id",
+                "position_id",
+                "pos_ids",
+                "position_ids",
+            }:
+                logger.debug("fuse_rotary_embeddings: failed to verify inputs for RotaryEmbedding function")
+                return
+
+            rotary_emb_node = self.create_rotary_embeddings_from_function(node)
+            if rotary_emb_node is None:
+                logger.debug("fuse_rotary_embeddings: failed to create RotaryEmbedding node")
+                return
+
+            # Remove RotaryEmbedding function
+            self.nodes_to_remove.append(node)
+
+            # Remove RotaryEmbedding function's shape inference stored in value_info
+            # The new shape will be calculated during symbolic shape inference
+            old_shape_infer = list(
+                filter(lambda node: node.name == rotary_emb_node.output[0], self.model.model.graph.value_info)
+            )
+            assert len(old_shape_infer) == 1
+            self.model.model.graph.value_info.remove(old_shape_infer[0])
+
+        else:
+            # Rotary embeddings are defined using the below functions:
+            #
+            # def rotate_half(x):
+            #     """Rotates half the hidden dims of the input."""
+            #     x1 = x[..., : x.shape[-1] // 2]
+            #     x2 = x[..., x.shape[-1] // 2 :]
+            #     return torch.cat((-x2, x1), dim=-1)
+            #
+            # def apply_rope(x, cos, sin, position_ids):
+            #     cos = cos.squeeze(1).squeeze(0)  # [seq_len, dim]
+            #     sin = sin.squeeze(1).squeeze(0)  # [seq_len, dim]
+            #     cos = cos[position_ids].unsqueeze(1)  # [bs, 1, seq_len, dim]
+            #     sin = sin[position_ids].unsqueeze(1)  # [bs, 1, seq_len, dim]
+            #     x_embed = (x * cos) + (rotate_half(x) * sin)
+            #     return x_embed
+
+            # Check paths for rotate_half(x)
+            rotate_half_x2_path_1 = self.model.match_parent_path(
+                node,
+                ["Mul", "Concat", "Neg", "Slice", "Transpose"],
+                [1, 0, 0, 0, 0],
+            )
+            rotate_half_x2_path_2 = self.model.match_parent_path(
+                node,
+                ["Mul", "Concat", "Neg", "Slice", "Unsqueeze", "Div", "Gather", "Shape", "Transpose"],
+                [1, 0, 0, 0, 1, 0, 0, 0, 0],
+            )
+            if rotate_half_x2_path_1 is None or rotate_half_x2_path_2 is None:
+                logger.debug("fuse_rotary_embeddings: failed to match x2 in rotate_half")
+                return
+
+            rotate_half_x1_path_1 = self.model.match_parent_path(
+                node,
+                ["Mul", "Concat", "Slice", "Transpose"],
+                [1, 0, 1, 0],
+            )
+            rotate_half_x1_path_2 = self.model.match_parent_path(
+                node,
+                ["Mul", "Concat", "Slice", "Unsqueeze", "Div", "Gather", "Shape", "Transpose"],
+                [1, 0, 1, 2, 0, 0, 0, 0],
+            )
+            if rotate_half_x1_path_1 is None or rotate_half_x1_path_2 is None:
+                logger.debug("fuse_rotary_embeddings: failed to match x1 in rotate_half")
+                return
+
+            if (
+                rotate_half_x1_path_1[-1].name != rotate_half_x1_path_2[-1].name
+                or rotate_half_x2_path_1[-1].name != rotate_half_x2_path_2[-1].name
+                or rotate_half_x1_path_1[-1].name != rotate_half_x2_path_1[-1].name
+                or rotate_half_x1_path_2[-1].name != rotate_half_x2_path_2[-1].name
+            ):
+                logger.debug("fuse_rotary_embeddings: failed to match common input in rotate_half")
+                return
+
+            # Check path for x
+            x_path = self.model.match_parent_path(
+                node,
+                ["Mul", "Transpose"],
+                [0, 0],
+            )
+            if x_path is None:
+                logger.debug("fuse_rotary_embeddings: failed to match x in rotate_half")
+                return
+
+            # Check path for sin
+            sin_path, sin_cache, position_ids = None, "", ""
+            sin_path_1 = self.model.match_parent_path(
+                node,
+                ["Mul", "Unsqueeze", "Gather", "Squeeze", "Squeeze", "Slice", "Unsqueeze", "Gather", "Shape"],
+                [1, 1, 0, 0, 0, 0, 2, 0, 0],
+            )
+            sin_path_2 = self.model.match_parent_path(
+                node,
+                ["Mul", "Unsqueeze", "Gather", "Squeeze", "Squeeze", "Slice", "Unsqueeze", "Add"],
+                [1, 1, 0, 0, 0, 0, 2, 0],
+            )
+            sin_path_3 = self.model.match_parent_path(
+                node,
+                ["Mul", "Unsqueeze", "Gather", "Slice", "Unsqueeze", "Gather", "Shape"],
+                [1, 1, 0, 0, 2, 0, 0],
+            )
+            sin_path_4 = self.model.match_parent_path(
+                node,
+                ["Mul", "Unsqueeze", "Gather", "Slice", "Unsqueeze", "Add"],
+                [1, 1, 0, 0, 2, 0],
+            )
+            if sin_path_1 is not None:
+                sin_path = sin_path_1
+                sin_cache = sin_path[-4].input[0]
+            elif sin_path_2 is not None:
+                sin_path = sin_path_2
+                sin_cache = sin_path[-3].input[0]
+            elif sin_path_3 is not None:
+                sin_path = sin_path_3
+                sin_cache = sin_path[-4].input[0]
+                position_ids = sin_path[2].input[1]
+            elif sin_path_4 is not None:
+                sin_path = sin_path_4
+                sin_cache = sin_path[-3].input[0]
+                position_ids = sin_path[2].input[1]
+            else:
+                logger.debug("fuse_rotary_embeddings: failed to match sin path in apply_rope")
+                return
+
+            # Check path for cos
+            cos_path, cos_cache = None, ""
+            cos_path_1 = self.model.match_parent_path(
+                node,
+                ["Mul", "Unsqueeze", "Gather", "Squeeze", "Squeeze", "Slice", "Unsqueeze", "Gather", "Shape"],
+                [0, 1, 0, 0, 0, 0, 2, 0, 0],
+            )
+            cos_path_2 = self.model.match_parent_path(
+                node,
+                ["Mul", "Unsqueeze", "Gather", "Squeeze", "Squeeze", "Slice", "Unsqueeze", "Add"],
+                [0, 1, 0, 0, 0, 0, 2, 0],
+            )
+            cos_path_3 = self.model.match_parent_path(
+                node,
+                ["Mul", "Unsqueeze", "Gather", "Slice", "Unsqueeze", "Gather", "Shape"],
+                [0, 1, 0, 0, 2, 0, 0],
+            )
+            cos_path_4 = self.model.match_parent_path(
+                node,
+                ["Mul", "Unsqueeze", "Gather", "Slice", "Unsqueeze", "Add"],
+                [0, 1, 0, 0, 2, 0],
+            )
+            if cos_path_1 is not None:
+                cos_path = cos_path_1
+                cos_cache = cos_path[-4].input[0]
+            elif cos_path_2 is not None:
+                cos_path = cos_path_2
+                cos_cache = cos_path[-3].input[0]
+            elif cos_path_3 is not None:
+                cos_path = cos_path_3
+                cos_cache = cos_path[-4].input[0]
+                position_ids = cos_path[2].input[1]
+            elif cos_path_4 is not None:
+                cos_path = cos_path_4
+                cos_cache = cos_path[-3].input[0]
+                position_ids = cos_path[2].input[1]
+            else:
+                logger.debug("fuse_rotary_embeddings: failed to match sin path in apply_rope")
+                return
+
+            # Check path for position ids
+            if position_ids == "":
+                position_ids_from_sin_path = self.model.match_parent_path(
+                    sin_path[2],
+                    ["Reshape"],
+                    [1],
+                )
+                position_ids_from_cos_path = self.model.match_parent_path(
+                    cos_path[2],
+                    ["Reshape"],
+                    [1],
+                )
+                if (
+                    position_ids_from_sin_path is None
+                    or position_ids_from_cos_path is None
+                    or position_ids_from_sin_path[0].name != position_ids_from_cos_path[0].name
+                ):
+                    logger.debug("fuse_rotary_embeddings: failed to match position ids path in apply_rope")
+                    return
+                position_ids = position_ids_from_cos_path[0].input[0]
+            else:
+                position_ids_from_sin_path = []
+                position_ids_from_cos_path = []
+
+            past_seq_len_path, curr_seq_len_path = None, None
+            if (sin_path == sin_path_1 and cos_path == cos_path_1) or (
+                sin_path == sin_path_3 and cos_path == cos_path_3
+            ):
+                if sin_path[-2].name != cos_path[-2].name or sin_path[-1].name != cos_path[-1].name:
+                    logger.debug(
+                        "fuse_rotary_embeddings: failed to match common Gather node and Shape node in sin cache and cos cache"
+                    )
+                    return
+            elif (sin_path == sin_path_2 and cos_path == cos_path_2) or (
+                sin_path == sin_path_4 and cos_path == cos_path_4
+            ):
+                if sin_path[-1].name != cos_path[-1].name:
+                    logger.debug("fuse_rotary_embeddings: failed to match common Add node in sin cache and cos cache")
+                    return
+                # Match past sequence length path: past_key --> Shape --> Gather --> Add
+                past_seq_len_path = self.model.match_parent_path(
+                    sin_path[-1],
+                    ["Gather", "Shape"],
+                    [1, 0],
+                )
+                # Match current sequence length path: transpose_k --> Shape --> Gather --> Add
+                curr_seq_len_path = self.model.match_parent_path(
+                    sin_path[-1],
+                    ["Gather", "Shape", "Transpose"],
+                    [0, 0, 0],
+                )
+                if (
+                    past_seq_len_path is None
+                    or curr_seq_len_path is None
+                    or self.model.find_graph_input(past_seq_len_path[-1].input[0]) is None
+                    or curr_seq_len_path[-1].op_type != "Transpose"
+                ):
+                    logger.debug("fuse_rotary_embeddings: failed to match past_seq_len and curr_seq_len paths")
+                    return
+            else:
+                logger.debug("fuse_rotary_embeddings: failed to match common cache paths")
+
+            rotary_emb_node = self.create_rotary_embeddings_from_nodes(
+                rotate_half_x1_path_1[-1].output[0],
+                position_ids,
+                cos_cache,
+                sin_cache,
+                node.output[0],
+            )
+            if rotary_emb_node is None:
+                logger.debug("fuse_rotary_embeddings: failed to create RotaryEmbedding node")
+                return
+
+            # Remove rotary embedding nodes
+            self.add_nodes_to_remove([node])
+            self.add_nodes_to_remove(rotate_half_x1_path_1[:-1])
+            self.add_nodes_to_remove(rotate_half_x1_path_2[:-1])
+            self.add_nodes_to_remove(rotate_half_x2_path_1[:-1])
+            self.add_nodes_to_remove(rotate_half_x2_path_2[:-1])
+            self.add_nodes_to_remove(x_path[:-1])
+            self.add_nodes_to_remove(sin_path)
+            self.add_nodes_to_remove(cos_path)
+            self.add_nodes_to_remove(position_ids_from_sin_path[:-1])
+            self.add_nodes_to_remove(position_ids_from_cos_path[:-1])
+
+            if past_seq_len_path is not None and len(self.model.get_children(past_seq_len_path[0])) == 1:
+                # In merged HF model, output of Gather in past_seq_len_path is used twice
+                # for past_key_values.0.key and once for other past_key_values
+                self.add_nodes_to_remove(past_seq_len_path)
+            if curr_seq_len_path is not None:
+                self.add_nodes_to_remove(curr_seq_len_path[:-1])
+
+        self.increase_counter(self.base_name)
+        self.node_name_to_graph_name[rotary_emb_node.name] = self.this_graph_name
+        self.nodes_to_add.append(rotary_emb_node)
+        self.prune_graph = True
diff --git a/onnxruntime/python/tools/transformers/fusion_shape.py b/onnxruntime/python/tools/transformers/fusion_shape.py
index 11d6b7a8d3cf4..bc32d78eda66c 100644
--- a/onnxruntime/python/tools/transformers/fusion_shape.py
+++ b/onnxruntime/python/tools/transformers/fusion_shape.py
@@ -48,22 +48,22 @@ def fuse(
         input_name_to_nodes: Dict[str, List[NodeProto]],
         output_name_to_node: Dict[str, NodeProto],
     ):
-        """
-        Smplify subgraph like
-
-                   (2d_input)
-                    /       \
-                Shape       shape
-                /             \
-            Gather(indices=0)  Gather(indices=1)
-                |                |
-            Unsqueeze(axes=0)   Unsqueeze(axes=0)
-                   \\          /
-                      Concat
-                        |
-
-        into  (2d_input) --> Shape -->
-        """
+        #
+        # Simplify subgraph like
+        #
+        #          (2d_input)
+        #           /       \
+        #       Shape       shape
+        #       /             \
+        #   Gather(indices=0)  Gather(indices=1)
+        #       |                |
+        #   Unsqueeze(axes=0)   Unsqueeze(axes=0)
+        #          \           /
+        #             Concat
+        #               |
+        #
+        # into  (2d_input) --> Shape -->
+        #
         opset_version = self.model.get_opset_version()
 
         inputs = len(concat_node.input)
diff --git a/onnxruntime/python/tools/transformers/fusion_simplified_layernorm.py b/onnxruntime/python/tools/transformers/fusion_simplified_layernorm.py
new file mode 100644
index 0000000000000..6f35fa5617a39
--- /dev/null
+++ b/onnxruntime/python/tools/transformers/fusion_simplified_layernorm.py
@@ -0,0 +1,141 @@
+import logging
+from typing import Dict
+
+from fusion_base import Fusion
+from fusion_skiplayernorm import FusionSkipLayerNormalization
+from onnx import helper
+from onnx_model import OnnxModel
+
+logger = logging.getLogger(__name__)
+
+
+class FusionSimplifiedLayerNormalization(Fusion):
+    def __init__(self, model: OnnxModel):
+        super().__init__(model, "SimplifiedLayerNormalization", "Mul")
+
+    def fuse(self, node, input_name_to_nodes: Dict, output_name_to_node: Dict):
+        if node.op_type != "Mul":
+            return
+
+        sim_ln_nodes = None
+        # SimplifiedLayerNorm calculation (notation from https://onnx.ai/onnx/operators/onnx__LayerNormalization.html#summary):
+        # DD = Pow(D, 2)
+        # Var = ReduceMean(DD)
+        # VarEps = Add(Var, epsilon)
+        # StdDev = Sqrt(VarEps)
+        # InvStdDev = Div(1, StdDev)
+        # Normalized = Mul(D, InvStdDev)
+        # NormalizedScaled = Mul(Normalized, Scale)
+
+        #                              SimplifiedLayerNorm
+        #          +-------------------------------------------------------+
+        #          |                                                       |
+        # Add --> Pow --> ReduceMean --> Add --> Sqrt --> Div --> Mul --> Mul
+        #                                                                  |
+        #                                                                 node
+        sim_ln_nodes_1 = self.model.match_parent_path(
+            node,
+            ["Mul", "Div", "Sqrt", "Add", "ReduceMean", "Pow", "Add"],
+            [1, 1, 1, 0, 0, 0, 0],
+        )
+        #                                SimplifiedLayerNorm
+        #             +-------------------------------------------------------+
+        #             |                                                       |
+        # Gather --> Pow --> ReduceMean --> Add --> Sqrt --> Div --> Mul --> Mul
+        #                                                                     |
+        #                                                                    node
+        sim_ln_nodes_2 = self.model.match_parent_path(
+            node,
+            ["Mul", "Div", "Sqrt", "Add", "ReduceMean", "Pow", "Gather"],
+            [1, 1, 1, 0, 0, 0, 0],
+        )
+
+        # For LLaMA from Microsoft custom export:
+        # sim_ln_nodes_3 uses a different start parent index than sim_ln_nodes_1
+        #
+        #                              SimplifiedLayerNorm
+        #          +-------------------------------------------------------+
+        #          |                                                       |
+        # Add --> Pow --> ReduceMean --> Add --> Sqrt --> Div --> Mul --> Mul
+        #                                                                  |
+        #                                                                 node
+        sim_ln_nodes_3 = self.model.match_parent_path(
+            node,
+            ["Mul", "Div", "Sqrt", "Add", "ReduceMean", "Pow", "Add"],
+            [0, 1, 1, 0, 0, 0, 0],
+        )
+
+        # sim_ln_nodes_4 starts with a graph input instead of an Add node like sim_ln_nodes_3
+        #
+        #                                  SimplifiedLayerNorm
+        #                  +-----------------------------------------------+
+        #                  |                                               |
+        # graph_input --> Pow --> ReduceMean --> Add --> Sqrt --> Div --> Mul
+        #                                                                  |
+        #                                                                 node
+        sim_ln_nodes_4 = self.model.match_parent_path(
+            node,
+            ["Mul", "Div", "Sqrt", "Add", "ReduceMean", "Pow"],
+            [0, 1, 1, 0, 0, 0],
+        )
+
+        add_node, pow_node = None, None
+        if sim_ln_nodes_1 is not None:
+            sim_ln_nodes = sim_ln_nodes_1
+            add_node = sim_ln_nodes[3]
+            pow_node = sim_ln_nodes[-2]
+        elif sim_ln_nodes_2 is not None:
+            sim_ln_nodes = sim_ln_nodes_2
+            add_node = sim_ln_nodes[3]
+            pow_node = sim_ln_nodes[-2]
+        elif sim_ln_nodes_3 is not None:
+            sim_ln_nodes = sim_ln_nodes_3
+            add_node = sim_ln_nodes[3]
+            pow_node = sim_ln_nodes[-2]
+        elif sim_ln_nodes_4 is not None:
+            sim_ln_nodes = sim_ln_nodes_4
+            add_node = sim_ln_nodes[3]
+            pow_node = sim_ln_nodes[-1]
+            # Verify that parent input to Pow node is graph_input
+            if pow_node.input[0] not in self.model.get_graphs_input_names():
+                return
+        else:
+            return
+
+        layernorm_weight_index = 1 if sim_ln_nodes in (sim_ln_nodes_3, sim_ln_nodes_4) else 0
+        starts_with_graph_input = sim_ln_nodes == sim_ln_nodes_4
+
+        if self.model.find_constant_input(pow_node, 2.0) != 1:
+            return
+
+        root_input = pow_node.input[0]
+        if root_input != sim_ln_nodes[0].input[0]:
+            return
+
+        i, add_weight = self.model.get_constant_input(add_node)
+        if add_weight is None or add_weight <= 0 or add_weight > 1.0e-4:
+            logger.warning(f"epsilon value is not expected: {add_weight}")
+            return
+
+        self.nodes_to_remove.extend(sim_ln_nodes[:-1] if not starts_with_graph_input else sim_ln_nodes)
+        self.nodes_to_remove.append(node)
+
+        normalize_node = helper.make_node(
+            "SimplifiedLayerNormalization",
+            inputs=[root_input, node.input[layernorm_weight_index]],
+            outputs=[node.output[0]],
+            name=self.model.create_node_name("SimplifiedLayerNormalization", name_prefix="LayerNorm"),
+        )
+        normalize_node.attribute.extend([helper.make_attribute("epsilon", float(add_weight))])
+        normalize_node.attribute.extend([helper.make_attribute("axis", -1)])
+        normalize_node.attribute.extend([helper.make_attribute("stash_type", 1)])
+        self.nodes_to_add.append(normalize_node)
+        self.node_name_to_graph_name[normalize_node.name] = self.this_graph_name
+
+
+class FusionSkipSimplifiedLayerNormalization(FusionSkipLayerNormalization):
+    def __init__(self, model: OnnxModel):
+        super().__init__(model, "SkipSimplifiedLayerNormalization", "SimplifiedLayerNormalization")
+
+    def fuse(self, node, input_name_to_nodes, output_name_to_node):
+        super().fuse(node, input_name_to_nodes, output_name_to_node)
diff --git a/onnxruntime/python/tools/transformers/fusion_skip_group_norm.py b/onnxruntime/python/tools/transformers/fusion_skip_group_norm.py
new file mode 100644
index 0000000000000..df80acbd97807
--- /dev/null
+++ b/onnxruntime/python/tools/transformers/fusion_skip_group_norm.py
@@ -0,0 +1,255 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation.  All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+from logging import getLogger
+from typing import List
+
+from fusion_base import Fusion
+from fusion_utils import NumpyHelper
+from onnx import helper
+from onnx_model import OnnxModel
+
+logger = getLogger(__name__)
+
+
+class FusionSkipGroupNorm(Fusion):
+    """
+    Fuse Add + GroupNorm into one node: SkipGroupNorm.
+    """
+
+    def __init__(self, model: OnnxModel):
+        super().__init__(model, "SkipGroupNorm", "GroupNorm")
+        # Update shape inference is needed since other fusions might add new edge which does not have shape info yet.
+        self.shape_infer_helper = self.model.infer_runtime_shape(update=True)
+
+        if self.shape_infer_helper is None:
+            logger.warning("SkipGroupNorm fusion will be skipped since symbolic shape inference disabled or failed.")
+
+    def create_transpose_node(self, input_name: str, perm: List[int], output_name=None):
+        """Append a Transpose node after an input"""
+        node_name = self.model.create_node_name("Transpose")
+        if output_name is None:
+            output_name = node_name + "_out" + "-" + input_name
+        transpose_node = helper.make_node("Transpose", inputs=[input_name], outputs=[output_name], name=node_name)
+        transpose_node.attribute.extend([helper.make_attribute("perm", perm)])
+        return transpose_node
+
+    def get_skip_index(self, add, is_channel_last: bool):
+        """Add has two inputs. This classifies which input is skip based on shape info (skip allows broadcast)."""
+        skip = -1
+        broadcast = False
+
+        assert self.shape_infer_helper is not None
+        shape_a = self.shape_infer_helper.get_edge_shape(add.input[0])
+        shape_b = self.shape_infer_helper.get_edge_shape(add.input[1])
+        assert shape_a is not None and shape_b is not None
+
+        if len(shape_a) == 4 and len(shape_b) == 4:
+            if shape_a == shape_b:
+                skip = 1
+            else:
+                c = 3 if is_channel_last else 1
+                h = 1 if is_channel_last else 2
+                w = 2 if is_channel_last else 3
+                if shape_a[0] == shape_b[0] and shape_a[c] == shape_b[c]:
+                    if shape_b[h] == 1 and shape_b[w] == 1:
+                        skip = 1
+                        broadcast = True
+                    elif shape_a[h] == 1 and shape_a[w] == 1:
+                        skip = 0
+                        broadcast = True
+
+        if skip < 0:
+            logger.debug(
+                "skip SkipGroupNorm fusion since shape of Add inputs (%s, %s) are not expected",
+                add.input[0],
+                add.input[1],
+            )
+        return skip, broadcast
+
+    def has_multiple_consumers(self, output_name, input_name_to_nodes):
+        """Whether an output has multiple consumers (like graph output or more than one children nodes)"""
+        return self.model.find_graph_output(output_name) is not None or (
+            output_name in input_name_to_nodes and len(input_name_to_nodes[output_name]) > 1
+        )
+
+    def remove_if_safe(self, node, input_name_to_nodes):
+        """Remove a node if it is safe (only one children, and not graph output)"""
+        if not self.has_multiple_consumers(node.output[0], input_name_to_nodes):
+            self.nodes_to_remove.extend([node])
+
+    def is_bias_1d(self, bias_name: str):
+        """Whether bias is an initializer of one dimension"""
+        initializer = self.model.get_initializer(bias_name)
+        if initializer is None:
+            return False
+
+        bias_weight = NumpyHelper.to_array(initializer)
+        if bias_weight is None:
+            logger.debug("Bias weight not found")
+            return False
+
+        if len(bias_weight.shape) != 1:
+            logger.debug("Bias weight is not 1D")
+            return False
+        return True
+
+    def match_bias_path(self, node, input_name_to_nodes, output_name_to_node):
+        """
+        Match the bias graph pattern from an Transpose node after Reshape node like in below example.
+        It checks whether the bias is 1D initializer. If so, remove Add and redirect MatMul output to Reshape.
+        """
+        # Before Fusion:
+        #                        MatMul  (bias)
+        #                            \  /     (shape)
+        #                             Add    /
+        #                               \   /
+        #       (a)                   Reshape
+        #        \                       |
+        # Transpose([0, 3, 1, 2])   Transpose([0, 3, 1, 2])  --- the start node, this func only handles the above nodes.
+        #                        \  /
+        #                         Add
+        #                         / \
+        #                      (c)  Transpose([0,2,3,1])
+        #                              |
+        #                           GroupNorm
+        #                              |
+        #                             (d)
+        #
+        # After Fusion (the nodes below Reshape is handled in the fuse function):
+        #                    MatMul (shape)
+        #                       \   /
+        #                (a)   Reshape
+        #                  \    /
+        #                SkipGroupNorm
+        #                  /    \
+        #                (d)   Transpose([0, 3, 1, 2])
+        #                         \
+        #                         (c)
+
+        add_input_index = []
+        bias_nodes = self.model.match_parent_path(
+            node, ["Reshape", "Add", "MatMul"], [0, 0, None], output_name_to_node, add_input_index
+        )
+        if bias_nodes is None:
+            return None
+
+        (reshape, add_bias, matmul) = bias_nodes
+        bias = bias_nodes[1].input[1 - add_input_index[0]]
+        if not self.is_bias_1d(bias):
+            return None
+
+        reshape.input[0] = matmul.output[0]
+        self.remove_if_safe(add_bias, input_name_to_nodes)
+
+        return bias
+
+    def match_transpose_from_nhwc(self, output_name, input_name_to_nodes, output_name_to_node):
+        """Match whether an output is from a Transpose(perm=[0,3,1,2]) node."""
+        parent = output_name_to_node[output_name] if output_name in output_name_to_node else None
+        if parent is not None and parent.op_type == "Transpose":
+            permutation = OnnxModel.get_node_attribute(parent, "perm")
+            if permutation == [0, 3, 1, 2]:
+                self.remove_if_safe(parent, input_name_to_nodes)
+                return parent
+        return None
+
+    def fuse(self, node, input_name_to_nodes, output_name_to_node):
+        # This fusion requires shape information, so skip it if shape is not available.
+        if self.shape_infer_helper is None:
+            return
+
+        # Before Fusion:
+        #     (a)  (b)
+        #       \  /
+        #       Add
+        #       /\
+        #   (c)   Transpose([0,2,3,1])
+        #            \
+        #          GroupNorm
+        #             |
+        #            (d)
+        #
+        # After Fusion:
+        #           (a)              (b)
+        #             \              /
+        #   Transpose([0,2,3,1])   Transpose([0,2,3,1])
+        #                \        /
+        #              SkipGroupNorm
+        #                  /    \
+        #                 /    Transpose([0, 3, 1, 2])
+        #                /        \
+        #               (d)       (c)
+        nodes = self.model.match_parent_path(node, ["Transpose", "Add"], [0, 0], output_name_to_node)
+        if nodes is None:
+            return
+
+        (transpose, add) = nodes
+        if transpose in self.nodes_to_remove or add in self.nodes_to_remove:
+            return
+
+        if self.has_multiple_consumers(transpose.output[0], input_name_to_nodes):
+            return
+
+        permutation = OnnxModel.get_node_attribute(transpose, "perm")
+        if permutation != [0, 2, 3, 1]:
+            return
+
+        inputs = []
+        bias = None
+        for i in range(2):
+            matched_transpose = self.match_transpose_from_nhwc(add.input[i], input_name_to_nodes, output_name_to_node)
+            if matched_transpose:
+                # When there is an Transpose node before Add (see examples in match_bias_path), we do not need to
+                # insert another Transpose node. The existing Transpose node will be removed in prune_graph if it
+                # has only one consumer.
+                inputs.append(matched_transpose.input[0])
+                # See whether it match bias pattern.
+                if bias is None:
+                    bias = self.match_bias_path(matched_transpose, input_name_to_nodes, output_name_to_node)
+            else:
+                # Otherwise, insert a Transpose node before Add.
+                new_transpose = self.create_transpose_node(add.input[i], [0, 2, 3, 1])
+                self.model.add_node(new_transpose, self.this_graph_name)
+                inputs.append(new_transpose.output[0])
+
+        skip, broadcast = self.get_skip_index(add, is_channel_last=False)
+        if skip < 0:
+            return
+
+        inputs = [inputs[1 - skip], node.input[1], node.input[2], inputs[skip]]
+        if bias:
+            inputs = [*inputs, bias]
+
+        outputs = node.output
+
+        new_node_name = self.model.create_node_name(self.fused_op_type, name_prefix="SkipGroupNorm")
+        if self.has_multiple_consumers(add.output[0], input_name_to_nodes):
+            add_out_name = new_node_name + "_add_out"
+            outputs.append(add_out_name)
+
+            # Insert a Transpose node after add output.
+            add_out_transpose = self.create_transpose_node(add_out_name, [0, 3, 1, 2], add.output[0])
+            self.model.add_node(add_out_transpose, self.this_graph_name)
+
+        skip_group_norm = helper.make_node(
+            self.fused_op_type,
+            inputs=inputs,
+            outputs=outputs,
+            name=new_node_name,
+        )
+        skip_group_norm.domain = "com.microsoft"
+
+        self.increase_counter(
+            f"SkipGroupNorm(add_out={int(len(outputs) > 1)} bias={int(bias is not None)} broadcast={int(broadcast)})"
+        )
+
+        # Pass attributes from GroupNorm node to SkipGroupNorm
+        for att in node.attribute:
+            skip_group_norm.attribute.extend([att])
+
+        self.nodes_to_remove.extend([add, transpose, node])
+        self.nodes_to_add.append(skip_group_norm)
+        self.node_name_to_graph_name[skip_group_norm.name] = self.this_graph_name
+        self.prune_graph = True
diff --git a/onnxruntime/python/tools/transformers/fusion_transpose.py b/onnxruntime/python/tools/transformers/fusion_transpose.py
index 2762d95dd7b00..ca699903a7cd9 100644
--- a/onnxruntime/python/tools/transformers/fusion_transpose.py
+++ b/onnxruntime/python/tools/transformers/fusion_transpose.py
@@ -128,7 +128,9 @@ def fuse(
             return
 
         if not (
-            self.model.get_constant_value(unsqueeze_3.input[1]) == 3
+            len(unsqueeze_3.input) == 2
+            and self.model.get_constant_value(unsqueeze_3.input[1]) == 3
+            and len(unsqueeze_2.input) == 2
             and self.model.get_constant_value(unsqueeze_2.input[1]) == 2
             and len(self.model.get_children(gemm, input_name_to_nodes)) == 1
             and len(self.model.get_children(unsqueeze_3, input_name_to_nodes)) == 1
diff --git a/onnxruntime/python/tools/transformers/io_binding_helper.py b/onnxruntime/python/tools/transformers/io_binding_helper.py
index 71c1a21d8f768..50703b9c17e03 100644
--- a/onnxruntime/python/tools/transformers/io_binding_helper.py
+++ b/onnxruntime/python/tools/transformers/io_binding_helper.py
@@ -1,6 +1,6 @@
 import logging
 from collections import OrderedDict
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Tuple, Union
 
 import numpy
 import torch
@@ -229,7 +229,7 @@ def __del__(self):
         del self.io_binding
         del self.ort_session
 
-    def allocate_buffers(self, shape_dict: Dict[str, tuple]):
+    def allocate_buffers(self, shape_dict: Dict[str, Union[Tuple[int], List[int]]]):
         """Allocate tensors for I/O Binding"""
         if self.enable_cuda_graph:
             for name, shape in shape_dict.items():
@@ -283,6 +283,7 @@ def infer(self, feed_dict: Dict[str, torch.Tensor]):
             if name in self.input_names:
                 if self.enable_cuda_graph:
                     assert self.input_tensors[name].nelement() == tensor.nelement()
+                    assert self.input_tensors[name].dtype == tensor.dtype
                     assert tensor.device.type == "cuda"
                     # Please install cuda-python package with a version corresponding to CUDA in your machine.
                     from cuda import cudart
diff --git a/onnxruntime/python/tools/transformers/large_model_exporter.py b/onnxruntime/python/tools/transformers/large_model_exporter.py
new file mode 100644
index 0000000000000..407c3b80e153f
--- /dev/null
+++ b/onnxruntime/python/tools/transformers/large_model_exporter.py
@@ -0,0 +1,385 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation.  All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+
+"""
+Export LLM to onnx
+"""
+import argparse
+import inspect
+import math
+import os
+import tempfile
+from pathlib import Path
+from typing import Optional
+
+import onnx
+import torch
+import transformers
+from torch import nn
+
+
+def disable_huggingface_init():
+    """do not init model twice as it slow initialization"""
+
+    torch.nn.init.kaiming_uniform_ = lambda x, *args, **kwargs: x
+    torch.nn.init.uniform_ = lambda x, *args, **kwargs: x
+    torch.nn.init.normal_ = lambda x, *args, **kwargs: x
+    torch.nn.init.constant_ = lambda x, *args, **kwargs: x
+    torch.nn.init.xavier_uniform_ = lambda x, *args, **kwargs: x
+    torch.nn.init.xavier_normal_ = lambda x, *args, **kwargs: x
+    torch.nn.init.kaiming_normal_ = lambda x, *args, **kwargs: x
+    torch.nn.init.orthogonal_ = lambda x, *args, **kwargs: x
+
+
+def get_model_parameter_size(model: nn.Module):
+    """to calculate how much memory this model needs"""
+    param_size = 0
+    param_sum = 0
+    for param in model.parameters():
+        param_size += param.nelement() * param.element_size()
+        param_sum += param.nelement()
+    buffer_size = 0
+    buffer_sum = 0
+    for buffer in model.buffers():
+        buffer_size += buffer.nelement() * buffer.element_size()
+        buffer_sum += buffer.nelement()
+    all_size = (param_size + buffer_size) / 1024 / 1024
+    return all_size
+
+
+def initialize_model_and_sample_inputs(hf_model: str, cache_dir: Optional[str], tokenizer=None):
+    """
+    get the pretrained torch model from hugginface,
+    and sample model-inputs
+    """
+
+    disable_huggingface_init()
+
+    model = transformers.AutoModelForCausalLM.from_pretrained(  # type: ignore
+        hf_model, torch_dtype=torch.float16, cache_dir=cache_dir, trust_remote_code=True
+    )
+    if tokenizer is None:
+        tokenizer = hf_model
+    tokenizer = transformers.AutoTokenizer.from_pretrained(tokenizer)  # type: ignore
+
+    sample_inputs = tuple(tokenizer("Hello, my dog is cute", return_tensors="pt").values())
+    return model, sample_inputs
+
+
+def auto_pipeline_parallel(model: nn.Module, gpulist: list, sample_inputs: tuple):
+    """Make the model executable across multiple GPUs."""
+
+    def input_gpu_device_hook(mod, inputs, kwargs):
+        modifyed_inputs = []
+        first_dev = None
+        for layer_input in inputs:
+            if type(layer_input) is not torch.Tensor:
+                modifyed_inputs.append(layer_input)
+            elif hasattr(mod, "weight"):
+                modifyed_inputs.append(layer_input.to(mod.weight.device))
+            elif hasattr(mod, "parameters"):
+                device = next(mod.parameters(), layer_input).device
+                modifyed_inputs.append(layer_input.to(device))
+            elif hasattr(next(mod.children(), None), "weight"):
+                modifyed_inputs.append(layer_input.to(next(mod.children()).weight.device))
+            elif first_dev is not None and layer_input.device != first_dev:
+                modifyed_inputs.append(layer_input.to(first_dev))
+            else:
+                modifyed_inputs.append(layer_input)
+            if first_dev is None:
+                first_dev = modifyed_inputs[0].device
+        for key, value in kwargs.items():
+            if type(value) is torch.Tensor:
+                kwargs[key] = value.to(first_dev)
+
+        return (tuple(modifyed_inputs), kwargs)
+
+    def move_layer_to_device_rurc(mod, dev):
+        mod.to(dev)
+        for layer in mod.named_children():
+            move_layer_to_device_rurc(layer[1], dev)
+
+    model = model.half()
+    all_hooks = []
+    all_hooks.append(model.register_forward_pre_hook(input_gpu_device_hook, with_kwargs=True))
+    pre_fix = next(iter(model.named_children()))[0]
+    for top_name, top_module in model.named_children():
+        for name, module in top_module.named_children():
+            all_hooks.append(module.register_forward_pre_hook(input_gpu_device_hook, with_kwargs=True))
+            if type(module) in [torch.nn.ModuleList]:
+                num_layers_on_each_gpu = math.floor(len(module) / len(gpulist))
+                for idx, attn_layer in enumerate(module):
+                    all_hooks.append(attn_layer.register_forward_pre_hook(input_gpu_device_hook, with_kwargs=True))
+
+                    to_dev = gpulist[min(idx // num_layers_on_each_gpu, len(gpulist))]
+                    attn_layer.to(to_dev)
+                    move_layer_to_device_rurc(attn_layer, to_dev)
+                    print(f"move {pre_fix}.{name}.{idx} to {to_dev}")
+            else:
+                module.to(gpulist[0])
+                print(f"move {pre_fix}.{name} to {gpulist[0]}")
+        if len(list(top_module.named_children())) == 0:
+            top_module.to(gpulist[0])
+            print(f"move {top_name} to {gpulist[0]}")
+
+    with torch.no_grad():
+        model(sample_inputs[0], attention_mask=sample_inputs[1])
+    return model
+
+
+def retrieve_onnx_inputs(model: nn.Module, sample_inputs: tuple, with_past: bool):
+    """
+    auto retrieve onnx inputs from torch model as we can't enumlate all possibilities
+    for all models
+    """
+    user_inputs = []
+
+    def hook_for_inputs(_, inputs, kwargs):
+        user_inputs.append((inputs, kwargs))
+        return user_inputs[0]
+
+    hook_handle = model.register_forward_pre_hook(hook_for_inputs, with_kwargs=True)
+
+    forward_params = inspect.signature(model.forward).parameters
+    input_keys = list(forward_params.keys())
+    default_values = [forward_params.get(key).default for key in input_keys]
+    out = model(sample_inputs[0], attention_mask=sample_inputs[1])
+    hook_handle.remove()
+    user_inputs = user_inputs[0]
+    onnx_inputs = default_values
+    for idx, _val in enumerate(user_inputs[0]):
+        onnx_inputs[idx] = user_inputs[0][idx]
+    for key, value in user_inputs[1].items():
+        idx = input_keys.index(key)
+        onnx_inputs[idx] = value
+    for idx, (key, value) in enumerate(zip(input_keys, onnx_inputs)):
+        if type(value) is torch.Tensor:
+            value.to(model.device)
+        if "use_cache" in key:
+            onnx_inputs[idx] = with_past
+            out = model(sample_inputs[0], attention_mask=sample_inputs[1], use_cache=with_past) if with_past else out
+
+    return input_keys, onnx_inputs, out.past_key_values
+
+
+def move_to_appropriate_device(model: nn.Module, sample_inputs_tp: tuple) -> nn.Module:
+    """
+    According to the model size, we will upload it to
+    CPU if has no GPU or enough GPU memory,
+    Single GPU if has only one GPU in local or model size is enough to fit one GPU
+    Multiple GPU if there is more than one gpu in local and model is too large
+    """
+    total_mem_per_cpu = torch.cuda.get_device_properties(0).total_memory / 1024 / 1024
+
+    print(f"Model_Size = {get_model_parameter_size(model)/1024} GB")
+    print(f"total_mem_per_cpu = {total_mem_per_cpu/1024} GB")
+    if get_model_parameter_size(model) > total_mem_per_cpu * 0.45:
+        device_collection = [torch.device(i) for i in range(torch.cuda.device_count())]
+        if len(device_collection) > 1:
+            print(
+                f"{len(device_collection)} GPUs are used to export onnx, \
+                   Please set CUDA_VISIBLE_DEVICES to use specific GPU group"
+            )
+            model = auto_pipeline_parallel(model, device_collection, sample_inputs_tp)
+        else:
+            print("!!!! convert model to float and export onnx using CPU")
+            model = model.cpu().float()
+    else:
+        print("Export model on a single GPU")
+        model = model.cuda().half()
+    return model
+
+
+def adapt_inputs_to_device(sample_inputs: tuple, device: torch.device) -> tuple:
+    """move inputs to device"""
+    sample_inputs_ = []
+    for sample_int in sample_inputs:
+        if isinstance(sample_int, torch.Tensor):
+            sample_inputs_.append(sample_int.to(device))
+        else:
+            sample_inputs_.append(sample_int)
+    return tuple(sample_inputs_)
+
+
+def fetch_onnx_inputs_outputs_name(
+    model: nn.Module,
+    onnx_inputs: list,
+    torch_input_names: tuple,
+    past_key_values: tuple,
+    with_past: bool,
+    input_with_past: bool,
+):
+    """fetch onnx inputs and outputs name"""
+    num_of_past_key = 0
+    kv_cache_axis = {0: "batch_size"}
+    # try get num_of_past_key and shape of past_key_value
+    if past_key_values is not None:
+        num_of_past_key = len(past_key_values)
+        seq_index = (torch.tensor(past_key_values[0][0].shape) == onnx_inputs[0].shape[-1]).nonzero().view(-1)
+        assert seq_index.numel() == 1
+        kv_cache_axis = {0: "batch_size", seq_index.item(): "seq_len"}
+
+    if not num_of_past_key:
+        num_of_past_key = model.config.num_hidden_layers
+
+    onnx_inp_names = ("input_ids", "attention_mask")
+    onnx_out_names = ("logits",)
+    onnx_dynamic_axes = {
+        "input_ids": {0: "batch_size", 1: "seq_len"},
+        "attention_mask": {0: "batch_size", 1: "seq_len"},
+    }
+    if input_with_past:
+        for i in range(num_of_past_key):
+            onnx_inp_names += (f"present_key.{i}",)
+            onnx_inp_names += (f"present_values.{i}",)
+
+            onnx_dynamic_axes[onnx_inp_names[-1]] = kv_cache_axis
+            onnx_dynamic_axes[onnx_inp_names[-2]] = kv_cache_axis
+
+    if with_past or input_with_past:
+        for i in range(num_of_past_key):
+            onnx_out_names += (f"past_key.{i}",)
+            onnx_out_names += (f"past_values.{i}",)
+            onnx_dynamic_axes[onnx_out_names[-1]] = kv_cache_axis
+            onnx_dynamic_axes[onnx_out_names[-2]] = kv_cache_axis
+
+    for idx, name in enumerate(torch_input_names):
+        if input_with_past:
+            if name == "past_key_values":
+                onnx_inputs[idx] = past_key_values
+            elif name == "attention_mask":
+                attn_mask = onnx_inputs[idx]
+                onnx_inputs[idx] = torch.cat(
+                    (attn_mask, torch.ones((attn_mask.shape[0], 1), device=attn_mask.device)), dim=1
+                )
+            elif name == "input_ids":
+                input_ids = onnx_inputs[idx]
+                onnx_inputs[idx] = input_ids[:, -1:]
+
+    return onnx_inp_names, onnx_out_names, onnx_dynamic_axes
+
+
+def do_export_internal(model: nn.Module, onnx_io_tuple: tuple, onnx_inputs: tuple, onnx_path: Path, opset: int):
+    """do export with torch.onnx.export"""
+    onnx_model_name = onnx_path.name
+    onnx_inp_names, onnx_out_names, onnx_dynamic_axes = onnx_io_tuple
+    # two step to export onnx
+    # 1. export onnx with lots of pieces of weights
+    # 2. save all weights to external data
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        tmp_onnx = os.path.join(tmpdirname, "tmp.onnx")
+
+        torch.onnx.export(
+            model=model,
+            args=tuple(onnx_inputs),
+            f=tmp_onnx,
+            verbose=False,
+            opset_version=opset,
+            input_names=onnx_inp_names,
+            output_names=onnx_out_names,
+            dynamic_axes=onnx_dynamic_axes,
+        )
+
+        onnx_path.unlink(missing_ok=True)
+        (onnx_path.parent / f"{onnx_model_name}_ext.data").unlink(missing_ok=True)
+
+        onnx_model = onnx.load(str(tmp_onnx))
+        onnx.save_model(
+            onnx_model,
+            str(onnx_path),
+            save_as_external_data=(len(os.listdir(tmpdirname)) > 1),
+            all_tensors_to_one_file=True,
+            location=f"{onnx_model_name}_ext.data",
+            size_threshold=1024,
+            convert_attribute=False,
+        )
+
+
+@torch.no_grad()
+def export_onnx(hf_model: str, cache_dir: Optional[str], onnx_path_str: str, with_past: bool, opset: int):
+    """
+    do export
+    model: torch model
+    onnx_path: where the onnx model saved to
+    sample_inputs_tp: inputs for torch model
+    """
+    model, sample_inputs_tp = initialize_model_and_sample_inputs(hf_model, cache_dir)
+
+    model = move_to_appropriate_device(model, sample_inputs_tp)
+
+    sample_inputs = adapt_inputs_to_device(sample_inputs_tp, next(model.parameters()).device)
+
+    # input_keys would be usesful if the model has some special inputs
+    input_keys, onnx_inputs, past_key_value = retrieve_onnx_inputs(model, sample_inputs, with_past)
+
+    onnx_io_tuple = fetch_onnx_inputs_outputs_name(model, onnx_inputs, input_keys, past_key_value, with_past, False)
+
+    onnx_model_name = "model.onnx"
+    onnx_path: Path = Path(onnx_path_str).absolute()
+    if onnx_path.suffix != ".onnx":
+        onnx_path = onnx_path / onnx_model_name
+
+    do_export_internal(model, onnx_io_tuple, onnx_inputs, onnx_path, opset)
+    if not with_past:
+        return
+
+    onnx_io_tuple = fetch_onnx_inputs_outputs_name(model, onnx_inputs, input_keys, past_key_value, with_past, True)
+
+    onnx_model_name = "model_with_past.onnx"
+    onnx_path = onnx_path.parent / onnx_model_name
+
+    do_export_internal(model, onnx_io_tuple, onnx_inputs, onnx_path, opset)
+
+
+def parse_arguments():
+    """arguments parsing."""
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument(
+        "-m",
+        "--model",
+        required=True,
+        type=str,
+        default=["meta-llama/Llama-2-70b-hf"],
+        help="Pre-trained models in huggingface model hub",
+    )
+    parser.add_argument(
+        "-s",
+        "--saved_path",
+        required=False,
+        type=str,
+        default="./onnx_models/",
+        help="where the onnx model will be saved",
+    )
+    parser.add_argument(
+        "--cache_dir",
+        required=False,
+        type=str,
+        default=None,
+        help=("cache directy of huggingface, by setting this to avoid useless downloading if you have one"),
+    )
+    parser.add_argument(
+        "--with_past",
+        action="store_true",
+        default=False,
+        help=("The tool will export onnx without past-key-value by default"),
+    )
+    parser.add_argument(
+        "--opset",
+        required=False,
+        type=int,
+        default=17,
+        help=(
+            "the opset to save onnx model, \
+              try to increase it if this opset doens't have new features you want"
+        ),
+    )
+    return parser.parse_args()
+
+
+if __name__ == "__main__":
+    args = parse_arguments()
+
+    export_onnx(args.model, args.cache_dir, args.saved_path, args.with_past, args.opset)
diff --git a/onnxruntime/python/tools/transformers/models/bert/eval_squad.py b/onnxruntime/python/tools/transformers/models/bert/eval_squad.py
index 0cbda8894528a..6089c960e47ee 100644
--- a/onnxruntime/python/tools/transformers/models/bert/eval_squad.py
+++ b/onnxruntime/python/tools/transformers/models/bert/eval_squad.py
@@ -6,15 +6,26 @@
 # This script evaluates accuracy of ONNX models for question-answering task on SQuAD data set.
 # Example to evaluate raw and optimized model for CUDA in Linux:
 #   pip3 install datasets evaluate optimum transformers onnxruntime-gpu
-#   python3 eval_squad.py -m distilbert-base-cased-distilled-squad
-#   python3 -m onnxruntime.transformers.optimizer --output optimized_fp16.onnx --num_heads 12 --hidden_size 768 \
-#           --input /home/$USER/.cache/huggingface/hub/distilbert-base-cased-distilled-squad/model.onnx \
-#           --use_mask_index --float16
-#   python3 eval_squad.py -m distilbert-base-cased-distilled-squad --onnx optimized_fp16.onnx
-
+#
+#   python3 eval_squad.py -m bert-large-uncased-whole-word-masking-finetuned-squad -s 384 -b 1 --use_io_binding
+#
+#   python3 -m onnxruntime.transformers.optimizer \
+#           --input ./bert-large-uncased-whole-word-masking-finetuned-squad/model.onnx \
+#           --output ./bert-large-uncased-whole-word-masking-finetuned-squad/optimized_model.onnx
+#
+#   python3 eval_squad.py -m bert-large-uncased-whole-word-masking-finetuned-squad -s 384 -b 1 --use_io_binding \
+#           --onnx ./bert-large-uncased-whole-word-masking-finetuned-squad/optimized_model.onnx
+#
+#   Snippet of example output in A100:
+#   {'exact': 86.65089877010406, 'f1': 92.99433524952254, 'total': 10570, 'HasAns_exact': 86.65089877010406
+#    'total_time_in_seconds': 81.69239814393222, 'samples_per_second': 129.387804008115,
+#    'latency_in_seconds': 0.007728703703304846, 'provider': 'CUDAExecutionProvider',
+#    'pretrained_model_name': 'bert-large-uncased-whole-word-masking-finetuned-squad',
+#    'batch_size': 1, 'sequence_length': 384, 'use_io_binding': True}
 import argparse
 import csv
 import os
+import time
 
 try:
     from importlib.metadata import PackageNotFoundError, version
@@ -24,17 +35,15 @@
 from pathlib import Path
 from typing import Any, Dict, List, Optional
 
-import torch
 from datasets import load_dataset
 from evaluate import evaluator
 from optimum.onnxruntime import ORTModelForQuestionAnswering
-from optimum.onnxruntime.modeling_ort import ORTModel
 from optimum.version import __version__ as optimum_version
 from packaging import version as version_check
 from transformers import AutoTokenizer, pipeline
 
-if version_check.parse(optimum_version) < version_check.parse("1.6.0"):
-    raise ImportError(f"Please install optimum>=1.6.0. The version {optimum_version} was found.")
+if version_check.parse(optimum_version) < version_check.parse("1.13.1"):
+    raise ImportError(f"Please install optimum>=1.13.1. Current version: {optimum_version}.")
 
 PRETRAINED_SQUAD_MODELS = [
     "bert-large-uncased-whole-word-masking-finetuned-squad",
@@ -64,23 +73,24 @@ def load_onnx_model(
         model: ORTModel for the onnx model
         onnx_path: the path of onnx model
     """
-    model = ORTModelForQuestionAnswering.from_pretrained(model_id, from_transformers=True)
 
-    if onnx_path is not None:
-        model.model_name = Path(onnx_path).name
-
-        if provider != "CPUExecutionProvider":
-            model.device = torch.device("cuda:0")
-            model.model = ORTModel.load_model(onnx_path, provider)
-        else:
-            model.device = torch.device("cpu")
-            model.model = ORTModel.load_model(onnx_path)
+    if onnx_path is None:
+        # Export onnx to a sub-directory named by the model id
+        model = ORTModelForQuestionAnswering.from_pretrained(
+            model_id, export=True, provider=provider, use_io_binding=use_io_binding
+        )
+        save_onnx_dir = os.path.join(".", model_id)
+        model.save_pretrained(save_onnx_dir)
+        onnx_path = os.path.join(save_onnx_dir, "model.onnx")
+        print("Model is exported to onnx file:", onnx_path)
     else:
-        onnx_path = os.path.join(model.model_save_dir.as_posix(), model.model_name)
-        if provider != "CPUExecutionProvider":
-            model.to("cuda")
-
-    model.use_io_binding = use_io_binding
+        model = ORTModelForQuestionAnswering.from_pretrained(
+            os.path.dirname(onnx_path),
+            file_name=Path(onnx_path).name,
+            provider=provider,
+            use_io_binding=use_io_binding,
+            # provider_options={"enable_skip_layer_norm_strict_mode": True},
+        )
 
     return model, onnx_path
 
@@ -211,7 +221,12 @@ def main():
     for sequence_length in args.sequence_lengths:
         tokenizer.model_max_length = sequence_length
         tokenizer.doc_stride = min(sequence_length // 2, 128)
+        if args.onnx is None:
+            print("Exporting onnx model. It might take a few minutes...")
+        start_time = time.time()
         ort_model, onnx_path = load_onnx_model(pretrained_model_name, args.onnx, args.provider, args.use_io_binding)
+        latency = time.time() - start_time
+        print(f"Onnx model exported or loaded in {latency:.1f} seconds")
 
         print(ort_model.config)
         if sequence_length > ort_model.config.max_position_embeddings:
@@ -222,14 +237,22 @@ def main():
         )
 
         task_evaluator = evaluator("question-answering")
+        print("Loading dataset...")
+        start_time = time.time()
         squad_dataset = load_dataset("squad", split=f"validation[:{args.total}]" if args.total > 0 else "validation")
+        latency = time.time() - start_time
+        print(f"Dataset loaded in {latency:.1f} seconds")
 
+        print("Evaluating squad_v2 with ORT. It might take a few minutes...")
+        start_time = time.time()
         result = task_evaluator.compute(
             model_or_pipeline=qa_pipeline,
             data=squad_dataset,
             metric="squad_v2",
             squad_v2_format=True,
         )
+        latency = time.time() - start_time
+        print(f"Evaluation done in {latency:.1f} seconds")
 
         result["provider"] = args.provider
         result["disable_fused_attention"] = disable_fused_attention
diff --git a/onnxruntime/python/tools/transformers/models/llama/README.md b/onnxruntime/python/tools/transformers/models/llama/README.md
index b4461a2eadb8c..44dea3cb73b6e 100644
--- a/onnxruntime/python/tools/transformers/models/llama/README.md
+++ b/onnxruntime/python/tools/transformers/models/llama/README.md
@@ -1,5 +1,20 @@
 # LLaMA-2
 
+## Prerequisites
+
+Please note the package versions needed for using LLaMA-2 in the `requirements.txt` file that fits your scenario.
+- `requirements-cpu.txt`
+  - For running LLaMA-2 on CPU
+- `requirements-cuda.txt`
+  - For running LLaMA-2 on CUDA
+  - Note that `torch` with CUDA enabled is not installed automatically. This is because `torch` should be installed with the CUDA version used on your machine. Please visit [the PyTorch website](https://pytorch.org/get-started/locally/) to download the `torch` version that is used with the CUDA version installed on your machine and satisfies the requirement listed in the file.
+- `requirements-quant.txt`
+  - For running the SmoothQuant algorithm using [Intel's Neural Compressor](https://github.com/intel/neural-compressor)
+- `requirements-70b-model.txt`
+  - For running the LLaMA-2 70B model on multiple GPUs
+- `requirements.txt`
+  - Package versions needed in each of the above files
+
 ## Exporting LLaMA-2
 
 There are several ways to export LLaMA-2 models (using LLaMA-2 7B as an example).
@@ -17,12 +32,31 @@ $ python3 -m onnxruntime.transformers.models.llama.convert_to_onnx -m meta-llama
 
 To make this option compatible with [Hugging Face's Optimum](https://github.com/huggingface/optimum), you will need to create `config.json` and `generation_config.json` for your model and store them in the same directory as your ONNX models. For example, you can find those JSON files for LLaMA-2 7B on Hugging Face [here](https://huggingface.co/meta-llama/Llama-2-7b-hf).
 
+As indicated in `requirements.txt`, you will also need to install Optimum from source. Once installed, you will need to modify `ORTModelForCausalLM.forward` in `optimum/optimum/onnxruntime/modeling_decoder.py` as follows:
+
+```
+# Before
+if self.use_cache:
+    if past_key_values is not None:
+        input_ids = input_ids[:, -1:]
+        # Flatten the past_key_values (no need to flatten for models using multi-query attn)
+
+
+# After
+if self.use_cache:
+    if past_key_values is not None:
+        input_ids = input_ids[:, -1:] if past_key_values[0][0].shape[2] != 0 else input_ids
+        # Flatten the past_key_values (no need to flatten for models using multi-query attn)
+```
+
 ### Option 2: from [Microsoft's custom export](https://github.com/microsoft/Llama-2-Onnx)
 
 Please follow the [README instructions](https://github.com/microsoft/Llama-2-Onnx#before-you-start) in the custom export of LLaMA-2.
 
 ### Option 3: from [Hugging Face Optimum](https://github.com/huggingface/optimum)
 
+Note that this may produce two ONNX models with older Optimum versions. The above two options produce one ONNX model and installing Optimum from source will now produce one ONNX model.
+
 First, log into the Hugging Face CLI in your terminal:
 
 ```
@@ -47,6 +81,15 @@ model.save_pretrained(name.split("/")[-1] + "-onnx")
 
 Here are some additional examples for exporting LLaMA-2.
 
+Export Model with Different GPU Device Ids
+```
+# From source using first GPU:
+$ CUDA_VISIBLE_DEVICES=0 python3 -m models.llama.convert_to_onnx -m meta-llama/Llama-2-7b-hf --input ./Llama-2-7b-hf --output ./llama2-7b
+
+# From wheel using second GPU:
+$ CUDA_VISIBLE_DEVICES=1 python3 -m onnxruntime.transformers.models.llama.convert_to_onnx -m meta-llama/Llama-2-7b-hf --input ./Llama-2-7b-hf --output ./llama2-7b
+```
+
 Export Saved Model on Disk
 ```
 # From source:
@@ -56,38 +99,153 @@ $ python3 -m models.llama.convert_to_onnx -m meta-llama/Llama-2-7b-hf --input ./
 $ python3 -m onnxruntime.transformers.models.llama.convert_to_onnx -m meta-llama/Llama-2-7b-hf --input ./Llama-2-7b-hf --output ./llama2-7b
 ```
 
-Export for FP16
+Export for FP32 CUDA
 ```
 # From source:
-$ python3 -m models.llama.convert_to_onnx -m meta-llama/Llama-2-7b-hf --output llama2-7b-fp16 --precision fp16
+$ python3 -m models.llama.convert_to_onnx -m meta-llama/Llama-2-7b-hf --output llama2-7b-fp32-gpu --precision fp32 --execution_provider cuda
 
 # From wheel:
-$ python3 -m onnxruntime.transformers.models.llama.convert_to_onnx -m meta-llama/Llama-2-7b-hf --output llama2-7b-fp16 --precision fp16
+$ python3 -m onnxruntime.transformers.models.llama.convert_to_onnx -m meta-llama/Llama-2-7b-hf --output llama2-7b-fp32-gpu --precision fp32 --execution_provider cuda
 ```
 
-Export for INT8
+Export for FP32 CPU
 ```
 # From source:
-$ python3 -m models.llama.convert_to_onnx -m meta-llama/Llama-2-7b-hf --output llama2-7b-int8 --precision int8 --quantization_method smooth_quant
+$ python3 -m models.llama.convert_to_onnx -m meta-llama/Llama-2-7b-hf --output llama2-7b-fp32-cpu --precision fp32 --execution_provider cpu
 
 # From wheel:
-$ python3 -m onnxruntime.transformers.models.llama.convert_to_onnx -m meta-llama/Llama-2-7b-hf --output llama2-7b-int8 --precision int8 --quantization_method smooth_quant
+$ python3 -m onnxruntime.transformers.models.llama.convert_to_onnx -m meta-llama/Llama-2-7b-hf --output llama2-7b-fp32-cpu --precision fp32 --execution_provider cpu
+```
+
+Export for FP16 CUDA (with MultiHeadAttention)
+```
+# From source:
+$ python3 -m models.llama.convert_to_onnx -m meta-llama/Llama-2-7b-hf --output llama2-7b-fp16 --precision fp16 --execution_provider cuda
+
+# From wheel:
+$ python3 -m onnxruntime.transformers.models.llama.convert_to_onnx -m meta-llama/Llama-2-7b-hf --output llama2-7b-fp16 --precision fp16 --execution_provider cuda
+```
+
+Export for FP16 CUDA (with GroupQueryAttention)
+```
+# From source:
+$ python3 -m models.llama.convert_to_onnx -m meta-llama/Llama-2-7b-hf --output llama2-7b-fp16 --precision fp16 --execution_provider cuda --use_gqa
+
+# From wheel:
+$ python3 -m onnxruntime.transformers.models.llama.convert_to_onnx -m meta-llama/Llama-2-7b-hf --output llama2-7b-fp16 --precision fp16 --execution_provider cuda --use_gqa
+```
+
+Note: GroupQueryAttention currently works with the FP16 CUDA and INT4 CUDA models, and it can provide faster inference than MultiHeadAttention, especially for large sequence lengths (e.g. 1024 or larger). For the best performance, you should pre-allocate the KV cache buffers to have size `(batch_size, num_heads, max_sequence_length, head_size)` so that the past KV and present KV caches share the same memory. You also need to bind them with ONNX Runtime's [IO binding](https://onnxruntime.ai/docs/api/python/api_summary.html#iobinding).
+
+Here is an example of how you can bind directly to `torch.tensor` objects:
+```
+# Assumes all inputs and outputs to the model are pre-allocated with the correct shapes in GPU memory
+
+# Bind inputs
+for k, v in inputs.items():
+    io_binding.bind_input(
+        name=k,
+        device_type="cuda",
+        device_id=0,
+        element_type=np.float16,
+        shape=tuple(v.shape),
+        buffer_ptr=v.data_ptr()
+    )
+
+# Bind outputs
+for output in model.get_outputs():
+    name = output.name
+    if "present" in name:
+        # Bind KV cache outputs to KV cache inputs
+        v = inputs[name.replace("present", "past_key_values")]
+        io_binding.bind_output(
+            name=name,
+            device_type="cuda",
+            device_id=0,
+            element_type=np.float16,
+            shape=tuple(v.shape),
+            buffer_ptr=v.data_ptr()
+        )
+    else:
+        # Bind other outputs as actual outputs
+        v = outputs[name]
+        io_binding.bind_output(
+            name=name,
+            device_type="cuda",
+            device_id=0,
+            element_type=np.float16,
+            shape=tuple(v.shape),
+            buffer_ptr=v.data_ptr()
+        )
+
+io_binding.synchronize_inputs()
+sess.run_with_iobinding(io_binding)
+io_binding.synchronize_outputs()
+```
+
+Export for INT8 CPU (SmoothQuant)
+```
+# From source:
+$ python3 -m models.llama.convert_to_onnx -m meta-llama/Llama-2-7b-hf --output llama2-7b-int8 --precision int8 --quantization_method smooth_quant --execution_provider cpu --no_merged
+
+# From wheel:
+$ python3 -m onnxruntime.transformers.models.llama.convert_to_onnx -m meta-llama/Llama-2-7b-hf --output llama2-7b-int8 --precision int8 --quantization_method smooth_quant --execution_provider cpu --no_merged
 ```
 
 Note: [Intel's Neural Compressor](https://github.com/intel/neural-compressor) takes time to run the SmoothQuant quantization algorithm on LLMs. On an [Azure Standard_NC24s_v3 VM](https://learn.microsoft.com/en-us/azure/virtual-machines/ncv3-series), it takes about ~30-45 min for each of the exported ONNX models.
 
+Export for INT8 CPU (DynamicQuant)
+```
+# From source:
+$ python3 -m models.llama.convert_to_onnx -m meta-llama/Llama-2-7b-hf --output llama2-7b-int8 --precision int8 --quantization_method quantize_dynamic --execution_provider cpu
+
+# From wheel:
+$ python3 -m onnxruntime.transformers.models.llama.convert_to_onnx -m meta-llama/Llama-2-7b-hf --output llama2-7b-int8 --precision int8 --quantization_method quantize_dynamic --execution_provider cpu
+```
+
+Export for INT4 CUDA
+```
+# From source:
+$ python3 -m models.llama.convert_to_onnx -m meta-llama/Llama-2-7b-hf --output llama2-7b-int4-gpu --precision int4 --quantization_method blockwise --execution_provider cuda --use_gqa
+
+# From wheel:
+$ python3 -m onnxruntime.transformers.models.llama.convert_to_onnx -m meta-llama/Llama-2-7b-hf --output llama2-7b-int4-gpu --precision int4 --quantization_method blockwise --execution_provider cuda --use_gqa
+```
+
+Note: See the FP16 CUDA notes about GroupQueryAttention. The `--use_gqa` flag is optional.
+
+Export for INT4 CPU
+```
+# From source:
+$ python3 -m models.llama.convert_to_onnx -m meta-llama/Llama-2-7b-hf --output llama2-7b-int4-cpu --precision int4 --quantization_method blockwise --execution_provider cpu
+
+# From wheel:
+$ python3 -m onnxruntime.transformers.models.llama.convert_to_onnx -m meta-llama/Llama-2-7b-hf --output llama2-7b-int4-cpu --precision int4 --quantization_method blockwise --execution_provider cpu
+```
+
+Export LLaMA-2 70B sharded model into 4 partitions
+```
+# From source:
+# 1. Install necessary packages from requirements-70b-model.txt
+$ pip install -r requirements-70b-model.txt
+
+# 2. Build ONNX Runtime from source with NCCL enabled. Here is a sample command:
+$ ./build.sh --config Release --use_cuda --cuda_home /usr/local/cuda-12.2 --cudnn_home /usr/local/cuda-12.2 --build_wheel --cuda_version=12.2 --parallel --skip_tests --enable_nccl --nccl_home /usr/local/cuda-12.2 --use_mpi --mpi_home=/usr/lib/x86_64-linux-gnu/
+
+# 3. Shard and export the LLaMA-2 70B model. With FP16, you will need at least 140GB of GPU memory to load the model. Therefore, you will need at least 4 40GB A100 GPUs or 2 80GB A100 GPUs to shard the PyTorch model and export each shard to ONNX. Here is an example command:
+$ CUDA_VISIBLE_DEVICES=0,1,2,3 bash convert_70b_model.sh 4 -m meta-llama/Llama-2-70b-hf --output llama2-70b-distributed --precision fp16 --execution_provider cuda --use_gqa
+```
+
 ## Benchmark LLaMA-2
 
 Here are some examples of how you can benchmark LLaMA-2.
 
-Note: In the below examples, `PyTorch` refers to running in PyTorch without `torch.compile` and `PyTorch 2.0` refers to running in PyTorch with `torch.compile`.
-
 ### Variants
 
-1. PyTorch (without `torch.compile`), FP32
+1. PyTorch without `torch.compile`, FP32
 ```
 python3 -m models.llama.benchmark \
-    --benchmark-type hf-pt \
+    --benchmark-type hf-pt-eager \
     --model-name meta-llama/Llama-2-7b-hf \
     --precision fp32 \
     --batch-sizes "1 2" \
@@ -96,10 +254,10 @@ python3 -m models.llama.benchmark \
     --auth
 ```
 
-2. PyTorch 2.0 (with `torch.compile`), FP16
+2. PyTorch with `torch.compile`, FP16
 ```
 python3 -m models.llama.benchmark \
-    --benchmark-type hf-pt2 \
+    --benchmark-type hf-pt-compile \
     --model-name meta-llama/Llama-2-7b-hf \
     --precision fp16 \
     --batch-sizes "1 2" \
@@ -112,7 +270,7 @@ python3 -m models.llama.benchmark \
 ```
 python3 -m models.llama.benchmark \
     --benchmark-type hf-ort \
-    --hf-ort-model-path ./Llama-2-7b-hf-onnx/ \
+    --hf-ort-dir-path ./Llama-2-7b-hf-onnx/ \
     --model-name meta-llama/Llama-2-7b-hf \
     --precision fp32 \
     --batch-sizes "1 2" \
@@ -121,11 +279,11 @@ python3 -m models.llama.benchmark \
     --auth
 ```
 
-4. Optimum + ONNX Runtime, FP16, export via convert_to_onnx
+4. Optimum + ONNX Runtime, FP16, export via Optimum or convert_to_onnx
 ```
 python3 -m models.llama.benchmark \
     --benchmark-type hf-ort \
-    --hf-ort-model-path ./llama2-7b-fp16/ \
+    --hf-ort-dir-path ./Llama-2-7b-hf-onnx/ \
     --model-name meta-llama/Llama-2-7b-hf \
     --precision fp16 \
     --batch-sizes "1 2" \
@@ -134,24 +292,35 @@ python3 -m models.llama.benchmark \
     --auth
 ```
 
-5. Optimum + ONNX Runtime, INT8, export via convert_to_onnx
+5. ONNX Runtime, FP32, Microsoft custom export
 ```
 python3 -m models.llama.benchmark \
-    --benchmark-type hf-ort \
-    --hf-ort-model-path ./llama2-7b-int8/ \
+    --benchmark-type ort-msft \
+    --ort-model-path ./llama-2-onnx/7B_float32/ONNX/LlamaV2_7B_float32.onnx \
     --model-name meta-llama/Llama-2-7b-hf \
-    --precision int8 \
+    --precision fp32 \
     --batch-sizes "1 2" \
     --sequence-lengths "8 16" \
-    --device cpu \
-    --auth
+    --device cpu
 ```
 
-6. ONNX Runtime, FP32, Microsoft custom export
+6. ONNX Runtime, FP16, Microsoft custom export
 ```
 python3 -m models.llama.benchmark \
-    --benchmark-type ort \
-    --ort-model-path llama-2-onnx/7B_float32/ONNX/LlamaV2_7B_float32.onnx \
+    --benchmark-type ort-msft \
+    --ort-model-path ./llama-2-onnx/7B_float16/ONNX/LlamaV2_7B_float16.onnx \
+    --model-name meta-llama/Llama-2-7b-hf \
+    --precision fp16 \
+    --batch-sizes "1 2" \
+    --sequence-lengths "8 16" \
+    --device cuda
+```
+
+7. ONNX Runtime, FP32, convert_to_onnx, use 2nd GPU
+```
+CUDA_VISIBLE_DEVICES=1 python3 -m models.llama.benchmark \
+    --benchmark-type ort-convert-to-onnx \
+    --ort-model-path ./llama2-7b/rank_0_Llama-2-7b-hf_decoder_merged_model_fp32.onnx \
     --model-name meta-llama/Llama-2-7b-hf \
     --precision fp32 \
     --batch-sizes "1 2" \
@@ -159,11 +328,11 @@ python3 -m models.llama.benchmark \
     --device cpu
 ```
 
-7. ONNX Runtime, FP16, Microsoft custom export
+8. ONNX Runtime, FP16, convert_to_onnx, use 5th GPU
 ```
-python3 -m models.llama.benchmark \
-    --benchmark-type ort \
-    --ort-model-path ./llama-2-onnx/7B_float16/ONNX/LlamaV2_7B_float16.onnx \
+CUDA_VISIBLE_DEVICES=4 python3 -m models.llama.benchmark \
+    --benchmark-type ort-convert-to-onnx \
+    --ort-model-path ./llama2-7b/rank_0_Llama-2-7b-hf_decoder_merged_model_fp16.onnx \
     --model-name meta-llama/Llama-2-7b-hf \
     --precision fp16 \
     --batch-sizes "1 2" \
@@ -171,17 +340,35 @@ python3 -m models.llama.benchmark \
     --device cuda
 ```
 
+9. ONNX Runtime, FP16, convert_to_onnx, LLaMA-2 70B shard to 4 GPUs
+```
+CUDA_VISIBLE_DEVICES=4,5,6,7 bash benchmark_70b_model.sh 4 \
+    --benchmark-type ort-convert-to-onnx \
+    --ort-model-path ./llama2-70b-dis/rank_{}_Llama-2-70b-hf_decoder_merged_model_fp16.onnx \
+    --model-name meta-llama/Llama-2-70b-hf \
+    --precision fp16 \
+    --device cuda \
+    --warmup-runs 5 \
+    --num-runs 100
+```
+
 You can profile a variant by adding the `--profile` flag and providing one batch size and sequence length combination.
 
 ### Benchmark All
-You can use `benchmark_all.py` to benchmark across various platforms and automatically store the results in a CSV file. Here is an example.
+You can use `benchmark_all.py` to benchmark across various options and automatically store the results in a CSV file. Here is an example.
 ```
 python3 -m models.llama.benchmark_all \
-    --hf-ort-model-path ./llama2-7b-fp16/ \
-    --ort-model-path ./llama-2-onnx/7B_float16/ONNX/LlamaV2_7B_float16.onnx \
+    --hf-pt-eager \
+    --hf-pt-compile \
+    --hf-ort-dir-path ./llama2-7b-fp16/ \
+    --ort-convert-to-onnx-model-path ./llama2-7b-fp16/Llama-2-7b-hf_decoder_merged_model_fp16.onnx \
+    --ort-msft-model-path ./llama-2-onnx/7B_float16/ONNX/LlamaV2_7B_float16.onnx \
     --model-name meta-llama/Llama-2-7b-hf \
     --precision fp16 \
     --batch-sizes "1 2" \
     --sequence-lengths "8 16" \
-    --device cuda
+    --device cuda \
+    --warmup-runs 5 \
+    --num-runs 1000 \
+    --timeout 60  # number of minutes before moving to the next benchmark
 ```
diff --git a/onnxruntime/python/tools/transformers/models/llama/benchmark.py b/onnxruntime/python/tools/transformers/models/llama/benchmark.py
index d19ed5cc28fed..021b0dd03a9db 100644
--- a/onnxruntime/python/tools/transformers/models/llama/benchmark.py
+++ b/onnxruntime/python/tools/transformers/models/llama/benchmark.py
@@ -8,23 +8,61 @@
 import time
 
 import numpy as np
+import onnx
 import psutil
 import torch
-from benchmark_helper import setup_logger
-from llama_inputs import get_msft_sample_inputs, get_sample_inputs, get_sample_with_past_kv_inputs
+from benchmark_helper import measure_memory, setup_logger
+from dist_settings import get_rank, get_size
+from llama_inputs import (
+    add_io_bindings,
+    get_merged_sample_with_past_kv_inputs,
+    get_msft_sample_inputs,
+    get_sample_inputs,
+    get_sample_with_past_kv_inputs,
+)
 from optimum.onnxruntime import ORTModelForCausalLM
 from torch.profiler import ProfilerActivity, profile, record_function
 from tqdm import trange
-from transformers import LlamaConfig, LlamaForCausalLM, LlamaTokenizer
+from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
 
 import onnxruntime as ort
-from onnxruntime.transformers.benchmark_helper import measure_memory
 
 logger = logging.getLogger(__name__)
 
 
-def get_inputs(args: argparse.Namespace):
-    if args.benchmark_type in {"hf-pt", "hf-pt2", "hf-ort"}:
+# For determining whether the ONNX model can do both prompt generation and token generation or only one of the two
+def get_ort_model_inputs_len(args, model):
+    if args.benchmark_type in {"hf-pt-eager", "hf-pt-compile"}:
+        return 0
+    if args.benchmark_type == "hf-ort":
+        try:
+            # New Optimum export (https://github.com/huggingface/optimum/blob/888332364c2e0091da1fc974737c7e277af168bf/optimum/onnxruntime/modeling_ort.py#L268)
+            return len(model.inputs_names)
+        except Exception:
+            # Old Optimum export (https://github.com/huggingface/optimum/blob/c5ad7f971cb0a494eac03dc0909f146725f999c5/optimum/onnxruntime/base.py#L54)
+            return len(model.decoder.input_names)
+    return len(model.get_inputs())
+
+
+def get_inputs(args: argparse.Namespace, ort_model_inputs_len: int):
+    init_inputs, iter_inputs = None, None
+
+    # For past_present_share_buffer:
+    # Set max_seq_len to 16384 for CodeLLaMA (finetuned variant of LLaMA-2)
+    # Set max_seq_len to 4096 for Hugging Face LLaMA-2 model since that is the default value
+    # Set max_seq_len to 2048 for Microsoft LLaMA-2 model since that is the max value currently supported
+    temp_name = args.model_name.lower().replace("-", "").replace("_", "")
+    max_seq_len = (
+        2048
+        if args.benchmark_type == "ort-msft"
+        else 16384
+        if "codellama" in temp_name
+        else 4096
+        if "llama2" in temp_name
+        else 2048
+    )
+
+    if args.benchmark_type in {"hf-pt-eager", "hf-pt-compile"}:
         init_inputs = get_sample_inputs(
             args.config,
             args.target_device,
@@ -41,21 +79,103 @@ def get_inputs(args: argparse.Namespace):
             return_dict=True,
         )
 
-    elif args.benchmark_type == "ort":
+    elif args.benchmark_type == "hf-ort":
+        if ort_model_inputs_len == 3:  # [input_ids, attention_mask, position_ids]
+            # Using split models in Optimum (e.g. created by Optimum export)
+            init_inputs = get_sample_inputs(
+                args.config,
+                args.target_device,
+                args.batch_size,
+                args.sequence_length,
+                return_dict=True,
+            )
+            iter_inputs = get_sample_with_past_kv_inputs(
+                args.config,
+                args.target_device,
+                args.batch_size,
+                args.sequence_length,
+                use_fp16=args.use_fp16,
+                return_dict=True,
+            )
+        else:
+            # Using merged model in Optimum (e.g. created by convert_to_onnx export)
+            init_inputs = get_merged_sample_with_past_kv_inputs(
+                args.config,
+                args.target_device,
+                args.batch_size,
+                seq_len=args.sequence_length,
+                past_seq_len=0,
+                max_seq_len=max_seq_len,
+                use_fp16=args.use_fp16,
+                use_gqa=args.use_gqa,
+                engine="pt",
+                return_dict=True,
+            )
+            iter_inputs = get_merged_sample_with_past_kv_inputs(
+                args.config,
+                args.target_device,
+                args.batch_size,
+                seq_len=1,
+                past_seq_len=args.sequence_length,
+                max_seq_len=max_seq_len,
+                use_fp16=args.use_fp16,
+                use_gqa=args.use_gqa,
+                engine="pt",
+                return_dict=True,
+            )
+
+    elif args.benchmark_type == "ort-convert-to-onnx":
+        # Microsoft export from convert_to_onnx
+        init_inputs = get_merged_sample_with_past_kv_inputs(
+            args.config,
+            args.target_device,
+            args.batch_size,
+            seq_len=args.sequence_length,
+            past_seq_len=0,
+            max_seq_len=max_seq_len,
+            use_fp16=args.use_fp16,
+            use_gqa=args.use_gqa,
+            engine="ort",
+            return_dict=True,
+            world_size=args.world_size,
+        )
+        iter_inputs = get_merged_sample_with_past_kv_inputs(
+            args.config,
+            args.target_device,
+            args.batch_size,
+            seq_len=1,
+            past_seq_len=args.sequence_length,
+            max_seq_len=max_seq_len,
+            use_fp16=args.use_fp16,
+            use_gqa=args.use_gqa,
+            engine="ort",
+            return_dict=True,
+            world_size=args.world_size,
+        )
+
+    elif args.benchmark_type == "ort-msft":
         # Microsoft export from https://github.com/microsoft/Llama-2-Onnx
+        split_kv = ort_model_inputs_len > 5  # original inputs: [x, attn_mask, k_cache, v_cache, pos]
+
         init_inputs = get_msft_sample_inputs(
             args.config,
             args.batch_size,
             past_seq_len=0,
             seq_len=args.sequence_length,
+            max_seq_len=max_seq_len,
             use_fp16=args.use_fp16,
+            use_gqa=args.use_gqa,
+            split_kv=split_kv,
         )
         iter_inputs = get_msft_sample_inputs(
             args.config,
             args.batch_size,
             past_seq_len=args.sequence_length,
             seq_len=1,
+            max_seq_len=max_seq_len,
             use_fp16=args.use_fp16,
+            use_gqa=args.use_gqa,
+            split_kv=split_kv,
         )
 
     else:
@@ -69,14 +189,16 @@ def get_model(args: argparse.Namespace):
     start_time, end_time = None, None
 
     # There are multiple sources that the model could come from:
-    # 1) Benchmark LLaMA from unofficial source on Hugging Face
-    # 2) Benchmark LLaMA from official source on Hugging Face, which requires an authentication token
-    # 3) Benchmark LLaMA from local download of model
-
-    if args.benchmark_type in {"hf-pt", "hf-pt2"}:
-        source = args.hf_pt_model_path if args.hf_pt_model_path else args.model_name
+    # 1) Benchmark LLaMA-2 from unofficial source on Hugging Face
+    # 2) Benchmark LLaMA-2 from official source on Hugging Face, which requires an authentication token
+    # 3) Benchmark LLaMA-2 from local download of model
+    # 4) Benchmark LLaMA-2 from Microsoft (already optimized, available at https://github.com/microsoft/Llama-2-Onnx)
+    # 5) Benchmark LLaMA-2 from convert_to_onnx
+
+    if args.benchmark_type in {"hf-pt-eager", "hf-pt-compile"}:
+        source = args.hf_pt_dir_path if args.hf_pt_dir_path else args.model_name
         start_time = time.time()
-        model = LlamaForCausalLM.from_pretrained(
+        model = AutoModelForCausalLM.from_pretrained(
             source,
             torch_dtype=torch.float16 if args.use_fp16 else torch.float32,
             use_auth_token=args.auth,
@@ -84,10 +206,10 @@ def get_model(args: argparse.Namespace):
         ).to(args.target_device)
         end_time = time.time()
 
-        if args.benchmark_type == "hf-pt2":
+        if args.benchmark_type == "hf-pt-compile":
             model = torch.compile(model)
 
-    elif args.benchmark_type in {"hf-ort", "ort"}:
+    elif args.benchmark_type in {"hf-ort", "ort-msft", "ort-convert-to-onnx"}:
         sess_options = ort.SessionOptions()
         sess_options.enable_profiling = args.profile
         if args.verbose:
@@ -104,43 +226,43 @@ def get_model(args: argparse.Namespace):
 
         decoder_file_name = None
         decoder_with_past_file_name = None
-        for filename in os.listdir(args.hf_ort_model_path):
+        for filename in os.listdir(args.hf_ort_dir_path):
             if ".onnx" not in filename or ".onnx_data" in filename or ".onnx.data" in filename:
                 continue
-            if "decoder_model.onnx" in filename or f"decoder_model_{args.precision}.onnx" in filename:
+            if "decoder_model" in filename or filename == "model.onnx":
+                decoder_file_name = filename
+            if "decoder_with_past_model" in filename:
+                decoder_with_past_file_name = filename
+            if "decoder_merged_model" in filename:
                 decoder_file_name = filename
-            if (
-                "decoder_with_past_model.onnx" in filename
-                or f"decoder_with_past_model_{args.precision}.onnx" in filename
-            ):
                 decoder_with_past_file_name = filename
 
         start_time = time.time()
         model = ORTModelForCausalLM.from_pretrained(
-            args.hf_ort_model_path,
+            args.hf_ort_dir_path,
             decoder_file_name=decoder_file_name,
             decoder_with_past_file_name=decoder_with_past_file_name,
             use_auth_token=args.auth,
             use_io_binding=(args.device != "cpu"),
+            use_merged=(True if decoder_file_name == "model.onnx" else None),
             provider=provider,
             provider_options=provider_options,
             session_options=sess_options,
         )
         end_time = time.time()
 
-    if args.benchmark_type == "ort":
-        # Microsoft export from https://github.com/microsoft/Llama-2-Onnx
-        logger.info(f"Loading model from {args.ort_model_path}")
+    if args.benchmark_type in {"ort-msft", "ort-convert-to-onnx"}:
+        # Ex: Microsoft export from https://github.com/microsoft/Llama-2-Onnx
+        logger.info(f"Loading model from {args.ort_model_path.format(args.rank)}")
         start_time = time.time()
         model = ort.InferenceSession(
-            args.ort_model_path,
+            args.ort_model_path.format(args.rank),
             sess_options,
             providers=[args.execution_provider],
         )
         end_time = time.time()
 
     logger.info(f"Loaded model in {end_time - start_time} s")
-
     return model
 
 
@@ -148,7 +270,7 @@ def time_fn(args, fn, inputs):
     # Warm up
     warmup_range = (
         range(args.warmup_runs)
-        if args.benchmark_type == "ort"
+        if args.benchmark_type in {"ort-msft", "ort-convert-to-onnx"}
         else trange(args.warmup_runs, file=sys.stdout, desc="Warm up")
     )
 
@@ -156,37 +278,57 @@ def time_fn(args, fn, inputs):
         outputs = fn(inputs)
         logger.info(outputs)
 
+    input_sync = (  # noqa: E731
+        lambda *kwargs: args.io_binding.synchronize_inputs()
+        if args.device != "cpu" and args.benchmark_type in {"ort-msft", "ort-convert-to-onnx"}  # ORT synchronize
+        else lambda *kwargs: torch.cuda.synchronize()
+        if args.device != "cpu" and torch.cuda.is_available()  # PyTorch synchronize
+        else lambda *kwargs: None  # no-op function
+    )
+
+    output_sync = (  # noqa: E731
+        lambda *kwargs: args.io_binding.synchronize_outputs()
+        if args.device != "cpu" and args.benchmark_type in {"ort-msft", "ort-convert-to-onnx"}  # ORT synchronize
+        else lambda *kwargs: torch.cuda.synchronize()
+        if args.device != "cpu" and torch.cuda.is_available()  # PyTorch synchronize
+        else lambda *kwargs: None  # no-op function
+    )
+
     for _ in warmup_range:
+        input_sync()
         fn(inputs)
+        output_sync()
 
     # Benchmark
-    if args.device != "cpu":
-        torch.cuda.synchronize()
-    start_time = time.time()
-
+    total_time = 0
     bench_range = (
         range(args.num_runs)
-        if args.benchmark_type == "ort"
+        if args.benchmark_type in {"ort-msft", "ort-convert-to-onnx"}
         else trange(args.num_runs, file=sys.stdout, desc="Benchmark")
     )
     for _ in bench_range:
+        input_sync()
+        start_time = time.time()
+
         fn(inputs)
 
-    if args.device != "cpu":
-        torch.cuda.synchronize()
-    end_time = time.time()
+        output_sync()
+        end_time = time.time()
+
+        total_time += end_time - start_time
 
     # Newline print after trange in order to print metrics on new lines without progress bar on same line
-    if args.benchmark_type != "ort":
+    if args.benchmark_type not in {"ort-msft", "ort-convert-to-onnx"}:
         logger.info("")
 
-    latency = (end_time - start_time) / args.num_runs
+    latency = total_time / args.num_runs
     throughput = args.batch_size / latency
 
-    logger.info(f"Batch Size: {args.batch_size}")
-    logger.info(f"Sequence Length: {args.sequence_length}")
-    logger.info(f"Latency: {latency} s")
-    logger.info(f"Throughput: {throughput} qps")
+    if args.rank == 0:
+        logger.info(f"Batch Size: {args.batch_size}")
+        logger.info(f"Sequence Length: {args.sequence_length}")
+        logger.info(f"Latency: {latency} s")
+        logger.info(f"Throughput: {throughput} tps")
     return
 
 
@@ -196,7 +338,7 @@ def profile_fn(args, fn, inputs, inputs_type):
     prefix = f"b{args.batch_size}_s{args.sequence_length}_{args.benchmark_type.lower()}-{args.precision}-{args.device}_{fn.__name__.replace('_', '-')}_{inputs_type}_{datetime.datetime.now():%Y-%m-%d_%H:%M:%S}"
     filename = None
 
-    if args.benchmark_type in {"hf-pt", "hf-pt2"}:
+    if args.benchmark_type in {"hf-pt-eager", "hf-pt-compile"}:
         # Profile PyTorch kernels
         with profile(  # noqa: SIM117
             activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], record_shapes=True, profile_memory=True
@@ -226,7 +368,8 @@ def measure_fn(args, fn, inputs):
     process.cpu_percent(interval=0.1)
 
     fn(inputs)
-    logger.info(f"CPU usage: {process.cpu_percent(interval=None)}%")
+    if args.rank == 0:
+        logger.info(f"CPU usage: {process.cpu_percent(interval=None) / psutil.cpu_count(logical=False)}%")
 
     # Measure memory usage
     gc.collect()
@@ -267,7 +410,7 @@ def get_logits(inputs):
 
     generate_fn = get_logits
 
-    if args.benchmark_type == "hf-pt2":
+    if args.benchmark_type == "hf-pt-compile":
         # Run forward pass once with each set of inputs to process through Dynamo
         generate_fn(init_inputs)
         generate_fn(iter_inputs)
@@ -280,7 +423,7 @@ def get_logits(inputs):
             logger.warning(f"Renaming {old_logname} to {new_logname}")
             os.rename(old_logname, os.path.join(args.log_folder, new_logname))
 
-        new_logname = profile_fn(args, generate_fn, iter_inputs, "per-token")
+        new_logname = profile_fn(args, generate_fn, iter_inputs, "token")
         if args.benchmark_type == "hf-ort":
             # Turn profiling off to stop appending to log
             old_logname = model.decoder_with_past.session.end_profiling()
@@ -300,7 +443,7 @@ def get_logits(inputs):
 
 
 def run_ort_inference(args, init_inputs, iter_inputs, model):
-    def prepare_ort_inputs(inputs):
+    def prepare_ort_inputs(inputs, kv_cache_ortvalues):
         # Check that all model inputs will be provided
         model_inputs = set(map(lambda model_input: model_input.name, model.get_inputs()))
         user_inputs = set(inputs.keys())
@@ -318,14 +461,13 @@ def prepare_ort_inputs(inputs):
 
         # Add IO bindings for non-CPU execution providers
         if args.device != "cpu":
-            io_binding = model.io_binding()
-            for k, v in inputs.items():
-                io_binding.bind_cpu_input(k, v)
-            for output in model.get_outputs():
-                io_binding.bind_output(output.name)
-            return io_binding
+            io_binding, kv_cache_ortvalues = add_io_bindings(
+                model, inputs, args.device, int(args.rank), args.use_gqa, kv_cache_ortvalues
+            )
+            setattr(args, "io_binding", io_binding)  # noqa: B010
+            return io_binding, kv_cache_ortvalues
 
-        return inputs
+        return inputs, kv_cache_ortvalues
 
     def with_io_binding(io_binding):
         # Inference pass with IO binding
@@ -337,9 +479,10 @@ def without_io_binding(inputs):
         return outputs
 
     generate_fn = with_io_binding if args.device != "cpu" else without_io_binding
+    kv_cache_ortvalues = {}
 
     if args.profile:
-        ort_init_inputs = prepare_ort_inputs(init_inputs)
+        ort_init_inputs, kv_cache_ortvalues = prepare_ort_inputs(init_inputs, kv_cache_ortvalues)
         new_logname = profile_fn(args, generate_fn, ort_init_inputs, "prompt")
 
         # Turn profiling off to stop appending to log file
@@ -349,8 +492,8 @@ def without_io_binding(inputs):
 
         # Re-initialize model for new log file instead of appending to old log file
         model = get_model(args)
-        ort_iter_inputs = prepare_ort_inputs(iter_inputs)
-        new_logname = profile_fn(args, generate_fn, ort_iter_inputs, "per-token")
+        ort_iter_inputs, kv_cache_ortvalues = prepare_ort_inputs(iter_inputs, kv_cache_ortvalues)
+        new_logname = profile_fn(args, generate_fn, ort_iter_inputs, "token")
 
         # Turn profiling off to stop appending to log
         old_logname = model.end_profiling()
@@ -360,29 +503,33 @@ def without_io_binding(inputs):
 
     # ORT evaluations
     logger.info("\nEvaluating `model(inputs)` step to get past_key_values")
-    ort_init_inputs = prepare_ort_inputs(init_inputs)
+    ort_init_inputs, kv_cache_ortvalues = prepare_ort_inputs(init_inputs, kv_cache_ortvalues)
     time_fn(args, generate_fn, ort_init_inputs)
     measure_fn(args, generate_fn, ort_init_inputs)
 
     logger.info("\nEvaluating `model(inputs)` step with past_key_values")
-    ort_iter_inputs = prepare_ort_inputs(iter_inputs)
+    ort_iter_inputs, kv_cache_ortvalues = prepare_ort_inputs(iter_inputs, kv_cache_ortvalues)
     time_fn(args, generate_fn, ort_iter_inputs)
     measure_fn(args, generate_fn, ort_iter_inputs)
 
 
 def run_inference(args, init_inputs, iter_inputs, model):
-    if args.benchmark_type in {"hf-pt", "hf-pt2", "hf-ort"}:
+    if args.benchmark_type in {"hf-pt-eager", "hf-pt-compile", "hf-ort"}:
         run_hf_inference(args, init_inputs, iter_inputs, model)
-    elif args.benchmark_type == "ort":
+    elif args.benchmark_type in {"ort-msft", "ort-convert-to-onnx"}:
         run_ort_inference(args, init_inputs, iter_inputs, model)
     else:
         raise Exception(f"Cannot recognize {args.benchmark_type}")
 
 
-def get_args():
+def get_args(rank=0):
     parser = argparse.ArgumentParser()
     parser.add_argument(
-        "-bt", "--benchmark-type", type=str, required=True, choices=["hf-pt", "hf-pt2", "hf-ort", "ort"]
+        "-bt",
+        "--benchmark-type",
+        type=str,
+        required=True,
+        choices=["hf-pt-eager", "hf-pt-compile", "hf-ort", "ort-msft", "ort-convert-to-onnx"],
     )
     parser.add_argument(
         "-m",
@@ -402,20 +549,20 @@ def get_args():
         required=True,
         type=str,
         default="fp32",
-        choices=["int8", "fp16", "fp32"],
+        choices=["int4", "int8", "fp16", "fp32"],
         help="Precision for model. For ONNX models, the model's precision should be set before running this script.",
     )
     parser.add_argument(
-        "--hf-pt-model-path",
+        "--hf-pt-dir-path",
         type=str,
         default="",
         help="Path to directory containing all PyTorch files (e.g. tokenizer, PyTorch model)",
     )
     parser.add_argument(
-        "--hf-ort-model-path",
+        "--hf-ort-dir-path",
         type=str,
         default="",
-        help="Path to directory containing all ONNX files (e.g. tokenizer, encoder, decoder, decoder_with_past)",
+        help="Path to directory containing all ONNX files (e.g. tokenizer, decoder_merged, decoder, decoder_with_past)",
     )
     parser.add_argument(
         "--ort-model-path",
@@ -433,7 +580,7 @@ def get_args():
     parser.add_argument(
         "-s",
         "--sequence-lengths",
-        default="8 16 32 64 128 256 512",
+        default="32 64 128 256 512",
     )
     parser.add_argument(
         "-d",
@@ -470,20 +617,25 @@ def get_args():
     if "ort" in args.benchmark_type:
         setattr(args, "execution_provider", f"{args.device.upper()}ExecutionProvider")  # noqa: B010
         if args.execution_provider == "CUDAExecutionProvider":
-            args.execution_provider = (args.execution_provider, {"device_id": args.device_id})
+            args.execution_provider = (args.execution_provider, {"device_id": rank})
         elif args.execution_provider == "ROCMExecutionProvider":
-            args.execution_provider = (args.execution_provider, {"device_id": args.device_id})
+            args.execution_provider = (args.execution_provider, {"device_id": rank})
             args.device = "cuda"
 
-    # Check that model paths have been specified for any benchmarking with ORT
+    # Check that paths have been specified for any benchmarking with ORT
     if args.benchmark_type == "hf-ort":
-        assert args.hf_ort_model_path, "Please specify a path to `--hf-ort-model-path`"
-    if args.benchmark_type == "ort":
+        assert args.hf_ort_dir_path, "Please specify a path to `--hf-ort-dir-path`"
+    if args.benchmark_type in {"ort-msft", "ort-convert-to-onnx"}:
         assert args.ort_model_path, "Please specify a path to `--ort-model-path`"
 
     args.batch_sizes = args.batch_sizes.split(" ")
     args.sequence_lengths = args.sequence_lengths.split(" ")
 
+    # Use FP32 precision for FP32, INT8, INT4 CPU models, use FP16 precision for FP16 and INT4 GPU models
+    args.precision = (
+        "fp32" if args.precision in {"int8", "fp32"} or (args.precision == "int4" and args.device == "cpu") else "fp16"
+    )
+
     # Check that only one (batch_size, sequence_length) combination is set for profiling
     if args.profile:
         assert (
@@ -494,14 +646,19 @@ def get_args():
 
 
 def main():
-    args = get_args()
+    rank = get_rank()
+    world_size = get_size()
+
+    args = get_args(rank)
     setup_logger(args.verbose)
     logger.info(args.__dict__)
     torch.backends.cudnn.benchmark = True
 
-    tokenizer = LlamaTokenizer.from_pretrained(args.model_name)
-    config = LlamaConfig.from_pretrained(args.model_name)
-    target_device = f"cuda:{args.device_id}" if args.device != "cpu" else args.device
+    args.rank = rank
+    args.world_size = world_size
+    tokenizer = AutoTokenizer.from_pretrained(args.model_name)
+    config = AutoConfig.from_pretrained(args.model_name)
+    target_device = f"cuda:{args.rank}" if args.device != "cpu" else args.device
     use_fp16 = args.precision == "fp16"
 
     setattr(args, "tokenizer", tokenizer)  # noqa: B010
@@ -509,14 +666,28 @@ def main():
     setattr(args, "target_device", target_device)  # noqa: B010
     setattr(args, "use_fp16", use_fp16)  # noqa: B010
 
-    # Measure prompt cost (init_inputs) and generated token cost (iter_inputs)
+    # Get model and model info
     model = get_model(args)
+    ort_model_inputs_len = get_ort_model_inputs_len(args, model)
+
+    # Check if past_present_share_buffer can be enabled (only for FP16 models with GQA)
+    if args.benchmark_type in {"ort-convert-to-onnx", "ort-msft"}:
+        onnx_model = onnx.load_model(args.ort_model_path.format(args.rank), load_external_data=False)
+        gqa_nodes = list(filter(lambda node: node.op_type == "GroupQueryAttention", onnx_model.graph.node))
+
+        use_buffer_share = use_fp16 and len(gqa_nodes) > 0 and args.device != "cpu"
+        setattr(args, "use_gqa", use_buffer_share)  # noqa: B010
+    else:
+        setattr(args, "use_gqa", False)  # noqa: B010
+
+    # Measure prompt cost (init_inputs) and generated token cost (iter_inputs)
     for batch_size, sequence_length in itertools.product(args.batch_sizes, args.sequence_lengths):
-        logger.info(f"\nBatch size = {batch_size} and sequence length = {sequence_length}...")
+        if args.rank == 0:
+            logger.info(f"\nBatch size = {batch_size} and sequence length = {sequence_length}...")
         setattr(args, "batch_size", int(batch_size))  # noqa: B010
         setattr(args, "sequence_length", int(sequence_length))  # noqa: B010
 
-        init_inputs, iter_inputs = get_inputs(args)
+        init_inputs, iter_inputs = get_inputs(args, ort_model_inputs_len)
         run_inference(args, init_inputs, iter_inputs, model)
 
 
diff --git a/onnxruntime/python/tools/transformers/models/llama/benchmark_70b_model.sh b/onnxruntime/python/tools/transformers/models/llama/benchmark_70b_model.sh
new file mode 100644
index 0000000000000..38f1916456658
--- /dev/null
+++ b/onnxruntime/python/tools/transformers/models/llama/benchmark_70b_model.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+NUM_GPUS=${1:-1}
+
+MPI="mpirun --allow-run-as-root
+    -mca btl_openib_warn_no_device_params_found 0 -mca pml ob1 -mca btl ^openib -mca btl_tcp_if_include eth0
+    --tag-output --npernode $NUM_GPUS --bind-to numa
+    -x MIOPEN_FIND_MODE=1"
+
+CMD="$MPI python benchmark.py ${@:2}"
+
+$CMD
\ No newline at end of file
diff --git a/onnxruntime/python/tools/transformers/models/llama/benchmark_all.py b/onnxruntime/python/tools/transformers/models/llama/benchmark_all.py
index 7199c945fe6ba..b35a5e27f9ea3 100644
--- a/onnxruntime/python/tools/transformers/models/llama/benchmark_all.py
+++ b/onnxruntime/python/tools/transformers/models/llama/benchmark_all.py
@@ -43,15 +43,38 @@ def get_args():
     )
 
     parser.add_argument(
-        "--hf-ort-model-path",
+        "--hf-pt-eager",
+        default=False,
+        action="store_true",
+        help="Benchmark in PyTorch without `torch.compile`",
+    )
+
+    parser.add_argument(
+        "--hf-pt-compile",
+        default=False,
+        action="store_true",
+        help="Benchmark in PyTorch with `torch.compile`",
+    )
+
+    parser.add_argument(
+        "--hf-ort-dir-path",
         type=str,
+        default="",
         help="Path to folder containing ONNX models for Optimum + ORT benchmarking",
     )
 
     parser.add_argument(
-        "--ort-model-path",
+        "--ort-msft-model-path",
+        type=str,
+        default="",
+        help="Path to ONNX model from https://github.com/microsoft/Llama-2-Onnx",
+    )
+
+    parser.add_argument(
+        "--ort-convert-to-onnx-model-path",
         type=str,
-        help="Path to ONNX model for ORT benchmarking",
+        default="",
+        help="Path to ONNX model from convert_to_onnx",
     )
 
     parser.add_argument(
@@ -65,7 +88,7 @@ def get_args():
         "--precision",
         type=str,
         required=True,
-        choices=["int8", "fp16", "fp32"],
+        choices=["int4", "int8", "fp16", "fp32"],
         help="Precision to run model",
     )
 
@@ -138,8 +161,6 @@ def process_log_file(device_id, log_file, base_results):
                 step = "per-token"
             elif latency_pattern in line:
                 latency_s = float(line[len(latency_pattern) : line.rfind(" ")])
-                if step == "prompt":
-                    latency_s /= sequence_length
                 latency_ms = latency_s * 1000
             elif throughput_pattern in line:
                 throughput = float(line[len(throughput_pattern) : line.rfind(" ")])
@@ -184,7 +205,7 @@ def save_results(results, filename):
             "Step",
             "Latency (s)",
             "Latency (ms)",
-            "Throughput (qps)",
+            "Throughput (tps)",
             "Memory (GB)",
         ],
     )
@@ -194,7 +215,7 @@ def save_results(results, filename):
     df["Sequence Length"] = df["Sequence Length"].astype("int")
     df["Latency (s)"] = df["Latency (s)"].astype("float")
     df["Latency (ms)"] = df["Latency (ms)"].astype("float")
-    df["Throughput (qps)"] = df["Throughput (qps)"].astype("float")
+    df["Throughput (tps)"] = df["Throughput (tps)"].astype("float")
     df["Memory (GB)"] = df["Memory (GB)"].astype("float")
 
     df.to_csv(filename, index=False)
@@ -226,75 +247,78 @@ def main():
     torch.backends.cudnn.benchmark = True
 
     all_results = []
+    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.device_id)
+
     # Benchmark PyTorch without torch.compile
-    benchmark_cmd = [
-        "python3",
-        "benchmark.py",
-        "--benchmark-type",
-        "hf-pt",
-        "--model-name",
-        args.model_name,
-        "--precision",
-        args.precision,
-        "--batch-sizes",
-        args.batch_sizes,
-        "--sequence-lengths",
-        args.sequence_lengths,
-        "--device",
-        args.device,
-        "--device-id",
-        str(args.device_id),
-        "--warmup-runs",
-        str(args.warmup_runs),
-        "--num-runs",
-        str(args.num_runs),
-        "--log-folder",
-        args.log_folder,
-        "--auth",
-    ]
-    logger.info("Benchmark PyTorch without torch.compile")
-    results = benchmark(args, benchmark_cmd, "pytorch")
-    all_results.extend(results)
+    if args.hf_pt_eager:
+        benchmark_cmd = [
+            "python",
+            "-m",
+            "models.llama.benchmark",
+            "--benchmark-type",
+            "hf-pt-eager",
+            "--model-name",
+            args.model_name,
+            "--precision",
+            args.precision,
+            "--batch-sizes",
+            args.batch_sizes,
+            "--sequence-lengths",
+            args.sequence_lengths,
+            "--device",
+            args.device,
+            "--warmup-runs",
+            str(args.warmup_runs),
+            "--num-runs",
+            str(args.num_runs),
+            "--log-folder",
+            args.log_folder,
+            "--auth",
+        ]
+        logger.info("Benchmark PyTorch without torch.compile")
+        results = benchmark(args, benchmark_cmd, "pytorch-eager")
+        all_results.extend(results)
 
     # Benchmark PyTorch with torch.compile
-    benchmark_cmd = [
-        "python3",
-        "benchmark.py",
-        "--benchmark-type",
-        "hf-pt2",
-        "--model-name",
-        args.model_name,
-        "--precision",
-        args.precision,
-        "--batch-sizes",
-        args.batch_sizes,
-        "--sequence-lengths",
-        args.sequence_lengths,
-        "--device",
-        args.device,
-        "--device-id",
-        str(args.device_id),
-        "--warmup-runs",
-        str(args.warmup_runs),
-        "--num-runs",
-        str(args.num_runs),
-        "--log-folder",
-        args.log_folder,
-        "--auth",
-    ]
-    logger.info("Benchmark PyTorch with torch.compile")
-    results = benchmark(args, benchmark_cmd, "pytorch-2")
-    all_results.extend(results)
+    if args.hf_pt_compile:
+        benchmark_cmd = [
+            "python",
+            "-m",
+            "models.llama.benchmark",
+            "--benchmark-type",
+            "hf-pt-compile",
+            "--model-name",
+            args.model_name,
+            "--precision",
+            args.precision,
+            "--batch-sizes",
+            args.batch_sizes,
+            "--sequence-lengths",
+            args.sequence_lengths,
+            "--device",
+            args.device,
+            "--warmup-runs",
+            str(args.warmup_runs),
+            "--num-runs",
+            str(args.num_runs),
+            "--log-folder",
+            args.log_folder,
+            "--auth",
+        ]
+        logger.info("Benchmark PyTorch with torch.compile")
+        results = benchmark(args, benchmark_cmd, "pytorch-compile")
+        all_results.extend(results)
 
     # Benchmark Optimum + ONNX Runtime
-    if args.hf_ort_model_path:
+    if args.hf_ort_dir_path:
         benchmark_cmd = [
-            "python3",
-            "benchmark.py",
+            "python",
+            "-m",
+            "models.llama.benchmark",
             "--benchmark-type",
             "hf-ort",
-            "--hf-ort-model-path",
-            args.hf_ort_model_path,
+            "--hf-ort-dir-path",
+            args.hf_ort_dir_path,
             "--model-name",
             args.model_name,
             "--precision",
@@ -305,8 +329,6 @@ def main():
             args.sequence_lengths,
             "--device",
             args.device,
-            "--device-id",
-            str(args.device_id),
             "--warmup-runs",
             str(args.warmup_runs),
             "--num-runs",
@@ -316,18 +338,50 @@ def main():
             "--auth",
         ]
         logger.info("Benchmark Optimum + ONNX Runtime")
-        results = benchmark(args, benchmark_cmd, "pytorch-ort")
+        results = benchmark(args, benchmark_cmd, "optimum-ort")
+        all_results.extend(results)
+
+    # Benchmark Microsoft model in ONNX Runtime
+    if args.ort_msft_model_path:
+        benchmark_cmd = [
+            "python",
+            "-m",
+            "models.llama.benchmark",
+            "--benchmark-type",
+            "ort-msft",
+            "--ort-model-path",
+            args.ort_msft_model_path,
+            "--model-name",
+            args.model_name,
+            "--precision",
+            args.precision,
+            "--batch-sizes",
+            args.batch_sizes,
+            "--sequence-lengths",
+            args.sequence_lengths,
+            "--device",
+            args.device,
+            "--warmup-runs",
+            str(args.warmup_runs),
+            "--num-runs",
+            str(args.num_runs),
+            "--log-folder",
+            args.log_folder,
+        ]
+        logger.info("Benchmark Microsoft model in ONNX Runtime")
+        results = benchmark(args, benchmark_cmd, "ort-msft")
         all_results.extend(results)
 
-    # Benchmark ONNX Runtime
-    if args.ort_model_path:
+    # Benchmark convert_to_onnx model in ONNX Runtime
+    if args.ort_convert_to_onnx_model_path:
         benchmark_cmd = [
-            "python3",
-            "benchmark.py",
+            "python",
+            "-m",
+            "models.llama.benchmark",
             "--benchmark-type",
-            "ort",
+            "ort-convert-to-onnx",
             "--ort-model-path",
-            args.ort_model_path,
+            args.ort_convert_to_onnx_model_path,
             "--model-name",
             args.model_name,
             "--precision",
@@ -338,8 +392,6 @@ def main():
             args.sequence_lengths,
             "--device",
             args.device,
-            "--device-id",
-            str(args.device_id),
             "--warmup-runs",
             str(args.warmup_runs),
             "--num-runs",
@@ -347,7 +399,7 @@ def main():
             "--log-folder",
             args.log_folder,
         ]
-        logger.info("Benchmark ONNX Runtime")
+        logger.info("Benchmark convert_to_onnx model in ONNX Runtime")
         results = benchmark(args, benchmark_cmd, "onnxruntime")
         all_results.extend(results)
 
diff --git a/onnxruntime/python/tools/transformers/models/llama/convert_70b_model.sh b/onnxruntime/python/tools/transformers/models/llama/convert_70b_model.sh
new file mode 100644
index 0000000000000..637d15c10e0c7
--- /dev/null
+++ b/onnxruntime/python/tools/transformers/models/llama/convert_70b_model.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+NUM_GPUS=${1:-1}
+
+MPI="mpirun --allow-run-as-root
+    -mca btl_openib_warn_no_device_params_found 0 -mca pml ob1 -mca btl ^openib -mca btl_tcp_if_include eth0
+    --tag-output --npernode $NUM_GPUS --bind-to numa
+    -x MIOPEN_FIND_MODE=1"
+
+CMD="$MPI python convert_to_onnx.py ${@:2}"
+
+$CMD
\ No newline at end of file
diff --git a/onnxruntime/python/tools/transformers/models/llama/convert_to_onnx.py b/onnxruntime/python/tools/transformers/models/llama/convert_to_onnx.py
index f96347ba67aa6..c9c7f4d39d423 100644
--- a/onnxruntime/python/tools/transformers/models/llama/convert_to_onnx.py
+++ b/onnxruntime/python/tools/transformers/models/llama/convert_to_onnx.py
@@ -1,21 +1,28 @@
 import argparse
 import logging
 import os
-import tempfile
+import shutil
 from itertools import chain
 from typing import List
 
 import onnx
 import torch
 from benchmark_helper import Precision, prepare_environment, setup_logger
-from llama_inputs import get_sample_inputs, get_sample_with_past_kv_inputs
+from convert_generation import replace_mha_with_gqa
+from dist_settings import barrier, get_rank, get_size, init_dist
+from llama_inputs import get_merged_sample_with_past_kv_inputs, get_sample_inputs, get_sample_with_past_kv_inputs
 from llama_parity import main as parity_check
+from llama_torch import setup_torch_model
 from onnx_model import OnnxModel
-from transformers import LlamaConfig, LlamaForCausalLM
+from optimizer import optimize_model
+from packaging import version
+from transformers import AutoConfig, AutoModelForCausalLM
 
 from onnxruntime import quantization as ort_quantization
+from onnxruntime.quantization.matmul_4bits_quantizer import MatMul4BitsQuantizer
 
 logger = logging.getLogger("")
+init_dist()
 
 
 def get_model_dynamic_axes(input_names: List[str], output_names: List[str]):
@@ -58,6 +65,33 @@ def get_model_with_past_kv_dynamic_axes(input_names: List[str], output_names: Li
     return dynamic_axes
 
 
+def get_merged_model_dynamic_axes(input_names: List[str], output_names: List[str]):
+    dynamic_axes = {}
+    for name in input_names + output_names:
+        if name in {"input_ids", "position_ids"}:
+            # shape is (batch_size, sequence_length)
+            dynamic_axes[name] = {0: "batch_size", 1: "sequence_length"}
+        elif name == "attention_mask":
+            # shape is (batch_size, past_sequence_length + sequence_length) = (batch_size, total_sequence_length)
+            # for prompt generation, past_sequence_length = 0
+            # for token generation, sequence_length = 1
+            dynamic_axes[name] = {0: "batch_size", 1: "total_sequence_length"}
+        elif "past" in name:
+            # shape is (batch_size, num_heads, past_sequence_length, head_size)
+            dynamic_axes[name] = {0: "batch_size", 2: "past_sequence_length"}
+        elif name == "logits":
+            # shape is (batch_size, sequence_length, vocab_size)
+            dynamic_axes[name] = {0: "batch_size", 1: "sequence_length"}
+        elif "present" in name:
+            # shape is (batch_size, num_heads, past_sequence_length + sequence_length, head_size) = (batch_size, num_heads, total_sequence_length, head_size)
+            # for prompt generation, past_sequence_length = 0
+            # for token generation, sequence_length = 1
+            dynamic_axes[name] = {0: "batch_size", 2: "total_sequence_length"}
+        else:
+            raise Exception("Unknown input or output name found")
+    return dynamic_axes
+
+
 def save_onnx_model(onnx_model: onnx.ModelProto, output_path: str, data_path: str):
     onnx.save(
         onnx_model,
@@ -98,7 +132,9 @@ def save_onnx_model(onnx_model: onnx.ModelProto, output_path: str, data_path: st
 # del onnx_model
 # temp_dir.cleanup()
 #
-def run_dynamo_export(args: argparse.Namespace, l_config: LlamaConfig, llama: LlamaForCausalLM):
+def run_dynamo_export(
+    args: argparse.Namespace, l_config: AutoConfig, llama: AutoModelForCausalLM, rank: int = 0, world_size: int = 1
+):
     from torch._dynamo import config
 
     config.capture_scalar_outputs = True
@@ -119,9 +155,9 @@ def run_dynamo_export(args: argparse.Namespace, l_config: LlamaConfig, llama: Ll
     onnx.checker.check_model(temp_path)
     onnx.shape_inference.infer_shapes_path(temp_path)
 
-    output_path = os.path.join(args.output, f"{args.model_name}_decoder_model_fp32.onnx")
+    output_path = os.path.join(args.output, f"rank_{rank}_{args.model_name}_decoder_model_fp32.onnx")
     onnx_model = onnx.load_model(temp_path, load_external_data=True)
-    save_onnx_model(onnx_model, output_path, f"{args.model_name}_decoder_model_fp32.onnx.data")
+    save_onnx_model(onnx_model, output_path, f"rank_{rank}_{args.model_name}_decoder_model_fp32.onnx.data")
     del onnx_model
     os.system(
         f"rm {os.path.join(temp_dir, 'model.*')} && rm {os.path.join(temp_dir, '*.weight')} && rm {temp_path}"
@@ -129,7 +165,7 @@ def run_dynamo_export(args: argparse.Namespace, l_config: LlamaConfig, llama: Ll
 
     # Export decoder_with_past_model.onnx
     input_ids, attn_mask, pos_ids, past_kv = get_sample_with_past_kv_inputs(
-        l_config, device, batch_size, sequence_length
+        l_config, device, batch_size, sequence_length, world_size=world_size
     )
     temp_dir = args.output  # tempfile.TemporaryDirectory()
     temp_path = os.path.join(temp_dir, "temp.onnx")  # os.path.join(temp_dir.name, "temp.onnx")
@@ -141,9 +177,9 @@ def run_dynamo_export(args: argparse.Namespace, l_config: LlamaConfig, llama: Ll
     onnx.checker.check_model(temp_path)
     onnx.shape_inference.infer_shapes_path(temp_path)
 
-    output_path = os.path.join(args.output, f"{args.model_name}_decoder_with_past_model_fp32.onnx")
+    output_path = os.path.join(args.output, f"rank_{rank}_{args.model_name}_decoder_with_past_model_fp32.onnx")
     onnx_model = onnx.load_model(temp_path, load_external_data=True)
-    save_onnx_model(onnx_model, output_path, f"{args.model_name}_decoder_with_past_model_fp32.onnx.data")
+    save_onnx_model(onnx_model, output_path, f"rank_{rank}_{args.model_name}_decoder_with_past_model_fp32.onnx.data")
     del onnx_model
     os.system(
         f"rm {os.path.join(temp_dir, 'model.*')} && rm {os.path.join(temp_dir, '*.weight')} && rm {temp_path}"
@@ -152,10 +188,21 @@ def run_dynamo_export(args: argparse.Namespace, l_config: LlamaConfig, llama: Ll
     logger.info(f"The {args.model_name} ONNX model has been successfully created with the Dynamo exporter!")
 
 
-def run_torchscript_export(args: argparse.Namespace, l_config: LlamaConfig, llama: LlamaForCausalLM):
+def _prepare_dir(dir_path):
+    if not os.path.exists(dir_path):
+        os.makedirs(dir_path)
+
+
+def run_torchscript_separate_export(
+    args: argparse.Namespace, l_config: AutoConfig, llama: AutoModelForCausalLM, rank: int = 0, world_size: int = 1
+):
     # Dummy values for export
     batch_size, sequence_length = 2, 8
-    device = torch.device("cpu")
+
+    # set device used to export model
+    # for llama-2-70b we will use current gpus to speed up export process
+    # for other models, we will use CPU to make sure we have enough memory to do export
+    device = llama.device if args.model_name == "Llama-2-70b-hf" else torch.device("cpu")
 
     # Export decoder_model.onnx
     decoder_inputs = get_sample_inputs(l_config, device, batch_size, sequence_length)
@@ -168,8 +215,12 @@ def run_torchscript_export(args: argparse.Namespace, l_config: LlamaConfig, llam
         ),
     ]
     dynamic_axes = get_model_dynamic_axes(input_names, output_names)
-    temp_dir = tempfile.TemporaryDirectory()
-    temp_path = os.path.join(temp_dir.name, "temp.onnx")
+
+    # Avoid using system temp dir to avoid overflood on hard disk as 70b model is very large.
+    # Use temp folder per rank to avoid race condition here.
+    temp_dir = f"./temp_{rank}"
+    _prepare_dir(temp_dir)
+    temp_path = os.path.join(temp_dir, "temp.onnx")
     torch.onnx.export(
         llama,
         args=decoder_inputs,
@@ -187,18 +238,25 @@ def run_torchscript_export(args: argparse.Namespace, l_config: LlamaConfig, llam
     onnx.checker.check_model(temp_path)
     onnx.shape_inference.infer_shapes_path(temp_path)
 
-    output_path = os.path.join(args.output, f"{args.model_name}_decoder_model_fp32.onnx")
+    output_path = os.path.join(args.output, f"rank_{rank}_{args.model_name}_decoder_model_fp32.onnx")
     onnx_model = onnx.load_model(temp_path, load_external_data=True)
     save_onnx_model(
         onnx_model,
         output_path,
-        f"{args.model_name}_decoder_model_fp32.onnx.data",
+        f"rank_{rank}_{args.model_name}_decoder_model_fp32.onnx.data",
     )
     del onnx_model
-    temp_dir.cleanup()
+    shutil.rmtree(temp_dir)
 
     # Export decoder_with_past_model.onnx
-    decoder_with_past_inputs = get_sample_with_past_kv_inputs(l_config, device, batch_size, sequence_length)
+    decoder_with_past_inputs = get_sample_with_past_kv_inputs(
+        l_config,
+        device,
+        batch_size,
+        sequence_length,
+        use_fp16=args.precision == Precision.FLOAT16,
+        world_size=world_size,
+    )
     input_names = [
         "input_ids",
         "attention_mask",
@@ -216,8 +274,12 @@ def run_torchscript_export(args: argparse.Namespace, l_config: LlamaConfig, llam
         ),
     ]
     dynamic_axes = get_model_with_past_kv_dynamic_axes(input_names, output_names)
-    temp_dir = tempfile.TemporaryDirectory()
-    temp_path = os.path.join(temp_dir.name, "temp.onnx")
+
+    # Avoid using system temp dir to avoid overflood on hard disk as 70b model is very large.
+    # Use temp folder per rank to avoid race condition here.
+    temp_dir = f"./temp_past_{rank}"
+    _prepare_dir(temp_dir)
+    temp_path = os.path.join(temp_dir, "temp.onnx")
     torch.onnx.export(
         llama,
         args=decoder_with_past_inputs,
@@ -235,17 +297,238 @@ def run_torchscript_export(args: argparse.Namespace, l_config: LlamaConfig, llam
     onnx.checker.check_model(temp_path)
     onnx.shape_inference.infer_shapes_path(temp_path)
 
-    output_path = os.path.join(args.output, f"{args.model_name}_decoder_with_past_model_fp32.onnx")
+    output_path = os.path.join(args.output, f"rank_{rank}_{args.model_name}_decoder_with_past_model_fp32.onnx")
+    onnx_model = onnx.load_model(temp_path, load_external_data=True)
+    save_onnx_model(
+        onnx_model,
+        output_path,
+        f"rank_{rank}_{args.model_name}_decoder_with_past_model_fp32.onnx.data",
+    )
+    del onnx_model
+    shutil.rmtree(temp_dir)
+
+    logger.info(
+        f"The {args.model_name} separate ONNX model has been successfully created with the TorchScript exporter!"
+    )
+
+
+def run_torchscript_merged_export(
+    args: argparse.Namespace, l_config: AutoConfig, llama: AutoModelForCausalLM, rank: int = 0, world_size: int = 1
+):
+    # Dummy values for export
+    batch_size, sequence_length, past_sequence_length = 2, 8, 0
+
+    # set device used to export model
+    # for llama-2-70b we will use current gpus to speed up export process
+    # for other models, we will use CPU to make sure we have enough memory to do export
+    device = llama.device if args.model_name == "Llama-2-70b-hf" else torch.device("cpu")
+
+    temp_name = args.model_name.lower().replace("-", "").replace("_", "")
+    max_sequence_length = 16384 if "codellama" in temp_name else 4096 if "llama2" in temp_name else 2048
+
+    # Export decoder_merged_model.onnx
+    decoder_merged_inputs = get_merged_sample_with_past_kv_inputs(
+        l_config,
+        device,
+        batch_size,
+        sequence_length,
+        past_sequence_length,
+        max_seq_len=max_sequence_length,
+        use_fp16=args.precision == Precision.FLOAT16,
+        world_size=world_size,
+    )
+    input_names = [
+        "input_ids",
+        "attention_mask",
+        "position_ids",
+        *list(
+            chain.from_iterable(
+                (f"past_key_values.{i}.key", f"past_key_values.{i}.value") for i in range(l_config.num_hidden_layers)
+            )
+        ),
+    ]
+    output_names = [
+        "logits",
+        *list(
+            chain.from_iterable((f"present.{i}.key", f"present.{i}.value") for i in range(l_config.num_hidden_layers))
+        ),
+    ]
+    dynamic_axes = get_merged_model_dynamic_axes(input_names, output_names)
+
+    # Avoid using system temp dir to avoid overflood on hard disk as 70b model is very large.
+    # Use temp folder per rank to avoid race condition here.
+    temp_dir = f"./temp_{rank}"
+    _prepare_dir(temp_dir)
+    temp_path = os.path.join(temp_dir, "temp.onnx")
+    torch.onnx.export(
+        llama,
+        args=decoder_merged_inputs,
+        f=temp_path,
+        export_params=True,
+        input_names=input_names,
+        output_names=output_names,
+        dynamic_axes=dynamic_axes,
+        opset_version=13,
+        do_constant_folding=True,
+        verbose=args.verbose,
+    )
+
+    # Check decoder_merged_model.onnx and save all external data to one file
+    onnx.checker.check_model(temp_path)
+    onnx.shape_inference.infer_shapes_path(temp_path)
+
+    output_path = os.path.join(args.output, f"rank_{rank}_{args.model_name}_decoder_merged_model_fp32.onnx")
     onnx_model = onnx.load_model(temp_path, load_external_data=True)
     save_onnx_model(
         onnx_model,
         output_path,
-        f"{args.model_name}_decoder_with_past_model_fp32.onnx.data",
+        f"rank_{rank}_{args.model_name}_decoder_merged_model_fp32.onnx.data",
     )
     del onnx_model
-    temp_dir.cleanup()
+    shutil.rmtree(temp_dir)
+
+    logger.info(f"The {args.model_name} merged ONNX model has been successfully created with the TorchScript exporter!")
 
-    logger.info(f"The {args.model_name} ONNX model has been successfully created with the TorchScript exporter!")
+
+# Optimize the model as FP32
+def optimize_export(config: AutoConfig, input_path: str, output_path: str):
+    from fusion_options import FusionOptions
+
+    optimization_options = FusionOptions("gpt2")
+
+    model_opt = optimize_model(
+        input_path,
+        model_type="gpt2",
+        num_heads=config.num_attention_heads,
+        hidden_size=config.hidden_size,
+        opt_level=0,
+        optimization_options=optimization_options,
+        only_onnxruntime=False,
+    )
+    model_opt.save_model_to_file(output_path, use_external_data_format=True)
+    logger.info(f"The ONNX model at {input_path} has been successfully optimized and saved at {output_path}!")
+    remove_existing_model(input_path)
+
+
+def convert_to_float16(
+    args: argparse.Namespace, config: AutoConfig, old_paths: List[str], rank: int = 0, world_size: int = 1
+):
+    decoder_model_fp16_path = os.path.join(args.output, f"rank_{rank}_{args.model_name}_decoder_model_fp16.onnx")
+    decoder_with_past_model_fp16_path = os.path.join(
+        args.output, f"rank_{rank}_{args.model_name}_decoder_with_past_model_fp16.onnx"
+    )
+    decoder_merged_model_fp16_path = os.path.join(
+        args.output, f"rank_{rank}_{args.model_name}_decoder_merged_model_fp16.onnx"
+    )
+    new_paths = [decoder_model_fp16_path, decoder_with_past_model_fp16_path, decoder_merged_model_fp16_path]
+
+    logger.info("Converting to float16...")
+    for fp32_path, fp16_path in zip(old_paths, new_paths):
+        if os.path.exists(fp32_path):
+            model = OnnxModel(onnx.load_model(fp32_path, load_external_data=True))
+            model.convert_float_to_float16(keep_io_types=False)
+            if args.use_gqa:
+                model = use_group_query_attention(config, model, world_size)
+            model.save_model_to_file(fp16_path, use_external_data_format=True)
+            del model
+            logger.info(f"The ONNX model at {fp32_path} has been converted to float16 and saved at {fp16_path}!")
+            remove_existing_model(fp32_path)
+
+    logger.info(f"The {args.model_name} ONNX model has been successfully converted to float16!")
+    return new_paths
+
+
+def use_group_query_attention(config: AutoConfig, fp16_model_opt: OnnxModel, world_size: int = 1):
+    # Replace MultiHeadAttention with GroupQueryAttention
+    fp16_model_opt = replace_mha_with_gqa(fp16_model_opt, "attention_mask", config.num_key_value_heads, world_size)
+    fp16_model_opt.prune_graph()
+    fp16_model_opt.update_graph(allow_remove_graph_inputs=True)
+    return fp16_model_opt
+
+
+def smooth_quant(
+    args: argparse.Namespace,
+    decoder_model_fp32_path: str,
+    decoder_with_past_model_fp32_path: str,
+    decoder_model_int8_path: str,
+    decoder_with_past_model_int8_path: str,
+):
+    from neural_compressor import PostTrainingQuantConfig
+    from neural_compressor import quantization as intel_quantization
+    from neural_compressor import set_workspace
+    from onnx.external_data_helper import load_external_data_for_model
+    from quant_kv_dataloader import QuantKVDataLoader
+
+    set_workspace(args.nc_workspace)
+    quantization_config = PostTrainingQuantConfig(
+        calibration_sampling_size=[args.calibration_sampling_size],
+        recipes={
+            "optypes_to_exclude_output_quant": ["MatMul"],
+            "smooth_quant": True,
+            "smooth_quant_args": {"alpha": args.smooth_quant_alpha},
+        },
+        op_type_dict={
+            "^((?!(MatMul|Gather|Conv)).)*$": {
+                "weight": {"dtype": ["fp32"]},
+                "activation": {"dtype": ["fp32"]},
+            }
+        },
+    )
+
+    # Convert decoder_model.onnx to INT8
+    decoder_model_int8 = intel_quantization.fit(
+        decoder_model_fp32_path,
+        quantization_config,
+        calib_dataloader=QuantKVDataLoader(args),
+    )
+    load_external_data_for_model(
+        decoder_model_int8._model,
+        os.path.split(decoder_model_int8._model_path)[0],
+    )
+    save_onnx_model(
+        decoder_model_int8._model,
+        decoder_model_int8_path,
+        f"{args.model_name}_decoder_model_int8.onnx.data",
+    )
+    del decoder_model_int8
+    logger.info(
+        f"The ONNX model at {decoder_model_fp32_path} has been quantized to int8 and saved at {decoder_model_int8_path}!"
+    )
+    remove_existing_model(decoder_model_fp32_path)
+
+    # Convert decoder_with_past_model.onnx to INT8
+    decoder_with_past_model_int8 = intel_quantization.fit(
+        decoder_with_past_model_fp32_path,
+        quantization_config,
+        calib_dataloader=QuantKVDataLoader(args, onnx_model_path=decoder_model_fp32_path),
+    )
+    load_external_data_for_model(
+        decoder_with_past_model_int8._model,
+        os.path.split(decoder_with_past_model_int8._model_path)[0],
+    )
+    save_onnx_model(
+        decoder_with_past_model_int8._model,
+        decoder_with_past_model_int8_path,
+        f"{args.model_name}_decoder_with_past_model_int8.onnx.data",
+    )
+    del decoder_with_past_model_int8
+    logger.info(
+        f"The ONNX model at {decoder_with_past_model_fp32_path} has been quantized to int8 and saved at {decoder_with_past_model_int8_path}!"
+    )
+    remove_existing_model(decoder_with_past_model_fp32_path)
+
+    logger.info(f"The {args.model_name} ONNX model has been successfully quantized to int8!")
+
+    logger.warning(f"Removing {args.nc_workspace}")
+    shutil.rmtree(args.nc_workspace)
+
+
+def remove_existing_model(model_path: str):
+    # Remove ONNX model and its external data
+    data_path = os.path.join(model_path + ".data")
+    os.remove(model_path)
+    os.remove(data_path)
+    logger.warning(f"Removed {model_path} and {data_path}")
 
 
 def remove_existing_files(output_path: str):
@@ -253,7 +536,7 @@ def remove_existing_files(output_path: str):
         filepath = os.path.join(output_path, filename)
         if ".onnx" in filename or ".onnx.data" in filename:
             os.remove(filepath)
-            logger.warning(f"Removing {filepath}")
+            logger.warning(f"Removed {filepath}")
 
 
 def get_args():
@@ -288,7 +571,7 @@ def get_args():
         required=False,
         type=Precision,
         default=Precision.FLOAT32,
-        choices=[Precision.FLOAT32, Precision.FLOAT16, Precision.INT8],
+        choices=[Precision.FLOAT32, Precision.FLOAT16, Precision.INT8, Precision.INT4],
         help="Precision to export model in",
     )
 
@@ -301,15 +584,50 @@ def get_args():
         help="Execution provider to verify parity with",
     )
 
+    parser.add_argument(
+        "-r",
+        "--reexport",
+        required=False,
+        action="store_true",
+        help="Re-export models and overwrite existing models in output folder",
+    )
+    parser.set_defaults(reexport=False)
+
+    parser.add_argument(
+        "--use_gqa",
+        required=False,
+        action="store_true",
+        help="Use GroupQueryAttention instead of MultiHeadAttention",
+    )
+    parser.set_defaults(use_gqa=False)
+
+    parser.add_argument(
+        "--no_merged",
+        required=False,
+        action="store_true",
+        help="Export models into 2 ONNX files instead of 1. Deprecated in favor of exporting into 1 ONNX file.",
+    )
+    parser.set_defaults(no_merged=False)
+
     parser.add_argument(
         "-q",
         "--quantization_method",
         default="",
-        choices=["smooth_quant", "quantize_dynamic"],
-        help="Run a specific quantization algorithm. Need to install extra packages in `requirements-quant.txt` for SmoothQuant.",
+        choices=["blockwise", "smooth_quant", "quantize_dynamic"],
+        help="Run a specific quantization algorithm (blockwise for int4, smooth_quant for int8, quantize_dynamic for int8). Blockwise is recommended. Need to install extra packages in `requirements-quant.txt` for SmoothQuant.",
     )
 
-    smooth_quant_group = parser.add_argument_group("smooth_quant")
+    blockwise_group = parser.add_argument_group("4-bit quantization")
+
+    blockwise_group.add_argument(
+        "--block_size",
+        required=False,
+        default=32,
+        type=int,
+        help="Block size to quantize with. See https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/python/tools/quantization/matmul_4bits_quantizer.py for details.",
+    )
+
+    smooth_quant_group = parser.add_argument_group("smooth_quant (8-bit quantization)")
 
     smooth_quant_group.add_argument(
         "--smooth_quant_alpha",
@@ -352,7 +670,7 @@ def get_args():
         help="Workspace to save intermediate files generated by Intel's Neural Compressor package.",
     )
 
-    quantize_dynamic_group = parser.add_argument_group("quantize_dynamic")
+    quantize_dynamic_group = parser.add_argument_group("quantize_dynamic (8-bit quantization)")
 
     quantize_dynamic_group.add_argument(
         "--quantize_embedding_layer",
@@ -394,182 +712,253 @@ def get_args():
     )
     parser.set_defaults(use_dynamo_export=False)
 
+    parser.add_argument(
+        "--cache_dir",
+        required=False,
+        type=str,
+        default="./model_cache",
+        help="model cache dir to override default HF cache dir to avoid overflood the /home dir",
+    )
+
     args = parser.parse_args()
     return args
 
 
 def main():
+    if version.parse(torch.__version__) < version.parse("2.2.0") and "2.2.0.dev" not in torch.__version__:
+        # Second predicate is for comparing nightly (ex: 2.2.0.dev20230920 vs 2.2.0) since first predicate is false
+        # in that scenario. It can be removed when torch v2.2.0 is released in stable.
+        logger.error(f"Detected PyTorch version {torch.__version__}. Please upgrade and use v2.2.0 or newer.")
+        return
+
     args = get_args()
     setup_logger(args.verbose)
     prepare_environment(args.input, args.output, args.execution_provider != "cpu")
-    remove_existing_files(args.output)
+    if args.reexport:
+        remove_existing_files(args.output)
     logger.info(f"Arguments: {args}")
 
+    world_size = get_size()
+    rank = get_rank()
+
     # Load model and config
     use_auth_token = args.input == os.path.join(".")
     setattr(args, "use_auth_token", use_auth_token)  # noqa: B010
-    l_config = LlamaConfig.from_pretrained(
-        args.model_name if use_auth_token else args.input, use_auth_token=use_auth_token
-    )
-    llama = LlamaForCausalLM.from_pretrained(
-        args.model_name if use_auth_token else args.input, use_auth_token=use_auth_token, use_cache=True
-    )
+
     original_model_name = args.model_name
     setattr(args, "original_model_name", original_model_name)  # noqa: B010
     args.model_name = args.model_name.split("/")[-1]
 
-    # Export to ONNX
-    if args.use_dynamo_export:
-        logger.warning("Please ensure you have installed PyTorch, ONNX, and ONNX Script as follows.")
-        logger.warning("Step 1 - PyTorch nightly: https://pytorch.org/get-started/locally/")
-        logger.warning("Step 2 - ONNX weekly: https://pypi.org/project/onnx-weekly/")
-        logger.warning(
-            "Step 3 - ONNX Script from source: https://github.com/microsoft/onnxscript#installing-onnx-script"
-        )
-        logger.warning(
-            "Note: After you install ONNX weekly, omit `onnx` when running the first line for installing ONNX Script. This is because you already installed `onnx-weekly` in the previous step."
-        )
-        run_dynamo_export(args, l_config, llama)
-    else:
-        run_torchscript_export(args, l_config, llama)
-
-    # Change precision of exported models if not FP32
-    decoder_model_fp32_path = os.path.join(args.output, f"{args.model_name}_decoder_model_fp32.onnx")
-    decoder_with_past_model_fp32_path = os.path.join(
-        args.output, f"{args.model_name}_decoder_with_past_model_fp32.onnx"
-    )
-
-    if args.precision == Precision.FLOAT16:
-        # Convert decoder_model.onnx to FP16
-        decoder_model_fp16_path = os.path.join(args.output, f"{args.model_name}_decoder_model_fp16.onnx")
-        model = OnnxModel(onnx.load_model(decoder_model_fp32_path, load_external_data=True))
-        model.convert_float_to_float16(keep_io_types=False, op_block_list=["If"])
-        model.save_model_to_file(decoder_model_fp16_path, use_external_data_format=True, all_tensors_to_one_file=True)
-        del model
-
-        # Convert decoder_with_past_model.onnx to FP16
-        decoder_with_past_model_fp16_path = os.path.join(
-            args.output, f"{args.model_name}_decoder_with_past_model_fp16.onnx"
-        )
-        model = OnnxModel(onnx.load_model(decoder_with_past_model_fp32_path, load_external_data=True))
-        model.convert_float_to_float16(keep_io_types=False, op_block_list=["If"])
-        model.save_model_to_file(
-            decoder_with_past_model_fp16_path, use_external_data_format=True, all_tensors_to_one_file=True
-        )
-        del model
-
-    elif args.precision == Precision.INT8:
-        decoder_model_int8_path = os.path.join(args.output, f"{args.model_name}_decoder_model_int8.onnx")
-        decoder_with_past_model_int8_path = os.path.join(
-            args.output, f"{args.model_name}_decoder_with_past_model_int8.onnx"
-        )
-
-        if args.quantization_method == "smooth_quant":
-            from neural_compressor import PostTrainingQuantConfig
-            from neural_compressor import quantization as intel_quantization
-            from neural_compressor import set_workspace
-            from onnx.external_data_helper import load_external_data_for_model
-            from quant_kv_dataloader import QuantKVDataLoader
-
-            set_workspace(args.nc_workspace)
-            quantization_config = PostTrainingQuantConfig(
-                calibration_sampling_size=[args.calibration_sampling_size],
-                recipes={
-                    "optypes_to_exclude_output_quant": ["MatMul"],
-                    "smooth_quant": args.smooth_quant,
-                    "smooth_quant_args": {"alpha": args.smooth_quant_alpha},
-                },
-                op_type_dict={
-                    "^((?!(MatMul|Gather|Conv)).)*$": {
-                        "weight": {"dtype": ["fp32"]},
-                        "activation": {"dtype": ["fp32"]},
-                    }
-                },
-            )
+    setattr(args, "device_name", "cpu" if args.execution_provider == "cpu" else f"cuda:{rank}")  # noqa: B010
+    setattr(args, "device", torch.device(args.device_name))  # noqa: B010
+
+    location = args.original_model_name if use_auth_token else args.input
 
-            # Convert decoder_model.onnx to INT8
-            decoder_model_int8 = intel_quantization.fit(
-                decoder_model_fp32_path,
-                quantization_config,
-                calib_dataloader=QuantKVDataLoader(args),
+    # Use CUDA for LLaMA-2-70B to speed up export and CPU for other models
+    l_config, llama = setup_torch_model(
+        args, location, use_auth_token, device=args.device if args.model_name == "Llama-2-70b-hf" else None
+    )
+
+    assert l_config.num_attention_heads % world_size == 0 and l_config.num_key_value_heads % world_size == 0
+
+    barrier()
+    for i in range(world_size):
+        if i == rank:
+            # Set model paths for FP32 model
+            decoder_model_fp32_path = os.path.join(
+                args.output, f"rank_{rank}_{args.model_name}_decoder_model_fp32.onnx"
             )
-            load_external_data_for_model(
-                decoder_model_int8._model,
-                os.path.split(decoder_model_int8._model_path)[0],
+            decoder_with_past_model_fp32_path = os.path.join(
+                args.output, f"rank_{rank}_{args.model_name}_decoder_with_past_model_fp32.onnx"
             )
-            save_onnx_model(
-                decoder_model_int8._model,
-                decoder_model_int8_path,
-                f"{args.model_name}_decoder_model_int8.onnx.data",
+            decoder_merged_model_fp32_path = os.path.join(
+                args.output, f"rank_{rank}_{args.model_name}_decoder_merged_model_fp32.onnx"
             )
-            del decoder_model_int8
+            old_paths = [decoder_model_fp32_path, decoder_with_past_model_fp32_path, decoder_merged_model_fp32_path]
 
-            # Convert decoder_with_past_model.onnx to INT8
-            decoder_with_past_model_int8 = intel_quantization.fit(
-                decoder_with_past_model_fp32_path,
-                quantization_config,
-                calib_dataloader=QuantKVDataLoader(args, onnx_model_path=decoder_model_fp32_path),
+            missing_separate_exports = (
+                args.no_merged
+                and not os.path.exists(decoder_model_fp32_path)
+                and not os.path.exists(decoder_with_past_model_fp32_path)
             )
-            load_external_data_for_model(
-                decoder_with_past_model_int8._model,
-                os.path.split(decoder_with_past_model_int8._model_path)[0],
+            missing_merged_export = not args.no_merged and not os.path.exists(decoder_merged_model_fp32_path)
+
+            # Export to ONNX
+            if missing_separate_exports or missing_merged_export:
+                if args.use_dynamo_export and missing_separate_exports:
+                    logger.warning("Please ensure you have installed PyTorch, ONNX, and ONNX Script as follows.")
+                    logger.warning("Step 1 - PyTorch nightly: https://pytorch.org/get-started/locally/")
+                    logger.warning("Step 2 - ONNX weekly: https://pypi.org/project/onnx-weekly/")
+                    logger.warning(
+                        "Step 3 - ONNX Script from source: https://github.com/microsoft/onnxscript#installing-onnx-script"
+                    )
+                    logger.warning(
+                        "Note: After you install ONNX weekly, omit `onnx` when running the first line for installing ONNX Script. This is because you already installed `onnx-weekly` in the previous step."
+                    )
+                    run_dynamo_export(args, l_config, llama)
+                elif args.no_merged:
+                    run_torchscript_separate_export(args, l_config, llama, rank, world_size)
+                else:
+                    run_torchscript_merged_export(args, l_config, llama, rank, world_size)
+            del llama  # Delete LLaMA model from memory since it will be loaded again during parity check
+
+            # Set model paths to store FP32 optimized model
+            decoder_model_fp32_opt_path = os.path.join(
+                args.output, f"rank_{rank}_{args.model_name}_decoder_model_fp32_opt.onnx"
             )
-            save_onnx_model(
-                decoder_with_past_model_int8._model,
-                decoder_with_past_model_int8_path,
-                f"{args.model_name}_decoder_with_past_model_int8.onnx.data",
+            decoder_with_past_model_fp32_opt_path = os.path.join(
+                args.output, f"rank_{rank}_{args.model_name}_decoder_with_past_model_fp32_opt.onnx"
             )
-            del decoder_with_past_model_int8
-
-            logger.info(f"Removing {args.nc_workspace}")
-            os.system(f"rm -R {args.nc_workspace}")
-
-        elif args.quantization_method == "quantize_dynamic":
-            logger.warning(
-                "The `quantize_dynamic` method is deprecated in favor of `smooth_quant` instead. Precision loss may be high with `quantize_dynamic`."
+            decoder_merged_model_fp32_opt_path = os.path.join(
+                args.output, f"rank_{rank}_{args.model_name}_decoder_merged_model_fp32_opt.onnx"
             )
-
-            # Convert decoder_model.onnx to INT8
-            ort_quantization.quantize_dynamic(
-                decoder_model_fp32_path,
-                decoder_model_int8_path,
-                op_types_to_quantize=["MatMul", "Gemm", "Gather"]
-                if args.quantize_embedding_layer
-                else ["MatMul", "Gemm"],
-                per_channel=args.quantize_per_channel,
-                reduce_range=args.quantize_reduce_range,
-                use_external_data_format=True,
-                extra_options={"MatMulConstBOnly": True},
+            new_paths = [
+                decoder_model_fp32_opt_path,
+                decoder_with_past_model_fp32_opt_path,
+                decoder_merged_model_fp32_opt_path,
+            ]
+
+            # Run the optimizer script
+            logger.info("Optimizing models...")
+            for orig_path, opt_path in zip(old_paths, new_paths):
+                if os.path.exists(orig_path):
+                    optimize_export(l_config, input_path=orig_path, output_path=opt_path)
+
+            # Re-assign default FP32 model paths as their optimized versions
+            decoder_model_fp32_path = decoder_model_fp32_opt_path
+            decoder_with_past_model_fp32_path = decoder_with_past_model_fp32_opt_path
+            decoder_merged_model_fp32_path = decoder_merged_model_fp32_opt_path
+            old_paths = [decoder_model_fp32_path, decoder_with_past_model_fp32_path, decoder_merged_model_fp32_path]
+
+            logger.info(
+                f"The {args.model_name} ONNX model has been successfully optimized with the ORT transformer optimizer script!"
             )
 
-            # Convert decoder_with_past_model.onnx to INT8
-            ort_quantization.quantize_dynamic(
-                decoder_with_past_model_fp32_path,
-                decoder_with_past_model_int8_path,
-                op_types_to_quantize=["MatMul", "Gemm", "Gather"]
-                if args.quantize_embedding_layer
-                else ["MatMul", "Gemm"],
-                per_channel=args.quantize_per_channel,
-                reduce_range=args.quantize_reduce_range,
-                use_external_data_format=True,
-                extra_options={"MatMulConstBOnly": True},
-            )
+            # Change precision of exported models from FP32
+            if args.precision == Precision.FLOAT16:
+                new_paths = convert_to_float16(args, l_config, old_paths, rank, world_size)
+
+            elif args.precision == Precision.INT8:
+                decoder_model_int8_path = os.path.join(
+                    args.output, f"rank_{rank}_{args.model_name}_decoder_model_int8.onnx"
+                )
+                decoder_with_past_model_int8_path = os.path.join(
+                    args.output, f"rank_{rank}_{args.model_name}_decoder_with_past_model_int8.onnx"
+                )
+                decoder_merged_model_int8_path = os.path.join(
+                    args.output, f"rank_{rank}_{args.model_name}_decoder_merged_model_int8.onnx"
+                )
+                new_paths = [decoder_model_int8_path, decoder_with_past_model_int8_path, decoder_merged_model_int8_path]
+
+                if args.quantization_method == "smooth_quant":
+                    if not args.no_merged:
+                        logger.error("SmoothQuant must be used on separately exported models")
+                    else:
+                        logger.info(
+                            f"Quantizing {decoder_model_fp32_path} and {decoder_with_past_model_fp32_path} to int8"
+                        )
+                        smooth_quant(args, old_paths[0], old_paths[1], new_paths[0], new_paths[1])
+
+                elif args.quantization_method == "quantize_dynamic":
+                    logger.warning(
+                        "The `quantize_dynamic` method is deprecated in favor of `smooth_quant` instead. Precision loss may be high with `quantize_dynamic`."
+                    )
+
+                    logger.info("Quantizing to int8...")
+                    for fp32_path, int8_path in zip(old_paths, new_paths):
+                        if os.path.exists(fp32_path):
+                            ort_quantization.quantize_dynamic(
+                                fp32_path,
+                                int8_path,
+                                op_types_to_quantize=["MatMul", "Gemm", "Gather"]
+                                if args.quantize_embedding_layer
+                                else ["MatMul", "Gemm"],
+                                per_channel=args.quantize_per_channel,
+                                reduce_range=args.quantize_reduce_range,
+                                use_external_data_format=True,
+                                extra_options={"MatMulConstBOnly": True},
+                            )
+                            logger.info(
+                                f"The ONNX model at {fp32_path} has been quantized to int8 and saved at {int8_path}!"
+                            )
+                            remove_existing_model(decoder_model_fp32_path)
+
+                    logger.info(f"The {args.model_name} ONNX model has been successfully quantized to int8!")
+
+                else:
+                    raise Exception(f"Could not recognize {args.quantization_method} as a quantization method")
+
+            elif args.precision == Precision.INT4:
+                if args.execution_provider != "cpu":
+                    old_paths = convert_to_float16(args, l_config, old_paths, rank, world_size)
+
+                decoder_model_int4_path = os.path.join(
+                    args.output, f"rank_{rank}_{args.model_name}_decoder_model_int4.onnx"
+                )
+                decoder_with_past_model_int4_path = os.path.join(
+                    args.output, f"rank_{rank}_{args.model_name}_decoder_with_past_model_int4.onnx"
+                )
+                decoder_merged_model_int4_path = os.path.join(
+                    args.output, f"rank_{rank}_{args.model_name}_decoder_merged_model_int4.onnx"
+                )
+                new_paths = [decoder_model_int4_path, decoder_with_past_model_int4_path, decoder_merged_model_int4_path]
+
+                for fp_path, int4_path in zip(old_paths, new_paths):
+                    if os.path.exists(fp_path):
+                        model = onnx.load_model(fp_path, load_external_data=True)
+                        quant = MatMul4BitsQuantizer(model, args.block_size, is_symmetric=True, nodes_to_exclude=[])
+                        quant.process()
+                        quant.model.save_model_to_file(int4_path, use_external_data_format=True)
+                        del model
+                        del quant
+                        logger.info(f"The ONNX model at {fp_path} has been quantized to int4 and saved at {int4_path}!")
+                        remove_existing_model(fp_path)
+        barrier()
 
-        else:
-            raise Exception(f"Could not recognize {args.quantization_method} as a quantization method")
+    logger.info("Verifying parity on all ONNX models created")
+
+    # Use FP32 precision for FP32, INT8, INT4 CPU models, use FP16 precision for FP16 and INT4 GPU models
+    args.precision = (
+        "fp32"
+        if args.precision in {Precision.INT8, Precision.FLOAT32}
+        or (args.precision == Precision.INT4 and args.execution_provider == "cpu")
+        else "fp16"
+    )
 
     # Verify parity on all saved ONNX models
-    del llama  # Delete LLaMA model from memory since it will be loaded again during parity check
-    logger.info("Verifying parity on all ONNX models created")
     for filename in os.listdir(args.output):
-        if ".data" in filename or ".onnx" not in filename:
+        if (
+            ".data" in filename
+            or ".onnx" not in filename
+            or args.precision not in filename
+            or f"rank_{rank}" not in filename
+        ):
             continue
 
-        precision = filename[filename.rfind("_") + 1 : filename.find(".onnx")]
-        parity_cmd = ["-m", f"{original_model_name}", "-o", f"{os.path.join(args.output, filename)}", "-fp", precision]
+        parity_cmd = [
+            "-m",
+            original_model_name,
+            "-o",
+            os.path.join(args.output, filename),
+            "-ep",
+            args.execution_provider,
+            "-fp",
+            args.precision,
+            "--cache_dir",
+            args.cache_dir,
+        ]
         if "with_past" in filename:
             parity_cmd.append("--use_past_kv")
-        parity_check(parity_cmd)
+        if "merged" in filename:
+            parity_cmd.append("--merged")
+        if args.use_gqa:
+            parity_cmd.append("--use_gqa")
+
+        try:
+            logger.debug(f"check parity with cmd: {parity_cmd}")
+            parity_check(parity_cmd)
+        except Exception as e:
+            logger.warning(f"An error occurred while verifying parity: {e}", exc_info=True)
 
 
 if __name__ == "__main__":
diff --git a/onnxruntime/python/tools/transformers/models/llama/dist_settings.py b/onnxruntime/python/tools/transformers/models/llama/dist_settings.py
new file mode 100644
index 0000000000000..72192ce8d8c63
--- /dev/null
+++ b/onnxruntime/python/tools/transformers/models/llama/dist_settings.py
@@ -0,0 +1,52 @@
+import os
+
+import torch.distributed as dist
+
+
+def init_dist():
+    if "LOCAL_RANK" in os.environ:
+        int(os.environ["LOCAL_RANK"])
+        rank = int(os.environ["RANK"])
+        world_size = int(os.environ["WORLD_SIZE"])
+
+        dist.init_process_group("nccl", init_method="tcp://127.0.0.1:7645", world_size=world_size, rank=rank)
+    elif "OMPI_COMM_WORLD_LOCAL_RANK" in os.environ:
+        int(os.environ.get("OMPI_COMM_WORLD_LOCAL_RANK", 0))
+        rank = int(os.environ.get("OMPI_COMM_WORLD_RANK", 0))
+        world_size = int(os.environ.get("OMPI_COMM_WORLD_SIZE", 1))
+
+        dist.init_process_group("nccl", init_method="tcp://127.0.0.1:7647", world_size=world_size, rank=rank)
+    else:
+        # don't need to do init for single process
+        pass
+
+
+def _get_comm():
+    try:
+        from mpi4py import MPI
+
+        comm = MPI.COMM_WORLD
+        return comm
+    except ImportError:
+        return None
+
+
+def get_rank():
+    comm = _get_comm()
+    return comm.Get_rank() if comm is not None else 0
+
+
+def get_size():
+    comm = _get_comm()
+    return comm.Get_size() if comm is not None else 1
+
+
+def barrier():
+    comm = _get_comm()
+    if comm is not None:
+        comm.Barrier()
+
+
+def print_out(*args):
+    if get_rank() == 0:
+        print(*args)
diff --git a/onnxruntime/python/tools/transformers/models/llama/llama_inputs.py b/onnxruntime/python/tools/transformers/models/llama/llama_inputs.py
index 6a28498a9ffc9..bae1ae82e8f7e 100644
--- a/onnxruntime/python/tools/transformers/models/llama/llama_inputs.py
+++ b/onnxruntime/python/tools/transformers/models/llama/llama_inputs.py
@@ -2,29 +2,46 @@
 
 import numpy as np
 import torch
-from transformers import LlamaConfig
+from transformers import AutoConfig
+
+from onnxruntime import InferenceSession, OrtValue
 
 
 # Get position_ids from attention_mask
 def get_position_ids(attention_mask: torch.Tensor, use_past_kv: bool):
     position_ids = attention_mask.long().cumsum(-1) - 1
+    position_ids.masked_fill_(attention_mask == 0, 1)
     if use_past_kv:
+        # Shape: (batch_size, 1)
         position_ids = position_ids[:, -1].unsqueeze(-1)
+
+    # Shape: (batch_size, sequence_length)
     return position_ids
 
 
 # Inputs for first pass to get initial past_key_values
+#   input_ids: (batch_size, sequence_length)
+#   attention_mask: (batch_size, sequence_length)
+#   position_ids: (batch_size, sequence_length)
 def get_sample_inputs(
-    config: LlamaConfig, device: torch.device, batch_size: int, seq_len: int, return_dict: bool = False
+    config: AutoConfig,
+    device: torch.device,
+    batch_size: int,
+    seq_len: int,
+    engine: str = "pt",
+    return_dict: bool = False,
 ):
-    input_ids = torch.randint(
-        low=0, high=config.vocab_size, size=(batch_size, seq_len), device=device, dtype=torch.int64
-    )
-    attention_mask = torch.ones(batch_size, seq_len, device=device, dtype=torch.int64)
-    # position_ids is of shape (batch_size, seq_len)
+    input_ids = torch.randint(low=0, high=config.vocab_size, size=(batch_size, seq_len), dtype=torch.int64)
+    attention_mask = torch.ones(batch_size, seq_len, dtype=torch.int64)
     position_ids = get_position_ids(attention_mask, use_past_kv=False)
 
+    # Convert inputs to NumPy (for ORT) or send to device (for PyTorch)
+    input_ids = input_ids.numpy() if engine == "ort" else input_ids.to(device)
+    attention_mask = attention_mask.numpy() if engine == "ort" else attention_mask.to(device)
+    position_ids = position_ids.numpy() if engine == "ort" else position_ids.to(device)
+
     if not return_dict:
+        # For export
         return (input_ids, attention_mask, position_ids)
 
     inputs = {
@@ -36,84 +53,260 @@ def get_sample_inputs(
 
 
 # Inputs for subsequent passes with past_key_values
+#   input_ids: (batch_size, 1)
+#   attention_mask: (batch_size, past_sequence_length + 1)
+#   position_ids: (batch_size, 1)
+#   past_key: (batch_size, num_heads, past_sequence_length, head_size)
+#   past_value: (batch_size, num_heads, past_sequence_length, head_size)
 def get_sample_with_past_kv_inputs(
-    config: LlamaConfig,
+    config: AutoConfig,
     device: torch.device,
     batch_size: int,
     past_seq_len: int,
     use_fp16: bool = False,
+    engine: str = "pt",
     return_dict: bool = False,
+    world_size: int = 1,
 ):
-    input_ids = torch.randint(low=0, high=config.vocab_size, size=(batch_size, 1), device=device, dtype=torch.int64)
-    attention_mask = torch.ones(batch_size, past_seq_len + 1, device=device, dtype=torch.int64)
+    input_ids = torch.randint(low=0, high=config.vocab_size, size=(batch_size, 1), dtype=torch.int64)
+    attention_mask = torch.ones(batch_size, past_seq_len + 1, dtype=torch.int64)
     # position_ids is of shape (batch_size, 1)
     position_ids = get_position_ids(attention_mask, use_past_kv=True)
-    past_kv = get_sample_past_kv_inputs(config, device, batch_size, past_seq_len, use_fp16)
+    past_kv = get_past_kv_inputs(config, batch_size, past_seq_len, use_fp16, world_size=world_size)
+
+    # Convert inputs to NumPy (for ORT) or send to device (for PyTorch)
+    input_ids = input_ids.numpy() if engine == "ort" else input_ids.to(device)
+    attention_mask = attention_mask.numpy() if engine == "ort" else attention_mask.to(device)
+    position_ids = position_ids.numpy() if engine == "ort" else position_ids.to(device)
+    past_kv = (
+        flatten_past_kv_inputs(past_kv)
+        if engine == "ort"
+        else list(map(lambda kv: (kv[0].to(device), kv[1].to(device)), past_kv))
+    )
 
     if not return_dict:
+        # For export
+        assert isinstance(past_kv, list)
         return (input_ids, attention_mask, position_ids, past_kv)
 
     inputs = {
         "input_ids": input_ids,
         "attention_mask": attention_mask,
         "position_ids": position_ids,
-        "past_key_values": past_kv,
     }
+    if engine == "ort":
+        assert isinstance(past_kv, dict)
+        inputs.update(past_kv)
+    else:
+        assert isinstance(past_kv, list)
+        inputs["past_key_values"] = past_kv
+
     return inputs
 
 
-# Create past_key_values
-def get_sample_past_kv_inputs(
-    config: LlamaConfig, device: torch.device, batch_size: int, past_seq_len: int, use_fp16: bool
+# Inputs for all passes with past_key_values
+#   input_ids: (batch_size, sequence_length)
+#   attention_mask: (batch_size, past_sequence_length + sequence_length)
+#   position_ids: (batch_size, sequence_length)
+#   past_key: (batch_size, num_heads, kv_sequence_length, head_size)
+#      For models with GQA, kv_sequence_length = max_sequence_length
+#      For models without GQA, kv_sequence_length = past_sequence_length
+#   past_value: (batch_size, num_heads, kv_sequence_length, head_size)
+#      For models with GQA, kv_sequence_length = max_sequence_length
+#      For models without GQA, kv_sequence_length = past_sequence_length
+def get_merged_sample_with_past_kv_inputs(
+    config: AutoConfig,
+    device: torch.device,
+    batch_size: int,
+    seq_len: int,
+    past_seq_len: int,
+    max_seq_len: int,
+    use_fp16: bool = False,
+    use_gqa: bool = False,
+    engine: str = "pt",
+    return_dict: bool = False,
+    world_size: int = 1,
+):
+    input_ids = torch.randint(low=0, high=config.vocab_size, size=(batch_size, seq_len), dtype=torch.int64)
+    attention_mask = torch.ones(batch_size, past_seq_len + seq_len, dtype=torch.int64)
+    # position_ids is of shape (batch_size, seq_len) for prompt generation, (batch_size, 1) for token generation
+    position_ids = get_position_ids(attention_mask, use_past_kv=(past_seq_len != 0))
+    past_kv = get_past_kv_inputs(config, batch_size, past_seq_len, use_fp16, world_size=world_size)
+
+    # Convert inputs to NumPy (for ORT) or send to device (for PyTorch)
+    input_ids = input_ids.numpy() if engine == "ort" else input_ids.to(device)
+    attention_mask = attention_mask.numpy() if engine == "ort" else attention_mask.to(device)
+    position_ids = position_ids.numpy() if engine == "ort" else position_ids.to(device)
+    past_kv = (
+        flatten_past_kv_inputs(past_kv)
+        if engine == "ort"
+        else list(map(lambda kv: (kv[0].to(device), kv[1].to(device)), past_kv))
+    )
+
+    if not return_dict:
+        # For export
+        assert isinstance(past_kv, list)
+        return (input_ids, attention_mask, position_ids, past_kv)
+
+    inputs = {
+        "input_ids": input_ids,
+        "attention_mask": attention_mask,
+        "position_ids": position_ids,
+    }
+    if engine == "ort":
+        assert isinstance(past_kv, dict)
+        inputs.update(past_kv)
+
+        if use_gqa:
+            inputs = enable_past_present_share_buffer(inputs, past_seq_len, max_seq_len)
+
+    else:
+        assert isinstance(past_kv, list)
+        inputs["past_key_values"] = past_kv
+
+    return inputs
+
+
+# Inputs for Microsoft export from https://github.com/microsoft/Llama-2-Onnx
+def get_msft_sample_inputs(
+    config: AutoConfig,
+    batch_size: int,
+    past_seq_len: int,
+    seq_len: int,
+    max_seq_len: int,
+    use_fp16: bool,
+    use_gqa: bool,
+    split_kv: bool,
 ):
-    num_heads, head_size = config.num_attention_heads, config.hidden_size // config.num_attention_heads
+    np_dtype = np.float16 if use_fp16 else np.float32
+    head_size = config.hidden_size // config.num_attention_heads
+
+    if not split_kv:
+        ort_inputs = {
+            "x": np.random.rand(batch_size, seq_len, config.hidden_size).astype(np_dtype),
+            "attn_mask": (-10000.0 * np.triu(np.ones((batch_size, max_seq_len, max_seq_len)), k=1)).astype(np_dtype),
+            "k_cache": np.random.rand(
+                batch_size, config.num_hidden_layers, past_seq_len, config.num_attention_heads, head_size
+            ).astype(np_dtype),
+            "v_cache": np.random.rand(
+                batch_size, config.num_hidden_layers, past_seq_len, config.num_attention_heads, head_size
+            ).astype(np_dtype),
+            "pos": np.array(past_seq_len, dtype=np.int64),
+        }
+    else:
+        ort_inputs = {
+            "x": np.random.rand(batch_size, seq_len, config.hidden_size).astype(np_dtype),
+            "attn_mask": (np.triu(np.ones((batch_size, max_seq_len, max_seq_len), dtype=np.int32), k=1) - 1).astype(
+                np.int32
+            ),
+            "pos": np.array(past_seq_len, dtype=np.int64),
+        }
+        for i in range(config.num_hidden_layers):
+            ort_inputs.update(
+                {
+                    f"k_{i}_cache": np.random.rand(
+                        batch_size, config.num_attention_heads, past_seq_len, head_size
+                    ).astype(np_dtype),
+                    f"v_{i}_cache": np.random.rand(
+                        batch_size, config.num_attention_heads, past_seq_len, head_size
+                    ).astype(np_dtype),
+                }
+            )
+
+        if use_gqa:
+            ort_inputs = enable_past_present_share_buffer(ort_inputs, past_seq_len, max_seq_len)
+
+    return ort_inputs
+
+
+# Create past_key_values
+# Each is of shape (batch_size, num_heads, past_sequence_length, head_size)
+def get_past_kv_inputs(config: AutoConfig, batch_size: int, past_seq_len: int, use_fp16: bool, world_size: int = 1):
+    num_heads, head_size = config.num_key_value_heads // world_size, config.hidden_size // config.num_attention_heads
     torch_dtype = torch.float16 if use_fp16 else torch.float32
     past_kv = [
         (
-            torch.rand(batch_size, num_heads, past_seq_len, head_size, device=device, dtype=torch_dtype),
-            torch.rand(batch_size, num_heads, past_seq_len, head_size, device=device, dtype=torch_dtype),
+            torch.rand(batch_size, num_heads, past_seq_len, head_size, dtype=torch_dtype),
+            torch.rand(batch_size, num_heads, past_seq_len, head_size, dtype=torch_dtype),
         )
         for _ in range(config.num_hidden_layers)
     ]
     return past_kv
 
 
-# Convert list of past_kv to dict of past_key and past_value
-def flatten_past_kv_inputs(past_key_values: List[Tuple[torch.Tensor, torch.Tensor]], use_fp16: bool):
+# Convert list of past_key_values to dict of past_key and past_value
+def flatten_past_kv_inputs(past_key_values: List[Tuple[torch.Tensor, torch.Tensor]]):
     past_kv = {}
-    np_dtype = np.float16 if use_fp16 else np.float32
     for i, (past_k, past_v) in enumerate(past_key_values):
-        past_kv[f"past_key_values.{i}.key"] = past_k.detach().cpu().numpy().astype(np_dtype)
-        past_kv[f"past_key_values.{i}.value"] = past_v.detach().cpu().numpy().astype(np_dtype)
+        past_kv[f"past_key_values.{i}.key"] = past_k.detach().cpu().numpy()
+        past_kv[f"past_key_values.{i}.value"] = past_v.detach().cpu().numpy()
     return past_kv
 
 
 # Format PyTorch inputs to ONNX Runtime inputs
-def convert_inputs_for_ort(pt_inputs: dict, use_fp16: bool):
+def convert_inputs_for_ort(
+    pt_inputs: dict,
+    use_gqa: bool = False,
+    past_seq_len: int = 0,
+    max_seq_len: int = 2048,
+    device: str = "",
+    device_id: int = -1,
+):
     ort_inputs = {}
     for k, v in pt_inputs.items():
-        if k == "past_key_values":
-            ort_inputs.update(flatten_past_kv_inputs(v, use_fp16))
+        if isinstance(v, np.ndarray):
+            ort_inputs[k] = v
+        elif k == "past_key_values":
+            ort_inputs.update(flatten_past_kv_inputs(v))
         else:
             ort_inputs[k] = v.detach().cpu().numpy()
+
+    # Reshape KV caches if using past-present-share-buffer
+    if use_gqa and device != "" and device != "cpu" and device_id > -1:
+        ort_inputs = enable_past_present_share_buffer(ort_inputs, past_seq_len, max_seq_len)
+
     return ort_inputs
 
 
-# Inputs for Microsoft export from https://github.com/microsoft/Llama-2-Onnx
-def get_msft_sample_inputs(config: LlamaConfig, batch_size: int, past_seq_len: int, seq_len: int, use_fp16: bool):
-    np_dtype = np.float16 if use_fp16 else np.float32
-    head_size = config.hidden_size // config.num_attention_heads
-    max_seq_len = 2048
-
-    ort_inputs = {
-        "x": np.random.rand(batch_size, seq_len, config.hidden_size).astype(np_dtype),
-        "attn_mask": (-10000.0 * np.triu(np.ones((batch_size, max_seq_len, max_seq_len)), k=1)).astype(np_dtype),
-        "k_cache": np.random.rand(
-            batch_size, config.num_hidden_layers, past_seq_len, config.num_attention_heads, head_size
-        ).astype(np_dtype),
-        "v_cache": np.random.rand(
-            batch_size, config.num_hidden_layers, past_seq_len, config.num_attention_heads, head_size
-        ).astype(np_dtype),
-        "pos": np.array(past_seq_len, dtype=np.int64),
-    }
+def enable_past_present_share_buffer(ort_inputs: dict, past_seq_len: int, max_seq_len: int):
+    for k, v in ort_inputs.items():
+        # Allocate new buffers with max_sequence_length for GQA
+        if "cache" in k or "past_key_values" in k:
+            # Copy v (BxSxPxH) into new_v (BxSxMxH)
+            batch_size, num_heads, _, head_size = v.shape
+            new_v = np.zeros((batch_size, num_heads, max_seq_len, head_size), dtype=v.dtype)
+            new_v[:batch_size, :num_heads, :past_seq_len, :head_size] = v
+            ort_inputs[k] = new_v
     return ort_inputs
+
+
+# Add IO bindings for execution providers
+def add_io_bindings(
+    model: InferenceSession, ort_inputs: dict, device: str, device_id: int, use_gqa: bool, kv_cache_ortvalues: dict
+):
+    io_binding = model.io_binding()
+
+    for k, v in ort_inputs.items():
+        # Bind OrtValue inputs to device
+        if use_gqa and ("cache" in k or "past_key_values" in k):
+            if k not in kv_cache_ortvalues:
+                v_device = OrtValue.ortvalue_from_numpy(v, device_type=device, device_id=device_id)
+                io_binding.bind_ortvalue_input(k, v_device)
+                kv_cache_ortvalues[k] = v_device
+            else:
+                kv_cache_ortvalues[k].update_inplace(v)
+                io_binding.bind_ortvalue_input(k, kv_cache_ortvalues[k])
+        else:
+            v_device = OrtValue.ortvalue_from_numpy(v, device_type=device, device_id=device_id)
+            io_binding.bind_ortvalue_input(k, v_device)
+
+    for output in model.get_outputs():
+        name = output.name
+        if use_gqa and ("out" in name or "present" in name):
+            # Bind present KV cache outputs to past KV cache inputs in order to buffer share
+            input_name = name.replace("out", "cache").replace("present", "past_key_values")
+            io_binding.bind_ortvalue_output(name, kv_cache_ortvalues[input_name])
+        else:
+            io_binding.bind_output(name, device_type=device, device_id=device_id)
+
+    return io_binding, kv_cache_ortvalues
diff --git a/onnxruntime/python/tools/transformers/models/llama/llama_parity.py b/onnxruntime/python/tools/transformers/models/llama/llama_parity.py
index dadf394440c9a..418a65325c8f0 100644
--- a/onnxruntime/python/tools/transformers/models/llama/llama_parity.py
+++ b/onnxruntime/python/tools/transformers/models/llama/llama_parity.py
@@ -1,48 +1,142 @@
 import argparse
 import logging
 import os
+import time
 from typing import List
 
 import numpy as np
 import torch
-from benchmark_helper import create_onnxruntime_session, setup_logger
-from llama_inputs import convert_inputs_for_ort, get_sample_inputs, get_sample_with_past_kv_inputs
-from transformers import LlamaConfig, LlamaForCausalLM
+from benchmark_helper import setup_logger
+from dist_settings import get_rank, get_size
+from llama_inputs import (
+    add_io_bindings,
+    convert_inputs_for_ort,
+    get_merged_sample_with_past_kv_inputs,
+    get_sample_inputs,
+    get_sample_with_past_kv_inputs,
+)
+from llama_torch import setup_torch_model
+from transformers import AutoConfig, AutoModelForCausalLM
+
+import onnxruntime as ort
 
 logger = logging.getLogger("")
 
 
-def verify_parity(args: argparse.Namespace, config: LlamaConfig, pt_model: LlamaForCausalLM):
+def get_sequence_lengths(args: argparse.Namespace):
+    past_sequence_length, curr_sequence_length = (8, 1) if args.use_past_kv else (0, 8)
+    temp_name = args.model_name.lower().replace("-", "").replace("_", "")
+    max_sequence_length = 16384 if "codellama" in temp_name else 4096 if "llama2" in temp_name else 2048
+    return past_sequence_length, curr_sequence_length, max_sequence_length
+
+
+def get_inputs(args: argparse.Namespace, config: AutoConfig):
     # Dummy values for parity
-    batch_size, sequence_length = 2, 8
-    device = torch.device("cpu")
+    world_size = get_size()
+    batch_size = 2
+    past_sequence_length, sequence_length, max_sequence_length = get_sequence_lengths(args)
 
-    # Run inference with PyTorch
-    inputs = (
-        get_sample_inputs(config, device, batch_size, sequence_length, return_dict=True)
-        if not args.use_past_kv
-        else get_sample_with_past_kv_inputs(
-            config, device, batch_size, sequence_length, use_fp16=(args.precision == "fp16"), return_dict=True
+    if args.merged:
+        inputs = get_merged_sample_with_past_kv_inputs(
+            config,
+            args.device,
+            batch_size,
+            seq_len=sequence_length,
+            past_seq_len=past_sequence_length,
+            max_seq_len=max_sequence_length,
+            use_fp16=args.use_fp16,
+            use_gqa=args.use_gqa,
+            return_dict=True,
+            world_size=world_size,
         )
-    )
+    elif args.use_past_kv:
+        inputs = get_sample_with_past_kv_inputs(
+            config,
+            args.device,
+            batch_size,
+            sequence_length,
+            use_fp16=args.use_fp16,
+            return_dict=True,
+            world_size=world_size,
+        )
+    else:
+        inputs = get_sample_inputs(config, args.device, batch_size, sequence_length, return_dict=True)
+
+    return inputs
+
+
+def verify_parity(
+    args: argparse.Namespace, config: AutoConfig, pt_model: AutoModelForCausalLM, kv_cache_ortvalues: dict
+):
+    inputs = get_inputs(args, config)
+
+    # Run inference with PyTorch
+    if args.execution_provider != "cpu":
+        torch.cuda.synchronize()
+    start_time = time.time()
     pt_outputs = pt_model(**inputs).logits.detach().cpu().numpy()
+    if args.execution_provider != "cpu":
+        torch.cuda.synchronize()
+    end_time = time.time()
+    logger.info(f"PyTorch took {end_time - start_time} s")
+    del pt_model
 
     # Run inference with ORT
-    inputs = convert_inputs_for_ort(inputs, use_fp16=(args.precision == "fp16"))
-    ort_model = create_onnxruntime_session(
+    past_sequence_length, _, max_sequence_length = get_sequence_lengths(args)
+    inputs = convert_inputs_for_ort(
+        inputs,
+        use_gqa=args.use_gqa,
+        past_seq_len=past_sequence_length,
+        max_seq_len=max_sequence_length,
+        device=args.execution_provider,
+        device_id=int(args.rank),
+    )
+
+    ep = f"{args.execution_provider.upper()}ExecutionProvider"
+    if ep == "CUDAExecutionProvider":
+        ep = (ep, {"device_id": args.rank})
+    ort_model = ort.InferenceSession(
         args.onnx_model_path,
-        args.execution_provider != "cpu",  # use_gpu
-        provider=args.execution_provider,
-        verbose=args.verbose,
+        sess_options=ort.SessionOptions(),
+        providers=[ep],
     )
-    ort_outputs = ort_model.run(None, inputs)[0]
+
+    # Add IO bindings for non-CPU execution providers
+    if args.execution_provider != "cpu":
+        io_binding, kv_cache_ortvalues = add_io_bindings(
+            ort_model,
+            inputs,
+            args.execution_provider,
+            int(args.rank),
+            args.use_gqa,
+            kv_cache_ortvalues,
+        )
+
+        io_binding.synchronize_inputs()
+        start_time = time.time()
+        ort_model.run_with_iobinding(io_binding)
+        io_binding.synchronize_outputs()
+        end_time = time.time()
+
+        ort_outputs = io_binding.copy_outputs_to_cpu()[0]  # Get logits
+        del ort_model
+
+    else:
+        start_time = time.time()
+        ort_outputs = ort_model.run(None, inputs)
+        end_time = time.time()
+
+        ort_outputs = ort_outputs[0]  # Get logits
+
+    logger.info(f"ONNX Runtime took {end_time - start_time} s")
 
     # Compare PyTorch and ONNX Runtime accuracy
-    tol = 1e-3 if args.precision == "fp32" else 1e-2 if args.precision == "fp16" else 1e2
+    tol = 2e1 if "int4" in args.onnx_model_path or "int8" in args.onnx_model_path else 5e-1
     parity = np.allclose(pt_outputs, ort_outputs, rtol=tol, atol=tol)
     logger.warning(f"Are PyTorch and ONNX Runtime results close? {parity}")
     if not parity:
         logger.warning(f"Max diff: {np.max(pt_outputs - ort_outputs)}")
+    return kv_cache_ortvalues
 
 
 def get_args(argv: List[str]):
@@ -96,15 +190,45 @@ def get_args(argv: List[str]):
     )
     parser.set_defaults(use_past_kv=False)
 
+    parser.add_argument(
+        "-g",
+        "--use_gqa",
+        action="store_true",
+        help="Use if model has GroupQueryAttention",
+    )
+    parser.set_defaults(use_gqa=False)
+
+    parser.add_argument(
+        "--merged",
+        action="store_true",
+        help="Use merged model (i.e. decoder_merged_model.onnx).",
+    )
+    parser.set_defaults(merged=False)
+
     parser.add_argument(
         "-fp",
         "--precision",
         required=True,
-        choices=["int8", "fp16", "fp32"],
+        choices=["int4", "int8", "fp16", "fp32"],
         help="Precision of model",
     )
 
+    parser.add_argument(
+        "--cache_dir",
+        required=False,
+        type=str,
+        default="./model_cache",
+        help="model cache dir to override default HF cache dir to avoid overflood the /home dir",
+    )
+
     args = parser.parse_args() if argv == [] else parser.parse_args(argv)
+
+    # Use FP32 precision for FP32, INT8, INT4 CPU models, use FP16 precision for FP16 and INT4 GPU models
+    args.precision = (
+        "fp32"
+        if args.precision in {"int8", "fp32"} or (args.precision == "int4" and args.execution_provider == "cpu")
+        else "fp16"
+    )
     return args
 
 
@@ -112,21 +236,39 @@ def main(argv: List[str] = []):  # noqa: B006
     args = get_args(argv)
     setup_logger(args.verbose)
     logger.info(f"Arguments: {args}")
+    rank = get_rank()
 
     # Load model and config
+    setattr(args, "use_fp16", args.precision == "fp16")  # noqa: B010
+    args.rank = rank
+    setattr(args, "device_name", "cpu" if args.execution_provider == "cpu" else f"cuda:{rank}")  # noqa: B010
+    setattr(args, "device", torch.device(args.device_name))  # noqa: B010
     use_auth_token = args.torch_model_directory == os.path.join(".")
     location = args.model_name if use_auth_token else args.torch_model_directory
 
-    config = LlamaConfig.from_pretrained(location, use_auth_token=use_auth_token)
-    llama = LlamaForCausalLM.from_pretrained(
+    config, llama = setup_torch_model(
+        args,
         location,
-        torch_dtype=(torch.float16 if args.precision == "fp16" else torch.float32),
-        use_auth_token=use_auth_token,
-        use_cache=True,
+        use_auth_token,
+        torch_dtype=(torch.float16 if args.use_fp16 else torch.float32),
+        device=args.device,
     )
 
-    verify_parity(args, config, llama)
+    kv_cache_ortvalues = {}
+    if not args.merged:
+        verify_parity(args, config, llama, kv_cache_ortvalues)
+    else:
+        # Verify prompt generation in merged model (decoder_model.onnx)
+        args.use_past_kv = False
+        kv_cache_ortvalues = verify_parity(args, config, llama, kv_cache_ortvalues)
+
+        # Verify token generation in merged model (decoder_with_past_model.onnx)
+        args.use_past_kv = True
+        verify_parity(args, config, llama, kv_cache_ortvalues)
 
 
 if __name__ == "__main__":
+    seed = 2
+    np.random.seed(seed)
+    torch.manual_seed(seed)
     main()
diff --git a/onnxruntime/python/tools/transformers/models/llama/llama_torch.py b/onnxruntime/python/tools/transformers/models/llama/llama_torch.py
new file mode 100644
index 0000000000000..94e0397116d1c
--- /dev/null
+++ b/onnxruntime/python/tools/transformers/models/llama/llama_torch.py
@@ -0,0 +1,38 @@
+import logging
+import os
+
+import torch
+from dist_settings import barrier, get_rank, get_size
+from transformers import AutoConfig, AutoModelForCausalLM
+
+logger = logging.getLogger("")
+
+
+def setup_torch_model(args, location, use_auth_token, torch_dtype=torch.float32, device=None):
+    world_size = get_size()
+    logger.info(f"world_size: {world_size}")
+    rank = get_rank()
+    barrier()
+
+    if not os.path.exists(args.cache_dir):
+        os.makedirs(args.cache_dir, exist_ok=True)
+
+    for i in range(world_size):
+        if i == rank % (world_size):
+            l_config = AutoConfig.from_pretrained(location, use_auth_token=use_auth_token, cache_dir=args.cache_dir)
+            l_config.use_cache = True
+            llama = AutoModelForCausalLM.from_pretrained(
+                location,
+                use_auth_token=use_auth_token,
+                config=l_config,
+                torch_dtype=torch_dtype,
+                cache_dir=args.cache_dir,
+            )
+            if world_size > 1:
+                llama.parallel_model()
+            if device:
+                llama.to(device)
+            llama.eval()
+            llama.requires_grad_(False)
+        barrier()
+    return l_config, llama
diff --git a/onnxruntime/python/tools/transformers/models/llama/requirements-70b-model.txt b/onnxruntime/python/tools/transformers/models/llama/requirements-70b-model.txt
new file mode 100644
index 0000000000000..572cfdb71be4a
--- /dev/null
+++ b/onnxruntime/python/tools/transformers/models/llama/requirements-70b-model.txt
@@ -0,0 +1,4 @@
+-r requirements.txt
+git+https://github.com/frankdongms/transformers.git@frdong/shard_llama
+mpi4py
+psutil
\ No newline at end of file
diff --git a/onnxruntime/python/tools/transformers/models/llama/requirements-cpu.txt b/onnxruntime/python/tools/transformers/models/llama/requirements-cpu.txt
index e9ad937cf14e7..3d707fa13e3c8 100644
--- a/onnxruntime/python/tools/transformers/models/llama/requirements-cpu.txt
+++ b/onnxruntime/python/tools/transformers/models/llama/requirements-cpu.txt
@@ -1,3 +1,2 @@
 -r requirements.txt
-torch>=2.0.1
-onnxruntime>=1.16.0
\ No newline at end of file
+onnxruntime>=1.16.2
\ No newline at end of file
diff --git a/onnxruntime/python/tools/transformers/models/llama/requirements-cuda.txt b/onnxruntime/python/tools/transformers/models/llama/requirements-cuda.txt
index 5544abcaa1228..b634bcc50f6e4 100644
--- a/onnxruntime/python/tools/transformers/models/llama/requirements-cuda.txt
+++ b/onnxruntime/python/tools/transformers/models/llama/requirements-cuda.txt
@@ -1,4 +1,4 @@
 -r requirements.txt
-# Please manually install torch>=2.0.1 with CUDA enabled for the CUDA version installed in your system.
+# Please manually install torch>=2.2.0.dev20230920 with CUDA enabled for the CUDA version installed in your system.
 # Instructions can be found here: https://pytorch.org/get-started/locally/
-onnxruntime-gpu>=1.16.0
\ No newline at end of file
+onnxruntime-gpu>=1.16.2
\ No newline at end of file
diff --git a/onnxruntime/python/tools/transformers/models/llama/requirements.txt b/onnxruntime/python/tools/transformers/models/llama/requirements.txt
index f843ef4dc5568..4210f36982aef 100644
--- a/onnxruntime/python/tools/transformers/models/llama/requirements.txt
+++ b/onnxruntime/python/tools/transformers/models/llama/requirements.txt
@@ -1,5 +1,6 @@
-git+https://github.com/kunal-vaishnavi/optimum.git@kvaishnavi/llama-add-position-ids
-transformers>=4.28.1
+git+https://github.com/huggingface/optimum.git
+transformers>=4.33.2
+torch>=2.2.0.dev20230920
 onnx>=1.14.0
 datasets>=2.8.0
 protobuf==3.20.2
\ No newline at end of file
diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/README.md b/onnxruntime/python/tools/transformers/models/stable_diffusion/README.md
index 7ffefdd05f215..3d00c9cd6bf59 100644
--- a/onnxruntime/python/tools/transformers/models/stable_diffusion/README.md
+++ b/onnxruntime/python/tools/transformers/models/stable_diffusion/README.md
@@ -1,108 +1,165 @@
 # Stable Diffusion GPU Optimization
 
-## Overview
-
-[Stable Diffusion](https://stability.ai/blog/stable-diffusion-announcement) is a text-to-image latent diffusion model for image generation. Explanation of the Stable Diffusion can be found in [Stable Diffusion with Diffusers](https://huggingface.co/blog/stable_diffusion).
-
-## Optimizations for Stable Diffusion
-
 ONNX Runtime uses the following optimizations to speed up Stable Diffusion in CUDA:
 
 * [Flash Attention](https://arxiv.org/abs/2205.14135) for float16 precision. Flash Attention uses tiling to reduce number of GPU memory reads/writes, and improves performance with less memory for long sequence length. The kernel requires GPUs of Compute Capability >= 7.5 (like T4, A100, and RTX 2060~4090).
 * [Memory Efficient Attention](https://arxiv.org/abs/2112.05682v2) for float32 precision or older GPUs (like V100). We used the fused multi-head attention kernel in CUTLASS, and the kernel was contributed by xFormers.
 * Channel-last (NHWC) convolution. For NVidia GPU with Tensor Cores support, NHWC tensor layout is recommended for convolution. See [Tensor Layouts In Memory: NCHW vs NHWC](https://docs.nvidia.com/deeplearning/performance/dl-performance-convolutional/index.html#tensor-layout).
-* GroupNorm kernel for NHWC tensor layout.
+* GroupNorm for NHWC tensor layout, and SkipGroupNorm fusion which fuses GroupNorm with Add bias and residual inputs
 * SkipLayerNormalization which fuses LayerNormalization with Add bias and residual inputs.
 * BiasSplitGelu is a fusion of Add bias with SplitGelu activation.
 * BiasAdd fuses Add bias and residual.
 * Reduce Transpose nodes by graph transformation.
 
-These optimizations are firstly carried out on CUDA EP. They may not work on other EP. To show the impact of each optimization on latency and GPU memory, we did some experiments:
+These optimizations are firstly carried out on CUDA EP. They may not work on other EP.
 
-### Results on RTX 3060 GPU:
+## Scripts:
 
-| Optimizations                                                                      | Average Latency (batch_size=1) | Memory in MB (batch_size=1) | Average Latency (batch_size=8) | Memory in MB (batch_size=8) |
-| ---------------------------------------------------------------------------------- | ------------------------------ | --------------------------- | ------------------------------ | --------------------------- |
-| Raw FP32 models                                                                    | 25.6                           | 10,667                      | OOM                            | OOM                         |
-| FP16 baseline                                                                      | 10.2                           | 10,709                      | OOM                            | OOM                         |
-| FP16 baseline + FMHA                                                               | 6.1                            | 7,719                       | 39.1                           | 10,821                      |
-| FP16 baseline + FMHA + NhwcConv                                                    | 5.5                            | 7,656                       | 38.8                           | 11,615                      |
-| FP16 baseline + FMHA + NhwcConv + GroupNorm                                        | 5.1                            | 6,673                       | 35.8                           | 10,763                      |
-| FP16 baseline + FMHA + NhwcConv + GroupNorm + BiasSplitGelu                        | 4.9                            | 4,447                       | 33.7                           | 6,669                       |
-| FP16 baseline + FMHA + NhwcConv + GroupNorm + BiasSplitGelu + Packed QKV           | 4.8                            | 4,625                       | 33.5                           | 6,663                       |
-| FP16 baseline + FMHA + NhwcConv + GroupNorm + BiasSplitGelu + Packed QKV + BiasAdd | 4.7                            | 4,480                       | 33.3                           | 6,499                       |
+| Script                                         | Description                                                                               |
+| ---------------------------------------------- | ----------------------------------------------------------------------------------------- |
+| [demo_txt2img_xl.py](./demo_txt2img_xl.py)     | Demo of text to image generation using Stable Diffusion XL model.                         |
+| [demo_txt2img.py](./demo_txt2img.py)           | Demo of text to image generation using Stable Diffusion models except XL.                 |
+| [optimize_pipeline.py](./optimize_pipeline.py) | Optimize Stable Diffusion ONNX models exported from Huggingface diffusers or optimum      |
+| [benchmark.py](./benchmark.py)                 | Benchmark latency and memory of OnnxRuntime, xFormers or PyTorch 2.0 on stable diffusion. |
 
-FP16 baseline contains optimizations available in ONNX Runtime 1.13 including LayerNormalization, SkipLayerNormalization, Gelu and float16 conversion.
 
-Here FMHA means Attention and MultiHeadAttention operators with Flash Attention and Memory Efficient Attention kernels but inputs are not packed. Packed QKV means the inputs are packed.
+## Run demo with docker
 
-The last two optimizations (Packed QKV and BiasAdd) are only available in nightly package. Compared to 1.14.1, nightly package has slight improvement in performance.
+#### Clone the onnxruntime repository
+```
+git clone https://github.com/microsoft/onnxruntime
+cd onnxruntime
+```
 
-### Results on MI250X with 1 GCD
+#### Launch NVIDIA pytorch container
 
-With runtime tuning enabled, we get following performance number on one GCD of a MI250X GPU:
+Install nvidia-docker using [these instructions](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#docker).
 
-| Optimizations                                                         | Average Latency (batch_size=1) | Memory in MB (batch_size=1) | Average Latency (batch_size=8) | Memory in MB (batch_size=8) |
-| --------------------------------------------------------------------- | ------------------------------ | --------------------------- | ------------------------------ | --------------------------- |
-| Raw FP32 models                                                       | 6.7                            | 17,319                      | 36.4 *                         | 33,787                      |
-| FP16 baseline                                                         | 4.1                            | 8,945                       | 24.0 *                         | 34,493                      |
-| FP16 baseline + FMHA                                                  | 2.6                            | 4,886                       | 15.0                           | 10,146                      |
-| FP16 baseline + FMHA + NhwcConv                                       | 2.4                            | 4,952                       | 14.8                           | 9,632                       |
-| FP16 baseline + FMHA + NhwcConv + GroupNorm                           | 2.3                            | 4,906                       | 13.6                           | 9,774                       |
-| FP16 baseline + FMHA + NhwcConv + GroupNorm + BiasSplitGelu           | 2.2                            | 4,910                       | 12.5                           | 9,646                       |
-| FP16 baseline + FMHA + NhwcConv + GroupNorm + BiasSplitGelu + BiasAdd | 2.2                            | 4,910                       | 12.5                           | 9,778                       |
+```
+docker run --rm -it --gpus all -v $PWD:/workspace nvcr.io/nvidia/pytorch:23.10-py3 /bin/bash
+```
 
-The entries marked with `*` produce suspicious output images. The might be numerical stability or correctness issue for the pipeline. The performance number is for reference only.
+#### Build onnxruntime from source
+After launching the docker, you can build and install onnxruntime-gpu wheel like the following.
+```
+export CUDACXX=/usr/local/cuda-12.2/bin/nvcc
+git config --global --add safe.directory '*'
+sh build.sh --config Release  --build_shared_lib --parallel --use_cuda --cuda_version 12.2 \
+            --cuda_home /usr/local/cuda-12.2 --cudnn_home /usr/lib/x86_64-linux-gnu/ --build_wheel --skip_tests \
+            --use_tensorrt --tensorrt_home /usr/src/tensorrt \
+            --cmake_extra_defines onnxruntime_BUILD_UNIT_TESTS=OFF \
+            --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=80 \
+            --allow_running_as_root
+python3 -m pip install --upgrade pip
+python3 -m pip install build/Linux/Release/dist/onnxruntime_gpu-1.17.0-cp310-cp310-linux_x86_64.whl --force-reinstall
+```
 
-## Scripts:
+If the GPU is not A100, change `CMAKE_CUDA_ARCHITECTURES=80` in the command line according to the GPU compute capacity.
 
-| Script                                         | Description                                                                               |
-| ---------------------------------------------- | ----------------------------------------------------------------------------------------- |
-| [optimize_pipeline.py](./optimize_pipeline.py) | Optimize Stable Diffusion ONNX models                                                     |
-| [benchmark.py](./benchmark.py)                 | Benchmark latency and memory of OnnxRuntime, xFormers or PyTorch 2.0 on stable diffusion. |
+#### Install required packages
+```
+cd /workspace/onnxruntime/python/tools/transformers/models/stable_diffusion
+python3 -m pip install -r requirements-cuda12.txt
+python3 -m pip install --upgrade polygraphy onnx-graphsurgeon --extra-index-url https://pypi.ngc.nvidia.com
+```
 
-In below example, we run the scripts in source code directory. You can get source code like the following:
+### Run Demo
 
+You can review the usage of supported pipelines like the following:
 ```
-git clone https://github.com/microsoft/onnxruntime
-cd onnxruntime/onnxruntime/python/tools/transformers/models/stable_diffusion
+python3 demo_txt2img.py --help
+python3 demo_txt2img_xl.py --help
 ```
 
-## Example of Stable Diffusion 1.5
+For example:
+`--engine {ORT_CUDA,ORT_TRT,TRT}` can be used to choose different backend engines including CUDA or TensorRT execution provider of ONNX Runtime, or TensorRT.
+`--work-dir WORK_DIR` can be used to load or save models under the given directory. You can download the [optimized ONNX models of Stable Diffusion XL 1.0](https://huggingface.co/tlwu/stable-diffusion-xl-1.0-onnxruntime#usage-example) to save time in running the XL demo.
 
-Below is an example to optimize Stable Diffusion 1.5 in Linux. For Windows OS, please change the format of path to be like `.\sd` instead of `./sd`.
+#### Generate an image guided by a text prompt
+```python3 demo_txt2img.py "astronaut riding a horse on mars"```
 
-### Setup Environment (CUDA)
+#### Generate an image with Stable Diffusion XL guided by a text prompt
+```python3 demo_txt2img_xl.py "starry night over Golden Gate Bridge by van gogh"```
 
-It is recommended to create a Conda environment with Python 3.8, 3.9 or 3.10, and run the model with [CUDA 11.7](https://developer.nvidia.com/cuda-11-7-0-download-archive) or 11.8.
+If you do not provide prompt, the script will generate different image sizes for a list of prompts for demonstration.
+
+### Generate an image guided by a text prompt using LCM LoRA
 ```
-conda create -n py38 python=3.8
-conda activate py38
-pip install torch==1.13.1+cu117 --extra-index-url https://download.pytorch.org/whl/cu117
-pip install -r requirements-cuda.txt
+python3 demo_txt2img_xl.py "Self-portrait oil painting, a beautiful cyborg with golden hair, 8k" --scheduler LCM --lora-weights latent-consistency/lcm-lora-sdxl --denoising-steps 4
+```
+#### Generate an image with SDXL LCM model guided by a text prompt
+```
+python3 demo_txt2img_xl.py --lcm --disable-refiner "an astronaut riding a rainbow unicorn, cinematic, dramatic"
 ```
 
-ONNX Runtime requires CUDA and [cuDNN](https://developer.nvidia.com/rdp/cudnn-download) for GPU inference. CUDA 11.7 and cuDNN 8.5 are used in our tests.
+#### Generate an image with a text prompt using a control net
+```
+python3 demo_txt2img.py "Stormtrooper's lecture in beautiful lecture hall" --controlnet-type depth --controlnet-scale 1.0
+
+python3 demo_txt2img_xl.py "young Mona Lisa" --controlnet-type canny --controlnet-scale 0.5 --scheduler UniPC --disable-refiner
+```
 
-#### Install Nightly (Optional)
+## Optimize Stable Diffusion ONNX models for Hugging Face Diffusers or Optimum
 
-Skip this step if you use onnxruntime-gpu package from official releases.
+If you are able to run the above demo with docker, you can use the docker and skip the following setup and fast forward to [Export ONNX pipeline](#export-onnx-pipeline).
 
-To try latest optimizations, you can install [ort-nightly-gpu](https://aiinfra.visualstudio.com/PublicPackages/_artifacts/feed/ORT-Nightly/PyPI/ort-nightly-gpu/) package like the following:
+Below setup does not use docker. We'll use the environment to optimize ONNX models of Stable Diffusion exported by huggingface diffusers or optimum.
+For Windows OS, please change the format of path to be like `.\sd` instead of `./sd`.
 
+It is recommended to create a Conda environment with Python 3.10 for the following setup:
 ```
-pip uninstall onnxruntime-gpu
-pip install ort-nightly-gpu -i https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple/
+conda create -n py310 python=3.10
+conda activate py310
 ```
 
+### Setup Environment (CUDA) without docker
+
+First, we need install CUDA 11.8 or 12.1, [cuDNN](https://docs.nvidia.com/deeplearning/cudnn/install-guide/index.html) 8.5 or above, and [TensorRT 8.6.1](https://docs.nvidia.com/deeplearning/tensorrt/install-guide/index.html) in the machine.
+
+#### CUDA 11.8:
+
+In the Conda environment, install PyTorch 2.1 or above, and other required packages like the following:
+```
+pip install torch --index-url https://download.pytorch.org/whl/cu118
+pip install --upgrade polygraphy onnx-graphsurgeon --extra-index-url https://pypi.ngc.nvidia.com
+pip install -r requirements-cuda11.txt
+```
+
+For Windows, install nvtx like the following:
+```
+conda install -c conda-forge nvtx
+```
+
+We cannot directly `pip install tensorrt` for CUDA 11. Follow https://github.com/NVIDIA/TensorRT/issues/2773 to install TensorRT for CUDA 11 in Linux.
+
+For Windows, pip install the tensorrt wheel in the downloaded TensorRT zip file instead. Like `pip install tensorrt-8.6.1.6.windows10.x86_64.cuda-11.8\tensorrt-8.6.1.6\python\tensorrt-8.6.1-cp310-none-win_amd64.whl`.
+
+#### CUDA 12.*:
+The official package of onnxruntime-gpu 1.16.* is built for CUDA 11.8. To use CUDA 12.*, you will need [build onnxruntime from source](https://onnxruntime.ai/docs/build/inferencing.html).
+
+```
+git clone --recursive https://github.com/Microsoft/onnxruntime.git
+cd onnxruntime
+pip install cmake
+pip install -r requirements-dev.txt
+```
+Follow [example script for A100 in Ubuntu](https://github.com/microsoft/onnxruntime/blob/26a7b63716e3125bfe35fe3663ba10d2d7322628/build_release.sh)
+or [example script for RTX 4090 in Windows](https://github.com/microsoft/onnxruntime/blob/8df5f4e0df1f3b9ceeb0f1f2561b09727ace9b37/build_trt.cmd) to build and install onnxruntime-gpu wheel.
+
+Then install other python packages like the following:
+```
+pip install torch --index-url https://download.pytorch.org/whl/cu121
+pip install --upgrade polygraphy onnx-graphsurgeon --extra-index-url https://pypi.ngc.nvidia.com
+pip install -r requirements-cuda12.txt
+```
+Finally, `pip install tensorrt` for Linux. For Windows, pip install the tensorrt wheel in the downloaded TensorRT zip file instead.
+
 ### Setup Environment (ROCm)
 
-It is recommended that the users run the model with ROCm 5.4 or newer and Python 3.8, 3.9 or 3.10.
+It is recommended that the users run the model with ROCm 5.4 or newer and Python 3.10.
 Note that Windows is not supported for ROCm at the moment.
 
 ```
-conda create -n py38 python=3.8
-conda activate py38
 wget https://repo.radeon.com/rocm/manylinux/rocm-rel-5.4/torch-1.12.1%2Brocm5.4-cp38-cp38-linux_x86_64.whl
 pip install torch-1.12.1+rocm5.4-cp38-cp38-linux_x86_64.whl
 pip install -r requirements-rocm.txt
@@ -167,7 +224,13 @@ Example to optimize the exported float32 ONNX models, and save to float16 models
 python -m onnxruntime.transformers.models.stable_diffusion.optimize_pipeline -i ./sd_v1_5/fp32 -o ./sd_v1_5/fp16 --float16
 ```
 
-For SDXL model, it is recommended to use a machine with 32 GB or more memory to optimize.
+In all examples below, we run the scripts in source code directory. You can get source code like the following:
+```
+git clone https://github.com/microsoft/onnxruntime
+cd onnxruntime/onnxruntime/python/tools/transformers/models/stable_diffusion
+```
+
+For SDXL model, it is recommended to use a machine with 48 GB or more memory to optimize.
 ```
 python optimize_pipeline.py -i ./sd_xl_base_onnx -o ./sd_xl_base_fp16 --float16
 ```
@@ -233,20 +296,61 @@ Sometime, it complains ptxas not found when there are multiple CUDA versions ins
 Note that torch.compile is not supported in Windows: we encountered error `Windows not yet supported for torch.compile`. So it is excluded from RTX 3060 results of Windows.
 
 
-### Run Benchmark with TensorRT and TensorRT execution provider
+### Run Benchmark with TensorRT or TensorRT execution provider
 
 For TensorRT installation, follow https://docs.nvidia.com/deeplearning/tensorrt/install-guide/index.html.
 
 ```
 pip install torch==1.13.1+cu117 --extra-index-url https://download.pytorch.org/whl/cu117
-pip install --upgrade polygraphy>=0.47.0 onnx-graphsurgeon --extra-index-url https://pypi.ngc.nvidia.com
+pip install --upgrade polygraphy onnx-graphsurgeon --extra-index-url https://pypi.ngc.nvidia.com
 pip install -r requirements-tensorrt.txt
 export CUDA_MODULE_LOADING=LAZY
 python benchmark.py -e tensorrt -b 1 -v 1.5
 python benchmark.py -e onnxruntime -r tensorrt -b 1 -v 1.5
 python benchmark.py -e onnxruntime -r tensorrt -b 1 -v 1.5 --enable_cuda_graph
+
+python benchmark.py -e tensorrt --height 1024 --width 1024 -s 30  -b 1 -v xl-1.0 --enable_cuda_graph
+python benchmark.py -e onnxruntime -r tensorrt --height 1024 --width 1024 -s 30  -b 1 -v xl-1.0 --enable_cuda_graph
 ```
 
+### Results on RTX 3060 GPU:
+
+To show the impact of each optimization on latency and GPU memory, we did some experiments:
+
+| Optimizations                                                                      | Average Latency (batch_size=1) | Memory in MB (batch_size=1) | Average Latency (batch_size=8) | Memory in MB (batch_size=8) |
+| ---------------------------------------------------------------------------------- | ------------------------------ | --------------------------- | ------------------------------ | --------------------------- |
+| Raw FP32 models                                                                    | 25.6                           | 10,667                      | OOM                            | OOM                         |
+| FP16 baseline                                                                      | 10.2                           | 10,709                      | OOM                            | OOM                         |
+| FP16 baseline + FMHA                                                               | 6.1                            | 7,719                       | 39.1                           | 10,821                      |
+| FP16 baseline + FMHA + NhwcConv                                                    | 5.5                            | 7,656                       | 38.8                           | 11,615                      |
+| FP16 baseline + FMHA + NhwcConv + GroupNorm                                        | 5.1                            | 6,673                       | 35.8                           | 10,763                      |
+| FP16 baseline + FMHA + NhwcConv + GroupNorm + BiasSplitGelu                        | 4.9                            | 4,447                       | 33.7                           | 6,669                       |
+| FP16 baseline + FMHA + NhwcConv + GroupNorm + BiasSplitGelu + Packed QKV           | 4.8                            | 4,625                       | 33.5                           | 6,663                       |
+| FP16 baseline + FMHA + NhwcConv + GroupNorm + BiasSplitGelu + Packed QKV + BiasAdd | 4.7                            | 4,480                       | 33.3                           | 6,499                       |
+
+FP16 baseline contains optimizations available in ONNX Runtime 1.13 including LayerNormalization, SkipLayerNormalization, Gelu and float16 conversion.
+
+Here FMHA means Attention and MultiHeadAttention operators with Flash Attention and Memory Efficient Attention kernels but inputs are not packed. Packed QKV means the inputs are packed.
+
+The last two optimizations (Packed QKV and BiasAdd) are only available in nightly package. Compared to 1.14.1, nightly package has slight improvement in performance.
+
+### Results on MI250X with 1 GCD
+
+With runtime tuning enabled, we get following performance number on one GCD of a MI250X GPU:
+
+| Optimizations                                                         | Average Latency (batch_size=1) | Memory in MB (batch_size=1) | Average Latency (batch_size=8) | Memory in MB (batch_size=8) |
+| --------------------------------------------------------------------- | ------------------------------ | --------------------------- | ------------------------------ | --------------------------- |
+| Raw FP32 models                                                       | 6.7                            | 17,319                      | 36.4 *                         | 33,787                      |
+| FP16 baseline                                                         | 4.1                            | 8,945                       | 24.0 *                         | 34,493                      |
+| FP16 baseline + FMHA                                                  | 2.6                            | 4,886                       | 15.0                           | 10,146                      |
+| FP16 baseline + FMHA + NhwcConv                                       | 2.4                            | 4,952                       | 14.8                           | 9,632                       |
+| FP16 baseline + FMHA + NhwcConv + GroupNorm                           | 2.3                            | 4,906                       | 13.6                           | 9,774                       |
+| FP16 baseline + FMHA + NhwcConv + GroupNorm + BiasSplitGelu           | 2.2                            | 4,910                       | 12.5                           | 9,646                       |
+| FP16 baseline + FMHA + NhwcConv + GroupNorm + BiasSplitGelu + BiasAdd | 2.2                            | 4,910                       | 12.5                           | 9,778                       |
+
+The entries marked with `*` produce suspicious output images. The might be numerical stability or correctness issue for the pipeline. The performance number is for reference only.
+
+
 ### Example Benchmark output
 
 Common settings for below test results:
@@ -382,7 +486,8 @@ Results are from Standard_NC4as_T4_v3 Azure virtual machine:
 
 ### Credits
 
-Some CUDA kernels (Flash Attention, GroupNorm, SplitGelu and BiasAdd etc.) were originally implemented in [TensorRT](https://github.com/nviDIA/TensorRT) by Nvidia.
+Some CUDA kernels (TensorRT Fused Attention, GroupNorm, SplitGelu and BiasAdd etc.) and demo diffusion were originally implemented in [TensorRT](https://github.com/nviDIA/TensorRT) by Nvidia.
+We use [Flash Attention v2](https://github.com/Dao-AILab/flash-attention) in Linux.
 We use Memory efficient attention from [CUTLASS](https://github.com/NVIDIA/cutlass). The kernels were developed by Meta xFormers.
 The ONNX export script and pipeline for stable diffusion was developed by Huggingface [diffusers](https://github.com/huggingface/diffusers) library.
 
@@ -390,10 +495,8 @@ Most ROCm kernel optimizations are from [composable kernel](https://github.com/R
 Some kernels are enabled by MIOpen. We hereby thank for the AMD developers' collaboration.
 
 ### Future Works
-
-There are other optimizations might improve the performance or reduce memory footprint:
-* Export the whole pipeline into a single ONNX model. Currently, there are multiple ONNX models (CLIP, VAE and U-Net etc). Each model uses separated thread pool and memory allocator. Combine them into one model could share thread pool and memory allocator. The end result is more efficient and less memory footprint.
-* For Stable Diffusion 2.1, we disable TensorRT flash attention kernel and use only memory efficient attention. It is possible to add flash attention in Windows to improve performance.
-* Reduce GPU memory footprint by actively deleting buffers for intermediate results.
-* Safety Checker Optimization
-* Leverage FP8 in latest GPU
+* Update demo to support inpainting.
+* Support flash attention in Windows.
+* Integration with UI.
+* Optimization for H100 GPU.
+* Export the whole pipeline into a single ONNX model. This senario is mainly for mobile device.
diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/benchmark.py b/onnxruntime/python/tools/transformers/models/stable_diffusion/benchmark.py
index 13126f648d290..1f1db914e274b 100755
--- a/onnxruntime/python/tools/transformers/models/stable_diffusion/benchmark.py
+++ b/onnxruntime/python/tools/transformers/models/stable_diffusion/benchmark.py
@@ -10,15 +10,18 @@
 import sys
 import time
 
+import __init__  # noqa: F401. Walk-around to run this script directly
 import coloredlogs
 
 # import torch before onnxruntime so that onnxruntime uses the cuDNN in the torch package.
 import torch
+from benchmark_helper import measure_memory
 
 SD_MODELS = {
     "1.5": "runwayml/stable-diffusion-v1-5",
     "2.0": "stabilityai/stable-diffusion-2",
     "2.1": "stabilityai/stable-diffusion-2-1",
+    "xl-1.0": "stabilityai/stable-diffusion-xl-refiner-1.0",
 }
 
 PROVIDERS = {
@@ -43,139 +46,13 @@ def example_prompts():
         "delicate elvish moonstone necklace on a velvet background, symmetrical intricate motifs, leaves, flowers, 8k",
     ]
 
-    return prompts
+    negative_prompt = "bad composition, ugly, abnormal, malformed"
 
-
-class CudaMemoryMonitor:
-    def __init__(self, keep_measuring=True):
-        self.keep_measuring = keep_measuring
-
-    def measure_gpu_usage(self):
-        from py3nvml.py3nvml import (
-            NVMLError,
-            nvmlDeviceGetCount,
-            nvmlDeviceGetHandleByIndex,
-            nvmlDeviceGetMemoryInfo,
-            nvmlDeviceGetName,
-            nvmlInit,
-            nvmlShutdown,
-        )
-
-        max_gpu_usage = []
-        gpu_name = []
-        try:
-            nvmlInit()
-            device_count = nvmlDeviceGetCount()
-            if not isinstance(device_count, int):
-                print(f"nvmlDeviceGetCount result is not integer: {device_count}")
-                return None
-
-            max_gpu_usage = [0 for i in range(device_count)]
-            gpu_name = [nvmlDeviceGetName(nvmlDeviceGetHandleByIndex(i)) for i in range(device_count)]
-            while True:
-                for i in range(device_count):
-                    info = nvmlDeviceGetMemoryInfo(nvmlDeviceGetHandleByIndex(i))
-                    if isinstance(info, str):
-                        print(f"nvmlDeviceGetMemoryInfo returns str: {info}")
-                        return None
-                    max_gpu_usage[i] = max(max_gpu_usage[i], info.used / 1024**2)
-                time.sleep(0.002)  # 2ms
-                if not self.keep_measuring:
-                    break
-            nvmlShutdown()
-            return [
-                {
-                    "device_id": i,
-                    "name": gpu_name[i],
-                    "max_used_MB": max_gpu_usage[i],
-                }
-                for i in range(device_count)
-            ]
-        except NVMLError as error:
-            print("Error fetching GPU information using nvml: %s", error)
-            return None
-
-
-class RocmMemoryMonitor:
-    def __init__(self, keep_measuring=True):
-        self.keep_measuring = keep_measuring
-        rocm_smi_path = "/opt/rocm/libexec/rocm_smi"
-        if os.path.exists(rocm_smi_path):
-            if rocm_smi_path not in sys.path:
-                sys.path.append(rocm_smi_path)
-        try:
-            import rocm_smi
-
-            self.rocm_smi = rocm_smi
-            self.rocm_smi.initializeRsmi()
-        except ImportError:
-            self.rocm_smi = None
-
-    def get_used_memory(self, dev):
-        if self.rocm_smi is None:
-            return -1
-        return self.rocm_smi.getMemInfo(dev, "VRAM")[0] / 1024 / 1024
-
-    def measure_gpu_usage(self):
-        device_count = len(self.rocm_smi.listDevices()) if self.rocm_smi is not None else 0
-        max_gpu_usage = [0 for i in range(device_count)]
-        gpu_name = [f"GPU{i}" for i in range(device_count)]
-        while True:
-            for i in range(device_count):
-                max_gpu_usage[i] = max(max_gpu_usage[i], self.get_used_memory(i))
-            time.sleep(0.002)  # 2ms
-            if not self.keep_measuring:
-                break
-        return [
-            {
-                "device_id": i,
-                "name": gpu_name[i],
-                "max_used_MB": max_gpu_usage[i],
-            }
-            for i in range(device_count)
-        ]
+    return prompts, negative_prompt
 
 
 def measure_gpu_memory(monitor_type, func, start_memory=None):
-    if monitor_type is None:
-        return None
-
-    monitor = monitor_type(False)
-    memory_before_test = monitor.measure_gpu_usage()
-
-    if start_memory is None:
-        start_memory = memory_before_test
-    if start_memory is None:
-        return None
-    if func is None:
-        return start_memory
-
-    from concurrent.futures import ThreadPoolExecutor
-
-    with ThreadPoolExecutor() as executor:
-        monitor = monitor_type()
-        mem_thread = executor.submit(monitor.measure_gpu_usage)
-        try:
-            fn_thread = executor.submit(func)
-            _ = fn_thread.result()
-        finally:
-            monitor.keep_measuring = False
-            max_usage = mem_thread.result()
-
-        if max_usage is None:
-            return None
-
-        print(f"GPU memory usage: before={memory_before_test}  peak={max_usage}")
-        if len(start_memory) >= 1 and len(max_usage) >= 1 and len(start_memory) == len(max_usage):
-            # When there are multiple GPUs, we will check the one with maximum usage.
-            max_used = 0
-            for i, memory_before in enumerate(start_memory):
-                before = memory_before["max_used_MB"]
-                after = max_usage[i]["max_used_MB"]
-                used = after - before
-                max_used = max(max_used, used)
-            return max_used
-    return None
+    return measure_memory(is_gpu=True, func=func, monitor_type=monitor_type, start_memory=start_memory)
 
 
 def get_ort_pipeline(model_name: str, directory: str, provider, disable_safety_checker: bool):
@@ -256,7 +133,7 @@ def run_ort_pipeline(
 
     assert isinstance(pipe, OnnxStableDiffusionPipeline)
 
-    prompts = example_prompts()
+    prompts, negative_prompt = example_prompts()
 
     def warmup():
         pipe("warm up", height, width, num_inference_steps=steps, num_images_per_prompt=batch_size)
@@ -275,13 +152,12 @@ def warmup():
         for j in range(batch_count):
             inference_start = time.time()
             images = pipe(
-                prompt,
+                [prompt] * batch_size,
                 height,
                 width,
                 num_inference_steps=steps,
-                negative_prompt=None,
+                negative_prompt=[negative_prompt] * batch_size,
                 guidance_scale=7.5,
-                num_images_per_prompt=batch_size,
             ).images
             inference_end = time.time()
             latency = inference_end - inference_start
@@ -320,7 +196,7 @@ def run_torch_pipeline(
     start_memory,
     memory_monitor_type,
 ):
-    prompts = example_prompts()
+    prompts, negative_prompt = example_prompts()
 
     # total 2 runs of warm up, and measure GPU memory for CUDA EP
     def warmup():
@@ -342,13 +218,12 @@ def warmup():
         for j in range(batch_count):
             inference_start = time.time()
             images = pipe(
-                prompt=prompt,
+                prompt=[prompt] * batch_size,
                 height=height,
                 width=width,
                 num_inference_steps=steps,
                 guidance_scale=7.5,
-                negative_prompt=None,
-                num_images_per_prompt=batch_size,
+                negative_prompt=[negative_prompt] * batch_size,
                 generator=None,  # torch.Generator
             ).images
 
@@ -427,7 +302,7 @@ def run_ort(
 
 
 def export_and_run_ort(
-    model_name: str,
+    version: str,
     provider: str,
     batch_size: int,
     disable_safety_checker: bool,
@@ -443,15 +318,19 @@ def export_and_run_ort(
     assert provider == "CUDAExecutionProvider"
 
     from diffusers import DDIMScheduler
+    from diffusion_models import PipelineInfo
     from onnxruntime_cuda_txt2img import OnnxruntimeCudaStableDiffusionPipeline
 
-    scheduler = DDIMScheduler.from_pretrained(model_name, subfolder="scheduler")
+    pipeline_info = PipelineInfo(version)
+    model_name = pipeline_info.name()
 
+    scheduler = DDIMScheduler.from_pretrained(model_name, subfolder="scheduler")
     pipe = OnnxruntimeCudaStableDiffusionPipeline.from_pretrained(
         model_name,
         scheduler=scheduler,
         requires_safety_checker=not disable_safety_checker,
         enable_cuda_graph=enable_cuda_graph,
+        pipeline_info=pipeline_info,
     )
 
     # re-use cached folder to save ONNX models
@@ -473,7 +352,7 @@ def warmup():
     image_filename_prefix = get_image_filename_prefix("ort_cuda", model_name, batch_size, disable_safety_checker)
 
     latency_list = []
-    prompts = example_prompts()
+    prompts, negative_prompt = example_prompts()
     for i, prompt in enumerate(prompts):
         if i >= num_prompts:
             break
@@ -481,6 +360,7 @@ def warmup():
             inference_start = time.time()
             images = pipe(
                 [prompt] * batch_size,
+                negative_prompt=[negative_prompt] * batch_size,
                 num_inference_steps=steps,
             ).images
             inference_end = time.time()
@@ -514,7 +394,7 @@ def warmup():
 
 
 def run_ort_trt(
-    model_name: str,
+    version: str,
     batch_size: int,
     disable_safety_checker: bool,
     height: int,
@@ -528,8 +408,12 @@ def run_ort_trt(
     enable_cuda_graph: bool,
 ):
     from diffusers import DDIMScheduler
+    from diffusion_models import PipelineInfo
     from onnxruntime_tensorrt_txt2img import OnnxruntimeTensorRTStableDiffusionPipeline
 
+    pipeline_info = PipelineInfo(version)
+    model_name = pipeline_info.name()
+
     assert batch_size <= max_batch_size
 
     scheduler = DDIMScheduler.from_pretrained(model_name, subfolder="scheduler")
@@ -544,6 +428,7 @@ def run_ort_trt(
         max_batch_size=max_batch_size,
         onnx_opset=17,
         enable_cuda_graph=enable_cuda_graph,
+        pipeline_info=pipeline_info,
     )
 
     # re-use cached folder to save ONNX models and TensorRT Engines
@@ -552,7 +437,7 @@ def run_ort_trt(
     pipe = pipe.to("cuda")
 
     def warmup():
-        pipe(["warm up"] * batch_size, num_inference_steps=steps)
+        pipe(["warm up"] * batch_size, negative_prompt=["negative"] * batch_size, num_inference_steps=steps)
 
     # Run warm up, and measure GPU memory of two runs
     # The first run has algo search so it might need more memory
@@ -564,7 +449,7 @@ def warmup():
     image_filename_prefix = get_image_filename_prefix("ort_trt", model_name, batch_size, disable_safety_checker)
 
     latency_list = []
-    prompts = example_prompts()
+    prompts, negative_prompt = example_prompts()
     for i, prompt in enumerate(prompts):
         if i >= num_prompts:
             break
@@ -572,6 +457,7 @@ def warmup():
             inference_start = time.time()
             images = pipe(
                 [prompt] * batch_size,
+                negative_prompt=[negative_prompt] * batch_size,
                 num_inference_steps=steps,
             ).images
             inference_end = time.time()
@@ -589,7 +475,7 @@ def warmup():
         "model_name": model_name,
         "engine": "onnxruntime",
         "version": ort_version,
-        "provider": f"tensorrt{trt_version})",
+        "provider": f"tensorrt({trt_version})",
         "directory": pipe.engine_dir,
         "height": height,
         "width": width,
@@ -606,7 +492,148 @@ def warmup():
     }
 
 
-def run_tensorrt(
+def run_ort_trt_static(
+    work_dir: str,
+    version: str,
+    batch_size: int,
+    disable_safety_checker: bool,
+    height: int,
+    width: int,
+    steps: int,
+    num_prompts: int,
+    batch_count: int,
+    start_memory,
+    memory_monitor_type,
+    max_batch_size: int,
+    nvtx_profile: bool = False,
+    use_cuda_graph: bool = True,
+):
+    print("[I] Initializing ORT TensorRT EP accelerated StableDiffusionXL txt2img pipeline (static input shape)")
+
+    # Register TensorRT plugins
+    from trt_utilities import init_trt_plugins
+
+    init_trt_plugins()
+
+    assert batch_size <= max_batch_size
+
+    from diffusion_models import PipelineInfo
+
+    pipeline_info = PipelineInfo(version)
+    short_name = pipeline_info.short_name()
+
+    from engine_builder import EngineType, get_engine_paths
+    from pipeline_txt2img import Txt2ImgPipeline
+
+    engine_type = EngineType.ORT_TRT
+    onnx_dir, engine_dir, output_dir, framework_model_dir, _ = get_engine_paths(work_dir, pipeline_info, engine_type)
+
+    # Initialize pipeline
+    pipeline = Txt2ImgPipeline(
+        pipeline_info,
+        scheduler="DDIM",
+        output_dir=output_dir,
+        hf_token=None,
+        verbose=False,
+        nvtx_profile=nvtx_profile,
+        max_batch_size=max_batch_size,
+        use_cuda_graph=use_cuda_graph,
+        framework_model_dir=framework_model_dir,
+        engine_type=engine_type,
+    )
+
+    # Load TensorRT engines and pytorch modules
+    pipeline.backend.build_engines(
+        engine_dir,
+        framework_model_dir,
+        onnx_dir,
+        17,
+        opt_image_height=height,
+        opt_image_width=width,
+        opt_batch_size=batch_size,
+        force_engine_rebuild=False,
+        static_batch=True,
+        static_image_shape=True,
+        max_workspace_size=0,
+        device_id=torch.cuda.current_device(),
+    )
+
+    # Here we use static batch and image size, so the resource allocation only need done once.
+    # For dynamic batch and image size, some cost (like memory allocation) shall be included in latency.
+    pipeline.load_resources(height, width, batch_size)
+
+    def warmup():
+        pipeline.run(
+            ["warm up"] * batch_size, ["negative"] * batch_size, height, width, denoising_steps=steps, warmup=True
+        )
+
+    # Run warm up, and measure GPU memory of two runs
+    # The first run has algo search so it might need more memory
+    first_run_memory = measure_gpu_memory(memory_monitor_type, warmup, start_memory)
+    second_run_memory = measure_gpu_memory(memory_monitor_type, warmup, start_memory)
+
+    warmup()
+
+    image_filename_prefix = get_image_filename_prefix("ort_trt", short_name, batch_size, disable_safety_checker)
+
+    latency_list = []
+    prompts, negative_prompt = example_prompts()
+    for i, prompt in enumerate(prompts):
+        if i >= num_prompts:
+            break
+        for j in range(batch_count):
+            inference_start = time.time()
+            # Use warmup mode here since non-warmup mode will save image to disk.
+            images, pipeline_time = pipeline.run(
+                [prompt] * batch_size,
+                [negative_prompt] * batch_size,
+                height,
+                width,
+                denoising_steps=steps,
+                guidance=7.5,
+                seed=123,
+                warmup=True,
+            )
+            images = pipeline.to_pil_image(
+                images
+            )  # include image conversion time to pil image for apple-to-apple compare
+            inference_end = time.time()
+            latency = inference_end - inference_start
+            latency_list.append(latency)
+            print(f"End2End took {latency:.3f} seconds. Inference latency: {pipeline_time:.1f} ms")
+            for k, image in enumerate(images):
+                image.save(f"{image_filename_prefix}_{i}_{j}_{k}.jpg")
+
+    pipeline.teardown()
+
+    from tensorrt import __version__ as trt_version
+
+    from onnxruntime import __version__ as ort_version
+
+    return {
+        "model_name": pipeline_info.name(),
+        "engine": "onnxruntime",
+        "version": ort_version,
+        "provider": f"tensorrt({trt_version})",
+        "directory": engine_dir,
+        "height": height,
+        "width": width,
+        "steps": steps,
+        "batch_size": batch_size,
+        "batch_count": batch_count,
+        "num_prompts": num_prompts,
+        "average_latency": sum(latency_list) / len(latency_list),
+        "median_latency": statistics.median(latency_list),
+        "first_run_memory_MB": first_run_memory,
+        "second_run_memory_MB": second_run_memory,
+        "disable_safety_checker": disable_safety_checker,
+        "enable_cuda_graph": use_cuda_graph,
+    }
+
+
+def run_tensorrt_static(
+    work_dir: str,
+    version: str,
     model_name: str,
     batch_size: int,
     disable_safety_checker: bool,
@@ -618,32 +645,79 @@ def run_tensorrt(
     start_memory,
     memory_monitor_type,
     max_batch_size: int,
+    nvtx_profile: bool = False,
+    use_cuda_graph: bool = True,
 ):
-    from diffusers import DDIMScheduler
-    from diffusers.pipelines.stable_diffusion import StableDiffusionPipeline
+    print("[I] Initializing TensorRT accelerated StableDiffusionXL txt2img pipeline (static input shape)")
+
+    from cuda import cudart
+
+    # Register TensorRT plugins
+    from trt_utilities import init_trt_plugins
+
+    init_trt_plugins()
 
     assert batch_size <= max_batch_size
 
-    scheduler = DDIMScheduler.from_pretrained(model_name, subfolder="scheduler")
-    pipe = StableDiffusionPipeline.from_pretrained(
-        model_name,
-        custom_pipeline="stable_diffusion_tensorrt_txt2img",
-        revision="fp16",
-        torch_dtype=torch.float16,
-        scheduler=scheduler,
-        requires_safety_checker=not disable_safety_checker,
-        image_height=height,
-        image_width=width,
+    from diffusion_models import PipelineInfo
+
+    pipeline_info = PipelineInfo(version)
+
+    from engine_builder import EngineType, get_engine_paths
+    from pipeline_txt2img import Txt2ImgPipeline
+
+    engine_type = EngineType.TRT
+    onnx_dir, engine_dir, output_dir, framework_model_dir, timing_cache = get_engine_paths(
+        work_dir, pipeline_info, engine_type
+    )
+
+    # Initialize pipeline
+    pipeline = Txt2ImgPipeline(
+        pipeline_info,
+        scheduler="DDIM",
+        output_dir=output_dir,
+        hf_token=None,
+        verbose=False,
+        nvtx_profile=nvtx_profile,
         max_batch_size=max_batch_size,
+        use_cuda_graph=True,
+        engine_type=engine_type,
     )
 
-    # re-use cached folder to save ONNX models and TensorRT Engines
-    pipe.set_cached_folder(model_name, revision="fp16")
+    # Load TensorRT engines and pytorch modules
+    pipeline.backend.load_engines(
+        engine_dir=engine_dir,
+        framework_model_dir=framework_model_dir,
+        onnx_dir=onnx_dir,
+        onnx_opset=17,
+        opt_batch_size=batch_size,
+        opt_image_height=height,
+        opt_image_width=width,
+        force_export=False,
+        force_optimize=False,
+        force_build=False,
+        static_batch=True,
+        static_shape=True,
+        enable_refit=False,
+        enable_preview=False,
+        enable_all_tactics=False,
+        timing_cache=timing_cache,
+        onnx_refit_dir=None,
+    )
 
-    pipe = pipe.to("cuda")
+    # activate engines
+    max_device_memory = max(pipeline.backend.max_device_memory(), pipeline.backend.max_device_memory())
+    _, shared_device_memory = cudart.cudaMalloc(max_device_memory)
+    pipeline.backend.activate_engines(shared_device_memory)
+
+    # Here we use static batch and image size, so the resource allocation only need done once.
+    # For dynamic batch and image size, some cost (like memory allocation) shall be included in latency.
+    pipeline.load_resources(height, width, batch_size)
 
     def warmup():
-        pipe(["warm up"] * batch_size, num_inference_steps=steps)
+        pipeline.run(
+            ["warm up"] * batch_size, ["negative"] * batch_size, height, width, denoising_steps=steps, warmup=True
+        )
 
     # Run warm up, and measure GPU memory of two runs
     # The first run has algo search so it might need more memory
@@ -655,28 +729,225 @@ def warmup():
     image_filename_prefix = get_image_filename_prefix("trt", model_name, batch_size, disable_safety_checker)
 
     latency_list = []
-    prompts = example_prompts()
+    prompts, negative_prompt = example_prompts()
     for i, prompt in enumerate(prompts):
         if i >= num_prompts:
             break
         for j in range(batch_count):
             inference_start = time.time()
-            images = pipe(
+            # Use warmup mode here since non-warmup mode will save image to disk.
+            images, pipeline_time = pipeline.run(
                 [prompt] * batch_size,
-                num_inference_steps=steps,
-            ).images
+                [negative_prompt] * batch_size,
+                height,
+                width,
+                denoising_steps=steps,
+                guidance=7.5,
+                seed=123,
+                warmup=True,
+            )
+            images = pipeline.to_pil_image(
+                images
+            )  # include image conversion time to pil image for apple-to-apple compare
             inference_end = time.time()
             latency = inference_end - inference_start
             latency_list.append(latency)
-            print(f"Inference took {latency:.3f} seconds")
+            print(f"End2End took {latency:.3f} seconds. Inference latency: {pipeline_time:.1f} ms")
             for k, image in enumerate(images):
                 image.save(f"{image_filename_prefix}_{i}_{j}_{k}.jpg")
 
-    from tensorrt import __version__ as trt_version
+    pipeline.teardown()
+
+    import tensorrt as trt
+
+    return {
+        "engine": "tensorrt",
+        "version": trt.__version__,
+        "provider": "default",
+        "height": height,
+        "width": width,
+        "steps": steps,
+        "batch_size": batch_size,
+        "batch_count": batch_count,
+        "num_prompts": num_prompts,
+        "average_latency": sum(latency_list) / len(latency_list),
+        "median_latency": statistics.median(latency_list),
+        "first_run_memory_MB": first_run_memory,
+        "second_run_memory_MB": second_run_memory,
+        "enable_cuda_graph": use_cuda_graph,
+    }
+
+
+def run_tensorrt_static_xl(
+    work_dir: str,
+    version: str,
+    batch_size: int,
+    disable_safety_checker: bool,
+    height: int,
+    width: int,
+    steps: int,
+    num_prompts: int,
+    batch_count: int,
+    start_memory,
+    memory_monitor_type,
+    max_batch_size: int,
+    nvtx_profile: bool = False,
+    use_cuda_graph=True,
+):
+    print("[I] Initializing TensorRT accelerated StableDiffusionXL txt2img pipeline (static input shape)")
+
+    import tensorrt as trt
+    from cuda import cudart
+    from trt_utilities import init_trt_plugins
+
+    # Validate image dimensions
+    image_height = height
+    image_width = width
+    if image_height % 8 != 0 or image_width % 8 != 0:
+        raise ValueError(
+            f"Image height and width have to be divisible by 8 but specified as: {image_height} and {image_width}."
+        )
+
+    # Register TensorRT plugins
+    init_trt_plugins()
+
+    assert batch_size <= max_batch_size
+
+    from diffusion_models import PipelineInfo
+    from engine_builder import EngineType, get_engine_paths
+
+    def init_pipeline(pipeline_class, pipeline_info):
+        engine_type = EngineType.TRT
+
+        onnx_dir, engine_dir, output_dir, framework_model_dir, timing_cache = get_engine_paths(
+            work_dir, pipeline_info, engine_type
+        )
+
+        # Initialize pipeline
+        pipeline = pipeline_class(
+            pipeline_info,
+            scheduler="DDIM",
+            output_dir=output_dir,
+            hf_token=None,
+            verbose=False,
+            nvtx_profile=nvtx_profile,
+            max_batch_size=max_batch_size,
+            use_cuda_graph=use_cuda_graph,
+            framework_model_dir=framework_model_dir,
+            engine_type=engine_type,
+        )
+
+        pipeline.backend.load_engines(
+            engine_dir=engine_dir,
+            framework_model_dir=framework_model_dir,
+            onnx_dir=onnx_dir,
+            onnx_opset=17,
+            opt_batch_size=batch_size,
+            opt_image_height=height,
+            opt_image_width=width,
+            force_export=False,
+            force_optimize=False,
+            force_build=False,
+            static_batch=True,
+            static_shape=True,
+            enable_refit=False,
+            enable_preview=False,
+            enable_all_tactics=False,
+            timing_cache=timing_cache,
+            onnx_refit_dir=None,
+        )
+        return pipeline
+
+    from pipeline_img2img_xl import Img2ImgXLPipeline
+    from pipeline_txt2img_xl import Txt2ImgXLPipeline
+
+    base_pipeline_info = PipelineInfo(version)
+    demo_base = init_pipeline(Txt2ImgXLPipeline, base_pipeline_info)
+
+    refiner_pipeline_info = PipelineInfo(version, is_refiner=True)
+    demo_refiner = init_pipeline(Img2ImgXLPipeline, refiner_pipeline_info)
+
+    max_device_memory = max(demo_base.backend.max_device_memory(), demo_refiner.backend.max_device_memory())
+    _, shared_device_memory = cudart.cudaMalloc(max_device_memory)
+    demo_base.backend.activate_engines(shared_device_memory)
+    demo_refiner.backend.activate_engines(shared_device_memory)
+
+    # Here we use static batch and image size, so the resource allocation only need done once.
+    # For dynamic batch and image size, some cost (like memory allocation) shall be included in latency.
+    demo_base.load_resources(image_height, image_width, batch_size)
+    demo_refiner.load_resources(image_height, image_width, batch_size)
+
+    def run_sd_xl_inference(prompt, negative_prompt, seed=None, warmup=False):
+        images, time_base = demo_base.run(
+            prompt,
+            negative_prompt,
+            image_height,
+            image_width,
+            denoising_steps=steps,
+            guidance=5.0,
+            warmup=warmup,
+            seed=seed,
+            return_type="latent",
+        )
+
+        images, time_refiner = demo_refiner.run(
+            prompt,
+            negative_prompt,
+            images,
+            image_height,
+            image_width,
+            denoising_steps=steps,
+            guidance=5.0,
+            warmup=warmup,
+            seed=seed,
+        )
+        return images, time_base + time_refiner
+
+    def warmup():
+        run_sd_xl_inference(["warm up"] * batch_size, ["negative"] * batch_size, warmup=True)
+
+    # Run warm up, and measure GPU memory of two runs
+    # The first run has algo search so it might need more memory
+    first_run_memory = measure_gpu_memory(memory_monitor_type, warmup, start_memory)
+    second_run_memory = measure_gpu_memory(memory_monitor_type, warmup, start_memory)
+
+    warmup()
+
+    model_name = refiner_pipeline_info.name()
+    image_filename_prefix = get_image_filename_prefix("trt", model_name, batch_size, disable_safety_checker)
+
+    latency_list = []
+    prompts, negative_prompt = example_prompts()
+    for i, prompt in enumerate(prompts):
+        if i >= num_prompts:
+            break
+        for j in range(batch_count):
+            inference_start = time.time()
+            # Use warmup mode here since non-warmup mode will save image to disk.
+            if nvtx_profile:
+                cudart.cudaProfilerStart()
+            images, pipeline_time = run_sd_xl_inference(
+                [prompt] * batch_size, [negative_prompt] * batch_size, seed=123, warmup=True
+            )
+            if nvtx_profile:
+                cudart.cudaProfilerStop()
+            images = demo_refiner.to_pil_image(
+                images
+            )  # include image conversion time to pil image for apple-to-apple compare
+            inference_end = time.time()
+            latency = inference_end - inference_start
+            latency_list.append(latency)
+            print(f"End2End took {latency:.3f} seconds. Inference latency: {pipeline_time:.1f} ms")
+            for k, image in enumerate(images):
+                image.save(f"{image_filename_prefix}_{i}_{j}_{k}.png")
+
+    demo_base.teardown()
+    demo_refiner.teardown()
 
     return {
+        "model_name": model_name,
         "engine": "tensorrt",
-        "version": trt_version,
+        "version": trt.__version__,
         "provider": "default",
         "height": height,
         "width": width,
@@ -688,7 +959,178 @@ def warmup():
         "median_latency": statistics.median(latency_list),
         "first_run_memory_MB": first_run_memory,
         "second_run_memory_MB": second_run_memory,
-        "enable_cuda_graph": False,
+        "enable_cuda_graph": use_cuda_graph,
+    }
+
+
+def run_ort_trt_xl(
+    work_dir: str,
+    version: str,
+    batch_size: int,
+    disable_safety_checker: bool,
+    height: int,
+    width: int,
+    steps: int,
+    num_prompts: int,
+    batch_count: int,
+    start_memory,
+    memory_monitor_type,
+    max_batch_size: int,
+    nvtx_profile: bool = False,
+    use_cuda_graph=True,
+):
+    from cuda import cudart
+
+    # Validate image dimensions
+    image_height = height
+    image_width = width
+    if image_height % 8 != 0 or image_width % 8 != 0:
+        raise ValueError(
+            f"Image height and width have to be divisible by 8 but specified as: {image_height} and {image_width}."
+        )
+
+    assert batch_size <= max_batch_size
+
+    from engine_builder import EngineType, get_engine_paths
+
+    def init_pipeline(pipeline_class, pipeline_info):
+        engine_type = EngineType.ORT_TRT
+
+        onnx_dir, engine_dir, output_dir, framework_model_dir, _ = get_engine_paths(
+            work_dir, pipeline_info, engine_type
+        )
+
+        # Initialize pipeline
+        pipeline = pipeline_class(
+            pipeline_info,
+            scheduler="DDIM",
+            output_dir=output_dir,
+            hf_token=None,
+            verbose=False,
+            nvtx_profile=nvtx_profile,
+            max_batch_size=max_batch_size,
+            use_cuda_graph=use_cuda_graph,
+            framework_model_dir=framework_model_dir,
+            engine_type=engine_type,
+        )
+
+        pipeline.backend.build_engines(
+            engine_dir,
+            framework_model_dir,
+            onnx_dir,
+            17,
+            opt_image_height=height,
+            opt_image_width=width,
+            opt_batch_size=batch_size,
+            force_engine_rebuild=False,
+            static_batch=True,
+            static_image_shape=True,
+            max_workspace_size=0,
+            device_id=torch.cuda.current_device(),  # TODO: might not work with CUDA_VISIBLE_DEVICES
+        )
+        return pipeline
+
+    from diffusion_models import PipelineInfo
+    from pipeline_img2img_xl import Img2ImgXLPipeline
+    from pipeline_txt2img_xl import Txt2ImgXLPipeline
+
+    base_pipeline_info = PipelineInfo(version)
+    demo_base = init_pipeline(Txt2ImgXLPipeline, base_pipeline_info)
+
+    refiner_pipeline_info = PipelineInfo(version, is_refiner=True)
+    demo_refiner = init_pipeline(Img2ImgXLPipeline, refiner_pipeline_info)
+
+    demo_base.load_resources(image_height, image_width, batch_size)
+    demo_refiner.load_resources(image_height, image_width, batch_size)
+
+    def run_sd_xl_inference(prompt, negative_prompt, seed=None, warmup=False):
+        images, time_base = demo_base.run(
+            prompt,
+            negative_prompt,
+            image_height,
+            image_width,
+            denoising_steps=steps,
+            guidance=5.0,
+            warmup=warmup,
+            seed=seed,
+            return_type="latent",
+        )
+        images, time_refiner = demo_refiner.run(
+            prompt,
+            negative_prompt,
+            images,
+            image_height,
+            image_width,
+            denoising_steps=steps,
+            guidance=5.0,
+            warmup=warmup,
+            seed=seed,
+        )
+        return images, time_base + time_refiner
+
+    def warmup():
+        run_sd_xl_inference(["warm up"] * batch_size, ["negative"] * batch_size, warmup=True)
+
+    # Run warm up, and measure GPU memory of two runs
+    # The first run has algo search so it might need more memory
+    first_run_memory = measure_gpu_memory(memory_monitor_type, warmup, start_memory)
+    second_run_memory = measure_gpu_memory(memory_monitor_type, warmup, start_memory)
+
+    warmup()
+
+    model_name = refiner_pipeline_info.name()
+    image_filename_prefix = get_image_filename_prefix("ort_trt", model_name, batch_size, disable_safety_checker)
+
+    latency_list = []
+    prompts, negative_prompt = example_prompts()
+    for i, prompt in enumerate(prompts):
+        if i >= num_prompts:
+            break
+        for j in range(batch_count):
+            inference_start = time.time()
+            # Use warmup mode here since non-warmup mode will save image to disk.
+            if nvtx_profile:
+                cudart.cudaProfilerStart()
+            images, pipeline_time = run_sd_xl_inference(
+                [prompt] * batch_size, [negative_prompt] * batch_size, seed=123, warmup=True
+            )
+            if nvtx_profile:
+                cudart.cudaProfilerStop()
+            images = demo_refiner.to_pil_image(
+                images
+            )  # include image conversion time to pil image for apple-to-apple compare
+            inference_end = time.time()
+            latency = inference_end - inference_start
+            latency_list.append(latency)
+            print(f"End2End took {latency:.3f} seconds. Inference latency: {pipeline_time:.1f} ms")
+            for k, image in enumerate(images):
+                filename = f"{image_filename_prefix}_{i}_{j}_{k}.png"
+                image.save(filename)
+                print("Image saved to", filename)
+
+    demo_base.teardown()
+    demo_refiner.teardown()
+
+    from tensorrt import __version__ as trt_version
+
+    from onnxruntime import __version__ as ort_version
+
+    return {
+        "model_name": model_name,
+        "engine": "onnxruntime",
+        "version": ort_version,
+        "provider": f"tensorrt{trt_version})",
+        "height": height,
+        "width": width,
+        "steps": steps,
+        "batch_size": batch_size,
+        "batch_count": batch_count,
+        "num_prompts": num_prompts,
+        "average_latency": sum(latency_list) / len(latency_list),
+        "median_latency": statistics.median(latency_list),
+        "first_run_memory_MB": first_run_memory,
+        "second_run_memory_MB": second_run_memory,
+        "enable_cuda_graph": use_cuda_graph,
     }
 
 
@@ -808,6 +1250,15 @@ def parse_arguments():
         help="Directory of saved onnx pipeline. It could be the output directory of optimize_pipeline.py.",
     )
 
+    parser.add_argument(
+        "-w",
+        "--work_dir",
+        required=False,
+        type=str,
+        default=".",
+        help="Root directory to save exported onnx models, built engines etc.",
+    )
+
     parser.add_argument(
         "--enable_safety_checker",
         required=False,
@@ -922,28 +1373,31 @@ def main():
     args = parse_arguments()
     print(args)
 
-    if args.enable_cuda_graph:
-        if not (args.engine == "onnxruntime" and args.provider in ["cuda", "tensorrt"] and args.pipeline is None):
-            raise ValueError("The stable diffusion pipeline does not support CUDA graph.")
+    if args.engine == "onnxruntime":
+        if args.version in ["2.1"]:
+            # Set a flag to avoid overflow in attention, which causes black image output in SD 2.1 model.
+            # The environment variables shall be set before the first run of Attention or MultiHeadAttention operator.
+            os.environ["ORT_DISABLE_TRT_FLASH_ATTENTION"] = "1"
 
         from packaging import version
 
         from onnxruntime import __version__ as ort_version
 
-        if version.parse(ort_version) < version.parse("1.16"):
-            raise ValueError(
-                "CUDA graph requires ONNX Runtime 1.16. You can install nightly like the following:\n"
-                " pip uninstall onnxruntime-gpu\n"
-                " pip install ort-nightly-gpu -i https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple/"
-            )
+        if version.parse(ort_version) == version.parse("1.16.0"):
+            # ORT 1.16 has a bug that might trigger Attention RuntimeError when latest fusion script is applied on clip model.
+            # The walkaround is to enable fused causal attention, or disable Attention fusion for clip model.
+            os.environ["ORT_ENABLE_FUSED_CAUSAL_ATTENTION"] = "1"
+
+        if args.enable_cuda_graph:
+            if not (args.engine == "onnxruntime" and args.provider in ["cuda", "tensorrt"] and args.pipeline is None):
+                raise ValueError("The stable diffusion pipeline does not support CUDA graph.")
+
+            if version.parse(ort_version) < version.parse("1.16"):
+                raise ValueError("CUDA graph requires ONNX Runtime 1.16 or later")
 
     coloredlogs.install(fmt="%(funcName)20s: %(message)s")
 
-    memory_monitor_type = None
-    if args.provider in ["cuda", "tensorrt"]:
-        memory_monitor_type = CudaMemoryMonitor
-    elif args.provider == "rocm":
-        memory_monitor_type = RocmMemoryMonitor
+    memory_monitor_type = "rocm" if args.provider == "rocm" else "cuda"
 
     start_memory = measure_gpu_memory(memory_monitor_type, None)
     print("GPU memory used before loading models:", start_memory)
@@ -951,89 +1405,157 @@ def main():
     sd_model = SD_MODELS[args.version]
     provider = PROVIDERS[args.provider]
     if args.engine == "onnxruntime" and args.provider == "tensorrt":
-        result = run_ort_trt(
-            sd_model,
-            args.batch_size,
-            not args.enable_safety_checker,
-            args.height,
-            args.width,
-            args.steps,
-            args.num_prompts,
-            args.batch_count,
-            start_memory,
-            memory_monitor_type,
-            args.max_trt_batch_size,
-            args.enable_cuda_graph,
-        )
+        if "xl" in args.version:
+            print("Testing Txt2ImgXLPipeline with static input shape. Backend is ORT TensorRT EP.")
+            result = run_ort_trt_xl(
+                work_dir=args.work_dir,
+                version=args.version,
+                batch_size=args.batch_size,
+                disable_safety_checker=True,
+                height=args.height,
+                width=args.width,
+                steps=args.steps,
+                num_prompts=args.num_prompts,
+                batch_count=args.batch_count,
+                start_memory=start_memory,
+                memory_monitor_type=memory_monitor_type,
+                max_batch_size=args.max_trt_batch_size,
+                nvtx_profile=False,
+                use_cuda_graph=args.enable_cuda_graph,
+            )
+        elif args.tuning:
+            print(
+                "Testing OnnxruntimeTensorRTStableDiffusionPipeline with {}.".format(
+                    "static input shape" if args.enable_cuda_graph else "dynamic batch size"
+                )
+            )
+            result = run_ort_trt(
+                version=args.version,
+                batch_size=args.batch_size,
+                disable_safety_checker=not args.enable_safety_checker,
+                height=args.height,
+                width=args.width,
+                steps=args.steps,
+                num_prompts=args.num_prompts,
+                batch_count=args.batch_count,
+                start_memory=start_memory,
+                memory_monitor_type=memory_monitor_type,
+                max_batch_size=args.max_trt_batch_size,
+                enable_cuda_graph=args.enable_cuda_graph,
+            )
+        else:
+            print("Testing Txt2ImgPipeline with static input shape. Backend is ORT TensorRT EP.")
+            result = run_ort_trt_static(
+                work_dir=args.work_dir,
+                version=args.version,
+                batch_size=args.batch_size,
+                disable_safety_checker=not args.enable_safety_checker,
+                height=args.height,
+                width=args.width,
+                steps=args.steps,
+                num_prompts=args.num_prompts,
+                batch_count=args.batch_count,
+                start_memory=start_memory,
+                memory_monitor_type=memory_monitor_type,
+                max_batch_size=args.max_trt_batch_size,
+                nvtx_profile=False,
+                use_cuda_graph=args.enable_cuda_graph,
+            )
+
     elif args.engine == "onnxruntime" and provider == "CUDAExecutionProvider" and args.pipeline is None:
-        print("Pipeline is not specified. Trying export and optimize onnx models...")
+        print(
+            "Testing OnnxruntimeCudaStableDiffusionPipeline with {} input shape. Backend is ORT CUDA EP.".format(
+                "static" if args.enable_cuda_graph else "dynamic"
+            )
+        )
         result = export_and_run_ort(
-            sd_model,
-            provider,
-            args.batch_size,
-            not args.enable_safety_checker,
-            args.height,
-            args.width,
-            args.steps,
-            args.num_prompts,
-            args.batch_count,
-            start_memory,
-            memory_monitor_type,
-            args.enable_cuda_graph,
+            version=args.version,
+            provider=provider,
+            batch_size=args.batch_size,
+            disable_safety_checker=not args.enable_safety_checker,
+            height=args.height,
+            width=args.width,
+            steps=args.steps,
+            num_prompts=args.num_prompts,
+            batch_count=args.batch_count,
+            start_memory=start_memory,
+            memory_monitor_type=memory_monitor_type,
+            enable_cuda_graph=args.enable_cuda_graph,
         )
     elif args.engine == "onnxruntime":
         assert args.pipeline and os.path.isdir(
             args.pipeline
         ), "--pipeline should be specified for the directory of ONNX models"
-
-        if args.version in ["2.1"]:
-            # Set a flag to avoid overflow in attention, which causes black image output in SD 2.1 model
-            # This shall be done before the first inference run.
-            os.environ["ORT_DISABLE_TRT_FLASH_ATTENTION"] = "1"
-
+        print(f"Testing diffusers StableDiffusionPipeline with {provider} provider and tuning={args.tuning}")
         result = run_ort(
-            sd_model,
-            args.pipeline,
-            provider,
-            args.batch_size,
-            not args.enable_safety_checker,
-            args.height,
-            args.width,
-            args.steps,
-            args.num_prompts,
-            args.batch_count,
-            start_memory,
-            memory_monitor_type,
-            args.tuning,
+            model_name=sd_model,
+            directory=args.pipeline,
+            provider=provider,
+            batch_size=args.batch_size,
+            disable_safety_checker=not args.enable_safety_checker,
+            height=args.height,
+            width=args.width,
+            steps=args.steps,
+            num_prompts=args.num_prompts,
+            batch_count=args.batch_count,
+            start_memory=start_memory,
+            memory_monitor_type=memory_monitor_type,
+            tuning=args.tuning,
+        )
+    elif args.engine == "tensorrt" and "xl" in args.version:
+        print("Testing Txt2ImgXLPipeline with static input shape. Backend is TensorRT.")
+        result = run_tensorrt_static_xl(
+            work_dir=args.work_dir,
+            version=args.version,
+            batch_size=args.batch_size,
+            disable_safety_checker=True,
+            height=args.height,
+            width=args.width,
+            steps=args.steps,
+            num_prompts=args.num_prompts,
+            batch_count=args.batch_count,
+            start_memory=start_memory,
+            memory_monitor_type=memory_monitor_type,
+            max_batch_size=args.max_trt_batch_size,
+            nvtx_profile=False,
+            use_cuda_graph=args.enable_cuda_graph,
         )
     elif args.engine == "tensorrt":
-        result = run_tensorrt(
-            sd_model,
-            args.batch_size,
-            not args.enable_safety_checker,
-            args.height,
-            args.width,
-            args.steps,
-            args.num_prompts,
-            args.batch_count,
-            start_memory,
-            memory_monitor_type,
-            args.max_trt_batch_size,
+        print("Testing Txt2ImgPipeline with static input shape. Backend is TensorRT.")
+        result = run_tensorrt_static(
+            work_dir=args.work_dir,
+            version=args.version,
+            model_name=sd_model,
+            batch_size=args.batch_size,
+            disable_safety_checker=True,
+            height=args.height,
+            width=args.width,
+            steps=args.steps,
+            num_prompts=args.num_prompts,
+            batch_count=args.batch_count,
+            start_memory=start_memory,
+            memory_monitor_type=memory_monitor_type,
+            max_batch_size=args.max_trt_batch_size,
+            nvtx_profile=False,
+            use_cuda_graph=args.enable_cuda_graph,
         )
     else:
+        print(
+            f"Testing Txt2ImgPipeline with dynamic input shape. Backend is PyTorch: compile={args.enable_torch_compile}, xformers={args.use_xformers}."
+        )
         result = run_torch(
-            sd_model,
-            args.batch_size,
-            not args.enable_safety_checker,
-            args.enable_torch_compile,
-            args.use_xformers,
-            args.height,
-            args.width,
-            args.steps,
-            args.num_prompts,
-            args.batch_count,
-            start_memory,
-            memory_monitor_type,
+            model_name=sd_model,
+            batch_size=args.batch_size,
+            disable_safety_checker=not args.enable_safety_checker,
+            enable_torch_compile=args.enable_torch_compile,
+            use_xformers=args.use_xformers,
+            height=args.height,
+            width=args.width,
+            steps=args.steps,
+            num_prompts=args.num_prompts,
+            batch_count=args.batch_count,
+            start_memory=start_memory,
+            memory_monitor_type=memory_monitor_type,
         )
 
     print(result)
@@ -1068,8 +1590,9 @@ def main():
 
 
 if __name__ == "__main__":
+    import traceback
+
     try:
         main()
-    except Exception as e:
-        tb = sys.exc_info()
-        print(e.with_traceback(tb[2]))
+    except Exception:
+        traceback.print_exception(*sys.exc_info())
diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/demo_txt2img.py b/onnxruntime/python/tools/transformers/models/stable_diffusion/demo_txt2img.py
new file mode 100644
index 0000000000000..c18747d5c6518
--- /dev/null
+++ b/onnxruntime/python/tools/transformers/models/stable_diffusion/demo_txt2img.py
@@ -0,0 +1,150 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation.  All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+# Modified from TensorRT demo diffusion, which has the following license:
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# --------------------------------------------------------------------------
+
+import coloredlogs
+from cuda import cudart
+from demo_utils import (
+    add_controlnet_arguments,
+    arg_parser,
+    get_metadata,
+    init_pipeline,
+    max_batch,
+    parse_arguments,
+    process_controlnet_arguments,
+    repeat_prompt,
+)
+from diffusion_models import PipelineInfo
+from engine_builder import EngineType, get_engine_type
+from pipeline_txt2img import Txt2ImgPipeline
+
+if __name__ == "__main__":
+    coloredlogs.install(fmt="%(funcName)20s: %(message)s")
+
+    parser = arg_parser("Options for Stable Diffusion Demo")
+    add_controlnet_arguments(parser)
+    args = parse_arguments(is_xl=False, parser=parser)
+
+    controlnet_images, controlnet_scale = process_controlnet_arguments(args)
+
+    prompt, negative_prompt = repeat_prompt(args)
+
+    image_height = args.height
+    image_width = args.width
+
+    # Register TensorRT plugins
+    engine_type = get_engine_type(args.engine)
+    if engine_type == EngineType.TRT:
+        from trt_utilities import init_trt_plugins
+
+        init_trt_plugins()
+
+    max_batch_size = max_batch(args)
+
+    batch_size = len(prompt)
+    if batch_size > max_batch_size:
+        raise ValueError(
+            f"Batch size {len(prompt)} is larger than allowed {max_batch_size}. If dynamic shape is used, then maximum batch size is 4"
+        )
+
+    # For TensorRT,  performance of engine built with dynamic shape is very sensitive to the range of image size.
+    # Here, we reduce the range of image size for TensorRT to trade-off flexibility and performance.
+    # This range can cover common used shape of landscape 512x768, portrait 768x512, or square 512x512 and 768x768.
+    min_image_size = 512 if args.engine != "ORT_CUDA" else 256
+    max_image_size = 768 if args.engine != "ORT_CUDA" else 1024
+    pipeline_info = PipelineInfo(
+        args.version,
+        min_image_size=min_image_size,
+        max_image_size=max_image_size,
+        do_classifier_free_guidance=(args.guidance > 1.0),
+        controlnet=args.controlnet_type,
+        lora_weights=args.lora_weights,
+        lora_scale=args.lora_scale,
+    )
+
+    # Ideally, the optimized batch size and image size for TRT engine shall align with user's preference. That is to
+    # optimize the shape used most frequently. We can let user config it when we develop a UI plugin.
+    # In this demo, we optimize batch size 1 and image size 512x512 (or 768x768 for SD 2.0/2.1) for dynamic engine.
+    # This is mainly for benchmark purpose to simulate the case that we have no knowledge of user's preference.
+    opt_batch_size = 1 if args.build_dynamic_batch else batch_size
+    opt_image_height = pipeline_info.default_image_size() if args.build_dynamic_shape else args.height
+    opt_image_width = pipeline_info.default_image_size() if args.build_dynamic_shape else args.width
+
+    pipeline = init_pipeline(
+        Txt2ImgPipeline,
+        pipeline_info,
+        engine_type,
+        args,
+        max_batch_size,
+        opt_batch_size,
+        opt_image_height,
+        opt_image_width,
+    )
+
+    if engine_type == EngineType.TRT:
+        max_device_memory = max(pipeline.backend.max_device_memory(), pipeline.backend.max_device_memory())
+        _, shared_device_memory = cudart.cudaMalloc(max_device_memory)
+        pipeline.backend.activate_engines(shared_device_memory)
+
+    if engine_type == EngineType.ORT_CUDA and args.enable_vae_slicing:
+        pipeline.backend.enable_vae_slicing()
+
+    pipeline.load_resources(image_height, image_width, batch_size)
+
+    def run_inference(warmup=False):
+        return pipeline.run(
+            prompt,
+            negative_prompt,
+            image_height,
+            image_width,
+            warmup=warmup,
+            denoising_steps=args.denoising_steps,
+            guidance=args.guidance,
+            seed=args.seed,
+            controlnet_images=controlnet_images,
+            controlnet_scales=controlnet_scale,
+            return_type="image",
+        )
+
+    if not args.disable_cuda_graph:
+        # inference once to get cuda graph
+        _, _ = run_inference(warmup=True)
+
+    print("[I] Warming up ..")
+    for _ in range(args.num_warmup_runs):
+        _, _ = run_inference(warmup=True)
+
+    print("[I] Running StableDiffusion pipeline")
+    if args.nvtx_profile:
+        cudart.cudaProfilerStart()
+    images, perf_data = run_inference(warmup=False)
+    if args.nvtx_profile:
+        cudart.cudaProfilerStop()
+
+    metadata = get_metadata(args, False)
+    metadata.update(pipeline.metadata())
+    if perf_data:
+        metadata.update(perf_data)
+    metadata["images"] = len(images)
+    print(metadata)
+    pipeline.save_images(images, prompt, negative_prompt, metadata)
+
+    pipeline.teardown()
diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/demo_txt2img_xl.py b/onnxruntime/python/tools/transformers/models/stable_diffusion/demo_txt2img_xl.py
new file mode 100644
index 0000000000000..646e3518fa053
--- /dev/null
+++ b/onnxruntime/python/tools/transformers/models/stable_diffusion/demo_txt2img_xl.py
@@ -0,0 +1,318 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation.  All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+# Modified from TensorRT demo diffusion, which has the following license:
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# --------------------------------------------------------------------------
+
+import coloredlogs
+from cuda import cudart
+from demo_utils import (
+    add_controlnet_arguments,
+    arg_parser,
+    get_metadata,
+    init_pipeline,
+    max_batch,
+    parse_arguments,
+    process_controlnet_arguments,
+    repeat_prompt,
+)
+from diffusion_models import PipelineInfo
+from engine_builder import EngineType, get_engine_type
+from pipeline_img2img_xl import Img2ImgXLPipeline
+from pipeline_txt2img_xl import Txt2ImgXLPipeline
+
+
+def load_pipelines(args, batch_size):
+    # Register TensorRT plugins
+    engine_type = get_engine_type(args.engine)
+    if engine_type == EngineType.TRT:
+        from trt_utilities import init_trt_plugins
+
+        init_trt_plugins()
+
+    max_batch_size = max_batch(args)
+
+    if batch_size > max_batch_size:
+        raise ValueError(f"Batch size {batch_size} is larger than allowed {max_batch_size}.")
+
+    # For TensorRT,  performance of engine built with dynamic shape is very sensitive to the range of image size.
+    # Here, we reduce the range of image size for TensorRT to trade-off flexibility and performance.
+    # This range can cover most frequent shape of landscape (832x1216), portrait (1216x832) or square (1024x1024).
+    min_image_size = 832 if args.engine != "ORT_CUDA" else 512
+    max_image_size = 1216 if args.engine != "ORT_CUDA" else 2048
+
+    # No VAE decoder in base when it outputs latent instead of image.
+    base_info = PipelineInfo(
+        args.version,
+        use_vae=args.disable_refiner,
+        min_image_size=min_image_size,
+        max_image_size=max_image_size,
+        use_lcm=args.lcm,
+        do_classifier_free_guidance=(args.guidance > 1.0),
+        controlnet=args.controlnet_type,
+        lora_weights=args.lora_weights,
+        lora_scale=args.lora_scale,
+    )
+
+    # Ideally, the optimized batch size and image size for TRT engine shall align with user's preference. That is to
+    # optimize the shape used most frequently. We can let user config it when we develop a UI plugin.
+    # In this demo, we optimize batch size 1 and image size 1024x1024 for SD XL dynamic engine.
+    # This is mainly for benchmark purpose to simulate the case that we have no knowledge of user's preference.
+    opt_batch_size = 1 if args.build_dynamic_batch else batch_size
+    opt_image_height = base_info.default_image_size() if args.build_dynamic_shape else args.height
+    opt_image_width = base_info.default_image_size() if args.build_dynamic_shape else args.width
+
+    base = init_pipeline(
+        Txt2ImgXLPipeline,
+        base_info,
+        engine_type,
+        args,
+        max_batch_size,
+        opt_batch_size,
+        opt_image_height,
+        opt_image_width,
+    )
+
+    refiner = None
+    if not args.disable_refiner:
+        refiner_info = PipelineInfo(
+            args.version, is_refiner=True, min_image_size=min_image_size, max_image_size=max_image_size
+        )
+        refiner = init_pipeline(
+            Img2ImgXLPipeline,
+            refiner_info,
+            engine_type,
+            args,
+            max_batch_size,
+            opt_batch_size,
+            opt_image_height,
+            opt_image_width,
+        )
+
+    if engine_type == EngineType.TRT:
+        max_device_memory = max(base.backend.max_device_memory(), (refiner or base).backend.max_device_memory())
+        _, shared_device_memory = cudart.cudaMalloc(max_device_memory)
+        base.backend.activate_engines(shared_device_memory)
+        if refiner:
+            refiner.backend.activate_engines(shared_device_memory)
+
+    if engine_type == EngineType.ORT_CUDA:
+        enable_vae_slicing = args.enable_vae_slicing
+        if batch_size > 4 and not enable_vae_slicing:
+            print("Updating enable_vae_slicing to be True to avoid cuDNN error for batch size > 4.")
+            enable_vae_slicing = True
+        if enable_vae_slicing:
+            (refiner or base).backend.enable_vae_slicing()
+    return base, refiner
+
+
+def run_pipelines(
+    args, base, refiner, prompt, negative_prompt, controlnet_image=None, controlnet_scale=None, is_warm_up=False
+):
+    image_height = args.height
+    image_width = args.width
+    batch_size = len(prompt)
+    base.load_resources(image_height, image_width, batch_size)
+    if refiner:
+        refiner.load_resources(image_height, image_width, batch_size)
+
+    def run_base_and_refiner(warmup=False):
+        images, base_perf = base.run(
+            prompt,
+            negative_prompt,
+            image_height,
+            image_width,
+            warmup=warmup,
+            denoising_steps=args.denoising_steps,
+            guidance=args.guidance,
+            seed=args.seed,
+            controlnet_images=controlnet_image,
+            controlnet_scales=controlnet_scale,
+            return_type="latent" if refiner else "image",
+        )
+        if refiner is None:
+            return images, base_perf
+
+        # Use same seed in base and refiner.
+        seed = base.get_current_seed()
+
+        images, refiner_perf = refiner.run(
+            prompt,
+            negative_prompt,
+            images,
+            image_height,
+            image_width,
+            warmup=warmup,
+            denoising_steps=args.refiner_steps,
+            strength=args.strength,
+            guidance=args.refiner_guidance,
+            seed=seed,
+        )
+
+        perf_data = None
+        if base_perf and refiner_perf:
+            perf_data = {"latency": base_perf["latency"] + refiner_perf["latency"]}
+            perf_data.update({"base." + key: val for key, val in base_perf.items()})
+            perf_data.update({"refiner." + key: val for key, val in refiner_perf.items()})
+
+        return images, perf_data
+
+    if not args.disable_cuda_graph:
+        # inference once to get cuda graph
+        _, _ = run_base_and_refiner(warmup=True)
+
+    if args.num_warmup_runs > 0:
+        print("[I] Warming up ..")
+    for _ in range(args.num_warmup_runs):
+        _, _ = run_base_and_refiner(warmup=True)
+
+    if is_warm_up:
+        return
+
+    print("[I] Running StableDiffusion XL pipeline")
+    if args.nvtx_profile:
+        cudart.cudaProfilerStart()
+    images, perf_data = run_base_and_refiner(warmup=False)
+    if args.nvtx_profile:
+        cudart.cudaProfilerStop()
+
+    if refiner:
+        print("|----------------|--------------|")
+        print("| {:^14} | {:>9.2f} ms |".format("e2e", perf_data["latency"]))
+        print("|----------------|--------------|")
+
+    metadata = get_metadata(args, True)
+    metadata.update({"base." + key: val for key, val in base.metadata().items()})
+    if refiner:
+        metadata.update({"refiner." + key: val for key, val in refiner.metadata().items()})
+    if perf_data:
+        metadata.update(perf_data)
+    metadata["images"] = len(images)
+    print(metadata)
+    (refiner or base).save_images(images, prompt, negative_prompt, metadata)
+
+
+def run_demo(args):
+    """Run Stable Diffusion XL Base + Refiner together (known as ensemble of expert denoisers) to generate an image."""
+    controlnet_image, controlnet_scale = process_controlnet_arguments(args)
+    prompt, negative_prompt = repeat_prompt(args)
+    batch_size = len(prompt)
+    base, refiner = load_pipelines(args, batch_size)
+    run_pipelines(args, base, refiner, prompt, negative_prompt, controlnet_image, controlnet_scale)
+    base.teardown()
+    if refiner:
+        refiner.teardown()
+
+
+def run_dynamic_shape_demo(args):
+    """Run demo of generating images with different settings with ORT CUDA provider."""
+    args.engine = "ORT_CUDA"
+    args.disable_cuda_graph = True
+    if args.lcm:
+        args.disable_refiner = True
+    base, refiner = load_pipelines(args, 1)
+
+    prompts = [
+        "starry night over Golden Gate Bridge by van gogh",
+        "beautiful photograph of Mt. Fuji during cherry blossom",
+        "little cute gremlin sitting on a bed, cinematic",
+        "cute grey cat with blue eyes, wearing a bowtie, acrylic painting",
+        "beautiful Renaissance Revival Estate, Hobbit-House, detailed painting, warm colors, 8k, trending on Artstation",
+        "blue owl, big green eyes, portrait, intricate metal design, unreal engine, octane render, realistic",
+        "An astronaut riding a rainbow unicorn, cinematic, dramatic",
+        "close-up photography of old man standing in the rain at night, in a street lit by lamps, leica 35mm",
+    ]
+
+    # refiner, batch size, height, width, scheduler, steps, prompt, seed, guidance, refiner scheduler, refiner steps, refiner strength
+    configs = [
+        (1, 832, 1216, "UniPC", 8, prompts[0], None, 5.0, "UniPC", 10, 0.3),
+        (1, 1024, 1024, "DDIM", 24, prompts[1], None, 5.0, "DDIM", 30, 0.3),
+        (1, 1216, 832, "UniPC", 16, prompts[2], None, 5.0, "UniPC", 10, 0.3),
+        (1, 1344, 768, "DDIM", 24, prompts[3], None, 5.0, "UniPC", 20, 0.3),
+        (2, 640, 1536, "UniPC", 16, prompts[4], 4312973633252712, 5.0, "UniPC", 10, 0.3),
+        (2, 1152, 896, "DDIM", 24, prompts[5], 1964684802882906, 5.0, "UniPC", 20, 0.3),
+    ]
+
+    # In testing LCM, refiner is disabled so the settings of refiner is not used.
+    if args.lcm:
+        configs = [
+            (1, 1024, 1024, "LCM", 8, prompts[6], None, 1.0, "UniPC", 20, 0.3),
+            (1, 1216, 832, "LCM", 6, prompts[7], 1337, 1.0, "UniPC", 20, 0.3),
+        ]
+
+    # Warm up each combination of (batch size, height, width) once before serving.
+    args.prompt = ["warm up"]
+    args.num_warmup_runs = 1
+    for batch_size, height, width, _, _, _, _, _, _, _, _ in configs:
+        args.batch_size = batch_size
+        args.height = height
+        args.width = width
+        print(f"\nWarm up batch_size={batch_size}, height={height}, width={width}")
+        prompt, negative_prompt = repeat_prompt(args)
+        run_pipelines(args, base, refiner, prompt, negative_prompt, is_warm_up=True)
+
+    # Run pipeline on a list of prompts.
+    args.num_warmup_runs = 0
+    for (
+        batch_size,
+        height,
+        width,
+        scheduler,
+        steps,
+        example_prompt,
+        seed,
+        guidance,
+        refiner_scheduler,
+        refiner_steps,
+        strength,
+    ) in configs:
+        args.prompt = [example_prompt]
+        args.batch_size = batch_size
+        args.height = height
+        args.width = width
+        args.scheduler = scheduler
+        args.denoising_steps = steps
+        args.seed = seed
+        args.guidance = guidance
+        args.refiner_scheduler = refiner_scheduler
+        args.refiner_steps = refiner_steps
+        args.strength = strength
+        base.set_scheduler(scheduler)
+        if refiner:
+            refiner.set_scheduler(refiner_scheduler)
+        prompt, negative_prompt = repeat_prompt(args)
+        run_pipelines(args, base, refiner, prompt, negative_prompt, is_warm_up=False)
+
+    base.teardown()
+    if refiner:
+        refiner.teardown()
+
+
+if __name__ == "__main__":
+    coloredlogs.install(fmt="%(funcName)20s: %(message)s")
+
+    parser = arg_parser("Options for Stable Diffusion XL Demo")
+    add_controlnet_arguments(parser)
+    args = parse_arguments(is_xl=True, parser=parser)
+
+    no_prompt = isinstance(args.prompt, list) and len(args.prompt) == 1 and not args.prompt[0]
+    if no_prompt:
+        run_dynamic_shape_demo(args)
+    else:
+        run_demo(args)
diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/demo_utils.py b/onnxruntime/python/tools/transformers/models/stable_diffusion/demo_utils.py
new file mode 100644
index 0000000000000..f0c83fc507ae4
--- /dev/null
+++ b/onnxruntime/python/tools/transformers/models/stable_diffusion/demo_utils.py
@@ -0,0 +1,668 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation.  All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+# Modified from TensorRT demo diffusion, which has the following license:
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# --------------------------------------------------------------------------
+import argparse
+import os
+import sys
+from importlib.metadata import PackageNotFoundError, version
+from io import BytesIO
+from typing import Any, Dict, List
+
+import controlnet_aux
+import cv2
+import numpy as np
+import requests
+import torch
+from diffusers.utils import load_image
+from diffusion_models import PipelineInfo
+from engine_builder import EngineType, get_engine_paths
+from PIL import Image
+
+
+class RawTextArgumentDefaultsHelpFormatter(argparse.ArgumentDefaultsHelpFormatter, argparse.RawTextHelpFormatter):
+    pass
+
+
+def arg_parser(description: str):
+    return argparse.ArgumentParser(description=description, formatter_class=RawTextArgumentDefaultsHelpFormatter)
+
+
+def parse_arguments(is_xl: bool, parser):
+    engines = ["ORT_CUDA", "ORT_TRT", "TRT"]
+
+    parser.add_argument(
+        "--engine",
+        type=str,
+        default=engines[0],
+        choices=engines,
+        help="Backend engine in {engines}. "
+        "ORT_CUDA is CUDA execution provider; ORT_TRT is Tensorrt execution provider; TRT is TensorRT",
+    )
+
+    supported_versions = PipelineInfo.supported_versions(is_xl)
+    parser.add_argument(
+        "--version",
+        type=str,
+        default=supported_versions[-1] if is_xl else "1.5",
+        choices=supported_versions,
+        help="Version of Stable Diffusion" + (" XL." if is_xl else "."),
+    )
+
+    parser.add_argument(
+        "--height",
+        type=int,
+        default=1024 if is_xl else 512,
+        help="Height of image to generate (must be multiple of 8).",
+    )
+    parser.add_argument(
+        "--width", type=int, default=1024 if is_xl else 512, help="Height of image to generate (must be multiple of 8)."
+    )
+
+    parser.add_argument(
+        "--scheduler",
+        type=str,
+        default="DDIM",
+        choices=["DDIM", "UniPC", "LCM"] if is_xl else ["DDIM", "EulerA", "UniPC", "LCM"],
+        help="Scheduler for diffusion process" + " of base" if is_xl else "",
+    )
+
+    parser.add_argument(
+        "--work-dir",
+        default=".",
+        help="Root Directory to store torch or ONNX models, built engines and output images etc.",
+    )
+
+    parser.add_argument("prompt", nargs="*", default=[""], help="Text prompt(s) to guide image generation.")
+
+    parser.add_argument(
+        "--negative-prompt", nargs="*", default=[""], help="Optional negative prompt(s) to guide the image generation."
+    )
+    parser.add_argument(
+        "--batch-size",
+        type=int,
+        default=1,
+        choices=[1, 2, 4, 8, 16],
+        help="Number of times to repeat the prompt (batch size multiplier).",
+    )
+
+    parser.add_argument(
+        "--denoising-steps",
+        type=int,
+        default=30 if is_xl else 50,
+        help="Number of denoising steps" + (" in base." if is_xl else "."),
+    )
+
+    parser.add_argument(
+        "--guidance",
+        type=float,
+        default=5.0 if is_xl else 7.5,
+        help="Higher guidance scale encourages to generate images that are closely linked to the text prompt.",
+    )
+
+    parser.add_argument(
+        "--lora-scale", type=float, default=1, help="Scale of LoRA weights, default 1 (must between 0 and 1)"
+    )
+    parser.add_argument("--lora-weights", type=str, default="", help="LoRA weights to apply in the base model")
+
+    if is_xl:
+        parser.add_argument(
+            "--lcm",
+            action="store_true",
+            help="Use fine-tuned latent consistency model to replace the UNet in base.",
+        )
+
+        parser.add_argument(
+            "--refiner-scheduler",
+            type=str,
+            default="DDIM",
+            choices=["DDIM", "UniPC"],
+            help="Scheduler for diffusion process of refiner.",
+        )
+
+        parser.add_argument(
+            "--refiner-guidance",
+            type=float,
+            default=5.0,
+            help="Guidance scale used in refiner.",
+        )
+
+        parser.add_argument(
+            "--refiner-steps",
+            type=int,
+            default=30,
+            help="Number of denoising steps in refiner. Note that actual refiner steps is refiner_steps * strength.",
+        )
+
+        parser.add_argument(
+            "--strength",
+            type=float,
+            default=0.3,
+            help="A value between 0 and 1. The higher the value less the final image similar to the seed image.",
+        )
+
+        parser.add_argument(
+            "--disable-refiner", action="store_true", help="Disable refiner and only run base for XL pipeline."
+        )
+
+    # ONNX export
+    parser.add_argument(
+        "--onnx-opset",
+        type=int,
+        default=None,
+        choices=range(14, 18),
+        help="Select ONNX opset version to target for exported models.",
+    )
+    parser.add_argument(
+        "--force-onnx-export", action="store_true", help="Force ONNX export of CLIP, UNET, and VAE models."
+    )
+    parser.add_argument(
+        "--force-onnx-optimize", action="store_true", help="Force ONNX optimizations for CLIP, UNET, and VAE models."
+    )
+
+    # Framework model ckpt
+    parser.add_argument(
+        "--framework-model-dir",
+        default="pytorch_model",
+        help="Directory for HF saved models. Default is pytorch_model.",
+    )
+    parser.add_argument("--hf-token", type=str, help="HuggingFace API access token for downloading model checkpoints.")
+
+    # Engine build options.
+    parser.add_argument("--force-engine-build", action="store_true", help="Force rebuilding the TensorRT engine.")
+    parser.add_argument(
+        "--build-dynamic-batch", action="store_true", help="Build TensorRT engines to support dynamic batch size."
+    )
+    parser.add_argument(
+        "--build-dynamic-shape", action="store_true", help="Build TensorRT engines to support dynamic image sizes."
+    )
+
+    # Inference related options
+    parser.add_argument(
+        "--num-warmup-runs", type=int, default=5, help="Number of warmup runs before benchmarking performance."
+    )
+    parser.add_argument("--nvtx-profile", action="store_true", help="Enable NVTX markers for performance profiling.")
+    parser.add_argument("--seed", type=int, default=None, help="Seed for random generator to get consistent results.")
+    parser.add_argument("--disable-cuda-graph", action="store_true", help="Disable cuda graph.")
+
+    group = parser.add_argument_group("Options for ORT_CUDA engine only")
+    group.add_argument("--enable-vae-slicing", action="store_true", help="True will feed only one image to VAE once.")
+
+    # TensorRT only options
+    group = parser.add_argument_group("Options for TensorRT (--engine=TRT) only")
+    group.add_argument("--onnx-refit-dir", help="ONNX models to load the weights from.")
+    group.add_argument(
+        "--build-enable-refit", action="store_true", help="Enable Refit option in TensorRT engines during build."
+    )
+    group.add_argument(
+        "--build-preview-features", action="store_true", help="Build TensorRT engines with preview features."
+    )
+    group.add_argument(
+        "--build-all-tactics", action="store_true", help="Build TensorRT engines using all tactic sources."
+    )
+
+    args = parser.parse_args()
+
+    if (
+        args.engine in ["ORT_CUDA", "ORT_TRT"]
+        and (args.force_onnx_export or args.force_onnx_optimize)
+        and not args.force_engine_build
+    ):
+        raise ValueError(
+            "For ORT_CUDA or ORT_TRT, --force_onnx_export and --force_onnx_optimize are not supported. "
+            "Please use --force_engine_build instead."
+        )
+
+    # Validate image dimensions
+    if args.height % 64 != 0 or args.width % 64 != 0:
+        raise ValueError(
+            f"Image height and width have to be divisible by 64 but specified as: {args.height} and {args.width}."
+        )
+
+    if (args.build_dynamic_batch or args.build_dynamic_shape) and not args.disable_cuda_graph:
+        print("[I] CUDA Graph is disabled since dynamic input shape is configured.")
+        args.disable_cuda_graph = True
+
+    if args.onnx_opset is None:
+        args.onnx_opset = 14 if args.engine == "ORT_CUDA" else 17
+
+    if is_xl:
+        if args.lcm and args.scheduler != "LCM":
+            print("[I] Use --scheduler=LCM for base since LCM is used.")
+            args.scheduler = "LCM"
+
+        assert args.strength > 0.0 and args.strength < 1.0
+
+        assert not (args.lcm and args.lora_weights), "it is not supported to use both lcm unet and Lora together"
+
+    if args.scheduler == "LCM":
+        if args.guidance > 1.0:
+            print("[I] Use --guidance=1.0 for base since LCM is used.")
+            args.guidance = 1.0
+        if args.denoising_steps > 16:
+            print("[I] Use --denoising_steps=8 (no more than 16) for base since LCM is used.")
+            args.denoising_steps = 8
+
+    print(args)
+
+    return args
+
+
+def max_batch(args):
+    do_classifier_free_guidance = args.guidance > 1.0
+    batch_multiplier = 2 if do_classifier_free_guidance else 1
+    max_batch_size = 32 // batch_multiplier
+    if args.engine != "ORT_CUDA" and (args.build_dynamic_shape or args.height > 512 or args.width > 512):
+        max_batch_size = 8 // batch_multiplier
+    return max_batch_size
+
+
+def get_metadata(args, is_xl: bool = False) -> Dict[str, Any]:
+    metadata = {
+        "command": " ".join(['"' + x + '"' if " " in x else x for x in sys.argv]),
+        "args.prompt": args.prompt,
+        "args.negative_prompt": args.negative_prompt,
+        "args.batch_size": args.batch_size,
+        "height": args.height,
+        "width": args.width,
+        "cuda_graph": not args.disable_cuda_graph,
+        "vae_slicing": args.enable_vae_slicing,
+        "engine": args.engine,
+    }
+
+    if args.lora_weights:
+        metadata["lora_weights"] = args.lora_weights
+        metadata["lora_scale"] = args.lora_scale
+
+    if args.controlnet_type:
+        metadata["controlnet_type"] = args.controlnet_type
+        metadata["controlnet_scale"] = args.controlnet_scale
+
+    if is_xl and not args.disable_refiner:
+        metadata["base.scheduler"] = args.scheduler
+        metadata["base.denoising_steps"] = args.denoising_steps
+        metadata["base.guidance"] = args.guidance
+        metadata["refiner.strength"] = args.strength
+        metadata["refiner.scheduler"] = args.refiner_scheduler
+        metadata["refiner.denoising_steps"] = args.refiner_steps
+        metadata["refiner.guidance"] = args.refiner_guidance
+    else:
+        metadata["scheduler"] = args.scheduler
+        metadata["denoising_steps"] = args.denoising_steps
+        metadata["guidance"] = args.guidance
+
+    # Version of installed python packages
+    packages = ""
+    for name in [
+        "onnxruntime-gpu",
+        "torch",
+        "tensorrt",
+        "transformers",
+        "diffusers",
+        "onnx",
+        "onnx-graphsurgeon",
+        "polygraphy",
+        "controlnet_aux",
+    ]:
+        try:
+            packages += (" " if packages else "") + f"{name}=={version(name)}"
+        except PackageNotFoundError:
+            continue
+    metadata["packages"] = packages
+    metadata["device"] = torch.cuda.get_device_name()
+    metadata["torch.version.cuda"] = torch.version.cuda
+
+    return metadata
+
+
+def repeat_prompt(args):
+    if not isinstance(args.prompt, list):
+        raise ValueError(f"`prompt` must be of type `str` or `str` list, but is {type(args.prompt)}")
+    prompt = args.prompt * args.batch_size
+
+    if not isinstance(args.negative_prompt, list):
+        raise ValueError(
+            f"`--negative-prompt` must be of type `str` or `str` list, but is {type(args.negative_prompt)}"
+        )
+
+    if len(args.negative_prompt) == 1:
+        negative_prompt = args.negative_prompt * len(prompt)
+    else:
+        negative_prompt = args.negative_prompt
+
+    return prompt, negative_prompt
+
+
+def init_pipeline(
+    pipeline_class, pipeline_info, engine_type, args, max_batch_size, opt_batch_size, opt_image_height, opt_image_width
+):
+    onnx_dir, engine_dir, output_dir, framework_model_dir, timing_cache = get_engine_paths(
+        work_dir=args.work_dir, pipeline_info=pipeline_info, engine_type=engine_type
+    )
+
+    # Initialize demo
+    pipeline = pipeline_class(
+        pipeline_info,
+        scheduler=args.refiner_scheduler if pipeline_info.is_xl_refiner() else args.scheduler,
+        output_dir=output_dir,
+        hf_token=args.hf_token,
+        verbose=False,
+        nvtx_profile=args.nvtx_profile,
+        max_batch_size=max_batch_size,
+        use_cuda_graph=not args.disable_cuda_graph,
+        framework_model_dir=framework_model_dir,
+        engine_type=engine_type,
+    )
+
+    if engine_type == EngineType.ORT_CUDA:
+        # Build CUDA EP engines and load pytorch modules
+        pipeline.backend.build_engines(
+            engine_dir=engine_dir,
+            framework_model_dir=framework_model_dir,
+            onnx_dir=onnx_dir,
+            tmp_dir=os.path.join(args.work_dir or ".", engine_type.name, pipeline_info.short_name(), "tmp"),
+            force_engine_rebuild=args.force_engine_build,
+            device_id=torch.cuda.current_device(),
+        )
+    elif engine_type == EngineType.ORT_TRT:
+        # Build TensorRT EP engines and load pytorch modules
+        pipeline.backend.build_engines(
+            engine_dir,
+            framework_model_dir,
+            onnx_dir,
+            args.onnx_opset,
+            opt_image_height=opt_image_height,
+            opt_image_width=opt_image_width,
+            opt_batch_size=opt_batch_size,
+            force_engine_rebuild=args.force_engine_build,
+            static_batch=not args.build_dynamic_batch,
+            static_image_shape=not args.build_dynamic_shape,
+            max_workspace_size=0,
+            device_id=torch.cuda.current_device(),
+            timing_cache=timing_cache,
+        )
+    elif engine_type == EngineType.TRT:
+        # Load TensorRT engines and pytorch modules
+        pipeline.backend.load_engines(
+            engine_dir,
+            framework_model_dir,
+            onnx_dir,
+            args.onnx_opset,
+            opt_batch_size=opt_batch_size,
+            opt_image_height=opt_image_height,
+            opt_image_width=opt_image_width,
+            force_export=args.force_onnx_export,
+            force_optimize=args.force_onnx_optimize,
+            force_build=args.force_engine_build,
+            static_batch=not args.build_dynamic_batch,
+            static_shape=not args.build_dynamic_shape,
+            enable_refit=args.build_enable_refit,
+            enable_preview=args.build_preview_features,
+            enable_all_tactics=args.build_all_tactics,
+            timing_cache=timing_cache,
+            onnx_refit_dir=args.onnx_refit_dir,
+        )
+
+    return pipeline
+
+
+def get_depth_image(image):
+    """
+    Create depth map for SDXL depth control net.
+    """
+    from transformers import DPTFeatureExtractor, DPTForDepthEstimation
+
+    depth_estimator = DPTForDepthEstimation.from_pretrained("Intel/dpt-hybrid-midas").to("cuda")
+    feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-hybrid-midas")
+
+    image = feature_extractor(images=image, return_tensors="pt").pixel_values.to("cuda")
+    with torch.no_grad(), torch.autocast("cuda"):
+        depth_map = depth_estimator(image).predicted_depth
+
+    depth_map = torch.nn.functional.interpolate(
+        depth_map.unsqueeze(1),
+        size=(1024, 1024),
+        mode="bicubic",
+        align_corners=False,
+    )
+    depth_min = torch.amin(depth_map, dim=[1, 2, 3], keepdim=True)
+    depth_max = torch.amax(depth_map, dim=[1, 2, 3], keepdim=True)
+    depth_map = (depth_map - depth_min) / (depth_max - depth_min)
+    image = torch.cat([depth_map] * 3, dim=1)
+
+    image = image.permute(0, 2, 3, 1).cpu().numpy()[0]
+    image = Image.fromarray((image * 255.0).clip(0, 255).astype(np.uint8))
+    return image
+
+
+def get_canny_image(image) -> Image.Image:
+    """
+    Create canny image for SDXL control net.
+    """
+    image = np.array(image)
+    image = cv2.Canny(image, 100, 200)
+    image = image[:, :, None]
+    image = np.concatenate([image, image, image], axis=2)
+    image = Image.fromarray(image)
+    return image
+
+
+def process_controlnet_images_xl(args) -> List[Image.Image]:
+    """
+    Process control image for SDXL control net.
+    """
+    image = None
+    if args.controlnet_image:
+        image = Image.open(args.controlnet_image[0])
+    else:
+        # If no image is provided, download an image for demo purpose.
+        if args.controlnet_type[0] == "canny":
+            image = load_image(
+                "https://hf.co/datasets/huggingface/documentation-images/resolve/main/diffusers/input_image_vermeer.png"
+            )
+        elif args.controlnet_type[0] == "depth":
+            image = load_image(
+                "https://huggingface.co/lllyasviel/sd-controlnet-depth/resolve/main/images/stormtrooper.png"
+            )
+
+    controlnet_images = []
+    if args.controlnet_type[0] == "canny":
+        controlnet_images.append(get_canny_image(image))
+    elif args.controlnet_type[0] == "depth":
+        controlnet_images.append(get_depth_image(image))
+    else:
+        raise ValueError(f"The controlnet is not supported for SDXL: {args.controlnet_type}")
+
+    return controlnet_images
+
+
+def add_controlnet_arguments(parser, is_xl: bool = False):
+    """
+    Add control net related arguments.
+    """
+    group = parser.add_argument_group("Options for ControlNet (only supports SD 1.5 or XL).")
+
+    group.add_argument(
+        "--controlnet-image",
+        nargs="*",
+        type=str,
+        default=[],
+        help="Path to the input regular RGB image/images for controlnet",
+    )
+    group.add_argument(
+        "--controlnet-type",
+        nargs="*",
+        type=str,
+        default=[],
+        choices=list(PipelineInfo.supported_controlnet("xl-1.0" if is_xl else "1.5").keys()),
+        help="A list of controlnet type",
+    )
+    group.add_argument(
+        "--controlnet-scale",
+        nargs="*",
+        type=float,
+        default=[],
+        help="The outputs of the controlnet are multiplied by `controlnet_scale` before they are added to the residual in the original unet. Default is 0.35 for SDXL, or 1.0 for SD 1.5",
+    )
+
+
+def download_image(url) -> Image.Image:
+    response = requests.get(url)
+    return Image.open(BytesIO(response.content)).convert("RGB")
+
+
+def controlnet_demo_images(controlnet_list: List[str], height, width) -> List[Image.Image]:
+    """
+    Return demo images of control net v1.1 for Stable Diffusion 1.5.
+    """
+    control_images = []
+    shape = (height, width)
+    for controlnet in controlnet_list:
+        if controlnet == "canny":
+            canny_image = download_image(
+                "https://hf.co/datasets/huggingface/documentation-images/resolve/main/diffusers/input_image_vermeer.png"
+            )
+            canny_image = controlnet_aux.CannyDetector()(canny_image)
+            control_images.append(canny_image.resize(shape))
+        elif controlnet == "normalbae":
+            normal_image = download_image(
+                "https://huggingface.co/lllyasviel/sd-controlnet-normal/resolve/main/images/toy.png"
+            )
+            normal_image = controlnet_aux.NormalBaeDetector.from_pretrained("lllyasviel/Annotators")(normal_image)
+            control_images.append(normal_image.resize(shape))
+        elif controlnet == "depth":
+            depth_image = download_image(
+                "https://huggingface.co/lllyasviel/sd-controlnet-depth/resolve/main/images/stormtrooper.png"
+            )
+            depth_image = controlnet_aux.LeresDetector.from_pretrained("lllyasviel/Annotators")(depth_image)
+            control_images.append(depth_image.resize(shape))
+        elif controlnet == "mlsd":
+            mlsd_image = download_image(
+                "https://huggingface.co/lllyasviel/sd-controlnet-mlsd/resolve/main/images/room.png"
+            )
+            mlsd_image = controlnet_aux.MLSDdetector.from_pretrained("lllyasviel/Annotators")(mlsd_image)
+            control_images.append(mlsd_image.resize(shape))
+        elif controlnet == "openpose":
+            openpose_image = download_image(
+                "https://huggingface.co/lllyasviel/sd-controlnet-openpose/resolve/main/images/pose.png"
+            )
+            openpose_image = controlnet_aux.OpenposeDetector.from_pretrained("lllyasviel/Annotators")(openpose_image)
+            control_images.append(openpose_image.resize(shape))
+        elif controlnet == "scribble":
+            scribble_image = download_image(
+                "https://huggingface.co/lllyasviel/sd-controlnet-scribble/resolve/main/images/bag.png"
+            )
+            scribble_image = controlnet_aux.HEDdetector.from_pretrained("lllyasviel/Annotators")(
+                scribble_image, scribble=True
+            )
+            control_images.append(scribble_image.resize(shape))
+        elif controlnet == "seg":
+            seg_image = download_image(
+                "https://huggingface.co/lllyasviel/sd-controlnet-seg/resolve/main/images/house.png"
+            )
+            seg_image = controlnet_aux.SamDetector.from_pretrained(
+                "ybelkada/segment-anything", subfolder="checkpoints"
+            )(seg_image)
+            control_images.append(seg_image.resize(shape))
+        else:
+            raise ValueError(f"There is no demo image of this controlnet: {controlnet}")
+    return control_images
+
+
+def process_controlnet_image(controlnet_type: str, image: Image.Image, height, width):
+    """
+    Process control images of control net v1.1 for Stable Diffusion 1.5.
+    """
+    control_image = None
+    shape = (height, width)
+    image = image.convert("RGB")
+    if controlnet_type == "canny":
+        canny_image = controlnet_aux.CannyDetector()(image)
+        control_image = canny_image.resize(shape)
+    elif controlnet_type == "normalbae":
+        normal_image = controlnet_aux.NormalBaeDetector.from_pretrained("lllyasviel/Annotators")(image)
+        control_image = normal_image.resize(shape)
+    elif controlnet_type == "depth":
+        depth_image = controlnet_aux.LeresDetector.from_pretrained("lllyasviel/Annotators")(image)
+        control_image = depth_image.resize(shape)
+    elif controlnet_type == "mlsd":
+        mlsd_image = controlnet_aux.MLSDdetector.from_pretrained("lllyasviel/Annotators")(image)
+        control_image = mlsd_image.resize(shape)
+    elif controlnet_type == "openpose":
+        openpose_image = controlnet_aux.OpenposeDetector.from_pretrained("lllyasviel/Annotators")(image)
+        control_image = openpose_image.resize(shape)
+    elif controlnet_type == "scribble":
+        scribble_image = controlnet_aux.HEDdetector.from_pretrained("lllyasviel/Annotators")(image, scribble=True)
+        control_image = scribble_image.resize(shape)
+    elif controlnet_type == "seg":
+        seg_image = controlnet_aux.SamDetector.from_pretrained("ybelkada/segment-anything", subfolder="checkpoints")(
+            image
+        )
+        control_image = seg_image.resize(shape)
+    else:
+        raise ValueError(f"There is no demo image of this controlnet_type: {controlnet_type}")
+    return control_image
+
+
+def process_controlnet_arguments(args):
+    """
+    Process control net arguments, and returns a list of control images and a tensor of control net scales.
+    """
+    assert isinstance(args.controlnet_type, list)
+    assert isinstance(args.controlnet_scale, list)
+    assert isinstance(args.controlnet_image, list)
+    if args.version not in ["1.5", "xl-1.0"]:
+        raise ValueError("This demo only supports ControlNet in Stable Diffusion 1.5 or XL.")
+
+    is_xl = args.version == "xl-1.0"
+    if is_xl and len(args.controlnet_type) > 1:
+        raise ValueError("This demo only support one ControlNet for Stable Diffusion XL.")
+
+    if len(args.controlnet_image) != 0 and len(args.controlnet_image) != len(args.controlnet_scale):
+        raise ValueError(
+            f"Numbers of ControlNets {len(args.controlnet_image)} should be equal to number of ControlNet scales {len(args.controlnet_scale)}."
+        )
+
+    if len(args.controlnet_type) == 0:
+        return None, None
+
+    if len(args.controlnet_scale) == 0:
+        args.controlnet_scale = [0.5 if is_xl else 1.0] * len(args.controlnet_type)
+    elif len(args.controlnet_type) != len(args.controlnet_scale):
+        raise ValueError(
+            f"Numbers of ControlNets {len(args.controlnet_type)} should be equal to number of ControlNet scales {len(args.controlnet_scale)}."
+        )
+
+    # Convert controlnet scales to tensor
+    controlnet_scale = torch.FloatTensor(args.controlnet_scale)
+
+    if is_xl:
+        images = process_controlnet_images_xl(args)
+    else:
+        images = []
+        if len(args.controlnet_image) > 0:
+            for i, image in enumerate(args.controlnet_image):
+                images.append(
+                    process_controlnet_image(args.controlnet_type[i], Image.open(image), args.height, args.width)
+                )
+        else:
+            images = controlnet_demo_images(args.controlnet_type, args.height, args.width)
+
+    return images, controlnet_scale
diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/diffusion_models.py b/onnxruntime/python/tools/transformers/models/stable_diffusion/diffusion_models.py
new file mode 100644
index 0000000000000..c09aff2f514c6
--- /dev/null
+++ b/onnxruntime/python/tools/transformers/models/stable_diffusion/diffusion_models.py
@@ -0,0 +1,1278 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation.  All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+# Modified from stable_diffusion_tensorrt_txt2img.py in diffusers and TensorRT demo diffusion,
+# which has the following license:
+#
+# Copyright 2023 The HuggingFace Inc. team.
+# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+import os
+import tempfile
+from typing import Dict, List, Optional
+
+import onnx
+import onnx_graphsurgeon as gs
+import torch
+from diffusers.models import AutoencoderKL, ControlNetModel, UNet2DConditionModel
+from onnx import GraphProto, ModelProto, shape_inference
+from ort_optimizer import OrtStableDiffusionOptimizer
+from polygraphy.backend.onnx.loader import fold_constants
+from transformers import CLIPTextModel, CLIPTextModelWithProjection, CLIPTokenizer
+
+from onnxruntime.transformers.onnx_model import OnnxModel
+
+logger = logging.getLogger(__name__)
+
+
+class TrtOptimizer:
+    def __init__(self, onnx_graph):
+        self.graph = gs.import_onnx(onnx_graph)
+
+    def cleanup(self):
+        self.graph.cleanup().toposort()
+
+    def get_optimized_onnx_graph(self):
+        return gs.export_onnx(self.graph)
+
+    def select_outputs(self, keep, names=None):
+        self.graph.outputs = [self.graph.outputs[o] for o in keep]
+        if names:
+            for i, name in enumerate(names):
+                self.graph.outputs[i].name = name
+
+    def fold_constants(self):
+        onnx_graph = fold_constants(gs.export_onnx(self.graph), allow_onnxruntime_shape_inference=True)
+        self.graph = gs.import_onnx(onnx_graph)
+
+    def infer_shapes(self):
+        onnx_graph = gs.export_onnx(self.graph)
+        if onnx_graph.ByteSize() >= onnx.checker.MAXIMUM_PROTOBUF:
+            with tempfile.TemporaryDirectory() as temp_dir:
+                input_onnx_path = os.path.join(temp_dir, "model.onnx")
+                onnx.save_model(
+                    onnx_graph,
+                    input_onnx_path,
+                    save_as_external_data=True,
+                    all_tensors_to_one_file=True,
+                    convert_attribute=False,
+                )
+                output_onnx_path = os.path.join(temp_dir, "model_with_shape.onnx")
+                onnx.shape_inference.infer_shapes_path(input_onnx_path, output_onnx_path)
+                onnx_graph = onnx.load(output_onnx_path)
+        else:
+            onnx_graph = shape_inference.infer_shapes(onnx_graph)
+
+        self.graph = gs.import_onnx(onnx_graph)
+
+
+class PipelineInfo:
+    def __init__(
+        self,
+        version: str,
+        is_inpaint: bool = False,
+        is_refiner: bool = False,
+        use_vae=False,
+        min_image_size=256,
+        max_image_size=1024,
+        use_fp16_vae=True,
+        use_lcm=False,
+        do_classifier_free_guidance=True,
+        controlnet=None,
+        lora_weights=None,
+        lora_scale=1.0,
+    ):
+        self.version = version
+        self._is_inpaint = is_inpaint
+        self._is_refiner = is_refiner
+        self._use_vae = use_vae
+        self._min_image_size = min_image_size
+        self._max_image_size = max_image_size
+        self._use_fp16_vae = use_fp16_vae
+        self._use_lcm = use_lcm
+        self.do_classifier_free_guidance = do_classifier_free_guidance and not use_lcm
+        self.controlnet = controlnet  # A list of control net type
+        self.lora_weights = lora_weights
+        self.lora_scale = lora_scale
+
+        if is_refiner:
+            assert not use_lcm
+            assert self.is_xl()
+
+    def is_inpaint(self) -> bool:
+        return self._is_inpaint
+
+    def is_xl(self) -> bool:
+        return "xl" in self.version
+
+    def is_xl_base(self) -> bool:
+        return self.is_xl() and not self._is_refiner
+
+    def is_xl_refiner(self) -> bool:
+        return self.is_xl() and self._is_refiner
+
+    def use_safetensors(self) -> bool:
+        return self.is_xl()
+
+    def stages(self) -> List[str]:
+        if self.is_xl_base():
+            return ["clip", "clip2", "unetxl"] + (["vae"] if self._use_vae else [])
+
+        if self.is_xl_refiner():
+            return ["clip2", "unetxl", "vae"]
+
+        return ["clip", "unet", "vae"]
+
+    def vae_scaling_factor(self) -> float:
+        return 0.13025 if self.is_xl() else 0.18215
+
+    def vae_torch_fallback(self) -> bool:
+        return self.is_xl() and not self._use_fp16_vae
+
+    def custom_fp16_vae(self) -> Optional[str]:
+        # For SD XL, use a VAE that fine-tuned to run in fp16 precision without generating NaNs
+        return "madebyollin/sdxl-vae-fp16-fix" if self._use_fp16_vae and self.is_xl() else None
+
+    def custom_unet(self) -> Optional[str]:
+        return "latent-consistency/lcm-sdxl" if self._use_lcm and self.is_xl_base() else None
+
+    @staticmethod
+    def supported_versions(is_xl: bool):
+        return ["xl-1.0"] if is_xl else ["1.4", "1.5", "2.0-base", "2.0", "2.1", "2.1-base"]
+
+    def name(self) -> str:
+        if self.version == "1.4":
+            if self.is_inpaint():
+                return "runwayml/stable-diffusion-inpainting"
+            else:
+                return "CompVis/stable-diffusion-v1-4"
+        elif self.version == "1.5":
+            if self.is_inpaint():
+                return "runwayml/stable-diffusion-inpainting"
+            else:
+                return "runwayml/stable-diffusion-v1-5"
+        elif self.version == "2.0-base":
+            if self.is_inpaint():
+                return "stabilityai/stable-diffusion-2-inpainting"
+            else:
+                return "stabilityai/stable-diffusion-2-base"
+        elif self.version == "2.0":
+            if self.is_inpaint():
+                return "stabilityai/stable-diffusion-2-inpainting"
+            else:
+                return "stabilityai/stable-diffusion-2"
+        elif self.version == "2.1":
+            return "stabilityai/stable-diffusion-2-1"
+        elif self.version == "2.1-base":
+            return "stabilityai/stable-diffusion-2-1-base"
+        elif self.version == "xl-1.0":
+            if self.is_xl_refiner():
+                return "stabilityai/stable-diffusion-xl-refiner-1.0"
+            else:
+                return "stabilityai/stable-diffusion-xl-base-1.0"
+
+        raise ValueError(f"Incorrect version {self.version}")
+
+    def short_name(self) -> str:
+        return self.name().split("/")[-1].replace("stable-diffusion", "sd")
+
+    def clip_embedding_dim(self):
+        # TODO: can we read from config instead
+        if self.version in ("1.4", "1.5"):
+            return 768
+        elif self.version in ("2.0", "2.0-base", "2.1", "2.1-base"):
+            return 1024
+        elif self.version in ("xl-1.0") and self.is_xl_base():
+            return 768
+        else:
+            raise ValueError(f"Invalid version {self.version}")
+
+    def clipwithproj_embedding_dim(self):
+        if self.version in ("xl-1.0"):
+            return 1280
+        else:
+            raise ValueError(f"Invalid version {self.version}")
+
+    def unet_embedding_dim(self):
+        if self.version in ("1.4", "1.5"):
+            return 768
+        elif self.version in ("2.0", "2.0-base", "2.1", "2.1-base"):
+            return 1024
+        elif self.version in ("xl-1.0") and self.is_xl_base():
+            return 2048
+        elif self.version in ("xl-1.0") and self.is_xl_refiner():
+            return 1280
+        else:
+            raise ValueError(f"Invalid version {self.version}")
+
+    def min_image_size(self):
+        return self._min_image_size
+
+    def max_image_size(self):
+        return self._max_image_size
+
+    def default_image_size(self):
+        if self.is_xl():
+            return 1024
+        if self.version in ("2.0", "2.1"):
+            return 768
+        return 512
+
+    @staticmethod
+    def supported_controlnet(version="1.5"):
+        if version == "xl-1.0":
+            return {
+                "canny": "diffusers/controlnet-canny-sdxl-1.0",
+                "depth": "diffusers/controlnet-depth-sdxl-1.0",
+            }
+        elif version == "1.5":
+            return {
+                "canny": "lllyasviel/control_v11p_sd15_canny",
+                "depth": "lllyasviel/control_v11f1p_sd15_depth",
+                "openpose": "lllyasviel/control_v11p_sd15_openpose",
+                # "tile": "lllyasviel/control_v11f1e_sd15_tile",
+                # "lineart": "lllyasviel/control_v11p_sd15_lineart",
+                # "inpaint": "lllyasviel/control_v11p_sd15_inpaint",
+                # "softedge": "lllyasviel/control_v11p_sd15_softedge",
+                "mlsd": "lllyasviel/control_v11p_sd15_mlsd",
+                "scribble": "lllyasviel/control_v11p_sd15_scribble",
+                # "ip2p": "lllyasviel/control_v11e_sd15_ip2p",
+                "normalbae": "lllyasviel/control_v11p_sd15_normalbae",
+                "seg": "lllyasviel/control_v11p_sd15_seg",
+                # "shuffle": "lllyasviel/control_v11e_sd15_shuffle",
+                # "lineart_anime": "lllyasviel/control_v11p_sd15s2_lineart_anime",
+            }
+        return None
+
+    def controlnet_name(self):
+        """Return a list of controlnet name"""
+        if not self.controlnet:
+            return None
+        controlnet_map = PipelineInfo.supported_controlnet(self.version)
+        if controlnet_map is None:
+            return None
+        return [controlnet_map[controlnet] for controlnet in self.controlnet]
+
+
+class BaseModel:
+    def __init__(
+        self,
+        pipeline_info: PipelineInfo,
+        model,
+        device,
+        fp16: bool = False,
+        max_batch_size: int = 16,
+        embedding_dim: int = 768,
+        text_maxlen: int = 77,
+    ):
+        self.name = self.__class__.__name__
+
+        self.pipeline_info = pipeline_info
+
+        self.model = model
+        self.fp16 = fp16
+        self.device = device
+
+        self.min_batch = 1
+        self.max_batch = max_batch_size
+        self.min_image_shape = pipeline_info.min_image_size()
+        self.max_image_shape = pipeline_info.max_image_size()
+        self.min_latent_shape = self.min_image_shape // 8
+        self.max_latent_shape = self.max_image_shape // 8
+
+        self.embedding_dim = embedding_dim
+        self.text_maxlen = text_maxlen
+
+    def get_batch_multiplier(self):
+        return 2 if self.pipeline_info.do_classifier_free_guidance else 1
+
+    def get_ort_optimizer(self):
+        model_name_to_model_type = {
+            "CLIP": "clip",
+            "UNet": "unet",
+            "VAE": "vae",
+            "UNetXL": "unet",
+            "CLIPWithProj": "clip",
+        }
+        model_type = model_name_to_model_type[self.name]
+        return OrtStableDiffusionOptimizer(model_type)
+
+    def get_model(self):
+        return self.model
+
+    def from_pretrained(self, model_class, framework_model_dir, hf_token, subfolder, **kwargs):
+        model_dir = os.path.join(framework_model_dir, self.pipeline_info.name(), subfolder)
+
+        if not os.path.exists(model_dir):
+            model = model_class.from_pretrained(
+                self.pipeline_info.name(),
+                subfolder=subfolder,
+                use_safetensors=self.pipeline_info.use_safetensors(),
+                use_auth_token=hf_token,
+                **kwargs,
+            ).to(self.device)
+            model.save_pretrained(model_dir)
+        else:
+            print(f"Load {self.name} pytorch model from: {model_dir}")
+
+            model = model_class.from_pretrained(model_dir).to(self.device)
+        return model
+
+    def load_model(self, framework_model_dir: str, hf_token: str, subfolder: str):
+        pass
+
+    def get_input_names(self) -> List[str]:
+        pass
+
+    def get_output_names(self) -> List[str]:
+        pass
+
+    def get_dynamic_axes(self) -> Dict[str, Dict[int, str]]:
+        pass
+
+    def get_sample_input(self, batch_size, image_height, image_width) -> tuple:
+        pass
+
+    def get_profile_id(self, batch_size, image_height, image_width, static_batch, static_image_shape):
+        """For TensorRT EP"""
+        (
+            min_batch,
+            max_batch,
+            min_image_height,
+            max_image_height,
+            min_image_width,
+            max_image_width,
+            _,
+            _,
+            _,
+            _,
+        ) = self.get_minmax_dims(batch_size, image_height, image_width, static_batch, static_image_shape)
+
+        if (self.name in ["UNet", "UNetXL"]) and (self.get_batch_multiplier() == 1):
+            profile_id = f"_b1_{batch_size}" if static_batch else f"_b1_{min_batch}_{max_batch}"
+        else:
+            profile_id = f"_b_{batch_size}" if static_batch else f"_b_{min_batch}_{max_batch}"
+
+        if self.name != "CLIP":
+            if static_image_shape:
+                profile_id += f"_h_{image_height}_w_{image_width}"
+            else:
+                profile_id += f"_h_{min_image_height}_{max_image_height}_w_{min_image_width}_{max_image_width}"
+
+        return profile_id
+
+    def get_input_profile(self, batch_size, image_height, image_width, static_batch, static_image_shape):
+        """For TensorRT"""
+        pass
+
+    def get_shape_dict(self, batch_size, image_height, image_width):
+        pass
+
+    def fp32_input_output_names(self) -> List[str]:
+        """For CUDA EP, we export ONNX model with FP32 first, then convert it to mixed precision model.
+        This is a list of input or output names that are kept as float32 during converting.
+        For the first version, we will use same data type as TensorRT.
+        """
+        return []
+
+    def optimize_ort(
+        self,
+        input_onnx_path,
+        optimized_onnx_path,
+        to_fp16=True,
+        fp32_op_list=None,
+        optimize_by_ort=True,
+        optimize_by_fusion=True,
+        tmp_dir=None,
+    ):
+        optimizer = self.get_ort_optimizer()
+        optimizer.optimize(
+            input_onnx_path,
+            optimized_onnx_path,
+            float16=to_fp16,
+            keep_io_types=self.fp32_input_output_names(),
+            fp32_op_list=fp32_op_list,
+            optimize_by_ort=optimize_by_ort,
+            optimize_by_fusion=optimize_by_fusion,
+            tmp_dir=tmp_dir,
+        )
+
+    def optimize_trt(self, input_onnx_path, optimized_onnx_path):
+        onnx_graph = onnx.load(input_onnx_path)
+        opt = TrtOptimizer(onnx_graph)
+        opt.cleanup()
+        opt.fold_constants()
+        opt.infer_shapes()
+        opt.cleanup()
+        onnx_opt_graph = opt.get_optimized_onnx_graph()
+
+        if onnx_opt_graph.ByteSize() > onnx.checker.MAXIMUM_PROTOBUF:
+            onnx.save_model(
+                onnx_opt_graph,
+                optimized_onnx_path,
+                save_as_external_data=True,
+                all_tensors_to_one_file=True,
+                convert_attribute=False,
+            )
+        else:
+            onnx.save(onnx_opt_graph, optimized_onnx_path)
+
+    def check_dims(self, batch_size, image_height, image_width):
+        assert batch_size >= self.min_batch and batch_size <= self.max_batch
+        assert image_height % 8 == 0 or image_width % 8 == 0
+        latent_height = image_height // 8
+        latent_width = image_width // 8
+        assert latent_height >= self.min_latent_shape and latent_height <= self.max_latent_shape
+        assert latent_width >= self.min_latent_shape and latent_width <= self.max_latent_shape
+        return (latent_height, latent_width)
+
+    def get_minmax_dims(self, batch_size, image_height, image_width, static_batch, static_image_shape):
+        min_batch = batch_size if static_batch else self.min_batch
+        max_batch = batch_size if static_batch else self.max_batch
+        latent_height = image_height // 8
+        latent_width = image_width // 8
+        min_image_height = image_height if static_image_shape else self.min_image_shape
+        max_image_height = image_height if static_image_shape else self.max_image_shape
+        min_image_width = image_width if static_image_shape else self.min_image_shape
+        max_image_width = image_width if static_image_shape else self.max_image_shape
+        min_latent_height = latent_height if static_image_shape else self.min_latent_shape
+        max_latent_height = latent_height if static_image_shape else self.max_latent_shape
+        min_latent_width = latent_width if static_image_shape else self.min_latent_shape
+        max_latent_width = latent_width if static_image_shape else self.max_latent_shape
+        return (
+            min_batch,
+            max_batch,
+            min_image_height,
+            max_image_height,
+            min_image_width,
+            max_image_width,
+            min_latent_height,
+            max_latent_height,
+            min_latent_width,
+            max_latent_width,
+        )
+
+
+class CLIP(BaseModel):
+    def __init__(
+        self,
+        pipeline_info: PipelineInfo,
+        model,
+        device,
+        max_batch_size,
+        embedding_dim: int = 0,
+        clip_skip=0,
+    ):
+        super().__init__(
+            pipeline_info,
+            model=model,
+            device=device,
+            max_batch_size=max_batch_size,
+            embedding_dim=embedding_dim if embedding_dim > 0 else pipeline_info.clip_embedding_dim(),
+        )
+        self.output_hidden_state = pipeline_info.is_xl()
+
+        # see https://github.com/huggingface/diffusers/pull/5057 for more information of clip_skip.
+        # Clip_skip=1 means that the output of the pre-final layer will be used for computing the prompt embeddings.
+        self.clip_skip = clip_skip
+
+    def get_input_names(self):
+        return ["input_ids"]
+
+    def get_output_names(self):
+        # The exported onnx model has no hidden_state. For SD-XL, We will add hidden_state to optimized onnx model.
+        return ["text_embeddings"]
+
+    def get_dynamic_axes(self):
+        return {"input_ids": {0: "B"}, "text_embeddings": {0: "B"}}
+
+    def get_input_profile(self, batch_size, image_height, image_width, static_batch, static_image_shape):
+        self.check_dims(batch_size, image_height, image_width)
+        min_batch, max_batch, _, _, _, _, _, _, _, _ = self.get_minmax_dims(
+            batch_size, image_height, image_width, static_batch, static_image_shape
+        )
+        return {
+            "input_ids": [(min_batch, self.text_maxlen), (batch_size, self.text_maxlen), (max_batch, self.text_maxlen)]
+        }
+
+    def get_shape_dict(self, batch_size, image_height, image_width):
+        self.check_dims(batch_size, image_height, image_width)
+        output = {
+            "input_ids": (batch_size, self.text_maxlen),
+            "text_embeddings": (batch_size, self.text_maxlen, self.embedding_dim),
+        }
+
+        if self.output_hidden_state:
+            output["hidden_states"] = (batch_size, self.text_maxlen, self.embedding_dim)
+
+        return output
+
+    def get_sample_input(self, batch_size, image_height, image_width):
+        self.check_dims(batch_size, image_height, image_width)
+        return (torch.zeros(batch_size, self.text_maxlen, dtype=torch.int32, device=self.device),)
+
+    def add_hidden_states_graph_output(self, model: ModelProto, optimized_onnx_path, use_external_data_format=False):
+        graph: GraphProto = model.graph
+        hidden_layers = -1
+        for i in range(len(graph.node)):
+            for j in range(len(graph.node[i].output)):
+                name = graph.node[i].output[j]
+                if "layers" in name:
+                    hidden_layers = max(int(name.split(".")[1].split("/")[0]), hidden_layers)
+
+        assert self.clip_skip >= 0 and self.clip_skip < hidden_layers
+
+        node_output_name = f"/text_model/encoder/layers.{hidden_layers - 1 - self.clip_skip}/Add_1_output_0"
+
+        # search the name in outputs of all node
+        found = False
+        for i in range(len(graph.node)):
+            for j in range(len(graph.node[i].output)):
+                if graph.node[i].output[j] == node_output_name:
+                    found = True
+                    break
+            if found:
+                break
+        if not found:
+            raise RuntimeError("Failed to find hidden_states graph output in clip")
+
+        # Insert a Cast  (fp32 -> fp16) node so that hidden_states has same data type as the first graph output.
+        graph_output_name = "hidden_states"
+        cast_node = onnx.helper.make_node("Cast", inputs=[node_output_name], outputs=[graph_output_name])
+        cast_node.attribute.extend([onnx.helper.make_attribute("to", graph.output[0].type.tensor_type.elem_type)])
+
+        hidden_state = graph.output.add()
+        hidden_state.CopyFrom(
+            onnx.helper.make_tensor_value_info(
+                graph_output_name,
+                graph.output[0].type.tensor_type.elem_type,
+                ["B", self.text_maxlen, self.embedding_dim],
+            )
+        )
+
+        onnx_model = OnnxModel(model)
+        onnx_model.add_node(cast_node)
+        onnx_model.save_model_to_file(optimized_onnx_path, use_external_data_format=use_external_data_format)
+
+    def optimize_ort(
+        self,
+        input_onnx_path,
+        optimized_onnx_path,
+        to_fp16=True,
+        fp32_op_list=None,
+        optimize_by_ort=True,
+        optimize_by_fusion=True,
+        tmp_dir=None,
+    ):
+        optimizer = self.get_ort_optimizer()
+
+        if not self.output_hidden_state:
+            optimizer.optimize(
+                input_onnx_path,
+                optimized_onnx_path,
+                float16=to_fp16,
+                keep_io_types=[],
+                fp32_op_list=fp32_op_list,
+                keep_outputs=["text_embeddings"],
+                optimize_by_ort=optimize_by_ort,
+                optimize_by_fusion=optimize_by_fusion,
+                tmp_dir=tmp_dir,
+            )
+        elif optimize_by_fusion:
+            with tempfile.TemporaryDirectory() as tmp_dir:
+                # Save to a temporary file so that we can load it with Onnx Runtime.
+                logger.info("Saving a temporary model to add hidden_states to graph output ...")
+                tmp_model_path = os.path.join(tmp_dir, "model.onnx")
+
+                model = onnx.load(input_onnx_path)
+                self.add_hidden_states_graph_output(model, tmp_model_path, use_external_data_format=True)
+                optimizer.optimize(
+                    tmp_model_path,
+                    optimized_onnx_path,
+                    float16=to_fp16,
+                    keep_io_types=[],
+                    fp32_op_list=fp32_op_list,
+                    keep_outputs=["text_embeddings", "hidden_states"],
+                    optimize_by_ort=optimize_by_ort,
+                    optimize_by_fusion=optimize_by_fusion,
+                    tmp_dir=tmp_dir,
+                )
+        else:  # input is optimized model, there is no need to add hidden states.
+            optimizer.optimize(
+                input_onnx_path,
+                optimized_onnx_path,
+                float16=to_fp16,
+                keep_io_types=[],
+                fp32_op_list=fp32_op_list,
+                keep_outputs=["text_embeddings", "hidden_states"],
+                optimize_by_ort=optimize_by_ort,
+                optimize_by_fusion=optimize_by_fusion,
+                tmp_dir=tmp_dir,
+            )
+
+    def optimize_trt(self, input_onnx_path, optimized_onnx_path):
+        onnx_graph = onnx.load(input_onnx_path)
+        opt = TrtOptimizer(onnx_graph)
+        opt.select_outputs([0])  # delete graph output#1
+        opt.cleanup()
+        opt.fold_constants()
+        opt.infer_shapes()
+        opt.select_outputs([0], names=["text_embeddings"])  # rename network output
+        opt.cleanup()
+        onnx_opt_graph = opt.get_optimized_onnx_graph()
+        if self.output_hidden_state:
+            self.add_hidden_states_graph_output(onnx_opt_graph, optimized_onnx_path)
+        else:
+            onnx.save(onnx_opt_graph, optimized_onnx_path)
+
+    def load_model(self, framework_model_dir, hf_token, subfolder="text_encoder"):
+        return self.from_pretrained(CLIPTextModel, framework_model_dir, hf_token, subfolder)
+
+
+class CLIPWithProj(CLIP):
+    def __init__(
+        self,
+        pipeline_info: PipelineInfo,
+        model,
+        device,
+        max_batch_size=16,
+        clip_skip=0,
+    ):
+        super().__init__(
+            pipeline_info,
+            model,
+            device=device,
+            max_batch_size=max_batch_size,
+            embedding_dim=pipeline_info.clipwithproj_embedding_dim(),
+            clip_skip=clip_skip,
+        )
+
+    def load_model(self, framework_model_dir, hf_token, subfolder="text_encoder_2"):
+        return self.from_pretrained(CLIPTextModelWithProjection, framework_model_dir, hf_token, subfolder)
+
+    def get_shape_dict(self, batch_size, image_height, image_width):
+        self.check_dims(batch_size, image_height, image_width)
+        output = {
+            "input_ids": (batch_size, self.text_maxlen),
+            "text_embeddings": (batch_size, self.embedding_dim),
+        }
+
+        if self.output_hidden_state:
+            output["hidden_states"] = (batch_size, self.text_maxlen, self.embedding_dim)
+
+        return output
+
+
+class UNet2DConditionControlNetModel(torch.nn.Module):
+    def __init__(self, unet, controlnets: ControlNetModel):
+        super().__init__()
+        self.unet = unet
+        self.controlnets = controlnets
+
+    def forward(self, sample, timestep, encoder_hidden_states, controlnet_images, controlnet_scales):
+        for i, (controlnet_image, conditioning_scale, controlnet) in enumerate(
+            zip(controlnet_images, controlnet_scales, self.controlnets)
+        ):
+            down_samples, mid_sample = controlnet(
+                sample,
+                timestep,
+                encoder_hidden_states=encoder_hidden_states,
+                controlnet_cond=controlnet_image,
+                return_dict=False,
+            )
+
+            down_samples = [down_sample * conditioning_scale for down_sample in down_samples]
+            mid_sample *= conditioning_scale
+
+            # merge samples
+            if i == 0:
+                down_block_res_samples, mid_block_res_sample = down_samples, mid_sample
+            else:
+                down_block_res_samples = [
+                    samples_prev + samples_curr
+                    for samples_prev, samples_curr in zip(down_block_res_samples, down_samples)
+                ]
+                mid_block_res_sample += mid_sample
+
+        noise_pred = self.unet(
+            sample,
+            timestep,
+            encoder_hidden_states=encoder_hidden_states,
+            down_block_additional_residuals=down_block_res_samples,
+            mid_block_additional_residual=mid_block_res_sample,
+        )
+        return noise_pred[0]
+
+
+# Modified from convert_stable_diffusion_controlnet_to_onnx.py in diffusers
+class UNet2DConditionXLControlNetModel(torch.nn.Module):
+    def __init__(self, unet, controlnets: ControlNetModel):
+        super().__init__()
+        self.unet = unet
+        self.controlnets = controlnets
+
+    def forward(
+        self,
+        sample,
+        timestep,
+        encoder_hidden_states,
+        text_embeds,
+        time_ids,
+        controlnet_images,
+        controlnet_scales,
+    ):
+        added_cond_kwargs = {"text_embeds": text_embeds, "time_ids": time_ids}
+        for i, (controlnet_image, conditioning_scale, controlnet) in enumerate(
+            zip(controlnet_images, controlnet_scales, self.controlnets)
+        ):
+            down_samples, mid_sample = controlnet(
+                sample,
+                timestep,
+                encoder_hidden_states=encoder_hidden_states,
+                controlnet_cond=controlnet_image,
+                conditioning_scale=conditioning_scale,
+                added_cond_kwargs=added_cond_kwargs,
+                return_dict=False,
+            )
+
+            # merge samples
+            if i == 0:
+                down_block_res_samples, mid_block_res_sample = down_samples, mid_sample
+            else:
+                down_block_res_samples = [
+                    samples_prev + samples_curr
+                    for samples_prev, samples_curr in zip(down_block_res_samples, down_samples)
+                ]
+                mid_block_res_sample += mid_sample
+
+        noise_pred = self.unet(
+            sample,
+            timestep,
+            encoder_hidden_states=encoder_hidden_states,
+            down_block_additional_residuals=down_block_res_samples,
+            mid_block_additional_residual=mid_block_res_sample,
+            added_cond_kwargs=added_cond_kwargs,
+            return_dict=False,
+        )
+        return noise_pred[0]
+
+
+class UNet(BaseModel):
+    def __init__(
+        self,
+        pipeline_info: PipelineInfo,
+        model,
+        device,
+        fp16=False,  # used by TRT
+        max_batch_size=16,
+        text_maxlen=77,
+        unet_dim=4,
+    ):
+        super().__init__(
+            pipeline_info,
+            model=model,
+            device=device,
+            fp16=fp16,
+            max_batch_size=max_batch_size,
+            embedding_dim=pipeline_info.unet_embedding_dim(),
+            text_maxlen=text_maxlen,
+        )
+
+        self.unet_dim = unet_dim
+        self.controlnet = pipeline_info.controlnet_name()
+
+    def load_model(self, framework_model_dir, hf_token, subfolder="unet"):
+        options = {"variant": "fp16", "torch_dtype": torch.float16} if self.fp16 else {}
+
+        model = self.from_pretrained(UNet2DConditionModel, framework_model_dir, hf_token, subfolder, **options)
+
+        if self.controlnet:
+            cnet_model_opts = {"torch_dtype": torch.float16} if self.fp16 else {}
+            controlnets = torch.nn.ModuleList(
+                [ControlNetModel.from_pretrained(name, **cnet_model_opts).to(self.device) for name in self.controlnet]
+            )
+            model = UNet2DConditionControlNetModel(model, controlnets)
+
+        return model
+
+    def get_input_names(self):
+        if not self.controlnet:
+            return ["sample", "timestep", "encoder_hidden_states"]
+        else:
+            return ["sample", "timestep", "encoder_hidden_states", "controlnet_images", "controlnet_scales"]
+
+    def get_output_names(self):
+        return ["latent"]
+
+    def get_dynamic_axes(self):
+        b = "2B" if self.get_batch_multiplier() == 2 else "B"
+        output = {
+            "sample": {0: b, 2: "H", 3: "W"},
+            "encoder_hidden_states": {0: b},
+            "latent": {0: b, 2: "H", 3: "W"},
+        }
+        if self.controlnet:
+            output.update(
+                {
+                    "controlnet_images": {1: b, 3: "8H", 4: "8W"},
+                }
+            )
+        return output
+
+    def get_input_profile(self, batch_size, image_height, image_width, static_batch, static_image_shape):
+        latent_height, latent_width = self.check_dims(batch_size, image_height, image_width)
+        (
+            min_batch,
+            max_batch,
+            min_image_height,
+            max_image_height,
+            min_image_width,
+            max_image_width,
+            min_latent_height,
+            max_latent_height,
+            min_latent_width,
+            max_latent_width,
+        ) = self.get_minmax_dims(batch_size, image_height, image_width, static_batch, static_image_shape)
+        m = self.get_batch_multiplier()
+        output = {
+            "sample": [
+                (m * min_batch, self.unet_dim, min_latent_height, min_latent_width),
+                (m * batch_size, self.unet_dim, latent_height, latent_width),
+                (m * max_batch, self.unet_dim, max_latent_height, max_latent_width),
+            ],
+            "encoder_hidden_states": [
+                (m * min_batch, self.text_maxlen, self.embedding_dim),
+                (m * batch_size, self.text_maxlen, self.embedding_dim),
+                (m * max_batch, self.text_maxlen, self.embedding_dim),
+            ],
+        }
+
+        if self.controlnet:
+            output.update(
+                {
+                    "controlnet_images": [
+                        (len(self.controlnet), m * min_batch, 3, min_image_height, min_image_width),
+                        (len(self.controlnet), m * batch_size, 3, image_height, image_width),
+                        (len(self.controlnet), m * max_batch, 3, max_image_height, max_image_width),
+                    ]
+                }
+            )
+        return output
+
+    def get_shape_dict(self, batch_size, image_height, image_width):
+        latent_height, latent_width = self.check_dims(batch_size, image_height, image_width)
+        m = self.get_batch_multiplier()
+        output = {
+            "sample": (m * batch_size, self.unet_dim, latent_height, latent_width),
+            "timestep": [1],
+            "encoder_hidden_states": (m * batch_size, self.text_maxlen, self.embedding_dim),
+            "latent": (m * batch_size, 4, latent_height, latent_width),
+        }
+
+        if self.controlnet:
+            output.update(
+                {
+                    "controlnet_images": (len(self.controlnet), m * batch_size, 3, image_height, image_width),
+                    "controlnet_scales": [len(self.controlnet)],
+                }
+            )
+        return output
+
+    def get_sample_input(self, batch_size, image_height, image_width):
+        latent_height, latent_width = self.check_dims(batch_size, image_height, image_width)
+        dtype = torch.float16 if self.fp16 else torch.float32
+        m = self.get_batch_multiplier()
+        output = (
+            torch.randn(
+                m * batch_size, self.unet_dim, latent_height, latent_width, dtype=torch.float32, device=self.device
+            ),
+            torch.tensor([1.0], dtype=torch.float32, device=self.device),
+            torch.randn(m * batch_size, self.text_maxlen, self.embedding_dim, dtype=dtype, device=self.device),
+        )
+
+        if self.controlnet:
+            output = (
+                *output,
+                torch.randn(
+                    len(self.controlnet), m * batch_size, 3, image_height, image_width, dtype=dtype, device=self.device
+                ),
+                torch.randn(len(self.controlnet), dtype=dtype, device=self.device),
+            )
+        return output
+
+    def fp32_input_output_names(self) -> List[str]:
+        return ["sample", "timestep"]
+
+
+class UNetXL(BaseModel):
+    def __init__(
+        self,
+        pipeline_info: PipelineInfo,
+        model,
+        device,
+        fp16=False,  # used by TRT
+        max_batch_size=16,
+        text_maxlen=77,
+        unet_dim=4,
+        time_dim=6,
+    ):
+        super().__init__(
+            pipeline_info,
+            model,
+            device=device,
+            fp16=fp16,
+            max_batch_size=max_batch_size,
+            embedding_dim=pipeline_info.unet_embedding_dim(),
+            text_maxlen=text_maxlen,
+        )
+        self.unet_dim = unet_dim
+        self.time_dim = time_dim
+
+        self.custom_unet = pipeline_info.custom_unet()
+        self.controlnet = pipeline_info.controlnet_name()
+
+    def load_model(self, framework_model_dir, hf_token, subfolder="unet"):
+        options = {"variant": "fp16", "torch_dtype": torch.float16} if self.fp16 else {}
+
+        if self.custom_unet:
+            model_dir = os.path.join(framework_model_dir, self.custom_unet, subfolder)
+            if not os.path.exists(model_dir):
+                unet = UNet2DConditionModel.from_pretrained(self.custom_unet, **options)
+                unet.save_pretrained(model_dir)
+            else:
+                unet = UNet2DConditionModel.from_pretrained(model_dir, **options)
+            model = unet.to(self.device)
+        else:
+            model = self.from_pretrained(UNet2DConditionModel, framework_model_dir, hf_token, subfolder, **options)
+
+        if self.controlnet:
+            cnet_model_opts = {"torch_dtype": torch.float16} if self.fp16 else {}
+            controlnets = torch.nn.ModuleList(
+                [ControlNetModel.from_pretrained(path, **cnet_model_opts).to(self.device) for path in self.controlnet]
+            )
+            model = UNet2DConditionXLControlNetModel(model, controlnets)
+
+        return model
+
+    def get_input_names(self):
+        input_names = ["sample", "timestep", "encoder_hidden_states", "text_embeds", "time_ids"]
+        if self.controlnet:
+            return [*input_names, "controlnet_images", "controlnet_scales"]
+        return input_names
+
+    def get_output_names(self):
+        return ["latent"]
+
+    def get_dynamic_axes(self):
+        b = "2B" if self.get_batch_multiplier() == 2 else "B"
+        output = {
+            "sample": {0: b, 2: "H", 3: "W"},
+            "encoder_hidden_states": {0: b},
+            "text_embeds": {0: b},
+            "time_ids": {0: b},
+            "latent": {0: b, 2: "H", 3: "W"},
+        }
+
+        if self.controlnet:
+            output.update(
+                {
+                    "controlnet_images": {1: b, 3: "8H", 4: "8W"},
+                }
+            )
+        return output
+
+    def get_input_profile(self, batch_size, image_height, image_width, static_batch, static_image_shape):
+        latent_height, latent_width = self.check_dims(batch_size, image_height, image_width)
+        (
+            min_batch,
+            max_batch,
+            min_image_height,
+            max_image_height,
+            min_image_width,
+            max_image_width,
+            min_latent_height,
+            max_latent_height,
+            min_latent_width,
+            max_latent_width,
+        ) = self.get_minmax_dims(batch_size, image_height, image_width, static_batch, static_image_shape)
+        m = self.get_batch_multiplier()
+        output = {
+            "sample": [
+                (m * min_batch, self.unet_dim, min_latent_height, min_latent_width),
+                (m * batch_size, self.unet_dim, latent_height, latent_width),
+                (m * max_batch, self.unet_dim, max_latent_height, max_latent_width),
+            ],
+            "encoder_hidden_states": [
+                (m * min_batch, self.text_maxlen, self.embedding_dim),
+                (m * batch_size, self.text_maxlen, self.embedding_dim),
+                (m * max_batch, self.text_maxlen, self.embedding_dim),
+            ],
+            "text_embeds": [(m * min_batch, 1280), (m * batch_size, 1280), (m * max_batch, 1280)],
+            "time_ids": [
+                (m * min_batch, self.time_dim),
+                (m * batch_size, self.time_dim),
+                (m * max_batch, self.time_dim),
+            ],
+        }
+
+        if self.controlnet:
+            output.update(
+                {
+                    "controlnet_images": [
+                        (len(self.controlnet), m * min_batch, 3, min_image_height, min_image_width),
+                        (len(self.controlnet), m * batch_size, 3, image_height, image_width),
+                        (len(self.controlnet), m * max_batch, 3, max_image_height, max_image_width),
+                    ],
+                }
+            )
+        return output
+
+    def get_shape_dict(self, batch_size, image_height, image_width):
+        latent_height, latent_width = self.check_dims(batch_size, image_height, image_width)
+        m = self.get_batch_multiplier()
+        output = {
+            "sample": (m * batch_size, self.unet_dim, latent_height, latent_width),
+            "timestep": (1,),
+            "encoder_hidden_states": (m * batch_size, self.text_maxlen, self.embedding_dim),
+            "text_embeds": (m * batch_size, 1280),
+            "time_ids": (m * batch_size, self.time_dim),
+            "latent": (m * batch_size, 4, latent_height, latent_width),
+        }
+
+        if self.controlnet:
+            output.update(
+                {
+                    "controlnet_images": (len(self.controlnet), m * batch_size, 3, image_height, image_width),
+                    "controlnet_scales": [len(self.controlnet)],
+                }
+            )
+        return output
+
+    def get_sample_input(self, batch_size, image_height, image_width):
+        latent_height, latent_width = self.check_dims(batch_size, image_height, image_width)
+        dtype = torch.float16 if self.fp16 else torch.float32
+        m = self.get_batch_multiplier()
+        if not self.controlnet:
+            return (
+                torch.randn(
+                    m * batch_size, self.unet_dim, latent_height, latent_width, dtype=torch.float32, device=self.device
+                ),
+                torch.tensor([1.0], dtype=torch.float32, device=self.device),
+                torch.randn(m * batch_size, self.text_maxlen, self.embedding_dim, dtype=dtype, device=self.device),
+                {
+                    "added_cond_kwargs": {
+                        "text_embeds": torch.randn(m * batch_size, 1280, dtype=dtype, device=self.device),
+                        "time_ids": torch.randn(m * batch_size, self.time_dim, dtype=dtype, device=self.device),
+                    }
+                },
+            )
+        else:
+            # sample, timestep, encoder_hidden_states, text_embeds, time_ids, controlnet_images, controlnet_scales,
+            return (
+                torch.randn(
+                    m * batch_size, self.unet_dim, latent_height, latent_width, dtype=torch.float32, device=self.device
+                ),
+                torch.tensor([1.0], dtype=torch.float32, device=self.device),
+                torch.randn(m * batch_size, self.text_maxlen, self.embedding_dim, dtype=dtype, device=self.device),
+                torch.randn(m * batch_size, 1280, dtype=dtype, device=self.device),
+                torch.randn(m * batch_size, self.time_dim, dtype=dtype, device=self.device),
+                torch.randn(
+                    len(self.controlnet), m * batch_size, 3, image_height, image_width, dtype=dtype, device=self.device
+                ),
+                torch.randn(len(self.controlnet), dtype=dtype, device=self.device),
+            )
+
+    def fp32_input_output_names(self) -> List[str]:
+        return ["sample", "timestep"]
+
+
+# VAE Decoder
+class VAE(BaseModel):
+    def __init__(
+        self,
+        pipeline_info: PipelineInfo,
+        model,
+        device,
+        max_batch_size,
+        fp16: bool = False,
+        custom_fp16_vae: Optional[str] = None,
+    ):
+        super().__init__(
+            pipeline_info,
+            model=model,
+            device=device,
+            fp16=fp16,
+            max_batch_size=max_batch_size,
+        )
+
+        # For SD XL, need custom trained fp16 model to speed up, and avoid overflow at the same time.
+        self.custom_fp16_vae = custom_fp16_vae
+
+    def load_model(self, framework_model_dir, hf_token: Optional[str] = None, subfolder: str = "vae_decoder"):
+        model_name = self.custom_fp16_vae or self.pipeline_info.name()
+
+        model_dir = os.path.join(framework_model_dir, model_name, subfolder)
+        if not os.path.exists(model_dir):
+            if self.custom_fp16_vae:
+                vae = AutoencoderKL.from_pretrained(self.custom_fp16_vae, torch_dtype=torch.float16).to(self.device)
+            else:
+                vae = AutoencoderKL.from_pretrained(
+                    self.pipeline_info.name(),
+                    subfolder="vae",
+                    use_safetensors=self.pipeline_info.use_safetensors(),
+                    use_auth_token=hf_token,
+                ).to(self.device)
+            vae.save_pretrained(model_dir)
+        else:
+            print(f"Load {self.name} pytorch model from: {model_dir}")
+            if self.custom_fp16_vae:
+                vae = AutoencoderKL.from_pretrained(model_dir, torch_dtype=torch.float16).to(self.device)
+            else:
+                vae = AutoencoderKL.from_pretrained(model_dir).to(self.device)
+
+        vae.forward = vae.decode
+        return vae
+
+    def get_input_names(self):
+        return ["latent"]
+
+    def get_output_names(self):
+        return ["images"]
+
+    def get_dynamic_axes(self):
+        return {"latent": {0: "B", 2: "H", 3: "W"}, "images": {0: "B", 2: "8H", 3: "8W"}}
+
+    def get_input_profile(self, batch_size, image_height, image_width, static_batch, static_image_shape):
+        latent_height, latent_width = self.check_dims(batch_size, image_height, image_width)
+        (
+            min_batch,
+            max_batch,
+            _,
+            _,
+            _,
+            _,
+            min_latent_height,
+            max_latent_height,
+            min_latent_width,
+            max_latent_width,
+        ) = self.get_minmax_dims(batch_size, image_height, image_width, static_batch, static_image_shape)
+        return {
+            "latent": [
+                (min_batch, 4, min_latent_height, min_latent_width),
+                (batch_size, 4, latent_height, latent_width),
+                (max_batch, 4, max_latent_height, max_latent_width),
+            ]
+        }
+
+    def get_shape_dict(self, batch_size, image_height, image_width):
+        latent_height, latent_width = self.check_dims(batch_size, image_height, image_width)
+        return {
+            "latent": (batch_size, 4, latent_height, latent_width),
+            "images": (batch_size, 3, image_height, image_width),
+        }
+
+    def get_sample_input(self, batch_size, image_height, image_width):
+        latent_height, latent_width = self.check_dims(batch_size, image_height, image_width)
+        return (torch.randn(batch_size, 4, latent_height, latent_width, dtype=torch.float32, device=self.device),)
+
+    def fp32_input_output_names(self) -> List[str]:
+        return [] if self.fp16 else ["latent", "images"]
+
+
+def get_tokenizer(pipeline_info: PipelineInfo, framework_model_dir, hf_token, subfolder="tokenizer"):
+    tokenizer_dir = os.path.join(framework_model_dir, pipeline_info.name(), subfolder)
+
+    if not os.path.exists(tokenizer_dir):
+        model = CLIPTokenizer.from_pretrained(
+            pipeline_info.name(),
+            subfolder=subfolder,
+            use_safetensors=pipeline_info.is_xl(),
+            use_auth_token=hf_token,
+        )
+        model.save_pretrained(tokenizer_dir)
+    else:
+        print(f"[I] Load tokenizer pytorch model from: {tokenizer_dir}")
+        model = CLIPTokenizer.from_pretrained(tokenizer_dir)
+    return model
+
+
+class TorchVAEEncoder(torch.nn.Module):
+    def __init__(self, vae_encoder):
+        super().__init__()
+        self.vae_encoder = vae_encoder
+
+    def forward(self, x):
+        return self.vae_encoder.encode(x).latent_dist.sample()
+
+
+class VAEEncoder(BaseModel):
+    def __init__(self, pipeline_info: PipelineInfo, model, device, max_batch_size):
+        super().__init__(
+            pipeline_info,
+            model=model,
+            device=device,
+            max_batch_size=max_batch_size,
+        )
+
+    def load_model(self, framework_model_dir, hf_token, subfolder="vae_encoder"):
+        vae = self.from_pretrained(AutoencoderKL, framework_model_dir, hf_token, subfolder)
+        return TorchVAEEncoder(vae)
+
+    def get_input_names(self):
+        return ["images"]
+
+    def get_output_names(self):
+        return ["latent"]
+
+    def get_dynamic_axes(self):
+        return {"images": {0: "B", 2: "8H", 3: "8W"}, "latent": {0: "B", 2: "H", 3: "W"}}
+
+    def get_input_profile(self, batch_size, image_height, image_width, static_batch, static_image_shape):
+        self.check_dims(batch_size, image_height, image_width)
+
+        (
+            min_batch,
+            max_batch,
+            min_image_height,
+            max_image_height,
+            min_image_width,
+            max_image_width,
+            _,
+            _,
+            _,
+            _,
+        ) = self.get_minmax_dims(batch_size, image_height, image_width, static_batch, static_image_shape)
+
+        return {
+            "images": [
+                (min_batch, 3, min_image_height, min_image_width),
+                (batch_size, 3, image_height, image_width),
+                (max_batch, 3, max_image_height, max_image_width),
+            ],
+        }
+
+    def get_shape_dict(self, batch_size, image_height, image_width):
+        latent_height, latent_width = self.check_dims(batch_size, image_height, image_width)
+        return {
+            "images": (batch_size, 3, image_height, image_width),
+            "latent": (batch_size, 4, latent_height, latent_width),
+        }
+
+    def get_sample_input(self, batch_size, image_height, image_width):
+        self.check_dims(batch_size, image_height, image_width)
+        return torch.randn(batch_size, 3, image_height, image_width, dtype=torch.float32, device=self.device)
diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/diffusion_schedulers.py b/onnxruntime/python/tools/transformers/models/stable_diffusion/diffusion_schedulers.py
new file mode 100644
index 0000000000000..6932c8056cf78
--- /dev/null
+++ b/onnxruntime/python/tools/transformers/models/stable_diffusion/diffusion_schedulers.py
@@ -0,0 +1,946 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation.  All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+# Modified from utilities.py of TensorRT demo diffusion, which has the following license:
+#
+# Copyright 2022 The HuggingFace Inc. team.
+# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# --------------------------------------------------------------------------
+
+from typing import List, Optional
+
+import numpy as np
+import torch
+
+
+class DDIMScheduler:
+    def __init__(
+        self,
+        device="cuda",
+        num_train_timesteps: int = 1000,
+        beta_start: float = 0.0001,
+        beta_end: float = 0.02,
+        clip_sample: bool = False,
+        set_alpha_to_one: bool = False,
+        steps_offset: int = 1,
+        prediction_type: str = "epsilon",
+    ):
+        # this schedule is very specific to the latent diffusion model.
+        betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
+
+        alphas = 1.0 - betas
+        self.alphas_cumprod = torch.cumprod(alphas, dim=0)
+        # standard deviation of the initial noise distribution
+        self.init_noise_sigma = 1.0
+
+        # At every step in ddim, we are looking into the previous alphas_cumprod
+        # For the final step, there is no previous alphas_cumprod because we are already at 0
+        # `set_alpha_to_one` decides whether we set this parameter simply to one or
+        # whether we use the final alpha of the "non-previous" one.
+        self.final_alpha_cumprod = torch.tensor(1.0) if set_alpha_to_one else self.alphas_cumprod[0]
+
+        # setable values
+        self.num_inference_steps = None
+        self.timesteps = torch.from_numpy(np.arange(0, num_train_timesteps)[::-1].copy().astype(np.int64))
+        self.steps_offset = steps_offset
+        self.num_train_timesteps = num_train_timesteps
+        self.clip_sample = clip_sample
+        self.prediction_type = prediction_type
+        self.device = device
+
+    def configure(self):
+        variance = np.zeros(self.num_inference_steps, dtype=np.float32)
+        for idx, timestep in enumerate(self.timesteps):
+            prev_timestep = timestep - self.num_train_timesteps // self.num_inference_steps
+            variance[idx] = self._get_variance(timestep, prev_timestep)
+        self.variance = torch.from_numpy(variance).to(self.device)
+
+        timesteps = self.timesteps.long().cpu()
+        self.filtered_alphas_cumprod = self.alphas_cumprod[timesteps].to(self.device)
+        self.final_alpha_cumprod = self.final_alpha_cumprod.to(self.device)
+
+    def scale_model_input(self, sample: torch.FloatTensor, idx, *args, **kwargs) -> torch.FloatTensor:
+        return sample
+
+    def _get_variance(self, timestep, prev_timestep):
+        alpha_prod_t = self.alphas_cumprod[timestep]
+        alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod
+        beta_prod_t = 1 - alpha_prod_t
+        beta_prod_t_prev = 1 - alpha_prod_t_prev
+
+        variance = (beta_prod_t_prev / beta_prod_t) * (1 - alpha_prod_t / alpha_prod_t_prev)
+
+        return variance
+
+    def set_timesteps(self, num_inference_steps: int):
+        self.num_inference_steps = num_inference_steps
+        step_ratio = self.num_train_timesteps // self.num_inference_steps
+        # creates integer timesteps by multiplying by ratio
+        # casting to int to avoid issues when num_inference_step is power of 3
+        timesteps = (np.arange(0, num_inference_steps) * step_ratio).round()[::-1].copy().astype(np.int64)
+        self.timesteps = torch.from_numpy(timesteps).to(self.device)
+        self.timesteps += self.steps_offset
+
+    def step(
+        self,
+        model_output,
+        sample,
+        idx,
+        timestep,
+        eta: float = 0.0,
+        use_clipped_model_output: bool = False,
+        generator=None,
+        variance_noise: torch.FloatTensor = None,
+    ):
+        if self.num_inference_steps is None:
+            raise ValueError(
+                "Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler"
+            )
+
+        # See formulas (12) and (16) of DDIM paper https://arxiv.org/pdf/2010.02502.pdf
+        # Ideally, read DDIM paper in-detail understanding
+
+        # Notation (<variable name> -> <name in paper>
+        # - pred_noise_t -> e_theta(x_t, t)
+        # - pred_original_sample -> f_theta(x_t, t) or x_0
+        # - std_dev_t -> sigma_t
+        # - eta -> η
+        # - pred_sample_direction -> "direction pointing to x_t"
+        # - pred_prev_sample -> "x_t-1"
+
+        prev_idx = idx + 1
+        alpha_prod_t = self.filtered_alphas_cumprod[idx]
+        alpha_prod_t_prev = (
+            self.filtered_alphas_cumprod[prev_idx] if prev_idx < self.num_inference_steps else self.final_alpha_cumprod
+        )
+
+        beta_prod_t = 1 - alpha_prod_t
+
+        # 3. compute predicted original sample from predicted noise also called
+        # "predicted x_0" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf
+        if self.prediction_type == "epsilon":
+            pred_original_sample = (sample - beta_prod_t ** (0.5) * model_output) / alpha_prod_t ** (0.5)
+        elif self.prediction_type == "sample":
+            pred_original_sample = model_output
+        elif self.prediction_type == "v_prediction":
+            pred_original_sample = (alpha_prod_t**0.5) * sample - (beta_prod_t**0.5) * model_output
+            # predict V
+            model_output = (alpha_prod_t**0.5) * model_output + (beta_prod_t**0.5) * sample
+        else:
+            raise ValueError(
+                f"prediction_type given as {self.prediction_type} must be one of `epsilon`, `sample`, or"
+                " `v_prediction`"
+            )
+
+        # 4. Clip "predicted x_0"
+        if self.clip_sample:
+            pred_original_sample = torch.clamp(pred_original_sample, -1, 1)
+
+        # 5. compute variance: "sigma_t(η)" -> see formula (16)
+        # o_t = sqrt((1 - a_t-1)/(1 - a_t)) * sqrt(1 - a_t/a_t-1)
+        variance = self.variance[idx]
+        std_dev_t = eta * variance ** (0.5)
+
+        if use_clipped_model_output:
+            # the model_output is always re-derived from the clipped x_0 in Glide
+            model_output = (sample - alpha_prod_t ** (0.5) * pred_original_sample) / beta_prod_t ** (0.5)
+
+        # 6. compute "direction pointing to x_t" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf
+        pred_sample_direction = (1 - alpha_prod_t_prev - std_dev_t**2) ** (0.5) * model_output
+
+        # 7. compute x_t without "random noise" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf
+        prev_sample = alpha_prod_t_prev ** (0.5) * pred_original_sample + pred_sample_direction
+
+        if eta > 0:
+            # randn_like does not support generator https://github.com/pytorch/pytorch/issues/27072
+            device = model_output.device
+            if variance_noise is not None and generator is not None:
+                raise ValueError(
+                    "Cannot pass both generator and variance_noise. Please make sure that either `generator` or"
+                    " `variance_noise` stays `None`."
+                )
+
+            if variance_noise is None:
+                variance_noise = torch.randn(
+                    model_output.shape, generator=generator, device=device, dtype=model_output.dtype
+                )
+            variance = std_dev_t * variance_noise
+
+            prev_sample = prev_sample + variance
+
+        return prev_sample
+
+    def add_noise(self, init_latents, noise, idx, latent_timestep):
+        sqrt_alpha_prod = self.filtered_alphas_cumprod[idx] ** 0.5
+        sqrt_one_minus_alpha_prod = (1 - self.filtered_alphas_cumprod[idx]) ** 0.5
+        noisy_latents = sqrt_alpha_prod * init_latents + sqrt_one_minus_alpha_prod * noise
+
+        return noisy_latents
+
+
+class EulerAncestralDiscreteScheduler:
+    def __init__(
+        self,
+        num_train_timesteps: int = 1000,
+        beta_start: float = 0.0001,
+        beta_end: float = 0.02,
+        device="cuda",
+        steps_offset=0,
+        prediction_type="epsilon",
+    ):
+        # this schedule is very specific to the latent diffusion model.
+        betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
+
+        alphas = 1.0 - betas
+        self.alphas_cumprod = torch.cumprod(alphas, dim=0)
+
+        sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5)
+        sigmas = np.concatenate([sigmas[::-1], [0.0]]).astype(np.float32)
+        self.sigmas = torch.from_numpy(sigmas)
+
+        # standard deviation of the initial noise distribution
+        self.init_noise_sigma = self.sigmas.max()
+
+        # setable values
+        self.num_inference_steps = None
+        timesteps = np.linspace(0, num_train_timesteps - 1, num_train_timesteps, dtype=float)[::-1].copy()
+        self.timesteps = torch.from_numpy(timesteps)
+        self.is_scale_input_called = False
+        self.device = device
+        self.num_train_timesteps = num_train_timesteps
+        self.steps_offset = steps_offset
+        self.prediction_type = prediction_type
+
+    def scale_model_input(self, sample: torch.FloatTensor, idx, timestep, *args, **kwargs) -> torch.FloatTensor:
+        if isinstance(timestep, torch.Tensor):
+            timestep = timestep.to(self.timesteps.device)
+        step_index = (self.timesteps == timestep).nonzero().item()
+        sigma = self.sigmas[step_index]
+        sample = sample / ((sigma**2 + 1) ** 0.5)
+        self.is_scale_input_called = True
+        return sample
+
+    def set_timesteps(self, num_inference_steps: int):
+        self.num_inference_steps = num_inference_steps
+
+        timesteps = np.linspace(0, self.num_train_timesteps - 1, num_inference_steps, dtype=np.float32)[::-1].copy()
+        sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5)
+        sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas)
+        sigmas = np.concatenate([sigmas, [0.0]]).astype(np.float32)
+        self.sigmas = torch.from_numpy(sigmas).to(device=self.device)
+        self.timesteps = torch.from_numpy(timesteps).to(device=self.device)
+
+    def configure(self):
+        dts = np.zeros(self.num_inference_steps, dtype=np.float32)
+        sigmas_up = np.zeros(self.num_inference_steps, dtype=np.float32)
+        for idx, timestep in enumerate(self.timesteps):
+            step_index = (self.timesteps == timestep).nonzero().item()
+            sigma = self.sigmas[step_index]
+
+            sigma_from = self.sigmas[step_index]
+            sigma_to = self.sigmas[step_index + 1]
+            sigma_up = (sigma_to**2 * (sigma_from**2 - sigma_to**2) / sigma_from**2) ** 0.5
+            sigma_down = (sigma_to**2 - sigma_up**2) ** 0.5
+            dt = sigma_down - sigma
+            dts[idx] = dt
+            sigmas_up[idx] = sigma_up
+
+        self.dts = torch.from_numpy(dts).to(self.device)
+        self.sigmas_up = torch.from_numpy(sigmas_up).to(self.device)
+
+    def step(
+        self,
+        model_output,
+        sample,
+        idx,
+        timestep,
+        generator=None,
+    ):
+        step_index = (self.timesteps == timestep).nonzero().item()
+        sigma = self.sigmas[step_index]
+
+        # 1. compute predicted original sample (x_0) from sigma-scaled predicted noise
+        if self.prediction_type == "epsilon":
+            pred_original_sample = sample - sigma * model_output
+        elif self.prediction_type == "v_prediction":
+            # * c_out + input * c_skip
+            pred_original_sample = model_output * (-sigma / (sigma**2 + 1) ** 0.5) + (sample / (sigma**2 + 1))
+        else:
+            raise ValueError(
+                f"prediction_type given as {self.prediction_type} must be one of `epsilon`, or `v_prediction`"
+            )
+
+        sigma_up = self.sigmas_up[idx]
+
+        # 2. Convert to an ODE derivative
+        derivative = (sample - pred_original_sample) / sigma
+
+        dt = self.dts[idx]
+
+        prev_sample = sample + derivative * dt
+
+        device = model_output.device
+        noise = torch.randn(model_output.shape, dtype=model_output.dtype, device=device, generator=generator).to(device)
+
+        prev_sample = prev_sample + noise * sigma_up
+
+        return prev_sample
+
+    def add_noise(self, original_samples, noise, idx, timestep=None):
+        step_index = (self.timesteps == timestep).nonzero().item()
+        noisy_samples = original_samples + noise * self.sigmas[step_index]
+        return noisy_samples
+
+
+class UniPCMultistepScheduler:
+    def __init__(
+        self,
+        device="cuda",
+        num_train_timesteps: int = 1000,
+        beta_start: float = 0.00085,
+        beta_end: float = 0.012,
+        solver_order: int = 2,
+        prediction_type: str = "epsilon",
+        thresholding: bool = False,
+        dynamic_thresholding_ratio: float = 0.995,
+        sample_max_value: float = 1.0,
+        predict_x0: bool = True,
+        solver_type: str = "bh2",
+        lower_order_final: bool = True,
+        disable_corrector: Optional[List[int]] = None,
+    ):
+        self.device = device
+        self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
+
+        self.alphas = 1.0 - self.betas
+        self.alphas_cumprod = torch.cumprod(self.alphas, dim=0)
+        # Currently we only support VP-type noise schedule
+        self.alpha_t = torch.sqrt(self.alphas_cumprod)
+        self.sigma_t = torch.sqrt(1 - self.alphas_cumprod)
+        self.lambda_t = torch.log(self.alpha_t) - torch.log(self.sigma_t)
+
+        # standard deviation of the initial noise distribution
+        self.init_noise_sigma = 1.0
+
+        self.predict_x0 = predict_x0
+        # setable values
+        self.num_inference_steps = None
+        timesteps = np.linspace(0, num_train_timesteps - 1, num_train_timesteps, dtype=np.float32)[::-1].copy()
+        self.timesteps = torch.from_numpy(timesteps)
+        self.model_outputs = [None] * solver_order
+        self.timestep_list = [None] * solver_order
+        self.lower_order_nums = 0
+        self.disable_corrector = disable_corrector if disable_corrector else []
+        self.last_sample = None
+        self.num_train_timesteps = num_train_timesteps
+        self.solver_order = solver_order
+        self.prediction_type = prediction_type
+        self.thresholding = thresholding
+        self.dynamic_thresholding_ratio = dynamic_thresholding_ratio
+        self.sample_max_value = sample_max_value
+        self.solver_type = solver_type
+        self.lower_order_final = lower_order_final
+
+    def set_timesteps(self, num_inference_steps: int):
+        timesteps = (
+            np.linspace(0, self.num_train_timesteps - 1, num_inference_steps + 1)
+            .round()[::-1][:-1]
+            .copy()
+            .astype(np.int64)
+        )
+
+        # when num_inference_steps == num_train_timesteps, we can end up with
+        # duplicates in timesteps.
+        _, unique_indices = np.unique(timesteps, return_index=True)
+        timesteps = timesteps[np.sort(unique_indices)]
+
+        self.timesteps = torch.from_numpy(timesteps).to(self.device)
+
+        self.num_inference_steps = len(timesteps)
+
+        self.model_outputs = [
+            None,
+        ] * self.solver_order
+        self.lower_order_nums = 0
+        self.last_sample = None
+
+    # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample
+    def _threshold_sample(self, sample: torch.FloatTensor) -> torch.FloatTensor:
+        dtype = sample.dtype
+        batch_size, channels, height, width = sample.shape
+
+        if dtype not in (torch.float32, torch.float64):
+            sample = sample.float()  # upcast for quantile calculation, and clamp not implemented for cpu half
+
+        # Flatten sample for doing quantile calculation along each image
+        sample = sample.reshape(batch_size, channels * height * width)
+
+        abs_sample = sample.abs()  # "a certain percentile absolute pixel value"
+
+        s = torch.quantile(abs_sample, self.dynamic_thresholding_ratio, dim=1)
+        s = torch.clamp(
+            s, min=1, max=self.sample_max_value
+        )  # When clamped to min=1, equivalent to standard clipping to [-1, 1]
+
+        s = s.unsqueeze(1)  # (batch_size, 1) because clamp will broadcast along dim=0
+        sample = torch.clamp(sample, -s, s) / s  # "we threshold xt0 to the range [-s, s] and then divide by s"
+
+        sample = sample.reshape(batch_size, channels, height, width)
+        sample = sample.to(dtype)
+
+        return sample
+
+    def convert_model_output(
+        self, model_output: torch.FloatTensor, timestep: int, sample: torch.FloatTensor
+    ) -> torch.FloatTensor:
+        if self.predict_x0:
+            if self.prediction_type == "epsilon":
+                alpha_t, sigma_t = self.alpha_t[timestep], self.sigma_t[timestep]
+                x0_pred = (sample - sigma_t * model_output) / alpha_t
+            elif self.prediction_type == "sample":
+                x0_pred = model_output
+            elif self.prediction_type == "v_prediction":
+                alpha_t, sigma_t = self.alpha_t[timestep], self.sigma_t[timestep]
+                x0_pred = alpha_t * sample - sigma_t * model_output
+            else:
+                raise ValueError(
+                    f"prediction_type given as {self.prediction_type} must be one of `epsilon`, `sample`, or"
+                    " `v_prediction` for the UniPCMultistepScheduler."
+                )
+
+            if self.thresholding:
+                x0_pred = self._threshold_sample(x0_pred)
+
+            return x0_pred
+        else:
+            if self.prediction_type == "epsilon":
+                return model_output
+            elif self.prediction_type == "sample":
+                alpha_t, sigma_t = self.alpha_t[timestep], self.sigma_t[timestep]
+                epsilon = (sample - alpha_t * model_output) / sigma_t
+                return epsilon
+            elif self.prediction_type == "v_prediction":
+                alpha_t, sigma_t = self.alpha_t[timestep], self.sigma_t[timestep]
+                epsilon = alpha_t * model_output + sigma_t * sample
+                return epsilon
+            else:
+                raise ValueError(
+                    f"prediction_type given as {self.prediction_type} must be one of `epsilon`, `sample`, or"
+                    " `v_prediction` for the UniPCMultistepScheduler."
+                )
+
+    def multistep_uni_p_bh_update(
+        self,
+        model_output: torch.FloatTensor,
+        prev_timestep: int,
+        sample: torch.FloatTensor,
+        order: int,
+    ) -> torch.FloatTensor:
+        timestep_list = self.timestep_list
+        model_output_list = self.model_outputs
+
+        s0, t = self.timestep_list[-1], prev_timestep
+        m0 = model_output_list[-1]
+        x = sample
+
+        lambda_t, lambda_s0 = self.lambda_t[t], self.lambda_t[s0]
+        alpha_t, alpha_s0 = self.alpha_t[t], self.alpha_t[s0]
+        sigma_t, sigma_s0 = self.sigma_t[t], self.sigma_t[s0]
+
+        h = lambda_t - lambda_s0
+
+        rks = []
+        d1s = []
+        for i in range(1, order):
+            si = timestep_list[-(i + 1)]
+            mi = model_output_list[-(i + 1)]
+            lambda_si = self.lambda_t[si]
+            rk = (lambda_si - lambda_s0) / h
+            rks.append(rk)
+            d1s.append((mi - m0) / rk)
+
+        rks.append(1.0)
+        rks = torch.tensor(rks, device=self.device)
+
+        r = []
+        b = []
+
+        hh = -h if self.predict_x0 else h
+        h_phi_1 = torch.expm1(hh)  # h\phi_1(h) = e^h - 1
+        h_phi_k = h_phi_1 / hh - 1
+
+        factorial_i = 1
+
+        if self.solver_type == "bh1":
+            b_h = hh
+        elif self.solver_type == "bh2":
+            b_h = torch.expm1(hh)
+        else:
+            raise NotImplementedError()
+
+        for i in range(1, order + 1):
+            r.append(torch.pow(rks, i - 1))
+            b.append(h_phi_k * factorial_i / b_h)
+            factorial_i *= i + 1
+            h_phi_k = h_phi_k / hh - 1 / factorial_i
+
+        r = torch.stack(r)
+        b = torch.tensor(b, device=self.device)
+
+        if len(d1s) > 0:
+            d1s = torch.stack(d1s, dim=1)  # (B, K)
+            # for order 2, we use a simplified version
+            if order == 2:
+                rhos_p = torch.tensor([0.5], dtype=x.dtype, device=self.device)
+            else:
+                rhos_p = torch.linalg.solve(r[:-1, :-1], b[:-1])
+        else:
+            d1s = None
+
+        if self.predict_x0:
+            x_t_ = sigma_t / sigma_s0 * x - alpha_t * h_phi_1 * m0
+            if d1s is not None:
+                pred_res = torch.einsum("k,bkchw->bchw", rhos_p, d1s)
+            else:
+                pred_res = 0
+            x_t = x_t_ - alpha_t * b_h * pred_res
+        else:
+            x_t_ = alpha_t / alpha_s0 * x - sigma_t * h_phi_1 * m0
+            if d1s is not None:
+                pred_res = torch.einsum("k,bkchw->bchw", rhos_p, d1s)
+            else:
+                pred_res = 0
+            x_t = x_t_ - sigma_t * b_h * pred_res
+
+        x_t = x_t.to(x.dtype)
+        return x_t
+
+    def multistep_uni_c_bh_update(
+        self,
+        this_model_output: torch.FloatTensor,
+        this_timestep: int,
+        last_sample: torch.FloatTensor,
+        # this_sample: torch.FloatTensor,
+        order: int,
+    ) -> torch.FloatTensor:
+        timestep_list = self.timestep_list
+        model_output_list = self.model_outputs
+
+        s0, t = timestep_list[-1], this_timestep
+        m0 = model_output_list[-1]
+        x = last_sample
+        # x_t = this_sample
+        model_t = this_model_output
+
+        lambda_t, lambda_s0 = self.lambda_t[t], self.lambda_t[s0]
+        alpha_t, alpha_s0 = self.alpha_t[t], self.alpha_t[s0]
+        sigma_t, sigma_s0 = self.sigma_t[t], self.sigma_t[s0]
+
+        h = lambda_t - lambda_s0
+
+        rks = []
+        d1s = []
+        for i in range(1, order):
+            si = timestep_list[-(i + 1)]
+            mi = model_output_list[-(i + 1)]
+            lambda_si = self.lambda_t[si]
+            rk = (lambda_si - lambda_s0) / h
+            rks.append(rk)
+            d1s.append((mi - m0) / rk)
+
+        rks.append(1.0)
+        rks = torch.tensor(rks, device=self.device)
+
+        r = []
+        b = []
+
+        hh = -h if self.predict_x0 else h
+        h_phi_1 = torch.expm1(hh)  # h\phi_1(h) = e^h - 1
+        h_phi_k = h_phi_1 / hh - 1
+
+        factorial_i = 1
+
+        if self.solver_type == "bh1":
+            b_h = hh
+        elif self.solver_type == "bh2":
+            b_h = torch.expm1(hh)
+        else:
+            raise NotImplementedError()
+
+        for i in range(1, order + 1):
+            r.append(torch.pow(rks, i - 1))
+            b.append(h_phi_k * factorial_i / b_h)
+            factorial_i *= i + 1
+            h_phi_k = h_phi_k / hh - 1 / factorial_i
+
+        r = torch.stack(r)
+        b = torch.tensor(b, device=self.device)
+
+        if len(d1s) > 0:
+            d1s = torch.stack(d1s, dim=1)
+        else:
+            d1s = None
+
+        # for order 1, we use a simplified version
+        if order == 1:
+            rhos_c = torch.tensor([0.5], dtype=x.dtype, device=self.device)
+        else:
+            rhos_c = torch.linalg.solve(r, b)
+
+        if self.predict_x0:
+            x_t_ = sigma_t / sigma_s0 * x - alpha_t * h_phi_1 * m0
+            if d1s is not None:
+                corr_res = torch.einsum("k,bkchw->bchw", rhos_c[:-1], d1s)
+            else:
+                corr_res = 0
+            d1_t = model_t - m0
+            x_t = x_t_ - alpha_t * b_h * (corr_res + rhos_c[-1] * d1_t)
+        else:
+            x_t_ = alpha_t / alpha_s0 * x - sigma_t * h_phi_1 * m0
+            if d1s is not None:
+                corr_res = torch.einsum("k,bkchw->bchw", rhos_c[:-1], d1s)
+            else:
+                corr_res = 0
+            d1_t = model_t - m0
+            x_t = x_t_ - sigma_t * b_h * (corr_res + rhos_c[-1] * d1_t)
+        x_t = x_t.to(x.dtype)
+        return x_t
+
+    def step(
+        self,
+        model_output: torch.FloatTensor,
+        timestep: int,
+        sample: torch.FloatTensor,
+        return_dict: bool = True,
+    ):
+        if self.num_inference_steps is None:
+            raise ValueError(
+                "Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler"
+            )
+
+        if isinstance(timestep, torch.Tensor):
+            timestep = timestep.to(self.device)
+        step_index = (self.timesteps == timestep).nonzero()
+        if len(step_index) == 0:
+            step_index = len(self.timesteps) - 1
+        else:
+            step_index = step_index.item()
+
+        use_corrector = step_index > 0 and step_index - 1 not in self.disable_corrector and self.last_sample is not None
+
+        model_output_convert = self.convert_model_output(model_output, timestep, sample)
+        if use_corrector:
+            sample = self.multistep_uni_c_bh_update(
+                this_model_output=model_output_convert,
+                this_timestep=timestep,
+                last_sample=self.last_sample,
+                # this_sample=sample,
+                order=self.this_order,
+            )
+
+        # now prepare to run the predictor
+        prev_timestep = 0 if step_index == len(self.timesteps) - 1 else self.timesteps[step_index + 1]
+
+        for i in range(self.solver_order - 1):
+            self.model_outputs[i] = self.model_outputs[i + 1]
+            self.timestep_list[i] = self.timestep_list[i + 1]
+
+        self.model_outputs[-1] = model_output_convert
+        self.timestep_list[-1] = timestep
+
+        if self.lower_order_final:
+            this_order = min(self.solver_order, len(self.timesteps) - step_index)
+        else:
+            this_order = self.solver_order
+
+        self.this_order = min(this_order, self.lower_order_nums + 1)  # warmup for multistep
+        assert self.this_order > 0
+
+        self.last_sample = sample
+        prev_sample = self.multistep_uni_p_bh_update(
+            model_output=model_output,  # pass the original non-converted model output, in case solver-p is used
+            prev_timestep=prev_timestep,
+            sample=sample,
+            order=self.this_order,
+        )
+
+        if self.lower_order_nums < self.solver_order:
+            self.lower_order_nums += 1
+
+        if not return_dict:
+            return (prev_sample,)
+
+        return prev_sample
+
+    def scale_model_input(self, sample: torch.FloatTensor, *args, **kwargs) -> torch.FloatTensor:
+        return sample
+
+    # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.add_noise
+    def add_noise(
+        self,
+        original_samples: torch.FloatTensor,
+        noise: torch.FloatTensor,
+        idx,
+        timesteps: torch.IntTensor,
+    ) -> torch.FloatTensor:
+        # Make sure alphas_cumprod and timestep have same device and dtype as original_samples
+        alphas_cumprod = self.alphas_cumprod.to(device=self.device, dtype=original_samples.dtype)
+        timesteps = timesteps.to(self.device)
+
+        sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
+        sqrt_alpha_prod = sqrt_alpha_prod.flatten()
+        while len(sqrt_alpha_prod.shape) < len(original_samples.shape):
+            sqrt_alpha_prod = sqrt_alpha_prod.unsqueeze(-1)
+
+        sqrt_one_minus_alpha_prod = (1 - alphas_cumprod[timesteps]) ** 0.5
+        sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.flatten()
+        while len(sqrt_one_minus_alpha_prod.shape) < len(original_samples.shape):
+            sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.unsqueeze(-1)
+
+        noisy_samples = sqrt_alpha_prod * original_samples + sqrt_one_minus_alpha_prod * noise
+        return noisy_samples
+
+    def configure(self):
+        pass
+
+    def __len__(self):
+        return self.num_train_timesteps
+
+
+# Modified from diffusers.schedulers.LCMScheduler
+class LCMScheduler:
+    def __init__(
+        self,
+        device="cuda",
+        num_train_timesteps: int = 1000,
+        beta_start: float = 0.00085,
+        beta_end: float = 0.012,
+        original_inference_steps: int = 50,
+        clip_sample: bool = False,
+        clip_sample_range: float = 1.0,
+        steps_offset: int = 0,
+        prediction_type: str = "epsilon",
+        thresholding: bool = False,
+        dynamic_thresholding_ratio: float = 0.995,
+        sample_max_value: float = 1.0,
+        timestep_spacing: str = "leading",
+        timestep_scaling: float = 10.0,
+    ):
+        self.device = device
+        self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
+        self.alphas = 1.0 - self.betas
+        self.alphas_cumprod = torch.cumprod(self.alphas, dim=0)
+        self.final_alpha_cumprod = self.alphas_cumprod[0]
+        # standard deviation of the initial noise distribution
+        self.init_noise_sigma = 1.0
+        # setable values
+        self.num_inference_steps = None
+        self.timesteps = torch.from_numpy(np.arange(0, num_train_timesteps)[::-1].copy().astype(np.int64))
+
+        self.num_train_timesteps = num_train_timesteps
+        self.clip_sample = clip_sample
+        self.clip_sample_range = clip_sample_range
+        self.steps_offset = steps_offset
+        self.prediction_type = prediction_type
+        self.thresholding = thresholding
+        self.timestep_spacing = timestep_spacing
+        self.timestep_scaling = timestep_scaling
+        self.original_inference_steps = original_inference_steps
+        self.dynamic_thresholding_ratio = dynamic_thresholding_ratio
+        self.sample_max_value = sample_max_value
+
+        self._step_index = None
+
+    # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._init_step_index
+    def _init_step_index(self, timestep):
+        if isinstance(timestep, torch.Tensor):
+            timestep = timestep.to(self.timesteps.device)
+
+        index_candidates = (self.timesteps == timestep).nonzero()
+
+        if len(index_candidates) > 1:
+            step_index = index_candidates[1]
+        else:
+            step_index = index_candidates[0]
+
+        self._step_index = step_index.item()
+
+    @property
+    def step_index(self):
+        return self._step_index
+
+    def scale_model_input(self, sample: torch.FloatTensor, *args, **kwargs) -> torch.FloatTensor:
+        return sample
+
+    # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample
+    def _threshold_sample(self, sample: torch.FloatTensor) -> torch.FloatTensor:
+        dtype = sample.dtype
+        batch_size, channels, *remaining_dims = sample.shape
+
+        if dtype not in (torch.float32, torch.float64):
+            sample = sample.float()  # upcast for quantile calculation, and clamp not implemented for cpu half
+
+        # Flatten sample for doing quantile calculation along each image
+        sample = sample.reshape(batch_size, channels * np.prod(remaining_dims))
+
+        abs_sample = sample.abs()  # "a certain percentile absolute pixel value"
+
+        s = torch.quantile(abs_sample, self.dynamic_thresholding_ratio, dim=1)
+        s = torch.clamp(
+            s, min=1, max=self.sample_max_value
+        )  # When clamped to min=1, equivalent to standard clipping to [-1, 1]
+        s = s.unsqueeze(1)  # (batch_size, 1) because clamp will broadcast along dim=0
+        sample = torch.clamp(sample, -s, s) / s  # "we threshold xt0 to the range [-s, s] and then divide by s"
+
+        sample = sample.reshape(batch_size, channels, *remaining_dims)
+        sample = sample.to(dtype)
+
+        return sample
+
+    def set_timesteps(
+        self,
+        num_inference_steps: int,
+        strength: int = 1.0,
+    ):
+        assert num_inference_steps <= self.num_train_timesteps
+
+        self.num_inference_steps = num_inference_steps
+        original_steps = self.original_inference_steps
+
+        assert original_steps <= self.num_train_timesteps
+        assert num_inference_steps <= original_steps
+
+        # LCM Timesteps Setting
+        # Currently, only linear spacing is supported.
+        c = self.num_train_timesteps // original_steps
+        # LCM Training Steps Schedule
+        lcm_origin_timesteps = np.asarray(list(range(1, int(original_steps * strength) + 1))) * c - 1
+        skipping_step = len(lcm_origin_timesteps) // num_inference_steps
+        # LCM Inference Steps Schedule
+        timesteps = lcm_origin_timesteps[::-skipping_step][:num_inference_steps]
+
+        self.timesteps = torch.from_numpy(timesteps.copy()).to(device=self.device, dtype=torch.long)
+
+        self._step_index = None
+
+    def get_scalings_for_boundary_condition_discrete(self, timestep):
+        self.sigma_data = 0.5  # Default: 0.5
+        scaled_timestep = timestep * self.timestep_scaling
+
+        c_skip = self.sigma_data**2 / (scaled_timestep**2 + self.sigma_data**2)
+        c_out = scaled_timestep / (scaled_timestep**2 + self.sigma_data**2) ** 0.5
+        return c_skip, c_out
+
+    def step(
+        self,
+        model_output: torch.FloatTensor,
+        timestep: int,
+        sample: torch.FloatTensor,
+        generator: Optional[torch.Generator] = None,
+    ):
+        if self.num_inference_steps is None:
+            raise ValueError(
+                "Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler"
+            )
+
+        if self.step_index is None:
+            self._init_step_index(timestep)
+
+        # 1. get previous step value
+        prev_step_index = self.step_index + 1
+        if prev_step_index < len(self.timesteps):
+            prev_timestep = self.timesteps[prev_step_index]
+        else:
+            prev_timestep = timestep
+
+        # 2. compute alphas, betas
+        alpha_prod_t = self.alphas_cumprod[timestep]
+        alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod
+
+        beta_prod_t = 1 - alpha_prod_t
+        beta_prod_t_prev = 1 - alpha_prod_t_prev
+
+        # 3. Get scalings for boundary conditions
+        c_skip, c_out = self.get_scalings_for_boundary_condition_discrete(timestep)
+
+        # 4. Compute the predicted original sample x_0 based on the model parameterization
+        if self.prediction_type == "epsilon":  # noise-prediction
+            predicted_original_sample = (sample - beta_prod_t.sqrt() * model_output) / alpha_prod_t.sqrt()
+        elif self.prediction_type == "sample":  # x-prediction
+            predicted_original_sample = model_output
+        elif self.prediction_type == "v_prediction":  # v-prediction
+            predicted_original_sample = alpha_prod_t.sqrt() * sample - beta_prod_t.sqrt() * model_output
+        else:
+            raise ValueError(
+                f"prediction_type given as {self.prediction_type} must be one of `epsilon`, `sample` or"
+                " `v_prediction` for `LCMScheduler`."
+            )
+
+        # 5. Clip or threshold "predicted x_0"
+        if self.thresholding:
+            predicted_original_sample = self._threshold_sample(predicted_original_sample)
+        elif self.clip_sample:
+            predicted_original_sample = predicted_original_sample.clamp(-self.clip_sample_range, self.clip_sample_range)
+
+        # 6. Denoise model output using boundary conditions
+        denoised = c_out * predicted_original_sample + c_skip * sample
+
+        # 7. Sample and inject noise z ~ N(0, I) for MultiStep Inference
+        # Noise is not used on the final timestep of the timestep schedule.
+        # This also means that noise is not used for one-step sampling.
+        if self.step_index != self.num_inference_steps - 1:
+            noise = torch.randn(
+                model_output.shape, device=model_output.device, dtype=denoised.dtype, generator=generator
+            )
+            prev_sample = alpha_prod_t_prev.sqrt() * denoised + beta_prod_t_prev.sqrt() * noise
+        else:
+            prev_sample = denoised
+
+        # upon completion increase step index by one
+        self._step_index += 1
+
+        return (prev_sample,)
+
+    # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.add_noise
+    def add_noise(
+        self,
+        original_samples: torch.FloatTensor,
+        noise: torch.FloatTensor,
+        timesteps: torch.IntTensor,
+    ) -> torch.FloatTensor:
+        # Make sure alphas_cumprod and timestep have same device and dtype as original_samples
+        alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device, dtype=original_samples.dtype)
+        timesteps = timesteps.to(original_samples.device)
+
+        sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
+        sqrt_alpha_prod = sqrt_alpha_prod.flatten()
+        while len(sqrt_alpha_prod.shape) < len(original_samples.shape):
+            sqrt_alpha_prod = sqrt_alpha_prod.unsqueeze(-1)
+
+        sqrt_one_minus_alpha_prod = (1 - alphas_cumprod[timesteps]) ** 0.5
+        sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.flatten()
+        while len(sqrt_one_minus_alpha_prod.shape) < len(original_samples.shape):
+            sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.unsqueeze(-1)
+
+        noisy_samples = sqrt_alpha_prod * original_samples + sqrt_one_minus_alpha_prod * noise
+        return noisy_samples
+
+    def configure(self):
+        pass
+
+    def __len__(self):
+        return self.num_train_timesteps
diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/engine_builder.py b/onnxruntime/python/tools/transformers/models/stable_diffusion/engine_builder.py
new file mode 100644
index 0000000000000..8e167b74d6918
--- /dev/null
+++ b/onnxruntime/python/tools/transformers/models/stable_diffusion/engine_builder.py
@@ -0,0 +1,266 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation.  All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+import hashlib
+import os
+from enum import Enum
+
+import torch
+from diffusion_models import CLIP, VAE, CLIPWithProj, PipelineInfo, UNet, UNetXL
+
+
+class EngineType(Enum):
+    ORT_CUDA = 0  # ONNX Runtime CUDA Execution Provider
+    ORT_TRT = 1  # ONNX Runtime TensorRT Execution Provider
+    TRT = 2  # TensorRT
+    TORCH = 3  # PyTorch
+
+
+def get_engine_type(name: str) -> EngineType:
+    name_to_type = {
+        "ORT_CUDA": EngineType.ORT_CUDA,
+        "ORT_TRT": EngineType.ORT_TRT,
+        "TRT": EngineType.TRT,
+        "TORCH": EngineType.TORCH,
+    }
+    return name_to_type[name]
+
+
+class EngineBuilder:
+    def __init__(
+        self,
+        engine_type: EngineType,
+        pipeline_info: PipelineInfo,
+        device="cuda",
+        max_batch_size=16,
+        hf_token=None,
+        use_cuda_graph=False,
+    ):
+        """
+        Initializes the Engine Builder.
+
+        Args:
+            pipeline_info (PipelineInfo):
+                Version and Type of pipeline.
+            device (str | torch.device):
+                device to run engine
+            max_batch_size (int):
+                Maximum batch size for dynamic batch engine.
+            hf_token (str):
+                HuggingFace User Access Token to use for downloading Stable Diffusion model checkpoints.
+            use_cuda_graph (bool):
+                Use CUDA graph to capture engine execution and then launch inference
+        """
+        self.engine_type = engine_type
+        self.pipeline_info = pipeline_info
+        self.max_batch_size = max_batch_size
+        self.hf_token = hf_token
+        self.use_cuda_graph = use_cuda_graph
+        self.device = torch.device(device)
+        self.torch_device = torch.device(device, torch.cuda.current_device())
+        self.stages = pipeline_info.stages()
+
+        self.vae_torch_fallback = self.pipeline_info.vae_torch_fallback()
+        self.custom_fp16_vae = self.pipeline_info.custom_fp16_vae()
+
+        self.models = {}
+        self.engines = {}
+        self.torch_models = {}
+        self.use_vae_slicing = False
+
+        self.torch_sdpa = getattr(torch.nn.functional, "scaled_dot_product_attention", None)
+
+    def enable_vae_slicing(self):
+        self.use_vae_slicing = True
+
+    def disable_torch_spda(self):
+        if hasattr(torch.nn.functional, "scaled_dot_product_attention"):
+            delattr(torch.nn.functional, "scaled_dot_product_attention")
+
+    def enable_torch_spda(self):
+        if (not hasattr(torch.nn.functional, "scaled_dot_product_attention")) and self.torch_sdpa:
+            torch.nn.functional.scaled_dot_product_attention = self.torch_sdpa
+
+    def teardown(self):
+        for engine in self.engines.values():
+            del engine
+        self.engines = {}
+
+    def get_cached_model_name(self, model_name):
+        hash_source = []
+        if model_name in ["clip", "clip2", "unet", "unetxl"] and self.pipeline_info.lora_weights:
+            if self.pipeline_info.lora_weights in [
+                "latent-consistency/lcm-lora-sdxl",
+                "latent-consistency/lcm-lora-sdv1-5",
+            ]:
+                if model_name in ["unet", "unetxl"]:
+                    model_name = model_name + "_lcm-lora"
+            else:
+                model_name = model_name + "_lora"
+                hash_source.append(self.pipeline_info.lora_weights)
+
+        # TODO(tianleiwu): save custom model to a directory named by its original model.
+        if model_name == "unetxl" and self.pipeline_info.custom_unet():
+            model_name = model_name + "_lcm"
+
+        if model_name in ["unet", "unetxl"] and self.pipeline_info.controlnet:
+            model_name = model_name + "_" + "_".join(self.pipeline_info.controlnet)
+
+        if hash_source:
+            model_name += "_" + hashlib.md5("\t".join(hash_source).encode("utf-8")).digest().hex()[:8]
+
+        # TODO: When we support original VAE, we shall save custom VAE to another directory.
+
+        if self.pipeline_info.is_inpaint():
+            model_name += "_inpaint"
+        return model_name
+
+    def get_model_dir(self, model_name, root_dir, opt=True, suffix="", create=True):
+        engine_name = self.engine_type.name.lower()
+        directory_name = self.get_cached_model_name(model_name) + (f".{engine_name}" if opt else "") + suffix
+        onnx_model_dir = os.path.join(root_dir, directory_name)
+        if create:
+            os.makedirs(onnx_model_dir, exist_ok=True)
+        return onnx_model_dir
+
+    def get_onnx_path(self, model_name, onnx_dir, opt=True, suffix=""):
+        onnx_model_dir = self.get_model_dir(model_name, onnx_dir, opt=opt, suffix=suffix)
+        return os.path.join(onnx_model_dir, "model.onnx")
+
+    def get_engine_path(self, engine_dir, model_name, profile_id):
+        return os.path.join(engine_dir, self.get_cached_model_name(model_name) + profile_id)
+
+    def load_pipeline_with_lora(self):
+        """Load text encoders and UNet with diffusers pipeline"""
+        from diffusers import DiffusionPipeline
+
+        pipeline = DiffusionPipeline.from_pretrained(
+            self.pipeline_info.name(),
+            variant="fp16",
+            torch_dtype=torch.float16,
+        )
+        pipeline.load_lora_weights(self.pipeline_info.lora_weights)
+        pipeline.fuse_lora(lora_scale=self.pipeline_info.lora_scale)
+
+        del pipeline.vae
+        pipeline.vae = None
+        return pipeline
+
+    def get_or_load_model(self, pipeline, model_name, model_obj, framework_model_dir):
+        if model_name in ["clip", "clip2", "unet", "unetxl"] and pipeline:
+            if model_name == "clip":
+                model = pipeline.text_encoder
+                pipeline.text_encoder = None
+            elif model_name == "clip2":
+                model = pipeline.text_encoder_2
+                pipeline.text_encoder_2 = None
+            else:
+                model = pipeline.unet
+                pipeline.unet = None
+        else:
+            model = model_obj.load_model(framework_model_dir, self.hf_token)
+
+        return model.to(self.torch_device)
+
+    def load_models(self, framework_model_dir: str):
+        # For TRT or ORT_TRT, we will export fp16 torch model for UNet.
+        # For ORT_CUDA, we export fp32 model first, then optimize to fp16.
+        export_fp16_unet = self.engine_type in [EngineType.ORT_TRT, EngineType.TRT]
+
+        if "clip" in self.stages:
+            self.models["clip"] = CLIP(
+                self.pipeline_info,
+                None,  # not loaded yet
+                device=self.torch_device,
+                max_batch_size=self.max_batch_size,
+                clip_skip=0,
+            )
+
+        if "clip2" in self.stages:
+            self.models["clip2"] = CLIPWithProj(
+                self.pipeline_info,
+                None,  # not loaded yet
+                device=self.torch_device,
+                max_batch_size=self.max_batch_size,
+                clip_skip=0,
+            )
+
+        if "unet" in self.stages:
+            self.models["unet"] = UNet(
+                self.pipeline_info,
+                None,  # not loaded yet
+                device=self.torch_device,
+                fp16=export_fp16_unet,
+                max_batch_size=self.max_batch_size,
+                unet_dim=(9 if self.pipeline_info.is_inpaint() else 4),
+            )
+
+        if "unetxl" in self.stages:
+            self.models["unetxl"] = UNetXL(
+                self.pipeline_info,
+                None,  # not loaded yet
+                device=self.torch_device,
+                fp16=export_fp16_unet,
+                max_batch_size=self.max_batch_size,
+                unet_dim=4,
+                time_dim=(5 if self.pipeline_info.is_xl_refiner() else 6),
+            )
+
+        # VAE Decoder
+        if "vae" in self.stages:
+            self.models["vae"] = VAE(
+                self.pipeline_info,
+                None,  # not loaded yet
+                device=self.torch_device,
+                max_batch_size=self.max_batch_size,
+                custom_fp16_vae=self.custom_fp16_vae,
+            )
+
+            if self.vae_torch_fallback:
+                self.torch_models["vae"] = self.models["vae"].load_model(framework_model_dir, self.hf_token)
+
+    def load_resources(self, image_height, image_width, batch_size):
+        # Allocate buffers for I/O bindings
+        for model_name, obj in self.models.items():
+            if model_name == "vae" and self.vae_torch_fallback:
+                continue
+            slice_size = 1 if (model_name == "vae" and self.use_vae_slicing) else batch_size
+            self.engines[model_name].allocate_buffers(
+                shape_dict=obj.get_shape_dict(slice_size, image_height, image_width), device=self.torch_device
+            )
+
+    def _vae_decode(self, latents):
+        if self.vae_torch_fallback:
+            if not self.custom_fp16_vae:
+                latents = latents.to(dtype=torch.float32)
+                self.torch_models["vae"] = self.torch_models["vae"].to(dtype=torch.float32)
+            images = self.torch_models["vae"](latents)["sample"]
+        else:
+            images = self.run_engine("vae", {"latent": latents})["images"]
+
+        return images
+
+    def vae_decode(self, latents):
+        if self.use_vae_slicing:
+            # The output tensor points to same buffer. Need clone it to avoid overwritten.
+            decoded_slices = [self._vae_decode(z_slice).clone() for z_slice in latents.split(1)]
+            return torch.cat(decoded_slices)
+
+        return self._vae_decode(latents)
+
+
+def get_engine_paths(work_dir: str, pipeline_info: PipelineInfo, engine_type: EngineType):
+    root_dir = work_dir or "."
+    short_name = pipeline_info.short_name()
+
+    # When both ORT_CUDA and ORT_TRT/TRT is used, we shall make sub directory for each engine since
+    # ORT_CUDA need fp32 torch model, while ORT_TRT/TRT use fp16 torch model.
+    onnx_dir = os.path.join(root_dir, engine_type.name, short_name, "onnx")
+    engine_dir = os.path.join(root_dir, engine_type.name, short_name, "engine")
+    output_dir = os.path.join(root_dir, engine_type.name, short_name, "output")
+
+    timing_cache = os.path.join(root_dir, engine_type.name, "timing_cache")
+    framework_model_dir = os.path.join(root_dir, engine_type.name, "torch_model")
+
+    return onnx_dir, engine_dir, output_dir, framework_model_dir, timing_cache
diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/engine_builder_ort_cuda.py b/onnxruntime/python/tools/transformers/models/stable_diffusion/engine_builder_ort_cuda.py
new file mode 100644
index 0000000000000..2ac9a45577676
--- /dev/null
+++ b/onnxruntime/python/tools/transformers/models/stable_diffusion/engine_builder_ort_cuda.py
@@ -0,0 +1,314 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation.  All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+
+import gc
+import logging
+import os
+import shutil
+from typing import List, Optional
+
+import torch
+from diffusion_models import PipelineInfo
+from engine_builder import EngineBuilder, EngineType
+from ort_utils import CudaSession
+
+import onnxruntime as ort
+
+logger = logging.getLogger(__name__)
+
+
+class OrtCudaEngine(CudaSession):
+    def __init__(
+        self,
+        onnx_path,
+        device_id: int = 0,
+        enable_cuda_graph: bool = False,
+        disable_optimization: bool = False,
+    ):
+        self.onnx_path = onnx_path
+        self.provider = "CUDAExecutionProvider"
+        self.provider_options = CudaSession.get_cuda_provider_options(device_id, enable_cuda_graph)
+        # self.provider_options["enable_skip_layer_norm_strict_mode"] = True
+
+        session_options = ort.SessionOptions()
+
+        # When the model has been optimized by onnxruntime, we can disable optimization to save session creation time.
+        if disable_optimization:
+            session_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_DISABLE_ALL
+
+        logger.info("creating CUDA EP session for %s", onnx_path)
+        ort_session = ort.InferenceSession(
+            onnx_path,
+            session_options,
+            providers=[
+                (self.provider, self.provider_options),
+                "CPUExecutionProvider",
+            ],
+        )
+        logger.info("created CUDA EP session for %s", onnx_path)
+
+        device = torch.device("cuda", device_id)
+        super().__init__(ort_session, device, enable_cuda_graph)
+
+    def allocate_buffers(self, shape_dict, device):
+        super().allocate_buffers(shape_dict)
+
+
+class _ModelConfig:
+    """
+    Configuration of one model (like Clip, UNet etc) on ONNX export and optimization for CUDA provider.
+    For example, if you want to use fp32 in layer normalization, set the following:
+        force_fp32_ops=["SkipLayerNormalization", "LayerNormalization"]
+    """
+
+    def __init__(
+        self,
+        onnx_opset_version: int,
+        use_cuda_graph: bool,
+        fp16: bool = True,
+        force_fp32_ops: Optional[List[str]] = None,
+        optimize_by_ort: bool = True,
+    ):
+        self.onnx_opset_version = onnx_opset_version
+        self.use_cuda_graph = use_cuda_graph
+        self.fp16 = fp16
+        self.force_fp32_ops = force_fp32_ops
+        self.optimize_by_ort = optimize_by_ort
+
+
+class OrtCudaEngineBuilder(EngineBuilder):
+    def __init__(
+        self,
+        pipeline_info: PipelineInfo,
+        max_batch_size=16,
+        hf_token=None,
+        device="cuda",
+        use_cuda_graph=False,
+    ):
+        """
+        Initializes the ONNX Runtime TensorRT ExecutionProvider Engine Builder.
+
+        Args:
+            pipeline_info (PipelineInfo):
+                Version and Type of pipeline.
+            max_batch_size (int):
+                Maximum batch size for dynamic batch engine.
+            hf_token (str):
+                HuggingFace User Access Token to use for downloading Stable Diffusion model checkpoints.
+            device (str):
+                device to run.
+            use_cuda_graph (bool):
+                Use CUDA graph to capture engine execution and then launch inference
+        """
+        super().__init__(
+            EngineType.ORT_CUDA,
+            pipeline_info,
+            max_batch_size=max_batch_size,
+            hf_token=hf_token,
+            device=device,
+            use_cuda_graph=use_cuda_graph,
+        )
+
+        self.model_config = {}
+
+    def _configure(
+        self,
+        model_name: str,
+        onnx_opset_version: int,
+        use_cuda_graph: bool,
+        fp16: bool = True,
+        force_fp32_ops: Optional[List[str]] = None,
+        optimize_by_ort: bool = True,
+    ):
+        self.model_config[model_name] = _ModelConfig(
+            onnx_opset_version,
+            use_cuda_graph,
+            fp16=fp16,
+            force_fp32_ops=force_fp32_ops,
+            optimize_by_ort=optimize_by_ort,
+        )
+
+    def configure_xl(self, onnx_opset_version: int):
+        self._configure(
+            "clip",
+            onnx_opset_version=onnx_opset_version,
+            use_cuda_graph=self.use_cuda_graph,
+        )
+        self._configure(
+            "clip2",
+            onnx_opset_version=onnx_opset_version,  # TODO: ArgMax-12 is not implemented in CUDA
+            use_cuda_graph=False,  # TODO: fix Runtime Error with cuda graph
+        )
+        self._configure(
+            "unetxl",
+            onnx_opset_version=onnx_opset_version,
+            use_cuda_graph=self.use_cuda_graph,
+        )
+
+        self._configure(
+            "vae",
+            onnx_opset_version=onnx_opset_version,
+            use_cuda_graph=self.use_cuda_graph,
+        )
+
+    def build_engines(
+        self,
+        engine_dir: str,
+        framework_model_dir: str,
+        onnx_dir: str,
+        tmp_dir: Optional[str] = None,
+        onnx_opset_version: int = 17,
+        force_engine_rebuild: bool = False,
+        device_id: int = 0,
+        save_fp32_intermediate_model=False,
+    ):
+        self.torch_device = torch.device("cuda", device_id)
+        self.load_models(framework_model_dir)
+
+        if force_engine_rebuild:
+            if os.path.isdir(onnx_dir):
+                logger.info("Remove existing directory %s since force_engine_rebuild is enabled", onnx_dir)
+                shutil.rmtree(onnx_dir)
+            if os.path.isdir(engine_dir):
+                logger.info("Remove existing directory %s since force_engine_rebuild is enabled", engine_dir)
+                shutil.rmtree(engine_dir)
+
+        if not os.path.isdir(engine_dir):
+            os.makedirs(engine_dir)
+
+        if not os.path.isdir(onnx_dir):
+            os.makedirs(onnx_dir)
+
+        # Add default configuration if missing
+        if self.pipeline_info.is_xl():
+            self.configure_xl(onnx_opset_version)
+        for model_name in self.models:
+            if model_name not in self.model_config:
+                self.model_config[model_name] = _ModelConfig(onnx_opset_version, self.use_cuda_graph)
+
+        # Load lora only when we need export text encoder or UNet to ONNX.
+        load_lora = False
+        if self.pipeline_info.lora_weights:
+            for model_name in self.models:
+                if model_name not in ["clip", "clip2", "unet", "unetxl"]:
+                    continue
+                onnx_path = self.get_onnx_path(model_name, onnx_dir, opt=False)
+
+                suffix = ".fp16" if self.model_config[model_name].fp16 else ".fp32"
+                onnx_opt_path = self.get_onnx_path(model_name, engine_dir, opt=True, suffix=suffix)
+                if not os.path.exists(onnx_opt_path):
+                    if not os.path.exists(onnx_path):
+                        load_lora = True
+                        break
+
+        # Export models to ONNX
+        self.disable_torch_spda()
+        pipe = self.load_pipeline_with_lora() if load_lora else None
+
+        for model_name, model_obj in self.models.items():
+            if model_name == "vae" and self.vae_torch_fallback:
+                continue
+
+            onnx_path = self.get_onnx_path(model_name, onnx_dir, opt=False)
+            suffix = ".fp16" if self.model_config[model_name].fp16 else ".fp32"
+            onnx_opt_path = self.get_onnx_path(model_name, engine_dir, opt=True, suffix=suffix)
+            if not os.path.exists(onnx_opt_path):
+                if not os.path.exists(onnx_path):
+                    print("----")
+                    logger.info("Exporting model: %s", onnx_path)
+
+                    model = self.get_or_load_model(pipe, model_name, model_obj, framework_model_dir)
+                    model = model.to(torch.float32)
+
+                    with torch.inference_mode():
+                        # For CUDA EP, export FP32 onnx since some graph fusion only supports fp32 graph pattern.
+                        # Export model with sample of batch size 1, image size 512 x 512
+                        inputs = model_obj.get_sample_input(1, 512, 512)
+
+                        torch.onnx.export(
+                            model,
+                            inputs,
+                            onnx_path,
+                            export_params=True,
+                            opset_version=self.model_config[model_name].onnx_opset_version,
+                            do_constant_folding=True,
+                            input_names=model_obj.get_input_names(),
+                            output_names=model_obj.get_output_names(),
+                            dynamic_axes=model_obj.get_dynamic_axes(),
+                        )
+                    del model
+                    torch.cuda.empty_cache()
+                    gc.collect()
+                else:
+                    logger.info("Found cached model: %s", onnx_path)
+
+                # Generate fp32 optimized model.
+                # If final target is fp16 model, we save fp32 optimized model so that it is easy to tune
+                # fp16 conversion. That could save a lot of time in developing.
+                use_fp32_intermediate = save_fp32_intermediate_model and self.model_config[model_name].fp16
+                onnx_fp32_path = onnx_path
+                if use_fp32_intermediate:
+                    onnx_fp32_path = self.get_onnx_path(model_name, engine_dir, opt=True, suffix=".fp32")
+                    if not os.path.exists(onnx_fp32_path):
+                        print("------")
+                        logger.info("Generating optimized model: %s", onnx_fp32_path)
+                        model_obj.optimize_ort(
+                            onnx_path,
+                            onnx_fp32_path,
+                            to_fp16=False,
+                            fp32_op_list=self.model_config[model_name].force_fp32_ops,
+                            optimize_by_ort=self.model_config[model_name].optimize_by_ort,
+                            tmp_dir=self.get_model_dir(model_name, tmp_dir, opt=False, suffix=".fp32", create=False),
+                        )
+                    else:
+                        logger.info("Found cached optimized model: %s", onnx_fp32_path)
+
+                # Generate the final optimized model.
+                if not os.path.exists(onnx_opt_path):
+                    print("------")
+                    logger.info("Generating optimized model: %s", onnx_opt_path)
+
+                    # When there is fp32 intermediate optimized model, this will just convert model from fp32 to fp16.
+                    optimize_by_ort = False if use_fp32_intermediate else self.model_config[model_name].optimize_by_ort
+
+                    model_obj.optimize_ort(
+                        onnx_fp32_path,
+                        onnx_opt_path,
+                        to_fp16=self.model_config[model_name].fp16,
+                        fp32_op_list=self.model_config[model_name].force_fp32_ops,
+                        optimize_by_ort=optimize_by_ort,
+                        optimize_by_fusion=not use_fp32_intermediate,
+                        tmp_dir=self.get_model_dir(model_name, tmp_dir, opt=False, suffix=".fp16", create=False),
+                    )
+                else:
+                    logger.info("Found cached optimized model: %s", onnx_opt_path)
+        self.enable_torch_spda()
+
+        built_engines = {}
+        for model_name in self.models:
+            if model_name == "vae" and self.vae_torch_fallback:
+                continue
+
+            suffix = ".fp16" if self.model_config[model_name].fp16 else ".fp32"
+            onnx_opt_path = self.get_onnx_path(model_name, engine_dir, opt=True, suffix=suffix)
+
+            use_cuda_graph = self.model_config[model_name].use_cuda_graph
+
+            engine = OrtCudaEngine(
+                onnx_opt_path,
+                device_id=device_id,
+                enable_cuda_graph=use_cuda_graph,
+                disable_optimization=False,
+            )
+
+            logger.info("%s options for %s: %s", engine.provider, model_name, engine.provider_options)
+            built_engines[model_name] = engine
+
+        self.engines = built_engines
+
+        return built_engines
+
+    def run_engine(self, model_name, feed_dict):
+        return self.engines[model_name].infer(feed_dict)
diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/engine_builder_ort_trt.py b/onnxruntime/python/tools/transformers/models/stable_diffusion/engine_builder_ort_trt.py
new file mode 100644
index 0000000000000..8c637007b840d
--- /dev/null
+++ b/onnxruntime/python/tools/transformers/models/stable_diffusion/engine_builder_ort_trt.py
@@ -0,0 +1,304 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation.  All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+
+import gc
+import logging
+import os
+import shutil
+
+import torch
+from cuda import cudart
+from diffusion_models import PipelineInfo
+from engine_builder import EngineBuilder, EngineType
+from ort_utils import CudaSession
+from packaging import version
+
+import onnxruntime as ort
+
+logger = logging.getLogger(__name__)
+
+
+class OrtTensorrtEngine(CudaSession):
+    def __init__(
+        self,
+        engine_path,
+        device_id,
+        onnx_path,
+        fp16,
+        input_profile,
+        workspace_size,
+        enable_cuda_graph,
+        timing_cache_path=None,
+    ):
+        self.engine_path = engine_path
+        self.ort_trt_provider_options = self.get_tensorrt_provider_options(
+            input_profile,
+            workspace_size,
+            fp16,
+            device_id,
+            enable_cuda_graph,
+            timing_cache_path=timing_cache_path,
+        )
+
+        session_options = ort.SessionOptions()
+        session_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_DISABLE_ALL
+        logger.info("creating TRT EP session for %s", onnx_path)
+        ort_session = ort.InferenceSession(
+            onnx_path,
+            session_options,
+            providers=[
+                ("TensorrtExecutionProvider", self.ort_trt_provider_options),
+            ],
+        )
+        logger.info("created TRT EP session for %s", onnx_path)
+
+        device = torch.device("cuda", device_id)
+        super().__init__(ort_session, device, enable_cuda_graph)
+
+    def get_tensorrt_provider_options(
+        self, input_profile, workspace_size, fp16, device_id, enable_cuda_graph, timing_cache_path=None
+    ):
+        trt_ep_options = {
+            "device_id": device_id,
+            "trt_fp16_enable": fp16,
+            "trt_engine_cache_enable": True,
+            "trt_timing_cache_enable": True,
+            "trt_detailed_build_log": True,
+            "trt_engine_cache_path": self.engine_path,
+        }
+
+        if version.parse(ort.__version__) > version.parse("1.16.2") and timing_cache_path is not None:
+            trt_ep_options["trt_timing_cache_path"] = timing_cache_path
+
+        if enable_cuda_graph:
+            trt_ep_options["trt_cuda_graph_enable"] = True
+
+        if workspace_size > 0:
+            trt_ep_options["trt_max_workspace_size"] = workspace_size
+
+        if input_profile:
+            min_shapes = []
+            max_shapes = []
+            opt_shapes = []
+            for name, profile in input_profile.items():
+                assert isinstance(profile, list) and len(profile) == 3
+                min_shape = profile[0]
+                opt_shape = profile[1]
+                max_shape = profile[2]
+                assert len(min_shape) == len(opt_shape) and len(opt_shape) == len(max_shape)
+
+                min_shapes.append(f"{name}:" + "x".join([str(x) for x in min_shape]))
+                opt_shapes.append(f"{name}:" + "x".join([str(x) for x in opt_shape]))
+                max_shapes.append(f"{name}:" + "x".join([str(x) for x in max_shape]))
+
+            trt_ep_options["trt_profile_min_shapes"] = ",".join(min_shapes)
+            trt_ep_options["trt_profile_max_shapes"] = ",".join(max_shapes)
+            trt_ep_options["trt_profile_opt_shapes"] = ",".join(opt_shapes)
+
+        logger.info("trt_ep_options=%s", trt_ep_options)
+
+        return trt_ep_options
+
+    def allocate_buffers(self, shape_dict, device):
+        super().allocate_buffers(shape_dict)
+
+
+class OrtTensorrtEngineBuilder(EngineBuilder):
+    def __init__(
+        self,
+        pipeline_info: PipelineInfo,
+        max_batch_size=16,
+        hf_token=None,
+        device="cuda",
+        use_cuda_graph=False,
+    ):
+        """
+        Initializes the ONNX Runtime TensorRT ExecutionProvider Engine Builder.
+
+        Args:
+            pipeline_info (PipelineInfo):
+                Version and Type of pipeline.
+            max_batch_size (int):
+                Maximum batch size for dynamic batch engine.
+            hf_token (str):
+                HuggingFace User Access Token to use for downloading Stable Diffusion model checkpoints.
+            device (str):
+                device to run.
+            use_cuda_graph (bool):
+                Use CUDA graph to capture engine execution and then launch inference
+        """
+        super().__init__(
+            EngineType.ORT_TRT,
+            pipeline_info,
+            max_batch_size=max_batch_size,
+            hf_token=hf_token,
+            device=device,
+            use_cuda_graph=use_cuda_graph,
+        )
+
+    def has_engine_file(self, engine_path):
+        if os.path.isdir(engine_path):
+            children = os.scandir(engine_path)
+            for entry in children:
+                if entry.is_file() and entry.name.endswith(".engine"):
+                    return True
+        return False
+
+    def get_work_space_size(self, model_name, max_workspace_size):
+        gibibyte = 2**30
+        workspace_size = 4 * gibibyte if model_name == "clip" else max_workspace_size
+        if workspace_size == 0:
+            _, free_mem, _ = cudart.cudaMemGetInfo()
+            # The following logic are adopted from TensorRT demo diffusion.
+            if free_mem > 6 * gibibyte:
+                workspace_size = free_mem - 4 * gibibyte
+        return workspace_size
+
+    def build_engines(
+        self,
+        engine_dir,
+        framework_model_dir,
+        onnx_dir,
+        onnx_opset,
+        opt_image_height,
+        opt_image_width,
+        opt_batch_size=1,
+        force_engine_rebuild=False,
+        static_batch=False,
+        static_image_shape=True,
+        max_workspace_size=0,
+        device_id=0,
+        timing_cache=None,
+    ):
+        self.torch_device = torch.device("cuda", device_id)
+        self.load_models(framework_model_dir)
+
+        if force_engine_rebuild:
+            if os.path.isdir(onnx_dir):
+                logger.info("Remove existing directory %s since force_engine_rebuild is enabled", onnx_dir)
+                shutil.rmtree(onnx_dir)
+            if os.path.isdir(engine_dir):
+                logger.info("Remove existing directory %s since force_engine_rebuild is enabled", engine_dir)
+                shutil.rmtree(engine_dir)
+
+        if not os.path.isdir(engine_dir):
+            os.makedirs(engine_dir)
+
+        if not os.path.isdir(onnx_dir):
+            os.makedirs(onnx_dir)
+
+        # Load lora only when we need export text encoder or UNet to ONNX.
+        load_lora = False
+        if self.pipeline_info.lora_weights:
+            for model_name, model_obj in self.models.items():
+                if model_name not in ["clip", "clip2", "unet", "unetxl"]:
+                    continue
+                profile_id = model_obj.get_profile_id(
+                    opt_batch_size, opt_image_height, opt_image_width, static_batch, static_image_shape
+                )
+                engine_path = self.get_engine_path(engine_dir, model_name, profile_id)
+                if not self.has_engine_file(engine_path):
+                    onnx_path = self.get_onnx_path(model_name, onnx_dir, opt=False)
+                    onnx_opt_path = self.get_onnx_path(model_name, onnx_dir, opt=True)
+                    if not os.path.exists(onnx_opt_path):
+                        if not os.path.exists(onnx_path):
+                            load_lora = True
+                            break
+
+        # Export models to ONNX
+        self.disable_torch_spda()
+        pipe = self.load_pipeline_with_lora() if load_lora else None
+
+        for model_name, model_obj in self.models.items():
+            if model_name == "vae" and self.vae_torch_fallback:
+                continue
+
+            profile_id = model_obj.get_profile_id(
+                opt_batch_size, opt_image_height, opt_image_width, static_batch, static_image_shape
+            )
+            engine_path = self.get_engine_path(engine_dir, model_name, profile_id)
+            if not self.has_engine_file(engine_path):
+                onnx_path = self.get_onnx_path(model_name, onnx_dir, opt=False)
+                onnx_opt_path = self.get_onnx_path(model_name, onnx_dir, opt=True)
+                if not os.path.exists(onnx_opt_path):
+                    if not os.path.exists(onnx_path):
+                        logger.info(f"Exporting model: {onnx_path}")
+                        model = self.get_or_load_model(pipe, model_name, model_obj, framework_model_dir)
+
+                        with torch.inference_mode(), torch.autocast("cuda"):
+                            inputs = model_obj.get_sample_input(opt_batch_size, opt_image_height, opt_image_width)
+                            torch.onnx.export(
+                                model,
+                                inputs,
+                                onnx_path,
+                                export_params=True,
+                                opset_version=onnx_opset,
+                                do_constant_folding=True,
+                                input_names=model_obj.get_input_names(),
+                                output_names=model_obj.get_output_names(),
+                                dynamic_axes=model_obj.get_dynamic_axes(),
+                            )
+                        del model
+                        torch.cuda.empty_cache()
+                        gc.collect()
+                    else:
+                        logger.info("Found cached model: %s", onnx_path)
+
+                    # Optimize onnx
+                    if not os.path.exists(onnx_opt_path):
+                        logger.info("Generating optimizing model: %s", onnx_opt_path)
+                        model_obj.optimize_trt(onnx_path, onnx_opt_path)
+                    else:
+                        logger.info("Found cached optimized model: %s", onnx_opt_path)
+        self.enable_torch_spda()
+
+        built_engines = {}
+        for model_name, model_obj in self.models.items():
+            if model_name == "vae" and self.vae_torch_fallback:
+                continue
+
+            profile_id = model_obj.get_profile_id(
+                opt_batch_size, opt_image_height, opt_image_width, static_batch, static_image_shape
+            )
+
+            engine_path = self.get_engine_path(engine_dir, model_name, profile_id)
+            onnx_opt_path = self.get_onnx_path(model_name, onnx_dir, opt=True)
+            if not self.has_engine_file(engine_path):
+                logger.info(
+                    "Building TensorRT engine for %s from %s to %s. It can take a while to complete...",
+                    model_name,
+                    onnx_opt_path,
+                    engine_path,
+                )
+            else:
+                logger.info("Reuse cached TensorRT engine in directory %s", engine_path)
+
+            input_profile = model_obj.get_input_profile(
+                opt_batch_size,
+                opt_image_height,
+                opt_image_width,
+                static_batch=static_batch,
+                static_image_shape=static_image_shape,
+            )
+
+            engine = OrtTensorrtEngine(
+                engine_path,
+                device_id,
+                onnx_opt_path,
+                fp16=True,
+                input_profile=input_profile,
+                workspace_size=self.get_work_space_size(model_name, max_workspace_size),
+                enable_cuda_graph=self.use_cuda_graph,
+                timing_cache_path=timing_cache,
+            )
+
+            built_engines[model_name] = engine
+
+        self.engines = built_engines
+
+        return built_engines
+
+    def run_engine(self, model_name, feed_dict):
+        return self.engines[model_name].infer(feed_dict)
diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/engine_builder_tensorrt.py b/onnxruntime/python/tools/transformers/models/stable_diffusion/engine_builder_tensorrt.py
new file mode 100644
index 0000000000000..bac1a8bb8140d
--- /dev/null
+++ b/onnxruntime/python/tools/transformers/models/stable_diffusion/engine_builder_tensorrt.py
@@ -0,0 +1,530 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation.  All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+# Modified from TensorRT demo diffusion, which has the following license:
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# --------------------------------------------------------------------------
+
+import gc
+import os
+import pathlib
+from collections import OrderedDict
+
+import numpy as np
+import onnx
+import onnx_graphsurgeon as gs
+import tensorrt as trt
+import torch
+from cuda import cudart
+from diffusion_models import PipelineInfo
+from engine_builder import EngineBuilder, EngineType
+from polygraphy.backend.common import bytes_from_path
+from polygraphy.backend.trt import (
+    CreateConfig,
+    ModifyNetworkOutputs,
+    Profile,
+    engine_from_bytes,
+    engine_from_network,
+    network_from_onnx_path,
+    save_engine,
+)
+from trt_utilities import TRT_LOGGER
+
+# Map of numpy dtype -> torch dtype
+numpy_to_torch_dtype_dict = {
+    np.int32: torch.int32,
+    np.int64: torch.int64,
+    np.float16: torch.float16,
+    np.float32: torch.float32,
+}
+
+
+def _cuda_assert(cuda_ret):
+    err = cuda_ret[0]
+    if err != cudart.cudaError_t.cudaSuccess:
+        raise RuntimeError(
+            f"CUDA ERROR: {err}, error code reference: https://nvidia.github.io/cuda-python/module/cudart.html#cuda.cudart.cudaError_t"
+        )
+    if len(cuda_ret) > 1:
+        return cuda_ret[1]
+    return None
+
+
+class TensorrtEngine:
+    def __init__(
+        self,
+        engine_path,
+    ):
+        self.engine_path = engine_path
+        self.engine = None
+        self.context = None
+        self.buffers = OrderedDict()
+        self.tensors = OrderedDict()
+        self.cuda_graph_instance = None
+
+    def __del__(self):
+        del self.engine
+        del self.context
+        del self.buffers
+        del self.tensors
+
+    def refit(self, onnx_path, onnx_refit_path):
+        def convert_int64(arr):
+            if len(arr.shape) == 0:
+                return np.int32(arr)
+            return arr
+
+        def add_to_map(refit_dict, name, values):
+            if name in refit_dict:
+                assert refit_dict[name] is None
+                if values.dtype == np.int64:
+                    values = convert_int64(values)
+                refit_dict[name] = values
+
+        print(f"Refitting TensorRT engine with {onnx_refit_path} weights")
+        refit_nodes = gs.import_onnx(onnx.load(onnx_refit_path)).toposort().nodes
+
+        # Construct mapping from weight names in refit model -> original model
+        name_map = {}
+        for n, node in enumerate(gs.import_onnx(onnx.load(onnx_path)).toposort().nodes):
+            refit_node = refit_nodes[n]
+            assert node.op == refit_node.op
+            # Constant nodes in ONNX do not have inputs but have a constant output
+            if node.op == "Constant":
+                name_map[refit_node.outputs[0].name] = node.outputs[0].name
+            # Handle scale and bias weights
+            elif node.op == "Conv":
+                if node.inputs[1].__class__ == gs.Constant:
+                    name_map[refit_node.name + "_TRTKERNEL"] = node.name + "_TRTKERNEL"
+                if node.inputs[2].__class__ == gs.Constant:
+                    name_map[refit_node.name + "_TRTBIAS"] = node.name + "_TRTBIAS"
+            # For all other nodes: find node inputs that are initializers (gs.Constant)
+            else:
+                for i, inp in enumerate(node.inputs):
+                    if inp.__class__ == gs.Constant:
+                        name_map[refit_node.inputs[i].name] = inp.name
+
+        def map_name(name):
+            if name in name_map:
+                return name_map[name]
+            return name
+
+        # Construct refit dictionary
+        refit_dict = {}
+        refitter = trt.Refitter(self.engine, TRT_LOGGER)
+        all_weights = refitter.get_all()
+        for layer_name, role in zip(all_weights[0], all_weights[1]):
+            # for specialized roles, use a unique name in the map:
+            if role == trt.WeightsRole.KERNEL:
+                name = layer_name + "_TRTKERNEL"
+            elif role == trt.WeightsRole.BIAS:
+                name = layer_name + "_TRTBIAS"
+            else:
+                name = layer_name
+
+            assert name not in refit_dict, "Found duplicate layer: " + name
+            refit_dict[name] = None
+
+        for n in refit_nodes:
+            # Constant nodes in ONNX do not have inputs but have a constant output
+            if n.op == "Constant":
+                name = map_name(n.outputs[0].name)
+                print(f"Add Constant {name}\n")
+                add_to_map(refit_dict, name, n.outputs[0].values)
+
+            # Handle scale and bias weights
+            elif n.op == "Conv":
+                if n.inputs[1].__class__ == gs.Constant:
+                    name = map_name(n.name + "_TRTKERNEL")
+                    add_to_map(refit_dict, name, n.inputs[1].values)
+
+                if n.inputs[2].__class__ == gs.Constant:
+                    name = map_name(n.name + "_TRTBIAS")
+                    add_to_map(refit_dict, name, n.inputs[2].values)
+
+            # For all other nodes: find node inputs that are initializers (AKA gs.Constant)
+            else:
+                for inp in n.inputs:
+                    name = map_name(inp.name)
+                    if inp.__class__ == gs.Constant:
+                        add_to_map(refit_dict, name, inp.values)
+
+        for layer_name, weights_role in zip(all_weights[0], all_weights[1]):
+            if weights_role == trt.WeightsRole.KERNEL:
+                custom_name = layer_name + "_TRTKERNEL"
+            elif weights_role == trt.WeightsRole.BIAS:
+                custom_name = layer_name + "_TRTBIAS"
+            else:
+                custom_name = layer_name
+
+            # Skip refitting Trilu for now; scalar weights of type int64 value 1 - for clip model
+            if layer_name.startswith("onnx::Trilu"):
+                continue
+
+            if refit_dict[custom_name] is not None:
+                refitter.set_weights(layer_name, weights_role, refit_dict[custom_name])
+            else:
+                print(f"[W] No refit weights for layer: {layer_name}")
+
+        if not refitter.refit_cuda_engine():
+            print("Failed to refit!")
+            exit(0)
+
+    def build(
+        self,
+        onnx_path,
+        fp16,
+        input_profile=None,
+        enable_refit=False,
+        enable_preview=False,
+        enable_all_tactics=False,
+        timing_cache=None,
+        update_output_names=None,
+    ):
+        print(f"Building TensorRT engine for {onnx_path}: {self.engine_path}")
+        p = Profile()
+        if input_profile:
+            for name, dims in input_profile.items():
+                assert len(dims) == 3
+                p.add(name, min=dims[0], opt=dims[1], max=dims[2])
+
+        config_kwargs = {}
+        if not enable_all_tactics:
+            config_kwargs["tactic_sources"] = []
+
+        network = network_from_onnx_path(onnx_path, flags=[trt.OnnxParserFlag.NATIVE_INSTANCENORM])
+        if update_output_names:
+            print(f"Updating network outputs to {update_output_names}")
+            network = ModifyNetworkOutputs(network, update_output_names)
+        engine = engine_from_network(
+            network,
+            config=CreateConfig(
+                fp16=fp16, refittable=enable_refit, profiles=[p], load_timing_cache=timing_cache, **config_kwargs
+            ),
+            save_timing_cache=timing_cache,
+        )
+        save_engine(engine, path=self.engine_path)
+
+    def load(self):
+        print(f"Loading TensorRT engine: {self.engine_path}")
+        self.engine = engine_from_bytes(bytes_from_path(self.engine_path))
+
+    def activate(self, reuse_device_memory=None):
+        if reuse_device_memory:
+            self.context = self.engine.create_execution_context_without_device_memory()
+            self.context.device_memory = reuse_device_memory
+        else:
+            self.context = self.engine.create_execution_context()
+
+    def allocate_buffers(self, shape_dict=None, device="cuda"):
+        for idx in range(self.engine.num_io_tensors):
+            binding = self.engine[idx]
+            if shape_dict and binding in shape_dict:
+                shape = shape_dict[binding]
+            else:
+                shape = self.engine.get_binding_shape(binding)
+            dtype = trt.nptype(self.engine.get_binding_dtype(binding))
+            if self.engine.binding_is_input(binding):
+                self.context.set_binding_shape(idx, shape)
+            tensor = torch.empty(tuple(shape), dtype=numpy_to_torch_dtype_dict[dtype]).to(device=device)
+            self.tensors[binding] = tensor
+
+    def infer(self, feed_dict, stream, use_cuda_graph=False):
+        for name, buf in feed_dict.items():
+            self.tensors[name].copy_(buf)
+
+        for name, tensor in self.tensors.items():
+            self.context.set_tensor_address(name, tensor.data_ptr())
+
+        if use_cuda_graph:
+            if self.cuda_graph_instance is not None:
+                _cuda_assert(cudart.cudaGraphLaunch(self.cuda_graph_instance, stream))
+                _cuda_assert(cudart.cudaStreamSynchronize(stream))
+            else:
+                # do inference before CUDA graph capture
+                noerror = self.context.execute_async_v3(stream)
+                if not noerror:
+                    raise ValueError("ERROR: inference failed.")
+                # capture cuda graph
+                _cuda_assert(
+                    cudart.cudaStreamBeginCapture(stream, cudart.cudaStreamCaptureMode.cudaStreamCaptureModeGlobal)
+                )
+                self.context.execute_async_v3(stream)
+                self.graph = _cuda_assert(cudart.cudaStreamEndCapture(stream))
+
+                from cuda import nvrtc
+
+                result, major, minor = nvrtc.nvrtcVersion()
+                assert result == nvrtc.nvrtcResult(0)
+                if major < 12:
+                    self.cuda_graph_instance = _cuda_assert(
+                        cudart.cudaGraphInstantiate(self.graph, b"", 0)
+                    )  # cuda < 12
+                else:
+                    self.cuda_graph_instance = _cuda_assert(cudart.cudaGraphInstantiate(self.graph, 0))  # cuda >= 12
+        else:
+            noerror = self.context.execute_async_v3(stream)
+            if not noerror:
+                raise ValueError("ERROR: inference failed.")
+
+        return self.tensors
+
+
+class TensorrtEngineBuilder(EngineBuilder):
+    """
+    Helper class to hide the detail of TensorRT Engine from pipeline.
+    """
+
+    def __init__(
+        self,
+        pipeline_info: PipelineInfo,
+        max_batch_size=16,
+        hf_token=None,
+        device="cuda",
+        use_cuda_graph=False,
+    ):
+        """
+        Initializes the ONNX Runtime TensorRT ExecutionProvider Engine Builder.
+
+        Args:
+            pipeline_info (PipelineInfo):
+                Version and Type of pipeline.
+            max_batch_size (int):
+                Maximum batch size for dynamic batch engine.
+            hf_token (str):
+                HuggingFace User Access Token to use for downloading Stable Diffusion model checkpoints.
+            device (str):
+                device to run.
+            use_cuda_graph (bool):
+                Use CUDA graph to capture engine execution and then launch inference
+        """
+        super().__init__(
+            EngineType.TRT,
+            pipeline_info,
+            max_batch_size=max_batch_size,
+            hf_token=hf_token,
+            device=device,
+            use_cuda_graph=use_cuda_graph,
+        )
+
+        self.stream = None
+        self.shared_device_memory = None
+
+    def load_resources(self, image_height, image_width, batch_size):
+        super().load_resources(image_height, image_width, batch_size)
+
+        self.stream = _cuda_assert(cudart.cudaStreamCreate())
+
+    def teardown(self):
+        super().teardown()
+
+        if self.shared_device_memory:
+            cudart.cudaFree(self.shared_device_memory)
+
+        cudart.cudaStreamDestroy(self.stream)
+        del self.stream
+
+    def load_engines(
+        self,
+        engine_dir,
+        framework_model_dir,
+        onnx_dir,
+        onnx_opset,
+        opt_batch_size,
+        opt_image_height,
+        opt_image_width,
+        force_export=False,
+        force_optimize=False,
+        force_build=False,
+        static_batch=False,
+        static_shape=True,
+        enable_refit=False,
+        enable_preview=False,
+        enable_all_tactics=False,
+        timing_cache=None,
+        onnx_refit_dir=None,
+    ):
+        """
+        Build and load engines for TensorRT accelerated inference.
+        Export ONNX models first, if applicable.
+
+        Args:
+            engine_dir (str):
+                Directory to write the TensorRT engines.
+            framework_model_dir (str):
+                Directory to write the framework model ckpt.
+            onnx_dir (str):
+                Directory to write the ONNX models.
+            onnx_opset (int):
+                ONNX opset version to export the models.
+            opt_batch_size (int):
+                Batch size to optimize for during engine building.
+            opt_image_height (int):
+                Image height to optimize for during engine building. Must be a multiple of 8.
+            opt_image_width (int):
+                Image width to optimize for during engine building. Must be a multiple of 8.
+            force_export (bool):
+                Force re-exporting the ONNX models.
+            force_optimize (bool):
+                Force re-optimizing the ONNX models.
+            force_build (bool):
+                Force re-building the TensorRT engine.
+            static_batch (bool):
+                Build engine only for specified opt_batch_size.
+            static_shape (bool):
+                Build engine only for specified opt_image_height & opt_image_width. Default = True.
+            enable_refit (bool):
+                Build engines with refit option enabled.
+            enable_preview (bool):
+                Enable TensorRT preview features.
+            enable_all_tactics (bool):
+                Enable all tactic sources during TensorRT engine builds.
+            timing_cache (str):
+                Path to the timing cache to accelerate build or None
+            onnx_refit_dir (str):
+                Directory containing refit ONNX models.
+        """
+        # Create directory
+        for directory in [engine_dir, onnx_dir]:
+            if not os.path.exists(directory):
+                print(f"[I] Create directory: {directory}")
+                pathlib.Path(directory).mkdir(parents=True)
+
+        self.load_models(framework_model_dir)
+
+        # Load lora only when we need export text encoder or UNet to ONNX.
+        load_lora = False
+        if self.pipeline_info.lora_weights:
+            for model_name, model_obj in self.models.items():
+                if model_name not in ["clip", "clip2", "unet", "unetxl"]:
+                    continue
+                profile_id = model_obj.get_profile_id(
+                    opt_batch_size, opt_image_height, opt_image_width, static_batch, static_shape
+                )
+                engine_path = self.get_engine_path(engine_dir, model_name, profile_id)
+                if force_export or force_build or not os.path.exists(engine_path):
+                    onnx_path = self.get_onnx_path(model_name, onnx_dir, opt=False)
+                    onnx_opt_path = self.get_onnx_path(model_name, onnx_dir, opt=True)
+                    if force_export or not os.path.exists(onnx_opt_path):
+                        if force_export or not os.path.exists(onnx_path):
+                            load_lora = True
+                            break
+
+        # Export models to ONNX
+        self.disable_torch_spda()
+        pipe = self.load_pipeline_with_lora() if load_lora else None
+
+        for model_name, model_obj in self.models.items():
+            if model_name == "vae" and self.vae_torch_fallback:
+                continue
+            profile_id = model_obj.get_profile_id(
+                opt_batch_size, opt_image_height, opt_image_width, static_batch, static_shape
+            )
+            engine_path = self.get_engine_path(engine_dir, model_name, profile_id)
+            if force_export or force_build or not os.path.exists(engine_path):
+                onnx_path = self.get_onnx_path(model_name, onnx_dir, opt=False)
+                onnx_opt_path = self.get_onnx_path(model_name, onnx_dir, opt=True)
+                if force_export or not os.path.exists(onnx_opt_path):
+                    if force_export or not os.path.exists(onnx_path):
+                        print(f"Exporting model: {onnx_path}")
+                        model = self.get_or_load_model(pipe, model_name, model_obj, framework_model_dir)
+
+                        with torch.inference_mode(), torch.autocast("cuda"):
+                            inputs = model_obj.get_sample_input(1, opt_image_height, opt_image_width)
+                            torch.onnx.export(
+                                model,
+                                inputs,
+                                onnx_path,
+                                export_params=True,
+                                opset_version=onnx_opset,
+                                do_constant_folding=True,
+                                input_names=model_obj.get_input_names(),
+                                output_names=model_obj.get_output_names(),
+                                dynamic_axes=model_obj.get_dynamic_axes(),
+                            )
+                        del model
+                        torch.cuda.empty_cache()
+                        gc.collect()
+                    else:
+                        print(f"Found cached model: {onnx_path}")
+
+                    # Optimize onnx
+                    if force_optimize or not os.path.exists(onnx_opt_path):
+                        print(f"Generating optimizing model: {onnx_opt_path}")
+                        model_obj.optimize_trt(onnx_path, onnx_opt_path)
+                    else:
+                        print(f"Found cached optimized model: {onnx_opt_path} ")
+        self.enable_torch_spda()
+
+        # Build TensorRT engines
+        for model_name, model_obj in self.models.items():
+            if model_name == "vae" and self.vae_torch_fallback:
+                continue
+            profile_id = model_obj.get_profile_id(
+                opt_batch_size, opt_image_height, opt_image_width, static_batch, static_shape
+            )
+            engine_path = self.get_engine_path(engine_dir, model_name, profile_id)
+            engine = TensorrtEngine(engine_path)
+            onnx_opt_path = self.get_onnx_path(model_name, onnx_dir, opt=True)
+
+            if force_build or not os.path.exists(engine.engine_path):
+                engine.build(
+                    onnx_opt_path,
+                    fp16=True,
+                    input_profile=model_obj.get_input_profile(
+                        opt_batch_size,
+                        opt_image_height,
+                        opt_image_width,
+                        static_batch,
+                        static_shape,
+                    ),
+                    enable_refit=enable_refit,
+                    enable_preview=enable_preview,
+                    enable_all_tactics=enable_all_tactics,
+                    timing_cache=timing_cache,
+                    update_output_names=None,
+                )
+            self.engines[model_name] = engine
+
+        # Load TensorRT engines
+        for model_name in self.models:
+            if model_name == "vae" and self.vae_torch_fallback:
+                continue
+            self.engines[model_name].load()
+            if onnx_refit_dir:
+                onnx_refit_path = self.get_onnx_path(model_name, onnx_refit_dir, opt=True)
+                if os.path.exists(onnx_refit_path):
+                    self.engines[model_name].refit(onnx_opt_path, onnx_refit_path)
+
+    def max_device_memory(self):
+        max_device_memory = 0
+        for engine in self.engines.values():
+            max_device_memory = max(max_device_memory, engine.engine.device_memory_size)
+        return max_device_memory
+
+    def activate_engines(self, shared_device_memory=None):
+        if shared_device_memory is None:
+            max_device_memory = self.max_device_memory()
+            _, shared_device_memory = cudart.cudaMalloc(max_device_memory)
+        self.shared_device_memory = shared_device_memory
+        # Load and activate TensorRT engines
+        for engine in self.engines.values():
+            engine.activate(reuse_device_memory=self.shared_device_memory)
+
+    def run_engine(self, model_name, feed_dict):
+        return self.engines[model_name].infer(feed_dict, self.stream, use_cuda_graph=self.use_cuda_graph)
diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/models.py b/onnxruntime/python/tools/transformers/models/stable_diffusion/models.py
deleted file mode 100644
index 0f7688a3df9f6..0000000000000
--- a/onnxruntime/python/tools/transformers/models/stable_diffusion/models.py
+++ /dev/null
@@ -1,368 +0,0 @@
-# -------------------------------------------------------------------------
-# Copyright (c) Microsoft Corporation.  All rights reserved.
-# Licensed under the MIT License.
-# --------------------------------------------------------------------------
-#
-# Copyright 2023 The HuggingFace Inc. team.
-# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""
-Models used in Stable diffusion.
-"""
-import logging
-
-import onnx
-import onnx_graphsurgeon as gs
-import torch
-from onnx import shape_inference
-from ort_optimizer import OrtStableDiffusionOptimizer
-from polygraphy.backend.onnx.loader import fold_constants
-
-logger = logging.getLogger(__name__)
-
-
-class TrtOptimizer:
-    def __init__(self, onnx_graph):
-        self.graph = gs.import_onnx(onnx_graph)
-
-    def cleanup(self):
-        self.graph.cleanup().toposort()
-
-    def get_optimized_onnx_graph(self):
-        return gs.export_onnx(self.graph)
-
-    def select_outputs(self, keep, names=None):
-        self.graph.outputs = [self.graph.outputs[o] for o in keep]
-        if names:
-            for i, name in enumerate(names):
-                self.graph.outputs[i].name = name
-
-    def fold_constants(self):
-        onnx_graph = fold_constants(gs.export_onnx(self.graph), allow_onnxruntime_shape_inference=True)
-        self.graph = gs.import_onnx(onnx_graph)
-
-    def infer_shapes(self):
-        onnx_graph = gs.export_onnx(self.graph)
-        if onnx_graph.ByteSize() > 2147483648:
-            raise TypeError("ERROR: model size exceeds supported 2GB limit")
-        else:
-            onnx_graph = shape_inference.infer_shapes(onnx_graph)
-
-        self.graph = gs.import_onnx(onnx_graph)
-
-
-class BaseModel:
-    def __init__(self, model, name, device="cuda", fp16=False, max_batch_size=16, embedding_dim=768, text_maxlen=77):
-        self.model = model
-        self.name = name
-        self.fp16 = fp16
-        self.device = device
-
-        self.min_batch = 1
-        self.max_batch = max_batch_size
-        self.min_image_shape = 256  # min image resolution: 256x256
-        self.max_image_shape = 1024  # max image resolution: 1024x1024
-        self.min_latent_shape = self.min_image_shape // 8
-        self.max_latent_shape = self.max_image_shape // 8
-
-        self.embedding_dim = embedding_dim
-        self.text_maxlen = text_maxlen
-
-        self.model_type = name.lower() if name in ["CLIP", "UNet"] else "vae"
-        self.ort_optimizer = OrtStableDiffusionOptimizer(self.model_type)
-
-    def get_model(self):
-        return self.model
-
-    def get_input_names(self):
-        pass
-
-    def get_output_names(self):
-        pass
-
-    def get_dynamic_axes(self):
-        return None
-
-    def get_sample_input(self, batch_size, image_height, image_width):
-        pass
-
-    def get_profile_id(self, batch_size, image_height, image_width, static_batch, static_image_shape):
-        """For TensorRT EP"""
-        (
-            min_batch,
-            max_batch,
-            min_image_height,
-            max_image_height,
-            min_image_width,
-            max_image_width,
-            _,
-            _,
-            _,
-            _,
-        ) = self.get_minmax_dims(batch_size, image_height, image_width, static_batch, static_image_shape)
-
-        profile_id = f"_b_{batch_size}" if static_batch else f"_b_{min_batch}_{max_batch}"
-
-        if self.name != "CLIP":
-            if static_image_shape:
-                profile_id += f"_h_{image_height}_w_{image_width}"
-            else:
-                profile_id += f"_h_{min_image_height}_{max_image_height}_w_{min_image_width}_{max_image_width}"
-
-        return profile_id
-
-    def get_input_profile(self, batch_size, image_height, image_width, static_batch, static_image_shape):
-        """For TensorRT"""
-        return None
-
-    def get_shape_dict(self, batch_size, image_height, image_width):
-        return None
-
-    def optimize_ort(self, input_onnx_path, optimized_onnx_path, to_fp16=True):
-        self.ort_optimizer.optimize(input_onnx_path, optimized_onnx_path, to_fp16)
-
-    def optimize_trt(self, input_onnx_path, optimized_onnx_path):
-        onnx_graph = onnx.load(input_onnx_path)
-        opt = TrtOptimizer(onnx_graph)
-        opt.cleanup()
-        opt.fold_constants()
-        opt.infer_shapes()
-        opt.cleanup()
-        onnx_opt_graph = opt.get_optimized_onnx_graph()
-        onnx.save(onnx_opt_graph, optimized_onnx_path)
-
-    def check_dims(self, batch_size, image_height, image_width):
-        assert batch_size >= self.min_batch and batch_size <= self.max_batch
-        assert image_height % 8 == 0 or image_width % 8 == 0
-        latent_height = image_height // 8
-        latent_width = image_width // 8
-        assert latent_height >= self.min_latent_shape and latent_height <= self.max_latent_shape
-        assert latent_width >= self.min_latent_shape and latent_width <= self.max_latent_shape
-        return (latent_height, latent_width)
-
-    def get_minmax_dims(self, batch_size, image_height, image_width, static_batch, static_image_shape):
-        min_batch = batch_size if static_batch else self.min_batch
-        max_batch = batch_size if static_batch else self.max_batch
-        latent_height = image_height // 8
-        latent_width = image_width // 8
-        min_image_height = image_height if static_image_shape else self.min_image_shape
-        max_image_height = image_height if static_image_shape else self.max_image_shape
-        min_image_width = image_width if static_image_shape else self.min_image_shape
-        max_image_width = image_width if static_image_shape else self.max_image_shape
-        min_latent_height = latent_height if static_image_shape else self.min_latent_shape
-        max_latent_height = latent_height if static_image_shape else self.max_latent_shape
-        min_latent_width = latent_width if static_image_shape else self.min_latent_shape
-        max_latent_width = latent_width if static_image_shape else self.max_latent_shape
-        return (
-            min_batch,
-            max_batch,
-            min_image_height,
-            max_image_height,
-            min_image_width,
-            max_image_width,
-            min_latent_height,
-            max_latent_height,
-            min_latent_width,
-            max_latent_width,
-        )
-
-
-class CLIP(BaseModel):
-    def __init__(self, model, device, max_batch_size, embedding_dim):
-        super().__init__(
-            model=model,
-            name="CLIP",
-            device=device,
-            max_batch_size=max_batch_size,
-            embedding_dim=embedding_dim,
-        )
-
-    def get_input_names(self):
-        return ["input_ids"]
-
-    def get_output_names(self):
-        return ["text_embeddings"]
-
-    def get_dynamic_axes(self):
-        return {"input_ids": {0: "B"}, "text_embeddings": {0: "B"}}
-
-    def get_input_profile(self, batch_size, image_height, image_width, static_batch, static_image_shape):
-        self.check_dims(batch_size, image_height, image_width)
-        min_batch, max_batch, _, _, _, _, _, _, _, _ = self.get_minmax_dims(
-            batch_size, image_height, image_width, static_batch, static_image_shape
-        )
-        return {
-            "input_ids": [(min_batch, self.text_maxlen), (batch_size, self.text_maxlen), (max_batch, self.text_maxlen)]
-        }
-
-    def get_shape_dict(self, batch_size, image_height, image_width):
-        self.check_dims(batch_size, image_height, image_width)
-        return {
-            "input_ids": (batch_size, self.text_maxlen),
-            "text_embeddings": (batch_size, self.text_maxlen, self.embedding_dim),
-        }
-
-    def get_sample_input(self, batch_size, image_height, image_width):
-        self.check_dims(batch_size, image_height, image_width)
-        return torch.zeros(batch_size, self.text_maxlen, dtype=torch.int32, device=self.device)
-
-    def optimize_trt(self, input_onnx_path, optimized_onnx_path):
-        onnx_graph = onnx.load(input_onnx_path)
-        opt = TrtOptimizer(onnx_graph)
-        opt.select_outputs([0])  # delete graph output#1
-        opt.cleanup()
-        opt.fold_constants()
-        opt.infer_shapes()
-        opt.select_outputs([0], names=["text_embeddings"])  # rename network output
-        opt.cleanup()
-        onnx_opt_graph = opt.get_optimized_onnx_graph()
-        onnx.save(onnx_opt_graph, optimized_onnx_path)
-
-
-class UNet(BaseModel):
-    def __init__(
-        self,
-        model,
-        device="cuda",
-        fp16=False,  # used by TRT
-        max_batch_size=16,
-        embedding_dim=768,
-        text_maxlen=77,
-        unet_dim=4,
-    ):
-        super().__init__(
-            model=model,
-            name="UNet",
-            device=device,
-            fp16=fp16,
-            max_batch_size=max_batch_size,
-            embedding_dim=embedding_dim,
-            text_maxlen=text_maxlen,
-        )
-        self.unet_dim = unet_dim
-
-    def get_input_names(self):
-        return ["sample", "timestep", "encoder_hidden_states"]
-
-    def get_output_names(self):
-        return ["latent"]
-
-    def get_dynamic_axes(self):
-        return {
-            "sample": {0: "2B", 2: "H", 3: "W"},
-            "encoder_hidden_states": {0: "2B"},
-            "latent": {0: "2B", 2: "H", 3: "W"},
-        }
-
-    def get_input_profile(self, batch_size, image_height, image_width, static_batch, static_image_shape):
-        latent_height, latent_width = self.check_dims(batch_size, image_height, image_width)
-        (
-            min_batch,
-            max_batch,
-            _,
-            _,
-            _,
-            _,
-            min_latent_height,
-            max_latent_height,
-            min_latent_width,
-            max_latent_width,
-        ) = self.get_minmax_dims(batch_size, image_height, image_width, static_batch, static_image_shape)
-        return {
-            "sample": [
-                (2 * min_batch, self.unet_dim, min_latent_height, min_latent_width),
-                (2 * batch_size, self.unet_dim, latent_height, latent_width),
-                (2 * max_batch, self.unet_dim, max_latent_height, max_latent_width),
-            ],
-            "encoder_hidden_states": [
-                (2 * min_batch, self.text_maxlen, self.embedding_dim),
-                (2 * batch_size, self.text_maxlen, self.embedding_dim),
-                (2 * max_batch, self.text_maxlen, self.embedding_dim),
-            ],
-        }
-
-    def get_shape_dict(self, batch_size, image_height, image_width):
-        latent_height, latent_width = self.check_dims(batch_size, image_height, image_width)
-        return {
-            "sample": (2 * batch_size, self.unet_dim, latent_height, latent_width),
-            "timestep": [1],
-            "encoder_hidden_states": (2 * batch_size, self.text_maxlen, self.embedding_dim),
-            "latent": (2 * batch_size, 4, latent_height, latent_width),
-        }
-
-    def get_sample_input(self, batch_size, image_height, image_width):
-        latent_height, latent_width = self.check_dims(batch_size, image_height, image_width)
-        dtype = torch.float16 if self.fp16 else torch.float32
-        return (
-            torch.randn(
-                2 * batch_size, self.unet_dim, latent_height, latent_width, dtype=torch.float32, device=self.device
-            ),
-            torch.tensor([1.0], dtype=torch.float32, device=self.device),
-            torch.randn(2 * batch_size, self.text_maxlen, self.embedding_dim, dtype=dtype, device=self.device),
-        )
-
-
-class VAE(BaseModel):
-    def __init__(self, model, device, max_batch_size, embedding_dim):
-        super().__init__(
-            model=model,
-            name="VAE Decoder",
-            device=device,
-            max_batch_size=max_batch_size,
-            embedding_dim=embedding_dim,
-        )
-
-    def get_input_names(self):
-        return ["latent"]
-
-    def get_output_names(self):
-        return ["images"]
-
-    def get_dynamic_axes(self):
-        return {"latent": {0: "B", 2: "H", 3: "W"}, "images": {0: "B", 2: "8H", 3: "8W"}}
-
-    def get_input_profile(self, batch_size, image_height, image_width, static_batch, static_image_shape):
-        latent_height, latent_width = self.check_dims(batch_size, image_height, image_width)
-        (
-            min_batch,
-            max_batch,
-            _,
-            _,
-            _,
-            _,
-            min_latent_height,
-            max_latent_height,
-            min_latent_width,
-            max_latent_width,
-        ) = self.get_minmax_dims(batch_size, image_height, image_width, static_batch, static_image_shape)
-        return {
-            "latent": [
-                (min_batch, 4, min_latent_height, min_latent_width),
-                (batch_size, 4, latent_height, latent_width),
-                (max_batch, 4, max_latent_height, max_latent_width),
-            ]
-        }
-
-    def get_shape_dict(self, batch_size, image_height, image_width):
-        latent_height, latent_width = self.check_dims(batch_size, image_height, image_width)
-        return {
-            "latent": (batch_size, 4, latent_height, latent_width),
-            "images": (batch_size, 3, image_height, image_width),
-        }
-
-    def get_sample_input(self, batch_size, image_height, image_width):
-        latent_height, latent_width = self.check_dims(batch_size, image_height, image_width)
-        return torch.randn(batch_size, 4, latent_height, latent_width, dtype=torch.float32, device=self.device)
diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/onnxruntime_cuda_txt2img.py b/onnxruntime/python/tools/transformers/models/stable_diffusion/onnxruntime_cuda_txt2img.py
index 6134fa7bddcf4..37785869a355b 100644
--- a/onnxruntime/python/tools/transformers/models/stable_diffusion/onnxruntime_cuda_txt2img.py
+++ b/onnxruntime/python/tools/transformers/models/stable_diffusion/onnxruntime_cuda_txt2img.py
@@ -43,16 +43,14 @@
     StableDiffusionSafetyChecker,
 )
 from diffusers.schedulers import DDIMScheduler
-from diffusers.utils import DIFFUSERS_CACHE
-from huggingface_hub import snapshot_download
-from models import CLIP, VAE, UNet
-from ort_utils import Engines
+from diffusion_models import CLIP, VAE, PipelineInfo, UNet
+from ort_utils import Engines, StableDiffusionPipelineMixin
 from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer
 
 logger = logging.getLogger(__name__)
 
 
-class OnnxruntimeCudaStableDiffusionPipeline(StableDiffusionPipeline):
+class OnnxruntimeCudaStableDiffusionPipeline(StableDiffusionPipelineMixin, StableDiffusionPipeline):
     r"""
     Pipeline for text-to-image generation using CUDA provider in ONNX Runtime.
     This pipeline inherits from [`StableDiffusionPipeline`]. Check the documentation in super class for most parameters.
@@ -70,11 +68,12 @@ def __init__(
         requires_safety_checker: bool = True,
         # ONNX export parameters
         onnx_opset: int = 14,
-        onnx_dir: str = "raw_onnx",
+        onnx_dir: str = "onnx_ort",
         # Onnxruntime execution provider parameters
-        engine_dir: str = "onnxruntime_optimized_onnx",
+        engine_dir: str = "ORT_CUDA",
         force_engine_rebuild: bool = False,
         enable_cuda_graph: bool = False,
+        pipeline_info: PipelineInfo = None,
     ):
         super().__init__(
             vae, text_encoder, tokenizer, unet, scheduler, safety_checker, feature_extractor, requires_safety_checker
@@ -96,51 +95,38 @@ def __init__(
 
         self.fp16 = False
 
-    def __load_models(self):
-        self.embedding_dim = self.text_encoder.config.hidden_size
+        self.pipeline_info = pipeline_info
 
-        self.models["clip"] = CLIP(
-            self.text_encoder,
-            device=self.torch_device,
-            max_batch_size=self.max_batch_size,
-            embedding_dim=self.embedding_dim,
-        )
+    def load_models(self):
+        assert self.pipeline_info.clip_embedding_dim() == self.text_encoder.config.hidden_size
 
-        self.models["unet"] = UNet(
-            self.unet,
-            device=self.torch_device,
-            fp16=self.fp16,
-            max_batch_size=self.max_batch_size,
-            embedding_dim=self.embedding_dim,
-            unet_dim=(9 if self.inpaint else 4),
-        )
+        stages = self.pipeline_info.stages()
+        if "clip" in stages:
+            self.models["clip"] = CLIP(
+                self.pipeline_info,
+                self.text_encoder,
+                device=self.torch_device,
+                max_batch_size=self.max_batch_size,
+                clip_skip=0,
+            )
 
-        self.models["vae"] = VAE(
-            self.vae, device=self.torch_device, max_batch_size=self.max_batch_size, embedding_dim=self.embedding_dim
-        )
+        if "unet" in stages:
+            self.models["unet"] = UNet(
+                self.pipeline_info,
+                self.unet,
+                device=self.torch_device,
+                fp16=False,
+                max_batch_size=self.max_batch_size,
+                unet_dim=(9 if self.pipeline_info.is_inpaint() else 4),
+            )
 
-    @classmethod
-    def set_cached_folder(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs):
-        cache_dir = kwargs.pop("cache_dir", DIFFUSERS_CACHE)
-        resume_download = kwargs.pop("resume_download", False)
-        proxies = kwargs.pop("proxies", None)
-        local_files_only = kwargs.pop("local_files_only", False)
-        use_auth_token = kwargs.pop("use_auth_token", None)
-        revision = kwargs.pop("revision", None)
-
-        cls.cached_folder = (
-            pretrained_model_name_or_path
-            if os.path.isdir(pretrained_model_name_or_path)
-            else snapshot_download(
-                pretrained_model_name_or_path,
-                cache_dir=cache_dir,
-                resume_download=resume_download,
-                proxies=proxies,
-                local_files_only=local_files_only,
-                use_auth_token=use_auth_token,
-                revision=revision,
+        if "vae" in stages:
+            self.models["vae"] = VAE(
+                self.pipeline_info,
+                self.vae,
+                device=self.torch_device,
+                max_batch_size=self.max_batch_size,
             )
-        )
 
     def to(
         self,
@@ -156,7 +142,7 @@ def to(
 
         # load models
         self.fp16 = torch_dtype == torch.float16
-        self.__load_models()
+        self.load_models()
 
         # build engines
         self.engines.build(
@@ -180,88 +166,6 @@ def to(
 
         return self
 
-    def __encode_prompt(self, prompt, negative_prompt):
-        r"""
-        Encodes the prompt into text encoder hidden states.
-
-        Args:
-             prompt (`str` or `List[str]`, *optional*):
-                prompt to be encoded
-            negative_prompt (`str` or `List[str]`, *optional*):
-                The prompt or prompts not to guide the image generation. If not defined, one has to pass
-                `negative_prompt_embeds`. instead. If not defined, one has to pass `negative_prompt_embeds`. instead.
-                Ignored when not using guidance (i.e., ignored if `guidance_scale` is less than `1`).
-        """
-        # Tokenize prompt
-        text_input_ids = (
-            self.tokenizer(
-                prompt,
-                padding="max_length",
-                max_length=self.tokenizer.model_max_length,
-                truncation=True,
-                return_tensors="pt",
-            )
-            .input_ids.type(torch.int32)
-            .to(self.torch_device)
-        )
-
-        # NOTE: output tensor for CLIP must be cloned because it will be overwritten when called again for negative prompt
-        text_embeddings = (
-            self.engines.get_engine("clip").infer({"input_ids": text_input_ids})["text_embeddings"].clone()
-        )
-
-        # Tokenize negative prompt
-        uncond_input_ids = (
-            self.tokenizer(
-                negative_prompt,
-                padding="max_length",
-                max_length=self.tokenizer.model_max_length,
-                truncation=True,
-                return_tensors="pt",
-            )
-            .input_ids.type(torch.int32)
-            .to(self.torch_device)
-        )
-
-        uncond_embeddings = self.engines.get_engine("clip").infer({"input_ids": uncond_input_ids})["text_embeddings"]
-
-        # Concatenate the unconditional and text embeddings into a single batch to avoid doing two forward passes for classifier free guidance
-        text_embeddings = torch.cat([uncond_embeddings, text_embeddings]).to(dtype=torch.float16)
-
-        return text_embeddings
-
-    def __denoise_latent(self, latents, text_embeddings, timesteps=None, mask=None, masked_image_latents=None):
-        if not isinstance(timesteps, torch.Tensor):
-            timesteps = self.scheduler.timesteps
-
-        for _step_index, timestep in enumerate(timesteps):
-            # Expand the latents if we are doing classifier free guidance
-            latent_model_input = torch.cat([latents] * 2)
-            latent_model_input = self.scheduler.scale_model_input(latent_model_input, timestep)
-            if isinstance(mask, torch.Tensor):
-                latent_model_input = torch.cat([latent_model_input, mask, masked_image_latents], dim=1)
-
-            timestep_float = timestep.to(torch.float16) if self.fp16 else timestep.to(torch.float32)
-
-            # Predict the noise residual
-            noise_pred = self.engines.get_engine("unet").infer(
-                {"sample": latent_model_input, "timestep": timestep_float, "encoder_hidden_states": text_embeddings},
-            )["latent"]
-
-            # Perform guidance
-            noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
-            noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_text - noise_pred_uncond)
-
-            latents = self.scheduler.step(noise_pred, timestep, latents).prev_sample
-
-        latents = 1.0 / 0.18215 * latents
-        return latents
-
-    def __decode_latent(self, latents):
-        images = self.engines.get_engine("vae").infer({"latent": latents})["images"]
-        images = (images / 2 + 0.5).clamp(0, 1)
-        return images.cpu().permute(0, 2, 3, 1).float().numpy()
-
     def __allocate_buffers(self, image_height, image_width, batch_size):
         # Allocate output tensors for I/O bindings
         for model_name, obj in self.models.items():
@@ -337,7 +241,7 @@ def __call__(
 
         with torch.inference_mode(), torch.autocast("cuda"):
             # CLIP text encoder
-            text_embeddings = self.__encode_prompt(prompt, negative_prompt)
+            text_embeddings = self.encode_prompt(self.engines.get_engine("clip"), prompt, negative_prompt)
 
             # Pre-initialize latents
             num_channels_latents = self.unet_in_channels
@@ -352,30 +256,37 @@ def __call__(
             )
 
             # UNet denoiser
-            latents = self.__denoise_latent(latents, text_embeddings)
+            latents = self.denoise_latent(
+                self.engines.get_engine("unet"), latents, text_embeddings, timestep_fp16=self.fp16
+            )
 
             # VAE decode latent
-            images = self.__decode_latent(latents)
+            images = self.decode_latent(self.engines.get_engine("vae"), latents)
 
         images, has_nsfw_concept = self.run_safety_checker(images, self.torch_device, text_embeddings.dtype)
         images = self.numpy_to_pil(images)
         return StableDiffusionPipelineOutput(images=images, nsfw_content_detected=has_nsfw_concept)
 
 
-if __name__ == "__main__":
-    model_name_or_path = "runwayml/stable-diffusion-v1-5"
+def example():
+    pipeline_info = PipelineInfo("1.5")
+    model_name_or_path = pipeline_info.name()
     scheduler = DDIMScheduler.from_pretrained(model_name_or_path, subfolder="scheduler")
-
     pipe = OnnxruntimeCudaStableDiffusionPipeline.from_pretrained(
         model_name_or_path,
         scheduler=scheduler,
+        pipeline_info=pipeline_info,
     )
 
     # re-use cached folder to save ONNX models
-    pipe.set_cached_folder(model_name_or_path)
+    pipe.set_cached_folder(model_name_or_path, resume_download=True, local_files_only=True)
 
     pipe = pipe.to("cuda", torch_dtype=torch.float16)
 
     prompt = "photorealistic new zealand hills"
     image = pipe(prompt).images[0]
-    image.save("ort_trt_txt2img_new_zealand_hills.png")
+    image.save("ort_cuda_txt2img_new_zealand_hills.png")
+
+
+if __name__ == "__main__":
+    example()
diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/onnxruntime_tensorrt_txt2img.py b/onnxruntime/python/tools/transformers/models/stable_diffusion/onnxruntime_tensorrt_txt2img.py
index 6f3c215f36318..c663e37c7ea7d 100644
--- a/onnxruntime/python/tools/transformers/models/stable_diffusion/onnxruntime_tensorrt_txt2img.py
+++ b/onnxruntime/python/tools/transformers/models/stable_diffusion/onnxruntime_tensorrt_txt2img.py
@@ -32,13 +32,11 @@
 pip install onnxruntime-gpu
 """
 
-import gc
+import logging
 import os
-import shutil
 from typing import List, Optional, Union
 
 import torch
-from cuda import cudart
 from diffusers.models import AutoencoderKL, UNet2DConditionModel
 from diffusers.pipelines.stable_diffusion import (
     StableDiffusionPipeline,
@@ -46,224 +44,15 @@
     StableDiffusionSafetyChecker,
 )
 from diffusers.schedulers import DDIMScheduler
-from diffusers.utils import DIFFUSERS_CACHE, logging
-from huggingface_hub import snapshot_download
-from models import CLIP, VAE, UNet
-from ort_utils import OrtCudaSession
+from diffusion_models import PipelineInfo
+from engine_builder_ort_trt import OrtTensorrtEngineBuilder
+from ort_utils import StableDiffusionPipelineMixin
 from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer
 
-import onnxruntime as ort
+logger = logging.getLogger(__name__)
 
-logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
-
-class Engine(OrtCudaSession):
-    def __init__(self, engine_path, device_id, onnx_path, fp16, input_profile, workspace_size, enable_cuda_graph):
-        self.engine_path = engine_path
-        self.ort_trt_provider_options = self.get_tensorrt_provider_options(
-            input_profile,
-            workspace_size,
-            fp16,
-            device_id,
-            enable_cuda_graph,
-        )
-
-        sess_options = ort.SessionOptions()
-        sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_DISABLE_ALL
-        ort_session = ort.InferenceSession(
-            onnx_path,
-            sess_options,
-            providers=[
-                ("TensorrtExecutionProvider", self.ort_trt_provider_options),
-            ],
-        )
-
-        device = torch.device("cuda", device_id)
-        super().__init__(ort_session, device, enable_cuda_graph)
-
-    def get_tensorrt_provider_options(self, input_profile, workspace_size, fp16, device_id, enable_cuda_graph):
-        trt_ep_options = {
-            "device_id": device_id,
-            "trt_fp16_enable": fp16,
-            "trt_engine_cache_enable": True,
-            "trt_timing_cache_enable": True,
-            "trt_detailed_build_log": True,
-            "trt_engine_cache_path": self.engine_path,
-        }
-
-        if enable_cuda_graph:
-            trt_ep_options["trt_cuda_graph_enable"] = True
-
-        if workspace_size > 0:
-            trt_ep_options["trt_max_workspace_size"] = workspace_size
-
-        if input_profile:
-            min_shapes = []
-            max_shapes = []
-            opt_shapes = []
-            for name, profile in input_profile.items():
-                assert isinstance(profile, list) and len(profile) == 3
-                min_shape = profile[0]
-                opt_shape = profile[1]
-                max_shape = profile[2]
-                assert len(min_shape) == len(opt_shape) and len(opt_shape) == len(max_shape)
-
-                min_shapes.append(f"{name}:" + "x".join([str(x) for x in min_shape]))
-                opt_shapes.append(f"{name}:" + "x".join([str(x) for x in opt_shape]))
-                max_shapes.append(f"{name}:" + "x".join([str(x) for x in max_shape]))
-
-            trt_ep_options["trt_profile_min_shapes"] = ",".join(min_shapes)
-            trt_ep_options["trt_profile_max_shapes"] = ",".join(max_shapes)
-            trt_ep_options["trt_profile_opt_shapes"] = ",".join(opt_shapes)
-
-        logger.info("trt_ep_options=%s", trt_ep_options)
-
-        return trt_ep_options
-
-
-def get_onnx_path(model_name, onnx_dir, opt=True):
-    return os.path.join(onnx_dir, model_name + (".opt" if opt else "") + ".onnx")
-
-
-def get_engine_path(engine_dir, model_name, profile_id):
-    return os.path.join(engine_dir, model_name + profile_id)
-
-
-def has_engine_file(engine_path):
-    if os.path.isdir(engine_path):
-        children = os.scandir(engine_path)
-        for entry in children:
-            if entry.is_file() and entry.name.endswith(".engine"):
-                return True
-    return False
-
-
-def get_work_space_size(model_name, max_workspace_size):
-    gibibyte = 2**30
-    workspace_size = 4 * gibibyte if model_name == "clip" else max_workspace_size
-    if workspace_size == 0:
-        _, free_mem, _ = cudart.cudaMemGetInfo()
-        # The following logic are adopted from TensorRT demo diffusion.
-        if free_mem > 6 * gibibyte:
-            workspace_size = free_mem - 4 * gibibyte
-    return workspace_size
-
-
-def build_engines(
-    models,
-    engine_dir,
-    onnx_dir,
-    onnx_opset,
-    opt_image_height,
-    opt_image_width,
-    opt_batch_size=1,
-    force_engine_rebuild=False,
-    static_batch=False,
-    static_image_shape=True,
-    max_workspace_size=0,
-    device_id=0,
-    enable_cuda_graph=False,
-):
-    if force_engine_rebuild:
-        if os.path.isdir(onnx_dir):
-            logger.info("Remove existing directory %s since force_engine_rebuild is enabled", onnx_dir)
-            shutil.rmtree(onnx_dir)
-        if os.path.isdir(engine_dir):
-            logger.info("Remove existing directory %s since force_engine_rebuild is enabled", engine_dir)
-            shutil.rmtree(engine_dir)
-
-    if not os.path.isdir(engine_dir):
-        os.makedirs(engine_dir)
-
-    if not os.path.isdir(onnx_dir):
-        os.makedirs(onnx_dir)
-
-    # Export models to ONNX
-    for model_name, model_obj in models.items():
-        profile_id = model_obj.get_profile_id(
-            opt_batch_size, opt_image_height, opt_image_width, static_batch, static_image_shape
-        )
-        engine_path = get_engine_path(engine_dir, model_name, profile_id)
-        if not has_engine_file(engine_path):
-            onnx_path = get_onnx_path(model_name, onnx_dir, opt=False)
-            onnx_opt_path = get_onnx_path(model_name, onnx_dir)
-            if not os.path.exists(onnx_opt_path):
-                if not os.path.exists(onnx_path):
-                    logger.info(f"Exporting model: {onnx_path}")
-                    model = model_obj.get_model()
-                    with torch.inference_mode(), torch.autocast("cuda"):
-                        inputs = model_obj.get_sample_input(opt_batch_size, opt_image_height, opt_image_width)
-                        torch.onnx.export(
-                            model,
-                            inputs,
-                            onnx_path,
-                            export_params=True,
-                            opset_version=onnx_opset,
-                            do_constant_folding=True,
-                            input_names=model_obj.get_input_names(),
-                            output_names=model_obj.get_output_names(),
-                            dynamic_axes=model_obj.get_dynamic_axes(),
-                        )
-                    del model
-                    torch.cuda.empty_cache()
-                    gc.collect()
-                else:
-                    logger.info("Found cached model: %s", onnx_path)
-
-                # Optimize onnx
-                if not os.path.exists(onnx_opt_path):
-                    logger.info("Generating optimizing model: %s", onnx_opt_path)
-                    model_obj.optimize_trt(onnx_path, onnx_opt_path)
-                else:
-                    logger.info("Found cached optimized model: %s", onnx_opt_path)
-
-    built_engines = {}
-    for model_name, model_obj in models.items():
-        profile_id = model_obj.get_profile_id(
-            opt_batch_size, opt_image_height, opt_image_width, static_batch, static_image_shape
-        )
-
-        engine_path = get_engine_path(engine_dir, model_name, profile_id)
-        onnx_opt_path = get_onnx_path(model_name, onnx_dir)
-
-        if not has_engine_file(engine_path):
-            logger.info(
-                "Building TensorRT engine for %s from %s to %s. It can take a while to complete...",
-                model_name,
-                onnx_opt_path,
-                engine_path,
-            )
-        else:
-            logger.info("Reuse cached TensorRT engine in directory %s", engine_path)
-
-        input_profile = model_obj.get_input_profile(
-            opt_batch_size,
-            opt_image_height,
-            opt_image_width,
-            static_batch=static_batch,
-            static_image_shape=static_image_shape,
-        )
-
-        engine = Engine(
-            engine_path,
-            device_id,
-            onnx_opt_path,
-            fp16=True,
-            input_profile=input_profile,
-            workspace_size=get_work_space_size(model_name, max_workspace_size),
-            enable_cuda_graph=enable_cuda_graph,
-        )
-
-        built_engines[model_name] = engine
-
-    return built_engines
-
-
-def run_engine(engine, feed_dict):
-    return engine.infer(feed_dict)
-
-
-class OnnxruntimeTensorRTStableDiffusionPipeline(StableDiffusionPipeline):
+class OnnxruntimeTensorRTStableDiffusionPipeline(StableDiffusionPipelineMixin, StableDiffusionPipeline):
     r"""
     Pipeline for text-to-image generation using TensorRT execution provider in ONNX Runtime.
 
@@ -285,11 +74,12 @@ def __init__(
         max_batch_size: int = 16,
         # ONNX export parameters
         onnx_opset: int = 17,
-        onnx_dir: str = "onnx",
+        onnx_dir: str = "onnx_trt",
         # TensorRT engine build parameters
-        engine_dir: str = "onnxruntime_tensorrt_engine",
+        engine_dir: str = "ORT_TRT",  # use short name here to avoid path exceeds 260 chars in Windows.
         force_engine_rebuild: bool = False,
         enable_cuda_graph: bool = False,
+        pipeline_info: Optional[PipelineInfo] = None,
     ):
         super().__init__(
             vae, text_encoder, tokenizer, unet, scheduler, safety_checker, feature_extractor, requires_safety_checker
@@ -299,16 +89,14 @@ def __init__(
 
         self.image_height = image_height
         self.image_width = image_width
-        self.inpaint = False
         self.onnx_opset = onnx_opset
         self.onnx_dir = onnx_dir
         self.engine_dir = engine_dir
         self.force_engine_rebuild = force_engine_rebuild
-        self.enable_cuda_graph = enable_cuda_graph
 
-        # Although cuda graph requires static input shape, engine built with dyamic batch gets better performance in T4.
+        # Although cuda graph requires static input shape, engine built with dynamic batch gets better performance in T4.
         # Use static batch could reduce GPU memory footprint.
-        self.build_static_batch = False
+        self.build_static_batch = enable_cuda_graph
 
         # TODO: support dynamic image shape.
         self.build_dynamic_shape = False
@@ -318,54 +106,13 @@ def __init__(
         if self.build_dynamic_shape or self.image_height > 512 or self.image_width > 512:
             self.max_batch_size = 4
 
-        self.models = {}  # loaded in __load_models()
         self.engines = {}  # loaded in build_engines()
-
-    def __load_models(self):
-        self.embedding_dim = self.text_encoder.config.hidden_size
-
-        self.models["clip"] = CLIP(
-            self.text_encoder,
-            device=self.torch_device,
-            max_batch_size=self.max_batch_size,
-            embedding_dim=self.embedding_dim,
-        )
-
-        self.models["unet"] = UNet(
-            self.unet,
-            device=self.torch_device,
-            fp16=True,
-            max_batch_size=self.max_batch_size,
-            embedding_dim=self.embedding_dim,
-            unet_dim=(9 if self.inpaint else 4),
+        self.engine_builder = OrtTensorrtEngineBuilder(
+            pipeline_info, max_batch_size=max_batch_size, use_cuda_graph=enable_cuda_graph
         )
 
-        self.models["vae"] = VAE(
-            self.vae, device=self.torch_device, max_batch_size=self.max_batch_size, embedding_dim=self.embedding_dim
-        )
-
-    @classmethod
-    def set_cached_folder(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs):
-        cache_dir = kwargs.pop("cache_dir", DIFFUSERS_CACHE)
-        resume_download = kwargs.pop("resume_download", False)
-        proxies = kwargs.pop("proxies", None)
-        local_files_only = kwargs.pop("local_files_only", False)
-        use_auth_token = kwargs.pop("use_auth_token", None)
-        revision = kwargs.pop("revision", None)
-
-        cls.cached_folder = (
-            pretrained_model_name_or_path
-            if os.path.isdir(pretrained_model_name_or_path)
-            else snapshot_download(
-                pretrained_model_name_or_path,
-                cache_dir=cache_dir,
-                resume_download=resume_download,
-                proxies=proxies,
-                local_files_only=local_files_only,
-                use_auth_token=use_auth_token,
-                revision=revision,
-            )
-        )
+        self.pipeline_info = pipeline_info
+        self.stages = pipeline_info.stages()
 
     def to(
         self,
@@ -381,11 +128,9 @@ def to(
         self.torch_device = self._execution_device
         logger.info(f"Running inference on device: {self.torch_device}")
 
-        self.__load_models()
-
-        self.engines = build_engines(
-            self.models,
+        self.engines = self.engine_builder.build_engines(
             self.engine_dir,
+            None,
             self.onnx_dir,
             self.onnx_opset,
             opt_image_height=self.image_height,
@@ -394,96 +139,10 @@ def to(
             static_batch=self.build_static_batch,
             static_image_shape=not self.build_dynamic_shape,
             device_id=self.torch_device.index,
-            enable_cuda_graph=self.enable_cuda_graph,
         )
 
         return self
 
-    def __encode_prompt(self, prompt, negative_prompt):
-        r"""
-        Encodes the prompt into text encoder hidden states.
-
-        Args:
-             prompt (`str` or `List[str]`, *optional*):
-                prompt to be encoded
-            negative_prompt (`str` or `List[str]`, *optional*):
-                The prompt or prompts not to guide the image generation. If not defined, one has to pass
-                `negative_prompt_embeds`. instead. If not defined, one has to pass `negative_prompt_embeds`. instead.
-                Ignored when not using guidance (i.e., ignored if `guidance_scale` is less than `1`).
-        """
-        # Tokenize prompt
-        text_input_ids = (
-            self.tokenizer(
-                prompt,
-                padding="max_length",
-                max_length=self.tokenizer.model_max_length,
-                truncation=True,
-                return_tensors="pt",
-            )
-            .input_ids.type(torch.int32)
-            .to(self.torch_device)
-        )
-
-        # NOTE: output tensor for CLIP must be cloned because it will be overwritten when called again for negative prompt
-        text_embeddings = run_engine(self.engines["clip"], {"input_ids": text_input_ids})["text_embeddings"].clone()
-
-        # Tokenize negative prompt
-        uncond_input_ids = (
-            self.tokenizer(
-                negative_prompt,
-                padding="max_length",
-                max_length=self.tokenizer.model_max_length,
-                truncation=True,
-                return_tensors="pt",
-            )
-            .input_ids.type(torch.int32)
-            .to(self.torch_device)
-        )
-
-        uncond_embeddings = run_engine(self.engines["clip"], {"input_ids": uncond_input_ids})["text_embeddings"]
-
-        # Concatenate the unconditional and text embeddings into a single batch to avoid doing two forward passes for classifier free guidance
-        text_embeddings = torch.cat([uncond_embeddings, text_embeddings]).to(dtype=torch.float16)
-
-        return text_embeddings
-
-    def __denoise_latent(self, latents, text_embeddings, timesteps=None, mask=None, masked_image_latents=None):
-        if not isinstance(timesteps, torch.Tensor):
-            timesteps = self.scheduler.timesteps
-        for _step_index, timestep in enumerate(timesteps):
-            # Expand the latents if we are doing classifier free guidance
-            latent_model_input = torch.cat([latents] * 2)
-            latent_model_input = self.scheduler.scale_model_input(latent_model_input, timestep)
-            if isinstance(mask, torch.Tensor):
-                latent_model_input = torch.cat([latent_model_input, mask, masked_image_latents], dim=1)
-
-            # Predict the noise residual
-            timestep_float = timestep.float() if timestep.dtype != torch.float32 else timestep
-
-            noise_pred = run_engine(
-                self.engines["unet"],
-                {"sample": latent_model_input, "timestep": timestep_float, "encoder_hidden_states": text_embeddings},
-            )["latent"]
-
-            # Perform guidance
-            noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
-            noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_text - noise_pred_uncond)
-
-            latents = self.scheduler.step(noise_pred, timestep, latents).prev_sample
-
-        latents = 1.0 / 0.18215 * latents
-        return latents
-
-    def __decode_latent(self, latents):
-        images = run_engine(self.engines["vae"], {"latent": latents})["images"]
-        images = (images / 2 + 0.5).clamp(0, 1)
-        return images.cpu().permute(0, 2, 3, 1).float().numpy()
-
-    def __allocate_buffers(self, image_height, image_width, batch_size):
-        # Allocate output tensors for I/O bindings
-        for model_name, obj in self.models.items():
-            self.engines[model_name].allocate_buffers(obj.get_shape_dict(batch_size, image_height, image_width))
-
     @torch.no_grad()
     def __call__(
         self,
@@ -547,11 +206,11 @@ def __call__(
                 f"Batch size {len(prompt)} is larger than allowed {self.max_batch_size}. If dynamic shape is used, then maximum batch size is 4"
             )
 
-        self.__allocate_buffers(self.image_height, self.image_width, batch_size)
+        self.engine_builder.load_resources(self.image_height, self.image_width, batch_size)
 
         with torch.inference_mode(), torch.autocast("cuda"):
             # CLIP text encoder
-            text_embeddings = self.__encode_prompt(prompt, negative_prompt)
+            text_embeddings = self.encode_prompt(self.engines["clip"], prompt, negative_prompt)
 
             # Pre-initialize latents
             num_channels_latents = self.unet.config.in_channels
@@ -566,10 +225,10 @@ def __call__(
             )
 
             # UNet denoiser
-            latents = self.__denoise_latent(latents, text_embeddings)
+            latents = self.denoise_latent(self.engines["unet"], latents, text_embeddings)
 
             # VAE decode latent
-            images = self.__decode_latent(latents)
+            images = self.decode_latent(self.engines["vae"], latents)
 
         images, has_nsfw_concept = self.run_safety_checker(images, self.torch_device, text_embeddings.dtype)
         images = self.numpy_to_pil(images)
@@ -577,8 +236,8 @@ def __call__(
 
 
 if __name__ == "__main__":
-    model_name_or_path = "runwayml/stable-diffusion-v1-5"
-
+    pipeline_info = PipelineInfo("1.5")
+    model_name_or_path = pipeline_info.name()
     scheduler = DDIMScheduler.from_pretrained(model_name_or_path, subfolder="scheduler")
 
     pipe = OnnxruntimeTensorRTStableDiffusionPipeline.from_pretrained(
@@ -589,6 +248,7 @@ def __call__(
         image_height=512,
         image_width=512,
         max_batch_size=4,
+        pipeline_info=pipeline_info,
     )
 
     # re-use cached folder to save ONNX models and TensorRT Engines
diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/optimize_pipeline.py b/onnxruntime/python/tools/transformers/models/stable_diffusion/optimize_pipeline.py
index 4512c971ac27c..ffcfd6d9fd7e0 100644
--- a/onnxruntime/python/tools/transformers/models/stable_diffusion/optimize_pipeline.py
+++ b/onnxruntime/python/tools/transformers/models/stable_diffusion/optimize_pipeline.py
@@ -13,7 +13,7 @@
 #    python optimize_pipeline.py -i ./sd-v1-5 -o ./sd-v1-5-fp16 --float16
 #
 # Note that the optimizations are carried out for CUDA Execution Provider at first, other EPs may not have the support
-# for the fused opeartors. The users could disable the operator fusion manually to workaround.
+# for the fused operators. The users could disable the operator fusion manually to workaround.
 
 import argparse
 import logging
@@ -49,7 +49,6 @@ def has_external_data(onnx_model_path):
 def _optimize_sd_pipeline(
     source_dir: Path,
     target_dir: Path,
-    overwrite: bool,
     use_external_data_format: Optional[bool],
     float16: bool,
     force_fp32_ops: List[str],
@@ -61,7 +60,6 @@ def _optimize_sd_pipeline(
     Args:
         source_dir (Path): Root of input directory of stable diffusion onnx pipeline with float32 models.
         target_dir (Path): Root of output directory of stable diffusion onnx pipeline with optimized models.
-        overwrite (bool): Overwrite files if exists.
         use_external_data_format (Optional[bool]): use external data format.
         float16 (bool): use half precision
         force_fp32_ops(List[str]): operators that are forced to run in float32.
@@ -144,6 +142,7 @@ def _optimize_sd_pipeline(
             opt_level=0,
             optimization_options=fusion_options,
             use_gpu=True,
+            provider=args.provider,
         )
 
         if float16:
@@ -168,6 +167,7 @@ def _optimize_sd_pipeline(
                 optimize_by_onnxruntime(
                     str(tmp_model_path),
                     use_gpu=True,
+                    provider=args.provider,
                     optimized_model_path=str(ort_optimized_model_path),
                     save_as_external_data=use_external_data_format,
                 )
@@ -233,7 +233,7 @@ def optimize_stable_diffusion_pipeline(
     args,
 ):
     if os.path.exists(output_dir):
-        if args.overwrite:
+        if overwrite:
             shutil.rmtree(output_dir, ignore_errors=True)
         else:
             raise RuntimeError("output directory existed:{output_dir}. Add --overwrite to empty the directory.")
@@ -247,7 +247,6 @@ def optimize_stable_diffusion_pipeline(
     _optimize_sd_pipeline(
         source_dir,
         target_dir,
-        overwrite,
         use_external_data_format,
         float16,
         args.force_fp32_ops,
@@ -319,11 +318,19 @@ def parse_arguments(argv: Optional[List[str]] = None):
         required=False,
         action="store_true",
         help="Onnx model larger than 2GB need to use external data format. "
-        "If specifed, save each onnx model to two files: one for onnx graph, another for weights. "
+        "If specified, save each onnx model to two files: one for onnx graph, another for weights. "
         "If not specified, use same format as original model by default. ",
     )
     parser.set_defaults(use_external_data_format=None)
 
+    parser.add_argument(
+        "--provider",
+        required=False,
+        type=str,
+        default=None,
+        help="Execution provider to use.",
+    )
+
     FusionOptions.add_arguments(parser)
 
     args = parser.parse_args(argv)
diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/ort_optimizer.py b/onnxruntime/python/tools/transformers/models/stable_diffusion/ort_optimizer.py
index 0824c8f07d6e2..ff91bf416bf51 100644
--- a/onnxruntime/python/tools/transformers/models/stable_diffusion/ort_optimizer.py
+++ b/onnxruntime/python/tools/transformers/models/stable_diffusion/ort_optimizer.py
@@ -8,10 +8,13 @@
 """
 
 import logging
+import os
+import shutil
 import tempfile
 from pathlib import Path
 
 import onnx
+from packaging import version
 
 from onnxruntime.transformers.fusion_options import FusionOptions
 from onnxruntime.transformers.onnx_model_clip import ClipOnnxModel
@@ -32,53 +35,97 @@ def __init__(self, model_type: str):
             "clip": ClipOnnxModel,
         }
 
-    def optimize_by_ort(self, onnx_model):
+    def _optimize_by_ort(self, onnx_model, use_external_data_format, tmp_dir):
+        # Save to a temporary file so that we can load it with Onnx Runtime.
+        logger.info("Saving a temporary model to run OnnxRuntime graph optimizations...")
+        tmp_model_path = Path(tmp_dir) / "model.onnx"
+        onnx_model.save_model_to_file(str(tmp_model_path), use_external_data_format=use_external_data_format)
+        ort_optimized_model_path = Path(tmp_dir) / "optimized.onnx"
+        optimize_by_onnxruntime(
+            str(tmp_model_path),
+            use_gpu=True,
+            optimized_model_path=str(ort_optimized_model_path),
+            save_as_external_data=use_external_data_format,
+            external_data_filename="optimized.onnx_data",
+        )
+        model = onnx.load(str(ort_optimized_model_path), load_external_data=True)
+        return self.model_type_class_mapping[self.model_type](model)
+
+    def optimize_by_ort(self, onnx_model, use_external_data_format=False, tmp_dir=None):
         # Use this step to see the final graph that executed by Onnx Runtime.
-        with tempfile.TemporaryDirectory() as tmp_dir:
-            # Save to a temporary file so that we can load it with Onnx Runtime.
-            logger.info("Saving a temporary model to run OnnxRuntime graph optimizations...")
-            tmp_model_path = Path(tmp_dir) / "model.onnx"
-            onnx_model.save_model_to_file(str(tmp_model_path))
-            ort_optimized_model_path = tmp_model_path
-            optimize_by_onnxruntime(
-                str(tmp_model_path), use_gpu=True, optimized_model_path=str(ort_optimized_model_path)
-            )
-            model = onnx.load(str(ort_optimized_model_path), load_external_data=True)
-            return self.model_type_class_mapping[self.model_type](model)
+        if tmp_dir is None:
+            with tempfile.TemporaryDirectory() as temp_dir:
+                return self._optimize_by_ort(onnx_model, use_external_data_format, temp_dir)
+        else:
+            os.makedirs(tmp_dir, exist_ok=True)
+            model = self._optimize_by_ort(onnx_model, use_external_data_format, tmp_dir)
+            shutil.rmtree(tmp_dir)
+            return model
 
-    def optimize(self, input_fp32_onnx_path, optimized_onnx_path, float16=True):
+    def optimize(
+        self,
+        input_fp32_onnx_path,
+        optimized_onnx_path,
+        float16=True,
+        keep_io_types=False,
+        fp32_op_list=None,
+        keep_outputs=None,
+        optimize_by_ort=True,
+        optimize_by_fusion=True,
+        final_target_float16=True,
+        tmp_dir=None,
+    ):
         """Optimize onnx model using ONNX Runtime transformers optimizer"""
         logger.info(f"Optimize {input_fp32_onnx_path}...")
-        fusion_options = FusionOptions(self.model_type)
-        if self.model_type in ["unet"] and not float16:
-            fusion_options.enable_packed_kv = False
-            fusion_options.enable_packed_qkv = False
-
-        m = optimize_model(
-            input_fp32_onnx_path,
-            model_type=self.model_type,
-            num_heads=0,  # will be deduced from graph
-            hidden_size=0,  # will be deduced from graph
-            opt_level=0,
-            optimization_options=fusion_options,
-            use_gpu=True,
-        )
 
-        if self.model_type == "clip":
-            m.prune_graph(outputs=["text_embeddings"])  # remove the pooler_output, and only keep the first output.
+        if optimize_by_fusion:
+            fusion_options = FusionOptions(self.model_type)
+
+            # It is allowed float16=False and final_target_float16=True, for using fp32 as intermediate optimization step.
+            # For rare fp32 use case, we can disable packed kv/qkv since there is no fp32 TRT fused attention kernel.
+            if self.model_type in ["unet"] and not final_target_float16:
+                fusion_options.enable_packed_kv = False
+                fusion_options.enable_packed_qkv = False
+
+            m = optimize_model(
+                input_fp32_onnx_path,
+                model_type=self.model_type,
+                num_heads=0,  # will be deduced from graph
+                hidden_size=0,  # will be deduced from graph
+                opt_level=0,
+                optimization_options=fusion_options,
+                use_gpu=True,
+            )
+        else:
+            model = onnx.load_model(input_fp32_onnx_path, load_external_data=True)
+            m = self.model_type_class_mapping[self.model_type](model)
+
+        if keep_outputs:
+            m.prune_graph(outputs=keep_outputs)
+
+        model_size = m.model.ByteSize()
+
+        # model size might be negative (overflow?) in Windows.
+        use_external_data_format = model_size <= 0 or model_size >= onnx.checker.MAXIMUM_PROTOBUF
+
+        # Note that ORT < 1.16 could not save model larger than 2GB.
+        # This step is is optional since it has no impact on inference latency.
+        # The optimized model is not portable. It could only run in the same execution provider (CUDA EP in this case).
+        # When the model has been optimized by onnxruntime, we can disable optimization in SessionOption
+        # to save session creation time. Another benefit is to inspect the final graph for developing purpose.
+        from onnxruntime import __version__ as ort_version
+
+        if optimize_by_ort and (version.parse(ort_version) >= version.parse("1.16.0") or not use_external_data_format):
+            m = self.optimize_by_ort(m, use_external_data_format=use_external_data_format, tmp_dir=tmp_dir)
 
         if float16:
             logger.info("Convert to float16 ...")
             m.convert_float_to_float16(
-                keep_io_types=False,
-                op_block_list=["RandomNormalLike"],
+                keep_io_types=keep_io_types,
+                op_block_list=fp32_op_list,
             )
 
-        # Note that ORT 1.15 could not save model larger than 2GB. This only works for float16
-        if float16 or (self.model_type != "unet"):
-            m = self.optimize_by_ort(m)
-
         m.get_operator_statistics()
         m.get_fused_operator_statistics()
-        m.save_model_to_file(optimized_onnx_path, use_external_data_format=(self.model_type == "unet") and not float16)
+        m.save_model_to_file(optimized_onnx_path, use_external_data_format=use_external_data_format)
         logger.info("%s is optimized: %s", self.model_type, optimized_onnx_path)
diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/ort_utils.py b/onnxruntime/python/tools/transformers/models/stable_diffusion/ort_utils.py
index 7192e4ad5584f..0afa13a0f4dca 100644
--- a/onnxruntime/python/tools/transformers/models/stable_diffusion/ort_utils.py
+++ b/onnxruntime/python/tools/transformers/models/stable_diffusion/ort_utils.py
@@ -7,122 +7,36 @@
 import logging
 import os
 import shutil
-from collections import OrderedDict
-from typing import Any, Dict
+import sys
+from typing import Union
 
 import torch
 
 import onnxruntime as ort
-from onnxruntime.transformers.io_binding_helper import TypeHelper
 
 logger = logging.getLogger(__name__)
 
 
-class OrtCudaSession:
-    """Inference Session with IO Binding for ONNX Runtime CUDA or TensorRT provider"""
-
-    def __init__(self, ort_session: ort.InferenceSession, device: torch.device, enable_cuda_graph=False):
-        self.ort_session = ort_session
-        self.input_names = [input.name for input in self.ort_session.get_inputs()]
-        self.output_names = [output.name for output in self.ort_session.get_outputs()]
-        self.io_name_to_numpy_type = TypeHelper.get_io_numpy_type_map(self.ort_session)
-        self.io_binding = self.ort_session.io_binding()
-        self.enable_cuda_graph = enable_cuda_graph
-
-        self.input_tensors = OrderedDict()
-        self.output_tensors = OrderedDict()
-        self.device = device
-
-    def __del__(self):
-        del self.input_tensors
-        del self.output_tensors
-        del self.io_binding
-        del self.ort_session
-
-    def allocate_buffers(self, shape_dict: Dict[str, tuple]):
-        """Allocate tensors for I/O Binding"""
-        if self.enable_cuda_graph:
-            for name, shape in shape_dict.items():
-                if name in self.input_names:
-                    # Reuse allocated buffer when the shape is same
-                    if name in self.input_tensors:
-                        if tuple(self.input_tensors[name].shape) == tuple(shape):
-                            continue
-                        raise RuntimeError("Expect static input shape for cuda graph")
-
-                    numpy_dtype = self.io_name_to_numpy_type[name]
-                    tensor = torch.empty(tuple(shape), dtype=TypeHelper.numpy_type_to_torch_type(numpy_dtype)).to(
-                        device=self.device
-                    )
-                    self.input_tensors[name] = tensor
-
-                    self.io_binding.bind_input(
-                        name,
-                        tensor.device.type,
-                        tensor.device.index,
-                        numpy_dtype,
-                        list(tensor.size()),
-                        tensor.data_ptr(),
-                    )
-
-        for name, shape in shape_dict.items():
-            if name in self.output_names:
-                # Reuse allocated buffer when the shape is same
-                if name in self.output_tensors and tuple(self.output_tensors[name].shape) == tuple(shape):
-                    continue
-
-                numpy_dtype = self.io_name_to_numpy_type[name]
-                tensor = torch.empty(tuple(shape), dtype=TypeHelper.numpy_type_to_torch_type(numpy_dtype)).to(
-                    device=self.device
-                )
-                self.output_tensors[name] = tensor
-
-                self.io_binding.bind_output(
-                    name,
-                    tensor.device.type,
-                    tensor.device.index,
-                    numpy_dtype,
-                    list(tensor.size()),
-                    tensor.data_ptr(),
-                )
-
-    def infer(self, feed_dict):
-        """Bind input tensors and run inference"""
-        for name, tensor in feed_dict.items():
-            assert isinstance(tensor, torch.Tensor) and tensor.is_contiguous()
-            if name in self.input_names:
-                if self.enable_cuda_graph:
-                    assert self.input_tensors[name].nelement() == tensor.nelement()
-                    assert tensor.device.type == "cuda"
-                    # Update input tensor inplace since cuda graph requires input and output has fixed memory address.
-                    from cuda import cudart
-
-                    cudart.cudaMemcpy(
-                        self.input_tensors[name].data_ptr(),
-                        tensor.data_ptr(),
-                        tensor.element_size() * tensor.nelement(),
-                        cudart.cudaMemcpyKind.cudaMemcpyDeviceToDevice,
-                    )
-                else:
-                    self.io_binding.bind_input(
-                        name,
-                        tensor.device.type,
-                        tensor.device.index,
-                        TypeHelper.torch_type_to_numpy_type(tensor.dtype),
-                        [1] if len(tensor.shape) == 0 else list(tensor.shape),
-                        tensor.data_ptr(),
-                    )
+def add_transformers_dir_to_path():
+    sys.path.append(os.path.dirname(__file__))
+
+    transformers_dir = os.path.normpath(os.path.join(os.path.dirname(__file__), "..", ".."))
+    if transformers_dir not in sys.path:
+        sys.path.append(transformers_dir)
 
-        self.ort_session.run_with_iobinding(self.io_binding)
 
-        return self.output_tensors
+add_transformers_dir_to_path()
+from io_binding_helper import CudaSession  # noqa: E402. Walk-around to test locally
 
 
-class Engine(OrtCudaSession):
+# -----------------------------------------------------------------------------------------------------
+# Utilities for CUDA EP
+# -----------------------------------------------------------------------------------------------------
+class Engine(CudaSession):
     def __init__(self, engine_path, provider: str, device_id: int = 0, enable_cuda_graph=False):
         self.engine_path = engine_path
         self.provider = provider
-        self.provider_options = self.get_cuda_provider_options(device_id, enable_cuda_graph)
+        self.provider_options = CudaSession.get_cuda_provider_options(device_id, enable_cuda_graph)
 
         device = torch.device("cuda", device_id)
         ort_session = ort.InferenceSession(
@@ -135,13 +49,6 @@ def __init__(self, engine_path, provider: str, device_id: int = 0, enable_cuda_g
 
         super().__init__(ort_session, device, enable_cuda_graph)
 
-    def get_cuda_provider_options(self, device_id: int, enable_cuda_graph: bool) -> Dict[str, Any]:
-        return {
-            "device_id": device_id,
-            "arena_extend_strategy": "kSameAsRequested",
-            "enable_cuda_graph": enable_cuda_graph,
-        }
-
 
 class Engines:
     def __init__(self, provider, onnx_opset: int = 14):
@@ -197,9 +104,16 @@ def build(
                     model = model_obj.get_model().to(model_obj.device)
                     with torch.inference_mode():
                         inputs = model_obj.get_sample_input(1, 512, 512)
+                        fp32_inputs = tuple(
+                            [
+                                (tensor.to(torch.float32) if tensor.dtype == torch.float16 else tensor)
+                                for tensor in inputs
+                            ]
+                        )
+
                         torch.onnx.export(
                             model,
-                            inputs,
+                            fp32_inputs,
                             onnx_path,
                             export_params=True,
                             opset_version=self.onnx_opset,
@@ -224,3 +138,125 @@ def build(
 
     def get_engine(self, model_name):
         return self.engines[model_name]
+
+
+def run_engine(engine, feed_dict):
+    return engine.infer(feed_dict)
+
+
+# -----------------------------------------------------------------------------------------------------
+# Utilities for both CUDA and TensorRT EP
+# -----------------------------------------------------------------------------------------------------
+
+
+class StableDiffusionPipelineMixin:
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+    def encode_prompt(self, clip_engine, prompt, negative_prompt):
+        """
+        Encodes the prompt into text encoder hidden states.
+        """
+
+        # Tokenize prompt
+        text_input_ids = (
+            self.tokenizer(
+                prompt,
+                padding="max_length",
+                max_length=self.tokenizer.model_max_length,
+                truncation=True,
+                return_tensors="pt",
+            )
+            .input_ids.type(torch.int32)
+            .to(self.torch_device)
+        )
+
+        # NOTE: output tensor for CLIP must be cloned because it will be overwritten when called again for negative prompt
+        text_embeddings = run_engine(clip_engine, {"input_ids": text_input_ids})["text_embeddings"].clone()
+
+        # Tokenize negative prompt
+        uncond_input_ids = (
+            self.tokenizer(
+                negative_prompt,
+                padding="max_length",
+                max_length=self.tokenizer.model_max_length,
+                truncation=True,
+                return_tensors="pt",
+            )
+            .input_ids.type(torch.int32)
+            .to(self.torch_device)
+        )
+
+        uncond_embeddings = run_engine(clip_engine, {"input_ids": uncond_input_ids})["text_embeddings"]
+
+        # Concatenate the unconditional and text embeddings into a single batch to avoid doing two forward passes for classifier free guidance
+        text_embeddings = torch.cat([uncond_embeddings, text_embeddings]).to(dtype=torch.float16)
+
+        return text_embeddings
+
+    def denoise_latent(
+        self,
+        unet_engine,
+        latents,
+        text_embeddings,
+        timesteps=None,
+        mask=None,
+        masked_image_latents=None,
+        timestep_fp16=False,
+    ):
+        if not isinstance(timesteps, torch.Tensor):
+            timesteps = self.scheduler.timesteps
+
+        for _step_index, timestep in enumerate(timesteps):
+            # Expand the latents if we are doing classifier free guidance
+            latent_model_input = torch.cat([latents] * 2)
+            latent_model_input = self.scheduler.scale_model_input(latent_model_input, timestep)
+            if isinstance(mask, torch.Tensor):
+                latent_model_input = torch.cat([latent_model_input, mask, masked_image_latents], dim=1)
+
+            # Predict the noise residual
+            timestep_float = timestep.to(torch.float16) if timestep_fp16 else timestep.to(torch.float32)
+
+            noise_pred = run_engine(
+                unet_engine,
+                {"sample": latent_model_input, "timestep": timestep_float, "encoder_hidden_states": text_embeddings},
+            )["latent"]
+
+            # Perform guidance
+            noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
+            noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_text - noise_pred_uncond)
+
+            latents = self.scheduler.step(noise_pred, timestep, latents).prev_sample
+
+        latents = 1.0 / 0.18215 * latents
+        return latents
+
+    def decode_latent(self, vae_engine, latents):
+        images = run_engine(vae_engine, {"latent": latents})["images"]
+        images = (images / 2 + 0.5).clamp(0, 1)
+        return images.cpu().permute(0, 2, 3, 1).float().numpy()
+
+    def set_cached_folder(self, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs):
+        from diffusers.utils import DIFFUSERS_CACHE
+        from huggingface_hub import snapshot_download
+
+        cache_dir = kwargs.pop("cache_dir", DIFFUSERS_CACHE)
+        resume_download = kwargs.pop("resume_download", False)
+        proxies = kwargs.pop("proxies", None)
+        local_files_only = kwargs.pop("local_files_only", False)
+        use_auth_token = kwargs.pop("use_auth_token", None)
+        revision = kwargs.pop("revision", None)
+
+        self.cached_folder = (
+            pretrained_model_name_or_path
+            if os.path.isdir(pretrained_model_name_or_path)
+            else snapshot_download(
+                pretrained_model_name_or_path,
+                cache_dir=cache_dir,
+                resume_download=resume_download,
+                proxies=proxies,
+                local_files_only=local_files_only,
+                use_auth_token=use_auth_token,
+                revision=revision,
+            )
+        )
diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/pipeline_img2img_xl.py b/onnxruntime/python/tools/transformers/models/stable_diffusion/pipeline_img2img_xl.py
new file mode 100644
index 0000000000000..31ede1ba901f2
--- /dev/null
+++ b/onnxruntime/python/tools/transformers/models/stable_diffusion/pipeline_img2img_xl.py
@@ -0,0 +1,236 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation.  All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+# Modified from TensorRT demo diffusion, which has the following license:
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# --------------------------------------------------------------------------
+
+import time
+
+import torch
+from diffusion_models import PipelineInfo
+from pipeline_stable_diffusion import StableDiffusionPipeline
+
+
+class Img2ImgXLPipeline(StableDiffusionPipeline):
+    """
+    Stable Diffusion Img2Img XL pipeline.
+    """
+
+    def __init__(self, pipeline_info: PipelineInfo, *args, **kwargs):
+        """
+        Initializes the Img2Img XL Diffusion pipeline.
+
+        Args:
+            pipeline_info (PipelineInfo):
+                Version and Type of stable diffusion pipeline.
+        """
+        assert pipeline_info.is_xl_refiner()
+
+        super().__init__(pipeline_info, *args, **kwargs)
+
+        self.requires_aesthetics_score = True
+
+    def _get_add_time_ids(
+        self, original_size, crops_coords_top_left, target_size, aesthetic_score, negative_aesthetic_score, dtype
+    ):
+        if self.requires_aesthetics_score:
+            add_time_ids = list(original_size + crops_coords_top_left + (aesthetic_score,))
+            add_neg_time_ids = list(original_size + crops_coords_top_left + (negative_aesthetic_score,))
+        else:
+            add_time_ids = list(original_size + crops_coords_top_left + target_size)
+            add_neg_time_ids = list(original_size + crops_coords_top_left + target_size)
+        add_time_ids = torch.tensor([add_time_ids], dtype=dtype)
+        add_neg_time_ids = torch.tensor([add_neg_time_ids], dtype=dtype)
+        add_time_ids = torch.cat([add_neg_time_ids, add_time_ids], dim=0).to(device=self.device)
+        return add_time_ids
+
+    def _infer(
+        self,
+        prompt,
+        negative_prompt,
+        init_image,
+        image_height,
+        image_width,
+        denoising_steps=30,
+        strength=0.3,
+        guidance=5.0,
+        seed=None,
+        warmup=False,
+        return_type="image",
+    ):
+        assert negative_prompt is None or len(prompt) == len(negative_prompt)
+
+        original_size = (image_height, image_width)
+        crops_coords_top_left = (0, 0)
+        target_size = (image_height, image_width)
+
+        aesthetic_score = 6.0
+        negative_aesthetic_score = 2.5
+
+        self.set_denoising_steps(denoising_steps)
+        self.set_random_seed(seed)
+
+        with torch.inference_mode(), torch.autocast("cuda"):
+            batch_size = len(prompt)
+
+            torch.cuda.synchronize()
+            e2e_tic = time.perf_counter()
+
+            # Initialize timesteps
+            timesteps, t_start = self.initialize_timesteps(self.denoising_steps, strength)
+
+            latent_timestep = timesteps[:1].repeat(batch_size)
+
+            # CLIP text encoder 2
+            text_embeddings, pooled_embeddings2 = self.encode_prompt(
+                prompt,
+                negative_prompt,
+                encoder="clip2",
+                tokenizer=self.tokenizer2,
+                pooled_outputs=True,
+                output_hidden_states=True,
+            )
+
+            # Time embeddings
+            add_time_ids = self._get_add_time_ids(
+                original_size,
+                crops_coords_top_left,
+                target_size,
+                aesthetic_score,
+                negative_aesthetic_score,
+                dtype=text_embeddings.dtype,
+            )
+
+            add_time_ids = add_time_ids.repeat(batch_size, 1)
+
+            add_kwargs = {"text_embeds": pooled_embeddings2, "time_ids": add_time_ids}
+
+            # Pre-process input image
+            init_image = self.preprocess_images(batch_size, (init_image,))[0]
+
+            # VAE encode init image
+            if init_image.shape[1] == 4:
+                init_latents = init_image
+            else:
+                init_latents = self.encode_image(init_image)
+
+            # Add noise to latents using timesteps
+            noise = torch.randn(init_latents.shape, device=self.device, dtype=torch.float32, generator=self.generator)
+            latents = self.scheduler.add_noise(init_latents, noise, t_start, latent_timestep)
+
+            # UNet denoiser
+            latents = self.denoise_latent(
+                latents,
+                text_embeddings,
+                timesteps=timesteps,
+                step_offset=t_start,
+                denoiser="unetxl",
+                guidance=guidance,
+                add_kwargs=add_kwargs,
+            )
+
+        with torch.inference_mode():
+            # VAE decode latent
+            if return_type == "latent":
+                images = latents
+            else:
+                images = self.decode_latent(latents / self.vae_scaling_factor)
+
+            torch.cuda.synchronize()
+            e2e_toc = time.perf_counter()
+
+            perf_data = None
+            if not warmup:
+                print("SD-XL Refiner Pipeline")
+                perf_data = self.print_summary(e2e_tic, e2e_toc, batch_size)
+
+        return images, perf_data
+
+    def run(
+        self,
+        prompt,
+        negative_prompt,
+        init_image,
+        image_height,
+        image_width,
+        denoising_steps=30,
+        guidance=5.0,
+        strength=0.3,
+        seed=None,
+        warmup=False,
+        return_type="image",
+    ):
+        """
+        Run the diffusion pipeline.
+
+        Args:
+            prompt (str):
+                The text prompt to guide image generation.
+            negative_prompt (str):
+                The prompt not to guide the image generation.
+            init_image (tuple[torch.Tensor]):
+                Image from base pipeline.
+            image_height (int):
+                Height (in pixels) of the image to be generated. Must be a multiple of 8.
+            image_width (int):
+                Width (in pixels) of the image to be generated. Must be a multiple of 8.
+            denoising_steps (int):
+                Number of denoising steps. More steps usually lead to higher quality image at the expense of slower inference.
+            guidance (float):
+                Higher guidance scale encourages to generate images that are closely linked to the text prompt.
+            seed (int):
+                Seed for the random generator
+            warmup (bool):
+                Indicate if this is a warmup run.
+            return_type (str):
+                It can be "latent" or "image".
+        """
+
+        if self.is_backend_tensorrt():
+            import tensorrt as trt
+            from trt_utilities import TRT_LOGGER
+
+            with trt.Runtime(TRT_LOGGER):
+                return self._infer(
+                    prompt,
+                    negative_prompt,
+                    init_image,
+                    image_height,
+                    image_width,
+                    denoising_steps=denoising_steps,
+                    strength=strength,
+                    guidance=guidance,
+                    seed=seed,
+                    warmup=warmup,
+                    return_type=return_type,
+                )
+        else:
+            return self._infer(
+                prompt,
+                negative_prompt,
+                init_image,
+                image_height,
+                image_width,
+                denoising_steps=denoising_steps,
+                strength=strength,
+                guidance=guidance,
+                seed=seed,
+                warmup=warmup,
+                return_type=return_type,
+            )
diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/pipeline_stable_diffusion.py b/onnxruntime/python/tools/transformers/models/stable_diffusion/pipeline_stable_diffusion.py
new file mode 100644
index 0000000000000..5d51554a5cee4
--- /dev/null
+++ b/onnxruntime/python/tools/transformers/models/stable_diffusion/pipeline_stable_diffusion.py
@@ -0,0 +1,527 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation.  All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+# Modified from TensorRT demo diffusion, which has the following license:
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# --------------------------------------------------------------------------
+
+import os
+import pathlib
+import random
+from typing import Any, Dict, List
+
+import numpy as np
+import nvtx
+import torch
+from cuda import cudart
+from diffusion_models import PipelineInfo, get_tokenizer
+from diffusion_schedulers import DDIMScheduler, EulerAncestralDiscreteScheduler, LCMScheduler, UniPCMultistepScheduler
+from engine_builder import EngineType
+from engine_builder_ort_cuda import OrtCudaEngineBuilder
+from engine_builder_ort_trt import OrtTensorrtEngineBuilder
+from engine_builder_tensorrt import TensorrtEngineBuilder
+
+
+class StableDiffusionPipeline:
+    """
+    Stable Diffusion pipeline using TensorRT.
+    """
+
+    def __init__(
+        self,
+        pipeline_info: PipelineInfo,
+        max_batch_size=16,
+        scheduler="DDIM",
+        device="cuda",
+        output_dir=".",
+        hf_token=None,
+        verbose=False,
+        nvtx_profile=False,
+        use_cuda_graph=False,
+        framework_model_dir="pytorch_model",
+        engine_type: EngineType = EngineType.ORT_TRT,
+    ):
+        """
+        Initializes the Diffusion pipeline.
+
+        Args:
+            pipeline_info (PipelineInfo):
+                Version and Type of pipeline.
+            max_batch_size (int):
+                Maximum batch size for dynamic batch engine.
+            scheduler (str):
+                The scheduler to guide the denoising process. Must be one of [DDIM, EulerA, UniPC, LCM].
+            device (str):
+                PyTorch device to run inference. Default: 'cuda'
+            output_dir (str):
+                Output directory for log files and image artifacts
+            hf_token (str):
+                HuggingFace User Access Token to use for downloading Stable Diffusion model checkpoints.
+            verbose (bool):
+                Enable verbose logging.
+            nvtx_profile (bool):
+                Insert NVTX profiling markers.
+            use_cuda_graph (bool):
+                Use CUDA graph to capture engine execution and then launch inference
+            framework_model_dir (str):
+                cache directory for framework checkpoints
+            engine_type (EngineType)
+                backend engine type like ORT_TRT or TRT
+        """
+
+        self.pipeline_info = pipeline_info
+        self.version = pipeline_info.version
+
+        self.vae_scaling_factor = pipeline_info.vae_scaling_factor()
+
+        self.max_batch_size = max_batch_size
+
+        self.framework_model_dir = framework_model_dir
+        self.output_dir = output_dir
+        for directory in [self.framework_model_dir, self.output_dir]:
+            if not os.path.exists(directory):
+                print(f"[I] Create directory: {directory}")
+                pathlib.Path(directory).mkdir(parents=True)
+
+        self.hf_token = hf_token
+        self.device = device
+        self.torch_device = torch.device(device, torch.cuda.current_device())
+        self.verbose = verbose
+        self.nvtx_profile = nvtx_profile
+
+        self.use_cuda_graph = use_cuda_graph
+
+        self.tokenizer = None
+        self.tokenizer2 = None
+
+        self.generator = torch.Generator(device="cuda")
+        self.actual_steps = None
+
+        self.current_scheduler = None
+        self.set_scheduler(scheduler)
+
+        # backend engine
+        self.engine_type = engine_type
+        if engine_type == EngineType.TRT:
+            self.backend = TensorrtEngineBuilder(pipeline_info, max_batch_size, hf_token, device, use_cuda_graph)
+        elif engine_type == EngineType.ORT_TRT:
+            self.backend = OrtTensorrtEngineBuilder(pipeline_info, max_batch_size, hf_token, device, use_cuda_graph)
+        elif engine_type == EngineType.ORT_CUDA:
+            self.backend = OrtCudaEngineBuilder(pipeline_info, max_batch_size, hf_token, device, use_cuda_graph)
+        else:
+            raise RuntimeError(f"Backend engine type {engine_type.name} is not supported")
+
+        # Load text tokenizer
+        if not self.pipeline_info.is_xl_refiner():
+            self.tokenizer = get_tokenizer(
+                self.pipeline_info, self.framework_model_dir, self.hf_token, subfolder="tokenizer"
+            )
+
+        if self.pipeline_info.is_xl():
+            self.tokenizer2 = get_tokenizer(
+                self.pipeline_info, self.framework_model_dir, self.hf_token, subfolder="tokenizer_2"
+            )
+
+        self.control_image_processor = None
+        if self.pipeline_info.is_xl() and self.pipeline_info.controlnet:
+            from diffusers.image_processor import VaeImageProcessor
+
+            self.control_image_processor = VaeImageProcessor(
+                vae_scale_factor=8, do_convert_rgb=True, do_normalize=False
+            )
+
+        # Create CUDA events
+        self.events = {}
+        for stage in ["clip", "denoise", "vae", "vae_encoder"]:
+            for marker in ["start", "stop"]:
+                self.events[stage + "-" + marker] = cudart.cudaEventCreate()[1]
+        self.markers = {}
+
+    def is_backend_tensorrt(self):
+        return self.engine_type == EngineType.TRT
+
+    def set_scheduler(self, scheduler: str):
+        if scheduler == self.current_scheduler:
+            return
+
+        # Scheduler options
+        sched_opts = {"num_train_timesteps": 1000, "beta_start": 0.00085, "beta_end": 0.012}
+        if self.version in ("2.0", "2.1"):
+            sched_opts["prediction_type"] = "v_prediction"
+        else:
+            sched_opts["prediction_type"] = "epsilon"
+
+        if scheduler == "DDIM":
+            self.scheduler = DDIMScheduler(device=self.device, **sched_opts)
+        elif scheduler == "EulerA":
+            self.scheduler = EulerAncestralDiscreteScheduler(device=self.device, **sched_opts)
+        elif scheduler == "UniPC":
+            self.scheduler = UniPCMultistepScheduler(device=self.device, **sched_opts)
+        elif scheduler == "LCM":
+            self.scheduler = LCMScheduler(device=self.device, **sched_opts)
+        else:
+            raise ValueError("Scheduler should be either DDIM, EulerA, UniPC or LCM")
+
+        self.current_scheduler = scheduler
+        self.denoising_steps = None
+
+    def set_denoising_steps(self, denoising_steps: int):
+        if not (self.denoising_steps == denoising_steps and isinstance(self.scheduler, DDIMScheduler)):
+            self.scheduler.set_timesteps(denoising_steps)
+            self.scheduler.configure()
+            self.denoising_steps = denoising_steps
+
+    def load_resources(self, image_height, image_width, batch_size):
+        # If engine is built with static input shape, call this only once after engine build.
+        # Otherwise, it need be called before every inference run.
+        self.backend.load_resources(image_height, image_width, batch_size)
+
+    def set_random_seed(self, seed):
+        if isinstance(seed, int):
+            self.generator.manual_seed(seed)
+        else:
+            self.generator.seed()
+
+    def get_current_seed(self):
+        return self.generator.initial_seed()
+
+    def teardown(self):
+        for e in self.events.values():
+            cudart.cudaEventDestroy(e)
+
+        if self.backend:
+            self.backend.teardown()
+
+    def run_engine(self, model_name, feed_dict):
+        return self.backend.run_engine(model_name, feed_dict)
+
+    def initialize_latents(self, batch_size, unet_channels, latent_height, latent_width):
+        latents_dtype = torch.float32  # text_embeddings.dtype
+        latents_shape = (batch_size, unet_channels, latent_height, latent_width)
+        latents = torch.randn(latents_shape, device=self.device, dtype=latents_dtype, generator=self.generator)
+        # Scale the initial noise by the standard deviation required by the scheduler
+        latents = latents * self.scheduler.init_noise_sigma
+        return latents
+
+    def initialize_timesteps(self, timesteps, strength):
+        self.scheduler.set_timesteps(timesteps)
+        offset = self.scheduler.steps_offset if hasattr(self.scheduler, "steps_offset") else 0
+        init_timestep = int(timesteps * strength) + offset
+        init_timestep = min(init_timestep, timesteps)
+        t_start = max(timesteps - init_timestep + offset, 0)
+        timesteps = self.scheduler.timesteps[t_start:].to(self.device)
+        return timesteps, t_start
+
+    def start_profile(self, name, color="blue"):
+        if self.nvtx_profile:
+            self.markers[name] = nvtx.start_range(message=name, color=color)
+        event_name = name + "-start"
+        if event_name in self.events:
+            cudart.cudaEventRecord(self.events[event_name], 0)
+
+    def stop_profile(self, name):
+        event_name = name + "-stop"
+        if event_name in self.events:
+            cudart.cudaEventRecord(self.events[event_name], 0)
+        if self.nvtx_profile:
+            nvtx.end_range(self.markers[name])
+
+    def preprocess_images(self, batch_size, images=()):
+        self.start_profile("preprocess", color="pink")
+        init_images = []
+        for i in images:
+            image = i.to(self.device).float()
+            if image.shape[0] != batch_size:
+                image = image.repeat(batch_size, 1, 1, 1)
+            init_images.append(image)
+        self.stop_profile("preprocess")
+        return tuple(init_images)
+
+    def preprocess_controlnet_images(
+        self, batch_size, images=None, do_classifier_free_guidance=True, height=1024, width=1024
+    ):
+        """
+        Process a list of PIL.Image.Image as control images, and return a torch tensor.
+        """
+        if images is None:
+            return None
+        self.start_profile("preprocess", color="pink")
+
+        if not self.pipeline_info.is_xl():
+            images = [
+                (np.array(i.convert("RGB")).astype(np.float32) / 255.0)[..., None]
+                .transpose(3, 2, 0, 1)
+                .repeat(batch_size, axis=0)
+                for i in images
+            ]
+            if do_classifier_free_guidance:
+                images = [torch.cat([torch.from_numpy(i).to(self.device).float()] * 2) for i in images]
+            else:
+                images = [torch.from_numpy(i).to(self.device).float() for i in images]
+            images = torch.cat([image[None, ...] for image in images], dim=0)
+            images = images.to(dtype=torch.float16)
+        else:
+            images = self.control_image_processor.preprocess(images, height=height, width=width).to(dtype=torch.float32)
+            images = images.repeat_interleave(batch_size, dim=0)
+            images = images.to(device=self.device, dtype=torch.float16)
+            if do_classifier_free_guidance:
+                images = torch.cat([images] * 2)
+        self.stop_profile("preprocess")
+        return images
+
+    def encode_prompt(
+        self,
+        prompt,
+        negative_prompt,
+        encoder="clip",
+        tokenizer=None,
+        pooled_outputs=False,
+        output_hidden_states=False,
+        force_zeros_for_empty_prompt=False,
+        do_classifier_free_guidance=True,
+    ):
+        if tokenizer is None:
+            tokenizer = self.tokenizer
+
+        self.start_profile("clip", color="green")
+
+        # Tokenize prompt
+        text_input_ids = (
+            tokenizer(
+                prompt,
+                padding="max_length",
+                max_length=tokenizer.model_max_length,
+                truncation=True,
+                return_tensors="pt",
+            )
+            .input_ids.type(torch.int32)
+            .to(self.device)
+        )
+
+        # NOTE: output tensor for CLIP must be cloned because it will be overwritten when called again for negative prompt
+        outputs = self.run_engine(encoder, {"input_ids": text_input_ids})
+        text_embeddings = outputs["text_embeddings"].clone()
+        if output_hidden_states:
+            hidden_states = outputs["hidden_states"].clone()
+
+        # Note: negative prompt embedding is not needed for SD XL when guidance <= 1
+        if do_classifier_free_guidance:
+            # For SD XL base, handle force_zeros_for_empty_prompt
+            is_empty_negative_prompt = all([not i for i in negative_prompt])
+            if force_zeros_for_empty_prompt and is_empty_negative_prompt:
+                uncond_embeddings = torch.zeros_like(text_embeddings)
+                if output_hidden_states:
+                    uncond_hidden_states = torch.zeros_like(hidden_states)
+            else:
+                # Tokenize negative prompt
+                uncond_input_ids = (
+                    tokenizer(
+                        negative_prompt,
+                        padding="max_length",
+                        max_length=tokenizer.model_max_length,
+                        truncation=True,
+                        return_tensors="pt",
+                    )
+                    .input_ids.type(torch.int32)
+                    .to(self.device)
+                )
+
+                outputs = self.run_engine(encoder, {"input_ids": uncond_input_ids})
+                uncond_embeddings = outputs["text_embeddings"]
+                if output_hidden_states:
+                    uncond_hidden_states = outputs["hidden_states"]
+
+            # Concatenate the unconditional and text embeddings into a single batch to avoid doing two forward passes for classifier free guidance
+            text_embeddings = torch.cat([uncond_embeddings, text_embeddings]).to(dtype=torch.float16)
+
+        if pooled_outputs:
+            pooled_output = text_embeddings
+
+        if output_hidden_states:
+            if do_classifier_free_guidance:
+                text_embeddings = torch.cat([uncond_hidden_states, hidden_states]).to(dtype=torch.float16)
+            else:
+                text_embeddings = hidden_states.to(dtype=torch.float16)
+
+        self.stop_profile("clip")
+
+        if pooled_outputs:
+            return text_embeddings, pooled_output
+        return text_embeddings
+
+    def denoise_latent(
+        self,
+        latents,
+        text_embeddings,
+        denoiser="unet",
+        timesteps=None,
+        step_offset=0,
+        mask=None,
+        masked_image_latents=None,
+        guidance=7.5,
+        add_kwargs=None,
+    ):
+        do_classifier_free_guidance = guidance > 1.0
+
+        self.start_profile("denoise", color="blue")
+
+        if not isinstance(timesteps, torch.Tensor):
+            timesteps = self.scheduler.timesteps
+
+        for step_index, timestep in enumerate(timesteps):
+            # Expand the latents if we are doing classifier free guidance
+            latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
+
+            latent_model_input = self.scheduler.scale_model_input(
+                latent_model_input, step_offset + step_index, timestep
+            )
+
+            if isinstance(mask, torch.Tensor):
+                latent_model_input = torch.cat([latent_model_input, mask, masked_image_latents], dim=1)
+
+            # Predict the noise residual
+            if self.nvtx_profile:
+                nvtx_unet = nvtx.start_range(message="unet", color="blue")
+
+            timestep_float = timestep.float() if timestep.dtype != torch.float32 else timestep
+
+            params = {
+                "sample": latent_model_input,
+                "timestep": timestep_float,
+                "encoder_hidden_states": text_embeddings,
+            }
+
+            if add_kwargs:
+                params.update(add_kwargs)
+
+            noise_pred = self.run_engine(denoiser, params)["latent"]
+
+            if self.nvtx_profile:
+                nvtx.end_range(nvtx_unet)
+
+            # perform guidance
+            if do_classifier_free_guidance:
+                noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
+                noise_pred = noise_pred_uncond + guidance * (noise_pred_text - noise_pred_uncond)
+
+            if type(self.scheduler) == UniPCMultistepScheduler:
+                latents = self.scheduler.step(noise_pred, timestep, latents, return_dict=False)[0]
+            elif type(self.scheduler) == LCMScheduler:
+                latents = self.scheduler.step(noise_pred, timestep, latents, generator=self.generator)[0]
+            else:
+                latents = self.scheduler.step(noise_pred, latents, step_offset + step_index, timestep)
+
+        # The actual number of steps. It might be different from denoising_steps.
+        self.actual_steps = len(timesteps)
+
+        self.stop_profile("denoise")
+        return latents
+
+    def encode_image(self, init_image):
+        self.start_profile("vae_encoder", color="red")
+        init_latents = self.run_engine("vae_encoder", {"images": init_image})["latent"]
+        init_latents = self.vae_scaling_factor * init_latents
+        self.stop_profile("vae_encoder")
+        return init_latents
+
+    def decode_latent(self, latents):
+        self.start_profile("vae", color="red")
+        images = self.backend.vae_decode(latents)
+        self.stop_profile("vae")
+        return images
+
+    def print_summary(self, tic, toc, batch_size, vae_enc=False) -> Dict[str, Any]:
+        throughput = batch_size / (toc - tic)
+        latency_clip = cudart.cudaEventElapsedTime(self.events["clip-start"], self.events["clip-stop"])[1]
+        latency_unet = cudart.cudaEventElapsedTime(self.events["denoise-start"], self.events["denoise-stop"])[1]
+        latency_vae = cudart.cudaEventElapsedTime(self.events["vae-start"], self.events["vae-stop"])[1]
+        latency_vae_encoder = (
+            cudart.cudaEventElapsedTime(self.events["vae_encoder-start"], self.events["vae_encoder-stop"])[1]
+            if vae_enc
+            else None
+        )
+        latency = (toc - tic) * 1000.0
+
+        print("|----------------|--------------|")
+        print("| {:^14} | {:^12} |".format("Module", "Latency"))
+        print("|----------------|--------------|")
+        if vae_enc:
+            print("| {:^14} | {:>9.2f} ms |".format("VAE-Enc", latency_vae_encoder))
+        print("| {:^14} | {:>9.2f} ms |".format("CLIP", latency_clip))
+        print(
+            "| {:^14} | {:>9.2f} ms |".format(
+                "UNet" + ("+CNet" if self.pipeline_info.controlnet else "") + " x " + str(self.actual_steps),
+                latency_unet,
+            )
+        )
+        print("| {:^14} | {:>9.2f} ms |".format("VAE-Dec", latency_vae))
+
+        print("|----------------|--------------|")
+        print("| {:^14} | {:>9.2f} ms |".format("Pipeline", latency))
+        print("|----------------|--------------|")
+        print(f"Throughput: {throughput:.2f} image/s")
+
+        perf_data = {
+            "latency_clip": latency_clip,
+            "latency_unet": latency_unet,
+            "latency_vae": latency_vae,
+            "latency": latency,
+            "throughput": throughput,
+        }
+        if vae_enc:
+            perf_data["latency_vae_encoder"] = latency_vae_encoder
+        return perf_data
+
+    @staticmethod
+    def to_pil_image(images):
+        images = (
+            ((images + 1) * 255 / 2).clamp(0, 255).detach().permute(0, 2, 3, 1).round().type(torch.uint8).cpu().numpy()
+        )
+
+        from PIL import Image
+
+        return [Image.fromarray(images[i]) for i in range(images.shape[0])]
+
+    def metadata(self) -> Dict[str, Any]:
+        return {
+            "actual_steps": self.actual_steps,
+            "seed": self.get_current_seed(),
+            "name": self.pipeline_info.name(),
+            "custom_vae": self.pipeline_info.custom_fp16_vae(),
+            "custom_unet": self.pipeline_info.custom_unet(),
+        }
+
+    def save_images(self, images: List, prompt: List[str], negative_prompt: List[str], metadata: Dict[str, Any]):
+        images = self.to_pil_image(images)
+        session_id = str(random.randint(1000, 9999))
+        for i, image in enumerate(images):
+            seed = str(self.get_current_seed())
+            prefix = "".join(x for x in prompt[i] if x.isalnum() or x in ", -").replace(" ", "_")[:20]
+            parts = [prefix, session_id, str(i + 1), str(seed), self.current_scheduler, str(self.actual_steps)]
+            image_path = os.path.join(self.output_dir, "-".join(parts) + ".png")
+            print(f"Saving image {i+1} / {len(images)} to: {image_path}")
+
+            from PIL import PngImagePlugin
+
+            info = PngImagePlugin.PngInfo()
+            for k, v in metadata.items():
+                info.add_text(k, str(v))
+            info.add_text("prompt", prompt[i])
+            info.add_text("negative_prompt", negative_prompt[i])
+
+            image.save(image_path, "PNG", pnginfo=info)
diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/pipeline_txt2img.py b/onnxruntime/python/tools/transformers/models/stable_diffusion/pipeline_txt2img.py
new file mode 100644
index 0000000000000..2d2fdb542c845
--- /dev/null
+++ b/onnxruntime/python/tools/transformers/models/stable_diffusion/pipeline_txt2img.py
@@ -0,0 +1,178 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation.  All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+# Modified from TensorRT demo diffusion, which has the following license:
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# --------------------------------------------------------------------------
+
+import time
+
+import torch
+from diffusion_models import PipelineInfo
+from pipeline_stable_diffusion import StableDiffusionPipeline
+
+
+class Txt2ImgPipeline(StableDiffusionPipeline):
+    """
+    Stable Diffusion Txt2Img pipeline using NVidia TensorRT.
+    """
+
+    def __init__(self, pipeline_info: PipelineInfo, **kwargs):
+        """
+        Initializes the Txt2Img Diffusion pipeline.
+
+        Args:
+            pipeline_info (PipelineInfo):
+                Version and Type of stable diffusion pipeline.
+        """
+        super().__init__(pipeline_info, **kwargs)
+
+    def _infer(
+        self,
+        prompt,
+        negative_prompt,
+        image_height,
+        image_width,
+        denoising_steps=50,
+        guidance=7.5,
+        seed=None,
+        controlnet_images=None,
+        controlnet_scales=None,
+        warmup=False,
+        return_type="latent",
+    ):
+        assert len(prompt) == len(negative_prompt)
+        batch_size = len(prompt)
+
+        self.set_denoising_steps(denoising_steps)
+        self.set_random_seed(seed)
+
+        with torch.inference_mode(), torch.autocast("cuda"):
+            # Pre-initialize latents
+            latents = self.initialize_latents(
+                batch_size=batch_size,
+                unet_channels=4,
+                latent_height=(image_height // 8),
+                latent_width=(image_width // 8),
+            )
+
+            torch.cuda.synchronize()
+            e2e_tic = time.perf_counter()
+
+            # CLIP text encoder
+            do_classifier_free_guidance = guidance > 1.0
+            text_embeddings = self.encode_prompt(
+                prompt,
+                negative_prompt,
+                do_classifier_free_guidance=do_classifier_free_guidance,
+            )
+
+            add_kwargs = None
+            if self.pipeline_info.controlnet:
+                controlnet_images = self.preprocess_controlnet_images(
+                    latents.shape[0], controlnet_images, do_classifier_free_guidance=do_classifier_free_guidance
+                )
+                add_kwargs = {
+                    "controlnet_images": controlnet_images,
+                    "controlnet_scales": controlnet_scales.to(controlnet_images.dtype).to(controlnet_images.device),
+                }
+
+            # UNet denoiser
+            latents = self.denoise_latent(latents, text_embeddings, guidance=guidance, add_kwargs=add_kwargs)
+
+            # VAE decode latent
+            images = self.decode_latent(latents / self.vae_scaling_factor)
+
+            torch.cuda.synchronize()
+            e2e_toc = time.perf_counter()
+
+            perf_data = None
+            if not warmup:
+                perf_data = self.print_summary(e2e_tic, e2e_toc, batch_size)
+
+            return images, perf_data
+
+    def run(
+        self,
+        prompt,
+        negative_prompt,
+        image_height,
+        image_width,
+        denoising_steps=30,
+        guidance=7.5,
+        seed=None,
+        controlnet_images=None,
+        controlnet_scales=None,
+        warmup=False,
+        return_type="image",
+    ):
+        """
+        Run the diffusion pipeline.
+
+        Args:
+            prompt (str):
+                The text prompt to guide image generation.
+            negative_prompt (str):
+                The prompt not to guide the image generation.
+            image_height (int):
+                Height (in pixels) of the image to be generated. Must be a multiple of 8.
+            image_width (int):
+                Width (in pixels) of the image to be generated. Must be a multiple of 8.
+            denoising_steps (int):
+                Number of denoising steps. More steps usually lead to higher quality image at the expense of slower inference.
+            guidance (float):
+                Higher guidance scale encourages to generate images that are closely linked to the text prompt.
+            seed (int):
+                Seed for the random generator
+            warmup (bool):
+                Indicate if this is a warmup run.
+            return_type (str):
+                type of return. The value can be "latent" or "image".
+        """
+        if self.is_backend_tensorrt():
+            import tensorrt as trt
+            from trt_utilities import TRT_LOGGER
+
+            with trt.Runtime(TRT_LOGGER):
+                return self._infer(
+                    prompt,
+                    negative_prompt,
+                    image_height,
+                    image_width,
+                    denoising_steps=denoising_steps,
+                    guidance=guidance,
+                    seed=seed,
+                    controlnet_images=controlnet_images,
+                    controlnet_scales=controlnet_scales,
+                    warmup=warmup,
+                    return_type=return_type,
+                )
+        else:
+            return self._infer(
+                prompt,
+                negative_prompt,
+                image_height,
+                image_width,
+                denoising_steps=denoising_steps,
+                guidance=guidance,
+                seed=seed,
+                controlnet_images=controlnet_images,
+                controlnet_scales=controlnet_scales,
+                warmup=warmup,
+                return_type=return_type,
+            )
diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/pipeline_txt2img_xl.py b/onnxruntime/python/tools/transformers/models/stable_diffusion/pipeline_txt2img_xl.py
new file mode 100644
index 0000000000000..d3387ab6db1bd
--- /dev/null
+++ b/onnxruntime/python/tools/transformers/models/stable_diffusion/pipeline_txt2img_xl.py
@@ -0,0 +1,231 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation.  All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+# Modified from TensorRT demo diffusion, which has the following license:
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# --------------------------------------------------------------------------
+
+import time
+
+import torch
+from diffusion_models import PipelineInfo
+from pipeline_stable_diffusion import StableDiffusionPipeline
+
+
+class Txt2ImgXLPipeline(StableDiffusionPipeline):
+    """
+    Stable Diffusion Txt2Img XL pipeline.
+    """
+
+    def __init__(self, pipeline_info: PipelineInfo, *args, **kwargs):
+        """
+        Initializes the Txt2Img XL Diffusion pipeline.
+
+        Args:
+            pipeline_info (PipelineInfo):
+                Version and Type of stable diffusion pipeline.
+        """
+        assert pipeline_info.is_xl_base()
+
+        super().__init__(pipeline_info, *args, **kwargs)
+
+    def _get_add_time_ids(self, original_size, crops_coords_top_left, target_size, dtype):
+        add_time_ids = list(original_size + crops_coords_top_left + target_size)
+        add_time_ids = torch.tensor([add_time_ids], dtype=dtype)
+        return add_time_ids
+
+    def _infer(
+        self,
+        prompt,
+        negative_prompt,
+        image_height,
+        image_width,
+        denoising_steps=30,
+        guidance=5.0,
+        seed=None,
+        controlnet_images=None,
+        controlnet_scales=None,
+        warmup=False,
+        return_type="image",
+    ):
+        assert len(prompt) == len(negative_prompt)
+        do_classifier_free_guidance = guidance > 1.0
+        original_size = (image_height, image_width)
+        crops_coords_top_left = (0, 0)
+        target_size = (image_height, image_width)
+        batch_size = len(prompt)
+
+        self.set_denoising_steps(denoising_steps)
+        self.set_random_seed(seed)
+
+        with torch.inference_mode(), torch.autocast("cuda"):
+            # Pre-initialize latents
+            latents = self.initialize_latents(
+                batch_size=batch_size,
+                unet_channels=4,
+                latent_height=(image_height // 8),
+                latent_width=(image_width // 8),
+            )
+
+            torch.cuda.synchronize()
+            e2e_tic = time.perf_counter()
+
+            # CLIP text encoder
+            text_embeddings = self.encode_prompt(
+                prompt,
+                negative_prompt,
+                encoder="clip",
+                tokenizer=self.tokenizer,
+                output_hidden_states=True,
+                force_zeros_for_empty_prompt=True,
+                do_classifier_free_guidance=do_classifier_free_guidance,
+            )
+            # CLIP text encoder 2
+            text_embeddings2, pooled_embeddings2 = self.encode_prompt(
+                prompt,
+                negative_prompt,
+                encoder="clip2",
+                tokenizer=self.tokenizer2,
+                pooled_outputs=True,
+                output_hidden_states=True,
+                force_zeros_for_empty_prompt=True,
+                do_classifier_free_guidance=do_classifier_free_guidance,
+            )
+
+            # Merged text embeddings
+            text_embeddings = torch.cat([text_embeddings, text_embeddings2], dim=-1)
+
+            # Time embeddings
+            add_time_ids = self._get_add_time_ids(
+                original_size, crops_coords_top_left, target_size, dtype=text_embeddings.dtype
+            )
+            add_time_ids = add_time_ids.repeat(batch_size, 1)
+            if do_classifier_free_guidance:
+                add_time_ids = torch.cat([add_time_ids, add_time_ids], dim=0)
+
+            add_kwargs = {"text_embeds": pooled_embeddings2, "time_ids": add_time_ids.to(self.device)}
+            if self.pipeline_info.controlnet:
+                controlnet_images = self.preprocess_controlnet_images(
+                    latents.shape[0],
+                    controlnet_images,
+                    do_classifier_free_guidance=do_classifier_free_guidance,
+                    height=image_height,
+                    width=image_width,
+                )
+                add_kwargs.update(
+                    {
+                        "controlnet_images": controlnet_images,
+                        "controlnet_scales": controlnet_scales.to(controlnet_images.dtype).to(controlnet_images.device),
+                    }
+                )
+
+            # UNet denoiser
+            latents = self.denoise_latent(
+                latents,
+                text_embeddings,
+                denoiser="unetxl",
+                guidance=guidance,
+                add_kwargs=add_kwargs,
+            )
+
+            # VAE decode latent
+            if return_type == "latent":
+                images = latents
+            else:
+                images = self.decode_latent(latents / self.vae_scaling_factor)
+
+            torch.cuda.synchronize()
+            e2e_toc = time.perf_counter()
+
+            perf_data = None
+            if not warmup:
+                print("SD-XL Base Pipeline")
+                perf_data = self.print_summary(e2e_tic, e2e_toc, batch_size)
+
+            return images, perf_data
+
+    def run(
+        self,
+        prompt,
+        negative_prompt,
+        image_height,
+        image_width,
+        denoising_steps=30,
+        guidance=5.0,
+        seed=None,
+        controlnet_images=None,
+        controlnet_scales=None,
+        warmup=False,
+        return_type="image",
+    ):
+        """
+        Run the diffusion pipeline.
+
+        Args:
+            prompt (str):
+                The text prompt to guide image generation.
+            negative_prompt (str):
+                The prompt not to guide the image generation.
+            image_height (int):
+                Height (in pixels) of the image to be generated. Must be a multiple of 8.
+            image_width (int):
+                Width (in pixels) of the image to be generated. Must be a multiple of 8.
+            denoising_steps (int):
+                Number of denoising steps. More steps usually lead to higher quality image at the expense of slower inference.
+            guidance (float):
+                Higher guidance scale encourages to generate images that are closely linked to the text prompt.
+            seed (int):
+                Seed for the random generator
+            warmup (bool):
+                Indicate if this is a warmup run.
+            return_type (str):
+                It can be "latent" or "image".
+        """
+
+        if self.is_backend_tensorrt():
+            import tensorrt as trt
+            from trt_utilities import TRT_LOGGER
+
+            with trt.Runtime(TRT_LOGGER):
+                return self._infer(
+                    prompt,
+                    negative_prompt,
+                    image_height,
+                    image_width,
+                    denoising_steps=denoising_steps,
+                    guidance=guidance,
+                    seed=seed,
+                    controlnet_images=controlnet_images,
+                    controlnet_scales=controlnet_scales,
+                    warmup=warmup,
+                    return_type=return_type,
+                )
+        else:
+            return self._infer(
+                prompt,
+                negative_prompt,
+                image_height,
+                image_width,
+                denoising_steps=denoising_steps,
+                guidance=guidance,
+                seed=seed,
+                controlnet_images=controlnet_images,
+                controlnet_scales=controlnet_scales,
+                warmup=warmup,
+                return_type=return_type,
+            )
diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/requirements-cuda.txt b/onnxruntime/python/tools/transformers/models/stable_diffusion/requirements-cuda.txt
deleted file mode 100644
index b942749f8dcd2..0000000000000
--- a/onnxruntime/python/tools/transformers/models/stable_diffusion/requirements-cuda.txt
+++ /dev/null
@@ -1,8 +0,0 @@
--r requirements.txt
-onnxruntime-gpu>=1.14
-py3nvml>=0.2.7
-# cuda-python is needed for cuda graph. It shall be compatible with CUDA version of torch and onnxruntime-gpu.
-cuda-python==11.7.0
-#To export onnx of stable diffusion, please install PyTorch 1.13.1+cu117
-#--extra-index-url https://download.pytorch.org/whl/cu117
-#torch==1.13.1+cu117
diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/requirements-cuda11.txt b/onnxruntime/python/tools/transformers/models/stable_diffusion/requirements-cuda11.txt
new file mode 100644
index 0000000000000..447cb54f98ed2
--- /dev/null
+++ b/onnxruntime/python/tools/transformers/models/stable_diffusion/requirements-cuda11.txt
@@ -0,0 +1,22 @@
+-r requirements.txt
+
+# Official onnxruntime-gpu 1.16.1 is built with CUDA 11.8.
+onnxruntime-gpu>=1.16.2
+
+py3nvml
+
+# The version of cuda-python shall be compatible with installed CUDA version.
+# For example, if your CUDA version is 12.1, you can install cuda-python 12.1.
+cuda-python==11.8.0
+
+# For windows, cuda-python need the following
+pywin32; platform_system == "Windows"
+
+# For windows, run `conda install -c conda-forge nvtx` instead
+nvtx; platform_system != "Windows"
+
+# Please install PyTorch 2.1 or above for CUDA 11.8 using one of the following commands:
+# pip3 install torch --index-url https://download.pytorch.org/whl/cu118
+
+# Run the following command to install some extra packages for onnx graph optimization for TensorRT manually.
+# pip3 install --upgrade polygraphy onnx-graphsurgeon --extra-index-url https://pypi.ngc.nvidia.com
diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/requirements-cuda12.txt b/onnxruntime/python/tools/transformers/models/stable_diffusion/requirements-cuda12.txt
new file mode 100644
index 0000000000000..1ff0e3c1cf5af
--- /dev/null
+++ b/onnxruntime/python/tools/transformers/models/stable_diffusion/requirements-cuda12.txt
@@ -0,0 +1,22 @@
+-r requirements.txt
+
+# For CUDA 12.*, you will need build onnxruntime-gpu from source and install the wheel. See README.md for detail.
+# onnxruntime-gpu>=1.16.2
+
+py3nvml
+
+# The version of cuda-python shall be compatible with installed CUDA version.
+# For example, if your CUDA version is 12.1, you can install cuda-python 12.1.
+cuda-python>=12.1.0
+
+# For windows, cuda-python need the following
+pywin32; platform_system == "Windows"
+
+# For windows, run `conda install -c conda-forge nvtx` instead
+nvtx; platform_system != "Windows"
+
+# Please install PyTorch 2.1 or above for 12.1 using one of the following commands:
+# pip3 install torch --index-url https://download.pytorch.org/whl/cu121
+
+# Run the following command to install some extra packages for onnx graph optimization for TensorRT manually.
+# pip3 install --upgrade polygraphy onnx-graphsurgeon --extra-index-url https://pypi.ngc.nvidia.com
diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/requirements-tensorrt.txt b/onnxruntime/python/tools/transformers/models/stable_diffusion/requirements-tensorrt.txt
deleted file mode 100644
index 567f39c0119e6..0000000000000
--- a/onnxruntime/python/tools/transformers/models/stable_diffusion/requirements-tensorrt.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-diffusers>=0.16.0
-transformers>=4.26.0
-numpy>=1.24.1
-accelerate
-onnx>=1.13.0
-coloredlogs
-packaging
-protobuf
-psutil
-sympy
-tensorrt>=8.6.1
-onnxruntime-gpu>=1.15.1
-py3nvml
-# cuda-python version shall be compatible with CUDA version of torch and onnxruntime-gpu
-cuda-python==11.7.0
-#To export onnx of stable diffusion, please install PyTorch 1.13.1+cu117
-#--extra-index-url https://download.pytorch.org/whl/cu117
-#torch==1.13.1+cu117
diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/requirements.txt b/onnxruntime/python/tools/transformers/models/stable_diffusion/requirements.txt
index d4e6c9fa07695..a04f05f4b23d8 100644
--- a/onnxruntime/python/tools/transformers/models/stable_diffusion/requirements.txt
+++ b/onnxruntime/python/tools/transformers/models/stable_diffusion/requirements.txt
@@ -1,14 +1,18 @@
-diffusers>=0.19.3
-transformers>=4.31.0
+diffusers==0.23.1
+transformers==4.35.1
 numpy>=1.24.1
 accelerate
-onnx>=1.13.0
+onnx==1.14.1
 coloredlogs
 packaging
+# Use newer version of protobuf might cause crash
 protobuf==3.20.3
 psutil
 sympy
+controlnet_aux
 # The following are for SDXL
-optimum>=1.11.1
+optimum==1.13.1
 safetensors
 invisible_watermark
+# newer version of opencv-python migth encounter module 'cv2.dnn' has no attribute 'DictValue' error
+opencv-python==4.8.0.74
diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/trt_utilities.py b/onnxruntime/python/tools/transformers/models/stable_diffusion/trt_utilities.py
new file mode 100644
index 0000000000000..d03a9f9f55372
--- /dev/null
+++ b/onnxruntime/python/tools/transformers/models/stable_diffusion/trt_utilities.py
@@ -0,0 +1,12 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation.  All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+import tensorrt as trt
+
+TRT_LOGGER = trt.Logger(trt.Logger.ERROR)
+
+
+def init_trt_plugins():
+    # Register TensorRT plugins
+    trt.init_libnvinfer_plugins(TRT_LOGGER, "")
diff --git a/onnxruntime/python/tools/transformers/models/whisper/README.md b/onnxruntime/python/tools/transformers/models/whisper/README.md
index e9365becd2cd1..8ff5c8a6e1de0 100644
--- a/onnxruntime/python/tools/transformers/models/whisper/README.md
+++ b/onnxruntime/python/tools/transformers/models/whisper/README.md
@@ -79,24 +79,22 @@ $ python3 -m onnxruntime.transformers.models.whisper.convert_to_onnx -m openai/w
 
 Here are some examples of how you can benchmark Whisper across various end-to-end (E2E) implementations.
 
-Note: In the below examples, `PyTorch` refers to running in PyTorch without `torch.compile` and `PyTorch 2.0` refers to running in PyTorch with `torch.compile`.
-
 ### Variants
 
-1. PyTorch (without `torch.compile`), FP32
+1. PyTorch without `torch.compile`, FP32
 ```
 python3 -m models.whisper.benchmark \
-    --benchmark-type hf-pt \
+    --benchmark-type hf-pt-eager \
     --audio-path 1272-141231-0002.mp3 \
     --model-name openai/whisper-large-v2 \
     --precision fp32 \
     --device cpu
 ```
 
-2. PyTorch 2.0 (with `torch.compile`), FP16
+2. PyTorch with `torch.compile`, FP16
 ```
 python3 -m models.whisper.benchmark \
-    --benchmark-type hf-pt2 \
+    --benchmark-type hf-pt-compile \
     --audio-path 1272-141231-0002.mp3 \
     --model-name openai/whisper-large-v2 \
     --precision fp16 \
@@ -109,7 +107,7 @@ python3 -m models.whisper.benchmark \
     --benchmark-type hf-ort \
     --audio-path 1272-141231-0002.mp3 \
     --model-name openai/whisper-large-v2 \
-    --hf-ort-model-path ./whisper-large-v2-onnx/ \
+    --hf-ort-dir-path ./whisper-large-v2-onnx/ \
     --precision fp32 \
     --device cpu
 ```
@@ -156,7 +154,9 @@ You can use `benchmark_all.py` to benchmark across various platforms and automat
 ```
 python3 -m models.whisper.benchmark_all \
     --audio-path ./whisper-test-audios/ \
-    --hf-ort-model-path ./whisper-large-v2-onnx/ \
+    --hf-pt-eager \
+    --hf-pt-compile \
+    --hf-ort-dir-path ./whisper-large-v2-onnx/ \
     --ort-model-path ./wlarge-fp32/whisper-large-v2_all.onnx \
     --model-name openai/whisper-large-v2 \
     --precision fp32 \
@@ -169,28 +169,28 @@ Here is a benchmark for an MP3 file with 20.7s of audio.
 
 #### FP16
 
-| Engine        | Size     | Per-Token Latency | Real-Time Factor |
-| ------------- | -------- | ----------------- | ---------------- |
-| PyTorch       | Tiny     | 4.697 ms/token    | 0.004697         |
-| PyTorch 2.0   | Tiny     | 3.406 ms/token    | 0.003406         |
-| ONNX Runtime  | Tiny     | 0.746 ms/token    | 0.000746         |
-| PyTorch       | Medium   | 17.837 ms/token   | 0.017387         |
-| PyTorch 2.0   | Medium   | 18.124 ms/token   | 0.018124         |
-| ONNX Runtime  | Medium   | 3.894 ms/token    | 0.003894         |
-| PyTorch       | Large v2 | 23.470 ms/token   | 0.023470         |
-| PyTorch 2.0   | Large v2 | 23.146 ms/token   | 0.023146         |
-| ONNX Runtime  | Large v2 | 6.262 ms/token    | 0.006262         |
+| Engine          | Size     | Per-Token Latency | Real-Time Factor |
+| --------------- | -------- | ----------------- | ---------------- |
+| PyTorch eager   | Tiny     | 4.697 ms/token    | 0.004697         |
+| PyTorch compile | Tiny     | 3.406 ms/token    | 0.003406         |
+| ONNX Runtime    | Tiny     | 0.746 ms/token    | 0.000746         |
+| PyTorch eager   | Medium   | 17.837 ms/token   | 0.017387         |
+| PyTorch compile | Medium   | 18.124 ms/token   | 0.018124         |
+| ONNX Runtime    | Medium   | 3.894 ms/token    | 0.003894         |
+| PyTorch eager   | Large v2 | 23.470 ms/token   | 0.023470         |
+| PyTorch compile | Large v2 | 23.146 ms/token   | 0.023146         |
+| ONNX Runtime    | Large v2 | 6.262 ms/token    | 0.006262         |
 
 #### FP32
 
-| Engine        | Size     | Per-Token Latency | Real-Time Factor |
-| ------------- | -------- | ----------------- | ---------------- |
-| PyTorch       | Tiny     | 6.220 ms/token    | 0.006220         |
-| PyTorch 2.0   | Tiny     | 3.944 ms/token    | 0.003944         |
-| ONNX Runtime  | Tiny     | 1.545 ms/token    | 0.001545         |
-| PyTorch       | Medium   | 19.093 ms/token   | 0.019093         |
-| PyTorch 2.0   | Medium   | 20.459 ms/token   | 0.020459         |
-| ONNX Runtime  | Medium   | 9.440 ms/token    | 0.009440         |
-| PyTorch       | Large v2 | 25.844 ms/token   | 0.025844         |
-| PyTorch 2.0   | Large v2 | 26.397 ms/token   | 0.026397         |
-| ONNX Runtime  | Large v2 | 7.492 ms/token    | 0.007492         |
+| Engine          | Size     | Per-Token Latency | Real-Time Factor |
+| --------------- | -------- | ----------------- | ---------------- |
+| PyTorch eager   | Tiny     | 6.220 ms/token    | 0.006220         |
+| PyTorch compile | Tiny     | 3.944 ms/token    | 0.003944         |
+| ONNX Runtime    | Tiny     | 1.545 ms/token    | 0.001545         |
+| PyTorch eager   | Medium   | 19.093 ms/token   | 0.019093         |
+| PyTorch compile | Medium   | 20.459 ms/token   | 0.020459         |
+| ONNX Runtime    | Medium   | 9.440 ms/token    | 0.009440         |
+| PyTorch eager   | Large v2 | 25.844 ms/token   | 0.025844         |
+| PyTorch compile | Large v2 | 26.397 ms/token   | 0.026397         |
+| ONNX Runtime    | Large v2 | 7.492 ms/token    | 0.007492         |
diff --git a/onnxruntime/python/tools/transformers/models/whisper/benchmark.py b/onnxruntime/python/tools/transformers/models/whisper/benchmark.py
index 283528bea7465..759ae6d14f184 100644
--- a/onnxruntime/python/tools/transformers/models/whisper/benchmark.py
+++ b/onnxruntime/python/tools/transformers/models/whisper/benchmark.py
@@ -24,7 +24,7 @@
 
 
 def get_inputs(args: argparse.Namespace):
-    if args.benchmark_type not in {"hf-pt", "hf-pt2", "hf-ort", "ort"}:
+    if args.benchmark_type not in {"hf-pt-eager", "hf-pt-compile", "hf-ort", "ort"}:
         raise Exception("Unable to auto-detect inputs for provided model")
 
     def load_via_ffmpeg():
@@ -102,7 +102,7 @@ def get_model(args: argparse.Namespace):
     # 2) Benchmark Whisper ONNX model from Optimum export (without pre/post processing)
     # 3) Benchmark Whisper ONNX E2E model from Olive (with pre/post processing)
 
-    if args.benchmark_type in {"hf-pt", "hf-pt2"}:
+    if args.benchmark_type in {"hf-pt-eager", "hf-pt-compile"}:
         source = args.hf_pt_model_path if args.hf_pt_model_path else args.model_name
         start_time = time.time()
         model = AutoModelForSpeechSeq2Seq.from_pretrained(
@@ -112,7 +112,7 @@ def get_model(args: argparse.Namespace):
         ).to(args.target_device)
         end_time = time.time()
 
-        if args.benchmark_type == "hf-pt2":
+        if args.benchmark_type == "hf-pt-compile":
             model = torch.compile(model)
 
     elif args.benchmark_type in {"hf-ort", "ort"}:
@@ -136,7 +136,7 @@ def get_model(args: argparse.Namespace):
 
         start_time = time.time()
         model = ORTModelForSpeechSeq2Seq.from_pretrained(
-            args.hf_ort_model_path,
+            args.hf_ort_dir_path,
             use_io_binding=(args.device != "cpu"),
             provider=provider,
             provider_options=provider_options,
@@ -214,7 +214,7 @@ def profile_fn(args, fn, inputs, inputs_type):
     prefix = f"{args.benchmark_type.lower()}-{args.precision}-{args.device}_{fn.__name__.replace('_', '-')}_{inputs_type}_{datetime.datetime.now():%Y-%m-%d_%H:%M:%S}"
     filename = None
 
-    if args.benchmark_type in {"hf-pt", "hf-pt2"}:
+    if args.benchmark_type in {"hf-pt-eager", "hf-pt-compile"}:
         # Profile PyTorch kernels
         with profile(  # noqa: SIM117
             activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], record_shapes=True, profile_memory=True
@@ -280,7 +280,7 @@ def gen_and_dec(inputs):
 
     generate_fn = gen_and_dec
 
-    if args.benchmark_type == "hf-pt2":
+    if args.benchmark_type == "hf-pt-compile":
         # Run forward pass once with each set of inputs to process through Dynamo
         generate_fn(inputs)
 
@@ -345,7 +345,7 @@ def prepare_ort_inputs(inputs, warmup=False):
             for k, v in inputs.items():
                 io_binding.bind_cpu_input(k, v)
             for output in model.get_outputs():
-                io_binding.bind_output(output.name)
+                io_binding.bind_output(output.name, device_type=args.device, device_id=args.device_id)
             return io_binding
 
         return inputs
@@ -407,7 +407,7 @@ def handle_output(output):
 
 
 def run_inference(args, inputs, model):
-    if args.benchmark_type in {"hf-pt", "hf-pt2", "hf-ort"}:
+    if args.benchmark_type in {"hf-pt-eager", "hf-pt-compile", "hf-ort"}:
         run_hf_inference(args, inputs, model)
     elif args.benchmark_type == "ort":
         run_ort_inference(args, inputs, model)
@@ -419,8 +419,13 @@ def parse_args():
     parser = argparse.ArgumentParser()
 
     parser.add_argument(
-        "-bt", "--benchmark-type", type=str, required=True, choices=["hf-pt", "hf-pt2", "hf-ort", "ort"]
+        "-bt",
+        "--benchmark-type",
+        type=str,
+        required=True,
+        choices=["hf-pt-eager", "hf-pt-compile", "hf-ort", "ort"],
     )
+
     parser.add_argument(
         "-m",
         "--model-name",
@@ -445,7 +450,7 @@ def parse_args():
         help="Path to directory containing all PyTorch files (e.g. tokenizer, PyTorch model)",
     )
     parser.add_argument(
-        "--hf-ort-model-path",
+        "--hf-ort-dir-path",
         type=str,
         default="",
         help="Path to directory containing all ONNX files (e.g. tokenizer, encoder, decoder, decoder_with_past)",
@@ -538,7 +543,7 @@ def parse_args():
 
     # Check that model paths have been specified for any benchmarking with ORT
     if args.benchmark_type == "hf-ort":
-        assert args.hf_ort_model_path, "Please specify a path to `--hf-ort-model-path`"
+        assert args.hf_ort_dir_path, "Please specify a path to `--hf-ort-dir-path`"
     if args.benchmark_type == "ort":
         assert args.ort_model_path, "Please specify a path to `--ort-model-path`"
 
diff --git a/onnxruntime/python/tools/transformers/models/whisper/benchmark_all.py b/onnxruntime/python/tools/transformers/models/whisper/benchmark_all.py
index 08d7befec3cfd..071b539ac1899 100644
--- a/onnxruntime/python/tools/transformers/models/whisper/benchmark_all.py
+++ b/onnxruntime/python/tools/transformers/models/whisper/benchmark_all.py
@@ -54,7 +54,21 @@ def get_args():
     )
 
     parser.add_argument(
-        "--hf-ort-model-path",
+        "--hf-pt-eager",
+        default=False,
+        action="store_true",
+        help="Benchmark in PyTorch without `torch.compile`",
+    )
+
+    parser.add_argument(
+        "--hf-pt-compile",
+        default=False,
+        action="store_true",
+        help="Benchmark in PyTorch with `torch.compile`",
+    )
+
+    parser.add_argument(
+        "--hf-ort-dir-path",
         type=str,
         help="Path to folder containing ONNX models for Optimum + ORT benchmarking",
     )
@@ -136,7 +150,7 @@ def process_log_file(device_id, log_file, base_results):
 
     load_audio_latency_s, load_audio_throughput_s = None, None
     feat_ext_latency_s, feat_ext_throughput_s = None, None
-    latency_s, per_token_latency_s, per_token_latency_ms = None, None, None
+    token_length, latency_s, per_token_latency_s, per_token_latency_ms = None, None, None, None
     throughput, memory = None, None
 
     # Detect metrics
@@ -310,73 +324,75 @@ def main():
         logger.info(f"Testing {audio_path}...")
 
         # Benchmark PyTorch without torch.compile
-        benchmark_cmd = [  # noqa: RUF005
-            "python3",
-            "-m",
-            "models.whisper.benchmark",
-            "--audio-path",
-            audio_path,
-            "--benchmark-type",
-            "hf-pt",
-            "--model-name",
-            args.model_name,
-            "--precision",
-            args.precision,
-            "--device",
-            args.device,
-            "--device-id",
-            str(args.device_id),
-            "--warmup-runs",
-            str(args.warmup_runs),
-            "--num-runs",
-            str(args.num_runs),
-            "--log-folder",
-            args.log_folder,
-        ] + hf_decoder_input_ids_cmd
-        logger.info("Benchmark PyTorch without torch.compile")
-        results = benchmark(args, benchmark_cmd, "pytorch", audio_file, duration)
-        all_results.extend(results)
+        if args.hf_pt_eager:
+            benchmark_cmd = [  # noqa: RUF005
+                "python",
+                "-m",
+                "models.whisper.benchmark",
+                "--audio-path",
+                audio_path,
+                "--benchmark-type",
+                "hf-pt-eager",
+                "--model-name",
+                args.model_name,
+                "--precision",
+                args.precision,
+                "--device",
+                args.device,
+                "--device-id",
+                str(args.device_id),
+                "--warmup-runs",
+                str(args.warmup_runs),
+                "--num-runs",
+                str(args.num_runs),
+                "--log-folder",
+                args.log_folder,
+            ] + hf_decoder_input_ids_cmd
+            logger.info("Benchmark PyTorch without torch.compile")
+            results = benchmark(args, benchmark_cmd, "pytorch-eager", audio_file, duration)
+            all_results.extend(results)
 
         # Benchmark PyTorch with torch.compile
-        benchmark_cmd = [  # noqa: RUF005
-            "python3",
-            "-m",
-            "models.whisper.benchmark",
-            "--audio-path",
-            audio_path,
-            "--benchmark-type",
-            "hf-pt2",
-            "--model-name",
-            args.model_name,
-            "--precision",
-            args.precision,
-            "--device",
-            args.device,
-            "--device-id",
-            str(args.device_id),
-            "--warmup-runs",
-            str(args.warmup_runs),
-            "--num-runs",
-            str(args.num_runs),
-            "--log-folder",
-            args.log_folder,
-        ] + hf_decoder_input_ids_cmd
-        logger.info("Benchmark PyTorch with torch.compile")
-        results = benchmark(args, benchmark_cmd, "pytorch-2", audio_file, duration)
-        all_results.extend(results)
+        if args.hf_pt_compile:
+            benchmark_cmd = [  # noqa: RUF005
+                "python",
+                "-m",
+                "models.whisper.benchmark",
+                "--audio-path",
+                audio_path,
+                "--benchmark-type",
+                "hf-pt-compile",
+                "--model-name",
+                args.model_name,
+                "--precision",
+                args.precision,
+                "--device",
+                args.device,
+                "--device-id",
+                str(args.device_id),
+                "--warmup-runs",
+                str(args.warmup_runs),
+                "--num-runs",
+                str(args.num_runs),
+                "--log-folder",
+                args.log_folder,
+            ] + hf_decoder_input_ids_cmd
+            logger.info("Benchmark PyTorch with torch.compile")
+            results = benchmark(args, benchmark_cmd, "pytorch-compile", audio_file, duration)
+            all_results.extend(results)
 
         # Benchmark Optimum + ONNX Runtime
-        if args.hf_ort_model_path:
+        if args.hf_ort_dir_path:
             benchmark_cmd = [  # noqa: RUF005
-                "python3",
+                "python",
                 "-m",
                 "models.whisper.benchmark",
                 "--audio-path",
                 audio_path,
                 "--benchmark-type",
                 "hf-ort",
-                "--hf-ort-model-path",
-                args.hf_ort_model_path,
+                "--hf-ort-dir-path",
+                args.hf_ort_dir_path,
                 "--model-name",
                 args.model_name,
                 "--precision",
@@ -393,14 +409,14 @@ def main():
                 args.log_folder,
             ] + hf_decoder_input_ids_cmd
             logger.info("Benchmark Optimum + ONNX Runtime")
-            results = benchmark(args, benchmark_cmd, "pytorch-ort", audio_file, duration)
+            results = benchmark(args, benchmark_cmd, "optimum-ort", audio_file, duration)
             all_results.extend(results)
 
         # Benchmark ONNX Runtime
         if args.ort_model_path:
             benchmark_cmd = (
                 [  # noqa: RUF005
-                    "python3",
+                    "python",
                     "-m",
                     "models.whisper.benchmark",
                     "--audio-path",
diff --git a/onnxruntime/python/tools/transformers/models/whisper/convert_to_onnx.py b/onnxruntime/python/tools/transformers/models/whisper/convert_to_onnx.py
index 3562df1660ea9..50637b772c233 100644
--- a/onnxruntime/python/tools/transformers/models/whisper/convert_to_onnx.py
+++ b/onnxruntime/python/tools/transformers/models/whisper/convert_to_onnx.py
@@ -169,6 +169,79 @@ def parse_arguments(argv=None):
     )
     parser.set_defaults(chain_model=True)
 
+    parser.add_argument(
+        "--use_whisper_beamsearch",
+        required=False,
+        action="store_true",
+        help="When chain_model, using WhisperBeamSearch operator rather than BeamSearch operator. \
+              It will be set to true when collect_cross_qk, extra_decoding_ids or output_no_speech_probs is set.",
+    )
+    parser.set_defaults(use_whisper_beamsearch=False)
+
+    parser.add_argument(
+        "--extra_decoding_ids",
+        required=False,
+        action="store_true",
+        help="Need extra starting decoding ids for some feature like cross qk. Default if false.",
+    )
+    parser.set_defaults(extra_decoding_ids=False)
+
+    parser.add_argument(
+        "--collect_cross_qk",
+        required=False,
+        action="store_true",
+        help="Beam search model collect stacked cross QK.",
+    )
+    parser.set_defaults(collect_cross_qk=False)
+
+    parser.add_argument(
+        "--output_cross_qk",
+        required=False,
+        action="store_true",
+        help="Beam search model output collected qk as output. Also hint collect_cross_qk",
+    )
+    parser.set_defaults(output_cross_qk=False)
+
+    parser.add_argument(
+        "--no_speech_token_id",
+        default=50362,
+        type=int,
+        help="specify no_speech_token_id. Default is 50362. if >= 0, will be add into beam search attr. \
+              Note that default value maybe different between the multilingual and English-only models.",
+    )
+
+    parser.add_argument(
+        "--output_no_speech_probs",
+        required=False,
+        action="store_true",
+        help="Beam search model output no speech probs which is computed from the encoder/context-decoder graph.",
+    )
+    parser.set_defaults(output_no_speech_probs=False)
+
+    parser.add_argument(
+        "--output_scores",
+        required=False,
+        action="store_true",
+        help="Beam search model output scores over vocab per generated token.",
+    )
+    parser.set_defaults(output_scores=False)
+
+    parser.add_argument(
+        "--output_sequence_scores",
+        required=False,
+        action="store_true",
+        help="Beam search model output scores for each generated sequence.",
+    )
+    parser.set_defaults(output_sequence_scores=False)
+
+    parser.add_argument(
+        "--cross_qk_onnx_model",
+        required=False,
+        type=str,
+        default=None,
+        help="the model which consume cross_qk.",
+    )
+
     parser.add_argument(
         "--beam_output_model",
         type=str,
@@ -220,6 +293,7 @@ def parse_arguments(argv=None):
     )
 
     args = parser.parse_args(argv)
+    args.collect_cross_qk = args.collect_cross_qk or args.output_cross_qk
 
     return args
 
diff --git a/onnxruntime/python/tools/transformers/models/whisper/whisper_chain.py b/onnxruntime/python/tools/transformers/models/whisper/whisper_chain.py
index 3b1e656136547..33958e55f8c38 100644
--- a/onnxruntime/python/tools/transformers/models/whisper/whisper_chain.py
+++ b/onnxruntime/python/tools/transformers/models/whisper/whisper_chain.py
@@ -3,7 +3,11 @@
 
 import onnx
 from benchmark_helper import Precision
-from convert_generation import get_shared_initializers, update_decoder_subgraph_share_buffer_and_use_decoder_masked_mha
+from convert_generation import (
+    get_shared_initializers,
+    update_decoder_subgraph_output_cross_attention,
+    update_decoder_subgraph_share_buffer_and_use_decoder_masked_mha,
+)
 from onnx import TensorProto, helper
 from transformers import WhisperConfig
 
@@ -20,7 +24,10 @@ def verify_inputs(beam_inputs, graph_inputs):
 
 
 def chain_model(args):
-    # Load encoder/decoder and insert necessary (but unused) graph inputs expected by BeamSearch op
+    # Load encoder/decoder and insert necessary (but unused) graph inputs expected by BeamSearch op or WhisperBeamSearch op
+    args.use_whisper_beamsearch = (
+        args.use_whisper_beamsearch or args.collect_cross_qk or args.output_no_speech_probs or args.extra_decoding_ids
+    )
     encoder_model = onnx.load_model(args.encoder_path, load_external_data=True)
     encoder_model.graph.name = "encoderdecoderinit subgraph"
 
@@ -43,7 +50,29 @@ def chain_model(args):
         "decoder_input_ids" if args.use_forced_decoder_ids else "",
         "logits_processor" if args.use_logits_processor else "",
     ]
+
     beam_outputs = ["sequences"]
+    if args.output_sequence_scores:
+        beam_outputs.append("sequence_scores")
+    if args.output_scores:
+        beam_outputs.append("scores")
+
+    if args.use_whisper_beamsearch:
+        assert len(beam_inputs) == 12
+        beam_inputs.extend(
+            [
+                "cross_qk_layer_head" if args.collect_cross_qk else "",
+                "extra_decoding_ids" if args.extra_decoding_ids else "",
+            ]
+        )
+        if args.collect_cross_qk:
+            while len(beam_outputs) < 3:
+                beam_outputs.extend([""])
+            beam_outputs.extend(["cross_qk"])
+        if args.output_no_speech_probs:
+            while len(beam_outputs) < 4:
+                beam_outputs.extend([""])
+            beam_outputs.extend(["no_speech_probs_beam"])
 
     input_features_cast_node, len_pen_cast_node, rep_pen_cast_node = None, None, None
     if args.precision == Precision.FLOAT16:
@@ -69,7 +98,8 @@ def chain_model(args):
             to=TensorProto.FLOAT16,
         )
 
-    node = helper.make_node("BeamSearch", inputs=beam_inputs, outputs=beam_outputs, name="BeamSearch_zcode")
+    operator_type = "WhisperBeamSearch" if args.use_whisper_beamsearch else "BeamSearch"
+    node = helper.make_node(operator_type, inputs=beam_inputs, outputs=beam_outputs, name="BeamSearch_zcode")
     node.domain = "com.microsoft"
     node.attribute.extend(
         [
@@ -81,6 +111,11 @@ def chain_model(args):
             helper.make_attribute("model_type", 2),
         ]
     )
+    if args.use_whisper_beamsearch:
+        if args.collect_cross_qk:
+            node.attribute.extend([helper.make_attribute("decoder_output_cross_qk", 1)])
+        if args.no_speech_token_id >= 0:
+            node.attribute.extend([helper.make_attribute("no_speech_token", args.no_speech_token_id)])
 
     input_features = helper.make_tensor_value_info(
         "input_features", TensorProto.FLOAT, ["batch_size", "feature_size", "sequence_length"]
@@ -121,17 +156,50 @@ def chain_model(args):
         logits_processor = helper.make_tensor_value_info("logits_processor", TensorProto.INT32, [1])
         graph_inputs.append(logits_processor)
 
+    if args.collect_cross_qk:
+        cross_qk_layer_head = helper.make_tensor_value_info(
+            "cross_qk_layer_head", TensorProto.INT32, ["num_layer_head", 2]
+        )
+        graph_inputs.append(cross_qk_layer_head)
+
+    if args.extra_decoding_ids:
+        extra_decoding_ids = helper.make_tensor_value_info(
+            "extra_decoding_ids", TensorProto.INT32, ["batch_size", "extra_decoding_ids_len"]
+        )
+        graph_inputs.append(extra_decoding_ids)
+
     # graph outputs
     sequences = helper.make_tensor_value_info(
         "sequences", TensorProto.INT32, ["batch_size", "num_return_sequences", "max_length"]
     )
     graph_outputs = [sequences]
+    if args.output_cross_qk or (not args.cross_qk_onnx_model and args.collect_cross_qk):
+        cross_qk = helper.make_tensor_value_info(
+            "cross_qk",
+            TensorProto.FLOAT,
+            ["batch_size", "num_return_sequences", "num_layer_head_cross_qk", "max_length", "frames"],
+        )
+        graph_outputs.extend([cross_qk])
+
+    if args.output_no_speech_probs:
+        no_speech_probs = helper.make_tensor_value_info("no_speech_probs", TensorProto.FLOAT, ["batch_size"])
+        graph_outputs.extend([no_speech_probs])
+
+    if args.output_sequence_scores:
+        sequence_scores = helper.make_tensor_value_info("sequence_scores", TensorProto.FLOAT, ["batch_size"])
+        graph_outputs.extend([sequence_scores])
+
+    if args.output_scores:
+        scores = helper.make_tensor_value_info("scores", TensorProto.FLOAT, ["batch_size"])
+        graph_outputs.extend([scores])
 
     if hasattr(args, "use_gpu") and args.use_gpu:
         if update_decoder_subgraph_share_buffer_and_use_decoder_masked_mha(decoder_model.graph):
             logger.info("Updated whisper decoder subgraph to use DecoderMaskedMultiHeadAttention successfully!")
         else:
             logger.warning("DecoderMaskedMultiHeadAttention could not be applied to whisper decoder subgraph")
+        if hasattr(args, "collect_cross_qk") and args.collect_cross_qk:
+            update_decoder_subgraph_output_cross_attention(decoder_model.graph)
 
     # Initializers/opsets
     # Delete shared data between decoder/encoder and move to larger graph initializers
@@ -150,7 +218,35 @@ def chain_model(args):
         if args.precision == Precision.FLOAT16
         else [node]
     )
+    if args.output_no_speech_probs:
+        prob_cast_node = helper.make_node(
+            "Cast",
+            inputs=["no_speech_probs_beam"],
+            outputs=["no_speech_probs"],
+            name="no_speech_probs_cast_to_fp32",
+            to=TensorProto.FLOAT,
+        )
+        graph_nodes.extend([prob_cast_node])
+
     beam_graph = helper.make_graph(graph_nodes, "beam-search-test", graph_inputs, graph_outputs, initializers)
+    beam_graph_input_names = [gi.name for gi in graph_inputs]
+    beam_graph_output_names = [go.name for go in graph_outputs]
+
+    if args.cross_qk_onnx_model:
+        post_qk_model = onnx.load_model(args.cross_qk_onnx_model, load_external_data=True)
+        post_qk_graph = post_qk_model.graph
+        beam_graph.initializer.extend(post_qk_graph.initializer)
+        beam_graph.node.extend(post_qk_graph.node)
+        # If tensor from cross_qk_onnx_model has same name as tensor in beamsearch graph, treat them as same tensor.
+        # User should notice this rule when provide cross_qk_onnx_model to append to the beamsearch node.
+        for pgi in post_qk_graph.input:
+            if (
+                (pgi.name not in beam_graph_input_names)
+                and (pgi.name not in beam_graph_output_names)
+                and (pgi.name != "cross_qk")
+            ):
+                beam_graph.input.extend([pgi])
+        beam_graph.output.extend(post_qk_graph.output)
 
     # Verify graph's inputs match beam search's inputs
     verify_inputs(beam_inputs, graph_inputs)
diff --git a/onnxruntime/python/tools/transformers/models/whisper/whisper_helper.py b/onnxruntime/python/tools/transformers/models/whisper/whisper_helper.py
index 3a81700a7fd04..8c22cd5e745b3 100644
--- a/onnxruntime/python/tools/transformers/models/whisper/whisper_helper.py
+++ b/onnxruntime/python/tools/transformers/models/whisper/whisper_helper.py
@@ -312,6 +312,7 @@ def verify_onnx(
             "tensor(uint8)": np.uint8,
         }
 
+        use_extra_decoding_ids = "extra_decoding_ids" in ort_names
         for name, dtype in zip(ort_names, ort_dtypes):
             if name == "input_features":
                 inputs[name] = inputs[name].detach().cpu().numpy()
@@ -320,9 +321,18 @@ def verify_onnx(
             elif name == "prefix_vocab_mask":
                 inputs[name] = np.ones((batch_size, config.vocab_size), dtype=ort_to_np[dtype])
             elif name == "decoder_input_ids":
-                inputs[name] = np.array([[config.decoder_start_token_id, 50259, 50359, 50363]], dtype=ort_to_np[dtype])
+                raw_input_ids = (
+                    [[config.decoder_start_token_id]]
+                    if use_extra_decoding_ids
+                    else [[config.decoder_start_token_id, 50259, 50359, 50363]]
+                )
+                inputs[name] = np.array(raw_input_ids, dtype=ort_to_np[dtype])
             elif name == "logits_processor":
                 inputs[name] = np.array([1], dtype=ort_to_np[dtype])
+            elif name == "cross_qk_layer_head":
+                inputs[name] = np.array([[0, 0]], dtype=ort_to_np[dtype])
+            elif name == "extra_decoding_ids":
+                inputs[name] = np.repeat(np.array([[50259, 50359, 50363]], dtype=ort_to_np[dtype]), batch_size, 0)
             else:
                 inputs[name] = np.array([inputs[name]], dtype=ort_to_np[dtype])
         ort_outputs = ort_session.run(None, inputs)[0][0]
diff --git a/onnxruntime/python/tools/transformers/onnx_model.py b/onnxruntime/python/tools/transformers/onnx_model.py
index 60be2d84b2bc8..7bdbc08cf733a 100644
--- a/onnxruntime/python/tools/transformers/onnx_model.py
+++ b/onnxruntime/python/tools/transformers/onnx_model.py
@@ -337,6 +337,18 @@ def match_parent_paths(self, node, paths, output_name_to_node):
                 return i, matched, return_indice
         return -1, None, None
 
+    def match_parent_paths_all(self, node, paths, output_name_to_node):
+        match_i, matches, return_indices = [], [], []
+        for i, path in enumerate(paths):
+            assert isinstance(path, (List, Tuple))
+            return_indice = []
+            matched = self.match_parent_path(node, path[0], path[1], output_name_to_node, return_indice)
+            if matched:
+                match_i.append(i)
+                matches.append(matched)
+                return_indices.append(return_indice)
+        return match_i, matches, return_indices
+
     def match_parent_path(
         self,
         node,
@@ -610,7 +622,7 @@ def convert_float_to_float16(self, use_symbolic_shape_infer=True, **kwargs):
 
            When symbolic shape inference is used (even if it failed), ONNX shape inference will be disabled.
 
-           Note that onnx shape inference will fail for model larger than 2GB. For large model, you have to eanble
+           Note that onnx shape inference will fail for model larger than 2GB. For large model, you have to enable
            symbolic shape inference. If your model is not optimized, you can also use model path to call
            convert_float_to_float16 in float16.py (see https://github.com/microsoft/onnxruntime/pull/15067) to
            avoid the 2GB limit.
@@ -663,7 +675,7 @@ def convert_float_to_float16(self, use_symbolic_shape_infer=True, **kwargs):
                         if vi.name in name_vi:
                             del name_vi[vi.name]
                     for vi in name_vi.values():
-                        model.graph.value_info.append(vi)  # noqa: PERF402
+                        model.graph.value_info.append(vi)
             except Exception:
                 logger.warning(
                     "Failed to run symbolic shape inference. Please file an issue in https://github.com/microsoft/onnxruntime."
@@ -832,7 +844,7 @@ def get_first_output(node):
         # Keep track of nodes to keep. The key is first output of node, and the value is the node.
         output_to_node = {}
 
-        # Start from graph outputs, and find parent nodes recurisvely, and add nodes to the output_to_node dictionary.
+        # Start from graph outputs, and find parent nodes recursively, and add nodes to the output_to_node dictionary.
         dq = deque()
         for output in keep_outputs:
             if output in output_name_to_node:
@@ -1114,7 +1126,9 @@ def get_operator_statistics(self, include_domain=False):
             op = (node.domain + ":" if include_domain and node.domain else "") + node.op_type
             op_count[op] = 1 if op not in op_count else (op_count[op] + 1)
 
-        logger.info(f"Operators:{op_count}")
+        # Sorted by count in the descending order, then by key in alphabetical order.
+        logger.info(f"Operators:{sorted(op_count.items(), key=lambda kv:(-kv[1], kv[0]))}")
+
         return op_count
 
     @staticmethod
@@ -1161,7 +1175,7 @@ def has_same_value(
             signature_cache1 (dict): Optional dictionary to store data signatures of tensor1 in order to speed up comparison.
             signature_cache2 (dict): Optional dictionary to store data signatures of tensor2 in order to speed up comparison.
         Returns:
-            bool: True when two intializers has same value.
+            bool: True when two initializers has same value.
         """
         sig1 = (
             signature_cache1[tensor1.name]
diff --git a/onnxruntime/python/tools/transformers/onnx_model_bert.py b/onnxruntime/python/tools/transformers/onnx_model_bert.py
index 995f8c6541b4c..882100a0d019e 100644
--- a/onnxruntime/python/tools/transformers/onnx_model_bert.py
+++ b/onnxruntime/python/tools/transformers/onnx_model_bert.py
@@ -22,7 +22,9 @@
 from fusion_qordered_layernorm import FusionQOrderedLayerNormalization
 from fusion_qordered_matmul import FusionQOrderedMatMul
 from fusion_reshape import FusionReshape
+from fusion_rotary_attention import FusionRotaryEmbeddings
 from fusion_shape import FusionShape
+from fusion_simplified_layernorm import FusionSimplifiedLayerNormalization, FusionSkipSimplifiedLayerNormalization
 from fusion_skiplayernorm import FusionBiasSkipLayerNormalization, FusionSkipLayerNormalization
 from fusion_utils import FusionUtils
 from onnx import GraphProto, ModelProto, TensorProto, ValueInfoProto, helper
@@ -106,10 +108,36 @@ def fuse_layer_norm(self):
         fusion = FusionQOrderedLayerNormalization(self)
         fusion.apply()
 
+    def fuse_simplified_layer_norm(self):
+        fusion = FusionSimplifiedLayerNormalization(self)
+        fusion.apply()
+
     def fuse_skip_layer_norm(self):
         fusion = FusionSkipLayerNormalization(self)
         fusion.apply()
 
+    def fuse_skip_simplified_layer_norm(self):
+        fusion = FusionSkipSimplifiedLayerNormalization(self)
+        fusion.apply()
+
+    def fuse_rotary_embeddings(self):
+        fusion = FusionRotaryEmbeddings(self)
+        fusion.apply()
+        # Remove non-MS domain functions
+        rot_emb_nodes = list(
+            filter(
+                lambda node: node.op_type == "RotaryEmbedding" and node.domain != "com.microsoft", self.model.graph.node
+            )
+        )
+        non_ms_domains_to_keep = set(map(lambda node: node.domain, rot_emb_nodes))
+        i = 0
+        while i < len(self.model.functions):
+            fn = self.model.functions[i]
+            if "RotaryEmbedding" in fn.name and fn.domain not in non_ms_domains_to_keep:
+                self.model.functions.remove(fn)
+            else:
+                i += 1
+
     # Only relevant in models with Q-DQ nodes
     def fuse_qordered_mamtul(self):
         fusion = FusionQOrderedMatMul(self)
@@ -367,6 +395,7 @@ def optimize(self, options: Optional[FusionOptions] = None, add_dynamic_axes: bo
 
         if (options is None) or options.enable_layer_norm:
             self.fuse_layer_norm()
+            self.fuse_simplified_layer_norm()
 
         if (options is None) or options.enable_gelu:
             self.fuse_gelu()
@@ -377,6 +406,10 @@ def optimize(self, options: Optional[FusionOptions] = None, add_dynamic_axes: bo
 
         if (options is None) or options.enable_skip_layer_norm:
             self.fuse_skip_layer_norm()
+            self.fuse_skip_simplified_layer_norm()
+
+        if (options is None) or options.enable_rotary_embeddings:
+            self.fuse_rotary_embeddings()
 
         if options is not None:
             self.attention_mask.set_mask_format(options.attention_mask_format)
@@ -442,38 +475,56 @@ def get_fused_operator_statistics(self):
             "BiasGelu",
             "GemmFastGelu",
             "LayerNormalization",
+            "SimplifiedLayerNormalization",
             "SkipLayerNormalization",
+            "SkipSimplifiedLayerNormalization",
+            "RotaryEmbedding",
         ]
         q_ops = ["QOrderedAttention", "QOrderedGelu", "QOrderedLayerNormalization", "QOrderedMatMul"]
         for op in ops + q_ops:
             nodes = self.get_nodes_by_op_type(op)
             op_count[op] = len(nodes)
 
-        logger.info(f"Optimized operators:{op_count}")
+        logger.info(f"Optimized operators: {op_count}")
         return op_count
 
-    def is_fully_optimized(self):
+    def is_fully_optimized(self, fused_op_count=None):
         """
         Returns True when the model is fully optimized.
         """
-        op_count = self.get_fused_operator_statistics()
-        embed = op_count["EmbedLayerNormalization"]
-        attention = op_count["Attention"] + op_count["MultiHeadAttention"] + op_count["QOrderedAttention"]
-        gelu = op_count["Gelu"] + op_count["BiasGelu"] + op_count["FastGelu"]
-        layer_norm = op_count["LayerNormalization"] + op_count["SkipLayerNormalization"]
-        is_perfect = (embed > 0) and (attention > 0) and (attention == gelu) and (layer_norm >= 2 * attention)
+        if fused_op_count is None:
+            fused_op_count = self.get_fused_operator_statistics()
+
+        def op_count(op_name: str):
+            return fused_op_count.get(op_name) or 0
+
+        embed = op_count("EmbedLayerNormalization")
+        attention = op_count("Attention") + op_count("MultiHeadAttention") + op_count("QOrderedAttention")
+        gelu = op_count("Gelu") + op_count("BiasGelu") + op_count("FastGelu")
+        layer_norm = op_count("LayerNormalization") + op_count("SkipLayerNormalization")
+        simple_layer_norm = op_count("SimplifiedLayerNormalization") + op_count("SkipSimplifiedLayerNormalization")
+
+        is_perfect = (
+            (embed > 0)
+            and (attention > 0)
+            and (attention == gelu)
+            and ((layer_norm >= 2 * attention) or (simple_layer_norm >= 2 * attention))
+        )
 
         if layer_norm == 0:
             logger.debug("Layer Normalization not fused")
 
+        if simple_layer_norm == 0:
+            logger.debug("Simple Layer Normalization not fused")
+
         if gelu == 0:
-            logger.debug("Gelu/FastGelu not fused")
+            logger.debug("Gelu (or FastGelu) not fused")
 
         if embed == 0:
-            logger.debug("Embed Layer not fused")
+            logger.debug("EmbedLayerNormalization not fused")
 
         if attention == 0:
-            logger.warning("Attention not fused")
+            logger.warning("Attention (or MultiHeadAttention) not fused")
 
         return is_perfect
 
diff --git a/onnxruntime/python/tools/transformers/onnx_model_conformer.py b/onnxruntime/python/tools/transformers/onnx_model_conformer.py
new file mode 100644
index 0000000000000..1506d85f53fd4
--- /dev/null
+++ b/onnxruntime/python/tools/transformers/onnx_model_conformer.py
@@ -0,0 +1,33 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation.  All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+import logging
+from typing import Optional
+
+from fusion_attention import AttentionMask
+from fusion_conformer_attention import FusionConformerAttention
+from fusion_options import FusionOptions
+from onnx_model_bert import BertOnnxModel
+
+logger = logging.getLogger(__name__)
+
+
+class ConformerOnnxModel(BertOnnxModel):
+    def __init__(self, model, num_heads, hidden_size):
+        super().__init__(model, num_heads, hidden_size)
+        self.attention_mask = AttentionMask(self)
+        self.attention_fusion = FusionConformerAttention(self, self.hidden_size, self.num_heads, self.attention_mask)
+
+    def optimize(self, options: Optional[FusionOptions] = None, add_dynamic_axes: bool = False):
+        self.attention_fusion.use_multi_head_attention = False if options is None else options.use_multi_head_attention
+        self.attention_fusion.disable_multi_head_attention_bias = (
+            False if options is None else options.disable_multi_head_attention_bias
+        )
+        super().optimize(options, add_dynamic_axes)
+
+    def fuse_attention(self):
+        self.attention_fusion.apply()
+
+    def preprocess(self):
+        self.adjust_reshape_and_expand()
diff --git a/onnxruntime/python/tools/transformers/onnx_model_gpt2.py b/onnxruntime/python/tools/transformers/onnx_model_gpt2.py
index 263857ffbc130..6545bb08cdd5e 100644
--- a/onnxruntime/python/tools/transformers/onnx_model_gpt2.py
+++ b/onnxruntime/python/tools/transformers/onnx_model_gpt2.py
@@ -8,6 +8,7 @@
 from fusion_gpt_attention import FusionGptAttention
 from fusion_gpt_attention_megatron import FusionGptAttentionMegatron
 from fusion_gpt_attention_no_past import FusionGptAttentionNoPast
+from fusion_rotary_attention import FusionRotaryAttention
 from onnx_model_bert import BertOnnxModel
 
 logger = logging.getLogger(__name__)
@@ -27,6 +28,9 @@ def fuse_attention(self):
             fusion = FusionGptAttentionMegatron(self, self.num_heads)
             fusion.apply()
 
+        fusion = FusionRotaryAttention(self, self.hidden_size, self.num_heads)
+        fusion.apply()
+
     def postprocess(self):
         """
         Remove extra reshape nodes.
@@ -94,4 +98,4 @@ def postprocess(self):
             reshape_count += 2
 
         self.prune_graph()
-        logger.info(f"postprocess: remove Reshape count:{reshape_count}")
+        logger.info(f"postprocess: remove Reshape count: {reshape_count}")
diff --git a/onnxruntime/python/tools/transformers/onnx_model_t5.py b/onnxruntime/python/tools/transformers/onnx_model_t5.py
index ab6a7c72a2c7a..95f40af3fd746 100644
--- a/onnxruntime/python/tools/transformers/onnx_model_t5.py
+++ b/onnxruntime/python/tools/transformers/onnx_model_t5.py
@@ -3,12 +3,12 @@
 # Licensed under the MIT License.
 # --------------------------------------------------------------------------
 import logging
-from typing import Dict, Optional, Union
+from typing import Optional, Union
 
 import numpy as np
 from fusion_attention import AttentionMask, FusionAttention
 from fusion_base import Fusion
-from fusion_skiplayernorm import FusionSkipLayerNormalization
+from fusion_simplified_layernorm import FusionSimplifiedLayerNormalization, FusionSkipSimplifiedLayerNormalization
 from fusion_utils import NumpyHelper
 from onnx import NodeProto, TensorProto, helper
 from onnx_model import OnnxModel
@@ -56,8 +56,8 @@ def create_attention_node(
         Args:
             mask_index (str): mask input
             q_matmul (NodeProto): MatMul node in fully connection for Q
-            k_matmul (NodeProto): MatMul node in fully connection for  K
-            v_matmul (NodeProto): MatMul node in fully connection for  V
+            k_matmul (NodeProto): MatMul node in fully connection for K
+            v_matmul (NodeProto): MatMul node in fully connection for V
             num_heads (int): number of attention heads. If a model is pruned, it is the number of heads after pruning.
             hidden_size (int): hidden dimension. If a model is pruned, it is the hidden dimension after pruning.
             input (str): input name
@@ -687,67 +687,6 @@ def fuse(self, node, input_name_to_nodes, output_name_to_node):
         self.node_name_to_graph_name[rpb_node.name] = self.this_graph_name
 
 
-class FusionSimplifiedLayerNormalization(Fusion):
-    def __init__(self, model: OnnxModel):
-        super().__init__(model, "SimplifiedLayerNormalization", "Mul")
-
-    def fuse(self, node, input_name_to_nodes: Dict, output_name_to_node: Dict):
-        if node.op_type != "Mul":
-            return
-
-        sim_ln_nodes = self.model.match_parent_path(
-            node,
-            ["Mul", "Div", "Sqrt", "Add", "ReduceMean", "Pow", "Add"],
-            [1, 1, 1, 0, 0, 0, 0],
-        )
-        if sim_ln_nodes is None:
-            sim_ln_nodes = self.model.match_parent_path(
-                node,
-                ["Mul", "Div", "Sqrt", "Add", "ReduceMean", "Pow", "Gather"],
-                [1, 1, 1, 0, 0, 0, 0],
-            )
-            if sim_ln_nodes is None:
-                return
-
-        pow_node = sim_ln_nodes[-2]
-        if self.model.find_constant_input(pow_node, 2.0) != 1:
-            return
-
-        root_input = pow_node.input[0]
-
-        mul_node_1 = sim_ln_nodes[0]
-        if root_input != mul_node_1.input[0]:
-            return
-
-        second_add_node = sim_ln_nodes[3]
-        i, add_weight = self.model.get_constant_input(second_add_node)
-        if add_weight is None or add_weight <= 0 or add_weight > 1.0e-4:
-            logger.warning(f"epsilon value is not expeced: {add_weight}")
-            return
-
-        self.nodes_to_remove.extend(sim_ln_nodes[:-1])
-
-        normalize_node = helper.make_node(
-            "SimplifiedLayerNormalization",
-            inputs=[root_input, node.input[0]],
-            outputs=[node.output[0]],
-            name=self.model.create_node_name("SimplifiedLayerNormalization", name_prefix="LayerNorm"),
-        )
-        normalize_node.attribute.extend([helper.make_attribute("epsilon", float(add_weight))])
-        normalize_node.attribute.extend([helper.make_attribute("axis", int(-1))])
-        normalize_node.attribute.extend([helper.make_attribute("stash_type", int(1))])
-        self.nodes_to_add.append(normalize_node)
-        self.node_name_to_graph_name[normalize_node.name] = self.this_graph_name
-
-
-class FusionSkipSimplifiedLayerNormalization(FusionSkipLayerNormalization):
-    def __init__(self, model: OnnxModel):
-        super().__init__(model, "SkipSimplifiedLayerNormalization", "SimplifiedLayerNormalization")
-
-    def fuse(self, node, input_name_to_nodes, output_name_to_node):
-        super().fuse(node, input_name_to_nodes, output_name_to_node)
-
-
 class T5OnnxModel(BertOnnxModel):
     def __init__(self, model, num_heads, hidden_size):
         super().__init__(model, num_heads, hidden_size)
diff --git a/onnxruntime/python/tools/transformers/onnx_model_unet.py b/onnxruntime/python/tools/transformers/onnx_model_unet.py
index 294641dd1e067..4d15b9288e7b6 100644
--- a/onnxruntime/python/tools/transformers/onnx_model_unet.py
+++ b/onnxruntime/python/tools/transformers/onnx_model_unet.py
@@ -12,6 +12,7 @@
 from fusion_group_norm import FusionGroupNorm
 from fusion_nhwc_conv import FusionNhwcConv
 from fusion_options import FusionOptions
+from fusion_skip_group_norm import FusionSkipGroupNorm
 from fusion_transpose import FusionInsertTranspose, FusionTranspose
 from onnx import ModelProto
 from onnx_model import OnnxModel
@@ -57,8 +58,8 @@ def remove_useless_div(self):
             logger.info("Removed %d Div nodes", len(nodes_to_remove))
 
     def convert_conv_to_nhwc(self):
-        # Do not update weight here since save external data has a bug
-        conv_to_nhwc_conv = FusionNhwcConv(self, update_weight=False)
+        # Transpose weights in offline might help since ORT does not apply constant-folding on Transpose nodes.
+        conv_to_nhwc_conv = FusionNhwcConv(self, update_weight=True)
         conv_to_nhwc_conv.apply()
 
     def merge_adjacent_transpose(self):
@@ -150,6 +151,10 @@ def optimize(self, options: Optional[FusionOptions] = None):
         # Remove reshape nodes that having same shape of input and output based on symbolic shape inference.
         self.utils.remove_useless_reshape_nodes()
 
+        if (options is None) or options.enable_skip_group_norm:
+            skip_group_norm_fusion = FusionSkipGroupNorm(self)
+            skip_group_norm_fusion.apply()
+
         if (options is None) or options.enable_bias_skip_layer_norm:
             # Fuse SkipLayerNormalization and Add Bias before it.
             self.fuse_add_bias_skip_layer_norm()
@@ -181,6 +186,7 @@ def get_fused_operator_statistics(self):
             "SkipLayerNormalization",
             "BiasSplitGelu",
             "GroupNorm",
+            "SkipGroupNorm",
             "NhwcConv",
             "BiasAdd",
         ]
diff --git a/onnxruntime/python/tools/transformers/onnx_model_vae.py b/onnxruntime/python/tools/transformers/onnx_model_vae.py
index 9e79014e71027..de8b59074a871 100644
--- a/onnxruntime/python/tools/transformers/onnx_model_vae.py
+++ b/onnxruntime/python/tools/transformers/onnx_model_vae.py
@@ -32,6 +32,7 @@ def get_fused_operator_statistics(self):
         ops = [
             "Attention",
             "GroupNorm",
+            "SkipGroupNorm",
             "NhwcConv",
         ]
         for op in ops:
diff --git a/onnxruntime/python/tools/transformers/optimizer.py b/onnxruntime/python/tools/transformers/optimizer.py
index 3f274eb6c835a..6842a97fe0c77 100644
--- a/onnxruntime/python/tools/transformers/optimizer.py
+++ b/onnxruntime/python/tools/transformers/optimizer.py
@@ -32,6 +32,7 @@
 from onnx_model_bert_keras import BertOnnxModelKeras
 from onnx_model_bert_tf import BertOnnxModelTF
 from onnx_model_clip import ClipOnnxModel
+from onnx_model_conformer import ConformerOnnxModel
 from onnx_model_gpt2 import Gpt2OnnxModel
 from onnx_model_t5 import T5OnnxModel
 from onnx_model_tnlr import TnlrOnnxModel
@@ -56,6 +57,7 @@
     "unet": (UnetOnnxModel, "pytorch", 1),  # UNet in Stable Diffusion
     "vae": (VaeOnnxModel, "pytorch", 1),  # UAE in Stable Diffusion
     "vit": (BertOnnxModel, "pytorch", 1),
+    "conformer": (ConformerOnnxModel, "pytorch", 1),
 }
 
 
@@ -69,6 +71,8 @@ def optimize_by_onnxruntime(
     save_as_external_data: bool = False,
     external_data_filename: str = "",
     external_data_file_threshold: int = 1024,
+    *,
+    provider: Optional[str] = None,
 ) -> str:
     """
     Use onnxruntime to optimize model.
@@ -82,6 +86,7 @@ def optimize_by_onnxruntime(
         save_as_external_data (bool): whether to save external data outside of ONNX model
         external_data_filename (str): name of external data file. If not provided, name is automatically created from ONNX model.
         external_data_file_threshold (int): threshold to decide whether to save tensor in ONNX model or in external data file
+        provider (str or None): execution provider to use if use_gpu
     Returns:
         optimized_model_path (str): the path of optimized model
     """
@@ -90,13 +95,17 @@ def optimize_by_onnxruntime(
 
     import onnxruntime
 
-    if use_gpu and set(onnxruntime.get_available_providers()).isdisjoint(
-        ["CUDAExecutionProvider", "ROCMExecutionProvider", "MIGraphXExecutionProvider"]
+    if (
+        use_gpu
+        and provider is None
+        and set(onnxruntime.get_available_providers()).isdisjoint(
+            ["CUDAExecutionProvider", "ROCMExecutionProvider", "MIGraphXExecutionProvider"]
+        )
     ):
         logger.error("There is no gpu for onnxruntime to do optimization.")
         return onnx_model_path
 
-    model = OnnxModel(load_model(onnx_model_path, format=None, load_external_data=False))
+    model = OnnxModel(load_model(onnx_model_path, load_external_data=False))
     if model.use_float16() and not use_gpu:
         logger.warning(
             "This model uses float16 in the graph, use_gpu=False might cause extra Cast nodes. "
@@ -138,17 +147,32 @@ def optimize_by_onnxruntime(
         kwargs["disabled_optimizers"] = disabled_optimizers
 
     if not use_gpu:
-        onnxruntime.InferenceSession(onnx_model_path, sess_options, providers=["CPUExecutionProvider"], **kwargs)
+        providers = ["CPUExecutionProvider"]
+    elif provider is not None:
+        if provider == "dml":
+            providers = ["DmlExecutionProvider"]
+        elif provider == "rocm":
+            providers = ["ROCMExecutionProvider"]
+        elif provider == "migraphx":
+            providers = ["MIGraphXExecutionProvider", "ROCMExecutionProvider"]
+        elif provider == "cuda":
+            providers = ["CUDAExecutionProvider"]
+        elif provider == "tensorrt":
+            providers = ["TensorrtExecutionProvider", "CUDAExecutionProvider"]
+        else:
+            providers = ["CUDAExecutionProvider"]
+
+        providers.append("CPUExecutionProvider")
     else:
-        gpu_ep = []
+        providers = []
 
         if torch_version.hip:
-            gpu_ep.append("MIGraphXExecutionProvider")
-            gpu_ep.append("ROCMExecutionProvider")
+            providers.append("MIGraphXExecutionProvider")
+            providers.append("ROCMExecutionProvider")
         else:
-            gpu_ep.append("CUDAExecutionProvider")
+            providers.append("CUDAExecutionProvider")
 
-        onnxruntime.InferenceSession(onnx_model_path, sess_options, providers=gpu_ep, **kwargs)
+    onnxruntime.InferenceSession(onnx_model_path, sess_options, providers=providers, **kwargs)
 
     assert os.path.exists(optimized_model_path) and os.path.isfile(optimized_model_path)
     logger.debug("Save optimized model by onnxruntime to %s", optimized_model_path)
@@ -220,6 +244,8 @@ def optimize_model(
     use_gpu: bool = False,
     only_onnxruntime: bool = False,
     verbose: bool = False,
+    *,
+    provider: Optional[str] = None,
 ):
     """Optimize Model by OnnxRuntime and/or python fusion logic.
 
@@ -257,6 +283,7 @@ def optimize_model(
         use_gpu (bool, optional): use gpu or not for onnxruntime. Defaults to False.
         only_onnxruntime (bool, optional): only use onnxruntime to optimize model, and no python fusion.
             Defaults to False.
+        provider (str, optional): execution provider to use if use_gpu. Defaults to None.
 
      Returns:
         object of an optimizer class.
@@ -302,6 +329,7 @@ def optimize_model(
         temp_model_path = optimize_by_onnxruntime(
             input,
             use_gpu=use_gpu,
+            provider=provider,
             optimized_model_path=optimized_model_path,
             opt_level=opt_level,
             disabled_optimizers=disabled_optimizers,
@@ -316,6 +344,7 @@ def optimize_model(
         temp_model_path = optimize_by_onnxruntime(
             input,
             use_gpu=use_gpu,
+            provider=provider,
             optimized_model_path=optimized_model_path,
             opt_level=1,
             disabled_optimizers=disabled_optimizers,
@@ -423,6 +452,14 @@ def _parse_arguments():
     )
     parser.set_defaults(use_gpu=False)
 
+    parser.add_argument(
+        "--provider",
+        required=False,
+        type=str,
+        default=None,
+        help="Execution provider to use if use_gpu",
+    )
+
     parser.add_argument(
         "--only_onnxruntime",
         required=False,
@@ -501,6 +538,7 @@ def main():
         opt_level=args.opt_level,
         optimization_options=optimization_options,
         use_gpu=args.use_gpu,
+        provider=args.provider,
         only_onnxruntime=args.only_onnxruntime,
     )
 
@@ -510,11 +548,14 @@ def main():
     if args.input_int32:
         optimizer.change_graph_inputs_to_int32()
 
-    if args.model_type in ["bert", "gpt2"]:
-        if optimizer.is_fully_optimized():
-            logger.info("The model has been fully optimized.")
-        else:
-            logger.info("The model has been optimized.")
+    # Print the operator statistics might help end user.
+    optimizer.get_operator_statistics()
+
+    fused_op_count = optimizer.get_fused_operator_statistics()
+    if "bert" in args.model_type and optimizer.is_fully_optimized(fused_op_count):
+        logger.info("The model has been fully optimized.")
+    else:
+        logger.info("The model has been optimized.")
 
     if args.convert_to_packing_mode:
         if args.model_type == "bert":
diff --git a/onnxruntime/python/tools/transformers/profiler.py b/onnxruntime/python/tools/transformers/profiler.py
index fc2417ea897c3..8e45b149eaf03 100644
--- a/onnxruntime/python/tools/transformers/profiler.py
+++ b/onnxruntime/python/tools/transformers/profiler.py
@@ -454,7 +454,7 @@ def group_node_results(sess_time, kernel_time_only, use_gpu):
 
 
 def get_dim_from_type_proto(dim):
-    return getattr(dim, dim.WhichOneof("value")) if type(dim.WhichOneof("value")) == str else None
+    return getattr(dim, dim.WhichOneof("value")) if type(dim.WhichOneof("value")) == str else None  # noqa: E721
 
 
 def get_shape_from_type_proto(type_proto):
@@ -573,7 +573,7 @@ def create_gpt2_inputs(onnx_model, batch_size, sequence_length, past_sequence_le
         shape = get_shape_from_type_proto(graph_input.type)
         for i, dim in enumerate(shape):
             if isinstance(dim, str):
-                if dim not in symbols.keys():
+                if dim not in symbols:
                     raise RuntimeError(f"symbol is not supported: {dim}")
                 else:
                     shape[i] = symbols[dim]
@@ -615,7 +615,7 @@ def create_longformer_inputs(onnx_model, batch_size, sequence_length, global_len
         shape = get_shape_from_type_proto(graph_input.type)
         for i, dim in enumerate(shape):
             if isinstance(dim, str):
-                if dim not in symbols.keys():
+                if dim not in symbols:
                     raise RuntimeError(f"symbol is not supported: {dim}")
                 else:
                     shape[i] = symbols[dim]
diff --git a/onnxruntime/python/tools/transformers/shape_infer_helper.py b/onnxruntime/python/tools/transformers/shape_infer_helper.py
index f8a5464d8af78..f1fc0c952e8e4 100644
--- a/onnxruntime/python/tools/transformers/shape_infer_helper.py
+++ b/onnxruntime/python/tools/transformers/shape_infer_helper.py
@@ -28,12 +28,12 @@ def __init__(self, model, verbose=0, int_max=2**31 - 1, auto_merge=True, guess_o
         self.is_inferred_: bool = False
         self.dynamic_axis_mapping_: Dict[str, int] = {}
 
-    def infer(self, dynamic_axis_mapping: Dict[str, int], max_runs: int = 128):
+    def infer(self, dynamic_axis_mapping: Dict[str, int], max_runs: int = 200):
         """Run shape inference, and try replace dynamic axis from string to integer when mapping is provided.
 
         Args:
             dynamic_axis_mapping (_type_): a dictionary with name of dynamic axis as key, like {"batch_size" : 4}
-            max_runs (int, optional): limit maximum number of runs to avoid infinite loop. Defaults to 32.
+            max_runs (int, optional): limit maximum number of runs to avoid infinite loop. Defaults to 200.
 
         Returns:
             bool: whether all shapes has been inferred or not.
diff --git a/onnxruntime/python/torch_cpp_extensions/aten_op_executor/__init__.py b/onnxruntime/python/torch_cpp_extensions/aten_op_executor/__init__.py
index 9dee6564509d5..8bf7cbf80eb37 100644
--- a/onnxruntime/python/torch_cpp_extensions/aten_op_executor/__init__.py
+++ b/onnxruntime/python/torch_cpp_extensions/aten_op_executor/__init__.py
@@ -29,5 +29,5 @@ def load_aten_op_executor_cpp_extension():
     from onnxruntime.training.ortmodule.torch_cpp_extensions import aten_op_executor
 
     _C.register_aten_op_executor(
-        str(aten_op_executor.is_tensor_argument_address()), str(aten_op_executor.execute_aten_operator_address())
+        str(aten_op_executor.is_cpu_argument_address()), str(aten_op_executor.execute_aten_operator_address())
     )
diff --git a/onnxruntime/python/torch_cpp_extensions/aten_op_executor/aten_op_executor.cc b/onnxruntime/python/torch_cpp_extensions/aten_op_executor/aten_op_executor.cc
index 182f2368f5b47..903a394a06ef3 100644
--- a/onnxruntime/python/torch_cpp_extensions/aten_op_executor/aten_op_executor.cc
+++ b/onnxruntime/python/torch_cpp_extensions/aten_op_executor/aten_op_executor.cc
@@ -154,11 +154,32 @@ class ATenOperatorCache {
   std::unordered_map<std::pair<std::string, std::string>, ATenOperator, PairHash> ops_;
 };
 
-// Backend uses this function to check if an argument is CPU input (non-tensor argument) or not.
-bool IsTensorArgument(const char* op_name, const char* overload_name, size_t index) {
-  const auto& aten_op = ATenOperatorCache::Instance().GetOperator(op_name, overload_name);
-  TORCH_INTERNAL_ASSERT(index < aten_op.argument_size);
-  return aten_op.elem_kinds[index] == c10::TypeKind::TensorType;
+const std::unordered_map<std::string, std::unordered_set<size_t>> kCpuTensorInputsMap = {
+    {"_efficient_attention_forward", {4, 5, 11, 12}}, {"_efficient_attention_backward", {6, 7, 12, 13}}};
+
+const std::unordered_map<std::string, std::unordered_set<size_t>> kCpuTensorOutputsMap = {
+    {"_efficient_attention_forward", {2, 3}}};
+
+// Backend uses this function to check if an argument is CPU input or not.
+bool IsCpuArgument(const char* op_name, const char* overload_name, size_t index, bool is_input) {
+  if (is_input) {
+    // If the argument is non-tensor type, it's CPU argument.
+    const auto& aten_op = ATenOperatorCache::Instance().GetOperator(op_name, overload_name);
+    TORCH_INTERNAL_ASSERT(index < aten_op.argument_size);
+    if (aten_op.elem_kinds[index] != c10::TypeKind::TensorType) {
+      return true;
+    }
+  }
+
+  std::string full_name = std::string(op_name);
+  std::string overload_name_str = std::string(overload_name);
+  if (overload_name_str != "") {
+    full_name += ("." + overload_name_str);
+  }
+
+  const auto& cpu_tensors_map = is_input ? kCpuTensorInputsMap : kCpuTensorOutputsMap;
+  return cpu_tensors_map.find(full_name) != cpu_tensors_map.end() &&
+         cpu_tensors_map.at(full_name).find(index) != cpu_tensors_map.at(full_name).end();
 }
 
 void ExecuteATenOperator(const char* op_name, const char* overload_name, size_t input_size,
@@ -196,14 +217,15 @@ void ExecuteATenOperator(const char* op_name, const char* overload_name, size_t
   size_t output_index = 0;
   for (const auto& ret : torch::jit::pop(stack, output_size)) {
     const auto& tensor = ret.toTensor();
-    dlpack_outputs[output_index++] = at::toDLPack(tensor.is_contiguous() ? tensor : tensor.contiguous());
+    dlpack_outputs[output_index++] =
+        tensor.defined() ? at::toDLPack(tensor.is_contiguous() ? tensor : tensor.contiguous()) : nullptr;
   }
 }
 
-size_t is_tensor_argument_address() { return reinterpret_cast<size_t>(&IsTensorArgument); }
+size_t is_cpu_argument_address() { return reinterpret_cast<size_t>(&IsCpuArgument); }
 size_t execute_aten_operator_address() { return reinterpret_cast<size_t>(&ExecuteATenOperator); }
 
 PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
-  m.def("is_tensor_argument_address", &is_tensor_argument_address, "Address of tensor argument check.");
+  m.def("is_cpu_argument_address", &is_cpu_argument_address, "Address of tensor argument check.");
   m.def("execute_aten_operator_address", &execute_aten_operator_address, "Address of Aten operator executor");
 }
diff --git a/onnxruntime/python/torch_cpp_extensions/ort_torch_ext/__init__.py b/onnxruntime/python/torch_cpp_extensions/ort_torch_ext/__init__.py
index 7d5716b85db30..329fba5aa670a 100644
--- a/onnxruntime/python/torch_cpp_extensions/ort_torch_ext/__init__.py
+++ b/onnxruntime/python/torch_cpp_extensions/ort_torch_ext/__init__.py
@@ -5,7 +5,7 @@
 
 from onnxruntime.capi import _pybind_state as _C
 
-from .aten_op_executor import execute_aten_operator_address, is_tensor_argument_address
+from .aten_op_executor import execute_aten_operator_address, is_cpu_argument_address
 
 
 def run_once_aten_op_executor(f):
@@ -30,7 +30,7 @@ def aten_op_executor_wrapper(*args, **kwargs):
 
 @run_once_aten_op_executor
 def load_aten_op_executor_cpp_extension():
-    _C.register_aten_op_executor(str(is_tensor_argument_address()), str(execute_aten_operator_address()))
+    _C.register_aten_op_executor(str(is_cpu_argument_address()), str(execute_aten_operator_address()))
 
 
 def init_aten_op_executor():
diff --git a/onnxruntime/test/contrib_ops/bias_add_op_test.cc b/onnxruntime/test/contrib_ops/bias_add_op_test.cc
index 7699f4479caa7..6fd091ef66110 100644
--- a/onnxruntime/test/contrib_ops/bias_add_op_test.cc
+++ b/onnxruntime/test/contrib_ops/bias_add_op_test.cc
@@ -107,6 +107,20 @@ TEST(BiasAddTest, BiasAddTest_HiddenSize_1280) {
   constexpr int64_t num_channels = 1280;
   RunBiasAddTest(batch_size, image_size, num_channels);
 }
+
+TEST(BiasAddTest, BiasAddTest_HiddenSize_768) {
+  constexpr int64_t batch_size = 2;
+  constexpr int64_t image_size = 5;
+  constexpr int64_t num_channels = 768;
+  RunBiasAddTest(batch_size, image_size, num_channels);
+}
+
+TEST(BiasAddTest, BiasAddTest_HiddenSize_1536) {
+  constexpr int64_t batch_size = 1;
+  constexpr int64_t image_size = 3;
+  constexpr int64_t num_channels = 1536;
+  RunBiasAddTest(batch_size, image_size, num_channels);
+}
 #endif
 
 }  // namespace test
diff --git a/onnxruntime/test/contrib_ops/bias_split_gelu_op_test.cc b/onnxruntime/test/contrib_ops/bias_split_gelu_op_test.cc
index db14eb3da42cd..a979717d23573 100644
--- a/onnxruntime/test/contrib_ops/bias_split_gelu_op_test.cc
+++ b/onnxruntime/test/contrib_ops/bias_split_gelu_op_test.cc
@@ -152,6 +152,20 @@ TEST(BiasSplitGeluTest, BiasSplitGeluTest_HiddenSize_10240) {
   RunBiasSplitGeluTest(batch_size, sequence_length, hidden_size);
 }
 
+TEST(BiasSplitGeluTest, BiasSplitGeluTest_HiddenSize_6144) {
+  constexpr int64_t batch_size = 2;
+  constexpr int64_t sequence_length = 3;
+  constexpr int64_t hidden_size = 6144;
+  RunBiasSplitGeluTest(batch_size, sequence_length, hidden_size);
+}
+
+TEST(BiasSplitGeluTest, BiasSplitGeluTest_HiddenSize_12288) {
+  constexpr int64_t batch_size = 1;
+  constexpr int64_t sequence_length = 2;
+  constexpr int64_t hidden_size = 12288;
+  RunBiasSplitGeluTest(batch_size, sequence_length, hidden_size);
+}
+
 #endif
 
 }  // namespace test
diff --git a/onnxruntime/test/contrib_ops/dynamic_time_warping_op_test.cc b/onnxruntime/test/contrib_ops/dynamic_time_warping_op_test.cc
new file mode 100644
index 0000000000000..ea6f93a273055
--- /dev/null
+++ b/onnxruntime/test/contrib_ops/dynamic_time_warping_op_test.cc
@@ -0,0 +1,123 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "gtest/gtest.h"
+#include "test/providers/provider_test_utils.h"
+#include "test/util/include/default_providers.h"
+#include "test/common/cuda_op_test_utils.h"
+
+using namespace ONNX_NAMESPACE;
+
+namespace onnxruntime {
+namespace test {
+
+#ifdef USE_CUDA
+
+TEST(DynamicTimeWarp, simple) {
+  if (NeedSkipIfCudaArchLowerThan(530)) {
+    return;
+  }
+
+  std::vector<float> X = {
+      3.0f,
+      8.0f,
+      5.0f,
+      1.0f,
+      9.0f,
+      8.0f,
+      5.0f,
+      7.0f,
+      4.0f,
+      4.0f,
+      9.0f,
+      6.0f,
+      2.0f,
+      9.0f,
+      7.0f,
+      2.0f,
+      5.0f,
+      6.0f,
+      1.0f,
+      8.0f,
+      4.0f,
+      6.0f,
+      5.0f,
+      8.0f,
+      4.0f,
+      8.0f,
+      3.0f,
+      6.0f,
+      3.0f,
+      9.0f,
+      1.0f,
+      1.0f,
+      6.0f,
+      8.0f,
+      3.0f,
+      5.0f,
+      5.0f,
+      3.0f,
+      3.0f,
+      8.0f,
+      8.0f,
+      7.0f,
+      1.0f,
+      2.0f,
+      2.0f,
+      1.0f,
+      5.0f,
+      4.0f,
+      5.0f,
+      0.0f,
+      3.0f,
+      6.0f,
+      3.0f,
+      7.0f,
+      4.0f,
+      5.0f,
+      4.0f,
+      5.0f,
+      4.0f,
+      0.0f,
+  };
+
+  std::vector<int32_t> Y = {
+      0,
+      1,
+      2,
+      3,
+      4,
+      4,
+      4,
+      4,
+      5,
+      5,
+      5,
+      5,
+      0,
+      1,
+      1,
+      1,
+      2,
+      3,
+      4,
+      5,
+      6,
+      7,
+      8,
+      9,
+  };
+
+  OpTester tester("DynamicTimeWarping", 1, onnxruntime::kMSDomain);
+  tester.AddInput<float>("input", {6, 10}, X);
+  tester.AddOutput<int32_t>("output", {2, 12}, Y);
+
+  std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
+  execution_providers.push_back(DefaultCudaExecutionProvider());
+  tester.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
+}
+
+#endif
+
+}  // namespace test
+}  // namespace onnxruntime
diff --git a/onnxruntime/test/contrib_ops/element_wise_ops_test.cc b/onnxruntime/test/contrib_ops/element_wise_ops_test.cc
index 15e2449cd230f..c641103a74465 100644
--- a/onnxruntime/test/contrib_ops/element_wise_ops_test.cc
+++ b/onnxruntime/test/contrib_ops/element_wise_ops_test.cc
@@ -162,13 +162,13 @@ static void RunBiasGeluTestBFloat16(const std::vector<int64_t>& input_dims, cons
   tester.AddInput<BFloat16>("B", bias_dims, bias_data_bf16);
   tester.AddOutput<BFloat16>("C", input_dims, output_data_bf16);
   std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
-#ifdef USE_CUDA
+#if defined(USE_CUDA)
   execution_providers.push_back(DefaultCudaExecutionProvider());
-#elif USE_ROCM
+#elif defined(USE_ROCM)
   execution_providers.push_back(DefaultRocmExecutionProvider());
-#elif USE_DNNL
+#elif defined(USE_DNNL)
   execution_providers.push_back(DefaultDnnlExecutionProvider());
-#elif USE_DML
+#elif defined(USE_DML)
   execution_providers.push_back(DefaultDmlExecutionProvider());
 #endif
   tester.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
@@ -197,9 +197,8 @@ TEST(BiasGeluTest, BFloat16) {
 }
 #endif
 
+#if defined(USE_CUDA) || defined(USE_ROCM)
 TEST(MathOpTest, ComplexMul) {
-  if (DefaultCudaExecutionProvider() == nullptr) return;
-
   std::vector<float> input_a_data = {
       -0.5f, 0.6f};
 
@@ -219,13 +218,15 @@ TEST(MathOpTest, ComplexMul) {
   tester.AddOutput<float>("C", {4, 2}, output_data);
 
   std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
+#if defined(USE_CUDA)
   execution_providers.push_back(DefaultCudaExecutionProvider());
+#elif defined(USE_ROCM)
+  execution_providers.push_back(DefaultRocmExecutionProvider());
+#endif
   tester.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
 }
 
 TEST(MathOpTest, ComplexMulConj) {
-  if (DefaultCudaExecutionProvider() == nullptr) return;
-
   std::vector<float> input_a_data = {
       -0.5f, 0.6f};
 
@@ -245,13 +246,15 @@ TEST(MathOpTest, ComplexMulConj) {
   tester.AddOutput<float>("C", {4, 2}, output_data);
 
   std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
+#ifdef USE_CUDA
   execution_providers.push_back(DefaultCudaExecutionProvider());
+#elif defined(USE_ROCM)
+  execution_providers.push_back(DefaultRocmExecutionProvider());
+#endif
   tester.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
 }
 
 TEST(MathOpTest, ComplexMul_fp16) {
-  if (DefaultCudaExecutionProvider() == nullptr) return;
-
   std::vector<MLFloat16> input_a_data = {
       MLFloat16(-0.5f), MLFloat16(0.6f)};
 
@@ -271,13 +274,15 @@ TEST(MathOpTest, ComplexMul_fp16) {
   tester.AddOutput<MLFloat16>("C", {4, 2}, output_data);
 
   std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
+#ifdef USE_CUDA
   execution_providers.push_back(DefaultCudaExecutionProvider());
+#elif defined(USE_ROCM)
+  execution_providers.push_back(DefaultRocmExecutionProvider());
+#endif
   tester.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
 }
 
 TEST(MathOpTest, ComplexMulConj_fp16) {
-  if (DefaultCudaExecutionProvider() == nullptr) return;
-
   std::vector<MLFloat16> input_a_data = {
       MLFloat16(-0.5f), MLFloat16(0.6f)};
 
@@ -297,9 +302,14 @@ TEST(MathOpTest, ComplexMulConj_fp16) {
   tester.AddOutput<MLFloat16>("C", {4, 2}, output_data);
 
   std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
+#ifdef USE_CUDA
   execution_providers.push_back(DefaultCudaExecutionProvider());
+#elif defined(USE_ROCM)
+  execution_providers.push_back(DefaultRocmExecutionProvider());
+#endif
   tester.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
 }
+#endif
 
 }  // namespace test
 }  // namespace onnxruntime
diff --git a/onnxruntime/test/contrib_ops/fft_op_test.cc b/onnxruntime/test/contrib_ops/fft_op_test.cc
index eaadb95c8a0c0..56a6466c760f6 100644
--- a/onnxruntime/test/contrib_ops/fft_op_test.cc
+++ b/onnxruntime/test/contrib_ops/fft_op_test.cc
@@ -8,7 +8,15 @@
 namespace onnxruntime {
 namespace test {
 TEST(ContribOpTest, Rfft) {
-  if (DefaultCudaExecutionProvider() == nullptr) return;
+  if (DefaultCudaExecutionProvider() == nullptr && DefaultRocmExecutionProvider() == nullptr) return;
+
+  std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
+  if (DefaultCudaExecutionProvider() != nullptr) {
+    execution_providers.push_back(DefaultCudaExecutionProvider());
+  }
+  if (DefaultRocmExecutionProvider() != nullptr) {
+    execution_providers.push_back(DefaultRocmExecutionProvider());
+  }
 
   OpTester test("Rfft", 1, onnxruntime::kMSDomain);
   test.AddAttribute("signal_ndim", static_cast<int64_t>(1));
@@ -17,13 +25,19 @@ TEST(ContribOpTest, Rfft) {
   // Target values conputed using PyTorch torch.fft.rfft(X, dim=-1, norm="backward")
   test.AddInput<float>("X", {4, 4}, {0.8129f, 1.3108f, -0.8790f, -1.2046f, 0.1661f, -0.9831f, 0.5879f, 0.4918f, 1.2506f, 0.7244f, -2.6260f, -1.1268f, -1.6885f, 1.0439f, -0.2595f, 1.8780f});
   test.AddOutput<float>("Y", {4, 3, 2}, {0.0400f, 0.0000f, 1.6919f, -2.5154f, -0.1722f, 0.0000f, 0.2627f, 0.0000f, -0.4218f, 1.4748f, 1.2454f, 0.0000f, -1.7779f, 0.0000f, 3.8766f, -1.8512f, -0.9730f, 0.0000f, 0.9740f, 0.0000f, -1.4290f, 0.8341f, -4.8699f, 0.0000f});
-  std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
-  execution_providers.push_back(DefaultCudaExecutionProvider());
   test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
 }
 
 TEST(ContribOpTest, Irfft) {
-  if (DefaultCudaExecutionProvider() == nullptr) return;
+  if (DefaultCudaExecutionProvider() == nullptr && DefaultRocmExecutionProvider() == nullptr) return;
+
+  std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
+  if (DefaultCudaExecutionProvider() != nullptr) {
+    execution_providers.push_back(DefaultCudaExecutionProvider());
+  }
+  if (DefaultRocmExecutionProvider() != nullptr) {
+    execution_providers.push_back(DefaultRocmExecutionProvider());
+  }
 
   OpTester test("Irfft", 1, onnxruntime::kMSDomain);
   test.AddAttribute("signal_ndim", static_cast<int64_t>(1));
@@ -31,8 +45,6 @@ TEST(ContribOpTest, Irfft) {
   test.AddAttribute("normalized", static_cast<int64_t>(0));
   test.AddInput<float>("X", {4, 3, 2}, {0.0400f, 0.0000f, 1.6919f, -2.5154f, -0.1722f, 0.0000f, 0.2627f, 0.0000f, -0.4218f, 1.4748f, 1.2454f, 0.0000f, -1.7779f, 0.0000f, 3.8766f, -1.8512f, -0.9730f, 0.0000f, 0.9740f, 0.0000f, -1.4290f, 0.8341f, -4.8699f, 0.0000f});
   test.AddOutput<float>("Y", {4, 4}, {0.8129f, 1.3108f, -0.8790f, -1.2046f, 0.1661f, -0.9831f, 0.5879f, 0.4918f, 1.2506f, 0.7244f, -2.6260f, -1.1268f, -1.6885f, 1.0439f, -0.2595f, 1.8780f});
-  std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
-  execution_providers.push_back(DefaultCudaExecutionProvider());
   test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
 }
 }  // namespace test
diff --git a/onnxruntime/test/contrib_ops/gemm_float8_test.cc b/onnxruntime/test/contrib_ops/gemm_float8_test.cc
new file mode 100644
index 0000000000000..c022736075cde
--- /dev/null
+++ b/onnxruntime/test/contrib_ops/gemm_float8_test.cc
@@ -0,0 +1,126 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "gtest/gtest.h"
+#include "test/common/tensor_op_test_utils.h"
+#include "test/common/cuda_op_test_utils.h"
+#include "test/providers/provider_test_utils.h"
+
+namespace onnxruntime {
+namespace test {
+
+#if defined(USE_CUDA) && defined(CUDA_VERSION) && CUDA_VERSION >= 12000
+
+TEST(GemmFloat8OpTest, BFloat16) {
+  OpTester test("GemmFloat8", 1, onnxruntime::kMSDomain);
+  test.AddAttribute("transA", (int64_t)0);
+  test.AddAttribute("transB", (int64_t)0);
+  test.AddAttribute("alpha", 1.0f);
+  test.AddAttribute("beta", 1.0f);
+  test.AddAttribute("activation", "NONE");
+  test.AddAttribute("dtype", static_cast<int64_t>(ONNX_NAMESPACE::TensorProto_DataType_BFLOAT16));
+  test.AddInput<BFloat16>("A", {2, 4}, MakeBFloat16({1.0f, 2.0f, 3.0f, 4.0f, -1.0f, -2.0f, -3.0f, -4.0f}));
+  test.AddInput<BFloat16>("B", {4, 3}, MakeBFloat16({1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f}));
+  test.AddInput<BFloat16>("C", {2, 3}, MakeBFloat16({1.f, 1.f, 1.f, 1.f, 1.f, 1.f}));
+  test.AddOutput<BFloat16>("Y", {2, 3}, MakeBFloat16({11.0f, 11.0f, 11.0f, -9.0f, -9.0f, -9.0f}));
+  std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
+  execution_providers.push_back(DefaultCudaExecutionProvider());
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
+}
+
+TEST(GemmFloat8OpTest, Float) {
+  OpTester test("GemmFloat8", 1, onnxruntime::kMSDomain);
+  test.AddAttribute("transA", (int64_t)0);
+  test.AddAttribute("transB", (int64_t)0);
+  test.AddAttribute("alpha", 1.0f);
+  test.AddAttribute("beta", 1.0f);
+  test.AddAttribute("activation", "NONE");
+  test.AddAttribute("dtype", static_cast<int64_t>(ONNX_NAMESPACE::TensorProto_DataType_FLOAT));
+  test.AddInput<float>("A", {2, 4}, std::vector<float>({1.0f, 2.0f, 3.0f, 4.0f, -1.0f, -2.0f, -3.0f, -4.0f}));
+  test.AddInput<float>("B", {4, 3}, std::vector<float>({1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f}));
+  test.AddInput<float>("C", {2, 3}, std::vector<float>({1.f, 1.f, 1.f, 1.f, 1.f, 1.f}));
+  test.AddOutput<float>("Y", {2, 3}, std::vector<float>({11.0f, 11.0f, 11.0f, -9.0f, -9.0f, -9.0f}));
+  std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
+  execution_providers.push_back(DefaultCudaExecutionProvider());
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
+}
+
+std::vector<MLFloat16> _Cvt(const std::vector<float>& tensor) {
+  std::vector<MLFloat16> fp16_data(tensor.size());
+  ConvertFloatToMLFloat16(tensor.data(), fp16_data.data(), static_cast<int>(tensor.size()));
+  return fp16_data;
+}
+
+TEST(GemmFloat8OpTest, Float16) {
+  OpTester test("GemmFloat8", 1, onnxruntime::kMSDomain);
+  test.AddAttribute("transA", (int64_t)0);
+  test.AddAttribute("transB", (int64_t)0);
+  test.AddAttribute("alpha", 1.0f);
+  test.AddAttribute("beta", 1.0f);
+  test.AddAttribute("activation", "NONE");
+  test.AddAttribute("dtype", static_cast<int64_t>(ONNX_NAMESPACE::TensorProto_DataType_FLOAT16));
+  test.AddInput<MLFloat16>("A", {2, 4}, _Cvt(std::vector<float>({1.0f, 2.0f, 3.0f, 4.0f, -1.0f, -2.0f, -3.0f, -4.0f})));
+  test.AddInput<MLFloat16>("B", {4, 3}, _Cvt(std::vector<float>({1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f})));
+  test.AddInput<MLFloat16>("C", {2, 3}, _Cvt(std::vector<float>({1.f, 1.f, 1.f, 1.f, 1.f, 1.f})));
+  test.AddOutput<MLFloat16>("Y", {2, 3}, _Cvt(std::vector<float>({11.0f, 11.0f, 11.0f, -9.0f, -9.0f, -9.0f})));
+  std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
+  execution_providers.push_back(DefaultCudaExecutionProvider());
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
+}
+
+#if (!defined(DISABLE_FLOAT8_TYPES)) && (CUDA_VERSION >= 12000)
+
+template <typename T>
+std::vector<T> _TypedCvt(const std::vector<float>& tensor);
+
+template <>
+std::vector<float> _TypedCvt(const std::vector<float>& tensor) {
+  return tensor;
+}
+
+template <>
+std::vector<Float8E4M3FN> _TypedCvt(const std::vector<float>& tensor) {
+  std::vector<Float8E4M3FN> out(tensor.size());
+  for (size_t i = 0; i < tensor.size(); ++i) {
+    out[i] = Float8E4M3FN(tensor[i]);
+  }
+  return out;
+}
+
+template <typename ab_type, typename out_type>
+void TestGemmFloat8WithFloat8(int64_t dtype) {
+  int min_cuda_architecture = 11080;
+  if (!HasCudaEnvironment(min_cuda_architecture)) {
+    LOGS_DEFAULT(WARNING) << "Hardware NOT support Matrix Multiplication for FLOAT8";
+    return;
+  }
+  OpTester test("GemmFloat8", 1, onnxruntime::kMSDomain);
+  test.AddAttribute("transA", (int64_t)0);
+  test.AddAttribute("transB", (int64_t)1);
+  test.AddAttribute("alpha", 1.0f);
+  test.AddAttribute("beta", 1.0f);
+  test.AddAttribute("activation", "NONE");
+  test.AddAttribute("dtype", dtype);
+  test.AddInput<ab_type>("A", {2, 4}, _TypeCvt<ap_type>(std::vector<float>({1.0f, 2.0f, 3.0f, 4.0f, -1.0f, -2.0f, -3.0f, -4.0f})));
+  test.AddInput<ab_type>("B", {3, 4}, _TypeCvt<ap_type>(std::vector<float>({1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f})));
+  test.AddInput<out_type>("C", {2, 3}, _TypeCvt<out_type>(std::vector<float>({1.f, 1.f, 1.f, 1.f, 1.f, 1.f})));
+  test.AddOutput<MLFloat16>("Y", {2, 3}, _TypeCvt<out_type>(std::vector<float>({11.0f, 11.0f, 11.0f, -9.0f, -9.0f, -9.0f})));
+  std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
+  execution_providers.push_back(DefaultCudaExecutionProvider());
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
+}
+
+TEST(GemmFloat8OpTest, Float8E4M3FNToFloat) {
+  TestGemmFloat8WithFloat8<Float8E4M3FN, float>(static_cast<int64_t>(ONNX_NAMESPACE::TensorProto_DataType_FLOAT));
+}
+
+TEST(GemmFloat8OpTest, Float8E4M3FNToFloat8E4M3FN) {
+  TestGemmFloat8WithFloat8<Float8E4M3FN, Float8E4M3FN>(static_cast<int64_t>(ONNX_NAMESPACE::TensorProto_DataType_FLOAT8E4M3FN));
+}
+
+#endif
+
+#endif
+
+}  // namespace test
+}  // namespace onnxruntime
diff --git a/onnxruntime/test/contrib_ops/greedy_search_test.cc b/onnxruntime/test/contrib_ops/greedy_search_test.cc
index f5259c1391f38..1baf50c1ba616 100644
--- a/onnxruntime/test/contrib_ops/greedy_search_test.cc
+++ b/onnxruntime/test/contrib_ops/greedy_search_test.cc
@@ -50,12 +50,26 @@ TEST(GreedySearchTest, GptGreedySearchFp16_VocabPadded) {
   const char* input_names[] = {"input_ids", "max_length", "min_length", "repetition_penalty"};
   const char* const output_names[] = {"sequences"};
 
-  constexpr int min_cuda_architecture = 530;
-  if (HasCudaEnvironment(min_cuda_architecture)) {
-    Ort::SessionOptions session_options;
 #ifdef USE_CUDA
-    Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(session_options, 0));
+  constexpr int min_cuda_architecture = 530;
+  bool is_cuda = HasCudaEnvironment(min_cuda_architecture);
+#else
+  bool is_cuda = false;
 #endif
+#ifdef USE_ROCM
+  bool is_rocm = true;
+#else
+  bool is_rocm = false;
+#endif
+
+  if (is_cuda || is_rocm) {
+    Ort::SessionOptions session_options;
+    if (is_cuda) {
+      Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(session_options, 0));
+    }
+    if (is_rocm) {
+      Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_ROCM(session_options, 0));
+    }
 
     // The following model was obtained by padding the vocabulary size in testdata/transformers/tiny_gpt2_beamsearch_fp16.onnx
     // (by making beam_size == 1) from 1000 to 1600 (just for illustrative and testing purposes) to see if the greedy search
@@ -117,12 +131,26 @@ TEST(GreedySearchTest, GptGreedySearchFp32) {
   const char* input_names[] = {"input_ids", "max_length", "min_length", "repetition_penalty"};
   const char* const output_names[] = {"sequences"};
 
-  constexpr int min_cuda_architecture = 530;
-  if (HasCudaEnvironment(min_cuda_architecture)) {
-    Ort::SessionOptions session_options;
 #ifdef USE_CUDA
-    Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(session_options, 0));
+  constexpr int min_cuda_architecture = 530;
+  bool is_cuda = HasCudaEnvironment(min_cuda_architecture);
+#else
+  bool is_cuda = false;
 #endif
+#ifdef USE_ROCM
+  bool is_rocm = true;
+#else
+  bool is_rocm = false;
+#endif
+
+  if (is_cuda || is_rocm) {
+    Ort::SessionOptions session_options;
+    if (is_cuda) {
+      Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(session_options, 0));
+    }
+    if (is_rocm) {
+      Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_ROCM(session_options, 0));
+    }
 
     Ort::Session session(*ort_env, ORT_TSTR("testdata/transformers/tiny_gpt2_greedysearch_with_init_decoder.onnx"), session_options);
 
diff --git a/onnxruntime/test/contrib_ops/matmul_4bits_test.cc b/onnxruntime/test/contrib_ops/matmul_4bits_test.cc
new file mode 100644
index 0000000000000..3c6217915bef0
--- /dev/null
+++ b/onnxruntime/test/contrib_ops/matmul_4bits_test.cc
@@ -0,0 +1,170 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#ifndef ORT_MINIMAL_BUILD
+
+#include "core/common/span_utils.h"
+#include "core/framework/tensor.h"
+#include "core/mlas/inc/mlas_q4.h"
+#include "core/mlas/inc/mlas.h"
+#include "core/session/inference_session.h"
+#include "test/common/tensor_op_test_utils.h"
+#include "test/framework/test_utils.h"
+#include "test/optimizer/graph_transform_test_builder.h"
+#include "test/providers/provider_test_utils.h"
+#include "test/util/include/default_providers.h"
+#include "core/util/qmath.h"
+
+#include <chrono>
+#include <random>
+
+#include "gtest/gtest.h"
+#include "gmock/gmock.h"
+
+namespace onnxruntime {
+namespace test {
+
+static constexpr int QBits = 4;
+
+void QuantizeDequantize(std::vector<float>& raw_vals,
+                        std::vector<uint8_t>& quant_vals,
+                        std::vector<float>& scales,
+                        std::vector<uint8_t>* zp,
+                        int32_t N,
+                        int32_t K,
+                        int32_t block_size) {
+  OrtThreadPoolParams to;
+  auto tp = concurrency::CreateThreadPool(&onnxruntime::Env::Default(), to,
+                                          concurrency::ThreadPoolType::INTRA_OP);
+
+  MlasQuantizeBlockwise<float, 4>(
+      quant_vals.data(),
+      scales.data(),
+      zp != nullptr ? zp->data() : nullptr,
+      raw_vals.data(),
+      block_size,
+      true,
+      K,
+      N,
+      N,
+      tp.get());
+
+  // Note that input1_f_vals is NxK after dequant
+  MlasDequantizeBlockwise<float, 4>(
+      raw_vals.data(),                       // dequantized output
+      quant_vals.data(),                     // quantized input
+      scales.data(),                         // quantization scales
+      zp != nullptr ? zp->data() : nullptr,  // quantization zero points
+      block_size,                            // quantization block size
+      true,                                  // columnwise quantization
+      K,                                     // number of rows
+      N,                                     // number of columns
+      tp.get());
+}
+
+void RunTest(int64_t M, int64_t N, int64_t K, int64_t block_size, bool has_zeropoint, bool use_float16) {
+  RandomValueGenerator random{1234};
+  std::vector<float> input0_vals(random.Gaussian<float>(std::vector<int64_t>({M, K}), 0.0f, 0.25f));
+  std::vector<float> input1_f_vals(random.Gaussian<float>(std::vector<int64_t>({K, N}), 0.0f, 0.25f));
+
+#if 0  // for Debugging
+  std::vector<float> input1_f_vals_trans(N * K);
+  MlasTranspose(input1_f_vals.data(), input1_f_vals_trans.data(), K, N);
+#endif
+
+  int q_rows, q_cols;
+  MlasBlockwiseQuantizedShape<float, 4>((int)block_size, true, (int)K, (int)N, q_rows, q_cols);
+
+  size_t q_data_size_in_bytes, q_scale_size, q_zp_size_in_bytes;
+  MlasBlockwiseQuantizedBufferSizes(4, static_cast<int>(block_size), /* columnwise */ true,
+                                    static_cast<int>(K), static_cast<int>(N),
+                                    q_data_size_in_bytes, q_scale_size, &q_zp_size_in_bytes);
+
+  std::vector<uint8_t> input1_vals(q_data_size_in_bytes);
+  std::vector<float> scales(q_scale_size);
+  std::vector<uint8_t> zp(q_zp_size_in_bytes);
+
+  QuantizeDequantize(input1_f_vals,
+                     input1_vals,
+                     scales,
+                     has_zeropoint ? &zp : nullptr,
+                     static_cast<int32_t>(N),
+                     static_cast<int32_t>(K),
+                     static_cast<int32_t>(block_size));
+
+  std::vector<float> expected_vals(M * N);
+  for (int64_t m = 0; m < M; m++) {
+    for (int64_t n = 0; n < N; n++) {
+      float sum = 0.0f;
+      for (int64_t k = 0; k < K; k++) {
+        sum += input0_vals[m * K + k] * input1_f_vals[n * K + k];
+      }
+      expected_vals[m * N + n] = sum;
+    }
+  }
+
+  OpTester test("MatMulNBits", 1, kMSDomain);
+  test.AddAttribute<int64_t>("K", K);
+  test.AddAttribute<int64_t>("N", N);
+  test.AddAttribute<int64_t>("block_size", block_size);
+  test.AddAttribute<int64_t>("bits", QBits);
+  if (use_float16) {
+    test.AddInput<MLFloat16>("A", {M, K}, ToFloat16(input0_vals), false);
+    test.AddInput<uint8_t>("B", {q_cols, q_rows}, input1_vals, true);
+    test.AddInput<MLFloat16>("scales", {static_cast<int64_t>(q_scale_size)}, ToFloat16(scales), true);
+    if (has_zeropoint) {
+      test.AddInput<uint8_t>("zero_points", {static_cast<int64_t>(q_zp_size_in_bytes)}, zp, true);
+    }
+
+    test.AddOutput<MLFloat16>("Y", {M, N}, ToFloat16(expected_vals));
+    test.SetOutputAbsErr("Y", 0.02f);
+
+    std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
+    execution_providers.push_back(DefaultCudaExecutionProvider());
+    test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
+  } else {
+    test.AddInput<float>("A", {M, K}, input0_vals, false);
+    test.AddInput<uint8_t>("B", {q_cols, q_rows}, input1_vals, true);
+    test.AddInput<float>("scales", {static_cast<int64_t>(q_scale_size)}, scales, true);
+    if (has_zeropoint) {
+      test.AddInput<uint8_t>("zero_points", {static_cast<int64_t>(q_zp_size_in_bytes)}, zp, true);
+    }
+
+    test.AddOutput<float>("Y", {M, N}, expected_vals);
+
+    test.Run();
+  }
+}
+
+TEST(MatMulNBits, Float32) {
+  for (auto M : {1, 2, 100}) {
+    for (auto N : {1, 2, 32, 288}) {
+      for (auto K : {16, 32, 64, 128, 256, 1024, 93, 1234}) {
+        for (auto block_size : {16, 32, 64, 128}) {
+          RunTest(M, N, K, block_size, false, false);
+          RunTest(M, N, K, block_size, true, false);
+        }
+      }
+    }
+  }
+}
+
+#if defined(USE_CUDA)
+TEST(MatMulNBits, Float16) {
+  for (auto M : {1, 2, 100}) {
+    for (auto N : {1, 2, 32, 288}) {
+      for (auto K : {16, 32, 64, 128, 256, 1024, 93, 1234}) {
+        for (auto block_size : {16, 32, 64, 128}) {
+          RunTest(M, N, K, block_size, false, true);
+          RunTest(M, N, K, block_size, true, true);
+        }
+      }
+    }
+  }
+}
+
+#endif
+}  // namespace test
+}  // namespace onnxruntime
+
+#endif  // ORT_MINIMAL_BUILD
diff --git a/onnxruntime/test/contrib_ops/matmul_bnb4_test.cc b/onnxruntime/test/contrib_ops/matmul_bnb4_test.cc
new file mode 100644
index 0000000000000..e739b17d5885f
--- /dev/null
+++ b/onnxruntime/test/contrib_ops/matmul_bnb4_test.cc
@@ -0,0 +1,151 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#ifndef ORT_MINIMAL_BUILD
+
+#include "core/common/span_utils.h"
+#include "core/framework/tensor.h"
+#include "core/mlas/inc/mlas_q4.h"
+#include "core/mlas/inc/mlas.h"
+#include "core/session/inference_session.h"
+#include "test/common/tensor_op_test_utils.h"
+#include "test/framework/test_utils.h"
+#include "test/optimizer/graph_transform_test_builder.h"
+#include "test/providers/provider_test_utils.h"
+#include "test/util/include/default_providers.h"
+#include "core/util/qmath.h"
+#include "contrib_ops/cpu/quantization/dequantize_blockwise_bnb4.h"
+
+#include <chrono>
+#include <random>
+
+#include "gtest/gtest.h"
+#include "gmock/gmock.h"
+
+namespace onnxruntime {
+namespace test {
+
+void QuantizeDequantizeBnb4(std::vector<float>& raw_vals,  // N X K
+                            std::vector<uint8_t>& quant_vals,
+                            std::vector<float>& absmax,
+                            int32_t quant_type,
+                            int32_t N,
+                            int32_t K,
+                            int32_t block_size) {
+  OrtThreadPoolParams to;
+  auto tp = concurrency::CreateThreadPool(&onnxruntime::Env::Default(), to,
+                                          concurrency::ThreadPoolType::INTRA_OP);
+
+  contrib::QuantizeBlockwiseBnb4<float>(
+      quant_vals.data(),
+      raw_vals.data(),
+      absmax.data(),
+      block_size,
+      quant_type,
+      N,
+      K,
+      tp.get());
+
+  contrib::DequantizeBlockwiseBnb4<float>(
+      raw_vals.data(),
+      quant_vals.data(),
+      absmax.data(),
+      block_size,
+      quant_type,
+      N,
+      K,
+      tp.get());
+}
+
+void RunTest(int64_t quant_type, int64_t M, int64_t N, int64_t K, int64_t block_size, bool use_float16) {
+  RandomValueGenerator random{1234};
+  std::vector<float> input0_vals(random.Gaussian<float>(std::vector<int64_t>({M, K}), 0.0f, 0.25f));
+  // quantizer expects transposed weights, N X K
+  std::vector<float> input1_f_vals(random.Gaussian<float>(std::vector<int64_t>({N, K}), 0.0f, 0.25f));
+
+  int64_t numel = N * K;
+  int64_t quantized_numel = (numel + 1) / 2;
+  int64_t total_block_count = (numel + block_size - 1) / block_size;
+  std::vector<uint8_t> input1_vals(quantized_numel);
+  std::vector<float> absmax(total_block_count);
+
+  QuantizeDequantizeBnb4(input1_f_vals,
+                         input1_vals,
+                         absmax,
+                         static_cast<int32_t>(quant_type),
+                         static_cast<int32_t>(N),
+                         static_cast<int32_t>(K),
+                         static_cast<int32_t>(block_size));
+
+  std::vector<float> expected_vals(M * N);
+  for (int64_t m = 0; m < M; m++) {
+    for (int64_t n = 0; n < N; n++) {
+      float sum = 0.0f;
+      for (int64_t k = 0; k < K; k++) {
+        sum += input0_vals[m * K + k] * input1_f_vals[n * K + k];
+      }
+      expected_vals[m * N + n] = sum;
+    }
+  }
+
+  OpTester test("MatMulBnb4", 1, kMSDomain);
+  test.AddAttribute<int64_t>("K", K);
+  test.AddAttribute<int64_t>("N", N);
+  test.AddAttribute<int64_t>("block_size", block_size);
+  test.AddAttribute<int64_t>("quant_type", quant_type);
+  if (use_float16) {
+    test.AddInput<MLFloat16>("A", {M, K}, ToFloat16(input0_vals), false);
+    test.AddInput<uint8_t>("B", {quantized_numel}, input1_vals, true);
+    test.AddInput<MLFloat16>("absmax", {total_block_count}, ToFloat16(absmax), true);
+
+    test.AddOutput<MLFloat16>("Y", {M, N}, ToFloat16(expected_vals));
+    test.SetOutputAbsErr("Y", 0.02f);
+
+    std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
+    execution_providers.push_back(DefaultCudaExecutionProvider());
+    test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
+  } else {
+    test.AddInput<float>("A", {M, K}, input0_vals, false);
+    test.AddInput<uint8_t>("B", {quantized_numel}, input1_vals, true);
+    test.AddInput<float>("absmax", {total_block_count}, absmax, true);
+
+    test.AddOutput<float>("Y", {M, N}, expected_vals);
+
+    test.Run();
+  }
+}
+
+TEST(MatMulBnb4, Float32) {
+  for (auto qt : {0, 1}) {
+    for (auto M : {1, 2, 100}) {
+      for (auto N : {1, 2, 32, 288}) {
+        for (auto K : {16, 32, 64, 128, 256, 1024, 93, 1234}) {
+          for (auto block_size : {16, 32, 64, 128}) {
+            RunTest(qt, M, N, K, block_size, false);
+          }
+        }
+      }
+    }
+  }
+}
+
+#if defined(USE_CUDA)
+TEST(MatMulBnb4, Float16) {
+  for (auto qt : {0, 1}) {
+    for (auto M : {1, 2, 100}) {
+      for (auto N : {1, 2, 32, 288}) {
+        for (auto K : {16, 32, 64, 128, 256, 1024, 93, 1234}) {
+          for (auto block_size : {16, 32, 64, 128}) {
+            RunTest(qt, M, N, K, block_size, true);
+          }
+        }
+      }
+    }
+  }
+}
+
+#endif
+}  // namespace test
+}  // namespace onnxruntime
+
+#endif  // ORT_MINIMAL_BUILD
diff --git a/onnxruntime/test/contrib_ops/matmul_fpq4_test.cc b/onnxruntime/test/contrib_ops/matmul_fpq4_test.cc
index dd886ed1c6f5b..09ae5eddb122c 100644
--- a/onnxruntime/test/contrib_ops/matmul_fpq4_test.cc
+++ b/onnxruntime/test/contrib_ops/matmul_fpq4_test.cc
@@ -24,7 +24,7 @@ namespace onnxruntime {
 namespace test {
 
 TEST(MatMulFpQ4, MatMul2DSym) {
-  // (100 x 41) X (41 x 288)
+  // (100 x 52) X (52 x 288)
   constexpr int64_t M = 100;
   constexpr int64_t N = 288;
   constexpr int64_t K = 52;
diff --git a/onnxruntime/test/contrib_ops/moe_test.cc b/onnxruntime/test/contrib_ops/moe_test.cc
new file mode 100644
index 0000000000000..ebb0261deefa5
--- /dev/null
+++ b/onnxruntime/test/contrib_ops/moe_test.cc
@@ -0,0 +1,423 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "gtest/gtest.h"
+#include "test/common/tensor_op_test_utils.h"
+#include "test/common/cuda_op_test_utils.h"
+#include "test/providers/provider_test_utils.h"
+
+namespace onnxruntime {
+namespace test {
+
+static void RunMoETest(
+    const std::vector<float>& input,
+    const std::vector<float>& router_probs,
+    const std::vector<float>& fc1_experts_weights,
+    const std::vector<float>& fc2_experts_weights,
+    const std::vector<float>& fc1_experts_bias,
+    const std::vector<float>& fc2_experts_bias,
+    const std::vector<float>& output_data,
+    int num_rows,
+    int num_experts,
+    int hidden_size,
+    int inter_size,
+    std::string activation_type,
+    bool use_float16 = false) {
+  int min_cuda_architecture = use_float16 ? 530 : 0;
+
+  bool enable_cuda = HasCudaEnvironment(min_cuda_architecture);
+  if (enable_cuda) {
+    OpTester tester("MoE", 1, onnxruntime::kMSDomain);
+    tester.AddAttribute<int64_t>("k", static_cast<int64_t>(1));
+    tester.AddAttribute<std::string>("activation_type", activation_type);
+
+    std::vector<int64_t> input_dims = {num_rows, hidden_size};
+    std::vector<int64_t> router_probs_dims = {num_rows, num_experts};
+    std::vector<int64_t> fc1_experts_weights_dims = {num_experts, hidden_size, inter_size};
+    std::vector<int64_t> fc2_experts_weights_dims = {num_experts, inter_size, hidden_size};
+    std::vector<int64_t> fc1_experts_bias_dims = {num_experts, inter_size};
+    std::vector<int64_t> fc2_experts_bias_dims = {num_experts, hidden_size};
+    std::vector<int64_t> output_dims = {num_rows, hidden_size};
+
+    if (use_float16) {
+      tester.AddInput<MLFloat16>("input", input_dims, ToFloat16(input));
+      tester.AddInput<MLFloat16>("router_probs", router_probs_dims, ToFloat16(router_probs));
+      tester.AddInput<MLFloat16>("fc1_experts_weights", fc1_experts_weights_dims, ToFloat16(fc1_experts_weights));
+      tester.AddInput<MLFloat16>("fc2_experts_weights", fc2_experts_weights_dims, ToFloat16(fc2_experts_weights));
+      tester.AddInput<MLFloat16>("fc1_experts_bias", fc1_experts_bias_dims, ToFloat16(fc1_experts_bias));
+      tester.AddInput<MLFloat16>("fc2_experts_bias", fc2_experts_bias_dims, ToFloat16(fc2_experts_bias));
+      tester.AddOutput<MLFloat16>("output", output_dims, ToFloat16(output_data));
+    } else {
+      tester.AddInput<float>("input", input_dims, input);
+      tester.AddInput<float>("router_probs", router_probs_dims, router_probs);
+      tester.AddInput<float>("fc1_experts_weights", fc1_experts_weights_dims, fc1_experts_weights);
+      tester.AddInput<float>("fc2_experts_weights", fc2_experts_weights_dims, fc2_experts_weights);
+      tester.AddInput<float>("fc1_experts_bias", fc1_experts_bias_dims, fc1_experts_bias);
+      tester.AddInput<float>("fc2_experts_bias", fc2_experts_bias_dims, fc2_experts_bias);
+      tester.AddOutput<float>("output", output_dims, output_data);
+    }
+
+    std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
+    execution_providers.push_back(DefaultCudaExecutionProvider());
+    tester.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
+  }
+}
+
+TEST(MoETest, MoETest_Gelu) {
+  int num_rows = 4;
+  int num_experts = 4;
+  int hidden_size = 8;
+  int inter_size = 16;
+
+  const std::vector<float> input = {
+      -1.1200173f, -0.45884353f, -1.2929888f, 1.0784022f, 0.116372705f, 0.26902613f, -1.8818876f, -0.5457026f,
+      0.22222236f, -0.28868636f, 0.6692926f, 1.4944887f, 0.02431708f, -0.49781424f, 0.7378293f, 1.276276f,
+      -0.15469065f, -0.28456813f, -0.6296439f, -0.24855971f, 0.80565417f, -1.1018785f, -0.74082595f, 0.82407707f,
+      -0.95033455f, 0.659333f, -0.68629056f, -0.2916592f, 1.869919f, -1.1053563f, -0.14417848f, -0.34625578f};
+  const std::vector<float> router_probs = {
+      -0.84837115f, 0.100507565f, -0.10548311f, 0.40957215f, 1.0159845f, 0.26919764f, 0.021741152f, -0.34184334f,
+      -0.71324956f, 0.29018253f, -0.18227568f, 0.31496462f, -0.48426327f, -1.006643f, -0.100081146f, -0.07692295f};
+  const std::vector<float> fc1_experts_weights = {
+      0.14731085f, 0.52229995f, 0.14753294f, 0.22475791f, 0.20864725f, 0.6708725f, 0.20204341f, 0.4890914f,
+      0.52103406f, 0.8223115f, 0.122039974f, 0.15674388f, 0.20966923f, 0.8499667f, 0.3202675f, 0.92174435f,
+      0.6808038f, 0.563313f, 0.496278f, 0.40115923f, 0.5627332f, 0.38582766f, 0.49648678f, 0.5637965f,
+      0.10889745f, 0.23793429f, 0.90374637f, 0.09422666f, 0.4640969f, 0.99461937f, 0.6806185f, 0.5141565f,
+      0.066695035f, 0.74768895f, 0.14385962f, 0.35806787f, 0.33224183f, 0.4259563f, 0.50546914f, 0.91240376f,
+      0.5624194f, 0.9478464f, 0.8058562f, 0.18389302f, 0.72425205f, 0.14655197f, 0.28808743f, 0.64706135f,
+      0.66509604f, 0.875114f, 0.33904207f, 0.50080043f, 0.7574118f, 0.016453922f, 0.8614903f, 0.08653879f,
+      0.50689125f, 0.41499162f, 0.23666352f, 0.5660855f, 0.91345936f, 0.35384023f, 0.20315295f, 0.31508058f,
+      0.0044258237f, 0.725697f, 0.25986814f, 0.16632986f, 0.21194929f, 0.787478f, 0.76478684f, 0.8837609f,
+      0.68136156f, 0.33302015f, 0.36027592f, 0.647715f, 0.91101736f, 0.6359461f, 0.26342732f, 0.2649613f,
+      0.02726549f, 0.608024f, 0.21940875f, 0.054212093f, 0.93843824f, 0.1752944f, 0.44311923f, 0.64324677f,
+      0.51592916f, 0.16355914f, 0.09583914f, 0.8985412f, 0.58141935f, 0.91481227f, 0.3323797f, 0.6472777f,
+      0.3856619f, 0.47776443f, 0.1954779f, 0.66910046f, 0.65808296f, 0.4896857f, 0.38754892f, 0.1917851f,
+      0.8457724f, 0.12778795f, 0.70483273f, 0.33187324f, 0.258766f, 0.58982253f, 0.24027151f, 0.6152024f,
+      0.5981904f, 0.12875527f, 0.5832493f, 0.7129646f, 0.6979155f, 0.43706065f, 0.09010619f, 0.42292297f,
+      0.67365384f, 0.31756145f, 0.68979055f, 0.8329813f, 0.2389242f, 0.5049309f, 0.7067495f, 0.5391889f,
+      0.54176575f, 0.5624327f, 0.10692614f, 0.5392941f, 0.8462349f, 0.9505569f, 0.79387546f, 0.5670015f,
+      0.7335071f, 0.25676018f, 0.08565581f, 0.07003945f, 0.99880487f, 0.8173947f, 0.15438312f, 0.6956213f,
+      0.8775838f, 0.9998074f, 0.93719745f, 0.8873769f, 0.38537037f, 0.32452917f, 0.9105244f, 0.7801898f,
+      0.19911051f, 0.9495086f, 0.7415793f, 0.77256775f, 0.18661183f, 0.6434499f, 0.32471877f, 0.8906783f,
+      0.4100297f, 0.69465625f, 0.5888109f, 0.7127341f, 0.33008623f, 0.7437857f, 0.15076452f, 0.6129275f,
+      0.16170406f, 0.006731212f, 0.09847212f, 0.89473504f, 0.7705178f, 0.96910787f, 0.9005606f, 0.053477287f,
+      0.15878445f, 0.4192087f, 0.17528385f, 0.84719825f, 0.121996105f, 0.25604928f, 0.016954303f, 0.21612722f,
+      0.91123873f, 0.90938f, 0.85791886f, 0.88606364f, 0.94459325f, 0.3719685f, 0.72000104f, 0.9454652f,
+      0.6654094f, 0.9998382f, 0.75933146f, 0.81082416f, 0.32500392f, 0.73991376f, 0.5574533f, 0.38059133f,
+      0.21814507f, 0.21944171f, 0.11525959f, 0.83566517f, 0.8554656f, 0.44309366f, 0.210657f, 0.88645273f,
+      0.81974447f, 0.537167f, 0.26393235f, 0.9595239f, 0.70447034f, 0.12042731f, 0.97854143f, 0.8796869f,
+      0.31775457f, 0.78107727f, 0.21590549f, 0.42164284f, 0.9245506f, 0.52065957f, 0.14639091f, 0.33288354f,
+      0.36427742f, 0.4035356f, 0.5478503f, 0.9624148f, 0.5267702f, 0.19128f, 0.52562714f, 0.7397436f,
+      0.7480201f, 0.04303074f, 0.41052878f, 0.12842774f, 0.2866572f, 0.6801467f, 0.1449349f, 0.68586344f,
+      0.92438906f, 0.5327942f, 0.16675615f, 0.32085752f, 0.60918206f, 0.11884099f, 0.74840516f, 0.04606521f,
+      0.01935333f, 0.014169693f, 0.39856833f, 0.83621645f, 0.026760519f, 0.91559356f, 0.29998857f, 0.64644206f,
+      0.52280146f, 0.049140453f, 0.9146645f, 0.7692217f, 0.99699783f, 0.7526061f, 0.1699655f, 0.9172919f,
+      0.5268722f, 0.73710823f, 0.09908545f, 0.35618675f, 0.009061217f, 0.30525374f, 0.6078656f, 0.10741913f,
+      0.6593821f, 0.7684034f, 0.56965464f, 0.16545832f, 0.11234015f, 0.3457417f, 0.7194791f, 0.9931982f,
+      0.7875145f, 0.44369537f, 0.6753082f, 0.009468555f, 0.07294935f, 0.73330396f, 0.2167924f, 0.74054784f,
+      0.14703393f, 0.25234455f, 0.08815551f, 0.76092035f, 0.44905245f, 0.88480055f, 0.8094361f, 0.7766713f,
+      0.51607805f, 0.345411f, 0.39128417f, 0.5664503f, 0.74785477f, 0.14970505f, 0.91963893f, 0.44563496f,
+      0.08102721f, 0.22947109f, 0.94240886f, 0.9572636f, 0.036860168f, 0.85264915f, 0.7505796f, 0.79595923f,
+      0.9232646f, 0.23052484f, 0.6578879f, 0.7046166f, 0.35225332f, 0.66732657f, 0.3561433f, 0.80913067f,
+      0.3612727f, 0.31360215f, 0.6258745f, 0.6773468f, 0.25571418f, 0.54419917f, 0.78976786f, 0.45025164f,
+      0.65216696f, 0.3794065f, 0.6752498f, 0.1378029f, 0.2059856f, 0.24620473f, 0.95950544f, 0.36545795f,
+      0.49863482f, 0.25775224f, 0.99914503f, 0.9883351f, 0.122906685f, 0.09466505f, 0.12100351f, 0.49758863f,
+      0.37254804f, 0.17272717f, 0.32066393f, 0.59446543f, 0.23875463f, 0.61079127f, 0.38534206f, 0.25771832f,
+      0.56869274f, 0.9111291f, 0.16196036f, 0.5232172f, 0.31561613f, 0.99065316f, 0.025618374f, 0.0206694f,
+      0.9926925f, 0.18365502f, 0.5958617f, 0.45684695f, 0.3946715f, 0.3883261f, 0.8177203f, 0.5238985f,
+      0.013192713f, 0.20481992f, 0.32954985f, 0.7516082f, 0.17643315f, 0.9714598f, 0.38863534f, 0.410219f,
+      0.891779f, 0.75130385f, 0.92406017f, 0.7892222f, 0.34832305f, 0.1682638f, 0.46279848f, 0.9138188f,
+      0.3321901f, 0.036315024f, 0.7049642f, 0.9867357f, 0.3576584f, 0.08598822f, 0.046470165f, 0.6252997f,
+      0.46214014f, 0.24750638f, 0.60106593f, 0.6898794f, 0.8976595f, 0.8881911f, 0.42515814f, 0.059116423f,
+      0.048188448f, 0.9668448f, 0.7210276f, 0.7179537f, 0.06738949f, 0.96300787f, 0.97367156f, 0.95143014f,
+      0.07820749f, 0.3113383f, 0.1561181f, 0.9734828f, 0.28516f, 0.27172273f, 0.76195645f, 0.26870382f,
+      0.25373894f, 0.45626426f, 0.45194024f, 0.11051077f, 0.91683406f, 0.27943915f, 0.67735744f, 0.9348918f,
+      0.7521582f, 0.57078993f, 0.9254285f, 0.5672131f, 0.2686717f, 0.97299975f, 0.61834025f, 0.012159586f,
+      0.3576542f, 0.15941626f, 0.9383765f, 0.41742706f, 0.044237554f, 0.46856833f, 0.81400645f, 0.6299002f,
+      0.6581022f, 0.5464366f, 0.68640935f, 0.378174f, 0.3010999f, 0.032645762f, 0.12333155f, 0.71670127f,
+      0.20394331f, 0.57173324f, 0.6595957f, 0.53540194f, 0.17582512f, 0.9781642f, 0.20925027f, 0.9112503f,
+      0.10224587f, 0.37972575f, 0.7719844f, 0.29570967f, 0.9200215f, 0.15592176f, 0.080114245f, 0.27454042f,
+      0.5808252f, 0.96037793f, 0.26129955f, 0.6788141f, 0.37464648f, 0.39156884f, 0.8676517f, 0.112507045f,
+      0.55310667f, 0.9702046f, 0.4312939f, 0.88821906f, 0.3460216f, 0.9024811f, 0.016334832f, 0.42793816f,
+      0.4121768f, 0.6620425f, 0.6961637f, 0.88390845f, 0.425507f, 0.48017246f, 0.8424056f, 0.36471343f,
+      0.9383168f, 0.16709393f, 0.44589508f, 0.47314453f, 0.72310495f, 0.84183806f, 0.4207481f, 0.0857597f,
+      0.7477461f, 0.6495659f, 0.70084965f, 0.19156617f, 0.8217978f, 0.9735775f, 0.5433857f, 0.032975793f,
+      0.85099494f, 0.12927437f, 0.61493605f, 0.5726589f, 0.26598173f, 0.6740978f, 0.052783668f, 0.61387974f};
+  const std::vector<float> fc2_experts_weights = {
+      0.18302453f, 0.44593316f, 0.5643144f, 0.9259722f, 0.26143986f, 0.82031804f, 0.4364831f, 0.2625361f,
+      0.06460017f, 0.04124081f, 0.98830533f, 0.37530023f, 0.5249744f, 0.63555616f, 0.8398661f, 0.92673707f,
+      0.9055086f, 0.12955844f, 0.4198916f, 0.20413119f, 0.21432412f, 0.6186035f, 0.969324f, 0.099448025f,
+      0.80260223f, 0.24076664f, 0.40261286f, 0.89688545f, 0.38691485f, 0.5455279f, 0.15048373f, 0.92562044f,
+      0.43536508f, 0.13430476f, 0.64640516f, 0.14449131f, 0.10324633f, 0.5304596f, 0.8964218f, 0.358508f,
+      0.73533344f, 0.9296606f, 0.83163047f, 0.23771948f, 0.44519007f, 0.34265757f, 0.09793854f, 0.5002066f,
+      0.87621754f, 0.9212578f, 0.54665035f, 0.6135615f, 0.28353918f, 0.8774212f, 0.29194576f, 0.1526736f,
+      0.57699674f, 0.7996927f, 0.04920423f, 0.95198375f, 0.67986554f, 0.14969361f, 0.39229625f, 0.93378997f,
+      0.11638266f, 0.3538614f, 0.66399014f, 0.06195748f, 0.7740991f, 0.7602738f, 0.81010276f, 0.18122643f,
+      0.9980005f, 0.20361924f, 0.99917024f, 0.020154774f, 0.054515004f, 0.80709815f, 0.55225646f, 0.52884465f,
+      0.22312081f, 0.29026228f, 0.35380626f, 0.012922287f, 0.52598435f, 0.58842945f, 0.4995767f, 0.66146517f,
+      0.9744255f, 0.632942f, 0.3169638f, 0.29422665f, 0.18009722f, 0.15339059f, 0.41947508f, 0.4115672f,
+      0.72243124f, 0.2862816f, 0.89860183f, 0.14915991f, 0.5014211f, 0.94945997f, 0.99719256f, 0.21036887f,
+      0.5890645f, 0.55906135f, 0.26557416f, 0.32725257f, 0.635427f, 0.1523174f, 0.58249784f, 0.71636236f,
+      0.30296493f, 0.9153206f, 0.46709478f, 0.72685635f, 0.9951532f, 0.34716582f, 0.7717041f, 0.3569854f,
+      0.4269635f, 0.41526443f, 0.4968937f, 0.3111158f, 0.61719346f, 0.5188402f, 0.8169449f, 0.39879733f,
+      0.5501401f, 0.31400484f, 0.08127314f, 0.7023336f, 0.56397897f, 0.29975814f, 0.33094752f, 0.63076067f,
+      0.40959156f, 0.82673794f, 0.52832156f, 0.68886834f, 0.7178481f, 0.37731683f, 0.71633244f, 0.86896664f,
+      0.5230092f, 0.59784645f, 0.5181678f, 0.8461837f, 0.28890234f, 0.23421508f, 0.7178768f, 0.06484294f,
+      0.5080162f, 0.27005446f, 0.8300168f, 0.034480453f, 0.8031663f, 0.9946784f, 0.60117006f, 0.46668667f,
+      0.9921749f, 0.28632385f, 0.45993322f, 0.28104752f, 0.43097937f, 0.60866946f, 0.5667807f, 0.40556252f,
+      7.969141e-05f, 0.52560204f, 0.48518902f, 0.5752184f, 0.8831251f, 0.9860047f, 0.20335877f, 0.46882278f,
+      0.2996632f, 0.03917718f, 0.13617045f, 0.96928054f, 0.79153055f, 0.76857555f, 0.7778716f, 0.102760494f,
+      0.5525096f, 0.9653573f, 0.22095704f, 0.94479716f, 0.63141924f, 0.8517718f, 0.28580618f, 0.73050886f,
+      0.05675614f, 0.46825224f, 0.6667756f, 0.6499472f, 0.91840404f, 0.99132854f, 0.9548785f, 0.8356961f,
+      0.851531f, 0.43548512f, 0.111976564f, 0.31438643f, 0.44386774f, 0.22980672f, 0.75558543f, 0.6755136f,
+      0.58067596f, 0.62078035f, 0.93922615f, 0.6821157f, 0.061530292f, 0.13705963f, 0.7203748f, 0.5681396f,
+      0.7438458f, 0.0006400347f, 0.038565338f, 0.8066132f, 0.81982285f, 0.047644496f, 0.68979263f, 0.109577894f,
+      0.8786539f, 0.6568952f, 0.99439347f, 0.0070040226f, 0.018661916f, 0.838051f, 0.94391155f, 0.80634f,
+      0.8324149f, 0.078864336f, 0.8619068f, 0.027926445f, 0.61170083f, 0.17248261f, 0.30140227f, 0.5885344f,
+      0.30341f, 0.42088854f, 0.02608782f, 0.02856338f, 0.69368154f, 0.28836077f, 0.19580519f, 0.30270886f,
+      0.09121573f, 0.100299895f, 0.79918617f, 0.75412107f, 0.56660175f, 0.22687018f, 0.6663505f, 0.5224626f,
+      0.1426636f, 0.6075949f, 0.95527196f, 0.008196831f, 0.0028039217f, 0.5640625f, 0.87651116f, 0.19575512f,
+      0.61006856f, 0.85149264f, 0.6541582f, 0.6082054f, 0.998863f, 0.82573634f, 0.21878648f, 0.54321826f,
+      0.7554362f, 0.94095474f, 0.002533555f, 0.77075267f, 0.35483408f, 0.010389388f, 0.610987f, 0.22779316f,
+      0.5708561f, 0.17537653f, 0.12373549f, 0.4575745f, 0.33203715f, 0.79243237f, 0.54310906f, 0.8902793f,
+      0.5937015f, 0.33921933f, 0.8386668f, 0.52732253f, 0.59384584f, 0.3391887f, 0.5017944f, 0.40386343f,
+      0.45749134f, 0.110060334f, 0.49692506f, 0.084977865f, 0.3924346f, 0.7897731f, 0.15232486f, 0.16297412f,
+      0.37791175f, 0.36293298f, 0.5846437f, 0.5830078f, 0.75354826f, 0.15555972f, 0.4647144f, 0.7796456f,
+      0.93248576f, 0.46352726f, 0.2106899f, 0.6437313f, 0.78473866f, 0.18762505f, 0.20985329f, 0.7209991f,
+      0.464967f, 0.02775067f, 0.21170747f, 0.7027664f, 0.33041215f, 0.8451145f, 0.89526993f, 0.57273495f,
+      0.46046263f, 0.34128642f, 0.47471708f, 0.59101045f, 0.11807448f, 0.38050216f, 0.08409953f, 0.80687743f,
+      0.18158185f, 0.9567719f, 0.3711096f, 0.21356237f, 0.74022657f, 0.57453954f, 0.846228f, 0.70873487f,
+      0.018330276f, 0.8162452f, 0.40584308f, 0.27901447f, 0.81752694f, 0.86466515f, 0.060534656f, 0.45478833f,
+      0.9106033f, 0.6936434f, 0.92123467f, 0.32865065f, 0.22417879f, 0.9299548f, 0.70841146f, 0.97999126f,
+      0.2911517f, 0.17896658f, 0.44139355f, 0.029210031f, 0.6959876f, 0.8687942f, 0.62002844f, 0.45059657f,
+      0.74790317f, 0.18262434f, 0.98912156f, 0.0028281808f, 0.021027386f, 0.38184917f, 0.90842223f, 0.5500629f,
+      0.69202286f, 0.13349658f, 0.6823429f, 0.44412827f, 0.7004118f, 0.8531213f, 0.7173401f, 0.4574679f,
+      0.46920043f, 0.18640989f, 0.31914896f, 0.82491904f, 0.29950172f, 0.8105199f, 0.30173403f, 0.38355058f,
+      0.5106411f, 0.04116726f, 0.49500751f, 0.44960213f, 0.45508182f, 0.4000479f, 0.89418864f, 0.8689936f,
+      0.16112137f, 0.7322634f, 0.10780871f, 0.07433933f, 0.652841f, 0.50734824f, 0.26674682f, 0.017748117f,
+      0.30643195f, 0.66699976f, 0.03719926f, 0.014267266f, 0.56343627f, 0.13979793f, 0.061959863f, 0.3073569f,
+      0.41949958f, 0.045647383f, 0.16613615f, 0.5327839f, 0.028514147f, 0.4297228f, 0.17714864f, 0.15338135f,
+      0.6965155f, 0.11515516f, 0.1210829f, 0.78514075f, 0.59348315f, 0.9553564f, 0.36635226f, 0.25849247f,
+      0.45372677f, 0.5025297f, 0.88132215f, 0.0019600391f, 0.46439964f, 0.7211761f, 0.22465849f, 0.2459296f,
+      0.7416339f, 0.020907402f, 0.6184779f, 0.112906754f, 0.7485309f, 0.072479784f, 0.8074024f, 0.026683688f,
+      0.07971662f, 0.50736845f, 0.8939942f, 0.0718022f, 0.27697015f, 0.9391413f, 0.4161513f, 0.7071423f,
+      0.019000888f, 0.34275955f, 0.24608392f, 0.9215306f, 0.70751995f, 0.13516217f, 0.5806135f, 0.49425328f,
+      0.29456508f, 0.21446168f, 0.3340807f, 0.89411324f, 0.14157385f, 0.14382833f, 0.34574044f, 0.50869817f,
+      0.63610595f, 0.51500404f, 0.37963718f, 0.19682491f, 0.41028368f, 0.29872334f, 0.9039644f, 0.013295233f,
+      0.1810705f, 0.093204916f, 0.4086216f, 0.8896367f, 0.9382696f, 0.06472236f, 0.47833657f, 0.7934831f,
+      0.7203987f, 0.9095519f, 0.4861309f, 0.16405362f, 0.83076525f, 0.3285427f, 0.7588931f, 0.37678176f,
+      0.71254706f, 0.949713f, 0.96492773f, 0.044967473f, 0.16925985f, 0.2932666f, 0.18114948f, 0.97975004f,
+      0.4558406f, 0.16832972f, 0.27750528f, 0.2238177f, 0.7039947f, 0.06387442f, 0.033798456f, 0.007119417f};
+  const std::vector<float> fc1_experts_bias = {
+      0.71526206f, 0.7472273f, 0.18946046f, 0.6239893f, 0.86909235f, 0.5726507f, 0.3942092f, 0.5369412f,
+      0.44638616f, 0.7517496f, 0.16049433f, 0.75355124f, 0.7818118f, 0.19706267f, 0.9082818f, 0.9910924f,
+      0.30288565f, 0.3599528f, 0.74917775f, 0.10828978f, 0.697729f, 0.61665237f, 0.81516486f, 0.0656966f,
+      0.0846076f, 0.72456455f, 0.6801054f, 0.034616888f, 0.22117025f, 0.042510748f, 0.14178854f, 0.27440017f,
+      0.91376925f, 0.40047455f, 0.7871756f, 0.97484046f, 0.7278661f, 0.052394807f, 0.75161135f, 0.6907173f,
+      0.8875328f, 0.0067828894f, 0.807508f, 0.9092707f, 0.034817636f, 0.55231315f, 0.92683655f, 0.13634592f,
+      0.66405964f, 0.7209387f, 0.63104504f, 0.9971379f, 0.9093898f, 0.9289774f, 0.4376766f, 0.9193563f,
+      0.03404367f, 0.23018533f, 0.39305943f, 0.3514716f, 0.96184736f, 0.73583263f, 0.8219065f, 0.8401047f};
+  const std::vector<float> fc2_experts_bias = {
+      0.12649822f, 0.4420895f, 0.5730123f, 0.63004625f, 0.7571163f, 0.3010466f, 0.3492328f, 0.91837066f,
+      0.36580783f, 0.15267932f, 0.8390199f, 0.83857775f, 0.34321654f, 0.40003997f, 0.13106f, 0.08245313f,
+      0.68802476f, 0.28640372f, 0.89804775f, 0.09964341f, 0.43088746f, 0.5107959f, 0.75697356f, 0.90466535f,
+      0.83860224f, 0.720098f, 0.2705031f, 0.14292616f, 0.052693605f, 0.5248023f, 0.9849401f, 0.40502876f};
+  const std::vector<float> output = {
+      0.2552814f, 0.17651685f, 0.0034551744f, -0.123282805f, 0.0073816925f, 0.004265253f, 0.16927283f, -0.05276826f,
+      9.555821f, 7.6907287f, 10.626425f, 7.0543795f, 8.10093f, 10.3664465f, 10.925815f, 8.737018f,
+      0.565234f, 0.17098689f, 0.10810414f, 0.43916586f, 0.3535297f, 0.45673048f, 0.3853893f, 0.18613164f,
+      1.3354061f, 0.5049282f, 0.72775036f, 0.90331376f, 1.2945517f, 0.9123066f, 1.1995136f, 0.7708638f};
+
+  RunMoETest(input,
+             router_probs,
+             fc1_experts_weights,
+             fc2_experts_weights,
+             fc1_experts_bias,
+             fc2_experts_bias,
+             output,
+             num_rows,
+             num_experts,
+             hidden_size,
+             inter_size,
+             "gelu");
+}
+
+TEST(MoETest, MoETest_Relu) {
+  int num_rows = 4;
+  int num_experts = 4;
+  int hidden_size = 8;
+  int inter_size = 16;
+
+  const std::vector<float> input = {
+      0.7670296f, -0.93721074f, -2.330477f, -0.78088343f, 0.8250065f, 1.2206652f, -0.06297584f, 1.1463639f,
+      1.2215378f, -0.31372663f, -0.7234253f, -0.3627346f, 0.44249064f, 0.19418247f, -0.49998695f, -0.55005103f,
+      0.023851749f, -1.5203826f, 0.52939993f, -0.39082858f, -1.9291036f, 0.034976702f, -0.48336256f, -1.226073f,
+      -0.33963847f, 0.0073261578f, -0.0521804f, 1.16749f, 1.7302082f, 2.0561688f, -0.2347232f, -1.3456243f};
+  const std::vector<float> router_probs = {
+      -0.08146476f, -0.40439552f, 1.0100367f, -0.7724162f, -0.08113786f, -0.36328858f, 0.3688482f, -0.013465762f,
+      -0.32420647f, -0.3815508f, 0.79585606f, 0.14430691f, -0.21869831f, 0.11483674f, -0.11992836f, 0.35216537f};
+  const std::vector<float> fc1_experts_weights = {
+      0.81960344f, 0.9296998f, 0.45050132f, 0.38805157f, 0.50729614f, 0.47014588f, 0.62020564f, 0.6401168f,
+      0.045871615f, 0.31548113f, 0.92106473f, 0.6947775f, 0.4751312f, 0.19854712f, 0.19409746f, 0.052116573f,
+      0.3370188f, 0.6688521f, 0.8188108f, 0.73084867f, 0.058027983f, 0.19931877f, 0.42109168f, 0.98367476f,
+      0.57232875f, 0.37051463f, 0.7068576f, 0.30955923f, 0.17637217f, 0.8649436f, 0.2726491f, 0.39976662f,
+      0.0025978684f, 0.8346353f, 0.8788173f, 0.6822241f, 0.1513629f, 0.0065300465f, 0.093910515f, 0.8728501f,
+      0.7400529f, 0.9207522f, 0.76193494f, 0.6265461f, 0.49510366f, 0.11974698f, 0.07161391f, 0.032325685f,
+      0.704681f, 0.254516f, 0.3993737f, 0.21224737f, 0.40888822f, 0.14808255f, 0.17329216f, 0.6658554f,
+      0.3514018f, 0.8086716f, 0.33959562f, 0.13321638f, 0.41178054f, 0.2576263f, 0.3470292f, 0.024002194f,
+      0.77974546f, 0.15189773f, 0.75130886f, 0.7268921f, 0.85721636f, 0.11647397f, 0.8595984f, 0.2636242f,
+      0.6855346f, 0.96955734f, 0.42948407f, 0.49613327f, 0.38488472f, 0.08250773f, 0.73995143f, 0.003641069f,
+      0.81039995f, 0.87411255f, 0.9728532f, 0.38206023f, 0.08917904f, 0.61241513f, 0.77621365f, 0.0023456216f,
+      0.38650817f, 0.20027226f, 0.45626813f, 0.25389326f, 0.2956162f, 0.34127057f, 0.024847984f, 0.91025376f,
+      0.9191656f, 0.42156547f, 0.44305897f, 0.29594004f, 0.04846859f, 0.013427794f, 0.6858292f, 0.22547692f,
+      0.17856151f, 0.4609884f, 0.33349442f, 0.3382396f, 0.5160656f, 0.3939438f, 0.3278438f, 0.26059705f,
+      0.0930863f, 0.9192536f, 0.29990643f, 0.63248974f, 0.32651705f, 0.54063064f, 0.9661502f, 0.73036134f,
+      0.06670016f, 0.6984514f, 0.9746214f, 0.63154167f, 0.83521235f, 0.99294376f, 0.4233855f, 0.6037772f,
+      0.15248245f, 0.39696145f, 0.8702919f, 0.7563229f, 0.18360549f, 0.099057496f, 0.15831816f, 0.00656116f,
+      0.114180505f, 0.3763513f, 0.8374386f, 0.5836911f, 0.11969727f, 0.09888804f, 0.74873763f, 0.12807935f,
+      0.43843627f, 0.739853f, 0.26859397f, 0.44548005f, 0.45647776f, 0.38170832f, 0.24648392f, 0.054280818f,
+      0.0958215f, 0.23226917f, 0.98291886f, 0.25849265f, 0.16423601f, 0.6211971f, 0.63780516f, 0.77395487f,
+      0.8800602f, 0.7784371f, 0.004249513f, 0.5443443f, 0.80287653f, 0.45378727f, 0.20536041f, 0.9766699f,
+      0.31298608f, 0.21532774f, 0.04922247f, 0.52233416f, 0.72156656f, 0.6106814f, 0.59887487f, 0.12080628f,
+      0.03305638f, 0.5088047f, 0.95591706f, 0.7884607f, 0.20888287f, 0.43509573f, 0.13140821f, 0.2587883f,
+      0.5905492f, 0.77226925f, 0.91418463f, 0.04094696f, 0.8343076f, 0.14735395f, 0.6872336f, 0.92312264f,
+      0.5070212f, 0.9549045f, 0.07397425f, 0.3090204f, 0.79162645f, 0.39106607f, 0.39764988f, 0.29160416f,
+      0.84465307f, 0.7452516f, 0.66022503f, 0.21901816f, 0.09412521f, 0.5540803f, 0.6481394f, 0.26914406f,
+      0.36010116f, 0.83768386f, 0.53982985f, 0.52255917f, 0.37694973f, 0.04720515f, 0.029871285f, 0.26099247f,
+      0.2458393f, 0.6557768f, 0.35444462f, 0.30438894f, 0.9767149f, 0.67416143f, 0.85645115f, 0.25794363f,
+      0.2957666f, 0.68377024f, 0.16686243f, 0.17314798f, 0.47585016f, 0.31711966f, 0.125171f, 0.7965795f,
+      0.90208143f, 0.58111167f, 0.41294336f, 0.036863506f, 0.31788063f, 0.6272928f, 0.73576546f, 0.43679124f,
+      0.30232358f, 0.77861303f, 0.10180014f, 0.816009f, 0.30602258f, 0.5076527f, 0.40119207f, 0.5606195f,
+      0.3489008f, 0.8635635f, 0.48700142f, 0.89029974f, 0.98074025f, 0.25640452f, 0.13524544f, 0.901151f,
+      0.89180696f, 0.11822635f, 0.46134835f, 0.006936848f, 0.09070045f, 0.59657127f, 0.6330173f, 0.6059905f,
+      0.36391765f, 0.96128887f, 0.571489f, 0.2049576f, 0.4716931f, 0.6200726f, 0.67509633f, 0.14645958f,
+      0.6873948f, 0.24455917f, 0.08452982f, 0.22689629f, 0.9822047f, 0.9274289f, 0.9477422f, 0.7935056f,
+      0.87772477f, 0.43307513f, 0.22488606f, 0.7498283f, 0.24090862f, 0.16256708f, 0.34033298f, 0.6049296f,
+      0.7573983f, 0.3057955f, 0.20571685f, 0.56744653f, 0.2052834f, 0.17446929f, 0.76062596f, 0.4160077f,
+      0.9568925f, 0.9863913f, 0.64955276f, 0.67207885f, 0.61514187f, 0.50783044f, 0.46363378f, 0.50687206f,
+      0.6867124f, 0.9648854f, 0.37042046f, 0.2886421f, 0.37891757f, 0.25843787f, 0.58501935f, 0.8732242f,
+      0.8909887f, 0.72956276f, 0.13203424f, 0.23164761f, 0.3901443f, 0.40783793f, 0.54112387f, 0.041014254f,
+      0.65562236f, 0.11856395f, 0.18362767f, 0.08430874f, 0.9356598f, 0.026530087f, 0.8771834f, 0.48319155f,
+      0.4418506f, 0.81273925f, 0.4537862f, 0.81357706f, 0.8615075f, 0.06589496f, 0.692392f, 0.5943895f,
+      0.60750586f, 0.5729957f, 0.6367655f, 0.2594666f, 0.43602943f, 0.97506f, 0.83592474f, 0.48121578f,
+      0.029734552f, 0.5219139f, 0.15951324f, 0.90659577f, 0.19645631f, 0.4638992f, 0.38902867f, 0.5889769f,
+      0.9705138f, 0.5475096f, 0.789582f, 0.8881108f, 0.9036556f, 0.32732427f, 0.38817167f, 0.7409689f,
+      0.36356616f, 0.734132f, 0.39076614f, 0.16087383f, 0.70352167f, 0.576659f, 0.7229242f, 0.996743f,
+      0.84136647f, 0.97399056f, 0.5267614f, 0.06989372f, 0.14923638f, 0.18941313f, 0.059375823f, 0.24937624f,
+      0.039716125f, 0.038692355f, 0.20122272f, 0.0070830584f, 0.19309378f, 0.69065434f, 0.9170264f, 0.3512686f,
+      0.3545606f, 0.76697665f, 0.25331455f, 0.26358372f, 0.80806476f, 0.064349174f, 0.5611374f, 0.941691f,
+      0.58574325f, 0.6359719f, 0.20880443f, 0.49310172f, 0.5274922f, 0.62271714f, 0.694273f, 0.9344639f,
+      0.11835027f, 0.51498765f, 0.25018185f, 0.10446805f, 0.45996118f, 0.059881568f, 0.8489496f, 0.5579074f,
+      0.23052096f, 0.76128954f, 0.02678603f, 0.3066004f, 0.40259063f, 0.07512486f, 0.18205583f, 0.4183907f,
+      0.8793823f, 0.9828271f, 0.8181312f, 0.20143801f, 0.17288941f, 0.9363466f, 0.6768587f, 0.51328385f,
+      0.56766605f, 0.098151624f, 0.33305728f, 0.98130906f, 0.3766839f, 0.47491795f, 0.08483446f, 0.22029644f,
+      0.4897902f, 0.18942028f, 0.4379952f, 0.7034796f, 0.0109113455f, 0.64850605f, 0.16939592f, 0.25597447f,
+      0.69195485f, 0.8975601f, 0.36334568f, 0.29471546f, 0.04788208f, 0.24217117f, 0.062181532f, 0.38556474f,
+      0.6020277f, 0.03156215f, 0.93655676f, 0.81369543f, 0.010527074f, 0.2611835f, 0.6630776f, 0.3972702f,
+      0.44551176f, 0.27424216f, 0.9016098f, 0.22050089f, 0.9146384f, 0.53226113f, 0.6005109f, 0.8900659f,
+      0.4176172f, 0.21532834f, 0.4191329f, 0.9055267f, 0.12900633f, 0.6134902f, 0.008604288f, 0.76215106f,
+      0.68473387f, 0.5211961f, 0.71459657f, 0.50056237f, 0.7766764f, 0.10418975f, 0.42657375f, 0.7218073f,
+      0.9979084f, 0.7546957f, 0.1364128f, 0.8845484f, 0.38850087f, 0.39324278f, 0.04554516f, 0.42129284f,
+      0.8536634f, 0.5697224f, 0.20877302f, 0.65390605f, 0.3396778f, 0.956497f, 0.066022694f, 0.34206223f,
+      0.017213225f, 0.3030849f, 0.6576238f, 0.9813073f, 0.58397317f, 0.99017924f, 0.59782606f, 0.788768f,
+      0.9008311f, 0.91796166f, 0.22013813f, 0.959695f, 0.80288273f, 0.2662105f, 0.26139832f, 0.080626905f};
+  const std::vector<float> fc2_experts_weights = {
+      0.6255686f, 0.09472537f, 0.71121234f, 0.65789884f, 0.065598905f, 0.63625044f, 0.45933473f, 0.7284089f,
+      0.7868948f, 0.0029274821f, 0.95854944f, 0.919321f, 0.6989418f, 0.043019474f, 0.32138962f, 0.35509557f,
+      0.37150103f, 0.78196156f, 0.6817853f, 0.89608955f, 0.31273842f, 0.6682699f, 0.6778976f, 0.08370459f,
+      0.014990091f, 0.24055547f, 0.84227383f, 0.029270172f, 0.0647831f, 0.7801003f, 0.7697645f, 0.91119635f,
+      0.12253064f, 0.13405013f, 0.75649333f, 0.9348151f, 0.7991694f, 0.57832605f, 0.66478735f, 0.97456336f,
+      0.17739785f, 0.2729941f, 0.8497335f, 0.15788019f, 0.22429371f, 0.86499554f, 0.65776104f, 0.661535f,
+      0.2880798f, 0.49309975f, 0.9576164f, 0.19988996f, 0.5039311f, 0.73779976f, 0.15482187f, 0.98558843f,
+      0.25019473f, 0.379932f, 0.36471486f, 0.17417055f, 0.009367704f, 0.7819258f, 0.63283706f, 0.031699598f,
+      0.1781866f, 0.994184f, 0.6911175f, 0.7006223f, 0.20085096f, 0.28080195f, 0.42452294f, 0.40856004f,
+      0.15737581f, 0.5411925f, 0.549694f, 0.4366895f, 0.5693159f, 0.3018247f, 0.63012594f, 0.6885702f,
+      0.2366305f, 0.004210472f, 0.7617172f, 0.61926836f, 0.24570602f, 0.981851f, 0.273876f, 0.8378734f,
+      0.75366426f, 0.080795944f, 0.82247066f, 0.040263534f, 0.22299266f, 0.41664255f, 0.16297674f, 0.98845494f,
+      0.39971018f, 0.69859487f, 0.053544044f, 0.7878332f, 0.34460813f, 0.11966437f, 0.5731115f, 0.7422309f,
+      0.93269855f, 0.19460368f, 0.25394785f, 0.59613144f, 0.6356306f, 0.6922361f, 0.7744376f, 0.38662314f,
+      0.7777848f, 0.8686458f, 0.36938924f, 0.8557286f, 0.74428976f, 0.9410264f, 0.21586305f, 0.2530955f,
+      0.35543054f, 0.52536315f, 0.8000995f, 0.21456867f, 0.750327f, 0.3208093f, 0.80205464f, 0.47626138f,
+      0.061956525f, 0.22487706f, 0.13812399f, 0.74798125f, 0.1647259f, 0.45834088f, 0.6078779f, 0.22580266f,
+      0.644235f, 0.011788309f, 0.14224577f, 0.0469383f, 0.34876132f, 0.3178513f, 0.5715967f, 0.40754277f,
+      0.735041f, 0.9583977f, 0.67939556f, 0.30301625f, 0.031807184f, 0.68110096f, 0.25227106f, 0.75443816f,
+      0.83424246f, 0.69286025f, 0.9691554f, 0.9748982f, 0.60586995f, 0.13568163f, 0.94672066f, 0.26275212f,
+      0.2638232f, 0.9183893f, 0.88740516f, 0.65107566f, 0.5313419f, 0.07941705f, 0.44809794f, 0.9795632f,
+      0.6273294f, 0.542809f, 0.3961745f, 0.32560885f, 0.79801136f, 0.53083426f, 0.8252871f, 0.4115007f,
+      0.7184546f, 0.70638496f, 0.57973206f, 0.8141865f, 0.81332296f, 0.96346164f, 0.88438797f, 0.37215167f,
+      0.0766899f, 0.5914087f, 0.49563587f, 0.3695873f, 0.41627264f, 0.5235164f, 0.86481494f, 0.6558706f,
+      0.32245284f, 0.29438752f, 0.37618434f, 0.3067485f, 0.9496114f, 0.76482266f, 0.95148784f, 0.5015968f,
+      0.60083544f, 0.67338234f, 0.026723444f, 0.5446483f, 0.466555f, 0.21967298f, 0.112026334f, 0.9426372f,
+      0.906533f, 0.73173434f, 0.97712487f, 0.29709607f, 0.41363865f, 0.6893093f, 0.4173867f, 0.4018826f,
+      0.086719275f, 0.63433063f, 0.1978364f, 0.5181831f, 0.9874878f, 0.34609234f, 0.34240413f, 0.8016564f,
+      0.31617337f, 0.4570613f, 0.96686924f, 0.29501313f, 0.14229488f, 0.22017813f, 0.36137718f, 0.26275063f,
+      0.24053413f, 0.70197225f, 0.58496886f, 0.33996922f, 0.11154431f, 0.34257007f, 0.28898042f, 0.33729053f,
+      0.048938513f, 0.60771453f, 0.13263822f, 0.11060041f, 0.091483414f, 0.70869184f, 0.19898665f, 0.29362458f,
+      0.8919203f, 0.7654821f, 0.7866956f, 0.02524674f, 0.1414501f, 0.3112445f, 0.9130488f, 0.5511502f,
+      0.12605143f, 0.5031309f, 0.11166459f, 0.39045036f, 0.36251247f, 0.9328308f, 0.65486836f, 0.41281444f,
+      0.5844644f, 0.35566723f, 0.6964502f, 0.6977819f, 0.63427305f, 0.30511153f, 0.92657536f, 0.42781502f,
+      0.30534166f, 0.813157f, 0.90752834f, 0.9975799f, 0.64812917f, 0.32955307f, 0.753946f, 0.92897725f,
+      0.009582937f, 0.43805653f, 0.15901726f, 0.5931799f, 0.7067924f, 0.39670604f, 0.45817143f, 0.7250554f,
+      0.41596514f, 0.08011025f, 0.900068f, 0.24834275f, 0.44507074f, 0.5471632f, 0.46995157f, 0.029657006f,
+      0.7294f, 0.27288425f, 0.2406702f, 0.6194577f, 0.23906898f, 0.26892018f, 0.33152503f, 0.3121612f,
+      0.29118127f, 0.36515707f, 0.6299379f, 0.095391035f, 0.19735986f, 0.5072957f, 0.56953406f, 0.77614623f,
+      0.14877802f, 0.65959847f, 0.7841949f, 0.7776301f, 0.03428924f, 0.3091979f, 0.07021719f, 0.18359429f,
+      0.77849144f, 0.42534047f, 0.7123557f, 0.20649683f, 0.57597995f, 0.19757104f, 0.749946f, 0.2813105f,
+      0.37462044f, 0.06618434f, 0.50165176f, 0.9747401f, 0.7426891f, 0.23322952f, 0.50672436f, 0.44517577f,
+      0.09746289f, 0.89204556f, 0.50806034f, 0.6052985f, 0.2980855f, 0.26604044f, 0.5824448f, 0.68485546f,
+      0.612149f, 0.25902748f, 0.9854489f, 0.4263978f, 0.19379246f, 0.26614368f, 0.9922104f, 0.5000241f,
+      0.4321279f, 0.2919191f, 0.3689273f, 0.078885734f, 0.10265827f, 0.79264474f, 0.9277247f, 0.9771502f,
+      0.13902885f, 0.77043164f, 0.19051671f, 0.7982801f, 0.86077714f, 0.8869355f, 0.86002564f, 0.81278664f,
+      0.5097318f, 0.7297412f, 0.32111454f, 0.7177174f, 0.33929902f, 0.49160433f, 0.064810574f, 0.3692627f,
+      0.23706353f, 0.3313396f, 0.18070674f, 0.05027789f, 0.53255826f, 0.8244896f, 0.9553747f, 0.7917771f,
+      0.24083132f, 0.005495131f, 0.6896569f, 0.78015697f, 0.07074398f, 0.67929304f, 0.9227386f, 0.5302883f,
+      0.19877058f, 0.90993816f, 0.71350795f, 0.8311006f, 0.16185725f, 0.79097277f, 0.15846318f, 0.99474716f,
+      0.28815013f, 0.80128354f, 0.6001208f, 0.63250524f, 0.4233225f, 0.7053677f, 0.29161406f, 0.028710365f,
+      0.30789846f, 0.8917693f, 0.36836517f, 0.6571592f, 0.3151368f, 0.8750746f, 0.7992451f, 0.6765068f,
+      0.24441916f, 0.091435075f, 0.5188247f, 0.20667112f, 0.9110969f, 0.019512117f, 0.72343415f, 0.998457f,
+      0.7504142f, 0.6704894f, 0.01892668f, 0.9809466f, 0.41447622f, 0.032795787f, 0.9935814f, 0.29653466f,
+      0.4646262f, 0.95763975f, 0.15339965f, 0.14625502f, 0.58130866f, 0.43307304f, 0.6151709f, 0.08064735f,
+      0.5149533f, 0.27762014f, 0.25419557f, 0.04218155f, 0.7651092f, 0.59631824f, 0.077278376f, 0.89677596f,
+      0.6508104f, 0.5927816f, 0.2064318f, 0.57540226f, 0.9817701f, 0.84294224f, 0.11056489f, 0.9564106f,
+      0.5387549f, 0.74048257f, 0.88833815f, 0.9262546f, 0.11023259f, 0.93783194f, 0.16041255f, 0.53748304f,
+      0.1506182f, 0.39038336f, 0.47727865f, 0.44018233f, 0.42101204f, 0.53943527f, 0.99320936f, 0.79050577f,
+      0.77973497f, 0.7001237f, 0.88709056f, 0.4769255f, 0.5397561f, 0.60289854f, 0.06393474f, 0.09722155f,
+      0.5613007f, 0.30437487f, 0.49082512f, 0.3852706f, 0.5778314f, 0.8253078f, 0.33417904f, 0.9004303f,
+      0.8947809f, 0.11625093f, 0.11388689f, 0.09546256f, 0.22598988f, 0.30536187f, 0.46236527f, 0.3784039f,
+      0.24737573f, 0.3411532f, 0.31912774f, 0.9905191f, 0.31468558f, 0.14199954f, 0.7078488f, 0.47111923f,
+      0.882782f, 0.8124163f, 0.9593644f, 0.13382024f, 0.8214317f, 0.9196194f, 0.25308424f, 0.95958996f};
+  const std::vector<float> fc1_experts_bias = {
+      0.8748215f, 0.5054756f, 0.74107623f, 0.32518923f, 0.0639081f, 0.62639004f, 0.64906263f, 0.17322052f,
+      0.7424998f, 0.07288867f, 0.93031204f, 0.9841952f, 0.6361292f, 0.18628561f, 0.7433356f, 0.5852079f,
+      0.6359594f, 0.66432667f, 0.88067776f, 0.28508204f, 0.38752747f, 0.63635296f, 0.55448055f, 0.9031888f,
+      0.23738074f, 0.48179168f, 0.5934266f, 0.3672055f, 0.84085834f, 0.5546908f, 0.03788501f, 0.44583207f,
+      0.27322155f, 0.5485856f, 0.44189203f, 0.00403291f, 0.40888733f, 0.45211035f, 0.35256076f, 0.9593902f,
+      0.39090043f, 0.8212086f, 0.62385887f, 0.07793343f, 0.61749303f, 0.9143678f, 0.17294967f, 0.17681253f,
+      0.9894245f, 0.901755f, 0.221053f, 0.8008725f, 0.43603396f, 0.007035315f, 0.5375667f, 0.661547f,
+      0.35001957f, 0.67394173f, 0.072449565f, 0.84650797f, 0.92626715f, 0.77573335f, 0.58474565f, 0.66467446f};
+  const std::vector<float> fc2_experts_bias = {
+      0.13822609f, 0.3750633f, 0.45226622f, 0.22175694f, 0.13068998f, 0.8363088f, 0.8393226f, 0.045905888f,
+      0.65910596f, 0.7034011f, 0.97498417f, 0.78927684f, 0.95966834f, 0.33630514f, 0.8501932f, 0.9067007f,
+      0.027835965f, 0.09864664f, 0.6012027f, 0.7730189f, 0.25159347f, 0.55506724f, 0.49927413f, 0.62655383f,
+      0.23132521f, 0.7820195f, 0.8325047f, 0.15307087f, 0.5048437f, 0.5013873f, 0.66055787f, 0.96579224f};
+  const std::vector<float> output = {
+      1.3775184f, 2.0985768f, 2.091839f, 2.9706357f, 1.9404914f, 1.9915576f, 2.3302228f, 2.3702593f,
+      0.51896286f, 0.7936432f, 0.9944805f, 1.3225251f, 0.73894113f, 0.87975955f, 1.0468717f, 1.1585085f,
+      0.012911659f, 0.045757107f, 0.27884653f, 0.3585817f, 0.116771236f, 0.25755364f, 0.23161705f, 0.2906256f,
+      4.8571277f, 5.649453f, 5.485141f, 5.306299f, 4.767025f, 6.9010167f, 5.3520975f, 6.711155f};
+
+  RunMoETest(input,
+             router_probs,
+             fc1_experts_weights,
+             fc2_experts_weights,
+             fc1_experts_bias,
+             fc2_experts_bias,
+             output,
+             num_rows,
+             num_experts,
+             hidden_size,
+             inter_size,
+             "relu");
+}
+
+}  // namespace test
+}  // namespace onnxruntime
diff --git a/onnxruntime/test/contrib_ops/qordered_attention_test.cc b/onnxruntime/test/contrib_ops/qordered_attention_test.cc
index 24e4bff528285..1dd0162ad722f 100644
--- a/onnxruntime/test/contrib_ops/qordered_attention_test.cc
+++ b/onnxruntime/test/contrib_ops/qordered_attention_test.cc
@@ -240,12 +240,6 @@ static std::vector<int8_t> transpose(const T& src, size_t h, size_t w) {
 }
 
 TEST(QOrderedTest, Attention_WithData_ROW_ORDER) {
-  int cuda_runtime_version = 0;
-  // Need 11.4 or higher cuda runtime
-  if ((cudaRuntimeGetVersion(&cuda_runtime_version) != cudaSuccess) || (cuda_runtime_version < 11040)) {
-    return;
-  }
-
   // Needs Turing architecture
   if (NeedSkipIfCudaArchLowerThan(750) || NeedSkipIfCudaArchGreaterEqualThan(800)) {
     return;
diff --git a/onnxruntime/test/contrib_ops/qordered_longformer_attention_op_test.cc b/onnxruntime/test/contrib_ops/qordered_longformer_attention_op_test.cc
index 55209d9422fdd..06fe42ca989d7 100644
--- a/onnxruntime/test/contrib_ops/qordered_longformer_attention_op_test.cc
+++ b/onnxruntime/test/contrib_ops/qordered_longformer_attention_op_test.cc
@@ -34,12 +34,6 @@ static void run_qordered_longformer_attention_op_test(
     const int64_t head_size,
     const int64_t window,
     int64_t input_hidden_size = 0) {
-  int cuda_runtime_version = 0;
-  // Need 11.4 or higher cuda runtime
-  if ((cudaRuntimeGetVersion(&cuda_runtime_version) != cudaSuccess) || (cuda_runtime_version < 11040)) {
-    return;
-  }
-
   // Needs Turing architecture
   if (NeedSkipIfCudaArchLowerThan(750) || NeedSkipIfCudaArchGreaterEqualThan(800)) {
     return;
diff --git a/onnxruntime/test/contrib_ops/qordered_matmul_op_test.cc b/onnxruntime/test/contrib_ops/qordered_matmul_op_test.cc
index e5b3d59ef86e3..e3905db6355d9 100644
--- a/onnxruntime/test/contrib_ops/qordered_matmul_op_test.cc
+++ b/onnxruntime/test/contrib_ops/qordered_matmul_op_test.cc
@@ -21,12 +21,6 @@ static void RunQOrdered_MatMul_Test(
     OrderCublasLt weight_order,
     float scale_A, float scale_B, float scale_C, float scale_Y,
     bool add_bias = false, bool broadcast_c_batch = false, bool per_channel = false) {
-  int cuda_runtime_version = 0;
-  // Need 11.4 or higher cuda runtime
-  if ((cudaRuntimeGetVersion(&cuda_runtime_version) != cudaSuccess) || (cuda_runtime_version < 11040)) {
-    return;
-  }
-
   // Needs Turing architecture
   if (NeedSkipIfCudaArchLowerThan(750) || NeedSkipIfCudaArchGreaterEqualThan(800)) {
     return;
diff --git a/onnxruntime/test/contrib_ops/qordered_qdq_op_test.cc b/onnxruntime/test/contrib_ops/qordered_qdq_op_test.cc
index 15e97751acf2d..0f3f702695b80 100644
--- a/onnxruntime/test/contrib_ops/qordered_qdq_op_test.cc
+++ b/onnxruntime/test/contrib_ops/qordered_qdq_op_test.cc
@@ -73,12 +73,6 @@ static void RunQOrdered_Quantize_Test(
     std::vector<int64_t> const& shape,
     OrderCublasLt order_q,
     float scale) {
-  int cuda_runtime_version = 0;
-  // Need 11.4 or higher cuda runtime
-  if ((cudaRuntimeGetVersion(&cuda_runtime_version) != cudaSuccess) || (cuda_runtime_version < 11040)) {
-    return;
-  }
-
   auto qvec = QuantizeTransform(shape, scale, fvec, order_q);
 
   std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
@@ -153,12 +147,6 @@ static void RunQOrdered_Dequantize_Test(
     std::vector<int64_t> const& shape,
     OrderCublasLt order_q,
     float scale) {
-  int cuda_runtime_version = 0;
-  // Need 11.4 or higher cuda runtime
-  if ((cudaRuntimeGetVersion(&cuda_runtime_version) != cudaSuccess) || (cuda_runtime_version < 11040)) {
-    return;
-  }
-
   auto fvec = DequantizeTransform<T>(shape, scale, qvec, order_q);
 
   std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
diff --git a/onnxruntime/test/contrib_ops/rotary_embedding_op_test.cc b/onnxruntime/test/contrib_ops/rotary_embedding_op_test.cc
new file mode 100644
index 0000000000000..55f01bf0d3f1d
--- /dev/null
+++ b/onnxruntime/test/contrib_ops/rotary_embedding_op_test.cc
@@ -0,0 +1,641 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include <cassert>
+#include "gtest/gtest.h"
+#include "core/session/onnxruntime_cxx_api.h"
+#include "test/common/tensor_op_test_utils.h"
+#include "test/common/cuda_op_test_utils.h"
+#include "test/providers/provider_test_utils.h"
+
+namespace onnxruntime {
+namespace test {
+
+static void RunTest(
+    const std::vector<float>& input_data,
+    const std::vector<int64_t>& position_ids,
+    const std::vector<float>& cos_cache,
+    const std::vector<float>& sin_cache,
+    const std::vector<float>& output_data,
+    int batch_size,
+    int sequence_length,
+    int head_size,
+    int num_heads,
+    int max_sequence_length,
+    int64_t interleaved,
+    bool use_float16,
+    bool disable_cpu,
+    bool disable_cuda,
+    bool disable_dml) {
+  //    input        : (batch_size, sequence_length, hidden_size)
+  //    position ids : (1) or (batch_size, sequence_length)
+  //    cos cache    : (max_sequence_length, head_size / 2)
+  //    sin cache    : (max_sequence_length, head_size / 2)
+  //    interleaved  : 0 = false, 1 = true
+
+  int hidden_size = num_heads * head_size;
+  std::vector<int64_t> input_dims = {batch_size, sequence_length, hidden_size};
+  std::vector<int64_t> pos_dims;
+  std::vector<int64_t> cache_dims = {max_sequence_length, head_size / 2};
+
+  assert(hidden_size != 0 && head_size != 0 && num_heads != 0 && max_sequence_length != 0);
+  assert(max_sequence_length >= sequence_length);
+  if (position_ids.size() == 1) {
+    pos_dims = {1};
+  } else {
+    pos_dims = {batch_size, sequence_length};
+  }
+
+  std::string op_type = "RotaryEmbedding";
+  std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
+
+  int min_cuda_architecture = use_float16 ? 530 : 0;
+  bool enable_cuda = HasCudaEnvironment(min_cuda_architecture);
+  bool enable_dml = (nullptr != DefaultDmlExecutionProvider().get()) && !disable_dml;
+
+  if (enable_cuda && !disable_cuda) {
+    execution_providers.push_back(DefaultCudaExecutionProvider());
+  }
+  if (enable_dml && !disable_dml) {
+    execution_providers.push_back(DefaultDmlExecutionProvider());
+  }
+  if (!use_float16 && !disable_cpu) {
+    execution_providers.push_back(DefaultCpuExecutionProvider());
+  }
+  if (execution_providers.size() == 0) {
+    // Return early if CI pipeline does not support EP (e.g. CUDA EP for CPU CI pipeline)
+    return;
+  }
+
+  OpTester test(op_type.c_str(), 1, onnxruntime::kMSDomain);
+  test.AddAttribute<int64_t>("interleaved", interleaved);
+
+  if (!use_float16) {
+    test.AddInput<float>("input", input_dims, input_data);
+    test.AddInput<int64_t>("position_ids", pos_dims, position_ids);
+    test.AddInput<float>("cos_cache", cache_dims, cos_cache);
+    test.AddInput<float>("sin_cache", cache_dims, sin_cache);
+    test.AddOutput<float>("output", input_dims, output_data);
+  } else {
+    test.AddInput<MLFloat16>("input", input_dims, ToFloat16(input_data));
+    test.AddInput<int64_t>("position_ids", pos_dims, position_ids);
+    test.AddInput<MLFloat16>("cos_cache", cache_dims, ToFloat16(cos_cache));
+    test.AddInput<MLFloat16>("sin_cache", cache_dims, ToFloat16(sin_cache));
+    test.AddOutput<MLFloat16>("output", input_dims, ToFloat16(output_data));
+  }
+  test.SetOutputAbsErr("output", 0.002f);
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
+}
+
+static void RunTests(const std::vector<float>& input_data,
+                     const std::vector<int64_t>& position_ids,
+                     const std::vector<float>& cos_cache,
+                     const std::vector<float>& sin_cache,
+                     const std::vector<float>& output_data,
+                     int batch_size,
+                     int sequence_length,
+                     int head_size = 0,
+                     int num_heads = 0,
+                     int max_sequence_length = 0,
+                     int64_t interleaved = 0,
+                     bool use_float16 = true) {
+  // FP32 test for CPU
+  RunTest(input_data,
+          position_ids,
+          cos_cache,
+          sin_cache,
+          output_data,
+          batch_size,
+          sequence_length,
+          head_size,
+          num_heads,
+          max_sequence_length,
+          interleaved,
+          false, /* use_fp16 */
+          false, /* disable_cpu */
+          true,  /* disable_cuda */
+          true /* disable_dml */);
+
+  // FP32 test for CUDA and DML
+  RunTest(input_data,
+          position_ids,
+          cos_cache,
+          sin_cache,
+          output_data,
+          batch_size,
+          sequence_length,
+          head_size,
+          num_heads,
+          max_sequence_length,
+          interleaved,
+          false, /* use_fp16 */
+          false, /* disable_cpu */
+          false, /* disable_cuda */
+          false /* disable_dml */);
+
+  // FP16 test for CUDA and DML
+  if (use_float16) {
+    RunTest(input_data,
+            position_ids,
+            cos_cache,
+            sin_cache,
+            output_data,
+            batch_size,
+            sequence_length,
+            head_size,
+            num_heads,
+            max_sequence_length,
+            interleaved,
+            true,  /* use_fp16 */
+            true,  /* disable_cpu */
+            false, /* disable_cuda*/
+            false /* disable_dml */);
+  }
+}
+
+// Interleaved = true, pos ids shape = (1)
+TEST(RotaryEmbeddingTest, RotaryEmbedding_Interleaved_SmallData_LlamaMSFT) {
+  int batch_size = 1;
+  int sequence_length = 3;
+  int num_heads = 2;
+  int head_size = 4;
+  int max_sequence_length = 8;
+  int64_t interleaved = 1;  // true
+
+  std::vector<float> input_data = {
+      -1.0408f, 0.9166f, -1.3042f, -1.1097f, -0.1320f, -0.2751f, -0.2350f, 0.0937f,
+      -1.2188f, 1.1676f, -1.0574f, -0.1188f, -0.7396f, -1.2425f, -0.1752f, 0.6990f,
+      -0.8110f, 0.6737f, -1.1233f, -0.0919f, -0.6861f, 0.7202f, 0.1963f, 0.6142f};
+
+  std::vector<int64_t> position_ids = {0};
+
+  std::vector<float> cos_cache = {
+      1.0000f, 1.0000f, 0.5403f, 0.9999f, -0.4161f, 0.9998f, -0.9900f, 0.9996f,
+      -0.6536f, 0.9992f, 0.2837f, 0.9988f, 0.9602f, 0.9982f, 0.7539f, 0.9976f};
+
+  std::vector<float> sin_cache = {
+      0.0000f, 0.0000f, 0.8415f, 0.0100f, 0.9093f, 0.0200f, 0.1411f, 0.0300f,
+      -0.7568f, 0.0400f, -0.9589f, 0.0500f, -0.2794f, 0.0600f, 0.6570f, 0.0699f};
+
+  std::vector<float> output_data = {
+      -1.0408f, 0.9166f, -1.3042f, -1.1097f, -0.1320f, -0.2751f, -0.2350f, 0.0937f,
+      -1.6411f, -0.3948f, -1.0561f, -0.1294f, 0.6460f, -1.2937f, -0.1822f, 0.6972f,
+      -0.2751f, -1.0178f, -1.1212f, -0.1143f, -0.3694f, -0.9235f, 0.1840f, 0.6180f};
+
+  RunTests(input_data,
+           position_ids,
+           cos_cache,
+           sin_cache,
+           output_data,
+           batch_size,
+           sequence_length,
+           head_size,
+           num_heads,
+           max_sequence_length,
+           interleaved);
+}
+
+// Interleaved = true, pos ids shape = (1)
+TEST(RotaryEmbeddingTest, RotaryEmbedding_Interleaved_LargeData_LlamaMSFT) {
+  int batch_size = 2;
+  int sequence_length = 8;
+  int num_heads = 4;
+  int head_size = 6;
+  int max_sequence_length = 16;
+  int64_t interleaved = 1;  // true
+
+  std::vector<float> input_data = {
+      -1.0408f, 0.9166f, -1.3042f, -1.1097f, -1.2188f,
+      1.1676f, -1.0190f, 0.3157f, -1.6036f, 1.8493f,
+      0.0447f, 1.5853f, 0.1036f, -0.3514f, 0.2421f,
+      0.6463f, 0.8730f, -0.9276f, 1.0311f, -1.9557f,
+      -0.1482f, 1.7376f, 2.2039f, -0.6589f, -1.0574f,
+      -0.1188f, -0.9078f, 0.3452f, -0.5713f, -0.2351f,
+      -0.5912f, 1.1312f, 0.7562f, -1.2023f, -0.5833f,
+      -0.4407f, 0.1766f, 1.0224f, -0.4826f, -0.5421f,
+      -0.5342f, -0.6413f, 1.3314f, -0.4498f, 0.5493f,
+      0.0539f, 0.2601f, 0.8570f, 1.0076f, -0.7529f,
+      -0.2250f, -0.4327f, -1.5071f, -0.4586f, -1.9791f,
+      0.7787f, -0.7749f, -0.1398f, 1.1414f, -0.6354f,
+      0.0352f, -0.4765f, -0.0409f, 1.1993f, 0.5374f,
+      -0.1930f, 2.5211f, -0.0452f, -0.3105f, -0.9407f,
+      -0.0034f, 1.5199f, -0.8480f, 0.5266f, 0.0299f,
+      -0.0498f, 1.0651f, 0.8860f, -1.4702f, -0.2134f,
+      -0.8707f, 1.6159f, -0.2356f, 0.9444f, 0.5937f,
+      0.7203f, 0.5061f, 1.5192f, -0.4897f, 0.9231f,
+      0.2654f, -0.1441f, 0.5407f, -1.5476f, 0.6455f,
+      -1.1382f, 0.4640f, -0.4986f, 0.1289f, 2.7631f,
+      0.1405f, 1.1191f, 2.1134f, -0.9754f, 0.1757f,
+      -0.1319f, -0.2735f, 0.3355f, -0.6008f, -1.1164f,
+      0.2577f, -0.7226f, -0.9244f, 1.8737f, 0.6052f,
+      1.1904f, 1.2195f, -0.0470f, -1.0914f, 1.0223f,
+      0.3152f, 1.7528f, -0.7650f, 1.8299f, -0.2784f,
+      -0.2719f, 0.1885f, 2.1432f, 0.8527f, 0.0965f,
+      -0.0625f, 0.8269f, 1.0122f, -1.4482f, -0.0644f,
+      0.3215f, 0.5908f, -1.4197f, 0.2113f, 0.0306f,
+      0.3604f, 0.3166f, -0.8975f, -0.6393f, -1.2944f,
+      -0.0243f, -0.2354f, -0.7087f, 1.1566f, 0.4296f,
+      0.5599f, -0.7776f, 0.3339f, 0.1759f, 2.1108f,
+      1.0702f, 0.8279f, -0.2969f, 0.7120f, -0.2068f,
+      -0.1548f, 0.1553f, 0.6207f, -0.1690f, -0.5816f,
+      1.2632f, 0.0695f, 1.1862f, -1.1874f, -0.7468f,
+      -0.9320f, -0.8579f, -0.9647f, -0.0991f, 0.0195f,
+      1.1213f, -1.4873f, -0.2043f, -1.0466f, -1.5772f,
+      -0.0489f, 0.3430f, 0.1264f, 0.1519f, -1.3639f,
+      -1.6593f, 1.8127f, -1.4459f, -0.2158f, -0.9792f,
+      -1.4392f, 0.6508f, 0.8964f, 0.5717f, -0.2390f,
+      0.6983f, -1.3416f, 0.2715f, -0.2852f, 0.6051f,
+      0.2167f, -0.2181f, -1.6306f, 1.4788f, 0.2754f,
+      -0.0261f, -0.4618f, -0.5646f, -1.0389f, 0.5819f,
+      1.3697f, 0.0002f, 1.5333f, -1.0556f, -0.1254f,
+      0.1527f, -0.5996f, -1.0962f, 1.6327f, 1.3951f,
+      0.8784f, 0.3389f, 1.2907f, 0.3124f, 0.7299f,
+      1.4220f, 0.3375f, 0.0438f, 1.8698f, -0.2635f,
+      -2.0799f, -0.6313f, 0.4090f, -1.1458f, 0.0784f,
+      -1.8848f, -1.6165f, 0.6179f, 0.9905f, -0.0729f,
+      0.5054f, -0.6681f, -1.4382f, 1.7547f, -0.9605f,
+      -0.4558f, -1.6105f, 0.2979f, 1.1537f, -1.5604f,
+      1.2779f, -1.2514f, 0.6056f, 0.5763f, -3.3558f,
+      0.2836f, 0.6909f, -0.7631f, 2.4451f, -0.3500f,
+      1.3289f, -0.6494f, 0.3478f, 1.0038f, -0.2937f,
+      0.9238f, -1.2185f, 0.4138f, 0.5033f, 0.9174f,
+      1.8131f, 1.4436f, -0.4207f, 0.0220f, -0.6807f,
+      -1.3306f, 1.5646f, 0.3338f, 0.7105f, 0.4683f,
+      -0.6179f, 0.0818f, -0.0488f, -0.9810f, -1.3632f,
+      0.0929f, -1.7926f, -0.2921f, -0.4792f, 0.6756f,
+      -0.3413f, -0.2242f, -0.2111f, 0.6282f, 0.1667f,
+      -1.4055f, 1.5895f, 1.0838f, -0.9077f, -0.8060f,
+      0.7967f, -2.9351f, 2.4179f, -0.4026f, 0.6451f,
+      1.6845f, -0.0901f, 0.6106f, 2.3603f, 1.3908f,
+      -0.7917f, -0.6734f, -0.1213f, -1.1116f, -0.7401f,
+      -0.7879f, 0.0606f, -2.3337f, -1.2603f, -1.7245f,
+      -0.3533f, -0.9421f, -0.1776f, 0.3992f, -1.7142f,
+      -0.5319f, -0.8848f, 0.6513f, 1.0002f, -1.4699f,
+      -1.4254f, 0.7013f, 0.2414f, 0.2551f, -0.7457f,
+      0.3133f, -1.0941f, -0.3682f, -0.0163f, -0.0645f,
+      -0.8101f, 0.1415f, 0.0551f, 0.5873f, -0.5887f,
+      -1.4733f, -0.8565f, 0.7400f, -0.5033f, 0.0553f,
+      0.9265f, -0.8652f, -0.0288f, -0.2209f, 0.0610f,
+      0.6776f, 0.4361f, -0.8052f, 0.3955f, 0.8988f,
+      0.8238f, 0.2262f, 1.2912f, 0.6488f, 1.2114f,
+      1.3569f, 0.2983f, 0.4718f, -1.1936f, 0.7928f,
+      -0.8665f, 0.9468f, 1.1629f, 0.0616f, -1.3136f,
+      -0.2764f, 0.0277f, -0.1126f, 0.2342f, -0.5866f,
+      -1.8219f, 1.1079f, 0.5795f, -1.4249f};
+
+  std::vector<int64_t> position_ids = {0};
+
+  std::vector<float> cos_cache = {
+      1.0000f, 1.0000f, 1.0000f, 0.5403f, 0.9989f, 1.0000f, -0.4161f, 0.9957f,
+      1.0000f, -0.9900f, 0.9903f, 1.0000f, -0.6536f, 0.9828f, 1.0000f, 0.2837f,
+      0.9732f, 0.9999f, 0.9602f, 0.9615f, 0.9999f, 0.7539f, 0.9477f, 0.9999f,
+      -0.1455f, 0.9318f, 0.9999f, -0.9111f, 0.9140f, 0.9998f, -0.8391f, 0.8942f,
+      0.9998f, 0.0044f, 0.8725f, 0.9997f, 0.8439f, 0.8488f, 0.9997f, 0.9074f,
+      0.8234f, 0.9996f, 0.1367f, 0.7962f, 0.9995f, -0.7597f, 0.7673f, 0.9995f};
+
+  std::vector<float> sin_cache = {
+      0.0000f, 0.0000f, 0.0000f, 0.8415f, 0.0464f, 0.0022f, 0.9093f, 0.0927f,
+      0.0043f, 0.1411f, 0.1388f, 0.0065f, -0.7568f, 0.1846f, 0.0086f, -0.9589f,
+      0.2300f, 0.0108f, -0.2794f, 0.2749f, 0.0129f, 0.6570f, 0.3192f, 0.0151f,
+      0.9894f, 0.3629f, 0.0172f, 0.4121f, 0.4057f, 0.0194f, -0.5440f, 0.4477f,
+      0.0215f, -1.0000f, 0.4887f, 0.0237f, -0.5366f, 0.5286f, 0.0259f, 0.4202f,
+      0.5675f, 0.0280f, 0.9906f, 0.6050f, 0.0302f, 0.6503f, 0.6413f, 0.0323f};
+
+  std::vector<float> output_data = {
+      -1.0408f, 0.9166f, -1.3042f, -1.1097f, -1.2188f,
+      1.1676f, -1.0190f, 0.3157f, -1.6036f, 1.8493f,
+      0.0447f, 1.5853f, 0.1036f, -0.3514f, 0.2421f,
+      0.6463f, 0.8730f, -0.9276f, 1.0311f, -1.9557f,
+      -0.1482f, 1.7376f, 2.2039f, -0.6589f, -0.4713f,
+      -0.9540f, -0.9229f, 0.3027f, -0.5708f, -0.2363f,
+      -1.2713f, 0.1137f, 0.8112f, -1.1659f, -0.5824f,
+      -0.4419f, -0.7649f, 0.7011f, -0.4569f, -0.5639f,
+      -0.5328f, -0.6424f, 1.0979f, 0.8773f, 0.5462f,
+      0.0793f, 0.2582f, 0.8576f, 0.2653f, 1.2295f,
+      -0.1839f, -0.4517f, -1.5052f, -0.4651f, 0.1155f,
+      -2.1237f, -0.7586f, -0.2110f, 1.1441f, -0.6304f,
+      0.4186f, 0.2303f, -0.1519f, 1.1903f, 0.5382f,
+      -0.1906f, -1.0080f, 2.3112f, -0.2220f, -0.9655f,
+      -0.0099f, 1.5198f, 0.7652f, -0.6410f, 0.0365f,
+      -0.0452f, 1.0593f, 0.8929f, 1.4856f, 0.0038f,
+      -1.0865f, 1.4794f, -0.2417f, 0.9428f, -0.6894f,
+      -0.6293f, 0.2904f, 1.5747f, -0.4956f, 0.9199f,
+      -0.2424f, 0.1801f, 0.7503f, -1.4576f, 0.6529f,
+      -1.1340f, -0.6807f, -0.0252f, -0.3834f, 2.7394f,
+      0.1308f, 1.1203f, -2.1196f, -0.9618f, 0.1970f,
+      -0.0972f, -0.2764f, 0.3332f, -0.4522f, 1.1844f,
+      0.3867f, -0.6626f, -0.9405f, 1.8656f, 0.5053f,
+      -1.2361f, 1.2072f, 0.1789f, -1.1002f, 1.0129f,
+      1.7702f, 0.1949f, -1.1653f, 1.6049f, -0.2755f,
+      -0.2749f, 2.1087f, 0.4272f, 0.8076f, 0.2900f,
+      -0.0714f, 0.8261f, -1.1016f, -1.3814f, -0.1366f,
+      0.2981f, 0.6060f, -1.4132f, 0.0893f, -0.1939f,
+      0.2779f, 0.3910f, -0.8906f, -0.6489f, -1.2496f,
+      0.3383f, -0.0315f, -0.7461f, 1.1510f, 0.4445f,
+      0.3203f, -0.9031f, 0.2727f, 0.2609f, 2.0968f,
+      1.0974f, 0.7120f, -0.5164f, 0.7415f, -0.0031f,
+      -0.1568f, 0.1533f, 0.5487f, -0.3357f, -0.9064f,
+      1.0546f, 0.0542f, 1.1870f, -0.4045f, -1.3431f,
+      -0.6094f, -1.1105f, -0.9631f, -0.1137f, -0.7219f,
+      0.8582f, -1.3443f, -0.6684f, -1.0227f, -1.5929f,
+      -0.2622f, 0.2264f, 0.0713f, 0.1843f, -1.3387f,
+      -1.6797f, 2.3165f, 0.1009f, 0.1081f, -0.9969f,
+      -1.4488f, 0.6291f, 0.8964f, 0.5717f, -0.2390f,
+      0.6983f, -1.3416f, 0.2715f, -0.2852f, 0.6051f,
+      0.2167f, -0.2181f, -1.6306f, 1.4788f, 0.2754f,
+      -0.0261f, -0.4618f, -0.5646f, -1.0389f, 0.5819f,
+      1.3697f, 0.0002f, 1.5333f, -1.0556f, -0.1254f,
+      0.1527f, 0.5985f, -1.0968f, 1.5662f, 1.4693f,
+      0.8776f, 0.3408f, 0.4345f, 1.2549f, 0.6631f,
+      1.4543f, 0.3374f, 0.0445f, 1.2320f, 1.4311f,
+      -2.0483f, -0.7272f, 0.4114f, -1.1449f, 1.6283f,
+      -0.9524f, -1.6435f, 0.5422f, 0.9907f, -0.0708f,
+      0.3972f, 0.7376f, -1.5947f, 1.6138f, -0.9586f,
+      -0.4600f, 0.3993f, -1.5884f, 1.2934f, -1.4467f,
+      1.2833f, -1.2459f, -0.7760f, 0.3108f, -3.3677f,
+      -0.0287f, 0.6942f, -0.7601f, -0.6993f, 2.3690f,
+      1.3834f, -0.5234f, 0.3435f, 1.0053f, 0.1604f,
+      -0.9560f, -1.2641f, 0.2406f, 0.4973f, 0.9206f,
+      -1.9987f, -1.1733f, -0.4197f, -0.0366f, -0.6720f,
+      -1.3350f, -1.5960f, -0.1097f, 0.6386f, 0.5624f,
+      -0.6184f, 0.0778f, 0.1867f, 0.9643f, -1.3629f,
+      -0.0972f, -1.7907f, -0.3037f, 0.8245f, -0.0789f,
+      -0.2940f, -0.2833f, -0.2165f, 0.6264f, -1.1726f,
+      0.7926f, 1.3621f, 1.3586f, -0.9007f, -0.8138f,
+      -2.7421f, 1.3155f, 2.4507f, 0.0507f, 0.6305f,
+      1.6900f, 0.5210f, -0.3309f, 2.0630f, 1.8026f,
+      -0.7859f, -0.6802f, -1.1003f, -0.1990f, -0.5391f,
+      -0.9370f, 0.0857f, -2.3330f, -2.0112f, 0.7193f,
+      -0.1272f, -0.9981f, -0.1818f, 0.3973f, -0.9963f,
+      1.4929f, -1.0109f, 0.4304f, 1.0160f, -1.4590f,
+      0.2682f, 1.5658f, 0.1762f, 0.3038f, -0.7491f,
+      0.3052f, -1.1534f, -0.0478f, 0.0021f, -0.0665f,
+      -0.8118f, 0.1310f, 0.2171f, 0.5485f, -0.1610f,
+      -1.5784f, -0.8660f, 0.7289f, -0.4678f, 0.1937f,
+      1.1287f, -0.5772f, -0.0259f, -0.2212f, 0.2479f,
+      0.6336f, 0.6407f, -0.6543f, 0.3838f, 0.9039f,
+      0.4724f, 0.7117f, 1.0165f, 1.0270f, 1.1908f,
+      1.3750f, -0.0850f, 0.5517f, -1.3842f, 0.3703f,
+      -0.8806f, 0.9336f, 0.8362f, 0.8105f, -1.1566f,
+      -0.6813f, 0.0294f, -0.1122f, 0.5620f, -0.2884f,
+      -2.0803f, 0.4684f, 0.6009f, -1.4160f};
+
+  RunTests(input_data,
+           position_ids,
+           cos_cache,
+           sin_cache,
+           output_data,
+           batch_size,
+           sequence_length,
+           head_size,
+           num_heads,
+           max_sequence_length,
+           interleaved);
+}
+
+// Interleaved = false, pos ids shape = (1)
+TEST(RotaryEmbeddingTest, RotaryEmbedding_NotInterleaved_LargeData_LlamaMSFT) {
+  int batch_size = 2;
+  int sequence_length = 8;
+  int num_heads = 4;
+  int head_size = 6;
+  int max_sequence_length = 16;
+  int64_t interleaved = 0;  // false
+
+  std::vector<float> input_data = {
+      -1.0408f, 0.9166f, -1.3042f, -1.1097f, -1.2188f,
+      1.1676f, -1.0190f, 0.3157f, -1.6036f, 1.8493f,
+      0.0447f, 1.5853f, 0.1036f, -0.3514f, 0.2421f,
+      0.6463f, 0.8730f, -0.9276f, 1.0311f, -1.9557f,
+      -0.1482f, 1.7376f, 2.2039f, -0.6589f, -1.0574f,
+      -0.1188f, -0.9078f, 0.3452f, -0.5713f, -0.2351f,
+      -0.5912f, 1.1312f, 0.7562f, -1.2023f, -0.5833f,
+      -0.4407f, 0.1766f, 1.0224f, -0.4826f, -0.5421f,
+      -0.5342f, -0.6413f, 1.3314f, -0.4498f, 0.5493f,
+      0.0539f, 0.2601f, 0.8570f, 1.0076f, -0.7529f,
+      -0.2250f, -0.4327f, -1.5071f, -0.4586f, -1.9791f,
+      0.7787f, -0.7749f, -0.1398f, 1.1414f, -0.6354f,
+      0.0352f, -0.4765f, -0.0409f, 1.1993f, 0.5374f,
+      -0.1930f, 2.5211f, -0.0452f, -0.3105f, -0.9407f,
+      -0.0034f, 1.5199f, -0.8480f, 0.5266f, 0.0299f,
+      -0.0498f, 1.0651f, 0.8860f, -1.4702f, -0.2134f,
+      -0.8707f, 1.6159f, -0.2356f, 0.9444f, 0.5937f,
+      0.7203f, 0.5061f, 1.5192f, -0.4897f, 0.9231f,
+      0.2654f, -0.1441f, 0.5407f, -1.5476f, 0.6455f,
+      -1.1382f, 0.4640f, -0.4986f, 0.1289f, 2.7631f,
+      0.1405f, 1.1191f, 2.1134f, -0.9754f, 0.1757f,
+      -0.1319f, -0.2735f, 0.3355f, -0.6008f, -1.1164f,
+      0.2577f, -0.7226f, -0.9244f, 1.8737f, 0.6052f,
+      1.1904f, 1.2195f, -0.0470f, -1.0914f, 1.0223f,
+      0.3152f, 1.7528f, -0.7650f, 1.8299f, -0.2784f,
+      -0.2719f, 0.1885f, 2.1432f, 0.8527f, 0.0965f,
+      -0.0625f, 0.8269f, 1.0122f, -1.4482f, -0.0644f,
+      0.3215f, 0.5908f, -1.4197f, 0.2113f, 0.0306f,
+      0.3604f, 0.3166f, -0.8975f, -0.6393f, -1.2944f,
+      -0.0243f, -0.2354f, -0.7087f, 1.1566f, 0.4296f,
+      0.5599f, -0.7776f, 0.3339f, 0.1759f, 2.1108f,
+      1.0702f, 0.8279f, -0.2969f, 0.7120f, -0.2068f,
+      -0.1548f, 0.1553f, 0.6207f, -0.1690f, -0.5816f,
+      1.2632f, 0.0695f, 1.1862f, -1.1874f, -0.7468f,
+      -0.9320f, -0.8579f, -0.9647f, -0.0991f, 0.0195f,
+      1.1213f, -1.4873f, -0.2043f, -1.0466f, -1.5772f,
+      -0.0489f, 0.3430f, 0.1264f, 0.1519f, -1.3639f,
+      -1.6593f, 1.8127f, -1.4459f, -0.2158f, -0.9792f,
+      -1.4392f, 0.6508f, 0.8964f, 0.5717f, -0.2390f,
+      0.6983f, -1.3416f, 0.2715f, -0.2852f, 0.6051f,
+      0.2167f, -0.2181f, -1.6306f, 1.4788f, 0.2754f,
+      -0.0261f, -0.4618f, -0.5646f, -1.0389f, 0.5819f,
+      1.3697f, 0.0002f, 1.5333f, -1.0556f, -0.1254f,
+      0.1527f, -0.5996f, -1.0962f, 1.6327f, 1.3951f,
+      0.8784f, 0.3389f, 1.2907f, 0.3124f, 0.7299f,
+      1.4220f, 0.3375f, 0.0438f, 1.8698f, -0.2635f,
+      -2.0799f, -0.6313f, 0.4090f, -1.1458f, 0.0784f,
+      -1.8848f, -1.6165f, 0.6179f, 0.9905f, -0.0729f,
+      0.5054f, -0.6681f, -1.4382f, 1.7547f, -0.9605f,
+      -0.4558f, -1.6105f, 0.2979f, 1.1537f, -1.5604f,
+      1.2779f, -1.2514f, 0.6056f, 0.5763f, -3.3558f,
+      0.2836f, 0.6909f, -0.7631f, 2.4451f, -0.3500f,
+      1.3289f, -0.6494f, 0.3478f, 1.0038f, -0.2937f,
+      0.9238f, -1.2185f, 0.4138f, 0.5033f, 0.9174f,
+      1.8131f, 1.4436f, -0.4207f, 0.0220f, -0.6807f,
+      -1.3306f, 1.5646f, 0.3338f, 0.7105f, 0.4683f,
+      -0.6179f, 0.0818f, -0.0488f, -0.9810f, -1.3632f,
+      0.0929f, -1.7926f, -0.2921f, -0.4792f, 0.6756f,
+      -0.3413f, -0.2242f, -0.2111f, 0.6282f, 0.1667f,
+      -1.4055f, 1.5895f, 1.0838f, -0.9077f, -0.8060f,
+      0.7967f, -2.9351f, 2.4179f, -0.4026f, 0.6451f,
+      1.6845f, -0.0901f, 0.6106f, 2.3603f, 1.3908f,
+      -0.7917f, -0.6734f, -0.1213f, -1.1116f, -0.7401f,
+      -0.7879f, 0.0606f, -2.3337f, -1.2603f, -1.7245f,
+      -0.3533f, -0.9421f, -0.1776f, 0.3992f, -1.7142f,
+      -0.5319f, -0.8848f, 0.6513f, 1.0002f, -1.4699f,
+      -1.4254f, 0.7013f, 0.2414f, 0.2551f, -0.7457f,
+      0.3133f, -1.0941f, -0.3682f, -0.0163f, -0.0645f,
+      -0.8101f, 0.1415f, 0.0551f, 0.5873f, -0.5887f,
+      -1.4733f, -0.8565f, 0.7400f, -0.5033f, 0.0553f,
+      0.9265f, -0.8652f, -0.0288f, -0.2209f, 0.0610f,
+      0.6776f, 0.4361f, -0.8052f, 0.3955f, 0.8988f,
+      0.8238f, 0.2262f, 1.2912f, 0.6488f, 1.2114f,
+      1.3569f, 0.2983f, 0.4718f, -1.1936f, 0.7928f,
+      -0.8665f, 0.9468f, 1.1629f, 0.0616f, -1.3136f,
+      -0.2764f, 0.0277f, -0.1126f, 0.2342f, -0.5866f,
+      -1.8219f, 1.1079f, 0.5795f, -1.4249f};
+
+  std::vector<int64_t> position_ids = {0};
+
+  std::vector<float> cos_cache = {
+      1.0000f, 1.0000f, 1.0000f, 0.5403f, 0.9989f, 1.0000f, -0.4161f, 0.9957f,
+      1.0000f, -0.9900f, 0.9903f, 1.0000f, -0.6536f, 0.9828f, 1.0000f, 0.2837f,
+      0.9732f, 0.9999f, 0.9602f, 0.9615f, 0.9999f, 0.7539f, 0.9477f, 0.9999f,
+      -0.1455f, 0.9318f, 0.9999f, -0.9111f, 0.9140f, 0.9998f, -0.8391f, 0.8942f,
+      0.9998f, 0.0044f, 0.8725f, 0.9997f, 0.8439f, 0.8488f, 0.9997f, 0.9074f,
+      0.8234f, 0.9996f, 0.1367f, 0.7962f, 0.9995f, -0.7597f, 0.7673f, 0.9995f};
+
+  std::vector<float> sin_cache = {
+      0.0000f, 0.0000f, 0.0000f, 0.8415f, 0.0464f, 0.0022f, 0.9093f, 0.0927f,
+      0.0043f, 0.1411f, 0.1388f, 0.0065f, -0.7568f, 0.1846f, 0.0086f, -0.9589f,
+      0.2300f, 0.0108f, -0.2794f, 0.2749f, 0.0129f, 0.6570f, 0.3192f, 0.0151f,
+      0.9894f, 0.3629f, 0.0172f, 0.4121f, 0.4057f, 0.0194f, -0.5440f, 0.4477f,
+      0.0215f, -1.0000f, 0.4887f, 0.0237f, -0.5366f, 0.5286f, 0.0259f, 0.4202f,
+      0.5675f, 0.0280f, 0.9906f, 0.6050f, 0.0302f, 0.6503f, 0.6413f, 0.0323f};
+
+  std::vector<float> output_data = {
+      -1.0408f, 0.9166f, -1.3042f, -1.1097f, -1.2188f,
+      1.1676f, -1.0190f, 0.3157f, -1.6036f, 1.8493f,
+      0.0447f, 1.5853f, 0.1036f, -0.3514f, 0.2421f,
+      0.6463f, 0.8730f, -0.9276f, 1.0311f, -1.9557f,
+      -0.1482f, 1.7376f, 2.2039f, -0.6589f, -0.8618f,
+      -0.0922f, -0.9073f, -0.7032f, -0.5762f, -0.2371f,
+      0.6923f, 1.1571f, 0.7572f, -1.1471f, -0.5302f,
+      -0.4391f, 0.5516f, 1.0461f, -0.4812f, -0.1443f,
+      -0.4862f, -0.6423f, 0.6740f, -0.4614f, 0.5475f,
+      1.1495f, 0.2389f, 0.8582f, -0.0259f, -0.6099f,
+      -0.2230f, 1.0963f, -1.5704f, -0.4595f, 0.9507f,
+      0.6696f, -0.7721f, -1.7415f, 1.2087f, -0.6387f,
+      -1.1052f, -0.5243f, -0.0400f, -0.4671f, 0.4909f,
+      -0.1931f, -0.1937f, -0.0447f, -0.3171f, 2.6839f,
+      -0.0076f, 1.5185f, 0.8465f, 0.3737f, 0.0242f,
+      -0.0703f, 1.1279f, 0.8862f, 1.2275f, -0.1786f,
+      -0.8767f, -1.8072f, -0.2630f, 0.9387f, -0.8021f,
+      0.7813f, 0.5001f, -1.4202f, -0.3850f, 0.9263f,
+      -0.0443f, -0.2323f, 0.5480f, 1.5696f, 0.6193f,
+      -1.1346f, 1.7878f, -0.5160f, 0.1192f, -2.1572f,
+      0.0460f, 1.1202f, -1.4812f, -0.9082f, 0.1728f,
+      -1.5132f, -0.4489f, 0.3370f, -0.1541f, -0.9266f,
+      0.2416f, 0.9270f, -1.1146f, 1.8758f, -0.4312f,
+      1.3714f, 1.2106f, -0.4272f, -0.8529f, 1.0328f,
+      1.8441f, 1.7698f, -0.7620f, 0.2168f, 0.1322f,
+      -0.2802f, 0.1460f, 2.1002f, 0.8437f, -0.1534f,
+      0.4321f, 0.8360f, 0.5955f, -1.5452f, -0.0491f,
+      -0.8794f, 0.2418f, -1.4203f, 0.3635f, 0.2362f,
+      0.3672f, -0.1128f, -0.8664f, -0.6354f, -1.4409f,
+      -0.3413f, -0.2409f, -0.3188f, 1.1054f, 0.4265f,
+      0.5867f, -1.3279f, 0.3201f, 0.0125f, 1.8157f,
+      1.0745f, 0.7372f, -0.2429f, 0.7100f, -0.4299f,
+      -0.2304f, 0.1645f, 0.9489f, -0.1816f, -0.5968f,
+      1.0394f, 0.0204f, 1.1786f, -0.3315f, -0.3997f,
+      -0.9304f, -1.4268f, -1.1526f, -0.1132f, 0.1490f,
+      1.3967f, -1.4634f, -0.1412f, -0.6339f, -1.5995f,
+      -0.1366f, 0.7604f, 0.1514f, 0.0824f, -1.1830f,
+      -1.6572f, 2.0099f, -0.9108f, -0.2256f, 0.4527f,
+      -1.8254f, 0.6475f, 0.8964f, 0.5717f, -0.2390f,
+      0.6983f, -1.3416f, 0.2715f, -0.2852f, 0.6051f,
+      0.2167f, -0.2181f, -1.6306f, 1.4788f, 0.2754f,
+      -0.0261f, -0.4618f, -0.5646f, -1.0389f, 0.5819f,
+      1.3697f, 0.0002f, 1.5333f, -1.0556f, -0.1254f,
+      0.1527f, -1.4979f, -1.1358f, 1.6320f, 0.2493f,
+      0.8266f, 0.3424f, -0.4992f, 0.2964f, 0.7298f,
+      1.8544f, 0.3516f, 0.0454f, 1.5415f, -0.2822f,
+      -2.0774f, 1.2323f, 0.3963f, -1.1503f, -0.4775f,
+      -1.9287f, -1.6164f, 0.3998f, 0.9020f, -0.0764f,
+      -1.8059f, -0.5762f, -1.4362f, -0.2706f, -1.0183f,
+      -0.4620f, 2.0891f, 0.1782f, 1.1591f, -0.8151f,
+      1.3000f, -1.2464f, -0.5099f, 0.5098f, -3.3525f,
+      0.4326f, 0.7414f, -0.7775f, -0.4271f, -0.3807f,
+      1.3245f, 2.4936f, 0.3139f, 1.0095f, 0.2323f,
+      0.8450f, -1.2244f, -0.4511f, 0.6266f, 0.9095f,
+      -1.7981f, 1.5241f, -0.4121f, 0.2341f, -0.4737f,
+      -1.3333f, -1.6150f, 0.4164f, 0.7100f, -0.2429f,
+      -0.5656f, 0.0863f, 0.0352f, -0.7227f, -1.3613f,
+      -0.0988f, -1.9114f, -0.3009f, 0.1435f, 0.7029f,
+      -0.3467f, 0.5092f, -0.0828f, 0.6253f, 0.7113f,
+      -1.2138f, 1.5964f, -0.8346f, -1.1515f, -0.7923f,
+      -0.8254f, -3.0038f, 2.4033f, -0.3398f, 0.0922f,
+      1.7053f, 1.1114f, 0.7462f, 2.3660f, -0.8409f,
+      -0.6654f, -0.6530f, -0.7899f, -1.0957f, -0.7149f,
+      -0.1072f, -0.1967f, -2.3416f, -1.2609f, -1.6375f,
+      -0.3576f, 0.9413f, -0.5694f, 0.3954f, 0.1383f,
+      -0.7477f, -0.8689f, 1.8286f, 0.8510f, -1.4793f,
+      -0.1597f, 0.8541f, 0.2380f, 1.4392f, -0.5644f,
+      0.3158f, -1.0686f, -0.1313f, -0.0181f, 0.2438f,
+      -0.8801f, 0.1413f, -0.3587f, 0.8002f, -0.5982f,
+      -1.4301f, -0.6620f, 0.7324f, -0.7250f, 0.0610f,
+      0.9293f, -0.6902f, -0.0125f, -0.2089f, -0.1664f,
+      0.5428f, 0.4245f, -0.7901f, 0.5665f, 0.9044f,
+      0.1948f, -0.1723f, 1.2705f, 1.0303f, 1.2202f,
+      1.3762f, -0.2959f, 0.7237f, -1.2077f, 0.7937f,
+      -0.6705f, 0.9287f, 1.0583f, 0.0496f, -1.3118f,
+      0.5556f, 0.0459f, -0.1324f, -0.5513f, -0.7409f,
+      -1.8002f, 0.9892f, 0.3619f, -1.4522f};
+
+  RunTests(input_data,
+           position_ids,
+           cos_cache,
+           sin_cache,
+           output_data,
+           batch_size,
+           sequence_length,
+           head_size,
+           num_heads,
+           max_sequence_length,
+           interleaved);
+}
+
+// Interleaved = false, pos ids shape = (batch_size, sequence_length)
+TEST(RotaryEmbeddingTest, RotaryEmbedding_NotInterleaved_SmallData_LlamaMSFT) {
+  int batch_size = 1;
+  int sequence_length = 2;
+  int num_heads = 3;
+  int head_size = 6;
+  int max_sequence_length = 4;
+  int64_t interleaved = 0;  // false
+
+  std::vector<float> input_data = {
+      -1.0408f, 0.9166f, -1.3042f, -1.1097f, -1.2188f, 1.1676f, 1.0076f, -0.7529f,
+      -0.2250f, -0.4327f, -1.5071f, -0.4586f, -0.8663f, -0.2656f, 0.1665f, 0.7911f,
+      -0.9320f, -0.8579f, -1.0574f, -0.1188f, -0.9078f, 0.3452f, -0.5713f, -0.2351f,
+      -0.8480f, 0.5266f, -1.2944f, -0.0243f, -0.2354f, -0.7087f, -0.9647f, -0.0991f,
+      -0.2994f, -0.0650f, -1.5720f, -1.3211f};
+
+  std::vector<int64_t> position_ids = {0, 1};
+
+  std::vector<float> cos_cache = {
+      1.0000f, 1.0000f, 1.0000f, 0.5403f, 0.9989f, 1.0000f, -0.4161f, 0.9957f,
+      1.0000f, -0.9900f, 0.9903f, 1.0000f};
+
+  std::vector<float> sin_cache = {
+      0.0000f, 0.0000f, 0.0000f, 0.8415f, 0.0464f, 0.0022f, 0.9093f, 0.0927f, 0.0043f,
+      0.1411f, 0.1388f, 0.0065f};
+
+  std::vector<float> output_data = {
+      -1.0408f, 0.9166f, -1.3042f, -1.1097f, -1.2188f, 1.1676f, 1.0076f, -0.7529f,
+      -0.2250f, -0.4327f, -1.5071f, -0.4586f, -0.8663f, -0.2656f, 0.1665f, 0.7911f,
+      -0.9320f, -0.8579f, -0.8618f, -0.0922f, -0.9073f, -0.7032f, -0.5762f, -0.2371f,
+      -0.4377f, 0.5370f, -1.2929f, -0.7267f, -0.2107f, -0.7115f, -0.4666f, -0.0261f,
+      -0.2965f, -0.8469f, -1.5749f, -1.3217f};
+
+  RunTests(input_data,
+           position_ids,
+           cos_cache,
+           sin_cache,
+           output_data,
+           batch_size,
+           sequence_length,
+           head_size,
+           num_heads,
+           max_sequence_length,
+           interleaved);
+}
+
+}  // namespace test
+}  // namespace onnxruntime
diff --git a/onnxruntime/test/contrib_ops/skip_group_norm_op_test.cc b/onnxruntime/test/contrib_ops/skip_group_norm_op_test.cc
new file mode 100644
index 0000000000000..fefd5722054de
--- /dev/null
+++ b/onnxruntime/test/contrib_ops/skip_group_norm_op_test.cc
@@ -0,0 +1,286 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include <random>
+#include "test/common/tensor_op_test_utils.h"
+#include "test/common/cuda_op_test_utils.h"
+#include "test/framework/test_utils.h"
+#include "test/providers/provider_test_utils.h"
+
+#include "gtest/gtest.h"
+#include "gmock/gmock.h"
+
+using namespace std;
+
+namespace onnxruntime {
+namespace test {
+
+TEST(SkipGroupNormTest, SkipGroupNorm_with_bias) {
+  constexpr int64_t B = 2;
+  constexpr int64_t C = 16;
+  constexpr int64_t H = 2;
+  constexpr int64_t W = 2;
+
+  std::vector<int64_t> dims_nhwc{B, H, W, C};
+  std::vector<float> input_data_nhwc = {
+      -0.768555f, 1.575195f, -0.698242f, 1.587891f, 0.371826f, -0.280029f, -1.328125f, 0.127197f,
+      -0.197144f, 0.982422f, -0.671387f, -1.925781f, 1.800781f, -0.020218f, -0.782227f, 1.291992f,
+      -0.935059f, 1.782227f, -0.674316f, -1.943359f, -0.218994f, 0.054138f, -1.539062f, -0.546387f,
+      -2.160156f, 1.195312f, 1.653320f, -0.674316f, 0.224731f, -0.093262f, 1.160156f, -0.389404f,
+      1.748047f, 0.766113f, 0.234375f, 0.011177f, -0.055847f, -0.930664f, -0.490234f, -0.655762f,
+      -0.382568f, -0.554688f, 0.910645f, -0.227295f, 1.687500f, 0.028397f, -0.241699f, -0.480957f,
+      -0.355713f, -2.095703f, -0.443359f, -0.126221f, -0.815918f, 0.792969f, -0.450439f, -0.952148f,
+      -1.174805f, 0.242798f, 0.138550f, -0.237061f, -0.994141f, 0.346436f, 0.147705f, 0.125854f,
+      -0.517090f, 0.253906f, 0.400146f, -0.540039f, -0.788574f, 0.146606f, -0.409668f, 0.281982f,
+      1.444336f, 0.044434f, -0.366699f, 2.250000f, -0.453613f, -0.652344f, 1.828125f, -0.244751f,
+      0.307129f, -0.051361f, 0.106384f, 0.844727f, 1.648438f, -0.904785f, -0.353760f, 0.510742f,
+      0.074829f, -0.311279f, 0.274902f, 1.594727f, 1.367188f, 0.098755f, 0.043304f, -0.207397f,
+      0.068298f, -0.601074f, 0.083008f, 0.264893f, -0.659180f, -0.216797f, -0.086548f, -0.683594f,
+      -0.964844f, -2.591797f, -0.817383f, -0.461914f, -1.840820f, -0.712402f, -0.052094f, -0.583008f,
+      1.114258f, 0.190308f, 1.087891f, 0.005146f, 1.041992f, 1.363281f, -0.273682f, -0.465576f,
+      -0.027618f, 1.345703f, 0.789551f, -0.015991f, 0.401611f, 0.726562f, 0.598633f, 0.133667f};
+
+  std::vector<float> gamma_data = {
+      0.241255f, 0.556660f, -0.835532f, 0.564596f, -1.338308f, -0.278924f, 0.357326f, -1.745484f,
+      0.277184f, 0.101415f, -0.018637f, -0.526188f, -0.011698f, -2.349411f, 0.206578f, 0.357679f};
+
+  std::vector<float> beta_data = {
+      -1.194839f, 0.209146f, -0.677225f, -0.547338f, 1.275685f, -1.099577f, 0.470916f, 0.293907f,
+      -1.094209f, 2.350204f, -1.633769f, 0.248753f, -0.180166f, 0.365134f, -0.555731f, 1.843083f};
+
+  std::vector<float> skip_data_nhwc = {
+      0.892578f, -0.471924f, -0.423096f, 1.277344f, 0.257080f, -1.366211f, 1.552734f, 0.441406f,
+      -0.033142f, -0.059418f, 1.536133f, -0.225464f, 1.472656f, 0.591309f, -0.386230f, -2.197266f,
+      0.089600f, -0.256592f, -1.873047f, 0.916992f, 0.392090f, 0.015526f, -0.949219f, 0.566895f,
+      -0.220459f, 1.262695f, -0.437744f, -2.283203f, -0.264893f, -0.660156f, 2.353516f, 1.992188f,
+      0.865723f, -0.854004f, -1.014648f, 0.899414f, -1.041016f, 1.378906f, -0.075073f, -2.541016f,
+      -0.883789f, -0.428711f, 0.981934f, -0.072754f, 2.214844f, 0.658203f, 0.170166f, -1.727539f,
+      -0.672363f, -1.373047f, 0.318115f, 0.422363f, 0.260742f, -0.547852f, 0.545898f, -0.155762f,
+      0.679688f, 2.861328f, -0.300781f, -0.504883f, 1.548828f, 0.353760f, -0.387695f, -1.595703f,
+      -0.170166f, -0.002897f, 0.273193f, -0.383545f, -1.082031f, -0.894043f, -1.048828f, -0.044708f,
+      0.049286f, 0.220215f, 0.272705f, -0.853027f, -0.489258f, 0.513672f, 0.977051f, 0.310547f,
+      -0.577148f, -0.479004f, 0.838867f, 0.872559f, -0.510254f, 0.101807f, -0.299805f, -1.179688f,
+      -1.555664f, 0.668457f, 0.939453f, 0.118103f, -0.376709f, 0.735352f, -0.214233f, -1.987305f,
+      -0.931152f, 1.268555f, 1.427734f, -0.757812f, -1.324219f, 0.375488f, 1.364258f, -1.708008f,
+      0.976562f, -0.037659f, -1.779297f, -0.196655f, 1.636719f, 0.690430f, 0.941895f, -1.882812f,
+      0.431641f, 0.203857f, 1.306641f, -0.126343f, 1.408203f, 1.188477f, 0.432861f, -2.296875f,
+      -0.475342f, 1.517578f, -0.824219f, 1.288086f, -0.028244f, 1.918945f, 0.352295f, 0.693359f};
+
+  std::vector<float> bias_data = {
+      -0.537598f, 0.500488f, -0.252441f, -0.460693f, -1.640625f, -1.298828f, 0.331787f, -1.588867f,
+      1.000977f, 1.458984f, 0.702637f, 0.147827f, 1.143555f, 0.533691f, -0.072510f, 0.511230f};
+
+  std::vector<float> norm_data_nhwc = {
+      -1.213867f, 0.856445f, -0.119141f, 0.386475f, 0.714355f, -0.804688f,
+      1.048828f, -0.426270f, -1.091797f, 2.435547f, -1.641602f, 0.989746f,
+      -0.200928f, 0.267334f, -0.800781f, 1.577148f, -1.357422f, 1.000977f,
+      0.613281f, -0.963867f, 1.179688f, -1.169922f, 0.308350f, 0.304199f,
+      -1.396484f, 2.513672f, -1.644531f, 1.206055f, -0.180664f, 1.896484f,
+      -0.294678f, 2.046875f, -0.844238f, 0.448486f, -0.294189f, -0.291504f,
+      2.480469f, -1.250977f, 0.833008f, 4.593750f, -1.238281f, 2.335938f,
+      -1.651367f, 0.491943f, -0.204834f, 0.125610f, -0.682129f, 1.333984f,
+      -1.384766f, -0.708008f, -0.630859f, -0.504883f, 1.924805f, -1.208008f,
+      1.013672f, 1.809570f, -1.128906f, 2.546875f, -1.631836f, 0.610840f,
+      -0.184326f, 0.110046f, -0.700195f, 1.471680f, -1.511719f, 0.492188f,
+      -0.847168f, -1.373047f, 2.837891f, -0.998047f, 0.521484f, 0.262207f,
+      -0.810547f, 2.400391f, -1.628906f, 0.049896f, -0.174927f, 1.076172f,
+      -0.252197f, 1.784180f, -1.418945f, 0.090820f, -1.056641f, 0.002945f,
+      0.627441f, -0.989746f, 0.679199f, 1.130859f, -1.371094f, 2.408203f,
+      -1.645508f, -0.062988f, -0.192017f, -0.655762f, -0.718262f, 1.170898f,
+      -1.550781f, 0.706055f, -1.492188f, -1.148438f, 2.921875f, -1.136719f,
+      1.058594f, 2.781250f, -1.089844f, 2.201172f, -1.597656f, 0.785645f,
+      -0.181396f, 0.868164f, -0.552246f, 1.097656f, -1.015625f, 0.565430f,
+      -2.173828f, -0.955078f, -0.336426f, -1.503906f, 0.838867f, 3.136719f,
+      -1.186523f, 2.580078f, -1.629883f, 0.094604f, -0.186523f, -3.884766f,
+      -0.542480f, 1.990234f};
+
+  std::vector<float> add_out_data_nhwc = {
+      -0.414062f, 1.604492f, -1.374023f, 2.404297f, -1.011719f, -2.945312f, 0.556641f, -1.020508f,
+      0.770508f, 2.382812f, 1.567383f, -2.003906f, 4.417969f, 1.105469f, -1.240234f, -0.394531f,
+      -1.382812f, 2.027344f, -2.800781f, -1.487305f, -1.466797f, -1.229492f, -2.156250f, -1.568359f,
+      -1.379883f, 3.917969f, 1.917969f, -2.808594f, 1.103516f, -0.219727f, 3.441406f, 2.113281f,
+      2.076172f, 0.412598f, -1.033203f, 0.449951f, -2.738281f, -0.851562f, -0.233521f, -4.785156f,
+      -0.265625f, 0.475586f, 2.595703f, -0.152222f, 5.046875f, 1.220703f, -0.144043f, -1.697266f,
+      -1.566406f, -2.968750f, -0.377686f, -0.164551f, -2.195312f, -1.053711f, 0.427246f, -2.697266f,
+      0.505859f, 4.562500f, 0.540527f, -0.594238f, 1.698242f, 1.233398f, -0.312500f, -0.958496f,
+      -1.224609f, 0.751465f, 0.420898f, -1.384766f, -3.511719f, -2.046875f, -1.126953f, -1.351562f,
+      2.494141f, 1.724609f, 0.608398f, 1.544922f, 0.200684f, 0.395020f, 2.732422f, 0.577148f,
+      -0.807617f, -0.029785f, 0.692871f, 1.256836f, -0.502441f, -2.101562f, -0.321777f, -2.257812f,
+      -0.479492f, 1.816406f, 1.916992f, 1.860352f, 2.134766f, 1.367188f, -0.243408f, -1.683594f,
+      -1.400391f, 1.167969f, 1.257812f, -0.953613f, -3.625000f, -1.140625f, 1.609375f, -3.980469f,
+      1.012695f, -1.170898f, -1.894531f, -0.510742f, 0.939453f, 0.511719f, 0.817383f, -1.955078f,
+      1.007812f, 0.894531f, 2.142578f, -0.582031f, 0.809570f, 1.252930f, 0.490967f, -4.351562f,
+      0.497803f, 4.320312f, 0.667969f, 1.419922f, 1.516602f, 3.179688f, 0.878906f, 1.337891f};
+
+  int min_cuda_architecture = 530;
+  bool enable_cuda = HasCudaEnvironment(min_cuda_architecture);
+
+  std::array<int, 2> channels_last_values = {-1, 1};
+
+  for (const int channels_last : channels_last_values) {
+    if (enable_cuda) {
+      std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
+      if (enable_cuda && channels_last != 0) {
+        execution_providers.push_back(DefaultCudaExecutionProvider());
+      }
+
+      // Don't run the test if no providers are supported
+      if (execution_providers.empty()) {
+        continue;
+      }
+
+      OpTester test("SkipGroupNorm", 1, onnxruntime::kMSDomain);
+      test.AddAttribute<float>("epsilon", 1e-05f);
+      test.AddAttribute<int64_t>("groups", 4);
+      test.AddAttribute<int64_t>("activation", 0);
+
+      // We interpret channels_last==-1 as the attribute not being provided
+      if (channels_last != -1) {
+        test.AddAttribute<int64_t>("channels_last", channels_last);
+      }
+
+      test.AddInput<MLFloat16>("X", dims_nhwc, ToFloat16(input_data_nhwc));
+      test.AddInput<float>("gamma", {C}, gamma_data);
+      test.AddInput<float>("beta", {C}, beta_data);
+      test.AddInput<MLFloat16>("skip", dims_nhwc, ToFloat16(skip_data_nhwc));
+      test.AddInput<MLFloat16>("bias", {C}, ToFloat16(bias_data));
+
+      constexpr float rel_error = 0.0f;
+      constexpr float abs_error = 0.02f;
+      test.AddOutput<MLFloat16>("Y", dims_nhwc, ToFloat16(norm_data_nhwc), false, rel_error, abs_error);
+      test.AddOutput<MLFloat16>("S", dims_nhwc, ToFloat16(add_out_data_nhwc), false, rel_error, abs_error);
+
+      test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
+    }
+  }
+}
+
+TEST(SkipGroupNormTest, SkipGroupNorm_no_bias_broadcast_skip) {
+  constexpr int64_t B = 1;
+  constexpr int64_t C = 64;
+  constexpr int64_t H = 1;
+  constexpr int64_t W = 1;
+
+  std::vector<int64_t> dims_nhwc{B, H, W, C};
+  std::vector<float> input_data_nhwc = {
+      0.588867f, 0.896484f, -0.213623f, 0.803223f, 0.659180f, -0.216187f, 1.197266f, -0.486084f,
+      -0.718750f, 0.332031f, -0.364746f, -0.831543f, -0.031219f, -1.059570f, 0.161621f, 1.519531f,
+      0.169312f, 1.048828f, 1.330078f, 0.450195f, -2.867188f, -1.456055f, 0.708496f, -1.120117f,
+      -1.208984f, -1.199219f, -1.505859f, -0.549316f, 0.505371f, 0.723145f, -0.359131f, -0.250977f,
+      -0.879883f, -0.305664f, 0.709473f, 0.815430f, 0.617676f, -0.638672f, 0.066772f, -2.330078f,
+      -1.316406f, 1.744141f, 1.122070f, -0.633789f, -1.802734f, -0.825684f, 0.622559f, -0.481689f,
+      -1.364258f, -0.536621f, -0.464111f, 0.247437f, -0.213989f, 0.384521f, 0.556641f, -0.303711f,
+      -0.160034f, 0.882324f, -0.212036f, -0.796387f, 0.153076f, -1.311523f, 2.212891f, 0.685059f};
+
+  std::vector<float> gamma_data = {
+      0.789682f, 0.869051f, -0.010169f, -0.021685f, 0.506611f, 1.267444f, -0.312695f, 0.877844f,
+      0.598637f, 0.598314f, -1.721544f, -0.593328f, 0.986705f, -0.419391f, -0.852584f, -0.572351f,
+      0.912797f, -0.586863f, 0.477761f, -0.484418f, -0.193835f, 0.347757f, 0.327637f, -1.100304f,
+      1.233108f, -0.272569f, -0.688656f, 0.687245f, 0.398386f, 0.888089f, -0.792587f, -0.769029f,
+      -0.427778f, 0.100768f, -2.187060f, 1.279301f, 1.109054f, 0.375992f, 1.514775f, 1.271436f,
+      0.822896f, -0.476750f, 0.475507f, -1.011297f, 1.177197f, 1.586540f, -1.059944f, -0.145351f,
+      0.841555f, -2.014113f, -0.230498f, 0.302128f, -0.180508f, 0.980534f, -0.126871f, 0.203151f,
+      -0.754841f, 0.420570f, -1.085798f, 1.335042f, -0.674930f, 2.453507f, 2.139259f, 1.087436f};
+
+  std::vector<float> beta_data = {
+      -0.064518f, -0.262683f, 0.827528f, -0.960938f, 1.062519f, 2.417941f, 0.212789f, -1.638430f,
+      1.875453f, -0.883058f, -0.006704f, 0.424894f, -0.869972f, 0.727008f, 0.879303f, -3.024141f,
+      -2.610873f, 1.269641f, 0.883006f, 0.804167f, -1.510324f, 2.258091f, -0.006750f, -1.553668f,
+      -1.659453f, 0.579603f, 0.652358f, 0.007077f, 0.099180f, 0.418658f, -0.273778f, -1.036199f,
+      -1.128691f, -0.296022f, -0.224056f, 1.476306f, 0.577624f, -0.372049f, -0.581659f, -1.841807f,
+      -0.361721f, 0.051160f, -0.749332f, -2.634807f, 0.562719f, -0.738667f, 0.024864f, -1.135937f,
+      -1.368144f, -1.458886f, -0.946683f, 1.953936f, -1.198661f, 0.166648f, 0.447206f, -0.458140f,
+      -0.553395f, 0.112900f, 0.255989f, -0.184551f, 1.254163f, -0.260479f, -1.232429f, 1.902575f};
+
+  std::vector<float> skip_data = {
+      0.952148f, 1.342773f, -0.172974f, -0.395264f, 1.119141f, 0.330566f,
+      0.281494f, 0.472900f, -0.692871f, -0.634766f, 0.013504f, -1.866211f,
+      -0.428223f, 0.669922f, -0.323486f, 0.713867f, -0.350586f, 0.659180f,
+      -0.288574f, 0.324219f, -0.300781f, -0.789551f, -0.216431f, -0.221436f,
+      -0.086670f, 0.366211f, -0.643555f, -0.977051f, 0.001021f, 0.415527f,
+      -0.271729f, 0.836426f, 0.035370f, -0.806152f, 0.936035f, -0.021332f,
+      -1.095703f, 0.971680f, 1.648438f, 0.840820f, 0.837402f, 0.607910f,
+      -1.894531f, 0.666016f, -0.171143f, 1.625977f, -0.620117f, -0.039581f,
+      1.702148f, -2.410156f, 1.565430f, -0.756348f, 1.446289f, 0.583496f,
+      -0.497559f, -0.271729f, -0.956055f, -1.642578f, 0.833496f, -1.136719f,
+      1.248047f, -2.515625f, 0.080383f, 0.376221f};
+
+  std::vector<float> norm_data_nhwc = {
+      0.494873f, 1.017578f, 0.841797f, -0.949219f, 1.552734f, 1.333984f, 0.012703f, -2.511719f,
+      1.424805f, -0.818359f, -0.128418f, 1.462891f, -0.882812f, 0.709961f, 0.693848f, -4.210938f,
+      -2.505859f, 0.513184f, 1.300781f, 0.460938f, -1.172852f, 1.851562f, 0.167969f, -0.885254f,
+      -2.535156f, 0.656738f, 1.683594f, -0.627441f, 0.478271f, 1.782227f, -0.196777f, -1.824219f,
+      -0.791016f, -0.398682f, -3.197266f, 2.275391f, 0.052704f, -0.286865f, 1.567383f, -3.552734f,
+      -0.646973f, -0.927734f, -1.032227f, -2.722656f, -1.337891f, 0.432129f, -0.040253f, -1.080078f,
+      -1.118164f, 3.123047f, -1.153320f, 1.843750f, -1.378906f, 0.941406f, 0.437256f, -0.542969f,
+      -0.218872f, 0.006115f, -0.265869f, -1.356445f, 0.649902f, -4.882812f, 1.696289f, 2.679688f};
+
+  std::vector<float> add_out_data_nhwc = {
+      1.541016f, 2.238281f, -0.386719f, 0.407959f, 1.778320f, 0.114380f,
+      1.478516f, -0.013184f, -1.412109f, -0.302734f, -0.351318f, -2.697266f,
+      -0.459473f, -0.389648f, -0.161865f, 2.234375f, -0.181274f, 1.708008f,
+      1.041016f, 0.774414f, -3.167969f, -2.246094f, 0.492188f, -1.341797f,
+      -1.295898f, -0.833008f, -2.148438f, -1.526367f, 0.506348f, 1.138672f,
+      -0.630859f, 0.585449f, -0.844727f, -1.111328f, 1.645508f, 0.793945f,
+      -0.478027f, 0.333008f, 1.714844f, -1.489258f, -0.479004f, 2.351562f,
+      -0.772461f, 0.032227f, -1.973633f, 0.800293f, 0.002441f, -0.521484f,
+      0.337891f, -2.947266f, 1.101562f, -0.508789f, 1.232422f, 0.967773f,
+      0.059082f, -0.575195f, -1.116211f, -0.760254f, 0.621582f, -1.933594f,
+      1.401367f, -3.828125f, 2.292969f, 1.061523f};
+
+  int min_cuda_architecture = 530;
+  bool enable_cuda = HasCudaEnvironment(min_cuda_architecture);
+
+  std::array<bool, 2> has_add_out_values = {true, false};
+  std::array<int, 2> skip_dims = {2, 4};
+
+  constexpr int channels_last = 1;
+  for (const int skip_dim : skip_dims) {
+    for (const bool has_add_out : has_add_out_values) {
+      if (enable_cuda) {
+        std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
+        if (enable_cuda && channels_last != 0) {
+          execution_providers.push_back(DefaultCudaExecutionProvider());
+        }
+
+        // Don't run the test if no providers are supported
+        if (execution_providers.empty()) {
+          continue;
+        }
+
+        OpTester test("SkipGroupNorm", 1, onnxruntime::kMSDomain);
+        test.AddAttribute<float>("epsilon", 1e-05f);
+        test.AddAttribute<int64_t>("groups", 8);
+        test.AddAttribute<int64_t>("activation", 0);
+
+        // We interpret channels_last==-1 as the attribute not being provided
+        if (channels_last != -1) {
+          test.AddAttribute<int64_t>("channels_last", channels_last);
+        }
+
+        test.AddInput<MLFloat16>("X", dims_nhwc, ToFloat16(input_data_nhwc));
+        test.AddInput<float>("gamma", {C}, gamma_data);
+        test.AddInput<float>("beta", {C}, beta_data);
+        if (skip_dim == 2) {
+          test.AddInput<MLFloat16>("skip", {B, C}, ToFloat16(skip_data));
+        } else {
+          test.AddInput<MLFloat16>("skip", {B, 1, 1, C}, ToFloat16(skip_data));
+        }
+        // no bias
+
+        constexpr float rel_error = 0.0f;
+        constexpr float abs_error = 0.02f;
+        test.AddOutput<MLFloat16>("Y", dims_nhwc, ToFloat16(norm_data_nhwc), false, rel_error, abs_error);
+
+        if (has_add_out) {
+          test.AddOutput<MLFloat16>("S", dims_nhwc, ToFloat16(add_out_data_nhwc), false, rel_error, abs_error);
+        }
+
+        test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
+      }
+    }
+  }
+}
+
+}  // namespace test
+}  // namespace onnxruntime
diff --git a/onnxruntime/test/contrib_ops/skiplayernorm_op_test.cc b/onnxruntime/test/contrib_ops/skiplayernorm_op_test.cc
index 2395532198805..bb56a5aba7f65 100644
--- a/onnxruntime/test/contrib_ops/skiplayernorm_op_test.cc
+++ b/onnxruntime/test/contrib_ops/skiplayernorm_op_test.cc
@@ -11,14 +11,15 @@ namespace onnxruntime {
 namespace test {
 constexpr float epsilon_ = 1e-12f;
 
-static void RunTest(
+static void RunOneTest(
+    bool strict,
     const std::vector<float>& input_data,
     const std::vector<float>& skip_data,
     const std::vector<float>& gamma_data,
     const std::vector<float>& beta_data,
     const std::vector<float>& bias_data,
     const std::vector<float>& output_data,
-    const std::vector<float>& skip_input_bias_add_output_data,
+    const std::vector<float>& sum_output_data,
     float epsilon,
     int batch_size,
     int sequence_length,
@@ -27,7 +28,6 @@ static void RunTest(
     bool no_beta = false,
     bool simplified = false,
     bool use_token_count = false,
-    bool strict = false,
     bool broadcast_skip = false,
     bool no_batch_size = false) {
   // Input and output shapes
@@ -82,14 +82,14 @@ static void RunTest(
 
     test.AddOutput<float>("output", output_dims, output_data);
 
-    if (skip_input_bias_add_output_data.size() != 0) {
+    if (sum_output_data.size() != 0) {
       // The second and third outputs are reserved for something else
       test.AddOptionalOutputEdge<float>();
       test.AddOptionalOutputEdge<float>();
 
       test.AddOutput<float>("skip_input_bias_add_output",
                             output_dims,
-                            skip_input_bias_add_output_data);
+                            sum_output_data);
     }
 
     if (cpu_ep != nullptr) {
@@ -117,14 +117,19 @@ static void RunTest(
 
     test.AddOutput<MLFloat16>("output", output_dims, ToFloat16(output_data));
 
-    if (skip_input_bias_add_output_data.size() != 0) {
+    // Use larger threshold for fp16
+    if (use_float16) {
+      test.SetOutputAbsErr("output", 0.01f);
+    }
+
+    if (sum_output_data.size() != 0) {
       // The second and third outputs are reserved for something else
       test.AddOptionalOutputEdge<MLFloat16>();
       test.AddOptionalOutputEdge<MLFloat16>();
 
       test.AddOutput<MLFloat16>("skip_input_bias_add_output",
                                 output_dims,
-                                ToFloat16(skip_input_bias_add_output_data));
+                                ToFloat16(sum_output_data));
     }
 
     if (dml_ep != nullptr) {
@@ -151,6 +156,36 @@ static void RunTest(
   }
 }
 
+static void RunTest(
+    const std::vector<float>& input_data,
+    const std::vector<float>& skip_data,
+    const std::vector<float>& gamma_data,
+    const std::vector<float>& beta_data,
+    const std::vector<float>& bias_data,
+    const std::vector<float>& output_data,
+    const std::vector<float>& sum_output_data,
+    float epsilon,
+    int batch_size,
+    int sequence_length,
+    int hidden_size,
+    bool use_float16 = false,
+    bool no_beta = false,
+    bool simplified = false,
+    bool use_token_count = false,
+    bool broadcast_skip = false,
+    bool no_batch_size = false) {
+  RunOneTest(false, input_data, skip_data, gamma_data, beta_data, bias_data, output_data, sum_output_data,
+             epsilon, batch_size, sequence_length, hidden_size, use_float16, no_beta, simplified,
+             use_token_count, broadcast_skip, no_batch_size);
+
+  // strict mode does not support skip broadcasting.
+  if (!broadcast_skip) {
+    RunOneTest(true, input_data, skip_data, gamma_data, beta_data, bias_data, output_data, sum_output_data,
+               epsilon, batch_size, sequence_length, hidden_size, use_float16, no_beta, simplified,
+               use_token_count, broadcast_skip, no_batch_size);
+  }
+}
+
 TEST(SkipLayerNormTest, SkipLayerNormNullInput) {
   int batch_size = 1;
   int sequence_length = 0;
@@ -359,8 +394,7 @@ TEST(SkipLayerNormTest, SkipLayerNormBatch1_Float16_vec) {
           true /*use_float16*/,
           false /*no_beta*/,
           false /*simplified*/,
-          false /*use_token_count*/,
-          true /*strict*/);
+          false /*use_token_count*/);
 }
 
 TEST(SkipLayerNormTest, SkipLayerNormBatch1_NoBeta) {
@@ -648,8 +682,7 @@ TEST(SkipLayerNormTest, SkipLayerNormBatch1_Float16_vec_token_count) {
           true /*use_float16*/,
           false /*no_beta*/,
           false /*simplified*/,
-          true /*use_token_count*/,
-          true /*strict*/);
+          true /*use_token_count*/);
 }
 
 TEST(SkipLayerNormTest, SkipLayerNormBatch2_TokenCount) {
@@ -776,13 +809,12 @@ TEST(SkipLayerNormTest, SkipLayerNormBatch2_Skip_Broadcast_No_Batch_Size) {
           batch_size,
           sequence_length,
           hidden_size,
-          false,
-          false,
-          false,
-          false,
-          false,
-          false,
-          true);
+          false,  // use_float16
+          false,  // no_beta
+          false,  // simplified
+          false,  // use_token_count
+          true,   // broadcast_skip
+          true);  // no_batch_size
 }
 
 TEST(SkipLayerNormTest, SkipLayerNormBatch2_Skip_Broadcast_Batch_Size_1) {
@@ -823,13 +855,12 @@ TEST(SkipLayerNormTest, SkipLayerNormBatch2_Skip_Broadcast_Batch_Size_1) {
           batch_size,
           sequence_length,
           hidden_size,
-          false,
-          false,
-          false,
-          false,
-          false,
-          true,
-          false);
+          false,   // use_float16
+          false,   // no_beta
+          false,   // simplified
+          false,   // use_token_count
+          true,    // broadcast_skip
+          false);  // no_batch_size
 }
 #endif
 
diff --git a/onnxruntime/test/contrib_ops/tensor_op_test.cc b/onnxruntime/test/contrib_ops/tensor_op_test.cc
index 44cb49580ce8b..81c8641f450f6 100644
--- a/onnxruntime/test/contrib_ops/tensor_op_test.cc
+++ b/onnxruntime/test/contrib_ops/tensor_op_test.cc
@@ -6,6 +6,7 @@
 #include "test/common/tensor_op_test_utils.h"
 #include "test/providers/provider_test_utils.h"
 #include "test/util/include/default_providers.h"
+#include "test/common/cuda_op_test_utils.h"
 
 using namespace ONNX_NAMESPACE;
 using namespace onnxruntime::test;
@@ -204,5 +205,79 @@ TEST(MVNContribOpTest, MeanVarianceNormalizationCPUTest_Version1_TO_8) {
   MeanVarianceNormalizationPerChannel(false, true);
 }
 
+#ifdef USE_CUDA
+
+TEST(UnfoldTensorOpTest, LastDim) {
+  if (NeedSkipIfCudaArchLowerThan(530)) {
+    return;
+  }
+
+  std::vector<float> X = {
+      1.0f, 2.0f, 3.0f, 4.0f,
+      5.0f, 6.0f, 7.0f, 8.0f,
+      6.0f, 7.0f, 8.0f, 9.0f};
+
+  std::vector<float> output = {
+      1.0f, 2.0f, 3.0f, 2.0f, 3.0f, 4.0f,
+      5.0f, 6.0f, 7.0f, 6.0f, 7.0f, 8.0f,
+      6.0f, 7.0f, 8.0f, 7.0f, 8.0f, 9.0f};
+
+  OpTester tester("UnfoldTensor", 1, onnxruntime::kMSDomain);
+
+  tester.AddAttribute<int64_t>("size", 3LL);
+  tester.AddInput<float>("input", {3, 4}, X);
+  tester.AddOutput<float>("output", {3, 2, 3}, output);
+
+  std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
+  execution_providers.push_back(DefaultCudaExecutionProvider());
+  tester.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
+}
+
+TEST(UnfoldTensorOpTest, NormalDim) {
+  if (NeedSkipIfCudaArchLowerThan(530)) {
+    return;
+  }
+
+  std::vector<int16_t> X = {
+      1, 2, 3, 4, 2, 2, 3, 4, 3, 2, 3, 4,
+      4, 6, 7, 8, 5, 6, 7, 8, 6, 6, 7, 8,
+      6, 7, 8, 9, 7, 7, 8, 9, 8, 7, 8, 9,
+      9, 7, 8, 9, 10, 7, 8, 9, 11, 7, 8, 9};
+
+  std::vector<int16_t> output = {
+      1, 2, 3,
+      2, 2, 2,
+      3, 3, 3,
+      4, 4, 4,
+
+      3, 4, 5,
+      2, 6, 6,
+      3, 7, 7,
+      4, 8, 8,
+
+      6, 7, 8,
+      7, 7, 7,
+      8, 8, 8,
+      9, 9, 9,
+
+      8, 9, 10,
+      7, 7, 7,
+      8, 8, 8,
+      9, 9, 9};
+
+  OpTester tester("UnfoldTensor", 1, onnxruntime::kMSDomain);
+  tester.AddAttribute<int64_t>("dim", 1LL);
+  tester.AddAttribute<int64_t>("size", 3LL);
+  tester.AddAttribute<int64_t>("step", 2LL);
+  tester.AddInput<int16_t>("input", {2, 6, 4}, X);
+  tester.AddOutput<int16_t>("output", {2, 2, 4, 3}, output);
+
+  std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
+  execution_providers.push_back(DefaultCudaExecutionProvider());
+  tester.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
+}
+
+#endif
+
 }  // namespace test
 }  // namespace onnxruntime
diff --git a/onnxruntime/test/contrib_ops/tokenizer_test.cc b/onnxruntime/test/contrib_ops/tokenizer_test.cc
index 4daf9dac886d0..b8fb964b86c13 100644
--- a/onnxruntime/test/contrib_ops/tokenizer_test.cc
+++ b/onnxruntime/test/contrib_ops/tokenizer_test.cc
@@ -10,7 +10,7 @@ namespace test {
 namespace tokenizer_test {
 const std::string start_mark{0x2};
 const std::string end_mark{0x3};
-const std::string padval(u8"0xdeadbeaf");
+const std::string padval("0xdeadbeaf");
 
 constexpr const char* domain = onnxruntime::kMSDomain;
 constexpr int opset_ver = 1;
@@ -220,7 +220,7 @@ TEST(ContribOpTest, TokenizerCharLevel_CyrillicCharsWithMarkersC) {
     InitTestAttr(test, true, {""}, 1);
 
     std::vector<int64_t> dims{2};
-    std::vector<std::string> input{u8"Абсурд", u8"Кома"};
+    std::vector<std::string> input{"Абсурд", "Кома"};
     test.AddInput<std::string>("T", dims, input);
 
     std::vector<int64_t> output_dims(dims);
@@ -229,10 +229,10 @@ TEST(ContribOpTest, TokenizerCharLevel_CyrillicCharsWithMarkersC) {
     output_dims.push_back(int64_t(6 + 2));
     std::vector<std::string> output{
         start_mark,
-        u8"А", u8"б", u8"с", u8"у", u8"р", u8"д",
+        "А", "б", "с", "у", "р", "д",
         end_mark,
         start_mark,
-        u8"К", u8"о", u8"м", u8"а",
+        "К", "о", "м", "а",
         end_mark,
         padval,
         padval};
@@ -254,7 +254,7 @@ TEST(ContribOpTest, TokenizerCharLevel_MixedCharsWithMarkersC) {
     InitTestAttr(test, true, {""}, 1);
 
     std::vector<int64_t> dims{2};
-    std::vector<std::string> input{u8"Абсу中文", u8"Коñó"};
+    std::vector<std::string> input{"Абсу中文", "Коñó"};
     test.AddInput<std::string>("T", dims, input);
 
     std::vector<int64_t> output_dims(dims);
@@ -263,10 +263,10 @@ TEST(ContribOpTest, TokenizerCharLevel_MixedCharsWithMarkersC) {
     output_dims.push_back(int64_t(6 + 2));
     std::vector<std::string> output{
         start_mark,
-        u8"А", u8"б", u8"с", u8"у", u8"中", u8"文",
+        "А", "б", "с", "у", "中", "文",
         end_mark,
         start_mark,
-        u8"К", u8"о", u8"ñ", u8"ó",
+        "К", "о", "ñ", "ó",
         end_mark,
         padval,
         padval};
@@ -285,7 +285,7 @@ TEST(ContribOpTest, TokenizerCharLevel_EmptyOutputC) {
     InitTestAttr(test, true, {""}, 1);
 
     std::vector<int64_t> dims{2};
-    std::vector<std::string> input{u8"", u8""};
+    std::vector<std::string> input{"", ""};
     test.AddInput<std::string>("T", dims, input);
 
     std::vector<int64_t> output_dims(dims);
@@ -306,7 +306,7 @@ TEST(ContribOpTest, TokenizerCharLevel_EmptyOutputNC) {
     InitTestAttr(test, true, {""}, 1);
 
     std::vector<int64_t> dims{2, 2};
-    std::vector<std::string> input{u8"", u8"", u8"", u8""};
+    std::vector<std::string> input{"", "", "", ""};
     test.AddInput<std::string>("T", dims, input);
 
     std::vector<int64_t> output_dims(dims);
@@ -325,13 +325,13 @@ TEST(ContribOpTest, TokenizerWithSeparators_MixCharsWithMarkersC) {
   // [C] dimensions
   // Output [C][D]
   {
-    std::string sepexp = u8"(у|ñ)";
+    std::string sepexp = "(у|ñ)";
 
     OpTester test("Tokenizer", opset_ver, domain);
     InitTestAttr(test, true, {sepexp}, 1);
 
     std::vector<int64_t> dims{2};
-    std::vector<std::string> input{u8"Абсу中文", u8"Коñó"};
+    std::vector<std::string> input{"Абсу中文", "Коñó"};
     test.AddInput<std::string>("T", dims, input);
 
     std::vector<int64_t> output_dims(dims);
@@ -339,10 +339,10 @@ TEST(ContribOpTest, TokenizerWithSeparators_MixCharsWithMarkersC) {
     output_dims.push_back(int64_t(2 + 2));
     std::vector<std::string> output{
         start_mark,
-        u8"Абс", u8"中文",
+        "Абс", "中文",
         end_mark,
         start_mark,
-        u8"Ко", u8"ó",
+        "Ко", "ó",
         end_mark};
 
     test.AddOutput<std::string>("Y", output_dims, output);
@@ -355,13 +355,13 @@ TEST(ContribOpTest, TokenizerWithSeparators_MixCharsWithMarkersCompleteMatchEmpt
   // Test entire separators match so we get nothing
   // in the output
   {
-    std::string sepexp = u8"(Абсу中文)|(Коñó)";
+    std::string sepexp = "(Абсу中文)|(Коñó)";
 
     OpTester test("Tokenizer", opset_ver, domain);
     InitTestAttr(test, true, {sepexp}, 1);
 
     std::vector<int64_t> dims{2};
-    std::vector<std::string> input{u8"Абсу中文", u8"Коñó"};
+    std::vector<std::string> input{"Абсу中文", "Коñó"};
     test.AddInput<std::string>("T", dims, input);
 
     std::vector<int64_t> output_dims(dims);
@@ -378,13 +378,13 @@ TEST(ContribOpTest, TokenizerWithSeparators_MixCharsWithMarkersCompleteMatchEmpt
 TEST(ContribOpTest, TokenizerWithSeparators_MixCharsWithMarkersStartMatchC) {
   // Match the start
   {
-    std::string sepexp = u8"(А)|(К)";
+    std::string sepexp = "(А)|(К)";
 
     OpTester test("Tokenizer", opset_ver, domain);
     InitTestAttr(test, true, {sepexp}, 1);
 
     std::vector<int64_t> dims{2};
-    std::vector<std::string> input{u8"Абсу中文", u8"Коñó"};
+    std::vector<std::string> input{"Абсу中文", "Коñó"};
     test.AddInput<std::string>("T", dims, input);
 
     std::vector<int64_t> output_dims(dims);
@@ -392,10 +392,10 @@ TEST(ContribOpTest, TokenizerWithSeparators_MixCharsWithMarkersStartMatchC) {
     output_dims.push_back(int64_t(3));
     std::vector<std::string> output{
         start_mark,
-        u8"бсу中文",
+        "бсу中文",
         end_mark,
         start_mark,
-        u8"оñó",
+        "оñó",
         end_mark};
 
     test.AddOutput<std::string>("Y", output_dims, output);
@@ -407,13 +407,13 @@ TEST(ContribOpTest, TokenizerWithSeparators_MixCharsWithMarkersStartMatchC) {
 TEST(ContribOpTest, TokenizerWithSeparators_MixCharsWithMarkersEndMatchC) {
   // Match the end
   {
-    std::string sepexp = u8"(文)|(ó)";
+    std::string sepexp = "(文)|(ó)";
 
     OpTester test("Tokenizer", opset_ver, domain);
     InitTestAttr(test, true, {sepexp}, 1);
 
     std::vector<int64_t> dims{2};
-    std::vector<std::string> input{u8"Абсу中文", u8"Коñó"};
+    std::vector<std::string> input{"Абсу中文", "Коñó"};
     test.AddInput<std::string>("T", dims, input);
 
     std::vector<int64_t> output_dims(dims);
@@ -421,10 +421,10 @@ TEST(ContribOpTest, TokenizerWithSeparators_MixCharsWithMarkersEndMatchC) {
     output_dims.push_back(int64_t(3));
     std::vector<std::string> output{
         start_mark,
-        u8"Абсу中",
+        "Абсу中",
         end_mark,
         start_mark,
-        u8"Коñ",
+        "Коñ",
         end_mark};
 
     test.AddOutput<std::string>("Y", output_dims, output);
@@ -436,13 +436,13 @@ TEST(ContribOpTest, TokenizerWithSeparators_MixCharsWithMarkersEndMatchC) {
 TEST(ContribOpTest, TokenizerWithSeparators_MixCharsWithMarkersEndMatchAtLeast4CharsC) {
   // Match the end, require at least 4 chars
   {
-    std::string sepexp = u8"(文)|(ó)";
+    std::string sepexp = "(文)|(ó)";
 
     OpTester test("Tokenizer", opset_ver, domain);
     InitTestAttr(test, true, {sepexp}, 4);
 
     std::vector<int64_t> dims{2};
-    std::vector<std::string> input{u8"Абсу中文", u8"Коñó"};
+    std::vector<std::string> input{"Абсу中文", "Коñó"};
     test.AddInput<std::string>("T", dims, input);
 
     std::vector<int64_t> output_dims(dims);
@@ -451,7 +451,7 @@ TEST(ContribOpTest, TokenizerWithSeparators_MixCharsWithMarkersEndMatchAtLeast4C
     output_dims.push_back(int64_t(3));
     std::vector<std::string> output{
         start_mark,
-        u8"Абсу中",
+        "Абсу中",
         end_mark,
         start_mark,
         end_mark,
@@ -466,13 +466,13 @@ TEST(ContribOpTest, TokenizerWithSeparators_MixCharsWithMarkersEndMatchAtLeast4C
 TEST(ContribOpTest, TokenizerWithSeparators_MixCharsWithMarkersEmptyInputEmptyOutputC) {
   // Empty input for [C] should produce [C][0]
   {
-    std::string sepexp = u8"(文)|(ó)";
+    std::string sepexp = "(文)|(ó)";
 
     OpTester test("Tokenizer", opset_ver, domain);
     InitTestAttr(test, true, {sepexp}, 4);
 
     std::vector<int64_t> dims{2};
-    std::vector<std::string> input{u8"", u8""};
+    std::vector<std::string> input{"", ""};
     test.AddInput<std::string>("T", dims, input);
 
     std::vector<int64_t> output_dims(dims);
@@ -488,13 +488,13 @@ TEST(ContribOpTest, TokenizerWithSeparators_MixCharsWithMarkersEmptyInputEmptyOu
 TEST(ContribOpTest, TokenizerWithSeparators_MixCharsWithMarkersEmptyInputEmptyOutputNC) {
   // Empty input for [N][C] should produce [N][C][0]
   {
-    std::string sepexp = u8"(文)|(ó)";
+    std::string sepexp = "(文)|(ó)";
 
     OpTester test("Tokenizer", opset_ver, domain);
     InitTestAttr(test, true, {sepexp}, 4);
 
     std::vector<int64_t> dims{2, 2};
-    std::vector<std::string> input{u8"", u8"文", u8"ó", u8""};
+    std::vector<std::string> input{"", "文", "ó", ""};
     test.AddInput<std::string>("T", dims, input);
 
     std::vector<int64_t> output_dims(dims);
@@ -514,20 +514,20 @@ TEST(ContribOpTest, TokenizerWithSeparators_MixCharsNoMarkersSeparatorsOverlapSh
   {
     // In this case the first pattern must match first
     // and there would be no match for the second
-    std::vector<std::string> separators = {u8"су", u8"Абсу"};
+    std::vector<std::string> separators = {"су", "Абсу"};
 
     OpTester test("Tokenizer", opset_ver, domain);
     InitTestAttr(test, false, separators, 1);
 
     std::vector<int64_t> dims{1};
-    std::vector<std::string> input{u8"Абсу中文"};
+    std::vector<std::string> input{"Абсу中文"};
     test.AddInput<std::string>("T", dims, input);
 
     std::vector<int64_t> output_dims(dims);
     // must split in 2 with no two middle characters
     output_dims.push_back(int64_t(2));
     std::vector<std::string> output{
-        u8"Аб", u8"中文"};
+        "Аб", "中文"};
 
     test.AddOutput<std::string>("Y", output_dims, output);
 
@@ -543,21 +543,21 @@ TEST(ContribOpTest, TokenizerWithSeparators_MixCharsNoMarkersSeparatorsOverlapLo
     // In this case the first pattern must match first
     // and there would be no match for the second
     std::vector<std::string> separators = {
-        u8"Абсу",
-        u8"су"};
+        "Абсу",
+        "су"};
 
     OpTester test("Tokenizer", opset_ver, domain);
     InitTestAttr(test, false, separators, 1);
 
     std::vector<int64_t> dims{1};
-    std::vector<std::string> input{u8"Абсу中文"};
+    std::vector<std::string> input{"Абсу中文"};
     test.AddInput<std::string>("T", dims, input);
 
     std::vector<int64_t> output_dims(dims);
     // Must drop the beginning of the word that
     // also contains the second separator
     output_dims.push_back(int64_t(1));
-    std::vector<std::string> output{u8"中文"};
+    std::vector<std::string> output{"中文"};
 
     test.AddOutput<std::string>("Y", output_dims, output);
 
@@ -573,21 +573,21 @@ TEST(ContribOpTest, TokenizerWithSeparators_MixCharsNoMarkersSeparatorsOverlapLo
     // In this case the first pattern must match first
     // and there would be no match for the second
     std::vector<std::string> separators = {
-        u8"Абсу",
-        u8"су"};
+        "Абсу",
+        "су"};
 
     OpTester test("Tokenizer", opset_ver, domain);
     InitTestAttr(test, false, separators, 1);
 
     std::vector<int64_t> dims{1};
-    std::vector<std::string> input{u8"Абсусусу中文"};
+    std::vector<std::string> input{"Абсусусу中文"};
     test.AddInput<std::string>("T", dims, input);
 
     std::vector<int64_t> output_dims(dims);
     // Must drop the beginning of the word that
     // also contains the second separator
     output_dims.push_back(int64_t(1));
-    std::vector<std::string> output{u8"中文"};
+    std::vector<std::string> output{"中文"};
 
     test.AddOutput<std::string>("Y", output_dims, output);
 
@@ -605,21 +605,21 @@ TEST(ContribOpTest, TokenizerWithSeparators_MixCharsNoMarkersSeparatorsOverlapin
     // so the earlier match for the first wins.
     // and there would be no match for the second
     std::vector<std::string> separators = {
-        u8"усу",
-        u8"Абсу"};
+        "усу",
+        "Абсу"};
 
     OpTester test("Tokenizer", opset_ver, domain);
     InitTestAttr(test, false, separators, 1);
 
     std::vector<int64_t> dims{1};
-    std::vector<std::string> input{u8"Абсусусу中文"};
+    std::vector<std::string> input{"Абсусусу中文"};
     test.AddInput<std::string>("T", dims, input);
 
     std::vector<int64_t> output_dims(dims);
     // Must drop the beginning of the word that
     // also contains the second separator
     output_dims.push_back(int64_t(2));
-    std::vector<std::string> output{u8"Абс", u8"су中文"};
+    std::vector<std::string> output{"Абс", "су中文"};
 
     test.AddOutput<std::string>("Y", output_dims, output);
 
@@ -633,14 +633,14 @@ TEST(ContribOpTest, TokenizerWithSeparators_MixCharCommonPrefixC) {
   // [C] dimensions
   // Output [C][D]
   std::vector<std::string> separators = {
-      u8";",
-      u8";;;"};
+      ";",
+      ";;;"};
 
   OpTester test("Tokenizer", opset_ver, domain);
   InitTestAttr(test, true, separators, 1);
 
   std::vector<int64_t> dims{4};
-  std::vector<std::string> input{u8"a;b", u8"a;;;b", u8"b;c;;;d;e", u8"a;;b;;;c"};
+  std::vector<std::string> input{"a;b", "a;;;b", "b;c;;;d;e", "a;;b;;;c"};
   test.AddInput<std::string>("T", dims, input);
 
   std::vector<int64_t> output_dims(dims);
@@ -648,27 +648,27 @@ TEST(ContribOpTest, TokenizerWithSeparators_MixCharCommonPrefixC) {
   output_dims.push_back(int64_t(6));
   std::vector<std::string> output{
       start_mark,
-      u8"a",
-      u8"b",
+      "a",
+      "b",
       end_mark,
       padval,
       padval,
       start_mark,
-      u8"a",
-      u8"b",
+      "a",
+      "b",
       end_mark,
       padval,
       padval,
       start_mark,
-      u8"b",
-      u8"c",
-      u8"d",
-      u8"e",
+      "b",
+      "c",
+      "d",
+      "e",
       end_mark,
       start_mark,
-      u8"a",
-      u8"b",
-      u8"c",
+      "a",
+      "b",
+      "c",
       end_mark,
       padval,
   };
@@ -679,27 +679,27 @@ TEST(ContribOpTest, TokenizerWithSeparators_MixCharCommonPrefixC) {
 
 TEST(ContribOpTest, TokenizerExpression_RegEx) {
   OpTester test("Tokenizer", opset_ver, domain);
-  const std::string tokenexp(u8"a.");
+  const std::string tokenexp("a.");
   InitTestAttr(test, true, {}, 1, tokenexp);
 
   std::vector<int64_t> dims{4};
-  std::vector<std::string> input{u8"a;b", u8"a;;;b", u8"b;c;;;d;e", u8"a;;b;;;c"};
+  std::vector<std::string> input{"a;b", "a;;;b", "b;c;;;d;e", "a;;b;;;c"};
   test.AddInput<std::string>("T", dims, input);
 
   std::vector<int64_t> output_dims(dims);
   output_dims.push_back(int64_t(3));
   std::vector<std::string> output{
       start_mark,
-      u8"a;",
+      "a;",
       end_mark,
       start_mark,
-      u8"a;",
+      "a;",
       end_mark,
       start_mark,
       end_mark,
       padval,
       start_mark,
-      u8"a;",
+      "a;",
       end_mark,
   };
 
@@ -709,11 +709,11 @@ TEST(ContribOpTest, TokenizerExpression_RegEx) {
 
 TEST(ContribOpTest, TokenizerExpression_RegRep) {
   OpTester test("Tokenizer", opset_ver, domain);
-  const std::string tokenexp(u8"c;+");
+  const std::string tokenexp("c;+");
   InitTestAttr(test, true, {}, 1, tokenexp);
 
   std::vector<int64_t> dims{4};
-  std::vector<std::string> input{u8"a;b", u8"a;;;b", u8"b;c;;;d;e", u8"a;;b;;;c"};
+  std::vector<std::string> input{"a;b", "a;;;b", "b;c;;;d;e", "a;;b;;;c"};
   test.AddInput<std::string>("T", dims, input);
 
   std::vector<int64_t> output_dims(dims);
@@ -726,7 +726,7 @@ TEST(ContribOpTest, TokenizerExpression_RegRep) {
       end_mark,
       padval,
       start_mark,
-      u8"c;;;",
+      "c;;;",
       end_mark,
       start_mark,
       end_mark,
@@ -738,31 +738,31 @@ TEST(ContribOpTest, TokenizerExpression_RegRep) {
 
 TEST(ContribOpTest, TokenizerExpression_Grouping) {
   OpTester test("Tokenizer", opset_ver, domain);
-  const std::string tokenexp(u8"(a;)|(b;)");
+  const std::string tokenexp("(a;)|(b;)");
   InitTestAttr(test, true, {}, 1, tokenexp);
 
   std::vector<int64_t> dims{4};
-  std::vector<std::string> input{u8"a;b", u8"a;;;b", u8"b;c;;;d;e", u8"a;;b;;;c"};
+  std::vector<std::string> input{"a;b", "a;;;b", "b;c;;;d;e", "a;;b;;;c"};
   test.AddInput<std::string>("T", dims, input);
 
   std::vector<int64_t> output_dims(dims);
   output_dims.push_back(int64_t(4));
   std::vector<std::string> output{
       start_mark,
-      u8"a;",
+      "a;",
       end_mark,
       padval,
       start_mark,
-      u8"a;",
+      "a;",
       end_mark,
       padval,
       start_mark,
-      u8"b;",
+      "b;",
       end_mark,
       padval,
       start_mark,
-      u8"a;",
-      u8"b;",
+      "a;",
+      "b;",
       end_mark};
 
   test.AddOutput<std::string>("Y", output_dims, output);
@@ -771,22 +771,22 @@ TEST(ContribOpTest, TokenizerExpression_Grouping) {
 
 TEST(ContribOpTest, TokenizerExpression_RegDot) {
   OpTester test("Tokenizer", opset_ver, domain);
-  const std::string tokenexp(u8".");
+  const std::string tokenexp(".");
   InitTestAttr(test, true, {}, 1, tokenexp);
 
   std::vector<int64_t> dims{1};
-  std::vector<std::string> input{u8"a;;;b"};
+  std::vector<std::string> input{"a;;;b"};
   test.AddInput<std::string>("T", dims, input);
 
   std::vector<int64_t> output_dims(dims);
   output_dims.push_back(int64_t(7));
   std::vector<std::string> output{
       start_mark,
-      u8"a",
-      u8";",
-      u8";",
-      u8";",
-      u8"b",
+      "a",
+      ";",
+      ";",
+      ";",
+      "b",
       end_mark};
 
   test.AddOutput<std::string>("Y", output_dims, output);
@@ -795,19 +795,19 @@ TEST(ContribOpTest, TokenizerExpression_RegDot) {
 
 TEST(ContribOpTest, TokenizerExpression_RegChar) {
   OpTester test("Tokenizer", opset_ver, domain);
-  const std::string tokenexp(u8"\\w");
+  const std::string tokenexp("\\w");
   InitTestAttr(test, true, {}, 1, tokenexp);
 
   std::vector<int64_t> dims{1};
-  std::vector<std::string> input{u8"a;;;b"};
+  std::vector<std::string> input{"a;;;b"};
   test.AddInput<std::string>("T", dims, input);
 
   std::vector<int64_t> output_dims(dims);
   output_dims.push_back(int64_t(4));
   std::vector<std::string> output{
       start_mark,
-      u8"a",
-      u8"b",
+      "a",
+      "b",
       end_mark};
 
   test.AddOutput<std::string>("Y", output_dims, output);
diff --git a/onnxruntime/test/framework/function_test.cc b/onnxruntime/test/framework/function_test.cc
index e126979532644..9ab78cac3aca4 100644
--- a/onnxruntime/test/framework/function_test.cc
+++ b/onnxruntime/test/framework/function_test.cc
@@ -6,35 +6,47 @@
 #include "onnx/defs/parser.h"
 
 #include "core/common/span_utils.h"
+#include "core/framework/customregistry.h"
+#include "core/framework/op_kernel.h"
 #include "core/graph/model.h"
 #include "core/providers/cpu/cpu_execution_provider.h"
 #include "core/session/inference_session.h"
 
 #include "test/test_environment.h"
 #include "test/framework/test_utils.h"
+#include "inference_session_wrapper.h"
 #include "test/common/tensor_op_test_utils.h"
 #include "test/util/include/asserts.h"
 
+#include "test/providers/internal_testing/internal_testing_execution_provider.h"
+
 // Unit tests to check the implementation of functions, model-local functions,
 // function-inlining etc.
 
 namespace onnxruntime {
 namespace test {
 
-static void Check(const char* source,
-                  const char* input_name, std::vector<float> input_values,
-                  const char* output_name, std::vector<float> output_values) {
-  // Convert source-representation of model to ModelProto:
+// Convert source-representation of model to ModelProto:
+static void ParseOnnxSource(const char* source, std::string& result) {
   ONNX_NAMESPACE::OnnxParser parser(source);
   ONNX_NAMESPACE::ModelProto model;
   auto parse_status = parser.Parse(model);
   ASSERT_TRUE(parse_status.IsOK()) << parse_status.ErrorMessage();
   ASSERT_TRUE(parser.EndOfInput()) << "Extra unparsed input unexpected.";
 
-  // Serialize and then load model:
+  // Serialize
   std::string serialized_model;
   const bool serialization_status = model.SerializeToString(&serialized_model);
   ASSERT_TRUE(serialization_status) << "Failed to serialize proto to string";
+  result = std::move(serialized_model);
+}
+
+static void Check(const char* source,
+                  const char* input_name, std::vector<float> input_values,
+                  const char* output_name, std::vector<float> output_values) {
+  // Serialize and then load model:
+  std::string serialized_model;
+  ParseOnnxSource(source, serialized_model);
 
   SessionOptions session_options;
   InferenceSession session_object{session_options, GetEnvironment()};
@@ -69,12 +81,14 @@ static void Check(const char* source,
   float threshold = 0.001f;
 
   for (size_t i = 0; i < size; ++i) {
-    ASSERT_NEAR(data[i], output_values[i], threshold) << "at position i:" << i;
+    if (!std::isnan(data[i]) && !std::isnan(output_values[i])) {
+      ASSERT_NEAR(data[i], output_values[i], threshold) << "at position i:" << i;
+    }
   }
 }
 
-TEST(FunctionTest, Basic) {
-  const char* code = R"(
+namespace {
+const char* basic_code = R"(
         <
         ir_version: 8,
         opset_import: [ "" : 16, "local" : 1 ]
@@ -93,8 +107,10 @@ TEST(FunctionTest, Basic) {
             ly = Mul (lx, two)
         }
         )";
+}
 
-  Check(code, "x", {1.0, 2.0, 3.0}, "y", {2.0, 4.0, 6.0});
+TEST(FunctionTest, Basic) {
+  Check(basic_code, "x", {1.0, 2.0, 3.0}, "y", {2.0, 4.0, 6.0});
 }
 
 // Check that variables are renamed to avoid conflicts when multiple
@@ -389,25 +405,13 @@ TEST(FunctionTest, AttrSaturateNan) {
         >
         agraph (float[N] x) => (float[N] y)
         {
-            y0 = local.myfun <a = 1e6> (x)
-            y1 = local.myfun (x)
-            y = Add (y0, y1)
-        }
-
-        <
-        opset_import: [ "" : 19 ],
-        domain: "local"
-        >
-        myfun <a: float=1.0> (x) => (y) {
-            x2 = Constant <value_float: float=@a>()
-            x2_ = Cast<to=18>(x2)
-            x3 = CastLike<saturate=0>(x2, x2_)
-            x3_ = Cast<to=1>(x3)
-            y = Add (x, x3_)
+            x_E4M3FNUZ = Cast<to=18>(x)
+            x_E4M3FNUZ_2 = CastLike<saturate=0>(x, x_E4M3FNUZ)  # NaN when OOR
+            y = Cast<to=1>(x_E4M3FNUZ_2)
         }
         )";
 
-  Check(code, "x", {1.0, 2.0, 1e6}, "y", {243.0, 245.0, 2000241});  // std::numeric_limits<float>::quiet_NaN()});
+  Check(code, "x", {1.0, 2.0, 1e6}, "y", {1.0, 2.0, std::numeric_limits<float>::quiet_NaN()});
 }
 
 #endif
@@ -530,5 +534,85 @@ TEST(FunctionTest, ConstantFoldingInSubGraph) {
   Check(code, "X", {1.0, 2.0, 3.0}, "Y", {3.0, 4.0, 5.0, 3.0, 4.0, 5.0, 3.0, 4.0, 5.0});
 }
 
+TEST(FunctionTest, TestInlinedLocalFunctionRemoved) {
+  std::string serialized_model;
+  ParseOnnxSource(basic_code, serialized_model);
+
+  // Default is to do AOT Function inlining
+  SessionOptions session_options;
+  InferenceSessionWrapper session_object{session_options, GetEnvironment()};
+
+  std::stringstream sstr(serialized_model);
+  ASSERT_STATUS_OK(session_object.Load(sstr));
+
+  auto model_proto = session_object.GetModel().ToProto();
+  ASSERT_EQ(1, model_proto.functions_size());
+
+  ASSERT_STATUS_OK(session_object.Initialize());
+
+  // All functions removed
+  model_proto = session_object.GetModel().ToProto();
+  ASSERT_EQ(0, model_proto.functions_size());
+}
+
+TEST(FunctionTest, TestInlinedLocalFunctionNotRemoved) {
+  std::string serialized_model;
+  ParseOnnxSource(basic_code, serialized_model);
+
+  // Default is to do AOT Function inlining
+  SessionOptions session_options;
+  InferenceSessionWrapper session_object{session_options, GetEnvironment()};
+
+  using InternalTestingEP = onnxruntime::internal_testing_ep::InternalTestingExecutionProvider;
+  const std::unordered_set<std::string> empty_set;
+  auto internal_testing_ep = std::make_unique<InternalTestingEP>(empty_set, empty_set, DataLayout::NCHW);
+  internal_testing_ep->EnableStaticKernels().TakeAllNodes();
+
+  ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(std::move(internal_testing_ep)));
+
+  std::stringstream sstr(serialized_model);
+  ASSERT_STATUS_OK(session_object.Load(sstr));
+
+  auto model_proto = session_object.GetModel().ToProto();
+  ASSERT_EQ(1, model_proto.functions_size());
+
+  ASSERT_STATUS_OK(session_object.Initialize());
+
+  // myfun is not removed because it was claimed by InternalTestingEP
+  model_proto = session_object.GetModel().ToProto();
+#ifdef USE_TVM
+  // TVM EP takes the whole graph and optimizes it within its own framework.
+  // It does not retain the original graph.
+  ASSERT_EQ(0, model_proto.functions_size());
+#else
+  ASSERT_EQ(1, model_proto.functions_size());
+#endif
+}
+
+TEST(FunctionTest, TestInlinedFunctionDoesNotReserrectNonExistingArgs) {
+  // Verify this runs
+  constexpr const ORTCHAR_T* model_uri = ORT_TSTR("testdata/transform/gh_issue_18338.onnx");
+
+  SessionOptions session_options;
+  InferenceSessionWrapper session_object{session_options, GetEnvironment()};
+
+  ASSERT_STATUS_OK(session_object.Load(model_uri));
+  ASSERT_STATUS_OK(session_object.Initialize());
+
+  // Scalar shape for input_0 and output
+  const std::string input_names[] = {"input_0"};
+  const std::string output_names[] = {"_val_3"};
+  TensorShape input_shape;
+  MLFloat16 input_0_data{684.f};
+
+  OrtValue input_0;
+  Tensor::InitOrtValue(DataTypeImpl::GetType<MLFloat16>(), input_shape, &input_0_data, OrtMemoryInfo(), input_0);
+
+  std::vector<OrtValue> fetches(1);
+  RunOptions run_options;
+  ASSERT_STATUS_OK(session_object.Run(run_options, AsSpan(input_names), AsSpan({input_0}),
+                                      AsSpan(output_names), &fetches, 0));
+}
+
 }  // namespace test
 }  // namespace onnxruntime
diff --git a/onnxruntime/test/framework/inference_session_test.cc b/onnxruntime/test/framework/inference_session_test.cc
index 077c6ff58e2da..486ec37d1eebd 100644
--- a/onnxruntime/test/framework/inference_session_test.cc
+++ b/onnxruntime/test/framework/inference_session_test.cc
@@ -890,6 +890,47 @@ TEST(InferenceSessionTests, ConfigureVerbosityLevel) {
 #endif
 }
 
+TEST(InferenceSessionTests, UseUserSpecifiedLoggingFunctionInSession) {
+  SessionOptions so;
+  /*
+  typedef void(ORT_API_CALL* OrtLoggingFunction)(
+      void* param, OrtLoggingLevel severity, const char* category, const char* logid, const char* code_location,
+      const char* message);
+  */
+  std::vector<std::string> log_msgs;
+  so.user_logging_function = [](void* param, OrtLoggingLevel severity, const char* category, const char* logid, const char* code_location,
+                                const char* message) {
+    ORT_UNUSED_PARAMETER(severity);
+    ORT_UNUSED_PARAMETER(category);
+    ORT_UNUSED_PARAMETER(logid);
+    ORT_UNUSED_PARAMETER(code_location);
+    std::vector<std::string>* v_ptr = reinterpret_cast<std::vector<std::string>*>(param);
+    std::vector<std::string>& msg_vector = *v_ptr;
+    msg_vector.push_back(std::string(message));
+  };
+  so.user_logging_param = &log_msgs;
+  so.session_log_severity_level = static_cast<int>(Severity::kVERBOSE);
+  so.session_log_verbosity_level = 1;
+  so.session_logid = "InferenceSessionTests.UseUserSpecifiedLoggingFunctionInSession";
+
+  InferenceSession session_object{so, GetEnvironment()};
+  ASSERT_STATUS_OK(session_object.Load(MODEL_URI));
+  ASSERT_STATUS_OK(session_object.Initialize());
+
+  RunOptions run_options;
+  run_options.run_tag = "one session/one tag";
+  RunModel(session_object, run_options);
+
+// vlog output is disabled in release builds
+#ifndef NDEBUG
+  bool have_log_entry_with_vlog_session_msg =
+      (std::find_if(log_msgs.begin(), log_msgs.end(),
+                    [&](std::string msg) { return msg.find("Added input argument with name") != string::npos; }) !=
+       log_msgs.end());
+  ASSERT_TRUE(have_log_entry_with_vlog_session_msg);
+#endif
+}
+
 TEST(InferenceSessionTests, TestWithIstream) {
   SessionOptions so;
 
@@ -2056,7 +2097,7 @@ TEST(InferenceSessionTests, TestStrictShapeInference) {
 
   ASSERT_STATUS_OK(session_options.config_options.AddConfigEntry(kOrtSessionOptionsConfigStrictShapeTypeInference, "1"));
   tester.Run(session_options, OpTester::ExpectResult::kExpectFailure,
-             "Mismatch between number of source and target dimensions. Source=1 Target=2",
+             "Mismatch between number of inferred and declared dimensions. inferred=1 declared=2",
              excluded_provider_types);
 }
 
diff --git a/onnxruntime/test/framework/insert_cast_transformer_test.cc b/onnxruntime/test/framework/insert_cast_transformer_test.cc
index c38baee39216b..1804c09043c7b 100644
--- a/onnxruntime/test/framework/insert_cast_transformer_test.cc
+++ b/onnxruntime/test/framework/insert_cast_transformer_test.cc
@@ -4,6 +4,7 @@
 #include "core/framework/allocator.h"
 #include "core/optimizer/insert_cast_transformer.h"
 #include "core/graph/model.h"
+#include "core/graph/node_attr_utils.h"
 #include "gtest/gtest.h"
 #include "test_utils.h"
 #include "test/test_environment.h"
@@ -110,6 +111,70 @@ TEST(TransformerTest, InsertCastAllCPUTest) {
   }
 }
 
+TEST(TransformerTest, CastRemovalDoesNotLowerPrecisionTest) {
+  auto model = std::make_shared<onnxruntime::Model>("test", false, DefaultLoggingManager().DefaultLogger());
+  onnxruntime::Graph& graph = model->MainGraph();
+  TypeProto tensor_float_32;
+  tensor_float_32.mutable_tensor_type()->set_elem_type(TensorProto_DataType_FLOAT);
+  TypeProto tensor_float_64;
+  tensor_float_64.mutable_tensor_type()->set_elem_type(TensorProto_DataType_DOUBLE);
+  onnxruntime::NodeArg n1_def("N1", &tensor_float_64),
+      n2_def("N2", &tensor_float_32),
+      n3_def("N3", &tensor_float_64);
+
+  NodeAttributes n1_attrs = {{"to", utils::MakeAttribute("to", static_cast<int64_t>(ONNX_NAMESPACE::TensorProto_DataType_FLOAT))}};
+  NodeAttributes n2_attrs = {{"to", utils::MakeAttribute("to", static_cast<int64_t>(ONNX_NAMESPACE::TensorProto_DataType_DOUBLE))}};
+
+  graph.AddNode("node1", "Cast", "F64 to F32 cast", ArgMap{&n1_def}, ArgMap{&n2_def}, &n1_attrs);
+  graph.AddNode("node2", "Cast", "F32 to F64 cast", ArgMap{&n2_def}, ArgMap{&n3_def}, &n2_attrs);
+
+  auto status = graph.Resolve();
+  ASSERT_TRUE(status.IsOK()) << status.ErrorMessage();
+
+  InsertCastTransformer cast_inserter("Test", DefaultCpuExecutionProvider()->GetKernelRegistry().get());
+
+  bool modified = true;
+  status = cast_inserter.Apply(graph, modified, DefaultLoggingManager().DefaultLogger());
+  EXPECT_TRUE(status.IsOK()) << status.ErrorMessage();
+  status = graph.Resolve();
+  EXPECT_TRUE(status.IsOK()) << status.ErrorMessage();
+
+  // When casting f64 -> f32 -> f64 we should not be optimising away the cast since there is a loss of precision.
+  EXPECT_EQ(graph.NumberOfNodes(), 2);
+}
+
+TEST(TransformerTest, CastRemovalDoesNotRemoveSignednessTest) {
+  auto model = std::make_shared<onnxruntime::Model>("test", false, DefaultLoggingManager().DefaultLogger());
+  onnxruntime::Graph& graph = model->MainGraph();
+  TypeProto tensor_uint32;
+  tensor_uint32.mutable_tensor_type()->set_elem_type(TensorProto_DataType_UINT32);
+  TypeProto tensor_int32;
+  tensor_int32.mutable_tensor_type()->set_elem_type(TensorProto_DataType_INT32);
+  onnxruntime::NodeArg n1_def("N1", &tensor_int32),
+      n2_def("N2", &tensor_uint32),
+      n3_def("N3", &tensor_int32);
+
+  NodeAttributes n1_attrs = {{"to", utils::MakeAttribute("to", static_cast<int64_t>(ONNX_NAMESPACE::TensorProto_DataType_UINT32))}};
+  NodeAttributes n2_attrs = {{"to", utils::MakeAttribute("to", static_cast<int64_t>(ONNX_NAMESPACE::TensorProto_DataType_INT32))}};
+
+  graph.AddNode("node1", "Cast", "I32 to UI32 cast", ArgMap{&n1_def}, ArgMap{&n2_def}, &n1_attrs);
+  graph.AddNode("node2", "Cast", "UI32 to I32 cast", ArgMap{&n2_def}, ArgMap{&n3_def}, &n2_attrs);
+
+  auto status = graph.Resolve();
+  ASSERT_TRUE(status.IsOK()) << status.ErrorMessage();
+
+  InsertCastTransformer cast_inserter("Test", DefaultCpuExecutionProvider()->GetKernelRegistry().get());
+
+  bool modified = true;
+  status = cast_inserter.Apply(graph, modified, DefaultLoggingManager().DefaultLogger());
+  EXPECT_TRUE(status.IsOK()) << status.ErrorMessage();
+  status = graph.Resolve();
+  EXPECT_TRUE(status.IsOK()) << status.ErrorMessage();
+
+  // When casting i32 -> ui32 -> i32 we should not be optimising away the cast since applying the casts produces a very different result.
+  EXPECT_EQ(graph.NumberOfNodes(), 2);
+}
+
 // test that when there are 3 Cast ops in a row we remove the correct ones
 TEST(TransformerTest, ThreeInARowRemoval) {
   auto model_uri = MODEL_FOLDER ORT_TSTR("triple-cast.onnx");
diff --git a/onnxruntime/test/framework/kernel_type_str_resolver_utils_test.cc b/onnxruntime/test/framework/kernel_type_str_resolver_utils_test.cc
index ac213f70b1272..1c6721fed05a2 100644
--- a/onnxruntime/test/framework/kernel_type_str_resolver_utils_test.cc
+++ b/onnxruntime/test/framework/kernel_type_str_resolver_utils_test.cc
@@ -49,7 +49,9 @@ TEST(KernelTypeStrResolverUtilsTest, VerifyLayoutTransformationRequiredOpsResolv
 #endif  // !defined(DISABLE_CONTRIB_OPS)
 }
 
-// run this test manually to output a hard-coded byte array
+// run this test manually to output a hard-coded byte array.
+// update AddLayoutTransformationRequiredOpsToKernelTypeStrResolver in
+// onnxruntime/core/framework/kernel_type_str_resolver_utils.cc
 TEST(KernelTypeStrResolverUtilsTest, DISABLED_PrintExpectedLayoutTransformationRequiredOpsResolverByteArray) {
 #if defined(DISABLE_CONTRIB_OPS)
   FAIL() << "Contrib ops must be enabled.";
diff --git a/onnxruntime/test/framework/session_state_test.cc b/onnxruntime/test/framework/session_state_test.cc
index 82e5efd92a8f1..e1ce1d4abf81d 100644
--- a/onnxruntime/test/framework/session_state_test.cc
+++ b/onnxruntime/test/framework/session_state_test.cc
@@ -850,6 +850,130 @@ TEST_F(SessionStateTestSharedInitalizersWithPrePacking, test3) {
   ASSERT_EQ(session_state_2.GetUsedSharedPrePackedWeightCounter(), static_cast<size_t>(1));
 }
 
+// Pre-packing enabled + shared initializers +
+// pre-packed weights container + subgraphs =
+// caching enabled in pre-packed weights used in subgraphs
+TEST_F(SessionStateTestSharedInitalizersWithPrePacking, test4) {
+  SessionOptions sess_options;
+  sess_options.enable_mem_pattern = true;
+  sess_options.execution_mode = ExecutionMode::ORT_SEQUENTIAL;
+  sess_options.use_deterministic_compute = false;
+  sess_options.enable_mem_reuse = true;
+  // Enable pre-packing
+  sess_options.config_options.configurations[kOrtSessionOptionsConfigDisablePrepacking] = "0";
+
+  // Enable shared initializer
+  OrtMemoryInfo mem_info(CPU, OrtDeviceAllocator);
+  std::vector<float> float_data(1, 1);
+  auto value = std::make_unique<OrtValue>();
+  Tensor::InitOrtValue(DataTypeImpl::GetType<float>(), TensorShape(std::vector<int64_t>{1}),
+                       reinterpret_cast<void*>(float_data.data()), mem_info, *value);
+
+  ASSERT_STATUS_OK(sess_options.AddInitializer("if_shared", value.get()));
+
+  // Enable pre-packed weights container
+  PrepackedWeightsContainer prepacked_weights_container;
+
+  // First session/model
+  Model model_1("graph_main", false, ModelMetaData(), PathString(), IOnnxRuntimeOpSchemaRegistryList(),
+                domain_to_version, std::vector<ONNX_NAMESPACE::FunctionProto>(),
+                DefaultLoggingManager().DefaultLogger());
+
+  CreateGraphWithSubgraph(model_1.MainGraph());
+  PlaceAllNodesToCPUEP(model_1.MainGraph());
+  SessionState session_state_1(model_1.MainGraph(),
+                               execution_providers,
+                               tp.get(),
+                               nullptr, /*inter_op_thread_pool*/
+                               dtm,
+                               DefaultLoggingManager().DefaultLogger(),
+                               profiler,
+                               sess_options,
+                               &prepacked_weights_container);
+
+  ASSERT_STATUS_OK(session_state_1.FinalizeSessionState(std::basic_string<PATH_CHAR_TYPE>(),
+                                                        kernel_registry_manager));
+
+  // At the main graph level, there should be no pre-packing calls as there are
+  // no initializers (shared or otherwise) consumed by any nodes in the main graph
+  ASSERT_EQ(session_state_1.GetNumberOfPrepacksCounter(), static_cast<size_t>(0));
+
+  auto if_index_1 = 1;
+  if (session_state_1.GetKernel(0)->Node().OpType() == "If") {
+    if_index_1 = 0;
+  }
+
+  const auto& subgraph_session_states = session_state_1.GetSubgraphSessionStateMap();
+  const auto& if_node_session_states = subgraph_session_states.at(if_index_1);
+  const auto& session_state_1_then_branch_session_state = *if_node_session_states.at("then_branch");
+  const auto& session_state_1_else_branch_session_state = *if_node_session_states.at("else_branch");
+
+  auto if_node_branches_prepack_counter_1 =
+      session_state_1_then_branch_session_state.GetNumberOfPrepacksCounter() +
+      session_state_1_else_branch_session_state.GetNumberOfPrepacksCounter();
+
+  // We should be seeing 2 pre-pack calls in the "If" node (one in each subgraph)
+  ASSERT_EQ(if_node_branches_prepack_counter_1, static_cast<size_t>(2));
+
+  auto if_node_branches_shared_prepack_counter_1 =
+      session_state_1_then_branch_session_state.GetUsedSharedPrePackedWeightCounter() +
+      session_state_1_else_branch_session_state.GetUsedSharedPrePackedWeightCounter();
+
+  // We should only be seeing 1 shared pre-pack weights usage in the "If" node
+  // Either the "then branch" or "else branch" will be using the shared version
+  // depending on which branch writes to the shared container
+  ASSERT_EQ(if_node_branches_shared_prepack_counter_1, static_cast<size_t>(1));
+
+  // Second session/model
+  Model model_2("graph_main", false, ModelMetaData(), PathString(), IOnnxRuntimeOpSchemaRegistryList(),
+                domain_to_version, std::vector<ONNX_NAMESPACE::FunctionProto>(),
+                DefaultLoggingManager().DefaultLogger());
+
+  CreateGraphWithSubgraph(model_2.MainGraph());
+  PlaceAllNodesToCPUEP(model_2.MainGraph());
+  SessionState session_state_2(model_2.MainGraph(),
+                               execution_providers,
+                               tp.get(),
+                               nullptr, /*inter_op_thread_pool*/
+                               dtm,
+                               DefaultLoggingManager().DefaultLogger(),
+                               profiler,
+                               sess_options,
+                               &prepacked_weights_container);
+
+  ASSERT_STATUS_OK(session_state_2.FinalizeSessionState(std::basic_string<PATH_CHAR_TYPE>(),
+                                                        kernel_registry_manager));
+
+  // At the main graph level, there should be no pre-packing calls as there are
+  // no initializers (shared or otherwise) consumed by any nodes in the main graph
+  ASSERT_EQ(session_state_2.GetNumberOfPrepacksCounter(), static_cast<size_t>(0));
+
+  auto if_index_2 = 1;
+  if (session_state_2.GetKernel(0)->Node().OpType() == "If") {
+    if_index_2 = 0;
+  }
+
+  const auto& subgraph_session_states_2 = session_state_2.GetSubgraphSessionStateMap();
+  const auto& if_node_session_states_2 = subgraph_session_states_2.at(if_index_2);
+  const auto& session_state_2_then_branch_session_state = *if_node_session_states_2.at("then_branch");
+  const auto& session_state_2_else_branch_session_state = *if_node_session_states_2.at("else_branch");
+
+  auto if_node_branches_prepack_counter_2 =
+      session_state_2_then_branch_session_state.GetNumberOfPrepacksCounter() +
+      session_state_2_else_branch_session_state.GetNumberOfPrepacksCounter();
+
+  // We should be seeing 2 pre-pack calls in the "If" node (one in each subgraph)
+  ASSERT_EQ(if_node_branches_prepack_counter_2, static_cast<size_t>(2));
+
+  auto if_node_branches_shared_prepack_counter_2 =
+      session_state_2_then_branch_session_state.GetUsedSharedPrePackedWeightCounter() +
+      session_state_2_else_branch_session_state.GetUsedSharedPrePackedWeightCounter();
+
+  // We should be seeing 2 shared pre-pack weights calls in the "If" node
+  // Both branches will be using the shared version coming from the first model.
+  ASSERT_EQ(if_node_branches_shared_prepack_counter_2, static_cast<size_t>(2));
+}
+
 INSTANTIATE_TEST_SUITE_P(SessionStateTests,
                          SessionStatePrepackingTest,
                          testing::Values(PrepackingTestParam{false, false},
diff --git a/onnxruntime/test/framework/tensor_test.cc b/onnxruntime/test/framework/tensor_test.cc
index f24064a403c5d..38e3f184ebc18 100644
--- a/onnxruntime/test/framework/tensor_test.cc
+++ b/onnxruntime/test/framework/tensor_test.cc
@@ -214,6 +214,7 @@ TEST(TensorTest, Strided) {
   TensorShape shape({2, 3, 4});
   auto alloc = TestCPUExecutionProvider()->CreatePreferredAllocators()[0];
   void* data = alloc->Alloc(shape.Size() * sizeof(float));
+
   Tensor t(DataTypeImpl::GetType<float>(), shape, data, alloc->Info());
   EXPECT_TRUE(t.IsContiguous());
   const TensorShapeVector strides{12, 4, 1};
@@ -227,6 +228,7 @@ TEST(TensorTest, Strided) {
   ASSERT_EQ(t.Shape(), new_shape);
   ASSERT_THAT(t.Strides(), testing::ContainerEq(gsl::make_span(new_strides)));
   ASSERT_EQ(t.SizeInBytes(), sizeof(float) * 24);
+
   Tensor t2(DataTypeImpl::GetType<float>(), new_shape, data, alloc->Info(), 0L, gsl::make_span(new_strides));
   EXPECT_FALSE(t2.IsContiguous());
   ASSERT_EQ(t2.Shape(), new_shape);
@@ -237,7 +239,9 @@ TEST(TensorTest, Strided) {
   ASSERT_EQ(t2.Shape(), shape);
   ASSERT_THAT(t2.Strides(), testing::ContainerEq(gsl::make_span(strides)));
   ASSERT_EQ(t2.SizeInBytes(), sizeof(float) * 24);
+
   alloc->Free(data);
+
   data = alloc->Alloc(sizeof(int64_t));
   const TensorShapeVector single_element_strides{0, 0, 0};
   Tensor t3(DataTypeImpl::GetType<int64_t>(), shape, data, alloc->Info(), 0L, gsl::make_span(single_element_strides));
@@ -246,8 +250,10 @@ TEST(TensorTest, Strided) {
   ASSERT_THAT(t3.Strides(), testing::ContainerEq(gsl::make_span(single_element_strides)));
   ASSERT_EQ(t3.SizeInBytes(), sizeof(int64_t));
   alloc->Free(data);
+
   const TensorShapeVector zero_strides{0, 0, 0};
-  Tensor t4(DataTypeImpl::GetType<float>(), shape, alloc, zero_strides);
+  Tensor t4(DataTypeImpl::GetType<float>(), shape, alloc);
+  t4.SetShapeAndStrides(shape, zero_strides);
   EXPECT_FALSE(t4.IsContiguous());
   EXPECT_EQ(t4.Shape(), shape);
   ASSERT_THAT(t4.Strides(), testing::ContainerEq(gsl::make_span(zero_strides)));
diff --git a/onnxruntime/test/framework/test_tensor_loader.cc b/onnxruntime/test/framework/test_tensor_loader.cc
index f627409bb0e60..e71830be08b5e 100644
--- a/onnxruntime/test/framework/test_tensor_loader.cc
+++ b/onnxruntime/test/framework/test_tensor_loader.cc
@@ -1,11 +1,13 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
+#include "gtest/gtest.h"
+
 #include "core/common/common.h"
 #include "core/framework/callback.h"
 #include "core/framework/tensorprotoutils.h"
-#include "gtest/gtest.h"
-#include "file_util.h"
+#include "test/util/include/file_util.h"
+#include "test/util/include/asserts.h"
 
 #ifdef _WIN32
 #include <Windows.h>
@@ -30,13 +32,14 @@ TEST(CApiTensorTest, load_simple_float_tensor_not_enough_space) {
   std::vector<float> output(1);
   OrtValue value;
   OrtMemoryInfo cpu_memory_info(onnxruntime::CPU, OrtDeviceAllocator, OrtDevice(), 0, OrtMemTypeDefault);
-  auto st = utils::TensorProtoToMLValue(Env::Default(), nullptr, p,
-                                        MemBuffer(output.data(), output.size() * sizeof(float), cpu_memory_info), value);
-  // check the result
-  ASSERT_FALSE(st.IsOK());
+
+  ASSERT_STATUS_NOT_OK(
+      utils::TensorProtoToOrtValue(Env::Default(), nullptr, p,
+                                   MemBuffer(output.data(), output.size() * sizeof(float), cpu_memory_info),
+                                   value));
 }
 
-TEST(CApiTensorTest, load_simple_float_tensor) {
+TEST(CApiTensorTest, load_simple_float_tensor_membuffer) {
   // construct a tensor proto
   onnx::TensorProto p;
   p.mutable_float_data()->Add(1.0f);
@@ -51,9 +54,37 @@ TEST(CApiTensorTest, load_simple_float_tensor) {
   std::vector<float> output(3);
   OrtValue value;
   OrtMemoryInfo cpu_memory_info(onnxruntime::CPU, OrtDeviceAllocator, OrtDevice(), 0, OrtMemTypeDefault);
-  auto st = utils::TensorProtoToMLValue(Env::Default(), nullptr, p,
-                                        MemBuffer(output.data(), output.size() * sizeof(float), cpu_memory_info), value);
-  ASSERT_TRUE(st.IsOK()) << st.ErrorMessage();
+  ASSERT_STATUS_OK(
+      utils::TensorProtoToOrtValue(Env::Default(), nullptr, p,
+                                   MemBuffer(output.data(), output.size() * sizeof(float), cpu_memory_info),
+                                   value));
+  float* real_output;
+  auto ort_st = g_ort->GetTensorMutableData(&value, (void**)&real_output);
+  ASSERT_EQ(ort_st, nullptr) << g_ort->GetErrorMessage(ort_st);
+  // check the result
+  ASSERT_EQ(real_output[0], 1.0f);
+  ASSERT_EQ(real_output[1], 2.2f);
+  ASSERT_EQ(real_output[2], 3.5f);
+  g_ort->ReleaseStatus(ort_st);
+}
+
+TEST(CApiTensorTest, load_simple_float_tensor_allocator) {
+  // construct a tensor proto
+  onnx::TensorProto p;
+  p.mutable_float_data()->Add(1.0f);
+  p.mutable_float_data()->Add(2.2f);
+  p.mutable_float_data()->Add(3.5f);
+  p.mutable_dims()->Add(3);
+  p.set_data_type(onnx::TensorProto_DataType_FLOAT);
+  std::string s;
+  // save it to a buffer
+  ASSERT_TRUE(p.SerializeToString(&s));
+  // deserialize it
+  AllocatorPtr tmp_allocator = std::make_shared<CPUAllocator>();
+  OrtValue value;
+
+  ASSERT_STATUS_OK(utils::TensorProtoToOrtValue(Env::Default(), nullptr, p, tmp_allocator, value));
+
   float* real_output;
   auto ort_st = g_ort->GetTensorMutableData(&value, (void**)&real_output);
   ASSERT_EQ(ort_st, nullptr) << g_ort->GetErrorMessage(ort_st);
@@ -106,9 +137,9 @@ static void run_external_data_test() {
   }
   OrtValue value;
   OrtMemoryInfo cpu_memory_info(onnxruntime::CPU, OrtDeviceAllocator, OrtDevice(), 0, OrtMemTypeDefault);
-  auto st = utils::TensorProtoToMLValue(Env::Default(), nullptr, p,
-                                        MemBuffer(output.data(), output.size() * sizeof(float), cpu_memory_info), value);
-  ASSERT_TRUE(st.IsOK()) << st.ErrorMessage();
+  ASSERT_STATUS_OK(utils::TensorProtoToOrtValue(
+      Env::Default(), nullptr, p, MemBuffer(output.data(), output.size() * sizeof(float), cpu_memory_info), value));
+
   float* real_output;
   auto ort_st = g_ort->GetTensorMutableData(&value, (void**)&real_output);
   ASSERT_EQ(ort_st, nullptr) << g_ort->GetErrorMessage(ort_st);
@@ -156,11 +187,10 @@ TEST(CApiTensorTest, load_huge_tensor_with_external_data) {
   std::vector<int> output(total_ele_count);
   OrtValue value;
   OrtMemoryInfo cpu_memory_info(onnxruntime::CPU, OrtDeviceAllocator, OrtDevice(), 0, OrtMemTypeDefault);
-  auto st = utils::TensorProtoToMLValue(Env::Default(), nullptr, p,
-                                        MemBuffer(output.data(), output.size() * sizeof(int), cpu_memory_info), value);
+  ASSERT_STATUS_OK(
+      utils::TensorProtoToOrtValue(Env::Default(), nullptr, p,
+                                   MemBuffer(output.data(), output.size() * sizeof(int), cpu_memory_info), value));
 
-  // check the result
-  ASSERT_TRUE(st.IsOK()) << "Error from TensorProtoToMLValue: " << st.ErrorMessage();
   int* buffer;
   auto ort_st = g_ort->GetTensorMutableData(&value, (void**)&buffer);
   ASSERT_EQ(ort_st, nullptr) << "Error from OrtGetTensorMutableData: " << g_ort->GetErrorMessage(ort_st);
diff --git a/onnxruntime/test/framework/tunable_op_test.cc b/onnxruntime/test/framework/tunable_op_test.cc
index bfc46c56975e6..0d9e557ebc813 100644
--- a/onnxruntime/test/framework/tunable_op_test.cc
+++ b/onnxruntime/test/framework/tunable_op_test.cc
@@ -263,6 +263,7 @@ TEST(TunableOp, OpWrapsMutableFunctor) {
 
 class VecAddMoveOnlyFunctor {
  public:
+  VecAddMoveOnlyFunctor() = default;
   VecAddMoveOnlyFunctor(VecAddMoveOnlyFunctor&&) = default;
   ORT_DISALLOW_COPY_AND_ASSIGNMENT(VecAddMoveOnlyFunctor);
 
@@ -288,6 +289,7 @@ TEST(TunableOp, OpWrapsMoveOnlyFunctor) {
 
 class VecAddWithIsSupportedMethod {
  public:
+  VecAddWithIsSupportedMethod() = default;
   VecAddWithIsSupportedMethod(VecAddWithIsSupportedMethod&&) = default;
   ORT_DISALLOW_COPY_AND_ASSIGNMENT(VecAddWithIsSupportedMethod);
 
diff --git a/onnxruntime/test/mlas/bench/bench_q4gemm.cpp b/onnxruntime/test/mlas/bench/bench_q4gemm.cpp
index cf02d4f3628f9..87e3601612761 100644
--- a/onnxruntime/test/mlas/bench/bench_q4gemm.cpp
+++ b/onnxruntime/test/mlas/bench/bench_q4gemm.cpp
@@ -33,7 +33,7 @@ void Q4GEMM(benchmark::State& state, MLAS_BLK_QUANT_TYPE qtype) {
   auto B1 = RandomVectorUniform(static_cast<size_t>(N * K), -1.0f, 1.0f);
   std::vector<float> C1(static_cast<size_t>(M * N));
 
-  std::vector<float> B1_packed(pack_b_size);
+  std::vector<uint8_t> B1_packed(pack_b_size);
   MlasQ4GemmPackB(qtype, B1_packed.data(), B1.data(), N, K, N);
 
   MLAS_Q4_GEMM_DATA_PARAMS params1;
diff --git a/onnxruntime/test/mlas/bench/bench_sgemm.cpp b/onnxruntime/test/mlas/bench/bench_sgemm.cpp
index baa8f1a830ea1..e6e34bc88ad59 100644
--- a/onnxruntime/test/mlas/bench/bench_sgemm.cpp
+++ b/onnxruntime/test/mlas/bench/bench_sgemm.cpp
@@ -128,7 +128,7 @@ BENCHMARK_CAPTURE(SGEMM, PACKB_TransA, true, true, false)->Apply(GemmSizeProduct
 
 static void GemmLLMSizeProducts(benchmark::internal::Benchmark* b) {
   b->ArgNames(sgemm_bench_arg_names);
-  ArgsProduct(b, {{1, 1024, 2048}, {4096}, {4096}});
+  ArgsProduct(b, {{1, 1024, 2048}, {4096, 11008}, {4096, 11008}});
 }
 
 BENCHMARK_CAPTURE(SGEMM, LLM, false, false, true)->Apply(GemmLLMSizeProducts)->UseRealTime();
diff --git a/onnxruntime/test/mlas/bench/bench_sqnbitgemm.cpp b/onnxruntime/test/mlas/bench/bench_sqnbitgemm.cpp
new file mode 100644
index 0000000000000..2f2635dab0512
--- /dev/null
+++ b/onnxruntime/test/mlas/bench/bench_sqnbitgemm.cpp
@@ -0,0 +1,86 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "mlas_q4.h"
+#include "mlas_qnbit.h"
+
+#include <stdexcept>
+
+#include "benchmark/benchmark.h"
+
+#include "bench_util.h"
+#include "core/util/thread_utils.h"
+
+template <size_t BlkBitWidth, size_t BlkLen, bool Symmetric>
+void SQNBITGEMM(benchmark::State& state) {
+  if (state.range(0) <= 0) throw std::invalid_argument("M must greater than 0!");
+  if (state.range(1) <= 0) throw std::invalid_argument("N must greater than 0!");
+  if (state.range(2) <= 0) throw std::invalid_argument("K must greater than 0!");
+  if (state.range(3) <= 0) throw std::invalid_argument("Threads must greater than 0!");
+
+  const size_t M = static_cast<size_t>(state.range(0));
+  const size_t N = static_cast<size_t>(state.range(1));
+  const size_t K = static_cast<size_t>(state.range(2));
+  const size_t threads = static_cast<size_t>(state.range(3));
+
+  size_t QuantBDataSizeInBytes, QuantBScaleSize, QuantBZeroPointSizeInBytes;
+  MlasBlockwiseQuantizedBufferSizes(
+      BlkBitWidth, BlkLen, /* columnwise */ true,
+      static_cast<int>(K), static_cast<int>(N),
+      QuantBDataSizeInBytes, QuantBScaleSize, &QuantBZeroPointSizeInBytes);
+
+  OrtThreadPoolParams tpo;
+  tpo.thread_pool_size = static_cast<int>(threads);
+  tpo.auto_set_affinity = true;
+
+  std::unique_ptr<onnxruntime::concurrency::ThreadPool> tp(
+      onnxruntime::concurrency::CreateThreadPool(&onnxruntime::Env::Default(),
+                                                 tpo, onnxruntime::concurrency::ThreadPoolType::INTRA_OP));
+
+  auto A = RandomVectorUniform(static_cast<size_t>(M * K), -1.0f, 1.0f);
+  auto B = RandomVectorUniform(static_cast<size_t>(K * N), -1.0f, 1.0f);
+  std::vector<float> C(static_cast<size_t>(M * N));
+
+  std::vector<uint8_t> QuantBData(QuantBDataSizeInBytes);
+  std::vector<float> QuantBScale(QuantBScaleSize);
+  std::vector<uint8_t> QuantBZeroPoint(Symmetric ? 0 : QuantBZeroPointSizeInBytes);
+
+  MlasQuantizeBlockwise<float, BlkBitWidth>(QuantBData.data(), QuantBScale.data(),
+                                            Symmetric ? nullptr : QuantBZeroPoint.data(),
+                                            B.data(), BlkLen, /* columnwise */ true,
+                                            static_cast<int>(K), static_cast<int>(N), static_cast<int>(N),
+                                            tp.get());
+
+  MLAS_SQNBIT_GEMM_DATA_PARAMS params{};
+  params.A = A.data();
+  params.lda = K;
+  params.QuantBData = QuantBData.data();
+  params.QuantBScale = QuantBScale.data();
+  params.QuantBZeroPoint = Symmetric ? nullptr : QuantBZeroPoint.data();
+  params.Bias = nullptr;
+  params.C = C.data();
+  params.ldc = N;
+
+  // warm up run
+  MlasSQNBitGemmBatch(M, N, K, 1, BlkBitWidth, BlkLen, &params, tp.get());
+
+  for (auto _ : state) {
+    MlasSQNBitGemmBatch(M, N, K, 1, BlkBitWidth, BlkLen, &params, tp.get());
+  }
+}
+
+static void GemmSizeProducts(benchmark::internal::Benchmark* b) {
+  b->ArgNames({"M", "N", "K", "Threads"});
+  ArgsProduct(b, {{1, 1024, 2048}, {4096, 11008}, {4096, 11008}, {8}});
+}
+
+BENCHMARK(SQNBITGEMM<4, 16, false>)->Apply(GemmSizeProducts)->UseRealTime();
+BENCHMARK(SQNBITGEMM<4, 16, true>)->Apply(GemmSizeProducts)->UseRealTime();
+BENCHMARK(SQNBITGEMM<4, 32, false>)->Apply(GemmSizeProducts)->UseRealTime();
+BENCHMARK(SQNBITGEMM<4, 32, true>)->Apply(GemmSizeProducts)->UseRealTime();
+BENCHMARK(SQNBITGEMM<4, 64, false>)->Apply(GemmSizeProducts)->UseRealTime();
+BENCHMARK(SQNBITGEMM<4, 64, true>)->Apply(GemmSizeProducts)->UseRealTime();
+BENCHMARK(SQNBITGEMM<4, 128, false>)->Apply(GemmSizeProducts)->UseRealTime();
+BENCHMARK(SQNBITGEMM<4, 128, true>)->Apply(GemmSizeProducts)->UseRealTime();
+BENCHMARK(SQNBITGEMM<4, 256, false>)->Apply(GemmSizeProducts)->UseRealTime();
+BENCHMARK(SQNBITGEMM<4, 256, true>)->Apply(GemmSizeProducts)->UseRealTime();
diff --git a/onnxruntime/test/mlas/unittest/test_activation.cpp b/onnxruntime/test/mlas/unittest/test_activation.cpp
index eb3e35d739bb3..2bb0bbcd35e26 100644
--- a/onnxruntime/test/mlas/unittest/test_activation.cpp
+++ b/onnxruntime/test/mlas/unittest/test_activation.cpp
@@ -256,9 +256,6 @@ class MlasActivationTest : public MlasTestBase {
   }
 };
 
-template <>
-MlasActivationTest* MlasTestFixture<MlasActivationTest>::mlas_tester(nullptr);
-
 static UNUSED_VARIABLE bool added_to_main = AddTestRegister([](bool is_short_execute) {
   return is_short_execute ? MlasDirectShortExecuteTests<MlasActivationTest>::RegisterShortExecute() : 0;
 });
diff --git a/onnxruntime/test/mlas/unittest/test_blkq8.cpp b/onnxruntime/test/mlas/unittest/test_blkq8.cpp
index 15bbd1b4cb28d..5cff86d411ca9 100644
--- a/onnxruntime/test/mlas/unittest/test_blkq8.cpp
+++ b/onnxruntime/test/mlas/unittest/test_blkq8.cpp
@@ -150,12 +150,6 @@ class MlasBlkQ8ShortExeTest : public MlasTestFixture<MlasBlkQ8Test<Threaded>> {
   size_t M_, K_;
 };
 
-template <>
-MlasBlkQ8Test<true>* MlasTestFixture<MlasBlkQ8Test<true>>::mlas_tester(nullptr);
-
-template <>
-MlasBlkQ8Test<false>* MlasTestFixture<MlasBlkQ8Test<false>>::mlas_tester(nullptr);
-
 static size_t BlkQ8ReisterShortTests() {
   size_t cnt = 0;
   cnt += MlasBlkQ8ShortExeTest<true>::RegisterShortExecuteTests();
diff --git a/onnxruntime/test/mlas/unittest/test_blockq4.cpp b/onnxruntime/test/mlas/unittest/test_blockq4.cpp
new file mode 100644
index 0000000000000..07f0748fb7ed1
--- /dev/null
+++ b/onnxruntime/test/mlas/unittest/test_blockq4.cpp
@@ -0,0 +1,211 @@
+/*++
+
+Copyright (c) Microsoft Corporation. All rights reserved.
+
+Licensed under the MIT License.
+
+Module Name:
+
+    test_blockq4.cpp
+
+Abstract:
+
+    Tests for MLAS blockwise int4 quantization and dequantization code.
+
+--*/
+
+#ifndef ORT_MINIMAL_BUILD
+
+#include "test_util.h"
+#include "mlas_q4.h"
+
+class MlasBlockwiseQdqTest : public MlasTestBase {
+ private:
+  MatrixGuardBuffer<float> FpBuf;
+  MatrixGuardBuffer<float> FpBuf2;
+  MatrixGuardBuffer<uint8_t> InputElements;
+  MatrixGuardBuffer<float> InputScales;
+  MatrixGuardBuffer<uint8_t> InputOffsets;
+  MatrixGuardBuffer<uint8_t> OutputElements;
+  MatrixGuardBuffer<float> OutputScales;
+  MatrixGuardBuffer<uint8_t> OutputOffsets;
+
+  void Test(int rows, int columns, int block_size, bool columnwise, bool symmetric) {
+    float* dequant_buf = FpBuf.GetBuffer(rows * columns, true);
+    float* transposed = FpBuf2.GetBuffer(rows * columns, true);
+
+    MLAS_THREADPOOL* threadpool_ptr = GetMlasThreadPool();
+
+    int meta_rows;
+    int meta_cols;
+    MlasBlockwiseQuantMetaShape<float, 4>(block_size, columnwise, rows, columns, meta_rows, meta_cols);
+
+    int q_rows;
+    int q_cols;
+    MlasBlockwiseQuantizedShape<float, 4>(block_size, columnwise, rows, columns, q_rows, q_cols);
+
+    size_t q_data_size_in_bytes, q_scale_size, q_zp_size_in_bytes;
+    MlasBlockwiseQuantizedBufferSizes(4, block_size, columnwise, rows, columns,
+                                      q_data_size_in_bytes, q_scale_size, &q_zp_size_in_bytes);
+
+    uint8_t* elements = InputElements.GetBuffer(q_data_size_in_bytes, true);
+
+    int v = 7;
+    for (int c = 0; c < columns; c++) {
+      for (int r = 0; r < rows; r += 2) {
+        int idx = c * q_rows + r / 2;
+        uint8_t v0 = static_cast<uint8_t>(v);
+        v = (v + 5) % 16;
+        if (v == 11 || v == 7 || v == 3) {
+          // making the cycle 13 instead of 16, avoiding same values in a row
+          v = (v + 5) % 16;
+        }
+        uint8_t v1 = 0;
+        if (r + 1 < rows) {
+          v1 = static_cast<uint8_t>(v);
+          v = (v + 5) % 16;
+          if (v == 11 || v == 7 || v == 3) {
+            // making the cycle 13 instead of 16, avoiding same values in a row
+            v = (v + 5) % 16;
+          }
+        }
+
+        elements[idx] = (v1 << 4) | v0;
+      }
+    }
+
+    float* scales = InputScales.GetBuffer(q_scale_size);
+    uint8_t* zp = symmetric ? nullptr : InputOffsets.GetBuffer(q_zp_size_in_bytes, true);
+    if (zp) {
+      for (int c = 0; c < meta_cols; c++) {
+        for (int r = 0; r < meta_rows; r += 2) {
+          int idx = c * ((meta_rows + 1) / 2) + r / 2;
+          uint8_t v0 = static_cast<uint8_t>(v);
+          v = (v + 5) % 16;
+          if (v == 11 || v == 7 || v == 3) {
+            // making the cycle 13 instead of 16, avoiding same values in a row
+            v = (v + 5) % 16;
+          }
+          uint8_t v1 = 0;
+          if (r + 1 < meta_rows) {
+            v1 = static_cast<uint8_t>(v);
+            v = (v + 5) % 16;
+            if (v == 11 || v == 7 || v == 3) {
+              // making the cycle 13 instead of 16, avoiding same values in a row
+              v = (v + 5) % 16;
+            }
+          }
+          zp[idx] = (v1 << 4) | v0;
+        }
+      }
+    }
+
+    MlasDequantizeBlockwise<float, 4>(dequant_buf, elements, scales, zp, block_size,
+                                      columnwise, rows, columns, threadpool_ptr);
+
+    MlasTranspose(dequant_buf, transposed, columns, rows);
+
+    uint8_t* o_elements = OutputElements.GetBuffer(q_rows * q_cols, true);
+    float* o_scales = OutputScales.GetBuffer(meta_rows * meta_cols);
+    uint8_t* o_zp = symmetric ? nullptr : OutputOffsets.GetBuffer(((meta_rows + 1) / 2) * meta_cols, true);
+
+    MlasQuantizeBlockwise<float, 4>(o_elements, o_scales, o_zp, transposed, block_size,
+                                    columnwise, rows, columns, columns, threadpool_ptr);
+
+    for (int c = 0; c < columns; c++) {
+      for (int r = 0; r < rows; r += 2) {
+        int idx = c * q_rows + r / 2;
+        ASSERT_EQ(o_elements[idx] & 0xf, elements[idx] & 0xf)
+            << ", index=[" << r << "x" << c << "], shape=[" << rows << "x" << columns
+            << "] block: " << block_size << ", symmetric: " << symmetric << ", columnwise: " << columnwise;
+        if (r + 1 < rows) {
+          ASSERT_EQ(o_elements[idx] >> 4, elements[idx] >> 4)
+              << ", index=[" << r + 1 << "x" << c << "], shape=[" << rows << "x" << columns
+              << "] block: " << block_size << ", symmetric: " << symmetric << ", columnwise: " << columnwise;
+        }
+      }
+    }
+
+    for (int c = 0; c < meta_cols; c++) {
+      for (int r = 0; r < meta_rows; r++) {
+        int idx = c * meta_rows + r;
+        ASSERT_EQ(o_scales[idx], scales[idx])
+            << ", index=" << r << "x" << c << ", shape=[" << rows << "x" << columns
+            << "] block: " << block_size << ", symmetric: " << symmetric << ", columnwise: " << columnwise;
+      }
+    }
+
+    if (symmetric) return;
+    for (int c = 0; c < meta_cols; c++) {
+      for (int r = 0; r < meta_rows; r += 2) {
+        int idx = c * ((meta_rows + 1) / 2) + r / 2;
+        ASSERT_EQ(o_zp[idx] & 0xf, zp[idx] & 0xf)
+            << ", index=" << r << "x" << c << ", shape=[" << rows << "x" << columns
+            << "] block: " << block_size << ", symmetric: " << symmetric << ", columnwise: " << columnwise;
+        if (r + 1 < meta_rows) {
+          ASSERT_EQ(o_zp[idx] >> 4, zp[idx] >> 4)
+              << ", index=" << r + 1 << "x" << c << ", shape=[" << rows << "x" << columns
+              << "] block: " << block_size << ", symmetric: " << symmetric << ", columnwise: " << columnwise;
+        }
+      }
+    }
+  }
+
+ public:
+  static const char* GetTestSuiteName() {
+    static const std::string suite_name("BlockQ4");
+    return suite_name.c_str();
+  }
+
+  void ExecuteShort(void) override {
+    Test(20, 1, 32, true, false);
+    Test(20, 1, 32, true, true);
+    Test(1, 20, 32, false, false);
+    Test(1, 20, 32, false, true);
+    Test(52, 1, 32, true, false);
+    Test(52, 1, 32, true, true);
+    Test(1, 52, 32, false, false);
+    Test(1, 52, 32, false, true);
+    Test(20, 3, 32, true, false);
+    Test(20, 3, 32, true, true);
+    Test(3, 20, 32, false, false);
+    Test(3, 20, 32, false, true);
+    Test(52, 3, 32, true, false);
+    Test(52, 3, 32, true, true);
+    Test(3, 52, 32, false, false);
+    Test(3, 52, 32, false, true);
+    Test(52, 3, 64, true, false);
+    Test(52, 3, 64, true, true);
+    Test(3, 52, 64, false, false);
+    Test(3, 52, 64, false, true);
+    Test(32 * 9 + 17, 41, 32, true, false);
+    Test(32 * 9 + 17, 41, 32, true, true);
+    Test(41, 32 * 9 + 17, 32, false, false);
+    Test(41, 32 * 9 + 17, 32, false, true);
+    Test(32 * 9 + 17, 41, 64, true, false);
+    Test(32 * 9 + 17, 41, 64, true, true);
+    Test(41, 32 * 9 + 17, 64, false, false);
+    Test(41, 32 * 9 + 17, 64, false, true);
+    Test(32 * 15 + 17, 63, 128, true, false);
+    Test(32 * 15 + 17, 63, 128, true, true);
+    Test(63, 32 * 15 + 17, 128, false, false);
+    Test(63, 32 * 15 + 17, 128, false, true);
+
+    Test(256, 256, 32, true, false);
+    Test(256, 256, 32, true, true);
+    Test(256, 256, 32, false, false);
+    Test(256, 256, 32, false, true);
+  }
+
+  MlasBlockwiseQdqTest() = default;
+};
+
+static UNUSED_VARIABLE bool added_to_main = AddTestRegister([](bool is_short_execute) {
+  size_t count = 0;
+  if (is_short_execute) {
+    count += MlasDirectShortExecuteTests<MlasBlockwiseQdqTest>::RegisterShortExecute();
+  }
+  return count;
+});
+
+#endif  // ORT_MINIMAL_BUILD
diff --git a/onnxruntime/test/mlas/unittest/test_conv2d.cpp b/onnxruntime/test/mlas/unittest/test_conv2d.cpp
index 97560bbfc2e7e..1700cd8f1800f 100644
--- a/onnxruntime/test/mlas/unittest/test_conv2d.cpp
+++ b/onnxruntime/test/mlas/unittest/test_conv2d.cpp
@@ -4,11 +4,6 @@
 #include "test_conv2d.h"
 #include "test_conv2d_fixture.h"
 
-template <>
-MlasConv2DTest<false>* MlasTestFixture<MlasConv2DTest<false>>::mlas_tester(nullptr);
-template <>
-MlasConv2DTest<true>* MlasTestFixture<MlasConv2DTest<true>>::mlas_tester(nullptr);
-
 static size_t Conv2dRegistLongExecute() {
   size_t count = MlasLongExecuteTests<MlasConv2DTest<false>>::RegisterLongExecute();
   if (GetMlasThreadPool() != nullptr) {
diff --git a/onnxruntime/test/mlas/unittest/test_conv2d_nchwc.cpp b/onnxruntime/test/mlas/unittest/test_conv2d_nchwc.cpp
index 78a047e385b99..e5a536eb9e4f0 100644
--- a/onnxruntime/test/mlas/unittest/test_conv2d_nchwc.cpp
+++ b/onnxruntime/test/mlas/unittest/test_conv2d_nchwc.cpp
@@ -4,11 +4,6 @@
 #include "test_conv2d_nchwc.h"
 #include "test_conv2d_fixture.h"
 
-template <>
-MlasNchwcConv2DTest<false>* MlasTestFixture<MlasNchwcConv2DTest<false>>::mlas_tester(nullptr);
-template <>
-MlasNchwcConv2DTest<true>* MlasTestFixture<MlasNchwcConv2DTest<true>>::mlas_tester(nullptr);
-
 static size_t Conv2dNchwcRegistLongExecute() {
   size_t count = 0;
 
diff --git a/onnxruntime/test/mlas/unittest/test_exp.cpp b/onnxruntime/test/mlas/unittest/test_exp.cpp
index ce8c4e97748f8..f9cdffef1947d 100644
--- a/onnxruntime/test/mlas/unittest/test_exp.cpp
+++ b/onnxruntime/test/mlas/unittest/test_exp.cpp
@@ -50,9 +50,6 @@ class MlasComputeExpTest : public MlasTestBase {
   }
 };
 
-template <>
-MlasComputeExpTest* MlasTestFixture<MlasComputeExpTest>::mlas_tester(nullptr);
-
 static UNUSED_VARIABLE bool added_to_main = AddTestRegister([](bool is_short_execute) {
   // no long execute needed
   return is_short_execute ? MlasDirectShortExecuteTests<MlasComputeExpTest>::RegisterShortExecute() : 0;
diff --git a/onnxruntime/test/mlas/unittest/test_fgemm.cpp b/onnxruntime/test/mlas/unittest/test_fgemm.cpp
index 6b8d4529faadb..e3f50baf3633d 100644
--- a/onnxruntime/test/mlas/unittest/test_fgemm.cpp
+++ b/onnxruntime/test/mlas/unittest/test_fgemm.cpp
@@ -7,24 +7,6 @@
 #include <memory>
 #include <sstream>
 
-template <>
-MlasFgemmTest<float, false, false>* MlasTestFixture<MlasFgemmTest<float, false, false>>::mlas_tester(nullptr);
-template <>
-MlasFgemmTest<float, false, true>* MlasTestFixture<MlasFgemmTest<float, false, true>>::mlas_tester(nullptr);
-template <>
-MlasFgemmTest<float, true, false>* MlasTestFixture<MlasFgemmTest<float, true, false>>::mlas_tester(nullptr);
-template <>
-MlasFgemmTest<float, true, true>* MlasTestFixture<MlasFgemmTest<float, true, true>>::mlas_tester(nullptr);
-
-#ifdef MLAS_SUPPORTS_GEMM_DOUBLE
-
-template <>
-MlasFgemmTest<double, false, false>* MlasTestFixture<MlasFgemmTest<double, false, false>>::mlas_tester(nullptr);
-template <>
-MlasFgemmTest<double, false, true>* MlasTestFixture<MlasFgemmTest<double, false, true>>::mlas_tester(nullptr);
-
-#endif
-
 static size_t FGemmRegistLongExecute() {
   size_t count = 0;
 
diff --git a/onnxruntime/test/mlas/unittest/test_fp16_activation.cpp b/onnxruntime/test/mlas/unittest/test_fp16_activation.cpp
index a9e062e0b6534..484a9a22429d5 100644
--- a/onnxruntime/test/mlas/unittest/test_fp16_activation.cpp
+++ b/onnxruntime/test/mlas/unittest/test_fp16_activation.cpp
@@ -148,9 +148,6 @@ class MlasFp16ActivationTest : public MlasTestBase {
   }
 };
 
-template <>
-MlasFp16ActivationTest* MlasTestFixture<MlasFp16ActivationTest>::mlas_tester(nullptr);
-
 static UNUSED_VARIABLE bool added_to_main = AddTestRegister([](bool is_short_execute) {
   return is_short_execute ? MlasDirectShortExecuteTests<MlasFp16ActivationTest>::RegisterShortExecute() : 0;
 });
diff --git a/onnxruntime/test/mlas/unittest/test_halfgemm.cpp b/onnxruntime/test/mlas/unittest/test_halfgemm.cpp
index 1a307d339b0f2..2a478675d09eb 100644
--- a/onnxruntime/test/mlas/unittest/test_halfgemm.cpp
+++ b/onnxruntime/test/mlas/unittest/test_halfgemm.cpp
@@ -89,42 +89,6 @@ class HalfGemmShortExecuteTest : public MlasTestFixture<MlasHalfGemmTest<AType,
   bool hasBias_;
 };
 
-template <>
-MlasHalfGemmTest<float, MLFp16, false, false>* MlasTestFixture<MlasHalfGemmTest<float, MLFp16, false, false>>::mlas_tester(nullptr);
-template <>
-MlasHalfGemmTest<float, MLFp16, false, true>* MlasTestFixture<MlasHalfGemmTest<float, MLFp16, false, true>>::mlas_tester(nullptr);
-template <>
-MlasHalfGemmTest<float, MLFp16, true, false>* MlasTestFixture<MlasHalfGemmTest<float, MLFp16, true, false>>::mlas_tester(nullptr);
-template <>
-MlasHalfGemmTest<float, MLFp16, true, true>* MlasTestFixture<MlasHalfGemmTest<float, MLFp16, true, true>>::mlas_tester(nullptr);
-
-template <>
-MlasHalfGemmTest<MLFp16, float, false, false>* MlasTestFixture<MlasHalfGemmTest<MLFp16, float, false, false>>::mlas_tester(nullptr);
-template <>
-MlasHalfGemmTest<MLFp16, float, false, true>* MlasTestFixture<MlasHalfGemmTest<MLFp16, float, false, true>>::mlas_tester(nullptr);
-template <>
-MlasHalfGemmTest<MLFp16, float, true, false>* MlasTestFixture<MlasHalfGemmTest<MLFp16, float, true, false>>::mlas_tester(nullptr);
-template <>
-MlasHalfGemmTest<MLFp16, float, true, true>* MlasTestFixture<MlasHalfGemmTest<MLFp16, float, true, true>>::mlas_tester(nullptr);
-
-template <>
-MlasHalfGemmTest<float, float, false, false>* MlasTestFixture<MlasHalfGemmTest<float, float, false, false>>::mlas_tester(nullptr);
-template <>
-MlasHalfGemmTest<float, float, false, true>* MlasTestFixture<MlasHalfGemmTest<float, float, false, true>>::mlas_tester(nullptr);
-template <>
-MlasHalfGemmTest<float, float, true, false>* MlasTestFixture<MlasHalfGemmTest<float, float, true, false>>::mlas_tester(nullptr);
-template <>
-MlasHalfGemmTest<float, float, true, true>* MlasTestFixture<MlasHalfGemmTest<float, float, true, true>>::mlas_tester(nullptr);
-
-template <>
-MlasHalfGemmTest<MLFp16, MLFp16, false, false>* MlasTestFixture<MlasHalfGemmTest<MLFp16, MLFp16, false, false>>::mlas_tester(nullptr);
-template <>
-MlasHalfGemmTest<MLFp16, MLFp16, false, true>* MlasTestFixture<MlasHalfGemmTest<MLFp16, MLFp16, false, true>>::mlas_tester(nullptr);
-template <>
-MlasHalfGemmTest<MLFp16, MLFp16, true, false>* MlasTestFixture<MlasHalfGemmTest<MLFp16, MLFp16, true, false>>::mlas_tester(nullptr);
-template <>
-MlasHalfGemmTest<MLFp16, MLFp16, true, true>* MlasTestFixture<MlasHalfGemmTest<MLFp16, MLFp16, true, true>>::mlas_tester(nullptr);
-
 static size_t HalfGemmRegistLongExecute() {
   size_t count = 0;
 
diff --git a/onnxruntime/test/mlas/unittest/test_halfgemm.h b/onnxruntime/test/mlas/unittest/test_halfgemm.h
index 2861b0e746fdc..4db5c2bebca40 100644
--- a/onnxruntime/test/mlas/unittest/test_halfgemm.h
+++ b/onnxruntime/test/mlas/unittest/test_halfgemm.h
@@ -18,20 +18,6 @@ Module Name:
 
 #include "test_fp16.h"
 
-inline bool
-CloseEnough(float actual, float expected) {
-  if (std::isnan(actual)) {
-    return std::isnan(expected);
-  }
-  float diff = std::abs(actual - expected);
-  float top = std::max(std::abs(actual), std::abs(expected));
-  float ratio = 0;
-  if (top > 0.0001) {
-    ratio = diff / top;
-  }
-  return ratio < 0.005;
-}
-
 /**
  * @brief Test class for half precision GEMM
  * @tparam AType  Data type of A matrix, can be either float or MLFp16
diff --git a/onnxruntime/test/mlas/unittest/test_main.cpp b/onnxruntime/test/mlas/unittest/test_main.cpp
index 66b5a6a15db2b..505c0c01dfa90 100644
--- a/onnxruntime/test/mlas/unittest/test_main.cpp
+++ b/onnxruntime/test/mlas/unittest/test_main.cpp
@@ -1,17 +1,18 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
-#include "test_util.h"
-
-#include <list>
 #include <algorithm>
+#include <list>
+#include <memory>
+
+#include "test_util.h"
 
 #if !defined(BUILD_MLAS_NO_ONNXRUNTIME)
 
 MLAS_THREADPOOL* GetMlasThreadPool(void) {
-  static MLAS_THREADPOOL* threadpool = new onnxruntime::concurrency::ThreadPool(
+  static auto threadpool = std::make_unique<onnxruntime::concurrency::ThreadPool>(
       &onnxruntime::Env::Default(), onnxruntime::ThreadOptions(), nullptr, 2, true);
-  return threadpool;
+  return threadpool.get();
 }
 
 #else
diff --git a/onnxruntime/test/mlas/unittest/test_minmax.cpp b/onnxruntime/test/mlas/unittest/test_minmax.cpp
index f0df504720c0c..245879deccffd 100644
--- a/onnxruntime/test/mlas/unittest/test_minmax.cpp
+++ b/onnxruntime/test/mlas/unittest/test_minmax.cpp
@@ -46,9 +46,6 @@ class MlasFindMinMaxElementsTest : public MlasTestBase {
   }
 };
 
-template <>
-MlasFindMinMaxElementsTest* MlasTestFixture<MlasFindMinMaxElementsTest>::mlas_tester(nullptr);
-
 #ifdef __GNUC__
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wunused-parameter"
diff --git a/onnxruntime/test/mlas/unittest/test_pool2d.cpp b/onnxruntime/test/mlas/unittest/test_pool2d.cpp
index 012e7f25fddce..8cefb8332ec32 100644
--- a/onnxruntime/test/mlas/unittest/test_pool2d.cpp
+++ b/onnxruntime/test/mlas/unittest/test_pool2d.cpp
@@ -4,20 +4,6 @@
 #include "test_pool2d.h"
 #include "test_pool2d_fixture.h"
 
-template <>
-MlasPool2DTest<MlasMaximumPooling, false>* MlasTestFixture<MlasPool2DTest<MlasMaximumPooling, false>>::mlas_tester(nullptr);
-template <>
-MlasPool2DTest<MlasAveragePoolingExcludePad, false>* MlasTestFixture<MlasPool2DTest<MlasAveragePoolingExcludePad, false>>::mlas_tester(nullptr);
-template <>
-MlasPool2DTest<MlasAveragePoolingIncludePad, false>* MlasTestFixture<MlasPool2DTest<MlasAveragePoolingIncludePad, false>>::mlas_tester(nullptr);
-
-template <>
-MlasPool2DTest<MlasMaximumPooling, true>* MlasTestFixture<MlasPool2DTest<MlasMaximumPooling, true>>::mlas_tester(nullptr);
-template <>
-MlasPool2DTest<MlasAveragePoolingExcludePad, true>* MlasTestFixture<MlasPool2DTest<MlasAveragePoolingExcludePad, true>>::mlas_tester(nullptr);
-template <>
-MlasPool2DTest<MlasAveragePoolingIncludePad, true>* MlasTestFixture<MlasPool2DTest<MlasAveragePoolingIncludePad, true>>::mlas_tester(nullptr);
-
 static size_t Pool2dRegistLongExecute() {
   size_t count = 0;
   count += MlasLongExecuteTests<MlasPool2DTest<MlasMaximumPooling, false>>::RegisterLongExecute();
diff --git a/onnxruntime/test/mlas/unittest/test_pool2d_nchwc.cpp b/onnxruntime/test/mlas/unittest/test_pool2d_nchwc.cpp
index 190fbe7d5a6f1..bee690b10b737 100644
--- a/onnxruntime/test/mlas/unittest/test_pool2d_nchwc.cpp
+++ b/onnxruntime/test/mlas/unittest/test_pool2d_nchwc.cpp
@@ -4,20 +4,6 @@
 #include "test_pool2d_nchwc.h"
 #include "test_pool2d_fixture.h"
 
-template <>
-MlasNchwcPool2DTest<MlasMaximumPooling, false>* MlasTestFixture<MlasNchwcPool2DTest<MlasMaximumPooling, false>>::mlas_tester(nullptr);
-template <>
-MlasNchwcPool2DTest<MlasAveragePoolingExcludePad, false>* MlasTestFixture<MlasNchwcPool2DTest<MlasAveragePoolingExcludePad, false>>::mlas_tester(nullptr);
-template <>
-MlasNchwcPool2DTest<MlasAveragePoolingIncludePad, false>* MlasTestFixture<MlasNchwcPool2DTest<MlasAveragePoolingIncludePad, false>>::mlas_tester(nullptr);
-
-template <>
-MlasNchwcPool2DTest<MlasMaximumPooling, true>* MlasTestFixture<MlasNchwcPool2DTest<MlasMaximumPooling, true>>::mlas_tester(nullptr);
-template <>
-MlasNchwcPool2DTest<MlasAveragePoolingExcludePad, true>* MlasTestFixture<MlasNchwcPool2DTest<MlasAveragePoolingExcludePad, true>>::mlas_tester(nullptr);
-template <>
-MlasNchwcPool2DTest<MlasAveragePoolingIncludePad, true>* MlasTestFixture<MlasNchwcPool2DTest<MlasAveragePoolingIncludePad, true>>::mlas_tester(nullptr);
-
 static size_t Pool2dNchwcRegistLongExecute() {
   size_t count = 0;
   if (MlasNchwcGetBlockSize() > 1) {
diff --git a/onnxruntime/test/mlas/unittest/test_pool3d.cpp b/onnxruntime/test/mlas/unittest/test_pool3d.cpp
index a93698234f7da..e0ce4c240be80 100644
--- a/onnxruntime/test/mlas/unittest/test_pool3d.cpp
+++ b/onnxruntime/test/mlas/unittest/test_pool3d.cpp
@@ -4,20 +4,6 @@
 #include "test_pool3d.h"
 #include "test_pool3d_fixture.h"
 
-template <>
-MlasPool3DTest<MlasMaximumPooling, false>* MlasTestFixture<MlasPool3DTest<MlasMaximumPooling, false>>::mlas_tester(nullptr);
-template <>
-MlasPool3DTest<MlasAveragePoolingExcludePad, false>* MlasTestFixture<MlasPool3DTest<MlasAveragePoolingExcludePad, false>>::mlas_tester(nullptr);
-template <>
-MlasPool3DTest<MlasAveragePoolingIncludePad, false>* MlasTestFixture<MlasPool3DTest<MlasAveragePoolingIncludePad, false>>::mlas_tester(nullptr);
-
-template <>
-MlasPool3DTest<MlasMaximumPooling, true>* MlasTestFixture<MlasPool3DTest<MlasMaximumPooling, true>>::mlas_tester(nullptr);
-template <>
-MlasPool3DTest<MlasAveragePoolingExcludePad, true>* MlasTestFixture<MlasPool3DTest<MlasAveragePoolingExcludePad, true>>::mlas_tester(nullptr);
-template <>
-MlasPool3DTest<MlasAveragePoolingIncludePad, true>* MlasTestFixture<MlasPool3DTest<MlasAveragePoolingIncludePad, true>>::mlas_tester(nullptr);
-
 static size_t Pool3dRegistLongExecute() {
   size_t count = 0;
   count += MlasLongExecuteTests<MlasPool3DTest<MlasMaximumPooling, false>>::RegisterLongExecute();
diff --git a/onnxruntime/test/mlas/unittest/test_q4gemm.cpp b/onnxruntime/test/mlas/unittest/test_q4gemm.cpp
index 2c3bf23a9330b..dccd7d00b6d3f 100644
--- a/onnxruntime/test/mlas/unittest/test_q4gemm.cpp
+++ b/onnxruntime/test/mlas/unittest/test_q4gemm.cpp
@@ -83,19 +83,6 @@ class Q4GemmShortExecuteTest : public MlasTestFixture<MlasQ4GemmTest<QType, Thre
   bool hasBias_;
 };
 
-template <>
-MlasQ4GemmTest<BlkQ4Sym, false>* MlasTestFixture<MlasQ4GemmTest<BlkQ4Sym, false>>::mlas_tester(nullptr);
-template <>
-MlasQ4GemmTest<BlkQ4Sym, true>* MlasTestFixture<MlasQ4GemmTest<BlkQ4Sym, true>>::mlas_tester(nullptr);
-template <>
-MlasQ4GemmTest<BlkQ4Zp8, false>* MlasTestFixture<MlasQ4GemmTest<BlkQ4Zp8, false>>::mlas_tester(nullptr);
-template <>
-MlasQ4GemmTest<BlkQ4Zp8, true>* MlasTestFixture<MlasQ4GemmTest<BlkQ4Zp8, true>>::mlas_tester(nullptr);
-template <>
-MlasQ4GemmTest<BlkQ4Sym128, false>* MlasTestFixture<MlasQ4GemmTest<BlkQ4Sym128, false>>::mlas_tester(nullptr);
-template <>
-MlasQ4GemmTest<BlkQ4Sym128, true>* MlasTestFixture<MlasQ4GemmTest<BlkQ4Sym128, true>>::mlas_tester(nullptr);
-
 static size_t Q4GemmRegistShortExecute() {
   size_t count = 0;
 
diff --git a/onnxruntime/test/mlas/unittest/test_q4gemm.h b/onnxruntime/test/mlas/unittest/test_q4gemm.h
index 58a64491ae80b..97c6969b5bf91 100644
--- a/onnxruntime/test/mlas/unittest/test_q4gemm.h
+++ b/onnxruntime/test/mlas/unittest/test_q4gemm.h
@@ -19,20 +19,6 @@ Module Name:
 #include "test_util.h"
 #include "mlas_q4.h"
 
-inline bool
-CloseEnough(float actual, float expected) {
-  if (std::isnan(actual)) {
-    return std::isnan(expected);
-  }
-  float diff = std::abs(actual - expected);
-  float top = std::max(std::abs(actual), std::abs(expected));
-  float ratio = 0;
-  if (top > 0.0001) {
-    ratio = diff / top;
-  }
-  return ratio < 0.005;
-}
-
 /**
  * @brief Test class for int4 block quantized GEMM
  *        Note: only 2-D matmul supported for now
diff --git a/onnxruntime/test/mlas/unittest/test_q4qdq.cpp b/onnxruntime/test/mlas/unittest/test_q4qdq.cpp
index 8215c63a2cc56..955c3b1201989 100644
--- a/onnxruntime/test/mlas/unittest/test_q4qdq.cpp
+++ b/onnxruntime/test/mlas/unittest/test_q4qdq.cpp
@@ -141,9 +141,6 @@ class MlasQ4dqTest : public MlasTestBase {
   MlasQ4dqTest() = default;
 };
 
-template <>
-MlasQ4dqTest* MlasTestFixture<MlasQ4dqTest>::mlas_tester(nullptr);
-
 static UNUSED_VARIABLE bool added_to_main = AddTestRegister([](bool is_short_execute) {
   if (MlasQ4GemmPackBSize(BlkQ4Sym, 32, 32) == 0) {
     return (size_t)0;
diff --git a/onnxruntime/test/mlas/unittest/test_q8q4gemm.cpp b/onnxruntime/test/mlas/unittest/test_q8q4gemm.cpp
index bac16b0103a6e..d3f601793a970 100644
--- a/onnxruntime/test/mlas/unittest/test_q8q4gemm.cpp
+++ b/onnxruntime/test/mlas/unittest/test_q8q4gemm.cpp
@@ -19,20 +19,6 @@ Module Name:
 #include "test_util.h"
 #include "mlas_q4.h"
 
-inline bool
-CloseEnough(float actual, float expected) {
-  if (std::isnan(actual)) {
-    return std::isnan(expected);
-  }
-  float diff = std::abs(actual - expected);
-  float top = std::max(std::abs(actual), std::abs(expected));
-  float ratio = 0;
-  if (top > 0.0001) {
-    ratio = diff / top;
-  }
-  return ratio < 0.005;
-}
-
 template <size_t QBlkLen>
 static void blkq8_dequant_reference(const int8_t* src, float* dst, size_t M, size_t K) {
   const size_t num_blks = K / QBlkLen;
@@ -271,19 +257,6 @@ class Q8Q4GemmShortExecuteTest : public MlasTestFixture<MlasQ8Q4GemmTest<QType,
   bool hasBias_;
 };
 
-template <>
-MlasQ8Q4GemmTest<BlkQ4Sym, false>* MlasTestFixture<MlasQ8Q4GemmTest<BlkQ4Sym, false>>::mlas_tester(nullptr);
-template <>
-MlasQ8Q4GemmTest<BlkQ4Sym, true>* MlasTestFixture<MlasQ8Q4GemmTest<BlkQ4Sym, true>>::mlas_tester(nullptr);
-template <>
-MlasQ8Q4GemmTest<BlkQ4Zp8, false>* MlasTestFixture<MlasQ8Q4GemmTest<BlkQ4Zp8, false>>::mlas_tester(nullptr);
-template <>
-MlasQ8Q4GemmTest<BlkQ4Zp8, true>* MlasTestFixture<MlasQ8Q4GemmTest<BlkQ4Zp8, true>>::mlas_tester(nullptr);
-template <>
-MlasQ8Q4GemmTest<BlkQ4Sym128, false>* MlasTestFixture<MlasQ8Q4GemmTest<BlkQ4Sym128, false>>::mlas_tester(nullptr);
-template <>
-MlasQ8Q4GemmTest<BlkQ4Sym128, true>* MlasTestFixture<MlasQ8Q4GemmTest<BlkQ4Sym128, true>>::mlas_tester(nullptr);
-
 static size_t Q8Q4GemmRegistShortExecute() {
   size_t count = 0;
 
diff --git a/onnxruntime/test/mlas/unittest/test_qgemm.cpp b/onnxruntime/test/mlas/unittest/test_qgemm.cpp
index a55331f1377fa..6bb93d35357f8 100644
--- a/onnxruntime/test/mlas/unittest/test_qgemm.cpp
+++ b/onnxruntime/test/mlas/unittest/test_qgemm.cpp
@@ -1,60 +1,6 @@
 #include "test_qgemm.h"
 #include "test_qgemm_fixture.h"
 
-template <>
-MlasQgemmTest<uint8_t, int8_t, int32_t, false, false>* MlasTestFixture<MlasQgemmTest<uint8_t, int8_t, int32_t, false, false>>::mlas_tester(nullptr);
-template <>
-MlasQgemmTest<uint8_t, int8_t, int32_t, false, true>* MlasTestFixture<MlasQgemmTest<uint8_t, int8_t, int32_t, false, true>>::mlas_tester(nullptr);
-template <>
-MlasQgemmTest<uint8_t, int8_t, int32_t, true, false>* MlasTestFixture<MlasQgemmTest<uint8_t, int8_t, int32_t, true, false>>::mlas_tester(nullptr);
-template <>
-MlasQgemmTest<uint8_t, int8_t, int32_t, true, true>* MlasTestFixture<MlasQgemmTest<uint8_t, int8_t, int32_t, true, true>>::mlas_tester(nullptr);
-
-template <>
-MlasQgemmTest<int8_t, int8_t, int32_t, false, false>* MlasTestFixture<MlasQgemmTest<int8_t, int8_t, int32_t, false, false>>::mlas_tester(nullptr);
-template <>
-MlasQgemmTest<int8_t, int8_t, int32_t, false, true>* MlasTestFixture<MlasQgemmTest<int8_t, int8_t, int32_t, false, true>>::mlas_tester(nullptr);
-template <>
-MlasQgemmTest<int8_t, int8_t, int32_t, true, false>* MlasTestFixture<MlasQgemmTest<int8_t, int8_t, int32_t, true, false>>::mlas_tester(nullptr);
-template <>
-MlasQgemmTest<int8_t, int8_t, int32_t, true, true>* MlasTestFixture<MlasQgemmTest<int8_t, int8_t, int32_t, true, true>>::mlas_tester(nullptr);
-
-template <>
-MlasQgemmTest<uint8_t, uint8_t, int32_t, false, false>* MlasTestFixture<MlasQgemmTest<uint8_t, uint8_t, int32_t, false, false>>::mlas_tester(nullptr);
-template <>
-MlasQgemmTest<uint8_t, uint8_t, int32_t, false, true>* MlasTestFixture<MlasQgemmTest<uint8_t, uint8_t, int32_t, false, true>>::mlas_tester(nullptr);
-template <>
-MlasQgemmTest<uint8_t, uint8_t, int32_t, true, false>* MlasTestFixture<MlasQgemmTest<uint8_t, uint8_t, int32_t, true, false>>::mlas_tester(nullptr);
-template <>
-MlasQgemmTest<uint8_t, uint8_t, int32_t, true, true>* MlasTestFixture<MlasQgemmTest<uint8_t, uint8_t, int32_t, true, true>>::mlas_tester(nullptr);
-
-template <>
-MlasQgemmTest<uint8_t, int8_t, float, false, false>* MlasTestFixture<MlasQgemmTest<uint8_t, int8_t, float, false, false>>::mlas_tester(nullptr);
-template <>
-MlasQgemmTest<uint8_t, int8_t, float, false, true>* MlasTestFixture<MlasQgemmTest<uint8_t, int8_t, float, false, true>>::mlas_tester(nullptr);
-template <>
-MlasQgemmTest<uint8_t, int8_t, float, true, false>* MlasTestFixture<MlasQgemmTest<uint8_t, int8_t, float, true, false>>::mlas_tester(nullptr);
-template <>
-MlasQgemmTest<uint8_t, int8_t, float, true, true>* MlasTestFixture<MlasQgemmTest<uint8_t, int8_t, float, true, true>>::mlas_tester(nullptr);
-
-template <>
-MlasQgemmTest<int8_t, int8_t, float, false, false>* MlasTestFixture<MlasQgemmTest<int8_t, int8_t, float, false, false>>::mlas_tester(nullptr);
-template <>
-MlasQgemmTest<int8_t, int8_t, float, false, true>* MlasTestFixture<MlasQgemmTest<int8_t, int8_t, float, false, true>>::mlas_tester(nullptr);
-template <>
-MlasQgemmTest<int8_t, int8_t, float, true, false>* MlasTestFixture<MlasQgemmTest<int8_t, int8_t, float, true, false>>::mlas_tester(nullptr);
-template <>
-MlasQgemmTest<int8_t, int8_t, float, true, true>* MlasTestFixture<MlasQgemmTest<int8_t, int8_t, float, true, true>>::mlas_tester(nullptr);
-
-template <>
-MlasQgemmTest<uint8_t, uint8_t, float, false, false>* MlasTestFixture<MlasQgemmTest<uint8_t, uint8_t, float, false, false>>::mlas_tester(nullptr);
-template <>
-MlasQgemmTest<uint8_t, uint8_t, float, false, true>* MlasTestFixture<MlasQgemmTest<uint8_t, uint8_t, float, false, true>>::mlas_tester(nullptr);
-template <>
-MlasQgemmTest<uint8_t, uint8_t, float, true, false>* MlasTestFixture<MlasQgemmTest<uint8_t, uint8_t, float, true, false>>::mlas_tester(nullptr);
-template <>
-MlasQgemmTest<uint8_t, uint8_t, float, true, true>* MlasTestFixture<MlasQgemmTest<uint8_t, uint8_t, float, true, true>>::mlas_tester(nullptr);
-
 static size_t QGemmRegistLongExecute() {
   size_t count = 0;
 
diff --git a/onnxruntime/test/mlas/unittest/test_qlinear_binaryop.cpp b/onnxruntime/test/mlas/unittest/test_qlinear_binaryop.cpp
index 93dda4bee183b..5876f186eaa0d 100644
--- a/onnxruntime/test/mlas/unittest/test_qlinear_binaryop.cpp
+++ b/onnxruntime/test/mlas/unittest/test_qlinear_binaryop.cpp
@@ -163,11 +163,6 @@ class MlasQLinearMulTest : public MlasQLinearBinaryOpTest {
   }
 };
 
-template <>
-MlasQLinearAddTest* MlasTestFixture<MlasQLinearAddTest>::mlas_tester(nullptr);
-template <>
-MlasQLinearMulTest* MlasTestFixture<MlasQLinearMulTest>::mlas_tester(nullptr);
-
 static bool UNUSED_VARIABLE added_to_main = AddTestRegister([](bool is_short_execute) {
   size_t count = 0;
   if (is_short_execute) {
diff --git a/onnxruntime/test/mlas/unittest/test_qlinear_gavgpool.cpp b/onnxruntime/test/mlas/unittest/test_qlinear_gavgpool.cpp
index aeb13af5b941a..e6c230df57fbc 100644
--- a/onnxruntime/test/mlas/unittest/test_qlinear_gavgpool.cpp
+++ b/onnxruntime/test/mlas/unittest/test_qlinear_gavgpool.cpp
@@ -162,11 +162,6 @@ class MlasQLinearGlobalAveragePoolTest : public MlasTestBase {
   }
 };
 
-template <>
-MlasQLinearGlobalAveragePoolTest<int8_t>* MlasTestFixture<MlasQLinearGlobalAveragePoolTest<int8_t>>::mlas_tester(nullptr);
-template <>
-MlasQLinearGlobalAveragePoolTest<uint8_t>* MlasTestFixture<MlasQLinearGlobalAveragePoolTest<uint8_t>>::mlas_tester(nullptr);
-
 template <>
 const std::vector<int8_t> MlasQLinearGlobalAveragePoolTest<int8_t>::ZeroPoints = {-128, -110, 1, 103, 127};
 
diff --git a/onnxruntime/test/mlas/unittest/test_quantizelinear.cpp b/onnxruntime/test/mlas/unittest/test_quantizelinear.cpp
index 2832598fef1a9..986d158d2b1b9 100644
--- a/onnxruntime/test/mlas/unittest/test_quantizelinear.cpp
+++ b/onnxruntime/test/mlas/unittest/test_quantizelinear.cpp
@@ -71,15 +71,6 @@ class MlasQuantizeLinearTest : public MlasTestBase {
   }
 };
 
-template <>
-MlasQuantizeLinearTest<int8_t>* MlasTestFixture<MlasQuantizeLinearTest<int8_t>>::mlas_tester(nullptr);
-template <>
-MlasQuantizeLinearTest<uint8_t>* MlasTestFixture<MlasQuantizeLinearTest<uint8_t>>::mlas_tester(nullptr);
-template <>
-MlasQuantizeLinearTest<int16_t>* MlasTestFixture<MlasQuantizeLinearTest<int16_t>>::mlas_tester(nullptr);
-template <>
-MlasQuantizeLinearTest<uint16_t>* MlasTestFixture<MlasQuantizeLinearTest<uint16_t>>::mlas_tester(nullptr);
-
 static UNUSED_VARIABLE bool added_to_main = AddTestRegister([](bool is_short_execute) {
   size_t count = 0;
   if (is_short_execute) {
diff --git a/onnxruntime/test/mlas/unittest/test_reorder_output.cpp b/onnxruntime/test/mlas/unittest/test_reorder_output.cpp
index 21373fe9f66e7..e39abd8578da4 100644
--- a/onnxruntime/test/mlas/unittest/test_reorder_output.cpp
+++ b/onnxruntime/test/mlas/unittest/test_reorder_output.cpp
@@ -88,9 +88,6 @@ class MlasReorderOutputTest : public MlasTestBase {
   }
 };
 
-template <>
-MlasReorderOutputTest* MlasTestFixture<MlasReorderOutputTest>::mlas_tester(nullptr);
-
 static UNUSED_VARIABLE bool added_to_main = AddTestRegister([](bool is_short_execute) {
   return (MlasNchwcGetBlockSize() > 1 && is_short_execute)
              ? MlasDirectShortExecuteTests<MlasReorderOutputTest>::RegisterShortExecute()
diff --git a/onnxruntime/test/mlas/unittest/test_scaleoutput.cpp b/onnxruntime/test/mlas/unittest/test_scaleoutput.cpp
index 7732b1fa8c72e..34f17843b0726 100644
--- a/onnxruntime/test/mlas/unittest/test_scaleoutput.cpp
+++ b/onnxruntime/test/mlas/unittest/test_scaleoutput.cpp
@@ -77,9 +77,6 @@ class MlasScaleOutputTest : public MlasTestBase {
   }
 };
 
-template <>
-MlasScaleOutputTest* MlasTestFixture<MlasScaleOutputTest>::mlas_tester(nullptr);
-
 static UNUSED_VARIABLE bool added_to_main = AddTestRegister([](bool is_short_execute) {
   return is_short_execute ? MlasDirectShortExecuteTests<MlasScaleOutputTest>::RegisterShortExecute() : 0;
 });
diff --git a/onnxruntime/test/mlas/unittest/test_softmax.cpp b/onnxruntime/test/mlas/unittest/test_softmax.cpp
index 3df2b88f9652a..4c5e11bbe9566 100644
--- a/onnxruntime/test/mlas/unittest/test_softmax.cpp
+++ b/onnxruntime/test/mlas/unittest/test_softmax.cpp
@@ -97,11 +97,6 @@ class MlasSoftmaxTest : public MlasTestBase {
   }
 };
 
-template <>
-MlasSoftmaxTest<false>* MlasTestFixture<MlasSoftmaxTest<false>>::mlas_tester(nullptr);
-template <>
-MlasSoftmaxTest<true>* MlasTestFixture<MlasSoftmaxTest<true>>::mlas_tester(nullptr);
-
 static UNUSED_VARIABLE bool added_to_main = AddTestRegister([](bool is_short_execute) {
   size_t count = 0;
   if (is_short_execute) {
diff --git a/onnxruntime/test/mlas/unittest/test_sqnbitgemm.cpp b/onnxruntime/test/mlas/unittest/test_sqnbitgemm.cpp
new file mode 100644
index 0000000000000..6c97d60301573
--- /dev/null
+++ b/onnxruntime/test/mlas/unittest/test_sqnbitgemm.cpp
@@ -0,0 +1,270 @@
+/*++
+
+Copyright (c) Microsoft Corporation. All rights reserved.
+
+Licensed under the MIT License.
+
+Module Name:
+
+    test_sqnbitgemm.h
+
+Abstract:
+
+    Tests for MLAS n-bit int block quantized GEMM.
+
+--*/
+
+#include "test_util.h"
+#include "mlas_q4.h"
+#include "mlas_qnbit.h"
+
+/**
+ * @brief Test class for n-bit int block quantized GEMM
+ *        Note: only 2-D matmul supported for now
+ */
+template <size_t BlkBitWidth, size_t BlkLen>
+class MlasSQNBitGemmTest : public MlasTestBase {
+ private:
+  MatrixGuardBuffer<float> BufferA;
+  MatrixGuardBuffer<float> BufferB;
+  MatrixGuardBuffer<uint8_t> BufferQuantBData;
+  MatrixGuardBuffer<uint8_t> BufferQuantBZeroPoint;
+  MatrixGuardBuffer<float> BufferQuantBScale;
+  MatrixGuardBuffer<float> BufferDequantizedB;
+  MatrixGuardBuffer<float> BufferBias;
+  MatrixGuardBuffer<float> BufferC;
+  MatrixGuardBuffer<float> BufferCReference;
+
+  void CallGemm(size_t M,
+                size_t N,
+                size_t K,
+                const float* A,
+                size_t lda,
+                const uint8_t* QuantBData,
+                const float* QuantBScale,
+                const uint8_t* QuantBZeroPoint,
+                const float* Bias,
+                float* C,
+                size_t ldc,
+                MLAS_THREADPOOL* Threadpool) {
+    MLAS_SQNBIT_GEMM_DATA_PARAMS params;
+    params.A = A;
+    params.lda = lda;
+    params.Bias = Bias;
+    params.C = C;
+    params.ldc = ldc;
+    params.QuantBData = QuantBData;
+    params.QuantBScale = QuantBScale;
+    params.QuantBZeroPoint = QuantBZeroPoint;
+    params.PostProcessor = nullptr;
+
+    MlasSQNBitGemmBatch(M, N, K, 1, BlkBitWidth, BlkLen, &params, Threadpool);
+  }
+
+  void CallReferenceGemm(size_t M,
+                         size_t N,
+                         size_t K,
+                         const float* A,
+                         const uint8_t* QuantBData,
+                         const float* QuantBScale,
+                         const uint8_t* QuantBZeroPoint,
+                         const float* Bias,
+                         float* C) {
+    float* DequantizedBData = BufferDequantizedB.GetBuffer(K * N);
+    MlasDequantizeBlockwise<float, BlkBitWidth>(
+        DequantizedBData, QuantBData, QuantBScale, QuantBZeroPoint, BlkLen, /* columnwise */ true,
+        static_cast<int>(K), static_cast<int>(N), GetMlasThreadPool());
+    // Note: DequantizedBData is in column major layout.
+
+    for (size_t m = 0; m < M; m++) {
+      for (size_t n = 0; n < N; n++) {
+        const float* a = A + m * K;
+        const float* b = DequantizedBData + n * K;
+        float* c = C + (m * N) + n;
+
+        float sum = Bias == nullptr ? 0.0f : Bias[n];
+        for (size_t k = 0; k < K; k++) {
+          sum += (*a) * (*b);
+          b += 1;
+          a += 1;
+        }
+        *c = sum;
+      }
+    }
+  }
+
+ public:
+  void Test(size_t M, size_t N, size_t K,
+            bool WithBias, bool Symmetric, bool WithThreadpool) {
+    MLAS_THREADPOOL* Threadpool = WithThreadpool ? GetMlasThreadPool() : nullptr;
+
+    const float* A = BufferA.GetBuffer(K * M);
+
+    const float* B = BufferB.GetBuffer(N * K);
+
+    const float* Bias = nullptr;
+    if (WithBias) {
+      Bias = BufferBias.GetBuffer(N);
+    }
+
+#if 0
+    auto print_matrix = [](size_t ncols, size_t nrows, const float* data) {
+      for (size_t row = 0; row < nrows; ++row) {
+        for (size_t col = 0; col < ncols; ++col) {
+          std::cout << data[row * nrows + col] << "\t";
+        }
+        std::cout << "\n";
+      }
+    };
+
+    std::cout << "A:\n";
+    print_matrix(M, K, A);
+    std::cout << "B:\n";
+    print_matrix(K, N, B);
+#endif
+
+    float* C = BufferC.GetBuffer(N * M, true);
+    float* CReference = BufferCReference.GetBuffer(N * M, true);
+
+    // pack B
+    uint8_t* QuantBData = nullptr;
+    float* QuantBScale = nullptr;
+    uint8_t* QuantBZeroPoint = nullptr;
+    {
+      size_t QuantBDataSizeInBytes, QuantBScaleSize, QuantBZeroPointSizeInBytes;
+      MlasBlockwiseQuantizedBufferSizes(BlkBitWidth, BlkLen, /* columnwise */ true,
+                                        static_cast<int>(K), static_cast<int>(N),
+                                        QuantBDataSizeInBytes, QuantBScaleSize, &QuantBZeroPointSizeInBytes);
+
+      QuantBData = BufferQuantBData.GetBuffer(QuantBDataSizeInBytes);
+      QuantBScale = BufferQuantBScale.GetBuffer(QuantBScaleSize);
+      if (Symmetric) {
+        QuantBZeroPoint = BufferQuantBZeroPoint.GetBuffer(QuantBZeroPointSizeInBytes);
+      }
+
+      MlasQuantizeBlockwise<float, 4>(QuantBData, QuantBScale, QuantBZeroPoint,
+                                      B, BlkLen,
+                                      /* columnwise */ true,
+                                      static_cast<int>(K), static_cast<int>(N),
+                                      static_cast<int>(N),
+                                      GetMlasThreadPool());
+    }
+
+    CallGemm(M, N, K, A, /* lda */ K, QuantBData, QuantBScale, QuantBZeroPoint, Bias, C, /* ldc */ N, Threadpool);
+    CallReferenceGemm(M, N, K, A, QuantBData, QuantBScale, QuantBZeroPoint, Bias, CReference);
+
+    size_t f = 0;
+    for (size_t m = 0; m < M; m++) {
+      for (size_t n = 0; n < N; n++, f++) {
+        ASSERT_TRUE(CloseEnough(C[f], CReference[f]))
+            << "Expected: " << CReference[f] << " Actual: " << C[f] << "@[" << m << "x" << n << "], "
+            << "M=" << M << ", N=" << N << ", K=" << K;
+      }
+    }
+  }
+
+ public:
+  static const char* GetTestSuiteName() {
+    static std::string suite_name = std::string("SQNBitGemm") +
+                                    "BlkBitWidth" + std::to_string(BlkBitWidth) +
+                                    "BlkLen" + std::to_string(BlkLen);
+    return suite_name.c_str();
+  }
+};
+
+//
+// Short Execute() test helper to register each test separately by all parameters.
+//
+template <size_t BlkBitWidth, size_t BlkLen>
+class SQNBitGemmShortExecuteTest : public MlasTestFixture<MlasSQNBitGemmTest<BlkBitWidth, BlkLen>> {
+ public:
+  explicit SQNBitGemmShortExecuteTest(size_t M, size_t N, size_t K,
+                                      bool WithThreadpool, bool Symmetric, bool WithBias)
+      : M_(M), N_(N), K_(K), WithThreadpool_(WithThreadpool), Symmetric_(Symmetric), WithBias_(WithBias) {
+  }
+
+  void TestBody() override {
+    MlasTestFixture<MlasSQNBitGemmTest<BlkBitWidth, BlkLen>>::mlas_tester->Test(
+        M_, N_, K_, WithThreadpool_, Symmetric_, WithBias_);
+  }
+
+  static size_t RegisterSingleTest(size_t M, size_t N, size_t K,
+                                   bool WithThreadpool, bool Symmetric, bool WithBias) {
+    std::stringstream ss;
+    ss << (WithThreadpool ? "SingleThread" : "Threaded")
+       << "/isSymmetric" << Symmetric
+       << "/M" << M << "xN" << N << "xK" << K
+       << "/hasBias" << WithBias;
+    auto test_name = ss.str();
+
+    testing::RegisterTest(
+        MlasSQNBitGemmTest<BlkBitWidth, BlkLen>::GetTestSuiteName(),
+        test_name.c_str(),
+        nullptr,
+        test_name.c_str(),
+        __FILE__,
+        __LINE__,
+        // Important to use the fixture type as the return type here.
+        [=]() -> MlasTestFixture<MlasSQNBitGemmTest<BlkBitWidth, BlkLen>>* {
+          return new SQNBitGemmShortExecuteTest(
+              M, N, K, WithThreadpool, Symmetric, WithBias);
+        });
+
+    return 1;
+  }
+
+  static size_t RegisterShortExecuteTests() {
+    size_t test_registered = 0;
+
+    if (MlasIsSQNBitGemmAvailable(BlkBitWidth, BlkLen)) {
+      for (bool WithThreadpool : {false, true}) {
+        for (bool Symmetric : {false, true}) {
+          for (size_t b = 1; b < 16; b++) {
+            test_registered += RegisterSingleTest(b, b, b, WithThreadpool, Symmetric, false);
+            test_registered += RegisterSingleTest(b, b, b, WithThreadpool, Symmetric, true);
+          }
+          for (size_t b = 16; b <= 256; b <<= 1) {
+            test_registered += RegisterSingleTest(b, b, b, WithThreadpool, Symmetric, false);
+            test_registered += RegisterSingleTest(b, b, b, WithThreadpool, Symmetric, true);
+          }
+          for (size_t b = 256; b < 320; b += 32) {
+            test_registered += RegisterSingleTest(b, b, b, WithThreadpool, Symmetric, true);
+          }
+          for (size_t b = 1; b < 96; b++) {
+            test_registered += RegisterSingleTest(1, b, 32, WithThreadpool, Symmetric, false);
+            test_registered += RegisterSingleTest(1, 32, b, WithThreadpool, Symmetric, true);
+            test_registered += RegisterSingleTest(1, b, b, WithThreadpool, Symmetric, false);
+          }
+          test_registered += RegisterSingleTest(43, 500, 401, WithThreadpool, Symmetric, true);
+
+          // test_registered += RegisterSingleTest(1001, 1027, 1031, WithThreadpool, Symmetric, false);
+        }
+      }
+    }
+
+    return test_registered;
+  }
+
+ private:
+  size_t M_, N_, K_;
+  bool WithThreadpool_, Symmetric_, WithBias_;
+};
+
+static size_t SQNBitGemmRegisterAllShortExecuteTests() {
+  size_t count = 0;
+
+  count += SQNBitGemmShortExecuteTest<4, 16>::RegisterShortExecuteTests();
+  count += SQNBitGemmShortExecuteTest<4, 32>::RegisterShortExecuteTests();
+  count += SQNBitGemmShortExecuteTest<4, 64>::RegisterShortExecuteTests();
+  count += SQNBitGemmShortExecuteTest<4, 128>::RegisterShortExecuteTests();
+  count += SQNBitGemmShortExecuteTest<4, 256>::RegisterShortExecuteTests();
+
+  return count;
+}
+
+static UNUSED_VARIABLE bool added_to_main = AddTestRegister([](bool is_short_execute) {
+  if (is_short_execute) {
+    return SQNBitGemmRegisterAllShortExecuteTests() > 0;
+  }
+  return false;
+});
diff --git a/onnxruntime/test/mlas/unittest/test_symm_qgemm.cpp b/onnxruntime/test/mlas/unittest/test_symm_qgemm.cpp
index adfe5564ebbbf..bb3aea02cc011 100644
--- a/onnxruntime/test/mlas/unittest/test_symm_qgemm.cpp
+++ b/onnxruntime/test/mlas/unittest/test_symm_qgemm.cpp
@@ -1,10 +1,5 @@
 #include "test_symm_qgemm_fixture.h"
 
-template <>
-MlasSymmQgemmTest<int8_t, int32_t, false>* MlasTestFixture<MlasSymmQgemmTest<int8_t, int32_t, false>>::mlas_tester(nullptr);
-template <>
-MlasSymmQgemmTest<int8_t, int32_t, true>* MlasTestFixture<MlasSymmQgemmTest<int8_t, int32_t, true>>::mlas_tester(nullptr);
-
 static size_t SymmQgemmRegistLongExecute() {
   if (MlasSymmQgemmPackBSize(16, 16, true) == 0) {
     return 0;
diff --git a/onnxruntime/test/mlas/unittest/test_transpose.cpp b/onnxruntime/test/mlas/unittest/test_transpose.cpp
index 74ce5868f411d..8fa98411a21ab 100644
--- a/onnxruntime/test/mlas/unittest/test_transpose.cpp
+++ b/onnxruntime/test/mlas/unittest/test_transpose.cpp
@@ -45,13 +45,6 @@ class MlasTransposeTest : public MlasTestBase {
   }
 };
 
-template <>
-MlasTransposeTest<uint32_t>* MlasTestFixture<MlasTransposeTest<uint32_t>>::mlas_tester(nullptr);
-template <>
-MlasTransposeTest<uint16_t>* MlasTestFixture<MlasTransposeTest<uint16_t>>::mlas_tester(nullptr);
-template <>
-MlasTransposeTest<uint8_t>* MlasTestFixture<MlasTransposeTest<uint8_t>>::mlas_tester(nullptr);
-
 static UNUSED_VARIABLE bool added_to_main = AddTestRegister([](bool is_short_execute) {
   size_t count = 0;
   if (is_short_execute) {
diff --git a/onnxruntime/test/mlas/unittest/test_util.h b/onnxruntime/test/mlas/unittest/test_util.h
index c5ee8b4b6115a..8eecda900ff27 100644
--- a/onnxruntime/test/mlas/unittest/test_util.h
+++ b/onnxruntime/test/mlas/unittest/test_util.h
@@ -9,6 +9,7 @@
 #include <stdio.h>
 #include <memory.h>
 #include <algorithm>
+#include <cmath>
 #include <limits>
 #include <memory>
 #include <random>
@@ -188,8 +189,7 @@ class MlasTestFixture : public testing::Test {
     mlas_tester = nullptr;
   };
 
-  // Do not forgot to define this static member element when upon usage.
-  static TMlasTester* mlas_tester;
+  static inline TMlasTester* mlas_tester = nullptr;
 };
 
 // Long Execute test. It is too heavy to register each single test, treat long execute big groups.
@@ -254,3 +254,16 @@ inline void ReorderInputNchw(const int64_t* input_shape, const float* S, float*
     D += spatial_count * nchwc_channel_count;
   }
 }
+
+inline bool CloseEnough(float actual, float expected) {
+  if (std::isnan(actual)) {
+    return std::isnan(expected);
+  }
+  float diff = std::abs(actual - expected);
+  float top = std::max(std::abs(actual), std::abs(expected));
+  float ratio = 0;
+  if (top > 0.0001) {
+    ratio = diff / top;
+  }
+  return ratio < 0.005;
+}
diff --git a/onnxruntime/test/onnx/TestCase.cc b/onnxruntime/test/onnx/TestCase.cc
index fcef036163d4c..636c0bbfa94e9 100644
--- a/onnxruntime/test/onnx/TestCase.cc
+++ b/onnxruntime/test/onnx/TestCase.cc
@@ -6,11 +6,13 @@
 #include "TestCase.h"
 
 #include <cctype>
+#include <filesystem>
 #include <fstream>
 #include <memory>
 #include <sstream>
 #include <map>
 #include <regex>
+#include <set>
 #include <string>
 
 #include "callback.h"
@@ -184,7 +186,7 @@ void LoopDataFile(int test_data_pb_fd, bool is_input, const TestModelInfo& model
   f.SetCloseOnDelete(true);
   google::protobuf::io::CodedInputStream coded_input(&f);
   bool clean_eof = false;
-  int item_id = 1;
+  [[maybe_unused]] int item_id = 1;
   for (proto::TraditionalMLData data;
        ParseDelimitedFromCodedStream(&data, &coded_input, &clean_eof);
        ++item_id, data.Clear()) {
@@ -730,6 +732,8 @@ void LoadTests(const std::vector<std::basic_string<PATH_CHAR_TYPE>>& input_paths
                const std::vector<std::basic_string<PATH_CHAR_TYPE>>& whitelisted_test_cases,
                const TestTolerances& tolerances,
                const std::unordered_set<std::basic_string<ORTCHAR_T>>& disabled_tests,
+               std::unique_ptr<std::set<BrokenTest>> broken_tests,
+               std::unique_ptr<std::set<std::string>> broken_tests_keyword_set,
                const std::function<void(std::unique_ptr<ITestCase>)>& process_function) {
   std::vector<std::basic_string<PATH_CHAR_TYPE>> paths(input_paths);
   while (!paths.empty()) {
@@ -782,11 +786,60 @@ void LoadTests(const std::vector<std::basic_string<PATH_CHAR_TYPE>>& input_paths
         ORT_NOT_IMPLEMENTED(ToUTF8String(filename_str), " is not supported");
       }
 
+      auto test_case_dir = model_info->GetDir();
+      auto test_case_name_in_log = test_case_name + ORT_TSTR(" in ") + test_case_dir;
+
+#if !defined(ORT_MINIMAL_BUILD) && !defined(USE_QNN)
+      // to skip some models like *-int8 or *-qdq
+      if ((reinterpret_cast<OnnxModelInfo*>(model_info.get()))->HasDomain(ONNX_NAMESPACE::AI_ONNX_TRAINING_DOMAIN) ||
+          (reinterpret_cast<OnnxModelInfo*>(model_info.get()))->HasDomain(ONNX_NAMESPACE::AI_ONNX_PREVIEW_TRAINING_DOMAIN)) {
+        fprintf(stderr, "Skip test case:: %s %s\n", ToUTF8String(test_case_name_in_log).c_str(), " as it has training domain");
+        return true;
+      }
+#endif
+
+      bool has_test_data = false;
+      LoopDir(test_case_dir, [&](const PATH_CHAR_TYPE* filename, OrtFileType f_type) -> bool {
+        if (filename[0] == '.') return true;
+        if (f_type == OrtFileType::TYPE_DIR) {
+          has_test_data = true;
+          return false;
+        }
+        return true;
+      });
+      if (!has_test_data) {
+        fprintf(stderr, "Skip test case:: %s %s\n", ToUTF8String(test_case_name_in_log).c_str(), " due to no test data");
+        return true;
+      }
+
+      if (broken_tests) {
+        BrokenTest t = {ToUTF8String(test_case_name), ""};
+        auto iter = broken_tests->find(t);
+        auto opset_version = model_info->GetNominalOpsetVersion();
+        if (iter != broken_tests->end() &&
+            (opset_version == TestModelInfo::unknown_version || iter->broken_opset_versions_.empty() ||
+             iter->broken_opset_versions_.find(opset_version) != iter->broken_opset_versions_.end())) {
+          fprintf(stderr, "Skip test case:: %s %s\n", ToUTF8String(test_case_name_in_log).c_str(), " due to broken_tests");
+          return true;
+        }
+      }
+
+      if (broken_tests_keyword_set) {
+        for (auto iter2 = broken_tests_keyword_set->begin(); iter2 != broken_tests_keyword_set->end(); ++iter2) {
+          std::string keyword = *iter2;
+          if (ToUTF8String(test_case_name).find(keyword) != std::string::npos) {
+            fprintf(stderr, "Skip test case:: %s %s\n", ToUTF8String(test_case_name_in_log).c_str(), " as it is in broken test keywords");
+            return true;
+          }
+        }
+      }
+
       const auto tolerance_key = ToUTF8String(my_dir_name);
 
       std::unique_ptr<ITestCase> l = CreateOnnxTestCase(ToUTF8String(test_case_name), std::move(model_info),
                                                         tolerances.absolute(tolerance_key),
                                                         tolerances.relative(tolerance_key));
+      fprintf(stdout, "Load Test Case: %s\n", ToUTF8String(test_case_name_in_log).c_str());
       process_function(std::move(l));
       return true;
     });
@@ -816,3 +869,571 @@ double TestTolerances::relative(const std::string& name) const {
   }
   return iter->second;
 }
+
+std::unique_ptr<std::set<BrokenTest>> GetBrokenTests(const std::string& provider_name) {
+  auto broken_tests = std::make_unique<std::set<BrokenTest>>(std::initializer_list<BrokenTest>{
+      {"slice_neg_steps",
+       "Type parameter (Tind) bound to different types (tensor(int64) and tensor(int32) in node ()."},
+      {"cast_BFLOAT16_to_FLOAT", "Unexpected input data type"},
+      {"loop13_seq", "Creation of empty sequences is currently not supported in the test runner"},
+      {"sequence_insert_at_front", "shape mismatch, expect {4} got {3}"},
+      {"cast_FLOAT_to_BFLOAT16", "expect uint16 got bfloat16"},
+      {"mnist", "Input data isn't in valid range"},
+      {"BERT_Squad", "test data bug"},
+      {"constantofshape_float_ones", "test data bug", {"opset9", "opset10"}},
+      {"constantofshape_int_zeros", "test data bug", {"opset9", "opset10"}},
+      {"cast_STRING_to_FLOAT", "Linux CI has old ONNX python package with bad test data", {"opset9", "opset10"}},
+      // Numpy float to string has unexpected rounding for some results given numpy default precision is meant to be 8.
+      // "e.g. 0.296140194 -> '0.2961402' not '0.29614019'. ORT produces the latter with precision set to 8,
+      // which doesn't match the expected output that was generated with numpy.
+      {"cast_FLOAT_to_STRING", "Numpy float to string has unexpected rounding for some results."},
+      {"tf_nasnet_large", "disable temporarily"},
+      {"tf_nasnet_mobile", "disable temporarily"},
+      {"tf_pnasnet_large", "disable temporarily"},
+      {"shrink", "test case is wrong", {"opset9"}},
+      {"maxpool_with_argmax_2d_precomputed_strides", "ShapeInferenceError"},
+      {"tf_inception_v2", "result mismatch"},
+      {"tf_resnet_v1_50", "result mismatch when Conv BN Fusion is applied"},
+      {"tf_resnet_v1_101", "result mismatch when Conv BN Fusion is applied"},
+      {"tf_resnet_v1_152", "result mismatch when Conv BN Fusion is applied"},
+      {"mxnet_arcface", "Model is an invalid ONNX model"},
+      {"unique_not_sorted_without_axis", "Expected data for 'Y' is incorrect and in sorted order."},
+      {"cumsum_1d_reverse_exclusive", "only failing linux GPU CI. Likely build error."},
+      {"resize_downsample_scales_cubic_align_corners", "results mismatch with onnx tests"},
+      {"resize_downsample_scales_linear_align_corners", "results mismatch with onnx tests"},
+      {"resize_tf_crop_and_resize", "Bad onnx test output. Needs test fix."},
+      {"resize_upsample_sizes_nearest_ceil_half_pixel", "Bad onnx test output. Needs test fix."},
+      {"resize_upsample_sizes_nearest_floor_align_corners", "Bad onnx test output. Needs test fix."},
+      {"resize_upsample_sizes_nearest_round_prefer_ceil_asymmetric", "Bad onnx test output. Needs test fix."},
+      {"bitshift_right_uint16", "BitShift(11) uint16 support not enabled currently"},
+      {"bitshift_left_uint16", "BitShift(11) uint16 support not enabled currently"},
+      {"maxunpool_export_with_output_shape",
+       "Invalid output in ONNX test. See https://github.com/onnx/onnx/issues/2398"},
+      {"cntk_simple_seg", "Bad onnx test output caused by wrong SAME_UPPER/SAME_LOWER for ConvTranspose"},
+      {"training_dropout", "result differs", {}},               // Temporary, subsequent PR will remove this.
+      {"training_dropout_default", "result differs", {}},       // Temporary, subsequent PR will remove this.
+      {"training_dropout_default_mask", "result differs", {}},  // Temporary, subsequent PR will remove this.
+      {"training_dropout_mask", "result differs", {}},          // Temporary, subsequent PR will remove this.
+      {"batchnorm_epsilon_training_mode", "training only", {}},
+      {"batchnorm_example_training_mode", "training only", {}},
+      {"bernoulli", "type error", {}},
+      {"bernoulli_double", "type error", {}},
+      {"bernoulli_double_expanded", "type error", {}},
+      {"bernoulli_expanded", "type error", {}},
+      {"bernoulli_seed", "type error", {}},
+      {"bernoulli_seed_expanded", "type error", {}},
+      {"castlike_BFLOAT16_to_FLOAT", "type error", {}},
+      {"castlike_BFLOAT16_to_FLOAT_expanded", "type error", {}},
+      {"castlike_FLOAT_to_BFLOAT16", "type error", {}},
+      {"castlike_FLOAT_to_BFLOAT16_expanded", "type error", {}},
+      {"castlike_FLOAT_to_STRING", "type error", {}},
+      {"castlike_FLOAT_to_STRING_expanded", "type error", {}},
+      {"convtranspose_autopad_same", "Test data has been corrected in ONNX 1.10.", {"opset13", "opset14"}},
+      {"gru_batchwise", "type error", {}},
+      {"lstm_batchwise", "type error", {}},
+      {"optional_get_element", "type error", {}},
+      {"optional_get_element_sequence", "type error", {}},
+      {"optional_has_element", "type error", {}},
+      {"optional_has_element_empty", "type error", {}},
+      {"shape_end_1", "type error", {}},
+      {"shape_end_negative_1", "type error", {}},
+      {"shape_start_1", "type error", {}},
+      {"shape_start_1_end_2", "type error", {}},
+      {"shape_start_1_end_negative_1", "type error", {}},
+      {"shape_start_negative_1", "type error", {}},
+      {"simple_rnn_batchwise", "type error", {}},
+      {"mod_float_mixed_sign_example", "fmod attribute must be true for floating point types", {}},
+      {"col2im_pads", "result mismatch", {"opset18"}},
+      {"reduce_l1_empty_set", "unknown version", {}},
+      {"reduce_l1_empty_set_expanded", "unknown version", {}},
+      {"reduce_l2_empty_set", "unknown version", {}},
+      {"reduce_l2_empty_set_expanded", "unknown version", {}},
+      {"reduce_log_sum_empty_set", "unknown version", {}},
+      {"reduce_log_sum_empty_set_expanded", "unknown version", {}},
+      {"reduce_log_sum_exp_empty_set", "unknown version", {}},
+      {"reduce_log_sum_exp_empty_set_expanded", "unknown version", {}},
+      {"reduce_prod_empty_set", "unknown version", {}},
+      {"reduce_sum_empty_set", "unknown version", {}},
+      {"reduce_sum_square_empty_set", "unknown version", {}},
+      {"reduce_sum_square_empty_set_expanded", "unknown version", {}},
+#ifdef ENABLE_TRAINING_CORE
+      {"adagrad", "not a registered function/op", {}},                  // Op not registered.
+      {"adagrad_multiple", "not a registered function/op", {}},         // Op not registered.
+      {"adam", "not a registered function/op", {}},                     // Op not registered.
+      {"adam_multiple", "not a registered function/op", {}},            // Op not registered.
+      {"gradient_of_add", "not a registered function/op", {}},          // Op not registered.
+      {"gradient_of_add_and_mul", "not a registered function/op", {}},  // Op not registered.
+      {"momentum", "not a registered function/op", {}},                 // Op not registered.
+      {"momentum_multiple", "not a registered function/op", {}},        // Op not registered.
+      {"nesterov_momentum", "not a registered function/op", {}},        // Op not registered.
+      {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight", "type error", {"opset12"}},
+      {"softmax_cross_entropy_mean_weight_ignore_index_log_prob", "type error", {"opset12"}},
+      {"softmax_cross_entropy_input_shape_is_NCd1_mean_weight_negative_ignore_index_log_prob",
+       "type error",
+       {"opset12"}},
+      {"softmax_cross_entropy_mean_weight_log_prob", "type error", {"opset12"}},
+      {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_log_prob", "type error", {"opset12"}},
+      {"softmax_cross_entropy_mean_weight_ignore_index_3d", "type error", {"opset12"}},
+      {"softmax_cross_entropy_mean_weight_ignore_index_4d_log_prob", "type error", {"opset12"}},
+      {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_log_prob", "type error", {"opset12"}},
+      {"softmax_cross_entropy_mean_weight_ignore_index_4d", "type error", {"opset12"}},
+      {"softmax_cross_entropy_mean", "type error", {"opset12"}},
+      {"softmax_cross_entropy_mean_log_prob", "type error", {"opset12"}},
+      {"softmax_cross_entropy_mean_no_weight_ignore_index", "type error", {"opset12"}},
+      {"softmax_cross_entropy_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index_log_prob",
+       "type error",
+       {"opset12"}},
+      {"softmax_cross_entropy_mean_3d_log_prob", "type error", {"opset12"}},
+      {"softmax_cross_entropy_none_log_prob", "type error", {"opset12"}},
+      {"softmax_cross_entropy_mean_3d", "type error", {"opset12"}},
+      {"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index", "type error", {"opset12"}},
+      {"softmax_cross_entropy_mean_weight_ignore_index_3d_log_prob", "type error", {"opset12"}},
+      {"softmax_cross_entropy_mean_no_weight_ignore_index_3d_log_prob", "type error", {"opset12"}},
+      {"softmax_cross_entropy_none_weights_log_prob", "type error", {"opset12"}},
+      {"softmax_cross_entropy_sum_log_prob", "type error", {"opset12"}},
+      {"softmax_cross_entropy_mean_weight_ignore_index", "type error", {"opset12"}},
+      {"softmax_cross_entropy_mean_no_weight_ignore_index_log_prob", "type error", {"opset12"}},
+      {"softmax_cross_entropy_mean_no_weight_ignore_index_3d", "type error", {"opset12"}},
+      {"softmax_cross_entropy_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index", "type error", {"opset12"}},
+      {"softmax_cross_entropy_sum", "type error", {"opset12"}},
+      {"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_log_prob",
+       "type error",
+       {"opset12"}},
+      {"softmax_cross_entropy_none_weights", "type error", {"opset12"}},
+      {"softmax_cross_entropy_mean_no_weight_ignore_index_4d_log_prob", "type error", {"opset12"}},
+      {"softmax_cross_entropy_none", "type error", {"opset12"}},
+      {"softmax_cross_entropy_input_shape_is_NCd1_mean_weight_negative_ignore_index", "type error", {"opset12"}},
+      {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight", "type error", {"opset12"}},
+      {"softmax_cross_entropy_mean_weight", "type error", {"opset12"}},
+      {"softmax_cross_entropy_mean_no_weight_ignore_index_4d", "type error", {"opset12"}},
+#endif
+      {"mask_rcnn_keras", "this model currently has an invalid contrib op version set to 10", {}}});
+
+  // Some EPs may fail to pass some specific testcases.
+  // For example TenosrRT EP may fail on FLOAT16 related testcases if GPU doesn't support float16.
+  // Instead of list all these testcases, we can use following keyword set to filter out testcases wchich contain
+  // specific keyword.
+  // std::set<std::string> broken_tests_keyword_set = {};
+
+  if (provider_name == "cuda") {
+#ifdef _WIN32
+    broken_tests->insert({"LSTM_Seq_lens_unpacked", "this test fails with new image since Aug 25."});
+    broken_tests->insert({"bidaf", "this test fails with new image since Aug 25."});
+    broken_tests->insert({"Candy", "Flaky test, need to investigate", {"opset9"}});
+#else
+    broken_tests->insert({"bidaf", "this test should be recovered when multi-gpu pipeline deprecates NV12", {"opset9"}});
+#endif
+  }
+
+  if (provider_name == "nnapi") {
+    broken_tests->insert({"scan9_sum", "Error with the extra graph"});
+    broken_tests->insert({"scan_sum", "Error with the extra graph"});
+    broken_tests->insert({"mvn_expanded", "Failed to find kernel for MemcpyFromHost(1) (node Memcpy_1)"});
+    broken_tests->insert({"dynamicquantizelinear_expanded", "Temporarily disabled pending investigation"});
+    broken_tests->insert({"dynamicquantizelinear_max_adjusted_expanded", "Temporarily disabled pending investigation"});
+    broken_tests->insert({"dynamicquantizelinear_min_adjusted_expanded", "Temporarily disabled pending investigation"});
+    broken_tests->insert({"gemm_transposeB", "Temporarily disabled pending investigation"});
+    broken_tests->insert({"range_float_type_positive_delta_expanded", "Temporarily disabled pending investigation"});
+    broken_tests->insert({"range_int32_type_negative_delta_expanded", "Temporarily disabled pending investigation"});
+    broken_tests->insert({"convtranspose_1d", "1d convtranspose not supported yet"});
+    broken_tests->insert({"convtranspose_3d", "3d convtranspose not supported yet"});
+    broken_tests->insert({"maxpool_2d_uint8", "result mismatch"});
+    broken_tests->insert({"negative_log_likelihood_loss_input_shape_is_NC_expanded", "shape mismatch"});
+    broken_tests->insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_expanded", "shape mismatch"});
+    broken_tests->insert(
+        {"negative_log_likelihood_loss_input_shape_is_NCd1d2_reduction_mean_expanded", "shape mismatch"});
+    broken_tests->insert(
+        {"negative_log_likelihood_loss_input_shape_is_NCd1d2_reduction_sum_expanded", "shape mismatch"});
+    broken_tests->insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_expanded", "shape mismatch"});
+    broken_tests->insert(
+        {"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_reduction_mean_expanded", "shape mismatch"});
+    broken_tests->insert(
+        {"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_reduction_sum_expanded", "shape mismatch"});
+    // Disable based on George Wu's recommendation.
+    broken_tests->insert(
+        {"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_reduction_sum_ignore_index_expanded",
+         "shape mismatch"});
+    broken_tests->insert({"negative_log_likelihood_loss_iinput_shape_is_NCd1_weight_ignore_index", "Shape mismatch"});
+    broken_tests->insert(
+        {"negative_log_likelihood_loss_iinput_shape_is_NCd1_weight_ignore_index_expanded", "Shape mismatch"});
+    broken_tests->insert({"negative_log_likelihood_loss_input_shape_is_NC", "Shape mismatch"});
+    broken_tests->insert({"negative_log_likelihood_loss_input_shape_is_NCd1", "Shape mismatch"});
+    broken_tests->insert({"negative_log_likelihood_loss_input_shape_is_NCd1_expanded", "Shape mismatch"});
+    broken_tests->insert({"negative_log_likelihood_loss_input_shape_is_NCd1_ignore_index", "Shape mismatch"});
+    broken_tests->insert({"negative_log_likelihood_loss_input_shape_is_NCd1_ignore_index_expanded", "Shape mismatch"});
+    broken_tests->insert(
+        {"negative_log_likelihood_loss_input_shape_is_NCd1_mean_weight_negative_ignore_index", "Shape mismatch"});
+    broken_tests->insert({"negative_log_likelihood_loss_input_shape_is_NCd1_mean_weight_negative_ignore_index_expanded",
+                          "Shape mismatch"});
+    broken_tests->insert({"negative_log_likelihood_loss_input_shape_is_NCd1_weight", "Shape mismatch"});
+    broken_tests->insert({"negative_log_likelihood_loss_input_shape_is_NCd1_weight_expanded", "Shape mismatch"});
+    broken_tests->insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2", "Shape mismatch"});
+    broken_tests->insert(
+        {"negative_log_likelihood_loss_input_shape_is_NCd1d2_no_weight_reduction_mean_ignore_index", "Shape mismatch"});
+    broken_tests->insert(
+        {"negative_log_likelihood_loss_input_shape_is_NCd1d2_no_weight_reduction_mean_ignore_index_expanded",
+         "Shape mismatch"});
+    broken_tests->insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_reduction_mean", "Shape mismatch"});
+    broken_tests->insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_reduction_sum", "Shape mismatch"});
+    broken_tests->insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight", "Shape mismatch"});
+    broken_tests->insert(
+        {"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_reduction_mean", "Shape mismatch"});
+    broken_tests->insert(
+        {"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_reduction_sum", "Shape mismatch"});
+    broken_tests->insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_reduction_sum_ignore_index",
+                          "Shape mismatch"});
+    broken_tests->insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index",
+                          "Shape mismatch"});
+    broken_tests->insert(
+        {"negative_log_likelihood_loss_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_expanded",
+         "Shape mismatch"});
+    broken_tests->insert(
+        {"negative_log_likelihood_loss_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index", "Shape mismatch"});
+    broken_tests->insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index_expanded",
+                          "Shape mismatch"});
+    broken_tests->insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_mean_weight", "Shape mismatch"});
+    broken_tests->insert(
+        {"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_mean_weight_expanded", "Shape mismatch"});
+    broken_tests->insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_none_no_weight", "Shape mismatch"});
+    broken_tests->insert(
+        {"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_none_no_weight_expanded", "Shape mismatch"});
+    broken_tests->insert(
+        {"softmax_cross_entropy_input_shape_is_NCd1_mean_weight_negative_ignore_index", "Shape mismatch"});
+    broken_tests->insert(
+        {"softmax_cross_entropy_input_shape_is_NCd1_mean_weight_negative_ignore_index_expanded", "Shape mismatch"});
+    broken_tests->insert(
+        {"softmax_cross_entropy_input_shape_is_NCd1_mean_weight_negative_ignore_index_log_prob", "Shape mismatch"});
+    broken_tests->insert(
+        {"softmax_cross_entropy_input_shape_is_NCd1_mean_weight_negative_ignore_index_log_prob_expanded",
+         "Shape mismatch"});
+    broken_tests->insert(
+        {"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_expanded",
+                          "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_log_prob",
+                          "Shape mismatch"});
+    broken_tests->insert(
+        {"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_log_prob_expanded",
+         "Shape mismatch"});
+    broken_tests->insert(
+        {"softmax_cross_entropy_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index", "Shape mismatch"});
+    broken_tests->insert(
+        {"softmax_cross_entropy_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index_expanded", "Shape mismatch"});
+    broken_tests->insert(
+        {"softmax_cross_entropy_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index_log_prob", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index_log_prob_expanded",
+                          "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_expanded", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_log_prob", "Shape mismatch"});
+    broken_tests->insert(
+        {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_log_prob_expanded", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight", "Shape mismatch"});
+    broken_tests->insert(
+        {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_expanded", "Shape mismatch"});
+    broken_tests->insert(
+        {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_log_prob", "Shape mismatch"});
+    broken_tests->insert(
+        {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_log_prob_expanded", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_mean", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_mean_3d", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_mean_3d_expanded", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_mean_3d_log_prob", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_mean_3d_log_prob_expanded", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_mean_expanded", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_mean_log_prob", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_mean_log_prob_expanded", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_mean_no_weight_ignore_index", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_mean_no_weight_ignore_index_3d", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_mean_no_weight_ignore_index_3d_expanded", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_mean_no_weight_ignore_index_3d_log_prob", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_mean_no_weight_ignore_index_3d_log_prob_expanded", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_mean_no_weight_ignore_index_4d", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_mean_no_weight_ignore_index_4d_expanded", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_mean_no_weight_ignore_index_4d_log_prob", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_mean_no_weight_ignore_index_4d_log_prob_expanded", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_mean_no_weight_ignore_index_expanded", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_mean_no_weight_ignore_index_log_prob", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_mean_no_weight_ignore_index_log_prob_expanded", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_mean_weight", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_mean_weight_expanded", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_mean_weight_ignore_index", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_mean_weight_ignore_index_3d", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_mean_weight_ignore_index_3d_expanded", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_mean_weight_ignore_index_3d_log_prob", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_mean_weight_ignore_index_3d_log_prob_expanded", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_mean_weight_ignore_index_4d", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_mean_weight_ignore_index_4d_expanded", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_mean_weight_ignore_index_4d_log_prob", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_mean_weight_ignore_index_4d_log_prob_expanded", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_mean_weight_ignore_index_expanded", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_mean_weight_ignore_index_log_prob", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_mean_weight_ignore_index_log_prob_expanded", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_mean_weight_log_prob", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_mean_weight_log_prob_expanded", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_none", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_none_expanded", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_none_log_prob", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_none_log_prob_expanded", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_none_weights", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_none_weights_expanded", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_none_weights_log_prob", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_none_weights_log_prob_expanded", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_sum", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_sum_expanded", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_sum_log_prob", "Shape mismatch"});
+    broken_tests->insert({"softmax_cross_entropy_sum_log_prob_expanded", "Shape mismatch"});
+  }
+
+  if (provider_name == "tensorrt") {
+    broken_tests->insert({"convtranspose_with_kernel", "It causes segmentation fault"});
+    broken_tests->insert({"convtranspose_pad", "It causes segmentation fault"});
+    broken_tests->insert({"convtranspose_kernel_shape", "It causes segmentation fault"});
+    broken_tests->insert({"dynamicquantizelinear_expanded", "It causes segmentation fault"});
+    broken_tests->insert({"dynamicquantizelinear_min_adjusted_expanded", "It causes segmentation fault"});
+    broken_tests->insert({"dynamicquantizelinear_max_adjusted_expanded", "It causes segmentation fault"});
+
+    broken_tests->insert({"basic_conv_with_padding",
+                          "Cannot set more than one input unless network has Q/DQ layers. TensorRT EP could not build "
+                          "engine for fused node"});
+    broken_tests->insert({"basic_conv_without_padding",
+                          "Cannot set more than one input unless network has Q/DQ layers. TensorRT EP could not build "
+                          "engine for fused node"});
+    broken_tests->insert({"conv_with_strides_no_padding",
+                          "Cannot set more than one input unless network has Q/DQ layers. TensorRT EP could not build "
+                          "engine for fused node"});
+
+    broken_tests->insert({"conv_with_autopad_same",
+                          "Internal Error (node_of_y: Cannot set more than one input unless network has Q/DQ layers.)"});
+
+    // unsupported tests since opset16
+    broken_tests->insert({"sequence_map_add_2_sequences", "not supported by TensorRT EP"});
+    broken_tests->insert({"sequence_map_extract_shapes", "not supported by TensorRT EP."});
+    broken_tests->insert({"sequence_map_add_1_sequence_1_tensor", "not supported by TensorRT EP."});
+    broken_tests->insert({"sequence_map_identity_1_sequence", "not supported by TensorRT EP."});
+    broken_tests->insert({"sequence_map_identity_2_sequences", "not supported by TensorRT EP."});
+    broken_tests->insert({"sequence_map_identity_1_sequence_1_tensor", "not supported by TensorRT EP."});
+    broken_tests->insert({"leakyrelu_expanded", "not supported by TensorRT EP."});
+    broken_tests->insert({"leakyrelu_default_expanded", "not supported by TensorRT EP."});
+    broken_tests->insert({"leakyrelu_example_expanded", "not supported by TensorRT EP."});
+    broken_tests->insert({"prelu_broadcast_expanded", "not supported by TensorRT EP."});
+    broken_tests->insert({"prelu_example_expanded", "not supported by TensorRT EP."});
+  }
+
+  if (provider_name == "dml") {
+    broken_tests->insert({"tinyyolov3", "The parameter is incorrect"});
+    broken_tests->insert({"PixelShuffle", "Test requires 6D Reshape, which isn't supported by DirectML"});
+    broken_tests->insert({"operator_permute2", "Test requires 6D Transpose, which isn't supported by DirectML"});
+    broken_tests->insert({"resize_downsample_linear",
+                          "ORT 0.4 uses asymmetric but will conform to half_pixel in the next ONNX version."});
+    broken_tests->insert(
+        {"resize_upsample_linear", "ORT 0.4 uses asymmetric but will conform to half_pixel in the next ONNX version."});
+    broken_tests->insert(
+        {"resize_upsample_linear", "ORT 0.4 uses asymmetric but will conform to half_pixel in the next ONNX version."});
+
+    // These tests are temporarily disabled pending investigation
+    broken_tests->insert({"dynamicquantizelinear_expanded", "Temporarily disabled pending investigation"});
+    broken_tests->insert({"dynamicquantizelinear_max_adjusted_expanded", "Temporarily disabled pending investigation"});
+    broken_tests->insert({"dynamicquantizelinear_min_adjusted_expanded", "Temporarily disabled pending investigation"});
+    broken_tests->insert({"mxnet_arcface", "Temporarily disabled pending investigation"});
+    broken_tests->insert({"yolov3", "Temporarily disabled pending investigation"});
+    broken_tests->insert({"tf_inception_v2", "Temporarily disabled pending investigation"});
+    broken_tests->insert({"fp16_inception_v1", "Temporarily disabled pending investigation"});
+    broken_tests->insert({"candy", "Temporarily disabled pending investigation"});
+    broken_tests->insert({"BERT_Squad", "Temporarily disabled pending investigation"});
+    broken_tests->insert({"LSTM_Seq_lens_unpacked", "The parameter is incorrect"});
+    broken_tests->insert({"mlperf_ssd_resnet34_1200", "The parameter is incorrect"});
+
+    broken_tests->insert({"resize_downsample_scales_linear",
+                          "DML uses half_pixel and this test assumed \"asymmetric\" but does not include \"mode\""});
+    broken_tests->insert({"resize_downsample_sizes_linear_pytorch_half_pixel",
+                          "DML does not support downsampling by such a large factor - skips input pixels"});
+    broken_tests->insert({"resize_downsample_sizes_nearest",
+                          "DML uses pixel centers for nearest, rounding 1 value off for the middle column"});
+    broken_tests->insert({"resize_upsample_sizes_nearest",
+                          "DML uses pixel centers for nearest, which makes more sense (the 3rd row mismatches)"});
+    broken_tests->insert({"unsqueeze_three_axes", "DML does not support 6D tensors"});
+    broken_tests->insert({"unsqueeze_unsorted_axes", "DMLdoes not support 6D tensors"});
+
+    broken_tests->insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index",
+                          "DML does not support 5D+ tensors"});
+    broken_tests->insert(
+        {"negative_log_likelihood_loss_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_expanded",
+         "DML does not support 5D+ tensors"});
+    broken_tests->insert(
+        {"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_mean_weight", "DML does not support 5D+ tensors"});
+    broken_tests->insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_mean_weight_expanded",
+                          "DML does not support 5D+ tensors"});
+    broken_tests->insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_none_no_weight",
+                          "DML does not support 5D+ tensors"});
+    broken_tests->insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_none_no_weight_expanded",
+                          "DML does not support 5D+ tensors"});
+    broken_tests->insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index",
+                          "DML does not support 5D+ tensors"});
+    broken_tests->insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_expanded",
+                          "DML does not support 5D+ tensors"});
+    broken_tests->insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_log_prob",
+                          "DML does not support 5D+ tensors"});
+    broken_tests->insert(
+        {"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_log_prob_expanded",
+         "DML does not support 5D+ tensors"});
+    broken_tests->insert(
+        {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight", "DML does not support 5D+ tensors"});
+    broken_tests->insert(
+        {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_expanded", "DML does not support 5D+ tensors"});
+    broken_tests->insert(
+        {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_log_prob", "DML does not support 5D+ tensors"});
+    broken_tests->insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_log_prob_expanded",
+                          "DML does not support 5D+ tensors"});
+    broken_tests->insert(
+        {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight", "DML does not support 5D+ tensors"});
+    broken_tests->insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_expanded",
+                          "DML does not support 5D+ tensors"});
+    broken_tests->insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_log_prob",
+                          "DML does not support 5D+ tensors"});
+    broken_tests->insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_log_prob_expanded",
+                          "DML does not support 5D+ tensors"});
+  }
+
+  if (provider_name == "qnn") {
+    broken_tests->insert({"gemm_default_no_bias", "result differs"});
+    broken_tests->insert({"resize_downsample_scales_linear", "result differs"});
+    broken_tests->insert({"resize_downsample_scales_linear_antialias", "result differs"});
+    broken_tests->insert({"resize_downsample_sizes_linear_antialias", "result differs"});
+    broken_tests->insert({"sce_NCd1_mean_weight_negative_ii", "result differs"});
+    broken_tests->insert({"sce_NCd1_mean_weight_negative_ii_expanded", "result differs"});
+    broken_tests->insert({"sce_NCd1_mean_weight_negative_ii_log_prob", "result differs"});
+    broken_tests->insert({"sce_NCd1_mean_weight_negative_ii_log_prob_expanded", "result differs"});
+    broken_tests->insert({"sce_mean", "result differs"});
+    broken_tests->insert({"sce_mean_3d", "result differs"});
+    broken_tests->insert({"sce_mean_3d_expanded", "result differs"});
+    broken_tests->insert({"sce_mean_3d_log_prob", "result differs"});
+    broken_tests->insert({"sce_mean_3d_log_prob_expanded", "result differs"});
+    broken_tests->insert({"sce_mean_expanded", "result differs"});
+    broken_tests->insert({"sce_mean_log_prob", "result differs"});
+    broken_tests->insert({"sce_mean_log_prob_expanded", "result differs"});
+    broken_tests->insert({"sce_mean_no_weight_ii", "result differs"});
+    broken_tests->insert({"sce_mean_no_weight_ii_3d", "result differs"});
+    broken_tests->insert({"sce_mean_no_weight_ii_3d_expanded", "result differs"});
+    broken_tests->insert({"sce_mean_no_weight_ii_3d_log_prob", "result differs"});
+    broken_tests->insert({"sce_mean_no_weight_ii_3d_log_prob_expanded", "result differs"});
+    broken_tests->insert({"sce_mean_no_weight_ii_4d", "result differs"});
+    broken_tests->insert({"sce_mean_no_weight_ii_4d_expanded", "result differs"});
+    broken_tests->insert({"sce_mean_no_weight_ii_4d_log_prob", "result differs"});
+    broken_tests->insert({"sce_mean_no_weight_ii_4d_log_prob_expanded", "result differs"});
+    broken_tests->insert({"sce_mean_no_weight_ii_expanded", "result differs"});
+    broken_tests->insert({"sce_mean_no_weight_ii_log_prob", "result differs"});
+    broken_tests->insert({"sce_mean_no_weight_ii_log_prob_expanded", "result differs"});
+    broken_tests->insert({"sce_mean_weight", "result differs"});
+    broken_tests->insert({"sce_mean_weight_expanded", "result differs"});
+    broken_tests->insert({"sce_mean_weight_ii", "result differs"});
+    broken_tests->insert({"sce_mean_weight_ii_3d", "result differs"});
+    broken_tests->insert({"sce_mean_weight_ii_3d_expanded", "result differs"});
+    broken_tests->insert({"sce_mean_weight_ii_3d_log_prob", "result differs"});
+    broken_tests->insert({"sce_mean_weight_ii_3d_log_prob_expanded", "result differs"});
+    broken_tests->insert({"sce_mean_weight_ii_4d", "result differs"});
+    broken_tests->insert({"sce_mean_weight_ii_4d_expanded", "result differs"});
+    broken_tests->insert({"sce_mean_weight_ii_4d_log_prob", "result differs"});
+    broken_tests->insert({"sce_mean_weight_ii_4d_log_prob_expanded", "result differs"});
+    broken_tests->insert({"sce_mean_weight_ii_expanded", "result differs"});
+    broken_tests->insert({"sce_mean_weight_ii_log_prob", "result differs"});
+    broken_tests->insert({"sce_mean_weight_ii_log_prob_expanded", "result differs"});
+    broken_tests->insert({"sce_mean_weight_log_prob", "result differs"});
+    broken_tests->insert({"sce_mean_weight_log_prob_expanded", "result differs"});
+    broken_tests->insert({"sce_none", "result differs"});
+    broken_tests->insert({"sce_none_expanded", "result differs"});
+    broken_tests->insert({"sce_none_log_prob", "result differs"});
+    broken_tests->insert({"sce_none_log_prob_expanded", "result differs"});
+    broken_tests->insert({"sce_sum", "result differs"});
+    broken_tests->insert({"sce_sum_expanded", "result differs"});
+    broken_tests->insert({"sce_sum_log_prob", "result differs"});
+    broken_tests->insert({"sce_sum_log_prob_expanded", "result differs"});
+    broken_tests->insert({"gridsample_reflection_padding", "result differs"});
+    broken_tests->insert({"gridsample_volumetric_nearest_align_corners_0", "unknown version"});
+    broken_tests->insert({"gridsample_volumetric_nearest_align_corners_1", "unknown version"});
+    broken_tests->insert({"spacetodepth", "result differs"});
+  }
+
+#ifdef DISABLE_CONTRIB_OPS
+  broken_tests->insert({"coreml_SqueezeNet_ImageNet", "This model uses contrib ops."});
+  broken_tests->insert({"keras2coreml_Permute_ImageNet", "This model uses contrib ops."});
+  broken_tests->insert({"keras2coreml_ReLU_ImageNet", "This model uses contrib ops."});
+  broken_tests->insert({"keras2coreml_Padding-Upsampling-Normalizer_ImageNet", "This model uses contrib ops."});
+  broken_tests->insert({"tiny_yolov2", "This model uses contrib ops."});
+  broken_tests->insert({"fp16_tiny_yolov2", "This model uses contrib ops."});
+  broken_tests->insert({"keras2coreml_Pooling_ImageNet", "This model uses contrib ops."});
+  broken_tests->insert({"keras2coreml_Padding_ImageNet", "This model uses contrib ops."});
+  broken_tests->insert({"keras2coreml_Normalizer_ImageNet", "This model uses contrib ops."});
+  broken_tests->insert({"keras2coreml_linear_sklearn_load_breast_cancer", "This model uses contrib ops."});
+  broken_tests->insert({"keras2coreml_linear_ImageNet_small", "This model uses contrib ops."});
+  broken_tests->insert({"keras2coreml_linear_ImageNet_large", "This model uses contrib ops."});
+  broken_tests->insert({"keras2coreml_linear_ImageNet", "This model uses contrib ops."});
+  broken_tests->insert({"keras2coreml_leakyrelu_ImageNet", "This model uses contrib ops."});
+  broken_tests->insert({"keras2coreml_hard_sigmoid_ImageNet", "This model uses contrib ops."});
+  broken_tests->insert({"keras2coreml_elu_ImageNet", "This model uses contrib ops."});
+  broken_tests->insert({"keras2coreml_Dense_ImageNet", "This model uses contrib ops."});
+  broken_tests->insert({"keras2coreml_Conv2D_ImageNet", "This model uses contrib ops."});
+  broken_tests->insert({"coreml_VGG16_ImageNet", "This model uses contrib ops."});
+  broken_tests->insert({"coreml_Resnet50_ImageNet", "This model uses contrib ops."});
+  broken_tests->insert({"coreml_Inceptionv3_ImageNet", "This model uses contrib ops."});
+  broken_tests->insert({"coreml_FNS-Candy_ImageNet", "This model uses contrib ops."});
+  broken_tests->insert({"coreml_AgeNet_ImageNet", "This model uses contrib ops."});
+  broken_tests->insert({"keras2coreml_thresholdedrelu_ImageNet_large", "This model uses contrib ops."});
+  broken_tests->insert({"keras2coreml_thresholdedrelu_ImageNet_small", "This model uses contrib ops."});
+  broken_tests->insert({"keras2coreml_thresholdedrelu_sklearn_load_breast_cancer", "This model uses contrib ops."});
+  broken_tests->insert({"thresholdedrelu", "This model uses contrib ops."});
+  broken_tests->insert({"thresholdedrelu_default", "This model uses contrib ops."});
+  broken_tests->insert({"dynamic_slice_default_axes", "This model uses contrib ops."});
+  broken_tests->insert({"thresholdedrelu_example", "This model uses contrib ops."});
+  broken_tests->insert({"dynamic_slice_neg failed", "This model uses contrib ops."});
+  broken_tests->insert({"dynamic_slice_start_out_of_bounds", "This model uses contrib ops."});
+  broken_tests->insert({"dynamic_slice", "This model uses contrib ops."});
+  broken_tests->insert({"dynamic_slice_end_out_of_bounds", "This model uses contrib ops."});
+  broken_tests->insert({"dynamic_slice_neg", "This model uses contrib ops."});
+  broken_tests->insert({"mvn", "This model uses contrib ops.", {"onnx130"}});
+  broken_tests->insert({"cdist_float32_euclidean_1000_2000_1", "This model uses contrib ops."});
+  broken_tests->insert({"cdist_float32_euclidean_1000_2000_500", "This model uses contrib ops."});
+  broken_tests->insert({"cdist_float32_euclidean_1_1_1", "This model uses contrib ops."});
+  broken_tests->insert({"cdist_float32_sqeuclidean_1000_2000_1", "This model uses contrib ops."});
+  broken_tests->insert({"cdist_float32_sqeuclidean_1000_2000_500", "This model uses contrib ops."});
+  broken_tests->insert({"cdist_float32_sqeuclidean_1_1_1", "This model uses contrib ops."});
+  broken_tests->insert({"cdist_float64_euclidean_1000_2000_1", "This model uses contrib ops."});
+  broken_tests->insert({"cdist_float64_euclidean_1000_2000_500", "This model uses contrib ops."});
+  broken_tests->insert({"cdist_float64_euclidean_1_1_1", "This model uses contrib ops."});
+  broken_tests->insert({"cdist_float64_sqeuclidean_1000_2000_1", "This model uses contrib ops."});
+  broken_tests->insert({"cdist_float64_sqeuclidean_1000_2000_500", "This model uses contrib ops."});
+  broken_tests->insert({"cdist_float64_sqeuclidean_1_1_1", "This model uses contrib ops."});
+  broken_tests->insert({"keras2coreml_Average_ImageNet", "This model uses contrib ops."});
+  broken_tests->insert({"bidaf", "This model uses contrib ops."});
+  broken_tests->insert({"fp16_test_tiny_yolov2", "This model uses contrib ops."});
+  broken_tests->insert({"fp16_coreml_FNS-Candy", "This model uses contrib ops."});
+  broken_tests->insert({"keras2coreml_Repeat_ImageNet", "This model uses contrib ops."});
+  broken_tests->insert({"keras2coreml_BiDirectional_ImageNet", "This model uses contrib ops."});
+  broken_tests->insert({"fp16_coreml_LinearRegression_NYCTaxi", "This model uses contrib ops."});
+  broken_tests->insert({"keras2coreml_Average_ImageNet", "This model uses contrib ops."});
+  broken_tests->insert({"keras2coreml_GRU_ImageNet", "This model uses contrib ops."});
+  broken_tests->insert({"keras2coreml_SimpleRNN_ImageNet", "This model uses contrib ops."});
+  broken_tests->insert({"keras2coreml_Dot_imageNet", "This model uses contrib ops."});
+#endif
+  return broken_tests;
+}
+
+// Some EPs may fail to pass some specific testcases.
+// For example TenosrRT EP may fail on FLOAT16 related testcases if GPU doesn't support float16.
+// Instead of list all these testcases, we can use following keyword set to filter out testcases wchich contain
+// specific keyword.
+std::unique_ptr<std::set<std::string>> GetBrokenTestsKeyWordSet(const std::string& provider_name) {
+  auto broken_tests_keyword_set = std::make_unique<std::set<std::string>>();
+  if (provider_name == "tensorrt") {
+    broken_tests_keyword_set->insert({"scatternd_add"});
+    broken_tests_keyword_set->insert({"scatternd_multiply"});
+    broken_tests_keyword_set->insert({"scatter_elements_with_duplicate_indices"});
+
+    // sce op is not supported
+    broken_tests_keyword_set->insert({"sce"});
+
+    // TensorRT EP CI uses Nvidia Tesla M60 which doesn't support fp16.
+    broken_tests_keyword_set->insert({"FLOAT16"});
+  }
+  return broken_tests_keyword_set;
+}
diff --git a/onnxruntime/test/onnx/TestCase.h b/onnxruntime/test/onnx/TestCase.h
index 0e3e7852f5180..96b0b5f6f7c08 100644
--- a/onnxruntime/test/onnx/TestCase.h
+++ b/onnxruntime/test/onnx/TestCase.h
@@ -101,8 +101,27 @@ class TestTolerances {
   const Map relative_overrides_;
 };
 
+struct BrokenTest {
+  std::string test_name_;
+  std::string reason_;
+  std::set<std::string> broken_opset_versions_ = {};  // apply to all versions if empty
+  BrokenTest(std::string name, std::string reason) : test_name_(std::move(name)), reason_(std::move(reason)) {}
+  BrokenTest(std::string name, std::string reason, const std::initializer_list<std::string>& versions) : test_name_(std::move(name)), reason_(std::move(reason)), broken_opset_versions_(versions) {}
+  bool operator<(const struct BrokenTest& test) const {
+    return strcmp(test_name_.c_str(), test.test_name_.c_str()) < 0;
+  }
+};
+
 void LoadTests(const std::vector<std::basic_string<PATH_CHAR_TYPE>>& input_paths,
                const std::vector<std::basic_string<PATH_CHAR_TYPE>>& whitelisted_test_cases,
                const TestTolerances& tolerances,
                const std::unordered_set<std::basic_string<ORTCHAR_T>>& disabled_tests,
+               std::unique_ptr<std::set<BrokenTest>> broken_test_list,
+               std::unique_ptr<std::set<std::string>> broken_tests_keyword_set,
                const std::function<void(std::unique_ptr<ITestCase>)>& process_function);
+
+std::unique_ptr<std::set<BrokenTest>> GetBrokenTests(const std::string& provider_name);
+
+std::unique_ptr<std::set<std::string>> GetBrokenTestsKeyWordSet(const std::string& provider_name);
+
+std::unique_ptr<std::set<std::string>> GetBrokenTestsKeyWordSet(const std::string& provider_name);
diff --git a/onnxruntime/test/onnx/main.cc b/onnxruntime/test/onnx/main.cc
index 062ca4ece86bf..2c0804397cfe8 100644
--- a/onnxruntime/test/onnx/main.cc
+++ b/onnxruntime/test/onnx/main.cc
@@ -56,6 +56,12 @@ void usage() {
       "\t    [QNN only] [rpc_control_latency]: QNN rpc control latency. default to 10.\n"
       "\t    [QNN only] [htp_performance_mode]: QNN performance mode, options: 'burst', 'balanced', 'default', 'high_performance', \n"
       "\t    'high_power_saver', 'low_balanced', 'low_power_saver', 'power_saver', 'sustained_high_performance'. Default to 'default'. \n"
+      "\t    [QNN only] [qnn_context_priority]: QNN context priority, options: 'low', 'normal', 'normal_high', 'high'. Default to 'normal'. \n"
+      "\t    [QNN only] [qnn_context_embed_mode]: 1 means dump the QNN context binary into the Onnx skeleton model.\n"
+      "\t    0 means dump the QNN context binary into separate bin file and set the path in the Onnx skeleton model.\n"
+      "\t    [QNN only] [qnn_saver_path]: QNN Saver backend path. e.g '/folderpath/libQnnSaver.so'.\n"
+      "\t    [QNN only] [htp_graph_finalization_optimization_mode]: QNN graph finalization optimization mode, options: \n"
+      "\t    '0', '1', '2', '3', default is '0'.\n"
       "\t [Usage]: -e <provider_name> -i '<key1>|<value1> <key2>|<value2>' \n\n"
       "\t [Example] [For QNN EP] -e qnn -i \"profiling_level|detailed backend_path|/folderpath/libQnnCpu.so\" \n\n"
       "\t    [SNPE only] [runtime]: SNPE runtime, options: 'CPU', 'GPU', 'GPU_FLOAT16', 'DSP', 'AIP_FIXED_TF'. \n"
@@ -168,6 +174,7 @@ int real_main(int argc, char* argv[], Ort::Env& env) {
   bool user_graph_optimization_level_set = false;
   bool set_denormal_as_zero = false;
   std::basic_string<ORTCHAR_T> ep_runtime_config_string;
+  std::string provider_name = "cpu";
 
   OrtLoggingLevel logging_level = ORT_LOGGING_LEVEL_ERROR;
   bool verbose_logging_required = false;
@@ -213,6 +220,7 @@ int real_main(int argc, char* argv[], Ort::Env& env) {
           whitelisted_test_cases.emplace_back(optarg);
           break;
         case 'e':
+          provider_name = ToUTF8String(optarg);
           if (!CompareCString(optarg, ORT_TSTR("cpu"))) {
             // do nothing
           } else if (!CompareCString(optarg, ORT_TSTR("cuda"))) {
@@ -453,6 +461,10 @@ int real_main(int argc, char* argv[], Ort::Env& env) {
           if (value.empty()) {
             ORT_THROW("Please provide the QNN backend path.");
           }
+        } else if (key == "qnn_context_embed_mode") {
+          if (value != "0") {
+            ORT_THROW("Set to 0 to disable qnn_context_embed_mode.");
+          }
         } else if (key == "qnn_context_cache_enable") {
           if (value != "1") {
             ORT_THROW("Set to 1 to enable qnn_context_cache_enable.");
@@ -477,9 +489,26 @@ int real_main(int argc, char* argv[], Ort::Env& env) {
             std::string str = str_stream.str();
             ORT_THROW("Wrong value for htp_performance_mode. select from: " + str);
           }
+        } else if (key == "qnn_context_priority") {
+          std::set<std::string> supported_qnn_context_priority = {"low", "normal", "normal_high", "high"};
+          if (supported_qnn_context_priority.find(value) == supported_qnn_context_priority.end()) {
+            ORT_THROW("Supported qnn_context_priority: low, normal, normal_high, high");
+          }
+        } else if (key == "qnn_saver_path") {
+          // no validation
+        } else if (key == "htp_graph_finalization_optimization_mode") {
+          std::unordered_set<std::string> supported_htp_graph_final_opt_modes = {"0", "1", "2", "3"};
+          if (supported_htp_graph_final_opt_modes.find(value) == supported_htp_graph_final_opt_modes.end()) {
+            std::ostringstream str_stream;
+            std::copy(supported_htp_graph_final_opt_modes.begin(), supported_htp_graph_final_opt_modes.end(),
+                      std::ostream_iterator<std::string>(str_stream, ","));
+            std::string str = str_stream.str();
+            ORT_THROW("Wrong value for htp_graph_finalization_optimization_mode. select from: " + str);
+          }
         } else {
-          ORT_THROW(R"(Wrong key type entered. Choose from options: ['backend_path', 'qnn_context_cache_enable', 
-'qnn_context_cache_path', 'profiling_level', 'rpc_control_latency', 'htp_performance_mode'])");
+          ORT_THROW(R"(Wrong key type entered. Choose from options: ['backend_path', 'qnn_context_cache_enable',
+'qnn_context_cache_path', 'profiling_level', 'rpc_control_latency', 'htp_performance_mode', 'qnn_saver_path',
+'htp_graph_finalization_optimization_mode', 'qnn_context_priority'])");
         }
 
         qnn_options[key] = value;
@@ -750,6 +779,7 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)");
         ORT_TSTR("sce_none_weights_expanded")};
 
     std::unordered_set<std::basic_string<ORTCHAR_T>> all_disabled_tests(std::begin(immutable_broken_tests), std::end(immutable_broken_tests));
+
     if (enable_cuda) {
       all_disabled_tests.insert(std::begin(cuda_flaky_tests), std::end(cuda_flaky_tests));
     }
@@ -772,10 +802,14 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)");
     all_disabled_tests.insert(std::begin(x86_disabled_tests), std::end(x86_disabled_tests));
 #endif
 
+    auto broken_tests = GetBrokenTests(provider_name);
+    auto broken_tests_keyword_set = GetBrokenTestsKeyWordSet(provider_name);
     std::vector<ITestCase*> tests;
     LoadTests(data_dirs, whitelisted_test_cases,
               LoadTestTolerances(enable_cuda, enable_openvino, override_tolerance, atol, rtol),
               all_disabled_tests,
+              std::move(broken_tests),
+              std::move(broken_tests_keyword_set),
               [&owned_tests, &tests](std::unique_ptr<ITestCase> l) {
                 tests.push_back(l.get());
                 owned_tests.push_back(std::move(l));
@@ -792,506 +826,10 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)");
     fwrite(res.c_str(), 1, res.size(), stdout);
   }
 
-  struct BrokenTest {
-    std::string test_name_;
-    std::string reason_;
-    std::set<std::string> broken_versions_ = {};  // apply to all versions if empty
-    BrokenTest(std::string name, std::string reason) : test_name_(std::move(name)), reason_(std::move(reason)) {}
-    BrokenTest(std::string name, std::string reason, const std::initializer_list<std::string>& versions) : test_name_(std::move(name)), reason_(std::move(reason)), broken_versions_(versions) {}
-    bool operator<(const struct BrokenTest& test) const {
-      return strcmp(test_name_.c_str(), test.test_name_.c_str()) < 0;
-    }
-  };
-
-  std::set<BrokenTest> broken_tests = {
-    {"BERT_Squad", "test data bug"},
-    {"constantofshape_float_ones", "test data bug", {"onnx141", "onnx150"}},
-    {"constantofshape_int_zeros", "test data bug", {"onnx141", "onnx150"}},
-    {"convtranspose_autopad_same", "Test data has been corrected in ONNX 1.10.", {"onnx180", "onnx181", "onnx190"}},
-    {"cast_STRING_to_FLOAT", "Linux CI has old ONNX python package with bad test data", {"onnx141"}},
-    // Numpy float to string has unexpected rounding for some results given numpy default precision is meant to be 8.
-    // "e.g. 0.296140194 -> '0.2961402' not '0.29614019'. ORT produces the latter with precision set to 8,
-    // which doesn't match the expected output that was generated with numpy.
-    {"cast_FLOAT_to_STRING", "Numpy float to string has unexpected rounding for some results."},
-    {"cntk_simple_seg", "Bad onnx test output caused by wrong SAME_UPPER/SAME_LOWER for ConvTranspose", {}},
-    {"tf_nasnet_large", "disable temporarily"},
-    {"tf_nasnet_mobile", "disable temporarily"},
-    {"tf_pnasnet_large", "disable temporarily"},
-    {"shrink", "test case is wrong", {"onnx141"}},
-    {"maxpool_with_argmax_2d_precomputed_strides", "ShapeInferenceError"},
-    {"tf_inception_v2", "result mismatch"},
-    {"tf_resnet_v1_50", "result mismatch when Conv BN Fusion is applied"},
-    {"tf_resnet_v1_101", "result mismatch when Conv BN Fusion is applied"},
-    {"tf_resnet_v1_152", "result mismatch when Conv BN Fusion is applied"},
-    {"mxnet_arcface", "Model is an invalid ONNX model"},
-    {"unique_not_sorted_without_axis", "Expected data for 'Y' is incorrect and in sorted order."},
-    {"cumsum_1d_reverse_exclusive", "only failing linux GPU CI. Likely build error."},
-    {"resize_downsample_scales_cubic_align_corners", "results mismatch with onnx tests"},
-    {"resize_downsample_scales_linear_align_corners", "results mismatch with onnx tests"},
-    {"resize_tf_crop_and_resize", "Bad onnx test output. Needs test fix."},
-    {"resize_upsample_sizes_nearest_ceil_half_pixel", "Bad onnx test output. Needs test fix."},
-    {"resize_upsample_sizes_nearest_floor_align_corners", "Bad onnx test output. Needs test fix."},
-    {"resize_upsample_sizes_nearest_round_prefer_ceil_asymmetric", "Bad onnx test output. Needs test fix."},
-    {"bitshift_right_uint16", "BitShift(11) uint16 support not enabled currently"},
-    {"bitshift_left_uint16", "BitShift(11) uint16 support not enabled currently"},
-    {"maxunpool_export_with_output_shape", "Invalid output in ONNX test. See https://github.com/onnx/onnx/issues/2398"},
-    {"training_dropout", "result differs", {}},                       // Temporary, subsequent PR will remove this.
-    {"training_dropout_default", "result differs", {}},               // Temporary, subsequent PR will remove this.
-    {"training_dropout_default_mask", "result differs", {}},          // Temporary, subsequent PR will remove this.
-    {"training_dropout_mask", "result differs", {}},                  // Temporary, subsequent PR will remove this.
-    {"adagrad", "not a registered function/op", {}},                  // Op not registered.
-    {"adagrad_multiple", "not a registered function/op", {}},         // Op not registered.
-    {"adam", "not a registered function/op", {}},                     // Op not registered.
-    {"adam_multiple", "not a registered function/op", {}},            // Op not registered.
-    {"gradient_of_add", "not a registered function/op", {}},          // Op not registered.
-    {"gradient_of_add_and_mul", "not a registered function/op", {}},  // Op not registered.
-    {"momentum", "not a registered function/op", {}},                 // Op not registered.
-    {"momentum_multiple", "not a registered function/op", {}},        // Op not registered.
-    {"nesterov_momentum", "not a registered function/op", {}},        // Op not registered.
-    {"sequence_insert_at_back", "onnx currently not supporting loading segment", {}},
-    {"sequence_insert_at_front", "onnx currently not supporting loading segment", {}},
-    {"loop13_seq", "ORT api does not currently support creating empty sequences (needed for this test)", {}},
-    {"cast_FLOAT_to_BFLOAT16", "onnx generate bfloat tensor as uint16 type", {}},
-    {"cast_BFLOAT16_to_FLOAT", "onnx generate bfloat tensor as uint16 type", {}},
-    {"castlike_FLOAT_to_BFLOAT16", "Depends on cast.", {}},
-    {"castlike_BFLOAT16_to_FLOAT", "Depends on cast", {}},
-    {"castlike_FLOAT_to_BFLOAT16_expanded", "Depends on cast.", {}},
-    {"castlike_BFLOAT16_to_FLOAT_expanded", "Depends on cast", {}},
-    {"castlike_FLOAT_to_STRING", "Numpy float to string has unexpected rounding for some results.", {}},
-    {"castlike_FLOAT_to_STRING_expanded", "Numpy float to string has unexpected rounding for some results.", {}},
-    {"bernoulli", "By design. Test data is for informational purpose because the generator is non deterministic."},
-    {"bernoulli_double", "By design. Test data is for informational purpose because the generator is non deterministic."},
-    {"bernoulli_double_expanded", "By design. Test data is for informational purpose because the generator is non deterministic."},
-    {"bernoulli_seed", "By design. Test data is for informational purpose because the generator is non deterministic."},
-    {"bernoulli_seed_expanded", "By design. Test data is for informational purpose because the generator is non deterministic."},
-    {"bernoulli_expanded", "By design. Test data is for informational purpose because the generator is non deterministic."},
-    {"test_roialign_aligned_true", "Opset 16 not supported yet."},
-    {"test_roialign_aligned_false", "Opset 16 not supported yet."},
-    {"test_roialign_mode_max", "Onnx roialign mode expected output is incorrect."},
-    {"test_scatternd_add", "Opset 16 not supported yet."},
-    {"test_scatternd_multiply", "Opset 16 not supported yet."},
-    {"test_scatter_elements_with_duplicate_indices", "Opset 16 not supported yet."},
-    {"col2im_pads", "onnx 18 test data error."},
-
-#if defined(DISABLE_OPTIONAL_TYPE)
-    {"test_optional_get_element", "Optional type not supported in this build flavor."},
-    {"test_optional_get_element_sequence", "Optional type not supported in this build flavor."},
-    {"test_optional_has_element", "Optional type not supported in this build flavor."},
-    {"test_optional_has_element_empty", "Optional type not supported in this build flavor."},
-    {"test_if_opt", "Optional type not supported in this build flavor."},
-    {"test_loop16_seq_none", "Optional type not supported in this build flavor."},
-    {"test_identity_opt", "Optional type not supported in this build flavor."},
-#endif
-
-  };
-
-#ifdef DISABLE_ML_OPS
-  auto starts_with = [](const std::string& find_in, const std::string& find_what) {
-    return find_in.compare(0, find_what.size(), find_what) == 0;
-  };
-  for (const auto& test_ptr : owned_tests) {
-    const std::string& test_name = test_ptr->GetTestCaseName();
-    if (starts_with(test_name, "XGBoost_") ||
-        starts_with(test_name, "coreml_") ||
-        starts_with(test_name, "scikit_") ||
-        starts_with(test_name, "libsvm_")) {
-      broken_tests.insert({test_name, "Traditional ML ops are disabled in this build."});
-    }
-  }
-#endif
-
-  if (enable_openvino) {
-    broken_tests.insert({"operator_permute2", "Disabled temporariliy"});
-    broken_tests.insert({"operator_repeat", "Disabled temporariliy"});
-    broken_tests.insert({"operator_repeat_dim_overflow", "Disabled temporariliy"});
-    broken_tests.insert({"mlperf_ssd_resnet34_1200", "Disabled temporariliy"});
-    broken_tests.insert({"candy", "Results mismatch: 1 of 150528"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_mean_weight", "OpenVino does not support 5D+ tensors"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_mean_weight_expanded", "OpenVino does not support 5D+ tensors"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_none_no_weight", "OpenVino does not support 5D+ tensors"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_none_no_weight_expanded", "OpenVino does not support 5D+ tensors"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight", "OpenVino does not support 5D+ tensors"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_expanded", "OpenVino does not support 5D+ tensors"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_log_prob", "OpenVino does not support 5D+ tensors"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_log_prob_expanded", "OpenVino does not support 5D+ tensors"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight", "OpenVino does not support 5D+ tensors"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_expanded", "OpenVino does not support 5D+ tensors"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_log_prob", "OpenVino does not support 5D+ tensors"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_log_prob_expanded", "OpenVino does not support 5D+ tensors"});
-  }
-
-  if (enable_dnnl) {
-    broken_tests.insert({"tf_mobilenet_v2_1.0_224", "result mismatch"});
-    broken_tests.insert({"tf_mobilenet_v2_1.4_224", "result mismatch"});
-    broken_tests.insert({"tf_mobilenet_v1_1.0_224", "result mismatch"});
-    broken_tests.insert({"mobilenetv2-1.0", "result mismatch"});
-    broken_tests.insert({"candy", "result mismatch"});
-    broken_tests.insert({"range_float_type_positive_delta_expanded", "get unknown exception from DNNL EP"});
-    broken_tests.insert({"range_int32_type_negative_delta_expanded", "get unknown exception from DNNL EP"});
-    broken_tests.insert({"averagepool_2d_ceil", "maxpool ceiling not supported"});
-    broken_tests.insert({"maxpool_2d_ceil", "maxpool ceiling not supported"});
-    broken_tests.insert({"maxpool_2d_dilations", "maxpool dilations not supported"});
-    broken_tests.insert({"mlperf_ssd_resnet34_1200", "test pass on dev box but fails on CI build"});
-    broken_tests.insert({"convtranspose_1d", "1d convtranspose not supported yet"});
-    broken_tests.insert({"convtranspose_3d", "3d convtranspose not supported yet"});
-    broken_tests.insert({"maxpool_2d_uint8", "Does not work on DNNL, NNAPI"});
-  }
-
-  if (enable_nnapi) {
-    broken_tests.insert({"scan9_sum", "Error with the extra graph"});
-    broken_tests.insert({"scan_sum", "Error with the extra graph"});
-    broken_tests.insert({"mvn_expanded", "Failed to find kernel for MemcpyFromHost(1) (node Memcpy_1)"});
-    broken_tests.insert({"dynamicquantizelinear_expanded", "Temporarily disabled pending investigation"});
-    broken_tests.insert({"dynamicquantizelinear_max_adjusted_expanded", "Temporarily disabled pending investigation"});
-    broken_tests.insert({"dynamicquantizelinear_min_adjusted_expanded", "Temporarily disabled pending investigation"});
-    broken_tests.insert({"gemm_transposeB", "Temporarily disabled pending investigation"});
-    broken_tests.insert({"range_float_type_positive_delta_expanded", "Temporarily disabled pending investigation"});
-    broken_tests.insert({"range_int32_type_negative_delta_expanded", "Temporarily disabled pending investigation"});
-    broken_tests.insert({"convtranspose_1d", "1d convtranspose not supported yet"});
-    broken_tests.insert({"convtranspose_3d", "3d convtranspose not supported yet"});
-    broken_tests.insert({"maxpool_2d_uint8", "result mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NC_expanded", "shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_expanded", "shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_reduction_mean_expanded", "shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_reduction_sum_expanded", "shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_expanded", "shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_reduction_mean_expanded", "shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_reduction_sum_expanded", "shape mismatch"});
-    // Disable based on George Wu's recommendation.
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_reduction_sum_ignore_index_expanded", "shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_iinput_shape_is_NCd1_weight_ignore_index", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_iinput_shape_is_NCd1_weight_ignore_index_expanded", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NC", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1_expanded", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1_ignore_index", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1_ignore_index_expanded", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1_mean_weight_negative_ignore_index", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1_mean_weight_negative_ignore_index_expanded", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1_weight", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1_weight_expanded", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_no_weight_reduction_mean_ignore_index", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_no_weight_reduction_mean_ignore_index_expanded", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_reduction_mean", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_reduction_sum", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_reduction_mean", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_reduction_sum", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_reduction_sum_ignore_index", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_expanded", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index_expanded", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_mean_weight", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_mean_weight_expanded", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_none_no_weight", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_none_no_weight_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1_mean_weight_negative_ignore_index", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1_mean_weight_negative_ignore_index_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1_mean_weight_negative_ignore_index_log_prob", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1_mean_weight_negative_ignore_index_log_prob_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_log_prob", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_log_prob_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index_log_prob", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index_log_prob_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_log_prob", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_log_prob_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_log_prob", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_log_prob_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_3d", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_3d_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_3d_log_prob", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_3d_log_prob_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_log_prob", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_log_prob_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_no_weight_ignore_index", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_no_weight_ignore_index_3d", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_no_weight_ignore_index_3d_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_no_weight_ignore_index_3d_log_prob", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_no_weight_ignore_index_3d_log_prob_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_no_weight_ignore_index_4d", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_no_weight_ignore_index_4d_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_no_weight_ignore_index_4d_log_prob", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_no_weight_ignore_index_4d_log_prob_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_no_weight_ignore_index_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_no_weight_ignore_index_log_prob", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_no_weight_ignore_index_log_prob_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_weight", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_weight_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_weight_ignore_index", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_weight_ignore_index_3d", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_weight_ignore_index_3d_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_weight_ignore_index_3d_log_prob", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_weight_ignore_index_3d_log_prob_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_weight_ignore_index_4d", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_weight_ignore_index_4d_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_weight_ignore_index_4d_log_prob", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_weight_ignore_index_4d_log_prob_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_weight_ignore_index_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_weight_ignore_index_log_prob", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_weight_ignore_index_log_prob_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_weight_log_prob", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_weight_log_prob_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_none", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_none_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_none_log_prob", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_none_log_prob_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_none_weights", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_none_weights_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_none_weights_log_prob", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_none_weights_log_prob_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_sum", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_sum_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_sum_log_prob", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_sum_log_prob_expanded", "Shape mismatch"});
-    broken_tests.insert({"nllloss_NCd1_ignore_index", "wait for investigation"});
-    broken_tests.insert({"nllloss_NCd1_ignore_index_expanded", "wait for investigation"});
-    broken_tests.insert({"nllloss_NCd1_mean_weight_negative_ignore_index", "wait for investigation"});
-    broken_tests.insert({"nllloss_NCd1_mean_weight_negative_ignore_index_expanded", "wait for investigation"});
-    broken_tests.insert({"nllloss_NCd1_weight", "wait for investigation"});
-    broken_tests.insert({"nllloss_NCd1_weight_expanded", "wait for investigation"});
-    broken_tests.insert({"nllloss_NCd1_weight_ignore_index", "wait for investigation"});
-    broken_tests.insert({"nllloss_NCd1_weight_ignore_index_expanded", "wait for investigation"});
-    broken_tests.insert({"nllloss_NCd1d2_no_weight_reduction_mean_ignore_index", "wait for investigation"});
-    broken_tests.insert({"nllloss_NCd1d2_no_weight_reduction_mean_ignore_index_expanded", "wait for investigation"});
-    broken_tests.insert({"nllloss_NCd1d2_with_weight_reduction_mean", "wait for investigation"});
-    broken_tests.insert({"nllloss_NCd1d2_with_weight_reduction_mean_expanded", "wait for investigation"});
-    broken_tests.insert({"nllloss_NCd1d2d3d4d5_mean_weight", "wait for investigation"});
-    broken_tests.insert({"nllloss_NCd1d2d3d4d5_mean_weight_expanded", "wait for investigation"});
-    broken_tests.insert({"nllloss_NCd1_ii", "wait for investigation"});
-    broken_tests.insert({"nllloss_NCd1_ii_expanded", "wait for investigation"});
-    broken_tests.insert({"nllloss_NCd1_mean_weight_negative_ii", "wait for investigation"});
-    broken_tests.insert({"nllloss_NCd1_mean_weight_negative_ii_expanded", "wait for investigation"});
-    broken_tests.insert({"nllloss_NCd1_weight_ii", "wait for investigation"});
-    broken_tests.insert({"nllloss_NCd1_weight_ii_expanded", "wait for investigation"});
-    broken_tests.insert({"nllloss_NCd1d2_no_weight_reduction_mean_ii", "wait for investigation"});
-    broken_tests.insert({"nllloss_NCd1d2_no_weight_reduction_mean_ii_expanded", "wait for investigation"});
-  }
-
-  if (enable_tensorrt) {
-    broken_tests.insert({"fp16_shufflenet", "TRT EP bug"});
-    broken_tests.insert({"fp16_inception_v1", "TRT EP bug"});
-    broken_tests.insert({"fp16_tiny_yolov2", "TRT EP bug"});
-    broken_tests.insert({"tf_inception_v3", "TRT Engine couldn't be created"});
-    broken_tests.insert({"tf_mobilenet_v1_1.0_224", "TRT Engine couldn't be created"});
-    broken_tests.insert({"tf_mobilenet_v2_1.0_224", "TRT Engine couldn't be created"});
-    broken_tests.insert({"tf_mobilenet_v2_1.4_224", "TRT Engine couldn't be created"});
-    broken_tests.insert({"tf_resnet_v1_101", "TRT Engine couldn't be created"});
-    broken_tests.insert({"tf_resnet_v1_152", "TRT Engine couldn't be created"});
-    broken_tests.insert({"tf_resnet_v1_50", "TRT Engine couldn't be created"});
-    broken_tests.insert({"tf_resnet_v2_101", "TRT Engine couldn't be created"});
-    broken_tests.insert({"tf_resnet_v2_152", "TRT Engine couldn't be created"});
-    broken_tests.insert({"tf_resnet_v2_50", "TRT Engine couldn't be created"});
-    broken_tests.insert({"convtranspose_1d", "1d convtranspose not supported yet"});
-    broken_tests.insert({"convtranspose_3d", "3d convtranspose not supported yet"});
-  }
-
-  if (enable_cuda) {
-    broken_tests.insert({"candy", "result mismatch"});
-    broken_tests.insert({"tinyyolov3", "The parameter is incorrect"});
-    broken_tests.insert({"mlperf_ssd_mobilenet_300", "unknown error"});
-    broken_tests.insert({"mlperf_ssd_resnet34_1200", "unknown error"});
-    broken_tests.insert({"tf_inception_v1", "flaky test"});  // TODO: Investigate cause for flakiness
-    broken_tests.insert({"faster_rcnn", "Linux: faster_rcnn:output=6383:shape mismatch, expect {77} got {57}"});
-    broken_tests.insert({"split_zero_size_splits", "alloc failed"});
-  }
-
-  if (enable_dml) {
-    broken_tests.insert({"tinyyolov3", "The parameter is incorrect"});
-    broken_tests.insert({"PixelShuffle", "Test requires 6D Reshape, which isn't supported by DirectML"});
-    broken_tests.insert({"operator_permute2", "Test requires 6D Transpose, which isn't supported by DirectML"});
-    broken_tests.insert({"resize_downsample_linear", "ORT 0.4 uses asymmetric but will conform to half_pixel in the next ONNX version."});
-    broken_tests.insert({"resize_upsample_linear", "ORT 0.4 uses asymmetric but will conform to half_pixel in the next ONNX version."});
-    broken_tests.insert({"resize_upsample_linear", "ORT 0.4 uses asymmetric but will conform to half_pixel in the next ONNX version."});
-
-    // These tests are temporarily disabled pending investigation
-    broken_tests.insert({"dynamicquantizelinear", "Temporarily disabled pending investigation"});
-    broken_tests.insert({"dynamicquantizelinear_expanded", "Temporarily disabled pending investigation"});
-    broken_tests.insert({"dynamicquantizelinear_max_adjusted_expanded", "Temporarily disabled pending investigation"});
-    broken_tests.insert({"dynamicquantizelinear_min_adjusted_expanded", "Temporarily disabled pending investigation"});
-    broken_tests.insert({"mxnet_arcface", "Temporarily disabled pending investigation"});
-    broken_tests.insert({"yolov3", "Temporarily disabled pending investigation"});
-    broken_tests.insert({"tf_inception_v2", "Temporarily disabled pending investigation"});
-    broken_tests.insert({"fp16_inception_v1", "Temporarily disabled pending investigation"});
-    broken_tests.insert({"candy", "Temporarily disabled pending investigation"});
-    broken_tests.insert({"BERT_Squad", "Temporarily disabled pending investigation"});
-    broken_tests.insert({"LSTM_Seq_lens_unpacked", "The parameter is incorrect"});
-
-    broken_tests.insert({"resize_downsample_scales_linear", "DML uses half_pixel and this test assumed \"asymmetric\" but does not include \"mode\""});
-    broken_tests.insert({"resize_downsample_sizes_linear_pytorch_half_pixel", "DML does not support downsampling by such a large factor - skips input pixels"});
-    broken_tests.insert({"resize_downsample_sizes_nearest", "DML uses pixel centers for nearest, rounding 1 value off for the middle column"});
-    broken_tests.insert({"resize_upsample_sizes_nearest", "DML uses pixel centers for nearest, which makes more sense (the 3rd row mismatches)"});
-    broken_tests.insert({"unsqueeze_three_axes", "DML does not support 6D tensors"});
-    broken_tests.insert({"unsqueeze_unsorted_axes", "DMLdoes not support 6D tensors"});
-
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index", "DML does not support 5D+ tensors"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_expanded", "DML does not support 5D+ tensors"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_mean_weight", "DML does not support 5D+ tensors"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_mean_weight_expanded", "DML does not support 5D+ tensors"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_none_no_weight", "DML does not support 5D+ tensors"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_none_no_weight_expanded", "DML does not support 5D+ tensors"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index", "DML does not support 5D+ tensors"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_expanded", "DML does not support 5D+ tensors"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_log_prob", "DML does not support 5D+ tensors"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_log_prob_expanded", "DML does not support 5D+ tensors"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight", "DML does not support 5D+ tensors"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_expanded", "DML does not support 5D+ tensors"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_log_prob", "DML does not support 5D+ tensors"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_log_prob_expanded", "DML does not support 5D+ tensors"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight", "DML does not support 5D+ tensors"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_expanded", "DML does not support 5D+ tensors"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_log_prob", "DML does not support 5D+ tensors"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_log_prob_expanded", "DML does not support 5D+ tensors"});
-
-    // TODO: Remove identity tests when fixed #42638109
-    broken_tests.insert({"identity_cpu", "Optional type not yet supported for identity-16."});
-    broken_tests.insert({"sequence_map_add_1_sequence_1_tensor_cpu", "Optional type not yet supported for identity-16."});
-    broken_tests.insert({"sequence_map_add_1_sequence_1_tensor_expanded_cpu", "Optional type not yet supported for identity-16."});
-    broken_tests.insert({"sequence_map_add_2_sequences_cpu", "Optional type not yet supported for identity-16."});
-    broken_tests.insert({"sequence_map_add_2_sequences_expanded_cpu", "Optional type not yet supported for identity-16."});
-    broken_tests.insert({"sequence_map_extract_shapes_cpu", "Optional type not yet supported for identity-16."});
-    broken_tests.insert({"sequence_map_extract_shapes_expanded_cpu", "Optional type not yet supported for identity-16."});
-    broken_tests.insert({"sequence_map_identity_1_sequence_1_tensor_cpu", "Optional type not yet supported for identity-16."});
-    broken_tests.insert({"sequence_map_identity_1_sequence_1_tensor_expanded_cpu", "Optional type not yet supported for identity-16."});
-    broken_tests.insert({"sequence_map_identity_1_sequence_cpu", "Optional type not yet supported for identity-16."});
-    broken_tests.insert({"sequence_map_identity_1_sequence_expanded_cpu", "Optional type not yet supported for identity-16."});
-    broken_tests.insert({"sequence_map_identity_2_sequences_cpu", "Optional type not yet supported for identity-16."});
-    broken_tests.insert({"sequence_map_identity_2_sequences_expanded_cpu", "Optional type not yet supported for identity-16."});
-  }
-  if (enable_qnn) {
-    broken_tests.insert({"gemm_default_no_bias", "result differs"});
-    broken_tests.insert({"resize_downsample_scales_linear", "result differs"});
-    broken_tests.insert({"resize_downsample_scales_linear_antialias", "result differs"});
-    broken_tests.insert({"resize_downsample_sizes_linear_antialias", "result differs"});
-    broken_tests.insert({"sce_NCd1_mean_weight_negative_ii", "result differs"});
-    broken_tests.insert({"sce_NCd1_mean_weight_negative_ii_expanded", "result differs"});
-    broken_tests.insert({"sce_NCd1_mean_weight_negative_ii_log_prob", "result differs"});
-    broken_tests.insert({"sce_NCd1_mean_weight_negative_ii_log_prob_expanded", "result differs"});
-    broken_tests.insert({"sce_mean", "result differs"});
-    broken_tests.insert({"sce_mean_3d", "result differs"});
-    broken_tests.insert({"sce_mean_3d_expanded", "result differs"});
-    broken_tests.insert({"sce_mean_3d_log_prob", "result differs"});
-    broken_tests.insert({"sce_mean_3d_log_prob_expanded", "result differs"});
-    broken_tests.insert({"sce_mean_expanded", "result differs"});
-    broken_tests.insert({"sce_mean_log_prob", "result differs"});
-    broken_tests.insert({"sce_mean_log_prob_expanded", "result differs"});
-    broken_tests.insert({"sce_mean_no_weight_ii", "result differs"});
-    broken_tests.insert({"sce_mean_no_weight_ii_3d", "result differs"});
-    broken_tests.insert({"sce_mean_no_weight_ii_3d_expanded", "result differs"});
-    broken_tests.insert({"sce_mean_no_weight_ii_3d_log_prob", "result differs"});
-    broken_tests.insert({"sce_mean_no_weight_ii_3d_log_prob_expanded", "result differs"});
-    broken_tests.insert({"sce_mean_no_weight_ii_4d", "result differs"});
-    broken_tests.insert({"sce_mean_no_weight_ii_4d_expanded", "result differs"});
-    broken_tests.insert({"sce_mean_no_weight_ii_4d_log_prob", "result differs"});
-    broken_tests.insert({"sce_mean_no_weight_ii_4d_log_prob_expanded", "result differs"});
-    broken_tests.insert({"sce_mean_no_weight_ii_expanded", "result differs"});
-    broken_tests.insert({"sce_mean_no_weight_ii_log_prob", "result differs"});
-    broken_tests.insert({"sce_mean_no_weight_ii_log_prob_expanded", "result differs"});
-    broken_tests.insert({"sce_mean_weight", "result differs"});
-    broken_tests.insert({"sce_mean_weight_expanded", "result differs"});
-    broken_tests.insert({"sce_mean_weight_ii", "result differs"});
-    broken_tests.insert({"sce_mean_weight_ii_3d", "result differs"});
-    broken_tests.insert({"sce_mean_weight_ii_3d_expanded", "result differs"});
-    broken_tests.insert({"sce_mean_weight_ii_3d_log_prob", "result differs"});
-    broken_tests.insert({"sce_mean_weight_ii_3d_log_prob_expanded", "result differs"});
-    broken_tests.insert({"sce_mean_weight_ii_4d", "result differs"});
-    broken_tests.insert({"sce_mean_weight_ii_4d_expanded", "result differs"});
-    broken_tests.insert({"sce_mean_weight_ii_4d_log_prob", "result differs"});
-    broken_tests.insert({"sce_mean_weight_ii_4d_log_prob_expanded", "result differs"});
-    broken_tests.insert({"sce_mean_weight_ii_expanded", "result differs"});
-    broken_tests.insert({"sce_mean_weight_ii_log_prob", "result differs"});
-    broken_tests.insert({"sce_mean_weight_ii_log_prob_expanded", "result differs"});
-    broken_tests.insert({"sce_mean_weight_log_prob", "result differs"});
-    broken_tests.insert({"sce_mean_weight_log_prob_expanded", "result differs"});
-    broken_tests.insert({"sce_none", "result differs"});
-    broken_tests.insert({"sce_none_expanded", "result differs"});
-    broken_tests.insert({"sce_none_log_prob", "result differs"});
-    broken_tests.insert({"sce_none_log_prob_expanded", "result differs"});
-    broken_tests.insert({"sce_sum", "result differs"});
-    broken_tests.insert({"sce_sum_expanded", "result differs"});
-    broken_tests.insert({"sce_sum_log_prob", "result differs"});
-    broken_tests.insert({"sce_sum_log_prob_expanded", "result differs"});
-    broken_tests.insert({"gridsample_reflection_padding", "result differs"});
-    broken_tests.insert({"spacetodepth", "result differs"});
-  }
-#if defined(_WIN32) && !defined(_WIN64)
-  broken_tests.insert({"vgg19", "failed: bad allocation"});
-#endif
-
-  // Disable mask_rcnn_keras as this model currently has an invalid contrib op version set to 10
-  broken_tests.insert({"mask_rcnn_keras", "This model uses contrib ops."});
-
-#ifdef DISABLE_CONTRIB_OPS
-  broken_tests.insert({"coreml_SqueezeNet_ImageNet", "This model uses contrib ops."});
-  broken_tests.insert({"keras2coreml_Permute_ImageNet", "This model uses contrib ops."});
-  broken_tests.insert({"keras2coreml_ReLU_ImageNet", "This model uses contrib ops."});
-  broken_tests.insert({"keras2coreml_Padding-Upsampling-Normalizer_ImageNet", "This model uses contrib ops."});
-  broken_tests.insert({"tiny_yolov2", "This model uses contrib ops."});
-  broken_tests.insert({"fp16_tiny_yolov2", "This model uses contrib ops."});
-  broken_tests.insert({"keras2coreml_Pooling_ImageNet", "This model uses contrib ops."});
-  broken_tests.insert({"keras2coreml_Padding_ImageNet", "This model uses contrib ops."});
-  broken_tests.insert({"keras2coreml_Normalizer_ImageNet", "This model uses contrib ops."});
-  broken_tests.insert({"keras2coreml_linear_sklearn_load_breast_cancer", "This model uses contrib ops."});
-  broken_tests.insert({"keras2coreml_linear_ImageNet_small", "This model uses contrib ops."});
-  broken_tests.insert({"keras2coreml_linear_ImageNet_large", "This model uses contrib ops."});
-  broken_tests.insert({"keras2coreml_linear_ImageNet", "This model uses contrib ops."});
-  broken_tests.insert({"keras2coreml_leakyrelu_ImageNet", "This model uses contrib ops."});
-  broken_tests.insert({"keras2coreml_hard_sigmoid_ImageNet", "This model uses contrib ops."});
-  broken_tests.insert({"keras2coreml_elu_ImageNet", "This model uses contrib ops."});
-  broken_tests.insert({"keras2coreml_Dense_ImageNet", "This model uses contrib ops."});
-  broken_tests.insert({"keras2coreml_Conv2D_ImageNet", "This model uses contrib ops."});
-  broken_tests.insert({"coreml_VGG16_ImageNet", "This model uses contrib ops."});
-  broken_tests.insert({"coreml_Resnet50_ImageNet", "This model uses contrib ops."});
-  broken_tests.insert({"coreml_Inceptionv3_ImageNet", "This model uses contrib ops."});
-  broken_tests.insert({"coreml_FNS-Candy_ImageNet", "This model uses contrib ops."});
-  broken_tests.insert({"coreml_AgeNet_ImageNet", "This model uses contrib ops."});
-  broken_tests.insert({"keras2coreml_thresholdedrelu_ImageNet_large", "This model uses contrib ops."});
-  broken_tests.insert({"keras2coreml_thresholdedrelu_ImageNet_small", "This model uses contrib ops."});
-  broken_tests.insert({"keras2coreml_thresholdedrelu_sklearn_load_breast_cancer", "This model uses contrib ops."});
-  broken_tests.insert({"thresholdedrelu", "This model uses contrib ops."});
-  broken_tests.insert({"thresholdedrelu_default", "This model uses contrib ops."});
-  broken_tests.insert({"dynamic_slice_default_axes", "This model uses contrib ops."});
-  broken_tests.insert({"thresholdedrelu_example", "This model uses contrib ops."});
-  broken_tests.insert({"dynamic_slice_neg failed", "This model uses contrib ops."});
-  broken_tests.insert({"dynamic_slice_start_out_of_bounds", "This model uses contrib ops."});
-  broken_tests.insert({"dynamic_slice", "This model uses contrib ops."});
-  broken_tests.insert({"dynamic_slice_end_out_of_bounds", "This model uses contrib ops."});
-  broken_tests.insert({"dynamic_slice_neg", "This model uses contrib ops."});
-  broken_tests.insert({"mvn", "This model uses contrib ops.", {"onnx130"}});
-  broken_tests.insert({"cdist_float32_euclidean_1000_2000_1", "This model uses contrib ops."});
-  broken_tests.insert({"cdist_float32_euclidean_1000_2000_500", "This model uses contrib ops."});
-  broken_tests.insert({"cdist_float32_euclidean_1_1_1", "This model uses contrib ops."});
-  broken_tests.insert({"cdist_float32_sqeuclidean_1000_2000_1", "This model uses contrib ops."});
-  broken_tests.insert({"cdist_float32_sqeuclidean_1000_2000_500", "This model uses contrib ops."});
-  broken_tests.insert({"cdist_float32_sqeuclidean_1_1_1", "This model uses contrib ops."});
-  broken_tests.insert({"cdist_float64_euclidean_1000_2000_1", "This model uses contrib ops."});
-  broken_tests.insert({"cdist_float64_euclidean_1000_2000_500", "This model uses contrib ops."});
-  broken_tests.insert({"cdist_float64_euclidean_1_1_1", "This model uses contrib ops."});
-  broken_tests.insert({"cdist_float64_sqeuclidean_1000_2000_1", "This model uses contrib ops."});
-  broken_tests.insert({"cdist_float64_sqeuclidean_1000_2000_500", "This model uses contrib ops."});
-  broken_tests.insert({"cdist_float64_sqeuclidean_1_1_1", "This model uses contrib ops."});
-#endif
-
   int result = 0;
   for (const auto& p : stat.GetFailedTest()) {
-    BrokenTest t = {p.first, ""};
-    auto iter = broken_tests.find(t);
-    if (iter == broken_tests.end() || (p.second != TestModelInfo::unknown_version && !iter->broken_versions_.empty() &&
-                                       iter->broken_versions_.find(p.second) == iter->broken_versions_.end())) {
-      fprintf(stderr, "test %s failed, please fix it\n", p.first.c_str());
-      result = -1;
-    }
+    fprintf(stderr, "test %s failed, please fix it\n", p.first.c_str());
+    result = -1;
   }
   return result;
 }
diff --git a/onnxruntime/test/onnx/microbenchmark/reduceminmax.cc b/onnxruntime/test/onnx/microbenchmark/reduceminmax.cc
index bd2abadf49b81..d866045ba4962 100644
--- a/onnxruntime/test/onnx/microbenchmark/reduceminmax.cc
+++ b/onnxruntime/test/onnx/microbenchmark/reduceminmax.cc
@@ -91,6 +91,8 @@ BENCHMARK(BM_FindMinMaxMlasSSE2)
     ->Arg(98304)
     ->Arg(160000);
 
+#ifdef MLAS_TARGET_AMD64
+
 // MLAS avx implementation
 static void BM_FindMinMaxMlasAvx(benchmark::State& state) {
   const size_t batch_size = static_cast<size_t>(state.range(0));
@@ -115,3 +117,5 @@ BENCHMARK(BM_FindMinMaxMlasAvx)
     ->Arg(80000)
     ->Arg(98304)
     ->Arg(160000);
+
+#endif  // MLAS_TARGET_AMD64
diff --git a/onnxruntime/test/optimizer/compute_optimizer_test.cc b/onnxruntime/test/optimizer/compute_optimizer_test.cc
index a03d0da2538d4..9dcedd1fd7681 100644
--- a/onnxruntime/test/optimizer/compute_optimizer_test.cc
+++ b/onnxruntime/test/optimizer/compute_optimizer_test.cc
@@ -847,7 +847,7 @@ Test graph includes multiple equivalent subgraphs as below.
 Add an Identity node because currently, we don't allow Gather generates graph output.
 */
 TEST(ComputeOptimizerTests, GatherLayerNormalization) {
-  std::vector<std::tuple<int, int64_t, int64_t, bool>> test_config_pairs{
+  std::vector<std::tuple<bool, int64_t, int64_t, bool>> test_config_pairs{
       // {
       //  is_scalar_slice,
       //  ln_axis_before_propagation,
@@ -929,13 +929,6 @@ TEST(ComputeOptimizerTests, GatherLayerNormalization) {
             const ONNX_NAMESPACE::TensorShapeProto* slice_out_shape = producer_node->OutputDefs()[0]->Shape();
             TEST_RETURN_IF_NOT(slice_out_shape != nullptr);
 
-            auto& attrs = node.GetAttributes();
-            TEST_RETURN_IF_NOT(attrs.find("axis") != attrs.end());
-
-            auto& axis_attr = attrs.at("axis");
-            auto axis_value = (int)axis_attr.i();
-            TEST_RETURN_IF_NOT(axis_value == ln_axis_after);
-
             if (is_scalar_slice) {
               TEST_RETURN_IF_NOT(slice_out_shape->dim_size() == 2);
               TEST_RETURN_IF_NOT(utils::HasDimValue(slice_out_shape->dim(0)) &&
@@ -951,10 +944,15 @@ TEST(ComputeOptimizerTests, GatherLayerNormalization) {
               TEST_RETURN_IF_NOT(utils::HasDimValue(slice_out_shape->dim(2)) &&
                                  slice_out_shape->dim(2).dim_value() == 256);
             }
-
           } else {
             TEST_RETURN_IF_NOT(producer_node == nullptr);
           }
+          auto& attrs = node.GetAttributes();
+          TEST_RETURN_IF_NOT(attrs.find("axis") != attrs.end());
+
+          auto& axis_attr = attrs.at("axis");
+          auto axis_value = (int)axis_attr.i();
+          TEST_RETURN_IF_NOT(axis_value == ln_axis_after);
         }
       }
 
@@ -2841,165 +2839,110 @@ Test graph include multiple equivalent subgraphs as below.
 
 Add an Identity node because currently we don't allow Reshape generate graph output.
 */
-TEST(ComputeOptimizerTests, ReshapeLayerNormalization_PropagationOnOneBranch) {
-  const logging::Logger* logger = &logging::LoggingManager::DefaultLogger();
-  auto pre_graph_checker = [](Graph& graph) -> Status {
-    auto op_count_pre = CountOpsInGraph(graph);
-    TEST_RETURN_IF_NOT(op_count_pre.size() == 3U);
-    TEST_RETURN_IF_NOT(op_count_pre["LayerNormalization"] == 1);
-    TEST_RETURN_IF_NOT(op_count_pre["Reshape"] == 1);
-    TEST_RETURN_IF_NOT(op_count_pre["Identity"] == 1);
-    return Status::OK();
-  };
-
-  auto post_graph_checker = [](Graph& graph) {
-    auto op_count_post = CountOpsInGraph(graph);
-    TEST_RETURN_IF_NOT(op_count_post.size() == 3U);
-    TEST_RETURN_IF_NOT(op_count_post["LayerNormalization"] == 1);
-    TEST_RETURN_IF_NOT(op_count_post["Reshape"] == 1);
-    TEST_RETURN_IF_NOT(op_count_post["Identity"] == 1);
-
-    for (Node& node : graph.Nodes()) {
-      if (node.OpType() == "LayerNormalization") {
-        const auto& input_defs = node.InputDefs();
-
-        {
-          auto producer_node = graph.GetProducerNode(input_defs[0]->Name());
-          TEST_RETURN_IF_NOT(producer_node != nullptr);
-          TEST_RETURN_IF_NOT(producer_node->OpType() == "Reshape");
-
-          InlinedVector<int64_t> values;
-          constexpr bool require_constant = true;
-          NodeArg* initializer_node_arg = graph.GetNodeArg(producer_node->InputDefs()[1]->Name());
-          TEST_RETURN_IF_NOT(optimizer_utils::AppendTensorFromInitializer(graph, *initializer_node_arg, values, require_constant));
-          TEST_RETURN_IF_NOT(values.size() == 2);
-          TEST_RETURN_IF_NOT(values[0] == -1);
-          TEST_RETURN_IF_NOT(values[1] == 1024);
-        }
-
-        {
-          auto producer_node = graph.GetProducerNode(input_defs[1]->Name());
-          TEST_RETURN_IF_NOT(producer_node == nullptr);
-        }
-
-        {
-          auto producer_node = graph.GetProducerNode(input_defs[2]->Name());
-          TEST_RETURN_IF_NOT(producer_node == nullptr);
-        }
-      }
-    }
-    return Status::OK();
+TEST(ComputeOptimizerTests, ReshapeLayerNormalization) {
+  std::vector<std::tuple<int64_t, int64_t, bool>> test_config_pairs{
+      // {
+      //  ln_axis_before_propagation,
+      //  expected_ln_axis_after_propagation,
+      //  expected to propagate
+      // }
+      {0, 0, false},
+      {1, 1, false},
+      {2, 1, true},
+      {-3, -3, false},
+      {-2, -2, false},
+      {-1, -1, true},
   };
 
-  std::vector<int> fist_dim_values = {-1, 128};
-  for (auto first_dim_value : fist_dim_values) {
-    auto build_test_case = [&first_dim_value](ModelTestBuilder& builder) {
-      auto* input1_arg = builder.MakeInput<float>({{4, 32, 1024}});
-      auto* input2_arg = builder.MakeInput<float>({{1024}});
-      auto* input3_arg = builder.MakeInput<float>({{1024}});
-      auto* ln_out = builder.MakeIntermediate();
-      builder.AddNode("LayerNormalization", {input1_arg, input2_arg, input3_arg}, {ln_out})
-          .AddAttribute("axis", static_cast<int64_t>(-1));
-
-      auto* shape_initializer = builder.MakeInitializer<int64_t>({2}, {first_dim_value, 1024});
-      auto* reshape_out = builder.MakeIntermediate();
-      builder.AddNode("Reshape", {ln_out, shape_initializer}, {reshape_out});
+  for (auto p : test_config_pairs) {
+    int64_t ln_axis_before = std::get<0>(p);
+    int64_t ln_axis_after = std::get<1>(p);
+    bool expected_to_propagate = std::get<2>(p);
 
-      auto* identity_out = builder.MakeOutput();
-      builder.AddNode("Identity", {reshape_out}, {identity_out});
+    const logging::Logger* logger = &logging::LoggingManager::DefaultLogger();
+    auto pre_graph_checker = [](Graph& graph) -> Status {
+      auto op_count_pre = CountOpsInGraph(graph);
+      TEST_RETURN_IF_NOT(op_count_pre.size() == 3U);
+      TEST_RETURN_IF_NOT(op_count_pre["LayerNormalization"] == 1);
+      TEST_RETURN_IF_NOT(op_count_pre["Reshape"] == 1);
+      TEST_RETURN_IF_NOT(op_count_pre["Identity"] == 1);
+      return Status::OK();
     };
 
-    const std::vector<int> opsets{12, 13, 14};
-    for (auto& opset_version : opsets) {
-      std::unique_ptr<GraphTransformer> transformer = std::make_unique<UpStreamReshapeGraphTransformer>();
-      ASSERT_STATUS_OK(TestGraphTransformer(build_test_case, opset_version, *logger, std::move(transformer),
-                                            TransformerLevel::Level1,
-                                            1, pre_graph_checker, post_graph_checker));
-    }
-  }
-}
+    auto post_graph_checker = [ln_axis_after, expected_to_propagate](Graph& graph) {
+      auto op_count_post = CountOpsInGraph(graph);
+      TEST_RETURN_IF_NOT(op_count_post.size() == 3U);
+      TEST_RETURN_IF_NOT(op_count_post["LayerNormalization"] == 1);
+      TEST_RETURN_IF_NOT(op_count_post["Reshape"] == 1);
+      TEST_RETURN_IF_NOT(op_count_post["Identity"] == 1);
 
-/*
-Test graph include multiple equivalent subgraphs as below.
-           graph input [4, 32, 1024] (float)       graph input [1024] (float)     graph input [1024] (float)
-                            |                         |                             /
-                             \_____________   _______/  __________________________/
-                                           \ /         /
-                                    LayerNormalization
-                                            |
-                                         Reshape
-                                            |
-                                         Identity
-                                            |
-                                    graph out [128, 1024] (float)
+      for (Node& node : graph.Nodes()) {
+        if (node.OpType() == "LayerNormalization") {
+          const auto& input_defs = node.InputDefs();
 
-Add an Identity node because currently we don't allow Reshape generate graph output.
-*/
-TEST(ComputeOptimizerTests, ReshapeLayerNormalization_NoPropagation) {
-  const logging::Logger* logger = &logging::LoggingManager::DefaultLogger();
-  auto pre_graph_checker = [](Graph& graph) -> Status {
-    auto op_count_pre = CountOpsInGraph(graph);
-    TEST_RETURN_IF_NOT(op_count_pre.size() == 3U);
-    TEST_RETURN_IF_NOT(op_count_pre["LayerNormalization"] == 1);
-    TEST_RETURN_IF_NOT(op_count_pre["Reshape"] == 1);
-    TEST_RETURN_IF_NOT(op_count_pre["Identity"] == 1);
-    return Status::OK();
-  };
+          if (expected_to_propagate) {
+            auto producer_node = graph.GetProducerNode(input_defs[0]->Name());
+            TEST_RETURN_IF_NOT(producer_node != nullptr);
+            TEST_RETURN_IF_NOT(producer_node->OpType() == "Reshape");
 
-  auto post_graph_checker = [](Graph& graph) {
-    auto op_count_post = CountOpsInGraph(graph);
-    TEST_RETURN_IF_NOT(op_count_post.size() == 3U);
-    TEST_RETURN_IF_NOT(op_count_post["LayerNormalization"] == 1);
-    TEST_RETURN_IF_NOT(op_count_post["Reshape"] == 1);
-    TEST_RETURN_IF_NOT(op_count_post["Identity"] == 1);
+            InlinedVector<int64_t> values;
+            constexpr bool require_constant = true;
+            NodeArg* initializer_node_arg = graph.GetNodeArg(producer_node->InputDefs()[1]->Name());
+            TEST_RETURN_IF_NOT(optimizer_utils::AppendTensorFromInitializer(graph, *initializer_node_arg, values, require_constant));
+            TEST_RETURN_IF_NOT(values.size() == 2);
+            TEST_RETURN_IF_NOT(values[0] == -1);
+            TEST_RETURN_IF_NOT(values[1] == 1024);
+          } else {
+            auto producer_node = graph.GetProducerNode(input_defs[0]->Name());
+            TEST_RETURN_IF_NOT(producer_node == nullptr);
+          }
 
-    for (Node& node : graph.Nodes()) {
-      if (node.OpType() == "LayerNormalization") {
-        const auto& input_defs = node.InputDefs();
+          {
+            auto producer_node = graph.GetProducerNode(input_defs[1]->Name());
+            TEST_RETURN_IF_NOT(producer_node == nullptr);
+          }
 
-        {
-          auto producer_node = graph.GetProducerNode(input_defs[0]->Name());
-          TEST_RETURN_IF_NOT(producer_node == nullptr);
-        }
+          {
+            auto producer_node = graph.GetProducerNode(input_defs[2]->Name());
+            TEST_RETURN_IF_NOT(producer_node == nullptr);
+          }
 
-        {
-          auto producer_node = graph.GetProducerNode(input_defs[1]->Name());
-          TEST_RETURN_IF_NOT(producer_node == nullptr);
-        }
+          auto& attrs = node.GetAttributes();
+          TEST_RETURN_IF_NOT(attrs.find("axis") != attrs.end());
 
-        {
-          auto producer_node = graph.GetProducerNode(input_defs[2]->Name());
-          TEST_RETURN_IF_NOT(producer_node == nullptr);
+          auto& axis_attr = attrs.at("axis");
+          auto axis_value = (int)axis_attr.i();
+          TEST_RETURN_IF_NOT(axis_value == ln_axis_after);
         }
       }
-    }
-    return Status::OK();
-  };
-
-  std::vector<int> fist_dim_values = {-1, 128};
-  for (auto first_dim_value : fist_dim_values) {
-    auto build_test_case = [&first_dim_value](ModelTestBuilder& builder) {
-      auto* input1_arg = builder.MakeInput<float>({{4, 32, 1024}});
-      auto* input2_arg = builder.MakeInput<float>({{1024}});
-      auto* input3_arg = builder.MakeInput<float>({{1024}});
-      auto* ln_out = builder.MakeIntermediate();
-      builder.AddNode("LayerNormalization", {input1_arg, input2_arg, input3_arg}, {ln_out})
-          .AddAttribute("axis", static_cast<int64_t>(1));
-
-      auto* shape_initializer = builder.MakeInitializer<int64_t>({2}, {first_dim_value, 1024});
-      auto* reshape_out = builder.MakeIntermediate();
-      builder.AddNode("Reshape", {ln_out, shape_initializer}, {reshape_out});
-
-      auto* identity_out = builder.MakeOutput();
-      builder.AddNode("Identity", {reshape_out}, {identity_out});
+      return Status::OK();
     };
 
-    const std::vector<int> opsets{12, 13, 14};
-    for (auto& opset_version : opsets) {
-      std::unique_ptr<GraphTransformer> transformer = std::make_unique<UpStreamReshapeGraphTransformer>();
-      ASSERT_STATUS_OK(TestGraphTransformer(build_test_case, opset_version, *logger, std::move(transformer),
-                                            TransformerLevel::Level1,
-                                            1, pre_graph_checker, post_graph_checker));
+    std::vector<int> fist_dim_values = {-1, 128};
+    for (auto first_dim_value : fist_dim_values) {
+      auto build_test_case = [ln_axis_before, &first_dim_value](ModelTestBuilder& builder) {
+        auto* input1_arg = builder.MakeInput<float>({{4, 32, 1024}});
+        auto* input2_arg = builder.MakeInput<float>({{1024}});
+        auto* input3_arg = builder.MakeInput<float>({{1024}});
+        auto* ln_out = builder.MakeIntermediate();
+        builder.AddNode("LayerNormalization", {input1_arg, input2_arg, input3_arg}, {ln_out})
+            .AddAttribute("axis", ln_axis_before);
+
+        auto* shape_initializer = builder.MakeInitializer<int64_t>({2}, {first_dim_value, 1024});
+        auto* reshape_out = builder.MakeIntermediate();
+        builder.AddNode("Reshape", {ln_out, shape_initializer}, {reshape_out});
+
+        auto* identity_out = builder.MakeOutput();
+        builder.AddNode("Identity", {reshape_out}, {identity_out});
+      };
+
+      const std::vector<int> opsets{12, 13, 14};
+      for (auto& opset_version : opsets) {
+        std::unique_ptr<GraphTransformer> transformer = std::make_unique<UpStreamReshapeGraphTransformer>();
+        ASSERT_STATUS_OK(TestGraphTransformer(build_test_case, opset_version, *logger, std::move(transformer),
+                                              TransformerLevel::Level1,
+                                              1, pre_graph_checker, post_graph_checker));
+      }
     }
   }
 }
diff --git a/onnxruntime/test/optimizer/ensure_unique_dq_for_node_unit_test.cc b/onnxruntime/test/optimizer/ensure_unique_dq_for_node_unit_test.cc
index feff607703341..7a67747f7cf4c 100644
--- a/onnxruntime/test/optimizer/ensure_unique_dq_for_node_unit_test.cc
+++ b/onnxruntime/test/optimizer/ensure_unique_dq_for_node_unit_test.cc
@@ -63,7 +63,8 @@ std::function<void(ModelTestBuilder&)> GetGraphBuilder(const GraphConfig& config
         return graph.ToGraphProto();
       };
 
-      auto* if_input = builder.MakeInitializerBool({}, {true});
+      // Make this an input to prevent If constant folding affecting this test
+      auto* if_input = builder.MakeInput<bool>({1}, {true});
       auto* if_output = builder.MakeOutput();
       Node& if_node = builder.AddNode("If", {if_input}, {if_output});
       if_node.AddAttribute("then_branch", create_if_subgraph(true));
diff --git a/onnxruntime/test/optimizer/graph_transform_test.cc b/onnxruntime/test/optimizer/graph_transform_test.cc
index dce1f2d40e8b9..ef6e2d531bc1a 100755
--- a/onnxruntime/test/optimizer/graph_transform_test.cc
+++ b/onnxruntime/test/optimizer/graph_transform_test.cc
@@ -10,6 +10,8 @@
 
 #include "gtest/gtest.h"
 #include "gmock/gmock.h"
+#include "onnx/defs/parser.h"
+#include "onnx/defs/printer.h"
 
 #include "asserts.h"
 #include "core/common/span_utils.h"
@@ -31,6 +33,8 @@
 #include "core/optimizer/conv_add_act_fusion.h"
 #include "core/optimizer/conv_add_fusion.h"
 #include "core/optimizer/conv_bn_fusion.h"
+#include "core/optimizer/matmul_bn_fusion.h"
+#include "core/optimizer/pad_fusion.h"
 #include "core/optimizer/conv_mul_fusion.h"
 #include "core/optimizer/div_mul_fusion.h"
 #include "core/optimizer/dropout_elimination.h"
@@ -1000,6 +1004,334 @@ TEST_F(GraphTransformationTests, ConstantFoldingAShapeNodeDeepInTheGraph) {
   ASSERT_TRUE(op_to_count.size() == 0U);
 }
 
+// Test we don't fail when constant folding hits a string initializer
+TEST_F(GraphTransformationTests, ConstantFoldingStringInitializer) {
+  constexpr const ORTCHAR_T* model_uri = MODEL_FOLDER "gh_issue_17392.onnx";
+  std::shared_ptr<Model> model;
+  ASSERT_STATUS_OK(Model::Load(model_uri, model, nullptr, *logger_));
+  Graph& graph = model->MainGraph();
+  std::map<std::string, int> op_to_count = CountOpsInGraph(graph);
+  ASSERT_EQ(op_to_count["Identity"], 1);
+
+  onnxruntime::GraphTransformerManager graph_transformation_mgr{5};
+  std::unique_ptr<CPUExecutionProvider> e = std::make_unique<CPUExecutionProvider>(CPUExecutionProviderInfo());
+  ASSERT_STATUS_OK(graph_transformation_mgr.Register(
+      std::make_unique<ConstantFolding>(*e.get(), false /*skip_dequantize_linear*/), TransformerLevel::Level1));
+  ASSERT_STATUS_OK(graph_transformation_mgr.ApplyTransformers(graph, TransformerLevel::Level1, *logger_));
+
+  op_to_count = CountOpsInGraph(graph);
+
+  ASSERT_EQ(op_to_count.size(), 0U) << "Identity node should have been removed";
+}
+
+TEST_F(GraphTransformationTests, ConstantFoldingIfConstantInlining) {
+  // This test covers the following necessary cases:
+  // The input refers to the explicit or implicit inputs of If node.
+  // The output of the node is the output of the subgraph being inlined.
+  // Constant nodes and initializers are promoted to the outer graph.
+  // The initializer or a constant node is the output of the subgraph being inlined.
+  // Nested subgraphs names are renamed as appropriate.
+  // In all If node is constant folded twice. The last If node is not constant
+  // folded because the input is indirectly dependent on the size of the input.
+  // XXX: Can we constant fold Size() if the graph input shape is fixed?
+
+  const char* code = R"(
+  <
+  ir_version: 8,
+  opset_import: [ "" : 16, "local" : 1 ]
+  >
+  agraph (float[128] x, float[128] x1) => (float[N] y)
+  {
+      y = local.aten_gather <dim: int = 1, sparse_grad: int = 0> (x, x1)
+  }
+  <
+    opset_import: [ "" : 16, "local" : 1],
+    domain: "local"
+  >
+  aten_gather <dim>(self, index) => (result_16)
+  {
+     tmp = Shape (index)
+     tmp_0 = Size (tmp)
+     int64_0 = Constant <value: tensor = int64 int64_0 {0}> ()
+     int64_0_cast = CastLike (int64_0, tmp_0)
+     cond = Equal (tmp_0, int64_0_cast)
+     result_16 = If (cond) <then_branch: graph = thenGraph_10 () => ( result) {
+        result = Identity (self)
+     }, else_branch: graph = elseGraph_10 () => ( result_15) {
+        tmp_1 = Shape (self)
+        tmp_2 = Size (tmp_1)
+        int64_0_3 = Constant <value: tensor = int64 int64_0_3 {0}> ()
+        int64_0_3_cast = CastLike (int64_0_3, tmp_2)
+        cond_4 = Equal (tmp_2, int64_0_3_cast)
+        self_8 = If (cond_4) <then_branch: graph = thenGraph_13 () => ( self_6) {
+           tmp_5 = Constant <value_ints: ints = [-1]> ()
+           self_6 = Reshape (self, tmp_5)
+        }, else_branch: graph = elseGraph_13 () => ( self_7) {
+           self_7 = Identity (self)
+        }>
+        tmp_9 = Size (index)
+        int64_0_10 = Constant <value: tensor = int64 int64_0_10 {0}> ()
+        int64_0_10_cast = CastLike (int64_0_10, tmp_9)
+        cond_11 = Equal (tmp_9, int64_0_10_cast)
+        result_15 = If (cond_11) <then_branch: graph = thenGraph_15 () => ( result_12) {
+           result_12 = CastLike (index, self_8)
+        }, else_branch: graph = elseGraph_15 () => ( result_14) {
+           index_13 = Cast <to: int = 7> (index)
+           result_14 = GatherElements <axis: int = @dim> (self_8, index_13)
+        }>
+     }>
+  }
+)";
+
+  ONNX_NAMESPACE::OnnxParser parser(code);
+  ONNX_NAMESPACE::ModelProto model_proto;
+  auto parse_status = parser.Parse(model_proto);
+  ASSERT_TRUE(parse_status.IsOK()) << parse_status.ErrorMessage();
+  ASSERT_TRUE(parser.EndOfInput()) << "Extra unparsed input unexpected.";
+
+  {
+    // Test that the model is loadable and check the function call node.
+    std::shared_ptr<Model> p_model;
+    ASSERT_STATUS_OK(Model::Load(std::move(model_proto), p_model, nullptr, *logger_));
+    Graph& graph = p_model->MainGraph();
+    std::map<std::string, int> op_to_count = CountOpsInGraph(graph);
+    ASSERT_EQ(op_to_count["local.aten_gather"], 1);
+    model_proto = p_model->ToProto();
+  }
+
+  std::string serialized_model;
+  const bool serialization_status = model_proto.SerializeToString(&serialized_model);
+  ASSERT_TRUE(serialization_status) << "Failed to serialize proto to string";
+
+  // AOT inlining is necessary in this case, so the If nodes within the function
+  // are brought out to the outer scope. So we load this into a session object.
+
+  SessionOptions session_options;
+  InferenceSessionWrapper session_object{session_options, GetEnvironment()};
+
+  std::stringstream sstr(serialized_model);
+  ASSERT_STATUS_OK(session_object.Load(sstr));
+  ASSERT_STATUS_OK(session_object.Initialize());
+
+  // const auto resulting_model_proto = session_object.GetModel().ToProto();
+  // std::string printed_model = ONNX_NAMESPACE::ProtoToString(resulting_model_proto);
+  // ASSERT_FALSE(printed_model.empty());
+  // std::cout << printed_model << std::endl;
+
+  // This is the resulting model proto.
+  // The remaining If node is not constant foldable because Size() does not constant fold
+  // although the shape is fixed.
+  /*
+    <
+       ir_version: 8,
+       opset_import: ["" : 16, "local" : 1,
+         "com.microsoft.nchwc" : 1,
+          "ai.onnx.ml" : 4,
+          "com.ms.internal.nhwc" : 20,
+          "ai.onnx.training" : 1,
+          "ai.onnx.preview.training" : 1,
+          "com.microsoft" : 1,
+          "com.microsoft.experimental" : 1,
+          "org.pytorch.aten" : 1]
+    >
+    agraph (float[128] x, float[128] x1) => (float[128] y) {
+       _if_elseGraph_10__inlfunc_aten_gather_tmp_9 = Size (x1)
+       _if_elseGraph_10__inlfunc_aten_gather_cond_11 =
+                                  Equal (_if_elseGraph_10__inlfunc_aten_gather_tmp_9, ortshared_7_0_1_0_token_10)
+       y = If (_if_elseGraph_10__inlfunc_aten_gather_cond_11) <then_branch: graph = thenGraph_15 () => (float[128] _inlfunc_aten_gather_result_12) {
+          _inlfunc_aten_gather_result_12 = Cast <to: int = 1> (x1)
+       }, else_branch: graph = elseGraph_15 () => (float[128] _inlfunc_aten_gather_result_14) {
+          _inlfunc_aten_gather_index_13 = Cast <to: int = 7> (x1)
+          _inlfunc_aten_gather_result_14 = GatherElements <axis: int = 1> (x, _inlfunc_aten_gather_index_13)
+       }>
+    }
+  */
+
+  auto& graph = session_object.GetModel().MainGraph();
+  auto op_to_count = CountOpsInGraph(graph);
+  ASSERT_EQ(op_to_count["local.aten_gather"], 0);
+  ASSERT_EQ(op_to_count["If"], 1);
+}
+
+TEST_F(GraphTransformationTests, ConstantFoldingIfConstantInliningRebuildEdges) {
+  constexpr const ORTCHAR_T* model_uri = MODEL_FOLDER "transform_nested_ifs_toplogical_sorted_nodes.onnx";
+
+  SessionOptions so;
+  so.session_logid = "GraphTransformationTests.ConstantFoldingIfConstantInliningRebuildEdges";
+
+  SessionOptions session_options;
+  InferenceSessionWrapper session_object{session_options, GetEnvironment()};
+  ASSERT_STATUS_OK(session_object.Load(model_uri));
+  ASSERT_STATUS_OK(session_object.Initialize());
+
+  auto& graph = session_object.GetModel().MainGraph();
+  auto op_to_count = CountOpsInGraph(graph);
+  ASSERT_EQ(op_to_count["pkg.onnxscript.torch_lib._aten_linalg_vector_norm_no_dim_onnx"], 0);
+  ASSERT_EQ(op_to_count["If"], 0);
+  ASSERT_EQ(op_to_count["Reshape"], 1);
+  ASSERT_EQ(op_to_count["Abs"], 1);
+  ASSERT_EQ(op_to_count["Mul"], 1);
+  ASSERT_EQ(op_to_count["ReduceSum"], 1);
+  ASSERT_EQ(op_to_count["Sqrt"], 1);
+  ASSERT_EQ(op_to_count["Cast"], 2);
+}
+
+TEST_F(GraphTransformationTests, ConstantFoldingIfConstantInliningEdgesWithMiddleArgNonExisting) {
+  // This model has a Resize() call with a middle argument non-existing.
+  // We want to make sure that the input edges for that Resize() node
+  // are properly rebuilt with a middle argument non-existing
+  // during If constant folding
+  // This test is only valid if Resize() node resides in the nested subgraph which gets inlined
+  // however, the destination graph must not be the main graph. Then we test that the edges are rebuild
+  // properly. Also Resize() should not be the first node in the resulting subgraph, so it has edges
+  const char* code = R"(
+  <
+  ir_version: 8,
+  opset_import: [ "" : 16, "local" : 1 ]
+  >
+  agraph (float[128] x, float[128] x1) => (float[N] y)
+  {
+      y = local.aten_gather <dim: int = 1, sparse_grad: int = 0> (x, x1)
+  }
+  <
+    opset_import: [ "" : 16, "local" : 1],
+    domain: "local"
+  >
+  aten_gather <dim>(self, index) => (result_16)
+  {
+     resize_scales = Constant <value_floats: floats = [1.5]> ()
+     tmp_0 = Size (index)
+     int64_0 = Constant <value: tensor = int64 int64_0 {0}> ()
+     int64_0_cast = CastLike (int64_0, tmp_0)
+     cond = Equal (tmp_0, int64_0_cast)
+     result_16 = If (cond) <then_branch: graph = thenGraph_10 () => ( result) {
+        result = Identity (self)
+     }, else_branch: graph = elseGraph_10 () => ( result_15) {
+        tmp_1 = Shape (self)
+        tmp_2 = Size (tmp_1)
+        int64_0_3 = Constant <value: tensor = int64 int64_0_3 {0}> ()
+        int64_0_3_cast = CastLike (int64_0_3, tmp_2)
+        cond_4 = Equal (tmp_2, int64_0_3_cast)
+        self_8 = If (cond_4) <then_branch: graph = thenGraph_13 () => ( self_6) {
+           tmp_5 = Constant <value_ints: ints = [-1]> ()
+           self_6 = Reshape (self, tmp_5)
+        }, else_branch: graph = elseGraph_13 () => ( self_7) {
+           self_71 = Mul(self, self)
+           float_size = CastLike (tmp_0, resize_scales)
+           non_constant_resize_scales = Mul(float_size, resize_scales)
+           self_7 = Resize(self_71,, non_constant_resize_scales)
+        }>
+        tmp_9 = Size (index)
+        int64_0_10 = Constant <value: tensor = int64 int64_0_10 {0}> ()
+        int64_0_10_cast = CastLike (int64_0_10, tmp_9)
+        cond_11 = Equal (tmp_9, int64_0_10_cast)
+        result_15 = If (cond_11) <then_branch: graph = thenGraph_15 () => ( result_12) {
+           result_12 = CastLike (index, self_8)
+        }, else_branch: graph = elseGraph_15 () => ( result_14) {
+           index_13 = Cast <to: int = 7> (index)
+           result_14 = GatherElements <axis: int = @dim> (self_8, index_13)
+        }>
+     }>
+  }
+  )";
+
+  /** Optimized model graph
+  <
+     ir_version: 8,
+     opset_import: ["" : 16,
+     "local" : 1,
+     "com.microsoft.nchwc" : 1,
+     "ai.onnx.ml" : 4,
+     "ai.onnx.training" : 1,
+     "ai.onnx.preview.training" : 1,
+     "com.microsoft" : 1,
+     "com.microsoft.experimental" : 1, "org.pytorch.aten" : 1]
+  >
+  agraph (float[128] x, float[128] x1) => (float[128] y)
+     <float[1] _inlfunc_aten_gather_resize_scales =  {1.5}, int64 ortshared_7_0_1_0_token_8 =  {0}>
+  {
+     _inlfunc_aten_gather_tmp_0 = Size (x1)
+     _inlfunc_aten_gather_cond = Equal (_inlfunc_aten_gather_tmp_0, ortshared_7_0_1_0_token_8)
+      y = If (_inlfunc_aten_gather_cond) <then_branch: graph = thenGraph_10 () =>
+          (float[128] _inlfunc_aten_gather_result) {
+        _inlfunc_aten_gather_result = Identity (x)
+     }, else_branch: graph = elseGraph_10 () => (float[128] _inlfunc_aten_gather_result_15)
+        <int64 _inlfunc_aten_gather_int64_0_10 =  {0}>
+  {
+        _if_else_branch__inlfunc_aten_gather_self_71 = Mul (x, x)
+        _if_else_branch__inlfunc_aten_gather_float_size = Cast <to: int = 1> (_inlfunc_aten_gather_tmp_0)
+        _if_else_branch__inlfunc_aten_gather_non_constant_resize_scales = Mul (
+          _if_else_branch__inlfunc_aten_gather_float_size, _inlfunc_aten_gather_resize_scales)
+        _inlfunc_aten_gather_self_8 = Resize <exclude_outside: int = 0, coordinate_transformation_mode:
+                string = "half_pixel", cubic_coeff_a: float = -0.75, extrapolation_value: float = 0, mode:
+                string = "nearest", nearest_mode: string = "round_prefer_floor"> (
+                    _if_else_branch__inlfunc_aten_gather_self_71, ,
+                    _if_else_branch__inlfunc_aten_gather_non_constant_resize_scales)
+        _inlfunc_aten_gather_tmp_9 = Size (x1)
+        _inlfunc_aten_gather_cond_11 = Equal (_inlfunc_aten_gather_tmp_9, _inlfunc_aten_gather_int64_0_10)
+        _inlfunc_aten_gather_result_15 = If (_inlfunc_aten_gather_cond_11) <then_branch: graph = thenGraph_15 () =>
+              (float[128] _inlfunc_aten_gather_result_12) {
+           _inlfunc_aten_gather_result_12 = Cast <to: int = 1> (x1)
+        }, else_branch: graph = elseGraph_15 () => (float[128] _inlfunc_aten_gather_result_14) {
+           _inlfunc_aten_gather_index_13 = Cast <to: int = 7> (x1)
+           _inlfunc_aten_gather_result_14 = GatherElements <axis: int = 1> (
+                          _inlfunc_aten_gather_self_8, _inlfunc_aten_gather_index_13)
+        }>
+     }>
+  }
+
+  */
+
+  ONNX_NAMESPACE::OnnxParser parser(code);
+  ONNX_NAMESPACE::ModelProto model_proto;
+  auto parse_status = parser.Parse(model_proto);
+  ASSERT_TRUE(parse_status.IsOK()) << parse_status.ErrorMessage();
+  ASSERT_TRUE(parser.EndOfInput()) << "Extra unparsed input unexpected.";
+
+  std::string serialized_model;
+  const bool serialization_status = model_proto.SerializeToString(&serialized_model);
+  ASSERT_TRUE(serialization_status) << "Failed to serialize proto to string";
+
+  // AOT inlining is necessary in this case, so the If nodes within the function
+  // are brought out to the outer scope. So we load this into a session object.
+  SessionOptions session_options;
+  InferenceSessionWrapper session_object{session_options, GetEnvironment()};
+  std::stringstream sstr(serialized_model);
+  ASSERT_STATUS_OK(session_object.Load(sstr));
+  ASSERT_STATUS_OK(session_object.Initialize());
+
+  // Let's verify the correctness of the rebuild edges in the Resize node that still
+  // resides within an if else subgraph.
+  auto& graph = session_object.GetModel().MainGraph();
+  auto op_to_count = CountOpsInGraph(graph);
+  ASSERT_EQ(op_to_count["If"], 2);
+  ASSERT_EQ(op_to_count["Resize"], 1);
+
+  auto if_node = std::find_if(graph.Nodes().begin(), graph.Nodes().end(),
+                              [](const auto& node) { return node.OpType() == "If"; });
+  ASSERT_NE(graph.Nodes().cend(), if_node);
+  // Resize is in the else branch
+  auto subgraph_map = if_node->GetAttributeNameToSubgraphMap();
+  auto branch = subgraph_map.find("else_branch");
+  ASSERT_NE(subgraph_map.cend(), branch);
+
+  auto resize_node = std::find_if(branch->second->Nodes().begin(), branch->second->Nodes().end(),
+                                  [](const auto& node) { return node.OpType() == "Resize"; });
+  ASSERT_NE(branch->second->Nodes().cend(), resize_node);
+
+  // Check the edges
+  ASSERT_EQ(2U, resize_node->GetInputEdgesCount());
+  // Should have input edges with arg_pos 0 and 2
+  // With 1 is missing
+  InlinedHashSet<size_t> dest_edges;
+  auto zero_edge = resize_node->InputEdgesBegin();
+  dest_edges.insert(zero_edge->GetDstArgIndex());
+  ++zero_edge;
+  dest_edges.insert(zero_edge->GetDstArgIndex());
+  ASSERT_TRUE(dest_edges.find(0) != dest_edges.end());
+  ASSERT_TRUE(dest_edges.find(2) != dest_edges.end());
+}
+
 // Check transformations in the case of a subgraph with constant inputs.
 TEST_F(GraphTransformationTests, SubgraphWithConstantInputs) {
   constexpr const ORTCHAR_T* model_uri = MODEL_FOLDER "constant-subgraph.onnx";
@@ -1059,6 +1391,425 @@ TEST_F(GraphTransformationTests, FuseConvBNNoBias) {
   }
 }
 
+TEST_F(GraphTransformationTests, FusePadWithConv) {
+  constexpr const ORTCHAR_T* model_uri = MODEL_FOLDER "fusion/fuse-pad-conv.onnx";
+
+  std::shared_ptr<Model> p_model;
+  ASSERT_STATUS_OK(Model::Load(model_uri, p_model, nullptr, *logger_));
+  Graph& graph = p_model->MainGraph();
+
+  std::vector<int64_t> expected_pads;
+  GraphViewer graphViewer(graph);
+  for (auto& node_index : graphViewer.GetNodesInTopologicalOrder()) {
+    auto& node = *graph.GetNode(node_index);
+    if (node.OpType() == "Pad") {
+      const auto* pads_proto = graph_utils::GetConstantInitializer(graph, node.InputDefs()[1]->Name());
+      Initializer pads{*pads_proto, graph.ModelPath()};
+      gsl::span<const int64_t> pads_values = pads.DataAsSpan<int64_t>();
+      expected_pads.resize(pads_values.size() - 4);
+
+      for (uint32_t pads_index = 2, index = 0; pads_index < pads_values.size() / 2; pads_index++, index++) {
+        expected_pads[index] = pads_values[pads_index];
+        expected_pads[index + (expected_pads.size() / 2)] = pads_values[pads_index + (pads_values.size() / 2)];
+      }
+    } else if (node.OpType() == "Conv") {
+      auto child_pads = node.GetMutableAttributes()["pads"].mutable_ints();
+      for (uint32_t index = 0; index < expected_pads.size(); index++) {
+        expected_pads[index] += child_pads->Get(index);
+      }
+    }
+  }
+
+  onnxruntime::GraphTransformerManager graph_transformation_mgr{5};
+  auto rule_transformer_L1 = std::make_unique<RuleBasedGraphTransformer>("RuleTransformerL1");
+  ASSERT_STATUS_OK(rule_transformer_L1->Register(std::make_unique<PadFusion>()));
+  ASSERT_STATUS_OK(graph_transformation_mgr.Register(std::move(rule_transformer_L1), TransformerLevel::Level1));
+
+  ASSERT_STATUS_OK(graph_transformation_mgr.ApplyTransformers(graph, TransformerLevel::Level1, *logger_));
+
+  std::map<std::string, int> op_to_count = CountOpsInGraph(graph);
+  ASSERT_EQ(op_to_count["Pad"], 0);
+  ASSERT_EQ(op_to_count["Conv"], 1);
+
+  for (auto& node : graph.Nodes()) {
+    if (node.OpType() == "Conv") {
+      auto child_pads = node.GetMutableAttributes()["pads"].mutable_ints();
+      ASSERT_EQ(child_pads->size(), static_cast<int32_t>(expected_pads.size()))
+          << "fusion should produce the same size of pads integer as the Conv node";
+      for (uint32_t index = 0; index < expected_pads.size(); index++) {
+        ASSERT_EQ(expected_pads[index], child_pads->Get(index))
+            << "fusion does not produce correct padding value";
+      }
+    }
+  }
+}
+
+TEST_F(GraphTransformationTests, FusePadWithMaxPool) {
+  constexpr const ORTCHAR_T* model_uri = MODEL_FOLDER "fusion/fuse-pad-maxpool.onnx";
+
+  std::shared_ptr<Model> p_model;
+  ASSERT_STATUS_OK(Model::Load(model_uri, p_model, nullptr, *logger_));
+  Graph& graph = p_model->MainGraph();
+
+  std::vector<int64_t> expected_pads;
+  GraphViewer graphViewer(graph);
+  for (auto& node_index : graphViewer.GetNodesInTopologicalOrder()) {
+    auto& node = *graph.GetNode(node_index);
+    if (node.OpType() == "Pad") {
+      const auto* pads_proto = graph_utils::GetConstantInitializer(graph, node.InputDefs()[1]->Name());
+      Initializer pads{*pads_proto, graph.ModelPath()};
+      gsl::span<const int64_t> pads_values = pads.DataAsSpan<int64_t>();
+      expected_pads.resize(pads_values.size() - 4);
+
+      for (uint32_t pads_index = 2, index = 0; pads_index < pads_values.size() / 2; pads_index++, index++) {
+        expected_pads[index] = pads_values[pads_index];
+        expected_pads[index + (expected_pads.size() / 2)] = pads_values[pads_index + (pads_values.size() / 2)];
+      }
+    } else if (node.OpType() == "MaxPool") {
+      auto child_pads = node.GetMutableAttributes()["pads"].mutable_ints();
+      for (uint32_t index = 0; index < expected_pads.size(); index++) {
+        expected_pads[index] += child_pads->Get(index);
+      }
+    }
+  }
+
+  onnxruntime::GraphTransformerManager graph_transformation_mgr{5};
+  auto rule_transformer_L1 = std::make_unique<RuleBasedGraphTransformer>("RuleTransformerL1");
+  ASSERT_STATUS_OK(rule_transformer_L1->Register(std::make_unique<PadFusion>()));
+  ASSERT_STATUS_OK(graph_transformation_mgr.Register(std::move(rule_transformer_L1), TransformerLevel::Level1));
+
+  ASSERT_STATUS_OK(graph_transformation_mgr.ApplyTransformers(graph, TransformerLevel::Level1, *logger_));
+
+  std::map<std::string, int> op_to_count = CountOpsInGraph(graph);
+  ASSERT_EQ(op_to_count["Pad"], 0);
+  ASSERT_EQ(op_to_count["MaxPool"], 1);
+
+  for (auto& node : graph.Nodes()) {
+    if (node.OpType() == "MaxPool") {
+      auto child_pads = node.GetMutableAttributes()["pads"].mutable_ints();
+      ASSERT_EQ(child_pads->size(), static_cast<int32_t>(expected_pads.size()))
+          << "fusion should produce the same size of pads integer as the MaxPool node";
+      for (uint32_t index = 0; index < expected_pads.size(); index++) {
+        ASSERT_EQ(expected_pads[index], child_pads->Get(index))
+            << "fusion does not produce correct padding value";
+      }
+    }
+  }
+}
+
+TEST_F(GraphTransformationTests, FusePadWithMaxPoolOpsetLessThan11) {
+  constexpr const ORTCHAR_T* model_uri = MODEL_FOLDER "fusion/fuse-pad-maxpool-opset8.onnx";
+
+  std::shared_ptr<Model> p_model;
+  ASSERT_STATUS_OK(Model::Load(model_uri, p_model, nullptr, *logger_));
+  Graph& graph = p_model->MainGraph();
+
+  std::vector<int64_t> expected_pads;
+  GraphViewer graphViewer(graph);
+  for (auto& node_index : graphViewer.GetNodesInTopologicalOrder()) {
+    auto& node = *graph.GetNode(node_index);
+    if (node.OpType() == "Pad") {
+      gsl::span<const int64_t> pads_values = node.GetAttributes().at("pads").ints();
+      expected_pads.resize(pads_values.size() - 4);
+
+      for (uint32_t pads_index = 2, index = 0; pads_index < pads_values.size() / 2; pads_index++, index++) {
+        expected_pads[index] = pads_values[pads_index];
+        expected_pads[index + (expected_pads.size() / 2)] = pads_values[pads_index + (pads_values.size() / 2)];
+      }
+    } else if (node.OpType() == "MaxPool") {
+      auto child_pads = node.GetMutableAttributes()["pads"].mutable_ints();
+      for (uint32_t index = 0; index < expected_pads.size(); index++) {
+        expected_pads[index] += child_pads->Get(index);
+      }
+    }
+  }
+
+  onnxruntime::GraphTransformerManager graph_transformation_mgr{5};
+  auto rule_transformer_L1 = std::make_unique<RuleBasedGraphTransformer>("RuleTransformerL1");
+  ASSERT_STATUS_OK(rule_transformer_L1->Register(std::make_unique<PadFusion>()));
+  ASSERT_STATUS_OK(graph_transformation_mgr.Register(std::move(rule_transformer_L1), TransformerLevel::Level1));
+
+  ASSERT_STATUS_OK(graph_transformation_mgr.ApplyTransformers(graph, TransformerLevel::Level1, *logger_));
+
+  std::map<std::string, int> op_to_count = CountOpsInGraph(graph);
+  ASSERT_EQ(op_to_count["Pad"], 0);
+  ASSERT_EQ(op_to_count["MaxPool"], 1);
+
+  for (auto& node : graph.Nodes()) {
+    if (node.OpType() == "MaxPool") {
+      auto child_pads = node.GetMutableAttributes()["pads"].mutable_ints();
+      ASSERT_EQ(child_pads->size(), static_cast<int32_t>(expected_pads.size()))
+          << "fusion should produce the same size of pads integer as the MaxPool node";
+      for (uint32_t index = 0; index < expected_pads.size(); index++) {
+        ASSERT_EQ(expected_pads[index], child_pads->Get(index))
+            << "fusion does not produce correct padding value";
+      }
+    }
+  }
+}
+
+TEST_F(GraphTransformationTests, FuseMatmulBNWithInBetweenNodes) {
+  constexpr const ORTCHAR_T* model_uri = MODEL_FOLDER "fusion/fuse-matmul-bn-with-reshape.onnx";
+
+  std::shared_ptr<Model> p_model;
+  ASSERT_STATUS_OK(Model::Load(model_uri, p_model, nullptr, *logger_));
+  Graph& graph = p_model->MainGraph();
+
+  std::string expected_output_name;
+  GraphViewer graphViewer(graph);
+  for (auto& node_index : graphViewer.GetNodesInTopologicalOrder()) {
+    auto& node = *graph.GetNode(node_index);
+    if (node.OpType() == "MatMul") {
+      expected_output_name = node.OutputDefs()[0]->Name();
+    }
+  }
+
+  onnxruntime::GraphTransformerManager graph_transformation_mgr{5};
+  auto rule_transformer_L1 = std::make_unique<RuleBasedGraphTransformer>("RuleTransformerL1");
+  ASSERT_STATUS_OK(rule_transformer_L1->Register(std::make_unique<MatmulBNFusion>()));
+  ASSERT_STATUS_OK(graph_transformation_mgr.Register(std::move(rule_transformer_L1), TransformerLevel::Level1));
+
+  ASSERT_STATUS_OK(graph_transformation_mgr.ApplyTransformers(graph, TransformerLevel::Level1, *logger_));
+
+  std::map<std::string, int> op_to_count = CountOpsInGraph(graph);
+  ASSERT_EQ(op_to_count["BatchNormalization"], 0);
+  ASSERT_EQ(op_to_count["MatMul"], 0);
+  ASSERT_EQ(op_to_count["Gemm"], 1);
+
+  for (auto& node : graph.Nodes()) {
+    if (node.OpType() == "Gemm") {
+      ASSERT_EQ(node.OutputDefs()[0]->Name(), expected_output_name)
+          << "fusion should produce the same output name as the MatMul node";
+    }
+  }
+}
+
+TEST_F(GraphTransformationTests, FuseMatmulBNWithEmptyOptionalOutputWithInBetweenNodes) {
+  constexpr const ORTCHAR_T* model_uri = MODEL_FOLDER "fusion/fuse-matmul-bn-with-reshape.onnx";
+
+  std::shared_ptr<Model> p_model;
+  ASSERT_STATUS_OK(Model::Load(model_uri, p_model, nullptr, *logger_));
+  Graph& graph = p_model->MainGraph();
+
+  std::string expected_output_name;
+  GraphViewer graphViewer(graph);
+  for (auto& node_index : graphViewer.GetNodesInTopologicalOrder()) {
+    auto& node = *graph.GetNode(node_index);
+    if (node.OpType() == "MatMul") {
+      expected_output_name = node.OutputDefs()[0]->Name();
+    } else if (node.OpType() == "BatchNormalization") {
+      node.MutableOutputDefs().push_back(&graph.GetOrCreateNodeArg("", nullptr));
+    }
+  }
+
+  onnxruntime::GraphTransformerManager graph_transformation_mgr{5};
+  auto rule_transformer_L1 = std::make_unique<RuleBasedGraphTransformer>("RuleTransformerL1");
+  ASSERT_STATUS_OK(rule_transformer_L1->Register(std::make_unique<MatmulBNFusion>()));
+  ASSERT_STATUS_OK(graph_transformation_mgr.Register(std::move(rule_transformer_L1), TransformerLevel::Level1));
+
+  ASSERT_STATUS_OK(graph_transformation_mgr.ApplyTransformers(graph, TransformerLevel::Level1, *logger_));
+
+  std::map<std::string, int> op_to_count = CountOpsInGraph(graph);
+  ASSERT_EQ(op_to_count["BatchNormalization"], 0);
+  ASSERT_EQ(op_to_count["MatMul"], 0);
+  ASSERT_EQ(op_to_count["Gemm"], 1);
+
+  for (auto& node : graph.Nodes()) {
+    if (node.OpType() == "Gemm") {
+      ASSERT_EQ(node.OutputDefs()[0]->Name(), expected_output_name)
+          << "fusion should produce the same output name as the MatMul node";
+    }
+  }
+}
+
+// should not fuse
+TEST_F(GraphTransformationTests, FuseMatmulBNWithOptionalOutputWithInBetweenNodes) {
+  constexpr const ORTCHAR_T* model_uri = MODEL_FOLDER "fusion/fuse-matmul-bn-with-reshape.onnx";
+
+  std::shared_ptr<Model> p_model;
+  ASSERT_STATUS_OK(Model::Load(model_uri, p_model, nullptr, *logger_));
+  Graph& graph = p_model->MainGraph();
+
+  GraphViewer graphViewer(graph);
+  for (auto& node_index : graphViewer.GetNodesInTopologicalOrder()) {
+    auto& node = *graph.GetNode(node_index);
+    if (node.OpType() == "BatchNormalization") {
+      // additional non-empty output to batchNormalization
+      ONNX_NAMESPACE::TypeProto optional_output_tensor_type;
+      optional_output_tensor_type.mutable_tensor_type()->set_elem_type(ONNX_NAMESPACE::TypeProto::kTensorType);
+      auto& arg = graph.GetOrCreateNodeArg("bn_optional_output", &optional_output_tensor_type);
+      node.MutableOutputDefs().push_back(&arg);
+    }
+  }
+
+  onnxruntime::GraphTransformerManager graph_transformation_mgr{5};
+  auto rule_transformer_L1 = std::make_unique<RuleBasedGraphTransformer>("RuleTransformerL1");
+  ASSERT_STATUS_OK(rule_transformer_L1->Register(std::make_unique<MatmulBNFusion>()));
+  ASSERT_STATUS_OK(graph_transformation_mgr.Register(std::move(rule_transformer_L1), TransformerLevel::Level1));
+
+  ASSERT_STATUS_OK(graph_transformation_mgr.ApplyTransformers(graph, TransformerLevel::Level1, *logger_));
+
+  std::map<std::string, int> op_to_count = CountOpsInGraph(graph);
+  ASSERT_EQ(op_to_count["BatchNormalization"], 1);
+  ASSERT_EQ(op_to_count["MatMul"], 1);
+  ASSERT_EQ(op_to_count["Gemm"], 0);
+}
+
+TEST_F(GraphTransformationTests, FuseMatmulBNDirectly) {
+  constexpr const ORTCHAR_T* model_uri = MODEL_FOLDER "fusion/fuse-matmul-bn-directly.onnx";
+
+  std::shared_ptr<Model> p_model;
+  ASSERT_STATUS_OK(Model::Load(model_uri, p_model, nullptr, *logger_));
+  Graph& graph = p_model->MainGraph();
+
+  std::string expected_output_name;
+  GraphViewer graphViewer(graph);
+  for (auto& node_index : graphViewer.GetNodesInTopologicalOrder()) {
+    auto& node = *graph.GetNode(node_index);
+    if (node.OpType() == "BatchNormalization") {
+      expected_output_name = node.OutputDefs()[0]->Name();
+    }
+  }
+
+  onnxruntime::GraphTransformerManager graph_transformation_mgr{5};
+  auto rule_transformer_L1 = std::make_unique<RuleBasedGraphTransformer>("RuleTransformerL1");
+  ASSERT_STATUS_OK(rule_transformer_L1->Register(std::make_unique<MatmulBNFusion>()));
+  ASSERT_STATUS_OK(graph_transformation_mgr.Register(std::move(rule_transformer_L1), TransformerLevel::Level1));
+
+  ASSERT_STATUS_OK(graph_transformation_mgr.ApplyTransformers(graph, TransformerLevel::Level1, *logger_));
+
+  std::map<std::string, int> op_to_count = CountOpsInGraph(graph);
+  ASSERT_EQ(op_to_count["BatchNormalization"], 0);
+  ASSERT_EQ(op_to_count["MatMul"], 0);
+  ASSERT_EQ(op_to_count["Gemm"], 1);
+
+  for (auto& node : graph.Nodes()) {
+    if (node.OpType() == "Gemm") {
+      ASSERT_EQ(node.OutputDefs()[0]->Name(), expected_output_name)
+          << "fusion should produce the same output name as the last node";
+    }
+  }
+}
+
+TEST_F(GraphTransformationTests, FuseMatmulBNWithOnlyReshape) {
+  constexpr const ORTCHAR_T* model_uri = MODEL_FOLDER "fusion/fuse-matmul-bn-only-reshape.onnx";
+
+  std::shared_ptr<Model> p_model;
+  ASSERT_STATUS_OK(Model::Load(model_uri, p_model, nullptr, *logger_));
+  Graph& graph = p_model->MainGraph();
+
+  std::string expected_output_name;
+  GraphViewer graphViewer(graph);
+  for (auto& node_index : graphViewer.GetNodesInTopologicalOrder()) {
+    auto& node = *graph.GetNode(node_index);
+    if (node.OpType() == "MatMul") {
+      expected_output_name = node.OutputDefs()[0]->Name();
+    }
+  }
+
+  onnxruntime::GraphTransformerManager graph_transformation_mgr{5};
+  auto rule_transformer_L1 = std::make_unique<RuleBasedGraphTransformer>("RuleTransformerL1");
+  ASSERT_STATUS_OK(rule_transformer_L1->Register(std::make_unique<MatmulBNFusion>()));
+  ASSERT_STATUS_OK(graph_transformation_mgr.Register(std::move(rule_transformer_L1), TransformerLevel::Level1));
+
+  ASSERT_STATUS_OK(graph_transformation_mgr.ApplyTransformers(graph, TransformerLevel::Level1, *logger_));
+
+  std::map<std::string, int> op_to_count = CountOpsInGraph(graph);
+  ASSERT_EQ(op_to_count["BatchNormalization"], 0);
+  ASSERT_EQ(op_to_count["MatMul"], 0);
+  ASSERT_EQ(op_to_count["Gemm"], 1);
+
+  for (auto& node : graph.Nodes()) {
+    if (node.OpType() == "Gemm") {
+      ASSERT_EQ(node.OutputDefs()[0]->Name(), expected_output_name)
+          << "fusion should produce the same output name as the MatMul node";
+    }
+  }
+}
+
+TEST_F(GraphTransformationTests, FuseMatmulBNWithOnlyTranspose) {
+  constexpr const ORTCHAR_T* model_uri = MODEL_FOLDER "fusion/fuse-matmul-bn-only-transpose.onnx";
+
+  std::shared_ptr<Model> p_model;
+  ASSERT_STATUS_OK(Model::Load(model_uri, p_model, nullptr, *logger_));
+  Graph& graph = p_model->MainGraph();
+
+  std::string expected_output_name;
+  GraphViewer graphViewer(graph);
+  for (auto& node_index : graphViewer.GetNodesInTopologicalOrder()) {
+    auto& node = *graph.GetNode(node_index);
+    if (node.OpType() == "MatMul") {
+      expected_output_name = node.OutputDefs()[0]->Name();
+    }
+  }
+
+  onnxruntime::GraphTransformerManager graph_transformation_mgr{5};
+  auto rule_transformer_L1 = std::make_unique<RuleBasedGraphTransformer>("RuleTransformerL1");
+  ASSERT_STATUS_OK(rule_transformer_L1->Register(std::make_unique<MatmulBNFusion>()));
+  ASSERT_STATUS_OK(graph_transformation_mgr.Register(std::move(rule_transformer_L1), TransformerLevel::Level1));
+
+  ASSERT_STATUS_OK(graph_transformation_mgr.ApplyTransformers(graph, TransformerLevel::Level1, *logger_));
+
+  std::map<std::string, int> op_to_count = CountOpsInGraph(graph);
+  ASSERT_EQ(op_to_count["BatchNormalization"], 0);
+  ASSERT_EQ(op_to_count["MatMul"], 0);
+  ASSERT_EQ(op_to_count["Gemm"], 1);
+
+  for (auto& node : graph.Nodes()) {
+    if (node.OpType() == "Gemm") {
+      ASSERT_EQ(node.OutputDefs()[0]->Name(), expected_output_name)
+          << "fusion should produce the same output name as the MatMul node";
+    }
+  }
+}
+
+TEST_F(GraphTransformationTests, FuseMatmulBNWithoutBatchNormalization) {
+  constexpr const ORTCHAR_T* model_uri = MODEL_FOLDER "fusion/fuse-matmul-bn-only-transpose.onnx";
+
+  std::shared_ptr<Model> p_model;
+  ASSERT_STATUS_OK(Model::Load(model_uri, p_model, nullptr, *logger_));
+  Graph& graph = p_model->MainGraph();
+
+  GraphViewer graphViewer(graph);
+  for (auto& node_index : graphViewer.GetNodesInTopologicalOrder()) {
+    auto& node = *graph.GetNode(node_index);
+    if (node.OpType() == "BatchNormalization") {
+      graph_utils::RemoveNode(graph, node);
+    }
+  }
+
+  onnxruntime::GraphTransformerManager graph_transformation_mgr{5};
+  auto rule_transformer_L1 = std::make_unique<RuleBasedGraphTransformer>("RuleTransformerL1");
+  ASSERT_STATUS_OK(rule_transformer_L1->Register(std::make_unique<MatmulBNFusion>()));
+  ASSERT_STATUS_OK(graph_transformation_mgr.Register(std::move(rule_transformer_L1), TransformerLevel::Level1));
+
+  ASSERT_STATUS_OK(graph_transformation_mgr.ApplyTransformers(graph, TransformerLevel::Level1, *logger_));
+
+  std::map<std::string, int> op_to_count = CountOpsInGraph(graph);
+  ASSERT_EQ(op_to_count["MatMul"], 1);
+}
+
+// should not fuse
+TEST_F(GraphTransformationTests, FuseMatmulBNWithNonIgnorableNode) {
+  constexpr const ORTCHAR_T* model_uri = MODEL_FOLDER "fusion/fuse-matmul-bn-non-ignorable-node.onnx";
+
+  std::shared_ptr<Model> p_model;
+  ASSERT_STATUS_OK(Model::Load(model_uri, p_model, nullptr, *logger_));
+  Graph& graph = p_model->MainGraph();
+
+  onnxruntime::GraphTransformerManager graph_transformation_mgr{5};
+  auto rule_transformer_L1 = std::make_unique<RuleBasedGraphTransformer>("RuleTransformerL1");
+  ASSERT_STATUS_OK(rule_transformer_L1->Register(std::make_unique<MatmulBNFusion>()));
+  ASSERT_STATUS_OK(graph_transformation_mgr.Register(std::move(rule_transformer_L1), TransformerLevel::Level1));
+
+  ASSERT_STATUS_OK(graph_transformation_mgr.ApplyTransformers(graph, TransformerLevel::Level1, *logger_));
+
+  std::map<std::string, int> op_to_count = CountOpsInGraph(graph);
+  ASSERT_EQ(op_to_count["BatchNormalization"], 1);
+  ASSERT_EQ(op_to_count["MatMul"], 1);
+  ASSERT_EQ(op_to_count["Gemm"], 0);
+}
+
 TEST_F(GraphTransformationTests, DontFuseConvWithBNWithOptionalOutputs) {
   constexpr const ORTCHAR_T* model_uri = MODEL_FOLDER "fusion/fuse-conv-bn-no-bias.onnx";
 
@@ -1155,7 +1906,7 @@ TEST_F(GraphTransformationTests, NotWhereFusion) {
   ASSERT_TRUE(op_to_count["Not"] == 1);  // can't remove Not if it is graph output/ has consumer that's not where
 }
 
-#if defined(USE_CUDA) && !defined(DISABLE_CONTRIB_OPS)
+#if (defined(USE_CUDA) || defined(USE_JSEP)) && !defined(DISABLE_CONTRIB_OPS)
 // Conv->Add->Relu will be transformed to FusedConv
 TEST_F(GraphTransformationTests, FuseCudaConvAddRelu) {
   constexpr const ORTCHAR_T* model_uri = MODEL_FOLDER "fusion/conv_add_relu.onnx";
@@ -1335,6 +2086,10 @@ TEST_F(GraphTransformationTests, FuseConvActivation) {
     for (auto& node : p_model->MainGraph().Nodes()) {
       node.SetExecutionProviderType(kCudaExecutionProvider);
     }
+#elif defined(USE_JSEP)
+    for (auto& node : p_model->MainGraph().Nodes()) {
+      node.SetExecutionProviderType(kJsExecutionProvider);
+    }
 #endif
     std::map<std::string, int> op_to_count_before_fusion = CountOpsInGraph(graph);
     ASSERT_TRUE(op_to_count_before_fusion[model.second] >= 1);
@@ -1349,6 +2104,13 @@ TEST_F(GraphTransformationTests, FuseConvActivation) {
     std::set<std::string> cuda_rocm_supported = {"Relu"};
     if (cuda_rocm_supported.find(model.second) == cuda_rocm_supported.end()) {
       ASSERT_EQ(op_to_count_before_fusion[model.second], op_to_count_after_fusion[model.second]);
+    } else {
+      ASSERT_EQ(op_to_count_after_fusion[model.second], 0);
+    }
+#elif defined(USE_JSEP)
+    std::set<std::string> js_supported = {"Relu", "Clip", "Sigmoid", "Tanh", "LeakyRelu"};
+    if (js_supported.find(model.second) == js_supported.end()) {
+      ASSERT_EQ(op_to_count_before_fusion[model.second], op_to_count_after_fusion[model.second]);
     } else {
       ASSERT_TRUE(op_to_count_after_fusion[model.second] == 0);
     }
@@ -6280,7 +7042,7 @@ TEST_F(GraphTransformationTests, ConstantSharing_ShouldNotShareForGraphOutput) {
 TEST_F(GraphTransformationTests, GatherToSplitFusion) {
   auto build_test_case = [&](ModelTestBuilder& builder) {
     auto* data_arg = builder.MakeInput<float>({{54}});
-    auto* shape_arg = builder.MakeInput<int64_t>({{1}});
+    auto* shape_arg = builder.MakeInput<int64_t>({{4}});
     auto* reshape_out = builder.MakeIntermediate<float>({{2, 3, 3, 3}});
     auto* gather_index_1 = builder.MakeInitializer<int64_t>({}, {static_cast<int64_t>(0)});
     auto* gather_index_2 = builder.MakeInitializer<int64_t>({}, {static_cast<int64_t>(1)});
@@ -6304,7 +7066,10 @@ TEST_F(GraphTransformationTests, GatherToSplitFusion) {
     builder.AddNode("Transpose", {gather_out_3}, {transpose_out_3}).AddAttribute("perm", std::vector<int64_t>{0, 2, 1});
   };
 
-  auto pre_graph_checker = [&](Graph& graph) { TEST_RETURN_IF_NOT(CountOpsInGraph(graph)["Gather"] == 3); return Status::OK(); };
+  auto pre_graph_checker = [&](Graph& graph) {
+    TEST_RETURN_IF_NOT(CountOpsInGraph(graph)["Gather"] == 3);
+    return Status::OK();
+  };
 
   // OpSet-12
   {
@@ -6327,8 +7092,8 @@ TEST_F(GraphTransformationTests, GatherToSplitFusion) {
     };
 
     std::unique_ptr<GraphTransformer> transformer = std::make_unique<GatherToSplitFusion>();
-    ASSERT_STATUS_OK(TestGraphTransformer(build_test_case, 12, *logger_, std::move(transformer), TransformerLevel::Level1, 1,
-                                          pre_graph_checker, post_graph_checker));
+    ASSERT_STATUS_OK(TestGraphTransformer(build_test_case, 12, *logger_, std::move(transformer),
+                                          TransformerLevel::Level1, 1, pre_graph_checker, post_graph_checker));
   }
 
   // OpSet-14
@@ -6356,8 +7121,8 @@ TEST_F(GraphTransformationTests, GatherToSplitFusion) {
     };
 
     std::unique_ptr<GraphTransformer> transformer = std::make_unique<GatherToSplitFusion>();
-    ASSERT_STATUS_OK(TestGraphTransformer(build_test_case, 14, *logger_, std::move(transformer), TransformerLevel::Level1, 1,
-                                          pre_graph_checker, post_graph_checker));
+    ASSERT_STATUS_OK(TestGraphTransformer(build_test_case, 14, *logger_, std::move(transformer),
+                                          TransformerLevel::Level1, 1, pre_graph_checker, post_graph_checker));
   }
 
   // OpSet-18
@@ -6385,15 +7150,15 @@ TEST_F(GraphTransformationTests, GatherToSplitFusion) {
     };
 
     std::unique_ptr<GraphTransformer> transformer = std::make_unique<GatherToSplitFusion>();
-    ASSERT_STATUS_OK(TestGraphTransformer(build_test_case, 18, *logger_, std::move(transformer), TransformerLevel::Level1, 1,
-                                          pre_graph_checker, post_graph_checker));
+    ASSERT_STATUS_OK(TestGraphTransformer(build_test_case, 18, *logger_, std::move(transformer),
+                                          TransformerLevel::Level1, 1, pre_graph_checker, post_graph_checker));
   }
 }
 
 TEST_F(GraphTransformationTests, GatherToSplitFusion_NoSqueeze) {
   auto build_test_case = [&](ModelTestBuilder& builder) {
     auto* data_arg = builder.MakeInput<float>({{54}});
-    auto* shape_arg = builder.MakeInput<int64_t>({{1}});
+    auto* shape_arg = builder.MakeInput<int64_t>({{4}});
     auto* reshape_out = builder.MakeIntermediate<float>({{2, 3, 3, 3}});
     auto* gather_index_1 = builder.MakeInitializer<int64_t>({1}, {static_cast<int64_t>(0)});
     auto* gather_index_2 = builder.MakeInitializer<int64_t>({1}, {static_cast<int64_t>(1)});
@@ -6417,7 +7182,10 @@ TEST_F(GraphTransformationTests, GatherToSplitFusion_NoSqueeze) {
     builder.AddNode("Transpose", {gather_out_3}, {transpose_out_3}).AddAttribute("perm", std::vector<int64_t>{0, 2, 1});
   };
 
-  auto pre_graph_checker = [&](Graph& graph) { TEST_RETURN_IF_NOT(CountOpsInGraph(graph)["Gather"] == 3); return Status::OK(); };
+  auto pre_graph_checker = [&](Graph& graph) {
+    TEST_RETURN_IF_NOT(CountOpsInGraph(graph)["Gather"] == 3);
+    return Status::OK();
+  };
 
   // OpSet-12
   {
@@ -6436,8 +7204,8 @@ TEST_F(GraphTransformationTests, GatherToSplitFusion_NoSqueeze) {
     };
 
     std::unique_ptr<GraphTransformer> transformer = std::make_unique<GatherToSplitFusion>();
-    ASSERT_STATUS_OK(TestGraphTransformer(build_test_case, 12, *logger_, std::move(transformer), TransformerLevel::Level1, 1,
-                                          pre_graph_checker, post_graph_checker));
+    ASSERT_STATUS_OK(TestGraphTransformer(build_test_case, 12, *logger_, std::move(transformer),
+                                          TransformerLevel::Level1, 1, pre_graph_checker, post_graph_checker));
   }
 
   // OpSet-14
@@ -6457,8 +7225,8 @@ TEST_F(GraphTransformationTests, GatherToSplitFusion_NoSqueeze) {
     };
 
     std::unique_ptr<GraphTransformer> transformer = std::make_unique<GatherToSplitFusion>();
-    ASSERT_STATUS_OK(TestGraphTransformer(build_test_case, 14, *logger_, std::move(transformer), TransformerLevel::Level1, 1,
-                                          pre_graph_checker, post_graph_checker));
+    ASSERT_STATUS_OK(TestGraphTransformer(build_test_case, 14, *logger_, std::move(transformer),
+                                          TransformerLevel::Level1, 1, pre_graph_checker, post_graph_checker));
   }
 
   // OpSet-18
@@ -6478,13 +7246,180 @@ TEST_F(GraphTransformationTests, GatherToSplitFusion_NoSqueeze) {
     };
 
     std::unique_ptr<GraphTransformer> transformer = std::make_unique<GatherToSplitFusion>();
-    ASSERT_STATUS_OK(TestGraphTransformer(build_test_case, 18, *logger_, std::move(transformer), TransformerLevel::Level1, 1,
-                                          pre_graph_checker, post_graph_checker));
+    ASSERT_STATUS_OK(TestGraphTransformer(build_test_case, 18, *logger_, std::move(transformer),
+                                          TransformerLevel::Level1, 1, pre_graph_checker, post_graph_checker));
+  }
+}
+
+TEST_F(GraphTransformationTests, GatherToSplitFusion_Consume_Input) {
+  auto build_test_case = [&](ModelTestBuilder& builder) {
+    auto* data_arg = builder.MakeInput<float>({{2, 3, 3, 3}});
+    auto* gather_index_1 = builder.MakeInitializer<int64_t>({}, {static_cast<int64_t>(0)});
+    auto* gather_index_2 = builder.MakeInitializer<int64_t>({}, {static_cast<int64_t>(1)});
+    auto* gather_index_3 = builder.MakeInitializer<int64_t>({}, {static_cast<int64_t>(2)});
+    auto* gather_out_1 = builder.MakeIntermediate();
+    auto* gather_out_2 = builder.MakeIntermediate();
+    auto* gather_out_3 = builder.MakeIntermediate();
+    auto* transpose_out_1 = builder.MakeOutput();
+    auto* transpose_out_2 = builder.MakeOutput();
+    auto* transpose_out_3 = builder.MakeOutput();
+
+    builder.AddNode("Gather", {data_arg, gather_index_1}, {gather_out_1}).AddAttribute("axis", static_cast<int64_t>(2));
+    builder.AddNode("Gather", {data_arg, gather_index_2}, {gather_out_2})
+        .AddAttribute("axis", static_cast<int64_t>(-2));
+    builder.AddNode("Gather", {data_arg, gather_index_3}, {gather_out_3}).AddAttribute("axis", static_cast<int64_t>(2));
+    builder.AddNode("Transpose", {gather_out_1}, {transpose_out_1}).AddAttribute("perm", std::vector<int64_t>{0, 2, 1});
+    builder.AddNode("Transpose", {gather_out_2}, {transpose_out_2}).AddAttribute("perm", std::vector<int64_t>{0, 2, 1});
+    builder.AddNode("Transpose", {gather_out_3}, {transpose_out_3}).AddAttribute("perm", std::vector<int64_t>{0, 2, 1});
+  };
+
+  auto pre_graph_checker = [&](Graph& graph) {
+    TEST_RETURN_IF_NOT(CountOpsInGraph(graph)["Gather"] == 3);
+    return Status::OK();
+  };
+
+  // OpSet-12
+  {
+    auto post_graph_checker = [&](Graph& graph) {
+      TEST_RETURN_IF_NOT(CountOpsInGraph(graph)["Gather"] == 0);
+      TEST_RETURN_IF_NOT(CountOpsInGraph(graph)["Split"] == 1);
+      TEST_RETURN_IF_NOT(CountOpsInGraph(graph)["Squeeze"] == 3);
+      for (auto& node : graph.Nodes()) {
+        if (node.OpType() == "Split") {
+          auto& attrs = node.GetAttributes();
+          TEST_RETURN_IF_NOT(attrs.find("axis") != attrs.end());
+          TEST_RETURN_IF_NOT(2 == static_cast<int>(attrs.at("axis").i()));
+        } else if (node.OpType() == "Squeeze") {
+          auto& attrs = node.GetAttributes();
+          TEST_RETURN_IF_NOT(attrs.find("axes") != attrs.end());
+          TEST_RETURN_IF_NOT(2 == static_cast<int>(attrs.at("axes").ints().at(0)));
+        }
+      }
+      return Status::OK();
+    };
+
+    std::unique_ptr<GraphTransformer> transformer = std::make_unique<GatherToSplitFusion>();
+    ASSERT_STATUS_OK(TestGraphTransformer(build_test_case, 12, *logger_, std::move(transformer),
+                                          TransformerLevel::Level1, 1, pre_graph_checker, post_graph_checker));
+  }
+
+  // OpSet-14
+  {
+    auto post_graph_checker = [&](Graph& graph) {
+      TEST_RETURN_IF_NOT(CountOpsInGraph(graph)["Gather"] == 0);
+      TEST_RETURN_IF_NOT(CountOpsInGraph(graph)["Split"] == 1);
+      TEST_RETURN_IF_NOT(CountOpsInGraph(graph)["Squeeze"] == 3);
+      for (auto& node : graph.Nodes()) {
+        if (node.OpType() == "Split") {
+          auto& attrs = node.GetAttributes();
+          TEST_RETURN_IF_NOT(attrs.find("axis") != attrs.end());
+          TEST_RETURN_IF_NOT(2 == static_cast<int>(attrs.at("axis").i()));
+        } else if (node.OpType() == "Squeeze") {
+          const NodeArg& input_arg = *(node.InputDefs()[1]);
+          const ONNX_NAMESPACE::TensorProto* tensor_proto =
+              graph_utils::GetConstantInitializer(graph, input_arg.Name());
+          TEST_RETURN_IF_NOT(tensor_proto != nullptr);
+          Initializer init_const{*tensor_proto, graph.ModelPath()};
+          TEST_RETURN_IF_NOT(tensor_proto->data_type() == ONNX_NAMESPACE::TensorProto_DataType_INT64);
+          TEST_RETURN_IF_NOT(2 == static_cast<int>(*(init_const.data<int64_t>())));
+        }
+      }
+      return Status::OK();
+    };
+
+    std::unique_ptr<GraphTransformer> transformer = std::make_unique<GatherToSplitFusion>();
+    ASSERT_STATUS_OK(TestGraphTransformer(build_test_case, 14, *logger_, std::move(transformer),
+                                          TransformerLevel::Level1, 1, pre_graph_checker, post_graph_checker));
+  }
+
+  // OpSet-18
+  {
+    auto post_graph_checker = [&](Graph& graph) {
+      TEST_RETURN_IF_NOT(CountOpsInGraph(graph)["Gather"] == 0);
+      TEST_RETURN_IF_NOT(CountOpsInGraph(graph)["Split"] == 1);
+      TEST_RETURN_IF_NOT(CountOpsInGraph(graph)["Squeeze"] == 3);
+      for (auto& node : graph.Nodes()) {
+        if (node.OpType() == "Split") {
+          auto& attrs = node.GetAttributes();
+          TEST_RETURN_IF_NOT(attrs.find("axis") != attrs.end());
+          TEST_RETURN_IF_NOT(2 == static_cast<int>(attrs.at("axis").i()));
+        } else if (node.OpType() == "Squeeze") {
+          const NodeArg& input_arg = *(node.InputDefs()[1]);
+          const ONNX_NAMESPACE::TensorProto* tensor_proto =
+              graph_utils::GetConstantInitializer(graph, input_arg.Name());
+          TEST_RETURN_IF_NOT(tensor_proto != nullptr);
+          Initializer init_const{*tensor_proto, graph.ModelPath()};
+          TEST_RETURN_IF_NOT(tensor_proto->data_type() == ONNX_NAMESPACE::TensorProto_DataType_INT64);
+          TEST_RETURN_IF_NOT(2 == static_cast<int>(*(init_const.data<int64_t>())));
+        }
+      }
+      return Status::OK();
+    };
+
+    std::unique_ptr<GraphTransformer> transformer = std::make_unique<GatherToSplitFusion>();
+    ASSERT_STATUS_OK(TestGraphTransformer(build_test_case, 18, *logger_, std::move(transformer),
+                                          TransformerLevel::Level1, 1, pre_graph_checker, post_graph_checker));
   }
 }
 
+TEST_F(GraphTransformationTests, GatherToSplitFusion_Consume_Initializer) {
+  auto build_test_case = [&](ModelTestBuilder& builder) {
+    auto* data_arg = builder.MakeInitializer<float>({2, 3, 3, 3}, std::vector<float>(54));
+    auto* gather_index_1 = builder.MakeInitializer<int64_t>({}, {static_cast<int64_t>(0)});
+    auto* gather_index_2 = builder.MakeInitializer<int64_t>({}, {static_cast<int64_t>(1)});
+    auto* gather_index_3 = builder.MakeInitializer<int64_t>({}, {static_cast<int64_t>(2)});
+    auto* gather_out_1 = builder.MakeIntermediate();
+    auto* gather_out_2 = builder.MakeIntermediate();
+    auto* gather_out_3 = builder.MakeIntermediate();
+    auto* transpose_out_1 = builder.MakeOutput();
+    auto* transpose_out_2 = builder.MakeOutput();
+    auto* transpose_out_3 = builder.MakeOutput();
+
+    builder.AddNode("Gather", {data_arg, gather_index_1}, {gather_out_1}).AddAttribute("axis", static_cast<int64_t>(2));
+    builder.AddNode("Gather", {data_arg, gather_index_2}, {gather_out_2})
+        .AddAttribute("axis", static_cast<int64_t>(-2));
+    builder.AddNode("Gather", {data_arg, gather_index_3}, {gather_out_3}).AddAttribute("axis", static_cast<int64_t>(2));
+    builder.AddNode("Transpose", {gather_out_1}, {transpose_out_1}).AddAttribute("perm", std::vector<int64_t>{0, 2, 1});
+    builder.AddNode("Transpose", {gather_out_2}, {transpose_out_2}).AddAttribute("perm", std::vector<int64_t>{0, 2, 1});
+    builder.AddNode("Transpose", {gather_out_3}, {transpose_out_3}).AddAttribute("perm", std::vector<int64_t>{0, 2, 1});
+  };
+
+  auto pre_graph_checker = [&](Graph& graph) {
+    TEST_RETURN_IF_NOT(CountOpsInGraph(graph)["Gather"] == 3);
+    return Status::OK();
+  };
+
+  auto post_graph_checker = [&](Graph& graph) {
+    TEST_RETURN_IF_NOT(CountOpsInGraph(graph)["Gather"] == 0);
+    TEST_RETURN_IF_NOT(CountOpsInGraph(graph)["Split"] == 1);
+    TEST_RETURN_IF_NOT(CountOpsInGraph(graph)["Squeeze"] == 3);
+    for (auto& node : graph.Nodes()) {
+      if (node.OpType() == "Split") {
+        auto& attrs = node.GetAttributes();
+        TEST_RETURN_IF_NOT(attrs.find("axis") != attrs.end());
+        TEST_RETURN_IF_NOT(2 == static_cast<int>(attrs.at("axis").i()));
+      } else if (node.OpType() == "Squeeze") {
+        const NodeArg& input_arg = *(node.InputDefs()[1]);
+        const ONNX_NAMESPACE::TensorProto* tensor_proto = graph_utils::GetConstantInitializer(graph, input_arg.Name());
+        TEST_RETURN_IF_NOT(tensor_proto != nullptr);
+        Initializer init_const{*tensor_proto, graph.ModelPath()};
+        TEST_RETURN_IF_NOT(tensor_proto->data_type() == ONNX_NAMESPACE::TensorProto_DataType_INT64);
+        TEST_RETURN_IF_NOT(2 == static_cast<int>(*(init_const.data<int64_t>())));
+      }
+    }
+    return Status::OK();
+  };
+
+  std::unique_ptr<GraphTransformer> transformer = std::make_unique<GatherToSplitFusion>();
+  ASSERT_STATUS_OK(TestGraphTransformer(build_test_case, 14, *logger_, std::move(transformer), TransformerLevel::Level1,
+                                        1, pre_graph_checker, post_graph_checker));
+}
+
 TEST_F(GraphTransformationTests, GatherToSplitFusion_Invalid) {
-  auto pre_graph_checker = [&](Graph& graph) { TEST_RETURN_IF_NOT(CountOpsInGraph(graph)["Gather"] == 3); return Status::OK(); };
+  auto pre_graph_checker = [&](Graph& graph) {
+    TEST_RETURN_IF_NOT(CountOpsInGraph(graph)["Gather"] == 3);
+    return Status::OK();
+  };
   auto post_graph_checker = [&](Graph& graph) {
     TEST_RETURN_IF_NOT(CountOpsInGraph(graph)["Gather"] == 3);
     TEST_RETURN_IF_NOT(CountOpsInGraph(graph)["Split"] == 0);
@@ -6524,8 +7459,8 @@ TEST_F(GraphTransformationTests, GatherToSplitFusion_Invalid) {
     };
 
     std::unique_ptr<GraphTransformer> transformer = std::make_unique<GatherToSplitFusion>();
-    ASSERT_STATUS_OK(TestGraphTransformer(build_test_case, 12, *logger_, std::move(transformer), TransformerLevel::Level1, 1,
-                                          pre_graph_checker, post_graph_checker));
+    ASSERT_STATUS_OK(TestGraphTransformer(build_test_case, 12, *logger_, std::move(transformer),
+                                          TransformerLevel::Level1, 1, pre_graph_checker, post_graph_checker));
   }
 
   // Invalid Gather indices.
@@ -6560,8 +7495,8 @@ TEST_F(GraphTransformationTests, GatherToSplitFusion_Invalid) {
     };
 
     std::unique_ptr<GraphTransformer> transformer = std::make_unique<GatherToSplitFusion>();
-    ASSERT_STATUS_OK(TestGraphTransformer(build_test_case, 14, *logger_, std::move(transformer), TransformerLevel::Level1, 1,
-                                          pre_graph_checker, post_graph_checker));
+    ASSERT_STATUS_OK(TestGraphTransformer(build_test_case, 14, *logger_, std::move(transformer),
+                                          TransformerLevel::Level1, 1, pre_graph_checker, post_graph_checker));
   }
 
   // Invalid Gather axis.
@@ -6596,8 +7531,8 @@ TEST_F(GraphTransformationTests, GatherToSplitFusion_Invalid) {
     };
 
     std::unique_ptr<GraphTransformer> transformer = std::make_unique<GatherToSplitFusion>();
-    ASSERT_STATUS_OK(TestGraphTransformer(build_test_case, 14, *logger_, std::move(transformer), TransformerLevel::Level1, 1,
-                                          pre_graph_checker, post_graph_checker));
+    ASSERT_STATUS_OK(TestGraphTransformer(build_test_case, 14, *logger_, std::move(transformer),
+                                          TransformerLevel::Level1, 1, pre_graph_checker, post_graph_checker));
   }
 }
 
@@ -6644,8 +7579,8 @@ TEST_F(GraphTransformationTests, GatherToSliceFusion) {
     };
 
     std::unique_ptr<GraphTransformer> transformer = std::make_unique<GatherToSliceFusion>();
-    ASSERT_STATUS_OK(TestGraphTransformer(build_test_case, 12, *logger_, std::move(transformer), TransformerLevel::Level1, 1,
-                                          pre_graph_checker, post_graph_checker));
+    ASSERT_STATUS_OK(TestGraphTransformer(build_test_case, 12, *logger_, std::move(transformer),
+                                          TransformerLevel::Level1, 1, pre_graph_checker, post_graph_checker));
   }
 
   // OpSet-14, Tind is int64.
@@ -6683,8 +7618,8 @@ TEST_F(GraphTransformationTests, GatherToSliceFusion) {
     };
 
     std::unique_ptr<GraphTransformer> transformer = std::make_unique<GatherToSliceFusion>();
-    ASSERT_STATUS_OK(TestGraphTransformer(build_test_case, 14, *logger_, std::move(transformer), TransformerLevel::Level1, 1,
-                                          pre_graph_checker, post_graph_checker));
+    ASSERT_STATUS_OK(TestGraphTransformer(build_test_case, 14, *logger_, std::move(transformer),
+                                          TransformerLevel::Level1, 1, pre_graph_checker, post_graph_checker));
   }
 }
 
diff --git a/onnxruntime/test/optimizer/qdq_test_utils.h b/onnxruntime/test/optimizer/qdq_test_utils.h
index 2008d96539dca..5cb4633dadd46 100644
--- a/onnxruntime/test/optimizer/qdq_test_utils.h
+++ b/onnxruntime/test/optimizer/qdq_test_utils.h
@@ -466,11 +466,11 @@ GetQDQTestCaseFn BuildDoubleQDQWithoutLastOutput(int output_index, bool use_cont
 }
 
 template <typename InputType, typename OutputType>
-GetQDQTestCaseFn BuildQDQSplitTestCase(
-    const std::vector<int64_t>& input_shape,
-    const int64_t& axis,
-    bool use_contrib_qdq = false) {
-  return [input_shape, axis, use_contrib_qdq](ModelTestBuilder& builder) {
+GetQDQTestCaseFn BuildQDQSplitTestCase(const std::vector<int64_t>& input_shape,
+                                       const int64_t& axis,
+                                       bool use_diff_output_scale,
+                                       bool use_contrib_qdq = false) {
+  return [input_shape, axis, use_diff_output_scale, use_contrib_qdq](ModelTestBuilder& builder) {
     auto* input_arg = builder.MakeInput<InputType>(input_shape,
                                                    std::numeric_limits<InputType>::min(),
                                                    std::numeric_limits<InputType>::max());
@@ -478,16 +478,30 @@ GetQDQTestCaseFn BuildQDQSplitTestCase(
     InputType dq_zp = std::numeric_limits<InputType>::max() / 2;
     OutputType q_zp = std::numeric_limits<OutputType>::max() / 2;
     auto* dq_output = builder.MakeIntermediate();
-    builder.AddDequantizeLinearNode<InputType>(input_arg, .003f, dq_zp, dq_output, use_contrib_qdq);
+    constexpr float input_scale = 0.003f;
+    builder.AddDequantizeLinearNode<InputType>(input_arg, input_scale, dq_zp, dq_output, use_contrib_qdq);
 
     // add Split
+    std::vector<NodeArg*> split_inputs;
+    split_inputs.push_back(dq_output);
+
+    // Use the optional 'split' input when testing Split 13
+    int opset = builder.DomainToVersionMap().find(kOnnxDomain)->second;
+    if (opset >= 13 && opset < 18) {
+      int64_t dim = input_shape[axis];
+      int64_t split_size = dim / 3;
+      split_inputs.push_back(builder.Make1DInitializer(std::vector<int64_t>{split_size,
+                                                                            split_size, dim - (2 * split_size)}));
+    }
 
     auto* split_output_1 = builder.MakeIntermediate();
     auto* split_output_2 = builder.MakeIntermediate();
     auto* split_output_3 = builder.MakeIntermediate();
-    Node& split_node = builder.AddNode("Split", {dq_output}, {split_output_1, split_output_2, split_output_3});
+    Node& split_node = builder.AddNode("Split", split_inputs, {split_output_1, split_output_2, split_output_3});
     split_node.AddAttribute("axis", axis);
-    if (builder.DomainToVersionMap().find(kOnnxDomain)->second >= 18) {
+
+    // Use the 'num_outputs' attribute when testing Split >= 18
+    if (opset >= 18) {
       split_node.AddAttribute("num_outputs", static_cast<int64_t>(3));
     }
 
@@ -495,11 +509,12 @@ GetQDQTestCaseFn BuildQDQSplitTestCase(
     auto* q_split_output_1 = builder.MakeOutput();
     auto* q_split_output_2 = builder.MakeOutput();
     auto* q_split_output_3 = builder.MakeOutput();
-    builder.AddQuantizeLinearNode<OutputType>(split_output_1, .003f, q_zp, q_split_output_1,
+    float output_scale = use_diff_output_scale ? input_scale + 0.001f : input_scale;
+    builder.AddQuantizeLinearNode<OutputType>(split_output_1, output_scale, q_zp, q_split_output_1,
                                               use_contrib_qdq);  // Model input (node_token_1)
-    builder.AddQuantizeLinearNode<OutputType>(split_output_2, .003f, q_zp, q_split_output_2,
+    builder.AddQuantizeLinearNode<OutputType>(split_output_2, output_scale, q_zp, q_split_output_2,
                                               use_contrib_qdq);  // Model input (node_token_2)
-    builder.AddQuantizeLinearNode<OutputType>(split_output_3, .003f, q_zp, q_split_output_3,
+    builder.AddQuantizeLinearNode<OutputType>(split_output_3, output_scale, q_zp, q_split_output_3,
                                               use_contrib_qdq);
   };
 }
@@ -549,13 +564,30 @@ GetQDQTestCaseFn BuildQDQTransposeTestCase(
     InputType dq_zp = std::numeric_limits<InputType>::max() / 2;
     OutputType q_zp = std::numeric_limits<OutputType>::max() / 2;
 
-    // add DQ
-    auto* dq_output = builder.MakeIntermediate();
-    builder.AddDequantizeLinearNode<InputType>(input_arg, .003f, dq_zp, dq_output, use_contrib_qdq);
+    // In order to test additional EPs that are more sensitive to whether the Transpose is in a QDQ node unit or not,
+    // we need a QDQ node unit prior to DQ -> Transpose -> Q -> graph output.
+    // The transpose optimizer will push the transpose, convert its input to uint8, and drop the empty DQ -> Q.
+    // If there's a QDQ node unit prior, the scale and zp info can be read from the Q node feeding the standalone
+    // Transpose node, so we add a DQ -> Mul -> Q to provide that.
+    // Essentially eveything has worked correctly if the DQ -> Transpose -> Q becomes a single Transpose and the
+    // extra QDQ node unit simply allows some additional functionality to be tested.
+
+    // add DQ -> Mul -> Q
+    auto* dq_output_0 = builder.MakeIntermediate();
+    auto* mul_output = builder.MakeIntermediate();
+    auto* q_output_0 = builder.MakeIntermediate();
+    auto mul_by = builder.MakeInitializer<float>({1}, 2.f, 3.f);
+    builder.AddDequantizeLinearNode<InputType>(input_arg, .003f, dq_zp, dq_output_0, use_contrib_qdq);
+    builder.AddNode("Mul", {dq_output_0, mul_by}, {mul_output});
+    builder.AddQuantizeLinearNode<OutputType>(mul_output, .003f, q_zp, q_output_0, use_contrib_qdq);
+
+    // add DQ -> Transpose -> Q
+    auto* dq_output_1 = builder.MakeIntermediate();
+    builder.AddDequantizeLinearNode<InputType>(q_output_0, .003f, dq_zp, dq_output_1, use_contrib_qdq);
 
     // add Transpose
     auto* transpose_output = builder.MakeIntermediate();
-    Node& transpose_node = builder.AddNode("Transpose", {dq_output}, {transpose_output});
+    Node& transpose_node = builder.AddNode("Transpose", {dq_output_1}, {transpose_output});
     transpose_node.AddAttribute("perm", perms);
 
     // add Q
diff --git a/onnxruntime/test/optimizer/qdq_transformer_test.cc b/onnxruntime/test/optimizer/qdq_transformer_test.cc
index d3616a14d8a5d..6b0f837c14b5a 100644
--- a/onnxruntime/test/optimizer/qdq_transformer_test.cc
+++ b/onnxruntime/test/optimizer/qdq_transformer_test.cc
@@ -1187,21 +1187,32 @@ static void RunDoubleQDQWithoutLastNodeBeingOutput(int output_index, int expecte
 TEST(QDQTransformerTests, DoubleQDQ_Without_Last_Node_Being_Output) {
   constexpr bool use_contrib_qdq = true;  // For readability.
 
-  RunDoubleQDQWithoutLastNodeBeingOutput<uint8_t>(0, 2, 2);
-  RunDoubleQDQWithoutLastNodeBeingOutput<uint8_t>(0, 2, 2, use_contrib_qdq);
-  RunDoubleQDQWithoutLastNodeBeingOutput<uint16_t>(0, 2, 2, use_contrib_qdq);
-  RunDoubleQDQWithoutLastNodeBeingOutput<int16_t>(0, 2, 2, use_contrib_qdq);
-
-  // EnsureUniqueDQForNodeUnit will duplicate first DQ, so expected one more (3)
-  RunDoubleQDQWithoutLastNodeBeingOutput<uint8_t>(1, 2, 3);
-  RunDoubleQDQWithoutLastNodeBeingOutput<uint8_t>(1, 2, 3, use_contrib_qdq);
-  RunDoubleQDQWithoutLastNodeBeingOutput<uint16_t>(1, 2, 3, use_contrib_qdq);
-  RunDoubleQDQWithoutLastNodeBeingOutput<int16_t>(1, 2, 3, use_contrib_qdq);
+  // the first node being a graph output doesn't prevent the DQ -> Q in the middle from being removed
+  // if they have matching type/scale/zp
+  // Q -> DQ -> Q -> DQ
+  //  `-> graph output
+  RunDoubleQDQWithoutLastNodeBeingOutput<uint8_t>(0, 1, 1);
+  RunDoubleQDQWithoutLastNodeBeingOutput<uint8_t>(0, 1, 1, use_contrib_qdq);
+  RunDoubleQDQWithoutLastNodeBeingOutput<uint16_t>(0, 1, 1, use_contrib_qdq);
+  RunDoubleQDQWithoutLastNodeBeingOutput<int16_t>(0, 1, 1, use_contrib_qdq);
+
+  // EnsureUniqueDQForNodeUnit will duplicate first DQ, but after that the DQ -> Q in the middle can still be removed
+  // leaveing one Q and 2 DQ.
+  // Q -> DQ -> Q -> DQ
+  //       `-> graph output
+  // =>
+  // Q -> DQ -> Q -> DQ
+  //  `-> DQ -> graph output
+  RunDoubleQDQWithoutLastNodeBeingOutput<uint8_t>(1, 1, 2);
+  RunDoubleQDQWithoutLastNodeBeingOutput<uint8_t>(1, 1, 2, use_contrib_qdq);
+  RunDoubleQDQWithoutLastNodeBeingOutput<uint16_t>(1, 1, 2, use_contrib_qdq);
+  RunDoubleQDQWithoutLastNodeBeingOutput<int16_t>(1, 1, 2, use_contrib_qdq);
 
   RunDoubleQDQWithoutLastNodeBeingOutput<uint8_t>(2, 2, 2);
   RunDoubleQDQWithoutLastNodeBeingOutput<uint8_t>(2, 2, 2, use_contrib_qdq);
   RunDoubleQDQWithoutLastNodeBeingOutput<uint16_t>(2, 2, 2, use_contrib_qdq);
 
+  // last node being a graph output doesn't prevent the DQ -> Q in the middle from being removed
   RunDoubleQDQWithoutLastNodeBeingOutput<uint8_t>(3, 1, 1);
   RunDoubleQDQWithoutLastNodeBeingOutput<uint8_t>(3, 1, 1, use_contrib_qdq);
   RunDoubleQDQWithoutLastNodeBeingOutput<uint16_t>(3, 1, 1, use_contrib_qdq);
@@ -1210,27 +1221,51 @@ TEST(QDQTransformerTests, DoubleQDQ_Without_Last_Node_Being_Output) {
 // Runs a test that checks if DQ -> Split -> Q (many) is replaced with just Split.
 template <typename InputQType, typename OutputQType>
 static void RunDropSplitQDQTestCase(const std::vector<int64_t>& input_shape, int64_t axis,
-                                    bool use_contrib_qdq = false) {
-  auto check_graph = [use_contrib_qdq](InferenceSessionWrapper& session) {
+                                    bool all_same_quant_params, bool use_contrib_qdq = false) {
+  auto check_graph = [all_same_quant_params, use_contrib_qdq](InferenceSessionWrapper& session) {
     auto op_to_count = CountOpsInGraph(session.GetGraph());
     const QDQOpKeys qdq_keys = GetQDQOpKeys(use_contrib_qdq);
+    int expected_q_ops = all_same_quant_params ? 0 : 3;
+    int expected_dq_ops = all_same_quant_params ? 0 : 1;
     EXPECT_EQ(op_to_count["Split"], 1);
-    EXPECT_EQ(op_to_count[qdq_keys.quantize_linear], 0);
-    EXPECT_EQ(op_to_count[qdq_keys.dequantize_linear], 0);
+    EXPECT_EQ(op_to_count[qdq_keys.quantize_linear], expected_q_ops);
+    EXPECT_EQ(op_to_count[qdq_keys.dequantize_linear], expected_dq_ops);
   };
-  TransformerTester(BuildQDQSplitTestCase<InputQType, OutputQType>(input_shape, axis, use_contrib_qdq),
+  TransformerTester(BuildQDQSplitTestCase<InputQType, OutputQType>(input_shape, axis, !all_same_quant_params,
+                                                                   use_contrib_qdq),
                     check_graph,
                     TransformerLevel::Level1,
                     TransformerLevel::Level2,
-                    {12, 18, 19});
+                    {12, 13, 18, 19});  // Test different ways to specify the split in each opset:
+                                        // 12 - split into equal parts without explicit 'split' attribute
+                                        // 13 - use optional 'split' input to split into 3 parts
+                                        // 18 - use 'num_outputs' attribute to split into 3 parts
+                                        // 19 - use 'num_outputs' attribute to split into 3 parts
 }
 
 // Test that DQ -> Split -> Q (many) is replaced with just Split for various quantization types.
 TEST(QDQTransformerTests, Split) {
-  RunDropSplitQDQTestCase<int8_t, int8_t>({6, 18, 54}, 0);
-  RunDropSplitQDQTestCase<int8_t, int8_t>({6, 18, 54}, 0, true);      // Use com.microsoft int8 QDQ ops
-  RunDropSplitQDQTestCase<int16_t, int16_t>({6, 18, 54}, 0, true);    // Use com.microsoft int16 QDQ ops
-  RunDropSplitQDQTestCase<uint16_t, uint16_t>({6, 18, 54}, 0, true);  // Use com.microsoft uint16 QDQ ops
+  // Test cases that drop Q/DQ ops from DQ -> Split -> Q (many).
+  // This happens when all the Q/DQ ops have equal and constant quantization parameters.
+  {
+    constexpr bool ALL_SAME_QUANT_PARAMS = true;
+    constexpr bool USE_CONTRIB_QDQ_OPS = true;
+    RunDropSplitQDQTestCase<int8_t, int8_t>({6, 18, 54}, 0, ALL_SAME_QUANT_PARAMS);
+    RunDropSplitQDQTestCase<int8_t, int8_t>({6, 18, 54}, 0, ALL_SAME_QUANT_PARAMS, USE_CONTRIB_QDQ_OPS);
+    RunDropSplitQDQTestCase<int16_t, int16_t>({6, 18, 54}, 0, ALL_SAME_QUANT_PARAMS, USE_CONTRIB_QDQ_OPS);
+    RunDropSplitQDQTestCase<uint16_t, uint16_t>({6, 18, 54}, 0, ALL_SAME_QUANT_PARAMS, USE_CONTRIB_QDQ_OPS);
+  }
+
+  // Test cases that DO NOT drop Q/DQ ops from DQ -> Split -> Q (many)
+  // This happens when the Q/DQ ops do not have equal and constant quantization parameters.
+  {
+    constexpr bool DIFF_QUANT_PARAMS = false;
+    constexpr bool USE_CONTRIB_QDQ_OPS = true;
+    RunDropSplitQDQTestCase<int8_t, int8_t>({6, 18, 54}, 0, DIFF_QUANT_PARAMS);
+    RunDropSplitQDQTestCase<int8_t, int8_t>({6, 18, 54}, 0, DIFF_QUANT_PARAMS, USE_CONTRIB_QDQ_OPS);
+    RunDropSplitQDQTestCase<int16_t, int16_t>({6, 18, 54}, 0, DIFF_QUANT_PARAMS, USE_CONTRIB_QDQ_OPS);
+    RunDropSplitQDQTestCase<uint16_t, uint16_t>({6, 18, 54}, 0, DIFF_QUANT_PARAMS, USE_CONTRIB_QDQ_OPS);
+  }
 }
 
 // Because split isn't one the supported ops, this will stay the same
@@ -1296,12 +1331,15 @@ TEST(QDQTransformerTests, Where) {
 template <typename QuantType>
 static void RunDropQDQTransposeTestCase(const std::vector<int64_t>& input_shape, const std::vector<int64_t>& perms,
                                         bool use_contrib_qdq = false) {
+  // model has DQ -> Mul -> Q -> DQ -> Transpose -> Q -> output
+  // post transform and optimization it should be DQ -> Mul -> Q -> Transpose(uint8) -> output
   auto check_graph = [&](InferenceSessionWrapper& session) {
     auto op_to_count = CountOpsInGraph(session.GetGraph());
     const QDQOpKeys qdq_keys = GetQDQOpKeys(use_contrib_qdq);
     EXPECT_EQ(op_to_count["Transpose"], 1);
-    EXPECT_EQ(op_to_count[qdq_keys.quantize_linear], 0);
-    EXPECT_EQ(op_to_count[qdq_keys.dequantize_linear], 0);
+    EXPECT_EQ(op_to_count["Mul"], 1);
+    EXPECT_EQ(op_to_count[qdq_keys.quantize_linear], 1);
+    EXPECT_EQ(op_to_count[qdq_keys.dequantize_linear], 1);
   };
 
   TransformerTester(BuildQDQTransposeTestCase<QuantType, QuantType>(input_shape, perms, use_contrib_qdq),
@@ -3068,29 +3106,54 @@ TEST(QDQTransformerTests, QDQPropagation_Per_Layer_No_Propagation) {
       transpose_node.AddAttribute("perm", perms);
     };
 
+    bool use_transpose_optimizer = false;
+
     auto check_graph = [&](InferenceSessionWrapper& session) {
-      // transpose optimization will change the order of the nodes,
-      // but as we're testing there's no propagation of the DQ what matters is the op counts.
-      auto op_counts = CountOpsInGraph(session.GetGraph());
       const QDQOpKeys qdq_keys = GetQDQOpKeys(use_contrib_qdq);
-      EXPECT_EQ(op_counts[qdq_keys.dequantize_linear], 1);
-      EXPECT_EQ(op_counts["Transpose"], 1);
+
+      // if the transpose optimizer isn't used the DQ doesn't propagate past the Transpose
+      // TODO: Should it? It makes it easier for an EP to do a quantized Tranpose if it's in a QDQ node unit as it
+      // doesn't have to special-case looking for a solo Transpose.
+      std::vector<std::string> expected_op_types_in_order{qdq_keys.dequantize_linear,
+                                                          "Transpose"};
+      if (use_transpose_optimizer) {
+        // fixup of QDQ node units would have put the Transpose in a QDQ node unit for consistency IFF
+        // the scale and zero point inputs are constant (which they are here)
+        expected_op_types_in_order.push_back(qdq_keys.quantize_linear);
+        expected_op_types_in_order.push_back(qdq_keys.dequantize_linear);
+      }
+
+      const auto op_types_in_order = GetNodeOpTypesInTopologicalOrder(session.GetGraph(), true);
+      EXPECT_EQ(op_types_in_order, expected_op_types_in_order);
+
+      if (use_transpose_optimizer) {
+        // the trailing Q/DQ should have updated axis based on the transpose. default axis of 1 moves to 3 with
+        // transpose of {0,2,3,1} (NCHW -> NHWC)
+        GraphViewer graph_viewer{session.GetGraph()};
+        const auto& ordered_nodes = graph_viewer.GetNodesInTopologicalOrder();
+        const auto& q_node = *graph_viewer.GetNode(ordered_nodes.back() - 1);
+        const auto& dq_node = *graph_viewer.GetNode(ordered_nodes.back());
+
+        EXPECT_EQ(graph_utils::GetNodeAttribute(q_node, std::string("axis"))->i(), 3);
+        EXPECT_EQ(graph_utils::GetNodeAttribute(dq_node, std::string("axis"))->i(), 3);
+      }
     };
 
-    TransformerTester(build_test_case,
-                      check_graph,
-                      TransformerLevel::Default,
-                      TransformerLevel::Level1);
-    TransformerTester(build_test_case,
-                      check_graph,
-                      TransformerLevel::Default,
-                      TransformerLevel::Level1,
-                      18);  // disable TransposeOptimizer for simplicity
-    TransformerTester(build_test_case,
-                      check_graph,
-                      TransformerLevel::Default,
-                      TransformerLevel::Level1,
-                      19);  // disable TransposeOptimizer for simplicity
+    auto run_test = [&](int opset) {
+      use_transpose_optimizer = true;
+      TransformerTester(build_test_case, check_graph, TransformerLevel::Level1, TransformerLevel::Level2, opset);
+
+      use_transpose_optimizer = false;
+      TransformerTester(build_test_case, check_graph, TransformerLevel::Level1, TransformerLevel::Level2, opset,
+                        // defaults that we're not overriding
+                        0.0, 0.0, nullptr, {},
+                        // disable generic L1 and CPU EP specific L2 TransposeOptimizer
+                        {"TransposeOptimizer", std::string("TransposeOptimizer_") + kCpuExecutionProvider});
+    };
+
+    run_test(12);
+    run_test(18);
+    run_test(19);
   };
 
   test_case({1, 13, 13, 23}, {0, 2, 3, 1}, false /*use_contrib_qdq*/);
@@ -3293,10 +3356,9 @@ TEST(QDQTransformerTests, QDQPropagation_GH11605_Opset12_19) {
     // Original: DQ -> Tr -> SoftM -> Tr
     // QDQ Prop inserts a Q/DQ pair to create a QDQ node group for the Transpose: DQ -> Tr -> Q -> DQ -> SoftM -> Tr
     // Transpose opt phase 1 moves the Tr down until it blocks on the SoftMax: DQ -> Q -> DQ -> Tr -> SoftM -> Tr
-    // Transpose opt phase 2 flips the Tr to prior to the DQ as it's not part of a QDQ node group at that point, as
-    // running the transpose on 8-bit data should be cheaper: DQ -> Q -> Tr -> DQ -> SoftM -> Tr
-    // QDQ cleanup in Level2 removes the unnecessary DQ/Q pair at the start: Tr -> DQ -> SoftM -> Tr
-    // this is the optimal result as the Transpose is using 8-bit data and we have no surplus Q/DQ pairs
+    // Transpose opt phase 2 repairs the QDQ node units: DQ -> Q -> DQ -> Tr -> Q -> DQ -> SoftM -> TR
+    // and removes the unnecessary DQ/Q pair at the start: DQ -> Tr -> Q -> DQ -> SoftM -> Tr
+    // The L2 CPU EP QDQ handling converts the DQ -> Tr -> Q to a Transpose with 8-bit data.
     auto check_graph = [&](InferenceSessionWrapper& session) {
       const QDQOpKeys qdq_keys = GetQDQOpKeys(use_contrib_qdq);
       std::vector<std::string> expected_op_types_in_order{
@@ -3305,8 +3367,13 @@ TEST(QDQTransformerTests, QDQPropagation_GH11605_Opset12_19) {
           "Softmax",
           "Transpose"};
 
-      const auto op_types_in_order = GetNodeOpTypesInTopologicalOrder(session.GetGraph(), true);
+      const auto& graph = session.GetGraph();
+      GraphViewer graph_viewer(graph);
+      const auto op_types_in_order = GetNodeOpTypesInTopologicalOrder(graph, true);
       EXPECT_EQ(op_types_in_order, expected_op_types_in_order);
+
+      auto first_node = graph_viewer.GetNode(graph_viewer.GetNodesInTopologicalOrder().front());
+      EXPECT_EQ(*first_node->InputDefs()[0]->Type(), "tensor(uint8)");
     };
 
     TransformerTester(build_test_case,
diff --git a/onnxruntime/test/optimizer/transpose_optimizer_test.cc b/onnxruntime/test/optimizer/transpose_optimizer_test.cc
index 1f4c499985ad0..a1649f9e6b588 100644
--- a/onnxruntime/test/optimizer/transpose_optimizer_test.cc
+++ b/onnxruntime/test/optimizer/transpose_optimizer_test.cc
@@ -12,6 +12,9 @@
 #include "core/graph/node_attr_utils.h"
 #include "core/framework/op_node_proto_helper.h"
 #include "core/framework/utils.h"
+#include "core/optimizer/transpose_optimization/onnx_transpose_optimization.h"
+#include "core/optimizer/transpose_optimization/optimizer_api.h"
+#include "core/optimizer/transpose_optimization/ort_optimizer_utils.h"
 #include "core/session/onnxruntime_session_options_config_keys.h"
 
 #include "test/test_environment.h"
@@ -19,6 +22,7 @@
 #include "test/providers/internal_testing/internal_testing_execution_provider.h"
 #include "test/util/include/asserts.h"
 #include "test/util/include/inference_session_wrapper.h"
+#include "test/util/include/test_utils.h"
 
 namespace onnxruntime {
 namespace test {
@@ -316,20 +320,6 @@ TEST(TransposeOptimizerTests, TestPadNonconst) {
                     /*opset_version*/ {11, 18});
 }
 
-// The CUDA Resize kernel assumes that the input is NCHW and
-// Resize can't be supported in ORT builds with CUDA enabled.
-// TODO: Enable this once the CUDA Resize kernel is implemented
-// "generically" (i.e.) aligning with the generic nature of the
-// ONNX spec.
-// See https://github.com/microsoft/onnxruntime/pull/10824 for
-// a similar fix applied to the CPU Resize kernel.
-// Per tests included in #10824, the ROCM EP also generates
-// incorrect results when this handler is used, so the Resize
-// handler is not enabled even for those builds.
-//
-// The QNN EP requires the input to be NHWC, so the Resize handler is also not enabled
-// for QNN builds.
-#if !defined(USE_CUDA) && !defined(USE_ROCM) && !defined(USE_QNN)
 TEST(TransposeOptimizerTests, TestResize) {
   auto build_test_case_1 = [&](ModelTestBuilder& builder) {
     auto* input0_arg = MakeInput<float>(builder, {{4, -1, 2, -1}}, {4, 6, 2, 10}, 0.0, 1.0);
@@ -358,7 +348,9 @@ TEST(TransposeOptimizerTests, TestResize) {
   TransformerTester(build_test_case_1,
                     check_optimized_graph_1,
                     TransformerLevel::Default,
-                    TransformerLevel::Level1,
+                    // need the level 2 TransposeOptimizer as pushing a Transpose through a Resize requires it to be
+                    // assigned to the CPU EP first
+                    TransformerLevel::Level2,
                     /*opset_version*/ {10, 18});
 }
 
@@ -386,7 +378,9 @@ TEST(TransposeOptimizerTests, TestResizeOpset11) {
   TransformerTester(build_test_case_1,
                     check_optimized_graph_1,
                     TransformerLevel::Default,
-                    TransformerLevel::Level1,
+                    // need the level 2 TransposeOptimizer as pushing a Transpose through a Resize requires it to be
+                    // assigned to the CPU EP first
+                    TransformerLevel::Level2,
                     /*opset_version*/ {11, 18});
 }
 
@@ -414,7 +408,9 @@ TEST(TransposeOptimizerTests, TestResizeOpset15) {
   TransformerTester(build_test_case_1,
                     check_optimized_graph_1,
                     TransformerLevel::Default,
-                    TransformerLevel::Level1,
+                    // need the level 2 TransposeOptimizer as pushing a Transpose through a Resize requires it to be
+                    // assigned to the CPU EP first
+                    TransformerLevel::Level2,
                     /*opset_version*/ {15, 18});
 }
 
@@ -444,7 +440,9 @@ TEST(TransposeOptimizerTests, TestResizeSizeRoi) {
   TransformerTester(build_test_case_1,
                     check_optimized_graph_1,
                     TransformerLevel::Default,
-                    TransformerLevel::Level1,
+                    // need the level 2 TransposeOptimizer as pushing a Transpose through a Resize requires it to be
+                    // assigned to the CPU EP first
+                    TransformerLevel::Level2,
                     /*opset_version*/ {15, 18});
 }
 
@@ -478,7 +476,9 @@ TEST(TransposeOptimizerTests, TestResizeRoiScalesZeroRank0) {
   TransformerTester(build_test_case_1,
                     check_optimized_graph_1,
                     TransformerLevel::Default,
-                    TransformerLevel::Level1,
+                    // need the level 2 TransposeOptimizer as pushing a Transpose through a Resize requires it to be
+                    // assigned to the CPU EP first
+                    TransformerLevel::Level2,
                     {12, 18});
 }
 
@@ -507,7 +507,9 @@ TEST(TransposeOptimizerTests, TestResizeNonconst) {
   TransformerTester(build_test_case_1,
                     check_optimized_graph_1,
                     TransformerLevel::Default,
-                    TransformerLevel::Level1,
+                    // need the level 2 TransposeOptimizer as pushing a Transpose through a Resize requires it to be
+                    // assigned to the CPU EP first
+                    TransformerLevel::Level2,
                     /*opset_version*/ {11, 18});
 }
 
@@ -536,11 +538,12 @@ TEST(TransposeOptimizerTests, TestResizeNonconstOpset13) {
   TransformerTester(build_test_case_1,
                     check_optimized_graph_1,
                     TransformerLevel::Default,
-                    TransformerLevel::Level1,
+                    // need the level 2 TransposeOptimizer as pushing a Transpose through a Resize requires it to be
+                    // assigned to the CPU EP first
+                    TransformerLevel::Level2,
                     /*opset_version*/ {13, 18});
 }
 
-#endif
 TEST(TransposeOptimizerTests, TestAdd) {
   auto build_test_case_1 = [&](ModelTestBuilder& builder) {
     auto* input0_arg = builder.MakeInput<float>({4, 6, 10}, 0.0, 1.0);
@@ -3739,66 +3742,6 @@ TEST(TransposeOptimizerTests, TestDequantizeLinearNoAxis) {
 #endif
 }
 
-// Utility function that runs TransformerTester for the graph in which a single DequantizeLinear node is
-// the parent of two Transpose nodes. The DQ should be duplicated by EnsureUniqueDQForNodeUnit, and the
-// Transposes should be pushed.
-template <typename QuantType>
-static void RunDequantizeLinearTransposePropagationTestCase(const std::string& dq_domain = "") {
-  auto build_test_case = [dq_domain](ModelTestBuilder& builder) {
-    auto* input0_arg = MakeInput<QuantType>(builder, {{2, -1, 6, 3}}, {2, 4, 6, 3}, 0, 5);
-    auto* scale_arg = MakeInput<float>(builder, {std::vector<int64_t>{}}, std::vector<int64_t>{}, {2.3f});
-    auto* zero_point_arg = MakeInput<QuantType>(builder, {std::vector<int64_t>{}}, std::vector<int64_t>{}, {10});
-    auto* dequantizelinear_1_out_0 = builder.MakeIntermediate();
-    auto* transpose_1_out_0 = builder.MakeOutput();
-    auto* transpose_2_out_0 = builder.MakeOutput();
-
-    builder.AddNode("DequantizeLinear", {input0_arg, scale_arg, zero_point_arg}, {dequantizelinear_1_out_0},
-                    dq_domain);
-
-    auto& transpose_1 = builder.AddNode("Transpose", {dequantizelinear_1_out_0}, {transpose_1_out_0});
-    transpose_1.AddAttribute("perm", std::vector<int64_t>{0, 3, 1, 2});
-
-    auto& transpose_2 = builder.AddNode("Transpose", {dequantizelinear_1_out_0}, {transpose_2_out_0});
-    transpose_2.AddAttribute("perm", std::vector<int64_t>{0, 2, 3, 1});
-  };
-
-  auto check_graph = [dq_domain](InferenceSessionWrapper& session) {
-    const auto& graph = session.GetGraph();
-
-    const char* dq_count_key = (dq_domain == kMSDomain) ? "com.microsoft.DequantizeLinear" : "DequantizeLinear";
-    const auto op_count = CountOpsInGraph(graph);
-    decltype(op_count) expected_op_count{
-        {dq_count_key, 2},  // EnsureUniqueDQForNodeUnit should duplicate the original DQ
-        {"Transpose", 2},
-    };
-    ASSERT_EQ(op_count, expected_op_count);
-
-    // Transposes should be pushed, so check for Transpose -> DQ edges
-    for (const auto& node : graph.Nodes()) {
-      if (node.OpType() == "Transpose") {
-        ASSERT_EQ(node.GetOutputEdgesCount(), static_cast<size_t>(1));
-        ASSERT_EQ(node.OutputEdgesBegin()->GetNode().OpType(), "DequantizeLinear");
-      }
-    }
-  };
-
-  TransformerTester(build_test_case,
-                    check_graph,
-                    TransformerLevel::Default,
-                    TransformerLevel::Level1,
-                    /*opset_version*/ 10);
-}
-
-TEST(TransposeOptimizerTests, TestDequantizeLinearTransposePropagation) {
-  RunDequantizeLinearTransposePropagationTestCase<uint8_t>();
-#if !defined(DISABLE_CONTRIB_OPS)
-  // Use com.microsoft.DequantizeLinear
-  RunDequantizeLinearTransposePropagationTestCase<uint8_t>(kMSDomain);
-  RunDequantizeLinearTransposePropagationTestCase<uint16_t>(kMSDomain);
-  RunDequantizeLinearTransposePropagationTestCase<int16_t>(kMSDomain);
-#endif
-}
-
 TEST(TransposeOptimizerTests, TestCast) {
   auto build_test_case_1 = [&](ModelTestBuilder& builder) {
     auto* input0_arg = MakeInput<int32_t>(builder, {{-1, 4, -1, 5}}, {2, 4, 6, 5}, -1, 5);
@@ -4395,9 +4338,9 @@ TEST(TransposeOptimizerTests, RegressionTest_GitHubIssue9671) {
 
   SessionOptions so;
   so.session_logid = "TransposeOptimizerTests.RegressionTest_GitHubIssue9671";
-  InferenceSession session_object{so, GetEnvironment()};
-  ASSERT_STATUS_OK(session_object.Load(model_uri));
-  ASSERT_STATUS_OK(session_object.Initialize());  // optimizers run during initialization
+  InferenceSession session{so, GetEnvironment()};
+  ASSERT_STATUS_OK(session.Load(model_uri));
+  ASSERT_STATUS_OK(session.Initialize());  // optimizers run during initialization
 }
 
 // regression test for a model where the transpose optimizations incorrectly removed a node providing an implicit
@@ -4409,9 +4352,9 @@ TEST(TransposeOptimizerTests, RegressionTest_GitHubIssue10305) {
 
   SessionOptions so;
   so.session_logid = "TransposeOptimizerTests.RegressionTest_GitHubIssue10305";
-  InferenceSession session_object{so, GetEnvironment()};
-  ASSERT_STATUS_OK(session_object.Load(model_uri));
-  ASSERT_STATUS_OK(session_object.Initialize());  // optimizers run during initialization
+  InferenceSession session{so, GetEnvironment()};
+  ASSERT_STATUS_OK(session.Load(model_uri));
+  ASSERT_STATUS_OK(session.Initialize());  // optimizers run during initialization
 }
 
 // regression test for a model with DQ node with per-axis dequantization followed by a Transpose.
@@ -4432,30 +4375,31 @@ TEST(TransposeOptimizerTests, RegressionTest_GitHubIssue12151) {
 
   {
     so.graph_optimization_level = TransformerLevel::Default;  // off
-    InferenceSession session_object{so, GetEnvironment()};
-    ASSERT_STATUS_OK(session_object.Load(model_uri));
-    ASSERT_STATUS_OK(session_object.Initialize());
-    ASSERT_STATUS_OK(session_object.Run(feeds, output_names, &fetches_orig));
+    InferenceSession session{so, GetEnvironment()};
+    ASSERT_STATUS_OK(session.Load(model_uri));
+    ASSERT_STATUS_OK(session.Initialize());
+    ASSERT_STATUS_OK(session.Run(feeds, output_names, &fetches_orig));
   }
 
   {
     so.graph_optimization_level = TransformerLevel::Level1;  // enable transpose optimizer
-    InferenceSession session_object{so, GetEnvironment()};
-    ASSERT_STATUS_OK(session_object.Load(model_uri));
-    ASSERT_STATUS_OK(session_object.Initialize());
-    ASSERT_STATUS_OK(session_object.Run(feeds, output_names, &fetches));
+    InferenceSession session{so, GetEnvironment()};
+    ASSERT_STATUS_OK(session.Load(model_uri));
+    ASSERT_STATUS_OK(session.Initialize());
+    ASSERT_STATUS_OK(session.Run(feeds, output_names, &fetches));
   }
 
   ASSERT_THAT(fetches_orig[0].Get<Tensor>().DataAsSpan<float>(),
               testing::ContainerEq(fetches[0].Get<Tensor>().DataAsSpan<float>()));
 }
 
+// These tests uses internal testing EP with static kernels which requires a full build,
+// and the NHWC Conv which requires contrib ops
+#if !defined(ORT_MINIMAL_BUILD) && !defined(DISABLE_CONTRIB_OPS)
+
 // Test a Transpose node followed by a Reshape that is logically equivalent to an Transpose can be merged.
 // The test graph was extracted from a model we were trying to use with the QNN EP.
 TEST(TransposeOptimizerTests, QnnTransposeReshape) {
-  // test uses internal testing EP with static kernels which requires a full build,
-  // and the NHWC Conv with requires contrib ops
-#if !defined(ORT_MINIMAL_BUILD) && !defined(DISABLE_CONTRIB_OPS)
   Status status;
   auto model_uri = ORT_TSTR("testdata/layout_transform_reshape.onnx");
 
@@ -4497,14 +4441,17 @@ TEST(TransposeOptimizerTests, QnnTransposeReshape) {
   for (const auto& node : graph.Nodes()) {
     EXPECT_TRUE(node.GetExecutionProviderType() == expected_ep) << node.OpType() << " node named '" << node.Name()
                                                                 << "' was not assigned to the internal testing EP.";
+
+    if (node.Name() == "Mul_212" || node.Name() == "Add_213") {
+      // check that the special case in TransposeInputs for a single element input reconnects things back up correctly
+      const auto& inputs = node.InputDefs();
+      EXPECT_EQ(inputs.size(), size_t(2));
+      EXPECT_TRUE(inputs[1]->Exists());
+    }
   }
-#endif
 }
 
 TEST(TransposeOptimizerTests, QnnTransposeReshapeQDQ) {
-  // test uses internal testing EP with static kernels which requires a full build,
-  // and the NHWC Conv with requires contrib ops
-#if !defined(ORT_MINIMAL_BUILD) && !defined(DISABLE_CONTRIB_OPS)
   Status status;
   auto model_uri = ORT_TSTR("testdata/layout_transform_reshape.qdq.onnx");
 
@@ -4541,7 +4488,269 @@ TEST(TransposeOptimizerTests, QnnTransposeReshapeQDQ) {
     EXPECT_TRUE(node.GetExecutionProviderType() == expected_ep) << node.OpType() << " node named '" << node.Name()
                                                                 << "' was not assigned to the internal testing EP.";
   }
-#endif
+}
+
+// Validate handling for EP with layout specific Resize that prefers NHWC
+TEST(TransposeOptimizerTests, QnnResizeOpset11) {
+  Status status;
+  auto model_uri = ORT_TSTR("testdata/nhwc_resize_scales_opset11.onnx");
+
+  SessionOptions so;
+  // Uncomment to debug
+  // ASSERT_STATUS_OK(so.config_options.AddConfigEntry(kDebugLayoutTransformation, "1"));
+
+  using InternalTestingEP = onnxruntime::internal_testing_ep::InternalTestingExecutionProvider;
+
+  // set the test EP to support all ops in the model so that the layout transform applies to all nodes
+  const std::unordered_set<std::string> empty_set;
+  auto internal_testing_ep = std::make_unique<InternalTestingEP>(empty_set, empty_set, DataLayout::NHWC);
+  internal_testing_ep->EnableStaticKernels().TakeAllNodes();
+
+  InferenceSessionWrapper session{so, GetEnvironment()};
+  ASSERT_STATUS_OK(session.RegisterExecutionProvider(std::move(internal_testing_ep)));
+  ASSERT_STATUS_OK(session.Load(model_uri));
+  ASSERT_STATUS_OK(session.Initialize());
+
+  const auto& graph = session.GetGraph();
+  // all nodes should be assigned to the internal testing EP, which also means they should be in NHWC layout
+  std::string expected_ep(onnxruntime::utils::kInternalTestingExecutionProvider);
+  for (const auto& node : graph.Nodes()) {
+    EXPECT_TRUE(node.GetExecutionProviderType() == expected_ep) << node.OpType() << " node named '" << node.Name()
+                                                                << "' was not assigned to the internal testing EP.";
+    if (node.OpType() == "Resize") {
+      EXPECT_EQ(node.Domain(), kMSInternalNHWCDomain) << "Resize was not converted to NHWC layout";
+    }
+  }
+
+  std::map<std::string, int> op_to_count = CountOpsInGraph(graph);
+  ASSERT_EQ(op_to_count["Transpose"], 2) << "Resize should have been wrapped in 2 Transpose nodes to convert to NHWC";
+
+  // And the post-Resize Transpose should have been pushed all the way to the end
+  GraphViewer viewer(graph);
+  EXPECT_EQ(graph.GetNode(viewer.GetNodesInTopologicalOrder().back())->OpType(), "Transpose");
+}
+#endif  // !defined(ORT_MINIMAL_BUILD) && !defined(DISABLE_CONTRIB_OPS)
+
+static void CheckSharedInitializerHandling(bool broadcast) {
+  auto model_uri = broadcast ? ORT_TSTR("testdata/transpose_optimizer_shared_initializers_broadcast.onnx")
+                             : ORT_TSTR("testdata/transpose_optimizer_shared_initializers.onnx");
+
+  RandomValueGenerator random{123};
+  std::vector<int64_t> input_dims{1, 2, 2, 3};
+  std::vector<float> input_data = random.Gaussian<float>(input_dims, 0.0f, 1.0f);
+
+  OrtValue input;
+  CreateMLValue<float>(TestCPUExecutionProvider()->CreatePreferredAllocators()[0], input_dims, input_data, &input);
+
+  NameMLValMap feeds{{"input0", input}};
+
+  std::vector<std::string> output_names{"output0"};
+  std::vector<OrtValue> fetches_orig;
+  std::vector<OrtValue> fetches;
+
+  SessionOptions so;
+  ASSERT_STATUS_OK(so.config_options.AddConfigEntry(kOrtSessionOptionsDisableQuantQDQ, "1"));
+
+  // get results with no modifications to the model
+  {
+    so.graph_optimization_level = TransformerLevel::Default;  // off
+    InferenceSessionWrapper session{so, GetEnvironment()};
+    ASSERT_STATUS_OK(session.Load(model_uri));
+    ASSERT_STATUS_OK(session.Initialize());
+    ASSERT_STATUS_OK(session.Run(feeds, output_names, &fetches_orig));
+  }
+
+  {
+    InferenceSessionWrapper session{so, GetEnvironment()};
+    ASSERT_STATUS_OK(session.Load(model_uri));
+
+    // we call the ONNX transpose optimizer directly to simplify the model required to exercise the shared initializer
+    // handling. this means we don't need to disable optimizers that might alter the graph before the
+    // transpose optimizer runs (at a minimum ConstantFolding, CommonSubexpressionElimination and ConstantSharing).
+    Graph& graph = session.GetMutableGraph();
+    CPUAllocator allocator;
+
+    using namespace onnx_transpose_optimization;
+    auto api_graph = MakeApiGraph(graph, TestCPUExecutionProvider()->CreatePreferredAllocators()[0],
+                                  /*new_node_ep*/ nullptr);
+
+    // default optimization cost check
+    OptimizeResult result = Optimize(*api_graph);
+
+    ASSERT_EQ(result.error_msg, std::nullopt);
+    ASSERT_TRUE(result.graph_modified);
+    ASSERT_TRUE(graph.GraphResolveNeeded());
+    ASSERT_STATUS_OK(graph.Resolve());
+
+    // Use this hack to save model for viewing if needed
+    // ASSERT_STATUS_OK(Model::Save(const_cast<Model&>(session.GetModel()), "updated_model.onnx"));
+
+    std::map<std::string, int> op_to_count = CountOpsInGraph(graph);
+    EXPECT_EQ(op_to_count["Transpose"], 0) << "The Transpose nodes should have been pushed through or canceled out.";
+    if (broadcast) {
+      EXPECT_EQ(op_to_count["Unsqueeze"], 0) << "Any Unsqueeze nodes should have been canceled out.";
+    }
+
+    ASSERT_STATUS_OK(session.Initialize());
+    ASSERT_STATUS_OK(session.Run(feeds, output_names, &fetches));
+  }
+
+  ASSERT_THAT(fetches_orig[0].Get<Tensor>().DataAsSpan<float>(),
+              testing::ContainerEq(fetches[0].Get<Tensor>().DataAsSpan<float>()));
+}
+
+// test we re-use a modified shared initializer wherever possible. model has one initializer that is used by 3 DQ nodes
+// and one initializer that is used by 2 Add nodes. both cases should be handled with the initializer being
+// modified in-place for the first usage, and the Transpose added to the second usage being cancelled out when the
+// original Transpose at the start of the model is pushed down.
+TEST(TransposeOptimizerTests, SharedInitializerHandling) {
+  CheckSharedInitializerHandling(/*broadcast*/ false);
+}
+
+// same setup as the above test, however the initializer is broadcast to bring UnsqueezeInput into play.
+// the in-place modification of the initializer for the first usage results in
+//   <initializer> -> Transpose -> Squeeze -> {DQ | Add}
+// the later usages of the initializer should attempt to cancel out the Squeeze in UnsqueezeInput,
+// followed by canceling out the Transpose in TransposeInput.
+TEST(TransposeOptimizerTests, SharedInitializerHandlingBroadcast) {
+  CheckSharedInitializerHandling(/*broadcast*/ true);
+}
+
+// Unit test where EstimateTransposeValueCost must look past a DQ -> Squeeze to see the Transponse of a shared
+// initializer for the overall cost of pushing the Transpose throught the second Where to be negative.
+TEST(TransposeOptimizerTests, SharedInitializerHandlingBroadcast2) {
+  auto model_uri = ORT_TSTR("testdata/transpose_optimizer_shared_initializers_broadcast2.onnx");
+
+  RandomValueGenerator random{123};
+  std::vector<int64_t> cond_input_0_dims{3, 2};
+  std::vector<int64_t> cond_input_1_dims{2, 3};
+  std::vector<bool> cond_input_data = {true, false, false, true, true, false};
+
+  std::vector<int64_t> x_0_input_dims{3};
+  std::vector<int64_t> x_1_input_dims{3};
+  std::vector<float> x_input_data_0 = random.Gaussian<float>(x_0_input_dims, 0.0f, 1.0f);
+  std::vector<float> x_input_data_1 = random.Gaussian<float>(x_1_input_dims, 0.0f, 1.0f);
+
+  OrtValue cond_input_0, cond_input_1, x_input_0, x_input_1;
+  CreateMLValue<bool>(TestCPUExecutionProvider()->CreatePreferredAllocators()[0], cond_input_0_dims, cond_input_data,
+                      &cond_input_0);
+  CreateMLValue<bool>(TestCPUExecutionProvider()->CreatePreferredAllocators()[0], cond_input_1_dims, cond_input_data,
+                      &cond_input_1);
+  CreateMLValue<float>(TestCPUExecutionProvider()->CreatePreferredAllocators()[0], x_0_input_dims, x_input_data_0,
+                       &x_input_0);
+  CreateMLValue<float>(TestCPUExecutionProvider()->CreatePreferredAllocators()[0], x_1_input_dims, x_input_data_1,
+                       &x_input_1);
+
+  NameMLValMap feeds{{"cond_in_0", cond_input_0},
+                     {"cond_in_1", cond_input_1},
+                     {"x_in_0", x_input_0},
+                     {"x_in_1", x_input_1}};
+
+  std::vector<std::string> output_names{"output0"};
+  std::vector<OrtValue> fetches_orig;
+  std::vector<OrtValue> fetches;
+
+  SessionOptions so;
+  ASSERT_STATUS_OK(so.config_options.AddConfigEntry(kDebugLayoutTransformation, "1"));
+  ASSERT_STATUS_OK(so.config_options.AddConfigEntry(kOrtSessionOptionsDisableQuantQDQ, "1"));
+
+  // get results with no modifications to the model
+  {
+    so.graph_optimization_level = TransformerLevel::Default;  // off
+    InferenceSessionWrapper session{so, GetEnvironment()};
+    ASSERT_STATUS_OK(session.Load(model_uri));
+    ASSERT_STATUS_OK(session.Initialize());
+    ASSERT_STATUS_OK(session.Run(feeds, output_names, &fetches_orig));
+  }
+
+  {
+    InferenceSessionWrapper session{so, GetEnvironment()};
+    ASSERT_STATUS_OK(session.Load(model_uri));
+
+    // we call the ONNX transpose optimizer directly to simplify the model required to exercise the shared initializer
+    // handling. this means we don't need to disable optimizers that might alter the graph before the
+    // transpose optimizer runs (at a minimum ConstantFolding, CommonSubexpressionElimination and ConstantSharing).
+    Graph& graph = session.GetMutableGraph();
+    CPUAllocator allocator;
+
+    using namespace onnx_transpose_optimization;
+    auto api_graph = MakeApiGraph(graph, TestCPUExecutionProvider()->CreatePreferredAllocators()[0],
+                                  /*new_node_ep*/ nullptr);
+
+    // default optimization cost check
+    OptimizeResult result = Optimize(*api_graph);
+
+    ASSERT_EQ(result.error_msg, std::nullopt);
+    ASSERT_TRUE(result.graph_modified);
+    ASSERT_TRUE(graph.GraphResolveNeeded());
+    ASSERT_STATUS_OK(graph.Resolve());
+
+    // Use this hack to save model for viewing if needed
+    // ASSERT_STATUS_OK(Model::Save(const_cast<Model&>(session.GetModel()), updated_model.onnx"));
+
+    // Pushing the initial Transpose through the 2 Where nodes results in
+    // - x_in_0 needs Transpose and Unsqueeze to broadcast correctly into the first Where
+    // - y_quant is updated in-place to transposed layout and used in both Where nodes
+    // - x_in_1 needs Transpose and Unsqueeze to broadcast correctly into the second Where
+    // - cond_in_1 needs Transpose
+    //   - as we're pushing a Transpose through the Add for one input, and undo-ing the Transpose on y_quant for
+    //     the other input, we save 2 by adding 1 to cond_in_1
+    std::map<std::string, int> op_to_count = CountOpsInGraph(graph);
+    EXPECT_EQ(op_to_count["Transpose"], 3) << "The 2 X inputs and cond_in_1 should require transpose.";
+    EXPECT_EQ(op_to_count["Unsqueeze"], 2) << "The 2 X inputs should require Unsqueeze.";
+
+    ASSERT_STATUS_OK(session.Initialize());
+    ASSERT_STATUS_OK(session.Run(feeds, output_names, &fetches));
+  }
+
+  ASSERT_THAT(fetches_orig[0].Get<Tensor>().DataAsSpan<float>(),
+              testing::ContainerEq(fetches[0].Get<Tensor>().DataAsSpan<float>()));
+}
+
+// model where layout transform results in transposing a non-const input that is broadcast.
+// this inserts Unsqueeze -> Transpose between the input and the node.
+// test that QDQ node units are created for Unsqueeze and Transpose by inserting Q->DQ pairs after them
+TEST(TransposeOptimizerTests, QnnTransposeNonConstBroadcastInput) {
+  Status status;
+  auto model_uri = ORT_TSTR("testdata/layout_transform_nonconst_broadcast_input.onnx");
+
+  SessionOptions so;
+
+  // ASSERT_STATUS_OK(so.config_options.AddConfigEntry(kDebugLayoutTransformation, "1"));
+
+  using InternalTestingEP = onnxruntime::internal_testing_ep::InternalTestingExecutionProvider;
+
+  // set the test EP to support all ops in the model so that the layout transform applies to all nodes
+  const std::unordered_set<std::string> empty_set;
+  auto internal_testing_ep = std::make_unique<InternalTestingEP>(empty_set, empty_set, DataLayout::NHWC);
+  internal_testing_ep->EnableStaticKernels().TakeAllNodes();
+
+  InferenceSessionWrapper session{so, GetEnvironment()};
+  ASSERT_STATUS_OK(session.RegisterExecutionProvider(std::move(internal_testing_ep)));
+  ASSERT_STATUS_OK(session.Load(model_uri));
+  ASSERT_STATUS_OK(session.Initialize());
+
+  const auto& graph = session.GetGraph();
+  std::map<std::string, int> op_to_count = CountOpsInGraph(graph);
+
+  ASSERT_EQ(op_to_count["Transpose"], 3) << "Should have Transpose on 2 inputs and one on output.";
+
+  // all nodes should be assigned to the internal testing EP, which also means they should be in NHWC layout
+  std::string expected_ep(onnxruntime::utils::kInternalTestingExecutionProvider);
+  for (const auto& node : graph.Nodes()) {
+    EXPECT_EQ(node.GetExecutionProviderType(), expected_ep) << node.OpType() << " node named '" << node.Name()
+                                                            << "' was not assigned to the internal testing EP.";
+    // all nodes should be in QDQ node units except the Cast on an input which was not in a QDQ unit
+    if (node.OpType() != "QuantizeLinear" && node.OpType() != "DequantizeLinear" && node.OpType() != "Cast") {
+      for (auto cur_input = node.InputNodesBegin(), end = node.InputNodesEnd(); cur_input != end; ++cur_input) {
+        EXPECT_EQ(cur_input->OpType(), "DequantizeLinear");
+      }
+
+      for (auto cur_output = node.OutputNodesBegin(), end = node.OutputNodesEnd(); cur_output != end; ++cur_output) {
+        EXPECT_EQ(cur_output->OpType(), "QuantizeLinear");
+      }
+    }
+  }
 }
 }  // namespace test
 }  // namespace onnxruntime
diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc
index 56f924ed351fb..a72a0d105eefc 100644
--- a/onnxruntime/test/perftest/command_args_parser.cc
+++ b/onnxruntime/test/perftest/command_args_parser.cc
@@ -1,4 +1,5 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
+// Copyright (c) 2023 NVIDIA Corporation.
 // Licensed under the MIT License.
 
 #include "command_args_parser.h"
@@ -33,8 +34,8 @@ namespace perftest {
       "\t-A: Disable memory arena\n"
       "\t-I: Generate tensor input binding (Free dimensions are treated as 1.)\n"
       "\t-c [parallel runs]: Specifies the (max) number of runs to invoke simultaneously. Default:1.\n"
-      "\t-e [cpu|cuda|dnnl|tensorrt|openvino|dml|acl|nnapi|coreml|snpe|rocm|migraphx|xnnpack|vitisai]: Specifies the provider 'cpu','cuda','dnnl','tensorrt', "
-      "'openvino', 'dml', 'acl', 'nnapi', 'coreml', 'snpe', 'rocm', 'migraphx', 'xnnpack' or 'vitisai'. "
+      "\t-e [cpu|cuda|dnnl|tensorrt|openvino|dml|acl|nnapi|coreml|qnn|snpe|rocm|migraphx|xnnpack|vitisai]: Specifies the provider 'cpu','cuda','dnnl','tensorrt', "
+      "'openvino', 'dml', 'acl', 'nnapi', 'coreml', 'qnn', 'snpe', 'rocm', 'migraphx', 'xnnpack' or 'vitisai'. "
       "Default:'cpu'.\n"
       "\t-b [tf|ort]: backend to use. Default:ort\n"
       "\t-r [repeated_times]: Specifies the repeated times if running in 'times' test mode.Default:1000.\n"
@@ -53,13 +54,13 @@ namespace perftest {
       "\t-o [optimization level]: Default is 99 (all). Valid values are 0 (disable), 1 (basic), 2 (extended), 99 (all).\n"
       "\t\tPlease see onnxruntime_c_api.h (enum GraphOptimizationLevel) for the full list of all optimization levels.\n"
       "\t-u [optimized_model_path]: Specify the optimized model path for saving.\n"
-      "\t-d [cudnn_conv_algorithm]: Specify CUDNN convolution algorithms: 0(benchmark), 1(heuristic), 2(default). \n"
-      "\t-q: [CUDA only] use separate stream for copy. \n"
+      "\t-d [CUDA only][cudnn_conv_algorithm]: Specify CUDNN convolution algorithms: 0(benchmark), 1(heuristic), 2(default). \n"
+      "\t-q [CUDA only] use separate stream for copy. \n"
       "\t-z: Set denormal as zero. When turning on this option reduces latency dramatically, a model may have denormals.\n"
       "\t-i: Specify EP specific runtime options as key value pairs. Different runtime options available are: \n"
       "\t    [OpenVINO only] [device_type]: Overrides the accelerator hardware type and precision with these values at runtime.\n"
       "\t    [OpenVINO only] [device_id]: Selects a particular hardware device for inference.\n"
-      "\t    [OpenVINO only] [enable_vpu_fast_compile]: Optionally enabled to speeds up the model's compilation on VPU device targets.\n"
+      "\t    [OpenVINO only] [enable_npu_fast_compile]: Optionally enabled to speeds up the model's compilation on NPU device targets.\n"
       "\t    [OpenVINO only] [num_of_threads]: Overrides the accelerator hardware type and precision with these values at runtime.\n"
       "\t    [OpenVINO only] [cache_dir]: Explicitly specify the path to dump and load the blobs(Model caching) or cl_cache (Kernel Caching) files feature. If blob files are already present, it will be directly loaded.\n"
       "\t    [OpenVINO only] [enable_opencl_throttling]: Enables OpenCL queue throttling for GPU device(Reduces the CPU Utilization while using GPU) \n"
@@ -70,8 +71,12 @@ namespace perftest {
       "\t    [QNN only] [rpc_control_latency]: QNN rpc control latency. default to 10.\n"
       "\t    [QNN only] [htp_performance_mode]: QNN performance mode, options: 'burst', 'balanced', 'default', 'high_performance', \n"
       "\t    'high_power_saver', 'low_balanced', 'low_power_saver', 'power_saver', 'sustained_high_performance'. Default to 'default'. \n"
+      "\t    [QNN only] [qnn_context_priority]: QNN context priority, options: 'low', 'normal', 'normal_high', 'high'. Default to 'normal'. \n"
+      "\t    [QNN only] [qnn_saver_path]: QNN Saver backend path. e.g '/folderpath/libQnnSaver.so'.\n"
+      "\t    [QNN only] [htp_graph_finalization_optimization_mode]: QNN graph finalization optimization mode, options: \n"
+      "\t    '0', '1', '2', '3', default is '0'.\n"
       "\t [Usage]: -e <provider_name> -i '<key1>|<value1> <key2>|<value2>'\n\n"
-      "\t [Example] [For OpenVINO EP] -e openvino -i \"device_type|CPU_FP32 enable_vpu_fast_compile|true num_of_threads|5 enable_opencl_throttling|true cache_dir|\"<path>\"\"\n"
+      "\t [Example] [For OpenVINO EP] -e openvino -i \"device_type|CPU_FP32 enable_npu_fast_compile|true num_of_threads|5 enable_opencl_throttling|true cache_dir|\"<path>\"\"\n"
       "\t [Example] [For QNN EP] -e qnn -i \"backend_path|/folderpath/libQnnCpu.so\" \n\n"
       "\t    [TensorRT only] [trt_max_partition_iterations]: Maximum iterations for TensorRT parser to get capability.\n"
       "\t    [TensorRT only] [trt_min_subgraph_size]: Minimum size of TensorRT subgraphs.\n"
diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc
index 57b2403e23a37..c2dd81ec9f359 100644
--- a/onnxruntime/test/perftest/ort_test_session.cc
+++ b/onnxruntime/test/perftest/ort_test_session.cc
@@ -1,7 +1,12 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Copyright (c) 2023 NVIDIA Corporation.
+// Licensed under the MIT License.
+
 #include "ort_test_session.h"
 #include <algorithm>
 #include <limits>
 #include <set>
+#include <list>
 #include <type_traits>
 #include <core/session/onnxruntime_cxx_api.h>
 #include "core/session/onnxruntime_session_options_config_keys.h"
@@ -93,47 +98,77 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
 #endif
   } else if (provider_name == onnxruntime::kCudaExecutionProvider) {
 #ifdef USE_CUDA
-    OrtCUDAProviderOptions cuda_options;
-    cuda_options.cudnn_conv_algo_search = static_cast<OrtCudnnConvAlgoSearch>(performance_test_config.run_config.cudnn_conv_algo);
-    cuda_options.do_copy_in_default_stream = !performance_test_config.run_config.do_cuda_copy_in_separate_stream;
-    // TODO: Support arena configuration for users of perf test
-    session_options.AppendExecutionProvider_CUDA(cuda_options);
+    const auto& api = Ort::GetApi();
+    OrtCUDAProviderOptionsV2* cuda_options;
+    Ort::ThrowOnError(api.CreateCUDAProviderOptions(&cuda_options));
+    std::vector<const char*> option_keys, option_values;
+    // used to keep all option keys and value strings alive
+    std::list<std::string> buffer;
+    buffer.emplace_back("cudnn_conv_algo_search");
+    option_keys.push_back(buffer.back().c_str());
+    switch (performance_test_config.run_config.cudnn_conv_algo) {
+      case 0:
+        buffer.emplace_back("EXHAUSTIVE");
+        break;
+      case 1:
+        buffer.emplace_back("HEURISTIC");
+        break;
+      default:
+        buffer.emplace_back("DEFAULT");
+        break;
+    }
+    option_values.push_back(buffer.back().c_str());
+
+    buffer.emplace_back("do_copy_in_default_stream");
+    option_keys.push_back(buffer.back().c_str());
+    buffer.emplace_back(!performance_test_config.run_config.do_cuda_copy_in_separate_stream ? "1" : "0");
+    option_values.push_back(buffer.back().c_str());
+
+#ifdef _MSC_VER
+    std::string ov_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string);
+#else
+    std::string ov_string = performance_test_config.run_config.ep_runtime_config_string;
+#endif
+    std::istringstream ss(ov_string);
+    std::string token;
+    while (ss >> token) {
+      if (token == "") {
+        continue;
+      }
+      auto pos = token.find("|");
+      if (pos == std::string::npos || pos == 0 || pos == token.length()) {
+        ORT_THROW(
+            "[ERROR] [CUDA] Use a '|' to separate the key and value for the run-time option you are trying to use.\n");
+      }
+
+      buffer.emplace_back(token.substr(0, pos));
+      option_keys.push_back(buffer.back().c_str());
+      buffer.emplace_back(token.substr(pos + 1));
+      option_values.push_back(buffer.back().c_str());
+    }
+
+    Ort::Status status(api.UpdateCUDAProviderOptions(cuda_options,
+                                                     option_keys.data(), option_values.data(), option_keys.size()));
+    if (!status.IsOK()) {
+      OrtAllocator* allocator;
+      char* options;
+      Ort::ThrowOnError(api.GetAllocatorWithDefaultOptions(&allocator));
+      Ort::ThrowOnError(api.GetCUDAProviderOptionsAsString(cuda_options, allocator, &options));
+      ORT_THROW("[ERROR] [CUDA] Configuring the CUDA options failed with message: ", status.GetErrorMessage(),
+                "\nSupported options are:\n", options);
+    }
+    session_options.AppendExecutionProvider_CUDA_V2(*cuda_options);
 #else
     ORT_THROW("CUDA is not supported in this build\n");
 #endif
   } else if (provider_name == onnxruntime::kTensorrtExecutionProvider) {
 #ifdef USE_TENSORRT
-    int device_id = 0;
-    int trt_max_partition_iterations = 1000;
-    int trt_min_subgraph_size = 1;
-    size_t trt_max_workspace_size = 1 << 30;
-    bool trt_fp16_enable = false;
-    bool trt_int8_enable = false;
-    std::string trt_int8_calibration_table_name = "";
-    bool trt_int8_use_native_calibration_table = false;
-    bool trt_dla_enable = false;
-    int trt_dla_core = 0;
-    bool trt_dump_subgraphs = false;
-    bool trt_engine_cache_enable = false;
-    std::string trt_engine_cache_path = "";
-    bool trt_engine_decryption_enable = false;
-    std::string trt_engine_decryption_lib_path = "";
-    bool trt_force_sequential_engine_build = false;
-    bool trt_context_memory_sharing_enable = false;
-    bool trt_layer_norm_fp32_fallback = false;
-    bool trt_timing_cache_enable = false;
-    bool trt_force_timing_cache = false;
-    bool trt_detailed_build_log = false;
-    bool trt_build_heuristics_enable = false;
-    bool trt_sparsity_enable = false;
-    int trt_builder_optimization_level = 3;
-    int trt_auxiliary_streams = -1;
-    std::string trt_tactic_sources = "";
-    std::string trt_extra_plugin_lib_paths = "";
-    std::string trt_profile_min_shapes = "";
-    std::string trt_profile_max_shapes = "";
-    std::string trt_profile_opt_shapes = "";
-    bool trt_cuda_graph_enable = false;
+    const auto& api = Ort::GetApi();
+    OrtTensorRTProviderOptionsV2* tensorrt_options;
+    Ort::ThrowOnError(api.CreateTensorRTProviderOptions(&tensorrt_options));
+    std::vector<const char*> option_keys, option_values;
+    // used to keep all option keys and value strings alive
+    std::list<std::string> buffer;
 
 #ifdef _MSC_VER
     std::string ov_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string);
@@ -148,272 +183,31 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
       }
       auto pos = token.find("|");
       if (pos == std::string::npos || pos == 0 || pos == token.length()) {
-        ORT_THROW("[ERROR] [TensorRT] Use a '|' to separate the key and value for the run-time option you are trying to use.\n");
+        ORT_THROW(
+            "[ERROR] [TensorRT] Use a '|' to separate the key and value for the run-time option you are trying to use.\n");
       }
 
-      auto key = token.substr(0, pos);
-      auto value = token.substr(pos + 1);
-      if (key == "device_id") {
-        if (!value.empty()) {
-          device_id = std::stoi(value);
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'device_id' should be a number.\n");
-        }
-      } else if (key == "trt_max_partition_iterations") {
-        if (!value.empty()) {
-          trt_max_partition_iterations = std::stoi(value);
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_max_partition_iterations' should be a number.\n");
-        }
-      } else if (key == "trt_min_subgraph_size") {
-        if (!value.empty()) {
-          trt_min_subgraph_size = std::stoi(value);
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_min_subgraph_size' should be a number.\n");
-        }
-      } else if (key == "trt_max_workspace_size") {
-        if (!value.empty()) {
-          trt_max_workspace_size = std::stoull(value);
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_max_workspace_size' should be a number.\n");
-        }
-      } else if (key == "trt_fp16_enable") {
-        if (value == "true" || value == "True") {
-          trt_fp16_enable = true;
-        } else if (value == "false" || value == "False") {
-          trt_fp16_enable = false;
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_fp16_enable' should be a boolean i.e. true or false. Default value is false.\n");
-        }
-      } else if (key == "trt_int8_enable") {
-        if (value == "true" || value == "True") {
-          trt_int8_enable = true;
-        } else if (value == "false" || value == "False") {
-          trt_int8_enable = false;
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_enable' should be a boolean i.e. true or false. Default value is false.\n");
-        }
-      } else if (key == "trt_int8_calibration_table_name") {
-        if (!value.empty()) {
-          trt_int8_calibration_table_name = value;
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_calibration_table_name' should be a non-empty string.\n");
-        }
-      } else if (key == "trt_int8_use_native_calibration_table") {
-        if (value == "true" || value == "True") {
-          trt_int8_use_native_calibration_table = true;
-        } else if (value == "false" || value == "False") {
-          trt_int8_use_native_calibration_table = false;
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_use_native_calibration_table' should be a boolean i.e. true or false. Default value is false.\n");
-        }
-      } else if (key == "trt_dla_enable") {
-        if (value == "true" || value == "True") {
-          trt_dla_enable = true;
-        } else if (value == "false" || value == "False") {
-          trt_dla_enable = false;
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_dla_enable' should be a boolean i.e. true or false. Default value is false.\n");
-        }
-      } else if (key == "trt_dla_core") {
-        if (!value.empty()) {
-          trt_dla_core = std::stoi(value);
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_dla_core' should be a number.\n");
-        }
-      } else if (key == "trt_dump_subgraphs") {
-        if (value == "true" || value == "True") {
-          trt_dump_subgraphs = true;
-        } else if (value == "false" || value == "False") {
-          trt_dump_subgraphs = false;
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_dump_subgraphs' should be a boolean i.e. true or false. Default value is false.\n");
-        }
-      } else if (key == "trt_engine_cache_enable") {
-        if (value == "true" || value == "True") {
-          trt_engine_cache_enable = true;
-        } else if (value == "false" || value == "False") {
-          trt_engine_cache_enable = false;
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_cache_enable' should be a boolean i.e. true or false. Default value is false.\n");
-        }
-      } else if (key == "trt_engine_cache_path") {
-        if (!value.empty()) {
-          trt_engine_cache_path = value;
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_cache_path' should be a non-empty string.\n");
-        }
-      } else if (key == "trt_engine_decryption_enable") {
-        if (value == "true" || value == "True") {
-          trt_engine_decryption_enable = true;
-        } else if (value == "false" || value == "False") {
-          trt_engine_decryption_enable = false;
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_decryption_enable' should be a boolean i.e. true or false. Default value is false.\n");
-        }
-      } else if (key == "trt_engine_decryption_lib_path") {
-        if (!value.empty()) {
-          trt_engine_decryption_lib_path = value;
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_decryption_lib_path' should be a non-empty string.\n");
-        }
-      } else if (key == "trt_force_sequential_engine_build") {
-        if (value == "true" || value == "True") {
-          trt_force_sequential_engine_build = true;
-        } else if (value == "false" || value == "False") {
-          trt_force_sequential_engine_build = false;
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_force_sequential_engine_build' should be a boolean i.e. true or false. Default value is false.\n");
-        }
-      } else if (key == "trt_context_memory_sharing_enable") {
-        if (value == "true" || value == "True") {
-          trt_context_memory_sharing_enable = true;
-        } else if (value == "false" || value == "False") {
-          trt_context_memory_sharing_enable = false;
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_context_memory_sharing_enable' should be a boolean i.e. true or false. Default value is false.\n");
-        }
-      } else if (key == "trt_layer_norm_fp32_fallback") {
-        if (value == "true" || value == "True") {
-          trt_layer_norm_fp32_fallback = true;
-        } else if (value == "false" || value == "False") {
-          trt_layer_norm_fp32_fallback = false;
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_layer_norm_fp32_fallback' should be a boolean i.e. true or false. Default value is false.\n");
-        }
-      } else if (key == "trt_timing_cache_enable") {
-        if (value == "true" || value == "True") {
-          trt_timing_cache_enable = true;
-        } else if (value == "false" || value == "False") {
-          trt_timing_cache_enable = false;
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_timing_cache_enable' should be a boolean i.e. true or false. Default value is false.\n");
-        }
-      } else if (key == "trt_force_timing_cache") {
-        if (value == "true" || value == "True") {
-          trt_force_timing_cache = true;
-        } else if (value == "false" || value == "False") {
-          trt_force_timing_cache = false;
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_force_timing_cache' should be a boolean i.e. true or false. Default value is false.\n");
-        }
-      } else if (key == "trt_detailed_build_log") {
-        if (value == "true" || value == "True") {
-          trt_detailed_build_log = true;
-        } else if (value == "false" || value == "False") {
-          trt_detailed_build_log = false;
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_detailed_build_log' should be a boolean i.e. true or false. Default value is false.\n");
-        }
-      } else if (key == "trt_build_heuristics_enable") {
-        if (value == "true" || value == "True") {
-          trt_build_heuristics_enable = true;
-        } else if (value == "false" || value == "False") {
-          trt_build_heuristics_enable = false;
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_build_heuristics_enable' should be a boolean i.e. true or false. Default value is false.\n");
-        }
-      } else if (key == "trt_sparsity_enable") {
-        if (value == "true" || value == "True") {
-          trt_sparsity_enable = true;
-        } else if (value == "false" || value == "False") {
-          trt_sparsity_enable = false;
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_sparsity_enable' should be a boolean i.e. true or false. Default value is false.\n");
-        }
-      } else if (key == "trt_builder_optimization_level") {
-        if (!value.empty()) {
-          trt_builder_optimization_level = std::stoi(value);
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_builder_optimization_level' should be a number and default to 2.\n");
-        }
-      } else if (key == "trt_auxiliary_streams") {
-        if (!value.empty()) {
-          trt_auxiliary_streams = std::stoi(value);
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_auxiliary_streams' should be a number.\n");
-        }
-      } else if (key == "trt_tactic_sources") {
-        if (!value.empty()) {
-          trt_tactic_sources = value;
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_tactic_sources' should be a non-empty string.\n");
-        }
-      } else if (key == "trt_extra_plugin_lib_paths") {
-        if (!value.empty()) {
-          trt_extra_plugin_lib_paths = value;
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_extra_plugin_lib_paths' should be a non-empty string.\n");
-        }
-      } else if (key == "trt_profile_min_shapes") {
-        if (!value.empty()) {
-          trt_profile_min_shapes = value;
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_profile_min_shapes' should be a non-empty string.\n");
-        }
-      } else if (key == "trt_profile_max_shapes") {
-        if (!value.empty()) {
-          trt_profile_max_shapes = value;
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_profile_max_shapes' should be a non-empty string.\n");
-        }
-      } else if (key == "trt_profile_opt_shapes") {
-        if (!value.empty()) {
-          trt_profile_opt_shapes = value;
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_profile_opt_shapes' should be a non-empty string.\n");
-        }
-      } else if (key == "trt_cuda_graph_enable") {
-        if (value == "true" || value == "True") {
-          trt_cuda_graph_enable = true;
-        } else if (value == "false" || value == "False") {
-          trt_cuda_graph_enable = false;
-        } else {
-          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_cuda_graph_enable' should be a boolean i.e. true or false. Default value is false.\n");
-        }
-      } else {
-        ORT_THROW("[ERROR] [TensorRT] wrong key type entered. Choose from the following runtime key options that are available for TensorRT. ['device_id', 'trt_max_partition_iterations', 'trt_min_subgraph_size', 'trt_max_workspace_size', 'trt_fp16_enable', 'trt_int8_enable', 'trt_int8_calibration_table_name', 'trt_int8_use_native_calibration_table', 'trt_dla_enable', 'trt_dla_core', 'trt_dump_subgraphs', 'trt_engine_cache_enable', 'trt_engine_cache_path', 'trt_engine_decryption_enable', 'trt_engine_decryption_lib_path', 'trt_force_sequential_engine_build', 'trt_context_memory_sharing_enable', 'trt_layer_norm_fp32_fallback', 'trt_timing_cache_enable', 'trt_force_timing_cache', 'trt_detailed_build_log', 'trt_build_heuristics_enable', 'trt_sparsity_enable', 'trt_builder_optimization_level', 'trt_auxiliary_streams', 'trt_tactic_sources', 'trt_extra_plugin_lib_paths', 'trt_profile_min_shapes', 'trt_profile_max_shapes', 'trt_profile_opt_shapes', 'trt_cuda_graph_enable'] \n");
-      }
+      buffer.emplace_back(token.substr(0, pos));
+      option_keys.push_back(buffer.back().c_str());
+      buffer.emplace_back(token.substr(pos + 1));
+      option_values.push_back(buffer.back().c_str());
     }
-    OrtTensorRTProviderOptionsV2 tensorrt_options;
-    tensorrt_options.device_id = device_id;
-    tensorrt_options.has_user_compute_stream = 0;
-    tensorrt_options.user_compute_stream = nullptr;
-    tensorrt_options.trt_max_partition_iterations = trt_max_partition_iterations;
-    tensorrt_options.trt_min_subgraph_size = trt_min_subgraph_size;
-    tensorrt_options.trt_max_workspace_size = trt_max_workspace_size;
-    tensorrt_options.trt_fp16_enable = trt_fp16_enable;
-    tensorrt_options.trt_int8_enable = trt_int8_enable;
-    tensorrt_options.trt_int8_calibration_table_name = trt_int8_calibration_table_name.c_str();
-    tensorrt_options.trt_int8_use_native_calibration_table = trt_int8_use_native_calibration_table;
-    tensorrt_options.trt_dla_enable = trt_dla_enable;
-    tensorrt_options.trt_dla_core = trt_dla_core;
-    tensorrt_options.trt_dump_subgraphs = trt_dump_subgraphs;
-    tensorrt_options.trt_engine_cache_enable = trt_engine_cache_enable;
-    tensorrt_options.trt_engine_cache_path = trt_engine_cache_path.c_str();
-    tensorrt_options.trt_engine_decryption_enable = trt_engine_decryption_enable;
-    tensorrt_options.trt_engine_decryption_lib_path = trt_engine_decryption_lib_path.c_str();
-    tensorrt_options.trt_force_sequential_engine_build = trt_force_sequential_engine_build;
-    tensorrt_options.trt_context_memory_sharing_enable = trt_context_memory_sharing_enable;
-    tensorrt_options.trt_layer_norm_fp32_fallback = trt_layer_norm_fp32_fallback;
-    tensorrt_options.trt_timing_cache_enable = trt_timing_cache_enable;
-    tensorrt_options.trt_force_timing_cache = trt_force_timing_cache;
-    tensorrt_options.trt_detailed_build_log = trt_detailed_build_log;
-    tensorrt_options.trt_build_heuristics_enable = trt_build_heuristics_enable;
-    tensorrt_options.trt_sparsity_enable = trt_sparsity_enable;
-    tensorrt_options.trt_builder_optimization_level = trt_builder_optimization_level;
-    tensorrt_options.trt_auxiliary_streams = trt_auxiliary_streams;
-    tensorrt_options.trt_tactic_sources = trt_tactic_sources.c_str();
-    tensorrt_options.trt_extra_plugin_lib_paths = trt_extra_plugin_lib_paths.c_str();
-    tensorrt_options.trt_profile_min_shapes = trt_profile_min_shapes.c_str();
-    tensorrt_options.trt_profile_max_shapes = trt_profile_max_shapes.c_str();
-    tensorrt_options.trt_profile_opt_shapes = trt_profile_opt_shapes.c_str();
-    tensorrt_options.trt_cuda_graph_enable = trt_cuda_graph_enable;
-
-    session_options.AppendExecutionProvider_TensorRT_V2(tensorrt_options);
+
+    Ort::Status status(api.UpdateTensorRTProviderOptions(tensorrt_options,
+                                                         option_keys.data(), option_values.data(), option_keys.size()));
+    if (!status.IsOK()) {
+      OrtAllocator* allocator;
+      char* options;
+      Ort::ThrowOnError(api.GetAllocatorWithDefaultOptions(&allocator));
+      Ort::ThrowOnError(api.GetTensorRTProviderOptionsAsString(tensorrt_options, allocator, &options));
+      ORT_THROW("[ERROR] [TensorRT] Configuring the CUDA options failed with message: ", status.GetErrorMessage(),
+                "\nSupported options are:\n", options);
+    }
+
+    session_options.AppendExecutionProvider_TensorRT_V2(*tensorrt_options);
 
     OrtCUDAProviderOptions cuda_options;
-    cuda_options.device_id = device_id;
+    cuda_options.device_id = tensorrt_options->device_id;
     cuda_options.cudnn_conv_algo_search = static_cast<OrtCudnnConvAlgoSearch>(performance_test_config.run_config.cudnn_conv_algo);
     cuda_options.do_copy_in_default_stream = !performance_test_config.run_config.do_cuda_copy_in_separate_stream;
     // TODO: Support arena configuration for users of perf test
@@ -446,8 +240,7 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
       if (key == "device_type") {
         std::set<std::string> ov_supported_device_types = {"CPU_FP32", "CPU_FP16", "GPU_FP32",
                                                            "GPU.0_FP32", "GPU.1_FP32", "GPU_FP16",
-                                                           "GPU.0_FP16", "GPU.1_FP16",
-                                                           "VPUX_FP16", "VPUX_U8"};
+                                                           "GPU.0_FP16", "GPU.1_FP16"};
         if (ov_supported_device_types.find(value) != ov_supported_device_types.end()) {
           ov_options[key] = value;
         } else if (value.find("HETERO:") == 0) {
@@ -460,17 +253,17 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
           ORT_THROW(
               "[ERROR] [OpenVINO] You have selcted wrong configuration value for the key 'device_type'. "
               "Select from 'CPU_FP32', 'CPU_FP16', 'GPU_FP32', 'GPU.0_FP32', 'GPU.1_FP32', 'GPU_FP16', "
-              "'GPU.0_FP16', 'GPU.1_FP16', 'VPUX_FP16', 'VPUX_U8' or from"
+              "'GPU.0_FP16', 'GPU.1_FP16' or from"
               " HETERO/MULTI/AUTO options available. \n");
         }
       } else if (key == "device_id") {
         ov_options[key] = value;
-      } else if (key == "enable_vpu_fast_compile") {
+      } else if (key == "enable_npu_fast_compile") {
         if (value == "true" || value == "True" ||
             value == "false" || value == "False") {
           ov_options[key] = value;
         } else {
-          ORT_THROW("[ERROR] [OpenVINO] The value for the key 'enable_vpu_fast_compile' should be a boolean i.e. true or false. Default value is false.\n");
+          ORT_THROW("[ERROR] [OpenVINO] The value for the key 'enable_npu_fast_compile' should be a boolean i.e. true or false. Default value is false.\n");
         }
       } else if (key == "enable_opencl_throttling") {
         if (value == "true" || value == "True" ||
@@ -505,7 +298,7 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
           ov_options[key] = value;
         }
       } else {
-        ORT_THROW("[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO. ['device_type', 'device_id', 'enable_vpu_fast_compile', 'num_of_threads', 'cache_dir', 'num_streams', 'enable_opencl_throttling|true'] \n");
+        ORT_THROW("[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO. ['device_type', 'device_id', 'enable_npu_fast_compile', 'num_of_threads', 'cache_dir', 'num_streams', 'enable_opencl_throttling|true'] \n");
       }
     }
     session_options.AppendExecutionProvider("OpenVINO", ov_options);
@@ -563,9 +356,26 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
           std::string str = str_stream.str();
           ORT_THROW("Supported htp_performance_mode: " + str);
         }
+      } else if (key == "qnn_saver_path") {
+        // no validation
+      } else if (key == "htp_graph_finalization_optimization_mode") {
+        std::unordered_set<std::string> supported_htp_graph_final_opt_modes = {"0", "1", "2", "3"};
+        if (supported_htp_graph_final_opt_modes.find(value) == supported_htp_graph_final_opt_modes.end()) {
+          std::ostringstream str_stream;
+          std::copy(supported_htp_graph_final_opt_modes.begin(), supported_htp_graph_final_opt_modes.end(),
+                    std::ostream_iterator<std::string>(str_stream, ","));
+          std::string str = str_stream.str();
+          ORT_THROW("Wrong value for htp_graph_finalization_optimization_mode. select from: " + str);
+        }
+      } else if (key == "qnn_context_priority") {
+        std::set<std::string> supported_qnn_context_priority = {"low", "normal", "normal_high", "high"};
+        if (supported_qnn_context_priority.find(value) == supported_qnn_context_priority.end()) {
+          ORT_THROW("Supported qnn_context_priority: low, normal, normal_high, high");
+        }
       } else {
         ORT_THROW(R"(Wrong key type entered. Choose from options: ['backend_path', 'qnn_context_cache_enable',
-'qnn_context_cache_path', 'profiling_level', 'rpc_control_latency', 'htp_performance_mode'])");
+'qnn_context_cache_path', 'profiling_level', 'rpc_control_latency', 'htp_performance_mode', 'qnn_saver_path',
+'htp_graph_finalization_optimization_mode', 'qnn_context_priority'])");
       }
 
       qnn_options[key] = value;
@@ -662,9 +472,12 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)");
 #endif
   } else if (provider_name == onnxruntime::kDmlExecutionProvider) {
 #ifdef USE_DML
-    Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_DML(session_options, 0));
+    std::unordered_map<std::string, std::string> dml_options;
+    dml_options["performance_preference"] = "high_performance";
+    dml_options["device_filter"] = "gpu";
+    session_options.AppendExecutionProvider("DML", dml_options);
 #else
-    ORT_THROW("DirectML is not supported in this build\n");
+    ORT_THROW("DML is not supported in this build\n");
 #endif
   } else if (provider_name == onnxruntime::kAclExecutionProvider) {
 #ifdef USE_ACL
diff --git a/onnxruntime/test/platform/ios/ios_package_test/.gitignore b/onnxruntime/test/platform/apple/apple_package_test/.gitignore
similarity index 100%
rename from onnxruntime/test/platform/ios/ios_package_test/.gitignore
rename to onnxruntime/test/platform/apple/apple_package_test/.gitignore
diff --git a/onnxruntime/test/platform/ios/ios_package_test/Podfile.template b/onnxruntime/test/platform/apple/apple_package_test/Podfile.template
similarity index 52%
rename from onnxruntime/test/platform/ios/ios_package_test/Podfile.template
rename to onnxruntime/test/platform/apple/apple_package_test/Podfile.template
index d2155660d73da..3d191d6fb1cc6 100644
--- a/onnxruntime/test/platform/ios/ios_package_test/Podfile.template
+++ b/onnxruntime/test/platform/apple/apple_package_test/Podfile.template
@@ -1,14 +1,34 @@
-platform :ios, '13.0'
+def include_macos_target
+  if '@C_POD_NAME@' != 'onnxruntime-mobile-c'
+    return true
+  end
+  return false
+end
 
 target 'ios_package_test' do
   # Comment the next line if you don't want to use dynamic frameworks
   use_frameworks!
 
+  platform :ios, '13.0'
+
   target 'ios_package_testUITests' do
     inherit! :search_paths
     pod '@C_POD_NAME@', :podspec  => '@C_POD_PODSPEC@'
   end
+end
 
+if include_macos_target
+  target 'macos_package_test' do
+      # Comment the next line if you don't want to use dynamic frameworks
+      use_frameworks!
+
+      platform :osx, '11.0'
+
+      target 'macos_package_testUITests' do
+        inherit! :search_paths
+        pod '@C_POD_NAME@', :podspec  => '@C_POD_PODSPEC@'
+      end
+  end
 end
 
 # This is to prevent the pods to be code signed if enabled
diff --git a/onnxruntime/test/platform/ios/ios_package_test/README.md b/onnxruntime/test/platform/apple/apple_package_test/README.md
similarity index 100%
rename from onnxruntime/test/platform/ios/ios_package_test/README.md
rename to onnxruntime/test/platform/apple/apple_package_test/README.md
diff --git a/onnxruntime/test/platform/ios/ios_package_test/ios_package_test.xcodeproj/project.pbxproj b/onnxruntime/test/platform/apple/apple_package_test/apple_package_test.xcodeproj/project.pbxproj
similarity index 57%
rename from onnxruntime/test/platform/ios/ios_package_test/ios_package_test.xcodeproj/project.pbxproj
rename to onnxruntime/test/platform/apple/apple_package_test/apple_package_test.xcodeproj/project.pbxproj
index 151db693236f0..66dd772e5e40b 100644
--- a/onnxruntime/test/platform/ios/ios_package_test/ios_package_test.xcodeproj/project.pbxproj
+++ b/onnxruntime/test/platform/apple/apple_package_test/apple_package_test.xcodeproj/project.pbxproj
@@ -14,6 +14,11 @@
 		229E595926586B4A006E41AE /* sigmoid.ort in Resources */ = {isa = PBXBuildFile; fileRef = 229E595826586B4A006E41AE /* sigmoid.ort */; };
 		22C1D8EA271A79FD002CEE67 /* ios_package_uitest_cpp_api.mm in Sources */ = {isa = PBXBuildFile; fileRef = 22C1D8E9271A79FD002CEE67 /* ios_package_uitest_cpp_api.mm */; };
 		22C1D8EB271A7A06002CEE67 /* sigmoid.ort in Resources */ = {isa = PBXBuildFile; fileRef = 229E595826586B4A006E41AE /* sigmoid.ort */; };
+		51C316BD2B0881450033C70B /* AppDelegate.m in Sources */ = {isa = PBXBuildFile; fileRef = 51C316BC2B0881450033C70B /* AppDelegate.m */; };
+		51C316C52B0881480033C70B /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 51C316C32B0881480033C70B /* Main.storyboard */; };
+		51C316C72B0881480033C70B /* main.m in Sources */ = {isa = PBXBuildFile; fileRef = 51C316C62B0881480033C70B /* main.m */; };
+		51C316DC2B0881490033C70B /* macos_package_uitest_cpp_api.mm in Sources */ = {isa = PBXBuildFile; fileRef = 51C316DB2B0881490033C70B /* macos_package_uitest_cpp_api.mm */; };
+		51C316E82B0892EE0033C70B /* sigmoid.ort in Resources */ = {isa = PBXBuildFile; fileRef = 229E595826586B4A006E41AE /* sigmoid.ort */; };
 /* End PBXBuildFile section */
 
 /* Begin PBXContainerItemProxy section */
@@ -24,6 +29,13 @@
 			remoteGlobalIDString = 229E591B265869BF006E41AE;
 			remoteInfo = ios_package_test;
 		};
+		51C316D82B0881490033C70B /* PBXContainerItemProxy */ = {
+			isa = PBXContainerItemProxy;
+			containerPortal = 229E5914265869BF006E41AE /* Project object */;
+			proxyType = 1;
+			remoteGlobalIDString = 51C316B82B0881450033C70B;
+			remoteInfo = macos_package_test;
+		};
 /* End PBXContainerItemProxy section */
 
 /* Begin PBXFileReference section */
@@ -37,6 +49,14 @@
 		229E595826586B4A006E41AE /* sigmoid.ort */ = {isa = PBXFileReference; lastKnownFileType = file; path = sigmoid.ort; sourceTree = "<group>"; };
 		22C1D8DE271A79AF002CEE67 /* ios_package_testUITests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = ios_package_testUITests.xctest; sourceTree = BUILT_PRODUCTS_DIR; };
 		22C1D8E9271A79FD002CEE67 /* ios_package_uitest_cpp_api.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = ios_package_uitest_cpp_api.mm; sourceTree = "<group>"; };
+		51C316B92B0881450033C70B /* macos_package_test.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = macos_package_test.app; sourceTree = BUILT_PRODUCTS_DIR; };
+		51C316BB2B0881450033C70B /* AppDelegate.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = AppDelegate.h; sourceTree = "<group>"; };
+		51C316BC2B0881450033C70B /* AppDelegate.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = AppDelegate.m; sourceTree = "<group>"; };
+		51C316C42B0881480033C70B /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/Main.storyboard; sourceTree = "<group>"; };
+		51C316C62B0881480033C70B /* main.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = main.m; sourceTree = "<group>"; };
+		51C316C82B0881480033C70B /* macos_package_test.entitlements */ = {isa = PBXFileReference; lastKnownFileType = text.plist.entitlements; path = macos_package_test.entitlements; sourceTree = "<group>"; };
+		51C316D72B0881490033C70B /* macos_package_testUITests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = macos_package_testUITests.xctest; sourceTree = BUILT_PRODUCTS_DIR; };
+		51C316DB2B0881490033C70B /* macos_package_uitest_cpp_api.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = macos_package_uitest_cpp_api.mm; sourceTree = "<group>"; };
 /* End PBXFileReference section */
 
 /* Begin PBXFrameworksBuildPhase section */
@@ -54,6 +74,20 @@
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
+		51C316B62B0881450033C70B /* Frameworks */ = {
+			isa = PBXFrameworksBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+		51C316D42B0881490033C70B /* Frameworks */ = {
+			isa = PBXFrameworksBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
 /* End PBXFrameworksBuildPhase section */
 
 /* Begin PBXGroup section */
@@ -63,7 +97,10 @@
 				229E595426586A77006E41AE /* models */,
 				229E591E265869BF006E41AE /* ios_package_test */,
 				22C1D8DF271A79AF002CEE67 /* ios_package_testUITests */,
+				51C316BA2B0881450033C70B /* macos_package_test */,
+				51C316DA2B0881490033C70B /* macos_package_testUITests */,
 				229E591D265869BF006E41AE /* Products */,
+				B49FE29C3625E88EDCCDD4BC /* Pods */,
 			);
 			sourceTree = "<group>";
 		};
@@ -72,6 +109,8 @@
 			children = (
 				229E591C265869BF006E41AE /* ios_package_test.app */,
 				22C1D8DE271A79AF002CEE67 /* ios_package_testUITests.xctest */,
+				51C316B92B0881450033C70B /* macos_package_test.app */,
+				51C316D72B0881490033C70B /* macos_package_testUITests.xctest */,
 			);
 			name = Products;
 			sourceTree = "<group>";
@@ -105,6 +144,33 @@
 			path = ios_package_testUITests;
 			sourceTree = "<group>";
 		};
+		51C316BA2B0881450033C70B /* macos_package_test */ = {
+			isa = PBXGroup;
+			children = (
+				51C316BB2B0881450033C70B /* AppDelegate.h */,
+				51C316BC2B0881450033C70B /* AppDelegate.m */,
+				51C316C32B0881480033C70B /* Main.storyboard */,
+				51C316C62B0881480033C70B /* main.m */,
+				51C316C82B0881480033C70B /* macos_package_test.entitlements */,
+			);
+			path = macos_package_test;
+			sourceTree = "<group>";
+		};
+		51C316DA2B0881490033C70B /* macos_package_testUITests */ = {
+			isa = PBXGroup;
+			children = (
+				51C316DB2B0881490033C70B /* macos_package_uitest_cpp_api.mm */,
+			);
+			path = macos_package_testUITests;
+			sourceTree = "<group>";
+		};
+		B49FE29C3625E88EDCCDD4BC /* Pods */ = {
+			isa = PBXGroup;
+			children = (
+			);
+			path = Pods;
+			sourceTree = "<group>";
+		};
 /* End PBXGroup section */
 
 /* Begin PBXNativeTarget section */
@@ -143,6 +209,41 @@
 			productReference = 22C1D8DE271A79AF002CEE67 /* ios_package_testUITests.xctest */;
 			productType = "com.apple.product-type.bundle.ui-testing";
 		};
+		51C316B82B0881450033C70B /* macos_package_test */ = {
+			isa = PBXNativeTarget;
+			buildConfigurationList = 51C316DF2B0881490033C70B /* Build configuration list for PBXNativeTarget "macos_package_test" */;
+			buildPhases = (
+				51C316B52B0881450033C70B /* Sources */,
+				51C316B62B0881450033C70B /* Frameworks */,
+				51C316B72B0881450033C70B /* Resources */,
+			);
+			buildRules = (
+			);
+			dependencies = (
+			);
+			name = macos_package_test;
+			productName = macos_package_test;
+			productReference = 51C316B92B0881450033C70B /* macos_package_test.app */;
+			productType = "com.apple.product-type.application";
+		};
+		51C316D62B0881490033C70B /* macos_package_testUITests */ = {
+			isa = PBXNativeTarget;
+			buildConfigurationList = 51C316E52B0881490033C70B /* Build configuration list for PBXNativeTarget "macos_package_testUITests" */;
+			buildPhases = (
+				51C316D32B0881490033C70B /* Sources */,
+				51C316D42B0881490033C70B /* Frameworks */,
+				51C316D52B0881490033C70B /* Resources */,
+			);
+			buildRules = (
+			);
+			dependencies = (
+				51C316D92B0881490033C70B /* PBXTargetDependency */,
+			);
+			name = macos_package_testUITests;
+			productName = macos_package_testUITests;
+			productReference = 51C316D72B0881490033C70B /* macos_package_testUITests.xctest */;
+			productType = "com.apple.product-type.bundle.ui-testing";
+		};
 /* End PBXNativeTarget section */
 
 /* Begin PBXProject section */
@@ -158,9 +259,16 @@
 						CreatedOnToolsVersion = 13.0;
 						TestTargetID = 229E591B265869BF006E41AE;
 					};
+					51C316B82B0881450033C70B = {
+						CreatedOnToolsVersion = 15.0.1;
+					};
+					51C316D62B0881490033C70B = {
+						CreatedOnToolsVersion = 15.0.1;
+						TestTargetID = 51C316B82B0881450033C70B;
+					};
 				};
 			};
-			buildConfigurationList = 229E5917265869BF006E41AE /* Build configuration list for PBXProject "ios_package_test" */;
+			buildConfigurationList = 229E5917265869BF006E41AE /* Build configuration list for PBXProject "apple_package_test" */;
 			compatibilityVersion = "Xcode 9.3";
 			developmentRegion = en;
 			hasScannedForEncodings = 0;
@@ -175,6 +283,8 @@
 			targets = (
 				229E591B265869BF006E41AE /* ios_package_test */,
 				22C1D8DD271A79AF002CEE67 /* ios_package_testUITests */,
+				51C316B82B0881450033C70B /* macos_package_test */,
+				51C316D62B0881490033C70B /* macos_package_testUITests */,
 			);
 		};
 /* End PBXProject section */
@@ -198,6 +308,22 @@
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
+		51C316B72B0881450033C70B /* Resources */ = {
+			isa = PBXResourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				51C316C52B0881480033C70B /* Main.storyboard in Resources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+		51C316D52B0881490033C70B /* Resources */ = {
+			isa = PBXResourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				51C316E82B0892EE0033C70B /* sigmoid.ort in Resources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
 /* End PBXResourcesBuildPhase section */
 
 /* Begin PBXSourcesBuildPhase section */
@@ -218,6 +344,23 @@
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
+		51C316B52B0881450033C70B /* Sources */ = {
+			isa = PBXSourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				51C316C72B0881480033C70B /* main.m in Sources */,
+				51C316BD2B0881450033C70B /* AppDelegate.m in Sources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+		51C316D32B0881490033C70B /* Sources */ = {
+			isa = PBXSourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				51C316DC2B0881490033C70B /* macos_package_uitest_cpp_api.mm in Sources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
 /* End PBXSourcesBuildPhase section */
 
 /* Begin PBXTargetDependency section */
@@ -226,6 +369,11 @@
 			target = 229E591B265869BF006E41AE /* ios_package_test */;
 			targetProxy = 22C1D8E4271A79AF002CEE67 /* PBXContainerItemProxy */;
 		};
+		51C316D92B0881490033C70B /* PBXTargetDependency */ = {
+			isa = PBXTargetDependency;
+			target = 51C316B82B0881450033C70B /* macos_package_test */;
+			targetProxy = 51C316D82B0881490033C70B /* PBXContainerItemProxy */;
+		};
 /* End PBXTargetDependency section */
 
 /* Begin PBXVariantGroup section */
@@ -245,6 +393,14 @@
 			name = LaunchScreen.storyboard;
 			sourceTree = "<group>";
 		};
+		51C316C32B0881480033C70B /* Main.storyboard */ = {
+			isa = PBXVariantGroup;
+			children = (
+				51C316C42B0881480033C70B /* Base */,
+			);
+			name = Main.storyboard;
+			sourceTree = "<group>";
+		};
 /* End PBXVariantGroup section */
 
 /* Begin XCBuildConfiguration section */
@@ -300,6 +456,7 @@
 				GCC_WARN_UNUSED_FUNCTION = YES;
 				GCC_WARN_UNUSED_VARIABLE = YES;
 				IPHONEOS_DEPLOYMENT_TARGET = 13.0;
+				MACOSX_DEPLOYMENT_TARGET = 11.0;
 				MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
 				MTL_FAST_MATH = YES;
 				ONLY_ACTIVE_ARCH = YES;
@@ -353,6 +510,7 @@
 				GCC_WARN_UNUSED_FUNCTION = YES;
 				GCC_WARN_UNUSED_VARIABLE = YES;
 				IPHONEOS_DEPLOYMENT_TARGET = 13.0;
+				MACOSX_DEPLOYMENT_TARGET = 11.0;
 				MTL_ENABLE_DEBUG_INFO = NO;
 				MTL_FAST_MATH = YES;
 				SDKROOT = iphoneos;
@@ -365,6 +523,7 @@
 			buildSettings = {
 				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
 				ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
+				CODE_SIGN_IDENTITY = "Apple Development";
 				CODE_SIGN_STYLE = Automatic;
 				INFOPLIST_FILE = ios_package_test/Info.plist;
 				LD_RUNPATH_SEARCH_PATHS = (
@@ -373,7 +532,10 @@
 				);
 				PRODUCT_BUNDLE_IDENTIFIER = "ai.onnxruntime.tests.ios-package-test";
 				PRODUCT_NAME = "$(TARGET_NAME)";
-				TARGETED_DEVICE_FAMILY = "1,2";
+				SUPPORTED_PLATFORMS = "iphoneos iphonesimulator";
+				SUPPORTS_MACCATALYST = NO;
+				SUPPORTS_MAC_DESIGNED_FOR_IPHONE_IPAD = NO;
+				TARGETED_DEVICE_FAMILY = 1;
 			};
 			name = Debug;
 		};
@@ -382,6 +544,7 @@
 			buildSettings = {
 				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
 				ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
+				CODE_SIGN_IDENTITY = "Apple Development";
 				CODE_SIGN_STYLE = Automatic;
 				INFOPLIST_FILE = ios_package_test/Info.plist;
 				LD_RUNPATH_SEARCH_PATHS = (
@@ -390,7 +553,10 @@
 				);
 				PRODUCT_BUNDLE_IDENTIFIER = "ai.onnxruntime.tests.ios-package-test";
 				PRODUCT_NAME = "$(TARGET_NAME)";
-				TARGETED_DEVICE_FAMILY = "1,2";
+				SUPPORTED_PLATFORMS = "iphoneos iphonesimulator";
+				SUPPORTS_MACCATALYST = NO;
+				SUPPORTS_MAC_DESIGNED_FOR_IPHONE_IPAD = NO;
+				TARGETED_DEVICE_FAMILY = 1;
 			};
 			name = Release;
 		};
@@ -398,6 +564,7 @@
 			isa = XCBuildConfiguration;
 			buildSettings = {
 				CLANG_CXX_LANGUAGE_STANDARD = "gnu++17";
+				CODE_SIGN_IDENTITY = "Apple Development";
 				CODE_SIGN_STYLE = Automatic;
 				CURRENT_PROJECT_VERSION = 1;
 				GENERATE_INFOPLIST_FILE = YES;
@@ -420,6 +587,7 @@
 			isa = XCBuildConfiguration;
 			buildSettings = {
 				CLANG_CXX_LANGUAGE_STANDARD = "gnu++17";
+				CODE_SIGN_IDENTITY = "Apple Development";
 				CODE_SIGN_STYLE = Automatic;
 				CURRENT_PROJECT_VERSION = 1;
 				GENERATE_INFOPLIST_FILE = YES;
@@ -438,10 +606,128 @@
 			};
 			name = Release;
 		};
+		51C316E02B0881490033C70B /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
+				ASSETCATALOG_COMPILER_GENERATE_SWIFT_ASSET_SYMBOL_EXTENSIONS = YES;
+				ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
+				CLANG_CXX_LANGUAGE_STANDARD = "gnu++20";
+				CODE_SIGN_ENTITLEMENTS = macos_package_test/macos_package_test.entitlements;
+				CODE_SIGN_IDENTITY = "Apple Development";
+				CODE_SIGN_STYLE = Automatic;
+				COMBINE_HIDPI_IMAGES = YES;
+				CURRENT_PROJECT_VERSION = 1;
+				DEVELOPMENT_TEAM = UBF8T346G9;
+				ENABLE_HARDENED_RUNTIME = YES;
+				ENABLE_USER_SCRIPT_SANDBOXING = YES;
+				GCC_C_LANGUAGE_STANDARD = gnu17;
+				GENERATE_INFOPLIST_FILE = YES;
+				INFOPLIST_KEY_NSHumanReadableCopyright = "";
+				INFOPLIST_KEY_NSMainStoryboardFile = Main;
+				INFOPLIST_KEY_NSPrincipalClass = NSApplication;
+				LD_RUNPATH_SEARCH_PATHS = (
+					"$(inherited)",
+					"@executable_path/../Frameworks",
+				);
+				LOCALIZATION_PREFERS_STRING_CATALOGS = YES;
+				MACOSX_DEPLOYMENT_TARGET = 11.0;
+				MARKETING_VERSION = 1.0;
+				PRODUCT_BUNDLE_IDENTIFIER = "ai.onnxruntime.tests.macos-package-test";
+				PRODUCT_NAME = "$(TARGET_NAME)";
+				PROVISIONING_PROFILE_SPECIFIER = "";
+				SDKROOT = macosx;
+				SWIFT_EMIT_LOC_STRINGS = YES;
+			};
+			name = Debug;
+		};
+		51C316E12B0881490033C70B /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
+				ASSETCATALOG_COMPILER_GENERATE_SWIFT_ASSET_SYMBOL_EXTENSIONS = YES;
+				ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
+				CLANG_CXX_LANGUAGE_STANDARD = "gnu++20";
+				CODE_SIGN_ENTITLEMENTS = macos_package_test/macos_package_test.entitlements;
+				CODE_SIGN_IDENTITY = "Apple Development";
+				CODE_SIGN_STYLE = Automatic;
+				COMBINE_HIDPI_IMAGES = YES;
+				CURRENT_PROJECT_VERSION = 1;
+				DEVELOPMENT_TEAM = UBF8T346G9;
+				ENABLE_HARDENED_RUNTIME = YES;
+				ENABLE_USER_SCRIPT_SANDBOXING = YES;
+				GCC_C_LANGUAGE_STANDARD = gnu17;
+				GENERATE_INFOPLIST_FILE = YES;
+				INFOPLIST_KEY_NSHumanReadableCopyright = "";
+				INFOPLIST_KEY_NSMainStoryboardFile = Main;
+				INFOPLIST_KEY_NSPrincipalClass = NSApplication;
+				LD_RUNPATH_SEARCH_PATHS = (
+					"$(inherited)",
+					"@executable_path/../Frameworks",
+				);
+				LOCALIZATION_PREFERS_STRING_CATALOGS = YES;
+				MACOSX_DEPLOYMENT_TARGET = 11.0;
+				MARKETING_VERSION = 1.0;
+				PRODUCT_BUNDLE_IDENTIFIER = "ai.onnxruntime.tests.macos-package-test";
+				PRODUCT_NAME = "$(TARGET_NAME)";
+				PROVISIONING_PROFILE_SPECIFIER = "";
+				SDKROOT = macosx;
+				SWIFT_EMIT_LOC_STRINGS = YES;
+			};
+			name = Release;
+		};
+		51C316E62B0881490033C70B /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				ASSETCATALOG_COMPILER_GENERATE_SWIFT_ASSET_SYMBOL_EXTENSIONS = YES;
+				CLANG_CXX_LANGUAGE_STANDARD = "gnu++20";
+				CODE_SIGN_IDENTITY = "Apple Development";
+				CODE_SIGN_STYLE = Automatic;
+				CURRENT_PROJECT_VERSION = 1;
+				DEVELOPMENT_TEAM = UBF8T346G9;
+				ENABLE_USER_SCRIPT_SANDBOXING = YES;
+				GCC_C_LANGUAGE_STANDARD = gnu17;
+				GENERATE_INFOPLIST_FILE = YES;
+				LOCALIZATION_PREFERS_STRING_CATALOGS = YES;
+				MACOSX_DEPLOYMENT_TARGET = 11.0;
+				MARKETING_VERSION = 1.0;
+				PRODUCT_BUNDLE_IDENTIFIER = "com.MS.macos-package-testUITests";
+				PRODUCT_NAME = "$(TARGET_NAME)";
+				PROVISIONING_PROFILE_SPECIFIER = "";
+				SDKROOT = macosx;
+				SWIFT_EMIT_LOC_STRINGS = NO;
+				TEST_TARGET_NAME = macos_package_test;
+			};
+			name = Debug;
+		};
+		51C316E72B0881490033C70B /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				ASSETCATALOG_COMPILER_GENERATE_SWIFT_ASSET_SYMBOL_EXTENSIONS = YES;
+				CLANG_CXX_LANGUAGE_STANDARD = "gnu++20";
+				CODE_SIGN_IDENTITY = "Apple Development";
+				CODE_SIGN_STYLE = Automatic;
+				CURRENT_PROJECT_VERSION = 1;
+				DEVELOPMENT_TEAM = UBF8T346G9;
+				ENABLE_USER_SCRIPT_SANDBOXING = YES;
+				GCC_C_LANGUAGE_STANDARD = gnu17;
+				GENERATE_INFOPLIST_FILE = YES;
+				LOCALIZATION_PREFERS_STRING_CATALOGS = YES;
+				MACOSX_DEPLOYMENT_TARGET = 11.0;
+				MARKETING_VERSION = 1.0;
+				PRODUCT_BUNDLE_IDENTIFIER = "com.MS.macos-package-testUITests";
+				PRODUCT_NAME = "$(TARGET_NAME)";
+				PROVISIONING_PROFILE_SPECIFIER = "";
+				SDKROOT = macosx;
+				SWIFT_EMIT_LOC_STRINGS = NO;
+				TEST_TARGET_NAME = macos_package_test;
+			};
+			name = Release;
+		};
 /* End XCBuildConfiguration section */
 
 /* Begin XCConfigurationList section */
-		229E5917265869BF006E41AE /* Build configuration list for PBXProject "ios_package_test" */ = {
+		229E5917265869BF006E41AE /* Build configuration list for PBXProject "apple_package_test" */ = {
 			isa = XCConfigurationList;
 			buildConfigurations = (
 				229E5949265869C2006E41AE /* Debug */,
@@ -468,6 +754,24 @@
 			defaultConfigurationIsVisible = 0;
 			defaultConfigurationName = Release;
 		};
+		51C316DF2B0881490033C70B /* Build configuration list for PBXNativeTarget "macos_package_test" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				51C316E02B0881490033C70B /* Debug */,
+				51C316E12B0881490033C70B /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+		51C316E52B0881490033C70B /* Build configuration list for PBXNativeTarget "macos_package_testUITests" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				51C316E62B0881490033C70B /* Debug */,
+				51C316E72B0881490033C70B /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
 /* End XCConfigurationList section */
 	};
 	rootObject = 229E5914265869BF006E41AE /* Project object */;
diff --git a/onnxruntime/test/platform/ios/ios_package_test/ios_package_test.xcodeproj/project.xcworkspace/contents.xcworkspacedata b/onnxruntime/test/platform/apple/apple_package_test/apple_package_test.xcodeproj/project.xcworkspace/contents.xcworkspacedata
similarity index 100%
rename from onnxruntime/test/platform/ios/ios_package_test/ios_package_test.xcodeproj/project.xcworkspace/contents.xcworkspacedata
rename to onnxruntime/test/platform/apple/apple_package_test/apple_package_test.xcodeproj/project.xcworkspace/contents.xcworkspacedata
diff --git a/onnxruntime/test/platform/ios/ios_package_test/ios_package_test.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist b/onnxruntime/test/platform/apple/apple_package_test/apple_package_test.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist
similarity index 100%
rename from onnxruntime/test/platform/ios/ios_package_test/ios_package_test.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist
rename to onnxruntime/test/platform/apple/apple_package_test/apple_package_test.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist
diff --git a/onnxruntime/test/platform/apple/apple_package_test/apple_package_test.xcodeproj/project.xcworkspace/xcshareddata/WorkspaceSettings.xcsettings b/onnxruntime/test/platform/apple/apple_package_test/apple_package_test.xcodeproj/project.xcworkspace/xcshareddata/WorkspaceSettings.xcsettings
new file mode 100644
index 0000000000000..0c67376ebacb4
--- /dev/null
+++ b/onnxruntime/test/platform/apple/apple_package_test/apple_package_test.xcodeproj/project.xcworkspace/xcshareddata/WorkspaceSettings.xcsettings
@@ -0,0 +1,5 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict/>
+</plist>
diff --git a/onnxruntime/test/platform/ios/ios_package_test/ios_package_test/AppDelegate.h b/onnxruntime/test/platform/apple/apple_package_test/ios_package_test/AppDelegate.h
similarity index 100%
rename from onnxruntime/test/platform/ios/ios_package_test/ios_package_test/AppDelegate.h
rename to onnxruntime/test/platform/apple/apple_package_test/ios_package_test/AppDelegate.h
diff --git a/onnxruntime/test/platform/ios/ios_package_test/ios_package_test/AppDelegate.m b/onnxruntime/test/platform/apple/apple_package_test/ios_package_test/AppDelegate.m
similarity index 100%
rename from onnxruntime/test/platform/ios/ios_package_test/ios_package_test/AppDelegate.m
rename to onnxruntime/test/platform/apple/apple_package_test/ios_package_test/AppDelegate.m
diff --git a/onnxruntime/test/platform/ios/ios_package_test/ios_package_test/Base.lproj/LaunchScreen.storyboard b/onnxruntime/test/platform/apple/apple_package_test/ios_package_test/Base.lproj/LaunchScreen.storyboard
similarity index 100%
rename from onnxruntime/test/platform/ios/ios_package_test/ios_package_test/Base.lproj/LaunchScreen.storyboard
rename to onnxruntime/test/platform/apple/apple_package_test/ios_package_test/Base.lproj/LaunchScreen.storyboard
diff --git a/onnxruntime/test/platform/ios/ios_package_test/ios_package_test/Base.lproj/Main.storyboard b/onnxruntime/test/platform/apple/apple_package_test/ios_package_test/Base.lproj/Main.storyboard
similarity index 100%
rename from onnxruntime/test/platform/ios/ios_package_test/ios_package_test/Base.lproj/Main.storyboard
rename to onnxruntime/test/platform/apple/apple_package_test/ios_package_test/Base.lproj/Main.storyboard
diff --git a/onnxruntime/test/platform/ios/ios_package_test/ios_package_test/Info.plist b/onnxruntime/test/platform/apple/apple_package_test/ios_package_test/Info.plist
similarity index 100%
rename from onnxruntime/test/platform/ios/ios_package_test/ios_package_test/Info.plist
rename to onnxruntime/test/platform/apple/apple_package_test/ios_package_test/Info.plist
diff --git a/onnxruntime/test/platform/ios/ios_package_test/ios_package_test/main.m b/onnxruntime/test/platform/apple/apple_package_test/ios_package_test/main.m
similarity index 100%
rename from onnxruntime/test/platform/ios/ios_package_test/ios_package_test/main.m
rename to onnxruntime/test/platform/apple/apple_package_test/ios_package_test/main.m
diff --git a/onnxruntime/test/platform/ios/ios_package_test/ios_package_testUITests/ios_package_uitest_cpp_api.mm b/onnxruntime/test/platform/apple/apple_package_test/ios_package_testUITests/ios_package_uitest_cpp_api.mm
similarity index 100%
rename from onnxruntime/test/platform/ios/ios_package_test/ios_package_testUITests/ios_package_uitest_cpp_api.mm
rename to onnxruntime/test/platform/apple/apple_package_test/ios_package_testUITests/ios_package_uitest_cpp_api.mm
diff --git a/onnxruntime/test/platform/apple/apple_package_test/macos_package_test/AppDelegate.h b/onnxruntime/test/platform/apple/apple_package_test/macos_package_test/AppDelegate.h
new file mode 100644
index 0000000000000..e7b3600a059cb
--- /dev/null
+++ b/onnxruntime/test/platform/apple/apple_package_test/macos_package_test/AppDelegate.h
@@ -0,0 +1,12 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+//
+//  AppDelegate.h
+//  macos_package_test
+//
+
+#import <Cocoa/Cocoa.h>
+
+@interface AppDelegate : NSObject <NSApplicationDelegate>
+
+@end
diff --git a/onnxruntime/test/platform/apple/apple_package_test/macos_package_test/AppDelegate.m b/onnxruntime/test/platform/apple/apple_package_test/macos_package_test/AppDelegate.m
new file mode 100644
index 0000000000000..36d16491c63b1
--- /dev/null
+++ b/onnxruntime/test/platform/apple/apple_package_test/macos_package_test/AppDelegate.m
@@ -0,0 +1,28 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+//
+//  AppDelegate.h
+//  macos_package_test
+//
+
+#import "AppDelegate.h"
+
+@interface AppDelegate ()
+
+@end
+
+@implementation AppDelegate
+
+- (void)applicationDidFinishLaunching:(NSNotification*)aNotification {
+  // Insert code here to initialize your application
+}
+
+- (void)applicationWillTerminate:(NSNotification*)aNotification {
+  // Insert code here to tear down your application
+}
+
+- (BOOL)applicationSupportsSecureRestorableState:(NSApplication*)app {
+  return YES;
+}
+
+@end
diff --git a/onnxruntime/test/platform/apple/apple_package_test/macos_package_test/Base.lproj/Main.storyboard b/onnxruntime/test/platform/apple/apple_package_test/macos_package_test/Base.lproj/Main.storyboard
new file mode 100644
index 0000000000000..1cddb62a02eb6
--- /dev/null
+++ b/onnxruntime/test/platform/apple/apple_package_test/macos_package_test/Base.lproj/Main.storyboard
@@ -0,0 +1,719 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<document type="com.apple.InterfaceBuilder3.Cocoa.Storyboard.XIB" version="3.0" toolsVersion="22155" targetRuntime="MacOSX.Cocoa" propertyAccessControl="none" useAutolayout="YES" initialViewController="B8D-0N-5wS">
+    <dependencies>
+        <deployment identifier="macosx"/>
+        <plugIn identifier="com.apple.InterfaceBuilder.CocoaPlugin" version="22155"/>
+        <capability name="documents saved in the Xcode 8 format" minToolsVersion="8.0"/>
+    </dependencies>
+    <scenes>
+        <!--Application-->
+        <scene sceneID="JPo-4y-FX3">
+            <objects>
+                <application id="hnw-xV-0zn" sceneMemberID="viewController">
+                    <menu key="mainMenu" title="Main Menu" systemMenu="main" id="AYu-sK-qS6">
+                        <items>
+                            <menuItem title="macos_package_test" id="1Xt-HY-uBw">
+                                <modifierMask key="keyEquivalentModifierMask"/>
+                                <menu key="submenu" title="macos_package_test" systemMenu="apple" id="uQy-DD-JDr">
+                                    <items>
+                                        <menuItem title="About macos_package_test" id="5kV-Vb-QxS">
+                                            <modifierMask key="keyEquivalentModifierMask"/>
+                                            <connections>
+                                                <action selector="orderFrontStandardAboutPanel:" target="Ady-hI-5gd" id="Exp-CZ-Vem"/>
+                                            </connections>
+                                        </menuItem>
+                                        <menuItem isSeparatorItem="YES" id="VOq-y0-SEH"/>
+                                        <menuItem title="Preferences…" keyEquivalent="," id="BOF-NM-1cW"/>
+                                        <menuItem isSeparatorItem="YES" id="wFC-TO-SCJ"/>
+                                        <menuItem title="Services" id="NMo-om-nkz">
+                                            <modifierMask key="keyEquivalentModifierMask"/>
+                                            <menu key="submenu" title="Services" systemMenu="services" id="hz9-B4-Xy5"/>
+                                        </menuItem>
+                                        <menuItem isSeparatorItem="YES" id="4je-JR-u6R"/>
+                                        <menuItem title="Hide macos_package_test" keyEquivalent="h" id="Olw-nP-bQN">
+                                            <connections>
+                                                <action selector="hide:" target="Ady-hI-5gd" id="PnN-Uc-m68"/>
+                                            </connections>
+                                        </menuItem>
+                                        <menuItem title="Hide Others" keyEquivalent="h" id="Vdr-fp-XzO">
+                                            <modifierMask key="keyEquivalentModifierMask" option="YES" command="YES"/>
+                                            <connections>
+                                                <action selector="hideOtherApplications:" target="Ady-hI-5gd" id="VT4-aY-XCT"/>
+                                            </connections>
+                                        </menuItem>
+                                        <menuItem title="Show All" id="Kd2-mp-pUS">
+                                            <modifierMask key="keyEquivalentModifierMask"/>
+                                            <connections>
+                                                <action selector="unhideAllApplications:" target="Ady-hI-5gd" id="Dhg-Le-xox"/>
+                                            </connections>
+                                        </menuItem>
+                                        <menuItem isSeparatorItem="YES" id="kCx-OE-vgT"/>
+                                        <menuItem title="Quit macos_package_test" keyEquivalent="q" id="4sb-4s-VLi">
+                                            <connections>
+                                                <action selector="terminate:" target="Ady-hI-5gd" id="Te7-pn-YzF"/>
+                                            </connections>
+                                        </menuItem>
+                                    </items>
+                                </menu>
+                            </menuItem>
+                            <menuItem title="File" id="dMs-cI-mzQ">
+                                <modifierMask key="keyEquivalentModifierMask"/>
+                                <menu key="submenu" title="File" id="bib-Uj-vzu">
+                                    <items>
+                                        <menuItem title="New" keyEquivalent="n" id="Was-JA-tGl">
+                                            <connections>
+                                                <action selector="newDocument:" target="Ady-hI-5gd" id="4Si-XN-c54"/>
+                                            </connections>
+                                        </menuItem>
+                                        <menuItem title="Open…" keyEquivalent="o" id="IAo-SY-fd9">
+                                            <connections>
+                                                <action selector="openDocument:" target="Ady-hI-5gd" id="bVn-NM-KNZ"/>
+                                            </connections>
+                                        </menuItem>
+                                        <menuItem title="Open Recent" id="tXI-mr-wws">
+                                            <modifierMask key="keyEquivalentModifierMask"/>
+                                            <menu key="submenu" title="Open Recent" systemMenu="recentDocuments" id="oas-Oc-fiZ">
+                                                <items>
+                                                    <menuItem title="Clear Menu" id="vNY-rz-j42">
+                                                        <modifierMask key="keyEquivalentModifierMask"/>
+                                                        <connections>
+                                                            <action selector="clearRecentDocuments:" target="Ady-hI-5gd" id="Daa-9d-B3U"/>
+                                                        </connections>
+                                                    </menuItem>
+                                                </items>
+                                            </menu>
+                                        </menuItem>
+                                        <menuItem isSeparatorItem="YES" id="m54-Is-iLE"/>
+                                        <menuItem title="Close" keyEquivalent="w" id="DVo-aG-piG">
+                                            <connections>
+                                                <action selector="performClose:" target="Ady-hI-5gd" id="HmO-Ls-i7Q"/>
+                                            </connections>
+                                        </menuItem>
+                                        <menuItem title="Save…" keyEquivalent="s" id="pxx-59-PXV">
+                                            <connections>
+                                                <action selector="saveDocument:" target="Ady-hI-5gd" id="teZ-XB-qJY"/>
+                                            </connections>
+                                        </menuItem>
+                                        <menuItem title="Save As…" keyEquivalent="S" id="Bw7-FT-i3A">
+                                            <connections>
+                                                <action selector="saveDocumentAs:" target="Ady-hI-5gd" id="mDf-zr-I0C"/>
+                                            </connections>
+                                        </menuItem>
+                                        <menuItem title="Revert to Saved" keyEquivalent="r" id="KaW-ft-85H">
+                                            <connections>
+                                                <action selector="revertDocumentToSaved:" target="Ady-hI-5gd" id="iJ3-Pv-kwq"/>
+                                            </connections>
+                                        </menuItem>
+                                        <menuItem isSeparatorItem="YES" id="aJh-i4-bef"/>
+                                        <menuItem title="Page Setup…" keyEquivalent="P" id="qIS-W8-SiK">
+                                            <modifierMask key="keyEquivalentModifierMask" shift="YES" command="YES"/>
+                                            <connections>
+                                                <action selector="runPageLayout:" target="Ady-hI-5gd" id="Din-rz-gC5"/>
+                                            </connections>
+                                        </menuItem>
+                                        <menuItem title="Print…" keyEquivalent="p" id="aTl-1u-JFS">
+                                            <connections>
+                                                <action selector="print:" target="Ady-hI-5gd" id="qaZ-4w-aoO"/>
+                                            </connections>
+                                        </menuItem>
+                                    </items>
+                                </menu>
+                            </menuItem>
+                            <menuItem title="Edit" id="5QF-Oa-p0T">
+                                <modifierMask key="keyEquivalentModifierMask"/>
+                                <menu key="submenu" title="Edit" id="W48-6f-4Dl">
+                                    <items>
+                                        <menuItem title="Undo" keyEquivalent="z" id="dRJ-4n-Yzg">
+                                            <connections>
+                                                <action selector="undo:" target="Ady-hI-5gd" id="M6e-cu-g7V"/>
+                                            </connections>
+                                        </menuItem>
+                                        <menuItem title="Redo" keyEquivalent="Z" id="6dh-zS-Vam">
+                                            <connections>
+                                                <action selector="redo:" target="Ady-hI-5gd" id="oIA-Rs-6OD"/>
+                                            </connections>
+                                        </menuItem>
+                                        <menuItem isSeparatorItem="YES" id="WRV-NI-Exz"/>
+                                        <menuItem title="Cut" keyEquivalent="x" id="uRl-iY-unG">
+                                            <connections>
+                                                <action selector="cut:" target="Ady-hI-5gd" id="YJe-68-I9s"/>
+                                            </connections>
+                                        </menuItem>
+                                        <menuItem title="Copy" keyEquivalent="c" id="x3v-GG-iWU">
+                                            <connections>
+                                                <action selector="copy:" target="Ady-hI-5gd" id="G1f-GL-Joy"/>
+                                            </connections>
+                                        </menuItem>
+                                        <menuItem title="Paste" keyEquivalent="v" id="gVA-U4-sdL">
+                                            <connections>
+                                                <action selector="paste:" target="Ady-hI-5gd" id="UvS-8e-Qdg"/>
+                                            </connections>
+                                        </menuItem>
+                                        <menuItem title="Paste and Match Style" keyEquivalent="V" id="WeT-3V-zwk">
+                                            <modifierMask key="keyEquivalentModifierMask" option="YES" command="YES"/>
+                                            <connections>
+                                                <action selector="pasteAsPlainText:" target="Ady-hI-5gd" id="cEh-KX-wJQ"/>
+                                            </connections>
+                                        </menuItem>
+                                        <menuItem title="Delete" id="pa3-QI-u2k">
+                                            <modifierMask key="keyEquivalentModifierMask"/>
+                                            <connections>
+                                                <action selector="delete:" target="Ady-hI-5gd" id="0Mk-Ml-PaM"/>
+                                            </connections>
+                                        </menuItem>
+                                        <menuItem title="Select All" keyEquivalent="a" id="Ruw-6m-B2m">
+                                            <connections>
+                                                <action selector="selectAll:" target="Ady-hI-5gd" id="VNm-Mi-diN"/>
+                                            </connections>
+                                        </menuItem>
+                                        <menuItem isSeparatorItem="YES" id="uyl-h8-XO2"/>
+                                        <menuItem title="Find" id="4EN-yA-p0u">
+                                            <modifierMask key="keyEquivalentModifierMask"/>
+                                            <menu key="submenu" title="Find" id="1b7-l0-nxx">
+                                                <items>
+                                                    <menuItem title="Find…" tag="1" keyEquivalent="f" id="Xz5-n4-O0W">
+                                                        <connections>
+                                                            <action selector="performFindPanelAction:" target="Ady-hI-5gd" id="cD7-Qs-BN4"/>
+                                                        </connections>
+                                                    </menuItem>
+                                                    <menuItem title="Find and Replace…" tag="12" keyEquivalent="f" id="YEy-JH-Tfz">
+                                                        <modifierMask key="keyEquivalentModifierMask" option="YES" command="YES"/>
+                                                        <connections>
+                                                            <action selector="performFindPanelAction:" target="Ady-hI-5gd" id="WD3-Gg-5AJ"/>
+                                                        </connections>
+                                                    </menuItem>
+                                                    <menuItem title="Find Next" tag="2" keyEquivalent="g" id="q09-fT-Sye">
+                                                        <connections>
+                                                            <action selector="performFindPanelAction:" target="Ady-hI-5gd" id="NDo-RZ-v9R"/>
+                                                        </connections>
+                                                    </menuItem>
+                                                    <menuItem title="Find Previous" tag="3" keyEquivalent="G" id="OwM-mh-QMV">
+                                                        <connections>
+                                                            <action selector="performFindPanelAction:" target="Ady-hI-5gd" id="HOh-sY-3ay"/>
+                                                        </connections>
+                                                    </menuItem>
+                                                    <menuItem title="Use Selection for Find" tag="7" keyEquivalent="e" id="buJ-ug-pKt">
+                                                        <connections>
+                                                            <action selector="performFindPanelAction:" target="Ady-hI-5gd" id="U76-nv-p5D"/>
+                                                        </connections>
+                                                    </menuItem>
+                                                    <menuItem title="Jump to Selection" keyEquivalent="j" id="S0p-oC-mLd">
+                                                        <connections>
+                                                            <action selector="centerSelectionInVisibleArea:" target="Ady-hI-5gd" id="IOG-6D-g5B"/>
+                                                        </connections>
+                                                    </menuItem>
+                                                </items>
+                                            </menu>
+                                        </menuItem>
+                                        <menuItem title="Spelling and Grammar" id="Dv1-io-Yv7">
+                                            <modifierMask key="keyEquivalentModifierMask"/>
+                                            <menu key="submenu" title="Spelling" id="3IN-sU-3Bg">
+                                                <items>
+                                                    <menuItem title="Show Spelling and Grammar" keyEquivalent=":" id="HFo-cy-zxI">
+                                                        <connections>
+                                                            <action selector="showGuessPanel:" target="Ady-hI-5gd" id="vFj-Ks-hy3"/>
+                                                        </connections>
+                                                    </menuItem>
+                                                    <menuItem title="Check Document Now" keyEquivalent=";" id="hz2-CU-CR7">
+                                                        <connections>
+                                                            <action selector="checkSpelling:" target="Ady-hI-5gd" id="fz7-VC-reM"/>
+                                                        </connections>
+                                                    </menuItem>
+                                                    <menuItem isSeparatorItem="YES" id="bNw-od-mp5"/>
+                                                    <menuItem title="Check Spelling While Typing" id="rbD-Rh-wIN">
+                                                        <modifierMask key="keyEquivalentModifierMask"/>
+                                                        <connections>
+                                                            <action selector="toggleContinuousSpellChecking:" target="Ady-hI-5gd" id="7w6-Qz-0kB"/>
+                                                        </connections>
+                                                    </menuItem>
+                                                    <menuItem title="Check Grammar With Spelling" id="mK6-2p-4JG">
+                                                        <modifierMask key="keyEquivalentModifierMask"/>
+                                                        <connections>
+                                                            <action selector="toggleGrammarChecking:" target="Ady-hI-5gd" id="muD-Qn-j4w"/>
+                                                        </connections>
+                                                    </menuItem>
+                                                    <menuItem title="Correct Spelling Automatically" id="78Y-hA-62v">
+                                                        <modifierMask key="keyEquivalentModifierMask"/>
+                                                        <connections>
+                                                            <action selector="toggleAutomaticSpellingCorrection:" target="Ady-hI-5gd" id="2lM-Qi-WAP"/>
+                                                        </connections>
+                                                    </menuItem>
+                                                </items>
+                                            </menu>
+                                        </menuItem>
+                                        <menuItem title="Substitutions" id="9ic-FL-obx">
+                                            <modifierMask key="keyEquivalentModifierMask"/>
+                                            <menu key="submenu" title="Substitutions" id="FeM-D8-WVr">
+                                                <items>
+                                                    <menuItem title="Show Substitutions" id="z6F-FW-3nz">
+                                                        <modifierMask key="keyEquivalentModifierMask"/>
+                                                        <connections>
+                                                            <action selector="orderFrontSubstitutionsPanel:" target="Ady-hI-5gd" id="oku-mr-iSq"/>
+                                                        </connections>
+                                                    </menuItem>
+                                                    <menuItem isSeparatorItem="YES" id="gPx-C9-uUO"/>
+                                                    <menuItem title="Smart Copy/Paste" id="9yt-4B-nSM">
+                                                        <modifierMask key="keyEquivalentModifierMask"/>
+                                                        <connections>
+                                                            <action selector="toggleSmartInsertDelete:" target="Ady-hI-5gd" id="3IJ-Se-DZD"/>
+                                                        </connections>
+                                                    </menuItem>
+                                                    <menuItem title="Smart Quotes" id="hQb-2v-fYv">
+                                                        <modifierMask key="keyEquivalentModifierMask"/>
+                                                        <connections>
+                                                            <action selector="toggleAutomaticQuoteSubstitution:" target="Ady-hI-5gd" id="ptq-xd-QOA"/>
+                                                        </connections>
+                                                    </menuItem>
+                                                    <menuItem title="Smart Dashes" id="rgM-f4-ycn">
+                                                        <modifierMask key="keyEquivalentModifierMask"/>
+                                                        <connections>
+                                                            <action selector="toggleAutomaticDashSubstitution:" target="Ady-hI-5gd" id="oCt-pO-9gS"/>
+                                                        </connections>
+                                                    </menuItem>
+                                                    <menuItem title="Smart Links" id="cwL-P1-jid">
+                                                        <modifierMask key="keyEquivalentModifierMask"/>
+                                                        <connections>
+                                                            <action selector="toggleAutomaticLinkDetection:" target="Ady-hI-5gd" id="Gip-E3-Fov"/>
+                                                        </connections>
+                                                    </menuItem>
+                                                    <menuItem title="Data Detectors" id="tRr-pd-1PS">
+                                                        <modifierMask key="keyEquivalentModifierMask"/>
+                                                        <connections>
+                                                            <action selector="toggleAutomaticDataDetection:" target="Ady-hI-5gd" id="R1I-Nq-Kbl"/>
+                                                        </connections>
+                                                    </menuItem>
+                                                    <menuItem title="Text Replacement" id="HFQ-gK-NFA">
+                                                        <modifierMask key="keyEquivalentModifierMask"/>
+                                                        <connections>
+                                                            <action selector="toggleAutomaticTextReplacement:" target="Ady-hI-5gd" id="DvP-Fe-Py6"/>
+                                                        </connections>
+                                                    </menuItem>
+                                                </items>
+                                            </menu>
+                                        </menuItem>
+                                        <menuItem title="Transformations" id="2oI-Rn-ZJC">
+                                            <modifierMask key="keyEquivalentModifierMask"/>
+                                            <menu key="submenu" title="Transformations" id="c8a-y6-VQd">
+                                                <items>
+                                                    <menuItem title="Make Upper Case" id="vmV-6d-7jI">
+                                                        <modifierMask key="keyEquivalentModifierMask"/>
+                                                        <connections>
+                                                            <action selector="uppercaseWord:" target="Ady-hI-5gd" id="sPh-Tk-edu"/>
+                                                        </connections>
+                                                    </menuItem>
+                                                    <menuItem title="Make Lower Case" id="d9M-CD-aMd">
+                                                        <modifierMask key="keyEquivalentModifierMask"/>
+                                                        <connections>
+                                                            <action selector="lowercaseWord:" target="Ady-hI-5gd" id="iUZ-b5-hil"/>
+                                                        </connections>
+                                                    </menuItem>
+                                                    <menuItem title="Capitalize" id="UEZ-Bs-lqG">
+                                                        <modifierMask key="keyEquivalentModifierMask"/>
+                                                        <connections>
+                                                            <action selector="capitalizeWord:" target="Ady-hI-5gd" id="26H-TL-nsh"/>
+                                                        </connections>
+                                                    </menuItem>
+                                                </items>
+                                            </menu>
+                                        </menuItem>
+                                        <menuItem title="Speech" id="xrE-MZ-jX0">
+                                            <modifierMask key="keyEquivalentModifierMask"/>
+                                            <menu key="submenu" title="Speech" id="3rS-ZA-NoH">
+                                                <items>
+                                                    <menuItem title="Start Speaking" id="Ynk-f8-cLZ">
+                                                        <modifierMask key="keyEquivalentModifierMask"/>
+                                                        <connections>
+                                                            <action selector="startSpeaking:" target="Ady-hI-5gd" id="654-Ng-kyl"/>
+                                                        </connections>
+                                                    </menuItem>
+                                                    <menuItem title="Stop Speaking" id="Oyz-dy-DGm">
+                                                        <modifierMask key="keyEquivalentModifierMask"/>
+                                                        <connections>
+                                                            <action selector="stopSpeaking:" target="Ady-hI-5gd" id="dX8-6p-jy9"/>
+                                                        </connections>
+                                                    </menuItem>
+                                                </items>
+                                            </menu>
+                                        </menuItem>
+                                    </items>
+                                </menu>
+                            </menuItem>
+                            <menuItem title="Format" id="jxT-CU-nIS">
+                                <modifierMask key="keyEquivalentModifierMask"/>
+                                <menu key="submenu" title="Format" id="GEO-Iw-cKr">
+                                    <items>
+                                        <menuItem title="Font" id="Gi5-1S-RQB">
+                                            <modifierMask key="keyEquivalentModifierMask"/>
+                                            <menu key="submenu" title="Font" systemMenu="font" id="aXa-aM-Jaq">
+                                                <items>
+                                                    <menuItem title="Show Fonts" keyEquivalent="t" id="Q5e-8K-NDq">
+                                                        <connections>
+                                                            <action selector="orderFrontFontPanel:" target="YLy-65-1bz" id="WHr-nq-2xA"/>
+                                                        </connections>
+                                                    </menuItem>
+                                                    <menuItem title="Bold" tag="2" keyEquivalent="b" id="GB9-OM-e27">
+                                                        <connections>
+                                                            <action selector="addFontTrait:" target="YLy-65-1bz" id="hqk-hr-sYV"/>
+                                                        </connections>
+                                                    </menuItem>
+                                                    <menuItem title="Italic" tag="1" keyEquivalent="i" id="Vjx-xi-njq">
+                                                        <connections>
+                                                            <action selector="addFontTrait:" target="YLy-65-1bz" id="IHV-OB-c03"/>
+                                                        </connections>
+                                                    </menuItem>
+                                                    <menuItem title="Underline" keyEquivalent="u" id="WRG-CD-K1S">
+                                                        <connections>
+                                                            <action selector="underline:" target="Ady-hI-5gd" id="FYS-2b-JAY"/>
+                                                        </connections>
+                                                    </menuItem>
+                                                    <menuItem isSeparatorItem="YES" id="5gT-KC-WSO"/>
+                                                    <menuItem title="Bigger" tag="3" keyEquivalent="+" id="Ptp-SP-VEL">
+                                                        <connections>
+                                                            <action selector="modifyFont:" target="YLy-65-1bz" id="Uc7-di-UnL"/>
+                                                        </connections>
+                                                    </menuItem>
+                                                    <menuItem title="Smaller" tag="4" keyEquivalent="-" id="i1d-Er-qST">
+                                                        <connections>
+                                                            <action selector="modifyFont:" target="YLy-65-1bz" id="HcX-Lf-eNd"/>
+                                                        </connections>
+                                                    </menuItem>
+                                                    <menuItem isSeparatorItem="YES" id="kx3-Dk-x3B"/>
+                                                    <menuItem title="Kern" id="jBQ-r6-VK2">
+                                                        <modifierMask key="keyEquivalentModifierMask"/>
+                                                        <menu key="submenu" title="Kern" id="tlD-Oa-oAM">
+                                                            <items>
+                                                                <menuItem title="Use Default" id="GUa-eO-cwY">
+                                                                    <modifierMask key="keyEquivalentModifierMask"/>
+                                                                    <connections>
+                                                                        <action selector="useStandardKerning:" target="Ady-hI-5gd" id="6dk-9l-Ckg"/>
+                                                                    </connections>
+                                                                </menuItem>
+                                                                <menuItem title="Use None" id="cDB-IK-hbR">
+                                                                    <modifierMask key="keyEquivalentModifierMask"/>
+                                                                    <connections>
+                                                                        <action selector="turnOffKerning:" target="Ady-hI-5gd" id="U8a-gz-Maa"/>
+                                                                    </connections>
+                                                                </menuItem>
+                                                                <menuItem title="Tighten" id="46P-cB-AYj">
+                                                                    <modifierMask key="keyEquivalentModifierMask"/>
+                                                                    <connections>
+                                                                        <action selector="tightenKerning:" target="Ady-hI-5gd" id="hr7-Nz-8ro"/>
+                                                                    </connections>
+                                                                </menuItem>
+                                                                <menuItem title="Loosen" id="ogc-rX-tC1">
+                                                                    <modifierMask key="keyEquivalentModifierMask"/>
+                                                                    <connections>
+                                                                        <action selector="loosenKerning:" target="Ady-hI-5gd" id="8i4-f9-FKE"/>
+                                                                    </connections>
+                                                                </menuItem>
+                                                            </items>
+                                                        </menu>
+                                                    </menuItem>
+                                                    <menuItem title="Ligatures" id="o6e-r0-MWq">
+                                                        <modifierMask key="keyEquivalentModifierMask"/>
+                                                        <menu key="submenu" title="Ligatures" id="w0m-vy-SC9">
+                                                            <items>
+                                                                <menuItem title="Use Default" id="agt-UL-0e3">
+                                                                    <modifierMask key="keyEquivalentModifierMask"/>
+                                                                    <connections>
+                                                                        <action selector="useStandardLigatures:" target="Ady-hI-5gd" id="7uR-wd-Dx6"/>
+                                                                    </connections>
+                                                                </menuItem>
+                                                                <menuItem title="Use None" id="J7y-lM-qPV">
+                                                                    <modifierMask key="keyEquivalentModifierMask"/>
+                                                                    <connections>
+                                                                        <action selector="turnOffLigatures:" target="Ady-hI-5gd" id="iX2-gA-Ilz"/>
+                                                                    </connections>
+                                                                </menuItem>
+                                                                <menuItem title="Use All" id="xQD-1f-W4t">
+                                                                    <modifierMask key="keyEquivalentModifierMask"/>
+                                                                    <connections>
+                                                                        <action selector="useAllLigatures:" target="Ady-hI-5gd" id="KcB-kA-TuK"/>
+                                                                    </connections>
+                                                                </menuItem>
+                                                            </items>
+                                                        </menu>
+                                                    </menuItem>
+                                                    <menuItem title="Baseline" id="OaQ-X3-Vso">
+                                                        <modifierMask key="keyEquivalentModifierMask"/>
+                                                        <menu key="submenu" title="Baseline" id="ijk-EB-dga">
+                                                            <items>
+                                                                <menuItem title="Use Default" id="3Om-Ey-2VK">
+                                                                    <modifierMask key="keyEquivalentModifierMask"/>
+                                                                    <connections>
+                                                                        <action selector="unscript:" target="Ady-hI-5gd" id="0vZ-95-Ywn"/>
+                                                                    </connections>
+                                                                </menuItem>
+                                                                <menuItem title="Superscript" id="Rqc-34-cIF">
+                                                                    <modifierMask key="keyEquivalentModifierMask"/>
+                                                                    <connections>
+                                                                        <action selector="superscript:" target="Ady-hI-5gd" id="3qV-fo-wpU"/>
+                                                                    </connections>
+                                                                </menuItem>
+                                                                <menuItem title="Subscript" id="I0S-gh-46l">
+                                                                    <modifierMask key="keyEquivalentModifierMask"/>
+                                                                    <connections>
+                                                                        <action selector="subscript:" target="Ady-hI-5gd" id="Q6W-4W-IGz"/>
+                                                                    </connections>
+                                                                </menuItem>
+                                                                <menuItem title="Raise" id="2h7-ER-AoG">
+                                                                    <modifierMask key="keyEquivalentModifierMask"/>
+                                                                    <connections>
+                                                                        <action selector="raiseBaseline:" target="Ady-hI-5gd" id="4sk-31-7Q9"/>
+                                                                    </connections>
+                                                                </menuItem>
+                                                                <menuItem title="Lower" id="1tx-W0-xDw">
+                                                                    <modifierMask key="keyEquivalentModifierMask"/>
+                                                                    <connections>
+                                                                        <action selector="lowerBaseline:" target="Ady-hI-5gd" id="OF1-bc-KW4"/>
+                                                                    </connections>
+                                                                </menuItem>
+                                                            </items>
+                                                        </menu>
+                                                    </menuItem>
+                                                    <menuItem isSeparatorItem="YES" id="Ndw-q3-faq"/>
+                                                    <menuItem title="Show Colors" keyEquivalent="C" id="bgn-CT-cEk">
+                                                        <connections>
+                                                            <action selector="orderFrontColorPanel:" target="Ady-hI-5gd" id="mSX-Xz-DV3"/>
+                                                        </connections>
+                                                    </menuItem>
+                                                    <menuItem isSeparatorItem="YES" id="iMs-zA-UFJ"/>
+                                                    <menuItem title="Copy Style" keyEquivalent="c" id="5Vv-lz-BsD">
+                                                        <modifierMask key="keyEquivalentModifierMask" option="YES" command="YES"/>
+                                                        <connections>
+                                                            <action selector="copyFont:" target="Ady-hI-5gd" id="GJO-xA-L4q"/>
+                                                        </connections>
+                                                    </menuItem>
+                                                    <menuItem title="Paste Style" keyEquivalent="v" id="vKC-jM-MkH">
+                                                        <modifierMask key="keyEquivalentModifierMask" option="YES" command="YES"/>
+                                                        <connections>
+                                                            <action selector="pasteFont:" target="Ady-hI-5gd" id="JfD-CL-leO"/>
+                                                        </connections>
+                                                    </menuItem>
+                                                </items>
+                                            </menu>
+                                        </menuItem>
+                                        <menuItem title="Text" id="Fal-I4-PZk">
+                                            <modifierMask key="keyEquivalentModifierMask"/>
+                                            <menu key="submenu" title="Text" id="d9c-me-L2H">
+                                                <items>
+                                                    <menuItem title="Align Left" keyEquivalent="{" id="ZM1-6Q-yy1">
+                                                        <connections>
+                                                            <action selector="alignLeft:" target="Ady-hI-5gd" id="zUv-R1-uAa"/>
+                                                        </connections>
+                                                    </menuItem>
+                                                    <menuItem title="Center" keyEquivalent="|" id="VIY-Ag-zcb">
+                                                        <connections>
+                                                            <action selector="alignCenter:" target="Ady-hI-5gd" id="spX-mk-kcS"/>
+                                                        </connections>
+                                                    </menuItem>
+                                                    <menuItem title="Justify" id="J5U-5w-g23">
+                                                        <modifierMask key="keyEquivalentModifierMask"/>
+                                                        <connections>
+                                                            <action selector="alignJustified:" target="Ady-hI-5gd" id="ljL-7U-jND"/>
+                                                        </connections>
+                                                    </menuItem>
+                                                    <menuItem title="Align Right" keyEquivalent="}" id="wb2-vD-lq4">
+                                                        <connections>
+                                                            <action selector="alignRight:" target="Ady-hI-5gd" id="r48-bG-YeY"/>
+                                                        </connections>
+                                                    </menuItem>
+                                                    <menuItem isSeparatorItem="YES" id="4s2-GY-VfK"/>
+                                                    <menuItem title="Writing Direction" id="H1b-Si-o9J">
+                                                        <modifierMask key="keyEquivalentModifierMask"/>
+                                                        <menu key="submenu" title="Writing Direction" id="8mr-sm-Yjd">
+                                                            <items>
+                                                                <menuItem title="Paragraph" enabled="NO" id="ZvO-Gk-QUH">
+                                                                    <modifierMask key="keyEquivalentModifierMask"/>
+                                                                </menuItem>
+                                                                <menuItem id="YGs-j5-SAR">
+                                                                    <string key="title">	Default</string>
+                                                                    <modifierMask key="keyEquivalentModifierMask"/>
+                                                                    <connections>
+                                                                        <action selector="makeBaseWritingDirectionNatural:" target="Ady-hI-5gd" id="qtV-5e-UBP"/>
+                                                                    </connections>
+                                                                </menuItem>
+                                                                <menuItem id="Lbh-J2-qVU">
+                                                                    <string key="title">	Left to Right</string>
+                                                                    <modifierMask key="keyEquivalentModifierMask"/>
+                                                                    <connections>
+                                                                        <action selector="makeBaseWritingDirectionLeftToRight:" target="Ady-hI-5gd" id="S0X-9S-QSf"/>
+                                                                    </connections>
+                                                                </menuItem>
+                                                                <menuItem id="jFq-tB-4Kx">
+                                                                    <string key="title">	Right to Left</string>
+                                                                    <modifierMask key="keyEquivalentModifierMask"/>
+                                                                    <connections>
+                                                                        <action selector="makeBaseWritingDirectionRightToLeft:" target="Ady-hI-5gd" id="5fk-qB-AqJ"/>
+                                                                    </connections>
+                                                                </menuItem>
+                                                                <menuItem isSeparatorItem="YES" id="swp-gr-a21"/>
+                                                                <menuItem title="Selection" enabled="NO" id="cqv-fj-IhA">
+                                                                    <modifierMask key="keyEquivalentModifierMask"/>
+                                                                </menuItem>
+                                                                <menuItem id="Nop-cj-93Q">
+                                                                    <string key="title">	Default</string>
+                                                                    <modifierMask key="keyEquivalentModifierMask"/>
+                                                                    <connections>
+                                                                        <action selector="makeTextWritingDirectionNatural:" target="Ady-hI-5gd" id="lPI-Se-ZHp"/>
+                                                                    </connections>
+                                                                </menuItem>
+                                                                <menuItem id="BgM-ve-c93">
+                                                                    <string key="title">	Left to Right</string>
+                                                                    <modifierMask key="keyEquivalentModifierMask"/>
+                                                                    <connections>
+                                                                        <action selector="makeTextWritingDirectionLeftToRight:" target="Ady-hI-5gd" id="caW-Bv-w94"/>
+                                                                    </connections>
+                                                                </menuItem>
+                                                                <menuItem id="RB4-Sm-HuC">
+                                                                    <string key="title">	Right to Left</string>
+                                                                    <modifierMask key="keyEquivalentModifierMask"/>
+                                                                    <connections>
+                                                                        <action selector="makeTextWritingDirectionRightToLeft:" target="Ady-hI-5gd" id="EXD-6r-ZUu"/>
+                                                                    </connections>
+                                                                </menuItem>
+                                                            </items>
+                                                        </menu>
+                                                    </menuItem>
+                                                    <menuItem isSeparatorItem="YES" id="fKy-g9-1gm"/>
+                                                    <menuItem title="Show Ruler" id="vLm-3I-IUL">
+                                                        <modifierMask key="keyEquivalentModifierMask"/>
+                                                        <connections>
+                                                            <action selector="toggleRuler:" target="Ady-hI-5gd" id="FOx-HJ-KwY"/>
+                                                        </connections>
+                                                    </menuItem>
+                                                    <menuItem title="Copy Ruler" keyEquivalent="c" id="MkV-Pr-PK5">
+                                                        <modifierMask key="keyEquivalentModifierMask" control="YES" command="YES"/>
+                                                        <connections>
+                                                            <action selector="copyRuler:" target="Ady-hI-5gd" id="71i-fW-3W2"/>
+                                                        </connections>
+                                                    </menuItem>
+                                                    <menuItem title="Paste Ruler" keyEquivalent="v" id="LVM-kO-fVI">
+                                                        <modifierMask key="keyEquivalentModifierMask" control="YES" command="YES"/>
+                                                        <connections>
+                                                            <action selector="pasteRuler:" target="Ady-hI-5gd" id="cSh-wd-qM2"/>
+                                                        </connections>
+                                                    </menuItem>
+                                                </items>
+                                            </menu>
+                                        </menuItem>
+                                    </items>
+                                </menu>
+                            </menuItem>
+                            <menuItem title="View" id="H8h-7b-M4v">
+                                <modifierMask key="keyEquivalentModifierMask"/>
+                                <menu key="submenu" title="View" id="HyV-fh-RgO">
+                                    <items>
+                                        <menuItem title="Show Toolbar" keyEquivalent="t" id="snW-S8-Cw5">
+                                            <modifierMask key="keyEquivalentModifierMask" option="YES" command="YES"/>
+                                            <connections>
+                                                <action selector="toggleToolbarShown:" target="Ady-hI-5gd" id="BXY-wc-z0C"/>
+                                            </connections>
+                                        </menuItem>
+                                        <menuItem title="Customize Toolbar…" id="1UK-8n-QPP">
+                                            <modifierMask key="keyEquivalentModifierMask"/>
+                                            <connections>
+                                                <action selector="runToolbarCustomizationPalette:" target="Ady-hI-5gd" id="pQI-g3-MTW"/>
+                                            </connections>
+                                        </menuItem>
+                                        <menuItem isSeparatorItem="YES" id="hB3-LF-h0Y"/>
+                                        <menuItem title="Show Sidebar" keyEquivalent="s" id="kIP-vf-haE">
+                                            <modifierMask key="keyEquivalentModifierMask" control="YES" command="YES"/>
+                                            <connections>
+                                                <action selector="toggleSidebar:" target="Ady-hI-5gd" id="iwa-gc-5KM"/>
+                                            </connections>
+                                        </menuItem>
+                                        <menuItem title="Enter Full Screen" keyEquivalent="f" id="4J7-dP-txa">
+                                            <modifierMask key="keyEquivalentModifierMask" control="YES" command="YES"/>
+                                            <connections>
+                                                <action selector="toggleFullScreen:" target="Ady-hI-5gd" id="dU3-MA-1Rq"/>
+                                            </connections>
+                                        </menuItem>
+                                    </items>
+                                </menu>
+                            </menuItem>
+                            <menuItem title="Window" id="aUF-d1-5bR">
+                                <modifierMask key="keyEquivalentModifierMask"/>
+                                <menu key="submenu" title="Window" systemMenu="window" id="Td7-aD-5lo">
+                                    <items>
+                                        <menuItem title="Minimize" keyEquivalent="m" id="OY7-WF-poV">
+                                            <connections>
+                                                <action selector="performMiniaturize:" target="Ady-hI-5gd" id="VwT-WD-YPe"/>
+                                            </connections>
+                                        </menuItem>
+                                        <menuItem title="Zoom" id="R4o-n2-Eq4">
+                                            <modifierMask key="keyEquivalentModifierMask"/>
+                                            <connections>
+                                                <action selector="performZoom:" target="Ady-hI-5gd" id="DIl-cC-cCs"/>
+                                            </connections>
+                                        </menuItem>
+                                        <menuItem isSeparatorItem="YES" id="eu3-7i-yIM"/>
+                                        <menuItem title="Bring All to Front" id="LE2-aR-0XJ">
+                                            <modifierMask key="keyEquivalentModifierMask"/>
+                                            <connections>
+                                                <action selector="arrangeInFront:" target="Ady-hI-5gd" id="DRN-fu-gQh"/>
+                                            </connections>
+                                        </menuItem>
+                                    </items>
+                                </menu>
+                            </menuItem>
+                            <menuItem title="Help" id="wpr-3q-Mcd">
+                                <modifierMask key="keyEquivalentModifierMask"/>
+                                <menu key="submenu" title="Help" systemMenu="help" id="F2S-fz-NVQ">
+                                    <items>
+                                        <menuItem title="macos_package_test Help" keyEquivalent="?" id="FKE-Sm-Kum">
+                                            <connections>
+                                                <action selector="showHelp:" target="Ady-hI-5gd" id="y7X-2Q-9no"/>
+                                            </connections>
+                                        </menuItem>
+                                    </items>
+                                </menu>
+                            </menuItem>
+                        </items>
+                    </menu>
+                    <connections>
+                        <outlet property="delegate" destination="Voe-Tx-rLC" id="PrD-fu-P6m"/>
+                    </connections>
+                </application>
+                <customObject id="Voe-Tx-rLC" customClass="AppDelegate"/>
+                <customObject id="YLy-65-1bz" customClass="NSFontManager"/>
+                <customObject id="Ady-hI-5gd" userLabel="First Responder" customClass="NSResponder" sceneMemberID="firstResponder"/>
+            </objects>
+            <point key="canvasLocation" x="75" y="0.0"/>
+        </scene>
+        <!--Window Controller-->
+        <scene sceneID="R2V-B0-nI4">
+            <objects>
+                <windowController id="B8D-0N-5wS" sceneMemberID="viewController">
+                    <window key="window" title="Window" allowsToolTipsWhenApplicationIsInactive="NO" autorecalculatesKeyViewLoop="NO" releasedWhenClosed="NO" visibleAtLaunch="NO" animationBehavior="default" id="IQv-IB-iLA">
+                        <windowStyleMask key="styleMask" titled="YES" closable="YES" miniaturizable="YES" resizable="YES"/>
+                        <windowPositionMask key="initialPositionMask" leftStrut="YES" rightStrut="YES" topStrut="YES" bottomStrut="YES"/>
+                        <rect key="contentRect" x="196" y="240" width="480" height="270"/>
+                        <rect key="screenRect" x="0.0" y="0.0" width="1680" height="1027"/>
+                        <connections>
+                            <outlet property="delegate" destination="B8D-0N-5wS" id="98r-iN-zZc"/>
+                        </connections>
+                    </window>
+                    <connections>
+                        <segue destination="XfG-lQ-9wD" kind="relationship" relationship="window.shadowedContentViewController" id="cq2-FE-JQM"/>
+                    </connections>
+                </windowController>
+                <customObject id="Oky-zY-oP4" userLabel="First Responder" customClass="NSResponder" sceneMemberID="firstResponder"/>
+            </objects>
+            <point key="canvasLocation" x="75" y="250"/>
+        </scene>
+        <!--View Controller-->
+        <scene sceneID="hIz-AP-VOD">
+            <objects>
+                <viewController id="XfG-lQ-9wD" customClass="ViewController" sceneMemberID="viewController">
+                    <view key="view" id="m2S-Jp-Qdl">
+                        <rect key="frame" x="0.0" y="0.0" width="480" height="270"/>
+                        <autoresizingMask key="autoresizingMask"/>
+                    </view>
+                </viewController>
+                <customObject id="rPt-NT-nkU" userLabel="First Responder" customClass="NSResponder" sceneMemberID="firstResponder"/>
+            </objects>
+            <point key="canvasLocation" x="75" y="655"/>
+        </scene>
+    </scenes>
+</document>
diff --git a/onnxruntime/test/platform/apple/apple_package_test/macos_package_test/macos_package_test.entitlements b/onnxruntime/test/platform/apple/apple_package_test/macos_package_test/macos_package_test.entitlements
new file mode 100644
index 0000000000000..18aff0ce43c20
--- /dev/null
+++ b/onnxruntime/test/platform/apple/apple_package_test/macos_package_test/macos_package_test.entitlements
@@ -0,0 +1,10 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>com.apple.security.app-sandbox</key>
+	<true/>
+	<key>com.apple.security.files.user-selected.read-only</key>
+	<true/>
+</dict>
+</plist>
diff --git a/onnxruntime/test/platform/apple/apple_package_test/macos_package_test/main.m b/onnxruntime/test/platform/apple/apple_package_test/macos_package_test/main.m
new file mode 100644
index 0000000000000..ee939ac3752c1
--- /dev/null
+++ b/onnxruntime/test/platform/apple/apple_package_test/macos_package_test/main.m
@@ -0,0 +1,15 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+//
+//  AppDelegate.h
+//  macos_package_test
+//
+
+#import <Cocoa/Cocoa.h>
+
+int main(int argc, const char* argv[]) {
+  @autoreleasepool {
+    // Setup code that might create autoreleased objects goes here.
+  }
+  return NSApplicationMain(argc, argv);
+}
diff --git a/onnxruntime/test/platform/apple/apple_package_test/macos_package_testUITests/macos_package_uitest_cpp_api.mm b/onnxruntime/test/platform/apple/apple_package_test/macos_package_testUITests/macos_package_uitest_cpp_api.mm
new file mode 100644
index 0000000000000..613c6e545939f
--- /dev/null
+++ b/onnxruntime/test/platform/apple/apple_package_test/macos_package_testUITests/macos_package_uitest_cpp_api.mm
@@ -0,0 +1,108 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+//
+//  macos_package_test_cpp_api.mm
+//  macos_package_test_cpp_api
+//
+//  This file hosts the tests of ORT C++ API
+//
+
+#import <XCTest/XCTest.h>
+#include <math.h>
+#include <onnxruntime/onnxruntime_cxx_api.h>
+
+#if __has_include(<onnxruntime/coreml_provider_factory.h>)
+#define COREML_EP_AVAILABLE 1
+#else
+#define COREML_EP_AVAILABLE 0
+#endif
+
+#if COREML_EP_AVAILABLE
+#include <onnxruntime/coreml_provider_factory.h>
+#endif
+
+void testSigmoid(const char* modelPath, bool useCoreML) {
+  // This is an e2e test for ORT C++ API
+  Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "testCppAPI");
+
+  // initialize session options if needed
+  Ort::SessionOptions session_options;
+  session_options.SetIntraOpNumThreads(1);
+
+#if COREML_EP_AVAILABLE
+  if (useCoreML) {
+    const uint32_t flags = COREML_FLAG_USE_CPU_ONLY;
+    Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CoreML(session_options, flags));
+  }
+#else
+  (void)useCoreML;
+#endif
+
+  Ort::Session session(env, modelPath, session_options);
+
+  size_t input_tensor_size = 3 * 4 * 5;
+  float input_tensor_values[input_tensor_size];
+  float expected_output_values[input_tensor_size];
+  const char* input_node_names[] = {"x"};
+  const char* output_node_names[] = {"y"};
+  const int64_t input_node_dims[] = {3, 4, 5};
+
+  for (size_t i = 0; i < input_tensor_size; i++) {
+    input_tensor_values[i] = (float)i - 30;
+    expected_output_values[i] = 1.0f / (1 + exp(-input_tensor_values[i]));
+  }
+
+  auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
+  Ort::Value input_tensor =
+      Ort::Value::CreateTensor<float>(memory_info, input_tensor_values, input_tensor_size, input_node_dims, 3);
+  XCTAssert(input_tensor.IsTensor());
+
+  auto output_tensors = session.Run(Ort::RunOptions{nullptr}, input_node_names,
+                                    &input_tensor, 1, output_node_names, 1);
+  XCTAssertEqual(output_tensors.size(), 1);
+  XCTAssert(output_tensors.front().IsTensor());
+
+  // Get pointer to output tensor float values
+  float* output_values = output_tensors.front().GetTensorMutableData<float>();
+  for (size_t i = 0; i < input_tensor_size; i++) {
+    XCTAssertEqualWithAccuracy(expected_output_values[i], output_values[i], 1e-6);
+  }
+}
+
+@interface macos_package_testUITests : XCTestCase
+
+@end
+
+@implementation macos_package_testUITests
+
+- (void)setUp {
+  // Put setup code here. This method is called before the invocation of each test method in the class.
+
+  // In UI tests it is usually best to stop immediately when a failure occurs.
+  self.continueAfterFailure = NO;
+
+  // In UI tests it’s important to set the initial state - such as interface orientation - required for your tests before they run. The setUp method is a good place to do this.
+}
+
+- (void)tearDown {
+  // Put teardown code here. This method is called after the invocation of each test method in the class.
+}
+
+- (NSString*)getFilePath {
+  NSBundle* bundle = [NSBundle bundleForClass:[self class]];
+  NSString* ns_model_path = [bundle pathForResource:@"sigmoid" ofType:@"ort"];
+  XCTAssertNotNil(ns_model_path);
+  return ns_model_path;
+}
+
+- (void)testCppAPI_Basic {
+  testSigmoid([self getFilePath].UTF8String, false /* useCoreML */);
+}
+
+#if COREML_EP_AVAILABLE
+- (void)testCppAPI_Basic_CoreML {
+  testSigmoid([self getFilePath].UTF8String, true /* useCoreML */);
+}
+#endif
+
+@end
diff --git a/onnxruntime/test/platform/ios/ios_package_test/models/sigmoid.ort b/onnxruntime/test/platform/apple/apple_package_test/models/sigmoid.ort
similarity index 100%
rename from onnxruntime/test/platform/ios/ios_package_test/models/sigmoid.ort
rename to onnxruntime/test/platform/apple/apple_package_test/models/sigmoid.ort
diff --git a/onnxruntime/test/providers/base_tester.cc b/onnxruntime/test/providers/base_tester.cc
index 459a8c71ad611..16cce85f7cb0a 100644
--- a/onnxruntime/test/providers/base_tester.cc
+++ b/onnxruntime/test/providers/base_tester.cc
@@ -399,6 +399,8 @@ bool SetEpsForAllNodes(Graph& graph,
                        const std::vector<std::unique_ptr<IExecutionProvider>>& execution_providers,
                        const std::vector<std::shared_ptr<CustomRegistry>>* custom_registries) {
   const OpSchemaKernelTypeStrResolver kernel_type_str_resolver{};
+  const KernelRegistry::TypeConstraintMap type_constraint_map{};
+
   for (auto& node : graph.Nodes()) {
     if (node.OpType() == kConstant)
       continue;
@@ -426,13 +428,28 @@ bool SetEpsForAllNodes(Graph& graph,
         break;
       }
 
+      // check the internal NHWC domain if EP requests NHWC as it may only have a kernel registered in that domain
+      if (ep->GetPreferredLayout() == DataLayout::NHWC) {
+        const KernelCreateInfo* kci = nullptr;
+        auto status = ep->GetKernelRegistry()->TryFindKernel(ep->Type(),
+                                                             std::string_view(node.OpType()),
+                                                             std::string_view(kMSInternalNHWCDomain),
+                                                             node.SinceVersion(),
+                                                             type_constraint_map,
+                                                             &kci);
+        if (status.IsOK() && kci != nullptr) {
+          found = true;
+          break;
+        }
+      }
+
       // Check the EP has an impl for the node from custom_registries
       if (custom_registries != nullptr &&
           std::any_of(custom_registries->cbegin(), custom_registries->cend(),
-                      [&](auto reg) { return KernelRegistry::HasImplementationOf(
-                                          *reg->GetKernelRegistry(),
-                                          node, ep->Type(),
-                                          kernel_type_str_resolver); })) {
+                      [&](auto reg) {
+                        return KernelRegistry::HasImplementationOf(*reg->GetKernelRegistry(), node, ep->Type(),
+                                                                   kernel_type_str_resolver);
+                      })) {
         found = true;
         break;
       }
@@ -760,7 +777,7 @@ void BaseTester::ExecuteModelForEps(
     for (const auto& ep : execution_providers) {
       providers.append(ep->Type() + " ");
     }
-    LOGS_DEFAULT(WARNING) << "registered execution providers " << providers << "were unable to run the model.";
+    LOGS_DEFAULT(WARNING) << "registered execution providers " << providers << " were unable to run the model.";
     return;
   }
 
diff --git a/onnxruntime/test/providers/compare_provider_test_utils.cc b/onnxruntime/test/providers/compare_provider_test_utils.cc
index 94fb03540e3f8..3ef74259e27b6 100644
--- a/onnxruntime/test/providers/compare_provider_test_utils.cc
+++ b/onnxruntime/test/providers/compare_provider_test_utils.cc
@@ -121,5 +121,83 @@ void CompareOpTester::CompareWithCPU(const std::string& target_provider_type,
   }
 }
 
+void CompareOpTester::CompareEPs(const std::shared_ptr<IExecutionProvider>& source_execution_provider,
+                                 std::vector<std::shared_ptr<IExecutionProvider>>& target_execution_providers,
+                                 double per_sample_tolerance,
+                                 double relative_per_sample_tolerance,
+                                 const bool need_cpu_cast,
+                                 const std::unordered_map<std::string, int>& extra_domain_to_version) {
+  SetTestFunctionCalled();
+
+  auto& model = BuildModel(extra_domain_to_version);
+  auto& graph = model.MainGraph();
+
+  // In InferenceSession::Initialize(), the call to graph partitioner, which is responsible
+  // for Inlining function bodies for ops whose kernel is missing happens before the
+  // Cast Transformer. As a result, for MLFloat16 tests where the node is missing a CPU kernel,
+  // the function body is instead used for CPU pass. This option allows the comparison with
+  // the CPU kernel by adding the input/output casts before looking for a registered CPU kernel.
+  if (need_cpu_cast) {
+    InsertCastTransformer transformer("Test", GetExecutionProvider(kCpuExecutionProvider)->GetKernelRegistry().get());
+    bool modified = false;
+    ASSERT_STATUS_OK(transformer.Apply(graph, modified, DefaultLoggingManager().DefaultLogger()));
+  }
+
+  ASSERT_STATUS_OK(graph.Resolve());
+
+  // Hookup the inputs and outputs
+  std::unordered_map<std::string, OrtValue> feeds;
+  std::vector<std::string> output_names;
+  FillFeedsAndOutputNames(feeds, output_names);
+
+  // Run the model
+  SessionOptions so;
+  so.session_logid = Op();
+
+  InferenceSession source_session_object{so, GetEnvironment()};
+  ASSERT_STATUS_OK(source_session_object.RegisterExecutionProvider(source_execution_provider));
+
+  // first run with source provider
+  std::string s1;
+  model.ToProto().SerializeToString(&s1);
+  std::istringstream model_proto_str(s1);
+
+  ASSERT_STATUS_OK(source_session_object.Load(model_proto_str));
+
+  ASSERT_STATUS_OK(source_session_object.Initialize());
+
+  std::vector<OrtValue> source_fetches;
+  ASSERT_STATUS_OK(source_session_object.Run({}, feeds, output_names, &source_fetches));
+
+  for (auto& target_execution_provider : target_execution_providers) {
+    // run with target provider
+    // build the graph again as the other graphs may be with casts
+    auto& tp_model = BuildModel(extra_domain_to_version);
+    auto& tp_graph = tp_model.MainGraph();
+
+    ASSERT_STATUS_OK(tp_graph.Resolve());
+
+    InferenceSession target_session_object{so, GetEnvironment()};
+    ASSERT_STATUS_OK(target_session_object.RegisterExecutionProvider(target_execution_provider));
+
+    std::string s2;
+    tp_model.ToProto().SerializeToString(&s2);
+    std::istringstream model_proto_str1(s2);
+    ASSERT_STATUS_OK(target_session_object.Load(model_proto_str1));
+
+    ASSERT_STATUS_OK(target_session_object.Initialize());
+
+    std::vector<OrtValue> target_fetches;
+    ASSERT_STATUS_OK(target_session_object.Run({}, feeds, output_names, &target_fetches));
+
+    // compare
+    ASSERT_TRUE(source_fetches.size() == target_fetches.size());
+    for (size_t i = 0; i < source_fetches.size(); i++) {
+      auto ret = CompareOrtValue(target_fetches[i], source_fetches[i], per_sample_tolerance,
+                                 relative_per_sample_tolerance, false);
+      EXPECT_EQ(ret.first, COMPARE_RESULT::SUCCESS) << ret.second;
+    }
+  }
+}
 }  // namespace test
 }  // namespace onnxruntime
diff --git a/onnxruntime/test/providers/compare_provider_test_utils.h b/onnxruntime/test/providers/compare_provider_test_utils.h
index 924fe405ba8dd..155016d7e69a2 100644
--- a/onnxruntime/test/providers/compare_provider_test_utils.h
+++ b/onnxruntime/test/providers/compare_provider_test_utils.h
@@ -3,6 +3,11 @@
 
 #pragma once
 
+#include <string>
+#include <memory>
+#include <vector>
+#include <unordered_map>
+
 #include "core/graph/constants.h"
 #include "test/common/tensor_op_test_utils.h"
 #include "test/providers/provider_test_utils.h"
@@ -22,6 +27,13 @@ class CompareOpTester : public OpTester {
                       double relative_per_sample_tolerance = 1e-4,
                       const bool need_cpu_cast = false,
                       const std::unordered_map<std::string, int>& extra_domain_to_version = {});
+
+  void CompareEPs(const std::shared_ptr<IExecutionProvider>& source_execution_provider,
+                  std::vector<std::shared_ptr<IExecutionProvider>>& target_execution_providers,
+                  double per_sample_tolerance,
+                  double relative_per_sample_tolerance = 1e-4,
+                  const bool need_cpu_cast = false,
+                  const std::unordered_map<std::string, int>& extra_domain_to_version = {});
 };
 
 }  // namespace test
diff --git a/onnxruntime/test/providers/cpu/controlflow/loop_test.cc b/onnxruntime/test/providers/cpu/controlflow/loop_test.cc
index 8dcf632192249..9c0b779870c70 100644
--- a/onnxruntime/test/providers/cpu/controlflow/loop_test.cc
+++ b/onnxruntime/test/providers/cpu/controlflow/loop_test.cc
@@ -358,7 +358,11 @@ void RunTest(int64_t max_iterations,
     // we want the CUDA provider to be first, and the CPU provider second. all except the Loop node should run on
     // CUDA given that, which creates the scenario where we need to copy to/from CPU to execute the Loop node correctly.
     std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
+#if defined(USE_CUDA)
     execution_providers.push_back(DefaultCudaExecutionProvider());
+#elif defined(USE_ROCM)
+    execution_providers.push_back(DefaultRocmExecutionProvider());
+#endif
     execution_providers.push_back(DefaultCpuExecutionProvider());
 
     test.Run(expect_result, failure_message, {kTensorrtExecutionProvider}, nullptr, &execution_providers);
@@ -1038,8 +1042,8 @@ TEST(Loop, IterationCountAsOutput) {
   test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
 }
 
-#ifdef USE_CUDA
-// test that when part of the subgraph run on CUDA it executes successfully
+#if defined(USE_CUDA) || defined(USE_ROCM)
+// test that when part of the subgraph run on CUDA/ROCm it executes successfully
 TEST(Loop, MixedExecutionProviders) {
   RunOptions options{};
   options.mixed_execution_providers = true;
diff --git a/onnxruntime/test/providers/cpu/controlflow/scan_test.cc b/onnxruntime/test/providers/cpu/controlflow/scan_test.cc
index 6d8e05b93510a..3d46893cdb82d 100644
--- a/onnxruntime/test/providers/cpu/controlflow/scan_test.cc
+++ b/onnxruntime/test/providers/cpu/controlflow/scan_test.cc
@@ -411,7 +411,11 @@ static void RunTest_v9(const std::string test_name, int64_t sequence_len, int64_
     // we want the CUDA provider to be first, and the CPU provider second. all except the Scan node should run on
     // CUDA given that, which creates the scenario where we need to copy to/from CPU to execute the Scan node correctly.
     std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
+#if defined(USE_CUDA)
     execution_providers.push_back(DefaultCudaExecutionProvider());
+#elif defined(USE_ROCM)
+    execution_providers.push_back(DefaultRocmExecutionProvider());
+#endif
     execution_providers.push_back(DefaultCpuExecutionProvider());
 
     test.Run(expect_result, failure_message, options.excluded_provider_types, nullptr, &execution_providers);
@@ -578,7 +582,7 @@ TEST(Scan9, DISABLED_BadShape) {
   ShortSequenceOneInBatchOneLoopStateVar(
       options,
       "Node:concat Output:concat_out_1 [ShapeInferenceError] Mismatch between number of source and target dimensions. "
-      "Source=2 Target=1");
+      "inferred=2 declared=1");
 }
 
 TEST(Scan8, ShortSequenceTwoInBatchOneLoopStateVar) {
@@ -1162,7 +1166,11 @@ void UnknownDimInSubgraphOutput(bool is_v8, bool mixed_execution_providers = fal
     // we want the CUDA provider to be first, and the CPU provider second. all except the Scan node should run on
     // CUDA given that, which creates the scenario where we need to copy to/from CPU to execute the Scan node correctly.
     std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
+#if defined(USE_CUDA)
     execution_providers.push_back(DefaultCudaExecutionProvider());
+#elif defined(USE_ROCM)
+    execution_providers.push_back(DefaultRocmExecutionProvider());
+#endif
     execution_providers.push_back(DefaultCpuExecutionProvider());
 
     test.Run(OpTester::ExpectResult::kExpectSuccess, "", RunOptions().excluded_provider_types, nullptr,
@@ -1174,7 +1182,7 @@ void UnknownDimInSubgraphOutput(bool is_v8, bool mixed_execution_providers = fal
 
 TEST_8_AND_9(UnknownDimInSubgraphOutput);
 
-#ifdef USE_CUDA
+#if defined(USE_CUDA) || defined(USE_ROCM)
 TEST(Scan, MixedExecutionProviders) {
   RunOptions options{};
   options.is_v8 = false;
diff --git a/onnxruntime/test/providers/cpu/math/element_wise_ops_test.cc b/onnxruntime/test/providers/cpu/math/element_wise_ops_test.cc
index 257ce977700a6..5e746ed0c62d4 100644
--- a/onnxruntime/test/providers/cpu/math/element_wise_ops_test.cc
+++ b/onnxruntime/test/providers/cpu/math/element_wise_ops_test.cc
@@ -1238,7 +1238,7 @@ TEST(MathOpTest, Sum_8_Test1) {
   // This test runs fine on CPU Plugin
   test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kOpenVINOExecutionProvider});
 #else
-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});                    // TensorRT: Expected output shape [{3,3,3}] did not match run output shape [{3,1,1}] for sum
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});  // TensorRT: Expected output shape [{3,3,3}] did not match run output shape [{3,1,1}] for sum
 #endif
 }
 
@@ -1264,7 +1264,7 @@ TEST(MathOpTest, Sum_8_Test1_double) {
   // This test runs fine on CPU Plugin
   test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kOpenVINOExecutionProvider});
 #else
-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});                    // TensorRT: Expected output shape [{3,3,3}] did not match run output shape [{3,1,1}] for sum
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});  // TensorRT: Expected output shape [{3,3,3}] did not match run output shape [{3,1,1}] for sum
 #endif
 }
 TEST(MathOpTest, Sum_8_Test2) {
diff --git a/onnxruntime/test/providers/cpu/math/softmax_test.cc b/onnxruntime/test/providers/cpu/math/softmax_test.cc
index b94c17c3b0e24..6eb72255bdf9a 100644
--- a/onnxruntime/test/providers/cpu/math/softmax_test.cc
+++ b/onnxruntime/test/providers/cpu/math/softmax_test.cc
@@ -421,7 +421,7 @@ TEST(SoftmaxOperator, GH15949_regression_test) {
                           {0.00032932f, 0.01798029f, 0.9816904f});
 
   // disable TRT as it does not support axis=0 as used by the model
-  tester.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kCoreMLExecutionProvider});
+  tester.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
 }
 
 }  // namespace test
diff --git a/onnxruntime/test/providers/cpu/model_tests.cc b/onnxruntime/test/providers/cpu/model_tests.cc
index ef2d7e31654ba..859e082716760 100644
--- a/onnxruntime/test/providers/cpu/model_tests.cc
+++ b/onnxruntime/test/providers/cpu/model_tests.cc
@@ -1,6 +1,7 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
+#include <iostream>
 #include <iterator>
 #include <gtest/gtest.h>
 
@@ -13,6 +14,7 @@
 #include "asserts.h"
 #include <core/platform/path_lib.h>
 #include "default_providers.h"
+#include "test/onnx/TestCase.h"
 #include <string>
 #include <codecvt>
 #include <locale>
@@ -66,23 +68,6 @@ namespace test {
 // parameter is provider_name + "_" + model_path
 class ModelTest : public testing::TestWithParam<std::basic_string<ORTCHAR_T>> {};
 
-namespace {
-struct BrokenTest {
-  std::string test_name_;
-  std::string reason_;
-  std::set<std::string> broken_opset_versions_ = {};  // apply to all versions if empty
-  BrokenTest(std::string name, std::string reason) : test_name_(std::move(name)), reason_(std::move(reason)) {
-  }
-
-  BrokenTest(std::string name, std::string reason, const std::initializer_list<std::string>& opversions)
-      : test_name_(std::move(name)), reason_(std::move(reason)), broken_opset_versions_(opversions) {
-  }
-
-  bool operator<(const struct BrokenTest& test) const {
-    return strcmp(test_name_.c_str(), test.test_name_.c_str()) < 0;
-  }
-};
-}  // namespace
 #ifdef GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST
 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(ModelTest);
 #endif
@@ -114,488 +99,9 @@ TEST_P(ModelTest, Run) {
     SkipTest("it has the training domain. No pipeline should need to run these tests.");
     return;
   }
-  std::set<BrokenTest> broken_tests = {
-      {"slice_neg_steps",
-       "Type parameter (Tind) bound to different types (tensor(int64) and tensor(int32) in node ()."},
-      {"cast_BFLOAT16_to_FLOAT", "Unexpected input data type"},
-      {"loop13_seq", "Creation of empty sequences is currently not supported in the test runner"},
-      {"sequence_insert_at_front", "shape mismatch, expect {4} got {3}"},
-      {"cast_FLOAT_to_BFLOAT16", "expect uint16 got bfloat16"},
-      {"mnist", "Input data isn't in valid range"},
-      {"BERT_Squad", "test data bug"},
-      {"constantofshape_float_ones", "test data bug", {"opset9", "opset10"}},
-      {"constantofshape_int_zeros", "test data bug", {"opset9", "opset10"}},
-      {"cast_STRING_to_FLOAT", "Linux CI has old ONNX python package with bad test data", {"opset9", "opset10"}},
-      // Numpy float to string has unexpected rounding for some results given numpy default precision is meant to be 8.
-      // "e.g. 0.296140194 -> '0.2961402' not '0.29614019'. ORT produces the latter with precision set to 8,
-      // which doesn't match the expected output that was generated with numpy.
-      {"cast_FLOAT_to_STRING", "Numpy float to string has unexpected rounding for some results."},
-      {"tf_nasnet_large", "disable temporarily"},
-      {"tf_nasnet_mobile", "disable temporarily"},
-      {"tf_pnasnet_large", "disable temporarily"},
-      {"shrink", "test case is wrong", {"opset9"}},
-      {"maxpool_with_argmax_2d_precomputed_strides", "ShapeInferenceError"},
-      {"tf_inception_v2", "result mismatch"},
-      {"tf_resnet_v1_50", "result mismatch when Conv BN Fusion is applied"},
-      {"tf_resnet_v1_101", "result mismatch when Conv BN Fusion is applied"},
-      {"tf_resnet_v1_152", "result mismatch when Conv BN Fusion is applied"},
-      {"mxnet_arcface", "Model is an invalid ONNX model"},
-      {"unique_not_sorted_without_axis", "Expected data for 'Y' is incorrect and in sorted order."},
-      {"cumsum_1d_reverse_exclusive", "only failing linux GPU CI. Likely build error."},
-      {"resize_downsample_scales_cubic_align_corners", "results mismatch with onnx tests"},
-      {"resize_downsample_scales_linear_align_corners", "results mismatch with onnx tests"},
-      {"resize_tf_crop_and_resize", "Bad onnx test output. Needs test fix."},
-      {"resize_upsample_sizes_nearest_ceil_half_pixel", "Bad onnx test output. Needs test fix."},
-      {"resize_upsample_sizes_nearest_floor_align_corners", "Bad onnx test output. Needs test fix."},
-      {"resize_upsample_sizes_nearest_round_prefer_ceil_asymmetric", "Bad onnx test output. Needs test fix."},
-      {"bitshift_right_uint16", "BitShift(11) uint16 support not enabled currently"},
-      {"bitshift_left_uint16", "BitShift(11) uint16 support not enabled currently"},
-      {"maxunpool_export_with_output_shape",
-       "Invalid output in ONNX test. See https://github.com/onnx/onnx/issues/2398"},
-      {"cntk_simple_seg", "Bad onnx test output caused by wrong SAME_UPPER/SAME_LOWER for ConvTranspose"},
-      {"training_dropout", "result differs", {}},               // Temporary, subsequent PR will remove this.
-      {"training_dropout_default", "result differs", {}},       // Temporary, subsequent PR will remove this.
-      {"training_dropout_default_mask", "result differs", {}},  // Temporary, subsequent PR will remove this.
-      {"training_dropout_mask", "result differs", {}},          // Temporary, subsequent PR will remove this.
-      {"batchnorm_epsilon_training_mode", "training only", {}},
-      {"batchnorm_example_training_mode", "training only", {}},
-      {"bernoulli", "type error", {}},
-      {"bernoulli_double", "type error", {}},
-      {"bernoulli_double_expanded", "type error", {}},
-      {"bernoulli_expanded", "type error", {}},
-      {"bernoulli_seed", "type error", {}},
-      {"bernoulli_seed_expanded", "type error", {}},
-      {"castlike_BFLOAT16_to_FLOAT", "type error", {}},
-      {"castlike_BFLOAT16_to_FLOAT_expanded", "type error", {}},
-      {"castlike_FLOAT_to_BFLOAT16", "type error", {}},
-      {"castlike_FLOAT_to_BFLOAT16_expanded", "type error", {}},
-      {"castlike_FLOAT_to_STRING", "type error", {}},
-      {"castlike_FLOAT_to_STRING_expanded", "type error", {}},
-      {"convtranspose_autopad_same", "Test data has been corrected in ONNX 1.10.", {"opset13", "opset14"}},
-      {"gru_batchwise", "type error", {}},
-      {"lstm_batchwise", "type error", {}},
-      {"optional_get_element", "type error", {}},
-      {"optional_get_element_sequence", "type error", {}},
-      {"optional_has_element", "type error", {}},
-      {"optional_has_element_empty", "type error", {}},
-      {"shape_end_1", "type error", {}},
-      {"shape_end_negative_1", "type error", {}},
-      {"shape_start_1", "type error", {}},
-      {"shape_start_1_end_2", "type error", {}},
-      {"shape_start_1_end_negative_1", "type error", {}},
-      {"shape_start_negative_1", "type error", {}},
-      {"simple_rnn_batchwise", "type error", {}},
-      {"mod_float_mixed_sign_example", "fmod attribute must be true for floating point types", {}},
-      {"col2im_pads", "result mismatch", {"opset18"}},
-#ifdef ENABLE_TRAINING_CORE
-      {"adagrad", "not a registered function/op", {}},                  // Op not registered.
-      {"adagrad_multiple", "not a registered function/op", {}},         // Op not registered.
-      {"adam", "not a registered function/op", {}},                     // Op not registered.
-      {"adam_multiple", "not a registered function/op", {}},            // Op not registered.
-      {"gradient_of_add", "not a registered function/op", {}},          // Op not registered.
-      {"gradient_of_add_and_mul", "not a registered function/op", {}},  // Op not registered.
-      {"momentum", "not a registered function/op", {}},                 // Op not registered.
-      {"momentum_multiple", "not a registered function/op", {}},        // Op not registered.
-      {"nesterov_momentum", "not a registered function/op", {}},        // Op not registered.
-      {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight", "type error", {"opset12"}},
-      {"softmax_cross_entropy_mean_weight_ignore_index_log_prob", "type error", {"opset12"}},
-      {"softmax_cross_entropy_input_shape_is_NCd1_mean_weight_negative_ignore_index_log_prob",
-       "type error",
-       {"opset12"}},
-      {"softmax_cross_entropy_mean_weight_log_prob", "type error", {"opset12"}},
-      {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_log_prob", "type error", {"opset12"}},
-      {"softmax_cross_entropy_mean_weight_ignore_index_3d", "type error", {"opset12"}},
-      {"softmax_cross_entropy_mean_weight_ignore_index_4d_log_prob", "type error", {"opset12"}},
-      {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_log_prob", "type error", {"opset12"}},
-      {"softmax_cross_entropy_mean_weight_ignore_index_4d", "type error", {"opset12"}},
-      {"softmax_cross_entropy_mean", "type error", {"opset12"}},
-      {"softmax_cross_entropy_mean_log_prob", "type error", {"opset12"}},
-      {"softmax_cross_entropy_mean_no_weight_ignore_index", "type error", {"opset12"}},
-      {"softmax_cross_entropy_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index_log_prob",
-       "type error",
-       {"opset12"}},
-      {"softmax_cross_entropy_mean_3d_log_prob", "type error", {"opset12"}},
-      {"softmax_cross_entropy_none_log_prob", "type error", {"opset12"}},
-      {"softmax_cross_entropy_mean_3d", "type error", {"opset12"}},
-      {"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index", "type error", {"opset12"}},
-      {"softmax_cross_entropy_mean_weight_ignore_index_3d_log_prob", "type error", {"opset12"}},
-      {"softmax_cross_entropy_mean_no_weight_ignore_index_3d_log_prob", "type error", {"opset12"}},
-      {"softmax_cross_entropy_none_weights_log_prob", "type error", {"opset12"}},
-      {"softmax_cross_entropy_sum_log_prob", "type error", {"opset12"}},
-      {"softmax_cross_entropy_mean_weight_ignore_index", "type error", {"opset12"}},
-      {"softmax_cross_entropy_mean_no_weight_ignore_index_log_prob", "type error", {"opset12"}},
-      {"softmax_cross_entropy_mean_no_weight_ignore_index_3d", "type error", {"opset12"}},
-      {"softmax_cross_entropy_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index", "type error", {"opset12"}},
-      {"softmax_cross_entropy_sum", "type error", {"opset12"}},
-      {"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_log_prob",
-       "type error",
-       {"opset12"}},
-      {"softmax_cross_entropy_none_weights", "type error", {"opset12"}},
-      {"softmax_cross_entropy_mean_no_weight_ignore_index_4d_log_prob", "type error", {"opset12"}},
-      {"softmax_cross_entropy_none", "type error", {"opset12"}},
-      {"softmax_cross_entropy_input_shape_is_NCd1_mean_weight_negative_ignore_index", "type error", {"opset12"}},
-      {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight", "type error", {"opset12"}},
-      {"softmax_cross_entropy_mean_weight", "type error", {"opset12"}},
-      {"softmax_cross_entropy_mean_no_weight_ignore_index_4d", "type error", {"opset12"}},
-#endif
-      {"mask_rcnn_keras", "this model currently has an invalid contrib op version set to 10", {}}};
-
-  // Some EPs may fail to pass some specific testcases.
-  // For example TenosrRT EP may fail on FLOAT16 related testcases if GPU doesn't support float16.
-  // Instead of list all these testcases, we can use following keyword set to filter out testcases wchich contain
-  // specific keyword.
-  std::set<std::string> broken_tests_keyword_set = {};
-
-  if (provider_name == "cuda") {
-#ifdef _WIN32
-    broken_tests.insert({"LSTM_Seq_lens_unpacked", "this test fails with new image since Aug 25."});
-    broken_tests.insert({"bidaf", "this test fails with new image since Aug 25."});
-    broken_tests.insert({"Candy", "Flaky test, need to investigate", {"opset9"}});
-#else
-    broken_tests.insert({"bidaf", "this test should be recovered when multi-gpu pipeline deprecates NV12", {"opset9"}});
-#endif
-  }
-
-  if (provider_name == "nnapi") {
-    broken_tests.insert({"scan9_sum", "Error with the extra graph"});
-    broken_tests.insert({"scan_sum", "Error with the extra graph"});
-    broken_tests.insert({"mvn_expanded", "Failed to find kernel for MemcpyFromHost(1) (node Memcpy_1)"});
-    broken_tests.insert({"dynamicquantizelinear_expanded", "Temporarily disabled pending investigation"});
-    broken_tests.insert({"dynamicquantizelinear_max_adjusted_expanded", "Temporarily disabled pending investigation"});
-    broken_tests.insert({"dynamicquantizelinear_min_adjusted_expanded", "Temporarily disabled pending investigation"});
-    broken_tests.insert({"gemm_transposeB", "Temporarily disabled pending investigation"});
-    broken_tests.insert({"range_float_type_positive_delta_expanded", "Temporarily disabled pending investigation"});
-    broken_tests.insert({"range_int32_type_negative_delta_expanded", "Temporarily disabled pending investigation"});
-    broken_tests.insert({"convtranspose_1d", "1d convtranspose not supported yet"});
-    broken_tests.insert({"convtranspose_3d", "3d convtranspose not supported yet"});
-    broken_tests.insert({"maxpool_2d_uint8", "result mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NC_expanded", "shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_expanded", "shape mismatch"});
-    broken_tests.insert(
-        {"negative_log_likelihood_loss_input_shape_is_NCd1d2_reduction_mean_expanded", "shape mismatch"});
-    broken_tests.insert(
-        {"negative_log_likelihood_loss_input_shape_is_NCd1d2_reduction_sum_expanded", "shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_expanded", "shape mismatch"});
-    broken_tests.insert(
-        {"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_reduction_mean_expanded", "shape mismatch"});
-    broken_tests.insert(
-        {"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_reduction_sum_expanded", "shape mismatch"});
-    // Disable based on George Wu's recommendation.
-    broken_tests.insert(
-        {"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_reduction_sum_ignore_index_expanded",
-         "shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_iinput_shape_is_NCd1_weight_ignore_index", "Shape mismatch"});
-    broken_tests.insert(
-        {"negative_log_likelihood_loss_iinput_shape_is_NCd1_weight_ignore_index_expanded", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NC", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1_expanded", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1_ignore_index", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1_ignore_index_expanded", "Shape mismatch"});
-    broken_tests.insert(
-        {"negative_log_likelihood_loss_input_shape_is_NCd1_mean_weight_negative_ignore_index", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1_mean_weight_negative_ignore_index_expanded",
-                         "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1_weight", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1_weight_expanded", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2", "Shape mismatch"});
-    broken_tests.insert(
-        {"negative_log_likelihood_loss_input_shape_is_NCd1d2_no_weight_reduction_mean_ignore_index", "Shape mismatch"});
-    broken_tests.insert(
-        {"negative_log_likelihood_loss_input_shape_is_NCd1d2_no_weight_reduction_mean_ignore_index_expanded",
-         "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_reduction_mean", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_reduction_sum", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight", "Shape mismatch"});
-    broken_tests.insert(
-        {"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_reduction_mean", "Shape mismatch"});
-    broken_tests.insert(
-        {"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_reduction_sum", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_reduction_sum_ignore_index",
-                         "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index",
-                         "Shape mismatch"});
-    broken_tests.insert(
-        {"negative_log_likelihood_loss_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_expanded",
-         "Shape mismatch"});
-    broken_tests.insert(
-        {"negative_log_likelihood_loss_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index_expanded",
-                         "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_mean_weight", "Shape mismatch"});
-    broken_tests.insert(
-        {"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_mean_weight_expanded", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_none_no_weight", "Shape mismatch"});
-    broken_tests.insert(
-        {"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_none_no_weight_expanded", "Shape mismatch"});
-    broken_tests.insert(
-        {"softmax_cross_entropy_input_shape_is_NCd1_mean_weight_negative_ignore_index", "Shape mismatch"});
-    broken_tests.insert(
-        {"softmax_cross_entropy_input_shape_is_NCd1_mean_weight_negative_ignore_index_expanded", "Shape mismatch"});
-    broken_tests.insert(
-        {"softmax_cross_entropy_input_shape_is_NCd1_mean_weight_negative_ignore_index_log_prob", "Shape mismatch"});
-    broken_tests.insert(
-        {"softmax_cross_entropy_input_shape_is_NCd1_mean_weight_negative_ignore_index_log_prob_expanded",
-         "Shape mismatch"});
-    broken_tests.insert(
-        {"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_expanded",
-                         "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_log_prob",
-                         "Shape mismatch"});
-    broken_tests.insert(
-        {"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_log_prob_expanded",
-         "Shape mismatch"});
-    broken_tests.insert(
-        {"softmax_cross_entropy_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index", "Shape mismatch"});
-    broken_tests.insert(
-        {"softmax_cross_entropy_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index_expanded", "Shape mismatch"});
-    broken_tests.insert(
-        {"softmax_cross_entropy_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index_log_prob", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index_log_prob_expanded",
-                         "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_log_prob", "Shape mismatch"});
-    broken_tests.insert(
-        {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_log_prob_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight", "Shape mismatch"});
-    broken_tests.insert(
-        {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_expanded", "Shape mismatch"});
-    broken_tests.insert(
-        {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_log_prob", "Shape mismatch"});
-    broken_tests.insert(
-        {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_log_prob_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_3d", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_3d_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_3d_log_prob", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_3d_log_prob_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_log_prob", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_log_prob_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_no_weight_ignore_index", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_no_weight_ignore_index_3d", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_no_weight_ignore_index_3d_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_no_weight_ignore_index_3d_log_prob", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_no_weight_ignore_index_3d_log_prob_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_no_weight_ignore_index_4d", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_no_weight_ignore_index_4d_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_no_weight_ignore_index_4d_log_prob", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_no_weight_ignore_index_4d_log_prob_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_no_weight_ignore_index_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_no_weight_ignore_index_log_prob", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_no_weight_ignore_index_log_prob_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_weight", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_weight_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_weight_ignore_index", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_weight_ignore_index_3d", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_weight_ignore_index_3d_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_weight_ignore_index_3d_log_prob", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_weight_ignore_index_3d_log_prob_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_weight_ignore_index_4d", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_weight_ignore_index_4d_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_weight_ignore_index_4d_log_prob", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_weight_ignore_index_4d_log_prob_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_weight_ignore_index_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_weight_ignore_index_log_prob", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_weight_ignore_index_log_prob_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_weight_log_prob", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_mean_weight_log_prob_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_none", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_none_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_none_log_prob", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_none_log_prob_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_none_weights", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_none_weights_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_none_weights_log_prob", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_none_weights_log_prob_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_sum", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_sum_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_sum_log_prob", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_sum_log_prob_expanded", "Shape mismatch"});
-  }
-
-  if (provider_name == "tensorrt") {
-    broken_tests.insert({"convtranspose_with_kernel", "It causes segmentation fault"});
-    broken_tests.insert({"convtranspose_pad", "It causes segmentation fault"});
-    broken_tests.insert({"convtranspose_kernel_shape", "It causes segmentation fault"});
-    broken_tests.insert({"dynamicquantizelinear_expanded", "It causes segmentation fault"});
-    broken_tests.insert({"dynamicquantizelinear_min_adjusted_expanded", "It causes segmentation fault"});
-    broken_tests.insert({"dynamicquantizelinear_max_adjusted_expanded", "It causes segmentation fault"});
-
-    broken_tests.insert({"basic_conv_with_padding",
-                         "Cannot set more than one input unless network has Q/DQ layers. TensorRT EP could not build "
-                         "engine for fused node"});
-    broken_tests.insert({"basic_conv_without_padding",
-                         "Cannot set more than one input unless network has Q/DQ layers. TensorRT EP could not build "
-                         "engine for fused node"});
-    broken_tests.insert({"conv_with_strides_no_padding",
-                         "Cannot set more than one input unless network has Q/DQ layers. TensorRT EP could not build "
-                         "engine for fused node"});
-
-    broken_tests.insert({"conv_with_autopad_same",
-                         "Internal Error (node_of_y: Cannot set more than one input unless network has Q/DQ layers.)"});
-
-    // unsupported tests since opset16
-    broken_tests.insert({"sequence_map_add_2_sequences", "not supported by TensorRT EP"});
-    broken_tests.insert({"sequence_map_extract_shapes", "not supported by TensorRT EP."});
-    broken_tests.insert({"sequence_map_add_1_sequence_1_tensor", "not supported by TensorRT EP."});
-    broken_tests.insert({"sequence_map_identity_1_sequence", "not supported by TensorRT EP."});
-    broken_tests.insert({"sequence_map_identity_2_sequences", "not supported by TensorRT EP."});
-    broken_tests.insert({"sequence_map_identity_1_sequence_1_tensor", "not supported by TensorRT EP."});
-    broken_tests.insert({"leakyrelu_expanded", "not supported by TensorRT EP."});
-    broken_tests.insert({"leakyrelu_default_expanded", "not supported by TensorRT EP."});
-    broken_tests.insert({"leakyrelu_example_expanded", "not supported by TensorRT EP."});
-    broken_tests.insert({"prelu_broadcast_expanded", "not supported by TensorRT EP."});
-    broken_tests.insert({"prelu_example_expanded", "not supported by TensorRT EP."});
-    broken_tests_keyword_set.insert({"scatternd_add"});
-    broken_tests_keyword_set.insert({"scatternd_multiply"});
-    broken_tests_keyword_set.insert({"scatter_elements_with_duplicate_indices"});
-
-    // sce op is not supported
-    broken_tests_keyword_set.insert({"sce"});
-
-    // TensorRT EP CI uses Nvidia Tesla M60 which doesn't support fp16.
-    broken_tests_keyword_set.insert({"FLOAT16"});
-  }
-
-  if (provider_name == "dml") {
-    broken_tests.insert({"tinyyolov3", "The parameter is incorrect"});
-    broken_tests.insert({"PixelShuffle", "Test requires 6D Reshape, which isn't supported by DirectML"});
-    broken_tests.insert({"operator_permute2", "Test requires 6D Transpose, which isn't supported by DirectML"});
-    broken_tests.insert({"resize_downsample_linear",
-                         "ORT 0.4 uses asymmetric but will conform to half_pixel in the next ONNX version."});
-    broken_tests.insert(
-        {"resize_upsample_linear", "ORT 0.4 uses asymmetric but will conform to half_pixel in the next ONNX version."});
-    broken_tests.insert(
-        {"resize_upsample_linear", "ORT 0.4 uses asymmetric but will conform to half_pixel in the next ONNX version."});
-
-    // These tests are temporarily disabled pending investigation
-    broken_tests.insert({"dynamicquantizelinear_expanded", "Temporarily disabled pending investigation"});
-    broken_tests.insert({"dynamicquantizelinear_max_adjusted_expanded", "Temporarily disabled pending investigation"});
-    broken_tests.insert({"dynamicquantizelinear_min_adjusted_expanded", "Temporarily disabled pending investigation"});
-    broken_tests.insert({"mxnet_arcface", "Temporarily disabled pending investigation"});
-    broken_tests.insert({"yolov3", "Temporarily disabled pending investigation"});
-    broken_tests.insert({"tf_inception_v2", "Temporarily disabled pending investigation"});
-    broken_tests.insert({"fp16_inception_v1", "Temporarily disabled pending investigation"});
-    broken_tests.insert({"candy", "Temporarily disabled pending investigation"});
-    broken_tests.insert({"BERT_Squad", "Temporarily disabled pending investigation"});
-    broken_tests.insert({"LSTM_Seq_lens_unpacked", "The parameter is incorrect"});
-
-    broken_tests.insert({"resize_downsample_scales_linear",
-                         "DML uses half_pixel and this test assumed \"asymmetric\" but does not include \"mode\""});
-    broken_tests.insert({"resize_downsample_sizes_linear_pytorch_half_pixel",
-                         "DML does not support downsampling by such a large factor - skips input pixels"});
-    broken_tests.insert({"resize_downsample_sizes_nearest",
-                         "DML uses pixel centers for nearest, rounding 1 value off for the middle column"});
-    broken_tests.insert({"resize_upsample_sizes_nearest",
-                         "DML uses pixel centers for nearest, which makes more sense (the 3rd row mismatches)"});
-    broken_tests.insert({"unsqueeze_three_axes", "DML does not support 6D tensors"});
-    broken_tests.insert({"unsqueeze_unsorted_axes", "DMLdoes not support 6D tensors"});
-
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index",
-                         "DML does not support 5D+ tensors"});
-    broken_tests.insert(
-        {"negative_log_likelihood_loss_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_expanded",
-         "DML does not support 5D+ tensors"});
-    broken_tests.insert(
-        {"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_mean_weight", "DML does not support 5D+ tensors"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_mean_weight_expanded",
-                         "DML does not support 5D+ tensors"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_none_no_weight",
-                         "DML does not support 5D+ tensors"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_none_no_weight_expanded",
-                         "DML does not support 5D+ tensors"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index",
-                         "DML does not support 5D+ tensors"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_expanded",
-                         "DML does not support 5D+ tensors"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_log_prob",
-                         "DML does not support 5D+ tensors"});
-    broken_tests.insert(
-        {"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_log_prob_expanded",
-         "DML does not support 5D+ tensors"});
-    broken_tests.insert(
-        {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight", "DML does not support 5D+ tensors"});
-    broken_tests.insert(
-        {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_expanded", "DML does not support 5D+ tensors"});
-    broken_tests.insert(
-        {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_log_prob", "DML does not support 5D+ tensors"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_log_prob_expanded",
-                         "DML does not support 5D+ tensors"});
-    broken_tests.insert(
-        {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight", "DML does not support 5D+ tensors"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_expanded",
-                         "DML does not support 5D+ tensors"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_log_prob",
-                         "DML does not support 5D+ tensors"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_log_prob_expanded",
-                         "DML does not support 5D+ tensors"});
-  }
-
-#ifdef DISABLE_CONTRIB_OPS
-  broken_tests.insert({"coreml_SqueezeNet_ImageNet", "This model uses contrib ops."});
-  broken_tests.insert({"keras2coreml_Permute_ImageNet", "This model uses contrib ops."});
-  broken_tests.insert({"keras2coreml_ReLU_ImageNet", "This model uses contrib ops."});
-  broken_tests.insert({"keras2coreml_Padding-Upsampling-Normalizer_ImageNet", "This model uses contrib ops."});
-  broken_tests.insert({"tiny_yolov2", "This model uses contrib ops."});
-  broken_tests.insert({"fp16_tiny_yolov2", "This model uses contrib ops."});
-  broken_tests.insert({"keras2coreml_Pooling_ImageNet", "This model uses contrib ops."});
-  broken_tests.insert({"keras2coreml_Padding_ImageNet", "This model uses contrib ops."});
-  broken_tests.insert({"keras2coreml_Normalizer_ImageNet", "This model uses contrib ops."});
-  broken_tests.insert({"keras2coreml_linear_sklearn_load_breast_cancer", "This model uses contrib ops."});
-  broken_tests.insert({"keras2coreml_linear_ImageNet_small", "This model uses contrib ops."});
-  broken_tests.insert({"keras2coreml_linear_ImageNet_large", "This model uses contrib ops."});
-  broken_tests.insert({"keras2coreml_linear_ImageNet", "This model uses contrib ops."});
-  broken_tests.insert({"keras2coreml_leakyrelu_ImageNet", "This model uses contrib ops."});
-  broken_tests.insert({"keras2coreml_hard_sigmoid_ImageNet", "This model uses contrib ops."});
-  broken_tests.insert({"keras2coreml_elu_ImageNet", "This model uses contrib ops."});
-  broken_tests.insert({"keras2coreml_Dense_ImageNet", "This model uses contrib ops."});
-  broken_tests.insert({"keras2coreml_Conv2D_ImageNet", "This model uses contrib ops."});
-  broken_tests.insert({"coreml_VGG16_ImageNet", "This model uses contrib ops."});
-  broken_tests.insert({"coreml_Resnet50_ImageNet", "This model uses contrib ops."});
-  broken_tests.insert({"coreml_Inceptionv3_ImageNet", "This model uses contrib ops."});
-  broken_tests.insert({"coreml_FNS-Candy_ImageNet", "This model uses contrib ops."});
-  broken_tests.insert({"coreml_AgeNet_ImageNet", "This model uses contrib ops."});
-  broken_tests.insert({"keras2coreml_thresholdedrelu_ImageNet_large", "This model uses contrib ops."});
-  broken_tests.insert({"keras2coreml_thresholdedrelu_ImageNet_small", "This model uses contrib ops."});
-  broken_tests.insert({"keras2coreml_thresholdedrelu_sklearn_load_breast_cancer", "This model uses contrib ops."});
-  broken_tests.insert({"thresholdedrelu", "This model uses contrib ops."});
-  broken_tests.insert({"thresholdedrelu_default", "This model uses contrib ops."});
-  broken_tests.insert({"dynamic_slice_default_axes", "This model uses contrib ops."});
-  broken_tests.insert({"thresholdedrelu_example", "This model uses contrib ops."});
-  broken_tests.insert({"dynamic_slice_neg failed", "This model uses contrib ops."});
-  broken_tests.insert({"dynamic_slice_start_out_of_bounds", "This model uses contrib ops."});
-  broken_tests.insert({"dynamic_slice", "This model uses contrib ops."});
-  broken_tests.insert({"dynamic_slice_end_out_of_bounds", "This model uses contrib ops."});
-  broken_tests.insert({"dynamic_slice_neg", "This model uses contrib ops."});
-  broken_tests.insert({"mvn", "This model uses contrib ops.", {"onnx130"}});
-  broken_tests.insert({"cdist_float32_euclidean_1000_2000_1", "This model uses contrib ops."});
-  broken_tests.insert({"cdist_float32_euclidean_1000_2000_500", "This model uses contrib ops."});
-  broken_tests.insert({"cdist_float32_euclidean_1_1_1", "This model uses contrib ops."});
-  broken_tests.insert({"cdist_float32_sqeuclidean_1000_2000_1", "This model uses contrib ops."});
-  broken_tests.insert({"cdist_float32_sqeuclidean_1000_2000_500", "This model uses contrib ops."});
-  broken_tests.insert({"cdist_float32_sqeuclidean_1_1_1", "This model uses contrib ops."});
-  broken_tests.insert({"cdist_float64_euclidean_1000_2000_1", "This model uses contrib ops."});
-  broken_tests.insert({"cdist_float64_euclidean_1000_2000_500", "This model uses contrib ops."});
-  broken_tests.insert({"cdist_float64_euclidean_1_1_1", "This model uses contrib ops."});
-  broken_tests.insert({"cdist_float64_sqeuclidean_1000_2000_1", "This model uses contrib ops."});
-  broken_tests.insert({"cdist_float64_sqeuclidean_1000_2000_500", "This model uses contrib ops."});
-  broken_tests.insert({"cdist_float64_sqeuclidean_1_1_1", "This model uses contrib ops."});
-  broken_tests.insert({"keras2coreml_Average_ImageNet", "This model uses contrib ops."});
-  broken_tests.insert({"bidaf", "This model uses contrib ops."});
-  broken_tests.insert({"fp16_test_tiny_yolov2", "This model uses contrib ops."});
-  broken_tests.insert({"fp16_coreml_FNS-Candy", "This model uses contrib ops."});
-  broken_tests.insert({"keras2coreml_Repeat_ImageNet", "This model uses contrib ops."});
-  broken_tests.insert({"keras2coreml_BiDirectional_ImageNet", "This model uses contrib ops."});
-  broken_tests.insert({"fp16_coreml_LinearRegression_NYCTaxi", "This model uses contrib ops."});
-  broken_tests.insert({"keras2coreml_Average_ImageNet", "This model uses contrib ops."});
-  broken_tests.insert({"keras2coreml_GRU_ImageNet", "This model uses contrib ops."});
-  broken_tests.insert({"keras2coreml_SimpleRNN_ImageNet", "This model uses contrib ops."});
-  broken_tests.insert({"keras2coreml_Dot_imageNet", "This model uses contrib ops."});
-#endif
 
+  auto broken_tests = GetBrokenTests(provider_name);
+  auto broken_tests_keyword_set = GetBrokenTestsKeyWordSet(provider_name);
   std::basic_string<ORTCHAR_T> model_dir;
   (void)GetDirNameFromFilePath(model_path, model_dir);
   std::basic_string<PATH_CHAR_TYPE> test_case_name = GetLastComponent(model_dir);
@@ -603,16 +109,16 @@ TEST_P(ModelTest, Run) {
     test_case_name = test_case_name.substr(5);
   {
     BrokenTest t = {ToUTF8String(test_case_name), ""};
-    auto iter = broken_tests.find(t);
+    auto iter = broken_tests->find(t);
     auto opset_version = model_info->GetNominalOpsetVersion();
-    if (iter != broken_tests.end() &&
+    if (iter != broken_tests->end() &&
         (opset_version == TestModelInfo::unknown_version || iter->broken_opset_versions_.empty() ||
          iter->broken_opset_versions_.find(opset_version) != iter->broken_opset_versions_.end())) {
       SkipTest("It's in broken_tests");
       return;
     }
 
-    for (auto iter2 = broken_tests_keyword_set.begin(); iter2 != broken_tests_keyword_set.end(); ++iter2) {
+    for (auto iter2 = broken_tests_keyword_set->begin(); iter2 != broken_tests_keyword_set->end(); ++iter2) {
       std::string keyword = *iter2;
       if (ToUTF8String(test_case_name).find(keyword) != std::string::npos) {
         SkipTest("It's in broken_tests_keyword");
@@ -690,11 +196,7 @@ TEST_P(ModelTest, Run) {
 #endif
       else if (provider_name == "tensorrt") {
         if (test_case_name.find(ORT_TSTR("FLOAT16")) != std::string::npos) {
-          OrtTensorRTProviderOptionsV2 params{0, 0, nullptr, 1000, 1, 1 << 30,
-                                              1,  // enable fp16
-                                              0, nullptr, 0, 0, 0, 0, 0, nullptr, 0, nullptr, 0, 0, 0, 0, 0, 0, 0, 0,
-                                              3, -1, nullptr, nullptr, nullptr, nullptr, nullptr, 0};
-
+          OrtTensorRTProviderOptionsV2 params;
           ortso.AppendExecutionProvider_TensorRT_V2(params);
         } else {
           OrtTensorRTProviderOptionsV2* ep_option = nullptr;
@@ -909,7 +411,7 @@ ::std::vector<::std::basic_string<ORTCHAR_T>> GetParameterStrings() {
   // If an EP doesn't have any CI build pipeline, then there is no need to specify any opset.
 #ifdef USE_TENSORRT
   // tensorrt: only enable opset 14 to 17 of onnx tests
-  provider_names[provider_name_tensorrt] = {opset14, opset15, opset16, opset17};
+  provider_names[provider_name_tensorrt] = {opset12, opset14, opset15, opset16, opset17};
 #endif
 #ifdef USE_MIGRAPHX
   provider_names[provider_name_migraphx] = {opset7, opset8, opset9, opset10, opset11, opset12, opset13, opset14, opset15, opset16, opset17, opset18};
@@ -942,6 +444,13 @@ ::std::vector<::std::basic_string<ORTCHAR_T>> GetParameterStrings() {
 #ifdef USE_DML
   provider_names[provider_name_dml] = {opset7, opset8, opset9, opset10, opset11, opset12, opset13, opset14, opset15, opset16, opset17, opset18};
 #endif
+
+#if defined(ENABLE_TRAINING_CORE) && defined(USE_CUDA)
+  // Removing the CPU EP tests from CUDA build for training as these tests are already run in the CPU pipelines.
+  // Note: These are inference tests, we run these in training builds as an extra check. Therefore reducing
+  // the number of times these are run to reduce the CI time.
+  provider_names.erase(provider_name_cpu);
+#endif
   std::vector<std::basic_string<ORTCHAR_T>> v;
   // Permanently exclude following tests because ORT support only opset starting from 7,
   // Please make no more changes to the list
@@ -1084,37 +593,10 @@ ::std::vector<::std::basic_string<ORTCHAR_T>> GetParameterStrings() {
                                                    ORT_TSTR("mul_uint8"),
                                                    ORT_TSTR("div_uint8")};
   static const ORTCHAR_T* tensorrt_disabled_tests[] = {
-      ORT_TSTR("udnie"),
-      ORT_TSTR("rain_princess"),
-      ORT_TSTR("pointilism"),
-      ORT_TSTR("mosaic"),
-      ORT_TSTR("LSTM_Seq_lens_unpacked"),
-      ORT_TSTR("cgan"),
-      ORT_TSTR("candy"),
-      ORT_TSTR("tinyyolov3"),
-      ORT_TSTR("yolov3"),
-      ORT_TSTR("mlperf_ssd_resnet34_1200"),
-      ORT_TSTR("mlperf_ssd_mobilenet_300"),
-      ORT_TSTR("mask_rcnn"),
-      ORT_TSTR("faster_rcnn"),
-      ORT_TSTR("fp16_shufflenet"),
-      ORT_TSTR("fp16_inception_v1"),
-      ORT_TSTR("fp16_tiny_yolov2"),
-      ORT_TSTR("tf_inception_v3"),
-      ORT_TSTR("tf_mobilenet_v1_1.0_224"),
-      ORT_TSTR("tf_mobilenet_v2_1.0_224"),
-      ORT_TSTR("tf_mobilenet_v2_1.4_224"),
-      ORT_TSTR("tf_resnet_v1_101"),
-      ORT_TSTR("tf_resnet_v1_152"),
-      ORT_TSTR("tf_resnet_v1_50"),
-      ORT_TSTR("tf_resnet_v2_101"),
-      ORT_TSTR("tf_resnet_v2_152"),
-      ORT_TSTR("tf_resnet_v2_50"),
-      ORT_TSTR("convtranspose_1d"),
-      ORT_TSTR("convtranspose_3d"),
-      ORT_TSTR("conv_with_strides_and_asymmetric_padding"),
-      ORT_TSTR("conv_with_strides_padding"),
-      ORT_TSTR("size")  // INVALID_ARGUMENT: Cannot find binding of given name: x
+      ORT_TSTR("YOLOv3-12"),           // needs to run symbolic shape inference shape first
+      ORT_TSTR("SSD-MobilenetV1-12"),  // symbolic shape inference shape error
+      ORT_TSTR("SSD"),                 // needs to run symbolic shape inference shape first
+      ORT_TSTR("size")                 // INVALID_ARGUMENT: Cannot find binding of given name: x
   };
   std::vector<std::basic_string<ORTCHAR_T>> paths;
 
@@ -1133,11 +615,15 @@ ::std::vector<::std::basic_string<ORTCHAR_T>> GetParameterStrings() {
 #if defined(NDEBUG) || defined(RUN_MODELTEST_IN_DEBUG_MODE)
 #ifdef _WIN32
     ORT_STRING_VIEW model_test_root_path = ORT_TSTR("..\\models");
+    // thus, only the root path should be mounted.
+    ORT_STRING_VIEW model_zoo_path = ORT_TSTR("..\\models\\zoo");
 #else
     ORT_STRING_VIEW model_test_root_path = ORT_TSTR("../models");
+    ORT_STRING_VIEW model_zoo_path = ORT_TSTR("../models/zoo");
 #endif
     for (auto p : kvp.second) {
       paths.push_back(ConcatPathComponent(model_test_root_path, p));
+      paths.push_back(ConcatPathComponent(model_zoo_path, p));
     }
 #endif
 
@@ -1168,6 +654,12 @@ ::std::vector<::std::basic_string<ORTCHAR_T>> GetParameterStrings() {
                                                     ORT_TSTR("bvlc_alexnet"),
                                                     ORT_TSTR("bvlc_reference_caffenet"),
                                                     ORT_TSTR("coreml_VGG16_ImageNet"),
+                                                    ORT_TSTR("VGG 16-fp32"),
+                                                    ORT_TSTR("VGG 19-caffe2"),
+                                                    ORT_TSTR("VGG 19-bn"),
+                                                    ORT_TSTR("VGG 16-bn"),
+                                                    ORT_TSTR("VGG 19"),
+                                                    ORT_TSTR("VGG 16"),
                                                     ORT_TSTR("faster_rcnn"),
                                                     ORT_TSTR("GPT2"),
                                                     ORT_TSTR("GPT2_LM_HEAD"),
diff --git a/onnxruntime/test/providers/cpu/nn/conv_fp16_test.cc b/onnxruntime/test/providers/cpu/nn/conv_fp16_test.cc
index c8343483b80a6..cb5fc8095982c 100644
--- a/onnxruntime/test/providers/cpu/nn/conv_fp16_test.cc
+++ b/onnxruntime/test/providers/cpu/nn/conv_fp16_test.cc
@@ -109,7 +109,7 @@ TEST(ConvFp16Test, Conv1D_Invalid_Input_Shape) {
   TestConvFp16Op(attrs, {X, dummy_vals}, {X_shape, dummy_shape}, dummy_vals, dummy_shape, false,
                  OpTester::ExpectResult::kExpectFailure,
                  "Node:node1 Output:Y [ShapeInferenceError] Can't merge shape info. "
-                 "Both source and target dimension have values but they differ. Source=0 Target=2 Dimension=2",
+                 "Both inferred and declared dimension have values but they differ. Inferred=0 Declared=2 Dimension=2",
                  -1);  // use latest opset for shape inferencing errors
 }
 
@@ -132,7 +132,7 @@ TEST(ConvFp16Test, Conv2D_Invalid_Input_Shape) {
   TestConvFp16Op(attrs, {X, dummy_vals}, {X_shape, dummy_shape}, dummy_vals, dummy_shape, false,
                  OpTester::ExpectResult::kExpectFailure,
                  "Node:node1 Output:Y [ShapeInferenceError] Can't merge shape info. "
-                 "Both source and target dimension have values but they differ. Source=1 Target=2 Dimension=0",
+                 "Both inferred and declared dimension have values but they differ. Inferred=1 Declared=2 Dimension=0",
                  -1);  // use latest opset for shape inferencing errors
 }
 
diff --git a/onnxruntime/test/providers/cpu/nn/conv_op_test.cc b/onnxruntime/test/providers/cpu/nn/conv_op_test.cc
index e01fd8c78e55f..5103aed50b152 100644
--- a/onnxruntime/test/providers/cpu/nn/conv_op_test.cc
+++ b/onnxruntime/test/providers/cpu/nn/conv_op_test.cc
@@ -249,7 +249,7 @@ TEST(ConvTest, Conv1D_Invalid_Input_Shape) {
   TestConvOp(attrs, {X, dummy_vals}, {X_shape, dummy_shape}, dummy_vals, dummy_shape, false,
              OpTester::ExpectResult::kExpectFailure,
              "Node:node1 Output:Y [ShapeInferenceError] Can't merge shape info. "
-             "Both source and target dimension have values but they differ. Source=0 Target=2 Dimension=2",
+             "Both inferred and declared dimension have values but they differ. Inferred=0 Declared=2 Dimension=2",
              -1);  // use latest opset for shape inferencing errors
 }
 
@@ -272,7 +272,7 @@ TEST(ConvTest, Conv2D_Invalid_Input_Shape) {
   TestConvOp(attrs, {X, dummy_vals}, {X_shape, dummy_shape}, dummy_vals, dummy_shape, false,
              OpTester::ExpectResult::kExpectFailure,
              "Node:node1 Output:Y [ShapeInferenceError] Can't merge shape info. "
-             "Both source and target dimension have values but they differ. Source=1 Target=2 Dimension=0",
+             "Both inferred and declared dimension have values but they differ. Inferred=1 Declared=2 Dimension=0",
              -1);  // use latest opset for shape inferencing errors
 }
 
diff --git a/onnxruntime/test/providers/cpu/nn/lp_norm_op_test.cc b/onnxruntime/test/providers/cpu/nn/lp_norm_op_test.cc
index e37206d6aebf2..b7cead66bd7fb 100644
--- a/onnxruntime/test/providers/cpu/nn/lp_norm_op_test.cc
+++ b/onnxruntime/test/providers/cpu/nn/lp_norm_op_test.cc
@@ -143,7 +143,7 @@ void L1NormalizationWithZeroNorm() {
 
   vector<T> expected_output = {0.5f, 0.5f, 0.f, 0.f};
   test.AddOutput<T>("Y", input_dims, expected_output);
-  test.Run();
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider});
 }
 
 TEST(LpNormalizationTest, L1NormalizationWithZeroNorm) {
@@ -163,7 +163,7 @@ void L2NormalizationWithZeroNorm() {
 
   vector<T> expected_output = {1.f, 0.f, 0.f, 0.f};
   test.AddOutput<T>("Y", input_dims, expected_output);
-  test.Run();
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider});
 }
 
 TEST(LpNormalizationTest, L2NormalizationWithZeroNorm) {
diff --git a/onnxruntime/test/providers/cpu/nn/tfidfvectorizer_test.cc b/onnxruntime/test/providers/cpu/nn/tfidfvectorizer_test.cc
index a22253dbb74d4..379b892f39135 100644
--- a/onnxruntime/test/providers/cpu/nn/tfidfvectorizer_test.cc
+++ b/onnxruntime/test/providers/cpu/nn/tfidfvectorizer_test.cc
@@ -91,7 +91,7 @@ TEST(TfIdfVectorizerTest, Int32_TF_onlyBigrams_Skip0_Empty_Dim1Fail) {
 
   test.Run(OpTester::ExpectResult::kExpectFailure,
            "Can't merge shape info. "
-           "Both source and target dimension have values but they differ. Source=7 Target=0 Dimension=0");
+           "Both inferred and declared dimension have values but they differ. Inferred=7 Declared=0 Dimension=0");
 }
 
 TEST(TfIdfVectorizerTest, Int32_TF_onlyBigrams_Skip0_Empty_Dim1Success) {
@@ -136,7 +136,7 @@ TEST(TfIdfVectorizerTest, Int32_TF_onlyBigrams_Skip0_Empty_Dim2) {
   test.AddOutput<float>("Y", out_dims, output);
 
   test.Run(OpTester::ExpectResult::kExpectFailure,
-           "Mismatch between number of source and target dimensions. Source=2 Target=1");
+           "Mismatch between number of inferred and declared dimensions. inferred=2 declared=1");
 }
 
 TEST(TfIdfVectorizerTest, Int32_TF_onlyBigrams_Skip01_Empty_Dim2) {
@@ -159,7 +159,7 @@ TEST(TfIdfVectorizerTest, Int32_TF_onlyBigrams_Skip01_Empty_Dim2) {
   test.AddOutput<float>("Y", out_dims, output);
 
   test.Run(OpTester::ExpectResult::kExpectFailure,
-           "Mismatch between number of source and target dimensions. Source=2 Target=1");
+           "Mismatch between number of inferred and declared dimensions. inferred=2 declared=1");
 }
 
 TEST(TfIdfVectorizerTest, Int32_TF_onlyBigrams_Skip0_Empty_Dim2N) {
diff --git a/onnxruntime/test/providers/cpu/reduction/reduction_ops_test.cc b/onnxruntime/test/providers/cpu/reduction/reduction_ops_test.cc
index c9b851e450f9d..79da8004a9edd 100644
--- a/onnxruntime/test/providers/cpu/reduction/reduction_ops_test.cc
+++ b/onnxruntime/test/providers/cpu/reduction/reduction_ops_test.cc
@@ -1086,7 +1086,7 @@ TEST(ReductionOpTest, ReduceMax_int32) {
 #if defined(OPENVINO_CONFIG_GPU_FP32) || defined(OPENVINO_CONFIG_GPU_FP16)
   test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kOpenVINOExecutionProvider});  // OpenVINO: Disabled temporarily
 #else
-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});                          // TensorRT: axis must be 0
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});  // TensorRT: axis must be 0
 #endif
 }
 
@@ -1107,7 +1107,7 @@ TEST(ReductionOpTest, ReduceMax_int64) {
 #if defined(OPENVINO_CONFIG_GPU_FP32) || defined(OPENVINO_CONFIG_GPU_FP16)
   test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kOpenVINOExecutionProvider});  // OpenVINO: Disabled temporarily
 #else
-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});                          // TensorRT: axis must be 0
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});  // TensorRT: axis must be 0
 #endif
 }
 
diff --git a/onnxruntime/test/providers/cpu/rnn/rnn_op_test.cc b/onnxruntime/test/providers/cpu/rnn/rnn_op_test.cc
index d1a523b1eecf9..b9875b9553a55 100644
--- a/onnxruntime/test/providers/cpu/rnn/rnn_op_test.cc
+++ b/onnxruntime/test/providers/cpu/rnn/rnn_op_test.cc
@@ -762,7 +762,7 @@ TEST(RNNTest, RNN_invalid_sequence_lens) {
     test.AddOutput<float>("Y_h", Y_h_dims, Y_h_data);
 
     // the CUDA RNN version allows the invalid sequence lengths, so disable testing on CUDA and TensorRT
-    test.Run(OpTester::ExpectResult::kExpectFailure, error_msg, {kCudaExecutionProvider, kTensorrtExecutionProvider});
+    test.Run(OpTester::ExpectResult::kExpectFailure, error_msg, {kCudaExecutionProvider, kTensorrtExecutionProvider, kOpenVINOExecutionProvider});
   };
 
   // should batch batch_size to be valid
@@ -860,7 +860,7 @@ TEST(RNNTest, RNN_bidirectional_with_sequence_lens) {
 
   test.AddOutput<float>("Y_h", Y_h_dims, Y_h_data);
 
-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCudaExecutionProvider, kTensorrtExecutionProvider});
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCudaExecutionProvider, kTensorrtExecutionProvider, kOpenVINOExecutionProvider});
 }
 
 TEST(RNNTest, RNN_with_invalid_activation_load_failure) {
diff --git a/onnxruntime/test/providers/cpu/sequence/sequence_ops_test.cc b/onnxruntime/test/providers/cpu/sequence/sequence_ops_test.cc
index d29aac81150c5..60e75811e4333 100644
--- a/onnxruntime/test/providers/cpu/sequence/sequence_ops_test.cc
+++ b/onnxruntime/test/providers/cpu/sequence/sequence_ops_test.cc
@@ -330,15 +330,26 @@ TEST(SequenceOpsTest, SequenceConstructPositive) {
 
 // SplitToSequence
 template <typename T>
-static std::vector<T> GetConsequtiveVector(T start, int num) {
+static std::vector<T> GetConsecutiveVector(T start, size_t num) {
   std::vector<T> inputv(num);
   std::iota(inputv.begin(), inputv.end(), start);
   return inputv;
 }
 
+template <>
+std::vector<MLFloat16> GetConsecutiveVector<MLFloat16>(MLFloat16 start, size_t num) {
+  std::vector<MLFloat16> inputv;
+  inputv.reserve(num);
+  float start_f = start.ToFloat();
+  for (size_t i = 0; i < num; ++i) {
+    inputv.push_back(MLFloat16{start_f + static_cast<float>(i)});
+  }
+  return inputv;
+}
+
 TEST(SequenceOpsTest, SplitToSequence_DefaultAxis0EqualSplitFloat) {
   OpTester test("SplitToSequence", 11);
-  test.AddInput<float>("input", {4, 2}, GetConsequtiveVector<float>(1.f, 8));
+  test.AddInput<float>("input", {4, 2}, GetConsecutiveVector<float>(1.f, 8));
   test.AddInput<int64_t>("split", {1, 2}, {2, 2});
   SeqTensors<float> output;
   output.AddTensor({2, 2}, {1.f, 2.f, 3.f, 4.f});
@@ -347,9 +358,31 @@ TEST(SequenceOpsTest, SplitToSequence_DefaultAxis0EqualSplitFloat) {
   test.Run();
 }
 
+TEST(SequenceOpsTest, SplitToSequence_DefaultAxis0EqualSplitMLFloat16) {
+  OpTester test("SplitToSequence", 11);
+  test.AddInput<MLFloat16>("input", {4, 2}, GetConsecutiveVector<MLFloat16>(MLFloat16::One, 8));
+  test.AddInput<int64_t>("split", {1, 2}, {2, 2});
+  SeqTensors<MLFloat16> output;
+
+  std::vector<MLFloat16> tensor_1;
+  const auto data_1 = {1.f, 2.f, 3.f, 4.f};
+  for (auto f : data_1)
+    tensor_1.push_back(MLFloat16{f});
+
+  std::vector<MLFloat16> tensor_2;
+  const auto data_2 = {5.f, 6.f, 7.f, 8.f};
+  for (auto f : data_2)
+    tensor_2.push_back(MLFloat16{f});
+
+  output.AddTensor({2, 2}, tensor_1);
+  output.AddTensor({2, 2}, tensor_2);
+  test.AddSeqOutput("S2", output);
+  test.Run();
+}
+
 TEST(SequenceOpsTest, SplitToSequence_DefaultAxis0EqualSplitLong) {
   OpTester test("SplitToSequence", 11);
-  test.AddInput<int64_t>("input", {4, 2}, GetConsequtiveVector<int64_t>(1, 8));
+  test.AddInput<int64_t>("input", {4, 2}, GetConsecutiveVector<int64_t>(1, 8));
   test.AddInput<int64_t>("split", {1, 2}, {2, 2});
   SeqTensors<int64_t> output;
   output.AddTensor({2, 2}, {1, 2, 3, 4});
@@ -360,7 +393,7 @@ TEST(SequenceOpsTest, SplitToSequence_DefaultAxis0EqualSplitLong) {
 
 TEST(SequenceOpsTest, SplitToSequence_DefaultAxis0EqualSplitFloatScalarSplit) {
   OpTester test("SplitToSequence", 11);
-  test.AddInput<float>("input", {4, 2}, GetConsequtiveVector<float>(1.f, 8));
+  test.AddInput<float>("input", {4, 2}, GetConsecutiveVector<float>(1.f, 8));
   test.AddInput<int64_t>("split", {}, {2});
   SeqTensors<float> output;
   output.AddTensor({2, 2}, {1.f, 2.f, 3.f, 4.f});
@@ -371,7 +404,7 @@ TEST(SequenceOpsTest, SplitToSequence_DefaultAxis0EqualSplitFloatScalarSplit) {
 
 TEST(SequenceOpsTest, SplitToSequence_Axis0DefaultSplitFloatSetAxisExplicitly) {
   OpTester test("SplitToSequence", 11);
-  test.AddInput<float>("input", {4, 2}, GetConsequtiveVector<float>(1.f, 8));
+  test.AddInput<float>("input", {4, 2}, GetConsecutiveVector<float>(1.f, 8));
   int64_t axis = 0;
   test.AddAttribute("axis", axis);
   SeqTensors<float> output;
@@ -385,7 +418,7 @@ TEST(SequenceOpsTest, SplitToSequence_Axis0DefaultSplitFloatSetAxisExplicitly) {
 
 TEST(SequenceOpsTest, SplitToSequence_PositiveAxisScalarSplit) {
   OpTester test("SplitToSequence", 11);
-  test.AddInput<float>("input", {2, 2, 6}, GetConsequtiveVector<float>(1.f, 2 * 2 * 6));
+  test.AddInput<float>("input", {2, 2, 6}, GetConsecutiveVector<float>(1.f, 2 * 2 * 6));
   int64_t axis = 2;
   test.AddAttribute("axis", axis);
   test.AddInput<int64_t>("split", {}, {2});
@@ -411,11 +444,11 @@ TEST(SequenceOpsTest, SplitToSequence_PositiveAxisScalarSplit) {
 
 TEST(SequenceOpsTest, SplitToSequence_DefaultAxis0UnevenSplitFloat) {
   OpTester test("SplitToSequence", 11);
-  test.AddInput<float>("input", {5, 2}, GetConsequtiveVector<float>(1.f, 10));
+  test.AddInput<float>("input", {5, 2}, GetConsecutiveVector<float>(1.f, 10));
   test.AddInput<int64_t>("split", {}, {2});
   SeqTensors<float> output;
-  output.AddTensor({2, 2}, GetConsequtiveVector<float>(1.f, 4));
-  output.AddTensor({2, 2}, GetConsequtiveVector<float>(5.f, 4));
+  output.AddTensor({2, 2}, GetConsecutiveVector<float>(1.f, 4));
+  output.AddTensor({2, 2}, GetConsecutiveVector<float>(5.f, 4));
   output.AddTensor({1, 2}, {9.f, 10.f});
   test.AddSeqOutput("S2", output);
   test.Run();
@@ -423,22 +456,22 @@ TEST(SequenceOpsTest, SplitToSequence_DefaultAxis0UnevenSplitFloat) {
 
 TEST(SequenceOpsTest, SplitToSequence_DefaultAxis0UnevenSplitFloat2) {
   OpTester test("SplitToSequence", 11);
-  test.AddInput<float>("input", {17, 2}, GetConsequtiveVector<float>(1.f, 34));
+  test.AddInput<float>("input", {17, 2}, GetConsecutiveVector<float>(1.f, 34));
   test.AddInput<int64_t>("split", {}, {3});
   SeqTensors<float> output;
-  output.AddTensor({3, 2}, GetConsequtiveVector<float>(1.f, 6));
-  output.AddTensor({3, 2}, GetConsequtiveVector<float>(7.f, 6));
-  output.AddTensor({3, 2}, GetConsequtiveVector<float>(13.f, 6));
-  output.AddTensor({3, 2}, GetConsequtiveVector<float>(19.f, 6));
-  output.AddTensor({3, 2}, GetConsequtiveVector<float>(25.f, 6));
-  output.AddTensor({2, 2}, GetConsequtiveVector<float>(31.f, 4));
+  output.AddTensor({3, 2}, GetConsecutiveVector<float>(1.f, 6));
+  output.AddTensor({3, 2}, GetConsecutiveVector<float>(7.f, 6));
+  output.AddTensor({3, 2}, GetConsecutiveVector<float>(13.f, 6));
+  output.AddTensor({3, 2}, GetConsecutiveVector<float>(19.f, 6));
+  output.AddTensor({3, 2}, GetConsecutiveVector<float>(25.f, 6));
+  output.AddTensor({2, 2}, GetConsecutiveVector<float>(31.f, 4));
   test.AddSeqOutput("S2", output);
   test.Run();
 }
 
 TEST(SequenceOpsTest, SplitToSequence_PositiveAxisUnevenSplit) {
   OpTester test("SplitToSequence", 11);
-  test.AddInput<float>("input", {2, 5}, GetConsequtiveVector<float>(1.f, 10));
+  test.AddInput<float>("input", {2, 5}, GetConsecutiveVector<float>(1.f, 10));
   test.AddInput<int64_t>("split", {}, {2});
   int64_t axis = 1;
   test.AddAttribute("axis", axis);
@@ -452,33 +485,33 @@ TEST(SequenceOpsTest, SplitToSequence_PositiveAxisUnevenSplit) {
 
 TEST(SequenceOpsTest, SplitToSequence_Axis0DefaultSplitFloatSetAxisExplicitlyDontKeepDims3Dim) {
   OpTester test("SplitToSequence", 11);
-  test.AddInput<float>("input", {2, 3, 4}, GetConsequtiveVector<float>(1.f, 2 * 3 * 4));
+  test.AddInput<float>("input", {2, 3, 4}, GetConsecutiveVector<float>(1.f, 2 * 3 * 4));
   test.AddAttribute<int64_t>("keepdims", 0);
   int64_t axis = 0;
   test.AddAttribute("axis", axis);
   SeqTensors<float> output;
-  output.AddTensor({3, 4}, GetConsequtiveVector<float>(1.f, 12));
-  output.AddTensor({3, 4}, GetConsequtiveVector<float>(13.f, 12));
+  output.AddTensor({3, 4}, GetConsecutiveVector<float>(1.f, 12));
+  output.AddTensor({3, 4}, GetConsecutiveVector<float>(13.f, 12));
   test.AddSeqOutput("S2", output);
   test.Run();
 }
 
 TEST(SequenceOpsTest, SplitToSequence_Axis0DefaultSplitFloatSetAxisExplicitlyDontKeepDims2Dim) {
   OpTester test("SplitToSequence", 11);
-  test.AddInput<float>("input", {2, 3}, GetConsequtiveVector<float>(1.f, 2 * 3));
+  test.AddInput<float>("input", {2, 3}, GetConsecutiveVector<float>(1.f, 2 * 3));
   test.AddAttribute<int64_t>("keepdims", 0);
   int64_t axis = 0;
   test.AddAttribute("axis", axis);
   SeqTensors<float> output;
-  output.AddTensor({3}, GetConsequtiveVector<float>(1.f, 3));
-  output.AddTensor({3}, GetConsequtiveVector<float>(4.f, 3));
+  output.AddTensor({3}, GetConsecutiveVector<float>(1.f, 3));
+  output.AddTensor({3}, GetConsecutiveVector<float>(4.f, 3));
   test.AddSeqOutput("S2", output);
   test.Run();
 }
 
 TEST(SequenceOpsTest, SplitToSequence_PositiveAxisDontKeepDims) {
   OpTester test("SplitToSequence", 11);
-  test.AddInput<float>("input", {2, 3, 4}, GetConsequtiveVector<float>(1.f, 2 * 3 * 4));
+  test.AddInput<float>("input", {2, 3, 4}, GetConsecutiveVector<float>(1.f, 2 * 3 * 4));
   test.AddAttribute<int64_t>("keepdims", 0);
   int64_t axis = 2;
   test.AddAttribute("axis", axis);
diff --git a/onnxruntime/test/providers/cpu/tensor/affine_grid_test.cc b/onnxruntime/test/providers/cpu/tensor/affine_grid_test.cc
new file mode 100644
index 0000000000000..e37e784f28930
--- /dev/null
+++ b/onnxruntime/test/providers/cpu/tensor/affine_grid_test.cc
@@ -0,0 +1,165 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "core/util/math.h"
+#include "gtest/gtest.h"
+#include "test/providers/provider_test_utils.h"
+
+namespace onnxruntime {
+namespace test {
+TEST(AffineGridTest, 2d) {
+  OpTester test("AffineGrid", 20);
+  test.AddInput<float>("theta", {1, 2, 3}, {1.0f, 0.0, 0.0f, 0.0f, 1.0, 0.0f});
+  test.AddInput<int64_t>("size", {4}, {1, 1, 2, 3});
+  test.AddOutput<float>("grid", {1, 2, 3, 2},
+                        {-0.6667f, -0.5000f, 0.0000f, -0.5000f, 0.6667f, -0.5000f, -0.6667f, 0.5000f, 0.0000f, 0.5000f, 0.6667f, 0.5000f});
+  test.Run();
+}
+
+// following tests code is generated with:
+// python onnxruntime/test/providers/cpu/tensor/affine_grid_test_gen.py
+TEST(AffineGridTest, test_2d_0) {
+  OpTester test("AffineGrid", 20);
+  test.AddAttribute("align_corners", (int64_t)0);
+  test.AddInput<float>("theta", {1, 2, 3}, {1.477212f, -0.173648f, 0.300000f, 0.173648f, 0.492404f, -0.500000f});
+  test.AddInput<int64_t>("size", {4}, {1, 1, 3, 2});
+  test.AddOutput<float>("grid", {1, 3, 2, 2}, {-0.3228f, -0.9151f, 1.1544f, -0.7414f, -0.4386f, -0.5868f, 1.0386f, -0.4132f, -0.5544f, -0.2586f, 0.9228f, -0.0849f});
+  test.Run();
+}
+
+TEST(AffineGridTest, test_2d_1) {
+  OpTester test("AffineGrid", 20);
+  test.AddAttribute("align_corners", (int64_t)0);
+  test.AddInput<float>("theta", {2, 2, 3}, {1.477212f, -0.173648f, 0.300000f, 0.173648f, 0.492404f, -0.500000f, 1.477212f, -0.173648f, 0.300000f, 0.173648f, 0.492404f, -0.500000f});
+  test.AddInput<int64_t>("size", {4}, {2, 10, 2, 3});
+  test.AddOutput<float>("grid", {2, 2, 3, 2}, {-0.5980f, -0.8620f, 0.3868f, -0.7462f, 1.3716f, -0.6304f, -0.7716f, -0.3696f, 0.2132f, -0.2538f, 1.1980f, -0.1380f, -0.5980f, -0.8620f, 0.3868f, -0.7462f, 1.3716f, -0.6304f, -0.7716f, -0.3696f, 0.2132f, -0.2538f, 1.1980f, -0.1380f});
+  test.Run();
+}
+
+TEST(AffineGridTest, test_2d_2) {
+  OpTester test("AffineGrid", 20);
+  test.AddAttribute("align_corners", (int64_t)0);
+  test.AddInput<float>("theta", {1, 2, 3}, {1.500000f, -0.866025f, -0.500000f, 0.866025f, 2.750000f, -0.500000f});
+  test.AddInput<int64_t>("size", {4}, {1, 1, 3, 2});
+  test.AddOutput<float>("grid", {1, 3, 2, 2}, {-0.6726f, -2.7663f, 0.8274f, -1.9003f, -1.2500f, -0.9330f, 0.2500f, -0.0670f, -1.8274f, 0.9003f, -0.3274f, 1.7663f});
+  test.Run();
+}
+
+TEST(AffineGridTest, test_2d_3) {
+  OpTester test("AffineGrid", 20);
+  test.AddAttribute("align_corners", (int64_t)0);
+  test.AddInput<float>("theta", {2, 2, 3}, {1.500000f, -0.866025f, -0.500000f, 0.866025f, 2.750000f, -0.500000f, 1.500000f, -0.866025f, -0.500000f, 0.866025f, 2.750000f, -0.500000f});
+  test.AddInput<int64_t>("size", {4}, {2, 10, 2, 3});
+  test.AddOutput<float>("grid", {2, 2, 3, 2}, {-1.0670f, -2.4524f, -0.0670f, -1.8750f, 0.9330f, -1.2976f, -1.9330f, 0.2976f, -0.9330f, 0.8750f, 0.0670f, 1.4524f, -1.0670f, -2.4524f, -0.0670f, -1.8750f, 0.9330f, -1.2976f, -1.9330f, 0.2976f, -0.9330f, 0.8750f, 0.0670f, 1.4524f});
+  test.Run();
+}
+
+TEST(AffineGridTest, test_2d_4) {
+  OpTester test("AffineGrid", 20);
+  test.AddAttribute("align_corners", (int64_t)1);
+  test.AddInput<float>("theta", {1, 2, 3}, {1.477212f, -0.173648f, 0.300000f, 0.173648f, 0.492404f, -0.500000f});
+  test.AddInput<int64_t>("size", {4}, {1, 1, 3, 2});
+  test.AddOutput<float>("grid", {1, 3, 2, 2}, {-1.0036f, -1.1661f, 1.9509f, -0.8188f, -1.1772f, -0.6736f, 1.7772f, -0.3264f, -1.3509f, -0.1812f, 1.6036f, 0.1661f});
+  test.Run();
+}
+
+TEST(AffineGridTest, test_2d_5) {
+  OpTester test("AffineGrid", 20);
+  test.AddAttribute("align_corners", (int64_t)1);
+  test.AddInput<float>("theta", {2, 2, 3}, {1.477212f, -0.173648f, 0.300000f, 0.173648f, 0.492404f, -0.500000f, 1.477212f, -0.173648f, 0.300000f, 0.173648f, 0.492404f, -0.500000f});
+  test.AddInput<int64_t>("size", {4}, {2, 10, 2, 3});
+  test.AddOutput<float>("grid", {2, 2, 3, 2}, {-1.0036f, -1.1661f, 0.4736f, -0.9924f, 1.9509f, -0.8188f, -1.3509f, -0.1812f, 0.1264f, -0.0076f, 1.6036f, 0.1661f, -1.0036f, -1.1661f, 0.4736f, -0.9924f, 1.9509f, -0.8188f, -1.3509f, -0.1812f, 0.1264f, -0.0076f, 1.6036f, 0.1661f});
+  test.Run();
+}
+
+TEST(AffineGridTest, test_2d_6) {
+  OpTester test("AffineGrid", 20);
+  test.AddAttribute("align_corners", (int64_t)1);
+  test.AddInput<float>("theta", {1, 2, 3}, {1.500000f, -0.866025f, -0.500000f, 0.866025f, 2.750000f, -0.500000f});
+  test.AddInput<int64_t>("size", {4}, {1, 1, 3, 2});
+  test.AddOutput<float>("grid", {1, 3, 2, 2}, {-1.1340f, -4.1160f, 1.8660f, -2.3840f, -2.0000f, -1.3660f, 1.0000f, 0.3660f, -2.8660f, 1.3840f, 0.1340f, 3.1160f});
+  test.Run();
+}
+
+TEST(AffineGridTest, test_2d_7) {
+  OpTester test("AffineGrid", 20);
+  test.AddAttribute("align_corners", (int64_t)1);
+  test.AddInput<float>("theta", {2, 2, 3}, {1.500000f, -0.866025f, -0.500000f, 0.866025f, 2.750000f, -0.500000f, 1.500000f, -0.866025f, -0.500000f, 0.866025f, 2.750000f, -0.500000f});
+  test.AddInput<int64_t>("size", {4}, {2, 10, 2, 3});
+  test.AddOutput<float>("grid", {2, 2, 3, 2}, {-1.1340f, -4.1160f, 0.3660f, -3.2500f, 1.8660f, -2.3840f, -2.8660f, 1.3840f, -1.3660f, 2.2500f, 0.1340f, 3.1160f, -1.1340f, -4.1160f, 0.3660f, -3.2500f, 1.8660f, -2.3840f, -2.8660f, 1.3840f, -1.3660f, 2.2500f, 0.1340f, 3.1160f});
+  test.Run();
+}
+
+TEST(AffineGridTest, test_3d_0) {
+  OpTester test("AffineGrid", 20);
+  test.AddAttribute("align_corners", (int64_t)0);
+  test.AddInput<float>("theta", {1, 3, 4}, {1.409539f, 0.000000f, 0.513030f, 0.300000f, 0.118782f, 1.969615f, -0.326352f, -0.500000f, -0.168412f, 0.086824f, 0.462708f, 1.800000f});
+  test.AddInput<int64_t>("size", {5}, {1, 1, 3, 2, 2});
+  test.AddOutput<float>("grid", {1, 3, 2, 2, 3}, {-0.7468f, -1.3266f, 1.5323f, 0.6627f, -1.2078f, 1.3639f, -0.7468f, 0.6430f, 1.6191f, 0.6627f, 0.7618f, 1.4507f, -0.4048f, -1.5442f, 1.8408f, 1.0048f, -1.4254f, 1.6724f, -0.4048f, 0.4254f, 1.9276f, 1.0048f, 0.5442f, 1.7592f, -0.0627f, -1.7618f, 2.1493f, 1.3468f, -1.6430f, 1.9809f, -0.0627f, 0.2078f, 2.2361f, 1.3468f, 0.3266f, 2.0677f});
+  test.Run();
+}
+
+TEST(AffineGridTest, test_3d_1) {
+  OpTester test("AffineGrid", 20);
+  test.AddAttribute("align_corners", (int64_t)0);
+  test.AddInput<float>("theta", {2, 3, 4}, {1.409539f, 0.000000f, 0.513030f, 0.300000f, 0.118782f, 1.969615f, -0.326352f, -0.500000f, -0.168412f, 0.086824f, 0.462708f, 1.800000f, 1.409539f, 0.000000f, 0.513030f, 0.300000f, 0.118782f, 1.969615f, -0.326352f, -0.500000f, -0.168412f, 0.086824f, 0.462708f, 1.800000f});
+  test.AddInput<int64_t>("size", {5}, {2, 10, 2, 2, 3});
+  test.AddOutput<float>("grid", {2, 2, 2, 3, 3}, {-0.8962f, -1.4008f, 1.6375f, 0.0435f, -1.3216f, 1.5252f, 0.9832f, -1.2424f, 1.4130f, -0.8962f, 0.5688f, 1.7243f, 0.0435f, 0.6480f, 1.6121f, 0.9832f, 0.7272f, 1.4998f, -0.3832f, -1.7272f, 2.1002f, 0.5565f, -1.6480f, 1.9879f, 1.4962f, -1.5688f, 1.8757f, -0.3832f, 0.2424f, 2.1870f, 0.5565f, 0.3216f, 2.0748f, 1.4962f, 0.4008f, 1.9625f, -0.8962f, -1.4008f, 1.6375f, 0.0435f, -1.3216f, 1.5252f, 0.9832f, -1.2424f, 1.4130f, -0.8962f, 0.5688f, 1.7243f, 0.0435f, 0.6480f, 1.6121f, 0.9832f, 0.7272f, 1.4998f, -0.3832f, -1.7272f, 2.1002f, 0.5565f, -1.6480f, 1.9879f, 1.4962f, -1.5688f, 1.8757f, -0.3832f, 0.2424f, 2.1870f, 0.5565f, 0.3216f, 2.0748f, 1.4962f, 0.4008f, 1.9625f});
+  test.Run();
+}
+
+TEST(AffineGridTest, test_3d_2) {
+  OpTester test("AffineGrid", 20);
+  test.AddAttribute("align_corners", (int64_t)0);
+  test.AddInput<float>("theta", {1, 3, 4}, {0.259808f, 0.000000f, -0.150000f, -0.500000f, -1.299038f, 1.500000f, -2.250000f, -0.500000f, 1.375000f, 4.763140f, 2.381570f, 0.300000f});
+  test.AddInput<int64_t>("size", {5}, {1, 1, 3, 2, 2});
+  test.AddOutput<float>("grid", {1, 3, 2, 2, 3}, {-0.5299f, 0.8995f, -4.3568f, -0.2701f, -0.3995f, -2.9818f, -0.5299f, 2.3995f, 0.4064f, -0.2701f, 1.1005f, 1.7814f, -0.6299f, -0.6005f, -2.7691f, -0.3701f, -1.8995f, -1.3941f, -0.6299f, 0.8995f, 1.9941f, -0.3701f, -0.3995f, 3.3691f, -0.7299f, -2.1005f, -1.1814f, -0.4701f, -3.3995f, 0.1936f, -0.7299f, -0.6005f, 3.5818f, -0.4701f, -1.8995f, 4.9568f});
+  test.Run();
+}
+
+TEST(AffineGridTest, test_3d_3) {
+  OpTester test("AffineGrid", 20);
+  test.AddAttribute("align_corners", (int64_t)0);
+  test.AddInput<float>("theta", {2, 3, 4}, {0.259808f, 0.000000f, -0.150000f, -0.500000f, -1.299038f, 1.500000f, -2.250000f, -0.500000f, 1.375000f, 4.763140f, 2.381570f, 0.300000f, 0.259808f, 0.000000f, -0.150000f, -0.500000f, -1.299038f, 1.500000f, -2.250000f, -0.500000f, 1.375000f, 4.763140f, 2.381570f, 0.300000f});
+  test.AddInput<int64_t>("size", {5}, {2, 10, 2, 2, 3});
+  test.AddOutput<float>("grid", {2, 2, 2, 3, 3}, {-0.5982f, 0.7410f, -4.1890f, -0.4250f, -0.1250f, -3.2724f, -0.2518f, -0.9910f, -2.3557f, -0.5982f, 2.2410f, 0.5741f, -0.4250f, 1.3750f, 1.4908f, -0.2518f, 0.5090f, 2.4075f, -0.7482f, -1.5090f, -1.8075f, -0.5750f, -2.3750f, -0.8908f, -0.4018f, -3.2410f, 0.0259f, -0.7482f, -0.0090f, 2.9557f, -0.5750f, -0.8750f, 3.8724f, -0.4018f, -1.7410f, 4.7890f, -0.5982f, 0.7410f, -4.1890f, -0.4250f, -0.1250f, -3.2724f, -0.2518f, -0.9910f, -2.3557f, -0.5982f, 2.2410f, 0.5741f, -0.4250f, 1.3750f, 1.4908f, -0.2518f, 0.5090f, 2.4075f, -0.7482f, -1.5090f, -1.8075f, -0.5750f, -2.3750f, -0.8908f, -0.4018f, -3.2410f, 0.0259f, -0.7482f, -0.0090f, 2.9557f, -0.5750f, -0.8750f, 3.8724f, -0.4018f, -1.7410f, 4.7890f});
+  test.Run();
+}
+
+TEST(AffineGridTest, test_3d_4) {
+  OpTester test("AffineGrid", 20);
+  test.AddAttribute("align_corners", (int64_t)1);
+  test.AddInput<float>("theta", {1, 3, 4}, {1.409539f, 0.000000f, 0.513030f, 0.300000f, 0.118782f, 1.969615f, -0.326352f, -0.500000f, -0.168412f, 0.086824f, 0.462708f, 1.800000f});
+  test.AddInput<int64_t>("size", {5}, {1, 1, 3, 2, 2});
+  test.AddOutput<float>("grid", {1, 3, 2, 2, 3}, {-1.6226f, -2.2620f, 1.4189f, 1.1965f, -2.0245f, 1.0821f, -1.6226f, 1.6772f, 1.5925f, 1.1965f, 1.9147f, 1.2557f, -1.1095f, -2.5884f, 1.8816f, 1.7095f, -2.3508f, 1.5448f, -1.1095f, 1.3508f, 2.0552f, 1.7095f, 1.5884f, 1.7184f, -0.5965f, -2.9147f, 2.3443f, 2.2226f, -2.6772f, 2.0075f, -0.5965f, 1.0245f, 2.5179f, 2.2226f, 1.2620f, 2.1811f});
+  test.Run();
+}
+
+TEST(AffineGridTest, test_3d_5) {
+  OpTester test("AffineGrid", 20);
+  test.AddAttribute("align_corners", (int64_t)1);
+  test.AddInput<float>("theta", {2, 3, 4}, {1.409539f, 0.000000f, 0.513030f, 0.300000f, 0.118782f, 1.969615f, -0.326352f, -0.500000f, -0.168412f, 0.086824f, 0.462708f, 1.800000f, 1.409539f, 0.000000f, 0.513030f, 0.300000f, 0.118782f, 1.969615f, -0.326352f, -0.500000f, -0.168412f, 0.086824f, 0.462708f, 1.800000f});
+  test.AddInput<int64_t>("size", {5}, {2, 10, 2, 2, 3});
+  test.AddOutput<float>("grid", {2, 2, 2, 3, 3}, {-1.6226f, -2.2620f, 1.4189f, -0.2130f, -2.1433f, 1.2505f, 1.1965f, -2.0245f, 1.0821f, -1.6226f, 1.6772f, 1.5925f, -0.2130f, 1.7960f, 1.4241f, 1.1965f, 1.9147f, 1.2557f, -0.5965f, -2.9147f, 2.3443f, 0.8130f, -2.7960f, 2.1759f, 2.2226f, -2.6772f, 2.0075f, -0.5965f, 1.0245f, 2.5179f, 0.8130f, 1.1433f, 2.3495f, 2.2226f, 1.2620f, 2.1811f, -1.6226f, -2.2620f, 1.4189f, -0.2130f, -2.1433f, 1.2505f, 1.1965f, -2.0245f, 1.0821f, -1.6226f, 1.6772f, 1.5925f, -0.2130f, 1.7960f, 1.4241f, 1.1965f, 1.9147f, 1.2557f, -0.5965f, -2.9147f, 2.3443f, 0.8130f, -2.7960f, 2.1759f, 2.2226f, -2.6772f, 2.0075f, -0.5965f, 1.0245f, 2.5179f, 0.8130f, 1.1433f, 2.3495f, 2.2226f, 1.2620f, 2.1811f});
+  test.Run();
+}
+
+TEST(AffineGridTest, test_3d_6) {
+  OpTester test("AffineGrid", 20);
+  test.AddAttribute("align_corners", (int64_t)1);
+  test.AddInput<float>("theta", {1, 3, 4}, {0.259808f, 0.000000f, -0.150000f, -0.500000f, -1.299038f, 1.500000f, -2.250000f, -0.500000f, 1.375000f, 4.763140f, 2.381570f, 0.300000f});
+  test.AddInput<int64_t>("size", {5}, {1, 1, 3, 2, 2});
+  test.AddOutput<float>("grid", {1, 3, 2, 2, 3}, {-0.6098f, 1.5490f, -8.2197f, -0.0902f, -1.0490f, -5.4697f, -0.6098f, 4.5490f, 1.3066f, -0.0902f, 1.9510f, 4.0566f, -0.7598f, -0.7010f, -5.8381f, -0.2402f, -3.2990f, -3.0881f, -0.7598f, 2.2990f, 3.6881f, -0.2402f, -0.2990f, 6.4381f, -0.9098f, -2.9510f, -3.4566f, -0.3902f, -5.5490f, -0.7066f, -0.9098f, 0.0490f, 6.0697f, -0.3902f, -2.5490f, 8.8197f});
+  test.Run();
+}
+
+TEST(AffineGridTest, test_3d_7) {
+  OpTester test("AffineGrid", 20);
+  test.AddAttribute("align_corners", (int64_t)1);
+  test.AddInput<float>("theta", {2, 3, 4}, {0.259808f, 0.000000f, -0.150000f, -0.500000f, -1.299038f, 1.500000f, -2.250000f, -0.500000f, 1.375000f, 4.763140f, 2.381570f, 0.300000f, 0.259808f, 0.000000f, -0.150000f, -0.500000f, -1.299038f, 1.500000f, -2.250000f, -0.500000f, 1.375000f, 4.763140f, 2.381570f, 0.300000f});
+  test.AddInput<int64_t>("size", {5}, {2, 10, 2, 2, 3});
+  test.AddOutput<float>("grid", {2, 2, 2, 3, 3}, {-0.6098f, 1.5490f, -8.2197f, -0.3500f, 0.2500f, -6.8447f, -0.0902f, -1.0490f, -5.4697f, -0.6098f, 4.5490f, 1.3066f, -0.3500f, 3.2500f, 2.6816f, -0.0902f, 1.9510f, 4.0566f, -0.9098f, -2.9510f, -3.4566f, -0.6500f, -4.2500f, -2.0816f, -0.3902f, -5.5490f, -0.7066f, -0.9098f, 0.0490f, 6.0697f, -0.6500f, -1.2500f, 7.4447f, -0.3902f, -2.5490f, 8.8197f, -0.6098f, 1.5490f, -8.2197f, -0.3500f, 0.2500f, -6.8447f, -0.0902f, -1.0490f, -5.4697f, -0.6098f, 4.5490f, 1.3066f, -0.3500f, 3.2500f, 2.6816f, -0.0902f, 1.9510f, 4.0566f, -0.9098f, -2.9510f, -3.4566f, -0.6500f, -4.2500f, -2.0816f, -0.3902f, -5.5490f, -0.7066f, -0.9098f, 0.0490f, 6.0697f, -0.6500f, -1.2500f, 7.4447f, -0.3902f, -2.5490f, 8.8197f});
+  test.Run();
+}
+}  // namespace test
+}  // namespace onnxruntime
diff --git a/onnxruntime/test/providers/cpu/tensor/affine_grid_test_gen.py b/onnxruntime/test/providers/cpu/tensor/affine_grid_test_gen.py
new file mode 100644
index 0000000000000..7dcd6484a5688
--- /dev/null
+++ b/onnxruntime/test/providers/cpu/tensor/affine_grid_test_gen.py
@@ -0,0 +1,117 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+# This code is used to generate the test cases for the AffineGrid operator
+# in onnxruntime/test/providers/cpu/tensor/affine_grid_test.cc
+
+import argparse
+
+import numpy as np
+import torch
+from torch.nn.functional import affine_grid
+
+opset_version = 20
+parser = argparse.ArgumentParser(description="Generate test cases for the AffineGrid operator.")
+parser.add_argument("--dim", type=int, choices=[2, 3], help="Dimension of the test cases (2 or 3)")
+args = parser.parse_args()
+
+if args.dim is None or args.dim == 2:
+    align_corners_options = [False, True]
+    angles = [10, 60]
+    translations = [np.array([0.3, -0.5]), np.array([-0.5, -0.5])]
+    scales = [np.array([1.5, 0.5]), np.array([3.0, 5.5])]
+    sizes = [[1, 1, 3, 2], [2, 10, 2, 3]]
+    test_count = 0
+
+    for align_corners in align_corners_options:
+        for angle, translation, scale in zip(angles, translations, scales):
+            for size in sizes:
+                theta = np.array([], dtype=np.float32)
+                for _ in range(size[0]):
+                    angle_radian = (angle / 180.0) * np.pi
+                    theta = np.append(
+                        theta,
+                        [
+                            np.cos(angle_radian) * scale[0],
+                            -np.sin(angle_radian),
+                            translation[0],
+                            np.sin(angle_radian),
+                            np.cos(angle_radian) * scale[1],
+                            translation[1],
+                        ],
+                    )
+                theta = theta.reshape(size[0], 2, 3)
+                theta = torch.Tensor(theta)
+                grid = affine_grid(theta, size, align_corners=align_corners)
+
+                # Print the C++ code for the test case
+                print(f"TEST(AffineGridTest, test_2d_{test_count}) {{")
+                print(f'  OpTester test("AffineGrid", {opset_version});')
+                print(f'  test.AddAttribute("align_corners", (int64_t){1 if align_corners else 0});')
+                print(
+                    f"  test.AddInput<float>(\"theta\", {{{theta.shape[0]}, {theta.shape[1]}, {theta.shape[2]}}}, {{{', '.join([f'{x:.6f}f' for x in theta.flatten()])}}});"
+                )
+                print(
+                    f'  test.AddInput<int64_t>("size", {{{len(size)}}}, {{{size[0]}, {size[1]}, {size[2]}, {size[3]}}});'
+                )
+                print(
+                    f"  test.AddOutput<float>(\"grid\", {{{size[0]}, {size[2]}, {size[3]}, 2}}, {{{', '.join([f'{x:.4f}f' for x in grid.flatten()])}}});"
+                )
+                print("  test.Run();")
+                print("}\n")
+                test_count += 1
+
+
+if args.dim is None or args.dim == 3:
+    align_corners_options = [False, True]
+    angles = [[10, 20], [60, -30]]
+    translations = [np.array([0.3, -0.5, 1.8]), np.array([-0.5, -0.5, 0.3])]
+    scales = [np.array([1.5, 2.0, 0.5]), np.array([0.3, 3.0, 5.5])]
+    sizes = [[1, 1, 3, 2, 2], [2, 10, 2, 2, 3]]
+    test_count = 0
+
+    for align_corners in align_corners_options:
+        for angle, translation, scale in zip(angles, translations, scales):
+            for size in sizes:
+                theta = np.array([], dtype=np.float32)
+                for _ in range(size[0]):
+                    angle_radian_x = (angle[0] / 180.0) * np.pi
+                    angle_radian_y = (angle[1] / 180.0) * np.pi
+                    rot_matrix_x = np.array(
+                        [
+                            [1, 0, 0],
+                            [0, np.cos(angle_radian_x), -np.sin(angle_radian_x)],
+                            [0, np.sin(angle_radian_x), np.cos(angle_radian_x)],
+                        ]
+                    )
+                    rot_matrix_y = np.array(
+                        [
+                            [np.cos(angle_radian_y), 0, np.sin(angle_radian_y)],
+                            [0, 1, 0],
+                            [-np.sin(angle_radian_y), 0, np.cos(angle_radian_y)],
+                        ]
+                    )
+                    rot_matrix = np.matmul(rot_matrix_x, rot_matrix_y)
+                    rot_matrix = rot_matrix * scale.reshape(3, 1)
+                    rot_matrix = np.append(rot_matrix, np.reshape(translation, (3, 1)), axis=1)
+                    theta = np.append(theta, rot_matrix.flatten())
+                theta = theta.reshape(size[0], 3, 4)
+                theta = torch.Tensor(theta)
+                grid = affine_grid(theta, size, align_corners=align_corners)
+
+                # Print the C++ code for the test case
+                print(f"TEST(AffineGridTest, test_3d_{test_count}) {{")
+                print(f'  OpTester test("AffineGrid", {opset_version});')
+                print(f'  test.AddAttribute("align_corners", (int64_t){1 if align_corners else 0});')
+                print(
+                    f"  test.AddInput<float>(\"theta\", {{{theta.shape[0]}, {theta.shape[1]}, {theta.shape[2]}}}, {{{', '.join([f'{x:.6f}f' for x in theta.flatten()])}}});"
+                )
+                print(
+                    f'  test.AddInput<int64_t>("size", {{{len(size)}}}, {{{size[0]}, {size[1]}, {size[2]}, {size[3]}, {size[4]}}});'
+                )
+                print(
+                    f"  test.AddOutput<float>(\"grid\", {{{size[0]}, {size[2]}, {size[3]}, {size[4]}, 3}}, {{{', '.join([f'{x:.4f}f' for x in grid.flatten()])}}});"
+                )
+                print("  test.Run();")
+                print("}\n")
+                test_count += 1
diff --git a/onnxruntime/test/providers/cpu/tensor/compress_op.test.cc b/onnxruntime/test/providers/cpu/tensor/compress_op.test.cc
index c95ac1603a317..c3d91100605e9 100644
--- a/onnxruntime/test/providers/cpu/tensor/compress_op.test.cc
+++ b/onnxruntime/test/providers/cpu/tensor/compress_op.test.cc
@@ -66,7 +66,7 @@ TEST(CompressTest, Compress_3dims_has_extra_condition) {
   // has condition length = 3 > input_dim[axis] = 2
   test.AddInput<bool>("condition", {3}, {0, 1, 1});
   test.AddOutput<float>("output", {2, 1, 3}, {4.0f, 5.0f, 6.0f, 10.0f, 11.0f, 12.0f});
-  test.Run();
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider});
 }
 
 TEST(CompressTest, Compress_3dims_has_extra_input) {
diff --git a/onnxruntime/test/providers/cpu/tensor/grid_sample_test.cc b/onnxruntime/test/providers/cpu/tensor/grid_sample_test.cc
new file mode 100644
index 0000000000000..0f097622abff0
--- /dev/null
+++ b/onnxruntime/test/providers/cpu/tensor/grid_sample_test.cc
@@ -0,0 +1,1019 @@
+#include "gtest/gtest.h"
+#include "test/providers/provider_test_utils.h"
+#include "test/util/include/default_providers.h"
+
+#include <limits>
+
+namespace onnxruntime {
+namespace test {
+// DO NOT edit following tests. They are generated by:
+// onnxruntime/test/providers/cpu/tensor/grid_sample_test_gen.py
+TEST(GridsampleTest, test_grid_sample_16_4D_nearest_zeros_align_corners) {
+  OpTester test("GridSample", 16);
+  std::string mode = "nearest";
+  std::string padding_mode = "zeros";
+  int64_t align_corners = 1;
+  std::initializer_list<int64_t> X_shape{2, 2, 3, 2};
+  std::initializer_list<float> X_data{-1.125840f, -1.152360f, -0.250579f, -0.433879f, 0.848710f, 0.692009f, -0.316013f, -2.115219f, 0.468096f, -0.157712f, 1.443660f, 0.266049f, 0.166455f, 0.874382f, -0.143474f, -0.111609f, 0.931827f, 1.259009f, 2.004981f, 0.053737f, 0.618057f, -0.412802f, -0.841065f, -2.316042f};
+  std::initializer_list<int64_t> Grid_shape{2, 3, 2, 2};
+  std::initializer_list<float> Grid_data{0.063110f, -0.615220f, 0.203022f, -1.120434f, -0.867079f, -0.618636f, 0.757125f, 0.703586f, -0.532194f, -0.043299f, 0.767473f, 1.192960f, 0.476259f, 0.162111f, 0.804584f, -0.706563f, 0.223613f, -0.930367f, -0.831703f, -0.619900f, 0.542968f, 0.482592f, -0.710823f, 0.362529f};
+  std::initializer_list<int64_t> Y_shape{2, 2, 3, 2};
+  std::initializer_list<float> Y_data{-1.152360f, -1.152360f, -1.125840f, 0.692009f, -0.250579f, 0.692009f, -2.115219f, -2.115219f, -0.316013f, 0.266049f, 0.468096f, 0.266049f, -0.111609f, 0.874382f, 0.874382f, 0.166455f, -0.111609f, -0.143474f, -0.412802f, 0.053737f, 0.053737f, 2.004981f, -0.412802f, 0.618057f};
+  test.AddInput<float>("X", X_shape, X_data);
+  test.AddInput<float>("Grid", Grid_shape, Grid_data);
+  test.AddAttribute("mode", mode);
+  test.AddAttribute("padding_mode", padding_mode);
+  test.AddAttribute("align_corners", align_corners);
+  test.AddOutput<float>("Y", Y_shape, Y_data);
+  test.ConfigEp(DefaultCpuExecutionProvider())
+      .RunWithConfig();
+}
+
+TEST(GridsampleTest, test_grid_sample_16_4D_nearest_zeros_no_align_corners) {
+  OpTester test("GridSample", 16);
+  std::string mode = "nearest";
+  std::string padding_mode = "zeros";
+  int64_t align_corners = 0;
+  std::initializer_list<int64_t> X_shape{2, 2, 3, 2};
+  std::initializer_list<float> X_data{-0.569248f, 0.919971f, 1.110816f, 1.289874f, -1.478174f, 2.567233f, -0.473120f, 0.335551f, -0.003304f, -0.534441f, 1.168688f, 0.394503f, 1.941462f, 0.791498f, -0.020252f, -0.437170f, -1.535287f, -0.412679f, 0.966303f, 1.624783f, -0.365619f, -1.302440f, 0.099403f, 0.441822f};
+  std::initializer_list<int64_t> Grid_shape{2, 3, 2, 2};
+  std::initializer_list<float> Grid_data{-1.143118f, -0.021569f, -0.903671f, -0.925628f, -0.066120f, 0.180174f, -0.491436f, 0.712053f, -0.730247f, 1.088844f, 0.822360f, -1.011940f, -0.298661f, 0.054147f, 0.175081f, 0.284609f, 0.470914f, 0.071880f, -0.585515f, 0.567827f, -1.151099f, -0.711248f, -0.300396f, -0.584536f};
+  std::initializer_list<int64_t> Y_shape{2, 2, 3, 2};
+  std::initializer_list<float> Y_data{0.000000f, -0.569248f, 1.110816f, -1.478174f, 0.000000f, 0.000000f, 0.000000f, -0.473120f, -0.003304f, 1.168688f, 0.000000f, 0.000000f, -0.020252f, -0.437170f, -0.437170f, -1.535287f, 0.000000f, 1.941462f, -0.365619f, -1.302440f, -1.302440f, 0.099403f, 0.000000f, 0.966303f};
+  test.AddInput<float>("X", X_shape, X_data);
+  test.AddInput<float>("Grid", Grid_shape, Grid_data);
+  test.AddAttribute("mode", mode);
+  test.AddAttribute("padding_mode", padding_mode);
+  test.AddAttribute("align_corners", align_corners);
+  test.AddOutput<float>("Y", Y_shape, Y_data);
+  test.ConfigEp(DefaultCpuExecutionProvider())
+      .RunWithConfig();
+}
+
+TEST(GridsampleTest, test_grid_sample_16_4D_nearest_border_align_corners) {
+  OpTester test("GridSample", 16);
+  std::string mode = "nearest";
+  std::string padding_mode = "border";
+  int64_t align_corners = 1;
+  std::initializer_list<int64_t> X_shape{2, 2, 3, 2};
+  std::initializer_list<float> X_data{-0.883376f, -0.418913f, -0.804826f, 0.565610f, 0.610365f, 0.466884f, 1.950657f, -1.063099f, -0.829367f, -1.407257f, 1.626847f, 0.172273f, -1.611502f, -0.479448f, -0.143351f, -0.317295f, 0.573655f, 0.997931f, 0.543609f, 0.078804f, 0.862860f, -0.019490f, 0.991047f, -0.777735f};
+  std::initializer_list<int64_t> Grid_shape{2, 3, 2, 2};
+  std::initializer_list<float> Grid_data{-1.080070f, -0.080985f, 1.055303f, -0.489470f, 1.083604f, 0.434584f, -1.082953f, 0.759237f, -0.138473f, -0.535688f, 0.959584f, -0.969714f, 0.128766f, -0.251242f, 0.856935f, 0.334973f, 0.576606f, 0.423791f, -0.288570f, -0.252367f, -0.988898f, 0.650213f, 0.952774f, 0.821070f};
+  std::initializer_list<int64_t> Y_shape{2, 2, 3, 2};
+  std::initializer_list<float> Y_data{-0.804826f, 0.565610f, 0.565610f, 0.610365f, -0.883376f, -0.418913f, -0.829367f, -1.407257f, -1.407257f, 1.626847f, 1.950657f, -1.063099f, -0.317295f, -0.317295f, -0.317295f, -0.143351f, 0.573655f, 0.997931f, -0.019490f, -0.019490f, -0.019490f, 0.862860f, 0.991047f, -0.777735f};
+  test.AddInput<float>("X", X_shape, X_data);
+  test.AddInput<float>("Grid", Grid_shape, Grid_data);
+  test.AddAttribute("mode", mode);
+  test.AddAttribute("padding_mode", padding_mode);
+  test.AddAttribute("align_corners", align_corners);
+  test.AddOutput<float>("Y", Y_shape, Y_data);
+  test.ConfigEp(DefaultCpuExecutionProvider())
+      .RunWithConfig();
+}
+
+TEST(GridsampleTest, test_grid_sample_16_4D_nearest_border_no_align_corners) {
+  OpTester test("GridSample", 16);
+  std::string mode = "nearest";
+  std::string padding_mode = "border";
+  int64_t align_corners = 0;
+  std::initializer_list<int64_t> X_shape{2, 2, 3, 2};
+  std::initializer_list<float> X_data{-0.559630f, 0.533472f, 0.406887f, 0.394587f, 0.171511f, 0.876045f, -0.287087f, 1.021640f, 0.438649f, -0.010704f, 1.338354f, -0.279405f, -0.551834f, -2.889061f, -1.509981f, 1.024115f, 0.195393f, -0.737109f, 1.700101f, 0.346216f, 0.971125f, 1.450250f, -0.051909f, -0.628431f};
+  std::initializer_list<int64_t> Grid_shape{2, 3, 2, 2};
+  std::initializer_list<float> Grid_data{0.149807f, 1.074831f, 0.734055f, -0.758657f, 0.538205f, -0.848275f, -0.508590f, 0.352947f, 0.396231f, 0.900274f, -0.386299f, 0.001921f, 0.617788f, -1.160511f, 0.867577f, -0.992307f, 0.016539f, -0.204020f, -0.632008f, 0.158605f, 0.992302f, -0.350783f, -0.712433f, -0.443807f};
+  std::initializer_list<int64_t> Y_shape{2, 2, 3, 2};
+  std::initializer_list<float> Y_data{0.876045f, 0.533472f, 0.533472f, 0.171511f, 0.876045f, 0.406887f, -0.279405f, 1.021640f, 1.021640f, 1.338354f, -0.279405f, 0.438649f, -2.889061f, -2.889061f, 1.024115f, -1.509981f, -2.889061f, -0.551834f, 0.346216f, 0.346216f, 1.450250f, 0.971125f, 0.346216f, 1.700101f};
+  test.AddInput<float>("X", X_shape, X_data);
+  test.AddInput<float>("Grid", Grid_shape, Grid_data);
+  test.AddAttribute("mode", mode);
+  test.AddAttribute("padding_mode", padding_mode);
+  test.AddAttribute("align_corners", align_corners);
+  test.AddOutput<float>("Y", Y_shape, Y_data);
+  test.ConfigEp(DefaultCpuExecutionProvider())
+      .RunWithConfig();
+}
+
+TEST(GridsampleTest, test_grid_sample_16_4D_nearest_reflection_align_corners) {
+  OpTester test("GridSample", 16);
+  std::string mode = "nearest";
+  std::string padding_mode = "reflection";
+  int64_t align_corners = 1;
+  std::initializer_list<int64_t> X_shape{2, 2, 3, 2};
+  std::initializer_list<float> X_data{-0.039373f, -0.801472f, -0.495544f, -0.361514f, 0.585113f, -1.156007f, -0.143365f, -0.194741f, -0.906885f, -0.591838f, 0.150785f, -1.041149f, -0.720534f, -2.214754f, -0.683730f, 0.516358f, 0.792848f, 0.083228f, 0.422800f, -1.868747f, -1.105713f, 0.143731f, 0.583597f, 1.348155f};
+  std::initializer_list<int64_t> Grid_shape{2, 3, 2, 2};
+  std::initializer_list<float> Grid_data{0.829854f, -0.893309f, 0.491599f, -0.403504f, -0.578962f, 0.215574f, -0.623348f, 0.276486f, 0.235657f, -0.890987f, 0.199798f, 0.511115f, 0.474997f, -0.151054f, -0.983745f, -0.184985f, 0.416769f, -0.437853f, 0.455497f, 0.799155f, -0.626582f, 0.011834f, 0.496199f, 0.094053f};
+  std::initializer_list<int64_t> Y_shape{2, 2, 3, 2};
+  std::initializer_list<float> Y_data{-0.801472f, -0.361514f, -0.495544f, -0.495544f, -0.801472f, -1.156007f, -0.194741f, -0.591838f, -0.906885f, -0.906885f, -0.194741f, -1.041149f, 0.516358f, -0.683730f, 0.516358f, 0.083228f, -0.683730f, 0.516358f, 0.143731f, -1.105713f, 0.143731f, 1.348155f, -1.105713f, 0.143731f};
+  test.AddInput<float>("X", X_shape, X_data);
+  test.AddInput<float>("Grid", Grid_shape, Grid_data);
+  test.AddAttribute("mode", mode);
+  test.AddAttribute("padding_mode", padding_mode);
+  test.AddAttribute("align_corners", align_corners);
+  test.AddOutput<float>("Y", Y_shape, Y_data);
+  test.ConfigEp(DefaultCpuExecutionProvider())
+      .RunWithConfig();
+}
+
+TEST(GridsampleTest, test_grid_sample_16_4D_nearest_reflection_no_align_corners) {
+  OpTester test("GridSample", 16);
+  std::string mode = "nearest";
+  std::string padding_mode = "reflection";
+  int64_t align_corners = 0;
+  std::initializer_list<int64_t> X_shape{2, 2, 3, 2};
+  std::initializer_list<float> X_data{-0.129230f, -0.054595f, 0.408347f, 1.126366f, 1.935057f, 1.007685f, 1.004642f, -0.433520f, -0.562711f, -0.832754f, -1.395545f, -0.399295f, -0.309940f, -0.056062f, 0.517413f, -1.596237f, 0.356960f, -2.297482f, -0.871083f, -1.674028f, 0.563055f, -1.435067f, 0.719400f, -1.370747f};
+  std::initializer_list<int64_t> Grid_shape{2, 3, 2, 2};
+  std::initializer_list<float> Grid_data{-0.811910f, -1.183845f, -0.963667f, 0.947364f, 0.649243f, 1.125859f, 0.961345f, -1.071655f, -0.818917f, -0.193899f, -0.779319f, 0.833276f, -0.907209f, -0.585482f, -1.159310f, -0.681295f, 0.986973f, 0.982512f, 0.859005f, 0.926553f, 1.067024f, -0.307276f, 0.528003f, 1.069117f};
+  std::initializer_list<int64_t> Y_shape{2, 2, 3, 2};
+  std::initializer_list<float> Y_data{-0.129230f, 1.935057f, 1.007685f, -0.054595f, 0.408347f, 1.935057f, 1.004642f, -1.395545f, -0.399295f, -0.433520f, -0.562711f, -1.395545f, -0.309940f, -0.309940f, -2.297482f, -2.297482f, -1.596237f, -2.297482f, -0.871083f, -0.871083f, -1.370747f, -1.370747f, -1.435067f, -1.370747f};
+  test.AddInput<float>("X", X_shape, X_data);
+  test.AddInput<float>("Grid", Grid_shape, Grid_data);
+  test.AddAttribute("mode", mode);
+  test.AddAttribute("padding_mode", padding_mode);
+  test.AddAttribute("align_corners", align_corners);
+  test.AddOutput<float>("Y", Y_shape, Y_data);
+  test.ConfigEp(DefaultCpuExecutionProvider())
+      .RunWithConfig();
+}
+
+TEST(GridsampleTest, test_grid_sample_16_4D_bilinear_zeros_align_corners) {
+  OpTester test("GridSample", 16);
+  std::string mode = "bilinear";
+  std::string padding_mode = "zeros";
+  int64_t align_corners = 1;
+  std::initializer_list<int64_t> X_shape{2, 2, 3, 2};
+  std::initializer_list<float> X_data{0.294201f, 0.797322f, 1.264215f, 0.935492f, 0.545464f, -1.537389f, 0.312439f, 0.740060f, -0.575326f, -1.432532f, -0.666175f, 1.017438f, -2.241368f, 0.437349f, -0.555362f, -0.057943f, 0.658583f, 0.992938f, -0.206548f, -0.244841f, -0.380599f, 1.131112f, -0.090205f, -0.897900f};
+  std::initializer_list<int64_t> Grid_shape{2, 3, 2, 2};
+  std::initializer_list<float> Grid_data{0.595248f, -1.096726f, -0.214731f, -0.891773f, -0.512023f, 0.432352f, -0.852156f, 0.446072f, 1.018534f, 0.078706f, -0.799785f, -0.429942f, 0.262037f, -0.914782f, 0.596172f, -1.089444f, -1.153552f, -1.165993f, -0.243436f, 0.806920f, -1.135775f, 0.997425f, -0.480027f, 0.351461f};
+  std::initializer_list<int64_t> Y_shape{2, 2, 3, 2};
+  std::initializer_list<float> Y_data{0.628229f, 0.561377f, 0.688215f, 0.861459f, 0.733996f, 0.850061f, 0.590307f, 0.329661f, -0.555725f, -0.595435f, -1.228216f, -0.224152f, -0.524667f, -0.094262f, -1.725798f, 0.562584f, 0.610959f, -0.014286f, -0.162194f, -0.215901f, -0.159037f, -0.282404f, -0.084779f, -0.097448f};
+  test.AddInput<float>("X", X_shape, X_data);
+  test.AddInput<float>("Grid", Grid_shape, Grid_data);
+  test.AddAttribute("mode", mode);
+  test.AddAttribute("padding_mode", padding_mode);
+  test.AddAttribute("align_corners", align_corners);
+  test.AddOutput<float>("Y", Y_shape, Y_data);
+  test.ConfigEp(DefaultCpuExecutionProvider())
+      .RunWithConfig();
+}
+
+TEST(GridsampleTest, test_grid_sample_16_4D_bilinear_zeros_no_align_corners) {
+  OpTester test("GridSample", 16);
+  std::string mode = "bilinear";
+  std::string padding_mode = "zeros";
+  int64_t align_corners = 0;
+  std::initializer_list<int64_t> X_shape{2, 2, 3, 2};
+  std::initializer_list<float> X_data{-1.199109f, -0.025686f, 1.802375f, -1.059653f, 3.402826f, -0.568670f, -0.475489f, 1.743163f, 1.060884f, -0.015953f, 1.275653f, 0.009457f, -0.369450f, 1.218198f, 0.255044f, 0.273993f, 1.404381f, 1.082878f, 0.788966f, -0.137615f, 0.122478f, -1.076701f, -0.650897f, -1.619658f};
+  std::initializer_list<int64_t> Grid_shape{2, 3, 2, 2};
+  std::initializer_list<float> Grid_data{0.038587f, -0.371014f, -0.260918f, 0.159481f, 0.594851f, -0.840708f, 1.007133f, -0.130476f, -1.005535f, -0.649269f, 1.061781f, 1.097433f, -1.111536f, 0.846358f, 0.601391f, 0.710302f, 1.015835f, -0.646740f, 0.378931f, 0.491080f, -0.354592f, 0.401584f, -0.345256f, 0.741914f};
+  std::initializer_list<int64_t> Y_shape{2, 2, 3, 2};
+  std::initializer_list<float> Y_data{-0.199899f, 1.437523f, -0.017180f, -0.422530f, -0.554188f, -0.088180f, 0.613663f, 0.843979f, 1.165913f, 0.161823f, -0.215288f, 0.001466f, 0.398506f, 0.909392f, 0.576145f, 0.897902f, 0.920312f, 1.201733f, -0.184698f, -1.360176f, -0.080218f, -1.352020f, -0.497572f, -0.710420f};
+  test.AddInput<float>("X", X_shape, X_data);
+  test.AddInput<float>("Grid", Grid_shape, Grid_data);
+  test.AddAttribute("mode", mode);
+  test.AddAttribute("padding_mode", padding_mode);
+  test.AddAttribute("align_corners", align_corners);
+  test.AddOutput<float>("Y", Y_shape, Y_data);
+  test.ConfigEp(DefaultCpuExecutionProvider())
+      .RunWithConfig();
+}
+
+TEST(GridsampleTest, test_grid_sample_16_4D_bilinear_border_align_corners) {
+  OpTester test("GridSample", 16);
+  std::string mode = "bilinear";
+  std::string padding_mode = "border";
+  int64_t align_corners = 1;
+  std::initializer_list<int64_t> X_shape{2, 2, 3, 2};
+  std::initializer_list<float> X_data{-0.546073f, -0.630178f, -0.634650f, 0.974665f, 0.209843f, 0.029890f, 1.709235f, -0.725759f, -0.876951f, 0.522287f, 0.462005f, -1.329269f, -0.295974f, 1.371414f, 0.973846f, 0.765543f, -0.403897f, -0.326279f, 0.748218f, -0.195299f, 0.676756f, -0.080633f, 0.158123f, 0.099984f};
+  std::initializer_list<int64_t> Grid_shape{2, 3, 2, 2};
+  std::initializer_list<float> Grid_data{1.182462f, -0.759228f, 0.230068f, -0.103567f, -0.252788f, -0.268017f, 0.762529f, 0.057356f, -1.168338f, -0.708432f, -0.409080f, 0.603860f, -0.776560f, 1.131504f, -0.267275f, -0.215474f, 0.940270f, 0.603129f, 1.017745f, 0.694133f, -0.364025f, -0.796167f, -0.089284f, 0.993165f};
+  std::initializer_list<int64_t> Y_shape{2, 2, 3, 2};
+  std::initializer_list<float> Y_data{-0.243777f, 0.256440f, -0.179228f, 0.741578f, -0.571899f, 0.031558f, -0.425264f, 0.007242f, -0.044977f, 0.271677f, 0.955187f, -0.224230f, -0.395226f, 0.771988f, 0.108104f, 0.007673f, 0.371491f, -0.360026f, 0.151628f, 0.399982f, 0.038327f, 0.044739f, 0.445689f, 0.133017f};
+  test.AddInput<float>("X", X_shape, X_data);
+  test.AddInput<float>("Grid", Grid_shape, Grid_data);
+  test.AddAttribute("mode", mode);
+  test.AddAttribute("padding_mode", padding_mode);
+  test.AddAttribute("align_corners", align_corners);
+  test.AddOutput<float>("Y", Y_shape, Y_data);
+  test.ConfigEp(DefaultCpuExecutionProvider())
+      .RunWithConfig();
+}
+
+TEST(GridsampleTest, test_grid_sample_16_4D_bilinear_border_no_align_corners) {
+  OpTester test("GridSample", 16);
+  std::string mode = "bilinear";
+  std::string padding_mode = "border";
+  int64_t align_corners = 0;
+  std::initializer_list<int64_t> X_shape{2, 2, 3, 2};
+  std::initializer_list<float> X_data{-0.873307f, 0.004261f, -1.257887f, -1.084466f, 0.752979f, 0.323648f, -0.275010f, 1.305612f, -0.009480f, -0.831312f, -0.556290f, 2.070567f, 0.710039f, -0.146461f, -0.746745f, 0.725842f, 0.403461f, 0.234374f, 0.173281f, 1.724145f, -0.408946f, 0.782749f, -1.520847f, -0.314686f};
+  std::initializer_list<int64_t> Grid_shape{2, 3, 2, 2};
+  std::initializer_list<float> Grid_data{0.605180f, 0.169896f, 1.021029f, 0.161312f, -0.555188f, 1.135200f, 0.284017f, -1.170817f, -0.341630f, -0.817401f, 1.052104f, -0.198175f, -1.093830f, -0.075436f, 0.753615f, 0.311761f, 0.379445f, 0.111448f, 0.447382f, -0.292382f, -0.477360f, -1.121650f, -0.904004f, 0.520083f};
+  std::initializer_list<int64_t> Y_shape{2, 2, 3, 2};
+  std::initializer_list<float> Y_data{-0.725617f, -0.743749f, 0.752979f, -0.185279f, -0.734326f, -0.760828f, -0.091786f, -0.129152f, -0.556290f, 0.964224f, -0.024687f, -0.196084f, -0.581904f, 0.496011f, 0.499240f, 0.319537f, 0.690648f, 0.150559f, -0.343065f, 0.269544f, 0.455333f, 1.124628f, 0.208392f, -1.276367f};
+  test.AddInput<float>("X", X_shape, X_data);
+  test.AddInput<float>("Grid", Grid_shape, Grid_data);
+  test.AddAttribute("mode", mode);
+  test.AddAttribute("padding_mode", padding_mode);
+  test.AddAttribute("align_corners", align_corners);
+  test.AddOutput<float>("Y", Y_shape, Y_data);
+  test.ConfigEp(DefaultCpuExecutionProvider())
+      .RunWithConfig();
+}
+
+TEST(GridsampleTest, test_grid_sample_16_4D_bilinear_reflection_align_corners) {
+  OpTester test("GridSample", 16);
+  std::string mode = "bilinear";
+  std::string padding_mode = "reflection";
+  int64_t align_corners = 1;
+  std::initializer_list<int64_t> X_shape{2, 2, 3, 2};
+  std::initializer_list<float> X_data{0.540757f, -0.947807f, 0.202144f, -0.350748f, 0.545005f, 1.541211f, 0.600239f, -0.338015f, -1.080823f, -1.391537f, -0.352570f, 1.560770f, -0.822488f, -2.140920f, 0.099553f, -0.697505f, 0.665352f, -2.256198f, -1.002236f, -1.395144f, 0.415783f, 0.268104f, -0.151752f, 0.794042f};
+  std::initializer_list<int64_t> Grid_shape{2, 3, 2, 2};
+  std::initializer_list<float> Grid_data{1.051960f, -0.798975f, -0.129852f, -0.064453f, 0.535452f, 0.820411f, -0.190205f, -0.994177f, 0.594591f, 0.358958f, 0.482039f, -0.740241f, 0.772315f, 1.136586f, 0.104126f, -1.120858f, 0.842388f, -0.889742f, 0.275846f, 0.174381f, -0.561644f, 0.417835f, -1.073319f, 0.273311f};
+  std::initializer_list<int64_t> Y_shape{2, 2, 3, 2};
+  std::initializer_list<float> Y_data{-0.793997f, -0.042818f, 1.034663f, -0.061725f, 0.327743f, -0.470152f, -0.528701f, -1.125254f, 0.678924f, 0.212033f, -0.430627f, -0.410903f, -1.743740f, -1.404122f, -1.882401f, -0.546577f, -0.033295f, 0.203686f, 0.631537f, -1.031405f, -1.182924f, 0.344248f, 0.246420f, 0.266212f};
+  test.AddInput<float>("X", X_shape, X_data);
+  test.AddInput<float>("Grid", Grid_shape, Grid_data);
+  test.AddAttribute("mode", mode);
+  test.AddAttribute("padding_mode", padding_mode);
+  test.AddAttribute("align_corners", align_corners);
+  test.AddOutput<float>("Y", Y_shape, Y_data);
+  test.ConfigEp(DefaultCpuExecutionProvider())
+      .RunWithConfig();
+}
+
+TEST(GridsampleTest, test_grid_sample_16_4D_bilinear_reflection_no_align_corners) {
+  OpTester test("GridSample", 16);
+  std::string mode = "bilinear";
+  std::string padding_mode = "reflection";
+  int64_t align_corners = 0;
+  std::initializer_list<int64_t> X_shape{2, 2, 3, 2};
+  std::initializer_list<float> X_data{0.584178f, 1.050431f, 1.285579f, -1.616520f, -0.768962f, -1.220462f, 0.573128f, 0.699197f, -1.654887f, 0.493267f, -0.615042f, 1.311865f, 0.788249f, -1.232951f, 0.454381f, -1.436621f, 0.711631f, 0.554599f, -0.807529f, 1.680131f, 0.597634f, -0.238890f, -0.345997f, 1.770104f};
+  std::initializer_list<int64_t> Grid_shape{2, 3, 2, 2};
+  std::initializer_list<float> Grid_data{0.564800f, 1.031186f, 0.795913f, -0.629473f, -0.131544f, -0.377622f, -0.964948f, 0.000496f, 0.902922f, 1.011019f, 0.111961f, 0.272548f, -0.519506f, 0.905811f, -0.499330f, -0.833583f, 0.184792f, 0.719262f, -1.081910f, 1.084761f, 0.431677f, -0.840735f, -0.258489f, 1.041096f};
+  std::initializer_list<int64_t> Y_shape{2, 2, 3, 2};
+  std::initializer_list<float> Y_data{-1.220462f, 0.901641f, 0.521980f, 1.284051f, -1.220462f, -0.717235f, 1.311865f, 0.687708f, -0.023386f, -1.654114f, 1.311865f, 0.029458f, 0.711631f, 0.786895f, 0.604097f, 0.711631f, -1.094857f, 0.673706f, -0.345997f, -0.805863f, 1.103092f, -0.345997f, 1.510167f, 0.165064f};
+  test.AddInput<float>("X", X_shape, X_data);
+  test.AddInput<float>("Grid", Grid_shape, Grid_data);
+  test.AddAttribute("mode", mode);
+  test.AddAttribute("padding_mode", padding_mode);
+  test.AddAttribute("align_corners", align_corners);
+  test.AddOutput<float>("Y", Y_shape, Y_data);
+  test.ConfigEp(DefaultCpuExecutionProvider())
+      .RunWithConfig();
+}
+
+TEST(GridsampleTest, test_grid_sample_16_4D_bicubic_zeros_align_corners) {
+  OpTester test("GridSample", 16);
+  std::string mode = "bicubic";
+  std::string padding_mode = "zeros";
+  int64_t align_corners = 1;
+  std::initializer_list<int64_t> X_shape{2, 2, 3, 2};
+  std::initializer_list<float> X_data{0.497417f, 0.268522f, 1.476879f, 0.354795f, 1.624709f, 0.593423f, -1.725412f, -0.622016f, -0.466707f, -0.319962f, 0.701868f, 0.494252f, -0.630165f, 0.548236f, 1.042740f, 0.253800f, -2.667303f, 1.379165f, -0.519418f, 0.672783f, -0.005627f, -0.180192f, -0.018395f, 0.998084f};
+  std::initializer_list<int64_t> Grid_shape{2, 3, 2, 2};
+  std::initializer_list<float> Grid_data{0.213755f, 0.141747f, -0.562622f, -0.414594f, 0.325025f, -0.834438f, 0.197995f, 0.519270f, -0.472884f, 0.996769f, -0.078973f, 0.544455f, 1.188368f, -0.366802f, 0.652090f, -0.343235f, -0.175288f, -0.203365f, -0.007455f, -0.453322f, 0.281264f, 0.045216f, 0.760668f, -0.242886f};
+  std::initializer_list<int64_t> Y_shape{2, 2, 3, 2};
+  std::initializer_list<float> Y_data{1.007407f, 1.068583f, 0.492134f, 1.222040f, 1.576835f, 1.464183f, -0.238652f, -1.242164f, -1.156880f, 0.279082f, 0.744912f, 0.338287f, 0.215322f, 0.388598f, 0.866571f, 0.556826f, 0.608617f, 0.326312f, 0.044527f, -0.028766f, -0.136528f, -0.084880f, -0.121429f, -0.105516f};
+  test.AddInput<float>("X", X_shape, X_data);
+  test.AddInput<float>("Grid", Grid_shape, Grid_data);
+  test.AddAttribute("mode", mode);
+  test.AddAttribute("padding_mode", padding_mode);
+  test.AddAttribute("align_corners", align_corners);
+  test.AddOutput<float>("Y", Y_shape, Y_data);
+  test.ConfigEp(DefaultCpuExecutionProvider())
+      .RunWithConfig();
+}
+
+TEST(GridsampleTest, test_grid_sample_16_4D_bicubic_zeros_no_align_corners) {
+  OpTester test("GridSample", 16);
+  std::string mode = "bicubic";
+  std::string padding_mode = "zeros";
+  int64_t align_corners = 0;
+  std::initializer_list<int64_t> X_shape{2, 2, 3, 2};
+  std::initializer_list<float> X_data{-1.065470f, 0.402578f, -0.405242f, -0.583366f, -0.258523f, -0.605559f, -0.188242f, 0.959607f, 1.189619f, -0.179522f, -1.823240f, -0.051351f, -1.636092f, -2.510569f, -1.238273f, -0.929619f, -0.058536f, 0.772879f, 0.468944f, 0.259886f, 0.757624f, -2.041813f, -0.552378f, 0.626977f};
+  std::initializer_list<int64_t> Grid_shape{2, 3, 2, 2};
+  std::initializer_list<float> Grid_data{-1.199809f, 0.061445f, -0.035546f, 0.180524f, 0.919500f, 1.166411f, -0.711939f, -0.074825f, -0.480808f, -1.105975f, -0.873191f, 1.126273f, 0.699673f, 0.644581f, 0.666892f, -0.953375f, 0.126023f, 1.116858f, -0.669703f, 1.067513f, 0.315406f, 0.844252f, -0.514065f, 0.553221f};
+  std::initializer_list<int64_t> Y_shape{2, 2, 3, 2};
+  std::initializer_list<float> Y_data{-0.086429f, -0.590424f, -0.090572f, -0.393926f, -0.379182f, -0.031455f, 0.347836f, 0.182097f, 0.050161f, 1.154870f, -0.134312f, -0.509844f, 0.697346f, -1.440179f, 0.264668f, 0.021389f, 0.729883f, -0.236038f, 0.576661f, 0.348301f, 0.149351f, -0.327477f, 0.607344f, -0.405680f};
+  test.AddInput<float>("X", X_shape, X_data);
+  test.AddInput<float>("Grid", Grid_shape, Grid_data);
+  test.AddAttribute("mode", mode);
+  test.AddAttribute("padding_mode", padding_mode);
+  test.AddAttribute("align_corners", align_corners);
+  test.AddOutput<float>("Y", Y_shape, Y_data);
+  test.ConfigEp(DefaultCpuExecutionProvider())
+      .RunWithConfig();
+}
+
+TEST(GridsampleTest, test_grid_sample_16_4D_bicubic_border_align_corners) {
+  OpTester test("GridSample", 16);
+  std::string mode = "bicubic";
+  std::string padding_mode = "border";
+  int64_t align_corners = 1;
+  std::initializer_list<int64_t> X_shape{2, 2, 3, 2};
+  std::initializer_list<float> X_data{0.203585f, -1.032829f, 1.130481f, -0.570301f, -2.100938f, 0.389922f, 0.087343f, -0.857360f, 1.193520f, -0.019760f, 0.280285f, 1.811013f, 1.838673f, 0.164184f, 1.436009f, 0.167011f, -1.139939f, -0.029833f, -0.009878f, 0.079750f, 0.216590f, -0.265852f, -0.528116f, -0.451915f};
+  std::initializer_list<int64_t> Grid_shape{2, 3, 2, 2};
+  std::initializer_list<float> Grid_data{0.797796f, -1.010726f, 0.868577f, -1.132977f, 0.268082f, -0.786042f, -0.476635f, 0.212483f, -0.471816f, -0.189867f, -1.137389f, -1.131448f, 0.464836f, -0.507934f, -0.730068f, -0.473499f, -0.981082f, -0.959280f, 0.718047f, 0.609891f, 0.159844f, -0.655512f, 0.399241f, 0.053910f};
+  std::initializer_list<int64_t> Y_shape{2, 2, 3, 2};
+  std::initializer_list<float> Y_data{-0.934180f, -1.004565f, -0.467118f, 0.384839f, 0.792549f, 0.188357f, -0.785741f, -0.871727f, -0.372851f, 0.958270f, 0.751528f, 0.046397f, 0.598629f, 1.686400f, 1.817043f, 0.015806f, 0.866266f, 0.480930f, -0.013358f, 0.152904f, -0.001292f, -0.385043f, 0.030959f, -0.152332f};
+  test.AddInput<float>("X", X_shape, X_data);
+  test.AddInput<float>("Grid", Grid_shape, Grid_data);
+  test.AddAttribute("mode", mode);
+  test.AddAttribute("padding_mode", padding_mode);
+  test.AddAttribute("align_corners", align_corners);
+  test.AddOutput<float>("Y", Y_shape, Y_data);
+  test.ConfigEp(DefaultCpuExecutionProvider())
+      .RunWithConfig();
+}
+
+TEST(GridsampleTest, test_grid_sample_16_4D_bicubic_border_no_align_corners) {
+  OpTester test("GridSample", 16);
+  std::string mode = "bicubic";
+  std::string padding_mode = "border";
+  int64_t align_corners = 0;
+  std::initializer_list<int64_t> X_shape{2, 2, 3, 2};
+  std::initializer_list<float> X_data{-0.427361f, 0.814325f, -1.412076f, -0.099774f, 0.074936f, 0.590322f, 0.398556f, -0.635891f, -1.081747f, -0.330179f, 0.271759f, -1.089819f, -0.746656f, -0.942538f, -1.251568f, -1.730282f, -0.722323f, 0.525964f, -0.436259f, -0.188952f, -0.499550f, 1.502071f, -0.014112f, 1.194050f};
+  std::initializer_list<int64_t> Grid_shape{2, 3, 2, 2};
+  std::initializer_list<float> Grid_data{-0.102021f, -0.935855f, -0.007380f, -0.996053f, -0.258157f, 0.695455f, -0.834420f, -0.808862f, -0.293012f, -0.328961f, 0.203145f, 0.199219f, 0.608516f, -0.826657f, -0.084685f, 0.671149f, 1.037966f, -0.087535f, -0.694344f, 0.344955f, 0.683373f, -0.749700f, -0.696352f, 0.530398f};
+  std::initializer_list<int64_t> Y_shape{2, 2, 3, 2};
+  std::initializer_list<float> Y_data{0.154701f, 0.273277f, 0.226316f, -0.467055f, -0.820643f, -0.311691f, 0.084699f, -0.052970f, 0.001158f, 0.679701f, -0.467804f, -0.607116f, -0.871407f, -0.210613f, -1.860685f, -1.059387f, -0.902250f, -0.918798f, -0.360562f, 0.476049f, 1.499304f, -0.418396f, -0.298854f, -0.235927f};
+  test.AddInput<float>("X", X_shape, X_data);
+  test.AddInput<float>("Grid", Grid_shape, Grid_data);
+  test.AddAttribute("mode", mode);
+  test.AddAttribute("padding_mode", padding_mode);
+  test.AddAttribute("align_corners", align_corners);
+  test.AddOutput<float>("Y", Y_shape, Y_data);
+  test.ConfigEp(DefaultCpuExecutionProvider())
+      .RunWithConfig();
+}
+
+TEST(GridsampleTest, test_grid_sample_16_4D_bicubic_reflection_align_corners) {
+  OpTester test("GridSample", 16);
+  std::string mode = "bicubic";
+  std::string padding_mode = "reflection";
+  int64_t align_corners = 1;
+  std::initializer_list<int64_t> X_shape{2, 2, 3, 2};
+  std::initializer_list<float> X_data{-1.084082f, -0.128738f, -0.681077f, -1.309896f, 0.660269f, -1.412063f, 1.834581f, 0.456195f, 0.162801f, -0.638266f, 0.897973f, -0.383653f, 0.297945f, 1.809414f, -0.091298f, 1.092744f, -0.102453f, -1.726535f, -0.484632f, 0.712097f, 1.820312f, -0.852073f, -0.341399f, -0.138106f};
+  std::initializer_list<int64_t> Grid_shape{2, 3, 2, 2};
+  std::initializer_list<float> Grid_data{-0.501236f, -0.770480f, -0.140656f, -1.129896f, 0.470370f, 0.885106f, 0.288068f, -0.118568f, 0.594968f, -0.761702f, 1.173892f, -1.193212f, -1.149534f, -0.283562f, 0.980213f, 0.120151f, 0.460855f, -0.879608f, 0.437623f, -0.134092f, 0.480988f, 0.847491f, 0.521616f, -0.102077f};
+  std::initializer_list<int64_t> Y_shape{2, 2, 3, 2};
+  std::initializer_list<float> Y_data{-0.953278f, -0.722872f, -1.065112f, -1.071529f, -0.344328f, -0.233562f, 1.436462f, 1.232983f, -0.181487f, -0.297043f, 0.464837f, 0.396673f, 0.053896f, 0.733510f, 1.541248f, 1.117701f, -1.352406f, 1.131762f, 1.324986f, -0.882173f, 0.469635f, -0.247133f, -0.196824f, -0.393592f};
+  test.AddInput<float>("X", X_shape, X_data);
+  test.AddInput<float>("Grid", Grid_shape, Grid_data);
+  test.AddAttribute("mode", mode);
+  test.AddAttribute("padding_mode", padding_mode);
+  test.AddAttribute("align_corners", align_corners);
+  test.AddOutput<float>("Y", Y_shape, Y_data);
+  test.ConfigEp(DefaultCpuExecutionProvider())
+      .RunWithConfig();
+}
+
+TEST(GridsampleTest, test_grid_sample_16_4D_bicubic_reflection_no_align_corners) {
+  OpTester test("GridSample", 16);
+  std::string mode = "bicubic";
+  std::string padding_mode = "reflection";
+  int64_t align_corners = 0;
+  std::initializer_list<int64_t> X_shape{2, 2, 3, 2};
+  std::initializer_list<float> X_data{-1.122981f, 0.620969f, -0.876394f, -1.774003f, -0.810376f, -1.475962f, 0.667025f, 0.668804f, -0.748346f, 1.422400f, 0.138469f, -0.165945f, 1.266886f, -0.496157f, 0.158060f, 0.488900f, 0.414476f, 0.419527f, 0.238000f, -0.034674f, 0.229435f, 0.234530f, 0.320846f, 0.703888f};
+  std::initializer_list<int64_t> Grid_shape{2, 3, 2, 2};
+  std::initializer_list<float> Grid_data{0.471637f, -0.923628f, -0.909401f, 0.684338f, 0.224360f, 1.092855f, -0.320755f, -0.579618f, -0.111056f, 0.006071f, 0.915173f, -1.195296f, -0.085441f, 0.530823f, -0.660820f, -0.609769f, 0.579921f, -1.149822f, 0.284347f, -0.929024f, 0.596474f, -1.026049f, 0.737766f, -1.135959f};
+  std::initializer_list<int64_t> Y_shape{2, 2, 3, 2};
+  std::initializer_list<float> Y_data{0.998063f, -0.689213f, -1.266024f, -0.870706f, -1.217616f, 1.292693f, 0.543307f, 0.219521f, -0.255151f, 0.543599f, 0.062982f, 0.527696f, 0.387590f, 1.352544f, -0.758053f, -0.262859f, -0.820496f, -0.934255f, 0.434353f, 0.262797f, -0.092283f, -0.021089f, -0.106052f, -0.119717f};
+  test.AddInput<float>("X", X_shape, X_data);
+  test.AddInput<float>("Grid", Grid_shape, Grid_data);
+  test.AddAttribute("mode", mode);
+  test.AddAttribute("padding_mode", padding_mode);
+  test.AddAttribute("align_corners", align_corners);
+  test.AddOutput<float>("Y", Y_shape, Y_data);
+  test.ConfigEp(DefaultCpuExecutionProvider())
+      .RunWithConfig();
+}
+
+TEST(GridsampleTest, test_grid_sample_20_4D_nearest_zeros_align_corners) {
+  OpTester test("GridSample", 20);
+  std::string mode = "nearest";
+  std::string padding_mode = "zeros";
+  int64_t align_corners = 1;
+  std::initializer_list<int64_t> X_shape{2, 2, 3, 2};
+  std::initializer_list<float> X_data{0.404710f, -0.654932f, 0.052124f, 0.340055f, -0.212416f, 1.562917f, -0.907159f, -1.566185f, 0.596746f, 1.002548f, -0.820504f, 0.509186f, 0.951389f, 0.773736f, -2.144711f, 0.044147f, 1.290612f, 0.664926f, 0.530731f, -0.423196f, -0.388699f, 0.333224f, 0.293744f, -0.157543f};
+  std::initializer_list<int64_t> Grid_shape{2, 3, 2, 2};
+  std::initializer_list<float> Grid_data{0.528957f, 0.982925f, -0.033286f, -0.806271f, 0.793837f, -0.411498f, 0.621343f, -0.295724f, 0.510113f, 1.079311f, 1.115827f, -1.092078f, -0.793776f, -0.496160f, -0.765241f, 1.151400f, -0.105983f, -0.796009f, -0.533987f, -0.662838f, 0.489587f, -1.046701f, -1.118884f, -1.182913f};
+  std::initializer_list<int64_t> Y_shape{2, 2, 3, 2};
+  std::initializer_list<float> Y_data{1.562917f, 0.404710f, 0.340055f, 0.340055f, 1.562917f, -0.654932f, 0.509186f, -0.907159f, 1.002548f, 1.002548f, 0.509186f, -1.566185f, -2.144711f, 1.290612f, 0.951389f, 0.951389f, 0.773736f, 0.951389f, -0.388699f, 0.293744f, 0.530731f, 0.530731f, -0.423196f, 0.530731f};
+  test.AddInput<float>("X", X_shape, X_data);
+  test.AddInput<float>("Grid", Grid_shape, Grid_data);
+  test.AddAttribute("mode", mode);
+  test.AddAttribute("padding_mode", padding_mode);
+  test.AddAttribute("align_corners", align_corners);
+  test.AddOutput<float>("Y", Y_shape, Y_data);
+  test.ConfigEp(DefaultCpuExecutionProvider())
+      .RunWithConfig();
+}
+
+TEST(GridsampleTest, test_grid_sample_20_5D_nearest_zeros_align_corners) {
+  OpTester test("GridSample", 20);
+  std::string mode = "nearest";
+  std::string padding_mode = "zeros";
+  int64_t align_corners = 1;
+  std::initializer_list<int64_t> X_shape{2, 2, 3, 3, 2};
+  std::initializer_list<float> X_data{-1.495959f, 0.018231f, 0.345600f, 0.031206f, 0.400390f, 0.425763f, 0.839517f, 1.238945f, 0.523906f, -1.658372f, 0.548335f, -1.398321f, -1.976414f, 1.232491f, -0.545575f, -0.069414f, 0.732245f, -0.150333f, -0.707132f, 0.467497f, 0.278677f, 1.335679f, 1.155313f, -0.056298f, 0.430615f, -0.932645f, -1.505319f, 0.103317f, 1.521579f, 0.365497f, 1.428928f, 0.364333f, 1.683777f, 1.010632f, 0.621895f, 2.284701f, 1.574905f, -0.310514f, 1.495724f, 1.003370f, -1.437482f, 0.043097f, -1.645546f, -1.464643f, 0.350139f, -0.105905f, -0.740495f, 1.157691f, 1.443377f, 0.198399f, -1.105180f, -2.037115f, 2.128767f, -0.204457f, 0.468464f, 1.203629f, -0.362309f, -0.130520f, 1.532353f, 1.547599f, -0.831847f, -1.008509f, 0.023218f, 0.342626f, -0.882915f, 0.560640f, -1.142297f, 1.119107f, 0.385787f, -0.068515f, -0.529550f, -0.233903f};
+  std::initializer_list<int64_t> Grid_shape{2, 3, 3, 2, 3};
+  std::initializer_list<float> Grid_data{0.812645f, 0.528235f, -0.550793f, -0.856977f, -1.073535f, 0.059526f, 1.163856f, -0.227931f, -0.050518f, -0.872033f, 0.368412f, 0.760780f, -1.183099f, -0.844947f, 0.888849f, 0.284117f, -0.074815f, 0.214510f, -0.182450f, -0.838758f, -1.121316f, 0.789250f, -0.142724f, -0.445665f, -0.309738f, -0.654508f, -0.355420f, -1.030097f, 0.898012f, 0.490011f, -0.605186f, -0.409576f, 0.538365f, -0.444367f, 0.316432f, 0.330410f, -0.755392f, 0.300602f, 0.073421f, 1.048061f, -0.434184f, -0.308482f, 1.033921f, -0.979923f, 0.086698f, 1.156203f, -0.538042f, 1.150419f, 1.064809f, 1.116408f, -0.114508f, 1.085560f, -0.522863f, -0.410766f, 0.453879f, 0.253497f, 0.661531f, 1.140383f, -0.751187f, 0.636872f, 0.401477f, 0.633082f, 0.569007f, -0.448884f, -0.948427f, 0.960462f, -0.684283f, 0.767193f, -1.143172f, -0.207603f, 0.012719f, 0.207628f, 0.096998f, 0.378128f, -0.133613f, 0.293885f, 1.187501f, -0.776462f, -0.065516f, -0.458068f, 1.052916f, 1.027248f, -0.032723f, -0.415959f, -0.741439f, 0.858648f, -0.082636f, 1.130172f, 0.684314f, 1.050365f, 0.949108f, -0.779811f, 0.351243f, -0.497591f, 0.602104f, -0.107892f, 0.103884f, -0.829931f, -1.072471f, 0.451888f, 0.278862f, 0.104235f, 0.815033f, -0.501089f, 0.425977f, -0.660914f, 0.248640f, -0.273958f};
+  std::initializer_list<int64_t> Y_shape{2, 2, 3, 3, 2};
+  std::initializer_list<float> Y_data{0.425763f, 0.839517f, -1.658372f, -0.545575f, -1.976414f, -1.658372f, -1.495959f, -1.658372f, 0.839517f, 0.548335f, -0.545575f, 0.523906f, 0.523906f, -1.658372f, 1.238945f, 1.232491f, -1.398321f, 1.238945f, -0.056298f, 0.430615f, 0.103317f, 1.683777f, 1.428928f, 0.103317f, -0.707132f, 0.103317f, 0.430615f, 1.521579f, 1.683777f, -1.505319f, -1.505319f, 0.103317f, -0.932645f, 0.364333f, 0.365497f, -0.932645f, -2.037115f, 0.198399f, -0.204457f, 1.443377f, -1.437482f, 0.350139f, -0.105905f, 0.043097f, -1.105180f, -0.105905f, -0.740495f, -0.204457f, -1.464643f, -0.740495f, -0.310514f, -0.105905f, -1.464643f, 0.350139f, -0.068515f, 1.119107f, -0.233903f, -1.142297f, 1.532353f, 0.023218f, 0.342626f, 1.547599f, 0.385787f, 0.342626f, -0.882915f, -0.233903f, -1.008509f, -0.882915f, 1.203629f, 0.342626f, -1.008509f, 0.023218f};
+  test.AddInput<float>("X", X_shape, X_data);
+  test.AddInput<float>("Grid", Grid_shape, Grid_data);
+  test.AddAttribute("mode", mode);
+  test.AddAttribute("padding_mode", padding_mode);
+  test.AddAttribute("align_corners", align_corners);
+  test.AddOutput<float>("Y", Y_shape, Y_data);
+  test.ConfigEp(DefaultCpuExecutionProvider())
+      .RunWithConfig();
+}
+
+TEST(GridsampleTest, test_grid_sample_20_4D_nearest_zeros_no_align_corners) {
+  OpTester test("GridSample", 20);
+  std::string mode = "nearest";
+  std::string padding_mode = "zeros";
+  int64_t align_corners = 0;
+  std::initializer_list<int64_t> X_shape{2, 2, 3, 2};
+  std::initializer_list<float> X_data{-1.948141f, 1.836740f, -0.418393f, -0.125621f, 1.779137f, -0.028049f, 0.367697f, -0.388847f, -0.939514f, -0.129193f, -0.101240f, -3.087570f, -0.778617f, 1.026859f, 0.624162f, 0.291416f, 0.580998f, -0.185200f, 0.333020f, 0.415896f, 0.011702f, 0.014502f, -0.722870f, -0.201041f};
+  std::initializer_list<int64_t> Grid_shape{2, 3, 2, 2};
+  std::initializer_list<float> Grid_data{0.818167f, -0.394078f, 0.627076f, -1.124307f, -0.296864f, -0.244061f, -0.423780f, 0.504000f, -0.546789f, -0.139085f, -0.346504f, -1.126900f, -0.198169f, -1.016972f, 0.699725f, 0.641356f, 1.124151f, -0.402963f, 0.061023f, 0.235069f, 1.197862f, 1.099936f, -0.621047f, -1.021083f};
+  std::initializer_list<int64_t> Y_shape{2, 2, 3, 2};
+  std::initializer_list<float> Y_data{1.836740f, 0.000000f, -0.418393f, 1.779137f, -0.418393f, 0.000000f, -0.388847f, 0.000000f, -0.939514f, -0.101240f, -0.939514f, 0.000000f, 0.000000f, -0.185200f, 0.000000f, 0.291416f, 0.000000f, 0.000000f, 0.000000f, -0.201041f, 0.000000f, 0.014502f, 0.000000f, 0.000000f};
+  test.AddInput<float>("X", X_shape, X_data);
+  test.AddInput<float>("Grid", Grid_shape, Grid_data);
+  test.AddAttribute("mode", mode);
+  test.AddAttribute("padding_mode", padding_mode);
+  test.AddAttribute("align_corners", align_corners);
+  test.AddOutput<float>("Y", Y_shape, Y_data);
+  test.ConfigEp(DefaultCpuExecutionProvider())
+      .RunWithConfig();
+}
+
+TEST(GridsampleTest, test_grid_sample_20_5D_nearest_zeros_no_align_corners) {
+  OpTester test("GridSample", 20);
+  std::string mode = "nearest";
+  std::string padding_mode = "zeros";
+  int64_t align_corners = 0;
+  std::initializer_list<int64_t> X_shape{2, 2, 3, 3, 2};
+  std::initializer_list<float> X_data{0.317302f, 0.629807f, -0.470444f, 0.215051f, 2.234212f, -1.940229f, 0.577203f, -0.166697f, -0.023467f, -0.451050f, -2.199999f, 1.469197f, -1.758133f, -0.570410f, -1.040355f, -0.627640f, 1.398573f, 0.275127f, -0.333592f, -0.677762f, -0.247167f, -0.290725f, -0.986956f, 0.173983f, -0.971920f, 0.225261f, -0.626680f, 1.660835f, 0.972993f, 0.223424f, 2.283593f, -1.145964f, -0.851223f, -2.052948f, -1.351783f, -0.028922f, 0.394421f, 0.057878f, -0.668671f, -0.088841f, 0.560186f, -0.105506f, 0.277478f, 1.047901f, -0.564728f, -0.287761f, 0.653621f, 0.259766f, 1.629452f, -2.337903f, -0.276703f, 0.258084f, -0.552200f, -0.464470f, -0.412042f, -1.047346f, 0.169468f, 1.334588f, 0.580615f, 1.217562f, -2.487876f, -1.218598f, -0.256617f, 1.397251f, 0.694875f, 0.732315f, 0.574448f, 0.673838f, -1.870634f, -0.855206f, 1.068415f, 0.096061f};
+  std::initializer_list<int64_t> Grid_shape{2, 3, 3, 2, 3};
+  std::initializer_list<float> Grid_data{0.650046f, -0.680891f, -0.200337f, -1.006178f, -0.676990f, 0.500592f, -1.118072f, -0.684288f, 0.899676f, -0.615418f, -0.499387f, -0.336929f, 0.512951f, -0.787164f, 0.120318f, 0.490083f, -0.087112f, 0.216982f, -0.915417f, 0.542519f, 0.448475f, -0.150519f, -0.992244f, 0.479971f, 0.783050f, -0.209890f, 0.565605f, 0.444791f, -0.479961f, -0.083304f, 1.194526f, 0.005665f, -0.955336f, -0.087514f, 0.596991f, -0.391708f, -0.628420f, 0.988534f, 0.634814f, -0.203871f, 0.061307f, -0.126915f, 0.278599f, 0.042647f, -0.726162f, 0.222329f, 0.031386f, 0.077584f, -0.457305f, 0.307467f, -0.970375f, 0.358708f, 0.650272f, -0.132064f, -0.932160f, -0.004362f, 0.001704f, -1.037046f, -0.848754f, 1.109926f, 0.897382f, 0.665044f, 0.831311f, 0.461956f, 0.675346f, 0.794786f, -0.280329f, -0.152546f, 0.855656f, -0.000432f, -0.780824f, -0.930479f, 0.671131f, 0.993983f, 0.931935f, 0.199703f, 0.828337f, -1.101760f, -0.864556f, -1.154677f, 0.966824f, -0.010858f, -0.552558f, 0.406048f, -0.449199f, -0.769613f, 0.462838f, 0.219719f, -0.859342f, -0.790394f, 0.562644f, 0.912452f, 0.097688f, -0.602742f, 0.579449f, 0.209287f, -1.050575f, -0.777654f, 0.262652f, 0.742529f, -0.385517f, 0.580240f, -0.743175f, 1.148320f, 0.855053f, 0.224769f, 0.533871f, 0.417788f};
+  std::initializer_list<int64_t> Y_shape{2, 2, 3, 3, 2};
+  std::initializer_list<float> Y_data{-0.166697f, 0.000000f, 0.000000f, 0.317302f, -0.166697f, -0.451050f, 1.398573f, -1.758133f, -0.627640f, -0.166697f, 0.000000f, 2.234212f, 1.398573f, -0.023467f, 0.215051f, -0.451050f, -0.470444f, 1.469197f, 0.225261f, 0.000000f, 0.000000f, -0.333592f, 0.225261f, 1.660835f, -1.351783f, 2.283593f, -2.052948f, 0.225261f, 0.000000f, -0.986956f, -1.351783f, -0.626680f, -0.290725f, 1.660835f, -0.247167f, 0.223424f, -0.564728f, 0.000000f, -0.464470f, -0.464470f, -0.276703f, 0.394421f, -0.464470f, 0.000000f, 0.000000f, 1.629452f, 1.629452f, 0.057878f, 0.259766f, 0.653621f, 0.000000f, -2.337903f, 0.000000f, -0.464470f, -0.256617f, 0.000000f, 0.096061f, 0.096061f, -1.870634f, -0.412042f, 0.096061f, 0.000000f, 0.000000f, 0.574448f, 0.574448f, -1.047346f, 0.732315f, 0.694875f, 0.000000f, 0.673838f, 0.000000f, 0.096061f};
+  test.AddInput<float>("X", X_shape, X_data);
+  test.AddInput<float>("Grid", Grid_shape, Grid_data);
+  test.AddAttribute("mode", mode);
+  test.AddAttribute("padding_mode", padding_mode);
+  test.AddAttribute("align_corners", align_corners);
+  test.AddOutput<float>("Y", Y_shape, Y_data);
+  test.ConfigEp(DefaultCpuExecutionProvider())
+      .RunWithConfig();
+}
+
+TEST(GridsampleTest, test_grid_sample_20_4D_nearest_border_align_corners) {
+  OpTester test("GridSample", 20);
+  std::string mode = "nearest";
+  std::string padding_mode = "border";
+  int64_t align_corners = 1;
+  std::initializer_list<int64_t> X_shape{2, 2, 3, 2};
+  std::initializer_list<float> X_data{0.660065f, 0.995767f, -0.226389f, 0.590604f, -2.628610f, 0.444899f, 0.023282f, 0.024018f, -0.584701f, 1.988638f, -0.023379f, 0.711650f, -1.062933f, -0.064113f, 1.178346f, -0.652373f, 1.259795f, 1.508661f, -0.079368f, 0.819443f, 0.836356f, -0.362184f, -1.153828f, -0.561180f};
+  std::initializer_list<int64_t> Grid_shape{2, 3, 2, 2};
+  std::initializer_list<float> Grid_data{-0.447651f, -0.521958f, 0.673539f, 0.222645f, 1.010165f, 0.451903f, 0.966699f, -0.966970f, 0.964714f, -0.551345f, -0.321222f, 0.007182f, -0.225038f, 0.237367f, 1.069316f, -0.716982f, 0.370785f, -0.964445f, 0.188419f, 0.988574f, 0.809140f, 1.027635f, 0.649589f, -0.099282f};
+  std::initializer_list<int64_t> Y_shape{2, 2, 3, 2};
+  std::initializer_list<float> Y_data{0.660065f, 0.590604f, 0.590604f, 0.995767f, 0.995767f, -0.226389f, 0.023282f, 1.988638f, 1.988638f, 0.024018f, 0.024018f, -0.584701f, 1.178346f, -0.064113f, -0.064113f, 1.508661f, 1.508661f, -0.652373f, 0.836356f, 0.819443f, 0.819443f, -0.561180f, -0.561180f, -0.362184f};
+  test.AddInput<float>("X", X_shape, X_data);
+  test.AddInput<float>("Grid", Grid_shape, Grid_data);
+  test.AddAttribute("mode", mode);
+  test.AddAttribute("padding_mode", padding_mode);
+  test.AddAttribute("align_corners", align_corners);
+  test.AddOutput<float>("Y", Y_shape, Y_data);
+  test.ConfigEp(DefaultCpuExecutionProvider())
+      .RunWithConfig();
+}
+
+TEST(GridsampleTest, test_grid_sample_20_5D_nearest_border_align_corners) {
+  OpTester test("GridSample", 20);
+  std::string mode = "nearest";
+  std::string padding_mode = "border";
+  int64_t align_corners = 1;
+  std::initializer_list<int64_t> X_shape{2, 2, 3, 3, 2};
+  std::initializer_list<float> X_data{-0.920922f, -0.560469f, -2.244605f, -0.061799f, 0.523656f, 0.110097f, -0.944521f, 0.818932f, 1.069286f, 0.611457f, -0.355875f, 1.664810f, 0.116694f, 2.318200f, 0.681699f, -0.792880f, -0.025672f, -0.592222f, 0.229768f, -0.521888f, 0.570937f, -0.029345f, -0.873323f, 1.721509f, 2.011626f, -0.310838f, 1.121670f, 0.778967f, -0.450894f, 1.030269f, 0.166967f, -0.244737f, 0.227200f, -0.416612f, -0.276513f, 0.714623f, 0.908783f, -1.393580f, -0.983675f, -0.366833f, 1.473970f, 0.624368f, -0.607720f, -0.523833f, -0.124702f, -0.766457f, -0.131027f, 2.227047f, 1.399269f, 0.053366f, -0.295771f, -0.283811f, 0.019280f, -0.104450f, -0.574185f, -2.130628f, 0.617878f, -1.728151f, -0.272528f, 1.299354f, -1.109310f, -1.881107f, -1.300843f, -0.765376f, -0.477722f, -1.230664f, -0.495792f, 1.061688f, 1.244247f, -0.550821f, -0.520524f, 1.541448f};
+  std::initializer_list<int64_t> Grid_shape{2, 3, 3, 2, 3};
+  std::initializer_list<float> Grid_data{-1.189605f, -0.312072f, 0.459409f, 1.033285f, -1.083635f, 0.572921f, -1.138649f, -1.147562f, -0.751493f, -0.158500f, 0.335153f, -0.912613f, 0.924528f, 1.085165f, 0.073832f, 0.976781f, -0.543258f, -0.474714f, -0.154854f, 0.131118f, -0.837104f, -0.960885f, 0.474040f, 0.345992f, 1.173923f, -0.489256f, 0.423768f, -0.484246f, 0.592379f, -0.066474f, 0.889570f, 0.666682f, 0.998817f, 0.616675f, 0.045084f, 1.034127f, -0.704858f, 1.131824f, 1.172625f, 1.146321f, -0.560545f, -0.635830f, 0.075922f, 0.373677f, 0.601953f, 0.488043f, 1.021787f, -0.300648f, -0.393688f, 0.402240f, 0.334401f, -0.699993f, 0.116070f, -0.911100f, -0.352043f, -0.470968f, 1.051900f, -1.080208f, -0.708510f, -1.174356f, 0.302647f, -0.923627f, 0.388249f, -0.833533f, -0.768697f, -0.613051f, 0.180083f, 1.102657f, 1.124055f, -0.090660f, -1.175396f, -0.396450f, -0.457333f, -0.255235f, 0.458506f, 0.603882f, 0.532050f, 0.342802f, -0.485794f, -0.012730f, 0.152721f, -0.612948f, -0.107348f, -0.149795f, -1.133775f, 0.813507f, -0.121323f, -1.037352f, 0.949408f, -0.645689f, 0.424853f, 1.190055f, 0.055551f, 0.345244f, 0.476794f, 0.906949f, -0.368187f, -0.675263f, -0.093908f, 0.938461f, 0.103178f, 0.833774f, -0.008922f, 0.368184f, 0.041727f, 0.032575f, -1.141943f, -1.049081f};
+  std::initializer_list<int64_t> Y_shape{2, 2, 3, 3, 2};
+  std::initializer_list<float> Y_data{1.069286f, 2.318200f, -0.920922f, -2.244605f, 1.664810f, 0.818932f, -2.244605f, 1.069286f, 0.611457f, -0.355875f, -0.592222f, -0.792880f, -0.025672f, -0.560469f, -0.792880f, 1.664810f, 1.069286f, -2.244605f, 1.121670f, -0.244737f, 0.229768f, 0.570937f, 1.030269f, -0.310838f, 0.570937f, 1.121670f, 0.778967f, -0.450894f, 0.714623f, -0.416612f, -0.276513f, -0.521888f, -0.416612f, 1.030269f, 1.121670f, 0.570937f, -0.295771f, 0.908783f, -0.523833f, 0.908783f, -0.104450f, -0.607720f, -0.124702f, 2.227047f, -0.124702f, -0.124702f, -0.131027f, 1.473970f, 2.227047f, -0.283811f, -0.607720f, -0.283811f, -0.124702f, -1.393580f, 1.244247f, -0.574185f, -1.881107f, -0.574185f, 1.541448f, -1.109310f, -1.300843f, -1.230664f, -1.300843f, -1.300843f, -0.477722f, -0.272528f, -1.230664f, -0.550821f, -1.109310f, -0.550821f, -1.300843f, -2.130628f};
+  test.AddInput<float>("X", X_shape, X_data);
+  test.AddInput<float>("Grid", Grid_shape, Grid_data);
+  test.AddAttribute("mode", mode);
+  test.AddAttribute("padding_mode", padding_mode);
+  test.AddAttribute("align_corners", align_corners);
+  test.AddOutput<float>("Y", Y_shape, Y_data);
+  test.ConfigEp(DefaultCpuExecutionProvider())
+      .RunWithConfig();
+}
+
+TEST(GridsampleTest, test_grid_sample_20_4D_nearest_border_no_align_corners) {
+  OpTester test("GridSample", 20);
+  std::string mode = "nearest";
+  std::string padding_mode = "border";
+  int64_t align_corners = 0;
+  std::initializer_list<int64_t> X_shape{2, 2, 3, 2};
+  std::initializer_list<float> X_data{0.950589f, -1.656624f, 0.767704f, -0.650720f, -1.404308f, -0.531582f, -0.280854f, 0.344309f, -0.959146f, -0.115645f, 0.515696f, -0.114243f, 1.971614f, 0.274268f, 0.543080f, -1.758563f, 1.771011f, 0.934901f, 0.695798f, 1.905137f, 1.598307f, 1.108385f, 0.156008f, 1.290824f};
+  std::initializer_list<int64_t> Grid_shape{2, 3, 2, 2};
+  std::initializer_list<float> Grid_data{0.482490f, -0.910951f, -0.001676f, -0.442514f, 0.580438f, 1.039346f, -0.159076f, -0.603960f, -0.922037f, -0.705026f, 0.346468f, 0.275332f, 0.646235f, -0.178307f, 0.616600f, -1.069108f, 0.322583f, 1.164952f, -1.187638f, -0.622953f, 0.768203f, -0.187618f, -0.639652f, 0.732078f};
+  std::initializer_list<int64_t> Y_shape{2, 2, 3, 2};
+  std::initializer_list<float> Y_data{-1.656624f, 0.950589f, -0.531582f, 0.950589f, 0.950589f, -0.650720f, 0.344309f, -0.280854f, -0.114243f, -0.280854f, -0.280854f, -0.115645f, -1.758563f, 0.274268f, 0.934901f, 1.971614f, -1.758563f, 1.771011f, 1.108385f, 1.905137f, 1.290824f, 0.695798f, 1.108385f, 0.156008f};
+  test.AddInput<float>("X", X_shape, X_data);
+  test.AddInput<float>("Grid", Grid_shape, Grid_data);
+  test.AddAttribute("mode", mode);
+  test.AddAttribute("padding_mode", padding_mode);
+  test.AddAttribute("align_corners", align_corners);
+  test.AddOutput<float>("Y", Y_shape, Y_data);
+  test.ConfigEp(DefaultCpuExecutionProvider())
+      .RunWithConfig();
+}
+
+TEST(GridsampleTest, test_grid_sample_20_5D_nearest_border_no_align_corners) {
+  OpTester test("GridSample", 20);
+  std::string mode = "nearest";
+  std::string padding_mode = "border";
+  int64_t align_corners = 0;
+  std::initializer_list<int64_t> X_shape{2, 2, 3, 3, 2};
+  std::initializer_list<float> X_data{0.465448f, -0.337086f, -0.870849f, -0.389573f, -0.083941f, 1.306894f, 0.719508f, -0.203690f, -1.143864f, 1.163003f, 0.312170f, -2.008687f, 1.731257f, -0.270431f, 1.095352f, -1.673520f, 0.492743f, 0.521962f, -1.938783f, -0.186813f, -0.836257f, -1.835450f, 0.476500f, -0.123386f, 0.246604f, 1.374159f, -0.158435f, 1.268192f, -0.704226f, -0.195314f, -0.277259f, 0.582961f, -0.340940f, 0.192264f, 0.463124f, -2.719402f, -0.593470f, -1.165777f, 0.566071f, 1.622836f, -0.886798f, 1.874877f, -0.849095f, 0.550185f, 0.604298f, 0.073976f, -0.800372f, -0.097283f, -1.576251f, -0.633278f, -1.776745f, -0.827586f, 0.665697f, 0.884698f, 0.467112f, -0.645219f, -0.510110f, 0.032418f, -1.056009f, -0.206175f, -0.173385f, 0.947787f, 1.937234f, 0.615880f, -0.311580f, 0.770921f, -0.841602f, 1.796220f, 0.479491f, 1.609346f, 1.113868f, -0.453360f};
+  std::initializer_list<int64_t> Grid_shape{2, 3, 3, 2, 3};
+  std::initializer_list<float> Grid_data{-0.151540f, -0.033291f, -0.597203f, 0.836404f, -0.686848f, -0.485355f, -0.936738f, -1.009057f, 1.065352f, -0.926635f, -0.165670f, -0.347352f, 0.439545f, 0.320963f, -0.919909f, 1.077689f, -1.195359f, 0.118687f, -0.100253f, -0.278089f, 0.817760f, 1.013180f, 0.156316f, -0.423839f, 0.892139f, 0.753924f, 0.215530f, -0.328214f, 0.050592f, 1.069553f, 0.130134f, -0.236478f, -1.015986f, -0.643059f, 0.866682f, -0.042256f, -0.079912f, 0.467233f, -0.789513f, -0.081063f, -0.337505f, 0.627865f, 0.976589f, 0.753489f, 0.894667f, -1.072442f, -0.426020f, 0.142099f, -1.019226f, 0.325527f, -0.786578f, 0.514215f, 0.971223f, -1.026539f, 1.005531f, 0.559922f, -0.791906f, 1.148613f, -1.039306f, -0.807864f, -0.596935f, -0.060766f, 0.215484f, -0.352165f, -1.137417f, -0.138518f, 0.910459f, 0.923925f, 0.600710f, 0.174227f, 0.298169f, -0.925092f, 0.485927f, -1.194283f, -0.495564f, -0.315357f, 0.881199f, -0.034981f, -0.546611f, 0.209651f, -0.995724f, -0.317709f, 0.332343f, -0.079474f, -0.126024f, 0.733410f, -0.911554f, -0.605911f, 1.161566f, 0.238787f, -0.194293f, 0.621583f, 0.721901f, -0.200521f, -0.499850f, -0.196149f, 0.435730f, -0.153196f, 0.698401f, -0.978582f, -0.588758f, 0.914808f, 0.157427f, 0.241646f, 0.394674f, -0.283552f, -0.479889f, 0.344261f};
+  std::initializer_list<int64_t> Y_shape{2, 2, 3, 3, 2};
+  std::initializer_list<float> Y_data{-0.870849f, -0.337086f, 1.731257f, -0.870849f, -0.389573f, -0.203690f, 1.095352f, -0.389573f, -2.008687f, 1.095352f, -0.389573f, 0.312170f, -0.083941f, 1.731257f, 0.521962f, 0.719508f, -0.870849f, 1.306894f, -0.836257f, -0.186813f, -0.277259f, -0.836257f, -1.835450f, 1.374159f, -0.340940f, -1.835450f, -0.195314f, -0.340940f, -1.835450f, -0.704226f, 0.476500f, -0.277259f, -2.719402f, 0.246604f, -0.836257f, -0.123386f, 1.874877f, -1.165777f, 0.604298f, -0.849095f, 0.884698f, 1.622836f, -1.165777f, -0.800372f, 0.566071f, 0.604298f, -0.886798f, -0.800372f, 0.665697f, -0.849095f, -0.827586f, -1.576251f, -0.827586f, -1.576251f, -0.206175f, -0.645219f, 1.937234f, -0.173385f, -0.453360f, 0.032418f, -0.645219f, -0.311580f, -0.510110f, 1.937234f, -1.056009f, -0.311580f, 1.113868f, -0.173385f, 1.609346f, -0.841602f, 1.609346f, -0.841602f};
+  test.AddInput<float>("X", X_shape, X_data);
+  test.AddInput<float>("Grid", Grid_shape, Grid_data);
+  test.AddAttribute("mode", mode);
+  test.AddAttribute("padding_mode", padding_mode);
+  test.AddAttribute("align_corners", align_corners);
+  test.AddOutput<float>("Y", Y_shape, Y_data);
+  test.ConfigEp(DefaultCpuExecutionProvider())
+      .RunWithConfig();
+}
+
+TEST(GridsampleTest, test_grid_sample_20_4D_nearest_reflection_align_corners) {
+  OpTester test("GridSample", 20);
+  std::string mode = "nearest";
+  std::string padding_mode = "reflection";
+  int64_t align_corners = 1;
+  std::initializer_list<int64_t> X_shape{2, 2, 3, 2};
+  std::initializer_list<float> X_data{0.079043f, 0.407494f, 1.038992f, -0.437542f, 0.991216f, 0.409636f, 1.050403f, -0.687172f, -2.021689f, 0.789633f, 0.538178f, 0.414847f, 2.221617f, -0.254833f, -0.179968f, -0.952356f, -1.213159f, 0.499103f, -0.374865f, 0.441938f, -0.114847f, 0.716887f, 1.059090f, 0.438870f};
+  std::initializer_list<int64_t> Grid_shape{2, 3, 2, 2};
+  std::initializer_list<float> Grid_data{0.355147f, -0.222342f, -1.197658f, 0.844060f, 1.188586f, 0.605435f, 1.174232f, 0.327060f, -0.094032f, -0.955794f, -1.048806f, -0.826196f, -0.304468f, 0.698768f, -0.495101f, -0.046607f, -0.016936f, -0.784415f, -0.032484f, 1.158664f, 0.959105f, 0.913943f, -0.118352f, 0.021282f};
+  std::initializer_list<int64_t> Y_shape{2, 2, 3, 2};
+  std::initializer_list<float> Y_data{-0.437542f, 0.991216f, 0.409636f, -0.437542f, 0.079043f, 0.079043f, 0.789633f, 0.538178f, 0.414847f, 0.789633f, 1.050403f, 1.050403f, -1.213159f, -0.179968f, 2.221617f, -1.213159f, 0.499103f, -0.179968f, 1.059090f, -0.114847f, -0.374865f, 1.059090f, 0.438870f, -0.114847f};
+  test.AddInput<float>("X", X_shape, X_data);
+  test.AddInput<float>("Grid", Grid_shape, Grid_data);
+  test.AddAttribute("mode", mode);
+  test.AddAttribute("padding_mode", padding_mode);
+  test.AddAttribute("align_corners", align_corners);
+  test.AddOutput<float>("Y", Y_shape, Y_data);
+  test.ConfigEp(DefaultCpuExecutionProvider())
+      .RunWithConfig();
+}
+
+TEST(GridsampleTest, test_grid_sample_20_5D_nearest_reflection_align_corners) {
+  OpTester test("GridSample", 20);
+  std::string mode = "nearest";
+  std::string padding_mode = "reflection";
+  int64_t align_corners = 1;
+  std::initializer_list<int64_t> X_shape{2, 2, 3, 3, 2};
+  std::initializer_list<float> X_data{0.189379f, 0.825309f, -0.701365f, 0.787800f, -1.102514f, 0.126954f, 1.824453f, -0.144635f, -1.712534f, 0.361739f, -0.462516f, -2.153102f, 0.536963f, 0.581639f, -1.325014f, -1.314673f, -0.524797f, -1.304159f, -1.093757f, -1.703444f, -0.672976f, 0.505303f, 1.497654f, -0.545441f, -1.334648f, 0.474489f, 0.484384f, 0.434399f, -0.733471f, 0.452991f, 0.324606f, -1.307459f, -0.640603f, -0.450100f, 0.772854f, 1.281813f, -0.481714f, 1.224667f, -0.437546f, 0.371986f, -0.320368f, -1.011020f, -1.199298f, 0.213302f, 1.795444f, 0.409271f, 1.328065f, -1.037527f, 0.224494f, 0.217863f, -0.925740f, 0.344755f, -1.445667f, -0.935542f, -0.427280f, -2.010803f, -1.174929f, 1.434105f, -1.168630f, 0.321896f, -0.561974f, -0.209305f, -1.063838f, 1.451708f, 0.266913f, -0.132535f, 0.798299f, 0.619547f, -0.324459f, 0.255630f, 0.488773f, -0.142060f};
+  std::initializer_list<int64_t> Grid_shape{2, 3, 3, 2, 3};
+  std::initializer_list<float> Grid_data{-0.034431f, 1.048250f, 0.160255f, -0.446426f, 0.879791f, -0.683555f, 0.039704f, 0.269729f, 0.538601f, -1.107191f, 0.058867f, -0.310704f, 0.778040f, 0.403733f, 0.480956f, 0.721512f, -0.268657f, -0.076883f, 0.962704f, -0.967187f, -0.829464f, 0.087786f, -0.475353f, 0.068725f, 1.060032f, -0.139108f, -1.023162f, -0.545493f, 1.102040f, -0.263627f, -0.526173f, 0.540152f, 0.148556f, -1.058015f, 0.999344f, 0.675750f, 1.043022f, 0.525119f, -0.404585f, -0.391737f, 0.581547f, -0.232625f, 0.235264f, -1.162786f, -0.593187f, 0.445737f, -0.059159f, -0.576901f, -1.046721f, 0.762672f, -0.241271f, -1.179040f, 1.157741f, 0.583952f, -0.717767f, -0.875798f, 1.159575f, 0.005010f, -0.721707f, 0.690536f, -0.249959f, 0.082204f, -0.625120f, -1.016394f, -0.796947f, -0.131764f, -0.868737f, 1.182731f, 0.012988f, -0.459398f, 0.474264f, -1.063883f, -0.613791f, 0.450721f, -1.019595f, 0.598084f, 0.100866f, -1.000569f, -1.190919f, 0.379261f, 0.567202f, -0.239888f, -1.061107f, -0.691616f, 0.127540f, 0.043657f, 0.307172f, 0.212184f, -0.062900f, 0.633272f, 1.164016f, 0.999377f, 1.090411f, -0.405004f, -0.409578f, -0.132722f, 0.354671f, 0.485734f, -0.106963f, -0.775112f, -0.905400f, 1.155262f, -0.322627f, -0.162203f, -0.735432f, -0.594912f, 0.263568f, 0.505424f};
+  std::initializer_list<int64_t> Y_shape{2, 2, 3, 3, 2};
+  std::initializer_list<float> Y_data{-0.462516f, -1.102514f, -1.314673f, -1.712534f, 0.361739f, 0.361739f, 0.825309f, 0.361739f, 0.787800f, -0.462516f, -0.462516f, -0.524797f, -2.153102f, -0.462516f, 0.825309f, 0.787800f, -0.462516f, -0.524797f, -0.733471f, 1.497654f, -0.450100f, 0.484384f, 0.434399f, 0.434399f, -1.703444f, 0.434399f, 0.505303f, -0.733471f, -0.733471f, 0.772854f, 0.452991f, -0.733471f, -1.703444f, 0.505303f, -0.733471f, 0.772854f, 0.224494f, 0.217863f, -0.437546f, -1.199298f, 1.328065f, -0.437546f, -0.437546f, 0.371986f, -0.925740f, -0.481714f, 0.409271f, 0.344755f, -0.935542f, 1.795444f, 0.409271f, 0.224494f, -0.437546f, -0.925740f, 0.798299f, 0.619547f, -1.174929f, -0.561974f, 0.266913f, -1.174929f, -1.174929f, 1.434105f, -0.324459f, -0.427280f, 1.451708f, 0.255630f, -0.142060f, -1.063838f, 1.451708f, 0.798299f, -1.174929f, -0.324459f};
+  test.AddInput<float>("X", X_shape, X_data);
+  test.AddInput<float>("Grid", Grid_shape, Grid_data);
+  test.AddAttribute("mode", mode);
+  test.AddAttribute("padding_mode", padding_mode);
+  test.AddAttribute("align_corners", align_corners);
+  test.AddOutput<float>("Y", Y_shape, Y_data);
+  test.ConfigEp(DefaultCpuExecutionProvider())
+      .RunWithConfig();
+}
+
+TEST(GridsampleTest, test_grid_sample_20_4D_nearest_reflection_no_align_corners) {
+  OpTester test("GridSample", 20);
+  std::string mode = "nearest";
+  std::string padding_mode = "reflection";
+  int64_t align_corners = 0;
+  std::initializer_list<int64_t> X_shape{2, 2, 3, 2};
+  std::initializer_list<float> X_data{-0.769854f, -0.805659f, 0.813652f, -0.010183f, 0.276463f, -0.771678f, -2.563015f, -1.243904f, 2.365071f, 0.730651f, -0.068795f, -1.495438f, 0.211578f, -1.042373f, 0.884036f, -0.746288f, 1.011368f, 0.194463f, -0.307214f, 0.556053f, 0.629364f, 0.083601f, 0.248627f, -0.822453f};
+  std::initializer_list<int64_t> Grid_shape{2, 3, 2, 2};
+  std::initializer_list<float> Grid_data{0.569884f, 1.163780f, -0.977608f, -0.145509f, 0.651234f, 1.099753f, -0.853766f, 0.509955f, 0.495437f, 0.723445f, -0.827299f, 0.856340f, -0.522676f, -0.738659f, 0.238269f, 1.016568f, -0.794666f, 0.640690f, -0.137431f, 0.383085f, 0.936085f, 0.325824f, -0.996188f, -0.361291f};
+  std::initializer_list<int64_t> Y_shape{2, 2, 3, 2};
+  std::initializer_list<float> Y_data{-0.771678f, 0.813652f, -0.771678f, 0.276463f, -0.771678f, 0.276463f, -1.495438f, 2.365071f, -1.495438f, -0.068795f, -1.495438f, -0.068795f, 0.211578f, 0.194463f, 1.011368f, 1.011368f, -0.746288f, 0.211578f, -0.307214f, -0.822453f, 0.248627f, 0.248627f, 0.083601f, -0.307214f};
+  test.AddInput<float>("X", X_shape, X_data);
+  test.AddInput<float>("Grid", Grid_shape, Grid_data);
+  test.AddAttribute("mode", mode);
+  test.AddAttribute("padding_mode", padding_mode);
+  test.AddAttribute("align_corners", align_corners);
+  test.AddOutput<float>("Y", Y_shape, Y_data);
+  test.ConfigEp(DefaultCpuExecutionProvider())
+      .RunWithConfig();
+}
+
+TEST(GridsampleTest, test_grid_sample_20_5D_nearest_reflection_no_align_corners) {
+  OpTester test("GridSample", 20);
+  std::string mode = "nearest";
+  std::string padding_mode = "reflection";
+  int64_t align_corners = 0;
+  std::initializer_list<int64_t> X_shape{2, 2, 3, 3, 2};
+  std::initializer_list<float> X_data{-0.185898f, 0.403325f, 0.737314f, 0.545995f, -1.010481f, -1.204522f, -0.147342f, 0.232425f, -1.339485f, 0.013892f, -1.098319f, 0.478079f, 0.051159f, -0.906061f, -0.428560f, 0.583460f, 1.137472f, 1.487881f, 1.349931f, -0.118774f, 0.436410f, 1.334689f, -1.115846f, 0.159820f, 0.617671f, 0.546630f, 1.861115f, 0.500044f, 0.623446f, 0.541840f, -0.279259f, -0.573875f, 0.783115f, -1.125017f, -1.166457f, -0.827232f, 0.273074f, 0.702953f, 1.288608f, -1.037043f, 0.021860f, 0.575628f, -0.034170f, 1.400741f, 0.508057f, 0.994702f, -2.267981f, 1.677437f, 0.175134f, 0.712679f, -0.440408f, -1.248550f, 1.618839f, -0.214598f, 0.486398f, -0.478466f, 0.912471f, 0.499651f, -0.886606f, -0.929524f, 0.449260f, 0.017969f, -0.050906f, 1.799695f, -0.033007f, -1.884108f, -1.392415f, -0.852990f, -0.052969f, 0.819434f, 0.089723f, 0.598047f};
+  std::initializer_list<int64_t> Grid_shape{2, 3, 3, 2, 3};
+  std::initializer_list<float> Grid_data{-0.118828f, 0.082315f, 0.328488f, -0.834821f, -0.138863f, -0.988801f, -0.976128f, 0.156412f, -1.171383f, 0.319534f, -1.105438f, -0.834991f, -0.248995f, -1.145138f, 0.969159f, 0.983228f, -0.626795f, 0.251376f, 0.613890f, 0.381328f, -0.160747f, -1.131853f, 0.872567f, -1.052516f, -0.222240f, 0.074438f, -0.395210f, -0.438906f, -1.037125f, 0.066119f, -0.136254f, 1.046163f, -0.395065f, 0.927498f, 0.056808f, -0.539139f, -0.285382f, -0.136177f, 0.012430f, -0.197703f, 0.356128f, 0.988219f, 0.188620f, 0.434655f, 0.741024f, 0.258662f, 0.553165f, 0.629461f, 1.123216f, -1.095185f, 0.410630f, -0.054374f, -0.215508f, -0.462650f, 0.721441f, 1.097745f, -0.979308f, 0.648336f, 0.827460f, 0.209729f, 0.014136f, 0.923431f, 0.035578f, -0.299309f, -0.088614f, 0.385002f, 0.300407f, -0.064744f, 0.378800f, 0.323185f, -0.972071f, 0.299012f, 0.734213f, 0.137618f, -0.109532f, 0.919238f, -1.048417f, -0.547724f, -0.542389f, 1.036863f, -1.160666f, 0.119013f, -1.162427f, -0.039461f, 0.447285f, -0.280625f, 1.164882f, 0.003820f, -0.611796f, 0.309439f, 0.624077f, -0.002384f, 1.026569f, -0.759499f, 0.512014f, 0.681403f, 0.596030f, -0.000440f, 0.342557f, -0.941414f, -0.941707f, -0.074588f, -0.150400f, 0.891031f, 0.871352f, 0.813657f, -0.549640f, -0.942044f};
+  std::initializer_list<int64_t> Y_shape{2, 2, 3, 3, 2};
+  std::initializer_list<float> Y_data{-1.339485f, 0.737314f, 0.737314f, 0.403325f, 0.051159f, 0.232425f, 0.478079f, -1.010481f, 0.737314f, -0.147342f, -1.010481f, 0.545995f, -1.339485f, 1.137472f, 1.487881f, 1.487881f, -0.906061f, 0.737314f, 1.861115f, 0.436410f, 0.436410f, -0.118774f, -0.279259f, 0.546630f, 0.541840f, -1.115846f, 0.436410f, 0.617671f, -1.115846f, 1.334689f, 1.861115f, -1.166457f, -0.827232f, -0.827232f, -0.573875f, 0.436410f, 0.575628f, 1.677437f, 1.677437f, -0.440408f, -1.248550f, 1.400741f, 0.994702f, 0.702953f, 0.021860f, 1.400741f, -1.248550f, 1.400741f, -1.248550f, 1.618839f, -1.248550f, -0.034170f, 1.618839f, 0.702953f, -0.929524f, -1.884108f, -1.884108f, -0.052969f, 0.819434f, 0.017969f, 1.799695f, -0.478466f, -0.886606f, 0.017969f, 0.819434f, 0.017969f, 0.819434f, 0.089723f, 0.819434f, 0.449260f, 0.089723f, -0.478466f};
+  test.AddInput<float>("X", X_shape, X_data);
+  test.AddInput<float>("Grid", Grid_shape, Grid_data);
+  test.AddAttribute("mode", mode);
+  test.AddAttribute("padding_mode", padding_mode);
+  test.AddAttribute("align_corners", align_corners);
+  test.AddOutput<float>("Y", Y_shape, Y_data);
+  test.ConfigEp(DefaultCpuExecutionProvider())
+      .RunWithConfig();
+}
+
+TEST(GridsampleTest, test_grid_sample_20_4D_bilinear_zeros_align_corners) {
+  OpTester test("GridSample", 20);
+  std::string mode = "linear";
+  std::string padding_mode = "zeros";
+  int64_t align_corners = 1;
+  std::initializer_list<int64_t> X_shape{2, 2, 3, 2};
+  std::initializer_list<float> X_data{0.010274f, 1.493496f, -0.264303f, 0.035897f, -0.751962f, -0.370195f, -0.514836f, 0.399928f, -0.191651f, -0.239505f, -1.931184f, -1.074773f, -0.121908f, 0.050673f, -0.741501f, -0.229127f, -0.360925f, 0.264077f, 1.537180f, 1.603202f, -1.241810f, -0.388456f, -0.609742f, 0.095097f};
+  std::initializer_list<int64_t> Grid_shape{2, 3, 2, 2};
+  std::initializer_list<float> Grid_data{-0.118589f, -0.020968f, -0.893597f, 1.170924f, -0.517539f, 0.698168f, -0.672718f, 0.008056f, 0.410793f, -1.101817f, 0.550440f, -0.918534f, 0.167456f, -0.237959f, 0.687868f, 1.166281f, 0.270439f, -0.034265f, -0.594534f, 0.447403f, -0.577587f, 0.495680f, -0.520113f, 0.813977f};
+  std::initializer_list<int64_t> Y_shape{2, 2, 3, 2};
+  std::initializer_list<float> Y_data{-0.115313f, -0.606595f, -0.518616f, -0.218999f, 0.948961f, 1.063015f, -0.210622f, -1.563324f, -1.265386f, -0.212304f, 0.117155f, 0.159843f, -0.342175f, 0.138844f, -0.402196f, -0.457139f, -0.432849f, -0.286783f, -0.191760f, -0.012426f, -0.621658f, -0.799488f, -0.763820f, -0.551571f};
+  test.AddInput<float>("X", X_shape, X_data);
+  test.AddInput<float>("Grid", Grid_shape, Grid_data);
+  test.AddAttribute("mode", mode);
+  test.AddAttribute("padding_mode", padding_mode);
+  test.AddAttribute("align_corners", align_corners);
+  test.AddOutput<float>("Y", Y_shape, Y_data);
+  test.ConfigEp(DefaultCpuExecutionProvider())
+      .RunWithConfig();
+}
+
+TEST(GridsampleTest, test_grid_sample_20_5D_bilinear_zeros_align_corners) {
+  OpTester test("GridSample", 20);
+  std::string mode = "linear";
+  std::string padding_mode = "zeros";
+  int64_t align_corners = 1;
+  std::initializer_list<int64_t> X_shape{2, 2, 3, 3, 2};
+  std::initializer_list<float> X_data{-1.787070f, -0.894227f, -0.113069f, 0.713917f, 0.041566f, -1.847208f, 0.013441f, -1.439041f, 1.051864f, 1.576791f, 1.180527f, -1.457019f, 0.298446f, 1.142738f, -0.961347f, -0.471509f, -0.074154f, 0.047739f, -0.679950f, -2.306940f, -0.552171f, -0.357144f, -0.492247f, -0.455872f, 0.399680f, 0.057915f, -0.362704f, 1.083763f, -0.084941f, -1.691393f, -1.913178f, 0.696366f, 1.172833f, 0.901506f, -1.189840f, -1.197158f, 0.007338f, 0.161468f, -1.048452f, -0.480832f, 0.391235f, 1.056413f, -0.116648f, 0.632195f, 0.840261f, -2.187738f, 0.302910f, -0.956190f, -0.362645f, 0.771747f, 0.524840f, -0.954672f, -1.084612f, -0.525794f, -0.969691f, -1.056405f, -0.364709f, 0.336189f, -0.178281f, 1.015025f, -0.532580f, 0.036602f, -0.434395f, -1.208987f, -1.084039f, 0.642844f, -0.819208f, -0.982898f, -0.109210f, -1.231957f, 1.083089f, -0.870451f};
+  std::initializer_list<int64_t> Grid_shape{2, 3, 3, 2, 3};
+  std::initializer_list<float> Grid_data{0.350638f, -0.554259f, 0.740901f, -1.134597f, -0.450763f, -0.706065f, -0.712365f, -0.727142f, -1.130749f, 0.205940f, -0.237380f, -1.010413f, -0.000494f, -0.199898f, 0.495032f, -0.939943f, -0.337590f, 0.247001f, 0.508664f, 0.090780f, 0.325198f, 1.199561f, -0.415694f, 0.817854f, 1.033666f, -1.061540f, 0.290273f, 0.679739f, -0.187185f, 0.662278f, 0.040817f, 0.913540f, 0.025838f, -0.768267f, 0.911326f, 0.356885f, 1.020923f, 0.297892f, 0.637209f, 0.748214f, 0.202064f, -0.278959f, 0.247841f, -0.836700f, 0.040996f, -0.385697f, 0.075869f, -0.950110f, 0.733227f, -1.107135f, 0.513890f, 0.790272f, -1.099795f, 1.084212f, -0.892061f, -0.235640f, 0.621837f, -0.380523f, 1.069422f, -0.529383f, -0.160661f, -0.784422f, -0.556715f, 1.171015f, 0.902476f, 0.088357f, 0.098667f, -1.018314f, 0.905937f, -0.179914f, -0.500513f, -0.954987f, 0.986618f, 0.569025f, 0.722795f, 0.124254f, -0.814285f, 0.491561f, 0.138395f, 0.402690f, -0.298810f, -0.566298f, 0.985118f, 0.402260f, -0.487031f, 0.107159f, -0.260850f, -0.102620f, 0.672911f, -0.955102f, 1.086040f, 0.807667f, 0.001031f, -0.490841f, 0.244670f, -0.794290f, 0.779461f, -0.634633f, 0.229290f, -1.180597f, 0.574650f, 0.812338f, 0.900697f, 0.097950f, 0.708525f, 0.409153f, 0.804739f, 0.677169f};
+  std::initializer_list<int64_t> Y_shape{2, 2, 3, 3, 2};
+  std::initializer_list<float> Y_data{0.171946f, -0.411342f, -1.046998f, -0.002345f, 0.246533f, 0.396970f, 0.664278f, 0.199883f, -0.636287f, 0.162358f, -0.061161f, 0.528084f, 0.041846f, 0.750291f, -0.476442f, 0.142258f, -0.067844f, 0.869081f, 0.360025f, -0.406785f, -0.701985f, -0.718142f, 0.519179f, -0.022693f, 0.618451f, 0.708731f, 0.224429f, 0.784241f, -0.812606f, -0.521137f, 0.266524f, 0.190886f, 0.231077f, -0.465330f, 0.204730f, 0.348489f, 0.356190f, 0.256096f, -0.038212f, -0.943162f, 0.258902f, -0.360112f, -0.920536f, 0.126677f, -0.523600f, -0.361337f, -0.154168f, 0.179761f, -1.141155f, -0.423488f, -0.225410f, -0.204886f, -1.162816f, -0.678226f, -0.384409f, -0.146245f, -0.622531f, 0.312188f, -0.828836f, -0.541017f, -0.778291f, -0.602484f, -0.328754f, -0.163964f, -0.508068f, 0.193021f, 0.273133f, -0.217934f, -0.562420f, 0.287725f, -1.097279f, -0.306201f};
+  test.AddInput<float>("X", X_shape, X_data);
+  test.AddInput<float>("Grid", Grid_shape, Grid_data);
+  test.AddAttribute("mode", mode);
+  test.AddAttribute("padding_mode", padding_mode);
+  test.AddAttribute("align_corners", align_corners);
+  test.AddOutput<float>("Y", Y_shape, Y_data);
+  test.ConfigEp(DefaultCpuExecutionProvider())
+      .RunWithConfig();
+}
+
+TEST(GridsampleTest, test_grid_sample_20_4D_bilinear_zeros_no_align_corners) {
+  OpTester test("GridSample", 20);
+  std::string mode = "linear";
+  std::string padding_mode = "zeros";
+  int64_t align_corners = 0;
+  std::initializer_list<int64_t> X_shape{2, 2, 3, 2};
+  std::initializer_list<float> X_data{0.185965f, 0.133937f, -0.763030f, 0.733342f, 1.932445f, -0.582571f, -1.312078f, 0.738952f, 0.444459f, 0.742593f, -0.805960f, -0.202535f, 0.970323f, -0.801176f, 0.277655f, -1.938051f, -1.879800f, 0.287116f, 0.261958f, -0.358247f, -0.107750f, 0.748162f, -0.742330f, 0.344665f};
+  std::initializer_list<int64_t> Grid_shape{2, 3, 2, 2};
+  std::initializer_list<float> Grid_data{-0.460252f, 0.734353f, -1.069308f, 1.005361f, 1.198595f, -0.327629f, 0.474026f, 1.196645f, 0.361782f, 0.469280f, 0.440632f, -0.490951f, 0.292918f, -0.639568f, 1.024697f, -0.514217f, 0.274326f, -0.347614f, 0.600117f, 0.019780f, 0.659824f, -0.324940f, -0.704174f, 0.460072f};
+  std::initializer_list<int64_t> Y_shape{2, 2, 3, 2};
+  std::initializer_list<float> Y_data{1.646426f, 0.409452f, 0.132247f, -0.106052f, -0.009495f, 0.270785f, -0.702581f, -0.170769f, 0.223282f, -0.044740f, 0.006388f, 0.645576f, -0.476802f, -0.504368f, -0.897503f, -1.684608f, -1.162742f, -0.963921f, -0.197266f, -0.050021f, 0.151796f, 0.662485f, 0.175502f, -0.434265f};
+  test.AddInput<float>("X", X_shape, X_data);
+  test.AddInput<float>("Grid", Grid_shape, Grid_data);
+  test.AddAttribute("mode", mode);
+  test.AddAttribute("padding_mode", padding_mode);
+  test.AddAttribute("align_corners", align_corners);
+  test.AddOutput<float>("Y", Y_shape, Y_data);
+  test.ConfigEp(DefaultCpuExecutionProvider())
+      .RunWithConfig();
+}
+
+TEST(GridsampleTest, test_grid_sample_20_5D_bilinear_zeros_no_align_corners) {
+  OpTester test("GridSample", 20);
+  std::string mode = "linear";
+  std::string padding_mode = "zeros";
+  int64_t align_corners = 0;
+  std::initializer_list<int64_t> X_shape{2, 2, 3, 3, 2};
+  std::initializer_list<float> X_data{-0.299262f, -0.304887f, 0.906636f, -0.392850f, -0.050410f, 0.548199f, -1.235108f, -0.475848f, 0.635455f, 0.307462f, -1.241370f, -0.538672f, 0.863466f, 0.799983f, -0.090064f, -0.751721f, 0.956040f, -0.117709f, -2.183699f, -0.484444f, 1.105900f, 0.164466f, 0.720736f, 0.168044f, -0.656400f, 1.770106f, -0.544832f, 1.358424f, 0.981648f, -1.759268f, -0.526924f, 1.322339f, 0.148774f, 0.321413f, -1.257438f, -0.383775f, -2.117908f, -0.077921f, -0.197889f, 0.555813f, -1.517724f, 1.419652f, -0.891774f, 1.684663f, -1.524669f, -2.055758f, -0.299843f, -0.644860f, 0.428609f, -1.704372f, 1.257671f, -0.886508f, -0.029344f, -1.718824f, -0.294273f, 1.537690f, -1.366837f, -1.610098f, 0.650240f, -0.288219f, 0.837292f, 0.431683f, -0.405852f, 0.492271f, 0.416507f, 0.971658f, -0.183526f, 0.615709f, -0.081615f, 1.160796f, 1.431487f, 0.485687f};
+  std::initializer_list<int64_t> Grid_shape{2, 3, 3, 2, 3};
+  std::initializer_list<float> Grid_data{0.884040f, -0.825214f, 0.496720f, -0.440955f, 1.195811f, 0.169268f, -1.042100f, 0.206524f, 0.145895f, -1.160650f, 0.240829f, 1.144915f, 0.345332f, -0.006382f, -0.248763f, 0.318888f, -0.534619f, 1.181719f, 1.037350f, 0.560600f, -0.446974f, -1.126746f, -0.690807f, 1.166754f, -1.101454f, -1.145775f, -0.086488f, 0.381780f, -1.194351f, -1.114106f, 0.006524f, -0.402521f, 0.836016f, 0.344533f, -1.041627f, -1.081571f, 0.824102f, -0.212785f, -0.524949f, 0.377977f, -0.235842f, 0.573897f, 0.304308f, -0.519568f, -0.961787f, 0.649611f, -0.720973f, -0.132725f, 0.164074f, -0.698360f, 0.653669f, -0.844065f, 0.294728f, 0.128341f, 0.440293f, -1.177701f, 0.069319f, 0.585007f, -0.768260f, 0.296941f, 0.004702f, 1.018020f, -0.254096f, 0.008198f, -0.521925f, -0.295744f, 0.343532f, -1.157334f, 0.910329f, 0.862921f, 0.508195f, 0.898317f, -0.373544f, 0.273330f, 0.061050f, -0.829794f, -0.461335f, -0.426012f, -0.296704f, -1.065526f, -0.843948f, -0.113955f, -0.182548f, -1.089296f, 0.256401f, 0.653393f, 0.999377f, 1.009925f, -0.838519f, -0.384579f, -0.569276f, 0.220093f, 0.321562f, 0.266984f, 0.701244f, 0.633093f, -0.644096f, 0.823778f, 0.809482f, 0.158802f, -1.044029f, -0.735991f, 0.334411f, 0.414891f, 1.118940f, 0.610743f, 0.434932f, -0.040928f};
+  std::initializer_list<int64_t> Y_shape{2, 2, 3, 3, 2};
+  std::initializer_list<float> Y_data{0.222880f, -0.137918f, 0.042779f, 0.027606f, 0.146833f, 0.119531f, 0.062001f, 0.077615f, -0.124874f, -0.020856f, 0.248748f, -0.050235f, -0.185885f, -0.124030f, -0.148987f, -0.345107f, 0.753440f, -0.055873f, 0.674388f, 0.063018f, -0.054480f, -0.034452f, 0.780917f, 0.193151f, -0.140647f, -0.047364f, -0.095816f, -0.046983f, 0.254384f, -0.123703f, 0.191358f, 0.674903f, -0.311971f, 1.032054f, 0.672506f, 0.009147f, 0.281933f, 0.135835f, -0.145082f, -0.392560f, -0.229593f, -0.632284f, -0.936929f, -0.916689f, -0.502247f, -0.108609f, -0.645451f, 0.242939f, -0.165902f, -1.220095f, -0.015084f, -0.300940f, -0.352557f, -0.886474f, 0.109150f, 0.398365f, 0.235757f, 0.358618f, 0.082189f, 0.268617f, 0.077955f, -0.157573f, 0.023048f, -0.346908f, 0.360128f, 0.389098f, 0.122882f, 0.675956f, 0.735857f, 0.354858f, 0.244544f, 0.631102f};
+  test.AddInput<float>("X", X_shape, X_data);
+  test.AddInput<float>("Grid", Grid_shape, Grid_data);
+  test.AddAttribute("mode", mode);
+  test.AddAttribute("padding_mode", padding_mode);
+  test.AddAttribute("align_corners", align_corners);
+  test.AddOutput<float>("Y", Y_shape, Y_data);
+  test.ConfigEp(DefaultCpuExecutionProvider())
+      .RunWithConfig();
+}
+
+TEST(GridsampleTest, test_grid_sample_20_4D_bilinear_border_align_corners) {
+  OpTester test("GridSample", 20);
+  std::string mode = "linear";
+  std::string padding_mode = "border";
+  int64_t align_corners = 1;
+  std::initializer_list<int64_t> X_shape{2, 2, 3, 2};
+  std::initializer_list<float> X_data{-1.916003f, 0.150784f, -0.179898f, 0.402727f, -0.549764f, 1.772484f, 1.014343f, 0.502823f, 0.976771f, -0.071957f, 0.519875f, 0.408665f, 1.435640f, -0.807775f, -0.181661f, -0.574026f, -0.335351f, -0.155602f, 0.348749f, 1.055618f, 0.737784f, -0.394725f, 0.597608f, 0.006105f};
+  std::initializer_list<int64_t> Grid_shape{2, 3, 2, 2};
+  std::initializer_list<float> Grid_data{-0.189838f, -1.050410f, -1.072351f, -0.930754f, -0.502573f, 0.186642f, -0.564332f, -0.042774f, -0.143740f, 1.097448f, -0.547044f, 1.127440f, -0.921224f, -1.001202f, 0.390232f, -0.698394f, 0.615509f, -0.663897f, 0.944958f, 1.161950f, 0.076823f, 0.256464f, 1.118784f, 0.711380f};
+  std::initializer_list<int64_t> Y_shape{2, 2, 3, 2};
+  std::initializer_list<float> Y_data{-1.078787f, -1.795786f, -0.023270f, -0.113413f, 0.444460f, -0.023826f, 0.807136f, 1.011742f, 0.674182f, 0.754935f, 0.472262f, 0.494688f, 1.347277f, -0.223507f, -0.417529f, -0.160549f, -0.353331f, -0.276367f, 0.376591f, 0.571813f, 0.551111f, 0.022384f, 0.166782f, -0.109583f};
+  test.AddInput<float>("X", X_shape, X_data);
+  test.AddInput<float>("Grid", Grid_shape, Grid_data);
+  test.AddAttribute("mode", mode);
+  test.AddAttribute("padding_mode", padding_mode);
+  test.AddAttribute("align_corners", align_corners);
+  test.AddOutput<float>("Y", Y_shape, Y_data);
+  test.ConfigEp(DefaultCpuExecutionProvider())
+      .RunWithConfig();
+}
+
+TEST(GridsampleTest, test_grid_sample_20_5D_bilinear_border_align_corners) {
+  OpTester test("GridSample", 20);
+  std::string mode = "linear";
+  std::string padding_mode = "border";
+  int64_t align_corners = 1;
+  std::initializer_list<int64_t> X_shape{2, 2, 3, 3, 2};
+  std::initializer_list<float> X_data{-0.332555f, 0.980958f, 0.002632f, -1.976749f, 0.979548f, 1.109773f, -0.534887f, 0.705692f, -0.143637f, -0.600830f, 0.315853f, -0.604687f, -0.300652f, -0.375240f, 0.377196f, -0.140920f, 1.159946f, 2.364598f, 0.320719f, 0.397938f, -0.680097f, -1.201632f, 0.270077f, -0.036712f, -0.972864f, 0.792393f, -1.159168f, -0.016679f, -0.665027f, 0.809646f, -1.684452f, 0.049476f, 0.065748f, 0.279619f, -1.079668f, 0.301309f, 1.010100f, -0.119015f, -0.104838f, 0.916627f, -0.522838f, 0.485269f, -1.221088f, 2.044754f, -0.669823f, 0.128370f, 0.080480f, 0.372679f, -0.046427f, -0.732652f, -0.395790f, 0.012594f, -0.170518f, -0.706783f, -0.862588f, -1.177275f, -1.165262f, 0.914826f, -0.661128f, -0.386656f, -0.599246f, 0.544643f, 0.930679f, -1.146137f, 0.212913f, -0.022433f, 1.692830f, 0.187511f, -0.631569f, -0.311540f, -0.885167f, -0.429959f};
+  std::initializer_list<int64_t> Grid_shape{2, 3, 3, 2, 3};
+  std::initializer_list<float> Grid_data{-0.453992f, 0.394222f, 0.755023f, -0.025610f, 0.658840f, 0.982105f, -0.642922f, -0.265292f, -1.080379f, 0.275464f, 0.855228f, -0.233029f, 0.191483f, 0.383441f, -0.025595f, 0.932929f, 0.174866f, -1.179535f, -0.990943f, -1.188918f, 0.049460f, 0.648682f, -0.158317f, 1.078936f, -0.215883f, 0.245340f, 1.082089f, 0.607310f, -0.038283f, 1.155868f, -0.716957f, 0.446971f, 0.757844f, -0.743030f, -1.127212f, 0.383835f, -0.455267f, -0.605570f, 0.238686f, -0.870514f, 1.079285f, -0.107719f, -0.384303f, 1.003178f, 0.334130f, 0.228627f, -0.573757f, 1.143690f, -0.365482f, 0.998076f, -0.088210f, 0.601965f, 0.843747f, -0.893403f, -0.799804f, -1.186625f, 0.865515f, 1.031983f, -0.438564f, -0.587735f, 0.200868f, 0.646055f, 0.296203f, -0.250092f, -0.763290f, 1.026321f, -0.777136f, -1.159559f, -0.479127f, 0.239290f, 0.446029f, 0.464001f, -0.695158f, -0.460548f, -0.533616f, -0.581111f, -1.010728f, 0.245640f, -0.348981f, -1.155007f, -0.700701f, -0.720655f, -0.517635f, -0.741485f, -0.208103f, 0.430035f, -0.971177f, -0.102798f, -0.345348f, -0.613510f, -0.266458f, -0.508597f, 0.038577f, -0.866220f, 0.227567f, 1.101759f, 0.994334f, -0.538031f, 0.369874f, -1.134245f, 1.010332f, -1.195878f, -1.072351f, -1.077155f, -1.114385f, 0.162516f, -0.317319f, 0.287217f};
+  std::initializer_list<int64_t> Y_shape{2, 2, 3, 3, 2};
+  std::initializer_list<float> Y_data{0.517362f, 1.168304f, -0.283719f, -0.056944f, -0.345007f, -1.383013f, -0.517978f, -0.099340f, 0.531814f, -0.051495f, 0.570203f, -0.350444f, -0.195512f, 0.335075f, 0.533103f, -0.173681f, 0.110927f, 0.549661f, -0.303447f, -0.209369f, -0.479343f, 0.113517f, -0.222508f, -0.981697f, -1.000072f, 0.163343f, -0.019158f, 0.217390f, -0.442252f, -1.020732f, -0.645033f, -0.481248f, -0.359233f, -0.271288f, -0.165768f, -0.092544f, -0.219889f, 0.671201f, -0.041137f, -0.289275f, -0.022793f, -0.130253f, -0.072692f, -0.451858f, 0.402947f, 0.168711f, 0.110811f, 0.202315f, -0.200036f, -0.331588f, 0.583341f, -0.522838f, 1.010100f, -0.018650f, 1.269564f, -0.168394f, -0.209390f, 0.740205f, -0.675828f, -0.325915f, -0.404694f, 0.067064f, -0.744102f, -0.639736f, -0.416580f, -0.317643f, 0.004590f, -0.665815f, -0.163600f, -0.661128f, -0.862588f, -0.132515f};
+  test.AddInput<float>("X", X_shape, X_data);
+  test.AddInput<float>("Grid", Grid_shape, Grid_data);
+  test.AddAttribute("mode", mode);
+  test.AddAttribute("padding_mode", padding_mode);
+  test.AddAttribute("align_corners", align_corners);
+  test.AddOutput<float>("Y", Y_shape, Y_data);
+  test.ConfigEp(DefaultCpuExecutionProvider())
+      .RunWithConfig();
+}
+
+TEST(GridsampleTest, test_grid_sample_20_4D_bilinear_border_no_align_corners) {
+  OpTester test("GridSample", 20);
+  std::string mode = "linear";
+  std::string padding_mode = "border";
+  int64_t align_corners = 0;
+  std::initializer_list<int64_t> X_shape{2, 2, 3, 2};
+  std::initializer_list<float> X_data{-0.050553f, -0.825690f, -0.616085f, 0.337113f, 0.370334f, -0.105073f, -0.565382f, 0.396842f, -0.373193f, -0.780451f, -1.932970f, 1.104960f, -2.569945f, 0.661190f, -0.192302f, 0.734279f, 0.351872f, -1.068136f, 0.173665f, -0.778153f, -0.981877f, 1.485344f, 0.431733f, 0.428167f};
+  std::initializer_list<int64_t> Grid_shape{2, 3, 2, 2};
+  std::initializer_list<float> Grid_data{-0.330875f, 0.589988f, 0.011588f, -1.144325f, -1.038357f, 0.435055f, -1.053243f, -0.957144f, -0.715458f, 1.143742f, -0.341215f, -0.494762f, -0.810255f, 0.767649f, -0.193763f, 0.231402f, 0.286668f, 0.338432f, 0.768106f, 0.062272f, 0.124125f, -0.077928f, -0.932481f, -0.274618f};
+  std::initializer_list<int64_t> Y_shape{2, 2, 3, 2};
+  std::initializer_list<float> Y_data{0.204265f, -0.447104f, 0.027635f, -0.050553f, 0.370334f, -0.248695f, -1.306797f, -0.073120f, -1.391077f, -0.565382f, -1.932970f, -0.419110f, 0.351872f, 0.030903f, -0.124253f, 0.565919f, 0.276202f, -1.171718f, 0.431733f, 0.001712f, 0.689913f, 1.386595f, 0.443614f, -0.505878f};
+  test.AddInput<float>("X", X_shape, X_data);
+  test.AddInput<float>("Grid", Grid_shape, Grid_data);
+  test.AddAttribute("mode", mode);
+  test.AddAttribute("padding_mode", padding_mode);
+  test.AddAttribute("align_corners", align_corners);
+  test.AddOutput<float>("Y", Y_shape, Y_data);
+  test.ConfigEp(DefaultCpuExecutionProvider())
+      .RunWithConfig();
+}
+
+TEST(GridsampleTest, test_grid_sample_20_5D_bilinear_border_no_align_corners) {
+  OpTester test("GridSample", 20);
+  std::string mode = "linear";
+  std::string padding_mode = "border";
+  int64_t align_corners = 0;
+  std::initializer_list<int64_t> X_shape{2, 2, 3, 3, 2};
+  std::initializer_list<float> X_data{-0.727099f, 0.057663f, -0.548384f, 0.078163f, -0.133679f, 0.211872f, 0.271687f, -1.221973f, -2.630687f, -0.558102f, -0.327183f, 0.039894f, 1.222102f, 0.144418f, 0.696676f, -2.231791f, 0.910544f, 2.749837f, -0.354036f, -0.106102f, 2.453576f, 0.332319f, -1.743712f, 1.416859f, 0.260041f, -1.179930f, 0.407328f, 0.375476f, 2.028488f, 0.174825f, -1.467126f, 0.079045f, 0.870076f, -0.895165f, 0.631429f, 0.358222f, 1.484120f, -0.622331f, 0.727481f, 0.644213f, 1.299103f, -0.378573f, 1.360908f, 0.905514f, 0.180065f, 0.972162f, 1.246238f, -0.537204f, -1.241497f, -0.772822f, -0.149044f, -1.642060f, 0.120091f, 0.937023f, 0.422106f, 0.652040f, 0.045585f, -1.089530f, 0.356099f, 0.536075f, -1.840257f, -1.035736f, 0.348653f, 0.187942f, 0.150011f, 0.521798f, 1.271739f, 0.977495f, 0.811927f, 0.641729f, 0.964401f, -0.693074f};
+  std::initializer_list<int64_t> Grid_shape{2, 3, 3, 2, 3};
+  std::initializer_list<float> Grid_data{1.017692f, -0.818194f, 0.525611f, -0.556812f, -0.124601f, 1.120205f, 0.153552f, -1.144168f, 1.103147f, -0.050771f, -0.600881f, -0.633732f, 1.029039f, 0.020253f, 0.662802f, 0.788674f, -0.465758f, 0.101853f, -0.776226f, 1.002064f, -0.634553f, 0.797064f, 0.304043f, 0.740241f, -0.845484f, -0.037319f, 0.621792f, -0.047898f, -0.017218f, 0.584766f, -0.896882f, -0.240587f, 0.546590f, 0.588539f, 1.114539f, -0.237379f, 0.284327f, -0.590432f, -0.201402f, -0.602420f, 0.889284f, 0.007310f, 0.488176f, 0.660055f, 0.223618f, 0.127703f, -0.087830f, -1.016490f, 0.193341f, -0.265853f, -1.008634f, 1.118021f, -0.127930f, -0.598904f, -1.168221f, -1.105256f, 0.456964f, -0.547805f, -0.518368f, -0.694346f, 0.968648f, -0.288466f, 0.777819f, 0.952657f, -0.930362f, 0.895254f, -0.229149f, 1.149323f, 0.612939f, -1.162419f, 0.222934f, 0.421831f, -0.435327f, 0.909973f, -0.993750f, -0.380767f, 1.143396f, 1.171977f, 0.599451f, -0.716336f, -1.032482f, -0.975683f, -0.299985f, 0.679795f, 0.379920f, -0.145729f, 1.079221f, 0.942322f, -0.560859f, -0.519668f, -0.014079f, 0.249021f, -0.008590f, 0.463277f, 0.827937f, -0.216375f, 0.589310f, 0.163207f, 0.460623f, 0.494016f, -0.320739f, -0.535032f, 0.512922f, -0.768302f, 0.630003f, -0.769945f, 0.823242f, 0.481487f};
+  std::initializer_list<int64_t> Y_shape{2, 2, 3, 3, 2};
+  std::initializer_list<float> Y_data{-0.144687f, 0.794879f, 0.517780f, -0.372025f, -2.071523f, -0.953122f, -0.143000f, 0.040151f, 0.511071f, -0.723342f, 0.441486f, 0.101130f, -0.668215f, -0.313612f, 0.918245f, -0.165560f, -0.141496f, -0.002992f, -0.187333f, 0.433250f, -0.456623f, -0.082449f, -0.849978f, -0.635311f, -1.562003f, -0.323540f, 0.716348f, 0.089914f, 0.085623f, 0.617075f, -0.522245f, 2.013170f, 0.249061f, 0.948093f, 0.518262f, 0.230788f, -0.422900f, 1.315807f, -1.265941f, -0.772822f, 0.375354f, 0.159706f, 1.190603f, 0.217497f, -0.622331f, -0.640623f, -1.324261f, -0.126419f, 0.497220f, -0.421485f, -0.512049f, 0.218454f, -0.680520f, 0.432900f, 0.292848f, 0.338349f, 0.787015f, 0.977495f, 0.494135f, 0.649655f, 0.367739f, 0.766775f, 0.652040f, 1.018832f, 0.738819f, 0.107251f, 0.287288f, 0.515065f, 0.300961f, -0.279154f, 0.866776f, 0.738188f};
+  test.AddInput<float>("X", X_shape, X_data);
+  test.AddInput<float>("Grid", Grid_shape, Grid_data);
+  test.AddAttribute("mode", mode);
+  test.AddAttribute("padding_mode", padding_mode);
+  test.AddAttribute("align_corners", align_corners);
+  test.AddOutput<float>("Y", Y_shape, Y_data);
+  test.ConfigEp(DefaultCpuExecutionProvider())
+      .RunWithConfig();
+}
+
+TEST(GridsampleTest, test_grid_sample_20_4D_bilinear_reflection_align_corners) {
+  OpTester test("GridSample", 20);
+  std::string mode = "linear";
+  std::string padding_mode = "reflection";
+  int64_t align_corners = 1;
+  std::initializer_list<int64_t> X_shape{2, 2, 3, 2};
+  std::initializer_list<float> X_data{-0.599439f, 0.317612f, -0.294302f, -0.530613f, 0.754687f, 0.092241f, -1.009405f, -1.155944f, 0.336327f, 0.159353f, -1.134330f, 0.510271f, 0.271972f, 1.301884f, 1.027400f, 1.193876f, 0.304363f, 1.027256f, 0.186801f, 0.719412f, -0.310900f, -1.123812f, -0.312771f, 2.729156f};
+  std::initializer_list<int64_t> Grid_shape{2, 3, 2, 2};
+  std::initializer_list<float> Grid_data{0.853801f, 0.833200f, -0.477474f, 0.131677f, 0.571825f, 0.858708f, -1.120796f, 1.194690f, -0.301706f, 0.488934f, -0.745307f, -0.923452f, -0.812682f, 0.707226f, -0.591920f, 0.697573f, 0.362777f, 0.477332f, -0.266909f, -0.379588f, -0.561456f, -0.670762f, 1.106438f, -0.065215f};
+  std::initializer_list<int64_t> Y_shape{2, 2, 3, 2};
+  std::initializer_list<float> Y_data{0.031577f, -0.232574f, 0.133168f, 0.515460f, 0.063332f, -0.470541f, 0.353729f, 0.159106f, 0.163701f, -0.770097f, -0.133556f, -0.925350f, 0.568498f, 0.636194f, 0.976680f, 0.921805f, 0.684184f, 1.189063f, -0.133022f, 0.070598f, 0.388079f, -0.232737f, 0.042589f, -0.965013f};
+  test.AddInput<float>("X", X_shape, X_data);
+  test.AddInput<float>("Grid", Grid_shape, Grid_data);
+  test.AddAttribute("mode", mode);
+  test.AddAttribute("padding_mode", padding_mode);
+  test.AddAttribute("align_corners", align_corners);
+  test.AddOutput<float>("Y", Y_shape, Y_data);
+  test.ConfigEp(DefaultCpuExecutionProvider())
+      .RunWithConfig();
+}
+
+TEST(GridsampleTest, test_grid_sample_20_5D_bilinear_reflection_align_corners) {
+  OpTester test("GridSample", 20);
+  std::string mode = "linear";
+  std::string padding_mode = "reflection";
+  int64_t align_corners = 1;
+  std::initializer_list<int64_t> X_shape{2, 2, 3, 3, 2};
+  std::initializer_list<float> X_data{-0.441629f, 0.199148f, 1.214051f, -0.000869f, 0.863692f, -0.067719f, -0.621662f, 0.235179f, 0.691041f, 0.176564f, 0.036477f, -0.085879f, 0.785440f, -1.837889f, -0.300151f, -1.710413f, 0.484432f, 2.160478f, -0.049246f, 0.372475f, -1.060470f, -1.000841f, -0.473439f, 0.963055f, 0.174518f, 0.932434f, 0.039338f, -0.343549f, -1.446623f, -0.673622f, 0.520395f, -0.279228f, -0.367065f, -0.871085f, 0.649273f, -0.835047f, 1.063542f, -1.829784f, 1.476173f, -1.048210f, -1.127299f, 1.204756f, -0.998390f, -1.014054f, -1.032717f, 0.977184f, 0.959897f, -0.749289f, 0.784492f, 1.343993f, 1.291144f, 0.099496f, 2.086763f, 0.529948f, -2.296640f, 0.570701f, 0.491216f, -0.003836f, -0.591929f, -0.076994f, 1.239698f, -0.888840f, 0.623497f, 0.769879f, 2.240972f, -2.081689f, 0.798466f, 1.207944f, -0.486804f, -0.488222f, -0.746382f, -0.220282f};
+  std::initializer_list<int64_t> Grid_shape{2, 3, 3, 2, 3};
+  std::initializer_list<float> Grid_data{-0.169044f, 0.178997f, 1.112567f, -0.825642f, -0.359793f, 0.170758f, -0.081412f, 0.319486f, 0.630993f, -0.493702f, 0.093438f, 1.085657f, -0.679024f, -0.813753f, -0.920282f, 0.717311f, -1.100678f, -0.583561f, 0.810473f, -0.719377f, 0.975857f, -0.560957f, 0.189840f, 0.157082f, -0.029434f, 0.747413f, 1.019186f, -0.749235f, 0.673000f, 0.320624f, -0.022362f, -0.839050f, 0.355966f, 0.871005f, -1.030007f, -1.108265f, -1.179701f, 0.277273f, -0.344802f, -0.372753f, 1.117390f, -0.306079f, -0.762057f, 0.107942f, -0.658634f, -0.351593f, 0.633875f, 0.276953f, -0.823465f, 1.142446f, 0.811875f, -0.818022f, 0.522699f, 0.493103f, -0.861061f, -0.843352f, -0.993629f, 0.534540f, 0.209070f, 0.507143f, -0.527071f, 0.902309f, 0.153227f, -0.957513f, -0.302041f, 0.612404f, 0.263859f, -0.183579f, -0.838388f, -0.746482f, 1.035039f, -0.687403f, 0.850371f, -0.401659f, 0.011995f, -1.168548f, -0.390077f, 1.011575f, -1.077360f, 0.603794f, -1.009901f, 0.175023f, -1.087964f, -0.949961f, -0.968757f, -0.416100f, 0.163389f, -0.879807f, 0.304124f, 0.722748f, 0.978239f, 1.062535f, 0.790067f, -0.353356f, -0.110591f, 1.061730f, 0.596951f, -0.318231f, 0.905999f, -1.048710f, 1.027042f, 0.671407f, -0.880154f, -0.978736f, 0.938431f, 1.183815f, 0.104716f, -0.468883f};
+  std::initializer_list<int64_t> Y_shape{2, 2, 3, 3, 2};
+  std::initializer_list<float> Y_data{-0.414201f, 0.167816f, -0.042305f, -0.423495f, -0.101419f, 0.120192f, -1.543294f, 0.344146f, 0.709278f, 0.248721f, -0.269138f, 0.158159f, 0.659876f, 0.226329f, 0.874509f, 0.240959f, 0.412611f, 0.225904f, -0.448580f, 0.057703f, -0.426538f, -0.401142f, -0.147435f, 0.401852f, -0.355426f, -0.286018f, -0.219687f, -0.564205f, 0.282723f, 0.363522f, -0.543706f, -0.787722f, -0.692217f, -0.594894f, 0.091005f, -0.328214f, 0.919003f, 0.408116f, 0.631220f, 0.303619f, -0.197801f, -0.308153f, 0.094457f, 1.027881f, -0.077622f, -0.597219f, -0.661449f, 0.947805f, 0.279352f, 0.828246f, 0.571205f, 1.646163f, 0.714257f, 0.049881f, -1.680014f, -0.056047f, 0.892393f, 0.250564f, 0.138843f, 0.178706f, 0.161286f, 0.036891f, -0.141908f, -0.510903f, 0.733949f, -0.112944f, -0.581858f, -0.269439f, 0.056781f, 0.200325f, 0.814038f, 0.277386f};
+  test.AddInput<float>("X", X_shape, X_data);
+  test.AddInput<float>("Grid", Grid_shape, Grid_data);
+  test.AddAttribute("mode", mode);
+  test.AddAttribute("padding_mode", padding_mode);
+  test.AddAttribute("align_corners", align_corners);
+  test.AddOutput<float>("Y", Y_shape, Y_data);
+  test.ConfigEp(DefaultCpuExecutionProvider())
+      .RunWithConfig();
+}
+
+TEST(GridsampleTest, test_grid_sample_20_4D_bilinear_reflection_no_align_corners) {
+  OpTester test("GridSample", 20);
+  std::string mode = "linear";
+  std::string padding_mode = "reflection";
+  int64_t align_corners = 0;
+  std::initializer_list<int64_t> X_shape{2, 2, 3, 2};
+  std::initializer_list<float> X_data{-0.173652f, -1.513725f, -0.704586f, -1.952375f, -0.699404f, -0.806298f, 1.640852f, -0.138969f, -0.695411f, -1.352111f, 0.568797f, -0.564294f, -0.056468f, 0.641604f, -0.438370f, 0.450167f, -1.091401f, 1.669729f, -0.908544f, 0.244467f, 0.172109f, 1.156741f, -0.617128f, 1.155460f};
+  std::initializer_list<int64_t> Grid_shape{2, 3, 2, 2};
+  std::initializer_list<float> Grid_data{0.252250f, -0.151452f, 0.824706f, -0.588292f, -0.591147f, -0.155082f, -0.732938f, 0.457493f, -0.439559f, 0.492330f, 0.696447f, 0.700722f, -0.220298f, 0.654884f, -0.635434f, -1.195619f, -0.114204f, -0.870080f, -0.929674f, 0.305035f, 1.025429f, -0.472240f, -0.067881f, -0.869393f};
+  std::initializer_list<int64_t> Y_shape{2, 2, 3, 2};
+  std::initializer_list<float> Y_data{-1.538390f, -1.565293f, -0.581079f, -0.701030f, -0.725252f, -0.806298f, -0.850602f, -0.281588f, -0.151944f, 0.172138f, 0.177246f, -0.564294f, -0.316822f, -0.056468f, 0.212846f, -0.737167f, 0.585773f, 0.245182f, -0.111277f, -0.908544f, -0.463717f, -0.189009f, 0.510522f, -0.410307f};
+  test.AddInput<float>("X", X_shape, X_data);
+  test.AddInput<float>("Grid", Grid_shape, Grid_data);
+  test.AddAttribute("mode", mode);
+  test.AddAttribute("padding_mode", padding_mode);
+  test.AddAttribute("align_corners", align_corners);
+  test.AddOutput<float>("Y", Y_shape, Y_data);
+  test.ConfigEp(DefaultCpuExecutionProvider())
+      .RunWithConfig();
+}
+
+TEST(GridsampleTest, test_grid_sample_20_5D_bilinear_reflection_no_align_corners) {
+  OpTester test("GridSample", 20);
+  std::string mode = "linear";
+  std::string padding_mode = "reflection";
+  int64_t align_corners = 0;
+  std::initializer_list<int64_t> X_shape{2, 2, 3, 3, 2};
+  std::initializer_list<float> X_data{1.179856f, 1.432512f, 1.016210f, -0.661096f, 0.335863f, 0.565957f, -0.517555f, 2.232456f, -0.615173f, -0.073628f, -0.260768f, -1.952025f, 0.304237f, 0.902323f, -0.485170f, 0.781595f, -1.777093f, -0.274107f, -1.030698f, 0.181435f, 1.947646f, 1.007702f, -0.100718f, 0.154090f, -0.483193f, 1.565921f, -0.932274f, 0.313820f, -0.439116f, -0.411861f, -0.821795f, -1.685022f, -0.013518f, 0.519914f, -0.175407f, -0.507962f, 0.050913f, 0.981904f, 1.087165f, 1.758657f, 0.075954f, -0.481552f, 0.085590f, 0.537831f, -0.419622f, -1.756791f, 1.324879f, -0.267061f, -0.683518f, 0.605393f, 0.041004f, -0.756742f, 0.744950f, -0.508619f, -0.594679f, -1.165646f, -0.699604f, -0.271502f, 0.437731f, -2.206233f, 1.088781f, -0.629873f, -0.904741f, -1.233533f, 2.466710f, -0.117309f, -0.684130f, 0.598811f, 0.288846f, -1.195569f, 0.935300f, 0.962852f};
+  std::initializer_list<int64_t> Grid_shape{2, 3, 3, 2, 3};
+  std::initializer_list<float> Grid_data{0.625842f, 0.210304f, -0.725943f, -0.553764f, -0.182412f, -0.296478f, -0.254040f, -0.820211f, 0.869312f, 0.622346f, 0.236815f, 0.271706f, 0.140482f, 0.897281f, 0.271537f, 0.182799f, -0.659653f, 0.400310f, -1.122656f, 0.378466f, -1.040147f, -0.496646f, 0.633526f, -0.714734f, 0.955528f, -0.663024f, 1.136629f, 0.369854f, -0.520025f, 0.731855f, -1.062711f, -0.760189f, -0.751812f, 0.157968f, 0.117892f, -1.032129f, 1.157953f, -0.001147f, -0.640796f, 0.028663f, -0.515104f, 0.331070f, 0.434411f, -0.340393f, 0.069958f, 0.714010f, -0.780518f, -0.267586f, -0.177029f, -0.793935f, 0.097737f, 0.044103f, -0.969274f, 0.246164f, 1.145360f, 0.638273f, -0.650926f, 1.098440f, -0.824873f, -0.610135f, 0.529312f, 0.954650f, 1.145143f, 1.033109f, -0.660775f, 0.274592f, -0.753497f, 0.026500f, 0.994206f, 0.590870f, -1.108049f, -0.516447f, -1.012489f, 0.565286f, -0.152334f, -0.877228f, -0.383453f, 0.393797f, 0.111096f, 1.125969f, -0.015932f, 0.377468f, -0.363512f, 0.143194f, 0.042988f, 1.030777f, 0.502813f, -0.683870f, -1.066269f, -1.141727f, -0.435790f, 0.155118f, 1.128919f, -0.117905f, 0.469189f, 0.609870f, -0.919201f, -0.992659f, 0.454699f, 0.559331f, -0.558762f, 0.188050f, -1.174933f, 0.015126f, 0.294147f, 0.011359f, -0.190476f, 0.499476f};
+  std::initializer_list<int64_t> Y_shape{2, 2, 3, 3, 2};
+  std::initializer_list<float> Y_data{-0.274014f, 0.145076f, 0.451342f, -0.273219f, -1.128307f, 0.962473f, 0.629978f, 0.370138f, 0.901663f, 0.778787f, 1.179856f, 0.014218f, -0.634683f, 0.585419f, 0.972130f, 1.911376f, 0.389205f, 0.849839f, 0.738424f, 0.054296f, -1.034114f, 0.096287f, -0.408114f, -0.474491f, 0.784791f, 0.001762f, -1.672976f, -1.127656f, -1.030698f, 1.105979f, 0.979492f, -0.258014f, 0.693543f, 1.010218f, -0.008927f, -0.078404f, -0.384825f, 0.944247f, -0.508619f, 0.548774f, 0.068986f, 0.881841f, 0.869967f, -0.274754f, 0.337312f, -0.374188f, 0.161655f, 0.050913f, 0.146763f, 0.119233f, -0.438980f, 0.228062f, -0.187221f, -0.376543f, -2.077576f, -1.120214f, 0.962852f, -0.133462f, 0.314542f, -1.044921f, 1.568017f, -0.060947f, 0.838264f, -0.652863f, 0.978122f, -0.594679f, 0.366536f, 0.596221f, -0.120431f, -0.435362f, -0.328892f, -0.434798f};
+  test.AddInput<float>("X", X_shape, X_data);
+  test.AddInput<float>("Grid", Grid_shape, Grid_data);
+  test.AddAttribute("mode", mode);
+  test.AddAttribute("padding_mode", padding_mode);
+  test.AddAttribute("align_corners", align_corners);
+  test.AddOutput<float>("Y", Y_shape, Y_data);
+  test.ConfigEp(DefaultCpuExecutionProvider())
+      .RunWithConfig();
+}
+
+TEST(GridsampleTest, test_grid_sample_20_4D_bicubic_zeros_align_corners) {
+  OpTester test("GridSample", 20);
+  std::string mode = "cubic";
+  std::string padding_mode = "zeros";
+  int64_t align_corners = 1;
+  std::initializer_list<int64_t> X_shape{2, 2, 3, 2};
+  std::initializer_list<float> X_data{0.741614f, -1.612838f, 0.274100f, -0.685296f, -0.032079f, -0.246424f, 0.089412f, -0.776545f, -0.152179f, 0.312533f, -1.503701f, -0.720829f, 0.877575f, 0.407229f, -0.889951f, 0.603605f, -0.140859f, 2.032775f, -0.520668f, 1.063163f, -1.008883f, 0.194195f, -0.303240f, -0.967884f};
+  std::initializer_list<int64_t> Grid_shape{2, 3, 2, 2};
+  std::initializer_list<float> Grid_data{-0.932019f, -0.034394f, 0.554511f, 0.484230f, 0.141120f, 0.485083f, -0.836516f, 0.999462f, 0.026764f, 0.775689f, 0.265464f, -0.133497f, 0.514005f, 1.139161f, 1.183700f, -1.010095f, 0.072779f, -0.862052f, 0.699178f, 0.861473f, -0.842637f, -0.069355f, 0.830374f, 0.793568f};
+  std::initializer_list<int64_t> Y_shape{2, 2, 3, 2};
+  std::initializer_list<float> Y_data{0.274192f, -0.348792f, -0.238780f, -0.048938f, -0.195915f, -0.488976f, -0.104505f, -0.351103f, -0.583059f, -1.533095f, -1.141282f, 0.187052f, 1.668728f, 0.345182f, 0.682750f, 1.893112f, -0.775917f, 1.920082f, -0.889375f, 1.071508f, 0.336517f, -0.933740f, -0.981629f, -0.893789f};
+  test.AddInput<float>("X", X_shape, X_data);
+  test.AddInput<float>("Grid", Grid_shape, Grid_data);
+  test.AddAttribute("mode", mode);
+  test.AddAttribute("padding_mode", padding_mode);
+  test.AddAttribute("align_corners", align_corners);
+  test.AddOutput<float>("Y", Y_shape, Y_data);
+  test.ConfigEp(DefaultCpuExecutionProvider())
+      .RunWithConfig();
+}
+
+TEST(GridsampleTest, test_grid_sample_20_4D_bicubic_zeros_no_align_corners) {
+  OpTester test("GridSample", 20);
+  std::string mode = "cubic";
+  std::string padding_mode = "zeros";
+  int64_t align_corners = 0;
+  std::initializer_list<int64_t> X_shape{2, 2, 3, 2};
+  std::initializer_list<float> X_data{0.333395f, 0.977190f, 0.214232f, 0.363731f, -1.352515f, -0.980304f, -0.354887f, -0.481711f, -0.607915f, -0.309748f, 2.262781f, 0.963363f, 1.997079f, 0.987449f, -0.537662f, 1.011585f, 0.822184f, 0.567108f, 0.135401f, -0.943315f, -0.614181f, 0.030652f, 0.914757f, 0.971777f};
+  std::initializer_list<int64_t> Grid_shape{2, 3, 2, 2};
+  std::initializer_list<float> Grid_data{-0.487111f, 0.913573f, 0.641905f, -0.093110f, 0.512522f, 0.358369f, 0.655341f, -0.964320f, 0.370929f, -1.136512f, -0.789199f, -0.447185f, -0.116915f, -1.132446f, 0.029865f, 0.191588f, -0.476239f, 0.389224f, 1.048588f, -0.204978f, -0.639094f, -1.062994f, -0.876243f, -0.663705f};
+  std::initializer_list<int64_t> Y_shape{2, 2, 3, 2};
+  std::initializer_list<float> Y_data{-1.051920f, 0.501832f, -0.508839f, 0.563480f, 0.297178f, 0.246571f, 1.781955f, -0.353574f, 0.481200f, -0.258839f, -0.145200f, -0.469558f, 0.624262f, 0.351267f, 0.180256f, 0.571859f, 0.903895f, 1.383745f, -0.081406f, 0.133665f, 0.348401f, -0.164219f, 0.138237f, 0.203282f};
+  test.AddInput<float>("X", X_shape, X_data);
+  test.AddInput<float>("Grid", Grid_shape, Grid_data);
+  test.AddAttribute("mode", mode);
+  test.AddAttribute("padding_mode", padding_mode);
+  test.AddAttribute("align_corners", align_corners);
+  test.AddOutput<float>("Y", Y_shape, Y_data);
+  test.ConfigEp(DefaultCpuExecutionProvider())
+      .RunWithConfig();
+}
+
+TEST(GridsampleTest, test_grid_sample_20_4D_bicubic_border_align_corners) {
+  OpTester test("GridSample", 20);
+  std::string mode = "cubic";
+  std::string padding_mode = "border";
+  int64_t align_corners = 1;
+  std::initializer_list<int64_t> X_shape{2, 2, 3, 2};
+  std::initializer_list<float> X_data{-0.480448f, 0.682093f, 0.237716f, -1.234307f, 2.139750f, 2.410321f, 0.491472f, -0.553422f, 0.032129f, -0.162503f, 0.144036f, -1.889875f, -0.293944f, -1.390146f, -1.552136f, 1.604720f, -1.707202f, 0.182427f, -0.631000f, 0.196649f, 0.427711f, -0.014224f, -1.319834f, -2.703346f};
+  std::initializer_list<int64_t> Grid_shape{2, 3, 2, 2};
+  std::initializer_list<float> Grid_data{0.503717f, 0.572989f, 0.179517f, -0.060398f, 0.503876f, 0.288627f, -1.148268f, 0.194010f, -0.532910f, -0.636357f, 0.464076f, 0.245386f, 0.203212f, -0.569260f, 0.554489f, 1.126118f, 0.146805f, 0.493232f, -1.052794f, 0.713394f, 0.416866f, 0.540634f, 0.500415f, -0.315629f};
+  std::initializer_list<int64_t> Y_shape{2, 2, 3, 2};
+  std::initializer_list<float> Y_data{0.885659f, -0.722912f, -0.180469f, 0.697015f, -0.322127f, -0.292851f, -0.867861f, -0.047527f, -0.447720f, 0.028100f, 0.191874f, -0.378776f, -0.321888f, -0.277691f, -0.037604f, -1.766707f, 0.320836f, 0.415106f, 0.179209f, -2.609096f, -0.929794f, -0.788240f, -1.212243f, 0.337704f};
+  test.AddInput<float>("X", X_shape, X_data);
+  test.AddInput<float>("Grid", Grid_shape, Grid_data);
+  test.AddAttribute("mode", mode);
+  test.AddAttribute("padding_mode", padding_mode);
+  test.AddAttribute("align_corners", align_corners);
+  test.AddOutput<float>("Y", Y_shape, Y_data);
+  test.ConfigEp(DefaultCpuExecutionProvider())
+      .RunWithConfig();
+}
+
+TEST(GridsampleTest, test_grid_sample_20_4D_bicubic_border_no_align_corners) {
+  OpTester test("GridSample", 20);
+  std::string mode = "cubic";
+  std::string padding_mode = "border";
+  int64_t align_corners = 0;
+  std::initializer_list<int64_t> X_shape{2, 2, 3, 2};
+  std::initializer_list<float> X_data{-0.924256f, -2.309784f, 1.272769f, 0.548427f, -1.478527f, -3.472946f, -1.252325f, 0.268589f, 0.326270f, 0.105016f, 0.515184f, -0.951158f, -0.658693f, -2.018776f, 0.981625f, -0.401504f, 1.560519f, -0.129836f, -1.876357f, 0.511516f, -1.825582f, 0.358958f, -0.805392f, -1.409127f};
+  std::initializer_list<int64_t> Grid_shape{2, 3, 2, 2};
+  std::initializer_list<float> Grid_data{0.874856f, -1.090775f, 1.169192f, 0.447098f, 0.583418f, 0.267395f, 0.788144f, 1.129706f, -0.102229f, -0.984624f, 1.101916f, -0.253070f, -0.578731f, 0.738703f, 0.669694f, 0.160659f, -0.075327f, -0.229561f, 1.100291f, 0.731142f, 0.714643f, 0.765214f, -0.628031f, 0.250554f};
+  std::initializer_list<int64_t> Y_shape{2, 2, 3, 2};
+  std::initializer_list<float> Y_data{-2.647128f, -2.154235f, -0.768645f, -3.893546f, -1.698376f, -0.114530f, 0.458115f, -0.696657f, -0.370692f, -1.169692f, -0.754730f, 0.320002f, 1.683550f, -0.301499f, -0.176003f, -0.236653f, -0.278257f, 1.480160f, -0.700350f, 0.095525f, -0.891605f, -1.569065f, -1.633715f, -1.535763f};
+  test.AddInput<float>("X", X_shape, X_data);
+  test.AddInput<float>("Grid", Grid_shape, Grid_data);
+  test.AddAttribute("mode", mode);
+  test.AddAttribute("padding_mode", padding_mode);
+  test.AddAttribute("align_corners", align_corners);
+  test.AddOutput<float>("Y", Y_shape, Y_data);
+  test.ConfigEp(DefaultCpuExecutionProvider())
+      .RunWithConfig();
+}
+
+TEST(GridsampleTest, test_grid_sample_20_4D_bicubic_reflection_align_corners) {
+  OpTester test("GridSample", 20);
+  std::string mode = "cubic";
+  std::string padding_mode = "reflection";
+  int64_t align_corners = 1;
+  std::initializer_list<int64_t> X_shape{2, 2, 3, 2};
+  std::initializer_list<float> X_data{-0.328038f, -0.658850f, -0.054298f, 0.012663f, -0.077366f, 0.644305f, -1.262985f, 0.922028f, 0.189962f, 0.518836f, 1.168413f, -0.286220f, 0.431207f, -0.295352f, -0.357675f, -0.311715f, 0.839514f, -0.651820f, -0.283934f, 0.430508f, 0.206334f, 0.765966f, -1.144732f, -0.507045f};
+  std::initializer_list<int64_t> Grid_shape{2, 3, 2, 2};
+  std::initializer_list<float> Grid_data{-0.372000f, -1.056863f, -0.360826f, -0.268314f, 0.691035f, -0.595044f, 0.720198f, 0.166462f, -0.201118f, -1.069416f, 1.184721f, -0.213980f, 0.755038f, -0.620722f, -1.168597f, -0.956522f, -0.614982f, -0.382162f, -0.169456f, 1.000817f, -1.106710f, 0.598940f, 1.009714f, 0.007723f};
+  std::initializer_list<int64_t> Y_shape{2, 2, 3, 2};
+  std::initializer_list<float> Y_data{-0.403118f, -0.158055f, -0.496030f, 0.161379f, -0.440603f, -0.193607f, -0.746082f, -0.076433f, 0.751030f, 0.360851f, -0.488453f, 0.664305f, -0.259139f, 0.411796f, -0.156648f, 0.281569f, 0.437515f, -0.313812f, 0.573781f, -0.265706f, 0.200380f, -0.906155f, -0.724311f, 0.760352f};
+  test.AddInput<float>("X", X_shape, X_data);
+  test.AddInput<float>("Grid", Grid_shape, Grid_data);
+  test.AddAttribute("mode", mode);
+  test.AddAttribute("padding_mode", padding_mode);
+  test.AddAttribute("align_corners", align_corners);
+  test.AddOutput<float>("Y", Y_shape, Y_data);
+  test.ConfigEp(DefaultCpuExecutionProvider())
+      .RunWithConfig();
+}
+
+TEST(GridsampleTest, test_grid_sample_20_4D_bicubic_reflection_no_align_corners) {
+  OpTester test("GridSample", 20);
+  std::string mode = "cubic";
+  std::string padding_mode = "reflection";
+  int64_t align_corners = 0;
+  std::initializer_list<int64_t> X_shape{2, 2, 3, 2};
+  std::initializer_list<float> X_data{-0.290962f, 0.867797f, -0.085436f, -1.597520f, 0.695524f, 0.838739f, 0.513032f, 0.166242f, -0.546135f, -0.780313f, -0.512993f, -0.449479f, 1.594718f, 0.953375f, 0.692587f, -0.798364f, -0.128799f, -0.456210f, 2.098909f, -1.561220f, 1.713821f, -0.701970f, -0.287280f, -1.708048f};
+  std::initializer_list<int64_t> Grid_shape{2, 3, 2, 2};
+  std::initializer_list<float> Grid_data{0.934471f, 0.728362f, -0.458301f, -1.040800f, 0.157908f, 0.753451f, -0.122762f, 0.100970f, 0.889432f, 0.495471f, 0.897108f, 0.176205f, 0.134514f, -0.287037f, -0.202498f, -0.637759f, 0.802292f, 1.094459f, 0.445338f, 0.034096f, -0.396126f, -1.184798f, -0.222199f, -0.851887f};
+  std::initializer_list<int64_t> Y_shape{2, 2, 3, 2};
+  std::initializer_list<float> Y_data{1.037788f, -0.275160f, 0.953595f, -0.518196f, 0.118127f, -1.525148f, -0.413483f, 0.696689f, -0.450182f, -0.696169f, -0.561886f, -0.828986f, 0.343953f, 1.379632f, -0.417260f, -0.781500f, 1.666511f, 1.599268f, 0.106200f, 1.088396f, -2.079140f, -0.612122f, 1.822402f, 1.173807f};
+  test.AddInput<float>("X", X_shape, X_data);
+  test.AddInput<float>("Grid", Grid_shape, Grid_data);
+  test.AddAttribute("mode", mode);
+  test.AddAttribute("padding_mode", padding_mode);
+  test.AddAttribute("align_corners", align_corners);
+  test.AddOutput<float>("Y", Y_shape, Y_data);
+  test.ConfigEp(DefaultCpuExecutionProvider())
+      .RunWithConfig();
+}
+}  // namespace test
+}  // namespace onnxruntime
diff --git a/onnxruntime/test/providers/cpu/tensor/grid_sample_test_gen.py b/onnxruntime/test/providers/cpu/tensor/grid_sample_test_gen.py
new file mode 100644
index 0000000000000..e4d58e79243ef
--- /dev/null
+++ b/onnxruntime/test/providers/cpu/tensor/grid_sample_test_gen.py
@@ -0,0 +1,81 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+# This code is used to generate the test cases for the GridSample operator
+# in onnxruntime/test/providers/cpu/tensor/grid_sample_test.cc
+
+import torch
+
+# Define the input dimensions
+N, C, D, H, W = 2, 2, 3, 3, 2
+
+# Define the modes, padding modes, and whether to align corners
+modes = ["nearest", "bilinear", "bicubic"]
+padding_modes = ["zeros", "border", "reflection"]
+align_corners_options = [True, False]
+
+# Loop over the combinations of parameters
+torch.manual_seed(0)
+for opset_version in [16, 20]:
+    for mode in modes:
+        for padding_mode in padding_modes:
+            for align_corners in align_corners_options:
+                for ndim in [4, 5]:
+                    if ndim == 5 and mode == "bicubic":
+                        continue
+
+                    if opset_version < 20 and ndim == 5:
+                        continue
+
+                    # Create a random input tensor with the specified dimensions
+                    input_shape = (N,) + (C,) + (((D, H, W)) if ndim == 5 else ((H, W)))
+                    input_tensor = torch.randn(*input_shape)
+
+                    # Create a random grid tensor with the specified dimensions
+                    grid_shape = (N,) + (((D, H, W)) if ndim == 5 else ((H, W))) + (ndim - 2,)
+
+                    # Between -1.2 to + 1.2
+                    grid_tensor = torch.rand(*grid_shape) * 2.4 - 1.2
+
+                    # Apply grid_sample
+                    output_tensor = torch.nn.functional.grid_sample(
+                        input_tensor, grid_tensor, mode=mode, padding_mode=padding_mode, align_corners=align_corners
+                    )
+
+                    X_data_str = "{" + ", ".join([f"{x:.6f}f" for x in input_tensor.numpy().flatten()]) + "}"
+                    Grid_data_str = "{" + ", ".join([f"{x:.6f}f" for x in grid_tensor.numpy().flatten()]) + "}"
+
+                    Y_shape = output_tensor.shape
+                    Y_data_str = "{" + ", ".join([f"{x:.6f}f" for x in output_tensor.numpy().flatten()]) + "}"
+
+                    onnx_mode = mode
+                    if opset_version >= 20:
+                        if mode == "bilinear":
+                            onnx_mode = "linear"
+                        elif mode == "bicubic":
+                            onnx_mode = "cubic"
+
+                    onnx_align_corners = 1 if align_corners else 0
+
+                    test_name = f"test_grid_sample_{opset_version}_{ndim}D_{mode}_{padding_mode}_{'align_corners' if align_corners else 'no_align_corners'}"
+                    print(f"TEST(GridsampleTest, {test_name}) {{")
+                    print(f'OpTester test("GridSample", {opset_version});')
+                    print(f'std::string mode = "{onnx_mode}";')
+                    print(f'std::string padding_mode = "{padding_mode}";')
+                    print(f"int64_t align_corners = {onnx_align_corners};")
+                    print(f"std::initializer_list<int64_t> X_shape {{ {', '.join(map(str, input_shape))} }};")
+                    print(f"std::initializer_list<float> X_data { X_data_str };")
+                    print(f"std::initializer_list<int64_t> Grid_shape {{ {', '.join(map(str, grid_shape))} }};")
+                    print(f"std::initializer_list<float> Grid_data { Grid_data_str };")
+                    print(f"std::initializer_list<int64_t> Y_shape {{ {', '.join(map(str, Y_shape))} }};")
+                    print(f"std::initializer_list<float> Y_data { Y_data_str };")
+
+                    print('test.AddInput<float>("X", X_shape, X_data);')
+                    print('test.AddInput<float>("Grid", Grid_shape, Grid_data);')
+                    print('test.AddAttribute("mode", mode);')
+                    print('test.AddAttribute("padding_mode", padding_mode);')
+                    print('test.AddAttribute("align_corners", align_corners);')
+                    print('test.AddOutput<float>("Y", Y_shape, Y_data);')
+                    print("test.Run();")
+                    print("}")
+                    print("\n")
diff --git a/onnxruntime/test/providers/cpu/tensor/isinf_test.cc b/onnxruntime/test/providers/cpu/tensor/isinf_test.cc
index ddb392eb82e13..2e583c5d2547b 100644
--- a/onnxruntime/test/providers/cpu/tensor/isinf_test.cc
+++ b/onnxruntime/test/providers/cpu/tensor/isinf_test.cc
@@ -17,85 +17,137 @@ constexpr double DOUBLE_INF = std::numeric_limits<double>::infinity();
 constexpr double DOUBLE_NINF = -std::numeric_limits<double>::infinity();
 constexpr double DOUBLE_NAN = std::numeric_limits<double>::quiet_NaN();
 
-TEST(IsInfTest, test_isinf_float) {
-  // Defaults for detect_negative = 1
-  // detect_positive = 1
-  OpTester test("IsInf", 10);
+template <typename T>
+void run_is_inf_test(int opset, int64_t detect_positive, int64_t detect_negative, const std::initializer_list<T>& input, const std::initializer_list<bool>& output) {
+  OpTester test("IsInf", opset);
+  test.AddAttribute<int64_t>("detect_positive", detect_positive);
+  test.AddAttribute<int64_t>("detect_negative", detect_negative);
+  test.AddInput<T>("X", {onnxruntime::narrow<int64_t>(input.size())}, input);
+  test.AddOutput<bool>("Y", {onnxruntime::narrow<int64_t>(output.size())}, output);
+  test.Run();
+}
 
-  std::vector<int64_t> input_dim{6};
-  std::vector<float> input = {-1.2f, FLOAT_NAN, FLOAT_INF, 2.8f, FLOAT_NINF, FLOAT_INF};
-  test.AddInput<float>("X", input_dim, input);
+TEST(IsInfTest, test_isinf_float10) {
+  std::initializer_list<float> input = {-1.2f, FLOAT_NAN, FLOAT_INF, 2.8f, FLOAT_NINF, FLOAT_INF};
+  std::initializer_list<bool> output = {false, false, true, false, true, true};
+  run_is_inf_test(10, 1, 1, input, output);
+}
 
-  std::vector<int64_t> output_dim(input_dim);
-  test.AddOutput<bool>("Y", output_dim, {false, false, true, false, true, true});
-  test.Run();
+TEST(IsInfTest, test_isinf_float20) {
+  std::initializer_list<float> input = {-1.2f, FLOAT_NAN, FLOAT_INF, 2.8f, FLOAT_NINF, FLOAT_INF};
+  std::initializer_list<bool> output = {false, false, true, false, true, true};
+  run_is_inf_test(20, 1, 1, input, output);
 }
 
-TEST(IsInfTest, test_isinf_double) {
-  // Defaults for detect_negative = 1
-  // detect_positive = 1
-  OpTester test("IsInf", 10);
+TEST(IsInfTest, test_isinf_double10) {
+  std::initializer_list<double> input = {-1.2, DOUBLE_NAN, DOUBLE_INF, 2.8, DOUBLE_NINF, DOUBLE_INF};
+  std::initializer_list<bool> output = {false, false, true, false, true, true};
+  run_is_inf_test(10, 1, 1, input, output);
+}
 
-  std::vector<int64_t> input_dim{6};
-  std::vector<double> input = {-1.2, DOUBLE_NAN, DOUBLE_INF, 2.8, DOUBLE_NINF, DOUBLE_INF};
-  test.AddInput<double>("X", input_dim, input);
+TEST(IsInfTest, test_isinf_double20) {
+  std::initializer_list<double> input = {-1.2, DOUBLE_NAN, DOUBLE_INF, 2.8, DOUBLE_NINF, DOUBLE_INF};
+  std::initializer_list<bool> output = {false, false, true, false, true, true};
+  run_is_inf_test(20, 1, 1, input, output);
+}
 
-  std::vector<int64_t> output_dim(input_dim);
-  test.AddOutput<bool>("Y", output_dim, {false, false, true, false, true, true});
-  test.Run();
+TEST(IsInfTest, test_isinf_positive_float10) {
+  std::initializer_list<double> input = {-1.7f, FLOAT_NAN, FLOAT_INF, 3.6f, FLOAT_NINF, FLOAT_INF};
+  std::initializer_list<bool> output = {false, false, true, false, false, true};
+  run_is_inf_test(10, 1, 0, input, output);
 }
 
-TEST(IsInfTest, test_isinf_positive_float) {
-  OpTester test("IsInf", 10);
-  test.AddAttribute<int64_t>("detect_negative", 0);
+TEST(IsInfTest, test_isinf_positive_float20) {
+  std::initializer_list<double> input = {-1.7f, FLOAT_NAN, FLOAT_INF, 3.6f, FLOAT_NINF, FLOAT_INF};
+  std::initializer_list<bool> output = {false, false, true, false, false, true};
+  run_is_inf_test(20, 1, 0, input, output);
+}
 
-  std::vector<int64_t> input_dim{6};
-  std::vector<float> input = {-1.7f, FLOAT_NAN, FLOAT_INF, 3.6f, FLOAT_NINF, FLOAT_INF};
-  test.AddInput<float>("X", input_dim, input);
+TEST(IsInfTest, test_isinf_positive_double10) {
+  std::initializer_list<double> input = {-1.7, DOUBLE_NAN, DOUBLE_INF, 3.6, DOUBLE_NINF, DOUBLE_INF};
+  std::initializer_list<bool> output = {false, false, true, false, false, true};
+  run_is_inf_test(10, 1, 0, input, output);
+}
 
-  std::vector<int64_t> output_dim(input_dim);
-  test.AddOutput<bool>("Y", output_dim, {false, false, true, false, false, true});
-  test.Run();
+TEST(IsInfTest, test_isinf_positive_double20) {
+  std::initializer_list<double> input = {-1.7, DOUBLE_NAN, DOUBLE_INF, 3.6, DOUBLE_NINF, DOUBLE_INF};
+  std::initializer_list<bool> output = {false, false, true, false, false, true};
+  run_is_inf_test(20, 1, 0, input, output);
+}
+
+TEST(IsInfTest, test_isinf_negative_float10) {
+  std::initializer_list<float> input = {-1.7f, FLOAT_NAN, FLOAT_INF, 3.6f, FLOAT_NINF, FLOAT_INF};
+  std::initializer_list<bool> output = {false, false, false, false, true, false};
+  run_is_inf_test(10, 0, 1, input, output);
 }
 
-TEST(IsInfTest, test_isinf_positive_double) {
-  OpTester test("IsInf", 10);
-  test.AddAttribute<int64_t>("detect_negative", 0);
+TEST(IsInfTest, test_isinf_negative_float20) {
+  std::initializer_list<float> input = {-1.7f, FLOAT_NAN, FLOAT_INF, 3.6f, FLOAT_NINF, FLOAT_INF};
+  std::initializer_list<bool> output = {false, false, false, false, true, false};
+  run_is_inf_test(20, 0, 1, input, output);
+}
 
-  std::vector<int64_t> input_dim{6};
-  std::vector<double> input = {-1.7, DOUBLE_NAN, DOUBLE_INF, 3.6, DOUBLE_NINF, DOUBLE_INF};
-  test.AddInput<double>("X", input_dim, input);
+TEST(IsInfTest, test_isinf_negative_double10) {
+  std::initializer_list<double> input = {-1.7, DOUBLE_NAN, DOUBLE_INF, 3.6, DOUBLE_NINF, DOUBLE_INF};
+  std::initializer_list<bool> output = {false, false, false, false, true, false};
+  run_is_inf_test(10, 0, 1, input, output);
+}
 
-  std::vector<int64_t> output_dim(input_dim);
-  test.AddOutput<bool>("Y", output_dim, {false, false, true, false, false, true});
-  test.Run();
+TEST(IsInfTest, test_isinf_negative_double20) {
+  std::initializer_list<double> input = {-1.7, DOUBLE_NAN, DOUBLE_INF, 3.6, DOUBLE_NINF, DOUBLE_INF};
+  std::initializer_list<bool> output = {false, false, false, false, true, false};
+  run_is_inf_test(20, 0, 1, input, output);
 }
 
-TEST(IsInfTest, test_isinf_negative_float) {
-  OpTester test("IsInf", 10);
-  test.AddAttribute<int64_t>("detect_positive", 0);
+#if !defined(DISABLE_FLOAT8_TYPES)
+TEST(IsInfTest, test_Float8E4M3FN) {
+  std::initializer_list<Float8E4M3FN> input = {
+      Float8E4M3FN(-1.0f), Float8E4M3FN(FLOAT_NAN, false), Float8E4M3FN(1.0f), Float8E4M3FN(FLOAT_NINF, false), Float8E4M3FN(FLOAT_NINF, false), Float8E4M3FN(FLOAT_INF, false)};
+  std::initializer_list<bool> output = {false, false, false, false, false, false};
+  run_is_inf_test(20, 1, 1, input, output);
+}
 
-  std::vector<int64_t> input_dim{6};
-  std::vector<float> input = {-1.7f, FLOAT_NAN, FLOAT_INF, 3.6f, FLOAT_NINF, FLOAT_INF};
-  test.AddInput<float>("X", input_dim, input);
+TEST(IsInfTest, test_Float8E4M3FNUZ) {
+  std::initializer_list<Float8E4M3FNUZ> input = {
+      Float8E4M3FNUZ(-1.0f), Float8E4M3FNUZ(FLOAT_NAN, false), Float8E4M3FNUZ(1.0f), Float8E4M3FNUZ(FLOAT_NINF, false), Float8E4M3FNUZ(FLOAT_NINF, false), Float8E4M3FNUZ(FLOAT_INF, false)};
+  std::initializer_list<bool> output = {false, false, false, false, false, false};
+  run_is_inf_test(20, 1, 1, input, output);
+}
 
-  std::vector<int64_t> output_dim(input_dim);
-  test.AddOutput<bool>("Y", output_dim, {false, false, false, false, true, false});
-  test.Run();
+TEST(IsInfTest, test_Float8E5M2_detect_both) {
+  std::initializer_list<Float8E5M2> input = {
+      Float8E5M2(-1.0f), Float8E5M2(FLOAT_NINF, false), Float8E5M2(1.0f), Float8E5M2(FLOAT_NINF, false), Float8E5M2(FLOAT_NAN, false), Float8E5M2(FLOAT_INF, false)};
+  std::initializer_list<bool> output = {false, true, false, true, false, true};
+  run_is_inf_test(20, 1, 1, input, output);
 }
 
-TEST(IsInfTest, test_isinf_negative_double) {
-  OpTester test("IsInf", 10);
-  test.AddAttribute<int64_t>("detect_positive", 0);
+TEST(IsInfTest, test_Float8E5M2_detect_positive) {
+  std::initializer_list<Float8E5M2> input = {
+      Float8E5M2(-1.0f), Float8E5M2(FLOAT_NINF, false), Float8E5M2(1.0f), Float8E5M2(FLOAT_NINF, false), Float8E5M2(FLOAT_NAN, false), Float8E5M2(FLOAT_INF, false)};
+  std::initializer_list<bool> output = {false, false, false, false, false, true};
+  run_is_inf_test(20, 1, 0, input, output);
+}
 
-  std::vector<int64_t> input_dim{6};
-  std::vector<double> input = {-1.7, DOUBLE_NAN, DOUBLE_INF, 3.6, DOUBLE_NINF, DOUBLE_INF};
-  test.AddInput<double>("X", input_dim, input);
+TEST(IsInfTest, test_Float8E5M2_detect_negative) {
+  std::initializer_list<Float8E5M2> input = {
+      Float8E5M2(-1.0f), Float8E5M2(FLOAT_NINF, false), Float8E5M2(1.0f), Float8E5M2(FLOAT_NINF, false), Float8E5M2(FLOAT_NAN, false), Float8E5M2(FLOAT_INF, false)};
+  std::initializer_list<bool> output = {false, true, false, true, false, false};
+  run_is_inf_test(20, 0, 1, input, output);
+}
 
-  std::vector<int64_t> output_dim(input_dim);
-  test.AddOutput<bool>("Y", output_dim, {false, false, false, false, true, false});
-  test.Run();
+TEST(IsInfTest, test_Float8E5M2_none) {
+  std::initializer_list<Float8E5M2> input = {
+      Float8E5M2(-1.0f), Float8E5M2(FLOAT_NINF, false), Float8E5M2(1.0f), Float8E5M2(FLOAT_NINF, false), Float8E5M2(FLOAT_NAN, false), Float8E5M2(FLOAT_INF, false)};
+  std::initializer_list<bool> output = {false, false, false, false, false, false};
+  run_is_inf_test(20, 0, 0, input, output);
 }
 
+TEST(IsInfTest, test_Float8E5M2FNUZ) {
+  std::initializer_list<Float8E5M2FNUZ> input = {
+      Float8E5M2FNUZ(-1.0f), Float8E5M2FNUZ(FLOAT_NINF, false), Float8E5M2FNUZ(1.0f), Float8E5M2FNUZ(FLOAT_NINF, false), Float8E5M2FNUZ(FLOAT_NAN, false), Float8E5M2FNUZ(FLOAT_INF, false)};
+  std::initializer_list<bool> output = {false, false, false, false, false, false};
+  run_is_inf_test(20, 1, 1, input, output);
+}
+#endif
 }  // namespace test
 }  // namespace onnxruntime
diff --git a/onnxruntime/test/providers/cpu/tensor/isnan_test.cc b/onnxruntime/test/providers/cpu/tensor/isnan_test.cc
index 0dffc452b519d..0f1e5c07cdd9b 100644
--- a/onnxruntime/test/providers/cpu/tensor/isnan_test.cc
+++ b/onnxruntime/test/providers/cpu/tensor/isnan_test.cc
@@ -9,29 +9,84 @@
 namespace onnxruntime {
 namespace test {
 
-TEST(IsNaNOpTest, IsNaNFloat) {
-  OpTester test("IsNaN", 9, kOnnxDomain);
-  std::vector<int64_t> dims{2, 2};
-  test.AddInput<float>("X", dims, {1.0f, NAN, 2.0f, NAN});
-  test.AddOutput<bool>("Y", dims, {false, true, false, true});
+template <typename T>
+void run_is_nan_test(int opset, const std::vector<int64_t>& dims, const std::initializer_list<T>& input, const std::initializer_list<bool>& output) {
+  OpTester test("IsNaN", opset, kOnnxDomain);
+  test.AddInput<T>("X", dims, input);
+  test.AddOutput<bool>("Y", dims, output);
   test.Run();
 }
 
-TEST(IsNaNOpTest, IsNaNFloat16) {
-  OpTester test("IsNaN", 9, kOnnxDomain);
+TEST(IsNaNOpTest, IsNaNFloat9) {
   std::vector<int64_t> dims{2, 2};
-  test.AddInput<MLFloat16>("X", dims, std::initializer_list<MLFloat16>({MLFloat16(1.0f), MLFloat16::NaN, MLFloat16(2.0f), MLFloat16::NaN}));
-  test.AddOutput<bool>("Y", dims, {false, true, false, true});
-  test.Run();
+  std::initializer_list<float> input = {1.0f, NAN, 2.0f, NAN};
+  std::initializer_list<bool> output = {false, true, false, true};
+  run_is_nan_test(9, dims, input, output);
 }
 
-TEST(IsNaNOpTest, IsNaNDouble) {
-  OpTester test("IsNaN", 9, kOnnxDomain);
+TEST(IsNaNOpTest, IsNaNFloat20) {
   std::vector<int64_t> dims{2, 2};
-  test.AddInput<double>("X", dims, {1.0, NAN, 2.0, NAN});
-  test.AddOutput<bool>("Y", dims, {false, true, false, true});
-  test.Run();
+  std::initializer_list<float> input = {1.0f, NAN, 2.0f, NAN};
+  std::initializer_list<bool> output = {false, true, false, true};
+  run_is_nan_test(20, dims, input, output);
+}
+
+TEST(IsNaNOpTest, IsNaNFloat16_9) {
+  std::vector<int64_t> dims{2, 2};
+  std::initializer_list<MLFloat16> input = {MLFloat16(1.0f), MLFloat16::NaN, MLFloat16(2.0f), MLFloat16::NaN};
+  std::initializer_list<bool> output = {false, true, false, true};
+  run_is_nan_test(9, dims, input, output);
+}
+
+TEST(IsNaNOpTest, IsNaNFloat16_20) {
+  std::vector<int64_t> dims{2, 2};
+  std::initializer_list<MLFloat16> input = {MLFloat16(1.0f), MLFloat16::NaN, MLFloat16(2.0f), MLFloat16::NaN};
+  std::initializer_list<bool> output = {false, true, false, true};
+  run_is_nan_test(20, dims, input, output);
+}
+
+TEST(IsNaNOpTest, IsNaNDouble9) {
+  std::vector<int64_t> dims{2, 2};
+  std::initializer_list<double> input = {1.0, NAN, 2.0, NAN};
+  std::initializer_list<bool> output = {false, true, false, true};
+  run_is_nan_test(9, dims, input, output);
+}
+
+TEST(IsNaNOpTest, IsNaNDouble20) {
+  std::vector<int64_t> dims{2, 2};
+  std::initializer_list<double> input = {1.0, NAN, 2.0, NAN};
+  std::initializer_list<bool> output = {false, true, false, true};
+  run_is_nan_test(20, dims, input, output);
 }
 
+#if !defined(DISABLE_FLOAT8_TYPES)
+TEST(IsNaNOpTest, IsNaNFloat8E4M3FN) {
+  std::vector<int64_t> dims{2, 2};
+  std::initializer_list<Float8E4M3FN> input = {Float8E4M3FN(1.0f), Float8E4M3FN(-NAN), Float8E4M3FN(2.0f), Float8E4M3FN(NAN)};
+  std::initializer_list<bool> output = {false, true, false, true};
+  run_is_nan_test(20, dims, input, output);
+}
+
+TEST(IsNaNOpTest, IsNaN_Float8E4M3FNUZ) {
+  std::vector<int64_t> dims{2, 2};
+  std::initializer_list<Float8E4M3FNUZ> input = {Float8E4M3FNUZ(1.0f), Float8E4M3FNUZ(-NAN), Float8E4M3FNUZ(2.0f), Float8E4M3FNUZ(-NAN)};
+  std::initializer_list<bool> output = {false, true, false, true};
+  run_is_nan_test(20, dims, input, output);
+}
+
+TEST(IsNaNOpTest, IsNaNFloat8E5M2) {
+  std::vector<int64_t> dims{2, 2};
+  std::initializer_list<Float8E5M2> input = {Float8E5M2(1.0f), Float8E5M2(-NAN), Float8E5M2(2.0f), Float8E5M2(NAN)};
+  std::initializer_list<bool> output = {false, true, false, true};
+  run_is_nan_test(20, dims, input, output);
+}
+
+TEST(IsNaNOpTest, IsNaN_Float8E5M2FNUZ) {
+  std::vector<int64_t> dims{2, 2};
+  std::initializer_list<Float8E5M2FNUZ> input = {Float8E5M2FNUZ(1.0f), Float8E5M2FNUZ(-NAN), Float8E5M2FNUZ(2.0f), Float8E5M2FNUZ(NAN)};
+  std::initializer_list<bool> output = {false, true, false, true};
+  run_is_nan_test(20, dims, input, output);
+}
+#endif
 }  // namespace test
 }  // namespace onnxruntime
diff --git a/onnxruntime/test/providers/cpu/tensor/resize_op_test.cc b/onnxruntime/test/providers/cpu/tensor/resize_op_test.cc
index 832a8a744c08b..2ead9ec91f93f 100644
--- a/onnxruntime/test/providers/cpu/tensor/resize_op_test.cc
+++ b/onnxruntime/test/providers/cpu/tensor/resize_op_test.cc
@@ -99,9 +99,8 @@ TEST(ResizeOpTest, NhwcResizeOpLinearDownSampleTest_tf_crop_and_resize_with_extr
   // CUDA: result mismatch due to not implementing NHWC support
   // TensorRT: results mismatch
   // ROCm: results mismatch
-  // QNN: conflict with layout transformer, need furture investigation
   test.Run(OpTester::ExpectResult::kExpectSuccess, "",
-           {kCudaExecutionProvider, kTensorrtExecutionProvider, kRocmExecutionProvider, kQnnExecutionProvider});
+           {kCudaExecutionProvider, kTensorrtExecutionProvider, kRocmExecutionProvider});
 }
 
 TEST(ResizeOpTest, NhwcResizeOpLinearDownSampleTest_tf_crop_and_resize_with_extrapolation_uint8) {
@@ -131,7 +130,7 @@ TEST(ResizeOpTest, NhwcResizeOpLinearDownSampleTest_tf_crop_and_resize_with_extr
   test.AddOutput<uint8_t>("Y", {N, static_cast<int64_t>(H * scales[1]), static_cast<int64_t>(W * scales[2]), C}, Y);
   // CUDA: result mismatch due to not implementing NHWC support
   // ROCm: results mismatch
-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCudaExecutionProvider, kRocmExecutionProvider, kQnnExecutionProvider});
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCudaExecutionProvider, kRocmExecutionProvider});
 }
 
 TEST(ResizeOpTest, NhwcResizeOpLinearDownSampleTest_tf_crop_and_resize_with_extrapolation_int8) {
@@ -159,7 +158,7 @@ TEST(ResizeOpTest, NhwcResizeOpLinearDownSampleTest_tf_crop_and_resize_with_extr
                            10, 10, 10};
 
   test.AddOutput<int8_t>("Y", {N, static_cast<int64_t>(H * scales[1]), static_cast<int64_t>(W * scales[2]), C}, Y);
-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kQnnExecutionProvider});
+  test.Run();
 }
 
 TEST(ResizeOpTest, NhwcResizeOpLinearDownSampleTest_tf_crop_and_resize_without_extrapolation_uint8) {
@@ -188,7 +187,7 @@ TEST(ResizeOpTest, NhwcResizeOpLinearDownSampleTest_tf_crop_and_resize_without_e
   test.AddOutput<uint8_t>("Y", {N, static_cast<int64_t>(H * scales[1]), static_cast<int64_t>(W * scales[2]), C}, Y);
   // CUDA: result mismatch due to not implementing NHWC support
   // ROCm: results mismatch
-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCudaExecutionProvider, kRocmExecutionProvider, kQnnExecutionProvider});
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCudaExecutionProvider, kRocmExecutionProvider});
 }
 
 TEST(ResizeOpTest, NhwcResizeOpLinearDownSampleTest_tf_crop_and_resize_without_extrapolation_int8) {
@@ -215,7 +214,7 @@ TEST(ResizeOpTest, NhwcResizeOpLinearDownSampleTest_tf_crop_and_resize_without_e
                            0, 0, 0};
 
   test.AddOutput<int8_t>("Y", {N, static_cast<int64_t>(H * scales[1]), static_cast<int64_t>(W * scales[2]), C}, Y);
-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kQnnExecutionProvider});
+  test.Run();
 }
 
 TEST(ResizeOpTest, ResizeOpLinearDownSampleTest_4DBilinear) {
@@ -261,9 +260,8 @@ TEST(ResizeOpTest, NhwcResizeOpLinearDownSampleTest_4DBilinear) {
   test.AddOutput<float>("Y", {N, static_cast<int64_t>(H * scales[1]), static_cast<int64_t>(W * scales[2]), C}, Y);
   // CUDA: result mismatch due to not implementing NHWC support
   // ROCm: results mismatch
-  // QNN: conflict with layout transformer, need furture investigation
   test.Run(OpTester::ExpectResult::kExpectSuccess, "",
-           {kCudaExecutionProvider, kRocmExecutionProvider, kQnnExecutionProvider});
+           {kCudaExecutionProvider, kRocmExecutionProvider});
 }
 
 TEST(ResizeOpTest, NhwcResizeOpLinearDownSampleTest_4DBilinear_uint8) {
@@ -287,7 +285,7 @@ TEST(ResizeOpTest, NhwcResizeOpLinearDownSampleTest_4DBilinear_uint8) {
   test.AddOutput<uint8_t>("Y", {N, static_cast<int64_t>(H * scales[1]), static_cast<int64_t>(W * scales[2]), C}, Y);
   // CUDA: result mismatch due to not implementing NHWC support
   // ROCm: results mismatch
-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCudaExecutionProvider, kRocmExecutionProvider, kQnnExecutionProvider});
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCudaExecutionProvider, kRocmExecutionProvider});
 }
 
 TEST(ResizeOpTest, NhwcResizeOpLinearDownSampleTest_4DBilinear_int8) {
@@ -309,7 +307,7 @@ TEST(ResizeOpTest, NhwcResizeOpLinearDownSampleTest_4DBilinear_int8) {
   std::vector<int8_t> Y = {0, 0};
 
   test.AddOutput<int8_t>("Y", {N, static_cast<int64_t>(H * scales[1]), static_cast<int64_t>(W * scales[2]), C}, Y);
-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kQnnExecutionProvider});
+  test.Run();
 }
 
 // Since NNAPI(TFLite) only using the scale calculate using the input/output size
@@ -399,7 +397,9 @@ TEST(ResizeOpTest, ResizeOpLinearDownSampleTest_4DBilinear_align_corners) {
     std::vector<float> Y = {1.0f, 4.0f};
 
     test.AddOutput<float>("Y", {N, C, static_cast<int64_t>(H * scales[2]), static_cast<int64_t>(W * scales[3])}, Y);
-    test.Run();
+
+    // QNN: result mismatch ("NaN" instead of 1.0f on QNN CPU backend)
+    test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kQnnExecutionProvider});
   };
 
   run_test(false);
@@ -435,7 +435,7 @@ TEST(ResizeOpTest, NhwcResizeOpLinearDownSampleTest_4DBilinear_align_corners_uin
     test.AddOutput<uint8_t>("Y", {N, static_cast<int64_t>(H * scales[1]), static_cast<int64_t>(W * scales[2]), C}, Y);
     // CUDA: result mismatch due to not implementing NHWC support
     // ROCm: results mismatch
-    test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCudaExecutionProvider, kRocmExecutionProvider, kQnnExecutionProvider});
+    test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCudaExecutionProvider, kRocmExecutionProvider});
   };
 
   run_test(false);
@@ -465,7 +465,7 @@ TEST(ResizeOpTest, NhwcResizeOpLinearDownSampleTest_4DBilinear_align_corners_int
 
     test.AddOutput<int8_t>("Y", {N, static_cast<int64_t>(H * scales[1]), static_cast<int64_t>(W * scales[2]), C}, Y);
     // TensorRT: results mismatch
-    test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kQnnExecutionProvider});
+    test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
   };
 
   run_test(false);
@@ -532,7 +532,7 @@ TEST(ResizeOpTest, NhwcResizeOpLinearDownSampleTest_4DBilinear_pytorch_half_pixe
   test.AddOutput<uint8_t>("Y", {N, sizes[1], sizes[2], C}, Y);
   // CUDA: result mismatch due to not implementing NHWC support
   // ROCm: results mismatch
-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCudaExecutionProvider, kRocmExecutionProvider, kQnnExecutionProvider});
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCudaExecutionProvider, kRocmExecutionProvider});
 }
 
 TEST(ResizeOpTest, NhwcResizeOpLinearDownSampleTest_4DBilinear_pytorch_half_pixel_int8) {
@@ -560,7 +560,7 @@ TEST(ResizeOpTest, NhwcResizeOpLinearDownSampleTest_4DBilinear_pytorch_half_pixe
   std::vector<int8_t> Y = {0, 2, -9};
 
   test.AddOutput<int8_t>("Y", {N, sizes[1], sizes[2], C}, Y);
-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kQnnExecutionProvider});  // TensorRT: results mismatch
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});  // TensorRT: results mismatch
 }
 
 TEST(ResizeOpTest, ResizeOpLinearUpSampleTest_4DBilinear_asymmetric) {
@@ -641,7 +641,7 @@ TEST(ResizeOpTest, NhwcResizeOpLinearUpSampleTest_4DBilinear_asymmetric_uint8) {
                             Y, false, .0f, 1.0f);
     // CUDA: result mismatch due to not implementing NHWC support
     // ROCm: results mismatch
-    test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCudaExecutionProvider, kRocmExecutionProvider, kQnnExecutionProvider});
+    test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCudaExecutionProvider, kRocmExecutionProvider});
   };
 
   run_test(false);
@@ -683,7 +683,7 @@ TEST(ResizeOpTest, NhwcResizeOpLinearUpSampleTest_4DBilinear_asymmetric_int8) {
     test.AddOutput<int8_t>("Y", {N, static_cast<int64_t>(H * scales[1]), static_cast<int64_t>(W * scales[2]), C},
                            Y, false, .0f, 1.0f);
     // TensorRT: results mismatch
-    test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kQnnExecutionProvider});
+    test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
   };
 
   run_test(false);
@@ -780,7 +780,7 @@ TEST(ResizeOpTest, ResizeOpLinearUpSampleTest_5DTrilinear_pytorch_half_pixel) {
 }
 
 TEST(ResizeOpTest, ResizeOpLinearScalesNoOpTest) {
-  // To test NNAPI EP, we need the sclaes/sizes to be in initializers
+  // To test NNAPI EP, we need the scales/sizes to be in initializers
   auto run_test = [](bool scales_in_initializer) {
     OpTester test("Resize", 13);
     std::vector<float> roi{};
@@ -1079,7 +1079,7 @@ TEST(ResizeOpTest, ResizeOpNearestUpSample_Floor_Align_Corners) {
                           13.0f, 13.0f, 13.0f, 14.0f, 14.0f, 15.0f, 15.0f, 16.0f};
 
   test.AddOutput<float>("Y", {N, C, static_cast<int64_t>(H * scales[2]), static_cast<int64_t>(W * scales[3])}, Y);
-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kQnnExecutionProvider});  // QNN: result diff
+  test.Run();
 }
 
 TEST(ResizeOpTest, ResizeOpNearest_OneToOneMappingBetweenInputAndOutputDataDims) {
@@ -1887,7 +1887,7 @@ void TestAntialiasing(std::map<std::string, std::string> attributes,
 
   test.AddOutput<T>("Y", output_shape, output_data);
   // TensorRT 8.5 supports operators up to Opset 17. Temporarily exclude TensorRT EP due to accurarcy issue.
-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kQnnExecutionProvider});
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
 }
 
 TEST(ResizeOpTest, Antialias_Bilinear_No_ExcludeOutside) {
diff --git a/onnxruntime/test/providers/cpu/tensor/split_op_test.cc b/onnxruntime/test/providers/cpu/tensor/split_op_test.cc
index 7712a0a5bf724..70a43d660decb 100644
--- a/onnxruntime/test/providers/cpu/tensor/split_op_test.cc
+++ b/onnxruntime/test/providers/cpu/tensor/split_op_test.cc
@@ -94,7 +94,7 @@ constexpr T ValueFromIdx(size_t idx) {
 }
 
 template <typename T>
-void SplitTestAxis0EqualSplit(bool use_opset_13 = false) {
+void SplitTestAxis0EqualSplit() {
   SCOPED_TRACE(onnxruntime::MakeString("data type: ", utils::ToTensorProtoElementType<T>()));
 
   constexpr int64_t axis = 0;
@@ -117,11 +117,20 @@ void SplitTestAxis0EqualSplit(bool use_opset_13 = false) {
                      {V(5), V(6),
                       V(7), V(8)}});
 
+  // BFloat16 added in opset 13
+  if constexpr (!std::is_same_v<T, BFloat16>) {
+    RunTest<T>(axis, {}, input, outputs,
+               // TensorRT parser: Assertion failed: axis != BATCH_DIM
+               {kTensorrtExecutionProvider},  // is_tensorrt_supported
+               false,                         // expect_failure
+               false /*split_as_input*/);
+  }
+
   RunTest<T>(axis, {}, input, outputs,
              // TensorRT parser: Assertion failed: axis != BATCH_DIM
              {kTensorrtExecutionProvider},  // is_tensorrt_supported
              false,                         // expect_failure
-             use_opset_13);                 // split_as_input
+             true /*split_as_input*/);
 }
 
 }  // namespace
@@ -130,7 +139,7 @@ TEST(SplitOperatorTest, Axis0EqualSplit) {
   SplitTestAxis0EqualSplit<float>();
   SplitTestAxis0EqualSplit<double>();
   SplitTestAxis0EqualSplit<MLFloat16>();
-  SplitTestAxis0EqualSplit<BFloat16>(true);  // BFloat16 added in opset 13
+  SplitTestAxis0EqualSplit<BFloat16>();
   SplitTestAxis0EqualSplit<int8_t>();
   SplitTestAxis0EqualSplit<int16_t>();
   SplitTestAxis0EqualSplit<int32_t>();
@@ -162,8 +171,11 @@ TEST(SplitOperatorTest, Axis0UnequalSplitFloat) {
                      {3.f, 4.f,
                       5.f, 6.f,
                       7.f, 8.f}});
+
   // TensorRT parser: Assertion failed: axis != BATCH_DIM
   RunTest<float>(axis, splits, input, outputs, {kTensorrtExecutionProvider});
+  // CoreML EP, etc. requires split to be an input. Same applies to below sets of tests.
+  RunTest<float>(axis, splits, input, outputs, {kTensorrtExecutionProvider}, false, true);
 }
 
 TEST(SplitOperatorTest, Axis0UnequalSplitString) {
@@ -186,6 +198,7 @@ TEST(SplitOperatorTest, Axis0UnequalSplitString) {
                       "e", "f",
                       "g", "h"}});
   // TensorRT parser: Assertion failed: axis != BATCH_DIM
+  RunTest<std::string>(axis, splits, input, outputs, {kTensorrtExecutionProvider}, false, true);
   RunTest<std::string>(axis, splits, input, outputs, {kTensorrtExecutionProvider});
 }
 
@@ -205,7 +218,7 @@ TEST(SplitOperatorTest, Axis1EqualSplitFloat) {
   outputs.push_back({{2, 2},
                      {3.f, 4.f,
                       7.f, 8.f}});
-
+  RunTest<float>(axis, {}, input, outputs, {kTensorrtExecutionProvider}, false, true);
   RunTest<float>(axis, {}, input, outputs, {kTensorrtExecutionProvider});
 }
 
@@ -226,6 +239,7 @@ TEST(SplitOperatorTest, Axis1EqualSplitString) {
                      {"c", "d",
                       "g", "h"}});
 
+  RunTest<std::string>(axis, {}, input, outputs, {kTensorrtExecutionProvider}, false, true);
   RunTest<std::string>(axis, {}, input, outputs, {kTensorrtExecutionProvider});
 }
 
@@ -248,6 +262,7 @@ TEST(SplitOperatorTest, Axis1UnequalSplitFloat) {
                      {4.f,
                       8.f}});
 
+  RunTest<float>(axis, splits, input, outputs, {kTensorrtExecutionProvider}, false, true);
   RunTest<float>(axis, splits, input, outputs, {kTensorrtExecutionProvider});
 }
 
@@ -270,6 +285,7 @@ TEST(SplitOperatorTest, Axis1UnequalSplitString) {
                      {"d",
                       "h"}});
 
+  RunTest<std::string>(axis, splits, input, outputs, {kTensorrtExecutionProvider}, false, true);
   RunTest<std::string>(axis, splits, input, outputs, {kTensorrtExecutionProvider});
 }
 
@@ -312,6 +328,7 @@ TEST(SplitOperatorTest, Axis2EqualSplit) {
                       17.f, 18.f,
                       23.f, 24.f}});
 
+  RunTest<float>(axis, {}, input, outputs, {kTensorrtExecutionProvider}, false, true);
   RunTest<float>(axis, {}, input, outputs, {kTensorrtExecutionProvider});
 }
 
@@ -344,6 +361,9 @@ TEST(SplitOperatorTest, Axis2UnequalSplit) {
                       16.f, 17.f, 18.f,
                       22.f, 23.f, 24.f}});
 
+  // Note: temporarily marked qnn ep as excluded when running tests with split_as_input=true.
+  // TODO: Need to resolve to see if it's not supported or test case failure.
+  RunTest<float>(axis, splits, input, outputs, {kTensorrtExecutionProvider, kQnnExecutionProvider}, false, true);
   RunTest<float>(axis, splits, input, outputs, {kTensorrtExecutionProvider});
 }
 
@@ -353,7 +373,7 @@ TEST(SplitOperatorTest, ZeroSizeInput) {
 
   ShapeAndFloatData input = CreateInput<float>({0, 2});
 
-  RunTest<float>(axis, {}, input, outputs, {kTensorrtExecutionProvider, kQnnExecutionProvider});
+  RunTest<float>(axis, {}, input, outputs, {kTensorrtExecutionProvider, kQnnExecutionProvider, kCoreMLExecutionProvider});
 }
 
 // test a split of a dimension that has leading and trailing dimensions
@@ -377,6 +397,7 @@ TEST(SplitOperatorTest, Axis1SplitMiddleDimensionEqually) {
                       25.f, 26.f, 27.f, 28.f,
                       29.f, 30.f, 31.f, 32.f}});
 
+  RunTest<float>(axis, {}, input, outputs, {kTensorrtExecutionProvider}, false, true);
   RunTest<float>(axis, {}, input, outputs, {kTensorrtExecutionProvider});
 }
 
@@ -403,6 +424,7 @@ TEST(SplitOperatorTest, Axis1SplitMiddleDimensionUnequally) {
                       25.f, 26.f, 27.f, 28.f,
                       29.f, 30.f, 31.f, 32.f}});
 
+  RunTest<float>(axis, splits, input, outputs, {kTensorrtExecutionProvider}, false, true);
   RunTest<float>(axis, splits, input, outputs, {kTensorrtExecutionProvider});
 }
 
@@ -423,6 +445,7 @@ TEST(SplitOperatorTest, NegativeAxis) {
                      {3.f, 4.f,
                       7.f, 8.f}});
 
+  RunTest<float>(axis, {}, input, outputs, {kTensorrtExecutionProvider}, false, true);
   RunTest<float>(axis, {}, input, outputs, {kTensorrtExecutionProvider});
 }
 
@@ -439,6 +462,7 @@ TEST(SplitOperatorTest, InvalidAxis) {
 
   outputs.push_back({{1}, {0.f}});
 
+  RunTest<float>(axis, {}, input, outputs, {}, true, true, -1, true, "Invalid value of attribute 'axis'");
   RunTest<float>(axis, {}, input, outputs, {}, true, false, -1, true, "Invalid value of attribute 'axis'");
 }
 
@@ -459,6 +483,8 @@ TEST(SplitOperatorTest, SplitAttributeSumTooSmall) {
   outputs.push_back({{1, 2}, {1.f, 2.f}});
   outputs.push_back({{2, 2}, {3.f, 4.f, 5.f, 6.f}});
 
+  RunTest<float>(axis, splits, input, outputs, {kTensorrtExecutionProvider}, true, true, -1, true,
+                 "[ShapeInferenceError] Mismatch between the sum of 'split'");
   RunTest<float>(axis, splits, input, outputs, {kTensorrtExecutionProvider}, true, false, -1, true,
                  "[ShapeInferenceError] Mismatch between the sum of 'split'");  // TensorRT parser: Assertion failed: axis != BATCH_DIM
 }
@@ -478,6 +504,8 @@ TEST(SplitOperatorTest, InvalidValueInSplitAttribute) {
   outputs.push_back({{1, 2}, {1.f, 2.f}});
   outputs.push_back({{3, 2}, {3.f, 4.f, 5.f, 6.f, 7.f, 8.f}});
 
+  RunTest<float>(axis, splits, input, outputs, {kTensorrtExecutionProvider}, true, true, -1, true,
+                 "[ShapeInferenceError] Mismatch between number of splits");
   RunTest<float>(axis, splits, input, outputs, {kTensorrtExecutionProvider}, true, false, -1, true,
                  "[ShapeInferenceError] Mismatch between number of splits");  // TensorRT parser: Assertion failed: axis != BATCH_DIM
 }
@@ -654,7 +682,8 @@ TEST(SplitOperatorTest, MissingOptionalInputAdded) {
                      {3.f, 4.f,
                       7.f, 8.f}});
 
-  RunTest<float>(axis, {}, input, outputs, {kTensorrtExecutionProvider}, false, true, -1, false, {}, false);
+  // CoreML EP does not support the case when split_is_input==true but missing providing the split as initializer.
+  RunTest<float>(axis, {}, input, outputs, {kTensorrtExecutionProvider, kCoreMLExecutionProvider}, false, true, -1, false, {}, false);
 }
 
 TEST(SplitOperatorTest, Split18_NumOutputs_EvenSplit) {
@@ -677,6 +706,9 @@ TEST(SplitOperatorTest, Split18_NumOutputs_EvenSplit) {
                       7.f, 8.f}});
 
   int64_t num_outputs = 2;
+#ifdef USE_COREML
+  RunTest<float>(axis, {}, input, outputs, {kTensorrtExecutionProvider}, false, true, num_outputs, true);
+#endif
   RunTest<float>(axis, {}, input, outputs, {kTensorrtExecutionProvider}, false, true, num_outputs, false);
 }
 
@@ -703,6 +735,9 @@ TEST(SplitOperatorTest, Split18_NumOutputs_UnevenSplit) {
   outputs.push_back({{1, 2}, {9.f, 10.f}});
 
   int64_t num_outputs = 3;
+#ifdef USE_COREML
+  RunTest<float>(axis, {}, input, outputs, {kTensorrtExecutionProvider, kQnnExecutionProvider}, false, true, num_outputs, true);
+#endif
   RunTest<float>(axis, {}, input, outputs, {kTensorrtExecutionProvider, kQnnExecutionProvider}, false, true, num_outputs, false);
 }
 
@@ -728,6 +763,10 @@ TEST(SplitOperatorTest, Split18_InvalidNumOutputs) {
       };
   RunTest<float>(axis, {}, input, outputs, excluded_providers, true, true, num_outputs, false,
                  "Attribute `num_outputs` value cannot be lower than 1");
+#ifdef USE_COREML
+  RunTest<float>(axis, {}, input, outputs, excluded_providers, true, true, num_outputs, true,
+                 "Attribute `num_outputs` value cannot be lower than 1");
+#endif
 
   outputs.clear();
   outputs.push_back({{1, 2},
@@ -738,6 +777,10 @@ TEST(SplitOperatorTest, Split18_InvalidNumOutputs) {
   num_outputs = 3;
   RunTest<float>(axis, {}, input, outputs, excluded_providers, true, true, num_outputs, false,
                  "Invalid num_outputs value of 3. Size of dimension being split is 2");
+#ifdef USE_COREML
+  RunTest<float>(axis, {}, input, outputs, excluded_providers, true, true, num_outputs, true,
+                 "Invalid num_outputs value of 3. Size of dimension being split is 2");
+#endif
 }
 
 TEST(SplitOperatorTest, Split18_NumOutputsEvenSplitAxis1) {
@@ -755,6 +798,9 @@ TEST(SplitOperatorTest, Split18_NumOutputsEvenSplitAxis1) {
 
   int64_t num_outputs = 3;
   RunTest<float>(axis, {}, input, outputs, {kTensorrtExecutionProvider}, false, true, num_outputs, false);
+#ifdef USE_COREML
+  RunTest<float>(axis, {}, input, outputs, {kTensorrtExecutionProvider}, false, true, num_outputs);
+#endif
 }
 
 TEST(SplitOperatorTest, Split18_NumOutputsUnevenSplitAxis1) {
@@ -772,6 +818,9 @@ TEST(SplitOperatorTest, Split18_NumOutputsUnevenSplitAxis1) {
   outputs.push_back({{2, 1}, {3.f, 6.f}});
 
   int64_t num_outputs = 2;
+#ifdef USE_COREML
+  RunTest<float>(axis, {}, input, outputs, {kTensorrtExecutionProvider, kQnnExecutionProvider}, false, true, num_outputs);
+#endif
   RunTest<float>(axis, {}, input, outputs, {kTensorrtExecutionProvider, kQnnExecutionProvider}, false, true, num_outputs, false);
 }
 
diff --git a/onnxruntime/test/providers/cpu/tensor/transpose_test.cc b/onnxruntime/test/providers/cpu/tensor/transpose_test.cc
index c334e0c5ddcb6..0e7ac5ed2b2f0 100644
--- a/onnxruntime/test/providers/cpu/tensor/transpose_test.cc
+++ b/onnxruntime/test/providers/cpu/tensor/transpose_test.cc
@@ -37,7 +37,7 @@ TEST(TransposeOpTest, PermRankDoesNotMatchTensorRank) {
   // This failure comes from shape inference, because in this case it knows the input dims.
   // But in the real world, the model can supply different input dims at runtime.
   test.Run(OpTester::ExpectResult::kExpectFailure,
-           "Node:node1 Output:Y [ShapeInferenceError] Mismatch between number of source and target dimensions. Source=3 Target=4");
+           "Node:node1 Output:Y [ShapeInferenceError] Mismatch between number of inferred and declared dimensions. inferred=3 declared=4");
 }
 
 // Some of the tests can't run on TensorrtExecutionProvider because of errors.
diff --git a/onnxruntime/test/providers/cpu/tensor/unsqueeze_op_test.cc b/onnxruntime/test/providers/cpu/tensor/unsqueeze_op_test.cc
index 2120da604f94a..d2aa5dd428fec 100644
--- a/onnxruntime/test/providers/cpu/tensor/unsqueeze_op_test.cc
+++ b/onnxruntime/test/providers/cpu/tensor/unsqueeze_op_test.cc
@@ -99,7 +99,7 @@ TEST(TensorOpTest, Unsqueeze_scalar_2) {
     test.AddInput<float>("input", {}, std::vector<float>{1.0f});
     test.AddInput<int64_t>("axes", {2}, std::vector<int64_t>{0, -1}, axes_is_initializer);
     test.AddOutput<float>("output", {1, 1}, std::vector<float>{1.0f});
-    test.Run();
+    test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider});
   };
   run_test(false);
   run_test(true);
diff --git a/onnxruntime/test/providers/cuda/nhwc/conv_test.cc b/onnxruntime/test/providers/cuda/nhwc/conv_test.cc
new file mode 100644
index 0000000000000..13d4546d669e3
--- /dev/null
+++ b/onnxruntime/test/providers/cuda/nhwc/conv_test.cc
@@ -0,0 +1,74 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Copyright (c) 2023 NVIDIA Corporation.
+// Licensed under the MIT License.
+
+#include "test/providers/cuda/nhwc/nhwc_cuda_helper.h"
+
+namespace onnxruntime {
+namespace test {
+
+template <typename T>
+struct ConvOp {
+  const std::vector<int64_t> input_dims;
+  const std::vector<int64_t> kernel_shape;
+  int64_t channels;
+  int64_t group = 1;
+  bool bias = false;
+  std::vector<int64_t> strides = {1, 1};
+  std::vector<int64_t> padding = {0, 0, 0, 0};
+  std::vector<int64_t> dilations = {1, 1};
+
+  std::unique_ptr<CompareOpTester> get_test() {
+    RandomValueGenerator random{};
+
+    auto test = std::make_unique<CompareOpTester>("Conv", 11);  // internal NHWC domain starts at opset 11
+    std::vector<T> input_data = random.Uniform<T>(input_dims, 0.0f, 1.0f);
+
+    std::vector<int64_t> weight_dims{channels, input_dims[1] / group, kernel_shape[0], kernel_shape[1]};
+    std::vector<T> weight_data = random.Uniform<T>(weight_dims, -0.4f, 0.4f);
+
+    test->AddInput<T>("X", input_dims, input_data);
+    test->AddInput<T>("W", weight_dims, weight_data, true);
+    if (bias) {
+      std::vector<int64_t> bias_dims{channels};
+      std::vector<T> bias_data = random.Uniform<T>(bias_dims, 0.2f, 0.4f);
+      test->AddInput<T>("B", bias_dims, bias_data, true);
+    }
+    test->AddAttribute("group", group);
+    test->AddAttribute("kernel_shape", kernel_shape);
+    test->AddAttribute("strides", strides);
+    test->AddAttribute("dilations", dilations);
+    test->AddAttribute("pads", padding);
+
+    std::vector<int64_t> output_dims = {
+        input_dims[0], channels,
+        ComputeOutputShape(input_dims[2], strides[0], kernel_shape[0], dilations[0], padding[0], padding[1]),
+        ComputeOutputShape(input_dims[3], strides[1], kernel_shape[1], dilations[1], padding[2], padding[3])};
+    std::vector<T> output_data = FillZeros<T>(output_dims);
+
+    test->AddOutput<T>("Y", output_dims, output_data);
+    return test;
+  }
+};
+
+TYPED_TEST(CudaNhwcTypedTest, ConvNhwcBias) {
+  auto op = ConvOp<TypeParam>{.input_dims = {1, 16, 64, 64}, .kernel_shape = {3, 3}, .channels = 16, .bias = true};
+
+  MAKE_PROVIDERS_EPS_TYPE(TypeParam)
+}
+
+TYPED_TEST(CudaNhwcTypedTest, ConvNhwcGroupNoBias) {
+  auto op = ConvOp<TypeParam>{.input_dims = {1, 16, 64, 64}, .kernel_shape = {3, 3}, .channels = 16, .group = 4};
+
+  MAKE_PROVIDERS_EPS_TYPE(TypeParam)
+}
+
+TYPED_TEST(CudaNhwcTypedTest, ConvNhwcPadding) {
+  auto op =
+      ConvOp<TypeParam>{.input_dims = {2, 4, 64, 64}, .kernel_shape = {3, 3}, .channels = 4, .padding = {4, 4, 4, 4}};
+
+  MAKE_PROVIDERS_EPS_TYPE(TypeParam)
+}
+
+}  // namespace test
+}  // namespace onnxruntime
diff --git a/onnxruntime/test/providers/cuda/nhwc/conv_transpose_test.cc b/onnxruntime/test/providers/cuda/nhwc/conv_transpose_test.cc
new file mode 100644
index 0000000000000..06da2a5304716
--- /dev/null
+++ b/onnxruntime/test/providers/cuda/nhwc/conv_transpose_test.cc
@@ -0,0 +1,97 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Copyright (c) 2023 NVIDIA Corporation.
+// Licensed under the MIT License.
+
+#include "test/providers/cuda/nhwc/nhwc_cuda_helper.h"
+
+namespace onnxruntime {
+namespace test {
+
+template <typename T>
+struct ConvTransposeOp {
+  const std::vector<int64_t> input_dims;
+  const std::vector<int64_t> kernel_shape;
+  int64_t channels;
+  int64_t group = 1;
+  bool bias = false;
+  std::vector<int64_t> strides = {1, 1};
+  std::vector<int64_t> padding = {0, 0, 0, 0};
+  std::vector<int64_t> output_padding = {0, 0, 0, 0};
+  std::vector<int64_t> dilations = {1, 1};
+
+  std::unique_ptr<CompareOpTester> get_test() {
+    RandomValueGenerator random{};
+
+    auto test = std::make_unique<CompareOpTester>("ConvTranspose", 14);
+    std::vector<T> input_data = random.Uniform<T>(input_dims, 0.0f, 1.0f);
+
+    std::vector<int64_t> weight_dims{input_dims[1], channels / group, kernel_shape[0], kernel_shape[1]};
+    std::vector<T> weight_data = random.Uniform<T>(weight_dims, -0.4f, 0.4f);
+
+    test->AddInput<T>("X", input_dims, input_data);
+    test->AddInput<T>("W", weight_dims, weight_data, true);
+    if (bias) {
+      std::vector<int64_t> bias_dims{channels};
+      std::vector<T> bias_data = random.Uniform<T>(bias_dims, 0.2f, 0.4f);
+      test->AddInput<T>("B", bias_dims, bias_data, true);
+    }
+    test->AddAttribute("group", group);
+    test->AddAttribute("kernel_shape", kernel_shape);
+    test->AddAttribute("strides", strides);
+    test->AddAttribute("dilations", dilations);
+    test->AddAttribute("pads", padding);
+    if (!output_padding.empty()) {
+      test->AddAttribute("output_padding", output_padding);
+    } else {
+      output_padding = {0, 0, 0, 0};
+    }
+
+    std::vector<int64_t> output_dims = {
+        input_dims[0], channels,
+        (kernel_shape[1] - 1) * dilations[1] + (input_dims[2] - 1) * strides[1] - (padding[1] + padding[0]) + 1 +
+            output_padding[2],
+        (kernel_shape[0] - 1) * dilations[0] + (input_dims[3] - 1) * strides[0] - (padding[3] + padding[2]) + 1 +
+            output_padding[3]};
+    std::vector<T> output_data = FillZeros<T>(output_dims);
+
+    test->AddOutput<T>("Y", output_dims, output_data);
+    return test;
+  }
+};
+
+TYPED_TEST(CudaNhwcTypedTest, ConvTransposeNhwcGroupNoBias) {
+  auto op =
+      ConvTransposeOp<TypeParam>{.input_dims = {8, 8, 32, 32}, .kernel_shape = {3, 3}, .channels = 16, .group = 4};
+
+  MAKE_PROVIDERS_EPS_TYPE(TypeParam)
+}
+
+TYPED_TEST(CudaNhwcTypedTest, ConvTransposeNhwcBias) {
+  auto op =
+      ConvTransposeOp<TypeParam>{.input_dims = {1, 8, 80, 80}, .kernel_shape = {5, 5}, .channels = 16, .bias = true};
+
+  MAKE_PROVIDERS_EPS_TYPE(TypeParam)
+}
+
+TYPED_TEST(CudaNhwcTypedTest, ConvTransposeNhwcPad) {
+  auto op = ConvTransposeOp<TypeParam>{.input_dims = {1, 16, 8, 8},
+                                       .kernel_shape = {3, 3},
+                                       .channels = 32,
+                                       .padding = {2, 2, 2, 2},
+                                       .output_padding = {}};
+
+  MAKE_PROVIDERS_EPS_TYPE(TypeParam)
+}
+
+TYPED_TEST(CudaNhwcTypedTest, ConvTransposeNhwcOutPad) {
+  auto op = ConvTransposeOp<TypeParam>{.input_dims = {1, 32, 8, 8},
+                                       .kernel_shape = {3, 3},
+                                       .channels = 32,
+                                       .strides = {2, 2},
+                                       .output_padding = {1, 1, 1, 1}};
+
+  MAKE_PROVIDERS_EPS_TYPE(TypeParam)
+}
+
+}  // namespace test
+}  // namespace onnxruntime
diff --git a/onnxruntime/test/providers/cuda/nhwc/nhwc_cuda_helper.h b/onnxruntime/test/providers/cuda/nhwc/nhwc_cuda_helper.h
new file mode 100644
index 0000000000000..2c942bb790096
--- /dev/null
+++ b/onnxruntime/test/providers/cuda/nhwc/nhwc_cuda_helper.h
@@ -0,0 +1,47 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Copyright (c) 2023 NVIDIA Corporation.
+// Licensed under the MIT License.
+
+#include <vector>
+#include <utility>
+#include <memory>
+
+#include "core/providers/cuda/cuda_provider_options.h"
+#include "core/providers/common.h"
+
+#include "test/providers/compare_provider_test_utils.h"
+#include "test/common/cuda_op_test_utils.h"
+
+#include "gtest/gtest.h"
+
+#define MAKE_PROVIDERS_EPS(eps)                                           \
+  std::vector<std::shared_ptr<IExecutionProvider>> execution_providers;   \
+  OrtCUDAProviderOptionsV2 nhwc = {.prefer_nhwc = true};                  \
+  execution_providers.push_back(CudaExecutionProviderWithOptions(&nhwc)); \
+                                                                          \
+  double error_tolerance = eps;                                           \
+  OrtCUDAProviderOptionsV2 nchw = {.prefer_nhwc = false};                 \
+  auto source_ep = CudaExecutionProviderWithOptions(&nchw);               \
+  auto test = op.get_test();                                              \
+  test->CompareEPs(std::move(source_ep), execution_providers, error_tolerance);
+
+#define MAKE_PROVIDERS() MAKE_PROVIDERS_EPS(1e-3)
+
+#define MAKE_PROVIDERS_EPS_TYPE(T)             \
+  if (std::is_same<T, MLFloat16>::value) {     \
+    MAKE_PROVIDERS_EPS(2e-2)                   \
+  } else if (std::is_same<T, double>::value) { \
+    MAKE_PROVIDERS_EPS(2e-4)                   \
+  } else {                                     \
+    MAKE_PROVIDERS_EPS(2e-3)                   \
+  }
+namespace onnxruntime {
+namespace test {
+
+template <typename T>
+class CudaNhwcTypedTest : public ::testing::Test {};
+
+using CudaNhwcTestTypes = ::testing::Types<float, MLFloat16>;  // double,
+TYPED_TEST_SUITE(CudaNhwcTypedTest, CudaNhwcTestTypes);
+}  // namespace test
+}  // namespace onnxruntime
diff --git a/onnxruntime/test/providers/cuda/nhwc/norm_test.cc b/onnxruntime/test/providers/cuda/nhwc/norm_test.cc
new file mode 100644
index 0000000000000..52da8ba557c2d
--- /dev/null
+++ b/onnxruntime/test/providers/cuda/nhwc/norm_test.cc
@@ -0,0 +1,51 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Copyright (c) 2023 NVIDIA Corporation.
+// Licensed under the MIT License.
+
+#include "test/providers/cuda/nhwc/nhwc_cuda_helper.h"
+
+namespace onnxruntime {
+namespace test {
+
+template <typename T>
+struct BatchNormOp {
+  const std::vector<int64_t> input_dims;
+
+  std::unique_ptr<CompareOpTester> get_test() {
+    // create rand inputs
+    RandomValueGenerator random{};
+
+    auto test = std::make_unique<CompareOpTester>("BatchNormalization", 14);
+    std::vector<T> input_data = random.Uniform<T>(input_dims, 0.0f, 0.3f);
+    auto channels = input_dims[1];
+    test->AddInput<T>("X", input_dims, input_data);
+
+    std::vector<int64_t> bias_dims{channels};
+    std::vector<T> bias_data = random.Uniform<T>(bias_dims, 0.2f, 1.0f);
+    test->AddInput<T>("B", bias_dims, bias_data);
+    // we simply gonna reuse the bias data here.
+    test->AddInput<T>("scale", bias_dims, bias_data);
+
+    std::vector<int64_t> mean{channels};
+    std::vector<T> mean_data = random.Uniform<T>(mean, 0.7f, 0.8f);
+    test->AddInput<T>("input_mean", bias_dims, bias_data);
+    std::vector<int64_t> var{channels};
+    std::vector<T> var_data = random.Uniform<T>(var, 0.0f, 0.1f);
+    test->AddInput<T>("input_var", bias_dims, bias_data);
+
+    std::vector<T> output_data = FillZeros<T>(input_dims);
+    test->AddOutput<T>("Y", input_dims, output_data);
+    return test;
+  }
+};
+
+TYPED_TEST(CudaNhwcTypedTest, BatchNormNhwc) {
+  auto op = BatchNormOp<TypeParam>{
+      .input_dims = {4, 16, 64, 64},
+  };
+
+  MAKE_PROVIDERS()
+}
+
+}  // namespace test
+}  // namespace onnxruntime
diff --git a/onnxruntime/test/providers/cuda/nhwc/pool_test.cc b/onnxruntime/test/providers/cuda/nhwc/pool_test.cc
new file mode 100644
index 0000000000000..e0d59901da80c
--- /dev/null
+++ b/onnxruntime/test/providers/cuda/nhwc/pool_test.cc
@@ -0,0 +1,95 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Copyright (c) 2023 NVIDIA Corporation.
+// Licensed under the MIT License.
+
+#include "test/providers/cuda/nhwc/nhwc_cuda_helper.h"
+
+namespace onnxruntime {
+namespace test {
+
+template <typename T>
+struct PoolOp {
+  const std::string pooling_type;
+  const std::vector<int64_t> input_dims;
+  const std::vector<int64_t> kernel_shape;
+  int64_t channels;
+  int64_t group = 1;
+  std::vector<int64_t> strides = {1, 1};
+  std::vector<int64_t> padding = {0, 0, 0, 0};
+
+  std::unique_ptr<CompareOpTester> get_test() {
+    RandomValueGenerator random{};
+
+    auto test = std::make_unique<CompareOpTester>(pooling_type.c_str(), 14);
+    std::vector<T> input_data = random.Uniform<T>(input_dims, 0.0f, 0.3f);
+
+    test->AddInput<T>("X", input_dims, input_data);
+
+    test->AddAttribute("kernel_shape", kernel_shape);
+    test->AddAttribute("strides", strides);
+    test->AddAttribute("pads", padding);
+
+    std::vector<int64_t> output_dims = {
+        input_dims[0], channels,
+        (input_dims[2] - (kernel_shape[0] - 1) + padding[1] + padding[0] - 1) / strides[0] + 1,
+        (input_dims[3] - (kernel_shape[1] - 1) + padding[3] + padding[2] - 1) / strides[1] + 1};
+    std::vector<T> output_data = FillZeros<T>(output_dims);
+
+    test->AddOutput<T>("Y", output_dims, output_data);
+    return test;
+  }
+};
+
+TYPED_TEST(CudaNhwcTypedTest, AveragePoolNhwc) {
+  auto op = PoolOp<TypeParam>{
+      .pooling_type = "AveragePool",
+      .input_dims = {1, 16, 64, 64},
+      .kernel_shape = {3, 3},
+      .channels = 16,
+  };
+  MAKE_PROVIDERS()
+}
+
+TYPED_TEST(CudaNhwcTypedTest, MaxPoolNhwc) {
+  auto op = PoolOp<TypeParam>{
+      .pooling_type = "MaxPool",
+      .input_dims = {1, 16, 64, 64},
+      .kernel_shape = {3, 3},
+      .channels = 16,
+  };
+  MAKE_PROVIDERS()
+}
+
+TYPED_TEST(CudaNhwcTypedTest, GlobalMaxPoolNhwc) {
+  RandomValueGenerator random{};
+  auto test = std::make_unique<CompareOpTester>("GlobalMaxPool", 14);
+  const std::vector<int64_t> input_dims = {4, 16, 4, 8};
+  std::vector<TypeParam> input_data = random.Uniform<TypeParam>(input_dims, 0.5f, 1.3f);
+  test->AddInput<TypeParam>("X", input_dims, input_data);
+
+  std::vector<int64_t> output_dims = {input_dims[0], input_dims[1], 1, 1};
+  std::vector<TypeParam> output_data = FillZeros<TypeParam>(output_dims);
+  test->AddOutput<TypeParam>("Y", output_dims, output_data);
+
+  std::vector<std::shared_ptr<IExecutionProvider>> execution_providers;
+  OrtCUDAProviderOptionsV2 nhwc = {.prefer_nhwc = true};
+  execution_providers.push_back(CudaExecutionProviderWithOptions(&nhwc));
+
+  double error_tolerance = 1e-3;
+  OrtCUDAProviderOptionsV2 nchw = {.prefer_nhwc = false};
+  auto source_ep = CudaExecutionProviderWithOptions(&nchw);
+  test->CompareEPs(std::move(source_ep), execution_providers, error_tolerance);
+}
+
+TYPED_TEST(CudaNhwcTypedTest, AveragePoolNhwcPad) {
+  auto op = PoolOp<TypeParam>{.pooling_type = "AveragePool",
+                              .input_dims = {1, 16, 64, 64},
+                              .kernel_shape = {3, 3},
+                              .channels = 16,
+                              .padding = {2, 2, 2, 2}};
+
+  MAKE_PROVIDERS()
+}
+
+}  // namespace test
+}  // namespace onnxruntime
diff --git a/onnxruntime/test/providers/cuda/test_cases/blkq4_fp16_sm80_prepack_test.cc b/onnxruntime/test/providers/cuda/test_cases/blkq4_fp16_sm80_prepack_test.cc
new file mode 100644
index 0000000000000..aba2b0b2cb4a4
--- /dev/null
+++ b/onnxruntime/test/providers/cuda/test_cases/blkq4_fp16_sm80_prepack_test.cc
@@ -0,0 +1,507 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include <random>
+
+#include "core/framework/float16.h"
+#include "core/mickey/blk_q4/prepack_sm80.h"
+#include "core/mlas/inc/mlas_q4.h"
+
+#include "gtest/gtest.h"
+
+namespace onnxruntime {
+namespace test {
+
+void prepack_weights_ref(
+    int rows,
+    int columns,
+    const MatrixRef<uint8_t const, ColumnMajorLayout, true>& tensor_weight,
+    const MatrixRef<uint8_t, ColumnMajorLayout, true>& tensor_weight_prepacked) {
+  EXPECT_TRUE(tensor_weight.shape()[0] == rows / 2 && tensor_weight.shape()[1] == columns);
+  EXPECT_TRUE(tensor_weight_prepacked.shape()[0] == rows && tensor_weight_prepacked.shape()[1] == columns / 2);
+
+  auto t0_base = make_Position(0, 0);
+  auto t1_base = make_Position(4, 0);
+  auto t2_base = make_Position(0, 8);
+  auto t3_base = make_Position(4, 8);
+  for (int col_dtile = 0; col_dtile < columns / 16; ++col_dtile) {
+    for (int row_dtile = 0; row_dtile < rows / 16; ++row_dtile) {
+      // Packing from a 8x16 tile to a 16x8 tile
+      auto dtile_base = make_Position(row_dtile * 8, col_dtile * 16);
+      auto packed_tile_base = make_Position(row_dtile * 16, col_dtile * 8);
+      for (int col = 0; col < 8; ++col) {
+        for (int row = 0; row < 4; ++row) {
+          auto cord = make_Position(row, col);
+          auto packed_cord = packed_tile_base + make_Position(row * 4, col);  // packed tile is 16x8
+          uint8_t buf[4];
+          buf[0] = tensor_weight.at(dtile_base + t0_base + cord);
+          buf[1] = tensor_weight.at(dtile_base + t1_base + cord);
+          buf[2] = tensor_weight.at(dtile_base + t2_base + cord);
+          buf[3] = tensor_weight.at(dtile_base + t3_base + cord);
+
+          // [0, 1, 2, 3, 4, 5, 6, 7] => [0, 2, 4, 6, 1, 3, 5, 7] so that each pair of adjacent weights
+          // are in different b16 register at the same positions. This makes it easier to convert to
+          // fp16x2 format in a b32 register
+
+          tensor_weight_prepacked.at(packed_cord) = (buf[0] & 0x0f) | ((buf[1] & 0x0f) << 4);
+          tensor_weight_prepacked.at(packed_cord + make_Position(1, 0)) = (buf[2] & 0x0f) | ((buf[3] & 0x0f) << 4);
+          tensor_weight_prepacked.at(packed_cord + make_Position(2, 0)) = ((buf[0] & 0xf0) >> 4) | (buf[1] & 0xf0);
+          tensor_weight_prepacked.at(packed_cord + make_Position(3, 0)) = ((buf[2] & 0xf0) >> 4) | (buf[3] & 0xf0);
+        }
+      }
+    }
+  }
+}
+
+template <
+    typename ScaleElementT,
+    typename Layout,
+    typename QuantBlocking>
+void prepack_quant_scales_ref(
+    int rows,
+    int columns,
+    const MatrixRef<ScaleElementT const, Layout, true>& tensor_scale,
+    const MatrixRef<ScaleElementT, Layout, true>& tensor_scale_prepacked) {
+  EXPECT_TRUE(tensor_scale.shape()[0] == (rows / QuantBlocking::kRow) && tensor_scale.shape()[1] == (columns / QuantBlocking::kColumn));
+  EXPECT_TRUE(tensor_scale_prepacked.shape() == tensor_scale.shape());
+
+  // Only prepacking scale and offset tensors for a often used special case:
+  //    16b gemm (2 elements per 32b register, operand tile shape 8x8)
+  //    2 B operand tiles per mma instruction stacked on k dimension
+  //    (1,n) quantization blocking
+  if constexpr (sizeof(ScaleElementT) == 2 && QuantBlocking::kRow == 1) {
+    // In Ampere tensor op, each operand B tile is 8 x 8, in a warp of 32 threads, each thread
+    // holds a fragment of the tile containing 2 elements in the k dimension. Most often we use
+    // mma instruction shape of 16x8x16, which means 2 B tiles are stacked in the k dimension,
+    // as shown below (T stands for thread):
+    // T0, T4, T8, T12
+    // T1, T5, T9, T13
+    // T2, T6, T10, T14
+    // T3, T7, T11, T15
+    // T0, T4, T8, T12
+    // T1, T5, T9, T13
+    // T2, T6, T10, T14
+    // T3, T7, T11, T15
+    //
+    // We need to deliver quantization scale and offset elements to the corresponding threads,
+    // so we can perform dequantization efficiently. With a column major layout, each thread
+    // needs two separate loads for a mma instruction, due to the tile fragment layout shown
+    // above. To reduce the number of loads, we rearrange each column as below, so we can use
+    // a single load to load fragments for two tiles:
+    // T0        T0
+    // T1        T0
+    // T2        T1
+    // T3   =>   T1
+    // T0        T2
+    // T1        T2
+    // T2        T3
+    // T3        T3
+
+    for (int col = 0; col < tensor_scale.shape()[1]; ++col) {
+      for (int row_blk = 0; row_blk < tensor_scale.shape()[0]; row_blk += 16) {
+        for (int thread_id = 0; thread_id < 4; thread_id++) {
+          const int dst_idx = row_blk + thread_id * 4;
+          const int src_idx = row_blk + thread_id * 2;
+          tensor_scale_prepacked.at(dst_idx + 0, col) = tensor_scale.at(src_idx + 0, col);
+          tensor_scale_prepacked.at(dst_idx + 1, col) = tensor_scale.at(src_idx + 1, col);
+          tensor_scale_prepacked.at(dst_idx + 2, col) = tensor_scale.at(src_idx + 8, col);
+          tensor_scale_prepacked.at(dst_idx + 3, col) = tensor_scale.at(src_idx + 9, col);
+        }
+      }
+    }
+  } else {
+    // In all other cases, we don't prepack scale or offset
+    FAIL() << "Scale prepack only supported for 16b gemm with (1,n) quantization blocking";
+  }
+}
+
+template <typename Layout, typename QuantBlocking>
+void prepack_quant_offsets_ref(
+    size_t rows,
+    size_t columns,
+    MatrixRef<uint8_t const, Layout, true> tensor_offset,
+    MatrixRef<uint8_t, Layout, true> tensor_offset_prepacked) {
+  // EXPECT_TRUE(tensor_offset.shape()[0] == (rows / QuantBlocking::kRow) && tensor_offset.shape()[1] == (columns / QuantBlocking::kColumn));
+  EXPECT_TRUE(tensor_offset_prepacked.shape() == tensor_offset.shape());
+
+  // Only prepacking scale and offset tensors for a often used special case:
+  //    16b gemm (2 elements per 32b register, operand tile shape 8x8)
+  //    2 B operand tiles per mma instruction stacked on k dimension
+  //    (1,n) quantization blocking
+  if constexpr (QuantBlocking::kRow != 1) {
+    FAIL() << "Offsets prepack only supported for 16b gemm with (1,n) quantization blocking";
+  }
+  // In Ampere tensor op, each operand B tile is 8 x 8, in a warp of 32 threads, each thread
+  // holds a fragment of the tile containing 2 elements in the k dimension. Most often we use
+  // mma instruction shape of 16x8x16, which means 2 B tiles are stacked in the k dimension,
+  // as shown below (T stands for thread):
+  // T0, T4, T8, T12
+  // T1, T5, T9, T13
+  // T2, T6, T10, T14
+  // T3, T7, T11, T15
+  // T0, T4, T8, T12
+  // T1, T5, T9, T13
+  // T2, T6, T10, T14
+  // T3, T7, T11, T15
+  //
+  // We need to deliver quantization scale and offset elements to the corresponding threads,
+  // so we can perform dequantization efficiently. With a column major layout, each thread
+  // needs two separate loads for a mma instruction, due to the tile fragment layout shown
+  // above. To reduce the number of loads, we rearrange each column as below, so we can use
+  // a single load to load fragments for two tiles:
+  // T0        T0
+  // T1        T0
+  // T2        T1
+  // T3   =>   T1
+  // T0        T2
+  // T1        T2
+  // T2        T3
+  // T3        T3
+  if (tensor_offset_prepacked.good()) {
+    for (int col = 0; col < tensor_offset.shape()[1]; ++col) {
+      for (int row_blk = 0; row_blk < tensor_offset.shape()[0]; row_blk += 16) {
+        for (int thread_id = 0; thread_id < 4; thread_id++) {
+          const int dst_idx = row_blk + thread_id * 4;
+          const int src_idx = row_blk + thread_id * 2;
+          // [a, b, c, d] => [a, c, b, d] so that adjacent weights are in their own
+          // 16b element: [a, x, b, x] and [x, c, x, d], which makes it easier to
+          // convert to fp16x2 format in a b32 register
+          tensor_offset_prepacked.at(dst_idx + 0, col) = tensor_offset.at(src_idx + 0, col);
+          tensor_offset_prepacked.at(dst_idx + 1, col) = tensor_offset.at(src_idx + 8, col);
+          tensor_offset_prepacked.at(dst_idx + 2, col) = tensor_offset.at(src_idx + 1, col);
+          tensor_offset_prepacked.at(dst_idx + 3, col) = tensor_offset.at(src_idx + 9, col);
+        }
+      }
+    }
+  }
+}
+
+template <bool ColumnMajorQuantBlocking>
+void testPrepack(int rows, int columns, bool has_offset = true) {
+  using ElementT = MLFloat16;
+  constexpr int block_size = 32;
+  using Base = onnxruntime::cuda::BlockwiseQuantization<
+      ElementT,
+      block_size,
+      4,
+      ColumnMajorQuantBlocking>;
+
+  using QuantBlocking = typename Base::QuantBlocking;
+  using ElementW = typename Base::ElementW;
+  using LayoutWPack = typename Base::LayoutWPack;
+  using ElementQOffset = typename Base::ElementQOffset;
+  using LayoutQmeta = typename Base::LayoutQmeta;
+
+  unsigned int seed = 28571;  // Replace with desired seed value
+  std::seed_seq seq{seed};
+  std::mt19937 gen(seq);
+  std::uniform_int_distribution<> dis(0, 8192);
+
+  const auto q_weight_shape = Base::get_quant_weights_shape(rows, columns);
+  const auto meta_shape = Base::get_quant_meta_shape(rows, columns);
+
+  //
+  // For testing quantization and dequantization, it is not straight
+  // forward to avoid flaky tests due to rounding errors. The way we
+  // try to achieve this is to:
+  // 1. Generate a set of quantized weights, scales and offsets
+  // 2. Dequantize the weights
+  // 3. Quantize the dequantized weights
+  // 4. Compare the dequantied-and-then-quantized weights with
+  //    the original quantized weights
+  //
+  // Random filling of the initial values are key to get this right.
+  // For weights, we must ensure each block gets a full range of
+  // values, i.e. must contain 0 and 15. And for scales, they must
+  // all be positive.
+  //
+
+  std::vector<ElementW> q_weights(q_weight_shape.product());
+  MatrixRef<ElementW, LayoutWPack, true> tensor_q_weight(
+      q_weights, make_Position(rows / 2, columns));
+  int v = 7;
+  for (int c = 0; c < tensor_q_weight.shape()[1]; c++) {
+    for (int r = 0; r < tensor_q_weight.shape()[0]; ++r) {
+      uint8_t v0 = static_cast<uint8_t>(v);
+      v = (v + 5) % 16;
+      if (v == 11 || v == 7 || v == 3) {
+        // making the cycle 13 instead of 16, avoiding same values in a row
+        v = (v + 5) % 16;
+      }
+      uint8_t v1 = 0;
+      if (r + 1 < rows) {
+        v1 = static_cast<uint8_t>(v);
+        v = (v + 5) % 16;
+        if (v == 11 || v == 7 || v == 3) {
+          // making the cycle 13 instead of 16, avoiding same values in a row
+          v = (v + 5) % 16;
+        }
+      }
+
+      tensor_q_weight.at(r, c) = ElementW((v1 << 4) | v0);
+    }
+  }
+
+  std::vector<ElementT> q_scales(meta_shape.product());
+  for (size_t i = 0; i < q_scales.size(); i++) {
+    q_scales[i] = ElementT(((dis(gen) % 127) + 1) / 32.0f);
+  }
+  MatrixRef<ElementT, LayoutQmeta, true> tensor_scale(
+      q_scales, meta_shape);
+
+  std::vector<ElementQOffset> q_zp(meta_shape.product());
+  for (size_t i = 0; i < q_zp.size(); i++) {
+    q_zp[i] = dis(gen) % 16;
+  }
+  MatrixRef<ElementQOffset, LayoutQmeta, true> tensor_offset(
+      q_zp, meta_shape);
+
+#if 0  // debug
+  // Fill tensor_q_weight with the patterned data, easier to debug with print
+  int loop_val = 0;
+  int offset = 3;
+  for (int col_tile = 0; col_tile < tensor_q_weight.extent().column()/8; ++col_tile) {
+    for (int row_tile = 0; row_tile < tensor_q_weight.extent().row()/4; ++row_tile) {
+      for (int col = 0; col < 8; ++col) {
+        for (int row = 0; row < 4; ++row) {
+          auto weight_cord = cutlass::make_Coord(row_tile * 4 + row, col_tile * 8 + col);
+          auto val = (loop_val + offset) % 256;
+          tensor_q_weight.at(weight_cord) = ElementW(val);
+          loop_val++;
+          if (loop_val == 256) {
+            loop_val = 0;
+            offset += 11;
+          }
+        }
+      }
+    }
+  }
+  for (int col = 0; col < tensor_scale.extent().column(); ++col){
+    int c =  col * QuantBlocking::kColumn;
+    for (int row = 0; row < tensor_scale.extent().row(); ++row){
+      int r = row * QuantBlocking::kRow;
+      auto weight_cord = cutlass::make_Coord(r/2, c);
+      int w = 0;
+      if (r % 2 == 0) {
+        w = int(tensor_q_weight.at(weight_cord) & 0x0f);
+      } else {
+        w = int(tensor_q_weight.at(weight_cord) >> 4);
+      }
+      tensor_scale.at({row, col}) = w;
+      tensor_offset.at({row, col}) = ElementQOffset(w);
+    }
+  }
+
+  int fill_val = -512;
+  int factor = 1;
+  for (int col = 0; col < tensor_scale.extent().column(); ++col){
+    for (int row = 0; row < tensor_scale.extent().row(); ++row){
+      tensor_scale.at({row, col}) = ElementQScale((float)fill_val * float(factor));
+      fill_val++;
+      if (fill_val == 512) {
+        fill_val = -512;
+        factor += 1;
+      }
+    }
+  }
+
+#endif  // debug
+
+  std::vector<ElementT> dequants(rows * columns);
+  MatrixRef<ElementT, RowMajorLayout> tensor_dequant(dequants, make_Position(rows, columns));
+
+  // Dequantize weights and save into matrix B for reference
+  for (int col = 0; col < tensor_dequant.shape()[1]; ++col) {
+    for (int row = 0; row < tensor_dequant.shape()[0]; ++row) {
+      auto weight_cord = make_Position(row / 2, col);
+      auto scale_cord = make_Position(row / QuantBlocking::kRow, col / QuantBlocking::kColumn);
+      const uint8_t offset = has_offset ? tensor_offset.at(scale_cord) : 8;
+      int w = 0;
+      if (row % 2 == 0) {
+        w = int(tensor_q_weight.at(weight_cord) & 0x0f);
+      } else {
+        w = int(tensor_q_weight.at(weight_cord) >> 4);
+      }
+      float scale = float(tensor_scale.at(scale_cord));
+      float dequant = scale * float(w - offset);
+      tensor_dequant.at(row, col) = ElementT(dequant);
+      // Prints for help debugging in case of test failure
+      // fprintf(stderr, "(%2d,%2d)= %2d, %2d, %f, %f\n", row, col, w, offset, scale, dequant);
+    }
+  }
+
+  int q_rows, q_cols;
+  MlasBlockwiseQuantizedShape<ElementT, 4>(
+      block_size, ColumnMajorQuantBlocking, rows, columns, q_rows, q_cols);
+  // to be exact, q_rows are padded to multiple of block_size, deal with it when we care about strange shapes
+  EXPECT_EQ(q_rows, q_weight_shape[0]);
+  EXPECT_EQ(q_cols, q_weight_shape[1]);
+
+  //
+  // Quantization tool outputs:
+  //
+  std::vector<ElementW> o_elements(q_rows * q_cols);
+  MatrixRef<ElementW, ColumnMajorLayout, true> tensor_o_elements(o_elements, q_weight_shape);
+
+  std::vector<ElementT> o_scales(meta_shape.product());
+  MatrixRef<ElementT, ColumnMajorLayout, true> tensor_o_scales(o_scales, meta_shape);
+
+  std::vector<uint8_t> o_zp(((meta_shape[0] + 1) / 2) * meta_shape[1], true);
+  MatrixRef<uint8_t, ColumnMajorLayout, true> tensor_o_zp(
+      o_zp, make_Position((meta_shape[0] + 1) / 2, meta_shape[1]));
+
+  MlasQuantizeBlockwise<MLFloat16, 4>(o_elements.data(), o_scales.data(), has_offset ? o_zp.data() : nullptr,
+                                      tensor_dequant.data().data(), block_size,
+                                      ColumnMajorQuantBlocking, rows, columns, columns, nullptr);
+  for (int col = 0; col < tensor_q_weight.shape()[1]; ++col) {
+    for (int row = 0; row < tensor_q_weight.shape()[0]; ++row) {
+      EXPECT_EQ(tensor_o_elements.at(row, col), tensor_q_weight.at(row, col))
+          << "quantized value mismatch at [" << row << "," << col << "]"
+          << " shape[" << rows << "," << columns << "]"
+          << (ColumnMajorQuantBlocking ? "Column-wise-block" : "Row-wise-block")
+          << std::endl;
+    }
+  }
+
+  for (int col = 0; col < meta_shape[1]; ++col) {
+    for (int row = 0; row < meta_shape[0]; row += 2) {
+      if (has_offset) {
+        uint8_t pair01 = tensor_o_zp.at(row / 2, col);
+        EXPECT_EQ(tensor_offset.at(row + 0, col), pair01 & 0xf)
+            << "quantized offset mismatch at [" << row << "," << col << "]"
+            << " shape[" << rows << "," << columns << "]"
+            << (ColumnMajorQuantBlocking ? "Column-wise-block" : "Row-wise-block")
+            << std::endl;
+        if (row + 1 < meta_shape[0]) {
+          EXPECT_EQ(tensor_offset.at(row + 1, col), pair01 >> 4)
+              << "quantized offset mismatch at [" << row + 1 << "," << col << "]"
+              << " shape[" << rows << "," << columns << "]"
+              << (ColumnMajorQuantBlocking ? "Column-wise-block" : "Row-wise-block")
+              << std::endl;
+        }
+      }
+
+      EXPECT_EQ(tensor_scale.at(row + 0, col), tensor_o_scales.at(row + 0, col))
+          << "quantized scale mismatch at [" << row << "," << col << "]"
+          << " shape[" << rows << "," << columns << "]"
+          << (ColumnMajorQuantBlocking ? "Column-wise-block" : "Row-wise-block")
+          << std::endl;
+      if (row + 1 < meta_shape[0]) {
+        EXPECT_EQ(tensor_scale.at(row + 1, col), tensor_o_scales.at(row + 1, col))
+            << "quantized scale mismatch at [" << row + 1 << "," << col << "]"
+            << " shape[" << rows << "," << columns << "]"
+            << (ColumnMajorQuantBlocking ? "Column-wise-block" : "Row-wise-block")
+            << std::endl;
+      }
+    }
+  }
+
+  //
+  // Now we just setup fp16 weights tensor_dequant, quantized weights tensor_q_weight,
+  // quantization scale tensor_scale and quantization offset tensor_offset. The above
+  // testing just make sure our test setup is consistent with quantization tool output.
+  //
+  // Next we test the prepack code
+  //
+
+  std::vector<ElementW> packed_w_ref(q_weight_shape.product());
+  MatrixRef<ElementW, LayoutWPack, true> tensor_packed_w_ref(
+      packed_w_ref, make_Position(rows, columns / 2));
+  prepack_weights_ref(rows, columns, tensor_q_weight, tensor_packed_w_ref);
+
+  std::vector<ElementW> packed_w(q_weight_shape.product());
+  MatrixRef<ElementW, LayoutWPack, true> tensor_packed_w(
+      packed_w, make_Position(rows, columns / 2));
+  Base::prepack_weights(rows, columns, o_elements, packed_w);
+
+  for (int col = 0; col < tensor_packed_w.shape()[1]; ++col) {
+    for (int row = 0; row < tensor_packed_w.shape()[0]; ++row) {
+      EXPECT_EQ(tensor_packed_w_ref.at(row, col), tensor_packed_w.at(row, col))
+          << "prepacked weights mismatch at [" << row << "," << col << "]"
+          << " shape[" << rows << "," << columns << "]"
+          << (ColumnMajorQuantBlocking ? "Column-wise-block" : "Row-wise-block")
+          << std::endl;
+    }
+  }
+
+  std::vector<ElementT> packed_scales_ref(meta_shape.product());
+  MatrixRef<ElementT, LayoutQmeta, true> tensor_packed_s_ref =
+      Base::ShouldRearrangeMeta ? make_MatrixRef<ElementT, LayoutQmeta, true>(packed_scales_ref, meta_shape)
+                                : tensor_scale;
+  if (Base::ShouldRearrangeMeta) {
+    prepack_quant_scales_ref<ElementT, LayoutQmeta, QuantBlocking>(
+        rows, columns, tensor_scale.const_ref(), tensor_packed_s_ref);
+  }
+
+  std::vector<ElementT> packed_scales(meta_shape.product());
+  MatrixRef<ElementT, LayoutQmeta, true> tensor_packed_s(
+      packed_scales, meta_shape);
+  Base::prepack_quant_scales(rows, columns, o_scales, packed_scales);
+
+  for (int col = 0; col < tensor_packed_s.shape()[1]; ++col) {
+    for (int row = 0; row < tensor_packed_s.shape()[0]; ++row) {
+      EXPECT_EQ(tensor_packed_s_ref.at(row, col), tensor_packed_s.at(row, col))
+          << "prepacked scales mismatch at [" << row << "," << col << "]"
+          << " shape[" << rows << "," << columns << "]"
+          << (ColumnMajorQuantBlocking ? "Column-wise-block" : "Row-wise-block")
+          << std::endl;
+    }
+  }
+
+  if (has_offset) {
+    std::vector<ElementQOffset> packed_zp_ref(meta_shape.product());
+    MatrixRef<ElementQOffset, LayoutQmeta, true> tensor_packed_zp_ref =
+        Base::ShouldRearrangeMeta ? make_MatrixRef<ElementQOffset, LayoutQmeta, true>(packed_zp_ref, meta_shape)
+                                  : tensor_offset;
+    if (Base::ShouldRearrangeMeta) {
+      prepack_quant_offsets_ref<LayoutQmeta, QuantBlocking>(
+          rows, columns, tensor_offset.const_ref(), tensor_packed_zp_ref);
+    }
+
+    std::vector<ElementQOffset> packed_zp(meta_shape.product());
+    MatrixRef<ElementQOffset, LayoutQmeta, true> tensor_packed_zp(
+        packed_zp, meta_shape);
+    Base::prepack_quant_offsets(rows, columns, o_zp, packed_zp);
+
+    for (int col = 0; col < tensor_packed_zp.shape()[1]; ++col) {
+      for (int row = 0; row < tensor_packed_zp.shape()[0]; ++row) {
+        EXPECT_EQ(tensor_packed_zp_ref.at(row, col), tensor_packed_zp.at(row, col))
+            << "prepacked offsets mismatch at [" << row << "," << col << "]"
+            << " shape[" << rows << "," << columns << "]"
+            << (ColumnMajorQuantBlocking ? "Column-wise-block" : "Row-wise-block")
+            << std::endl;
+      }
+    }
+  }
+}
+
+// TODO: code runs on CPU, but this is for sm80 only, maybe enable only when test on sm80
+TEST(BlkQ4_GEMM, PrepackSm80Test) {
+  testPrepack<false>(32, 32);
+  testPrepack<false>(32, 32, false);
+  testPrepack<true>(32, 32);
+  testPrepack<true>(32, 32, false);
+  testPrepack<false>(32, 64);
+  testPrepack<false>(32, 128);
+  testPrepack<false>(32, 256);
+  testPrepack<false>(64, 32);
+  testPrepack<false>(128, 32);
+  testPrepack<false>(256, 32);
+  testPrepack<false>(256, 256);
+  testPrepack<false>(32, 128, false);
+  testPrepack<false>(128, 32, false);
+  testPrepack<false>(256, 256, false);
+  testPrepack<true>(32, 64);
+  testPrepack<true>(32, 128);
+  testPrepack<true>(32, 256);
+  testPrepack<true>(64, 32);
+  testPrepack<true>(128, 32);
+  testPrepack<true>(256, 32);
+  testPrepack<true>(256, 256);
+  testPrepack<true>(32, 128, false);
+  testPrepack<true>(128, 32, false);
+  testPrepack<true>(256, 256, false);
+}
+
+}  // namespace test
+}  // namespace onnxruntime
diff --git a/onnxruntime/test/providers/internal_testing/internal_testing_tests.cc b/onnxruntime/test/providers/internal_testing/internal_testing_tests.cc
index f7499fd7ad812..8955a83e66c01 100644
--- a/onnxruntime/test/providers/internal_testing/internal_testing_tests.cc
+++ b/onnxruntime/test/providers/internal_testing/internal_testing_tests.cc
@@ -203,7 +203,8 @@ TEST(InternalTestingEP, TestMixOfStaticAndCompiledKernels) {
 }
 
 TEST(InternalTestingEP, TestNhwcConversionOfStaticKernels) {
-  const ORTCHAR_T* ort_model_path = ORT_MODEL_FOLDER "squeezenet/model.onnx";
+  // the internal NHWC domain supports opset 11 and later
+  const ORTCHAR_T* ort_model_path = ORT_MODEL_FOLDER "squeezenet/model_opset11.onnx";
 
   SessionOptions so;
   // set this if you want to manually inspect the optimized model
diff --git a/onnxruntime/test/providers/kernel_compute_test_utils.cc b/onnxruntime/test/providers/kernel_compute_test_utils.cc
index 0bcf3cbbfd089..977a5bd9ea7b8 100644
--- a/onnxruntime/test/providers/kernel_compute_test_utils.cc
+++ b/onnxruntime/test/providers/kernel_compute_test_utils.cc
@@ -5,6 +5,8 @@
 
 #include "test/providers/kernel_compute_test_utils.h"
 
+#include <utility>
+
 #include "core/framework/execution_providers.h"
 #include "core/optimizer/optimizer_execution_frame.h"
 #include "test/util/include/default_providers.h"
@@ -55,12 +57,20 @@ void KernelComputeTester::Run(std::unordered_set<int> strided_outputs) {
     }
 #if defined(USE_CUDA) || defined(USE_ROCM)
     if ((provider_ == kCudaExecutionProvider || provider_ == kRocmExecutionProvider) && !data.is_cpu_data_) {
-      OrtValue gpu_value;
       const Tensor& tensor = data.value_.Get<Tensor>();
-      Tensor::InitOrtValue(tensor.DataType(), tensor.Shape(),
-                           execution_providers.Get(ep_type)->CreatePreferredAllocators()[0], gpu_value,
-                           tensor.Strides());
-      ASSERT_STATUS_OK(dtm.CopyTensor(tensor, *gpu_value.GetMutable<Tensor>()));
+
+      Tensor gpu_tensor(tensor.DataType(), tensor.Shape(),
+                        execution_providers.Get(ep_type)->CreatePreferredAllocators()[0]);
+
+      if (const auto strides = tensor.Strides(); !strides.empty()) {
+        gpu_tensor.SetShapeAndStrides(tensor.Shape(), strides);
+      }
+
+      ASSERT_STATUS_OK(dtm.CopyTensor(tensor, gpu_tensor));
+
+      OrtValue gpu_value;
+      Tensor::InitOrtValue(std::move(gpu_tensor), gpu_value);
+
       initializer_map[name] = gpu_value;
     }
 #endif
@@ -161,8 +171,7 @@ void KernelComputeTester::Run(std::unordered_set<int> strided_outputs) {
       } else {
         const Tensor& tensor = outputs[i].Get<Tensor>();
         Tensor::InitOrtValue(tensor.DataType(), tensor.Shape(),
-                             execution_providers.Get(cpu_ep_type)->CreatePreferredAllocators()[0], cpu_value,
-                             tensor.Strides());
+                             execution_providers.Get(cpu_ep_type)->CreatePreferredAllocators()[0], cpu_value);
         ASSERT_STATUS_OK(dtm.CopyTensor(tensor, *cpu_value.GetMutable<Tensor>()));
       }
 
diff --git a/onnxruntime/test/providers/kernel_compute_test_utils.h b/onnxruntime/test/providers/kernel_compute_test_utils.h
index aed5856fea5a2..a93fd24a3ad4f 100644
--- a/onnxruntime/test/providers/kernel_compute_test_utils.h
+++ b/onnxruntime/test/providers/kernel_compute_test_utils.h
@@ -75,7 +75,12 @@ class KernelComputeTester {
     OrtValue value;
     TensorShape shape(dims);
     auto allocator = AllocatorManager::Instance().GetAllocator(CPU);
-    Tensor::InitOrtValue(DataTypeImpl::GetType<T>(), shape, std::move(allocator), value, strides);
+    Tensor::InitOrtValue(DataTypeImpl::GetType<T>(), shape, std::move(allocator), value);
+
+    if (!strides.empty()) {
+      value.GetMutable<Tensor>()->SetShapeAndStrides(shape, strides);
+    }
+
     if (values) {
       Tensor* tensor = value.GetMutable<Tensor>();
       auto* p_data = tensor->MutableData<T>();
diff --git a/onnxruntime/test/providers/nnapi/nnapi_basic_test.cc b/onnxruntime/test/providers/nnapi/nnapi_basic_test.cc
index 0e783a94c5479..b3e1025e7367c 100644
--- a/onnxruntime/test/providers/nnapi/nnapi_basic_test.cc
+++ b/onnxruntime/test/providers/nnapi/nnapi_basic_test.cc
@@ -556,10 +556,11 @@ TEST(NnapiExecutionProviderTest, ActivationOutsideOfPartition) {
   constexpr auto* model_file_name = ORT_TSTR("testdata/mnist.basic.ort");
   // stop NNAPI partitioning at Relu so NNAPI EP only takes first Conv
   const auto nnapi_partitioning_stop_ops = "Relu";
-  TestModelLoad(model_file_name, std::make_unique<NnapiExecutionProvider>(0, nnapi_partitioning_stop_ops),
-                // expect one NNAPI partition
-                [](const Graph& graph) { ASSERT_EQ(CountAssignedNodes(graph, kNnapiExecutionProvider), 1)
-                                             << "Exactly one node should have been taken by the NNAPI EP"; });
+  TestModelLoad(
+      model_file_name, std::make_unique<NnapiExecutionProvider>(0, nnapi_partitioning_stop_ops),
+      // expect one NNAPI partition
+      [](const Graph& graph) { ASSERT_EQ(CountAssignedNodes(graph, kNnapiExecutionProvider), 1)
+                                   << "Exactly one node should have been taken by the NNAPI EP"; });
 }
 
 }  // namespace test
diff --git a/onnxruntime/test/providers/qnn/batch_norm_htp_test.cc b/onnxruntime/test/providers/qnn/batch_norm_htp_test.cc
index 9b65ca7bda3e2..b4e8f5390787c 100644
--- a/onnxruntime/test/providers/qnn/batch_norm_htp_test.cc
+++ b/onnxruntime/test/providers/qnn/batch_norm_htp_test.cc
@@ -175,13 +175,7 @@ static void RunBatchNormQDQTest(const TestInputDef<float>& input_def,
 // TODO: FIX TRANSLATION!!!
 // Check that QNN compiles DQ -> BatchNormalization -> Q as a single unit.
 // Use an input of rank 3.
-// QNN v2.13
-// Inaccuracy detected for output 'output', element 4.
-// Output quant params: scale=0.019084848463535309, zero_point=9.
-// Expected val: 1.7755576372146606
-// QNN QDQ val: 2.9963212013244629 (err 1.2207635641098022)
-// CPU QDQ val: 0.82064849138259888 (err 0.95490914583206177)
-TEST_F(QnnHTPBackendTests, DISABLED_BatchNorm1D) {
+TEST_F(QnnHTPBackendTests, BatchNorm1D) {
   constexpr int64_t num_channels = 2;
 
   RunBatchNormQDQTest(TestInputDef<float>({1, num_channels, 3}, false, {-5.0f, -4.0f, -3.0f, 0.0f, 2.0f, 5.0f}),  // Input data
@@ -193,13 +187,7 @@ TEST_F(QnnHTPBackendTests, DISABLED_BatchNorm1D) {
 // TODO: FIX TRANSLATION!!!
 // Check that QNN compiles DQ -> BatchNormalization -> Q as a single unit.
 // Use an input of rank 4.
-// QNN v2.13
-// Inaccuracy detected for output 'output', element 14.
-// Output quant params: scale=0.023071292787790298, zero_point=19.
-// Expected val: 2.8554618358612061
-// QNN QDQ val: 5.3294687271118164 (err 2.4740068912506104)
-// CPU QDQ val: 1.6611330509185791 (err 1.194328784942627)
-TEST_F(QnnHTPBackendTests, DISABLED_BatchNorm2D) {
+TEST_F(QnnHTPBackendTests, BatchNorm2D) {
   constexpr int64_t num_channels = 2;
   std::vector<float> input_data = {-8.0f, -6.0f, -4.0f, -2.0f, 0.0f, 1.1f, 3.3f, 8.0f,
                                    -7.0f, -5.0f, -3.0f, -1.0f, 0.0f, 2.1f, 4.3f, 7.0f};
@@ -226,4 +214,4 @@ TEST_F(QnnHTPBackendTests, BatchNorm3D) {
 }  // namespace test
 }  // namespace onnxruntime
 
-#endif
\ No newline at end of file
+#endif
diff --git a/onnxruntime/test/providers/qnn/gather_op_htp_test.cc b/onnxruntime/test/providers/qnn/gather_op_htp_test.cc
index 37e0db906d054..48cd5ad99540a 100644
--- a/onnxruntime/test/providers/qnn/gather_op_htp_test.cc
+++ b/onnxruntime/test/providers/qnn/gather_op_htp_test.cc
@@ -15,6 +15,39 @@ namespace onnxruntime {
 namespace test {
 #if defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
 
+// Returns a function that creates a graph with a QDQ Gather operator.
+template <typename QuantType, typename IndicesType>
+GetTestQDQModelFn<QuantType> BuildQDQGatherTestCase(const TestInputDef<float>& input_def,
+                                                    const TestInputDef<IndicesType>& indices_def,
+                                                    const std::vector<ONNX_NAMESPACE::AttributeProto>& attrs,
+                                                    bool use_contrib_qdq = false) {
+  return [input_def, indices_def, attrs, use_contrib_qdq](ModelTestBuilder& builder,
+                                                          std::vector<QuantParams<QuantType>>& output_qparams) {
+    // input -> Q -> DQ ->
+    NodeArg* input = MakeTestInput(builder, input_def);
+    QuantParams<QuantType> input_qparams = GetTestInputQuantParams<QuantType>(input_def);
+    NodeArg* input_qdq = AddQDQNodePair<QuantType>(builder, input, input_qparams.scale, input_qparams.zero_point,
+                                                   use_contrib_qdq);
+
+    // indices input
+    NodeArg* indices_input = MakeTestInput(builder, indices_def);
+
+    // Gather op
+    NodeArg* gather_output = builder.MakeIntermediate();
+    Node& gather_node = builder.AddNode("Gather", {input_qdq, indices_input}, {gather_output});
+
+    for (const auto& attr : attrs) {
+      gather_node.AddAttributeProto(attr);
+    }
+
+    // op_output -> Q -> DQ -> output
+    // NOTE: Input and output quantization parameters must be equal for Gather.
+    output_qparams[0] = input_qparams;  // Overwrite!
+    AddQDQNodePairWithOutputAsGraphOutput<QuantType>(builder, gather_output, input_qparams.scale,
+                                                     input_qparams.zero_point, use_contrib_qdq);
+  };
+}
+
 // Test the accuracy of a QDQ Gather model on QNN EP. Checks if the QDQ model on QNN EP as accurate as the QDQ model on CPU EP
 // (compared to float32 model).
 template <typename QuantType, typename IndicesType>
@@ -22,7 +55,8 @@ static void RunQDQGatherOpTest(const TestInputDef<float>& input_def,
                                const TestInputDef<IndicesType>& indices_def,
                                const std::vector<ONNX_NAMESPACE::AttributeProto>& attrs,
                                int opset,
-                               ExpectedEPNodeAssignment expected_ep_assignment) {
+                               ExpectedEPNodeAssignment expected_ep_assignment,
+                               bool use_contrib_qdq = false) {
   ProviderOptions provider_options;
 #if defined(_WIN32)
   provider_options["backend_path"] = "QnnHtp.dll";
@@ -31,7 +65,8 @@ static void RunQDQGatherOpTest(const TestInputDef<float>& input_def,
 #endif
 
   auto f32_model_builder = BuildOpTestCase<float, IndicesType>("Gather", {input_def}, {indices_def}, attrs);
-  auto qdq_model_builder = BuildQDQOpTestCase<QuantType, IndicesType>("Gather", {input_def}, {indices_def}, attrs);
+  auto qdq_model_builder = BuildQDQGatherTestCase<QuantType, IndicesType>(input_def, indices_def, attrs,
+                                                                          use_contrib_qdq);
 
   TestQDQModelAccuracy<QuantType>(f32_model_builder,
                                   qdq_model_builder,
@@ -52,6 +87,16 @@ TEST_F(QnnHTPBackendTests, GatherOp_IndicesStaticInt64_Axis0) {
                                        ExpectedEPNodeAssignment::All);
 }
 
+// Test 16-bit QDQ Gather with static int64 indices with default axis.
+TEST_F(QnnHTPBackendTests, GatherOp_U16_IndicesStaticInt64_Axis0) {
+  RunQDQGatherOpTest<uint16_t, int64_t>(TestInputDef<float>({3, 2}, false, {1.0f, 1.2f, 2.3f, 3.4f, 4.5f, 5.7f}),
+                                        TestInputDef<int64_t>({2, 2}, true, {0, 1, 1, 2}),
+                                        {utils::MakeAttribute("axis", static_cast<int64_t>(0))},
+                                        13,
+                                        ExpectedEPNodeAssignment::All,
+                                        true);  // Use 'com.microsoft' Q/DQ ops
+}
+
 // Tests that dynamic int64 indices are not supported on HTP backend.
 TEST_F(QnnHTPBackendTests, GatherOp_IndicesDynamicInt64_Axis0) {
   RunQDQGatherOpTest<uint8_t, int64_t>(TestInputDef<float>({3, 2}, false, {1.0f, 1.2f, 2.3f, 3.4f, 4.5f, 5.7f}),
diff --git a/onnxruntime/test/providers/qnn/instance_norm_htp_test.cc b/onnxruntime/test/providers/qnn/instance_norm_htp_test.cc
index f662ac14336f8..3598ba1ac8851 100644
--- a/onnxruntime/test/providers/qnn/instance_norm_htp_test.cc
+++ b/onnxruntime/test/providers/qnn/instance_norm_htp_test.cc
@@ -21,21 +21,26 @@ template <typename QuantType>
 static GetTestQDQModelFn<QuantType> BuildQDQInstanceNormTestCase(const TestInputDef<float>& input_def,
                                                                  const TestInputDef<float>& scale_def,
                                                                  const TestInputDef<float>& bias_def,
-                                                                 const std::vector<ONNX_NAMESPACE::AttributeProto>& attrs) {
-  return [input_def, scale_def, bias_def, attrs](ModelTestBuilder& builder,
-                                                 std::vector<QuantParams<QuantType>>& output_qparams) {
+                                                                 const std::vector<ONNX_NAMESPACE::AttributeProto>& attrs,
+                                                                 bool use_contrib_qdq = false) {
+  return [input_def, scale_def, bias_def, attrs,
+          use_contrib_qdq](ModelTestBuilder& builder,
+                           std::vector<QuantParams<QuantType>>& output_qparams) {
     // input => Q => DQ =>
     NodeArg* input = MakeTestInput(builder, input_def);
     QuantParams<QuantType> input_qparams = GetTestInputQuantParams<QuantType>(input_def);
-    NodeArg* input_qdq = AddQDQNodePair(builder, input, input_qparams.scale, input_qparams.zero_point);
+    NodeArg* input_qdq = AddQDQNodePair(builder, input, input_qparams.scale, input_qparams.zero_point,
+                                        use_contrib_qdq);
 
     // scale => Q => DQ =>
     NodeArg* scale = MakeTestInput(builder, scale_def);
     QuantParams<QuantType> scale_qparams = GetTestInputQuantParams<QuantType>(scale_def);
-    NodeArg* scale_qdq = AddQDQNodePair(builder, scale, scale_qparams.scale, scale_qparams.zero_point);
+    NodeArg* scale_qdq = AddQDQNodePair(builder, scale, scale_qparams.scale, scale_qparams.zero_point,
+                                        use_contrib_qdq);
 
     // bias (as int32) => DQ =>
-    NodeArg* bias_qdq = MakeTestQDQBiasInput(builder, bias_def, input_qparams.scale * scale_qparams.scale);
+    NodeArg* bias_qdq = MakeTestQDQBiasInput(builder, bias_def, input_qparams.scale * scale_qparams.scale,
+                                             use_contrib_qdq);
 
     // InstanceNormalization operator.
     auto* instance_norm_output = builder.MakeIntermediate();
@@ -46,7 +51,8 @@ static GetTestQDQModelFn<QuantType> BuildQDQInstanceNormTestCase(const TestInput
     }
 
     // Add instance_norm_output -> Q -> output_u8
-    AddQDQNodePairWithOutputAsGraphOutput<QuantType>(builder, instance_norm_output, output_qparams[0].scale, output_qparams[0].zero_point);
+    AddQDQNodePairWithOutputAsGraphOutput<QuantType>(builder, instance_norm_output, output_qparams[0].scale,
+                                                     output_qparams[0].zero_point, use_contrib_qdq);
   };
 }
 
@@ -65,7 +71,8 @@ static void RunInstanceNormQDQTest(const TestInputDef<float>& input_def,
                                    const TestInputDef<float>& scale_def,
                                    const TestInputDef<float>& bias_def,
                                    const std::vector<ONNX_NAMESPACE::AttributeProto>& attrs,
-                                   ExpectedEPNodeAssignment expected_ep_assignment) {
+                                   ExpectedEPNodeAssignment expected_ep_assignment,
+                                   bool use_contrib_qdq = false) {
   ProviderOptions provider_options;
 #if defined(_WIN32)
   provider_options["backend_path"] = "QnnHtp.dll";
@@ -75,23 +82,40 @@ static void RunInstanceNormQDQTest(const TestInputDef<float>& input_def,
 
   // Runs model with DQ-> InstanceNorm -> Q and compares the outputs of the CPU and QNN EPs.
   TestQDQModelAccuracy(BuildOpTestCase<float>("InstanceNormalization", {input_def, scale_def, bias_def}, {}, attrs),
-                       BuildQDQInstanceNormTestCase<QuantType>(input_def, scale_def, bias_def, attrs),
+                       BuildQDQInstanceNormTestCase<QuantType>(input_def, scale_def, bias_def, attrs, use_contrib_qdq),
                        provider_options,
                        18,
-                       expected_ep_assignment,
-                       1e-5f);
+                       expected_ep_assignment);
 }
 
 // Check that QNN compiles DQ -> InstanceNormalization -> Q as a single unit.
 // Use an input of rank 4.
 TEST_F(QnnHTPBackendTests, InstanceNormU8) {
-  RunInstanceNormQDQTest(TestInputDef<float>({1, 2, 3, 3}, false, -10.0f, 10.0f),
-                         TestInputDef<float>({2}, true, -2.0f, 2.0f),
-                         TestInputDef<float>({2}, true, -3.0f, 3.0f),
+  // fails with QNN 2.15.1 with the following fixed input.
+  std::vector<float> input_data = {3.21289f, -5.9981f, -1.72799f, 6.27263f, 3.36205f, -1.93515f, -5.40113f, 3.75648f, 6.15357f,
+                                   -5.25769f, 2.73637f, -0.901382f, -6.55612f, 1.99497f, -4.79228f, 2.69813f, 8.3064f, 0.0362501f};
+  std::vector<float> scale_data = {-0.148738f, -1.45158f};
+  std::vector<float> bias_data = {-2.2785083772f, 2.3338717017f};
+  RunInstanceNormQDQTest(TestInputDef<float>({1, 2, 3, 3}, false, input_data).OverrideValueRange(-10.0f, 10.0f),
+                         TestInputDef<float>({2}, true, scale_data).OverrideValueRange(-2.0f, 2.0f),
+                         TestInputDef<float>({2}, true, bias_data).OverrideValueRange(-3.0f, 3.0f),
                          {},
                          ExpectedEPNodeAssignment::All);
 }
 
+TEST_F(QnnHTPBackendTests, InstanceNormU16) {
+  std::vector<float> input_data = {3.21289f, -5.9981f, -1.72799f, 6.27263f, 3.36205f, -1.93515f, -5.40113f, 3.75648f, 6.15357f,
+                                   -5.25769f, 2.73637f, -0.901382f, -6.55612f, 1.99497f, -4.79228f, 2.69813f, 8.3064f, 0.0362501f};
+  std::vector<float> scale_data = {-0.148738f, -1.45158f};
+  std::vector<float> bias_data = {-2.2785083772f, 2.3338717017f};
+  RunInstanceNormQDQTest<uint16_t>(TestInputDef<float>({1, 2, 3, 3}, false, input_data).OverrideValueRange(-10.0f, 10.0f),
+                                   TestInputDef<float>({2}, true, scale_data).OverrideValueRange(-2.0f, 2.0f),
+                                   TestInputDef<float>({2}, true, bias_data).OverrideValueRange(-3.0f, 3.0f),
+                                   {},
+                                   ExpectedEPNodeAssignment::All,
+                                   true);  // Use contrib Q/DQ ops for 16bit support.
+}
+
 // Check that QNN compiles DQ -> InstanceNormalization -> Q as a single unit.
 // Use an input of rank 3.
 TEST_F(QnnHTPBackendTests, InstanceNormU8Rank3) {
@@ -102,6 +126,58 @@ TEST_F(QnnHTPBackendTests, InstanceNormU8Rank3) {
                          ExpectedEPNodeAssignment::All);
 }
 
+// Test 8-bit QDQ InstanceNormalization with an input of rank 3 with N != 1,
+// which requires wrapping the QNN InstanceNorm op with reshapes.
+TEST_F(QnnHTPBackendTests, InstanceNormU8Rank3_BatchSizeNot1) {
+  std::vector<float> input_data = {6.0f, 4.0f, 2.0f, 6.0f, 8.0f, 2.0f,
+                                   -8.0f, -6.0f, 0.0f, 1.0f, 3.0f, 6.0f};
+  RunInstanceNormQDQTest(TestInputDef<float>({2, 2, 3}, false, input_data),
+                         TestInputDef<float>({2}, true, {1.0f, 2.0f}),
+                         TestInputDef<float>({2}, true, {1.0f, 3.0f}),
+                         {},
+                         ExpectedEPNodeAssignment::All);
+}
+
+// Test 16-bit QDQ InstanceNormalization with an input of rank 3 with N != 1,
+// which requires wrapping the QNN InstanceNorm op with reshapes.
+TEST_F(QnnHTPBackendTests, InstanceNormU16Rank3_BatchSizeNot1) {
+  std::vector<float> input_data = {6.0f, 4.0f, 2.0f, 6.0f, 8.0f, 2.0f,
+                                   -8.0f, -6.0f, 0.0f, 1.0f, 3.0f, 6.0f};
+  RunInstanceNormQDQTest<uint16_t>(TestInputDef<float>({2, 2, 3}, false, input_data),
+                                   TestInputDef<float>({2}, true, {1.0f, 2.0f}),
+                                   TestInputDef<float>({2}, true, {1.0f, 3.0f}),
+                                   {},
+                                   ExpectedEPNodeAssignment::All,
+                                   true);  // Use contrib Q/DQ ops for 16bit support.
+}
+
+// Test 8-bit QDQ InstanceNormalization with an input of rank 3 with N != 1,
+// which requires wrapping the QNN InstanceNorm op with reshapes.
+// Input 0 is an initializer.
+TEST_F(QnnHTPBackendTests, InstanceNormU8Rank3_BatchSizeNot1_Initializer) {
+  std::vector<float> input_data = {6.0f, 4.0f, 2.0f, 6.0f, 8.0f, 2.0f,
+                                   -8.0f, -6.0f, 0.0f, 1.0f, 3.0f, 6.0f};
+  RunInstanceNormQDQTest(TestInputDef<float>({2, 2, 3}, true, input_data),
+                         TestInputDef<float>({2}, true, {1.0f, 2.0f}),
+                         TestInputDef<float>({2}, false, {1.0f, 3.0f}),
+                         {},
+                         ExpectedEPNodeAssignment::All);
+}
+
+// Test 16-bit QDQ InstanceNormalization with an input of rank 3 with N != 1,
+// which requires wrapping the QNN InstanceNorm op with reshapes.
+// Input 0 is an initializer.
+TEST_F(QnnHTPBackendTests, InstanceNormU16Rank3_BatchSizeNot1_Initializer) {
+  std::vector<float> input_data = {6.0f, 4.0f, 2.0f, 6.0f, 8.0f, 2.0f,
+                                   -8.0f, -6.0f, 0.0f, 1.0f, 3.0f, 6.0f};
+  RunInstanceNormQDQTest<uint16_t>(TestInputDef<float>({2, 2, 3}, true, input_data),
+                                   TestInputDef<float>({2}, true, {1.0f, 2.0f}),
+                                   TestInputDef<float>({2}, false, {1.0f, 3.0f}),
+                                   {},
+                                   ExpectedEPNodeAssignment::All,
+                                   true);  // Use contrib Q/DQ ops for 16-bit support.
+}
+
 // Check that QNN InstanceNorm operator does not handle inputs with rank > 4.
 TEST_F(QnnHTPBackendTests, InstanceNormU8Rank5) {
   RunInstanceNormQDQTest(TestInputDef<float>({1, 2, 3, 3, 3}, false, -10.0f, 10.0f),
@@ -116,4 +192,4 @@ TEST_F(QnnHTPBackendTests, InstanceNormU8Rank5) {
 }  // namespace test
 }  // namespace onnxruntime
 
-#endif
\ No newline at end of file
+#endif
diff --git a/onnxruntime/test/providers/qnn/matmul_test.cpp b/onnxruntime/test/providers/qnn/matmul_test.cpp
index e721ccbcb45a9..3da3dc858175b 100644
--- a/onnxruntime/test/providers/qnn/matmul_test.cpp
+++ b/onnxruntime/test/providers/qnn/matmul_test.cpp
@@ -112,12 +112,13 @@ TEST_F(QnnCPUBackendTests, MatMulOp) {
 }
 
 // Test MatMul broadcasting
-// Note slight inaccuracy in CPU backend:
+// Failed randomly on Linux
+// Value of: expected_tensor.DataAsSpan<float>()
 // Expected: contains 896 values, where each value and its corresponding value in 16-byte object
-// <80-03 00-00 00-00 00-00 40-00 34-DD F7-01 00-00> are an almost-equal pair
-// Actual: 16-byte object <80-03 00-00 00-00 00-00 40-00 23-DD F7-01 00-00>,
-// where the value pair (73.68116, 73.680809) at index #80 don't match, which is -0.000350952 from 73.6812
-TEST_F(QnnCPUBackendTests, MatMulOp_Broadcast) {
+// <80-03 00-00 00-00 00-00 40-B8 53-08 CC-7F 00-00> are an almost-equal pair
+// Actual: 16-byte object <80-03 00-00 00-00 00-00 C0-B7 43-08 CC-7F 00-00>, where the value pair
+// (-5.19657087, 0) at index #29 don't match, which is 5.19657 from -5.19657
+TEST_F(QnnCPUBackendTests, DISABLED_MatMulOp_Broadcast) {
   // Create two matrices with element values in the range [-10.0, 10.0].
   std::vector<float> input_a = GetFloatDataInRange(-10.0f, 10.0f, 28 * 64);
   std::vector<float> input_b = GetFloatDataInRange(-10.0f, 10.0f, 64 * 32);
@@ -141,11 +142,6 @@ TEST_F(QnnHTPBackendTests, MatMulOp_HTP_u8) {
 }
 
 // Test QDQ MatMul with 16-bit act, 8-bit weights (static)
-// TODO: (SLIGHT) Inaccuracy detected for output 'output', element 0.
-// Output quant params: scale=0.0015259021893143654, zero_point=0.
-// Expected val: 98
-// QNN QDQ val: 97.720298767089844 (err 0.27970123291015625)
-// CPU QDQ val: 97.726402282714844 (err 0.27359771728515625)
 TEST_F(QnnHTPBackendTests, MatMulOp_HTP_A16_W8Static) {
   std::vector<float> input0_data = {-10.0f, -4.0f, -2.0f, 0.0f, 5.0f, 10.0f};
   std::vector<float> input1_data = {-10.0f, -6.0f, -1.0f, 0.0f, 3.0f, 10.0f};
@@ -157,6 +153,40 @@ TEST_F(QnnHTPBackendTests, MatMulOp_HTP_A16_W8Static) {
                                                     7e-3f);
 }
 
+// Test QDQ MatMul with uint16 activation uint16 weights, both dynamic
+// Inaccuracy detected for output 'output_0', element 1.
+// Output quant params: scale=0.0015259021893143654, zero_point=0.
+// Expected val: 40
+// QNN QDQ val: 39.681087493896484 (err 0.31891250610351562)
+// CPU QDQ val: 39.99847412109375 (err 0.00152587890625)
+TEST_F(QnnHTPBackendTests, DISABLED_MatMulOp_HTP_A16_W16Dynamic) {
+  std::vector<float> input0_data = {-10.0f, -4.0f, -2.0f, 0.0f, 5.0f, 10.0f};
+  std::vector<float> input1_data = {-10.0f, -6.0f, -1.0f, 0.0f, 3.0f, 10.0f};
+  RunQDQMatMulOpOpTest<uint16_t, uint16_t, uint16_t>(TestInputDef<float>({2, 3}, false, input0_data),
+                                                     TestInputDef<float>({3, 2}, false, input1_data),
+                                                     ExpectedEPNodeAssignment::All,
+                                                     18,
+                                                     true,  // Use com.microsoft Q/DQ ops
+                                                     7e-3f);
+}
+
+// Test QDQ MatMul with uint16 activation uint16 weights, both dynamic
+// Inaccuracy detected for output 'output_0', element 1.
+// Output quant params: scale=0.71908456087112427, zero_point=1.
+// Expected val: 46848.41015625
+// QNN QDQ val: 46844.04296875 (err 4.3671875)
+// CPU QDQ val: 46848.359375 (err 0.05078125)
+TEST_F(QnnHTPBackendTests, DISABLED_MatMulOp_HTP_A16_W16DynamicLarge) {
+  std::vector<float> input0_data = GetFloatDataInRange(-10.0f, 10.0f, 12 * 96 * 512);
+  std::vector<float> input1_data = GetFloatDataInRange(-10.0f, 10.0f, 12 * 96 * 512);
+  RunQDQMatMulOpOpTest<uint16_t, uint16_t, uint16_t>(TestInputDef<float>({1, 12, 96, 512}, false, input0_data),
+                                                     TestInputDef<float>({1, 12, 512, 96}, false, input1_data),
+                                                     ExpectedEPNodeAssignment::All,
+                                                     18,
+                                                     true,  // Use com.microsoft Q/DQ ops
+                                                     7e-3f);
+}
+
 // Test 16-bit QDQ MatMul with static weights
 // TODO: Inaccuracy detected for output 'output', element 0.
 // Output quant params: scale=0.0015259021893143654, zero_point=0.
diff --git a/onnxruntime/test/providers/qnn/pad_op_test.cpp b/onnxruntime/test/providers/qnn/pad_op_test.cpp
index 95961e423833a..792dbeadfa758 100644
--- a/onnxruntime/test/providers/qnn/pad_op_test.cpp
+++ b/onnxruntime/test/providers/qnn/pad_op_test.cpp
@@ -167,7 +167,7 @@ TEST_F(QnnCPUBackendTests, Pad2dPadsNotIni) {
 TEST_F(QnnCPUBackendTests, DISABLED_PadModeReflect) {
   bool has_constant_value = false;
   RunPadOpTest(TestInputDef<float>({3, 2}, false, {1.0f, 1.2f, 2.3f, 3.4f, 4.5f, 5.6f}),
-               TestInputDef<int64_t>({4}, true, {0, 2, 0, 0}),
+               TestInputDef<int64_t>({4}, true, {0, 1, 0, 0}),
                TestInputDef<float>({1}, true, {0.0f}),
                {utils::MakeAttribute("mode", "reflect")},
                ExpectedEPNodeAssignment::All,
@@ -266,13 +266,37 @@ TEST_F(QnnHTPBackendTests, PadHasConstantValueQuantized) {
                            constant_value_quantized);
 }
 
-// QNN graph execute error. Error code: 6031
-TEST_F(QnnHTPBackendTests, DISABLED_PadReflectMode) {
+TEST_F(QnnHTPBackendTests, PadReflectMode) {
+  bool has_constant_value_input = false;
+  RunQDQPadOpTest<uint8_t>(TestInputDef<float>({3, 2}, false, {1.0f, 1.2f, 2.3f, 3.4f, 4.5f, 5.6f}),
+                           TestInputDef<int64_t>({4}, true, {0, 1, 0, 0}),
+                           TestInputDef<float>({1}, true, {0.0f}),
+                           {utils::MakeAttribute("mode", "reflect")},
+                           ExpectedEPNodeAssignment::All,
+                           has_constant_value_input);
+}
+
+// Pad amount should not be greater than shape(input[0])[i] - 1
+TEST_F(QnnHTPBackendTests, PadReflectModeOutOfRangePadAmount) {
   bool has_constant_value_input = false;
   RunQDQPadOpTest<uint8_t>(TestInputDef<float>({3, 2}, false, {1.0f, 1.2f, 2.3f, 3.4f, 4.5f, 5.6f}),
                            TestInputDef<int64_t>({4}, true, {0, 2, 0, 0}),
                            TestInputDef<float>({1}, true, {0.0f}),
                            {utils::MakeAttribute("mode", "reflect")},
+                           ExpectedEPNodeAssignment::None,
+                           has_constant_value_input);
+}
+
+TEST_F(QnnHTPBackendTests, Pad4dReflectMode) {
+  bool has_constant_value_input = false;
+  RunQDQPadOpTest<uint8_t>(TestInputDef<float>({1, 2, 2, 2}, false,
+                                               {1.0f, 2.0f,
+                                                3.0f, 4.0f,
+                                                5.0f, 6.0f,
+                                                7.0f, 8.0f}),
+                           TestInputDef<int64_t>({8}, true, {0, 1, 1, 1, 0, 1, 1, 1}),
+                           TestInputDef<float>({1}, true, {0.0f}),
+                           {utils::MakeAttribute("mode", "reflect")},
                            ExpectedEPNodeAssignment::All,
                            has_constant_value_input);
 }
@@ -329,8 +353,7 @@ TEST_F(QnnHTPBackendTests, DISABLED_Pad4dOutOfRangePadConstantValue) {
                            ExpectedEPNodeAssignment::All);
 }
 
-// Pad 5d supported, but Quantize & Dequantize doesn't support 5d
-TEST_F(QnnHTPBackendTests, DISABLED_Pad5d) {
+TEST_F(QnnHTPBackendTests, Pad5d) {
   RunQDQPadOpTest<uint8_t>(TestInputDef<float>({1, 2, 2, 2, 2}, false, GetFloatDataInRange(1.0f, 10.0f, 16)),
                            TestInputDef<int64_t>({10}, true, {0, 0, 0, 1, 0, 0, 0, 1, 0, 0}),
                            TestInputDef<float>({1}, true, {2.0f}),
diff --git a/onnxruntime/test/providers/qnn/qnn_basic_test.cc b/onnxruntime/test/providers/qnn/qnn_basic_test.cc
index a441e828c0cc6..2e2acb36e8071 100644
--- a/onnxruntime/test/providers/qnn/qnn_basic_test.cc
+++ b/onnxruntime/test/providers/qnn/qnn_basic_test.cc
@@ -2,6 +2,7 @@
 // Licensed under the MIT License.
 
 #include <string>
+#include <filesystem>
 
 #include "core/session/onnxruntime_cxx_api.h"
 #include "core/session/onnxruntime_session_options_config_keys.h"
@@ -172,7 +173,9 @@ TEST(QnnEP, TestDisableCPUFallback_ConflictingConfig) {
 // The models passed to this function are subgraphs extracted from a larger model that exhibited
 // shape inferencing issues on QNN. Thus, the models are expected to have a specific input/output
 // types and shapes.
-static void RunNHWCResizeModel(const ORTCHAR_T* ort_model_path, bool use_htp) {
+static void RunNHWCResizeModel(const ORTCHAR_T* ort_model_path, bool use_htp, bool enable_qnn_saver = false,
+                               std::string htp_graph_finalization_opt_mode = "",
+                               std::string qnn_context_priority = "") {
   Ort::SessionOptions so;
 
   // Ensure all type/shape inference warnings result in errors!
@@ -183,10 +186,24 @@ static void RunNHWCResizeModel(const ORTCHAR_T* ort_model_path, bool use_htp) {
 
 #if defined(_WIN32)
   options["backend_path"] = use_htp ? "QnnHtp.dll" : "QnnCpu.dll";
+  if (enable_qnn_saver) {
+    options["qnn_saver_path"] = "QnnSaver.dll";
+  }
 #else
   options["backend_path"] = use_htp ? "libQnnHtp.so" : "libQnnCpu.so";
+  if (enable_qnn_saver) {
+    options["qnn_saver_path"] = "libQnnSaver.so";
+  }
 #endif
 
+  if (!htp_graph_finalization_opt_mode.empty()) {
+    options["htp_graph_finalization_optimization_mode"] = std::move(htp_graph_finalization_opt_mode);
+  }
+
+  if (!qnn_context_priority.empty()) {
+    options["qnn_context_priority"] = std::move(qnn_context_priority);
+  }
+
   so.AppendExecutionProvider("QNN", options);
 
   Ort::Session session(*ort_env, ort_model_path, so);
@@ -226,7 +243,7 @@ static void RunNHWCResizeModel(const ORTCHAR_T* ort_model_path, bool use_htp) {
   auto typeshape = ort_output.GetTensorTypeAndShapeInfo();
   std::vector<int64_t> output_shape = typeshape.GetShape();
 
-  ASSERT_THAT(output_shape, ::testing::ElementsAre(1, 6, 7, 10));
+  EXPECT_THAT(output_shape, ::testing::ElementsAre(1, 6, 7, 10));
 }
 
 // Test shape inference of NHWC Resize operator (opset 11) that uses
@@ -253,6 +270,23 @@ TEST_F(QnnCPUBackendTests, TestNHWCResizeShapeInference_sizes_opset18) {
   RunNHWCResizeModel(ORT_MODEL_FOLDER "nhwc_resize_sizes_opset18.onnx", false);
 }
 
+// Test that QNN Saver generates the expected files for a model meant to run on the QNN CPU backend.
+TEST_F(QnnCPUBackendTests, QnnSaver_OutputFiles) {
+  const std::filesystem::path qnn_saver_output_dir = "saver_output";
+
+  // Remove pre-existing QNN Saver output files. Note that fs::remove_all() can handle non-existing paths.
+  std::filesystem::remove_all(qnn_saver_output_dir);
+  ASSERT_FALSE(std::filesystem::exists(qnn_saver_output_dir));
+
+  RunNHWCResizeModel(ORT_MODEL_FOLDER "nhwc_resize_sizes_opset18.onnx",
+                     false,  // use_htp
+                     true);  // enable_qnn_saver
+
+  // Check that QNN Saver output files exist.
+  EXPECT_TRUE(std::filesystem::exists(qnn_saver_output_dir / "saver_output.c"));
+  EXPECT_TRUE(std::filesystem::exists(qnn_saver_output_dir / "params.bin"));
+}
+
 #if defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
 
 // Test shape inference of QDQ NHWC Resize operator (opset 18) that uses
@@ -261,6 +295,47 @@ TEST_F(QnnHTPBackendTests, TestNHWCResizeShapeInference_qdq_sizes_opset18) {
   RunNHWCResizeModel(ORT_MODEL_FOLDER "nhwc_resize_sizes_opset18.quant.onnx", true);
 }
 
+// Test that QNN Saver generates the expected files for a model meant to run on the QNN HTP backend.
+TEST_F(QnnHTPBackendTests, QnnSaver_OutputFiles) {
+  const std::filesystem::path qnn_saver_output_dir = "saver_output";
+
+  // Remove pre-existing QNN Saver output files. Note that fs::remove_all() can handle non-existing paths.
+  std::filesystem::remove_all(qnn_saver_output_dir);
+  ASSERT_FALSE(std::filesystem::exists(qnn_saver_output_dir));
+
+  RunNHWCResizeModel(ORT_MODEL_FOLDER "nhwc_resize_sizes_opset18.onnx",
+                     true,   // use_htp
+                     true);  // enable_qnn_saver
+
+  // Check that QNN Saver output files exist.
+  EXPECT_TRUE(std::filesystem::exists(qnn_saver_output_dir / "saver_output.c"));
+  EXPECT_TRUE(std::filesystem::exists(qnn_saver_output_dir / "params.bin"));
+}
+
+// Test that models run with various HTP graph finalization optimization modes.
+TEST_F(QnnHTPBackendTests, HTPGraphFinalizationOptimizationModes) {
+  constexpr std::array<const char*, 5> graph_opt_modes = {"",    // No explicit mode specified
+                                                          "0",   // Explicit default mode
+                                                          "1",   // Mode 1
+                                                          "2",   // Mode 2
+                                                          "3"};  // Mode 3
+  for (auto mode : graph_opt_modes) {
+    RunNHWCResizeModel(ORT_MODEL_FOLDER "nhwc_resize_sizes_opset18.quant.onnx",
+                       true,   // use_htp
+                       false,  // enable_qnn_saver
+                       mode);  // htp_graph_finalization_opt_mode
+  }
+}
+
+// Test that models run with high QNN context priority.
+TEST_F(QnnHTPBackendTests, QnnContextPriorityHigh) {
+  RunNHWCResizeModel(ORT_MODEL_FOLDER "nhwc_resize_sizes_opset18.quant.onnx",
+                     true,     // use_htp
+                     false,    // enable_qnn_saver
+                     "",       // htp_graph_finalization_opt_mode
+                     "high");  // qnn_context_priority
+}
+
 #endif  // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
 #endif  // !defined(ORT_MINIMAL_BUILD)
 
diff --git a/onnxruntime/test/providers/qnn/qnn_test_utils.cc b/onnxruntime/test/providers/qnn/qnn_test_utils.cc
index 51df93f8853ec..a067c9c53e57a 100644
--- a/onnxruntime/test/providers/qnn/qnn_test_utils.cc
+++ b/onnxruntime/test/providers/qnn/qnn_test_utils.cc
@@ -9,6 +9,7 @@
 #include "test/util/include/default_providers.h"
 #include "test/util/include/test/test_environment.h"
 
+#include "core/platform/env_var_utils.h"
 #include "core/common/span_utils.h"
 #include "core/framework/compute_capability.h"
 #include "core/graph/graph.h"
@@ -41,7 +42,22 @@ std::vector<float> GetFloatDataInRange(float min_val, float max_val, size_t num_
   return data;
 }
 
-void RunQnnModelTest(const GetTestModelFn& build_test_case, const ProviderOptions& provider_options,
+void TryEnableQNNSaver(ProviderOptions& qnn_options) {
+  // Allow dumping QNN API calls to file by setting an environment variable that enables the QNN Saver backend.
+  constexpr auto kEnableQNNSaverEnvironmentVariableName = "ORT_UNIT_TEST_ENABLE_QNN_SAVER";
+  static std::optional<int> enable_qnn_saver = onnxruntime::ParseEnvironmentVariable<int>(
+      kEnableQNNSaverEnvironmentVariableName);
+
+  if (enable_qnn_saver.has_value() && *enable_qnn_saver != 0) {
+#if defined(_WIN32)
+    qnn_options["qnn_saver_path"] = "QnnSaver.dll";
+#else
+    qnn_options["qnn_saver_path"] = "libQnnSaver.so";
+#endif  // defined(_WIN32)
+  }
+}
+
+void RunQnnModelTest(const GetTestModelFn& build_test_case, ProviderOptions provider_options,
                      int opset_version, ExpectedEPNodeAssignment expected_ep_assignment,
                      float fp32_abs_err, logging::Severity log_severity) {
   EPVerificationParams verification_params;
@@ -65,6 +81,7 @@ void RunQnnModelTest(const GetTestModelFn& build_test_case, const ProviderOption
   // Serialize the model to a string.
   std::string model_data;
   model.ToProto().SerializeToString(&model_data);
+  TryEnableQNNSaver(provider_options);
   RunAndVerifyOutputsWithEP(AsByteSpan(model_data.data(), model_data.size()), "QNN_EP_TestLogID",
                             QnnExecutionProviderWithOptions(provider_options),
                             helper.feeds_, verification_params);
diff --git a/onnxruntime/test/providers/qnn/qnn_test_utils.h b/onnxruntime/test/providers/qnn/qnn_test_utils.h
index 14c62f98f6a3e..396fc193bf73c 100644
--- a/onnxruntime/test/providers/qnn/qnn_test_utils.h
+++ b/onnxruntime/test/providers/qnn/qnn_test_utils.h
@@ -220,6 +220,25 @@ void InferenceModel(const std::string& model_data, const char* log_id,
                     ExpectedEPNodeAssignment expected_ep_assignment, const NameMLValMap& feeds,
                     std::vector<OrtValue>& output_vals);
 
+/**
+ * If the ORT_UNIT_TEST_ENABLE_QNN_SAVER environment variable is enabled (set to 1), this function modifies
+ * the QNN EP provider options to enable the QNN Saver backend, which dumps QNN API calls (and weights) to disk.
+ *
+ * - saver_output/saver_output.c: C file containing all QNN API calls.
+ * - saver_output/params.bin: binary file containing all input/output/parameter tensor data provided during tensor
+ *                            creation, op config validation, and graph execution.
+ *
+ * Enabling the QNN Saver backend has 2 note-worthy effects:
+ * 1. All QNN API calls will succeed.
+ * 2. Inference output returns dummy data.
+ *
+ * Because output files from QNN Saver are always overwritten, it is recommended to run individual unit tests via the
+ * --gtest_filter command-line option. Ex: --gtest_filter=QnnHTPBackendTests.Resize_DownSample_Linear_AlignCorners
+ *
+ * \param qnn_options QNN EP provider options that may be modified to enable QNN Saver.
+ */
+void TryEnableQNNSaver(ProviderOptions& qnn_options);
+
 /**
  * Tests the accuracy of a QDQ model on QNN EP by runnning 3 inferences:
  *
@@ -240,9 +259,10 @@ void InferenceModel(const std::string& model_data, const char* log_id,
  */
 template <typename QuantType>
 inline void TestQDQModelAccuracy(const GetTestModelFn& f32_model_fn, const GetTestQDQModelFn<QuantType>& qdq_model_fn,
-                                 const ProviderOptions& qnn_options, int opset_version,
+                                 ProviderOptions qnn_options, int opset_version,
                                  ExpectedEPNodeAssignment expected_ep_assignment, float fp32_abs_err = 1e-4f,
-                                 logging::Severity log_severity = logging::Severity::kERROR) {
+                                 logging::Severity log_severity = logging::Severity::kERROR,
+                                 const std::string& qnn_ctx_model_path = "") {
   // Add kMSDomain to cover contrib op like Gelu
   const std::unordered_map<std::string, int> domain_to_version = {{"", opset_version}, {kMSDomain, 1}};
 
@@ -300,9 +320,23 @@ inline void TestQDQModelAccuracy(const GetTestModelFn& f32_model_fn, const GetTe
   qdq_model.ToProto().SerializeToString(&qdq_model_data);
 
   // Run QDQ model on QNN EP and collect outputs.
+  TryEnableQNNSaver(qnn_options);
   std::vector<OrtValue> qnn_qdq_outputs;
-  InferenceModel(qdq_model_data, "qdq_model_logger", QnnExecutionProviderWithOptions(qnn_options),
-                 expected_ep_assignment, qdq_helper.feeds_, qnn_qdq_outputs);
+  if (!qnn_ctx_model_path.empty()) {
+    onnx::ModelProto model_proto;
+    onnxruntime::Model qnn_ctx_model;
+    // Load the QNN context cache model from path specified
+    ASSERT_STATUS_OK(qnn_ctx_model.Load(ToPathString(qnn_ctx_model_path), model_proto));
+    std::string qnn_ctx_model_data;
+    model_proto.SerializeToString(&qnn_ctx_model_data);
+    // Run QNN context cache model on QNN EP and collect outputs.
+    InferenceModel(qnn_ctx_model_data, "qnn_ctx_model_logger", QnnExecutionProviderWithOptions(qnn_options),
+                   expected_ep_assignment, qdq_helper.feeds_, qnn_qdq_outputs);
+  } else {
+    // Run QDQ model on QNN EP and collect outputs.
+    InferenceModel(qdq_model_data, "qdq_model_logger", QnnExecutionProviderWithOptions(qnn_options),
+                   expected_ep_assignment, qdq_helper.feeds_, qnn_qdq_outputs);
+  }
 
   if (expected_ep_assignment != ExpectedEPNodeAssignment::None) {
     // Run QDQ model on CPU EP and collect outputs.
@@ -538,7 +572,7 @@ inline GetTestQDQModelFn<QuantType> BuildQDQOpTestCase(const std::string& op_typ
  * \param fp32_abs_err The acceptable error between CPU EP and QNN EP.
  * \param log_severity The logger's minimum severity level.
  */
-void RunQnnModelTest(const GetTestModelFn& build_test_case, const ProviderOptions& provider_options,
+void RunQnnModelTest(const GetTestModelFn& build_test_case, ProviderOptions provider_options,
                      int opset_version, ExpectedEPNodeAssignment expected_ep_assignment,
                      float fp32_abs_err = 1e-5f, logging::Severity log_severity = logging::Severity::kERROR);
 
diff --git a/onnxruntime/test/providers/qnn/reduce_op_test.cc b/onnxruntime/test/providers/qnn/reduce_op_test.cc
index 57252f93492e5..1403197cd67ea 100644
--- a/onnxruntime/test/providers/qnn/reduce_op_test.cc
+++ b/onnxruntime/test/providers/qnn/reduce_op_test.cc
@@ -465,14 +465,44 @@ TEST_F(QnnHTPBackendTests, ReduceSumS8Opset13_NoKeepDims) {
                              ExpectedEPNodeAssignment::All);
 }
 
-// Test that we don't support rank 5 Reduce ops.
-TEST_F(QnnHTPBackendTests, ReduceSumS8Opset13_Rank5Unsupported) {
+// Test rank 5 ReduceSum (s8 quant) with axes = [0, 1, 2, 3, 4], keep_dims = true
+// TODO: QNN 2.15.1 Graph finalization error:
+// graph_prepare.cc:234:ERROR:could not create op: q::Sum
+// graph_prepare.cc:1093:ERROR:Op 0x102500000011 preparation failed with err:-1
+// Completed stage: Graph Transformations and Optimizations (17163 us)
+// QnnDsp <E> "node_token_3" generated: could not create op
+// QnnDsp <E> RouterWindows graph prepare failed 12
+// QnnDsp <E> Failed to finalize graph (id: 1) with err 1002{}
+TEST_F(QnnHTPBackendTests, DISABLED_ReduceSumS8Opset13_Rank5) {
   RunReduceOpQDQTest<int8_t>("ReduceSum",
-                             TestInputDef<float>({1, 3, 4, 4, 2}, false, -10.0f, 10.0f),
+                             TestInputDef<float>({1, 3, 4, 4, 2}, false, GetFloatDataInRange(-10.0f, 10.0f, 96)),
                              {0, 1, 2, 3, 4},  // axes
                              true,             // keepdims
                              13,               // opset
-                             ExpectedEPNodeAssignment::None);
+                             ExpectedEPNodeAssignment::All);
+}
+
+// Test that QNN validation APIs reject inputs of unsupported ranks.
+TEST_F(QnnHTPBackendTests, ReduceSumS8Opset13_Rank6_Unsupported) {
+  RunReduceOpQDQTest<int8_t>("ReduceSum",
+                             TestInputDef<float>({1, 3, 4, 4, 2, 1}, false, GetFloatDataInRange(-10.0f, 10.0f, 96)),
+                             {-1},                             // axes
+                             false,                            // keepdims
+                             13,                               // opset
+                             ExpectedEPNodeAssignment::None);  // Not assigned to QNN EP
+}
+
+// Test rank 5 ReduceSum (u8 quant) with axes = [-1], keep_dims = false
+// TODO: Enable on QNN 2.15.1 (works fine)
+TEST_F(QnnHTPBackendTests, DISABLED_ReduceSumU8Opset13_Rank5_LastAxis) {
+  constexpr size_t num_elems = 2ULL * 12 * 124 * 2 * 4;
+  std::vector<float> input_data = GetFloatDataInRange(-100.0f, 100.0f, num_elems);
+  RunReduceOpQDQTest<uint8_t>("ReduceSum",
+                              TestInputDef<float>({2, 12, 124, 2, 4}, false, input_data),
+                              {-1},   // axes
+                              false,  // keepdims
+                              13,     // opset
+                              ExpectedEPNodeAssignment::All);
 }
 
 //
diff --git a/onnxruntime/test/providers/qnn/reshape_expand_op_test.cc b/onnxruntime/test/providers/qnn/reshape_expand_op_test.cc
new file mode 100644
index 0000000000000..3964edc11461b
--- /dev/null
+++ b/onnxruntime/test/providers/qnn/reshape_expand_op_test.cc
@@ -0,0 +1,313 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#if !defined(ORT_MINIMAL_BUILD)
+
+#include <string>
+
+#include "test/providers/qnn/qnn_test_utils.h"
+#include "core/graph/node_attr_utils.h"
+
+#include "onnx/onnx_pb.h"
+#include "gtest/gtest.h"
+
+namespace onnxruntime {
+namespace test {
+
+// Runs a model with a Reshape/Expand operator on the QNN CPU backend. Checks the graph node assignment
+// and that inference outputs for QNN EP and CPU EP match.
+template <typename DataType>
+static void RunReshapeExpandTestOnCPU(const std::string& op_type,
+                                      const TestInputDef<DataType>& input_def,
+                                      const TestInputDef<int64_t>& shape_def,
+                                      const std::vector<ONNX_NAMESPACE::AttributeProto>& attrs,
+                                      ExpectedEPNodeAssignment expected_ep_assignment,
+                                      int opset = 19) {
+  ProviderOptions provider_options;
+
+#if defined(_WIN32)
+  provider_options["backend_path"] = "QnnCpu.dll";
+#else
+  provider_options["backend_path"] = "libQnnCpu.so";
+#endif
+
+  RunQnnModelTest(BuildOpTestCase<DataType, int64_t>(op_type, {input_def}, {shape_def}, attrs),
+                  provider_options,
+                  opset,
+                  expected_ep_assignment);
+}
+
+//
+// CPU tests:
+//
+
+// Test that Reshape with a dynamic shape input is not supported by QNN EP.
+TEST_F(QnnCPUBackendTests, Reshape_DynamicShape_Unsupported) {
+  RunReshapeExpandTestOnCPU("Reshape",
+                            TestInputDef<float>({1, 3, 4, 4}, false, -10.0f, 10.0f),
+                            TestInputDef<int64_t>({2}, false /* is_initializer */, {1, 48}),
+                            {},                              // Attributes
+                            ExpectedEPNodeAssignment::None,  // Should not be assigned to QNN EP.
+                            19);                             // Opset
+}
+
+// Test that Reshape with an enabled 'allowzero' attribute is not supported by QNN EP.
+TEST_F(QnnCPUBackendTests, Reshape_AllowZeroAttr_Unsupported) {
+  RunReshapeExpandTestOnCPU("Reshape", TestInputDef<float>({1, 3, 4, 4}, false, -10.0f, 10.0f),
+                            TestInputDef<int64_t>({2}, true, {1, 48}),
+                            {utils::MakeAttribute("allowzero", static_cast<int64_t>(1))},
+                            ExpectedEPNodeAssignment::None,  // Should not be assigned to QNN EP.
+                            19);                             // Opset
+}
+
+// Test Reshape of rank 4 -> rank 2.
+TEST_F(QnnCPUBackendTests, Reshape_4D_f32) {
+  RunReshapeExpandTestOnCPU("Reshape", TestInputDef<float>({1, 3, 4, 4}, false, GetFloatDataInRange(-10.0f, 10.0f, 48)),
+                            TestInputDef<int64_t>({2}, true, {1, 48}),
+                            {},  // Attributes
+                            ExpectedEPNodeAssignment::All,
+                            19);  // Opset
+}
+
+// Test Expand with non-initializer shape input, not supported.
+TEST_F(QnnCPUBackendTests, Expand_NonIniShape) {
+  RunReshapeExpandTestOnCPU("Expand", TestInputDef<float>({1}, false, {1.0f}),
+                            TestInputDef<int64_t>({2}, false, {2, 2}),
+                            {},  // Attributes
+                            ExpectedEPNodeAssignment::None,
+                            19);  // Opset
+}
+
+// Test Expand with initializer shape input.
+TEST_F(QnnCPUBackendTests, Expand_IniShape) {
+  RunReshapeExpandTestOnCPU("Expand", TestInputDef<float>({1}, false, {1.0f}),
+                            TestInputDef<int64_t>({2}, true, {2, 3}),
+                            {},  // Attributes
+                            ExpectedEPNodeAssignment::All,
+                            19);  // Opset
+}
+
+// Test Expand with initializer shape input.
+TEST_F(QnnCPUBackendTests, Expand_Uint32) {
+  RunReshapeExpandTestOnCPU("Expand", TestInputDef<uint32_t>({1}, false, {1}),
+                            TestInputDef<int64_t>({2}, true, {2, 3}),
+                            {},  // Attributes
+                            ExpectedEPNodeAssignment::All,
+                            19);  // Opset
+}
+
+// Test Expand with 6D output.
+TEST_F(QnnCPUBackendTests, Expand_6D) {
+  RunReshapeExpandTestOnCPU("Expand", TestInputDef<float>({3}, false, {1.0f, 2.0f, 3.0f}),
+                            TestInputDef<int64_t>({6}, true, {1, 2, 3, 4, 5, 3}),
+                            {},  // Attributes
+                            ExpectedEPNodeAssignment::All,
+                            19);  // Opset
+}
+
+#if defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
+//
+// HTP tests:
+//
+
+// Returns a function that creates a graph with a QDQ Reshape/Expand operator.
+template <typename QuantType>
+GetTestQDQModelFn<QuantType> BuildQDQReshapeExpandTestCase(const std::string& op_type,
+                                                           const TestInputDef<float>& input_def,
+                                                           const TestInputDef<int64_t>& shape_def,
+                                                           const std::vector<ONNX_NAMESPACE::AttributeProto>& attrs,
+                                                           bool use_contrib_qdq = false) {
+  return [input_def, shape_def, attrs,
+          use_contrib_qdq, op_type](ModelTestBuilder& builder,
+                                    std::vector<QuantParams<QuantType>>& output_qparams) {
+    // input -> Q -> DQ ->
+    NodeArg* input = MakeTestInput(builder, input_def);
+    QuantParams<QuantType> input_qparams = GetTestInputQuantParams<QuantType>(input_def);
+    NodeArg* input_qdq = AddQDQNodePair<QuantType>(builder, input, input_qparams.scale, input_qparams.zero_point,
+                                                   use_contrib_qdq);
+
+    // shape input
+    NodeArg* shape_input = MakeTestInput(builder, shape_def);
+
+    // Reshape op
+    NodeArg* reshape_output = builder.MakeIntermediate();
+    Node& reshape_node = builder.AddNode(op_type, {input_qdq, shape_input}, {reshape_output});
+
+    for (const auto& attr : attrs) {
+      reshape_node.AddAttributeProto(attr);
+    }
+
+    // op_output -> Q -> DQ -> output
+    // NOTE: Input and output quantization parameters must be equal for Reshape.
+    output_qparams[0] = input_qparams;  // Overwrite!
+    AddQDQNodePairWithOutputAsGraphOutput<QuantType>(builder, reshape_output, input_qparams.scale,
+                                                     input_qparams.zero_point, use_contrib_qdq);
+  };
+}
+
+// Runs a model with a non-QDQ Reshape operator on the QNN HTP backend. Checks the graph node assignment
+// and that inference outputs for QNN EP and CPU EP match.
+template <typename DataType>
+static void RunReshapeExpandTestOnHTP(const std::string& op_type,
+                                      const TestInputDef<DataType>& input_def,
+                                      const TestInputDef<int64_t>& shape_def,
+                                      const std::vector<ONNX_NAMESPACE::AttributeProto>& attrs,
+                                      ExpectedEPNodeAssignment expected_ep_assignment,
+                                      int opset = 19) {
+  ProviderOptions provider_options;
+
+#if defined(_WIN32)
+  provider_options["backend_path"] = "QnnHtp.dll";
+#else
+  provider_options["backend_path"] = "libQnnHtp.so";
+#endif
+
+  RunQnnModelTest(BuildOpTestCase<DataType, int64_t>(op_type, {input_def}, {shape_def}, attrs),
+                  provider_options,
+                  opset,
+                  expected_ep_assignment);
+}
+
+// Runs a QDQ Reshape model on the QNN (HTP) EP and the ORT CPU EP. Checks the graph node assignment and that inference
+// running the QDQ model on QNN EP is at least as accurate as on ORT CPU EP (compared to the baseline float32 model).
+template <typename QType>
+static void RunQDQReshapeExpandTestOnHTP(const std::string& op_type,
+                                         const TestInputDef<float>& input_def,
+                                         const TestInputDef<int64_t>& shape_def,
+                                         const std::vector<ONNX_NAMESPACE::AttributeProto>& attrs,
+                                         ExpectedEPNodeAssignment expected_ep_assignment,
+                                         int opset = 19,
+                                         bool use_contrib_qdq = false) {
+  ProviderOptions provider_options;
+
+#if defined(_WIN32)
+  provider_options["backend_path"] = "QnnHtp.dll";
+#else
+  provider_options["backend_path"] = "libQnnHtp.so";
+#endif
+
+  auto f32_model_builder = BuildOpTestCase<float, int64_t>(op_type, {input_def}, {shape_def}, attrs);
+  auto qdq_model_builder = BuildQDQReshapeExpandTestCase<QType>(op_type, input_def, shape_def, attrs, use_contrib_qdq);
+  TestQDQModelAccuracy(f32_model_builder,
+                       qdq_model_builder,
+                       provider_options,
+                       opset,
+                       expected_ep_assignment);
+}
+
+// Test that QDQ Reshape with a dynamic shape input is not supported by QNN EP.
+TEST_F(QnnHTPBackendTests, Reshape_DynamicShape_Unsupported) {
+  RunQDQReshapeExpandTestOnHTP<uint8_t>("Reshape",
+                                        TestInputDef<float>({1, 3, 4, 4}, false, -10.0f, 10.0f),
+                                        TestInputDef<int64_t>({2}, false /* is_initializer */, {1, 48}),
+                                        {},                              // Attributes
+                                        ExpectedEPNodeAssignment::None,  // Should not be assigned to QNN EP.
+                                        19);                             // Opset
+}
+
+// Test that QDQ Reshape with an enabled 'allowzero' attribute is not supported by QNN EP.
+TEST_F(QnnHTPBackendTests, Reshape_AllowZeroAttr_Unsupported) {
+  RunQDQReshapeExpandTestOnHTP<uint8_t>("Reshape",
+                                        TestInputDef<float>({1, 3, 4, 4}, false, -10.0f, 10.0f),
+                                        TestInputDef<int64_t>({2}, true, {1, 48}),
+                                        {utils::MakeAttribute("allowzero", static_cast<int64_t>(1))},
+                                        ExpectedEPNodeAssignment::None,  // Should not be assigned to QNN EP.
+                                        19);                             // Opset
+}
+
+// Test 8-bit QDQ Reshape of rank 4 -> rank 2.
+TEST_F(QnnHTPBackendTests, Reshape_4D_u8) {
+  RunQDQReshapeExpandTestOnHTP<uint8_t>("Reshape",
+                                        TestInputDef<float>({1, 3, 4, 4}, false, GetFloatDataInRange(-10.0f, 10.0f, 48)),
+                                        TestInputDef<int64_t>({2}, true, {1, 48}),
+                                        {},  // Attributes
+                                        ExpectedEPNodeAssignment::All,
+                                        19);  // Opset
+}
+
+// Test 16-bit QDQ Reshape of rank 4 -> rank 2.
+TEST_F(QnnHTPBackendTests, Reshape_4D_u16) {
+  RunQDQReshapeExpandTestOnHTP<uint16_t>("Reshape",
+                                         TestInputDef<float>({1, 3, 4, 4}, false, GetFloatDataInRange(-10.0f, 10.0f, 48)),
+                                         TestInputDef<int64_t>({2}, true, {1, 48}),
+                                         {},  // Attributes
+                                         ExpectedEPNodeAssignment::All,
+                                         19,     // Opset
+                                         true);  // Use com.microsoft Q/DQ ops
+}
+
+// Test that int32 Reshape runs on HTP backend.
+TEST_F(QnnHTPBackendTests, Reshape_4D_int32) {
+  std::vector<int32_t> input_data = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
+  RunReshapeExpandTestOnHTP<int32_t>("Reshape",
+                                     TestInputDef<int32_t>({1, 3, 2, 2}, false, input_data),
+                                     TestInputDef<int64_t>({3}, true, {1, 1, 12}),
+                                     {},  // Attributes
+                                     ExpectedEPNodeAssignment::All,
+                                     19);  // Opset
+}
+
+// Test QDQ Reshape with a shape value of 0 (copy dimension from input)
+TEST_F(QnnHTPBackendTests, Reshape_4D_0MeansCopy) {
+  RunQDQReshapeExpandTestOnHTP<uint8_t>("Reshape",
+                                        TestInputDef<float>({1, 3, 4, 4}, false, GetFloatDataInRange(-10.0f, 10.0f, 48)),
+                                        TestInputDef<int64_t>({3}, true, {1, 0, 16}),  // zero means copy => '(1, 3, 16)'
+                                        {},                                            // Attributes
+                                        ExpectedEPNodeAssignment::All,
+                                        19);  // Opset
+}
+
+// Test QDQ Reshape with a shape value of -1 (dimension is inferred from the expected number of elements)
+TEST_F(QnnHTPBackendTests, Reshape_4D_Neg1MeansInfer) {
+  RunQDQReshapeExpandTestOnHTP<uint8_t>("Reshape",
+                                        TestInputDef<float>({1, 3, 4, 4}, false, GetFloatDataInRange(-10.0f, 10.0f, 48)),
+                                        TestInputDef<int64_t>({3}, true, {1, 3, -1}),  // -1 means infer => '(1, 3, 16)'
+                                        {},                                            // Attributes
+                                        ExpectedEPNodeAssignment::All,
+                                        19);  // Opset
+}
+
+// Test that int32 Expand runs on HTP backend.
+TEST_F(QnnHTPBackendTests, Expand_HTP_int32) {
+  RunReshapeExpandTestOnHTP<int32_t>("Expand",
+                                     TestInputDef<int32_t>({1}, false, {1}),
+                                     TestInputDef<int64_t>({3}, true, {1, 2, 3}),
+                                     {},  // Attributes
+                                     ExpectedEPNodeAssignment::All,
+                                     19);  // Opset
+}
+
+// Test QDQ Expand
+TEST_F(QnnHTPBackendTests, Expand_4D) {
+  RunQDQReshapeExpandTestOnHTP<uint8_t>("Expand",
+                                        TestInputDef<float>({3}, false, {1.0f, 2.0f, 3.0f}),
+                                        TestInputDef<int64_t>({4}, true, {3, 2, 2, 1}),
+                                        {},  // Attributes
+                                        ExpectedEPNodeAssignment::All,
+                                        19);  // Opset
+}
+
+// Test QDQ Expand
+TEST_F(QnnHTPBackendTests, Expand_5D) {
+  RunQDQReshapeExpandTestOnHTP<uint8_t>("Expand",
+                                        TestInputDef<float>({1, 3}, false, {1.0f, 2.0f, 3.0f}),
+                                        TestInputDef<int64_t>({5}, true, {3, 2, 2, 2, 1}),
+                                        {},  // Attributes
+                                        ExpectedEPNodeAssignment::All,
+                                        19);  // Opset
+}
+
+// Test QDQ Expand 6D not supported for HTP backend according to QNN doc
+TEST_F(QnnHTPBackendTests, Expand_6D) {
+  RunQDQReshapeExpandTestOnHTP<uint8_t>("Expand",
+                                        TestInputDef<float>({1, 3}, false, {1.0f, 2.0f, 3.0f}),
+                                        TestInputDef<int64_t>({6}, true, {3, 2, 2, 2, 2, 1}),
+                                        {},  // Attributes
+                                        ExpectedEPNodeAssignment::None,
+                                        19);  // Opset
+}
+
+#endif  // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
+}  // namespace test
+}  // namespace onnxruntime
+#endif  // !defined(ORT_MINIMAL_BUILD)
diff --git a/onnxruntime/test/providers/qnn/reshape_op_test.cc b/onnxruntime/test/providers/qnn/reshape_op_test.cc
deleted file mode 100644
index eb495e44ec770..0000000000000
--- a/onnxruntime/test/providers/qnn/reshape_op_test.cc
+++ /dev/null
@@ -1,225 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-#if !defined(ORT_MINIMAL_BUILD)
-
-#include <string>
-
-#include "test/providers/qnn/qnn_test_utils.h"
-#include "core/graph/node_attr_utils.h"
-
-#include "onnx/onnx_pb.h"
-#include "gtest/gtest.h"
-
-namespace onnxruntime {
-namespace test {
-
-// Runs a model with a Reshape operator on the QNN CPU backend. Checks the graph node assignment
-// and that inference outputs for QNN EP and CPU EP match.
-template <typename DataType>
-static void RunReshapeTestOnCPU(const TestInputDef<DataType>& input_def,
-                                const TestInputDef<int64_t>& shape_def,
-                                const std::vector<ONNX_NAMESPACE::AttributeProto>& attrs,
-                                ExpectedEPNodeAssignment expected_ep_assignment,
-                                int opset = 19) {
-  ProviderOptions provider_options;
-
-#if defined(_WIN32)
-  provider_options["backend_path"] = "QnnCpu.dll";
-#else
-  provider_options["backend_path"] = "libQnnCpu.so";
-#endif
-
-  RunQnnModelTest(BuildOpTestCase<DataType, int64_t>("Reshape", {input_def}, {shape_def}, attrs),
-                  provider_options,
-                  opset,
-                  expected_ep_assignment);
-}
-
-//
-// CPU tests:
-//
-
-// Test that Reshape with a dynamic shape input is not supported by QNN EP.
-TEST_F(QnnCPUBackendTests, Reshape_DynamicShape_Unsupported) {
-  RunReshapeTestOnCPU(TestInputDef<float>({1, 3, 4, 4}, false, -10.0f, 10.0f),
-                      TestInputDef<int64_t>({2}, false /* is_initializer */, {1, 48}),
-                      {},                              // Attributes
-                      ExpectedEPNodeAssignment::None,  // Should not be assigned to QNN EP.
-                      19);                             // Opset
-}
-
-// Test that Reshape with an enabled 'allowzero' attribute is not supported by QNN EP.
-TEST_F(QnnCPUBackendTests, Reshape_AllowZeroAttr_Unsupported) {
-  RunReshapeTestOnCPU(TestInputDef<float>({1, 3, 4, 4}, false, -10.0f, 10.0f),
-                      TestInputDef<int64_t>({2}, true, {1, 48}),
-                      {utils::MakeAttribute("allowzero", static_cast<int64_t>(1))},
-                      ExpectedEPNodeAssignment::None,  // Should not be assigned to QNN EP.
-                      19);                             // Opset
-}
-
-// Test Reshape of rank 4 -> rank 2.
-TEST_F(QnnCPUBackendTests, Reshape_4D_f32) {
-  RunReshapeTestOnCPU(TestInputDef<float>({1, 3, 4, 4}, false, GetFloatDataInRange(-10.0f, 10.0f, 48)),
-                      TestInputDef<int64_t>({2}, true, {1, 48}),
-                      {},  // Attributes
-                      ExpectedEPNodeAssignment::All,
-                      19);  // Opset
-}
-
-#if defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
-//
-// HTP tests:
-//
-
-// Returns a function that creates a graph with a QDQ Reshape operator.
-template <typename QuantType>
-GetTestQDQModelFn<QuantType> BuildQDQReshapeTestCase(const TestInputDef<float>& input_def,
-                                                     const TestInputDef<int64_t>& shape_def,
-                                                     const std::vector<ONNX_NAMESPACE::AttributeProto>& attrs,
-                                                     bool use_contrib_qdq = false) {
-  return [input_def, shape_def, attrs,
-          use_contrib_qdq](ModelTestBuilder& builder,
-                           std::vector<QuantParams<QuantType>>& output_qparams) {
-    // input -> Q -> DQ ->
-    NodeArg* input = MakeTestInput(builder, input_def);
-    QuantParams<QuantType> input_qparams = GetTestInputQuantParams<QuantType>(input_def);
-    NodeArg* input_qdq = AddQDQNodePair<QuantType>(builder, input, input_qparams.scale, input_qparams.zero_point,
-                                                   use_contrib_qdq);
-
-    // shape input
-    NodeArg* shape_input = MakeTestInput(builder, shape_def);
-
-    // Reshape op
-    NodeArg* reshape_output = builder.MakeIntermediate();
-    Node& reshape_node = builder.AddNode("Reshape", {input_qdq, shape_input}, {reshape_output});
-
-    for (const auto& attr : attrs) {
-      reshape_node.AddAttributeProto(attr);
-    }
-
-    // op_output -> Q -> DQ -> output
-    // NOTE: Input and output quantization parameters must be equal for Reshape.
-    output_qparams[0] = input_qparams;  // Overwrite!
-    AddQDQNodePairWithOutputAsGraphOutput<QuantType>(builder, reshape_output, input_qparams.scale,
-                                                     input_qparams.zero_point, use_contrib_qdq);
-  };
-}
-
-// Runs a model with a non-QDQ Reshape operator on the QNN HTP backend. Checks the graph node assignment
-// and that inference outputs for QNN EP and CPU EP match.
-template <typename DataType>
-static void RunReshapeTestOnHTP(const TestInputDef<DataType>& input_def,
-                                const TestInputDef<int64_t>& shape_def,
-                                const std::vector<ONNX_NAMESPACE::AttributeProto>& attrs,
-                                ExpectedEPNodeAssignment expected_ep_assignment,
-                                int opset = 19) {
-  ProviderOptions provider_options;
-
-#if defined(_WIN32)
-  provider_options["backend_path"] = "QnnHtp.dll";
-#else
-  provider_options["backend_path"] = "libQnnHtp.so";
-#endif
-
-  RunQnnModelTest(BuildOpTestCase<DataType, int64_t>("Reshape", {input_def}, {shape_def}, attrs),
-                  provider_options,
-                  opset,
-                  expected_ep_assignment);
-}
-
-// Runs a QDQ Reshape model on the QNN (HTP) EP and the ORT CPU EP. Checks the graph node assignment and that inference
-// running the QDQ model on QNN EP is at least as accurate as on ORT CPU EP (compared to the baseline float32 model).
-template <typename QType>
-static void RunQDQReshapeTestOnHTP(const TestInputDef<float>& input_def,
-                                   const TestInputDef<int64_t>& shape_def,
-                                   const std::vector<ONNX_NAMESPACE::AttributeProto>& attrs,
-                                   ExpectedEPNodeAssignment expected_ep_assignment,
-                                   int opset = 19,
-                                   bool use_contrib_qdq = false) {
-  ProviderOptions provider_options;
-
-#if defined(_WIN32)
-  provider_options["backend_path"] = "QnnHtp.dll";
-#else
-  provider_options["backend_path"] = "libQnnHtp.so";
-#endif
-
-  auto f32_model_builder = BuildOpTestCase<float, int64_t>("Reshape", {input_def}, {shape_def}, attrs);
-  auto qdq_model_builder = BuildQDQReshapeTestCase<QType>(input_def, shape_def, attrs, use_contrib_qdq);
-  TestQDQModelAccuracy(f32_model_builder,
-                       qdq_model_builder,
-                       provider_options,
-                       opset,
-                       expected_ep_assignment);
-}
-
-// Test that QDQ Reshape with a dynamic shape input is not supported by QNN EP.
-TEST_F(QnnHTPBackendTests, Reshape_DynamicShape_Unsupported) {
-  RunQDQReshapeTestOnHTP<uint8_t>(TestInputDef<float>({1, 3, 4, 4}, false, -10.0f, 10.0f),
-                                  TestInputDef<int64_t>({2}, false /* is_initializer */, {1, 48}),
-                                  {},                              // Attributes
-                                  ExpectedEPNodeAssignment::None,  // Should not be assigned to QNN EP.
-                                  19);                             // Opset
-}
-
-// Test that QDQ Reshape with an enabled 'allowzero' attribute is not supported by QNN EP.
-TEST_F(QnnHTPBackendTests, Reshape_AllowZeroAttr_Unsupported) {
-  RunQDQReshapeTestOnHTP<uint8_t>(TestInputDef<float>({1, 3, 4, 4}, false, -10.0f, 10.0f),
-                                  TestInputDef<int64_t>({2}, true, {1, 48}),
-                                  {utils::MakeAttribute("allowzero", static_cast<int64_t>(1))},
-                                  ExpectedEPNodeAssignment::None,  // Should not be assigned to QNN EP.
-                                  19);                             // Opset
-}
-
-// Test 8-bit QDQ Reshape of rank 4 -> rank 2.
-TEST_F(QnnHTPBackendTests, Reshape_4D_u8) {
-  RunQDQReshapeTestOnHTP<uint8_t>(TestInputDef<float>({1, 3, 4, 4}, false, GetFloatDataInRange(-10.0f, 10.0f, 48)),
-                                  TestInputDef<int64_t>({2}, true, {1, 48}),
-                                  {},  // Attributes
-                                  ExpectedEPNodeAssignment::All,
-                                  19);  // Opset
-}
-
-// Test 16-bit QDQ Reshape of rank 4 -> rank 2.
-TEST_F(QnnHTPBackendTests, Reshape_4D_u16) {
-  RunQDQReshapeTestOnHTP<uint16_t>(TestInputDef<float>({1, 3, 4, 4}, false, GetFloatDataInRange(-10.0f, 10.0f, 48)),
-                                   TestInputDef<int64_t>({2}, true, {1, 48}),
-                                   {},  // Attributes
-                                   ExpectedEPNodeAssignment::All,
-                                   19,     // Opset
-                                   true);  // Use com.microsoft Q/DQ ops
-}
-
-// Test that int32 Reshape runs on HTP backend.
-TEST_F(QnnHTPBackendTests, Reshape_4D_int32) {
-  std::vector<int32_t> input_data = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
-  RunReshapeTestOnHTP<int32_t>(TestInputDef<int32_t>({1, 3, 2, 2}, false, input_data),
-                               TestInputDef<int64_t>({3}, true, {1, 1, 12}),
-                               {},  // Attributes
-                               ExpectedEPNodeAssignment::All,
-                               19);  // Opset
-}
-
-// Test QDQ Reshape with a shape value of 0 (copy dimension from input)
-TEST_F(QnnHTPBackendTests, Reshape_4D_0MeansCopy) {
-  RunQDQReshapeTestOnHTP<uint8_t>(TestInputDef<float>({1, 3, 4, 4}, false, GetFloatDataInRange(-10.0f, 10.0f, 48)),
-                                  TestInputDef<int64_t>({3}, true, {1, 0, 16}),  // zero means copy => '(1, 3, 16)'
-                                  {},                                            // Attributes
-                                  ExpectedEPNodeAssignment::All,
-                                  19);  // Opset
-}
-
-// Test QDQ Reshape with a shape value of -1 (dimension is inferred from the expected number of elements)
-TEST_F(QnnHTPBackendTests, Reshape_4D_Neg1MeansInfer) {
-  RunQDQReshapeTestOnHTP<uint8_t>(TestInputDef<float>({1, 3, 4, 4}, false, GetFloatDataInRange(-10.0f, 10.0f, 48)),
-                                  TestInputDef<int64_t>({3}, true, {1, 3, -1}),  // -1 means infer => '(1, 3, 16)'
-                                  {},                                            // Attributes
-                                  ExpectedEPNodeAssignment::All,
-                                  19);  // Opset
-}
-
-#endif  // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
-}  // namespace test
-}  // namespace onnxruntime
-#endif  // !defined(ORT_MINIMAL_BUILD)
diff --git a/onnxruntime/test/providers/qnn/resize_test.cc b/onnxruntime/test/providers/qnn/resize_test.cc
index cf336ca9eeb8b..cd6865d443cc0 100644
--- a/onnxruntime/test/providers/qnn/resize_test.cc
+++ b/onnxruntime/test/providers/qnn/resize_test.cc
@@ -120,7 +120,7 @@ static void RunCPUResizeOpTest(const TestInputDef<float>& input_def, const std::
                                const std::string& mode, const std::string& coordinate_transformation_mode,
                                const std::string& nearest_mode,
                                ExpectedEPNodeAssignment expected_ep_assignment,
-                               int opset = 11) {
+                               int opset = 19) {
   ProviderOptions provider_options;
 #if defined(_WIN32)
   provider_options["backend_path"] = "QnnCpu.dll";
@@ -138,7 +138,7 @@ static void RunCPUResizeOpTestWithScales(const TestInputDef<float>& input_def, c
                                          const std::string& mode, const std::string& coordinate_transformation_mode,
                                          const std::string& nearest_mode,
                                          ExpectedEPNodeAssignment expected_ep_assignment,
-                                         int opset = 11) {
+                                         int opset = 19) {
   ProviderOptions provider_options;
 #if defined(_WIN32)
   provider_options["backend_path"] = "QnnCpu.dll";
@@ -157,7 +157,8 @@ static void RunQDQResizeOpTest(const TestInputDef<float>& input_def,
                                const std::vector<int64_t>& sizes_data,
                                const std::string& mode, const std::string& coordinate_transformation_mode,
                                const std::string& nearest_mode,
-                               ExpectedEPNodeAssignment expected_ep_assignment) {
+                               ExpectedEPNodeAssignment expected_ep_assignment,
+                               int opset = 19) {
   ProviderOptions provider_options;
 #if defined(_WIN32)
   provider_options["backend_path"] = "QnnHtp.dll";
@@ -169,27 +170,20 @@ static void RunQDQResizeOpTest(const TestInputDef<float>& input_def,
                        GetQDQResizeModelBuilder<QuantType>(input_def, sizes_data, mode, coordinate_transformation_mode,
                                                            nearest_mode),
                        provider_options,
-                       18,  // opset
-                       expected_ep_assignment,
-                       1e-5f);
+                       opset,
+                       expected_ep_assignment);
 }
 
 //
 // CPU tests:
 //
 
-// TODO: Our QNN CPU translation of ONNX Resize with "nearest" mode uses QNN's ResizeNearestNeighbor
-// operator, which does not have a way to specify rounding (i.e., "nearest_mode" in ONNX). It is not clear
-// what kind of rounding QNN's ResizeNearestNeighbor uses. Therefore, we do not yet know how to compare
-// ONNX Resize to QNN ResizeNearestNeighbor. These tests should remain disabled until this behavior is
-// clarified. If, for example, it turns out that ResizeNearestNeighbor uses "floor" rounding, then we should
-// only compare against ONNX resize with "floor" rounding.
-
 // Upsample that uses "round_prefer_floor" as the "nearest_mode".
 // coordinate_transformation_mode: "half_pixel"
-TEST_F(QnnCPUBackendTests, DISABLED_ResizeUpsampleNearestHalfPixel_rpf) {
-  RunCPUResizeOpTest(TestInputDef<float>({1, 2, 7, 5}, false, -10.0f, 10.0f),  // Random input w/ range [-10, 10]
-                     {1, 2, 21, 10},                                           // Sizes
+TEST_F(QnnCPUBackendTests, ResizeUpsampleNearestHalfPixel_rpf) {
+  std::vector<float> input_data = GetFloatDataInRange(-10.0f, 10.0f, 70);
+  RunCPUResizeOpTest(TestInputDef<float>({1, 2, 7, 5}, false, input_data),
+                     {1, 2, 21, 10},  // Sizes
                      "nearest",
                      "half_pixel",
                      "round_prefer_floor",
@@ -198,57 +192,72 @@ TEST_F(QnnCPUBackendTests, DISABLED_ResizeUpsampleNearestHalfPixel_rpf) {
 
 // Upsample that uses "round_prefer_ceil" as the "nearest_mode".
 // coordinate_transformation_mode: "half_pixel"
-TEST_F(QnnCPUBackendTests, DISABLED_ResizeUpsampleNearestHalfPixel_rpc) {
-  RunCPUResizeOpTest(TestInputDef<float>({1, 1, 2, 4}, false, -10.0f, 10.0f),
+TEST_F(QnnCPUBackendTests, ResizeUpsampleNearestHalfPixel_rpc) {
+  std::vector<float> input_data = GetFloatDataInRange(-10.0f, 10.0f, 8);
+  RunCPUResizeOpTest(TestInputDef<float>({1, 1, 2, 4}, false, input_data),
                      {1, 1, 7, 5}, "nearest", "half_pixel", "round_prefer_ceil",
                      ExpectedEPNodeAssignment::All);
 }
 
 // Downsample that uses "round_prefer_ceil" as the "nearest_mode".
 // coordinate_transformation_mode: "half_pixel"
-TEST_F(QnnCPUBackendTests, DISABLED_ResizeDownsampleNearestHalfPixel_rpc) {
-  RunCPUResizeOpTest(TestInputDef<float>({1, 1, 2, 4}, false, -10.0f, 10.0f),
+TEST_F(QnnCPUBackendTests, ResizeDownsampleNearestHalfPixel_rpc) {
+  std::vector<float> input_data = GetFloatDataInRange(-10.0f, 10.0f, 8);
+  RunCPUResizeOpTest(TestInputDef<float>({1, 1, 2, 4}, false, input_data),
                      {1, 1, 1, 3}, "nearest", "half_pixel", "round_prefer_ceil",
                      ExpectedEPNodeAssignment::All);
 }
 
 // Downsample that uses "round_prefer_floor" as the "nearest_mode".
 // coordinate_transformation_mode: "half_pixel"
-TEST_F(QnnCPUBackendTests, DISABLED_ResizeDownsampleNearestHalfPixel_rpf) {
-  RunCPUResizeOpTest(TestInputDef<float>({1, 1, 2, 4}, false, -10.0f, 10.0f),
+TEST_F(QnnCPUBackendTests, ResizeDownsampleNearestHalfPixel_rpf) {
+  std::vector<float> input_data = GetFloatDataInRange(-10.0f, 10.0f, 8);
+  RunCPUResizeOpTest(TestInputDef<float>({1, 1, 2, 4}, false, input_data),
                      {1, 1, 1, 2}, "nearest", "half_pixel", "round_prefer_ceil",
                      ExpectedEPNodeAssignment::All);
 }
 
 // Upsample that uses "round_prefer_floor" as the "nearest_mode".
 // coordinate_transformation_mode: "align_corners"
-// QNN v2.13: index #50 don't match, which is 4.67152 from -1.93515
-TEST_F(QnnCPUBackendTests, DISABLED_ResizeUpsampleNearestAlignCorners_rpf) {
-  RunCPUResizeOpTest(TestInputDef<float>({1, 2, 7, 5}, false, -10.0f, 10.0f),
+TEST_F(QnnCPUBackendTests, ResizeUpsampleNearestAlignCorners_rpf) {
+  std::vector<float> input_data = GetFloatDataInRange(-10.0f, 10.0f, 70);
+  RunCPUResizeOpTest(TestInputDef<float>({1, 2, 7, 5}, false, input_data),
                      {1, 2, 21, 10}, "nearest", "align_corners", "round_prefer_floor",
                      ExpectedEPNodeAssignment::All);
 }
 
+// Upsample that uses "round_prefer_floor" as the "nearest_mode".
+// coordinate_transformation_mode: "asymmetric"
+TEST_F(QnnCPUBackendTests, ResizeUpsampleNearestAsymmetric_rpf) {
+  std::vector<float> input_data = GetFloatDataInRange(-10.0f, 10.0f, 70);
+  RunCPUResizeOpTest(TestInputDef<float>({1, 2, 7, 5}, false, input_data),
+                     {1, 2, 21, 10}, "nearest", "asymmetric", "round_prefer_floor",
+                     ExpectedEPNodeAssignment::All);
+}
+
 // Upsample that uses "round_prefer_ceil" as the "nearest_mode".
 // coordinate_transformation_mode: "align_corners"
-TEST_F(QnnCPUBackendTests, DISABLED_ResizeUpsampleNearestAlignCorners_rpc) {
-  RunCPUResizeOpTest(TestInputDef<float>({1, 1, 2, 4}, false, -10.0f, 10.0f),
+TEST_F(QnnCPUBackendTests, ResizeUpsampleNearestAlignCorners_rpc) {
+  std::vector<float> input_data = GetFloatDataInRange(-10.0f, 10.0f, 8);
+  RunCPUResizeOpTest(TestInputDef<float>({1, 1, 2, 4}, false, input_data),
                      {1, 1, 7, 5}, "nearest", "align_corners", "round_prefer_ceil",
                      ExpectedEPNodeAssignment::All);
 }
 
 // Downsample that uses "round_prefer_ceil" as the "nearest_mode".
 // coordinate_transformation_mode: "align_corners"
-TEST_F(QnnCPUBackendTests, DISABLED_ResizeDownsampleNearestAlignCorners_rpc) {
-  RunCPUResizeOpTest(TestInputDef<float>({1, 1, 2, 4}, false, -10.0f, 10.0f),
+TEST_F(QnnCPUBackendTests, ResizeDownsampleNearestAlignCorners_rpc) {
+  std::vector<float> input_data = GetFloatDataInRange(-10.0f, 10.0f, 8);
+  RunCPUResizeOpTest(TestInputDef<float>({1, 1, 2, 4}, false, input_data),
                      {1, 1, 1, 3}, "nearest", "align_corners", "round_prefer_ceil",
                      ExpectedEPNodeAssignment::All);
 }
 
 // Downsample that uses "round_prefer_floor" as the "nearest_mode".
 // coordinate_transformation_mode: "align_corners"
-TEST_F(QnnCPUBackendTests, DISABLED_ResizeDownsampleNearestAlignCorners_rpf) {
-  RunCPUResizeOpTest(TestInputDef<float>({1, 1, 2, 4}, false, -10.0f, 10.0f),
+TEST_F(QnnCPUBackendTests, ResizeDownsampleNearestAlignCorners_rpf) {
+  std::vector<float> input_data = GetFloatDataInRange(-10.0f, 10.0f, 8);
+  RunCPUResizeOpTest(TestInputDef<float>({1, 1, 2, 4}, false, input_data),
                      {1, 1, 1, 2}, "nearest", "align_corners", "round_prefer_floor",
                      ExpectedEPNodeAssignment::All);
 }
@@ -258,76 +267,177 @@ TEST_F(QnnCPUBackendTests, DISABLED_ResizeDownsampleNearestAlignCorners_rpf) {
 //
 
 TEST_F(QnnCPUBackendTests, Resize2xLinearHalfPixel) {
-  RunCPUResizeOpTest(TestInputDef<float>({1, 3, 4, 5}, false, -10.0f, 10.0f),
+  std::vector<float> input_data = GetFloatDataInRange(-10.0f, 10.0f, 60);
+  RunCPUResizeOpTest(TestInputDef<float>({1, 3, 4, 5}, false, input_data),
                      {1, 3, 8, 10}, "linear", "half_pixel", "",
                      ExpectedEPNodeAssignment::All);
 }
 
 TEST_F(QnnCPUBackendTests, Resize2xLinearHalfPixel_scales) {
-  RunCPUResizeOpTestWithScales(TestInputDef<float>({1, 3, 4, 5}, false, -10.0f, 10.0f),
+  std::vector<float> input_data = GetFloatDataInRange(-10.0f, 10.0f, 60);
+  RunCPUResizeOpTestWithScales(TestInputDef<float>({1, 3, 4, 5}, false, input_data),
                                {1.0f, 1.0f, 2.0f, 2.0f}, "linear", "half_pixel", "",
                                ExpectedEPNodeAssignment::All);
 }
 
 TEST_F(QnnCPUBackendTests, Resize2xLinearAlignCorners) {
-  RunCPUResizeOpTest(TestInputDef<float>({1, 3, 4, 5}, false, -10.0f, 10.0f),
+  std::vector<float> input_data = GetFloatDataInRange(-10.0f, 10.0f, 60);
+  RunCPUResizeOpTest(TestInputDef<float>({1, 3, 4, 5}, false, input_data),
                      {1, 3, 8, 10}, "linear", "align_corners", "",
                      ExpectedEPNodeAssignment::All);
 }
 
 TEST_F(QnnCPUBackendTests, Resize2xLinearAlignCorners_scales) {
-  RunCPUResizeOpTestWithScales(TestInputDef<float>({1, 3, 4, 5}, false, -10.0f, 10.0f),
+  std::vector<float> input_data = GetFloatDataInRange(-10.0f, 10.0f, 60);
+  RunCPUResizeOpTestWithScales(TestInputDef<float>({1, 3, 4, 5}, false, input_data),
                                {1.0f, 1.0f, 2.0f, 2.0f}, "linear", "align_corners", "",
                                ExpectedEPNodeAssignment::All);
 }
 
+// Test Resize downsample with mode: "linear", coordinate_transformation_mode: "align_corners"
+// TODO: Enable ResizeOpTest.ResizeOpLinearDownSampleTest_4DBilinear_align_corners in cpu resize_op tests when fixed.
+//
+// Input f32[1,1,2,4]: 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0
+// Expected output f32[1, 1, 1, 2]: 1.0, 4.0
+// Actual output f32[1, 1, 1, 2]: NaN, NaN
+TEST_F(QnnCPUBackendTests, DISABLED_Resize_DownSample_Linear_AlignCorners_scales) {
+  std::vector<float> input_data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f};
+  RunCPUResizeOpTestWithScales(TestInputDef<float>({1, 1, 2, 4}, false, input_data),
+                               {1.0f, 1.0f, 0.6f, 0.6f}, "linear", "align_corners", "",
+                               ExpectedEPNodeAssignment::All);
+}
+
+// Test Resize downsample with mode: "linear", coordinate_transformation_mode: "half_pixel"
+// TODO: Enable ResizeOpTest.ResizeOpLinearDownSampleTest_4DBilinear cpu resize_op tests when fixed.
+//
+// Input f32[1,1,2,4]: 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0
+// Expected output f32[1, 1, 1, 2]: 2.6666 4.3333
+// Actual output f32[1, 1, 1, 2]: NaN, NaN
+TEST_F(QnnCPUBackendTests, DISABLED_Resize_DownSample_Linear_HalfPixel_scales) {
+  std::vector<float> input_data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f};
+  RunCPUResizeOpTestWithScales(TestInputDef<float>({1, 1, 2, 4}, false, input_data),
+                               {1.0f, 1.0f, 0.6f, 0.6f}, "linear", "half_pixel", "",
+                               ExpectedEPNodeAssignment::All);
+}
+
 #if defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
 //
 // HTP tests:
 //
 
+// Test QDQ Resize downsample with mode: "linear", coordinate_transformation_mode: "align_corners"
+TEST_F(QnnHTPBackendTests, Resize_DownSample_Linear_AlignCorners) {
+  std::vector<float> input_data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f};
+  RunQDQResizeOpTest<uint8_t>(TestInputDef<float>({1, 1, 2, 4}, false, input_data),
+                              {1, 1, 1, 2}, "linear", "align_corners", "",
+                              ExpectedEPNodeAssignment::All);
+}
+
+// Test QDQ Resize downsample with mode: "linear", coordinate_transformation_mode: "half_pixel"
+TEST_F(QnnHTPBackendTests, Resize_DownSample_Linear_HalfPixel) {
+  std::vector<float> input_data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f};
+  RunQDQResizeOpTest<uint8_t>(TestInputDef<float>({1, 1, 2, 4}, false, input_data),
+                              {1, 1, 1, 2}, "linear", "half_pixel", "",
+                              ExpectedEPNodeAssignment::All);
+}
+
+// Test 2x QDQ Resize mode: "linear", coordinate_transformation_mode: "pytorch_half_pixel"
+// QNN EP uses QNN's Resize op.
 TEST_F(QnnHTPBackendTests, ResizeU8_2xLinearPytorchHalfPixel) {
-  RunQDQResizeOpTest<uint8_t>(TestInputDef<float>({1, 3, 4, 4}, false, -10.0f, 10.0f),
+  std::vector<float> input_data = GetFloatDataInRange(-10.0f, 10.0f, 48);
+  RunQDQResizeOpTest<uint8_t>(TestInputDef<float>({1, 3, 4, 4}, false, input_data),
                               {1, 3, 8, 8}, "linear", "pytorch_half_pixel", "",
                               ExpectedEPNodeAssignment::All);
 }
 
-TEST_F(QnnHTPBackendTests, ResizeU8_2xNearestHalfPixelRoundPreferFloor) {
-  RunQDQResizeOpTest<uint8_t>(TestInputDef<float>({1, 3, 4, 4}, false, -10.0f, 10.0f),
-                              {1, 3, 8, 8}, "nearest", "half_pixel", "round_prefer_floor",
+// Test 2x QDQ Resize mode: "linear", coordinate_transformation_mode: "half_pixel"
+// QNN EP uses QNN's Resize op.
+TEST_F(QnnHTPBackendTests, ResizeU8_2xLinearHalfPixel) {
+  std::vector<float> input_data = GetFloatDataInRange(-10.0f, 10.0f, 48);
+  RunQDQResizeOpTest<uint8_t>(TestInputDef<float>({1, 3, 4, 4}, false, input_data),
+                              {1, 3, 8, 8}, "linear", "half_pixel", "",
                               ExpectedEPNodeAssignment::All);
 }
 
-TEST_F(QnnHTPBackendTests, ResizeU8_2xNearestAsymmetricFloor) {
-  RunQDQResizeOpTest<uint8_t>(TestInputDef<float>({1, 3, 4, 4}, false, -10.0f, 10.0f),
-                              {1, 3, 8, 8}, "nearest", "asymmetric", "floor",
+// Test 2x QDQ Resize mode: "linear", coordinate_transformation_mode: "align_corners"
+// QNN EP uses QNN's Resize op.
+TEST_F(QnnHTPBackendTests, ResizeU8_2xLinearAlignCorners) {
+  std::vector<float> input_data = GetFloatDataInRange(-10.0f, 10.0f, 48);
+  RunQDQResizeOpTest<uint8_t>(TestInputDef<float>({1, 3, 4, 4}, false, input_data),
+                              {1, 3, 8, 8}, "linear", "align_corners", "",
                               ExpectedEPNodeAssignment::All);
 }
 
-// TODO: Investigate with Qualcomm. The qnn-onnx-converter tool translates ONNX Resize [nearest, asymmetric, ceil] to
-// QNN ResizeNearestNeighbor {align_corners: 0, half_pixel: 0}, which is NOT equivalent. It would be better to use
-// QNN's own Resize operator (instead of ResizeNearestNeighbor), but it doesn't support the "asymmetric" coordinate
-// transform mode.
-//
-// QNN v2.13: Inaccuracy detected for output 'output', element 189.
-// Output quant params: scale=0.078431375324726105, zero_point=127.
-// Expected val: -2.663428783416748
-// QNN QDQ val: 7.4509806632995605 (err 10.114409446716309)
-// CPU QDQ val: -2.6666667461395264 (err 0.0032379627227783203)
-TEST_F(QnnHTPBackendTests, DISABLED_ResizeU8_2xNearestAsymmetricCeil) {
-  RunQDQResizeOpTest<uint8_t>(TestInputDef<float>({1, 3, 4, 4}, false, -10.0f, 10.0f),
-                              {1, 3, 8, 8}, "nearest", "asymmetric", "ceil",
+// Test 2x QDQ Resize mode: "linear", coordinate_transformation_mode: "asymmetric"
+// QNN EP uses QNN's Resize op.
+TEST_F(QnnHTPBackendTests, ResizeU8_2xLinearAsymmetric) {
+  std::vector<float> input_data = GetFloatDataInRange(-10.0f, 10.0f, 48);
+  RunQDQResizeOpTest<uint8_t>(TestInputDef<float>({1, 3, 4, 4}, false, input_data),
+                              {1, 3, 8, 8}, "linear", "asymmetric", "",
                               ExpectedEPNodeAssignment::All);
 }
 
+// Test 2x QDQ Resize mode: "nearest", coordinate_transformation_mode: "half_pixel", nearest_mode: "round_prefer_floor"
+// QNN EP uses QNN's Resize op.
+TEST_F(QnnHTPBackendTests, ResizeU8_2xNearestHalfPixelRoundPreferFloor) {
+  std::vector<float> input_data = GetFloatDataInRange(-10.0f, 10.0f, 48);
+  RunQDQResizeOpTest<uint8_t>(TestInputDef<float>({1, 3, 4, 4}, false, input_data),
+                              {1, 3, 8, 8}, "nearest", "half_pixel", "round_prefer_floor",
+                              ExpectedEPNodeAssignment::All);
+}
+
+// Test that the nearest_mode "ceil" is not supported on the HTP backend.
+TEST_F(QnnHTPBackendTests, ResizeU8_NearestModeCeil_Unsupported) {
+  std::vector<float> input_data = GetFloatDataInRange(-10.0f, 10.0f, 48);
+  RunQDQResizeOpTest<uint8_t>(TestInputDef<float>({1, 3, 4, 4}, false, input_data),
+                              {1, 3, 8, 8}, "nearest", "asymmetric", "ceil",
+                              ExpectedEPNodeAssignment::None);
+}
+
+// Test 3x QDQ Resize mode: "nearest", coordinate_transformation_mode: "asymmetric", nearest_mode: "floor".
+// QNN EP uses QNN's ResizeNearestNeighbor op.
 TEST_F(QnnHTPBackendTests, ResizeU8_3xNearestAsymmetricFloor) {
-  RunQDQResizeOpTest<uint8_t>(TestInputDef<float>({1, 3, 4, 4}, false, -10.0f, 10.0f),
+  std::vector<float> input_data = GetFloatDataInRange(-10.0f, 10.0f, 48);
+  RunQDQResizeOpTest<uint8_t>(TestInputDef<float>({1, 3, 4, 4}, false, input_data),
                               {1, 3, 12, 12}, "nearest", "asymmetric", "floor",
                               ExpectedEPNodeAssignment::All);
 }
 
+// Test 2x QDQ Resize mode: "nearest", coordinate_transformation_mode: "asymmetric", nearest_mode: "round_prefer_floor"
+// QNN EP uses QNN's Resize op.
+TEST_F(QnnHTPBackendTests, ResizeU8_2xNearestAsymmetricRoundPreferFloor) {
+  std::vector<float> input_data = GetFloatDataInRange(-10.0f, 10.0f, 8);
+  RunQDQResizeOpTest<uint8_t>(TestInputDef<float>({1, 2, 2, 2}, false, input_data),
+                              {1, 2, 4, 4}, "nearest", "asymmetric", "round_prefer_floor",
+                              ExpectedEPNodeAssignment::All);
+}
+
+// Test 3x QDQ Resize mode: "nearest", coordinate_transformation_mode: "asymmetric", nearest_mode: "round_prefer_floor"
+// QNN EP uses QNN's Resize op.
+//
+// TODO: Inaccuracy detected for output 'output_0', element 2.
+// Output quant params: scale=0.078431375324726105, zero_point=127.
+// Expected val: -3.3333334922790527
+// QNN QDQ val: -9.960784912109375 (err 6.6274514198303223)
+// CPU QDQ val: -3.2941176891326904 (err 0.039215803146362305)
+//
+// More debugging info:
+// Input elements f32[1,1,2,2] = -10.0000000 -3.33333349 3.33333302 10.0000000
+// ORT CPU EP (f32 model) outputs: -10.0000000 -10.0000000 -3.33333349 -3.33333349 -3.33333349 -3.33333349 -10.00 ...
+// ORT CPU EP (qdq model) outputs: -9.96078491 -9.96078491 -3.29411769 -3.29411769 -3.29411769 -3.29411769 -9.961 ...
+// ORT QNN EP (qdq model) outputs: -9.96078491 -9.96078491 -9.96078491 -3.37254906 -3.37254906 -3.37254906 -9.961 ...
+TEST_F(QnnHTPBackendTests, DISABLED_ResizeU8_3xNearestAsymmetricRoundPreferFloor) {
+  std::vector<float> input_data = GetFloatDataInRange(-10.0f, 10.0f, 4);
+  RunQDQResizeOpTest<uint8_t>(TestInputDef<float>({1, 1, 2, 2}, false, input_data),
+                              {1, 1, 6, 6}, "nearest", "asymmetric", "round_prefer_floor",
+                              ExpectedEPNodeAssignment::All);
+}
+
+// Test 0.5x QDQ Resize mode: "nearest", coordinate_transformation_mode: "asymmetric", nearest_mode: "floor"
+// QNN EP uses QNN's ResizeNearestNeighbor op.
 TEST_F(QnnHTPBackendTests, ResizeU8_HalfNearestAsymmetricFloor) {
-  RunQDQResizeOpTest<uint8_t>(TestInputDef<float>({1, 3, 4, 4}, false, -10.0f, 10.0f),
+  std::vector<float> input_data = GetFloatDataInRange(-10.0f, 10.0f, 48);
+  RunQDQResizeOpTest<uint8_t>(TestInputDef<float>({1, 3, 4, 4}, false, input_data),
                               {1, 3, 2, 2}, "nearest", "asymmetric", "floor",
                               ExpectedEPNodeAssignment::All);
 }
diff --git a/onnxruntime/test/providers/qnn/simple_op_htp_test.cc b/onnxruntime/test/providers/qnn/simple_op_htp_test.cc
index f77c098f72116..3435bd71aa4b3 100644
--- a/onnxruntime/test/providers/qnn/simple_op_htp_test.cc
+++ b/onnxruntime/test/providers/qnn/simple_op_htp_test.cc
@@ -447,8 +447,9 @@ TEST_F(QnnHTPBackendTests, UnaryOp_Log_U16) {
 // Check that QNN compiles DQ -> Softmax -> Q as a single unit.
 // Test that the default axis (-1) for SoftMax opset 13 works.
 TEST_F(QnnHTPBackendTests, UnaryOp_Softmax13_DefaultAxis) {
+  const std::vector<float> input_data = GetFloatDataInRange(-5.0f, 5.0f, 6);
   RunQDQOpTest<uint8_t>("Softmax",
-                        {TestInputDef<float>({1, 2, 3}, false, -5.0f, 5.0f)},
+                        {TestInputDef<float>({1, 2, 3}, false, input_data)},
                         {},  // Uses default axis of -1 for opset 13
                         13,
                         ExpectedEPNodeAssignment::All);
@@ -466,14 +467,43 @@ TEST_F(QnnHTPBackendTests, UnaryOp_Softmax13_U16_DefaultAxis) {
                          true);        // Use com.microsoft domain for Q/DQ ops
 }
 
-// Check that QNN compiles DQ -> Softmax -> Q as a single unit.
-// Test that an axis != -1 is not supported.
-TEST_F(QnnHTPBackendTests, UnaryOp_Softmax13_UnsupportedAxis) {
+// Test that 8-bit QDQ Softmax (opset 13) with axis != -1 is supported by QNN EP.
+// QNN EP will wrap the operator with transposes.
+TEST_F(QnnHTPBackendTests, UnaryOp_Softmax13_NonLastAxis) {
+  const std::vector<float> input_data = {0.0f, 1.0f, 2.0f, 10.0f, 11.0f, 12.0f, 100.0f, 110.0f, 120.0f,
+                                         1.0856307f, 0.99734545f, 0.2829785f, 1.5062947f, 0.5786002f, 1.6514366f,
+                                         2.4266791f, 0.42891264f, 1.2659363f};
   RunQDQOpTest<uint8_t>("Softmax",
-                        {TestInputDef<float>({1, 2, 3}, false, -5.0f, 5.0f)},
+                        {TestInputDef<float>({1, 2, 3, 3}, false, input_data)},
                         {utils::MakeAttribute("axis", static_cast<int64_t>(1))},
                         13,
-                        ExpectedEPNodeAssignment::None);
+                        ExpectedEPNodeAssignment::All);
+}
+
+// Test that 8-bit QDQ Softmax (opset 13) with axis != -1 is supported by QNN EP.
+// QNN EP will wrap the operator with transposes.
+// This is a configuration used in one of our partner's models.
+TEST_F(QnnHTPBackendTests, UnaryOp_Softmax13_NonLastAxis_LargeInput) {
+  const std::vector<float> input_data = GetFloatDataInRange(-50.0f, 50.0f, 124);
+  RunQDQOpTest<uint8_t>("Softmax",
+                        {TestInputDef<float>({1, 124, 1}, false, input_data)},
+                        {utils::MakeAttribute("axis", static_cast<int64_t>(1))},
+                        13,
+                        ExpectedEPNodeAssignment::All);
+}
+
+// Test that 16-bit QDQ Softmax (opset 13) with axis != -1 is supported by QNN EP.
+// QNN EP will wrap the operator with transposes.
+// This is a configuration used in one of our partner's models.
+TEST_F(QnnHTPBackendTests, UnaryOp_Softmax13_U16_NonLastAxis_LargeInput) {
+  const std::vector<float> input_data = GetFloatDataInRange(-50.0f, 50.0f, 124);
+  RunQDQOpTest<uint16_t>("Softmax",
+                         {TestInputDef<float>({1, 124, 1}, false, input_data)},
+                         {utils::MakeAttribute("axis", static_cast<int64_t>(1))},
+                         13,
+                         ExpectedEPNodeAssignment::All,
+                         kOnnxDomain,
+                         true);
 }
 
 // Check that QNN compiles DQ -> Softmax -> Q as a single unit.
@@ -507,15 +537,15 @@ TEST_F(QnnHTPBackendTests, UnaryOp_LogSoftmax13_DefaultAxis) {
                         ExpectedEPNodeAssignment::All);
 }
 
-// Check that QNN compiles DQ -> LogSoftmax -> Q as a single unit.
-// Test that an axis != -1 is not supported.
-TEST_F(QnnHTPBackendTests, UnaryOp_LogSoftmax13_UnsupportedAxis) {
+// Test that 8-bit QDQ LogSoftmax (opset 13) with axis != -1 is supported by QNN EP.
+// QNN EP will wrap the operator with transposes.
+TEST_F(QnnHTPBackendTests, UnaryOp_LogSoftmax13_NonLastAxis) {
   std::vector<float> input_data = GetFloatDataInRange(-5.0f, 5.0f, 6);
   RunQDQOpTest<uint8_t>("LogSoftmax",
                         {TestInputDef<float>({1, 2, 3}, false, input_data)},
                         {utils::MakeAttribute("axis", static_cast<int64_t>(1))},
                         13,
-                        ExpectedEPNodeAssignment::None);
+                        ExpectedEPNodeAssignment::All);
 }
 
 // Check that QNN compiles DQ -> LogSoftmax -> Q as a single unit.
@@ -679,10 +709,11 @@ TEST_F(QnnHTPBackendTests, SpaceToDepthOp_U16) {
                          true);        // Use com.microsoft domain for Q/DQ ops
 }
 
-// Run QDQ model on HTP twice
-// 1st run will generate the Qnn context cache binary file
-// 2nd run will load and run from Qnn context cache binary file
-TEST_F(QnnHTPBackendTests, ContextBinaryCacheTest) {
+// Run QDQ model on HTP 3 times
+// 1st run will generate the Qnn context cache onnx file
+// 2nd run will load and run from QDQ model + Qnn context cache model
+// 3rd run directly loads and run from Qnn context cache model
+TEST_F(QnnHTPBackendTests, ContextBinaryCacheEmbedModeTest) {
   ProviderOptions provider_options;
 #if defined(_WIN32)
   provider_options["backend_path"] = "QnnHtp.dll";
@@ -690,7 +721,7 @@ TEST_F(QnnHTPBackendTests, ContextBinaryCacheTest) {
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
   provider_options["qnn_context_cache_enable"] = "1";
-  const std::string context_binary_file = "./qnn_context_binary_test.bin";
+  const std::string context_binary_file = "./qnn_context_binary_test.onnx";
   provider_options["qnn_context_cache_path"] = context_binary_file;
 
   const TestInputDef<float> input_def({1, 2, 3}, false, -10.0f, 10.0f);
@@ -707,12 +738,176 @@ TEST_F(QnnHTPBackendTests, ContextBinaryCacheTest) {
   // Make sure the Qnn context cache binary file is generated
   EXPECT_TRUE(std::filesystem::exists(context_binary_file.c_str()));
 
-  // 2nd run will load and run from Qnn context cache binary file
+  // 2nd run loads and run from QDQ model + Qnn context cache model
   TestQDQModelAccuracy(BuildOpTestCase<float>(op_type, {input_def}, {}, {}),
                        BuildQDQOpTestCase<uint8_t>(op_type, {input_def}, {}, {}),
                        provider_options,
                        14,
                        ExpectedEPNodeAssignment::All);
+
+  // 3rd run directly loads and run from Qnn context cache model
+  TestQDQModelAccuracy(BuildOpTestCase<float>(op_type, {input_def}, {}, {}),
+                       BuildQDQOpTestCase<uint8_t>(op_type, {input_def}, {}, {}),
+                       provider_options,
+                       14,
+                       ExpectedEPNodeAssignment::All,
+                       1e-4f,
+                       logging::Severity::kERROR,
+                       context_binary_file);
+}
+
+// Run QDQ model on HTP 3 times
+// 1st run will generate the Onnx skeleton file + Qnn context cache binary file
+// 2nd run will loads and run from QDQ model + Onnx skeleton file + Qnn context cache binary file
+// 3rd run directly loads and run from Onnx skeleton file + Qnn context cache binary file
+TEST_F(QnnHTPBackendTests, ContextBinaryCacheNonEmbedModeTest) {
+  ProviderOptions provider_options;
+#if defined(_WIN32)
+  provider_options["backend_path"] = "QnnHtp.dll";
+#else
+  provider_options["backend_path"] = "libQnnHtp.so";
+#endif
+  provider_options["qnn_context_cache_enable"] = "1";
+  const std::string context_binary_file = "./qnn_context_cache_non_embed.onnx";
+  provider_options["qnn_context_cache_path"] = context_binary_file;
+  provider_options["qnn_context_embed_mode"] = "0";
+
+  const TestInputDef<float> input_def({1, 2, 3}, false, -10.0f, 10.0f);
+  const std::string op_type = "Atan";
+
+  // Runs model with DQ-> Atan-> Q and compares the outputs of the CPU and QNN EPs.
+  // 1st run will generate the Onnx skeleton file + Qnn context cache binary file
+  TestQDQModelAccuracy(BuildOpTestCase<float>(op_type, {input_def}, {}, {}),
+                       BuildQDQOpTestCase<uint8_t>(op_type, {input_def}, {}, {}),
+                       provider_options,
+                       14,
+                       ExpectedEPNodeAssignment::All);
+
+  // Check the Onnx skeleton file is generated
+  EXPECT_TRUE(std::filesystem::exists(context_binary_file.c_str()));
+  // Check the Qnn context cache binary file is generated
+  EXPECT_TRUE(std::filesystem::exists("qnn_context_cache_non_embed.onnx_QNNExecutionProvider_QNN_8283143575221199085_1_0.bin"));
+
+  // 2nd run loads and run from QDQ model + Onnx skeleton file + Qnn context cache binary file
+  TestQDQModelAccuracy(BuildOpTestCase<float>(op_type, {input_def}, {}, {}),
+                       BuildQDQOpTestCase<uint8_t>(op_type, {input_def}, {}, {}),
+                       provider_options,
+                       14,
+                       ExpectedEPNodeAssignment::All);
+
+  // 3rd run directly loads and run from Onnx skeleton file + Qnn context cache binary file
+  TestQDQModelAccuracy(BuildOpTestCase<float>(op_type, {input_def}, {}, {}),
+                       BuildQDQOpTestCase<uint8_t>(op_type, {input_def}, {}, {}),
+                       provider_options,
+                       14,
+                       ExpectedEPNodeAssignment::All,
+                       1e-4f,
+                       logging::Severity::kERROR,
+                       context_binary_file);
+}
+
+// Run QDQ model on HTP 2 times
+// 1st run will generate the Onnx skeleton file + Qnn context cache binary file
+// Then delete the context bin file to make the 2nd sesssion.Initialize() return the status with code INVALID_GRAPH
+TEST_F(QnnHTPBackendTests, ContextBinaryCache_InvalidGraph) {
+  ProviderOptions provider_options;
+#if defined(_WIN32)
+  provider_options["backend_path"] = "QnnHtp.dll";
+#else
+  provider_options["backend_path"] = "libQnnHtp.so";
+#endif
+  provider_options["qnn_context_cache_enable"] = "1";
+  const std::string context_binary_file = "./qnn_context_cache_non_embed.onnx";
+  provider_options["qnn_context_cache_path"] = context_binary_file;
+  provider_options["qnn_context_embed_mode"] = "0";
+
+  const TestInputDef<float> input_def({1, 2, 3}, false, -10.0f, 10.0f);
+  const std::string op_type = "Atan";
+
+  // Runs model with DQ-> Atan-> Q and compares the outputs of the CPU and QNN EPs.
+  // 1st run will generate the Onnx skeleton file + Qnn context cache binary file
+  TestQDQModelAccuracy(BuildOpTestCase<float>(op_type, {input_def}, {}, {}),
+                       BuildQDQOpTestCase<uint8_t>(op_type, {input_def}, {}, {}),
+                       provider_options,
+                       14,
+                       ExpectedEPNodeAssignment::All);
+
+  // Check the Onnx skeleton file is generated
+  EXPECT_TRUE(std::filesystem::exists(context_binary_file.c_str()));
+  // Check the Qnn context cache binary file is generated
+  std::filesystem::path context_bin = "qnn_context_cache_non_embed.onnx_QNNExecutionProvider_QNN_8283143575221199085_1_0.bin";
+  EXPECT_TRUE(std::filesystem::exists(context_bin));
+  // Delete the Qnn context cache binary file
+  EXPECT_TRUE(std::filesystem::remove(context_bin));
+
+  // loads and run from Onnx skeleton file + Qnn context cache binary file
+  onnx::ModelProto model_proto;
+  onnxruntime::Model qnn_ctx_model;
+  // Load the QNN context cache model from path specified
+  ASSERT_STATUS_OK(qnn_ctx_model.Load(ToPathString(context_binary_file), model_proto));
+  std::string qnn_ctx_model_data;
+  model_proto.SerializeToString(&qnn_ctx_model_data);
+
+  SessionOptions so;
+  so.session_logid = "qnn_ctx_model_logger";
+  RunOptions run_options;
+  run_options.run_tag = so.session_logid;
+
+  InferenceSessionWrapper session_object{so, GetEnvironment()};
+
+  std::string provider_type = kCpuExecutionProvider;
+  ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(QnnExecutionProviderWithOptions(provider_options)));
+  ASSERT_STATUS_OK(session_object.Load(qnn_ctx_model_data.data(), static_cast<int>(qnn_ctx_model_data.size())));
+  // Verify the return status with code INVALID_GRAPH
+  ASSERT_TRUE(session_object.Initialize().Code() == common::StatusCode::INVALID_GRAPH);
+}
+
+// Run QDQ model on HTP with 2 inputs
+// 1st run will generate the Qnn context cache onnx file
+// 2nd run will load and run from QDQ model + Qnn context cache model
+// 3rd run directly loads and run from Qnn context cache model
+TEST_F(QnnHTPBackendTests, ContextBinary2InputsTest) {
+  ProviderOptions provider_options;
+#if defined(_WIN32)
+  provider_options["backend_path"] = "QnnHtp.dll";
+#else
+  provider_options["backend_path"] = "libQnnHtp.so";
+#endif
+  provider_options["qnn_context_cache_enable"] = "1";
+  const std::string context_binary_file = "./qnn_context_binary_2inputs_test.onnx";
+  provider_options["qnn_context_cache_path"] = context_binary_file;
+
+  const TestInputDef<float> input_def1({1, 2, 3}, false, -10.0f, 10.0f);
+  const TestInputDef<float> input_def2({1, 2, 3}, false, -10.0f, 10.0f);
+  const std::string op_type = "Add";
+
+  // Runs model with DQ-> Add-> Q and compares the outputs of the CPU and QNN EPs.
+  // 1st run will generate the Qnn context cache binary file
+  TestQDQModelAccuracy(BuildOpTestCase<float>(op_type, {input_def1, input_def2}, {}, {}),
+                       BuildQDQOpTestCase<uint8_t>(op_type, {input_def1, input_def2}, {}, {}),
+                       provider_options,
+                       14,
+                       ExpectedEPNodeAssignment::All);
+
+  // Make sure the Qnn context cache binary file is generated
+  EXPECT_TRUE(std::filesystem::exists(context_binary_file.c_str()));
+
+  // 2nd run loads and run from QDQ model + Qnn context cache model
+  TestQDQModelAccuracy(BuildOpTestCase<float>(op_type, {input_def1, input_def2}, {}, {}),
+                       BuildQDQOpTestCase<uint8_t>(op_type, {input_def1, input_def2}, {}, {}),
+                       provider_options,
+                       14,
+                       ExpectedEPNodeAssignment::All);
+
+  // 3rd run directly loads and run from Qnn context cache model
+  TestQDQModelAccuracy(BuildOpTestCase<float>(op_type, {input_def1, input_def2}, {}, {}),
+                       BuildQDQOpTestCase<uint8_t>(op_type, {input_def1, input_def2}, {}, {}),
+                       provider_options,
+                       14,
+                       ExpectedEPNodeAssignment::All,
+                       1e-4f,
+                       logging::Severity::kERROR,
+                       context_binary_file);
 }
 
 TEST_F(QnnHTPBackendTests, QuantAccuracyTest) {
@@ -1080,6 +1275,28 @@ TEST_F(QnnHTPBackendTests, VariadicOp_Concat_2Inputs_2ndAxis) {
                         13,
                         ExpectedEPNodeAssignment::All);
 }
+
+TEST_F(QnnHTPBackendTests, LpNormalization_u8_rank4) {
+  std::vector<float> input_data = GetFloatDataInRange(-10.0f, 10.0f, 8);
+  RunQDQOpTest<uint8_t>("LpNormalization",
+                        {TestInputDef<float>({1, 2, 2, 2}, false, input_data)},
+                        {utils::MakeAttribute("axis", static_cast<int64_t>(-1)),  // Last axis
+                         utils::MakeAttribute("p", static_cast<int64_t>(2))},     // Order 2 to map to QNN's L2Norm operator
+                        13,
+                        ExpectedEPNodeAssignment::All);
+}
+
+TEST_F(QnnHTPBackendTests, LpNormalization_u16_rank4) {
+  std::vector<float> input_data = GetFloatDataInRange(-10.0f, 10.0f, 8);
+  RunQDQOpTest<uint16_t>("LpNormalization",
+                         {TestInputDef<float>({1, 2, 2, 2}, false, input_data)},
+                         {utils::MakeAttribute("axis", static_cast<int64_t>(-1)),  // Last axis
+                          utils::MakeAttribute("p", static_cast<int64_t>(2))},     // Order 2 to map to QNN's L2Norm operator
+                         13,
+                         ExpectedEPNodeAssignment::All,
+                         kOnnxDomain,
+                         true);
+}
 #endif  // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
 
 }  // namespace test
diff --git a/onnxruntime/test/providers/qnn/slice_htp_test.cc b/onnxruntime/test/providers/qnn/slice_htp_test.cc
index edc079dc65276..07c97d2d7b1fa 100644
--- a/onnxruntime/test/providers/qnn/slice_htp_test.cc
+++ b/onnxruntime/test/providers/qnn/slice_htp_test.cc
@@ -14,6 +14,48 @@
 
 namespace onnxruntime {
 namespace test {
+
+// Test for "index-out-of-bounds" bug that occurred when a Slice operator
+// shared one of its initializer inputs with another op that was processed by QNN EP first.
+TEST_F(QnnCPUBackendTests, Slice_SharedInitializersBugFix) {
+  // Model with an Add that processes a shared initializer before Slice is processed.
+  GetTestModelFn model_fn = [](ModelTestBuilder& builder) {
+    NodeArg* input0 = builder.MakeInput<int32_t>({2, 2}, {1, 2, 3, 4});
+
+    // Initializers
+    NodeArg* starts_input = builder.Make1DInitializer<int32_t>({1, 0});  // Shared by Add
+    NodeArg* ends_input = builder.Make1DInitializer<int32_t>({2, 2});
+    NodeArg* axes_input = builder.Make1DInitializer<int32_t>({0, 1});
+    NodeArg* steps_input = builder.Make1DInitializer<int32_t>({1, 1});
+
+    // Add input0 with a shared initializer.
+    NodeArg* add_output = builder.MakeIntermediate();
+    builder.AddNode("Add", {input0, starts_input}, {add_output});
+
+    // Cast Add's output to float.
+    NodeArg* cast_output = builder.MakeIntermediate();
+    Node& cast_node = builder.AddNode("Cast", {add_output}, {cast_output});
+    cast_node.AddAttribute("to", static_cast<int64_t>(ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT));
+
+    // Slice Cast's output
+    NodeArg* slice0_out = builder.MakeOutput();
+    builder.AddNode("Slice", {cast_output, starts_input, ends_input, axes_input, steps_input}, {slice0_out});
+  };
+
+  ProviderOptions provider_options;
+
+#if defined(_WIN32)
+  provider_options["backend_path"] = "QnnCpu.dll";
+#else
+  provider_options["backend_path"] = "libQnnCpu.so";
+#endif
+
+  RunQnnModelTest(model_fn,
+                  provider_options,
+                  13,  // opset
+                  ExpectedEPNodeAssignment::All);
+}
+
 #if defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
 
 /**
@@ -26,6 +68,7 @@ namespace test {
  * \param axes_def The axes input's definition.
  * \param steps_def The steps input's definition.
  * \param expected_ep_assignment How many nodes are expected to be assigned to QNN (All, Some, or None).
+ * \param use_contrib_qdq Force Q/DQ ops to use the com.microsoft domain (enable 16-bit).
  */
 template <typename QuantType = uint8_t>
 static void RunSliceQDQTest(const TestInputDef<float>& data_def,
@@ -33,7 +76,8 @@ static void RunSliceQDQTest(const TestInputDef<float>& data_def,
                             const TestInputDef<int64_t>& ends_def,
                             const TestInputDef<int64_t>& axes_def,
                             const TestInputDef<int64_t>& steps_def,
-                            ExpectedEPNodeAssignment expected_ep_assignment) {
+                            ExpectedEPNodeAssignment expected_ep_assignment,
+                            bool use_contrib_qdq = false) {
   ProviderOptions provider_options;
 #if defined(_WIN32)
   provider_options["backend_path"] = "QnnHtp.dll";
@@ -45,7 +89,8 @@ static void RunSliceQDQTest(const TestInputDef<float>& data_def,
   const std::vector<TestInputDef<int64_t>> int64_inputs = {starts_def, ends_def, axes_def, steps_def};
 
   TestQDQModelAccuracy(BuildOpTestCase<float, int64_t>("Slice", f32_inputs, int64_inputs, {}),
-                       BuildQDQOpTestCase<QuantType, int64_t>("Slice", f32_inputs, int64_inputs, {}),
+                       BuildQDQOpTestCase<QuantType, int64_t>("Slice", f32_inputs, int64_inputs, {}, kOnnxDomain,
+                                                              use_contrib_qdq),
                        provider_options,
                        18,
                        expected_ep_assignment);
@@ -123,6 +168,39 @@ TEST_F(QnnHTPBackendTests, SliceInt32OnHTP) {
                                ExpectedEPNodeAssignment::All);
 }
 
+// Test 8-bit QDQ Slice with more than 1 axis.
+TEST_F(QnnHTPBackendTests, SliceU8_MultAxes) {
+  std::vector<float> input_data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f};
+  RunSliceQDQTest<uint8_t>(TestInputDef<float>({2, 4}, false, input_data),
+                           TestInputDef<int64_t>({2}, true, {1, 0}),  // starts
+                           TestInputDef<int64_t>({2}, true, {2, 3}),  // ends
+                           TestInputDef<int64_t>({2}, true, {0, 1}),  // axes
+                           TestInputDef<int64_t>({2}, true, {1, 2}),  // steps
+                           ExpectedEPNodeAssignment::All);
+}
+
+// Test 16-bit QDQ Slice with more than 1 axis.
+TEST_F(QnnHTPBackendTests, SliceU16_MultAxes) {
+  std::vector<float> input_data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f};
+  RunSliceQDQTest<uint16_t>(TestInputDef<float>({2, 4}, false, input_data),
+                            TestInputDef<int64_t>({2}, true, {1, 0}),  // starts
+                            TestInputDef<int64_t>({2}, true, {2, 3}),  // ends
+                            TestInputDef<int64_t>({2}, true, {0, 1}),  // axes
+                            TestInputDef<int64_t>({2}, true, {1, 2}),  // steps
+                            ExpectedEPNodeAssignment::All,
+                            true);  // Use com.microsoft Q/DQ ops for 16-bit
+}
+
+// Test 8-bit QDQ Slice with more than 1 axis and an end value that exceeds the associated dimension size.
+TEST_F(QnnHTPBackendTests, SliceU8_MultAxes_LargeEnd) {
+  std::vector<float> input_data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f};
+  RunSliceQDQTest<uint8_t>(TestInputDef<float>({2, 4}, false, input_data),
+                           TestInputDef<int64_t>({2}, true, {0, 1}),      // starts
+                           TestInputDef<int64_t>({2}, true, {-1, 1000}),  // ends
+                           TestInputDef<int64_t>({2}, true, {0, 1}),      // axes
+                           TestInputDef<int64_t>({2}, true, {1, 1}),      // steps
+                           ExpectedEPNodeAssignment::All);
+}
 #endif  // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
 
 }  // namespace test
diff --git a/onnxruntime/test/providers/qnn/where_htp_test.cc b/onnxruntime/test/providers/qnn/where_htp_test.cc
index 49f3ef0fd983a..2d2aa23c28235 100644
--- a/onnxruntime/test/providers/qnn/where_htp_test.cc
+++ b/onnxruntime/test/providers/qnn/where_htp_test.cc
@@ -126,6 +126,7 @@ TEST_F(QnnHTPBackendTests, WhereLargeDataU8) {
 // QnnDsp <E> graph prepare failed 13
 // QnnDsp <E> Failed to finalize graph QNN_4851394333842096633_1 with err: 1002
 // QnnDsp <E> Failed to finalize graph (id: 1) with err 1002
+// Worked with QNN v2.16
 TEST_F(QnnHTPBackendTests, DISABLED_WhereLargeDataBroadcastU8) {
   RunWhereQDQTest(TestInputDef<bool>({5120}, false, false, true),
                   TestInputDef<float>({1, 16, 64, 5120}, true, 0.0f, 1.0f),
@@ -133,6 +134,17 @@ TEST_F(QnnHTPBackendTests, DISABLED_WhereLargeDataBroadcastU8) {
                   ExpectedEPNodeAssignment::All);
 }
 
+// .\hexagon\prepare\seq\initial_sequencer_dp.cc:149:ERROR:A single op,
+// "q::Broadcast" (Op ID: 19a200000012), requires 0xb40000 bytes of TCM, which is greater than the TCM size of 0x400000!
+// .\hexagon\prepare\seq\initial_sequencer_dp.cc : 156 : ERROR :
+// The name of the failing op before optimization is : "q::QNN_ElementWiseSelect"(Op ID : 12).
+TEST_F(QnnHTPBackendTests, DISABLED_WhereLargeDataBroadcastTransformedU8) {
+  RunWhereQDQTest(TestInputDef<bool>({1, 1, 5120, 1}, false, false, true),
+                  TestInputDef<float>({1, 64, 5120, 16}, true, 0.0f, 1.0f),
+                  TestInputDef<float>({1, 1, 1, 1}, true, {3.0f}),
+                  ExpectedEPNodeAssignment::All);
+}
+
 #endif  // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
 
 }  // namespace test
diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc
index 288cdfca2b56d..d9f917f6d187c 100644
--- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc
+++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc
@@ -175,41 +175,7 @@ void RunWithOneSessionSingleThreadInference(std::string model_name, std::string
   std::vector<int64_t> expected_dims_mul_m = {1, 3, 2};
   std::vector<float> expected_values_mul_m = {3.0f, 6.0f, 9.0f, 12.0f, 15.0f, 18.0f};
 
-  OrtTensorRTProviderOptionsV2 params{
-      0,
-      0,
-      nullptr,
-      1000,
-      1,
-      1 << 30,
-      0,
-      0,
-      nullptr,
-      0,
-      0,
-      0,
-      0,
-      0,
-      nullptr,
-      0,
-      nullptr,
-      0,
-      0,
-      0,
-      0,
-      0,
-      0,
-      0,
-      0,
-      3,
-      -1,
-      nullptr,
-      nullptr,
-      nullptr,
-      nullptr,
-      nullptr,
-      0};
-
+  OrtTensorRTProviderOptionsV2 params;
   params.trt_engine_cache_enable = 1;
   std::unique_ptr<IExecutionProvider> execution_provider = TensorrtExecutionProviderWithOptions(&params);
   EXPECT_TRUE(session_object.RegisterExecutionProvider(std::move(execution_provider)).IsOK());
@@ -259,41 +225,7 @@ void RunWithOneSessionMultiThreadsInference(std::string model_name, std::string
   std::vector<int64_t> expected_dims_nonzero_m = {3, 6};
   std::vector<int64_t> expected_values_nonzero_m = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 0, 1, 0, 1, 0, 1};
 
-  OrtTensorRTProviderOptionsV2 params{
-      0,
-      0,
-      nullptr,
-      1000,
-      1,
-      1 << 30,
-      0,
-      0,
-      nullptr,
-      0,
-      0,
-      0,
-      0,
-      0,
-      nullptr,
-      0,
-      nullptr,
-      0,
-      0,
-      0,
-      0,
-      0,
-      0,
-      0,
-      0,
-      3,
-      -1,
-      nullptr,
-      nullptr,
-      nullptr,
-      nullptr,
-      nullptr,
-      0};
-
+  OrtTensorRTProviderOptionsV2 params;
   params.trt_engine_cache_enable = 1;
   std::unique_ptr<IExecutionProvider> execution_provider = TensorrtExecutionProviderWithOptions(&params);
   EXPECT_TRUE(session_object.RegisterExecutionProvider(std::move(execution_provider)).IsOK());
@@ -422,41 +354,7 @@ TEST(TensorrtExecutionProviderTest, TRTPluginsCustomOpTest) {
   output_names.push_back("output");
   std::vector<OrtValue> fetches;
 
-  OrtTensorRTProviderOptionsV2 params{
-      0,
-      0,
-      nullptr,
-      1000,
-      1,
-      1 << 30,
-      0,
-      0,
-      nullptr,
-      0,
-      0,
-      0,
-      0,
-      0,
-      nullptr,
-      0,
-      nullptr,
-      0,
-      0,
-      0,
-      0,
-      0,
-      0,
-      0,
-      0,
-      3,
-      -1,
-      nullptr,
-      nullptr,
-      nullptr,
-      nullptr,
-      nullptr,
-      0};
-
+  OrtTensorRTProviderOptionsV2 params;
   std::unique_ptr<IExecutionProvider> execution_provider = TensorrtExecutionProviderWithOptions(&params);
   EXPECT_TRUE(session_object.RegisterExecutionProvider(std::move(execution_provider)).IsOK());
   std::cout << model_name << std::endl;
@@ -516,41 +414,7 @@ TEST_P(TensorrtExecutionProviderCacheTest, Run) {
   std::vector<int64_t> expected_dims_mul_m = {1, 3, 2};
   std::vector<float> expected_values_mul_m = {3.0f, 6.0f, 9.0f, 12.0f, 15.0f, 18.0f};
 
-  OrtTensorRTProviderOptionsV2 params{
-      0,
-      0,
-      nullptr,
-      1000,
-      1,
-      1 << 30,
-      0,
-      0,
-      nullptr,
-      0,
-      0,
-      0,
-      0,
-      0,
-      nullptr,
-      0,
-      nullptr,
-      0,
-      0,
-      0,
-      0,
-      0,
-      0,
-      0,
-      0,
-      3,
-      -1,
-      nullptr,
-      nullptr,
-      nullptr,
-      nullptr,
-      nullptr,
-      0};
-
+  OrtTensorRTProviderOptionsV2 params;
   if (cache_type.compare("engine") == 0) {
     /* Following code block tests the functionality of engine and optimization profile of ORT TRT, including:
      * - engine cache serialization/de-serialization
@@ -726,6 +590,7 @@ TEST_P(TensorrtExecutionProviderCacheTest, Run) {
     // uint64_t compilation_without_cache_ms, compilation_with_cache_ms;
 
     // First session is created with TRT EP with timing cache enabled
+    // Not specifying a trt_timing_cache_path will result in using the working directory
     params.trt_timing_cache_enable = 1;
     {
       // auto start = chrono::steady_clock::now();
diff --git a/onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc b/onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc
index 225649ef391b1..65db81e7f4013 100644
--- a/onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc
+++ b/onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc
@@ -9,8 +9,9 @@
 #include "core/framework/utils.h"
 #include "core/graph/graph.h"
 #include "core/providers/xnnpack/xnnpack_execution_provider.h"
-#include "core/session/onnxruntime_cxx_api.h"
 #include "core/session/inference_session.h"
+#include "core/session/onnxruntime_cxx_api.h"
+#include "core/session/onnxruntime_session_options_config_keys.h"
 
 #include "test/common/tensor_op_test_utils.h"
 #include "test/framework/test_utils.h"
@@ -214,8 +215,13 @@ static void RunModelTestWithPath(const ORTCHAR_T* ort_model_path, const char* gr
   NameMLValMap feeds;
   feeds.insert(std::make_pair("input", ml_value_x));
 
+  // XNNPACK supports int8 data
+  std::function<void(SessionOptions&)> so_updater = [](SessionOptions& so) {
+    ASSERT_STATUS_OK(so.config_options.AddConfigEntry(kOrtSessionOptionsQDQIsInt8Allowed, "1"));
+  };
+
   auto ep = DefaultXnnpackExecutionProvider();
-  RunAndVerifyOutputsWithEP(ort_model_path, graph_name, std::move(ep), feeds, params);
+  RunAndVerifyOutputsWithEP(ort_model_path, graph_name, std::move(ep), feeds, params, so_updater);
 }
 
 TEST(XnnpackEP, DISABLED_TestQDQConvU8U8) {  //  [ONNXRuntimeError] : 9 : NOT_IMPLEMENTED : Could not find an implementation for QuantizeLinear(19) node with name 'node_token_12'
@@ -254,8 +260,7 @@ TEST(XnnpackEP, DISABLED_TestQDQConvS8S8) {  //  [ONNXRuntimeError] : 9 : NOT_IM
 
 TEST(XnnpackEP, TestQDQConvS8S8_per_channel) {
   std::function<void(const Graph&)> graph_verify = [](const Graph& graph) -> void {
-    ASSERT_EQ(graph.NumberOfNodes(), 5) << "Transpose*2 + dq +q +qlinearconv "
-                                           "leaving 5 nodes.";
+    ASSERT_EQ(graph.NumberOfNodes(), 5) << "-> Q -> Transpose -> QLinearConv -> Transpose -> DQ.";
   };
   const ORTCHAR_T* ort_model_path = ORT_MODEL_FOLDER "conv_qdq_s8s8_perchannel.onnx";
   RunModelTestWithPath(ort_model_path, "xnnpack_qdq_test_graph_conv_s8s8_perchannel", graph_verify, 0.2f);
diff --git a/onnxruntime/test/python/onnx_backend_test_series.py b/onnxruntime/test/python/onnx_backend_test_series.py
index e8dc93049e18e..c48b07422d452 100644
--- a/onnxruntime/test/python/onnx_backend_test_series.py
+++ b/onnxruntime/test/python/onnx_backend_test_series.py
@@ -73,7 +73,7 @@ def apply_filters(filters, category):
     opset_version = f"opset{onnx.defs.onnx_opset_version()}"
     validated_filters = []
     for f in filters[category]:
-        if type(f) is list:
+        if type(f) is list:  # noqa: E721
             opset_regex = f[0]
             filter_regex = f[1]
             opset_match = re.match(opset_regex, opset_version)
@@ -140,6 +140,9 @@ def create_backend_test(test_name=None):
         if backend.supports_device("OPENVINO_CPU_FP16"):
             current_failing_tests += apply_filters(filters, "current_failing_tests_OPENVINO_CPU_FP16")
 
+        if backend.supports_device("OPENVINO_NPU_FP16"):
+            current_failing_tests += apply_filters(filters, "current_failing_tests_OPENVINO_NPU_FP16")
+
         if backend.supports_device("OPENVINO"):
             current_failing_tests += apply_filters(filters, "current_failing_tests_OPENVINO_opset18")
 
diff --git a/onnxruntime/test/python/onnxruntime_test_collective.py b/onnxruntime/test/python/onnxruntime_test_collective.py
index db1ebb5384730..4882b403c3c91 100644
--- a/onnxruntime/test/python/onnxruntime_test_collective.py
+++ b/onnxruntime/test/python/onnxruntime_test_collective.py
@@ -155,6 +155,7 @@ def _create_alltoall_ut_model_for_boolean_tensor(
         )
         return ORTBertPretrainTest._create_model_with_opsets(graph_def)
 
+    @unittest.skipIf(not ort.has_collective_ops(), reason="onnx not compiled with mpi support")
     @parameterized.expand(
         [
             (np.float32, TensorProto.FLOAT),
@@ -193,6 +194,7 @@ def test_all_reduce(self, np_elem_type, elem_type):
             outputs[0], size * input, err_msg=f"{rank}: AllGather ({np_elem_type}, {elem_type}): results mismatch"
         )
 
+    @unittest.skipIf(not ort.has_collective_ops(), reason="onnx not compiled with mpi support")
     @parameterized.expand(
         [
             (np.float32, TensorProto.FLOAT, TensorProto.FLOAT),
@@ -231,6 +233,7 @@ def test_all_gather(self, np_elem_type, elem_type, communication_elem_type):
             err_msg=f"{rank}: AllGather (axis0) ({np_elem_type}, {elem_type}, {communication_elem_type}): results mismatch",
         )
 
+    @unittest.skipIf(not ort.has_collective_ops(), reason="onnx not compiled with mpi support")
     def test_all_gather_bool(self):
         model = self._create_allgather_ut_model((4,), 0, TensorProto.INT64, TensorProto.INT64)
         rank, _ = self._get_rank_size()
@@ -250,6 +253,7 @@ def test_all_gather_bool(self):
 
         np.testing.assert_allclose(y, y_expected, err_msg=f"{rank}: AllGather (bool): results mismatch")
 
+    @unittest.skipIf(not ort.has_collective_ops(), reason="onnx not compiled with mpi support")
     def test_all_gather_axis1(self):
         model = self._create_allgather_ut_model((128, 128), 1)
         rank, size = self._get_rank_size()
@@ -268,6 +272,7 @@ def test_all_gather_axis1(self):
 
         np.testing.assert_allclose(outputs[0], expected_output, err_msg=f"{rank}: AllGather (axis1): results mismatch")
 
+    @unittest.skipIf(not ort.has_collective_ops(), reason="onnx not compiled with mpi support")
     @parameterized.expand(
         [
             (np.float32, TensorProto.FLOAT, TensorProto.FLOAT),
@@ -349,6 +354,7 @@ def test_all_to_all(self, np_elem_type, elem_type, communication_elem_type):
             err_msg=f"{rank}: AllToAll ({np_elem_type}, {elem_type}, {communication_elem_type}): results mismatch",
         )
 
+    @unittest.skipIf(not ort.has_collective_ops(), reason="onnx not compiled with mpi support")
     def test_all_to_all_bool(self):
         rank, _ = self._get_rank_size()
 
diff --git a/onnxruntime/test/python/onnxruntime_test_distributed.py b/onnxruntime/test/python/onnxruntime_test_distributed.py
new file mode 100644
index 0000000000000..de70478761f19
--- /dev/null
+++ b/onnxruntime/test/python/onnxruntime_test_distributed.py
@@ -0,0 +1,1672 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+import unittest
+from typing import Tuple
+
+import numpy as np
+import onnxscript
+from mpi4py import MPI
+from onnxscript import FLOAT, FLOAT16, INT64
+
+import onnxruntime as ort
+
+MICROSOFT_OPSET = onnxscript.values.Opset(domain="com.microsoft", version=1)
+comm = MPI.COMM_WORLD
+
+
+def shard_tensor(X, rank, axis, num_shards):
+    return np.split(X, num_shards, axis)[rank]
+
+
+def shard_tensor_per_device_mesh(X, rank, axis, device_mesh):
+    if axis is None:
+        return X
+    shards = np.split(X, len(device_mesh), axis)
+    selected_shards = tuple(shard for device_id, shard in zip(device_mesh, shards) if device_id == rank)
+    return np.concatenate(selected_shards, axis=axis)
+
+
+def translate_single_device_mesh(device_mesh: np.ndarray):
+    device_mesh_shape = "[" + ",".join(str(dim) for dim in device_mesh.shape) + "]"
+    device_mesh_elements = "[" + ",".join(str(elem) for elem in device_mesh.flat) + "]"
+    return device_mesh_shape, device_mesh_elements
+
+
+def translate_all_device_meshes(device_meshes: np.ndarray):
+    assert all(len(mesh.shape) == 1 for mesh in device_meshes)
+    device_mesh_shapes = []
+    device_mesh_elements = []
+    for device_mesh in device_meshes:
+        device_mesh_shape, device_mesh_element = translate_single_device_mesh(device_mesh)
+        device_mesh_shapes.append(device_mesh_shape)
+        device_mesh_elements.append(device_mesh_element)
+    return device_mesh_shapes, device_mesh_elements
+
+
+def parse_sharding_spec(spec: str):
+    axis_conditions = []
+    sharding_device_axes = []
+    token_index = 0
+    while True:
+        token = spec[token_index]
+        if token == "R":
+            axis_conditions.append("R")
+            sharding_device_axes.append(None)
+            token_index += 1
+        elif token == "S":
+            axis_conditions.append("S")
+            # Move token pointer to "[""
+            token_index += 1
+            assert spec[token_index] == "["
+            number_tokens = ""
+            while True:
+                token_index += 1
+                token = spec[token_index]
+                if token == "]":
+                    break
+                number_tokens += token
+            assert spec[token_index] == "]"
+            # Skip "]" and point to next S/R token
+            token_index += 1
+            sharding_device_axes.append(int(number_tokens))
+        else:
+            raise ValueError(f"Invalid spec: {spec}")
+        if token_index >= len(spec):
+            break
+    return axis_conditions, sharding_device_axes
+
+
+def find_shard_axis(axis_conditions, shard_device_axes):
+    sharded_axis = None
+    sharded_axis_count = 0
+    for i, cond in enumerate(axis_conditions):
+        if cond == "S":
+            sharded_axis = i
+            sharded_axis_count += 1
+    assert sharded_axis_count in (0, 1), "Can shard at most one axis per tensor."
+    if sharded_axis is not None:
+        assert shard_device_axes[sharded_axis] == 0, "Device mesh must be 1-D, so 0 is the only valid device mesh axis."
+    return sharded_axis
+
+
+def shard_tensor_per_spec(tensor: np.ndarray, rank: int, spec: str, device_mesh: np.ndarray):
+    axis_conditions, shard_device_axes = parse_sharding_spec(spec)
+    sharded_axis = find_shard_axis(axis_conditions, shard_device_axes)
+    return shard_tensor_per_device_mesh(tensor, rank, sharded_axis, list(device_mesh.flat))
+
+
+class TestDistributedReshape(unittest.TestCase):
+    def _check_distributed_reshape(
+        self,
+        shape: Tuple[int, ...],
+        target_shape: Tuple[int, ...],
+        input_device_meshes: np.ndarray,
+        input_shard_specs: Tuple[str, ...],
+        output_device_meshes: np.ndarray,
+        output_shard_specs: Tuple[str, ...],
+    ):
+        input_device_mesh_shapes, input_device_mesh_elements = translate_all_device_meshes(input_device_meshes)
+        output_device_mesh_shapes, output_device_mesh_elements = translate_all_device_meshes(output_device_meshes)
+
+        @onnxscript.script()
+        def distributed_reshape_instance(data_tensor: FLOAT, shape_tensor: INT64):
+            return MICROSOFT_OPSET.DistributedReshape(
+                data_tensor,
+                shape_tensor,
+                input_device_mesh_shapes=input_device_mesh_shapes,
+                input_device_mesh_elements=input_device_mesh_elements,
+                input_shard_specs=input_shard_specs,
+                output_device_mesh_shapes=output_device_mesh_shapes,
+                output_device_mesh_elements=output_device_mesh_elements,
+                output_shard_specs=output_shard_specs,
+            )
+
+        rank = comm.Get_rank()
+        data_tensor = np.arange(np.prod(shape), dtype=np.float32).reshape(*shape)
+        shape_tensor = np.array(
+            target_shape,
+            dtype=np.int64,
+        )
+
+        local_data_tensor = shard_tensor_per_spec(data_tensor, rank, input_shard_specs[0], input_device_meshes[0])
+        assert "S" not in input_shard_specs[1], "Shape should not be sharded."
+
+        expected = np.reshape(data_tensor, shape_tensor)
+        local_expected = shard_tensor_per_spec(expected, rank, output_shard_specs[0], output_device_meshes[0])
+
+        onnx_model = distributed_reshape_instance.to_model_proto(
+            input_types=[FLOAT[tuple(local_data_tensor.shape)], INT64[tuple(shape_tensor.shape)]],
+            output_types=[FLOAT[tuple(local_expected.shape)]],
+        )
+
+        # Each MPI process owns a sharded model.
+        sess = ort.InferenceSession(
+            onnx_model.SerializeToString(),
+            providers=["CUDAExecutionProvider"],
+            provider_options=[{"device_id": str(rank)}],
+        )
+
+        # Each MPI process executes its sharded model.
+        # The result is `local` tensor stored on a specific MPI rank
+        # instead of `logical` tensor.
+        result = sess.run(
+            None,
+            {
+                "data_tensor": local_data_tensor,
+                "shape_tensor": shape_tensor,
+            },
+        )
+
+        # Compare local tensor and the corresponding logical sub-tensor
+        # obtained by sharding logical tensor following output's sharding spec.
+        np.testing.assert_allclose(result[0], local_expected, rtol=1e-5, atol=1e-8)
+
+    def test_reshape_two_axis_fusion_shape_2_3_sr_01_shape_6_s_01(self):
+        # Two axis fusion.
+        # S[0]R, shape=[2, 3], device_mesh=[0, 1] -> S[0], shape = [6], device_mesh=[0, 1]
+        self._check_distributed_reshape(
+            shape=(
+                2,
+                3,
+            ),
+            target_shape=(6,),
+            input_device_meshes=[np.array([0, 1])] * 2,
+            input_shard_specs=("S[0]R", "R"),
+            output_device_meshes=[np.array([0, 1])],
+            output_shard_specs=("S[0]",),
+        )
+
+    def test_reshape_two_axis_fusion_shape_2_4_rs_01_shape_8_s_0101(self):
+        # Two axis fusion.
+        # RS[0], shape=[2, 4], device_mesh=[0, 1] -> S[0], shape = [8], device_mesh=[0, 1, 0, 1]
+        self._check_distributed_reshape(
+            shape=(
+                2,
+                4,
+            ),
+            target_shape=(8,),
+            input_device_meshes=[np.array([0, 1])] * 2,
+            input_shard_specs=("RS[0]", "R"),
+            output_device_meshes=[np.array([0, 1, 0, 1])],
+            output_shard_specs=("S[0]",),
+        )
+
+    def test_reshape_two_axis_fusion_shape_2_3_5_srr_01_shape_2_15_sr_01(self):
+        # Two axis fusion.
+        # S[0]RR, shape=[2, 3, 5], device_mesh=[0, 1] -> S[0]R, shape = [2, 15], device_mesh=[0, 1]
+        self._check_distributed_reshape(
+            shape=(
+                2,
+                3,
+                5,
+            ),
+            target_shape=(
+                2,
+                15,
+            ),
+            input_device_meshes=[np.array([0, 1])] * 2,
+            input_shard_specs=("S[0]RR", "R"),
+            output_device_meshes=[np.array([0, 1])],
+            output_shard_specs=("S[0]R",),
+        )
+
+    def test_reshape_two_axis_fusion_shape_2_3_5_rsr_01_shape_2_15_sr_01(self):
+        # Two axis fusion.
+        # RS[0]R, shape=[2, 4, 5], device_mesh=[0, 1] -> RS[0], shape = [2, 20], device_mesh=[0, 1]
+        self._check_distributed_reshape(
+            shape=(
+                2,
+                4,
+                5,
+            ),
+            target_shape=(
+                2,
+                20,
+            ),
+            input_device_meshes=[np.array([0, 1])] * 2,
+            input_shard_specs=("RS[0]R", "R"),
+            output_device_meshes=[np.array([0, 1])],
+            output_shard_specs=("RS[0]",),
+        )
+
+    def test_reshape_two_axis_fusion_shape_2_3_6_rrs_01_shape_2_18_rs_010101(self):
+        # Two axis fusion.
+        # RRS[0], shape=[2, 3, 6], device_mesh=[0, 1] -> RS[0], shape = [2, 18], device_mesh=[0, 1, 0, 1, 0, 1]
+        self._check_distributed_reshape(
+            shape=(
+                2,
+                3,
+                6,
+            ),
+            target_shape=(
+                2,
+                18,
+            ),
+            input_device_meshes=[np.array([0, 1])] * 2,
+            input_shard_specs=("RRS[0]", "R"),
+            output_device_meshes=[np.array([0, 1, 0, 1, 0, 1])],
+            output_shard_specs=("RS[0]",),
+        )
+        # Two axis fusion.
+        # RRS[0], shape=[2, 3, 8], device_mesh=[0, 1, 0, 1] -> RS[0], shape = [2, 24], device_mesh=[0, 1, 0, 1] * 3
+
+        # Two axis fusion.
+        # RS[0]R, shape=[2, 8, 3], device_mesh=[0, 1, 0, 1] -> RS[0], shape = [2, 24], device_mesh=[0, 1, 0, 1]
+
+    def test_reshape_two_axis_decomposition_shape_6_s_01_shape_2_3_sr_01(self):
+        # Two axis decomposition
+        # S[0], shape=[6], device_mesh=[0, 1] -> S[0]R, shape=[2, 3], device_mesh=[0, 1]
+        self._check_distributed_reshape(
+            shape=(6,),
+            target_shape=(
+                2,
+                3,
+            ),
+            input_device_meshes=[np.array([0, 1])] * 2,
+            input_shard_specs=("S[0]", "R"),
+            output_device_meshes=[np.array([0, 1])],
+            output_shard_specs=("S[0]R",),
+        )
+
+    def test_reshape_two_axis_decomposition_shape_16_s_01_shape_1_16_sr_01(self):
+        # Two axis decomposition
+        # S[0], shape=[16], device_mesh=[0, 1] -> RS[0], shape=[1, 16], device_mesh=[0, 1]
+        self._check_distributed_reshape(
+            shape=(16,),
+            target_shape=(
+                1,
+                16,
+            ),
+            input_device_meshes=[np.array([0, 1])] * 2,
+            input_shard_specs=("S[0]", "R"),
+            output_device_meshes=[np.array([0, 1])],
+            output_shard_specs=("RS[0]",),
+        )
+
+    def test_reshape_two_axis_decomposition_shape_16_s_01_shape_2_8_sr_01(self):
+        # Two axis decomposition
+        # S[0], shape=[16], device_mesh=[0, 1] -> S[0]R, shape=[2, 8], device_mesh=[0, 1]
+        self._check_distributed_reshape(
+            shape=(16,),
+            target_shape=(
+                2,
+                8,
+            ),
+            input_device_meshes=[np.array([0, 1])] * 2,
+            input_shard_specs=("S[0]", "R"),
+            output_device_meshes=[np.array([0, 1])],
+            output_shard_specs=("S[0]R",),
+        )
+
+    def test_reshape_two_axis_decomposition_shape_16_s_01_shape_4_4_sr_01(self):
+        # Two axis decomposition
+        # S[0], shape=[16], device_mesh=[0, 1] -> S[0]R, shape=[4, 4], device_mesh=[0, 1]
+        self._check_distributed_reshape(
+            shape=(16,),
+            target_shape=(
+                4,
+                4,
+            ),
+            input_device_meshes=[np.array([0, 1])] * 2,
+            input_shard_specs=("S[0]", "R"),
+            output_device_meshes=[np.array([0, 1])],
+            output_shard_specs=("S[0]R",),
+        )
+
+    def test_reshape_two_axis_decomposition_shape_16_s_01_shape_8_2_sr_01(self):
+        # Two axis decomposition
+        # S[0], shape=[16], device_mesh=[0, 1] -> S[0]R, shape=[8, 2], device_mesh=[0, 1]
+        self._check_distributed_reshape(
+            shape=(16,),
+            target_shape=(
+                8,
+                2,
+            ),
+            input_device_meshes=[np.array([0, 1])] * 2,
+            input_shard_specs=("S[0]", "R"),
+            output_device_meshes=[np.array([0, 1])],
+            output_shard_specs=("S[0]R",),
+        )
+
+    def test_reshape_two_axis_decomposition_shape_16_s_01_shape_16_1_sr_01(self):
+        # Two axis decomposition
+        # S[0], shape=[16], device_mesh=[0, 1] -> S[0]R, shape=[16, 1], device_mesh=[0, 1]
+        self._check_distributed_reshape(
+            shape=(16,),
+            target_shape=(
+                16,
+                1,
+            ),
+            input_device_meshes=[np.array([0, 1])] * 2,
+            input_shard_specs=("S[0]", "R"),
+            output_device_meshes=[np.array([0, 1])],
+            output_shard_specs=("S[0]R",),
+        )
+
+    def test_reshape_two_axis_decomposition_shape_16_s_0101_shape_1_16_sr_0101(self):
+        # Two axis decomposition
+        # S[0], shape=[16], device_mesh=[0, 1, 0, 1] -> RS[0], shape=[1, 16], device_mesh=[0, 1, 0, 1]
+
+        self._check_distributed_reshape(
+            shape=(16,),
+            target_shape=(
+                1,
+                16,
+            ),
+            input_device_meshes=[np.array([0, 1, 0, 1])] * 2,
+            input_shard_specs=("S[0]", "R"),
+            output_device_meshes=[np.array([0, 1, 0, 1])],
+            output_shard_specs=("RS[0]",),
+        )
+
+    def test_reshape_two_axis_decomposition_shape_16_s_0101_shape_2_8_rs_01(self):
+        # Two axis decomposition
+        #                                 repeats=2                       8 = repeats * [unique IDs]
+        # S[0], shape=[16], device_mesh=[0, 1, 0, 1] -> RS[0], shape=[2, 8], device_mesh=[0, 1]
+        self._check_distributed_reshape(
+            shape=(16,),
+            target_shape=(
+                2,
+                8,
+            ),
+            input_device_meshes=[np.array([0, 1, 0, 1])] * 2,
+            input_shard_specs=("S[0]", "R"),
+            output_device_meshes=[np.array([0, 1])],
+            output_shard_specs=("RS[0]",),
+        )
+
+    def test_reshape_two_axis_decomposition_shape_16_s_0101_shape_4_4_sr_0101(self):
+        # Two axis decomposition
+        # S[0], shape=[16], device_mesh=[0, 1, 0, 1] -> S[0]R, shape=[4, 4], device_mesh=[0, 1, 0, 1]
+        self._check_distributed_reshape(
+            shape=(16,),
+            target_shape=(
+                4,
+                4,
+            ),
+            input_device_meshes=[np.array([0, 1, 0, 1])] * 2,
+            input_shard_specs=("S[0]", "R"),
+            output_device_meshes=[np.array([0, 1, 0, 1])],
+            output_shard_specs=("S[0]R",),
+        )
+
+    def test_reshape_two_axis_decomposition_shape_16_s_0101_shape_8_2_sr_0101(self):
+        # Two axis decomposition
+        # S[0], shape=[16], device_mesh=[0, 1, 0, 1] -> S[0]R, shape=[8, 2], device_mesh=[0, 1, 0, 1]
+        self._check_distributed_reshape(
+            shape=(16,),
+            target_shape=(
+                8,
+                2,
+            ),
+            input_device_meshes=[np.array([0, 1, 0, 1])] * 2,
+            input_shard_specs=("S[0]", "R"),
+            output_device_meshes=[np.array([0, 1, 0, 1])],
+            output_shard_specs=("S[0]R",),
+        )
+
+    def test_reshape_two_axis_decomposition_shape_16_s_0101_shape_16_1_sr_0101(self):
+        # Two axis decomposition
+        # S[0], shape=[16], device_mesh=[0, 1, 0, 1] -> S[0]R, shape=[16, 1], device_mesh=[0, 1, 0, 1]
+        self._check_distributed_reshape(
+            shape=(16,),
+            target_shape=(
+                16,
+                1,
+            ),
+            input_device_meshes=[np.array([0, 1, 0, 1])] * 2,
+            input_shard_specs=("S[0]", "R"),
+            output_device_meshes=[np.array([0, 1, 0, 1])],
+            output_shard_specs=("S[0]R",),
+        )
+
+    def test_reshape_two_axis_decomposition_shape_21_4096_s_01_shape_3_7_4096_rrs_01(self):
+        # Two axis decomposition
+        # [21, 4096] -> [3, 7, 4096]
+        # data: (21, 2048), (RS, [0, 1])
+        # shape: None, (R, [0, 1])
+        # reshaped: None, None
+        # -----------------------------------
+        # new reshaped: None, (RRS, [0, 1])
+        self._check_distributed_reshape(
+            shape=(
+                21,
+                4096,
+            ),
+            target_shape=(
+                3,
+                7,
+                4096,
+            ),
+            input_device_meshes=[np.array([0, 1])] * 2,
+            input_shard_specs=("RS[0]", "R"),
+            output_device_meshes=[np.array([0, 1])],
+            output_shard_specs=("RRS[0]",),
+        )
+
+    def test_reshape_two_axis_decomposition_shape_3_7_4096_rrs_01_shape_3_7_64_64_rrsr_01(self):
+        # Two axis decomposition
+        # [3, 7, 4096] -> [3, 7, 64, 64]
+        # data: (3, 7, 2048), (RRS, [0, 1])
+        # shape: None, (R, [0, 1])
+        # reshaped: None, None
+        # -----------------------------------
+        # new reshaped: None, (RRSR, [0, 1])
+
+        self._check_distributed_reshape(
+            shape=(
+                3,
+                7,
+                4096,
+            ),
+            target_shape=(
+                3,
+                7,
+                64,
+                64,
+            ),
+            input_device_meshes=[np.array([0, 1])] * 2,
+            input_shard_specs=("RRS[0]", "R"),
+            output_device_meshes=[np.array([0, 1])],
+            output_shard_specs=("RRS[0]R",),
+        )
+
+    def test_reshape_two_axis_fusion_shape_3_7_4096_rrr_01_shape_21_4906_rr_01(self):
+        # Two axis fusion
+        # [3, 7, 4096] -> [21, 4096]
+        # data: (3, 7, 4096), (RRR, [0, 1])
+        # shape: None, (R, [0, 1])
+        # reshaped: None, None
+        # -----------------------------------
+        # new reshaped: None, (RR, [0, 1])
+        self._check_distributed_reshape(
+            shape=(
+                3,
+                7,
+                4096,
+            ),
+            target_shape=(
+                21,
+                4096,
+            ),
+            input_device_meshes=[np.array([0, 1])] * 2,
+            input_shard_specs=("RRR", "R"),
+            output_device_meshes=[np.array([0, 1])],
+            output_shard_specs=("RR",),
+        )
+
+    def test_reshape_two_axis_fusion_shape_21_4096_rrr_01_shape_3_7_4906_rr_01(self):
+        # Two axis fusion
+        # [21, 4096] -> [3, 7, 4096]
+        # data: (21, 4096), (RR, [0, 1])
+        # shape: None, (R, [0, 1])
+        # reshaped: None, None
+        # -----------------------------------
+        # new reshaped: None, (RRR, [0, 1])
+        self._check_distributed_reshape(
+            shape=(
+                21,
+                4096,
+            ),
+            target_shape=(
+                3,
+                7,
+                4096,
+            ),
+            input_device_meshes=[np.array([0, 1])] * 2,
+            input_shard_specs=("RR", "R"),
+            output_device_meshes=[np.array([0, 1])],
+            output_shard_specs=("RRR",),
+        )
+
+    def test_reshape_two_axis_fusion_shape_3_64_7_64_rsrr_01_shape_192_7_64_srr_010101(self):
+        # Two axis fusion
+        # [3, 64, 7, 64] -> [192, 7, 64]
+        # data: (3, 32, 7, 64), (RSRR, [0, 1])
+        # shape: None, (R, [0, 1])
+        # reshaped: None, None
+        # -----------------------------------
+        # new reshaped: None, (SRR, [0, 1, 0, 1, 0, 1])
+
+        self._check_distributed_reshape(
+            shape=(
+                3,
+                64,
+                7,
+                64,
+            ),
+            target_shape=(
+                192,
+                7,
+                64,
+            ),
+            input_device_meshes=[np.array([0, 1])] * 2,
+            input_shard_specs=("RS[0]RR", "R"),
+            output_device_meshes=[np.array([0, 1, 0, 1, 0, 1])],
+            output_shard_specs=("S[0]RR",),
+        )
+
+    def test_reshape_two_axis_decomposition_shape_192_7_7_srr_010101_shape_3_64_7_7_rsrr_01(self):
+        # Two axis decomposition
+        # [192, 7, 7] -> [3, 64, 7, 7]
+        # data: (96, 7, 7), (SRR, [0, 1, 0, 1, 0, 1])
+        # shape: None, (R, [0, 1])
+        # reshaped: None, None
+        # -----------------------------------
+        # new reshaped: None, (RSRR, [0.0, 1.0])
+
+        self._check_distributed_reshape(
+            shape=(
+                192,
+                7,
+                7,
+            ),
+            target_shape=(
+                3,
+                64,
+                7,
+                7,
+            ),
+            input_device_meshes=[np.array([0, 1, 0, 1, 0, 1])] * 2,
+            input_shard_specs=("S[0]RR", "R"),
+            output_device_meshes=[np.array([0, 1])],
+            output_shard_specs=("RS[0]RR",),
+        )
+
+    def test_reshape_two_axis_fusion_shape_3_64_7_7_rsrr_01_shape_192_7_7_srr_010101(self):
+        # Two axis fusion
+        # [3, 64, 7, 7] -> [192, 7, 7]
+        # data: (3, 32, 7, 7), (RSRR, [0, 1])
+        # shape: None, (R, [0, 1])
+        # reshaped: None, None
+        # -----------------------------------
+        # new reshaped: None, (SRR, [0, 1, 0, 1, 0, 1])
+
+        self._check_distributed_reshape(
+            shape=(
+                3,
+                64,
+                7,
+                7,
+            ),
+            target_shape=(
+                192,
+                7,
+                7,
+            ),
+            input_device_meshes=[np.array([0, 1])] * 2,
+            input_shard_specs=("RS[0]RR", "R"),
+            output_device_meshes=[np.array([0, 1, 0, 1, 0, 1])],
+            output_shard_specs=("S[0]RR",),
+        )
+
+    def test_reshape_two_axis_decomposition_shape_192_7_64_srr_010101_shape_3_64_7_64_rsrr_01(self):
+        # Two axis decomposition
+        # [192, 7, 64] -> [3, 64, 7, 64]
+        # data: (96, 7, 64), (SRR, [0, 1, 0, 1, 0, 1])
+        # shape: None, (R, [0, 1])
+        # reshaped: None, None
+        # -----------------------------------
+        # new reshaped: None, (RSRR, [0.0, 1.0])
+
+        self._check_distributed_reshape(
+            shape=(
+                192,
+                7,
+                64,
+            ),
+            target_shape=(
+                3,
+                64,
+                7,
+                64,
+            ),
+            input_device_meshes=[np.array([0, 1, 0, 1, 0, 1])] * 2,
+            input_shard_specs=("S[0]RR", "R"),
+            output_device_meshes=[np.array([0, 1])],
+            output_shard_specs=("RS[0]RR",),
+        )
+
+    def test_reshape_two_axis_fusion_shape_3_7_64_64_rrsr_01_shape_3_7_4096_rrs_01(self):
+        # Two axis fusion
+        # [3, 7, 64, 64] -> [3, 7, 4096]
+        # data: (3, 7, 32, 64), (RRSR, [0, 1])
+        # shape: None, (R, [0, 1])
+        # reshaped: None, None
+        # -----------------------------------
+        # new reshaped: None, (RRS, [0, 1])
+
+        self._check_distributed_reshape(
+            shape=(
+                3,
+                7,
+                64,
+                64,
+            ),
+            target_shape=(
+                3,
+                7,
+                4096,
+            ),
+            input_device_meshes=[np.array([0, 1])] * 2,
+            input_shard_specs=("RRS[0]R", "R"),
+            output_device_meshes=[np.array([0, 1])],
+            output_shard_specs=("RRS[0]",),
+        )
+
+    def test_reshape_two_axis_fusion_shape_3_7_4096_rrs_01_shape_21_4906_rs_01(self):
+        # Two axis fusion
+        # [3, 7, 4096] -> [21, 4096]
+        # data: (3, 7, 2048), (RRS, [0, 1])
+        # shape: None, (R, [0, 1])
+        # reshaped: None, None
+        # -----------------------------------
+        # new reshaped: None, (RS, [0, 1])
+        self._check_distributed_reshape(
+            shape=(
+                3,
+                7,
+                4096,
+            ),
+            target_shape=(
+                21,
+                4096,
+            ),
+            input_device_meshes=[np.array([0, 1])] * 2,
+            input_shard_specs=("RRS[0]", "R"),
+            output_device_meshes=[np.array([0, 1])],
+            output_shard_specs=("RS[0]",),
+        )
+
+
+class TestDistributedExpand(unittest.TestCase):
+    def _check_distributed_expand(
+        self,
+        shape: Tuple[int, ...],
+        target_shape: Tuple[int, ...],
+        input_device_meshes: np.ndarray,
+        input_shard_specs: Tuple[str, ...],
+        output_device_meshes: np.ndarray,
+        output_shard_specs: Tuple[str, ...],
+    ):
+        assert len(input_device_meshes) == len(input_shard_specs)
+        assert len(output_device_meshes) == len(output_shard_specs)
+
+        input_device_mesh_shapes, input_device_mesh_elements = translate_all_device_meshes(input_device_meshes)
+        output_device_mesh_shapes, output_device_mesh_elements = translate_all_device_meshes(output_device_meshes)
+
+        @onnxscript.script()
+        def distributed_expand_instance(data_tensor: FLOAT, shape_tensor: INT64):
+            return MICROSOFT_OPSET.DistributedExpand(
+                data_tensor,
+                shape_tensor,
+                input_device_mesh_shapes=input_device_mesh_shapes,
+                input_device_mesh_elements=input_device_mesh_elements,
+                input_shard_specs=input_shard_specs,
+                output_device_mesh_shapes=output_device_mesh_shapes,
+                output_device_mesh_elements=output_device_mesh_elements,
+                output_shard_specs=output_shard_specs,
+            )
+
+        rank = comm.Get_rank()
+        data_tensor = np.arange(np.prod(shape), dtype=np.float32).reshape(*shape)
+        shape_tensor = np.array(
+            target_shape,
+            dtype=np.int64,
+        )
+
+        local_data_tensor = shard_tensor_per_spec(data_tensor, rank, input_shard_specs[0], input_device_meshes[0])
+        assert "S" not in input_shard_specs[1], "Shape should not be sharded."
+
+        expected = data_tensor * np.ones(shape_tensor)
+        local_expected = shard_tensor_per_spec(expected, rank, output_shard_specs[0], output_device_meshes[0])
+
+        onnx_model = distributed_expand_instance.to_model_proto(
+            input_types=[FLOAT[tuple(local_data_tensor.shape)], INT64[tuple(shape_tensor.shape)]],
+            output_types=[FLOAT[tuple(local_expected.shape)]],
+        )
+
+        # Each MPI process owns a sharded model.
+        sess = ort.InferenceSession(
+            onnx_model.SerializeToString(),
+            providers=["CUDAExecutionProvider"],
+            provider_options=[{"device_id": str(rank)}],
+        )
+
+        # Each MPI process executes its sharded model.
+        # The result is `local` tensor stored on a specific MPI rank
+        # instead of `logical` tensor.
+        result = sess.run(
+            None,
+            {
+                "data_tensor": local_data_tensor,
+                "shape_tensor": shape_tensor,
+            },
+        )
+
+        # Compare local tensor and the corresponding logical sub-tensor
+        # obtained by sharding logical tensor following output's sharding spec.
+        np.testing.assert_allclose(result[0], local_expected, rtol=1e-5, atol=1e-8)
+
+    def test_expand_sharded_on_expanded_axis(self):
+        # data: shape=[8,1], spec=(RR, [0,1])
+        # shape: shape=[2], spec=(R, [0,1]), value=[1,4]
+        # output: shape=[8,4], spec=(RS, [0,1])
+        self._check_distributed_expand(
+            shape=(
+                8,
+                1,
+            ),
+            target_shape=(
+                8,
+                4,
+            ),
+            input_device_meshes=[np.array([0, 1])] * 2,
+            input_shard_specs=("RR", "R"),
+            output_device_meshes=[np.array([0, 1])],
+            output_shard_specs=("RS[0]",),
+        )
+
+    def test_expand_sharded_on_expanded_axis_with_device_mesh_0101(self):
+        # data: shape=[8,1], spec=(RR, [0,1])
+        # shape: shape=[2], spec=(R, [0,1]), value=[1,4]
+        # output: shape=[8,4], spec=(RS, [0,1])
+        self._check_distributed_expand(
+            shape=(
+                8,
+                1,
+            ),
+            target_shape=(
+                8,
+                8,
+            ),
+            input_device_meshes=[np.array([0, 1])] * 2,
+            input_shard_specs=("RR", "R"),
+            output_device_meshes=[np.array([0, 1, 0, 1])],
+            output_shard_specs=("RS[0]",),
+        )
+
+    def test_expand_replicated_on_expanded_axis(self):
+        # data: shape=[8,1], spec=(RR, [0,1])
+        # shape: shape=[2], spec=(R, [0,1]), value=[1,4]
+        # output: shape=[8,4], spec=(RR, [0,1])
+        self._check_distributed_expand(
+            shape=(
+                8,
+                1,
+            ),
+            target_shape=(
+                1,
+                4,
+            ),
+            input_device_meshes=[np.array([0, 1])] * 2,
+            input_shard_specs=("RR", "R"),
+            output_device_meshes=[np.array([0, 1])],
+            output_shard_specs=("RR",),
+        )
+
+    def test_expand_with_pass_through_sharding_spec(self):
+        # data: shape=[8,1], spec=(SR, [0,1])
+        # shape: shape=[2], spec=(R, [0,1]), value=[1,4]
+        # output: shape=[8,4], spec=(SR, [0,1])
+        self._check_distributed_expand(
+            shape=(
+                8,
+                1,
+            ),
+            target_shape=(
+                1,
+                4,
+            ),
+            input_device_meshes=[np.array([0, 1])] * 2,
+            input_shard_specs=(
+                "S[0]R",
+                "R",
+            ),
+            output_device_meshes=[np.array([0, 1])],
+            output_shard_specs=("S[0]R",),
+        )
+
+    def test_expand_in_tiny_llama(self):
+        # data: shape=[2,4,256,4], spec=(RSRR, [0,1])
+        # shape: shape=[4], spec=(R, [0,1,2,3]), value=[2,4,256,4]
+        # output: shape=[2,4,256,4], spec=(RSRR, [0,1])
+        self._check_distributed_expand(
+            shape=(
+                2,
+                4,
+                256,
+                4,
+            ),
+            target_shape=(
+                2,
+                4,
+                256,
+                4,
+            ),
+            input_device_meshes=[np.array([0, 1])] * 2,
+            input_shard_specs=("RS[0]RR", "R"),
+            output_device_meshes=[np.array([0, 1])],
+            output_shard_specs=("RS[0]RR",),
+        )
+
+
+class TestDistributedUnsqueeze(unittest.TestCase):
+    def _check_distributed_unsqueeze(
+        self,
+        shape: Tuple[int, ...],
+        axes: Tuple[int, ...],
+        input_device_meshes: np.ndarray,
+        input_shard_specs: Tuple[str, ...],
+        output_device_meshes: np.ndarray,
+        output_shard_specs: Tuple[str, ...],
+    ):
+        assert len(input_device_meshes) == len(input_shard_specs)
+        assert len(output_device_meshes) == len(output_shard_specs)
+
+        input_device_mesh_shapes, input_device_mesh_elements = translate_all_device_meshes(input_device_meshes)
+        output_device_mesh_shapes, output_device_mesh_elements = translate_all_device_meshes(output_device_meshes)
+
+        @onnxscript.script()
+        def distributed_unsqueeze_instance(data_tensor: FLOAT, axes_tensor: INT64):
+            return MICROSOFT_OPSET.DistributedUnsqueeze(
+                data_tensor,
+                axes_tensor,
+                input_device_mesh_shapes=input_device_mesh_shapes,
+                input_device_mesh_elements=input_device_mesh_elements,
+                input_shard_specs=input_shard_specs,
+                output_device_mesh_shapes=output_device_mesh_shapes,
+                output_device_mesh_elements=output_device_mesh_elements,
+                output_shard_specs=output_shard_specs,
+            )
+
+        rank = comm.Get_rank()
+        data_tensor = np.arange(np.prod(shape), dtype=np.float32).reshape(*shape)
+        axes_tensor = np.array(axes, dtype=np.int64)
+
+        local_data_tensor = shard_tensor_per_spec(data_tensor, rank, input_shard_specs[0], input_device_meshes[0])
+        assert "S" not in input_shard_specs[1], "Shape should not be sharded."
+
+        expected = data_tensor.copy()
+        for axis in sorted(axes):
+            expected = np.expand_dims(expected, axis=axis)
+
+        local_expected = shard_tensor_per_spec(expected, rank, output_shard_specs[0], output_device_meshes[0])
+
+        onnx_model = distributed_unsqueeze_instance.to_model_proto(
+            input_types=[FLOAT[tuple(local_data_tensor.shape)], INT64[tuple(axes_tensor.shape)]],
+            output_types=[FLOAT[tuple(local_expected.shape)]],
+        )
+
+        # Each MPI process owns a sharded model.
+        sess = ort.InferenceSession(
+            onnx_model.SerializeToString(),
+            providers=["CUDAExecutionProvider"],
+            provider_options=[{"device_id": str(rank)}],
+        )
+
+        # Each MPI process executes its sharded model.
+        # The result is `local` tensor stored on a specific MPI rank
+        # instead of `logical` tensor.
+        result = sess.run(
+            None,
+            {
+                "data_tensor": local_data_tensor,
+                "axes_tensor": axes_tensor,
+            },
+        )
+
+        # Compare local tensor and the corresponding logical sub-tensor
+        # obtained by sharding logical tensor following output's sharding spec.
+        np.testing.assert_allclose(result[0], local_expected, rtol=1e-5, atol=1e-8)
+
+    def test_unsqueeze_sharded(self):
+        # data: shape=[8,1], spec=(RR, [0,1])
+        # shape: shape=[2], spec=(R, [0,1]), value=[1,4]
+        # output: shape=[8,4], spec=(RS, [0,1])
+        self._check_distributed_unsqueeze(
+            shape=(
+                8,
+                2,
+            ),
+            axes=(1,),
+            input_device_meshes=[np.array([0, 1])] * 2,
+            input_shard_specs=("RS[0]", "R"),
+            output_device_meshes=[np.array([0, 1])],
+            output_shard_specs=("RRS[0]",),
+        )
+
+    def test_unsqueeze_descending_axes(self):
+        # data: shape=[8,1], spec=(RR, [0,1])
+        # shape: shape=[2], spec=(R, [0,1]), value=[1,4]
+        # output: shape=[8,4], spec=(RS, [0,1])
+        self._check_distributed_unsqueeze(
+            shape=(
+                8,
+                2,
+            ),
+            axes=(
+                4,
+                1,
+                0,
+            ),
+            input_device_meshes=[np.array([0, 1])] * 2,
+            input_shard_specs=("RS[0]", "R"),
+            output_device_meshes=[np.array([0, 1])],
+            output_shard_specs=("RRRS[0]R",),
+        )
+
+    def test_unsqueeze_not_sharded(self):
+        # data: shape=[8,1], spec=(RR, [0,1])
+        # shape: shape=[2], spec=(R, [0,1]), value=[1,4]
+        # output: shape=[8,4], spec=(RS, [0,1])
+        self._check_distributed_unsqueeze(
+            shape=(
+                8,
+                2,
+            ),
+            axes=(2,),
+            input_device_meshes=[np.array([0, 1])] * 2,
+            input_shard_specs=("RR", "R"),
+            output_device_meshes=[np.array([0, 1])],
+            output_shard_specs=("RRR",),
+        )
+
+
+class TestDistributedSqueeze(unittest.TestCase):
+    def _check_distributed_squeeze(
+        self,
+        shape: Tuple[int, ...],
+        axes: Tuple[int, ...],
+        input_device_meshes: np.ndarray,
+        input_shard_specs: Tuple[str, ...],
+        output_device_meshes: np.ndarray,
+        output_shard_specs: Tuple[str, ...],
+    ):
+        assert len(input_device_meshes) == len(input_shard_specs)
+        assert len(output_device_meshes) == len(output_shard_specs)
+
+        input_device_mesh_shapes, input_device_mesh_elements = translate_all_device_meshes(input_device_meshes)
+        output_device_mesh_shapes, output_device_mesh_elements = translate_all_device_meshes(output_device_meshes)
+
+        @onnxscript.script()
+        def distributed_squeeze_instance(data_tensor: FLOAT, axes_tensor: INT64):
+            return MICROSOFT_OPSET.DistributedSqueeze(
+                data_tensor,
+                axes_tensor,
+                input_device_mesh_shapes=input_device_mesh_shapes,
+                input_device_mesh_elements=input_device_mesh_elements,
+                input_shard_specs=input_shard_specs,
+                output_device_mesh_shapes=output_device_mesh_shapes,
+                output_device_mesh_elements=output_device_mesh_elements,
+                output_shard_specs=output_shard_specs,
+            )
+
+        rank = comm.Get_rank()
+        data_tensor = np.arange(np.prod(shape), dtype=np.float32).reshape(*shape)
+        axes_tensor = np.array(axes, dtype=np.int64)
+
+        local_data_tensor = shard_tensor_per_spec(data_tensor, rank, input_shard_specs[0], input_device_meshes[0])
+        assert "S" not in input_shard_specs[1], "Shape should not be sharded."
+
+        expected = data_tensor.copy()
+        for axis in sorted(axes, reverse=True):
+            expected = np.squeeze(expected, axis=axis)
+
+        local_expected = shard_tensor_per_spec(expected, rank, output_shard_specs[0], output_device_meshes[0])
+
+        onnx_model = distributed_squeeze_instance.to_model_proto(
+            input_types=[FLOAT[tuple(local_data_tensor.shape)], INT64[tuple(axes_tensor.shape)]],
+            output_types=[FLOAT[tuple(local_expected.shape)]],
+        )
+
+        # Each MPI process owns a sharded model.
+        sess = ort.InferenceSession(
+            onnx_model.SerializeToString(),
+            providers=["CUDAExecutionProvider"],
+            provider_options=[{"device_id": str(rank)}],
+        )
+
+        # Each MPI process executes its sharded model.
+        # The result is `local` tensor stored on a specific MPI rank
+        # instead of `logical` tensor.
+        result = sess.run(
+            None,
+            {
+                "data_tensor": local_data_tensor,
+                "axes_tensor": axes_tensor,
+            },
+        )
+
+        # Compare local tensor and the corresponding logical sub-tensor
+        # obtained by sharding logical tensor following output's sharding spec.
+        np.testing.assert_allclose(result[0], local_expected, rtol=1e-5, atol=1e-8)
+
+    def test_squeeze_sharded(self):
+        # data: shape=[8,1], spec=(RR, [0,1])
+        # shape: shape=[2], spec=(R, [0,1]), value=[1,4]
+        # output: shape=[8,4], spec=(RS, [0,1])
+        self._check_distributed_squeeze(
+            shape=(
+                1,
+                2,
+            ),
+            axes=(0,),
+            input_device_meshes=[np.array([0, 1])] * 2,
+            input_shard_specs=("RS[0]", "R"),
+            output_device_meshes=[np.array([0, 1])],
+            output_shard_specs=("S[0]",),
+        )
+
+    def test_squeeze_not_sharded(self):
+        # data: shape=[8,1], spec=(RR, [0,1])
+        # shape: shape=[2], spec=(R, [0,1]), value=[1,4]
+        # output: shape=[8,4], spec=(RS, [0,1])
+        self._check_distributed_squeeze(
+            shape=(
+                8,
+                1,
+                1,
+                1,
+            ),
+            axes=(
+                1,
+                3,
+            ),
+            input_device_meshes=[np.array([0, 1])] * 2,
+            input_shard_specs=("RRRR", "R"),
+            output_device_meshes=[np.array([0, 1])],
+            output_shard_specs=("RR",),
+        )
+
+
+class TestDistributedReduce(unittest.TestCase):
+    def _check_distributed_reduce(
+        self,
+        keepdims: int,
+        dtype: np.dtype,
+        shape: Tuple[int, ...],
+        axes: Tuple[int, ...],
+        input_device_meshes: np.ndarray,
+        input_shard_specs: Tuple[str, ...],
+        output_device_meshes: np.ndarray,
+        output_shard_specs: Tuple[str, ...],
+    ):
+        assert len(input_device_meshes) == len(input_shard_specs)
+        assert len(output_device_meshes) == len(output_shard_specs)
+
+        input_device_mesh_shapes, input_device_mesh_elements = translate_all_device_meshes(input_device_meshes)
+        output_device_mesh_shapes, output_device_mesh_elements = translate_all_device_meshes(output_device_meshes)
+
+        @onnxscript.script()
+        def distributed_reduce_sum_instance(data_tensor: FLOAT, axes_tensor: INT64):
+            return MICROSOFT_OPSET.DistributedReduceSum(
+                data_tensor,
+                axes_tensor,
+                keepdims=keepdims,
+                input_device_mesh_shapes=input_device_mesh_shapes,
+                input_device_mesh_elements=input_device_mesh_elements,
+                input_shard_specs=input_shard_specs,
+                output_device_mesh_shapes=output_device_mesh_shapes,
+                output_device_mesh_elements=output_device_mesh_elements,
+                output_shard_specs=output_shard_specs,
+            )
+
+        @onnxscript.script()
+        def distributed_reduce_max_instance(data_tensor: FLOAT, axes_tensor: INT64):
+            return MICROSOFT_OPSET.DistributedReduceMax(
+                data_tensor,
+                axes_tensor,
+                keepdims=keepdims,
+                input_device_mesh_shapes=input_device_mesh_shapes,
+                input_device_mesh_elements=input_device_mesh_elements,
+                input_shard_specs=input_shard_specs,
+                output_device_mesh_shapes=output_device_mesh_shapes,
+                output_device_mesh_elements=output_device_mesh_elements,
+                output_shard_specs=output_shard_specs,
+            )
+
+        @onnxscript.script()
+        def distributed_reduce_mean_instance(data_tensor: FLOAT, axes_tensor: INT64):
+            return MICROSOFT_OPSET.DistributedReduceMean(
+                data_tensor,
+                axes_tensor,
+                keepdims=keepdims,
+                input_device_mesh_shapes=input_device_mesh_shapes,
+                input_device_mesh_elements=input_device_mesh_elements,
+                input_shard_specs=input_shard_specs,
+                output_device_mesh_shapes=output_device_mesh_shapes,
+                output_device_mesh_elements=output_device_mesh_elements,
+                output_shard_specs=output_shard_specs,
+            )
+
+        rank = comm.Get_rank()
+
+        for onnx_func, np_func in zip(
+            [distributed_reduce_sum_instance, distributed_reduce_max_instance, distributed_reduce_mean_instance],
+            [np.sum, np.maximum.reduce, np.mean],
+        ):
+            data = np.random.randint(4, size=shape).astype(dtype)
+            expected = np_func(data, axis=axes, keepdims=bool(keepdims))
+
+            assert len(input_shard_specs) == 2 and len(input_device_meshes) == 2, "Reduce has two inputs."
+            assert "S" not in input_shard_specs[1], "Tensor `axes` should not be sharded."
+            assert len(output_shard_specs) == 1 and len(output_device_meshes) == 1, "Reduce has only one output."
+
+            local_data = shard_tensor_per_spec(data, rank, input_shard_specs[0], input_device_meshes[0])
+            local_expected = shard_tensor_per_spec(expected, rank, output_shard_specs[0], output_device_meshes[0])
+
+            if dtype == np.float32:
+                onnx_model = onnx_func.to_model_proto(
+                    input_types=[FLOAT[tuple(local_data.shape)], INT64[len(axes)]],
+                    output_types=[FLOAT[tuple(local_expected.shape)]],
+                )
+            elif dtype == np.int64:
+                onnx_model = onnx_func.to_model_proto(
+                    input_types=[INT64[tuple(local_data.shape)], INT64[len(axes)]],
+                    output_types=[INT64[tuple(local_expected.shape)]],
+                )
+            elif dtype == np.float16:
+                onnx_model = onnx_func.to_model_proto(
+                    input_types=[FLOAT16[tuple(local_data.shape)], INT64[len(axes)]],
+                    output_types=[FLOAT16[tuple(local_expected.shape)]],
+                )
+            else:
+                raise RuntimeError(f"Unsupported dtype: {dtype}")
+
+            # Each MPI process owns a sharded model.
+            sess = ort.InferenceSession(
+                onnx_model.SerializeToString(),
+                providers=["CUDAExecutionProvider"],
+                provider_options=[{"device_id": str(rank)}],
+            )
+
+            # Each MPI process executes its sharded model.
+            # The result is `local` tensor stored on a specific MPI rank
+            # instead of `logical` tensor.
+            result = sess.run(
+                None,
+                {
+                    "data_tensor": local_data,
+                    "axes_tensor": np.array(axes, dtype=np.int64),
+                },
+            )
+
+            # Compare local tensor and the corresponding logical sub-tensor
+            # obtained by sharding logical tensor following output's sharding spec.
+            np.testing.assert_allclose(result[0], local_expected, rtol=1e-5, atol=1e-8)
+
+    def test_reduce(self):
+        self._check_distributed_reduce(
+            keepdims=1,
+            dtype=np.float32,
+            shape=(
+                8,
+                4,
+            ),
+            axes=(0,),
+            input_device_meshes=[np.array([0, 1])] * 2,
+            input_shard_specs=("RR", "R"),
+            output_device_meshes=[np.array([0, 1])],
+            output_shard_specs=("RR",),
+        )
+
+    def test_reduce_sharded(self):
+        self._check_distributed_reduce(
+            keepdims=1,
+            dtype=np.float32,
+            shape=(
+                8,
+                4,
+            ),
+            axes=(1,),
+            input_device_meshes=[np.array([0, 1])] * 2,
+            input_shard_specs=("S[0]R", "R"),
+            output_device_meshes=[np.array([0, 1])],
+            output_shard_specs=("S[0]R",),
+        )
+
+
+class TestDistributed(unittest.TestCase):
+    def test_matmul_rs_sr_rr(self):
+        # It means 1-D tensor with single element: [2].
+        device_mesh_shape = "[2]"
+        # It means 1-D tensor with two elements: [0, 1].
+        device_mesh_elements = "[0,1]"
+
+        @onnxscript.script()
+        def matmul_rs_sr_rr(tensor_x: FLOAT, tensor_w: FLOAT) -> FLOAT:
+            return MICROSOFT_OPSET.DistributedMatMul(
+                tensor_x,
+                tensor_w,
+                input_shard_specs=["RS[0]", "S[0]R"],
+                output_shard_specs=["RR"],
+                input_device_mesh_shapes=[device_mesh_shape, device_mesh_shape],
+                input_device_mesh_elements=[device_mesh_elements, device_mesh_elements],
+                output_device_mesh_shapes=[device_mesh_shape],
+                output_device_mesh_elements=[device_mesh_elements],
+            )
+
+        rank = comm.Get_rank()
+        tensor_x = np.array([[1, 2, 3, 4], [3, 4, 5, 6]], dtype=np.float32)
+        tensor_w = np.array([[1, 1], [2, 2], [3, 3], [4, 4]], dtype=np.float32)
+
+        onnx_model = matmul_rs_sr_rr.to_model_proto(
+            input_types=[FLOAT[2, "s"], FLOAT["s", 2]],
+            output_types=[FLOAT[2, 2]],
+        )
+
+        sess = ort.InferenceSession(
+            onnx_model.SerializeToString(),
+            providers=["CUDAExecutionProvider"],
+            provider_options=[{"device_id": str(rank)}],
+        )
+
+        tensor_shard_x = shard_tensor(tensor_x, rank=rank, axis=1, num_shards=2)
+        tensor_shard_w = shard_tensor(tensor_w, rank=rank, axis=0, num_shards=2)
+
+        result = sess.run(None, {"tensor_x": tensor_shard_x, "tensor_w": tensor_shard_w})
+
+        expected = np.matmul(tensor_x, tensor_w)
+        np.testing.assert_allclose(result[0], expected, rtol=1e-5, atol=1e-8)
+
+    def test_matmul2d_rs_rs_rr(self):
+        device_mesh_shape = "[2]"
+        device_mesh_elements = "[0, 1]"
+
+        @onnxscript.script()
+        def matmul_rs_rs_rr(tensor_x: FLOAT, tensor_w: FLOAT) -> FLOAT:
+            return MICROSOFT_OPSET.DistributedMatMul(
+                tensor_x,
+                tensor_w,
+                input_shard_specs=["RS[0]", "RS[0]"],
+                output_shard_specs=["RR"],
+                input_device_mesh_shapes=[device_mesh_shape, device_mesh_shape],
+                input_device_mesh_elements=[device_mesh_elements, device_mesh_elements],
+                output_device_mesh_shapes=[device_mesh_shape],
+                output_device_mesh_elements=[device_mesh_elements],
+            )
+
+        rank = comm.Get_rank()
+        tensor_x = np.array([[1, 2, 3, 4], [3, 4, 5, 6]], dtype=np.float32)
+        tensor_w = np.array([[1, 1], [2, 2], [3, 3], [4, 4]], dtype=np.float32)
+
+        # Shape informaton should match the shapes seen by the operator.
+        # If the tensor W with shape [4, 2] is sharded following "RS[0]", its shape
+        # should be [4, 1] in ORT when calling ctx->Input<Tensor>(1)->Shape().
+        onnx_model = matmul_rs_rs_rr.to_model_proto(
+            input_types=[FLOAT[2, "s"], FLOAT[4, "t"]],
+            output_types=[FLOAT[2, 2]],
+        )
+
+        sess = ort.InferenceSession(
+            onnx_model.SerializeToString(),
+            providers=["CUDAExecutionProvider"],
+            provider_options=[{"device_id": str(rank)}],
+        )
+
+        tensor_shard_x = shard_tensor(tensor_x, rank=rank, axis=1, num_shards=2)
+        tensor_shard_w = shard_tensor(tensor_w, rank=rank, axis=1, num_shards=2)
+
+        result = sess.run(None, {"tensor_x": tensor_shard_x, "tensor_w": tensor_shard_w})
+
+        expected = np.matmul(tensor_x, tensor_w)
+        np.testing.assert_allclose(result[0], expected, rtol=1e-5, atol=1e-8)
+
+    def test_matmul2d_rs_rs_rs(self):
+        device_mesh_shape = "[2]"
+        device_mesh_elements = "[0, 1]"
+
+        @onnxscript.script()
+        def matmul2d_rs_rs_rs(tensor_x: FLOAT, tensor_w: FLOAT) -> FLOAT:
+            return MICROSOFT_OPSET.DistributedMatMul(
+                tensor_x,
+                tensor_w,
+                input_shard_specs=["RS[0]", "RS[0]"],
+                output_shard_specs=["RS[0]"],
+                input_device_mesh_shapes=[device_mesh_shape, device_mesh_shape],
+                input_device_mesh_elements=[device_mesh_elements, device_mesh_elements],
+                output_device_mesh_shapes=[device_mesh_shape],
+                output_device_mesh_elements=[device_mesh_elements],
+            )
+
+        rank = comm.Get_rank()
+        tensor_x = np.array([[1, 2, 3, 4], [3, 4, 5, 6]], dtype=np.float32)
+        tensor_w = np.array([[1, 1], [2, 2], [3, 3], [4, 4]], dtype=np.float32)
+
+        onnx_model = matmul2d_rs_rs_rs.to_model_proto(
+            input_types=[FLOAT[2, "s"], FLOAT[4, "t"]],
+            output_types=[FLOAT[2, "u"]],
+        )
+
+        sess = ort.InferenceSession(
+            onnx_model.SerializeToString(),
+            providers=["CUDAExecutionProvider"],
+            provider_options=[{"device_id": str(rank)}],
+        )
+
+        tensor_shard_x = shard_tensor(tensor_x, rank=rank, axis=1, num_shards=2)
+        tensor_shard_w = shard_tensor(tensor_w, rank=rank, axis=1, num_shards=2)
+
+        result = sess.run(None, {"tensor_x": tensor_shard_x, "tensor_w": tensor_shard_w})
+
+        expected = shard_tensor(np.matmul(tensor_x, tensor_w), rank=rank, axis=1, num_shards=2)
+        np.testing.assert_allclose(result[0], expected, rtol=1e-5, atol=1e-8)
+
+    def test_matmul_srr_rr_srr(self):
+        device_mesh_shape = "[2]"
+        device_mesh_elements = "[0, 1]"
+
+        @onnxscript.script()
+        def matmul_srr_rr_srr(tensor_x: FLOAT, tensor_w: FLOAT) -> FLOAT:
+            return MICROSOFT_OPSET.DistributedMatMul(
+                tensor_x,
+                tensor_w,
+                input_shard_specs=["S[0]RR", "RR"],
+                output_shard_specs=["S[0]RR"],
+                input_device_mesh_shapes=[device_mesh_shape, device_mesh_shape],
+                input_device_mesh_elements=[device_mesh_elements, device_mesh_elements],
+                output_device_mesh_shapes=[device_mesh_shape],
+                output_device_mesh_elements=[device_mesh_elements],
+            )
+
+        rank = comm.Get_rank()
+        # Shape [2, 2, 4]
+        tensor_x = np.array([[[1, 2, 3, 4], [3, 4, 5, 6]], [[1, 2, 3, 4], [3, 4, 5, 6]]], dtype=np.float32)
+        # Shape [4, 2]
+        tensor_w = np.array([[1, 1], [2, 2], [3, 3], [4, 4]], dtype=np.float32)
+
+        onnx_model = matmul_srr_rr_srr.to_model_proto(
+            input_types=[FLOAT["s", 2, 4], FLOAT[4, 2]],
+            output_types=[FLOAT["s", 2, 2]],
+        )
+
+        sess = ort.InferenceSession(
+            onnx_model.SerializeToString(),
+            providers=["CUDAExecutionProvider"],
+            provider_options=[{"device_id": str(rank)}],
+        )
+
+        tensor_shard_x = shard_tensor(tensor_x, rank=rank, axis=0, num_shards=2)
+        tensor_shard_w = tensor_w
+
+        result = sess.run(None, {"tensor_x": tensor_shard_x, "tensor_w": tensor_shard_w})
+
+        expected = shard_tensor(np.matmul(tensor_x, tensor_w), rank=rank, axis=0, num_shards=2)
+        np.testing.assert_allclose(result[0], expected, rtol=1e-5, atol=1e-8)
+
+    def test_matmul_srr_rrrr_rsrr(self):
+        device_mesh_shape = "[2]"
+        device_mesh_elements = "[0, 1]"
+
+        @onnxscript.script()
+        def matmul_srr_rrrr_rsrr(tensor_x: FLOAT, tensor_w: FLOAT) -> FLOAT:
+            return MICROSOFT_OPSET.DistributedMatMul(
+                tensor_x,
+                tensor_w,
+                input_shard_specs=["S[0]RR", "RRRR"],
+                output_shard_specs=["RS[0]RR"],
+                input_device_mesh_shapes=[device_mesh_shape, device_mesh_shape],
+                input_device_mesh_elements=[device_mesh_elements, device_mesh_elements],
+                output_device_mesh_shapes=[device_mesh_shape],
+                output_device_mesh_elements=[device_mesh_elements],
+            )
+
+        rank = comm.Get_rank()
+        # Shape [2, 2, 4]
+        tensor_x = np.array([[[1, 2, 3, 4], [3, 4, 5, 6]], [[1, 2, 3, 4], [3, 4, 5, 6]]], dtype=np.float32)
+        # Shape [1, 2, 4, 2]
+        tensor_w = np.array([[[[1, 1], [2, 2], [3, 3], [4, 4]], [[1, 1], [2, 2], [3, 3], [4, 4]]]], dtype=np.float32)
+
+        onnx_model = matmul_srr_rrrr_rsrr.to_model_proto(
+            input_types=[FLOAT["s", 2, 4], FLOAT[1, 2, 4, 2]],
+            output_types=[FLOAT[1, "s", 2, 2]],
+        )
+
+        sess = ort.InferenceSession(
+            onnx_model.SerializeToString(),
+            providers=["CUDAExecutionProvider"],
+            provider_options=[{"device_id": str(rank)}],
+        )
+
+        tensor_shard_x = shard_tensor(tensor_x, rank=rank, axis=0, num_shards=2)
+        tensor_shard_w = tensor_w
+
+        result = sess.run(None, {"tensor_x": tensor_shard_x, "tensor_w": tensor_shard_w})
+
+        expected = shard_tensor(np.matmul(tensor_x, tensor_w), rank=rank, axis=1, num_shards=2)
+        np.testing.assert_allclose(result[0], expected, rtol=1e-5, atol=1e-8)
+
+    def test_matmul_sr_rs_rr(self):
+        device_mesh_shape = "[2]"
+        device_mesh_elements = "[0, 1]"
+
+        @onnxscript.script()
+        def matmul_sr_rs_rr(tensor_x: FLOAT, tensor_w: FLOAT) -> FLOAT:
+            return MICROSOFT_OPSET.DistributedMatMul(
+                tensor_x,
+                tensor_w,
+                input_shard_specs=["S[0]R", "RS[0]"],
+                output_shard_specs=["RR"],
+                input_device_mesh_shapes=[device_mesh_shape, device_mesh_shape],
+                input_device_mesh_elements=[device_mesh_elements, device_mesh_elements],
+                output_device_mesh_shapes=[device_mesh_shape],
+                output_device_mesh_elements=[device_mesh_elements],
+            )
+
+        rank = comm.Get_rank()
+        # Shape [4, 2]
+        tensor_x = np.array([[1, 2], [3, 4], [5, 6], [7, 8]], dtype=np.float32)
+        # Shape [2, 2]
+        tensor_w = np.array([[1, 1], [2, 2]], dtype=np.float32)
+
+        onnx_model = matmul_sr_rs_rr.to_model_proto(
+            input_types=[FLOAT["s", 2], FLOAT[2, "t"]],
+            output_types=[FLOAT["s", "t"]],
+        )
+
+        sess = ort.InferenceSession(
+            onnx_model.SerializeToString(),
+            providers=["CUDAExecutionProvider"],
+            provider_options=[{"device_id": str(rank)}],
+        )
+
+        tensor_shard_x = shard_tensor(tensor_x, rank=rank, axis=0, num_shards=2)
+        tensor_shard_w = shard_tensor(tensor_w, rank=rank, axis=1, num_shards=2)
+
+        result = sess.run(None, {"tensor_x": tensor_shard_x, "tensor_w": tensor_shard_w})
+
+        expected = np.matmul(tensor_x, tensor_w)
+        np.testing.assert_allclose(result[0], expected, rtol=1e-5, atol=1e-8)
+
+    def test_matmul_rr_rs_rs(self):
+        device_mesh_shape = "[2]"
+        device_mesh_elements = "[0, 1]"
+
+        @onnxscript.script()
+        def matmul_rr_rs_rs(tensor_x: FLOAT, tensor_w: FLOAT) -> FLOAT:
+            return MICROSOFT_OPSET.DistributedMatMul(
+                tensor_x,
+                tensor_w,
+                input_shard_specs=["RR", "RS[0]"],
+                output_shard_specs=["RS[0]"],
+                input_device_mesh_shapes=[device_mesh_shape, device_mesh_shape],
+                input_device_mesh_elements=[device_mesh_elements, device_mesh_elements],
+                output_device_mesh_shapes=[device_mesh_shape],
+                output_device_mesh_elements=[device_mesh_elements],
+            )
+
+        rank = comm.Get_rank()
+        # Shape [4, 2]
+        tensor_x = np.array([[1, 2], [3, 4], [5, 6], [7, 8]], dtype=np.float32)
+        # Shape [2, 4]
+        tensor_w = np.array([[1, 1, 1, 1], [2, 2, 2, 2]], dtype=np.float32)
+
+        onnx_model = matmul_rr_rs_rs.to_model_proto(
+            input_types=[FLOAT[4, 2], FLOAT[2, "s"]],
+            output_types=[FLOAT[4, "t"]],
+        )
+
+        sess = ort.InferenceSession(
+            onnx_model.SerializeToString(),
+            providers=["CUDAExecutionProvider"],
+            provider_options=[{"device_id": str(rank)}],
+        )
+
+        tensor_shard_x = tensor_x
+        tensor_shard_w = shard_tensor(tensor_w, rank=rank, axis=1, num_shards=2)
+
+        result = sess.run(None, {"tensor_x": tensor_shard_x, "tensor_w": tensor_shard_w})
+
+        expected = shard_tensor(np.matmul(tensor_x, tensor_w), rank=rank, axis=1, num_shards=2)
+        np.testing.assert_allclose(result[0], expected, rtol=1e-5, atol=1e-8)
+
+    def test_matmul_rr_sr_rr(self):
+        device_mesh_shape = "[2]"
+        device_mesh_elements = "[0, 1]"
+
+        @onnxscript.script()
+        def matmul_rr_sr_rr(tensor_x: FLOAT, tensor_w: FLOAT) -> FLOAT:
+            return MICROSOFT_OPSET.DistributedMatMul(
+                tensor_x,
+                tensor_w,
+                input_shard_specs=["RR", "S[0]R"],
+                output_shard_specs=["RR"],
+                input_device_mesh_shapes=[device_mesh_shape, device_mesh_shape],
+                input_device_mesh_elements=[device_mesh_elements, device_mesh_elements],
+                output_device_mesh_shapes=[device_mesh_shape],
+                output_device_mesh_elements=[device_mesh_elements],
+            )
+
+        rank = comm.Get_rank()
+        # Shape [4, 2]
+        tensor_x = np.array([[1, 2], [3, 4], [5, 6], [7, 8]], dtype=np.float32)
+        # Shape [2, 6]
+        tensor_w = np.array([[1, 1, 1, 1, 1, 1], [2, 2, 2, 2, 2, 2]], dtype=np.float32)
+
+        onnx_model = matmul_rr_sr_rr.to_model_proto(
+            input_types=[FLOAT[4, 2], FLOAT["s", 6]],
+            output_types=[FLOAT[4, 6]],
+        )
+
+        sess = ort.InferenceSession(
+            onnx_model.SerializeToString(),
+            providers=["CUDAExecutionProvider"],
+            provider_options=[{"device_id": str(rank)}],
+        )
+
+        tensor_shard_x = tensor_x
+        tensor_shard_w = shard_tensor(tensor_w, rank=rank, axis=0, num_shards=2)
+
+        result = sess.run(None, {"tensor_x": tensor_shard_x, "tensor_w": tensor_shard_w})
+
+        expected = np.matmul(tensor_x, tensor_w)
+        np.testing.assert_allclose(result[0], expected, rtol=1e-5, atol=1e-8)
+
+    def test_slice_sr_axis1(self):
+        device_mesh_shape = "[2]"
+        device_mesh_elements = "[0, 1]"
+
+        @onnxscript.script()
+        def slice_sr_axis1(tensor_x: FLOAT, tensor_starts: INT64, tensor_ends: INT64, tensor_axes: INT64) -> FLOAT:
+            return MICROSOFT_OPSET.DistributedSlice(
+                tensor_x,
+                tensor_starts,
+                tensor_ends,
+                tensor_axes,
+                input_shard_specs=["S[0]R", "R", "R", "R", "R"],
+                output_shard_specs=["S[0]R"],
+                input_device_mesh_shapes=[device_mesh_shape] * 5,
+                input_device_mesh_elements=[device_mesh_elements] * 5,
+                output_device_mesh_shapes=[device_mesh_shape],
+                output_device_mesh_elements=[device_mesh_elements],
+            )
+
+        rank = comm.Get_rank()
+        # Shape [2, 4]
+        tensor_x = np.array([[1, 2, 3, 4], [5, 6, 7, 8]], dtype=np.float32)
+        tensor_starts = np.array([0], dtype=np.int64)
+        tensor_ends = np.array([2], dtype=np.int64)
+        tensor_axes = np.array([1], dtype=np.int64)
+
+        onnx_model = slice_sr_axis1.to_model_proto(
+            input_types=[FLOAT[1, 4], INT64[1], INT64[1], INT64[1]],
+            output_types=[FLOAT[1, 2]],
+        )
+
+        sess = ort.InferenceSession(
+            onnx_model.SerializeToString(),
+            providers=["CUDAExecutionProvider"],
+            provider_options=[{"device_id": str(rank)}],
+        )
+
+        tensor_shard_x = shard_tensor(tensor_x, rank=rank, axis=0, num_shards=2)
+
+        result = sess.run(
+            None,
+            {
+                "tensor_x": tensor_shard_x,
+                "tensor_starts": tensor_starts,
+                "tensor_ends": tensor_ends,
+                "tensor_axes": tensor_axes,
+            },
+        )
+
+        expected = tensor_shard_x[:, 0:2]
+        np.testing.assert_allclose(result[0], expected, rtol=1e-5, atol=1e-8)
+
+    def test_slice_rs_axis1(self):
+        device_mesh_shape = "[2]"
+        device_mesh_elements = "[0, 1]"
+
+        @onnxscript.script()
+        def slice_sr_axis1(tensor_x: FLOAT, tensor_starts: INT64, tensor_ends: INT64, tensor_axes: INT64) -> FLOAT:
+            return MICROSOFT_OPSET.DistributedSlice(
+                tensor_x,
+                tensor_starts,
+                tensor_ends,
+                tensor_axes,
+                input_shard_specs=["RS[0]", "R", "R", "R", "R"],
+                output_shard_specs=["RS[0]"],
+                input_device_mesh_shapes=[device_mesh_shape] * 5,
+                input_device_mesh_elements=[device_mesh_elements] * 5,
+                output_device_mesh_shapes=[device_mesh_shape],
+                output_device_mesh_elements=[device_mesh_elements],
+            )
+
+        rank = comm.Get_rank()
+        # Shape [2, 4]
+        tensor_x = np.array([[1, 2, 3, 4], [5, 6, 7, 8]], dtype=np.float32)
+        tensor_starts = np.array([0], dtype=np.int64)
+        tensor_ends = np.array([2], dtype=np.int64)
+        tensor_axes = np.array([1], dtype=np.int64)
+
+        onnx_model = slice_sr_axis1.to_model_proto(
+            input_types=[FLOAT[2, 2], INT64[1], INT64[1], INT64[1]],
+            output_types=[FLOAT[2, 1]],
+        )
+
+        sess = ort.InferenceSession(
+            onnx_model.SerializeToString(),
+            providers=["CUDAExecutionProvider"],
+            provider_options=[{"device_id": str(rank)}],
+        )
+
+        tensor_shard_x = shard_tensor(tensor_x, rank=rank, axis=1, num_shards=2)
+        result = sess.run(
+            None,
+            {
+                "tensor_x": tensor_shard_x,
+                "tensor_starts": tensor_starts,
+                "tensor_ends": tensor_ends,
+                "tensor_axes": tensor_axes,
+            },
+        )
+
+        expected = tensor_x[:, 0:2][:, rank : rank + 1]
+        np.testing.assert_allclose(result[0], expected, rtol=1e-5, atol=1e-8)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/onnxruntime/test/python/onnxruntime_test_float8.py b/onnxruntime/test/python/onnxruntime_test_float8.py
index 76ca5d9538374..bb63ea234498f 100644
--- a/onnxruntime/test/python/onnxruntime_test_float8.py
+++ b/onnxruntime/test/python/onnxruntime_test_float8.py
@@ -334,7 +334,7 @@ def test_model_cast_cast_cpu(self, name: str, float_name: str, saturate: int):
         ]
     )
     @unittest.skipIf(not hasattr(TensorProto, "FLOAT8E4M3FN"), reason="needs onnx>=1.14.0")
-    @unittest.skipIf("CUDAExecutionProvider" not in available_providers, reason="Not running on CUDA.")
+    @unittest.skipIf("CUDAExecutionProvider" not in available_providers, reason="Not running without CUDA.")
     def test_model_cast_cast_cuda(self, name: str, float_name: str, saturate: int, provider: str):
         so = onnxruntime.SessionOptions()
         so.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_DISABLE_ALL
@@ -373,7 +373,7 @@ def test_model_cast_cast_cuda(self, name: str, float_name: str, saturate: int, p
         ]
     )
     @unittest.skipIf(not hasattr(TensorProto, "FLOAT8E4M3FN"), reason="needs onnx>=1.14.0")
-    @unittest.skipIf("CUDAExecutionProvider" not in available_providers, reason="Not running on CUDA.")
+    @unittest.skipIf("CUDAExecutionProvider" not in available_providers, reason="Not running without CUDA.")
     def test_model_cast_cast_cuda_ortvalue(self, name: str, float_name: str, saturate: int, provider: str):
         so = onnxruntime.SessionOptions()
         so.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_DISABLE_ALL
@@ -627,7 +627,7 @@ def test_model_cast_like_x2_cpu(self, name: str, float_name: str, saturate: int)
         ]
     )
     @unittest.skipIf(not hasattr(TensorProto, "FLOAT8E4M3FN"), reason="needs onnx>=1.14.0")
-    @unittest.skipIf("CUDAExecutionProvider" not in available_providers, reason="Not running on CUDA.")
+    @unittest.skipIf("CUDAExecutionProvider" not in available_providers, reason="Not running without CUDA.")
     def test_model_qdq_cuda(self, name: str, float_name: str, saturate: int, provider: str):
         so = onnxruntime.SessionOptions()
         so.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_DISABLE_ALL
@@ -693,7 +693,7 @@ def test_model_qdq_cuda_ortvalue(self, name: str, float_name: str, saturate: int
         self.assertEqual(expect.shape, y.shape)
         self.assertEqual(expect.dtype, y.dtype)
 
-    @unittest.skipIf("CUDAExecutionProvider" not in available_providers, reason="Not running on CUDA.")
+    @unittest.skipIf("CUDAExecutionProvider" not in available_providers, reason="Not running without CUDA.")
     def test_compare_cpu_cuda_e4m3fn(self):
         folder = os.path.join(os.path.dirname(__file__), "..", "testdata", "float8")
         model = os.path.join(folder, "te.cast_fp8_1_fp32.onnx")
diff --git a/onnxruntime/test/python/onnxruntime_test_float8_gemm8.py b/onnxruntime/test/python/onnxruntime_test_float8_gemm8.py
new file mode 100644
index 0000000000000..482a334b12b85
--- /dev/null
+++ b/onnxruntime/test/python/onnxruntime_test_float8_gemm8.py
@@ -0,0 +1,301 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# pylint: disable=C0116,W0212,R1720,C0103,C0114
+#
+# Note: the precision is different on V100, H100 even with the same code.
+# The thresholds were adjusted on H100 as the precision seems lower on this machine.
+
+import itertools
+import unittest
+import warnings
+
+import numpy as np
+import parameterized
+from numpy.testing import assert_allclose
+from onnx import TensorProto
+from onnx.checker import check_model
+from onnx.defs import onnx_opset_version
+from onnx.helper import make_graph, make_model, make_node, make_opsetid, make_tensor_value_info
+from onnx.numpy_helper import from_array
+
+from onnxruntime import InferenceSession, get_available_providers
+
+available_providers = [provider for provider in get_available_providers()]
+
+
+class TestFloat8Gemm8(unittest.TestCase):
+    def get_model_gemm(
+        self,
+        float_name,
+        alpha=1.0,
+        beta=0.0,
+        transA=0,
+        transB=0,
+        domain="",
+        dtype=TensorProto.FLOAT,
+        activation="NONE",
+    ):
+        proto_type = getattr(TensorProto, float_name)
+        use_f8 = proto_type in (TensorProto.FLOAT8E4M3FN, TensorProto.FLOAT8E5M2)
+
+        a = make_tensor_value_info("A", TensorProto.FLOAT, [None, None])
+        b = make_tensor_value_info("B", TensorProto.FLOAT, [None, None])
+        d = make_tensor_value_info("Y", TensorProto.FLOAT, [None, None])
+
+        inits = []
+        kwargs = {}
+        node_inputs = ["Af", "Bf"]
+        inputs = [a, b]
+        bias = beta != 0
+        if bias:
+            inputs.append(make_tensor_value_info("C", TensorProto.FLOAT, [None, None]))
+            node_inputs = ["Af", "Bf", "Cf"]
+            if use_f8:
+                node_inputs.extends(["one"] * 3)
+        elif use_f8:
+            node_inputs.append("")
+            node_inputs.extend(["one"] * 3)
+
+        if use_f8:
+            assert domain == "com.microsoft"
+            inits.append(from_array(np.array([1], dtype=np.float32), name="one"))
+            kwargs = dict(
+                domain=domain,
+                dtype=dtype,
+            )
+            if activation is not None:
+                kwargs["activation"] = activation
+            op_name = "GemmFloat8"
+        elif domain == "com.microsoft":
+            op_name = "GemmFloat8"
+            kwargs = dict(
+                domain=domain,
+                dtype=dtype,
+            )
+        else:
+            op_name = "Gemm"
+        nodes = [
+            make_node("Cast", ["A"], ["Af"], to=proto_type),
+            make_node("Cast", ["B"], ["Bf"], to=proto_type),
+            make_node("Cast", ["C"], ["Cf"], to=proto_type) if bias else None,
+            make_node(
+                op_name,
+                node_inputs,
+                ["Yf"],
+                transA=transA,
+                transB=transB,
+                alpha=alpha,
+                beta=beta,
+                **kwargs,
+            ),
+            make_node("Cast", ["Yf"], ["Y"], to=TensorProto.FLOAT),
+        ]
+        nodes = [n for n in nodes if n is not None]
+        graph = make_graph(nodes, "gemm", inputs, [d], inits)
+        opset_imports = [make_opsetid("", onnx_opset_version() - 1)]
+        if domain == "com.microsoft":
+            opset_imports.append(make_opsetid("com.microsoft", 1))
+        onnx_model = make_model(graph, opset_imports=opset_imports, ir_version=9)
+        if domain != "com.microsoft":
+            check_model(onnx_model)
+        return onnx_model
+
+    def common_test_model_gemm(self, float_type, mul=0.33, atol=0, rtol=0, square=True, **kwargs):
+        if square:
+            a = (np.arange(256) * 0.01).astype(np.float32).reshape((-1, 16))
+            b = (np.arange(256) * -0.01).astype(np.float32).reshape((-1, 16))
+            c = (np.arange(256) * 0.03).astype(np.float32).reshape((-1, 16))
+            b[:, 0] += 1
+        else:
+            a = (np.arange(256) / 256).astype(np.float32).reshape((32, -1))
+            b = (np.arange(512) / 512).astype(np.float32).reshape((32, -1))
+            c = (np.arange(128) / 128).astype(np.float32).reshape((8, 16))
+
+        feeds = {"A": a, "B": b}
+
+        expected = (a.T if kwargs.get("transA", 0) else a) @ (b.T if kwargs.get("transB", 0) else b)
+        expected *= kwargs.get("alpha", 1.0)
+        if kwargs.get("beta", 0) != 0:
+            expected += kwargs["beta"] * c
+            feeds["C"] = c
+
+        onnx_model = self.get_model_gemm("FLOAT", **kwargs)
+
+        ref = InferenceSession(
+            onnx_model.SerializeToString(), providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
+        )
+        y = ref.run(None, feeds)[0]
+        if float_type in ("FLOAT", "FLOAT16"):
+            try:
+                assert_allclose(expected, y, atol=atol, rtol=rtol)
+            except Exception as e:
+
+                def check(f):
+                    try:
+                        return f()[:2, :2]
+                    except Exception as e:
+                        return str(e)
+
+                raise AssertionError(
+                    f"Gemm ERROR len(inputs)={len(feeds)}"
+                    f"\na@b=\n{check(lambda:a@b)}"
+                    f"\na.T@b=\n{check(lambda:a.T@b)}"
+                    f"\na@b.T=\n{check(lambda:a@b.T)}"
+                    f"\na.T@b.T=\n{check(lambda:a.T@b.T)}"
+                    f"\n----\nb@a=\n{check(lambda:b@a)}"
+                    f"\nb.T@a=\n{check(lambda:b.T@a)}"
+                    f"\nb@a.T=\n{check(lambda:b@a.T)}"
+                    f"\nb.T@a.T=\n{check(lambda:b.T@a.T)}"
+                    f"\n----\nexpected=\n{expected[:2,:2]}"
+                    f"\n----\ngot=\n{y[:2,:2]}"
+                    f"\nkwargs={kwargs}"
+                ) from e
+
+        self.assertEqual(expected.shape, y.shape)
+        self.assertEqual(expected.dtype, y.dtype)
+
+        onnx_model_f8 = self.get_model_gemm(float_type, domain="com.microsoft", **kwargs)
+        try:
+            ref8 = InferenceSession(
+                onnx_model_f8.SerializeToString(), providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
+            )
+        except Exception as e:
+            if "CUDA < 12.0 does not support bias" in str(e):
+                return
+            raise AssertionError(f"Could not load model {onnx_model_f8}") from e
+        try:
+            y = ref8.run(None, feeds)[0]
+        except Exception as e:
+            if "CUBLAS_STATUS_NOT_SUPPORTED" in str(e):
+                # Skipping. This machine does not support float8.
+                warnings.warn("unable to test with float8 on this machine.")
+                return
+            raise AssertionError(f"Could not execute model {onnx_model_f8}") from e
+        try:
+            assert_allclose(expected, y, atol=atol, rtol=rtol)
+        except Exception as e:
+
+            def check(f):
+                try:
+                    return f()[:2, :2]
+                except Exception as e:
+                    return str(e)
+
+            raise AssertionError(
+                f"Gemm ERROR len(inputs)={len(feeds)}"
+                f"\na@b=\n{check(lambda:a@b)}"
+                f"\na.T@b=\n{check(lambda:a.T@b)}"
+                f"\na@b.T=\n{check(lambda:a@b.T)}"
+                f"\na.T@b.T=\n{check(lambda:a.T@b.T)}"
+                f"\n----\nb@a=\n{check(lambda:b@a)}"
+                f"\nb.T@a=\n{check(lambda:b.T@a)}"
+                f"\nb@a.T=\n{check(lambda:b@a.T)}"
+                f"\nb.T@a.T=\n{check(lambda:b.T@a.T)}"
+                f"\n----\nexpected=\n{expected[:2,:2]}"
+                f"\n----\ngot=\n{y[:2,:2]}"
+                f"\nkwargs={kwargs}"
+            ) from e
+        self.assertEqual(expected.shape, y.shape)
+        self.assertEqual(expected.dtype, y.dtype)
+
+    @unittest.skipIf("CUDAExecutionProvider" not in available_providers, reason="Not running without CUDA.")
+    def test_model_gemm_float(self):
+        self.common_test_model_gemm("FLOAT", transA=1, rtol=1e-3)
+
+    @unittest.skipIf("CUDAExecutionProvider" not in available_providers, reason="Not running without CUDA.")
+    def test_model_gemm_float_default_values(self):
+        self.common_test_model_gemm("FLOAT", transA=1, rtol=1e-3, activation=None)
+
+    @unittest.skipIf("CUDAExecutionProvider" not in available_providers, reason="Not running without CUDA.")
+    def test_model_gemm_float_relu(self):
+        self.common_test_model_gemm("FLOAT", transA=1, rtol=1e-3, activation="RELU")
+
+    @unittest.skipIf("CUDAExecutionProvider" not in available_providers, reason="Not running without CUDA.")
+    def test_model_gemm_float_gelu(self):
+        self.common_test_model_gemm("FLOAT", transA=1, rtol=1e-3, activation="GELU")
+
+    @unittest.skipIf("CUDAExecutionProvider" not in available_providers, reason="Not running without CUDA.")
+    def test_model_gemm_float_bias(self):
+        self.common_test_model_gemm("FLOAT", transA=1, beta=1.0, rtol=1e-3)
+
+    @unittest.skipIf("CUDAExecutionProvider" not in available_providers, reason="Not running without CUDA.")
+    def test_model_gemm_float16(self):
+        self.common_test_model_gemm(
+            "FLOAT16",
+            rtol=1e-2,
+            dtype=TensorProto.FLOAT16,
+            transB=1,
+        )
+
+    @unittest.skipIf("CUDAExecutionProvider" not in available_providers, reason="Not running without CUDA.")
+    @unittest.skipIf(not hasattr(TensorProto, "FLOAT8E4M3FN"), reason="needs onnx>=1.14.0")
+    def test_model_gemm_float8_e4m3(self):
+        self.common_test_model_gemm(
+            "FLOAT8E4M3FN",
+            rtol=0.5,
+            dtype=TensorProto.FLOAT,
+            transA=0,
+            transB=1,
+            alpha=10.0,
+        )
+
+    @parameterized.parameterized.expand(list(itertools.product([0, 1], [0, 1])))
+    @unittest.skipIf("CUDAExecutionProvider" not in available_providers, reason="Not running without CUDA.")
+    def test_combinations_square_matrices(self, transA, transB):
+        self.common_test_model_gemm("FLOAT", transA=transA, transB=transB, rtol=1e-3)
+
+    @parameterized.parameterized.expand(
+        [
+            ((2, 3), (3, 5), 0, 0),
+            ((2, 3), (5, 3), 0, 1),
+            ((2, 3), (5, 2), 1, 1),
+            ((2, 3), (2, 5), 1, 0),
+        ]
+    )
+    @unittest.skipIf("CUDAExecutionProvider" not in available_providers, reason="Not running without CUDA.")
+    def test_combinations(self, shapeA, shapeB, transA, transB):
+        model = make_model(
+            make_graph(
+                [
+                    make_node(
+                        "GemmFloat8",
+                        ["A", "B"],
+                        ["Y"],
+                        transA=transA,
+                        transB=transB,
+                        domain="com.microsoft",
+                    )
+                ],
+                "f8",
+                [
+                    make_tensor_value_info("A", TensorProto.FLOAT, [None, None]),
+                    make_tensor_value_info("B", TensorProto.FLOAT, [None, None]),
+                ],
+                [make_tensor_value_info("Y", TensorProto.FLOAT, [None, None])],
+            ),
+            opset_imports=[make_opsetid("", 19), make_opsetid("com.microsoft", 1)],
+        )
+
+        sess = InferenceSession(model.SerializeToString(), providers=["CUDAExecutionProvider", "CPUExecutionProvider"])
+        a = np.arange(np.prod(shapeA)).reshape(shapeA).astype(np.float32)
+        b = np.arange(np.prod(shapeB)).reshape(shapeB).astype(np.float32)
+        try:
+            expected = (a.T if transA else a) @ (b.T if transB else b)
+        except Exception as e:
+            raise AssertionError(
+                f"Unable to multiply shapes={shapeA}x{shapeB}, transA={transA}, transB={transB}"
+            ) from e
+        try:
+            got = sess.run(None, {"A": a, "B": b})
+        except Exception as e:
+            raise AssertionError(
+                f"Unable to run Gemm with shapes={shapeA}x{shapeB}, transA={transA}, transB={transB}"
+            ) from e
+        self.assertEqual(expected.shape, got[0].shape)
+        self.assertEqual(expected.dtype, got[0].dtype)
+        assert_allclose(expected, got[0])
+
+
+if __name__ == "__main__":
+    # TestFloat8Gemm8().test_model_gemm_float()
+    unittest.main(verbosity=2)
diff --git a/onnxruntime/test/python/onnxruntime_test_ort_trainer.py b/onnxruntime/test/python/onnxruntime_test_ort_trainer.py
deleted file mode 100644
index 4cf2e5d7f7588..0000000000000
--- a/onnxruntime/test/python/onnxruntime_test_ort_trainer.py
+++ /dev/null
@@ -1,1026 +0,0 @@
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# Licensed under the MIT License.
-
-import copy
-import os
-import unittest
-
-import numpy as np
-import onnx
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-from helper import get_name
-from numpy.testing import assert_allclose
-from torchvision import datasets, transforms
-
-import onnxruntime
-from onnxruntime.capi.ort_trainer import (
-    IODescription,
-    LossScaler,
-    ModelDescription,
-    ORTTrainer,
-    generate_sample,
-    load_checkpoint,
-    save_checkpoint,
-)
-
-SCRIPT_DIR = os.path.realpath(os.path.dirname(__file__))
-
-
-def ort_trainer_learning_rate_description():
-    return IODescription(
-        "Learning_Rate",
-        [
-            1,
-        ],
-        torch.float32,
-    )
-
-
-def remove_extra_info(model_desc):
-    simple_model_desc = copy.deepcopy(model_desc)
-    for input_desc in simple_model_desc.inputs_:
-        input_desc.dtype_ = None
-        input_desc.num_classes_ = None
-    for output_desc in simple_model_desc.outputs_:
-        output_desc.dtype_ = None
-        output_desc.num_classes_ = None
-    return simple_model_desc
-
-
-def bert_model_description():
-    vocab_size = 30528
-    input_ids_desc = IODescription(
-        "input_ids",
-        ["batch", "max_seq_len_in_batch"],
-        torch.int64,
-        num_classes=vocab_size,
-    )
-    segment_ids_desc = IODescription("segment_ids", ["batch", "max_seq_len_in_batch"], torch.int64, num_classes=2)
-    input_mask_desc = IODescription("input_mask", ["batch", "max_seq_len_in_batch"], torch.int64, num_classes=2)
-    masked_lm_labels_desc = IODescription(
-        "masked_lm_labels",
-        ["batch", "max_seq_len_in_batch"],
-        torch.int64,
-        num_classes=vocab_size,
-    )
-    next_sentence_labels_desc = IODescription(
-        "next_sentence_labels",
-        [
-            "batch",
-        ],
-        torch.int64,
-        num_classes=2,
-    )
-    loss_desc = IODescription("loss", [], torch.float32)
-
-    return ModelDescription(
-        [
-            input_ids_desc,
-            segment_ids_desc,
-            input_mask_desc,
-            masked_lm_labels_desc,
-            next_sentence_labels_desc,
-        ],
-        [loss_desc],
-    )
-
-
-def map_optimizer_attributes(name):
-    no_decay_keys = ["bias", "gamma", "beta", "LayerNorm"]
-    no_decay = any(no_decay_key in name for no_decay_key in no_decay_keys)
-    if no_decay:
-        return {"alpha": 0.9, "beta": 0.999, "lambda": 0.0, "epsilon": 1e-6}
-    else:
-        return {"alpha": 0.9, "beta": 0.999, "lambda": 0.01, "epsilon": 1e-6}
-
-
-def generate_sample_batch(desc, batch_size, device):
-    desc_ = copy.deepcopy(desc)
-    desc_.shape_[0] = batch_size
-    sample = generate_sample(desc_, device)
-    return sample
-
-
-def create_ort_trainer(
-    gradient_accumulation_steps,
-    use_mixed_precision,
-    allreduce_post_accumulation,
-    use_simple_model_desc=True,
-    loss_scaler=None,
-    deepspeed_zero_stage=0,
-):
-    model_desc = bert_model_description()
-    simple_model_desc = remove_extra_info(model_desc) if use_simple_model_desc else model_desc
-    learning_rate_description = ort_trainer_learning_rate_description()
-    device = torch.device("cuda", 0)
-
-    onnx_model = onnx.load(get_name("bert_toy_postprocessed.onnx"))
-
-    model = ORTTrainer(
-        onnx_model,
-        None,
-        simple_model_desc,
-        "LambOptimizer",
-        map_optimizer_attributes,
-        learning_rate_description,
-        device,
-        gradient_accumulation_steps=gradient_accumulation_steps,
-        world_rank=0,
-        world_size=1,
-        loss_scaler=loss_scaler,
-        use_mixed_precision=use_mixed_precision,
-        allreduce_post_accumulation=allreduce_post_accumulation,
-        deepspeed_zero_stage=deepspeed_zero_stage,
-    )
-
-    return model, model_desc, device
-
-
-def run_bert_training_test(
-    gradient_accumulation_steps,
-    use_mixed_precision,
-    allreduce_post_accumulation,
-    use_simple_model_desc=True,
-    use_internel_loss_scale=False,
-):
-    torch.manual_seed(1)
-    onnxruntime.set_seed(1)
-
-    loss_scaler = LossScaler("ort_test_input_loss_scalar", True) if use_internel_loss_scale else None
-
-    model, model_desc, device = create_ort_trainer(
-        gradient_accumulation_steps,
-        use_mixed_precision,
-        allreduce_post_accumulation,
-        use_simple_model_desc,
-        loss_scaler,
-    )
-
-    if loss_scaler is None:
-        loss_scaler = LossScaler(model.loss_scale_input_name, True)
-
-    input_ids_batches = []
-    segment_ids_batches = []
-    input_mask_batches = []
-    masked_lm_labels_batches = []
-    next_sentence_labels_batches = []
-    batch_size = 16
-    num_batches = 8
-    for _batch in range(num_batches):
-        input_ids_batches = [
-            *input_ids_batches,
-            generate_sample_batch(model_desc.inputs_[0], batch_size, device),
-        ]
-        segment_ids_batches = [
-            *segment_ids_batches,
-            generate_sample_batch(model_desc.inputs_[1], batch_size, device),
-        ]
-        input_mask_batches = [
-            *input_mask_batches,
-            generate_sample_batch(model_desc.inputs_[2], batch_size, device),
-        ]
-        masked_lm_labels_batches = [
-            *masked_lm_labels_batches,
-            generate_sample_batch(model_desc.inputs_[3], batch_size, device),
-        ]
-        next_sentence_labels_batches = [
-            *next_sentence_labels_batches,
-            generate_sample_batch(model_desc.inputs_[4], batch_size, device),
-        ]
-
-    lr_batch_list = [
-        0.0000000e00,
-        4.6012269e-07,
-        9.2024538e-07,
-        1.3803681e-06,
-        1.8404908e-06,
-        2.3006135e-06,
-        2.7607362e-06,
-        3.2208588e-06,
-        3.6809815e-06,
-    ]
-
-    actual_losses = []
-    actual_all_finites = []
-
-    for batch_count in range(num_batches):
-        input_ids = generate_sample_batch(model_desc.inputs_[0], batch_size, device)
-        segment_ids = generate_sample_batch(model_desc.inputs_[1], batch_size, device)
-        input_mask = generate_sample_batch(model_desc.inputs_[2], batch_size, device)
-        masked_lm_labels = generate_sample_batch(model_desc.inputs_[3], batch_size, device)
-        next_sentence_labels = generate_sample_batch(model_desc.inputs_[4], batch_size, device)
-        lr = lr_batch_list[batch_count]
-
-        learning_rate = torch.tensor([lr]).to(device)
-        training_args = [
-            input_ids,
-            segment_ids,
-            input_mask,
-            masked_lm_labels,
-            next_sentence_labels,
-            learning_rate,
-        ]
-        if use_mixed_precision:
-            if not use_internel_loss_scale:
-                loss_scale = torch.tensor([loss_scaler.loss_scale_]).to(device)
-                training_args.append(loss_scale)
-            actual_loss = model.train_step(*training_args)
-            if isinstance(actual_loss, (list, tuple)):
-                assert len(actual_loss) == 2
-                actual_loss, actual_all_finite = actual_loss
-                if not use_internel_loss_scale:
-                    loss_scaler.update_loss_scale(actual_all_finite.item())
-                    actual_all_finites = [
-                        *actual_all_finites,
-                        actual_all_finite.cpu().numpy().item(0),
-                    ]
-
-            actual_losses = [*actual_losses, actual_loss.cpu().numpy().item(0)]
-        else:
-            loss = model(*training_args)
-            actual_losses = [*actual_losses, loss.cpu().numpy().item(0)]
-
-        if batch_count == num_batches - 1:
-            # test eval_step api with fetches at the end of the training.
-            # if eval_step is called during the training, it will affect the actual training loss (training session is stateful).
-            eval_loss = model.eval_step(
-                input_ids,
-                segment_ids,
-                input_mask,
-                masked_lm_labels,
-                next_sentence_labels,
-                fetches=["loss"],
-            )
-            eval_loss = eval_loss.cpu().numpy().item(0)
-
-    # If using internal loss scale, all_finites are handled internally too.
-    if use_mixed_precision and not use_internel_loss_scale:
-        return actual_losses, actual_all_finites, eval_loss
-    else:
-        return actual_losses, eval_loss
-
-
-class MNISTWrapper:
-    class NeuralNet(nn.Module):
-        def __init__(self, input_size, hidden_size, num_classes):
-            super().__init__()
-            self.fc1 = nn.Linear(input_size, hidden_size)
-            self.relu = nn.ReLU()
-            self.fc2 = nn.Linear(hidden_size, num_classes)
-            self.register_buffer("bias_buffer", torch.tensor(1e-6))
-
-        def forward(self, x):
-            out = self.fc1(x)
-            out = self.relu(out)
-            out = self.fc2(out)
-            out = torch.add(out, self.bias_buffer.to(out.dtype))
-            return out
-
-    class NeuralNetWithLoss(nn.Module):
-        def __init__(self, input_size, hidden_size, num_classes):
-            super().__init__()
-            self.fc1 = nn.Linear(input_size, hidden_size)
-            self.relu = nn.ReLU()
-            self.fc2 = nn.Linear(hidden_size, num_classes)
-
-        def forward(self, x, target):
-            out = self.fc1(x)
-            out = self.relu(out)
-            out = self.fc2(out)
-            return F.nll_loss(F.log_softmax(out, dim=1), target), out
-
-    def my_loss(x, target):  # noqa: N805
-        return F.nll_loss(F.log_softmax(x, dim=1), target)
-
-    def train_with_trainer(self, learningRate, trainer, device, train_loader, epoch):
-        actual_losses = []
-        for batch_idx, (data, target) in enumerate(train_loader):
-            data, target = data.to(device), target.to(device)  # noqa: PLW2901
-            data = data.reshape(data.shape[0], -1)  # noqa: PLW2901
-
-            loss, _ = trainer.train_step(data, target, torch.tensor([learningRate]))
-
-            args_log_interval = 100
-            if batch_idx % args_log_interval == 0:
-                print(
-                    "Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
-                        epoch,
-                        batch_idx * len(data),
-                        len(train_loader.dataset),
-                        100.0 * batch_idx / len(train_loader),
-                        loss.item(),
-                    )
-                )
-                actual_losses = [*actual_losses, loss.cpu().numpy().item()]
-
-        return actual_losses
-
-    # TODO: comple this once ORT training can do evaluation.
-    def test_with_trainer(self, trainer, device, test_loader):
-        test_loss = 0
-        correct = 0
-        with torch.no_grad():
-            for data, target in test_loader:
-                data, target = data.to(device), target.to(device)  # noqa: PLW2901
-                data = data.reshape(data.shape[0], -1)  # noqa: PLW2901
-                output = F.log_softmax(trainer.eval_step((data), fetches=["probability"]), dim=1)
-                test_loss += F.nll_loss(output, target, reduction="sum").item()  # sum up batch loss
-                pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
-                correct += pred.eq(target.view_as(pred)).sum().item()
-
-        test_loss /= len(test_loader.dataset)
-
-        print(
-            "\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n".format(
-                test_loss,
-                correct,
-                len(test_loader.dataset),
-                100.0 * correct / len(test_loader.dataset),
-            )
-        )
-
-        return test_loss, correct / len(test_loader.dataset)
-
-    def mnist_model_description():
-        input_desc = IODescription("input1", ["batch", 784], torch.float32)
-        label_desc = IODescription(
-            "label",
-            [
-                "batch",
-            ],
-            torch.int64,
-            num_classes=10,
-        )
-        loss_desc = IODescription("loss", [], torch.float32)
-        probability_desc = IODescription("probability", ["batch", 10], torch.float32)
-        return ModelDescription([input_desc, label_desc], [loss_desc, probability_desc])
-
-    def get_loaders(self):
-        args_batch_size = 64
-        args_test_batch_size = 1000
-
-        kwargs = {"num_workers": 0, "pin_memory": True}
-        # set shuffle to False to get deterministic data set among different torch version
-        train_loader = torch.utils.data.DataLoader(
-            datasets.MNIST(
-                os.path.join(SCRIPT_DIR, "data"),
-                train=True,
-                download=True,
-                transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]),
-            ),
-            batch_size=args_batch_size,
-            shuffle=False,
-            **kwargs,
-        )
-        test_loader = torch.utils.data.DataLoader(
-            datasets.MNIST(
-                os.path.join(SCRIPT_DIR, "data"),
-                train=False,
-                transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]),
-            ),
-            batch_size=args_test_batch_size,
-            shuffle=False,
-            **kwargs,
-        )
-
-        return train_loader, test_loader
-
-    def get_model(self):
-        input_size = 784
-        hidden_size = 500
-        num_classes = 10
-
-        # warning: changes the pytorch random generator state
-        model = MNISTWrapper.NeuralNet(input_size, hidden_size, num_classes)
-        model_desc = MNISTWrapper.mnist_model_description()
-        return model, model_desc
-
-    def get_model_with_internal_loss(self):
-        input_size = 784
-        hidden_size = 500
-        num_classes = 10
-
-        # warning: changes the pytorch random generator state
-        model = MNISTWrapper.NeuralNetWithLoss(input_size, hidden_size, num_classes)
-        model_desc = MNISTWrapper.mnist_model_description()
-        return model, model_desc
-
-    def get_trainer(
-        self,
-        model,
-        model_desc,
-        device,
-        onnx_opset_ver=12,
-        frozen_weights=[],  # noqa: B006
-        internal_loss_fn=False,
-        get_lr_this_step=None,
-        optimizer="SGDOptimizer",
-    ):
-        loss_fn = MNISTWrapper.my_loss if not internal_loss_fn else None
-        return ORTTrainer(
-            model,
-            loss_fn,
-            model_desc,
-            optimizer,
-            None,
-            IODescription(
-                "Learning_Rate",
-                [
-                    1,
-                ],
-                torch.float32,
-            ),
-            device,
-            _opset_version=onnx_opset_ver,
-            frozen_weights=frozen_weights,
-            get_lr_this_step=get_lr_this_step,
-        )
-
-
-class TestOrtTrainer(unittest.TestCase):
-    def run_mnist_training_and_testing(onnx_opset_ver):  # noqa: N805
-        torch.manual_seed(1)
-        device = torch.device("cuda")
-
-        mnist = MNISTWrapper()
-        train_loader, test_loader = mnist.get_loaders()
-        model, model_desc = mnist.get_model()
-        trainer = mnist.get_trainer(model, model_desc, device, onnx_opset_ver=onnx_opset_ver)
-
-        learningRate = 0.01  # noqa: N806
-        args_epochs = 2
-        expected_losses = [
-            2.312044143676758,
-            0.8018650412559509,
-            0.5819257497787476,
-            0.47025489807128906,
-            0.35800155997276306,
-            0.41124576330184937,
-            0.2731882333755493,
-            0.4201386570930481,
-            0.39458805322647095,
-            0.38380366563796997,
-            0.2722422480583191,
-            0.24230478703975677,
-            0.23505745828151703,
-            0.33442264795303345,
-            0.21140924096107483,
-            0.31545233726501465,
-            0.18556523323059082,
-            0.3453553020954132,
-            0.29598352313041687,
-            0.3595045208930969,
-        ]
-
-        expected_test_losses = [0.3145490005493164, 0.256188737487793]
-        expected_test_accuracies = [0.9075, 0.9265]
-
-        actual_losses = []
-        actual_test_losses, actual_accuracies = [], []
-        for epoch in range(1, args_epochs + 1):
-            actual_losses = [
-                *actual_losses,
-                *mnist.train_with_trainer(learningRate, trainer, device, train_loader, epoch),
-            ]
-
-            test_loss, accuracy = mnist.test_with_trainer(trainer, device, test_loader)
-            actual_test_losses = [*actual_test_losses, test_loss]
-            actual_accuracies = [*actual_accuracies, accuracy]
-
-            # if you update outcomes, also do so for resume from checkpoint test
-            # args_checkpoint_epoch = 1
-            # if epoch == args_checkpoint_epoch:
-            # state = {'rng_state': torch.get_rng_state(), 'model': trainer.state_dict()}
-            # torch.save(state, get_name("ckpt_mnist.pt"))
-
-        print("actual_losses=", actual_losses)
-        print("actual_test_losses=", actual_test_losses)
-        print("actual_accuracies=", actual_accuracies)
-
-        # to update expected outcomes, enable pdb and run the test with -s and copy paste outputs
-        # import pdb; pdb.set_trace()
-        rtol = 1e-03
-        assert_allclose(expected_losses, actual_losses, rtol=rtol, err_msg="loss mismatch")
-        assert_allclose(
-            expected_test_losses,
-            actual_test_losses,
-            rtol=rtol,
-            err_msg="test loss mismatch",
-        )
-        assert_allclose(
-            expected_test_accuracies,
-            actual_accuracies,
-            rtol=rtol,
-            err_msg="test accuracy mismatch",
-        )
-
-    def test_mnist_training_and_testing_opset12(self):
-        TestOrtTrainer.run_mnist_training_and_testing(onnx_opset_ver=12)
-
-    def test_mnist_resume_training_and_testing(self):
-        torch.manual_seed(1)
-        device = torch.device("cuda")
-
-        mnist = MNISTWrapper()
-        train_loader, test_loader = mnist.get_loaders()
-        model, model_desc = mnist.get_model()
-
-        learningRate = 0.01  # noqa: N806
-        args_epochs = 2
-        args_checkpoint_epoch = 1
-        # should match those in test without checkpointing
-        expected_losses = [
-            0.26509523391723633,
-            0.24135658144950867,
-            0.2397943139076233,
-            0.3351520597934723,
-            0.20998981595039368,
-            0.31488314270973206,
-            0.18481917679309845,
-            0.34727591276168823,
-            0.2971782684326172,
-            0.3609251379966736,
-        ]
-
-        expected_test_losses = [0.25632242965698243]
-        expected_test_accuracies = [0.9264]
-
-        actual_losses = []
-        actual_test_losses, actual_accuracies = [], []
-
-        # restore from checkpoint
-        resume_trainer = mnist.get_trainer(model, model_desc, device)
-        checkpoint = torch.load(get_name("ckpt_mnist.pt"), map_location="cpu")
-        torch.set_rng_state(checkpoint["rng_state"])
-        resume_trainer.load_state_dict(checkpoint["model"], strict=True)
-
-        # continue ..
-        for epoch in range(args_checkpoint_epoch + 1, args_epochs + 1):
-            actual_losses = [
-                *actual_losses,
-                *mnist.train_with_trainer(learningRate, resume_trainer, device, train_loader, epoch),
-            ]
-
-            test_loss, accuracy = mnist.test_with_trainer(resume_trainer, device, test_loader)
-            actual_test_losses = [*actual_test_losses, test_loss]
-            actual_accuracies = [*actual_accuracies, accuracy]
-
-        print("actual_losses=", actual_losses)
-        print("actual_test_losses=", actual_test_losses)
-        print("actual_accuracies=", actual_accuracies)
-
-        # to update expected outcomes, enable pdb and run the test with -s and copy paste outputs
-        # import pdb; pdb.set_trace()
-        rtol = 1e-03
-        assert_allclose(expected_losses, actual_losses, rtol=rtol, err_msg="loss mismatch")
-        assert_allclose(
-            expected_test_losses,
-            actual_test_losses,
-            rtol=rtol,
-            err_msg="test loss mismatch",
-        )
-        assert_allclose(
-            expected_test_accuracies,
-            actual_accuracies,
-            rtol=rtol,
-            err_msg="test accuracy mismatch",
-        )
-
-    def test_mnist_state_dict(self):
-        torch.manual_seed(1)
-        device = torch.device("cuda")
-
-        mnist = MNISTWrapper()
-        train_loader, test_loader = mnist.get_loaders()
-        model, model_desc = mnist.get_model()
-
-        trainer = mnist.get_trainer(model, model_desc, device)
-        state_dict = trainer.state_dict()
-        assert state_dict == {}
-
-        learningRate = 0.02  # noqa: N806
-
-        data, target = next(iter(train_loader))
-        data, target = data.to(device), target.to(device)
-        data = data.reshape(data.shape[0], -1)
-
-        loss, _ = trainer.train_step(data, target, torch.tensor([learningRate]))
-
-        state_dict = trainer.state_dict()
-        assert state_dict.keys() == {
-            "fc1.bias",
-            "fc1.weight",
-            "fc2.bias",
-            "fc2.weight",
-            "bias_buffer",
-        }
-
-    def test_mnist_save_as_onnx(self):
-        torch.manual_seed(1)
-        device = torch.device("cuda")
-        onnx_file_name = "mnist.onnx"
-        if os.path.exists(onnx_file_name):
-            os.remove(onnx_file_name)
-
-        mnist = MNISTWrapper()
-        train_loader, test_loader = mnist.get_loaders()
-        model, model_desc = mnist.get_model()
-
-        trainer = mnist.get_trainer(model, model_desc, device)
-        trainer.save_as_onnx(onnx_file_name)
-        assert not os.path.exists(onnx_file_name)
-
-        learningRate = 0.02  # noqa: N806
-
-        data, target = next(iter(train_loader))
-        data, target = data.to(device), target.to(device)
-        data = data.reshape(data.shape[0], -1)
-
-        loss, _ = trainer.train_step(data, target, torch.tensor([learningRate]))
-
-        trainer.save_as_onnx(onnx_file_name)
-        assert os.path.exists(onnx_file_name)
-
-    def test_mnist_device(self):
-        torch.manual_seed(1)
-        device = torch.device("cuda")
-
-        mnist = MNISTWrapper()
-        train_loader, test_loader = mnist.get_loaders()
-        model, model_desc = mnist.get_model()
-
-        for model_device in [torch.device("cpu"), torch.device("cuda")]:
-            model.to(model_device)
-            trainer = mnist.get_trainer(model, model_desc, device)
-            learningRate = 0.02  # noqa: N806
-
-            data, target = next(iter(train_loader))
-            data, target = data.to(device), target.to(device)
-            data = data.reshape(data.shape[0], -1)
-
-            loss, _ = trainer.train_step(data, target, torch.tensor([learningRate]))
-
-    def test_mnist_initializer_names(self):
-        torch.manual_seed(1)
-        device = torch.device("cuda")
-
-        mnist = MNISTWrapper()
-        train_loader, test_loader = mnist.get_loaders()
-        model, model_desc = mnist.get_model()
-
-        trainer = mnist.get_trainer(model, model_desc, device)
-        learningRate = 0.02  # noqa: N806
-
-        data, target = next(iter(train_loader))
-        data, target = data.to(device), target.to(device)
-        data = data.reshape(data.shape[0], -1)
-
-        loss, _ = trainer.train_step(data, target, torch.tensor([learningRate]))
-
-        assert ({n.name for n in trainer.onnx_model_.graph.initializer} - {"bias_buffer"}) == {
-            n for n, t in model.named_parameters()
-        }
-
-    def test_mnist_initializer_names_with_internal_loss(self):
-        torch.manual_seed(1)
-        device = torch.device("cuda")
-
-        mnist = MNISTWrapper()
-        train_loader, test_loader = mnist.get_loaders()
-        model, model_desc = mnist.get_model_with_internal_loss()
-
-        def get_lr_this_step(global_step):
-            learningRate = 0.02  # noqa: N806
-            return torch.tensor([learningRate])
-
-        trainer = mnist.get_trainer(
-            model,
-            model_desc,
-            device,
-            internal_loss_fn=True,
-            get_lr_this_step=get_lr_this_step,
-        )
-
-        data, target = next(iter(train_loader))
-        data, target = data.to(device), target.to(device)
-        data = data.reshape(data.shape[0], -1)
-
-        loss, _ = trainer.train_step(data, target)
-
-        assert {n.name for n in trainer.onnx_model_.graph.initializer} == {n for n, t in model.named_parameters()}
-
-    def test_mnist_frozen_weight(self):
-        torch.manual_seed(1)
-        device = torch.device("cuda")
-
-        mnist = MNISTWrapper()
-        train_loader, test_loader = mnist.get_loaders()
-        model, model_desc = mnist.get_model()
-
-        trainer = mnist.get_trainer(model, model_desc, device, frozen_weights=["fc1.weight"])
-
-        learningRate = 0.02  # noqa: N806
-
-        data, target = next(iter(train_loader))
-        data, target = data.to(device), target.to(device)
-        data = data.reshape(data.shape[0], -1)
-
-        loss, _ = trainer.train_step(data, target, torch.tensor([learningRate]))
-
-        fc1_trainstep_1 = trainer.state_dict()["fc1.weight"]
-        fc2_trainstep_1 = trainer.state_dict()["fc2.weight"]
-
-        loss, _ = trainer.train_step(data, target, torch.tensor([learningRate]))
-
-        fc1_trainstep_2 = trainer.state_dict()["fc1.weight"]
-        fc2_trainstep_2 = trainer.state_dict()["fc2.weight"]
-        assert np.array_equal(fc1_trainstep_1, fc1_trainstep_2) and not np.array_equal(fc2_trainstep_1, fc2_trainstep_2)
-
-    def test_mnist_torch_buffer(self):
-        torch.manual_seed(1)
-        device = torch.device("cuda")
-
-        mnist = MNISTWrapper()
-        train_loader, test_loader = mnist.get_loaders()
-        model, model_desc = mnist.get_model()
-
-        trainer = mnist.get_trainer(model, model_desc, device)
-
-        learningRate = 0.02  # noqa: N806
-
-        data, target = next(iter(train_loader))
-        data, target = data.to(device), target.to(device)
-        data = data.reshape(data.shape[0], -1)
-
-        loss, _ = trainer.train_step(data, target, torch.tensor([learningRate]))
-
-        fc1_trainstep_1 = trainer.state_dict()["fc1.weight"]
-        bias_buffer_trainstep_1 = trainer.state_dict()["bias_buffer"]
-
-        loss, _ = trainer.train_step(data, target, torch.tensor([learningRate]))
-
-        fc1_trainstep_2 = trainer.state_dict()["fc1.weight"]
-        bias_buffer_trainstep_2 = trainer.state_dict()["bias_buffer"]
-        assert not np.array_equal(fc1_trainstep_1, fc1_trainstep_2) and np.array_equal(
-            bias_buffer_trainstep_1, bias_buffer_trainstep_2
-        )
-
-    def test_mnist_frozen_weight_checkpoint(self):
-        torch.manual_seed(1)
-        device = torch.device("cuda")
-
-        mnist = MNISTWrapper()
-        train_loader, test_loader = mnist.get_loaders()
-        model, model_desc = mnist.get_model()
-
-        trainer = mnist.get_trainer(model, model_desc, device, frozen_weights=["fc1.weight"])
-
-        learningRate = 0.02  # noqa: N806
-
-        # do one train step
-        data, target = next(iter(train_loader))
-        data, target = data.to(device), target.to(device)
-        data = data.reshape(data.shape[0], -1)
-
-        loss, _ = trainer.train_step(data, target, torch.tensor([learningRate]))
-
-        # do one eval step
-        data, target = next(iter(train_loader))
-        data, target = data.to(device), target.to(device)
-        data = data.reshape(data.shape[0], -1)
-
-        loss, _ = trainer.eval_step(data, target)
-
-        # save checkpoint, load model and compare
-        state_dict = trainer.state_dict()
-
-        new_model, _ = mnist.get_model()
-        trainer = mnist.get_trainer(new_model, model_desc, device, frozen_weights=["fc1.weight"])
-        trainer.load_state_dict(state_dict)
-
-        ckpt_loss, _ = trainer.eval_step(data, target)
-        assert loss == ckpt_loss
-
-        loaded_state_dict = trainer.state_dict()
-        assert state_dict.keys() == loaded_state_dict.keys()
-
-    def test_mnist_training_checkpoint(self):
-        torch.manual_seed(1)
-        device = torch.device("cuda")
-
-        mnist = MNISTWrapper()
-        train_loader, test_loader = mnist.get_loaders()
-        model, model_desc = mnist.get_model()
-
-        trainer = mnist.get_trainer(
-            model,
-            model_desc,
-            device,
-            optimizer="LambOptimizer",
-            frozen_weights=["fc1.weight"],
-        )
-
-        learningRate = 0.02  # noqa: N806
-
-        # do 5 train step
-        for _i in range(5):
-            data, target = next(iter(train_loader))
-            data, target = data.to(device), target.to(device)
-            data = data.reshape(data.shape[0], -1)
-
-            loss, _ = trainer.train_step(data, target, torch.tensor([learningRate]))
-
-        # do one eval step
-        data, target = next(iter(train_loader))
-        data, target = data.to(device), target.to(device)
-        data = data.reshape(data.shape[0], -1)
-
-        loss, _ = trainer.eval_step(data, target)
-
-        # save checkpoint, load model and compare
-        state_dict = trainer.state_dict()
-
-        new_model, _ = mnist.get_model()
-        trainer = mnist.get_trainer(
-            new_model,
-            model_desc,
-            device,
-            optimizer="LambOptimizer",
-            frozen_weights=["fc1.weight"],
-        )
-        trainer.load_state_dict(state_dict)
-
-        ckpt_loss, _ = trainer.eval_step(data, target)
-        assert loss == ckpt_loss
-
-        loaded_state_dict = trainer.state_dict()
-        assert state_dict.keys() == loaded_state_dict.keys()
-        for key in state_dict:
-            assert np.array_equal(state_dict[key], loaded_state_dict[key])
-
-    def test_bert_training_basic(self):
-        expected_losses = [
-            11.027887,
-            11.108191,
-            11.055356,
-            11.040912,
-            10.960277,
-            11.02691,
-            11.082471,
-            10.920979,
-        ]
-        expected_eval_loss = [10.958977]
-        actual_losses, actual_eval_loss = run_bert_training_test(
-            gradient_accumulation_steps=1,
-            use_mixed_precision=False,
-            allreduce_post_accumulation=False,
-        )
-
-        # to update expected outcomes, enable pdb and run the test with -s and copy paste outputs
-        # print('losses expected: ', expected_losses)
-        # print('losses actual:   ', actual_losses)
-        # print('eval_loss expected: ', expected_eval_loss)
-        # print('eval_loss actual:   ', actual_eval_loss)
-        # import pdb; pdb.set_trace()
-
-        rtol = 1e-03
-        assert_allclose(expected_losses, actual_losses, rtol=rtol, err_msg="loss mismatch")
-        assert_allclose(
-            expected_eval_loss,
-            actual_eval_loss,
-            rtol=rtol,
-            err_msg="evaluation loss mismatch",
-        )
-
-    def test_bert_training_gradient_accumulation(self):
-        expected_losses = [
-            11.027887,
-            11.108191,
-            11.055354,
-            11.040904,
-            10.960266,
-            11.026897,
-            11.082475,
-            10.920998,
-        ]
-        expected_eval_loss = [10.958998]
-
-        actual_losses, actual_eval_loss = run_bert_training_test(
-            gradient_accumulation_steps=4,
-            use_mixed_precision=False,
-            allreduce_post_accumulation=False,
-        )
-
-        # to update expected outcomes, enable pdb and run the test with -s and copy paste outputs
-        # print('losses expected: ', expected_losses)
-        # print('losses actual:   ', actual_losses)
-        # print('eval_loss expected: ', expected_eval_loss)
-        # print('eval_loss actual:   ', actual_eval_loss)
-        # import pdb; pdb.set_trace()
-
-        rtol = 1e-03
-        assert_allclose(expected_losses, actual_losses, rtol=rtol, err_msg="loss mismatch")
-        assert_allclose(
-            expected_eval_loss,
-            actual_eval_loss,
-            rtol=rtol,
-            err_msg="evaluation loss mismatch",
-        )
-
-    def test_bert_checkpointing_basic(self):
-        model, _, _ = create_ort_trainer(
-            gradient_accumulation_steps=1,
-            use_mixed_precision=False,
-            allreduce_post_accumulation=True,
-            use_simple_model_desc=True,
-            loss_scaler=None,
-        )
-        sd = model.state_dict()
-
-        # modify one of the default values
-        sd["bert.encoder.layer.0.attention.output.LayerNorm.weight"] += 1
-        model.load_state_dict(sd)
-
-        ckpt_dir = "testdata"
-        save_checkpoint(model, ckpt_dir, "bert_toy_save_test")
-        del model
-
-        # create new model
-        model2, _, _ = create_ort_trainer(
-            gradient_accumulation_steps=1,
-            use_mixed_precision=False,
-            allreduce_post_accumulation=True,
-            use_simple_model_desc=True,
-            loss_scaler=None,
-        )
-
-        # load changed checkpoint
-        load_checkpoint(model2, ckpt_dir, "bert_toy_save_test")
-        loaded_sd = model2.state_dict()
-
-        for k, v in loaded_sd.items():
-            assert torch.all(torch.eq(v, sd[k]))
-
-    def test_wrap_model_loss_fn_state_dict(self):
-        torch.manual_seed(1)
-        device = torch.device("cuda")
-
-        class LinearModel(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-                self.linear = torch.nn.Linear(2, 4)
-
-            def forward(self, y=None, x=None):
-                if y is not None:
-                    return self.linear(x) + y
-                else:
-                    return self.linear(x) + torch.ones(2, 4)
-
-        pt_model = LinearModel()
-        data = torch.randn(2, 2)
-        label = torch.tensor([0, 1], dtype=torch.int64)
-        input_desc = IODescription("x", [2, 2], torch.float32)
-        label_desc = IODescription(
-            "label",
-            [
-                2,
-            ],
-            torch.int64,
-            num_classes=4,
-        )
-        output_desc = IODescription("output", [2, 4], torch.float32)
-        loss_desc = IODescription("loss", [], torch.float32)
-        model_desc = ModelDescription([input_desc, label_desc], [loss_desc, output_desc])
-
-        def loss_fn(x, label):
-            return F.nll_loss(F.log_softmax(x, dim=1), label)
-
-        def get_lr_this_step(global_step):
-            learningRate = 0.02  # noqa: N806
-            return torch.tensor([learningRate])
-
-        ort_trainer = ORTTrainer(
-            pt_model,
-            loss_fn,
-            model_desc,
-            "SGDOptimizer",
-            None,
-            IODescription(
-                "Learning_Rate",
-                [
-                    1,
-                ],
-                torch.float32,
-            ),
-            device,
-            get_lr_this_step=get_lr_this_step,
-        )
-        ort_trainer.train_step(x=data, label=label)
-        state_dict = ort_trainer.state_dict()
-        assert state_dict.keys() == {"linear.bias", "linear.weight"}
-
-
-if __name__ == "__main__":
-    unittest.main(module=__name__, buffer=True)
diff --git a/onnxruntime/test/python/onnxruntime_test_ort_trainer_with_mixed_precision.py b/onnxruntime/test/python/onnxruntime_test_ort_trainer_with_mixed_precision.py
deleted file mode 100644
index 3b994e6f26710..0000000000000
--- a/onnxruntime/test/python/onnxruntime_test_ort_trainer_with_mixed_precision.py
+++ /dev/null
@@ -1,102 +0,0 @@
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# Licensed under the MIT License.
-
-import unittest
-
-from numpy.testing import assert_allclose, assert_array_equal
-from onnxruntime_test_ort_trainer import run_bert_training_test
-
-
-class TestOrtTrainer(unittest.TestCase):
-    def test_bert_training_mixed_precision(self):
-        expected_losses = [
-            11.034248352050781,
-            11.125300407409668,
-            11.006105422973633,
-            11.047048568725586,
-            11.027417182922363,
-            11.015759468078613,
-            11.060905456542969,
-            10.971782684326172,
-        ]
-        expected_all_finites = [True, True, True, True, True, True, True, True]
-        expected_eval_loss = [10.959012985229492]
-        actual_losses, actual_all_finites, actual_eval_loss = run_bert_training_test(
-            gradient_accumulation_steps=1,
-            use_mixed_precision=True,
-            allreduce_post_accumulation=False,
-            use_simple_model_desc=False,
-        )
-
-        rtol = 1e-02
-        assert_allclose(expected_losses, actual_losses, rtol=rtol, err_msg="loss mismatch")
-        assert_array_equal(expected_all_finites, actual_all_finites, "all_finite mismatch")
-        assert_allclose(
-            expected_eval_loss,
-            actual_eval_loss,
-            rtol=rtol,
-            err_msg="evaluation loss mismatch",
-        )
-
-    def test_bert_training_mixed_precision_internal_loss_scale(self):
-        expected_losses = [
-            11.034248352050781,
-            11.125300407409668,
-            11.006105422973633,
-            11.047048568725586,
-            11.027417182922363,
-            11.015759468078613,
-            11.060905456542969,
-            10.971782684326172,
-        ]
-        expected_eval_loss = [10.959012985229492]
-        actual_losses, actual_eval_loss = run_bert_training_test(
-            gradient_accumulation_steps=1,
-            use_mixed_precision=True,
-            allreduce_post_accumulation=False,
-            use_simple_model_desc=False,
-            use_internel_loss_scale=True,
-        )
-
-        rtol = 1e-02
-        assert_allclose(expected_losses, actual_losses, rtol=rtol, err_msg="loss mismatch")
-        assert_allclose(
-            expected_eval_loss,
-            actual_eval_loss,
-            rtol=rtol,
-            err_msg="evaluation loss mismatch",
-        )
-
-    def test_bert_training_gradient_accumulation_mixed_precision(self):
-        expected_losses = [
-            11.034248352050781,
-            11.125300407409668,
-            11.006077766418457,
-            11.047025680541992,
-            11.027434349060059,
-            11.0156831741333,
-            11.060973167419434,
-            10.971841812133789,
-        ]
-        expected_all_finites = [True, True]
-        expected_eval_loss = [10.95903205871582]
-        actual_losses, actual_all_finites, actual_eval_loss = run_bert_training_test(
-            gradient_accumulation_steps=4,
-            use_mixed_precision=True,
-            allreduce_post_accumulation=False,
-            use_simple_model_desc=False,
-        )
-
-        rtol = 1e-02
-        assert_allclose(expected_losses, actual_losses, rtol=rtol, err_msg="loss mismatch")
-        assert_array_equal(expected_all_finites, actual_all_finites, "all_finite mismatch")
-        assert_allclose(
-            expected_eval_loss,
-            actual_eval_loss,
-            rtol=rtol,
-            err_msg="evaluation loss mismatch",
-        )
-
-
-if __name__ == "__main__":
-    unittest.main(module=__name__, buffer=True)
diff --git a/onnxruntime/test/python/onnxruntime_test_python.py b/onnxruntime/test/python/onnxruntime_test_python.py
index 59f7781bb4f8a..d8628c4288206 100644
--- a/onnxruntime/test/python/onnxruntime_test_python.py
+++ b/onnxruntime/test/python/onnxruntime_test_python.py
@@ -80,11 +80,7 @@ def test_model_serialization(self):
             so.log_severity_level = 1
             so.logid = "TestModelSerialization"
             so.optimized_model_filepath = "./PythonApiTestOptimizedModel.onnx"
-            onnxrt.InferenceSession(
-                get_name("mul_1.onnx"),
-                sess_options=so,
-                providers=["CPUExecutionProvider"],
-            )
+            onnxrt.InferenceSession(get_name("mul_1.onnx"), sess_options=so)
             self.assertTrue(os.path.isfile(so.optimized_model_filepath))
             os.remove(so.optimized_model_filepath)
         except Fail as onnxruntime_error:
@@ -107,11 +103,7 @@ def test_model_serialization_with_external_initializers(self):
                 "session.optimized_model_external_initializers_file_name", external_initializers_file
             )
             so.add_session_config_entry("session.optimized_model_external_initializers_min_size_in_bytes", "100")
-            onnxrt.InferenceSession(
-                get_name("mnist.onnx"),
-                sess_options=so,
-                providers=["CPUExecutionProvider"],
-            )
+            onnxrt.InferenceSession(get_name("mnist.onnx"), sess_options=so)
             self.assertTrue(os.path.isfile(so.optimized_model_filepath))
             self.assertTrue(os.path.isfile(external_initializers_file))
             os.remove(so.optimized_model_filepath)
@@ -137,7 +129,7 @@ def test_model_serialization_with_external_initializers_to_directory(self):
                 "session.optimized_model_external_initializers_file_name", external_initializers_file
             )
             so.add_session_config_entry("session.optimized_model_external_initializers_min_size_in_bytes", "100")
-            onnxrt.InferenceSession(get_name("mnist.onnx"), sess_options=so, providers=["CPUExecutionProvider"])
+            onnxrt.InferenceSession(get_name("mnist.onnx"), sess_options=so)
             self.assertTrue(os.path.isfile(so.optimized_model_filepath))
             self.assertTrue(os.path.isfile(os.path.join(directory, external_initializers_file)))
             os.remove(so.optimized_model_filepath)
@@ -163,9 +155,7 @@ def test_model_serialization_with_original_external_initializers_to_directory(se
                 "session.optimized_model_external_initializers_file_name", external_initializers_file
             )
             so.add_session_config_entry("session.optimized_model_external_initializers_min_size_in_bytes", "100")
-            onnxrt.InferenceSession(
-                get_name("model_with_orig_ext_data.onnx"), sess_options=so, providers=["CPUExecutionProvider"]
-            )
+            onnxrt.InferenceSession(get_name("model_with_orig_ext_data.onnx"), sess_options=so)
             self.assertTrue(os.path.isfile(so.optimized_model_filepath))
             self.assertTrue(os.path.isfile(os.path.join(directory, external_initializers_file)))
             os.remove(so.optimized_model_filepath)
@@ -198,9 +188,7 @@ def test_model_serialization_with_original_external_initializers_to_current_dire
         # still refers to the original external data file. We shall fix this issue so that the
         # optimized model only refers to one external data file.
         so.add_session_config_entry("session.optimized_model_external_initializers_min_size_in_bytes", "10")
-        session1 = onnxrt.InferenceSession(
-            get_name("model_with_orig_ext_data.onnx"), sess_options=so, providers=["CPUExecutionProvider"]
-        )
+        session1 = onnxrt.InferenceSession(get_name("model_with_orig_ext_data.onnx"), sess_options=so)
         del session1
         self.assertTrue(os.path.isfile(optimized_model_filepath))
         self.assertTrue(os.path.isfile(external_initializers_file))
@@ -216,9 +204,7 @@ def test_model_serialization_with_original_external_initializers_to_current_dire
 
         # verify that we can load the optimized model with external data in current directory and save
         # optimized model with external data to current directory.
-        session2 = onnxrt.InferenceSession(
-            optimized_model_filepath, sess_options=so2, providers=["CPUExecutionProvider"]
-        )
+        session2 = onnxrt.InferenceSession(optimized_model_filepath, sess_options=so2)
         del session2
         self.assertTrue(os.path.isfile(optimized_model_filepath_2))
         self.assertTrue(os.path.isfile(external_initializers_file_2))
@@ -227,9 +213,7 @@ def test_model_serialization_with_original_external_initializers_to_current_dire
         os.remove(optimized_model_filepath)
         os.remove(external_initializers_file)
 
-        session3 = onnxrt.InferenceSession(
-            optimized_model_filepath_2, sess_options=onnxrt.SessionOptions(), providers=["CPUExecutionProvider"]
-        )
+        session3 = onnxrt.InferenceSession(optimized_model_filepath_2, sess_options=onnxrt.SessionOptions())
         del session3
 
         os.remove(optimized_model_filepath_2)
@@ -314,6 +298,20 @@ def test_set_providers_with_options(self):
             self.assertEqual(option["trt_engine_cache_path"], str(engine_cache_path))
             self.assertEqual(option["trt_force_sequential_engine_build"], "1")
 
+            from onnxruntime.capi import _pybind_state as C
+
+            session_options = C.get_default_session_options()
+
+            # TRT plugins registered as custom op domain should only be added once in session option regardless of number of session creation
+            sess1 = onnxrt.InferenceSession(
+                get_name("mul_1.onnx"), session_options, providers=["TensorrtExecutionProvider"]
+            )
+            sess2 = onnxrt.InferenceSession(
+                get_name("mul_1.onnx"), session_options, providers=["TensorrtExecutionProvider"]
+            )
+            self.assertIn("TensorrtExecutionProvider", sess1.get_providers())
+            self.assertIn("TensorrtExecutionProvider", sess2.get_providers())
+
             # We currently disable following test code since that not all test machines/GPUs have nvidia int8 capability
 
             """
diff --git a/onnxruntime/test/python/onnxruntime_test_training_unit_tests.py b/onnxruntime/test/python/onnxruntime_test_training_unit_tests.py
deleted file mode 100644
index 540f39b797bdb..0000000000000
--- a/onnxruntime/test/python/onnxruntime_test_training_unit_tests.py
+++ /dev/null
@@ -1,95 +0,0 @@
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# Licensed under the MIT License.
-
-import unittest
-
-import torch
-import torch.nn as nn
-from numpy.testing import assert_allclose
-from onnxruntime_test_ort_trainer import map_optimizer_attributes, ort_trainer_learning_rate_description
-from onnxruntime_test_training_unittest_utils import process_dropout
-
-import onnxruntime
-from onnxruntime.capi.ort_trainer import IODescription, ModelDescription, ORTTrainer
-
-
-class TestTrainingDropout(unittest.TestCase):
-    def setUp(self):
-        torch.manual_seed(1)
-        onnxruntime.set_seed(1)
-
-    @unittest.skip(
-        "Temporarily disable this test. The graph below will trigger ORT to "
-        "sort backward graph before forward graph which gives incorrect result. "
-        "https://github.com/microsoft/onnxruntime/issues/16801"
-    )
-    def test_training_and_eval_dropout(self):
-        class TwoDropoutNet(nn.Module):
-            def __init__(self, drop_prb_1, drop_prb_2, dim_size):
-                super().__init__()
-                self.drop_1 = nn.Dropout(drop_prb_1)
-                self.drop_2 = nn.Dropout(drop_prb_2)
-                self.weight_1 = torch.nn.Parameter(torch.zeros(dim_size, dtype=torch.float32))
-
-            def forward(self, x):
-                x = x + self.weight_1
-                x = self.drop_1(x)
-                x = self.drop_2(x)
-                output = x
-                return output[0]
-
-        dim_size = 3
-        device = torch.device("cuda", 0)
-        # This will drop all values, therefore expecting all 0 in output tensor
-        model = TwoDropoutNet(0.999, 0.999, dim_size)
-        input_desc = IODescription("input", [dim_size], torch.float32)
-        output_desc = IODescription("output", [], torch.float32)
-        model_desc = ModelDescription([input_desc], [output_desc])
-        lr_desc = ort_trainer_learning_rate_description()
-        model = ORTTrainer(
-            model,
-            None,
-            model_desc,
-            "LambOptimizer",
-            map_optimizer_attributes,
-            lr_desc,
-            device,
-            postprocess_model=process_dropout,
-            world_rank=0,
-            world_size=1,
-        )
-        input = torch.ones(dim_size, dtype=torch.float32).to(device)
-        expected_training_output = [0.0]
-        expected_eval_output = [1.0]
-        learning_rate = torch.tensor([1.0000000e00]).to(device)
-        input_args = [input, learning_rate]
-        train_output = model.train_step(*input_args)
-
-        rtol = 1e-04
-        assert_allclose(
-            expected_training_output,
-            train_output.item(),
-            rtol=rtol,
-            err_msg="dropout training loss mismatch",
-        )
-
-        eval_output = model.eval_step(input)
-        assert_allclose(
-            expected_eval_output,
-            eval_output.item(),
-            rtol=rtol,
-            err_msg="dropout eval loss mismatch",
-        )
-
-        # Do another train step to make sure it's using original ratios
-        train_output_2 = model.train_step(*input_args)
-        assert_allclose(
-            expected_training_output,
-            train_output_2.item(),
-            rtol=rtol,
-            err_msg="dropout training loss 2 mismatch",
-        )
-
-
-if __name__ == "__main__":
-    unittest.main(module=__name__, buffer=True)
diff --git a/onnxruntime/test/python/onnxruntime_test_training_unittest_utils.py b/onnxruntime/test/python/onnxruntime_test_training_unittest_utils.py
deleted file mode 100644
index 3d3feca06a99b..0000000000000
--- a/onnxruntime/test/python/onnxruntime_test_training_unittest_utils.py
+++ /dev/null
@@ -1,56 +0,0 @@
-import numpy as np
-from onnx import numpy_helper
-
-
-def get_node_index(model, node):
-    i = 0
-    while i < len(model.graph.node):
-        if model.graph.node[i] == node:
-            break
-        i += 1
-    return i if i < len(model.graph.node) else None
-
-
-def add_const(model, name, output, t_value=None, f_value=None):
-    const_node = model.graph.node.add()
-    const_node.op_type = "Constant"
-    const_node.name = name
-    const_node.output.extend([output])
-    attr = const_node.attribute.add()
-    attr.name = "value"
-    if t_value is not None:
-        attr.type = 4
-        attr.t.CopyFrom(t_value)
-    else:
-        attr.type = 1
-        attr.f = f_value
-    return const_node
-
-
-def process_dropout(model):
-    dropouts = []
-    index = 0
-    for node in model.graph.node:
-        if node.op_type == "Dropout":
-            new_dropout = model.graph.node.add()
-            new_dropout.op_type = "TrainableDropout"
-            new_dropout.name = "TrainableDropout_%d" % index
-            # make ratio node
-            ratio = np.asarray([node.attribute[0].f], dtype=np.float32)
-            print(ratio.shape)
-            ratio_value = numpy_helper.from_array(ratio)
-            ratio_node = add_const(
-                model,
-                "dropout_node_ratio_%d" % index,
-                "dropout_node_ratio_%d" % index,
-                t_value=ratio_value,
-            )
-            print(ratio_node)
-            new_dropout.input.extend([node.input[0], ratio_node.output[0]])
-            new_dropout.output.extend(node.output)
-            dropouts.append(get_node_index(model, node))
-            index += 1
-    dropouts.sort(reverse=True)
-    for d in dropouts:
-        del model.graph.node[d]
-    model.opset_import[0].version = 10
diff --git a/onnxruntime/test/python/quantization/op_test_utils.py b/onnxruntime/test/python/quantization/op_test_utils.py
index e94ac5c961583..eede1be05f85f 100644
--- a/onnxruntime/test/python/quantization/op_test_utils.py
+++ b/onnxruntime/test/python/quantization/op_test_utils.py
@@ -279,6 +279,9 @@ def check_model_correctness(
     ops_set = set(node.op_type for node in model_onnx.graph.node)
     check_reference_evaluator = not (ops_set & {"EmbedLayerNormalization", "Conv", "Attention", "Transpose"})
 
+    with open(model_path_to_check, "rb") as f:
+        model_check = onnx.load(f)
+
     if check_reference_evaluator and onnx_recent_enough:
         ref = ReferenceEvaluator(model_path_origin)
         ref_origin_results = ref.run(None, inputs)
@@ -289,7 +292,7 @@ def check_model_correctness(
                 output,
                 rtol=rtol,
                 atol=atol,
-                err_msg=f"Model {model_path_to_check!r} failed for providers={providers!r}.",
+                err_msg=f"Model {model_path_origin!r} failed for providers={providers!r}.",
             )
 
     # Verifies the shapes in the quantized model.
@@ -301,40 +304,52 @@ def check_model_correctness(
                 expected_shapes[init.name] = tuple(init.dims)
         checked = 0
         f8_quantization = False
-        with open(model_path_to_check, "rb") as f:
-            model_check = onnx.load(f)
-            for init in model_check.graph.initializer:
-                if init.name.endswith("_quantized"):
-                    name = init.name.replace("_quantized", "")
-                    expected = expected_shapes[name]
-                    shape = tuple(init.dims)
-                    if not dynamic and expected != shape:
-                        raise AssertionError(
-                            f"Shape mismatch for initializer {init.name!r} from {init.name!r}, "
-                            f"shape={shape} != {expected} (expected)."
-                        )
-                    else:
-                        checked += 1
-                if "zero_point" in init.name:
-                    dt = init.data_type
-                    f8_quantization = f8_quantization or dt in (
-                        TensorProto.FLOAT8E4M3FN,
-                        TensorProto.FLOAT8E4M3FNUZ,
-                        TensorProto.FLOAT8E5M2,
-                        TensorProto.FLOAT8E5M2FNUZ,
+        for init in model_check.graph.initializer:
+            if init.name.endswith("_quantized"):
+                name = init.name.replace("_quantized", "")
+                expected = expected_shapes[name]
+                shape = tuple(init.dims)
+                if not dynamic and expected != shape:
+                    raise AssertionError(
+                        f"Shape mismatch for initializer {init.name!r} from {init.name!r}, "
+                        f"shape={shape} != {expected} (expected)."
                     )
-            if checked == 0:
-                raise AssertionError(
-                    f"Unable to check expected shape, expected_shapes={expected_shapes}, "
-                    f"names={[init.name for init in model_check.graph.initializer]}."
+                else:
+                    checked += 1
+            if "zero_point" in init.name:
+                dt = init.data_type
+                f8_quantization = f8_quantization or dt in (
+                    TensorProto.FLOAT8E4M3FN,
+                    TensorProto.FLOAT8E4M3FNUZ,
+                    TensorProto.FLOAT8E5M2,
+                    TensorProto.FLOAT8E5M2FNUZ,
                 )
+        if checked == 0:
+            raise AssertionError(
+                f"Unable to check expected shape, expected_shapes={expected_shapes}, "
+                f"names={[init.name for init in model_check.graph.initializer]}."
+            )
         if f8_quantization:
             check_sign_f8_quantization(model_path_origin, model_path_to_check)
 
     # Verifies the expected outputs.
     if check_reference_evaluator and onnx_recent_enough:
+        reference_new_ops = [QGemm]
+        has_missing_reference_ops = any(
+            node.domain not in ["", "ai.onnx"]
+            and not any(
+                node.domain == new_node.op_domain and node.op_type == new_node.__name__
+                for new_node in reference_new_ops
+            )
+            for node in model_check.graph.node
+        )
+        if has_missing_reference_ops:
+            # We need to skip the test if the model contains ops that are not supported.
+            testcase.skipTest(
+                f"Model {model_path_to_check!r} contains ops that are not supported by the reference evaluator."
+            )
         # Needs pv.Version(onnx.__version__) >= pv.Version("1.16.0")
-        ref = ReferenceEvaluator(model_path_to_check, new_ops=[QGemm])
+        ref = ReferenceEvaluator(model_check, new_ops=reference_new_ops)
         target_results = ref.run(None, inputs)
         testcase.assertEqual(len(origin_results), len(target_results), "result count are different")
         for idx, ref_output in enumerate(origin_results):
@@ -378,6 +393,9 @@ def check_qtype_by_node_type(testcase, model_to_check, check_list):
         model = onnx.load(model_to_check)
     elif isinstance(model_to_check, onnx.ModelProto):
         model = model_to_check
+    # NOTE: ONNX shape inference does not work on MS domain nodes.
+    # Therefore, this function cannot currently be used for graphs that contain ops such as
+    # com.microsoft.QuantizeLinear, which support 16-bit quantization.
     model = onnx.shape_inference.infer_shapes(model)
     value_infos = {vi.name: vi for vi in model.graph.value_info}
     value_infos.update({ot.name: ot for ot in model.graph.output})
diff --git a/onnxruntime/test/python/quantization/resnet_code.py b/onnxruntime/test/python/quantization/resnet_code.py
new file mode 100644
index 0000000000000..2f78047c824a6
--- /dev/null
+++ b/onnxruntime/test/python/quantization/resnet_code.py
@@ -0,0 +1,13757 @@
+import numpy
+from onnx import numpy_helper
+from onnx.helper import make_graph, make_model, make_node, make_opsetid, make_tensor_value_info, set_model_props
+
+
+def create_model():
+    initializers = []
+    nodes = []
+    inputs = []
+    outputs = []
+    functions = []
+
+    # opsets
+    opsets = {"": 13}
+
+    # initializers
+
+    list_value = [
+        -0.013732648454606533,
+        -0.005861935671418905,
+        0.06889285147190094,
+        -0.1172710582613945,
+        0.08841240406036377,
+        -0.03748627379536629,
+        0.016256270930171013,
+        -0.1059316024184227,
+        0.08246039599180222,
+        0.14295539259910583,
+        -0.32958757877349854,
+        0.1631188541650772,
+        0.05412565544247627,
+        -0.10758306831121445,
+        0.12607362866401672,
+        -0.4987836182117462,
+        0.7441706657409668,
+        -0.24774713814258575,
+        -0.30415549874305725,
+        0.4033295810222626,
+        -0.13447114825248718,
+        0.04623159021139145,
+        0.2380414456129074,
+        -1.226112723350525,
+        2.150630235671997,
+        -1.702580213546753,
+        0.5305419564247131,
+        -0.06836353242397308,
+        -0.20055373013019562,
+        0.7035881280899048,
+        -0.8389442563056946,
+        -0.1904432326555252,
+        1.2609282732009888,
+        -1.0670661926269531,
+        0.4142579436302185,
+        0.04739700257778168,
+        -0.3265092074871063,
+        1.1873037815093994,
+        -1.6817731857299805,
+        0.9709527492523193,
+        -0.09095840901136398,
+        -0.12556785345077515,
+        0.0835147574543953,
+        -0.24109329283237457,
+        0.032948240637779236,
+        0.46304041147232056,
+        -0.6594106554985046,
+        0.349990576505661,
+        -0.04113377630710602,
+        0.016451245173811913,
+        0.008994563482701778,
+        -0.028321878984570503,
+        -0.05336569994688034,
+        0.16036668419837952,
+        -0.12088149785995483,
+        0.031160499900579453,
+        -0.0618649423122406,
+        0.07205374538898468,
+        0.15965768694877625,
+        -0.3389044404029846,
+        0.21603335440158844,
+        0.04029613360762596,
+        -0.0813034325838089,
+        0.1019665077328682,
+        -0.4873599112033844,
+        0.7873126268386841,
+        -0.2951086163520813,
+        -0.43754327297210693,
+        0.5905176401138306,
+        -0.21821773052215576,
+        0.06022067740559578,
+        0.26326146721839905,
+        -1.6453089714050293,
+        2.606400728225708,
+        -1.8939754962921143,
+        0.5196341276168823,
+        0.0055860355496406555,
+        -0.2335057258605957,
+        0.9807199239730835,
+        -1.2137882709503174,
+        -0.2699125409126282,
+        1.7379733324050903,
+        -1.4401814937591553,
+        0.435971736907959,
+        -0.04829222336411476,
+        -0.24543480575084686,
+        1.3292583227157593,
+        -2.0375823974609375,
+        1.2458536624908447,
+        -0.08251484483480453,
+        -0.14181238412857056,
+        0.10612589120864868,
+        -0.21671657264232635,
+        0.1129523366689682,
+        0.3666985034942627,
+        -0.7546612024307251,
+        0.42979565262794495,
+        -0.0976259633898735,
+        -0.0008812264422886074,
+        0.02994859404861927,
+        -0.07027778774499893,
+        0.01393035613000393,
+        0.07363647222518921,
+        -0.10249849408864975,
+        0.06602989137172699,
+        -0.012129798531532288,
+        0.10730132460594177,
+        -0.04546127840876579,
+        -0.16065146028995514,
+        0.14788293838500977,
+        -0.05488971993327141,
+        0.03601694852113724,
+        0.07513345777988434,
+        -0.23953600227832794,
+        0.48062530159950256,
+        -0.42057543992996216,
+        -0.02402813360095024,
+        0.17920851707458496,
+        -0.10703158378601074,
+        -0.028666120022535324,
+        0.2815375030040741,
+        -0.860264241695404,
+        1.4422725439071655,
+        -1.2058128118515015,
+        0.5272247791290283,
+        -0.06504356116056442,
+        -0.20021803677082062,
+        0.44968947768211365,
+        -0.3856053650379181,
+        -0.1589551419019699,
+        0.7579770684242249,
+        -0.8349987268447876,
+        0.3225692808628082,
+        0.08153475821018219,
+        -0.43163740634918213,
+        0.8742384910583496,
+        -0.9722443222999573,
+        0.579015851020813,
+        -0.06688100844621658,
+        -0.12384293973445892,
+        0.08289378881454468,
+        -0.10082041472196579,
+        -0.11204896867275238,
+        0.3934254050254822,
+        -0.4511864185333252,
+        0.32745760679244995,
+        -0.06534548103809357,
+        -0.028830429539084435,
+        0.021844232454895973,
+        0.01775779016315937,
+        -0.004250001162290573,
+        0.013087524101138115,
+        -0.001250433037057519,
+        -0.040545206516981125,
+        -0.014049320481717587,
+        -0.024194253608584404,
+        -0.023865194991230965,
+        -0.0038033330347388983,
+        0.00920871365815401,
+        -0.006582418456673622,
+        0.0032474950421601534,
+        -0.0369916632771492,
+        -0.16640843451023102,
+        -0.28968843817710876,
+        -0.3531132638454437,
+        -0.26307201385498047,
+        -0.13392697274684906,
+        -0.03747623786330223,
+        0.08083077520132065,
+        0.2026241272687912,
+        0.25018608570098877,
+        0.2529378831386566,
+        0.2307336926460266,
+        0.13928599655628204,
+        0.08631229400634766,
+        0.13893137872219086,
+        0.4867081344127655,
+        0.7170669436454773,
+        0.8331555724143982,
+        0.6734364032745361,
+        0.3549460768699646,
+        0.16798041760921478,
+        -0.14487245678901672,
+        -0.47733625769615173,
+        -0.7670150995254517,
+        -0.875726580619812,
+        -0.6291986703872681,
+        -0.2910463213920593,
+        -0.09991979598999023,
+        -0.009158087894320488,
+        0.018850643187761307,
+        0.02646111696958542,
+        -0.009077857248485088,
+        0.029430989176034927,
+        -0.03707962855696678,
+        -0.05111744999885559,
+        -0.02076525054872036,
+        0.011828843504190445,
+        0.017857171595096588,
+        0.02548048458993435,
+        -0.009077494964003563,
+        0.0022066361270844936,
+        -0.02064262516796589,
+        -0.008582246489822865,
+        -0.022748643532395363,
+        -0.03038850985467434,
+        0.0006585497176274657,
+        -0.0016039719339460135,
+        -0.01612498238682747,
+        0.013966801576316357,
+        -0.05851661041378975,
+        -0.21422894299030304,
+        -0.33863192796707153,
+        -0.3720807433128357,
+        -0.3030800521373749,
+        -0.1737397164106369,
+        -0.05903157964348793,
+        0.15018144249916077,
+        0.27454254031181335,
+        0.31182464957237244,
+        0.30118387937545776,
+        0.24605700373649597,
+        0.14123573899269104,
+        0.14992672204971313,
+        0.20660799741744995,
+        0.5046274662017822,
+        0.7706091403961182,
+        0.8978630900382996,
+        0.7368614673614502,
+        0.3929724097251892,
+        0.23079657554626465,
+        -0.21169082820415497,
+        -0.5920398235321045,
+        -0.893406867980957,
+        -0.9499238729476929,
+        -0.730407178401947,
+        -0.3615736961364746,
+        -0.15422092378139496,
+        -0.024615347385406494,
+        0.005115498788654804,
+        0.024657316505908966,
+        0.028517475351691246,
+        0.027910854667425156,
+        -0.009482389315962791,
+        -0.042242538183927536,
+        -0.017875321209430695,
+        0.00430292496457696,
+        0.015949612483382225,
+        0.003636278910562396,
+        -0.018156034871935844,
+        -0.0009349065367132425,
+        -0.0010362856555730104,
+        -0.013051170855760574,
+        -0.009141271002590656,
+        -8.714485738892108e-05,
+        0.02399279735982418,
+        0.01753612607717514,
+        -0.013710699044167995,
+        -0.014245252124965191,
+        -0.0028008236549794674,
+        -0.08206935226917267,
+        -0.1098734438419342,
+        -0.10250325500965118,
+        -0.08874496072530746,
+        -0.031079040840268135,
+        0.004536658991128206,
+        0.03923843801021576,
+        0.08478657901287079,
+        0.07715648412704468,
+        0.018803801387548447,
+        0.013921198435127735,
+        0.015864359214901924,
+        0.04947463795542717,
+        0.039856068789958954,
+        0.1712094396352768,
+        0.362756609916687,
+        0.4192918539047241,
+        0.2668488621711731,
+        0.11430513113737106,
+        0.06648365408182144,
+        -0.058979276567697525,
+        -0.24177154898643494,
+        -0.3709423542022705,
+        -0.3979431986808777,
+        -0.29706764221191406,
+        -0.11569518595933914,
+        -0.01848490908741951,
+        -0.015523962676525116,
+        0.05081642046570778,
+        0.09057094901800156,
+        0.08520761132240295,
+        0.04497350752353668,
+        -0.019453801214694977,
+        -0.06109466031193733,
+        0.011463015340268612,
+        -0.008522219955921173,
+        -0.005283404141664505,
+        -0.017313135787844658,
+        -0.0015744483098387718,
+        -0.011845857836306095,
+        -0.016727561131119728,
+        -0.006708915811032057,
+        0.0008860539528541267,
+        -0.010050912387669086,
+        -0.028460539877414703,
+        -0.0165643822401762,
+        -0.016545938327908516,
+        -0.00567589420825243,
+        -0.0032017906196415424,
+        -0.0130555285140872,
+        -0.026848897337913513,
+        -0.02615198865532875,
+        0.002669057110324502,
+        -0.027966763824224472,
+        -0.03851256147027016,
+        -0.014509409666061401,
+        -0.029059220105409622,
+        -0.007284109480679035,
+        0.04045313969254494,
+        0.10005538910627365,
+        0.014574537053704262,
+        -0.044292762875556946,
+        -0.01750861294567585,
+        -0.02231375314295292,
+        -0.004432118032127619,
+        0.10051869601011276,
+        0.1443023532629013,
+        0.0508832149207592,
+        -0.04350621998310089,
+        -0.0025447055231779814,
+        -0.014583000913262367,
+        -0.02153291553258896,
+        0.018860718235373497,
+        0.03618147224187851,
+        0.007304056081920862,
+        -0.029104959219694138,
+        0.00576505484059453,
+        -0.016025763005018234,
+        -0.025094063952565193,
+        -0.05296780914068222,
+        -0.037012189626693726,
+        -0.04414081946015358,
+        -0.053135257214307785,
+        -0.028890708461403847,
+        -0.010220452211797237,
+        -0.027575822547078133,
+        -0.01087758969515562,
+        -0.027209162712097168,
+        -0.030827227979898453,
+        -0.007646164856851101,
+        -0.016133273020386696,
+        0.000639698002487421,
+        -0.0034172122832387686,
+        0.03914793208241463,
+        0.030786357820034027,
+        0.005965455900877714,
+        0.020923329517245293,
+        -0.03435938432812691,
+        -0.0026781477499753237,
+        0.04278327897191048,
+        0.20045910775661469,
+        0.21770593523979187,
+        0.09422573447227478,
+        0.03198440372943878,
+        -0.021056609228253365,
+        0.028007682412862778,
+        0.19196027517318726,
+        0.4791645109653473,
+        0.5333831906318665,
+        0.3014310598373413,
+        0.103666290640831,
+        -0.03651479259133339,
+        0.027079502120614052,
+        0.19239209592342377,
+        0.5168290138244629,
+        0.5564895868301392,
+        0.2977963089942932,
+        0.07770062237977982,
+        -0.042239490896463394,
+        -0.017265107482671738,
+        0.08760321140289307,
+        0.2775075435638428,
+        0.312491774559021,
+        0.12284757196903229,
+        0.019664151594042778,
+        -0.026643047109246254,
+        0.0009152573184110224,
+        0.016156431287527084,
+        0.09042830765247345,
+        0.08991760015487671,
+        0.013326293788850307,
+        0.02613811008632183,
+        0.021025240421295166,
+        0.0198842640966177,
+        0.03375901281833649,
+        0.028616728261113167,
+        0.026605166494846344,
+        0.04126269370317459,
+        0.029309948906302452,
+        0.01408455427736044,
+        -0.003831037785857916,
+        0.01922326348721981,
+        -0.018229445442557335,
+        -0.013015883974730968,
+        0.017597628757357597,
+        -0.007964612916111946,
+        0.045263469219207764,
+        0.0184696726500988,
+        -0.001163159729912877,
+        -0.1809321641921997,
+        -0.22486254572868347,
+        -0.08606110513210297,
+        0.001087217591702938,
+        0.037091098725795746,
+        -0.013625397346913815,
+        -0.178089901804924,
+        -0.5483279824256897,
+        -0.612791895866394,
+        -0.32531827688217163,
+        -0.06506585329771042,
+        0.05076128616929054,
+        -0.007585812360048294,
+        -0.20981833338737488,
+        -0.6155760884284973,
+        -0.7119701504707336,
+        -0.354442298412323,
+        -0.04236743599176407,
+        0.045713260769844055,
+        0.03192479908466339,
+        -0.07216271013021469,
+        -0.310979425907135,
+        -0.3656359910964966,
+        -0.13522450625896454,
+        0.008291869424283504,
+        0.03362602740526199,
+        -0.0009240762447007,
+        0.01604474149644375,
+        -0.055634208023548126,
+        -0.06180194392800331,
+        0.0222025066614151,
+        0.027704820036888123,
+        -0.034385330975055695,
+        -0.07050742954015732,
+        -0.06287489086389542,
+        0.03521641716361046,
+        -0.00020920530369039625,
+        0.05458284169435501,
+        0.058752644807100296,
+        -0.08097169548273087,
+        -0.01668735221028328,
+        0.18557283282279968,
+        0.26208117604255676,
+        0.1253771185874939,
+        0.07758381962776184,
+        -0.022084739059209824,
+        0.016727397218346596,
+        0.23247942328453064,
+        0.35444316267967224,
+        0.21802566945552826,
+        -0.04409221559762955,
+        -0.08573070168495178,
+        -0.0994141548871994,
+        0.07754423469305038,
+        0.14311672747135162,
+        0.04036660119891167,
+        -0.29222917556762695,
+        -0.38828015327453613,
+        -0.26185816526412964,
+        -0.12845511734485626,
+        0.04763585329055786,
+        -0.017382778227329254,
+        -0.16010743379592896,
+        -0.2395028918981552,
+        -0.2049665004014969,
+        -0.041346337646245956,
+        0.091490738093853,
+        -0.005191737785935402,
+        -0.07687077671289444,
+        -0.08105621486902237,
+        -0.05329642817378044,
+        -0.03404862806200981,
+        0.11478845030069351,
+        0.13328343629837036,
+        -0.037197597324848175,
+        -0.01787363924086094,
+        -0.016605347394943237,
+        0.007853846065700054,
+        0.029950136318802834,
+        0.10808859020471573,
+        0.02873288467526436,
+        -0.1766187697649002,
+        -0.17560969293117523,
+        -0.03922238200902939,
+        0.14447443187236786,
+        0.1534212827682495,
+        0.11272227019071579,
+        0.008810695260763168,
+        -0.1485181748867035,
+        0.07839693129062653,
+        0.43013128638267517,
+        0.4898712635040283,
+        0.26522761583328247,
+        0.10202436149120331,
+        -0.07163076847791672,
+        0.09933187812566757,
+        0.47377726435661316,
+        0.6340300440788269,
+        0.36741772294044495,
+        -0.04812543839216232,
+        -0.17370514571666718,
+        -0.17513291537761688,
+        0.22105705738067627,
+        0.3226463794708252,
+        0.09850790351629257,
+        -0.4044247269630432,
+        -0.6237908601760864,
+        -0.4679968059062958,
+        -0.1954391747713089,
+        0.09878316521644592,
+        -0.004430827684700489,
+        -0.31550562381744385,
+        -0.5235733985900879,
+        -0.4510284662246704,
+        -0.13843706250190735,
+        0.10064390301704407,
+        -0.006748788990080357,
+        -0.12714813649654388,
+        -0.2107744812965393,
+        -0.18755048513412476,
+        -0.05646044388413429,
+        0.12781813740730286,
+        0.18928050994873047,
+        -0.04337320104241371,
+        -0.04973407834768295,
+        -0.04690375551581383,
+        0.0245530866086483,
+        0.10698680579662323,
+        0.1646823137998581,
+        0.081840381026268,
+        -0.01471243891865015,
+        -0.03138890117406845,
+        -0.04195617139339447,
+        0.012708203867077827,
+        0.033312954008579254,
+        0.02409377694129944,
+        -0.0036440726835280657,
+        -0.06239784508943558,
+        0.0037516560405492783,
+        0.11261500418186188,
+        0.13069754838943481,
+        0.05901307612657547,
+        0.048614490777254105,
+        -0.027712708339095116,
+        0.027247682213783264,
+        0.19195327162742615,
+        0.2688453793525696,
+        0.1509387195110321,
+        0.020540937781333923,
+        -0.004100556951016188,
+        -0.012650247663259506,
+        0.039176344871520996,
+        0.09037251025438309,
+        -0.004689970053732395,
+        -0.23859903216362,
+        -0.2364242821931839,
+        -0.15189304947853088,
+        -0.0761493444442749,
+        -0.0028172829188406467,
+        -0.04328106716275215,
+        -0.16187387704849243,
+        -0.21743592619895935,
+        -0.1282283067703247,
+        -0.024501819163560867,
+        0.04029383510351181,
+        -0.027387680485844612,
+        -0.05414740741252899,
+        -0.08344019204378128,
+        -0.06591048091650009,
+        0.012637111358344555,
+        0.06905930489301682,
+        0.08426016569137573,
+        -0.0030199100729078054,
+        0.034059297293424606,
+        0.01111840270459652,
+        0.013492933474481106,
+        0.0674189031124115,
+        0.08242739737033844,
+        0.006129032466560602,
+        -0.07763395458459854,
+        -0.03002289868891239,
+        -0.055725954473018646,
+        0.008795201778411865,
+        0.02994825504720211,
+        -0.06114519387483597,
+        -0.0560108907520771,
+        -0.008179228752851486,
+        -0.07149285078048706,
+        -0.02700420655310154,
+        -0.01306728646159172,
+        0.06276566535234451,
+        0.007125973701477051,
+        -0.03540417551994324,
+        -0.039717916399240494,
+        0.009147526696324348,
+        -0.06517947465181351,
+        0.0720859095454216,
+        -0.05035398155450821,
+        0.06659520417451859,
+        -0.01841895841062069,
+        0.004233633633702993,
+        -0.020911216735839844,
+        -0.004646372981369495,
+        1.6690073013305664,
+        0.4517613649368286,
+        -0.07667035609483719,
+        0.005556757096201181,
+        -0.02638973295688629,
+        0.044588603079319,
+        -0.020916732028126717,
+        0.2571280598640442,
+        -0.009559552185237408,
+        -0.043380800634622574,
+        0.03196016326546669,
+        -0.03783237189054489,
+        -0.03076902963221073,
+        0.03180111199617386,
+        0.06352709978818893,
+        0.020281998440623283,
+        -0.00741154421120882,
+        -0.0009214285528287292,
+        -0.0476187989115715,
+        -0.07208544760942459,
+        -0.05323023349046707,
+        -0.011103631928563118,
+        0.02877136506140232,
+        -0.05324484035372734,
+        -0.10076326876878738,
+        0.026193000376224518,
+        0.03536469116806984,
+        0.045722659677267075,
+        -0.03756006807088852,
+        0.022998394444584846,
+        0.0019359687576070428,
+        0.01654801517724991,
+        0.047304198145866394,
+        -0.08431598544120789,
+        -0.0645647644996643,
+        -0.17326746881008148,
+        -0.10692577064037323,
+        -0.08416426181793213,
+        -0.04107839986681938,
+        -0.0012680464424192905,
+        -0.02600814774632454,
+        -0.014215772971510887,
+        0.2114446610212326,
+        -0.040954578667879105,
+        -0.05050172284245491,
+        0.004194092936813831,
+        -0.0025900816544890404,
+        -0.1359374076128006,
+        0.03946976363658905,
+        2.3023669719696045,
+        0.7484877109527588,
+        -0.1994970589876175,
+        -0.06490366160869598,
+        0.007983183488249779,
+        -0.017937449738383293,
+        -0.12516839802265167,
+        0.3313288688659668,
+        0.11946671456098557,
+        -0.16942338645458221,
+        -0.007721045054495335,
+        0.02824605070054531,
+        -0.05310647189617157,
+        -0.1122083067893982,
+        -0.17094524204730988,
+        -0.08465421944856644,
+        -0.09679102897644043,
+        -0.03848385065793991,
+        0.040121182799339294,
+        -0.06661732494831085,
+        0.0005764663219451904,
+        -0.05729356408119202,
+        -0.04778655245900154,
+        -0.034835152328014374,
+        -0.07634143531322479,
+        -0.05054831504821777,
+        0.00597620103508234,
+        0.04499154910445213,
+        -0.03308190405368805,
+        -0.04915233701467514,
+        -0.05842791870236397,
+        0.003590918146073818,
+        0.055837079882621765,
+        -0.02547842636704445,
+        -0.018847621977329254,
+        -0.2073899656534195,
+        -0.14987564086914062,
+        -0.03971748799085617,
+        0.05886378139257431,
+        0.020922083407640457,
+        -0.039155181497335434,
+        -0.028855402022600174,
+        0.08688661456108093,
+        -0.1402827501296997,
+        -0.05810496211051941,
+        0.037841811776161194,
+        -0.04082907736301422,
+        -0.1191127747297287,
+        -0.10852136462926865,
+        1.6274418830871582,
+        0.3678200840950012,
+        -0.2865799367427826,
+        -0.05291350558400154,
+        0.023858532309532166,
+        -0.046683818101882935,
+        -0.2307816743850708,
+        -0.001670230645686388,
+        -0.17716962099075317,
+        -0.16724731028079987,
+        0.040194038301706314,
+        -0.023075448349118233,
+        -0.01538322027772665,
+        -0.07914327085018158,
+        -0.19621343910694122,
+        -0.11628971993923187,
+        -0.05851752683520317,
+        0.06313594430685043,
+        0.017808571457862854,
+        0.02447943389415741,
+        0.048611078411340714,
+        -0.009247995913028717,
+        0.00789090245962143,
+        0.06673033535480499,
+        0.0661577433347702,
+        0.019111329689621925,
+        0.038164373487234116,
+        0.029342610388994217,
+        -0.03547409921884537,
+        -0.11017149686813354,
+        -0.11077891290187836,
+        0.001108204829506576,
+        -0.0330691784620285,
+        -0.05039837956428528,
+        0.017638904973864555,
+        0.277705579996109,
+        0.5606598258018494,
+        0.5469182133674622,
+        0.13591277599334717,
+        0.012421006336808205,
+        0.046348799020051956,
+        -0.02721901424229145,
+        -0.5645118355751038,
+        -1.072814702987671,
+        -0.9852984547615051,
+        -0.3608386516571045,
+        -0.010197073221206665,
+        -0.09785731136798859,
+        -0.02597353421151638,
+        0.4627133309841156,
+        1.1483618021011353,
+        0.9505703449249268,
+        0.17471027374267578,
+        -0.016467586159706116,
+        0.026623696088790894,
+        0.04765752702951431,
+        -0.4000166058540344,
+        -0.8956774473190308,
+        -0.6268588304519653,
+        -0.09439487755298615,
+        0.02861764468252659,
+        -0.004155704285949469,
+        0.08989865332841873,
+        0.27384331822395325,
+        0.6518518328666687,
+        0.4184596836566925,
+        0.13106893002986908,
+        0.0050344159826636314,
+        0.007061495911329985,
+        -0.016157688573002815,
+        -0.1364346295595169,
+        -0.27324289083480835,
+        -0.14245718717575073,
+        -0.04623992741107941,
+        -0.015541884116828442,
+        0.030779436230659485,
+        0.03756715729832649,
+        0.01957445964217186,
+        -0.04964561015367508,
+        -0.0211405660957098,
+        0.044496409595012665,
+        -0.026335055008530617,
+        -0.11620140820741653,
+        -0.11803250014781952,
+        0.18242181837558746,
+        0.5057784914970398,
+        0.5045838952064514,
+        0.03748183697462082,
+        0.05692485347390175,
+        0.1608155369758606,
+        0.02245517633855343,
+        -0.7651812434196472,
+        -1.5504053831100464,
+        -1.3563542366027832,
+        -0.4314505457878113,
+        -0.028384560719132423,
+        -0.12238024920225143,
+        0.106974296271801,
+        1.11427903175354,
+        2.173083543777466,
+        1.747692346572876,
+        0.5455064177513123,
+        0.03363418206572533,
+        0.11388687789440155,
+        -0.05905687436461449,
+        -0.8059568405151367,
+        -1.6196117401123047,
+        -1.1898213624954224,
+        -0.2654758095741272,
+        -0.004251840524375439,
+        -0.0916782096028328,
+        -0.024067873135209084,
+        0.22692462801933289,
+        0.6695711612701416,
+        0.3673460781574249,
+        -0.017016466706991196,
+        -0.029604146257042885,
+        0.020365707576274872,
+        0.03215239942073822,
+        0.0070981839671730995,
+        -0.14026938378810883,
+        -0.02425236999988556,
+        0.059152450412511826,
+        -0.006319367326796055,
+        0.003989882301539183,
+        0.048541076481342316,
+        0.003988460637629032,
+        -0.03105335496366024,
+        -0.08329232037067413,
+        0.03226872906088829,
+        0.02119620516896248,
+        -0.0953872874379158,
+        -0.15174035727977753,
+        0.07963212579488754,
+        0.29094186425209045,
+        0.2690921127796173,
+        -0.020104877650737762,
+        0.024988379329442978,
+        0.15326620638370514,
+        0.1256464123725891,
+        -0.40941280126571655,
+        -0.946648120880127,
+        -0.8358487486839294,
+        -0.14284957945346832,
+        -0.07980851829051971,
+        -0.1435413807630539,
+        0.038134895265102386,
+        0.8021518588066101,
+        1.552701473236084,
+        1.2496209144592285,
+        0.38152581453323364,
+        0.07136060297489166,
+        0.14329172670841217,
+        -0.06546801328659058,
+        -0.5923707485198975,
+        -1.253793478012085,
+        -0.9458200335502625,
+        -0.156633198261261,
+        -0.04217473417520523,
+        -0.11199303716421127,
+        -0.07520301640033722,
+        0.15331010520458221,
+        0.4794600307941437,
+        0.2449675053358078,
+        -0.10396319627761841,
+        0.0034801275469362736,
+        0.04475663974881172,
+        0.024035215377807617,
+        0.056806568056344986,
+        -0.07363307476043701,
+        -0.001563104335218668,
+        0.05157755687832832,
+        0.043718185275793076,
+        0.02102719619870186,
+        0.11859089881181717,
+        0.08675580471754074,
+        -0.13180124759674072,
+        -0.15522590279579163,
+        0.03273458778858185,
+        -0.0019622649997472763,
+        0.1011638194322586,
+        -0.10800585150718689,
+        -0.6884365677833557,
+        -0.5495791435241699,
+        0.0780424103140831,
+        0.33674973249435425,
+        -0.21274283528327942,
+        -0.4183696210384369,
+        -0.8053947687149048,
+        0.03347628563642502,
+        1.3938312530517578,
+        0.9454176425933838,
+        -0.012210174463689327,
+        0.04924672842025757,
+        0.16284359991550446,
+        1.1340152025222778,
+        2.0020322799682617,
+        0.2796843647956848,
+        -0.968036413192749,
+        -0.5768532752990723,
+        0.17757350206375122,
+        0.37485063076019287,
+        0.11534234136343002,
+        -1.2916942834854126,
+        -1.692176103591919,
+        -0.30523377656936646,
+        0.14307916164398193,
+        0.03928302228450775,
+        -0.19196964800357819,
+        -0.4533900022506714,
+        -0.3294944167137146,
+        0.5480389595031738,
+        0.4497548043727875,
+        0.2170887440443039,
+        -0.05817069113254547,
+        -0.06957870721817017,
+        0.03169052675366402,
+        0.23751793801784515,
+        0.0823391005396843,
+        -0.04811413958668709,
+        -0.051265716552734375,
+        -0.0395645909011364,
+        -0.03849785774946213,
+        0.04607917368412018,
+        0.09946659207344055,
+        -0.029992828145623207,
+        -0.05369366332888603,
+        -0.005230880342423916,
+        0.012808755040168762,
+        0.1821947544813156,
+        0.05478882044553757,
+        -0.47736144065856934,
+        -0.44480830430984497,
+        -0.036321353167295456,
+        0.13646431267261505,
+        -0.04045571759343147,
+        -0.21837295591831207,
+        -0.6888197660446167,
+        -0.08431777358055115,
+        0.96018385887146,
+        0.6788493990898132,
+        0.011028020642697811,
+        0.05917810648679733,
+        0.02488739602267742,
+        0.6898419857025146,
+        1.4259209632873535,
+        0.13193827867507935,
+        -0.8078985810279846,
+        -0.31056249141693115,
+        0.018122224137187004,
+        0.137860506772995,
+        0.051947757601737976,
+        -0.9757952094078064,
+        -1.1060559749603271,
+        0.06675099581480026,
+        0.2091575562953949,
+        -0.029623042792081833,
+        -0.0705878809094429,
+        -0.18514159321784973,
+        -0.07947035878896713,
+        0.5719470381736755,
+        0.2286168485879898,
+        -0.03433626517653465,
+        0.0036030709743499756,
+        0.006251791957765818,
+        0.04144154116511345,
+        0.08598234504461288,
+        -0.050599172711372375,
+        -0.10440917313098907,
+        -0.02927244082093239,
+        -0.04102599248290062,
+        -0.07101748138666153,
+        -0.03579306975007057,
+        0.03586365282535553,
+        0.06752362847328186,
+        0.048901572823524475,
+        -0.020898710936307907,
+        -0.009411930106580257,
+        0.10169848799705505,
+        0.1812015175819397,
+        -0.014482695609331131,
+        -0.12548771500587463,
+        -0.060731250792741776,
+        -0.034499138593673706,
+        0.0829617902636528,
+        0.04616715386509895,
+        -0.20867496728897095,
+        -0.1990129053592682,
+        0.1773940473794937,
+        0.13156233727931976,
+        -0.03437860682606697,
+        0.04012921825051308,
+        -0.11132699251174927,
+        -0.023460939526557922,
+        0.2713286876678467,
+        -0.06662362813949585,
+        -0.2709292471408844,
+        -0.0030232456047087908,
+        -0.10379529744386673,
+        -0.07136038690805435,
+        0.03757762163877487,
+        -0.20515622198581696,
+        -0.1231834888458252,
+        0.26915228366851807,
+        0.0998353362083435,
+        -0.031466737389564514,
+        0.04657471179962158,
+        0.07664929330348969,
+        0.10308870673179626,
+        0.23429608345031738,
+        -0.06942534446716309,
+        -0.09051290899515152,
+        0.03243685141205788,
+        0.04053235426545143,
+        -0.021392958238720894,
+        -0.05330868810415268,
+        -0.11525140702724457,
+        -0.03889385238289833,
+        0.01636480540037155,
+        -0.009352890774607658,
+        0.13151532411575317,
+        -0.14738643169403076,
+        -0.18289834260940552,
+        0.15955400466918945,
+        -0.001023759599775076,
+        0.028809679672122,
+        0.012261062860488892,
+        0.29654747247695923,
+        -0.285063236951828,
+        -0.40187928080558777,
+        0.3713407516479492,
+        0.009383893571794033,
+        -0.023022817447781563,
+        -0.003799814498052001,
+        0.48470190167427063,
+        -0.43402406573295593,
+        -0.5858806371688843,
+        0.5751441717147827,
+        0.05045031011104584,
+        -0.05559438094496727,
+        -0.02045449987053871,
+        0.5281224250793457,
+        -0.5058223605155945,
+        -0.5950849056243896,
+        0.6492323279380798,
+        0.013408469036221504,
+        -0.05940670147538185,
+        -0.0044364179484546185,
+        0.3112560212612152,
+        -0.34908774495124817,
+        -0.42427319288253784,
+        0.43349501490592957,
+        0.03724945709109306,
+        -0.05263671651482582,
+        -0.010485195554792881,
+        0.1261255145072937,
+        -0.1349790245294571,
+        -0.2524855136871338,
+        0.24608080089092255,
+        0.036001257598400116,
+        -0.028843939304351807,
+        0.0056989979930222034,
+        0.04458172619342804,
+        -0.06122935935854912,
+        -0.166972354054451,
+        0.14557687938213348,
+        0.018050044775009155,
+        0.032598987221717834,
+        -0.0055792503990232944,
+        0.24355076253414154,
+        -0.21433626115322113,
+        -0.29646870493888855,
+        0.1958809792995453,
+        0.015435033477842808,
+        0.05235098674893379,
+        0.010786890983581543,
+        0.47903597354888916,
+        -0.4127257168292999,
+        -0.6203306317329407,
+        0.47024452686309814,
+        0.0823090448975563,
+        -0.04538045823574066,
+        -0.004072466865181923,
+        0.7509317994117737,
+        -0.6508772969245911,
+        -0.8481631278991699,
+        0.7875698208808899,
+        0.0966777428984642,
+        -0.10461349785327911,
+        0.0063789174892008305,
+        0.7535857558250427,
+        -0.8082649111747742,
+        -0.8165622353553772,
+        0.9064085483551025,
+        0.04986630380153656,
+        -0.10200339555740356,
+        0.0314355194568634,
+        0.46324053406715393,
+        -0.5523763298988342,
+        -0.5632953643798828,
+        0.6378755569458008,
+        0.07833302766084671,
+        -0.07979781180620193,
+        0.031164664775133133,
+        0.1967470794916153,
+        -0.21681970357894897,
+        -0.29283079504966736,
+        0.3367702066898346,
+        0.034929461777210236,
+        -0.047199901193380356,
+        -0.0033645557705312967,
+        0.05454660952091217,
+        -0.11264829337596893,
+        -0.190998375415802,
+        0.17961400747299194,
+        0.0009085010970011353,
+        -0.0001827089727157727,
+        0.04841821268200874,
+        0.019923821091651917,
+        -0.07004066556692123,
+        -0.10590090602636337,
+        0.054114967584609985,
+        0.04302384704351425,
+        0.00462615629658103,
+        0.022948985919356346,
+        0.1673787385225296,
+        -0.1319379210472107,
+        -0.2711219787597656,
+        0.2387620061635971,
+        0.05667697265744209,
+        -0.018639734014868736,
+        -0.07672597467899323,
+        0.3503187298774719,
+        -0.2981504797935486,
+        -0.38647517561912537,
+        0.4072522521018982,
+        0.010913677513599396,
+        -0.05246961489319801,
+        -0.04058554396033287,
+        0.39216771721839905,
+        -0.3605193495750427,
+        -0.34857264161109924,
+        0.46899959444999695,
+        -0.03358001261949539,
+        -0.05188553035259247,
+        -0.023204902186989784,
+        0.17140533030033112,
+        -0.2120431810617447,
+        -0.2144550085067749,
+        0.2837989032268524,
+        -0.0191226527094841,
+        -0.020922169089317322,
+        0.004324179142713547,
+        0.038136694580316544,
+        -0.042803723365068436,
+        -0.11487454175949097,
+        0.11820490658283234,
+        0.003412557765841484,
+        0.0035020115319639444,
+        0.03646541014313698,
+        -0.010104459710419178,
+        -0.010897459462285042,
+        -0.09292570501565933,
+        0.06823977828025818,
+        0.02677192911505699,
+        0.020071662962436676,
+        0.005776307079941034,
+        0.02613351307809353,
+        0.017107944935560226,
+        -0.0002623539185151458,
+        -0.039298396557569504,
+        -0.0314190648496151,
+        -0.019773684442043304,
+        -0.01924789510667324,
+        0.04253160580992699,
+        0.09694722294807434,
+        0.1925637573003769,
+        0.1901547759771347,
+        0.09470294415950775,
+        -0.00296174269169569,
+        -0.03602522239089012,
+        0.03572473302483559,
+        0.08787581324577332,
+        0.1773553043603897,
+        0.20970025658607483,
+        0.14899243414402008,
+        0.05427362397313118,
+        -0.032429151237010956,
+        0.023915717378258705,
+        0.06557436287403107,
+        0.13488733768463135,
+        0.17550915479660034,
+        0.17485061287879944,
+        0.10260436683893204,
+        -0.005381361581385136,
+        -0.05573735386133194,
+        -0.09410752356052399,
+        -0.07940010726451874,
+        -0.03424998000264168,
+        0.007975265383720398,
+        0.028827181085944176,
+        0.023788832128047943,
+        -0.02962818741798401,
+        -0.13474339246749878,
+        -0.22529757022857666,
+        -0.20413516461849213,
+        -0.14711618423461914,
+        -0.05960607901215553,
+        0.04579121991991997,
+        0.005325576290488243,
+        -0.11592217534780502,
+        -0.2260522097349167,
+        -0.2467145025730133,
+        -0.22054187953472137,
+        -0.13919179141521454,
+        0.0016459478065371513,
+        0.0515579916536808,
+        0.060555730015039444,
+        0.040788713842630386,
+        -0.017907800152897835,
+        -0.026459651067852974,
+        -0.02488812990486622,
+        0.015644825994968414,
+        0.10543125867843628,
+        0.19312354922294617,
+        0.28380078077316284,
+        0.28878358006477356,
+        0.16968156397342682,
+        0.04848042502999306,
+        -0.00986899808049202,
+        0.06337545067071915,
+        0.16356752812862396,
+        0.2444516271352768,
+        0.29273414611816406,
+        0.2314801961183548,
+        0.12695762515068054,
+        -0.022283215075731277,
+        0.018402203917503357,
+        0.07152476161718369,
+        0.14247483015060425,
+        0.18759845197200775,
+        0.20828258991241455,
+        0.14114585518836975,
+        -0.047197990119457245,
+        -0.13794781267642975,
+        -0.17509934306144714,
+        -0.1696663200855255,
+        -0.1206701323390007,
+        -0.036128126084804535,
+        0.007180679589509964,
+        0.006984225939959288,
+        -0.09600912779569626,
+        -0.22975720465183258,
+        -0.33287662267684937,
+        -0.2942708134651184,
+        -0.20305578410625458,
+        -0.08411446958780289,
+        0.042896877974271774,
+        -0.020053744316101074,
+        -0.16365791857242584,
+        -0.3145587742328644,
+        -0.3321540057659149,
+        -0.2667454183101654,
+        -0.1542910486459732,
+        -0.006954069249331951,
+        0.020191870629787445,
+        0.014010002836585045,
+        0.0016916356980800629,
+        -0.04649524390697479,
+        -0.014931428246200085,
+        -0.017954425886273384,
+        -0.020003901794552803,
+        0.03831968829035759,
+        0.08447518199682236,
+        0.14068123698234558,
+        0.13400419056415558,
+        0.08205568045377731,
+        -0.0004489773709792644,
+        -0.019211264327168465,
+        0.023363608866930008,
+        0.08738930523395538,
+        0.12299696356058121,
+        0.13070489466190338,
+        0.09040816128253937,
+        0.03286544978618622,
+        -0.006979941390454769,
+        -0.0010930931894108653,
+        0.04313739389181137,
+        0.10121051222085953,
+        0.11390950530767441,
+        0.11383924633264542,
+        0.06694260239601135,
+        -0.00425445893779397,
+        -0.0666416585445404,
+        -0.09225274622440338,
+        -0.0977785512804985,
+        -0.07118111103773117,
+        -0.026749763637781143,
+        -0.019425569102168083,
+        0.03321055322885513,
+        -0.0033978468272835016,
+        -0.08309262245893478,
+        -0.15557922422885895,
+        -0.14969374239444733,
+        -0.07188998907804489,
+        -0.018716221675276756,
+        0.022834330797195435,
+        0.004232254344969988,
+        -0.04141783341765404,
+        -0.125192329287529,
+        -0.14545302093029022,
+        -0.12225300818681717,
+        -0.05844716727733612,
+        0.010607236064970493,
+        0.024218380451202393,
+        -0.002702374942600727,
+        -0.030814893543720245,
+        0.03507756441831589,
+        -0.0506589449942112,
+        0.03415676951408386,
+        0.0011444400297477841,
+        0.0026324463542550802,
+        0.028514407575130463,
+        -0.01849454641342163,
+        -0.030959082767367363,
+        -0.05565863475203514,
+        0.05771413818001747,
+        0.003916156478226185,
+        -0.004474544432014227,
+        0.04403551295399666,
+        0.1733711212873459,
+        -0.37650829553604126,
+        0.22322984039783478,
+        0.0032540319953113794,
+        -0.01139416079968214,
+        -0.039046600461006165,
+        0.0021948080975562334,
+        0.5777754783630371,
+        -1.1944804191589355,
+        0.769478976726532,
+        -0.1349843591451645,
+        0.0004430754925124347,
+        -0.0061850035563111305,
+        -0.08340868353843689,
+        0.8327823877334595,
+        -1.649588942527771,
+        1.126111388206482,
+        -0.2918313145637512,
+        0.003614947199821472,
+        0.0016799914883449674,
+        -0.03255167230963707,
+        0.6123784184455872,
+        -1.1993682384490967,
+        0.8305437564849854,
+        -0.13622376322746277,
+        0.00905851274728775,
+        -0.006772476714104414,
+        0.07578610628843307,
+        0.05859832838177681,
+        -0.4543764293193817,
+        0.26330503821372986,
+        0.0259060300886631,
+        -0.0007997890934348106,
+        0.01269856933504343,
+        0.006897627376019955,
+        -0.02491801232099533,
+        -0.03139931708574295,
+        0.0028456314466893673,
+        0.0008253560517914593,
+        -0.01086023822426796,
+        -0.004186873324215412,
+        0.06299160420894623,
+        -0.039931319653987885,
+        -0.09315146505832672,
+        0.05495935305953026,
+        0.027547571808099747,
+        -0.010900916531682014,
+        -0.025233760476112366,
+        0.060600072145462036,
+        0.21010243892669678,
+        -0.5445898771286011,
+        0.35070353746414185,
+        -0.033771682530641556,
+        -0.0269146841019392,
+        -0.025363197550177574,
+        -0.021729450672864914,
+        0.70921790599823,
+        -1.4368270635604858,
+        0.9582043290138245,
+        -0.1708265244960785,
+        0.010022420436143875,
+        -0.032301150262355804,
+        -0.08667651563882828,
+        1.0338889360427856,
+        -1.913576364517212,
+        1.262008547782898,
+        -0.23795078694820404,
+        -0.032233912497758865,
+        -0.01397701445966959,
+        -0.05402921140193939,
+        0.7621430158615112,
+        -1.387437343597412,
+        0.8621506094932556,
+        -0.14765247702598572,
+        -0.004747485741972923,
+        0.0017516895895823836,
+        0.08154146373271942,
+        0.16601374745368958,
+        -0.5324177742004395,
+        0.27442997694015503,
+        0.03274058923125267,
+        -0.008812552317976952,
+        0.005774920806288719,
+        0.04165825620293617,
+        -0.011749272234737873,
+        -0.01953396573662758,
+        -0.009672109968960285,
+        0.01170953270047903,
+        0.003071938641369343,
+        -0.018979815766215324,
+        0.062123894691467285,
+        -0.004921444226056337,
+        -0.03380037844181061,
+        0.01310884952545166,
+        0.007953890599310398,
+        -0.0012086924398317933,
+        -0.03317898139357567,
+        -0.0015596294542774558,
+        0.08166785538196564,
+        -0.2291223704814911,
+        0.11783571541309357,
+        -0.016078786924481392,
+        0.018957575783133507,
+        0.025793947279453278,
+        -0.09036394208669662,
+        0.3833881616592407,
+        -0.5794023871421814,
+        0.4610825777053833,
+        -0.14165280759334564,
+        -0.007412370759993792,
+        0.05252876877784729,
+        -0.21435455977916718,
+        0.6177686452865601,
+        -0.8516795635223389,
+        0.667263925075531,
+        -0.22572898864746094,
+        -0.004465761594474316,
+        0.02589319832623005,
+        -0.1893543303012848,
+        0.43213585019111633,
+        -0.6462821364402771,
+        0.434274822473526,
+        -0.15750259160995483,
+        -0.01198036689311266,
+        -2.4281514924950898e-05,
+        0.039562296122312546,
+        0.11126027256250381,
+        -0.23193514347076416,
+        0.1412443071603775,
+        -0.011839920654892921,
+        0.007880321703851223,
+        0.02950354479253292,
+        0.011689653620123863,
+        -0.07272310554981232,
+        -0.03319466486573219,
+        -0.003948990721255541,
+        0.03549842908978462,
+        -0.02165558747947216,
+        -0.09912239760160446,
+        -0.08742356300354004,
+        0.30591821670532227,
+        0.23934677243232727,
+        0.02658180706202984,
+        -0.022127188742160797,
+        -0.02769642136991024,
+        0.16399237513542175,
+        0.5140998959541321,
+        0.007951628416776657,
+        -0.5589093565940857,
+        -0.24106110632419586,
+        -0.02753414213657379,
+        0.06947467476129532,
+        0.048558495938777924,
+        -0.5370690822601318,
+        -0.761831521987915,
+        0.16272802650928497,
+        0.29426246881484985,
+        0.07943751662969589,
+        -0.022394873201847076,
+        -0.217612162232399,
+        -0.03093647211790085,
+        0.5945476293563843,
+        0.2873935103416443,
+        -0.16481661796569824,
+        -0.02931203693151474,
+        -0.029083512723445892,
+        0.06754925847053528,
+        0.20200076699256897,
+        -0.07271742075681686,
+        -0.1976277083158493,
+        -0.04189611226320267,
+        0.06403793394565582,
+        -0.00022445111244451255,
+        -0.01032529678195715,
+        -0.03415631130337715,
+        0.009091783314943314,
+        0.04317992925643921,
+        0.07196266949176788,
+        -0.025028688833117485,
+        -0.02722775563597679,
+        -0.017168480902910233,
+        -0.027666645124554634,
+        -0.06734028458595276,
+        0.10843724757432938,
+        0.08066407591104507,
+        -0.027849983423948288,
+        -0.0045820740051567554,
+        -0.03388727456331253,
+        0.16772156953811646,
+        0.651636004447937,
+        0.34874194860458374,
+        -0.1454945057630539,
+        -0.18056720495224,
+        0.11703842133283615,
+        0.43017855286598206,
+        0.7624525427818298,
+        -0.3420296907424927,
+        -1.272199273109436,
+        -0.5284644365310669,
+        -0.005667245015501976,
+        0.08240436762571335,
+        -0.13299596309661865,
+        -1.3164156675338745,
+        -1.659982442855835,
+        0.19898656010627747,
+        0.6253566741943359,
+        0.25137946009635925,
+        -0.18244975805282593,
+        -0.5360167622566223,
+        -0.06195700913667679,
+        1.2547520399093628,
+        1.0296341180801392,
+        0.10651036351919174,
+        -0.023540280759334564,
+        -0.07594245672225952,
+        0.1492130160331726,
+        0.5033117532730103,
+        0.09394379705190659,
+        -0.22459803521633148,
+        -0.22473134100437164,
+        -0.04738321527838707,
+        0.04127531498670578,
+        0.0682951882481575,
+        -0.02095615118741989,
+        -0.1233135387301445,
+        -0.10028401762247086,
+        -0.008111395873129368,
+        -0.000617706507910043,
+        0.018859047442674637,
+        0.028446361422538757,
+        -0.06159031391143799,
+        -0.1292838156223297,
+        0.051308393478393555,
+        0.11001072078943253,
+        -0.02056661807000637,
+        -0.012175443582236767,
+        -0.1313694268465042,
+        0.0067574759013950825,
+        0.4612729251384735,
+        0.323080450296402,
+        -0.09392253309488297,
+        -0.1256203055381775,
+        0.03537299111485481,
+        0.2556088864803314,
+        0.6467183232307434,
+        -0.16340143978595734,
+        -0.8799455165863037,
+        -0.3312987685203552,
+        0.01464154850691557,
+        0.07046713680028915,
+        0.053634822368621826,
+        -0.8514915108680725,
+        -1.176972508430481,
+        0.2056443840265274,
+        0.4998764395713806,
+        0.1268644779920578,
+        -0.10905193537473679,
+        -0.3750888705253601,
+        -0.06701061874628067,
+        0.9052186608314514,
+        0.6792045831680298,
+        -0.00323892361484468,
+        -0.0007412935374304652,
+        -0.03608793020248413,
+        0.1009129211306572,
+        0.36775916814804077,
+        0.035214491188526154,
+        -0.2273784875869751,
+        -0.15815992653369904,
+        -0.004773923195898533,
+        0.06374036520719528,
+        0.04737555980682373,
+        -0.0563247986137867,
+        -0.09587392956018448,
+        -0.043853096663951874,
+        0.032572731375694275,
+        -0.0036250585690140724,
+        0.07889056205749512,
+        -0.03589344769716263,
+        -0.019771328195929527,
+        0.04937156289815903,
+        0.039052557200193405,
+        -0.013377528637647629,
+        -0.0841481015086174,
+        -0.03358105197548866,
+        -0.2128981053829193,
+        -0.14468812942504883,
+        0.14675867557525635,
+        0.2550889551639557,
+        0.22369499504566193,
+        -0.0032973098568618298,
+        0.006679064594209194,
+        -0.11752036958932877,
+        0.025247232988476753,
+        0.23064176738262177,
+        0.25043538212776184,
+        0.3474777638912201,
+        0.2151806503534317,
+        0.051294319331645966,
+        0.16301114857196808,
+        0.25422143936157227,
+        -0.1796918362379074,
+        -0.6128425598144531,
+        -0.42049655318260193,
+        0.07740531116724014,
+        -0.007960617542266846,
+        0.2504507601261139,
+        0.2932300865650177,
+        -0.5157915949821472,
+        -1.2904177904129028,
+        -1.0362532138824463,
+        -0.22443994879722595,
+        0.007411653641611338,
+        0.16024430096149445,
+        0.33939966559410095,
+        -0.2748318016529083,
+        -0.8487470149993896,
+        -0.5955387949943542,
+        0.033155132085084915,
+        -0.09185351431369781,
+        -0.05639262869954109,
+        0.17084303498268127,
+        0.11292264610528946,
+        -0.046329669654369354,
+        0.11495561897754669,
+        0.31740760803222656,
+        -0.13903948664665222,
+        0.05507560819387436,
+        0.10180198401212692,
+        -0.1369788944721222,
+        -0.10618618875741959,
+        -0.001083499751985073,
+        0.16340164840221405,
+        0.07591762393712997,
+        0.3417445123195648,
+        0.27897438406944275,
+        -0.32192930579185486,
+        -0.5731648206710815,
+        -0.46150147914886475,
+        -0.03230089321732521,
+        0.04096771031618118,
+        0.22242987155914307,
+        0.027000218629837036,
+        -0.4113498628139496,
+        -0.433158278465271,
+        -0.5252256393432617,
+        -0.3510502874851227,
+        -0.133863165974617,
+        -0.38554033637046814,
+        -0.45547229051589966,
+        0.2475612610578537,
+        1.154951572418213,
+        0.8282179236412048,
+        -0.13197137415409088,
+        -0.03350961208343506,
+        -0.5282800197601318,
+        -0.5297923684120178,
+        0.9037952423095703,
+        2.516275405883789,
+        2.086421489715576,
+        0.3573826849460602,
+        -0.010694397613406181,
+        -0.31418153643608093,
+        -0.5325371026992798,
+        0.48083701729774475,
+        1.7732245922088623,
+        1.2747145891189575,
+        -0.06401863694190979,
+        0.14296381175518036,
+        0.07267159968614578,
+        -0.28001847863197327,
+        -0.29204103350639343,
+        0.12853951752185822,
+        -0.1998838633298874,
+        -0.6375644207000732,
+        0.06310836225748062,
+        -0.020014479756355286,
+        -0.08150970935821533,
+        0.08175478130578995,
+        0.07667485624551773,
+        0.0025236753281205893,
+        -0.08504530042409897,
+        -0.035742271691560745,
+        -0.1332666128873825,
+        -0.15150736272335052,
+        0.18459312617778778,
+        0.3363596200942993,
+        0.2501969635486603,
+        0.029292423278093338,
+        -0.060296736657619476,
+        -0.1142202764749527,
+        -0.05918247997760773,
+        0.18826954066753387,
+        0.2183520495891571,
+        0.21247169375419617,
+        0.14935970306396484,
+        0.09923429787158966,
+        0.21808095276355743,
+        0.21930061280727386,
+        -0.060535889118909836,
+        -0.5729222297668457,
+        -0.4199080169200897,
+        0.058897778391838074,
+        0.050647757947444916,
+        0.2784770131111145,
+        0.2754706144332886,
+        -0.40136128664016724,
+        -1.3269731998443604,
+        -1.124815583229065,
+        -0.11878778040409088,
+        -0.005137663800269365,
+        0.17839783430099487,
+        0.2115524858236313,
+        -0.24165289103984833,
+        -0.9655010104179382,
+        -0.7425088286399841,
+        0.0304054357111454,
+        -0.07012742757797241,
+        -0.015557953156530857,
+        0.1128007024526596,
+        0.18957749009132385,
+        -0.07996463775634766,
+        0.09505810588598251,
+        0.34419506788253784,
+        -0.3072076439857483,
+        0.03868290036916733,
+        0.11494885385036469,
+        0.03748936951160431,
+        0.0797261893749237,
+        -0.003397951368242502,
+        -0.07380004972219467,
+        -0.11507676541805267,
+        -0.10298885405063629,
+        0.10698320716619492,
+        0.06602972000837326,
+        0.08226803690195084,
+        0.0037747276946902275,
+        -0.162277951836586,
+        0.01671667955815792,
+        0.09137773513793945,
+        0.18799471855163574,
+        0.04144813120365143,
+        0.1285877376794815,
+        0.1820434182882309,
+        0.04940629005432129,
+        0.0991915687918663,
+        0.10219171643257141,
+        -0.013141660951077938,
+        -0.051191627979278564,
+        0.05468929558992386,
+        0.087598517537117,
+        0.15897324681282043,
+        0.11863455921411514,
+        -0.00814050156623125,
+        -0.07701541483402252,
+        -0.14013728499412537,
+        -0.044140227138996124,
+        -0.05328791216015816,
+        0.06760499626398087,
+        0.12053386867046356,
+        0.09780212491750717,
+        -0.053725965321063995,
+        -0.07915244251489639,
+        -0.0032519602682441473,
+        0.019637396559119225,
+        0.07848430424928665,
+        0.019138827919960022,
+        0.1460287868976593,
+        0.1281038075685501,
+        0.024417784065008163,
+        0.059176862239837646,
+        0.0658111497759819,
+        -0.016405148431658745,
+        -0.18877744674682617,
+        0.16666102409362793,
+        0.1610611230134964,
+        0.08374520391225815,
+        0.11570518463850021,
+        0.11903064697980881,
+        0.1294964700937271,
+        0.06379758566617966,
+        0.08417274057865143,
+        0.12754113972187042,
+        0.025328608229756355,
+        0.05170705169439316,
+        0.0835295170545578,
+        0.07477264851331711,
+        0.11244285851716995,
+        0.11559426784515381,
+        0.045258160680532455,
+        -0.14825093746185303,
+        -0.08153342455625534,
+        0.06288623809814453,
+        0.11952362209558487,
+        0.11784297972917557,
+        0.011141132563352585,
+        -0.21666541695594788,
+        -0.29976174235343933,
+        -0.2279169261455536,
+        -0.11828474700450897,
+        0.12436322867870331,
+        0.10465826094150543,
+        -0.09751085937023163,
+        -0.292611300945282,
+        -0.37374064326286316,
+        -0.31437963247299194,
+        -0.25637903809547424,
+        0.06173908710479736,
+        0.14131486415863037,
+        0.008434675633907318,
+        -0.23816508054733276,
+        -0.30330890417099,
+        -0.22094152867794037,
+        -0.11608295142650604,
+        0.13235151767730713,
+        0.15353602170944214,
+        0.15839524567127228,
+        0.012247815728187561,
+        -0.08126968890428543,
+        -0.003756331978365779,
+        0.10660683363676071,
+        0.21976575255393982,
+        -0.04188326746225357,
+        0.15462253987789154,
+        0.06303395330905914,
+        0.006879634689539671,
+        0.008284888230264187,
+        0.07084798067808151,
+        0.1211942657828331,
+        0.10190404951572418,
+        0.02935362420976162,
+        -0.05645999684929848,
+        -0.16800500452518463,
+        -0.1850246787071228,
+        -0.09476880729198456,
+        -0.025327544659376144,
+        0.054355036467313766,
+        -0.035813912749290466,
+        -0.18694879114627838,
+        -0.34871891140937805,
+        -0.3151862621307373,
+        -0.1943007856607437,
+        -0.09755205363035202,
+        0.014881589449942112,
+        -0.14875493943691254,
+        -0.37112873792648315,
+        -0.37739917635917664,
+        -0.3241480886936188,
+        -0.2915399968624115,
+        -0.11268249899148941,
+        -0.019726404920220375,
+        -0.2510305941104889,
+        -0.38005372881889343,
+        -0.3622463345527649,
+        -0.2932804226875305,
+        -0.28574010729789734,
+        -0.1505027860403061,
+        -0.004947682376950979,
+        -0.18587322533130646,
+        -0.34759166836738586,
+        -0.28965193033218384,
+        -0.21052972972393036,
+        -0.18780536949634552,
+        -0.07400713860988617,
+        0.11154936999082565,
+        -0.03556853160262108,
+        -0.1896934062242508,
+        -0.18135806918144226,
+        -0.10117948800325394,
+        -0.0393117293715477,
+        0.06517928093671799,
+        -0.016659021377563477,
+        -0.011290309950709343,
+        -0.007930322550237179,
+        0.008189777843654156,
+        0.03678786754608154,
+        0.021890517324209213,
+        0.0034292477648705244,
+        0.02200375869870186,
+        0.0014921070542186499,
+        -0.0800287202000618,
+        -0.17657361924648285,
+        -0.18702608346939087,
+        -0.12880444526672363,
+        -0.022084584459662437,
+        0.026420501992106438,
+        -0.023968446999788284,
+        -0.07948111742734909,
+        -0.16741475462913513,
+        -0.18733707070350647,
+        -0.16539834439754486,
+        -0.07347387820482254,
+        -0.009723886847496033,
+        -0.02016977220773697,
+        -0.061092622578144073,
+        -0.13145211338996887,
+        -0.15919029712677002,
+        -0.15043555200099945,
+        -0.10107766091823578,
+        0.0016151965828612447,
+        0.0627974420785904,
+        0.08695066720247269,
+        0.11727584898471832,
+        0.11745581030845642,
+        0.11329426616430283,
+        0.0533670075237751,
+        -0.016355818137526512,
+        0.008450252935290337,
+        0.06448577344417572,
+        0.1538505256175995,
+        0.21232697367668152,
+        0.14713847637176514,
+        0.039088234305381775,
+        -0.015588105656206608,
+        0.026483291760087013,
+        0.060862988233566284,
+        0.18265819549560547,
+        0.23042462766170502,
+        0.168768972158432,
+        0.034099943935871124,
+        -0.018249109387397766,
+        -0.0321880541741848,
+        -0.03254542127251625,
+        -0.03061222843825817,
+        -0.0026304698549211025,
+        0.017764942720532417,
+        0.010707704350352287,
+        0.009254949167370796,
+        -0.04533161595463753,
+        -0.1483704000711441,
+        -0.2637183666229248,
+        -0.2678598165512085,
+        -0.1737881749868393,
+        -0.049990858882665634,
+        0.013515918515622616,
+        -0.054345693439245224,
+        -0.1467861533164978,
+        -0.24911582469940186,
+        -0.2831358015537262,
+        -0.22300836443901062,
+        -0.13739243149757385,
+        -0.017879672348499298,
+        -0.040345460176467896,
+        -0.09990613907575607,
+        -0.16936856508255005,
+        -0.2266550064086914,
+        -0.2020808756351471,
+        -0.1509508341550827,
+        0.014163740910589695,
+        0.07591170817613602,
+        0.09185601025819778,
+        0.10455341637134552,
+        0.09514842182397842,
+        0.09877350926399231,
+        0.053898438811302185,
+        0.005704578943550587,
+        0.0591997392475605,
+        0.13600079715251923,
+        0.21777905523777008,
+        0.2574957311153412,
+        0.20117221772670746,
+        0.11415109038352966,
+        -0.001181072206236422,
+        0.09470006823539734,
+        0.18978413939476013,
+        0.3073742389678955,
+        0.36875811219215393,
+        0.3069853186607361,
+        0.1708926260471344,
+        -0.0325310118496418,
+        -0.02656698040664196,
+        0.016060845926404,
+        0.02459372952580452,
+        0.04165660962462425,
+        0.033969976007938385,
+        0.012855498120188713,
+        0.030497560277581215,
+        0.004896117839962244,
+        -0.030887477099895477,
+        -0.13454437255859375,
+        -0.1294785887002945,
+        -0.06398608535528183,
+        0.016156472265720367,
+        0.03577340394258499,
+        -0.0033482143189758062,
+        -0.07112833857536316,
+        -0.16465041041374207,
+        -0.1621057391166687,
+        -0.09478478878736496,
+        -0.03555302321910858,
+        -0.001592929707840085,
+        -0.01719600521028042,
+        -0.06598587334156036,
+        -0.1411861628293991,
+        -0.1496778130531311,
+        -0.11535074561834335,
+        -0.0905962884426117,
+        -0.013807609677314758,
+        0.029542237520217896,
+        0.039138730615377426,
+        0.03988270089030266,
+        0.02665030211210251,
+        0.049553126096725464,
+        -0.0015685928519815207,
+        -0.018007200211286545,
+        0.009533192962408066,
+        0.06910547614097595,
+        0.1034330427646637,
+        0.15017645061016083,
+        0.10221225768327713,
+        0.020978443324565887,
+        -0.023747621104121208,
+        0.02295384369790554,
+        0.09313814342021942,
+        0.1771395057439804,
+        0.21169933676719666,
+        0.17989481985569,
+        0.05862005427479744,
+        -0.004540165886282921,
+        0.021994179114699364,
+        -0.003493826137855649,
+        -0.000224211675231345,
+        0.031808022409677505,
+        -0.05090906098484993,
+        0.001970196608453989,
+        0.01633802428841591,
+        0.0049764602445065975,
+        0.0006027702474966645,
+        -0.005952450912445784,
+        -0.009886081330478191,
+        -0.08520589768886566,
+        0.030780712142586708,
+        0.00037104589864611626,
+        0.011886775493621826,
+        -0.023506291210651398,
+        0.08029806613922119,
+        -0.005086984951049089,
+        -0.07738454639911652,
+        0.06721897423267365,
+        -0.02397127076983452,
+        0.006669329944998026,
+        -0.016343094408512115,
+        0.06056324020028114,
+        0.15656796097755432,
+        -0.49836501479148865,
+        0.2475810945034027,
+        -0.009270203299820423,
+        -0.006855266634374857,
+        0.0034896093420684338,
+        -0.027938276529312134,
+        0.5722692012786865,
+        -1.1357109546661377,
+        0.5644665956497192,
+        0.015787361189723015,
+        -0.015141892246901989,
+        -0.0032788251992315054,
+        -0.04797150194644928,
+        0.6196744441986084,
+        -1.1540743112564087,
+        0.6065864562988281,
+        0.0019708566833287477,
+        0.006332532037049532,
+        0.014192940667271614,
+        0.03773411735892296,
+        0.27323007583618164,
+        -0.594700813293457,
+        0.2488076239824295,
+        -0.008853388018906116,
+        0.005692378617823124,
+        0.000576167949475348,
+        -0.027197014540433884,
+        0.022015029564499855,
+        -0.02571249194443226,
+        0.004507753532379866,
+        -0.002439734758809209,
+        -0.01994609646499157,
+        0.03601142391562462,
+        0.008136607706546783,
+        0.01658148691058159,
+        -0.06548810750246048,
+        0.022721221670508385,
+        -0.0038820707704871893,
+        -0.0007800398161634803,
+        0.001392301986925304,
+        0.09576108306646347,
+        -0.014628835022449493,
+        -0.14505760371685028,
+        0.07135403156280518,
+        -0.00839388556778431,
+        -0.004555124789476395,
+        -0.04466082155704498,
+        0.1456393599510193,
+        0.3475525975227356,
+        -0.7879117131233215,
+        0.36262738704681396,
+        0.008226356469094753,
+        0.0055343699641525745,
+        -0.061139706522226334,
+        0.08975803852081299,
+        0.9340736269950867,
+        -1.7307822704315186,
+        0.796896755695343,
+        -0.024700213223695755,
+        -0.013090251013636589,
+        -0.05148586630821228,
+        0.050525497645139694,
+        0.927090048789978,
+        -1.7473385334014893,
+        0.7727715373039246,
+        -0.005721901543438435,
+        0.010676853358745575,
+        -0.012798544019460678,
+        0.11131046712398529,
+        0.4181194007396698,
+        -0.8475598096847534,
+        0.33206430077552795,
+        0.018843427300453186,
+        0.0006885005859658122,
+        0.027498219162225723,
+        0.00207257061265409,
+        0.0032615051604807377,
+        -0.021950624883174896,
+        -0.008452882058918476,
+        -0.007631891872733831,
+        -0.028561849147081375,
+        0.04865337535738945,
+        -0.0023105579894036055,
+        -0.026170270517468452,
+        -0.011794357560575008,
+        0.004327487666159868,
+        0.01756221242249012,
+        0.0011611212976276875,
+        -0.008793564513325691,
+        0.0741758644580841,
+        -0.057649385184049606,
+        -0.006000686902552843,
+        -0.022717488929629326,
+        -0.0047143916599452496,
+        0.005709030199795961,
+        -0.05611564591526985,
+        0.05792170390486717,
+        0.1873699128627777,
+        -0.3856293857097626,
+        0.1371920108795166,
+        0.018953431397676468,
+        0.015250314958393574,
+        -0.0016827551880851388,
+        -0.08515634387731552,
+        0.6517581939697266,
+        -0.9557326436042786,
+        0.46986615657806396,
+        -0.014306572265923023,
+        -0.01625121757388115,
+        -0.016088897362351418,
+        -0.13429272174835205,
+        0.6437729001045227,
+        -1.0167845487594604,
+        0.5061463117599487,
+        0.00879831612110138,
+        -0.008598369546234608,
+        0.02747279778122902,
+        0.007245234213769436,
+        0.2527446150779724,
+        -0.47163763642311096,
+        0.15560215711593628,
+        0.005050336476415396,
+        -0.024848125874996185,
+        -0.0006449198699556291,
+        -0.008673148229718208,
+        -0.06940636038780212,
+        -0.016248086467385292,
+        0.1250494420528412,
+        0.026387182995676994,
+        0.009615709073841572,
+        -0.0025482974015176296,
+        -0.04534498229622841,
+        -0.2626228630542755,
+        -0.2753732204437256,
+        0.052055053412914276,
+        0.010792221873998642,
+        0.007360508665442467,
+        0.10271529853343964,
+        0.1113760769367218,
+        -0.31120774149894714,
+        -0.49849262833595276,
+        -0.2206398844718933,
+        0.04994913563132286,
+        0.054614756256341934,
+        0.27786919474601746,
+        0.56647789478302,
+        0.20970205962657928,
+        -0.22717078030109406,
+        -0.17321231961250305,
+        -0.07836200296878815,
+        -0.09607961028814316,
+        0.10685958713293076,
+        0.40848156809806824,
+        0.34087467193603516,
+        -0.005242985673248768,
+        -0.0682876780629158,
+        -0.0694413110613823,
+        -0.1886596381664276,
+        -0.04473332315683365,
+        0.18096435070037842,
+        0.1961163580417633,
+        0.0014336564345285296,
+        0.014584851451218128,
+        0.0462430939078331,
+        -0.1556192934513092,
+        -0.12809665501117706,
+        0.0213937908411026,
+        0.10984069108963013,
+        -0.023050926625728607,
+        -0.013447473756968975,
+        0.007857509888708591,
+        -0.027979737147688866,
+        -0.04768490046262741,
+        -0.09350565075874329,
+        -0.1659490317106247,
+        0.007927919737994671,
+        0.26641780138015747,
+        0.03398526459932327,
+        0.02118881419301033,
+        -0.006898822728544474,
+        -0.15209096670150757,
+        -0.4939330220222473,
+        -0.42655149102211,
+        0.08215854316949844,
+        0.02115131914615631,
+        0.08892140537500381,
+        0.2164168655872345,
+        0.12431265413761139,
+        -0.47813764214515686,
+        -0.6588870882987976,
+        -0.3097454905509949,
+        0.0837375745177269,
+        0.1548176258802414,
+        0.49661529064178467,
+        0.7337944507598877,
+        0.1966201215982437,
+        -0.29367199540138245,
+        -0.2547970116138458,
+        -0.11655519157648087,
+        -0.11720486730337143,
+        0.21941716969013214,
+        0.5902130603790283,
+        0.42572125792503357,
+        0.020460324361920357,
+        -0.12768393754959106,
+        -0.12030418962240219,
+        -0.2582310736179352,
+        -0.0355166494846344,
+        0.2766987085342407,
+        0.28080257773399353,
+        0.08665957301855087,
+        0.027141664177179337,
+        0.02690703421831131,
+        -0.25276950001716614,
+        -0.23180679976940155,
+        0.015180152840912342,
+        0.11523276567459106,
+        0.041165824979543686,
+        0.017444534227252007,
+        0.0009439520072191954,
+        -0.025763530284166336,
+        -0.022880665957927704,
+        -0.024819007143378258,
+        -0.04901815578341484,
+        0.027672944590449333,
+        0.11211585998535156,
+        0.024664992466568947,
+        -0.010093869641423225,
+        0.009466213174164295,
+        -0.043605536222457886,
+        -0.17007218301296234,
+        -0.1366996467113495,
+        0.08740171790122986,
+        -0.014591479673981667,
+        -0.0031720874831080437,
+        0.0835830345749855,
+        0.028662094846367836,
+        -0.21436777710914612,
+        -0.24753160774707794,
+        -0.06092096120119095,
+        0.03788171336054802,
+        0.04295210912823677,
+        0.19064708054065704,
+        0.3095722496509552,
+        0.08003447204828262,
+        -0.09509303420782089,
+        -0.05495578795671463,
+        -0.052218906581401825,
+        -0.07204427570104599,
+        0.07710819691419601,
+        0.18033725023269653,
+        0.0834946483373642,
+        -0.049662720412015915,
+        -0.06561554968357086,
+        -0.013351643458008766,
+        -0.11217659711837769,
+        0.031957074999809265,
+        0.12180440872907639,
+        0.06891122460365295,
+        -0.013705568388104439,
+        0.0011150656500831246,
+        0.03281388059258461,
+        -0.11285661906003952,
+        -0.06422404199838638,
+        0.04218210279941559,
+        0.014165353029966354,
+        -0.006244795396924019,
+        0.01745765097439289,
+        0.08924975246191025,
+        0.01710040494799614,
+        -0.14013372361660004,
+        -0.21913501620292664,
+        0.03613810986280441,
+        0.14273521304130554,
+        0.05801931768655777,
+        0.021427493542432785,
+        0.23185034096240997,
+        0.2427377849817276,
+        -0.4384608566761017,
+        -0.7205182909965515,
+        -0.18313364684581757,
+        0.033575087785720825,
+        -0.0809125304222107,
+        0.04173902049660683,
+        0.7251381874084473,
+        1.1058244705200195,
+        -0.015065462328493595,
+        -0.6434917449951172,
+        -0.3080260753631592,
+        -0.090518057346344,
+        -0.3659006655216217,
+        -0.4520319700241089,
+        0.5924424529075623,
+        1.4148176908493042,
+        0.5285682082176208,
+        -0.027211233973503113,
+        -0.07359065115451813,
+        -0.08583711832761765,
+        -0.5631492137908936,
+        -1.0246236324310303,
+        -0.1835726648569107,
+        0.3307121694087982,
+        0.22562064230442047,
+        0.05237145721912384,
+        0.13263091444969177,
+        0.13899636268615723,
+        -0.1626550555229187,
+        -0.3918432295322418,
+        -0.03585565462708473,
+        0.06904798001050949,
+        0.029870154336094856,
+        0.04289601743221283,
+        0.05758490040898323,
+        0.10055387020111084,
+        -0.011962685734033585,
+        -0.13269846141338348,
+        0.0012237781193107367,
+        0.05511128902435303,
+        0.03764793649315834,
+        -0.07580426335334778,
+        -0.1750984787940979,
+        0.0189101230353117,
+        0.08156414330005646,
+        0.01691802591085434,
+        0.004023027140647173,
+        0.18009696900844574,
+        0.22744491696357727,
+        -0.38747039437294006,
+        -0.6413040161132812,
+        -0.19208981096744537,
+        0.01971367374062538,
+        -0.036756888031959534,
+        0.004946697968989611,
+        0.7331712245941162,
+        1.1178003549575806,
+        0.03220612183213234,
+        -0.5881579518318176,
+        -0.24453559517860413,
+        -0.11856977641582489,
+        -0.43593257665634155,
+        -0.5339378118515015,
+        0.49467018246650696,
+        1.3376370668411255,
+        0.5238692164421082,
+        0.04584280773997307,
+        0.004761924035847187,
+        -0.032823480665683746,
+        -0.5419207811355591,
+        -1.0093209743499756,
+        -0.19847697019577026,
+        0.20687319338321686,
+        0.12301573902368546,
+        0.07981085777282715,
+        0.14125365018844604,
+        0.19885297119617462,
+        -0.1678825318813324,
+        -0.4042292535305023,
+        0.004483209457248449,
+        0.03009556047618389,
+        0.010802071541547775,
+        0.005967534612864256,
+        0.0892769992351532,
+        0.07342032343149185,
+        -0.0588892325758934,
+        -0.09044717997312546,
+        0.06307072192430496,
+        -0.012583961710333824,
+        -0.006880680099129677,
+        0.0030021765269339085,
+        0.01633061282336712,
+        0.06990820169448853,
+        0.0070900083519518375,
+        -0.03546716272830963,
+        -0.022131899371743202,
+        -0.02906683459877968,
+        0.010664403438568115,
+        -0.18731924891471863,
+        -0.158770352602005,
+        0.08571326732635498,
+        0.039154618978500366,
+        0.032578419893980026,
+        -0.005781106185168028,
+        0.17460086941719055,
+        0.2787456810474396,
+        -0.13416190445423126,
+        -0.23966801166534424,
+        -0.004878139588981867,
+        0.02796499989926815,
+        -0.06610933691263199,
+        -0.19162042438983917,
+        0.11163146048784256,
+        0.371842622756958,
+        0.06444671750068665,
+        0.016595548018813133,
+        0.01164282951503992,
+        0.08330011367797852,
+        -0.03192862868309021,
+        -0.2867860198020935,
+        -0.07080501317977905,
+        -0.016348646953701973,
+        -0.06306261569261551,
+        -0.016291450709104538,
+        0.010558445006608963,
+        0.13014638423919678,
+        0.06202690303325653,
+        -0.03361419215798378,
+        0.0691375732421875,
+        0.003561250865459442,
+        -0.013095442205667496,
+        -0.050333790481090546,
+        -0.019117066636681557,
+        0.0012089330703020096,
+        -0.004555183462798595,
+        -0.022682132199406624,
+        0.04747068136930466,
+        -0.06425238400697708,
+        -0.0010437731398269534,
+        -0.0071629988960921764,
+        -0.04302623122930527,
+        -0.04830477759242058,
+        -0.04069536179304123,
+        -0.06627446413040161,
+        -0.011470981873571873,
+        0.03961857780814171,
+        0.026594260707497597,
+        -0.020662540569901466,
+        -0.05999285355210304,
+        -0.053548794239759445,
+        -0.025959201157093048,
+        -0.015834785997867584,
+        0.013910192996263504,
+        -0.015868371352553368,
+        -0.056620921939611435,
+        -0.06785159558057785,
+        -0.061030179262161255,
+        -0.03560228645801544,
+        -0.04177624359726906,
+        -0.024657463654875755,
+        -0.04889696091413498,
+        0.004557035397738218,
+        0.15414470434188843,
+        0.21642963588237762,
+        0.035425592213869095,
+        -0.04339970648288727,
+        -0.05034525692462921,
+        -0.08522290736436844,
+        0.10652441531419754,
+        0.6791198253631592,
+        0.7785530686378479,
+        0.19941796362400055,
+        -0.05430706962943077,
+        -0.02583213709294796,
+        -0.055139996111392975,
+        0.17940561473369598,
+        0.6757862567901611,
+        0.8240399360656738,
+        0.25826773047447205,
+        -0.062254682183265686,
+        -0.026456547901034355,
+        -0.027271386235952377,
+        -0.0026193747762590647,
+        0.11893659085035324,
+        0.1915995329618454,
+        0.013776157051324844,
+        0.08452087640762329,
+        0.009950258769094944,
+        0.01774573139846325,
+        0.06609759479761124,
+        0.06512798368930817,
+        0.07601971179246902,
+        0.09192144125699997,
+        0.007696932647377253,
+        -0.056120894849300385,
+        -0.03937293961644173,
+        0.043086692690849304,
+        0.055803027004003525,
+        0.08208976686000824,
+        0.03658852353692055,
+        0.025779196992516518,
+        -0.0340605266392231,
+        0.03186321631073952,
+        0.09720855951309204,
+        0.10651290416717529,
+        0.09562067687511444,
+        0.08120692521333694,
+        0.06832587718963623,
+        0.03940538689494133,
+        0.09561086446046829,
+        -0.03726261481642723,
+        -0.3520663380622864,
+        -0.4187469184398651,
+        -0.11643502116203308,
+        0.06203937157988548,
+        0.056670401245355606,
+        0.11540547758340836,
+        -0.2742924690246582,
+        -1.1301417350769043,
+        -1.2482489347457886,
+        -0.4411431849002838,
+        0.08538330346345901,
+        0.036888301372528076,
+        0.08759869635105133,
+        -0.32129940390586853,
+        -1.1163593530654907,
+        -1.26430082321167,
+        -0.48638999462127686,
+        0.1056363582611084,
+        0.042436979711055756,
+        0.07075526565313339,
+        -0.08341801166534424,
+        -0.30567145347595215,
+        -0.39268070459365845,
+        -0.10187282413244247,
+        -0.02507772110402584,
+        -0.0044433241710066795,
+        -0.009278317913413048,
+        -0.02964872494339943,
+        -0.018799586221575737,
+        -0.03760084509849548,
+        -0.030454028397798538,
+        -0.004638439975678921,
+        0.026587119325995445,
+        0.0095819728448987,
+        -0.007110759150236845,
+        -0.006491640582680702,
+        -0.028083719313144684,
+        -0.009543413296341896,
+        -0.005706887226551771,
+        0.013012710027396679,
+        -0.010281933471560478,
+        -0.0544208325445652,
+        -0.023230208083987236,
+        -0.05344587564468384,
+        -0.04052828997373581,
+        -0.028035156428813934,
+        -0.011922319419682026,
+        -0.045427750796079636,
+        0.020700184628367424,
+        0.2117788940668106,
+        0.21090814471244812,
+        0.07214333862066269,
+        -0.019348343834280968,
+        -0.014455118216574192,
+        -0.03561105206608772,
+        0.17339389026165009,
+        0.49509289860725403,
+        0.5219546556472778,
+        0.26121678948402405,
+        -0.029803339391946793,
+        -0.013761913403868675,
+        -0.04028521850705147,
+        0.17008572816848755,
+        0.45583003759384155,
+        0.4757367670536041,
+        0.22357690334320068,
+        -0.050064269453287125,
+        -0.021086007356643677,
+        -0.039873600006103516,
+        0.06433176249265671,
+        0.20187893509864807,
+        0.2078690379858017,
+        0.07802058011293411,
+        0.022050827741622925,
+        -0.05272649601101875,
+        -0.024311071261763573,
+        -0.12387345731258392,
+        -0.20065246522426605,
+        0.0262442696839571,
+        0.20101603865623474,
+        0.056791841983795166,
+        -0.008266052231192589,
+        0.025132112205028534,
+        -0.23289933800697327,
+        -0.5296569466590881,
+        -0.282010018825531,
+        0.025113720446825027,
+        0.13172000646591187,
+        0.16999290883541107,
+        0.31588253378868103,
+        0.05583454668521881,
+        -0.5321000814437866,
+        -0.5585035085678101,
+        -0.23885560035705566,
+        0.0461968369781971,
+        0.13807418942451477,
+        0.6536149382591248,
+        0.6385176777839661,
+        -0.15636183321475983,
+        -0.5484278798103333,
+        -0.5470613241195679,
+        -0.06269911676645279,
+        -0.06726553291082382,
+        0.5561463236808777,
+        1.0985187292099,
+        0.6801460385322571,
+        0.12841203808784485,
+        -0.21693651378154755,
+        -0.19168342649936676,
+        -0.43073776364326477,
+        -0.15226863324642181,
+        0.41150590777397156,
+        0.47421786189079285,
+        0.25146934390068054,
+        -0.017203813418745995,
+        -0.09694849699735641,
+        -0.4082376956939697,
+        -0.3549531400203705,
+        -0.023591510951519012,
+        0.12086013704538345,
+        0.08050766587257385,
+        -0.044960521161556244,
+        -0.0031193571630865335,
+        0.014398006722331047,
+        -0.005931032355874777,
+        0.01548685971647501,
+        0.05407734215259552,
+        -0.006386967841535807,
+        0.021660227328538895,
+        0.01656133122742176,
+        0.002835798542946577,
+        0.0008500503608956933,
+        0.021802745759487152,
+        0.13470955193042755,
+        0.06802596151828766,
+        0.0033256933093070984,
+        0.03037848509848118,
+        0.054654810577631,
+        -0.034221138805150986,
+        0.015171438455581665,
+        0.23395732045173645,
+        0.24771827459335327,
+        0.16352902352809906,
+        -0.07505007833242416,
+        -0.0814652070403099,
+        -0.21493901312351227,
+        -0.3109704852104187,
+        0.013416547328233719,
+        0.12807825207710266,
+        0.12044191360473633,
+        -0.007915153168141842,
+        0.0100772799924016,
+        -0.15165796875953674,
+        -0.4013277292251587,
+        -0.24811144173145294,
+        -0.06641282886266708,
+        0.022568246349692345,
+        0.061083581298589706,
+        0.09920243173837662,
+        0.0695505365729332,
+        -0.12213064730167389,
+        -0.12606006860733032,
+        -0.04593949392437935,
+        -0.040190644562244415,
+        0.03899035230278969,
+        0.12688779830932617,
+        0.114081971347332,
+        -0.013348283246159554,
+        0.03325115144252777,
+        0.007111718878149986,
+        0.048056699335575104,
+        -0.003726312192156911,
+        0.05401211231946945,
+        0.05355936661362648,
+        0.21303032338619232,
+        0.2944865822792053,
+        -0.13604623079299927,
+        -0.3770989775657654,
+        -0.0808275118470192,
+        -0.006103217601776123,
+        -0.02005188539624214,
+        0.37605899572372437,
+        0.7776278853416443,
+        0.32064270973205566,
+        -0.23708422482013702,
+        -0.23380732536315918,
+        -0.22103570401668549,
+        -0.45596328377723694,
+        0.07213663309812546,
+        0.9384943246841431,
+        0.8762810230255127,
+        0.3557227551937103,
+        -0.09239326417446136,
+        -0.25462013483047485,
+        -0.9858288168907166,
+        -0.9860153198242188,
+        0.2600172162055969,
+        0.7731484770774841,
+        0.7665594816207886,
+        0.14806008338928223,
+        0.13109923899173737,
+        -0.6917864680290222,
+        -1.580305814743042,
+        -0.9557210803031921,
+        -0.16357193887233734,
+        0.3189502954483032,
+        0.28703632950782776,
+        0.5599567890167236,
+        0.2459551841020584,
+        -0.5451022982597351,
+        -0.6926754713058472,
+        -0.4368602931499481,
+        0.027606861665844917,
+        0.025857241824269295,
+        0.5376880764961243,
+        0.535673975944519,
+        0.09012678265571594,
+        -0.14688564836978912,
+        -0.1812361180782318,
+        0.050619762390851974,
+        0.021388273686170578,
+        -0.05923623591661453,
+        -0.006538081914186478,
+        0.05171535536646843,
+        -0.051560595631599426,
+        -0.007643367163836956,
+        0.027748188003897667,
+        0.0024676925968378782,
+        -0.008760283701121807,
+        0.13039670884609222,
+        0.18568934500217438,
+        0.06342563778162003,
+        0.030788781121373177,
+        -0.004423442296683788,
+        -0.041261281818151474,
+        0.013299684040248394,
+        0.22491391003131866,
+        0.27831292152404785,
+        0.0883866474032402,
+        0.048967570066452026,
+        0.0012756097130477428,
+        -0.03215779736638069,
+        0.02710782177746296,
+        0.20178261399269104,
+        0.22446107864379883,
+        0.06052157282829285,
+        0.019020315259695053,
+        0.02715166099369526,
+        -0.03146626800298691,
+        -0.017960363999009132,
+        0.11820292472839355,
+        0.16114193201065063,
+        0.05221821367740631,
+        -0.02201441302895546,
+        -0.026308327913284302,
+        0.008580431342124939,
+        -0.02444308064877987,
+        0.061380185186862946,
+        0.11184953153133392,
+        0.006053542252629995,
+        -0.03248603641986847,
+        -0.037558719515800476,
+        0.01881473697721958,
+        -0.02349863201379776,
+        0.02150980569422245,
+        0.09881952404975891,
+        0.03962325677275658,
+        -0.0031782283913344145,
+        -0.0030868228059262037,
+        -0.007606725674122572,
+        -0.06136326491832733,
+        0.022755015641450882,
+        0.09683670848608017,
+        0.0016674631042405963,
+        0.01306125894188881,
+        0.011335537768900394,
+        -0.01769089885056019,
+        0.005807302892208099,
+        0.19103741645812988,
+        0.2631426155567169,
+        0.10424992442131042,
+        0.025223100557923317,
+        -0.024689532816410065,
+        -0.03370697423815727,
+        0.0512213259935379,
+        0.2983294129371643,
+        0.37597405910491943,
+        0.18788966536521912,
+        0.056492965668439865,
+        -0.006051253993064165,
+        -0.027141474187374115,
+        0.06733105331659317,
+        0.29171472787857056,
+        0.32160115242004395,
+        0.14176633954048157,
+        0.008538221009075642,
+        -0.013039524666965008,
+        -0.04279422387480736,
+        0.03345612436532974,
+        0.19111940264701843,
+        0.25728005170822144,
+        0.09830093383789062,
+        -0.03371569141745567,
+        -0.05277566984295845,
+        -0.0011038694065064192,
+        -0.013657800853252411,
+        0.10037966072559357,
+        0.1724642813205719,
+        0.04436478391289711,
+        -0.02240786701440811,
+        -0.02181128039956093,
+        0.019526727497577667,
+        -0.050060197710990906,
+        0.017275504767894745,
+        0.07785085588693619,
+        -0.001727179973386228,
+        -0.0014453287003561854,
+        0.019352080300450325,
+        -0.003202121239155531,
+        -0.04241566359996796,
+        0.005586653482168913,
+        0.06037082523107529,
+        0.014115821570158005,
+        -0.00568200321868062,
+        0.018071964383125305,
+        -0.0007147599244490266,
+        0.011219227686524391,
+        0.10582104325294495,
+        0.15557849407196045,
+        0.06189450994133949,
+        0.014160261489450932,
+        0.00814653467386961,
+        -0.028064200654625893,
+        0.026086319237947464,
+        0.1474728286266327,
+        0.18273885548114777,
+        0.06638553738594055,
+        0.019263381138443947,
+        0.028977060690522194,
+        -0.02551555074751377,
+        0.01937149092555046,
+        0.12000202387571335,
+        0.1285850703716278,
+        0.047506313771009445,
+        -0.011383740231394768,
+        0.02826755866408348,
+        -0.009583448991179466,
+        -0.02093282900750637,
+        0.07994058728218079,
+        0.0926218256354332,
+        0.0318426676094532,
+        -0.024409465491771698,
+        0.020994359627366066,
+        0.03295197710394859,
+        -0.034276511520147324,
+        0.037398867309093475,
+        0.0794353187084198,
+        0.022805212065577507,
+        0.0015407208120450377,
+        0.013169347308576107,
+        0.038584139198064804,
+        -0.002118688775226474,
+        0.03358406573534012,
+        0.09085306525230408,
+        0.04255761206150055,
+        0.010275964625179768,
+        0.025351760908961296,
+        0.04205995053052902,
+        0.1319226324558258,
+        0.049708493053913116,
+        -0.03743802383542061,
+        -0.04293569549918175,
+        -0.07646205276250839,
+        -0.04986324533820152,
+        0.15992362797260284,
+        0.011027384549379349,
+        -0.32150742411613464,
+        -0.3761928677558899,
+        -0.1654653549194336,
+        -0.08728181570768356,
+        0.044714685529470444,
+        -0.007500737439841032,
+        -0.41376256942749023,
+        -0.6625701189041138,
+        -0.21809393167495728,
+        0.10641554743051529,
+        0.09274336695671082,
+        0.10189083218574524,
+        -0.1175118163228035,
+        -0.2905261516571045,
+        -0.06248515099287033,
+        0.4791955053806305,
+        0.49865299463272095,
+        0.23415400087833405,
+        0.12729482352733612,
+        -0.05814196541905403,
+        -0.003843356389552355,
+        0.16410382091999054,
+        0.40895968675613403,
+        0.22034852206707,
+        0.021014101803302765,
+        -0.05658271536231041,
+        -0.012199933640658855,
+        0.034277670085430145,
+        0.09565535932779312,
+        0.18921032547950745,
+        0.010441004298627377,
+        -0.07427560538053513,
+        -0.09049694985151291,
+        -0.00554919708520174,
+        0.021386168897151947,
+        0.0297325998544693,
+        0.06431404501199722,
+        -0.07367311418056488,
+        -0.08734254539012909,
+        -0.059512097388505936,
+        0.11382041126489639,
+        0.19622667133808136,
+        0.02534862980246544,
+        -0.09704668819904327,
+        -0.10857658833265305,
+        -0.10241919010877609,
+        -0.037928055971860886,
+        0.17917697131633759,
+        -0.0396210141479969,
+        -0.472421795129776,
+        -0.5453466176986694,
+        -0.23921693861484528,
+        -0.06353127211332321,
+        0.033679377287626266,
+        -0.011634309776127338,
+        -0.523267924785614,
+        -0.8400278091430664,
+        -0.3026646375656128,
+        0.17986975610256195,
+        0.20296970009803772,
+        0.14190459251403809,
+        -0.12953802943229675,
+        -0.3968985378742218,
+        -0.13779792189598083,
+        0.548722505569458,
+        0.7039015293121338,
+        0.4025704264640808,
+        0.19535738229751587,
+        -0.08568660169839859,
+        -0.0589536651968956,
+        0.1868993639945984,
+        0.5782724618911743,
+        0.43018248677253723,
+        0.08876730501651764,
+        -0.10219226032495499,
+        -0.04660544916987419,
+        0.018129168078303337,
+        0.14359626173973083,
+        0.3174169361591339,
+        0.07668197154998779,
+        -0.13716676831245422,
+        -0.2058524489402771,
+        -0.023707473650574684,
+        0.03213014453649521,
+        0.06718969345092773,
+        0.0917893499135971,
+        -0.10766899585723877,
+        -0.206499844789505,
+        -0.12713390588760376,
+        -0.03174767270684242,
+        0.046395305544137955,
+        0.018318502232432365,
+        -0.002416136907413602,
+        -0.027143845334649086,
+        -0.0036621293984353542,
+        -0.019220896065235138,
+        0.05427055433392525,
+        0.05058867856860161,
+        -0.05274957790970802,
+        -0.11321325600147247,
+        -0.07062514126300812,
+        -0.01720590703189373,
+        -0.00901520811021328,
+        0.01746262051165104,
+        -0.08946436643600464,
+        -0.2304752618074417,
+        -0.1021895483136177,
+        0.013501768000423908,
+        0.029721295461058617,
+        -0.010094762779772282,
+        0.009764805436134338,
+        -0.06424269080162048,
+        -0.03032868541777134,
+        0.13044297695159912,
+        0.12166891992092133,
+        0.07157951593399048,
+        0.029467372223734856,
+        -0.03827595338225365,
+        -0.031337328255176544,
+        -0.026486340910196304,
+        0.05953369289636612,
+        0.029497025534510612,
+        0.022669093683362007,
+        -0.01055963709950447,
+        -0.025020133703947067,
+        0.002589448355138302,
+        0.017152298241853714,
+        0.062067389488220215,
+        0.008266719058156013,
+        0.00563611788675189,
+        -0.0044869836419820786,
+        0.003065212396904826,
+        0.014371387660503387,
+        0.013636622577905655,
+        0.021183570846915245,
+        -0.012462744489312172,
+        -0.02493542619049549,
+        0.009652925655245781,
+        -0.09309647232294083,
+        -0.09614148736000061,
+        0.020278261974453926,
+        0.262399286031723,
+        0.0025974283926188946,
+        -0.09532646089792252,
+        -0.0391894206404686,
+        -0.003332971129566431,
+        -0.25919869542121887,
+        -0.2104814499616623,
+        0.5975717306137085,
+        0.20378711819648743,
+        -0.20521192252635956,
+        0.005045099183917046,
+        0.16707547008991241,
+        -0.08322134613990784,
+        -1.1734565496444702,
+        0.4060916006565094,
+        0.9109339714050293,
+        -0.22450445592403412,
+        -0.14085394144058228,
+        0.19534644484519958,
+        0.6220589280128479,
+        -1.0614460706710815,
+        -1.2444484233856201,
+        1.1965712308883667,
+        0.5032565593719482,
+        -0.26604175567626953,
+        -0.13583213090896606,
+        0.6453277468681335,
+        0.4994892477989197,
+        -1.7917202711105347,
+        -0.15182015299797058,
+        0.7891079783439636,
+        0.10711944103240967,
+        -0.11587982624769211,
+        0.08287231624126434,
+        0.7848142981529236,
+        -0.1764022707939148,
+        -1.0492321252822876,
+        0.15281184017658234,
+        0.3100045919418335,
+        -0.0461110882461071,
+        -0.06824400275945663,
+        0.25544390082359314,
+        0.3444065451622009,
+        -0.3189513683319092,
+        -0.3503313362598419,
+        0.05462741479277611,
+        -0.041028521955013275,
+        0.00624969182536006,
+        -0.0014677124563604593,
+        0.10383514314889908,
+        -0.03467189520597458,
+        -0.03946290910243988,
+        0.012734192423522472,
+        -0.003676857566460967,
+        -0.1616411954164505,
+        -0.034441810101270676,
+        0.34758275747299194,
+        -0.0017601394793018699,
+        -0.17407774925231934,
+        0.05167992413043976,
+        0.12394318729639053,
+        -0.018228475004434586,
+        -0.71342533826828,
+        0.39672648906707764,
+        0.4870489537715912,
+        -0.27272745966911316,
+        -0.02687050960958004,
+        0.09090551733970642,
+        0.46698617935180664,
+        -0.6089348196983337,
+        -0.7488552331924438,
+        0.8327828645706177,
+        0.19947239756584167,
+        -0.17806877195835114,
+        -0.09197663515806198,
+        0.3198661506175995,
+        0.42619431018829346,
+        -1.1321229934692383,
+        -0.05452701821923256,
+        0.4155597984790802,
+        -0.001295815804041922,
+        -0.06596186012029648,
+        -0.05821318179368973,
+        0.4515152871608734,
+        0.06321248412132263,
+        -0.6065720319747925,
+        0.10882120579481125,
+        0.13767170906066895,
+        0.01809641905128956,
+        -0.070295050740242,
+        0.04035783186554909,
+        0.22459834814071655,
+        -0.048405971378088,
+        -0.14622822403907776,
+        -0.01119917817413807,
+        0.00666345190256834,
+        0.04815478250384331,
+        -0.017866114154458046,
+        -0.04813665896654129,
+        -0.02366034686565399,
+        0.03589487820863724,
+        -0.0066519430838525295,
+        0.0004148671869188547,
+        -0.014153627678751945,
+        0.04403751716017723,
+        0.04098428785800934,
+        -0.10525348782539368,
+        -0.0078808031976223,
+        0.0444580540060997,
+        -0.027595041319727898,
+        0.010916849598288536,
+        -0.1390431821346283,
+        0.20334453880786896,
+        -0.006475532427430153,
+        -0.16053295135498047,
+        0.06964287906885147,
+        -0.025649840012192726,
+        0.12622858583927155,
+        -0.09694403409957886,
+        -0.09791161119937897,
+        0.2617567479610443,
+        -0.06268735229969025,
+        -0.03128494322299957,
+        -0.017743078991770744,
+        -0.02372320368885994,
+        0.2195650041103363,
+        -0.2456466406583786,
+        0.031090563163161278,
+        0.010196326300501823,
+        -0.04323133826255798,
+        0.02746250294148922,
+        -0.079569011926651,
+        0.06894756853580475,
+        0.11414647102355957,
+        -0.12175147980451584,
+        0.025397513061761856,
+        0.006027852185070515,
+        0.013360690325498581,
+        -0.024561991915106773,
+        -0.10966529697179794,
+        0.04913714900612831,
+        0.09801583737134933,
+        0.00013951699656900018,
+        -0.03194398432970047,
+        0.002382949460297823,
+        -0.003335593966767192,
+        0.023621119558811188,
+        0.024585755541920662,
+        -0.016027197241783142,
+        -0.02846739999949932,
+        -0.012949706055223942,
+        -0.020852699875831604,
+        -0.016913240775465965,
+        0.016088848933577538,
+        0.141468346118927,
+        0.07285624742507935,
+        -0.008997173048555851,
+        -0.033306676894426346,
+        -0.03418722003698349,
+        -0.15127411484718323,
+        -0.047440435737371445,
+        0.2687169015407562,
+        0.17237843573093414,
+        0.03505166247487068,
+        -0.06994523108005524,
+        -0.031143782660365105,
+        -0.3024960458278656,
+        -0.1552041918039322,
+        0.33517369627952576,
+        0.28441429138183594,
+        0.06471730768680573,
+        -0.0613982267677784,
+        -0.02271229960024357,
+        -0.29379361867904663,
+        -0.3259792923927307,
+        0.16062304377555847,
+        0.29220375418663025,
+        0.10862076282501221,
+        -0.005909152328968048,
+        0.049116987735033035,
+        -0.20140305161476135,
+        -0.3278747797012329,
+        -0.02566053718328476,
+        0.14338354766368866,
+        0.006411381531506777,
+        -0.007274044211953878,
+        0.08232597261667252,
+        -0.04198717698454857,
+        -0.17330540716648102,
+        -0.01131037063896656,
+        0.08018575608730316,
+        -0.02374250255525112,
+        -0.002276432001963258,
+        0.00019528658594936132,
+        -0.024716932326555252,
+        0.026509074494242668,
+        0.08361849933862686,
+        0.012956380844116211,
+        -0.06030649319291115,
+        -0.020338360220193863,
+        -0.03577016666531563,
+        -0.06858085840940475,
+        0.008245388977229595,
+        0.25225168466567993,
+        0.16135559976100922,
+        -0.03690743073821068,
+        -0.09188401699066162,
+        -0.10410526394844055,
+        -0.25971388816833496,
+        -0.07926154136657715,
+        0.3933144509792328,
+        0.33186599612236023,
+        0.059405017644166946,
+        -0.11824909597635269,
+        -0.10528354346752167,
+        -0.4808295667171478,
+        -0.25224801898002625,
+        0.4267246127128601,
+        0.4853539764881134,
+        0.16933484375476837,
+        -0.073345847427845,
+        -0.02648857608437538,
+        -0.4723232388496399,
+        -0.4904792010784149,
+        0.1938265562057495,
+        0.44070878624916077,
+        0.22439399361610413,
+        0.03877745941281319,
+        0.08536087721586227,
+        -0.31432414054870605,
+        -0.5158097743988037,
+        -0.09537900239229202,
+        0.20227058231830597,
+        0.07895126938819885,
+        0.059195615351200104,
+        0.14728911221027374,
+        -0.059377528727054596,
+        -0.2884902060031891,
+        -0.12288203090429306,
+        0.05220698565244675,
+        -0.045279599726200104,
+        0.019795719534158707,
+        -0.009819806553423405,
+        -0.013713877648115158,
+        0.0012175077572464943,
+        0.03281072899699211,
+        0.0017424041870981455,
+        -0.028847966343164444,
+        -0.0032059827353805304,
+        -0.020358575507998466,
+        0.0009416870889253914,
+        -0.007760196924209595,
+        0.07921157032251358,
+        0.03826644644141197,
+        -0.02976907789707184,
+        -0.03300238028168678,
+        -0.017963968217372894,
+        -0.055836472660303116,
+        -0.03299689665436745,
+        0.15166012942790985,
+        0.06786434352397919,
+        0.008589516393840313,
+        -0.05790036544203758,
+        -0.0029997669626027346,
+        -0.14070068299770355,
+        -0.08799122273921967,
+        0.19680362939834595,
+        0.14703704416751862,
+        0.03569985553622246,
+        -0.02847554162144661,
+        0.03601403906941414,
+        -0.1339161992073059,
+        -0.20527805387973785,
+        0.1060374304652214,
+        0.16269326210021973,
+        0.0575268417596817,
+        0.0029672966338694096,
+        0.018848277628421783,
+        -0.1029881089925766,
+        -0.19446833431720734,
+        -0.055140964686870575,
+        0.09632515162229538,
+        0.01196608692407608,
+        0.01994382217526436,
+        0.0030014747753739357,
+        0.0029817752074450254,
+        -0.09395840018987656,
+        -0.038611751049757004,
+        0.03793984279036522,
+        -0.006295992527157068,
+        0.01736803539097309,
+        -0.0961727425456047,
+        0.1318971812725067,
+        0.00169672432821244,
+        0.02773740515112877,
+        -0.03737606480717659,
+        -0.02413480542600155,
+        -0.07371329516172409,
+        0.04465596005320549,
+        0.34972262382507324,
+        0.269726425409317,
+        0.14907677471637726,
+        -0.15323053300380707,
+        -0.24987848103046417,
+        -0.32931339740753174,
+        0.05209995433688164,
+        0.5192161798477173,
+        0.5108750462532043,
+        0.2627664804458618,
+        -0.26889729499816895,
+        -0.49891141057014465,
+        -0.5081418752670288,
+        0.13535383343696594,
+        0.7318623661994934,
+        0.7116816639900208,
+        0.2973657250404358,
+        -0.38982102274894714,
+        -0.7131763100624084,
+        -0.5916072130203247,
+        0.1200462281703949,
+        0.7752112746238708,
+        0.6947993636131287,
+        0.21100594103336334,
+        -0.5576100945472717,
+        -0.7797606587409973,
+        -0.6058254837989807,
+        0.08617032319307327,
+        0.6432424187660217,
+        0.522933304309845,
+        0.16018754243850708,
+        -0.5134027004241943,
+        -0.6838728189468384,
+        -0.5088241100311279,
+        0.10101393610239029,
+        0.4321025311946869,
+        0.3330003023147583,
+        0.10116448998451233,
+        -0.2786642014980316,
+        -0.4134466052055359,
+        -0.3247438967227936,
+        0.009768294170498848,
+        0.008712833747267723,
+        -0.029476309195160866,
+        0.007709377445280552,
+        0.025279967114329338,
+        0.01615188643336296,
+        0.01585867628455162,
+        -0.0031516810413450003,
+        -0.06462288647890091,
+        -0.055517926812171936,
+        -0.013180199079215527,
+        -0.014849795028567314,
+        0.05535515025258064,
+        0.04162544384598732,
+        0.0022392054088413715,
+        -0.09408581256866455,
+        -0.07889631390571594,
+        -0.032870080322027206,
+        0.0382377915084362,
+        0.07495865970849991,
+        0.08439645916223526,
+        0.008036677725613117,
+        -0.1167779192328453,
+        -0.10782196372747421,
+        -0.06854722648859024,
+        0.06310252100229263,
+        0.09643208235502243,
+        0.08629462122917175,
+        -0.016969647258520126,
+        -0.10456187278032303,
+        -0.10410942137241364,
+        -0.017384463921189308,
+        0.03931420296430588,
+        0.11296819150447845,
+        0.08688211441040039,
+        -0.018024103716015816,
+        -0.0985492691397667,
+        -0.10534191876649857,
+        0.016594627872109413,
+        0.024613894522190094,
+        0.09626104682683945,
+        0.056779902428388596,
+        -0.01314453687518835,
+        -0.1173979789018631,
+        -0.07576211541891098,
+        -0.00741730397567153,
+        0.04463285952806473,
+        0.06365535408258438,
+        0.029472019523382187,
+        0.06097950413823128,
+        -0.0884813666343689,
+        -0.020469073206186295,
+        -0.004499382339417934,
+        0.006147715728729963,
+        0.0061135985888540745,
+        0.046618249267339706,
+        -0.024977274239063263,
+        -0.2809607684612274,
+        -0.20776452124118805,
+        -0.10792756825685501,
+        0.10520339012145996,
+        0.2195160835981369,
+        0.27846819162368774,
+        -0.0425783209502697,
+        -0.4539273977279663,
+        -0.4210258722305298,
+        -0.24160517752170563,
+        0.2377386838197708,
+        0.4254952371120453,
+        0.40258923172950745,
+        -0.08894401043653488,
+        -0.6261403560638428,
+        -0.6177268624305725,
+        -0.2941279113292694,
+        0.36115866899490356,
+        0.6176164746284485,
+        0.5170959234237671,
+        -0.12760992348194122,
+        -0.6392932534217834,
+        -0.6288641095161438,
+        -0.20397846400737762,
+        0.4859760105609894,
+        0.7283636927604675,
+        0.5233575105667114,
+        -0.08038943260908127,
+        -0.513219952583313,
+        -0.4611802101135254,
+        -0.08622774481773376,
+        0.41959214210510254,
+        0.6145293116569519,
+        0.4252074360847473,
+        -0.08993257582187653,
+        -0.3586794435977936,
+        -0.23889268934726715,
+        -0.07402873039245605,
+        0.2362663745880127,
+        0.33187127113342285,
+        0.24442552030086517,
+        -0.10037989169359207,
+        -0.1200498715043068,
+        -0.06188809871673584,
+        0.009648810140788555,
+        0.07703708112239838,
+        -0.07734857499599457,
+        -0.16337357461452484,
+        -0.13160429894924164,
+        -0.037760209292173386,
+        0.10750655829906464,
+        0.21975228190422058,
+        0.21332265436649323,
+        0.1482381671667099,
+        -0.012174196541309357,
+        -0.03128019720315933,
+        0.06983920931816101,
+        0.2055918425321579,
+        0.16611628234386444,
+        0.20955723524093628,
+        0.21407610177993774,
+        0.13214662671089172,
+        0.01558306161314249,
+        0.20919384062290192,
+        0.21453723311424255,
+        0.10980720072984695,
+        0.10323476791381836,
+        0.1754676252603531,
+        0.16320686042308807,
+        0.076839879155159,
+        0.2669583261013031,
+        0.29500535130500793,
+        0.18005967140197754,
+        0.14900699257850647,
+        0.2337430715560913,
+        0.2607984244823456,
+        -0.08909865468740463,
+        0.12383633106946945,
+        0.27329200506210327,
+        0.2634970247745514,
+        0.2298160344362259,
+        0.22673286497592926,
+        0.1753624528646469,
+        -0.14258335530757904,
+        -0.033422429114580154,
+        0.09338828176259995,
+        0.21975602209568024,
+        0.2488732784986496,
+        0.21165378391742706,
+        0.08514796197414398,
+        0.0776415765285492,
+        -0.028732767328619957,
+        -0.0827818363904953,
+        -0.14784079790115356,
+        -0.06101813539862633,
+        -0.10570015013217926,
+        -0.07298385351896286,
+        -0.03352680057287216,
+        -0.08094660192728043,
+        -0.08546923100948334,
+        -0.025722583755850792,
+        -0.04828448221087456,
+        -0.15816760063171387,
+        -0.22295169532299042,
+        -0.04976325109601021,
+        -0.12255501747131348,
+        -0.04869991913437843,
+        0.09818085283041,
+        0.2285904735326767,
+        0.015187943354249,
+        -0.19952231645584106,
+        -0.1415022611618042,
+        -0.09511925280094147,
+        0.10828559100627899,
+        0.35640013217926025,
+        0.5399265289306641,
+        0.3026861250400543,
+        -0.10532847791910172,
+        -0.0455780103802681,
+        -0.09365752339363098,
+        0.2482689470052719,
+        0.5483031272888184,
+        0.6572608947753906,
+        0.4098849594593048,
+        -0.0039499495178461075,
+        -0.11641024053096771,
+        -0.22666053473949432,
+        -0.03133581206202507,
+        0.2815704643726349,
+        0.3229265809059143,
+        0.009749597869813442,
+        -0.19616934657096863,
+        -0.05046992748975754,
+        -0.15597671270370483,
+        -0.22775587439537048,
+        -0.14872166514396667,
+        -0.12174414098262787,
+        -0.23433859646320343,
+        -0.238412007689476,
+        0.09725375473499298,
+        0.08522887527942657,
+        0.006490080617368221,
+        -0.024619178846478462,
+        0.07278231531381607,
+        0.13406167924404144,
+        0.22993306815624237,
+        0.10250072181224823,
+        0.09119024127721786,
+        -0.07687287777662277,
+        -0.1012108325958252,
+        -0.09500063210725784,
+        -0.10082961618900299,
+        0.09466016292572021,
+        0.11299365013837814,
+        -0.033278487622737885,
+        -0.20269805192947388,
+        -0.21449527144432068,
+        -0.08820098638534546,
+        -0.18970704078674316,
+        -0.050536416471004486,
+        -0.03471578657627106,
+        -0.13205547630786896,
+        -0.18150201439857483,
+        -0.03963223099708557,
+        0.13029472529888153,
+        -0.11594776809215546,
+        -0.173879474401474,
+        0.017406627535820007,
+        -0.11885572224855423,
+        -0.06966021656990051,
+        0.1687183529138565,
+        0.2677668035030365,
+        -0.020446041598916054,
+        -0.11710261553525925,
+        0.044354867190122604,
+        -0.10054060816764832,
+        -0.1287878155708313,
+        -0.03600803390145302,
+        -0.03198331966996193,
+        -0.22372953593730927,
+        -0.11045534163713455,
+        0.22963544726371765,
+        0.16736479103565216,
+        -0.023956498131155968,
+        -0.0882943719625473,
+        -0.11904646456241608,
+        -0.10481738299131393,
+        0.083598293364048,
+        0.058089643716812134,
+        -0.04821285232901573,
+        0.16764044761657715,
+        -0.13788309693336487,
+        -0.1412951946258545,
+        0.059633608907461166,
+        0.012824267148971558,
+        -0.03141501545906067,
+        -0.017422236502170563,
+        0.3908282518386841,
+        -0.31520241498947144,
+        -0.27876099944114685,
+        0.17109407484531403,
+        0.011913848109543324,
+        -0.04440265893936157,
+        0.05610174685716629,
+        0.5290316343307495,
+        -0.4506116211414337,
+        -0.2946499288082123,
+        0.2802693545818329,
+        0.04180249199271202,
+        -0.05673402547836304,
+        0.0445592887699604,
+        0.4933576285839081,
+        -0.4903600513935089,
+        -0.3259376883506775,
+        0.26069584488868713,
+        0.047843094915151596,
+        -0.053804315626621246,
+        0.029928382486104965,
+        0.3588394224643707,
+        -0.39090782403945923,
+        -0.18598265945911407,
+        0.1703576147556305,
+        0.010407418012619019,
+        0.019840527325868607,
+        -0.017079327255487442,
+        0.21012797951698303,
+        -0.1586841642856598,
+        -0.12738685309886932,
+        0.12431345880031586,
+        0.028149213641881943,
+        0.05083676427602768,
+        -0.07053223252296448,
+        0.12090320140123367,
+        -0.13737183809280396,
+        -0.09807822853326797,
+        0.07203921675682068,
+        -0.01965559460222721,
+        0.036479320377111435,
+        -0.02657422423362732,
+        0.2924504280090332,
+        -0.19397024810314178,
+        -0.20908842980861664,
+        0.07435549795627594,
+        0.011985386721789837,
+        -0.051603686064481735,
+        0.039122600108385086,
+        0.5911946892738342,
+        -0.45937344431877136,
+        -0.43863579630851746,
+        0.23180224001407623,
+        0.05592876672744751,
+        -0.10227655619382858,
+        0.1371937245130539,
+        0.7193072438240051,
+        -0.6789532899856567,
+        -0.5275344252586365,
+        0.4098500609397888,
+        0.09136661887168884,
+        -0.08802130073308945,
+        0.12226735055446625,
+        0.6819202303886414,
+        -0.7316576838493347,
+        -0.5229181051254272,
+        0.37578293681144714,
+        0.09086397290229797,
+        -0.05128701403737068,
+        0.09287497401237488,
+        0.5103837251663208,
+        -0.6150248646736145,
+        -0.3208717107772827,
+        0.29780012369155884,
+        0.071808360517025,
+        0.04605705663561821,
+        0.028153980150818825,
+        0.30872926115989685,
+        -0.32211968302726746,
+        -0.1925150454044342,
+        0.18948692083358765,
+        0.07391810417175293,
+        0.08546463400125504,
+        -0.07042243331670761,
+        0.14390304684638977,
+        -0.22509464621543884,
+        -0.12615789473056793,
+        0.09681600332260132,
+        0.0030679223127663136,
+        0.06206878274679184,
+        -0.0493885837495327,
+        0.11675205081701279,
+        -0.09476804733276367,
+        -0.0708041712641716,
+        0.027848264202475548,
+        0.018535451963543892,
+        0.01112216804176569,
+        -0.023546719923615456,
+        0.2808285057544708,
+        -0.2312571257352829,
+        -0.16320407390594482,
+        0.15229304134845734,
+        -0.007220278959721327,
+        -0.026767488569021225,
+        -0.008487970568239689,
+        0.39064091444015503,
+        -0.3746477961540222,
+        -0.22930599749088287,
+        0.23297259211540222,
+        -0.020648201927542686,
+        -0.03918099403381348,
+        -0.03193120285868645,
+        0.37857353687286377,
+        -0.38306936621665955,
+        -0.25103962421417236,
+        0.2414209097623825,
+        0.007709929719567299,
+        -0.041483473032712936,
+        -0.001570625347085297,
+        0.315625935792923,
+        -0.276553213596344,
+        -0.13154125213623047,
+        0.17517149448394775,
+        0.03219839558005333,
+        0.002647437620908022,
+        -0.012777225114405155,
+        0.17064248025417328,
+        -0.13943275809288025,
+        -0.10204917937517166,
+        0.09418098628520966,
+        0.026260169222950935,
+        0.05167905613780022,
+        -0.024634944275021553,
+        0.0931941494345665,
+        -0.11875593662261963,
+        -0.0752263143658638,
+        0.0569780170917511,
+        0.00024334408226422966,
+        -0.001991289434954524,
+        -0.012094452045857906,
+        -0.0012201170902699232,
+        0.01342268567532301,
+        0.006425719242542982,
+        0.01147665549069643,
+        -0.002208880614489317,
+        -0.019385183230042458,
+        -0.024868011474609375,
+        0.00465290667489171,
+        0.009205960668623447,
+        0.0016242304118350148,
+        0.0059639886021614075,
+        -0.03436571732163429,
+        0.01672518253326416,
+        0.008815832436084747,
+        0.06389293074607849,
+        0.06249547377228737,
+        0.06542838364839554,
+        0.043118152767419815,
+        0.04117512330412865,
+        0.014435848221182823,
+        0.0065850247628986835,
+        0.03811212629079819,
+        -0.006077034864574671,
+        -0.004025861620903015,
+        0.006247953977435827,
+        0.014478449709713459,
+        0.0009701942908577621,
+        -0.002422194229438901,
+        0.009390920400619507,
+        -0.052253514528274536,
+        -0.05192738026380539,
+        -0.010346310213208199,
+        -0.001328076352365315,
+        -0.002972622634842992,
+        0.0015572139527648687,
+        0.022503724321722984,
+        -0.002475353656336665,
+        0.001927886507473886,
+        0.02994818612933159,
+        0.02062363363802433,
+        -0.0010653833160176873,
+        -0.005995174869894981,
+        0.024450020864605904,
+        0.013005194254219532,
+        0.0496530681848526,
+        0.029475165531039238,
+        0.004157512914389372,
+        -0.0007043799851089716,
+        0.01860312558710575,
+        0.03839566186070442,
+        0.00014980587002355605,
+        0.018569663166999817,
+        0.05668198689818382,
+        0.04645680636167526,
+        0.01642409712076187,
+        0.03577466681599617,
+        0.03575601801276207,
+        -0.03680748492479324,
+        -0.01865880750119686,
+        0.041660092771053314,
+        0.033268485218286514,
+        0.03338993713259697,
+        0.04665865749120712,
+        -0.03322917968034744,
+        -0.2860279381275177,
+        -0.28877392411231995,
+        -0.09617949277162552,
+        0.014234350994229317,
+        0.038012001663446426,
+        -0.016850680112838745,
+        -0.27252569794654846,
+        -0.6714493632316589,
+        -0.686245322227478,
+        -0.3376169502735138,
+        -0.0812990590929985,
+        0.003058002796024084,
+        -0.026376569643616676,
+        -0.29216718673706055,
+        -0.6779875159263611,
+        -0.6917123198509216,
+        -0.3184400796890259,
+        -0.058261968195438385,
+        0.06338769942522049,
+        0.03199980780482292,
+        -0.09837217628955841,
+        -0.3355932831764221,
+        -0.30900436639785767,
+        -0.04878076910972595,
+        0.061543505638837814,
+        0.04651529714465141,
+        0.0263908002525568,
+        0.0030237447936087847,
+        -0.10458099842071533,
+        -0.07959774881601334,
+        0.05430716276168823,
+        0.056767694652080536,
+        0.00796051137149334,
+        -0.016737859696149826,
+        -0.042338743805885315,
+        -0.0198048185557127,
+        -0.03085070475935936,
+        -0.058721307665109634,
+        -0.036032311618328094,
+        -0.0035414688754826784,
+        -8.359456842299551e-05,
+        -0.02213932015001774,
+        0.02032857947051525,
+        0.021788733080029488,
+        -0.03522418439388275,
+        -0.025317413732409477,
+        -0.042937491089105606,
+        -0.05680134892463684,
+        -0.012510996311903,
+        0.226289302110672,
+        0.24401520192623138,
+        0.022300971671938896,
+        -0.030825607478618622,
+        -0.05485948920249939,
+        0.007590078748762608,
+        0.2208130657672882,
+        0.6964298486709595,
+        0.7457719445228577,
+        0.3470557630062103,
+        0.06941442936658859,
+        -0.03543366119265556,
+        0.035853609442710876,
+        0.2872598171234131,
+        0.7504303455352783,
+        0.7509996294975281,
+        0.34327855706214905,
+        0.024429334327578545,
+        -0.05711393058300018,
+        -0.034500252455472946,
+        0.057939525693655014,
+        0.33292675018310547,
+        0.3141649067401886,
+        0.033748809248209,
+        -0.062175147235393524,
+        -0.041224412620067596,
+        -0.01891348883509636,
+        -0.014519350603222847,
+        0.08635713160037994,
+        0.03148616850376129,
+        -0.08749162405729294,
+        -0.05658482387661934,
+        0.00018510188965592533,
+        0.002624311950057745,
+        -0.003570129396393895,
+        0.0067627751268446445,
+        -0.01349653396755457,
+        -0.003961967770010233,
+        0.0034001911990344524,
+        -0.00385954394005239,
+        0.018012456595897675,
+        -0.018755480647087097,
+        -0.03163064643740654,
+        -0.0035233700182288885,
+        0.011690095998346806,
+        -0.014693490229547024,
+        0.017746854573488235,
+        0.05693097040057182,
+        0.1272590607404709,
+        0.23477119207382202,
+        0.19823509454727173,
+        0.05071045830845833,
+        -0.007188393268734217,
+        -0.05571149289608002,
+        -0.06468938291072845,
+        -0.017831332981586456,
+        -0.07572834193706512,
+        -0.19599483907222748,
+        -0.15608063340187073,
+        -0.039450764656066895,
+        -0.035583946853876114,
+        -0.1605951488018036,
+        -0.5041624307632446,
+        -0.6836286783218384,
+        -0.3773191571235657,
+        -0.08623629808425903,
+        -0.04881078004837036,
+        0.029403403401374817,
+        0.15516817569732666,
+        0.4108496308326721,
+        0.6393839716911316,
+        0.4688946008682251,
+        0.2135964334011078,
+        0.0623941570520401,
+        0.02426956780254841,
+        -8.065254223765805e-05,
+        -0.00816427543759346,
+        -0.09353788942098618,
+        -0.06872912496328354,
+        -0.029405562207102776,
+        0.012364620342850685,
+        0.0060868943110108376,
+        0.017015695571899414,
+        -0.0076495204120874405,
+        -0.006090708542615175,
+        -0.016521835699677467,
+        0.009218892082571983,
+        0.030833140015602112,
+        -0.0002345978282392025,
+        0.03332215175032616,
+        0.0030349211301654577,
+        0.009600857272744179,
+        0.05706647038459778,
+        0.06095677986741066,
+        -0.016137542203068733,
+        0.03195658698678017,
+        0.13535599410533905,
+        0.28229761123657227,
+        0.4573267698287964,
+        0.39102476835250854,
+        0.17547546327114105,
+        0.005337159149348736,
+        -0.07699840515851974,
+        -0.12667469680309296,
+        -0.16613735258579254,
+        -0.2908898890018463,
+        -0.44942277669906616,
+        -0.34229782223701477,
+        -0.16225378215312958,
+        -0.1100199744105339,
+        -0.4044281840324402,
+        -0.9058251976966858,
+        -1.1549302339553833,
+        -0.7502554059028625,
+        -0.2716369032859802,
+        -0.13495275378227234,
+        0.08614412695169449,
+        0.3164423108100891,
+        0.7155097723007202,
+        1.0356683731079102,
+        0.7939887642860413,
+        0.39567017555236816,
+        0.16957539319992065,
+        0.02675812318921089,
+        0.048314403742551804,
+        0.053107086569070816,
+        -0.009243623353540897,
+        -0.011442561633884907,
+        0.004911235999315977,
+        0.012210517190396786,
+        0.006660772021859884,
+        -0.004562888294458389,
+        -0.009606098756194115,
+        -0.01610635593533516,
+        -0.03475078567862511,
+        0.007796770427376032,
+        0.02015513926744461,
+        0.020311446860432625,
+        0.009043446741998196,
+        -0.01929326355457306,
+        -0.04183953255414963,
+        -0.003052672604098916,
+        0.020744286477565765,
+        0.01371331699192524,
+        0.004048139322549105,
+        0.0692848190665245,
+        0.16867054998874664,
+        0.2799474000930786,
+        0.28119951486587524,
+        0.13579942286014557,
+        -0.0015732255997136235,
+        -0.05406518653035164,
+        -0.05831173434853554,
+        -0.034435681998729706,
+        -0.11925295740365982,
+        -0.2570647895336151,
+        -0.19120880961418152,
+        -0.09981344640254974,
+        -0.011702792719006538,
+        -0.22477947175502777,
+        -0.5395713448524475,
+        -0.7111374139785767,
+        -0.4207299053668976,
+        -0.11811137199401855,
+        -0.035199034959077835,
+        0.024358956143260002,
+        0.16262274980545044,
+        0.46769100427627563,
+        0.677872896194458,
+        0.4637402892112732,
+        0.15558630228042603,
+        0.04467496648430824,
+        0.03221412003040314,
+        0.02430277317762375,
+        -0.006398700177669525,
+        -0.07235423475503922,
+        -0.03669704124331474,
+        -0.000992153538390994,
+        0.02220241352915764,
+        -0.03329842537641525,
+        0.05199713259935379,
+        -0.14053553342819214,
+        0.1906905472278595,
+        -0.13544943928718567,
+        0.08535720407962799,
+        -0.009813228622078896,
+        0.03578176349401474,
+        -0.05863757058978081,
+        0.33848440647125244,
+        -0.49837300181388855,
+        0.15308170020580292,
+        0.14865124225616455,
+        -0.12349266558885574,
+        -0.025796135887503624,
+        0.17790427803993225,
+        -0.7813658714294434,
+        0.853188693523407,
+        0.2489670068025589,
+        -0.7378701567649841,
+        0.2207188457250595,
+        0.05207442864775658,
+        -0.4280349314212799,
+        1.1408430337905884,
+        -0.24505679309368134,
+        -1.5490919351577759,
+        1.4560288190841675,
+        -0.31143030524253845,
+        -0.03536878153681755,
+        0.5640448331832886,
+        -0.6874421834945679,
+        -1.210310697555542,
+        2.6637399196624756,
+        -1.6589887142181396,
+        0.2221546173095703,
+        0.10179737955331802,
+        -0.4354941248893738,
+        0.034149203449487686,
+        1.480568528175354,
+        -2.072199821472168,
+        0.9205833673477173,
+        0.021510563790798187,
+        -0.07755836099386215,
+        0.17983688414096832,
+        0.040537625551223755,
+        -0.5325585603713989,
+        0.550999641418457,
+        -0.11060550063848495,
+        -0.09052976220846176,
+        -0.048361390829086304,
+        0.03450514376163483,
+        -0.11854307353496552,
+        0.23462797701358795,
+        -0.17563995718955994,
+        0.0653814822435379,
+        -0.009748813696205616,
+        0.07013920694589615,
+        -0.08628369867801666,
+        0.3019683063030243,
+        -0.630340576171875,
+        0.274477481842041,
+        0.15417183935642242,
+        -0.036220982670784,
+        -0.07344137132167816,
+        0.2339126616716385,
+        -1.0395091772079468,
+        1.2002928256988525,
+        0.085142120718956,
+        -0.7080597281455994,
+        0.23101751506328583,
+        0.016307154670357704,
+        -0.45877355337142944,
+        1.617128849029541,
+        -0.6593433618545532,
+        -1.8957709074020386,
+        1.746606469154358,
+        -0.37062564492225647,
+        0.01213759370148182,
+        0.5851964354515076,
+        -1.0307577848434448,
+        -1.4803766012191772,
+        3.812014102935791,
+        -2.0028398036956787,
+        0.12008816003799438,
+        0.01813559979200363,
+        -0.5065457820892334,
+        0.17598780989646912,
+        2.0418734550476074,
+        -2.680522918701172,
+        0.7466094493865967,
+        0.16271913051605225,
+        -0.04379571974277496,
+        0.21930621564388275,
+        0.041255541145801544,
+        -0.6644601821899414,
+        0.481300413608551,
+        0.05410065874457359,
+        -0.09025495499372482,
+        0.01954805478453636,
+        0.01899997517466545,
+        -0.1337241530418396,
+        0.19821906089782715,
+        -0.06395180523395538,
+        -0.03586877882480621,
+        0.01973363384604454,
+        0.013873124495148659,
+        -0.09288538247346878,
+        0.4300728440284729,
+        -0.4235192537307739,
+        0.03646458685398102,
+        0.10077393800020218,
+        -0.07569073140621185,
+        -0.08176662772893906,
+        0.3834531605243683,
+        -0.747482419013977,
+        0.4493187367916107,
+        0.2960513234138489,
+        -0.5245057344436646,
+        0.27831950783729553,
+        0.0731748417019844,
+        -0.45574328303337097,
+        0.6987965703010559,
+        0.019539732486009598,
+        -1.1160184144973755,
+        1.0756875276565552,
+        -0.3804619312286377,
+        -0.040626902133226395,
+        0.2780243456363678,
+        -0.32946258783340454,
+        -0.8122196793556213,
+        1.9535348415374756,
+        -1.300661563873291,
+        0.3443142771720886,
+        0.04858396574854851,
+        -0.17409801483154297,
+        -0.07783844321966171,
+        1.0875797271728516,
+        -1.5148566961288452,
+        0.8014272451400757,
+        -0.19643208384513855,
+        -0.033590562641620636,
+        0.11178025603294373,
+        0.08284300565719604,
+        -0.5165408849716187,
+        0.5841389894485474,
+        -0.24739950895309448,
+        0.027926180511713028,
+        -0.028708497062325478,
+        0.0037401756271719933,
+        -0.0047450135461986065,
+        0.008427698165178299,
+        0.009801353327929974,
+        -0.0029346586670726538,
+        -0.010193527676165104,
+        0.014876358211040497,
+        0.009861295111477375,
+        -0.005554665345698595,
+        -0.06270359456539154,
+        -0.0316256619989872,
+        0.006706684362143278,
+        0.04316525161266327,
+        0.008637072518467903,
+        -0.03666357323527336,
+        -0.0719730481505394,
+        -0.1525861918926239,
+        -0.14396126568317413,
+        -0.05387119948863983,
+        0.01955549605190754,
+        0.007112634833902121,
+        -0.05175568535923958,
+        -0.16772602498531342,
+        -0.20807777345180511,
+        -0.18768996000289917,
+        -0.17093753814697266,
+        -0.03334345668554306,
+        0.0011808606795966625,
+        -0.01579100452363491,
+        -0.12589050829410553,
+        -0.17219413816928864,
+        -0.19648219645023346,
+        -0.21980451047420502,
+        -0.04920821264386177,
+        0.0012217299081385136,
+        0.023885242640972137,
+        -0.056074876338243484,
+        -0.13907776772975922,
+        -0.19139252603054047,
+        -0.13652737438678741,
+        -0.0027339402586221695,
+        0.004720518831163645,
+        -0.00037206560955382884,
+        0.017924504354596138,
+        -0.02118082158267498,
+        -0.06553903222084045,
+        -0.0435921773314476,
+        0.02721239998936653,
+        0.020702000707387924,
+        0.024033410474658012,
+        0.005382229574024677,
+        -0.01273527555167675,
+        -0.01742861233651638,
+        0.007402990944683552,
+        0.010333286598324776,
+        0.02598601020872593,
+        0.012456837110221386,
+        -0.03471057116985321,
+        -0.10051856189966202,
+        -0.08084382116794586,
+        -0.023420603945851326,
+        0.031205907464027405,
+        0.00424322672188282,
+        -0.03734385594725609,
+        -0.1152661070227623,
+        -0.2012551724910736,
+        -0.1995576024055481,
+        -0.07972321659326553,
+        -0.011126434430480003,
+        -0.0185835100710392,
+        -0.06944561004638672,
+        -0.21481844782829285,
+        -0.26795628666877747,
+        -0.24916253983974457,
+        -0.17833945155143738,
+        -0.06658200174570084,
+        -0.00305415247566998,
+        -0.054028186947107315,
+        -0.19072681665420532,
+        -0.256619930267334,
+        -0.26868295669555664,
+        -0.21621295809745789,
+        -0.06564134359359741,
+        0.0031192339956760406,
+        0.013205861672759056,
+        -0.08044812828302383,
+        -0.18137820065021515,
+        -0.23007699847221375,
+        -0.13054916262626648,
+        -0.01135951280593872,
+        0.013734308071434498,
+        0.010981118306517601,
+        -0.02249351143836975,
+        -0.05804377421736717,
+        -0.10652261227369308,
+        -0.04163172468543053,
+        0.017101088538765907,
+        -0.028687385842204094,
+        -0.0019976652693003416,
+        0.009987232275307178,
+        0.010130539536476135,
+        0.0015575449215248227,
+        -0.000983694102615118,
+        -0.012845008634030819,
+        0.01329281460493803,
+        0.0029350779950618744,
+        -0.003755913581699133,
+        -0.036475058645009995,
+        -0.0245466697961092,
+        -0.0020879909861832857,
+        0.025867130607366562,
+        -0.0065954397432506084,
+        0.008656582795083523,
+        -0.04037104919552803,
+        -0.11718368530273438,
+        -0.13506115972995758,
+        -0.024255141615867615,
+        0.014097613282501698,
+        -0.0009370348998345435,
+        -0.010953565128147602,
+        -0.12869219481945038,
+        -0.18789908289909363,
+        -0.19098156690597534,
+        -0.12795749306678772,
+        -0.002666366985067725,
+        -0.004907527007162571,
+        -0.014610078185796738,
+        -0.11913872510194778,
+        -0.19921070337295532,
+        -0.21869640052318573,
+        -0.1849898099899292,
+        -0.03470952808856964,
+        0.0064156935550272465,
+        0.03401843458414078,
+        -0.04000416398048401,
+        -0.12354391813278198,
+        -0.16908879578113556,
+        -0.10385500639677048,
+        0.002833302365615964,
+        -0.036176733672618866,
+        -0.001048827893100679,
+        0.010002595372498035,
+        -0.020798830315470695,
+        -0.0488261841237545,
+        -0.002972641494125128,
+        0.016395021229982376,
+        -0.045770127326250076,
+        -0.12710650265216827,
+        -0.1637774109840393,
+        -0.1411965787410736,
+        0.20447289943695068,
+        0.509396493434906,
+        0.07264503091573715,
+        0.12041529268026352,
+        -0.015143441036343575,
+        -0.2673257887363434,
+        -0.3589763641357422,
+        0.11289574205875397,
+        0.8517020344734192,
+        0.7068799138069153,
+        0.067301444709301,
+        -0.02102830447256565,
+        -0.5235708355903625,
+        -1.2064802646636963,
+        -0.856619656085968,
+        0.26774707436561584,
+        0.6825867295265198,
+        0.13516077399253845,
+        0.3054035007953644,
+        -0.0727991834282875,
+        -1.4912222623825073,
+        -1.906838297843933,
+        -0.8574200868606567,
+        -0.15282419323921204,
+        0.39327505230903625,
+        0.9758505821228027,
+        1.2323224544525146,
+        0.18179064989089966,
+        -0.947610080242157,
+        -0.6657719016075134,
+        -0.19935055077075958,
+        -0.09150458872318268,
+        0.34379544854164124,
+        1.2025749683380127,
+        0.9517407417297363,
+        -0.12023784220218658,
+        -0.3146151900291443,
+        -0.1049022302031517,
+        -0.34867578744888306,
+        -0.32945582270622253,
+        0.28920575976371765,
+        0.7844374179840088,
+        0.35520124435424805,
+        0.007452746387571096,
+        0.018862545490264893,
+        -0.0021927610505372286,
+        0.0321974977850914,
+        0.05439181253314018,
+        -0.030729038640856743,
+        -0.03517322614789009,
+        -0.037830010056495667,
+        -0.056672073900699615,
+        -0.017769837751984596,
+        0.06385952979326248,
+        0.08161566406488419,
+        0.07809178531169891,
+        0.06333671510219574,
+        -0.036322008818387985,
+        -0.06432312726974487,
+        -0.03629852458834648,
+        0.010879911482334137,
+        0.088901087641716,
+        0.0021402277052402496,
+        0.09618857502937317,
+        0.02661084569990635,
+        -0.03414442762732506,
+        -0.08736730366945267,
+        -0.048222169280052185,
+        0.03507986292243004,
+        -0.053828027099370956,
+        0.006044292356818914,
+        0.04232194274663925,
+        0.001624415279366076,
+        -0.028371643275022507,
+        -0.08724038302898407,
+        -0.005835397634655237,
+        0.01057528518140316,
+        0.04210871085524559,
+        0.06106603890657425,
+        0.04250370338559151,
+        0.0028668276499956846,
+        -0.07583706080913544,
+        -0.06849333643913269,
+        -0.08538331836462021,
+        -0.021475542336702347,
+        0.044341571629047394,
+        0.03604369983077049,
+        0.05146002024412155,
+        0.00280605535954237,
+        -0.004615028854459524,
+        -0.07857430726289749,
+        -0.03716180846095085,
+        0.010876243002712727,
+        -0.03418488800525665,
+        0.007391764782369137,
+        0.05969953536987305,
+        0.08769611269235611,
+        0.066011443734169,
+        -0.10404568910598755,
+        -0.27194535732269287,
+        -0.05224551260471344,
+        -0.03618992492556572,
+        -0.023098375648260117,
+        0.13832588493824005,
+        0.21510572731494904,
+        -0.07285867631435394,
+        -0.489085853099823,
+        -0.33285844326019287,
+        -0.04830349236726761,
+        0.014211038127541542,
+        0.2612524926662445,
+        0.6911754608154297,
+        0.5294638276100159,
+        -0.2706173360347748,
+        -0.39350029826164246,
+        -0.05156399682164192,
+        -0.16490484774112701,
+        0.1161464974284172,
+        0.8029336929321289,
+        1.1809980869293213,
+        0.5025736689567566,
+        0.07084998488426208,
+        -0.1901131123304367,
+        -0.4918227195739746,
+        -0.603122889995575,
+        -0.09460704773664474,
+        0.5786081552505493,
+        0.35392242670059204,
+        0.1328991800546646,
+        -0.008106965571641922,
+        -0.2159435749053955,
+        -0.6369062662124634,
+        -0.5241336822509766,
+        0.06276796758174896,
+        0.1139409989118576,
+        0.05483332276344299,
+        0.1703934520483017,
+        0.14603517949581146,
+        -0.16187912225723267,
+        -0.4139055907726288,
+        -0.14918148517608643,
+        -0.06163417547941208,
+        0.005302567034959793,
+        0.015524876303970814,
+        -0.11895350366830826,
+        -0.19724233448505402,
+        0.03412429615855217,
+        0.10862118750810623,
+        0.08550503104925156,
+        -0.008599682711064816,
+        -0.03031114675104618,
+        -0.33224624395370483,
+        -0.27994298934936523,
+        0.196475550532341,
+        0.31109708547592163,
+        0.17151644825935364,
+        -0.04994147643446922,
+        -0.167176753282547,
+        -0.5247878432273865,
+        -0.21136601269245148,
+        0.54701828956604,
+        0.6110883951187134,
+        0.04194486886262894,
+        -0.27640673518180847,
+        -0.0795169249176979,
+        -0.360530287027359,
+        0.3472684621810913,
+        1.5428175926208496,
+        1.0249378681182861,
+        -0.2724844515323639,
+        -0.3013695478439331,
+        0.020736562088131905,
+        -0.019495302811264992,
+        0.7758124470710754,
+        1.5381159782409668,
+        0.028625331819057465,
+        -1.289720892906189,
+        -0.5894255638122559,
+        0.0526396706700325,
+        0.11443997919559479,
+        0.5935031771659851,
+        0.47169724106788635,
+        -1.2507063150405884,
+        -1.351940631866455,
+        -0.03894977271556854,
+        0.05095001682639122,
+        0.01581231690943241,
+        0.11137383431196213,
+        -0.22327138483524323,
+        -0.9629225730895996,
+        -0.2607772946357727,
+        0.5907121300697327,
+        0.006906076334416866,
+        0.002633580705150962,
+        0.01940075121819973,
+        0.0143396882340312,
+        0.020781584084033966,
+        -0.07249777764081955,
+        -0.016355905681848526,
+        0.016553230583667755,
+        -0.027528395876288414,
+        0.0244428887963295,
+        0.024910561740398407,
+        0.027229825034737587,
+        -0.04104151204228401,
+        0.007100561633706093,
+        0.0157785601913929,
+        -0.06626633554697037,
+        0.006520191207528114,
+        0.021171070635318756,
+        0.036674920469522476,
+        -0.06950324773788452,
+        -0.03003627620637417,
+        2.178798422391992e-05,
+        -0.07278106361627579,
+        0.014382920227944851,
+        0.0982266515493393,
+        0.1454961597919464,
+        -0.10096189379692078,
+        0.022237209603190422,
+        -0.00040665315464138985,
+        -0.013766243122518063,
+        0.06440296769142151,
+        0.21751047670841217,
+        0.02519127167761326,
+        -0.23383572697639465,
+        0.0038903038948774338,
+        -0.042271602898836136,
+        -0.012596859596669674,
+        0.023778460919857025,
+        0.07685687392950058,
+        -0.21480663120746613,
+        -0.19205358624458313,
+        0.04876565560698509,
+        -0.016765035688877106,
+        -0.02620583213865757,
+        0.01641852967441082,
+        0.02201787941157818,
+        -0.07457322627305984,
+        -0.003633625339716673,
+        0.07550841569900513,
+        0.024774253368377686,
+        0.04710151255130768,
+        0.09110233932733536,
+        -0.017366377636790276,
+        -0.04366954043507576,
+        -0.039786458015441895,
+        0.005311290733516216,
+        0.037867460399866104,
+        0.05367766693234444,
+        0.07434491813182831,
+        -0.07251215726137161,
+        -0.04231821000576019,
+        -0.023427855223417282,
+        0.036294277757406235,
+        0.07782749086618423,
+        0.11835407465696335,
+        0.08753973245620728,
+        -0.20742319524288177,
+        -0.13341759145259857,
+        -0.008225077763199806,
+        0.07292432337999344,
+        0.006392402108758688,
+        0.021914338693022728,
+        -0.09218581020832062,
+        -0.44192466139793396,
+        -0.1744878888130188,
+        0.014938815496861935,
+        0.10678526759147644,
+        -0.012087192386388779,
+        -0.024533385410904884,
+        -0.1804407387971878,
+        -0.3253834545612335,
+        0.040678758174180984,
+        0.2011708915233612,
+        0.17262929677963257,
+        -0.0045212251134216785,
+        -0.033313386142253876,
+        -0.10575363039970398,
+        -0.07636692374944687,
+        0.20343273878097534,
+        0.28330928087234497,
+        0.043149981647729874,
+        -0.01109551265835762,
+        -0.0027725452091544867,
+        0.003926735837012529,
+        0.029440222308039665,
+        0.23945140838623047,
+        0.09122566133737564,
+        -0.15140119194984436,
+        0.08737201988697052,
+        0.07120998948812485,
+        0.05722665786743164,
+        -0.04388495534658432,
+        0.02116825245320797,
+        0.023315919563174248,
+        0.10898162424564362,
+        0.11808467656373978,
+        0.03412344306707382,
+        0.002771642990410328,
+        -0.1959579437971115,
+        -0.05181330814957619,
+        -0.0044630044139921665,
+        0.12481725960969925,
+        0.09140311926603317,
+        0.03444851189851761,
+        -0.10931172221899033,
+        -0.3204459846019745,
+        -0.21193139255046844,
+        -0.11101037263870239,
+        0.04186606407165527,
+        -0.07420916110277176,
+        -0.2004990428686142,
+        -0.26937955617904663,
+        -0.12928874790668488,
+        0.20819628238677979,
+        -0.17379426956176758,
+        -0.2181481271982193,
+        0.005387924611568451,
+        -0.24132733047008514,
+        -0.23942433297634125,
+        0.41489261388778687,
+        1.0702778100967407,
+        0.024913936853408813,
+        -0.28405970335006714,
+        0.083008773624897,
+        -0.11059781163930893,
+        -0.17623695731163025,
+        -0.17386195063591003,
+        0.010644182562828064,
+        -0.32716259360313416,
+        -0.2135595828294754,
+        0.1223129853606224,
+        0.07060510665178299,
+        -0.048680394887924194,
+        -0.3332099914550781,
+        -0.25886017084121704,
+        -0.18619979918003082,
+        -0.00733158877119422,
+        0.03393476828932762,
+        -0.010564662516117096,
+        -0.01817108877003193,
+        -0.05650597810745239,
+        -0.01891104131937027,
+        -0.0554141066968441,
+        -0.004592927638441324,
+        -0.0013615720672532916,
+        -0.05552899092435837,
+        -0.0560498908162117,
+        -0.1080632209777832,
+        -0.013965745456516743,
+        -0.03290533646941185,
+        -0.02599845454096794,
+        -0.02877708151936531,
+        -0.05670137703418732,
+        -0.07158109545707703,
+        -0.08808472007513046,
+        -0.03919175639748573,
+        -0.08478893339633942,
+        -0.08045543730258942,
+        -0.10066724568605423,
+        -0.048338882625103,
+        -0.06750114262104034,
+        0.08164039999246597,
+        0.3343777060508728,
+        0.004952755756676197,
+        -0.14891156554222107,
+        0.032855477184057236,
+        -0.03277512267231941,
+        0.0474768728017807,
+        0.6316664814949036,
+        1.2214386463165283,
+        0.2548498213291168,
+        -0.13185030221939087,
+        -0.018188906833529472,
+        -0.07653989642858505,
+        -0.01643386110663414,
+        0.06630122661590576,
+        0.23864209651947021,
+        -0.013703612610697746,
+        -0.09347789734601974,
+        -0.0900193303823471,
+        -0.04930814355611801,
+        -0.02791711315512657,
+        -0.15441712737083435,
+        -0.01623091846704483,
+        -0.0447690524160862,
+        -0.06071227043867111,
+        -0.04737209901213646,
+        -0.059769801795482635,
+        -0.04375007003545761,
+        -0.00650476710870862,
+        0.021540174260735512,
+        -0.05590728670358658,
+        -0.13030850887298584,
+        -0.022067781537771225,
+        -0.05066747963428497,
+        0.00609770929440856,
+        0.108611099421978,
+        0.1621929407119751,
+        0.05232185125350952,
+        -0.049729123711586,
+        -0.11906369775533676,
+        -0.030973592773079872,
+        0.057787079364061356,
+        0.1610448956489563,
+        0.18756121397018433,
+        0.07277501374483109,
+        -0.05777435004711151,
+        -0.05227195844054222,
+        0.14434091746807098,
+        0.1889694482088089,
+        0.26951169967651367,
+        0.4710105359554291,
+        0.2164669781923294,
+        0.05052375793457031,
+        -0.0038236663676798344,
+        0.20267778635025024,
+        0.31214746832847595,
+        0.7506387829780579,
+        1.2302387952804565,
+        0.4363090693950653,
+        0.16759593784809113,
+        -0.049752235412597656,
+        0.044786907732486725,
+        0.14537742733955383,
+        0.2227499932050705,
+        0.37362414598464966,
+        0.16590620577335358,
+        0.0864599421620369,
+        -0.14058542251586914,
+        -0.04404178634285927,
+        -0.0325944609940052,
+        -0.019113417714834213,
+        0.17414243519306183,
+        0.11160623282194138,
+        -0.034911543130874634,
+        0.1523953527212143,
+        0.04554234445095062,
+        -0.054958827793598175,
+        -0.11794494092464447,
+        -0.19570015370845795,
+        -0.21358126401901245,
+        -0.1885669231414795,
+        -0.08286706358194351,
+        -0.29818814992904663,
+        -0.52330082654953,
+        -0.6190353631973267,
+        -0.682529091835022,
+        -0.6171367764472961,
+        -0.4793100655078888,
+        -0.11180876195430756,
+        -0.3490432798862457,
+        -0.5531057715415955,
+        -0.6426181793212891,
+        -0.6420838832855225,
+        -0.4970071613788605,
+        -0.27038174867630005,
+        -0.09740017354488373,
+        -0.1929621547460556,
+        -0.30848363041877747,
+        -0.27204805612564087,
+        -0.2515120208263397,
+        -0.07497832179069519,
+        0.03551386669278145,
+        -0.05060403421521187,
+        0.08276989310979843,
+        0.14321963489055634,
+        0.3583574593067169,
+        0.40667927265167236,
+        0.39398193359375,
+        0.27561235427856445,
+        0.005085935816168785,
+        0.2793635427951813,
+        0.48155927658081055,
+        0.7088037729263306,
+        0.7394692897796631,
+        0.6158861517906189,
+        0.3986552655696869,
+        0.025508087128400803,
+        0.38533228635787964,
+        0.5305332541465759,
+        0.6659612059593201,
+        0.6396889090538025,
+        0.5396444797515869,
+        0.39010515809059143,
+        -0.03072960674762726,
+        0.014305810444056988,
+        0.029885446652770042,
+        0.038084372878074646,
+        0.012448564171791077,
+        0.034353457391262054,
+        0.048626724630594254,
+        0.048866890370845795,
+        0.07561437785625458,
+        0.09152165800333023,
+        0.08432324975728989,
+        0.09332144260406494,
+        0.07517607510089874,
+        0.049146559089422226,
+        0.03146318346261978,
+        0.06335246562957764,
+        0.06438779830932617,
+        0.06851581484079361,
+        0.09263566881418228,
+        0.06460423022508621,
+        0.011992924846708775,
+        0.03396693989634514,
+        0.04433950409293175,
+        0.04642309248447418,
+        0.0022602551616728306,
+        -0.0361824594438076,
+        -0.0005105047021061182,
+        0.030808264389634132,
+        0.0022333709057420492,
+        -0.017826544120907784,
+        -0.03796307370066643,
+        -0.012887164019048214,
+        -0.028499294072389603,
+        -0.03367336839437485,
+        -0.03668365254998207,
+        -0.02807682938873768,
+        -0.07444571703672409,
+        -0.081318199634552,
+        -0.09610070288181305,
+        -0.05368436127901077,
+        -0.09006591141223907,
+        -0.10038736462593079,
+        -0.04115951433777809,
+        -0.056811004877090454,
+        -0.09935522079467773,
+        -0.11107856035232544,
+        -0.07852742075920105,
+        -0.0942930206656456,
+        -0.07625897973775864,
+        -0.12966541945934296,
+        -0.038938648998737335,
+        0.04580259323120117,
+        0.10179819911718369,
+        0.17127273976802826,
+        0.17857632040977478,
+        0.13426578044891357,
+        0.04687841981649399,
+        0.2424812912940979,
+        0.42633309960365295,
+        0.5291624069213867,
+        0.6012980937957764,
+        0.5449428558349609,
+        0.3945220708847046,
+        0.07037744671106339,
+        0.26918724179267883,
+        0.44614800810813904,
+        0.5331310629844666,
+        0.568580687046051,
+        0.43367546796798706,
+        0.25516101717948914,
+        0.08428427577018738,
+        0.177769735455513,
+        0.24885930120944977,
+        0.2178547978401184,
+        0.13834305107593536,
+        0.07452446967363358,
+        0.005187708884477615,
+        0.050621017813682556,
+        -0.08428733795881271,
+        -0.15576106309890747,
+        -0.25531095266342163,
+        -0.34646397829055786,
+        -0.3276817202568054,
+        -0.24377694725990295,
+        0.02817704901099205,
+        -0.2531633675098419,
+        -0.3907041549682617,
+        -0.5944734811782837,
+        -0.6062930822372437,
+        -0.5171639919281006,
+        -0.3501560389995575,
+        -0.019397703930735588,
+        -0.2758809030056,
+        -0.4118667244911194,
+        -0.5375933051109314,
+        -0.5525977611541748,
+        -0.44681206345558167,
+        -0.2748269736766815,
+        -0.04229651764035225,
+        -0.005005967803299427,
+        -0.011332424357533455,
+        0.011387092061340809,
+        -0.015463154762983322,
+        -0.012038768269121647,
+        0.011360889300704002,
+        0.03551746904850006,
+        0.05123865604400635,
+        0.020377267152071,
+        0.1065637394785881,
+        0.18875306844711304,
+        0.18516196310520172,
+        0.12519532442092896,
+        -0.042940977960824966,
+        -0.03246130794286728,
+        -0.016645772382616997,
+        0.07807288318872452,
+        -0.7815885543823242,
+        -0.5930942296981812,
+        0.03312799707055092,
+        -0.04537777230143547,
+        -0.022234303876757622,
+        0.009241255931556225,
+        0.16947965323925018,
+        -0.0700032040476799,
+        -0.06346366554498672,
+        0.09555318206548691,
+        0.02858082763850689,
+        0.009246457368135452,
+        0.03902693837881088,
+        0.007071994710713625,
+        0.10085106641054153,
+        0.0881502702832222,
+        0.011019160971045494,
+        0.006030070595443249,
+        -0.012882355600595474,
+        -0.01701420359313488,
+        0.022596944123506546,
+        -0.05345382168889046,
+        0.02355102449655533,
+        -0.0091088330373168,
+        0.00015542628534603864,
+        -0.0004997836658731103,
+        -0.006951311603188515,
+        0.01267238613218069,
+        -0.0033983420580625534,
+        -0.0030770134180784225,
+        0.02975126914680004,
+        0.010702245868742466,
+        -0.016947058960795403,
+        0.007774800062179565,
+        0.09566964209079742,
+        0.07426714897155762,
+        0.1621979922056198,
+        0.12728945910930634,
+        0.06112523376941681,
+        0.06061968579888344,
+        0.07934501022100449,
+        0.11534841358661652,
+        0.10001469403505325,
+        0.15475066006183624,
+        0.1828109323978424,
+        0.02134544588625431,
+        -0.015320047736167908,
+        0.012000483460724354,
+        -0.014393450692296028,
+        -1.5520576238632202,
+        -1.2115217447280884,
+        0.017239907756447792,
+        -0.007013735361397266,
+        0.0019166347337886691,
+        0.025112343952059746,
+        0.1803419440984726,
+        -0.30807924270629883,
+        -0.33957329392433167,
+        0.10846519470214844,
+        0.06151076406240463,
+        0.054799750447273254,
+        0.06235412135720253,
+        0.09605015069246292,
+        0.16495031118392944,
+        0.12624189257621765,
+        0.12234552949666977,
+        0.006969878450036049,
+        0.0033541936427354813,
+        0.008165130391716957,
+        0.035377491265535355,
+        -0.03170061111450195,
+        0.019396571442484856,
+        -0.011411413550376892,
+        0.019043665379285812,
+        0.00957057997584343,
+        0.0055394587107002735,
+        0.05569477006793022,
+        0.0076510305516421795,
+        0.018707536160945892,
+        0.06073765829205513,
+        0.006503407843410969,
+        -0.0058801183477044106,
+        -0.03229741007089615,
+        0.0386439748108387,
+        0.03167358413338661,
+        0.027749545872211456,
+        -0.04634377732872963,
+        -0.00019781991431955248,
+        0.024982664734125137,
+        0.009453915059566498,
+        0.1091528981924057,
+        0.21055325865745544,
+        0.23810525238513947,
+        0.13829846680164337,
+        -0.019112061709165573,
+        -0.0014926757430657744,
+        0.01856786385178566,
+        0.10649964213371277,
+        -0.8599057793617249,
+        -0.6383436322212219,
+        0.10839059948921204,
+        -0.038730181753635406,
+        -0.030203847214579582,
+        -0.033147793263196945,
+        0.18132103979587555,
+        -0.1427767276763916,
+        -0.11132896691560745,
+        0.10957232862710953,
+        -0.00349965482018888,
+        0.03486581891775131,
+        0.016247740015387535,
+        0.060106489807367325,
+        0.1439678966999054,
+        0.07201634347438812,
+        0.07603273540735245,
+        -0.0072280303575098515,
+        0.01600506529211998,
+        -0.012912745587527752,
+        0.015192546881735325,
+        -0.034853674471378326,
+        0.026164958253502846,
+        0.001483929343521595,
+        0.0508253313601017,
+        -0.010546445846557617,
+        -0.024398569017648697,
+        -0.0043407524935901165,
+        0.0030393539927899837,
+        -0.009643012657761574,
+        -0.008882591500878334,
+        0.01182172168046236,
+        0.003359999740496278,
+        -0.01145304087549448,
+        -7.34154018573463e-05,
+        0.007416137028485537,
+        -0.012022661976516247,
+        0.013550116680562496,
+        -0.005982181057333946,
+        -0.019205773249268532,
+        -0.0811527743935585,
+        -0.06323252618312836,
+        -0.026379290968179703,
+        -0.04671972244977951,
+        -0.006205265875905752,
+        0.05242094770073891,
+        0.05065605416893959,
+        0.01961991749703884,
+        0.021542323753237724,
+        0.04147094115614891,
+        0.04451332613825798,
+        0.05155060812830925,
+        0.15659169852733612,
+        0.4448348879814148,
+        0.7207449078559875,
+        0.8680058717727661,
+        0.7269517779350281,
+        0.36259666085243225,
+        0.10394725203514099,
+        -0.20449180901050568,
+        -0.42664405703544617,
+        -0.7290332317352295,
+        -0.9376083016395569,
+        -0.735107958316803,
+        -0.3541502356529236,
+        -0.23789332807064056,
+        -0.10901623964309692,
+        -0.26809337735176086,
+        -0.38465574383735657,
+        -0.44440212845802307,
+        -0.4070444703102112,
+        -0.22405119240283966,
+        -0.14190013706684113,
+        0.07151509076356888,
+        0.21848519146442413,
+        0.41893038153648376,
+        0.4783499836921692,
+        0.4281534254550934,
+        0.28631147742271423,
+        0.057699400931596756,
+        0.0029010034631937742,
+        -0.02580493874847889,
+        -0.02152368798851967,
+        -0.025850815698504448,
+        0.004789783153682947,
+        0.021941278129816055,
+        0.00574735039845109,
+        -0.004016151186078787,
+        -0.014377521350979805,
+        -0.0828985944390297,
+        -0.06380187720060349,
+        -0.048879947513341904,
+        -0.04580164700746536,
+        -0.030843649059534073,
+        0.024663949385285378,
+        0.03409295156598091,
+        0.060452476143836975,
+        0.037006158381700516,
+        0.058853648602962494,
+        0.07275765389204025,
+        0.02882941998541355,
+        0.14549848437309265,
+        0.4268765151500702,
+        0.7150183320045471,
+        0.8942612409591675,
+        0.7532845139503479,
+        0.3846176564693451,
+        0.15604183077812195,
+        -0.19108416140079498,
+        -0.42633384466171265,
+        -0.7508237361907959,
+        -0.9448286890983582,
+        -0.719300389289856,
+        -0.3583783805370331,
+        -0.2060524821281433,
+        -0.10382426530122757,
+        -0.2624296545982361,
+        -0.4049411416053772,
+        -0.4338999092578888,
+        -0.41390693187713623,
+        -0.22797809541225433,
+        -0.14593803882598877,
+        0.08197329193353653,
+        0.2430788278579712,
+        0.3906225562095642,
+        0.47147202491760254,
+        0.42429792881011963,
+        0.29326340556144714,
+        0.06683206558227539,
+        0.004355552606284618,
+        -0.007973028346896172,
+        0.0035172239877283573,
+        -0.0018502225866541266,
+        -0.015291260555386543,
+        0.0025160792283713818,
+        0.0015979957534000278,
+        0.011951611377298832,
+        -0.0004334237310104072,
+        -0.00172338483389467,
+        0.017284434288740158,
+        -0.00445173867046833,
+        -0.004828867502510548,
+        0.004030159674584866,
+        0.03321678191423416,
+        -0.016998661682009697,
+        -0.029765218496322632,
+        -0.07912255078554153,
+        -0.0494595468044281,
+        0.012136446312069893,
+        0.029541414231061935,
+        -0.01129366084933281,
+        0.09502168744802475,
+        0.21533286571502686,
+        0.3453419804573059,
+        0.22987395524978638,
+        0.04720258712768555,
+        0.0032486498821526766,
+        -0.0042808204889297485,
+        -0.10162857174873352,
+        -0.21601493656635284,
+        -0.3040534257888794,
+        -0.19600912928581238,
+        -0.0568307563662529,
+        -0.0062937624752521515,
+        -0.021828925237059593,
+        -0.03831009939312935,
+        -0.08992031216621399,
+        -0.08103442937135696,
+        -0.07600760459899902,
+        -0.02319694682955742,
+        -0.008472982794046402,
+        -0.004151565954089165,
+        0.05002164468169212,
+        0.0985124409198761,
+        0.11273156106472015,
+        0.10279814153909683,
+        0.032678257673978806,
+        -0.023295480757951736,
+        -0.022312145680189133,
+        0.032877422869205475,
+        0.08301658928394318,
+        -0.049675002694129944,
+        -0.05956050381064415,
+        0.006878976244479418,
+        0.011597251519560814,
+        -0.03617611899971962,
+        -0.005020621232688427,
+        0.0066283573396503925,
+        0.061849869787693024,
+        0.0668889507651329,
+        -0.1120104044675827,
+        0.0215831957757473,
+        -0.008177083916962147,
+        0.019240612164139748,
+        -0.03794482350349426,
+        -0.21581093966960907,
+        0.3248063623905182,
+        0.0525924488902092,
+        -0.13873063027858734,
+        -0.030904211103916168,
+        -0.004122832324355841,
+        0.2784009277820587,
+        -0.42068102955818176,
+        -0.15351417660713196,
+        0.4266241192817688,
+        -0.10780557245016098,
+        0.03840374946594238,
+        -0.15116721391677856,
+        0.2292502224445343,
+        0.23400554060935974,
+        -0.5023872256278992,
+        0.14868289232254028,
+        0.09809935092926025,
+        0.03480924293398857,
+        -0.046804867684841156,
+        -0.14212554693222046,
+        0.3073779344558716,
+        -0.029529480263590813,
+        -0.13998086750507355,
+        -0.02750661037862301,
+        0.010526027530431747,
+        0.032874979078769684,
+        -0.07645174115896225,
+        -0.02746269293129444,
+        0.10902399569749832,
+        -0.00446560001000762,
+        -0.01339190173894167,
+        0.003540819976478815,
+        -0.04410126060247421,
+        -0.10884726047515869,
+        0.016081949695944786,
+        0.15211890637874603,
+        0.04027504846453667,
+        -0.05552368983626366,
+        0.04718002676963806,
+        0.014503135345876217,
+        -0.2764658033847809,
+        -0.16068166494369507,
+        0.3356778621673584,
+        0.06485499441623688,
+        -0.07164154946804047,
+        0.084479421377182,
+        0.2702949047088623,
+        -0.1339409202337265,
+        -0.9642015695571899,
+        0.47433769702911377,
+        0.4715694189071655,
+        -0.17669782042503357,
+        -0.04434441775083542,
+        0.2641690671443939,
+        0.7357130646705627,
+        -1.2222046852111816,
+        -0.8205837607383728,
+        0.9091072678565979,
+        0.14896778762340546,
+        -0.09332367032766342,
+        -0.16173647344112396,
+        0.8782246708869934,
+        0.3819980323314667,
+        -1.619883418083191,
+        0.059255462139844894,
+        0.42745286226272583,
+        -0.03186821565032005,
+        -0.16420172154903412,
+        0.12124066799879074,
+        0.8650834560394287,
+        -0.3728218674659729,
+        -0.5816569328308105,
+        0.10949260741472244,
+        -0.010671291500329971,
+        -0.07903271913528442,
+        -0.09700250625610352,
+        0.3192030191421509,
+        0.2756008505821228,
+        -0.2616698145866394,
+        -0.11051242798566818,
+        0.016789941117167473,
+        -0.0484573096036911,
+        -0.12333080172538757,
+        0.0158428642898798,
+        0.11172449588775635,
+        0.014953864738345146,
+        -0.011746960692107677,
+        0.05310823395848274,
+        0.030244171619415283,
+        -0.23969320952892303,
+        -0.1039247065782547,
+        0.285805881023407,
+        -0.04652552306652069,
+        -0.05380000174045563,
+        0.05430186912417412,
+        0.25547218322753906,
+        -0.06164371967315674,
+        -0.7386756539344788,
+        0.4393811821937561,
+        0.2623714804649353,
+        -0.1849273294210434,
+        -0.049713607877492905,
+        0.1656467467546463,
+        0.6638666391372681,
+        -0.899787187576294,
+        -0.5747878551483154,
+        0.7465870976448059,
+        -0.025567445904016495,
+        -0.051771312952041626,
+        -0.19754628837108612,
+        0.6828271746635437,
+        0.4451557695865631,
+        -1.2559787034988403,
+        0.07448688894510269,
+        0.27905938029289246,
+        0.003908769693225622,
+        -0.18454433977603912,
+        -0.011183545924723148,
+        0.7449039816856384,
+        -0.228777676820755,
+        -0.47592073678970337,
+        0.13784541189670563,
+        0.019371675327420235,
+        -0.06424596160650253,
+        -0.1660400629043579,
+        0.2080633044242859,
+        0.2942465841770172,
+        -0.20263032615184784,
+        -0.0709841251373291,
+        -0.0021153483539819717,
+        -0.028180474415421486,
+        -0.021557176485657692,
+        0.012511649169027805,
+        0.06533018499612808,
+        0.006560645066201687,
+        -0.01908997632563114,
+        -0.020228691399097443,
+        0.10450740903615952,
+        0.04476405307650566,
+        -0.20389842987060547,
+        -0.36356496810913086,
+        -0.18690945208072662,
+        0.06581642478704453,
+        0.005246834829449654,
+        -0.14777734875679016,
+        0.04554577171802521,
+        0.7314760088920593,
+        1.1759854555130005,
+        0.7747871279716492,
+        0.08771117031574249,
+        0.04425497353076935,
+        0.14875195920467377,
+        -0.05036012455821037,
+        -1.0561891794204712,
+        -1.7835016250610352,
+        -1.313464879989624,
+        -0.4041728973388672,
+        -0.08825081586837769,
+        -0.18483860790729523,
+        -0.09619659930467606,
+        0.6506555676460266,
+        1.2331949472427368,
+        1.057729721069336,
+        0.3030258119106293,
+        0.053314659744501114,
+        0.10696353763341904,
+        0.19720971584320068,
+        -0.19457301497459412,
+        -0.3546113669872284,
+        -0.3773464560508728,
+        0.007737448439002037,
+        0.007112926337867975,
+        -0.026632368564605713,
+        -0.07708505541086197,
+        0.016982559114694595,
+        0.03331448882818222,
+        0.03235285356640816,
+        -0.04479134455323219,
+        0.0062864539213478565,
+        -0.04983896017074585,
+        -0.014209658838808537,
+        0.025105496868491173,
+        0.07187403738498688,
+        -0.019782420247793198,
+        -0.0387532040476799,
+        0.01098113413900137,
+        0.10765481740236282,
+        -0.005502769257873297,
+        -0.29967597126960754,
+        -0.5370010733604431,
+        -0.25729984045028687,
+        0.0341138020157814,
+        -0.01927473582327366,
+        -0.11736954003572464,
+        0.09457080066204071,
+        0.8881804943084717,
+        1.5049697160720825,
+        1.0347492694854736,
+        0.22410355508327484,
+        -0.004720119293779135,
+        0.1449226438999176,
+        -0.11916695535182953,
+        -1.2009364366531372,
+        -2.080855369567871,
+        -1.5549882650375366,
+        -0.5231477618217468,
+        -0.005029830615967512,
+        -0.11258674412965775,
+        0.03710457682609558,
+        0.9192798137664795,
+        1.525830626487732,
+        1.3018689155578613,
+        0.44408130645751953,
+        0.006972550880163908,
+        0.07937697321176529,
+        0.060622286051511765,
+        -0.4068094491958618,
+        -0.5964561104774475,
+        -0.6058750152587891,
+        -0.1743212193250656,
+        -0.0038881103973835707,
+        -0.04932431876659393,
+        -0.04989266395568848,
+        0.07228495925664902,
+        0.10359980911016464,
+        0.11054171621799469,
+        0.017031395807862282,
+        -0.012849675491452217,
+        -0.02224516123533249,
+        -0.019851619377732277,
+        0.04567919671535492,
+        0.12134519219398499,
+        0.018673665821552277,
+        -0.03933878242969513,
+        0.03506385162472725,
+        0.07499910145998001,
+        -0.004981306381523609,
+        -0.269795298576355,
+        -0.4478399455547333,
+        -0.3141564130783081,
+        0.014856644906103611,
+        -0.01102763693779707,
+        -0.11778493225574493,
+        -0.00048367868294008076,
+        0.46917271614074707,
+        0.8380635976791382,
+        0.5829758048057556,
+        0.14924737811088562,
+        0.00504975114017725,
+        0.1242799386382103,
+        0.027800291776657104,
+        -0.5343790054321289,
+        -0.9185061454772949,
+        -0.6974499225616455,
+        -0.1733488291501999,
+        0.028415951877832413,
+        -0.07513032108545303,
+        0.010947657749056816,
+        0.5501428246498108,
+        0.8556726574897766,
+        0.6854383945465088,
+        0.21023745834827423,
+        -0.04757346957921982,
+        0.028925150632858276,
+        -0.05005616322159767,
+        -0.4106282889842987,
+        -0.5990055203437805,
+        -0.5274976491928101,
+        -0.18928098678588867,
+        0.007199999876320362,
+        0.004744168370962143,
+        -0.006203897297382355,
+        0.16117095947265625,
+        0.20310591161251068,
+        0.17358633875846863,
+        0.057794276624917984,
+        0.0018837900133803487,
+        -0.021730661392211914,
+        0.03705505281686783,
+        0.048999205231666565,
+        0.017187459394335747,
+        -0.04760497808456421,
+        -0.06534644961357117,
+        0.027641354128718376,
+        -0.02722003310918808,
+        -0.09557735174894333,
+        0.2721945643424988,
+        0.06861108541488647,
+        -0.17862513661384583,
+        0.029542427510023117,
+        -0.028343068435788155,
+        -0.24357359111309052,
+        0.2928915321826935,
+        0.6317090392112732,
+        -0.5675624012947083,
+        -0.31298428773880005,
+        0.119928739964962,
+        -0.04503166303038597,
+        0.1997436285018921,
+        0.9068917632102966,
+        -0.6105388402938843,
+        -1.176649808883667,
+        0.391012579202652,
+        0.21436090767383575,
+        0.06404570490121841,
+        0.4306352436542511,
+        -0.18372972309589386,
+        -1.6093186140060425,
+        0.5129231810569763,
+        0.8333584666252136,
+        -0.11607109010219574,
+        0.024050598964095116,
+        -0.027272621169686317,
+        -0.8072280883789062,
+        0.15613007545471191,
+        1.0115277767181396,
+        -0.1886059194803238,
+        -0.1662863790988922,
+        -0.07484262436628342,
+        -0.11359186470508575,
+        -0.05765556916594505,
+        0.48085057735443115,
+        0.031143836677074432,
+        -0.20803743600845337,
+        0.005643316078931093,
+        -0.011422591283917427,
+        -0.02063453011214733,
+        0.010139239020645618,
+        0.026931140571832657,
+        0.02650240994989872,
+        0.014503400772809982,
+        -0.030498046427965164,
+        0.01038119662553072,
+        -0.041832923889160156,
+        -0.11747029423713684,
+        0.24838468432426453,
+        0.08126607537269592,
+        -0.17684465646743774,
+        0.009867151267826557,
+        -0.04349489137530327,
+        -0.22892898321151733,
+        0.3097872734069824,
+        0.6229272484779358,
+        -0.5710748434066772,
+        -0.2540203332901001,
+        0.15970031917095184,
+        -0.05765099450945854,
+        0.24631772935390472,
+        0.9121918678283691,
+        -0.6539115309715271,
+        -1.1680796146392822,
+        0.43742635846138,
+        0.1981748640537262,
+        0.060766786336898804,
+        0.48115089535713196,
+        -0.2704729437828064,
+        -1.668082594871521,
+        0.6258481740951538,
+        0.8217618465423584,
+        -0.17844447493553162,
+        0.07583325356245041,
+        -0.031355466693639755,
+        -0.884739100933075,
+        0.21298757195472717,
+        1.0279508829116821,
+        -0.2118954360485077,
+        -0.16616611182689667,
+        -0.025157395750284195,
+        -0.11329160630702972,
+        -0.08147483319044113,
+        0.46636614203453064,
+        0.023730026558041573,
+        -0.21343427896499634,
+        -0.015201984904706478,
+        -0.00498165050521493,
+        0.022955382242798805,
+        0.020228328183293343,
+        -0.029405873268842697,
+        -0.032065436244010925,
+        0.047389160841703415,
+        -0.01793060638010502,
+        0.01669210195541382,
+        0.05227159336209297,
+        -0.11703876405954361,
+        0.006789325270801783,
+        0.03741219639778137,
+        -0.04651298373937607,
+        -0.012846981175243855,
+        0.024231625720858574,
+        -0.13399703800678253,
+        -0.024073680862784386,
+        0.2970501184463501,
+        -0.1497301310300827,
+        -0.04287628084421158,
+        0.08405227214097977,
+        -0.06020639091730118,
+        -0.01648692972958088,
+        0.4150170087814331,
+        -0.17000712454319,
+        -0.43461430072784424,
+        0.27202337980270386,
+        0.006708468310534954,
+        -0.04474359005689621,
+        0.15199843049049377,
+        -0.03348325565457344,
+        -0.6591396331787109,
+        0.4057810306549072,
+        0.25226324796676636,
+        -0.16070741415023804,
+        0.03464199975132942,
+        0.023064177483320236,
+        -0.35642316937446594,
+        0.22774185240268707,
+        0.37138837575912476,
+        -0.24171461164951324,
+        -0.023513946682214737,
+        0.028774995356798172,
+        -0.02702418342232704,
+        -0.012504744343459606,
+        0.17893734574317932,
+        -0.1554262489080429,
+        -0.09501983970403671,
+        0.06177212670445442,
+        -0.013536165468394756,
+        0.012441401369869709,
+        0.006566522642970085,
+        -0.018207622691988945,
+        0.003373368876054883,
+        -0.034891802817583084,
+        0.002223123563453555,
+        0.006169564090669155,
+        0.022658145055174828,
+        -0.005327044054865837,
+        -0.023764559999108315,
+        -0.004386506043374538,
+        -0.02777106687426567,
+        0.01950058527290821,
+        0.004401096608489752,
+        0.02882237359881401,
+        0.01790205016732216,
+        -0.007827110588550568,
+        -0.005222277250140905,
+        -0.05361752584576607,
+        0.008359426632523537,
+        -0.026494475081562996,
+        -0.015572195872664452,
+        -0.04412947595119476,
+        -0.006163781508803368,
+        0.180303692817688,
+        0.17117105424404144,
+        -0.014117442071437836,
+        0.014543564058840275,
+        0.03875281661748886,
+        0.002004631096497178,
+        0.11982911080121994,
+        0.609316349029541,
+        0.5792325735092163,
+        0.10267578810453415,
+        -0.02287464588880539,
+        -0.011516223661601543,
+        -0.02587946131825447,
+        0.019127164036035538,
+        0.2742871046066284,
+        0.23896890878677368,
+        -0.013414637185633183,
+        0.012439075857400894,
+        0.01148916780948639,
+        0.0024075021501630545,
+        -0.028374193236231804,
+        -0.02938784286379814,
+        -0.061723873019218445,
+        -0.03288640081882477,
+        0.010918691754341125,
+        0.01171314436942339,
+        0.00894222967326641,
+        -0.0050367508083581924,
+        0.00322812981903553,
+        -0.01958087645471096,
+        0.000401448953198269,
+        0.00655051926150918,
+        0.008647873997688293,
+        -0.015351405367255211,
+        -0.022286182269454002,
+        -0.0018973759142681956,
+        -0.032965533435344696,
+        0.009401706047356129,
+        0.01680464670062065,
+        0.01722409576177597,
+        0.017367251217365265,
+        -0.0012145076179876924,
+        0.015895379707217216,
+        -0.013976357877254486,
+        0.01587546430528164,
+        -0.019388504326343536,
+        -0.004597584251314402,
+        -0.026080038398504257,
+        0.020517753437161446,
+        0.20680218935012817,
+        0.20302064716815948,
+        0.03813354671001434,
+        0.027738921344280243,
+        0.02183712273836136,
+        0.023807305842638016,
+        0.14632326364517212,
+        0.5991678237915039,
+        0.608651340007782,
+        0.15929070115089417,
+        -0.02112223394215107,
+        -0.020013611763715744,
+        -0.03723381832242012,
+        0.032139480113983154,
+        0.27032363414764404,
+        0.24862462282180786,
+        0.02374681644141674,
+        0.007894856855273247,
+        0.00042308925185352564,
+        -0.004832752980291843,
+        -0.024313796311616898,
+        -0.0018940505106002092,
+        -0.02681432105600834,
+        0.002362651750445366,
+        0.013330202549695969,
+        0.012553646229207516,
+        0.002630018163472414,
+        0.002979951212182641,
+        0.0015847217291593552,
+        -0.03376828506588936,
+        -0.010844729840755463,
+        -0.002748559694737196,
+        0.012938202358782291,
+        -0.011872833594679832,
+        -0.0025761008728295565,
+        0.003677211469039321,
+        -0.04305516183376312,
+        0.001133457524701953,
+        0.0020396243780851364,
+        0.01797032356262207,
+        0.016580887138843536,
+        0.04445189982652664,
+        0.013270077295601368,
+        -0.04839251935482025,
+        0.011546633206307888,
+        -0.015829432755708694,
+        0.019473392516374588,
+        -0.011464826762676239,
+        0.018693143501877785,
+        0.18201367557048798,
+        0.16157257556915283,
+        0.02082117274403572,
+        0.015915032476186752,
+        0.010720869526267052,
+        -0.0020238866563886404,
+        0.09329187124967575,
+        0.46998023986816406,
+        0.5186727046966553,
+        0.09814783185720444,
+        -0.016547314822673798,
+        0.00325066689401865,
+        -0.028936590999364853,
+        0.01002424769103527,
+        0.21822214126586914,
+        0.22012007236480713,
+        0.008229314349591732,
+        0.015599996782839298,
+        0.014740276150405407,
+        0.0019725109450519085,
+        0.003613655688241124,
+        -0.03043546713888645,
+        -0.06308998167514801,
+        0.014664110727608204,
+        0.06775129586458206,
+        -0.12990300357341766,
+        -0.03638269379734993,
+        -0.03883139044046402,
+        0.05194637551903725,
+        0.03896122798323631,
+        -0.05132362246513367,
+        -0.07234688848257065,
+        -0.36106064915657043,
+        -0.2839237451553345,
+        -0.11496391147375107,
+        0.3026673197746277,
+        0.3528609871864319,
+        0.21559017896652222,
+        -0.11970120668411255,
+        -0.5473688244819641,
+        -0.5362005233764648,
+        -0.21015112102031708,
+        0.4089161455631256,
+        0.6033567786216736,
+        0.38614287972450256,
+        -0.12437233328819275,
+        -0.6394402384757996,
+        -0.6945835947990417,
+        -0.3482857942581177,
+        0.5189254283905029,
+        0.8457668423652649,
+        0.6248002648353577,
+        -0.12700730562210083,
+        -0.6978924870491028,
+        -0.7764106392860413,
+        -0.4171960651874542,
+        0.44747814536094666,
+        0.8406224846839905,
+        0.6821274161338806,
+        -0.07793218642473221,
+        -0.5459966659545898,
+        -0.6139025092124939,
+        -0.35998886823654175,
+        0.27800890803337097,
+        0.6048891544342041,
+        0.591307520866394,
+        -0.04850815609097481,
+        -0.3863481283187866,
+        -0.3542836606502533,
+        -0.2491992861032486,
+        0.1616278886795044,
+        0.3402666747570038,
+        0.4610227644443512,
+        -0.010262396186590195,
+        0.0408165417611599,
+        0.006382474210113287,
+        -0.011430315673351288,
+        -0.027895113453269005,
+        -0.009767768904566765,
+        0.005882019177079201,
+        0.05225436016917229,
+        0.0415218211710453,
+        0.08244743943214417,
+        0.026765575632452965,
+        -0.05404946208000183,
+        -0.06101839989423752,
+        -0.028233220800757408,
+        0.03128793090581894,
+        0.07133004069328308,
+        0.0718698799610138,
+        0.042146697640419006,
+        -0.08380170166492462,
+        -0.09263177216053009,
+        -0.07569421827793121,
+        0.032425008714199066,
+        0.12351400405168533,
+        0.09103626012802124,
+        -0.004768018145114183,
+        -0.05960838869214058,
+        -0.11922567337751389,
+        -0.10132396221160889,
+        0.044341862201690674,
+        0.100867860019207,
+        0.09607693552970886,
+        -0.00129030947573483,
+        -0.05481477826833725,
+        -0.1278291642665863,
+        -0.12058380991220474,
+        0.016678951680660248,
+        0.09958931058645248,
+        0.08456224203109741,
+        0.061599165201187134,
+        -0.049776893109083176,
+        -0.11354166269302368,
+        -0.09844806790351868,
+        0.004753128159791231,
+        0.07868346571922302,
+        0.06464104354381561,
+        0.020981626585125923,
+        -0.010770543478429317,
+        -0.08838209509849548,
+        -0.07265795767307281,
+        -0.058313023298978806,
+        0.10897739976644516,
+        0.026735201478004456,
+        0.03972309082746506,
+        -0.019998662173748016,
+        -0.048948734998703,
+        0.03377270698547363,
+        0.053406376391649246,
+        0.27304399013519287,
+        0.20850272476673126,
+        0.07890326529741287,
+        -0.22241365909576416,
+        -0.2816997468471527,
+        -0.1745096743106842,
+        0.08957889676094055,
+        0.4962941110134125,
+        0.4586986303329468,
+        0.20177948474884033,
+        -0.3625744581222534,
+        -0.47758376598358154,
+        -0.32412785291671753,
+        0.0669194757938385,
+        0.5394997596740723,
+        0.601328432559967,
+        0.24388420581817627,
+        -0.4319041073322296,
+        -0.6893490552902222,
+        -0.5106037259101868,
+        0.10174300521612167,
+        0.5457565784454346,
+        0.6549625992774963,
+        0.38772058486938477,
+        -0.3778320252895355,
+        -0.6820934414863586,
+        -0.551069438457489,
+        0.049600999802351,
+        0.45137161016464233,
+        0.5143972039222717,
+        0.3713279068470001,
+        -0.26546329259872437,
+        -0.5121409893035889,
+        -0.47691628336906433,
+        0.03843758627772331,
+        0.30808231234550476,
+        0.3185756504535675,
+        0.22629432380199432,
+        -0.14860986173152924,
+        -0.2915389835834503,
+        -0.3552006185054779,
+        -0.003137432038784027,
+        -0.01327254343777895,
+        -0.027139298617839813,
+        0.04800891876220703,
+        0.05380738899111748,
+        -0.01380784809589386,
+        0.0022881641052663326,
+        -0.012132279574871063,
+        0.06182793900370598,
+        0.03762871399521828,
+        0.0966145321726799,
+        0.08963571488857269,
+        0.06551238149404526,
+        0.031640589237213135,
+        -0.010532311163842678,
+        0.07195396721363068,
+        0.11343465745449066,
+        0.11621421575546265,
+        0.047318290919065475,
+        0.1111951395869255,
+        0.044054243713617325,
+        0.016777141019701958,
+        0.03392713516950607,
+        0.06047024950385094,
+        -0.7924502491950989,
+        -0.7310910224914551,
+        0.031088173389434814,
+        0.0906061977148056,
+        0.022829236462712288,
+        0.04470035433769226,
+        0.025999872013926506,
+        -0.8246837258338928,
+        -0.723675549030304,
+        0.15835590660572052,
+        0.07358791679143906,
+        -0.015819497406482697,
+        -0.014207872562110424,
+        0.08506257086992264,
+        0.08868777751922607,
+        0.0976945012807846,
+        0.11740022897720337,
+        0.016287995502352715,
+        -0.024363648146390915,
+        0.04249691963195801,
+        0.02909177541732788,
+        0.12011238187551498,
+        0.10729824751615524,
+        0.05927390977740288,
+        0.04731644690036774,
+        0.008210064843297005,
+        0.03859357163310051,
+        -0.005175672471523285,
+        0.01984376832842827,
+        -0.0011626111809164286,
+        -0.0010909241391345859,
+        0.02311880886554718,
+        0.007646523881703615,
+        0.04582137614488602,
+        -0.0027255103923380375,
+        0.027656713500618935,
+        0.02781369723379612,
+        0.015750093385577202,
+        0.040563344955444336,
+        -0.007784596644341946,
+        0.006534814368933439,
+        0.002403199439868331,
+        -0.020037032663822174,
+        -0.011717663146555424,
+        0.07826739549636841,
+        0.018203573301434517,
+        0.021228624507784843,
+        0.014112413860857487,
+        -0.02866269089281559,
+        -0.9502679109573364,
+        -0.825043797492981,
+        0.05938851460814476,
+        0.06553053110837936,
+        0.015418429858982563,
+        0.0616452619433403,
+        -0.0094453701749444,
+        -0.9471839666366577,
+        -0.7922234535217285,
+        0.13069523870944977,
+        0.04939320683479309,
+        0.007429714780300856,
+        0.022599652409553528,
+        0.0820123627781868,
+        0.06440276652574539,
+        0.09897352755069733,
+        0.0856291800737381,
+        0.006608777679502964,
+        -0.0005533680086955428,
+        0.021656949073076248,
+        0.014818831346929073,
+        0.03757459297776222,
+        -0.001428246614523232,
+        0.03473127633333206,
+        0.03607869893312454,
+        0.017313262447714806,
+        0.0025767614133656025,
+        -0.033292777836322784,
+        0.027883101254701614,
+        -0.007534499745815992,
+        -0.04302362725138664,
+        -0.01795666106045246,
+        -0.007667913101613522,
+        0.012547189369797707,
+        -0.021762438118457794,
+        0.03789107874035835,
+        0.06384614109992981,
+        0.0014223429607227445,
+        -0.01393786258995533,
+        -0.041693057864904404,
+        -0.01813604310154915,
+        0.065328449010849,
+        0.15736474096775055,
+        0.1531635969877243,
+        0.09920474886894226,
+        -0.04044449329376221,
+        0.010558396577835083,
+        0.05559245124459267,
+        0.10931257158517838,
+        -0.5784384608268738,
+        -0.5109886527061462,
+        0.17690584063529968,
+        0.07484250515699387,
+        0.010378374718129635,
+        0.0890144556760788,
+        0.13172735273838043,
+        -0.6058865785598755,
+        -0.49908995628356934,
+        0.1835336685180664,
+        0.005293308291584253,
+        -0.03870566934347153,
+        -0.025229454040527344,
+        0.12571711838245392,
+        0.14792272448539734,
+        0.14905226230621338,
+        0.0700206533074379,
+        -0.035034529864788055,
+        0.013128797523677349,
+        0.015581230632960796,
+        0.005400130525231361,
+        0.07070232182741165,
+        0.03829728811979294,
+        -0.013876918703317642,
+        -0.019958000630140305,
+        -0.020086020231246948,
+        -0.019999003037810326,
+        -0.015111410059034824,
+        0.11963249742984772,
+        -0.08270428329706192,
+        -0.0025947154499590397,
+        -0.010668564587831497,
+        0.016670405864715576,
+        -0.03206938877701759,
+        -0.053453829139471054,
+        0.1236601173877716,
+        -0.020077411085367203,
+        0.00779569149017334,
+        -0.0318986251950264,
+        0.03579804673790932,
+        -0.060723867267370224,
+        -0.009301809594035149,
+        0.09249342232942581,
+        -0.13378725945949554,
+        0.17496798932552338,
+        -0.0935625433921814,
+        0.06569044291973114,
+        -0.18187756836414337,
+        0.06397300213575363,
+        0.3793930113315582,
+        -0.5664302706718445,
+        0.23658618330955505,
+        -0.03206830099225044,
+        0.03155658766627312,
+        0.039305318146944046,
+        -0.6008145213127136,
+        1.0417630672454834,
+        -0.5062726140022278,
+        -0.04698493704199791,
+        0.0979752242565155,
+        -0.037326715886592865,
+        0.26255178451538086,
+        -0.590207576751709,
+        0.4195419251918793,
+        0.12212422490119934,
+        -0.26122942566871643,
+        0.06442253291606903,
+        -0.07682429254055023,
+        0.12608948349952698,
+        -0.13872937858104706,
+        -0.030260663479566574,
+        0.2047160565853119,
+        -0.13068141043186188,
+        0.016608506441116333,
+        -0.021629147231578827,
+        0.04659907519817352,
+        0.024417348206043243,
+        0.06751634925603867,
+        -0.1705978959798813,
+        0.0655774399638176,
+        -0.0041802311316132545,
+        -0.02263445220887661,
+        -0.014069054275751114,
+        0.06242800131440163,
+        0.08984102308750153,
+        -0.19382472336292267,
+        0.09380361437797546,
+        -0.0032764992211014032,
+        -0.03950225189328194,
+        -0.08896161615848541,
+        0.28387022018432617,
+        0.1668996810913086,
+        -0.5457127094268799,
+        0.21796099841594696,
+        0.012032964266836643,
+        0.030721815302968025,
+        -0.4431600570678711,
+        0.3104412257671356,
+        1.0070439577102661,
+        -1.1077969074249268,
+        0.08187273889780045,
+        0.1387241780757904,
+        0.09014563262462616,
+        -0.25378379225730896,
+        -0.9253583550453186,
+        1.9745515584945679,
+        -0.6605072617530823,
+        -0.4394792318344116,
+        0.11501576751470566,
+        0.03007262572646141,
+        0.2538164258003235,
+        -1.1462018489837646,
+        0.7988958954811096,
+        0.46934643387794495,
+        -0.4244523048400879,
+        -0.0001816617150325328,
+        -0.04351970925927162,
+        0.20500127971172333,
+        -0.40710335969924927,
+        -0.15871365368366241,
+        0.4640160799026489,
+        -0.06024328991770744,
+        -0.016036653891205788,
+        -0.012419192120432854,
+        0.05552554875612259,
+        0.050986770540475845,
+        -0.0171927809715271,
+        -0.12105240672826767,
+        0.03947274759411812,
+        0.009537882171571255,
+        -0.026668362319469452,
+        0.017273351550102234,
+        0.10812800377607346,
+        -0.015008139424026012,
+        -0.14154496788978577,
+        0.08008233457803726,
+        -0.01306608971208334,
+        -0.05574854835867882,
+        -0.06091056764125824,
+        0.2888447940349579,
+        0.05022002384066582,
+        -0.4581625759601593,
+        0.21146118640899658,
+        -0.01495362538844347,
+        0.02946372702717781,
+        -0.38554418087005615,
+        0.30167311429977417,
+        0.7605867981910706,
+        -0.898481547832489,
+        0.11953620612621307,
+        0.12686115503311157,
+        0.09949854761362076,
+        -0.14409342408180237,
+        -0.7404491901397705,
+        1.5449001789093018,
+        -0.5307857394218445,
+        -0.3347839415073395,
+        0.09940771013498306,
+        0.009087899699807167,
+        0.3081797957420349,
+        -0.9053899049758911,
+        0.5102643370628357,
+        0.4646914303302765,
+        -0.36200836300849915,
+        -0.043260715901851654,
+        -0.05309509113430977,
+        0.22480911016464233,
+        -0.2674587666988373,
+        -0.25316888093948364,
+        0.435017466545105,
+        -0.017485838383436203,
+        -0.049459364265203476,
+        0.012460661120712757,
+        -0.02262282371520996,
+        -0.04392899200320244,
+        0.013330060057342052,
+        0.05963548645377159,
+        -0.020561739802360535,
+        -0.013496879488229752,
+        -0.02310933545231819,
+        -0.06549905985593796,
+        0.12132573872804642,
+        0.22165189683437347,
+        -0.07683887332677841,
+        -0.12427931278944016,
+        0.05543455854058266,
+        0.009089780040085316,
+        0.19844494760036469,
+        0.07650767266750336,
+        -0.48934996128082275,
+        -0.35080164670944214,
+        0.13422781229019165,
+        0.022217294201254845,
+        -0.006589306052774191,
+        -0.18357548117637634,
+        -0.6055922508239746,
+        0.09492127597332001,
+        0.7073907256126404,
+        0.1777055710554123,
+        -0.05434347689151764,
+        0.04566245526075363,
+        -0.023967979475855827,
+        0.4856843054294586,
+        0.8131930828094482,
+        -0.2068077027797699,
+        -0.3863125145435333,
+        0.02887917123734951,
+        -0.05048410966992378,
+        0.051201049238443375,
+        0.057671088725328445,
+        -0.6412642002105713,
+        -0.39739903807640076,
+        0.11036981642246246,
+        0.06687764078378677,
+        -0.018151026219129562,
+        0.0022760110441595316,
+        -0.09328305721282959,
+        0.1352599710226059,
+        0.19680921733379364,
+        0.032235175371170044,
+        -0.06123670935630798,
+        -0.013810456730425358,
+        -0.01821190118789673,
+        -0.029903864488005638,
+        0.027588335797190666,
+        0.0762094110250473,
+        -0.046041399240493774,
+        0.017117975279688835,
+        -0.018925148993730545,
+        0.00423092395067215,
+        0.2065701186656952,
+        0.157025545835495,
+        -0.26491472125053406,
+        -0.24569831788539886,
+        0.0873267725110054,
+        0.004694689530879259,
+        0.1838335543870926,
+        -0.18973900377750397,
+        -0.9744532108306885,
+        -0.41959065198898315,
+        0.409589946269989,
+        0.22223009169101715,
+        -0.0989728644490242,
+        -0.40883490443229675,
+        -0.8418471813201904,
+        0.40256521105766296,
+        1.4742398262023926,
+        0.4913789629936218,
+        -0.14741277694702148,
+        -0.0028576564509421587,
+        0.0861843004822731,
+        1.0056577920913696,
+        1.479182481765747,
+        -0.21940617263317108,
+        -0.8383130431175232,
+        -0.30560192465782166,
+        0.12028121203184128,
+        0.24013034999370575,
+        0.11750353127717972,
+        -1.1071972846984863,
+        -0.9066778421401978,
+        -0.055051110684871674,
+        0.15361995995044708,
+        0.0032418384216725826,
+        -0.08823435008525848,
+        -0.3188804090023041,
+        -0.02160414680838585,
+        0.2972750663757324,
+        0.17006494104862213,
+        0.03401973098516464,
+        0.017106015235185623,
+        0.010733614675700665,
+        0.004688877146691084,
+        0.02985573373734951,
+        0.046415988355875015,
+        -0.05177726596593857,
+        -0.04624386876821518,
+        0.026672907173633575,
+        0.03479000926017761,
+        0.22761401534080505,
+        0.12049756944179535,
+        -0.23494181036949158,
+        -0.2207801640033722,
+        0.06036320701241493,
+        0.02112250216305256,
+        0.16173022985458374,
+        -0.14196650683879852,
+        -0.8236543536186218,
+        -0.3530665934085846,
+        0.3715725541114807,
+        0.25781863927841187,
+        -0.09806561470031738,
+        -0.341796338558197,
+        -0.7201419472694397,
+        0.2111824005842209,
+        1.1648427248001099,
+        0.3866075575351715,
+        -0.1955428272485733,
+        -0.13164694607257843,
+        -0.06048528477549553,
+        0.7989920973777771,
+        1.143347144126892,
+        -0.19509637355804443,
+        -0.6719933152198792,
+        -0.26912447810173035,
+        0.16733723878860474,
+        0.32526257634162903,
+        0.1910397708415985,
+        -0.8516904711723328,
+        -0.6005953550338745,
+        0.10627525299787521,
+        0.16700856387615204,
+        0.032433755695819855,
+        -0.11345972120761871,
+        -0.270126610994339,
+        -0.012052524834871292,
+        0.25489771366119385,
+        0.14647918939590454,
+        -0.014324051328003407,
+        -0.011148945428431034,
+        -0.0011708218371495605,
+        -0.018903911113739014,
+        -0.010648071765899658,
+        -0.017981043085455894,
+        0.014055400155484676,
+        -0.020784996449947357,
+        -0.030126383528113365,
+        0.1150858998298645,
+        -0.1112036183476448,
+        -0.023664508014917374,
+        0.1651369333267212,
+        -0.055412910878658295,
+        -0.007318025920540094,
+        -0.07404221594333649,
+        0.3068569302558899,
+        -0.6175673007965088,
+        0.35226404666900635,
+        0.1940349042415619,
+        -0.22921296954154968,
+        0.06411048769950867,
+        0.001689439988695085,
+        0.23336739838123322,
+        -0.9470900893211365,
+        1.2042961120605469,
+        -0.44587329030036926,
+        -0.15847182273864746,
+        0.07572423666715622,
+        0.11138042062520981,
+        -0.2075018584728241,
+        -0.2651064693927765,
+        0.8896074295043945,
+        -0.7130936980247498,
+        0.10370831191539764,
+        0.07730382680892944,
+        0.02368813008069992,
+        -0.20520009100437164,
+        0.13611918687820435,
+        0.31062978506088257,
+        -0.471883624792099,
+        0.21489326655864716,
+        -0.0216743852943182,
+        -0.04020361602306366,
+        -0.022920167073607445,
+        0.16054102778434753,
+        -0.002624030224978924,
+        -0.14670424163341522,
+        0.12018264085054398,
+        -0.043656397610902786,
+        -0.005084550939500332,
+        0.03873870149254799,
+        -0.07967288792133331,
+        -0.007439201697707176,
+        0.027688704431056976,
+        0.08916077762842178,
+        -0.0036629599053412676,
+        -0.01389122661203146,
+        0.1402083784341812,
+        -0.2923351228237152,
+        -0.01932896114885807,
+        0.224355086684227,
+        -0.013193303719162941,
+        -0.03984276205301285,
+        -0.04474477842450142,
+        0.3302844762802124,
+        -0.9746807217597961,
+        0.5603556036949158,
+        0.3556183874607086,
+        -0.2713812589645386,
+        0.01890619471669197,
+        0.06983876973390579,
+        0.09052442759275436,
+        -1.3613605499267578,
+        1.8220031261444092,
+        -0.40902698040008545,
+        -0.31302449107170105,
+        0.03893759846687317,
+        0.11448371410369873,
+        -0.4220678210258484,
+        -0.3677598237991333,
+        1.539440631866455,
+        -0.8297391533851624,
+        -0.08504960685968399,
+        0.0629446730017662,
+        -0.016804160550236702,
+        -0.31778836250305176,
+        0.2363198846578598,
+        0.6452136635780334,
+        -0.700931191444397,
+        0.09927428513765335,
+        0.0019635935313999653,
+        -0.05397690460085869,
+        -0.014552262611687183,
+        0.2352754771709442,
+        0.09991656988859177,
+        -0.28891685605049133,
+        0.07818552106618881,
+        -0.021534763276576996,
+        -0.009461677633225918,
+        -0.01069199200719595,
+        -0.008059840649366379,
+        -0.0129952197894454,
+        0.038492631167173386,
+        0.018906958401203156,
+        -0.025432486087083817,
+        -0.03420932963490486,
+        0.09104404598474503,
+        -0.10342919826507568,
+        -0.035048507153987885,
+        0.1415904313325882,
+        -0.052986644208431244,
+        -0.021596742793917656,
+        -0.049690280109643936,
+        0.3079117238521576,
+        -0.5487046837806702,
+        0.27024003863334656,
+        0.15158434212207794,
+        -0.16488635540008545,
+        0.027642132714390755,
+        0.004561549983918667,
+        0.21555493772029877,
+        -0.9188903570175171,
+        1.0972669124603271,
+        -0.3528037667274475,
+        -0.07574182748794556,
+        0.021962830796837807,
+        0.08826783299446106,
+        -0.18681983649730682,
+        -0.2789378762245178,
+        0.864517331123352,
+        -0.5642455816268921,
+        0.07469761371612549,
+        0.03803368657827377,
+        0.014268620871007442,
+        -0.17712704837322235,
+        0.1349189728498459,
+        0.3181247115135193,
+        -0.45067182183265686,
+        0.1391848623752594,
+        0.009777083061635494,
+        -0.028080958873033524,
+        -0.03586730733513832,
+        0.14503192901611328,
+        -0.014655024744570255,
+        -0.1472700834274292,
+        0.07361634075641632,
+        -0.0029754601418972015,
+        -0.006887470372021198,
+        -0.019166842103004456,
+        0.0034907464869320393,
+        -0.015169994905591011,
+        0.053831856697797775,
+        -0.028789488598704338,
+        -0.02033298648893833,
+        0.0018537036376073956,
+        0.07567961513996124,
+        -0.07041627168655396,
+        -0.047083087265491486,
+        0.17573483288288116,
+        -0.04860217124223709,
+        0.013171656988561153,
+        0.020158233121037483,
+        -0.006270059384405613,
+        -0.28434091806411743,
+        0.2760852873325348,
+        0.32198208570480347,
+        -0.43535903096199036,
+        0.03188510239124298,
+        0.019360313192009926,
+        -0.20063988864421844,
+        0.04450676590204239,
+        0.9678076505661011,
+        -0.683987021446228,
+        -0.3979112207889557,
+        0.2618143558502197,
+        -0.049711134284734726,
+        -0.06456997990608215,
+        0.6518288850784302,
+        -0.1357039213180542,
+        -1.1304017305374146,
+        0.4881652295589447,
+        0.19583553075790405,
+        -0.03677722439169884,
+        0.21429045498371124,
+        0.09559855610132217,
+        -0.7311355471611023,
+        0.10988117009401321,
+        0.4949330687522888,
+        -0.17359353601932526,
+        0.03822369873523712,
+        0.011371256783604622,
+        -0.1900172382593155,
+        -0.04778448864817619,
+        0.2897090017795563,
+        -0.02235160581767559,
+        -0.05582524091005325,
+        0.007624597754329443,
+        -0.027456223964691162,
+        -0.029680097475647926,
+        -0.023810429498553276,
+        0.15409281849861145,
+        0.013284318149089813,
+        -0.0788225457072258,
+        -0.025637971237301826,
+        0.01406402699649334,
+        -0.13676859438419342,
+        0.027384959161281586,
+        0.30458444356918335,
+        -0.11150643229484558,
+        -0.06806201487779617,
+        0.009601237252354622,
+        -0.0866582989692688,
+        -0.2328706979751587,
+        0.5188567638397217,
+        0.3787381649017334,
+        -0.655829906463623,
+        0.0072118742391467094,
+        -0.0031494891736656427,
+        -0.2424815446138382,
+        0.28893929719924927,
+        1.2396824359893799,
+        -1.0406886339187622,
+        -0.6376030445098877,
+        0.4103420078754425,
+        -0.05929668992757797,
+        0.03918358311057091,
+        0.9274081587791443,
+        -0.28890565037727356,
+        -1.6682262420654297,
+        0.66976398229599,
+        0.35488471388816833,
+        0.027932289987802505,
+        0.3169145882129669,
+        0.09107685089111328,
+        -1.2099432945251465,
+        0.11623579263687134,
+        0.7632684707641602,
+        -0.16506360471248627,
+        0.037474747747182846,
+        -0.005203985143452883,
+        -0.35939401388168335,
+        -0.17138688266277313,
+        0.525232195854187,
+        0.10247340798377991,
+        -0.14317406713962555,
+        0.007572649512439966,
+        -0.006046198774129152,
+        0.06188087910413742,
+        -0.050851333886384964,
+        0.032844241708517075,
+        0.0544477179646492,
+        -0.07947597652673721,
+        -0.03073730878531933,
+        0.04025515541434288,
+        -0.010001083835959435,
+        -0.11831062287092209,
+        0.17422229051589966,
+        -0.05468267202377319,
+        -0.04996664077043533,
+        0.023996006697416306,
+        0.02888253889977932,
+        -0.18709556758403778,
+        0.13987921178340912,
+        0.32867854833602905,
+        -0.31714990735054016,
+        0.019951285794377327,
+        0.027247004210948944,
+        -0.19416090846061707,
+        -0.006519266404211521,
+        0.7540720105171204,
+        -0.5474190711975098,
+        -0.27137213945388794,
+        0.20772530138492584,
+        -0.042619917541742325,
+        -0.09566087275743484,
+        0.548494815826416,
+        -0.1599852293729782,
+        -0.9178788661956787,
+        0.5456539988517761,
+        0.07497559487819672,
+        0.003984459210187197,
+        0.18640351295471191,
+        0.12121234089136124,
+        -0.7249511480331421,
+        0.2559764087200165,
+        0.4684237241744995,
+        -0.19216996431350708,
+        0.018075481057167053,
+        0.02684594877064228,
+        -0.221074178814888,
+        -0.09164194762706757,
+        0.3596596121788025,
+        -0.08310746401548386,
+        -0.10815230011940002,
+        -0.015406409278512001,
+        -0.011985878460109234,
+        0.028467312455177307,
+        -0.0879230722784996,
+        0.0347294844686985,
+        0.05081191286444664,
+        0.00362736196257174,
+        0.010529003106057644,
+        -0.002672453410923481,
+        0.025318201631307602,
+        -0.06232529878616333,
+        0.008822780102491379,
+        0.06744717806577682,
+        0.003999210894107819,
+        -0.0022885131184011698,
+        -0.046704765409231186,
+        0.13673964142799377,
+        -0.2590992748737335,
+        -0.022161437198519707,
+        0.258914053440094,
+        -0.10650330036878586,
+        0.023435762152075768,
+        0.06992689520120621,
+        0.03760937228798866,
+        -0.5444027185440063,
+        0.4131152629852295,
+        0.25325170159339905,
+        -0.2482522875070572,
+        0.010479461401700974,
+        0.045747850090265274,
+        -0.1541248857975006,
+        -0.35291528701782227,
+        0.9078133702278137,
+        -0.34428781270980835,
+        -0.14787709712982178,
+        -0.024105649441480637,
+        -0.007651817053556442,
+        -0.14991067349910736,
+        0.17544956505298615,
+        0.3692120611667633,
+        -0.46861159801483154,
+        0.10201738774776459,
+        0.003734431229531765,
+        -0.010433703660964966,
+        0.022045455873012543,
+        0.0944862961769104,
+        0.01679016835987568,
+        -0.16537833213806152,
+        0.07900089025497437,
+        -0.004211293533444405,
+        -0.01076442189514637,
+        0.09729930013418198,
+        -0.1490965485572815,
+        -0.02511671558022499,
+        0.0766475573182106,
+        0.010980346240103245,
+        -0.010220799595117569,
+        -0.0004861881607212126,
+        0.09204736351966858,
+        -0.179045170545578,
+        -0.025164175778627396,
+        0.15608654916286469,
+        0.004787537269294262,
+        -0.0005253870622254908,
+        0.034556396305561066,
+        0.1509256660938263,
+        -0.5432079434394836,
+        -0.03155849874019623,
+        0.513609766960144,
+        -0.14458952844142914,
+        0.015178131870925426,
+        0.09172039479017258,
+        -0.12612608075141907,
+        -0.926306962966919,
+        0.8281942009925842,
+        0.5954549908638,
+        -0.492740273475647,
+        0.007195526268333197,
+        -0.018258413299918175,
+        -0.4074647128582001,
+        -0.43008187413215637,
+        1.7370752096176147,
+        -0.350849986076355,
+        -0.5158001780509949,
+        -0.017458094283938408,
+        -0.08306471258401871,
+        -0.2334563285112381,
+        0.445117712020874,
+        0.7808031439781189,
+        -0.7913723587989807,
+        -0.11814796179533005,
+        -0.00913319457322359,
+        0.0223994143307209,
+        0.1012248545885086,
+        0.25349485874176025,
+        0.028286214917898178,
+        -0.4809858798980713,
+        0.05953341722488403,
+        0.015634188428521156,
+        0.005101620219647884,
+        0.10901974141597748,
+        -0.11964976042509079,
+        -0.09117673337459564,
+        0.0734483003616333,
+        0.01821213960647583,
+        5.350751234800555e-05,
+        -0.020279232412576675,
+        0.1097220927476883,
+        -0.1354990452528,
+        -0.08653146773576736,
+        0.11775246262550354,
+        -0.012575668282806873,
+        0.0310806967318058,
+        0.010271146893501282,
+        0.20337054133415222,
+        -0.3854014277458191,
+        -0.09943562000989914,
+        0.3921409249305725,
+        -0.08432158827781677,
+        0.010676748119294643,
+        0.040244489908218384,
+        -0.0015478944405913353,
+        -0.7022866010665894,
+        0.49858638644218445,
+        0.42338883876800537,
+        -0.2982582449913025,
+        -0.005396307446062565,
+        -0.008777705952525139,
+        -0.2325415015220642,
+        -0.4083922803401947,
+        1.186205506324768,
+        -0.26399391889572144,
+        -0.2621048092842102,
+        -0.015712907537817955,
+        -0.04675402492284775,
+        -0.1797540783882141,
+        0.2992522716522217,
+        0.4747498333454132,
+        -0.5266988277435303,
+        0.04581758379936218,
+        -0.04037958011031151,
+        0.0071074217557907104,
+        0.047499995678663254,
+        0.16617828607559204,
+        -0.03973710536956787,
+        -0.2953551113605499,
+        0.10628587752580643,
+        -0.00904526561498642,
+        0.010427894070744514,
+        0.08035022020339966,
+        0.03841109946370125,
+        -0.06335253268480301,
+        -0.06992083787918091,
+        0.015409895218908787,
+        -0.026900725439190865,
+        -0.04523912072181702,
+        0.08087682723999023,
+        0.12542113661766052,
+        0.018750213086605072,
+        -0.23430712521076202,
+        0.11755944788455963,
+        -0.019747508689761162,
+        -0.03171322122216225,
+        -0.12132623791694641,
+        0.2640603184700012,
+        0.38445138931274414,
+        -0.5724408030509949,
+        0.15661633014678955,
+        0.01949799247086048,
+        -0.021771302446722984,
+        -0.18984957039356232,
+        -0.23499636352062225,
+        1.2112919092178345,
+        -0.7037869095802307,
+        -0.14260035753250122,
+        0.01848726160824299,
+        0.06443414837121964,
+        -0.11740390956401825,
+        -0.8794785141944885,
+        1.4160369634628296,
+        0.016899125650525093,
+        -0.5444768071174622,
+        0.017313210293650627,
+        0.0508052259683609,
+        0.11102095246315002,
+        -0.790285587310791,
+        0.3501206636428833,
+        0.7238660454750061,
+        -0.49468666315078735,
+        -0.019021952524781227,
+        -0.01212992612272501,
+        0.15032203495502472,
+        -0.3573611080646515,
+        -0.1293754130601883,
+        0.45295456051826477,
+        -0.08407819271087646,
+        -0.008717959746718407,
+        0.022566653788089752,
+        -0.012640242464840412,
+        0.03181227669119835,
+        0.0638526976108551,
+        -0.058120664209127426,
+        -0.042917650192976,
+        0.02129550836980343,
+        -0.018790805712342262,
+        -0.00655191857367754,
+        0.05951414257287979,
+        0.12890471518039703,
+        -0.1886381357908249,
+        0.059096939861774445,
+        -0.016928592696785927,
+        0.02327263168990612,
+        -0.17282842099666595,
+        0.13812857866287231,
+        0.38889989256858826,
+        -0.5282873511314392,
+        0.07564643770456314,
+        -0.006128210574388504,
+        -0.00876594614237547,
+        -0.18427829444408417,
+        -0.26697441935539246,
+        1.2529815435409546,
+        -0.6549165844917297,
+        -0.2111111879348755,
+        0.011410325765609741,
+        0.07089994102716446,
+        -0.12627695500850677,
+        -0.8245998024940491,
+        1.4581915140151978,
+        -0.01822204887866974,
+        -0.5626582503318787,
+        -0.01661459542810917,
+        0.03759436681866646,
+        0.10841676592826843,
+        -0.7652962803840637,
+        0.4360819458961487,
+        0.7012669444084167,
+        -0.47011038661003113,
+        0.01529701892286539,
+        -0.0033166150096803904,
+        0.12170535326004028,
+        -0.3871544301509857,
+        -0.05247795954346657,
+        0.4504147171974182,
+        -0.11442532390356064,
+        -0.00882577896118164,
+        0.005190832540392876,
+        -0.05153197422623634,
+        0.0055236960761249065,
+        0.09320031106472015,
+        -0.03762076050043106,
+        -0.021778371185064316,
+        0.00750907463952899,
+        0.014965789392590523,
+        -0.015135630965232849,
+        -0.037086039781570435,
+        0.08020154386758804,
+        -0.04429963231086731,
+        0.0038218852132558823,
+        -0.01712334342300892,
+        0.053772956132888794,
+        -0.05226677283644676,
+        -0.024439912289381027,
+        0.12774989008903503,
+        -0.18722355365753174,
+        0.0683830976486206,
+        -0.010828870348632336,
+        -0.012880662456154823,
+        0.02679484151303768,
+        -0.13696907460689545,
+        0.46868517994880676,
+        -0.322968989610672,
+        0.052930932492017746,
+        0.009463602676987648,
+        -0.046861011534929276,
+        0.07714711129665375,
+        -0.35792097449302673,
+        0.5517901182174683,
+        -0.13382655382156372,
+        -0.12921281158924103,
+        0.018562642857432365,
+        -0.03842621296644211,
+        0.10284601897001266,
+        -0.28243398666381836,
+        0.13314206898212433,
+        0.20769073069095612,
+        -0.1551610678434372,
+        0.018036767840385437,
+        -0.03553476929664612,
+        0.036686040461063385,
+        -0.09568552672863007,
+        0.008917863480746746,
+        0.11340243369340897,
+        -0.04745811969041824,
+        0.005833764094859362,
+        -0.04174824804067612,
+        0.022730106487870216,
+        0.0013601485406979918,
+        -0.07473982870578766,
+        -0.004801879171282053,
+        0.05632775276899338,
+        -0.04081303998827934,
+        0.11509573459625244,
+        0.004507652949541807,
+        -0.24791881442070007,
+        0.43171870708465576,
+        -0.1362573653459549,
+        -0.10758046060800552,
+        0.02746163308620453,
+        -0.2954745888710022,
+        0.30186471343040466,
+        0.3135572075843811,
+        -1.2296111583709717,
+        0.8754236102104187,
+        -0.11699853837490082,
+        0.022482017055153847,
+        0.24945153295993805,
+        -0.7858022451400757,
+        0.5181443095207214,
+        1.4243930578231812,
+        -1.876152515411377,
+        0.4689188003540039,
+        0.04258054122328758,
+        -0.030832920223474503,
+        0.9340220093727112,
+        -1.512351632118225,
+        -0.3731614947319031,
+        2.021338701248169,
+        -0.7801089286804199,
+        -0.09288544207811356,
+        -0.12423597276210785,
+        -0.36861127614974976,
+        1.1679530143737793,
+        -0.4960964024066925,
+        -1.0398281812667847,
+        0.686152458190918,
+        0.02052121050655842,
+        0.07246638089418411,
+        -0.01763315312564373,
+        -0.37442535161972046,
+        0.33217450976371765,
+        0.22260302305221558,
+        -0.2657756209373474,
+        0.00016369696822948754,
+        0.008136127144098282,
+        -0.03592197597026825,
+        0.022231513634324074,
+        0.041430093348026276,
+        -0.06439317017793655,
+        0.03496818616986275,
+        -0.05143435671925545,
+        0.09930871427059174,
+        0.017110232263803482,
+        -0.3834381699562073,
+        0.44344815611839294,
+        -0.00280396337620914,
+        -0.11487428843975067,
+        0.050503507256507874,
+        -0.22837062180042267,
+        0.47540077567100525,
+        0.5802375674247742,
+        -1.7325034141540527,
+        0.8587368130683899,
+        0.10429240018129349,
+        -0.02456486038863659,
+        0.1340152472257614,
+        -1.2299835681915283,
+        0.7986555099487305,
+        2.2204456329345703,
+        -2.4498374462127686,
+        0.33742472529411316,
+        0.1001473218202591,
+        0.08700849115848541,
+        0.9933257102966309,
+        -2.5278031826019287,
+        -0.5935835242271423,
+        2.710871934890747,
+        -0.87749183177948,
+        -0.06125229224562645,
+        -0.19061818718910217,
+        -0.04017600044608116,
+        1.7519460916519165,
+        -0.7798219919204712,
+        -1.28012216091156,
+        0.7500321269035339,
+        0.02245335467159748,
+        0.08263842761516571,
+        -0.1563340127468109,
+        -0.3502165377140045,
+        0.5060794949531555,
+        0.11768018454313278,
+        -0.2394258826971054,
+        0.0027446788735687733,
+        -0.0012661140644922853,
+        0.010839025489985943,
+        0.04500429332256317,
+        -0.04333498701453209,
+        -0.027386408299207687,
+        0.04357098788022995,
+        -0.04407481476664543,
+        0.08443310111761093,
+        -0.08108946681022644,
+        -0.20346391201019287,
+        0.3825778365135193,
+        -0.16498182713985443,
+        -0.04287993535399437,
+        0.05340999737381935,
+        -0.14011172950267792,
+        0.29446643590927124,
+        0.2738667130470276,
+        -1.1299961805343628,
+        0.7827413082122803,
+        -0.07552053779363632,
+        -0.03602323681116104,
+        0.16167275607585907,
+        -0.6924317479133606,
+        0.4478289783000946,
+        1.2428895235061646,
+        -1.4833877086639404,
+        0.4690392315387726,
+        -0.00820756796747446,
+        -0.09873292595148087,
+        0.692342221736908,
+        -1.0981175899505615,
+        -0.3906446695327759,
+        1.438644528388977,
+        -0.719068169593811,
+        0.026173872873187065,
+        -0.09383898228406906,
+        -0.3282022774219513,
+        1.0363390445709229,
+        -0.23960772156715393,
+        -0.7638148069381714,
+        0.5488630533218384,
+        -0.015319733880460262,
+        0.11911362409591675,
+        0.017409542575478554,
+        -0.4231888949871063,
+        0.23724795877933502,
+        0.1191876158118248,
+        -0.15694500505924225,
+        -0.03534351661801338,
+        0.06342366337776184,
+        0.17738288640975952,
+        0.012300643138587475,
+        -0.06408121436834335,
+        -0.06030220910906792,
+        0.0018237337935715914,
+        0.07659764587879181,
+        0.1820947527885437,
+        0.24410061538219452,
+        -0.06998514384031296,
+        -0.1491813361644745,
+        -0.06184092164039612,
+        0.04607890918850899,
+        0.15362663567066193,
+        0.18308304250240326,
+        0.08175522834062576,
+        -0.305602103471756,
+        -0.2915116548538208,
+        -0.08144206553697586,
+        0.07138665020465851,
+        -0.03521484509110451,
+        -0.0914112851023674,
+        -0.2766699492931366,
+        -0.6285344362258911,
+        -0.38168880343437195,
+        -0.0033710987772792578,
+        0.14477019011974335,
+        -0.03885374590754509,
+        -0.11367184668779373,
+        -0.1979650855064392,
+        -0.3575190007686615,
+        0.016150522977113724,
+        0.28292712569236755,
+        0.2836199402809143,
+        -0.016672370955348015,
+        -0.034946177154779434,
+        -0.014770845882594585,
+        -0.0004113636096008122,
+        0.29938748478889465,
+        0.3562523126602173,
+        0.13313128054141998,
+        -0.029499055817723274,
+        0.007187174167484045,
+        0.0636785551905632,
+        0.047712039202451706,
+        0.20670579373836517,
+        0.10999035090208054,
+        -0.1150810718536377,
+        0.00879934523254633,
+        -0.009125287644565105,
+        -0.013732590712606907,
+        0.04738131910562515,
+        0.0549951009452343,
+        -0.014094026759266853,
+        -0.01195482350885868,
+        -0.017125386744737625,
+        -0.071754589676857,
+        -0.023961570113897324,
+        0.013098018243908882,
+        0.05972208455204964,
+        -0.032899752259254456,
+        -0.024354496970772743,
+        -0.013116234913468361,
+        -0.05865325778722763,
+        -0.006360829807817936,
+        0.12809234857559204,
+        0.14038555324077606,
+        -0.022946689277887344,
+        -0.039698828011751175,
+        0.05144746974110603,
+        -0.025034509599208832,
+        0.08764739334583282,
+        0.24594412744045258,
+        0.19307002425193787,
+        -0.04085381329059601,
+        -0.020323628559708595,
+        0.022060081362724304,
+        0.01799374632537365,
+        0.09039195626974106,
+        0.1681770235300064,
+        0.0016234283102676272,
+        -0.23777234554290771,
+        -0.11634974926710129,
+        -0.014439117163419724,
+        -0.034799374639987946,
+        0.0457066111266613,
+        0.049919649958610535,
+        -0.1926913857460022,
+        -0.2680967450141907,
+        0.0018220803467556834,
+        -0.012749310582876205,
+        -0.04389086738228798,
+        0.0060565415769815445,
+        -0.012036234140396118,
+        -0.12737582623958588,
+        -0.05777670815587044,
+        0.09932202100753784,
+        0.09969642758369446,
+        -0.1296343356370926,
+        -0.2964152693748474,
+        -0.05487265810370445,
+        0.12073978036642075,
+        0.06634647399187088,
+        0.004042446613311768,
+        -0.1586746722459793,
+        -0.6267098784446716,
+        -0.5184157490730286,
+        -0.032286129891872406,
+        0.28023189306259155,
+        0.12663227319717407,
+        -0.08828771114349365,
+        -0.2600027620792389,
+        -0.5287090539932251,
+        -0.0994620993733406,
+        0.7820600271224976,
+        0.9638882279396057,
+        0.2193463146686554,
+        -0.13466303050518036,
+        0.042050741612911224,
+        -0.02292742393910885,
+        0.7523098587989807,
+        1.7435946464538574,
+        1.111282229423523,
+        -0.2104763388633728,
+        -0.35129284858703613,
+        0.08224371820688248,
+        0.11167984455823898,
+        0.6513852477073669,
+        0.9696454405784607,
+        -0.1501394510269165,
+        -1.1777327060699463,
+        -0.7738466262817383,
+        0.01114045549184084,
+        0.004884988535195589,
+        0.2849186658859253,
+        0.14232710003852844,
+        -1.0306764841079712,
+        -1.2078118324279785,
+        -0.14658716320991516,
+        0.036605384200811386,
+        0.0001495486794738099,
+        0.12111346423625946,
+        -0.24653346836566925,
+        -0.7028710246086121,
+        -0.18977169692516327,
+        0.5171932578086853,
+        -0.02514370158314705,
+        0.0885375589132309,
+        -0.1023016944527626,
+        0.023200739175081253,
+        0.11839435249567032,
+        -0.09749021381139755,
+        0.008283962495625019,
+        0.0106261121109128,
+        -0.031724803149700165,
+        -0.1594654619693756,
+        0.433218389749527,
+        -0.33944255113601685,
+        0.14406877756118774,
+        -0.0339396670460701,
+        0.09370072185993195,
+        -0.35916459560394287,
+        0.7577320337295532,
+        -0.5531823635101318,
+        -0.016844574362039566,
+        0.2994873523712158,
+        -0.21487002074718475,
+        -0.16125759482383728,
+        0.35567227005958557,
+        0.09099612385034561,
+        -1.3889282941818237,
+        1.9466298818588257,
+        -1.2556309700012207,
+        0.4389301836490631,
+        -0.010665428824722767,
+        0.4707520306110382,
+        -1.4310415983200073,
+        2.0986156463623047,
+        -1.5515614748001099,
+        0.3905705511569977,
+        0.01881679706275463,
+        0.057307951152324677,
+        -0.29734691977500916,
+        0.369127094745636,
+        -0.05115725100040436,
+        -0.44008156657218933,
+        0.48642784357070923,
+        -0.13904061913490295,
+        -0.004375698510557413,
+        -0.06351548433303833,
+        0.256020188331604,
+        -0.34121274948120117,
+        0.22490821778774261,
+        0.004067304544150829,
+        -0.059063635766506195,
+        -0.010710661299526691,
+        0.03514768183231354,
+        -0.08577805012464523,
+        0.05103181675076485,
+        0.04276616871356964,
+        -0.10832246392965317,
+        0.03325289487838745,
+        0.06318283081054688,
+        -0.11063538491725922,
+        -0.062119144946336746,
+        0.40978243947029114,
+        -0.5597845315933228,
+        0.34106317162513733,
+        -0.030269838869571686,
+        0.057014383375644684,
+        -0.44329890608787537,
+        1.0965592861175537,
+        -1.0767146348953247,
+        0.13287265598773956,
+        0.517289400100708,
+        -0.310720294713974,
+        -0.15501761436462402,
+        0.5854693055152893,
+        -0.12469431757926941,
+        -1.7694847583770752,
+        2.6433238983154297,
+        -1.596714735031128,
+        0.3888415992259979,
+        -0.02415616251528263,
+        0.42178481817245483,
+        -1.8008503913879395,
+        2.8845136165618896,
+        -1.7628657817840576,
+        0.1951047033071518,
+        0.11415407806634903,
+        0.07305648922920227,
+        -0.34212157130241394,
+        0.46562451124191284,
+        0.03175807744264603,
+        -0.7942091226577759,
+        0.6133171319961548,
+        -0.14596694707870483,
+        0.010496735572814941,
+        -0.03459644690155983,
+        0.2948842942714691,
+        -0.47654271125793457,
+        0.2612597346305847,
+        0.016025209799408913,
+        -0.05287598818540573,
+        -0.01606004498898983,
+        0.022197037935256958,
+        0.028397703543305397,
+        -0.0390767939388752,
+        0.0037972000427544117,
+        -0.07010228931903839,
+        0.10934390872716904,
+        0.017220165580511093,
+        0.02215729095041752,
+        -0.14772991836071014,
+        0.2353552132844925,
+        -0.3846408724784851,
+        0.23990634083747864,
+        -0.02300707995891571,
+        0.12085225433111191,
+        -0.3576957881450653,
+        0.6410096883773804,
+        -0.532350480556488,
+        -0.002389132045209408,
+        0.41821879148483276,
+        -0.24739143252372742,
+        -0.10216745734214783,
+        0.16793736815452576,
+        0.16367803514003754,
+        -1.1304419040679932,
+        1.676539421081543,
+        -1.064436435699463,
+        0.26995453238487244,
+        -0.07634275406599045,
+        0.3324422240257263,
+        -1.11312997341156,
+        1.8095507621765137,
+        -1.2477567195892334,
+        0.3605581820011139,
+        -0.06627745926380157,
+        0.008511146530508995,
+        -0.19528241455554962,
+        0.4320055842399597,
+        -0.22881783545017242,
+        -0.18463851511478424,
+        0.3064245581626892,
+        -0.14437103271484375,
+        0.02049900032579899,
+        0.018321938812732697,
+        0.14011529088020325,
+        -0.26683253049850464,
+        0.2172057181596756,
+        -0.12119362503290176,
+        0.025965997949242592,
+        -0.03424325957894325,
+        0.0433838777244091,
+        0.1072857677936554,
+        0.1997794657945633,
+        0.0648089200258255,
+        -0.06444115936756134,
+        -0.13146057724952698,
+        0.02106364443898201,
+        -0.22582228481769562,
+        -0.007233713287860155,
+        0.18876874446868896,
+        -0.5612399578094482,
+        0.2632557451725006,
+        0.44088244438171387,
+        0.11389002948999405,
+        -0.2791701555252075,
+        -0.18004432320594788,
+        0.8571203947067261,
+        -1.9517340660095215,
+        -1.4906251430511475,
+        0.3436146676540375,
+        0.31222787499427795,
+        -0.20083315670490265,
+        -0.217665895819664,
+        3.801243782043457,
+        1.2014728784561157,
+        -0.9149202704429626,
+        0.6968244910240173,
+        0.12756747007369995,
+        -0.06783506274223328,
+        -2.086660385131836,
+        0.5455523133277893,
+        0.49095916748046875,
+        -0.5991013050079346,
+        0.7938552498817444,
+        -0.1335069239139557,
+        0.4730406701564789,
+        -1.00951087474823,
+        -0.537578821182251,
+        -0.49764835834503174,
+        -1.2683815956115723,
+        -0.045739322900772095,
+        -0.16049732267856598,
+        0.30239275097846985,
+        0.035600025206804276,
+        0.6344828605651855,
+        0.8256548643112183,
+        -0.12940075993537903,
+        0.09257010370492935,
+        -0.11000311374664307,
+        0.003206665627658367,
+        -0.008585316129028797,
+        -0.14573170244693756,
+        0.172541081905365,
+        0.2107972949743271,
+        -0.05270108953118324,
+        -0.08480435609817505,
+        0.1914149820804596,
+        0.21630872786045074,
+        -0.23309426009655,
+        -0.29484814405441284,
+        -0.1899339109659195,
+        0.02601807750761509,
+        -0.05416746065020561,
+        0.20924429595470428,
+        0.15566189587116241,
+        -0.1556546688079834,
+        -0.23387494683265686,
+        -0.5112816691398621,
+        0.24130745232105255,
+        -0.049835484474897385,
+        -0.2685615122318268,
+        -0.024764614179730415,
+        0.5458847880363464,
+        0.9501044750213623,
+        0.1328524947166443,
+        0.21218529343605042,
+        0.2524968683719635,
+        -0.5205130577087402,
+        -0.3361912667751312,
+        1.1678112745285034,
+        -0.004513490945100784,
+        -0.9149109125137329,
+        0.2125048041343689,
+        0.22423015534877777,
+        -0.08384363353252411,
+        -0.2866036593914032,
+        -0.20210212469100952,
+        -1.2377471923828125,
+        -0.7704879641532898,
+        0.365038126707077,
+        -0.08308980613946915,
+        -0.08326874673366547,
+        0.456358402967453,
+        0.35142943263053894,
+        0.19268833100795746,
+        0.3706081509590149,
+        -0.04951317980885506,
+        0.10151109844446182,
+        0.005193099845200777,
+        -0.1124582439661026,
+        -0.08353164792060852,
+        -0.18709596991539001,
+        -0.18975794315338135,
+        0.17628741264343262,
+        0.05536900460720062,
+        0.008301885798573494,
+        -0.1890449970960617,
+        0.056875281035900116,
+        0.7981322407722473,
+        -0.05872391164302826,
+        -0.4860122501850128,
+        -0.08073797076940536,
+        0.13145819306373596,
+        -0.03608228266239166,
+        -0.6600452661514282,
+        2.243560314178467,
+        1.9288626909255981,
+        -0.5698518753051758,
+        -0.2486664056777954,
+        0.42693793773651123,
+        0.2667267322540283,
+        -4.395429611206055,
+        -2.15342378616333,
+        0.819127082824707,
+        -0.9362612962722778,
+        -0.3760467767715454,
+        0.5671858787536621,
+        2.468177080154419,
+        -1.6694080829620361,
+        -0.49952322244644165,
+        1.502772569656372,
+        -1.0188850164413452,
+        -0.10419629514217377,
+        -0.36795151233673096,
+        1.2645196914672852,
+        0.7223924994468689,
+        1.751431941986084,
+        2.018704891204834,
+        -0.3197852671146393,
+        0.22054125368595123,
+        -0.19326329231262207,
+        -0.5307535529136658,
+        -0.9362435936927795,
+        -1.0772119760513306,
+        -0.19870880246162415,
+        -0.0650869607925415,
+        -0.0796947032213211,
+        0.15733301639556885,
+        0.08798394352197647,
+        0.0010860684560611844,
+        0.05327683687210083,
+        0.1107875183224678,
+        0.13224183022975922,
+        0.08979664742946625,
+        0.004348093178123236,
+        -0.07060158997774124,
+        -0.19925491511821747,
+        -0.15811985731124878,
+        -0.08220887929201126,
+        -0.022623460739850998,
+        0.08509720861911774,
+        0.00792989507317543,
+        -0.14345014095306396,
+        -0.2720486521720886,
+        -0.18885627388954163,
+        -0.11063539236783981,
+        -0.0355350486934185,
+        0.048891279846429825,
+        -0.12828074395656586,
+        -0.2712610363960266,
+        -0.20134924352169037,
+        -0.1863398402929306,
+        -0.19976121187210083,
+        -0.09535074234008789,
+        0.009852319024503231,
+        -0.2776590585708618,
+        -0.3087778687477112,
+        -0.21431012451648712,
+        -0.19772370159626007,
+        -0.23412325978279114,
+        -0.11640459299087524,
+        0.09514907747507095,
+        -0.17561811208724976,
+        -0.29451555013656616,
+        -0.2381855845451355,
+        -0.18296842277050018,
+        -0.18682444095611572,
+        -0.023345205932855606,
+        0.1438502073287964,
+        0.02504260651767254,
+        -0.1554802507162094,
+        -0.1477985382080078,
+        -0.07874225080013275,
+        -0.002977968193590641,
+        0.1048416793346405,
+        -0.1779504120349884,
+        0.13204343616962433,
+        0.14215172827243805,
+        0.049610622227191925,
+        0.0888131782412529,
+        0.07250366359949112,
+        0.0696505531668663,
+        0.009899160824716091,
+        0.032067786902189255,
+        0.08401404321193695,
+        -0.03567894548177719,
+        -0.004740188363939524,
+        -0.0021664693485945463,
+        -0.011156522668898106,
+        0.0821070745587349,
+        0.10295391082763672,
+        -0.0017653254326432943,
+        -0.16915833950042725,
+        -0.062223054468631744,
+        0.004783258773386478,
+        0.038355808705091476,
+        0.10124270617961884,
+        -0.003437258303165436,
+        -0.18881437182426453,
+        -0.15905225276947021,
+        -0.12576808035373688,
+        -0.11059725284576416,
+        0.021587060764431953,
+        0.07237453758716583,
+        -0.1706620156764984,
+        -0.27434206008911133,
+        -0.23003827035427094,
+        -0.20530915260314941,
+        -0.20856624841690063,
+        -0.021966496482491493,
+        0.13395215570926666,
+        -0.03810539469122887,
+        -0.2409798800945282,
+        -0.2515420913696289,
+        -0.1872486174106598,
+        -0.15951117873191833,
+        0.04223426431417465,
+        0.09909931570291519,
+        0.12328703701496124,
+        -0.057749148458242416,
+        -0.1300545036792755,
+        -0.046062104403972626,
+        0.019744107499718666,
+        0.09484386444091797,
+        -0.2709728479385376,
+        0.03540695831179619,
+        0.1206774190068245,
+        0.057636432349681854,
+        0.10385740548372269,
+        0.032486993819475174,
+        -0.020434774458408356,
+        -0.10122086852788925,
+        -0.0023329253308475018,
+        0.16941140592098236,
+        0.098082534968853,
+        0.1250472217798233,
+        0.06134447827935219,
+        -0.025240115821361542,
+        0.004181401338428259,
+        0.14425808191299438,
+        0.17515034973621368,
+        0.04739757999777794,
+        0.1618604063987732,
+        0.1751406490802765,
+        0.09162088483572006,
+        0.09512057155370712,
+        0.13736343383789062,
+        0.028775952756404877,
+        0.042535409331321716,
+        0.08839954435825348,
+        0.09229374676942825,
+        0.1658262014389038,
+        0.09852072596549988,
+        0.002680110279470682,
+        -0.05479496717453003,
+        -0.03634755313396454,
+        -0.002902726177126169,
+        -0.023990361019968987,
+        0.1277875006198883,
+        0.12727677822113037,
+        0.1002269834280014,
+        -0.040967896580696106,
+        -0.07101184874773026,
+        -0.007902896963059902,
+        0.019561029970645905,
+        0.145268052816391,
+        0.017638152465224266,
+        0.19240263104438782,
+        0.12857146561145782,
+        0.05043037235736847,
+        0.11596394330263138,
+        0.12513381242752075,
+        0.12088746577501297,
+        0.04333524778485298,
+        0.05500142276287079,
+        0.05169082432985306,
+        -0.09941842406988144,
+        -0.005959822330623865,
+        -0.032586321234703064,
+        -0.03065132349729538,
+        -0.04826900362968445,
+        0.14192889630794525,
+        0.2543988823890686,
+        0.09563885629177094,
+        -0.28965362906455994,
+        -0.1341734230518341,
+        0.033991701900959015,
+        -0.22402706742286682,
+        -0.3190857768058777,
+        0.011840387247502804,
+        0.9620282053947449,
+        1.0609054565429688,
+        -0.13429726660251617,
+        -0.20191268622875214,
+        0.05324135720729828,
+        -0.16234318912029266,
+        -0.9101927280426025,
+        -1.7916113138198853,
+        0.3981992304325104,
+        1.3173034191131592,
+        0.53525310754776,
+        0.18472574651241302,
+        0.3719426691532135,
+        0.7792536020278931,
+        -0.027768991887569427,
+        -2.245561122894287,
+        -1.2211185693740845,
+        0.22817185521125793,
+        -0.0023349972907453775,
+        -0.12598364055156708,
+        0.06836964190006256,
+        0.9917387366294861,
+        1.1885775327682495,
+        -0.2851368486881256,
+        -0.7428704500198364,
+        -0.04798422381281853,
+        -0.00811613816767931,
+        -0.19619861245155334,
+        -0.28184008598327637,
+        0.0828644260764122,
+        0.44643187522888184,
+        0.1461745798587799,
+        -0.005575121380388737,
+        -0.06604957580566406,
+        0.011459077708423138,
+        0.03927984461188316,
+        0.0634538009762764,
+        -0.005732079967856407,
+        -0.01014732290059328,
+        0.07607843726873398,
+        0.06948187947273254,
+        -0.010600326582789421,
+        -0.056259915232658386,
+        -0.24602480232715607,
+        -0.01649448834359646,
+        0.11143466085195541,
+        -0.0027401424013078213,
+        -0.012853104621171951,
+        0.08452893793582916,
+        0.639316201210022,
+        0.5167437195777893,
+        -0.2775256335735321,
+        -0.22241903841495514,
+        -0.07067711651325226,
+        -0.06368192285299301,
+        -0.4687917232513428,
+        -1.1776493787765503,
+        0.36015447974205017,
+        0.9171182513237,
+        0.1905054748058319,
+        -0.010661551728844643,
+        0.10800722986459732,
+        0.5352235436439514,
+        0.18558207154273987,
+        -1.5184046030044556,
+        -0.8130561709403992,
+        0.15417319536209106,
+        0.0713079422712326,
+        -0.07369451224803925,
+        -0.09037846326828003,
+        0.6168488264083862,
+        0.9663773775100708,
+        -0.007113471627235413,
+        -0.33585548400878906,
+        -0.02738586813211441,
+        0.061310965567827225,
+        -0.0955657884478569,
+        -0.23896107077598572,
+        -0.1107473075389862,
+        0.1830059289932251,
+        0.10748914629220963,
+        -0.040772341191768646,
+        -0.05803938955068588,
+        -0.0004895658930763602,
+        0.07664632797241211,
+        0.039049405604600906,
+        -0.002806248841807246,
+        -0.02642429992556572,
+        0.05169009417295456,
+        -0.036710865795612335,
+        -0.1002974808216095,
+        -0.12001149356365204,
+        -0.08043934404850006,
+        0.11466419696807861,
+        0.12322796136140823,
+        0.07564827799797058,
+        0.10148002207279205,
+        0.04720174893736839,
+        0.14046646654605865,
+        -0.0819464847445488,
+        -0.30803975462913513,
+        -0.0838734582066536,
+        -0.0801682323217392,
+        0.05861072987318039,
+        0.04970559477806091,
+        -0.20592759549617767,
+        0.2673366665840149,
+        0.2431953400373459,
+        -0.10027645528316498,
+        -0.07884806394577026,
+        -0.09939537942409515,
+        0.1181628480553627,
+        0.25269386172294617,
+        -0.3439132571220398,
+        -0.11160463094711304,
+        0.08640077710151672,
+        0.07200870662927628,
+        -0.03449570760130882,
+        -0.17610406875610352,
+        -0.021308166906237602,
+        0.30556705594062805,
+        0.05186203494668007,
+        -0.004691269714385271,
+        -0.005278654862195253,
+        0.06289899349212646,
+        0.052224051207304,
+        -0.05927770212292671,
+        -0.1586783081293106,
+        -0.022610770538449287,
+        0.03463536128401756,
+        0.004338411148637533,
+        0.01452699676156044,
+        -0.008622901514172554,
+        0.010536444373428822,
+        -0.038111478090286255,
+        0.013373414985835552,
+        0.007125865668058395,
+        -0.003420598339289427,
+        0.03533756732940674,
+        0.0320388600230217,
+        0.045789655297994614,
+        -0.08139114826917648,
+        -0.03447948023676872,
+        -0.01453007198870182,
+        -0.004573625046759844,
+        0.10279268026351929,
+        0.10881853848695755,
+        0.07537791877985,
+        -0.10887791216373444,
+        -0.0980544164776802,
+        -0.06889445334672928,
+        0.006558350287377834,
+        0.197514146566391,
+        0.17890937626361847,
+        0.07630149275064468,
+        -0.16081148386001587,
+        -0.16685302555561066,
+        -0.11421715468168259,
+        -0.013679573312401772,
+        0.22477784752845764,
+        0.20761631429195404,
+        0.07321957498788834,
+        -0.17697854340076447,
+        -0.17810045182704926,
+        -0.1579347848892212,
+        -0.02679254300892353,
+        0.1408146619796753,
+        0.15144851803779602,
+        0.08801613748073578,
+        -0.13237154483795166,
+        -0.13181765377521515,
+        -0.1279487907886505,
+        -0.01779216341674328,
+        0.08145096898078918,
+        0.05625852569937706,
+        0.07724357396364212,
+        -0.04653938114643097,
+        -0.07479449361562729,
+        -0.06189379468560219,
+        -0.04310920089483261,
+        0.02028634026646614,
+        -0.006228619255125523,
+        0.03549303859472275,
+        -0.043929651379585266,
+        0.007818001322448254,
+        0.00874761026352644,
+        -0.017027731984853745,
+        0.11014463752508163,
+        0.0841977447271347,
+        0.05960552394390106,
+        -0.12814101576805115,
+        -0.0544624924659729,
+        -0.045333195477724075,
+        0.02336869016289711,
+        0.22365787625312805,
+        0.18523427844047546,
+        0.09366372227668762,
+        -0.20144090056419373,
+        -0.16367222368717194,
+        -0.13003699481487274,
+        0.0590205080807209,
+        0.3301562964916229,
+        0.26524844765663147,
+        0.09425198286771774,
+        -0.26156124472618103,
+        -0.28513699769973755,
+        -0.21749621629714966,
+        0.04356053099036217,
+        0.35879984498023987,
+        0.29898661375045776,
+        0.0977487862110138,
+        -0.28175386786460876,
+        -0.2964495122432709,
+        -0.249031201004982,
+        0.028877725824713707,
+        0.26395633816719055,
+        0.23059280216693878,
+        0.09593978524208069,
+        -0.22489066421985626,
+        -0.2248908430337906,
+        -0.19214706122875214,
+        0.007535146549344063,
+        0.15299226343631744,
+        0.09148521721363068,
+        0.06946425884962082,
+        -0.1445557326078415,
+        -0.11587042361497879,
+        -0.0978587418794632,
+        -0.00984917301684618,
+        -0.012626220472157001,
+        -0.02837960794568062,
+        0.02399199828505516,
+        -0.005340439733117819,
+        0.023224178701639175,
+        0.011642432771623135,
+        0.003958537708967924,
+        0.042965203523635864,
+        0.01099414099007845,
+        0.024063799530267715,
+        -0.0702008455991745,
+        0.007805663626641035,
+        0.0050195748917758465,
+        0.017281856387853622,
+        0.10123670846223831,
+        0.06401767581701279,
+        0.02626805007457733,
+        -0.1073761060833931,
+        -0.03802435100078583,
+        -0.014407800510525703,
+        -0.0006281707319431007,
+        0.15516239404678345,
+        0.12629136443138123,
+        0.033691491931676865,
+        -0.17609107494354248,
+        -0.15251316130161285,
+        -0.07914211601018906,
+        -0.015578335151076317,
+        0.18422608077526093,
+        0.1740245372056961,
+        0.06139932945370674,
+        -0.17213505506515503,
+        -0.1602732092142105,
+        -0.08922445774078369,
+        -0.012822975404560566,
+        0.13543544709682465,
+        0.12543149292469025,
+        0.07651004195213318,
+        -0.13805902004241943,
+        -0.09661149233579636,
+        -0.052669934928417206,
+        -0.03268992528319359,
+        0.0391642227768898,
+        0.01116940937936306,
+        0.04585625231266022,
+        -0.06474924832582474,
+        -0.023607701063156128,
+        -0.007017284631729126,
+        -0.026150476187467575,
+        0.05729387328028679,
+        -0.10095079243183136,
+        0.16617903113365173,
+        -0.13664309680461884,
+        0.026482274755835533,
+        0.008411461487412453,
+        -0.03410203382372856,
+        0.022963764145970345,
+        0.008903563022613525,
+        0.11244194954633713,
+        -0.20863348245620728,
+        0.11064451932907104,
+        -0.024916114285588264,
+        0.009591493755578995,
+        -0.26092270016670227,
+        0.5717483758926392,
+        -0.38539814949035645,
+        0.035056713968515396,
+        0.08623965084552765,
+        -0.016184961423277855,
+        0.11129201203584671,
+        -0.6138678789138794,
+        1.3646206855773926,
+        -1.4969615936279297,
+        0.8465064764022827,
+        -0.2794847786426544,
+        0.05826558917760849,
+        0.07709132134914398,
+        -0.5444677472114563,
+        1.3013663291931152,
+        -1.5686073303222656,
+        0.9930508732795715,
+        -0.39188963174819946,
+        0.08085884898900986,
+        -0.05875617265701294,
+        0.03498996049165726,
+        0.23967482149600983,
+        -0.3468690514564514,
+        0.19146253168582916,
+        0.019604403525590897,
+        -0.027150027453899384,
+        -0.024670494720339775,
+        0.09944183379411697,
+        -0.11718503385782242,
+        0.09772855788469315,
+        -0.11857263743877411,
+        0.09660946577787399,
+        -0.03638811036944389,
+        -0.0295167975127697,
+        0.1032838523387909,
+        -0.12557579576969147,
+        0.11812210828065872,
+        -0.08446288853883743,
+        0.027706580236554146,
+        0.010997293516993523,
+        -0.06348618865013123,
+        0.09578556567430496,
+        -0.0165568757802248,
+        -0.014778072014451027,
+        -0.07772849500179291,
+        0.11245536059141159,
+        -0.043248821049928665,
+        0.013345679268240929,
+        -0.22149333357810974,
+        0.6456363797187805,
+        -0.7280437350273132,
+        0.3046833574771881,
+        0.06304280459880829,
+        -0.07310052216053009,
+        0.08824795484542847,
+        -0.65179842710495,
+        1.6453673839569092,
+        -2.046448230743408,
+        1.3267604112625122,
+        -0.42399832606315613,
+        0.0010522910160943866,
+        0.07953720539808273,
+        -0.5960973501205444,
+        1.5601089000701904,
+        -2.084894895553589,
+        1.4612183570861816,
+        -0.5491638779640198,
+        0.13709494471549988,
+        -0.09170618653297424,
+        0.07287970930337906,
+        0.24422486126422882,
+        -0.4581631124019623,
+        0.29479551315307617,
+        -0.07515113800764084,
+        -0.012292998842895031,
+        -0.04451148584485054,
+        0.14961428940296173,
+        -0.15577177703380585,
+        0.06323063373565674,
+        -0.07806269824504852,
+        0.07061618566513062,
+        -0.026793144643306732,
+        -0.051938362419605255,
+        0.13946141302585602,
+        -0.14129231870174408,
+        0.11092118173837662,
+        -0.08889970183372498,
+        0.034787945449352264,
+        -0.008983314968645573,
+        -0.04930088296532631,
+        0.09856640547513962,
+        -0.09350966662168503,
+        0.07015673816204071,
+        -0.06468848884105682,
+        0.08028972148895264,
+        -0.02378295361995697,
+        0.004251216538250446,
+        -0.11239825189113617,
+        0.2660067081451416,
+        -0.367576539516449,
+        0.2212517410516739,
+        -0.035011082887649536,
+        -0.037866897881031036,
+        0.11835235357284546,
+        -0.4868132174015045,
+        0.9402765035629272,
+        -1.0933791399002075,
+        0.9518744349479675,
+        -0.5096855759620667,
+        0.12277142703533173,
+        0.12916085124015808,
+        -0.4648635983467102,
+        0.8895858526229858,
+        -1.0776352882385254,
+        1.023865818977356,
+        -0.5914785861968994,
+        0.1682877242565155,
+        -0.05646277964115143,
+        0.04132156819105148,
+        -0.01790236309170723,
+        -0.059831030666828156,
+        0.10092897713184357,
+        -0.1268356889486313,
+        0.013669619336724281,
+        -0.02746082842350006,
+        0.11544085294008255,
+        -0.2124193012714386,
+        0.2733248472213745,
+        -0.1360178142786026,
+        0.025302443653345108,
+        0.01249375008046627,
+        -0.015119954012334347,
+        0.017966970801353455,
+        0.00269943755120039,
+        0.014392177574336529,
+        0.007648292928934097,
+        0.011665135622024536,
+        -0.006192799191921949,
+        0.004215092398226261,
+        0.017718149349093437,
+        0.046436555683612823,
+        0.044417623430490494,
+        0.01518242433667183,
+        -0.0020157198887318373,
+        -0.01828707568347454,
+        -0.029163505882024765,
+        -0.03131464868783951,
+        -0.004393945913761854,
+        0.048599082976579666,
+        0.015757638961076736,
+        -0.015650734305381775,
+        -0.002684049541130662,
+        -0.0697445422410965,
+        -0.25050923228263855,
+        -0.4758685231208801,
+        -0.5382962822914124,
+        -0.38907238841056824,
+        -0.12599025666713715,
+        -0.00266047241166234,
+        0.0758173018693924,
+        0.26593172550201416,
+        0.4203726053237915,
+        0.4958920478820801,
+        0.3697706162929535,
+        0.12434400618076324,
+        0.026325728744268417,
+        0.022295912727713585,
+        0.08135133236646652,
+        0.2627769708633423,
+        0.26325660943984985,
+        0.12326934933662415,
+        0.058665141463279724,
+        0.04346219077706337,
+        -0.0013142779935151339,
+        -0.10037153959274292,
+        -0.27075886726379395,
+        -0.28071707487106323,
+        -0.17300420999526978,
+        -0.06914675980806351,
+        0.004067219793796539,
+        -0.020674005150794983,
+        0.02103183977305889,
+        0.0033879741095006466,
+        0.013523808680474758,
+        -0.007318845018744469,
+        -0.009975744411349297,
+        -0.02981705591082573,
+        0.023193644359707832,
+        0.09624253213405609,
+        0.1077117845416069,
+        0.11186518520116806,
+        0.07592211663722992,
+        0.04614634811878204,
+        0.015908582136034966,
+        -0.05212458223104477,
+        -0.1262977123260498,
+        -0.10974782705307007,
+        -0.07645918428897858,
+        -0.06987964361906052,
+        -0.08783216774463654,
+        -0.046172842383384705,
+        -0.22593465447425842,
+        -0.5281140804290771,
+        -0.8424770832061768,
+        -0.9608982801437378,
+        -0.7363743185997009,
+        -0.3312055170536041,
+        -0.10426472127437592,
+        0.24067367613315582,
+        0.5504152178764343,
+        0.81276935338974,
+        0.9592635035514832,
+        0.7479950785636902,
+        0.32608768343925476,
+        0.14525265991687775,
+        0.15008939802646637,
+        0.32246851921081543,
+        0.5287250876426697,
+        0.5817036032676697,
+        0.37340155243873596,
+        0.20366452634334564,
+        0.1546182781457901,
+        -0.11224830150604248,
+        -0.29856279492378235,
+        -0.5281672477722168,
+        -0.5890122056007385,
+        -0.4024880528450012,
+        -0.23706914484500885,
+        -0.0641399398446083,
+        -0.0025121152866631746,
+        0.0051757702603936195,
+        -0.014290476217865944,
+        0.0043721878901124,
+        -0.004783981014043093,
+        0.021787043660879135,
+        -0.004969750996679068,
+        -0.022116241976618767,
+        0.05208030343055725,
+        0.07022145390510559,
+        0.03730607405304909,
+        0.03242917358875275,
+        0.04344351217150688,
+        -0.01189794484525919,
+        -0.0418211966753006,
+        -0.059125497937202454,
+        -0.014576594345271587,
+        0.01294493954628706,
+        -0.011262460611760616,
+        -0.059920165687799454,
+        -0.04733816161751747,
+        -0.12665517628192902,
+        -0.29677024483680725,
+        -0.5247481465339661,
+        -0.6474934816360474,
+        -0.4751538038253784,
+        -0.1937171369791031,
+        -0.05117221921682358,
+        0.14646948873996735,
+        0.32891425490379333,
+        0.5415402054786682,
+        0.6071264147758484,
+        0.4653589427471161,
+        0.18045872449874878,
+        0.09937354922294617,
+        0.1264665126800537,
+        0.18507222831249237,
+        0.31783968210220337,
+        0.3545042872428894,
+        0.22468777000904083,
+        0.09973976761102676,
+        0.1227618008852005,
+        -0.07824759930372238,
+        -0.20465101301670074,
+        -0.36476215720176697,
+        -0.38243186473846436,
+        -0.2540777623653412,
+        -0.13525226712226868,
+        -0.03621843457221985,
+        -0.012233156710863113,
+        -0.01481863297522068,
+        -0.04313792288303375,
+        0.002874002791941166,
+        -0.028444716706871986,
+        -0.04687628522515297,
+        -0.026806645095348358,
+        -0.0228339321911335,
+        -0.015892738476395607,
+        -0.015550780110061169,
+        0.07011140882968903,
+        0.0017389585264027119,
+        -0.05721491947770119,
+        -0.017484690994024277,
+        -0.03954736143350601,
+        -0.006339249666780233,
+        0.08166316151618958,
+        0.37439921498298645,
+        0.2830294966697693,
+        0.00668215099722147,
+        -0.038873329758644104,
+        -0.012295035645365715,
+        0.04932165890932083,
+        0.31826695799827576,
+        0.8449289202690125,
+        0.7123299241065979,
+        0.2574000954627991,
+        0.04747961834073067,
+        -0.04416817054152489,
+        -0.005029442720115185,
+        0.2027042657136917,
+        0.6639980673789978,
+        0.6243636012077332,
+        0.21359916031360626,
+        0.027929672971367836,
+        -0.05395142361521721,
+        -0.04981911554932594,
+        -0.006375179626047611,
+        0.23660773038864136,
+        0.2155737280845642,
+        0.020577391609549522,
+        -0.032118700444698334,
+        -0.02332071214914322,
+        -0.009217707440257072,
+        -0.038096409291028976,
+        0.05811609327793121,
+        0.03776064142584801,
+        -0.03570764884352684,
+        -0.042420413345098495,
+        0.017812976613640785,
+        0.019242385402321815,
+        0.030057156458497047,
+        0.003040613606572151,
+        0.02378096617758274,
+        0.04043402150273323,
+        0.0243258997797966,
+        0.014026327058672905,
+        0.005650558043271303,
+        -0.002831381279975176,
+        -0.0645776093006134,
+        -0.03761167451739311,
+        0.043774381279945374,
+        0.010685136541724205,
+        0.031011218205094337,
+        -0.0025828774087131023,
+        -0.11959855258464813,
+        -0.3524792194366455,
+        -0.30037227272987366,
+        -0.053334690630435944,
+        0.009859252721071243,
+        0.0010005333460867405,
+        -0.04819931834936142,
+        -0.3154168128967285,
+        -0.7240553498268127,
+        -0.6380828022956848,
+        -0.25695785880088806,
+        -0.06639125943183899,
+        0.03295261785387993,
+        -0.012727363035082817,
+        -0.24232468008995056,
+        -0.6055921912193298,
+        -0.5679556727409363,
+        -0.20067356526851654,
+        -0.03628019988536835,
+        0.04774145409464836,
+        0.029560575261712074,
+        -0.038632482290267944,
+        -0.24032950401306152,
+        -0.2095729559659958,
+        -0.006905315909534693,
+        0.02563827484846115,
+        0.03053808957338333,
+        0.0012747920118272305,
+        0.004095789045095444,
+        -0.07932732999324799,
+        -0.046672020107507706,
+        0.02153847925364971,
+        0.019504766911268234,
+        -0.006118285935372114,
+        0.0026654782705008984,
+        0.013819373212754726,
+        -0.01078135147690773,
+        0.0070082321763038635,
+        0.00906399916857481,
+        0.010149766691029072,
+        0.000516490894369781,
+        0.00034157291520386934,
+        0.02412085421383381,
+        0.006926041562110186,
+        0.023299943655729294,
+        0.01129852794110775,
+        -0.0018704778049141169,
+        0.016042279079556465,
+        0.023886069655418396,
+        0.04207555204629898,
+        -0.0021778997033834457,
+        0.041684601455926895,
+        0.05059140920639038,
+        0.03518521040678024,
+        -0.0032736151479184628,
+        -0.0007146652205847204,
+        0.015503454953432083,
+        -0.11896659433841705,
+        -0.07006713002920151,
+        0.007565992418676615,
+        0.012584990821778774,
+        0.00843358226120472,
+        0.017024952918291092,
+        0.0359124094247818,
+        -0.05997823178768158,
+        -0.04116949439048767,
+        -0.016472430899739265,
+        0.002696823561564088,
+        0.00829327292740345,
+        0.016238784417510033,
+        0.0455794483423233,
+        0.0019872160628437996,
+        -0.005927432328462601,
+        -0.003552153240889311,
+        0.020063765347003937,
+        0.00010026743984781206,
+        0.01045019831508398,
+        0.034689340740442276,
+        0.014206668362021446,
+        0.015128945000469685,
+        0.00972809735685587,
+        0.019944868981838226,
+        0.020581791177392006,
+        0.02938947267830372,
+        0.03923909366130829,
+        0.03601628914475441,
+        0.030168617144227028,
+        0.05403255671262741,
+        0.03985666483640671,
+        0.020015308633446693,
+        0.0285494402050972,
+        0.013555807992815971,
+        -0.04409409686923027,
+        -0.07503483444452286,
+        0.01716756261885166,
+        0.02053452841937542,
+        0.057520389556884766,
+        0.02973104454576969,
+        -0.04563397541642189,
+        -0.2676408588886261,
+        -0.30933722853660583,
+        -0.11671236902475357,
+        0.0020135289523750544,
+        0.022801443934440613,
+        -0.03161352127790451,
+        -0.2704106271266937,
+        -0.5803710222244263,
+        -0.5762420296669006,
+        -0.30449461936950684,
+        -0.0780220776796341,
+        0.017343536019325256,
+        -0.05319945886731148,
+        -0.2906038463115692,
+        -0.598426342010498,
+        -0.5925986766815186,
+        -0.31852787733078003,
+        -0.09950074553489685,
+        0.05888299271464348,
+        0.01939479075372219,
+        -0.1060815081000328,
+        -0.3505017161369324,
+        -0.3200446665287018,
+        -0.10609738528728485,
+        0.03659524768590927,
+        0.056114207953214645,
+        0.03447861596941948,
+        0.014380007050931454,
+        -0.09436371922492981,
+        -0.07562272250652313,
+        0.04223132133483887,
+        0.06327345967292786,
+        -0.03735652193427086,
+        -0.052881840616464615,
+        -0.058017320930957794,
+        -0.02474917098879814,
+        -0.02431381866335869,
+        -0.0629878118634224,
+        -0.05212349444627762,
+        -0.03820814937353134,
+        -0.0034579068887978792,
+        -0.004930540919303894,
+        0.07968354970216751,
+        0.07278168946504593,
+        0.015167324803769588,
+        -0.013638288713991642,
+        -0.05875609815120697,
+        -0.008851750753819942,
+        0.10708516091108322,
+        0.33075177669525146,
+        0.3502756953239441,
+        0.14791442453861237,
+        0.03131852671504021,
+        -0.028764141723513603,
+        0.07454497367143631,
+        0.3000347316265106,
+        0.6147283315658569,
+        0.6289594173431396,
+        0.3398674726486206,
+        0.13494613766670227,
+        -0.03705109655857086,
+        0.0633230209350586,
+        0.3147434592247009,
+        0.595033586025238,
+        0.594217836856842,
+        0.33864542841911316,
+        0.11264053732156754,
+        -0.059276629239320755,
+        0.005206871312111616,
+        0.14524762332439423,
+        0.37473905086517334,
+        0.34477534890174866,
+        0.12632343173027039,
+        0.011062734760344028,
+        -0.06149457022547722,
+        -0.028670497238636017,
+        0.011082210578024387,
+        0.13112866878509521,
+        0.1106843650341034,
+        -0.0025933771394193172,
+        -0.03781202808022499,
+        0.030325254425406456,
+        0.017758814617991447,
+        0.01635698974132538,
+        -0.008786264806985855,
+        -0.0005018062074668705,
+        0.005934061016887426,
+        0.020206287503242493,
+        0.019497420638799667,
+        -0.01290479488670826,
+        -0.010817185044288635,
+        -0.032760608941316605,
+        -0.026973316445946693,
+        -0.0021766452118754387,
+        -0.012848617509007454,
+        -0.0002560729335527867,
+        -0.02383977733552456,
+        -0.05322824791073799,
+        -0.05382781848311424,
+        -0.04459262639284134,
+        -0.04581240937113762,
+        -0.03465775027871132,
+        0.0026904877740889788,
+        -0.026097090914845467,
+        -0.05170493200421333,
+        -0.04981262609362602,
+        -0.05221042037010193,
+        -0.05268307775259018,
+        -0.04735802114009857,
+        0.019142162054777145,
+        -0.019374292343854904,
+        -0.03312355652451515,
+        -0.04133244976401329,
+        -0.033129844814538956,
+        -0.01844680868089199,
+        -0.024726904928684235,
+        0.0012146441731601954,
+        -0.025521529838442802,
+        -0.03120318427681923,
+        -0.04863203689455986,
+        -0.021450525149703026,
+        -0.04190714284777641,
+        -0.02833862416446209,
+        0.017827404662966728,
+        -0.010181388817727566,
+        -0.020994380116462708,
+        -0.04290826618671417,
+        -0.031555648893117905,
+        -0.030525390058755875,
+        -0.024981478229165077,
+        -0.017512500286102295,
+        0.019927235320210457,
+        0.00433371402323246,
+        -0.009276121854782104,
+        -0.03990143537521362,
+        -0.021251117810606956,
+        0.017825132235884666,
+        -0.02313065528869629,
+        0.012881814502179623,
+        0.0009175563463941216,
+        -0.0656605213880539,
+        -0.007037178613245487,
+        0.023603176698088646,
+        0.04873553663492203,
+        0.013912673108279705,
+        9.78652315097861e-05,
+        -0.03166677802801132,
+        -0.11772678792476654,
+        -0.034320034086704254,
+        0.04952533170580864,
+        0.10113520920276642,
+        0.030472615733742714,
+        -0.05131377652287483,
+        -0.1371452510356903,
+        -0.2326214611530304,
+        -0.0629519522190094,
+        0.12444627285003662,
+        0.15845368802547455,
+        0.014535457827150822,
+        -0.06888624280691147,
+        -0.18798232078552246,
+        -0.24720685184001923,
+        -0.04858007654547691,
+        0.26889580488204956,
+        0.2433905005455017,
+        -0.01772989332675934,
+        -0.06027546152472496,
+        -0.12164203822612762,
+        -0.20018024742603302,
+        0.0035393801517784595,
+        0.27190765738487244,
+        0.1929154396057129,
+        -0.012923460453748703,
+        -0.013931642286479473,
+        -0.043986693024635315,
+        -0.0655391663312912,
+        0.04751605913043022,
+        0.13482201099395752,
+        0.06690078228712082,
+        -0.01862635649740696,
+        0.02938506379723549,
+        0.01789080537855625,
+        -0.006509440019726753,
+        -0.029202938079833984,
+        -0.023693149909377098,
+        0.01042762491852045,
+        -0.0035929735749959946,
+        0.024952176958322525,
+        -0.013459124602377415,
+        -0.10798560827970505,
+        -0.020217353478074074,
+        0.017876077443361282,
+        0.07628928124904633,
+        0.04444783553481102,
+        0.012667268514633179,
+        -0.09012818336486816,
+        -0.22452381253242493,
+        -0.07556752860546112,
+        0.07942477613687515,
+        0.17035256326198578,
+        0.0396822914481163,
+        -0.08236342668533325,
+        -0.23916372656822205,
+        -0.3645225763320923,
+        -0.10748416185379028,
+        0.1996970921754837,
+        0.3076043725013733,
+        -0.0033923503942787647,
+        -0.13259321451187134,
+        -0.28894615173339844,
+        -0.3605952262878418,
+        -0.07969008386135101,
+        0.3583948314189911,
+        0.4267900586128235,
+        -0.02228585258126259,
+        -0.11386624723672867,
+        -0.21445821225643158,
+        -0.26956692337989807,
+        0.026791207492351532,
+        0.37918713688850403,
+        0.37130093574523926,
+        -0.05172214284539223,
+        -0.05132569745182991,
+        -0.07469630241394043,
+        -0.11400169134140015,
+        0.07863093167543411,
+        0.24061299860477448,
+        0.19393151998519897,
+        -0.03217098489403725,
+        0.013085477985441685,
+        0.032348379492759705,
+        0.03207695484161377,
+        0.010604938492178917,
+        -0.026534704491496086,
+        -0.018284842371940613,
+        -0.01768680103123188,
+        -0.001516501884907484,
+        0.013829287141561508,
+        -0.034318119287490845,
+        0.015753330662846565,
+        -0.0018936718115583062,
+        0.014737343415617943,
+        0.03306088596582413,
+        0.020835628733038902,
+        -0.03396771103143692,
+        -0.10758449137210846,
+        -0.03052518330514431,
+        0.020080547779798508,
+        0.06180800125002861,
+        0.03735671192407608,
+        -0.037925880402326584,
+        -0.09720461815595627,
+        -0.21495617926120758,
+        -0.06842153519392014,
+        0.08532039076089859,
+        0.13350333273410797,
+        0.03649023920297623,
+        -0.03904158994555473,
+        -0.1483580619096756,
+        -0.2068314403295517,
+        -0.05687328055500984,
+        0.21108660101890564,
+        0.21018920838832855,
+        0.009318819269537926,
+        -0.037683792412281036,
+        -0.09845960140228271,
+        -0.1535443514585495,
+        0.004504916723817587,
+        0.20256847143173218,
+        0.1799001693725586,
+        -0.03175490349531174,
+        -0.020391397178173065,
+        -0.007309200707823038,
+        -0.06765769422054291,
+        0.013149870559573174,
+        0.08469820767641068,
+        0.04147877171635628,
+        -0.0027241194620728493,
+        0.008016721345484257,
+        0.001382349175401032,
+        0.0001219741752720438,
+        -0.059255484491586685,
+        -0.03761141747236252,
+        0.0381690077483654,
+        -0.01603613793849945,
+        0.0017731477273628116,
+        -0.016544193029403687,
+        0.09518970549106598,
+        0.1735895872116089,
+        0.005558829288929701,
+        -0.13464735448360443,
+        -0.0703420490026474,
+        0.001990854274481535,
+        -0.03426021337509155,
+        -0.4390500485897064,
+        -0.11292288452386856,
+        0.20430812239646912,
+        0.14832687377929688,
+        0.06074441969394684,
+        -0.03749264031648636,
+        0.408058226108551,
+        0.43119552731513977,
+        -0.3804298937320709,
+        -0.3694773018360138,
+        -0.03696960583329201,
+        0.04022200033068657,
+        -0.0812998041510582,
+        -0.4322642385959625,
+        0.19638888537883759,
+        0.7809834480285645,
+        0.11584538966417313,
+        -0.04975399747490883,
+        -0.015579828992486,
+        0.1362757831811905,
+        0.027220597490668297,
+        -0.4703449606895447,
+        -0.3726261258125305,
+        0.11754196882247925,
+        -0.01204066164791584,
+        -0.00118898821529001,
+        -0.05152498185634613,
+        0.08767394721508026,
+        0.14183296263217926,
+        0.01692730002105236,
+        -0.04587334021925926,
+        0.011115594767034054,
+        0.021572716534137726,
+        -0.021584773436188698,
+        -0.012763801962137222,
+        0.05708793178200722,
+        0.021982798352837563,
+        -0.02731800265610218,
+        0.03000856563448906,
+        0.006653181277215481,
+        -0.02485630102455616,
+        -0.20296195149421692,
+        -0.10483214259147644,
+        0.20483383536338806,
+        0.1350196748971939,
+        -0.08543248474597931,
+        0.02644401416182518,
+        0.26855263113975525,
+        0.1071053072810173,
+        -0.8168368935585022,
+        -0.6617473363876343,
+        0.02877889946103096,
+        0.21807144582271576,
+        -0.02164696715772152,
+        -0.03712613880634308,
+        0.9743875861167908,
+        1.1631361246109009,
+        -0.45643851161003113,
+        -0.8180081844329834,
+        -0.28109386563301086,
+        -0.09115415811538696,
+        -0.4352502226829529,
+        -0.7433719038963318,
+        0.5383746027946472,
+        1.7271664142608643,
+        0.509749174118042,
+        -0.0689467042684555,
+        0.010011479258537292,
+        0.11752951890230179,
+        -0.28825971484184265,
+        -1.113126277923584,
+        -0.6029489636421204,
+        0.357056587934494,
+        0.19766344130039215,
+        0.023361098021268845,
+        0.04305602237582207,
+        0.24867205321788788,
+        0.16359609365463257,
+        -0.2485191822052002,
+        -0.2251967489719391,
+        0.030422789976000786,
+        0.0049157580360770226,
+        -0.05497031658887863,
+        -0.030760835856199265,
+        0.034536562860012054,
+        0.019565051421523094,
+        -0.00933124776929617,
+        0.01611645519733429,
+        0.07988770306110382,
+        -0.021982649341225624,
+        -0.21876110136508942,
+        -0.10555483400821686,
+        0.1893070936203003,
+        0.14684906601905823,
+        -0.031080693006515503,
+        0.09768003225326538,
+        0.3261844515800476,
+        0.1466774046421051,
+        -0.6738073825836182,
+        -0.5424039363861084,
+        0.04689512774348259,
+        0.22039148211479187,
+        -0.07084018737077713,
+        -0.07436021417379379,
+        0.8260523080825806,
+        1.0253428220748901,
+        -0.38162854313850403,
+        -0.727206289768219,
+        -0.2605172097682953,
+        -0.0996573269367218,
+        -0.3653049170970917,
+        -0.6791687607765198,
+        0.43514078855514526,
+        1.4186147451400757,
+        0.38797008991241455,
+        -0.12675431370735168,
+        0.02766786515712738,
+        0.14237603545188904,
+        -0.2306709885597229,
+        -0.9204807877540588,
+        -0.5071616172790527,
+        0.32662850618362427,
+        0.20703284442424774,
+        -0.020968681201338768,
+        0.014105334877967834,
+        0.24642448127269745,
+        0.20103473961353302,
+        -0.15519124269485474,
+        -0.22072142362594604,
+        0.049920063465833664,
+        -0.05465548485517502,
+        0.018651481717824936,
+        0.030082669109106064,
+        0.05234164372086525,
+        0.10243640840053558,
+        0.03569166734814644,
+        0.038984544575214386,
+        0.05248976871371269,
+        0.24501988291740417,
+        0.4674161374568939,
+        0.7142530083656311,
+        0.7423628568649292,
+        0.6262048482894897,
+        0.4019012451171875,
+        -0.010997634381055832,
+        0.17266513407230377,
+        0.4467124342918396,
+        0.7795005440711975,
+        0.8282667994499207,
+        0.6824804544448853,
+        0.3955397605895996,
+        0.009771074168384075,
+        0.10707246512174606,
+        0.23039454221725464,
+        0.33151063323020935,
+        0.36120596528053284,
+        0.3240644633769989,
+        0.17939962446689606,
+        -0.01115038525313139,
+        -0.11081521213054657,
+        -0.2146066278219223,
+        -0.3572347164154053,
+        -0.44021451473236084,
+        -0.38320258259773254,
+        -0.24643990397453308,
+        0.031578775495290756,
+        -0.21325217187404633,
+        -0.4312629997730255,
+        -0.7276368141174316,
+        -0.8273008465766907,
+        -0.718246340751648,
+        -0.4161607027053833,
+        -0.06636986136436462,
+        -0.28078269958496094,
+        -0.476252943277359,
+        -0.734549880027771,
+        -0.7796792984008789,
+        -0.6637035608291626,
+        -0.41896238923072815,
+        0.021693198010325432,
+        0.006199972704052925,
+        -0.016619624570012093,
+        -0.010678192600607872,
+        0.012267512269318104,
+        0.004102918319404125,
+        -0.004080160986632109,
+        -0.0029241242446005344,
+        -0.027252744883298874,
+        -0.0772257149219513,
+        -0.09107967466115952,
+        -0.11302012205123901,
+        -0.08569496124982834,
+        -0.07242150604724884,
+        -0.016465697437524796,
+        -0.04874062165617943,
+        -0.09103028476238251,
+        -0.09025602042675018,
+        -0.07523388415575027,
+        -0.06320428103208542,
+        -0.048220545053482056,
+        -0.028701437637209892,
+        -0.008647853508591652,
+        -0.022354092448949814,
+        -0.06076030433177948,
+        -0.030872423201799393,
+        -0.045786645263433456,
+        -0.04190178960561752,
+        0.03718986362218857,
+        0.021405767649412155,
+        0.007675759959965944,
+        0.02794131636619568,
+        0.030316906049847603,
+        0.007403802592307329,
+        0.04861852154135704,
+        0.023217258974909782,
+        0.04545973241329193,
+        0.07504793256521225,
+        0.06824314594268799,
+        0.07417462021112442,
+        0.0769289955496788,
+        0.0766506940126419,
+        -0.0028638055082410574,
+        0.05911175534129143,
+        0.055706772953271866,
+        0.10735032707452774,
+        0.10494870692491531,
+        0.11092723160982132,
+        0.09338293969631195,
+        0.04235343262553215,
+        -0.022347571328282356,
+        -0.026347652077674866,
+        -0.06954608112573624,
+        -0.06944439560174942,
+        -0.05570404976606369,
+        -0.042987462133169174,
+        -0.056951191276311874,
+        -0.2151203453540802,
+        -0.3603246510028839,
+        -0.5899456143379211,
+        -0.6453464031219482,
+        -0.5338351726531982,
+        -0.31790611147880554,
+        0.049492284655570984,
+        -0.12898015975952148,
+        -0.40155911445617676,
+        -0.6737278699874878,
+        -0.7170611619949341,
+        -0.5817899703979492,
+        -0.32979026436805725,
+        -0.005899591837078333,
+        -0.07673019915819168,
+        -0.190496027469635,
+        -0.34019437432289124,
+        -0.3314637243747711,
+        -0.2796767055988312,
+        -0.1381818801164627,
+        -0.008025999180972576,
+        0.08429048955440521,
+        0.2105528861284256,
+        0.3415210545063019,
+        0.4151126444339752,
+        0.34003961086273193,
+        0.21059827506542206,
+        -0.03514896333217621,
+        0.1792585551738739,
+        0.3903186321258545,
+        0.6413942575454712,
+        0.7557680010795593,
+        0.6069726943969727,
+        0.3415443003177643,
+        0.03447553142905235,
+        0.21517080068588257,
+        0.4215562045574188,
+        0.6151171922683716,
+        0.6550290584564209,
+        0.5680058002471924,
+        0.33561068773269653,
+        -0.12205997854471207,
+        -0.0038300298620015383,
+        0.3281119763851166,
+        -0.2328944057226181,
+        -0.03834507241845131,
+        0.05432930961251259,
+        -0.014430212788283825,
+        0.006271198857575655,
+        0.32864242792129517,
+        0.47277259826660156,
+        -0.5593215227127075,
+        -0.14971251785755157,
+        0.13066314160823822,
+        -0.09738356620073318,
+        0.2966129779815674,
+        0.5606555342674255,
+        -0.3184640407562256,
+        -2.022890090942383,
+        -0.361995667219162,
+        0.5496177673339844,
+        0.02796279452741146,
+        -0.21818380057811737,
+        -0.5373459458351135,
+        -1.9538941383361816,
+        -1.9984712600708008,
+        1.6747761964797974,
+        1.5063239336013794,
+        -0.24534250795841217,
+        -0.040306344628334045,
+        -0.16963164508342743,
+        -0.40690454840660095,
+        1.3548375368118286,
+        3.922116279602051,
+        0.8723023533821106,
+        -0.8986141681671143,
+        0.06912416964769363,
+        0.2192920595407486,
+        0.352949321269989,
+        1.2243634462356567,
+        1.1395865678787231,
+        -1.5146961212158203,
+        -1.1557590961456299,
+        -0.05440744385123253,
+        -0.04629289731383324,
+        -0.002693743444979191,
+        -0.21906790137290955,
+        -0.5464610457420349,
+        -1.1933224201202393,
+        0.01913866586983204,
+        0.09363497048616409,
+        -0.06080613285303116,
+        -0.049100056290626526,
+        0.04482033848762512,
+        -0.04087500274181366,
+        -0.009318803437054157,
+        0.009458474814891815,
+        -0.09565524011850357,
+        -0.2264278084039688,
+        -0.0698866918683052,
+        0.13825084269046783,
+        0.014815542846918106,
+        -0.05801662430167198,
+        0.012776852585375309,
+        -0.0753035843372345,
+        -0.07555855065584183,
+        0.484436959028244,
+        0.6397283673286438,
+        0.12687323987483978,
+        -0.01779526099562645,
+        0.05689511448144913,
+        0.06747376173734665,
+        0.26353734731674194,
+        0.5908273458480835,
+        0.4315526783466339,
+        -0.5426794290542603,
+        -0.44501280784606934,
+        -0.019558124244213104,
+        -0.03320806100964546,
+        -0.025809556245803833,
+        0.17376014590263367,
+        -0.5201969742774963,
+        -1.2842578887939453,
+        -0.3674038052558899,
+        0.0882175862789154,
+        -0.030023137107491493,
+        -0.1173325777053833,
+        0.02555503323674202,
+        -0.39882710576057434,
+        -0.37364596128463745,
+        0.3550366163253784,
+        0.3903135359287262,
+        0.04022252932190895,
+        0.016731394454836845,
+        0.11207644641399384,
+        -0.020967213436961174,
+        -0.028497911989688873,
+        0.37590932846069336,
+        0.14920172095298767,
+        0.029958104714751244,
+        0.039632707834243774,
+        -0.24969367682933807,
+        0.16809938848018646,
+        0.07703239470720291,
+        -0.03522319719195366,
+        -0.007072617299854755,
+        0.07751759141683578,
+        -0.06782346963882446,
+        -0.4010501801967621,
+        0.41269779205322266,
+        0.1311105638742447,
+        -0.07331988960504532,
+        0.08240311592817307,
+        -0.20034979283809662,
+        -0.4718745946884155,
+        -0.178948312997818,
+        1.3285318613052368,
+        0.20384186506271362,
+        -0.48546233773231506,
+        -0.09941625595092773,
+        0.13249020278453827,
+        0.29977336525917053,
+        1.2681238651275635,
+        1.5725642442703247,
+        -1.0834472179412842,
+        -1.0335719585418701,
+        0.25975045561790466,
+        0.06584863364696503,
+        0.1609305590391159,
+        0.25940945744514465,
+        -0.8426372408866882,
+        -2.590407609939575,
+        -0.4723183214664459,
+        0.7581043243408203,
+        -0.03634117543697357,
+        -0.10199672728776932,
+        -0.3744191527366638,
+        -0.7823801636695862,
+        -0.7062401175498962,
+        1.116550087928772,
+        0.7735803127288818,
+        0.012776976451277733,
+        0.034575968980789185,
+        -0.10188565403223038,
+        0.2212170958518982,
+        0.5182898044586182,
+        0.8056022524833679,
+        -0.1897655427455902,
+        -0.005556725896894932,
+        -0.003909373190253973,
+        -0.02175678312778473,
+        -0.04085654392838478,
+        -0.03573022410273552,
+        -0.0038509985897690058,
+        0.02454996667802334,
+        0.039437733590602875,
+        0.02077251859009266,
+        0.02166259102523327,
+        0.17245841026306152,
+        0.09513862431049347,
+        -0.10491111874580383,
+        -0.08084940910339355,
+        -0.026179829612374306,
+        0.0215831957757473,
+        -0.16602416336536407,
+        -0.2803819179534912,
+        0.23894084990024567,
+        0.3269801735877991,
+        0.04504352807998657,
+        0.0009768904419615865,
+        0.01959501951932907,
+        0.24426960945129395,
+        -0.1451571136713028,
+        -0.5944203734397888,
+        -0.17875447869300842,
+        0.028336334973573685,
+        0.004323791246861219,
+        -0.045389141887426376,
+        0.0343034490942955,
+        0.46665430068969727,
+        0.3707427978515625,
+        -0.114569291472435,
+        0.04335101321339607,
+        -0.018011711537837982,
+        -0.021181274205446243,
+        -0.19074901938438416,
+        -0.20113815367221832,
+        0.048786211758852005,
+        0.08533122390508652,
+        -0.06084573268890381,
+        0.01217757910490036,
+        0.030666939914226532,
+        0.05272842198610306,
+        0.010849648155272007,
+        -0.05913804844021797,
+        -0.04202868044376373,
+        -0.0015147016383707523,
+        -0.03421122953295708,
+        0.015080726705491543,
+        0.12191007286310196,
+        0.10450142621994019,
+        -0.04972418025135994,
+        -0.07557133585214615,
+        -0.02221665158867836,
+        -0.0861242413520813,
+        -0.14919178187847137,
+        -0.04388582333922386,
+        0.4605262875556946,
+        0.5697804093360901,
+        0.1583399623632431,
+        -0.045628566294908524,
+        -0.05220475420355797,
+        -0.13630147278308868,
+        -0.7103163599967957,
+        -1.0178179740905762,
+        0.1927143931388855,
+        0.7479860186576843,
+        0.47013771533966064,
+        0.16943301260471344,
+        0.2398149073123932,
+        0.4710526168346405,
+        -0.5974176526069641,
+        -1.8564051389694214,
+        -0.7726883292198181,
+        0.05584309995174408,
+        0.08902852982282639,
+        0.0931839719414711,
+        0.46213099360466003,
+        1.2080260515213013,
+        0.6001025438308716,
+        -0.590207576751709,
+        -0.4145379662513733,
+        -0.04529324173927307,
+        -0.08303339034318924,
+        -0.2470429688692093,
+        -0.03481363505125046,
+        0.4808541238307953,
+        0.4001348614692688,
+        -0.1292688548564911,
+        -0.03635162487626076,
+        -0.006270444020628929,
+        -0.0314505510032177,
+        -0.13043232262134552,
+        -0.10837803781032562,
+        0.10718243569135666,
+        0.07523836195468903,
+        -0.00597786670550704,
+        0.06580565124750137,
+        0.11166563630104065,
+        0.021869506686925888,
+        -0.10510984063148499,
+        -0.07651247084140778,
+        0.01229890063405037,
+        -0.08976037800312042,
+        -0.14929910004138947,
+        -0.018859578296542168,
+        0.4408939778804779,
+        0.4029107689857483,
+        -0.05015433207154274,
+        -0.13887189328670502,
+        -0.04514491930603981,
+        -0.07346425950527191,
+        -0.5277182459831238,
+        -0.7335640788078308,
+        0.24182197451591492,
+        0.626846432685852,
+        0.23399080336093903,
+        0.09675730019807816,
+        0.15529058873653412,
+        0.42680656909942627,
+        -0.4012089967727661,
+        -1.3605350255966187,
+        -0.4793834686279297,
+        0.10987094044685364,
+        0.07592830061912537,
+        0.003319029463455081,
+        0.24004696309566498,
+        0.9590277671813965,
+        0.4946591258049011,
+        -0.4889579117298126,
+        -0.34744441509246826,
+        -0.020535729825496674,
+        -0.026767954230308533,
+        -0.2090117186307907,
+        -0.11841326951980591,
+        0.37452432513237,
+        0.39960840344429016,
+        -0.07025045901536942,
+        -0.022984744980931282,
+        0.022319970652461052,
+        -0.0027356306090950966,
+        -0.13681942224502563,
+        -0.09797768294811249,
+        0.09914079308509827,
+        0.10856777429580688,
+    ]
+    value = numpy.array(list_value, dtype=numpy.float32).reshape((64, 3, 7, 7))
+    tensor = numpy_helper.from_array(value, name="onnx::Conv_501")
+
+    initializers.append(tensor)
+
+    list_value = [
+        3.085598945617676,
+        2.2436060905456543,
+        4.244357585906982,
+        1.4069645404815674,
+        -4.00622034072876,
+        2.595770835876465,
+        2.7202603816986084,
+        2.4405417442321777,
+        1.1759933233261108,
+        2.021026372909546,
+        2.6628992557525635,
+        6.445226192474365,
+        -7.029932498931885,
+        1.1305793523788452,
+        2.537140369415283,
+        5.456772327423096,
+        4.780154705047607,
+        10.039976119995117,
+        2.912492275238037,
+        15.781542778015137,
+        2.5154318809509277,
+        2.628824472427368,
+        2.2992050647735596,
+        2.0950584411621094,
+        -7.93365478515625,
+        2.067786931991577,
+        4.094852447509766,
+        1.673399806022644,
+        3.1814424991607666,
+        22.49496078491211,
+        2.232640027999878,
+        2.6427979469299316,
+        -9.418174743652344,
+        1.790976643562317,
+        2.3774726390838623,
+        2.5836219787597656,
+        2.5608203411102295,
+        2.287343978881836,
+        2.6439085006713867,
+        16.859027862548828,
+        1.8699607849121094,
+        -3.6987526416778564,
+        2.6861538887023926,
+        2.8997464179992676,
+        2.689293384552002,
+        2.6654043197631836,
+        2.3799915313720703,
+        2.5603086948394775,
+        3.146122694015503,
+        2.715951681137085,
+        2.889486789703369,
+        2.966134548187256,
+        -4.960191249847412,
+        2.6123547554016113,
+        1.3074164390563965,
+        2.2033026218414307,
+        2.2114620208740234,
+        4.132844924926758,
+        4.893764495849609,
+        2.6469600200653076,
+        2.654136896133423,
+        1.9311997890472412,
+        2.881012439727783,
+        2.6991193294525146,
+    ]
+    value = numpy.array(list_value, dtype=numpy.float32)
+    tensor = numpy_helper.from_array(value, name="onnx::Conv_502")
+
+    initializers.append(tensor)
+
+    list_value = [
+        0.057212892919778824,
+        0.06299274414777756,
+        -0.018499961122870445,
+        -0.06501776725053787,
+        -0.015820641070604324,
+        0.024293724447488785,
+        0.05624663084745407,
+        -0.025112055242061615,
+        0.043546054512262344,
+        0.08439744263887405,
+        0.005678815301507711,
+        0.0034800865687429905,
+        0.030301403254270554,
+        -0.011669250205159187,
+        -0.005434689112007618,
+        -0.1591511219739914,
+        0.02324092946946621,
+        -0.018942436203360558,
+        0.025366367772221565,
+        -0.07414374500513077,
+        0.03468436002731323,
+        -0.003742520697414875,
+        -0.06651683896780014,
+        0.005561002530157566,
+        0.04527103528380394,
+        -0.13710148632526398,
+        0.0025444801431149244,
+        0.03583350405097008,
+        0.015219246037304401,
+        -0.053635064512491226,
+        0.004856681916862726,
+        -0.07223699986934662,
+        0.016770021989941597,
+        0.0012010147329419851,
+        0.014582094736397266,
+        -0.005172556731849909,
+        0.02009868621826172,
+        -0.0064261858351528645,
+        -0.029086023569107056,
+        0.001915874076075852,
+        0.0008194410474970937,
+        0.01620865799486637,
+        0.03067426010966301,
+        -0.0018463254673406482,
+        0.05358384922146797,
+        -0.003966080490499735,
+        -0.05991416424512863,
+        -0.06455761194229126,
+        0.01634763367474079,
+        -0.013959774747490883,
+        0.03615918383002281,
+        0.004434086848050356,
+        0.02086004987359047,
+        -0.004025993403047323,
+        -0.8869641423225403,
+        0.05558132007718086,
+        0.024729542434215546,
+        -0.005809253081679344,
+        -0.025079259648919106,
+        0.04757235199213028,
+        0.0023902510292828083,
+        0.01522061601281166,
+        0.011692625470459461,
+        0.023033330217003822,
+        -0.012664714828133583,
+        -0.29325294494628906,
+        -0.006855700630694628,
+        -0.243958979845047,
+        0.0024398649111390114,
+        -0.060877203941345215,
+        -0.21996521949768066,
+        -0.008708474226295948,
+        -0.06639625877141953,
+        -0.03170674294233322,
+        -0.09708897024393082,
+        0.013403226621448994,
+        0.024766888469457626,
+        0.2594103217124939,
+        -0.02221749909222126,
+        0.0662861093878746,
+        -0.15123076736927032,
+        -0.010314224287867546,
+        -0.0029192541260272264,
+        0.05985910817980766,
+        0.021665453910827637,
+        0.003247617743909359,
+        -0.006802591495215893,
+        0.00772367138415575,
+        0.0399332195520401,
+        0.005198766943067312,
+        0.006013805978000164,
+        -0.04212838411331177,
+        -0.03166411817073822,
+        0.13363900780677795,
+        0.006383878644555807,
+        -0.05536859482526779,
+        0.02053261175751686,
+        0.015062958002090454,
+        0.03352641686797142,
+        -0.2944328486919403,
+        0.019855381920933723,
+        -0.15567174553871155,
+        -0.06759943068027496,
+        0.07467031478881836,
+        0.01674237661063671,
+        0.004549413453787565,
+        -0.0032498433720320463,
+        -0.1837870180606842,
+        -0.04725493863224983,
+        -0.111307792365551,
+        0.022237055003643036,
+        0.004200428258627653,
+        0.00970534235239029,
+        -0.045657914131879807,
+        -0.024577995762228966,
+        0.0035376595333218575,
+        0.008936531841754913,
+        -0.03904002904891968,
+        0.05013228952884674,
+        -0.011168933473527431,
+        -0.008444730192422867,
+        0.0035155978985130787,
+        -0.023502476513385773,
+        0.005275514908134937,
+        -0.09448224306106567,
+        -0.009177467785775661,
+        -0.010720008052885532,
+        0.004110944457352161,
+        -0.0060218218713998795,
+        0.058124978095293045,
+        -0.0016586220590397716,
+        0.15812785923480988,
+        -0.049118027091026306,
+        -0.007983109913766384,
+        -0.04265601187944412,
+        -0.01627231575548649,
+        0.33705562353134155,
+        0.01555223111063242,
+        0.035853929817676544,
+        0.0005046340520493686,
+        0.054810188710689545,
+        -0.08808254450559616,
+        -0.0013819067971780896,
+        -0.14938786625862122,
+        -0.019771935418248177,
+        0.004152575507760048,
+        0.021979758515954018,
+        0.1985529363155365,
+        -0.07694264501333237,
+        0.013187955133616924,
+        -0.016572976484894753,
+        -0.03094586730003357,
+        -0.03673199936747551,
+        -0.03916170820593834,
+        -0.003836784977465868,
+        -0.012262578122317791,
+        0.005559554789215326,
+        0.1488093137741089,
+        -0.01842501200735569,
+        -0.004847189411520958,
+        -0.02391587756574154,
+        0.015824301168322563,
+        0.012022596783936024,
+        0.06724318116903305,
+        -0.032682593911886215,
+        0.00450896704569459,
+        -0.0024625889491289854,
+        0.00933725107461214,
+        -0.04473242908716202,
+        0.06270455569028854,
+        -0.02062271721661091,
+        -0.01071448065340519,
+        -0.017757099121809006,
+        0.01575278490781784,
+        -0.06489317119121552,
+        -0.01519051194190979,
+        0.0028058059979230165,
+        0.00917835533618927,
+        -0.01291860081255436,
+        -0.009537308476865292,
+        0.041757628321647644,
+        0.03203853219747543,
+        -0.10918509215116501,
+        -0.007152496371418238,
+        -0.06777876615524292,
+        0.03223242610692978,
+        0.01780836284160614,
+        -0.09791012853384018,
+        -0.009385241195559502,
+        0.013184775598347187,
+        0.0031673219054937363,
+        -0.010640445165336132,
+        0.024713385850191116,
+        -0.026738369837403297,
+        -0.004191657993942499,
+        -0.13764967024326324,
+        -0.003720735665410757,
+        0.01737186871469021,
+        0.015459887683391571,
+        0.033229030668735504,
+        0.008042111992835999,
+        -0.007184108253568411,
+        0.008226306177675724,
+        0.0031303109135478735,
+        0.0406314842402935,
+        -0.8669105768203735,
+        0.02079751342535019,
+        -0.17030003666877747,
+        -0.03849703446030617,
+        0.034153200685977936,
+        -0.007219486869871616,
+        0.11227627843618393,
+        -0.2681085467338562,
+        0.015872526913881302,
+        0.10855260491371155,
+        -0.008631505072116852,
+        0.02556358277797699,
+        0.06043418496847153,
+        -0.012900532223284245,
+        -0.08834894001483917,
+        0.028099440038204193,
+        -0.05156330019235611,
+        0.032628703862428665,
+        0.044928934425115585,
+        0.006176372990012169,
+        0.007333829998970032,
+        -0.037409231066703796,
+        -0.046724822372198105,
+        -0.011172871105372906,
+        0.04603327810764313,
+        0.03288746625185013,
+        -0.20848578214645386,
+        0.0028185085393488407,
+        -0.032673876732587814,
+        0.061944279819726944,
+        0.016787173226475716,
+        0.02703898213803768,
+        -0.0060023171827197075,
+        0.06870592385530472,
+        0.03154531493782997,
+        0.02784041129052639,
+        0.007780189625918865,
+        0.02033168077468872,
+        0.0019289497286081314,
+        0.02545374445617199,
+        0.04262726008892059,
+        0.01301807351410389,
+        -0.023882156237959862,
+        0.027872221544384956,
+        -0.013518108054995537,
+        -0.0031075032893568277,
+        0.03753834590315819,
+        0.0369209349155426,
+        -0.014378191903233528,
+        0.004397932440042496,
+        -0.030286893248558044,
+        -0.007679021451622248,
+        -0.045032769441604614,
+        0.032050322741270065,
+        -0.03373495861887932,
+        -0.04363032802939415,
+        0.034301597625017166,
+        -0.07021668553352356,
+        0.03942524269223213,
+        -0.11061309278011322,
+        0.049139462411403656,
+        0.04161922261118889,
+        -0.01507576834410429,
+        -0.012748259119689465,
+        0.06599434465169907,
+        0.007602245546877384,
+        -0.03973209857940674,
+        -0.06923151016235352,
+        0.026153067126870155,
+        -0.04221056029200554,
+        -0.4828230142593384,
+        0.03360651433467865,
+        0.01847662217915058,
+        -0.08594681322574615,
+        0.04071836546063423,
+        -0.0035729086957871914,
+        0.0049045816995203495,
+        -0.036198534071445465,
+        0.03046257793903351,
+        0.013275806792080402,
+        0.09266786277294159,
+        -0.03625647351145744,
+        -0.059672992676496506,
+        0.050213005393743515,
+        -0.018153885379433632,
+        -0.0858495831489563,
+        0.01621098257601261,
+        -0.03029749169945717,
+        0.02193332649767399,
+        0.0422661192715168,
+        0.6109512448310852,
+        -0.01068826112896204,
+        -0.02184930257499218,
+        -0.03213764354586601,
+        -0.03148162364959717,
+        -0.055331334471702576,
+        0.006972005590796471,
+        -0.00815682765096426,
+        0.014874683693051338,
+        -0.012943249195814133,
+        -0.03318992629647255,
+        -0.0010484680533409119,
+        0.005414161365479231,
+        -0.013610370457172394,
+        0.008836873807013035,
+        -0.05890084058046341,
+        -0.022663919255137444,
+        -0.018899116665124893,
+        -0.01037894282490015,
+        0.005064660683274269,
+        0.08522599190473557,
+        0.0075323861092329025,
+        0.013720778748393059,
+        0.032096460461616516,
+        -0.008450351655483246,
+        0.020377663895487785,
+        0.04537765309214592,
+        0.014030816033482552,
+        0.024340089410543442,
+        0.0231801588088274,
+        -0.10347768664360046,
+        0.041163086891174316,
+        -0.060614243149757385,
+        -0.09241361171007156,
+        0.05831432715058327,
+        -0.16008608043193817,
+        -0.04505622759461403,
+        0.04866329953074455,
+        -0.0656094029545784,
+        0.09627313911914825,
+        0.1153625100851059,
+        0.008151216432452202,
+        0.03813345730304718,
+        0.05990723893046379,
+        0.24788673222064972,
+        0.06294118613004684,
+        0.11761849373579025,
+        -0.0722033903002739,
+        -0.013892017304897308,
+        -0.016778236255049706,
+        0.038522012531757355,
+        -0.015539593063294888,
+        0.01263216882944107,
+        0.0003969807003159076,
+        -0.0224238783121109,
+        -0.005919966846704483,
+        0.031987495720386505,
+        -0.014712700620293617,
+        0.03508169203996658,
+        0.07568854838609695,
+        -0.011961974203586578,
+        0.027983952313661575,
+        -0.03512958809733391,
+        -0.010324078612029552,
+        -0.2895449995994568,
+        0.007338976487517357,
+        -0.042290836572647095,
+        -0.1640917807817459,
+        -0.034807007759809494,
+        -0.1268443465232849,
+        0.18418198823928833,
+        -0.3867812156677246,
+        -0.14214494824409485,
+        0.001021744217723608,
+        0.11288078874349594,
+        0.006741920951753855,
+        -0.006421610247343779,
+        0.021150892600417137,
+        0.02486848644912243,
+        0.002660338068380952,
+        0.03732302784919739,
+        0.10844919830560684,
+        -0.032568808645009995,
+        0.009477612562477589,
+        0.053578171879053116,
+        -0.07421902567148209,
+        0.05660263076424599,
+        0.03038308583199978,
+        0.049440011382102966,
+        0.0395139642059803,
+        0.0217339675873518,
+        0.028231965377926826,
+        0.1661153882741928,
+        -0.02168717049062252,
+        0.055143170058727264,
+        -0.14159196615219116,
+        0.05894732475280762,
+        0.006888065952807665,
+        -0.06988262385129929,
+        0.017527412623167038,
+        -0.007171930745244026,
+        -0.00448343763127923,
+        0.02932717651128769,
+        -0.00652179354801774,
+        -0.002897858154028654,
+        0.020487705245614052,
+        -0.027063967660069466,
+        -0.02539752423763275,
+        -0.1066114604473114,
+        -0.10011029988527298,
+        -0.03331710025668144,
+        -0.003807300003245473,
+        -0.010441976599395275,
+        -0.005605363752692938,
+        0.09679440408945084,
+        0.020033519715070724,
+        -0.010188378393650055,
+        -0.030630890280008316,
+        -0.00955540407449007,
+        0.02825581096112728,
+        -0.4307324290275574,
+        0.012557203881442547,
+        0.043258048593997955,
+        0.09386534243822098,
+        -0.009555542841553688,
+        0.05304868891835213,
+        0.014706632122397423,
+        -0.012911850586533546,
+        0.0981304720044136,
+        -0.010722141712903976,
+        -0.027317194268107414,
+        0.0893903523683548,
+        -0.19983792304992676,
+        -0.15778200328350067,
+        -0.1012115329504013,
+        -0.3758164644241333,
+        -0.05782865360379219,
+        -0.01230492815375328,
+        -0.37126046419143677,
+        -0.01596723683178425,
+        0.0020407456904649734,
+        -0.017498979344964027,
+        0.005369496997445822,
+        -0.023121315985918045,
+        0.022279681637883186,
+        -0.006232256535440683,
+        0.05115891620516777,
+        0.006679570768028498,
+        0.0026316209696233273,
+        0.04291496425867081,
+        0.04381528124213219,
+        -0.05994122102856636,
+        0.007081915624439716,
+        -0.04571640491485596,
+        0.07592425495386124,
+        -0.00836833007633686,
+        0.008123279549181461,
+        -0.008003163151443005,
+        -0.003938044421374798,
+        0.005643180105835199,
+        0.016194086521863937,
+        -0.004063089843839407,
+        0.012334472499787807,
+        0.017072021961212158,
+        0.005761854816228151,
+        0.004702428821474314,
+        0.005736868362873793,
+        0.0017962371930480003,
+        0.059996701776981354,
+        0.19533602893352509,
+        0.02649352326989174,
+        -0.06493135541677475,
+        -0.05955052375793457,
+        0.015692468732595444,
+        -0.10623155534267426,
+        0.07290898263454437,
+        0.036108434200286865,
+        -0.01248949021100998,
+        0.16444285213947296,
+        -0.005899128969758749,
+        0.07875277101993561,
+        0.0014204353792592883,
+        0.03381470963358879,
+        -0.09680792689323425,
+        0.002102318685501814,
+        0.026962973177433014,
+        0.031665392220020294,
+        -0.18168538808822632,
+        0.11163855344057083,
+        -0.5409999489784241,
+        0.07833191007375717,
+        -0.005324948113411665,
+        0.0267564058303833,
+        0.02250477857887745,
+        0.03249068558216095,
+        -0.18441715836524963,
+        -0.006447427906095982,
+        0.037927329540252686,
+        0.0005173985846340656,
+        -0.02617005631327629,
+        0.05929232016205788,
+        -0.028510913252830505,
+        0.05447050556540489,
+        0.012390155345201492,
+        0.00046797769027762115,
+        -0.008598590269684792,
+        -0.17247197031974792,
+        -0.02855759859085083,
+        0.033968932926654816,
+        -0.09011702984571457,
+        0.05276056379079819,
+        0.03299655020236969,
+        -0.005699596833437681,
+        -0.1954648792743683,
+        0.011109501123428345,
+        -0.0013570536393672228,
+        -0.6543989181518555,
+        0.009102803654968739,
+        0.0407538004219532,
+        0.04312055557966232,
+        0.027609223499894142,
+        -0.035538043826818466,
+        0.027167823165655136,
+        -0.024043193086981773,
+        0.0047575319185853004,
+        -0.006788836792111397,
+        0.025714389979839325,
+        0.007848678156733513,
+        -0.07680192589759827,
+        0.009700766764581203,
+        -0.0097329281270504,
+        0.00586724653840065,
+        0.022815868258476257,
+        -0.023448282852768898,
+        -0.05608998239040375,
+        0.10786863416433334,
+        -0.02803603559732437,
+        0.012898198328912258,
+        -0.009270391426980495,
+        -0.021972229704260826,
+        0.26533082127571106,
+        -0.01021308358758688,
+        -0.01972626894712448,
+        0.062940314412117,
+        0.022569671273231506,
+        0.027042347937822342,
+        -0.05669092759490013,
+        -0.01200617104768753,
+        -0.006279367487877607,
+        -0.009608528576791286,
+        -0.013600943610072136,
+        -0.02187415212392807,
+        0.0351138636469841,
+        0.006282923277467489,
+        -0.011123511008918285,
+        -0.009205769747495651,
+        0.001010146806947887,
+        -0.4796978235244751,
+        -0.0030205894727259874,
+        -0.011987377889454365,
+        -0.027548225596547127,
+        0.009372347965836525,
+        -0.005388603545725346,
+        -0.006444129627197981,
+        -0.02501147985458374,
+        0.027465635910630226,
+        0.027784524485468864,
+        0.006878893356770277,
+        -0.027763860300183296,
+        -0.0047700353898108006,
+        -0.018965192139148712,
+        0.027898501604795456,
+        0.022454144433140755,
+        0.02973407506942749,
+        0.03505602851510048,
+        0.04003170132637024,
+        -0.004336829297244549,
+        -0.01998550072312355,
+        -0.06097743660211563,
+        -0.07844759523868561,
+        0.0013787010684609413,
+        0.0066132270731031895,
+        -0.03124997951090336,
+        0.0313432514667511,
+        0.047656893730163574,
+        0.06175797060132027,
+        -0.02077358029782772,
+        -0.004535601008683443,
+        -0.10219905525445938,
+        -0.07125344127416611,
+        -0.06927482783794403,
+        -0.04813461750745773,
+        -0.02618095651268959,
+        -0.01255929097533226,
+        -0.009180150926113129,
+        -0.005838831886649132,
+        0.09108023345470428,
+        -0.032710760831832886,
+        0.03091445378959179,
+        -0.01955563761293888,
+        0.0959300771355629,
+        -0.09353741258382797,
+        -0.0761636272072792,
+        -0.023445438593626022,
+        -0.012328366748988628,
+        0.05850536748766899,
+        -0.052494827657938004,
+        0.0025638933293521404,
+        -0.017152179032564163,
+        -0.004435579292476177,
+        0.12312240898609161,
+        -0.007241012528538704,
+        0.09605048596858978,
+        0.03355967625975609,
+        -0.015987426042556763,
+        -0.03470349311828613,
+        -0.02499505691230297,
+        -0.015004142187535763,
+        -0.018609771504998207,
+        -0.06654462963342667,
+        0.013861652463674545,
+        -0.005973289255052805,
+        -0.04734775796532631,
+        0.08755116909742355,
+        0.03012942522764206,
+        0.07887610793113708,
+        -0.01827712170779705,
+        0.10793066769838333,
+        0.10793614387512207,
+        -0.01075535174459219,
+        0.03439560532569885,
+        0.011567444540560246,
+        0.0016386889619752765,
+        -0.031207261607050896,
+        -0.01707504875957966,
+        0.20471863448619843,
+        0.0025428179651498795,
+        0.004082779865711927,
+        -0.012389302253723145,
+        0.0400562584400177,
+        -0.21075034141540527,
+        0.012872264720499516,
+        -0.01639414019882679,
+        0.016652485355734825,
+        0.0016037120949476957,
+        -0.006540367379784584,
+        -0.0068405005149543285,
+        -0.2484254390001297,
+        0.0008089764742180705,
+        -0.022340824827551842,
+        -0.005441636312752962,
+        0.002882100408896804,
+        0.008654038421809673,
+        0.07159754633903503,
+        -0.02537086047232151,
+        0.011997461318969727,
+        -0.49913132190704346,
+        -0.02300887741148472,
+        0.044442202895879745,
+        0.001787978457286954,
+        0.010291379876434803,
+        0.009601960889995098,
+        -0.5312613248825073,
+        -0.014247804880142212,
+        0.06685849279165268,
+        0.035772595554590225,
+        0.03432310372591019,
+        0.03151272237300873,
+        -0.10318460315465927,
+        -0.030476456508040428,
+        -0.004469831008464098,
+        -0.16645164787769318,
+        -0.021104637533426285,
+        0.013934006914496422,
+        -0.011767406016588211,
+        0.008054615929722786,
+        0.06089277192950249,
+        0.0003409573109820485,
+        -0.0053401123732328415,
+        0.05970478057861328,
+        -0.004363172687590122,
+        0.014423285610973835,
+        -0.002795026171952486,
+        -0.019875092431902885,
+        -0.07540513575077057,
+        -0.09043378382921219,
+        0.00750827556475997,
+        -0.045314721763134,
+        -0.00724808732047677,
+        0.005193864461034536,
+        -0.020468784496188164,
+        -0.01098695583641529,
+        -0.0003122477210126817,
+        -0.007263806648552418,
+        -0.03325646370649338,
+        0.021689830347895622,
+        -0.13272541761398315,
+        0.02332465350627899,
+        -0.019292252138257027,
+        0.05533658340573311,
+        -0.018616480752825737,
+        -0.015228793025016785,
+        -0.28432801365852356,
+        -0.29721561074256897,
+        0.04648810625076294,
+        -0.014750649221241474,
+        -0.15370936691761017,
+        -0.1497083604335785,
+        0.013243601657450199,
+        0.042343802750110626,
+        -0.017519792541861534,
+        -0.0161418616771698,
+        0.00807454064488411,
+        -0.023562468588352203,
+        -0.0315413773059845,
+        0.03386805206537247,
+        0.2854529917240143,
+        0.0191020630300045,
+        -0.49126777052879333,
+        0.052687134593725204,
+        -0.023298051208257675,
+        -0.009119837544858456,
+        0.05149759724736214,
+        -0.8527837991714478,
+        0.08062390983104706,
+        0.057379938662052155,
+        -0.020724931731820107,
+        -0.006624895613640547,
+        0.05322050303220749,
+        0.017887847498059273,
+        0.04229281470179558,
+        0.04171830415725708,
+        0.029683062806725502,
+        -0.00028416322311386466,
+        0.1112222746014595,
+        -0.0448714978992939,
+        -0.005255761090666056,
+        0.017773712053894997,
+        -0.0016064767260104418,
+        -0.013840594328939915,
+        -0.00398495327681303,
+        -4.32919041486457e-05,
+        0.040796443819999695,
+        0.018185198307037354,
+        -0.018671950325369835,
+        0.0028256692457944155,
+        -0.020582057535648346,
+        0.05567716807126999,
+        -0.056062404066324234,
+        0.01614757999777794,
+        -0.0029299987945705652,
+        0.048686008900403976,
+        0.04299888014793396,
+        0.12249592691659927,
+        0.01469603180885315,
+        -0.1254546344280243,
+        -0.18532024323940277,
+        -0.003263876074925065,
+        0.014804725535213947,
+        0.004450956825166941,
+        -0.013681051321327686,
+        -0.0030781759414821863,
+        -0.03433656692504883,
+        -0.0035507124848663807,
+        0.1600082814693451,
+        -0.028547707945108414,
+        -0.00989136379212141,
+        -0.012126478366553783,
+        -0.12963305413722992,
+        0.008547360077500343,
+        0.017959514632821083,
+        -0.012571084313094616,
+        0.0008666724897921085,
+        -0.010519342496991158,
+        -0.009684977121651173,
+        -0.04285729303956032,
+        0.015031769871711731,
+        -0.030043724924325943,
+        0.018907636404037476,
+        0.08019450306892395,
+        -0.04836742579936981,
+        0.01025464478880167,
+        -0.004908542148768902,
+        -0.10327022522687912,
+        -0.10163667798042297,
+        -0.03403499722480774,
+        -0.019678063690662384,
+        -0.043049123138189316,
+        0.0384567566215992,
+        -0.05596519634127617,
+        -0.09381429851055145,
+        -0.18688108026981354,
+        -0.09762943536043167,
+        -0.03164997324347496,
+        -0.006416287273168564,
+        0.07003920525312424,
+        -0.016646990552544594,
+        -0.025972194969654083,
+        -0.028768088668584824,
+        -0.06332779675722122,
+        0.045144014060497284,
+        -0.03735211119055748,
+        -0.010442189872264862,
+        0.10948455333709717,
+        0.14629514515399933,
+        -0.023416690528392792,
+        -0.01347778458148241,
+        0.020830679684877396,
+        0.0003131759003736079,
+        0.007049075793474913,
+        0.06547018885612488,
+        0.03152740001678467,
+        0.08380027115345001,
+        0.03185325488448143,
+        -0.015359007753431797,
+        0.08864206075668335,
+        0.032676901668310165,
+        -0.002908645663410425,
+        0.053111132234334946,
+        0.0026159954722970724,
+        -0.05177146941423416,
+        -0.033048152923583984,
+        -0.0020293137058615685,
+        -0.07363513857126236,
+        -0.17662747204303741,
+        0.004798125941306353,
+        0.07139395922422409,
+        0.019802849739789963,
+        0.009199771098792553,
+        -0.009043877013027668,
+        -0.07681646943092346,
+        -0.06748555600643158,
+        0.05094710737466812,
+        0.0014789587585255504,
+        -0.0166088305413723,
+        -0.27988284826278687,
+        0.03634800389409065,
+        0.05322619527578354,
+        -0.15566207468509674,
+        -0.019964642822742462,
+        -0.010204506106674671,
+        -0.011832086369395256,
+        -0.0680927112698555,
+        -0.05793820694088936,
+        0.0020100779365748167,
+        -0.24647225439548492,
+        0.04904041066765785,
+        -0.05589786171913147,
+        -0.030167482793331146,
+        0.023974033072590828,
+        -0.22719347476959229,
+        0.019620347768068314,
+        -0.18078163266181946,
+        -0.11321499198675156,
+        -0.023790234699845314,
+        -0.1266157031059265,
+        0.01117659267038107,
+        0.13824795186519623,
+        -0.024211348965764046,
+        -0.0548308864235878,
+        0.04849318787455559,
+        -0.0016174454940482974,
+        -0.01826266385614872,
+        0.006709347013384104,
+        -0.350631982088089,
+        0.03139018639922142,
+        0.021502504125237465,
+        -0.12596893310546875,
+        0.04311670735478401,
+        -0.005905786994844675,
+        -0.0807335153222084,
+        -0.07214773446321487,
+        -0.2054852843284607,
+        -0.04526854678988457,
+        -0.09145382046699524,
+        0.002603817731142044,
+        -0.01951524056494236,
+        -0.0028278473764657974,
+        -0.03270411863923073,
+        -0.0003385065938346088,
+        -0.019816655665636063,
+        -0.003430107608437538,
+        0.010664679110050201,
+        0.030127109959721565,
+        0.02611778862774372,
+        0.030213139951229095,
+        0.04682943969964981,
+        0.010338326916098595,
+        -0.02618880569934845,
+        0.014982170425355434,
+        -0.06979402899742126,
+        0.06403722614049911,
+        0.025545112788677216,
+        -0.11981001496315002,
+        0.004320457112044096,
+        0.008849565871059895,
+        0.07450827211141586,
+        -0.04322020336985588,
+        -0.07648278027772903,
+        0.009221173822879791,
+        -0.12771189212799072,
+        0.027474528178572655,
+        -0.1637975573539734,
+        -0.022587651386857033,
+        0.0713210329413414,
+        -0.09652210026979446,
+        -0.04942077025771141,
+        -0.08977267891168594,
+        -0.004629603121429682,
+        -0.09891843795776367,
+        0.0004028059483971447,
+        0.12999524176120758,
+        0.009417874738574028,
+        -0.012465995736420155,
+        0.09959464520215988,
+        0.012048770673573017,
+        0.00529639283195138,
+        -0.1231047734618187,
+        -0.010156300850212574,
+        -0.0067022680304944515,
+        0.09231371432542801,
+        0.1372271031141281,
+        0.01140755694359541,
+        -0.014376018196344376,
+        0.009014246053993702,
+        -0.0558021254837513,
+        0.009297777898609638,
+        -0.023461824283003807,
+        0.12312523275613785,
+        0.0013492326252162457,
+        -0.10130659490823746,
+        0.07867099344730377,
+        -0.04363301396369934,
+        -0.05203291028738022,
+        0.010715829208493233,
+        0.2679101228713989,
+        0.047242000699043274,
+        0.009700302965939045,
+        -0.004188477993011475,
+        0.04595324397087097,
+        -0.10256988555192947,
+        0.013266253285109997,
+        0.13415516912937164,
+        -0.06461263447999954,
+        -0.04262775555253029,
+        0.014638054184615612,
+        -0.020396970212459564,
+        0.016008291393518448,
+        0.012964261695742607,
+        0.030219901353120804,
+        -0.03906702250242233,
+        -0.009459082037210464,
+        -0.006880247965455055,
+        0.009383107535541058,
+        0.0591101311147213,
+        -0.049882922321558,
+        -0.014105924405157566,
+        -0.04896679148077965,
+        0.021726086735725403,
+        -0.013863577507436275,
+        -0.05801064148545265,
+        -0.031143831089138985,
+        0.0010298469569534063,
+        -0.03104572743177414,
+        0.1193046048283577,
+        0.00880056619644165,
+        -0.01678626798093319,
+        0.0014990485506132245,
+        -0.001967367948964238,
+        -0.0053575835190713406,
+        -0.006879259832203388,
+        -0.008937212638556957,
+        0.014141763560473919,
+        0.00687083275988698,
+        -0.0012949275551363826,
+        0.017160816118121147,
+        -0.035110652446746826,
+        -0.00976842176169157,
+        0.026605995371937752,
+        0.004003277514129877,
+        0.010927689261734486,
+        0.002173327375203371,
+        -0.05133439600467682,
+        -0.04658171907067299,
+        0.03023359179496765,
+        -0.015038624405860901,
+        0.016580749303102493,
+        0.02393144741654396,
+        0.004817661829292774,
+        -0.008468102663755417,
+        0.017239807173609734,
+        0.019924553111195564,
+        0.02557404898107052,
+        0.01985766738653183,
+        -0.01881517469882965,
+        -0.14637643098831177,
+        -0.005403783638030291,
+        -0.013156545348465443,
+        -0.3882855176925659,
+        0.01537711638957262,
+        0.005061861593276262,
+        0.018044542521238327,
+        0.00010373388067819178,
+        -0.01769324019551277,
+        -0.020439250394701958,
+        0.01761222817003727,
+        0.017716309055685997,
+        -0.01828574948012829,
+        0.0059916484169662,
+        0.006117791403084993,
+        -0.0025541253853589296,
+        0.01598154753446579,
+        0.0015296537894755602,
+        0.006711189169436693,
+        -0.005831963382661343,
+        0.024547481909394264,
+        0.011665170080959797,
+        0.013990279287099838,
+        -0.009193074889481068,
+        -0.0014407691778615117,
+        0.0025373499374836683,
+        -0.001535113900899887,
+        0.022016262635588646,
+        0.002165747107937932,
+        -0.00010288839985150844,
+        -0.01185672264546156,
+        0.3959958255290985,
+        -0.06701132655143738,
+        0.024550342932343483,
+        -0.007259713020175695,
+        0.00011224728223169222,
+        0.08959072828292847,
+        0.006745494436472654,
+        -0.007461291737854481,
+        -0.0010788652580231428,
+        -0.003997487016022205,
+        0.0023250498343259096,
+        0.005845727398991585,
+        0.002441686810925603,
+        0.0010628585005179048,
+        0.004687050357460976,
+        0.03825820982456207,
+        0.0027951127849519253,
+        0.004356732591986656,
+        0.0036379920784384012,
+        -0.00048690394032746553,
+        -0.31681910157203674,
+        0.01621195860207081,
+        0.009373913519084454,
+        -0.005099120549857616,
+        0.004866141825914383,
+        0.008112045004963875,
+        -0.009933174587786198,
+        -0.006929770577698946,
+        0.005561198107898235,
+        -0.2225065976381302,
+        -0.00019208311277907342,
+        -0.003284667618572712,
+        0.010527989827096462,
+        -0.010160842910408974,
+        -0.008410060778260231,
+        0.004605174530297518,
+        0.01542133092880249,
+        0.013958578929305077,
+        0.0021779180970042944,
+        0.002810562262311578,
+        0.001369283301755786,
+        -0.0003347232413943857,
+        0.013902815990149975,
+        -0.0022218015510588884,
+        0.00024955783737823367,
+        -0.0019350153161212802,
+        0.0025213193148374557,
+        -0.0054915109649300575,
+        -0.00011564489977899939,
+        -0.0037644850090146065,
+        -0.002863431815057993,
+        -0.0025196163915097713,
+        0.02352992817759514,
+        0.00354134407825768,
+        -0.010700036771595478,
+        -0.03428381308913231,
+        0.008170859888195992,
+        0.005420713219791651,
+        -0.0013479178305715322,
+        0.0015741022070869803,
+        -0.18286381661891937,
+        0.03189067915081978,
+        0.0014371845172718167,
+        -4.885893940809183e-05,
+        -0.004666821099817753,
+        -0.026595929637551308,
+        -0.0064376350492239,
+        0.01583540253341198,
+        -0.085715651512146,
+        -0.00916224904358387,
+        -0.3605174124240875,
+        0.019973354414105415,
+        0.05533794313669205,
+        0.053907446563243866,
+        0.030877795070409775,
+        -0.919844925403595,
+        8.968543988885358e-05,
+        -0.02068270742893219,
+        0.012602192349731922,
+        0.03245612978935242,
+        0.06622699648141861,
+        0.00882122665643692,
+        -0.03616628423333168,
+        -0.02428283728659153,
+        0.003318701172247529,
+        -0.0007259293342940509,
+        -0.026197656989097595,
+        -0.059503961354494095,
+        0.029495801776647568,
+        -0.006955073680728674,
+        -0.01926456019282341,
+        0.009927013888955116,
+        0.059641581028699875,
+        0.0016886347439140081,
+        -0.029346982017159462,
+        0.01948450319468975,
+        -0.04397860914468765,
+        0.025248751044273376,
+        0.04597266763448715,
+        0.009454794228076935,
+        -0.018872544169425964,
+        -0.039650529623031616,
+        0.026324709877371788,
+        -0.01808176562190056,
+        0.028935831040143967,
+        0.009501701220870018,
+        -0.05183069407939911,
+        -0.005787428934127092,
+        -0.021436212584376335,
+        0.029735956341028214,
+        0.0350160151720047,
+        0.033825185149908066,
+        0.03185566887259483,
+        0.018431033939123154,
+        0.02450188808143139,
+        0.03271135315299034,
+        -0.0027792940381914377,
+        -0.0004625302099157125,
+        0.01268392987549305,
+        0.045023106038570404,
+        0.05562014505267143,
+        0.029052015393972397,
+        -0.002513203304260969,
+        -0.08349838852882385,
+        7.017837560852058e-06,
+        -0.0014392733573913574,
+        0.016982918605208397,
+        0.016358936205506325,
+        -0.024013325572013855,
+        -0.004375616554170847,
+        -0.03734249249100685,
+        0.04336351156234741,
+        0.07323610782623291,
+        -0.0243068914860487,
+        0.009403819218277931,
+        0.02663031965494156,
+        0.01930687017738819,
+        0.02175578847527504,
+        0.01639295555651188,
+        0.024892140179872513,
+        0.031219134107232094,
+        0.02986173704266548,
+        -0.002100786194205284,
+        0.05054357647895813,
+        0.04015854373574257,
+        0.0048207067884504795,
+        -0.03244275599718094,
+        0.027246609330177307,
+        0.00409608893096447,
+        -0.0054193479008972645,
+        0.07014931738376617,
+        0.009954879060387611,
+        0.022472694516181946,
+        -0.47738370299339294,
+        -0.019097158685326576,
+        0.028984038159251213,
+        -0.042564358562231064,
+        -0.006040808744728565,
+        0.04094231128692627,
+        -0.007740774191915989,
+        -0.07854597270488739,
+        0.003920051269233227,
+        -0.050799619406461716,
+        0.023691626265645027,
+        0.019952887669205666,
+        0.00716764759272337,
+        -0.0046928380616009235,
+        0.00041822553612291813,
+        0.006359069608151913,
+        0.017860781401395798,
+        -0.22999149560928345,
+        -0.02180831879377365,
+        -0.024055887013673782,
+        -0.0226126741617918,
+        -0.01795077696442604,
+        0.015591473318636417,
+        -0.004053472075611353,
+        0.016760380938649178,
+        0.03378744795918465,
+        -0.0027090508956462145,
+        0.00999806821346283,
+        0.019252799451351166,
+        0.0027550198137760162,
+        0.03454355522990227,
+        -0.0295003242790699,
+        -0.007663591764867306,
+        0.061172280460596085,
+        0.049142658710479736,
+        -0.00858291145414114,
+        -0.0035321018658578396,
+        -0.7689260244369507,
+        0.0004916944890283048,
+        0.02915046364068985,
+        0.017000442370772362,
+        -0.003298018593341112,
+        -0.0405484102666378,
+        0.021160880103707314,
+        0.0013289587805047631,
+        -0.07510386407375336,
+        0.03890690207481384,
+        0.03729970380663872,
+        -0.04906352981925011,
+        -0.10020274668931961,
+        0.01506283599883318,
+        -0.053726132959127426,
+        0.016631007194519043,
+        0.03425036743283272,
+        0.03358260169625282,
+        -0.023937245830893517,
+        -0.13656578958034515,
+        -0.13947314023971558,
+        0.012915699742734432,
+        0.02431132085621357,
+        -0.03089652583003044,
+        0.1382707953453064,
+        0.056695129722356796,
+        -0.09263960272073746,
+        0.10406216233968735,
+        0.02619105577468872,
+        -0.01678614132106304,
+        -0.16045455634593964,
+        8.974489173851907e-05,
+        -0.03521093726158142,
+        -0.028908027336001396,
+        0.21234789490699768,
+        -0.02046572044491768,
+        -0.09703273326158524,
+        0.05248226970434189,
+        0.011973158456385136,
+        0.004557646345347166,
+        -0.018632734194397926,
+        -0.1649131029844284,
+        -0.00682018743827939,
+        -0.12712189555168152,
+        0.10513507574796677,
+        0.020745709538459778,
+        0.02996259182691574,
+        -0.15409024059772491,
+        -0.08719073981046677,
+        -0.14634187519550323,
+        -0.16255779564380646,
+        -0.15963757038116455,
+        -0.1324772834777832,
+        -0.022830091416835785,
+        -0.06426219642162323,
+        -0.025459224358201027,
+        0.00281702633947134,
+        0.03255268186330795,
+        -0.05778049677610397,
+        -0.30381152033805847,
+        -0.06582051515579224,
+        -0.033722274005413055,
+        0.014956191182136536,
+        0.004153797868639231,
+        0.2391217201948166,
+        -0.0311420951038599,
+        0.001518488978035748,
+        0.019769812002778053,
+        -0.056324463337659836,
+        -0.006009253207594156,
+        -0.21367721259593964,
+        -0.0481688529253006,
+        0.22422266006469727,
+        0.0402204655110836,
+        0.1432792693376541,
+        0.14159953594207764,
+        -0.0025862890761345625,
+        -0.028965365141630173,
+        0.011978867463767529,
+        0.161293163895607,
+        0.028642605990171432,
+        -0.008417634293437004,
+        -0.10145614296197891,
+        0.08381767570972443,
+        0.05199432373046875,
+        0.18680602312088013,
+        -0.023287687450647354,
+        0.03601476550102234,
+        0.03738229721784592,
+        0.19291405379772186,
+        0.03553088754415512,
+        0.05483124405145645,
+        0.09577616304159164,
+        -0.004635817836970091,
+        0.052481625229120255,
+        -0.042084019631147385,
+        -0.2629147469997406,
+        -0.006157668773084879,
+        -0.0401761569082737,
+        0.02154349908232689,
+        -0.056558139622211456,
+        -0.003753019031137228,
+        0.01922912523150444,
+        0.1291409730911255,
+        -0.21358416974544525,
+        0.004696246236562729,
+        0.13787509500980377,
+        -0.07022479176521301,
+        -0.06828727573156357,
+        0.09193858504295349,
+        -0.06863763928413391,
+        -0.05677935853600502,
+        -0.030970478430390358,
+        -0.10181070864200592,
+        -0.1247706487774849,
+        0.014181962236762047,
+        -0.09259836375713348,
+        -0.03174220770597458,
+        -0.014812505804002285,
+        -0.024658311158418655,
+        -0.04815720021724701,
+        -0.01683010160923004,
+        0.015726473182439804,
+        0.002938281511887908,
+        -0.1586887538433075,
+        -0.29276973009109497,
+        -0.029981529340147972,
+        -0.046828676015138626,
+        -0.04909103736281395,
+        0.06043976545333862,
+        0.03698069602251053,
+        -0.04807118698954582,
+        0.0943484902381897,
+        0.01930702105164528,
+        0.06498143821954727,
+        0.0381690077483654,
+        -0.19611406326293945,
+        0.006944946013391018,
+        0.06454038619995117,
+        -0.19779883325099945,
+        0.04966692253947258,
+        0.046355295926332474,
+        0.0590626522898674,
+        -0.24392037093639374,
+        -0.0018132536206394434,
+        0.010944955050945282,
+        -0.014556891284883022,
+        0.051466893404722214,
+        -0.0059846509248018265,
+        -0.06719732284545898,
+        0.030604040250182152,
+        0.051190104335546494,
+        -0.053196243941783905,
+        -0.06912374496459961,
+        -0.06263922154903412,
+        0.05626852437853813,
+        0.013047950342297554,
+        -0.005828890949487686,
+        0.056055404245853424,
+        0.007044378202408552,
+        0.030499491840600967,
+        -0.035373322665691376,
+        0.030934391543269157,
+        0.04358363524079323,
+        0.001537138712592423,
+        0.005963161122053862,
+        -0.005889860913157463,
+        0.053225863724946976,
+        0.052091702818870544,
+        -0.02871675044298172,
+        0.05662619322538376,
+        -0.4585985839366913,
+        0.06490323692560196,
+        0.02542230300605297,
+        0.017592567950487137,
+        0.05066920816898346,
+        -0.20954127609729767,
+        -0.06689731031656265,
+        -0.3632309138774872,
+        -0.03407476842403412,
+        0.04976007342338562,
+        0.03856723755598068,
+        0.009329214692115784,
+        -0.10107281804084778,
+        0.007077769376337528,
+        -0.005482642911374569,
+        0.04388934373855591,
+        0.03984231874346733,
+        0.005358297843486071,
+        0.05032944679260254,
+        0.007170544005930424,
+        0.017318176105618477,
+        -0.03577208146452904,
+        -0.02195456624031067,
+        0.014414021745324135,
+        -0.008203372359275818,
+        0.04585091397166252,
+        -0.012298643589019775,
+        0.03959968313574791,
+        -0.06015963852405548,
+        -0.1360240876674652,
+        -0.07704123109579086,
+        -0.0842466950416565,
+        -0.11261942237615585,
+        0.0433686338365078,
+        -0.1059969812631607,
+        0.014813154004514217,
+        0.04216694459319115,
+        0.10441470146179199,
+        0.04579426348209381,
+        0.026033954694867134,
+        0.08725529909133911,
+        -0.14662955701351166,
+        -0.0726592168211937,
+        0.1293957382440567,
+        0.013497715815901756,
+        -0.01318936888128519,
+        -0.05188713222742081,
+        0.08793413639068604,
+        0.1094818189740181,
+        0.07991892844438553,
+        0.03549068048596382,
+        -0.04469897970557213,
+        -0.10442564636468887,
+        0.13456915318965912,
+        0.01154977548867464,
+        -0.05959299951791763,
+        0.01768219843506813,
+        0.0179652888327837,
+        -0.010112428106367588,
+        0.020603090524673462,
+        -0.7144030928611755,
+        0.20126283168792725,
+        0.058172807097435,
+        -0.10543914139270782,
+        0.07461538910865784,
+        -0.1744592934846878,
+        0.055722273886203766,
+        -0.046595826745033264,
+        0.06237049773335457,
+        0.05800141766667366,
+        0.04118870943784714,
+        0.002582935383543372,
+        0.010623090900480747,
+        -0.0439014658331871,
+        0.044685740023851395,
+        -0.017063472419977188,
+        -0.0173367727547884,
+        -0.04761765897274017,
+        0.06136244907975197,
+        0.08495236933231354,
+        0.24923592805862427,
+        -0.061080869287252426,
+        0.15922360122203827,
+        -0.09322690963745117,
+        -0.09617402404546738,
+        0.0029533954802900553,
+        0.12630371749401093,
+        0.0011397749185562134,
+        0.0005059551913291216,
+        -0.060922350734472275,
+        -0.16446451842784882,
+        0.057099178433418274,
+        0.03073902614414692,
+        -0.031064951792359352,
+        0.012277435511350632,
+        0.020447896793484688,
+        0.06010727211833,
+        0.07065457105636597,
+        0.026963504031300545,
+        0.010798406787216663,
+        -0.02631279267370701,
+        0.02046871930360794,
+        -0.004800989292562008,
+        -0.03282550349831581,
+        0.053904879838228226,
+        -0.03294985368847847,
+        -0.4204113185405731,
+        0.028552187606692314,
+        0.023685462772846222,
+        0.0017703581834211946,
+        0.02868991158902645,
+        -0.3585520088672638,
+        -0.011516556143760681,
+        -0.00248165475204587,
+        0.011379038915038109,
+        0.0459531806409359,
+        0.015357235446572304,
+        0.05573337897658348,
+        0.06516549736261368,
+        0.02981666848063469,
+        0.05498211458325386,
+        0.028714550659060478,
+        -0.005899528972804546,
+        0.008476868271827698,
+        0.11328839510679245,
+        0.020578190684318542,
+        -0.15382742881774902,
+        0.015724696218967438,
+        -0.08402770012617111,
+        0.060314107686281204,
+        0.032343748956918716,
+        0.014438764192163944,
+        -0.13614842295646667,
+        -0.0017508765449747443,
+        0.09998518973588943,
+        -0.06364594399929047,
+        0.049632295966148376,
+        -0.11922458559274673,
+        -0.08834195137023926,
+        0.019541991874575615,
+        0.06320779770612717,
+        0.017419861629605293,
+        -0.0028468866366893053,
+        -0.14753428101539612,
+        0.02623703144490719,
+        -0.011462770402431488,
+        0.06676206737756729,
+        -0.014891563914716244,
+        -0.002118025440722704,
+        0.02519390918314457,
+        -0.29581141471862793,
+        0.0264339130371809,
+        0.04027356952428818,
+        0.00412194337695837,
+        0.03778498247265816,
+        -0.012331741861999035,
+        0.15336745977401733,
+        -0.034510836005210876,
+        0.0319819413125515,
+        0.01916184462606907,
+        0.04952343553304672,
+        -0.026733938604593277,
+        -0.014996573328971863,
+        0.0010714810341596603,
+        0.01959756202995777,
+        -0.0392388179898262,
+        -0.0052064210176467896,
+        -0.05015777423977852,
+        -0.0002977418771479279,
+        -0.04029487073421478,
+        -0.012846150435507298,
+        -0.09198840707540512,
+        0.0118671590462327,
+        -0.06176264211535454,
+        0.006427878048270941,
+        0.04043034091591835,
+        -0.017270859330892563,
+        -0.012422707863152027,
+        0.01713552325963974,
+        -0.026697810739278793,
+        0.2446632832288742,
+        -0.020500628277659416,
+        -0.0012782106641680002,
+        -0.13429665565490723,
+        0.07528743892908096,
+        -0.002225265372544527,
+        0.06695574522018433,
+        0.0017388156848028302,
+        -0.0629071593284607,
+        -0.05081196129322052,
+        0.042025983333587646,
+        0.029097404330968857,
+        0.07048555463552475,
+        -0.11881273239850998,
+        0.012633765116333961,
+        -0.06181430071592331,
+        0.038810230791568756,
+        0.05186169967055321,
+        0.03248963877558708,
+        0.07868267595767975,
+        0.024977494031190872,
+        0.023991582915186882,
+        0.0023529180325567722,
+        0.07197123020887375,
+        0.02653665468096733,
+        0.058702051639556885,
+        0.015001803636550903,
+        0.043739400804042816,
+        -0.07251746207475662,
+        0.045659150928258896,
+        -0.02111324854195118,
+        0.26666632294654846,
+        0.1975221484899521,
+        -0.031074335798621178,
+        0.029075143858790398,
+        0.013020229525864124,
+        0.015244663693010807,
+        0.01387549377977848,
+        -0.025354426354169846,
+        0.06151636317372322,
+        -0.034430794417858124,
+        0.00752665288746357,
+        0.1678706705570221,
+        -0.016560610383749008,
+        0.0421285480260849,
+        -0.02527586743235588,
+        -0.02166694961488247,
+        -0.034658536314964294,
+        0.036866605281829834,
+        -0.036233626306056976,
+        0.02042747661471367,
+        0.028099242597818375,
+        0.020503878593444824,
+        0.022789381444454193,
+        0.08666791766881943,
+        -0.06426636874675751,
+        -0.043599683791399,
+        0.1136128157377243,
+        0.020200412720441818,
+        -0.003839759388938546,
+        -0.06010120362043381,
+        -0.02218424715101719,
+        0.09008956700563431,
+        0.008711264468729496,
+        -0.04874516651034355,
+        -0.011533043347299099,
+        -0.036206502467393875,
+        -0.006006627343595028,
+        -0.0350450798869133,
+        0.005623341538012028,
+        0.09562186151742935,
+        -0.03952183946967125,
+        -0.013931595720350742,
+        -0.020029470324516296,
+        0.0022144403774291277,
+        -0.020198611542582512,
+        0.012238736264407635,
+        0.054415784776210785,
+        -0.024457741528749466,
+        -0.01174110360443592,
+        0.031656913459300995,
+        0.060322560369968414,
+        0.01573050767183304,
+        0.03361794352531433,
+        0.022875478491187096,
+        0.036340806633234024,
+        -0.02932620421051979,
+        0.0224352665245533,
+        -0.013475337065756321,
+        -0.030774995684623718,
+        0.013921404257416725,
+        -0.01229875348508358,
+        -0.07986237108707428,
+        -0.007543445099145174,
+        0.05208213999867439,
+        -0.04440496116876602,
+        -0.029659371823072433,
+        -0.029070377349853516,
+        0.07376870512962341,
+        -0.07208643853664398,
+        -0.05429431423544884,
+        -0.007887271232903004,
+        0.011400371789932251,
+        0.014227204024791718,
+        0.01763899251818657,
+        -0.0426466204226017,
+        0.0024213625583797693,
+        0.02564665488898754,
+        0.0020850151777267456,
+        0.027386819943785667,
+        0.12722602486610413,
+        -0.060991525650024414,
+        -0.009061425924301147,
+        0.014208497479557991,
+        -0.006956137716770172,
+        0.09096626192331314,
+        0.0037735258229076862,
+        -0.8347064852714539,
+        -0.2857951521873474,
+        0.0011818337952718139,
+        0.0341162234544754,
+        -0.04230167716741562,
+        0.05230262130498886,
+        0.08486262708902359,
+        -0.34235459566116333,
+        -0.02393503487110138,
+        0.02718495950102806,
+        0.050966840237379074,
+        0.024611525237560272,
+        -0.004936584271490574,
+        -0.036420952528715134,
+        -0.009803534485399723,
+        0.05421328917145729,
+        0.008357672952115536,
+        0.020987343043088913,
+        -0.007292840629816055,
+        0.018060531467199326,
+        0.06739793717861176,
+        0.06161382421851158,
+        0.000842935056425631,
+        -0.007857701741158962,
+        0.023870037868618965,
+        -0.009690430946648121,
+        -0.04231289029121399,
+        -0.22531479597091675,
+        0.034284885972738266,
+        0.07360551506280899,
+        0.0421777106821537,
+        0.000788167177233845,
+        -0.3953339457511902,
+        -0.042627450078725815,
+        -0.02774403616786003,
+        0.02647743932902813,
+        -0.01561375055462122,
+        0.04745408892631531,
+        0.021774733439087868,
+        0.006606150884181261,
+        0.03879173845052719,
+        0.06500626355409622,
+        0.044954728335142136,
+        0.01523532159626484,
+        0.04741065576672554,
+        -0.13645507395267487,
+        0.0038059696089476347,
+        -0.012993253767490387,
+        -0.004529603291302919,
+        0.03268986567854881,
+        -0.025349941104650497,
+        -0.02268051542341709,
+        -0.0001516443444415927,
+        -0.010289257392287254,
+        -0.0010476588504388928,
+        -0.0690254345536232,
+        0.04298266023397446,
+        -0.05470968782901764,
+        0.04369102790951729,
+        -0.007372597698122263,
+        0.027607066556811333,
+        0.0009343988494947553,
+        -0.09573916345834732,
+        0.04389296472072601,
+        -0.01522558368742466,
+        -0.03138086944818497,
+        0.04511113464832306,
+        -0.0342172235250473,
+        -0.00033129166695289314,
+        -0.037289440631866455,
+        0.055575959384441376,
+        0.01849759928882122,
+        0.03041103295981884,
+        -0.01965116336941719,
+        0.07604960352182388,
+        -0.0399625338613987,
+        -0.008190250024199486,
+        -0.015386211685836315,
+        -0.04315667226910591,
+        0.0023679479490965605,
+        0.018971435725688934,
+        -0.005599244497716427,
+        -0.029607947915792465,
+        0.07574024051427841,
+        -0.013816094025969505,
+        0.04464992880821228,
+        0.00032806122908368707,
+        0.06071484833955765,
+        0.04261377081274986,
+        0.012208743952214718,
+        0.0801805928349495,
+        0.02875029854476452,
+        -0.0662921741604805,
+        0.015754999592900276,
+        0.05831082537770271,
+        0.03810921683907509,
+        0.05483977496623993,
+        -0.019509335979819298,
+        0.0032034649048000574,
+        0.011807492934167385,
+        -0.01916244812309742,
+        0.022101666778326035,
+        -0.0366031751036644,
+        0.10915965586900711,
+        0.030322788283228874,
+        -0.028386037796735764,
+        -0.05443429946899414,
+        -0.02489445172250271,
+        0.0892239362001419,
+        -0.05427740886807442,
+        -0.034238025546073914,
+        -0.04136161506175995,
+        -0.041148390620946884,
+        0.06879492849111557,
+        -0.37424594163894653,
+        0.028803903609514236,
+        0.05349116027355194,
+        0.0359492301940918,
+        -0.3629145622253418,
+        -0.17875684797763824,
+        -0.012246759608387947,
+        0.2744927704334259,
+        -0.010421697050333023,
+        -0.19415415823459625,
+        0.005668101832270622,
+        0.018326066434383392,
+        0.28319111466407776,
+        -0.008164885453879833,
+        -0.07401272654533386,
+        -0.04154321923851967,
+        0.030028337612748146,
+        -0.008959534578025341,
+        -0.03160349279642105,
+        -0.0191870778799057,
+        0.044875819236040115,
+        0.052173007279634476,
+        0.012135458178818226,
+        0.008775291964411736,
+        0.005302258301526308,
+        0.009224606677889824,
+        -0.07574712485074997,
+        0.06096252053976059,
+        0.02645082212984562,
+        0.05135556682944298,
+        0.021985528990626335,
+        0.0076704383827745914,
+        0.02961125783622265,
+        -0.07608609646558762,
+        -0.17564956843852997,
+        0.03679918497800827,
+        -0.2696506083011627,
+        0.0627906322479248,
+        0.031165480613708496,
+        0.01799822598695755,
+        0.02351829782128334,
+        0.015595306642353535,
+        -0.25137314200401306,
+        -0.011266927234828472,
+        0.04895596578717232,
+        0.01718883402645588,
+        0.0009224268142133951,
+        0.021923478692770004,
+        0.044791676104068756,
+        0.079147569835186,
+        0.02014082670211792,
+        -0.0003547854721546173,
+        -0.02535748854279518,
+        -0.029639363288879395,
+        -0.01965961419045925,
+        -0.37630724906921387,
+        0.01674639992415905,
+        0.01316642016172409,
+        -0.025120021775364876,
+        -0.12474260479211807,
+        0.059980470687150955,
+        0.036066047847270966,
+        -0.15973420441150665,
+        -0.010871605016291142,
+        0.014708316884934902,
+        -0.2174367904663086,
+        0.012985467910766602,
+        -0.03782057762145996,
+        -0.003427069401368499,
+        -0.011010636575520039,
+        0.02433733455836773,
+        0.08641276508569717,
+        -0.004630533047020435,
+        0.019430357962846756,
+        -0.02088969387114048,
+        -0.06182911619544029,
+        0.02577812969684601,
+        0.015741532668471336,
+        0.04723552614450455,
+        -0.003783567575737834,
+        0.11646346747875214,
+        0.01827184483408928,
+        -0.0999741181731224,
+        -0.0031216999050229788,
+        -0.002268272452056408,
+        -0.019456079229712486,
+        -0.003156653605401516,
+        0.0067732855677604675,
+        0.027299508452415466,
+        0.06979037076234818,
+        0.013329057022929192,
+        -0.016705401241779327,
+        0.33774301409721375,
+        0.007617524825036526,
+        0.044453222304582596,
+        0.0016282782889902592,
+        0.0010982973035424948,
+        0.04183036834001541,
+        0.016857653856277466,
+        0.006673034280538559,
+        -0.0187662523239851,
+        0.0037163379602134228,
+        -0.04568779841065407,
+        -0.007807960733771324,
+        0.016653010621666908,
+        0.0033014933578670025,
+        0.015063234604895115,
+        0.012843966484069824,
+        -0.012042546644806862,
+        0.016909126192331314,
+        0.022089935839176178,
+        -0.002550398698076606,
+        0.04166745766997337,
+        -0.0014742743223905563,
+        -0.010846617631614208,
+        -0.12333541363477707,
+        0.0018612967105582356,
+        0.04913188889622688,
+        -0.029431112110614777,
+        0.01824735291302204,
+        0.10425490140914917,
+        -0.08880072832107544,
+        0.03029320202767849,
+        0.018876856192946434,
+        0.016104502603411674,
+        0.00882721971720457,
+        0.0029782119672745466,
+        0.007922517135739326,
+        -0.02030068263411522,
+        -0.029835309833288193,
+        0.006661414168775082,
+        -0.04313879832625389,
+        -0.001850730157457292,
+        -0.0035070034209638834,
+        -0.0070700813084840775,
+        0.009637435898184776,
+        -0.016844747588038445,
+        -0.026075454428792,
+        0.0030682040378451347,
+        0.004208600614219904,
+        -0.005515689495950937,
+        -0.018976539373397827,
+        -0.019196776673197746,
+        -0.008948019705712795,
+        0.016215825453400612,
+        0.00296461652033031,
+        0.14222395420074463,
+        -0.029066482558846474,
+        -0.011013337410986423,
+        -0.01267730537801981,
+        -0.004976287949830294,
+        -0.016607511788606644,
+        -0.0005681798211298883,
+        -0.012520174495875835,
+        -0.0015903630992397666,
+        -0.0013642794219776988,
+        -0.21956196427345276,
+        -0.0011431180173531175,
+        -0.0008808697457425296,
+        -0.022889399901032448,
+        0.024718068540096283,
+        -0.054929111152887344,
+        -0.015585094690322876,
+        -0.018188318237662315,
+        -0.0008287815726362169,
+        -0.01957552134990692,
+        0.10818513482809067,
+        -0.0034382494632154703,
+        -0.02667389065027237,
+        -0.01304248720407486,
+        -0.0034645304549485445,
+        -0.008519704453647137,
+        -0.015123830176889896,
+        -0.008219013921916485,
+        -0.009952309541404247,
+        -2.3375787350232713e-05,
+        -0.012512428686022758,
+        -0.001955948770046234,
+        -0.0029842876829206944,
+        -0.004291659686714411,
+        0.006655955221503973,
+        0.007771315053105354,
+        0.014132227748632431,
+        -0.007390063256025314,
+        -0.024650415405631065,
+        -0.022503213956952095,
+        0.0032607221510261297,
+        -0.008497492410242558,
+        0.00860870536416769,
+        0.002819088753312826,
+        -0.01841069757938385,
+        -0.010009711608290672,
+        -0.2912862300872803,
+        0.017160022631287575,
+        0.11349690705537796,
+        -0.027656083926558495,
+        -0.04482223838567734,
+        -0.019336597993969917,
+        0.07413014769554138,
+        0.014554106630384922,
+        0.020965611562132835,
+        -0.028231356292963028,
+        -0.0582813061773777,
+        0.05617539584636688,
+        -0.05042734369635582,
+        0.025630727410316467,
+        -0.0956532284617424,
+        -0.14554104208946228,
+        -0.020851148292422295,
+        0.006990485824644566,
+        0.08457829803228378,
+        -0.11314752697944641,
+        0.004020951222628355,
+        -0.03477870300412178,
+        0.005594289395958185,
+        0.011181964538991451,
+        0.010988114401698112,
+        0.019416088238358498,
+        0.026451971381902695,
+        -0.00452260859310627,
+        0.0004952011513523757,
+        0.012377702631056309,
+        -0.0063480171374976635,
+        0.0256175734102726,
+        -0.020753338932991028,
+        0.03223377838730812,
+        -0.1147943064570427,
+        -0.009170151315629482,
+        0.015267477370798588,
+        -0.0009072314132936299,
+        -0.1621374636888504,
+        0.022807778790593147,
+        0.007394107989966869,
+        0.01378557924181223,
+        -0.10719677805900574,
+        -0.000919080339372158,
+        -0.006567052565515041,
+        -0.007409179583191872,
+        -0.007469762582331896,
+        -0.004784661345183849,
+        -0.03967805579304695,
+        0.015857066959142685,
+        -0.02015744335949421,
+        0.056037548929452896,
+        0.03962035849690437,
+        0.08429893851280212,
+        0.022117067128419876,
+        -0.2675061821937561,
+        0.016738418489694595,
+        0.0037785861641168594,
+        0.004771686624735594,
+        -0.134505033493042,
+        -0.010618447326123714,
+        -0.004784524440765381,
+        0.014044507406651974,
+        -0.03105556219816208,
+        0.05049083009362221,
+        0.012162688188254833,
+        0.005920265335589647,
+        0.008554516360163689,
+        0.0025892227422446012,
+        0.023483717814087868,
+        -0.20711173117160797,
+        0.03360452130436897,
+        -0.24758699536323547,
+        -0.05136318504810333,
+        -0.015016172081232071,
+        0.06466241925954819,
+        0.023470288142561913,
+        0.023495715111494064,
+        0.004300899337977171,
+        0.02461574412882328,
+        0.025745516642928123,
+        -0.026187308132648468,
+        0.08441776037216187,
+        -0.06955462694168091,
+        -0.11116205900907516,
+        -0.2169608771800995,
+        -0.004244703333824873,
+        -0.024184226989746094,
+        -0.10068271309137344,
+        -0.021129190921783447,
+        -0.021129680797457695,
+        -0.0054467362351715565,
+        0.17416934669017792,
+        0.015367642976343632,
+        -0.01237915363162756,
+        0.024573752656579018,
+        0.004588739015161991,
+        0.05616860091686249,
+        -0.0018992060795426369,
+        -0.12394066900014877,
+        -0.03691404312849045,
+        -0.15878455340862274,
+        0.10572423785924911,
+        0.014409378170967102,
+        -0.008566108532249928,
+        -0.20319701731204987,
+        -0.018277373164892197,
+        -0.21615462005138397,
+        -0.11269525438547134,
+        -0.2767113745212555,
+        -0.25617966055870056,
+        -0.0036413148045539856,
+        -0.008058675564825535,
+        -0.051732294261455536,
+        -0.013052727095782757,
+        0.05229722708463669,
+        -0.03535814583301544,
+        0.3111231327056885,
+        -0.044130608439445496,
+        -0.02232682704925537,
+        -0.0040402463637292385,
+        0.013798556290566921,
+        -0.07689940929412842,
+        -0.028940049931406975,
+        -0.00565366679802537,
+        -0.028972560539841652,
+        -0.007728889584541321,
+        0.013665011152625084,
+        -0.014678380452096462,
+        -0.06747694313526154,
+        -0.06480871140956879,
+        -0.00028885426581837237,
+        -0.01525174267590046,
+        0.027096102014183998,
+        -0.05200905352830887,
+        0.0066903820261359215,
+        0.0023834225721657276,
+        -0.002379713812842965,
+        -0.0208051148802042,
+        0.335977703332901,
+        0.03895771875977516,
+        -0.04814215749502182,
+        -0.037339694797992706,
+        -0.004409746266901493,
+        0.07042848318815231,
+        -0.08318590372800827,
+        -0.04138712212443352,
+        0.06309781968593597,
+        0.007484383415430784,
+        0.09696535021066666,
+        0.024134323000907898,
+        -0.009859816171228886,
+        -0.06243982911109924,
+        0.04630015045404434,
+        -0.06593744456768036,
+        0.009306293912231922,
+        0.5033899545669556,
+        0.007804783061146736,
+        0.024170484393835068,
+        -0.036085959523916245,
+        0.016438491642475128,
+        0.01678072288632393,
+        -0.006299734115600586,
+        -0.027441656216979027,
+        -0.014344800263643265,
+        0.022293711081147194,
+        0.011197407729923725,
+        -0.0026971842162311077,
+        0.2685070335865021,
+        0.01403988990932703,
+        -0.005100077483803034,
+        -0.026031343266367912,
+        -0.005419034510850906,
+        -0.014735087752342224,
+        -0.0283498577773571,
+        0.002656748052686453,
+        -0.07137783616781235,
+        0.02235356532037258,
+        -0.02970476634800434,
+        0.20672672986984253,
+        0.017398398369550705,
+        0.02438206970691681,
+        0.025746773928403854,
+        -0.03279582038521767,
+        0.043908532708883286,
+        -0.003417646512389183,
+        0.020200302824378014,
+        0.007243862375617027,
+        -0.004560714587569237,
+        -0.01142876222729683,
+        -0.028091270476579666,
+        -0.2949703335762024,
+        0.0729827880859375,
+        0.004566277377307415,
+        0.16689160466194153,
+        0.034872010350227356,
+        -0.09590360522270203,
+        -0.13309867680072784,
+        0.06429398059844971,
+        0.04174232855439186,
+        -0.022723963484168053,
+        -0.04695400968194008,
+        0.013115685433149338,
+        0.013574879616498947,
+        0.04794493317604065,
+        -0.015077140182256699,
+        0.09493618458509445,
+        0.008845972828567028,
+        0.020302923396229744,
+        0.02037016488611698,
+        0.009083293378353119,
+        0.0747746080160141,
+        -0.008078188635408878,
+        0.024796344339847565,
+        -0.015212535858154297,
+        -0.005867444910109043,
+        0.08309170603752136,
+        0.03676094114780426,
+        0.07232356816530228,
+        -0.3577176630496979,
+        0.0013658110983669758,
+        -0.0009247250854969025,
+        0.02284996211528778,
+        0.012630275450646877,
+        0.013745593838393688,
+        0.003447894938290119,
+        0.03563565015792847,
+        -0.031025355681777,
+        -0.07258180528879166,
+        -0.13482442498207092,
+        -0.029425248503684998,
+        -0.014927731826901436,
+        0.045984312891960144,
+        -0.0176406130194664,
+        -0.22678181529045105,
+        -0.025248311460018158,
+        -0.11617762595415115,
+        -0.056157518178224564,
+        0.009453062899410725,
+        -0.34616726636886597,
+        0.05691010504961014,
+        -0.32302799820899963,
+        -0.026544231921434402,
+        -0.007374088745564222,
+        -0.07682909071445465,
+        -0.021214107051491737,
+        -0.07102422416210175,
+        0.02693488635122776,
+        0.014817211776971817,
+        0.015572831965982914,
+        0.04313618317246437,
+        -0.1277216374874115,
+        0.02174532599747181,
+        -0.0226149819791317,
+        -0.00010956164624076337,
+        0.023728065192699432,
+        0.008212783373892307,
+        0.010561724193394184,
+        -0.011036543175578117,
+        -0.022485855966806412,
+        0.008243439719080925,
+        -0.03383245691657066,
+        -0.5630682110786438,
+        0.0015974265988916159,
+        -0.28416821360588074,
+        0.04123701527714729,
+        -0.0042976438999176025,
+        0.03786511346697807,
+        0.01862393692135811,
+        -0.04082413762807846,
+        -0.05792848393321037,
+        0.0068894242867827415,
+        0.0024085959885269403,
+        0.001471342402510345,
+        0.030681759119033813,
+        -0.026314062997698784,
+        0.0555737242102623,
+        0.03169534355401993,
+        0.0031395808327943087,
+        0.018701769411563873,
+        -0.5604594945907593,
+        0.01526441890746355,
+        -0.00621993700042367,
+        0.0009401043644174933,
+        0.01587403193116188,
+        0.030135583132505417,
+        -0.007350685074925423,
+        0.006527469493448734,
+        0.016000108793377876,
+        -0.042957425117492676,
+        0.018247080966830254,
+        0.0025622656103223562,
+        -0.03169511258602142,
+        0.09235119074583054,
+        -0.013365034945309162,
+        0.01607452519237995,
+        0.017734844237565994,
+        0.05609896034002304,
+        0.04819876700639725,
+        -0.0871855691075325,
+        0.05157865956425667,
+        0.009171447716653347,
+        0.022200705483555794,
+        -0.005507844965904951,
+        -0.024452703073620796,
+        0.010224574245512486,
+        -0.006914906669408083,
+        0.004650818649679422,
+        0.02167516015470028,
+        0.10456826537847519,
+        -0.07652094960212708,
+        -6.050072988728061e-05,
+        0.012855490669608116,
+        0.022669879719614983,
+        0.022655120119452477,
+        0.033012885600328445,
+        0.025709744542837143,
+        0.00481270719319582,
+        0.005920717027038336,
+        -0.08545156568288803,
+        -0.004363589454442263,
+        -0.01531639602035284,
+        0.030760569497942924,
+        0.02796284481883049,
+        -0.03690989315509796,
+        0.044959694147109985,
+        -0.14276015758514404,
+        -0.0002254673163406551,
+        -0.15694372355937958,
+        0.012381293810904026,
+        -0.021977441385388374,
+        0.005496624857187271,
+        -0.035593707114458084,
+        -0.0950438603758812,
+        0.03825876861810684,
+        0.05915532633662224,
+        -0.023323312401771545,
+        0.017213119193911552,
+        -0.03807183355093002,
+        0.02619507722556591,
+        0.02741156332194805,
+        0.005847832188010216,
+        0.0020307491067796946,
+        0.025714349001646042,
+        -0.04780200496315956,
+        0.010206928476691246,
+        -0.01345440000295639,
+        0.029133174568414688,
+        -0.0014764482621103525,
+        0.004046705551445484,
+        -0.007725241594016552,
+        0.013041527941823006,
+        0.0018969239899888635,
+        0.002417983952909708,
+        -0.010975837707519531,
+        0.0015862436266615987,
+        0.00597577728331089,
+        0.002882696921005845,
+        0.02855525352060795,
+        -0.005954153370112181,
+        0.04090835899114609,
+        -0.39500924944877625,
+        0.03586621209979057,
+        -0.5250031352043152,
+        -0.05697731301188469,
+        -0.09568691998720169,
+        -0.07179264724254608,
+        0.04683076590299606,
+        0.009320023469626904,
+        -0.11629963666200638,
+        -0.0016945215174928308,
+        0.01624997705221176,
+        -0.0063682254403829575,
+        0.15033549070358276,
+        -0.5171176791191101,
+        -0.01525783073157072,
+        0.016417231410741806,
+        -0.00303818890824914,
+        0.2500321865081787,
+        0.022074062377214432,
+        0.01191191840916872,
+        0.012274803593754768,
+        0.016534989699721336,
+        -0.028437916189432144,
+        0.04241323843598366,
+        -0.01824999786913395,
+        -0.34815871715545654,
+        0.04734490439295769,
+        -0.06419701874256134,
+        -0.022288290783762932,
+        -0.0004865761147812009,
+        0.05369419604539871,
+        -0.058212973177433014,
+        -0.2196469008922577,
+        0.010950890369713306,
+        0.029042819514870644,
+        -0.07349151372909546,
+        -0.0422789566218853,
+        0.062069639563560486,
+        0.05589267984032631,
+        0.014877256006002426,
+        0.04236084595322609,
+        0.03975239768624306,
+        0.16930873692035675,
+        0.03981085494160652,
+        0.11499395221471786,
+        0.0271450225263834,
+        0.013969083316624165,
+        -0.0002660648606251925,
+        0.010936664417386055,
+        -0.18389767408370972,
+        -0.10237602889537811,
+        0.03041323646903038,
+        -0.013864071108400822,
+        -0.015729930251836777,
+        0.037400804460048676,
+        -0.009598327800631523,
+        -0.09533312171697617,
+        -0.014712700620293617,
+        0.08537333458662033,
+        -0.007200485561043024,
+        -0.31139102578163147,
+        -0.06366845220327377,
+        0.02039063163101673,
+        -0.023356139659881592,
+        -0.0029549277387559414,
+        -0.12494662404060364,
+        0.011755092069506645,
+        -0.26468148827552795,
+        -0.11541861295700073,
+        0.010529865510761738,
+        -0.05965733155608177,
+        -0.05945499241352081,
+        -0.08796169608831406,
+        -0.014683439396321774,
+        0.008732054382562637,
+        0.010073489509522915,
+        0.09553763270378113,
+        0.034884922206401825,
+        0.018675342202186584,
+        -0.009549405425786972,
+        -0.0007051719003356993,
+        -0.16936513781547546,
+        -0.0030460187699645758,
+        -0.022060535848140717,
+        -0.06689190864562988,
+        0.013926704414188862,
+        0.012043816037476063,
+        -0.0587068572640419,
+        -0.03814113140106201,
+        0.06235629320144653,
+        0.013228330761194229,
+        0.04154474660754204,
+        -0.08039120584726334,
+        0.028436705470085144,
+        -0.042226389050483704,
+        -0.019135186448693275,
+        0.03747033327817917,
+        -0.14261123538017273,
+        0.02827540971338749,
+        0.0455685593187809,
+        -0.031124960631132126,
+        -0.007588588632643223,
+        0.0034326373133808374,
+        -0.07682976871728897,
+        0.24654042720794678,
+        -0.014518304727971554,
+        -0.07052458822727203,
+        -0.08241941034793854,
+        -0.04116151109337807,
+        -0.048463717103004456,
+        -0.038745298981666565,
+        0.036902472376823425,
+        0.0442035011947155,
+        0.05572585016489029,
+        -0.014312628656625748,
+        0.010794793255627155,
+        -0.3440641760826111,
+        -0.5161325335502625,
+        0.0005156552069820464,
+        -0.010257269255816936,
+        -0.02412656880915165,
+        -0.023385023698210716,
+        0.05533458665013313,
+        -0.012186119332909584,
+        -0.029286568984389305,
+        0.04116401448845863,
+        -0.044610101729631424,
+        -0.019175484776496887,
+        0.06835268437862396,
+        0.06366674602031708,
+        0.0373748242855072,
+        0.03804386034607887,
+        0.05369521677494049,
+        -0.04451881721615791,
+        0.0018838117830455303,
+        0.34775662422180176,
+        0.010958605445921421,
+        -0.047990139573812485,
+        0.04386777803301811,
+        -0.10427688807249069,
+        0.04417382925748825,
+        4.402965714689344e-05,
+        0.01935163326561451,
+        -0.06753949075937271,
+        0.02735923044383526,
+        0.01465953141450882,
+        0.06198301538825035,
+        -0.015980403870344162,
+        -0.2108263075351715,
+        0.008177559822797775,
+        0.006046924740076065,
+        0.002665479900315404,
+        0.20868580043315887,
+        -0.013740362599492073,
+        0.008203004486858845,
+        -0.005066391546279192,
+        0.026405498385429382,
+        0.01383009273558855,
+        0.012581533752381802,
+        0.009014940820634365,
+        0.022820021957159042,
+        -0.008534795604646206,
+        0.2603924572467804,
+        0.02297227643430233,
+        -0.000749691273085773,
+        0.044753506779670715,
+        0.018596511334180832,
+        0.006852792575955391,
+        -0.008686172775924206,
+        -0.10452616959810257,
+        0.017021872103214264,
+        0.003722329391166568,
+        -0.025453045964241028,
+        -0.011473417282104492,
+        -0.017907623201608658,
+        0.01400628499686718,
+        -0.1670989990234375,
+        0.004298652987927198,
+        -0.0022204748820513487,
+        0.16521315276622772,
+        -0.008831127546727657,
+        0.026490870863199234,
+        0.006190746556967497,
+        -0.0177209060639143,
+        0.08967147767543793,
+        0.0033069502096623182,
+        -0.005021366756409407,
+        0.0004906906979158521,
+        0.0169216375797987,
+        -0.06124846637248993,
+        -0.005200678016990423,
+        0.08404737710952759,
+        -0.010559299029409885,
+        -0.006309974938631058,
+        0.023113396018743515,
+        -0.010227260179817677,
+        0.001256447983905673,
+        0.019783375784754753,
+        -0.006308461539447308,
+        -0.04529590904712677,
+        -0.00908862054347992,
+        -0.043217338621616364,
+        -0.32200074195861816,
+        0.02592635713517666,
+        0.030795685946941376,
+        -0.001814531977288425,
+        0.0092842485755682,
+        0.07088880985975266,
+        -0.0867588147521019,
+        0.024099843576550484,
+        -0.0034031609538942575,
+        0.007234686985611916,
+        -0.02505563199520111,
+        0.0030480287969112396,
+        -0.019158190116286278,
+        0.26473408937454224,
+        -0.011918547563254833,
+        -0.023240016773343086,
+        -0.06084466353058815,
+        -0.021916134282946587,
+        -0.010251260362565517,
+        -0.0009625791572034359,
+        0.082605741918087,
+        -0.013018425554037094,
+        0.007627277635037899,
+        -0.0010813736589625478,
+        0.007952406071126461,
+        0.06551267951726913,
+        -0.026020025834441185,
+        0.050048135221004486,
+        -0.010610008612275124,
+        -0.02429312653839588,
+        -0.025263017043471336,
+        -0.04611891135573387,
+        0.04451768472790718,
+        -0.08045025914907455,
+        -0.048037610948085785,
+        0.008019295521080494,
+        0.0160224549472332,
+        0.002078550634905696,
+        -0.0202508345246315,
+        -0.5446130633354187,
+        0.012585492804646492,
+        -0.0331973135471344,
+        0.08371605724096298,
+        -0.00590998912230134,
+        -0.013058983720839024,
+        0.027742384001612663,
+        0.1042199358344078,
+        -0.3072803318500519,
+        0.06284149736166,
+        -0.28551968932151794,
+        0.026768438518047333,
+        0.022245990112423897,
+        0.018242113292217255,
+        -0.035077981650829315,
+        0.03546127676963806,
+        0.10165776312351227,
+        -0.025475669652223587,
+        -0.014933750964701176,
+        0.040547240525484085,
+        -0.033055808395147324,
+        0.011755919083952904,
+        -0.014459444209933281,
+        -0.03455093130469322,
+        0.020743343979120255,
+        0.02720930427312851,
+        -0.287664532661438,
+        0.008260028436779976,
+        -0.009877690114080906,
+        0.16657423973083496,
+        -0.010943812318146229,
+        -0.012381386943161488,
+        0.030678801238536835,
+        0.1559792459011078,
+        0.038967035710811615,
+        -0.023399239405989647,
+        0.015019542537629604,
+        -0.014201333746314049,
+        -0.014202176593244076,
+        -0.006699408870190382,
+        -0.13175444304943085,
+        0.004643211141228676,
+        0.012747463770210743,
+        -0.04086190089583397,
+        0.06581410765647888,
+        -0.12192045897245407,
+        -0.03126347437500954,
+        0.011175516061484814,
+        -0.00914736744016409,
+        -0.02883930690586567,
+        -0.11305265873670578,
+        -0.04405384883284569,
+        -0.009120048955082893,
+        -0.008926079608500004,
+        -0.03169447183609009,
+        0.05464877560734749,
+        0.25674498081207275,
+        0.08497058600187302,
+        -0.023222925141453743,
+        0.35592252016067505,
+        -0.006929511670023203,
+        0.025255810469388962,
+        -0.05150032415986061,
+        0.039239466190338135,
+        -0.07082924991846085,
+        -0.017321549355983734,
+        0.17293211817741394,
+        -0.02155853807926178,
+        -0.014333213679492474,
+        0.0031305316369980574,
+        -0.013490653596818447,
+        -0.1376512199640274,
+        -0.021713266149163246,
+        -0.029826253652572632,
+        -0.0011473714839667082,
+        -0.012434332631528378,
+        -0.04860873892903328,
+        0.013857590034604073,
+        0.0703854188323021,
+        0.034528713673353195,
+        -0.014423011802136898,
+        0.0882454589009285,
+        -0.091700978577137,
+        0.038885727524757385,
+        0.012043441645801067,
+        -0.03183690831065178,
+        -0.014495689421892166,
+        -0.019726552069187164,
+        -0.010094117373228073,
+        -0.004218627233058214,
+        -0.04413086175918579,
+        -0.1344134360551834,
+        -0.0004976870259270072,
+        -0.0008357573533430696,
+        0.04518067091703415,
+        0.046797975897789,
+        0.24766182899475098,
+        0.01065139751881361,
+        -0.0034267394803464413,
+        -0.016103556379675865,
+        -0.05139121413230896,
+        0.012563390657305717,
+        -0.03310413286089897,
+        -0.030157553032040596,
+        0.046670909970998764,
+        0.012565785087645054,
+        -0.040275491774082184,
+        0.023816417902708054,
+        -0.38536572456359863,
+        0.04508889466524124,
+        0.13637560606002808,
+        -0.010654824785888195,
+        0.0459851399064064,
+        -0.0046302699483931065,
+        -0.020852191373705864,
+        0.10662271827459335,
+        0.06486576050519943,
+        0.05727925896644592,
+        0.09816201776266098,
+        0.04878557100892067,
+        -0.16256237030029297,
+        0.014547038823366165,
+        0.018567964434623718,
+        -0.07284612208604813,
+        0.017150163650512695,
+        0.0246741883456707,
+        -0.38470372557640076,
+        -0.07465949654579163,
+        0.03010236658155918,
+        -0.004397575277835131,
+        -0.06618984788656235,
+        -0.02908281609416008,
+        0.060166433453559875,
+        -0.0020949048921465874,
+        0.007689109072089195,
+        -0.0047390698455274105,
+        -0.014199030585587025,
+        -0.01794746331870556,
+        -0.02528063952922821,
+        0.002218312583863735,
+        0.10169881582260132,
+        0.010602130554616451,
+        -0.06605861335992813,
+        -0.0008762837387621403,
+        -0.035027723759412766,
+        -0.011684391647577286,
+        0.02247578091919422,
+        0.17245104908943176,
+        0.22525252401828766,
+        -0.010771296918392181,
+        0.05595310404896736,
+        0.06338834017515182,
+        -0.0038216698449105024,
+        -0.0032836494501680136,
+        0.005779017228633165,
+        -0.18020786345005035,
+        -0.05066698044538498,
+        -0.0035458216443657875,
+        -0.10578767210245132,
+        -0.041712939739227295,
+        0.2104150652885437,
+        -0.03753345459699631,
+        0.013989892788231373,
+        0.01988149993121624,
+        0.05108603090047836,
+        0.04496738687157631,
+        -0.3034508526325226,
+        0.0226743221282959,
+        -0.0431472510099411,
+        -0.025635428726673126,
+        -0.18961989879608154,
+        -0.17218825221061707,
+        0.03576141223311424,
+        0.060613714158535004,
+        -0.011970550753176212,
+        -0.21435107290744781,
+        0.01422552578151226,
+        0.02974064089357853,
+        -0.061079952865839005,
+        0.031064646318554878,
+        0.009629320353269577,
+        -0.13762925565242767,
+        0.01928475871682167,
+        0.007310172542929649,
+        0.06103459745645523,
+        -0.16216528415679932,
+        0.03330384939908981,
+        0.09578404575586319,
+        -0.0037327276077121496,
+        0.029233848676085472,
+        -0.0015759399393573403,
+        0.005511409603059292,
+        -0.4195749759674072,
+        0.024169376119971275,
+        0.13220365345478058,
+        0.007961929775774479,
+        0.008045470342040062,
+        0.01919495314359665,
+        -0.023188553750514984,
+        0.07084394991397858,
+        -0.24922333657741547,
+        0.02011212892830372,
+        -0.18514998257160187,
+        0.03114209696650505,
+        0.09826567023992538,
+        0.00592303741723299,
+        -0.010020115412771702,
+        0.027117054909467697,
+        -0.214133620262146,
+        -0.01214816514402628,
+        0.06564164906740189,
+        0.02513044886291027,
+        0.02132420241832733,
+        -0.02127540111541748,
+        -0.041606876999139786,
+        0.04196378216147423,
+        -0.02060609683394432,
+        0.01730814389884472,
+        -0.17418994009494781,
+        0.03462710976600647,
+        -0.017470642924308777,
+        -0.3992193639278412,
+        0.02652592957019806,
+        0.025042008608579636,
+        0.026447610929608345,
+        -0.19199316203594208,
+        3.27593952533789e-05,
+        0.002988220192492008,
+        -0.21171888709068298,
+        0.03300239518284798,
+        0.015727035701274872,
+        -0.008947308175265789,
+        0.03924538940191269,
+        -0.08990193158388138,
+        0.023726975545287132,
+        0.03463870286941528,
+        -0.05018220469355583,
+        0.13170146942138672,
+        0.054000236093997955,
+        0.01158218178898096,
+        0.062349993735551834,
+        -0.014724616892635822,
+        0.039657603949308395,
+        0.04436490684747696,
+        0.014076294377446175,
+        0.07666806876659393,
+        0.09630247205495834,
+        -0.04152659326791763,
+        -0.1860806941986084,
+        -0.07671733945608139,
+        0.031573690474033356,
+        -0.44617798924446106,
+        -0.004897239152342081,
+        -0.03991628438234329,
+        0.01880800537765026,
+        -0.04769768565893173,
+        0.02198435738682747,
+        0.01341161783784628,
+        -0.12239313870668411,
+        0.019765935838222504,
+        0.005221452098339796,
+        -0.025201082229614258,
+        0.005132562946528196,
+        0.08668412268161774,
+        0.0035341952461749315,
+        0.008583099581301212,
+        0.032979920506477356,
+        0.03324040770530701,
+        0.04411708936095238,
+        -0.008390798233449459,
+        0.040486790239810944,
+        -0.059673551470041275,
+        0.02003314346075058,
+        -0.0990666076540947,
+        0.03971675783395767,
+        0.012021057307720184,
+        0.0017271327087655663,
+        0.01818535290658474,
+        0.0025106174871325493,
+        0.043714240193367004,
+        0.019146842882037163,
+        -0.0041794623248279095,
+        0.033447377383708954,
+        0.06863203644752502,
+        -0.004350902978330851,
+        0.0113364327698946,
+        -0.05825724080204964,
+        -0.04649435728788376,
+        -0.10618306696414948,
+        0.02653644233942032,
+        0.012514552101492882,
+        0.019399365410208702,
+        -0.0022177041973918676,
+        0.017741208896040916,
+        0.04115311801433563,
+        0.05122101679444313,
+        0.055051617324352264,
+        0.01687677949666977,
+        -0.03698579967021942,
+        0.10053858160972595,
+        -0.007528421934694052,
+        0.003968802746385336,
+        0.02458524890244007,
+        -0.02144794538617134,
+        0.026791265234351158,
+        -0.016701897606253624,
+        0.014119372703135014,
+        -0.03460531681776047,
+        -0.02320348098874092,
+        0.056146953254938126,
+        0.028700685128569603,
+        -0.14820916950702667,
+        -0.016996873542666435,
+        0.025667931884527206,
+        0.08408629894256592,
+        0.00034475952270440757,
+        0.007573155220597982,
+        0.06784884631633759,
+        0.025982951745390892,
+        -0.08363039791584015,
+        -0.015748541802167892,
+        -0.0029514851048588753,
+        -0.01523523684591055,
+        0.10500328987836838,
+        0.3070858418941498,
+        -0.024624783545732498,
+        0.0058471946977078915,
+        -0.039751242846250534,
+        0.0012745993444696069,
+        -0.0796508714556694,
+        0.024727927520871162,
+        0.056764136999845505,
+        -0.013338261283934116,
+        -0.04794292524456978,
+        -0.02609768509864807,
+        -0.010784422047436237,
+        -0.048712026327848434,
+        0.020345501601696014,
+        0.0021618579048663378,
+        -0.0021724768448621035,
+        0.03056410700082779,
+        -0.01633712649345398,
+        -0.47168225049972534,
+        -0.014639903791248798,
+        -0.012550815008580685,
+        0.03358187526464462,
+        0.07889427989721298,
+        -0.03615899011492729,
+        -0.002809660043567419,
+        -0.006953644100576639,
+        0.02024337276816368,
+        -0.0738825723528862,
+        -0.006984011270105839,
+        -0.04472561925649643,
+        -0.027498915791511536,
+        0.07207506150007248,
+        -0.09166522324085236,
+        -0.008861960843205452,
+        0.05264359340071678,
+        0.01889069564640522,
+        -0.1380404680967331,
+        -0.010141258127987385,
+        0.015403619967401028,
+        -0.16416165232658386,
+        -0.03529815003275871,
+        0.042106859385967255,
+        0.11173021793365479,
+        -0.3143587112426758,
+        0.011045016348361969,
+        0.0012351945042610168,
+        0.03840603306889534,
+        0.0685538575053215,
+        -0.000746160454582423,
+        -0.028142500668764114,
+        0.027154160663485527,
+        0.005731801502406597,
+        0.04433267563581467,
+        -0.8158469796180725,
+        0.02226361259818077,
+        -0.07650655508041382,
+        0.026958195492625237,
+        -0.005810025613754988,
+        -0.020102059468626976,
+        -0.0019310436910018325,
+        0.07697021961212158,
+        -0.057701658457517624,
+        0.05954534560441971,
+        0.0027106746565550566,
+        -0.06311310827732086,
+        0.011713752523064613,
+        -0.0034454476553946733,
+        -0.0006881420267745852,
+        0.08937360346317291,
+        -0.0008253820124082267,
+        -0.031066063791513443,
+        -0.14708301424980164,
+        -0.04438449814915657,
+        0.004772413522005081,
+        0.05992274731397629,
+        0.07473544776439667,
+        -0.1784757375717163,
+        -0.19057415425777435,
+        -0.014637955464422703,
+        -0.24898527562618256,
+        0.13606221973896027,
+        -0.018039124086499214,
+        -0.047193415462970734,
+        -0.06526428461074829,
+        0.04075757786631584,
+        0.049901530146598816,
+        -0.008585861884057522,
+        0.01616351678967476,
+        -3.091737016802654e-05,
+        0.024283329024910927,
+        0.008861682377755642,
+        -0.0005823548417538404,
+        0.0997646301984787,
+        0.051001910120248795,
+        0.009473294951021671,
+        -0.0032046104315668344,
+        0.018362928181886673,
+        0.008627718314528465,
+        -0.4148157835006714,
+        -0.016077928245067596,
+        0.0745391696691513,
+        0.00724065862596035,
+        0.08948155492544174,
+        0.11626332253217697,
+        -0.052439428865909576,
+        0.005599102005362511,
+        0.002622961765155196,
+        0.07586965709924698,
+        0.03274847939610481,
+        -0.02099076844751835,
+        -0.04666733741760254,
+        -0.0013019372709095478,
+        0.04945925995707512,
+        0.11393380910158157,
+        0.006346395239233971,
+        0.04721064493060112,
+        0.010331138968467712,
+        0.08918803185224533,
+        0.04288423806428909,
+        -0.09234773367643356,
+        0.020141584798693657,
+        -3.256054696976207e-05,
+        -0.02799108810722828,
+        0.018966441974043846,
+        -0.4136410355567932,
+        -0.07217283546924591,
+        0.01840362884104252,
+        -0.055327851325273514,
+        0.003275467548519373,
+        -0.017174070701003075,
+        -0.032178670167922974,
+        0.09021560847759247,
+        -0.524413526058197,
+        0.01994725503027439,
+        0.10380692034959793,
+        -0.01043684035539627,
+        -0.00011200909648323432,
+        0.01331041194498539,
+        0.020127851516008377,
+        -0.025159789249300957,
+        0.05252581834793091,
+        0.04759140685200691,
+        0.0032084162812680006,
+        -0.03579062595963478,
+        0.054719552397727966,
+        -0.04674411937594414,
+        0.028389262035489082,
+        0.001127603929489851,
+        -0.0006243048119358718,
+        -0.00550495833158493,
+        -0.022523507475852966,
+        -0.024282312020659447,
+        0.009519628249108791,
+        -0.39908328652381897,
+        -0.009265545755624771,
+        -0.00037090369733050466,
+        0.06425131112337112,
+        -0.05998316407203674,
+        -0.015221518464386463,
+        -0.004825026262551546,
+        0.11847284436225891,
+        -0.011302731931209564,
+        -0.006884834263473749,
+        -0.04678218811750412,
+        -0.012078279629349709,
+        0.021638741716742516,
+        -0.016819776967167854,
+        -0.009127719327807426,
+        -0.002491263672709465,
+        0.0016752213705331087,
+        -0.016600262373685837,
+        0.011772023513913155,
+        -0.013447183184325695,
+        -0.020662957802414894,
+        -0.011593316681683064,
+        0.008270744234323502,
+        -0.0026990456972271204,
+        -0.004406482446938753,
+        -0.023110052570700645,
+        -0.00208942755125463,
+        -0.1711198389530182,
+        0.012432538904249668,
+        -0.0045453268103301525,
+        0.024807902052998543,
+        -0.0035043740645051003,
+        -0.004001997876912355,
+        -0.013488625176250935,
+        -0.02020987868309021,
+        -0.01216109935194254,
+        -0.004432092886418104,
+        0.09323672950267792,
+        -0.015641510486602783,
+        -0.019307948648929596,
+        0.01117538008838892,
+        -0.01422040443867445,
+        0.01705607771873474,
+        -0.0029596879612654448,
+        -0.0021530911326408386,
+        -0.006551788654178381,
+        0.00429268553853035,
+        -0.1620807945728302,
+        -0.014128226786851883,
+        -0.005428737495094538,
+        -0.006771362852305174,
+        0.005730633158236742,
+        0.0007243106956593692,
+        0.0024031582288444042,
+        -0.00199915561825037,
+        0.006133859045803547,
+        -0.013380909338593483,
+        0.00733462069183588,
+        -0.001863821060396731,
+        -0.0020169683266431093,
+        -0.014070986770093441,
+        -0.006501683499664068,
+        -0.029421553015708923,
+        0.0009377509704791009,
+        -0.01718256250023842,
+        -0.05819401144981384,
+        -0.018859732896089554,
+        0.0010356366401538253,
+        0.006394123658537865,
+        -0.021985618397593498,
+        -0.01204769592732191,
+        -0.002014884725213051,
+        -0.019398409873247147,
+        -0.013122898526489735,
+        -0.017277296632528305,
+        -0.002270353492349386,
+        -0.05294327810406685,
+        -0.020317314192652702,
+        -0.018196573480963707,
+        -0.010375416837632656,
+        -0.019704729318618774,
+        -0.016109557822346687,
+        -0.0167380403727293,
+        -0.0285252146422863,
+        -0.02665277197957039,
+        -0.03554505482316017,
+        -0.00741522666066885,
+        -0.013580105267465115,
+        -0.026335405185818672,
+        -0.011694515123963356,
+        -0.004639182705432177,
+        -0.03996071219444275,
+        -0.022463932633399963,
+        -0.007204636000096798,
+        -0.021065134555101395,
+        -0.014410646632313728,
+        0.0035447971895337105,
+        -0.0013098351191729307,
+        -0.024171002209186554,
+        0.00047751085367053747,
+        -0.01870289072394371,
+        -0.06016797944903374,
+        -0.025703946128487587,
+        -0.009730588644742966,
+        -0.021792838349938393,
+        -0.024519823491573334,
+        -0.01843440905213356,
+        -0.0016325484029948711,
+        -0.008116388693451881,
+        -0.017774557694792747,
+        -0.04375867918133736,
+        -0.03893980756402016,
+        -0.018188582733273506,
+        -0.007122726645320654,
+        -0.028115490451455116,
+        -0.01821342669427395,
+        -0.01011319737881422,
+        -0.02616124413907528,
+        -0.013797983527183533,
+        -0.03202736750245094,
+        -0.030110370367765427,
+        -0.01883666031062603,
+        -0.01185502391308546,
+        -0.006012012716382742,
+        -0.017311619594693184,
+        -0.022577986121177673,
+        -0.02101938985288143,
+        0.0025952248834073544,
+        -0.005058783106505871,
+        -0.004162575118243694,
+        -0.01559755764901638,
+        -0.017923563718795776,
+        -0.04231095686554909,
+        -0.017630560323596,
+        -0.011938830837607384,
+        -0.01587115228176117,
+        0.004972478374838829,
+        -0.016601158306002617,
+        0.15419845283031464,
+        0.0009241115767508745,
+        0.051028184592723846,
+        0.008128340356051922,
+        -0.019917558878660202,
+        -0.0010339801665395498,
+        0.022349294275045395,
+        -0.0072520882822573185,
+        0.0017750378465279937,
+        -0.10526080429553986,
+        0.03420695662498474,
+        0.019183926284313202,
+        -0.0006544998032040894,
+        -0.0032203509472310543,
+        -0.01216941885650158,
+        -0.03561796247959137,
+        0.024905826896429062,
+        -0.026948239654302597,
+        -0.01913355104625225,
+        -0.014459407888352871,
+        0.006972283590584993,
+        -0.033184293657541275,
+        0.04884861409664154,
+        -0.002296984428539872,
+        -0.19194477796554565,
+        0.00392142403870821,
+        0.009490449912846088,
+        -0.02687196619808674,
+        -0.06327224522829056,
+        -0.03684951737523079,
+        -0.0002613202668726444,
+        -0.012086644768714905,
+        0.03630973398685455,
+        0.007296048104763031,
+        0.011186012998223305,
+        0.0074085514061152935,
+        -0.020394617691636086,
+        -0.010585476644337177,
+        -0.030289918184280396,
+        0.0773506686091423,
+        0.008841303177177906,
+        0.019423579797148705,
+        0.001184571417979896,
+        0.005553434602916241,
+        0.015373414382338524,
+        -0.0027953842654824257,
+        0.013204757124185562,
+        0.029097743332386017,
+        0.012627501040697098,
+        0.02102004364132881,
+        -0.09469914436340332,
+        -0.023324014618992805,
+        0.029243655502796173,
+        0.002979277865961194,
+        -0.004492263309657574,
+        0.20549021661281586,
+        -0.3244459927082062,
+        0.025892559438943863,
+        0.009620796889066696,
+        -0.05520407855510712,
+        -0.02271144650876522,
+        0.008378816768527031,
+        -0.0671214610338211,
+        -0.016056722030043602,
+        -0.02355658821761608,
+        0.0005429868469946086,
+        -0.007960098795592785,
+        0.02513299137353897,
+        -0.13005328178405762,
+        -0.0025323680602014065,
+        -0.02197088487446308,
+        -0.02404806576669216,
+        0.08261960744857788,
+        0.17078880965709686,
+        0.02880753017961979,
+        -0.03642067685723305,
+        0.021994341164827347,
+        -0.012368184514343739,
+        -0.10681373625993729,
+        0.16371481120586395,
+        0.17881983518600464,
+        -0.10202010720968246,
+        -0.08641688525676727,
+        -0.1259487271308899,
+        0.06907707452774048,
+        0.023792706429958344,
+        -0.02534419298171997,
+        0.016984017565846443,
+        -0.06743635982275009,
+        0.08445960283279419,
+        -0.08037827908992767,
+        -0.11935994029045105,
+        -0.31716489791870117,
+        -0.01860150322318077,
+        0.060669515281915665,
+        -0.06137414649128914,
+        0.09878886491060257,
+        0.01794014871120453,
+        0.12382296472787857,
+        -0.016424886882305145,
+        0.09045679122209549,
+        -0.02998783066868782,
+        -0.00972777884453535,
+        -0.024124544113874435,
+        0.09879253059625626,
+        0.05500243604183197,
+        -0.06635259836912155,
+        0.11268552392721176,
+        0.011751363053917885,
+        -0.04690232127904892,
+        -0.025168607011437416,
+        0.088335320353508,
+        -0.1140628531575203,
+        0.04129032790660858,
+        -0.04258979484438896,
+        -0.0903872698545456,
+        0.008473021909594536,
+        -0.026690304279327393,
+        -0.051559556275606155,
+        -0.05481572076678276,
+        -0.05251916125416756,
+        -0.0018165932269766927,
+        0.09836867451667786,
+        0.0054859439842402935,
+        0.06432581692934036,
+        0.10621821135282516,
+        -0.019325286149978638,
+        -0.028727786615490913,
+        0.014013150706887245,
+        -0.008022608235478401,
+        -0.006281842477619648,
+        -0.0297000203281641,
+        0.01525485422462225,
+        -0.4346403479576111,
+        0.07787995040416718,
+        -0.25380268692970276,
+        0.05261845141649246,
+        0.010875157080590725,
+        0.0014149334747344255,
+        0.05021188035607338,
+        -0.24382442235946655,
+        0.0807114690542221,
+        0.022907381877303123,
+        0.006440790370106697,
+        -0.017028095200657845,
+        0.001552293193526566,
+        0.05961666256189346,
+        -0.14113056659698486,
+        0.03398876264691353,
+        -0.005411976482719183,
+        -0.014025667682290077,
+        -0.5433799624443054,
+        0.019015472382307053,
+        0.04091138765215874,
+        0.05059061944484711,
+        0.0274446289986372,
+        -0.010288042947649956,
+        -0.001335533568635583,
+        -0.013533512130379677,
+        0.018798377364873886,
+        -0.04099345579743385,
+        0.0031264263670891523,
+        -0.21071769297122955,
+        -0.014384736306965351,
+        -0.1045387014746666,
+        -0.014340974390506744,
+        0.001986369490623474,
+        -0.04118456318974495,
+        -0.10952988266944885,
+        0.049147430807352066,
+        -0.08382093161344528,
+        -0.1741400957107544,
+        -0.0885215476155281,
+        -0.10934099555015564,
+        0.05553343519568443,
+        0.02434251271188259,
+        0.006634524557739496,
+        -0.0017163373995572329,
+        0.0185443926602602,
+        0.06250902265310287,
+        -0.17145656049251556,
+        -0.07543934881687164,
+        0.026583310216665268,
+        0.01634727604687214,
+        0.003603539662435651,
+        -0.2817271649837494,
+        0.03882112354040146,
+        0.011341865174472332,
+        0.00826666783541441,
+        0.050427842885255814,
+        -0.22358834743499756,
+        0.06419781595468521,
+        0.03245265409350395,
+        -0.04503164440393448,
+        -0.023194484412670135,
+        -0.027968740090727806,
+        0.08563586324453354,
+        0.07954753190279007,
+        -0.08513130992650986,
+        0.02850884199142456,
+        0.008976672776043415,
+        0.07886530458927155,
+        0.0022273347713053226,
+        -0.09540755301713943,
+        0.032016951590776443,
+        -0.05196075513958931,
+        0.10555616766214371,
+        0.07629868388175964,
+        0.039732079952955246,
+        -0.0029798501636832952,
+        0.014692343771457672,
+        0.09200941026210785,
+        -0.04299614951014519,
+        -0.023488566279411316,
+        -0.01851060427725315,
+        0.09257487207651138,
+        0.055612049996852875,
+        0.06423109769821167,
+        -0.28587806224823,
+        -0.09950444847345352,
+        0.10397437959909439,
+        0.025166453793644905,
+        -0.03235514089465141,
+        -0.033381711691617966,
+        0.1513858139514923,
+        0.06468874961137772,
+        0.01928441785275936,
+        0.0032701045274734497,
+        -0.0579083226621151,
+        -0.022929169237613678,
+        0.012971373274922371,
+        -0.018524186685681343,
+        -0.06484643369913101,
+        0.012233717367053032,
+        0.06590451300144196,
+        -0.04558677598834038,
+        0.05253027006983757,
+        0.048656731843948364,
+        -0.2288871705532074,
+        0.037114787846803665,
+        -0.20519588887691498,
+        0.0058607361279428005,
+        -0.002009372925385833,
+        -0.006671734619885683,
+        -0.07107856124639511,
+        -0.07407436519861221,
+        0.03941629081964493,
+        0.0447598397731781,
+        0.03509354963898659,
+        -0.061107732355594635,
+        -0.09305761009454727,
+        -0.012180411256849766,
+        0.04902016744017601,
+        0.07974442094564438,
+        -0.016854410991072655,
+        0.005089411046355963,
+        -0.08127597719430923,
+        0.03258403390645981,
+        0.039813362061977386,
+        -0.01668727956712246,
+        0.027226485311985016,
+        -0.029213925823569298,
+        -0.008598217740654945,
+        0.00931101106107235,
+        0.026936721056699753,
+        -0.03083401545882225,
+        -0.05799110606312752,
+        -0.008277476765215397,
+        -0.014854338951408863,
+        -0.20012643933296204,
+        0.012290815822780132,
+        0.007194168865680695,
+        0.06858328729867935,
+        -0.3296163082122803,
+        -0.11424986273050308,
+        0.009912200272083282,
+        -0.06211454048752785,
+        0.0007546336855739355,
+        0.03507614880800247,
+        0.10649498552083969,
+        -0.03036407195031643,
+        0.0646015852689743,
+        -0.01595110446214676,
+        -0.16919563710689545,
+        0.0013865949586033821,
+        -0.08339446783065796,
+        0.06962471455335617,
+        0.016058098524808884,
+        -0.04729780554771423,
+        0.010602935217320919,
+        0.01470863912254572,
+        0.06903938204050064,
+        0.014901719056069851,
+        -0.15120048820972443,
+        0.016727851703763008,
+        0.05003673583269119,
+        0.04370126873254776,
+        0.029703885316848755,
+        0.021875420585274696,
+        0.026293285191059113,
+        -0.01048936415463686,
+        0.00040810942300595343,
+        -0.015616541728377342,
+        -0.062451593577861786,
+        0.010016348212957382,
+        -0.06790193170309067,
+        -0.02077331207692623,
+        0.007985175587236881,
+        -0.04435744881629944,
+        0.06920231133699417,
+        0.018344474956393242,
+        0.028591370210051537,
+        0.021957838907837868,
+        0.0017570338677614927,
+        0.036665257066488266,
+        0.015438515692949295,
+        -0.0006347382441163063,
+        0.04621066153049469,
+        -0.001942177303135395,
+        0.010664877481758595,
+        -0.016754357144236565,
+        0.006541184149682522,
+        -0.027716301381587982,
+        -0.0058586387895047665,
+        -0.005346015095710754,
+        0.020482052117586136,
+        0.06882552057504654,
+        0.0026622572913765907,
+        0.016321638599038124,
+        0.017728103324770927,
+        -0.13356441259384155,
+        0.030281176790595055,
+        1.0354949154134374e-05,
+        0.050639618188142776,
+        0.0013030078262090683,
+        -0.11136802285909653,
+        -0.006832807790488005,
+        -0.09628921747207642,
+        0.046699415892362595,
+        0.002175685251131654,
+        0.008100612089037895,
+        0.012449901551008224,
+        -0.01713990420103073,
+        -0.000769267207942903,
+        0.022544430568814278,
+        -0.0018787183798849583,
+        -0.014189678244292736,
+        0.37042510509490967,
+        -0.030317893251776695,
+        0.012663356028497219,
+        -0.04071582853794098,
+        0.01653047651052475,
+        0.06578584760427475,
+        0.005606585182249546,
+        0.0029362838249653578,
+        -0.02035594917833805,
+        0.016131827607750893,
+        -0.06512665003538132,
+        0.020292088389396667,
+        0.12818951904773712,
+        -0.00017647731874603778,
+        0.0004811069811694324,
+        0.013025660999119282,
+        -0.006004344671964645,
+        0.011330580338835716,
+        0.0021733916364610195,
+        -0.0026290342211723328,
+        0.008579215034842491,
+        -0.017107143998146057,
+        0.0032798980828374624,
+        0.21415431797504425,
+        -0.011049880646169186,
+        0.04915957152843475,
+        -0.01152863260358572,
+        0.01988764852285385,
+        -0.30189022421836853,
+        0.1491061896085739,
+        0.022540517151355743,
+        0.02323656715452671,
+        -0.0028044115751981735,
+        -0.02501249685883522,
+        0.0016759912250563502,
+        0.023405946791172028,
+        0.0865691602230072,
+        0.0056661744602024555,
+        0.2334042638540268,
+        -0.05771901085972786,
+        0.03428330272436142,
+        -0.05191519856452942,
+        0.025708407163619995,
+        -0.11474912613630295,
+        0.05345827341079712,
+        0.050046734511852264,
+        -0.03785427287220955,
+        0.02726786397397518,
+        0.008640051819384098,
+        -0.05810163915157318,
+        0.19147679209709167,
+        0.12065602838993073,
+        -0.08667072653770447,
+        -0.12831886112689972,
+        0.027053257450461388,
+        -0.1771622896194458,
+        -0.2615586817264557,
+        0.112942636013031,
+        0.002398239215835929,
+        0.00907410029321909,
+        0.059947770088911057,
+        0.040937639772892,
+        0.003431124845519662,
+        0.012721046805381775,
+        -0.10228776186704636,
+        0.04169567674398422,
+        -0.04826785624027252,
+        -0.021415220573544502,
+        0.027615519240498543,
+        0.16087181866168976,
+        0.03552674129605293,
+        -0.36409878730773926,
+        0.0015418739058077335,
+        0.03940089792013168,
+        -0.12929502129554749,
+        0.017082052305340767,
+        -0.07193783670663834,
+        0.10395099222660065,
+        -0.2240910828113556,
+        -0.003303584409877658,
+        -0.0074868109077215195,
+        -0.13708709180355072,
+        0.2098008245229721,
+        0.013808795250952244,
+        -0.03606148064136505,
+        0.001965852687135339,
+        0.04186573252081871,
+        0.02105732634663582,
+        -0.11873909085988998,
+        -0.08529136329889297,
+        0.0060731275007128716,
+        0.04803553968667984,
+        0.07665349543094635,
+        0.026997262611985207,
+        0.05191565304994583,
+        0.09013131260871887,
+        0.013081093318760395,
+        0.04667182266712189,
+        -0.19899451732635498,
+        0.004642056301236153,
+        0.0025570227298885584,
+        -0.2640555500984192,
+        0.008254006505012512,
+        0.05971720814704895,
+        -0.002980671590194106,
+        0.0011313167633488774,
+        -0.004445134196430445,
+        0.01951296627521515,
+        -0.006634386721998453,
+        -0.008033698424696922,
+        0.012400158680975437,
+        -0.15906694531440735,
+        0.007047838997095823,
+        0.0003521084145177156,
+        -0.00517050176858902,
+        -0.0003226286207791418,
+        -0.01226231548935175,
+        -0.06750697642564774,
+        -0.03061128593981266,
+        -0.0027100055012851954,
+        0.004726986400783062,
+        0.010185977444052696,
+        0.021205933764576912,
+        -0.05105980113148689,
+        -0.006725164130330086,
+        0.26042309403419495,
+        0.003935054875910282,
+        0.009450466372072697,
+        -0.009512278251349926,
+        0.036205559968948364,
+        0.0066987741738557816,
+        0.05687355250120163,
+        -0.0070350514724850655,
+        0.021287698298692703,
+        0.004246287513524294,
+        -0.004053668584674597,
+        0.0030501342844218016,
+        -0.003596516093239188,
+        0.00571554945781827,
+        0.039099883288145065,
+        0.06648323684930801,
+        0.011140268296003342,
+        0.002779693342745304,
+        0.0004113377653993666,
+        0.0019621821120381355,
+        0.002047213725745678,
+        -9.034215327119455e-05,
+        0.006674906238913536,
+        -0.024464793503284454,
+        4.372629337012768e-05,
+        0.04560312256217003,
+        0.029951298609375954,
+        0.0053787860088050365,
+        0.010052027180790901,
+        0.0018156497972086072,
+        0.001613074098713696,
+        -0.3710610568523407,
+        0.18385423719882965,
+        0.0197732076048851,
+        -2.409513217571657e-05,
+        0.043657880276441574,
+        0.029824273660779,
+        -0.0015272254822775722,
+        -0.0009817760437726974,
+        0.030571524053812027,
+        0.05133187025785446,
+        0.021092001348733902,
+        -0.022430723533034325,
+        -0.011050102300941944,
+        -0.01653454266488552,
+        0.00856624636799097,
+        0.007617316208779812,
+        0.023697074502706528,
+        -0.00541776092723012,
+        -0.06940567493438721,
+        -0.024501511827111244,
+        0.0029131292831152678,
+        0.005110545549541712,
+        0.02394089475274086,
+        0.009317552670836449,
+        -0.05198051407933235,
+        -0.14872707426548004,
+        -0.03553030639886856,
+        0.05354774370789528,
+        0.053996339440345764,
+        0.016679847612977028,
+        -0.4505158066749573,
+        0.006403166800737381,
+        -0.014287465251982212,
+        0.010499212890863419,
+        0.00510875741019845,
+        0.0230255089700222,
+        -0.04791099205613136,
+        -0.08405473828315735,
+        -0.00807158276438713,
+        -0.016310568898916245,
+        -0.018034789711236954,
+        -0.03381670266389847,
+        0.038599055260419846,
+        0.01189411524683237,
+        0.0038598189130425453,
+        0.0077203805558383465,
+        -0.0006835742969997227,
+        0.3038807809352875,
+        0.00930703990161419,
+        -0.017654214054346085,
+        -0.029550395905971527,
+        0.0014829621650278568,
+        -0.010562432929873466,
+        -0.011867706663906574,
+        -0.008104459382593632,
+        0.008003979921340942,
+        -0.028282882645726204,
+        0.00898829661309719,
+        -0.04963170364499092,
+        0.014971665106713772,
+        0.028662119060754776,
+        0.055792808532714844,
+        0.018142173066735268,
+        0.029526766389608383,
+        0.04726170003414154,
+        0.020290115848183632,
+        -0.01347910612821579,
+        -0.027794860303401947,
+        -0.033374592661857605,
+        0.05699307844042778,
+        -0.005888971965759993,
+        0.009723466821014881,
+        0.011825029738247395,
+        0.0005665962235070765,
+        -0.22433574497699738,
+        0.04777664318680763,
+        0.054696254432201385,
+        0.06447272002696991,
+        0.006656138692051172,
+        -0.2656468152999878,
+        -0.006602808367460966,
+        -0.04309352487325668,
+        0.024392882362008095,
+        -0.046948980540037155,
+        0.17317010462284088,
+        -0.014694501645863056,
+        0.09150391072034836,
+        0.05414793640375137,
+        -0.0034523033536970615,
+        -0.029682809486985207,
+        -0.11646991223096848,
+        0.036394182592630386,
+        -0.008510537445545197,
+        -0.09555189311504364,
+        0.012331446632742882,
+        0.022554755210876465,
+        0.037040166556835175,
+        0.011939534917473793,
+        -0.035405583679676056,
+        -0.008284371346235275,
+        0.008629710413515568,
+        -0.0017152110813185573,
+        -0.01656493730843067,
+        0.02205522358417511,
+        -0.008015291765332222,
+        -0.02198217809200287,
+        -0.08165504783391953,
+        0.018647879362106323,
+        0.010489191859960556,
+        0.0009643095545470715,
+        0.08301698416471481,
+        0.00795030314475298,
+        -0.08973152935504913,
+        0.05324552580714226,
+        0.0187348835170269,
+        0.00770497927442193,
+        0.016434336081147194,
+        0.0031714467331767082,
+        0.031489044427871704,
+        -0.01682765781879425,
+        -0.0006042059976607561,
+        0.006229344755411148,
+        0.0031935630831867456,
+        -0.03694210946559906,
+        -0.027148112654685974,
+        0.03319454565644264,
+        0.013541879132390022,
+        0.04362545907497406,
+        0.010766182094812393,
+        0.01287879142910242,
+        0.02723391354084015,
+        0.01831277459859848,
+        -0.0028144901152700186,
+        0.0317537821829319,
+        -0.05053209140896797,
+        0.03341667726635933,
+        0.009338690899312496,
+        0.030376508831977844,
+        0.028512636199593544,
+        0.002190604107454419,
+        0.031132254749536514,
+        0.04174429178237915,
+        0.025147251784801483,
+        0.02602408640086651,
+        0.022863827645778656,
+        0.024160150438547134,
+        0.04043813422322273,
+        0.011693909764289856,
+        0.008020071312785149,
+        0.010814648121595383,
+        0.014862221665680408,
+        0.043966785073280334,
+        0.04133215174078941,
+        0.03920775279402733,
+        0.02128027193248272,
+        -0.0024078795686364174,
+        0.03185494989156723,
+        0.030951442196965218,
+        0.008766901679337025,
+        -0.0013500713976100087,
+        0.012680909596383572,
+        0.01911563239991665,
+        0.02226334996521473,
+        0.03873631730675697,
+        0.005242412444204092,
+        0.02335301972925663,
+        0.00577192846685648,
+        0.0019918885082006454,
+        0.019501060247421265,
+        0.048295676708221436,
+        0.027288099750876427,
+        0.03500128164887428,
+        0.032504353672266006,
+        0.03619033470749855,
+        0.022762063890695572,
+        0.014124974608421326,
+        0.04055529460310936,
+        0.040181197226047516,
+        0.04843837395310402,
+        0.019578352570533752,
+        0.04370861127972603,
+        0.024640914052724838,
+        0.027013463899493217,
+        0.04700532928109169,
+        0.018523193895816803,
+        0.03569294884800911,
+        0.031140455976128578,
+        0.010298499837517738,
+        0.03979840502142906,
+        0.015059049241244793,
+        0.020604899153113365,
+        0.010335667058825493,
+        0.02557498589158058,
+        0.015946611762046814,
+        0.018900645896792412,
+        0.05494159087538719,
+        0.015756357461214066,
+        0.0452926866710186,
+        0.04820817708969116,
+        -0.0183499027043581,
+        0.04002442955970764,
+        -0.08226092159748077,
+        -0.034417178481817245,
+        0.059122342616319656,
+        0.028960591182112694,
+        -0.020427608862519264,
+        -0.043222296983003616,
+        0.023134637624025345,
+        -0.014232538640499115,
+        -0.06970997899770737,
+        -0.0035826240200549364,
+        -0.015384080819785595,
+        -0.0695020854473114,
+        0.03645527362823486,
+        0.013986784033477306,
+        -0.027729706838726997,
+        -0.05711805075407028,
+        -0.0763891413807869,
+        -0.16338491439819336,
+        -0.02358265034854412,
+        -0.004730133805423975,
+        0.022057903930544853,
+        -0.011578230187296867,
+        0.040772147476673126,
+        -0.059327173978090286,
+        -0.03819728270173073,
+        -0.050089117139577866,
+        -0.005152902565896511,
+        -0.3071111738681793,
+        -0.010683669708669186,
+        0.030922774225473404,
+        0.08924981951713562,
+        0.005679265595972538,
+        0.06334424018859863,
+        0.016136568039655685,
+        -0.02575727365911007,
+        -0.012562219053506851,
+        0.007206748705357313,
+        -0.1373208612203598,
+        -0.010450832545757294,
+        -0.05991309881210327,
+        -0.006700845435261726,
+        -0.006468744482845068,
+        -0.02040017955005169,
+        -0.010068708099424839,
+        0.008442427963018417,
+        0.012259873561561108,
+        -0.002103718463331461,
+        -0.019605906680226326,
+        -0.010690353810787201,
+        0.0005222380859777331,
+        -0.015031278133392334,
+        -0.012983204796910286,
+        -0.03552224859595299,
+        -0.007792052812874317,
+        -0.035602111369371414,
+        -0.03479204699397087,
+        -0.02480080910027027,
+        -0.05733964219689369,
+        4.38804054283537e-05,
+        -0.021825626492500305,
+        -0.03287259489297867,
+        -0.05437042564153671,
+        -0.007981077767908573,
+        0.023045696318149567,
+        0.05785335600376129,
+        0.03685669228434563,
+        0.04314129799604416,
+        -0.005843586288392544,
+        -0.024806369096040726,
+        -0.02562016434967518,
+        0.0015172295970842242,
+        -0.01568800024688244,
+        -0.005925294477492571,
+        0.010173594579100609,
+        0.06834683567285538,
+        0.024159085005521774,
+        -0.009547322988510132,
+        0.014080812223255634,
+        0.013578452169895172,
+        0.035671167075634,
+        0.01240566186606884,
+        -0.021352441981434822,
+        0.05245270952582359,
+        -0.008943279273808002,
+        -0.010131126269698143,
+        0.02976749651134014,
+        0.0600045844912529,
+        0.0014893191400915384,
+        0.03796907886862755,
+        0.01258794590830803,
+        -0.025344882160425186,
+        0.14140591025352478,
+        0.028354406356811523,
+        0.0035325682256370783,
+        0.05017172172665596,
+        0.01994139887392521,
+        0.03679897263646126,
+        -0.009579945355653763,
+        -0.012607194483280182,
+        -0.00034231581958010793,
+        0.00046832446241751313,
+        0.057916246354579926,
+        0.02351403795182705,
+        0.06157909706234932,
+        0.00789523497223854,
+        -0.018361341208219528,
+        0.0018971840618178248,
+        -0.007180131506174803,
+        -0.0010631990153342485,
+        -0.03140748664736748,
+        -0.028505641967058182,
+        0.010669395327568054,
+        -0.036474280059337616,
+        0.01703447848558426,
+        0.04667484760284424,
+        -0.007303370162844658,
+        0.01768752932548523,
+        0.012412219308316708,
+        0.013702306896448135,
+        0.07651616632938385,
+        0.05469715967774391,
+        0.013292597606778145,
+        -0.006288900971412659,
+        0.0215559434145689,
+        0.010094149969518185,
+        -0.024216346442699432,
+        -0.15225785970687866,
+        0.05467289313673973,
+        0.019871067255735397,
+        0.04662928730249405,
+        0.05072600021958351,
+        -0.011824453249573708,
+        -0.028083933517336845,
+        0.013322187587618828,
+        -0.044827401638031006,
+        0.05955006927251816,
+        -0.006152187939733267,
+        0.013426700606942177,
+        -0.014220507815480232,
+        0.022510837763547897,
+        0.019426455721259117,
+        -0.05546477064490318,
+        -0.49202534556388855,
+        0.026985207572579384,
+        -0.08852843940258026,
+        0.07166163623332977,
+        0.05509938299655914,
+        -0.42284780740737915,
+        -0.05131356418132782,
+        0.0196990966796875,
+        -0.008681846782565117,
+        0.02739996463060379,
+        0.0010900507913902402,
+        0.04289104416966438,
+        -0.06694932281970978,
+        0.05930810049176216,
+        -0.02174360118806362,
+        0.03464379161596298,
+        0.018284866586327553,
+        0.018807150423526764,
+        0.019874336197972298,
+        -0.03665176033973694,
+        -0.2980017066001892,
+        0.050937239080667496,
+        -0.013874954544007778,
+        -0.0229057464748621,
+        0.016420641914010048,
+        0.024160616099834442,
+        -0.10750921070575714,
+        -0.010134756565093994,
+        0.026874780654907227,
+        0.007151094265282154,
+        0.06304068863391876,
+        -0.11811652034521103,
+        -0.12590888142585754,
+        0.031846947968006134,
+        -0.06898463517427444,
+        0.03395693376660347,
+        -0.00010166154243052006,
+        -0.19019480049610138,
+        0.06616076827049255,
+        -0.035927142947912216,
+        0.08526375889778137,
+        0.0015017242403700948,
+        -0.009137739427387714,
+        0.04529058188199997,
+        -0.23621641099452972,
+        0.02148340456187725,
+        -0.02741178683936596,
+        -0.20779411494731903,
+    ]
+    value = numpy.array(list_value, dtype=numpy.float32).reshape((64, 64, 1, 1))
+    tensor = numpy_helper.from_array(value, name="onnx::Conv_504")
+
+    initializers.append(tensor)
+
+    list_value = [
+        5.195802688598633,
+        0.940099835395813,
+        -7.016428470611572,
+        5.185446739196777,
+        -4.134859085083008,
+        2.0121846199035645,
+        5.215719223022461,
+        3.371406078338623,
+        3.7616095542907715,
+        -3.6593239307403564,
+        15.99945068359375,
+        3.306276321411133,
+        5.790191173553467,
+        6.33050537109375,
+        3.4512906074523926,
+        2.5531861782073975,
+        4.278702259063721,
+        4.350361347198486,
+        8.025779724121094,
+        -2.8830037117004395,
+        2.915111541748047,
+        3.592482805252075,
+        5.810481071472168,
+        3.4743332862854004,
+        3.5245680809020996,
+        1.8243598937988281,
+        8.069726943969727,
+        1.401036024093628,
+        5.110081672668457,
+        -12.873579978942871,
+        10.977816581726074,
+        5.909627437591553,
+        -0.4007779359817505,
+        -20.147268295288086,
+        6.649413585662842,
+        3.325921058654785,
+        5.84471321105957,
+        4.47447395324707,
+        3.754193067550659,
+        -5.167671203613281,
+        3.2778055667877197,
+        -9.067073822021484,
+        2.6243438720703125,
+        1.7002031803131104,
+        5.476454734802246,
+        2.510835647583008,
+        3.856968402862549,
+        2.3172807693481445,
+        12.462139129638672,
+        7.355924129486084,
+        4.140628814697266,
+        4.807559967041016,
+        5.7524309158325195,
+        4.128836154937744,
+        11.4532470703125,
+        -12.482564926147461,
+        5.590144157409668,
+        0.9172697067260742,
+        4.356811046600342,
+        0.9934853315353394,
+        -4.3548994064331055,
+        15.853201866149902,
+        -5.241130828857422,
+        5.9644365310668945,
+    ]
+    value = numpy.array(list_value, dtype=numpy.float32)
+    tensor = numpy_helper.from_array(value, name="onnx::Conv_505")
+
+    initializers.append(tensor)
+
+    # inputs
+
+    inputs.append(make_tensor_value_info("input", 1, ["batch_size", 3, 32, 32]))
+
+    # outputs
+
+    outputs.append(make_tensor_value_info("/layer1/layer1.0/relu/Relu_output_0", 1, ["batch_size", 64, 8, 8]))
+
+    # nodes
+
+    node = make_node(
+        "Conv",
+        ["input", "onnx::Conv_501", "onnx::Conv_502"],
+        ["/conv1/Conv_output_0"],
+        name="/conv1/Conv",
+        dilations=[1, 1],
+        group=1,
+        kernel_shape=[7, 7],
+        pads=[3, 3, 3, 3],
+        strides=[2, 2],
+        domain="",
+    )
+    nodes.append(node)
+
+    node = make_node("Relu", ["/conv1/Conv_output_0"], ["/relu/Relu_output_0"], name="/relu/Relu", domain="")
+    nodes.append(node)
+
+    node = make_node(
+        "MaxPool",
+        ["/relu/Relu_output_0"],
+        ["/maxpool/MaxPool_output_0"],
+        name="/maxpool/MaxPool",
+        ceil_mode=0,
+        kernel_shape=[3, 3],
+        pads=[1, 1, 1, 1],
+        strides=[2, 2],
+        domain="",
+    )
+    nodes.append(node)
+
+    node = make_node(
+        "Conv",
+        ["/maxpool/MaxPool_output_0", "onnx::Conv_504", "onnx::Conv_505"],
+        ["/layer1/layer1.0/conv1/Conv_output_0"],
+        name="/layer1/layer1.0/conv1/Conv",
+        dilations=[1, 1],
+        group=1,
+        kernel_shape=[1, 1],
+        pads=[0, 0, 0, 0],
+        strides=[1, 1],
+        domain="",
+    )
+    nodes.append(node)
+
+    node = make_node(
+        "Relu",
+        ["/layer1/layer1.0/conv1/Conv_output_0"],
+        ["/layer1/layer1.0/relu/Relu_output_0"],
+        name="/layer1/layer1.0/relu/Relu",
+        domain="",
+    )
+    nodes.append(node)
+
+    # opsets
+    opset_imports = [make_opsetid(domain, 1 if version is None else version) for domain, version in opsets.items()]
+
+    # graph
+    graph = make_graph(nodes, "torch_jit", inputs, outputs, initializers)
+    # '7'
+
+    onnx_model = make_model(graph, opset_imports=opset_imports, functions=functions)
+    onnx_model.ir_version = 7
+    onnx_model.producer_name = "pytorch"
+    onnx_model.producer_version = ""
+    onnx_model.domain = ""
+    onnx_model.model_version = 0
+    onnx_model.doc_string = ""
+    set_model_props(onnx_model, {})
+
+    return onnx_model
diff --git a/onnxruntime/test/python/quantization/test_conv_dynamic.py b/onnxruntime/test/python/quantization/test_conv_dynamic.py
index 9578c9fe708aa..18467bcbc1083 100644
--- a/onnxruntime/test/python/quantization/test_conv_dynamic.py
+++ b/onnxruntime/test/python/quantization/test_conv_dynamic.py
@@ -95,7 +95,7 @@ def test_quant_conv(self):
         for use_quant_config in [True, False]:
             self.dynamic_quant_conv_test(QuantType.QUInt8, extra_options={}, use_quant_config=use_quant_config)
 
-    # TODO: uncomment following after ConvInteger s8 supportted
+    # TODO: uncomment following after ConvInteger s8 supported
     # def test_quant_conv_s8s8(self):
     #    self.dynamic_quant_conv_test(QuantType.QInt8, extra_options={'ActivationSymmetric': True})
 
diff --git a/onnxruntime/test/python/quantization/test_minimum_real_range_option.py b/onnxruntime/test/python/quantization/test_minimum_real_range_option.py
new file mode 100644
index 0000000000000..77f95ab9030b2
--- /dev/null
+++ b/onnxruntime/test/python/quantization/test_minimum_real_range_option.py
@@ -0,0 +1,106 @@
+#!/usr/bin/env python
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for
+# license information.
+# --------------------------------------------------------------------------
+
+import unittest
+
+import numpy as np
+import onnx
+from onnx import TensorProto, helper, numpy_helper
+
+from onnxruntime import quantization
+
+
+class TestMinimumRealRangeOption(unittest.TestCase):
+    def setUp(self):
+        self.qdq_model_name = "model_qdq_u8.onnx"
+
+        # Set up activations/weights with zero value ranges (i.e., rmax - rmax == 0).
+        self.zero_range_activations = [
+            np.zeros([1, 2, 32, 32], dtype="float32"),
+        ]
+
+        self.zero_range_weights = np.zeros([1, 2, 2, 2], dtype="float32")
+
+    def perform_quantization(self, activations, weight, min_real_range):
+        # One-layer convolution model to be quantized with uint8 activations and uint8 weights.
+        act = helper.make_tensor_value_info("ACT", TensorProto.FLOAT, activations[0].shape)
+        helper.make_tensor_value_info("WGT", TensorProto.FLOAT, weight.shape)
+        res = helper.make_tensor_value_info("RES", TensorProto.FLOAT, [None, None, None, None])
+        wgt_init = numpy_helper.from_array(weight, "WGT")
+        conv_node = onnx.helper.make_node("Conv", ["ACT", "WGT"], ["RES"])
+        graph = helper.make_graph([conv_node], "test", [act], [res], initializer=[wgt_init])
+        model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 11)])
+        onnx.save(model, "model.onnx")
+
+        # Quantize model
+        class DummyDataReader(quantization.CalibrationDataReader):
+            def __init__(self):
+                self.iterator = ({"ACT": act} for act in activations)
+
+            def get_next(self):
+                return next(self.iterator, None)
+
+        quantization.quantize_static(
+            model_input="model.onnx",
+            model_output=self.qdq_model_name,
+            calibration_data_reader=DummyDataReader(),
+            quant_format=quantization.QuantFormat.QDQ,
+            activation_type=quantization.QuantType.QUInt8,
+            weight_type=quantization.QuantType.QUInt8,
+            op_types_to_quantize=["Conv"],
+            extra_options={"MinimumRealRange": min_real_range},
+        )
+
+        # Extract quantization parameters: scales and zero points for activations and weights.
+        model = onnx.load(self.qdq_model_name)
+        act_zp = next(init for init in model.graph.initializer if init.name == "ACT_zero_point").int32_data[0]
+        act_sc = next(init for init in model.graph.initializer if init.name == "ACT_scale").float_data[0]
+        wgt_zp = next(init for init in model.graph.initializer if init.name == "WGT_zero_point").int32_data[0]
+        wgt_sc = next(init for init in model.graph.initializer if init.name == "WGT_scale").float_data[0]
+
+        # Return quantization parameters
+        return act_zp, act_sc, wgt_zp, wgt_sc
+
+    def test_default(self):
+        """
+        Test default behavior without specifying the MinimumRealRange option.
+        """
+        act_zp, act_sc, wgt_zp, wgt_sc = self.perform_quantization(
+            self.zero_range_activations,
+            self.zero_range_weights,
+            min_real_range=None,  # default behavior
+        )
+
+        # No minimum real range is set. Expect default behavior (scale = 1.0, zp = 0)
+        self.assertEqual(act_zp, 0)
+        self.assertEqual(act_sc, 1.0)
+        self.assertEqual(wgt_zp, 0)
+        self.assertEqual(wgt_sc, 1.0)
+
+    def test_min_real_range(self):
+        """
+        Test a MinimumRealRange value of 0.0001.
+        """
+        min_real_range = 0.0001
+
+        act_zp, act_sc, wgt_zp, wgt_sc = self.perform_quantization(
+            self.zero_range_activations,
+            self.zero_range_weights,
+            min_real_range=min_real_range,
+        )
+
+        expected_scale = np.float32(min_real_range / 255)
+
+        # Minimum floating-point range is set. Expect small scale values.
+        self.assertEqual(act_zp, 0)
+        self.assertEqual(act_sc, expected_scale)
+        self.assertEqual(wgt_zp, 0)
+        self.assertEqual(wgt_sc, expected_scale)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/onnxruntime/test/python/quantization/test_op_gemm.py b/onnxruntime/test/python/quantization/test_op_gemm.py
index 54ef1cc1d5446..bac0f6d48e9fc 100644
--- a/onnxruntime/test/python/quantization/test_op_gemm.py
+++ b/onnxruntime/test/python/quantization/test_op_gemm.py
@@ -192,24 +192,9 @@ def static_quant_test(
         check_qtype_by_node_type(self, model_int8_path, qnode_io_qtypes)
         data_reader.rewind()
         if activation_type_str == "f8e4m3fn" and weight_type_str == "f8e4m3fn":
-            # QGemm is not implemented for CPU.
-            try:
-                check_model_correctness(
-                    self,
-                    model_fp32_path,
-                    model_int8_path,
-                    data_reader.get_next(),
-                    providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
-                    is_gemm=True,
-                )
-            except Exception as e:
-                if (
-                    "Type 'tensor(float8e4m3fn)' of input parameter (input_quantized) of operator (QGemm) in node () is invalid."
-                    in str(e)
-                ):
-                    warnings.warn("Fix this test when QGemm is implemented.")
-                    return
-                raise e
+            # QGemm for float 8 is not implemented. The test should be updated when it is.
+            warnings.warn("Fix this test when QGemm is implemented for float 8 types.")
+            return
         else:
             check_model_correctness(self, model_fp32_path, model_int8_path, data_reader.get_next(), is_gemm=True)
 
diff --git a/onnxruntime/test/python/quantization/test_op_matmul_4bits.py b/onnxruntime/test/python/quantization/test_op_matmul_4bits.py
new file mode 100644
index 0000000000000..02f51cc4fa809
--- /dev/null
+++ b/onnxruntime/test/python/quantization/test_op_matmul_4bits.py
@@ -0,0 +1,164 @@
+#!/usr/bin/env python
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for
+# license information.
+# --------------------------------------------------------------------------
+
+import tempfile
+import unittest
+from importlib.util import find_spec
+from pathlib import Path
+from typing import Dict, Tuple, Union
+
+import numpy as np
+import onnx
+from onnx import TensorProto, helper
+from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count
+
+from onnxruntime.quantization import quant_utils
+
+
+class TestOpMatMul4Bits(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        cls._tmp_model_dir = tempfile.TemporaryDirectory(prefix="test_matmul4bits.")
+
+    @classmethod
+    def tearDownClass(cls):
+        cls._tmp_model_dir.cleanup()
+
+    def fill_int4_data(self, shape: Union[int, Tuple[int, ...]], symmetric: bool) -> np.ndarray:
+        line = np.zeros(shape)
+        line = line.reshape(-1)
+
+        if symmetric:
+            v = -2.0
+            for i in range(line.shape[0]):
+                if v == 0 or v == -3 or v == 3:
+                    v += 1
+                line[i] = v
+                v += 1
+                if v >= 8:
+                    v = -8
+        else:
+            v = 0.0
+            for i in range(line.shape[0]):
+                line[i] = v
+                v += 1
+                if v >= 16:
+                    v = 0
+
+        return line.reshape(shape)
+
+    def input_feeds(self, n: int, name2shape: Dict[str, Union[int, Tuple[int, ...]]]) -> TestDataFeeds:
+        input_data_list = []
+        for _i in range(n):
+            inputs = {}
+            for name, shape in name2shape.items():
+                inputs.update({name: np.random.randint(-1, 2, shape).astype(np.float32)})
+            input_data_list.extend([inputs])
+        dr = TestDataFeeds(input_data_list)
+        return dr
+
+    def construct_model_matmul(self, output_model_path: str, symmetric: bool) -> None:
+        #      (input)
+        #         |
+        #       MatMul
+        #         |
+        #      (output)
+        input_name = "input"
+        output_name = "output"
+        initializers = []
+
+        def make_matmul(input_name, weight_shape: Union[int, Tuple[int, ...]], weight_name: str, output_name: str):
+            weight_data = self.fill_int4_data(weight_shape, symmetric).astype(np.float32)
+            initializers.append(onnx.numpy_helper.from_array(weight_data, name=weight_name))
+            return onnx.helper.make_node(
+                "MatMul",
+                [input_name, weight_name],
+                [output_name],
+            )
+
+        in_features = 52
+        out_features = 288
+        # make MatMul node
+        matmul_node = make_matmul(
+            input_name,
+            [in_features, out_features],
+            "linear1.weight",
+            output_name,
+        )
+
+        # make graph
+        input_tensor = helper.make_tensor_value_info(input_name, TensorProto.FLOAT, [-1, in_features])
+        output_tensor = helper.make_tensor_value_info(output_name, TensorProto.FLOAT, [-1, out_features])
+        graph_name = "matmul_4bits_test"
+        graph = helper.make_graph(
+            [matmul_node],
+            graph_name,
+            [input_tensor],
+            [output_tensor],
+            initializer=initializers,
+        )
+        model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
+        model.ir_version = 7  # use stable onnx ir version
+
+        onnx.save(model, output_model_path)
+
+    def quant_test(
+        self,
+        model_fp32_path: str,
+        data_reader: TestDataFeeds,
+        block_size: int,
+        is_symmetric: bool,
+    ):
+        model_int4_path = str(
+            Path(self._tmp_model_dir.name).joinpath(f"MatMulNBits_{block_size}_{is_symmetric}.onnx").absolute()
+        )
+
+        # Quantize fp32 model to int4 model
+        from onnxruntime.quantization import matmul_4bits_quantizer
+
+        model = quant_utils.load_model_with_shape_infer(Path(model_fp32_path))
+        quant = matmul_4bits_quantizer.MatMul4BitsQuantizer(model, block_size, is_symmetric)
+        quant.process()
+        quant.model.save_model_to_file(model_int4_path, False)
+
+        quant_nodes = {"MatMulNBits": 1}
+        check_op_type_count(self, model_int4_path, **quant_nodes)
+
+        data_reader.rewind()
+
+        try:
+            check_model_correctness(self, model_fp32_path, model_int4_path, data_reader.get_next())
+        except Exception as exception:
+            if "4b quantization not yet supported on this hardware platform!" in exception.args[0]:
+                # Currently we don't have int4 quantization support on all platforms, has to tolerate this exception
+                pass
+            else:
+                raise exception
+
+    @unittest.skipIf(
+        find_spec("onnxruntime.training"), "Skip because training package doesn't has quantize_matmul_4bits"
+    )
+    def test_quantize_matmul_int4_symmetric(self):
+        np.random.seed(13)
+
+        model_fp32_path = str(Path(self._tmp_model_dir.name).joinpath("matmul_fp32_symmetric.onnx").absolute())
+        self.construct_model_matmul(model_fp32_path, symmetric=True)
+        data_reader = self.input_feeds(1, {"input": [100, 52]})
+        self.quant_test(model_fp32_path, data_reader, 32, True)
+
+    @unittest.skipIf(
+        find_spec("onnxruntime.training"), "Skip because training package doesn't has quantize_matmul_4bits"
+    )
+    def test_quantize_matmul_int4_offsets(self):
+        model_fp32_path = str(Path(self._tmp_model_dir.name).joinpath("matmul_fp32_offset.onnx").absolute())
+        self.construct_model_matmul(model_fp32_path, symmetric=False)
+        data_reader = self.input_feeds(1, {"input": [100, 52]})
+        self.quant_test(model_fp32_path, data_reader, 32, False)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/onnxruntime/test/python/quantization/test_op_matmul_bnb4.py b/onnxruntime/test/python/quantization/test_op_matmul_bnb4.py
new file mode 100644
index 0000000000000..88432d75c653e
--- /dev/null
+++ b/onnxruntime/test/python/quantization/test_op_matmul_bnb4.py
@@ -0,0 +1,186 @@
+#!/usr/bin/env python
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for
+# license information.
+# --------------------------------------------------------------------------
+
+import tempfile
+import unittest
+from importlib.util import find_spec
+from pathlib import Path
+from typing import Dict, Tuple, Union
+
+import numpy as np
+import onnx
+from onnx import TensorProto, helper
+from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count
+
+from onnxruntime.quantization import quant_utils
+
+quant_maps = {
+    0: [
+        0.00000000,
+        5.208333333e-03,
+        0.66666667,
+        1.00000000,
+        0.33333333,
+        0.50000000,
+        0.16666667,
+        0.25000000,
+        -0.00000000,
+        -5.208333333e-03,
+        -0.66666667,
+        -1.00000000,
+        -0.33333333,
+        -0.50000000,
+        -0.16666667,
+        -0.25000000,
+    ],
+    1: [
+        -1.0,
+        -0.6961928009986877,
+        -0.5250730514526367,
+        -0.39491748809814453,
+        -0.28444138169288635,
+        -0.18477343022823334,
+        -0.09105003625154495,
+        0.0,
+        0.07958029955625534,
+        0.16093020141124725,
+        0.24611230194568634,
+        0.33791524171829224,
+        0.44070982933044434,
+        0.5626170039176941,
+        0.7229568362236023,
+        1.0,
+    ],
+}
+
+
+class TestOpMatMulBnb4(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        cls._tmp_model_dir = tempfile.TemporaryDirectory(prefix="test_matmulbnb4.")
+
+    @classmethod
+    def tearDownClass(cls):
+        cls._tmp_model_dir.cleanup()
+
+    def fill_bnb4_data(self, shape: Tuple[int, int], quant_type: int) -> np.ndarray:
+        rows, cols = shape
+        line = np.zeros(shape)
+        line = line.reshape(-1)
+        quant_map = np.array(quant_maps[quant_type], dtype=np.float32)
+
+        v = 0
+        for i in range(line.shape[0]):
+            line[i] = quant_map[v]
+            v += 1
+            if v >= 16:
+                v = 0
+
+        # bnb quantization quantizes weight.T after flattening
+        line = line.reshape(cols, rows).transpose()
+        return line.reshape(shape)
+
+    def input_feeds(self, n: int, name2shape: Dict[str, Union[int, Tuple[int, ...]]]) -> TestDataFeeds:
+        input_data_list = []
+        for _i in range(n):
+            inputs = {}
+            for name, shape in name2shape.items():
+                inputs.update({name: np.random.randint(-1, 2, shape).astype(np.float32)})
+            input_data_list.extend([inputs])
+        dr = TestDataFeeds(input_data_list)
+        return dr
+
+    def construct_model_matmul(self, output_model_path: str, quant_type: int) -> None:
+        #      (input)
+        #         |
+        #       MatMul
+        #         |
+        #      (output)
+        input_name = "input"
+        output_name = "output"
+        initializers = []
+
+        def make_matmul(input_name, weight_shape: Union[int, Tuple[int, ...]], weight_name: str, output_name: str):
+            weight_data = self.fill_bnb4_data(weight_shape, quant_type).astype(np.float32)
+            initializers.append(onnx.numpy_helper.from_array(weight_data, name=weight_name))
+            return onnx.helper.make_node(
+                "MatMul",
+                [input_name, weight_name],
+                [output_name],
+            )
+
+        # for this to work (in_features * out_features) % block_size == 0
+        in_features = 52
+        out_features = 288
+        # make MatMul node
+        matmul_node = make_matmul(
+            input_name,
+            [in_features, out_features],
+            "linear1.weight",
+            output_name,
+        )
+
+        # make graph
+        input_tensor = helper.make_tensor_value_info(input_name, TensorProto.FLOAT, [-1, in_features])
+        output_tensor = helper.make_tensor_value_info(output_name, TensorProto.FLOAT, [-1, out_features])
+        graph_name = "matmul_bnb4_test"
+        graph = helper.make_graph(
+            [matmul_node],
+            graph_name,
+            [input_tensor],
+            [output_tensor],
+            initializer=initializers,
+        )
+        model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
+        model.ir_version = 7  # use stable onnx ir version
+
+        onnx.save(model, output_model_path)
+
+    def quant_test(self, quant_type: int, block_size: int):
+        model_fp32_path = str(Path(self._tmp_model_dir.name).joinpath(f"matmul_fp32_{quant_type}.onnx").absolute())
+        self.construct_model_matmul(model_fp32_path, quant_type)
+        data_reader = self.input_feeds(1, {"input": [100, 52]})
+
+        model_bnb4_path = str(
+            Path(self._tmp_model_dir.name).joinpath(f"MatMulBnb4_{quant_type}_{block_size}.onnx").absolute()
+        )
+
+        # Quantize fp32 model to bnb4 model
+        from onnxruntime.quantization import matmul_bnb4_quantizer
+
+        model = quant_utils.load_model_with_shape_infer(Path(model_fp32_path))
+        quant = matmul_bnb4_quantizer.MatMulBnb4Quantizer(model, quant_type, block_size)
+        quant.process()
+        quant.model.save_model_to_file(model_bnb4_path, False)
+
+        quant_nodes = {"MatMulBnb4": 1}
+        check_op_type_count(self, model_bnb4_path, **quant_nodes)
+
+        data_reader.rewind()
+
+        try:
+            check_model_correctness(self, model_fp32_path, model_bnb4_path, data_reader.get_next())
+        except Exception as exception:
+            raise exception
+
+    @unittest.skipIf(
+        find_spec("onnxruntime.training"), "Skip because training package doesn't has quantize_matmul_bnb4"
+    )
+    def test_quantize_matmul_bnb4_fp4(self):
+        np.random.seed(13)
+        self.quant_test(0, 64)
+
+    @unittest.skipIf(
+        find_spec("onnxruntime.training"), "Skip because training package doesn't has quantize_matmul_bnb4"
+    )
+    def test_quantize_matmul_bnb4_nf4(self):
+        np.random.seed(13)
+        self.quant_test(1, 64)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/onnxruntime/test/python/quantization/test_op_pad.py b/onnxruntime/test/python/quantization/test_op_pad.py
index c413dedbef051..005f4752c16cc 100644
--- a/onnxruntime/test/python/quantization/test_op_pad.py
+++ b/onnxruntime/test/python/quantization/test_op_pad.py
@@ -5,6 +5,7 @@
 # license information.
 # --------------------------------------------------------------------------
 
+import itertools
 import unittest
 
 import numpy as np
@@ -404,6 +405,123 @@ def test_static_mode_constant_value_edge_case(self):
             "constant", constant_value=0.1, quantize_mode="static", extra_options={"dual_feed": True}
         )
 
+    @classmethod
+    def construct_model_add_pad_add(
+        cls,
+        # Name of model input, i.e., "input" in the illustration graph below.
+        name,
+        # Name of model output.
+        final_name,
+        # model input shape.
+        shape,
+    ):
+        # Graph implemented below is
+        #  `name`, `name` -> Add -> "first_add_output"
+        #  "first_add_output", "pads" -> Pad -> "pad_output"
+        #  "pad_output", "pad_output" -> Add -> `final_name`
+        # where `name` is the 2nd argument of this function,
+        # `final_name` is the 3rd argument of this function,
+        # and the rest lowercase strings are tensor names in the graph.
+
+        input_name = name
+        first_add_output_name = "first_add_output"
+        pads_name = "pads"
+        pad_output_name = "pad_output"
+        second_add_output_name = final_name
+
+        input_shape = shape
+        input_rank = len(input_shape)
+
+        input_tensor = helper.make_tensor_value_info(input_name, TensorProto.FLOAT, input_shape)
+
+        first_add_node = helper.make_node(
+            "Add",
+            [input_name, input_name],
+            [first_add_output_name],
+            name="FirstAdd",
+        )
+
+        pads = [1, 2] * input_rank
+        pads_initializer = helper.make_tensor(
+            pads_name,
+            TensorProto.INT64,
+            # 1-D tensor of shape [2 * input_rank].
+            [len(pads)],
+            pads,
+        )
+        pad_node = helper.make_node(
+            "Pad",
+            [first_add_output_name, pads_name, ""],
+            [pad_output_name],
+            name="PadNode",
+            mode="constant",
+        )
+        pad_output_shape = tuple(input_shape[i] + pads[i] + pads[i + input_rank] for i in range(input_rank))
+
+        second_add_node = helper.make_node(
+            "Add",
+            [pad_output_name, pad_output_name],
+            [second_add_output_name],
+            name="SecondAdd",
+        )
+
+        output_tensor = helper.make_tensor_value_info(second_add_output_name, TensorProto.FLOAT, pad_output_shape)
+
+        graph = helper.make_graph(
+            [first_add_node, pad_node, second_add_node],
+            "TestPadWithEmptyStringInput",
+            [input_tensor],
+            [output_tensor],
+            initializer=[pads_initializer],
+        )
+        model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
+        model.ir_version = 7  # use stable onnx ir version
+
+        return model
+
+    def test_pad_with_empty_string_input_name(self):
+        np.random.seed(108)
+        model_fp32_path = "pad_with_empty_string_input_name_fp32.onnx"
+        model_i8_path = "pad_with_empty_string_input_name_i8.onnx"
+
+        shape = [
+            3,
+        ]
+        name = "input"
+        data_reader = self.input_feeds(
+            1,
+            {
+                name: shape,
+            },
+        )
+
+        model_fp32 = TestOpQuatizerPad.construct_model_add_pad_add(name=name, shape=shape, final_name="output")
+
+        onnx.save(model_fp32, model_fp32_path)
+
+        self.quantize_model(
+            model_fp32_path,
+            model_i8_path,
+            data_reader=data_reader,
+        )
+
+        model_i8 = onnx.load(model_i8_path)
+
+        # Assert quantization really happens.
+        self.assertEqual(model_i8.graph.node[0].op_type, "QuantizeLinear")
+        self.assertEqual(model_i8.graph.node[1].op_type, "QLinearAdd")
+        self.assertEqual(model_i8.graph.node[2].op_type, "Pad")
+        self.assertEqual(model_i8.graph.node[3].op_type, "QLinearAdd")
+        self.assertEqual(model_i8.graph.node[4].op_type, "DequantizeLinear")
+
+        for node in model_i8.graph.node:
+            # Examine no empty string flows to quantization process.
+            # Previously, optional input specified by `""` in NodeProto.input
+            # may cause phantom node to generate `"_quantized"` in quantization process.
+            for name in itertools.chain(node.input, node.output):
+                self.assertNotEqual(name, "")
+                self.assertNotEqual(name, "_quantized")
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/onnxruntime/test/python/quantization/test_op_softmax.py b/onnxruntime/test/python/quantization/test_op_softmax.py
index 8e6e4d4100348..3416198450137 100644
--- a/onnxruntime/test/python/quantization/test_op_softmax.py
+++ b/onnxruntime/test/python/quantization/test_op_softmax.py
@@ -43,6 +43,7 @@ def construct_model_conv_softmax(
         softmax_input_shape,
         softmax_attributes,
         output_shape,
+        add_ms_domain_opset=False,
     ):
         #      (input)
         #          \
@@ -74,11 +75,16 @@ def construct_model_conv_softmax(
             [identity_out, output_tensor],
             initializer=initializers,
         )
-        model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
+
+        opset_imports = [helper.make_opsetid("", 13)]
+        if add_ms_domain_opset:
+            opset_imports.append(helper.make_opsetid("com.microsoft", 1))
+
+        model = helper.make_model(graph, opset_imports=opset_imports)
         model.ir_version = 7  # use stable onnx ir version
         onnx.save(model, output_model_path)
 
-    def quantize_softmax_test(self, activation_type, weight_type, extra_options={}):  # noqa: B006
+    def quantize_softmax_test_qop(self, activation_type, weight_type, extra_options={}):  # noqa: B006
         np.random.seed(1)
         model_fp32_path = "softmax_fp32.onnx"
         self.construct_model_conv_softmax(
@@ -91,11 +97,10 @@ def quantize_softmax_test(self, activation_type, weight_type, extra_options={}):
         )
         data_reader = self.input_feeds(1, {"input": [1, 2, 26, 42]})
 
-        activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
-        activation_type_str = "u8" if (activation_type == QuantType.QUInt8) else "s8"
-        weight_type_str = "u8" if (weight_type == QuantType.QUInt8) else "s8"
+        activation_proto_qtype = activation_type.tensor_type
+        activation_type_str = str(activation_type)
+        weight_type_str = str(weight_type)
         model_q8_path = f"softmax_{activation_type_str}{weight_type_str}.onnx"
-        model_q8_qdq_path = f"softmax_qdq_{activation_type_str}{weight_type_str}.onnx"
 
         # Verify QOperator mode
         data_reader.rewind()
@@ -138,11 +143,30 @@ def quantize_softmax_test(self, activation_type, weight_type, extra_options={}):
         data_reader.rewind()
         check_model_correctness(self, model_fp32_path, model_q8_path, data_reader.get_next())
 
+    def quantize_softmax_test_qdq(self, activation_type, weight_type, extra_options={}):  # noqa: B006
+        np.random.seed(1)
+        model_fp32_path = "softmax_fp32.onnx"
+        self.construct_model_conv_softmax(
+            model_fp32_path,
+            [1, 2, 26, 42],
+            [3, 2, 3, 3],
+            [1, 3, 24, 40],
+            {"axis": -2},
+            [1, 3, 24, 40],
+            add_ms_domain_opset=extra_options.get("UseQDQContribOps", False),
+        )
+        data_reader = self.input_feeds(1, {"input": [1, 2, 26, 42]})
+
+        activation_proto_qtype = activation_type.tensor_type
+        activation_type_str = str(activation_type)
+        weight_type_str = str(weight_type)
+        model_qdq_path = f"softmax_qdq_{activation_type_str}{weight_type_str}.onnx"
+
         # Verify QDQ mode
         data_reader.rewind()
         quantize_static(
             model_fp32_path,
-            model_q8_qdq_path,
+            model_qdq_path,
             data_reader,
             quant_format=QuantFormat.QDQ,
             activation_type=activation_type,
@@ -150,7 +174,7 @@ def quantize_softmax_test(self, activation_type, weight_type, extra_options={}):
             extra_options=extra_options,
         )
 
-        result_model = onnx.load(Path(model_q8_qdq_path))
+        result_model = onnx.load(Path(model_qdq_path))
         qnode_cnt = 0
         dqnode_cnt = 0
         softmax_cnt = 0
@@ -166,9 +190,15 @@ def quantize_softmax_test(self, activation_type, weight_type, extra_options={}):
         self.assertEqual(3, qnode_cnt, f"Expected 3 QuantizeLinear nodes, found {qnode_cnt}")
         self.assertEqual(4, dqnode_cnt, f"Expected 4 DequantizeLinear nodes, found {dqnode_cnt}")
         self.assertEqual(1, softmax_cnt, f"Expected 1 Softmax node, found {softmax_cnt}")
-        if extra_options.get("ActivationSymmetric", False):
-            for tensor in result_model.graph.initializer:
-                if tensor.name in qnode_zeropoints:
+        for tensor in result_model.graph.initializer:
+            if tensor.name in qnode_zeropoints:
+                self.assertEqual(
+                    tensor.data_type,
+                    activation_proto_qtype,
+                    f"QuantizeLinear zero-point must be of proto type {activation_proto_qtype}, "
+                    f"but found {tensor.data_type} instead.",
+                )
+                if extra_options.get("ActivationSymmetric", False):
                     np_value = numpy_helper.to_array(tensor)
                     self.assertEqual(
                         0,
@@ -176,30 +206,52 @@ def quantize_softmax_test(self, activation_type, weight_type, extra_options={}):
                         f"QuantizeLinear node zero point value must be 0, found {np_value} instead!",
                     )
 
-        qnode_io_qtypes = {
-            "QuantizeLinear": [
-                ["i", 2, activation_proto_qtype],
-                ["o", 0, activation_proto_qtype],
-            ]
-        }
-        check_qtype_by_node_type(self, model_q8_qdq_path, qnode_io_qtypes)
         data_reader.rewind()
-        check_model_correctness(self, model_fp32_path, model_q8_qdq_path, data_reader.get_next())
+        check_model_correctness(self, model_fp32_path, model_qdq_path, data_reader.get_next())
 
     def test_quantize_softmax(self):
-        self.quantize_softmax_test(QuantType.QUInt8, QuantType.QUInt8)
+        self.quantize_softmax_test_qop(QuantType.QUInt8, QuantType.QUInt8)
+        self.quantize_softmax_test_qdq(QuantType.QUInt8, QuantType.QUInt8)
 
     def test_quantize_softmax_s8s8(self):
-        self.quantize_softmax_test(
+        self.quantize_softmax_test_qop(
+            QuantType.QInt8,
+            QuantType.QInt8,
+        )
+        self.quantize_softmax_test_qdq(
+            QuantType.QInt8,
+            QuantType.QInt8,
+        )
+        self.quantize_softmax_test_qop(
             QuantType.QInt8,
             QuantType.QInt8,
+            extra_options={"ActivationSymmetric": True},
         )
-        self.quantize_softmax_test(
+        self.quantize_softmax_test_qdq(
             QuantType.QInt8,
             QuantType.QInt8,
             extra_options={"ActivationSymmetric": True},
         )
 
+    def test_quantize_softmax_qdq_u16u16(self):
+        self.quantize_softmax_test_qdq(
+            QuantType.QUInt16,
+            QuantType.QUInt16,
+            extra_options={"UseQDQContribOps": True},
+        )
+
+    def test_quantize_softmax_qdq_s16s16(self):
+        self.quantize_softmax_test_qdq(
+            QuantType.QInt16,
+            QuantType.QInt16,
+            extra_options={"UseQDQContribOps": True},
+        )
+        self.quantize_softmax_test_qdq(
+            QuantType.QInt16,
+            QuantType.QInt16,
+            extra_options={"UseQDQContribOps": True, "ActivationSymmetric": True},
+        )
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/onnxruntime/test/python/quantization/test_quant_util.py b/onnxruntime/test/python/quantization/test_quant_util.py
index 6efa279393f03..65cdff025bbe4 100644
--- a/onnxruntime/test/python/quantization/test_quant_util.py
+++ b/onnxruntime/test/python/quantization/test_quant_util.py
@@ -33,6 +33,18 @@ def test_compute_scale_zp(self):
         self.assertEqual(compute_scale_zp(-tiny_float, tiny_float, 0, 255, symmetric=True), [0, 1.0])
         self.assertEqual(compute_scale_zp(-tiny_float, 0.0, 0, 255, symmetric=False), [0, 1.0])
 
+        # Test enforcing a minimum floatint-point range.
+        self.assertEqual(compute_scale_zp(0.0, 0.0, 0, 255, symmetric=False, min_real_range=0.0001), [0, 0.0001 / 255])
+        self.assertEqual(
+            compute_scale_zp(0.0, 0.0, -128, 127, symmetric=True, min_real_range=0.0001), [0, 0.0002 / 255]
+        )
+        self.assertEqual(
+            compute_scale_zp(0.0, 0.0, 0, 65535, symmetric=False, min_real_range=0.0001), [0, 0.0001 / 65535]
+        )
+        self.assertEqual(
+            compute_scale_zp(0.0, 0.0, -32768, 32767, symmetric=True, min_real_range=0.0001), [0, 0.0002 / 65535]
+        )
+
     def test_load_external_model(self):
         input_name = "input"
         output_name = "output"
diff --git a/onnxruntime/test/python/quantization/test_quantize_static_resnet.py b/onnxruntime/test/python/quantization/test_quantize_static_resnet.py
new file mode 100644
index 0000000000000..1efa283af6881
--- /dev/null
+++ b/onnxruntime/test/python/quantization/test_quantize_static_resnet.py
@@ -0,0 +1,138 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for
+# license information.
+# --------------------------------------------------------------------------
+import os
+import random
+import tempfile
+import unittest
+
+import numpy as np
+import onnx
+from numpy.testing import assert_allclose
+from onnx.numpy_helper import to_array
+from resnet_code import create_model
+
+from onnxruntime import InferenceSession
+from onnxruntime import __version__ as ort_version
+from onnxruntime.quantization import QuantFormat, QuantType, quantize_static
+from onnxruntime.quantization.calibrate import CalibrationDataReader, CalibrationMethod
+
+
+class FakeResnetCalibrationDataReader(CalibrationDataReader):
+    def __init__(self, batch_size: int = 16):
+        super().__init__()
+        self.dataset = [
+            (np.random.rand(1, 3, 32, 32).astype(np.float32), random.randint(0, 9)) for _ in range(batch_size)
+        ]
+        self.iterator = iter(self.dataset)
+
+    def get_next(self) -> dict:
+        try:
+            return {"input": next(self.iterator)[0]}
+        except Exception:
+            return None
+
+
+class TestStaticQuantizationResNet(unittest.TestCase):
+    def test_quantize_static_resnet(self):
+        kwargs = {
+            "activation_type": QuantType.QUInt8,
+            "weight_type": QuantType.QInt8,
+            "calibrate_method": CalibrationMethod.Percentile,
+            "extra_options": {
+                "ActivationSymmetric": False,
+                "EnableSubgraph": False,
+                "ForceQuantizeNoInputCheck": False,
+                "MatMulConstBOnly": False,
+                "WeightSymmetric": True,
+                "extra.Sigmoid.nnapi": False,
+            },
+            "nodes_to_exclude": None,
+            "nodes_to_quantize": None,
+            "op_types_to_quantize": None,
+            "per_channel": True,
+            "quant_format": QuantFormat.QDQ,
+            "reduce_range": False,
+        }
+
+        proto = create_model()
+
+        with tempfile.TemporaryDirectory() as temp:
+            model = os.path.join(temp, "resnet_first_nodes.onnx")
+            with open(model, "wb") as f:
+                f.write(proto.SerializeToString())
+
+            for per_channel in [True, False]:
+                kwargs["per_channel"] = per_channel
+                dataloader = FakeResnetCalibrationDataReader(16)
+                with self.subTest(per_channel=per_channel):
+                    qdq_file = os.path.join(
+                        temp, f"preprocessed-small-qdq-{1 if per_channel else 0}-ort-{ort_version}.onnx"
+                    )
+                    quantize_static(
+                        model_input=model,
+                        model_output=qdq_file,
+                        calibration_data_reader=dataloader,
+                        use_external_data_format=False,
+                        **kwargs,
+                    )
+
+                    # With onnxruntime==1.15.1, the initializer 'onnx::Conv_504_zero_point' is:
+                    # * uint8(128) if per_channel is False
+                    # * int8([0, 0, ....]) if per_channel is True
+                    # With onnxruntime>1.16.0
+                    # * uint8(128) if per_channel is False
+                    # * uint8([128, 128, ..., 127, ...]) if per_channel is True
+                    # QLinearConv : zero point of per-channel filter must be same.
+                    # That's why the quantization forces a symmetric quantization into INT8.
+                    # zero_point is guaranted to be zero whatever the channel is.
+
+                    with open(qdq_file, "rb") as f:
+                        onx = onnx.load(f)
+                    for init in onx.graph.initializer:
+                        arr = to_array(init)
+                        if (
+                            arr.dtype == np.int8
+                            and "zero_point" not in init.name
+                            and not init.name.endswith("quantized")
+                        ):
+                            raise AssertionError(
+                                f"Initializer {init.name!r} has type {arr.dtype} and "
+                                f"shape {arr.shape} but should be {np.uint8}."
+                            )
+
+                    sess = InferenceSession(qdq_file, providers=["CPUExecutionProvider"])
+                    shape = (1, 3, 32, 32)
+                    size = np.prod(shape)
+                    dummy = (np.arange(size) / float(size)).astype(np.float32).reshape(shape)
+                    got = sess.run(None, {"input": dummy})
+                    self.assertEqual(got[0].shape, (1, 64, 8, 8))
+                    self.assertEqual(got[0].dtype, np.float32)
+                    if per_channel:
+                        expected = np.array(
+                            [
+                                [[1.0862497091293335, 0.9609132409095764], [1.0862497091293335, 0.9191343784332275]],
+                                [[0.7520190477371216, 1.0026921033859253], [1.0444709062576294, 1.0862497091293335]],
+                                [[0.0, 0.0], [0.0, 0.0]],
+                                [[0.0, 0.0], [0.9609132409095764, 0.7937979102134705]],
+                            ],
+                            dtype=np.float32,
+                        )
+                        assert_allclose(expected, got[0][0, :4, :2, :2], atol=0.2)
+                    else:
+                        expected = np.array(
+                            [
+                                [[1.428238868713379, 1.2602107524871826], [1.3442248106002808, 1.2182037830352783]],
+                                [[0.8821475505828857, 1.0921826362609863], [1.1341897249221802, 1.1761966943740845]],
+                                [[0.0, 0.0], [0.0, 0.0]],
+                                [[0.0, 0.0], [1.2182037830352783, 1.050175666809082]],
+                            ],
+                            dtype=np.float32,
+                        )
+                        assert_allclose(expected, got[0][0, :4, :2, :2], atol=0.2)
+
+
+if __name__ == "__main__":
+    unittest.main(verbosity=2)
diff --git a/onnxruntime/test/python/quantization/test_quantizeblockwise_4bits.py b/onnxruntime/test/python/quantization/test_quantizeblockwise_4bits.py
new file mode 100644
index 0000000000000..765825d4b86e3
--- /dev/null
+++ b/onnxruntime/test/python/quantization/test_quantizeblockwise_4bits.py
@@ -0,0 +1,141 @@
+#!/usr/bin/env python
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for
+# license information.
+# --------------------------------------------------------------------------
+
+import unittest
+from importlib.util import find_spec
+
+import numpy as np
+import numpy.typing as npt
+
+
+def dequantize_blockwise_4bits(quant_values, scale, zero_point, valid_len):
+    blob_size = quant_values.shape[0]
+    block_size = blob_size * 2
+
+    quant_float = np.zeros((block_size), dtype=scale.dtype)
+    for b in range(blob_size):
+        v = quant_values[b]
+        quant_float[2 * b] = ((v & 0xF) - zero_point) * scale if 2 * b < valid_len else 0.0
+        quant_float[2 * b + 1] = ((v >> 4) - zero_point) * scale if 2 * b + 1 < valid_len else 0.0
+    return quant_float
+
+
+def quantize_blockwise_4bits_ref(matrix_float: npt.ArrayLike, block_size: int, is_symmetric: bool):
+    if len(matrix_float.shape) != 2:
+        raise ValueError("Current int4 block quantization only supports 2D tensors!")
+    rows, cols = matrix_float.shape
+
+    blob_size = block_size // 2
+    k_blocks = (rows + block_size - 1) // block_size
+    padded_rows = k_blocks * block_size
+    pad_len = padded_rows - rows
+    matrix_float_padded = matrix_float
+    if pad_len > 0:
+        matrix_float_padded = np.pad(matrix_float, ((0, pad_len), (0, 0)), "constant")
+
+    packed = np.zeros((cols, k_blocks, blob_size), dtype="uint8")
+    scales = np.zeros((cols, k_blocks), dtype=matrix_float_padded.dtype)
+    zero_point = np.full((cols, (k_blocks + 1) // 2), 136, dtype="uint8")
+
+    matrix_float_padded = np.transpose(matrix_float_padded)
+    for n in range(cols):
+        for k_id in range(0, rows, block_size):
+            if is_symmetric:
+                amax_idx = np.argmax(np.abs(matrix_float_padded[n, k_id : k_id + block_size]))
+                bmax = np.float32(matrix_float_padded[n, k_id + amax_idx])
+                scale = bmax / (-8.0)
+                zp = 8
+            else:
+                vmin = np.min(np.float32(matrix_float_padded[n, k_id : k_id + block_size]))
+                vmax = np.max(np.float32(matrix_float_padded[n, k_id : k_id + block_size]))
+                vmin = min(vmin, 0.0)
+                vmax = max(vmax, 0.0)
+                scale = (vmax - vmin) / ((1 << 4) - 1)
+                zero_point_fp = vmin
+                if scale != 0.0:
+                    zero_point_fp = 0.0 - vmin / scale
+                zp = min(15, max(0, round(zero_point_fp)))
+
+            reciprocal_scale = 1.0 / scale if scale != 0 else 0.0
+            block_idx = k_id // block_size
+            scales[n, block_idx] = scale
+            zp_pair = zero_point[n, block_idx // 2]
+            zero_point[n, block_idx // 2] = (
+                ((zp_pair & 0x0F) | (zp << 4)) if (block_idx & 1) else ((zp_pair & 0xF0) | zp)
+            )
+
+            blk_int0 = np.clip(
+                np.round(np.float32(matrix_float_padded[n, k_id : k_id + block_size : 2] * reciprocal_scale + zp)),
+                0,
+                15,
+            ).astype("uint8")
+            blk_int1 = np.clip(
+                np.round(np.float32(matrix_float_padded[n, k_id + 1 : k_id + block_size : 2] * reciprocal_scale + zp)),
+                0,
+                15,
+            ).astype("uint8")
+            packed[n, block_idx] = np.bitwise_or(blk_int0, np.left_shift(blk_int1, 4))
+
+    return (packed, scales, zero_point)
+
+
+def quantize_blockwise_4bits_target(matrix_float: npt.ArrayLike, block_size: int, is_symmetric: bool):
+    if len(matrix_float.shape) != 2:
+        raise ValueError("Current int4 block quantization only supports 2D tensors!")
+    rows, cols = matrix_float.shape
+
+    k_blocks = (rows + block_size - 1) // block_size
+    packed = np.zeros((cols, k_blocks, block_size // 2), dtype="uint8")
+    scales = np.zeros((cols, k_blocks), dtype=matrix_float.dtype)
+    zero_point = np.full((cols, (k_blocks + 1) // 2), 136, dtype="uint8")
+    from onnxruntime.capi._pybind_state import quantize_matmul_4bits
+
+    quantize_matmul_4bits(packed, matrix_float, scales, zero_point, block_size, cols, rows, is_symmetric)
+    return (packed, scales, zero_point)
+
+
+class TestQuantizeBlockwise4Bits(unittest.TestCase):
+    @unittest.skipIf(
+        find_spec("onnxruntime.training"), "Skip because training package doesn't has quantize_matmul_4bits"
+    )
+    def test_quantize_blockwise_4bits(self):
+        for rows, cols in [(128, 128), (32, 128), (128, 32), (52, 128), (128, 52), (73, 123)]:
+            for block_size in [16, 32, 64, 128]:
+                for type in [np.float32, np.float16]:
+                    for is_symmetric in [True, False]:
+                        matrix_float = np.random.rand(rows, cols).astype(type)
+                        quant_value_ref, scales_ref, zero_point_ref = quantize_blockwise_4bits_ref(
+                            matrix_float, block_size, is_symmetric
+                        )
+                        quant_value, scales, zero_point = quantize_blockwise_4bits_target(
+                            matrix_float, block_size, is_symmetric
+                        )
+                        assert np.allclose(scales_ref, scales)
+                        assert np.allclose(zero_point_ref, zero_point)
+                        for c in range(quant_value_ref.shape[0]):
+                            for k in range(quant_value_ref.shape[1]):
+                                assert np.allclose(
+                                    dequantize_blockwise_4bits(
+                                        quant_value_ref[c, k],
+                                        scales_ref[c, k],
+                                        (zero_point_ref[c, k // 2] >> 4)
+                                        if (k & 1)
+                                        else (zero_point_ref[c, k // 2] & 0x0F),
+                                        min(block_size, rows - k * block_size),
+                                    ),
+                                    dequantize_blockwise_4bits(
+                                        quant_value[c, k],
+                                        scales[c, k],
+                                        (zero_point[c, k // 2] >> 4) if (k & 1) else (zero_point[c, k // 2] & 0x0F),
+                                        min(block_size, rows - k * block_size),
+                                    ),
+                                    atol=1.2 * abs(scales[c, k]),
+                                )
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/onnxruntime/test/python/quantization/test_quantizeblockwise_bnb4.py b/onnxruntime/test/python/quantization/test_quantizeblockwise_bnb4.py
new file mode 100644
index 0000000000000..9e9d05fae027d
--- /dev/null
+++ b/onnxruntime/test/python/quantization/test_quantizeblockwise_bnb4.py
@@ -0,0 +1,139 @@
+#!/usr/bin/env python
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for
+# license information.
+# --------------------------------------------------------------------------
+
+import unittest
+from importlib.util import find_spec
+
+import numpy as np
+import numpy.typing as npt
+
+quant_enums = {"FP4": 0, "NF4": 1}
+
+
+def quantize_block_fp4(block: npt.ArrayLike):
+    # quantize a block of float32 values to uint8 by simulating a binary search using pivots
+    # could have used (block[:,None] - quant_map).argmin(axis=1) but there are some mismatches due to
+    # floating point precision
+    # block: 1-D array of normalized [-1,1] float32 values, len(block) % 2 == 0
+
+    # pivots to find the quantization index
+    # only half of the pivots are needed since the other half is symmetric
+    pivots = np.array(
+        [0.00260417, 0.0859375, 0.20833333, 0.29166667, 0.4166667, 0.583333, 0.8333333, 1], dtype=np.float32
+    )
+    # indices are not 0,1,2,3,4,5,6,7 because it is a floating point data type
+    pivot_indices = np.array([0, 1, 6, 7, 4, 5, 2, 3], dtype=np.uint8)
+
+    # signs of the block
+    signs = (block < 0).astype(np.uint8) * 8
+
+    # find the uint8 quantization index
+    # argmax finds the first occurrence of True
+    quant_indices = pivot_indices[(np.abs(block)[:, None] <= pivots).argmax(axis=1)] + signs
+
+    return np.bitwise_or(np.left_shift(quant_indices[::2], 4), quant_indices[1::2])
+
+
+def quantize_block_nf4(block: npt.ArrayLike):
+    pivots = np.array(
+        [
+            -0.8480964004993439,
+            -0.6106329262256622,
+            -0.4599952697753906,
+            -0.33967943489551544,
+            -0.23460740596055984,
+            -0.13791173323988914,
+            -0.045525018125772476,
+            0.03979014977812767,
+            0.1202552504837513,
+            0.2035212516784668,
+            0.2920137718319893,
+            0.3893125355243683,
+            0.5016634166240692,
+            0.6427869200706482,
+            0.8614784181118011,
+            1.0,
+        ],
+        dtype=np.float32,
+    )
+
+    quant_indices = (block[:, None] <= pivots).argmax(axis=1).astype(np.uint8)
+
+    return np.bitwise_or(np.left_shift(quant_indices[::2], 4), quant_indices[1::2])
+
+
+def quantize_blockwise_bnb4_ref(matrix_float: npt.ArrayLike, block_size: int, quant_type: str, target=None):
+    if len(matrix_float.shape) != 2:
+        raise ValueError("Current bnb4 block quantization only supports 2D tensors!")
+
+    numel = matrix_float.size
+    num_blocks = (numel + block_size - 1) // block_size
+    quantized_numel = (numel + 1) // 2
+
+    packed = np.zeros(quantized_numel, dtype=np.uint8)
+    absmax = np.zeros(num_blocks, dtype=matrix_float.dtype)
+
+    flattened_matrix_float = matrix_float.flatten()
+    for block_idx in range(num_blocks):
+        block_len = min(block_size, numel - block_idx * block_size)
+        block = np.float32(flattened_matrix_float[block_idx * block_size : block_idx * block_size + block_len])
+
+        block_absmax = np.max(np.abs(block))
+        reciprocal_absmax = 1.0 / block_absmax if block_absmax != 0 else 0.0
+        absmax[block_idx] = block_absmax
+
+        if block_len % 2 != 0:
+            block = np.append(block, 0.0)
+            block_len += 1
+
+        block *= reciprocal_absmax
+        start = block_idx * block_size // 2
+        end = start + block_len // 2
+        if quant_type == "FP4":
+            packed[start:end] = quantize_block_fp4(block)
+        else:
+            packed[start:end] = quantize_block_nf4(block)
+
+    return (packed, absmax)
+
+
+def quantize_blockwise_bnb4_target(matrix_float: npt.ArrayLike, block_size: int, quant_type: str):
+    if len(matrix_float.shape) != 2:
+        raise ValueError("Current int4 block quantization only supports 2D tensors!")
+    quant_type_enum = quant_enums[quant_type]
+
+    n, k = matrix_float.shape  # already transposed
+    numel = n * k
+    num_blocks = (numel + block_size - 1) // block_size
+    quantized_numel = (numel + 1) // 2
+
+    packed = np.zeros(quantized_numel, dtype="uint8")
+    absmax = np.zeros(num_blocks, dtype=matrix_float.dtype)
+    from onnxruntime.capi._pybind_state import quantize_matmul_bnb4
+
+    quantize_matmul_bnb4(packed, matrix_float, absmax, block_size, quant_type_enum, n, k)
+    return (packed, absmax)
+
+
+class TestQuantizeBlockwiseBnb4(unittest.TestCase):
+    @unittest.skipIf(
+        find_spec("onnxruntime.training"), "Skip because training package doesn't has quantize_matmul_bnb4"
+    )
+    def test_quantize_blockwise_bnb4(self):
+        for quant_type in ["FP4", "NF4"]:
+            for k, n in [(128, 128), (32, 128), (128, 32), (52, 128), (128, 52), (73, 123)]:
+                for block_size in [16, 32, 64, 128]:
+                    for type in [np.float32, np.float16]:
+                        matrix_float = np.random.uniform(-1, 1, (k, n)).astype(type)
+                        quant_value_ref, absmax_ref = quantize_blockwise_bnb4_ref(matrix_float, block_size, quant_type)
+                        quant_value, absmax = quantize_blockwise_bnb4_target(matrix_float, block_size, quant_type)
+                        assert np.allclose(quant_value_ref, quant_value)
+                        assert np.allclose(absmax_ref, absmax)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/onnxruntime/test/python/quantization/test_symmetric_flag.py b/onnxruntime/test/python/quantization/test_symmetric_flag.py
index f24daddbbcf83..701da80d543d3 100644
--- a/onnxruntime/test/python/quantization/test_symmetric_flag.py
+++ b/onnxruntime/test/python/quantization/test_symmetric_flag.py
@@ -74,10 +74,10 @@ def get_next(self):
 
         # Extract quantization parameters: scales and zero points for activations, weights, and results
         model = onnx.load("quantized-model.onnx")
-        act_zp = [init for init in model.graph.initializer if init.name == "ACT_zero_point"][0].int32_data[0]
-        act_sc = [init for init in model.graph.initializer if init.name == "ACT_scale"][0].float_data[0]
-        wgt_zp = [init for init in model.graph.initializer if init.name == "WGT_zero_point"][0].int32_data[0]
-        wgt_sc = [init for init in model.graph.initializer if init.name == "WGT_scale"][0].float_data[0]
+        act_zp = next(init for init in model.graph.initializer if init.name == "ACT_zero_point").int32_data[0]
+        act_sc = next(init for init in model.graph.initializer if init.name == "ACT_scale").float_data[0]
+        wgt_zp = next(init for init in model.graph.initializer if init.name == "WGT_zero_point").int32_data[0]
+        wgt_sc = next(init for init in model.graph.initializer if init.name == "WGT_scale").float_data[0]
 
         # Return quantization parameters
         return act_zp, act_sc, wgt_zp, wgt_sc
diff --git a/onnxruntime/test/python/transformers/benchmark_gqa.py b/onnxruntime/test/python/transformers/benchmark_gqa.py
new file mode 100644
index 0000000000000..a9bef025a70bb
--- /dev/null
+++ b/onnxruntime/test/python/transformers/benchmark_gqa.py
@@ -0,0 +1,339 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation.  All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+
+"""
+Benchmark performance of MultiHeadAttention with Nvidia GPU of Compute Capability 8.0, 8.6 or 8.9 in Linux:
+sh benchmark_mha.sh
+"""
+
+import math
+import random
+import statistics
+import time
+
+import torch
+from onnx import TensorProto, helper
+
+from onnxruntime import InferenceSession, OrtValue, SessionOptions
+
+
+class InputFormats:
+    QKV_BSNH = 0
+    QKV_BNSH = 1
+
+
+class Config:
+    batch_size = 0
+    sequence_length = 0
+    kv_sequence_length = 0
+    past_sequence_length = 0
+    num_heads = 0
+    kv_num_heads = 0
+    head_size = 0
+
+    def __init__(self, b, s, s2, sp, n, n2, h):
+        self.batch_size = b
+        self.sequence_length = s
+        self.kv_sequence_length = s2
+        self.past_sequence_length = sp
+        self.num_heads = n
+        self.kv_num_heads = n2
+        self.head_size = h
+
+
+def create_group_query_attention_graph_past(
+    config, causal=False, past_kv_format=InputFormats.QKV_BSNH, share_buffer=True
+):
+    past_kv_seqlen = config.kv_sequence_length if share_buffer else config.past_sequence_length
+    present_kv_seqlen = (
+        config.kv_sequence_length if share_buffer else config.past_sequence_length + config.sequence_length
+    )
+    nodes = [
+        helper.make_node(
+            "GroupQueryAttention",
+            [
+                "query",
+                "key",
+                "value",
+                "past_key",
+                "past_value",
+                "past_sequence_length" if share_buffer else "",
+            ],
+            ["output", "present_key", "present_value"],
+            "GroupQueryAttention_0",
+            num_heads=config.num_heads,
+            kv_num_heads=config.kv_num_heads,
+            unidirectional=1 if causal else 0,
+            is_past_bsnh=1 if past_kv_format == InputFormats.QKV_BSNH else 0,
+            domain="com.microsoft",
+        ),
+    ]
+
+    graph_input = [
+        helper.make_tensor_value_info(
+            "query",
+            TensorProto.FLOAT16,
+            [
+                config.batch_size,
+                config.sequence_length,
+                config.num_heads * config.head_size,
+            ],
+        ),
+        helper.make_tensor_value_info(
+            "key",
+            TensorProto.FLOAT16,
+            [
+                config.batch_size,
+                config.sequence_length,
+                config.kv_num_heads * config.head_size,
+            ],
+        ),
+        helper.make_tensor_value_info(
+            "value",
+            TensorProto.FLOAT16,
+            [
+                config.batch_size,
+                config.sequence_length,
+                config.kv_num_heads * config.head_size,
+            ],
+        ),
+        helper.make_tensor_value_info(
+            "past_key",
+            TensorProto.FLOAT16,
+            [
+                config.batch_size,
+                past_kv_seqlen if past_kv_format == InputFormats.QKV_BSNH else config.kv_num_heads,
+                config.kv_num_heads if past_kv_format == InputFormats.QKV_BSNH else past_kv_seqlen,
+                config.head_size,
+            ],
+        ),
+        helper.make_tensor_value_info(
+            "past_value",
+            TensorProto.FLOAT16,
+            [
+                config.batch_size,
+                past_kv_seqlen if past_kv_format == InputFormats.QKV_BSNH else config.kv_num_heads,
+                config.kv_num_heads if past_kv_format == InputFormats.QKV_BSNH else past_kv_seqlen,
+                config.head_size,
+            ],
+        ),
+    ]
+    if share_buffer:
+        graph_input += [
+            helper.make_tensor_value_info(
+                "past_sequence_length",
+                TensorProto.INT32,
+                [1],
+            )
+        ]
+
+    graph_output = [
+        helper.make_tensor_value_info(
+            "output",
+            TensorProto.FLOAT16,
+            [config.batch_size, config.sequence_length, config.num_heads * config.head_size],
+        ),
+        helper.make_tensor_value_info(
+            "present_key",
+            TensorProto.FLOAT16,
+            [
+                config.batch_size,
+                present_kv_seqlen if past_kv_format == InputFormats.QKV_BSNH else config.kv_num_heads,
+                config.kv_num_heads if past_kv_format == InputFormats.QKV_BSNH else present_kv_seqlen,
+                config.head_size,
+            ],
+        ),
+        helper.make_tensor_value_info(
+            "present_value",
+            TensorProto.FLOAT16,
+            [
+                config.batch_size,
+                present_kv_seqlen if past_kv_format == InputFormats.QKV_BSNH else config.kv_num_heads,
+                config.kv_num_heads if past_kv_format == InputFormats.QKV_BSNH else present_kv_seqlen,
+                config.head_size,
+            ],
+        ),
+    ]
+
+    graph = helper.make_graph(
+        nodes,
+        "GroupQueryAttention_Graph",
+        graph_input,
+        graph_output,
+    )
+
+    model = helper.make_model(graph)
+    return model.SerializeToString()
+
+
+def create_gqa_session(
+    config: Config,
+    causal: bool = False,
+    past_format=InputFormats.QKV_BSNH,
+    share_buffer: bool = True,
+) -> InferenceSession:
+    onnx_model_str = create_group_query_attention_graph_past(config, causal, past_format, share_buffer)
+    sess_options = SessionOptions()
+    ort_session = InferenceSession(onnx_model_str, sess_options, providers=["CUDAExecutionProvider"])
+    return ort_session
+
+
+def bind_io(io_binding, input_dict, device, share_buffer=True):
+    io_binding.bind_cpu_input("query", input_dict["query"])
+    io_binding.bind_cpu_input("key", input_dict["key"])
+    io_binding.bind_cpu_input("value", input_dict["value"])
+    io_binding.bind_input(
+        "past_key", "cuda", 0, "float16", input_dict["past_key"].shape(), input_dict["past_key"].data_ptr()
+    )
+    io_binding.bind_input(
+        "past_value",
+        "cuda",
+        0,
+        "float16",
+        input_dict["past_value"].shape(),
+        input_dict["past_value"].data_ptr(),
+    )
+    io_binding.bind_output("output")
+    if share_buffer:
+        io_binding.bind_cpu_input("past_sequence_length", input_dict["past_sequence_length"])
+        io_binding.bind_output(
+            "present_key",
+            device_type="cuda",
+            device_id=device,
+            element_type="float16",
+            shape=input_dict["past_key"].shape(),
+            buffer_ptr=input_dict["past_key"].data_ptr(),
+        )
+        io_binding.bind_output(
+            "present_value",
+            device_type="cuda",
+            device_id=device,
+            element_type="float16",
+            shape=input_dict["past_value"].shape(),
+            buffer_ptr=input_dict["past_value"].data_ptr(),
+        )
+    else:
+        io_binding.bind_output("present_key")
+        io_binding.bind_output("present_value")
+
+
+def measure_latency(ort_session, io_binding):
+    start = time.time()
+    _ = ort_session.run_with_iobinding(io_binding)
+    end = time.time()
+    return end - start
+
+
+def flops(batch, q_seqlen, kv_seqlen, head_size, num_heads):
+    return 4 * batch * q_seqlen * kv_seqlen * num_heads * head_size
+
+
+def tflops_per_second(flop, time):
+    return (flop / time / 10**12) if not math.isnan(time) else 0.0
+
+
+def benchmark_op(session, io_binding, repeats=100):
+    # warm up session
+    _ = measure_latency(session, io_binding)
+
+    latency_list = []
+    for _ in range(repeats):
+        latency = measure_latency(session, io_binding)
+        latency_list.append(latency)
+    return statistics.mean(latency_list)
+
+
+def run_tflops_test(dtype=torch.float16, repeats: int = 100):
+    device_id = torch.cuda.current_device()
+    device = torch.device("cuda", device_id)
+    print("---- GQA BSNH vs GQA BNSH ----")
+    print("op\tbatch\ts_kv\theads\th_dim\tms\tTFLOPS")
+    mean_bsnh_lat = 0
+    mean_bnsh_lat = 0
+    num_trials = 0
+    share_buffer = True
+    random.seed(69)
+    for b in [1, 3, 8, 16]:
+        for s_q, s_kv in [(1, 128), (128, 256), (512, 512), (128, 1024), (1, 2048)]:
+            for n_q, n_kv in [(8, 8), (16, 8), (32, 32), (12, 3), (128, 64)]:
+                for h in [32, 64, 128]:
+                    sp = random.randint(1, s_kv - 1) if s_kv - 1 > 0 else 0
+                    config = Config(b, s_q, s_kv, sp, n_q, n_kv, h)
+
+                    bsnh_session = create_gqa_session(
+                        config,
+                        causal=False,
+                        past_format=InputFormats.QKV_BSNH,
+                        share_buffer=share_buffer,
+                    )
+                    bnsh_session = create_gqa_session(
+                        config,
+                        causal=False,
+                        past_format=InputFormats.QKV_BNSH,
+                        share_buffer=share_buffer,
+                    )
+
+                    q = torch.randn(b, s_q, n_q * h, device=device, dtype=dtype)
+                    kv = torch.randn(b, s_q, 2, n_kv * h, device=device, dtype=dtype)
+                    k, v = kv.unbind(dim=2)
+
+                    past_kv = torch.rand(b, s_kv if share_buffer else sp, 2, n_kv, h, device=device, dtype=dtype)
+                    past_k, past_v = past_kv.unbind(dim=2)
+
+                    input_dict_bsnh = {
+                        "query": q.detach().cpu().numpy(),
+                        "key": k.detach().cpu().numpy(),
+                        "value": v.detach().cpu().numpy(),
+                        "past_key": OrtValue.ortvalue_from_numpy(past_k.detach().cpu().numpy(), "cuda", device_id),
+                        "past_value": OrtValue.ortvalue_from_numpy(past_v.detach().cpu().numpy(), "cuda", device_id),
+                    }
+                    input_dict_bnsh = {
+                        "query": q.detach().cpu().numpy(),
+                        "key": k.detach().cpu().numpy(),
+                        "value": v.detach().cpu().numpy(),
+                        "past_key": OrtValue.ortvalue_from_numpy(
+                            past_k.transpose(1, 2).detach().cpu().numpy(), "cuda", 0
+                        ),
+                        "past_value": OrtValue.ortvalue_from_numpy(
+                            past_v.transpose(1, 2).detach().cpu().numpy(), "cuda", 0
+                        ),
+                    }
+                    if share_buffer:
+                        input_dict_bsnh["past_sequence_length"] = (
+                            torch.tensor([sp], device="cuda", dtype=torch.int32).detach().cpu().numpy()
+                        )
+                        input_dict_bnsh["past_sequence_length"] = (
+                            torch.tensor([sp], device="cuda", dtype=torch.int32).detach().cpu().numpy()
+                        )
+
+                    io_binding_bsnh = bsnh_session.io_binding()
+                    io_binding_bnsh = bnsh_session.io_binding()
+                    bind_io(io_binding_bsnh, input_dict_bsnh, device_id, share_buffer)
+                    bind_io(io_binding_bnsh, input_dict_bnsh, device_id, share_buffer)
+                    average_gqa_bsnh_latency = benchmark_op(bsnh_session, io_binding_bsnh, repeats)
+                    average_gqa_bnsh_latency = benchmark_op(bnsh_session, io_binding_bnsh, repeats)
+
+                    del bsnh_session
+                    del bnsh_session
+
+                    # compute TFLOPS per second
+                    bsnh_speed = tflops_per_second(flops(b, s_q, s_kv, h, n_q), average_gqa_bsnh_latency)
+                    print(f"bsnh\t{b}\t{s_kv}\t{n_q}\t{h}\t{average_gqa_bsnh_latency * 1000:.2f}\t{bsnh_speed:.2f}")
+                    bnsh_speed = tflops_per_second(flops(b, s_q, s_kv, h, n_q), average_gqa_bnsh_latency)
+                    print(f"bnsh\t{b}\t{s_kv}\t{n_q}\t{h}\t{average_gqa_bnsh_latency * 1000:.2f}\t{bnsh_speed:.2f}")
+                    print("---------")
+                    if average_gqa_bsnh_latency > 10 * average_gqa_bnsh_latency:
+                        continue
+                    num_trials += 1
+                    mean_bsnh_lat += average_gqa_bsnh_latency
+                    mean_bnsh_lat += average_gqa_bnsh_latency
+    mean_bsnh_lat /= num_trials
+    mean_bnsh_lat /= num_trials
+    print(f"average bsnh latency:\t{mean_bsnh_lat}")
+    print(f"average bnsh latency:\t{mean_bnsh_lat}")
+
+
+if __name__ == "__main__":
+    run_tflops_test()
diff --git a/onnxruntime/test/python/transformers/conformer_model_generator.py b/onnxruntime/test/python/transformers/conformer_model_generator.py
new file mode 100644
index 0000000000000..71e4f2b63cf4f
--- /dev/null
+++ b/onnxruntime/test/python/transformers/conformer_model_generator.py
@@ -0,0 +1,543 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation.  All rights reserved.
+# Licensed under the MIT License.  See License.txt in the project root for
+# license information.
+# --------------------------------------------------------------------------
+
+from typing import List
+
+import numpy as np
+import onnx
+from bert_model_generator import float_tensor
+from onnx import TensorProto, helper, numpy_helper
+
+
+# Adapted from bert_model_generator.py
+def get_tensor_and_weight(name: str, shape: List[int], random=False, zeros=False):
+    low = 0.0
+    high = 1.0
+    total_elements = 1
+    for x in shape:
+        total_elements *= x
+    weights = (
+        [np.random.uniform(low, high) for _ in range(total_elements)]
+        if random
+        else [0.0] * total_elements
+        if zeros
+        else [1.0] * total_elements
+    )
+    return helper.make_tensor(name, TensorProto.FLOAT, shape, weights), weights
+
+
+def create_conformer_attention(
+    hidden_size=512,
+    num_heads=8,
+    epsilon=0.000009999999747378752,
+    add_before_layernorm=False,
+    fused=False,
+):
+    # Get head size and ensure head size is an integer
+    assert hidden_size % num_heads == 0
+    head_size = hidden_size // num_heads
+
+    # Construct input and output nodes
+    inputs = [
+        helper.make_tensor_value_info("input_0", TensorProto.FLOAT, ["batch_size", 8, 512]),
+        helper.make_tensor_value_info("input_1", TensorProto.FLOAT, ["batch_size", 8, 512]),
+        helper.make_tensor_value_info("inp_cache_k", TensorProto.FLOAT, [24, "batch_size", 8, 72, head_size]),
+        helper.make_tensor_value_info("inp_cache_v", TensorProto.FLOAT, [24, "batch_size", 8, 72, head_size]),
+    ]
+    outputs = [
+        helper.make_tensor_value_info("output_0", TensorProto.FLOAT, ["batch_size", 8, hidden_size]),
+        helper.make_tensor_value_info("output_1", TensorProto.FLOAT, ["batch_size", 8, 512]),
+        helper.make_tensor_value_info("oup_cache_k", TensorProto.FLOAT, ["batch_size", 8, 80, 64]),
+        helper.make_tensor_value_info("oup_cache_v", TensorProto.FLOAT, ["batch_size", 8, 80, 64]),
+    ]
+    nodes = []
+
+    # Create layernorm (Add + LayerNorm or SkipLayerNorm)
+    if add_before_layernorm:
+        nodes.extend(
+            [
+                helper.make_node(
+                    "Add", ["input_0", "input_1"], ["layernorm_output_to_skiplayernorm"], "add_before_layernorm"
+                ),
+                helper.make_node(
+                    "LayerNormalization",
+                    ["layernorm_output_to_skiplayernorm", "layernorm_weight", "layernorm_bias"],
+                    ["layernorm_add_output_to_matmul"],
+                    "layernorm",
+                    epsilon=epsilon,
+                ),
+            ]
+        )
+    else:
+        nodes.append(
+            helper.make_node(
+                "SkipLayerNormalization",
+                ["input_0", "input_1", "layernorm_weight", "layernorm_bias"],
+                ["layernorm_add_output_to_matmul", "", "", "layernorm_add_output_to_skiplayernorm"],
+                "skiplayernorm",
+                domain="com.microsoft",
+                epsilon=epsilon,
+            )
+        )
+
+    if fused:
+        fused_q_nodes = [
+            helper.make_node(
+                "MatMul",
+                ["layernorm_add_output_to_matmul", "q_weight"],
+                ["q_matmul_output"],
+                "q_path_matmul",
+            ),
+            helper.make_node("Add", ["q_bias", "q_matmul_output"], ["q_add_output"], "q_path_add"),
+            helper.make_node(
+                "Reshape", ["q_add_output", "k_attn_heads_output"], ["q_4d_bsnh"], "q_reshape_to_4d", allowzero=0
+            ),
+            helper.make_node("Transpose", ["q_4d_bsnh"], ["q_4d_bnsh"], "q_transpose_to_bnsh", perm=[0, 2, 1, 3]),
+            helper.make_node(
+                "Div",
+                ["q_4d_bnsh", "q_scale"],
+                ["q_div_output"],
+                "q_div_by_sqrt_head_size",
+            ),
+        ]
+        nodes.extend(fused_q_nodes)
+        nodes.extend(
+            [
+                helper.make_node(
+                    "MatMul",
+                    ["layernorm_add_output_to_matmul", "k_weight"],
+                    ["k_matmul_output"],
+                    "k_path_matmul",
+                ),
+                helper.make_node(
+                    "MatMul",
+                    ["layernorm_add_output_to_matmul", "v_weight"],
+                    ["v_matmul_output"],
+                    "v_path_matmul",
+                ),
+                helper.make_node(
+                    "Reshape", ["q_div_output", "position_embed_output"], ["reshape_pos_emb"], "r_pos_emb", allowzero=0
+                ),
+                helper.make_node(
+                    "Transpose", ["reshape_pos_emb"], ["transpose_reshape_pos_emb"], "p_transpose", perm=[1, 0, 2]
+                ),
+                helper.make_node(
+                    "MatMul",
+                    ["transpose_reshape_pos_emb", "transpose_reshape_pos_emb"],
+                    ["pos_matmul"],
+                    "pos_embed_matmul",
+                ),
+                helper.make_node(
+                    "Transpose", ["pos_matmul"], ["transpose_pos_matmul"], "p_matmul_transpose", perm=[1, 0, 2]
+                ),
+                helper.make_node(
+                    "Reshape",
+                    ["transpose_pos_matmul", "position_embed_output"],
+                    ["reshape_position_emb"],
+                    "final_reshape_pos_emb",
+                    allowzero=0,
+                ),
+                helper.make_node(
+                    "MultiHeadAttention",
+                    [
+                        "q_matmul_output",
+                        "k_matmul_output",
+                        "v_matmul_output",
+                        "Attention_0_qkv_bias",
+                        "",
+                        "reshape_position_emb",
+                        "gather_past_k_output",
+                        "gather_past_v_output",
+                    ],
+                    ["attn_output", "oup_cache_k", "oup_cache_v"],
+                    "Attention_0",
+                    domain="com.microsoft",
+                    num_heads=num_heads,
+                ),
+            ]
+        )
+        # Create nodes used with qkv concats, reshapes, and transposes
+        nodes.extend(
+            [
+                helper.make_node("Shape", ["layernorm_add_output_to_matmul"], ["shape_output"], "shape", start=0),
+                helper.make_node("Gather", ["shape_output", "idx_0"], ["gather_0_output"], "gather_0", axis=0),
+                helper.make_node(
+                    "Mul",
+                    ["gather_0_output", "num_heads_int"],
+                    ["mul_attn_heads_output"],
+                    "mul_num_heads",
+                ),
+                helper.make_node(
+                    "Unsqueeze",
+                    ["mul_attn_heads_output", "unsqueeze_axes_input"],
+                    ["unsqueeze_position_embed"],
+                    "unsqueeze_position_embed",
+                ),
+                helper.make_node(
+                    "Concat",
+                    ["unsqueeze_position_embed", "neg_one", "head_size"],
+                    ["position_embed_output"],
+                    "position_embed_concat_output",
+                    axis=0,
+                ),
+                helper.make_node(
+                    "Unsqueeze",
+                    ["gather_0_output", "unsqueeze_axes_input"],
+                    ["unsqueeze_attn_heads_output"],
+                    "unsqueeze_num_heads",
+                ),
+                helper.make_node(
+                    "Concat",
+                    ["unsqueeze_attn_heads_output", "neg_one", "head_size", "q_bsnh_reshape"],
+                    ["k_attn_heads_output"],
+                    "k_num_heads",
+                    axis=0,
+                ),
+            ]
+        )
+
+        nodes.extend(
+            [
+                helper.make_node("Gather", ["inp_cache_v", "idx_0"], ["gather_past_v_output"], "gather_past_v", axis=0),
+                helper.make_node("Gather", ["inp_cache_k", "idx_0"], ["gather_past_k_output"], "gather_past_k", axis=0),
+            ]
+        )
+    else:
+        # Create nodes for Q/K/V paths
+        q_nodes = [
+            helper.make_node(
+                "MatMul", ["layernorm_add_output_to_matmul", "q_weight"], ["q_matmul_output"], "q_path_matmul"
+            ),
+            helper.make_node("Add", ["q_bias", "q_matmul_output"], ["q_add_output"], "q_path_add"),
+            helper.make_node("Reshape", ["q_add_output", "q_attn_heads_output"], ["q_4d_bsnh"], "q_reshape_to_4d"),
+            helper.make_node("Transpose", ["q_4d_bsnh"], ["q_4d_bnsh"], "q_transpose_to_bnsh", perm=[0, 2, 1, 3]),
+            helper.make_node(
+                "Div",
+                ["q_4d_bnsh", "q_scale"],
+                ["q_div_output"],
+                "q_div_by_sqrt_head_size",
+            ),
+        ]
+        k_nodes = [
+            helper.make_node(
+                "MatMul",
+                ["layernorm_add_output_to_matmul", "k_weight"],
+                ["k_matmul_output"],
+                "k_path_matmul",
+            ),
+            helper.make_node("Add", ["k_bias", "k_matmul_output"], ["k_add_output"], "k_path_add"),
+            helper.make_node("Reshape", ["k_add_output", "k_attn_heads_output"], ["k_4d_bsnh"], "k_reshape_to_4d"),
+            helper.make_node("Transpose", ["k_4d_bsnh"], ["k_4d_bnsh"], "k_transpose_to_bnsh", perm=[0, 2, 1, 3]),
+            helper.make_node(
+                "Concat",
+                ["gather_past_k_output", "k_4d_bnsh"],
+                ["oup_cache_k"],
+                "concat_past_k_and_curr_k",
+                axis=2,
+            ),
+            helper.make_node(
+                "Transpose",
+                ["oup_cache_k"],
+                ["k_output_transpose"],
+                "k_transpose_last_two_dims",
+                perm=[0, 1, 3, 2],
+            ),
+        ]
+        v_nodes = [
+            helper.make_node(
+                "MatMul",
+                ["layernorm_add_output_to_matmul", "v_weight"],
+                ["v_matmul_output"],
+                "v_path_matmul",
+            ),
+            helper.make_node("Add", ["v_bias", "v_matmul_output"], ["v_add_output"], "v_path_add"),
+            helper.make_node("Reshape", ["v_add_output", "v_attn_heads_output"], ["v_4d_bsnh"], "v_reshape_to_4d"),
+            helper.make_node("Transpose", ["v_4d_bsnh"], ["v_4d_bnsh"], "v_transpose_to_bnsh", perm=[0, 2, 1, 3]),
+            helper.make_node(
+                "Concat",
+                ["gather_past_v_output", "v_4d_bnsh"],
+                ["oup_cache_v"],
+                "concat_past_v_and_curr_v",
+                axis=2,
+            ),
+        ]
+        pos_embed = [
+            helper.make_node("Reshape", ["q_div_output", "position_embed_output"], ["reshape_pos_emb"], "r_pos_emb"),
+            helper.make_node(
+                "Transpose", ["reshape_pos_emb"], ["transpose_reshape_pos_emb"], "p_transpose", perm=[1, 0, 2]
+            ),
+            helper.make_node(
+                "MatMul",
+                ["transpose_reshape_pos_emb", "transpose_reshape_pos_emb"],
+                ["pos_matmul"],
+                "pos_embed_matmul",
+            ),
+            helper.make_node(
+                "Transpose", ["pos_matmul"], ["transpose_pos_matmul"], "p_matmul_transpose", perm=[1, 0, 2]
+            ),
+            helper.make_node(
+                "Reshape",
+                ["transpose_pos_matmul", "position_embed_output"],
+                ["reshape_position_emb"],
+                "final_reshape_pos_emb",
+            ),
+        ]
+        nodes.extend(q_nodes)
+        nodes.extend(k_nodes)
+        nodes.extend(v_nodes)
+        nodes.extend(pos_embed)
+
+        # Create nodes used with qkv concats, reshapes, and transposes
+        nodes.extend(
+            [
+                helper.make_node("Shape", ["layernorm_add_output_to_matmul"], ["shape_output"], "shape", start=0),
+                helper.make_node("Gather", ["shape_output", "idx_0"], ["gather_0_output"], "gather_0", axis=0),
+                helper.make_node(
+                    "Mul",
+                    ["gather_0_output", "num_heads_int"],
+                    ["mul_attn_heads_output"],
+                    "mul_num_heads",
+                ),
+                helper.make_node(
+                    "Unsqueeze",
+                    ["mul_attn_heads_output", "unsqueeze_axes_input"],
+                    ["unsqueeze_position_embed"],
+                    "unsqueeze_position_embed",
+                ),
+                helper.make_node(
+                    "Concat",
+                    ["unsqueeze_position_embed", "neg_one", "head_size"],
+                    ["position_embed_output"],
+                    "position_embed_concat_output",
+                    axis=0,
+                ),
+                helper.make_node(
+                    "Unsqueeze",
+                    ["gather_0_output", "unsqueeze_axes_input"],
+                    ["unsqueeze_attn_heads_output"],
+                    "unsqueeze_num_heads",
+                ),
+                helper.make_node(
+                    "Concat",
+                    ["unsqueeze_attn_heads_output", "neg_one", "head_size", "q_bsnh_reshape"],
+                    ["q_attn_heads_output"],
+                    "q_num_heads",
+                    axis=0,
+                ),
+                helper.make_node(
+                    "Concat",
+                    ["unsqueeze_attn_heads_output", "neg_one", "head_size", "q_bsnh_reshape"],
+                    ["k_attn_heads_output"],
+                    "k_num_heads",
+                    axis=0,
+                ),
+                helper.make_node(
+                    "Concat",
+                    ["unsqueeze_attn_heads_output", "neg_one", "head_size", "q_bsnh_reshape"],
+                    ["v_attn_heads_output"],
+                    "v_num_heads",
+                    axis=0,
+                ),
+                helper.make_node(
+                    "Concat",
+                    ["unsqueeze_attn_heads_output", "neg_one", "head_size"],
+                    ["bsd_format"],
+                    axis=0,
+                ),
+                helper.make_node(
+                    "Constant",
+                    inputs=[],
+                    outputs=["q_bsnh_reshape"],
+                    value=numpy_helper.from_array(
+                        np.array([0, 0, num_heads, head_size], dtype="int64"), name="const_tensor"
+                    ),
+                ),
+            ]
+        )
+
+        nodes.extend(
+            [
+                helper.make_node("Gather", ["inp_cache_v", "idx_0"], ["gather_past_v_output"], "gather_past_v", axis=0),
+                helper.make_node("Gather", ["inp_cache_k", "idx_0"], ["gather_past_k_output"], "gather_past_k", axis=0),
+            ]
+        )
+
+        # Compute Q x K'
+        nodes.extend(
+            [
+                helper.make_node(
+                    "MatMul",
+                    [
+                        "q_div_output",
+                        "k_output_transpose",
+                    ],
+                    ["qk_output"],
+                    "matmul_qk",
+                )
+            ]
+        )
+
+        # Create nodes for computing softmax(Q x K') x V
+        nodes.extend(
+            [
+                helper.make_node(
+                    "Add",
+                    [
+                        "qk_output",
+                        "reshape_position_emb",
+                    ],
+                    ["add_qk_output"],
+                    "add_qk",
+                ),
+                helper.make_node(
+                    "Softmax",
+                    ["add_qk_output"],
+                    ["softmax_output"],
+                    "softmax_qk",
+                    axis=2,
+                ),
+                helper.make_node(
+                    "MatMul",
+                    ["softmax_output", "oup_cache_v"],
+                    ["qkv_output_(num_heads*batch_size,seq_len,head_size)"],
+                    "matmul_qkv",
+                ),
+                helper.make_node(
+                    "Transpose",
+                    ["qkv_output_(num_heads*batch_size,seq_len,head_size)"],
+                    ["qkv_bsnh"],
+                    "transpose_bnsh_to_bsnh",
+                    perm=[0, 2, 1, 3],
+                ),
+                helper.make_node("Reshape", ["qkv_bsnh", "bsd_format"], ["attn_output"], "qkv_bsd"),
+            ]
+        )
+
+    # Create final nodes to conclude attention
+    nodes.append(
+        helper.make_node(
+            "MatMul",
+            ["attn_output", "matmul_after_attn_initializer"],
+            ["matmul_after_attn_output"],
+            "matmul_after_attn",
+        ),
+    )
+    if not fused:
+        next_sln_inputs = [
+            "layernorm_add_output_to_skiplayernorm",
+            "add_after_attn_output",
+            "layernorm_weight",
+            "layernorm_bias",
+        ]
+        nodes.extend(
+            [
+                helper.make_node(
+                    "Add",
+                    ["add_after_attn_initializer", "matmul_after_attn_output"],
+                    ["add_after_attn_output"],
+                    "add_after_attn",
+                ),
+                helper.make_node(
+                    "SkipLayerNormalization",
+                    next_sln_inputs,
+                    ["output_0", "", "", "output_1"],
+                    "next_skiplayernorm",
+                    domain="com.microsoft",
+                    epsilon=epsilon,
+                ),
+            ]
+        )
+    else:
+        next_sln_inputs = [
+            "matmul_after_attn_output",
+            "layernorm_add_output_to_skiplayernorm",
+            "layernorm_weight",
+            "layernorm_bias",
+            "add_after_attn_initializer",
+        ]
+        nodes.append(
+            helper.make_node(
+                "SkipLayerNormalization",
+                next_sln_inputs,
+                ["output_0", "", "", "output_1"],
+                "SkipLayerNorm_AddBias_0",
+                domain="com.microsoft",
+                epsilon=epsilon,
+            )
+        )
+
+    # Create initializers
+    v_weight, v_weight_data = get_tensor_and_weight("v_weight", [hidden_size, hidden_size])
+    v_bias, v_bias_data = get_tensor_and_weight("v_bias", [hidden_size])
+    q_weight, q_weight_data = get_tensor_and_weight("q_weight", [hidden_size, hidden_size])
+    q_bias, q_bias_data = get_tensor_and_weight("q_bias", [hidden_size])
+    k_weight, k_weight_data = get_tensor_and_weight("k_weight", [hidden_size, hidden_size])
+    k_bias, k_bias_data = get_tensor_and_weight("k_bias", [hidden_size])
+
+    qkv_bias = helper.make_tensor(
+        "Attention_0_qkv_bias",
+        TensorProto.FLOAT,
+        [3 * hidden_size],
+        q_bias_data + k_bias_data + v_bias_data,
+    )
+    initializers = [
+        float_tensor("layernorm_weight", [hidden_size]),
+        float_tensor("layernorm_bias", [hidden_size]),
+        float_tensor("matmul_after_attn_initializer", [hidden_size, hidden_size]),
+        float_tensor("add_after_attn_initializer", [hidden_size]),
+    ]
+
+    # Add Q/K/V weight tensors as initializers
+    if fused:
+        initializers.extend([q_weight, k_weight, v_weight])
+        initializers.extend([q_bias])
+        initializers.append(qkv_bias)
+        initializers.extend(
+            [
+                numpy_helper.from_array(np.array(num_heads, dtype="int64"), name="num_heads_int"),
+                numpy_helper.from_array(np.array([head_size], dtype="int64"), name="head_size"),
+                numpy_helper.from_array(np.array(1 / np.sqrt(head_size), dtype="float32"), name="q_scale"),
+                numpy_helper.from_array(np.array(0, dtype="int64"), name="idx_0"),
+                numpy_helper.from_array(np.array([-1], dtype="int64"), name="neg_one"),
+                numpy_helper.from_array(np.array([0], dtype="int64"), name="unsqueeze_axes_input"),
+                numpy_helper.from_array(np.array([0, 0, num_heads, head_size], dtype="int64"), name="q_bsnh_reshape"),
+            ]
+        )
+    else:
+        initializers.extend([q_weight, k_weight, v_weight])
+
+        initializers.extend([q_bias, k_bias, v_bias])
+
+        initializers.extend(
+            [
+                numpy_helper.from_array(np.array(num_heads, dtype="int64"), name="num_heads_int"),
+                numpy_helper.from_array(np.array([num_heads], dtype="int64"), name="num_heads"),
+                numpy_helper.from_array(np.array([head_size], dtype="int64"), name="head_size"),
+                numpy_helper.from_array(np.array([hidden_size], dtype="int64"), name="hidden_size"),
+                numpy_helper.from_array(np.array(1 / np.sqrt(head_size), dtype="float32"), name="q_scale"),
+                numpy_helper.from_array(np.array(0, dtype="int64"), name="idx_0"),
+                numpy_helper.from_array(np.array(1, dtype="int64"), name="idx_1"),
+                numpy_helper.from_array(np.array([-1], dtype="int64"), name="neg_one"),
+                numpy_helper.from_array(np.array([0], dtype="int64"), name="unsqueeze_axes_input"),
+            ]
+        )
+
+    # Construct graph
+    graph = helper.make_graph(nodes, "conformer_self_mha_graph", inputs, outputs, initializers, doc_string="conformer")
+    opsetid = helper.make_opsetid("ai.onnx", min(onnx.defs.onnx_opset_version(), 16))
+    return helper.make_model(graph, opset_imports=(opsetid,))
+
+
+if __name__ == "__main__":
+    np.random.seed(2)
+    num_heads = 8
+    hidden_size = 512
+
+    model = create_conformer_attention(num_heads=num_heads, hidden_size=hidden_size)
+    onnx.save(model, "conformer_self_mha.onnx")
+
+    model = create_conformer_attention(num_heads=num_heads, hidden_size=hidden_size, fused=True)
+    onnx.save(model, "./test_data/models/conformer/conformer_self_mha_fused.onnx")
diff --git a/onnxruntime/test/python/transformers/test_conformer.py b/onnxruntime/test/python/transformers/test_conformer.py
new file mode 100644
index 0000000000000..471ba9756bcf8
--- /dev/null
+++ b/onnxruntime/test/python/transformers/test_conformer.py
@@ -0,0 +1,69 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation.  All rights reserved.
+# Licensed under the MIT License.  See License.txt in the project root for
+# license information.
+# --------------------------------------------------------------------------
+
+import os
+import unittest
+
+import onnx
+from conformer_model_generator import create_conformer_attention
+from parity_utilities import find_transformers_source
+
+if find_transformers_source():
+    from fusion_options import FusionOptions
+    from onnx_model import OnnxModel
+    from optimizer import optimize_model
+else:
+    from onnxruntime.transformers.fusion_options import FusionOptions
+    from onnxruntime.transformers.onnx_model import OnnxModel
+    from onnxruntime.transformers.optimizer import optimize_model
+
+
+class TestFusion(unittest.TestCase):
+    def verify_fusion(self, optimized_model, expected_model_filename):
+        optimized_model.topological_sort(is_deterministic=True)
+
+        expected_model_path = os.path.join(
+            os.path.dirname(__file__), "test_data", "models", "conformer", expected_model_filename
+        )
+        print("Expected model path = ", expected_model_path)
+        expected_model = OnnxModel(onnx.load(expected_model_path))
+        expected_model.topological_sort(is_deterministic=True)
+
+        nodes = optimized_model.model.graph.node
+        self.assertEqual(len(nodes), len(expected_model.model.graph.node))
+
+        for i in range(len(nodes)):
+            self.assertEqual(nodes[i], expected_model.model.graph.node[i])
+
+        for expected_initializer in expected_model.model.graph.initializer:
+            print("Expected initializer initial = ", expected_initializer.name)
+            self.assertTrue(
+                OnnxModel.has_same_value(
+                    optimized_model.get_initializer(expected_initializer.name), expected_initializer
+                )
+            )
+
+    def test_ct_mha_fusion(self):
+        num_heads = 8
+        hidden_size = 512
+        model = create_conformer_attention(num_heads=num_heads, hidden_size=hidden_size, add_before_layernorm=False)
+        dir = "."
+        model_path = os.path.join(dir, "conformer_self_mha.onnx")
+        onnx.save(model, model_path)
+        options = FusionOptions("conformer")
+        optimized_model = optimize_model(
+            model_path,
+            model_type="conformer",
+            num_heads=num_heads,
+            hidden_size=hidden_size,
+            optimization_options=options,
+        )
+        os.remove(model_path)
+        self.verify_fusion(optimized_model, "conformer_self_mha_fused.onnx")
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/onnxruntime/test/python/transformers/test_data/models/conformer/conformer_self_mha_fused.onnx b/onnxruntime/test/python/transformers/test_data/models/conformer/conformer_self_mha_fused.onnx
new file mode 100644
index 0000000000000..9d882751db265
Binary files /dev/null and b/onnxruntime/test/python/transformers/test_data/models/conformer/conformer_self_mha_fused.onnx differ
diff --git a/onnxruntime/test/python/transformers/test_flash_attn.py b/onnxruntime/test/python/transformers/test_flash_attn.py
index f90a9475b4588..8a839875de2a2 100644
--- a/onnxruntime/test/python/transformers/test_flash_attn.py
+++ b/onnxruntime/test/python/transformers/test_flash_attn.py
@@ -10,6 +10,10 @@
 # license information.
 # -------------------------------------------------------------------------
 import math
+import os
+import platform
+import random
+import unittest
 
 import numpy
 import torch
@@ -17,23 +21,53 @@
 from einops import rearrange, repeat
 from onnx import TensorProto, helper
 
-from onnxruntime import InferenceSession, SessionOptions
+from onnxruntime import InferenceSession, OrtValue, SessionOptions
 
 torch.manual_seed(0)
 
+pipeline_mode = True  # Reduces number of tests so pipeline doesn't time out
+
+
+class Formats:
+    BSNH = 0
+    BNSH = 1
+
 
 class Config:
     batch_size = 0
     sequence_length = 0
     kv_sequence_length = 0
+    past_sequence_length = 0
     num_heads = 0
+    kv_num_heads = 0
     head_size = 0
 
-    def __init__(self, b, s, s2, n, h):
+    def __init__(self, b, s, s2, sp, n, n2, h):
         self.batch_size = b
         self.sequence_length = s
         self.kv_sequence_length = s2
+        self.past_sequence_length = sp
+        self.num_heads = n
+        self.kv_num_heads = n2
+        self.head_size = h
+
+
+class PromptConfig:
+    batch_size = 0
+    q_sequence_length = 0
+    kv_sequence_length = 0
+    buffer_sequence_length = 0
+    num_heads = 0
+    kv_num_heads = 0
+    head_size = 0
+
+    def __init__(self, b, sq, skv, sb, n, n2, h):
+        self.batch_size = b
+        self.q_sequence_length = sq
+        self.kv_sequence_length = skv
+        self.buffer_sequence_length = sb
         self.num_heads = n
+        self.kv_num_heads = n2
         self.head_size = h
 
 
@@ -149,6 +183,287 @@ def create_multihead_attention_graph(config):
     return model.SerializeToString()
 
 
+def create_group_query_attention_graph_prompt(
+    config, past_kv_format=Formats.BSNH, share_buffer=True, local_window_size=-1
+):
+    past_kv_seqlen = config.buffer_sequence_length if share_buffer else 0
+    present_kv_seqlen = config.buffer_sequence_length if share_buffer else config.kv_sequence_length
+    nodes = [
+        helper.make_node(
+            "GroupQueryAttention",
+            [
+                "query",
+                "key",
+                "value",
+                "past_key" if share_buffer else "",
+                "past_value" if share_buffer else "",
+                "seqlens_k",
+                "total_sequence_length",
+            ],
+            ["output", "present_key", "present_value"],
+            "GroupQueryAttention_0",
+            num_heads=config.num_heads,
+            kv_num_heads=config.kv_num_heads,
+            local_window_size=local_window_size,
+            # is_past_bsnh=1 if past_kv_format == Formats.BSNH else 0,
+            # kv_share_buffer=1 if share_buffer else 0,
+            domain="com.microsoft",
+        ),
+    ]
+
+    graph_input = [
+        helper.make_tensor_value_info(
+            "query",
+            TensorProto.FLOAT16,
+            [
+                config.batch_size,
+                config.q_sequence_length,
+                config.num_heads * config.head_size,
+            ],
+        ),
+        helper.make_tensor_value_info(
+            "key",
+            TensorProto.FLOAT16,
+            [
+                config.batch_size,
+                config.kv_sequence_length,
+                config.kv_num_heads * config.head_size,
+            ],
+        ),
+        helper.make_tensor_value_info(
+            "value",
+            TensorProto.FLOAT16,
+            [
+                config.batch_size,
+                config.kv_sequence_length,
+                config.kv_num_heads * config.head_size,
+            ],
+        ),
+        helper.make_tensor_value_info(
+            "seqlens_k",
+            TensorProto.INT32,
+            [config.batch_size],
+        ),
+        helper.make_tensor_value_info(
+            "total_sequence_length",
+            TensorProto.INT32,
+            [1],
+        ),
+    ]
+    if share_buffer:
+        graph_input += [
+            helper.make_tensor_value_info(
+                "past_key",
+                TensorProto.FLOAT16,
+                [
+                    config.batch_size,
+                    past_kv_seqlen if past_kv_format == Formats.BSNH else config.kv_num_heads,
+                    config.kv_num_heads if past_kv_format == Formats.BSNH else past_kv_seqlen,
+                    config.head_size,
+                ],
+            ),
+            helper.make_tensor_value_info(
+                "past_value",
+                TensorProto.FLOAT16,
+                [
+                    config.batch_size,
+                    past_kv_seqlen if past_kv_format == Formats.BSNH else config.kv_num_heads,
+                    config.kv_num_heads if past_kv_format == Formats.BSNH else past_kv_seqlen,
+                    config.head_size,
+                ],
+            ),
+        ]
+
+    graph_output = [
+        helper.make_tensor_value_info(
+            "output",
+            TensorProto.FLOAT16,
+            [config.batch_size, config.q_sequence_length, config.num_heads * config.head_size],
+        ),
+        helper.make_tensor_value_info(
+            "present_key",
+            TensorProto.FLOAT16,
+            [
+                config.batch_size,
+                present_kv_seqlen if past_kv_format == Formats.BSNH else config.kv_num_heads,
+                config.kv_num_heads if past_kv_format == Formats.BSNH else present_kv_seqlen,
+                config.head_size,
+            ],
+        ),
+        helper.make_tensor_value_info(
+            "present_value",
+            TensorProto.FLOAT16,
+            [
+                config.batch_size,
+                present_kv_seqlen if past_kv_format == Formats.BSNH else config.kv_num_heads,
+                config.kv_num_heads if past_kv_format == Formats.BSNH else present_kv_seqlen,
+                config.head_size,
+            ],
+        ),
+        helper.make_tensor_value_info(
+            "present_key",
+            TensorProto.FLOAT16,
+            [
+                config.batch_size,
+                config.kv_sequence_length if past_kv_format == Formats.BSNH else config.kv_num_heads,
+                config.kv_num_heads if past_kv_format == Formats.BSNH else config.kv_sequence_length,
+                config.head_size,
+            ],
+        ),
+        helper.make_tensor_value_info(
+            "present_value",
+            TensorProto.FLOAT16,
+            [
+                config.batch_size,
+                config.kv_sequence_length if past_kv_format == Formats.BSNH else config.kv_num_heads,
+                config.kv_num_heads if past_kv_format == Formats.BSNH else config.kv_sequence_length,
+                config.head_size,
+            ],
+        ),
+    ]
+
+    graph = helper.make_graph(
+        nodes,
+        "GroupQueryAttention_Graph",
+        graph_input,
+        graph_output,
+    )
+
+    model = helper.make_model(graph)
+    return model.SerializeToString()
+
+
+def create_group_query_attention_graph_past(
+    config, past_kv_format=Formats.BSNH, share_buffer=True, local_window_size=-1
+):
+    past_kv_seqlen = config.kv_sequence_length
+    present_kv_seqlen = (
+        config.kv_sequence_length if share_buffer else config.kv_sequence_length + config.sequence_length
+    )
+    nodes = [
+        helper.make_node(
+            "GroupQueryAttention",
+            [
+                "query",
+                "key",
+                "value",
+                "past_key",
+                "past_value",
+                "seqlens_k",
+                "total_sequence_length",
+            ],
+            ["output", "present_key", "present_value"],
+            "GroupQueryAttention_0",
+            num_heads=config.num_heads,
+            kv_num_heads=config.kv_num_heads,
+            local_window_size=local_window_size,
+            # is_past_bsnh=1 if past_kv_format == Formats.BSNH else 0,
+            # kv_share_buffer=1 if share_buffer else 0,
+            domain="com.microsoft",
+        ),
+    ]
+
+    graph_input = [
+        helper.make_tensor_value_info(
+            "query",
+            TensorProto.FLOAT16,
+            [
+                config.batch_size,
+                config.sequence_length,
+                config.num_heads * config.head_size,
+            ],
+        ),
+        helper.make_tensor_value_info(
+            "key",
+            TensorProto.FLOAT16,
+            [
+                config.batch_size,
+                config.sequence_length,
+                config.kv_num_heads * config.head_size,
+            ],
+        ),
+        helper.make_tensor_value_info(
+            "value",
+            TensorProto.FLOAT16,
+            [
+                config.batch_size,
+                config.sequence_length,
+                config.kv_num_heads * config.head_size,
+            ],
+        ),
+        helper.make_tensor_value_info(
+            "past_key",
+            TensorProto.FLOAT16,
+            [
+                config.batch_size,
+                past_kv_seqlen if past_kv_format == Formats.BSNH else config.kv_num_heads,
+                config.kv_num_heads if past_kv_format == Formats.BSNH else past_kv_seqlen,
+                config.head_size,
+            ],
+        ),
+        helper.make_tensor_value_info(
+            "past_value",
+            TensorProto.FLOAT16,
+            [
+                config.batch_size,
+                past_kv_seqlen if past_kv_format == Formats.BSNH else config.kv_num_heads,
+                config.kv_num_heads if past_kv_format == Formats.BSNH else past_kv_seqlen,
+                config.head_size,
+            ],
+        ),
+    ]
+    graph_input += [
+        helper.make_tensor_value_info(
+            "seqlens_k",
+            TensorProto.INT32,
+            [config.batch_size],
+        ),
+        helper.make_tensor_value_info(
+            "total_sequence_length",
+            TensorProto.INT32,
+            [1],
+        ),
+    ]
+
+    graph_output = [
+        helper.make_tensor_value_info(
+            "output",
+            TensorProto.FLOAT16,
+            [config.batch_size, config.sequence_length, config.num_heads * config.head_size],
+        ),
+        helper.make_tensor_value_info(
+            "present_key",
+            TensorProto.FLOAT16,
+            [
+                config.batch_size,
+                present_kv_seqlen if past_kv_format == Formats.BSNH else config.kv_num_heads,
+                config.kv_num_heads if past_kv_format == Formats.BSNH else present_kv_seqlen,
+                config.head_size,
+            ],
+        ),
+        helper.make_tensor_value_info(
+            "present_value",
+            TensorProto.FLOAT16,
+            [
+                config.batch_size,
+                present_kv_seqlen if past_kv_format == Formats.BSNH else config.kv_num_heads,
+                config.kv_num_heads if past_kv_format == Formats.BSNH else present_kv_seqlen,
+                config.head_size,
+            ],
+        ),
+    ]
+
+    graph = helper.make_graph(
+        nodes,
+        "GroupQueryAttention_Graph",
+        graph_input,
+        graph_output,
+    )
+
+    model = helper.make_model(graph)
+    return model.SerializeToString()
+
+
 def generate_random_padding_mask(max_seqlen, batch_size, device, mode="random"):
     assert mode in ["full", "random", "third"]
     if mode == "full":
@@ -329,7 +644,7 @@ def flash_attn_varlen_qkvpacked_func(qkv_unpad, cu_seqlens, token_offset, config
     return output
 
 
-def flash_attn_func(q, k, v, config, causal=False):
+def mha_func(q, k, v, config):
     onnx_model_str = create_multihead_attention_graph(config)
     q = torch.reshape(q, (config.batch_size, config.sequence_length, -1))
     k = torch.reshape(k, (config.batch_size, config.kv_sequence_length, -1))
@@ -342,10 +657,201 @@ def flash_attn_func(q, k, v, config, causal=False):
     sess_options = SessionOptions()
     ort_session = InferenceSession(onnx_model_str, sess_options, providers=["CUDAExecutionProvider"])
     ort_output = ort_session.run(None, ort_inputs)
+    ort_output = numpy.array(ort_output)
     output = torch.tensor(ort_output)
     return output
 
 
+def gqa_prompt_func(
+    q, k, v, config, new_k, new_v, seqlens_k=None, window_size=-1, past_kv_format=Formats.BSNH, share_buffer=True
+):
+    onnx_model_str = create_group_query_attention_graph_prompt(
+        config, past_kv_format, share_buffer, local_window_size=window_size
+    )
+    q = torch.reshape(q, (config.batch_size, config.q_sequence_length, -1))
+    past_k = k.clone() if share_buffer else None
+    past_v = v.clone() if share_buffer else None
+    new_k = torch.reshape(new_k, (config.batch_size, config.kv_sequence_length, -1))
+    new_v = torch.reshape(new_v, (config.batch_size, config.kv_sequence_length, -1))
+    if share_buffer:
+        ort_inputs = {
+            "query": q.detach().cpu().numpy(),
+            "key": new_k.detach().cpu().numpy(),
+            "value": new_v.detach().cpu().numpy(),
+            "past_key": OrtValue.ortvalue_from_numpy(past_k.detach().cpu().numpy(), "cuda", 0),
+            "past_value": OrtValue.ortvalue_from_numpy(past_v.detach().cpu().numpy(), "cuda", 0),
+            "seqlens_k": seqlens_k.detach().cpu().numpy().astype(numpy.int32),
+            "total_sequence_length": torch.tensor([config.q_sequence_length], dtype=torch.int32).detach().cpu().numpy(),
+        }
+        sess_options = SessionOptions()
+        ort_session = InferenceSession(onnx_model_str, sess_options, providers=["CUDAExecutionProvider"])
+        io_binding = ort_session.io_binding()
+        io_binding.bind_cpu_input("query", ort_inputs["query"])
+        io_binding.bind_cpu_input("key", ort_inputs["key"])
+        io_binding.bind_cpu_input("value", ort_inputs["value"])
+        io_binding.bind_input(
+            "past_key", "cuda", 0, numpy.float16, ort_inputs["past_key"].shape(), ort_inputs["past_key"].data_ptr()
+        )
+        io_binding.bind_input(
+            "past_value",
+            "cuda",
+            0,
+            numpy.float16,
+            ort_inputs["past_value"].shape(),
+            ort_inputs["past_value"].data_ptr(),
+        )
+        io_binding.bind_cpu_input("seqlens_k", ort_inputs["seqlens_k"])
+        io_binding.bind_cpu_input("total_sequence_length", ort_inputs["total_sequence_length"])
+        io_binding.bind_output("output")
+        io_binding.bind_ortvalue_output("present_key", ort_inputs["past_key"])
+        io_binding.bind_ortvalue_output("present_value", ort_inputs["past_value"])
+        ort_session.run_with_iobinding(io_binding)
+        ort_output, present_k, present_v = io_binding.copy_outputs_to_cpu()
+        ort_output = numpy.array(ort_output)
+        output = torch.tensor(ort_output)
+        return output, present_k, present_v
+    else:
+        ort_inputs = {
+            "query": q.detach().cpu().numpy(),
+            "key": new_k.detach().cpu().numpy(),
+            "value": new_v.detach().cpu().numpy(),
+            "seqlens_k": seqlens_k.detach().cpu().numpy().astype(numpy.int32),
+            "total_sequence_length": torch.tensor([config.q_sequence_length], dtype=torch.int32).detach().cpu().numpy(),
+        }
+        sess_options = SessionOptions()
+        ort_session = InferenceSession(onnx_model_str, sess_options, providers=["CUDAExecutionProvider"])
+        io_binding = ort_session.io_binding()
+        io_binding.bind_cpu_input("query", ort_inputs["query"])
+        io_binding.bind_cpu_input("key", ort_inputs["key"])
+        io_binding.bind_cpu_input("value", ort_inputs["value"])
+        io_binding.bind_cpu_input("seqlens_k", ort_inputs["seqlens_k"])
+        io_binding.bind_cpu_input("total_sequence_length", ort_inputs["total_sequence_length"])
+        io_binding.bind_output("output")
+        io_binding.bind_output("present_key")
+        io_binding.bind_output("present_value")
+        ort_session.run_with_iobinding(io_binding)
+        ort_output, present_k, present_v = io_binding.copy_outputs_to_cpu()
+        ort_output = numpy.array(ort_output)
+        output = torch.tensor(ort_output)
+        return output, present_k, present_v
+
+
+def gqa_past_func(
+    q, k, v, config, new_k, new_v, seqlens_k=None, past_kv_format=Formats.BSNH, share_buffer=True, window_size=-1
+):
+    onnx_model_str = create_group_query_attention_graph_past(
+        config, past_kv_format, share_buffer, local_window_size=window_size
+    )
+    q = torch.reshape(q, (config.batch_size, config.sequence_length, -1))
+    past_k = k.clone()
+    past_v = v.clone()
+    new_k = torch.reshape(new_k, (config.batch_size, config.sequence_length, -1))
+    new_v = torch.reshape(new_v, (config.batch_size, config.sequence_length, -1))
+    if share_buffer:
+        ort_inputs = {
+            "query": q.detach().cpu().numpy(),
+            "key": new_k.detach().cpu().numpy(),
+            "value": new_v.detach().cpu().numpy(),
+            "past_key": OrtValue.ortvalue_from_numpy(past_k.detach().cpu().numpy(), "cuda", 0),
+            "past_value": OrtValue.ortvalue_from_numpy(past_v.detach().cpu().numpy(), "cuda", 0),
+            "seqlens_k": seqlens_k.detach().cpu().numpy().astype(numpy.int32),
+            "total_sequence_length": torch.tensor([config.kv_sequence_length], dtype=torch.int32)
+            .detach()
+            .cpu()
+            .numpy(),
+        }
+        sess_options = SessionOptions()
+        ort_session = InferenceSession(onnx_model_str, sess_options, providers=["CUDAExecutionProvider"])
+        io_binding = ort_session.io_binding()
+        io_binding.bind_cpu_input("query", ort_inputs["query"])
+        io_binding.bind_cpu_input("key", ort_inputs["key"])
+        io_binding.bind_cpu_input("value", ort_inputs["value"])
+        io_binding.bind_input(
+            "past_key", "cuda", 0, numpy.float16, ort_inputs["past_key"].shape(), ort_inputs["past_key"].data_ptr()
+        )
+        io_binding.bind_input(
+            "past_value",
+            "cuda",
+            0,
+            numpy.float16,
+            ort_inputs["past_value"].shape(),
+            ort_inputs["past_value"].data_ptr(),
+        )
+        io_binding.bind_cpu_input("seqlens_k", ort_inputs["seqlens_k"])
+        io_binding.bind_cpu_input("total_sequence_length", ort_inputs["total_sequence_length"])
+        io_binding.bind_output("output")
+        io_binding.bind_ortvalue_output("present_key", ort_inputs["past_key"])
+        io_binding.bind_ortvalue_output("present_value", ort_inputs["past_value"])
+        ort_session.run_with_iobinding(io_binding)
+        ort_output, present_k, present_v = io_binding.copy_outputs_to_cpu()
+        ort_output = numpy.array(ort_output)
+        output = torch.tensor(ort_output)
+        return output, present_k, present_v
+    else:
+        ort_inputs = {
+            "query": q.detach().cpu().numpy(),
+            "key": new_k.detach().cpu().numpy(),
+            "value": new_v.detach().cpu().numpy(),
+            "past_key": past_k.detach().cpu().numpy(),
+            "past_value": past_v.detach().cpu().numpy(),
+            "seqlens_k": seqlens_k.detach().cpu().numpy().astype(numpy.int32),
+            "total_sequence_length": torch.tensor(
+                [config.kv_sequence_length + config.sequence_length], dtype=torch.int32
+            )
+            .detach()
+            .cpu()
+            .numpy(),
+        }
+        sess_options = SessionOptions()
+        ort_session = InferenceSession(onnx_model_str, sess_options, providers=["CUDAExecutionProvider"])
+        io_binding = ort_session.io_binding()
+        io_binding.bind_cpu_input("query", ort_inputs["query"])
+        io_binding.bind_cpu_input("key", ort_inputs["key"])
+        io_binding.bind_cpu_input("value", ort_inputs["value"])
+        io_binding.bind_cpu_input("past_key", ort_inputs["past_key"])
+        io_binding.bind_cpu_input("past_value", ort_inputs["past_value"])
+        io_binding.bind_cpu_input("seqlens_k", ort_inputs["seqlens_k"])
+        io_binding.bind_cpu_input("total_sequence_length", ort_inputs["total_sequence_length"])
+        io_binding.bind_output("output")
+        io_binding.bind_output("present_key")
+        io_binding.bind_output("present_value")
+        ort_session.run_with_iobinding(io_binding)
+        ort_output, present_k, present_v = io_binding.copy_outputs_to_cpu()
+        ort_output = numpy.array(ort_output)
+        output = torch.tensor(ort_output)
+        return output, present_k, present_v
+
+
+def construct_causal_mask(seqlen_q, seqlen_k, query_padding_mask=None, key_padding_mask=None, device=None):
+    row_idx = rearrange(torch.arange(seqlen_q, device=device, dtype=torch.long), "s -> s 1")
+    col_idx = torch.arange(seqlen_k, device=device, dtype=torch.long)
+    sk = seqlen_k if key_padding_mask is None else rearrange(key_padding_mask.sum(-1), "b -> b 1 1 1")
+    sq = seqlen_q if query_padding_mask is None else rearrange(query_padding_mask.sum(-1), "b -> b 1 1 1")
+    return col_idx > row_idx + sk - sq
+
+
+def construct_local_mask(
+    seqlen_q,
+    seqlen_k,
+    window_size=(-1, -1),  # -1 means infinite window size
+    query_padding_mask=None,
+    key_padding_mask=None,
+    device=None,
+):
+    row_idx = rearrange(torch.arange(seqlen_q, device=device, dtype=torch.long), "s -> s 1")
+    col_idx = torch.arange(seqlen_k, device=device, dtype=torch.long)
+    sk = seqlen_k if key_padding_mask is None else rearrange(key_padding_mask.sum(-1), "b -> b 1 1 1")
+    sq = seqlen_q if query_padding_mask is None else rearrange(query_padding_mask.sum(-1), "b -> b 1 1 1")
+    if window_size[0] < 0:
+        return col_idx > row_idx + sk - sq + window_size[1]
+    else:
+        sk = torch.full_like(col_idx, seqlen_k) if key_padding_mask is None else sk
+        return torch.logical_or(
+            col_idx > torch.minimum(row_idx + sk - sq + window_size[1], sk),
+            col_idx < row_idx + sk - sq - window_size[0],
+        )
+
+
 def attention_ref(
     q,
     k,
@@ -355,6 +861,7 @@ def attention_ref(
     dropout_p=0.0,
     dropout_mask=None,
     causal=False,
+    window_size=(-1, -1),  # -1 means infinite window size
     upcast=True,
     reorder_ops=False,
 ):
@@ -367,6 +874,8 @@ def attention_ref(
         key_padding_mask: (batch_size, seqlen_k)
         dropout_p: float
         dropout_mask: (batch_size, nheads, seqlen_q, seqlen_k)
+        causal: whether to apply causal masking
+        window_size: (int, int), left and right window size
         upcast: whether to cast all inputs to fp32, do all computation in fp32, then cast
             output back to fp16/bf16.
         reorder_ops: whether to change the order of operations (scaling k instead of scaling k, etc.)
@@ -376,6 +885,8 @@ def attention_ref(
         output: (batch_size, seqlen_q, nheads, head_dim)
         attention: (batch_size, nheads, seqlen_q, seqlen_k), softmax after dropout
     """
+    if causal:
+        window_size = (window_size[0], 0)
     dtype_og = q.dtype
     if upcast:
         q, k, v = q.float(), k.float(), v.float()
@@ -389,10 +900,24 @@ def attention_ref(
         scores = torch.einsum("bthd,bshd->bhts", q, k / math.sqrt(d))
     if key_padding_mask is not None:
         scores.masked_fill_(rearrange(~key_padding_mask, "b s -> b 1 1 s"), float("-inf"))
-    if causal:
-        causal_mask = torch.triu(torch.ones(seqlen_q, seqlen_k, dtype=torch.bool, device=q.device), 1)
-        scores.masked_fill_(causal_mask, float("-inf"))
+    if window_size[0] >= 0 or window_size[1] >= 0:
+        local_mask = construct_local_mask(
+            seqlen_q,
+            seqlen_k,
+            window_size,
+            query_padding_mask,
+            key_padding_mask,
+            q.device,
+        )
+        scores.masked_fill_(local_mask, float("-inf"))
     attention = torch.softmax(scores, dim=-1)
+    # Some rows might be completely masked out so we fill them with zero instead of NaN
+    if window_size[0] >= 0 or window_size[1] >= 0:
+        attention = attention.masked_fill(torch.all(local_mask, dim=-1, keepdim=True), 0.0)
+    # We want to mask here so that the attention matrix doesn't have any NaNs
+    # Otherwise we'll get NaN in dV
+    if query_padding_mask is not None:
+        attention = attention.masked_fill(rearrange(~query_padding_mask, "b s -> b 1 s 1"), 0.0)
     dropout_scaling = 1.0 / (1 - dropout_p)
     if dropout_mask is not None:
         attention_drop = attention.masked_fill(~dropout_mask, 0.0)
@@ -401,7 +926,6 @@ def attention_ref(
     output = torch.einsum("bhts,bshd->bthd", attention_drop, v * dropout_scaling)
     if query_padding_mask is not None:
         output.masked_fill_(rearrange(~query_padding_mask, "b s -> b s 1 1"), 0.0)
-        attention = attention.masked_fill(rearrange(~query_padding_mask, "b s -> b 1 s 1"), 0.0)
     return output.to(dtype=dtype_og), attention.to(dtype=dtype_og)
 
 
@@ -422,7 +946,7 @@ def attention_qkvpacked_ref(
     )
 
 
-def parity_check(
+def parity_check_mha(
     config,
     packed,
     rtol=1e-3,
@@ -456,7 +980,7 @@ def parity_check(
         k = torch.randn(
             config.batch_size,
             config.kv_sequence_length,
-            config.num_heads,
+            config.kv_num_heads,
             config.head_size,
             device="cuda",
             dtype=torch.float16,
@@ -465,19 +989,20 @@ def parity_check(
         v = torch.randn(
             config.batch_size,
             config.kv_sequence_length,
-            config.num_heads,
+            config.kv_num_heads,
             config.head_size,
             device="cuda",
             dtype=torch.float16,
             requires_grad=False,
         )
-        out = flash_attn_func(q, k, v, config)
+        out = mha_func(q, k, v, config)
         out = torch.squeeze(out, 0)
         out = torch.reshape(out, (config.batch_size, config.sequence_length, config.num_heads, config.head_size))
         out = out.detach().cpu().numpy()
         # Pytorch to compare
-        out_ref, _ = attention_ref(q, k, v, None, None, 0.0, None)
+        out_ref, _ = attention_ref(q, k, v, None, None, 0.0, None, causal=False)
         out_ref = out_ref.detach().cpu().numpy()
+
     # Compare results
     print(
         " B:",
@@ -486,6 +1011,8 @@ def parity_check(
         config.sequence_length,
         " N:",
         config.num_heads,
+        " kvN:",
+        config.kv_num_heads,
         " h:",
         config.head_size,
         " Mean Error:",
@@ -500,29 +1027,731 @@ def parity_check(
     )
 
 
+def parity_check_gqa_prompt(
+    config,
+    causal=False,
+    local=False,
+    past_format=Formats.BSNH,
+    rtol=1e-3,
+    atol=1e-3,
+):
+    q = torch.randn(
+        config.batch_size,
+        config.q_sequence_length,
+        config.num_heads,
+        config.head_size,
+        device="cuda",
+        dtype=torch.float16,
+        requires_grad=False,
+    )
+    k = torch.randn(
+        config.batch_size,
+        config.buffer_sequence_length if past_format == Formats.BSNH else config.kv_num_heads,
+        config.kv_num_heads if past_format == Formats.BSNH else config.buffer_sequence_length,
+        config.head_size,
+        device="cuda",
+        dtype=torch.float16,
+        requires_grad=False,
+    )
+    v = torch.randn(
+        config.batch_size,
+        config.buffer_sequence_length if past_format == Formats.BSNH else config.kv_num_heads,
+        config.kv_num_heads if past_format == Formats.BSNH else config.buffer_sequence_length,
+        config.head_size,
+        device="cuda",
+        dtype=torch.float16,
+        requires_grad=False,
+    )
+    new_k = torch.randn(
+        config.batch_size,
+        config.kv_sequence_length,
+        config.kv_num_heads,
+        config.head_size,
+        device="cuda",
+        dtype=torch.float16,
+        requires_grad=False,
+    )
+    new_v = torch.randn(
+        config.batch_size,
+        config.kv_sequence_length,
+        config.kv_num_heads,
+        config.head_size,
+        device="cuda",
+        dtype=torch.float16,
+        requires_grad=False,
+    )
+
+    window_size = (-1, -1)
+    left_window_size = -1
+    if local:
+        left_window_size = random.randint(0, config.kv_sequence_length)
+        window_size = (left_window_size, 0)
+    elif causal:
+        left_window_size = -1
+        window_size = (-1, 0)
+
+    # Pytorch to compare
+    k_cache_ref = k.clone()
+    v_cache_ref = v.clone()
+    if past_format == Formats.BNSH:
+        k_cache_ref = k_cache_ref.transpose(1, 2)
+        v_cache_ref = v_cache_ref.transpose(1, 2)
+    cache_seqlens = torch.tensor([config.kv_sequence_length], device="cuda").repeat(config.batch_size)
+    # cache_seqlens = torch.randint(
+    #     0,
+    #     config.kv_sequence_length,
+    #     (config.batch_size,),
+    #     dtype=torch.int32,
+    #     device="cuda",
+    # )
+    # cache_seqlens[random.randint(0, cache_seqlens.size(dim=0) - 1)] = config.kv_sequence_length
+    rearrange(torch.arange(config.kv_sequence_length, device="cuda"), "s -> 1 s")
+    arange = rearrange(torch.arange(config.buffer_sequence_length, device="cuda"), "s -> 1 s")
+    cache_seqlens_expanded = rearrange(cache_seqlens, "b -> b 1")
+    kv_seqlens = torch.tensor([config.kv_sequence_length], device="cuda").repeat(config.batch_size)
+    kv_seqlens_expanded = rearrange(kv_seqlens, "b -> b 1")
+    update_mask = arange < kv_seqlens_expanded
+    k_cache_ref[update_mask] = rearrange(new_k, "b s ... -> (b s) ...")
+    v_cache_ref[update_mask] = rearrange(new_v, "b s ... -> (b s) ...")
+    k_cache_rep = repeat(k_cache_ref, "b s h d -> b s (h g) d", g=config.num_heads // config.kv_num_heads)
+    v_cache_rep = repeat(v_cache_ref, "b s h d -> b s (h g) d", g=config.num_heads // config.kv_num_heads)
+    key_padding_mask = arange < cache_seqlens_expanded
+    out_ref, _ = attention_ref(
+        q, k_cache_rep, v_cache_rep, None, key_padding_mask, 0.0, None, causal=True, window_size=window_size
+    )
+    out_ref = out_ref.detach().cpu().numpy()
+    if past_format == Formats.BNSH:
+        k_cache_ref = k_cache_ref.transpose(1, 2)
+        v_cache_ref = v_cache_ref.transpose(1, 2)
+
+    # Flash function
+    out, present_k, present_v = gqa_prompt_func(
+        q, k, v, config, new_k, new_v, cache_seqlens, left_window_size, past_format, True
+    )
+    out = torch.squeeze(out, 0)
+    out = torch.reshape(out, (config.batch_size, config.q_sequence_length, config.num_heads, config.head_size))
+    out = out.detach().cpu().numpy()
+
+    # Make sure past-present buffer updating correctly
+    assert numpy.allclose(present_k, k_cache_ref.detach().cpu().numpy(), rtol=rtol, atol=atol, equal_nan=True)
+    assert numpy.allclose(present_v, v_cache_ref.detach().cpu().numpy(), rtol=rtol, atol=atol, equal_nan=True)
+
+    # Compare results
+    print(
+        "KV-buffer",
+        " causal:",
+        causal,
+        " local:",
+        local,
+        "past kv format:",
+        "BSNH" if past_format == Formats.BSNH else "BNSH",
+        " B:",
+        config.batch_size,
+        " S:",
+        config.q_sequence_length,
+        " kv S:",
+        config.kv_sequence_length,
+        " N:",
+        config.num_heads,
+        " kv N:",
+        config.kv_num_heads,
+        " h:",
+        config.head_size,
+        " Mean Error:",
+        numpy.mean(numpy.abs(out - out_ref)),
+        numpy.allclose(
+            out,
+            out_ref,
+            rtol=rtol,
+            atol=atol,
+            equal_nan=True,
+        ),
+    )
+
+
+def parity_check_gqa_prompt_no_buff(
+    config,
+    causal=False,
+    local=False,
+    past_format=Formats.BSNH,
+    rtol=1e-3,
+    atol=1e-3,
+):
+    q = torch.randn(
+        config.batch_size,
+        config.q_sequence_length,
+        config.num_heads,
+        config.head_size,
+        device="cuda",
+        dtype=torch.float16,
+        requires_grad=False,
+    )
+    new_k = torch.randn(
+        config.batch_size,
+        config.kv_sequence_length,
+        config.kv_num_heads,
+        config.head_size,
+        device="cuda",
+        dtype=torch.float16,
+        requires_grad=False,
+    )
+    new_v = torch.randn(
+        config.batch_size,
+        config.kv_sequence_length,
+        config.kv_num_heads,
+        config.head_size,
+        device="cuda",
+        dtype=torch.float16,
+        requires_grad=False,
+    )
+
+    window_size = (-1, -1)
+    left_window_size = -1
+    if local:
+        left_window_size = random.randint(0, config.kv_sequence_length)
+        window_size = (left_window_size, 0)
+    elif causal:
+        left_window_size = -1
+        window_size = (-1, 0)
+
+    # Pytorch to compare
+    k_cache_ref = new_k.clone()
+    v_cache_ref = new_v.clone()
+    # if past_format == Formats.BNSH:
+    #     k_cache_ref = k_cache_ref.transpose(1, 2)
+    #     v_cache_ref = v_cache_ref.transpose(1, 2)
+    cache_seqlens = torch.tensor([config.kv_sequence_length], device="cuda").repeat(config.batch_size)
+    # cache_seqlens = torch.randint(
+    #     0,
+    #     config.kv_sequence_length,
+    #     (config.batch_size,),
+    #     dtype=torch.int32,
+    #     device="cuda",
+    # )
+    # cache_seqlens[random.randint(0, cache_seqlens.size(dim=0) - 1)] = config.kv_sequence_length
+    brange = rearrange(torch.arange(config.kv_sequence_length, device="cuda"), "s -> 1 s")
+    cache_seqlens_expanded = rearrange(cache_seqlens, "b -> b 1")
+    new_mask = brange < cache_seqlens_expanded
+    k_cache_rep = repeat(k_cache_ref, "b s h d -> b s (h g) d", g=config.num_heads // config.kv_num_heads)
+    v_cache_rep = repeat(v_cache_ref, "b s h d -> b s (h g) d", g=config.num_heads // config.kv_num_heads)
+    out_ref, _ = attention_ref(
+        q, k_cache_rep, v_cache_rep, None, new_mask, 0.0, None, causal=True, window_size=window_size
+    )
+    out_ref = out_ref.detach().cpu().numpy()
+    if past_format == Formats.BNSH:
+        k_cache_ref = k_cache_ref.transpose(1, 2)
+        v_cache_ref = v_cache_ref.transpose(1, 2)
+
+    # Flash function
+    out, present_k, present_v = gqa_prompt_func(
+        q, None, None, config, new_k, new_v, cache_seqlens, left_window_size, past_format, False
+    )
+    out = torch.squeeze(out, 0)
+    out = torch.reshape(out, (config.batch_size, config.q_sequence_length, config.num_heads, config.head_size))
+    out = out.detach().cpu().numpy()
+
+    # Make sure past-present buffer updating correctly
+    assert numpy.allclose(present_k, k_cache_ref.detach().cpu().numpy(), rtol=rtol, atol=atol, equal_nan=True)
+    assert numpy.allclose(present_v, v_cache_ref.detach().cpu().numpy(), rtol=rtol, atol=atol, equal_nan=True)
+
+    # Compare results
+    print(
+        "KV-buffer",
+        "past kv format:",
+        "BSNH" if past_format == Formats.BSNH else "BNSH",
+        " B:",
+        config.batch_size,
+        " S:",
+        config.q_sequence_length,
+        " kv S:",
+        config.kv_sequence_length,
+        " N:",
+        config.num_heads,
+        " kv N:",
+        config.kv_num_heads,
+        " h:",
+        config.head_size,
+        " Mean Error:",
+        numpy.mean(numpy.abs(out - out_ref)),
+        numpy.allclose(
+            out,
+            out_ref,
+            rtol=rtol,
+            atol=atol,
+            equal_nan=True,
+        ),
+    )
+
+
+def parity_check_gqa_past(
+    config,
+    causal=False,
+    local=False,
+    past_format=Formats.BSNH,
+    rtol=1e-3,
+    atol=1e-3,
+):
+    q = torch.randn(
+        config.batch_size,
+        config.sequence_length,
+        config.num_heads,
+        config.head_size,
+        device="cuda",
+        dtype=torch.float16,
+        requires_grad=False,
+    )
+    k = torch.randn(
+        config.batch_size,
+        config.kv_sequence_length if past_format == Formats.BSNH else config.kv_num_heads,
+        config.kv_num_heads if past_format == Formats.BSNH else config.kv_sequence_length,
+        config.head_size,
+        device="cuda",
+        dtype=torch.float16,
+        requires_grad=False,
+    )
+    v = torch.randn(
+        config.batch_size,
+        config.kv_sequence_length if past_format == Formats.BSNH else config.kv_num_heads,
+        config.kv_num_heads if past_format == Formats.BSNH else config.kv_sequence_length,
+        config.head_size,
+        device="cuda",
+        dtype=torch.float16,
+        requires_grad=False,
+    )
+    new_k = torch.randn(
+        config.batch_size,
+        config.sequence_length,
+        config.kv_num_heads,
+        config.head_size,
+        device="cuda",
+        dtype=torch.float16,
+        requires_grad=False,
+    )
+    new_v = torch.randn(
+        config.batch_size,
+        config.sequence_length,
+        config.kv_num_heads,
+        config.head_size,
+        device="cuda",
+        dtype=torch.float16,
+        requires_grad=False,
+    )
+    window_size = (-1, -1)
+    left_window_size = -1
+    if local:
+        left_window_size = random.randint(0, config.kv_sequence_length)
+        window_size = (left_window_size, 0)
+    elif causal:
+        left_window_size = -1
+        window_size = (-1, 0)
+
+    # Pytorch to compare
+    k_cache_ref = k.clone()
+    v_cache_ref = v.clone()
+    if past_format == Formats.BNSH:
+        k_cache_ref = k_cache_ref.transpose(1, 2)
+        v_cache_ref = v_cache_ref.transpose(1, 2)
+    # cache_seqlens = torch.tensor([config.past_sequence_length], device="cuda").repeat(config.batch_size)
+    cache_seqlens = torch.randint(
+        0,
+        config.kv_sequence_length - config.sequence_length + 1,
+        (config.batch_size,),
+        dtype=torch.int32,
+        device="cuda",
+    )
+    arange = rearrange(torch.arange(config.kv_sequence_length, device="cuda"), "s -> 1 s")
+    cache_seqlens_expanded = rearrange(cache_seqlens, "b -> b 1")
+    update_mask = torch.logical_and(
+        cache_seqlens_expanded <= arange, arange < cache_seqlens_expanded + config.sequence_length
+    )
+    k_cache_ref[update_mask] = rearrange(new_k, "b s ... -> (b s) ...")
+    v_cache_ref[update_mask] = rearrange(new_v, "b s ... -> (b s) ...")
+    k_cache_rep = repeat(k_cache_ref, "b s h d -> b s (h g) d", g=config.num_heads // config.kv_num_heads)
+    v_cache_rep = repeat(v_cache_ref, "b s h d -> b s (h g) d", g=config.num_heads // config.kv_num_heads)
+    key_padding_mask = arange < cache_seqlens_expanded + config.sequence_length
+    out_ref, _ = attention_ref(
+        q, k_cache_rep, v_cache_rep, None, key_padding_mask, 0.0, None, causal=True, window_size=window_size
+    )
+    out_ref = out_ref.detach().cpu().numpy()
+    if past_format == Formats.BNSH:
+        k_cache_ref = k_cache_ref.transpose(1, 2)
+        v_cache_ref = v_cache_ref.transpose(1, 2)
+
+    # Flash function
+    out, present_k, present_v = gqa_past_func(
+        q, k, v, config, new_k, new_v, cache_seqlens, past_format, True, left_window_size
+    )
+    out = torch.squeeze(out, 0)
+    out = torch.reshape(out, (config.batch_size, config.sequence_length, config.num_heads, config.head_size))
+    out = out.detach().cpu().numpy()
+
+    # Make sure past-present buffer updating correctly
+    assert numpy.allclose(present_k, k_cache_ref.detach().cpu().numpy(), rtol=rtol, atol=atol, equal_nan=True)
+    assert numpy.allclose(present_v, v_cache_ref.detach().cpu().numpy(), rtol=rtol, atol=atol, equal_nan=True)
+
+    # Compare results
+    print(
+        "KV-buffer",
+        "past kv format:",
+        "BSNH" if past_format == Formats.BSNH else "BNSH",
+        " causal:",
+        causal,
+        " local:",
+        local,
+        " B:",
+        config.batch_size,
+        " S:",
+        config.sequence_length,
+        " kv S:",
+        config.kv_sequence_length,
+        " N:",
+        config.num_heads,
+        " kv N:",
+        config.kv_num_heads,
+        " h:",
+        config.head_size,
+        " Mean Error:",
+        numpy.mean(numpy.abs(out - out_ref)),
+        numpy.allclose(
+            out,
+            out_ref,
+            rtol=rtol,
+            atol=atol,
+            equal_nan=True,
+        ),
+    )
+
+
+def parity_check_gqa_past_no_buff(
+    config,
+    causal=False,
+    local=False,
+    past_format=Formats.BSNH,
+    rtol=1e-3,
+    atol=1e-3,
+):
+    torch.manual_seed(69)
+    q = torch.randn(
+        config.batch_size,
+        config.sequence_length,
+        config.num_heads,
+        config.head_size,
+        device="cuda",
+        dtype=torch.float16,
+        requires_grad=False,
+    )
+    k = torch.randn(
+        config.batch_size,
+        config.kv_sequence_length if past_format == Formats.BSNH else config.kv_num_heads,
+        config.kv_num_heads if past_format == Formats.BSNH else config.kv_sequence_length,
+        config.head_size,
+        device="cuda",
+        dtype=torch.float16,
+        requires_grad=False,
+    )
+    v = torch.randn(
+        config.batch_size,
+        config.kv_sequence_length if past_format == Formats.BSNH else config.kv_num_heads,
+        config.kv_num_heads if past_format == Formats.BSNH else config.kv_sequence_length,
+        config.head_size,
+        device="cuda",
+        dtype=torch.float16,
+        requires_grad=False,
+    )
+    new_k = torch.randn(
+        config.batch_size,
+        config.sequence_length,
+        config.kv_num_heads,
+        config.head_size,
+        device="cuda",
+        dtype=torch.float16,
+        requires_grad=False,
+    )
+    new_v = torch.randn(
+        config.batch_size,
+        config.sequence_length,
+        config.kv_num_heads,
+        config.head_size,
+        device="cuda",
+        dtype=torch.float16,
+        requires_grad=False,
+    )
+
+    window_size = (-1, -1)
+    left_window_size = -1
+    if local:
+        left_window_size = random.randint(0, config.kv_sequence_length)
+        window_size = (left_window_size, 0)
+    elif causal:
+        left_window_size = -1
+        window_size = (-1, 0)
+
+    # Pytorch to compare
+    k_cache_ref = k.clone()
+    v_cache_ref = v.clone()
+    if past_format == Formats.BNSH:
+        k_cache_ref = k_cache_ref.transpose(1, 2)
+        v_cache_ref = v_cache_ref.transpose(1, 2)
+    k_cache_ref = torch.cat((k_cache_ref, new_k), 1)
+    v_cache_ref = torch.cat((v_cache_ref, new_v), 1)
+    # cache_seqlens = torch.tensor([config.past_sequence_length], device="cuda").repeat(config.batch_size)
+    cache_seqlens = torch.randint(
+        0,
+        config.kv_sequence_length,
+        (config.batch_size,),
+        dtype=torch.int32,
+        device="cuda",
+    )
+    cache_seqlens[random.randint(0, config.batch_size - 1)] = config.kv_sequence_length
+    arange = rearrange(torch.arange(config.kv_sequence_length + config.sequence_length, device="cuda"), "s -> 1 s")
+    cache_seqlens_expanded = rearrange(cache_seqlens, "b -> b 1")
+    update_mask = torch.logical_and(
+        cache_seqlens_expanded <= arange, arange < cache_seqlens_expanded + config.sequence_length
+    )
+    k_cache_ref[update_mask] = rearrange(new_k, "b s ... -> (b s) ...")
+    v_cache_ref[update_mask] = rearrange(new_v, "b s ... -> (b s) ...")
+    k_cache_rep = repeat(k_cache_ref, "b s h d -> b s (h g) d", g=config.num_heads // config.kv_num_heads)
+    v_cache_rep = repeat(v_cache_ref, "b s h d -> b s (h g) d", g=config.num_heads // config.kv_num_heads)
+    key_padding_mask = arange < cache_seqlens_expanded + config.sequence_length
+    out_ref, _ = attention_ref(
+        q, k_cache_rep, v_cache_rep, None, key_padding_mask, 0.0, None, causal=True, window_size=window_size
+    )
+    out_ref = out_ref.detach().cpu().numpy()
+    if past_format == Formats.BNSH:
+        k_cache_ref = k_cache_ref.transpose(1, 2)
+        v_cache_ref = v_cache_ref.transpose(1, 2)
+
+    # Flash function
+    out, present_k, present_v = gqa_past_func(
+        q, k, v, config, new_k, new_v, cache_seqlens, past_format, False, window_size=left_window_size
+    )
+    out = torch.squeeze(out, 0)
+    out = torch.reshape(out, (config.batch_size, config.sequence_length, config.num_heads, config.head_size))
+    out = out.detach().cpu().numpy()
+
+    # Make sure past-present buffer updating correctly
+    # assert numpy.allclose(
+    #     present_k[:, :, :-1, :], k_cache_ref.detach().cpu().numpy()[:, :, :-1, :], rtol=rtol, atol=atol, equal_nan=True
+    # )
+    # assert numpy.allclose(
+    #     present_v[:, :, :-1, :], v_cache_ref.detach().cpu().numpy()[:, :, :-1, :], rtol=rtol, atol=atol, equal_nan=True
+    # )
+
+    # Compare results
+    print(
+        "NO buff",
+        " causal:",
+        causal,
+        " local:",
+        local,
+        "past kv format:",
+        "BSNH" if past_format == Formats.BSNH else "BNSH",
+        " B:",
+        config.batch_size,
+        " S:",
+        config.sequence_length,
+        " kv S:",
+        config.kv_sequence_length,
+        " N:",
+        config.num_heads,
+        " kv N:",
+        config.kv_num_heads,
+        " h:",
+        config.head_size,
+        " Mean Error:",
+        numpy.mean(numpy.abs(out - out_ref)),
+        numpy.allclose(
+            out,
+            out_ref,
+            rtol=rtol,
+            atol=atol,
+            equal_nan=True,
+        ),
+    )
+
+
+class TestMHA(unittest.TestCase):
+    def test_packed_mha(self):
+        if not torch.cuda.is_available() or platform.system() != "Linux":
+            return
+        major, _ = torch.cuda.get_device_capability()
+        if major < 8:
+            return
+        print("-------- TEST PACKED MHA ---------")
+        batches = [2] if pipeline_mode else [1, 5]
+        seqs = [8, 97, 256, 1024] if pipeline_mode else [97, 128, 200, 256, 257, 384, 512, 768, 1024, 1025, 2048]
+        num_h = [1, 3] if pipeline_mode else [1, 6, 16]
+        h_sizes = [16, 256] if pipeline_mode else [32, 40, 64, 80, 96, 128, 160, 192, 224, 256]
+        for b in batches:
+            for s in seqs:
+                for n in num_h:
+                    for h in h_sizes:
+                        config = Config(b, s, s, 0, n, n, h)
+                        parity_check_mha(config, True)
+
+    def test_mha(self):
+        if not torch.cuda.is_available() or platform.system() != "Linux":
+            return
+        major, _ = torch.cuda.get_device_capability()
+        if major < 8:
+            return
+        print("-------- TEST MHA ---------")
+        batches = [2] if pipeline_mode else [1, 5]
+        seqs = (
+            [(1, 128), (113, 211), (2048, 2048)]
+            if pipeline_mode
+            else [
+                (113, 203),
+                (128, 217),
+                (113, 211),
+                (108, 256),
+                (256, 512),
+                (512, 256),
+                (1024, 1024),
+                (1023, 1024),
+                (1024, 1023),
+                (2048, 2048),
+            ]
+        )
+        num_h = [1, 3] if pipeline_mode else [1, 6, 16]
+        h_sizes = [16, 256] if pipeline_mode else [32, 40, 64, 80, 96, 128, 160, 192, 224, 256]
+        for b in batches:
+            for s, s2 in seqs:
+                for n in num_h:
+                    for h in h_sizes:
+                        config = Config(b, s, s2, 0, n, n, h)
+                        parity_check_mha(config, False)
+
+
+class TestGQA(unittest.TestCase):
+    def test_gqa_no_past(self):
+        if not torch.cuda.is_available():
+            return
+        major, minor = torch.cuda.get_device_capability()
+        torch.manual_seed(69)
+        print("-------- TEST GQA NO PAST (PROMPT CASE) ---------")
+        batches = [3] if pipeline_mode else [1, 3, 5]
+        seqs = (
+            [
+                (127, 127),
+                (35, 35),
+                (2000, 2000),
+                (200, 200),
+                (240, 240),
+            ]
+            if pipeline_mode
+            else [
+                (127, 127),
+                (35, 35),
+                (2000, 2000),
+                (200, 200),
+                (240, 240),
+            ]
+        )
+        num_h = [(32, 32), (9, 3), (4, 4)] if pipeline_mode else [(6, 6), (6, 3), (9, 9), (9, 3)]
+        h_sizes = [16, 128, 256] if pipeline_mode else [32, 40, 64, 80, 96, 128, 160, 192, 224, 256]
+        if major < 5 or (major == 5 and minor < 3):
+            return
+        print("------- MEMORY EFFICIENT ATTENTION (PROMPT CASE) ---------")
+        os.environ["ORT_DISABLE_FLASH_ATTENTION"] = "1"
+        for b in batches:
+            for sq, skv in seqs:
+                for n, n2 in num_h:
+                    for h in h_sizes:
+                        for past_kv_format in [Formats.BNSH]:
+                            config = PromptConfig(b, sq, skv, sq + skv + 8, n, n2, h)
+                            parity_check_gqa_prompt(config, past_format=past_kv_format)
+                            parity_check_gqa_prompt_no_buff(config, past_format=past_kv_format)
+        if major < 8 or platform.system() != "Linux":
+            return
+        print("------- FLASH ATTENTION (PROMPT CASE) --------")
+        os.environ["ORT_DISABLE_FLASH_ATTENTION"] = "0"
+        for b in batches:
+            for sq, skv in seqs:
+                for n, n2 in num_h:
+                    for h in h_sizes:
+                        for local in [False, True]:
+                            for past_kv_format in [Formats.BNSH]:
+                                config = PromptConfig(b, sq, skv, sq + skv + 8, n, n2, h)
+                                parity_check_gqa_prompt(config, local=local, past_format=past_kv_format)
+                                parity_check_gqa_prompt_no_buff(config, local=local, past_format=past_kv_format)
+
+    def test_gqa_past(self):
+        if not torch.cuda.is_available():
+            return
+        major, minor = torch.cuda.get_device_capability()
+        if major < 5 or (major == 5 and minor < 3):
+            return
+        os.environ["ORT_DISABLE_FLASH_ATTENTION"] = "1"
+        print("-------- TEST GQA PAST (TOKEN GEN) ---------")
+        print("-------- MEMORY EFFICIENT (TOKEN GEN) --------")
+        batches = [5] if pipeline_mode else [1, 3, 5]
+        seqs = (
+            [(1, 128), (1, 1024), (1, 2048)]
+            if pipeline_mode
+            else [
+                (1, 128),
+                (1, 339),
+                (1, 1024),
+                (1, 5000),
+                (1, 800),
+                (1, 256),
+                (1, 799),
+                (1, 2048),
+                # (1, 128 * 512),
+                # (16, 128 * 512),
+                # (128, 128),
+            ]
+        )
+        num_h = [(32, 32), (9, 3), (4, 4)] if pipeline_mode else [(6, 6), (6, 3), (9, 9), (9, 3)]
+        h_sizes = [16, 128, 256] if pipeline_mode else [32, 40, 64, 80, 96, 128, 160, 192, 224, 256]
+        random.seed(69)
+        for b in batches:
+            for s, s2 in seqs:
+                for n, n2 in num_h:
+                    for h in h_sizes:
+                        for past_kv_format in [Formats.BNSH]:
+                            sp = random.randint(1, s2 - s) if s2 - s > 0 else 0
+                            config = Config(b, s, s2, sp, n, n2, h)
+                            parity_check_gqa_past(
+                                config,
+                                past_format=past_kv_format,
+                                rtol=1e-3,
+                                atol=1e-3,
+                            )
+                            parity_check_gqa_past_no_buff(
+                                config,
+                                past_format=past_kv_format,
+                                rtol=1e-3,
+                                atol=1e-3,
+                            )
+        if major < 8 or platform.system() != "Linux":
+            return
+        print("------- FLASH ATTENTION (TOKEN GEN) -------")
+        os.environ["ORT_DISABLE_FLASH_ATTENTION"] = "0"
+        for b in batches:
+            for s, s2 in seqs:
+                for n, n2 in num_h:
+                    for h in h_sizes:
+                        for local in [False, True]:
+                            for past_kv_format in [Formats.BNSH]:
+                                sp = random.randint(1, s2 - s) if s2 - s > 0 else 0
+                                config = Config(b, s, s2, sp, n, n2, h)
+                                parity_check_gqa_past(
+                                    config,
+                                    local=local,
+                                    past_format=past_kv_format,
+                                    rtol=1e-3,
+                                    atol=1e-3,
+                                )
+                                parity_check_gqa_past_no_buff(
+                                    config,
+                                    local=local,
+                                    past_format=past_kv_format,
+                                    rtol=1e-3,
+                                    atol=1e-3,
+                                )
+
+
 if __name__ == "__main__":
-    print("-------- TEST PACKED MHA ---------")
-    for b in [5]:
-        for s in [97, 128, 200, 256, 257, 384, 512, 768, 1024, 1025, 2048]:
-            for n in [6]:
-                for h in [32, 40, 59, 64, 80, 96, 111, 128, 160, 192, 224, 256]:
-                    config = Config(b, s, s, n, h)
-                    parity_check(config, True)
-    print("-------- TEST MHA ---------")
-    for b in [5]:
-        for s, s2 in [
-            (113, 203),
-            (128, 217),
-            (113, 211),
-            (108, 256),
-            (256, 512),
-            (512, 256),
-            (1024, 1024),
-            (1023, 1024),
-            (1024, 1023),
-            (2048, 2048),
-        ]:
-            for n in [6]:
-                for h in [32, 40, 59, 64, 80, 96, 111, 128, 160, 192, 224, 256]:
-                    config = Config(b, s, s2, n, h)
-                    parity_check(config, False)
+    unittest.main()
diff --git a/onnxruntime/test/python/transformers/test_generation.py b/onnxruntime/test/python/transformers/test_generation.py
index 55c51435823c6..c9db1fbc02931 100644
--- a/onnxruntime/test/python/transformers/test_generation.py
+++ b/onnxruntime/test/python/transformers/test_generation.py
@@ -378,6 +378,21 @@ def test_logits_processor(self):
         logits_processor = ["--use_logits_processor"]
         self.run_configs(logits_processor)
 
+    @pytest.mark.slow
+    def test_cross_qk_overall(self):
+        decoder_input_ids = [
+            "--chain_model",
+            "--collect_cross_qk",
+            "--output_cross_qk",
+            "--use_forced_decoder_ids",
+            "--extra_decoding_ids",
+            "--output_no_speech_probs",
+            "--use_vocab_mask",
+            "--use_prefix_vocab_mask",
+            "--use_logits_processor",
+        ]
+        self.run_configs(decoder_input_ids)
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/onnxruntime/test/python/transformers/test_group_norm.py b/onnxruntime/test/python/transformers/test_group_norm.py
new file mode 100644
index 0000000000000..bf295a65c8b53
--- /dev/null
+++ b/onnxruntime/test/python/transformers/test_group_norm.py
@@ -0,0 +1,541 @@
+# --------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation.  All rights reserved.
+# Licensed under the MIT License.  See License.txt in the project root for
+# license information.
+# -------------------------------------------------------------------------
+import statistics
+from dataclasses import dataclass
+from enum import Enum
+from time import perf_counter
+from typing import Optional, Tuple
+
+import numpy
+import torch
+from onnx import TensorProto, helper
+
+from onnxruntime import InferenceSession
+from onnxruntime.transformers.io_binding_helper import CudaSession
+
+torch.manual_seed(0)
+
+
+class GroupNormOpType(Enum):
+    GROUP_NORM = 1
+    SKIP_GROUP_NORM = 2
+
+
+@dataclass
+class GroupNormConfig:
+    batch_size: int
+    height: int
+    width: int
+    channels: int
+    epsilon: float = 1e-5
+    num_groups: int = 32
+    activation: bool = False
+    channels_last: bool = True
+    fp16: bool = False
+
+    op_type: GroupNormOpType = GroupNormOpType.GROUP_NORM
+    has_bias: bool = False
+    has_add_out: bool = False
+    broadcast_skip: int = 0  # 2 for (N, C), 4 for (N, 1, 1, C)
+
+    def get_skip_symbolic_shape(self):
+        skip_shape = {0: ["N", "H", "W", "C"], 2: ["N", "C"], 4: ["N", 1, 1, "C"]}
+        return skip_shape[self.broadcast_skip]
+
+    def get_skip_shape(self):
+        skip_shape = {
+            0: [self.batch_size, self.height, self.width, self.channels],
+            2: [self.batch_size, self.channels],
+            4: [self.batch_size, 1, 1, self.channels],
+        }
+        return skip_shape[self.broadcast_skip]
+
+    def broadcast(self, skip: torch.Tensor):
+        if self.broadcast_skip == 2:
+            return skip.reshape(self.batch_size, 1, 1, self.channels)
+
+        return skip
+
+    @staticmethod
+    def create(
+        b: int,
+        h: int,
+        w: int,
+        c: int,
+        fp16: bool = False,
+        activation: bool = False,
+        template: int = 0,
+        num_groups: int = 32,
+    ):
+        if template == 0:
+            return GroupNormConfig(
+                b, h, w, c, fp16=fp16, activation=activation, op_type=GroupNormOpType.GROUP_NORM, num_groups=num_groups
+            )
+
+        if template == 1:
+            return GroupNormConfig(
+                b,
+                h,
+                w,
+                c,
+                fp16=fp16,
+                activation=activation,
+                op_type=GroupNormOpType.SKIP_GROUP_NORM,
+                has_bias=True,
+                has_add_out=True,
+                broadcast_skip=0,
+                num_groups=num_groups,
+            )
+
+        if template == 2:
+            return GroupNormConfig(
+                b,
+                h,
+                w,
+                c,
+                fp16=fp16,
+                activation=activation,
+                op_type=GroupNormOpType.SKIP_GROUP_NORM,
+                has_bias=False,
+                has_add_out=False,
+                broadcast_skip=2,
+                num_groups=num_groups,
+            )
+
+        if template == 3:
+            return GroupNormConfig(
+                b,
+                h,
+                w,
+                c,
+                fp16=fp16,
+                activation=activation,
+                op_type=GroupNormOpType.SKIP_GROUP_NORM,
+                has_bias=True,
+                has_add_out=False,
+                broadcast_skip=4,
+                num_groups=num_groups,
+            )
+
+        if template == 4:  # No bias
+            return GroupNormConfig(
+                b,
+                h,
+                w,
+                c,
+                fp16=fp16,
+                activation=activation,
+                op_type=GroupNormOpType.SKIP_GROUP_NORM,
+                has_bias=False,
+                has_add_out=True,
+                broadcast_skip=0,
+                num_groups=num_groups,
+            )
+
+        if template == 5:  # No bias, no add_out
+            return GroupNormConfig(
+                b,
+                h,
+                w,
+                c,
+                fp16=fp16,
+                activation=activation,
+                op_type=GroupNormOpType.SKIP_GROUP_NORM,
+                has_bias=False,
+                has_add_out=False,
+                broadcast_skip=0,
+                num_groups=num_groups,
+            )
+
+        return None
+
+
+def create_group_norm_graph(config: GroupNormConfig) -> bytes:
+    inputs = ["input", "gamma", "beta"]
+    outputs = ["output"]
+    op_type = "GroupNorm"
+    if config.op_type == GroupNormOpType.SKIP_GROUP_NORM:
+        op_type = "SkipGroupNorm"
+        inputs = [*inputs, "skip"]
+        if config.has_bias:
+            inputs = [*inputs, "bias"]
+        if config.has_add_out:
+            outputs = [*outputs, "add_out"]
+
+    nodes = [
+        helper.make_node(
+            op_type,
+            inputs,
+            outputs,
+            op_type + "_0",
+            activation=int(config.activation),
+            channels_last=int(config.channels_last),
+            epsilon=config.epsilon,
+            groups=config.num_groups,
+            domain="com.microsoft",
+        ),
+    ]
+
+    float_type = TensorProto.FLOAT16 if config.fp16 else TensorProto.FLOAT
+
+    input_shapes = [
+        helper.make_tensor_value_info("input", float_type, ["N", "H", "W", "C"]),
+        helper.make_tensor_value_info("gamma", TensorProto.FLOAT, ["C"]),
+        helper.make_tensor_value_info("beta", TensorProto.FLOAT, ["C"]),
+    ]
+    output_shapes = [
+        helper.make_tensor_value_info("output", float_type, ["N", "H", "W", "C"]),
+    ]
+
+    if config.op_type == GroupNormOpType.SKIP_GROUP_NORM:
+        input_shapes = [
+            *input_shapes,
+            helper.make_tensor_value_info("skip", float_type, config.get_skip_symbolic_shape()),
+        ]
+        if config.has_bias:
+            input_shapes = [*input_shapes, helper.make_tensor_value_info("bias", float_type, ["C"])]
+        if config.has_add_out:
+            output_shapes = [*output_shapes, helper.make_tensor_value_info("add_out", float_type, ["N", "H", "W", "C"])]
+
+    graph = helper.make_graph(
+        nodes,
+        "Group_Norm_Graph",
+        input_shapes,
+        output_shapes,
+    )
+
+    model = helper.make_model(graph)
+    return model.SerializeToString()
+
+
+def group_norm_ort(
+    src: torch.Tensor,
+    gamma: torch.Tensor,
+    beta: torch.Tensor,
+    skip: Optional[torch.Tensor],
+    bias: Optional[torch.Tensor],
+    config: GroupNormConfig,
+    measure_latency=False,
+) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[float]]:
+    onnx_model_str = create_group_norm_graph(config)
+    ort_session = InferenceSession(onnx_model_str, providers=["CUDAExecutionProvider"])
+
+    session = CudaSession(ort_session, device=torch.device("cuda:0"))
+
+    io_shape = {
+        "input": [config.batch_size, config.height, config.width, config.channels],
+        "gamma": [config.channels],
+        "beta": [config.channels],
+        "output": [config.batch_size, config.height, config.width, config.channels],
+    }
+
+    if config.op_type == GroupNormOpType.SKIP_GROUP_NORM:
+        io_shape["skip"] = config.get_skip_shape()
+        if config.has_bias:
+            io_shape["bias"] = [config.channels]
+        if config.has_add_out:
+            io_shape["add_out"] = [config.batch_size, config.height, config.width, config.channels]
+
+    session.allocate_buffers(io_shape)
+
+    ort_inputs = {
+        "input": src,
+        "gamma": gamma,
+        "beta": beta,
+    }
+
+    if config.op_type == GroupNormOpType.SKIP_GROUP_NORM:
+        ort_inputs["skip"] = skip
+        if config.has_bias:
+            ort_inputs["bias"] = bias
+
+    ort_outputs = session.infer(ort_inputs)
+    output = ort_outputs["output"]
+
+    add_out = (
+        ort_outputs["add_out"] if config.op_type == GroupNormOpType.SKIP_GROUP_NORM and config.has_add_out else None
+    )
+
+    if measure_latency:
+        latency_list = []
+        for _ in range(10000):
+            start_time = perf_counter()
+            session.infer(ort_inputs)
+            end_time = perf_counter()
+            latency_list.append(end_time - start_time)
+        average_latency = statistics.mean(latency_list)
+        return output, add_out, average_latency
+
+    return output, add_out, None
+
+
+def group_norm_torch(
+    src: torch.Tensor,
+    gamma: torch.Tensor,
+    beta: torch.Tensor,
+    skip: Optional[torch.Tensor],
+    bias: Optional[torch.Tensor],
+    config: GroupNormConfig,
+) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
+    add_out = src
+
+    if skip is not None:
+        assert config.op_type == GroupNormOpType.SKIP_GROUP_NORM
+        add_out = add_out + config.broadcast(skip)
+
+    if bias is not None:
+        assert config.op_type == GroupNormOpType.SKIP_GROUP_NORM
+        add_out = add_out + bias.reshape(1, 1, 1, bias.shape[0])
+
+    x = add_out
+    if config.channels_last:
+        x = add_out.clone().permute(0, 3, 1, 2)  # from NHWC to NCHW
+
+    weight = gamma.to(x.dtype)
+    bias = beta.to(x.dtype)
+    output = torch.nn.functional.group_norm(x, config.num_groups, weight=weight, bias=bias, eps=config.epsilon)
+
+    if config.activation:
+        torch.nn.functional.silu(output, inplace=True)
+
+    if config.channels_last:
+        output = output.permute(0, 2, 3, 1)  # from NCHW to NHWC
+
+    return output, add_out
+
+
+def print_tensor(name, tensor):
+    # Print in the format that could be directly added to unit tests in C++.
+    torch.set_printoptions(precision=6, sci_mode=False, linewidth=100, profile="full", threshold=1000)
+    print(name)
+    if tensor is not None:
+        print("shape", tensor.shape)
+        text = str(tensor.clone().flatten())
+        print(text.replace("[", "[\n").replace("]", ",\n]").replace(",", "f,"))
+    else:
+        print(tensor)
+
+
+def run_parity(config, measure_latency=True, verbose=False):
+    float_type = torch.float16 if config.fp16 else torch.float32
+
+    input_tensor = torch.randn(
+        config.batch_size,
+        config.height,
+        config.width,
+        config.channels,
+        device="cuda",
+        dtype=float_type,
+        requires_grad=False,
+    )
+
+    gamma = torch.randn(
+        config.channels,
+        device="cuda",
+        dtype=torch.float32,
+        requires_grad=False,
+    )
+
+    beta = torch.randn(
+        config.channels,
+        device="cuda",
+        dtype=torch.float32,
+        requires_grad=False,
+    )
+
+    skip = None
+    bias = None
+    if config.op_type == GroupNormOpType.SKIP_GROUP_NORM:
+        skip = torch.randn(
+            *config.get_skip_shape(),
+            device="cuda",
+            dtype=float_type,
+            requires_grad=False,
+        )
+        if config.has_bias:
+            bias = torch.randn(
+                config.channels,
+                device="cuda",
+                dtype=float_type,
+                requires_grad=False,
+            )
+
+    if verbose:
+        print(config)
+        print_tensor("input", input_tensor)
+        print_tensor("gamma", gamma)
+        print_tensor("beta", beta)
+        print_tensor("skip", skip)
+        print_tensor("bias", bias)
+
+    out_ort, ort_add_out, latency = group_norm_ort(
+        input_tensor, gamma, beta, skip, bias, config, measure_latency=measure_latency
+    )
+
+    if verbose:
+        print_tensor("out_ort", out_ort)
+        print_tensor("ort_add_out", ort_add_out)
+
+    torch_out, torch_add_out = group_norm_torch(input_tensor, gamma, beta, skip, bias, config)
+
+    if verbose:
+        print_tensor("torch_out", torch_out)
+        print_tensor("torch_add_out", torch_add_out)
+
+    average_diff = numpy.mean(numpy.abs(out_ort.detach().cpu().numpy() - torch_out.detach().cpu().numpy()))
+
+    is_close = numpy.allclose(
+        out_ort.detach().cpu().numpy(),
+        torch_out.detach().cpu().numpy(),
+        rtol=1e-1 if config.fp16 else 1e-3,
+        atol=1e-1 if config.fp16 else 1e-3,
+        equal_nan=True,
+    )
+
+    is_add_out_close = (
+        numpy.allclose(
+            ort_add_out.detach().cpu().numpy(),
+            torch_add_out.detach().cpu().numpy(),
+            rtol=1e-1 if config.fp16 else 1e-3,
+            atol=1e-1 if config.fp16 else 1e-3,
+            equal_nan=True,
+        )
+        if ort_add_out is not None
+        else ""
+    )
+
+    # Compare results
+    print(
+        config.op_type.name,
+        " B:",
+        config.batch_size,
+        " H:",
+        config.height,
+        " W:",
+        config.width,
+        " C:",
+        config.channels,
+        " G:",
+        config.num_groups,
+        " activation:",
+        int(config.activation),
+        " channels_last:",
+        int(config.channels_last),
+        " fp16:",
+        int(config.fp16),
+        f" Latency(μs): {int(latency * 1e6)}" if isinstance(latency, float) else "",
+        " AvgDiff:",
+        average_diff,
+        " Pass:",
+        is_close,
+        is_add_out_close,
+    )
+
+
+def get_latent_height_width():
+    default_size = [(512, 512), (768, 768), (1024, 1024)]
+    small_img_size = [(512, 768), (768, 512)]
+    xl_img_size = [
+        (1152, 896),
+        (896, 1152),
+        (1216, 832),
+        (832, 1216),
+        (1344, 768),
+        (768, 1344),
+        (1536, 640),
+        (640, 1536),
+    ]
+    return [(int(h / 8), int(w / 8)) for (h, w) in default_size + small_img_size + xl_img_size]
+
+
+def get_channels():
+    return [128, 256, 512, 1024, 2048, 320, 640, 960, 1920, 2560, 384, 768, 1536, 3072, 1152, 2304]
+
+
+def run_activation(template: int, fp16, measure_latency=False):
+    print("Test GroupNorm with Silu Activation for ", "fp16" if fp16 else "fp32")
+    for b in [2]:
+        for h, w in get_latent_height_width():
+            for c in get_channels():
+                config = GroupNormConfig.create(b, h, w, c, fp16=fp16, activation=True, template=template)
+                run_parity(config, measure_latency=measure_latency)
+
+
+def run_no_activation(template: int, fp16, measure_latency=False):
+    print("Test GroupNorm without Activation for ", "fp16" if fp16 else "fp32")
+    for b in [1, 2, 4]:
+        for h, w in get_latent_height_width():
+            for c in get_channels():
+                config = GroupNormConfig.create(b, h, w, c, fp16=fp16, template=template)
+                run_parity(config, measure_latency=measure_latency)
+
+
+def run_all_groups(template: int, fp16, measure_latency=False):
+    group_sizes = [1, 2, 4, 8, 16, 32]
+    print("Test GroupNorm for different group sizes:", group_sizes)
+    for group_size in group_sizes:
+        for h, w in get_latent_height_width()[:3]:
+            for c in get_channels()[:2]:
+                config = GroupNormConfig.create(2, h, w, c, fp16=fp16, num_groups=group_size, template=template)
+                run_parity(config, measure_latency=measure_latency)
+
+
+def run_odd_channels(template: int, fp16, measure_latency=False):
+    # Test some random number of channels that can be divisible by 2 * num_groups
+    for h, w in get_latent_height_width():
+        for c in [448, 704, 832, 1664, 2240, 2688, 2880, 3008]:
+            config = GroupNormConfig.create(2, h, w, c, fp16=fp16, num_groups=32, template=template)
+            run_parity(config, measure_latency=measure_latency)
+
+
+def run_small_inputs(template: int, fp16):
+    config = GroupNormConfig.create(2, 2, 2, 16, fp16=fp16, activation=False, num_groups=4, template=template)
+    run_parity(config, measure_latency=False)
+
+    config = GroupNormConfig.create(1, 1, 1, 64, fp16=fp16, activation=False, num_groups=8, template=template)
+    run_parity(config, measure_latency=False)
+
+    config = GroupNormConfig.create(1, 1, 1, 64, fp16=fp16, activation=True, num_groups=8, template=template)
+    run_parity(config, measure_latency=False)
+
+
+def run_performance(fp16):
+    # Run perf test to tune parameters for given number of channels.
+    for h, w in get_latent_height_width()[:3]:
+        for c in get_channels():
+            config = GroupNormConfig.create(2, h, w, c, fp16=fp16, num_groups=32, template=0)
+            run_parity(config, measure_latency=True)
+
+
+def run_all(template: int):
+    for fp16 in [True, False]:
+        run_small_inputs(template, fp16)
+        run_odd_channels(template, fp16)
+        run_all_groups(template, fp16)
+        run_activation(template, fp16)
+        run_no_activation(template, fp16)
+
+
+def run_not_implemented():
+    # Expect failure. Check whether the error message is expected.
+    try:
+        config = GroupNormConfig(1, 2, 2, 513, num_groups=3)
+        run_parity(config)
+    except RuntimeError as e:
+        assert "GroupNorm in CUDA does not support the input: n=1 h=2 w=2 c=513 groups=3" in str(e)
+
+
+def main():
+    run_performance(True)
+
+    run_not_implemented()
+
+    for template in range(6):
+        run_all(template)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/onnxruntime/test/python/transformers/test_optimizer.py b/onnxruntime/test/python/transformers/test_optimizer.py
index eedadfd8d4448..c7db636a2f11f 100644
--- a/onnxruntime/test/python/transformers/test_optimizer.py
+++ b/onnxruntime/test/python/transformers/test_optimizer.py
@@ -122,7 +122,7 @@ def _test_optimizer_on_huggingface_model(
             "SkipLayerNormalization": expected_fusion_result_list[6],
         }
 
-        for _onnx_path, value in model_fusion_statistics.items():
+        for value in model_fusion_statistics.values():
             actual_node_count = value
 
         for op_type, count in expected_node_count.items():
@@ -354,7 +354,7 @@ def _test_optimizer_on_tf_model(self, model_name, expected_fusion_result_list, i
                 fusion_options,
             )
 
-        onnx_model = list(model_fusion_statistics.keys())[0]
+        onnx_model = next(iter(model_fusion_statistics.keys()))
         fusion_result_list = list(model_fusion_statistics[onnx_model].values())
 
         if validate_model:
diff --git a/onnxruntime/test/python/transformers/test_parity_moe.py b/onnxruntime/test/python/transformers/test_parity_moe.py
new file mode 100644
index 0000000000000..72ca5d9975c05
--- /dev/null
+++ b/onnxruntime/test/python/transformers/test_parity_moe.py
@@ -0,0 +1,431 @@
+# --------------------------------------------------------------------------
+# Copyright 2020 The HuggingFace Inc. team
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
+# --------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation.  All rights reserved.
+# Licensed under the MIT License.  See License.txt in the project root for
+# license information.
+# -------------------------------------------------------------------------
+
+import time
+import unittest
+
+import numpy
+import pytest
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from onnx import TensorProto, helper
+
+import onnxruntime
+
+torch.manual_seed(42)
+numpy.random.seed(42)
+
+
+ORT_DTYPE = TensorProto.FLOAT16
+NP_TYPE = numpy.float16 if ORT_DTYPE == TensorProto.FLOAT16 else numpy.float32
+THRESHOLD = 3e-2
+
+
+def value_string_of(numpy_array):
+    arr = numpy_array.flatten()
+    lines = ["f, ".join([str(v) for v in arr[i : min(i + 8, arr.size)]]) for i in range(0, arr.size, 8)]
+    return "{\n    " + "f,\n    ".join(lines) + "f}"
+
+
+def print_tensor(name, numpy_array):
+    print(f"const std::vector<float> {name} = {value_string_of(numpy_array)};")
+
+
+def create_moe_onnx_graph(
+    num_rows,
+    num_experts,
+    hidden_size,
+    inter_size,
+    fc1_experts_weights,
+    fc2_experts_weights,
+    fc1_experts_bias,
+    fc2_experts_bias,
+):
+    nodes = [
+        helper.make_node(
+            "MoE",
+            [
+                "input",
+                "router_probs",
+                "fc1_experts_weights",
+                "fc2_experts_weights",
+                "fc1_experts_bias",
+                "fc2_experts_bias",
+            ],
+            ["output"],
+            "MoE_0",
+            k=1,
+            activation_type="gelu",
+            domain="com.microsoft",
+        ),
+    ]
+
+    fc1_shape = [num_experts, hidden_size, inter_size]
+    fc2_shape = [num_experts, inter_size, hidden_size]
+
+    torch_type = torch.float16 if ORT_DTYPE == TensorProto.FLOAT16 else torch.float32
+
+    initializers = [
+        helper.make_tensor(
+            "fc1_experts_weights",
+            ORT_DTYPE,
+            fc1_shape,
+            fc1_experts_weights.to(torch_type).flatten().tolist(),
+            raw=False,
+        ),
+        helper.make_tensor(
+            "fc2_experts_weights",
+            ORT_DTYPE,
+            fc2_shape,
+            fc2_experts_weights.to(torch_type).flatten().tolist(),
+            raw=False,
+        ),
+    ]
+
+    fc1_bias_shape = [num_experts, inter_size]
+    fc2_bias_shape = [num_experts, hidden_size]
+    initializers.extend(
+        [
+            helper.make_tensor(
+                "fc1_experts_bias",
+                ORT_DTYPE,
+                fc1_bias_shape,
+                fc1_experts_bias.to(torch_type).flatten().tolist(),
+                raw=False,
+            ),
+            helper.make_tensor(
+                "fc2_experts_bias",
+                ORT_DTYPE,
+                fc2_bias_shape,
+                fc2_experts_bias.to(torch_type).flatten().tolist(),
+                raw=False,
+            ),
+        ]
+    )
+
+    graph_inputs = [
+        helper.make_tensor_value_info("input", ORT_DTYPE, [num_rows, hidden_size]),
+    ]
+
+    graph_inputs.append(
+        helper.make_tensor_value_info(
+            "router_probs",
+            ORT_DTYPE,
+            [num_rows, num_experts],
+        )
+    )
+
+    graph_outputs = [
+        helper.make_tensor_value_info("output", ORT_DTYPE, [num_rows, hidden_size]),
+    ]
+
+    graph = helper.make_graph(
+        nodes,
+        "MoE_Graph",
+        graph_inputs,
+        graph_outputs,
+        initializers,
+    )
+
+    model = helper.make_model(graph)
+    return model.SerializeToString()
+
+
+def get_activation_fn(activation):
+    if activation == "relu":
+        return nn.ReLU
+    elif activation == "gelu":
+        return nn.GELU
+    else:
+        raise NotImplementedError
+
+
+class MoEGate(nn.Module):
+    def __init__(self, num_experts, in_features):
+        super().__init__()
+        self.wg_reduction = torch.nn.Linear(in_features, 16, bias=False)
+
+        wg = torch.empty(num_experts, 16)
+        torch.nn.init.orthogonal_(wg, gain=0.32)
+        self.register_parameter("wg", torch.nn.Parameter(wg))
+
+    def forward(self, input):
+        input = self.wg_reduction(input)
+        with torch.no_grad():
+            wg_norm = self.wg.norm(p=2.0, dim=1, keepdim=True)
+            self.wg.mul_(1.5 / wg_norm)
+        logits = self._cosine(input, self.wg)
+        return logits
+
+    def _cosine(self, mat1, mat2, eps=1e-4):
+        assert mat1.dim() == 2
+        assert mat2.dim() == 2
+
+        mat2 = F.normalize(mat2.float(), p=2.0, dim=1, eps=eps)
+        return mat1.float().matmul(mat2.transpose(0, 1)).type_as(mat1)
+
+
+class MoERuntimeExperts(nn.Module):
+    def __init__(
+        self,
+        num_experts,
+        in_features,
+        hidden_features=None,
+        out_features=None,
+        act_layer=nn.GELU,
+        drop=0.0,
+        bias=True,
+        chunk_size=-1,
+    ):
+        super().__init__()
+        # assert bias is False, "Current bias is not supported"
+        assert drop == 0.0, "Current drop is not supported"
+        assert chunk_size == -1, "Current chunk is not supported"
+
+        self.weight1 = nn.Parameter(torch.rand(num_experts, in_features, hidden_features))
+        self.weight2 = nn.Parameter(torch.rand(num_experts, hidden_features, out_features))
+
+        self.bias1 = nn.Parameter(torch.rand(num_experts, hidden_features)) if bias else None
+        self.bias2 = nn.Parameter(torch.rand(num_experts, in_features)) if bias else None
+
+        self.act = act_layer()
+
+    def forward(self, x, indices_s):
+        x = x.unsqueeze(1)
+        x = self.bmm(x, self.weight1, indices_s)
+        if self.bias1 is not None:
+            x = x + self.bias1[indices_s].unsqueeze(1)  # S x hidden_features
+        x = self.act(x)
+        x = self.bmm(x, self.weight2, indices_s)
+        if self.bias2 is not None:
+            x = x + self.bias2[indices_s].unsqueeze(1)  # S x 1 x in_features
+        return x
+
+    def bmm(self, x, weight, indices_s):
+        x = torch.bmm(x, weight[indices_s])  # S x 1 x hidden_features
+        return x
+
+
+class MoE(nn.Module):
+    def __init__(
+        self,
+        batch_size,
+        num_rows,
+        num_experts,
+        in_features,
+        hidden_features=None,
+        out_features=None,
+        eval_capacity=-1,
+        activation="gelu",
+    ):
+        super().__init__()
+        self.num_experts = num_experts
+        out_features = out_features or in_features
+        hidden_features = hidden_features or in_features
+        self.eval_capacity = eval_capacity  # -1 means we route all tokens
+
+        self.gate = MoEGate(num_experts=num_experts, in_features=in_features)
+        self.moe_experts = MoERuntimeExperts(
+            num_experts=num_experts,
+            in_features=in_features,
+            hidden_features=hidden_features,
+            out_features=out_features,
+            act_layer=get_activation_fn(activation),
+            bias=True,
+        )
+
+        self.moe_onnx_graph = create_moe_onnx_graph(
+            batch_size * num_rows,
+            num_experts,
+            in_features,
+            hidden_features,
+            self.moe_experts.weight1,
+            self.moe_experts.weight2,
+            self.moe_experts.bias1,
+            self.moe_experts.bias2,
+        )
+
+        self.ort_sess = self.create_ort_session()
+
+        self.torch_input = torch.randn(batch_size, num_rows, in_features)
+
+    def create_ort_session(self):
+        from onnxruntime import InferenceSession, SessionOptions
+
+        sess_options = SessionOptions()
+
+        cuda_providers = ["CUDAExecutionProvider"]
+        if cuda_providers[0] not in onnxruntime.get_available_providers():
+            return None
+
+        sess_options.log_severity_level = 2
+        ort_session = InferenceSession(self.moe_onnx_graph, sess_options, providers=["CUDAExecutionProvider"])
+
+        return ort_session
+
+    def ort_run_with_iobinding(self, ort_inputs, repeat=1000):
+        iobinding = self.ort_sess.io_binding()
+        device_id = torch.cuda.current_device()
+
+        iobinding.bind_input(
+            name="input",
+            device_type="cuda",
+            device_id=device_id,
+            element_type=NP_TYPE,
+            shape=ort_inputs["input"].shape,
+            buffer_ptr=onnxruntime.OrtValue.ortvalue_from_numpy(ort_inputs["input"], "cuda", device_id).data_ptr(),
+        )
+        iobinding.bind_input(
+            name="router_probs",
+            device_type="cuda",
+            device_id=device_id,
+            element_type=NP_TYPE,
+            shape=ort_inputs["router_probs"].shape,
+            buffer_ptr=onnxruntime.OrtValue.ortvalue_from_numpy(
+                ort_inputs["router_probs"], "cuda", device_id
+            ).data_ptr(),
+        )
+
+        iobinding.synchronize_inputs()
+
+        iobinding.bind_output(
+            name="output",
+            device_type="cuda",
+            device_id=device_id,
+            element_type=NP_TYPE,
+            shape=ort_inputs["input"].shape,
+            buffer_ptr=onnxruntime.OrtValue.ortvalue_from_numpy(
+                numpy.zeros(ort_inputs["input"].shape), "cuda", device_id
+            ).data_ptr(),
+        )
+        iobinding.synchronize_outputs()
+
+        s = time.time()
+        for _ in range(repeat):
+            self.ort_sess.run_with_iobinding(iobinding)
+        e = time.time()
+        print(f"MoE cuda kernel time: {(e - s) / repeat * 1000} ms")
+
+    def torch_forward(self):
+        x = self.torch_input
+
+        b, t, c = x.shape
+        x = x.reshape(-1, c)
+        logits = self.gate(x)
+        gates = torch.nn.functional.softmax(logits, dim=1)
+        ret = torch.max(gates, dim=1)
+        indices_s = ret.indices  # dim: [bs], the index of the expert with highest softmax value
+        scores = ret.values.unsqueeze(-1).unsqueeze(-1)  # S
+        x = self.moe_experts(x, indices_s)
+
+        x = x * scores
+        x = x.reshape(b * t, c)
+
+        return x, torch.sum(x)
+
+    def onnx_forward(self, iobinding=False):
+        x = self.torch_input
+
+        _, _, c = x.shape
+        y = x.reshape(-1, c)
+        logits = self.gate(y)
+
+        ort_inputs = {
+            "input": numpy.ascontiguousarray(y.detach().numpy().astype(NP_TYPE)),
+            "router_probs": numpy.ascontiguousarray(logits.detach().numpy().astype(NP_TYPE)),
+        }
+
+        ort_output = None
+        if self.ort_sess is not None:
+            if not iobinding:
+                ort_output = self.ort_sess.run(None, ort_inputs)
+            else:
+                self.ort_run_with_iobinding(ort_inputs)
+                return None
+
+        # print_tensor("input", ort_inputs["input"])
+        # print_tensor("router_probs", ort_inputs["router_probs"])
+        # print_tensor("fc1_experts_weights", self.moe_experts.weight1.detach().numpy())
+        # print_tensor("fc2_experts_weights", self.moe_experts.weight2.detach().numpy())
+        # print_tensor("fc1_experts_bias", self.moe_experts.bias1.detach().numpy())
+        # print_tensor("fc2_experts_bias", self.moe_experts.bias2.detach().numpy())
+        # print_tensor("output", ort_output[0])
+
+        return ort_output
+
+    def parity_check(self):
+        torch_out = self.torch_forward()
+        ort_out = self.onnx_forward()
+        if ort_out is not None:
+            # print("max diff", numpy.max(numpy.abs(torch_out[0].detach().numpy() - ort_out[0])))
+            assert numpy.allclose(torch_out[0].detach().numpy(), ort_out[0], rtol=THRESHOLD, atol=THRESHOLD)
+
+    def benchmark(self):
+        self.onnx_forward(iobinding=True)
+
+
+class TestMoE(unittest.TestCase):
+    def test_moe_small(self):
+        rt = MoE(
+            batch_size=2,
+            num_rows=8,
+            num_experts=4,
+            in_features=16,
+            hidden_features=32,
+            out_features=16,
+        )
+        rt.parity_check()
+
+    @pytest.mark.slow
+    def test_moe_large(self):
+        for batch_size in [1, 8]:
+            for num_rows in [16, 64]:
+                for num_experts in [16, 64]:
+                    for in_features in [256]:
+                        for hidden_features in [512]:
+                            print(
+                                f"batch_size={batch_size}, num_rows={num_rows}, num_experts={num_experts}, in_features={in_features}, hidden_features={hidden_features}"
+                            )
+                            rt = MoE(
+                                batch_size=batch_size,
+                                num_rows=num_rows,
+                                num_experts=num_experts,
+                                in_features=in_features,
+                                hidden_features=hidden_features,
+                                out_features=in_features,
+                            )
+                            rt.parity_check()
+
+    @pytest.mark.slow
+    def test_moe_benchmark(self):
+        for batch_size in [32, 64]:
+            for num_rows in [128, 512]:
+                for num_experts in [64, 128]:
+                    for in_features in [256, 512]:
+                        for hidden_features in [1024, 2048]:
+                            print(
+                                f"batch_size={batch_size}, num_rows={num_rows}, num_experts={num_experts}, in_features={in_features}, hidden_features={hidden_features}"
+                            )
+                            rt = MoE(
+                                batch_size=batch_size,
+                                num_rows=num_rows,
+                                num_experts=num_experts,
+                                in_features=in_features,
+                                hidden_features=hidden_features,
+                                out_features=in_features,
+                            )
+                            rt.benchmark()
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/onnxruntime/test/python/transformers/test_parity_rotary_embedding.py b/onnxruntime/test/python/transformers/test_parity_rotary_embedding.py
new file mode 100644
index 0000000000000..cf8128e0eebcf
--- /dev/null
+++ b/onnxruntime/test/python/transformers/test_parity_rotary_embedding.py
@@ -0,0 +1,489 @@
+# Copyright (c) Microsoft Corporation.  All rights reserved.
+# Licensed under the MIT License.  See License.txt in the project root for
+# license information.
+# --------------------------------------------------------------------------
+
+
+# Notes
+# 1) The test cases in this file are for the following LLaMA-2 scenarios:
+# - Microsoft rotary embeddings with interleaved = True
+#   - Prompt generation
+#   - Token generation
+# - Hugging Face rotary embeddings (equal to Microsoft rotary embeddings with interleaved = False)
+#   - Prompt generation
+#   - Token generation
+#
+# 2) Shapes of position ids in ORT and `interleaved` for LLaMA-2 scenarios:
+# - Microsoft model: When shape of position ids == (1), interleaved = True
+# - Hugging Face model: When shape of position ids == (batch_size, sequence_length), interleaved = False
+
+
+import unittest
+from copy import deepcopy
+
+import numpy as np
+import torch
+import torch.nn as nn
+from onnx import TensorProto, helper
+
+import onnxruntime as ort
+
+
+class SampleInputConfig:
+    def __init__(
+        self,
+        batch_size=2,
+        sequence_length=8,
+        num_heads=4,
+        head_size=6,
+        max_sequence_length=16,
+    ):
+        self.batch_size = batch_size
+        self.sequence_length = sequence_length
+        self.num_heads = num_heads
+        self.head_size = head_size
+        self.hidden_size = self.num_heads * self.head_size
+        self.max_sequence_length = max_sequence_length
+
+
+# LLaMA Hugging Face model
+class LlamaHFRotaryEmbedding(nn.Module):
+    def __init__(self, dim, max_position_embeddings=2048, base=10000, device="cpu"):
+        super().__init__()
+
+        self.dim = dim
+        self.max_position_embeddings = max_position_embeddings
+        self.base = base
+        inv_freq = 1.0 / (self.base ** (torch.arange(0, self.dim, 2).float().to(device) / self.dim))
+        self.register_buffer("inv_freq", inv_freq, persistent=False)
+
+        # Build here to make `torch.jit.trace` work.
+        self._set_cos_sin_cache(
+            seq_len=max_position_embeddings, device=self.inv_freq.device, dtype=torch.get_default_dtype()
+        )
+
+    def _set_cos_sin_cache(self, seq_len, device, dtype):
+        self.max_seq_len_cached = seq_len
+        t = torch.arange(self.max_seq_len_cached, device=device, dtype=self.inv_freq.dtype)
+
+        freqs = torch.einsum("i,j->ij", t, self.inv_freq)
+        # Different from paper, but it uses a different permutation in order to obtain the same calculation
+        emb = torch.cat((freqs, freqs), dim=-1)
+        self.register_buffer("cos_cached", emb.cos()[None, None, :, :].to(dtype), persistent=False)
+        self.register_buffer("sin_cached", emb.sin()[None, None, :, :].to(dtype), persistent=False)
+
+    def get_cos_sin_cache(self, seq_len=None, device=torch.device("cpu"), dtype=torch.float32):  # noqa: B008
+        # x: [bs, num_attention_heads, seq_len, head_size]
+        if seq_len > self.max_seq_len_cached:
+            self._set_cos_sin_cache(seq_len=seq_len, device=device, dtype=dtype)
+
+        return (
+            self.cos_cached[:, :, :seq_len, ...].to(dtype=dtype),
+            self.sin_cached[:, :, :seq_len, ...].to(dtype=dtype),
+        )
+
+    def rotate_half(self, x):
+        """Rotates half the hidden dims of the input."""
+        x1 = x[..., : x.shape[-1] // 2]
+        x2 = x[..., x.shape[-1] // 2 :]
+        return torch.cat((-x2, x1), dim=-1)
+
+    def apply_rope_bnsh(self, x, cos, sin, position_ids):
+        # The first two dimensions of cos and sin are always 1, so we can `squeeze` them.
+        cos = cos.squeeze(1).squeeze(0)  # [seq_len, dim]
+        sin = sin.squeeze(1).squeeze(0)  # [seq_len, dim]
+        cos = cos[position_ids].unsqueeze(1)  # [bs, 1, seq_len, dim]
+        sin = sin[position_ids].unsqueeze(1)  # [bs, 1, seq_len, dim]
+        x_embed = (x * cos) + (self.rotate_half(x) * sin)
+        return x_embed
+
+    def apply_rope_bsnh(self, x, cos, sin, position_ids):
+        # Two dimensions of cos and sin are always 1, so we can `squeeze` them.
+        cos = cos.squeeze()  # [seq_len, dim]
+        sin = sin.squeeze()  # [seq_len, dim]
+        cos = cos[position_ids].unsqueeze(2)  # [bs, seq_len, 1, dim]
+        sin = sin[position_ids].unsqueeze(2)  # [bs, seq_len, 1, dim]
+        x_embed = (x * cos) + (self.rotate_half(x) * sin)
+        return x_embed
+
+    def forward(self, x, cos, sin, pos_ids, x_format="bnsh"):
+        if x_format == "bnsh":
+            return self.apply_rope_bnsh(x, cos, sin, pos_ids)
+        return self.apply_rope_bsnh(x, cos, sin, pos_ids)
+
+
+# LLaMA Microsoft model
+class LlamaMSRotaryEmbedding(nn.Module):
+    def __init__(self, hidden_size, num_heads, max_sequence_length):
+        super().__init__()
+
+        self.hidden_size = hidden_size
+        self.num_heads = num_heads
+        self.max_sequence_length = max_sequence_length
+
+    def get_cos_sin_cache(self, theta=10000.0, head_scale=1.0, device="cpu", dtype=torch.float32):
+        hidden_size = self.hidden_size
+        n_heads = self.num_heads
+        max_seq_len = self.max_sequence_length
+
+        # Precalculate rotary matrices for the sequence
+        # According to "Attention Is All You Need", theta_i = 10000 ^ (2 * (i - 1)/dim), i in [1, 2, ..., dim//2]
+        head_dim = head_scale * hidden_size / n_heads
+
+        pos = torch.arange(0, 2 * (head_dim // 2), step=2, device=device, dtype=dtype)
+        freqs = 1.0 / (theta ** (pos / head_dim))
+
+        idx = torch.arange(max_seq_len, device=freqs.device)
+        freqs = torch.outer(idx, freqs)
+
+        cos = torch.reshape(torch.cos(freqs), [1, max_seq_len, 1, -1])
+        sin = torch.reshape(torch.sin(freqs), [1, max_seq_len, 1, -1])
+        dtype = torch.get_default_dtype()
+
+        return cos.to(dtype), sin.to(dtype)
+
+    def rotate_tensor(
+        self,
+        x: torch.Tensor,  # BxSxNxH
+        cos: torch.Tensor,  # 1xSx1x(H/2)
+        sin: torch.Tensor,  # 1xSx1x(H/2)
+        pos: int,
+        interleaved: bool,
+    ):
+        # Dimension of x is [batch_size, seq_len, n_heads, head_dim]
+        rot_dim = 2 * cos.shape[3]
+
+        # Dolly requires partial rotation
+        x_rot = x[:, :, :, :rot_dim]
+
+        if interleaved:
+            x1 = x_rot[:, :, :, 0::2]
+            x2 = x_rot[:, :, :, 1::2]
+        else:
+            half = x_rot.shape[-1] // 2
+            x1 = x[:, :, :, 0:half]
+            x2 = x[:, :, :, half : 2 * half]
+
+        seq_len = x.shape[1]
+        cos_x = cos[:, pos : pos + seq_len, :, :]
+        sin_x = sin[:, pos : pos + seq_len, :, :]
+
+        # cos_x: (1, S, 1, H/2)
+        # sin_x: (1, S, 1, H/2)
+        # x1: (B, S, N, H/2)
+        # x2: (B, S, N, H/2)
+        real = cos_x * x1 - sin_x * x2
+        imag = sin_x * x1 + cos_x * x2
+
+        if interleaved:
+            x_rot[:, :, :, 0::2] = real
+            x_rot[:, :, :, 1::2] = imag
+        else:
+            x_rot = torch.cat((real, imag), dim=-1)
+
+        return torch.cat((x_rot, x[:, :, :, rot_dim:]), dim=-1)
+
+    def forward(self, x, cos, sin, pos, interleaved):
+        return self.rotate_tensor(x, cos, sin, pos, interleaved)
+
+
+class TestLlamaRotaryEmbedding(unittest.TestCase):
+    def setUp(self):
+        self.config = SampleInputConfig()
+        self.llama_hf = LlamaHFRotaryEmbedding(self.config.head_size, self.config.max_sequence_length)
+        self.llama_ms = LlamaMSRotaryEmbedding(
+            self.config.hidden_size, self.config.num_heads, self.config.max_sequence_length
+        )
+
+        seed = 2
+        np.random.seed(seed)
+        torch.manual_seed(seed)
+        torch.set_printoptions(sci_mode=False)
+
+    def create_onnx_graph(self, x_shape, pos_shape, cos, sin, interleaved):
+        inputs = [
+            helper.make_tensor_value_info(
+                name="input",
+                elem_type=TensorProto.FLOAT,
+                shape=list(x_shape),
+            ),
+            helper.make_tensor_value_info(
+                name="position_ids",
+                elem_type=TensorProto.INT64,
+                shape=list(pos_shape),
+            ),
+        ]
+        outputs = [
+            helper.make_tensor_value_info(
+                name="output",
+                elem_type=TensorProto.FLOAT,
+                shape=list(x_shape),
+            ),
+        ]
+
+        initializers = [
+            helper.make_tensor(
+                name="cos_cache",
+                data_type=TensorProto.FLOAT,
+                dims=list(torch.squeeze(cos).shape),
+                vals=cos.flatten().tolist(),
+            ),
+            helper.make_tensor(
+                name="sin_cache",
+                data_type=TensorProto.FLOAT,
+                dims=list(torch.squeeze(sin).shape),
+                vals=sin.flatten().tolist(),
+            ),
+        ]
+        nodes = [
+            helper.make_node(
+                op_type="RotaryEmbedding",
+                inputs=["input", "position_ids", "cos_cache", "sin_cache"],
+                outputs=["output"],
+                interleaved=interleaved,
+                name="RotaryEmbedding_0",
+                domain="com.microsoft",
+            ),
+        ]
+
+        graph = helper.make_graph(
+            nodes=nodes,
+            name="RotaryEmbedding_Graph",
+            inputs=inputs,
+            outputs=outputs,
+            initializer=initializers,
+        )
+        opset_import = helper.make_opsetid(domain="com.microsoft", version=1)
+        model = helper.make_model(graph, opset_imports=[opset_import])
+        return model.SerializeToString()
+
+    def get_eps(self):
+        eps = ["CPUExecutionProvider", "CUDAExecutionProvider"]
+        return list(filter(lambda ep: ep in ort.get_available_providers(), eps))
+
+    def run_ort_ep_tests(self, onnx_graph, inputs_ort, expected_output_bsnh, transposed=False):
+        eps = self.get_eps()
+        for ep in eps:
+            sess = ort.InferenceSession(onnx_graph, providers=[ep])
+            output_ort = sess.run(None, inputs_ort)[0]
+            if not transposed:
+                output_ort = output_ort.reshape(
+                    (self.config.batch_size, inputs_ort["input"].shape[1], self.config.num_heads, self.config.head_size)
+                )
+
+            # Compare outputs as BxSxNxH
+            self.assertTrue(np.allclose(expected_output_bsnh, output_ort))
+
+    # apply_rope(x_bnsh) == apply_rope(x_bsnh).transpose(1,2)
+    def test_hf_bnsh_and_hf_bsnh(self):
+        x_bnsh = torch.randn(
+            self.config.batch_size, self.config.num_heads, self.config.sequence_length, self.config.head_size
+        )
+        cos_hf, sin_hf = self.llama_hf.get_cos_sin_cache(self.config.sequence_length)
+        pos_hf = torch.stack([torch.arange(0, self.config.sequence_length) for _ in range(self.config.batch_size)])
+
+        x_bnsh_after_rope = self.llama_hf(x_bnsh, cos_hf, sin_hf, pos_hf)  # output is BxNxSxH
+        x_bsnh_after_rope = self.llama_hf(
+            x_bnsh.transpose(1, 2), cos_hf.transpose(1, 2), sin_hf.transpose(1, 2), pos_hf, "bsnh"
+        )  # output is BxSxNxH
+
+        self.assertTrue(torch.allclose(x_bnsh_after_rope, x_bsnh_after_rope.transpose(1, 2)))
+
+    # HF rotary == MSFT rotary non-interleaved
+    def test_hf_rotary_and_msft_rotary_noninterleaved(self):
+        x_bnsh = torch.randn(
+            self.config.batch_size, self.config.num_heads, self.config.sequence_length, self.config.head_size
+        )
+        cos_hf, sin_hf = self.llama_hf.get_cos_sin_cache(self.config.sequence_length)
+        pos_hf = torch.stack([torch.arange(0, self.config.sequence_length) for _ in range(self.config.batch_size)])
+        output_hf = self.llama_hf(x_bnsh, cos_hf, sin_hf, pos_hf)  # output is BxNxSxH
+
+        x_bsnh = x_bnsh.transpose(1, 2)
+        x_bsd = deepcopy(x_bsnh)  # deepcopy to avoid changes made by self.llama_ms forward pass
+        cos_ms, sin_ms = self.llama_ms.get_cos_sin_cache()
+        pos_ms = 0
+        output_ms = (
+            self.llama_ms(x_bsd, cos_ms, sin_ms, pos_ms, interleaved=False).detach().cpu().numpy()  # output is BxSxNxH
+        )
+
+        # Compare caches as Mx(H/2)
+        self.assertTrue(
+            torch.allclose(self.llama_hf.cos_cached.squeeze()[:, : (self.config.head_size // 2)], cos_ms.squeeze())
+        )
+        self.assertTrue(
+            torch.allclose(self.llama_hf.sin_cached.squeeze()[:, : (self.config.head_size // 2)], sin_ms.squeeze())
+        )
+
+        # Compare outputs as BxSxNxH
+        self.assertTrue(np.allclose(output_hf.transpose(1, 2).detach().cpu().numpy(), output_ms))
+
+    # Prompt step, interleaved = true, pos ids shape = (1)
+    def test_msft_prompt_rotary_interleaved(self):
+        # Calculated this way to match the data in rotary_embedding_op_test.cc
+        x_bnsh = torch.randn(
+            self.config.batch_size, self.config.num_heads, self.config.sequence_length, self.config.head_size
+        )
+        x_bsnh = x_bnsh.transpose(1, 2)
+        x_bsd = deepcopy(x_bsnh)  # deepcopy to avoid changes made by self.llama_ms forward pass
+        cos_ms, sin_ms = self.llama_ms.get_cos_sin_cache()
+        pos_ms = 0
+        output_ms = self.llama_ms(deepcopy(x_bsnh), cos_ms, sin_ms, pos_ms, interleaved=True).detach().cpu().numpy()
+
+        x_bsd = x_bsd.reshape(self.config.batch_size, self.config.sequence_length, self.config.hidden_size)
+        pos_ms = torch.tensor([pos_ms])
+        onnx_graph = self.create_onnx_graph(x_bsd.shape, pos_ms.shape, cos_ms, sin_ms, interleaved=True)
+        inputs_ort = {
+            "input": x_bsd.detach().cpu().numpy(),
+            "position_ids": pos_ms.detach().cpu().numpy(),
+        }
+
+        # Compare inputs/outputs as BxSxNxH
+        self.assertTrue(np.allclose(x_bsnh.flatten(), x_bsd.flatten()))
+        self.run_ort_ep_tests(onnx_graph, inputs_ort, output_ms)
+
+    # Token generation step, interleaved = true, pos ids shape = (1)
+    def test_msft_token_rotary_interleaved(self):
+        # Calculated this way to match the data in rotary_embedding_op_test.cc
+        x_bnsh = torch.randn(
+            self.config.batch_size, self.config.num_heads, self.config.sequence_length, self.config.head_size
+        )
+        x_bsnh = x_bnsh.transpose(1, 2)
+        x_bsd = deepcopy(x_bsnh)  # deepcopy to avoid changes made by self.llama_ms forward pass
+        cos_ms, sin_ms = self.llama_ms.get_cos_sin_cache()
+        pos_ms = 2
+        output_ms = self.llama_ms(deepcopy(x_bsnh), cos_ms, sin_ms, pos_ms, interleaved=True).detach().cpu().numpy()
+
+        x_bsd = x_bsd.reshape(self.config.batch_size, self.config.sequence_length, self.config.hidden_size)
+        pos_ms = torch.tensor([pos_ms])
+        onnx_graph = self.create_onnx_graph(x_bsd.shape, pos_ms.shape, cos_ms, sin_ms, interleaved=True)
+        inputs_ort = {
+            "input": x_bsd.detach().cpu().numpy(),
+            "position_ids": pos_ms.detach().cpu().numpy(),
+        }
+
+        # Compare inputs/outputs as BxSxNxH
+        self.assertTrue(np.allclose(x_bsnh.flatten(), x_bsd.flatten()))
+        self.run_ort_ep_tests(onnx_graph, inputs_ort, output_ms)
+
+    # Prompt step, interleaved = false, pos ids shape = (batch_size, sequence_length)
+    def test_hf_prompt_rotary_batched_pos_ids(self):
+        x_bnsh = torch.randn(
+            self.config.batch_size, self.config.num_heads, self.config.sequence_length, self.config.head_size
+        )
+        cos_hf, sin_hf = self.llama_hf.get_cos_sin_cache(self.config.sequence_length)
+        pos_ids = torch.stack([torch.arange(0, self.config.sequence_length) for _ in range(self.config.batch_size)])
+        output_hf = self.llama_hf(x_bnsh, cos_hf, sin_hf, pos_ids)  # output is BxNxSxH
+
+        x_bsnh = x_bnsh.transpose(1, 2)
+        x_bsd = x_bsnh.reshape(self.config.batch_size, self.config.sequence_length, self.config.hidden_size)
+        cos_ms, sin_ms = self.llama_ms.get_cos_sin_cache()
+        onnx_graph = self.create_onnx_graph(x_bsd.shape, pos_ids.shape, cos_ms, sin_ms, interleaved=False)
+        inputs_ort = {
+            "input": x_bsd.detach().cpu().numpy(),
+            "position_ids": pos_ids.detach().cpu().numpy(),
+        }
+
+        self.run_ort_ep_tests(onnx_graph, inputs_ort, output_hf.transpose(1, 2).detach().cpu().numpy())
+
+    # Token generation step, interleaved = false, pos ids shape = (batch_size, sequence_length)
+    def test_hf_token_rotary_batched_pos_ids(self):
+        x_bnsh = torch.randn(self.config.batch_size, self.config.num_heads, 1, self.config.head_size)
+        cos_hf, sin_hf = self.llama_hf.get_cos_sin_cache(self.config.sequence_length)
+        pos_ids = torch.stack([torch.tensor([2]) for _ in range(self.config.batch_size)])
+        output_hf = self.llama_hf(x_bnsh, cos_hf, sin_hf, pos_ids)  # output is BxNxSxH
+
+        x_bsnh = x_bnsh.transpose(1, 2)
+        x_bsd = x_bsnh.reshape(self.config.batch_size, 1, self.config.hidden_size)
+        cos_ms, sin_ms = self.llama_ms.get_cos_sin_cache()
+        onnx_graph = self.create_onnx_graph(x_bsd.shape, pos_ids.shape, cos_ms, sin_ms, interleaved=False)
+        inputs_ort = {
+            "input": x_bsd.detach().cpu().numpy(),
+            "position_ids": pos_ids.detach().cpu().numpy(),
+        }
+
+        # Compare outputs as BxSxNxH
+        self.run_ort_ep_tests(onnx_graph, inputs_ort, output_hf.transpose(1, 2).detach().cpu().numpy())
+
+    # Bonus test: Prompt step, interleaved = false, pos ids shape = (1)
+    def test_hf_prompt_rotary_one_pos_id(self):
+        x_bnsh = torch.randn(
+            self.config.batch_size, self.config.num_heads, self.config.sequence_length, self.config.head_size
+        )
+        cos_hf, sin_hf = self.llama_hf.get_cos_sin_cache(self.config.sequence_length)
+        pos_hf = torch.stack([torch.arange(0, self.config.sequence_length) for _ in range(self.config.batch_size)])
+        output_hf = self.llama_hf(x_bnsh, cos_hf, sin_hf, pos_hf)  # output is BxNxSxH
+
+        x_bsnh = x_bnsh.transpose(1, 2)
+        x_bsd = x_bsnh.reshape(self.config.batch_size, self.config.sequence_length, self.config.hidden_size)
+        cos_ms, sin_ms = self.llama_ms.get_cos_sin_cache()
+        pos_ms = torch.tensor([0])
+        onnx_graph = self.create_onnx_graph(x_bsd.shape, pos_ms.shape, cos_ms, sin_ms, interleaved=False)
+        inputs_ort = {
+            "input": x_bsd.detach().cpu().numpy(),
+            "position_ids": pos_ms.detach().cpu().numpy(),
+        }
+
+        # Compare outputs as BxSxNxH
+        self.run_ort_ep_tests(onnx_graph, inputs_ort, output_hf.transpose(1, 2).detach().cpu().numpy())
+
+    # Bonus test: Token generation step, interleaved = false, pos ids shape = (1)
+    def test_hf_token_rotary_one_pos_id(self):
+        x_bnsh = torch.randn(self.config.batch_size, self.config.num_heads, 1, self.config.head_size)
+        cos_hf, sin_hf = self.llama_hf.get_cos_sin_cache(self.config.sequence_length)
+        pos_ids = torch.stack([torch.tensor([2]) for _ in range(self.config.batch_size)])
+        output_hf = self.llama_hf(x_bnsh, cos_hf, sin_hf, pos_ids)  # output is BxNxSxH
+
+        x_bsnh = x_bnsh.transpose(1, 2)
+        x_bsd = x_bsnh.reshape(self.config.batch_size, 1, self.config.hidden_size)
+        cos_ms, sin_ms = self.llama_ms.get_cos_sin_cache()
+        pos_ms = torch.tensor([2])
+        onnx_graph = self.create_onnx_graph(x_bsd.shape, pos_ms.shape, cos_ms, sin_ms, interleaved=False)
+        inputs_ort = {
+            "input": x_bsd.detach().cpu().numpy(),
+            "position_ids": pos_ms.detach().cpu().numpy(),
+        }
+
+        # Compare outputs as BxSxNxH
+        self.run_ort_ep_tests(onnx_graph, inputs_ort, output_hf.transpose(1, 2).detach().cpu().numpy())
+
+    # Bonus test: Prompt step, interleaved = false, pos ids shape = (1), transposed
+    def test_hf_prompt_rotary_one_pos_id_transposed(self):
+        x_bnsh = torch.randn(
+            self.config.batch_size, self.config.num_heads, self.config.sequence_length, self.config.head_size
+        )
+        cos_hf, sin_hf = self.llama_hf.get_cos_sin_cache(self.config.sequence_length)
+        pos_hf = torch.stack([torch.arange(0, self.config.sequence_length) for _ in range(self.config.batch_size)])
+        output_hf = self.llama_hf(x_bnsh, cos_hf, sin_hf, pos_hf)  # output is BxNxSxH
+
+        cos_ms, sin_ms = self.llama_ms.get_cos_sin_cache()
+        pos_ms = torch.tensor([0])
+        onnx_graph = self.create_onnx_graph(x_bnsh.shape, pos_ms.shape, cos_ms, sin_ms, interleaved=False)
+        inputs_ort = {
+            "input": x_bnsh.detach().cpu().numpy(),
+            "position_ids": pos_ms.detach().cpu().numpy(),
+        }
+
+        # Compare outputs as BxNxSxH
+        self.run_ort_ep_tests(onnx_graph, inputs_ort, output_hf.detach().cpu().numpy(), transposed=True)
+
+    # Bonus test: Token generation step, interleaved = false, pos ids shape = (1), transposed
+    def test_hf_token_rotary_one_pos_id_transposed(self):
+        x_bnsh = torch.randn(self.config.batch_size, self.config.num_heads, 1, self.config.head_size)
+        cos_hf, sin_hf = self.llama_hf.get_cos_sin_cache(self.config.sequence_length)
+        pos_ids = torch.stack([torch.tensor([2]) for _ in range(self.config.batch_size)])
+        output_hf = self.llama_hf(x_bnsh, cos_hf, sin_hf, pos_ids)  # output is BxSxNxH
+
+        cos_ms, sin_ms = self.llama_ms.get_cos_sin_cache()
+        pos_ms = torch.tensor([2])
+        onnx_graph = self.create_onnx_graph(x_bnsh.shape, pos_ms.shape, cos_ms, sin_ms, interleaved=False)
+        inputs_ort = {
+            "input": x_bnsh.detach().cpu().numpy(),
+            "position_ids": pos_ms.detach().cpu().numpy(),
+        }
+
+        # Set tranposed=True to compare outputs as BxSxNxH
+        self.run_ort_ep_tests(onnx_graph, inputs_ort, output_hf.detach().cpu().numpy(), transposed=True)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/onnxruntime/test/python/transformers/test_rotary_embedding_fusion.py b/onnxruntime/test/python/transformers/test_rotary_embedding_fusion.py
new file mode 100644
index 0000000000000..7bca48c29019e
--- /dev/null
+++ b/onnxruntime/test/python/transformers/test_rotary_embedding_fusion.py
@@ -0,0 +1,447 @@
+# Copyright (c) Microsoft Corporation.  All rights reserved.
+# Licensed under the MIT License.  See License.txt in the project root for
+# license information.
+# --------------------------------------------------------------------------
+
+import os
+import sys
+import unittest
+from typing import List
+
+import numpy as np
+import onnx
+from onnx import TensorProto, helper
+from parity_utilities import find_transformers_source
+
+if find_transformers_source():
+    from fusion_options import FusionOptions
+    from onnx_model import OnnxModel
+    from optimizer import optimize_model
+else:
+    from onnxruntime.transformers.fusion_options import FusionOptions
+    from onnxruntime.transformers.onnx_model import OnnxModel
+    from onnxruntime.transformers.optimizer import optimize_model
+
+
+def float_tensor(name: str, shape: List[int], random=False):
+    low = 0.0
+    high = 1.0
+    total_elements = 1
+    for x in shape:
+        total_elements *= x
+    weights = [np.random.uniform(low, high) for _ in range(total_elements)] if random else [1.0] * total_elements
+    return helper.make_tensor(name, TensorProto.FLOAT, shape, weights)
+
+
+class TestRotaryEmbeddingFusion(unittest.TestCase):
+    def setUp(self):
+        self.batch_size = 2
+        self.sequence_length = 8
+        self.num_heads = 4
+        self.head_size = 6
+        self.hidden_size = self.num_heads * self.head_size
+
+        self.past_sequence_length = 2
+        self.max_sequence_length = 12
+
+    def verify_fusion(self, expected_model_path, original_model_path):
+        expected_model = OnnxModel(onnx.load(expected_model_path))
+        expected_model.topological_sort(is_deterministic=True)
+
+        options = FusionOptions("gpt2")
+        optimized_model = optimize_model(original_model_path, optimization_options=options, opt_level=0)
+        optimized_model.topological_sort(is_deterministic=True)
+
+        self.assertTrue(str(expected_model.model.graph), str(optimized_model.model.graph))
+
+    def create_initializers(self):
+        initializers = [
+            float_tensor("cos_cache", [self.max_sequence_length, self.head_size]),
+            float_tensor("sin_cache", [self.max_sequence_length, self.head_size]),
+            helper.make_tensor(
+                "pos_ids_new_shape",
+                TensorProto.FLOAT,
+                [2],
+                np.array([self.batch_size, self.sequence_length], dtype=np.int64),
+            ),
+            helper.make_tensor("zero", TensorProto.FLOAT, [1], np.array([0], dtype=np.int64)),
+            helper.make_tensor("one", TensorProto.FLOAT, [1], np.array([1], dtype=np.int64)),
+            helper.make_tensor("two", TensorProto.FLOAT, [1], np.array([2], dtype=np.int64)),
+            helper.make_tensor("three", TensorProto.FLOAT, [1], np.array([3], dtype=np.int64)),
+            helper.make_tensor("int_max", TensorProto.FLOAT, [1], np.array([sys.maxsize], dtype=np.int64)),
+        ]
+        return initializers
+
+    def create_inputs_and_outputs(self, model_type: str = ""):
+        inputs = [
+            helper.make_tensor_value_info(
+                "input_0",
+                TensorProto.FLOAT,
+                [self.batch_size, self.sequence_length, self.num_heads, self.head_size],
+            ),
+            helper.make_tensor_value_info("position_ids", TensorProto.INT64, [self.batch_size, self.sequence_length]),
+        ]
+        if model_type in {"past", "merged"}:
+            # Input will be removed in fused model since it's not used in RotaryEmbedding.
+            # We create this input so that we can check the `past_seq_len` path during
+            # RotaryEmbedding fusion.
+            inputs.append(
+                helper.make_tensor_value_info(
+                    "past_key",
+                    TensorProto.FLOAT,
+                    [self.batch_size, self.num_heads, self.past_sequence_length, self.head_size],
+                )
+            )
+        # Dummy input to test nodes for `curr_seq_len` path
+        if model_type != "":
+            inputs.append(
+                helper.make_tensor_value_info(
+                    "curr_key",
+                    TensorProto.FLOAT,
+                    [self.batch_size, self.sequence_length, self.num_heads, self.head_size],
+                )
+            )
+        outputs = [
+            helper.make_tensor_value_info(
+                "output_0",
+                TensorProto.FLOAT,
+                [self.batch_size, self.num_heads, self.sequence_length, self.head_size],
+            )
+        ]
+        if model_type in {"merged"}:
+            # Dummy output to test that nodes for `past_seq_len` path are not removed for merged model
+            outputs.append(helper.make_tensor_value_info("past_seq_len_plus_zero", TensorProto.FLOAT, [1]))
+        return inputs, outputs
+
+    def create_fused_model(self, interleaved: bool, initializers: List[TensorProto]):
+        inputs, outputs = self.create_inputs_and_outputs()
+
+        rope_node = helper.make_node(
+            "RotaryEmbedding",
+            inputs=[inputs[0].name, inputs[1].name, initializers[0].name, initializers[1].name],
+            outputs=[outputs[0].name],
+            name="RotaryEmbedding_0",
+            interleaved=int(interleaved),
+        )
+
+        graph = helper.make_graph(
+            nodes=[rope_node],
+            name="RotaryEmbedding_Graph",
+            inputs=inputs,
+            outputs=outputs,
+            initializer=initializers,
+        )
+        opset_import = helper.make_opsetid(domain="com.microsoft", version=1)
+        model = helper.make_model(graph, opset_imports=[opset_import])
+        return model
+
+    def create_cache_path(self, model_type: str, use_redundant_squeeze_ops: bool):
+        # Create position ids path
+        reshape_node = helper.make_node(
+            "Reshape",
+            inputs=["position_ids", "pos_ids_new_shape"],
+            outputs=["pos_ids_reshaped"],
+            name="Reshape_0",
+        )
+        pos_ids_nodes = [reshape_node]
+
+        # Create cos path
+        cos_init_unsqueeze_node = helper.make_node(
+            "Unsqueeze",
+            inputs=["new_seq_len", "zero"],
+            outputs=["cos_unsqueeze"],
+            name="Unsqueeze_2",
+        )
+        cos_slice_node = helper.make_node(
+            "Slice",
+            inputs=["cos_cache", "zero", "cos_unsqueeze", "two", "one"],
+            outputs=["cos_sliced"],
+            name="Slice_2",
+        )
+        cos_nodes = [cos_init_unsqueeze_node, cos_slice_node]
+
+        if use_redundant_squeeze_ops:
+            # These two nodes are eliminated by this transformers PR: https://github.com/huggingface/transformers/pull/26162
+            cos_squeeze_1_node = helper.make_node(
+                "Squeeze",
+                inputs=["cos_sliced", "zero"],
+                outputs=["cos_squeeze_1"],
+                name="Squeeze_0",
+            )
+            cos_squeeze_2_node = helper.make_node(
+                "Squeeze",
+                inputs=["cos_squeeze_1", "zero"],
+                outputs=["cos_squeeze_2"],
+                name="Squeeze_1",
+            )
+            cos_nodes.extend([cos_squeeze_1_node, cos_squeeze_2_node])
+
+        cos_gather_node = helper.make_node(
+            "Gather",
+            inputs=["cos_squeeze_2" if use_redundant_squeeze_ops else "cos_sliced", "pos_ids_reshaped"],
+            outputs=["cos_indexed"],
+            name="Gather_1",
+        )
+        cos_end_unsqueeze_node = helper.make_node(
+            "Unsqueeze",
+            inputs=["cos_indexed", "one"],
+            outputs=["cos"],
+            name="Unsqueeze_3",
+        )
+        cos_nodes.extend([cos_gather_node, cos_end_unsqueeze_node])
+
+        # Create sin path
+        sin_init_unsqueeze_node = helper.make_node(
+            "Unsqueeze",
+            inputs=["new_seq_len", "zero"],
+            outputs=["sin_unsqueeze"],
+            name="Unsqueeze_4",
+        )
+        sin_slice_node = helper.make_node(
+            "Slice",
+            inputs=["sin_cache", "zero", "sin_unsqueeze", "two", "one"],
+            outputs=["sin_sliced"],
+            name="Slice_3",
+        )
+        sin_nodes = [sin_init_unsqueeze_node, sin_slice_node]
+
+        if use_redundant_squeeze_ops:
+            sin_squeeze_1_node = helper.make_node(
+                "Squeeze",
+                inputs=["sin_sliced", "zero"],
+                outputs=["sin_squeeze_1"],
+                name="Squeeze_2",
+            )
+            sin_squeeze_2_node = helper.make_node(
+                "Squeeze",
+                inputs=["sin_squeeze_1", "zero"],
+                outputs=["sin_squeeze_2"],
+                name="Squeeze_3",
+            )
+            sin_nodes.extend([sin_squeeze_1_node, sin_squeeze_2_node])
+
+        sin_gather_node = helper.make_node(
+            "Gather",
+            inputs=["sin_squeeze_2" if use_redundant_squeeze_ops else "sin_sliced", "pos_ids_reshaped"],
+            outputs=["sin_indexed"],
+            name="Gather_2",
+        )
+        sin_end_unsqueeze_node = helper.make_node(
+            "Unsqueeze",
+            inputs=["sin_indexed", "one"],
+            outputs=["sin"],
+            name="Unsqueeze_5",
+        )
+        sin_nodes.extend([sin_gather_node, sin_end_unsqueeze_node])
+
+        # Create beginning nodes before cos and sin paths
+
+        # Create curr seq len path
+        curr_transpose_node = helper.make_node(
+            "Transpose",
+            inputs=["curr_key"],
+            outputs=["curr_key_transposed"],
+            name="Transpose_curr",
+            perm=[0, 2, 1, 3],
+        )
+        curr_shape_node = helper.make_node(
+            "Shape",
+            inputs=["curr_key_transposed"],
+            outputs=["curr_shape"],
+            name="Shape_curr",
+        )
+        curr_gather_node = helper.make_node(
+            "Gather",
+            inputs=["curr_shape", "two"],
+            outputs=["curr_seq_len" if model_type in {"past", "merged"} else "new_seq_len"],
+            name="Gather_curr",
+        )
+        beginning_nodes = [curr_transpose_node, curr_shape_node, curr_gather_node]
+
+        if model_type in {"past", "merged"}:
+            # Create past seq len path
+            past_shape_node = helper.make_node(
+                "Shape",
+                inputs=["past_key"],
+                outputs=["past_shape"],
+                name="Shape_past",
+            )
+            past_gather_node = helper.make_node(
+                "Gather",
+                inputs=["past_shape", "two"],
+                outputs=["past_seq_len"],
+                name="Gather_past",
+            )
+            add_node = helper.make_node(
+                "Add",
+                inputs=["curr_seq_len", "past_seq_len"],
+                outputs=["new_seq_len"],
+                name="Add_1",
+            )
+            beginning_nodes.extend([past_shape_node, past_gather_node, add_node])
+
+        if model_type == "merged":
+            dummy_node = helper.make_node(
+                "Add",
+                inputs=["past_seq_len", "zero"],
+                outputs=["past_seq_len_plus_zero"],
+                name="Add_dummy_node",
+            )
+            beginning_nodes.append(dummy_node)
+
+        return pos_ids_nodes + cos_nodes + sin_nodes + beginning_nodes
+
+    def create_apply_rope_path(self):
+        start_node = helper.make_node(
+            "Transpose",
+            inputs=["input_0"],
+            outputs=["x"],
+            name="Transpose_0",
+            perm=[0, 2, 1, 3],
+        )
+
+        # Calculate x_half_shape
+        shape_node = helper.make_node(
+            "Shape",
+            inputs=["x"],
+            outputs=["x_shape"],
+            name="Shape_0",
+        )
+        gather_node = helper.make_node(
+            "Gather",
+            inputs=["x_shape", "three"],
+            outputs=["x_last_idx_shape"],
+            name="Gather_0",
+            axis=0,
+        )
+        div_node = helper.make_node(
+            "Div",
+            inputs=["x_last_idx_shape", "two"],
+            outputs=["x_half_shape"],
+            name="Div_0",
+        )
+        unsqueeze_0_node = helper.make_node(
+            "Unsqueeze",
+            inputs=["x_half_shape", "zero"],
+            outputs=["x_half_shape_0"],
+            name="Unsqueeze_0",
+        )
+        unsqueeze_1_node = helper.make_node(
+            "Unsqueeze",
+            inputs=["x_half_shape", "zero"],
+            outputs=["x_half_shape_1"],
+            name="Unsqueeze_1",
+        )
+        x_half_shape_nodes = [shape_node, gather_node, div_node, unsqueeze_0_node, unsqueeze_1_node]
+
+        # Calculate rotate_half
+        x1_node = helper.make_node(
+            "Slice",
+            inputs=["x", "zero", "x_half_shape_0", "three", "one"],
+            outputs=["x1"],
+            name="Slice_0",
+        )
+        x2_node = helper.make_node(
+            "Slice",
+            inputs=["x", "x_half_shape_1", "int_max", "three", "one"],
+            outputs=["x2"],
+            name="Slice_1",
+        )
+        neg_node = helper.make_node(
+            "Neg",
+            inputs=["x2"],
+            outputs=["x2_neg"],
+            name="Neg_0",
+        )
+        x_rotate_half_node = helper.make_node(
+            "Concat",
+            inputs=["x2_neg", "x1"],
+            outputs=["x_rotate_half"],
+            name="Concat_0",
+            axis=-1,
+        )
+        rotate_half_nodes = [x1_node, x2_node, neg_node, x_rotate_half_node]
+
+        # Calculate x_embed
+        x_cos_node = helper.make_node(
+            "Mul",
+            inputs=["x", "cos"],
+            outputs=["x_cos"],
+            name="Mul_0",
+        )
+        x_sin_node = helper.make_node(
+            "Mul",
+            inputs=["x_rotate_half", "sin"],
+            outputs=["x_rotate_half_sin"],
+            name="Mul_1",
+        )
+        end_node = helper.make_node(
+            "Add",
+            inputs=["x_cos", "x_rotate_half_sin"],
+            outputs=["output_0"],
+            name="Add_0",
+        )
+        x_embed_nodes = [start_node, x_cos_node, x_sin_node, end_node]
+
+        return x_half_shape_nodes + rotate_half_nodes + x_embed_nodes
+
+    def create_test_model(self, model_type: str, use_redundant_squeeze_ops: bool, initializers: List[TensorProto]):
+        apply_rope_nodes = self.create_apply_rope_path()
+        cache_nodes = self.create_cache_path(model_type, use_redundant_squeeze_ops)
+        inputs, outputs = self.create_inputs_and_outputs(model_type)
+
+        graph = helper.make_graph(
+            nodes=apply_rope_nodes + cache_nodes,
+            name="RotaryEmbedding_Graph",
+            inputs=inputs,
+            outputs=outputs,
+            initializer=initializers,
+        )
+        opset_import = helper.make_opsetid(domain="ai.onnx", version=13)
+        model = helper.make_model(graph, opset_imports=[opset_import])
+        return model
+
+    def check_models(self, interleaved: bool, model_type: str):
+        initializers = self.create_initializers()
+
+        expected_model_filename = "expected_model.onnx"
+        expected_model = self.create_fused_model(interleaved, initializers)
+        onnx.save(expected_model, expected_model_filename)
+
+        original_model_filename = "original_model.onnx"
+        use_redundant_squeeze_ops = True
+        original_model = self.create_test_model(model_type, use_redundant_squeeze_ops, initializers)
+        onnx.save(original_model, original_model_filename)
+
+        self.verify_fusion(expected_model_filename, original_model_filename)
+        os.remove(original_model_filename)
+
+        use_redundant_squeeze_ops = False
+        original_model = self.create_test_model(model_type, use_redundant_squeeze_ops, initializers)
+        onnx.save(original_model, original_model_filename)
+
+        self.verify_fusion(expected_model_filename, original_model_filename)
+        os.remove(expected_model_filename)
+        os.remove(original_model_filename)
+
+    # Hugging Face's `decoder_model.onnx`
+    def test_hf_decoder_model(self):
+        interleaved = False  # HF model does not use interleaving
+        model_type = "no_past"
+        self.check_models(interleaved, model_type)
+
+    # Hugging Face's `decoder_with_past_model.onnx`
+    def test_hf_decoder_with_past_model(self):
+        interleaved = False  # HF model does not use interleaving
+        model_type = "past"
+        self.check_models(interleaved, model_type)
+
+    # Hugging Face's `decoder_merged.onnx`
+    def test_hf_decoder_merged_model(self):
+        interleaved = False  # HF model does not use interleaving
+        model_type = "merged"
+        self.check_models(interleaved, model_type)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/onnxruntime/test/python/transformers/test_rotary_mha_fusion.py b/onnxruntime/test/python/transformers/test_rotary_mha_fusion.py
new file mode 100644
index 0000000000000..373ad86ced1a7
--- /dev/null
+++ b/onnxruntime/test/python/transformers/test_rotary_mha_fusion.py
@@ -0,0 +1,1210 @@
+# Copyright (c) Microsoft Corporation.  All rights reserved.
+# Licensed under the MIT License.  See License.txt in the project root for
+# license information.
+# --------------------------------------------------------------------------
+
+import os
+import sys
+import unittest
+from typing import List
+
+import numpy as np
+import onnx
+from onnx import NodeProto, TensorProto, helper
+from parity_utilities import find_transformers_source
+
+if find_transformers_source():
+    from fusion_options import FusionOptions
+    from onnx_model import OnnxModel
+    from optimizer import optimize_model
+else:
+    from onnxruntime.transformers.fusion_options import FusionOptions
+    from onnxruntime.transformers.onnx_model import OnnxModel
+    from onnxruntime.transformers.optimizer import optimize_model
+
+
+def float_tensor(name: str, shape: List[int], random=False):
+    low = 0.0
+    high = 1.0
+    total_elements = 1
+    for x in shape:
+        total_elements *= x
+    weights = [np.random.uniform(low, high) for _ in range(total_elements)] if random else [1.0] * total_elements
+    return helper.make_tensor(name, TensorProto.FLOAT, shape, weights)
+
+
+class TestRotaryAttentionFusion(unittest.TestCase):
+    def setUp(self):
+        self.batch_size = 2
+        self.sequence_length = 8
+        self.num_heads = 4
+        self.head_size = 6
+        self.hidden_size = self.num_heads * self.head_size
+
+        self.past_sequence_length = 2
+        self.max_sequence_length = 12
+
+    def verify_fusion(self, expected_model_path, original_model_path):
+        expected_model = OnnxModel(onnx.load(expected_model_path))
+        expected_model.topological_sort(is_deterministic=True)
+
+        model_type = "gpt2"
+        options = FusionOptions(model_type)
+        optimized_model = optimize_model(
+            original_model_path,
+            model_type,
+            self.num_heads,
+            self.hidden_size,
+            optimization_options=options,
+            opt_level=0,
+        )
+        optimized_model.topological_sort(is_deterministic=True)
+
+        self.assertTrue(str(expected_model.model.graph), str(optimized_model.model.graph))
+
+    def create_initializers(self, fused_model: bool = False):
+        initializers = [
+            float_tensor("cos_cache", [self.max_sequence_length, self.head_size // 2]),
+            float_tensor("sin_cache", [self.max_sequence_length, self.head_size // 2]),
+            float_tensor("q_weight", [self.hidden_size, self.hidden_size]),
+            float_tensor("k_weight", [self.hidden_size, self.hidden_size]),
+            float_tensor("v_weight", [self.hidden_size, self.hidden_size]),
+            float_tensor("o_weight", [self.hidden_size, self.hidden_size]),
+            helper.make_tensor(
+                "sqrt_head_size", TensorProto.FLOAT, [1], np.array([np.sqrt(self.head_size)], dtype=np.float32)
+            ),
+            helper.make_tensor("neg_int_max", TensorProto.FLOAT, [1], np.array([-sys.maxsize - 1], dtype=np.int64)),
+            helper.make_tensor("num_heads", TensorProto.FLOAT, [1], np.array([self.num_heads], dtype=np.float32)),
+            helper.make_tensor("head_size", TensorProto.FLOAT, [1], np.array([self.head_size], dtype=np.float32)),
+            helper.make_tensor("hidden_size", TensorProto.FLOAT, [1], np.array([self.hidden_size], dtype=np.float32)),
+            helper.make_tensor("zero", TensorProto.FLOAT, [1], np.array([0], dtype=np.int64)),
+            helper.make_tensor("one", TensorProto.FLOAT, [1], np.array([1], dtype=np.int64)),
+            helper.make_tensor("two", TensorProto.FLOAT, [1], np.array([2], dtype=np.int64)),
+            helper.make_tensor("three", TensorProto.FLOAT, [1], np.array([3], dtype=np.int64)),
+        ]
+        return initializers
+
+    def create_inputs_and_outputs(self, model_type: str):
+        attn_mask_size = [self.batch_size, self.sequence_length]
+        if model_type == "llama2_msft":
+            attn_mask_size.append(self.sequence_length)
+
+        inputs = [
+            helper.make_tensor_value_info(
+                "input_0", TensorProto.FLOAT, [self.batch_size, self.sequence_length, self.hidden_size]
+            ),
+            helper.make_tensor_value_info("position_ids", TensorProto.INT64, [self.batch_size, self.sequence_length]),
+            helper.make_tensor_value_info("attn_mask", TensorProto.INT64, attn_mask_size),
+        ]
+        if model_type in {"past", "merged", "llama2_msft", "70b_distributed_merged"}:
+            inputs.extend(
+                [
+                    helper.make_tensor_value_info(
+                        "past_key",
+                        TensorProto.FLOAT,
+                        [self.batch_size, self.num_heads, self.past_sequence_length, self.head_size],
+                    ),
+                    helper.make_tensor_value_info(
+                        "past_value",
+                        TensorProto.FLOAT,
+                        [self.batch_size, self.num_heads, self.past_sequence_length, self.head_size],
+                    ),
+                ]
+            )
+        outputs = [
+            helper.make_tensor_value_info(
+                "output_0", TensorProto.FLOAT, [self.batch_size, self.sequence_length, self.hidden_size]
+            ),
+            helper.make_tensor_value_info(
+                "present_key",
+                TensorProto.FLOAT,
+                [self.batch_size, self.num_heads, self.past_sequence_length + 1, self.head_size],
+            ),
+            helper.make_tensor_value_info(
+                "present_value",
+                TensorProto.FLOAT,
+                [self.batch_size, self.num_heads, self.past_sequence_length + 1, self.head_size],
+            ),
+        ]
+        return inputs, outputs
+
+    def create_matmul_nodes(self, is_fused: bool, model_type: str):
+        q_matmul_node = helper.make_node(
+            "MatMul",
+            inputs=["input_0", "q_weight"],
+            outputs=["q_out" if is_fused or model_type == "llama2_msft" else "q_matmul_out"],
+            name="Q_MatMul",
+        )
+
+        k_matmul_node = helper.make_node(
+            "MatMul",
+            inputs=["input_0", "k_weight"],
+            outputs=["k_out" if is_fused or model_type == "llama2_msft" else "k_matmul_out"],
+            name="K_MatMul",
+        )
+
+        v_matmul_node = helper.make_node(
+            "MatMul",
+            inputs=["input_0", "v_weight"],
+            outputs=["v_out"],
+            name="V_MatMul",
+        )
+
+        return [q_matmul_node, k_matmul_node, v_matmul_node]
+
+    def create_rotary_embeddings(
+        self,
+        is_fused: bool,
+        model_type: str,
+        interleaved: bool,
+        inputs: List[TensorProto],
+        initializers: List[TensorProto],
+    ):
+        def get_first_rope_input(node_type: str):
+            if is_fused or model_type == "llama2_msft":
+                # q_out/k_out
+                return f"{node_type}_out"
+            if model_type in {"no_past", "past", "merged", "70b_distributed_merged"}:
+                if node_type == "k":
+                    return "k_before_rope"
+                return "q_before_rope"
+            return ""
+
+        def get_first_rope_output(node_type: str):
+            if is_fused or model_type in {"llama2_msft", "past", "merged", "70b_distributed_merged"}:
+                if node_type == "q":
+                    return "q_rope"
+                return "k_rope"
+            if model_type in {"no_past"}:
+                if node_type == "k":
+                    return "present_key"
+                return "q_rope"
+            return ""
+
+        q_rope_node = helper.make_node(
+            "RotaryEmbedding",
+            inputs=[get_first_rope_input("q"), inputs[1].name, initializers[0].name, initializers[1].name],
+            outputs=[get_first_rope_output("q")],
+            name="Q_RotaryEmbedding",
+            interleaved=int(interleaved),
+        )
+
+        k_rope_node = helper.make_node(
+            "RotaryEmbedding",
+            inputs=[get_first_rope_input("k"), inputs[1].name, initializers[0].name, initializers[1].name],
+            outputs=[get_first_rope_output("k")],
+            name="K_RotaryEmbedding",
+            interleaved=int(interleaved),
+        )
+
+        return [q_rope_node, k_rope_node]
+
+    def create_q_path(self, model_type: str):
+        if model_type == "llama2_msft":
+            transpose_q_node = helper.make_node(
+                "Transpose",
+                inputs=["q_rope"],
+                outputs=["q_transposed"],
+                name="Transpose_q",
+                perm=[0, 2, 1, 3],
+            )
+            reshape_q_node = helper.make_node(
+                "Reshape",
+                inputs=["q_transposed", "concat_q_extra_out"],
+                outputs=["q"],
+                name="Reshape_q",
+            )
+            return [transpose_q_node, reshape_q_node]
+
+        reshape_q_node = helper.make_node(
+            "Reshape",
+            inputs=["q_matmul_out", "concat_q_extra_out"],
+            outputs=["q_reshaped"],
+            name="Reshape_q",
+        )
+        transpose_q_node = helper.make_node(
+            "Transpose",
+            inputs=["q_reshaped"],
+            outputs=["q_before_rope"],
+            name="Transpose_q",
+        )
+        return [reshape_q_node, transpose_q_node]
+
+    def create_k_path_llama2_msft(self):
+        # Create k cache slicing path
+        k_cache_unsqueeze_node = helper.make_node(
+            "Unsqueeze",
+            inputs=["position_ids", "zero"],
+            outputs=["k_pos_id"],
+        )
+        k_cache_slice_node = helper.make_node(
+            "Slice",
+            inputs=["past_key", "zero", "k_pos_id", "two", "one"],
+            outputs=["k_cache_sliced"],
+        )
+        # Create k path
+        transpose_k_1_node = helper.make_node(
+            "Transpose",
+            inputs=["k_rope"],
+            outputs=["k_rope_transposed"],
+            name="Transpose_k_1",
+            perm=[0, 2, 1, 3],
+        )
+        concat_k_node = helper.make_node(
+            "Concat",
+            inputs=["k_cache_sliced", "k_rope_transposed"],
+            outputs=["present_key"],
+            name="Concat_k",
+            axis=2,
+        )
+        transpose_k_2_node = helper.make_node(
+            "Transpose",
+            inputs=["present_key"],
+            outputs=["present_key_transposed"],
+            name="Transpose_k_2",
+            perm=[0, 2, 3, 1],
+        )
+        reshape_k_node = helper.make_node(
+            "Reshape",
+            inputs=["present_key_transposed", "concat_k_extra_out"],
+            outputs=["k"],
+            name="Reshape_k",
+        )
+        return [
+            k_cache_unsqueeze_node,
+            k_cache_slice_node,
+            transpose_k_1_node,
+            concat_k_node,
+            transpose_k_2_node,
+            reshape_k_node,
+        ]
+
+    def create_k_path_hf(self, model_type: str):
+        reshape_k_node = helper.make_node(
+            "Reshape",
+            inputs=["k_matmul_out", "concat_k_extra_out"],
+            outputs=["k_reshaped"],
+            name="Reshape_k",
+        )
+        transpose_k_1_node = helper.make_node(
+            "Transpose",
+            inputs=["k_reshaped"],
+            outputs=["k_before_rope"],
+            name="Transpose_k_1",
+            perm=[0, 2, 1, 3],
+        )
+        k_nodes = [reshape_k_node, transpose_k_1_node]
+
+        if model_type == "70b_distributed_merged":
+            concat_k_node = helper.make_node(
+                "Concat",
+                inputs=["past_key", "k_rope"],
+                outputs=["present_key"],
+                axis=2,
+            )
+            shape_k1 = helper.make_node("Shape", inputs=["present_value"], outputs=["shape_k1_out"], name="Shape_k1")
+            shape_k2 = helper.make_node("Shape", inputs=["present_value"], outputs=["shape_k2_out"], name="Shape_k2")
+            shape_k3 = helper.make_node("Shape", inputs=["present_value"], outputs=["shape_k3_out"], name="Shape_k3")
+            shape_k4 = helper.make_node("Shape", inputs=["present_value"], outputs=["shape_k4_out"], name="Shape_k4")
+
+            gather_k_1 = helper.make_node(
+                "Gather",
+                inputs=["shape_k1_out", "one"],
+                outputs=["gather_k1_out"],
+                name="Gather_k_1",
+                axis=0,
+            )
+            gather_k_2 = helper.make_node(
+                "Gather",
+                inputs=["shape_k2_out", "one"],
+                outputs=["gather_k2_out"],
+                name="Gather_k_2",
+                axis=0,
+            )
+            gather_k_3 = helper.make_node(
+                "Gather",
+                inputs=["shape_k3_out", "one"],
+                outputs=["gather_k3_out"],
+                name="Gather_k_3",
+                axis=0,
+            )
+            gather_k_4 = helper.make_node(
+                "Gather",
+                inputs=["shape_k4_out", "one"],
+                outputs=["gather_k4_out"],
+                name="Gather_k_4",
+                axis=0,
+            )
+
+            unsqueeze_k_1 = helper.make_node(
+                "Unsqueeze",
+                inputs=["present_value", "zero"],
+                outputs=["unsqueeze_k1_out"],
+                name="Unsqueeze_k1",
+            )
+            unsqueeze_k_2 = helper.make_node(
+                "Unsqueeze",
+                inputs=["gather_k1_out", "zero"],
+                outputs=["unsqueeze_k2_out"],
+                name="Unsqueeze_k2",
+            )
+            unsqueeze_k_3 = helper.make_node(
+                "Unsqueeze",
+                inputs=["gather_k2_out", "zero"],
+                outputs=["unsqueeze_k3_out"],
+                name="Unsqueeze_k3",
+            )
+            unsqueeze_k_4 = helper.make_node(
+                "Unsqueeze",
+                inputs=["gather_k3_out", "zero"],
+                outputs=["unsqueeze_k4_out"],
+                name="Unsqueeze_k4",
+            )
+            unsqueeze_k_5 = helper.make_node(
+                "Unsqueeze",
+                inputs=["gather_k4_out", "zero"],
+                outputs=["unsqueeze_k5_out"],
+                name="Unsqueeze_k5",
+            )
+
+            concat_k_2 = helper.make_node(
+                "Concat",
+                inputs=["unsqueeze_k2_out", "unsqueeze_k3_out", "One", "unsqueeze_k4_out", "unsqueeze_k5_out"],
+                outputs=["concat_k2_ouot"],
+                name="Concat_k2",
+                axis=0,
+            )
+            reshape_k_2 = helper.make_node(
+                "Reshape",
+                inputs=["concat_k2_ouot", "One"],
+                outputs=["reshape_k2_out"],
+                name="Reshape_k_2",
+            )
+            shape_k5 = helper.make_node("Shape", inputs=["reshape_k2_out"], outputs=["shape_k5_out"], name="Shape_k5")
+            constant_of_shape_k_1 = helper.make_node(
+                "ConstantOfShape",
+                inputs=["shape_k5_out"],
+                outputs=["constant_of_shape_k1_out"],
+                name="ConstantOfShape_k1",
+            )
+            mul_k_1 = helper.make_node(
+                "Mul",
+                inputs=["constant_of_shape_k1_out", "One"],
+                outputs=["mul_k1_out"],
+                name="mul_k1",
+            )
+            equal_k_1 = helper.make_node(
+                "Equal",
+                inputs=["reshape_k2_out", "mul_k1_out"],
+                outputs=["equal_k_1_out"],
+                name="equal_k1",
+            )
+            where_k_1 = helper.make_node(
+                "Where",
+                inputs=["equal_k_1_out", "constant_of_shape_k1_out", "reshape_k2_out"],
+                outputs=["where_k_1_out"],
+                name="where_k1",
+            )
+            unsqueeze_k_6 = helper.make_node(
+                "Unsqueeze",
+                inputs=["gather_k1_out", "zero"],
+                outputs=["unsqueeze_k6_out"],
+                name="Unsqueeze_k6",
+            )
+            mul_k_2 = helper.make_node(
+                "Mul",
+                inputs=["gather_k2_out", "One"],
+                outputs=["mul_k2_out"],
+                name="mul_k2",
+            )
+            unsqueeze_k_7 = helper.make_node(
+                "Unsqueeze",
+                inputs=["mul_k2_out", "zero"],
+                outputs=["unsqueeze_k7_out"],
+                name="Unsqueeze_k7",
+            )
+            unsqueeze_k_8 = helper.make_node(
+                "Unsqueeze",
+                inputs=["gather_k3_out", "zero"],
+                outputs=["unsqueeze_k8_out"],
+                name="Unsqueeze_k8",
+            )
+            unsqueeze_k_9 = helper.make_node(
+                "Unsqueeze",
+                inputs=["gather_k4_out", "zero"],
+                outputs=["unsqueeze_k9_out"],
+                name="Unsqueeze_k9",
+            )
+            concat_k_3 = helper.make_node(
+                "Concat",
+                inputs=["unsqueeze_k6_out", "unsqueeze_k7_out", "unsqueeze_k8_out", "unsqueeze_k9_out"],
+                outputs=["concat_k3_out"],
+                name="Concat_k3",
+                axis=0,
+            )
+            expand_k_1 = helper.make_node(
+                "Expand",
+                inputs=["unsqueeze_k1_out", "where_k_1_out"],
+                outputs=["expand_k1_out"],
+                name="expand_k1",
+            )
+            reshape_k_3 = helper.make_node(
+                "Reshape",
+                inputs=["expand_k1_out", "concat_k3_out"],
+                outputs=["reshape_k3_out"],
+                name="Reshape_k_3",
+            )
+            transpose_k_2_node = helper.make_node(
+                "Transpose",
+                inputs=["reshape_k3_out"],
+                outputs=["k"],
+                name="Transpose_k_2",
+                perm=[0, 1, 3, 2],
+            )
+
+            k_nodes_for_70b_model = [
+                concat_k_node,
+                shape_k1,
+                shape_k2,
+                shape_k3,
+                shape_k4,
+                gather_k_1,
+                gather_k_2,
+                gather_k_3,
+                gather_k_4,
+                unsqueeze_k_1,
+                unsqueeze_k_2,
+                unsqueeze_k_3,
+                unsqueeze_k_4,
+                unsqueeze_k_5,
+                concat_k_2,
+                reshape_k_2,
+                shape_k5,
+                constant_of_shape_k_1,
+                mul_k_1,
+                equal_k_1,
+                where_k_1,
+                unsqueeze_k_6,
+                mul_k_2,
+                unsqueeze_k_7,
+                unsqueeze_k_8,
+                unsqueeze_k_9,
+                concat_k_3,
+                expand_k_1,
+                reshape_k_3,
+                transpose_k_2_node,
+            ]
+            k_nodes.extend(k_nodes_for_70b_model)
+            return k_nodes
+        else:
+            if model_type in {"past", "merged"}:
+                concat_k_node = helper.make_node(
+                    "Concat",
+                    inputs=["past_key", "k_rope"],
+                    outputs=["present_key"],
+                    axis=2,
+                )
+                k_nodes.append(concat_k_node)
+
+            transpose_k_2_node = helper.make_node(
+                "Transpose",
+                inputs=["present_key"],
+                outputs=["k"],
+                name="Transpose_k_2",
+                perm=[0, 1, 3, 2],
+            )
+            return k_nodes + [transpose_k_2_node]  # noqa: RUF005
+
+    def create_k_path(self, model_type: str):
+        if model_type == "llama2_msft":
+            return self.create_k_path_llama2_msft()
+        return self.create_k_path_hf(model_type)
+
+    def create_attn_mask_path_llama2_msft(self):
+        x_shape_node = helper.make_node(
+            "Shape",
+            inputs=["input_0"],
+            outputs=["input_0_shape"],
+            name="Shape_input",
+        )
+        x_get_seq_len_node = helper.make_node(
+            "Gather",
+            inputs=["input_0_shape", "one"],
+            outputs=["input_0_seq_len"],
+            name="Gather_input",
+            axis=0,
+        )
+        x_new_seq_len_node = helper.make_node(
+            "Add",
+            inputs=["position_ids", "input_0_seq_len"],
+            outputs=["new_seq_len"],
+            name="Add_mask",
+        )
+        unsqueeze_0_node = helper.make_node(
+            "Unsqueeze",
+            inputs=["position_ids", "zero"],
+            outputs=["unsqueeze_mask_0_out"],
+            name="Unsqueeze_mask_0",
+        )
+        unsqueeze_1_node = helper.make_node(
+            "Unsqueeze",
+            inputs=["new_seq_len", "zero"],
+            outputs=["unsqueeze_mask_1_out"],
+            name="Unsqueeze_mask_1",
+        )
+        unsqueeze_2_node = helper.make_node(
+            "Unsqueeze",
+            inputs=["new_seq_len", "zero"],
+            outputs=["unsqueeze_mask_2_out"],
+            name="Unsqueeze_mask_2",
+        )
+        slice_mask_1_node = helper.make_node(
+            "Slice",
+            inputs=["attn_mask", "unsqueeze_mask_0_out", "unsqueeze_mask_1_out", "one", "one"],
+            outputs=["slice_mask_1_out"],
+            name="Slice_mask_1",
+        )
+        slice_mask_2_node = helper.make_node(
+            "Slice",
+            inputs=["slice_mask_1_out", "zero", "unsqueeze_mask_2_out", "two", "one"],
+            outputs=["slice_mask_2_out"],
+            name="Slice_mask_2",
+        )
+        concat_mask_node = helper.make_node(
+            "Concat",
+            inputs=["slice_mask_2_out" for _ in range(self.num_heads)],
+            outputs=["attn_mask_out"],
+            name="Concat_mask",
+            axis=0,
+        )
+        return [
+            x_shape_node,
+            x_get_seq_len_node,
+            x_new_seq_len_node,
+            unsqueeze_0_node,
+            unsqueeze_1_node,
+            unsqueeze_2_node,
+            slice_mask_1_node,
+            slice_mask_2_node,
+            concat_mask_node,
+        ]
+
+    def create_attn_mask_path_hf(self, model_type: str):
+        unsqueeze_1_node = helper.make_node(
+            "Unsqueeze",
+            inputs=["attn_mask", "one"],
+            outputs=["unsqueeze_1_mask_out"],
+            name="Unsqueeze_1_mask",
+        )
+        unsqueeze_2_node = helper.make_node(
+            "Unsqueeze",
+            inputs=["unsqueeze_1_mask_out", "two"],
+            outputs=["unsqueeze_2_mask_out"],
+            name="Unsqueeze_2_mask",
+        )
+        expand_node = helper.make_node(
+            "Expand",
+            inputs=["unsqueeze_2_mask_out", "zero"],
+            outputs=["expand_out"],
+            name="Expand_mask",
+        )
+        cast_node = helper.make_node(
+            "Cast",
+            inputs=["expand_out"],
+            outputs=["cast_out"],
+            name="Cast_mask",
+            to=TensorProto.FLOAT,
+        )
+        sub_node = helper.make_node(
+            "Sub",
+            inputs=["one", "cast_out"],
+            outputs=["sub_out"],
+            name="Sub_mask",
+        )
+        where_node = helper.make_node(
+            "Where",
+            inputs=["zero", "neg_int_max", "sub_out"],
+            outputs=["where_out" if model_type != "past" else "attn_mask_out"],
+            name="Where_mask",
+        )
+        attn_mask_nodes = [unsqueeze_1_node, unsqueeze_2_node, expand_node, cast_node, sub_node, where_node]
+
+        if model_type == "past":
+            return attn_mask_nodes
+
+        add_node = helper.make_node(
+            "Add",
+            inputs=["where_out", "zero"],
+            outputs=["attn_mask_out"],
+            name="Add_mask",
+        )
+        return attn_mask_nodes + [add_node]  # noqa: RUF005
+
+    def create_attn_mask_path(self, is_fused: bool, model_type: str):
+        if model_type == "llama2_msft":
+            attn_mask_nodes = self.create_attn_mask_path_llama2_msft()
+            if is_fused:
+                attn_mask_nodes.pop()
+                attn_mask_nodes[-1].output[0] = "attn_mask_out"
+            return attn_mask_nodes
+
+        attn_mask_nodes = self.create_attn_mask_path_hf(model_type)
+        if is_fused:
+            new_output_name = "attn_mask_out_mask"
+            attn_mask_nodes[-1].output[0] = new_output_name
+            concat_mask_node = helper.make_node(
+                "Concat",
+                inputs=[new_output_name for _ in range(self.num_heads)],
+                outputs=["attn_mask_out"],
+                name="Concat_mask",
+                axis=0,
+            )
+            attn_mask_nodes.append(concat_mask_node)
+        return attn_mask_nodes
+
+    def create_qk_path(self, model_type: str):
+        matmul_qk_node = helper.make_node(
+            "MatMul",
+            inputs=["q" if model_type == "llama2_msft" else "q_rope", "k"],
+            outputs=["qk"],
+            name="MatMul_q_k",
+        )
+        div_node = helper.make_node(
+            "Div",
+            inputs=["qk", "sqrt_head_size"],
+            outputs=["qk_div"],
+            name="Div_0",
+        )
+        add_node = helper.make_node(
+            "Add",
+            inputs=["qk_div", "attn_mask_out"],
+            outputs=["qk_plus_mask"],
+            name="Add_0",
+        )
+        softmax_node = helper.make_node(
+            "Softmax",
+            inputs=["qk_plus_mask"],
+            outputs=["softmax_out"],
+            name="Softmax_0",
+        )
+        return [matmul_qk_node, div_node, add_node, softmax_node]
+
+    def create_v_path(self, model_type: str):
+        reshape_v_1_node = helper.make_node(
+            "Reshape",
+            inputs=["v_out", "concat_v_1_extra_out"],
+            outputs=["reshape_v_1_out"],
+            name="Reshape_v_1",
+        )
+        transpose_v_1_node = helper.make_node(
+            "Transpose",
+            inputs=["reshape_v_1_out"],
+            outputs=["transpose_v_1_out" if model_type != "no_past" else "present_value"],
+            name="Transpose_v_1",
+        )
+        v_nodes = [reshape_v_1_node, transpose_v_1_node]
+
+        if model_type == "no_past":
+            return v_nodes
+
+        if model_type in {"past", "merged", "70b_distributed_merged"}:
+            concat_v_node = helper.make_node(
+                "Concat",
+                inputs=["past_value", "transpose_v_1_out"],
+                outputs=["present_value"],
+                name="Concat_v",
+                axis=2,
+            )
+
+            if model_type != "70b_distributed_merged":
+                return v_nodes + [concat_v_node]  # noqa: RUF005
+
+            shape_v1 = helper.make_node("Shape", inputs=["present_value"], outputs=["shape_1_out"], name="Shape_v1")
+            shape_v2 = helper.make_node("Shape", inputs=["present_value"], outputs=["shape_2_out"], name="Shape_v2")
+            shape_v3 = helper.make_node("Shape", inputs=["present_value"], outputs=["shape_3_out"], name="Shape_v3")
+            shape_v4 = helper.make_node("Shape", inputs=["present_value"], outputs=["shape_4_out"], name="Shape_v4")
+            gather_v_1 = helper.make_node(
+                "Gather",
+                inputs=["shape_1_out", "one"],
+                outputs=["gather_1_out"],
+                name="Gather_v1",
+                axis=0,
+            )
+            gather_v_2 = helper.make_node(
+                "Gather",
+                inputs=["shape_2_out", "one"],
+                outputs=["gather_2_out"],
+                name="Gather_v2",
+                axis=0,
+            )
+            gather_v_3 = helper.make_node(
+                "Gather",
+                inputs=["shape_3_out", "one"],
+                outputs=["gather_3_out"],
+                name="Gather_v3",
+                axis=0,
+            )
+            gather_v_4 = helper.make_node(
+                "Gather",
+                inputs=["shape_4_out", "one"],
+                outputs=["gather_4_out"],
+                name="Gather_v4",
+                axis=0,
+            )
+            unsqueeze_v_1 = helper.make_node(
+                "Unsqueeze",
+                inputs=["present_value", "zero"],
+                outputs=["unsqueeze_v1_out"],
+                name="Unsqueeze_v1",
+            )
+            unsqueeze_v_2 = helper.make_node(
+                "Unsqueeze",
+                inputs=["gather_1_out", "zero"],
+                outputs=["unsqueeze_v2_out"],
+                name="Unsqueeze_v2",
+            )
+            unsqueeze_v_3 = helper.make_node(
+                "Unsqueeze",
+                inputs=["gather_2_out", "zero"],
+                outputs=["unsqueeze_v3_out"],
+                name="Unsqueeze_v3",
+            )
+            unsqueeze_v_4 = helper.make_node(
+                "Unsqueeze",
+                inputs=["gather_3_out", "zero"],
+                outputs=["unsqueeze_v4_out"],
+                name="Unsqueeze_v4",
+            )
+            unsqueeze_v_5 = helper.make_node(
+                "Unsqueeze",
+                inputs=["gather_4_out", "zero"],
+                outputs=["unsqueeze_v5_out"],
+                name="Unsqueeze_v5",
+            )
+            concat_v_2 = helper.make_node(
+                "Concat",
+                inputs=["unsqueeze_v2_out", "unsqueeze_v3_out", "One", "unsqueeze_v4_out", "unsqueeze_v5_out"],
+                outputs=["concat_v2_ouot"],
+                name="Concat_v2",
+                axis=0,
+            )
+            reshape_v_2 = helper.make_node(
+                "Reshape",
+                inputs=["concat_v2_ouot", "One"],
+                outputs=["reshape_v2_out"],
+                name="Reshape_v2",
+            )
+            shape_v5 = helper.make_node("Shape", inputs=["reshape_v2_out"], outputs=["shape_5_out"], name="Shape_v5")
+            constant_of_shape_v_1 = helper.make_node(
+                "ConstantOfShape",
+                inputs=["shape_5_out"],
+                outputs=["constant_of_shape_v1_out"],
+                name="ConstantOfShape_v1",
+            )
+            mul_v_1 = helper.make_node(
+                "Mul",
+                inputs=["constant_of_shape_v1_out", "One"],
+                outputs=["mul_v1_out"],
+                name="mul_v1",
+            )
+            equal_v_1 = helper.make_node(
+                "Equal",
+                inputs=["reshape_v2_out", "mul_v1_out"],
+                outputs=["equal_v_1_out"],
+                name="equal_v1",
+            )
+            where_v_1 = helper.make_node(
+                "Where",
+                inputs=["equal_v_1_out", "constant_of_shape_v1_out", "reshape_v2_out"],
+                outputs=["where_v_1_out"],
+                name="where_v1",
+            )
+            unsqueeze_v_6 = helper.make_node(
+                "Unsqueeze",
+                inputs=["gather_1_out", "zero"],
+                outputs=["unsqueeze_v6_out"],
+                name="Unsqueeze_v6",
+            )
+            mul_v_2 = helper.make_node(
+                "Mul",
+                inputs=["gather_2_out", "One"],
+                outputs=["mul_v2_out"],
+                name="mul_v2",
+            )
+            unsqueeze_v_7 = helper.make_node(
+                "Unsqueeze",
+                inputs=["mul_v2_out", "zero"],
+                outputs=["unsqueeze_v7_out"],
+                name="Unsqueeze_v7",
+            )
+            unsqueeze_v_8 = helper.make_node(
+                "Unsqueeze",
+                inputs=["gather_3_out", "zero"],
+                outputs=["unsqueeze_v8_out"],
+                name="Unsqueeze_v8",
+            )
+            unsqueeze_v_9 = helper.make_node(
+                "Unsqueeze",
+                inputs=["gather_4_out", "zero"],
+                outputs=["unsqueeze_v9_out"],
+                name="Unsqueeze_v9",
+            )
+            concat_v_3 = helper.make_node(
+                "Concat",
+                inputs=["unsqueeze_v6_out", "unsqueeze_v7_out", "unsqueeze_v8_out", "unsqueeze_v9_out"],
+                outputs=["concat_v3_out"],
+                name="Concat_v3",
+                axis=0,
+            )
+            expand_v_1 = helper.make_node(
+                "Expand",
+                inputs=["unsqueeze_v1_out", "where_v_1_out"],
+                outputs=["expand_v1_out"],
+                name="expand_v1",
+            )
+            reshape_v_3 = helper.make_node(
+                "Reshape",
+                inputs=["expand_v1_out", "concat_v3_out"],
+                outputs=["reshape_v3_out"],
+                name="Reshape_v3",
+            )
+
+            v_nodes_for_70b_model = [
+                concat_v_node,
+                shape_v1,
+                shape_v2,
+                shape_v3,
+                shape_v4,
+                gather_v_1,
+                gather_v_2,
+                gather_v_3,
+                gather_v_4,
+                unsqueeze_v_1,
+                unsqueeze_v_2,
+                unsqueeze_v_3,
+                unsqueeze_v_4,
+                unsqueeze_v_5,
+                concat_v_2,
+                reshape_v_2,
+                shape_v5,
+                constant_of_shape_v_1,
+                mul_v_1,
+                equal_v_1,
+                where_v_1,
+                unsqueeze_v_6,
+                mul_v_2,
+                unsqueeze_v_7,
+                unsqueeze_v_8,
+                unsqueeze_v_9,
+                concat_v_3,
+                expand_v_1,
+                reshape_v_3,
+            ]
+            v_nodes.extend(v_nodes_for_70b_model)
+
+            return v_nodes
+
+        # Create extra nodes for `position_ids`
+        unsqueeze_v_node = helper.make_node(
+            "Unsqueeze",
+            inputs=["position_ids", "zero"],
+            outputs=["unsqueeze_v_out"],
+            name="Unsqueeze_v",
+        )
+        slice_v_node = helper.make_node(
+            "Slice",
+            inputs=["past_value", "zero", "unsqueeze_v_out", "two", "one"],
+            outputs=["v_cache_sliced_out"],
+            name="Slice_v",
+        )
+        concat_v_node = helper.make_node(
+            "Concat",
+            inputs=["v_cache_sliced_out", "transpose_v_1_out"],
+            outputs=["present_value"],
+            name="Concat_v",
+            axis=2,
+        )
+        v_nodes.extend([unsqueeze_v_node, slice_v_node, concat_v_node])
+
+        # Create remaining nodes for v path
+        transpose_v_2_node = helper.make_node(
+            "Transpose",
+            inputs=["present_value"],
+            outputs=["transpose_v_2_out"],
+            name="Transpose_v_2",
+        )
+        reshape_v_2_node = helper.make_node(
+            "Reshape",
+            inputs=["transpose_v_2_out", "concat_v_2_extra_out"],
+            outputs=["v"],
+            name="Reshape_v_2",
+        )
+        return v_nodes + [transpose_v_2_node, reshape_v_2_node]  # noqa: RUF005
+
+    def create_qkv_path(self, model_type: str):
+        matmul_qkv_node = helper.make_node(
+            "MatMul",
+            inputs=["softmax_out", "v" if model_type == "llama2_msft" else "present_value"],
+            outputs=["softmax_v_out"],
+            name="MatMul_softmax_v",
+        )
+        qkv_nodes = [matmul_qkv_node]
+
+        if model_type == "llama2_msft":
+            reshape_qkv_1_node = helper.make_node(
+                "Reshape",
+                inputs=["softmax_v_out", "concat_qkv_1_extra_out"],
+                outputs=["reshape_qkv_1_out"],
+                name="Reshape_qkv_1",
+            )
+            qkv_nodes.append(reshape_qkv_1_node)
+
+        transpose_qkv_node = helper.make_node(
+            "Transpose",
+            inputs=["reshape_qkv_1_out" if model_type == "llama2_msft" else "softmax_v_out"],
+            outputs=["transpose_qkv_out"],
+            name="Transpose_qkv",
+        )
+        reshape_qkv_2_node = helper.make_node(
+            "Reshape",
+            inputs=["transpose_qkv_out", "concat_qkv_2_extra_out"],
+            outputs=["attn_output"],
+            name="Reshape_qkv_2",
+        )
+
+        return qkv_nodes + [transpose_qkv_node, reshape_qkv_2_node]  # noqa: RUF005
+
+    def create_concat_unsqueeze_paths(self, model_type: str, reshape_nodes: List[NodeProto]):
+        # Create initial shape paths
+        shape_0_node = helper.make_node(
+            "Shape",
+            inputs=["input_0"],
+            outputs=["input_0_shape_0"],
+            name="Shape_0",
+        )
+        gather_0_node = helper.make_node(
+            "Gather",
+            inputs=["input_0_shape_0", "zero"],
+            outputs=["input_0_shape_0_indexed"],
+            name="Gather_0",
+            axis=0,
+        )
+        shape_1_node = helper.make_node(
+            "Shape",
+            inputs=["input_0"],
+            outputs=["input_0_shape_1"],
+            name="Shape_1",
+        )
+        gather_1_node = helper.make_node(
+            "Gather",
+            inputs=["input_0_shape_1", "one"],
+            outputs=["input_0_shape_1_indexed"],
+            name="Gather_1",
+            axis=0,
+        )
+        extra_nodes = [shape_0_node, gather_0_node, shape_1_node, gather_1_node]
+
+        if model_type == "llama2_msft":
+            mul_node = helper.make_node(
+                "Mul",
+                inputs=[gather_0_node.output[0], "num_heads"],
+                outputs=["mul_extra_out"],
+                name="Mul_extra_0",
+            )
+            add_node = helper.make_node(
+                "Add",
+                inputs=[gather_1_node.output[0], "position_ids"],
+                outputs=["add_extra_out"],
+                name="Add_extra_0",
+            )
+            extra_nodes.extend([mul_node, add_node])
+
+        for i, reshape_node in enumerate(reshape_nodes):
+            use_mul_and_add_nodes_0 = model_type == "llama2_msft" and reshape_node.output[0] in {"q", "k", "v"}
+            use_mul_and_add_nodes_1 = model_type == "llama2_msft" and reshape_node.output[0] in {"k", "v"}
+
+            unsqueeze_0_node = helper.make_node(
+                "Unsqueeze",
+                inputs=[gather_0_node.output[0] if not use_mul_and_add_nodes_0 else "mul_extra_out", "zero"],
+                outputs=[f"unsqueeze_extra_{2*i}"],
+                name=f"Unsqueeze_extra_{2*i}",
+            )
+            unsqueeze_1_node = helper.make_node(
+                "Unsqueeze",
+                inputs=[gather_1_node.output[0] if not use_mul_and_add_nodes_1 else "add_extra_out", "zero"],
+                outputs=[f"unsqueeze_extra_{2*i + 1}"],
+                name=f"Unsqueeze_extra_{2*i + 1}",
+            )
+
+            reshape_name = reshape_node.name
+            if reshape_name == "Reshape_qkv_2":
+                concat_node_inputs = [unsqueeze_0_node.output[0], unsqueeze_1_node.output[0], "hidden_size"]
+            elif reshape_name == "Reshape_qkv_1":
+                concat_node_inputs = [unsqueeze_0_node.output[0], "num_heads", unsqueeze_1_node.output[0], "head_size"]
+            elif reshape_name == "Reshape_v_2":
+                concat_node_inputs = [unsqueeze_0_node.output[0], unsqueeze_1_node.output[0], "head_size"]
+            elif reshape_name == "Reshape_v_1":
+                concat_node_inputs = [unsqueeze_0_node.output[0], unsqueeze_1_node.output[0], "num_heads", "head_size"]
+            elif reshape_name == "Reshape_k":
+                concat_node_inputs = [unsqueeze_0_node.output[0], "head_size", unsqueeze_1_node.output[0]]
+            elif reshape_name == "Reshape_q":
+                concat_node_inputs = [unsqueeze_0_node.output[0], unsqueeze_1_node.output[0], "head_size"]
+
+            concat_node = helper.make_node(
+                "Concat",
+                inputs=concat_node_inputs,
+                outputs=[reshape_nodes[i].input[1]],
+                name=f"Concat_extra_{i}",
+                axis=0,
+            )
+            extra_nodes.extend([unsqueeze_0_node, unsqueeze_1_node, concat_node])
+
+        return extra_nodes
+
+    def create_end_nodes(self, model_type):
+        if model_type == "70b_distributed_merged":
+            matmul_o_node = helper.make_node(
+                "MatMul",
+                inputs=["attn_output", "o_weight"],
+                outputs=["output_proj"],
+                name="MatMul_o_proj",
+            )
+            all_reduce = helper.make_node(
+                "AllReduce",
+                inputs=["output_proj"],
+                outputs=["allreduce_proj"],
+                name="allreduce_proj",
+            )
+            end_node = helper.make_node(
+                "Add",
+                inputs=["zero", "allreduce_proj"],
+                outputs=["output_0"],
+                name="Add_normalize_node",
+            )
+            return [matmul_o_node, all_reduce, end_node]
+
+        matmul_o_node = helper.make_node(
+            "MatMul",
+            inputs=["attn_output", "o_weight"],
+            outputs=["output_proj"],
+            name="MatMul_o_proj",
+        )
+        end_node = helper.make_node(
+            "Add",
+            inputs=["zero", "output_proj"],
+            outputs=["output_0"],
+            name="Add_normalize_node",
+        )
+        return [matmul_o_node, end_node]
+
+    def create_fused_model(self, model_type: str, interleaved: bool, initializers: List[TensorProto]):
+        inputs, outputs = self.create_inputs_and_outputs(model_type)
+        matmul_nodes = self.create_matmul_nodes(True, model_type=model_type)
+        rope_nodes = self.create_rotary_embeddings(True, model_type, interleaved, inputs, initializers)
+        attn_mask_nodes = self.create_attn_mask_path(True, model_type)
+
+        mha_inputs = [
+            rope_nodes[0].output[0],  # q
+            rope_nodes[1].output[0],  # k
+            matmul_nodes[-1].output[0],  # v
+            "",  # bias
+            "attn_mask_out" if model_type == "llama2_msft" else "",  # attn_mask
+            "attn_mask_out" if model_type != "llama2_msft" else "",  # add_qk
+            "past_key" if model_type != "no_past" else "",  # past_key
+            "past_value" if model_type != "no_past" else "",  # past_value
+        ]
+        mha_node = helper.make_node(
+            "MultiHeadAttention",
+            inputs=mha_inputs,
+            outputs=["attn_output", "present_key", "present_value"],
+            name="MultiHeadAttention_0",
+            num_heads=self.num_heads,
+        )
+
+        end_nodes = self.create_end_nodes(model_type)
+
+        graph = helper.make_graph(
+            nodes=matmul_nodes + rope_nodes + attn_mask_nodes + [mha_node] + end_nodes,
+            name="RotaryAttention_Graph",
+            inputs=inputs,
+            outputs=outputs,
+            initializer=initializers,
+        )
+        opset_import = helper.make_opsetid(domain="com.microsoft", version=1)
+        model = helper.make_model(graph, opset_imports=[opset_import])
+        return model
+
+    def create_test_model(self, model_type: str, interleaved: bool, initializers: List[TensorProto]):
+        inputs, outputs = self.create_inputs_and_outputs(model_type)
+        matmul_nodes = self.create_matmul_nodes(False, model_type)
+        rope_nodes = self.create_rotary_embeddings(False, model_type, interleaved, inputs, initializers)
+
+        # Create main paths
+        q_nodes = self.create_q_path(model_type)
+        k_nodes = self.create_k_path(model_type)
+        attn_mask_nodes = self.create_attn_mask_path(False, model_type)
+        qk_nodes = self.create_qk_path(model_type)
+        v_nodes = self.create_v_path(model_type)
+        qkv_nodes = self.create_qkv_path(model_type)
+
+        reshape_nodes = list(filter(lambda node: node.op_type == "Reshape", q_nodes + k_nodes + v_nodes + qkv_nodes))
+        extra_nodes = self.create_concat_unsqueeze_paths(model_type, reshape_nodes)
+
+        end_nodes = self.create_end_nodes(model_type)
+
+        first_set_of_nodes = matmul_nodes + rope_nodes + q_nodes + k_nodes + attn_mask_nodes
+        second_set_of_nodes = qk_nodes + v_nodes + qkv_nodes + extra_nodes + end_nodes
+        graph = helper.make_graph(
+            nodes=first_set_of_nodes + second_set_of_nodes,
+            name="RotaryAttention_Graph",
+            inputs=inputs,
+            outputs=outputs,
+            initializer=initializers,
+        )
+        opset_import = helper.make_opsetid(domain="ai.onnx", version=17)
+        model = helper.make_model(graph, opset_imports=[opset_import])
+        return model
+
+    def check_models(self, model_type: str, interleaved: bool):
+        initializers = self.create_initializers()
+
+        expected_model_filename = "expected_model.onnx"
+        expected_model = self.create_fused_model(model_type, interleaved, initializers)
+        onnx.save(expected_model, expected_model_filename)
+
+        original_model_filename = "original_model.onnx"
+        original_model = self.create_test_model(model_type, interleaved, initializers)
+        onnx.save(original_model, original_model_filename)
+
+        self.verify_fusion(expected_model_filename, original_model_filename)
+        os.remove(expected_model_filename)
+        os.remove(original_model_filename)
+
+    def test_llama2_msft_model(self):
+        model_type = "llama2_msft"
+        interleaved = True
+        self.check_models(model_type, interleaved)
+
+    def test_hf_decoder_model(self):
+        model_type = "no_past"
+        interleaved = False
+        self.check_models(model_type, interleaved)
+
+    def test_hf_decoder_with_past_model(self):
+        model_type = "past"
+        interleaved = False
+        self.check_models(model_type, interleaved)
+
+    def test_hf_decoder_merged_model(self):
+        model_type = "merged"
+        interleaved = False
+        self.check_models(model_type, interleaved)
+
+    def test_hf_70b_distributed_decoder_merged_model(self):
+        model_type = "70b_distributed_merged"
+        interleaved = False
+        self.check_models(model_type, interleaved)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/onnxruntime/test/python/transformers/test_simplified_layernorm_fusion.py b/onnxruntime/test/python/transformers/test_simplified_layernorm_fusion.py
new file mode 100644
index 0000000000000..e86bdda7baffb
--- /dev/null
+++ b/onnxruntime/test/python/transformers/test_simplified_layernorm_fusion.py
@@ -0,0 +1,243 @@
+# Copyright (c) Microsoft Corporation.  All rights reserved.
+# Licensed under the MIT License.  See License.txt in the project root for
+# license information.
+# --------------------------------------------------------------------------
+
+import os
+import unittest
+from typing import List
+
+import numpy as np
+import onnx
+from onnx import TensorProto, helper
+from parity_utilities import find_transformers_source
+
+if find_transformers_source():
+    from fusion_options import FusionOptions
+    from onnx_model import OnnxModel
+    from optimizer import optimize_model
+else:
+    from onnxruntime.transformers.fusion_options import FusionOptions
+    from onnxruntime.transformers.onnx_model import OnnxModel
+    from onnxruntime.transformers.optimizer import optimize_model
+
+
+def float_tensor(name: str, shape: List[int], random=False):
+    low = 0.0
+    high = 1.0
+    total_elements = 1
+    for x in shape:
+        total_elements *= x
+    weights = [np.random.uniform(low, high) for _ in range(total_elements)] if random else [1.0] * total_elements
+    return helper.make_tensor(name, TensorProto.FLOAT, shape, weights)
+
+
+class TestSimplifiedLayerNormFusion(unittest.TestCase):
+    def setUp(self):
+        self.vocab_size = 5
+        self.batch_size = 2
+        self.sequence_length = 8
+        self.hidden_size = 16
+        self.epsilon = 0.000009999999747378752
+
+    def verify_fusion(self, expected_model_path, original_model_path):
+        expected_model = OnnxModel(onnx.load(expected_model_path))
+        expected_model.topological_sort(is_deterministic=True)
+
+        options = FusionOptions("gpt2")
+        optimized_model = optimize_model(original_model_path, optimization_options=options)
+        optimized_model.topological_sort(is_deterministic=True)
+
+        self.assertTrue(str(expected_model.model.graph), str(optimized_model.model.graph))
+
+    def create_initializers(self, use_embed_weight: bool = False):
+        initializers = [
+            helper.make_tensor("Two", TensorProto.FLOAT, [1], np.array([2], dtype=np.float32)),
+            helper.make_tensor("epsilon", TensorProto.FLOAT, [1], np.array([self.epsilon], dtype=np.float32)),
+            helper.make_tensor("One", TensorProto.FLOAT, [1], np.array([1], dtype=np.float32)),
+            float_tensor("scale", [self.hidden_size]),
+        ]
+        if use_embed_weight:
+            initializers = [  # noqa: RUF005
+                float_tensor("embed_weight", [self.vocab_size, self.hidden_size])
+            ] + initializers
+        return initializers
+
+    def create_inputs_and_outputs(self, start_node_type: str):
+        inputs, start_node = None, None
+        if start_node_type == "Add":
+            start_node = helper.make_node(
+                "Add",
+                inputs=["input_0", "input_1"],
+                outputs=["D"],
+                name="Add_0",
+            )
+            input_0 = helper.make_tensor_value_info(
+                "input_0",
+                TensorProto.FLOAT,
+                [self.batch_size, self.sequence_length, self.hidden_size],
+            )
+            input_1 = helper.make_tensor_value_info(
+                "input_1",
+                TensorProto.FLOAT,
+                [self.batch_size, self.sequence_length, self.hidden_size],
+            )
+            inputs = [input_0, input_1]
+        elif start_node_type == "Gather":
+            start_node = helper.make_node(
+                "Gather",
+                inputs=["embed_weight", "input_0"],
+                outputs=["D"],
+                name="Gather_0",
+            )
+            input_0 = helper.make_tensor_value_info(
+                "input_0",
+                TensorProto.INT64,
+                [self.batch_size, self.sequence_length],
+            )
+            inputs = [input_0]
+        else:
+            # start_node_type is a graph input
+            assert start_node_type == "GraphInput"
+            input_0 = helper.make_tensor_value_info(
+                "D",
+                TensorProto.FLOAT,
+                [self.batch_size, self.sequence_length, self.hidden_size],
+            )
+            inputs = [input_0]
+
+        outputs = [
+            helper.make_tensor_value_info(
+                "output_0",
+                TensorProto.FLOAT,
+                [self.batch_size, self.sequence_length, self.hidden_size],
+            )
+        ]
+        return inputs, outputs, start_node
+
+    def create_fused_model(self, start_node_type: str, initializers: List[TensorProto]):
+        inputs, outputs, start_node = self.create_inputs_and_outputs(start_node_type)
+
+        sln_node = helper.make_node(
+            "SimplifiedLayerNormalization",
+            inputs=[start_node.output[0] if start_node is not None else "D", initializers[0].name],
+            outputs=[outputs[0].name],
+            axis=-1,
+            epsilon=initializers[2].float_data[0],
+            stash_type=1,
+        )
+
+        graph = helper.make_graph(
+            nodes=[sln_node] + ([] if start_node is None else [start_node]),
+            name="SimplifiedLayerNorm_Graph",
+            inputs=inputs,
+            outputs=outputs,
+            initializer=initializers,
+        )
+        opset_import = helper.make_opsetid(domain="com.microsoft", version=1)
+        model = helper.make_model(graph, opset_imports=[opset_import])
+        return model
+
+    # Notation follows https://onnx.ai/onnx/operators/onnx__LayerNormalization.html#summary
+    def create_test_model(self, start_node_type: str, first_parent_idx: int, initializers: List[TensorProto]):
+        end_node = helper.make_node(
+            "Mul",
+            inputs=["scale", "Normalized"] if first_parent_idx == 1 else ["Normalized", "scale"],
+            outputs=["output_0"],
+            name="Mul_1",
+        )
+        mul_node = helper.make_node(
+            "Mul",
+            inputs=["D", "InvStdDev"],
+            outputs=["Normalized"],
+            name="Mul_0",
+        )
+        div_node = helper.make_node(
+            "Div",
+            inputs=["One", "StdDev"],
+            outputs=["InvStdDev"],
+            name="Div_0",
+        )
+        sqrt_node = helper.make_node(
+            "Sqrt",
+            inputs=["VarEps"],
+            outputs=["StdDev"],
+            name="Sqrt_0",
+        )
+        add_node = helper.make_node(
+            "Add",
+            inputs=["Var", "epsilon"],
+            outputs=["VarEps"],
+            name="Add_1",
+        )
+        reducemean_node = helper.make_node(
+            "ReduceMean",
+            inputs=["DD"],
+            outputs=["Var"],
+            name="ReduceMean_0",
+        )
+        pow_node = helper.make_node(
+            "Pow",
+            inputs=["D", "Two"],
+            outputs=["DD"],
+            name="Pow_0",
+        )
+
+        inputs, outputs, start_node = self.create_inputs_and_outputs(start_node_type)
+
+        main_nodes = [pow_node, reducemean_node, add_node, sqrt_node, div_node, mul_node, end_node]
+        graph = helper.make_graph(
+            nodes=main_nodes + ([] if start_node is None else [start_node]),
+            name="SimplifiedLayerNorm_Graph",
+            inputs=inputs,
+            outputs=outputs,
+            initializer=initializers,
+        )
+        opset_import = helper.make_opsetid(domain="com.microsoft", version=1)
+        model = helper.make_model(graph, opset_imports=[opset_import])
+        return model
+
+    def check_models(self, start_node_type: str, first_parent_idx: int, initializers: List[TensorProto]):
+        expected_model_filename = "expected_model.onnx"
+        expected_model = self.create_fused_model(start_node_type, initializers)
+        onnx.save(expected_model, expected_model_filename)
+
+        original_model_filename = "original_model.onnx"
+        original_model = self.create_test_model(start_node_type, first_parent_idx, initializers)
+        onnx.save(original_model, original_model_filename)
+
+        self.verify_fusion(expected_model_filename, original_model_filename)
+        os.remove(expected_model_filename)
+        os.remove(original_model_filename)
+
+    # sim_ln_nodes_1
+    def test_simplified_layernorm_add_idx1(self):
+        start_node_type = "Add"
+        first_parent_idx = 1
+        initializers = self.create_initializers()
+        self.check_models(start_node_type, first_parent_idx, initializers)
+
+    # sim_ln_nodes_2
+    def test_simplified_layernorm_gather_idx1(self):
+        start_node_type = "Gather"
+        first_parent_idx = 1
+        initializers = self.create_initializers(use_embed_weight=True)
+        self.check_models(start_node_type, first_parent_idx, initializers)
+
+    # sim_ln_nodes_3
+    def test_simplified_layernorm_add_idx0(self):
+        start_node_type = "Add"
+        first_parent_idx = 0
+        initializers = self.create_initializers()
+        self.check_models(start_node_type, first_parent_idx, initializers)
+
+    # sim_ln_nodes_4
+    def test_simplified_layernorm_gather_graph_input(self):
+        start_node_type = "GraphInput"
+        first_parent_idx = 0
+        initializers = self.create_initializers()
+        self.check_models(start_node_type, first_parent_idx, initializers)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/onnxruntime/test/python/transformers/test_whisper.py b/onnxruntime/test/python/transformers/test_whisper.py
index ebda0bccaadcf..ceda5a88c3925 100644
--- a/onnxruntime/test/python/transformers/test_whisper.py
+++ b/onnxruntime/test/python/transformers/test_whisper.py
@@ -50,7 +50,7 @@ def verify_fusion(self, optimized_model, expected_model_filename):
                 )
             )
 
-    # Attention type #1 in onnx_model_bart.py
+    # Attention type #1 in fusion_bart_attention.py
     def test_encoder_attention_fusion_with_skiplayernorm(self):
         num_heads = 4
         hidden_size = 64
@@ -67,7 +67,7 @@ def test_encoder_attention_fusion_with_skiplayernorm(self):
         os.remove(model_path)
         self.verify_fusion(optimized_model, "encoder_attention_with_sln_fused.onnx")
 
-    # Attention type #2 in onnx_model_bart.py
+    # Attention type #2 in fusion_bart_attention.py
     def test_decoder_attention_fusion_with_skiplayernorm(self):
         num_heads = 4
         hidden_size = 64
@@ -84,7 +84,7 @@ def test_decoder_attention_fusion_with_skiplayernorm(self):
         os.remove(model_path)
         self.verify_fusion(optimized_model, "decoder_attention_with_sln_fused.onnx")
 
-    # Attention type #4 in onnx_model_bart.py
+    # Attention type #4 in fusion_bart_attention.py
     def test_decoder_multihead_attention_fusion(self):
         num_heads = 4
         hidden_size = 64
@@ -100,7 +100,7 @@ def test_decoder_multihead_attention_fusion(self):
         os.remove(model_path)
         self.verify_fusion(optimized_model, "decoder_mha_fused.onnx")
 
-    # Attention type #3 in onnx_model_bart.py
+    # Attention type #3 in fusion_bart_attention.py
     def test_decoder_with_past_multihead_self_attention_fusion_with_skiplayernorm(self):
         num_heads = 4
         hidden_size = 64
@@ -118,7 +118,7 @@ def test_decoder_with_past_multihead_self_attention_fusion_with_skiplayernorm(se
         os.remove(model_path)
         self.verify_fusion(optimized_model, "decoder_with_past_self_mha_fused.onnx")
 
-    # Attention type #5 in onnx_model_bart.py
+    # Attention type #5 in fusion_bart_attention.py
     def test_decoder_with_past_multihead_cross_attention_fusion(self):
         num_heads = 4
         hidden_size = 64
@@ -134,7 +134,7 @@ def test_decoder_with_past_multihead_cross_attention_fusion(self):
         os.remove(model_path)
         self.verify_fusion(optimized_model, "decoder_with_past_cross_mha_fused.onnx")
 
-    # Attention type #4 in onnx_model_bart.py
+    # Attention type #4 in fusion_bart_attention.py
     def test_decoder_multihead_attention_split_bias_fusion(self):
         num_heads = 4
         hidden_size = 64
@@ -151,7 +151,7 @@ def test_decoder_multihead_attention_split_bias_fusion(self):
         os.remove(model_path)
         self.verify_fusion(optimized_model, "decoder_mha_split_bias_fused.onnx")
 
-    # Attention type #3 in onnx_model_bart.py
+    # Attention type #3 in fusion_bart_attention.py
     def test_decoder_with_past_multihead_self_attention_split_bias_fusion_with_skiplayernorm(self):
         num_heads = 4
         hidden_size = 64
@@ -171,7 +171,7 @@ def test_decoder_with_past_multihead_self_attention_split_bias_fusion_with_skipl
         os.remove(model_path)
         self.verify_fusion(optimized_model, "decoder_with_past_self_mha_split_bias_fused.onnx")
 
-    # Attention type #5 in onnx_model_bart.py
+    # Attention type #5 in fusion_bart_attention.py
     def test_decoder_with_past_multihead_cross_attention_split_bias_fusion(self):
         num_heads = 4
         hidden_size = 64
diff --git a/onnxruntime/test/python/transformers/test_whisper_timestamp_processor.py b/onnxruntime/test/python/transformers/test_whisper_timestamp_processor.py
index 66200af06f511..77ce09d7e793b 100644
--- a/onnxruntime/test/python/transformers/test_whisper_timestamp_processor.py
+++ b/onnxruntime/test/python/transformers/test_whisper_timestamp_processor.py
@@ -10,7 +10,7 @@
 import pytest
 import torch
 
-from onnxruntime import InferenceSession, SessionOptions
+from onnxruntime import InferenceSession, SessionOptions, get_available_providers
 
 
 class TestTimestampProcessor(unittest.TestCase):
@@ -52,12 +52,13 @@ def run_timestamp(self, provider: str):
         ort_transcription = processor.batch_decode(
             ort_out_tensor[0][0].view(1, -1), skip_special_tokens=True, output_offsets=True
         )
+        print(ort_transcription)
         expected_transcription = [
             {
-                "text": " Mr. Quilter is the apostle of the middle classes and we are glad to welcome his gospel.",
+                "text": "<|0.00|> Mr. Quilter is the apostle of the middle classes and we are glad to welcome his gospel.<|5.44|>",
                 "offsets": [
                     {
-                        "text": " Mr. Quilter is the apostle of the middle classes and we are glad to welcome his gospel.",
+                        "text": "<|0.00|> Mr. Quilter is the apostle of the middle classes and we are glad to welcome his gospel.<|5.44|>",
                         "timestamp": (0.0, 5.44),
                     }
                 ],
@@ -70,6 +71,12 @@ def test_timestamp_cpu(self):
         provider = "CPUExecutionProvider"
         self.run_timestamp(provider)
 
+    @pytest.mark.slow
+    def test_timestamp_cuda(self):
+        cuda_provider = "CUDAExecutionProvider"
+        if cuda_provider in get_available_providers():
+            self.run_timestamp(cuda_provider)
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/onnxruntime/test/shared_lib/test_inference.cc b/onnxruntime/test/shared_lib/test_inference.cc
index 8357ce22fb710..7dee0bc41a6f3 100644
--- a/onnxruntime/test/shared_lib/test_inference.cc
+++ b/onnxruntime/test/shared_lib/test_inference.cc
@@ -183,7 +183,10 @@ static constexpr PATH_TYPE SEQUENCE_MODEL_URI = TSTR("testdata/sequence_length.o
 static constexpr PATH_TYPE SEQUENCE_MODEL_URI_2 = TSTR("testdata/optional_sequence_tensor.onnx");
 #endif
 static constexpr PATH_TYPE CUSTOM_OP_MODEL_URI = TSTR("testdata/foo_1.onnx");
+static constexpr PATH_TYPE CUSTOM_OP_LIBRARY_ATTR_TESTER_URI = TSTR("testdata/custom_op_library/attr_tester.onnx");
 static constexpr PATH_TYPE CUSTOM_OP_LIBRARY_TEST_MODEL_URI = TSTR("testdata/custom_op_library/custom_op_test.onnx");
+static constexpr PATH_TYPE CUSTOM_OP_LIBRARY_COPY_TENSOR_ARRAY_2 = TSTR("testdata/custom_op_library/copy_2_inputs_2_outputs.onnx");
+static constexpr PATH_TYPE CUSTOM_OP_LIBRARY_COPY_TENSOR_ARRAY_3 = TSTR("testdata/custom_op_library/copy_3_inputs_3_outputs.onnx");
 #if !defined(DISABLE_FLOAT8_TYPES)
 static constexpr PATH_TYPE CUSTOM_OP_LIBRARY_TEST_MODEL_FLOAT8_URI = TSTR("testdata/custom_op_library/custom_op_test_float8.onnx");
 #endif
@@ -1406,6 +1409,89 @@ TEST(CApiTest, test_custom_op_library) {
 #endif
 }
 
+#if defined(__ANDROID__)
+TEST(CApiTest, DISABLED_test_custom_op_shape_infer_attr) {
+// To accomodate a reduced op build pipeline
+#elif defined(REDUCED_OPS_BUILD) && defined(USE_CUDA)
+TEST(CApiTest, DISABLED_test_custom_op_shape_infer_attr) {
+#else
+TEST(CApiTest, test_custom_op_shape_infer_attr) {
+#endif
+  std::vector<Input> inputs(1);
+  inputs[0].name = "input_0";
+  inputs[0].dims = {5};
+  inputs[0].values = {1.f, 2.f, 3.f, 4.f, 5.f};
+
+  // prepare expected inputs and outputs
+  std::vector<int64_t> expected_dims_y = {5};
+  std::vector<float> expected_values_y = {6.f, 12.f, 18.f, 24.f, 30.f};
+
+  onnxruntime::PathString lib_name;
+#if defined(_WIN32)
+  lib_name = ORT_TSTR("custom_op_library.dll");
+#elif defined(__APPLE__)
+  lib_name = ORT_TSTR("libcustom_op_library.dylib");
+#else
+  lib_name = ORT_TSTR("./libcustom_op_library.so");
+#endif
+
+  TestInference<float>(*ort_env, CUSTOM_OP_LIBRARY_ATTR_TESTER_URI, inputs, "output_0", expected_dims_y,
+                       expected_values_y, 0, nullptr, lib_name.c_str());
+}
+
+// It has memory leak. The OrtCustomOpDomain created in custom_op_library.cc:RegisterCustomOps function was not freed
+#if defined(__ANDROID__)
+TEST(CApiTest, test_custom_op_library_copy_variadic) {
+// To accomodate a reduced op build pipeline
+#elif defined(REDUCED_OPS_BUILD) && defined(USE_CUDA)
+TEST(CApiTest, test_custom_op_library_copy_variadic) {
+#else
+TEST(CApiTest, test_custom_op_library_copy_variadic) {
+#endif
+  std::cout << "Running inference using custom op shared library" << std::endl;
+
+  std::vector<Input> inputs(2);
+  inputs[0].name = "input_0";
+  inputs[0].dims = {15};
+  inputs[0].values = {1.1f, 2.2f, 3.3f, 4.4f, 5.5f,
+                      6.6f, 7.7f, 8.8f, 9.9f, 10.0f,
+                      11.1f, 12.2f, 13.3f, 14.4f, 15.5f};
+  inputs[1].name = "input_1";
+  inputs[1].dims = {15};
+  inputs[1].values = {15.5f, 14.4f, 13.3f, 12.2f, 11.1f,
+                      10.0f, 9.9f, 8.8f, 7.7f, 6.6f,
+                      5.5f, 4.4f, 3.3f, 2.2f, 1.1f};
+
+  // prepare expected inputs and outputs
+  std::vector<int64_t> expected_dims_y = {15};
+  std::vector<float> expected_values_y = inputs[1].values;
+
+  onnxruntime::PathString lib_name;
+#if defined(_WIN32)
+  lib_name = ORT_TSTR("custom_op_library.dll");
+#elif defined(__APPLE__)
+  lib_name = ORT_TSTR("libcustom_op_library.dylib");
+#else
+  lib_name = ORT_TSTR("./libcustom_op_library.so");
+#endif
+
+  TestInference<float>(*ort_env, CUSTOM_OP_LIBRARY_COPY_TENSOR_ARRAY_2,
+                       inputs, "output_1", expected_dims_y,
+                       expected_values_y, 0, nullptr, lib_name.c_str());
+
+  inputs.push_back({});
+  inputs[2].name = "input_2";
+  inputs[2].dims = {15};
+  inputs[2].values = {6.6f, 7.7f, 8.8f, 9.9f, 10.0f,
+                      1.1f, 2.2f, 3.3f, 4.4f, 5.5f,
+                      11.1f, 12.2f, 13.3f, 14.4f, 15.5f};
+
+  expected_values_y = inputs[2].values;
+  TestInference<float>(*ort_env, CUSTOM_OP_LIBRARY_COPY_TENSOR_ARRAY_3,
+                       inputs, "output_2", expected_dims_y,
+                       expected_values_y, 0, nullptr, lib_name.c_str());
+}
+
 #if !defined(DISABLE_FLOAT8_TYPES)
 
 struct InputF8 {
@@ -2746,6 +2832,184 @@ TEST(CApiTest, ConfigureCudaArenaAndDemonstrateMemoryArenaShrinkage) {
 #endif
 
 #ifdef USE_TENSORRT
+TEST(TensorrtExecutionProviderTest, ShapeTensorTest) {
+  const auto& api = Ort::GetApi();
+
+  // Test input tensor which is shape tensor with explicit trt profile shapes
+  Ort::SessionOptions session_options;
+  OrtTensorRTProviderOptionsV2* trt_options;
+  ASSERT_TRUE(api.CreateTensorRTProviderOptions(&trt_options) == nullptr);
+  std::unique_ptr<OrtTensorRTProviderOptionsV2, decltype(api.ReleaseTensorRTProviderOptions)>
+      rel_trt_options(trt_options, api.ReleaseTensorRTProviderOptions);
+
+  const char* trt_profile_min_shapes = "data:2x2,shape:4x1";
+  const char* trt_profile_max_shapes = "data:2x2,shape:4x1";
+  const char* trt_profile_opt_shapes = "data:2x2,shape:4x1";
+  std::vector<const char*> keys{"trt_profile_min_shapes", "trt_profile_max_shapes", "trt_profile_opt_shapes"};
+  std::vector<const char*> values{trt_profile_min_shapes, trt_profile_max_shapes, trt_profile_opt_shapes};
+  ASSERT_TRUE(api.UpdateTensorRTProviderOptions(rel_trt_options.get(), keys.data(), values.data(), keys.size()) == nullptr);
+  ASSERT_TRUE(api.SessionOptionsAppendExecutionProvider_TensorRT_V2(
+                  static_cast<OrtSessionOptions*>(session_options),
+                  rel_trt_options.get()) == nullptr);
+
+  auto model_path = ORT_TSTR("testdata/trt_reshape.onnx");
+
+  std::vector<float> input_value_0{1.1f, 1.2f, 1.3f, 1.4f};
+  std::vector<int64_t> input_shape_0{2, 2};
+  std::vector<int64_t> input_value_1{4, 1};
+  std::vector<int64_t> input_shape_1{2};
+
+  std::vector<const char*> input_names{"data", "shape"};
+  Ort::MemoryInfo info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
+
+  std::vector<Ort::Value> ort_inputs;
+  ort_inputs.emplace_back(Ort::Value::CreateTensor<float>(info, input_value_0.data(), input_value_0.size(), input_shape_0.data(), input_shape_0.size()));
+  ort_inputs.emplace_back(Ort::Value::CreateTensor<int64_t>(info, input_value_1.data(), input_value_1.size(), input_shape_1.data(), input_shape_1.size()));
+
+  const char* output_names[] = {"reshaped"};
+
+  Ort::Session session(*ort_env, model_path, session_options);
+  session.Run(Ort::RunOptions{}, input_names.data(), ort_inputs.data(), ort_inputs.size(), output_names, countof(output_names));
+
+  // Test input tensor which is shape tensor with implicit trt profile shapes
+  Ort::SessionOptions session_options_2;
+  OrtTensorRTProviderOptionsV2* trt_options_2;
+  ASSERT_TRUE(api.CreateTensorRTProviderOptions(&trt_options_2) == nullptr);
+  std::unique_ptr<OrtTensorRTProviderOptionsV2, decltype(api.ReleaseTensorRTProviderOptions)>
+      rel_trt_options_2(trt_options_2, api.ReleaseTensorRTProviderOptions);
+  ASSERT_TRUE(api.SessionOptionsAppendExecutionProvider_TensorRT_V2(
+                  static_cast<OrtSessionOptions*>(session_options_2),
+                  rel_trt_options_2.get()) == nullptr);
+  Ort::Session session_2(*ort_env, model_path, session_options_2);
+  session_2.Run(Ort::RunOptions{}, input_names.data(), ort_inputs.data(), ort_inputs.size(), output_names, countof(output_names));
+}
+
+TEST(CApiTest, TestExternalCUDAStreamWithIOBinding) {
+  const auto& api = Ort::GetApi();
+  Ort::SessionOptions session_options;
+
+  OrtTensorRTProviderOptionsV2* trt_options;
+  ASSERT_TRUE(api.CreateTensorRTProviderOptions(&trt_options) == nullptr);
+  std::unique_ptr<OrtTensorRTProviderOptionsV2, decltype(api.ReleaseTensorRTProviderOptions)>
+      rel_trt_options(trt_options, api.ReleaseTensorRTProviderOptions);
+
+  // updating provider option with user provided compute stream
+  cudaStream_t compute_stream = nullptr;
+  void* user_compute_stream = nullptr;
+  cudaStreamCreate(&compute_stream);
+  ASSERT_TRUE(api.UpdateTensorRTProviderOptionsWithValue(rel_trt_options.get(), "user_compute_stream", compute_stream) == nullptr);
+  ASSERT_TRUE(api.GetTensorRTProviderOptionsByName(rel_trt_options.get(), "user_compute_stream", &user_compute_stream) == nullptr);
+  ASSERT_TRUE(user_compute_stream == (void*)compute_stream);
+
+  ASSERT_TRUE(api.SessionOptionsAppendExecutionProvider_TensorRT_V2(
+                  static_cast<OrtSessionOptions*>(session_options),
+                  rel_trt_options.get()) == nullptr);
+
+  Ort::Session session(*ort_env, MODEL_URI, session_options);
+  Ort::MemoryInfo info_cuda("Cuda", OrtAllocatorType::OrtArenaAllocator, 0, OrtMemTypeDefault);
+
+  const std::array<int64_t, 2> x_shape = {3, 2};
+  std::array<float, 3 * 2> x_values = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
+
+  /*
+   * Use cudaMallocHost() (pinned memory allocation) to create input/output tensors
+   */
+  float* input_data;
+  cudaMallocHost(&input_data, 3 * 2 * sizeof(float));
+  ASSERT_NE(input_data, nullptr);
+  cudaMemcpy(input_data, x_values.data(), sizeof(float) * x_values.size(), cudaMemcpyHostToDevice);
+
+  std::cout << "pinned memory allocation" << std::endl;
+  std::cout << "input tesnor:" << std::endl;
+  for (int i = 0; i < 6; i++) {
+    std::cout << input_data[i] << std::endl;
+  }
+
+  // Create an OrtValue tensor backed by data on CUDA memory
+  Ort::Value bound_x = Ort::Value::CreateTensor(info_cuda, reinterpret_cast<float*>(input_data), x_values.size(),
+                                                x_shape.data(), x_shape.size());
+
+  const std::array<int64_t, 2> expected_y_shape = {3, 2};
+  std::array<float, 3 * 2> expected_y = {1.0f, 4.0f, 9.0f, 16.0f, 25.0f, 36.0f};
+
+  float* output_data;
+  cudaMallocHost(&output_data, 3 * 2 * sizeof(float));
+  ASSERT_NE(output_data, nullptr);
+
+  // Create an OrtValue tensor backed by data on CUDA memory
+  Ort::Value bound_y = Ort::Value::CreateTensor(info_cuda, reinterpret_cast<float*>(output_data),
+                                                expected_y.size(), expected_y_shape.data(), expected_y_shape.size());
+
+  // Create IoBinding for inputs and outputs.
+  Ort::IoBinding binding(session);
+  binding.BindInput("X", bound_x);
+  binding.BindOutput("Y", bound_y);
+
+  /*
+   * Use cudaMalloc() (pageable memory allocation first and then implicit pinned memory allocation) to create input/output tensors
+   */
+  float* input_data_2;
+  cudaMalloc(&input_data_2, 3 * 2 * sizeof(float));
+  ASSERT_NE(input_data_2, nullptr);
+  cudaMemcpy(input_data_2, x_values.data(), sizeof(float) * x_values.size(), cudaMemcpyHostToDevice);
+
+  // Create an OrtValue tensor backed by data on CUDA memory
+  Ort::Value bound_x_2 = Ort::Value::CreateTensor(info_cuda, reinterpret_cast<float*>(input_data_2), x_values.size(),
+                                                  x_shape.data(), x_shape.size());
+
+  float* output_data_2;
+  cudaMalloc(&output_data_2, 3 * 2 * sizeof(float));
+  ASSERT_NE(output_data_2, nullptr);
+
+  // Create an OrtValue tensor backed by data on CUDA memory
+  Ort::Value bound_y_2 = Ort::Value::CreateTensor(info_cuda, reinterpret_cast<float*>(output_data_2),
+                                                  expected_y.size(), expected_y_shape.data(), expected_y_shape.size());
+
+  // Create IoBinding for inputs and outputs.
+  Ort::IoBinding binding_2(session);
+  binding_2.BindInput("X", bound_x_2);
+  binding_2.BindOutput("Y", bound_y_2);
+
+  // Run with first iobindings
+  session.Run(Ort::RunOptions(), binding);
+
+  // Check the values against the bound raw memory (needs copying from device to host first)
+  std::array<float, 3 * 2> y_values;
+  cudaMemcpy(y_values.data(), output_data, sizeof(float) * y_values.size(), cudaMemcpyDeviceToHost);
+
+  std::cout << "pinned memory allocation" << std::endl;
+  std::cout << "output: " << std::endl;
+  for (auto y : y_values) {
+    std::cout << y << std::endl;
+  }
+  ASSERT_THAT(y_values, ::testing::ContainerEq(expected_y));
+
+  // Run with second iobindings
+  session.Run(Ort::RunOptions(), binding_2);
+
+  // Check the values against the bound raw memory (needs copying from device to host first)
+  cudaMemcpy(y_values.data(), output_data_2, sizeof(float) * y_values.size(), cudaMemcpyDeviceToHost);
+
+  std::cout << "pageable memory allocation" << std::endl;
+  std::cout << "output: " << std::endl;
+  for (auto y : y_values) {
+    std::cout << y << std::endl;
+  }
+  ASSERT_THAT(y_values, ::testing::ContainerEq(expected_y));
+
+  // Clean up
+  binding.ClearBoundInputs();
+  binding.ClearBoundOutputs();
+  binding_2.ClearBoundInputs();
+  binding_2.ClearBoundOutputs();
+
+  cudaFreeHost(input_data);
+  cudaFreeHost(output_data);
+  cudaFree(input_data_2);
+  cudaFree(output_data_2);
+  cudaStreamDestroy(compute_stream);
+}
+
 class CApiTensorRTTest : public testing::Test, public ::testing::WithParamInterface<std::string> {};
 
 // This test uses CreateTensorRTProviderOptions/UpdateTensorRTProviderOptions APIs to configure and create a TensorRT Execution Provider
@@ -2763,15 +3027,6 @@ TEST_P(CApiTensorRTTest, TestConfigureTensorRTProviderOptions) {
   ASSERT_TRUE(api.CreateTensorRTProviderOptions(&trt_options) == nullptr);
   std::unique_ptr<OrtTensorRTProviderOptionsV2, decltype(api.ReleaseTensorRTProviderOptions)> rel_trt_options(trt_options, api.ReleaseTensorRTProviderOptions);
 
-  // Only test updating provider option with user provided compute stream
-  cudaStream_t compute_stream = nullptr;
-  void* user_compute_stream = nullptr;
-  cudaStreamCreateWithFlags(&compute_stream, cudaStreamNonBlocking);
-  ASSERT_TRUE(api.UpdateTensorRTProviderOptionsWithValue(rel_trt_options.get(), "user_compute_stream", compute_stream) == nullptr);
-  ASSERT_TRUE(api.GetTensorRTProviderOptionsByName(rel_trt_options.get(), "user_compute_stream", &user_compute_stream) == nullptr);
-  ASSERT_TRUE(user_compute_stream == (void*)compute_stream);
-  cudaStreamDestroy(compute_stream);
-
   const char* engine_cache_path = "./trt_engine_folder";
 
   std::vector<const char*> keys{"device_id", "has_user_compute_stream", "trt_fp16_enable", "trt_int8_enable", "trt_engine_cache_enable",
@@ -3120,15 +3375,34 @@ TEST(LiteCustomOpTest, CustomFunc) {
   ASSERT_TRUE(floats_output[1] == 16);
 }
 
+TEST(LiteCustomOpTest, CustomFuncOpsetMismatch) {
+  Ort::SessionOptions session_options;
+  session_options.SetIntraOpNumThreads(1);
+  session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
+  session_options.SetLogSeverityLevel(0);
+#if defined(_WIN32)
+  session_options.RegisterCustomOpsLibrary(ORT_TSTR("custom_op_library.dll"));
+#elif defined(__APPLE__)
+  session_options.RegisterCustomOpsLibrary(ORT_TSTR("libcustom_op_library.dylib"));
+#else
+  session_options.RegisterCustomOpsLibrary(ORT_TSTR("./libcustom_op_library.so"));
+#endif
+
+  EXPECT_THROW(Ort::Session(*ort_env, TSTR("testdata/fuse_select_filter_opset_8.onnx"), session_options), std::exception);
+}
+
 struct Merge {
   Merge(const OrtApi* ort_api, const OrtKernelInfo* info) {
     int64_t reverse;
     ORT_ENFORCE(ort_api->KernelInfoGetAttribute_int64(info, "reverse", &reverse) == nullptr);
     reverse_ = reverse != 0;
   }
-  void Compute(const Ort::Custom::Tensor<std::string_view>& strings_in,
-               std::string_view string_in,
-               Ort::Custom::Tensor<std::string>* strings_out) {
+  Ort::Status Compute(const Ort::Custom::Tensor<std::string_view>& strings_in,
+                      std::string_view string_in,
+                      Ort::Custom::Tensor<std::string>* strings_out) {
+    if (strings_in.NumberOfElement() == 0) {
+      return Ort::Status("the 1st input must have more than one string!", OrtErrorCode::ORT_INVALID_ARGUMENT);
+    }
     std::vector<std::string> string_pool;
     for (const auto& s : strings_in.Data()) {
       string_pool.emplace_back(s.data(), s.size());
@@ -3141,6 +3415,16 @@ struct Merge {
       std::reverse(string_pool.begin(), string_pool.end());
     }
     strings_out->SetStringOutput(string_pool, {static_cast<int64_t>(string_pool.size())});
+    return Ort::Status(nullptr);
+  }
+  static Ort::Status InferOutputShape(Ort::ShapeInferContext& ctx) {
+    auto input_count = ctx.GetInputCount();
+    if (input_count != 2) {
+      return Ort::Status("input count should be 2", OrtErrorCode::ORT_INVALID_ARGUMENT);
+    }
+    Ort::ShapeInferContext::Shape shape_1 = {{-1}};
+    ctx.SetOutputShape(0, shape_1);
+    return Ort::Status(nullptr);
   }
   bool reverse_ = false;
 };
diff --git a/onnxruntime/test/testdata/CNTK/gen.py b/onnxruntime/test/testdata/CNTK/gen.py
index 51ad5e781c243..37241a46808b5 100644
--- a/onnxruntime/test/testdata/CNTK/gen.py
+++ b/onnxruntime/test/testdata/CNTK/gen.py
@@ -48,10 +48,10 @@ def Save(dir, func, feed, outputs):  # noqa: N802
             if actual_input_name.startswith(cntk_name):
                 cntk_to_actual_names[cntk_name] = actual_input_name
 
-    if type(feed) is not dict:
+    if type(feed) is not dict:  # noqa: E721
         feed = {func.arguments[0]: feed}
 
-    if type(outputs) is not dict:
+    if type(outputs) is not dict:  # noqa: E721
         outputs = {func.outputs[0]: outputs}
 
     test_data_dir = os.path.join(dir, data_dir)
@@ -213,7 +213,7 @@ def GenScan():  # noqa: N802
 
     for n in out_mp.graph.node:
         if n.op_type == "Scan":
-            body = [attr for attr in n.attribute if attr.name == "body"][0]
+            body = next(attr for attr in n.attribute if attr.name == "body")
             for vi in list(body.g.input) + list(body.g.output) + list(body.g.value_info):
                 dim = vi.type.tensor_type.shape.dim
                 dim[0].dim_param = "batch"
diff --git a/onnxruntime/test/testdata/custom_op_library/attr_tester.onnx b/onnxruntime/test/testdata/custom_op_library/attr_tester.onnx
new file mode 100644
index 0000000000000..c99b411669ec3
Binary files /dev/null and b/onnxruntime/test/testdata/custom_op_library/attr_tester.onnx differ
diff --git a/onnxruntime/test/testdata/custom_op_library/copy_2_inputs_2_outputs.onnx b/onnxruntime/test/testdata/custom_op_library/copy_2_inputs_2_outputs.onnx
new file mode 100644
index 0000000000000..1756aae6e8d5a
--- /dev/null
+++ b/onnxruntime/test/testdata/custom_op_library/copy_2_inputs_2_outputs.onnx
@@ -0,0 +1,16 @@
+	:�
+d
+input_0
+input_1output_0output_1copy_tensor_array"CopyTensorArrayAllVariadic:test.customopgraphZ
+input_0
+
+���������Z
+input_1
+
+���������b
+output_0
+
+���������b
+output_1
+
+���������B
\ No newline at end of file
diff --git a/onnxruntime/test/testdata/custom_op_library/copy_3_inputs_3_outputs.onnx b/onnxruntime/test/testdata/custom_op_library/copy_3_inputs_3_outputs.onnx
new file mode 100644
index 0000000000000..86c9ec1c1fc37
--- /dev/null
+++ b/onnxruntime/test/testdata/custom_op_library/copy_3_inputs_3_outputs.onnx
@@ -0,0 +1,23 @@
+	:�
+t
+input_0
+input_1
+input_2output_0output_1output_2copy_tensor_array"CopyTensorArrayCombined:test.customopgraphZ
+input_0
+
+���������Z
+input_1
+
+���������Z
+input_2
+
+���������b
+output_0
+
+���������b
+output_1
+
+���������b
+output_2
+
+���������B
\ No newline at end of file
diff --git a/onnxruntime/test/testdata/custom_op_library/cpu/cpu_ops.cc b/onnxruntime/test/testdata/custom_op_library/cpu/cpu_ops.cc
index f9e537fb61047..85edfa0e59f1d 100644
--- a/onnxruntime/test/testdata/custom_op_library/cpu/cpu_ops.cc
+++ b/onnxruntime/test/testdata/custom_op_library/cpu/cpu_ops.cc
@@ -7,21 +7,47 @@
 
 #include "onnxruntime_lite_custom_op.h"
 
+#define CUSTOM_ENFORCE(cond, msg)                                \
+  if (!(cond)) {                                                 \
+    ORT_CXX_API_THROW(msg, OrtErrorCode::ORT_RUNTIME_EXCEPTION); \
+  }
+
 using namespace Ort::Custom;
 
 namespace Cpu {
 
-void KernelOne(const Ort::Custom::Tensor<float>& X,
-               const Ort::Custom::Tensor<float>& Y,
-               Ort::Custom::Tensor<float>& Z) {
-  auto input_shape = X.Shape();
-  auto x_raw = X.Data();
-  auto y_raw = Y.Data();
-  auto z_raw = Z.Allocate(input_shape);
-  for (int64_t i = 0; i < Z.NumberOfElement(); ++i) {
-    z_raw[i] = x_raw[i] + y_raw[i];
+struct KernelOne {
+  KernelOne(const OrtApi*, const OrtKernelInfo*) {}
+
+  Ort::Status Compute(const Ort::Custom::Tensor<float>& X,
+                      const Ort::Custom::Tensor<float>& Y,
+                      Ort::Custom::Tensor<float>& Z) {
+    if (X.NumberOfElement() != Y.NumberOfElement()) {
+      return Ort::Status("x and y has different number of elements", OrtErrorCode::ORT_INVALID_ARGUMENT);
+    }
+    auto x_shape = X.Shape();
+    auto x_raw = X.Data();
+    auto y_raw = Y.Data();
+    auto z_raw = Z.Allocate(x_shape);
+    for (int64_t i = 0; i < Z.NumberOfElement(); ++i) {
+      z_raw[i] = x_raw[i] + y_raw[i];
+    }
+    return Ort::Status{nullptr};
   }
-}
+
+  static Ort::Status InferOutputShape(Ort::ShapeInferContext& ctx) {
+    auto input_count = ctx.GetInputCount();
+    if (input_count != 2) {
+      return Ort::Status("input count should be 2", OrtErrorCode::ORT_INVALID_ARGUMENT);
+    }
+    Ort::ShapeInferContext::Shape shape_3_5 = {{3}, {5}};
+    if (ctx.GetInputShape(0) != shape_3_5 ||
+        ctx.GetInputShape(1) != shape_3_5) {
+      return Ort::Status("input shape mismatch", OrtErrorCode::ORT_INVALID_ARGUMENT);
+    }
+    return Ort::Status{nullptr};
+  }
+};
 
 // lite custom op as a function
 void KernelTwo(const Ort::Custom::Tensor<float>& X,
@@ -68,23 +94,28 @@ void Select(const Ort::Custom::Span<int32_t>& indices_in,
   }
 }
 
-void Filter(const Ort::Custom::Tensor<float>& floats_in,
-            Ort::Custom::Tensor<float>& floats_out) {
-  const float* in = floats_in.Data();
-  auto in_len = floats_in.NumberOfElement();
+struct Filter {
+  Filter(const OrtApi*, const OrtKernelInfo*) {}
+  Ort::Status Compute(const Ort::Custom::Tensor<float>& floats_in,
+                      Ort::Custom::Tensor<float>& floats_out) {
+    const float* in = floats_in.Data();
+    auto in_len = floats_in.NumberOfElement();
 
-  std::vector<float> filter_floats;
-  for (int64_t i = 0; i < in_len; ++i) {
-    if (in[i] > 1.f) {
-      filter_floats.push_back(in[i]);
+    std::vector<float> filter_floats;
+    for (int64_t i = 0; i < in_len; ++i) {
+      if (in[i] > 1.f) {
+        filter_floats.push_back(in[i]);
+      }
     }
-  }
 
-  float* out = static_cast<float*>(floats_out.Allocate({static_cast<int64_t>(filter_floats.size())}));
-  for (size_t j = 0; j < filter_floats.size(); ++j) {
-    out[j] = filter_floats[j];
+    float* out = static_cast<float*>(floats_out.Allocate({static_cast<int64_t>(filter_floats.size())}));
+    for (size_t j = 0; j < filter_floats.size(); ++j) {
+      out[j] = filter_floats[j];
+    }
+
+    return Ort::Status{nullptr};
   }
-}
+};
 
 void Box(const Ort::Custom::Tensor<float>* float_in_1,
          const Ort::Custom::Tensor<float>* float_in_2,
@@ -162,15 +193,120 @@ void FilterFloat8(const Ort::Custom::Tensor<Ort::Float8E4M3FN_t>& floats_in,
 }
 #endif
 
+// a sample custom op accepting variadic inputs, and generate variadic outputs by simply 1:1 copying.
+template <typename T>
+Ort::Status CopyTensorArrayAllVariadic(const Ort::Custom::TensorArray& inputs, Ort::Custom::TensorArray& outputs) {
+  for (size_t ith_input = 0; ith_input < inputs.Size(); ++ith_input) {
+    const auto& input = inputs[ith_input];
+    const auto& input_shape = input->Shape();
+    const T* raw_input = reinterpret_cast<const T*>(input->DataRaw());
+    auto num_elements = input->NumberOfElement();
+    T* raw_output = outputs.AllocateOutput<T>(ith_input, input_shape);
+    if (!raw_output) {
+      return Ort::Status("Failed to allocate output!", OrtErrorCode::ORT_FAIL);
+    }
+    for (int64_t jth_elem = 0; jth_elem < num_elements; ++jth_elem) {
+      raw_output[jth_elem] = raw_input[jth_elem];
+    }
+  }
+  return Ort::Status{nullptr};
+}
+
+template <typename T>
+Ort::Status CopyTensorArrayCombined(const Ort::Custom::Tensor<float>& first_input,
+                                    const Ort::Custom::TensorArray& other_inputs,
+                                    Ort::Custom::Tensor<float>& first_output,
+                                    Ort::Custom::TensorArray& other_outputs) {
+  const auto first_input_shape = first_input.Shape();
+  const T* raw_input = reinterpret_cast<const T*>(first_input.DataRaw());
+
+  T* raw_output = first_output.Allocate(first_input_shape);
+  if (!raw_output) {
+    return Ort::Status("Failed to allocate output!", OrtErrorCode::ORT_FAIL);
+  }
+
+  auto num_elements = first_input.NumberOfElement();
+  for (int64_t ith_elem = 0; ith_elem < num_elements; ++ith_elem) {
+    raw_output[ith_elem] = raw_input[ith_elem];
+  }
+
+  return CopyTensorArrayAllVariadic<T>(other_inputs, other_outputs);
+}
+
+Ort::Status AttrTesterIntFloatCompute(const Ort::Custom::Tensor<float>& X, Ort::Custom::Tensor<float>& Z) {
+  auto x_shape = X.Shape();
+  auto x_raw = X.Data();
+  auto z_raw = Z.Allocate(x_shape);
+  for (int64_t i = 0; i < X.NumberOfElement(); ++i) {
+    z_raw[i] = x_raw[i] * 2;
+  }
+  return Ort::Status{nullptr};
+}
+
+Ort::Status AttrTesterIntFloatShapeInfer(Ort::ShapeInferContext& ctx) {
+  CUSTOM_ENFORCE(ctx.GetAttrInt("a_int") == 1, "int attr mismatch");
+  CUSTOM_ENFORCE(ctx.GetAttrFloat("a_float") == 2.f, "float attr mismatch");
+  std::vector<int64_t> ints{3, 4, 5};
+  CUSTOM_ENFORCE(ctx.GetAttrInts("ints") == ints, "ints attr mismatch");
+  std::vector<float> floats{6, 7, 8};
+  CUSTOM_ENFORCE(ctx.GetAttrFloats("floats") == floats, "floats attr mismatch");
+  auto input_shape = ctx.GetInputShape(0);
+  CUSTOM_ENFORCE(input_shape.size() == 1 &&
+                     !input_shape[0].IsInt() &&
+                     std::string{input_shape[0].AsSym()} == "d",
+                 "input dim is not symbolic");
+  ctx.SetOutputShape(0, input_shape);
+  return Ort::Status{nullptr};
+}
+
+struct AttrTesterStringKernel {
+  void Compute(OrtKernelContext* context) {
+    Ort::KernelContext ctx(context);
+    auto input_X = ctx.GetInput(0);
+    const auto* X = input_X.GetTensorData<float>();
+    auto dimensions = input_X.GetTensorTypeAndShapeInfo().GetShape();
+    auto output = ctx.GetOutput(0, dimensions);
+    auto* out = output.GetTensorMutableData<float>();
+    const size_t size = output.GetTensorTypeAndShapeInfo().GetElementCount();
+    for (size_t i = 0; i < size; i++) {
+      out[i] = X[i] * 3;
+    }
+  }
+};
+
+struct AttrTesterStringOp : Ort::CustomOpBase<AttrTesterStringOp, AttrTesterStringKernel> {
+  void* CreateKernel(const OrtApi&, const OrtKernelInfo*) const {
+    return std::make_unique<AttrTesterStringKernel>().release();
+  };
+  const char* GetName() const { return "AttrTesterString"; };
+  const char* GetExecutionProviderType() const { return "CPUExecutionProvider"; };
+  size_t GetInputTypeCount() const { return 1; };
+  ONNXTensorElementDataType GetInputType(size_t) const { return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; };
+  size_t GetOutputTypeCount() const { return 1; };
+  ONNXTensorElementDataType GetOutputType(size_t) const { return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; };
+
+  static Ort::Status InferOutputShape(Ort::ShapeInferContext& ctx) {
+    CUSTOM_ENFORCE(ctx.GetAttrString("a_string") == "iamastring", "string attr mismatch");
+    std::vector<std::string> strings{"more", "strings"};
+    CUSTOM_ENFORCE(ctx.GetAttrStrings("strings") == strings, "strings attr mismatch");
+    return Ort::Status{nullptr};
+  }
+};
+
 void RegisterOps(Ort::CustomOpDomain& domain) {
-  static const std::unique_ptr<OrtLiteCustomOp> c_CustomOpOne{Ort::Custom::CreateLiteCustomOp("CustomOpOne", "CPUExecutionProvider", KernelOne)};
+  static const std::unique_ptr<OrtLiteCustomOp> c_CustomOpOne{Ort::Custom::CreateLiteCustomOp<KernelOne>("CustomOpOne", "CPUExecutionProvider")};
   static const std::unique_ptr<OrtLiteCustomOp> c_CustomOpTwo{Ort::Custom::CreateLiteCustomOp("CustomOpTwo", "CPUExecutionProvider", KernelTwo)};
   static const std::unique_ptr<OrtLiteCustomOp> c_MulTopOpFloat{Ort::Custom::CreateLiteCustomOp("MulTop", "CPUExecutionProvider", MulTop<float>)};
   static const std::unique_ptr<OrtLiteCustomOp> c_MulTopOpInt32{Ort::Custom::CreateLiteCustomOp("MulTop", "CPUExecutionProvider", MulTop<int32_t>)};
-  static const std::unique_ptr<OrtLiteCustomOp> c_Fuse{Ort::Custom::CreateLiteCustomOp("Fuse", "CPUExecutionProvider", Fuse)};
+  static const std::unique_ptr<OrtLiteCustomOp> c_Fuse{Ort::Custom::CreateLiteCustomOp("Fuse", "CPUExecutionProvider", Fuse, {}, 10, 12)};
   static const std::unique_ptr<OrtLiteCustomOp> c_Select{Ort::Custom::CreateLiteCustomOp("Select", "CPUExecutionProvider", Select)};
-  static const std::unique_ptr<OrtLiteCustomOp> c_Fill{Ort::Custom::CreateLiteCustomOp("Filter", "CPUExecutionProvider", Filter)};
+  static const std::unique_ptr<OrtLiteCustomOp> c_Filter{Ort::Custom::CreateLiteCustomOp<Filter>("Filter", "CPUExecutionProvider", 15, 17)};
   static const std::unique_ptr<OrtLiteCustomOp> c_Box{Ort::Custom::CreateLiteCustomOp("Box", "CPUExecutionProvider", Box)};
+  static const std::unique_ptr<OrtLiteCustomOp> c_CopyTensorArrayAllVariadic{Ort::Custom::CreateLiteCustomOp("CopyTensorArrayAllVariadic", "CPUExecutionProvider", CopyTensorArrayAllVariadic<float>)};
+  static const std::unique_ptr<OrtLiteCustomOp> c_CopyTensorArrayCombined{Ort::Custom::CreateLiteCustomOp("CopyTensorArrayCombined", "CPUExecutionProvider", CopyTensorArrayCombined<float>)};
+
+  static const std::unique_ptr<OrtLiteCustomOp> c_AtterTesterIntFloat{Ort::Custom::CreateLiteCustomOp("AttrTesterIntFloat", "CPUExecutionProvider", AttrTesterIntFloatCompute, AttrTesterIntFloatShapeInfer)};
+  static const AttrTesterStringOp c_AtterTesterString;
 
 #if !defined(DISABLE_FLOAT8_TYPES)
   static const CustomOpOneFloat8 c_CustomOpOneFloat8;
@@ -183,8 +319,13 @@ void RegisterOps(Ort::CustomOpDomain& domain) {
   domain.Add(c_MulTopOpInt32.get());
   domain.Add(c_Fuse.get());
   domain.Add(c_Select.get());
-  domain.Add(c_Fill.get());
+  domain.Add(c_Filter.get());
   domain.Add(c_Box.get());
+  domain.Add(c_CopyTensorArrayAllVariadic.get());
+  domain.Add(c_CopyTensorArrayCombined.get());
+  domain.Add(c_AtterTesterIntFloat.get());
+  domain.Add(&c_AtterTesterString);
+
 #if !defined(DISABLE_FLOAT8_TYPES)
   domain.Add(&c_CustomOpOneFloat8);
   domain.Add(c_FilterFloat8.get());
diff --git a/onnxruntime/test/testdata/custom_op_library/cuda/cuda_ops.cc b/onnxruntime/test/testdata/custom_op_library/cuda/cuda_ops.cc
index aba35b33b75c6..3d561d378cb8c 100644
--- a/onnxruntime/test/testdata/custom_op_library/cuda/cuda_ops.cc
+++ b/onnxruntime/test/testdata/custom_op_library/cuda/cuda_ops.cc
@@ -1,7 +1,7 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
-#ifdef USE_CUDA
+#if defined(USE_CUDA) && !defined(ENABLE_TRAINING)
 
 #define ORT_API_MANUAL_INIT
 #include "onnxruntime_cxx_api.h"
@@ -32,6 +32,9 @@ void KernelOne(const Ort::Custom::CudaContext& cuda_ctx,
   CUSTOM_ENFORCE(cuda_ctx.cuda_stream, "failed to fetch cuda stream");
   CUSTOM_ENFORCE(cuda_ctx.cudnn_handle, "failed to fetch cudnn handle");
   CUSTOM_ENFORCE(cuda_ctx.cublas_handle, "failed to fetch cublas handle");
+  void* deferred_cpu_mem = cuda_ctx.AllocDeferredCpuMem(sizeof(int32_t));
+  CUSTOM_ENFORCE(deferred_cpu_mem, "failed to allocate deferred cpu allocator");
+  cuda_ctx.FreeDeferredCpuMem(deferred_cpu_mem);
   auto z_raw = Z.Allocate(input_shape);
   cuda_add(Z.NumberOfElement(), z_raw, X.Data(), Y.Data(), cuda_ctx.cuda_stream);
 }
@@ -43,8 +46,4 @@ void RegisterOps(Ort::CustomOpDomain& domain) {
 
 }  // namespace Cuda
 
-#else
-
-void Cuda::RegisterOps(Ort::CustomOpDomain& domain) {}
-
 #endif
\ No newline at end of file
diff --git a/onnxruntime/test/testdata/custom_op_library/cuda/cuda_ops.h b/onnxruntime/test/testdata/custom_op_library/cuda/cuda_ops.h
index c0287c4932c98..35cd36fcd4cb7 100644
--- a/onnxruntime/test/testdata/custom_op_library/cuda/cuda_ops.h
+++ b/onnxruntime/test/testdata/custom_op_library/cuda/cuda_ops.h
@@ -5,6 +5,14 @@
 
 namespace Cuda {
 
+#if defined(USE_CUDA) && !defined(ENABLE_TRAINING)
+
 void RegisterOps(Ort::CustomOpDomain& domain);
 
-}
\ No newline at end of file
+#else
+
+void RegisterOps(Ort::CustomOpDomain&) {}
+
+#endif
+
+}  // namespace Cuda
\ No newline at end of file
diff --git a/onnxruntime/test/testdata/custom_op_library/custom_op_library.cc b/onnxruntime/test/testdata/custom_op_library/custom_op_library.cc
index 40fb127eb0b8f..2d5ffc3c81b0f 100644
--- a/onnxruntime/test/testdata/custom_op_library/custom_op_library.cc
+++ b/onnxruntime/test/testdata/custom_op_library/custom_op_library.cc
@@ -13,6 +13,8 @@
 #include "core/framework/ortdevice.h"
 #include "core/framework/ortmemoryinfo.h"
 #include "cpu/cpu_ops.h"
+#include "cuda/cuda_ops.h"
+#include "rocm/rocm_ops.h"
 #include "onnxruntime_lite_custom_op.h"
 
 static const char* c_OpDomain = "test.customop";
@@ -31,10 +33,15 @@ OrtStatus* ORT_API_CALL RegisterCustomOps(OrtSessionOptions* options, const OrtA
   ORT_TRY {
     Ort::CustomOpDomain domain{c_OpDomain};
     Cpu::RegisterOps(domain);
-
     Ort::CustomOpDomain domain_v2{"v2"};
     Cpu::RegisterOps(domain_v2);
 
+    Cuda::RegisterOps(domain);
+    Cuda::RegisterOps(domain_v2);
+
+    Rocm::RegisterOps(domain);
+    Rocm::RegisterOps(domain_v2);
+
     Ort::UnownedSessionOptions session_options(options);
     session_options.Add(domain);
     session_options.Add(domain_v2);
diff --git a/onnxruntime/test/testdata/custom_op_library/rocm/rocm_ops.cc b/onnxruntime/test/testdata/custom_op_library/rocm/rocm_ops.cc
index 113bfb85454a2..069246b4201e7 100644
--- a/onnxruntime/test/testdata/custom_op_library/rocm/rocm_ops.cc
+++ b/onnxruntime/test/testdata/custom_op_library/rocm/rocm_ops.cc
@@ -19,7 +19,7 @@ using namespace Ort::Custom;
     throw std::runtime_error(msg); \
   }
 
-namespace Cuda {
+namespace Rocm {
 
 void KernelOne(const Ort::Custom::RocmContext& rocm_ctx,
                const Ort::Custom::Tensor<float>& X,
@@ -38,10 +38,6 @@ void RegisterOps(Ort::CustomOpDomain& domain) {
   domain.Add(c_CustomOpOne.get());
 }
 
-}  // namespace Cuda
-
-#else
-
-void Cuda::RegisterOps(Ort::CustomOpDomain& domain) {}
+}  // namespace Rocm
 
 #endif
\ No newline at end of file
diff --git a/onnxruntime/test/testdata/custom_op_library/rocm/rocm_ops.h b/onnxruntime/test/testdata/custom_op_library/rocm/rocm_ops.h
index 4e8958cd9dae0..d3e9e4040a5c3 100644
--- a/onnxruntime/test/testdata/custom_op_library/rocm/rocm_ops.h
+++ b/onnxruntime/test/testdata/custom_op_library/rocm/rocm_ops.h
@@ -5,6 +5,14 @@
 
 namespace Rocm {
 
+#ifdef USE_ROCM
+
 void RegisterOps(Ort::CustomOpDomain& domain);
 
-}
\ No newline at end of file
+#else
+
+inline void RegisterOps(Ort::CustomOpDomain&) {}
+
+#endif
+
+}  // namespace Rocm
diff --git a/onnxruntime/test/testdata/fuse_select_filter.onnx b/onnxruntime/test/testdata/fuse_select_filter.onnx
index 15d7dd64788d3..0b881228edb9d 100644
--- a/onnxruntime/test/testdata/fuse_select_filter.onnx
+++ b/onnxruntime/test/testdata/fuse_select_filter.onnx
@@ -1,4 +1,4 @@
-:�
+	:�
 P
 vector_1
 vector_2
@@ -25,4 +25,5 @@ N
 ���������b&
 vector_filtered
 
-���������B
\ No newline at end of file
+���������B
+v2
\ No newline at end of file
diff --git a/onnxruntime/test/testdata/fuse_select_filter_opset_8.onnx b/onnxruntime/test/testdata/fuse_select_filter_opset_8.onnx
new file mode 100644
index 0000000000000..3ea27767eb9f5
--- /dev/null
+++ b/onnxruntime/test/testdata/fuse_select_filter_opset_8.onnx
@@ -0,0 +1,29 @@
+	:�
+P
+vector_1
+vector_2
+alphavector_fused	fuse_node"Fuse*
+	fuse_algo�:v2
+4
+indicesindices_selectedselect_node"Select:v2
+N
+vector_fused
+indices_selectedvector_gatheredgather_node"GatherElements
+;
+vector_gatheredvector_filteredfilter_node"Filter:v2graphZ
+vector_1
+
+���������Z
+vector_2
+
+���������Z
+alpha
+
+���������Z
+indices
+
+���������b&
+vector_filtered
+
+���������B
+v2
\ No newline at end of file
diff --git a/onnxruntime/test/testdata/layout_transform_nonconst_broadcast_input.onnx b/onnxruntime/test/testdata/layout_transform_nonconst_broadcast_input.onnx
new file mode 100644
index 0000000000000..8682be9992c62
Binary files /dev/null and b/onnxruntime/test/testdata/layout_transform_nonconst_broadcast_input.onnx differ
diff --git a/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc b/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc
index 71a10f646a7c6..bfdc0b1d26953 100644
--- a/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc
+++ b/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc
@@ -233,7 +233,54 @@
         "^test_resize_upsample_sizes_nearest_cuda",
         "^test_resize_upsample_sizes_nearest_floor_align_corners_cuda",
         "^test_resize_upsample_sizes_nearest_not_larger_cuda",
-        "^test_resize_upsample_sizes_nearest_round_prefer_ceil_asymmetric_cuda"
+        "^test_resize_upsample_sizes_nearest_round_prefer_ceil_asymmetric_cuda",
+        // onnx 1.15 (opset 20) new and updated op tests
+        "^test_ai_onnx_ml_label_encoder_string_int",
+        "^test_ai_onnx_ml_label_encoder_string_int_no_default",
+        "^test_ai_onnx_ml_label_encoder_tensor_mapping",
+        "^test_ai_onnx_ml_label_encoder_tensor_value_only_mapping",
+        "^test_image_decoder_decode_bmp_rgb",
+        "^test_image_decoder_decode_jpeg2k_rgb",
+        "^test_image_decoder_decode_jpeg_bgr",
+        "^test_image_decoder_decode_jpeg_grayscale",
+        "^test_image_decoder_decode_jpeg_rgb",
+        "^test_image_decoder_decode_png_rgb",
+        "^test_image_decoder_decode_pnm_rgb",
+        "^test_image_decoder_decode_tiff_rgb",
+        "^test_image_decoder_decode_webp_rgb",
+        "^test_regex_full_match_basic",
+        "^test_regex_full_match_email_domain",
+        "^test_regex_full_match_empty",
+        "^test_string_concat_broadcasting",
+        "^test_string_concat",
+        "^test_string_concat_empty_string",
+        "^test_string_concat_utf8",
+        "^test_string_concat_zero_dimensional",
+        "^test_string_split_basic",
+        "^test_string_split_consecutive_delimiters",
+        "^test_string_split_empty_string_delimiter",
+        "^test_string_split_empty_tensor",
+        "^test_string_split_maxsplit",
+        "^test_string_split_no_delimiter",
+        "^test_dft_axis",
+        "^test_dft",
+        "^test_dft_inverse",
+        "^test_reduce_max_bool_inputs",
+        "^test_reduce_min_bool_inputs",
+        "^test_reduce_min_empty_set",
+        "^test_reduce_l1_empty_set",
+        "^test_reduce_l1_empty_set_expanded",
+        "^test_reduce_l2_empty_set",
+        "^test_reduce_l2_empty_set_expanded",
+        "^test_reduce_log_sum_empty_set",
+        "^test_reduce_log_sum_empty_set_expanded",
+        "^test_reduce_log_sum_exp_empty_set",
+        "^test_reduce_log_sum_exp_empty_set_expanded",
+        "^test_reduce_prod_empty_set",
+        "^test_reduce_sum_empty_set",
+        "^test_reduce_sum_empty_set_non_reduced_axis_zero",
+        "^test_reduce_sum_square_empty_set",
+        "^test_reduce_sum_square_empty_set_expanded"
     ],
     "current_failing_tests_x86": [
         "^test_vgg19",
@@ -316,7 +363,24 @@
         "^test_layer_normalization_4d_axis_negative_1_expanded_ver18_cpu",
         "^test_layer_normalization_4d_axis_negative_2_expanded_ver18_cpu",
         "^test_layer_normalization_4d_axis_negative_3_expanded_ver18_cpu",
-        "^test_layer_normalization_default_axis_expanded_ver18_cpu"
+        "^test_layer_normalization_default_axis_expanded_ver18_cpu",
+        // onnx 1.15 (opset 20) new and updated op tests (test_affine_grid_???_expanded utilizes ConstantOfShape so it needs to be skipped as well)
+        // https://dev.azure.com/onnxruntime/onnxruntime/_build/results?buildId=1139541&view=logs&j=249e9d58-0012-5814-27cf-6a201adbd9cf&t=bb33e81f-0527-50e0-0fd2-e94f509f0a82
+        // only supported with cpu provider
+        "^test_affine_grid_2d",
+        "^test_affine_grid_2d_align_corners",
+        "^test_affine_grid_2d_align_corners_expanded",
+        "^test_affine_grid_2d_expanded",
+        "^test_affine_grid_3d",
+        "^test_affine_grid_3d_align_corners",
+        "^test_affine_grid_3d_align_corners_expanded",
+        "^test_affine_grid_3d_expanded",
+        "^test_constantofshape_float_ones",
+        "^test_constantofshape_int_shape_zero",
+        "^test_constantofshape_int_zeros",
+        // https://dev.azure.com/onnxruntime/onnxruntime/_build/results?buildId=1141563&view=logs&j=a018b46d-e41a-509d-6581-c95fdaa42fcd&t=d61c1d37-f101-5d28-982f-e5931b720302
+        "^test_gelu_tanh_2_cpu",
+        "^test_gelu_tanh_2_expanded_cpu"
     ],
     "current_failing_tests_NNAPI": [
         "^test_maxpool_2d_uint8",
@@ -390,6 +454,14 @@
         "^test_squeeze_negative_axes"
     ],
     "current_failing_tests_OPENVINO_CPU_FP32": [
+        "^test_affine_grid_2d_align_corners",
+        "^test_affine_grid_2d_align_corners_expanded",
+        "^test_affine_grid_2d",
+        "^test_affine_grid_2d_expanded",
+        "^test_affine_grid_3d_align_corners",
+        "^test_affine_grid_3d_align_corners_expanded",
+        "^test_affine_grid_3d",
+        "^test_affine_grid_3d_expanded",
         "^test_operator_permute2",
         "^test_operator_repeat",
         "^test_operator_repeat_dim_overflow",
@@ -431,6 +503,10 @@
         "test_scan_sum_cpu", // Disabled due to output mismatch with tolerance.
         "test_scan9_sum_cpu" // Disabled due to output mismatch with tolerance.
     ],
+    "current_failing_tests_OPENVINO_NPU_FP16": [
+        "^test_prelu_broadcast",
+        "test_loop11_cpu"
+    ],
     "current_failing_tests_OPENVINO_opset18": [
         // pending opset 18 support, RUNTIME_EXCEPTION : Encountered unknown exception in Initialize()
         "^test_center_crop_pad_crop_axes_chw",
@@ -569,7 +645,22 @@
         "^test_sequence_map_identity_1_sequence_cpu",
         "^test_sequence_map_identity_1_sequence_expanded_cpu",
         "^test_sequence_map_identity_2_sequences_cpu",
-        "^test_sequence_map_identity_2_sequences_expanded_cpu"
+        "^test_sequence_map_identity_2_sequences_expanded_cpu",
+        // onnx 1.15 (opset 20) new and updated op tests (test_affine_grid_???_expanded utilizes ConstantOfShape so it needs to be skipped as well)
+        // https://dev.azure.com/onnxruntime/onnxruntime/_build/results?buildId=1139542&view=logs&j=3032dfba-5baf-5872-0871-2e69cb7f4b6a&t=f0d05deb-fc26-5aaf-e43e-7db2764c07da
+        // only supported with cpu provider
+        "^test_affine_grid_2d",
+        "^test_affine_grid_2d_align_corners",
+        "^test_affine_grid_2d_align_corners_expanded",
+        "^test_affine_grid_2d_expanded",
+        "^test_affine_grid_3d",
+        "^test_affine_grid_3d_align_corners",
+        "^test_affine_grid_3d_align_corners_expanded",
+        "^test_affine_grid_3d_expanded",
+        "^test_constantofshape_float_ones",
+        "^test_constantofshape_int_shape_zero",
+        "^test_constantofshape_int_zeros"
+
     ],
     // ORT first supported opset 7, so models with nodes that require versions prior to opset 7 are not supported
     "tests_with_pre_opset7_dependencies": [
diff --git a/onnxruntime/test/testdata/onnx_backend_test_series_overrides.jsonc b/onnxruntime/test/testdata/onnx_backend_test_series_overrides.jsonc
index caeea0a758ad9..07385ac9ade05 100644
--- a/onnxruntime/test/testdata/onnx_backend_test_series_overrides.jsonc
+++ b/onnxruntime/test/testdata/onnx_backend_test_series_overrides.jsonc
@@ -7,6 +7,9 @@
         "test_dft": 1e-3,
         "test_dft_axis": 1e-3,
         "test_dft_inverse": 1e-3,
+        "test_dft_opset19": 1e-3,
+        "test_dft_axis_opset19": 1e-3,
+        "test_dft_inverse_opset19": 1e-3,
         "test_stft": 1e-4,
         "test_stft_with_window": 1e-4
     },
diff --git a/onnxruntime/test/testdata/squeezenet/model_opset11.onnx b/onnxruntime/test/testdata/squeezenet/model_opset11.onnx
new file mode 100644
index 0000000000000..dcf322a58c042
Binary files /dev/null and b/onnxruntime/test/testdata/squeezenet/model_opset11.onnx differ
diff --git a/onnxruntime/test/testdata/transform/fusion/fuse-matmul-bn-directly.onnx b/onnxruntime/test/testdata/transform/fusion/fuse-matmul-bn-directly.onnx
new file mode 100644
index 0000000000000..fa11adaac8d95
Binary files /dev/null and b/onnxruntime/test/testdata/transform/fusion/fuse-matmul-bn-directly.onnx differ
diff --git a/onnxruntime/test/testdata/transform/fusion/fuse-matmul-bn-non-ignorable-node.onnx b/onnxruntime/test/testdata/transform/fusion/fuse-matmul-bn-non-ignorable-node.onnx
new file mode 100644
index 0000000000000..1050a7285b4a6
Binary files /dev/null and b/onnxruntime/test/testdata/transform/fusion/fuse-matmul-bn-non-ignorable-node.onnx differ
diff --git a/onnxruntime/test/testdata/transform/fusion/fuse-matmul-bn-only-reshape.onnx b/onnxruntime/test/testdata/transform/fusion/fuse-matmul-bn-only-reshape.onnx
new file mode 100644
index 0000000000000..c361a42700a30
Binary files /dev/null and b/onnxruntime/test/testdata/transform/fusion/fuse-matmul-bn-only-reshape.onnx differ
diff --git a/onnxruntime/test/testdata/transform/fusion/fuse-matmul-bn-only-transpose.onnx b/onnxruntime/test/testdata/transform/fusion/fuse-matmul-bn-only-transpose.onnx
new file mode 100644
index 0000000000000..f70ae2e6229e7
Binary files /dev/null and b/onnxruntime/test/testdata/transform/fusion/fuse-matmul-bn-only-transpose.onnx differ
diff --git a/onnxruntime/test/testdata/transform/fusion/fuse-matmul-bn-with-reshape.onnx b/onnxruntime/test/testdata/transform/fusion/fuse-matmul-bn-with-reshape.onnx
new file mode 100644
index 0000000000000..8e4bc49514548
Binary files /dev/null and b/onnxruntime/test/testdata/transform/fusion/fuse-matmul-bn-with-reshape.onnx differ
diff --git a/onnxruntime/test/testdata/transform/fusion/fuse-pad-conv.onnx b/onnxruntime/test/testdata/transform/fusion/fuse-pad-conv.onnx
new file mode 100644
index 0000000000000..ced1950005985
Binary files /dev/null and b/onnxruntime/test/testdata/transform/fusion/fuse-pad-conv.onnx differ
diff --git a/onnxruntime/test/testdata/transform/fusion/fuse-pad-maxpool-opset8.onnx b/onnxruntime/test/testdata/transform/fusion/fuse-pad-maxpool-opset8.onnx
new file mode 100644
index 0000000000000..feb1f024ceed7
Binary files /dev/null and b/onnxruntime/test/testdata/transform/fusion/fuse-pad-maxpool-opset8.onnx differ
diff --git a/onnxruntime/test/testdata/transform/fusion/fuse-pad-maxpool.onnx b/onnxruntime/test/testdata/transform/fusion/fuse-pad-maxpool.onnx
new file mode 100644
index 0000000000000..32e959262f6b5
Binary files /dev/null and b/onnxruntime/test/testdata/transform/fusion/fuse-pad-maxpool.onnx differ
diff --git a/onnxruntime/test/testdata/transform/gh_issue_17392.onnx b/onnxruntime/test/testdata/transform/gh_issue_17392.onnx
new file mode 100644
index 0000000000000..ca9b78a4179bd
--- /dev/null
+++ b/onnxruntime/test/testdata/transform/gh_issue_17392.onnx
@@ -0,0 +1,9 @@
+	:Z
++C"Constant*
+value_stringsJabcJdef�
+
+CY"IdentityConstantb
+Y
+
+
+B
\ No newline at end of file
diff --git a/onnxruntime/test/testdata/transform/gh_issue_18338.onnx b/onnxruntime/test/testdata/transform/gh_issue_18338.onnx
new file mode 100644
index 0000000000000..afb499a347ec7
Binary files /dev/null and b/onnxruntime/test/testdata/transform/gh_issue_18338.onnx differ
diff --git a/onnxruntime/test/testdata/transform/gh_issue_18338.py b/onnxruntime/test/testdata/transform/gh_issue_18338.py
new file mode 100644
index 0000000000000..dc5446ac56c09
--- /dev/null
+++ b/onnxruntime/test/testdata/transform/gh_issue_18338.py
@@ -0,0 +1,859 @@
+import google.protobuf.text_format
+import onnx
+from numpy import array, float16
+
+import onnxruntime as ort
+
+# Run n times
+N = 1
+
+onnx_model_text = """
+ir_version: 8
+producer_name: "pytorch"
+producer_version: "2.2.0"
+graph {
+  node {
+    output: "_val_1"
+    name: "Constant_0"
+    op_type: "Constant"
+    attribute {
+      name: "value_ints"
+      ints: -1
+      type: INTS
+    }
+    doc_string: ""
+  }
+  node {
+    input: "input_0"
+    input: "_val_1"
+    output: "_val_2"
+    name: "Reshape_1"
+    op_type: "Reshape"
+    attribute {
+      name: "allowzero"
+      i: 0
+      type: INT
+    }
+    doc_string: ""
+  }
+  node {
+    input: "_val_2"
+    output: "_val_3"
+    name: "_aten_linalg_vector_norm_no_dim_onnx_2"
+    op_type: "_aten_linalg_vector_norm_no_dim_onnx"
+    attribute {
+      name: "keepdim"
+      i: 0
+      type: INT
+    }
+    attribute {
+      name: "ord"
+      f: 2.0
+      type: FLOAT
+    }
+    doc_string: ""
+    domain: "pkg.onnxscript.torch_lib"
+  }
+  name: "main_graph"
+  input {
+    name: "input_0"
+    type {
+      tensor_type {
+        elem_type: 10
+        shape {
+        }
+      }
+    }
+  }
+  output {
+    name: "_val_3"
+    type {
+      tensor_type {
+        elem_type: 10
+        shape {
+        }
+      }
+    }
+  }
+  value_info {
+    name: "_val_1"
+    type {
+      tensor_type {
+        elem_type: 7
+        shape {
+          dim {
+            dim_value: 1
+          }
+        }
+      }
+    }
+  }
+  value_info {
+    name: "_val_2"
+    type {
+      tensor_type {
+        elem_type: 10
+        shape {
+          dim {
+            dim_value: 1
+          }
+        }
+      }
+    }
+  }
+}
+opset_import {
+  domain: "pkg.onnxscript.torch_lib"
+  version: 1
+}
+opset_import {
+  domain: ""
+  version: 18
+}
+opset_import {
+  domain: "pkg.onnxscript.torch_lib.common"
+  version: 1
+}
+functions {
+  name: "_aten_linalg_vector_norm_no_dim_onnx"
+  input: "self"
+  output: "result_29"
+  attribute: "ord"
+  attribute: "keepdim"
+  node {
+    input: "self"
+    output: "tmp"
+    name: "n0"
+    op_type: "Shape"
+    domain: ""
+  }
+  node {
+    input: "tmp"
+    output: "self_rank"
+    name: "n1"
+    op_type: "Size"
+    domain: ""
+  }
+  node {
+    output: "int64_0"
+    name: "n2"
+    op_type: "Constant"
+    attribute {
+      name: "value"
+      t {
+        data_type: 7
+        int64_data: 0
+        name: "int64_0"
+      }
+      type: TENSOR
+    }
+    domain: ""
+  }
+  node {
+    input: "int64_0"
+    input: "self_rank"
+    output: "int64_0_cast"
+    name: "n3"
+    op_type: "CastLike"
+    domain: ""
+  }
+  node {
+    input: "self_rank"
+    input: "int64_0_cast"
+    output: "cond"
+    name: "n4"
+    op_type: "Equal"
+    domain: ""
+  }
+  node {
+    input: "cond"
+    output: "self_2"
+    name: "n5"
+    op_type: "If"
+    attribute {
+      name: "then_branch"
+      g {
+        node {
+          output: "int64_0_1d"
+          name: "n0"
+          op_type: "Constant"
+          attribute {
+            name: "value"
+            t {
+              dims: 1
+              data_type: 7
+              int64_data: 0
+              name: "int64_0_1d"
+            }
+            type: TENSOR
+          }
+          domain: ""
+        }
+        node {
+          input: "self"
+          input: "int64_0_1d"
+          output: "self_0"
+          name: "n1"
+          op_type: "Unsqueeze"
+          domain: ""
+        }
+        name: "thenGraph_4"
+        output {
+          name: "self_0"
+          type {
+          }
+        }
+      }
+      type: GRAPH
+    }
+    attribute {
+      name: "else_branch"
+      g {
+        node {
+          input: "self"
+          output: "self_1"
+          name: "n0"
+          op_type: "Identity"
+          domain: ""
+        }
+        name: "elseGraph_4"
+        output {
+          name: "self_1"
+          type {
+          }
+        }
+      }
+      type: GRAPH
+    }
+    domain: ""
+  }
+  node {
+    input: "self_2"
+    output: "self_3"
+    name: "n6"
+    op_type: "Abs"
+    domain: ""
+  }
+  node {
+    output: "ord"
+    name: "n7"
+    op_type: "Constant"
+    attribute {
+      name: "value_float"
+      type: FLOAT
+      ref_attr_name: "ord"
+    }
+    domain: ""
+  }
+  node {
+    input: "ord"
+    output: "ord_4"
+    name: "n8"
+    op_type: "Cast"
+    attribute {
+      name: "to"
+      i: 1
+      type: INT
+    }
+    domain: ""
+  }
+  node {
+    input: "ord_4"
+    output: "cond_5"
+    name: "n9"
+    op_type: "IsInf"
+    attribute {
+      name: "detect_negative"
+      i: 0
+      type: INT
+    }
+    attribute {
+      name: "detect_positive"
+      i: 1
+      type: INT
+    }
+    domain: ""
+  }
+  node {
+    input: "cond_5"
+    output: "result_24"
+    name: "n10"
+    op_type: "If"
+    attribute {
+      name: "then_branch"
+      g {
+        node {
+          input: "self_3"
+          output: "result"
+          name: "n0"
+          op_type: "ReduceMax"
+          attribute {
+            name: "keepdims"
+            type: INT
+            ref_attr_name: "keepdim"
+          }
+          domain: ""
+        }
+        name: "thenGraph_9"
+        output {
+          name: "result"
+          type {
+          }
+        }
+      }
+      type: GRAPH
+    }
+    attribute {
+      name: "else_branch"
+      g {
+        node {
+          input: "ord_4"
+          output: "cond_6"
+          name: "n0"
+          op_type: "IsInf"
+          attribute {
+            name: "detect_negative"
+            i: 1
+            type: INT
+          }
+          attribute {
+            name: "detect_positive"
+            i: 0
+            type: INT
+          }
+          domain: ""
+        }
+        node {
+          input: "cond_6"
+          output: "result_23"
+          name: "n1"
+          op_type: "If"
+          attribute {
+            name: "then_branch"
+            g {
+              node {
+                input: "self_3"
+                output: "result_7"
+                name: "n0"
+                op_type: "ReduceMin"
+                attribute {
+                  name: "keepdims"
+                  type: INT
+                  ref_attr_name: "keepdim"
+                }
+                domain: ""
+              }
+              name: "thenGraph_11"
+              output {
+                name: "result_7"
+                type {
+                }
+              }
+            }
+            type: GRAPH
+          }
+          attribute {
+            name: "else_branch"
+            g {
+              node {
+                output: "const"
+                name: "n0"
+                op_type: "Constant"
+                attribute {
+                  name: "value"
+                  t {
+                    data_type: 1
+                    float_data: 0.0
+                    name: "const"
+                  }
+                  type: TENSOR
+                }
+                domain: ""
+              }
+              node {
+                input: "const"
+                input: "ord_4"
+                output: "const_cast"
+                name: "n1"
+                op_type: "CastLike"
+                domain: ""
+              }
+              node {
+                input: "ord_4"
+                input: "const_cast"
+                output: "cond_8"
+                name: "n2"
+                op_type: "Equal"
+                domain: ""
+              }
+              node {
+                input: "cond_8"
+                output: "result_22"
+                name: "n3"
+                op_type: "If"
+                attribute {
+                  name: "then_branch"
+                  g {
+                    node {
+                      input: "self_3"
+                      output: "self_bool"
+                      name: "n0"
+                      op_type: "Cast"
+                      attribute {
+                        name: "to"
+                        i: 9
+                        type: INT
+                      }
+                      domain: ""
+                    }
+                    node {
+                      input: "self_bool"
+                      input: "self_3"
+                      output: "self_0_1"
+                      name: "n1"
+                      op_type: "CastLike"
+                      domain: ""
+                    }
+                    node {
+                      input: "self_0_1"
+                      output: "result_9"
+                      name: "n2"
+                      op_type: "ReduceSum"
+                      attribute {
+                        name: "keepdims"
+                        i: 0
+                        type: INT
+                      }
+                      domain: ""
+                    }
+                    name: "thenGraph_13"
+                    output {
+                      name: "result_9"
+                      type {
+                      }
+                    }
+                  }
+                  type: GRAPH
+                }
+                attribute {
+                  name: "else_branch"
+                  g {
+                    node {
+                      output: "const_10"
+                      name: "n0"
+                      op_type: "Constant"
+                      attribute {
+                        name: "value"
+                        t {
+                          data_type: 1
+                          float_data: 1.0
+                          name: "const_10"
+                        }
+                        type: TENSOR
+                      }
+                      domain: ""
+                    }
+                    node {
+                      input: "const_10"
+                      input: "ord_4"
+                      output: "const_10_cast"
+                      name: "n1"
+                      op_type: "CastLike"
+                      domain: ""
+                    }
+                    node {
+                      input: "ord_4"
+                      input: "const_10_cast"
+                      output: "cond_11"
+                      name: "n2"
+                      op_type: "Equal"
+                      domain: ""
+                    }
+                    node {
+                      input: "cond_11"
+                      output: "result_21"
+                      name: "n3"
+                      op_type: "If"
+                      attribute {
+                        name: "then_branch"
+                        g {
+                          node {
+                            input: "self_3"
+                            output: "result_12"
+                            name: "n0"
+                            op_type: "ReduceL1"
+                            attribute {
+                              name: "keepdims"
+                              type: INT
+                              ref_attr_name: "keepdim"
+                            }
+                            domain: ""
+                          }
+                          name: "thenGraph_18"
+                          output {
+                            name: "result_12"
+                            type {
+                            }
+                          }
+                        }
+                        type: GRAPH
+                      }
+                      attribute {
+                        name: "else_branch"
+                        g {
+                          node {
+                            output: "const_13"
+                            name: "n0"
+                            op_type: "Constant"
+                            attribute {
+                              name: "value"
+                              t {
+                                data_type: 1
+                                float_data: 2.0
+                                name: "const_13"
+                              }
+                              type: TENSOR
+                            }
+                            domain: ""
+                          }
+                          node {
+                            input: "const_13"
+                            input: "ord_4"
+                            output: "const_13_cast"
+                            name: "n1"
+                            op_type: "CastLike"
+                            domain: ""
+                          }
+                          node {
+                            input: "ord_4"
+                            input: "const_13_cast"
+                            output: "cond_14"
+                            name: "n2"
+                            op_type: "Equal"
+                            domain: ""
+                          }
+                          node {
+                            input: "cond_14"
+                            output: "result_20"
+                            name: "n3"
+                            op_type: "If"
+                            attribute {
+                              name: "then_branch"
+                              g {
+                                node {
+                                  input: "self_3"
+                                  output: "result_15"
+                                  name: "n0"
+                                  op_type: "ReduceL2"
+                                  attribute {
+                                    name: "keepdims"
+                                    type: INT
+                                    ref_attr_name: "keepdim"
+                                  }
+                                  domain: ""
+                                }
+                                name: "thenGraph_20"
+                                output {
+                                  name: "result_15"
+                                  type {
+                                  }
+                                }
+                              }
+                              type: GRAPH
+                            }
+                            attribute {
+                              name: "else_branch"
+                              g {
+                                node {
+                                  input: "ord_4"
+                                  input: "self_3"
+                                  output: "ord_float"
+                                  name: "n0"
+                                  op_type: "CastLike"
+                                  domain: ""
+                                }
+                                node {
+                                  input: "self_3"
+                                  input: "ord_float"
+                                  output: "self_pow"
+                                  name: "n1"
+                                  op_type: "Pow"
+                                  domain: ""
+                                }
+                                node {
+                                  input: "self_pow"
+                                  output: "tmp_16"
+                                  name: "n2"
+                                  op_type: "ReduceSum"
+                                  attribute {
+                                    name: "keepdims"
+                                    type: INT
+                                    ref_attr_name: "keepdim"
+                                  }
+                                  domain: ""
+                                }
+                                node {
+                                  output: "const_17"
+                                  name: "n3"
+                                  op_type: "Constant"
+                                  attribute {
+                                    name: "value"
+                                    t {
+                                      data_type: 1
+                                      float_data: 1.0
+                                      name: "const_17"
+                                    }
+                                    type: TENSOR
+                                  }
+                                  domain: ""
+                                }
+                                node {
+                                  input: "const_17"
+                                  input: "ord_float"
+                                  output: "const_17_cast"
+                                  name: "n4"
+                                  op_type: "CastLike"
+                                  domain: ""
+                                }
+                                node {
+                                  input: "const_17_cast"
+                                  input: "ord_float"
+                                  output: "tmp_18"
+                                  name: "n5"
+                                  op_type: "Div"
+                                  domain: ""
+                                }
+                                node {
+                                  input: "tmp_16"
+                                  input: "tmp_18"
+                                  output: "result_19"
+                                  name: "n6"
+                                  op_type: "Pow"
+                                  domain: ""
+                                }
+                                name: "elseGraph_20"
+                                output {
+                                  name: "result_19"
+                                  type {
+                                  }
+                                }
+                              }
+                              type: GRAPH
+                            }
+                            domain: ""
+                          }
+                          name: "elseGraph_18"
+                          output {
+                            name: "result_20"
+                            type {
+                            }
+                          }
+                        }
+                        type: GRAPH
+                      }
+                      domain: ""
+                    }
+                    name: "elseGraph_13"
+                    output {
+                      name: "result_21"
+                      type {
+                      }
+                    }
+                  }
+                  type: GRAPH
+                }
+                domain: ""
+              }
+              name: "elseGraph_11"
+              output {
+                name: "result_22"
+                type {
+                }
+              }
+            }
+            type: GRAPH
+          }
+          domain: ""
+        }
+        name: "elseGraph_9"
+        output {
+          name: "result_23"
+          type {
+          }
+        }
+      }
+      type: GRAPH
+    }
+    domain: ""
+  }
+  node {
+    output: "int64_0_25"
+    name: "n11"
+    op_type: "Constant"
+    attribute {
+      name: "value"
+      t {
+        data_type: 7
+        int64_data: 0
+        name: "int64_0_25"
+      }
+      type: TENSOR
+    }
+    domain: ""
+  }
+  node {
+    input: "int64_0_25"
+    input: "self_rank"
+    output: "int64_0_25_cast"
+    name: "n12"
+    op_type: "CastLike"
+    domain: ""
+  }
+  node {
+    input: "self_rank"
+    input: "int64_0_25_cast"
+    output: "cond_26"
+    name: "n13"
+    op_type: "Equal"
+    domain: ""
+  }
+  node {
+    input: "cond_26"
+    output: "result_29"
+    name: "n14"
+    op_type: "If"
+    attribute {
+      name: "then_branch"
+      g {
+        node {
+          input: "result_24"
+          output: "result_27"
+          name: "n0"
+          op_type: "Squeeze"
+          domain: ""
+        }
+        name: "thenGraph_27"
+        output {
+          name: "result_27"
+          type {
+          }
+        }
+      }
+      type: GRAPH
+    }
+    attribute {
+      name: "else_branch"
+      g {
+        node {
+          input: "result_24"
+          output: "result_28"
+          name: "n0"
+          op_type: "Identity"
+          domain: ""
+        }
+        name: "elseGraph_27"
+        output {
+          name: "result_28"
+          type {
+          }
+        }
+      }
+      type: GRAPH
+    }
+    domain: ""
+  }
+  opset_import {
+    domain: ""
+    version: 18
+  }
+  domain: "pkg.onnxscript.torch_lib"
+}
+functions {
+  name: "Rank"
+  input: "input"
+  output: "return_val"
+  node {
+    input: "input"
+    output: "tmp"
+    name: "n0"
+    op_type: "Shape"
+    domain: ""
+  }
+  node {
+    input: "tmp"
+    output: "return_val"
+    name: "n1"
+    op_type: "Size"
+    domain: ""
+  }
+  doc_string: "Take the rank of the input tensor."
+  opset_import {
+    domain: ""
+    version: 18
+  }
+  domain: "pkg.onnxscript.torch_lib.common"
+}
+functions {
+  name: "IsScalar"
+  input: "input"
+  output: "return_val"
+  node {
+    input: "input"
+    output: "tmp"
+    name: "n0"
+    op_type: "Shape"
+    domain: ""
+  }
+  node {
+    input: "tmp"
+    output: "tmp_0"
+    name: "n1"
+    op_type: "Size"
+    domain: ""
+  }
+  node {
+    output: "tmp_1"
+    name: "n2"
+    op_type: "Constant"
+    attribute {
+      name: "value_int"
+      i: 0
+      type: INT
+    }
+    domain: ""
+  }
+  node {
+    input: "tmp_0"
+    input: "tmp_1"
+    output: "return_val"
+    name: "n3"
+    op_type: "Equal"
+    domain: ""
+  }
+  doc_string: "Return whether the input has rank 0, or is a scalar."
+  opset_import {
+    domain: ""
+    version: 18
+  }
+  domain: "pkg.onnxscript.torch_lib.common"
+}
+
+"""
+
+ort_inputs = {"input_0": array(0.8965, dtype=float16)}
+
+# Set up the inference session
+session_options = ort.SessionOptions()
+session_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_DISABLE_ALL
+onnx_model = onnx.ModelProto()
+google.protobuf.text_format.Parse(onnx_model_text, onnx_model)
+
+# Uncomment this line to save the model to a file for examination
+# onnx.save_model(onnx_model, "test_output_match_opinfo__linalg_vector_norm_cpu_float16.onnx")
+
+onnx.checker.check_model(onnx_model)
+session = ort.InferenceSession(onnx_model.SerializeToString(), session_options, providers=("CPUExecutionProvider",))
+
+# Run the model
+for _ in range(N):
+    ort_outputs = session.run(None, ort_inputs)
diff --git a/onnxruntime/test/testdata/transform/transform_nested_ifs_toplogical_sorted_nodes.onnx b/onnxruntime/test/testdata/transform/transform_nested_ifs_toplogical_sorted_nodes.onnx
new file mode 100644
index 0000000000000..afb499a347ec7
Binary files /dev/null and b/onnxruntime/test/testdata/transform/transform_nested_ifs_toplogical_sorted_nodes.onnx differ
diff --git a/onnxruntime/test/testdata/transform/transform_nested_ifs_toplogical_sorted_nodes.py b/onnxruntime/test/testdata/transform/transform_nested_ifs_toplogical_sorted_nodes.py
new file mode 100644
index 0000000000000..ebda865895d02
--- /dev/null
+++ b/onnxruntime/test/testdata/transform/transform_nested_ifs_toplogical_sorted_nodes.py
@@ -0,0 +1,859 @@
+import google.protobuf.text_format
+import onnx
+from numpy import array, float16
+
+import onnxruntime as ort
+
+# Run n times
+N = 1
+
+onnx_model_text = """
+ir_version: 8
+producer_name: "pytorch"
+producer_version: "2.2.0"
+graph {
+  node {
+    output: "_val_1"
+    name: "Constant_0"
+    op_type: "Constant"
+    attribute {
+      name: "value_ints"
+      ints: -1
+      type: INTS
+    }
+    doc_string: ""
+  }
+  node {
+    input: "input_0"
+    input: "_val_1"
+    output: "_val_2"
+    name: "Reshape_1"
+    op_type: "Reshape"
+    attribute {
+      name: "allowzero"
+      i: 0
+      type: INT
+    }
+    doc_string: ""
+  }
+  node {
+    input: "_val_2"
+    output: "_val_3"
+    name: "_aten_linalg_vector_norm_no_dim_onnx_2"
+    op_type: "_aten_linalg_vector_norm_no_dim_onnx"
+    attribute {
+      name: "keepdim"
+      i: 0
+      type: INT
+    }
+    attribute {
+      name: "ord"
+      f: 2.0
+      type: FLOAT
+    }
+    doc_string: ""
+    domain: "pkg.onnxscript.torch_lib"
+  }
+  name: "main_graph"
+  input {
+    name: "input_0"
+    type {
+      tensor_type {
+        elem_type: 10
+        shape {
+        }
+      }
+    }
+  }
+  output {
+    name: "_val_3"
+    type {
+      tensor_type {
+        elem_type: 10
+        shape {
+        }
+      }
+    }
+  }
+  value_info {
+    name: "_val_1"
+    type {
+      tensor_type {
+        elem_type: 7
+        shape {
+          dim {
+            dim_value: 1
+          }
+        }
+      }
+    }
+  }
+  value_info {
+    name: "_val_2"
+    type {
+      tensor_type {
+        elem_type: 10
+        shape {
+          dim {
+            dim_value: 1
+          }
+        }
+      }
+    }
+  }
+}
+opset_import {
+  domain: "pkg.onnxscript.torch_lib"
+  version: 1
+}
+opset_import {
+  domain: ""
+  version: 18
+}
+opset_import {
+  domain: "pkg.onnxscript.torch_lib.common"
+  version: 1
+}
+functions {
+  name: "_aten_linalg_vector_norm_no_dim_onnx"
+  input: "self"
+  output: "result_29"
+  attribute: "ord"
+  attribute: "keepdim"
+  node {
+    input: "self"
+    output: "tmp"
+    name: "n0"
+    op_type: "Shape"
+    domain: ""
+  }
+  node {
+    input: "tmp"
+    output: "self_rank"
+    name: "n1"
+    op_type: "Size"
+    domain: ""
+  }
+  node {
+    output: "int64_0"
+    name: "n2"
+    op_type: "Constant"
+    attribute {
+      name: "value"
+      t {
+        data_type: 7
+        int64_data: 0
+        name: "int64_0"
+      }
+      type: TENSOR
+    }
+    domain: ""
+  }
+  node {
+    input: "int64_0"
+    input: "self_rank"
+    output: "int64_0_cast"
+    name: "n3"
+    op_type: "CastLike"
+    domain: ""
+  }
+  node {
+    input: "self_rank"
+    input: "int64_0_cast"
+    output: "cond"
+    name: "n4"
+    op_type: "Equal"
+    domain: ""
+  }
+  node {
+    input: "cond"
+    output: "self_2"
+    name: "n5"
+    op_type: "If"
+    attribute {
+      name: "then_branch"
+      g {
+        node {
+          output: "int64_0_1d"
+          name: "n0"
+          op_type: "Constant"
+          attribute {
+            name: "value"
+            t {
+              dims: 1
+              data_type: 7
+              int64_data: 0
+              name: "int64_0_1d"
+            }
+            type: TENSOR
+          }
+          domain: ""
+        }
+        node {
+          input: "self"
+          input: "int64_0_1d"
+          output: "self_0"
+          name: "n1"
+          op_type: "Unsqueeze"
+          domain: ""
+        }
+        name: "thenGraph_4"
+        output {
+          name: "self_0"
+          type {
+          }
+        }
+      }
+      type: GRAPH
+    }
+    attribute {
+      name: "else_branch"
+      g {
+        node {
+          input: "self"
+          output: "self_1"
+          name: "n0"
+          op_type: "Identity"
+          domain: ""
+        }
+        name: "elseGraph_4"
+        output {
+          name: "self_1"
+          type {
+          }
+        }
+      }
+      type: GRAPH
+    }
+    domain: ""
+  }
+  node {
+    input: "self_2"
+    output: "self_3"
+    name: "n6"
+    op_type: "Abs"
+    domain: ""
+  }
+  node {
+    output: "ord"
+    name: "n7"
+    op_type: "Constant"
+    attribute {
+      name: "value_float"
+      type: FLOAT
+      ref_attr_name: "ord"
+    }
+    domain: ""
+  }
+  node {
+    input: "ord"
+    output: "ord_4"
+    name: "n8"
+    op_type: "Cast"
+    attribute {
+      name: "to"
+      i: 1
+      type: INT
+    }
+    domain: ""
+  }
+  node {
+    input: "ord_4"
+    output: "cond_5"
+    name: "n9"
+    op_type: "IsInf"
+    attribute {
+      name: "detect_negative"
+      i: 0
+      type: INT
+    }
+    attribute {
+      name: "detect_positive"
+      i: 1
+      type: INT
+    }
+    domain: ""
+  }
+  node {
+    input: "cond_5"
+    output: "result_24"
+    name: "n10"
+    op_type: "If"
+    attribute {
+      name: "then_branch"
+      g {
+        node {
+          input: "self_3"
+          output: "result"
+          name: "n0"
+          op_type: "ReduceMax"
+          attribute {
+            name: "keepdims"
+            type: INT
+            ref_attr_name: "keepdim"
+          }
+          domain: ""
+        }
+        name: "thenGraph_9"
+        output {
+          name: "result"
+          type {
+          }
+        }
+      }
+      type: GRAPH
+    }
+    attribute {
+      name: "else_branch"
+      g {
+        node {
+          input: "ord_4"
+          output: "cond_6"
+          name: "n0"
+          op_type: "IsInf"
+          attribute {
+            name: "detect_negative"
+            i: 1
+            type: INT
+          }
+          attribute {
+            name: "detect_positive"
+            i: 0
+            type: INT
+          }
+          domain: ""
+        }
+        node {
+          input: "cond_6"
+          output: "result_23"
+          name: "n1"
+          op_type: "If"
+          attribute {
+            name: "then_branch"
+            g {
+              node {
+                input: "self_3"
+                output: "result_7"
+                name: "n0"
+                op_type: "ReduceMin"
+                attribute {
+                  name: "keepdims"
+                  type: INT
+                  ref_attr_name: "keepdim"
+                }
+                domain: ""
+              }
+              name: "thenGraph_11"
+              output {
+                name: "result_7"
+                type {
+                }
+              }
+            }
+            type: GRAPH
+          }
+          attribute {
+            name: "else_branch"
+            g {
+              node {
+                output: "const"
+                name: "n0"
+                op_type: "Constant"
+                attribute {
+                  name: "value"
+                  t {
+                    data_type: 1
+                    float_data: 0.0
+                    name: "const"
+                  }
+                  type: TENSOR
+                }
+                domain: ""
+              }
+              node {
+                input: "const"
+                input: "ord_4"
+                output: "const_cast"
+                name: "n1"
+                op_type: "CastLike"
+                domain: ""
+              }
+              node {
+                input: "ord_4"
+                input: "const_cast"
+                output: "cond_8"
+                name: "n2"
+                op_type: "Equal"
+                domain: ""
+              }
+              node {
+                input: "cond_8"
+                output: "result_22"
+                name: "n3"
+                op_type: "If"
+                attribute {
+                  name: "then_branch"
+                  g {
+                    node {
+                      input: "self_3"
+                      output: "self_bool"
+                      name: "n0"
+                      op_type: "Cast"
+                      attribute {
+                        name: "to"
+                        i: 9
+                        type: INT
+                      }
+                      domain: ""
+                    }
+                    node {
+                      input: "self_bool"
+                      input: "self_3"
+                      output: "self_0_1"
+                      name: "n1"
+                      op_type: "CastLike"
+                      domain: ""
+                    }
+                    node {
+                      input: "self_0_1"
+                      output: "result_9"
+                      name: "n2"
+                      op_type: "ReduceSum"
+                      attribute {
+                        name: "keepdims"
+                        i: 0
+                        type: INT
+                      }
+                      domain: ""
+                    }
+                    name: "thenGraph_13"
+                    output {
+                      name: "result_9"
+                      type {
+                      }
+                    }
+                  }
+                  type: GRAPH
+                }
+                attribute {
+                  name: "else_branch"
+                  g {
+                    node {
+                      output: "const_10"
+                      name: "n0"
+                      op_type: "Constant"
+                      attribute {
+                        name: "value"
+                        t {
+                          data_type: 1
+                          float_data: 1.0
+                          name: "const_10"
+                        }
+                        type: TENSOR
+                      }
+                      domain: ""
+                    }
+                    node {
+                      input: "const_10"
+                      input: "ord_4"
+                      output: "const_10_cast"
+                      name: "n1"
+                      op_type: "CastLike"
+                      domain: ""
+                    }
+                    node {
+                      input: "ord_4"
+                      input: "const_10_cast"
+                      output: "cond_11"
+                      name: "n2"
+                      op_type: "Equal"
+                      domain: ""
+                    }
+                    node {
+                      input: "cond_11"
+                      output: "result_21"
+                      name: "n3"
+                      op_type: "If"
+                      attribute {
+                        name: "then_branch"
+                        g {
+                          node {
+                            input: "self_3"
+                            output: "result_12"
+                            name: "n0"
+                            op_type: "ReduceL1"
+                            attribute {
+                              name: "keepdims"
+                              type: INT
+                              ref_attr_name: "keepdim"
+                            }
+                            domain: ""
+                          }
+                          name: "thenGraph_18"
+                          output {
+                            name: "result_12"
+                            type {
+                            }
+                          }
+                        }
+                        type: GRAPH
+                      }
+                      attribute {
+                        name: "else_branch"
+                        g {
+                          node {
+                            output: "const_13"
+                            name: "n0"
+                            op_type: "Constant"
+                            attribute {
+                              name: "value"
+                              t {
+                                data_type: 1
+                                float_data: 2.0
+                                name: "const_13"
+                              }
+                              type: TENSOR
+                            }
+                            domain: ""
+                          }
+                          node {
+                            input: "const_13"
+                            input: "ord_4"
+                            output: "const_13_cast"
+                            name: "n1"
+                            op_type: "CastLike"
+                            domain: ""
+                          }
+                          node {
+                            input: "ord_4"
+                            input: "const_13_cast"
+                            output: "cond_14"
+                            name: "n2"
+                            op_type: "Equal"
+                            domain: ""
+                          }
+                          node {
+                            input: "cond_14"
+                            output: "result_20"
+                            name: "n3"
+                            op_type: "If"
+                            attribute {
+                              name: "then_branch"
+                              g {
+                                node {
+                                  input: "self_3"
+                                  output: "result_15"
+                                  name: "n0"
+                                  op_type: "ReduceL2"
+                                  attribute {
+                                    name: "keepdims"
+                                    type: INT
+                                    ref_attr_name: "keepdim"
+                                  }
+                                  domain: ""
+                                }
+                                name: "thenGraph_20"
+                                output {
+                                  name: "result_15"
+                                  type {
+                                  }
+                                }
+                              }
+                              type: GRAPH
+                            }
+                            attribute {
+                              name: "else_branch"
+                              g {
+                                node {
+                                  input: "ord_4"
+                                  input: "self_3"
+                                  output: "ord_float"
+                                  name: "n0"
+                                  op_type: "CastLike"
+                                  domain: ""
+                                }
+                                node {
+                                  input: "self_3"
+                                  input: "ord_float"
+                                  output: "self_pow"
+                                  name: "n1"
+                                  op_type: "Pow"
+                                  domain: ""
+                                }
+                                node {
+                                  input: "self_pow"
+                                  output: "tmp_16"
+                                  name: "n2"
+                                  op_type: "ReduceSum"
+                                  attribute {
+                                    name: "keepdims"
+                                    type: INT
+                                    ref_attr_name: "keepdim"
+                                  }
+                                  domain: ""
+                                }
+                                node {
+                                  output: "const_17"
+                                  name: "n3"
+                                  op_type: "Constant"
+                                  attribute {
+                                    name: "value"
+                                    t {
+                                      data_type: 1
+                                      float_data: 1.0
+                                      name: "const_17"
+                                    }
+                                    type: TENSOR
+                                  }
+                                  domain: ""
+                                }
+                                node {
+                                  input: "const_17"
+                                  input: "ord_float"
+                                  output: "const_17_cast"
+                                  name: "n4"
+                                  op_type: "CastLike"
+                                  domain: ""
+                                }
+                                node {
+                                  input: "const_17_cast"
+                                  input: "ord_float"
+                                  output: "tmp_18"
+                                  name: "n5"
+                                  op_type: "Div"
+                                  domain: ""
+                                }
+                                node {
+                                  input: "tmp_16"
+                                  input: "tmp_18"
+                                  output: "result_19"
+                                  name: "n6"
+                                  op_type: "Pow"
+                                  domain: ""
+                                }
+                                name: "elseGraph_20"
+                                output {
+                                  name: "result_19"
+                                  type {
+                                  }
+                                }
+                              }
+                              type: GRAPH
+                            }
+                            domain: ""
+                          }
+                          name: "elseGraph_18"
+                          output {
+                            name: "result_20"
+                            type {
+                            }
+                          }
+                        }
+                        type: GRAPH
+                      }
+                      domain: ""
+                    }
+                    name: "elseGraph_13"
+                    output {
+                      name: "result_21"
+                      type {
+                      }
+                    }
+                  }
+                  type: GRAPH
+                }
+                domain: ""
+              }
+              name: "elseGraph_11"
+              output {
+                name: "result_22"
+                type {
+                }
+              }
+            }
+            type: GRAPH
+          }
+          domain: ""
+        }
+        name: "elseGraph_9"
+        output {
+          name: "result_23"
+          type {
+          }
+        }
+      }
+      type: GRAPH
+    }
+    domain: ""
+  }
+  node {
+    output: "int64_0_25"
+    name: "n11"
+    op_type: "Constant"
+    attribute {
+      name: "value"
+      t {
+        data_type: 7
+        int64_data: 0
+        name: "int64_0_25"
+      }
+      type: TENSOR
+    }
+    domain: ""
+  }
+  node {
+    input: "int64_0_25"
+    input: "self_rank"
+    output: "int64_0_25_cast"
+    name: "n12"
+    op_type: "CastLike"
+    domain: ""
+  }
+  node {
+    input: "self_rank"
+    input: "int64_0_25_cast"
+    output: "cond_26"
+    name: "n13"
+    op_type: "Equal"
+    domain: ""
+  }
+  node {
+    input: "cond_26"
+    output: "result_29"
+    name: "n14"
+    op_type: "If"
+    attribute {
+      name: "then_branch"
+      g {
+        node {
+          input: "result_24"
+          output: "result_27"
+          name: "n0"
+          op_type: "Squeeze"
+          domain: ""
+        }
+        name: "thenGraph_27"
+        output {
+          name: "result_27"
+          type {
+          }
+        }
+      }
+      type: GRAPH
+    }
+    attribute {
+      name: "else_branch"
+      g {
+        node {
+          input: "result_24"
+          output: "result_28"
+          name: "n0"
+          op_type: "Identity"
+          domain: ""
+        }
+        name: "elseGraph_27"
+        output {
+          name: "result_28"
+          type {
+          }
+        }
+      }
+      type: GRAPH
+    }
+    domain: ""
+  }
+  opset_import {
+    domain: ""
+    version: 18
+  }
+  domain: "pkg.onnxscript.torch_lib"
+}
+functions {
+  name: "Rank"
+  input: "input"
+  output: "return_val"
+  node {
+    input: "input"
+    output: "tmp"
+    name: "n0"
+    op_type: "Shape"
+    domain: ""
+  }
+  node {
+    input: "tmp"
+    output: "return_val"
+    name: "n1"
+    op_type: "Size"
+    domain: ""
+  }
+  doc_string: "Take the rank of the input tensor."
+  opset_import {
+    domain: ""
+    version: 18
+  }
+  domain: "pkg.onnxscript.torch_lib.common"
+}
+functions {
+  name: "IsScalar"
+  input: "input"
+  output: "return_val"
+  node {
+    input: "input"
+    output: "tmp"
+    name: "n0"
+    op_type: "Shape"
+    domain: ""
+  }
+  node {
+    input: "tmp"
+    output: "tmp_0"
+    name: "n1"
+    op_type: "Size"
+    domain: ""
+  }
+  node {
+    output: "tmp_1"
+    name: "n2"
+    op_type: "Constant"
+    attribute {
+      name: "value_int"
+      i: 0
+      type: INT
+    }
+    domain: ""
+  }
+  node {
+    input: "tmp_0"
+    input: "tmp_1"
+    output: "return_val"
+    name: "n3"
+    op_type: "Equal"
+    domain: ""
+  }
+  doc_string: "Return whether the input has rank 0, or is a scalar."
+  opset_import {
+    domain: ""
+    version: 18
+  }
+  domain: "pkg.onnxscript.torch_lib.common"
+}
+
+"""
+
+ort_inputs = {"input_0": array(0.8965, dtype=float16)}
+
+# Set up the inference session
+session_options = ort.SessionOptions()
+session_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_DISABLE_ALL
+onnx_model = onnx.ModelProto()
+google.protobuf.text_format.Parse(onnx_model_text, onnx_model)
+
+# Uncomment this line to save the model to a file for examination
+# onnx.save_model(onnx_model, "transform_nested_ifs_toplogical_sorted_nodes.onnx")
+
+onnx.checker.check_model(onnx_model)
+session = ort.InferenceSession(onnx_model.SerializeToString(), session_options, providers=("CPUExecutionProvider",))
+
+# Run the model
+for _ in range(N):
+    ort_outputs = session.run(None, ort_inputs)
diff --git a/onnxruntime/test/testdata/transform/transpose_graph_gen.py b/onnxruntime/test/testdata/transform/transpose_graph_gen.py
new file mode 100644
index 0000000000000..14f2994a1925d
--- /dev/null
+++ b/onnxruntime/test/testdata/transform/transpose_graph_gen.py
@@ -0,0 +1,41 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+import onnx
+from onnx import TensorProto, helper
+
+
+def GenerateModel(model_name, valid):  # noqa: N802
+    nodes = [
+        helper.make_node("Transpose", ["input_0"], ["transposed_input_0"], perm=[2, 1, 3, 0]),
+        helper.make_node("Add", ["transposed_input_0", "input_1"], ["output"]),
+    ]
+
+    if valid:
+        inputs = [
+            helper.make_tensor_value_info("input_0", TensorProto.FLOAT, [1, 1, 3, 3]),
+            helper.make_tensor_value_info("input_1", TensorProto.FLOAT, [3, 1, 3, 1]),
+        ]
+        outputs = [helper.make_tensor_value_info("output", TensorProto.FLOAT, [3, 1, 3, 1])]
+    else:
+        inputs = [
+            helper.make_tensor_value_info("input_0", TensorProto.FLOAT, [1, 2, 3, 3]),
+            helper.make_tensor_value_info("input_1", TensorProto.FLOAT, [3, 2, 3, 1]),
+        ]
+        outputs = [helper.make_tensor_value_info("output", TensorProto.FLOAT, [3, 2, 3, 1])]
+
+    graph = helper.make_graph(
+        nodes,
+        "TransposeAndAdd",  # name
+        inputs,
+        outputs,
+        [],
+    )
+
+    model = helper.make_model(graph)
+    onnx.save(model, model_name)
+
+
+GenerateModel("transpose_to_reshape_valid.onnx", True)
+GenerateModel("transpose_to_reshape_invalid.onnx", False)
diff --git a/onnxruntime/test/testdata/transform/transpose_to_reshape_invalid.onnx b/onnxruntime/test/testdata/transform/transpose_to_reshape_invalid.onnx
new file mode 100644
index 0000000000000..a09b13fc184a8
Binary files /dev/null and b/onnxruntime/test/testdata/transform/transpose_to_reshape_invalid.onnx differ
diff --git a/onnxruntime/test/testdata/transform/transpose_to_reshape_valid.onnx b/onnxruntime/test/testdata/transform/transpose_to_reshape_valid.onnx
new file mode 100644
index 0000000000000..344d18ac10f77
Binary files /dev/null and b/onnxruntime/test/testdata/transform/transpose_to_reshape_valid.onnx differ
diff --git a/onnxruntime/test/testdata/transpose_optimizer_shared_initializers.onnx b/onnxruntime/test/testdata/transpose_optimizer_shared_initializers.onnx
new file mode 100644
index 0000000000000..797584f10ab24
Binary files /dev/null and b/onnxruntime/test/testdata/transpose_optimizer_shared_initializers.onnx differ
diff --git a/onnxruntime/test/testdata/transpose_optimizer_shared_initializers.py b/onnxruntime/test/testdata/transpose_optimizer_shared_initializers.py
new file mode 100644
index 0000000000000..d710c796fb0ad
--- /dev/null
+++ b/onnxruntime/test/testdata/transpose_optimizer_shared_initializers.py
@@ -0,0 +1,116 @@
+import numpy as np
+import onnx
+from onnx import TensorProto, helper
+
+
+# Create a model with shared initializers that can be updated in-place by the transpose optimizer,
+# including ones behind a DQ node. The transpose optimizer updates the first usage and inserts
+# Transpose/Unsqueeze ops on the others (see UnsqueezeInput and TransposeInput).
+# When we push the Transpose past other usages we should be able to cancel out those Transpose/Unsqueeze ops.
+# We need 3 DQ nodes to ensure the Transpose or Unsqueeze added by the transpose optimizer is not
+# removed prematurely.
+def create_model(broadcast_weights: bool):
+    if broadcast_weights:
+        bias_shape = [2, 2]
+        bias_values = np.random.randn(2, 2)
+    else:
+        bias_shape = [1, 3, 2, 2]
+        bias_values = np.random.randn(1, 3, 2, 2)
+
+    graph = helper.make_graph(
+        name="graph",
+        inputs=[
+            helper.make_tensor_value_info("input0", TensorProto.FLOAT, [1, 2, 2, 3]),
+        ],
+        initializer=[
+            helper.make_tensor("bias_quant", TensorProto.UINT8, bias_shape, bias_values.astype(np.uint8)),
+            helper.make_tensor("bias_fp32", TensorProto.FLOAT, bias_shape, bias_values.astype(np.float32)),
+            helper.make_tensor("dq_scale0", TensorProto.FLOAT, [], [1.5]),
+            helper.make_tensor("dq_zp0", TensorProto.UINT8, [], [5]),
+            helper.make_tensor("dq_scale1", TensorProto.FLOAT, [], [0.5]),
+        ],
+        nodes=[
+            # Transpose input from channels last to channels first
+            helper.make_node("Transpose", ["input0"], ["input_T"], perm=[0, 3, 1, 2]),
+            helper.make_node("DequantizeLinear", ["bias_quant", "dq_scale0", "dq_zp0"], ["DQ0"], "DQ0"),
+            helper.make_node("Add", ["input_T", "DQ0"], ["A0"], "A0"),
+            helper.make_node("DequantizeLinear", ["bias_quant", "dq_scale1"], ["DQ1"], "DQ1"),
+            helper.make_node("Add", ["A0", "DQ1"], ["A1"], "A1"),
+            helper.make_node("DequantizeLinear", ["bias_quant", "dq_scale0"], ["DQ2"], "DQ2"),
+            helper.make_node("Add", ["A1", "DQ2"], ["A2"], "A2"),
+            helper.make_node("Add", ["A2", "bias_fp32"], ["A3"], "A3"),
+            helper.make_node("Add", ["A3", "bias_fp32"], ["A4"], "A4"),
+            # NCHW to NHWC
+            helper.make_node("Transpose", ["A4"], ["output0"], perm=[0, 2, 3, 1]),
+        ],
+        outputs=[
+            helper.make_tensor_value_info("output0", TensorProto.FLOAT, [1, 2, 2, 3]),
+        ],
+    )
+
+    model = helper.make_model(graph)
+    onnx.checker.check_model(model, full_check=True)
+    return model
+
+
+def create_model_with_Where():  # noqa 'Where' is the operator name
+    """
+    Create a model to validate the logic to cancel out the Transpose -> Squeeze -> DQ between an updated shared
+    initializer and other usage. We need to use Where as we require more than 2 inputs.
+    The `condition` input will be having a Transpose pushed through it will have a negative cost.
+    The `X` input will have a positive cost which cancels out the negative value.
+    The `Y` input will be a shared initializer that is braodcast. If we don't find the Transpose to make the cost of it
+    negative we will not push the Transpose though.
+
+    If we only have 2 inputs, the broadcast initializer will always cost less due to its smaller rank, meaning we don't
+    actually need to look for the Squeeze in that case.
+    """
+    cond_0_shape = [3, 2]  # transpose to 2, 3
+    cond_1_shape = [2, 3]
+    x_0_shape = [3]  # broadcast so Transpose goes through Where0
+    x_1_shape = [3]  # also broadcast
+    y_shape = [3]  # should be transposed and broadcast to [3, 1] if we push the transpose through the Where
+    y_values = np.random.randn(3)
+
+    graph = helper.make_graph(
+        name="graph",
+        inputs=[
+            helper.make_tensor_value_info("cond_in_0", TensorProto.BOOL, cond_0_shape),
+            helper.make_tensor_value_info("cond_in_1", TensorProto.BOOL, cond_1_shape),
+            helper.make_tensor_value_info("x_in_0", TensorProto.FLOAT, x_0_shape),
+            helper.make_tensor_value_info("x_in_1", TensorProto.FLOAT, x_1_shape),
+        ],
+        initializer=[
+            helper.make_tensor("y_quant", TensorProto.UINT8, y_shape, y_values.astype(np.uint8)),
+            helper.make_tensor("dq_scale0", TensorProto.FLOAT, [], [1.5]),
+            helper.make_tensor("dq_scale1", TensorProto.FLOAT, [], [0.5]),
+        ],
+        nodes=[
+            # Transpose the cond input
+            helper.make_node("Transpose", ["cond_in_0"], ["cond_in_T"], perm=[1, 0]),
+            helper.make_node("DequantizeLinear", ["y_quant", "dq_scale0"], ["DQ0"], "DQ0"),
+            # first usage of shared initializer. simple so we know the Transpose can push through it
+            helper.make_node("Where", ["cond_in_T", "x_in_0", "DQ0"], ["Where0"], "Where0"),
+            helper.make_node("DequantizeLinear", ["y_quant", "dq_scale1"], ["DQ1"], "DQ1"),
+            helper.make_node("Add", ["x_in_1", "Where0"], ["Add0"], "Add0"),
+            # second usage of shared initializer. requires looking past the Squeeze to push the transpose through
+            helper.make_node("Where", ["cond_in_1", "Add0", "DQ1"], ["Where1"], "Where1"),
+            helper.make_node("Transpose", ["Where1"], ["output0"], perm=[1, 0]),
+        ],
+        outputs=[
+            helper.make_tensor_value_info("output0", TensorProto.FLOAT, [3, 2]),
+        ],
+    )
+
+    model = helper.make_model(graph)
+    onnx.checker.check_model(model, full_check=True)
+    return model
+
+
+if __name__ == "__main__":
+    model = create_model(broadcast_weights=False)
+    onnx.save(model, "transpose_optimizer_shared_initializers.onnx")
+    model = create_model(broadcast_weights=True)
+    onnx.save(model, "transpose_optimizer_shared_initializers_broadcast.onnx")
+    model = create_model_with_Where()
+    onnx.save(model, "transpose_optimizer_shared_initializers_broadcast2.onnx")
diff --git a/onnxruntime/test/testdata/transpose_optimizer_shared_initializers_broadcast.onnx b/onnxruntime/test/testdata/transpose_optimizer_shared_initializers_broadcast.onnx
new file mode 100644
index 0000000000000..8bb2c6fd4a8b5
Binary files /dev/null and b/onnxruntime/test/testdata/transpose_optimizer_shared_initializers_broadcast.onnx differ
diff --git a/onnxruntime/test/testdata/transpose_optimizer_shared_initializers_broadcast2.onnx b/onnxruntime/test/testdata/transpose_optimizer_shared_initializers_broadcast2.onnx
new file mode 100644
index 0000000000000..ad05fb70cb26e
Binary files /dev/null and b/onnxruntime/test/testdata/transpose_optimizer_shared_initializers_broadcast2.onnx differ
diff --git a/onnxruntime/test/testdata/trt_reshape.onnx b/onnxruntime/test/testdata/trt_reshape.onnx
new file mode 100644
index 0000000000000..7d195af2ae204
--- /dev/null
+++ b/onnxruntime/test/testdata/trt_reshape.onnx
@@ -0,0 +1,16 @@
+	:�
+)
+data
+shapereshapedReshape"Reshapetrt_engine_wrapperZ
+data
+	
+N
+Z
+shape
+
+
+b
+reshaped
+
+
+B
\ No newline at end of file
diff --git a/onnxruntime/test/testdata/trt_reshape_test.py b/onnxruntime/test/testdata/trt_reshape_test.py
new file mode 100644
index 0000000000000..42777bd3d50c7
--- /dev/null
+++ b/onnxruntime/test/testdata/trt_reshape_test.py
@@ -0,0 +1,42 @@
+#!/usr/bin/env python3
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+import onnx
+from onnx import TensorProto, helper
+
+
+def generate_model(model_name):
+    nodes = [
+        helper.make_node(
+            "Reshape",
+            ["data", "shape"],
+            ["reshaped"],
+            "Reshape",
+        ),
+    ]
+
+    graph = helper.make_graph(
+        nodes,
+        "trt_engine_wrapper",
+        [  # input
+            helper.make_tensor_value_info("data", TensorProto.FLOAT, ["N", 2]),
+            helper.make_tensor_value_info(
+                "shape",
+                TensorProto.INT64,
+                [
+                    2,
+                ],
+            ),
+        ],
+        [  # output
+            helper.make_tensor_value_info("reshaped", TensorProto.FLOAT, [4, 1]),
+        ],
+    )
+
+    model = helper.make_model(graph)
+    onnx.save(model, model_name)
+
+
+if __name__ == "__main__":
+    generate_model("trt_reshape.onnx")
diff --git a/onnxruntime/test/util/default_providers.cc b/onnxruntime/test/util/default_providers.cc
index 28af61e15b2b5..65646a7286719 100644
--- a/onnxruntime/test/util/default_providers.cc
+++ b/onnxruntime/test/util/default_providers.cc
@@ -69,7 +69,9 @@ std::unique_ptr<IExecutionProvider> DefaultMIGraphXExecutionProvider() {
   OrtMIGraphXProviderOptions params{
       0,
       0,
-      0};
+      0,
+      0,
+      nullptr};
   return MIGraphXProviderFactoryCreator::Create(&params)->CreateProvider();
 #else
   return nullptr;
@@ -268,7 +270,7 @@ std::unique_ptr<IExecutionProvider> DefaultCannExecutionProvider() {
 
 std::unique_ptr<IExecutionProvider> DefaultDmlExecutionProvider() {
 #ifdef USE_DML
-  if (auto factory = DMLProviderFactoryCreator::Create(0))
+  if (auto factory = DMLProviderFactoryCreator::Create(0, false, false, false))
     return factory->CreateProvider();
 #endif
   return nullptr;
diff --git a/onnxruntime/test/util/include/inference_session_wrapper.h b/onnxruntime/test/util/include/inference_session_wrapper.h
index eab83c26b681f..757caf7987d35 100644
--- a/onnxruntime/test/util/include/inference_session_wrapper.h
+++ b/onnxruntime/test/util/include/inference_session_wrapper.h
@@ -12,9 +12,8 @@ namespace test {
 // InferenceSession wrapper class for use in tests where we need access to the Graph and SessionState
 class InferenceSessionWrapper : public InferenceSession {
  public:
-  explicit InferenceSessionWrapper(const SessionOptions& session_options,
-                                   const Environment& env) : InferenceSession(session_options, env) {
-  }
+  // Expose the constructors from InferenceSession
+  using InferenceSession::InferenceSession;
 
   const Graph& GetGraph() const {
     return model_->MainGraph();
diff --git a/onnxruntime/test/util/include/providers.h b/onnxruntime/test/util/include/providers.h
index a6420e2342d9b..aa489e6cd958b 100644
--- a/onnxruntime/test/util/include/providers.h
+++ b/onnxruntime/test/util/include/providers.h
@@ -10,9 +10,6 @@
 #ifdef USE_TVM
 #include "core/providers/tvm/tvm_provider_factory.h"
 #endif
-#ifdef USE_TENSORRT
-#include "core/providers/tensorrt/tensorrt_provider_factory.h"
-#endif
 #ifdef USE_OPENVINO
 #include "core/providers/openvino/openvino_provider_factory.h"
 #endif
diff --git a/onnxruntime/test/util/include/test_utils.h b/onnxruntime/test/util/include/test_utils.h
index eb072a134b924..48a71b8acb261 100644
--- a/onnxruntime/test/util/include/test_utils.h
+++ b/onnxruntime/test/util/include/test_utils.h
@@ -20,6 +20,7 @@
 
 namespace onnxruntime {
 class Graph;
+struct SessionOptions;
 
 namespace test {
 
@@ -62,11 +63,13 @@ using ModelPathOrBytes = std::variant<std::basic_string_view<ORTCHAR_T>,
 
 // Run the model using the CPU EP to get expected output, comparing to the output when the 'execution_provider'
 // is enabled.
+// session_options_updater can be used to update the SessionOptions the inference session is created with.
 void RunAndVerifyOutputsWithEP(ModelPathOrBytes model_path_or_bytes,
                                std::string_view log_id,
                                std::unique_ptr<IExecutionProvider> execution_provider,
                                const NameMLValMap& feeds,
-                               const EPVerificationParams& params = EPVerificationParams());
+                               const EPVerificationParams& params = EPVerificationParams(),
+                               const std::function<void(SessionOptions&)>& session_options_updater = {});
 
 // Tests model loading only.
 // This can be used to test EPs in builds where only loading (and not running) of a model is supported.
diff --git a/onnxruntime/test/util/test_utils.cc b/onnxruntime/test/util/test_utils.cc
index 43845a5052e36..5f1fdae72f031 100644
--- a/onnxruntime/test/util/test_utils.cc
+++ b/onnxruntime/test/util/test_utils.cc
@@ -132,11 +132,16 @@ static gsl::span<const std::byte> GetModelBytes(ModelPathOrBytes model_path_or_b
 void RunAndVerifyOutputsWithEP(ModelPathOrBytes model_path_or_bytes, std::string_view log_id,
                                std::unique_ptr<IExecutionProvider> execution_provider,
                                const NameMLValMap& feeds,
-                               const EPVerificationParams& params) {
+                               const EPVerificationParams& params,
+                               const std::function<void(SessionOptions&)>& session_options_updater) {
   std::vector<std::byte> model_data_buffer{};
   const auto model_data = GetModelBytes(model_path_or_bytes, model_data_buffer);
 
   SessionOptions so;
+  if (session_options_updater) {
+    session_options_updater(so);
+  }
+
   so.session_logid = log_id;
   RunOptions run_options;
   run_options.run_tag = so.session_logid;
diff --git a/onnxruntime/test/xctest/xcgtest.mm b/onnxruntime/test/xctest/xcgtest.mm
index 5367f3e89c07c..c02f18d906cbe 100644
--- a/onnxruntime/test/xctest/xcgtest.mm
+++ b/onnxruntime/test/xctest/xcgtest.mm
@@ -201,7 +201,7 @@ + (void)registerTestClasses {
   delete listeners.Release(listeners.default_result_printer());
   free(argv);
 
-  BOOL runDisabledTests = testing::GTEST_FLAG(also_run_disabled_tests);
+  BOOL runDisabledTests = GTEST_FLAG_GET(also_run_disabled_tests);
   NSMutableDictionary* testFilterMap = [NSMutableDictionary dictionary];
   NSCharacterSet* decimalDigitCharacterSet = [NSCharacterSet decimalDigitCharacterSet];
 
diff --git a/onnxruntime/wasm/api.cc b/onnxruntime/wasm/api.cc
index 174edabbc91fe..0e58bb4f93f7f 100644
--- a/onnxruntime/wasm/api.cc
+++ b/onnxruntime/wasm/api.cc
@@ -9,6 +9,7 @@
 #include "api.h"
 
 #include <iostream>
+#include <sstream>
 #include <vector>
 
 namespace {
@@ -17,6 +18,14 @@ OrtErrorCode g_last_error_code;
 std::string g_last_error_message;
 }  // namespace
 
+enum DataLocation {
+  DATA_LOCATION_NONE = 0,
+  DATA_LOCATION_CPU = 1,
+  DATA_LOCATION_CPU_PINNED = 2,
+  DATA_LOCATION_TEXTURE = 3,
+  DATA_LOCATION_GPU_BUFFER = 4
+};
+
 static_assert(sizeof(const char*) == sizeof(size_t), "size of a pointer and a size_t value should be the same.");
 static_assert(sizeof(size_t) == 4, "size of size_t should be 4 in this build (wasm32).");
 
@@ -223,13 +232,23 @@ void OrtFree(void* ptr) {
   }
 }
 
-OrtValue* OrtCreateTensor(int data_type, void* data, size_t data_length, size_t* dims, size_t dims_length) {
+OrtValue* OrtCreateTensor(int data_type, void* data, size_t data_length, size_t* dims, size_t dims_length, int data_location) {
+  if (data_location != DATA_LOCATION_CPU &&
+      data_location != DATA_LOCATION_CPU_PINNED &&
+      data_location != DATA_LOCATION_GPU_BUFFER) {
+    std::ostringstream ostr;
+    ostr << "Invalid data location: " << data_location;
+    CheckStatus(Ort::GetApi().CreateStatus(ORT_INVALID_ARGUMENT, ostr.str().c_str()));
+    return nullptr;
+  }
+
   std::vector<int64_t> shapes(dims_length);
   for (size_t i = 0; i < dims_length; i++) {
     shapes[i] = dims[i];
   }
 
   if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING) {
+    // data_location is ignored for string tensor. It is always CPU.
     OrtAllocator* allocator = nullptr;
     RETURN_NULLPTR_IF_ERROR(GetAllocatorWithDefaultOptions, &allocator);
 
@@ -244,12 +263,16 @@ OrtValue* OrtCreateTensor(int data_type, void* data, size_t data_length, size_t*
 
     return UNREGISTER_AUTO_RELEASE(value);
   } else {
-    OrtMemoryInfo* memoryInfo = nullptr;
-    RETURN_NULLPTR_IF_ERROR(CreateCpuMemoryInfo, OrtDeviceAllocator, OrtMemTypeDefault, &memoryInfo);
-    REGISTER_AUTO_RELEASE_HANDLE(MemoryInfo, memoryInfo);
+    OrtMemoryInfo* memory_info = nullptr;
+    if (data_location != DATA_LOCATION_GPU_BUFFER) {
+      RETURN_NULLPTR_IF_ERROR(CreateCpuMemoryInfo, OrtDeviceAllocator, OrtMemTypeDefault, &memory_info);
+    } else {
+      RETURN_NULLPTR_IF_ERROR(CreateMemoryInfo, "WebGPU_Buffer", OrtDeviceAllocator, 0, OrtMemTypeDefault, &memory_info);
+    }
+    REGISTER_AUTO_RELEASE_HANDLE(MemoryInfo, memory_info);
 
     OrtValue* value = nullptr;
-    int error_code = CHECK_STATUS(CreateTensorWithDataAsOrtValue, memoryInfo, data, data_length,
+    int error_code = CHECK_STATUS(CreateTensorWithDataAsOrtValue, memory_info, data, data_length,
                                   dims_length > 0 ? shapes.data() : nullptr, dims_length,
                                   static_cast<ONNXTensorElementDataType>(data_type), &value);
 
@@ -373,15 +396,85 @@ void OrtReleaseRunOptions(OrtRunOptions* run_options) {
   Ort::GetApi().ReleaseRunOptions(run_options);
 }
 
+OrtIoBinding* OrtCreateBinding(OrtSession* session) {
+  OrtIoBinding* binding = nullptr;
+  int error_code = CHECK_STATUS(CreateIoBinding, session, &binding);
+  return (error_code == ORT_OK) ? binding : nullptr;
+}
+
+int EMSCRIPTEN_KEEPALIVE OrtBindInput(OrtIoBinding* io_binding,
+                                      const char* name,
+                                      OrtValue* input) {
+  return CHECK_STATUS(BindInput, io_binding, name, input);
+}
+
+int EMSCRIPTEN_KEEPALIVE OrtBindOutput(OrtIoBinding* io_binding,
+                                       const char* name,
+                                       OrtValue* output,
+                                       int output_location) {
+  if (output) {
+    return CHECK_STATUS(BindOutput, io_binding, name, output);
+  } else {
+    if (output_location != DATA_LOCATION_NONE &&
+        output_location != DATA_LOCATION_CPU &&
+        output_location != DATA_LOCATION_CPU_PINNED &&
+        output_location != DATA_LOCATION_GPU_BUFFER) {
+      std::ostringstream ostr;
+      ostr << "Invalid data location (" << output_location << ") for output: \"" << name << "\".";
+      return CheckStatus(Ort::GetApi().CreateStatus(ORT_INVALID_ARGUMENT, ostr.str().c_str()));
+    }
+
+    OrtMemoryInfo* memory_info = nullptr;
+    if (output_location != DATA_LOCATION_GPU_BUFFER) {
+      RETURN_ERROR_CODE_IF_ERROR(CreateCpuMemoryInfo, OrtDeviceAllocator, OrtMemTypeDefault, &memory_info);
+    } else {
+      RETURN_ERROR_CODE_IF_ERROR(CreateMemoryInfo, "WebGPU_Buffer", OrtDeviceAllocator, 0, OrtMemTypeDefault, &memory_info);
+    }
+    REGISTER_AUTO_RELEASE_HANDLE(MemoryInfo, memory_info);
+    return CHECK_STATUS(BindOutputToDevice, io_binding, name, memory_info);
+  }
+}
+
+void OrtClearBoundOutputs(OrtIoBinding* io_binding) {
+  Ort::GetApi().ClearBoundOutputs(io_binding);
+}
+
+void OrtReleaseBinding(OrtIoBinding* io_binding) {
+  Ort::GetApi().ReleaseIoBinding(io_binding);
+}
+
+int OrtRunWithBinding(OrtSession* session,
+                      OrtIoBinding* io_binding,
+                      size_t output_count,
+                      OrtValue** outputs,
+                      OrtRunOptions* run_options) {
+  RETURN_ERROR_CODE_IF_ERROR(RunWithBinding, session, run_options, io_binding);
+
+  OrtAllocator* allocator = nullptr;
+  RETURN_ERROR_CODE_IF_ERROR(GetAllocatorWithDefaultOptions, &allocator);
+
+  size_t binding_output_count = 0;
+  OrtValue** binding_outputs = nullptr;
+  RETURN_ERROR_CODE_IF_ERROR(GetBoundOutputValues, io_binding, allocator, &binding_outputs, &binding_output_count);
+  REGISTER_AUTO_RELEASE_BUFFER(OrtValue*, binding_outputs, allocator);
+
+  if (binding_output_count != output_count) {
+    return CheckStatus(
+        Ort::GetApi().CreateStatus(ORT_INVALID_ARGUMENT, "Output count is inconsistent with IO Binding output data."));
+  }
+
+  for (size_t i = 0; i < output_count; i++) {
+    outputs[i] = binding_outputs[i];
+  }
+
+  return ORT_OK;
+}
+
 int OrtRun(OrtSession* session,
            const char** input_names, const ort_tensor_handle_t* inputs, size_t input_count,
            const char** output_names, size_t output_count, ort_tensor_handle_t* outputs,
            OrtRunOptions* run_options) {
-  auto status_code = CHECK_STATUS(Run, session, run_options, input_names, inputs, input_count, output_names, output_count, outputs);
-#if defined(USE_JSEP)
-  EM_ASM({ Module.jsepRunPromiseResolve ?.($0); }, status_code);
-#endif
-  return status_code;
+  return CHECK_STATUS(Run, session, run_options, input_names, inputs, input_count, output_names, output_count, outputs);
 }
 
 char* OrtEndProfiling(ort_session_handle_t session) {
@@ -400,6 +493,14 @@ char* OrtEndProfiling(ort_session_handle_t session) {
 #define CHECK_TRAINING_STATUS(ORT_API_NAME, ...) \
   CheckStatus(Ort::GetTrainingApi().ORT_API_NAME(__VA_ARGS__))
 
+#define RETURN_TRAINING_ERROR_CODE_IF_ERROR(ORT_API_NAME, ...)         \
+  do {                                                                 \
+    int error_code = CHECK_TRAINING_STATUS(ORT_API_NAME, __VA_ARGS__); \
+    if (error_code != ORT_OK) {                                        \
+      return error_code;                                               \
+    }                                                                  \
+  } while (false)
+
 ort_training_checkpoint_handle_t EMSCRIPTEN_KEEPALIVE OrtTrainingLoadCheckpoint(void* checkpoint_data_buffer,
                                                                                 size_t checkpoint_size) {
   OrtCheckpointState* checkpoint_state = nullptr;
@@ -478,6 +579,57 @@ int EMSCRIPTEN_KEEPALIVE OrtTrainingCopyParametersFromBuffer(ort_training_sessio
   return CHECK_TRAINING_STATUS(CopyBufferToParameters, training_handle, parameters_buffer, trainable_only);
 }
 
+int EMSCRIPTEN_KEEPALIVE OrtTrainingGetModelInputOutputCount(ort_training_session_handle_t training_handle,
+                                                             size_t* input_count,
+                                                             size_t* output_count,
+                                                             bool isEvalModel) {
+  if (isEvalModel) {
+    RETURN_TRAINING_ERROR_CODE_IF_ERROR(TrainingSessionGetEvalModelInputCount, training_handle, input_count);
+    RETURN_TRAINING_ERROR_CODE_IF_ERROR(TrainingSessionGetEvalModelOutputCount, training_handle, output_count);
+    return ORT_OK;
+  } else {
+    RETURN_TRAINING_ERROR_CODE_IF_ERROR(TrainingSessionGetTrainingModelInputCount, training_handle, input_count);
+    RETURN_TRAINING_ERROR_CODE_IF_ERROR(TrainingSessionGetTrainingModelOutputCount, training_handle, output_count);
+    return ORT_OK;
+  }
+}
+
+char* EMSCRIPTEN_KEEPALIVE OrtTrainingGetModelInputOutputName(ort_training_session_handle_t training_handle,
+                                                              size_t index,
+                                                              bool isInput,
+                                                              bool isEvalModel) {
+  OrtAllocator* allocator = nullptr;
+  RETURN_NULLPTR_IF_ERROR(GetAllocatorWithDefaultOptions, &allocator);
+
+  char* name = nullptr;
+
+  if (isEvalModel) {
+    if (isInput) {
+      return (CHECK_TRAINING_STATUS(TrainingSessionGetEvalModelInputName, training_handle, index,
+                                    allocator, &name) == ORT_OK)
+                 ? name
+                 : nullptr;
+    } else {
+      return (CHECK_TRAINING_STATUS(TrainingSessionGetEvalModelOutputName, training_handle, index,
+                                    allocator, &name) == ORT_OK)
+                 ? name
+                 : nullptr;
+    }
+  } else {
+    if (isInput) {
+      return (CHECK_TRAINING_STATUS(TrainingSessionGetTrainingModelInputName, training_handle, index,
+                                    allocator, &name) == ORT_OK)
+                 ? name
+                 : nullptr;
+    } else {
+      return (CHECK_TRAINING_STATUS(TrainingSessionGetTrainingModelOutputName, training_handle, index,
+                                    allocator, &name) == ORT_OK)
+                 ? name
+                 : nullptr;
+    }
+  }
+}
+
 void EMSCRIPTEN_KEEPALIVE OrtTrainingReleaseSession(ort_training_session_handle_t training_handle) {
   Ort::GetTrainingApi().ReleaseTrainingSession(training_handle);
 }
diff --git a/onnxruntime/wasm/api.h b/onnxruntime/wasm/api.h
index 398c901e0e5ed..2cd1515d191c8 100644
--- a/onnxruntime/wasm/api.h
+++ b/onnxruntime/wasm/api.h
@@ -15,6 +15,9 @@
 struct OrtSession;
 using ort_session_handle_t = OrtSession*;
 
+struct OrtIoBinding;
+using ort_io_binding_handle_t = OrtIoBinding*;
+
 struct OrtSessionOptions;
 using ort_session_options_handle_t = OrtSessionOptions*;
 
@@ -164,9 +167,10 @@ void EMSCRIPTEN_KEEPALIVE OrtFree(void* ptr);
  * @param data_length size of the buffer 'data' in bytes.
  * @param dims a pointer to an array of dims. the array should contain (dims_length) element(s).
  * @param dims_length the length of the tensor's dimension
+ * @param data_location specify the memory location of the tensor data. 0 for CPU, 1 for GPU buffer.
  * @returns a tensor handle. Caller must release it after use by calling OrtReleaseTensor().
  */
-ort_tensor_handle_t EMSCRIPTEN_KEEPALIVE OrtCreateTensor(int data_type, void* data, size_t data_length, size_t* dims, size_t dims_length);
+ort_tensor_handle_t EMSCRIPTEN_KEEPALIVE OrtCreateTensor(int data_type, void* data, size_t data_length, size_t* dims, size_t dims_length, int data_location);
 
 /**
  * get type, shape info and data of the specified tensor.
@@ -216,6 +220,58 @@ int EMSCRIPTEN_KEEPALIVE OrtAddRunConfigEntry(ort_run_options_handle_t run_optio
  */
 void EMSCRIPTEN_KEEPALIVE OrtReleaseRunOptions(ort_run_options_handle_t run_options);
 
+/**
+ * create an instance of ORT IO binding.
+ */
+ort_io_binding_handle_t EMSCRIPTEN_KEEPALIVE OrtCreateBinding(ort_session_handle_t session);
+
+/**
+ * bind an input tensor to the IO binding instance. A cross device copy will be performed if necessary.
+ * @param io_binding handle of the IO binding
+ * @param name name of the input
+ * @param input handle of the input tensor
+ * @returns ORT error code. If not zero, call OrtGetLastError() to get detailed error message.
+ */
+int EMSCRIPTEN_KEEPALIVE OrtBindInput(ort_io_binding_handle_t io_binding,
+                                      const char* name,
+                                      ort_tensor_handle_t input);
+
+/**
+ * bind an output tensor or location to the IO binding instance.
+ * @param io_binding handle of the IO binding
+ * @param name name of the output
+ * @param output handle of the output tensor. nullptr for output location binding.
+ * @param output_location specify the memory location of the output tensor data.
+ * @returns ORT error code. If not zero, call OrtGetLastError() to get detailed error message.
+ */
+int EMSCRIPTEN_KEEPALIVE OrtBindOutput(ort_io_binding_handle_t io_binding,
+                                       const char* name,
+                                       ort_tensor_handle_t output,
+                                       int output_location);
+
+/**
+ * clear all bound outputs.
+ */
+void EMSCRIPTEN_KEEPALIVE OrtClearBoundOutputs(ort_io_binding_handle_t io_binding);
+
+/**
+ * release the specified ORT IO binding.
+ */
+void EMSCRIPTEN_KEEPALIVE OrtReleaseBinding(ort_io_binding_handle_t io_binding);
+
+/**
+ * inference the model.
+ * @param session handle of the specified session
+ * @param io_binding handle of the IO binding
+ * @param run_options handle of the run options
+ * @returns ORT error code. If not zero, call OrtGetLastError() to get detailed error message.
+ */
+int EMSCRIPTEN_KEEPALIVE OrtRunWithBinding(ort_session_handle_t session,
+                                           ort_io_binding_handle_t io_binding,
+                                           size_t output_count,
+                                           ort_tensor_handle_t* outputs,
+                                           ort_run_options_handle_t run_options);
+
 /**
  * inference the model.
  * @param session handle of the specified session
@@ -376,6 +432,35 @@ int EMSCRIPTEN_KEEPALIVE OrtTrainingCopyParametersFromBuffer(ort_training_sessio
                                                              size_t parameter_count,
                                                              bool trainable_only);
 
+/**
+ * Gets the input count and output count of the training or eval model associated with the given training handle.
+ * @param traning_handle handle of the traning session
+ * @param input_count [out] a pointer to a size_t variable to accept input_count
+ * @param output_count [out] a pointer to a size_t variable to accept output_count
+ * @param isEvalModel when false, returns input & output count of the training model. When true, returns input & output
+ *                    count of the eval model.
+ * @returns ORT error code. If not zero, call OrtGetLastError() to get a detailed error message.
+ */
+int EMSCRIPTEN_KEEPALIVE OrtTrainingGetModelInputOutputCount(ort_training_session_handle_t training_handle,
+                                                             size_t* input_count,
+                                                             size_t* output_count,
+                                                             bool isEvalModel);
+
+/**
+ * Gets the input or output name at the specified index associated with the training or eval model from the
+ * given training session.
+ * @param traning_handle handle of the traning session
+ * @param index the input or output index
+ * @param isInput if true, this method retrieves an input name. If false, this method retrieves an output name.
+ * @param isEvalModel when false, returns input & output names of the training model. When true, returns input & output
+ *                    names of the eval model.
+ * @returns a pointer to a buffer which contains C-style string. Caller must release the C style string after use by
+ */
+char* EMSCRIPTEN_KEEPALIVE OrtTrainingGetModelInputOutputName(ort_training_session_handle_t training_handle,
+                                                              size_t index,
+                                                              bool isInput,
+                                                              bool isEvalModel);
+
 /**
  * @brief Release the specified ORT training session.
  *
diff --git a/onnxruntime/wasm/js_internal_api.js b/onnxruntime/wasm/js_internal_api.js
index 15d393f4ce62d..427ad6f6d14f3 100644
--- a/onnxruntime/wasm/js_internal_api.js
+++ b/onnxruntime/wasm/js_internal_api.js
@@ -14,40 +14,156 @@ Module['jsepInit'] = (backend, alloc, free, copy, copyAsync, createKernel, relea
   Module.jsepReleaseKernel = releaseKernel;
   Module.jsepRunKernel = runKernel;
 
-  Module['jsepOnRunStart'] = sessionId => {
-    Module['jsepRunPromise'] = new Promise(r => {
-      Module.jsepRunPromiseResolve = r;
-    });
-
-    if (Module.jsepSessionState) {
-      throw new Error('Session already started');
-    }
-
-    Module.jsepSessionState = {
-      sessionId,
-      errors: []
+  // This is a simplified version of cwrap() with options.async === true (-sASYNCIFY=1)
+  // It removes some overhead in cwarp() and ccall() that we don't need.
+  //
+  // Currently in JSEP build, we only use this for the following functions:
+  // - OrtRun()
+  // - OrtRunWithBinding()
+  // - OrtBindInput()
+  //
+  // Note: about parameters "getFunc" and "setFunc":
+  // - Emscripten has different behaviors for Debug and Release builds for generating exported function wrapper.
+  //
+  //   - In Debug build, it will generate a wrapper function for each exported function. For example, it generates a
+  //     wrapper for OrtRun() like this (minified):
+  //     ```
+  //     var _OrtRun = Module["_OrtRun"] = createExportWrapper("OrtRun");
+  //     ```
+  //
+  //   - In Release build, it will generate a lazy loading wrapper for each exported function. For example, it generates
+  //     a wrapper for OrtRun() like this (minified):
+  //     ```
+  //     d._OrtRun = (a, b, c, e, f, h, l, q) => (d._OrtRun = J.ka)(a, b, c, e, f, h, l, q);
+  //     ```
+  //
+  //   The behavior of these two wrappers are different. The debug build will assign `Module["_OrtRun"]` only once
+  //   because `createExportWrapper()` does not reset `Module["_OrtRun"]` inside. The release build, however, will
+  //   reset d._OrtRun to J.ka when the first time it is called.
+  //
+  //   The difference is important because we need to design the async wrapper in a way that it can handle both cases.
+  //
+  //   Now, let's look at how the async wrapper is designed to work for both cases:
+  //
+  //   - Debug build:
+  //      1. When Web assembly is being loaded, `Module["_OrtRun"]` is assigned to `createExportWrapper("OrtRun")`.
+  //      2. When the first time `Module["jsepInit"]` is called, `Module["_OrtRun"]` is re-assigned to a new async
+  //         wrapper function.
+  //      Value of `Module["_OrtRun"]` will not be changed again.
+  //
+  //   - Release build:
+  //      1. When Web assembly is being loaded, `Module["_OrtRun"]` is assigned to a lazy loading wrapper function.
+  //      2. When the first time `Module["jsepInit"]` is called, `Module["_OrtRun"]` is re-assigned to a new async
+  //         wrapper function.
+  //      3. When the first time `Module["_OrtRun"]` is called, the async wrapper will be called. It will call into this
+  //         function:
+  //         ```
+  //         (a, b, c, e, f, h, l, q) => (d._OrtRun = J.ka)(a, b, c, e, f, h, l, q);
+  //         ```
+  //         This function will assign d._OrtRun (ie. the minimized `Module["_OrtRun"]`) to the real function (J.ka).
+  //      4. Since d._OrtRun is re-assigned, we need to update the async wrapper to re-assign its stored
+  //         function to the updated value (J.ka), and re-assign the value of `d._OrtRun` back to the async wrapper.
+  //      Value of `Module["_OrtRun"]` will not be changed again.
+  //
+  //   The value of `Module["_OrtRun"]` will need to be assigned for 2 times for debug build and 4 times for release
+  //   build.
+  //
+  //   This is why we need this `getFunc` and `setFunc` parameters. They are used to get the current value of an
+  //   exported function and set the new value of an exported function.
+  //
+  const jsepWrapAsync = (func, getFunc, setFunc) => {
+    return (...args) => {
+      // cache the async data before calling the function.
+      const previousAsync = Asyncify.currData;
+
+      const previousFunc = getFunc?.();
+      const ret = func(...args);
+      const newFunc = getFunc?.();
+      if (previousFunc !== newFunc) {
+        // The exported function has been updated.
+        // Set the sync function reference to the new function.
+        func = newFunc;
+        // Set the exported function back to the async wrapper.
+        setFunc(previousFunc);
+        // Remove getFunc and setFunc. They are no longer needed.
+        setFunc = null;
+        getFunc = null;
+      }
+
+      // If the async data has been changed, it means that the function started an async operation.
+      if (Asyncify.currData != previousAsync) {
+        // returns the promise
+        return Asyncify.whenDone();
+      }
+      // the function is synchronous. returns the result.
+      return ret;
     };
   };
 
-  Module['jsepOnRunEnd'] = sessionId => {
-    if (Module.jsepSessionState.sessionId !== sessionId) {
-      throw new Error('Session ID mismatch');
-    }
-
-    const errorPromises = Module.jsepSessionState.errors;
-    Module.jsepSessionState = null;
-
-    return errorPromises.length === 0 ? Promise.resolve() : new Promise((resolve, reject) => {
-      Promise.all(errorPromises).then(errors => {
-        errors = errors.filter(e => e);
-        if (errors.length > 0) {
-          reject(new Error(errors.join('\n')));
-        } else {
-          resolve();
+  // This is a wrapper for OrtRun() and OrtRunWithBinding() to ensure that Promises are handled correctly.
+  const runAsync = (runAsyncFunc) => {
+    return async (...args) => {
+      try {
+        // Module.jsepSessionState should be null, unless we are in the middle of a session.
+        // If it is not null, it means that the previous session has not finished yet.
+        if (Module.jsepSessionState) {
+          throw new Error('Session already started');
+        }
+        const state = Module.jsepSessionState = {sessionHandle: args[0], errors: []};
+
+        // Run the acyncified function: OrtRun() or OrtRunWithBinding()
+        const ret = await runAsyncFunc(...args);
+
+        // Check if the session is still valid. this object should be the same as the one we set above.
+        if (Module.jsepSessionState !== state) {
+          throw new Error('Session mismatch');
+        }
+
+        // Flush the backend. This will submit all pending commands to the GPU.
+        backend['flush']();
+
+        // Await all pending promises. This includes GPU validation promises for diagnostic purposes.
+        const errorPromises = state.errors;
+        if (errorPromises.length > 0) {
+          let errors = await Promise.all(errorPromises);
+          errors = errors.filter(e => e);
+          if (errors.length > 0) {
+            throw new Error(errors.join('\n'));
+          }
         }
-      }, reason => {
-        reject(reason);
-      });
-    });
+
+        return ret;
+      } finally {
+        Module.jsepSessionState = null;
+      }
+    };
+  };
+
+  // replace the original functions with asyncified versions
+  Module['_OrtRun'] = runAsync(jsepWrapAsync(
+      Module['_OrtRun'],
+      () => Module['_OrtRun'],
+      v => Module['_OrtRun'] = v));
+  Module['_OrtRunWithBinding'] = runAsync(jsepWrapAsync(
+      Module['_OrtRunWithBinding'],
+      () => Module['_OrtRunWithBinding'],
+      v => Module['_OrtRunWithBinding'] = v));
+  Module['_OrtBindInput'] = jsepWrapAsync(
+      Module['_OrtBindInput'],
+      () => Module['_OrtBindInput'],
+      v => Module['_OrtBindInput'] = v);
+
+  // expose webgpu backend functions
+  Module['jsepRegisterBuffer'] = (sessionId, index, buffer, size) => {
+    return backend['registerBuffer'](sessionId, index, buffer, size);
+  };
+  Module['jsepUnregisterBuffers'] = sessionId => {
+    backend['unregisterBuffers'](sessionId);
+  };
+  Module['jsepGetBuffer'] = (dataId) => {
+    return backend['getBuffer'](dataId);
+  };
+  Module['jsepCreateDownloader'] = (gpuBuffer, size, type) => {
+    return backend['createDownloader'](gpuBuffer, size, type);
   };
 };
diff --git a/orttraining/orttraining/core/agent/training_agent.cc b/orttraining/orttraining/core/agent/training_agent.cc
index 3b701fa8bf577..0b38a79cc21c9 100644
--- a/orttraining/orttraining/core/agent/training_agent.cc
+++ b/orttraining/orttraining/core/agent/training_agent.cc
@@ -1,11 +1,17 @@
 ﻿// Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
+#include <map>
+#include <memory>
+#include <utility>
+#include <string>
+
 #include "orttraining/core/agent/training_agent.h"
 #include "core/framework/utils.h"
 #include "core/framework/feeds_fetches_manager.h"
 #include "core/framework/partial_graph_execution_state.h"
 #include "core/framework/stream_execution_context.h"
+#include "orttraining/core/optimizer/memory_optimizer/memory_insight.h"
 
 namespace onnxruntime {
 namespace training {
@@ -25,7 +31,8 @@ TrainingAgent::TrainingAgent(InferenceSession& session,
   std::vector<std::string> bw_feed_names;
 
   size_t break_point = 0;
-  auto& training_node_execution_order = session_state.GetGraphViewer().GetNodesInTopologicalOrder(session.GetSessionOptions().execution_order);
+  auto& training_node_execution_order = session_state.GetGraphViewer().GetNodesInTopologicalOrder(
+      session.GetSessionOptions().execution_order);
   for (auto node_index : training_node_execution_order) {
     if (session_state.GetKernel(node_index)->KernelDef().OpName() == "YieldOp") {
       auto& node = *(session_state.GetGraphViewer().GetGraph().GetNode(node_index));
@@ -89,7 +96,8 @@ void TrainingAgent::CreateAndInitializeFeedsFetchesManager(const SessionState& s
                                                            const std::vector<std::string>& feed_names,
                                                            const std::vector<std::string>& fetches_names,
                                                            const std::vector<OrtDevice>& outputs_device_info,
-                                                           std::unique_ptr<FeedsFetchesManager>& feeds_fetches_manager) {
+                                                           std::unique_ptr<FeedsFetchesManager>&
+                                                               feeds_fetches_manager) {
   ORT_THROW_IF_ERROR(FeedsFetchesManager::Create(feed_names, fetches_names, session_state.GetOrtValueNameIdxMap(),
                                                  feeds_fetches_manager));
   auto& fetch_info = feeds_fetches_manager->GetMutableFetchesDeviceCopyInfo();
@@ -100,5 +108,23 @@ void TrainingAgent::CreateAndInitializeFeedsFetchesManager(const SessionState& s
   ORT_ENFORCE(utils::InitializeFeedFetchCopyInfo(session_state, *feeds_fetches_manager) == Status::OK());
 }
 
+std::string TrainingAgent::GetSerializedORTModuleMemoryStat(std::string_view memory_optimization_config,
+                                                            std::string_view recompute_probe_level,
+                                                            std::map<std::string, std::pair<std::string, int>>&
+                                                                cluster_id_combinations_to_saved_symbolic_byte_map)
+    const {
+  auto& session_state = inference_session_.GetSessionState();
+  const OrtValueNameIdxMap& ortvalue_name_to_idx_map = session_state.GetOrtValueNameIdxMap();
+  const SequentialExecutionPlan& p_seq_exec_plan = *session_state.GetExecutionPlan();
+  return optimizer::memory_optimizer::GetSerializedORTModuleMemoryStat(
+      session_state.GetGraphViewer(),
+      memory_optimization_config,
+      recompute_probe_level,
+      *inference_session_.GetLogger(),
+      cluster_id_combinations_to_saved_symbolic_byte_map,
+      &ortvalue_name_to_idx_map,
+      &p_seq_exec_plan);
+}
+
 }  // namespace training
 }  // namespace onnxruntime
diff --git a/orttraining/orttraining/core/agent/training_agent.h b/orttraining/orttraining/core/agent/training_agent.h
index b12f5e6d75ef1..37e5272f66e32 100644
--- a/orttraining/orttraining/core/agent/training_agent.h
+++ b/orttraining/orttraining/core/agent/training_agent.h
@@ -5,11 +5,15 @@
 
 #include <thread>
 #include <future>
+#include <map>
+#include <utility>
+#include <string>
 
 #include "core/common/common.h"
 #include "core/common/logging/logging.h"
 #include "core/framework/framework_common.h"
 #include "core/session/inference_session.h"
+#include "orttraining/core/optimizer/memory_optimizer/memory_insight.h"
 
 namespace onnxruntime {
 struct PartialGraphExecutionState;
@@ -45,6 +49,11 @@ class TrainingAgent {
                                               const std::vector<OrtDevice>& outputs_device_info,
                                               std::unique_ptr<FeedsFetchesManager>& feeds_fetches_manager);
 
+  std::string GetSerializedORTModuleMemoryStat(std::string_view memory_optimization_config,
+                                               std::string_view recompute_probe_level,
+                                               std::map<std::string, std::pair<std::string, int>>&
+                                                   cluster_id_combinations_to_saved_symbolic_byte_map) const;
+
  private:
   // TrainingAgent runs on a InferenceSession under the hood
   InferenceSession& inference_session_;
diff --git a/orttraining/orttraining/core/framework/gradient_graph_builder.h b/orttraining/orttraining/core/framework/gradient_graph_builder.h
index 8068d4825cee3..93ba836b53e22 100644
--- a/orttraining/orttraining/core/framework/gradient_graph_builder.h
+++ b/orttraining/orttraining/core/framework/gradient_graph_builder.h
@@ -70,6 +70,7 @@ static std::unordered_map<std::string, std::unordered_set<size_t>>
         {"Split", {1}},
         {"Clip", {1, 2}},
         {"Pad", {1, 2}},
+        {"MatMulBnb4", {1, 2}},  // quantified weight (non float) and absmax constant don't need gradients.
         {"Multinomial", {0}},
         {"RandomNormalLike", {0}},
         {"RandomUniformLike", {0}},
diff --git a/orttraining/orttraining/core/framework/torch/custom_function_register.cc b/orttraining/orttraining/core/framework/torch/custom_function_register.cc
index 2bf0be1d719c2..1a51da3daa27f 100644
--- a/orttraining/orttraining/core/framework/torch/custom_function_register.cc
+++ b/orttraining/orttraining/core/framework/torch/custom_function_register.cc
@@ -95,6 +95,16 @@ void OrtTorchFunctionPool::RegisterTorchAutogradFunction(
   RegisterEntry(mutex_, key, backward.get(), backward_core_pool_);
 }
 
+void OrtTorchFunctionPool::RegisterShapeInferenceFunction(const std::string& key,
+                                                          PyObject* obj) {
+  RegisterEntry(mutex_, key, obj, shape_inference_function_pool_);
+}
+
+void OrtTorchFunctionPool::RegisterInputAliasFunction(const std::string& key,
+                                                      PyObject* obj) {
+  RegisterEntry(mutex_, key, obj, input_alias_function_pool_);
+}
+
 static void RegisterEntry(
     std::mutex& mutex,
     PyObject* obj,
@@ -153,6 +163,26 @@ PyObject* OrtTorchFunctionPool::GetBackwardCore(const std::string& key) {
   return iter->second.get();
 }
 
+std::optional<PyObject*> OrtTorchFunctionPool::TryGettingShapeInferenceFunction(const std::string& key) {
+  ORT_ENFORCE(!key.empty(), "Cannot be empty string.");
+  std::lock_guard<std::mutex> lock(mutex_);
+  auto iter = shape_inference_function_pool_.find(key);
+  if (iter != shape_inference_function_pool_.end()) {
+    return iter->second.get();
+  }
+  return std::nullopt;
+}
+
+std::optional<PyObject*> OrtTorchFunctionPool::TryGettingInputAliasFunction(const std::string& key) {
+  ORT_ENFORCE(!key.empty(), "Cannot be empty string.");
+  std::lock_guard<std::mutex> lock(mutex_);
+  auto iter = input_alias_function_pool_.find(key);
+  if (iter != input_alias_function_pool_.end()) {
+    return iter->second.get();
+  }
+  return std::nullopt;
+}
+
 void OrtTorchFunctionPool::RegisterMiscellaneousConstInput(PyObject* obj) {
   ORT_ENFORCE(obj, "Cannot register NULL reference input.");
   const void* address = static_cast<const void*>(obj);
@@ -205,6 +235,8 @@ void OrtTorchFunctionPool::UnRegisterGlobalFunctions() {
 void OrtTorchFunctionPool::UnRegisterModelSpecificFunctions() {
   forward_core_pool_.clear();
   backward_core_pool_.clear();
+  shape_inference_function_pool_.clear();
+  input_alias_function_pool_.clear();
   miscellaneous_const_input_pool_.clear();
 }
 
diff --git a/orttraining/orttraining/core/framework/torch/custom_function_register.h b/orttraining/orttraining/core/framework/torch/custom_function_register.h
index 0dea6d036a6bd..d51cc7dadc1af 100644
--- a/orttraining/orttraining/core/framework/torch/custom_function_register.h
+++ b/orttraining/orttraining/core/framework/torch/custom_function_register.h
@@ -34,6 +34,16 @@ class OrtTorchFunctionPool final {
   //  2. Caller of GetBackwardCore should not decrease the reference count of the returned object.
   PyObject* GetBackwardCore(const std::string& key);  // The "key" is the "name" attribute in PythonOpGrad.
 
+  // Shape inference function is used to infer output shape of a PythonOp.
+  void RegisterShapeInferenceFunction(const std::string& key, PyObject* obj);
+  // Return a borrowed reference to the stored Python function, if it exists; otherwise, return nullptr.
+  std::optional<PyObject*> TryGettingShapeInferenceFunction(const std::string& key);
+
+  // Input alias function is used to infer memory reuse map of a PythonOp.
+  void RegisterInputAliasFunction(const std::string& key, PyObject* obj);
+  // Return a borrowed reference to the stored Python function, if it exists; otherwise, return nullptr.
+  std::optional<PyObject*> TryGettingInputAliasFunction(const std::string& key);
+
   // Autograd function may take input of "non-tensor && non int/float && non int/float tuple" types.
   // While PythonOp running requires those inputs be there otherwise kernel execution will fail.
   // So during model exporting, we need register those input with this API, then a ref cnt is increased by 1,
@@ -92,6 +102,9 @@ class OrtTorchFunctionPool final {
 
   std::unordered_map<std::string, PythonObjectPtr> forward_core_pool_;
   std::unordered_map<std::string, PythonObjectPtr> backward_core_pool_;
+  std::unordered_map<std::string, PythonObjectPtr> shape_inference_function_pool_;
+  std::unordered_map<std::string, PythonObjectPtr> input_alias_function_pool_;
+
   std::unordered_map<std::string, PythonObjectPtr> miscellaneous_const_input_pool_;
   std::unordered_map<int64_t, PythonObjectPtr> func_context_pool_;
 
diff --git a/orttraining/orttraining/core/framework/torch/torch_proxy.cc b/orttraining/orttraining/core/framework/torch/torch_proxy.cc
index 377f564a00337..f36f913366a37 100644
--- a/orttraining/orttraining/core/framework/torch/torch_proxy.cc
+++ b/orttraining/orttraining/core/framework/torch/torch_proxy.cc
@@ -10,9 +10,7 @@
 #include "orttraining/core/framework/torch/gil.h"
 #include "core/platform/env.h"
 
-namespace onnxruntime {
-namespace language_interop_ops {
-namespace torch {
+namespace onnxruntime::language_interop_ops::torch {
 
 void PythonObjectDeleter(PyObject* ptr) { Py_XDECREF(ptr); };
 
@@ -130,6 +128,18 @@ PyObject* CreateRequiresGradFlags(
   return flags;
 }
 
+PyObject* CreateInplaceMap(
+    const std::vector<int64_t>& inplace_map) {
+  PyObject* inplace_map_obj = Ort_PyList_New(inplace_map.size(), "inplace_map");
+
+  for (size_t output_index = 0; output_index < inplace_map.size(); ++output_index) {
+    PyObject* input_index = PyLong_FromLong(inplace_map[output_index]);
+    Ort_PyList_SetItem_NoIncref(inplace_map_obj, output_index, input_index, std::to_string(__LINE__));
+  }
+
+  return inplace_map_obj;
+}
+
 void InvokeRunner(
     PyObject* callback_runner,
     PyObject* args,
@@ -197,14 +207,15 @@ PythonObjectPtr CreatePythonCallArguments(
     const std::vector<void*>& obj_args,
     const std::vector<int64_t>& obj_indices,
     const bool is_training_mode,
-    const bool is_inplace,
-    const std::string& invoke_id) {
+    const std::vector<int64_t>& inplace_map,
+    const std::string& invoke_id,
+    const std::string& func_name) {
   ORT_ENFORCE(PyCallable_Check(callback), "Forward callback is not callable.");
   // The number of variables before those of
   // autograd.Function.apply and autograd.Function.backward.
   // The extra variables are used to configure the launch
   // forward and backward runners.
-  constexpr int64_t num_control_args = 6;
+  constexpr int64_t num_control_args = 7;
 
   // All arguments created for Python call will be destroyed along with PythonObjectPtr.
   PythonObjectPtr args(Ort_PyTuple_New(num_control_args + len, "forward_arguments_tuple"), PythonObjectDeleter);
@@ -216,11 +227,16 @@ PythonObjectPtr CreatePythonCallArguments(
   Ort_PyTuple_SetItem_NoIncref(args.get(), 2, tensor_flags, "tensor_flags");
   PyObject* is_training_mode_arg = is_training_mode ? Py_True : Py_False;
   Ort_PyTuple_SetItem_Incref(args.get(), 3, is_training_mode_arg, "is_training_mode");
-  PyObject* is_inplace_arg = is_inplace ? Py_True : Py_False;
-  Ort_PyTuple_SetItem_Incref(args.get(), 4, is_inplace_arg, "is_inplace_mode");
+
+  PyObject* inplace_map_arg = CreateInplaceMap(inplace_map);
+  Ort_PyTuple_SetItem_NoIncref(args.get(), 4, inplace_map_arg, "inplace_map");
+
   PyObject* kernel_invoke_id_arg = PyBytes_FromStringAndSize(invoke_id.c_str(), invoke_id.size());
   Ort_PyTuple_SetItem_NoIncref(args.get(), 5, kernel_invoke_id_arg, "kernel_invoke_id_arg");
 
+  PyObject* func_name_arg = PyBytes_FromStringAndSize(func_name.c_str(), func_name.size());
+  Ort_PyTuple_SetItem_NoIncref(args.get(), 6, func_name_arg, "func_name_arg");
+
   // Tensor inputs to call autograd.Function.apply or autograd.Function.backward.
   for (size_t i = 0; i < tensor_args.size(); ++i) {
     if (!tensor_args[i].has_value()) {
@@ -246,6 +262,7 @@ PythonObjectPtr CreatePythonCallArguments(
 }
 
 void Invoke(
+    const std::string& func_name,
     PyObject* runner,
     PyObject* callback,
     const std::vector<int64_t>& requires_grads,
@@ -253,11 +270,11 @@ void Invoke(
     const std::vector<int64_t>& tensor_indices,
     const std::vector<void*>& obj_args,
     const std::vector<int64_t>& obj_indices,
-    void** diff_ctx,
-    std::vector<OrtValue>& returned_ortvalues,
     const bool is_training_mode,
-    const bool is_inplace,
-    const std::string& invoke_id) {
+    const std::vector<int64_t>& inplace_map,
+    const std::string& invoke_id,
+    void** diff_ctx,
+    std::vector<OrtValue>& returned_ortvalues) {
   const auto len = tensor_args.size() + obj_args.size();
   CheckArguments(len, requires_grads, tensor_args, tensor_indices, obj_args, obj_indices);
   RefCountTracker::GetInstance().Reset();
@@ -271,8 +288,9 @@ void Invoke(
         obj_args,
         obj_indices,
         is_training_mode,
-        is_inplace,
-        invoke_id);
+        inplace_map,
+        invoke_id,
+        func_name);
 
     RefCountTracker::GetInstance().DumpDetails("Before Invoke Python Call");
     InvokeRunner(runner, args.get(), is_training_mode, diff_ctx, returned_ortvalues);
@@ -282,17 +300,18 @@ void Invoke(
 }
 
 void TorchProxy::Forward(
+    const std::string& func_name,
     void* callback,
     const std::vector<int64_t>& requires_grads,
     const std::vector<std::optional<OrtValue>>& tensor_args,
     const std::vector<int64_t>& tensor_indices,
     const std::vector<void*>& obj_args,
     const std::vector<int64_t>& obj_indices,
-    void** diff_ctx,
-    std::vector<OrtValue>& returned_ortvalues,
     const bool is_training_mode,
-    const bool is_inplace,
-    const std::string& invoke_id) {
+    const std::vector<int64_t>& inplace_map,
+    const std::string& invoke_id,
+    void** diff_ctx,
+    std::vector<OrtValue>& returned_ortvalues) {
   // Semantically, this lock uniquely takes the ownership of TorchProxy
   // so that there will be only one of TorchProxy::Forward TorchProxy::Backward
   // can be run at one time.
@@ -301,6 +320,7 @@ void TorchProxy::Forward(
   GilGuard guard;
   auto runner = OrtTorchFunctionPool::GetInstance().GetForwardRunner();
   Invoke(
+      func_name,
       runner,
       reinterpret_cast<PyObject*>(callback),
       requires_grads,
@@ -308,22 +328,23 @@ void TorchProxy::Forward(
       tensor_indices,
       obj_args,
       obj_indices,
-      diff_ctx,
-      returned_ortvalues,
       is_training_mode,
-      is_inplace,
-      invoke_id);
+      inplace_map,
+      invoke_id,
+      diff_ctx,
+      returned_ortvalues);
 }
 
 void TorchProxy::Backward(
+    const std::string& func_name,
     void* callback,
     const std::vector<std::optional<OrtValue>>& tensor_args,
     const std::vector<int64_t>& tensor_indices,
     const std::vector<void*>& obj_args,
     const std::vector<int64_t>& obj_indices,
-    std::vector<OrtValue>& returned_ortvalues,
-    const bool is_inplace,
-    const std::string& invoke_id) {
+    const std::vector<int64_t>& inplace_map,
+    const std::string& invoke_id,
+    std::vector<OrtValue>& returned_ortvalues) {
   // Semantically, this lock uniquely takes the ownership of TorchProxy
   // so that there will be only one of TorchProxy::Forward TorchProxy::Backward
   // can be run at one time.
@@ -336,6 +357,7 @@ void TorchProxy::Backward(
   const auto all_input_count = tensor_args.size() + obj_args.size();
   const std::vector<int64_t> requires_grads(all_input_count, 0);
   Invoke(
+      func_name,
       runner,
       reinterpret_cast<PyObject*>(callback),
       requires_grads,
@@ -343,12 +365,61 @@ void TorchProxy::Backward(
       tensor_indices,
       obj_args,
       obj_indices,
-      nullptr /* context to store */,
-      returned_ortvalues,
       true /* is_training_mode */,
-      is_inplace,
-      invoke_id);
+      inplace_map,
+      invoke_id,
+      nullptr /* context to store */,
+      returned_ortvalues);
+}
+
+void TorchProxy::RunInputAliasFunction(
+    void* input_alias_function,
+    const std::string& node_proto_str,
+    std::vector<int64_t>& fw_output_to_input_alias_map,
+    std::vector<int64_t>& bw_output_to_input_alias_map) {
+  PyObject* input_alias_func = reinterpret_cast<PyObject*>(input_alias_function);
+  ORT_ENFORCE(PyCallable_Check(input_alias_func), "input_alias_func is not callable.");
+
+  // All arguments created for Python call will be destroyed along with PythonObjectPtr.
+  PythonObjectPtr args(Ort_PyTuple_New(1, "input_alias_func_arguments_tuple"), PythonObjectDeleter);
+  PyObject* node_proto_ptr_arg = PyBytes_FromStringAndSize(node_proto_str.c_str(), node_proto_str.size());
+  Ort_PyTuple_SetItem_NoIncref(args.get(), 0, node_proto_ptr_arg, "node_proto_ptr_arg");
+
+  PythonObjectPtr result_ptr(PyObject_CallObject(input_alias_func, args.get()), PythonObjectDeleter);
+  if (PyErr_Occurred()) {
+    PyErr_Print();
+    ORT_THROW("Python function execution fails with the above information.");
+  }
+
+  bool is_tuple = PyTuple_Check(result_ptr.get());
+  bool is_list = PyList_Check(result_ptr.get());
+  ORT_ENFORCE(is_tuple || is_list, "Python function must return a tuple or a list. is_tuple: ",
+              is_tuple, ", is_list: ", is_list);
+  Py_ssize_t ret_tuple_size =
+      is_tuple ? PyTuple_Size(result_ptr.get()) : PyList_Size(result_ptr.get());
+  ORT_ENFORCE(ret_tuple_size == 2, "Input alias function must return a tuple/list of size 2.");
+
+  for (Py_ssize_t tuple_index = 0; tuple_index < ret_tuple_size; ++tuple_index) {
+    PyObject* alias_map = is_tuple ? PyTuple_GetItem(result_ptr.get(), tuple_index)
+                                   : PyList_GetItem(result_ptr.get(), tuple_index);
+
+    std::vector<int64_t>& output_to_input_alias_map =
+        tuple_index == 0 ? fw_output_to_input_alias_map : bw_output_to_input_alias_map;
+
+    bool is_elem_tuple = PyTuple_Check(alias_map);
+    bool is_elem_list = PyList_Check(alias_map);
+
+    ORT_ENFORCE(is_elem_tuple || is_elem_list, "Input alias map must be a tuple or a list. is_elem_list: ",
+                is_elem_list, ", is_elem_tuple: ", is_elem_tuple);
+    Py_ssize_t output_count = is_elem_tuple ? PyTuple_Size(alias_map) : PyList_Size(alias_map);
+    for (Py_ssize_t output_index = 0; output_index < output_count; ++output_index) {
+      PyObject* input_index =
+          is_elem_tuple ? PyTuple_GetItem(alias_map, output_index) : PyList_GetItem(alias_map, output_index);
+      ORT_ENFORCE(PyLong_Check(input_index), "Alias input index must be an integer.");
+      int64_t alias_index_int = PyLong_AsLongLong(input_index);
+      output_to_input_alias_map.push_back(alias_index_int);
+    }
+  }
 }
-}  // namespace torch
-}  // namespace language_interop_ops
-}  // namespace onnxruntime
+
+}  // namespace onnxruntime::language_interop_ops::torch
diff --git a/orttraining/orttraining/core/framework/torch/torch_proxy.h b/orttraining/orttraining/core/framework/torch/torch_proxy.h
index 189efc772a62c..1d5cc1dd69095 100644
--- a/orttraining/orttraining/core/framework/torch/torch_proxy.h
+++ b/orttraining/orttraining/core/framework/torch/torch_proxy.h
@@ -2,8 +2,11 @@
 // Licensed under the MIT License.
 
 #pragma once
+
 #include <mutex>
 #include <optional>
+#include <string>
+#include <vector>
 #include "orttraining/core/framework/torch/python_common.h"
 
 #ifndef SHARED_PROVIDER
@@ -37,27 +40,57 @@ class TorchProxy {
   };
 
   void Forward(
+      const std::string& func_name,
       void* callback,
       const std::vector<int64_t>& requires_grads,
       const std::vector<std::optional<OrtValue>>& tensor_args,
       const std::vector<int64_t>& tensor_indices,
       const std::vector<void*>& obj_args,
       const std::vector<int64_t>& obj_indices,
-      void** diff_ctx,
-      std::vector<OrtValue>& returned_ortvalues,
       const bool is_training_mode,
-      const bool is_inplace,
-      const std::string& invoke_id);
+      const std::vector<int64_t>& inplace_map,
+      const std::string& invoke_id,
+      void** diff_ctx,
+      std::vector<OrtValue>& returned_ortvalues);
 
   void Backward(
+      const std::string& func_name,
       void* callback,
       const std::vector<std::optional<OrtValue>>& tensor_args,
       const std::vector<int64_t>& tensor_indices,
       const std::vector<void*>& obj_args,
       const std::vector<int64_t>& obj_indices,
-      std::vector<OrtValue>& return_args,
-      const bool is_inplace,
-      const std::string& invoke_id);
+      const std::vector<int64_t>& inplace_map,
+      const std::string& invoke_id,
+      std::vector<OrtValue>& return_args);
+
+  /**
+   * @brief Run given function to get output to input reuse map.
+   *
+   * @param input_alias_func Python function to run.
+   *  The function should take a serialized PythonOp NodeProto string as input, return a tuple of two lists.
+   *  The signature of the function should be:
+   *     def alias_input(node_proto_str: str):
+   *         fw_alias_map = [1, -1, -1]
+   *         bw_alias_map = [-1, 0]
+   *         return fw_alias_map, bw_alias_map
+   * @param node_proto_str The serialized PythonOp NodeProto string.
+   * @param fw_output_to_input_alias_map Used as returned value, return the output to input alias map for forward pass.
+   *   For example, if the inputs of the torch.autograd.Function are [non_tensor_a, tensor_b],
+   *   outputs are [tensor_x, tensor_y, tensor_z], and the alias map is [1, -1, -1], this is explained as:
+   *   tensor_x is reusing the input tensor_b, tensor_y and tensor_z are not reusing any input.
+   *   The value of alias map is 0 based input index. -1 means the output is not reusing any input.
+   * @param bw_output_to_input_alias_map Used as returned value, return the output to input alias map for backward pass.
+   *   For example, if the inputs of the torch.autograd.Function are [tensor_x_grad, None, None],
+   *   outputs are [None, tensor_b_grad], and the alias map is [-1, 0], this is explained as:
+   *   tensor_b_grad is reusing the input tensor_x_grad.
+   *   The value of alias map is 0 based grad input index. -1 means the output is not reusing any input.
+   */
+  void RunInputAliasFunction(
+      void* input_alias_func,
+      const std::string& node_proto_str,
+      std::vector<int64_t>& fw_output_to_input_alias_map,
+      std::vector<int64_t>& bw_output_to_input_alias_map);
 
  private:
   TorchProxy(){};
diff --git a/orttraining/orttraining/core/graph/gradient_builder.cc b/orttraining/orttraining/core/graph/gradient_builder.cc
index a14f849958fa7..755a8e49d9d12 100755
--- a/orttraining/orttraining/core/graph/gradient_builder.cc
+++ b/orttraining/orttraining/core/graph/gradient_builder.cc
@@ -494,6 +494,42 @@ IMPLEMENT_GRADIENT_BUILDER(GetGemmGradient) {
   return result;
 }
 
+IMPLEMENT_GRADIENT_BUILDER(GetMatmulBnb4Gradient) {
+  auto attributes = SrcNodeAttributes();
+  std::vector<AttributeProto> attrs;
+  bool find_transB = false;
+  for (auto& attr : attributes) {
+    if (attr.first == "transB") {
+      int64_t transB_value = attr.second.i();
+      transB_value = (transB_value + 1) % 2;  // revert the transpose
+      attrs.push_back(MakeAttribute("transB", transB_value));
+      find_transB = true;
+    } else {
+      attrs.push_back(attr.second);
+    }
+  }
+
+  if (!find_transB) {
+    attrs.push_back(MakeAttribute("transB", int64_t(0)));  // default is 1, so we need to set it to 0
+  }
+
+  std::vector<NodeDef> result;
+  // Y =  A * B
+  // dA = dY * B', dB = A' * dY
+  if (IsGradientRequiredForSrcNodeInput(0)) {
+    // B is 1-D, so don't need transpose here.
+    result.push_back(NodeDef(OpDef{"MatMulBnb4", kMSDomain, 1},
+                             {GO(0), I(1), I(2)},
+                             {GI(0)},
+                             attrs));
+  }
+
+  ORT_ENFORCE(!IsGradientRequiredForSrcNodeInput(1), "Gradient propagation to B is not supported yet.");
+  ORT_ENFORCE(!IsGradientRequiredForSrcNodeInput(2), "Gradient propagation to absmax is not supported yet.");
+
+  return result;
+}
+
 IMPLEMENT_GRADIENT_BUILDER(GetSplitGradient) {
   std::vector<NodeDef> result = {};
   std::vector<ArgDef> input_args;
@@ -755,13 +791,16 @@ IMPLEMENT_GRADIENT_BUILDER(GetGatherGradient) {
 
 IMPLEMENT_GRADIENT_BUILDER(GetPadAndUnflattenGradient) {
   return std::vector<NodeDef>{
-      NodeDef(OpDef("Reshape"),
-              {GO(0), O(1)},
-              {IA("GO_reshaped")}),
-      NodeDef(OpDef{"Gather", kOnnxDomain, 1},
-              {IA("GO_reshaped"), I(1)},
-              {GI(0)},
-              SrcNodeAttributes())};
+      NodeDef(OpDef{"FlattenAndUnpad", kMSDomain, 1},
+              {GO(0), I(1)},
+              {GI(0), IA("Unflatten_dims")})};
+}
+
+IMPLEMENT_GRADIENT_BUILDER(GetFlattenAndUnpadGradient) {
+  return std::vector<NodeDef>{
+      NodeDef(OpDef{"PadAndUnflatten", kMSDomain, 1},
+              {GO(0), I(1), O(1)},
+              {GI(0)})};
 }
 
 IMPLEMENT_GRADIENT_BUILDER(GetShrunkenGatherGradient) {
@@ -1765,7 +1804,6 @@ IMPLEMENT_GRADIENT_BUILDER(GetPythonOpGradient) {
   ORT_ENFORCE(utils::HasString(src_attrs.at("func_name")));
   attrs.push_back(MakeAttribute("func_name", src_attrs.at("func_name").s()));
   attrs.push_back(MakeAttribute("output_convention", src_attrs.at("input_convention").s()));
-  attrs.push_back(MakeAttribute("inplace", src_attrs.at("inplace").i()));
 
   // input_tensor_types[i] store the type of autograd.Function.apply's ith output.
   // Note that PythonOpGrad's 0-th input is the Python context generated by PythonOp.
@@ -1849,6 +1887,14 @@ IMPLEMENT_GRADIENT_BUILDER(GetPythonOpGradient) {
               "PythonOpGrad requiring gradient output count mismatch.");
   attrs.push_back(MakeAttribute("output_tensor_requires_grads", bw_tensor_output_requires_grads));
 
+  // Copy bw_tensor_reuse_map attribute from PythonOp to PythonOpGrad if it is present.
+  auto attr_it = src_attrs.find("bw_tensor_reuse_map");
+  if (attr_it != src_attrs.end()) {
+    std::vector<int64_t> tensor_output_to_tensor_input_reuse_map(attr_it->second.ints().begin(),
+                                                                 attr_it->second.ints().end());
+    attrs.push_back(MakeAttribute("tensor_reuse_map", tensor_output_to_tensor_input_reuse_map));
+  }
+
   if (src_attrs.find("comment") != src_attrs.end() && utils::HasString(src_attrs.at("comment"))) {
     attrs.push_back(MakeAttribute("comment", src_attrs.at("comment").s()));
   }
@@ -2140,5 +2186,13 @@ IMPLEMENT_GRADIENT_BUILDER(GetScaledSumGradient) {
   ORT_THROW("ScaledSum gradient builder does not support ", input_count, " inputs");
 }
 
+IMPLEMENT_GRADIENT_BUILDER(GetResizeGradient) {
+  return std::vector<NodeDef>{
+      NodeDef(OpDef{"ResizeGrad", kMSDomain, 1},
+              {GO(0), I(0), I(1), I(2)},
+              {GI(0)},
+              SrcNodeAttributes())};
+}
+
 }  // namespace training
 }  // namespace onnxruntime
diff --git a/orttraining/orttraining/core/graph/gradient_builder.h b/orttraining/orttraining/core/graph/gradient_builder.h
index a517e8af13fcc..92bfae9cd83a4 100755
--- a/orttraining/orttraining/core/graph/gradient_builder.h
+++ b/orttraining/orttraining/core/graph/gradient_builder.h
@@ -40,6 +40,7 @@ DECLARE_GRADIENT_BUILDER(GetAveragePoolGradient)
 DECLARE_GRADIENT_BUILDER(GetMaxPoolGradient)
 DECLARE_GRADIENT_BUILDER(GetGatherGradient)
 DECLARE_GRADIENT_BUILDER(GetPadAndUnflattenGradient)
+DECLARE_GRADIENT_BUILDER(GetFlattenAndUnpadGradient)
 DECLARE_GRADIENT_BUILDER(GetShrunkenGatherGradient)
 DECLARE_GRADIENT_BUILDER(GetConvGradient)
 DECLARE_GRADIENT_BUILDER(GetUnsqueezeGradient)
@@ -54,6 +55,7 @@ DECLARE_GRADIENT_BUILDER(GetSoftmaxCrossEntropyLossGradient)
 DECLARE_GRADIENT_BUILDER(GetSoftmaxCrossEntropyLossInternalGradient)
 DECLARE_GRADIENT_BUILDER(GetGlobalAveragePoolGradient)
 DECLARE_GRADIENT_BUILDER(GetGemmGradient)
+DECLARE_GRADIENT_BUILDER(GetMatmulBnb4Gradient)
 DECLARE_GRADIENT_BUILDER(GetDropoutGradient)
 DECLARE_GRADIENT_BUILDER(GetGatherNDGradient)
 DECLARE_GRADIENT_BUILDER(GetGatherElementsGradient)
@@ -90,6 +92,7 @@ DECLARE_GRADIENT_BUILDER(GetGRUGradient)
 DECLARE_GRADIENT_BUILDER(GetReciprocalGradient)
 DECLARE_GRADIENT_BUILDER(GetLeakyReluGradient)
 DECLARE_GRADIENT_BUILDER(GetConvTransposeGradient)
+DECLARE_GRADIENT_BUILDER(GetResizeGradient)
 
 DECLARE_GRADIENT_BUILDER(GetExternalGradient)
 
diff --git a/orttraining/orttraining/core/graph/gradient_builder_registry.cc b/orttraining/orttraining/core/graph/gradient_builder_registry.cc
index 4062b5d097394..ea56be9e6dfa3 100755
--- a/orttraining/orttraining/core/graph/gradient_builder_registry.cc
+++ b/orttraining/orttraining/core/graph/gradient_builder_registry.cc
@@ -68,9 +68,11 @@ void GradientBuilderRegistry::RegisterGradientBuilders() {
   REGISTER_GRADIENT_BUILDER("Reshape", GetReshapeGradient);
   REGISTER_GRADIENT_BUILDER("Transpose", GetTransposeGradient);
   REGISTER_GRADIENT_BUILDER("Gemm", GetGemmGradient);
+  REGISTER_GRADIENT_BUILDER("MatMulBnb4", GetMatmulBnb4Gradient);
   REGISTER_GRADIENT_BUILDER("MaxPool", GetMaxPoolGradient);
   REGISTER_GRADIENT_BUILDER("Gather", GetGatherGradient);
   REGISTER_GRADIENT_BUILDER("PadAndUnflatten", GetPadAndUnflattenGradient);
+  REGISTER_GRADIENT_BUILDER("FlattenAndUnpad", GetFlattenAndUnpadGradient);
   REGISTER_GRADIENT_BUILDER("ShrunkenGather", GetShrunkenGatherGradient);
   REGISTER_GRADIENT_BUILDER("Conv", GetConvGradient);
   REGISTER_GRADIENT_BUILDER("Squeeze", GetSqueezeGradient);
@@ -122,6 +124,7 @@ void GradientBuilderRegistry::RegisterGradientBuilders() {
   REGISTER_GRADIENT_BUILDER("Reciprocal", GetReciprocalGradient);
   REGISTER_GRADIENT_BUILDER("LeakyRelu", GetLeakyReluGradient);
   REGISTER_GRADIENT_BUILDER("ConvTranspose", GetConvTransposeGradient);
+  REGISTER_GRADIENT_BUILDER("Resize", GetResizeGradient);
 
   REGISTER_GRADIENT_BUILDER("ExternalGradient", GetExternalGradient);
 };
diff --git a/orttraining/orttraining/core/graph/training_op_defs.cc b/orttraining/orttraining/core/graph/training_op_defs.cc
index 91b1df7b7cf2d..8d3f76be20c65 100644
--- a/orttraining/orttraining/core/graph/training_op_defs.cc
+++ b/orttraining/orttraining/core/graph/training_op_defs.cc
@@ -2193,7 +2193,7 @@ Example 4:
               OpSchema::Variadic)
       .TypeConstraint(
           "T",
-          OpSchema::all_tensor_types(),
+          OpSchema::all_tensor_types_ir4(),
           "Constrain input and output types to all tensor types.")
       .TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
         for (int i = 0; i < static_cast<int>(ctx.getNumOutputs()); ++i) {
@@ -2270,7 +2270,7 @@ Example 4:
               OpSchema::Optional)
       .TypeConstraint(
           "T",
-          OpSchema::all_tensor_types(),
+          OpSchema::all_tensor_types_ir4(),
           "Constrain output types to any tensor type.")
       .TypeConstraint(
           "Tint",
@@ -3908,10 +3908,27 @@ Return true if all elements are true and false otherwise.
           AttributeProto::INTS)
       // Other attributes.
       .Attr(
-          "inplace",
-          "Indicate if the output should reuse input memory.",
-          AttributeProto::INT,
-          static_cast<int64_t>(0))
+          "tensor_reuse_map",
+          "A int array indicating whether output at each index is reusing specific input or not."
+          "If the given index is -1, it means the output is not reusing any input."
+          "For example, there are 2 tensor inputs and 3 tensor outputs (including ctx), "
+          "tensor_reuse_map = [-1, 1, 0] means"
+          "- the output 0 (ctx) don't reuse any input buffer."
+          "- the output 1 reuses the input 1."
+          "- the output 2 reuses the input 0.",
+          AttributeProto::INTS,
+          false)
+      .Attr(
+          "bw_tensor_reuse_map",
+          "Used for backward op only."
+          "A int array indicating whether output at each index is reusing specific input or now."
+          "If the given index is -1, it means the output is not reusing any input."
+          "For example, there are 3 inputs (including ctx) and 2 outputs, tensor_reuse_map = [2, 1] means"
+          "- the output 0 reuses the input 2."
+          "- the output 1 reuses the input 1."
+          "Be noted: the input 0 is ctx.",
+          AttributeProto::INTS,
+          false)
       .Attr(
           "training_mode",
           "Indicate if the model is exported in training_mode, by default, False.",
@@ -4033,11 +4050,6 @@ Return true if all elements are true and false otherwise.
           "func_name",
           "Name of custom class.",
           AttributeProto::STRING)
-      .Attr(
-          "inplace",
-          "Indicate if the output should reuse input memory. Todo(pengwa): do we need it?",
-          AttributeProto::INT,
-          static_cast<int64_t>(0))
       .Attr(
           "input_tensor_types",
           "Input types of autograd.Function.backward (including only tensor inputs)."
@@ -4069,6 +4081,16 @@ Return true if all elements are true and false otherwise.
           "A string inidicating autograd.Function.backward outputs's type."
           "value 'c' - non-tensor output; value 'd' - tensor output.",
           AttributeProto::STRING)
+      .Attr(
+          "tensor_reuse_map",
+          "A int array indicating whether output at each index is reusing specific input or not."
+          "If the given index is -1, it means the output is not reusing any input."
+          "For example, there are 3 inputs (including ctx) and 2 outputs, tensor_reuse_map = [2, 1] means"
+          "- the output 0 reuses the input 2."
+          "- the output 1 reuses the input 1."
+          "Be noted: the input 0 is ctx.",
+          AttributeProto::INTS,
+          false)
       .Attr(
           "comment",
           "comment only for debugging purposes.",
@@ -4158,6 +4180,7 @@ Return true if all elements are true and false otherwise.
       .Attr("func_name", "Function name of the Python Triton kernel.", AttributeProto::STRING, std::string(""))
       .Attr("onnx_key", "The hash key for the ONNX graph.", AttributeProto::INT, static_cast<int64_t>(0))
       .Attr("onnx_string", "The onnx string of the triton kernel.", AttributeProto::STRING, std::string(""))
+      .AllowUncheckedAttributes()
       .Input(0, "inputs",
              "Input tensors. If to call an existing Python Triton kernel, "
              "the input count and order should match the arguments of the function. If to compute an ONNX graph, "
@@ -4171,7 +4194,7 @@ Return true if all elements are true and false otherwise.
               "T", OpSchema::Variadic,
               /*is_homogeneous*/ false,
               /*min_arity*/ 1)
-      .TypeConstraint("T", OpSchema::all_tensor_types_with_bfloat(),
+      .TypeConstraint("T", OpSchema::all_tensor_types_ir4(),
                       "Allow inputs and outputs to be any kind of tensor.");
 #endif  // ENABLE_TRITON
 
@@ -4718,7 +4741,7 @@ Return true if all elements are true and false otherwise.
           "For other indices,  the corresponding value in output will be padded to zero."
 
           "The indices don't allow duplicated index values, otherwise, though there is no runtime check"
-          "(in case of performance concern), the behaviour of output is undefined."
+          "(in case of performance concern), the behavior of output is undefined."
 
           "An example:"
           "  input: [[1, 2, 3, 4], [5, 6, 7, 8]], shape is [2, 4]"
@@ -4726,14 +4749,12 @@ Return true if all elements are true and false otherwise.
           "  unflatten_dims: [2, 3], shape is [2]"
 
           "  output: [[[1, 2, 3, 4], [0, 0, 0, 0], [0, 0, 0, 0]], [[0, 0, 0, 0], [0, 0, 0, 0], [5, 6, 7, 8]]],"
-          "  shape is [2, 3, 4]"
-          "  flatten_output_shape: [6, 4], shape is [2]")
+          "  shape is [2, 3, 4]")
       .Input(0, "input", "input data of rank N, shape is [d1, d2, ..., dN]", "T")
       .Input(1, "indices", "1D Tensor of int32/int64 indices, shape is [d1], each element's value ranges in [0, M1*M2).",
              "T_INDEX")
       .Input(2, "unflatten_dims", "1D tensor with two values, [M1, M2].", "T_INT")
       .Output(0, "output", "output data of rank N+1, [M1, M2, d2, ..., dN]", "T")
-      .Output(1, "flatten_output_shape", "1D tensor with output shape, [M1*M2, d2, ..., dN]", "T_INT")
       .TypeConstraint(
           "T_INT",
           {"tensor(int32)", "tensor(int64)"},
@@ -4747,6 +4768,26 @@ Return true if all elements are true and false otherwise.
           {"tensor(int32)", "tensor(int64)"},
           "Constrain indices to integer types");
 
+  ONNX_CONTRIB_OPERATOR_SCHEMA(FlattenAndUnpad)
+      .SetDomain(kMSDomain)
+      .SinceVersion(1)
+      .SetDoc(
+          "FlattenAndUnpad operator flattens the first two dims of input tensor, and unpad according to given indices."
+          "This is used by padding elimination graph transformer.")
+      .Input(0, "input", "input data of rank N + 1, shape is [M1, M2, d2, ..., dN]", "T")
+      .Input(1, "indices", "1D Tensor of int32/int64 indices, shape is [d1], each element's value ranges in [0, M1*M2).",
+             "T_INT")
+      .Output(0, "output", "output data of rank N, [d1, d2, ..., dN]", "T")
+      .Output(1, "unflatten_dims", "1D tensor with two values, [M1, M2].", "T_INT")
+      .TypeConstraint(
+          "T_INT",
+          {"tensor(int32)", "tensor(int64)"},
+          "Constrain indices and shape to integer tensors.")
+      .TypeConstraint(
+          "T",
+          {"tensor(int32)", "tensor(int64)", "tensor(float16)", "tensor(float)", "tensor(double)", "tensor(bfloat16)"},
+          "Constrain input and output types to float tensors.");
+
   ONNX_CONTRIB_OPERATOR_SCHEMA(GRUTraining)
       .SetDomain(kMSDomain)
       .SinceVersion(1)
@@ -4979,6 +5020,26 @@ Return true if all elements are true and false otherwise.
           "T",
           {"tensor(float16)", "tensor(float)", "tensor(double)"},
           "Constrain input and output types to float tensors.");
+
+  ONNX_CONTRIB_OPERATOR_SCHEMA(ResizeGrad)
+      .SetDomain(kMSDomain)
+      .SinceVersion(1)
+      .Input(0, "dY", "Gradient of output Y.", "T")
+      .Input(1, "X", "Input tensor to the Resize operator.", "T")
+      .Input(2, "roi", "The roi input to the Resize operator.", "T", OpSchema::Optional)
+      .Input(3, "scales", "The scales input to the Resize operator.", "tensor(float)", OpSchema::Optional)
+      .Output(0, "dX", "Gradient of the input X.", "T")
+      .AllowUncheckedAttributes()
+      .TypeConstraint(
+          "T",
+          {"tensor(float16)", "tensor(float)", "tensor(double)"},
+          "Constrain input and output types to float tensors.")
+      .TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
+        propagateElemTypeFromInputToOutput(ctx, 1, 0);
+        if (hasInputShape(ctx, 1)) {
+          propagateShapeFromInputToOutput(ctx, 1, 0);
+        }
+      });
 }
 
 }  // namespace training
diff --git a/orttraining/orttraining/core/optimizer/compute_optimizer/padding_elimination.cc b/orttraining/orttraining/core/optimizer/compute_optimizer/padding_elimination.cc
index 74247c059cf84..2d75a02004ff2 100644
--- a/orttraining/orttraining/core/optimizer/compute_optimizer/padding_elimination.cc
+++ b/orttraining/orttraining/core/optimizer/compute_optimizer/padding_elimination.cc
@@ -129,91 +129,43 @@ NodeArg* InsertExpandForNodeInput(Graph& graph,
   return new_expand_node->MutableOutputDefs()[0];
 }
 
-// Insert Reshape + ShrunkenGather to flatten the in_index-th input of node.
+// Insert FlattenAndUnpad to flatten and unpad the in_index-th input of node.
 // The gather_index_arg is the indices of the elements that are not padding.
 NodeArg* InsertFlattenPatternForInput(Graph& graph,
                                       Node& node,
                                       uint32_t in_index,
                                       NodeArg* gather_index_arg,
                                       const logging::Logger& logger) {
-  InlinedVector<NodeArg*> reshape_input_args;
-  reshape_input_args.reserve(2);
-  reshape_input_args.push_back(node.MutableInputDefs()[in_index]);
-  std::vector<int64_t> new_shape;
-  new_shape.push_back(-1);  // only support flatten 0 and 1 dims
-  auto input_shape = node.InputDefs()[in_index]->Shape();
-  ORT_ENFORCE(input_shape->dim_size() >= 2);
-  ONNX_NAMESPACE::TensorShapeProto flattened_shape;
-  if (input_shape->dim(0).has_dim_value() && input_shape->dim(1).has_dim_value()) {
-    flattened_shape.add_dim()->set_dim_value(input_shape->dim(0).dim_value() * input_shape->dim(1).dim_value());
-  } else {
-    std::string token_dim_name = MakeString("total_token_count_", utils::GetRandomSeed());
-    flattened_shape.add_dim()->set_dim_param(token_dim_name);
-  }
-  for (int k = 2; k < input_shape->dim_size(); k++) {
-    ORT_ENFORCE(input_shape->dim(k).has_dim_value());
-    new_shape.push_back(input_shape->dim(k).dim_value());
-    flattened_shape.add_dim()->set_dim_value(input_shape->dim(k).dim_value());
-  }
-  ONNX_NAMESPACE::TensorProto new_shape_const_tensor;
-  new_shape_const_tensor.set_name(graph.GenerateNodeArgName("new_shape"));
-  new_shape_const_tensor.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64);
-  new_shape_const_tensor.add_dims(new_shape.size());
-  new_shape_const_tensor.set_raw_data(new_shape.data(), new_shape.size() * sizeof(int64_t));
-  NodeArg* new_shape_arg = &graph_utils::AddInitializer(graph, new_shape_const_tensor);
-  reshape_input_args.push_back(new_shape_arg);
-
-  InlinedVector<NodeArg*> reshape_output_args;
-  reshape_output_args.push_back(
-      &graph.GetOrCreateNodeArg(graph.GenerateNodeArgName("inputs_reshape_result"),
-                                node.MutableInputDefs()[in_index]->TypeAsProto()));
-
-  Node* new_reshape_node = InsertIntermediateNodeOnDestInput(
-      graph, node,
-      in_index,
-      0,
-      0,
-      graph.GenerateNodeName("Reshape"),
-      "Reshape",
-      "Reshape node to filter invalid tokens.",
-      reshape_input_args,
-      reshape_output_args,
-      {},
-      "",
-      logger);
+  InlinedVector<NodeArg*> unpad_input_args;
+  unpad_input_args.reserve(2);
+  unpad_input_args.push_back(node.MutableInputDefs()[in_index]);
+  unpad_input_args.push_back(gather_index_arg);
 
-  new_reshape_node->SetExecutionProviderType(node.GetExecutionProviderType());
-  auto reshape_out_arg = new_reshape_node->MutableOutputDefs()[0];
-
-  reshape_out_arg->SetShape(flattened_shape);
-
-  InlinedVector<NodeArg*> gather_input_args;
-  gather_input_args.reserve(2);
-  gather_input_args.push_back(reshape_output_args[0]);
-  gather_input_args.push_back(gather_index_arg);
-
-  InlinedVector<NodeArg*> gather_output_args;
-  gather_output_args.push_back(
+  InlinedVector<NodeArg*> unpad_output_args;
+  unpad_output_args.push_back(
       &graph.GetOrCreateNodeArg(graph.GenerateNodeArgName("padding_filter_result"),
-                                reshape_out_arg->TypeAsProto()));
+                                nullptr));
+  unpad_output_args.push_back(
+      &graph.GetOrCreateNodeArg(graph.GenerateNodeArgName("d1_d2_shape"),
+                                nullptr));
 
-  Node* new_gather_node = InsertIntermediateNodeOnDestInput(
+  Node* unpad_node = InsertIntermediateNodeOnDestInput(
       graph, node,
       in_index,
       0,
       0,
       graph.GenerateNodeName("PaddingFilter"),
-      "ShrunkenGather",
-      "ShrunkenGather node to filter invalid tokens.",
-      gather_input_args,
-      gather_output_args,
+      "FlattenAndUnpad",
+      "FlattenAndUnpad node to filter invalid tokens.",
+      unpad_input_args,
+      unpad_output_args,
       {},
       kMSDomain,
       logger);
 
-  new_gather_node->SetExecutionProviderType(node.GetExecutionProviderType());
-  auto gather_out_arg = new_gather_node->MutableOutputDefs()[0];
-  return gather_out_arg;
+  unpad_node->SetExecutionProviderType(node.GetExecutionProviderType());
+  auto unpad_out_arg = unpad_node->MutableOutputDefs()[0];
+  return unpad_out_arg;
 }
 
 // Insert PadAndUnflatten to unflatten the shape of the in_index-th input of node.
@@ -236,10 +188,6 @@ NodeArg* InsertNodesForOutput(Graph& graph,
   pad_node_output_args.push_back(
       &graph.GetOrCreateNodeArg(graph.GenerateNodeArgName("padded_result"),
                                 nullptr));
-  pad_node_output_args.push_back(
-      &graph.GetOrCreateNodeArg(graph.GenerateNodeArgName("padded_d1xd2_shape"),
-                                nullptr));
-
   Node* new_gathergrad_node = InsertIntermediateNodeOnDestInput(
       graph, node,
       in_index,
@@ -522,7 +470,8 @@ Status PaddingElimination::ApplyImpl(Graph& graph, bool& modified, int graph_lev
   // Get the first two dims value of input_ids which is [batch_size, seq_len]
   NodeArg* first_two_dims_arg = GetDimsValue(graph,
                                              input_ids_arg,
-                                             CreateInitializerFromVector(graph, {2}, {0, 1}, graph.GenerateNodeArgName("first_two_indices")),
+                                             CreateInitializerFromVector(graph, {2}, {0, 1},
+                                                                         graph.GenerateNodeArgName("first_two_indices")),
                                              *embedding_node);
 
   // Add flatten pattern to each input node of the subgraph
diff --git a/orttraining/orttraining/core/optimizer/conv1d_replacement.cc b/orttraining/orttraining/core/optimizer/conv1d_replacement.cc
new file mode 100644
index 0000000000000..0412000e04e1b
--- /dev/null
+++ b/orttraining/orttraining/core/optimizer/conv1d_replacement.cc
@@ -0,0 +1,164 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include <string>
+#include "core/optimizer/initializer.h"
+#include "orttraining/core/optimizer/conv1d_replacement.h"
+#include "core/graph/graph_utils.h"
+
+/*
+  In LoRA code, it will use conv1d to do projection for qkv,
+  while the conv1d calculation is mathematically equivalent to MatMul, and MatMul is much faster than conv1d in GPU.
+  The graph transformation is doing the following graph substitution:
+  1. The input graph is:
+  conv_input  conv_weight
+        \       /
+         \     /
+          conv1d
+
+  2. The output graph is as follows,
+     the number of MatMul is equal to attribute "group" of conv1d
+        conv_input   conv1d.group    conv_weight  conv1d.group
+          \          /                   \         /
+           \        /                   Squeeze   /
+            \      /                       \     /
+              Split                         Split
+          /   /  ... \                   /   /   ... \
+         /   /    ... \                 /   /     ... \
+        /   /      ... \               /   /       ... \
+    input0  input1 ... inputN     weight0 weight1  ... weightN
+        \      \          \           /    /           /
+          \       \          \       /    /          /
+            \       \          \   /     /         /
+              \       \          X      /        /
+                \       \       /  \   /        /
+                  \       \   /      X        /
+                    \       X       / \     /
+                      \   /   \   /     \  /
+                     MatMul   MatMul ... MatMul
+                        \       |     ... /
+                          \     |       /
+                            \   |     /
+*/
+namespace onnxruntime {
+bool NodeCanBeReplacedByMatmul(const Node& node) {
+  // If node type is Conv, and attr "dilations" is 1, "kernel_shape" is 1, "stride" is 1, group is 1 or 2,
+  // then it can be replaced by MatMul
+  // Kernel_shape is 1 means it is conv1d
+  if (!graph_utils::IsSupportedOptypeVersionAndDomain(node, "Conv", {1, 11})) {
+    return false;
+  }
+  const auto* dilations = graph_utils::GetNodeAttribute(node, "dilations");
+  const auto* kernel_shape = graph_utils::GetNodeAttribute(node, "kernel_shape");
+  const auto* stride = graph_utils::GetNodeAttribute(node, "strides");
+  const auto* group = graph_utils::GetNodeAttribute(node, "group");
+  if (dilations == nullptr || kernel_shape == nullptr || stride == nullptr || group == nullptr) {
+    return false;
+  }
+  if ((dilations->ints_size() && dilations->ints(0) != 1) ||
+      (kernel_shape->ints_size() && kernel_shape->ints(0) != 1) ||
+      (stride->ints_size() && stride->ints(0) != 1) ||
+      group->i() >= 3) {
+    return false;
+  }
+
+  return true;
+}
+
+void Conv1dToMatmul(Graph& graph, Node& conv) {
+  // Shape of conv1d input: [batch_size, in_channels, in_length]
+  // Shape of conv1d weight:[output_channels, input_channels/group, kernel_shape], kernel_shape is 1
+  // We need to split the input into "group", and squeeze&split the weight, and then do MatMul
+  const std::string node_description("Conv1dReplacement");
+  auto execution_provider_type = conv.GetExecutionProviderType();
+  // 1. Split conv input
+  auto group_attr = graph_utils::GetNodeAttribute(conv, "group");
+  int64_t group_num = 1;  // default group is 1 from ONNX schema
+  if (group_attr != nullptr) {
+    group_num = group_attr->i();
+  }
+  auto conv1d_input = conv.MutableInputDefs()[0];
+  std::vector<onnxruntime::NodeArg*> conv1d_input_splitted_outputs;
+  for (int i = 0; i < group_num; i++) {
+    conv1d_input_splitted_outputs.push_back(&graph.GetOrCreateNodeArg(
+        graph.GenerateNodeArgName("input_split_output"), nullptr));
+  }
+  auto& input_split = graph.AddNode(graph.GenerateNodeName("Split"), "Split", node_description, {conv1d_input},
+                                    {conv1d_input_splitted_outputs});
+  input_split.SetExecutionProviderType(execution_provider_type);
+  input_split.AddAttribute("axis", int64_t(1));
+  auto onnx_opset_version = graph.DomainToVersionMap().at(kOnnxDomain);
+  if (onnx_opset_version >= 18) {
+    input_split.AddAttribute("num_outputs", group_num);
+  }
+  // 2. Squeeze conv weight
+  auto conv1d_weight = conv.MutableInputDefs()[1];
+  auto weight_squeeze_output = &graph.GetOrCreateNodeArg(graph.GenerateNodeArgName("weight_squeeze_output"), nullptr);
+  auto& weight_squeeze = graph.AddNode(graph.GenerateNodeName("WeightSqueeze"), "Squeeze",
+                                       node_description, {conv1d_weight}, {weight_squeeze_output});
+  if (onnx_opset_version > 12) {
+    // After onnx version 12, squeeze node has axes as input instead of attribute
+    ONNX_NAMESPACE::TensorProto initializer_proto;
+    initializer_proto.set_name(graph.GenerateNodeName("ConstAsInitializer"));
+    initializer_proto.add_dims(static_cast<int64_t>(1));
+    initializer_proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64);
+    InlinedVector<int64_t> initializer_proto_value{2};
+    initializer_proto.set_raw_data(initializer_proto_value.data(), initializer_proto_value.size() * sizeof(int64_t));
+    auto& axes_input = graph_utils::AddInitializer(graph, initializer_proto);
+    // Squeeze node doesn't have opschema here, so we need to set input args count manually
+    weight_squeeze.MutableInputArgsCount().resize(2);
+    graph_utils::AddNodeInput(weight_squeeze, 1, axes_input);
+  } else {
+    weight_squeeze.AddAttribute("axes", std::vector<int64_t>{2});
+  }
+  weight_squeeze.SetExecutionProviderType(execution_provider_type);
+  // 3. Split conv weight
+  std::vector<onnxruntime::NodeArg*> conv1d_weight_splitted_outputs;
+  for (int i = 0; i < group_num; i++) {
+    conv1d_weight_splitted_outputs.push_back(&graph.GetOrCreateNodeArg(
+        graph.GenerateNodeArgName("weight_split_output"), nullptr));
+  }
+  auto& weight_split = graph.AddNode(graph.GenerateNodeName("Split"), "Split", node_description,
+                                     {weight_squeeze_output}, {conv1d_weight_splitted_outputs});
+  weight_split.AddAttribute("axis", int64_t(0));
+  weight_split.SetExecutionProviderType(execution_provider_type);
+  if (onnx_opset_version >= 18) {
+    weight_split.AddAttribute("num_outputs", group_num);
+  }
+  // 4. Do MatMul
+  std::vector<onnxruntime::NodeArg*> matmul_outputs;
+  for (int i = 0; i < group_num; i++) {
+    auto matmul_output = &graph.GetOrCreateNodeArg(graph.GenerateNodeArgName("matmul_output"), nullptr);
+    matmul_outputs.push_back(matmul_output);
+    auto& matmul = graph.AddNode(graph.GenerateNodeName("Matmul"), "MatMul", node_description,
+                                 {conv1d_weight_splitted_outputs[i], conv1d_input_splitted_outputs[i]},
+                                 {matmul_output});
+    matmul.SetExecutionProviderType(execution_provider_type);
+  }
+  // 5. Concat matmul outputs
+  auto& concat_node = graph.AddNode(graph.GenerateNodeName("Concat"), "Concat", node_description,
+                                    matmul_outputs, {});
+  concat_node.SetExecutionProviderType(execution_provider_type);
+  concat_node.AddAttribute("axis", int64_t(1));
+  // 6. Clean up - delted original "conv" node, its output is replaced by concat_node
+  graph_utils::FinalizeNodeFusion(graph, concat_node, conv);
+}
+
+Status Conv1dReplacement::ApplyImpl(Graph& graph, bool& modified, int graph_level, const logging::Logger& logger) const {
+  GraphViewer graph_viewer(graph);
+  const auto& node_topology_list = graph_viewer.GetNodesInTopologicalOrder();
+  for (auto node_index : node_topology_list) {
+    auto* node_ptr = graph.GetNode(node_index);
+    if (!node_ptr)
+      continue;  // node was removed
+    auto& node = *node_ptr;
+    ORT_RETURN_IF_ERROR(Recurse(node, modified, graph_level, logger));
+    if (NodeCanBeReplacedByMatmul(node)) {
+      LOGS(logger, VERBOSE) << "lora conv1d replacement, node name: " + node.Name();
+      Conv1dToMatmul(graph, node);
+      modified = true;
+    }
+  }
+  return Status::OK();
+}
+}  // namespace onnxruntime
diff --git a/orttraining/orttraining/core/optimizer/conv1d_replacement.h b/orttraining/orttraining/core/optimizer/conv1d_replacement.h
new file mode 100644
index 0000000000000..740f13c76fd6f
--- /dev/null
+++ b/orttraining/orttraining/core/optimizer/conv1d_replacement.h
@@ -0,0 +1,18 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+
+#include "core/optimizer/graph_transformer.h"
+
+namespace onnxruntime {
+
+class Conv1dReplacement : public GraphTransformer {
+ public:
+  Conv1dReplacement(const InlinedHashSet<std::string_view>& compatible_execution_providers = {}) noexcept
+      : GraphTransformer("Conv1dReplacement", compatible_execution_providers) {}
+
+  Status ApplyImpl(Graph& graph, bool& modified, int graph_level, const logging::Logger& logger) const override;
+};
+
+}  // namespace onnxruntime
diff --git a/orttraining/orttraining/core/optimizer/graph_transformer_utils.cc b/orttraining/orttraining/core/optimizer/graph_transformer_utils.cc
index 6b566ed064aa4..6193a1d10c095 100644
--- a/orttraining/orttraining/core/optimizer/graph_transformer_utils.cc
+++ b/orttraining/orttraining/core/optimizer/graph_transformer_utils.cc
@@ -63,11 +63,16 @@
 #include "orttraining/core/optimizer/scaled_sum_fusion.h"
 #include "orttraining/core/optimizer/shape_optimizer.h"
 #include "orttraining/core/optimizer/transformer_layer_recompute.h"
+#include "orttraining/core/optimizer/transpose_replacement.h"
 #include "core/optimizer/compute_optimizer/upstream_gather.h"
 #include "core/optimizer/compute_optimizer/upstream_reshape.h"
 #include "core/optimizer/pre_shape_node_elimination.h"
 #include "orttraining/core/optimizer/compute_optimizer/padding_elimination.h"
 #include "orttraining/core/optimizer/compute_optimizer/sceloss_compute_optimization.h"
+#ifdef ENABLE_TRAINING_TORCH_INTEROP
+#include "orttraining/core/optimizer/pythonop_rewriter.h"
+#endif
+#include "orttraining/core/optimizer/conv1d_replacement.h"
 
 namespace onnxruntime {
 namespace training {
@@ -106,6 +111,9 @@ std::vector<std::unique_ptr<GraphTransformer>> GeneratePreTrainingTransformers(
       ORT_THROW_IF_ERROR(rule_transformer->Register(std::make_unique<InsertSoftmaxCrossEntropyLossOutput>()));
       ORT_THROW_IF_ERROR(rule_transformer->Register(std::make_unique<LSTMReplacement>()));
       ORT_THROW_IF_ERROR(rule_transformer->Register(std::make_unique<GRUReplacement>()));
+#ifdef ENABLE_TRAINING_TORCH_INTEROP
+      ORT_THROW_IF_ERROR(rule_transformer->Register(std::make_unique<PythonOpRewriter>()));
+#endif
 
       // Put ConstantSharing before CommonSubexpressionElimination by intention as it can create more opportunities for
       // CSE. For example, if A and B nodes both do Add operation with a same value but different initializers, by
@@ -187,6 +195,7 @@ std::vector<std::unique_ptr<GraphTransformer>> GeneratePreTrainingTransformers(
         // Once we have a CPU kernel for PadAndUnflatten, we can remove the guard.
         transformers.emplace_back(std::make_unique<PaddingElimination>(compatible_eps,
                                                                        config.sparse_embedding_input_names));
+        transformers.emplace_back(std::make_unique<Conv1dReplacement>(compatible_eps));
 #endif
       }
 
@@ -197,6 +206,7 @@ std::vector<std::unique_ptr<GraphTransformer>> GeneratePreTrainingTransformers(
           std::make_unique<RuleBasedGraphTransformer>(optimizer_utils::GenerateRuleBasedTransformerName(level),
                                                       compatible_eps);
       ORT_THROW_IF_ERROR(rule_transformer->Register(std::make_unique<ConcatReplacement>()));
+      ORT_THROW_IF_ERROR(rule_transformer->Register(std::make_unique<TransposeReplacement>()));
     } break;
 
     case TransformerLevel::Level3: {
diff --git a/orttraining/orttraining/core/optimizer/memory_optimizer.cc b/orttraining/orttraining/core/optimizer/memory_optimizer.cc
index 88c786d693cae..834e5ebb5f6f3 100644
--- a/orttraining/orttraining/core/optimizer/memory_optimizer.cc
+++ b/orttraining/orttraining/core/optimizer/memory_optimizer.cc
@@ -1,233 +1,84 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
+#include <algorithm>
+#include <iomanip>
+#include <memory>
+#include <utility>
+#include <string>
+#include <vector>
+
 #include "core/framework/random_seed.h"
 #include "core/framework/tensorprotoutils.h"
 #include "core/graph/graph_utils.h"
 #include "core/optimizer/utils.h"
 #include "orttraining/core/graph/recompute_graph_utils.h"
 #include "orttraining/core/optimizer/memory_optimizer.h"
+#include "orttraining/core/optimizer/memory_optimizer/common.h"
+#include "orttraining/core/optimizer/memory_optimizer/optimization_planner.h"
+#include "orttraining/core/optimizer/memory_optimizer/recompute_analysis.h"
+#include "orttraining/core/optimizer/memory_optimizer/memory_insight.h"
 
 namespace onnxruntime {
 
 namespace {
 
-constexpr int32_t MAXIMUM_RECOMPUTE_NODE_COUNT = 15;
-
-std::string TensorShapeProtoToString(const ONNX_NAMESPACE::TensorShapeProto* shape) {
-  std::ostringstream shape_oss;
-  if (shape != nullptr) {
-    for (int dim_index = 0; dim_index < shape->dim_size(); dim_index++) {
-      auto dim = shape->dim(dim_index);
-      if (utils::HasDimValue(dim)) {
-        shape_oss << dim.dim_value() << " x ";
-      } else {
-        shape_oss << dim.dim_param() << " x ";
-      }
-    }
-  } else {
-    shape_oss << "unknown";
-  }
-
-  return shape_oss.str();
-}
-
-int ParseIntValueFromString(std::string_view str) {
-  int int_value = 0;
-  auto result = std::from_chars(str.data(), str.data() + str.size(), int_value);
-  ORT_ENFORCE(result.ec != std::errc::invalid_argument, "Fail to convert to int from string: ", str);
-  return int_value;
-}
-
-constexpr bool IsForwardPassOperator(ptrdiff_t op_order_in_topological_sort, ptrdiff_t boundary_op_order_in_topological_sort) {
+constexpr bool IsForwardPassOperator(ptrdiff_t op_order_in_topological_sort,
+                                     ptrdiff_t boundary_op_order_in_topological_sort) {
   return op_order_in_topological_sort <= boundary_op_order_in_topological_sort;
 }
 
-static size_t GetElementSize(const ONNX_NAMESPACE::DataType& tensor_type) {
-  const ONNX_NAMESPACE::TypeProto& type_proto = ONNX_NAMESPACE::Utils::DataTypeUtils::ToTypeProto(tensor_type);
-  MLDataType ml_data_type = DataTypeImpl::TypeFromProto(type_proto);
-  const TensorTypeBase* tensor_type_base = ml_data_type->AsTensorType();
-  ORT_ENFORCE(nullptr != tensor_type_base);
-  MLDataType elt_type = tensor_type_base->GetElementType();
-  return elt_type->Size();
-}
-
-// TODO(pengwa): extend this function to be more general.
-float InputOutputSizeRatio(const Node* node) {
-  if (node->OpType().compare("Cast") == 0) {
-    const NodeArg* input = node->InputDefs()[0];
-    const NodeArg* output = node->OutputDefs()[0];
-    if (input->TypeAsProto()->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType_STRING ||
-        output->TypeAsProto()->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType_STRING) {
-      return 1.0f;
-    }
-    const auto& ptype1 = input->Type();
-    const auto& ptype2 = output->Type();
-    float ratio = float(GetElementSize(ptype1)) / (float)GetElementSize(ptype2);
-    return ratio;
-  }
-
-  return 1.0f;
-}
-
 }  // namespace
 
-Status MemoryOptimizer::ParseConfigFromString(const std::string& enable_memory_optimizer,
+Status MemoryOptimizer::ParseConfigFromString(const std::string& memory_optimizer_config,
                                               const std::string& level) {
-  optimizer_config_ = enable_memory_optimizer;
-  if (!enable_memory_optimizer.empty()) {
-    const auto user_config_strs = utils::SplitString(enable_memory_optimizer, ",");
-    for (const auto& user_config_str : user_config_strs) {
-      const auto user_config = utils::SplitString(user_config_str, ":");
-      ORT_RETURN_IF_NOT(user_config.size() == 3,
-                        "User config should be in format of SubgraphStr:OptimizationType:RequestApplyCount.");
-
-      const std::string subgraph_string_representation(user_config[0]);
-      int optimization_type_int = ParseIntValueFromString(user_config[1]);
-      int requested_apply_count = ParseIntValueFromString(user_config[2]);
-      ORT_RETURN_IF_NOT(optimization_type_int < static_cast<int>(OptimizationType::TypeMax) &&
-                            optimization_type_int >= 0,
-                        "Invalid optimization type specified for subgraph: ",
-                        subgraph_string_representation);
-
-      ORT_RETURN_IF_NOT(requested_apply_count == -1 || requested_apply_count >= 0,
-                        "Invalid requested_apply_count specified for subgraph: ", requested_apply_count);
-
-      // At this point, subgraph_string_representation is a pattern graph string representation.
-      pattern_subgraph_to_user_optimizer_config_map_[subgraph_string_representation] =
-          UserConfig{static_cast<OptimizationType>(optimization_type_int), requested_apply_count};
-    }
-  }
-
-  int probe_level = ParseIntValueFromString(level);
-  ORT_RETURN_IF_NOT(probe_level < static_cast<int>(ProbeLevel::LevelMax) && probe_level >= 0,
-                    "Invalid probe level specified: ", level);
-  recompute_probe_level_ = static_cast<ProbeLevel>(probe_level);
-
-  return Status::OK();
-}
-
-int64_t MemoryOptimizer::PrepareForTransformation(const Graph& graph,
-                                                  ActivationUsedMap& fw_op_output_arg_used_map,
-                                                  InlinedHashMap<NodeIndex, size_t>&
-                                                      node_index_to_its_order_in_topological_sort_map) const {
-  fw_op_output_arg_used_map.clear();
-
-  GraphViewer graph_viewer(graph);
-  const auto& node_ids = graph_viewer.GetNodesInTopologicalOrder();
+  optimizer_config_ = memory_optimizer_config;
 
-  // Find boundary ops between forward and backward pass, currently, it's limited to YieldOp.
-  ptrdiff_t yield_op_order_in_topological_sort = -1;
-  for (size_t i = 0; i < node_ids.size(); ++i) {
-    const Node* p_node = graph.GetNode(node_ids[i]);
-    if (p_node == nullptr) { /* skip removed nodes*/
-      continue;
-    }
-
-    if (p_node->OpType() == "YieldOp") {
-      yield_op_order_in_topological_sort = static_cast<ptrdiff_t>(i);
-    }
-
-    node_index_to_its_order_in_topological_sort_map[p_node->Index()] = i;
-  }
-
-  // If boundary op found, create forward op output arg used map.
-  if (yield_op_order_in_topological_sort >= 0) {
-    for (size_t i = 0; i < node_ids.size(); ++i) {
-      const Node* p_node = graph.GetNode(node_ids[i]);
-      if (p_node == nullptr /* skip removed nodes*/) {
-        continue;
-      }
+  ORT_RETURN_IF_ERROR(optimizer::memory_optimizer::ParseConfigFromString(
+      memory_optimizer_config,
+      pattern_subgraph_to_user_optimizer_config_map_));
 
-      const Node& node = *p_node;
-      bool is_forward_op = IsForwardPassOperator(static_cast<ptrdiff_t>(i), yield_op_order_in_topological_sort);
-      if (!is_forward_op) {
-        continue;
-      }
-
-      for (auto& output_arg : node.OutputDefs()) {
-        bool used_in_fw = false;
-        bool used_in_bw = false;
-        for (auto& consumer_node : graph.GetConsumerNodes(output_arg->Name())) {
-          size_t consumer_node_index_in_topological_order =
-              node_index_to_its_order_in_topological_sort_map.at(consumer_node->Index());
-          if (IsForwardPassOperator(static_cast<ptrdiff_t>(consumer_node_index_in_topological_order),
-                                    yield_op_order_in_topological_sort)) {
-            used_in_fw = true;
-          } else {
-            used_in_bw = true;
-          }
-        }
-        fw_op_output_arg_used_map.insert({{output_arg->Name(), std::make_pair(used_in_fw, used_in_bw)}});
-      }
-    }
-  }
-
-  // Return whether boundary op is found or not.
-  return yield_op_order_in_topological_sort;
-}
-
-Status MemoryOptimizer::GetStashedActivationCandidates(const Graph& graph,
-                                                       const InlinedHashMap<std::string, std::pair<bool, bool>>&
-                                                           fw_op_output_arg_used_map,
-                                                       InlinedHashMap<const Node*, InlinedVector<size_t>>&
-                                                           candidate_output_args_map,
-                                                       const logging::Logger& logger) const {
-  for (auto& kv : fw_op_output_arg_used_map) {
-    // used by fw and bw, then it is a candidates.
-    if (kv.second.first && kv.second.second) {
-      const Node* n = graph.GetProducerNode(kv.first);
-      ORT_ENFORCE(n, "Activation should have a producer node");
-      size_t k = 0;
-      for (k = 0; k < n->OutputDefs().size(); ++k) {
-        if (n->OutputDefs()[k]->Name().compare(kv.first) == 0) {
-          break;
-        }
-      }
-
-      candidate_output_args_map[n].push_back(k);
-      LOGS(logger, VERBOSE) << "Find candidate output named [" << kv.first << "] of Node " << n->Name() << "("
-                            << n->OpType() << ")";
-    }
-  }
+  int probe_level = optimizer::memory_optimizer::ParseIntValueFromString(level);
+  ORT_RETURN_IF_NOT(probe_level < static_cast<int>(optimizer::memory_optimizer::ProbeLevel::LevelMax) &&
+                        probe_level >= 0,
+                    "Invalid probe level specified: ", level);
+  recompute_probe_level_ = static_cast<optimizer::memory_optimizer::ProbeLevel>(probe_level);
 
   return Status::OK();
 }
 
 bool MemoryOptimizer::ModifyGraph(Graph& graph,
-                                  const InlinedHashMap<NodeIndex, size_t>&
+                                  const InlinedHashMap<NodeIndex, ptrdiff_t>&
                                       node_index_to_its_order_in_topological_sort_map,
                                   const InlinedHashMap<const Node*, InlinedVector<size_t>>&
                                       candidate_output_args_map,
                                   const logging::Logger& logger,
-                                  int64_t boundary_op_order_in_topological_sort,
-                                  SubGraphStores& subgraph_stores,
-                                  Node* node) const {
+                                  ptrdiff_t boundary_op_order_in_topological_sort,
+                                  Node* node,
+                                  std::shared_ptr<optimizer::memory_optimizer::NodeOptimizationPlanBase>& node_plan,
+                                  std::shared_ptr<optimizer::memory_optimizer::ClusterApplyContext>& apply_context)
+    const {
   bool graph_is_modified = false;
-  if (subgraph_stores.SubGraphDescCount() == 0) {
-    return graph_is_modified;
-  }
-
-  SubGraphStores::GraphInstanceInfo& sub_graph_instance_info =
-      subgraph_stores.GetSubGraphInstance(node);
-
-  SubGraphDesc& subgraph_desc = subgraph_stores.GetSubGraphDesc(sub_graph_instance_info.second);
-  UserConfig user_config = subgraph_desc.user_optimizer_config;
-  int skip_count = (user_config.requested_count == -1)
+  int skip_count = (apply_context->requested_count == -1)
                        ? 0
-                       : std::max(0, subgraph_desc.total_frequency - user_config.requested_count);
+                       : std::max(0, apply_context->total_frequency - apply_context->requested_count);
 
-  subgraph_desc.skip_count += 1;
+  apply_context->skip_count += 1;
 
-  if (user_config.type != OptimizationType::None && subgraph_desc.skip_count > skip_count) {
-    subgraph_desc.applied_count += 1;
+  if (apply_context->skip_count > skip_count) {
+    apply_context->applied_count += 1;
     Node* replacement_node_ptr = nullptr;
-    LOGS(logger, WARNING) << "[Modify Graph] Node " << node->Name() << "(" << node->OpType() << ") is "
-                          << UserConfigToString(user_config);
-    if (user_config.type == OptimizationType::Recompute) {
-      ORT_ENFORCE(CreateRecomputeGraph(graph, sub_graph_instance_info.first, replacement_node_ptr).IsOK());
+    LOGS(logger, INFO) << "Node " << node->Name() << "(" << node->OpType() << ") is applying following optimization:"
+                       << "type [" << optimizer::memory_optimizer::OptimizationTypeToString(apply_context->type)
+                       << "], request count [" << apply_context->requested_count << "]";
+    if (apply_context->type == optimizer::memory_optimizer::OptimizationType::Recompute ||
+        apply_context->type == optimizer::memory_optimizer::OptimizationType::RecomputeWithCompromise) {
+      optimizer::memory_optimizer::NodeRecomputePlan* recompute_plan =
+          dynamic_cast<optimizer::memory_optimizer::NodeRecomputePlan*>(node_plan.get());
+      ORT_ENFORCE(recompute_plan != nullptr);
+      ORT_ENFORCE(CreateRecomputeGraph(graph, recompute_plan->GetNodesInTopoOrder(), replacement_node_ptr).IsOK());
     } else {
-      ORT_THROW("unsupported optimization type found: " + UserConfigToString(user_config));
+      ORT_THROW("unsupported optimization type found.");
     }
     ORT_ENFORCE(replacement_node_ptr);
 
@@ -278,60 +129,44 @@ Status MemoryOptimizer::ApplyImpl(Graph& graph, bool& modified, int /*graph_leve
   LOGS(logger, VERBOSE) << "Memory optimization config: " << optimizer_config_ << ", probe level: "
                         << static_cast<int>(recompute_probe_level_);
 
-  InlinedHashMap<std::string, std::pair<bool, bool>> fw_op_output_arg_used_map;
-  InlinedHashMap<NodeIndex, size_t> node_index_to_its_order_in_topological_sort_map;
-  int64_t boundary_op_order_in_topological_sort =
-      PrepareForTransformation(graph, fw_op_output_arg_used_map,
-                               node_index_to_its_order_in_topological_sort_map);
-  if (boundary_op_order_in_topological_sort < 0) {
-    LOGS(logger, VERBOSE) << "No boundary op found. Skip memory optimization.";
+  if (pattern_subgraph_to_user_optimizer_config_map_.empty()) {
+    LOGS(logger, VERBOSE) << "No optimization pattern is specified, skip memory optimization.";
     return Status::OK();
   }
 
+  ptrdiff_t yield_op_order_in_topological_sort;
   InlinedHashMap<const Node*, InlinedVector<size_t>> candidate_output_args_map;
-  ORT_RETURN_IF_ERROR(GetStashedActivationCandidates(graph, fw_op_output_arg_used_map, candidate_output_args_map,
-                                                     logger));
-
-  SubGraphStores recompute_subgraph_stores;
-  SubGraphStores recompute_with_compromise_subgraph_stores;
-  GraphViewer graph_viewer(graph);
-  const auto& node_ids = graph_viewer.GetNodesInTopologicalOrder();
+  InlinedHashMap<NodeIndex, ptrdiff_t> node_index_to_its_order_in_topological_sort_map;
 
   // The first pass - find the candidate subgraphs.
-  for (int i = static_cast<int>(node_ids.size()) - 1; i >= 0; --i) {
-    Node* p_node = graph.GetNode(node_ids[i]);
-    if (p_node == nullptr) {
-      continue;
-    }
-
-    if (candidate_output_args_map.find(p_node) == candidate_output_args_map.end()) {
-      continue;
-    }
+  GraphViewer graph_viewer(graph);
+  optimizer::memory_optimizer::MemoryOptimizationPlanner memory_opt_planner;
+  ORT_ENFORCE(optimizer::memory_optimizer::FindORTModuleMemoryOpportunity(
+                  graph_viewer,
+                  recompute_probe_level_,
+                  logger,
+                  node_index_to_its_order_in_topological_sort_map,
+                  yield_op_order_in_topological_sort,
+                  candidate_output_args_map,
+                  memory_opt_planner)
+                  .IsOK());
 
-    bool can_compromise_stashed_activation = false;
-    CheckNodeForRecompute(*p_node, fw_op_output_arg_used_map,
-                          node_index_to_its_order_in_topological_sort_map,
-                          candidate_output_args_map,
-                          recompute_subgraph_stores, logger, false,
-                          can_compromise_stashed_activation);
-
-    if (can_compromise_stashed_activation) {
-      LOGS(logger, VERBOSE) << "Searching Node " << p_node->Name() << "(" << p_node->OpType()
-                            << ") for compromised recompute";
-      // If the subgraph recompute can save memory by comprising the assumption - recompute graphs' input must exist
-      // during backward pass, then we can try to compromise the assumption.
-      CheckNodeForRecompute(*p_node, fw_op_output_arg_used_map, node_index_to_its_order_in_topological_sort_map,
-                            candidate_output_args_map,
-                            recompute_with_compromise_subgraph_stores, logger, true,
-                            can_compromise_stashed_activation);
-    }
-  }
+  // Finalize the plan according to user config,
+  // then create a ClusterApplyContext for each unique cluster (having the same node pattern)
+  InlinedHashMap<const Node*, std::shared_ptr<optimizer::memory_optimizer::NodeOptimizationPlanBase>>
+      node_to_opt_plan_map;
+  optimizer::memory_optimizer::NodeToClusterApplyContextMap node_to_apply_context_map;
+  ORT_ENFORCE(memory_opt_planner.FinalizeNodePlansFromUserConfig(pattern_subgraph_to_user_optimizer_config_map_,
+                                                                 node_to_opt_plan_map,
+                                                                 node_to_apply_context_map)
+                  .IsOK());
 
   // The second pass - apply the transformation.
   // Iterate through the nodes in reversed topological order and find the subgraph that can be alleviated.
   // The reason we do reversed topological order is that we want the later layers' recompute nodes can be appended
   // earlier than the earlier layers, in this way, the execution order of later layers will be in front of the earlier
   // layers.
+  const auto& node_ids = graph_viewer.GetNodesInTopologicalOrder();
   for (int i = static_cast<int>(node_ids.size()) - 1; i >= 0; --i) {
     Node* p_node = graph.GetNode(node_ids[i]);
     if (p_node == nullptr) {
@@ -339,374 +174,40 @@ Status MemoryOptimizer::ApplyImpl(Graph& graph, bool& modified, int /*graph_leve
     }
 
     bool has_been_modified = false;
-    if (recompute_subgraph_stores.ContainsSubGraphInstance(p_node)) {
+    if (node_to_opt_plan_map.find(p_node) != node_to_opt_plan_map.end()) {
       has_been_modified = ModifyGraph(graph, node_index_to_its_order_in_topological_sort_map,
                                       candidate_output_args_map, logger,
-                                      boundary_op_order_in_topological_sort,
-                                      recompute_subgraph_stores, p_node);
-    }
-
-    // If there are other recompute plan for this node, we skip them because the graph is already modified.
-    if (!has_been_modified && recompute_with_compromise_subgraph_stores.ContainsSubGraphInstance(p_node)) {
-      has_been_modified = ModifyGraph(graph, node_index_to_its_order_in_topological_sort_map,
-                                      candidate_output_args_map, logger,
-                                      boundary_op_order_in_topological_sort,
-                                      recompute_with_compromise_subgraph_stores, p_node);
+                                      yield_op_order_in_topological_sort,
+                                      p_node,
+                                      node_to_opt_plan_map[p_node],
+                                      node_to_apply_context_map[p_node]);
     }
 
     modified = modified || has_been_modified;
   }
 
-  PrintSummary(recompute_subgraph_stores, recompute_with_compromise_subgraph_stores, logger);
+  PrintSummary(memory_opt_planner, node_to_apply_context_map, logger);
 
   return Status::OK();
 }
 
-void MemoryOptimizer::NodesInTopoOrderToString(const InlinedVector<const Node*>& nodes_in_topological_order,
-                                               std::string& subgraph_string_representation,
-                                               std::string& log_info) const {
-  std::ostringstream oss;
-  std::ostringstream subgraph_string_representation_oss;
-  size_t node_count = nodes_in_topological_order.size();
-  for (size_t i = 0; i < node_count; ++i) {
-    if (i < node_count - 1) {  // Ignore the last node.
-      oss << "(name:" << nodes_in_topological_order[i]->Name() << ", type:" << nodes_in_topological_order[i]->OpType()
-          << "),";
-    }
-
-    subgraph_string_representation_oss << nodes_in_topological_order[i]->OpType() << "+";
-  }
-
-  subgraph_string_representation = subgraph_string_representation_oss.str();
-  log_info = oss.str();
-  if (log_info.size() > 0) {
-    log_info = " with its precedent nodes: " + log_info;
-  }
-}
-
-std::string MemoryOptimizer::UserConfigToString(const UserConfig& config) const {
-  std::string type_str;
-  switch (config.type) {
-    case OptimizationType::None: {
-      type_str = "Disabled";
-    } break;
-    case OptimizationType::Recompute: {
-      type_str = "Recomputed";
-    } break;
-    default: {
-      type_str = "Unknown";
-    } break;
-  }
-  return type_str;
-}
-
-void MemoryOptimizer::PrintSummary(const SubGraphStores& recompute_stores,
-                                   const SubGraphStores& recompute_with_compromise_stores,
+void MemoryOptimizer::PrintSummary(const optimizer::memory_optimizer::MemoryOptimizationPlanner& memory_opt_planner,
+                                   const InlinedHashMap<
+                                       const Node*,
+                                       std::shared_ptr<optimizer::memory_optimizer::ClusterApplyContext>>&
+                                       node_to_apply_contexts_map,
                                    const logging::Logger& logger) const {
-  if (recompute_stores.SubGraphDescCount() == 0 && recompute_with_compromise_stores.SubGraphDescCount() == 0) {
-    return;
-  }
-
-  std::ostringstream summary;
-  summary << "\nMemoryOptimizer Summary:\n";
-  summary << "\tUser config:\n\t" << optimizer_config_ << "\n";
-  summary << "\t=================================\n";
-
-  auto print_info_from_stores = [&summary, this](std::string store_name, const SubGraphStores& stores) {
-    summary << "\t########" << store_name << "########\n";
-    for (auto subgraph_it = stores.subgraph_descs.begin(); subgraph_it != stores.subgraph_descs.end();
-         ++subgraph_it) {
-      std::string freq_info;
-      if (subgraph_it->second.user_optimizer_config.type != OptimizationType::None)
-        freq_info = " (requested_count=" + std::to_string(subgraph_it->second.user_optimizer_config.requested_count) +
-                    ", actual applied_count=" +
-                    std::to_string(subgraph_it->second.applied_count) + ")";
-      summary << "\tSubgraph: " << subgraph_it->first << "\n"
-              << "\t\tOptimizationType: "
-              << UserConfigToString(subgraph_it->second.user_optimizer_config) << freq_info << "\n"
-              << "\t\tPatterns: \n";
-      for (auto shape_stat_it = subgraph_it->second.shape_str_frequency.begin();
-           shape_stat_it != subgraph_it->second.shape_str_frequency.end();
-           ++shape_stat_it) {
-        summary << "\t\t\tPatternShape:" << shape_stat_it->first << "\tFrequency:" << shape_stat_it->second << "\n";
-      }
-      summary << "\t--------------------------------\n";
-    }
-    summary << "\t=================================\n";
-  };
-
-  print_info_from_stores("Recompute", recompute_stores);
-  print_info_from_stores("RecomputeWithCompromise", recompute_with_compromise_stores);
-
-  LOGS(logger, INFO) << summary.str() << "\n";
+  std::vector<std::pair<std::string, optimizer::memory_optimizer::MemoryRecord>> records_grouped_by_node_cluster_id;
+  optimizer::memory_optimizer::GetMemoryRecordsGroupedByNodeClusterId(memory_opt_planner,
+                                                                      node_to_apply_contexts_map,
+                                                                      records_grouped_by_node_cluster_id);
+  LOGS(logger, INFO) << SerializeMemoryRecords(records_grouped_by_node_cluster_id, optimizer_config_) << "\n";
 }
 
 /******************************************************
  ** Recompute related function implementation starts **
  ******************************************************/
 
-void MemoryOptimizer::RegisterAllowedRecomputeOps() {
-  if (static_cast<int>(recompute_probe_level_) >= static_cast<int>(ProbeLevel::Basic)) {
-    recomputable_op_type_to_input_arg_index_map_.insert({
-        // Binary elementwise
-        {"Add", AllowedRecomputeNodeConfig{{0, 1}}},
-        {"BiasGelu", AllowedRecomputeNodeConfig{{0, 1}}},
-        {"Div", AllowedRecomputeNodeConfig{{0, 1}}},
-        {"Mul", AllowedRecomputeNodeConfig{{0, 1}}},
-        {"Sub", AllowedRecomputeNodeConfig{{0, 1}}},
-
-        // Data layout
-        /// The shape input is trivial whether it exists or not in backward.
-        {"Reshape", AllowedRecomputeNodeConfig{{0}}},
-        {"Squeeze", AllowedRecomputeNodeConfig{{0}}},
-        {"Unsqueeze", AllowedRecomputeNodeConfig{{0}}},
-
-        // Unary elementwise
-        /// The ratio and mode input are trivial whether they exist or not in backward
-        {"BitmaskDropout", AllowedRecomputeNodeConfig{{0}}},
-        /// The axis input is trivial whether it exists or not in backward
-        {"CumSum", AllowedRecomputeNodeConfig{{0}}},
-        {"Dropout", AllowedRecomputeNodeConfig{{0}}},
-        {"Gelu", AllowedRecomputeNodeConfig{{0}}},
-        {"FastGelu", AllowedRecomputeNodeConfig{{0}}},
-
-        // Ternary elementwise
-        {"Where", AllowedRecomputeNodeConfig{{0, 1, 2}}},
-
-        // Data copy
-        {"Tile", AllowedRecomputeNodeConfig{{0}}},
-        {"Cast", AllowedRecomputeNodeConfig{{0}}},
-    });
-  }
-
-  if (static_cast<int>(recompute_probe_level_) >= static_cast<int>(ProbeLevel::Advanced)) {
-    recomputable_op_type_to_input_arg_index_map_.insert({
-        {"MatMul", AllowedRecomputeNodeConfig{{0, 1}}},
-        {"FusedMatMul", AllowedRecomputeNodeConfig{{0, 1}}},
-        {"Softmax", AllowedRecomputeNodeConfig{{0}}},
-        {"BiasSoftmax", AllowedRecomputeNodeConfig{{0, 1}}},
-        {"BiasSoftmaxDropout", AllowedRecomputeNodeConfig{{0, 1}}},
-    });
-  }
-}
-
-Status MemoryOptimizer::SelectRecomputeSubgraph(const Node& entry_node,
-                                                const InlinedVector<size_t>& node_output_index_candidates,
-                                                const ActivationUsedMap& fw_op_output_arg_used_map,
-                                                const InlinedHashMap<NodeIndex, size_t>&
-                                                    node_index_to_its_order_in_topological_sort_map,
-                                                InlinedVector<const Node*>& nodes,
-                                                const logging::Logger& logger,
-                                                bool compromise_stashed_activation,
-                                                bool& can_compromise_stashed_activation) const {
-  can_compromise_stashed_activation = false;
-
-  LOGS(logger, VERBOSE) << "Enter SelectRecomputeSubgraph for Node " << entry_node.Name() << "(" << entry_node.OpType() << ")";
-  nodes.clear();
-
-  std::deque<NodeOutputPort> q;
-  for (auto output_index : node_output_index_candidates) {
-    q.push_back(NodeOutputPort(&entry_node, static_cast<int>(output_index)));
-  }
-
-  bool early_stop = false;
-  std::set<NodeOutputPort> visited_output_arg_set;
-  std::set<const Node*> visited_node_set;
-
-  // For the initial activations in queue, they are stashed ones, so we do differently when scan the queue for them.
-  bool is_first_queue_scan = true;
-  while (nodes.size() < MAXIMUM_RECOMPUTE_NODE_COUNT && !q.empty() && !early_stop) {
-    // Loop all candidate NodeOutputPort, and find the next layer of input nodes.
-    size_t current_queue_size = q.size();
-    for (size_t i = 0; i < current_queue_size; ++i) {
-      NodeOutputPort p = q.front();
-      q.pop_front();
-      const Node* curr_node = p.first;
-
-      // Skip if the node output is already visited.
-      if (std::find(visited_output_arg_set.begin(), visited_output_arg_set.end(), p) !=
-          visited_output_arg_set.end()) {
-        continue;
-      }
-
-      visited_output_arg_set.insert({p});
-
-      // If the node already visited by from it's other output index, skip it.
-      if (visited_node_set.find(curr_node) != visited_node_set.end()) {
-        continue;
-      }
-
-      visited_node_set.insert(curr_node);
-
-      // Bottom-up search rules.
-      // If current op is entry output node (that generates stashed activations):
-      //   1. If the op is not in recomputable_op_type_to_input_arg_index_map_, skip it.
-      // Otherwise:
-      //  If current op is in allowed list, check its input args, and append the producers' NodeOutputPorts to next_q.
-      //  If current op is NOT in allowed list:
-      //    1). the output does not exist in backward, we cannot find a good solution for so, search terminates.
-      //    2). the output is used in backward, we don't need trace back further, continue searching.
-      auto op_recompute_config_it = recomputable_op_type_to_input_arg_index_map_.find(curr_node->OpType());
-      auto cur_output_arg_name = curr_node->OutputDefs()[p.second]->Name();
-      if (is_first_queue_scan) {
-        // We handle the entry node outputs differently because, we don't want this case falls into and succeed one of
-        // the checks in the other branch
-        // 1. "op is not in recompute op list, but its output is used in backward"
-        // 2. "op is in recompute op list, but its output is used in backward"
-        // (either of the above checks is true for entry node outputs)
-        if (op_recompute_config_it == recomputable_op_type_to_input_arg_index_map_.end()) {
-          early_stop = true;
-          LOGS(logger, VERBOSE) << "Entry Node " << curr_node->Name() << "(" << curr_node->OpType() << ") is **NOT** "
-                                << "in recompute op list, search terminates.";
-          break;
-        }
-      } else {
-        if (op_recompute_config_it == recomputable_op_type_to_input_arg_index_map_.end()) {
-          if (fw_op_output_arg_used_map.at(cur_output_arg_name).second) {
-            LOGS(logger, VERBOSE) << "Node " << curr_node->Name() << "(" << curr_node->OpType() << ") is **NOT** in "
-                                  << "recompute op list, but its output [" << cur_output_arg_name << "] is used in "
-                                  << "backward, we don't need trace bottom-up further. Entry node: "
-                                  << entry_node.Name() << "(" << entry_node.OpType() << ")";
-            continue;
-          } else {
-            early_stop = true;
-            LOGS(logger, VERBOSE) << "Node " << curr_node->Name() << "(" << curr_node->OpType() << ") is **NOT** in "
-                                  << "recompute op list, and its output [" << cur_output_arg_name
-                                  << "] does not exist in backward, search terminates. Entry node: "
-                                  << entry_node.Name() << "(" << entry_node.OpType() << ")";
-            break;
-          }
-        }
-
-        if (fw_op_output_arg_used_map.at(cur_output_arg_name).second) {
-          LOGS(logger, VERBOSE) << "Node " << curr_node->Name() << "(" << curr_node->OpType() << ") "
-                                << "is in recompute op list, while its output [" << cur_output_arg_name
-                                << "] is used in backward, we don't need trace bottom-up further. Entry node: "
-                                << entry_node.Name() << "(" << entry_node.OpType() << ")";
-          continue;
-        }
-      }
-
-      // Append node to the selected graph.
-      if (std::find(nodes.begin(), nodes.end(), curr_node) == nodes.end()) {
-        nodes.push_back(curr_node);
-        LOGS(logger, VERBOSE) << "Node " << curr_node->Name() << "(" << curr_node->OpType()
-                              << ") is added in selected subgraph  ";
-      }
-
-      // This check is not matured now, subject to be changed.
-      float ratio = InputOutputSizeRatio(curr_node);
-      float is_current_node_compromisable = (ratio < 1.f);
-      can_compromise_stashed_activation = can_compromise_stashed_activation || is_current_node_compromisable;
-      if (is_current_node_compromisable) {
-        LOGS(logger, VERBOSE) << "Node " << curr_node->Name() << "(" << curr_node->OpType()
-                              << ") has input/output size " << ratio << " < 1.f, can compromise stashed activation";
-      }
-
-      if (is_current_node_compromisable && compromise_stashed_activation) {
-        LOGS(logger, VERBOSE) << "Node " << curr_node->Name() << "(" << curr_node->OpType() << ") is in "
-                              << "recompute op list, and its output [" << cur_output_arg_name
-                              << "] does not exist in backward, while it meet compromised check, we don't need trace "
-                              << "bottom-up further.";
-        continue;
-      }
-
-      // Iterate all input nodes according to allowed input arg index of the entry node.
-      const auto& input_arg_indices = op_recompute_config_it->second.input_arg_indices;
-      for (auto it = curr_node->InputEdgesBegin(), end = curr_node->InputEdgesEnd(); it != end; ++it) {
-        const Node::EdgeEnd& input_edge = *it;
-        const auto& parent_node = input_edge.GetNode();
-        const auto parent_node_output_index = input_edge.GetSrcArgIndex();
-        const auto current_node_input_index = input_edge.GetDstArgIndex();
-        if (std::find(input_arg_indices.begin(), input_arg_indices.end(), current_node_input_index) !=
-            input_arg_indices.end()) {
-          NodeOutputPort next_p = std::make_pair(&parent_node, parent_node_output_index);
-
-          LOGS(logger, VERBOSE) << "Node " << parent_node.Name() << "(" << parent_node.OpType() << ")'s "
-                                << parent_node_output_index
-                                << "th output [" << parent_node.OutputDefs()[parent_node_output_index]->Name()
-                                << "] is added in recompute search list  ";
-
-          q.push_back(next_p);
-        }
-      }
-    }
-    // After handle all entry node outputs, we set the flag to false.
-    is_first_queue_scan = false;
-  }
-
-  // If input args are not found in bw, but op count exceed MAXIMUM_RECOMPUTE_NODE_COUNT, skip recompute.
-  if (!q.empty() || early_stop) {
-    LOGS(logger, VERBOSE) << "Fail to find a solution for recompute: current node count is " << nodes.size()
-                          << ", queue size: " << q.size() << ", early stop: " << early_stop;
-    nodes.clear();
-  } else {
-    // Re-order the nodes in topological order.
-    std::sort(nodes.begin(), nodes.end(),
-              [&node_index_to_its_order_in_topological_sort_map](const Node*& lhs, const Node*& rhs) {
-                return node_index_to_its_order_in_topological_sort_map.at(lhs->Index()) <
-                       node_index_to_its_order_in_topological_sort_map.at(rhs->Index());
-              });
-  }
-  return Status::OK();
-}
-
-void MemoryOptimizer::CheckNodeForRecompute(const Node& node,
-                                            const ActivationUsedMap& fw_op_output_arg_used_map,
-                                            const InlinedHashMap<NodeIndex, size_t>&
-                                                node_index_to_its_order_in_topological_sort_map,
-                                            const InlinedHashMap<const Node*, InlinedVector<size_t>>&
-                                                candidate_output_args_map,
-                                            SubGraphStores& subgraph_stores,
-                                            const logging::Logger& logger,
-                                            bool compromise_stashed_activation,
-                                            bool& can_compromise_stashed_activation) const {
-  if (recomputable_op_type_to_input_arg_index_map_.find(node.OpType()) ==
-      recomputable_op_type_to_input_arg_index_map_.end()) {
-    return;
-  }
-
-  InlinedVector<const Node*> nodes_in_topological_order;
-  ORT_ENFORCE(SelectRecomputeSubgraph(node, candidate_output_args_map.at(&node),
-                                      fw_op_output_arg_used_map,
-                                      node_index_to_its_order_in_topological_sort_map,
-                                      nodes_in_topological_order, logger,
-                                      compromise_stashed_activation,
-                                      can_compromise_stashed_activation)
-                  .IsOK());
-  if (nodes_in_topological_order.size() == 0) {
-    return;
-  }
-
-  std::string subgraph_str_representation, log_info;
-  NodesInTopoOrderToString(nodes_in_topological_order, subgraph_str_representation, log_info);
-  LOGS(logger, VERBOSE) << "Node " << node.Name() << "(" << node.OpType() << ") can be recomputed" << log_info;
-
-  // Update the subgraph optimization config map - key is the subgraph string representation, value is user config.
-  UserConfig user_config{OptimizationType::None, 0};
-  if (pattern_subgraph_to_user_optimizer_config_map_.find(subgraph_str_representation) !=
-      pattern_subgraph_to_user_optimizer_config_map_.end()) {
-    user_config = pattern_subgraph_to_user_optimizer_config_map_.at(subgraph_str_representation);
-  }
-
-  SubGraphDesc& subgraph_desc =
-      subgraph_stores.Contains(subgraph_str_representation)
-          ? subgraph_stores.GetSubGraphDesc(subgraph_str_representation)
-          : subgraph_stores.CreateSubGraphDesc(subgraph_str_representation, user_config);
-
-  subgraph_desc.total_frequency += 1;
-
-  // Update the subgraph frequency map - key is the subgraph string representation, value is number of appearances.
-  for (size_t output_index : candidate_output_args_map.at(&node)) {
-    auto shape_str = TensorShapeProtoToString(node.OutputDefs()[output_index]->Shape());
-    subgraph_desc.shape_str_frequency[shape_str]++;
-  }
-
-  subgraph_stores.AddSubGraphInstance(&node, nodes_in_topological_order, subgraph_desc);
-
-  return;
-}
-
 Status MemoryOptimizer::CreateRecomputeGraph(Graph& graph,
                                              const InlinedVector<const Node*>& nodes_in_topological_order,
                                              Node*& new_output_node_ptr) const {
@@ -716,8 +217,8 @@ Status MemoryOptimizer::CreateRecomputeGraph(Graph& graph,
 
     // Check whether the node has been recomputed/offloaded or not. Simply check the existence of the first output
     // of the node has its corresponding recompute name or not.
-    // TODO: if there is more optimization types like offload added, we will add corresponding check whether the outputs
-    // already be offloaded or not.
+    // TODO: if there is more optimization types like offload added, we will add a corresponding check
+    // whether the outputs already be offloaded or not.
     if (graph.GetNodeArg(graph_utils::RecomputeName(node_to_duplicate->MutableOutputDefs()[0]->Name())) != nullptr) {
       continue;
     }
diff --git a/orttraining/orttraining/core/optimizer/memory_optimizer.h b/orttraining/orttraining/core/optimizer/memory_optimizer.h
index 1d21c9143f62f..13eb4cdb242f4 100644
--- a/orttraining/orttraining/core/optimizer/memory_optimizer.h
+++ b/orttraining/orttraining/core/optimizer/memory_optimizer.h
@@ -2,163 +2,39 @@
 // Licensed under the MIT License.
 
 #pragma once
-#include <charconv>
+
 #include "core/common/inlined_containers.h"
 #include "core/common/string_utils.h"
 #include "core/optimizer/graph_transformer.h"
+#include "orttraining/core/optimizer/memory_optimizer/common.h"
+#include "orttraining/core/optimizer/memory_optimizer/optimization_planner.h"
+#include "orttraining/core/optimizer/memory_optimizer/recompute_analysis.h"
+#include "orttraining/core/optimizer/memory_optimizer/memory_insight.h"
 
 namespace onnxruntime {
 
 /**
 @Class MemoryOptimizer
 
-Find recomputable subgraphs and enable according to user configs.
+(TODO) move to orttraining/orttraining/core/optimizer/memory_optimizer/ folder.
+
+Find recompute subgraphs and enable them according to user configs. The way we collect subgraphs
+(in orttraining/orttraining/core/optimizer/memory_optimizer/recompute_analysis.h) in brief is:
+1. Find all nodes that generate stashed activations.
+2. For each node, check it data type is supported to recompute
+  a. If yes, add it in the subgraph, and append its input in the queue to scan next;
+  b. otherwise, stop collecting and return the subgraph (could be empty).
+3. Pick up the input node from the queue, and do 2 again. The process ends when the queue is empty or 2.b happens.
+4. Clone the recomputable subgraphs with lower node priority (to execute) and insert them back to the original graph.
 */
 
 class MemoryOptimizer : public GraphTransformer {
  private:
-  using NodeOutputPort = std::pair<const Node*, int>;
-  using ActivationUsedMap = InlinedHashMap<std::string, std::pair<bool, bool>>;
-
-  /**
-   * @brief Level to control allowed operations during subgraph detecting.
-   * Level 0: only allow cheap-to-compute operations.
-   * Level 1: allow more expensive operations.
-   */
-  enum class ProbeLevel {
-    Basic = 0,
-    Advanced = 1,
-    LevelMax = 2,
-  };
-
-  /**
-   * @brief Type of memory reduction techniques.
-   */
-  enum class OptimizationType {
-    None = 0,  // Disabled.
-    Recompute = 1,
-    TypeMax = 2,
-  };
-
-  /**
-   * @brief Type of user config.
-   * type: type of memory reduction techniques.
-   * requested_count: the number of occurrences of a subgraph pattern for alleviation. -1 means apply all.
-   *   One example: if a subgraph pattern is found 3 times, and requested_count is set 2, then the 1st and 2nd subgraph
-   *   in topological order will be applied for alleviation. This is useful to avoid alleviating more memory than
-   *   needed.
-   */
-  struct UserConfig {
-    OptimizationType type;
-    int requested_count;
-  };
-
-  /**
-   * @brief Struct to store properties of a specific subgraph.
-   */
-  struct SubGraphDesc {
-    SubGraphDesc() = default;
-
-    // A string to represent the subgraph, used as a unique "ID" for a unique subgraph.
-    std::string subgraph_representative_str;
-
-    InlinedHashMap<std::string, int> shape_str_frequency;  // shape string to frequency
-    UserConfig user_optimizer_config;
-    int total_frequency{0};  // The occurrence of this subgraph pattern in the graph.
-
-    int applied_count{0};      // The number of times this subgraph pattern has been really applied in this transformer.
-    int skip_count{0};         // The number of times this subgraph instance has been skipped in reversed topological order.
-    float saving_ratio{1.0f};  // For compromised memory saving, the ratio of memory saving.
-  };
-
-  /**
-   * @brief A struct to maintain the information of target subgraphs to optimize.
-   * Imagine we loop all nodes finding recomputable/offload-able subgraphs, we want to store them first.
-   * Afterwards, we optionally pick up some of them to apply optimization according to user configs.
-   *
-   * subgraph_descs is a map from subgraph string representation to its subgraph related configurations.
-   *
-   * _optimization_target_graphs_ is a map from activation producer node pointers to its target optimization subgraph
-   * nodes. For example, if a subgraph Cast+Gelu can be recomputed, we may have a map like:
-   *  key: node pointer of stashed activation producer Gelu; value: node vector {Cast, Gelu,}.
-   *
-   * When we AddSubGraphInstance, we must provider its corresponding subgraph desc in the parameter.
-   * Then we can know for each subgraph instance, what's the subgraph str representation, and what's the optimization
-   * config.
-   */
-  struct SubGraphStores {
-    /**********************************
-    ** subgraph desc section starts **
-    **********************************/
-
-    size_t SubGraphDescCount() const {
-      return subgraph_descs.size();
-    }
-
-    bool Contains(std::string_view subgraph_str) const {
-      return subgraph_descs.find(subgraph_str) != subgraph_descs.end();
-    }
-
-    SubGraphDesc& GetSubGraphDesc(std::string_view subgraph_string) {
-      ORT_ENFORCE(Contains(subgraph_string), "Subgraph string not found.", subgraph_string);
-      return subgraph_descs.at(subgraph_string);
-    }
-
-    SubGraphDesc& CreateSubGraphDesc(const std::string& subgraph_string,
-                                     UserConfig& config) {
-      ORT_ENFORCE(!Contains(subgraph_string), "Subgraph string already exists.", subgraph_string);
-      subgraph_descs[subgraph_string].user_optimizer_config = config;
-      subgraph_descs[subgraph_string].subgraph_representative_str = subgraph_string;
-      return subgraph_descs[subgraph_string];
-    }
-
-    /**********************************************************************
-    ** subgraph desc section ends, and subgraph instance section starts. **
-    ***********************************************************************/
-
-    // Pair of <nodes in topological order, a string to represent the subgraph>.
-    using GraphInstanceInfo = std::pair<InlinedVector<const Node*>, std::string>;
-
-    void AddSubGraphInstance(const Node* node,
-                             const InlinedVector<const Node*>& nodes_in_topological_order,
-                             const SubGraphDesc& subgraph_desc) {
-      ORT_ENFORCE(_optimization_target_graphs_.find(node) == _optimization_target_graphs_.end());
-      _optimization_target_graphs_[node] = std::make_pair(nodes_in_topological_order,
-                                                          subgraph_desc.subgraph_representative_str);
-    }
-
-    bool ContainsSubGraphInstance(const Node* node) const {
-      return _optimization_target_graphs_.find(node) != _optimization_target_graphs_.end();
-    }
-
-    GraphInstanceInfo& GetSubGraphInstance(const Node* node) {
-      ORT_ENFORCE(_optimization_target_graphs_.find(node) != _optimization_target_graphs_.end());
-      return _optimization_target_graphs_[node];
-    }
-
-    /***********************************
-    ** subgraph instance section ends **
-    ***********************************/
-
-    InlinedHashMap<std::string /*subgraph_representative_str*/, SubGraphDesc> subgraph_descs;
-    InlinedHashMap<const Node*, GraphInstanceInfo> _optimization_target_graphs_;
-  };
-
-  /**
-   * @brief Used to define per-op recompute config.
-   *
-   */
-  struct AllowedRecomputeNodeConfig {
-    InlinedVector<int> input_arg_indices;  // input index to iterate further (bottom up)
-  };
-
  public:
-  MemoryOptimizer(const std::string& enable_memory_optimizer, const std::string& level)
+  MemoryOptimizer(const std::string& memory_optimizer_config, const std::string& level)
       : GraphTransformer("MemoryOptimizer") {
     // Parse user defined configs.
-    ORT_ENFORCE(ParseConfigFromString(enable_memory_optimizer, level).IsOK());
-
-    RegisterAllowedRecomputeOps();
+    ORT_ENFORCE(ParseConfigFromString(memory_optimizer_config, level).IsOK());
   }
 
   Status ApplyImpl(Graph& graph, bool& modified, int graph_level, const logging::Logger& logger) const override;
@@ -166,35 +42,7 @@ class MemoryOptimizer : public GraphTransformer {
   bool ShouldOnlyApplyOnce() const override { return true; }
 
  private:
-  Status ParseConfigFromString(const std::string& enable_memory_optimizer, const std::string& level);
-
-  /**
-   * @brief Prepare info including activation usage, node usage in fw and bw.
-   *
-   * @param graph Graph to iterate.
-   * @param fw_op_output_arg_used_map Collected activation usage mapping.
-   *   - key: node arg name
-   *   - value: a pair of bool, representing whether the activation is used by forward nodes or by backward nodes.
-   * @return int64_t value The boundary op (for example YieldOp) order in topological order. If no boundary op found,
-   *  return -1;
-   */
-  int64_t PrepareForTransformation(const Graph& graph,
-                                   ActivationUsedMap& fw_op_output_arg_used_map,
-                                   InlinedHashMap<NodeIndex, size_t>&
-                                       node_index_to_its_order_in_topological_sort_map) const;
-  /**
-   * @brief Find all stashed activations, e.g. activations used by forward operators and backward operators.
-   *
-   * @param graph Graph to iterate.
-   * @param fw_op_output_arg_used_map Activation usage mapping.
-   * @param candidate_output_args_map Candidate activations, which are consumed by both fw and bw ops.
-   * @return Status
-   */
-  Status GetStashedActivationCandidates(
-      const Graph& graph,
-      const InlinedHashMap<std::string, std::pair<bool, bool>>& fw_op_output_arg_used_map,
-      InlinedHashMap<const Node*, InlinedVector<size_t>>& candidate_output_args_map,
-      const logging::Logger& logger) const;
+  Status ParseConfigFromString(const std::string& memory_optimizer_config, const std::string& level);
 
   /**
    * @brief Apply graph modifications based on user configs.
@@ -212,28 +60,15 @@ class MemoryOptimizer : public GraphTransformer {
    * @return false
    */
   bool ModifyGraph(Graph& graph,
-                   const InlinedHashMap<NodeIndex, size_t>& node_index_to_its_order_in_topological_sort_map,
-                   const InlinedHashMap<const Node*, InlinedVector<size_t>>& candidate_output_args_map,
+                   const InlinedHashMap<NodeIndex, ptrdiff_t>&
+                       node_index_to_its_order_in_topological_sort_map,
+                   const InlinedHashMap<const Node*, InlinedVector<size_t>>&
+                       candidate_output_args_map,
                    const logging::Logger& logger,
-                   int64_t boundary_op_order_in_topological_sort,
-                   SubGraphStores& subgraph_stores,
-                   Node* node) const;
-
-  /**
-   * @brief Convert the recompute subgraph to its string representation.
-   *
-   * @param nodes_in_topological_order The subgraph nodes in topological order.
-   * @param subgraph_string_representation Returns subgraph string representation.
-   * @param log_info Returns log info for users.
-   */
-  void NodesInTopoOrderToString(const InlinedVector<const Node*>& nodes_in_topological_order,
-                                std::string& subgraph_string_representation,
-                                std::string& log_info) const;
-
-  /**
-   * @brief Convert optimization type to string.
-   */
-  std::string UserConfigToString(const UserConfig& config) const;
+                   ptrdiff_t boundary_op_order_in_topological_sort,
+                   Node* node,
+                   std::shared_ptr<optimizer::memory_optimizer::NodeOptimizationPlanBase>& node_plan,
+                   std::shared_ptr<optimizer::memory_optimizer::ClusterApplyContext>& apply_context) const;
 
   /**
    * @brief Summarize transformation details.
@@ -241,72 +76,16 @@ class MemoryOptimizer : public GraphTransformer {
    * @param stashed_activation_statistics statistics around stashed activation memory saving.
    * @return void
    */
-  void PrintSummary(const SubGraphStores& recompute_stores,
-                    const SubGraphStores& recompute_with_compromise_stores,
+  void PrintSummary(const optimizer::memory_optimizer::MemoryOptimizationPlanner& mem_opt_stats,
+                    const InlinedHashMap<const Node*,
+                                         std::shared_ptr<optimizer::memory_optimizer::ClusterApplyContext>>&
+                        node_to_apply_contexts_map,
                     const logging::Logger& logger) const;
 
   /**************************************************
    ** Recompute related function definition starts **
    *************************************************/
 
-  void RegisterAllowedRecomputeOps();
-
-  /**
-   * @brief Find recomputable subgraphs (has at least one nodes, at most MAXIMUM_RECOMPUTE_NODE_COUNT nodes).
-   *
-   * @param node The entry node to start the subgraph matching (bottom-up), usually the last node of found subgraphs.
-   * @param node_output_index_candidates Candidate output indices of "node", which are consumed by both fw and bw ops.
-   * @param fw_op_output_arg_used_map The activation usage (in fw and bw) mapping.
-   * @param node_index_to_its_order_in_topological_sort_map The mapping of node index to its order in topological sort.
-   *   Used to re-order the collected subgraph nodes.
-   * @param nodes_in_topological_order Collected vector of nodes of found subgraph, in the order of the topological
-   *  sorted.
-   * @param logger Logger.
-   * @param compromise_stashed_activation Whether to compromise stashed activation, e.g. if we cannot find a
-   * recomputable subgraph to save a stashed activation, we can compromise to find a recomputable subgraph to reduce the
-   * size of stashed activation.
-   * @param can_compromise_stashed_activation A bool return value, to indicate there is opportunaties for finding a
-   * compromised subgraph.
-   * @return Status
-   */
-  Status SelectRecomputeSubgraph(const Node& node,
-                                 const InlinedVector<size_t>& node_output_index_candidates,
-                                 const ActivationUsedMap& fw_op_output_arg_used_map,
-                                 const InlinedHashMap<NodeIndex, size_t>&
-                                     node_index_to_its_order_in_topological_sort_map,
-                                 InlinedVector<const Node*>& nodes_in_topological_order,
-                                 const logging::Logger& logger,
-                                 bool compromise_stashed_activation,
-                                 bool& can_compromise_stashed_activation) const;
-
-  /**
-   * @brief For the node producing stashed activation, check whether a recomputable subgraph can be found or not.
-   *
-   * @param node The entry node to start the subgraph matching (bottom-up), usually the last node of found subgraphs.
-   * @param fw_op_output_arg_used_map The activation usage (in fw and bw) mapping.
-   * @param node_index_to_its_order_in_topological_sort_map The mapping of node index to its order in topological sort.
-   *   Used to re-order the collected subgraph nodes.
-   * @param candidate_output_args_map A map from node to its candidate activations, which are consumed by both fw and
-   *  bw ops.
-   * @param subgraph_stores A store to maintain all found subgraphs.
-   * @param logger Logger.
-   * @param compromise_stashed_activation Whether to compromise stashed activation, e.g. if we cannot find a
-   * recomputable subgraph to save a stashed activation, we can compromise to find a recomputable subgraph to reduce the
-   * size of stashed activation.
-   * @param can_compromise_stashed_activation A bool return value, to indicate there is opportunaties for finding a
-   * compromised subgraph.
-   */
-  void CheckNodeForRecompute(const Node& node,
-                             const ActivationUsedMap& fw_op_output_arg_used_map,
-                             const InlinedHashMap<NodeIndex, size_t>&
-                                 node_index_to_its_order_in_topological_sort_map,
-                             const InlinedHashMap<const Node*, InlinedVector<size_t>>&
-                                 candidate_output_args_map,
-                             SubGraphStores& subgraph_stores,
-                             const logging::Logger& logger,
-                             bool compromise_stashed_activation,
-                             bool& can_compromise_stashed_activation) const;
-
   /**
    * @brief Duplicate nodes to create a recompute subgraph.
    *
@@ -323,12 +102,10 @@ class MemoryOptimizer : public GraphTransformer {
    ** Recompute related function definition ends   **
    *************************************************/
 
-  // The op types that are supported predefined.
-  InlinedHashMap<std::string, AllowedRecomputeNodeConfig> recomputable_op_type_to_input_arg_index_map_;
   // User enabled map of the subgraph string representation to the alleviation type.
-  InlinedHashMap<std::string, UserConfig> pattern_subgraph_to_user_optimizer_config_map_;
+  InlinedHashMap<std::string, optimizer::memory_optimizer::UserConfig> pattern_subgraph_to_user_optimizer_config_map_;
   std::string optimizer_config_;
-  ProbeLevel recompute_probe_level_;
+  optimizer::memory_optimizer::ProbeLevel recompute_probe_level_;
 };
 
 }  // namespace onnxruntime
diff --git a/orttraining/orttraining/core/optimizer/memory_optimizer/common.cc b/orttraining/orttraining/core/optimizer/memory_optimizer/common.cc
new file mode 100644
index 0000000000000..2291d7e4f37a6
--- /dev/null
+++ b/orttraining/orttraining/core/optimizer/memory_optimizer/common.cc
@@ -0,0 +1,149 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include <charconv>
+#include <vector>
+#include <utility>
+
+#include "orttraining/core/optimizer/memory_optimizer/common.h"
+#include "core/graph/graph_utils.h"
+#include "core/optimizer/utils.h"
+#include "core/graph/graph_viewer.h"
+#include "core/framework/tensorprotoutils.h"
+
+#include "core/common/string_utils.h"
+
+namespace onnxruntime::optimizer::memory_optimizer {
+
+namespace {
+
+constexpr const char empty_dim_param_placeholder[] = "empty_dim_param";
+static size_t index_empty_dim = 0;
+
+bool TensorShapeProtoToDimParamVector(const ONNX_NAMESPACE::TensorShapeProto* shape,
+                                      std::vector<std::string>& dim_params) {
+  bool has_unknown_dim = false;
+  for (int dim_index = 0; dim_index < shape->dim_size(); dim_index++) {
+    auto dim = shape->dim(dim_index);
+    if (utils::HasDimValue(dim)) {
+      dim_params.push_back(std::to_string(dim.dim_value()));
+    } else {
+      std::string trimmed_dim_param = utils::TrimString(dim.dim_param());
+      if (trimmed_dim_param.empty()) {
+        has_unknown_dim = true;
+        dim_params.push_back(empty_dim_param_placeholder + std::to_string(index_empty_dim++));
+      } else {
+        dim_params.push_back(trimmed_dim_param);
+      }
+    }
+  }
+
+  if (shape->dim_size() == 0) {
+    dim_params.push_back("(1)");  // Scalar
+  }
+
+  return has_unknown_dim;
+}
+
+bool HasUnknowDimension(const ONNX_NAMESPACE::TensorShapeProto* shape) {
+  if (shape == nullptr) {
+    return true;
+  }
+
+  std::vector<std::string> dim_params;
+  return TensorShapeProtoToDimParamVector(shape, dim_params);
+}
+
+std::string TensorShapeProtoToString(const ONNX_NAMESPACE::TensorShapeProto* shape) {
+  if (shape == nullptr) {
+    return "unknown";
+  }
+
+  std::vector<std::string> dim_params;
+  TensorShapeProtoToDimParamVector(shape, dim_params);
+
+  std::ostringstream oss;
+  oss << "(";
+  for (auto it = dim_params.begin(); it != dim_params.end(); ++it) {
+    oss << "(" << *it << ")";
+    if (it != (dim_params.end() - 1)) {
+      oss << "*";
+    }
+  }
+  oss << ")";
+
+  return oss.str();
+}
+
+}  // namespace
+
+std::string GetTensorElemCountInSymbolicString(const Node* node, size_t output_index) {
+  const auto& output_def = node->OutputDefs()[output_index];
+  const auto shape = output_def->Shape();
+
+  std::string shape_str = TensorShapeProtoToString(shape);
+
+  // If the output shape contains unknown dimension, we try to get the shape from input.
+  // though the input shape might be different, but its elem size and count should be the same
+  // with the output.
+  if (node->OpType() == "Reshape" && HasUnknowDimension(shape) &&
+      !HasUnknowDimension(node->InputDefs()[0]->Shape())) {
+    shape_str = TensorShapeProtoToString(node->InputDefs()[0]->Shape());
+  }
+
+  return shape_str;
+}
+
+std::string OptimizationTypeToString(OptimizationType type) {
+  switch (type) {
+    case OptimizationType::None:
+      return "None";
+    case OptimizationType::Recompute:
+      return "Recompute";
+    case OptimizationType::RecomputeWithCompromise:
+      return "RecomputeWithCompromise";
+    default:
+      ORT_THROW("Unknown optimization type.");
+  }
+}
+
+int ParseIntValueFromString(std::string_view str) {
+  int int_value = 0;
+  auto result = std::from_chars(str.data(), str.data() + str.size(), int_value);
+  ORT_ENFORCE(result.ec != std::errc::invalid_argument, "Fail to convert to int from string: ", str);
+  return int_value;
+}
+
+Status ParseConfigFromString(std::string_view memory_optimization_config,
+                             InlinedHashMap<std::string, UserConfig>& cluster_id_to_config_map) {
+  if (!memory_optimization_config.empty()) {
+    const auto user_config_strs = utils::SplitString(memory_optimization_config, ",");
+    for (const auto& user_config_str : user_config_strs) {
+      const auto user_config = utils::SplitString(user_config_str, ":");
+      ORT_RETURN_IF_NOT(user_config.size() == 3,
+                        "User config should be in format of SubgraphStr:OptimizationType:RequestApplyCount.");
+
+      const std::string subgraph_string_representation(user_config[0]);
+      int optimization_type_int = ParseIntValueFromString(user_config[1]);
+      int requested_apply_count = ParseIntValueFromString(user_config[2]);
+      ORT_RETURN_IF_NOT(optimization_type_int <
+                                static_cast<int>(OptimizationType::TypeMax) &&
+                            optimization_type_int >= 0,
+                        "Invalid optimization type specified for subgraph: ",
+                        subgraph_string_representation);
+
+      ORT_RETURN_IF_NOT(requested_apply_count == -1 || requested_apply_count >= 0,
+                        "Invalid requested_apply_count specified for subgraph: ", requested_apply_count);
+
+      // At this point, subgraph_string_representation is a pattern graph string representation.
+      // If duplicated subgraph_string_representation is found in user config, the last one will be used.
+      cluster_id_to_config_map[subgraph_string_representation] = UserConfig{
+          static_cast<OptimizationType>(optimization_type_int),
+          requested_apply_count};
+    }
+  }
+
+  return Status::OK();
+}
+
+}  // namespace onnxruntime::optimizer::memory_optimizer
diff --git a/orttraining/orttraining/core/optimizer/memory_optimizer/common.h b/orttraining/orttraining/core/optimizer/memory_optimizer/common.h
new file mode 100644
index 0000000000000..85e2bf4f5d683
--- /dev/null
+++ b/orttraining/orttraining/core/optimizer/memory_optimizer/common.h
@@ -0,0 +1,76 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <utility>
+
+#include "core/common/common.h"
+#include "core/common/logging/logging.h"
+#include "core/common/inlined_containers_fwd.h"
+#include "core/graph/basic_types.h"
+#include "core/framework/data_types.h"
+#include "core/graph/graph_viewer.h"
+
+namespace onnxruntime::optimizer::memory_optimizer {
+
+// Uncomment for debugging Memory optimizer (MO).
+// #define MO_NEED_LOG_DEBUG_INFO 1
+
+#ifndef MO_LOG_DEBUG_INFO
+#ifdef MO_NEED_LOG_DEBUG_INFO
+#define MO_LOG_DEBUG_INFO(logger, message) LOGS(logger, WARNING) << message
+#else
+#define MO_LOG_DEBUG_INFO(logger, message) \
+  ORT_UNUSED_PARAMETER(logger);            \
+  do {                                     \
+  } while (0)
+#endif
+#endif
+
+using NodeOutputPort = std::pair<const Node*, size_t>;
+using ActivationUsedMap = InlinedHashMap<std::string, std::pair<bool, bool>>;
+
+/**
+ * @brief Type of memory reduction techniques.
+ */
+enum class OptimizationType {
+  None = 0,  // Disabled.
+  Recompute = 1,
+  RecomputeWithCompromise = 2,
+  TypeMax = 3,
+};
+
+std::string OptimizationTypeToString(OptimizationType type);
+
+/**
+ * @brief Type of user config.
+ * type: type of memory reduction techniques.
+ * requested_count: the number of occurrences of a subgraph pattern for alleviation. -1 means apply all.
+ *   One example: if a subgraph pattern is found 3 times, and requested_count is set 2, then the 1st and 2nd subgraph
+ *   in topological order will be applied for alleviation. This is useful to avoid alleviating more memory than
+ *   needed.
+ */
+struct UserConfig {
+  OptimizationType type;
+  int requested_count;
+};
+
+/**
+ * @brief Get total element count inn format of a symbolic string.
+ *
+ * @param node The node to get element count.
+ * @param output_index The output index of the node.
+ * @return std::string
+ */
+std::string GetTensorElemCountInSymbolicString(const Node* node, size_t output_index);
+
+int ParseIntValueFromString(std::string_view str);
+
+Status ParseConfigFromString(std::string_view memory_optimization_config,
+                             InlinedHashMap<std::string, UserConfig>& cluster_id_to_config_map);
+
+}  // namespace onnxruntime::optimizer::memory_optimizer
diff --git a/orttraining/orttraining/core/optimizer/memory_optimizer/memory_insight.cc b/orttraining/orttraining/core/optimizer/memory_optimizer/memory_insight.cc
new file mode 100644
index 0000000000000..60f62a9881ef4
--- /dev/null
+++ b/orttraining/orttraining/core/optimizer/memory_optimizer/memory_insight.cc
@@ -0,0 +1,763 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include <algorithm>
+#include <iomanip>
+#include <map>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "core/graph/graph_utils.h"
+#include "core/graph/graph_viewer.h"
+#include "orttraining/core/optimizer/memory_optimizer/common.h"
+#include "orttraining/core/optimizer/memory_optimizer/optimization_planner.h"
+#include "orttraining/core/optimizer/memory_optimizer/recompute_analysis.h"
+#include "orttraining/core/optimizer/memory_optimizer/memory_insight.h"
+
+namespace onnxruntime::optimizer::memory_optimizer {
+
+// Placeholder string for table row separator, which is used to be replaced by table row separator finally.
+constexpr const char kTableRowSeparator[] = "TABLE_SEPARATOR_PLACEHOLDER";
+// Placeholder string for table border, which is used to be replaced by table border finally.
+constexpr const char kTableBorder[] = "TABLE_BORDER_PLACEHOLDER";
+
+// The max length of the first column in the table.
+constexpr const int kFirstColumnWidth = 7;
+// The max length of left part (e.g. title) in the second column.
+constexpr const int kTitleWidthInSecondColumn = 15;
+
+/**
+ * @brief Prepare info including activation usage, node usage in fw and bw.
+ *
+ * @param graph Graph to iterate.
+ * @param boundary_op_order_in_topological_sort index of the boundary op between fw and bw.
+ * @param node_index_to_its_order_in_topological_sort_map The mapping of node index to its order in topological sort.
+ * @param fw_op_output_arg_used_map Collected activation usage mapping.
+ *   - key: node arg name
+ *   - value: a pair of bool, representing whether the activation is used by forward nodes or by backward nodes.
+ * @param is_forward_nodes Collected node is forward pass op mapping.
+ */
+void GetForwardOutputUsageMap(const GraphViewer& graph_viewer,
+                              const ptrdiff_t boundary_op_order_in_topological_sort,
+                              const InlinedHashMap<NodeIndex, size_t>&
+                                  node_index_to_its_order_in_topological_sort_map,
+                              ActivationUsedMap& fw_op_output_arg_used_map,
+                              InlinedHashMap<const Node*, bool>& is_forward_nodes) {
+  ORT_ENFORCE(boundary_op_order_in_topological_sort >= 0);
+  const auto& node_ids = graph_viewer.GetNodesInTopologicalOrder();
+  is_forward_nodes.clear();
+  is_forward_nodes.reserve(node_ids.size());
+
+  auto is_forward_pass_operator = [](ptrdiff_t op_order_in_topological_sort,
+                                     ptrdiff_t boundary_op_order_in_topological_sort) -> bool {
+    return op_order_in_topological_sort <= boundary_op_order_in_topological_sort;
+  };
+
+  fw_op_output_arg_used_map.clear();
+  fw_op_output_arg_used_map.reserve(node_ids.size());
+  for (size_t i = 0; i < node_ids.size(); ++i) {
+    const Node* p_node = graph_viewer.GetNode(node_ids[i]);
+    if (p_node == nullptr /* skip removed nodes*/) {
+      continue;
+    }
+
+    const Node& node = *p_node;
+
+    bool is_forward_op = is_forward_pass_operator(static_cast<ptrdiff_t>(i), boundary_op_order_in_topological_sort);
+    if (!is_forward_op) {
+      is_forward_nodes[p_node] = false;
+      continue;
+    }
+
+    is_forward_nodes[p_node] = true;
+
+    for (auto& output_arg : node.OutputDefs()) {
+      if (!output_arg->Exists() || output_arg->Name().empty()) {
+        continue;
+      }
+
+      bool used_in_fw = false;
+      bool used_in_bw = false;
+      for (auto& consumer_node : graph_viewer.GetConsumerNodes(output_arg->Name())) {
+        ORT_ENFORCE(consumer_node != nullptr, "Consumer node should not be null.");
+        auto it = node_index_to_its_order_in_topological_sort_map.find(consumer_node->Index());
+        ORT_ENFORCE(it !=
+                        node_index_to_its_order_in_topological_sort_map.end(),
+                    "Consumer node should be in topological order map.");
+        size_t consumer_node_index_in_topological_order = it->second;
+        if (is_forward_pass_operator(static_cast<ptrdiff_t>(consumer_node_index_in_topological_order),
+                                     boundary_op_order_in_topological_sort)) {
+          used_in_fw = true;
+        } else {
+          used_in_bw = true;
+        }
+      }
+
+      ORT_ENFORCE(fw_op_output_arg_used_map.find(output_arg->Name()) == fw_op_output_arg_used_map.end(),
+                  "Duplicated output arg found named: ", output_arg->Name());
+      fw_op_output_arg_used_map.insert({{output_arg->Name(), std::make_pair(used_in_fw, used_in_bw)}});
+    }
+  }
+}
+
+/**
+ * @brief Find all stashed activations, e.g. activations used by forward operators and backward operators.
+ *
+ * @param graph_viewer Graph to iterate.
+ * @param boundary_op_order_in_topological_sort The order of the boundary op in the topological sort.
+ * @param fw_op_output_arg_used_map Activation usage mapping.
+ * @param candidate_output_args_map Candidate activations, which are consumed by both fw and bw ops.
+ * @param is_forward_nodes Whether a node is a forward node.
+ * @param logger Logger.
+ * @return Status
+ */
+
+Status GetStashedActivationCandidates(const GraphViewer& graph_viewer,
+                                      const ptrdiff_t boundary_op_order_in_topological_sort,
+                                      ActivationUsedMap& fw_op_output_arg_used_map,
+                                      InlinedHashMap<const Node*, InlinedVector<size_t>>&
+                                          candidate_output_args_map,
+                                      InlinedHashMap<const Node*, bool>& is_forward_nodes,
+                                      const logging::Logger& logger) {
+  if (boundary_op_order_in_topological_sort < 0) {
+    LOGS(logger, VERBOSE) << "No boundary op found. Skip memory optimization.";
+    return Status::OK();
+  }
+
+  const auto& node_ids = graph_viewer.GetNodesInTopologicalOrder();
+
+  InlinedHashMap<NodeIndex, size_t> node_index_to_its_order_in_topological_sort_map;
+  for (size_t i = 0; i < node_ids.size(); ++i) {
+    const Node* p_node = graph_viewer.GetNode(node_ids[i]);
+    if (p_node == nullptr) { /* skip removed nodes*/
+      continue;
+    }
+
+    node_index_to_its_order_in_topological_sort_map[p_node->Index()] = i;
+  }
+
+  GetForwardOutputUsageMap(graph_viewer, boundary_op_order_in_topological_sort,
+                           node_index_to_its_order_in_topological_sort_map,
+                           fw_op_output_arg_used_map,
+                           is_forward_nodes);
+
+  for (auto& kv : fw_op_output_arg_used_map) {
+    // used by fw and bw, then it is a candidate.
+    if (kv.second.first && kv.second.second) {
+      const Node* n = graph_viewer.GetProducerNode(kv.first);
+      ORT_ENFORCE(n, "Activation should have a producer node");
+      size_t k = 0;
+      for (k = 0; k < n->OutputDefs().size(); ++k) {
+        if (n->OutputDefs()[k]->Name().compare(kv.first) == 0) {
+          break;
+        }
+      }
+
+      if (std::find(candidate_output_args_map[n].begin(), candidate_output_args_map[n].end(), k) !=
+          candidate_output_args_map[n].end()) {
+        ORT_ENFORCE(false, "Duplicated candidate output found.");
+      }
+
+      candidate_output_args_map[n].push_back(k);
+      LOGS(logger, VERBOSE) << "Find candidate output named [" << kv.first << "] of Node " << n->Name() << "("
+                            << n->OpType() << ")";
+    }
+  }
+
+  return Status::OK();
+}
+
+Status FindORTModuleMemoryOpportunity(const GraphViewer& graph_viewer,
+                                      const ProbeLevel probe_level,
+                                      const logging::Logger& logger,
+                                      InlinedHashMap<NodeIndex, ptrdiff_t>&
+                                          node_index_to_its_order_in_topological_sort_map,
+                                      ptrdiff_t& yield_op_order_in_topological_sort,
+                                      InlinedHashMap<const Node*, InlinedVector<size_t>>&
+                                          candidate_output_args_map,
+                                      MemoryOptimizationPlanner& memory_opt_planner) {
+  const auto& node_ids = graph_viewer.GetNodesInTopologicalOrder();
+
+  // Find boundary ops between forward and backward pass, currently, it's limited to YieldOp.
+  yield_op_order_in_topological_sort = -1;
+  for (size_t i = 0; i < node_ids.size(); ++i) {
+    const Node* p_node = graph_viewer.GetNode(node_ids[i]);
+    if (p_node == nullptr) { /* skip removed nodes*/
+      continue;
+    }
+
+    if (p_node->OpType() == "YieldOp") {
+      if (yield_op_order_in_topological_sort != -1) {
+        return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "There are multiple YieldOps in the graph, node: ",
+                               p_node->Name(), " is the second one.");
+      }
+      yield_op_order_in_topological_sort = static_cast<ptrdiff_t>(i);
+    }
+
+    node_index_to_its_order_in_topological_sort_map[p_node->Index()] = static_cast<ptrdiff_t>(i);
+  }
+
+  ActivationUsedMap fw_op_output_arg_used_map;
+
+  InlinedHashMap<const Node*, bool> is_forward_nodes;
+  ORT_RETURN_IF_ERROR(GetStashedActivationCandidates(graph_viewer,
+                                                     yield_op_order_in_topological_sort,
+                                                     fw_op_output_arg_used_map,
+                                                     candidate_output_args_map,
+                                                     is_forward_nodes,
+                                                     logger));
+
+  // The first pass - find the candidate subgraphs.
+  for (int i = static_cast<int>(node_ids.size()) - 1; i >= 0; --i) {
+    const Node* p_node = graph_viewer.GetNode(node_ids[i]);
+    if (p_node == nullptr) {
+      continue;
+    }
+
+    if (candidate_output_args_map.find(p_node) == candidate_output_args_map.end()) {
+      continue;
+    }
+
+    bool can_compromise_stashed_activation = false;
+    std::unique_ptr<NodeRecomputePlan> recompute_plan =
+        CheckNodeForRecompute(*p_node,
+                              probe_level,
+                              fw_op_output_arg_used_map,
+                              node_index_to_its_order_in_topological_sort_map,
+                              candidate_output_args_map,
+                              logger, false,
+                              can_compromise_stashed_activation);
+    if (recompute_plan != nullptr) {
+      memory_opt_planner.AddNodeOptimizationPlan(p_node, std::move(recompute_plan));
+    }
+
+    if (can_compromise_stashed_activation) {
+      LOGS(logger, VERBOSE) << "Searching Node " << p_node->Name() << "(" << p_node->OpType()
+                            << ") for compromised recompute";
+      // If the subgraph recompute can save memory by comprising the assumption - recompute graphs' input must exist
+      // during backward pass, then we can consider to recompute them.
+      std::unique_ptr<NodeRecomputePlan> recompute_with_compromise_plan =
+          CheckNodeForRecompute(*p_node, probe_level, fw_op_output_arg_used_map,
+                                node_index_to_its_order_in_topological_sort_map,
+                                candidate_output_args_map,
+                                logger, true,
+                                can_compromise_stashed_activation);
+      if (recompute_with_compromise_plan != nullptr) {
+        memory_opt_planner.AddNodeOptimizationPlan(p_node, std::move(recompute_with_compromise_plan));
+      }
+    }
+  }
+
+  return Status::OK();
+}
+
+void GetMemoryRecordsGroupedByNodeClusterId(const MemoryOptimizationPlanner& memory_opt_planner,
+                                            const NodeToClusterApplyContextMap& node_to_apply_contexts_map,
+                                            std::vector<std::pair<std::string, MemoryRecord>>& generated_records) {
+  // Group by node cluster id, generate memory record.
+  InlinedHashMap<std::string, MemoryRecord> records;
+  const auto& node_to_optimization_plan_map = memory_opt_planner.GetNodeToOptimizationPlanMap();
+  for (const auto& node_to_optimization_plan : node_to_optimization_plan_map) {
+    const auto& node = node_to_optimization_plan.first;
+    const auto& node_plans = node_to_optimization_plan.second;
+    const std::string node_cluster_id = memory_opt_planner.GenerateNodeClusterId(node);
+
+    std::pair<InlinedHashMap<std::string, MemoryRecord>::iterator, bool> insert_result =
+        records.insert({node_cluster_id, MemoryRecord()});
+    bool already_exist = !insert_result.second;
+    auto& record = insert_result.first->second;
+    record.freq++;
+
+    // Collect more information for display.
+    for (auto& plan : node_plans) {
+      // Same node cluster id, plans might still have different reuse_buffer pattern, so we need to collect all of them.
+      if (plan->reuse_buffers.size() > 0) {
+        gsl::span<const size_t> output_indices = plan->GetActivationOutputIndices();
+        for (auto output_index : output_indices) {
+          bool is_output_reusing_buffers = plan->reuse_buffers.find(output_index) != plan->reuse_buffers.end();
+          if (plan->GetOptimizationType() == OptimizationType::RecomputeWithCompromise) {
+            if (is_output_reusing_buffers) {
+              record.output_port_reuse_recompute_with_compromise_count[output_index] += 1;
+            }
+          } else if (plan->GetOptimizationType() == OptimizationType::Recompute) {
+            if (is_output_reusing_buffers) {
+              record.output_port_reuse_recompute_count[output_index] += 1;
+            }
+          }
+        }
+      }
+
+      // For other infos that are guaranteed identity by cluster id, just skip collecting.
+      if (already_exist) {
+        continue;
+      }
+
+      if (plan->GetOptimizationType() == OptimizationType::RecomputeWithCompromise) {
+        record.recompute_with_compromise_subgraph_str =
+            dynamic_cast<NodeRecomputePlan*>(plan.get())->GetNodesInTopoOrderStr();
+      } else if (plan->GetOptimizationType() == OptimizationType::Recompute) {
+        record.recompute_subgraph_str = dynamic_cast<NodeRecomputePlan*>(plan.get())->GetNodesInTopoOrderStr();
+      }
+
+      gsl::span<const size_t> output_indices = plan->GetActivationOutputIndices();
+      for (auto output_index : output_indices) {
+        const auto& output_def = node->OutputDefs()[output_index];
+        MLDataType ml_data_type = DataTypeImpl::TypeFromProto(*output_def->TypeAsProto());
+        ORT_ENFORCE(ml_data_type->IsTensorType(), "ml_type must be a tensor type, but it is ",
+                    DataTypeImpl::ToString(ml_data_type));
+        const TensorTypeBase* tensor_type_base = ml_data_type->AsTensorType();
+        ORT_ENFORCE(nullptr != tensor_type_base);
+        MLDataType elt_type = tensor_type_base->GetElementType();
+
+        const auto byte_count_per_element = elt_type->Size();
+        if (plan->GetOptimizationType() == OptimizationType::RecomputeWithCompromise) {
+          record.compromise_recomputed_outputs.emplace_back(
+              output_index,
+              GetTensorElemCountInSymbolicString(node, output_index),
+              byte_count_per_element,
+              plan->GetSaveRatio());
+
+        } else if (plan->GetOptimizationType() == OptimizationType::Recompute) {
+          record.recomputed_outputs.emplace_back(output_index,
+                                                 GetTensorElemCountInSymbolicString(node, output_index),
+                                                 byte_count_per_element,
+                                                 plan->GetSaveRatio());
+        }
+      }
+    }
+  }
+
+  // Sort by feq and then by record key, to make sure the output is deterministic.
+  InlinedVector<std::pair<int, std::string>> freq_to_record_key;
+  for (const auto& p : records) {
+    freq_to_record_key.push_back({p.second.freq, p.first});
+  }
+
+  std::sort(freq_to_record_key.begin(), freq_to_record_key.end(), [](auto& left, auto& right) {
+    if (left.first == right.first) {
+      return left.second.compare(right.second) > 0;
+    }
+    return left.first > right.first;
+  });
+
+  for (const auto& p : freq_to_record_key) {
+    const std::string record_key = p.second;
+    generated_records.push_back({record_key, records[record_key]});
+  }
+
+  // If apply context is provided, also update the actual applied count.
+  if (node_to_apply_contexts_map.size() > 0) {
+    InlinedHashMap<std::string, MemoryRecord*> node_cluster_id_to_record_map;
+    for (auto& p : generated_records) {
+      node_cluster_id_to_record_map[p.first] = &p.second;
+    }
+
+    for (const auto& p : node_to_apply_contexts_map) {
+      const auto& node = p.first;
+      const auto& apply_context = p.second;
+      std::string node_cluster_id = memory_opt_planner.GenerateNodeClusterId(node);
+      if (apply_context->type == OptimizationType::Recompute) {
+        node_cluster_id_to_record_map[node_cluster_id]->actual_recompute_count += 1;
+        node_cluster_id_to_record_map[node_cluster_id]->request_recompute_count = apply_context->requested_count;
+      } else if (apply_context->type == OptimizationType::RecomputeWithCompromise) {
+        node_cluster_id_to_record_map[node_cluster_id]->actual_recompute_with_compromise_count += 1;
+        node_cluster_id_to_record_map[node_cluster_id]->request_recompute_with_compromise_count =
+            apply_context->requested_count;
+      } else {
+        ORT_THROW("Unsupported optimization type found.");
+      }
+    }
+  }
+}
+
+// Function declare to make it compile.
+void IterateNodeOptimizationPlan(const std::shared_ptr<NodeOptimizationPlanBase>& plan,
+                                 const InlinedHashMap<const Node*, InlinedVector<std::shared_ptr<NodeOptimizationPlanBase>>>&
+                                     node_to_optimization_plans_map,
+                                 const InlinedVector<std::shared_ptr<NodeOptimizationPlanBase>>&
+                                     current_combination,
+                                 const logging::Logger& logger,
+                                 InlinedVector<InlinedVector<std::shared_ptr<NodeOptimizationPlanBase>>>&
+                                     all_combinations);
+
+/*
+ * Iterate from a node, generate combinations for each optimization plan for it.
+ */
+void IterateNode(const Node* node,
+                 const InlinedHashMap<const Node*, InlinedVector<std::shared_ptr<NodeOptimizationPlanBase>>>&
+                     node_to_optimization_plans_map,
+                 const InlinedVector<std::shared_ptr<NodeOptimizationPlanBase>>&
+                     current_combination,
+                 const logging::Logger& logger,
+                 InlinedVector<InlinedVector<std::shared_ptr<NodeOptimizationPlanBase>>>&
+                     all_combinations) {
+  MO_LOG_DEBUG_INFO(logger, "Enter IterateNode: " + node->Name());
+  if (node_to_optimization_plans_map.find(node) == node_to_optimization_plans_map.end()) {
+    MO_LOG_DEBUG_INFO(logger, "Exit IterateNode since reused node don't have optimization plans: " + node->Name());
+    return;
+  }
+
+  for (const std::shared_ptr<NodeOptimizationPlanBase>& plan : node_to_optimization_plans_map.at(node)) {
+    if (std::find(current_combination.begin(), current_combination.end(), plan) !=
+        current_combination.end()) {
+      continue;
+    }
+    InlinedVector<std::shared_ptr<NodeOptimizationPlanBase>> new_combination = current_combination;
+    new_combination.push_back(plan);
+    IterateNodeOptimizationPlan(plan, node_to_optimization_plans_map, new_combination, logger, all_combinations);
+  }
+  MO_LOG_DEBUG_INFO(logger, "Exit IterateNode: " + node->Name());
+}
+
+void ListAllCombinations(const InlinedVector<InlinedVector<InlinedVector<std::shared_ptr<NodeOptimizationPlanBase>>>>&
+                             all_possible_node_optimization_plans,
+                         int index,
+                         const InlinedVector<std::shared_ptr<NodeOptimizationPlanBase>>& current_combination,
+                         const logging::Logger& logger,
+                         InlinedVector<InlinedVector<std::shared_ptr<NodeOptimizationPlanBase>>>&
+                             all_combinations) {
+  MO_LOG_DEBUG_INFO(logger, "Enter ListAllCombinations");
+  if (index == static_cast<int>(all_possible_node_optimization_plans.size())) {
+    if (std::find(all_combinations.begin(), all_combinations.end(), current_combination) ==
+        all_combinations.end()) {
+      all_combinations.push_back(current_combination);
+    }
+    MO_LOG_DEBUG_INFO(logger, "Exit ListAllCombinations after finding a new combination");
+    return;
+  }
+
+  for (const auto& plans : all_possible_node_optimization_plans[index]) {
+    for (const auto& plan : plans) {
+      InlinedVector<std::shared_ptr<NodeOptimizationPlanBase>> new_combination = current_combination;
+      new_combination.push_back(plan);
+      ListAllCombinations(all_possible_node_optimization_plans, index + 1, new_combination, logger, all_combinations);
+    }
+  }
+
+  MO_LOG_DEBUG_INFO(logger, "Exit ListAllCombinations");
+}
+
+/**
+ * Iterate from a node optimization plan, if there is any buffer reuse in its node outputs,
+ * iterate all possible reuse buffer plan combinations.
+ */
+void IterateNodeOptimizationPlan(const std::shared_ptr<NodeOptimizationPlanBase>& plan,
+                                 const InlinedHashMap<const Node*, InlinedVector<std::shared_ptr<NodeOptimizationPlanBase>>>&
+                                     node_to_optimization_plans_map,
+                                 const InlinedVector<std::shared_ptr<NodeOptimizationPlanBase>>&
+                                     current_combination,
+                                 const logging::Logger& logger,
+                                 InlinedVector<InlinedVector<std::shared_ptr<NodeOptimizationPlanBase>>>&
+                                     all_combinations) {
+  MO_LOG_DEBUG_INFO(logger, "Enter IterateNodeOptimizationPlan: " + plan->GetClusterId());
+
+  // No reuse buffer, don't need to iterate further, we found a plan combination already.
+  if (plan->reuse_buffers.size() == 0) {
+    MO_LOG_DEBUG_INFO(logger, "length of current_combination: " +
+                                  std::to_string(current_combination.size()) + ", " + plan->GetClusterId());
+    all_combinations.push_back(current_combination);
+    MO_LOG_DEBUG_INFO(logger, "Exit IterateNodeOptimizationPlan");
+    return;
+  }
+
+  InlinedVector<InlinedVector<InlinedVector<std::shared_ptr<NodeOptimizationPlanBase>>>>
+      all_possible_node_optimization_plans;
+  all_possible_node_optimization_plans.resize(plan->reuse_buffers.size());
+
+  size_t i = 0;
+  for (const auto& p : plan->reuse_buffers) {
+    MO_LOG_DEBUG_INFO(logger, ">>>reuse buffer: " + std::to_string(p.first));
+    IterateNode(p.second.first, node_to_optimization_plans_map, {}, logger, all_possible_node_optimization_plans[i]);
+    ++i;
+  }
+
+  ListAllCombinations(all_possible_node_optimization_plans, 0, current_combination, logger, all_combinations);
+
+  MO_LOG_DEBUG_INFO(logger, "Exit IterateNodeOptimizationPlan: " + plan->GetClusterId());
+}
+
+// Return a deterministic string for multiple plans combinations.
+std::string GetMultiplePlanClusterId(const InlinedVector<std::shared_ptr<NodeOptimizationPlanBase>>& plans) {
+  constexpr const int request_count = -1;  // -1 means apply optimization to all appearances.
+
+  std::ostringstream oss;
+  InlinedVector<std::string> sorted_plans;
+  for (const auto& plan : plans) {
+    sorted_plans.push_back(plan->GetClusterId() + ":" + std::to_string(static_cast<int>(plan->GetOptimizationType())) +
+                           ":" + std::to_string(request_count));
+  }
+
+  std::sort(sorted_plans.begin(), sorted_plans.end());
+
+  for (const auto& plan : sorted_plans) {
+    if (oss.str().size() > 0) {
+      oss << ",";
+    }
+    oss << plan;
+  }
+  return oss.str();
+}
+
+void GetMemorySavingSymbolicString(const MemoryOptimizationPlanner& memory_opt_planner,
+                                   const logging::Logger& logger,
+                                   std::map<std::string, std::pair<std::string, int>>&
+                                       combination_cluster_ids_to_saved_symbolic_byte_map) {
+  // Group by "ClusterId:OptimizationType:RequestCount".
+  InlinedVector<InlinedVector<std::shared_ptr<NodeOptimizationPlanBase>>> all_combinations;
+
+  combination_cluster_ids_to_saved_symbolic_byte_map.clear();
+  const auto& node_to_optimization_plan_map = memory_opt_planner.GetNodeToOptimizationPlanMap();
+  for (const auto& node_to_optimization_plan : node_to_optimization_plan_map) {
+    const auto& node = node_to_optimization_plan.first;
+    InlinedVector<std::shared_ptr<NodeOptimizationPlanBase>> current_combination;
+    MO_LOG_DEBUG_INFO(logger, ">>>Start looping node: " + node->Name());
+    IterateNode(node, node_to_optimization_plan_map, current_combination, logger, all_combinations);
+    MO_LOG_DEBUG_INFO(logger, "<<<End looping node: " + node->Name());
+  }
+
+  for (const auto& combination : all_combinations) {
+    std::string combination_cluster_id = GetMultiplePlanClusterId(combination);
+    std::string symbolic_byte_count = "";
+    for (const auto& plan : combination) {
+      if (symbolic_byte_count.size() > 0) {
+        symbolic_byte_count += " + ";
+      }
+      symbolic_byte_count += plan->GetMemorySavingSymbolicString();
+    }
+
+    if (symbolic_byte_count.size() > 0) {
+      symbolic_byte_count = "(" + symbolic_byte_count + ")";
+    }
+    auto& p = combination_cluster_ids_to_saved_symbolic_byte_map[combination_cluster_id];
+    const auto& original = p.first;
+    if (original.size() > 0) {
+      symbolic_byte_count = original + " + " + symbolic_byte_count;
+    }
+
+    MO_LOG_DEBUG_INFO(logger, "combination_cluster_id: " + combination_cluster_id +
+                                  ", symbolic_byte_count: " + symbolic_byte_count);
+
+    p.first = symbolic_byte_count;
+    p.second += 1;
+  }
+}
+
+namespace {
+
+template <typename T>
+std::string ToFixedLengthString(T value, int length) {
+  std::ostringstream oss;
+  oss << std::setw(length) << std::left;
+  oss << value;
+  return oss.str();
+}
+
+void FormatRecomputeMemoryRecords(int option_index,
+                                  const MemoryRecord& record,
+                                  bool compromise_recompute,
+                                  InlinedVector<std::string>& rows) {
+  const auto subgraph_str = compromise_recompute ? record.recompute_with_compromise_subgraph_str
+                                                 : record.recompute_subgraph_str;
+  const auto opt_type = compromise_recompute ? OptimizationType::RecomputeWithCompromise
+                                             : OptimizationType::Recompute;
+  const auto request_count = compromise_recompute ? record.request_recompute_with_compromise_count
+                                                  : record.request_recompute_count;
+  const auto actual_count = compromise_recompute ? record.actual_recompute_with_compromise_count
+                                                 : record.actual_recompute_count;
+
+  const std::string empty_first_col = "|" + ToFixedLengthString(std::string(), kFirstColumnWidth) + "|";
+
+  rows.push_back(empty_first_col);
+  rows.push_back(empty_first_col +
+                 ToFixedLengthString(">>Option " + std::to_string(option_index), kTitleWidthInSecondColumn) + ": " +
+                 OptimizationTypeToString(opt_type) + " subgraph " + subgraph_str);
+
+  if (request_count) {
+    // Only show this if user requested it.
+    rows.push_back(
+        empty_first_col +
+        ToFixedLengthString("  Status", kTitleWidthInSecondColumn) + ": " + "Enabled, requested count=" +
+        std::to_string(request_count) +
+        ", actual applied count=" + std::to_string(actual_count));
+  } else {
+    rows.push_back(empty_first_col + ToFixedLengthString("  Status", kTitleWidthInSecondColumn) +
+                   ": Disabled. Enable with export ORTMODULE_MEMORY_OPT_CONFIG=" +
+                   subgraph_str + ":" + std::to_string(static_cast<int>(opt_type)) + ":-1");
+  }
+
+  std::string activation_str = empty_first_col + "  Stashed Activations: ";
+  rows.push_back(activation_str);
+
+  const auto& reused_buffers = compromise_recompute ? record.output_port_reuse_recompute_with_compromise_count
+                                                    : record.output_port_reuse_recompute_count;
+  if (reused_buffers.size() > 0) {
+    std::string reused_buffers_summary = empty_first_col + ToFixedLengthString("   - ReuseFreq", kTitleWidthInSecondColumn) + ": ";
+    for (const auto& p : reused_buffers) {
+      reused_buffers_summary += " Output " + std::to_string(p.first) + "(" + std::to_string(p.second) + "),";
+    }
+
+    rows.push_back(reused_buffers_summary);
+  }
+
+  const auto activation_count = compromise_recompute ? record.compromise_recomputed_outputs.size()
+                                                     : record.recomputed_outputs.size();
+  for (size_t i = 0; i < activation_count; ++i) {
+    const MemoryRecord::OutputStat* stat;
+    if (compromise_recompute) {
+      stat = &record.compromise_recomputed_outputs[i];
+    } else {
+      stat = &record.recomputed_outputs[i];
+    }
+
+    rows.push_back(empty_first_col +
+                   ToFixedLengthString("   - Output " + std::to_string(stat->output_index), kTitleWidthInSecondColumn) +
+                   ": [" + stat->output_shape_str + "], byte/elem: " +
+                   std::to_string(stat->output_byte_count_per_element) +
+                   ", " + std::to_string(static_cast<int>(stat->saving_ratio * 100)) +
+                   "% saved");
+  }
+}
+}  // namespace
+
+std::string SerializeMemoryRecords(
+    const std::vector<std::pair<std::string, MemoryRecord>>& records_grouped_by_node_cluster_id,
+    std::string_view user_config) {
+  InlinedVector<std::string> rows;
+  rows.push_back(kTableBorder);
+  rows.push_back("|" + ToFixedLengthString("Freq", kFirstColumnWidth) +
+                 "| Memory Optimization Opportunities (Clustered by node-level activation patterns)");
+  rows.push_back(kTableRowSeparator);
+
+  for (const auto& p : records_grouped_by_node_cluster_id) {
+    const auto& record = p.second;
+    rows.push_back("|" + ToFixedLengthString(record.freq, kFirstColumnWidth) +
+                   "|For each row options are mutually exclusive, only one of them can be enabled.");
+
+    int option_index = 1;
+    if (record.recomputed_outputs.size() > 0) {
+      FormatRecomputeMemoryRecords(option_index, record, false, rows);
+      option_index++;
+    }
+
+    if (record.compromise_recomputed_outputs.size() > 0) {
+      FormatRecomputeMemoryRecords(option_index, record, true, rows);
+      option_index++;
+    }
+    rows.push_back(kTableRowSeparator);
+  }
+
+  rows.push_back(kTableBorder);
+
+  size_t max_length = 0;
+  for (auto& row : rows) {
+    max_length = std::max(max_length, row.length());
+  }
+
+  // Example is:
+  // static const std::string row_separator =
+  //     "|_ _ _ _|_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _|\n";
+  static const std::string kTableRowSeparatorStart = "|_ _ _ _|";
+  size_t second_row_length = max_length - kTableRowSeparatorStart.length();
+  if (second_row_length % 2 == 0) {
+    second_row_length += 2;
+    max_length += 2;
+  } else {
+    second_row_length += 3;  // add 3 to make it even
+    max_length += 3;
+  }
+  std::string row_separator_full(second_row_length, ' ');
+  for (size_t i = 0; i < row_separator_full.size() - 1; ++i) {
+    if (i % 2 == 0) {
+      row_separator_full[i] = '_';
+    }
+  }
+  row_separator_full[row_separator_full.size() - 1] = '|';
+  row_separator_full = kTableRowSeparatorStart + row_separator_full;
+
+  std::string table_border_full(max_length, '=');
+  std::ostringstream summary;
+  summary << std::endl;
+  summary << MakeString("MemoryInsight Summary - User config: ", (user_config.empty() ? "not provided" : user_config))
+          << std::endl;
+  for (auto& row : rows) {
+    if (row == kTableRowSeparator) {
+      summary << row_separator_full << std::endl;
+    } else if (row == kTableBorder) {
+      summary << table_border_full << std::endl;
+    } else {
+      std::string filled_up = std::string(max_length - row.length(), ' ');
+      filled_up[filled_up.length() - 1] = '|';
+      summary << row << filled_up << std::endl;
+    }
+  }
+  summary << "Note: use comma as a separator for enabling more than one subgraphs." << std::endl;
+  return summary.str();
+}
+
+std::string GetSerializedORTModuleMemoryStat(const GraphViewer& graph_viewer,
+                                             std::string_view memory_optimization_config,
+                                             std::string_view recompute_probe_level,
+                                             const logging::Logger& logger,
+                                             std::map<std::string, std::pair<std::string, int>>&
+                                                 cluster_id_combinations_to_saved_symbolic_byte_map,
+                                             const OrtValueNameIdxMap* ortvalue_name_to_idx_map,
+                                             const SequentialExecutionPlan* p_seq_exec_plan) {
+  ProbeLevel probe_level = ProbeLevel::Advanced;
+  if (!recompute_probe_level.empty()) {
+    int probe_level_int = ParseIntValueFromString(recompute_probe_level);
+    ORT_ENFORCE(probe_level_int < static_cast<int>(ProbeLevel::LevelMax) &&
+                    probe_level_int >= 0,
+                "Invalid probe level specified: ", recompute_probe_level);
+    probe_level = static_cast<ProbeLevel>(probe_level);
+  }
+
+  ptrdiff_t yield_op_order_in_topological_sort;
+  InlinedHashMap<const Node*, InlinedVector<size_t>> candidate_output_args_map;
+  InlinedHashMap<NodeIndex, ptrdiff_t> node_index_to_its_order_in_topological_sort_map;
+
+  // The first pass - find the candidate subgraphs.
+  MemoryOptimizationPlanner memory_opt_planner;
+  ORT_ENFORCE(FindORTModuleMemoryOpportunity(
+                  graph_viewer,
+                  probe_level,
+                  logger,
+                  node_index_to_its_order_in_topological_sort_map,
+                  yield_op_order_in_topological_sort,
+                  candidate_output_args_map,
+                  memory_opt_planner)
+                  .IsOK());
+
+  InlinedHashMap<std::string, UserConfig> cluster_id_to_config_map;
+  // Finalize the plan according to user config,
+  // then create a ClusterApplyContext for each unique cluster (having the same node pattern)
+
+  NodeToClusterApplyContextMap node_to_apply_context_map;
+
+  if (!memory_optimization_config.empty()) {
+    ORT_ENFORCE(ParseConfigFromString(memory_optimization_config, cluster_id_to_config_map)
+                    .IsOK());
+    InlinedHashMap<const Node*, std::shared_ptr<NodeOptimizationPlanBase>> node_to_opt_plan_map;
+    ORT_ENFORCE(memory_opt_planner.FinalizeNodePlansFromUserConfig(cluster_id_to_config_map,
+                                                                   node_to_opt_plan_map,
+                                                                   node_to_apply_context_map)
+                    .IsOK());
+  }
+
+  if (ortvalue_name_to_idx_map != nullptr && p_seq_exec_plan != nullptr) {
+    ORT_ENFORCE(memory_opt_planner.UpdateNodePlansFromExecutionPlan(graph_viewer,
+                                                                    *ortvalue_name_to_idx_map,
+                                                                    *p_seq_exec_plan)
+                    .IsOK());
+  }
+
+  std::vector<std::pair<std::string, MemoryRecord>> records;
+  GetMemoryRecordsGroupedByNodeClusterId(memory_opt_planner, node_to_apply_context_map, records);
+
+  GetMemorySavingSymbolicString(memory_opt_planner, logger, cluster_id_combinations_to_saved_symbolic_byte_map);
+
+  return SerializeMemoryRecords(records, memory_optimization_config);
+}
+
+}  // namespace onnxruntime::optimizer::memory_optimizer
diff --git a/orttraining/orttraining/core/optimizer/memory_optimizer/memory_insight.h b/orttraining/orttraining/core/optimizer/memory_optimizer/memory_insight.h
new file mode 100644
index 0000000000000..c4267efdbea51
--- /dev/null
+++ b/orttraining/orttraining/core/optimizer/memory_optimizer/memory_insight.h
@@ -0,0 +1,129 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+
+#include <map>
+#include <string>
+#include <utility>
+
+#include "orttraining/core/optimizer/memory_optimizer/common.h"
+#include "orttraining/core/optimizer/memory_optimizer/optimization_planner.h"
+#include "orttraining/core/optimizer/memory_optimizer/recompute_analysis.h"
+
+namespace onnxruntime::optimizer::memory_optimizer {
+
+/**
+ * @brief A data structure to store memory optimization statistics for a specific node cluster id.
+ *
+ * We will collect statistics for each node cluster id.
+ * The node cluster id is generated from all possible optimization plans for a specific node, plus shape, data type,
+ * outputs, etc. For the nodes have the same node cluster id, they will have one single MemoryRecord, displayed
+ * as a row in the final memory optimization statistics table.
+ */
+class MemoryRecord {
+ public:
+  class OutputStat {
+   public:
+    OutputStat(size_t output_index, std::string_view output_shape, size_t output_byte_count_per_element,
+               float saving_ratio)
+        : output_index(output_index),
+          output_shape_str(output_shape),
+          output_byte_count_per_element(output_byte_count_per_element),
+          saving_ratio(saving_ratio) {}
+
+    // output index, shape, byte count per element, saving ratio
+    size_t output_index;
+    std::string output_shape_str;
+    size_t output_byte_count_per_element;
+    float saving_ratio;
+  };
+
+  // Recompute Column
+  std::string recompute_subgraph_str;
+  InlinedVector<OutputStat> recomputed_outputs;
+  int request_recompute_count = 0;
+  int actual_recompute_count = 0;
+  InlinedHashMap<size_t, int> output_port_reuse_recompute_count;
+
+  // RecomputeWithCompromise Column
+  std::string recompute_with_compromise_subgraph_str;
+  InlinedVector<OutputStat> compromise_recomputed_outputs;
+  int request_recompute_with_compromise_count = 0;
+  int actual_recompute_with_compromise_count = 0;
+  InlinedHashMap<size_t, int> output_port_reuse_recompute_with_compromise_count;
+
+  // Frequency Column
+  int freq = 0;
+};
+
+/**
+ * @brief Iterate the graph and find all possible memory optimization opportunities for related nodes.
+ *
+ * @param graph_viewer  The graph to iterate.
+ * @param probe_level The level to control allowed operations during recomputable subgraph detecting.
+ * @param logger Logger.
+ * @param node_index_to_its_order_in_topological_sort_map  The mapping of node index to its order in topological sort.
+ * @param yield_op_order_in_topological_sort The order of the boundary op in the topological sort.
+ * @param candidate_output_args_map  A map from node to its candidate activations, which are consumed by both fw and
+ * @param mem_opt_stats  A store to maintain all found optimization plans for related nodes.
+ * @return Status
+ */
+Status FindORTModuleMemoryOpportunity(const GraphViewer& graph_viewer,
+                                      const ProbeLevel probe_level,
+                                      const logging::Logger& logger,
+                                      InlinedHashMap<NodeIndex, ptrdiff_t>&
+                                          node_index_to_its_order_in_topological_sort_map,
+                                      ptrdiff_t& yield_op_order_in_topological_sort,
+                                      InlinedHashMap<const Node*, InlinedVector<size_t>>& candidate_output_args_map,
+                                      MemoryOptimizationPlanner& mem_opt_stats);
+
+/**
+ * @brief From the optimization plans, generate the memory optimization statistics table containing many MemoryRecords,
+ * each represents one node cluster id.
+ *
+ * @param memory_opt_planner The optimization planner to get optimization plans.
+ * @param node_to_apply_contexts_map The optimization applying information.
+ * @param generated_records Returns the generated memory optimization statistics table.
+ * (for example, how many are actually applied) to each MemoryRecord.
+ */
+void GetMemoryRecordsGroupedByNodeClusterId(const MemoryOptimizationPlanner& memory_opt_planner,
+                                            const NodeToClusterApplyContextMap&
+                                                node_to_apply_contexts_map,
+                                            std::vector<std::pair<std::string, MemoryRecord>>& generated_records);
+
+/**
+ * @brief Serialize the memory optimization statistics table to a string.
+ *
+ * @param records_grouped_by_node_cluster_id The memory optimization statistics table.
+ * @param user_config The user configuration to the serialized string.
+ * @return std::string
+ */
+std::string SerializeMemoryRecords(const std::vector<std::pair<std::string, MemoryRecord>>&
+                                       records_grouped_by_node_cluster_id,
+                                   std::string_view user_config);
+
+/**
+ * @brief A public API exposed to retrieve the memory optimization statistics table, given a graph.
+ *
+ * If possible, session's allocation plans and execution plan will also be available to help the analysis.
+ *
+ * @param graph_viewer The graph to analyze.
+ * @param memory_optimization_config The user configuration to control the memory optimization.
+ * @param recompute_probe_level The level to control allowed operations during recomputable subgraph detecting.
+ * @param logger Logger.
+ * @param ortvalue_name_to_idx_map Optional. If provided, we will use it to map ort value name to index.
+ * @param p_seq_exec_plan Optional. If provided, we will use it to get allocation plans.
+ * @return std::string
+ */
+std::string GetSerializedORTModuleMemoryStat(const GraphViewer& graph_viewer,
+                                             std::string_view memory_optimization_config,
+                                             std::string_view recompute_probe_level,
+                                             const logging::Logger& logger,
+                                             // used as Python binding, so used std::map instead of InlinedHashMap
+                                             std::map<std::string, std::pair<std::string, int>>&
+                                                 cluster_id_combinations_to_saved_symbolic_byte_map,
+                                             const OrtValueNameIdxMap* ortvalue_name_to_idx_map = nullptr,
+                                             const SequentialExecutionPlan* p_seq_exec_plan = nullptr);
+
+}  // namespace onnxruntime::optimizer::memory_optimizer
diff --git a/orttraining/orttraining/core/optimizer/memory_optimizer/optimization_planner.cc b/orttraining/orttraining/core/optimizer/memory_optimizer/optimization_planner.cc
new file mode 100644
index 0000000000000..7e042031f66a2
--- /dev/null
+++ b/orttraining/orttraining/core/optimizer/memory_optimizer/optimization_planner.cc
@@ -0,0 +1,140 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "core/graph/graph_utils.h"
+#include "core/optimizer/utils.h"
+#include "core/framework/ort_value_name_idx_map.h"
+#include "core/framework/sequential_execution_plan.h"
+
+#include "orttraining/core/optimizer/memory_optimizer/common.h"
+#include "orttraining/core/optimizer/memory_optimizer/optimization_planner.h"
+
+namespace onnxruntime::optimizer::memory_optimizer {
+
+std::string NodeOptimizationPlanBase::GetMemorySavingSymbolicString() const {
+  std::string saving_str;
+  for (auto output_index : activation_output_indices_) {
+    // If the output is reusing other node's buffer, then no memory saving.
+    if (reuse_buffers.find(output_index) != reuse_buffers.end()) {
+      continue;
+    }
+
+    const auto& output_def = node->OutputDefs()[output_index];
+    MLDataType ml_data_type = DataTypeImpl::TypeFromProto(*output_def->TypeAsProto());
+    ORT_ENFORCE(ml_data_type->IsTensorType(), "ml_type must be a tensor type, but it is ",
+                DataTypeImpl::ToString(ml_data_type));
+    const TensorTypeBase* tensor_type_base = ml_data_type->AsTensorType();
+    ORT_ENFORCE(nullptr != tensor_type_base);
+    MLDataType elt_type = tensor_type_base->GetElementType();
+    const auto byte_count_per_element = elt_type->Size();
+    if (!saving_str.empty()) {
+      saving_str += " + ";
+    }
+    saving_str = "(" + GetTensorElemCountInSymbolicString(node, output_index) + " * " +
+                 std::to_string(byte_count_per_element) + " * " +
+                 std::to_string(GetSaveRatio()) + ")";
+  }
+  if (saving_str.empty()) {
+    return saving_str;
+  }
+  return "(" + saving_str + ")";
+}
+
+Status MemoryOptimizationPlanner::UpdateNodePlansFromExecutionPlan(const GraphViewer& graph_viewer,
+                                                                   const OrtValueNameIdxMap& ortvalue_name_to_idx_map,
+                                                                   const SequentialExecutionPlan& p_seq_exec_plan) {
+  InlinedHashMap<int, std::string> idx_to_ortvalue_name_map;
+  for (const auto& entry : ortvalue_name_to_idx_map) {
+    idx_to_ortvalue_name_map[entry.second] = entry.first;
+  }
+
+  for (const auto& node_to_optimization_plan : node_to_optimization_plans_map) {
+    const auto& node_plans = node_to_optimization_plan.second;
+
+    for (auto& node_plan : node_plans) {
+      const std::string cluster_id = node_plan->GetClusterId();
+      const Node* node = node_plan->node;
+      for (auto& output_index : node_plan->GetActivationOutputIndices()) {
+        const NodeArg* node_arg = node->OutputDefs()[output_index];
+        const auto& ort_value_name = node_arg->Name();
+        int ort_value_idx;
+        ORT_ENFORCE(ortvalue_name_to_idx_map.GetIdx(ort_value_name, ort_value_idx).IsOK());
+        const auto& alloc_plan = p_seq_exec_plan.allocation_plan;
+        ORT_ENFORCE(ort_value_idx >= 0 && static_cast<size_t>(ort_value_idx) < alloc_plan.size());
+        const auto& per_alloc_plan = alloc_plan[ort_value_idx];
+        if (per_alloc_plan.alloc_kind != AllocKind::kReuse) {
+          continue;
+        }
+        int reused_ort_value_idx = per_alloc_plan.reused_buffer;
+        const auto& reused_ort_value_name = idx_to_ortvalue_name_map.at(reused_ort_value_idx);
+
+        const Node* p_node = graph_viewer.GetProducerNode(reused_ort_value_name);
+        if (p_node == nullptr) {
+          // This is a graph input.
+          continue;
+        }
+
+        int src_op_output_index = optimizer_utils::IndexOfNodeOutput(*p_node, *node_arg);
+        node_plan->reuse_buffers[output_index] = std::make_pair(p_node, src_op_output_index);
+      }
+    }
+  }
+
+  return Status::OK();
+}
+
+Status MemoryOptimizationPlanner::FinalizeNodePlansFromUserConfig(
+    const InlinedHashMap<std::string, UserConfig>& cluster_id_to_user_configs,
+    InlinedHashMap<const Node*, std::shared_ptr<NodeOptimizationPlanBase>>& node_to_opt_plan_map,
+    NodeToClusterApplyContextMap& node_to_apply_context_map) const {
+  if (cluster_id_to_user_configs.size() == 0) {
+    return Status::OK();
+  }
+
+  // Create a temporary map to store the apply context for each cluster pattern.
+  InlinedHashMap<std::string, std::shared_ptr<ClusterApplyContext>> cluster_id_to_apply_contexts_map;
+
+  // We loop all nodes' optimization plans and find the match in user configs.
+  // If found in user configs, we finalize the plan and create/update the apply context for this node.
+  // If not found in user configs, we will not include the node in the returned result.
+  for (const auto& node_to_optimization_plan : node_to_optimization_plans_map) {
+    const auto& node = node_to_optimization_plan.first;
+    const auto& node_plans = node_to_optimization_plan.second;
+
+    for (auto& node_plan : node_plans) {
+      const std::string cluster_id = node_plan->GetClusterId();
+      if (cluster_id_to_user_configs.find(cluster_id) == cluster_id_to_user_configs.end()) {
+        continue;
+      }
+
+      const auto& user_config = cluster_id_to_user_configs.at(cluster_id);
+      if (node_plan->GetOptimizationType() == user_config.type) {
+        // First finalize the plan for this node.
+        node_to_opt_plan_map[node] = node_plan;
+
+        // Create/Update the apply context for this node.
+        if (cluster_id_to_apply_contexts_map.find(cluster_id) == cluster_id_to_apply_contexts_map.end()) {
+          std::shared_ptr<ClusterApplyContext> apply_context = std::make_shared<ClusterApplyContext>();
+          apply_context->requested_count = user_config.requested_count;
+          apply_context->type = user_config.type;
+          apply_context->total_frequency++;
+          cluster_id_to_apply_contexts_map.insert({cluster_id, apply_context});
+        }
+
+        node_to_apply_context_map[node] = cluster_id_to_apply_contexts_map.at(cluster_id);
+
+        // If different plans for the same node have same cluster id, we only need to finalize the first one.
+        // The rest of them will be ignored.
+        break;
+      }
+    }
+  }
+
+  return Status::OK();
+}
+
+}  // namespace onnxruntime::optimizer::memory_optimizer
diff --git a/orttraining/orttraining/core/optimizer/memory_optimizer/optimization_planner.h b/orttraining/orttraining/core/optimizer/memory_optimizer/optimization_planner.h
new file mode 100644
index 0000000000000..0e5e2967ec15a
--- /dev/null
+++ b/orttraining/orttraining/core/optimizer/memory_optimizer/optimization_planner.h
@@ -0,0 +1,133 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+
+#include <memory>
+#include <string>
+
+#include "orttraining/core/optimizer/memory_optimizer/common.h"
+#include "core/framework/ort_value_name_idx_map.h"
+#include "core/framework/sequential_execution_plan.h"
+
+namespace onnxruntime::optimizer::memory_optimizer {
+
+/**
+ * @brief Struct to store properties of a specific subgraph.
+ */
+class ClusterApplyContext {
+ public:
+  ClusterApplyContext() = default;
+
+  OptimizationType type;
+  int requested_count{0};
+  int total_frequency{0};  // The occurrence of this subgraph pattern in the graph.
+
+  int applied_count{0};  // The number of times this subgraph pattern has been really applied in this transformer.
+  int skip_count{0};     // The number of times this subgraph instance has been skipped in reversed topological order.
+};
+
+/**
+ * @brief Base class for a concrete optimization plan.
+ *
+ */
+class NodeOptimizationPlanBase {
+ public:
+  NodeOptimizationPlanBase(const Node* node,
+                           gsl::span<const size_t> activation_output_indices,
+                           float save_ratio)
+      : node(node),
+        activation_output_indices_(activation_output_indices.begin(), activation_output_indices.end()),
+        save_ratio_(save_ratio) {
+  }
+
+  virtual ~NodeOptimizationPlanBase() = default;
+
+  virtual OptimizationType GetOptimizationType() const = 0;
+
+  /**
+   * Get the cluster id for this optimization plan.
+   * This cluster id is used to enable the optimization as a unique identity, for example, for recompute it is a
+   * subgraph string representation.
+   * @return std::string
+   */
+  virtual std::string GetClusterId() const = 0;
+
+  /**
+   * Get a string used to generate node cluster id for this optimization plan.
+   * Node cluster id is on Node level, each node can have multiple optimization plans, each plan generates its
+   * normalization string. Once combined we get Node cluster id. This id is used to categorize nodes into different
+   * groups, showing them as one row in memory optimization opportunity table.
+   * @return std::string
+   */
+  virtual std::string NormalizeForNodeClusterId() const = 0;
+
+  /**
+   * Return all output indices that are used as activation buffers.
+   */
+  gsl::span<const size_t> GetActivationOutputIndices() const { return activation_output_indices_; }
+
+  /**
+   * Return the saving ratio for this optimization plan.
+   */
+  float GetSaveRatio() const { return save_ratio_; }
+
+  /**
+   * Get a symbolic string to represent the memory saving for this optimization plan.
+   */
+  std::string GetMemorySavingSymbolicString() const;
+
+  const Node* node;
+  // A map: output index reusing other node's output (other_node, output index)
+  InlinedHashMap<size_t, NodeOutputPort> reuse_buffers;
+
+ private:
+  InlinedVector<size_t> activation_output_indices_;
+  float save_ratio_ = 1.0f;
+};
+
+using NodeToClusterApplyContextMap = InlinedHashMap<const Node*, std::shared_ptr<ClusterApplyContext>>;
+
+class MemoryOptimizationPlanner {
+ public:
+  void AddNodeOptimizationPlan(const Node* node,
+                               std::shared_ptr<NodeOptimizationPlanBase> plan) {
+    if (node_to_optimization_plans_map.find(node) == node_to_optimization_plans_map.end()) {
+      node_to_optimization_plans_map.insert({node, {}});
+    }
+
+    node_to_optimization_plans_map[node].emplace_back(plan);
+  }
+
+  Status UpdateNodePlansFromExecutionPlan(const GraphViewer& graph_viewer,
+                                          const OrtValueNameIdxMap& ortvalue_name_to_idx_map,
+                                          const SequentialExecutionPlan& p_seq_exec_plan);
+
+  Status FinalizeNodePlansFromUserConfig(
+      const InlinedHashMap<std::string, UserConfig>& cluster_id_to_user_configs,
+      InlinedHashMap<const Node*, std::shared_ptr<NodeOptimizationPlanBase>>& node_to_opt_plan_map,
+      NodeToClusterApplyContextMap& node_to_apply_context_map) const;
+
+  std::string GenerateNodeClusterId(const Node* node) const {
+    ORT_ENFORCE(node_to_optimization_plans_map.find(node) != node_to_optimization_plans_map.end(),
+                "Node not found in node_to_optimization_plans_map.");
+    std::ostringstream oss;
+    const auto& node_plans = node_to_optimization_plans_map.at(node);
+    for (auto& plan : node_plans) {
+      oss << plan->NormalizeForNodeClusterId();
+    }
+
+    return oss.str();
+  }
+
+  const InlinedHashMap<const Node*,
+                       InlinedVector<std::shared_ptr<NodeOptimizationPlanBase>>>&
+  GetNodeToOptimizationPlanMap() const {
+    return node_to_optimization_plans_map;
+  }
+
+ private:
+  InlinedHashMap<const Node*, InlinedVector<std::shared_ptr<NodeOptimizationPlanBase>>> node_to_optimization_plans_map;
+};
+
+}  // namespace onnxruntime::optimizer::memory_optimizer
diff --git a/orttraining/orttraining/core/optimizer/memory_optimizer/recompute_analysis.cc b/orttraining/orttraining/core/optimizer/memory_optimizer/recompute_analysis.cc
new file mode 100644
index 0000000000000..0782cbdae2eec
--- /dev/null
+++ b/orttraining/orttraining/core/optimizer/memory_optimizer/recompute_analysis.cc
@@ -0,0 +1,405 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include <algorithm>
+#include <deque>
+#include <memory>
+#include <set>
+#include <string>
+#include <utility>
+
+#include "orttraining/core/optimizer/memory_optimizer/common.h"
+#include "orttraining/core/optimizer/memory_optimizer/recompute_analysis.h"
+#include "core/framework/data_types.h"
+
+namespace onnxruntime::optimizer::memory_optimizer {
+
+namespace {
+
+constexpr int32_t MAXIMUM_RECOMPUTE_NODE_COUNT = 15;
+
+static size_t GetElementSize(const ONNX_NAMESPACE::DataType& tensor_type) {
+  const ONNX_NAMESPACE::TypeProto& type_proto = ONNX_NAMESPACE::Utils::DataTypeUtils::ToTypeProto(tensor_type);
+  MLDataType ml_data_type = DataTypeImpl::TypeFromProto(type_proto);
+  const TensorTypeBase* tensor_type_base = ml_data_type->AsTensorType();
+  ORT_ENFORCE(nullptr != tensor_type_base);
+  MLDataType elt_type = tensor_type_base->GetElementType();
+  return elt_type->Size();
+}
+
+// TODO(pengwa): extent this function to be more general.
+float InputOutputSizeRatio(const Node* node) {
+  if (node->OpType().compare("Cast") == 0) {
+    const NodeArg* input = node->InputDefs()[0];
+    const NodeArg* output = node->OutputDefs()[0];
+    if (input->TypeAsProto()->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType_STRING ||
+        output->TypeAsProto()->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType_STRING) {
+      return 1.0f;
+    }
+    const auto& ptype1 = input->Type();
+    const auto& ptype2 = output->Type();
+    float ratio = static_cast<float>(GetElementSize(ptype1)) / static_cast<float>(GetElementSize(ptype2));
+    return ratio;
+  }
+
+  return 1.0f;
+}
+
+/**
+ * @brief Used to define per-op recompute config.
+ *
+ */
+struct AllowedRecomputeNodeConfig {
+  InlinedVector<int> input_arg_indices;  // input index to iterate further (bottom up)
+};
+
+// The op types that are supported predefined.
+
+const InlinedHashMap<std::string, AllowedRecomputeNodeConfig>& GetAllowedRecomputeOps(int probe_op_level) {
+  static InlinedHashMap<int, InlinedHashMap<std::string, AllowedRecomputeNodeConfig>> recomputable_op_table_map;
+  if (recomputable_op_table_map.find(probe_op_level) != recomputable_op_table_map.end()) {
+    return recomputable_op_table_map.at(probe_op_level);
+  }
+
+  recomputable_op_table_map.insert({probe_op_level, InlinedHashMap<std::string, AllowedRecomputeNodeConfig>()});
+  auto& recomputable_op_table = recomputable_op_table_map.at(probe_op_level);
+  if (probe_op_level >= static_cast<int>(ProbeLevel::Basic)) {
+    recomputable_op_table.insert({
+        // Binary elementwise
+        {"Add", AllowedRecomputeNodeConfig{{0, 1}}},
+        {"BiasGelu", AllowedRecomputeNodeConfig{{0, 1}}},
+        {"Div", AllowedRecomputeNodeConfig{{0, 1}}},
+        {"Mul", AllowedRecomputeNodeConfig{{0, 1}}},
+        {"Sub", AllowedRecomputeNodeConfig{{0, 1}}},
+
+        // Data layout
+        /// The shape input is trivial whether it exists or not in backward.
+        {"Reshape", AllowedRecomputeNodeConfig{{0}}},
+        {"Squeeze", AllowedRecomputeNodeConfig{{0}}},
+        {"Unsqueeze", AllowedRecomputeNodeConfig{{0}}},
+
+        // Unary elementwise
+        /// The ratio and mode input are trivial whether they exist or not in backward
+        {"BitmaskDropout", AllowedRecomputeNodeConfig{{0}}},
+        /// The axis input is trivial whether it exists or not in backward
+        {"CumSum", AllowedRecomputeNodeConfig{{0}}},
+        {"Dropout", AllowedRecomputeNodeConfig{{0}}},
+        {"Gelu", AllowedRecomputeNodeConfig{{0}}},
+        {"FastGelu", AllowedRecomputeNodeConfig{{0}}},
+
+        // Ternary elementwise
+        {"Where", AllowedRecomputeNodeConfig{{0, 1, 2}}},
+
+        // Data copy
+        {"Tile", AllowedRecomputeNodeConfig{{0}}},
+        {"Cast", AllowedRecomputeNodeConfig{{0}}},
+    });
+  }
+
+  if (probe_op_level >= static_cast<int>(ProbeLevel::Advanced)) {
+    recomputable_op_table.insert({
+        {"MatMul", AllowedRecomputeNodeConfig{{0, 1}}},
+        {"FusedMatMul", AllowedRecomputeNodeConfig{{0, 1}}},
+        {"Softmax", AllowedRecomputeNodeConfig{{0}}},
+        {"BiasSoftmax", AllowedRecomputeNodeConfig{{0, 1}}},
+        {"BiasSoftmaxDropout", AllowedRecomputeNodeConfig{{0, 1}}},
+    });
+  }
+
+  return recomputable_op_table;
+}
+
+/**
+ * @brief Check whether a node is a recomputable node at given probe level.
+ */
+bool IsRecomputable(const Node& node, ProbeLevel probe_level) {
+  const auto& op_table = GetAllowedRecomputeOps(static_cast<int>(probe_level));
+  return op_table.find(node.OpType()) != op_table.end();
+}
+
+/**
+ * @brief Find recomputable subgraphs (has at least one nodes, at most MAXIMUM_RECOMPUTE_NODE_COUNT nodes).
+ *
+ * @param node The entry node to start the subgraph matching (bottom-up), usually the last node of found subgraphs.
+ * @param node_output_index_candidates Candidate output indices of "node", which are consumed by both fw and bw ops.
+ * @param fw_op_output_arg_used_map The activation usage (in fw and bw) mapping.
+ * @param node_index_to_its_order_in_topological_sort_map The mapping of node index to its order in topological sort.
+ *   Used to re-order the collected subgraph nodes.
+ * @param nodes_in_topological_order Collected vector of nodes of found subgraph, in the order of the topological
+ *  sorted.
+ * @param logger Logger.
+ * @param compromise_stashed_activation Whether to compromise stashed activation, e.g. if we cannot find a
+ * recomputable subgraph to save a stashed activation, we can compromise to find a recomputable subgraph to reduce the
+ * size of stashed activation.
+ * @param can_compromise_stashed_activation A bool return value, to indicate there is opportunaties for finding a
+ * compromised subgraph.
+ * @param save_ratio The ratio of memory saving if we can find a recomputable subgraph.
+ * @return Status
+ */
+Status SelectRecomputeSubgraph(const Node& entry_node,
+                               const ProbeLevel probe_level,
+                               const InlinedVector<size_t>& node_output_index_candidates,
+                               const ActivationUsedMap& fw_op_output_arg_used_map,
+                               const InlinedHashMap<NodeIndex, ptrdiff_t>&
+                                   node_index_to_its_order_in_topological_sort_map,
+                               const logging::Logger& logger,
+                               InlinedVector<const Node*>& nodes,
+                               bool compromise_stashed_activation,
+                               bool& can_compromise_stashed_activation,
+                               float& save_ratio) {
+  const auto& recomputable_op_table = GetAllowedRecomputeOps(static_cast<int>(probe_level));
+
+  can_compromise_stashed_activation = false;
+
+  LOGS(logger, VERBOSE) << "Enter SelectRecomputeSubgraph for Node " << entry_node.Name() << "("
+                        << entry_node.OpType() << ")";
+  nodes.clear();
+
+  std::deque<NodeOutputPort> q;
+  for (auto output_index : node_output_index_candidates) {
+    q.push_back(NodeOutputPort(&entry_node, output_index));
+  }
+
+  bool early_stop = false;
+  std::set<NodeOutputPort> visited_output_arg_set;
+  std::set<const Node*> visited_node_set;
+
+  // For the initial activations in queue, they are stashed ones, so we do differently when scanning the queue for them.
+  bool is_first_queue_scan = true;
+  while (nodes.size() < MAXIMUM_RECOMPUTE_NODE_COUNT && !q.empty() && !early_stop) {
+    // Loop all candidate NodeOutputPort, and find the next layer of input nodes.
+    size_t current_queue_size = q.size();
+    for (size_t i = 0; i < current_queue_size; ++i) {
+      NodeOutputPort p = q.front();
+      q.pop_front();
+      const Node* curr_node = p.first;
+
+      // Skip if the node output is already visited.
+      if (std::find(visited_output_arg_set.begin(), visited_output_arg_set.end(), p) !=
+          visited_output_arg_set.end()) {
+        continue;
+      }
+
+      visited_output_arg_set.insert({p});
+
+      // If the node is already visited by from its other output index, skip it.
+      if (visited_node_set.find(curr_node) != visited_node_set.end()) {
+        continue;
+      }
+
+      visited_node_set.insert(curr_node);
+
+      // Bottom-up search rules.
+      // If current op is entry output node (that generates stashed activations):
+      //   1. If the op is not in recomputable_op_table, skip it.
+      // Otherwise:
+      //  If current op is in allowed list, check its input args, and append the producers' NodeOutputPorts to next_q.
+      //  If current op is NOT in allowed list:
+      //    1). the output does not exist in backward, we cannot find a good solution for so, the search terminates.
+      //    2). the output is used in backward, we don't need to trace back further, so continue searching.
+      auto op_recompute_config_it = recomputable_op_table.find(curr_node->OpType());
+      auto cur_output_arg_name = curr_node->OutputDefs()[p.second]->Name();
+      if (is_first_queue_scan) {
+        // We handle the entry node outputs differently because, we don't want this case falls into and succeed one of
+        // the checks in the other branch
+        // 1. "op is not in recompute op list, but its output is used in backward"
+        // 2. "op is in recompute op list, but its output is used in backward"
+        // (either of the above checks is true for entry node outputs)
+        if (op_recompute_config_it == recomputable_op_table.end()) {
+          early_stop = true;
+          LOGS(logger, VERBOSE) << "Entry Node " << curr_node->Name() << "(" << curr_node->OpType() << ") is **NOT** "
+                                << "in recompute op list, search terminates.";
+          break;
+        }
+      } else {
+        if (op_recompute_config_it == recomputable_op_table.end()) {
+          if (fw_op_output_arg_used_map.at(cur_output_arg_name).second) {
+            LOGS(logger, VERBOSE) << "Node " << curr_node->Name() << "(" << curr_node->OpType() << ") is **NOT** in "
+                                  << "recompute op list, but its output [" << cur_output_arg_name << "] is used in "
+                                  << "backward, we don't need trace bottom-up further. Entry node: "
+                                  << entry_node.Name() << "(" << entry_node.OpType() << ")";
+            continue;
+          } else {
+            early_stop = true;
+            LOGS(logger, VERBOSE) << "Node " << curr_node->Name() << "(" << curr_node->OpType() << ") is **NOT** in "
+                                  << "recompute op list, and its output [" << cur_output_arg_name
+                                  << "] does not exist in backward, search terminates. Entry node: "
+                                  << entry_node.Name() << "(" << entry_node.OpType() << ")";
+            break;
+          }
+        }
+
+        if (fw_op_output_arg_used_map.at(cur_output_arg_name).second) {
+          LOGS(logger, VERBOSE) << "Node " << curr_node->Name() << "(" << curr_node->OpType() << ") "
+                                << "is in recompute op list, while its output [" << cur_output_arg_name
+                                << "] is used in backward, we don't need trace bottom-up further. Entry node: "
+                                << entry_node.Name() << "(" << entry_node.OpType() << ")";
+          continue;
+        }
+      }
+
+      // Append node to the selected graph.
+      if (std::find(nodes.begin(), nodes.end(), curr_node) == nodes.end()) {
+        nodes.push_back(curr_node);
+        LOGS(logger, VERBOSE) << "Node " << curr_node->Name() << "(" << curr_node->OpType()
+                              << ") is added in selected subgraph  ";
+      }
+
+      // This check is not matured now, subject to change.
+      float ratio = InputOutputSizeRatio(curr_node);
+      float saving_ratio = 1.0f - ratio;
+      float is_current_node_compromisable = (ratio < 1.f);
+      can_compromise_stashed_activation = can_compromise_stashed_activation || is_current_node_compromisable;
+      if (is_current_node_compromisable) {
+        LOGS(logger, VERBOSE) << "Node " << curr_node->Name() << "(" << curr_node->OpType()
+                              << ") has input/output size " << ratio << " < 1.f, can compromise stashed activation";
+      }
+
+      if (is_current_node_compromisable && compromise_stashed_activation) {
+        LOGS(logger, VERBOSE) << "Node " << curr_node->Name() << "(" << curr_node->OpType() << ") is in "
+                              << "recompute op list, and its output [" << cur_output_arg_name
+                              << "] does not exist in backward, while it meets compromised check, we don't need trace "
+                              << "bottom-up further.";
+        save_ratio = saving_ratio;
+        continue;
+      }
+
+      // Iterate all input nodes according to allowed input arg index of the entry node.
+      const auto& input_arg_indices = op_recompute_config_it->second.input_arg_indices;
+      for (auto it = curr_node->InputEdgesBegin(), end = curr_node->InputEdgesEnd(); it != end; ++it) {
+        const Node::EdgeEnd& input_edge = *it;
+        const auto& parent_node = input_edge.GetNode();
+        const auto parent_node_output_index = input_edge.GetSrcArgIndex();
+        const auto current_node_input_index = input_edge.GetDstArgIndex();
+        if (std::find(input_arg_indices.begin(), input_arg_indices.end(), current_node_input_index) !=
+            input_arg_indices.end()) {
+          NodeOutputPort next_p = std::make_pair(&parent_node, parent_node_output_index);
+
+          LOGS(logger, VERBOSE) << "Node " << parent_node.Name() << "(" << parent_node.OpType() << ")'s "
+                                << parent_node_output_index
+                                << "th output [" << parent_node.OutputDefs()[parent_node_output_index]->Name()
+                                << "] is added in recompute search list  ";
+
+          q.push_back(next_p);
+        }
+      }
+    }
+    // After handling all entry node outputs, we set the flag to false.
+    is_first_queue_scan = false;
+  }
+
+  // If input args are not found in bw, but op count exceed MAXIMUM_RECOMPUTE_NODE_COUNT, skip recompute.
+  if (!q.empty() || early_stop) {
+    LOGS(logger, VERBOSE) << "Fail to find a solution for recompute: current node count is " << nodes.size()
+                          << ", queue size: " << q.size() << ", early stop: " << early_stop;
+    nodes.clear();
+  } else {
+    // Re-order the nodes in topological order.
+    std::sort(nodes.begin(), nodes.end(),
+              [&node_index_to_its_order_in_topological_sort_map](const Node*& lhs, const Node*& rhs) {
+                return node_index_to_its_order_in_topological_sort_map.at(lhs->Index()) <
+                       node_index_to_its_order_in_topological_sort_map.at(rhs->Index());
+              });
+  }
+  return Status::OK();
+}
+
+/**
+ * @brief Convert the recompute subgraph to its string representation.
+ *
+ * @param nodes_in_topological_order The subgraph nodes in topological order.
+ * @param subgraph_string_representation Returns subgraph string representation.
+ * @param log_info Returns log info for users.
+ */
+void NodesInTopoOrderToString(gsl::span<const Node* const> nodes_in_topological_order,
+                              std::string& subgraph_string_representation,
+                              std::string& log_info) {
+  std::ostringstream oss;
+  std::ostringstream subgraph_string_representation_oss;
+  size_t node_count = nodes_in_topological_order.size();
+  for (size_t i = 0; i < node_count; ++i) {
+    if (i < node_count - 1) {  // Ignore the last node.
+      oss << "(name:" << nodes_in_topological_order[i]->Name() << ", type:" << nodes_in_topological_order[i]->OpType()
+          << "),";
+    }
+
+    subgraph_string_representation_oss << nodes_in_topological_order[i]->OpType() << "+";
+  }
+
+  subgraph_string_representation = subgraph_string_representation_oss.str();
+  log_info = oss.str();
+  if (log_info.size() > 0) {
+    log_info = " with its precedent nodes: " + log_info;
+  }
+}
+
+}  // namespace
+
+std::unique_ptr<NodeRecomputePlan> CheckNodeForRecompute(const Node& node,
+                                                         const ProbeLevel probe_level,
+                                                         const ActivationUsedMap& fw_op_output_arg_used_map,
+                                                         const InlinedHashMap<NodeIndex, ptrdiff_t>&
+                                                             node_index_to_its_order_in_topological_sort_map,
+                                                         const InlinedHashMap<const Node*, InlinedVector<size_t>>&
+                                                             candidate_output_args_map,
+                                                         const logging::Logger& logger,
+                                                         bool compromise_stashed_activation,
+                                                         bool& can_compromise_stashed_activation) {
+  if (!IsRecomputable(node, probe_level)) {
+    return nullptr;
+  }
+
+  InlinedVector<const Node*> nodes_in_topological_order;
+  float save_ratio = 1.f;
+  ORT_ENFORCE(SelectRecomputeSubgraph(node,
+                                      probe_level,
+                                      candidate_output_args_map.at(&node),
+                                      fw_op_output_arg_used_map,
+                                      node_index_to_its_order_in_topological_sort_map,
+                                      logger,
+                                      nodes_in_topological_order,
+                                      compromise_stashed_activation,
+                                      can_compromise_stashed_activation,
+                                      save_ratio)
+                  .IsOK());
+  if (nodes_in_topological_order.size() == 0) {
+    return nullptr;
+  }
+
+  std::string subgraph_str_representation, log_info;
+  NodesInTopoOrderToString(nodes_in_topological_order, subgraph_str_representation, log_info);
+
+  LOGS(logger, VERBOSE) << "Node " << node.Name() << "(" << node.OpType() << ") can be recomputed" << log_info;
+
+  return std::make_unique<NodeRecomputePlan>(&node, candidate_output_args_map.at(&node),
+                                             nodes_in_topological_order,
+                                             compromise_stashed_activation,
+                                             save_ratio);
+}
+
+std::string NodeRecomputePlan::GetClusterId() const {
+  std::ostringstream oss;
+  oss << GetNodesInTopoOrderStr();
+  return oss.str();
+}
+
+std::string NodeRecomputePlan::NormalizeForNodeClusterId() const {
+  std::ostringstream oss;
+  oss << "recompute:" << node->OpType() << "-"
+      << compromise_recompute_ << "-";
+  for (auto& output_index : GetActivationOutputIndices()) {
+    oss << output_index << ":" << GetTensorElemCountInSymbolicString(node, output_index);
+    oss << ":" << node->OutputDefs()[output_index]->TypeAsProto()->tensor_type().elem_type() << "-";
+  }
+
+  oss << GetNodesInTopoOrderStr();
+  return oss.str();
+}
+
+std::string NodeRecomputePlan::GetNodesInTopoOrderStr() const {
+  std::string subgraph_str_representation, log_info;
+  NodesInTopoOrderToString(nodes_in_topological_order_, subgraph_str_representation, log_info);
+  return subgraph_str_representation;
+}
+
+}  // namespace onnxruntime::optimizer::memory_optimizer
diff --git a/orttraining/orttraining/core/optimizer/memory_optimizer/recompute_analysis.h b/orttraining/orttraining/core/optimizer/memory_optimizer/recompute_analysis.h
new file mode 100644
index 0000000000000..9211e5044cd86
--- /dev/null
+++ b/orttraining/orttraining/core/optimizer/memory_optimizer/recompute_analysis.h
@@ -0,0 +1,104 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+
+#include <memory>
+#include <string>
+
+#include "orttraining/core/optimizer/memory_optimizer/common.h"
+#include "orttraining/core/optimizer/memory_optimizer/optimization_planner.h"
+
+namespace onnxruntime::optimizer::memory_optimizer {
+
+/**
+ * @brief Level to control allowed operations during subgraph detecting.
+ * Level 0: only allow cheap-to-compute operations.
+ * Level 1: allow more expensive operations.
+ */
+enum class ProbeLevel {
+  Basic = 0,
+  Advanced = 1,
+  LevelMax = 2,
+};
+
+/**
+ * @brief A child class used for Recompute/RecomputeWithCompromise optimization plan.
+ *
+ * For each node generating stashed activations, a recompute plan can be created for it.
+ */
+class NodeRecomputePlan : public NodeOptimizationPlanBase {
+ public:
+  NodeRecomputePlan(const Node* node,
+                    const InlinedVector<size_t>& activation_output_indices,
+                    const InlinedVector<const Node*>& nodes_in_topological_order,
+                    bool compromise_recompute = false,
+                    float save_ratio = 1.0f) : NodeOptimizationPlanBase(node, activation_output_indices, save_ratio) {
+    compromise_recompute_ = compromise_recompute;
+    // Be noted, recompute is node level, each node arg should have the same optimization type.
+    nodes_in_topological_order_ = nodes_in_topological_order;
+  }
+
+  const InlinedVector<const Node*>& GetNodesInTopoOrder() const { return nodes_in_topological_order_; }
+
+  bool IsCompromiseRecompute() const { return compromise_recompute_; }
+
+  OptimizationType GetOptimizationType() const override {
+    return compromise_recompute_ ? OptimizationType::RecomputeWithCompromise
+                                 : OptimizationType::Recompute;
+  }
+
+  /**
+   * @brief Get the cluster id for this recompute plan.
+   * The cluster id is used to identify a unique subgraph.
+   * User can pass such cluster id to enable specific memory optimization for some subgraph.
+   */
+  std::string GetClusterId() const override;
+
+  /**
+   * @brief Get the serialized string for this recompute plan to create Node-level cluster id.
+   * Imagine, a Node can have multiple optimization plans, each plan generates its normalization string.
+   * Once combined we get Node cluster id.
+   *
+   * Node cluster id is used to categorize nodes into different groups, showing them as one row in memory
+   * optimization opportunity table.
+   */
+  std::string NormalizeForNodeClusterId() const override;
+
+  std::string GetNodesInTopoOrderStr() const;
+
+ private:
+  bool compromise_recompute_;
+  InlinedVector<const Node*> nodes_in_topological_order_;
+};
+
+/**
+ * @brief For the node producing stashed activation, check whether a recomputable subgraph can be found or not.
+ *
+ * @param node The entry node to start the subgraph matching (bottom-up), usually the last node of found subgraphs.
+ * @param probe_level The level to control allowed operations during subgraph detecting.
+ * @param fw_op_output_arg_used_map The activation usage (in fw and bw) mapping.
+ * @param node_index_to_its_order_in_topological_sort_map The mapping of node index to its order in topological sort.
+ *   Used to re-order the collected subgraph nodes.
+ * @param candidate_output_args_map A map from node to its candidate activations, which are consumed by both fw and
+ *  bw ops.
+ * @param subgraph_stores A store to maintain all found subgraphs.
+ * @param logger Logger.
+ * @param compromise_stashed_activation Whether to compromise stashed activation, e.g. if we cannot find a
+ * recomputable subgraph to save a stashed activation, we can compromise to find a recomputable subgraph to reduce the
+ * size of stashed activation.
+ * @param can_compromise_stashed_activation A bool return value, to indicate there is opportunaties for finding a
+ * compromised subgraph.
+ */
+std::unique_ptr<NodeRecomputePlan> CheckNodeForRecompute(const Node& node,
+                                                         const ProbeLevel probe_level,
+                                                         const ActivationUsedMap& fw_op_output_arg_used_map,
+                                                         const InlinedHashMap<NodeIndex, ptrdiff_t>&
+                                                             node_index_to_its_order_in_topological_sort_map,
+                                                         const InlinedHashMap<const Node*, InlinedVector<size_t>>&
+                                                             candidate_output_args_map,
+                                                         const logging::Logger& logger,
+                                                         bool compromise_stashed_activation,
+                                                         bool& can_compromise_stashed_activation);
+
+}  // namespace onnxruntime::optimizer::memory_optimizer
diff --git a/orttraining/orttraining/core/optimizer/pythonop_rewriter.cc b/orttraining/orttraining/core/optimizer/pythonop_rewriter.cc
new file mode 100644
index 0000000000000..e1cd71958bed1
--- /dev/null
+++ b/orttraining/orttraining/core/optimizer/pythonop_rewriter.cc
@@ -0,0 +1,114 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#ifdef ENABLE_TRAINING_TORCH_INTEROP
+
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "orttraining/core/optimizer/pythonop_rewriter.h"
+
+#include "core/graph/graph.h"
+#include "core/graph/graph_utils.h"
+#include "orttraining/core/framework/torch/torch_proxy.h"
+#include "orttraining/core/framework/torch/custom_function_register.h"
+
+namespace onnxruntime {
+
+Status PythonOpRewriter::Apply(Graph&, Node& node, RewriteRuleEffect& rule_effect, const logging::Logger&) const {
+  bool modified = false;
+  if (graph_utils::IsSupportedOptypeVersionAndDomain(node, "PythonOp", {1}, kMSDomain) &&
+      node.GetAttributes().find("tensor_reuse_map") == node.GetAttributes().end()) {
+    auto func_name = static_cast<std::string>(node.GetAttributes().at("func_name").s());
+    std::optional<PyObject*> input_alias_function =
+        language_interop_ops::torch::OrtTorchFunctionPool::GetInstance().TryGettingInputAliasFunction(func_name);
+    if (input_alias_function.has_value()) {
+      // Serialize node proto to string
+      ONNX_NAMESPACE::NodeProto node_proto;
+      node.ToProto(node_proto);
+      std::string node_proto_str;
+      node_proto.SerializeToString(&node_proto_str);
+
+      // Call input alias function
+      std::vector<int64_t> fw_all_output_to_tensor_input_reuse_map;
+      std::vector<int64_t> bw_all_output_to_tensor_input_reuse_map;
+      language_interop_ops::torch::TorchProxy::GetInstance().RunInputAliasFunction(
+          static_cast<void*>(input_alias_function.value()),
+          node_proto_str,
+          fw_all_output_to_tensor_input_reuse_map,
+          bw_all_output_to_tensor_input_reuse_map);
+
+      auto input_convention = static_cast<std::string>(node.GetAttributes().at("input_convention").s());
+      {
+        // Handle forward input alias map.
+        std::vector<int64_t> fw_tensor_output_to_tensor_input_reuse_map =
+            std::vector<int64_t>((node.OutputDefs().size()), -1);
+
+        // Map input index from `global` input index to `tensor` input index, because node.InputDefs() only contains
+        // tensor inputs.
+        std::unordered_map<int64_t, int64_t> position_to_tensor_index;
+        int64_t tensor_index = 0;
+        const size_t all_input_count = input_convention.size();
+        position_to_tensor_index.reserve(all_input_count);
+        for (size_t i = 0; i < all_input_count; ++i) {
+          if (input_convention[i] == 'd') {
+            position_to_tensor_index[i] = tensor_index;
+            ++tensor_index;
+          }
+        }
+
+        for (size_t i = 1; i < fw_tensor_output_to_tensor_input_reuse_map.size(); ++i) {
+          if (fw_all_output_to_tensor_input_reuse_map[i - 1] != -1) {
+            ORT_ENFORCE(fw_all_output_to_tensor_input_reuse_map[i - 1] < static_cast<int64_t>(all_input_count),
+                        "PythonOp input alias function output index out of range. func_name: ", func_name, " ",
+                        fw_all_output_to_tensor_input_reuse_map[i - 1], " >= ", all_input_count);
+            fw_tensor_output_to_tensor_input_reuse_map[i] =
+                position_to_tensor_index.at(fw_all_output_to_tensor_input_reuse_map[i - 1]);
+          }
+        }
+
+        node.AddAttribute("tensor_reuse_map", fw_tensor_output_to_tensor_input_reuse_map);
+      }
+
+      {
+        // Handle backward input alias map.
+        auto& output_convention = input_convention;
+        ORT_ENFORCE(bw_all_output_to_tensor_input_reuse_map.size() == output_convention.size(),
+                    "PythonOpGrad input alias function output count mismatch. func_name: ", func_name, " ",
+                    bw_all_output_to_tensor_input_reuse_map.size(), " != ", output_convention.size());
+
+        std::vector<int64_t> bw_tensor_output_to_tensor_input_reuse_map =
+            std::vector<int64_t>(node.InputDefs().size(), -1);
+        size_t tensor_output_index = 0;
+        for (size_t i = 0; i < output_convention.size(); ++i) {
+          if (output_convention[i] == 'd') {
+            ORT_ENFORCE(tensor_output_index < bw_tensor_output_to_tensor_input_reuse_map.size(),
+                        "PythonOpGrad input alias function output count mismatch. func_name: ", func_name, " ",
+                        tensor_output_index, " >= ", bw_tensor_output_to_tensor_input_reuse_map.size());
+            // input index shift by 1 to skip the context
+            bw_tensor_output_to_tensor_input_reuse_map[tensor_output_index] =
+                bw_all_output_to_tensor_input_reuse_map[i] == -1 ? -1 : bw_all_output_to_tensor_input_reuse_map[i] + 1;
+            ++tensor_output_index;
+          }
+        }
+        node.AddAttribute("bw_tensor_reuse_map", bw_tensor_output_to_tensor_input_reuse_map);
+      }
+
+      modified = true;
+    }
+  }
+
+  if (modified)
+    rule_effect = RewriteRuleEffect::kUpdatedCurrentNode;
+
+  return Status::OK();
+}
+
+bool PythonOpRewriter::SatisfyCondition(const Graph&, const Node&, const logging::Logger&) const {
+  return true;
+}
+
+}  // namespace onnxruntime
+
+#endif
diff --git a/orttraining/orttraining/core/optimizer/pythonop_rewriter.h b/orttraining/orttraining/core/optimizer/pythonop_rewriter.h
new file mode 100644
index 0000000000000..5534b190979f0
--- /dev/null
+++ b/orttraining/orttraining/core/optimizer/pythonop_rewriter.h
@@ -0,0 +1,36 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#ifdef ENABLE_TRAINING_TORCH_INTEROP
+
+#pragma once
+
+#include <string>
+#include <vector>
+#include "core/optimizer/rewrite_rule.h"
+
+namespace onnxruntime {
+
+/**
+This transformer is to add schema supplementary for PythonOp.
+
+Currently, add memory reuse output to input map as an attribute, if users registered alias input function
+in `OrtTorchFunctionPool`.
+*/
+
+class PythonOpRewriter : public RewriteRule {
+ public:
+  PythonOpRewriter() noexcept : RewriteRule("PythonOpRewriter") {}
+
+  std::vector<std::string> TargetOpTypes() const noexcept override {
+    return {"PythonOp"};
+  }
+
+ private:
+  bool SatisfyCondition(const Graph& graph, const Node& node, const logging::Logger& logger) const override;
+
+  Status Apply(Graph& graph, Node& node, RewriteRuleEffect& rule_effect, const logging::Logger& logger) const override;
+};
+
+}  // namespace onnxruntime
+#endif
diff --git a/orttraining/orttraining/core/optimizer/scaled_sum_fusion.cc b/orttraining/orttraining/core/optimizer/scaled_sum_fusion.cc
index dcb3abf2474d3..e719a21118028 100644
--- a/orttraining/orttraining/core/optimizer/scaled_sum_fusion.cc
+++ b/orttraining/orttraining/core/optimizer/scaled_sum_fusion.cc
@@ -254,7 +254,9 @@ Status ScaledSumFusion::ApplyImpl(Graph& graph, bool& modified, int /*graph_leve
     handled_scaled_sum_count += 1;
   }
 
-  LOGS(logger, INFO) << "Total fused ScaledSum node count:  " << handled_scaled_sum_count;
+  if (handled_scaled_sum_count > 0) {
+    LOGS(logger, INFO) << "Total fused ScaledSum node count:  " << handled_scaled_sum_count;
+  }
 
   return Status::OK();
 }
diff --git a/orttraining/orttraining/core/optimizer/transpose_replacement..cc b/orttraining/orttraining/core/optimizer/transpose_replacement..cc
new file mode 100644
index 0000000000000..48e9c4d6e6a07
--- /dev/null
+++ b/orttraining/orttraining/core/optimizer/transpose_replacement..cc
@@ -0,0 +1,68 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "orttraining/core/optimizer/transpose_replacement.h"
+
+#include "core/common/logging/logging.h"
+#include "core/optimizer/rewrite_rule.h"
+#include "core/optimizer/utils.h"
+#include "core/graph/graph.h"
+#include "core/graph/graph_utils.h"
+
+namespace onnxruntime {
+
+Status TransposeReplacement::Apply(Graph& graph,
+                                   Node& transpose_node,
+                                   RewriteRuleEffect& rule_effect,
+                                   const logging::Logger& logger) const {
+  auto& transpose_inputs = transpose_node.MutableInputDefs();
+  auto& transpose_outputs = transpose_node.MutableOutputDefs();
+  NodeArg* input = transpose_inputs[0];
+  auto input_shape = input->Shape();
+  if (!input_shape) {
+    LOG_DEBUG_INFO(logger, "Exit TransposeReplacement optimization for input shape is None.");
+    return Status::OK();
+  }
+  auto perm = graph_utils::onnx_repeated_values::RetrieveValues<int64_t>(transpose_node.GetAttributes().at("perm"));
+  InlinedVector<int64_t> new_shape;
+  new_shape.reserve(perm.size());
+  int64_t last_permuted_axis = 0;
+  for (int i = 0; i < static_cast<int>(perm.size()); ++i) {
+    if (!input_shape->dim(static_cast<int>(perm[i])).has_dim_value()) {
+      LOG_DEBUG_INFO(logger, "Exit TransposeReplacement optimization for not supporting symbolic shape.");
+      return Status::OK();
+    }
+    new_shape.push_back(input_shape->dim(static_cast<int>(perm[i])).dim_value());
+    if (input_shape->dim(static_cast<int>(perm[i])).dim_value() == 1)
+      continue;
+    if (perm[i] < last_permuted_axis) {
+      LOG_DEBUG_INFO(logger, "Exit TransposeReplacement optimization for not supporting shape.");
+      return Status::OK();
+    }
+    last_permuted_axis = perm[i];
+  }
+
+  transpose_inputs.push_back(
+      optimizer::compute_optimizer::CreateInitializerFromVector(graph,
+                                                                {static_cast<int64_t>(new_shape.size())},
+                                                                new_shape,
+                                                                graph.GenerateNodeArgName("transpose_reshape_shape")));
+
+  Node& transpose_reshape_node = graph.AddNode(graph.GenerateNodeName("Transpose_Reshape"),
+                                               "Reshape",
+                                               "Transpose replaced Reshape",
+                                               transpose_inputs,
+                                               transpose_outputs,
+                                               nullptr,
+                                               kOnnxDomain);
+  transpose_reshape_node.SetExecutionProviderType(transpose_node.GetExecutionProviderType());
+  graph_utils::FinalizeNodeFusion(graph, transpose_reshape_node, transpose_node);
+  rule_effect = RewriteRuleEffect::kRemovedCurrentNode;
+  return Status::OK();
+}
+
+bool TransposeReplacement::SatisfyCondition(const Graph&, const Node&, const logging::Logger&) const {
+  return true;
+}
+
+}  // namespace onnxruntime
diff --git a/orttraining/orttraining/core/optimizer/transpose_replacement.h b/orttraining/orttraining/core/optimizer/transpose_replacement.h
new file mode 100644
index 0000000000000..c38e402339823
--- /dev/null
+++ b/orttraining/orttraining/core/optimizer/transpose_replacement.h
@@ -0,0 +1,38 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+
+#include "core/optimizer/rewrite_rule.h"
+#include "core/optimizer/compute_optimizer/shared_utils.h"
+
+namespace onnxruntime {
+
+/**
+@Class TransposeReplacement
+
+Transpose is equivalent to a Reshape if:
+ empty dimensions (which dim_value=1) can change place, not empty dimensions must be in
+ the same order in the permuted tenosr.
+ Example: Shape=(1,1,1024,4096) -> perm=(2,0,3,1).
+
+This Rewrite rule replaces Transpose which meets the requirments with Reshape.
+Because Transpose need memory copy while Reshape needn't, this replacement can save overhead for memory copy.
+
+It is attempted to be triggered only on nodes with op type "Transpose".
+*/
+class TransposeReplacement : public RewriteRule {
+ public:
+  TransposeReplacement() noexcept : RewriteRule("TransposeReplacement") {}
+
+  std::vector<std::string> TargetOpTypes() const noexcept override {
+    return {"Transpose"};
+  }
+
+ private:
+  bool SatisfyCondition(const Graph& graph, const Node& node, const logging::Logger& logger) const override;
+
+  Status Apply(Graph& graph, Node& node, RewriteRuleEffect& rule_effect, const logging::Logger& logger) const override;
+};
+
+}  // namespace onnxruntime
diff --git a/orttraining/orttraining/models/runner/training_runner.cc b/orttraining/orttraining/models/runner/training_runner.cc
index d35366e556a42..9ac9f3ee090bb 100644
--- a/orttraining/orttraining/models/runner/training_runner.cc
+++ b/orttraining/orttraining/models/runner/training_runner.cc
@@ -1252,7 +1252,7 @@ Status WithOrtValuesFromTensorProtos(
 
     OrtValue ort_value;
 
-    ORT_RETURN_IF_ERROR(utils::TensorProtoToMLValue(
+    ORT_RETURN_IF_ERROR(utils::TensorProtoToOrtValue(
         Env::Default(), model_location.c_str(), tensor_proto, mem_buffer,
         ort_value));
 
diff --git a/orttraining/orttraining/models/runner/training_util.cc b/orttraining/orttraining/models/runner/training_util.cc
index fa9c3f24b2ee5..7764508d9a091 100644
--- a/orttraining/orttraining/models/runner/training_util.cc
+++ b/orttraining/orttraining/models/runner/training_util.cc
@@ -52,7 +52,7 @@ common::Status DataSet::AddData(const vector<ONNX_NAMESPACE::TensorProto>& featu
     OrtValue ort_value;
     OrtMemoryInfo info("Cpu", OrtDeviceAllocator, OrtDevice{}, 0, OrtMemTypeDefault);
     std::unique_ptr<char[]> buffer = std::make_unique<char[]>(cpu_tensor_length);
-    ORT_RETURN_IF_ERROR(utils::TensorProtoToMLValue(
+    ORT_RETURN_IF_ERROR(utils::TensorProtoToOrtValue(
         Env::Default(), nullptr, tensor_proto, MemBuffer(buffer.get(), cpu_tensor_length, info), ort_value));
 
     sample->push_back(ort_value);
diff --git a/orttraining/orttraining/python/checkpointing_utils.py b/orttraining/orttraining/python/checkpointing_utils.py
deleted file mode 100644
index 460b9982297d1..0000000000000
--- a/orttraining/orttraining/python/checkpointing_utils.py
+++ /dev/null
@@ -1,127 +0,0 @@
-import os
-
-import torch
-
-
-def list_checkpoint_files(checkpoint_dir, checkpoint_prefix, extension=".ort.pt"):
-    ckpt_file_names = [f for f in os.listdir(checkpoint_dir) if f.startswith(checkpoint_prefix)]
-    ckpt_file_names = [f for f in ckpt_file_names if f.endswith(extension)]
-    ckpt_file_names = [os.path.join(checkpoint_dir, f) for f in ckpt_file_names]
-
-    assert len(ckpt_file_names) > 0, 'No checkpoint files found with prefix "{}" in directory {}.'.format(
-        checkpoint_prefix, checkpoint_dir
-    )
-    return ckpt_file_names
-
-
-def get_checkpoint_name(prefix, is_partitioned, world_rank=None, world_size=None):
-    SINGLE_CHECKPOINT_FILENAME = "{prefix}.ort.pt"  # noqa: N806
-    MULTIPLE_CHECKPOINT_FILENAME = "{prefix}.ZeRO.{world_rank}.{world_size}.ort.pt"  # noqa: N806
-
-    if is_partitioned:
-        filename = MULTIPLE_CHECKPOINT_FILENAME.format(
-            prefix=prefix, world_rank=world_rank, world_size=(world_size - 1)
-        )
-    else:
-        filename = SINGLE_CHECKPOINT_FILENAME.format(prefix=prefix)
-
-    return filename
-
-
-def _split_state_dict(state_dict):
-    optimizer_keys = ["Moment_1_", "Moment_2_", "Update_Count_", "Step"]
-    split_sd = {"optimizer": {}, "fp32_param": {}, "fp16_param": {}}
-    for k, v in state_dict.items():
-        mode = "fp32_param"
-        for optim_key in optimizer_keys:
-            if k.startswith(optim_key):
-                mode = "optimizer"
-                break
-        if k.endswith("_fp16"):
-            mode = "fp16_param"
-        split_sd[mode][k] = v
-    return split_sd
-
-
-class CombineZeroCheckpoint:
-    def __init__(self, checkpoint_files, clean_state_dict=None):
-        assert len(checkpoint_files) > 0, "No checkpoint files passed"
-        self.checkpoint_files = checkpoint_files
-        self.clean_state_dict = clean_state_dict
-        self.world_size = int(self.checkpoint_files[0].split("ZeRO")[1].split(".")[2]) + 1
-        assert len(self.checkpoint_files) == self.world_size, f"Could not find {self.world_size} files"
-        self.weight_shape_map = dict()
-        self.sharded_params = set()
-
-    def _split_name(self, name: str):
-        name_split = name.split("_view_")
-        view_num = None
-        if len(name_split) > 1:
-            view_num = int(name_split[1])
-        optimizer_key = ""
-        mp_suffix = ""
-        if name_split[0].startswith("Moment_1"):
-            optimizer_key = "Moment_1_"
-        elif name_split[0].startswith("Moment_2"):
-            optimizer_key = "Moment_2_"
-        elif name_split[0].startswith("Update_Count"):
-            optimizer_key = "Update_Count_"
-        elif name_split[0].endswith("_fp16"):
-            mp_suffix = "_fp16"
-        param_name = name_split[0]
-        if optimizer_key:
-            param_name = param_name.split(optimizer_key)[1]
-        param_name = param_name.split("_fp16")[0]
-        return param_name, optimizer_key, view_num, mp_suffix
-
-    def _update_weight_statistics(self, name, value):
-        if name not in self.weight_shape_map:
-            self.weight_shape_map[name] = value.size()  # original shape of tensor
-
-    def _reshape_tensor(self, key):
-        value = self.aggregate_state_dict[key]
-        weight_name, _, _, _ = self._split_name(key)
-        set_size = self.weight_shape_map[weight_name]
-        self.aggregate_state_dict[key] = value.reshape(set_size)
-
-    def _aggregate(self, param_dict):
-        for k, v in param_dict.items():
-            weight_name, optimizer_key, view_num, mp_suffix = self._split_name(k)
-            if view_num is not None:
-                # parameter is sharded
-                param_name = optimizer_key + weight_name + mp_suffix
-
-                if param_name in self.aggregate_state_dict and optimizer_key not in ["Update_Count_"]:
-                    self.sharded_params.add(param_name)
-                    # Found a previous shard of the param, concatenate shards ordered by ranks
-                    self.aggregate_state_dict[param_name] = torch.cat((self.aggregate_state_dict[param_name], v))
-                else:
-                    self.aggregate_state_dict[param_name] = v
-            else:
-                if k in self.aggregate_state_dict:
-                    assert (self.aggregate_state_dict[k] == v).all(), "Unsharded params must have the same value"
-                else:
-                    self.aggregate_state_dict[k] = v
-                self._update_weight_statistics(weight_name, v)
-
-    def aggregate_checkpoints(self):
-        checkpoint_prefix = self.checkpoint_files[0].split(".ZeRO")[0]
-        self.aggregate_state_dict = dict()
-
-        for i in range(self.world_size):
-            checkpoint_name = get_checkpoint_name(checkpoint_prefix, True, i, self.world_size)
-            rank_state_dict = torch.load(checkpoint_name, map_location=torch.device("cpu"))
-            if "model" in rank_state_dict:
-                rank_state_dict = rank_state_dict["model"]
-
-            if self.clean_state_dict:
-                rank_state_dict = self.clean_state_dict(rank_state_dict)
-
-            rank_state_dict = _split_state_dict(rank_state_dict)
-            self._aggregate(rank_state_dict["fp16_param"])
-            self._aggregate(rank_state_dict["fp32_param"])
-            self._aggregate(rank_state_dict["optimizer"])
-
-        for k in self.sharded_params:
-            self._reshape_tensor(k)
-        return self.aggregate_state_dict
diff --git a/orttraining/orttraining/python/deprecated/__init__.py b/orttraining/orttraining/python/deprecated/__init__.py
deleted file mode 100644
index 6e02db707bc47..0000000000000
--- a/orttraining/orttraining/python/deprecated/__init__.py
+++ /dev/null
@@ -1,6 +0,0 @@
-# -------------------------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# Licensed under the MIT License.
-# --------------------------------------------------------------------------
-from onnxruntime.capi._pybind_state import TrainingParameters  # noqa: F401
-from onnxruntime.capi.training.training_session import TrainingSession  # noqa: F401
diff --git a/orttraining/orttraining/python/deprecated/training_session.py b/orttraining/orttraining/python/deprecated/training_session.py
deleted file mode 100644
index a6900578e174b..0000000000000
--- a/orttraining/orttraining/python/deprecated/training_session.py
+++ /dev/null
@@ -1,68 +0,0 @@
-# -------------------------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# Licensed under the MIT License.
-# --------------------------------------------------------------------------
-
-import os  # noqa: F401
-import sys  # noqa: F401
-
-from onnxruntime.capi import _pybind_state as C
-from onnxruntime.capi.onnxruntime_inference_collection import IOBinding  # noqa: F401
-from onnxruntime.capi.onnxruntime_inference_collection import (
-    InferenceSession,
-    Session,
-    check_and_normalize_provider_args,
-)
-
-
-class TrainingSession(InferenceSession):
-    def __init__(self, path_or_bytes, parameters, sess_options=None, providers=None, provider_options=None):
-        Session.__init__(self)
-
-        if sess_options:
-            self._sess = C.TrainingSession(sess_options)
-        else:
-            self._sess = C.TrainingSession()
-
-        # providers needs to be passed explicitly as of ORT 1.10
-        # retain the pre-1.10 behavior by setting to the available providers.
-        if providers is None:
-            providers = C.get_available_providers()
-
-        providers, provider_options = check_and_normalize_provider_args(
-            providers, provider_options, C.get_available_providers()
-        )
-
-        if isinstance(path_or_bytes, str):
-            config_result = self._sess.load_model(path_or_bytes, parameters, providers, provider_options)
-        elif isinstance(path_or_bytes, bytes):
-            config_result = self._sess.read_bytes(path_or_bytes, parameters, providers, provider_options)
-        else:
-            raise TypeError(f"Unable to load from type '{type(path_or_bytes)}'")
-
-        self.loss_scale_input_name = config_result.loss_scale_input_name
-
-        self._inputs_meta = self._sess.inputs_meta
-        self._outputs_meta = self._sess.outputs_meta
-
-    def __del__(self):
-        if self._sess:
-            self._sess.finalize()
-
-    def get_state(self):
-        return self._sess.get_state()
-
-    def get_model_state(self, include_mixed_precision_weights=False):
-        return self._sess.get_model_state(include_mixed_precision_weights)
-
-    def get_optimizer_state(self):
-        return self._sess.get_optimizer_state()
-
-    def get_partition_info_map(self):
-        return self._sess.get_partition_info_map()
-
-    def load_state(self, dict, strict=False):
-        self._sess.load_state(dict, strict)
-
-    def is_output_fp32_node(self, output_name):
-        return self._sess.is_output_fp32_node(output_name)
diff --git a/orttraining/orttraining/python/ort_trainer.py b/orttraining/orttraining/python/ort_trainer.py
deleted file mode 100644
index 7c90054a85dc5..0000000000000
--- a/orttraining/orttraining/python/ort_trainer.py
+++ /dev/null
@@ -1,1241 +0,0 @@
-import io
-import os
-import warnings
-
-import numpy as np
-import onnx
-import torch
-import torch.nn
-import torch.onnx
-from onnx import helper, numpy_helper
-from packaging.version import Version as LooseVersion
-
-import onnxruntime as ort
-import onnxruntime.capi.pt_patch
-from onnxruntime.tools.symbolic_shape_infer import SymbolicShapeInference
-
-from ..training import postprocess
-from .checkpointing_utils import CombineZeroCheckpoint, get_checkpoint_name, list_checkpoint_files
-
-DEFAULT_OPSET_VERSION = 14
-
-
-class IODescription:
-    def __init__(self, name, shape, dtype=None, num_classes=None):
-        self.name_ = name
-        self.shape_ = shape
-        self.dtype_ = dtype
-        self.num_classes_ = num_classes
-
-
-class ModelDescription:
-    def __init__(self, inputs, outputs):
-        self.inputs_ = inputs
-        self.outputs_ = outputs
-
-
-def resolve_symbolic_dimensions(inputs, input_descs, output_descs):
-    import copy
-
-    output_descs_copy = copy.deepcopy(output_descs)
-    resolved_dims = {}
-    for input, input_desc in zip(inputs, input_descs):
-        for i, axis in enumerate(input_desc.shape_):
-            if isinstance(axis, str):
-                resolved_dims[axis] = input.size()[i]
-
-    for output_desc in output_descs_copy:
-        for i, axis in enumerate(output_desc.shape_):
-            if isinstance(axis, str):
-                output_desc.shape_[i] = resolved_dims[axis]
-
-    if any(isinstance(axis, str) for axis in output_desc.shape_ for output_desc in output_descs):
-        raise RuntimeError("Cannot run model with unknown output dimensions")
-
-    return output_descs_copy
-
-
-def generate_sample(desc, device=None):
-    # symbolic dimensions are described with strings. set symbolic dimensions to be 1
-    size = [s if isinstance(s, (int)) else 1 for s in desc.shape_]
-    if desc.num_classes_:
-        return torch.randint(0, desc.num_classes_, size, dtype=desc.dtype_).to(device)
-    else:
-        return torch.randn(size, dtype=desc.dtype_).to(device)
-
-
-def get_device_index(device):
-    if type(device) == str:
-        # could be 'cuda:0', 'cuda:1', or 'cpu'. with cpu, set index=0
-        device = torch.device(device)
-    return 0 if device.index is None else device.index
-
-
-def input_get_device_index(input):
-    if isinstance(input, (list, tuple)):
-        device_index = get_device_index(input[0].device)
-    else:
-        device_index = get_device_index(input.device)
-
-    return device_index
-
-
-def get_all_gradients_finite_arg_name(session):
-    all_fp16_or_fp32_gradients_finite_node_args = [x for x in session._outputs_meta if "all_gradients_finite" in x.name]
-    if len(all_fp16_or_fp32_gradients_finite_node_args) < 1:
-        raise RuntimeError(
-            "Failed to find a group NodeArg with name that matches 'all_gradients_finite'\
-             from the training session."
-        )
-
-    return all_fp16_or_fp32_gradients_finite_node_args[0].name
-
-
-def get_group_accumulated_gradients_output_node_arg_name(session):
-    # TODO: get the constant string via pybind.
-    # optimizer_graph_builder BuildGroupNode with fixed string: 'Group_Accumulated_Gradients'
-    accumulated_gradients_output_node_args = [
-        x for x in session._outputs_meta if "Group_Accumulated_Gradients" in x.name
-    ]
-    if len(accumulated_gradients_output_node_args) != 1:
-        raise RuntimeError(
-            "Failed to find a group NodeArg with name that matches 'Group_Accumulated_Gradients'\
-             from the training session."
-        )
-
-    return accumulated_gradients_output_node_args[0].name
-
-
-def ort_training_session_run_helper(session, iobinding, inputs, input_descs, output_descs, device, run_options=None):
-    for input, input_desc in zip(inputs, input_descs):
-        device_index = input_get_device_index(input)
-        iobinding.bind_input(
-            input_desc.name_,
-            input.device.type,
-            device_index,
-            dtype_torch_to_numpy(input.dtype),
-            list(input.size()),
-            input.data_ptr(),
-        )
-
-    output_descs_resolved = resolve_symbolic_dimensions(inputs, input_descs, output_descs)
-    torch_outputs = {}
-    for output_desc in output_descs_resolved:
-        torch_tensor = torch.zeros(
-            output_desc.shape_,
-            device=device,
-            dtype=output_desc.eval_dtype_ if hasattr(output_desc, "eval_dtype_") else output_desc.dtype_,
-        )
-        iobinding.bind_output(
-            output_desc.name_,
-            torch_tensor.device.type,
-            get_device_index(device),
-            dtype_torch_to_numpy(torch_tensor.dtype),
-            list(torch_tensor.size()),
-            torch_tensor.data_ptr(),
-        )
-        torch_outputs[output_desc.name_] = torch_tensor
-
-    session.run_with_iobinding(iobinding, run_options)
-    return torch_outputs
-
-
-def FuseSofmaxNLLToSoftmaxCE(onnx_model):  # noqa: N802
-    nll_count = 0
-    while True:
-        nll_count = nll_count + 1
-        nll_loss_node = None
-        nll_loss_node_index = 0
-        for nll_loss_node_index, node in enumerate(onnx_model.graph.node):  # noqa: B007
-            if node.op_type == "nll_loss" or node.op_type == "NegativeLogLikelihoodLoss":
-                nll_loss_node = node
-                break
-
-        if nll_loss_node is None:
-            break
-
-        softmax_node = None
-        softmax_node_index = 0
-        label_input_name = None
-        weight_input_name = None
-        for softmax_node_index, node in enumerate(onnx_model.graph.node):  # noqa: B007
-            if node.op_type == "LogSoftmax":
-                # has to be connected to nll_loss
-                if len(nll_loss_node.input) > 2:
-                    weight_input_name = nll_loss_node.input[2]
-                if node.output[0] == nll_loss_node.input[0]:
-                    softmax_node = node
-                    label_input_name = nll_loss_node.input[1]
-                    break
-                elif node.output[0] == nll_loss_node.input[1]:
-                    softmax_node = node
-                    label_input_name = nll_loss_node.input[0]
-                    break
-            else:
-                if softmax_node is not None:
-                    break
-
-        if softmax_node is None:
-            break
-
-        # delete nll_loss and LogSoftmax nodes in order
-        if nll_loss_node_index < softmax_node_index:
-            del onnx_model.graph.node[softmax_node_index]
-            del onnx_model.graph.node[nll_loss_node_index]
-        else:
-            del onnx_model.graph.node[nll_loss_node_index]
-            del onnx_model.graph.node[softmax_node_index]
-
-        probability_output_name = softmax_node.output[0]
-        node = onnx_model.graph.node.add()
-        inputs = (
-            [softmax_node.input[0], label_input_name, weight_input_name]
-            if weight_input_name
-            else [softmax_node.input[0], label_input_name]
-        )
-        node.CopyFrom(
-            onnx.helper.make_node(
-                "SparseSoftmaxCrossEntropy",
-                inputs,
-                [nll_loss_node.output[0], probability_output_name],
-                "nll_loss_node_" + str(nll_count),
-            )
-        )
-
-    return onnx_model
-
-
-def delete_input_with_name(input, name):
-    index = 0
-    for i in input:
-        if i.name == name:
-            del input[index]
-            break
-        index = index + 1
-
-
-# reference:
-# https://docs.scipy.org/doc/numpy-1.13.0/user/basics.types.html
-# https://pytorch.org/docs/stable/tensors.html
-# also must map to types accepted by:
-# MLDataType NumpyTypeToOnnxRuntimeType(int numpy_type)
-def dtype_torch_to_numpy(torch_dtype):
-    if torch_dtype == torch.float64 or torch_dtype == torch.double:
-        return np.float64
-    elif torch_dtype == torch.float32 or torch_dtype == torch.float:
-        return np.float32
-    elif torch_dtype == torch.float16 or torch_dtype == torch.half:
-        return np.float16
-    elif torch_dtype == torch.int64 or torch_dtype == torch.long:
-        return np.longlong
-    elif torch_dtype == torch.int32 or torch_dtype == torch.int:
-        return np.int32
-    elif torch_dtype == torch.int16 or torch_dtype == torch.short:
-        return np.int16
-    elif torch_dtype == torch.bool:
-        return bool
-    else:
-        raise Exception("Torch type to numpy type mapping unavailable for: " + str(torch_dtype))
-
-
-class model_loss_cls(torch.nn.Module):  # noqa: N801
-    def __init__(self, model, loss_fn):
-        super().__init__()
-        self.model_ = model
-        self.loss_fn_ = loss_fn
-
-    def forward(self, *inputs):
-        # here we assume input can be unpacked into input and label
-        input, label = inputs[:-1], inputs[-1]
-        preds = self.model_(*input)
-        return self.loss_fn_(preds, label), preds
-
-
-class WrapModel(torch.nn.Module):
-    def __init__(self, model, loss_fn, input_names):
-        super().__init__()
-        self.model_ = model
-        self.loss_fn_ = loss_fn
-        self.input_names_ = input_names
-
-    def forward(self, *inputs):
-        import inspect
-
-        # *inputs is given by torch trace. It is in the order of input_names.
-        # model_ takes input in a order (which can be obtained via inspect.signature(model.forward)) different than input_names.
-        sig = inspect.signature(self.model_.forward)
-        list(sig.parameters.keys())
-
-        input_dict = {}
-        for key in sig.parameters:
-            if key in self.input_names_:
-                input_dict[key] = inputs[self.input_names_.index(key)]
-
-        model_out = self.model_(**input_dict)
-        if self.loss_fn_ is None:
-            return model_out
-
-        label = inputs[-1]
-        preds = model_out
-        return self.loss_fn_(preds, label), preds
-
-
-def wrap_for_input_match(model, loss_fn, input_names):
-    import inspect
-
-    sig = inspect.signature(model.forward)
-    ordered_list_keys = list(sig.parameters.keys())
-    if loss_fn:
-        sig_loss = inspect.signature(loss_fn)
-        if len(sig_loss.parameters) != 2:
-            raise RuntimeError("loss function should take two arguments - predict and label.")
-
-        # label shall be the second input to loss_fn.
-        ordered_list_keys = [*ordered_list_keys, list(sig_loss.parameters.keys())[1]]
-
-    # name match is needed only when input_names are a subset
-    # of expected inputs (inputs to model and loss_fn combined).
-    if len(input_names) > len(ordered_list_keys):
-        # this is likely the case where input arguments are packed.
-        # TODO: to unpack the input argument.
-        return model_loss_cls(model, loss_fn) if loss_fn else model
-    elif len(input_names) == len(ordered_list_keys):
-        # in this case, we do not require name match.
-        return model_loss_cls(model, loss_fn) if loss_fn else model
-
-    if not all(x in ordered_list_keys for x in input_names):
-        # model desc has name(s) not matching the model signature. We cannot do anything in this case.
-        # better to warning the user.
-        return model_loss_cls(model, loss_fn) if loss_fn else model
-
-    # if input_names match ordered_list_keys, there is not need for wrapping
-    match = True
-    for i, input_name in enumerate(input_names):
-        if input_name != ordered_list_keys[i]:
-            match = False
-            break
-
-    if match:
-        return model_loss_cls(model, loss_fn) if loss_fn else model
-
-    model = WrapModel(model, loss_fn, input_names)
-
-    return model
-
-
-def convert_model_loss_fn_to_onnx(model, loss_fn, model_desc, device, inputs, opset_version=DEFAULT_OPSET_VERSION):
-    # example: {input0:{0:'batch'}, input1:{0:'batch'}}
-    dynamic_axes = {}
-    for input in model_desc.inputs_:
-        symbolic_axis = {}
-        for i, axis in enumerate(input.shape_):
-            if isinstance(axis, str):
-                symbolic_axis[i] = axis
-        if len(symbolic_axis):
-            dynamic_axes[input.name_] = symbolic_axis
-
-    for output in model_desc.outputs_:
-        symbolic_axis = {}
-        for i, axis in enumerate(output.shape_):
-            if isinstance(axis, str):
-                symbolic_axis[i] = axis
-        if len(symbolic_axis):
-            dynamic_axes[output.name_] = symbolic_axis
-
-    input_names = [input.name_ for input in model_desc.inputs_]
-    output_names = [output.name_ for output in model_desc.outputs_]
-
-    if isinstance(inputs, torch.Tensor):
-        inputs = [inputs]
-    if isinstance(inputs, dict):
-        sample_inputs = [inputs[k.name_].to(device=device) for k in model_desc.inputs_]
-    elif isinstance(inputs, (list, tuple)):
-        sample_inputs = [input.to(device=device) for i, input in enumerate(inputs) if i < len(model_desc.inputs_)]
-    else:
-        raise RuntimeError("Unexpected input type. Only torch.Tensor, or dict/list/tuple of torch.Tensor is supported.")
-
-    # pytorch onnx exporter/trace does not try to match argument names.
-    # e.g. for models with optional inputs, it requires all inputs be present.
-    # this is a problem because the model graph depends on inputs provided.
-    model = wrap_for_input_match(model, loss_fn, input_names)
-
-    model.eval()
-    with torch.no_grad():
-        import copy
-
-        # Deepcopy inputs, since input values may change after model run.
-        sample_inputs_copy = copy.deepcopy(sample_inputs)
-        try:
-            # Deepcopy model, in case model is stateful and changes after model run.
-            model_copy = copy.deepcopy(model)
-        except Exception:
-            model_copy = model
-            warnings.warn(
-                "This model cannot be deep copied (or pickled), which is a required step for stateful models to be properly exported to ONNX."
-                " Compute will continue, but unexpected results may occur!"
-            )
-
-        sample_outputs = model_copy(*sample_inputs_copy)
-    if isinstance(sample_outputs, torch.Tensor):
-        sample_outputs = [sample_outputs]
-    for sample_output, output_desc in zip(sample_outputs, model_desc.outputs_):
-        output_desc.dtype_ = sample_output.dtype
-    model.train()
-
-    f = io.BytesIO()
-
-    # Other export options to use(this is for backward compatibility).
-    other_export_options = {}
-    other_export_options["training"] = True
-
-    # This option was added after 1.4 release.
-    if LooseVersion(torch.__version__) > LooseVersion("1.4.0") and LooseVersion(torch.__version__) < LooseVersion(
-        "1.10.0"
-    ):
-        other_export_options["enable_onnx_checker"] = False
-    # This option was added after 1.6 release.
-    if LooseVersion(torch.__version__) >= LooseVersion("1.6.0"):
-        other_export_options["training"] = torch.onnx.TrainingMode.TRAINING
-
-    # Deepcopy inputs, since input values may change after model run.
-    import copy
-
-    sample_inputs_copy = copy.deepcopy(sample_inputs)
-
-    # Enable contrib ops export from PyTorch
-    from onnxruntime.tools import pytorch_export_contrib_ops
-
-    pytorch_export_contrib_ops.register()
-
-    torch.onnx._export(
-        model,
-        tuple(sample_inputs_copy),
-        f,
-        input_names=input_names,
-        output_names=output_names,
-        opset_version=opset_version,
-        dynamic_axes=dynamic_axes,
-        do_constant_folding=False,
-        **other_export_options,
-    )
-
-    onnx_model = onnx.load_model_from_string(f.getvalue())
-
-    # Remove 'model_.' prefix introduced by model wrapper for initializers.
-    if isinstance(model, (WrapModel, model_loss_cls)):
-        replace_name_dict = {}
-        for n in onnx_model.graph.initializer:
-            if n.name.startswith("model_."):
-                replace_name_dict[n.name] = n.name[len("model_.") :]
-                n.name = replace_name_dict[n.name]
-        for n in onnx_model.graph.node:
-            for i, name in enumerate(n.input):
-                if name in replace_name_dict:
-                    n.input[i] = replace_name_dict[name]
-
-    return onnx_model
-
-
-def create_ort_training_session_with_optimizer(
-    model,
-    device,
-    training_optimizer_name,
-    lr_params_feed_name,
-    map_optimizer_attributes,
-    world_rank=-1,
-    world_size=1,
-    gradient_accumulation_steps=1,
-    bind_parameters=False,
-    use_mixed_precision=False,
-    allreduce_post_accumulation=False,
-    deepspeed_zero_stage=0,
-    enable_grad_norm_clip=True,
-    frozen_weights=[],  # noqa: B006
-    opset_version=DEFAULT_OPSET_VERSION,
-    use_deterministic_compute=False,
-    use_memory_efficient_gradient=False,
-    enable_adasum=False,
-    optimized_model_filepath="",
-):
-    output_name = model.graph.output[0].name
-    ort_parameters = ort.TrainingParameters()
-    ort_parameters.loss_output_name = output_name
-    ort_parameters.use_mixed_precision = use_mixed_precision
-    ort_parameters.world_rank = world_rank
-    ort_parameters.world_size = world_size
-    ort_parameters.gradient_accumulation_steps = gradient_accumulation_steps
-    ort_parameters.allreduce_post_accumulation = allreduce_post_accumulation
-    ort_parameters.deepspeed_zero_stage = deepspeed_zero_stage
-    ort_parameters.enable_grad_norm_clip = enable_grad_norm_clip
-    ort_parameters.set_gradients_as_graph_outputs = False
-    ort_parameters.use_memory_efficient_gradient = use_memory_efficient_gradient
-    ort_parameters.enable_adasum = enable_adasum
-    output_types = {}
-    for output in model.graph.output:
-        output_types[output.name] = output.type.tensor_type
-
-    # pybind does not allow to add directly to ort_parameters.weights_to_train.
-    # Have to work around by using a temporary weights_to_train.
-    torch_params = {}
-    optimizer_attributes_map = {}
-    optimizer_int_attributes_map = {}
-
-    unused_frozen_weights = [n for n in frozen_weights if n not in [i.name for i in model.graph.initializer]]
-    if unused_frozen_weights:
-        raise RuntimeError(f"{unused_frozen_weights} in frozen_weights not found in model weights.")
-
-    weights_to_train = set()
-    for initializer in model.graph.initializer:
-        if initializer.name in frozen_weights:
-            continue
-        weights_to_train.add(initializer.name)
-        if map_optimizer_attributes is not None:
-            attributes = map_optimizer_attributes(initializer.name)
-            optimizer_attributes_map[initializer.name] = {}
-            optimizer_int_attributes_map[initializer.name] = {}
-            for k, v in attributes.items():
-                if isinstance(v, float):
-                    optimizer_attributes_map[initializer.name][k] = v
-                elif isinstance(v, int):
-                    optimizer_int_attributes_map[initializer.name][k] = v
-                else:
-                    raise ValueError("Optimizer attributes must be either float or int.")
-        else:
-            optimizer_attributes_map[initializer.name] = {}
-            optimizer_int_attributes_map[initializer.name] = {}
-
-    if bind_parameters:
-        for initializer in model.graph.initializer:
-            torch_tensor = torch.nn.Parameter(torch.as_tensor(numpy_helper.to_array(initializer), device=device))
-            delete_input_with_name(model.graph.input, initializer.name)
-            model.graph.input.extend(
-                [helper.make_tensor_value_info(initializer.name, initializer.data_type, initializer.dims)]
-            )
-            torch_params[initializer.name] = torch_tensor
-
-        del model.graph.initializer[:]
-
-    ort_parameters.weights_to_train = weights_to_train
-    ort_parameters.training_optimizer_name = training_optimizer_name
-    ort_parameters.lr_params_feed_name = lr_params_feed_name
-    ort_parameters.optimizer_attributes_map = optimizer_attributes_map
-    ort_parameters.optimizer_int_attributes_map = optimizer_int_attributes_map
-
-    sessionOptions = ort.SessionOptions()  # noqa: N806
-    sessionOptions.use_deterministic_compute = use_deterministic_compute
-    if len(optimized_model_filepath) > 0:
-        sessionOptions.optimized_model_filepath = optimized_model_filepath
-    session = ort.TrainingSession(model.SerializeToString(), ort_parameters, sessionOptions)
-    train_io_binding = session.io_binding()
-    eval_io_binding = session.io_binding()
-
-    if bind_parameters:
-        for param in torch_params:
-            torch_tensor = torch_params[param]
-
-            train_io_binding.bind_input(
-                param,
-                torch_tensor.device.type,
-                get_device_index(torch_tensor.device),
-                dtype_torch_to_numpy(torch_params[param].dtype),
-                list(torch_tensor.size()),
-                torch_tensor.data_ptr(),
-            )
-            eval_io_binding.bind_input(
-                param,
-                torch_tensor.device.type,
-                get_device_index(torch_tensor.device),
-                dtype_torch_to_numpy(torch_params[param].dtype),
-                list(torch_tensor.size()),
-                torch_tensor.data_ptr(),
-            )
-
-    return session, train_io_binding, eval_io_binding, output_name, torch_params, output_types
-
-
-def save_checkpoint(
-    model, checkpoint_dir, checkpoint_prefix="ORT_checkpoint", checkpoint_state_dict=None, include_optimizer_state=True
-):
-    if checkpoint_state_dict is None:
-        checkpoint_state_dict = {"model": model.state_dict(include_optimizer_state)}
-    else:
-        checkpoint_state_dict.update({"model": model.state_dict(include_optimizer_state)})
-
-    assert os.path.exists(checkpoint_dir), f"ERROR: Checkpoint directory doesn't exist: {checkpoint_dir}"
-
-    checkpoint_name = get_checkpoint_name(
-        checkpoint_prefix, model.deepspeed_zero_stage_, model.world_rank, model.world_size
-    )
-    checkpoint_file = os.path.join(checkpoint_dir, checkpoint_name)
-
-    if os.path.exists(checkpoint_file):
-        warnings.warn(f"{checkpoint_file} already exists, overwriting.")
-
-    torch.save(checkpoint_state_dict, checkpoint_file)
-
-
-def _load_single_checkpoint(model, checkpoint_dir, checkpoint_prefix, is_partitioned, strict):
-    checkpoint_name = get_checkpoint_name(checkpoint_prefix, is_partitioned, model.world_rank, model.world_size)
-    checkpoint_file = os.path.join(checkpoint_dir, checkpoint_name)
-
-    if is_partitioned:
-        assert_msg = (
-            "Couldn't find checkpoint file {}."
-            "Optimizer partitioning is enabled using ZeRO. Please make sure that the "
-            "checkpoint file exists for rank {} of {}."
-        ).format(checkpoint_file, model.world_rank, model.world_size)
-    else:
-        assert_msg = f"Couldn't find checkpoint file {checkpoint_file}."
-
-    assert os.path.exists(checkpoint_file), assert_msg
-
-    checkpoint_state = torch.load(checkpoint_file, map_location="cpu")
-
-    model.load_state_dict(checkpoint_state["model"], strict=strict)
-    del checkpoint_state["model"]
-    return checkpoint_state
-
-
-def _load_multi_checkpoint(model, checkpoint_dir, checkpoint_prefix, strict):
-    checkpoint_files = list_checkpoint_files(checkpoint_dir, checkpoint_prefix)
-
-    ckpt_agg = CombineZeroCheckpoint(checkpoint_files)
-    aggregate_state_dict = ckpt_agg.aggregate_checkpoints()
-
-    model.load_state_dict(aggregate_state_dict, strict=strict)
-
-    # aggregate other keys in the state_dict.
-    # Values will be overwritten for matching keys among workers
-    all_checkpoint_states = {}
-    for checkpoint_file in checkpoint_files:
-        checkpoint_state = torch.load(checkpoint_file, map_location="cpu")
-        del checkpoint_state["model"]
-        all_checkpoint_states.update(checkpoint_state)
-    return all_checkpoint_states
-
-
-def load_checkpoint(model, checkpoint_dir, checkpoint_prefix="ORT_checkpoint", strict=False):
-    checkpoint_files = list_checkpoint_files(checkpoint_dir, checkpoint_prefix)
-    is_partitioned = False
-    if len(checkpoint_files) > 1:
-        warnings.warn(
-            f"Found more than one file with prefix {checkpoint_prefix} in directory {checkpoint_dir}."
-            "Attempting to load ZeRO checkpoint."
-        )
-        is_partitioned = True
-    if (not model.deepspeed_zero_stage_) and is_partitioned:
-        return _load_multi_checkpoint(model, checkpoint_dir, checkpoint_prefix, strict)
-    else:
-        return _load_single_checkpoint(model, checkpoint_dir, checkpoint_prefix, is_partitioned, strict)
-
-
-class ORTTrainer:
-    def __init__(
-        self,
-        model,
-        loss_fn,
-        model_desc,
-        training_optimizer_name,
-        map_optimizer_attributes,
-        learning_rate_description,
-        device,
-        gradient_accumulation_steps=1,
-        world_rank=0,
-        world_size=1,
-        use_mixed_precision=False,
-        allreduce_post_accumulation=False,
-        global_step=0,
-        get_lr_this_step=None,
-        loss_scaler=None,
-        deepspeed_zero_stage=0,
-        enable_grad_norm_clip=True,
-        frozen_weights=[],  # noqa: B006
-        _opset_version=DEFAULT_OPSET_VERSION,
-        _enable_internal_postprocess=True,
-        _extra_postprocess=None,
-        _use_deterministic_compute=False,
-        use_memory_efficient_gradient=False,
-        run_symbolic_shape_infer=False,
-        enable_adasum=False,
-        optimized_model_filepath="",
-    ):
-        super().__init__()
-        """
-        Initialize ORTTrainer.
-
-        Args:
-
-            model: one of
-               - a PyTorch model (class that inherits from torch.nn.Module)
-               - a combined PyTorch model and loss function.
-                  Inputs to this combined PyTorch model are a concatenation of the
-                  model's input and the loss function's label input.
-                  Outputs are a concatenation of the loss function's output and the
-                  model's output.
-               - a combined ONNX model and loss function.
-            loss_fn: one of
-               - a PyTorch loss function if 'model' is a PyTorch model. A loss
-                 function takes two inputs (prediction, label) and outputs a loss
-                 tensor.
-               - None if model is already combined with a loss function.
-            model_desc: Specify input/output shapes, types, and names.
-               Must be consistent with the training model.
-            training_optimizer_name: one of
-               - 'SGDOptimizer'
-               - 'AdamOptimizer'
-               - 'LambOptimizer'
-            map_optimizer_attributes: for optimizers with weight-dependent
-               parameters. A callable that maps weight name to a set of optimization
-               parameters.
-               Defaults to None.
-            learning_rate_description: the name, shape and type of the learning
-               rate in form of IODescription(Learning_Rate_Name, [1,], torch.float32).
-               Because learning_rate is an input to the training model,
-               Learning_Rate_Name must be specified so that there is no name conflict
-               within the model.
-            device: device to store tensors (e.g. 'cpu', 'cuda', 'cuda:<int_idx>').
-            gradient_accumulation_steps: number of training steps to accumulate
-               gradients before averaging and applying them.
-               Defaults to 1.
-            world_rank: rank id used for distributed training.
-               Defaults to 0.
-            world_size: number of ranks participating in distributed training.
-               Defaults to 1.
-            use_mixed_precision: flag to enable mixed precision (aka fp16).
-               Defaults to False.
-            allreduce_post_accumulation: controls whether overlaping gradient
-               computation is applied with allreduce.
-               Defaults to False.
-            global_step: training step that is used as input to 'get_lr_this_step'.
-               Defaults to 0.
-            get_lr_this_step: functor used as learning rate scheduler.
-               It uses 'global_step' as input.
-               Defaults to None.
-            loss_scaler: updates loss scale automatically when 'use_mixed_precision'
-               is specified.
-               Defaults to None.
-            deepspeed_zero_stage: controls whether to partition state using the DeepSpeed ZeRO technique.  Stages 0 and 1 are supported.
-               Defaults to 0 (disabled).
-            enable_grad_norm_clip: enables gradient norm clipping.
-               Defaults to True.
-            frozen_weights: list of model parameters to be frozen (not trained).
-               Defaults to [].
-            _enable_internal_postprocess: whether to run or not the internal postprocesses.
-               Defaults to True
-            _extra_postprocess: a callable to postprocess the ONNX model that is converted from PyTorch.
-               Defaults to None
-            use_memory_efficient_gradient: use memory aware gradient builder.
-               Defaults to False
-            run_symbolic_shape_infer: run symbolic shape inference
-               Defaults to False
-            optimized_model_filepath: path to output the optimized training graph.
-               Defaults to "" (no output).
-        """
-        warnings.warn(
-            "ORTTrainer is deprecated and will be removed in ort release 1.14. Please use ORTModule instead.",
-            FutureWarning,
-        )
-        warnings.warn(
-            "DISCLAIMER: This is an early version of an experimental training API and it is subject to change. DO NOT create production applications with it"
-        )
-        self.is_train = True
-
-        self.torch_model_ = None
-        self.onnx_model_ = None
-        self._enable_internal_postprocess = _enable_internal_postprocess
-        self._extra_postprocess = _extra_postprocess
-
-        if isinstance(model, torch.nn.Module):
-            self.torch_model_ = model
-            self.loss_fn_ = loss_fn
-            self._torch_state_dict_keys = list(model.state_dict().keys())
-        else:
-            self._torch_state_dict_keys = []
-            self.onnx_model_ = model
-            if loss_fn is not None:
-                warnings.warn("loss_fn is not used when creating ORTTrainer because an ONNX model is provided.")
-            # TODO: accept loss_fn as an onnx model. build self.onnx_model_ with model and loss_fn
-            self.loss_fn_ = None
-
-            if self._enable_internal_postprocess:
-                postprocess.run_postprocess(self.onnx_model_)
-
-            if self._extra_postprocess:
-                self._extra_postprocess(self.onnx_model_)
-
-        self.model_desc_ = model_desc
-        self.input_desc_with_lr = [*self.model_desc_.inputs_, learning_rate_description]
-
-        self.world_rank = world_rank
-        self.world_size = world_size
-        self.use_mixed_precision = use_mixed_precision
-
-        self.session = None
-        self.device_ = device
-        self.gradient_accumulation_steps = gradient_accumulation_steps
-        # we use self.current_step to count calls to train_step. It is used for gradient accumulation.
-        # gradients are being accumulated when self.current_step is not divisible by gradient_accumulation_steps.
-        # gradients are updated when self.current_step is divisible by gradient_accumulation_steps.
-        self.current_step = 0
-
-        # we use self.global_step_ to count optimizations being performed.
-        # it is used to calculate learning rate if self.get_lr_this_step_ is provided.
-        self.global_step_ = global_step
-        self.get_lr_this_step_ = get_lr_this_step
-        self.loss_scaler_ = loss_scaler
-
-        if self.get_lr_this_step_ is not None or self.loss_scaler_ is not None:
-            warnings.warn("It is experimental to use learning rate scheduler and loss scaler inside ORTTrainer.")
-        self.training_optimizer_name_ = training_optimizer_name
-        self.learning_rate_description_ = learning_rate_description
-        self.map_optimizer_attributes_ = map_optimizer_attributes
-        self.allreduce_post_accumulation_ = allreduce_post_accumulation
-        self.deepspeed_zero_stage_ = deepspeed_zero_stage
-        self.enable_grad_norm_clip_ = enable_grad_norm_clip
-        self.frozen_weights_ = frozen_weights
-        self.opset_version_ = _opset_version
-        self.state_dict_ = None
-        self._use_deterministic_compute = _use_deterministic_compute
-        self.use_memory_efficient_gradient = use_memory_efficient_gradient
-        self.run_symbolic_shape_infer = run_symbolic_shape_infer
-        self.enable_adasum = enable_adasum
-        self.optimized_model_filepath = optimized_model_filepath
-
-        # use this special string to workaround a corner case that external loss_scale is passed into train_step as kwargs.
-        # see prepare_input_and_fetches for more details.
-        self.loss_scale_input_name = "default_loss_scale_input_name"
-
-        self._init_session()
-
-    def _init_session(self):
-        if self.onnx_model_ is None:
-            return
-
-        self._verify_fully_optimized_model(self.onnx_model_)
-
-        if self.run_symbolic_shape_infer:
-            self.onnx_model_ = SymbolicShapeInference.infer_shapes(
-                self.onnx_model_, auto_merge=True, guess_output_rank=True
-            )
-
-        # old ort session may already exists and occupies GPU memory when creating new session, this may cause OOM error.
-        # for example, load_state_dict will be called before returing the function, and it calls _init_session again
-        del self.session
-        (
-            self.session,
-            self.train_io_binding,
-            self.eval_io_binding,
-            self.output_name,
-            _,
-            self.output_types,
-        ) = create_ort_training_session_with_optimizer(
-            self.onnx_model_,
-            self.device_,
-            self.training_optimizer_name_,
-            self.learning_rate_description_.name_,
-            self.map_optimizer_attributes_,
-            self.world_rank,
-            self.world_size,
-            self.gradient_accumulation_steps,
-            bind_parameters=False,
-            use_mixed_precision=self.use_mixed_precision,
-            allreduce_post_accumulation=self.allreduce_post_accumulation_,
-            deepspeed_zero_stage=self.deepspeed_zero_stage_,
-            enable_grad_norm_clip=self.enable_grad_norm_clip_,
-            frozen_weights=self.frozen_weights_,
-            opset_version=self.opset_version_,
-            use_deterministic_compute=self._use_deterministic_compute,
-            use_memory_efficient_gradient=self.use_memory_efficient_gradient,
-            enable_adasum=self.enable_adasum,
-            optimized_model_filepath=self.optimized_model_filepath,
-        )
-
-        self.loss_scale_input_name = self.session.loss_scale_input_name
-
-        if self.use_mixed_precision:
-            self.input_desc_with_lr_and_loss_scale = [
-                *self.input_desc_with_lr,
-                IODescription(self.loss_scale_input_name, [], torch.float32),
-            ]
-
-        # ORT backend has modified model output dtype from float32 to float16.
-        for o_desc in self.model_desc_.outputs_:
-            if (
-                self.use_mixed_precision
-                and o_desc.dtype_ == torch.float32
-                and not self.session.is_output_fp32_node(o_desc.name_)
-            ):
-                o_desc.eval_dtype_ = torch.float16
-            else:
-                o_desc.eval_dtype_ = o_desc.dtype_
-
-        # gradient accumulation buffers are connected to a single node with a boolean, dimension 1 tensor output.
-        # add a matching output to drive gradient accumulation.
-        if self.gradient_accumulation_steps > 1:
-            self.output_desc_with_group_accumulated_gradients = [
-                *self.model_desc_.outputs_,
-                IODescription(get_group_accumulated_gradients_output_node_arg_name(self.session), [1], torch.bool),
-            ]
-
-        if self.use_mixed_precision:
-            # when ready to use accumulated gradient with mixed precision, we need to fetch all_infinite to determine
-            # if the gradient is usable.
-            self.output_desc_with_all_fp_16_or_fp32_gradients_finite = [
-                *self.model_desc_.outputs_,
-                IODescription(get_all_gradients_finite_arg_name(self.session), [1], torch.bool),
-            ]
-
-        if self.state_dict_:
-            self.load_state_dict(self.state_dict_, self.strict_)
-        self.state_dict_ = None
-
-    def _init_onnx_model(self, inputs):
-        if self.onnx_model_ is not None:
-            return
-
-        if self.torch_model_ is not None:
-            # NOTE: pt model is moved to cpu to conserve gpu memory.
-            self.torch_model_.cpu()
-            # torch buffers created using 'register_buffer' are not meant to be trainable.
-            torch_buffers = list(dict(self.torch_model_.named_buffers()).keys())
-            self.frozen_weights_ = self.frozen_weights_ + torch_buffers
-            self.onnx_model_ = convert_model_loss_fn_to_onnx(
-                self.torch_model_,
-                self.loss_fn_,
-                self.model_desc_,
-                torch.device("cpu"),
-                inputs,
-                opset_version=self.opset_version_,
-            )
-
-            if self._enable_internal_postprocess:
-                postprocess.run_postprocess(self.onnx_model_)
-
-            if self._extra_postprocess:
-                self._extra_postprocess(self.onnx_model_)
-
-        self._init_session()
-
-    def train(self):
-        self.is_train = True
-
-    def eval(self):
-        self.is_train = False
-
-    def _update_onnx_model_initializers(self, state_tensors):
-        # replace the initializers with new value
-        new_weights = []
-        replace_indices = []
-        for i, w in enumerate(self.onnx_model_.graph.initializer):
-            if w.name in state_tensors:
-                new_weights.append(numpy_helper.from_array(state_tensors[w.name], w.name))
-                replace_indices.append(i)
-        replace_indices.sort(reverse=True)
-        for w_i in replace_indices:
-            del self.onnx_model_.graph.initializer[w_i]
-        self.onnx_model_.graph.initializer.extend(new_weights)
-
-    def state_dict(self, include_optimizer_state=True):
-        if not self.session:
-            warnings.warn(
-                "ONNXRuntime training session is not initialized yet. "
-                "Please run train_step or eval_step at least once before calling state_dict()."
-            )
-            return {}
-
-        # extract trained weights
-        session_state = self.session.get_state()
-        torch_state = {}
-        for name in session_state:
-            torch_state[name] = torch.from_numpy(session_state[name])
-
-        # extract untrained weights and buffer
-        for n in self.onnx_model_.graph.initializer:
-            if n.name not in torch_state:
-                torch_state[n.name] = torch.from_numpy(numpy_helper.to_array(n))
-
-        # Need to remove redundant initializers and name suffices to map back to original torch state names
-        if not include_optimizer_state and self._torch_state_dict_keys:
-            return {key: torch_state[key] for key in self._torch_state_dict_keys if key in torch_state}
-        return torch_state
-
-    def load_state_dict(self, state_dict, strict=False):
-        # Note: It may happen ONNX model has not yet been initialized
-        # In this case we cache a reference to desired state and delay the restore until after initialization
-        # Unexpected behavior will result if the user changes the reference before initialization
-        if not self.session:
-            self.state_dict_ = state_dict
-            self.strict_ = strict
-            return
-
-        # update onnx model from loaded state dict
-        cur_initializers_names = [n.name for n in self.onnx_model_.graph.initializer]
-        new_initializers = {}
-
-        for name in state_dict:
-            if name in cur_initializers_names:
-                new_initializers[name] = state_dict[name].numpy()
-            elif strict:
-                raise RuntimeError(f"Checkpoint tensor: {name} is not present in the model.")
-
-        self._update_onnx_model_initializers(new_initializers)
-
-        # create new session based on updated onnx model
-        self.state_dict_ = None
-        self._init_session()
-
-        # load training state
-        session_state = {name: state_dict[name].numpy() for name in state_dict}
-        self.session.load_state(session_state, strict)
-
-    def save_as_onnx(self, path):
-        if not self.session:
-            warnings.warn(
-                "ONNXRuntime training session is not initialized yet. "
-                "Please run train_step or eval_step at least once before calling save_as_onnx()."
-            )
-            return
-        state_tensors = self.session.get_state()
-        self._update_onnx_model_initializers(state_tensors)
-
-        with open(path, "wb") as f:
-            f.write(self.onnx_model_.SerializeToString())
-
-    def _prepare_input_and_fetches(
-        self, input_desc_with_, internal_learning_rate, internal_loss_scale, *args, **kwargs
-    ):
-        fetches = None
-        if type(args) == tuple and len(args) == 1 and type(args[0]) == list:
-            input = tuple(args[0])
-        else:
-            input = args
-
-        for input_desc in input_desc_with_:
-            if input_desc.name_ in kwargs:
-                input = (*input, kwargs[input_desc.name_])
-        if internal_learning_rate is not None:
-            input = (*input, internal_learning_rate)
-        if internal_loss_scale is not None:
-            input = (*input, internal_loss_scale)
-        elif self.use_mixed_precision:
-            # loss_scale input name is needed to call train_step, for example:
-            #   kwargs[model.loss_scale_input_name] = loss_scale
-            #   outputs = model.train_step(*args, **kwargs)
-            # However, when first time train_step is called model.loss_scale_input_name is not set.
-            # To workaround this problem, we use the special name 'default_loss_scale_input_name' to indicate
-            # the loss_scale.
-            if "default_loss_scale_input_name" in kwargs:
-                input = (*input, kwargs["default_loss_scale_input_name"])
-
-        fetches = None
-        if "fetches" in kwargs:
-            fetches = kwargs["fetches"]
-
-        return input, fetches
-
-    def train_step(self, *args, **kwargs):
-        """
-        inputs: model inputs, labels, learning rate, and, if in mixed_precision mode, loss_scale.
-        outputs: if fetches is not provided, outputs are loss and
-            (if in mixed mode and is finishing gradient accumulation) all_finite.
-            if fetches is provided, outputs contains these requested with fetches.
-        fetches: names of requested outputs
-        """
-
-        # inputs to the ONNX model includes inputs to the original PyTorch model
-        # plus learning rate and loss_scale if self.use_mixed_precision is True.
-        # 1. when there are internal learning_rate and loss_scale (in fp16 cases) generators,
-        #   *args and **kwargs together contain ONLY and COMPLETE inputs to the PyTorch model.
-        #   In this case, changes to the training script is minimized.
-        # 2. without internal learning rate and loss scale (in fp16 cases) generators,
-        #   *args and **kwargs passed in from the training script shall contains
-        #   inputs to the PyTorch model plus learning_rate and loss_scale.
-        #   it optionally contains the fetches.
-        # localized arguments (*args) contains inputs to the ONNX model.
-        # named arguments can contain both inputs, learning_rate and loss_scale, and the fetches
-
-        learning_rate, loss_scale = None, None
-        if self.get_lr_this_step_ is not None:
-            # $args, **kwargs contains inputs to the pytorch model
-            lr_this_step = self.get_lr_this_step_(self.global_step_)
-            learning_rate = torch.tensor([lr_this_step])
-        if self.loss_scaler_ is not None and self.use_mixed_precision:
-            loss_scale = torch.tensor([self.loss_scaler_.loss_scale_])
-
-        if self.onnx_model_ is None:
-            sample_input, _ = self._prepare_input_and_fetches(self.model_desc_.inputs_, None, None, *args, **kwargs)
-            self._init_onnx_model(sample_input)
-
-        if self.use_mixed_precision:
-            input, fetches = self._prepare_input_and_fetches(
-                self.input_desc_with_lr_and_loss_scale, learning_rate, loss_scale, *args, **kwargs
-            )
-            assert len(self.input_desc_with_lr_and_loss_scale) == len(input)
-            input_descs = self.input_desc_with_lr_and_loss_scale
-        else:
-            input, fetches = self._prepare_input_and_fetches(
-                self.input_desc_with_lr, learning_rate, loss_scale, *args, **kwargs
-            )
-            assert len(self.input_desc_with_lr) == len(input)
-            input_descs = self.input_desc_with_lr
-
-        self.current_step += 1
-
-        # handle gradient accumulation in fully optimized mode
-        run_options = None
-        has_if_all_finite = False
-        if fetches:
-            output_desc = [output for fetch in fetches for output in self.model_desc_.outputs_ if output.name_ == fetch]
-        elif self.current_step % self.gradient_accumulation_steps != 0:
-            run_options = ort.RunOptions()
-            run_options.only_execute_path_to_fetches = True
-            output_desc = self.output_desc_with_group_accumulated_gradients
-        elif self.use_mixed_precision:
-            has_if_all_finite = True
-            output_desc = self.output_desc_with_all_fp_16_or_fp32_gradients_finite
-        else:
-            output_desc = self.model_desc_.outputs_
-
-        if not isinstance(input, (list, tuple)):
-            input = (input,)
-
-        session_run_results = ort_training_session_run_helper(
-            self.session, self.train_io_binding, input, input_descs, output_desc, self.device_, run_options
-        )
-
-        if has_if_all_finite:
-            # After session run with all_fp32_gradients_finite, we need to clear the iobinding's output state.
-            # Otherwise next run with only_execute_path_to_fetches will lead to gradient all reduce
-            # because all_fp32_gradients_finite is still in the feed.
-            self.train_io_binding.clear_binding_outputs()
-            all_finite = session_run_results[self.output_desc_with_all_fp_16_or_fp32_gradients_finite[-1].name_]
-            if self.loss_scaler_ is not None:
-                self.loss_scaler_.update_loss_scale(all_finite)
-            if all_finite:
-                # optimization has done, increase self.global_step_
-                self.global_step_ = self.global_step_ + 1
-        elif self.current_step % self.gradient_accumulation_steps == 0:
-            # optimization has done, increase self.global_step_
-            self.global_step_ = self.global_step_ + 1
-
-        if fetches is not None:
-            results = [session_run_results[fetch] for fetch in fetches]
-        elif has_if_all_finite and self.loss_scaler_ is None:
-            # return descripted outputs plus the all_finite flag so that the training script can handle loss scaling.
-            results = [
-                session_run_results[output_desc.name_]
-                for output_desc in self.output_desc_with_all_fp_16_or_fp32_gradients_finite
-            ]
-        else:
-            results = [session_run_results[output_desc.name_] for output_desc in self.model_desc_.outputs_]
-        return results[0] if len(results) == 1 else results
-
-    def __call__(self, *args, **kwargs):
-        if self.is_train:
-            return self.train_step(*args, **kwargs)
-        else:
-            return self.eval_step(*args, **kwargs)
-
-    def eval_step(self, *args, **kwargs):
-        """
-        inputs: model inputs and/or labels.
-        outputs: if 'fetches' is not provided, outputs are loss and
-            (if in mixed mode and is finishing gradient accumulation) all_finite.
-            if fetches is provided, outputs contains these requested with fetches.
-        fetches: names of requested outputs
-        """
-
-        # with model_loss_cls, the last input is label, first output is loss
-        input, fetches = self._prepare_input_and_fetches(self.model_desc_.inputs_, None, None, *args, **kwargs)
-
-        if self.onnx_model_ is None:
-            if self.torch_model_ is not None:
-                self._init_onnx_model(input)
-            else:
-                raise RuntimeError(
-                    "Model is unintialized. Please ensure a valid ONNX model or PyTorch model is provided to this Trainer."
-                )
-
-        input_desc = self.model_desc_.inputs_[0 : len(input)]
-        if fetches is None:
-            output_desc = self.model_desc_.outputs_
-        else:
-            output_desc = [output for fetch in fetches for output in self.model_desc_.outputs_ if output.name_ == fetch]
-
-        if not isinstance(input, (list, tuple)):
-            input = (input,)
-
-        run_options = ort.RunOptions()
-        run_options.only_execute_path_to_fetches = True
-        run_options.training_mode = False
-
-        session_run_results = ort_training_session_run_helper(
-            self.session, self.eval_io_binding, input, input_desc, output_desc, self.device_, run_options
-        )
-
-        if len(session_run_results) == 1:
-            return session_run_results[list(session_run_results.keys())[0]]
-        else:
-            return [session_run_results[output_desc.name_] for output_desc in output_desc]
-
-    def _verify_fully_optimized_model(self, model):
-        assert len(model.graph.output) > 0
-        # model's first output must be the loss tensor
-        if model.graph.output[0].type.tensor_type.elem_type not in {
-            onnx.TensorProto.FLOAT,
-            onnx.TensorProto.FLOAT16,
-            onnx.TensorProto.DOUBLE,
-            onnx.TensorProto.COMPLEX64,
-            onnx.TensorProto.COMPLEX128,
-            onnx.TensorProto.BFLOAT16,
-            onnx.TensorProto.FLOAT8E4M3FN,
-            onnx.TensorProto.FLOAT8E4M3FNUZ,
-            onnx.TensorProto.FLOAT8E5M2,
-            onnx.TensorProto.FLOAT8E5M2FNUZ,
-        }:
-            raise RuntimeError(
-                "the first output of a model to run with fully optimized ORT backend must be float types."
-            )
-        if len(model.graph.output[0].type.tensor_type.shape.dim) != 0:
-            raise RuntimeError(
-                "the first output of a model to run with fully optimized ORT backend assumed to be loss and must be a scalar."
-            )
-
-
-class LossScaler:
-    def __init__(
-        self,
-        loss_scale_input_name,
-        is_dynamic_scale,
-        loss_scale=float(1 << 16),
-        up_scale_window=2000,
-        min_loss_scale=1.0,
-        max_loss_scale=float(1 << 24),
-    ):
-        super().__init__()
-        self.loss_scale_input_name_ = loss_scale_input_name
-        self.is_dynamic_scale_ = is_dynamic_scale
-        self.initial_loss_scale_ = loss_scale
-        self.up_scale_window_ = up_scale_window
-        self.min_loss_scale_ = min_loss_scale
-        self.max_loss_scale_ = max_loss_scale
-        self.loss_scale_ = loss_scale
-        self.stable_steps_ = 0
-
-    def update_loss_scale(self, is_all_finite):
-        if not self.is_dynamic_scale_:
-            return
-
-        if is_all_finite:
-            self.stable_steps_ += 1
-
-            if self.stable_steps_ >= self.up_scale_window_:
-                self.loss_scale_ = min(self.max_loss_scale_, self.loss_scale_ * 2)
-                self.stable_steps_ = 0
-        else:
-            self.loss_scale_ = max(self.min_loss_scale_, self.loss_scale_ / 2)
-            self.stable_steps_ = 0
-
-    def reset(self):
-        self.loss_scale_ = self.initial_loss_scale_
-        self.stable_steps_ = 0
diff --git a/orttraining/orttraining/python/orttraining_pybind_state.cc b/orttraining/orttraining/python/orttraining_pybind_state.cc
index 3f3aa396e6ca0..a5f46d88e4e8b 100644
--- a/orttraining/orttraining/python/orttraining_pybind_state.cc
+++ b/orttraining/orttraining/python/orttraining_pybind_state.cc
@@ -18,7 +18,6 @@
 #include "core/session/environment.h"
 #include "core/session/custom_ops.h"
 #include "core/dlpack/dlpack_converter.h"
-#include "orttraining/core/session/training_session.h"
 #include "orttraining/core/agent/training_agent.h"
 #include "orttraining/core/graph/gradient_config.h"
 #include "orttraining/core/graph/optimizer_config.h"
@@ -113,14 +112,11 @@ struct TrainingParameters {
   std::unordered_set<std::string> weights_to_train;
   std::unordered_set<std::string> weights_not_to_train;
 
-  onnxruntime::training::TrainingSession::ImmutableWeights immutable_weights;
-
   // optimizer
   std::string training_optimizer_name;
   std::string lr_params_feed_name = "Learning_Rate";
   std::unordered_map<std::string, std::unordered_map<std::string, float>> optimizer_attributes_map;
   std::unordered_map<std::string, std::unordered_map<std::string, int64_t>> optimizer_int_attributes_map;
-  onnxruntime::training::TrainingSession::OptimizerState optimizer_initial_state;
   std::unordered_map<std::string, std::vector<int>> sliced_schema;
   std::unordered_map<std::string, int> sliced_axes;
   std::vector<std::string> sliced_tensor_names;
@@ -206,185 +202,6 @@ struct PyGradientGraphBuilderContext {
         local_registries_(local_registries) {}
 };
 
-// TODO: this method does not handle parallel optimization.
-TrainingConfigurationResult ConfigureSessionForTraining(
-    training::PipelineTrainingSession* sess, TrainingParameters& parameters) {
-  // TODO tix, refactor the mpi related code to populate all fields correctly by default.
-  ORT_ENFORCE(parameters.data_parallel_size <= parameters.world_size, "data_parallel_size: ", parameters.data_parallel_size, ", world_size: ", parameters.world_size);
-  ORT_ENFORCE(parameters.horizontal_parallel_size <= parameters.world_size, "horizontal_parallel_size: ", parameters.horizontal_parallel_size, ", world_size: ", parameters.world_size);
-  ORT_ENFORCE(parameters.pipeline_parallel_size <= parameters.world_size, "pipeline_parallel_size: ", parameters.pipeline_parallel_size, ", world_size: ", parameters.world_size);
-
-  // When DxHxP != the total number of ranks, we try adjusting D so that DxHxP == the total number of ranks.
-  if (parameters.world_size != parameters.data_parallel_size * parameters.horizontal_parallel_size * parameters.pipeline_parallel_size) {
-    ORT_ENFORCE(parameters.world_size % parameters.horizontal_parallel_size * parameters.pipeline_parallel_size == 0,
-                "D, H, P sizes are incorrect. To enable automatic correction, total number of ranks must be a divisible by HxP.");
-
-    const auto new_data_parallel_size = parameters.world_size / (parameters.horizontal_parallel_size * parameters.pipeline_parallel_size);
-    parameters.data_parallel_size = new_data_parallel_size;
-
-    const std::string msg = "Cannot distribute " + std::to_string(parameters.world_size) + " ranks for distributed computation with D=" + std::to_string(parameters.data_parallel_size) +
-                            ", H=" + std::to_string(parameters.horizontal_parallel_size) + ", P=" + std::to_string(parameters.pipeline_parallel_size) + ", so D is automatically changed to " + std::to_string(new_data_parallel_size);
-    LOGS(*(sess->GetLogger()), WARNING) << msg;
-  }
-
-  training::PipelineTrainingSession::TrainingConfiguration config{};
-  config.weight_names_to_train = parameters.weights_to_train;
-  config.weight_names_to_not_train = parameters.weights_not_to_train;
-  config.immutable_weights = parameters.immutable_weights;
-  config.gradient_accumulation_steps = parameters.gradient_accumulation_steps;
-
-  config.distributed_config.world_rank = parameters.world_rank;
-  config.distributed_config.world_size = parameters.world_size;
-  config.distributed_config.local_rank = parameters.local_rank;
-  config.distributed_config.local_size = parameters.local_size;
-  config.distributed_config.data_parallel_size = parameters.data_parallel_size;
-  config.distributed_config.horizontal_parallel_size = parameters.horizontal_parallel_size;
-  config.distributed_config.pipeline_parallel_size = parameters.pipeline_parallel_size;
-  config.distributed_config.num_pipeline_micro_batches = parameters.num_pipeline_micro_batches;
-  config.distributed_config.sliced_schema = parameters.sliced_schema;
-  config.distributed_config.sliced_axes = parameters.sliced_axes;
-  config.distributed_config.sliced_tensor_names = parameters.sliced_tensor_names;
-
-  if (parameters.use_mixed_precision) {
-    training::PipelineTrainingSession::TrainingConfiguration::MixedPrecisionConfiguration mp{};
-    mp.use_mixed_precision_initializers = true;
-
-    config.mixed_precision_config = mp;
-  }
-
-  if (config.distributed_config.pipeline_parallel_size > 1) {
-    training::PipelineTrainingSession::TrainingConfiguration::PipelineConfiguration pipeline_config;
-
-    // Currently don't support auto-partition. User needs to pass in cut information for pipeline
-    pipeline_config.do_partition = true;
-    assert(!parameters.pipeline_cut_info_string.empty());
-
-    auto process_with_delimiter = [](std::string& input_str, const std::string& delimiter) {
-      std::vector<std::string> result;
-      size_t pos = 0;
-      while ((pos = input_str.find(delimiter)) != std::string::npos) {
-        std::string token = input_str.substr(0, pos);
-        result.emplace_back(token);
-        input_str.erase(0, pos + delimiter.length());
-      }
-      // push the last split of substring into result.
-      result.emplace_back(input_str);
-      return result;
-    };
-
-    auto process_cut_info = [&](std::string& cut_info_string) {
-      std::vector<PipelineTrainingSession::TrainingConfiguration::CutInfo> cut_list;
-      const std::string group_delimiter = ",";
-      const std::string edge_delimiter = ":";
-      const std::string consumer_delimiter = "/";
-      const std::string producer_consumer_delimiter = "-";
-
-      auto cut_info_groups = process_with_delimiter(cut_info_string, group_delimiter);
-      for (auto& cut_info_group : cut_info_groups) {
-        PipelineTrainingSession::TrainingConfiguration::CutInfo cut_info;
-        auto cut_edges = process_with_delimiter(cut_info_group, edge_delimiter);
-        for (auto& cut_edge : cut_edges) {
-          auto process_edge = process_with_delimiter(cut_edge, producer_consumer_delimiter);
-          if (process_edge.size() == 1) {
-            PipelineTrainingSession::TrainingConfiguration::CutEdge edge{process_edge[0]};
-            cut_info.emplace_back(edge);
-          } else {
-            ORT_ENFORCE(process_edge.size() == 2);
-            auto consumer_list = process_with_delimiter(process_edge[1], consumer_delimiter);
-
-            PipelineTrainingSession::TrainingConfiguration::CutEdge edge{process_edge[0], consumer_list};
-            cut_info.emplace_back(edge);
-          }
-        }
-        cut_list.emplace_back(cut_info);
-      }
-      return cut_list;
-    };
-
-    pipeline_config.cut_list = process_cut_info(parameters.pipeline_cut_info_string);
-    config.pipeline_config = pipeline_config;
-  }
-  config.loss_name = parameters.loss_output_name;
-
-  if (!parameters.training_optimizer_name.empty()) {
-    training::PipelineTrainingSession::TrainingConfiguration::OptimizerConfiguration opt{};
-    opt.name = parameters.training_optimizer_name;
-    opt.learning_rate_input_name = parameters.lr_params_feed_name;
-    opt.weight_attributes_generator = [&parameters](const std::string& weight_name) {
-      const auto it = parameters.optimizer_attributes_map.find(weight_name);
-      ORT_ENFORCE(
-          it != parameters.optimizer_attributes_map.end(),
-          "Failed to find attribute map for weight ", weight_name);
-      return it->second;
-    };
-    opt.weight_int_attributes_generator = [&parameters](const std::string& weight_name) {
-      const auto it = parameters.optimizer_int_attributes_map.find(weight_name);
-      ORT_ENFORCE(
-          it != parameters.optimizer_int_attributes_map.end(),
-          "Failed to find int attribute map for weight ", weight_name);
-      return it->second;
-    };
-    opt.use_mixed_precision_moments = parameters.use_fp16_moments;
-    opt.do_all_reduce_in_mixed_precision_type = true;
-    // TODO: this mapping is temporary.
-    // For now, nccl allreduce kernel only implements for allreduce_post_accumulation
-    // hovorod allreduce kernel only implements for not allreduce_post_accumulation.
-    // eventually we will have one all reduce kernel and let opt to have
-    // an allreduce_post_accumulation option and remove the use_nccl option.
-    opt.use_nccl = parameters.allreduce_post_accumulation;
-    opt.deepspeed_zero = onnxruntime::training::ZeROConfig(parameters.deepspeed_zero_stage);
-    opt.enable_grad_norm_clip = parameters.enable_grad_norm_clip;
-
-    // TODO reduction types
-    if (parameters.enable_adasum) {
-#ifdef USE_CUDA
-      opt.adasum_reduction_type = training::AdasumReductionType::GpuHierarchicalReduction;
-#else
-      opt.adasum_reduction_type = training::AdasumReductionType::CpuReduction;
-#endif
-    }
-
-    config.optimizer_config = opt;
-  }
-
-  if (!parameters.optimizer_initial_state.empty()) {
-    config.init_optimizer_states = parameters.optimizer_initial_state;
-  }
-
-  config.gradient_graph_config.use_memory_efficient_gradient = parameters.use_memory_efficient_gradient;
-  config.gradient_graph_config.set_gradients_as_graph_outputs = parameters.set_gradients_as_graph_outputs;
-
-  config.graph_transformer_config.attn_dropout_recompute = parameters.attn_dropout_recompute;
-  config.graph_transformer_config.gelu_recompute = parameters.gelu_recompute;
-  config.graph_transformer_config.transformer_layer_recompute = parameters.transformer_layer_recompute;
-  config.graph_transformer_config.number_recompute_layers = parameters.number_recompute_layers;
-  config.graph_transformer_config.propagate_cast_ops_config.strategy = parameters.propagate_cast_ops_strategy;
-  config.graph_transformer_config.propagate_cast_ops_config.level = parameters.propagate_cast_ops_level;
-  config.graph_transformer_config.propagate_cast_ops_config.allow = parameters.propagate_cast_ops_allow;
-
-  if (!parameters.model_after_graph_transforms_path.empty()) {
-    config.model_after_graph_transforms_path = ToPathString(parameters.model_after_graph_transforms_path);
-  }
-  if (!parameters.model_with_gradient_graph_path.empty()) {
-    config.model_with_gradient_graph_path = ToPathString(parameters.model_with_gradient_graph_path);
-  }
-  if (!parameters.model_with_training_graph_path.empty()) {
-    config.model_with_training_graph_path = ToPathString(parameters.model_with_training_graph_path);
-  }
-
-  training::PipelineTrainingSession::TrainingConfigurationResult config_result{};
-
-  OrtPybindThrowIfError(sess->ConfigureForTraining(config, config_result));
-
-  TrainingConfigurationResult python_config_result{};
-  if (config_result.mixed_precision_config_result.has_value()) {
-    const auto& mp_config_result = config_result.mixed_precision_config_result.value();
-    python_config_result.loss_scale_input_name = mp_config_result.loss_scale_input_name;
-  }
-
-  return python_config_result;
-}
-
 #if defined(USE_MPI)
 void CopyMPIContextToTrainingParameters(TrainingParameters& parameters, const logging::Logger* logger) {
   LOGS(*logger, INFO) << "MPIContext::GetInstance().GetWorldRank(): " << MPIContext::GetInstance().GetWorldRank();
@@ -424,7 +241,7 @@ std::unordered_map<std::string, std::unordered_map<std::string, py::object>> Con
   return py_tensor_state;
 }
 
-void addObjectMethodsForTraining(py::module& m, ExecutionProviderRegistrationFn ep_registration_fn) {
+void addObjectMethodsForTraining(py::module& m) {
   py::class_<OrtValueCache, OrtValueCachePtr>(m, "OrtValueCache")
       .def(py::init<>())
       .def("insert", [](const OrtValueCachePtr& cache_ptr, std::string node_arg_name, OrtValue& value) {
@@ -451,7 +268,6 @@ void addObjectMethodsForTraining(py::module& m, ExecutionProviderRegistrationFn
   py::class_<TrainingParameters> parameters(m, "TrainingParameters", R"pbdoc(Configuration information for training.)pbdoc");
   parameters.def(py::init())
       .def_readwrite("loss_output_name", &TrainingParameters::loss_output_name)
-      .def_readwrite("immutable_weights", &TrainingParameters::immutable_weights)
       .def_readwrite("weights_not_to_train", &TrainingParameters::weights_not_to_train)
       .def_readwrite("weights_to_train", &TrainingParameters::weights_to_train)
       .def_readwrite("sliced_tensor_names", &TrainingParameters::sliced_tensor_names)
@@ -484,25 +300,6 @@ void addObjectMethodsForTraining(py::module& m, ExecutionProviderRegistrationFn
       .def_readwrite("data_parallel_size", &TrainingParameters::data_parallel_size)
       .def_readwrite("horizontal_parallel_size", &TrainingParameters::horizontal_parallel_size)
       .def_readwrite("pipeline_parallel_size", &TrainingParameters::pipeline_parallel_size)
-      .def("set_optimizer_initial_state",
-           [](TrainingParameters& parameters, const std::unordered_map<std::string, std::unordered_map<std::string, py::object>>& py_state) -> void {
-             onnxruntime::training::TrainingSession::OptimizerState optim_state;
-             for (const auto& weight_it : py_state) {
-               auto state = weight_it.second;
-               NameMLValMap state_tensors;
-               for (auto& initializer : state) {
-                 OrtValue ml_value;
-
-                 // InputDeflist is null because parameters havent been tied to session yet
-                 // Likewise, there is no need to specify the name (as the name was previously used to lookup the def list)
-                 CreateGenericMLValue(nullptr, GetAllocator(), "", initializer.second, &ml_value, true);
-                 ThrowIfPyErrOccured();
-                 state_tensors.emplace(initializer.first, ml_value);
-               }
-               optim_state.emplace(weight_it.first, state_tensors);
-             }
-             parameters.optimizer_initial_state = optim_state;
-           })
       .def_readwrite("model_after_graph_transforms_path", &TrainingParameters::model_after_graph_transforms_path)
       .def_readwrite("model_with_gradient_graph_path", &TrainingParameters::model_with_gradient_graph_path)
       .def_readwrite("model_with_training_graph_path", &TrainingParameters::model_with_training_graph_path)
@@ -533,12 +330,44 @@ void addObjectMethodsForTraining(py::module& m, ExecutionProviderRegistrationFn
         ORT_UNUSED_PARAMETER(obj);
 #endif
   });
-  m.def("register_torch_autograd_function", [](std::string key, py::object obj) -> void {
+  m.def("register_torch_autograd_function", [](std::string function_full_qual_name, py::object obj) -> void {
+#ifdef ENABLE_TRAINING_TORCH_INTEROP
+    auto& pool = onnxruntime::language_interop_ops::torch::OrtTorchFunctionPool::GetInstance();
+    pool.RegisterTorchAutogradFunction(function_full_qual_name, obj.ptr());
+#else
+        ORT_UNUSED_PARAMETER(function_full_qual_name);
+        ORT_UNUSED_PARAMETER(obj);
+#endif
+  });
+  m.def("register_shape_inference_function", [](std::string function_full_qual_name, py::object obj) -> void {
 #ifdef ENABLE_TRAINING_TORCH_INTEROP
     auto& pool = onnxruntime::language_interop_ops::torch::OrtTorchFunctionPool::GetInstance();
-    pool.RegisterTorchAutogradFunction(key, obj.ptr());
+    pool.RegisterShapeInferenceFunction(function_full_qual_name, obj.ptr());
 #else
-        ORT_UNUSED_PARAMETER(key);
+        ORT_UNUSED_PARAMETER(function_full_qual_name);
+        ORT_UNUSED_PARAMETER(obj);
+#endif
+  });
+  m.def("get_shape_inference_function", [](std::string function_full_qual_name) -> py::object {
+#ifdef ENABLE_TRAINING_TORCH_INTEROP
+    auto& pool = onnxruntime::language_interop_ops::torch::OrtTorchFunctionPool::GetInstance();
+    auto py_object = pool.TryGettingShapeInferenceFunction(function_full_qual_name);
+    if (py_object.has_value()) {
+      Py_INCREF(py_object.value());
+      return py::reinterpret_steal<py::object>(py_object.value());
+    }
+#else
+        ORT_UNUSED_PARAMETER(function_full_qual_name);
+#endif
+    return py::none();
+  });
+
+  m.def("register_input_alias_function", [](std::string function_full_qual_name, py::object obj) -> void {
+#ifdef ENABLE_TRAINING_TORCH_INTEROP
+    auto& pool = onnxruntime::language_interop_ops::torch::OrtTorchFunctionPool::GetInstance();
+    pool.RegisterInputAliasFunction(function_full_qual_name, obj.ptr());
+#else
+        ORT_UNUSED_PARAMETER(function_full_qual_name);
         ORT_UNUSED_PARAMETER(obj);
 #endif
   });
@@ -579,130 +408,6 @@ void addObjectMethodsForTraining(py::module& m, ExecutionProviderRegistrationFn
         });
 #endif
 
-  py::class_<TrainingConfigurationResult> config_result(m, "TrainingConfigurationResult", "pbdoc(Configuration result for training.)pbdoc");
-  config_result.def(py::init())
-      .def_property_readonly("loss_scale_input_name", [](const TrainingConfigurationResult& result) -> py::object {
-        if (result.loss_scale_input_name.has_value()) {
-          return py::str{result.loss_scale_input_name.value()};
-        }
-        return py::none();
-      });
-
-  // Thin wrapper over internal C++ InferenceSession to accommodate custom op library management for the Python user
-  struct PyTrainingSession : public PyInferenceSession {
-    PyTrainingSession(std::shared_ptr<Environment> env, const PySessionOptions& so)
-        : PyInferenceSession(env, std::make_unique<PipelineTrainingSession>(so.value, *env)) {
-    }
-    ~PyTrainingSession() = default;
-  };
-
-  py::class_<PyTrainingSession, PyInferenceSession> training_session(m, "TrainingSession");
-  training_session
-      .def(py::init([](const PySessionOptions& so) {
-        auto& training_env = GetTrainingEnv();
-        return std::make_unique<PyTrainingSession>(training_env.GetORTEnv(), so);
-      }))
-      .def(py::init([]() {
-        auto& training_env = GetTrainingEnv();
-        return std::make_unique<PyTrainingSession>(training_env.GetORTEnv(), GetDefaultCPUSessionOptions());
-      }))
-      .def("finalize", [](py::object) {
-#if defined(USE_MPI)
-#ifdef _WIN32
-        // https://docs.microsoft.com/en-us/windows/win32/dlls/dynamic-link-library-best-practices
-        // shutdown_mpi() is not called within MPIContext destructor because of DllMain's restriction
-        // call shutdown_mpi() here instead.
-        MPIContext::shutdown_mpi();
-#endif
-#endif
-      })
-      .def("load_model", [ep_registration_fn](PyTrainingSession* sess, const std::string& path, TrainingParameters& parameters, const std::vector<std::string>& provider_types, const ProviderOptionsVector& provider_options) {
-        OrtPybindThrowIfError(sess->GetSessionHandle()->Load(path));
-
-#if defined(USE_MPI)
-        bool use_nccl = parameters.allreduce_post_accumulation;
-        if (!use_nccl && parameters.world_size > 1)
-          CopyMPIContextToTrainingParameters(parameters, sess->GetSessionHandle()->GetLogger());
-#endif
-        const auto config_result = ConfigureSessionForTraining(static_cast<PipelineTrainingSession*>(sess->GetSessionHandle()), parameters);
-
-        ProviderOptionsVector merged_options;
-        ResolveExtraProviderOptions(provider_types, provider_options, merged_options);
-
-        InitializeSession(sess->GetSessionHandle(), ep_registration_fn, provider_types, merged_options);
-
-        return config_result;
-      })
-      .def("read_bytes", [ep_registration_fn](PyTrainingSession* sess, const py::bytes& serialized_model, TrainingParameters& parameters, const std::vector<std::string>& provider_types, const ProviderOptionsVector& provider_options) {
-        std::istringstream buffer(serialized_model);
-        OrtPybindThrowIfError(sess->GetSessionHandle()->Load(buffer));
-
-#if defined(USE_MPI)
-        bool use_nccl = parameters.allreduce_post_accumulation;
-        if (!use_nccl && parameters.world_size > 1)
-          CopyMPIContextToTrainingParameters(parameters, sess->GetSessionHandle()->GetLogger());
-#endif
-        const auto config_result = ConfigureSessionForTraining(static_cast<PipelineTrainingSession*>(sess->GetSessionHandle()), parameters);
-        ProviderOptionsVector merged_options;
-        ResolveExtraProviderOptions(provider_types, provider_options, merged_options);
-
-        InitializeSession(sess->GetSessionHandle(), ep_registration_fn, provider_types, merged_options);
-
-        return config_result;
-      })
-      .def("get_state", [](PyTrainingSession* sess) {
-        NameMLValMap state_tensors;
-        ORT_THROW_IF_ERROR(static_cast<PipelineTrainingSession*>(sess->GetSessionHandle())->GetStateTensors(state_tensors));
-        auto& data_transfer_manager = sess->GetSessionHandle()->GetDataTransferManager();
-        // convert to numpy array
-        std::map<std::string, py::object> rmap;
-        for (auto& kv : state_tensors) {
-          if (kv.second.IsTensor()) {
-            py::object obj;
-            const Tensor& rtensor = kv.second.Get<Tensor>();
-            GetPyObjFromTensor(rtensor, obj, &data_transfer_manager);
-            rmap.insert({kv.first, obj});
-          } else {
-            throw std::runtime_error("Non tensor type in session state tensors is not expected.");
-          }
-        }
-        return rmap;
-      })
-      .def("get_model_state", [](PyTrainingSession* sess, bool include_mixed_precision_weights) {
-        std::unordered_map<std::string, NameMLValMap> model_state_tensors;
-        ORT_THROW_IF_ERROR(static_cast<TrainingSession*>(sess->GetSessionHandle())->GetModelState(model_state_tensors, include_mixed_precision_weights));
-        auto& data_transfer_manager = sess->GetSessionHandle()->GetDataTransferManager();
-        return ConvertORTTensorMapToNumpy(model_state_tensors, data_transfer_manager);
-      })
-      .def("get_optimizer_state", [](PyTrainingSession* sess) {
-        std::unordered_map<std::string, NameMLValMap> opt_state_tensors;
-        ORT_THROW_IF_ERROR(static_cast<TrainingSession*>(sess->GetSessionHandle())->GetOptimizerState(opt_state_tensors));
-        auto& data_transfer_manager = sess->GetSessionHandle()->GetDataTransferManager();
-        return ConvertORTTensorMapToNumpy(opt_state_tensors, data_transfer_manager);
-      })
-      .def("get_partition_info_map", [](PyTrainingSession* sess) {
-        std::unordered_map<std::string, std::unordered_map<std::string, std::vector<int>>> part_info_map;
-        ORT_THROW_IF_ERROR(static_cast<TrainingSession*>(sess->GetSessionHandle())->GetPartitionInfoMap(part_info_map));
-        return part_info_map;
-      })
-      .def("load_state", [](PyTrainingSession* sess, std::unordered_map<std::string, py::object>& state, bool strict) {
-        NameMLValMap state_tensors;
-        for (auto initializer : state) {
-          OrtValue ml_value;
-          auto px = sess->GetSessionHandle()->GetModelInputs();
-          if (!px.first.IsOK() || !px.second) {
-            throw std::runtime_error("Either failed to get model inputs from the session object or the input def list was null");
-          }
-          CreateGenericMLValue(px.second, GetAllocator(), initializer.first, initializer.second, &ml_value);
-          ThrowIfPyErrOccured();
-          state_tensors.insert(std::make_pair(initializer.first, ml_value));
-        }
-        ORT_THROW_IF_ERROR(static_cast<PipelineTrainingSession*>(sess->GetSessionHandle())->SetStateTensors(state_tensors, strict));
-      })
-      .def("is_output_fp32_node", [](PyTrainingSession* sess, const std::string& output_name) {
-        return static_cast<PipelineTrainingSession*>(sess->GetSessionHandle())->IsGraphOutputFp32Node(output_name);
-      });
-
   py::class_<PartialGraphExecutionState>(m, "PartialGraphExecutionState")
       .def(py::init([]() {
         return std::make_unique<PartialGraphExecutionState>();
@@ -728,7 +433,20 @@ void addObjectMethodsForTraining(py::module& m, ExecutionProviderRegistrationFn
         if (!status.IsOK()) {
           throw std::runtime_error("Error in backward pass execution: " + status.ErrorMessage());
         }
-      });
+      })
+      .def("get_serialized_ortmodule_memory_stat",            // for memory optimization
+           [](TrainingAgent* agent,                           // agent
+              const std::string& memory_optimization_config,  // user config string
+              const std::string& recompute_probe_level        // user config string for probe level
+              ) -> std::tuple<std::string, std::map<std::string, std::pair<std::string, int>>> {
+             std::map<std::string, std::pair<std::string, int>> cluster_id_combinations_to_saved_symbolic_byte_map;
+             std::string opportunity_table =
+                 agent->GetSerializedORTModuleMemoryStat(memory_optimization_config,
+                                                         recompute_probe_level,
+                                                         cluster_id_combinations_to_saved_symbolic_byte_map);
+             return std::tuple<std::string, std::map<std::string, std::pair<std::string, int>>>(
+                 opportunity_table, cluster_id_combinations_to_saved_symbolic_byte_map);
+           });
 
   py::enum_<GraphTransformerConfiguration::PropagateCastOpsConfiguration::Strategy>(m, "PropagateCastOpsStrategy", py::module_local(), py::arithmetic{})
       .value("NONE", GraphTransformerConfiguration::PropagateCastOpsConfiguration::Strategy::None)
@@ -1065,17 +783,60 @@ void addObjectMethodsForTraining(py::module& m, ExecutionProviderRegistrationFn
       checkpoint_state(m, "CheckpointState", R"pbdoc(CheckpointState.)pbdoc");
   checkpoint_state
       .def(py::init())
-      .def("add_property", [](onnxruntime::training::api::CheckpointState* state,
-                              const std::string& property_name,
-                              const std::variant<int64_t, float, std::string>& property_value) {
-        state->property_bag.AddProperty(property_name, property_value);
-      })
-      .def("get_property", [](onnxruntime::training::api::CheckpointState* state, const std::string& property_name) {
-        return state->property_bag.GetProperty<onnxruntime::training::api::PropertyDataType>(property_name);
-      })
-      .def("has_property", [](onnxruntime::training::api::CheckpointState* state, const std::string& property_name) {
-        return state->property_bag.HasProperty(property_name);
-      });
+      .def("add_property",
+           [](onnxruntime::training::api::CheckpointState* state,
+              const std::string& property_name,
+              const std::variant<int64_t, float, std::string>& property_value) {
+             state->property_bag.AddProperty(property_name, property_value);
+           })
+      .def("get_property",
+           [](onnxruntime::training::api::CheckpointState* state, const std::string& property_name) {
+             return state->property_bag.GetProperty<onnxruntime::training::api::PropertyDataType>(property_name);
+           })
+      .def("has_property",
+           [](onnxruntime::training::api::CheckpointState* state, const std::string& property_name) {
+             return state->property_bag.HasProperty(property_name);
+           })
+      .def("copy_parameter_from",
+           [](onnxruntime::training::api::CheckpointState* state,
+              const std::string& parameter_name, OrtValue& value) -> void {
+             auto it = state->module_checkpoint_state.named_parameters.find(parameter_name);
+             if (it == state->module_checkpoint_state.named_parameters.end()) {
+               ORT_THROW("Parameter with name ", parameter_name, " does not exist.");
+             }
+             ORT_THROW_IF_ERROR(it->second->CopyFrom(
+                 state->module_checkpoint_state.train_session_data_transfer_mgr, value));
+           })
+      .def("get_parameter",
+           [](onnxruntime::training::api::CheckpointState* state, const std::string& parameter_name) {
+             auto it = state->module_checkpoint_state.named_parameters.find(parameter_name);
+             if (it == state->module_checkpoint_state.named_parameters.end()) {
+               ORT_THROW("Parameter with name ", parameter_name, " does not exist.");
+             }
+             return it->second;
+           })
+      .def("has_parameter",
+           [](onnxruntime::training::api::CheckpointState* state, const std::string& parameter_name) {
+             return state->module_checkpoint_state.named_parameters.count(parameter_name);
+           })
+      .def("parameter_names",
+           [](onnxruntime::training::api::CheckpointState* state) {
+             std::vector<std::string> names;
+             for ([[maybe_unused]] auto& [name, value] : state->module_checkpoint_state.named_parameters) {
+               names.push_back(name);
+             }
+             std::sort(names.begin(), names.end());
+             return names;
+           })
+      .def("property_names",
+           [](onnxruntime::training::api::CheckpointState* state) {
+             std::vector<std::string> names;
+             for ([[maybe_unused]] auto& [name, value] : state->property_bag) {
+               names.push_back(name);
+             }
+             std::sort(names.begin(), names.end());
+             return names;
+           });
 
   py::class_<PyOptimizer>
       training_optimizer(m, "Optimizer", R"pbdoc(Training Optimizer.)pbdoc");
@@ -1111,6 +872,21 @@ void addObjectMethodsForTraining(py::module& m, ExecutionProviderRegistrationFn
         ORT_THROW_IF_ERROR(scheduler->Step());
       });
 
+  py::class_<onnxruntime::training::api::Parameter,
+             std::unique_ptr<onnxruntime::training::api::Parameter, py::nodelete>>
+      parameter(m, "Parameter");
+  parameter
+      .def_property_readonly("name", &onnxruntime::training::api::Parameter::Name)
+      .def_property_readonly("data", &onnxruntime::training::api::Parameter::Data)
+      .def_property_readonly("grad", &onnxruntime::training::api::Parameter::Gradient)
+      .def_property_readonly("requires_grad", &onnxruntime::training::api::Parameter::RequiresGrad)
+      .def("copy_from",
+           [](onnxruntime::training::api::Parameter* parameter,
+              onnxruntime::training::api::CheckpointState* state,
+              OrtValue& value) -> void {
+             ORT_THROW_IF_ERROR(parameter->CopyFrom(state->module_checkpoint_state.train_session_data_transfer_mgr, value));
+           });
+
   m.def(
       "save_checkpoint",
       [](const std::vector<py::bytes>& trainable_tensor_protos_pybytes,
diff --git a/orttraining/orttraining/python/orttraining_python_module.cc b/orttraining/orttraining/python/orttraining_python_module.cc
index 7024244629c3e..4d1db7334f280 100644
--- a/orttraining/orttraining/python/orttraining_python_module.cc
+++ b/orttraining/orttraining/python/orttraining_python_module.cc
@@ -15,6 +15,12 @@ namespace onnxruntime {
 namespace python {
 namespace py = pybind11;
 
+#if defined(USE_MPI) && defined(ORT_USE_NCCL)
+static constexpr bool HAS_COLLECTIVE_OPS = true;
+#else
+static constexpr bool HAS_COLLECTIVE_OPS = false;
+#endif
+
 using namespace onnxruntime::logging;
 
 std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance(
@@ -34,7 +40,7 @@ const ROCMExecutionProviderInfo GetRocmExecutionProviderInfo(ProviderInfo_ROCM*
 
 void addGlobalMethods(py::module& m);
 void addObjectMethods(py::module& m, ExecutionProviderRegistrationFn ep_registration_fn);
-void addObjectMethodsForTraining(py::module& m, ExecutionProviderRegistrationFn ep_registration_fn);
+void addObjectMethodsForTraining(py::module& m);
 void addObjectMethodsForEager(py::module& m);
 #ifdef ENABLE_LAZY_TENSOR
 void addObjectMethodsForLazyTensor(py::module& m);
@@ -333,7 +339,7 @@ PYBIND11_MODULE(onnxruntime_pybind11_state, m) {
   }
 #endif
 
-  addObjectMethodsForTraining(m, ORTTrainingRegisterExecutionProviders);
+  addObjectMethodsForTraining(m);
 
 #ifdef ENABLE_LAZY_TENSOR
   addObjectMethodsForLazyTensor(m);
@@ -361,6 +367,8 @@ PYBIND11_MODULE(onnxruntime_pybind11_state, m) {
       },
       "Clean the execution provider instances used in ort training module.");
 
+  m.def("has_collective_ops", []() -> bool { return HAS_COLLECTIVE_OPS; });
+
   // See documentation for class TrainingEnvInitialzer earlier in this module
   // for an explanation as to why this is needed.
   auto atexit = py::module_::import("atexit");
diff --git a/orttraining/orttraining/python/training/__init__.py b/orttraining/orttraining/python/training/__init__.py
index 73b1f826f68e1..a3c22686a1039 100644
--- a/orttraining/orttraining/python/training/__init__.py
+++ b/orttraining/orttraining/python/training/__init__.py
@@ -8,26 +8,16 @@
     TrainingParameters,
     is_ortmodule_available,
 )
-from onnxruntime.capi.training.training_session import TrainingSession
-
 
 # Options need to be imported before `ORTTrainer`.
-from .orttrainer_options import ORTTrainerOptions
-from .orttrainer import ORTTrainer, TrainStepInfo
-from . import amp, artifacts, checkpoint, model_desc_validation, optim
+from . import amp, artifacts, optim
 
 __all__ = [
     "PropagateCastOpsStrategy",
     "TrainingParameters",
     "is_ortmodule_available",
-    "TrainingSession",
-    "ORTTrainerOptions",
-    "ORTTrainer",
-    "TrainStepInfo",
     "amp",
     "artifacts",
-    "checkpoint",
-    "model_desc_validation",
     "optim",
 ]
 
diff --git a/orttraining/orttraining/python/training/_checkpoint_storage.py b/orttraining/orttraining/python/training/_checkpoint_storage.py
deleted file mode 100644
index 7a8ada7dee96b..0000000000000
--- a/orttraining/orttraining/python/training/_checkpoint_storage.py
+++ /dev/null
@@ -1,107 +0,0 @@
-# -------------------------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# Licensed under the MIT License.
-# --------------------------------------------------------------------------
-
-import pickle
-from collections.abc import Mapping
-
-import h5py
-
-
-def _dfs_save(group, save_obj):
-    """Recursively go over each level in the save_obj dictionary and save values to a hdf5 group"""
-
-    for key, value in save_obj.items():
-        if isinstance(value, Mapping):
-            subgroup = group.create_group(key)
-            _dfs_save(subgroup, value)
-        else:
-            group[key] = value
-
-
-def save(save_obj: dict, path):
-    """Persists the input dictionary to a file specified by path.
-
-    Saves an hdf5 representation of the save_obj dictionary to a file or a file-like object specified by path.
-    Values are saved in a format supported by h5py. For example, a PyTorch tensor is saved and loaded as a
-    numpy object. So, user types may be converted from their original types to numpy equivalent types.
-
-    Args:
-        save_obj: dictionary that needs to be saved.
-            save_obj should consist of types supported by hdf5 file format.
-            if hdf5 does not recognize a type, an exception is raised.
-            if save_obj is not a dictionary, a ValueError is raised.
-        path: string representation to a file path or a python file-like object.
-            if file already exists at path, an exception is raised.
-    """
-    if not isinstance(save_obj, Mapping):
-        raise ValueError("Object to be saved must be a dictionary")
-
-    with h5py.File(path, "w-") as f:
-        _dfs_save(f, save_obj)
-
-
-def _dfs_load(group, load_obj):
-    """Recursively go over each level in the hdf5 group and load the values into the given dictionary"""
-
-    for key in group:
-        if isinstance(group[key], h5py.Group):
-            load_obj[key] = {}
-            _dfs_load(group[key], load_obj[key])
-        else:
-            load_obj[key] = group[key][()]
-
-
-def load(path, key=None):
-    """Loads the data stored in the binary file specified at the given path into a dictionary and returns it.
-
-    Loads the data from an hdf5 file specified at the given path into a python dictionary.
-    Loaded dictionary contains numpy equivalents of python data types. For example:
-        PyTorch tensor -> saved as a numpy array and loaded as a numpy array.
-        bool -> saved as a numpy bool and loaded as a numpy bool
-    If a '/' separated key is provided, the value at that hierarchical level in the hdf5 group is returned.
-
-    Args:
-        path: string representation to a file path or a python file-like object.
-            if file does not already exist at path, an exception is raised.
-        key: '/' separated representation of the hierarchy level value that needs to be returned/
-            for example, if the saved binary file has structure {a: {b: x, c:y}} and the user would like
-            to query the value for c, the key provided should be 'a/c'.
-            the default value of None for key implies that the entire hdf5 file structure needs to be loaded into a dictionary and returned.
-
-    Returns:
-        a dictionary loaded from the specified binary hdf5 file.
-    """
-    if not h5py.is_hdf5(path):
-        raise ValueError(f"{path} is not an hdf5 file or a python file-like object.")
-
-    load_obj = {}
-    with h5py.File(path, "r") as f:
-        if key:
-            f = f[key]  # noqa: PLW2901
-        if isinstance(f, h5py.Dataset):
-            return f[()]
-
-        _dfs_load(f, load_obj)
-
-    return load_obj
-
-
-def to_serialized_hex(user_dict):
-    """Serialize the user_dict and convert the serialized bytes to a hex string and return"""
-
-    return pickle.dumps(user_dict).hex()
-
-
-def from_serialized_hex(serialized_hex):
-    """Convert serialized_hex to bytes and deserialize it and return"""
-
-    # serialized_hex can be either a regular string or a byte string.
-    # if it is a byte string, convert to regular string using decode()
-    # if it is a regular string, do nothing to it
-    try:  # noqa: SIM105
-        serialized_hex = serialized_hex.decode()
-    except AttributeError:
-        pass
-    return pickle.loads(bytes.fromhex(serialized_hex))
diff --git a/orttraining/orttraining/python/training/_utils.py b/orttraining/orttraining/python/training/_utils.py
index 4eb79443c8f1a..091274d1d171d 100644
--- a/orttraining/orttraining/python/training/_utils.py
+++ b/orttraining/orttraining/python/training/_utils.py
@@ -6,11 +6,9 @@
 import importlib.util
 import os
 import sys
-from functools import wraps  # noqa: F401
 
 import numpy as np
 import torch
-from onnx import TensorProto  # noqa: F401
 from packaging.version import Version
 
 
@@ -23,16 +21,6 @@ def get_device_index(device):
     return 0 if device.index is None else device.index
 
 
-def get_device_index_from_input(input):
-    """Returns device index from a input PyTorch Tensor"""
-
-    if isinstance(input, (list, tuple)):
-        device_index = get_device_index(input[0].device)
-    else:
-        device_index = get_device_index(input.device)
-    return device_index
-
-
 def get_device_str(device):
     if isinstance(device, str):
         # could be 'cuda:0', 'cuda:1', or 'cpu'. with cpu, set index=0
@@ -50,24 +38,6 @@ def get_device_str(device):
     return device
 
 
-def get_all_gradients_finite_name_from_session(session):
-    """Find all_gradients_finite node on Session graph and return its name"""
-
-    nodes = [x for x in session._outputs_meta if "all_gradients_finite" in x.name]
-    if len(nodes) != 1:
-        raise RuntimeError("'all_gradients_finite' node not found within training session")
-    return nodes[0].name
-
-
-def get_gradient_accumulation_name_from_session(session):
-    """Find Group_Accumulated_Gradients node on Session graph and return its name"""
-
-    nodes = [x for x in session._outputs_meta if "Group_Accumulated_Gradients" in x.name]
-    if len(nodes) != 1:
-        raise RuntimeError("'Group_Accumulated_Gradients' node not found within training session")
-    return nodes[0].name
-
-
 def dtype_torch_to_numpy(torch_dtype):
     """Converts PyTorch types to Numpy types
 
@@ -232,111 +202,3 @@ def import_module_from_file(file_path, module_name=None):
     sys.modules[module_name] = module
     spec.loader.exec_module(module)
     return module
-
-
-def state_dict_model_key():
-    """Returns the model key name in the state dictionary"""
-
-    return "model"
-
-
-def state_dict_optimizer_key():
-    """Returns the optimizer key name in the state dictionary"""
-
-    return "optimizer"
-
-
-def state_dict_partition_info_key():
-    """Returns the partition info key name in the state dictionary"""
-
-    return "partition_info"
-
-
-def state_dict_trainer_options_key():
-    """Returns the trainer options key name in the state dictionary"""
-
-    return "trainer_options"
-
-
-def state_dict_full_precision_key():
-    """Returns the full precision key name in the state dictionary"""
-
-    return "full_precision"
-
-
-def state_dict_original_dimension_key():
-    """Returns the original dimension key name in the state dictionary"""
-
-    return "original_dim"
-
-
-def state_dict_sharded_optimizer_keys():
-    """Returns the optimizer key names that can be sharded in the state dictionary"""
-
-    return {"Moment_1", "Moment_2"}
-
-
-def state_dict_user_dict_key():
-    """Returns the user dict key name in the state dictionary"""
-
-    return "user_dict"
-
-
-def state_dict_trainer_options_mixed_precision_key():
-    """Returns the trainer options mixed precision key name in the state dictionary"""
-
-    return "mixed_precision"
-
-
-def state_dict_trainer_options_zero_stage_key():
-    """Returns the trainer options zero_stage key name in the state dictionary"""
-
-    return "zero_stage"
-
-
-def state_dict_trainer_options_world_rank_key():
-    """Returns the trainer options world_rank key name in the state dictionary"""
-
-    return "world_rank"
-
-
-def state_dict_trainer_options_world_size_key():
-    """Returns the trainer options world_size key name in the state dictionary"""
-
-    return "world_size"
-
-
-def state_dict_trainer_options_data_parallel_size_key():
-    """Returns the trainer options data_parallel_size key name in the state dictionary"""
-
-    return "data_parallel_size"
-
-
-def state_dict_trainer_options_horizontal_parallel_size_key():
-    """Returns the trainer options horizontal_parallel_size key name in the state dictionary"""
-
-    return "horizontal_parallel_size"
-
-
-def state_dict_trainer_options_optimizer_name_key():
-    """Returns the trainer options optimizer_name key name in the state dictionary"""
-
-    return "optimizer_name"
-
-
-def state_dict_train_step_info_key():
-    """Returns the train step info key name in the state dictionary"""
-
-    return "train_step_info"
-
-
-def state_dict_train_step_info_optimization_step_key():
-    """Returns the train step info optimization step key name in the state dictionary"""
-
-    return "optimization_step"
-
-
-def state_dict_train_step_info_step_key():
-    """Returns the train step info step key name in the state dictionary"""
-
-    return "step"
diff --git a/orttraining/orttraining/python/training/api/checkpoint_state.py b/orttraining/orttraining/python/training/api/checkpoint_state.py
index 285264bbed744..ba95cd04fce7e 100644
--- a/orttraining/orttraining/python/training/api/checkpoint_state.py
+++ b/orttraining/orttraining/python/training/api/checkpoint_state.py
@@ -5,70 +5,171 @@
 
 import os
 
+import numpy as np
+
 from onnxruntime.capi import _pybind_state as C
+from onnxruntime.capi.onnxruntime_inference_collection import OrtValue
 
 
-class CheckpointState:
-    """Class that holds the state of the training session
+class Parameter:
+    """Class that represents a model parameter
 
-    This class holds all the state information of the training session such as the model parameters,
-    its gradients, the optimizer state and user defined properties.
+    This class represents a model parameter and provides access to its data,
+    gradient and other properties. This class is not expected to be instantiated directly.
+    Instead, it is returned by the `CheckpointState` object.
+
+    Args:
+        parameter: The C.Parameter object that holds the underlying parameter data.
+        state: The C.CheckpointState object that holds the underlying session state.
+    """
+
+    def __init__(self, parameter: C.Parameter, state: C.CheckpointState):
+        self._parameter = parameter
+        self._state = state
 
-    User defined properties can be indexed by name from the `CheckpointState` object.
+    @property
+    def name(self) -> str:
+        """The name of the parameter"""
+        return self._parameter.name
 
-    To create the `CheckpointState`, use the `CheckpointState.load_checkpoint` method.
+    @property
+    def data(self) -> np.ndarray:
+        """The data of the parameter"""
+        return self._parameter.data.numpy()
+
+    @data.setter
+    def data(self, value: np.ndarray) -> None:
+        """Sets the data of the parameter"""
+        self._parameter.copy_from(self._state, OrtValue.ortvalue_from_numpy(value)._ortvalue)
+
+    @property
+    def grad(self) -> np.ndarray:
+        """The gradient of the parameter"""
+        return self._parameter.grad.numpy() if self._parameter.grad.has_value() else None
+
+    @property
+    def requires_grad(self) -> bool:
+        """Whether or not the parameter requires its gradient to be computed"""
+        return self._parameter.requires_grad
+
+    def __repr__(self) -> str:
+        """Returns a string representation of the parameter"""
+        return f"Parameter(name={self.name}, requires_grad={self.requires_grad})"
+
+
+class Parameters:
+    """Class that holds all the model parameters
+
+    This class holds all the model parameters and provides access to them.
+    This class is not expected to be instantiated directly. Instead, it is returned by the
+    `CheckpointState`'s parameters attribute.
+    This class behaves like a dictionary and provides access to the parameters by name.
 
     Args:
-        state: The C.Checkpoint state object that holds the underlying session state.
+        state: The C.CheckpointState object that holds the underlying session state.
     """
 
     def __init__(self, state: C.CheckpointState):
-        if not isinstance(state, C.CheckpointState):
-            raise TypeError(f"Invalid argument for CheckpointState received {type(state)}")
         self._state = state
 
-    @classmethod
-    def load_checkpoint(cls, checkpoint_uri: str | os.PathLike) -> CheckpointState:
-        """Loads the checkpoint state from the checkpoint file
+    def __getitem__(self, name: str) -> Parameter:
+        """Gets the parameter associated with the given name
+
+        Searches for the name in the parameters of the checkpoint state.
 
         Args:
-            checkpoint_uri: The path to the checkpoint file.
+            name: The name of the parameter
 
         Returns:
-            CheckpointState: The checkpoint state object.
+            The value of the parameter
+
+        Raises:
+            KeyError: If the parameter is not found
         """
-        return cls(C.load_checkpoint(os.fspath(checkpoint_uri)))
 
-    @classmethod
-    def save_checkpoint(
-        cls, state: CheckpointState, checkpoint_uri: str | os.PathLike, include_optimizer_state: bool = False
-    ) -> None:
-        """Saves the checkpoint state to the checkpoint file
+        if name not in self:
+            raise KeyError(f"Parameter {name} not found.")
+
+        return Parameter(self._state.get_parameter(name), self._state)
+
+    def __setitem__(self, name: str, value: np.ndarray) -> None:
+        """Sets the parameter value for the given name
+
+        Searches for the name in the parameters of the checkpoint state.
+        If the name is found in parameters, the value is updated.
 
         Args:
-            state: The checkpoint state object.
-            checkpoint_uri: The path to the checkpoint file.
-            include_optimizer_state: If True, the optimizer state is also saved to the checkpoint file.
+            name: The name of the parameter
+            value: The value of the parameter as a numpy array
+
+        Raises:
+            KeyError: If the parameter is not found
         """
-        C.save_checkpoint(state._state, os.fspath(checkpoint_uri), include_optimizer_state)
+        if name not in self:
+            raise KeyError(f"Parameter {name} not found.")
+
+        self._state.copy_parameter_from(name, OrtValue.ortvalue_from_numpy(value)._ortvalue)
+
+    def __contains__(self, name: str) -> bool:
+        """Checks if the parameter exists in the state
+
+        Args:
+            name: The name of the parameter
+
+        Returns:
+            True if the name is a parameter False otherwise
+        """
+
+        return self._state.has_parameter(name)
+
+    def __iter__(self):
+        """Returns an iterator over the properties"""
+        for parameter_name in self._state.parameter_names():
+            yield parameter_name, Parameter(self._state.get_parameter(parameter_name), self._state)
+
+    def __repr__(self) -> str:
+        """Returns a string representation of the parameters"""
+        return self._state.parameter_names()
+
+    def __len__(self) -> int:
+        """Returns the number of parameters"""
+        return len(self._state.parameter_names())
+
+
+class Properties:
+    def __init__(self, state: C.CheckpointState):
+        self._state = state
 
     def __getitem__(self, name: str) -> int | float | str:
         """Gets the property associated with the given name
 
+        Searches for the name in the properties of the checkpoint state.
+
         Args:
             name: The name of the property
 
         Returns:
             The value of the property
+
+        Raises:
+            KeyError: If the property is not found
         """
+
+        if name not in self:
+            raise KeyError(f"Property {name} not found.")
+
         return self._state.get_property(name)
 
     def __setitem__(self, name: str, value: int | float | str) -> None:
         """Sets the property value for the given name
 
+        Searches for the name in the properties of the checkpoint state.
+        The value is added or updated in the properties.
+
         Args:
             name: The name of the property
             value: The value of the property
+                   Properties only support int, float and str values.
         """
         self._state.add_property(name, value)
 
@@ -79,6 +180,75 @@ def __contains__(self, name: str) -> bool:
             name: The name of the property
 
         Returns:
-            True if the property exists, False otherwise
+            True if the name is a property, False otherwise
         """
+
         return self._state.has_property(name)
+
+    def __iter__(self):
+        """Returns an iterator over the properties"""
+        for property_name in self._state.property_names():
+            yield property_name, self._state.get_property(property_name)
+
+    def __repr__(self) -> str:
+        """Returns a string representation of the properties"""
+        return self._state.property_names()
+
+    def __len__(self) -> int:
+        """Returns the number of properties"""
+        return len(self._state.property_names())
+
+
+class CheckpointState:
+    """Class that holds the state of the training session
+
+    This class holds all the state information of the training session such as the model parameters,
+    its gradients, the optimizer state and user defined properties.
+
+    To create the `CheckpointState`, use the `CheckpointState.load_checkpoint` method.
+
+    Args:
+        state: The C.Checkpoint state object that holds the underlying session state.
+    """
+
+    def __init__(self, state: C.CheckpointState):
+        if not isinstance(state, C.CheckpointState):
+            raise TypeError(f"Invalid argument for CheckpointState received {type(state)}")
+        self._state = state
+        self._parameters = Parameters(self._state)
+        self._properties = Properties(self._state)
+
+    @classmethod
+    def load_checkpoint(cls, checkpoint_uri: str | os.PathLike) -> CheckpointState:
+        """Loads the checkpoint state from the checkpoint file
+
+        Args:
+            checkpoint_uri: The path to the checkpoint file.
+
+        Returns:
+            CheckpointState: The checkpoint state object.
+        """
+        return cls(C.load_checkpoint(os.fspath(checkpoint_uri)))
+
+    @classmethod
+    def save_checkpoint(
+        cls, state: CheckpointState, checkpoint_uri: str | os.PathLike, include_optimizer_state: bool = False
+    ) -> None:
+        """Saves the checkpoint state to the checkpoint file
+
+        Args:
+            state: The checkpoint state object.
+            checkpoint_uri: The path to the checkpoint file.
+            include_optimizer_state: If True, the optimizer state is also saved to the checkpoint file.
+        """
+        C.save_checkpoint(state._state, os.fspath(checkpoint_uri), include_optimizer_state)
+
+    @property
+    def parameters(self) -> Parameters:
+        """Returns the model parameters from the checkpoint state"""
+        return self._parameters
+
+    @property
+    def properties(self) -> Properties:
+        """Returns the properties from the checkpoint state"""
+        return self._properties
diff --git a/orttraining/orttraining/python/training/artifacts.py b/orttraining/orttraining/python/training/artifacts.py
index 549614de496a6..a57105545e114 100644
--- a/orttraining/orttraining/python/training/artifacts.py
+++ b/orttraining/orttraining/python/training/artifacts.py
@@ -53,6 +53,8 @@ def generate_artifacts(
         3. Checkpoint (directory): Contains the model parameters.
         4. Optimizer model (onnx.ModelProto): Model containing the optimizer graph.
 
+    All generated ModelProtos will use the same opsets defined by *model*.
+
     Args:
         model: The base model to be used for gradient graph generation.
         requires_grad: List of names of model parameters that require gradient computation
@@ -207,11 +209,17 @@ def _export_to_ort_format(model_path, output_dir, extra_options):
 
     logging.info("Optimizer enum provided: %s", optimizer.name)
 
+    opset_version = None
+    for domain in model.opset_import:
+        if domain.domain == "" or domain.domain == "ai.onnx":
+            opset_version = domain.version
+            break
+
     optim_model = None
     optim_blocks = {OptimType.AdamW: onnxblock.optim.AdamW, OptimType.SGD: onnxblock.optim.SGD}
 
     optim_block = optim_blocks[optimizer]()
-    with onnxblock.empty_base():
+    with onnxblock.empty_base(opset_version=opset_version):
         _ = optim_block(model_params)
         optim_model = optim_block.to_model_proto()
 
diff --git a/orttraining/orttraining/python/training/checkpoint.py b/orttraining/orttraining/python/training/checkpoint.py
deleted file mode 100644
index d0ff0650662b7..0000000000000
--- a/orttraining/orttraining/python/training/checkpoint.py
+++ /dev/null
@@ -1,748 +0,0 @@
-import os
-import tempfile
-import warnings
-from enum import Enum
-
-import numpy as np
-import onnx
-import torch
-
-from . import _checkpoint_storage, _utils
-
-################################################################################
-# Experimental Checkpoint APIs
-################################################################################
-
-
-def experimental_state_dict(ort_trainer, include_optimizer_state=True):
-    warnings.warn(
-        "experimental_state_dict() will be deprecated soon. Please use ORTTrainer.state_dict() instead.",
-        DeprecationWarning,
-    )
-
-    if not ort_trainer._training_session:
-        warnings.warn(
-            "ONNX Runtime training session is not initialized yet. "
-            "Please run train_step or eval_step at least once before calling state_dict()."
-        )
-        return ort_trainer._state_dict
-
-    # extract trained weights
-    session_state = ort_trainer._training_session.get_state()
-    torch_state = {}
-    for name in session_state:
-        torch_state[name] = torch.from_numpy(session_state[name])
-
-    # extract untrained weights and buffer
-    for n in ort_trainer._onnx_model.graph.initializer:
-        if n.name not in torch_state and n.name in ort_trainer.options.utils.frozen_weights:
-            torch_state[n.name] = torch.from_numpy(np.array(onnx.numpy_helper.to_array(n)))
-
-    # Need to remove redundant (optimizer) initializers to map back to original torch state names
-    if not include_optimizer_state and ort_trainer._torch_state_dict_keys:
-        return {key: torch_state[key] for key in ort_trainer._torch_state_dict_keys if key in torch_state}
-    return torch_state
-
-
-def experimental_load_state_dict(ort_trainer, state_dict, strict=False):
-    warnings.warn(
-        "experimental_load_state_dict() will be deprecated soon. Please use ORTTrainer.load_state_dict() instead.",
-        DeprecationWarning,
-    )
-
-    # Note: It may happen ONNX model has not yet been initialized
-    # In this case we cache a reference to desired state and delay the restore until after initialization
-    # Unexpected behavior will result if the user changes the reference before initialization
-    if not ort_trainer._training_session:
-        ort_trainer._state_dict = state_dict
-        ort_trainer._load_state_dict_strict = strict
-        return
-
-    # Update onnx model from loaded state dict
-    cur_initializers_names = [n.name for n in ort_trainer._onnx_model.graph.initializer]
-    new_initializers = {}
-
-    for name in state_dict:
-        if name in cur_initializers_names:
-            new_initializers[name] = state_dict[name].numpy()
-        elif strict:
-            raise RuntimeError(f"Checkpoint tensor: {name} is not present in the model.")
-
-    ort_trainer._update_onnx_model_initializers(new_initializers)
-
-    # create new session based on updated onnx model
-    ort_trainer._state_dict = None
-    ort_trainer._init_session()
-
-    # load training state
-    session_state = {name: state_dict[name].numpy() for name in state_dict}
-    ort_trainer._training_session.load_state(session_state, strict)
-
-
-def experimental_save_checkpoint(
-    ort_trainer,
-    checkpoint_dir,
-    checkpoint_prefix="ORT_checkpoint",
-    checkpoint_state_dict=None,
-    include_optimizer_state=True,
-):
-    warnings.warn(
-        "experimental_save_checkpoint() will be deprecated soon. Please use ORTTrainer.save_checkpoint() instead.",
-        DeprecationWarning,
-    )
-
-    if checkpoint_state_dict is None:
-        checkpoint_state_dict = {"model": experimental_state_dict(ort_trainer, include_optimizer_state)}
-    else:
-        checkpoint_state_dict.update({"model": experimental_state_dict(ort_trainer, include_optimizer_state)})
-
-    assert os.path.exists(checkpoint_dir), f"checkpoint_dir ({checkpoint_dir}) directory doesn't exist"
-
-    checkpoint_name = _get_checkpoint_name(
-        checkpoint_prefix,
-        ort_trainer.options.distributed.deepspeed_zero_optimization.stage,
-        ort_trainer.options.distributed.world_rank,
-        ort_trainer.options.distributed.world_size,
-    )
-    checkpoint_file = os.path.join(checkpoint_dir, checkpoint_name)
-    if os.path.exists(checkpoint_file):
-        msg = f"{checkpoint_file} already exists, overwriting."
-        warnings.warn(msg)
-    torch.save(checkpoint_state_dict, checkpoint_file)
-
-
-def experimental_load_checkpoint(ort_trainer, checkpoint_dir, checkpoint_prefix="ORT_checkpoint", strict=False):
-    warnings.warn(
-        "experimental_load_checkpoint() will be deprecated soon. Please use ORTTrainer.load_checkpoint() instead.",
-        DeprecationWarning,
-    )
-
-    checkpoint_files = _list_checkpoint_files(checkpoint_dir, checkpoint_prefix)
-    is_partitioned = False
-    if len(checkpoint_files) > 1:
-        msg = (
-            f"Found more than one file with prefix {checkpoint_prefix} in directory {checkpoint_dir}."
-            " Attempting to load ZeRO checkpoint."
-        )
-        warnings.warn(msg)
-        is_partitioned = True
-    if (not ort_trainer.options.distributed.deepspeed_zero_optimization.stage) and is_partitioned:
-        return _load_multi_checkpoint(ort_trainer, checkpoint_dir, checkpoint_prefix, strict)
-    else:
-        return _load_single_checkpoint(ort_trainer, checkpoint_dir, checkpoint_prefix, is_partitioned, strict)
-
-
-class _AGGREGATION_MODE(Enum):  # noqa: N801
-    Zero = 0
-    Megatron = 1
-
-
-def _order_paths(paths, D_groups, H_groups):
-    """Reorders the given paths in order of aggregation of ranks for D and H parallellism respectively
-    and returns the ordered dict"""
-
-    trainer_options_path_tuples = []
-    world_rank = _utils.state_dict_trainer_options_world_rank_key()
-
-    for path in paths:
-        trainer_options_path_tuples.append(
-            (_checkpoint_storage.load(path, key=_utils.state_dict_trainer_options_key()), path)
-        )
-
-    # sort paths according to rank
-    sorted_paths = [
-        path
-        for _, path in sorted(
-            trainer_options_path_tuples, key=lambda trainer_options_path_pair: trainer_options_path_pair[0][world_rank]
-        )
-    ]
-
-    ordered_paths = dict()
-    ordered_paths["D"] = [[sorted_paths[i] for i in D_groups[group_id]] for group_id in range(len(D_groups))]
-    ordered_paths["H"] = [[sorted_paths[i] for i in H_groups[group_id]] for group_id in range(len(H_groups))]
-
-    return ordered_paths
-
-
-def _add_or_update_sharded_key(
-    state_key, state_value, state_sub_dict, model_state_key, state_partition_info, sharded_states_original_dims, mode
-):
-    """Add or update the record for the sharded state_key in the state_sub_dict"""
-
-    # record the original dimension for this state
-    original_dim = _utils.state_dict_original_dimension_key()
-    sharded_states_original_dims[model_state_key] = state_partition_info[original_dim]
-
-    axis = 0
-    if mode == _AGGREGATION_MODE.Megatron and state_partition_info["megatron_row_partition"] == 0:
-        axis = -1
-
-    if state_key in state_sub_dict:
-        # state_dict already contains a record for this state
-        # since this state is sharded, concatenate the state value to
-        # the record in the state_dict
-        state_sub_dict[state_key] = np.concatenate((state_sub_dict[state_key], state_value), axis)
-    else:
-        # create a new entry for this state in the state_dict
-        state_sub_dict[state_key] = state_value
-
-
-def _add_or_validate_unsharded_key(state_key, state_value, state_sub_dict, mismatch_error_string):
-    """Add or validate the record for the unsharded state_key in the state_sub_dict"""
-
-    if state_key in state_sub_dict:
-        # state_dict already contains a record for this unsharded state.
-        # assert that all values are the same for this previously loaded state
-        assert (state_sub_dict[state_key] == state_value).all(), mismatch_error_string
-    else:
-        # create a new entry for this state in the state_sub_dict
-        state_sub_dict[state_key] = state_value
-
-
-def _aggregate_model_states(
-    rank_state_dict, sharded_states_original_dims, state_dict, mixed_precision_enabled, mode=_AGGREGATION_MODE.Zero
-):
-    """Aggregates all model states from the rank_state_dict into state_dict"""
-
-    model = _utils.state_dict_model_key()
-    full_precision = _utils.state_dict_full_precision_key()
-    partition_info = _utils.state_dict_partition_info_key()
-
-    # if there are no model states in the rank_state_dict, no model aggregation is needed
-    if model not in rank_state_dict:
-        return
-
-    if model not in state_dict:
-        state_dict[model] = {}
-
-    if full_precision not in state_dict[model]:
-        state_dict[model][full_precision] = {}
-
-    # iterate over all model state keys
-    for model_state_key, model_state_value in rank_state_dict[model][full_precision].items():
-        # ZERO: full precision model states are sharded only when they exist in the partition_info subdict and mixed
-        # precision training was enabled. for full precision training, full precision model states are not sharded
-        # MEGATRON : full precision model states are sharded when they exist in the partition_info subdict
-        if (model_state_key in rank_state_dict[partition_info]) and (
-            mode == _AGGREGATION_MODE.Megatron or mixed_precision_enabled
-        ):
-            # this model state is sharded
-            _add_or_update_sharded_key(
-                model_state_key,
-                model_state_value,
-                state_dict[model][full_precision],
-                model_state_key,
-                rank_state_dict[partition_info][model_state_key],
-                sharded_states_original_dims,
-                mode,
-            )
-        else:
-            # this model state is not sharded since a record for it does not exist in the partition_info subdict
-            _add_or_validate_unsharded_key(
-                model_state_key,
-                model_state_value,
-                state_dict[model][full_precision],
-                f"Value mismatch for model state {model_state_key}",
-            )
-
-
-def _aggregate_optimizer_states(rank_state_dict, sharded_states_original_dims, state_dict, mode=_AGGREGATION_MODE.Zero):
-    """Aggregates all optimizer states from the rank_state_dict into state_dict"""
-
-    optimizer = _utils.state_dict_optimizer_key()
-    partition_info = _utils.state_dict_partition_info_key()
-    sharded_optimizer_keys = _utils.state_dict_sharded_optimizer_keys()
-
-    # if there are no optimizer states in the rank_state_dict, no optimizer aggregation is needed
-    if optimizer not in rank_state_dict:
-        return
-
-    if optimizer not in state_dict:
-        state_dict[optimizer] = {}
-
-    # iterate over all optimizer state keys
-    for model_state_key, optimizer_dict in rank_state_dict[optimizer].items():
-        for optimizer_key, optimizer_value in optimizer_dict.items():
-            if model_state_key not in state_dict[optimizer]:
-                state_dict[optimizer][model_state_key] = {}
-
-            if optimizer_key in sharded_optimizer_keys and model_state_key in rank_state_dict[partition_info]:
-                # this optimizer state is sharded since a record exists in the partition_info subdict
-                _add_or_update_sharded_key(
-                    optimizer_key,
-                    optimizer_value,
-                    state_dict[optimizer][model_state_key],
-                    model_state_key,
-                    rank_state_dict[partition_info][model_state_key],
-                    sharded_states_original_dims,
-                    mode,
-                )
-            else:
-                # this optimizer state is not sharded since a record for it does not exist in the partition_info subdict
-                # or this optimizer key is not one of the sharded optimizer keys
-                _add_or_validate_unsharded_key(
-                    optimizer_key,
-                    optimizer_value,
-                    state_dict[optimizer][model_state_key],
-                    f"Value mismatch for model state {model_state_key} and optimizer state {optimizer_key}",
-                )
-
-
-def _reshape_states(sharded_states_original_dims, state_dict, mixed_precision_enabled):
-    """Reshape model and optimizer states in the state_dict according to dimensions in sharded_states_original_dims"""
-
-    model = _utils.state_dict_model_key()
-    full_precision = _utils.state_dict_full_precision_key()
-    optimizer = _utils.state_dict_optimizer_key()
-    sharded_optimizer_keys = _utils.state_dict_sharded_optimizer_keys()
-
-    for sharded_state_key, original_dim in sharded_states_original_dims.items():
-        # reshape model states to original_dim only when mixed precision is enabled
-        if mixed_precision_enabled and (model in state_dict):
-            state_dict[model][full_precision][sharded_state_key] = state_dict[model][full_precision][
-                sharded_state_key
-            ].reshape(original_dim)
-
-        # reshape optimizer states to original_dim
-        if optimizer in state_dict:
-            for optimizer_key, optimizer_value in state_dict[optimizer][sharded_state_key].items():
-                if optimizer_key in sharded_optimizer_keys:
-                    state_dict[optimizer][sharded_state_key][optimizer_key] = optimizer_value.reshape(original_dim)
-
-
-def _aggregate_trainer_options(rank_state_dict, state_dict, partial_aggregation):
-    """Extracts trainer options from rank_state_dict and loads them accordingly on state_dict"""
-    trainer_options = _utils.state_dict_trainer_options_key()
-    state_dict[trainer_options] = {}
-
-    mixed_precision = _utils.state_dict_trainer_options_mixed_precision_key()
-    zero_stage = _utils.state_dict_trainer_options_zero_stage_key()
-    world_rank = _utils.state_dict_trainer_options_world_rank_key()
-    world_size = _utils.state_dict_trainer_options_world_size_key()
-    optimizer_name = _utils.state_dict_trainer_options_optimizer_name_key()
-    D_size = _utils.state_dict_trainer_options_data_parallel_size_key()  # noqa: N806
-    H_size = _utils.state_dict_trainer_options_horizontal_parallel_size_key()  # noqa: N806
-
-    state_dict[trainer_options][mixed_precision] = rank_state_dict[trainer_options][mixed_precision]
-    state_dict[trainer_options][zero_stage] = 0
-    state_dict[trainer_options][world_rank] = rank_state_dict[trainer_options][world_rank] if partial_aggregation else 0
-    state_dict[trainer_options][world_size] = 1
-    state_dict[trainer_options][optimizer_name] = rank_state_dict[trainer_options][optimizer_name]
-    state_dict[trainer_options][D_size] = 1
-    state_dict[trainer_options][H_size] = 1
-
-
-def _aggregate_megatron_partition_info(rank_state_dict, state_dict):
-    """Extracts partition_info from rank_state_dict and loads on state_dict for megatron-partitioned weights"""
-    partition_info = _utils.state_dict_partition_info_key()
-    if partition_info not in state_dict:
-        state_dict[partition_info] = {}
-
-    rank_partition_info = rank_state_dict[partition_info]
-    for model_state_key, partition_info_dict in rank_partition_info.items():
-        if model_state_key not in state_dict[partition_info]:
-            # add partition info only if weight is megatron partitioned
-            if partition_info_dict["megatron_row_partition"] >= 0:
-                state_dict[partition_info][model_state_key] = partition_info_dict
-
-
-def _to_pytorch_format(state_dict):
-    """Convert ORT state dictionary schema (hierarchical structure) to PyTorch state dictionary schema (flat structure)"""
-
-    pytorch_state_dict = {}
-    for model_state_key, model_state_value in state_dict[_utils.state_dict_model_key()][
-        _utils.state_dict_full_precision_key()
-    ].items():
-        # convert numpy array to a torch tensor
-        pytorch_state_dict[model_state_key] = torch.tensor(model_state_value)
-    return pytorch_state_dict
-
-
-def _get_parallellism_groups(data_parallel_size, horizontal_parallel_size, world_size):
-    """Returns the D and H groups for the given sizes"""
-    num_data_groups = world_size // data_parallel_size
-    data_groups = []
-    for data_group_id in range(num_data_groups):
-        data_group_ranks = []
-        for r in range(data_parallel_size):
-            data_group_ranks.append(data_group_id + horizontal_parallel_size * r)
-        data_groups.append(data_group_ranks)
-
-    num_horizontal_groups = world_size // horizontal_parallel_size
-    horizontal_groups = []
-    for hori_group_id in range(num_horizontal_groups):
-        hori_group_ranks = []
-        for r in range(horizontal_parallel_size):
-            hori_group_ranks.append(hori_group_id * horizontal_parallel_size + r)
-        horizontal_groups.append(hori_group_ranks)
-
-    return data_groups, horizontal_groups
-
-
-def _aggregate_over_ranks(
-    ordered_paths,
-    ranks,
-    sharded_states_original_dims=None,
-    mode=_AGGREGATION_MODE.Zero,
-    partial_aggregation=False,
-    pytorch_format=True,
-):
-    """Aggregate checkpoint files over set of ranks and return a single state dictionary
-
-    Args:
-        ordered_paths: list of paths in the order in which they must be aggregated
-        ranks: list of ranks that are to be aggregated
-        sharded_states_original_dims: dict containing the original dims for sharded states that are persisted over
-                                        multiple calls to _aggregate_over_ranks()
-        mode: mode of aggregation: Zero or Megatron
-        partial_aggregation: boolean flag to indicate whether to produce a partially
-                                aggregated state which can be further aggregated over
-        pytorch_format: boolean flag to select either ONNX Runtime or PyTorch state schema of the returned state_dict
-    Returns:
-        state_dict that can be loaded into an ORTTrainer or into a PyTorch model
-    """
-    state_dict = {}
-    if sharded_states_original_dims is None:
-        sharded_states_original_dims = dict()
-    world_rank = _utils.state_dict_trainer_options_world_rank_key()
-    mixed_precision = _utils.state_dict_trainer_options_mixed_precision_key()
-    zero_stage = _utils.state_dict_trainer_options_zero_stage_key()
-    world_size = _utils.state_dict_trainer_options_world_size_key()
-    optimizer_name = _utils.state_dict_trainer_options_optimizer_name_key()
-
-    loaded_mixed_precision = None
-    loaded_world_size = None
-    loaded_zero_stage = None
-    loaded_optimizer_name = None
-
-    for i, path in enumerate(ordered_paths):
-        rank_state_dict = _checkpoint_storage.load(path)
-
-        assert _utils.state_dict_partition_info_key() in rank_state_dict, "Missing information: partition_info"
-        assert _utils.state_dict_trainer_options_key() in rank_state_dict, "Missing information: trainer_options"
-        assert (
-            ranks[i] == rank_state_dict[_utils.state_dict_trainer_options_key()][world_rank]
-        ), "Unexpected rank in file at path {}. Expected {}, got {}".format(
-            path, rank, rank_state_dict[_utils.state_dict_trainer_options_key()][world_rank]  # noqa: F821
-        )
-        if loaded_mixed_precision is None:
-            loaded_mixed_precision = rank_state_dict[_utils.state_dict_trainer_options_key()][mixed_precision]
-        else:
-            assert (
-                loaded_mixed_precision == rank_state_dict[_utils.state_dict_trainer_options_key()][mixed_precision]
-            ), f"Mixed precision state mismatch among checkpoint files. File: {path}"
-        if loaded_world_size is None:
-            loaded_world_size = rank_state_dict[_utils.state_dict_trainer_options_key()][world_size]
-        else:
-            assert (
-                loaded_world_size == rank_state_dict[_utils.state_dict_trainer_options_key()][world_size]
-            ), f"World size state mismatch among checkpoint files. File: {path}"
-        if loaded_zero_stage is None:
-            loaded_zero_stage = rank_state_dict[_utils.state_dict_trainer_options_key()][zero_stage]
-        else:
-            assert (
-                loaded_zero_stage == rank_state_dict[_utils.state_dict_trainer_options_key()][zero_stage]
-            ), f"Zero stage mismatch among checkpoint files. File: {path}"
-        if loaded_optimizer_name is None:
-            loaded_optimizer_name = rank_state_dict[_utils.state_dict_trainer_options_key()][optimizer_name]
-        else:
-            assert (
-                loaded_optimizer_name == rank_state_dict[_utils.state_dict_trainer_options_key()][optimizer_name]
-            ), f"Optimizer name mismatch among checkpoint files. File: {path}"
-
-        # aggregate all model states
-        _aggregate_model_states(rank_state_dict, sharded_states_original_dims, state_dict, loaded_mixed_precision, mode)
-
-        if not pytorch_format:
-            # aggregate all optimizer states if pytorch_format is False
-            _aggregate_optimizer_states(rank_state_dict, sharded_states_original_dims, state_dict, mode)
-
-            # for D+H aggregation scenario, the first pass of aggregation(partial aggregation) is over D groups
-            # to aggregate over Zero, and another pass to aggregate Megatron partitioned
-            # states. Preserve the relevant partition info only for weights that are megatron partitioned for
-            # a partial aggregation call
-            if partial_aggregation:
-                _aggregate_megatron_partition_info(rank_state_dict, state_dict)
-
-            # entry for trainer_options in the state_dict to perform other sanity checks
-            if _utils.state_dict_trainer_options_key() not in state_dict:
-                _aggregate_trainer_options(rank_state_dict, state_dict, partial_aggregation)
-
-            # entry for user_dict in the state_dict if not already present
-            if (
-                _utils.state_dict_user_dict_key() not in state_dict
-                and _utils.state_dict_user_dict_key() in rank_state_dict
-            ):
-                state_dict[_utils.state_dict_user_dict_key()] = rank_state_dict[_utils.state_dict_user_dict_key()]
-
-    # for a partial aggregation scenario, we might not have the entire tensor aggregated yet, thus skip reshape
-    if not partial_aggregation:
-        # reshape all the sharded tensors based on the original dimensions stored in sharded_states_original_dims
-        _reshape_states(sharded_states_original_dims, state_dict, loaded_mixed_precision)
-
-    # return a flat structure for PyTorch model in case pytorch_format is True
-    # else return the hierarchical structure for ORTTrainer
-    return _to_pytorch_format(state_dict) if pytorch_format else state_dict
-
-
-def _aggregate_over_D_H(ordered_paths, D_groups, H_groups, pytorch_format):  # noqa: N802
-    """Aggregate checkpoint files and return a single state dictionary for the D+H
-    (Zero+Megatron) partitioning strategy.
-    For D+H aggregation scenario, the first pass of aggregation(partial aggregation) is over D groups
-    to aggregate over Zero, and another pass over the previously aggregated states
-    to aggregate Megatron partitioned states.
-    """
-    sharded_states_original_dims = {}
-    aggregate_data_checkpoint_files = []
-
-    # combine for Zero over data groups and save to temp file
-    with tempfile.TemporaryDirectory() as save_dir:
-        for group_id, d_group in enumerate(D_groups):
-            aggregate_state_dict = _aggregate_over_ranks(
-                ordered_paths["D"][group_id],
-                d_group,
-                sharded_states_original_dims,
-                partial_aggregation=True,
-                pytorch_format=False,
-            )
-
-            filename = "ort.data_group." + str(group_id) + ".ort.pt"
-            filepath = os.path.join(save_dir, filename)
-            _checkpoint_storage.save(aggregate_state_dict, filepath)
-            aggregate_data_checkpoint_files.append(filepath)
-
-        assert len(aggregate_data_checkpoint_files) > 0
-
-        # combine for megatron:
-        aggregate_state = _aggregate_over_ranks(
-            aggregate_data_checkpoint_files,
-            H_groups[0],
-            sharded_states_original_dims,
-            mode=_AGGREGATION_MODE.Megatron,
-            pytorch_format=pytorch_format,
-        )
-
-    return aggregate_state
-
-
-def aggregate_checkpoints(paths, pytorch_format=True):
-    """Aggregate checkpoint files and return a single state dictionary
-
-    Aggregates checkpoint files specified by paths and loads them one at a time, merging
-    them into a single state dictionary.
-    The checkpoint files represented by paths must be saved through ORTTrainer.save_checkpoint() function.
-    The schema of the state_dict returned will be in the same as the one returned by ORTTrainer.state_dict()
-
-    Args:
-        paths: list of more than one file represented as strings where the checkpoint is saved
-        pytorch_format: boolean flag to select either ONNX Runtime or PyTorch state schema of the returned state_dict
-    Returns:
-        state_dict that can be loaded into an ORTTrainer or into a PyTorch model
-    """
-
-    loaded_trainer_options = _checkpoint_storage.load(paths[0], key=_utils.state_dict_trainer_options_key())
-    D_size = _utils.state_dict_trainer_options_data_parallel_size_key()  # noqa: N806
-    H_size = _utils.state_dict_trainer_options_horizontal_parallel_size_key()  # noqa: N806
-    world_size = _utils.state_dict_trainer_options_world_size_key()
-
-    D_size = loaded_trainer_options[D_size]  # noqa: N806
-    H_size = loaded_trainer_options[H_size]  # noqa: N806
-    world_size = loaded_trainer_options[world_size]
-    D_groups, H_groups = _get_parallellism_groups(D_size, H_size, world_size)  # noqa: N806
-
-    combine_zero = loaded_trainer_options[_utils.state_dict_trainer_options_zero_stage_key()] > 0
-    combine_megatron = len(H_groups[0]) > 1
-
-    # order the paths in the order of groups in which they must be aggregated according to
-    # data-parallel groups and H-parallel groups obtained
-    # eg: {'D': [[path_0, path_2],[path_1, path_3]], 'H': [[path_0, path_1],[path_2, path_3]]}
-    ordered_paths = _order_paths(paths, D_groups, H_groups)
-
-    aggregate_state = None
-    if combine_zero and combine_megatron:
-        aggregate_state = _aggregate_over_D_H(ordered_paths, D_groups, H_groups, pytorch_format)
-    elif combine_zero:
-        aggregate_state = _aggregate_over_ranks(
-            ordered_paths["D"][0], D_groups[0], mode=_AGGREGATION_MODE.Zero, pytorch_format=pytorch_format
-        )
-    elif combine_megatron:
-        aggregate_state = _aggregate_over_ranks(
-            ordered_paths["H"][0], H_groups[0], mode=_AGGREGATION_MODE.Megatron, pytorch_format=pytorch_format
-        )
-
-    return aggregate_state
-
-
-################################################################################
-# Helper functions
-################################################################################
-
-
-def _load_single_checkpoint(ort_trainer, checkpoint_dir, checkpoint_prefix, is_partitioned, strict):
-    checkpoint_name = _get_checkpoint_name(
-        checkpoint_prefix,
-        is_partitioned,
-        ort_trainer.options.distributed.world_rank,
-        ort_trainer.options.distributed.world_size,
-    )
-    checkpoint_file = os.path.join(checkpoint_dir, checkpoint_name)
-
-    if is_partitioned:
-        assert_msg = (
-            f"Couldn't find checkpoint file {checkpoint_file}."
-            " Optimizer partitioning is enabled using ZeRO. Please make sure the checkpoint file exists "
-            f"for rank {ort_trainer.options.distributed.world_rank} of {ort_trainer.options.distributed.world_size}"
-        )
-    else:
-        assert_msg = f"Couldn't find checkpoint file {checkpoint_file}."
-    assert os.path.exists(checkpoint_file), assert_msg
-
-    checkpoint_state = torch.load(checkpoint_file, map_location="cpu")
-    experimental_load_state_dict(ort_trainer, checkpoint_state["model"], strict=strict)
-    del checkpoint_state["model"]
-    return checkpoint_state
-
-
-def _load_multi_checkpoint(ort_trainer, checkpoint_dir, checkpoint_prefix, strict):
-    checkpoint_files = _list_checkpoint_files(checkpoint_dir, checkpoint_prefix)
-
-    ckpt_agg = _CombineZeroCheckpoint(checkpoint_files)
-    aggregate_state_dict = ckpt_agg.aggregate_checkpoints()
-
-    experimental_load_state_dict(ort_trainer, aggregate_state_dict, strict=strict)
-
-    # aggregate other keys in the state_dict.
-    # Values will be overwritten for matching keys among workers
-    all_checkpoint_states = dict()
-    for checkpoint_file in checkpoint_files:
-        checkpoint_state = torch.load(checkpoint_file, map_location="cpu")
-        del checkpoint_state["model"]
-        all_checkpoint_states.update(checkpoint_state)
-    return all_checkpoint_states
-
-
-def _list_checkpoint_files(checkpoint_dir, checkpoint_prefix, extension=".ort.pt"):
-    ckpt_file_names = [f for f in os.listdir(checkpoint_dir) if f.startswith(checkpoint_prefix)]
-    ckpt_file_names = [f for f in ckpt_file_names if f.endswith(extension)]
-    ckpt_file_names = [os.path.join(checkpoint_dir, f) for f in ckpt_file_names]
-
-    assert len(ckpt_file_names) > 0, f"No checkpoint found with prefix '{checkpoint_prefix}' at '{checkpoint_dir}'"
-    return ckpt_file_names
-
-
-def _get_checkpoint_name(prefix, is_partitioned, world_rank=None, world_size=None):
-    SINGLE_CHECKPOINT_FILENAME = "{prefix}.ort.pt"  # noqa: N806
-    MULTIPLE_CHECKPOINT_FILENAME = "{prefix}.ZeRO.{world_rank}.{world_size}.ort.pt"  # noqa: N806
-
-    if is_partitioned:
-        filename = MULTIPLE_CHECKPOINT_FILENAME.format(
-            prefix=prefix, world_rank=world_rank, world_size=(world_size - 1)
-        )
-    else:
-        filename = SINGLE_CHECKPOINT_FILENAME.format(prefix=prefix)
-    return filename
-
-
-def _split_state_dict(state_dict):
-    optimizer_keys = ["Moment_1_", "Moment_2_", "Update_Count_", "Step"]
-    split_sd = {"optimizer": {}, "fp32_param": {}, "fp16_param": {}}
-    for k, v in state_dict.items():
-        mode = "fp32_param"
-        for optim_key in optimizer_keys:
-            if k.startswith(optim_key):
-                mode = "optimizer"
-                break
-        if k.endswith("_fp16"):
-            mode = "fp16_param"
-        split_sd[mode][k] = v
-    return split_sd
-
-
-class _CombineZeroCheckpoint:
-    def __init__(self, checkpoint_files, clean_state_dict=None):
-        assert len(checkpoint_files) > 0, "No checkpoint files passed"
-        self.checkpoint_files = checkpoint_files
-        self.clean_state_dict = clean_state_dict
-        self.world_size = int(self.checkpoint_files[0].split("ZeRO")[1].split(".")[2]) + 1
-        assert len(self.checkpoint_files) == self.world_size, f"Could not find {self.world_size} files"
-        self.weight_shape_map = {}
-        self.sharded_params = set()
-
-    def _split_name(self, name: str):
-        name_split = name.split("_view_")
-        view_num = None
-        if len(name_split) > 1:
-            view_num = int(name_split[1])
-        optimizer_key = ""
-        mp_suffix = ""
-        if name_split[0].startswith("Moment_1"):
-            optimizer_key = "Moment_1_"
-        elif name_split[0].startswith("Moment_2"):
-            optimizer_key = "Moment_2_"
-        elif name_split[0].startswith("Update_Count"):
-            optimizer_key = "Update_Count_"
-        elif name_split[0].endswith("_fp16"):
-            mp_suffix = "_fp16"
-        param_name = name_split[0]
-        if optimizer_key:
-            param_name = param_name.split(optimizer_key)[1]
-        param_name = param_name.split("_fp16")[0]
-        return param_name, optimizer_key, view_num, mp_suffix
-
-    def _update_weight_statistics(self, name, value):
-        if name not in self.weight_shape_map:
-            self.weight_shape_map[name] = value.size()  # original shape of tensor
-
-    def _reshape_tensor(self, key):
-        value = self.aggregate_state_dict[key]
-        weight_name, _, _, _ = self._split_name(key)
-        set_size = self.weight_shape_map[weight_name]
-        self.aggregate_state_dict[key] = value.reshape(set_size)
-
-    def _aggregate(self, param_dict):
-        for k, v in param_dict.items():
-            weight_name, optimizer_key, view_num, mp_suffix = self._split_name(k)
-            if view_num is not None:
-                # parameter is sharded
-                param_name = optimizer_key + weight_name + mp_suffix
-
-                if param_name in self.aggregate_state_dict and optimizer_key not in ["Update_Count_"]:
-                    self.sharded_params.add(param_name)
-                    # Found a previous shard of the param, concatenate shards ordered by ranks
-                    self.aggregate_state_dict[param_name] = torch.cat((self.aggregate_state_dict[param_name], v))
-                else:
-                    self.aggregate_state_dict[param_name] = v
-            else:
-                if k in self.aggregate_state_dict:
-                    assert (self.aggregate_state_dict[k] == v).all(), "Unsharded params must have the same value"
-                else:
-                    self.aggregate_state_dict[k] = v
-                self._update_weight_statistics(weight_name, v)
-
-    def aggregate_checkpoints(self):
-        warnings.warn(
-            "_CombineZeroCheckpoint.aggregate_checkpoints() will be deprecated soon. "
-            "Please use aggregate_checkpoints() instead.",
-            DeprecationWarning,
-        )
-
-        checkpoint_prefix = self.checkpoint_files[0].split(".ZeRO")[0]
-        self.aggregate_state_dict = dict()
-
-        for i in range(self.world_size):
-            checkpoint_name = _get_checkpoint_name(checkpoint_prefix, True, i, self.world_size)
-            rank_state_dict = torch.load(checkpoint_name, map_location=torch.device("cpu"))
-            if "model" in rank_state_dict:
-                rank_state_dict = rank_state_dict["model"]
-
-            if self.clean_state_dict:
-                rank_state_dict = self.clean_state_dict(rank_state_dict)
-
-            rank_state_dict = _split_state_dict(rank_state_dict)
-            self._aggregate(rank_state_dict["fp16_param"])
-            self._aggregate(rank_state_dict["fp32_param"])
-            self._aggregate(rank_state_dict["optimizer"])
-
-        for k in self.sharded_params:
-            self._reshape_tensor(k)
-        return self.aggregate_state_dict
diff --git a/orttraining/orttraining/python/training/model_desc_validation.py b/orttraining/orttraining/python/training/model_desc_validation.py
deleted file mode 100644
index dd3f4cb95cd59..0000000000000
--- a/orttraining/orttraining/python/training/model_desc_validation.py
+++ /dev/null
@@ -1,408 +0,0 @@
-from collections import namedtuple
-
-import cerberus
-import torch
-
-from ._utils import static_vars
-
-LEARNING_RATE_IO_DESCRIPTION_NAME = "__learning_rate"
-ALL_FINITE_IO_DESCRIPTION_NAME = "__all_finite"
-LOSS_SCALE_INPUT_IO_DESCRIPTION_NAME = "__loss_scale_input_name"
-GRADIENT_ACCUMULATION_IO_DESCRIPTION_NAME = "__gradient_accumulation_name"
-
-
-class _ORTTrainerModelDesc:
-    def __init__(self, model_desc):
-        # Keep a copy of original input for debug
-        self._original = dict(model_desc)
-
-        # Global counter used to validate occurrences of 'is_loss=True' whithin 'model_desc.outputs'
-        #   A stateless validator is used for each tuple, but validation accross the whole list of tuple is needed
-        #       because just one 'is_loss=True' is allowed withing 'model_desc.outputs' list of tuples
-        _model_desc_outputs_validation.loss_counter = 0
-
-        # Used for logging purposes
-        self._main_class_name = self.__class__.__name__
-
-        # Validates user input
-        self._validated = dict(self._original)
-        validator = cerberus.Validator(MODEL_DESC_SCHEMA)
-        self._validated = validator.validated(self._validated)
-        if self._validated is None:
-            raise ValueError(f"Invalid model_desc: {validator.errors}")
-
-        # Normalize inputs to a list of namedtuple(name, shape)
-        self._InputDescription = namedtuple("InputDescription", ["name", "shape"])
-        self._InputDescriptionTyped = namedtuple("InputDescriptionTyped", ["name", "shape", "dtype"])
-        for idx, input in enumerate(self._validated["inputs"]):
-            self._validated["inputs"][idx] = self._InputDescription(*input)
-
-        # Normalize outputs to a list of namedtuple(name, shape, is_loss)
-        self._OutputDescription = namedtuple("OutputDescription", ["name", "shape", "is_loss"])
-        self._OutputDescriptionTyped = namedtuple(
-            "OutputDescriptionTyped", ["name", "shape", "is_loss", "dtype", "dtype_amp"]
-        )
-        for idx, output in enumerate(self._validated["outputs"]):
-            if len(output) == 2:
-                self._validated["outputs"][idx] = self._OutputDescription(*output, False)
-            else:
-                self._validated["outputs"][idx] = self._OutputDescription(*output)
-
-        # Hard-code learning rate, all_finite descriptors
-        self.learning_rate = self._InputDescriptionTyped(LEARNING_RATE_IO_DESCRIPTION_NAME, [1], torch.float32)
-
-        # Convert dict in object
-        for k, v in self._validated.items():
-            setattr(self, k, self._wrap(v))
-
-    def __repr__(self):
-        """Pretty representation for a model description class"""
-
-        pretty_msg = "Model description:\n"
-
-        # Inputs
-        inputs = []
-        for i_desc in self.inputs:
-            if isinstance(i_desc, self._InputDescription):
-                inputs.append(f"(name={i_desc.name}, shape={i_desc.shape})")
-            elif isinstance(i_desc, self._InputDescriptionTyped):
-                inputs.append(f"(name={i_desc.name}, shape={i_desc.shape}, dtype={i_desc.dtype})")
-            else:
-                raise ValueError(f"Unexpected type {type(i_desc)} for input description")
-
-        pretty_msg += "\nInputs:"
-        for idx, item in enumerate(inputs):
-            pretty_msg += f"\n\t{idx}: {item}"
-
-        # Outputs
-        outputs = []
-        for o_desc in self.outputs:
-            if isinstance(o_desc, self._OutputDescription):
-                outputs.append(f"(name={o_desc.name}, shape={o_desc.shape})")
-            elif isinstance(o_desc, self._OutputDescriptionTyped):
-                outputs.append(
-                    f"(name={o_desc.name}, shape={o_desc.shape}, dtype={o_desc.dtype}, dtype_amp={o_desc.dtype_amp})"
-                )
-            else:
-                raise ValueError(f"Unexpected type {type(o_desc)} for output description")
-        pretty_msg += "\nOutputs:"
-        for idx, item in enumerate(outputs):
-            pretty_msg += f"\n\t{idx}: {item}"
-
-        # Learning rate
-        if self.learning_rate:
-            pretty_msg += "\nLearning rate: "
-            pretty_msg += (
-                f"(name={self.learning_rate.name}, shape={self.learning_rate.shape}, dtype={self.learning_rate.dtype})"
-            )
-
-        # Mixed precision
-        if getattr(self, ALL_FINITE_IO_DESCRIPTION_NAME, None) or getattr(
-            self, LOSS_SCALE_INPUT_IO_DESCRIPTION_NAME, None
-        ):
-            pretty_msg += "\nMixed Precision:"
-            if getattr(self, ALL_FINITE_IO_DESCRIPTION_NAME, None):
-                pretty_msg += "\n\tis gradients finite: "
-                pretty_msg += (
-                    f"(name={self.all_finite.name}, shape={self.all_finite.shape}, dtype={self.all_finite.dtype})"
-                )
-            if getattr(self, LOSS_SCALE_INPUT_IO_DESCRIPTION_NAME, None):
-                pretty_msg += "\n\tloss scale input name: "
-                pretty_msg += f"(name={self.loss_scale_input.name}, shape={self.loss_scale_input.shape}, dtype={self.loss_scale_input.dtype})"
-
-        # Gradient Accumulation steps
-        if self.gradient_accumulation:
-            pretty_msg += "\nGradient Accumulation: "
-            pretty_msg += f"(name={self.gradient_accumulation.name}, shape={self.gradient_accumulation.shape}, dtype={self.gradient_accumulation.dtype})"
-
-        return pretty_msg
-
-    def add_type_to_input_description(self, index, dtype):
-        """Updates an existing input description at position 'index' with 'dtype' type information
-
-        Args:
-            index (int): position within 'inputs' description
-            dtype (torch.dtype): input data type
-        """
-
-        assert isinstance(index, int) and index >= 0, "input 'index' must be a positive int"
-        assert isinstance(dtype, torch.dtype), "input 'dtype' must be a torch.dtype type"
-        existing_values = (*self.inputs[index],)
-        if isinstance(self.inputs[index], self._InputDescriptionTyped):
-            existing_values = (*existing_values[:-1],)
-        self.inputs[index] = self._InputDescriptionTyped(*existing_values, dtype)
-
-    def add_type_to_output_description(self, index, dtype, dtype_amp=None):
-        """Updates an existing output description at position 'index' with 'dtype' type information
-
-        Args:
-            index (int): position within 'inputs' description
-            dtype (torch.dtype): input data type
-            dtype_amp (torch.dtype, default is None): input data type for evaluation with mixed precision
-        """
-
-        assert isinstance(index, int) and index >= 0, "output 'index' must be a positive int"
-        assert isinstance(dtype, torch.dtype), "output 'dtype' must be a torch.dtype type"
-        assert dtype_amp is None or isinstance(
-            dtype_amp, torch.dtype
-        ), "output 'dtype_amp' must be either None or torch.dtype type"
-        existing_values = (*self.outputs[index],)
-        if isinstance(self.outputs[index], self._OutputDescriptionTyped):
-            existing_values = (*existing_values[:-2],)
-        self.outputs[index] = self._OutputDescriptionTyped(*existing_values, dtype, dtype_amp)
-
-    @property
-    def gradient_accumulation(self):
-        return getattr(self, GRADIENT_ACCUMULATION_IO_DESCRIPTION_NAME, None)
-
-    @gradient_accumulation.setter
-    def gradient_accumulation(self, name):
-        self._add_output_description(
-            self, name, [1], False, torch.bool, None, GRADIENT_ACCUMULATION_IO_DESCRIPTION_NAME, ignore_duplicate=True
-        )
-
-    @property
-    def all_finite(self):
-        return getattr(self, ALL_FINITE_IO_DESCRIPTION_NAME, None)
-
-    @all_finite.setter
-    def all_finite(self, name):
-        self._add_output_description(
-            self, name, [1], False, torch.bool, None, ALL_FINITE_IO_DESCRIPTION_NAME, ignore_duplicate=True
-        )
-
-    @property
-    def loss_scale_input(self):
-        return getattr(self, LOSS_SCALE_INPUT_IO_DESCRIPTION_NAME, None)
-
-    @loss_scale_input.setter
-    def loss_scale_input(self, name):
-        self._add_input_description(
-            self, name, [], torch.float32, LOSS_SCALE_INPUT_IO_DESCRIPTION_NAME, ignore_duplicate=True
-        )
-
-    def _add_input_description(self, node, name, shape, dtype=None, attr_name=None, ignore_duplicate=False):
-        """Add a new input description into the node object
-
-        If 'dtype' is specified, a typed input description namedtuple(name, shape, dtype) is created.
-        Otherwise an untyped input description namedtuple(name, shape) is created instead.
-
-        Args:
-            node (list or object): node to append input description to. When 'node' is 'self.inputs',
-                a new input description is appended to the list.
-                Otherwise, a new input description is created as an attribute into 'node' with name 'attr_name'
-            name (str): name of input description
-            shape (list): shape of input description
-            dtype (torch.dtype): input data type
-            attr_name (str, default is None): friendly name to allow direct access to the output description
-            ignore_duplicate (bool, default is False): silently skips addition of duplicate inputs
-        """
-
-        assert isinstance(name, str) and len(name) > 0, "'name' is an invalid input name"
-        not_found = True
-        if not ignore_duplicate:
-            if id(node) == id(self.inputs):
-                not_found = all([name not in i_desc.name for i_desc in node])
-                assert not_found, f"'name' {name} already exists in the inputs description"
-            else:
-                not_found = attr_name not in dir(self)
-                assert not_found, f"'attr_name' {attr_name} already exists in the 'node'"
-        elif not not_found:
-            return
-        assert isinstance(shape, list) and all(
-            [(isinstance(dim, int) or (isinstance(dim, str) and len(dim) > 0)) for dim in shape]
-        ), "'shape' must be a list of int or str with length at least 1"
-        assert dtype is None or isinstance(dtype, torch.dtype), "'dtype' must be either None or a torch.dtype type"
-        if dtype:
-            new_input_desc = self._InputDescriptionTyped(name, shape, dtype)
-        else:
-            new_input_desc = self._InputDescription(name, shape)
-
-        if id(node) == id(self.inputs):
-            self.inputs.append(new_input_desc)
-        else:
-            assert isinstance(attr_name, str) and len(attr_name) > 0, "Invalid 'attr_name'"
-            setattr(node, attr_name, new_input_desc)
-
-    def _add_output_description(
-        self, node, name, shape, is_loss, dtype=None, dtype_amp=None, attr_name=None, ignore_duplicate=False
-    ):
-        """Add a new output description into the node object as a tuple
-
-        When (name, shape, is_loss, dtype) is specified, a typed output description is created
-        Otherwise an untyped output description (name, shape, is_loss) is created instead
-
-        Args:
-            node (list or object): node to append output description to. When 'node' is 'self.outputs',
-                a new output description is appended to the list.
-                Otherwise, a new output description is created as an attribute into 'node' with name 'attr_name'
-            name (str): name of output description
-            shape (list): shape of output description
-            is_loss (bool): specifies whether this output is a loss
-            dtype (torch.dtype): input data type
-            dtype_amp (torch.dtype, default is None): input data type for evaluation with mixed precision.
-            attr_name (str, default is None): friendly name to allow direct access to the output description
-            ignore_duplicate (bool, default is False): silently skips addition of duplicate outputs
-        """
-
-        assert isinstance(name, str) and len(name) > 0, "'name' is an invalid output name"
-        assert isinstance(shape, list) and all(
-            [(isinstance(dim, int) or (isinstance(dim, str) and len(dim) > 0)) for dim in shape]
-        ), "'shape' must be a list of int or str with length at least 1"
-        assert isinstance(is_loss, bool), "'is_loss' must be a bool"
-
-        not_found = True
-        if not ignore_duplicate:
-            if id(node) == id(self.outputs):
-                not_found = all([name not in o_desc.name for o_desc in node])
-                assert not_found, f"'name' {name} already exists in the outputs description"
-                assert (
-                    all([not o_desc.is_loss for o_desc in node]) if is_loss else True
-                ), "Only one 'is_loss' is supported at outputs description"
-            else:
-                not_found = attr_name not in dir(self)
-                assert not_found, f"'attr_name' {attr_name} already exists in the 'node'"
-        elif not not_found:
-            return
-
-        assert dtype is None or isinstance(dtype, torch.dtype), "'dtype' must be either None or a torch.dtype type"
-        if dtype:
-            new_output_desc = self._OutputDescriptionTyped(name, shape, is_loss, dtype, None)
-        else:
-            new_output_desc = self._OutputDescription(name, shape, is_loss)
-
-        if id(node) == id(self.outputs):
-            self.outputs.append(new_output_desc)
-        else:
-            assert isinstance(attr_name, str) and len(attr_name) > 0, "Invalid 'attr_name'"
-            setattr(node, attr_name, new_output_desc)
-
-    def _wrap(self, v):
-        """Add 'v' as self's attribute to allow direct access as self.v"""
-        if isinstance(v, (list)):
-            return type(v)([self._wrap(v) for v in v])
-        elif isinstance(
-            v,
-            (
-                self._InputDescription,
-                self._InputDescriptionTyped,
-                self._OutputDescription,
-                self._OutputDescriptionTyped,
-            ),
-        ):
-            return v
-        elif isinstance(v, (tuple)):
-            return type(v)([self._wrap(v) for v in v])
-        elif isinstance(v, (dict, int, float, bool, str)):
-            return _ORTTrainerModelDescInternal(self._main_class_name, v) if isinstance(v, dict) else v
-        else:
-            raise ValueError(
-                f"Unsupported type for model_desc ({v})."
-                "Only int, float, bool, str, list, tuple and dict are supported"
-            )
-
-
-class _ORTTrainerModelDescInternal(_ORTTrainerModelDesc):
-    r"""Internal class used by ONNX Runtime training backend for input validation
-
-    NOTE: Users MUST NOT use this class in any way!
-    """
-
-    def __init__(self, main_class_name, model_desc):
-        # Used for logging purposes
-        self._main_class_name = main_class_name
-
-        # Convert dict in object
-        for k, v in dict(model_desc).items():
-            setattr(self, k, self._wrap(v))
-
-
-def _model_desc_inputs_validation(field, value, error):
-    r"""Cerberus custom check method for 'model_desc.inputs'
-
-    'model_desc.inputs' is a list of tuples.
-    The list has variable length, but each tuple has size 2
-
-    The first element of the tuple is a string which represents the input name
-    The second element is a list of shapes. Each shape must be either an int or string.
-        Empty list represents a scalar output
-
-    Validation is done within each tuple to enforce the schema described above.
-
-    Example:
-
-        .. code-block:: python
-
-            model_desc['inputs'] = [('input1', ['batch', 1024]),
-                                    ('input2', [])
-                                    ('input3', [512])]
-    """
-
-    if not isinstance(value, tuple) or len(value) != 2:
-        error(field, "must be a tuple with size 2")
-    if not isinstance(value[0], str):
-        error(field, "the first element of the tuple (aka name) must be a string")
-    if not isinstance(value[1], list):
-        error(field, "the second element of the tuple (aka shape) must be a list")
-    else:
-        for shape in value[1]:
-            if not isinstance(shape, str) and not isinstance(shape, int) or isinstance(shape, bool):
-                error(field, "each shape must be either a string or integer")
-
-
-@static_vars(loss_counter=0)
-def _model_desc_outputs_validation(field, value, error):
-    r"""Cerberus custom check method for 'model_desc.outputs'
-
-    'model_desc.outputs' is a list of tuples with variable length.
-    The first element of the tuple is a string which represents the output name
-    The second element is a list of shapes. Each shape must be either an int or string.
-        Empty list represents a scalar output
-    The third element is optional and is a flag that signals whether the output is a loss value
-
-    Validation is done within each tuple to enforce the schema described above, but also
-    throughout the list of tuples to ensure a single 'is_loss=True' occurrence.
-
-    Example:
-
-        .. code-block:: python
-
-            model_desc['outputs'] = [('output1', ['batch', 1024], is_loss=True),
-                                     ('output2', [], is_loss=False)
-                                     ('output3', [512])]
-    """
-
-    if not isinstance(value, tuple) or len(value) < 2 or len(value) > 3:
-        error(field, "must be a tuple with size 2 or 3")
-    if len(value) == 3 and not isinstance(value[2], bool):
-        error(field, "the third element of the tuple (aka is_loss) must be a boolean")
-    elif len(value) == 3:
-        if value[2]:
-            _model_desc_outputs_validation.loss_counter += 1
-        if _model_desc_outputs_validation.loss_counter > 1:
-            error(field, "only one is_loss can bet set to True")
-    if not isinstance(value[0], str):
-        error(field, "the first element of the tuple (aka name) must be a string")
-    if not isinstance(value[1], list):
-        error(field, "the second element of the tuple (aka shape) must be a list")
-    else:
-        for shape in value[1]:
-            if not isinstance(shape, str) and not isinstance(shape, int) or isinstance(shape, bool):
-                error(field, "each shape must be either a string or integer")
-
-
-# Validation schema for model description dictionary
-MODEL_DESC_SCHEMA = {
-    "inputs": {
-        "type": "list",
-        "required": True,
-        "minlength": 1,
-        "schema": {"check_with": _model_desc_inputs_validation},
-    },
-    "outputs": {
-        "type": "list",
-        "required": True,
-        "minlength": 1,
-        "schema": {"check_with": _model_desc_outputs_validation},
-    },
-}
diff --git a/orttraining/orttraining/python/training/onnxblock/model_accessor.py b/orttraining/orttraining/python/training/onnxblock/model_accessor.py
index cff435c5626c4..ac7a53a554e0a 100644
--- a/orttraining/orttraining/python/training/onnxblock/model_accessor.py
+++ b/orttraining/orttraining/python/training/onnxblock/model_accessor.py
@@ -69,7 +69,7 @@ def base(model: onnx.ModelProto):
             "model from scratch."
         )
 
-    _GLOBAL_ACCESSOR = ModelAccessor(model_clone)  # noqa: PLW0603
+    _GLOBAL_ACCESSOR = ModelAccessor(model_clone)
     try:
         yield _GLOBAL_ACCESSOR
     finally:
@@ -112,7 +112,7 @@ def empty_base(opset_version: int | None = None):
         )
     )
 
-    _GLOBAL_ACCESSOR = ModelAccessor(model)  # noqa: PLW0603
+    _GLOBAL_ACCESSOR = ModelAccessor(model)
     try:
         yield _GLOBAL_ACCESSOR
     finally:
@@ -144,7 +144,7 @@ def custom_op_library(custom_op_library_path: os.PathLike):
     if not os.path.exists(custom_op_library_path):
         raise RuntimeError(f"Custom op library path {custom_op_library_path} does not exist.")
 
-    _GLOBAL_CUSTOM_OP_LIBRARY = copy.copy(custom_op_library_path)  # noqa: PLW0603
+    _GLOBAL_CUSTOM_OP_LIBRARY = copy.copy(custom_op_library_path)
     try:
         yield _GLOBAL_CUSTOM_OP_LIBRARY
     finally:
diff --git a/orttraining/orttraining/python/training/optim/_ds_code_store.py b/orttraining/orttraining/python/training/optim/_ds_code_store.py
new file mode 100644
index 0000000000000..dc1e20bc3dcff
--- /dev/null
+++ b/orttraining/orttraining/python/training/optim/_ds_code_store.py
@@ -0,0 +1,81 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+#
+# Copyright 2020 The Microsoft DeepSpeed Team
+#
+# !!!IMPORTANT: This file is a copy of the original one in DeepSpeed repo at given version,
+# It is used to compare with the source code of current installed DeepSpeed during runtime.
+# Please don't modify it or do any code formatting for it.
+# 'orttraining/orttraining/python/training/optim/_ds_code_store.py' is removed from lintrunner config by intention.
+# --------------------------------------------------------------------------
+
+# Wrap code in this to make sure the indentation is correct compared with raw DeepSpeed.
+
+class Stage1And2_DeepSpeedZeroOptimizer_0_9_2:
+
+    def has_overflow_serial(self, params, is_grad_list=False):
+        for p in params:
+            if p.grad is not None and self._has_inf_or_nan(p.grad.data):
+                return True
+
+        return False
+
+
+    def get_grad_norm_direct(self, gradients, params, norm_type=2):
+        """Clips gradient norm of an iterable of parameters.
+
+        This is adapted from torch.nn.utils.clip_grad.clip_grad_norm_ and
+        added functionality to handle model parallel parameters. Note that
+        the gradients are modified in place.
+
+        Arguments:
+            parameters (Iterable[Tensor] or Tensor): an iterable of Tensors or a
+                single Tensor that will have gradients normalized
+            max_norm (float or int): max norm of the gradients
+            norm_type (float or int): type of the used p-norm. Can be ``'inf'`` for
+                infinity norm.
+
+        Returns:
+            Total norm of the parameters (viewed as a single vector).
+        """
+        norm_type = float(norm_type)
+        if norm_type == inf:
+            total_norm = max(g.data.abs().max() for g in gradients)
+            total_norm_cuda = get_accelerator().FloatTensor([float(total_norm)])
+            dist.all_reduce(total_norm_cuda, op=dist.ReduceOp.MAX, group=self.dp_process_group)
+
+            # Take max across all GPUs.
+            self._model_parallel_all_reduce(tensor=total_norm_cuda, op=dist.ReduceOp.MAX)
+            total_norm = total_norm_cuda[0].item()
+        else:
+            total_norm = 0.0
+            # if dist.get_rank() == 0:
+            #    logger.info(f"Total Norm beginning {total_norm}")
+            for g, p in zip(gradients, params):
+                # Pipeline parallelism may replicate parameters. Avoid multi-counting.
+                if hasattr(p, PIPE_REPLICATED) and p.ds_pipe_replicated:
+                    continue
+                if is_model_parallel_parameter(p) or (self.model_parallel_rank == 0):
+                    param_norm = g.data.double().norm(2)
+                    total_norm += param_norm.item()**2
+            # Sum across all model parallel GPUs.
+            total_norm_cuda = get_accelerator().FloatTensor([float(total_norm)])
+            dist.all_reduce(total_norm_cuda, op=dist.ReduceOp.SUM, group=self.dp_process_group)
+
+            self._model_parallel_all_reduce(tensor=total_norm_cuda, op=dist.ReduceOp.SUM)
+
+            total_norm = total_norm_cuda[0].item()**(1. / norm_type)
+
+        if total_norm == float('inf') or total_norm == -float('inf') or total_norm != total_norm:
+            total_norm = -1
+
+        return total_norm
+
+
+    def has_overflow_partitioned_grads_serial(self):
+        for i in range(len(self.bit16_groups)):
+            for j, grad in enumerate(self.averaged_gradients[i]):
+                if grad is not None and self._has_inf_or_nan(grad.data, j):
+                    return True
+        return False
diff --git a/orttraining/orttraining/python/training/optim/_ds_modifier.py b/orttraining/orttraining/python/training/optim/_ds_modifier.py
index 6a52550882c25..20f4f814e5476 100644
--- a/orttraining/orttraining/python/training/optim/_ds_modifier.py
+++ b/orttraining/orttraining/python/training/optim/_ds_modifier.py
@@ -10,6 +10,9 @@
 # - has_overflow_partitioned_grads_serial : https://github.com/microsoft/DeepSpeed/blob/d8e9ef6f99e27bb95e10bd146d145b3372b4cfda/deepspeed/runtime/zero/stage2.py#L1799
 # --------------------------------------------------------------------------
 
+from __future__ import annotations
+
+import inspect
 import types
 import warnings
 
@@ -17,12 +20,69 @@
 from numpy import inf
 from packaging.version import Version
 
+from ._ds_code_store import Stage1And2_DeepSpeedZeroOptimizer_0_9_2
 from ._modifier import FP16OptimizerModifier, check_overflow, check_overflow_for_grads
 from ._multi_tensor_apply import MultiTensorApply
 
 multi_tensor_applier = MultiTensorApply(2048 * 32)
 
 
+def _get_normalized_str(function) -> str:
+    return inspect.getsource(function)
+
+
+def _dynamic_checks(cur_ds_version: Version, optimizer) -> bool:
+    _functions_to_override = ["has_overflow_serial", "get_grad_norm_direct", "has_overflow_partitioned_grads_serial"]
+
+    _version_to_source_code_map = {"0.9.2": Stage1And2_DeepSpeedZeroOptimizer_0_9_2}
+
+    # Try to find the biggest version that is smaller than or equal to cur_ds_version.
+    # then compare the source code (in case the found version is the latest version supported);
+    # If current code does not match the found version, return False, and raise a warning to
+    # add the new version to the list.
+    versions = [Version(v) for v in _version_to_source_code_map]
+    sorted_versions = sorted(versions, reverse=True)
+    version_to_compare = None
+    for sv in sorted_versions:
+        if cur_ds_version >= sv:
+            version_to_compare = sv
+            break
+
+    if version_to_compare is None:
+        warnings.warn(
+            "Unable to find a DeepSpeed version that is smaller than or equal to the current version "
+            f"{cur_ds_version}. Skip modifying optimizer.",
+            UserWarning,
+        )
+        return False
+
+    v_optimizer_cls = _version_to_source_code_map[str(version_to_compare)]
+    all_match = True
+    for func_name in _functions_to_override:
+        if not getattr(optimizer, func_name):
+            warnings.warn(
+                f"DeepSpeed function {func_name} is not found in optimizer. Skip modifying optimizer.", UserWarning
+            )
+            all_match = False
+        cur_code_str = _get_normalized_str(getattr(optimizer, func_name))
+        v_code_str = _get_normalized_str(getattr(v_optimizer_cls, func_name))
+        if cur_code_str != v_code_str:
+            warnings.warn(
+                f"DeepSpeed function {func_name} has changed after version {version_to_compare}. "
+                f"Please append new version {cur_ds_version} in _version_to_source_code_map and _ds_code_store.py.\n"
+                f"---[{func_name}] Old Source Code Start----\n"
+                f"{v_code_str}\n"
+                f"---{func_name} Old Source Code End----\n"
+                f"---[{func_name}] New Source Code Start----\n"
+                f"{cur_code_str}\n"
+                f"---{func_name} New Source Code End----",
+                UserWarning,
+            )
+            all_match = False
+
+    return all_match
+
+
 class DeepSpeedZeROModifier(FP16OptimizerModifier):
     def __init__(self, optimizer, **kwargs) -> None:
         super().__init__(optimizer)
@@ -30,19 +90,32 @@ def __init__(self, optimizer, **kwargs) -> None:
     def can_be_modified(self):
         import deepspeed
 
+        # Note 1:
         # This modifier relies on the implementation of has_overflow_serial, get_grad_norm_direct,
         # and has_overflow_partitioned_grads_serial
         # in https://github.com/microsoft/DeepSpeed/blob/master/deepspeed/runtime/zero/stage_1_and_2.py.
-        # Everytime if we want to update this version supporting list to a newer version,
-        # we need to check if the implementation of these functions are changed.
-        # An easy way to check is to check the history of this file, if there is no change during the update,
+        # The minimum version supported is 0.4.0, all versions in between [0.4.0, 0.9.1]
+        # are manually checked to make sure the implementation of these functions are "logically" not changed.
+        # The way we did the check is to check the history of this file, if there is no change during the update,
         # it's safe to update the version supporting list. Otherwise, or the file is moved or renamed,
         # we need to check the implementation of these functions in detail.
+        #
+        # Note 2:
+        # Since version 0.9.2, we added dynamic source code check, by comparing installed version of code with
+        # the source code in our code store. If the source code is changed, we will raise a warning to ask user
+        # to add the new version to the code store. Otherwise, we will override the functions.
+
         ds_version = Version(deepspeed.__version__)
-        if ds_version > Version("0.9.1") or ds_version < Version("0.4.0"):
+        if ds_version < Version("0.4.0"):
+            warnings.warn(
+                f"Skip modifying optimizer because of unsupported DeepSpeed version {ds_version}, "
+                "minimum supported version: 0.4.0, current version",
+                UserWarning,
+            )
+            return False
+        if ds_version > Version("0.9.1") and not _dynamic_checks(ds_version, self._optimizer):
             warnings.warn(
-                "Skip modifying optimizer because of unsupported DeepSpeed version {}, "
-                "supported version: 0.4.0 - 0.9.1.".format(deepspeed.__version__),
+                f"Skip modifying optimizer because of unsupported DeepSpeed version {ds_version}.",
                 UserWarning,
             )
             return False
@@ -55,7 +128,7 @@ def can_be_modified(self):
             if not get_accelerator().device_name().startswith("cuda"):
                 warnings.warn(
                     "Skip modifying optimizer as device is not supported, "
-                    "device name: {}".format(get_accelerator().device_name()),
+                    f"device name: {get_accelerator().device_name()}",
                     UserWarning,
                 )
                 return False
diff --git a/orttraining/orttraining/python/training/optim/_modifier_registry.py b/orttraining/orttraining/python/training/optim/_modifier_registry.py
index 4a3a33ecc0513..a88740dac60b7 100644
--- a/orttraining/orttraining/python/training/optim/_modifier_registry.py
+++ b/orttraining/orttraining/python/training/optim/_modifier_registry.py
@@ -3,13 +3,59 @@
 # Licensed under the MIT License.
 # --------------------------------------------------------------------------
 
+from __future__ import annotations
+
+import warnings
+from typing import ClassVar
+
 from ._apex_amp_modifier import ApexAMPModifier
 from ._ds_modifier import DeepSpeedZeROModifier
 from ._megatron_modifier import LegacyMegatronLMModifier
+from ._modifier import FP16OptimizerModifier
+
+
+class _AccelerateDeepSpeedZeROModifier(DeepSpeedZeROModifier):
+    """
+    Modifier for wrapper of DeepSpeed Optimizer in accelerator.
+    https://github.com/huggingface/accelerate/blob/7843286f2e1c50735d259fbc0084a7f1c85e00e3/src/accelerate/utils/deepspeed.py#L182C19-L182C19
+    """
+
+    def __init__(self, accelerator_optimizer, **kwargs) -> None:
+        super().__init__(accelerator_optimizer.optimizer)
+
+
+def get_full_qualified_type_name(o):
+    klass = o.__class__
+    module = klass.__module__
+    if module == "builtins":
+        return klass.__qualname__
+    return module + "." + klass.__qualname__
+
+
+class OptimizerModifierTypeRegistry:
+    _MAP: ClassVar[dict[str, FP16OptimizerModifier]] = {
+        "megatron.fp16.fp16.FP16_Optimizer": LegacyMegatronLMModifier,
+        "deepspeed.runtime.zero.stage2.FP16_DeepSpeedZeroOptimizer": DeepSpeedZeROModifier,
+        "deepspeed.runtime.zero.stage_1_and_2.DeepSpeedZeroOptimizer": DeepSpeedZeROModifier,
+        "apex.amp.optimizer.unique_name_as_id": ApexAMPModifier,
+    }
+
+    @staticmethod
+    def create_modifier(optimizer_full_qualified_name: str, optimizer, **kwargs) -> FP16OptimizerModifier | None:
+        """Create modifier for optimizer."""
+        if optimizer_full_qualified_name in OptimizerModifierTypeRegistry._MAP:
+            return OptimizerModifierTypeRegistry._MAP[optimizer_full_qualified_name](optimizer, **kwargs)
+
+        if optimizer_full_qualified_name == "accelerate.utils.deepspeed.DeepSpeedOptimizerWrapper":
+            if (
+                hasattr(optimizer, "optimizer")
+                and get_full_qualified_type_name(optimizer.optimizer) in OptimizerModifierTypeRegistry._MAP
+            ):
+                return _AccelerateDeepSpeedZeROModifier(optimizer, **kwargs)
 
-OptimizerModifierTypeRegistry = {
-    "megatron.fp16.fp16.FP16_Optimizer": LegacyMegatronLMModifier,
-    "deepspeed.runtime.zero.stage2.FP16_DeepSpeedZeroOptimizer": DeepSpeedZeROModifier,
-    "deepspeed.runtime.zero.stage_1_and_2.DeepSpeedZeroOptimizer": DeepSpeedZeROModifier,
-    "apex.amp.optimizer.unique_name_as_id": ApexAMPModifier,
-}
+        warnings.warn(
+            "Skip modifying optimizer because of optimizer name not found in the registry: "
+            f"{optimizer_full_qualified_name}",
+            UserWarning,
+        )
+        return None
diff --git a/orttraining/orttraining/python/training/optim/fp16_optimizer.py b/orttraining/orttraining/python/training/optim/fp16_optimizer.py
index 2a5dfbc2189d3..fc93eadc32112 100644
--- a/orttraining/orttraining/python/training/optim/fp16_optimizer.py
+++ b/orttraining/orttraining/python/training/optim/fp16_optimizer.py
@@ -3,9 +3,8 @@
 # Licensed under the MIT License.
 # --------------------------------------------------------------------------
 
-import warnings
 
-from ._modifier_registry import OptimizerModifierTypeRegistry
+from ._modifier_registry import OptimizerModifierTypeRegistry, get_full_qualified_type_name
 
 
 def FP16_Optimizer(optimizer, **kwargs):  # noqa: N802
@@ -80,22 +79,13 @@ def FP16_Optimizer(optimizer, **kwargs):  # noqa: N802
 
     """
 
-    def get_full_qualified_type_name(o):
-        if hasattr(optimizer, "_amp_stash"):
-            return "apex.amp.optimizer.unique_name_as_id"
-
-        klass = o.__class__
-        module = klass.__module__
-        if module == "builtins":
-            return klass.__qualname__
-        return module + "." + klass.__qualname__
-
-    optimizer_full_qualified_name = get_full_qualified_type_name(optimizer)
-    if optimizer_full_qualified_name not in OptimizerModifierTypeRegistry:
-        warnings.warn("Skip modifying optimizer because of optimizer name not found in registry.", UserWarning)
-        return optimizer
-
-    modifier = OptimizerModifierTypeRegistry[optimizer_full_qualified_name](optimizer, **kwargs)
-    modifier.apply()
+    optimizer_full_qualified_name = (
+        "apex.amp.optimizer.unique_name_as_id"
+        if hasattr(optimizer, "_amp_stash")
+        else get_full_qualified_type_name(optimizer)
+    )
+    modifier = OptimizerModifierTypeRegistry.create_modifier(optimizer_full_qualified_name, optimizer, **kwargs)
+    if modifier is not None:
+        modifier.apply()
 
     return optimizer
diff --git a/orttraining/orttraining/python/training/ort_triton/_codegen.py b/orttraining/orttraining/python/training/ort_triton/_codegen.py
index c071f01f87ea5..462491365c1fa 100644
--- a/orttraining/orttraining/python/training/ort_triton/_codegen.py
+++ b/orttraining/orttraining/python/training/ort_triton/_codegen.py
@@ -45,17 +45,15 @@ class TritonCodegen(NodeVisitor):
     Specialized codegen for Triton backend.
     """
 
-    def __init__(self):
-        super().__init__()
-
     def codegen(self, node: IRNode, context: CodegenContext, code_buffer: CodeBuffer, indent: int):
         func = getattr(self, node.__class__.__name__)
-        assert func is not None, "unimplemented node: %s" % node.__class__.__name__
+        assert func is not None, f"unimplemented node: {node.__class__.__name__}"
         func(node, context, code_buffer, indent)
 
     def _get_elementwise_offset_mask(self, offset_calc: OffsetCalculator, arg_name: str) -> Tuple[str, str]:
         if offset_calc.is_x_reduced(arg_name):
-            return "", ""
+            # Scalar.
+            return "tl.full([1], 0, tl.int32)", ""
         if offset_calc.is_same_x_shape(arg_name):
             return "xindex", "xmask" if offset_calc.requires_x_mask else ""
         strides = offset_calc.get_input_strides(arg_name)
@@ -91,13 +89,16 @@ def _get_reduce_offset_mask(self, offset_calc: OffsetCalculator, arg_name: str)
             if offset_calc.requires_r_mask:
                 mask_strs.append("rmask")
 
+        # If both is_x_reduced and is_r_reduced are True, it's scalar.
+        if len(offset_strs) == 0:
+            offset_strs.append("tl.full([1, 1], 0, tl.int32)")
         return " + ".join(offset_strs), " & ".join(mask_strs)
 
-    def _get_offset_mask(self, node: OffsetCalculator, arg_name: str) -> Tuple[str, str]:
+    def _get_offset_mask(self, offset_calc: OffsetCalculator, arg_name: str) -> Tuple[str, str]:
         return (
-            self._get_reduce_offset_mask(node, arg_name)
-            if node.is_reduction
-            else self._get_elementwise_offset_mask(node, arg_name)
+            self._get_reduce_offset_mask(offset_calc, arg_name)
+            if offset_calc.is_reduction
+            else self._get_elementwise_offset_mask(offset_calc, arg_name)
         )
 
     def IONode(self, node: IONode, context: CodegenContext, code_buffer: CodeBuffer, indent: int):  # noqa: N802
@@ -125,18 +126,29 @@ def IONode(self, node: IONode, context: CodegenContext, code_buffer: CodeBuffer,
     def _gen_kernel_signature(self, node: KernelNode, context: CodegenContext, code_buffer: CodeBuffer, indent: int):
         is_reduction = node.offset_calc.is_reduction
         space_indent = " " * indent
-        autotune_configs_str = ""
-        for config in node.offset_calc.autotune_configs.configs:
-            if is_reduction:
-                autotune_configs_str += (
-                    f'{space_indent}        triton.Config({{"XBLOCK": {config[0]}, "RBLOCK": {config[1]}}}, '
-                    f"num_warps={config[2]}),\n"
-                )
-            else:
-                autotune_configs_str += (
-                    f'{space_indent}        triton.Config({{"XBLOCK": {config[0]}}}, num_warps={config[2]}),\n'
-                )
-        keys_str = '"xnumel", "rnumel"' if is_reduction else '"xnumel"'
+
+        if len(node.offset_calc.autotune_configs.configs) > 1:
+            autotune_configs_str = ""
+            for config in node.offset_calc.autotune_configs.configs:
+                if is_reduction:
+                    autotune_configs_str += (
+                        f'{space_indent}        triton.Config({{"XBLOCK": {config[0]}, "RBLOCK": {config[1]}}}, '
+                        f"num_warps={config[2]}),\n"
+                    )
+                else:
+                    autotune_configs_str += (
+                        f'{space_indent}        triton.Config({{"XBLOCK": {config[0]}}}, num_warps={config[2]}),\n'
+                    )
+            keys_str = '"xnumel", "rnumel"' if is_reduction else '"xnumel"'
+            code_buffer += (
+                f"{space_indent}@triton.autotune(\n"
+                f"{space_indent}    configs=[\n"
+                f"{autotune_configs_str}"
+                f"{space_indent}    ],\n"
+                f"{space_indent}    key=[{keys_str}],\n"
+                f"{space_indent})\n"
+            )
+
         input_args = [context.get_variable_name(input.name) for input in node.inputs]
         input_args_str = ", ".join(input_args)
         if input_args_str:
@@ -158,12 +170,6 @@ def _gen_kernel_signature(self, node: KernelNode, context: CodegenContext, code_
         )
 
         code_buffer += (
-            f"{space_indent}@triton.autotune(\n"
-            f"{space_indent}    configs=[\n"
-            f"{autotune_configs_str}"
-            f"{space_indent}    ],\n"
-            f"{space_indent}    key=[{keys_str}],\n"
-            f"{space_indent})\n"
             f"{space_indent}@triton.jit\n"
             f"{space_indent}def {node.name}({input_args_str}{output_args_str}{other_input_args}{blocks_str}):\n"
         )
@@ -175,8 +181,10 @@ def ElementwiseKernelNode(  # noqa: N802
         offset_calc = node.offset_calc
         indent += 4
         space_indent = " " * indent
+        x_numel_str = str(offset_calc.x_numel)
+        if x_numel_str.isnumeric():
+            code_buffer += f"{space_indent}xnumel = {x_numel_str}\n"
         code_buffer += (
-            f"{space_indent}xnumel = {offset_calc.x_numel}\n"
             f"{space_indent}xoffset = tl.program_id(0) * XBLOCK\n"
             f"{space_indent}xindex = xoffset + tl.arange(0, XBLOCK)\n"
         )
@@ -207,9 +215,13 @@ def ReduceKernelNode(  # noqa: N802
         offset_calc = node.offset_calc
         indent += 4
         space_indent = " " * indent
+        x_numel_str = str(offset_calc.x_numel)
+        if x_numel_str.isnumeric():
+            code_buffer += f"{space_indent}xnumel = {x_numel_str}\n"
+        r_numel_str = str(offset_calc.r_numel)
+        if r_numel_str.isnumeric():
+            code_buffer += f"{space_indent}rnumel = {r_numel_str}\n"
         code_buffer += (
-            f"{space_indent}xnumel = {offset_calc.x_numel}\n"
-            f"{space_indent}rnumel = {offset_calc.r_numel}\n"
             f"{space_indent}xoffset = tl.program_id(0) * XBLOCK\n"
             f"{space_indent}xindex = xoffset + tl.arange(0, XBLOCK)[:, None]\n"
             f"{space_indent}rbase = tl.arange(0, RBLOCK)[None, :]\n"
@@ -263,14 +275,24 @@ def ReduceKernelNode(  # noqa: N802
         "Rsqrt": "{indent}{o0} = 1.0 / tl.sqrt({i0})\n",
         "Cast": "{indent}{o0} = {i0}.to(tl.{dtype})\n",
         "CastBool": "{indent}{o0} = {i0} != 0\n",
-        "Erf": "{indent}{o0} = tl.libdevice.erf({i0})\n",
-        "Gelu": "{indent}{o0} = (tl.libdevice.erf({i0} / 1.41421356237) + 1.0) * 0.5\n",
+        "Erf": "{indent}{o0} = tl.erf({i0})\n",
+        "Gelu": "{indent}{o0} = {i0} * 0.5 * (tl.math.erf({i0} * 0.70710678118654752440) + 1.0)\n",
+        "QuickGelu": "{indent}{o0} = {i0} * tl.sigmoid({i0} * {alpha})\n",
+        "GeluGrad": (
+            "{indent}{o0} = {i0} * (0.5 * (1.0 + tl.math.erf(0.70710678118654752440 * {i1})) + "
+            "{i1} * 1.12837916709551257390 * 0.70710678118654752440 * 0.5 * tl.exp(-0.5 * {i1} * {i1}))\n"
+        ),
+        "QuickGeluGrad": (
+            "{indent}tmp_v = {i1} * {alpha}\n"
+            "{indent}tmp_sigmoid = tl.sigmoid(tmp_v)\n"
+            "{indent}{o0} = {i0} * tmp_sigmoid * (1.0 + tmp_v * (1.0 - tmp_sigmoid))\n"
+        ),
         "Exp": "{indent}{o0} = tl.exp({i0})\n",
-        "Tanh": "{indent}{o0} = tl.libdevice.tanh({i0})\n",
+        "Tanh": "{indent}{o0} = tl.math.tanh({i0})\n",
         "Where": "{indent}{o0} = tl.where({i0}, {i1}, {i2})\n",
         "Sigmoid": "{indent}{o0} = tl.sigmoid({i0})\n",
         "Log": "{indent}{o0} = tl.log({i0})\n",
-        "DropoutGrad": "{indent}p = 1 - {i2}\n{indent}{o0} = tl.where({i1}, {i0} / p, 0.0)\n",
+        "DropoutGrad": "{indent}p = 1.0 - {i2}\n{indent}{o0} = tl.where({i1}, {i0} / p, 0.0)\n",
         "Identity": "{indent}{o0} = {i0}\n",
     }
 
@@ -303,6 +325,9 @@ def ComputeNode(  # noqa: N802
             else:
                 kwargs["dtype"] = to_dtype.__name__
 
+        if op_type == "QuickGelu" or op_type == "QuickGeluGrad":
+            kwargs["alpha"] = str(node.attributes.get("alpha", 1.702))
+
         if op_type == "Sum":
             output_var = kwargs["o0"]
             formula = " + ".join([kwargs[f"i{idx}"] for idx in range(len(node.inputs))])
@@ -407,7 +432,7 @@ def DropoutNode(  # noqa: N802
         offset_str = f"{node.global_offset} + " if node.global_offset != sympy.Integer(0) else ""
         offset_str += self._get_offset_mask(node.offset_calc, node.inputs[0].name)[0]
         code_buffer += (
-            f"{space_indent}p = 1 - {p_var_name}\n"
+            f"{space_indent}p = 1.0 - {p_var_name}\n"
             f"{space_indent}random = tl.rand(t_seed_cuda, {offset_str})\n"
             f"{space_indent}{mask_var_name} = random < p\n"
             f"{space_indent}{output_var_name} = tl.where({mask_var_name}, {input_var_name} / p, 0.0)\n"
@@ -431,6 +456,13 @@ def ModuleNode(self, node: ModuleNode, context: CodegenContext, code_buffer: Cod
         indent += 4
         space_indent = " " * indent
 
+        seen_symbolic_shape = set()
+        for input in node.inputs:
+            for idx, dim in enumerate(input.shape):
+                if dim.is_symbol and dim not in seen_symbolic_shape:
+                    code_buffer += f"{space_indent}{dim} = {context.get_variable_name(input.name)}.size()[{idx}]\n"
+                    seen_symbolic_shape.add(dim)
+
         if node.has_dropout:
             code_buffer += (
                 f'{space_indent}seed_cuda = torch.randint(2**31, size=(), dtype=torch.int64, device="cuda")\n\n'
@@ -457,18 +489,31 @@ def ModuleNode(self, node: ModuleNode, context: CodegenContext, code_buffer: Cod
             if kernel_node.has_dropout:
                 kernel_args_str += ", seed_cuda"
 
+            # Support symbolic shape if any.
+            symbolic_shape_args_str = ", ".join(kernel_node.symbolic_shape_variables)
+            if symbolic_shape_args_str:
+                kernel_args_str += f", {symbolic_shape_args_str}"
+
+            block_str = ""
+            if len(kernel_node.offset_calc.autotune_configs.configs) == 1:
+                config = kernel_node.offset_calc.autotune_configs.configs[0]
+                if kernel_node.offset_calc.is_reduction:
+                    block_str = f", XBLOCK={config[0]}, RBLOCK={config[1]}, num_warps={config[2]}"
+                else:
+                    block_str = f", XBLOCK={config[0]}, num_warps={config[2]}"
+
             if isinstance(kernel_node, ReduceKernelNode):
                 code_buffer += (
                     f"{space_indent}x_numel = {kernel_node.offset_calc.x_numel}\n"
                     f"{space_indent}r_numel = {kernel_node.offset_calc.r_numel}\n"
                     f'{space_indent}grid = lambda meta: (triton.cdiv(x_numel, meta["XBLOCK"]),)\n'
-                    f"{space_indent}{kernel_node.name}[grid]({kernel_args_str}, x_numel, r_numel)\n"
+                    f"{space_indent}{kernel_node.name}[grid]({kernel_args_str}, x_numel, r_numel{block_str})\n"
                 )
             else:
                 code_buffer += (
                     f"{space_indent}n_elements = {kernel_node.offset_calc.x_numel}\n"
                     f'{space_indent}grid = lambda meta: (triton.cdiv(n_elements, meta["XBLOCK"]),)\n'
-                    f"{space_indent}{kernel_node.name}[grid]({kernel_args_str}, n_elements)\n"
+                    f"{space_indent}{kernel_node.name}[grid]({kernel_args_str}, n_elements{block_str})\n"
                 )
 
             for name in node.cross_kernel_args_to_delete[idx]:
diff --git a/orttraining/orttraining/python/training/ort_triton/_common.py b/orttraining/orttraining/python/training/ort_triton/_common.py
index 65540202420b5..b7e55bc733ede 100644
--- a/orttraining/orttraining/python/training/ort_triton/_common.py
+++ b/orttraining/orttraining/python/training/ort_triton/_common.py
@@ -9,9 +9,11 @@
 import sympy
 from onnx import GraphProto, NodeProto, TensorProto
 
-from ._sympy_utils import parse_shape
+from ._sympy_utils import extract_shape_from_symbol
 from ._utils import get_attribute, get_reduce_info, next_power_of_2
 
+_SPECIAL_FLOATS: List[str] = ["inf", "-inf"]
+
 
 class CodegenContext:
     """
@@ -28,7 +30,8 @@ def get_variable_name(self, name: str) -> str:
     # For some operators such as data load/store, we need an internal variable name inside the kernel function.
     def get_internal_variable_name(self, name: str) -> str:
         var_name = self._var_map[name]
-        return self._var_map[var_name] if var_name in self._var_map else var_name
+        var_name = self._var_map[var_name] if var_name in self._var_map else var_name
+        return f'float("{var_name}")' if var_name in _SPECIAL_FLOATS else var_name
 
 
 class CodeBuffer:
@@ -49,14 +52,38 @@ def codegen(self, node: Any, context: CodegenContext, code_buffer: CodeBuffer, i
         pass
 
 
+class SymbolicDSU:
+    """
+    A 'disjoint set union' to merge symbolics so that we use less variables in the generated code.
+    When handling shape inference for elementwise Ops, if two symbols are not equal and they are not 1, we merge them.
+    """
+
+    def __init__(self):
+        self._dsu: Dict[sympy.Expr, sympy.Expr] = {}
+
+    def find(self, symbolic: sympy.Expr) -> sympy.Expr:
+        if symbolic not in self._dsu:
+            self._dsu[symbolic] = symbolic
+            return symbolic
+        if symbolic == self._dsu[symbolic]:
+            return symbolic
+        self._dsu[symbolic] = self.find(self._dsu[symbolic])
+        return self._dsu[symbolic]
+
+    def union(self, symbolic: sympy.Expr, other_symbolic: sympy.Expr):
+        root = self.find(symbolic)
+        other_root = self.find(other_symbolic)
+        self._dsu[other_root] = root
+
+
 class TensorInfo:
     """
     Represent a input/output tensor of a node.
     """
 
-    def __init__(self, dtype: TensorProto.DataType, shape: List[Any]):
+    def __init__(self, dtype: TensorProto.DataType, shape: List[sympy.Expr]):
         self._dtype: TensorProto.DataType = dtype
-        self._shape: List[sympy.Expr] = parse_shape(shape)
+        self._shape: List[sympy.Expr] = shape
 
     @property
     def dtype(self) -> TensorProto.DataType:
@@ -66,27 +93,42 @@ def dtype(self) -> TensorProto.DataType:
     def shape(self) -> List[sympy.Expr]:
         return self._shape
 
+    def update_shape(self, symbolics: SymbolicDSU):
+        self._shape = [symbolics.find(dim) if dim.is_symbol else dim for dim in self._shape]
+
 
-def _infer_elementwise_shape(input_infos: List[TensorInfo]) -> List[sympy.Expr]:
+def _infer_elementwise_shape(input_infos: List[TensorInfo], symbolics: SymbolicDSU) -> List[sympy.Expr]:
     max_len = max([len(input_info.shape) for input_info in input_infos])
     output_shape: List[sympy.Expr] = [sympy.Integer(1)] * max_len
     for input_info in input_infos:
         offset = max_len - len(input_info.shape)
-        for i in range(len(input_info.shape)):
-            if not input_info.shape[i].is_number or input_info.shape[i] != 1:
-                output_shape[i + offset] = input_info.shape[i]
+        for idx, dim in enumerate(input_info.shape):
+            if not dim.is_number or dim != 1:
+                if not output_shape[idx + offset].is_number or output_shape[idx + offset] != 1:
+                    symbolics.union(output_shape[idx + offset], dim)
+                else:
+                    output_shape[idx + offset] = dim
     return output_shape
 
 
-def _infer_elementwise(node: NodeProto, input_infos: List[TensorInfo], graph: GraphProto) -> List[TensorInfo]:
-    return [TensorInfo(input_infos[0].dtype, _infer_elementwise_shape(input_infos))]
+def _infer_elementwise(
+    node: NodeProto, input_infos: List[TensorInfo], graph: GraphProto, symbolics: SymbolicDSU
+) -> List[TensorInfo]:
+    # pylint: disable=unused-argument
+    return [TensorInfo(input_infos[0].dtype, _infer_elementwise_shape(input_infos, symbolics))]
 
 
-def _infer_where(node: NodeProto, input_infos: List[TensorInfo], graph: GraphProto) -> List[TensorInfo]:
-    return [TensorInfo(input_infos[1].dtype, _infer_elementwise_shape(input_infos))]
+def _infer_where(
+    node: NodeProto, input_infos: List[TensorInfo], graph: GraphProto, symbolics: SymbolicDSU
+) -> List[TensorInfo]:
+    # pylint: disable=unused-argument
+    return [TensorInfo(input_infos[1].dtype, _infer_elementwise_shape(input_infos, symbolics))]
 
 
-def _infer_reduction(node: NodeProto, input_infos: List[TensorInfo], graph: GraphProto) -> List[TensorInfo]:
+def _infer_reduction(
+    node: NodeProto, input_infos: List[TensorInfo], graph: GraphProto, symbolics: SymbolicDSU
+) -> List[TensorInfo]:
+    # pylint: disable=unused-argument
     input_rank = len(input_infos[0].shape)
     keep_dims, axes = get_reduce_info(node, graph, input_rank)
     axes = [axis + input_rank if axis < 0 else axis for axis in axes]
@@ -98,17 +140,26 @@ def _infer_reduction(node: NodeProto, input_infos: List[TensorInfo], graph: Grap
     return [TensorInfo(input_infos[0].dtype, shape)]
 
 
-def _infer_unary(node: NodeProto, input_infos: List[TensorInfo], graph: GraphProto) -> List[TensorInfo]:
+def _infer_unary(
+    node: NodeProto, input_infos: List[TensorInfo], graph: GraphProto, symbolics: SymbolicDSU
+) -> List[TensorInfo]:
+    # pylint: disable=unused-argument
     return [input_infos[0]]
 
 
-def _infer_cast(node: NodeProto, input_infos: List[TensorInfo], graph: GraphProto) -> List[TensorInfo]:
+def _infer_cast(
+    node: NodeProto, input_infos: List[TensorInfo], graph: GraphProto, symbolics: SymbolicDSU
+) -> List[TensorInfo]:
+    # pylint: disable=unused-argument
     dtype = get_attribute(node, "to", TensorProto.UNDEFINED)
     assert dtype != TensorProto.UNDEFINED
     return [TensorInfo(dtype, input_infos[0].shape)]
 
 
-def _infer_dropout(node: NodeProto, input_infos: List[TensorInfo], graph: GraphProto) -> List[TensorInfo]:
+def _infer_dropout(
+    node: NodeProto, input_infos: List[TensorInfo], graph: GraphProto, symbolics: SymbolicDSU
+) -> List[TensorInfo]:
+    # pylint: disable=unused-argument
     return [input_infos[0], TensorInfo(TensorProto.BOOL, input_infos[0].shape)]
 
 
@@ -131,13 +182,19 @@ class TypeAndShapeInfer:
         "ReduceMax": _infer_reduction,
         "ReduceMin": _infer_reduction,
         "Sum": _infer_elementwise,
+        "Gelu": _infer_unary,
+        "QuickGelu": _infer_unary,
+        "GeluGrad": _infer_elementwise,
+        "QuickGeluGrad": _infer_elementwise,
     }
 
     @classmethod
-    def infer(cls, node: NodeProto, input_infos: List[TensorInfo], graph: GraphProto) -> List[TensorInfo]:
+    def infer(
+        cls, node: NodeProto, input_infos: List[TensorInfo], graph: GraphProto, symbolics: SymbolicDSU
+    ) -> List[TensorInfo]:
         if node.op_type not in cls._INFER_FUNC_MAP:
             raise NotImplementedError(f"Unsupported op type: {node.op_type}")
-        return cls._INFER_FUNC_MAP[node.op_type](node, input_infos, graph)
+        return cls._INFER_FUNC_MAP[node.op_type](node, input_infos, graph, symbolics)
 
 
 class AutotuneConfigs:
@@ -148,9 +205,30 @@ class AutotuneConfigs:
     If it's reduction kernel on last contiguous dimensions, the contiguous flag is True.
     """
 
-    def __init__(self, x_numel: int, r_numel: int, contiguous: bool):
-        self.configs: List[Tuple[int, int, int]] = self._gen_autotune_configs(x_numel, r_numel, contiguous)
-        self.requires_for_loop: bool = any(config[1] < r_numel for config in self.configs)
+    def __init__(self, x_numel: sympy.Expr, r_numel: sympy.Expr, contiguous: bool):
+        x_numel_int = (
+            int(x_numel)
+            if x_numel.is_number
+            else int(
+                x_numel.subs(
+                    {symbol: sympy.Integer(extract_shape_from_symbol(symbol.name)) for symbol in x_numel.free_symbols}
+                )
+            )
+        )
+        r_numel_int = (
+            int(r_numel)
+            if r_numel.is_number
+            else int(
+                r_numel.subs(
+                    {symbol: sympy.Integer(extract_shape_from_symbol(symbol.name)) for symbol in r_numel.free_symbols}
+                )
+            )
+        )
+        self.configs: List[Tuple[int, int, int]] = self._gen_autotune_configs(x_numel_int, r_numel_int, contiguous)
+        # If there is symbolic shape, we will not tune the kernel.
+        if not x_numel.is_number or not r_numel.is_number:
+            self.configs = self.configs[-1:]
+        self.requires_for_loop: bool = any(config[1] < r_numel_int for config in self.configs)
 
     def _num_warps(self, x: int, r: int) -> int:
         return min(max(x * r // 256, 2), 8)
diff --git a/orttraining/orttraining/python/training/ort_triton/_decompose.py b/orttraining/orttraining/python/training/ort_triton/_decompose.py
index e18bb16bb80db..ffd20b09b42ea 100644
--- a/orttraining/orttraining/python/training/ort_triton/_decompose.py
+++ b/orttraining/orttraining/python/training/ort_triton/_decompose.py
@@ -58,7 +58,7 @@ def _get_dtype_and_shape(self, arg_name: str, **kwargs):
         arg_info = node_arg_infos[arg_name]
         return arg_info.dtype, arg_info.shape
 
-    def _decompose_elementwise_precision(self, node: NodeProto, graph: GraphProto, **kwargs):
+    def _decompose_elementwise_precision(self, node: NodeProto, **kwargs):
         x = node.input[0]
         dtype, _ = self._get_dtype_and_shape(x, **kwargs)
         if not _is_half_dtype(dtype):
@@ -79,15 +79,19 @@ def _decompose_elementwise_precision(self, node: NodeProto, graph: GraphProto, *
         return [*cast_nodes, op_node, cast_node1]
 
     def Exp(self, node: NodeProto, graph: GraphProto, **kwargs):  # noqa: N802
-        return self._decompose_elementwise_precision(node, graph, **kwargs)
+        # pylint: disable=unused-argument
+        return self._decompose_elementwise_precision(node, **kwargs)
 
     def Pow(self, node: NodeProto, graph: GraphProto, **kwargs):  # noqa: N802
-        return self._decompose_elementwise_precision(node, graph, **kwargs)
+        # pylint: disable=unused-argument
+        return self._decompose_elementwise_precision(node, **kwargs)
 
     def Sqrt(self, node: NodeProto, graph: GraphProto, **kwargs):  # noqa: N802
-        return self._decompose_elementwise_precision(node, graph, **kwargs)
+        # pylint: disable=unused-argument
+        return self._decompose_elementwise_precision(node, **kwargs)
 
     def LayerNormalization(self, node: NodeProto, graph: GraphProto, **kwargs):  # noqa: N802
+        # pylint: disable=unused-argument
         node_name = node.name
         x = node.input[0]
         w = node.input[1]
@@ -153,6 +157,7 @@ def LayerNormalization(self, node: NodeProto, graph: GraphProto, **kwargs):  # n
         ]
 
     def LayerNormalizationGrad(self, node: NodeProto, graph: GraphProto, **kwargs):  # noqa: N802
+        # pylint: disable=unused-argument
         node_name = node.name
         dy = node.input[0]
         x = node.input[1]
@@ -241,6 +246,7 @@ def LayerNormalizationGrad(self, node: NodeProto, graph: GraphProto, **kwargs):
         return decomposed_nodes
 
     def Softmax(self, node: NodeProto, graph: GraphProto, **kwargs):  # noqa: N802
+        # pylint: disable=unused-argument
         node_name = node.name
         x = node.input[0]
         y = node.output[0]
@@ -259,6 +265,7 @@ def Softmax(self, node: NodeProto, graph: GraphProto, **kwargs):  # noqa: N802
         return [max_node, sub_node, exp_node, sum_node, div_node]
 
     def SoftmaxGrad_13(self, node: NodeProto, graph: GraphProto, **kwargs):  # noqa: N802
+        # pylint: disable=unused-argument
         node_name = node.name
         dy = node.input[0]
         y = node.input[1]
diff --git a/orttraining/orttraining/python/training/ort_triton/_ir.py b/orttraining/orttraining/python/training/ort_triton/_ir.py
index 8aa5c1b13159b..50121cbf49804 100644
--- a/orttraining/orttraining/python/training/ort_triton/_ir.py
+++ b/orttraining/orttraining/python/training/ort_triton/_ir.py
@@ -5,7 +5,7 @@
 
 from abc import abstractmethod
 from collections import defaultdict
-from typing import Dict, List, Optional, Set, Tuple
+from typing import Any, Dict, List, Optional, Set, Tuple
 
 import numpy as np
 import sympy
@@ -88,13 +88,15 @@ def __init__(self, target_shape: List[sympy.Expr], reduce_axes: List[int]):
                 self.r_strides.insert(0, self.r_strides[0] * self.r_dims[i + 1])
         self.r_compute_dims: Set[int] = set()
         self.input_strides: Dict[str, List[sympy.Expr]] = dict()
-        # Support concrete shape only for now.
-        assert self.x_numel.is_integer and self.r_numel.is_integer
         self.autotune_configs: AutotuneConfigs = AutotuneConfigs(
-            int(self.x_numel), int(self.r_numel), not self.is_reduction or self.reduce_axes[-1] == self.rank - 1
+            self.x_numel, self.r_numel, not self.is_reduction or self.reduce_axes[-1] == self.rank - 1
+        )
+        self.requires_x_mask: bool = not self.x_numel.is_number or any(
+            int(self.x_numel) % config[0] != 0 for config in self.autotune_configs.configs
+        )
+        self.requires_r_mask: bool = not self.r_numel.is_number or any(
+            int(self.r_numel) % config[1] != 0 for config in self.autotune_configs.configs
         )
-        self.requires_x_mask: bool = any(int(self.x_numel) % config[0] != 0 for config in self.autotune_configs.configs)
-        self.requires_r_mask: bool = any(int(self.r_numel) % config[1] != 0 for config in self.autotune_configs.configs)
         self.reduced_args: Set[str] = set()
 
     def get_input_strides(self, name: str) -> List[sympy.Expr]:
@@ -129,7 +131,7 @@ def register_tensor_arg(self, tensor_arg: TensorArg):
         input_shape = tensor_arg.shape
         if tensor_arg.name in self.reduced_args:
             assert self.is_reduction
-            reduced_rank = len(input_shape) - len(self.reduce_axes)
+            reduced_rank = len(self.target_shape) - len(self.reduce_axes)
             if len(input_shape) < reduced_rank:
                 input_shape = [sympy.Integer(1)] * (reduced_rank - len(input_shape)) + input_shape
             input_shape = (
@@ -141,7 +143,9 @@ def register_tensor_arg(self, tensor_arg: TensorArg):
             input_shape = [sympy.Integer(1)] * (len(self.target_shape) - len(input_shape)) + input_shape
         running_stride = sympy.Integer(1)
         for i in range(len(self.target_shape) - 1, -1, -1):
-            if self.target_shape[i] == input_shape[i]:
+            if self.target_shape[i] == input_shape[i] and not (
+                tensor_arg.name in self.reduced_args and i in self.reduce_axes
+            ):
                 strides.insert(0, running_stride)
                 running_stride = running_stride * input_shape[i]
             else:
@@ -184,14 +188,25 @@ class ComputeNode(IRNode):
     Each operator is represented as a ComputeNode.
     """
 
-    def __init__(self, op_type: str, inputs: List[TensorArg], outputs: List[TensorArg]):
+    def __init__(
+        self,
+        op_type: str,
+        inputs: List[TensorArg],
+        outputs: List[TensorArg],
+        attributes: Dict[str, Any] = {},  # noqa: B006
+    ):
         super().__init__(inputs, outputs)
         self._op_type: str = op_type
+        self._attributes: Dict[str, Any] = attributes
 
     @property
     def op_type(self):
         return self._op_type
 
+    @property
+    def attributes(self):
+        return self._attributes
+
 
 class ReduceNode(ComputeNode):
     def __init__(self, op_type: str, inputs: List[TensorArg], outputs: List[TensorArg], offset_calc: OffsetCalculator):
@@ -289,17 +304,20 @@ def gen_variable_names(self):
             self.var_map[name] = "t_" + name
         for name in self.internal_args:
             self.var_map[name] = gen_variable_name(name, "t", existing_names)
-        for constant_name in self.constants:
-            self.var_map[constant_name] = gen_variable_name(constant_name, "c", existing_names)
-            if self.constants[constant_name].data is not None:
-                value = self.constants[constant_name].data
+        for name, tensor_arg in self.constants.items():
+            self.var_map[name] = gen_variable_name(name, "c", existing_names)
+            if tensor_arg.data is not None:
+                value = tensor_arg.data
                 if value is not None:
                     assert value.size == 1, f"unsupported constant array {value}"
-                    variable_name = self.var_map[constant_name]
+                    variable_name = self.var_map[name]
                     assert variable_name not in self.var_map
                     self.var_map[variable_name] = str(np.array(value.item(), value.dtype))
-
-        self.symbolic_shape_variables = [str(dim) for dim in self.target_shape if dim.is_symbol]
+        seen = set()
+        for dim in self.target_shape:
+            if dim.is_symbol and dim not in seen:
+                seen.add(dim)
+                self.symbolic_shape_variables.append(str(dim))
 
 
 class ElementwiseKernelNode(KernelNode):
diff --git a/orttraining/orttraining/python/training/ort_triton/_lowering.py b/orttraining/orttraining/python/training/ort_triton/_lowering.py
index 16db9ab000834..5c848d2cecc58 100644
--- a/orttraining/orttraining/python/training/ort_triton/_lowering.py
+++ b/orttraining/orttraining/python/training/ort_triton/_lowering.py
@@ -9,7 +9,7 @@
 from typing import Any, Dict, List, Set, Tuple
 
 import sympy
-from onnx import NodeProto
+from onnx import NodeProto, helper
 
 from ._common import AutotuneConfigs, TensorInfo
 from ._ir import (
@@ -51,10 +51,8 @@ def __init__(self, node: NodeProto, reduce_axes: List[int], keep_dims: int, node
         # r_numel is meant to hint how many elements in a row of tensor will be processed by each kernel.
         # r is a abbreviation of reduction, so, it's only used for reduction nodes.
         r_numel: sympy.Expr = sympy.prod(r_dims) if len(r_dims) > 0 else sympy.Integer(1)
-        # Support concrete shape only for now.
-        assert x_numel.is_integer and r_numel.is_integer
         self.autotune_configs: AutotuneConfigs = AutotuneConfigs(
-            int(x_numel), int(r_numel), len(self.reduce_axes) == 0 or self.reduce_axes[-1] == rank - 1
+            x_numel, r_numel, len(self.reduce_axes) == 0 or self.reduce_axes[-1] == rank - 1
         )
         self.reduced_args: Set[str] = set()
         if keep_dims != 1:
@@ -69,10 +67,8 @@ def _compatible_shape(self, shape: List[sympy.Expr], split_if_different: bool) -
         if len(shape) > len(self.target_shape):
             return False
         shape = [sympy.Integer(1)] * (len(self.target_shape) - len(shape)) + shape
-        for axis in range(len(shape)):
-            if shape[axis] != self.target_shape[axis] and (
-                not shape[axis].is_number or shape[axis] != sympy.Integer(1)
-            ):
+        for axis, dim in enumerate(shape):
+            if dim != self.target_shape[axis] and (not dim.is_number or dim != sympy.Integer(1)):
                 return False
         return True
 
@@ -129,7 +125,7 @@ def has_reduced_elementwise_nodes(self) -> bool:
         return not is_reduction_node(self.nodes_groups[0]) and len(self.reduced_args) > 0
 
     def dependent_nodes(self, keep_reduce_node: bool):
-        node_map = dict()
+        node_map = {}
         reduce_nodes = []
         if not keep_reduce_node and self.has_reduced_elementwise_nodes():
             for item in self.nodes_groups:
@@ -151,8 +147,8 @@ def flatten(self, sorted_nodes: List[NodeProto]) -> Tuple[List[NodeProto], List[
             layers = []
             group_layer = [self]
             while len(group_layer) > 0:
-                node_map = dict()
-                reduce_node_map = dict()
+                node_map = {}
+                reduce_node_map = {}
                 next_layer = []
                 for group in group_layer:
                     sub_node_map, reduce_nodes = group.dependent_nodes(False)
@@ -201,7 +197,7 @@ def __init__(self):
         self.cross_kernel_inputs: List[str] = []
         self.constants: List[str] = []
         self.module_outputs: List[str] = []
-        self.cross_kernel_outputs: [str] = []
+        self.cross_kernel_outputs: List[str] = []
         self.internal_args: List[str] = []
 
 
@@ -284,7 +280,7 @@ def _process_node(self, node: NodeProto, precessors: Dict[str, List[NodeProto]],
         return dependent_nodes
 
     def _group_nodes(self):
-        producers = dict()
+        producers = {}
         precessors = defaultdict(list)
         processed = set()
         groups = []
@@ -321,13 +317,16 @@ def _group_nodes(self):
                             group_dependencies[k].add(j)
 
         flag = set()
-        for i in range(len(groups)):
-            if i not in flag:
-                for j in range(i + 1, len(groups)):
-                    if j not in flag and j not in group_dependencies[i] and groups[i].try_merge(groups[j]):
-                        flag.add(j)
-                self._groups.append(groups[i])
-                flag.add(i)
+        for i, group_i in enumerate(groups):
+            if i in flag:
+                continue
+            for j, group_j in enumerate(groups):
+                if j <= i:
+                    continue
+                if j not in flag and j not in group_dependencies[i] and group_i.try_merge(group_j):
+                    flag.add(j)
+            self._groups.append(group_i)
+            flag.add(i)
 
     def _get_node_io(self, node: NodeProto) -> Tuple[List[TensorArg], List[TensorArg]]:
         input_args = []
@@ -378,7 +377,10 @@ def _to_compute_node(self, node: NodeProto, offset_calc: OffsetCalculator):
             return DropoutNode(inputs, outputs, offset_calc)
         if is_reduction_node(node):
             return ReduceNode(op_type, inputs, outputs, offset_calc)
-        return ComputeNode(op_type, inputs, outputs)
+        attributes = {}
+        for attr in node.attribute:
+            attributes[attr.name] = helper.get_attribute_value(attr)
+        return ComputeNode(op_type, inputs, outputs, attributes)
 
     def _analyze_kernel_io_list(self):
         cross_kernel_inputs = set()
@@ -392,7 +394,7 @@ def _analyze_kernel_io_list(self):
 
     def _insert_load_and_store(self, kernel_node: KernelNode):
         input_names = [input.name for input in kernel_node.inputs]
-        output_name_map = dict()
+        output_name_map = {}
         for output in kernel_node.outputs:
             output_name_map[output.name] = 0
         for node in kernel_node.sub_nodes:
@@ -496,7 +498,7 @@ def _lower(self):
             warnings.warn("Use triton's random for Dropout, ignore the random seed from ORT.", UserWarning)
 
         self._analyze_kernel_io_list()
-        cross_kernel_arg_map = dict()
+        cross_kernel_arg_map = {}
         for idx, kernel_io in enumerate(self._kernel_io_list):
             for output in itertools.chain(kernel_io.cross_kernel_outputs, kernel_io.module_outputs):
                 cross_kernel_arg_map[output] = idx
diff --git a/orttraining/orttraining/python/training/ort_triton/_op_config.py b/orttraining/orttraining/python/training/ort_triton/_op_config.py
index f58d0e1847207..7d9af00933a75 100644
--- a/orttraining/orttraining/python/training/ort_triton/_op_config.py
+++ b/orttraining/orttraining/python/training/ort_triton/_op_config.py
@@ -36,6 +36,10 @@
     "DropoutGrad": {"domain": "com.microsoft", "versions": [1]},
     "Identity": {"versions": [13], "is_no_op": True},
     "Sum": {"versions": [13]},
+    "Gelu": {"domain": "com.microsoft", "versions": [1]},
+    "QuickGelu": {"domain": "com.microsoft", "versions": [1]},
+    "GeluGrad": {"domain": "com.microsoft", "versions": [1]},
+    "QuickGeluGrad": {"domain": "com.microsoft", "versions": [1]},
 }
 
 _REDUCTION_OPS = {
diff --git a/orttraining/orttraining/python/training/ort_triton/_sorted_graph.py b/orttraining/orttraining/python/training/ort_triton/_sorted_graph.py
index 69df567500a89..32e54d0868013 100644
--- a/orttraining/orttraining/python/training/ort_triton/_sorted_graph.py
+++ b/orttraining/orttraining/python/training/ort_triton/_sorted_graph.py
@@ -5,15 +5,17 @@
 
 import copy
 import itertools
-from typing import Any, Dict, List, Set
+from typing import Dict, List, Set
 
 import numpy as np
 import onnx
-from onnx import GraphProto, ModelProto, NodeProto, helper
+import sympy
+from onnx import GraphProto, ModelProto, NodeProto, TensorProto, helper
 
-from ._common import TensorInfo, TypeAndShapeInfer
+from ._common import SymbolicDSU, TensorInfo, TypeAndShapeInfer
 from ._decompose import DecomposeDispatch
 from ._op_config import is_elementwise_node
+from ._sympy_utils import parse_shape
 from ._utils import get_attribute, to_numpy_array, topological_sort
 
 
@@ -29,17 +31,20 @@ class SortedGraph:
         input_shapes: the shapes of the model inputs. Can be numeric values or symbolic values.
     """
 
-    def __init__(self, model: ModelProto, input_shapes: List[List[Any]]):
+    def __init__(self, model: ModelProto, input_shapes: List[List[sympy.Expr]]):
         self._model: ModelProto = model
         self._graph: GraphProto = model.graph
-        self._input_shapes: List[List[Any]] = input_shapes
+        self._input_shapes: List[List[sympy.Expr]] = input_shapes
 
         # For elementwise graph outputs, when we group nodes to different kernels, if the target shape is different
         # from other nodes' target shape, even it can be broadcasted, we still need to create a new kernel for it.
         self._elementwise_graph_outputs: Set[str] = set()
+        graph_output_names = [output.name for output in self._graph.output]
         for node in self._graph.node:
             if is_elementwise_node(node):
-                self._elementwise_graph_outputs.update(node.output)
+                self._elementwise_graph_outputs.update(
+                    [output for output in node.output if output in graph_output_names]
+                )
 
         # Topological sort the nodes in the graph.
         self._sorted_nodes: List[NodeProto] = topological_sort(
@@ -53,7 +58,7 @@ def __init__(self, model: ModelProto, input_shapes: List[List[Any]]):
         for initializer in self._graph.initializer:
             self._node_arg_infos[initializer.name] = TensorInfo(
                 initializer.data_type,
-                list(to_numpy_array(initializer).shape),
+                parse_shape(list(to_numpy_array(initializer).shape)),
             )
 
         # Decompose complex operators.
@@ -66,7 +71,7 @@ def __init__(self, model: ModelProto, input_shapes: List[List[Any]]):
         initializers = {}
         for initializer in self._graph.initializer:
             initializers[initializer.name] = initializer
-        self._sorted_initializers: List[TensorInfo] = []
+        self._sorted_initializers: List[TensorProto] = []
         for node in self._sorted_nodes:
             for input in node.input:
                 if input in initializers:
@@ -157,6 +162,7 @@ def elementwise_graph_outputs(self) -> Set[str]:
 
     def _decompose(self):
         dispatch = DecomposeDispatch()
+        symbolics: SymbolicDSU = SymbolicDSU()
         pos = 0
         # If a node is complex, decompose it and insert the decomposed nodes at the same position.
         # All complex Ops are defined in DecomposeDispatch.
@@ -175,16 +181,18 @@ def _decompose(self):
                 value_attr = get_attribute(node, "value")
                 self._node_arg_infos[node.output[0]] = TensorInfo(
                     value_attr.data_type,
-                    list(to_numpy_array(value_attr).shape),
+                    parse_shape(list(to_numpy_array(value_attr).shape)),
                 )
             else:
                 input_infos = []
                 for input in node.input:
                     input_infos.append(self._node_arg_infos[input])
-                output_infos = TypeAndShapeInfer.infer(node, input_infos, self._graph)
+                output_infos = TypeAndShapeInfer.infer(node, input_infos, self._graph, symbolics)
                 for idx, output in enumerate(node.output):
                     self._node_arg_infos[output] = output_infos[idx]
             pos += 1
+        for tensor_info in self._node_arg_infos.values():
+            tensor_info.update_shape(symbolics)
 
     # Save the ONNX graphs for debug purpose. The original ONNX graph is the subgraph from backend.
     # The processed ONNX graph is the subgraph after decompose, it also contains the concrete shapes for each arg.
@@ -197,13 +205,20 @@ def save_onnx(self, file_path_prefix):
         for node in itertools.chain(processed_model.graph.input, processed_model.graph.output):
             node.type.tensor_type.shape.Clear()
             for dim in self.node_arg_infos[node.name].shape:
-                node.type.tensor_type.shape.dim.add().dim_value = int(dim)
+                if dim.is_number:
+                    node.type.tensor_type.shape.dim.add().dim_value = int(dim)
+                else:
+                    node.type.tensor_type.shape.dim.add().dim_param = str(dim)
         value_infos = []
         for node in itertools.chain(self.const_nodes, self.sorted_nodes):
             for output in node.output:
                 tensor_info = self.node_arg_infos[output]
                 value_infos.append(
-                    helper.make_tensor_value_info(output, tensor_info.dtype, [int(dim) for dim in tensor_info.shape])
+                    helper.make_tensor_value_info(
+                        output,
+                        tensor_info.dtype,
+                        [int(dim) if dim.is_number else str(dim) for dim in tensor_info.shape],
+                    )
                 )
         processed_model.graph.ClearField("value_info")
         processed_model.graph.value_info.extend(value_infos)
diff --git a/orttraining/orttraining/python/training/ort_triton/_sympy_utils.py b/orttraining/orttraining/python/training/ort_triton/_sympy_utils.py
index e3629b5effa38..a4a384c021fe8 100644
--- a/orttraining/orttraining/python/training/ort_triton/_sympy_utils.py
+++ b/orttraining/orttraining/python/training/ort_triton/_sympy_utils.py
@@ -9,6 +9,12 @@
 import sympy
 
 
+def extract_shape_from_symbol(symbol: str) -> int:
+    match = re.match(r"i(\d+)_dim(\d+)_(\d+)", symbol)
+    assert match
+    return int(match.group(3))
+
+
 def sympy_dot(seq1: List[sympy.Expr], seq2: List[sympy.Expr]) -> sympy.Expr:
     assert len(seq1) == len(seq2)
     return sympy.expand(sum(a * b for a, b in zip(seq1, seq2)))
diff --git a/orttraining/orttraining/python/training/ort_triton/kernel/__init__.py b/orttraining/orttraining/python/training/ort_triton/kernel/__init__.py
index 97318ea2e53ae..3213a8831ae22 100644
--- a/orttraining/orttraining/python/training/ort_triton/kernel/__init__.py
+++ b/orttraining/orttraining/python/training/ort_triton/kernel/__init__.py
@@ -3,15 +3,30 @@
 # Licensed under the MIT License.
 # --------------------------------------------------------------------------
 
-from ._mm import triton_gemm, triton_gemm_out, triton_matmul, triton_matmul_out
-from ._slice_scel import slice_scel, slice_scel_backward, transform_slice_scel
+import os
 
-__all__ = [
+import torch
+
+from ._mm import triton_gemm, triton_gemm_out, triton_matmul, triton_matmul_out  # noqa: F401
+from ._slice_scel import slice_scel, slice_scel_backward  # noqa: F401
+
+_all_kernels = [
     "triton_gemm",
     "triton_gemm_out",
     "triton_matmul",
     "triton_matmul_out",
     "slice_scel",
     "slice_scel_backward",
-    "transform_slice_scel",
 ]
+
+if (
+    "ORTMODULE_USE_FLASH_ATTENTION" in os.environ
+    and int(os.getenv("ORTMODULE_USE_FLASH_ATTENTION")) == 1
+    and torch.cuda.is_available()
+    and torch.cuda.get_device_capability()[0] >= 8
+):
+    from ._flash_attn import flash_attn_backward, flash_attn_forward  # noqa: F401
+
+    _all_kernels.extend(["flash_attn_forward", "flash_attn_backward"])
+
+__all__ = _all_kernels  # noqa: PLE0605
diff --git a/orttraining/orttraining/python/training/ort_triton/kernel/_flash_attn.py b/orttraining/orttraining/python/training/ort_triton/kernel/_flash_attn.py
new file mode 100644
index 0000000000000..03bb0f4373d8d
--- /dev/null
+++ b/orttraining/orttraining/python/training/ort_triton/kernel/_flash_attn.py
@@ -0,0 +1,1244 @@
+"""
+*Experimental* implementation of FlashAttention in Triton.
+Tested with triton==2.0.0.dev20221202.
+Triton 2.0 has a new backend (MLIR) but seems like it doesn't yet work for head dimensions
+other than 64:
+https://github.com/openai/triton/blob/d376020f90002757eea3ea9475d4f7cfc2ec5ead/python/triton/ops/flash_attention.py#L207
+We'll update this implementation with the new Triton backend once this is fixed.
+
+We use the FlashAttention implementation from Phil Tillet a starting point.
+https://github.com/openai/triton/blob/master/python/tutorials/06-fused-attention.py
+
+Changes:
+- Implement both causal and non-causal attention.
+- Implement both self-attention and cross-attention.
+- Support arbitrary seqlens (not just multiples of 128), for both forward and backward.
+- Support all head dimensions up to 128 (not just 16, 32, 64, 128), for both forward and backward.
+- Support attention bias.
+- Speed up the forward pass a bit, and only store the LSE instead of m and l.
+- Make the backward for d=128 much faster by reducing register spilling.
+- Optionally parallelize the backward pass across seqlen_k, to deal with the case of
+small batch size * nheads.
+
+Caution:
+- This is an *experimental* implementation. The forward pass should be quite robust but
+I'm not 100% sure that the backward pass doesn't have race conditions (due to the Triton compiler).
+- This implementation has only been tested on A100.
+- If you plan to use headdim other than 64 and 128, you should test for race conditions
+(due to the Triton compiler), as done in tests/test_flash_attn.py
+"test_flash_attn_triton_race_condition". I've tested and fixed many race conditions
+for different head dimensions (40, 48, 64, 128, 80, 88, 96), but I'm still not 100% confident
+that there are none left for other head dimensions.
+
+Differences between this Triton version and the CUDA version:
+- Triton version doesn't support dropout.
+- Triton forward is generally faster than CUDA forward, while Triton backward is
+generally slower than CUDA backward. Overall Triton forward + backward is slightly slower
+than CUDA forward + backward.
+- Triton version doesn't support different sequence lengths in a batch (i.e., RaggedTensor/NestedTensor).
+- Triton version supports attention bias, while CUDA version doesn't.
+"""
+
+import math
+from typing import List, Tuple
+
+import torch
+import triton
+import triton.language as tl
+from onnx import GraphProto, NodeProto, TensorProto, helper
+
+from onnxruntime.training.ortmodule import register_graph_optimizer
+from onnxruntime.training.ortmodule.graph_optimizers.utils import GraphMatcher, check_attribute_value, update_graph
+
+
+# Disabling autotune for now, set num_warps=4 if headdim=64 and num_warps=8 if headdim=128
+# @triton.autotune(
+#     configs=[
+#         triton.Config({"BLOCK_M": 128, "BLOCK_N": 128}, num_warps=4, num_stages=1),
+#         # This config has a race condition when EVEN_M == False, disabling it for now.
+#         # triton.Config({"BLOCK_M": 64, "BLOCK_N": 64}, num_warps=4, num_stages=1),
+#     ],
+#     key=['CACHE_KEY_SEQLEN_Q', 'CACHE_KEY_SEQLEN_K', 'BIAS_TYPE', 'IS_CAUSAL', 'BLOCK_HEADDIM']
+# )
+@triton.heuristics(
+    {
+        "EVEN_M": lambda args: args["seqlen_q"] % args["BLOCK_M"] == 0,
+        "EVEN_N": lambda args: args["seqlen_k"] % args["BLOCK_N"] == 0,
+        "EVEN_HEADDIM": lambda args: args["headdim"] == args["BLOCK_HEADDIM"],
+    }
+)
+@triton.jit
+def _fwd_kernel(
+    Q,
+    K,
+    V,
+    Bias,
+    Out,
+    Lse,
+    TMP,  # NOTE: TMP is a scratchpad buffer to workaround a compiler bug
+    softmax_scale,
+    stride_qb,
+    stride_qh,
+    stride_qm,
+    stride_kb,
+    stride_kh,
+    stride_kn,
+    stride_vb,
+    stride_vh,
+    stride_vn,
+    stride_bb,
+    stride_bh,
+    stride_bm,
+    stride_ob,
+    stride_oh,
+    stride_om,
+    nheads,
+    seqlen_q,
+    seqlen_k,
+    seqlen_q_rounded,
+    headdim,
+    CACHE_KEY_SEQLEN_Q,
+    CACHE_KEY_SEQLEN_K,
+    BIAS_TYPE: tl.constexpr,
+    IS_CAUSAL: tl.constexpr,
+    BLOCK_HEADDIM: tl.constexpr,
+    EVEN_M: tl.constexpr,
+    EVEN_N: tl.constexpr,
+    EVEN_HEADDIM: tl.constexpr,
+    BLOCK_M: tl.constexpr,
+    BLOCK_N: tl.constexpr,
+):
+    start_m = tl.program_id(0)
+    off_hb = tl.program_id(1)
+    off_b = off_hb // nheads
+    off_h = off_hb % nheads
+    # initialize offsets
+    offs_m = start_m * BLOCK_M + tl.arange(0, BLOCK_M)
+    offs_n = tl.arange(0, BLOCK_N)
+    offs_d = tl.arange(0, BLOCK_HEADDIM)
+    # Initialize pointers to Q, K, V
+    # Adding parenthesis around indexing might use int32 math instead of int64 math?
+    # https://github.com/openai/triton/issues/741
+    # I'm seeing a tiny bit of difference (5-7us)
+    q_ptrs = Q + off_b * stride_qb + off_h * stride_qh + (offs_m[:, None] * stride_qm + offs_d[None, :])
+    k_ptrs = K + off_b * stride_kb + off_h * stride_kh + (offs_n[:, None] * stride_kn + offs_d[None, :])
+    v_ptrs = V + off_b * stride_vb + off_h * stride_vh + (offs_n[:, None] * stride_vn + offs_d[None, :])
+    if BIAS_TYPE == "vector":
+        b_ptrs = Bias + off_b * stride_bb + off_h * stride_bh + offs_n
+    elif BIAS_TYPE == "matrix":
+        b_ptrs = Bias + off_b * stride_bb + off_h * stride_bh + (offs_m[:, None] * stride_bm + offs_n[None, :])
+    # initialize pointer to m and l
+    t_ptrs = TMP + off_hb * seqlen_q_rounded + offs_m
+    lse_i = tl.zeros([BLOCK_M], dtype=tl.float32) - float("inf")
+    m_i = tl.zeros([BLOCK_M], dtype=tl.float32) - float("inf")
+    acc_o = tl.zeros([BLOCK_M, BLOCK_HEADDIM], dtype=tl.float32)
+    # load q: it will stay in SRAM throughout
+    # [2022-10-30] TD: Triton bug - in the case of EVEN_M=True and EVEN_N=False, if we just call
+    # tl.load(q_ptrs), we get the wrong output!
+    if EVEN_M & EVEN_N:
+        if EVEN_HEADDIM:
+            q = tl.load(q_ptrs)
+        else:
+            q = tl.load(q_ptrs, mask=offs_d[None, :] < headdim, other=0.0)
+    else:
+        if EVEN_HEADDIM:
+            q = tl.load(q_ptrs, mask=offs_m[:, None] < seqlen_q, other=0.0)
+        else:
+            q = tl.load(q_ptrs, mask=(offs_m[:, None] < seqlen_q) & (offs_d[None, :] < headdim), other=0.0)
+    # loop over k, v and update accumulator
+    end_n = seqlen_k if not IS_CAUSAL else tl.minimum((start_m + 1) * BLOCK_M, seqlen_k)
+    for start_n in range(0, end_n, BLOCK_N):
+        start_n = tl.multiple_of(start_n, BLOCK_N)
+        # -- compute qk ----
+        if EVEN_N & EVEN_M:  # If we just do "if EVEN_N", there seems to be some race condition
+            if EVEN_HEADDIM:
+                k = tl.load(k_ptrs + start_n * stride_kn)
+            else:
+                k = tl.load(k_ptrs + start_n * stride_kn, mask=offs_d[None, :] < headdim, other=0.0)
+        else:
+            if EVEN_HEADDIM:
+                k = tl.load(
+                    k_ptrs + start_n * stride_kn,
+                    mask=(start_n + offs_n)[:, None] < seqlen_k,
+                    other=0.0,
+                )
+            else:
+                k = tl.load(
+                    k_ptrs + start_n * stride_kn,
+                    mask=((start_n + offs_n)[:, None] < seqlen_k) & (offs_d[None, :] < headdim),
+                    other=0.0,
+                )
+        qk = tl.zeros([BLOCK_M, BLOCK_N], dtype=tl.float32)
+        qk += tl.dot(q, k, trans_b=True)
+        # Trying to combine the two masks seem to make the result wrong
+        if not EVEN_N:  # Need to mask out otherwise the softmax is wrong
+            qk += tl.where((start_n + offs_n)[None, :] < seqlen_k, 0, float("-inf"))
+        if IS_CAUSAL:
+            qk += tl.where(offs_m[:, None] >= (start_n + offs_n)[None, :], 0, float("-inf"))
+        if BIAS_TYPE != "none":
+            if BIAS_TYPE == "vector":
+                if EVEN_N:
+                    bias = tl.load(b_ptrs + start_n).to(tl.float32)
+                else:
+                    bias = tl.load(b_ptrs + start_n, mask=(start_n + offs_n) < seqlen_k, other=0.0).to(tl.float32)
+                bias = bias[None, :]
+            elif BIAS_TYPE == "matrix":
+                if EVEN_M & EVEN_N:
+                    bias = tl.load(b_ptrs + start_n).to(tl.float32)
+                else:
+                    bias = tl.load(
+                        b_ptrs + start_n,
+                        mask=(offs_m[:, None] < seqlen_q) & ((start_n + offs_n)[None, :] < seqlen_k),
+                        other=0.0,
+                    ).to(tl.float32)
+            # Slightly faster to multiply the softmax_scale in the tl.exp below since the compiler
+            # can then fuse the mult and add into an fma instruction. But if we have bias we need to
+            # to multiply with softmax_scale here.
+            qk = qk * softmax_scale + bias
+            m_ij = tl.maximum(tl.max(qk, 1), lse_i)
+            p = tl.exp(qk - m_ij[:, None])
+        else:
+            m_ij = tl.maximum(tl.max(qk, 1) * softmax_scale, lse_i)
+            p = tl.exp(qk * softmax_scale - m_ij[:, None])
+        l_ij = tl.sum(p, 1)
+
+        # scale acc_o
+        acc_o_scale = tl.exp(m_i - m_ij)
+
+        # # -- update output accumulator --
+        # BUG: have to store and immediately load
+        tl.store(t_ptrs, acc_o_scale)
+        acc_o_scale = tl.load(t_ptrs)
+        acc_o = acc_o * acc_o_scale[:, None]
+        # update acc_o
+        if EVEN_N & EVEN_M:  # If we just do "if EVEN_N", there seems to be some race condition
+            if EVEN_HEADDIM:
+                v = tl.load(v_ptrs + start_n * stride_vn)
+            else:
+                v = tl.load(v_ptrs + start_n * stride_vn, mask=offs_d[None, :] < headdim, other=0.0)
+        else:
+            if EVEN_HEADDIM:
+                v = tl.load(
+                    v_ptrs + start_n * stride_vn,
+                    mask=(start_n + offs_n)[:, None] < seqlen_k,
+                    other=0.0,
+                )
+            else:
+                v = tl.load(
+                    v_ptrs + start_n * stride_vn,
+                    mask=((start_n + offs_n)[:, None] < seqlen_k) & (offs_d[None, :] < headdim),
+                    other=0.0,
+                )
+        p = p.to(v.dtype)
+        acc_o += tl.dot(p, v)
+
+        # -- update statistics
+        m_i = m_ij
+        l_i_new = tl.exp(lse_i - m_ij) + l_ij
+        lse_i = m_ij + tl.log(l_i_new)
+
+    o_scale = tl.exp(m_i - lse_i)
+    # BUG: have to store and immediately load
+    tl.store(t_ptrs, o_scale)
+    o_scale = tl.load(t_ptrs)
+    acc_o = acc_o * o_scale[:, None]
+    # rematerialize offsets to save registers
+    start_m = tl.program_id(0)
+    offs_m = start_m * BLOCK_M + tl.arange(0, BLOCK_M)
+    # write back l and m
+    lse_ptrs = Lse + off_hb * seqlen_q_rounded + offs_m
+    tl.store(lse_ptrs, lse_i)
+    # initialize pointers to output
+    offs_d = tl.arange(0, BLOCK_HEADDIM)
+    out_ptrs = Out + off_b * stride_ob + off_h * stride_oh + (offs_m[:, None] * stride_om + offs_d[None, :])
+    if EVEN_M:
+        if EVEN_HEADDIM:
+            tl.store(out_ptrs, acc_o)
+        else:
+            tl.store(out_ptrs, acc_o, mask=offs_d[None, :] < headdim)
+    else:
+        if EVEN_HEADDIM:
+            tl.store(out_ptrs, acc_o, mask=offs_m[:, None] < seqlen_q)
+        else:
+            tl.store(out_ptrs, acc_o, mask=(offs_m[:, None] < seqlen_q) & (offs_d[None, :] < headdim))
+
+
+@triton.jit
+def _bwd_preprocess_do_o_dot(
+    Out,
+    DO,
+    Delta,
+    stride_ob,
+    stride_oh,
+    stride_om,
+    stride_dob,
+    stride_doh,
+    stride_dom,
+    nheads,
+    seqlen_q,
+    seqlen_q_rounded,
+    headdim,
+    BLOCK_M: tl.constexpr,
+    BLOCK_HEADDIM: tl.constexpr,
+):
+    start_m = tl.program_id(0)
+    off_hb = tl.program_id(1)
+    off_b = off_hb // nheads
+    off_h = off_hb % nheads
+    # initialize offsets
+    offs_m = start_m * BLOCK_M + tl.arange(0, BLOCK_M)
+    offs_d = tl.arange(0, BLOCK_HEADDIM)
+    # load
+    o = tl.load(
+        Out + off_b * stride_ob + off_h * stride_oh + offs_m[:, None] * stride_om + offs_d[None, :],
+        mask=(offs_m[:, None] < seqlen_q) & (offs_d[None, :] < headdim),
+        other=0.0,
+    ).to(tl.float32)
+    do = tl.load(
+        DO + off_b * stride_dob + off_h * stride_doh + offs_m[:, None] * stride_dom + offs_d[None, :],
+        mask=(offs_m[:, None] < seqlen_q) & (offs_d[None, :] < headdim),
+        other=0.0,
+    ).to(tl.float32)
+    delta = tl.sum(o * do, axis=1)
+    # write-back
+    tl.store(Delta + off_hb * seqlen_q_rounded + offs_m, delta)
+
+
+@triton.jit
+def _bwd_store_dk_dv(
+    dk_ptrs,
+    dv_ptrs,
+    dk,
+    dv,
+    offs_n,
+    offs_d,
+    seqlen_k,
+    headdim,
+    EVEN_M: tl.constexpr,
+    EVEN_N: tl.constexpr,
+    EVEN_HEADDIM: tl.constexpr,
+):
+    # [2022-11-01] TD: Same bug. In the case of EVEN_N=True and EVEN_M=False,
+    # if we just call tl.store(dv_ptrs), there's a race condition
+    if EVEN_N & EVEN_M:
+        if EVEN_HEADDIM:
+            tl.store(dv_ptrs, dv)
+            tl.store(dk_ptrs, dk)
+        else:
+            tl.store(dv_ptrs, dv, mask=offs_d[None, :] < headdim)
+            tl.store(dk_ptrs, dk, mask=offs_d[None, :] < headdim)
+    else:
+        if EVEN_HEADDIM:
+            tl.store(dv_ptrs, dv, mask=offs_n[:, None] < seqlen_k)
+            tl.store(dk_ptrs, dk, mask=offs_n[:, None] < seqlen_k)
+        else:
+            tl.store(dv_ptrs, dv, mask=(offs_n[:, None] < seqlen_k) & (offs_d[None, :] < headdim))
+            tl.store(dk_ptrs, dk, mask=(offs_n[:, None] < seqlen_k) & (offs_d[None, :] < headdim))
+
+
+@triton.jit
+def _bwd_kernel_one_col_block(
+    start_n,
+    Q,
+    K,
+    V,
+    Bias,
+    DO,
+    DQ,
+    DK,
+    DV,
+    LSE,
+    D,
+    softmax_scale,
+    stride_qm,
+    stride_kn,
+    stride_vn,
+    stride_bm,
+    stride_dom,
+    stride_dqm,
+    stride_dkn,
+    stride_dvn,
+    seqlen_q,
+    seqlen_k,
+    headdim,
+    ATOMIC_ADD: tl.constexpr,
+    BIAS_TYPE: tl.constexpr,
+    IS_CAUSAL: tl.constexpr,
+    BLOCK_HEADDIM: tl.constexpr,
+    EVEN_M: tl.constexpr,
+    EVEN_N: tl.constexpr,
+    EVEN_HEADDIM: tl.constexpr,
+    BLOCK_M: tl.constexpr,
+    BLOCK_N: tl.constexpr,
+):
+    # We need to make sure begin_m is a multiple of BLOCK_M (not BLOCK_N)
+    begin_m = 0 if not IS_CAUSAL else ((start_n * BLOCK_N) // BLOCK_M) * BLOCK_M
+    # initialize row/col offsets
+    offs_qm = begin_m + tl.arange(0, BLOCK_M)
+    offs_n = start_n * BLOCK_N + tl.arange(0, BLOCK_N)
+    offs_m = tl.arange(0, BLOCK_M)
+    offs_d = tl.arange(0, BLOCK_HEADDIM)
+    # initialize pointers to value-like data
+    q_ptrs = Q + (offs_qm[:, None] * stride_qm + offs_d[None, :])
+    k_ptrs = K + (offs_n[:, None] * stride_kn + offs_d[None, :])
+    v_ptrs = V + (offs_n[:, None] * stride_vn + offs_d[None, :])
+    do_ptrs = DO + (offs_qm[:, None] * stride_dom + offs_d[None, :])
+    dq_ptrs = DQ + (offs_qm[:, None] * stride_dqm + offs_d[None, :])
+    if BIAS_TYPE == "vector":
+        b_ptrs = Bias + offs_n
+    elif BIAS_TYPE == "matrix":
+        b_ptrs = Bias + (offs_qm[:, None] * stride_bm + offs_n[None, :])
+    # initialize dv and dk
+    dv = tl.zeros([BLOCK_N, BLOCK_HEADDIM], dtype=tl.float32)
+    dk = tl.zeros([BLOCK_N, BLOCK_HEADDIM], dtype=tl.float32)
+    # There seems to be some problem with Triton pipelining that makes results wrong for
+    # headdim=64, seqlen=(113, 255), bias_type='matrix'. In this case the for loop
+    # may have zero step, and pipelining with the bias matrix could cause the problem.
+    # So we just exit early.
+    if begin_m >= seqlen_q:
+        dv_ptrs = DV + (offs_n[:, None] * stride_dvn + offs_d[None, :])
+        dk_ptrs = DK + (offs_n[:, None] * stride_dkn + offs_d[None, :])
+        _bwd_store_dk_dv(
+            dk_ptrs,
+            dv_ptrs,
+            dk,
+            dv,
+            offs_n,
+            offs_d,
+            seqlen_k,
+            headdim,
+            EVEN_M=EVEN_M,
+            EVEN_N=EVEN_N,
+            EVEN_HEADDIM=EVEN_HEADDIM,
+        )
+        return
+    # k and v stay in SRAM throughout
+    # [2022-10-30] TD: Same bug as the fwd. In the case of EVEN_N=True and EVEN_M=False,
+    # if we just call tl.load(k_ptrs), we get the wrong output!
+    if EVEN_N & EVEN_M:
+        if EVEN_HEADDIM:
+            k = tl.load(k_ptrs)
+            v = tl.load(v_ptrs)
+        else:
+            k = tl.load(k_ptrs, mask=offs_d[None, :] < headdim, other=0.0)
+            v = tl.load(v_ptrs, mask=offs_d[None, :] < headdim, other=0.0)
+    else:
+        if EVEN_HEADDIM:
+            k = tl.load(k_ptrs, mask=offs_n[:, None] < seqlen_k, other=0.0)
+            v = tl.load(v_ptrs, mask=offs_n[:, None] < seqlen_k, other=0.0)
+        else:
+            k = tl.load(k_ptrs, mask=(offs_n[:, None] < seqlen_k) & (offs_d[None, :] < headdim), other=0.0)
+            v = tl.load(v_ptrs, mask=(offs_n[:, None] < seqlen_k) & (offs_d[None, :] < headdim), other=0.0)
+    # loop over rows
+    num_block_m = tl.cdiv(seqlen_q, BLOCK_M)
+    for start_m in range(begin_m, num_block_m * BLOCK_M, BLOCK_M):
+        start_m = tl.multiple_of(start_m, BLOCK_M)
+        offs_m_curr = start_m + offs_m
+        # load q, k, v, do on-chip
+        # Same bug as below. Otherwise gives wrong result for headdim=40, seqlen=(128, 117)
+        if EVEN_M & EVEN_HEADDIM:
+            q = tl.load(q_ptrs)
+        else:
+            if EVEN_HEADDIM:
+                q = tl.load(q_ptrs, mask=offs_m_curr[:, None] < seqlen_q, other=0.0)
+            else:
+                q = tl.load(
+                    q_ptrs,
+                    mask=(offs_m_curr[:, None] < seqlen_q) & (offs_d[None, :] < headdim),
+                    other=0.0,
+                )
+        # recompute p = softmax(qk, dim=-1).T
+        qk = tl.dot(q, k, trans_b=True)
+        # Trying to combine the two masks seem to make the result wrong
+        if not EVEN_N:  # Need to mask out otherwise the softmax is wrong
+            qk = tl.where(offs_n[None, :] < seqlen_k, qk, float("-inf"))
+        if IS_CAUSAL:
+            qk = tl.where(offs_m_curr[:, None] >= (offs_n[None, :]), qk, float("-inf"))
+        if BIAS_TYPE != "none":
+            tl.debug_barrier()  # Race condition otherwise
+            if BIAS_TYPE == "vector":
+                if EVEN_N:
+                    bias = tl.load(b_ptrs).to(tl.float32)
+                else:
+                    bias = tl.load(b_ptrs, mask=offs_n < seqlen_k, other=0.0).to(tl.float32)
+                bias = bias[None, :]
+            elif BIAS_TYPE == "matrix":
+                if EVEN_M & EVEN_N:
+                    bias = tl.load(b_ptrs).to(tl.float32)
+                else:
+                    bias = tl.load(
+                        b_ptrs,
+                        mask=(offs_m_curr[:, None] < seqlen_q) & (offs_n[None, :] < seqlen_k),
+                        other=0.0,
+                    ).to(tl.float32)
+            qk = qk * softmax_scale + bias
+        # There seems to be a race condition when headdim=48/96, and dq, dk, dv are wrong.
+        # Also wrong for headdim=64.
+        if not (EVEN_M & EVEN_HEADDIM):
+            tl.debug_barrier()
+        lse_i = tl.load(LSE + offs_m_curr)
+        if BIAS_TYPE == "none":
+            p = tl.exp(qk * softmax_scale - lse_i[:, None])
+        else:
+            p = tl.exp(qk - lse_i[:, None])
+        # compute dv
+        # [2022-10-30] TD: A Triton bug: if EVEN_M=True and EVEN_HEADDIM=False, if we call
+        # do = tl.load(do_ptrs, mask=offs_d[None, :] < headdim, other=0.0), we get wrong outputs
+        # in the case of headdim=48/96, seqlen_q & seqlen_k >= 512. If headdim=40 or seqlen < 512,
+        # the output is correct.
+        if EVEN_M & EVEN_HEADDIM:
+            do = tl.load(do_ptrs)
+        else:
+            # [2022-11-01] TD: Triton bug, there's a race condition if we just use m_mask and not d_mask.
+            do = tl.load(
+                do_ptrs,
+                mask=(offs_m_curr[:, None] < seqlen_q) & (offs_d[None, :] < headdim),
+                other=0.0,
+            )
+        dv += tl.dot(p.to(do.dtype), do, trans_a=True)
+        # compute dp = dot(v, do)
+        # There seems to be a race condition when headdim=48/96, and dq, dk are wrong.
+        # Also wrong for headdim=128, seqlen=(108, 256), and ATOMIC_ADD=True
+        # Also wrong for headdim=64, seqlen=(1023, 1024), and ATOMIC_ADD=False
+        if not (EVEN_M & EVEN_HEADDIM):
+            tl.debug_barrier()
+        dp = tl.dot(do, v, trans_b=True)
+        # There's a race condition for headdim=48
+        if not EVEN_HEADDIM:
+            tl.debug_barrier()
+        # compute ds = p * (dp - delta[:, None])
+        # Putting the subtraction after the dp matmul (instead of before) is slightly faster
+        Di = tl.load(D + offs_m_curr)
+        # Converting ds to q.dtype here reduces register pressure and makes it much faster
+        # for BLOCK_HEADDIM=128
+        ds = (p * (dp - Di[:, None]) * softmax_scale).to(q.dtype)
+        # compute dk = dot(ds.T, q)
+        dk += tl.dot(ds, q, trans_a=True)
+        # compute dq
+        if not (EVEN_M & EVEN_HEADDIM):  # Otherewise there's a race condition when BIAS_TYPE='matrix'
+            tl.debug_barrier()
+        if not ATOMIC_ADD:
+            if EVEN_M & EVEN_HEADDIM:  # Race condition if we just do EVEN_M
+                dq = tl.load(dq_ptrs, eviction_policy="evict_last")
+                dq += tl.dot(ds, k)
+                tl.store(dq_ptrs, dq, eviction_policy="evict_last")
+            else:
+                if EVEN_HEADDIM:
+                    dq = tl.load(
+                        dq_ptrs,
+                        mask=offs_m_curr[:, None] < seqlen_q,
+                        other=0.0,
+                        eviction_policy="evict_last",
+                    )
+                    dq += tl.dot(ds, k)
+                    tl.store(
+                        dq_ptrs,
+                        dq,
+                        mask=offs_m_curr[:, None] < seqlen_q,
+                        eviction_policy="evict_last",
+                    )
+                else:
+                    dq = tl.load(
+                        dq_ptrs,
+                        mask=(offs_m_curr[:, None] < seqlen_q) & (offs_d[None, :] < headdim),
+                        other=0.0,
+                        eviction_policy="evict_last",
+                    )
+                    dq += tl.dot(ds, k)
+                    tl.store(
+                        dq_ptrs,
+                        dq,
+                        mask=(offs_m_curr[:, None] < seqlen_q) & (offs_d[None, :] < headdim),
+                        eviction_policy="evict_last",
+                    )
+        else:  # If we're parallelizing across the seqlen_k dimension
+            dq = tl.dot(ds, k)
+            if EVEN_M & EVEN_HEADDIM:  # Race condition if we just do EVEN_M
+                tl.atomic_add(dq_ptrs, dq)
+            else:
+                if EVEN_HEADDIM:
+                    tl.atomic_add(dq_ptrs, dq, mask=offs_m_curr[:, None] < seqlen_q)
+                else:
+                    tl.atomic_add(
+                        dq_ptrs,
+                        dq,
+                        mask=(offs_m_curr[:, None] < seqlen_q) & (offs_d[None, :] < headdim),
+                    )
+        # increment pointers
+        dq_ptrs += BLOCK_M * stride_dqm
+        q_ptrs += BLOCK_M * stride_qm
+        do_ptrs += BLOCK_M * stride_dom
+        if BIAS_TYPE == "matrix":
+            b_ptrs += BLOCK_M * stride_bm
+    # write-back
+    dv_ptrs = DV + (offs_n[:, None] * stride_dvn + offs_d[None, :])
+    dk_ptrs = DK + (offs_n[:, None] * stride_dkn + offs_d[None, :])
+    _bwd_store_dk_dv(
+        dk_ptrs,
+        dv_ptrs,
+        dk,
+        dv,
+        offs_n,
+        offs_d,
+        seqlen_k,
+        headdim,
+        EVEN_M=EVEN_M,
+        EVEN_N=EVEN_N,
+        EVEN_HEADDIM=EVEN_HEADDIM,
+    )
+
+
+def init_to_zero(name):
+    return lambda nargs: nargs[name].zero_()
+
+
+@triton.autotune(
+    configs=[
+        triton.Config(
+            {"BLOCK_M": 128, "BLOCK_N": 128, "SEQUENCE_PARALLEL": False},
+            num_warps=8,
+            num_stages=1,
+            pre_hook=init_to_zero("DQ"),
+        ),
+        triton.Config(
+            {"BLOCK_M": 128, "BLOCK_N": 128, "SEQUENCE_PARALLEL": True},
+            num_warps=8,
+            num_stages=1,
+            pre_hook=init_to_zero("DQ"),
+        ),
+        # Other configs seem to give wrong results when seqlen_q % 128 != 0, disabling them for now
+        # # Kernel is buggy (give wrong result) if we set BLOCK_m=128, BLOCK_n=64, num_warps=*4*
+        # triton.Config({"BLOCK_M": 128, "BLOCK_N": 64, "SEQUENCE_PARALLEL": False}, num_warps=8, num_stages=1, pre_hook=init_to_zero('DQ')),
+        # triton.Config({"BLOCK_M": 128, "BLOCK_N": 64, "SEQUENCE_PARALLEL": True}, num_warps=8, num_stages=1, pre_hook=init_to_zero('DQ')),
+        # triton.Config({"BLOCK_M": 64, "BLOCK_N": 64, "SEQUENCE_PARALLEL": False}, num_warps=4, num_stages=1, pre_hook=init_to_zero('DQ')),
+        # triton.Config({"BLOCK_M": 64, "BLOCK_N": 64, "SEQUENCE_PARALLEL": True}, num_warps=4, num_stages=1, pre_hook=init_to_zero('DQ')),
+    ],
+    key=["CACHE_KEY_SEQLEN_Q", "CACHE_KEY_SEQLEN_K", "BIAS_TYPE", "IS_CAUSAL", "BLOCK_HEADDIM"],
+)
+@triton.heuristics(
+    {
+        "EVEN_M": lambda args: args["seqlen_q"] % args["BLOCK_M"] == 0,
+        "EVEN_N": lambda args: args["seqlen_k"] % args["BLOCK_N"] == 0,
+        "EVEN_HEADDIM": lambda args: args["headdim"] == args["BLOCK_HEADDIM"],
+    }
+)
+@triton.jit
+def _bwd_kernel(
+    Q,
+    K,
+    V,
+    Bias,
+    DO,
+    DQ,
+    DK,
+    DV,
+    LSE,
+    D,
+    softmax_scale,
+    stride_qb,
+    stride_qh,
+    stride_qm,
+    stride_kb,
+    stride_kh,
+    stride_kn,
+    stride_vb,
+    stride_vh,
+    stride_vn,
+    stride_bb,
+    stride_bh,
+    stride_bm,
+    stride_dob,
+    stride_doh,
+    stride_dom,
+    stride_dqb,
+    stride_dqh,
+    stride_dqm,
+    stride_dkb,
+    stride_dkh,
+    stride_dkn,
+    stride_dvb,
+    stride_dvh,
+    stride_dvn,
+    nheads,
+    seqlen_q,
+    seqlen_k,
+    seqlen_q_rounded,
+    headdim,
+    CACHE_KEY_SEQLEN_Q,
+    CACHE_KEY_SEQLEN_K,
+    BIAS_TYPE: tl.constexpr,
+    IS_CAUSAL: tl.constexpr,
+    BLOCK_HEADDIM: tl.constexpr,
+    SEQUENCE_PARALLEL: tl.constexpr,
+    EVEN_M: tl.constexpr,
+    EVEN_N: tl.constexpr,
+    EVEN_HEADDIM: tl.constexpr,
+    BLOCK_M: tl.constexpr,
+    BLOCK_N: tl.constexpr,
+):
+    off_hb = tl.program_id(1)
+    off_b = off_hb // nheads
+    off_h = off_hb % nheads
+    # offset pointers for batch/head
+    Q += off_b * stride_qb + off_h * stride_qh
+    K += off_b * stride_kb + off_h * stride_kh
+    V += off_b * stride_vb + off_h * stride_vh
+    DO += off_b * stride_dob + off_h * stride_doh
+    DQ += off_b * stride_dqb + off_h * stride_dqh
+    DK += off_b * stride_dkb + off_h * stride_dkh
+    DV += off_b * stride_dvb + off_h * stride_dvh
+    if BIAS_TYPE != "none":
+        Bias += off_b * stride_bb + off_h * stride_bh
+    # pointer to row-wise quantities in value-like data
+    D += off_hb * seqlen_q_rounded
+    LSE += off_hb * seqlen_q_rounded
+    if not SEQUENCE_PARALLEL:
+        num_block_n = tl.cdiv(seqlen_k, BLOCK_N)
+        for start_n in range(0, num_block_n):
+            _bwd_kernel_one_col_block(
+                start_n,
+                Q,
+                K,
+                V,
+                Bias,
+                DO,
+                DQ,
+                DK,
+                DV,
+                LSE,
+                D,
+                softmax_scale,
+                stride_qm,
+                stride_kn,
+                stride_vn,
+                stride_bm,
+                stride_dom,
+                stride_dqm,
+                stride_dkn,
+                stride_dvn,
+                seqlen_q,
+                seqlen_k,
+                headdim,
+                ATOMIC_ADD=False,
+                BIAS_TYPE=BIAS_TYPE,
+                IS_CAUSAL=IS_CAUSAL,
+                BLOCK_HEADDIM=BLOCK_HEADDIM,
+                EVEN_M=EVEN_M,
+                EVEN_N=EVEN_N,
+                EVEN_HEADDIM=EVEN_HEADDIM,
+                BLOCK_M=BLOCK_M,
+                BLOCK_N=BLOCK_N,
+            )
+    else:
+        start_n = tl.program_id(0)
+        _bwd_kernel_one_col_block(
+            start_n,
+            Q,
+            K,
+            V,
+            Bias,
+            DO,
+            DQ,
+            DK,
+            DV,
+            LSE,
+            D,
+            softmax_scale,
+            stride_qm,
+            stride_kn,
+            stride_vn,
+            stride_bm,
+            stride_dom,
+            stride_dqm,
+            stride_dkn,
+            stride_dvn,
+            seqlen_q,
+            seqlen_k,
+            headdim,
+            ATOMIC_ADD=True,
+            BIAS_TYPE=BIAS_TYPE,
+            IS_CAUSAL=IS_CAUSAL,
+            BLOCK_HEADDIM=BLOCK_HEADDIM,
+            EVEN_M=EVEN_M,
+            EVEN_N=EVEN_N,
+            EVEN_HEADDIM=EVEN_HEADDIM,
+            BLOCK_M=BLOCK_M,
+            BLOCK_N=BLOCK_N,
+        )
+
+
+def flash_attn_forward(q, k, v, bias=None, **kwargs):
+    # shape constraints
+    batch, seqlen_q, nheads, d = q.shape
+    _, seqlen_k, _, _ = k.shape
+    assert k.shape == (batch, seqlen_k, nheads, d)
+    assert v.shape == (batch, seqlen_k, nheads, d)
+    assert d <= 128, "FlashAttention only support head dimensions up to 128"
+    assert q.dtype == k.dtype == v.dtype, "All tensors must have the same type"
+    assert q.dtype in [torch.float16, torch.bfloat16], "Only support fp16 and bf16"
+    assert q.is_cuda and k.is_cuda and v.is_cuda
+
+    causal = kwargs.get("causal", 0) == 1
+    softmax_scale = kwargs.get("softmax_scale", 1.0 / math.sqrt(d))
+    has_bias = bias is not None
+    bias_type = "none"
+    if has_bias:
+        assert bias.dtype in [q.dtype, torch.float]
+        assert bias.is_cuda
+        assert bias.dim() == 4
+        if bias.stride(-1) != 1:
+            bias = bias.contiguous()
+        if bias.shape[2:] == (1, seqlen_k):
+            bias_type = "vector"
+        elif bias.shape[2:] == (seqlen_q, seqlen_k):
+            bias_type = "matrix"
+        else:
+            raise RuntimeError("Last 2 dimensions of bias must be (1, seqlen_k)" " or (seqlen_q, seqlen_k)")
+        bias = bias.expand(batch, nheads, seqlen_q, seqlen_k)
+    bias_strides = (bias.stride(0), bias.stride(1), bias.stride(2)) if has_bias else (0, 0, 0)
+
+    seqlen_q_rounded = math.ceil(seqlen_q / 128) * 128
+    lse = torch.empty((batch, nheads, seqlen_q_rounded), device=q.device, dtype=torch.float32)
+    tmp = torch.empty((batch, nheads, seqlen_q_rounded), device=q.device, dtype=torch.float32)
+    o = torch.empty_like(q)
+
+    BLOCK_HEADDIM = max(triton.next_power_of_2(d), 16)
+    BLOCK = 128
+    num_warps = 4 if d <= 64 else 8
+    grid = lambda META: (triton.cdiv(seqlen_q, META["BLOCK_M"]), batch * nheads)
+    _fwd_kernel[grid](
+        q,
+        k,
+        v,
+        bias,
+        o,
+        lse,
+        tmp,
+        softmax_scale,
+        q.stride(0),
+        q.stride(2),
+        q.stride(1),
+        k.stride(0),
+        k.stride(2),
+        k.stride(1),
+        v.stride(0),
+        v.stride(2),
+        v.stride(1),
+        *bias_strides,
+        o.stride(0),
+        o.stride(2),
+        o.stride(1),
+        nheads,
+        seqlen_q,
+        seqlen_k,
+        seqlen_q_rounded,
+        d,
+        seqlen_q // 32,
+        seqlen_k // 32,  # key for triton cache (limit number of compilations)
+        # Can't use kwargs here because triton autotune expects key to be args, not kwargs
+        # IS_CAUSAL=causal, BLOCK_HEADDIM=d,
+        bias_type,
+        causal,
+        BLOCK_HEADDIM,
+        BLOCK_M=BLOCK,
+        BLOCK_N=BLOCK,
+        num_warps=num_warps,
+        num_stages=1,
+    )
+    return o, lse
+
+
+def flash_attn_backward(do, q, k, v, o, lse, bias=None, **kwargs):
+    dq = torch.empty_like(q)
+    dk = torch.empty_like(k)
+    dv = torch.empty_like(v)
+    # Make sure that the last dimension is contiguous
+    if do.stride(-1) != 1:
+        do = do.contiguous()
+    batch, seqlen_q, nheads, d = q.shape
+    _, seqlen_k, _, _ = k.shape
+    # assert d in {16, 32, 64, 128}
+    assert d <= 128
+    seqlen_q_rounded = math.ceil(seqlen_q / 128) * 128
+    assert lse.shape == (batch, nheads, seqlen_q_rounded)
+    assert q.stride(-1) == k.stride(-1) == v.stride(-1) == o.stride(-1) == 1
+    assert dq.stride(-1) == dk.stride(-1) == dv.stride(-1) == 1
+
+    causal = kwargs.get("causal", 0) == 1
+    softmax_scale = kwargs.get("softmax_scale", 1.0 / math.sqrt(d))
+    # dq_accum = torch.zeros_like(q, dtype=torch.float32)
+    dq_accum = torch.empty_like(q, dtype=torch.float32)
+    delta = torch.empty_like(lse)
+    # delta = torch.zeros_like(lse)
+
+    BLOCK_HEADDIM = max(triton.next_power_of_2(d), 16)
+    grid = lambda META: (triton.cdiv(seqlen_q, META["BLOCK_M"]), batch * nheads)
+    _bwd_preprocess_do_o_dot[grid](
+        o,
+        do,
+        delta,
+        o.stride(0),
+        o.stride(2),
+        o.stride(1),
+        do.stride(0),
+        do.stride(2),
+        do.stride(1),
+        nheads,
+        seqlen_q,
+        seqlen_q_rounded,
+        d,
+        BLOCK_M=128,
+        BLOCK_HEADDIM=BLOCK_HEADDIM,
+    )
+
+    has_bias = bias is not None
+    bias_type = "none"
+    if has_bias:
+        assert bias.dtype in [q.dtype, torch.float]
+        assert bias.is_cuda
+        assert bias.dim() == 4
+        assert bias.stride(-1) == 1
+        if bias.shape[2:] == (1, seqlen_k):
+            bias_type = "vector"
+        elif bias.shape[2:] == (seqlen_q, seqlen_k):
+            bias_type = "matrix"
+        else:
+            raise RuntimeError("Last 2 dimensions of bias must be (1, seqlen_k)" " or (seqlen_q, seqlen_k)")
+        bias = bias.expand(batch, nheads, seqlen_q, seqlen_k)
+    bias_strides = (bias.stride(0), bias.stride(1), bias.stride(2)) if has_bias else (0, 0, 0)
+
+    # BLOCK_M = 128
+    # BLOCK_N = 64
+    # num_warps = 4
+    grid = lambda META: (
+        triton.cdiv(seqlen_k, META["BLOCK_N"]) if META["SEQUENCE_PARALLEL"] else 1,
+        batch * nheads,
+    )
+    _bwd_kernel[grid](
+        q,
+        k,
+        v,
+        bias,
+        do,
+        dq_accum,
+        dk,
+        dv,
+        lse,
+        delta,
+        softmax_scale,
+        q.stride(0),
+        q.stride(2),
+        q.stride(1),
+        k.stride(0),
+        k.stride(2),
+        k.stride(1),
+        v.stride(0),
+        v.stride(2),
+        v.stride(1),
+        *bias_strides,
+        do.stride(0),
+        do.stride(2),
+        do.stride(1),
+        dq_accum.stride(0),
+        dq_accum.stride(2),
+        dq_accum.stride(1),
+        dk.stride(0),
+        dk.stride(2),
+        dk.stride(1),
+        dv.stride(0),
+        dv.stride(2),
+        dv.stride(1),
+        nheads,
+        seqlen_q,
+        seqlen_k,
+        seqlen_q_rounded,
+        d,
+        seqlen_q // 32,
+        seqlen_k // 32,  # key for triton cache (limit number of compilations)
+        # Can't use kwargs here because triton autotune expects key to be args, not kwargs
+        # IS_CAUSAL=causal, BLOCK_HEADDIM=d,
+        bias_type,
+        causal,
+        BLOCK_HEADDIM,
+        # SEQUENCE_PARALLEL=False,
+        # BLOCK_M=BLOCK_M, BLOCK_N=BLOCK_N,
+        # num_warps=num_warps,
+        # num_stages=1,
+    )
+    dq.copy_(dq_accum)
+    return dq, dk, dv
+
+
+def _make_flash_attention_nodes(
+    idx: int,
+    q: str,
+    k: str,
+    v: str,
+    y: str,
+    dy: str,
+    dq: str,
+    dk: str,
+    dv: str,
+    bias: str,
+    scale: float,
+):
+    logsumexp = helper.make_tensor_value_info("logsumexp_" + str(idx), TensorProto.FLOAT, [])
+    fwd_node = helper.make_node(
+        "TritonOp",
+        [q, k, v, bias],
+        [y, logsumexp.name],
+        "TritonOp_Flash_Attn_Fwd_" + str(idx),
+        None,
+        "com.microsoft",
+        func_name="flash_attn_forward",
+        causal=0,
+        softmax_scale=scale,
+    )
+    bwd_node = helper.make_node(
+        "TritonOp",
+        [dy, q, k, v, y, logsumexp.name, bias],
+        [dq, dk, dv],
+        "TritonOp_Flash_Attn_Bwd_" + str(idx),
+        None,
+        "com.microsoft",
+        func_name="flash_attn_backward",
+        causal=0,
+        softmax_scale=scale,
+    )
+    return [fwd_node, bwd_node], [logsumexp]
+
+
+# Without causal mask, without Dropout. For example, BERT model in HuggingFace.
+_PATTERN_0: List[Tuple[str, bool, List[Tuple[int, int, int]]]] = [
+    ("MatMul", False, []),  # 0
+    ("Transpose", True, [(0, 0, 0)]),  # 1
+    ("Transpose", True, [(0, 0, 1)]),  # 2
+    ("Div", False, [(0, 0, 0)]),  # 3
+    ("Add", False, [(3, 0, 0)]),  # 4
+    ("Softmax", False, [(4, 0, 0)]),  # 5
+    ("MatMul", False, [(5, 0, 0)]),  # 6
+    ("Transpose", True, [(6, 0, 1)]),  # 7
+    ("Transpose", False, [(6, 0, 0)]),  # 8
+    ("FusedMatMul", False, [(7, 0, 1)]),  # 9
+    ("SoftmaxGrad_13", False, [(9, 0, 0), (5, 0, 1)]),  # 10
+    ("Identity", False, [(10, 0, 0)]),  # 11
+    ("Div", False, [(11, 0, 0)]),  # 12
+    ("Identity", False, [(12, 0, 0)]),  # 13
+    ("FusedMatMul", False, [(2, 0, 1), (13, 0, 0)]),  # 14
+    ("FusedMatMul", False, [(1, 0, 0), (13, 0, 1)]),  # 15
+    ("FusedMatMul", False, [(5, 0, 0)]),  # 16
+    ("Transpose", True, [(16, 0, 1)]),  # 17
+    ("Transpose", False, [(14, 0, 0)]),  # 18
+    ("Transpose", False, [(15, 0, 0)]),  # 19
+    ("Transpose", False, [(16, 0, 0)]),  # 20
+]
+
+
+def _optimize_for_pattern_0(matcher: GraphMatcher, idx: int, nodes: List[NodeProto]):
+    # Check forward only as the backward is expected to be consistent if it's built correctly.
+    scale_value = matcher.get_constant_value(nodes[3].input[1])
+    if not (
+        check_attribute_value(nodes[1], "perm", [0, 2, 1, 3])
+        and check_attribute_value(nodes[2], "perm", [0, 2, 3, 1])
+        and scale_value is not None
+        and check_attribute_value(nodes[7], "perm", [0, 2, 1, 3])
+        and check_attribute_value(nodes[8], "perm", [0, 2, 1, 3])
+    ):
+        return [], [], []
+
+    nodes_to_add, new_value_infos = _make_flash_attention_nodes(
+        idx,
+        nodes[1].input[0],
+        nodes[2].input[0],
+        nodes[7].input[0],
+        nodes[8].output[0],
+        nodes[17].input[0],
+        nodes[18].output[0],
+        nodes[19].output[0],
+        nodes[20].output[0],
+        nodes[4].input[1],
+        1 / float(scale_value[0] if isinstance(scale_value, list) else scale_value),
+    )
+    return nodes, nodes_to_add, new_value_infos
+
+
+# llama2+peft, k doesn't require grad.
+_PATTERN_1: List[Tuple[str, bool, List[Tuple[int, int, int]]]] = [
+    ("MatMul", False, []),  # 0
+    ("Transpose", True, [(0, 0, 1)]),  # 1
+    ("Div", False, [(0, 0, 0)]),  # 2
+    ("Add", False, [(2, 0, 0)]),  # 3
+    ("Softmax", False, [(3, 0, 0)]),  # 4
+    ("MatMul", False, [(4, 0, 0)]),  # 5
+    ("Transpose", True, [(5, 0, 1)]),  # 6
+    ("Identity", False, [(6, 0, 0)]),  # 7
+    ("YieldOp", False, [(7, 0, -1)]),  # 8
+    ("Transpose", False, [(5, 0, 0)]),  # 9
+    ("FusedMatMul", False, [(6, 0, 1)]),  # 10
+    ("SoftmaxGrad_13", False, [(10, 0, 0), (4, 0, 1)]),  # 11
+    ("Identity", False, [(11, 0, 0)]),  # 12
+    ("Div", False, [(12, 0, 0)]),  # 13
+    ("Identity", False, [(13, 0, 0)]),  # 14
+    ("FusedMatMul", False, [(1, 0, 1), (14, 0, 0)]),  # 15
+    ("FusedMatMul", False, [(4, 0, 0)]),  # 16
+    ("Transpose", True, [(16, 0, 1)]),  # 17
+    ("Sum", False, [(16, 0, 0)]),  # 18
+    ("Transpose", False, [(18, 0, 0)]),  # 19
+]
+
+
+def _optimize_for_pattern_1(matcher: GraphProto, idx: int, nodes: List[NodeProto]):
+    # Check forward only as the backward is expected to be consistent if it's built correctly.
+    scale_value = matcher.get_constant_value(nodes[2].input[1])
+    if not (
+        check_attribute_value(nodes[1], "perm", [0, 1, 3, 2])
+        and scale_value is not None
+        and check_attribute_value(nodes[6], "perm", [0, 2, 1, 3])
+        and check_attribute_value(nodes[9], "perm", [0, 2, 1, 3])
+        and matcher.get_consumer_count(nodes[14].output[0]) == 1
+    ):
+        return [], [], []
+
+    dtype, _ = matcher.get_type_and_shape(nodes[0].input[0])
+    assert dtype is not None
+    trans_q_tensor = helper.make_tensor_value_info("trans_q_" + str(idx), dtype, None)
+    trans_q_grad_tensor = helper.make_tensor_value_info("trans_q_grad_" + str(idx), dtype, None)
+    trans_k_tensor = helper.make_tensor_value_info("trans_k_" + str(idx), dtype, None)
+    trans_q = helper.make_node(
+        "Transpose", [nodes[0].input[0]], [trans_q_tensor.name], "Trans_Q_" + str(idx), perm=[0, 2, 1, 3]
+    )
+    trans_q_grad = helper.make_node(
+        "Transpose", [trans_q_grad_tensor.name], [nodes[15].output[0]], "Trans_Q_Grad_" + str(idx), perm=[0, 2, 1, 3]
+    )
+    trans_k = helper.make_node(
+        "Transpose", [nodes[1].input[0]], [trans_k_tensor.name], "Trans_K_" + str(idx), perm=[0, 2, 1, 3]
+    )
+    nodes[19].input[0] = nodes[18].input[1]
+    v_grad = nodes[19].output[0]
+    nodes[19].output[0] = nodes[18].output[0]
+    nodes[18].input[1] = nodes[18].output[0]
+    nodes[18].output[0] = v_grad
+    nodes_to_add, new_value_infos = _make_flash_attention_nodes(
+        idx,
+        trans_q_tensor.name,
+        trans_k_tensor.name,
+        nodes[6].input[0],
+        nodes[9].output[0],
+        nodes[17].input[0],
+        trans_q_grad_tensor.name,
+        "",
+        nodes[16].output[0],
+        nodes[3].input[1],
+        1 / float(scale_value[0] if isinstance(scale_value, list) else scale_value),
+    )
+    nodes_to_remove = nodes[:6] + nodes[9:18]
+    nodes_to_add.extend([trans_q, trans_q_grad, trans_k])
+    new_value_infos.extend([trans_q_tensor, trans_q_grad_tensor, trans_k_tensor])
+    return nodes_to_remove, nodes_to_add, new_value_infos
+
+
+# llama2+peft, k requires grad.
+_PATTERN_2: List[Tuple[str, bool, List[Tuple[int, int, int]]]] = [
+    ("MatMul", False, []),  # 0
+    ("Transpose", True, [(0, 0, 1)]),  # 1
+    ("Div", False, [(0, 0, 0)]),  # 2
+    ("Add", False, [(2, 0, 0)]),  # 3
+    ("Softmax", False, [(3, 0, 0)]),  # 4
+    ("MatMul", False, [(4, 0, 0)]),  # 5
+    ("Transpose", True, [(5, 0, 1)]),  # 6
+    ("Identity", False, [(6, 0, 0)]),  # 7
+    ("YieldOp", False, [(7, 0, -1)]),  # 8
+    ("Transpose", False, [(5, 0, 0)]),  # 9
+    ("FusedMatMul", False, [(6, 0, 1)]),  # 10
+    ("SoftmaxGrad_13", False, [(10, 0, 0), (4, 0, 1)]),  # 11
+    ("Identity", False, [(11, 0, 0)]),  # 12
+    ("Div", False, [(12, 0, 0)]),  # 13
+    ("Identity", False, [(13, 0, 0)]),  # 14
+    ("FusedMatMul", False, [(1, 0, 1), (14, 0, 0)]),  # 15
+    ("FusedMatMul", False, [(14, 0, 1)]),  # 16
+    ("Transpose", False, [(16, 0, 0)]),  # 17
+    ("FusedMatMul", False, [(4, 0, 0)]),  # 18
+    ("Transpose", True, [(18, 0, 1)]),  # 19
+    ("Sum", False, [(18, 0, 0)]),  # 20
+    ("Transpose", False, [(20, 0, 0)]),  # 21
+]
+
+
+def _aptimize_for_pattern_2(matcher: GraphProto, idx: int, nodes: List[NodeProto]):
+    # Check forward only as the backward is expected to be consistent if it's built correctly.
+    scale_value = matcher.get_constant_value(nodes[2].input[1])
+    if not (
+        check_attribute_value(nodes[1], "perm", [0, 1, 3, 2])
+        and scale_value is not None
+        and check_attribute_value(nodes[6], "perm", [0, 2, 1, 3])
+        and check_attribute_value(nodes[9], "perm", [0, 2, 1, 3])
+        and matcher.get_consumer_count(nodes[14].output[0]) == 2
+    ):
+        return [], [], []
+
+    dtype, _ = matcher.get_type_and_shape(nodes[0].input[0])
+    assert dtype is not None
+    trans_q_tensor = helper.make_tensor_value_info("trans_q_" + str(idx), dtype, None)
+    trans_q_grad_tensor = helper.make_tensor_value_info("trans_q_grad_" + str(idx), dtype, None)
+    trans_k_tensor = helper.make_tensor_value_info("trans_k_" + str(idx), dtype, None)
+    trans_k_grad_tensor = helper.make_tensor_value_info("trans_k_grad_" + str(idx), dtype, None)
+    trans_q = helper.make_node(
+        "Transpose", [nodes[0].input[0]], [trans_q_tensor.name], "Trans_Q_" + str(idx), perm=[0, 2, 1, 3]
+    )
+    trans_q_grad = helper.make_node(
+        "Transpose", [trans_q_grad_tensor.name], [nodes[15].output[0]], "Trans_Q_Grad_" + str(idx), perm=[0, 2, 1, 3]
+    )
+    trans_k = helper.make_node(
+        "Transpose", [nodes[1].input[0]], [trans_k_tensor.name], "Trans_K_" + str(idx), perm=[0, 2, 1, 3]
+    )
+    trans_k_grad = helper.make_node(
+        "Transpose", [trans_k_grad_tensor.name], [nodes[17].output[0]], "Trans_K_Grad_" + str(idx), perm=[0, 2, 1, 3]
+    )
+    nodes[21].input[0] = nodes[20].input[1]
+    v_grad = nodes[21].output[0]
+    nodes[21].output[0] = nodes[20].output[0]
+    nodes[20].input[1] = nodes[20].output[0]
+    nodes[20].output[0] = v_grad
+    nodes_to_add, new_value_infos = _make_flash_attention_nodes(
+        idx,
+        trans_q_tensor.name,
+        trans_k_tensor.name,
+        nodes[6].input[0],
+        nodes[9].output[0],
+        nodes[19].input[0],
+        trans_q_grad_tensor.name,
+        trans_k_grad_tensor.name,
+        nodes[18].output[0],
+        nodes[3].input[1],
+        1 / float(scale_value[0] if isinstance(scale_value, list) else scale_value),
+    )
+    nodes_to_remove = nodes[:6] + nodes[9:20]
+    nodes_to_add.extend([trans_q, trans_q_grad, trans_k, trans_k_grad])
+    new_value_infos.extend([trans_q_tensor, trans_q_grad_tensor, trans_k_tensor, trans_k_grad_tensor])
+    return nodes_to_remove, nodes_to_add, new_value_infos
+
+
+# TODO: add pattern to support attention with causal mask, such as GPT2 in HuggingFace.
+_PATTERNS = [
+    (_PATTERN_0, _optimize_for_pattern_0),
+    (_PATTERN_1, _optimize_for_pattern_1),
+    (_PATTERN_2, _aptimize_for_pattern_2),
+]
+
+
+@register_graph_optimizer(devices="cuda")
+def optimize_graph_for_flash_attention(graph: GraphProto):
+    nodes_to_remove = []
+    nodes_to_add = []
+    new_value_infos = []
+    matcher = GraphMatcher(graph)
+    idx = 0
+    for pattern_tuple in _PATTERNS:
+        for nodes in matcher.match_pattern(pattern_tuple[0]):
+            remove_nodes, add_nodes, add_value_infos = pattern_tuple[1](matcher, idx, nodes)
+            if len(add_nodes) > 0:
+                nodes_to_remove.extend(remove_nodes)
+                nodes_to_add.extend(add_nodes)
+                new_value_infos.extend(add_value_infos)
+                idx += 1
+    update_graph(graph, nodes_to_remove, nodes_to_add, new_value_infos)
diff --git a/orttraining/orttraining/python/training/ort_triton/kernel/_slice_scel.py b/orttraining/orttraining/python/training/ort_triton/kernel/_slice_scel.py
index 8edcc9b63ef4f..fb7ddc68900c9 100644
--- a/orttraining/orttraining/python/training/ort_triton/kernel/_slice_scel.py
+++ b/orttraining/orttraining/python/training/ort_triton/kernel/_slice_scel.py
@@ -11,7 +11,7 @@
 import triton.language as tl
 from onnx import TensorProto, helper
 
-from onnxruntime.training.ortmodule import register_graph_transformer
+from onnxruntime.training.ortmodule import register_graph_optimizer
 
 from .._utils import get_attribute, to_numpy_array
 
@@ -246,8 +246,8 @@ def _get_shape_related_nodes(graph, start_arg, sub_graph_nodes):
                         args.append(output)
 
 
-@register_graph_transformer(devices="cuda")
-def transform_slice_scel(graph):
+@register_graph_optimizer(devices="cuda")
+def optimize_graph_for_slice_scel(graph):
     remove_nodes = []
     triton_nodes = []
     value_infos = []
diff --git a/orttraining/orttraining/python/training/ort_triton/triton_op_executor.py b/orttraining/orttraining/python/training/ort_triton/triton_op_executor.py
index b970c730d0441..1fe61750e651e 100644
--- a/orttraining/orttraining/python/training/ort_triton/triton_op_executor.py
+++ b/orttraining/orttraining/python/training/ort_triton/triton_op_executor.py
@@ -8,7 +8,7 @@
 import os
 import sys
 from types import ModuleType
-from typing import List, Tuple
+from typing import List, Tuple, Union
 
 import onnx
 from torch._C import _from_dlpack
@@ -18,8 +18,8 @@
 from ._codegen import codegen
 from ._op_config import get_supported_ops
 from ._sorted_graph import SortedGraph
-from ._sympy_utils import parse_shape
-from ._utils import gen_unique_name
+from ._sympy_utils import extract_shape_from_symbol, parse_shape
+from ._utils import gen_unique_name, next_power_of_2
 
 _DEBUG_MODE = "ORTMODULE_TRITON_DEBUG" in os.environ and int(os.getenv("ORTMODULE_TRITON_DEBUG")) == 1
 
@@ -31,11 +31,46 @@ def _gen_module_internal(sorted_graph: SortedGraph) -> Tuple[str, str, ModuleTyp
     return func_name, src_code, PyCodeCache().load(src_code)
 
 
-def _gen_key(onnx_key: int, onnx_str: bytes, shapes: List[List[int]]) -> int:
+class _ShapeCache:
+    """
+    Cache the shapes of the inputs. The inputs are the concrete shapes of inputs from each step for a given ONNX model.
+    For those dimensions that the concrete shape is not changed, we use the same concrete shape.
+    For those dimensions that the concrete shape is changed between different steps, we use a symbolic shape.
+    """
+
+    cache = dict()  # noqa: RUF012
+    clear = staticmethod(cache.clear)
+
+    @classmethod
+    def get_shape(cls, onnx_key: int, shapes: List[List[int]]) -> List[List[Union[int, str]]]:
+        if onnx_key not in cls.cache:
+            cls.cache[onnx_key] = shapes
+        else:
+            changed = False
+            for i, shape in enumerate(shapes):
+                for j, dim in enumerate(shape):
+                    if dim != cls.cache[onnx_key][i][j] and isinstance(cls.cache[onnx_key][i][j], int):
+                        max_dim = max(dim, cls.cache[onnx_key][i][j])
+                        shape[j] = f"i{i}_dim{j}_{next_power_of_2(max_dim)}"
+                        changed = True
+                    elif isinstance(cls.cache[onnx_key][i][j], str):
+                        pre = extract_shape_from_symbol(cls.cache[onnx_key][i][j])
+                        if pre >= dim:
+                            shape[j] = cls.cache[onnx_key][i][j]
+                        else:
+                            shape[j] = f"i{i}_dim{j}_{next_power_of_2(dim)}"
+                            changed = True
+            if changed:
+                cls.cache[onnx_key] = shapes
+        return cls.cache[onnx_key]
+
+
+def _gen_key(onnx_key: int, onnx_str: bytes, shapes: List[List[Union[int, str]]]) -> int:
+    # pylint: disable=unused-argument
     return hash(f"{onnx_key}|{str(shapes).replace(' ', '')}") % (10**8)
 
 
-def _gen_module(onnx_key: int, onnx_str: bytes, shapes: List[List[int]]) -> Tuple[str, ModuleType]:
+def _gen_module(onnx_key: int, onnx_str: bytes, shapes: List[List[Union[int, str]]]) -> Tuple[str, ModuleType]:
     model = onnx.load_model_from_string(onnx_str)
     sorted_graph = SortedGraph(model, [parse_shape(shape) for shape in shapes])
     if _DEBUG_MODE:
@@ -44,7 +79,7 @@ def _gen_module(onnx_key: int, onnx_str: bytes, shapes: List[List[int]]) -> Tupl
     func_name, src_code, mod = _gen_module_internal(sorted_graph)
     if _DEBUG_MODE:
         py_file_path = f"triton_debug/{func_name}_{onnx_key}.py"
-        with open(py_file_path, "w") as f:
+        with open(py_file_path, "w", encoding="UTF-8") as f:
             f.write(src_code)
     return func_name, mod
 
@@ -52,7 +87,8 @@ def _gen_module(onnx_key: int, onnx_str: bytes, shapes: List[List[int]]) -> Tupl
 def get_config() -> str:
     """
     Get the supported ops and other configs in JSON format to control the Triton fusion on backend side.
-    All supported ops are from _op_config.py. The Triton fusion will try to fuse subgraphs with connected supported ops.
+    All supported ops are from user config specified by env ORTMODULE_TRITON_CONFIG_FILE or from _op_config.py.
+    The Triton fusion will try to fuse subgraphs with connected supported ops.
     The initializer value can be "none", "scalar", and "all".
         "none": no initializer will be added to subgraphs.
         "scalar": only related scalar initializers will be added to subgraphs.
@@ -60,6 +96,11 @@ def get_config() -> str:
     The min_nodes is used to control the minimum number of non-no-op nodes in a subgraph.
     """
 
+    config_file = os.getenv("ORTMODULE_TRITON_CONFIG_FILE", "")
+    if config_file and os.path.exists(config_file):
+        with open(config_file, encoding="UTF-8") as f:
+            return f.read()
+
     config = {"ops": get_supported_ops(), "initializer": "scalar", "min_nodes": 2}
     return json.dumps(config)
 
@@ -90,7 +131,8 @@ def call_triton_by_onnx(onnx_key: int, onnx_str: bytes, *tensors):
     assert all(tensor is not None for tensor in tensors)
     torch_tensors = [_from_dlpack(tensor) for tensor in tensors]
     concrete_shapes = [list(tensor.size()) for tensor in torch_tensors]
-    func_name, mod = ModuleCache.load(_gen_key, _gen_module, onnx_key, onnx_str, concrete_shapes)
+    shapes = _ShapeCache.get_shape(onnx_key, concrete_shapes)
+    func_name, mod = ModuleCache.load(_gen_key, _gen_module, onnx_key, onnx_str, shapes)
     func = getattr(mod, func_name)
     output = func(*torch_tensors)
     if isinstance(output, tuple):
diff --git a/orttraining/orttraining/python/training/ortmodule/__init__.py b/orttraining/orttraining/python/training/ortmodule/__init__.py
index 59cf05bb082fc..fbf1b7c2bac42 100644
--- a/orttraining/orttraining/python/training/ortmodule/__init__.py
+++ b/orttraining/orttraining/python/training/ortmodule/__init__.py
@@ -124,7 +124,8 @@ def _are_deterministic_algorithms_enabled():
     return ORTMODULE_IS_DETERMINISTIC
 
 
-from .graph_transformer_registry import register_graph_transformer  # noqa: E402, F401
+from .graph_optimizer_registry import register_graph_optimizer  # noqa: E402, F401
+from .graph_optimizers import *  # noqa: E402, F403
 from .options import DebugOptions, LogLevel  # noqa: E402, F401
 
 # ORTModule must be loaded only after all validation passes
diff --git a/orttraining/orttraining/python/training/ortmodule/_custom_autograd_function_exporter.py b/orttraining/orttraining/python/training/ortmodule/_custom_autograd_function_exporter.py
index f75d553a5f460..8efbe16d7d61d 100644
--- a/orttraining/orttraining/python/training/ortmodule/_custom_autograd_function_exporter.py
+++ b/orttraining/orttraining/python/training/ortmodule/_custom_autograd_function_exporter.py
@@ -3,8 +3,10 @@
 # Licensed under the MIT License.
 # --------------------------------------------------------------------------
 
+from __future__ import annotations
+
 import sys
-from typing import Callable, ClassVar, Dict, Optional
+from typing import ClassVar
 
 import torch
 import torch.utils.checkpoint
@@ -12,54 +14,102 @@
 from packaging import version
 from torch.onnx import symbolic_helper
 
-from onnxruntime.capi._pybind_state import register_miscellaneous_const_input, register_torch_autograd_function
+from onnxruntime.capi._pybind_state import (
+    register_input_alias_function,
+    register_miscellaneous_const_input,
+    register_shape_inference_function,
+    register_torch_autograd_function,
+)
 from onnxruntime.training import ortmodule
-from onnxruntime.training.utils import pytorch_dtype_to_onnx
+from onnxruntime.training.utils import pytorch_scalar_type_to_pytorch_dtype, pytorch_type_to_onnx_dtype
 
 from ._custom_op_symbolic_registry import wrap_custom_export_function
 from ._fallback import ORTModuleONNXModelException, wrap_exception
 from ._utils import get_fully_qualified_class_name, get_runtime_pytorch_version
 
 
-class PythonOpShapeInferStore:
-    """A class to store shape inference functions for torch.autograd.Function."""
+class _SpecialCustomFunctionHandler:
+    """A class to handle high priority export of torch.autograd.Function.
+    `register_high_priority_handler` can be used as function decorator to register a handler for a torch.autograd.Function.
+    """
+
+    _HIGH_PRIORITY_EXPORT_HANDLER_MAP: ClassVar[dict[str, callable]] = {}
+
+    @staticmethod
+    def add_handler(func_name: str, handler: callable) -> None:
+        """Add a handler for a function name.
+
+        Args:
+            func_name (str): The function name.
+            handler (callable): The handler.
 
-    _CLASS_MAP: ClassVar[Dict[str, Callable]] = {}
+        """
+        _SpecialCustomFunctionHandler._HIGH_PRIORITY_EXPORT_HANDLER_MAP[func_name] = handler
 
-    @classmethod
-    def register(cls, kclass: torch.autograd.Function) -> None:
-        """Register a shape inference function for a torch.autograd.Function if there is staticmethod
-        "infer_shape" defined.
+    @staticmethod
+    def get_handler(func_name: str) -> callable | None:
+        """Get the handler for a function name.
 
-        The signature of the shape inference function should be:
-            @staticmethod
-            def infer_shape(
-                node: onnx.NodeProto,
-                tensor_input_shapes: List[Optional[List[Union[int, str]]]],
-                tensor_input_dtypes: List[torch.onnx.TensorProtoDataType],
-            ) -> Tuple[List[Optional[List[Union[int, str]]]], List[torch.onnx.TensorProtoDataType]]:
-                tensor_output_shapes = []
-                tensor_output_dtypes = []
-                ...
-                return tensor_output_shapes, tensor_output_dtypes
+        Args:
+            func_name (str): The function name.
 
-        The tensor_input_shapes and tensor_input_dtypes are lists of shapes and dtypes of the input tensors.
-        The tensor_output_shapes and tensor_output_dtypes are lists of shapes and dtypes of the output tensors.
-        Be noted: we only pass in tensor inputs, and return tensor outputs, non-tensor inputs/outputs are ignored.
+        Returns:
+            callable | None: The handler.
 
         """
-        kclass_name = get_fully_qualified_class_name(kclass)
-        if hasattr(kclass, "infer_shape") and kclass_name not in cls._CLASS_MAP:
-            cls._CLASS_MAP[kclass_name] = kclass.infer_shape
+        return _SpecialCustomFunctionHandler._HIGH_PRIORITY_EXPORT_HANDLER_MAP.get(func_name, None)
+
 
-    @classmethod
-    def register_func(cls, name: str, func: Callable) -> None:
-        """Register a shape inference function for a torch.autograd.Function by name."""
-        cls._CLASS_MAP[name] = func
+def register_high_priority_handler(func_name):
+    """Register a handler for a torch.autograd.Function using its full qualified class name."""
 
-    @classmethod
-    def get_shape_infer(cls, name: str) -> Optional[Callable]:
-        return cls._CLASS_MAP.get(name, None)
+    def symbolic_wrapper(fn):
+        _SpecialCustomFunctionHandler.add_handler(func_name, fn)
+        return fn
+
+    return symbolic_wrapper
+
+
+def register_custom_function_schema_supplementary(kclass: torch.autograd.Function) -> None:
+    """Register a shape inference function for a torch.autograd.Function if there is staticmethod
+    "infer_shape" defined.
+
+    The signature of the shape inference function should be:
+        @staticmethod
+        def infer_shape(
+            node: onnx.NodeProto,
+            tensor_input_shapes: List[Optional[List[Union[int, str]]]],
+            tensor_input_dtypes: List[torch.onnx.TensorProtoDataType],
+        ) -> Tuple[List[Optional[List[Union[int, str]]]], List[torch.onnx.TensorProtoDataType]]:
+            tensor_output_shapes = []
+            tensor_output_dtypes = []
+            ...
+            return tensor_output_shapes, tensor_output_dtypes
+
+    The tensor_input_shapes and tensor_input_dtypes are lists of shapes and dtypes of the input tensors.
+    The tensor_output_shapes and tensor_output_dtypes are lists of shapes and dtypes of the output tensors.
+    Be noted: we only pass in tensor inputs, and return tensor outputs, non-tensor inputs/outputs are ignored.
+
+
+    The signature of the alias input function should be:
+        @staticmethod
+        def alias_input(node_proto_str: str) -> Tuple[List[int], List[int]]:
+            fw_alias_map = [1, -1, -1]
+            bw_alias_map = [-1, 0]
+            return fw_alias_map, bw_alias_map
+
+    The alias input function should return a tuple of two lists:
+    - The first list is the forward alias map, its length is equal to the number of all outputs of the node.
+    - The second list is the backward alias map, its length is equal to the number of all inputs
+        (tensor and non-tensor) of the node.
+
+    """
+    kclass_name = get_fully_qualified_class_name(kclass)
+    if hasattr(kclass, "infer_shape"):
+        register_shape_inference_function(kclass_name, kclass.infer_shape)
+
+    if hasattr(kclass, "alias_input"):
+        register_input_alias_function(kclass_name, kclass.alias_input)
 
 
 """
@@ -91,6 +141,30 @@ def get_shape_infer(cls, name: str) -> Optional[Callable]:
 )
 
 
+def _get_training_mode() -> bool:
+    # TODO move to public API once the exporter team exposes that
+    training_mode = None
+    if get_runtime_pytorch_version() >= version.parse("1.12"):
+        # FIXME: using private modules
+        from torch.onnx import _globals
+
+        # before https://github.com/pytorch/pytorch/commit/c8b9b6266b505328e503b12f6a42fd88c56374f9,
+        # training_mode is still a bool type
+        if isinstance(_globals.GLOBALS.training_mode, bool):
+            training_mode = _globals.GLOBALS.training_mode
+        else:
+            if _globals.GLOBALS.training_mode not in [
+                torch.onnx.TrainingMode.EVAL,
+                torch.onnx.TrainingMode.TRAINING,
+            ]:
+                raise Exception(f"Unexpected training mode {_globals.GLOBALS.training_mode}")
+            training_mode = _globals.GLOBALS.training_mode == torch.onnx.TrainingMode.TRAINING
+    else:
+        training_mode = symbolic_helper._training_mode
+
+    return bool(training_mode)
+
+
 def _export_pt_1_10(g, n, *args, **kwargs):
     """Export torch.autograd.Function in ORT PythonOp.
 
@@ -109,6 +183,15 @@ def _export_pt_1_10(g, n, *args, **kwargs):
         func_class = n.pyobj().__self__
         func_full_qual_name = get_fully_qualified_class_name(func_class)
 
+        # Check if the function is handled by high priority exporter.
+        hi_pri_handler = _SpecialCustomFunctionHandler.get_handler(func_full_qual_name)
+        if hi_pri_handler:
+            try_export = hi_pri_handler(g, n, *args, **kwargs)
+            if try_export is not None:
+                return try_export
+
+        # Fall back to common exporter if not handled by high priority exporter.
+
         # Check if the checkpointing activation is allowed.
         is_ckpt_activation_allowed = ortmodule._defined_from_envvar("ORTMODULE_ALLOW_AUTOGRAD_CHECKPOINT", 0) == 1
         if is_ckpt_activation_allowed is False and func_full_qual_name in _UNSUPPORTED_CKPT_FUNC_NAMES:
@@ -118,27 +201,6 @@ def _export_pt_1_10(g, n, *args, **kwargs):
                 "wrap exportable sub-nn.Module's as ORTModule."
             )
 
-        inplace = kwargs["inplace"]
-        # TODO move to public API once the exporter team exposes that
-        training_mode = None
-        if get_runtime_pytorch_version() >= version.parse("1.12"):
-            # FIXME: using private modules
-            from torch.onnx import _globals
-
-            # before https://github.com/pytorch/pytorch/commit/c8b9b6266b505328e503b12f6a42fd88c56374f9,
-            # training_mode is still a bool type
-            if isinstance(_globals.GLOBALS.training_mode, bool):
-                training_mode = _globals.GLOBALS.training_mode
-            else:
-                if _globals.GLOBALS.training_mode not in [
-                    torch.onnx.TrainingMode.EVAL,
-                    torch.onnx.TrainingMode.TRAINING,
-                ]:
-                    raise Exception(f"Unexpected training mode {_globals.GLOBALS.training_mode}")
-                training_mode = _globals.GLOBALS.training_mode == torch.onnx.TrainingMode.TRAINING
-        else:
-            training_mode = symbolic_helper._training_mode
-
         cconv = n.cconv()
 
         input_tensor_types = []
@@ -175,7 +237,7 @@ def _export_pt_1_10(g, n, *args, **kwargs):
             if call_type == "d":
                 # Got a tensor variable.
                 tensor_args.append(arg)
-                scalar_type = pytorch_dtype_to_onnx(arg.type().scalarType())
+                scalar_type = pytorch_type_to_onnx_dtype(arg.type().scalarType())
                 input_tensor_types.append(scalar_type)
                 input_tensor_ranks.append(arg.type().dim())
                 continue
@@ -254,20 +316,19 @@ def _export_pt_1_10(g, n, *args, **kwargs):
         output_tensor_ranks = []
         for arg in n.outputs():
             # Type of tensor's elements.
-            scalar_type = pytorch_dtype_to_onnx(arg.type().scalarType())
+            scalar_type = pytorch_type_to_onnx_dtype(arg.type().scalarType())
             output_tensor_types.append(scalar_type)
             output_tensor_ranks.append(arg.type().dim())
 
         attrs = {
             "func_name_s": func_full_qual_name,
-            "inplace_i": inplace,
             "input_convention_s": cconv,
             "outputs": n.outputsSize(),
             "input_tensor_types_i": input_tensor_types,
             "input_tensor_ranks_i": input_tensor_ranks,
             "output_tensor_types_i": output_tensor_types,
             "output_tensor_ranks_i": output_tensor_ranks,
-            "training_mode_i": 1 if training_mode else 0,
+            "training_mode_i": 1 if _get_training_mode() else 0,
             "comment_s": debug_comment,
         }
 
@@ -301,8 +362,8 @@ def _export_pt_1_10(g, n, *args, **kwargs):
         # Register function with class names.
         register_torch_autograd_function(func_full_qual_name, func_class)
 
-        # Register shape inference function.
-        PythonOpShapeInferStore.register(func_class)
+        register_custom_function_schema_supplementary(func_class)
+
         return returned_args
     except Exception as e:
         sys.stdout.flush()
@@ -329,7 +390,76 @@ def post_process_enabling_autograd_function(exported_model: ModelProto) -> Model
                     op_name_prefix = kclass_name
                     break
 
-        node.name = f"{op_name_prefix}_id_{index}"
+            node.name = f"{op_name_prefix}_id_{index}"
         index += 1
 
     return exported_model
+
+
+@register_high_priority_handler("bitsandbytes.autograd._functions.MatMul4Bit")
+def _matmul4bit_export(g, n, *args, **kwargs):
+    cconv = n.cconv()
+    can_converted = (
+        len(cconv) >= 5
+        and cconv[0] == "d"
+        and cconv[1] == "d"
+        and cconv[2] == "c"
+        and cconv[3] == "c"
+        and cconv[4] == "c"
+    )
+    can_converted = can_converted and (args[2] is None and args[3] is None and args[4] is not None)
+    if not can_converted:
+        return None
+
+    quant_state = args[4]
+    if isinstance(quant_state, list):
+        # version <= 0.41.1
+        absmax, shape, dtype, blocksize, compressed_stats, quant_type, data_type = quant_state
+        nested = compressed_stats is not None
+    else:
+        # version > 0.41.1
+        absmax = quant_state.absmax
+        shape = quant_state.shape
+        blocksize = quant_state.blocksize
+        nested = quant_state.nested
+        quant_type = quant_state.quant_type
+
+    # MatMulBnb4's blocksize needs to be a power of 2 and not smaller than 16
+    if blocksize < 16 or blocksize & (blocksize - 1) != 0:
+        return None
+
+    # MatMulBnb4 does not support double de-quantization (e.g. absmax is int, needs to be dequantized too)
+    if nested:
+        return None
+
+    # The PyTorch linear weight shape is [out_feature, in_feature]
+    in_feature = shape[1]
+    out_feature = shape[0]
+    if quant_type == "fp4":
+        quant_type = 0
+    elif quant_type == "nf4":
+        quant_type = 1
+    else:
+        return None
+    attrs = {
+        "K_i": in_feature,
+        "N_i": out_feature,
+        "block_size_i": blocksize,
+        "quant_type_i": quant_type,
+        "training_mode_i": 1 if _get_training_mode() else 0,
+    }
+
+    # Make sure the quant weight can be flatten to 1D tensor safely, which com.microsoft::MatMulBnb4 requires.
+    found_dim1 = any(v == 1 for v in args[1].type().sizes())
+    if not found_dim1:
+        return None
+
+    absmax = g.op(
+        "Constant",
+        value_t=torch.tensor(absmax, dtype=pytorch_scalar_type_to_pytorch_dtype(args[0].type().scalarType())),
+    )
+    quant_weight = g.op(
+        "Reshape", args[1], g.op("Constant", value_t=torch.tensor([-1], dtype=torch.int64))
+    )  # flatten to 1D
+    tensor_args = [args[0], quant_weight, absmax]
+    return g.op("com.microsoft::MatMulBnb4", *tensor_args, **attrs)
diff --git a/orttraining/orttraining/python/training/ortmodule/_custom_autograd_function_runner.py b/orttraining/orttraining/python/training/ortmodule/_custom_autograd_function_runner.py
index a5b96c4e37140..dd32e2aced561 100644
--- a/orttraining/orttraining/python/training/ortmodule/_custom_autograd_function_runner.py
+++ b/orttraining/orttraining/python/training/ortmodule/_custom_autograd_function_runner.py
@@ -3,6 +3,7 @@
 # Licensed under the MIT License.
 # --------------------------------------------------------------------------
 
+
 import sys
 import warnings
 from collections import OrderedDict
@@ -14,10 +15,21 @@
 from onnxruntime.training.ortmodule.torch_cpp_extensions import torch_interop_utils
 
 from ._fallback import ORTModuleFallbackException, ORTModuleIOError, _FallbackManager, wrap_exception  # noqa: F401
+from ._utils import get_rank
+
+
+def _log_warning(message: str):
+    """Configure the logger for PythonOp runner according to following rules.
+    1. If multiple processes are used, the rank will be appended
+       to the logger name.
+    2. The logger will be disabled for non-zero ranks.
+    """
+    if get_rank() == 0:
+        warnings.warn(f"[rank-{get_rank()}] {message}")
 
 
 class CustomFuncOpKernelInfo:
-    """Store the kernel specific information retrieved with the first-time run."""
+    """Store the kernel-specific information retrieved with the first-time run."""
 
     def __init__(self, kernel_invoke_id: str):
         # kernel_invoke_id is a string contains session thread id, op kernel creation time stamp in ms, a random int,
@@ -31,9 +43,9 @@ def __init__(self, kernel_invoke_id: str):
         # reference, may release the content of the tensor before it is needed in backward). Once
         # `autograd.Function.apply` completes, by checking the existence of the tensor in the saved_tensors,
         # `_GlobalOpKernelInfoMap` is updated to save the input indices that are saved in context.
-        # 2. For the subsequent runs, if the input index is in `input_indices_to_save_in_ctx`, the tensor
+        # 2. For the subsequent runs, if the input index is in `tensor_input_indices_to_save_in_ctx`, the tensor
         # will be cloned before fed into `autograd.Function.apply` as input.
-        self.input_indices_to_save_in_ctx: List[int] = []
+        self.tensor_input_indices_to_save_in_ctx: Optional[List[int]] = None
 
         # To align with PyTorch `ctx.set_materialize_grads(False|True)``
         # materialize_grads_config is a map from output index to (device, dtype, shape) of the output tensor, used
@@ -41,27 +53,224 @@ def __init__(self, kernel_invoke_id: str):
         self.materialize_grads: bool = False
         self.materialize_grads_config: Optional[Dict[int, Tuple[torch.device, torch.dtype, torch.shape]]] = None
 
+        # For the tensors generated from ORT backend, there is special handling here:
+        # 1. For the first time run for the kernel (the uniqueness of the kernel is defined by kernel_invoke_id),
+        # all such tensors will be cloned (with gradient) in case they are marked as dirty (if not cloned, but marked
+        # as dirty, PyTorch will complain the tensor is a leaf, should not be used for inplace update). Once
+        # `autograd.Function.apply` completes, by checking the existence of the tensor in the dirty_tensors,
+        # `_GlobalOpKernelInfoMap` is updated to save the input indices that are marked as dirty.
+        # 2. For the subsequent runs, if the input index is in `tensor_input_indices_for_mark_dirty`, the tensor
+        # will be cloned (with gradient) before fed into `autograd.Function.apply` as input.
+        self.tensor_input_indices_for_mark_dirty: Optional[List[int]] = None
+
+        # A list of output indices that needs to be clone before returned, due to inplace update analysis.
+        self.output_indices_for_clone: Optional[List[int]] = None
 
-# Store the kernel specific information that cannot be retrieved and saved by PyTorch exporter.
-# For those infos that can only be retrieved with real run, we try to collect them in the first time run.
+
+# Store the kernel-specific information that cannot be retrieved and saved by PyTorch exporter.
+# For the infos that can only be retrieved with real run, we try to collect them in the first time run.
 # key: kernel_invoke_id, value: CustomFuncOpKernelInfo.
 _GlobalOpKernelInfoMap: Dict[str, CustomFuncOpKernelInfo] = {}
 
 
+def _process_inplace_outputs(
+    kernel_info: CustomFuncOpKernelInfo,
+    func_name: str,
+    input_tensors_of_kernel_run: Dict[int, Union[torch.Tensor, None]],
+    all_outputs_of_kernel_run: List[Union[torch.Tensor, any]],
+    all_outputs_to_tensor_inputs_reuse_map: List[int],
+    raw_input_tensors_used_inplace: Dict[int, Union[torch.Tensor, None]],
+    is_backward=False,
+):
+    """Special handling for in-place reusing in forward or backward.
+
+    Args:
+        kernel_info: kernel-specific information.
+        func_name: name of the autograd.Function.
+        input_tensors_of_kernel_run: all tensor input tensors used to run the autograd.Function forward/backward.
+        all_outputs_of_kernel_run: all outputs of the autograd.Function forward/backward.
+        all_outputs_to_tensor_inputs_reuse_map: a list of the same length of kernel outputs, each element representing
+            which input index it is reusing. If there is no reuse, the value is -1.
+        raw_input_tensors_used_inplace: a dict of raw input tensors marked as inplace in
+            `all_outputs_to_tensor_inputs_reuse_map`, the key is the tensor input index, value is the raw input tensor.
+        is_backward: indicates if this is backward or forward.
+
+    Procedures:
+    1. Detect all outputs to tensor inputs reuse mapping.
+    2. Validate the detected inplace_map with the registered inplace_map in ORT. For the output tensor,
+        2.0 If the reuse mapping value is the same in both inplace_map and detected inplace_map:
+            2.0.1 Most likely, we don't need to do anything, except 2.0.2.
+            2.0.2 Conditions:
+                > During forward run,
+                > The output tensor is reusing one of input tensors,
+                > The raw input tensor to be reused given from ORT is copied to run the forward kernels
+                    (for two possible reasons:
+                    a. the first time forward run, all inputs will be copied to detect
+                    `tensor_input_indices_to_save_in_ctx`;
+                    b. for every iteration, the input needs to be cloned because it is in
+                    `tensor_input_indices_to_save_in_ctx`).
+
+                In this case, need to copy the output tensor back to the raw input tensor, to make it compatible with
+                ORT statistically planned buffer reuse.
+        2.1 If the reuse mapping value is NOT equal in both inplace_map and detected inplace_map:
+            2.1.1 If the detected reuse input index is -1 (e.g. there is NO buffer reuse for this output),
+                while user specified reuse input index is NOT -1 (ORT planned the reuse), we raise an error.
+            2.1.2 If the detected reuse input index is NOT -1 (e.g. there is buffer reuse for this output),
+                while user specified reuse input index is -1 (ORT did not plan the reuse). We will try to clone the
+                output tensor before returning to ORT, to align with ORT's NO Buffer reuse plan; otherwise, once the
+                input buffer is released by ORT memory planner, the output tensor read/write will be corrupted.
+                Raise a warning to notify users to update inplace_map explicitly for performance consideration.
+            2.1.3 Other cases (for example user gives a wrong mapping index compared with detected ones), raise an
+                error.
+    3. Do copies for 2.1.2 cases.
+    4. Do copies for 2.0.2 cases.
+    """
+
+    log_prefix = f"{func_name}->{'Backward' if is_backward else 'Forward'}: "
+    input_tensor_address_list = [
+        t.data_ptr() if isinstance(t, torch.Tensor) else -1 for t in input_tensors_of_kernel_run.values()
+    ]
+    if is_backward:
+        input_tensor_address_list = [-1, *input_tensor_address_list]  # skip the context input
+
+    is_first_time_init = kernel_info.output_indices_for_clone is None
+    # If this is the first time run, collect runtime tensor reuse mapping.
+    if is_first_time_init:
+        # Procedure 1: Detect all outputs to tensor inputs reuse mapping, according to `all_outputs_of_kernel_run` and
+        # `input_tensors_of_kernel_run`.
+        assert len(all_outputs_to_tensor_inputs_reuse_map) == len(all_outputs_of_kernel_run), (
+            f"{log_prefix}all_outputs_to_tensor_inputs_reuse_map and kernel run outputs should have the same length."
+            f"all_outputs_to_tensor_inputs_reuse_map: {all_outputs_to_tensor_inputs_reuse_map}, "
+            f"kernel run outputs: {all_outputs_of_kernel_run}"
+        )
+
+        # Detect all outputs to tensor inputs reuse mapping.
+        detected_reuse_map = [-1] * (len(all_outputs_of_kernel_run))
+        for output_index, arg in enumerate(all_outputs_of_kernel_run):
+            if not isinstance(arg, torch.Tensor):
+                continue
+            if arg.data_ptr() in input_tensor_address_list:
+                input_index = input_tensor_address_list.index(arg.data_ptr())
+                detected_reuse_map[output_index] = input_index
+
+        # Procedure 2: Validate the detected inplace_map with the registered inplace_map in ORT.
+        output_indices_for_clone = (
+            []
+        )  # collect the output indices that need to be cloned before returned in case 2.1.2.
+        for output_index, (detected_inplace_index, inplace_index) in enumerate(
+            zip(detected_reuse_map, all_outputs_to_tensor_inputs_reuse_map)
+        ):
+            if inplace_index == detected_inplace_index:
+                continue
+
+            if (
+                inplace_index in raw_input_tensors_used_inplace
+                and raw_input_tensors_used_inplace[inplace_index] is None
+            ):
+                # Use specified inplace input index, but the input tensor is None, which means the input is not
+                # a tensor, so we don't do further checks.
+                continue
+
+            # If users register inplace_map (alloc planner will do buffer reuse),
+            # but detected inplace_map indicates it is NO inplace reusing, we raise an error.
+            if inplace_index != -1 and detected_inplace_index == -1:
+                raise RuntimeError(
+                    f"{log_prefix}Fatal: "
+                    f"ONNX Op attribute 'tensor_reuse_map' indicates {output_index}-th output is reusing input "
+                    f"{inplace_index}, but detected inplace_map indicates it is NOT reusing any input. "
+                    "Please update inplace_map explicitly to make it consistent "
+                    f"to avoid undefined behavior due to ORT's memory reuse plan. "
+                    f"inplace_map: {all_outputs_to_tensor_inputs_reuse_map}, "
+                    f"detected inplace_map: {detected_reuse_map}"
+                )
+
+            if inplace_index == -1 and detected_inplace_index != -1:
+                output_indices_for_clone.append(output_index)
+                continue
+
+            raise RuntimeError(
+                f"{log_prefix}Fatal: "
+                f"ONNX Op attribute 'inplace_map' indicates {inplace_index}-th output is reusing "
+                f"input index {detected_inplace_index}, but detected inplace_map indicates it is reusing "
+                f"input index {inplace_index}. Please update inplace_map explicitly to avoid undefined behavior "
+                f"due to memory reuse. inplace_map: {all_outputs_to_tensor_inputs_reuse_map}, "
+                f"detected inplace_map: {detected_reuse_map}"
+            )
+
+        kernel_info.output_indices_for_clone = output_indices_for_clone
+
+    assert kernel_info.output_indices_for_clone is not None
+
+    # Procedure 3: Do copies for 2.1.2 cases.
+    for output_index in kernel_info.output_indices_for_clone:
+        _log_warning(
+            f"{log_prefix}ONNX Op attribute "
+            f"'tensor_reuse_map' doesn't indicate {output_index}-th output is reusing any input, "
+            f"but detected inplace_map indicates it is reusing some input index. "
+            "A clone will be done before returning to ORT, to align with ORT's NO Buffer reuse plan. "
+            "Please update inplace_map explicitly to avoid such a copy."
+        )
+        all_outputs_of_kernel_run[output_index] = all_outputs_of_kernel_run[output_index].detach().clone()
+
+    # Procedure 4: Do copies for 2.0.2 cases.
+    if is_backward is False and (
+        is_first_time_init
+        or kernel_info.tensor_input_indices_to_save_in_ctx
+        or kernel_info.tensor_input_indices_for_mark_dirty
+    ):
+        for raw_tensor_input_index, raw_input_tensor in raw_input_tensors_used_inplace.items():
+            # raw_input_tensor can be None for backward run, but backward won't go here.
+            if not isinstance(raw_input_tensor, torch.Tensor):
+                continue
+
+            # We did not do the check with tensor_input_indices_to_save_in_ctx/tensor_input_indices_for_mark_dirty
+            # because even for those tensor indices not in
+            # tensor_input_indices_to_save_in_ctx/tensor_input_indices_for_mark_dirty, we still need to do the
+            # copy for the first-time run.
+            if raw_input_tensor.data_ptr() == input_tensor_address_list[raw_tensor_input_index]:
+                # If the raw input tensor is not copied, we don't need this handling.
+                continue
+
+            copied = False  # for each tensor, we don't do the copy once.
+            output_indices_reusing_current_raw_input = [
+                output_index
+                for output_index, input_index in enumerate(all_outputs_to_tensor_inputs_reuse_map)
+                if input_index == raw_tensor_input_index
+            ]
+            output_tensor_address = all_outputs_of_kernel_run[output_indices_reusing_current_raw_input[0]].data_ptr()
+            for output_index in output_indices_reusing_current_raw_input:
+                assert (
+                    output_tensor_address == all_outputs_of_kernel_run[output_index].data_ptr()
+                ), "Outputs reusing the same input tensor should have the same address."
+
+                if not copied:
+                    # Only need a copy once.
+                    # Inplace copy only happens for non-leaf variables, so we have to set requires_grad to False.
+                    raw_input_tensor.requires_grad = False
+                    raw_input_tensor.copy_(all_outputs_of_kernel_run[output_index])
+                    _log_warning(
+                        f"{log_prefix}Copy output tensor {output_index} to raw input tensor {raw_tensor_input_index}. "
+                        f"{'Provide output to input reuse mapping to avoid the copy overhead.' if not is_first_time_init else ''}"
+                    )
+                    copied = True
+
+                all_outputs_of_kernel_run[output_index] = raw_input_tensor
+
+
 def _get_context(forward_tensor_outputs: List[torch.Tensor]) -> Tuple[any, Optional[torch.Tensor]]:
     """Search for context among all outputs.
 
-    Note1: All forward outputs of torch.autograd.Function shared the same gradient function pointer,
+    Note 1: All forward outputs of torch.autograd.Function shared the same gradient function pointer,
         so here we just get the first tensor having grad_fn attribute.
         (https://github.com/PyTorch/PyTorch/blob/15532595209d2daf34d35e10f8d3d3b64966aea2/torch/csrc/autograd/custom_function.cpp#L267)
 
-    Note2: Context can be None because NOT all torch.autograd.Function's are differentiable. The function
+    Note 2: Context can be None because NOT all torch.autograd.Function's are differentiable. The function
         https://github.com/PyTorch/PyTorch/blob/d701357d921ef167d42c125e65b6f7da6be3ad0f/torch/csrc/autograd/custom_function.cpp#L209?
-        means if all output of forward function is not differentiable, then grad_fn will be None (not be set).
+        means if all output of the forward function is not differentiable, then grad_fn will be None (not be set).
 
         For example,
             class Bar(torch.autograd.Function):
-                # A non-differentiable autograd Function whose forard output
+                # A non-differentiable autograd Function whose forward output
                 # doesn't have grad_fn attribute.
                 @staticmethod
                 def forward(ctx, x):
@@ -85,7 +294,7 @@ def backward(ctx, dy):
             continue
 
         if arg.grad_fn is None:
-            # For following case, it is possible grad_fn exist, but its value is None,
+            # For the following case, it is possible grad_fn exists, but its value is None,
             # so we need to continue to search for the first tensor having a non-None grad_fn.
             #
             # >>> w = torch.randn(5, 6)
@@ -106,9 +315,10 @@ def backward(ctx, dy):
     return (ctx, first_tensor_output)
 
 
-def _finalize_traing_mode_forward(
+def _finalize_training_mode_forward(
     kernel_invoke_id: str,
-    input_tensors_from_ort: Dict[int, torch.Tensor],
+    func_name: str,
+    input_tensors_used_for_fw_run: Dict[int, torch.Tensor],
     forward_output_tensors: List[Union[torch.Tensor, None]],
 ):
     """Complete the epilogue of forward runner for training mode.
@@ -120,16 +330,25 @@ def _finalize_traing_mode_forward(
 
     Things to do:
     1. Try to get context from forward output tensors.
-    2. Remove the gradient functions between current autograd.Function and its input's gradient function, because
+    2. Remove the gradient functions between the current autograd.Function and its input's gradient function, because
        in ORT we don't depend on PyTorch's autograd engine.
     3. Register the current autograd.Function's gradient function into our PyNodeSharedPointerPool.
-    4. Save kernel specific information into _GlobalOpKernelInfoMap in the first-time kernel run.
+    4. Save kernel-specific information into _GlobalOpKernelInfoMap in the first-time kernel run.
     """
 
     ctx, tensor_owning_ctx = _get_context(forward_output_tensors)
 
+    kernel_info = _GlobalOpKernelInfoMap[kernel_invoke_id]
+
     # ctx being None in training mode means the forward function is not differentiable, so backward is not needed.
     if ctx is None:
+        # If this is the first time run, collect kernel-specific information.
+        if kernel_info.tensor_input_indices_to_save_in_ctx is None:
+            kernel_info.tensor_input_indices_to_save_in_ctx = []
+
+        if kernel_info.tensor_input_indices_for_mark_dirty is None:
+            kernel_info.tensor_input_indices_for_mark_dirty = []
+
         return None
 
     # Filter out the None in the saved_tensors.
@@ -137,19 +356,20 @@ def _finalize_traing_mode_forward(
 
     ctx.fw_kernel_invoke_id = kernel_invoke_id
 
-    # If this is the first time run, collect kernel specific information.
-    if kernel_invoke_id not in _GlobalOpKernelInfoMap:
-        kernel_info = CustomFuncOpKernelInfo(kernel_invoke_id)
-        _GlobalOpKernelInfoMap[kernel_invoke_id] = kernel_info
+    # If this is the first time run, collect kernel-specific information.
+    if kernel_info.tensor_input_indices_to_save_in_ctx is None:
+        kernel_info.tensor_input_indices_to_save_in_ctx = []
         if len(saved_tensors):
-            # Check tensors generated by ORT is in the saved_tensors or not.
+            # Check tensors generated by ORT are in the saved_tensors or not.
             # If yes, save the input index of the tensor in the _GlobalOpKernelInfoMap.
-            kernel_info.input_indices_to_save_in_ctx = [
-                arg_index
-                for arg_index, tensor in input_tensors_from_ort.items()
+            kernel_info.tensor_input_indices_to_save_in_ctx = [
+                tensor_input_index
+                for tensor_input_index, tensor in input_tensors_used_for_fw_run.items()
                 if any(tensor is saved_tensor for saved_tensor in saved_tensors)
             ]
-            warnings.warn("Add input index to _GlobalOpKernelInfoMap, to avoid extra copy in every iteration.")
+            _log_warning(
+                f"{func_name}: Add input index to _GlobalOpKernelInfoMap, to avoid extra copy in every iteration."
+            )
         kernel_info.materialize_grads = torch_interop_utils.get_materialize_grads(tensor_owning_ctx)
         kernel_info.materialize_grads_config = OrderedDict()
         if kernel_info.materialize_grads:
@@ -161,6 +381,22 @@ def _finalize_traing_mode_forward(
                         tensor.shape,
                     )
 
+    if kernel_info.tensor_input_indices_for_mark_dirty is None:
+        kernel_info.tensor_input_indices_for_mark_dirty = []
+        # Check tensors generated by ORT are marked as dirty(for inplace update) or not.
+        # If yes, save the input index of the tensor in the _GlobalOpKernelInfoMap.
+        are_tensors_marked_as_dirty = torch_interop_utils.are_tensors_marked_as_dirty(
+            tensor_owning_ctx, [t for t in input_tensors_used_for_fw_run.values()]
+        )
+        kernel_info.tensor_input_indices_for_mark_dirty = [
+            tensor_input_index
+            for is_dirty, (tensor_input_index, tensor) in zip(
+                are_tensors_marked_as_dirty, input_tensors_used_for_fw_run.items()
+            )
+            if is_dirty is True
+        ]
+        _log_warning(f"{func_name}: Add input index to _GlobalOpKernelInfoMap, to support leaf node do inplace update.")
+
     #         FORWARD                                                    BACKWARD FUNCTION CONNECTIONS
     # input_1 (leaf, constructed by from_dlpack)   <----reference----  AccumulateGrad gradient function
     #             ↓                                                                 ↑
@@ -188,8 +424,9 @@ def call_python_forward_function(
     requires_grad_flags: List[bool],
     tensor_type_flags: List[int],
     is_training_mode: bool,
-    inplace: bool,
+    inplace_map: List[int],
     kernel_invoke_id: str,
+    func_name: Union[bytes, str],
     *args,
 ):
     """
@@ -206,93 +443,126 @@ def call_python_forward_function(
         requires_grad_flags: requires_grad_flags[i] indicates if the i-th arg needs gradient.
         tensor_type_flags: tensor_type_flags[i] indicates the type of the i-th arg, 0 - non-tensor, 1 - tensor.
         is_training_mode: indicates if this model is running under training mode.
-        inplace: indicates if args can be modified inside the custom function.
+        inplace_map: a list of the same length of kernel outputs, each element represents which input index
+          it is reusing. If there is no reuse, the value is -1.
         args: inputs to "backward_function".
     """
 
-    def generate_non_leaf_or_not(grad_flag, tensor_flag, arg, is_training_mode, is_inplace):
-        if is_training_mode and tensor_flag and grad_flag and is_inplace:
-            # "multiply one" helps change the torch tensor's is_leaf to False.
-            # This is required when the torch tensor is updated in-place during forward pass.
-            # We cannot use view here, because PyTorch handles grad_fn for view differently.
-            non_leaf_arg = arg * 1
-            return non_leaf_arg
-        else:
-            return arg
-
     try:
-        wrapped_args = []
-        tensor_input_args_map = OrderedDict()
+        func_name = func_name.decode("utf-8") if isinstance(func_name, bytes) else func_name
+        # If this is the first time run, collect runtime tensor reuse mapping.
+        is_first_time_run = kernel_invoke_id not in _GlobalOpKernelInfoMap
+        if is_first_time_run:
+            kernel_info = CustomFuncOpKernelInfo(kernel_invoke_id)
+            _GlobalOpKernelInfoMap[kernel_invoke_id] = kernel_info
+
+        kernel_info = _GlobalOpKernelInfoMap[kernel_invoke_id]
 
-        # Be noted: in inference mode, we won't insert any information into _GlobalOpKernelInfoMap, because ctx
-        # will always be None in the first time run.
-        input_indices_to_save_in_ctx = None  # Uninitialized
-        if kernel_invoke_id in _GlobalOpKernelInfoMap:
-            input_indices_to_save_in_ctx = _GlobalOpKernelInfoMap[kernel_invoke_id].input_indices_to_save_in_ctx
+        tensor_input_indices_to_save_in_ctx = kernel_info.tensor_input_indices_to_save_in_ctx
+        tensor_input_indices_for_mark_dirty = kernel_info.tensor_input_indices_for_mark_dirty
 
-        for arg_index, (grad_flag, tensor_flag, arg) in enumerate(zip(requires_grad_flags, tensor_type_flags, args)):
+        # Collect the tensor address for all inputs used for run forward, used for reuse detection.
+        tensor_input_index = 0
+        # If the input is reused, we need to save the raw input tensor for special handling.
+        raw_input_tensors_used_inplace = OrderedDict()  # Orders matter here.
+        input_tensors_used_for_fw_run = OrderedDict()  # Orders matter here.
+
+        wrapped_args = []
+        for _, (grad_flag, tensor_flag, arg) in enumerate(zip(requires_grad_flags, tensor_type_flags, args)):
             if tensor_flag:
                 # Assume it's a DLPack tensor and convert it to PyTorch tensor.
-                # Note1:
-                #   If it's first-time kernel invocation, input_indices_to_save_in_ctx is None, we do the
-                #   copy for all tensor. Otherwise, we only copy the tensors whose indices are in
-                #   input_indices_to_save_in_ctx.
-                #
-                # Note2:
-                #   For inference mode, we don't need do the copy because ctx will be None,
-                #   so nothing will be saved for ctx.
-                if is_training_mode and (
-                    input_indices_to_save_in_ctx is None or arg_index in input_indices_to_save_in_ctx
-                ):
-                    wrapped_arg = from_dlpack(arg).detach().clone()
-                else:
-                    wrapped_arg = from_dlpack(arg)
+                wrapped_arg = from_dlpack(arg)
+
+                if tensor_input_index in inplace_map:
+                    raw_input_tensors_used_inplace[tensor_input_index] = wrapped_arg
 
                 # Only requires gradient when running under training mode
                 # and the associated tensor has grad_flag=True (i.e.,
                 # "requires_grad=True" in the original PyTorch script).
                 wrapped_arg.requires_grad = is_training_mode and grad_flag
+
+                # Note1:
+                #   If it's first-time kernel invocation, tensor_input_indices_to_save_in_ctx is None, we do the
+                #   copy for all tensors. Otherwise, we only copy the tensors whose indices are in
+                #   tensor_input_indices_to_save_in_ctx.
+                # Note2:
+                #   For inference mode, we don't need to do the copy because ctx will be None,
+                #   so nothing will be saved for ctx.
+                # Note3:
+                # To fix this issue:
+                # "a leaf Variable that requires grad has been used in an in-place operation."
+                # If it's first-time kernel invocation, tensor_input_indices_for_mark_dirty is None, we do the
+                # copy for all tensors to generate grad for it. Otherwise, we only clone (to generate grad) for
+                # the tensors whose indices are in tensor_input_indices_for_mark_dirty.
+                if is_training_mode:
+                    if is_first_time_run:
+                        with torch.set_grad_enabled(True):
+                            wrapped_arg = wrapped_arg.clone()
+                    else:
+                        is_input_index_saved_in_ctx = (
+                            tensor_input_indices_to_save_in_ctx is None
+                            or tensor_input_index in tensor_input_indices_to_save_in_ctx
+                        )
+                        is_input_index_marked_dirty = (
+                            tensor_input_indices_for_mark_dirty is None
+                            or tensor_input_index in tensor_input_indices_for_mark_dirty
+                        )
+                        if is_input_index_saved_in_ctx or is_input_index_marked_dirty:
+                            # when with grad, the leaf tensor after clone will not be leaf.
+                            with torch.set_grad_enabled(is_input_index_marked_dirty):
+                                wrapped_arg = wrapped_arg.clone()
+                            wrapped_arg.requires_grad = is_training_mode and grad_flag
+
                 wrapped_args.append(wrapped_arg)
-                tensor_input_args_map[arg_index] = wrapped_arg
+                input_tensors_used_for_fw_run[tensor_input_index] = wrapped_arg
 
+                tensor_input_index += 1
             else:
                 # Use non-tensor as is. It's a PyObject*.
                 wrapped_args.append(arg)
 
         with torch.set_grad_enabled(is_training_mode):
-            # Another level of wrap to avoid requires_grad=True for leaf variables.
-            new_wrapped_args = list(
-                generate_non_leaf_or_not(grad_flag, tensor_flag, arg, is_training_mode, inplace)
-                for grad_flag, tensor_flag, arg in zip(requires_grad_flags, tensor_type_flags, wrapped_args)
-            )
-
             # Run autograd.Function.apply(...).
-            # TODO(pengwa): looks we are assuming all outputs will be either Tensor or None.
+            # TODO(pengwa): looks like we are assuming all outputs will be either Tensor or None.
             # We should revisit if it is possible to support other types of output, for example int, or, etc.
-            # But that might also requires some work in backend.
-            result = forward_function(*new_wrapped_args)
+            # But that might also require some work in backend.
+            result = forward_function(*wrapped_args)
 
-            # Extract results as DLPack tensors plus autograd context. Also skips all None values.
+            results = []
             if isinstance(result, torch.Tensor):
-                ctx = None
-                if is_training_mode:
-                    ctx = _finalize_traing_mode_forward(kernel_invoke_id, tensor_input_args_map, [result])
-                unwrapped_values = [ctx, to_dlpack(result)]
+                results = [result]
             elif isinstance(result, (tuple, list)):
-                ctx = None
-                if is_training_mode:
-                    ctx = _finalize_traing_mode_forward(kernel_invoke_id, tensor_input_args_map, result)
-                wrapped = [ctx]
-                wrapped.extend(list(to_dlpack(value) if value is not None else None for value in result))
-                # Inside the returned list, first element is context and the rest
-                # are DLPack tensors.
-                unwrapped_values = wrapped
+                results = [r for r in result]
             else:
                 raise wrap_exception(
                     ORTModuleIOError,
                     TypeError(f"ORTModule does not support the following model output type {type(result)}."),
                 )
-        return tuple(unwrapped_values)
+
+            ctx = None
+            if is_training_mode:
+                ctx = _finalize_training_mode_forward(
+                    kernel_invoke_id, func_name, input_tensors_used_for_fw_run, results
+                )
+
+            final_rets = [ctx]
+            final_rets.extend(results)
+
+            _process_inplace_outputs(
+                kernel_info,
+                func_name,
+                input_tensors_used_for_fw_run,
+                final_rets,
+                inplace_map,
+                raw_input_tensors_used_inplace,
+            )
+
+            dlpacks = [final_rets[0]]
+            dlpacks.extend(list(to_dlpack(value) if value is not None else None for value in final_rets[1:]))
+
+            # Inside the returned list, the first element is context and the rest
+            # are DLPack tensors.
+        return tuple(dlpacks)
     except Exception as e:
         # Flush buffers. Otherwise, calling this from C++ may lose them.
         print("Exception happens when running ", forward_function)
@@ -306,8 +576,9 @@ def call_python_backward_function(
     requires_grad_flags: List[bool],
     tensor_type_flags: List[int],
     is_training_mode: bool,
-    inplace: bool,
+    inplace_map: List[int],
     kernel_invoke_id: str,
+    func_name: Union[bytes, str],
     *args,
 ):
     """
@@ -319,11 +590,13 @@ def call_python_backward_function(
     Args:
         backward_function: pointer to autograd.Function.backward (e.g., MyReLU.backward).
         requires_grad_flags: requires_grad_flags[i] indicates if the i-th arg needs gradient.
-        tensor_type_flags: tensor_type_flagsi] indicates the type of the i-th arg.
+        tensor_type_flags: tensor_type_flags[i] indicates the type of the i-th arg.
         is_training_mode: indicates if this model is running under training mode.
-        inplace: indicates if args can be modified inside the custom function.
+        inplace_map: a list of the same length of kernel outputs, each element represents which input index
+          it is reusing. If there is no reuse, the value is -1.
         args: inputs to "backward_function".
     """
+    func_name = func_name.decode("utf-8") if isinstance(func_name, bytes) else func_name
     with torch.no_grad():
 
         def wrap_all_outputs(result):
@@ -338,6 +611,13 @@ def wrap_all_outputs(result):
                 )
 
         try:
+            # If this is the first time run, collect runtime tensor reuse mapping.
+            if kernel_invoke_id not in _GlobalOpKernelInfoMap:
+                kernel_info = CustomFuncOpKernelInfo(kernel_invoke_id)
+                _GlobalOpKernelInfoMap[kernel_invoke_id] = kernel_info
+
+            kernel_info = _GlobalOpKernelInfoMap[kernel_invoke_id]
+
             # Backward inputs should not require gradients.
             assert all(grad_flag == 0 for grad_flag in requires_grad_flags)
 
@@ -345,6 +625,12 @@ def wrap_all_outputs(result):
             ctx = args[0]
             fw_kernel_invoke_id = ctx.fw_kernel_invoke_id
             wrapped_args = []
+
+            # Collect the tensor address for all inputs used for run backward, used for reuse detection.
+            tensor_input_index = 1  # skip the context input
+            # If input is reused, we need to save the raw input tensor for special handling.
+            raw_input_tensors_used_inplace = OrderedDict()  # Orders matter here.
+            input_tensors_used_for_bw_run = OrderedDict()  # Orders matter here.
             for grad_input_index, (grad_flag, tensor_flag, arg) in enumerate(
                 zip(requires_grad_flags, tensor_type_flags, args)
             ):
@@ -358,16 +644,28 @@ def wrap_all_outputs(result):
                             wrapped_arg = torch.zeros(shape, device=device, dtype=dtype)
                         else:
                             wrapped_arg = arg
+
+                        if grad_input_index in inplace_map:
+                            raw_input_tensors_used_inplace[tensor_input_index] = arg
+
                     else:
                         # Assume it's a DLPack tensor# and convert it to PyTorch tensor.
                         wrapped_arg = from_dlpack(arg)
 
+                        if grad_input_index in inplace_map:
+                            raw_input_tensors_used_inplace[tensor_input_index] = wrapped_arg
+
+                    # This may include None values.
+                    input_tensors_used_for_bw_run[tensor_input_index] = wrapped_arg
+
                     if wrapped_arg is not None:
                         # Only requires gradient when running under training mode
                         # and the associated tensor has grad_flag=True (i.e.,
                         # "requires_grad=True" in the original PyTorch script).
                         wrapped_arg.requires_grad = is_training_mode and grad_flag
+
                     wrapped_args.append(wrapped_arg)
+                    tensor_input_index += 1
                 else:
                     # Use non-tensor as is. It's a PyObject*.
                     wrapped_args.append(arg)
@@ -386,6 +684,16 @@ def wrap_all_outputs(result):
                     TypeError(f"ORTModule does not support the following model output type {type(result)}."),
                 )
 
+            _process_inplace_outputs(
+                kernel_info,
+                func_name,
+                input_tensors_used_for_bw_run,
+                result,
+                inplace_map,
+                raw_input_tensors_used_inplace,
+                is_backward=True,
+            )
+
             wrapped_returned_args = wrap_all_outputs(result)
 
             torch_interop_utils.unregister_grad_fn(id(ctx))
diff --git a/orttraining/orttraining/python/training/ortmodule/_custom_gradient_registry.py b/orttraining/orttraining/python/training/ortmodule/_custom_gradient_registry.py
index 156c3e001d88f..77317242727b4 100644
--- a/orttraining/orttraining/python/training/ortmodule/_custom_gradient_registry.py
+++ b/orttraining/orttraining/python/training/ortmodule/_custom_gradient_registry.py
@@ -271,8 +271,3 @@ def upsample_nearest2d_gradient():
 @register_gradient("org.pytorch.aten", "ATen", "upsample_nearest3d", "vec")
 def upsample_nearest3d_gradient():
     return _upsample_gradient("upsample_nearest3d_backward", 3)
-
-
-@register_gradient("org.pytorch.aten", "ATen", "upsample_bilinear2d", "vec")
-def upsample_bilinear2d_gradient():
-    return _upsample_gradient("upsample_bilinear2d_backward", 2)
diff --git a/orttraining/orttraining/python/training/ortmodule/_custom_op_symbolic_registry.py b/orttraining/orttraining/python/training/ortmodule/_custom_op_symbolic_registry.py
index 0dd33d493b0d1..99e8851b6a697 100644
--- a/orttraining/orttraining/python/training/ortmodule/_custom_op_symbolic_registry.py
+++ b/orttraining/orttraining/python/training/ortmodule/_custom_op_symbolic_registry.py
@@ -12,7 +12,7 @@
 from torch.onnx import register_custom_op_symbolic
 from torch.onnx.symbolic_helper import _get_tensor_dim_size, _get_tensor_sizes, parse_args
 
-from onnxruntime.training.utils import pytorch_dtype_to_onnx
+from onnxruntime.training.utils import pytorch_type_to_onnx_dtype
 
 from ._utils import get_runtime_pytorch_version
 
@@ -129,7 +129,7 @@ def cross_entropy_loss(g, node, logits, target, weight, reduction, ignore_index,
         output_type = logits_casted.type()
     else:
         # For higher version torch we can get node output types
-        loss_output = list(node.outputs())[0]
+        loss_output = next(iter(node.outputs()))
         output_type = loss_output.type()
     ##################################
 
@@ -145,7 +145,7 @@ def cross_entropy_loss(g, node, logits, target, weight, reduction, ignore_index,
         weight_casted,
         ignore_index,
         reduction_s=reduction,
-        output_type_i=pytorch_dtype_to_onnx(output_type.scalarType()),
+        output_type_i=pytorch_type_to_onnx_dtype(output_type.scalarType()),
         outputs=2,
     )
     output.setType(output_type)
@@ -808,16 +808,3 @@ def upsample_nearest2d(g, input, output_size, scale_factors):
 @register_symbolic("upsample_nearest3d")
 def upsample_nearest3d(g, input, output_size, scale_factors):
     return _upsample_nearest(g, input, output_size, scale_factors, "upsample_nearest3d")
-
-
-@register_symbolic("upsample_bilinear2d")
-def upsample_bilinear2d(g, input, output_size, align_corners, scale_factors):
-    return g.op(
-        "org.pytorch.aten::ATen",
-        input,
-        output_size,
-        align_corners,
-        scale_factors,
-        operator_s="upsample_bilinear2d",
-        overload_name_s="vec",
-    )
diff --git a/orttraining/orttraining/python/training/ortmodule/_execution_agent.py b/orttraining/orttraining/python/training/ortmodule/_execution_agent.py
index 533fea5a0a721..7a89aadee9950 100644
--- a/orttraining/orttraining/python/training/ortmodule/_execution_agent.py
+++ b/orttraining/orttraining/python/training/ortmodule/_execution_agent.py
@@ -3,6 +3,8 @@
 # Licensed under the MIT License.
 # --------------------------------------------------------------------------
 
+from typing import Tuple
+
 import onnxruntime
 from onnxruntime.capi import _pybind_state as C
 from onnxruntime.capi._pybind_state import TrainingAgent as C_TrainingAgent
@@ -161,3 +163,13 @@ def run_backward(self, feeds, fetches, state):
         :param state: State of the graph that is used for executing partial graph runs.
         """
         self._training_agent.run_backward(feeds, fetches, state)
+
+    def get_serialized_ortmodule_memory_stat(
+        self, memory_optimization_config: str, recompute_probe_level: str
+    ) -> Tuple[str, dict]:
+        """
+        Get serialized memory stats for OrtModule.
+        """
+        return self._training_agent.get_serialized_ortmodule_memory_stat(
+            memory_optimization_config, recompute_probe_level
+        )
diff --git a/orttraining/orttraining/python/training/ortmodule/_fallback.py b/orttraining/orttraining/python/training/ortmodule/_fallback.py
index 44f96dcff7fb0..56bb45d064d8a 100644
--- a/orttraining/orttraining/python/training/ortmodule/_fallback.py
+++ b/orttraining/orttraining/python/training/ortmodule/_fallback.py
@@ -175,9 +175,9 @@ def fallback(self, log_level: _logger.LogLevel, *inputs, **kwargs):
 
             # This warning will not be raised again if retry is not enabled
             self._logger.warning(
-                "Fallback to PyTorch due to exception {} was triggered. "
+                f"Fallback to PyTorch due to exception {exception_type} was triggered. "
                 "Report this issue with a minimal repro at https://www.github.com/microsoft/onnxruntime. "
-                "See details below:\n\n{}".format(exception_type, exception_string)
+                f"See details below:\n\n{exception_string}"
             )
 
             self._raised_fallback_exception = True
diff --git a/orttraining/orttraining/python/training/ortmodule/_graph_execution_manager.py b/orttraining/orttraining/python/training/ortmodule/_graph_execution_manager.py
index dfaac5f0fa836..26993dec17ccf 100755
--- a/orttraining/orttraining/python/training/ortmodule/_graph_execution_manager.py
+++ b/orttraining/orttraining/python/training/ortmodule/_graph_execution_manager.py
@@ -19,7 +19,7 @@
 import onnxruntime
 from onnxruntime.capi import _pybind_state as C
 from onnxruntime.tools.symbolic_shape_infer import SymbolicShapeInference
-from onnxruntime.training.utils import ORTModelInputOutputSchemaType, onnx_dtype_to_pytorch
+from onnxruntime.training.utils import ORTModelInputOutputSchemaType, PTable, onnx_dtype_to_pytorch_dtype
 from onnxruntime.training.utils.hooks import configure_ort_compatible_zero_stage3
 
 from . import _are_deterministic_algorithms_enabled, _io, _logger, _onnx_models, _utils
@@ -36,6 +36,7 @@
 from ._io import _FlattenedModule, _InputInfo
 from ._runtime_inspector import RuntimeInspector
 from ._utils import check_function_has_param, get_rank
+from ._zero_stage3_compatibility import stage3_export_context
 from .options import DebugOptions, LogLevel, _RuntimeOptions
 from .torch_cpp_extensions.cpu.aten_op_executor import load_aten_op_executor_cpp_extension
 
@@ -90,7 +91,8 @@ def __init__(
         self._first_skip_check_warning = True
 
         # Inspector for runtime information, for example input data, memory usage, etc.
-        self._runtime_inspector = RuntimeInspector(self._logger)
+        self._runtime_inspector = RuntimeInspector(self._logger, self._original_module)
+        self._runtime_inspector.memory_ob.enable_memory_stats_by_step(self._runtime_options.print_memory_stat_by_step)
 
         # Tracker for ORTModule model export, session creation overhead.
         self.time_tracker = _logger.TimeTracker()
@@ -143,9 +145,6 @@ def __init__(
         self._zero_stage3_param_map = {}
         if self._runtime_options.enable_zero_stage3_support:
             # Cannot toggle feature enabling/disabling after the first time enabled.
-            from onnxruntime.training.utils.hooks._zero_offload_subscriber import _get_all_zero_stage3_params
-
-            self._zero_stage3_param_map = _get_all_zero_stage3_params(self._flattened_module)
 
             configure_ort_compatible_zero_stage3(debug=False, stats_output_dir="ort_output", stats_overwrite=True)
 
@@ -244,12 +243,6 @@ def _get_session_config(self):
         # 0:Verbose, 1:Info, 2:Warning. 3:Error, 4:Fatal. Default is 2.
         session_options.log_severity_level = int(self._debug_options.logging.log_level)
 
-        session_options.add_session_config_entry(
-            "optimization.enable_memory_optimizer", self._runtime_options.memory_optimizer_config
-        )
-        session_options.add_session_config_entry(
-            "optimization.enable_memory_probe_recompute_level", self._runtime_options.probe_level
-        )
         # Disable weight prepacking
         session_options.add_session_config_entry("session.disable_prepacking", "1")
 
@@ -320,7 +313,8 @@ def _get_exported_model(self, input_schema: ORTModelInputOutputSchemaType, *inpu
         """
 
         # VERBOSE -> FULL export verbose log + FULL torch other logs from stdout and stderr (C++ backend)
-        # INFO -> FULL export verbose log + FILTERED torch other logs from stdout and stderr (C++ backend)
+        # DEVINFO -> FULL export verbose log + FULL torch other logs from stdout and stderr (C++ backend)
+        # INFO -> [Rank 0] FULL export verbose log + FILTERED torch other logs from stdout and stderr (C++ backend)
         # WARNING/ERROR -> [Rank 0] NO export verbose log + FILTERED torch other logs from stdout and stderr (C++ backend)
         # Be noted: rank 0 log only is controlled by logger configured in _logger.py
         torch_exporter_verbose_log = self._debug_options.logging.log_level <= LogLevel.INFO
@@ -348,7 +342,7 @@ def _get_exported_model(self, input_schema: ORTModelInputOutputSchemaType, *inpu
                 cache_dir, f"{hash_fn(str(self._flattened_module).encode()).hexdigest()}_{get_rank()}.onnx"
             )
             if os.path.exists(cache_dir) and os.path.isfile(filename):
-                self._logger.info(
+                self._logger.warning(
                     f"Cached model detected! Cached model will be used to save export and initialization time."
                     f"If you want the model to be re-exported then DELETE {filename}."
                 )
@@ -370,7 +364,7 @@ def _get_exported_model(self, input_schema: ORTModelInputOutputSchemaType, *inpu
         assert self._export_mode is not None, "Please use a concrete instance of ExecutionManager"
 
         try:
-            with torch.no_grad():
+            with torch.no_grad(), stage3_export_context(self._runtime_options.enable_zero_stage3_support, self):
                 required_export_kwargs = {
                     "input_names": self._input_info.names,
                     "output_names": output_names,
@@ -475,7 +469,6 @@ def _get_graph_transformer_config(self) -> C.TrainingGraphTransformerConfigurati
         return graph_transformer_config
 
     @_logger.TrackTime(_logger.ORTModuleInitPhase.GRAPH_BUILDER_INIT)
-    @_logger.SuppressLogs(_logger.ORTModuleInitPhase.GRAPH_BUILDER_INIT)
     def _initialize_graph_builder(self):
         """Creates a new OrtModuleGraphBuilder, initializes it and saves it to self._graph_builder"""
 
@@ -568,7 +561,6 @@ def _enable_conditional_optimizations(
            enable sparsity-based optimization.
 
         """
-
         # Enable data sparsity inspection if sparse optimizer is ON or user wants to print input density.
         if self._runtime_options.enable_sparse_optimizer or self._runtime_options.print_input_density:
             self._runtime_inspector.enable_input_inspector(
@@ -615,9 +607,6 @@ def _enable_conditional_optimizations(
             if not self._runtime_options.print_input_density:
                 self._runtime_inspector.disable_input_inspector()
 
-        if self._runtime_options.print_memory_stat:
-            self._runtime_inspector.enable_memory_inspector(self._original_module)
-
     def _append_pull_weight_trigger_as_input(self, kwargs: Dict, device: torch.device):
         from ._zero_stage3_compatibility import (
             STAGE3_PULL_WEIGHT_TRIGGER_NAME,
@@ -627,7 +616,7 @@ def _append_pull_weight_trigger_as_input(self, kwargs: Dict, device: torch.devic
 
         kwargs[STAGE3_PULL_WEIGHT_TRIGGER_NAME] = torch.zeros(
             STAGE3_PULL_WEIGHT_TRIGGER_OUTPUT_SHAPE,
-            dtype=onnx_dtype_to_pytorch(STAGE3_PULL_WEIGHT_TRIGGER_OUTPUT_DTYPE),
+            dtype=onnx_dtype_to_pytorch_dtype(STAGE3_PULL_WEIGHT_TRIGGER_OUTPUT_DTYPE),
             device=device,
         ).requires_grad_()
 
@@ -637,105 +626,141 @@ def _log_feature_stats(self):
         if get_rank() != 0:
             return
 
-        feature_map: List[Tuple[str, bool, str]] = [
-            ("ATen Executor", True, "Dispatch ATen operators to ORT's ATen executor"),
-            (
+        if self._runtime_inspector.memory_ob.is_enabled() and self._debug_options.log_level <= LogLevel.DEVINFO:
+            self._logger.info(self._runtime_inspector.memory_ob.memory_optimization_opportunity_table_str)
+
+        tbl = PTable()
+
+        def _add_record(tbl, columns):
+            return tbl.add_row([columns[0], ":", "ON" if columns[1] else "OFF", ":", columns[2]])
+
+        notes = []
+
+        _add_record(tbl, ["ATen Executor", True, "Dispatch ATen operators to ORT's ATen executor"])
+        _add_record(
+            tbl,
+            [
                 "Cast Propagation",
                 self._runtime_options.propagate_cast_ops_level > 0,
                 f"Level {self._runtime_options.propagate_cast_ops_level} enabled",
-            ),
-            (
+            ],
+        )
+        _add_record(
+            tbl,
+            [
                 "Custom Function",
                 self._runtime_options.enable_custom_autograd_function,
                 "Support custom torch.autograd.Function export and execution",
-            ),
-            (
-                "Memory Optimizer",
-                len(self._runtime_options.memory_optimizer_config) > 0,
-                "Enable with env ORTMODULE_MEMORY_OPT_CONFIG=<config>",
-            ),
-        ]
+            ],
+        )
 
-        # Add compute optimizer
-        feature_map.extend(
+        output_memory_optimization_details = self._debug_options.log_level <= LogLevel.INFO
+        mem_row = _add_record(
+            tbl,
             [
+                "Memory Optimizer",
+                len(self._runtime_options.memory_optimizer_config) > 0,
                 (
-                    "Compute Optimizer",
-                    self._runtime_options.enable_compute_optimizer,
-                    "Enable/Disable with env ORTMODULE_ENABLE_COMPUTE_OPTIMIZER=1/0",
-                ),
-                (
-                    " -FLOPReduction",
-                    self._runtime_options.enable_compute_optimizer,
-                    "Reduce FLOPs by upstreaming shrinking-sized ops",
+                    f"User config: {self._runtime_options.memory_optimizer_config}, probe level: {self._runtime_options.probe_level}"
+                    if len(self._runtime_options.memory_optimizer_config) > 0
+                    else "Enable with env ORTMODULE_MEMORY_OPT_CONFIG=<config>"
                 ),
-            ]
+            ],
+        )
+
+        if self._runtime_inspector.memory_ob.is_enabled() and output_memory_optimization_details:
+            mem_notes, mem_tbl = self._runtime_inspector.memory_ob.display_memory_optimization_plans(
+                self._runtime_options.memory_optimizer_config
+            )
+            if mem_tbl is not None:
+                mem_row.append_annotation_table(mem_tbl)
+                notes.extend(mem_notes)
+
+        _add_record(
+            tbl,
+            [
+                "Compute Optimizer",
+                self._runtime_options.enable_compute_optimizer,
+                "Enable/Disable with env ORTMODULE_ENABLE_COMPUTE_OPTIMIZER=1/0",
+            ],
+        )
+        _add_record(
+            tbl,
+            [
+                " - FLOPReduction",
+                self._runtime_options.enable_compute_optimizer,
+                "Reduce FLOPs by upstreaming shrinking-sized ops",
+            ],
         )
 
         if self._runtime_options.enable_compute_optimizer:
             if len(self._runtime_options.label_sparsity_ratio) > 0:
-                feature_map.append(
-                    (" -LabelSparsityOpt", True, f"Input density: {self._runtime_options.label_sparsity_ratio}")
+                _add_record(
+                    tbl, [" - LabelSparsityOpt", True, f"Input density: {self._runtime_options.label_sparsity_ratio}"]
                 )
 
             if len(self._runtime_options.embed_sparsity_ratio) > 0:
-                feature_map.append(
-                    (" -EmbedSparsityOpt", True, f"Input density: {self._runtime_options.embed_sparsity_ratio}")
+                _add_record(
+                    tbl, [" - EmbedSparsityOpt", True, f"Input density: {self._runtime_options.embed_sparsity_ratio}"]
                 )
 
         # Add fallback
-        feature_map.append(
-            (
+        _add_record(
+            tbl,
+            [
                 "Auto Fallback",
                 self._runtime_options.fallback_policy is not _FallbackPolicy.FALLBACK_DISABLE,
                 "Fallback to PyTorch when encountering unsupported ops",
-            )
+            ],
         )
 
-        if self._runtime_options.enable_triton:
-            feature_map.append(
-                (
-                    "TritonOp Enabled",
-                    True,
-                    "ORT will switch to Triton for executing some ops to further accelerate training.",
-                )
-            )
+        # Add Triton
+        _add_record(
+            tbl,
+            [
+                "TritonOp Enabled",
+                self._runtime_options.enable_triton,
+                "ORT will switch to Triton for executing some ops to further accelerate training.",
+            ],
+        )
 
         if self._runtime_options.enable_tuning:
             desc = "Enable tunning Ops online"
             if self._runtime_options.tuning_results_path:
                 desc += f", save tuning results to {self._runtime_options.tuning_results_path}"
-            feature_map.append(("Online Op Tuning", True, desc))
+            _add_record(tbl, ["Online Op Tuning", True, desc])
         elif self._runtime_options.tuning_results_path:
-            feature_map.append(
-                (
+            _add_record(
+                tbl,
+                [
                     "Offline Op Tuning",
                     True,
                     f"Use offline tuning results from {self._runtime_options.tuning_results_path}",
-                )
+                ],
             )
 
-        feature_map.append(
-            (
+        _add_record(
+            tbl,
+            [
                 "ZeRO Stage3 Support",
                 self._runtime_options.enable_zero_stage3_support,
                 "Enable/Disable with env ORTMODULE_ENABLE_ZERO_STAGE3=1/0",
-            )
+            ],
         )
 
         mode = "training" if self._export_mode == torch.onnx.TrainingMode.TRAINING else "inference"
         mode = f"{_logger.LogColor.UNDERLINE}{mode}{_logger.LogColor.ENDC}"
-
-        stat = f"\n\n{_logger.LogColor.HEADER}***** ONNX Runtime Training (ORTModule) is accelerating your model *****{_logger.LogColor.ENDC}\n\n"
+        stat = f"\n{_logger.LogColor.HEADER}***** ONNX Runtime Training (ORTModule) is accelerating your model *****{_logger.LogColor.ENDC}\n\n"
         stat += f"ORTModule is enabled with following features ON/OFF for [{mode}] mode:\n\n"
-        for feature_tuple in feature_map:
-            switch_str = "ON" if feature_tuple[1] else "OFF"
-            stat += f"{feature_tuple[0]:<20}:\t{switch_str:<10}:\t{feature_tuple[2]:<80}\n"
+        stat += tbl.get_string() + "\n"
 
         # Collect ORTModule overheads for different phases.
         stat += f"\n{self.time_tracker.to_string(self._debug_options.logging.log_level < LogLevel.WARNING)}\n"
-
         stat += f"Versions: ONNX Runtime - {onnxruntime.__version__}, ONNX - {onnx.__version__}\n\n"
-        stat += f"{_logger.LogColor.HEADER}************************************************************************{_logger.LogColor.ENDC}\n\n"
 
+        # Add notes
+        for index, note in enumerate(notes):
+            stat += f"Note {index + 1}: {note}\n"
+
+        stat += f"\n{_logger.LogColor.HEADER}************************************************************************{_logger.LogColor.ENDC}\n\n"
         self._logger.warning(stat)
diff --git a/orttraining/orttraining/python/training/ortmodule/_inference_manager.py b/orttraining/orttraining/python/training/ortmodule/_inference_manager.py
index 8d8be81c549d1..6690af9b71bf1 100644
--- a/orttraining/orttraining/python/training/ortmodule/_inference_manager.py
+++ b/orttraining/orttraining/python/training/ortmodule/_inference_manager.py
@@ -16,7 +16,7 @@
 from ._fallback import ORTModuleFallbackException, _FallbackManager, _FallbackPolicy
 from ._graph_execution_manager import GraphExecutionManager, _RunStateInfo
 from ._io import unflatten_user_output
-from ._logger import ORTModuleInitPhase, SuppressLogs, TrackTime
+from ._logger import ORTModuleInitPhase, TrackTime
 from ._utils import save_tuning_results, set_tuning_results
 from .options import DebugOptions, _SkipCheck
 
@@ -207,7 +207,6 @@ def forward(self, *inputs, **kwargs):
             return self._fallback_manager.fallback(self._debug_options.logging.log_level, *inputs, **kwargs)
 
     @TrackTime(ORTModuleInitPhase.BUILD_GRAPH)
-    @SuppressLogs(ORTModuleInitPhase.BUILD_GRAPH)
     def _build_graph(self, graph_transformer_config):
         """Build an inference graph using the module_graph_builder"""
 
@@ -221,7 +220,6 @@ def _build_graph(self, graph_transformer_config):
             )
 
     @TrackTime(ORTModuleInitPhase.CREATE_SESSION)
-    @SuppressLogs(ORTModuleInitPhase.CREATE_SESSION)
     def _create_execution_agent(self):
         """Creates an InferenceAgent that can run forward graph on an inference model"""
 
diff --git a/orttraining/orttraining/python/training/ortmodule/_io.py b/orttraining/orttraining/python/training/ortmodule/_io.py
index e7c1b30daae0d..f5fbd5093fca3 100644
--- a/orttraining/orttraining/python/training/ortmodule/_io.py
+++ b/orttraining/orttraining/python/training/ortmodule/_io.py
@@ -206,10 +206,11 @@ def _expand_inputs(current_input, non_none_inputs, name=""):
     _expand_inputs(inputs, non_none_inputs)
     flattened_kwargs_inputs = {}
     _expand_inputs(kwargs, flattened_kwargs_inputs)
-    buffer_names_dict = {buffer_name: inp for buffer_name, inp in named_buffer}
+    buffer_names_dict = None
     result = []
     embed_sparsity_results = OrderedDict()
     label_sparsity_results = OrderedDict()
+    onnx_input_to_value_map = OrderedDict()
 
     for input_idx, name in enumerate(onnx_input_names):
         inp = None
@@ -232,6 +233,8 @@ def _expand_inputs(current_input, non_none_inputs, name=""):
 
         if inp is None:
             # Registered buffers are translated to user_input+initializer in ONNX
+            if buffer_names_dict is None:
+                buffer_names_dict = {buffer_name: i for buffer_name, i in named_buffer}
             try:  # noqa: SIM105
                 inp = buffer_names_dict[name]
             except KeyError:
@@ -249,6 +252,8 @@ def _expand_inputs(current_input, non_none_inputs, name=""):
                 if label_density < 100:
                     label_sparsity_results[name] = label_density
             result.append(inp)
+
+            onnx_input_to_value_map[name] = inp
         else:
             raise wrap_exception(
                 ORTModuleONNXModelException, RuntimeError(f"Input is present in ONNX graph but not provided: {name}.")
@@ -262,6 +267,10 @@ def _expand_inputs(current_input, non_none_inputs, name=""):
     else:
         result.extend(params)
 
+    if rt_inspector.memory_ob.is_enabled() and not rt_inspector.memory_ob.symbolic_dim_collecting_completed:
+        rt_inspector.memory_ob.collect_symbolic_dim_values(input_info.dynamic_axes, onnx_input_to_value_map)
+        rt_inspector.memory_ob.symbolic_dim_collecting_completed = True
+
     return result, embed_sparsity_results, label_sparsity_results
 
 
diff --git a/orttraining/orttraining/python/training/ortmodule/_logger.py b/orttraining/orttraining/python/training/ortmodule/_logger.py
index e075ced8eaac2..a01db28374b8d 100644
--- a/orttraining/orttraining/python/training/ortmodule/_logger.py
+++ b/orttraining/orttraining/python/training/ortmodule/_logger.py
@@ -21,15 +21,18 @@
 
 class LogLevel(IntEnum):
     VERBOSE = 0
-    INFO = 1
-    WARNING = 2
-    ERROR = 3
-    FATAL = 4
+    DEVINFO = 1  # For ORT developers.
+    INFO = 2  # For ORT users.
+    WARNING = 3
+    ERROR = 4
+    FATAL = 5
 
 
 ORTMODULE_LOG_LEVEL_MAP: Dict[LogLevel, List[int]] = {
     LogLevel.VERBOSE: [Severity.VERBOSE, logging.DEBUG],
-    LogLevel.INFO: [Severity.INFO, logging.INFO],
+    LogLevel.DEVINFO: [Severity.INFO, logging.INFO],
+    # ONNX Runtime has too many INFO logs, so we map it to WARNING for a better user experience.
+    LogLevel.INFO: [Severity.WARNING, logging.INFO],
     LogLevel.WARNING: [Severity.WARNING, logging.WARNING],
     LogLevel.ERROR: [Severity.ERROR, logging.ERROR],
     LogLevel.FATAL: [Severity.FATAL, logging.FATAL],
@@ -48,13 +51,13 @@ def configure_ortmodule_logger(log_level: LogLevel) -> logging.Logger:
     """Configure the logger for ortmodule according to following rules.
     1. If multiple processes are used, the rank will be appended
        to the logger name.
-    2. If the log level is greater than info, the logger will be
+    2. If the log level is equal to or greater than INFO, the logger will be
        disabled for non-zero ranks.
     """
     rank_info = f".rank-{get_rank()}" if get_world_size() > 1 else ""
     logger = logging.getLogger(f"orttraining{rank_info}")
-    # Disable the logger for non-zero ranks when level > info
-    logger.disabled = log_level > LogLevel.INFO and get_rank() != 0
+    # Disable the logger for non-zero ranks when level >= INFO
+    logger.disabled = log_level >= LogLevel.INFO and get_rank() != 0
     logger.setLevel(ortmodule_loglevel_to_python_loglevel(log_level))
     return logger
 
@@ -260,7 +263,7 @@ def wrapper(graph_execution_manager, *args, **kwargs):
                 raise RuntimeError("The class of the function to be tracked must have a '_debug_options' attribute.")
 
             with _suppress_os_stream_output(
-                enable=graph_execution_manager._debug_options.log_level >= LogLevel.INFO,
+                enable=graph_execution_manager._debug_options.log_level >= LogLevel.DEVINFO,
                 on_exit=partial(
                     _log_with_filter,
                     graph_execution_manager._logger,
diff --git a/orttraining/orttraining/python/training/ortmodule/_runtime_inspector.py b/orttraining/orttraining/python/training/ortmodule/_runtime_inspector.py
index dda909e8cb0f1..cfd2e25e13e26 100644
--- a/orttraining/orttraining/python/training/ortmodule/_runtime_inspector.py
+++ b/orttraining/orttraining/python/training/ortmodule/_runtime_inspector.py
@@ -5,12 +5,18 @@
 
 from enum import IntEnum
 from logging import Logger
-from typing import List, Tuple, Union
+from typing import Dict, List, Optional, Tuple, Union
 
 import onnx
 import torch
 from onnx import ModelProto, helper
 from onnx import onnx_pb as onnx_proto
+from sympy import Symbol, simplify
+from sympy.parsing.sympy_parser import parse_expr
+
+from onnxruntime.training.utils import PTable
+
+from ._execution_agent import TrainingAgent
 
 
 class Phase(IntEnum):
@@ -39,11 +45,11 @@ class RuntimeInspector:
     Runtime inspector for ORTModule.
     """
 
-    def __init__(self, logger: Logger):
+    def __init__(self, logger: Logger, module: torch.nn.Module):
         self._logger = logger
 
         self.input_density_ob: Union[InputDensityObserver, None] = None
-        self.memory_ob: Union[MemoryObserver, None] = None
+        self.memory_ob = MemoryObserver(module, self._logger)
 
     def enable_input_inspector(self, model: ModelProto, user_input_names: List[str]) -> None:
         """Initialize input inspector from the given ONNX model and user input names.
@@ -82,26 +88,6 @@ def disable_input_inspector(self) -> None:
         """Disable input density inspector."""
         self.input_density_ob = None
 
-    def enable_memory_inspector(self, module: torch.nn.Module):
-        """Enable memory inspector for ORTModule.
-
-        Args:
-            module: ORTModule.
-        """
-        if self.memory_ob is None:
-            self.memory_ob = MemoryObserver(module, self._logger)
-        else:
-            raise RuntimeError("Memory observer is already enabled.")
-
-    def inspect_memory(self, phase: Phase) -> None:
-        """Inspect memory usage and print statistics.
-
-        Args:
-            phase: Phase to inspect.
-        """
-        if self.memory_ob is not None:
-            self.memory_ob.inspect_memory(phase)
-
 
 class InputDensityObserver:
     """Training input data observer for ORTModule.
@@ -460,6 +446,16 @@ def _try_get_initializer_value(self, model, name):
         return value
 
 
+class MemoryOptimizationSummary:
+    """Memory optimization summary for a cluster id combination."""
+
+    def __init__(self, saving_str="", simplified_saving_expr=None, evaluated_saving=None, freq=0):
+        self.raw_symbolic_saving_str = saving_str
+        self.simplified_symbolic_saving_expr: Optional[Symbol] = simplified_saving_expr
+        self.evaluated_saving: Union[str, int, None] = evaluated_saving
+        self.freq = freq
+
+
 class MemoryObserver:
     """Memory inspector across the training lifetime.
 
@@ -472,6 +468,19 @@ class MemoryObserver:
 
     def __init__(self, m: torch.nn.Module, logger: Logger):
         self._logger = logger
+        self._is_enabled = True
+
+        # Memory optimization related.
+        self.memory_optimization_opportunity_table_str = None
+        self.cluster_id_combination_to_saving_symbolics_map: Dict[str, MemoryOptimizationSummary] = {}
+        ## The value is a list of symbolic dim values parsed from the first batch.
+        self.symbolic_dim_name_to_value_map: Dict = {}
+
+        ## Used to control only the first batch is used to collect symbolic dim values.
+        self.symbolic_dim_collecting_completed = False
+
+        # For per-step memory inspection.
+        self._print_memory_stats_by_step = False
         self._current_step = 0
         self._rank = 0
         self._world_size = 1
@@ -485,8 +494,77 @@ def __init__(self, m: torch.nn.Module, logger: Logger):
 
         self._is_first_inspect = True
 
+    def is_enabled(self) -> bool:
+        """Check if memory inspector is enabled."""
+        return self._is_enabled
+
+    def enable_memory_stats_by_step(self, print_memory_stats_by_step: bool):
+        # For per-step memory inspection.
+        self._print_memory_stats_by_step = print_memory_stats_by_step
+
+    def collect_symbolic_dim_values(
+        self,
+        onnx_input_name_to_dynamic_axes_map: Dict[str, Dict[int, str]],
+        onnx_input_to_value_map: Dict[str, torch.Tensor],
+    ):
+        """Collect symbolic dim values."""
+        for input_name, dynamic_axes in onnx_input_name_to_dynamic_axes_map.items():
+            if input_name in onnx_input_to_value_map:
+                for dim_idx, dim_name in dynamic_axes.items():
+                    self.symbolic_dim_name_to_value_map[Symbol(dim_name)] = onnx_input_to_value_map[input_name].size()[
+                        dim_idx
+                    ]
+
+    def find_memory_optimization_opportunity(
+        self, execution_agent: TrainingAgent, memory_optimizer_config, probe_level
+    ):
+        """Find memory optimization opportunity.
+
+        Args:
+            execution_agent: TrainingAgent.
+            memory_optimizer_config: Memory optimization config.
+            probe_level: Memory probe level.
+        """
+        (
+            self.memory_optimization_opportunity_table_str,
+            memory_optimization_saving_symbolics,
+        ) = execution_agent.get_serialized_ortmodule_memory_stat(memory_optimizer_config, probe_level)
+
+        cluster_id_to_saving_symbol_map: Dict[str, MemoryOptimizationSummary] = {}
+        for cluster_id, memory_saving_stat in memory_optimization_saving_symbolics.items():
+            memory_saving_symbolic = memory_saving_stat[0]
+            freq = memory_saving_stat[1]
+            expr = parse_expr(memory_saving_symbolic)
+            simplified_expr = simplify(expr)
+            r = simplified_expr.evalf(subs=self.symbolic_dim_name_to_value_map)
+            evaluated_saving = None
+            if r.is_number:
+                evaluated_saving = float(r)
+            else:
+                evaluated_saving = r
+
+            cluster_id_to_saving_symbol_map[cluster_id] = MemoryOptimizationSummary(
+                memory_saving_symbolic, simplified_expr, evaluated_saving, freq
+            )
+
+        # Sorted by evaluated_saving if it is a float
+        sorted_list = sorted(
+            cluster_id_to_saving_symbol_map.items(),
+            key=lambda x: x[1].evaluated_saving if isinstance(x[1].evaluated_saving, float) else 0,
+            reverse=True,
+        )
+
+        for cluster_id, values in sorted_list:
+            self.cluster_id_combination_to_saving_symbolics_map[cluster_id] = values
+
     def inspect_memory(self, cur_phase: Phase):
-        if not torch.cuda.is_available():
+        """Inspect memory usage and print statistics.
+
+        Args:
+            phase: Phase to inspect.
+        """
+
+        if not torch.cuda.is_available() or not self._print_memory_stats_by_step:
             return
 
         if self._is_first_inspect:
@@ -498,36 +576,38 @@ def inspect_memory(self, cur_phase: Phase):
         if self._rank != 0:
             return
 
-        if cur_phase < Phase.PRE_FORWARD or cur_phase > self._last_phase:
-            raise RuntimeError(f"Invalid phase detected: {cur_phase}")
+        if cur_phase < Phase.PRE_FORWARD or (cur_phase <= self._last_phase):
+            raise RuntimeError(f"Invalid phase detected: {cur_phase}, last_phase: {self._last_phase}")
 
         if (cur_phase - self._pre_phase) != 1:
             raise RuntimeError(f"Invalid phase transition detected: {self._pre_phase} -> {cur_phase}")
 
-        cur_mem_allocated = self._normalize(torch.cuda.memory_allocated())
-        max_mem_allocated = self._normalize(torch.cuda.max_memory_allocated())
-        cur_mem_cached = self._normalize(torch.cuda.memory_reserved())
-        max_mem_cached = self._normalize(torch.cuda.max_memory_reserved())
-        torch_mem_stat = torch.cuda.memory_stats()
-        cur_mem_inactive = self._normalize(torch_mem_stat.get("inactive_split_bytes.all.current", 0))
-        max_mem_inactive = self._normalize(torch_mem_stat.get("inactive_split_bytes.all.peak", 0))
-
-        mem_stats = [
-            ["phase", _convert_phase_to_string(cur_phase)],
-            ["allocated", cur_mem_allocated],  # current memory alloeated for tensors
-            ["max allocated", max_mem_allocated],  # peak memory allocated for tensors
-            ["cached", cur_mem_cached],  # current memory cached for caching allocator
-            ["max cached", max_mem_cached],  # peak memory cached for caching allocator.
-            ["inactive", cur_mem_inactive],  # amount of inactive, non-releasable memory
-            ["max inactive", max_mem_inactive],  # peak of inactive, non-releasable memory
-        ]
-
-        summ = f"{self._rank_info} step {self._current_step} memory ({MemoryObserver.NORMALIZER_UNIT})"
-        for stat in mem_stats:
-            summ += f" | {stat[0]}: {stat[1]}"
-
         # For the 10+ steps, only print when it is power of 2.
-        if self._current_step < 10 or (self._current_step & (self._current_step - 1) == 0):
+        need_print = self._current_step < 10 or (self._current_step & (self._current_step - 1) == 0)
+
+        if need_print:
+            cur_mem_allocated = self._normalize(torch.cuda.memory_allocated())
+            max_mem_allocated = self._normalize(torch.cuda.max_memory_allocated())
+            cur_mem_cached = self._normalize(torch.cuda.memory_reserved())
+            max_mem_cached = self._normalize(torch.cuda.max_memory_reserved())
+            torch_mem_stat = torch.cuda.memory_stats()
+            cur_mem_inactive = self._normalize(torch_mem_stat.get("inactive_split_bytes.all.current", 0))
+            max_mem_inactive = self._normalize(torch_mem_stat.get("inactive_split_bytes.all.peak", 0))
+
+            mem_stats = [
+                ["phase", _convert_phase_to_string(cur_phase)],
+                ["allocated", cur_mem_allocated],  # current memory allocated for tensors
+                ["max allocated", max_mem_allocated],  # peak memory allocated for tensors
+                ["cached", cur_mem_cached],  # current memory cached for the caching allocator
+                ["max cached", max_mem_cached],  # peak memory cached for caching allocator.
+                ["inactive", cur_mem_inactive],  # amount of inactive, non-releasable memory
+                ["max inactive", max_mem_inactive],  # peak of inactive, non-releasable memory
+            ]
+
+            summ = f"{self._rank_info} step {self._current_step} memory ({MemoryObserver.NORMALIZER_UNIT})"
+            for stat in mem_stats:
+                summ += f" | {stat[0]}: {stat[1]}"
+
             self._logger.info(summ)
 
         if cur_phase == self._last_phase:
@@ -542,3 +622,72 @@ def _increase_step(self):
 
     def _normalize(self, mem_size_in_bytes: Union[float, int]) -> str:
         return f"{float(mem_size_in_bytes) / MemoryObserver.NORMALIZER_FACTOR:.0f}"
+
+    def display_memory_optimization_plans(self, memory_optimizer_config) -> Tuple[List[str], PTable]:
+        mem_plan_count = len(self.cluster_id_combination_to_saving_symbolics_map)
+
+        if mem_plan_count > 0:
+            mem_tbl = PTable()
+            mem_tbl.add_row(["", "", "", "", "Configs", "Freq", "Max Saving(Bytes)", "Saving Symbolic(Bytes)"])
+
+            index = 1
+
+            def _get_user_config_without_freq(configs: str):
+                if len(configs) == 0:
+                    return []
+                config_list = configs.split(",")
+                configs_with_out_freq = []
+                for config in config_list:
+                    config_values = config.split(":")
+                    freq = int(config_values[2])
+                    if freq == 0:
+                        continue
+                    configs_with_out_freq.append(config_values[0] + ":" + config_values[1])
+
+                return configs_with_out_freq
+
+            user_configs_with_out_freq = _get_user_config_without_freq(memory_optimizer_config)
+
+            for (
+                cluster_id,
+                saving_symbolic,
+            ) in self.cluster_id_combination_to_saving_symbolics_map.items():
+                saving_bytes = saving_symbolic.evaluated_saving
+                if isinstance(saving_bytes, float):
+                    saving_bytes = f"{saving_bytes:,.0f}"
+
+                cluster_ids_without_freq = _get_user_config_without_freq(cluster_id)
+
+                mem_tbl.add_row(
+                    [
+                        f" - Plan {index}",
+                        ":",
+                        "ON"
+                        if all(cluster_id in user_configs_with_out_freq for cluster_id in cluster_ids_without_freq)
+                        else "OFF",
+                        ":",
+                        cluster_id,
+                        saving_symbolic.freq,
+                        saving_bytes,
+                        saving_symbolic.simplified_symbolic_saving_expr,
+                    ]
+                )
+
+                index += 1
+
+            saving_recommendation = (
+                "use comma as delimiter to enable multiple memory optimization plans at the same time:\n"
+            )
+            saving_recommendation += "  export ORTMODULE_MEMORY_OPT_CONFIG=<plan1 config>,<plan2 config>,..."
+
+            notes = []
+            notes.append(saving_recommendation)
+
+            saving_recommendation = "memory saving is calculated based on the 1st batch symbolic dim values:\n"
+            for dim_param, dim_value in self.symbolic_dim_name_to_value_map.items():
+                saving_recommendation += f"  {dim_param}={dim_value},"
+            notes.append(saving_recommendation)
+
+            return notes, mem_tbl
+
+        return [], None
diff --git a/orttraining/orttraining/python/training/ortmodule/_training_manager.py b/orttraining/orttraining/python/training/ortmodule/_training_manager.py
index 19effe2086e0a..96a95557bb9a1 100644
--- a/orttraining/orttraining/python/training/ortmodule/_training_manager.py
+++ b/orttraining/orttraining/python/training/ortmodule/_training_manager.py
@@ -18,10 +18,10 @@
 from ._gradient_accumulation_manager import GradientAccumulationManager
 from ._graph_execution_manager import GraphExecutionManager, _RunStateInfo
 from ._io import _FlattenedModule, _InputInfo, unflatten_user_output
-from ._logger import ORTModuleInitPhase, SuppressLogs, TrackTime
+from ._logger import LogLevel, ORTModuleInitPhase, TrackTime
 from ._runtime_inspector import Phase
 from ._utils import save_tuning_results, set_tuning_results
-from .graph_transformer_registry import GraphTransformerRegistry
+from .graph_optimizer_registry import GraphOptimizerRegistry
 from .options import DebugOptions, _SkipCheck
 
 
@@ -111,7 +111,7 @@ def forward(ctx, *inputs):
 
                 Module outputs are returned to the user
                 """
-                self._runtime_inspector.inspect_memory(Phase.PRE_FORWARD)
+                self._runtime_inspector.memory_ob.inspect_memory(Phase.PRE_FORWARD)
 
                 if self._runtime_options.skip_check.is_set(_SkipCheck.SKIP_CHECK_DEVICE) is False:
                     # Assert that the input and model device match
@@ -146,7 +146,7 @@ def forward(ctx, *inputs):
                 for idx in self._graph_info.output_grad_indices_non_differentiable:
                     ctx.mark_non_differentiable(user_outputs[idx])
 
-                self._runtime_inspector.inspect_memory(Phase.POST_FORWARD)
+                self._runtime_inspector.memory_ob.inspect_memory(Phase.POST_FORWARD)
 
                 return user_outputs
 
@@ -154,7 +154,7 @@ def forward(ctx, *inputs):
             def backward(ctx, *grad_outputs):
                 """Performs backward pass based on grad wrt module output"""
 
-                self._runtime_inspector.inspect_memory(Phase.PRE_BACKWARD)
+                self._runtime_inspector.memory_ob.inspect_memory(Phase.PRE_BACKWARD)
 
                 assert ctx.run_info is not None, "forward() or __call__() methods must be called before backward()"
                 if self._runtime_options.skip_check.is_set(_SkipCheck.SKIP_CHECK_DEVICE) is False:
@@ -205,7 +205,7 @@ def backward(ctx, *grad_outputs):
                 # This version only works if backward_outputs is an OrtValueVector.
                 transferred_backward_outputs = _utils._ortvalues_to_torch_tensor(backward_outputs, self._device)
 
-                self._runtime_inspector.inspect_memory(Phase.POST_BACKWARD)
+                self._runtime_inspector.memory_ob.inspect_memory(Phase.POST_BACKWARD)
 
                 return tuple(transferred_backward_outputs[idx] if idx != -1 else None for idx in self._gradient_map)
 
@@ -242,7 +242,6 @@ def forward(self, *inputs, **kwargs):
                     self._runtime_options.skip_check.is_set(_SkipCheck.SKIP_CHECK_EXECUTION_AGENT),
                     self._runtime_options.skip_check.is_set(_SkipCheck.SKIP_CHECK_DEVICE),
                 )
-
             # If exporting module to ONNX for the first time, this skip check will not take effect.
             # It will only take effect on subsequent forward calls.
             build_gradient_graph = False
@@ -358,7 +357,6 @@ def forward(self, *inputs, **kwargs):
             return self._fallback_manager.fallback(self._debug_options.logging.log_level, *inputs, **kwargs)
 
     @TrackTime(ORTModuleInitPhase.BUILD_GRAPH)
-    @SuppressLogs(ORTModuleInitPhase.BUILD_GRAPH)
     def _build_graph(self, graph_transformer_config):
         """Build an optimized gradient graph using the module_graph_builder"""
 
@@ -369,7 +367,7 @@ def _build_graph(self, graph_transformer_config):
         device_type = self._device.type
         if device_type == "cuda" and self.is_rocm_pytorch:
             device_type = "rocm"
-        GraphTransformerRegistry.transform_all(
+        GraphOptimizerRegistry.optimize_all(
             type(self._flattened_module._original_module).__name__, device_type, self._onnx_models.optimized_model.graph
         )
 
@@ -401,13 +399,12 @@ def _build_graph(self, graph_transformer_config):
                 self._gradient_map.append(-1)
 
     @TrackTime(ORTModuleInitPhase.CREATE_SESSION)
-    @SuppressLogs(ORTModuleInitPhase.CREATE_SESSION)
     def _create_execution_agent(self):
         """Creates a TrainingAgent that can run the forward and backward graph on the training model"""
 
         session_options, providers, provider_options = self._get_session_config()
         fw_feed_names = [input.name for input in self._onnx_models.optimized_model.graph.input]
-        device_type = self._device if type(self._device) is str else self._device.type.lower()
+        device_type = self._device if type(self._device) is str else self._device.type.lower()  # noqa: E721
         if device_type == "ort":
             fw_outputs_device_info = [C.get_ort_device(self._device.index)] * (
                 len(self._graph_info.user_output_names) + len(self._graph_info.frontier_node_arg_map)
@@ -435,6 +432,39 @@ def _create_execution_agent(self):
 
         local_device_rank = self._device.index if device_type == "ort" else _utils.get_device_index(self._device)
 
+        # When log level is <= INFO, we would collect memory optimization opportunities.
+        # (TODO: consider to enable by default once memory optimization feature is stable and well improved.)
+        # Create a training agent without enabling memory optimization here is beneficial for memory analyzing
+        # when we have an allocation plan in place, and reuse information is available.
+        if self._runtime_inspector.memory_ob.is_enabled() and self._debug_options.log_level <= LogLevel.INFO:
+            # Create a training agent without enabling memory optimization.
+            execution_agent = TrainingAgent(
+                self._onnx_models.optimized_model.SerializeToString(),
+                fw_feed_names,
+                fw_outputs_device_info,
+                bw_fetches_names,
+                bw_outputs_device_info,
+                session_options,
+                providers,
+                provider_options,
+                local_device_rank,
+            )
+
+            self._runtime_inspector.memory_ob.find_memory_optimization_opportunity(
+                execution_agent, self._runtime_options.memory_optimizer_config, self._runtime_options.probe_level
+            )
+
+            # Release it as early as possible.
+            del execution_agent
+
+        # Enable memory optimization if it is enabled in the session options.
+        session_options.add_session_config_entry(
+            "optimization.memory_optimizer_config", self._runtime_options.memory_optimizer_config
+        )
+        session_options.add_session_config_entry(
+            "optimization.enable_memory_probe_recompute_level", self._runtime_options.probe_level
+        )
+
         self._execution_agent = TrainingAgent(
             self._onnx_models.optimized_model.SerializeToString(),
             fw_feed_names,
diff --git a/orttraining/orttraining/python/training/ortmodule/_zero_stage3_compatibility.py b/orttraining/orttraining/python/training/ortmodule/_zero_stage3_compatibility.py
index 17756600d601e..d076ecacd6ba5 100644
--- a/orttraining/orttraining/python/training/ortmodule/_zero_stage3_compatibility.py
+++ b/orttraining/orttraining/python/training/ortmodule/_zero_stage3_compatibility.py
@@ -3,15 +3,21 @@
 # Licensed under the MIT License.
 # --------------------------------------------------------------------------
 
+
+from contextlib import contextmanager
 from typing import Dict, List, Optional, Tuple, Union
 
 import torch
 from onnx import ModelProto, NodeProto, TensorProto, ValueInfoProto, helper
 
-from onnxruntime.capi._pybind_state import register_torch_autograd_function
-from onnxruntime.training.utils import pytorch_dtype_to_onnx
+from onnxruntime.capi._pybind_state import (
+    register_input_alias_function,
+    register_shape_inference_function,
+    register_torch_autograd_function,
+)
+from onnxruntime.training.utils import pytorch_type_to_onnx_dtype
 
-from ._custom_autograd_function_exporter import PythonOpShapeInferStore
+from ._custom_autograd_function_exporter import register_custom_function_schema_supplementary
 from ._utils import get_fully_qualified_class_name
 
 STAGE3_PULL_WEIGHT_TRIGGER_NAME = "pull_weight_trigger"
@@ -35,6 +41,8 @@ def post_processing_enable_zero_stage3_compat(
     # Register symbolic shape inference functions for PythonOp used in DeepSpeed ZeRO stage3.
     _register_symbolic_shape_infer_functions()
 
+    _register_alias_input_functions()
+
     # Create weight retrieving function using zero_stage3_named_params.
     func_full_qual_name = _create_weight_retrieval_function(zero_stage3_named_params)
 
@@ -68,7 +76,7 @@ def _get_func_name(node: NodeProto) -> Optional[str]:
 
     from onnxruntime.training.utils.hooks._zero_offload_subscriber import ORTZeROOffloadPreForwardFunction
 
-    prefowrad_function_name = get_fully_qualified_class_name(ORTZeROOffloadPreForwardFunction)
+    pre_forward_function_name = get_fully_qualified_class_name(ORTZeROOffloadPreForwardFunction)
 
     # Connect weight consumers to use the full-sized parameter output of ORTZeROOffloadPreForwardFunction.
     for graph_input in exported_model.graph.input:
@@ -86,7 +94,7 @@ def _get_func_name(node: NodeProto) -> Optional[str]:
                 continue
 
             func_name = _get_func_name(c)
-            if func_name == prefowrad_function_name:
+            if func_name == pre_forward_function_name:
                 assert (
                     pre_forward_pythonop_node is None
                 ), "Multiple ORTZeROOffloadPreForwardFunction nodes found, it should not happen"
@@ -97,6 +105,7 @@ def _get_func_name(node: NodeProto) -> Optional[str]:
                 "Fail to find ORTZeROOffloadPreForwardFunction for partitioned param: " + graph_input.name
             )
 
+        pull_weight_trigger_input_name = _get_param_pull_trigger_name(graph_input.name)
         index_offset_on_python_op_input = []
         for i, input_name in enumerate(pre_forward_pythonop_node.input):
             if input_name == graph_input.name:
@@ -104,21 +113,32 @@ def _get_func_name(node: NodeProto) -> Optional[str]:
 
         assert (
             len(index_offset_on_python_op_input) == 1
-        ), f"index_offset_on_python_op_input length is not 1: {index_offset_on_python_op_input}"
+        ), f"index_offset_on_python_op_input length is not 1: {index_offset_on_python_op_input} for node {pre_forward_pythonop_node.name}, input {graph_input.name}, {pre_forward_pythonop_node.input}"
 
         reverse_index_among_inputs = index_offset_on_python_op_input[0] - len(pre_forward_pythonop_node.input)
-        new_input_name = _get_param_pull_trigger_name(graph_input.name)
-        pre_forward_pythonop_node.input[index_offset_on_python_op_input[0]] = new_input_name
+
+        pre_forward_pythonop_node.input[index_offset_on_python_op_input[0]] = pull_weight_trigger_input_name
 
         _update_python_op_input_related_attributes(
             pre_forward_pythonop_node,
-            new_input_name,
+            pull_weight_trigger_input_name,
             len(STAGE3_PULL_WEIGHT_TRIGGER_OUTPUT_SHAPE),  # new rank
             STAGE3_PULL_WEIGHT_TRIGGER_OUTPUT_DTYPE,  # new data type
         )
 
         output_index = reverse_index_among_inputs + len(pre_forward_pythonop_node.output)
-        pre_forward_pythonop_node.output[output_index] = graph_input.name
+
+        ready_weight_name = f"ready_{graph_input.name}"
+        pre_forward_pythonop_node.output[output_index] = ready_weight_name
+
+        # Update consumer's input to use the full-sized parameter output of ORTZeROOffloadPreForwardFunction.
+        for c in consumers:
+            new_inputs = [c_input for c_input in c.input]
+            for c_input_index in range(len(c.input)):
+                if c.input[c_input_index] == graph_input.name:
+                    new_inputs[c_input_index] = ready_weight_name
+            del c.input[:]
+            c.input.extend(new_inputs)
 
         # If the consumer of original `graph_input.name` is PythonOp, we need also update its attributes because now
         # `graph_input.name` as output of pre_forward_pythonop_node, is full-sized parameter, the rank might differ
@@ -130,7 +150,7 @@ def _get_func_name(node: NodeProto) -> Optional[str]:
                 c,
                 graph_input.name,
                 len(zero_stage3_named_params[graph_input.name].ds_shape),  # new rank
-                pytorch_dtype_to_onnx(zero_stage3_named_params[graph_input.name].dtype),  # new data type
+                pytorch_type_to_onnx_dtype(zero_stage3_named_params[graph_input.name].dtype),  # new data type
             )
 
     # Delete exported_model.graph.input
@@ -185,11 +205,13 @@ def infer_shape(
             tensor_output_dtypes = [
                 tensor_input_dtypes[0],
             ] * param_count
+
             return tensor_output_shapes, tensor_output_dtypes
 
     func_full_qual_name = get_fully_qualified_class_name(WeightRetrievalFunction)
     register_torch_autograd_function(func_full_qual_name, WeightRetrievalFunction)
-    PythonOpShapeInferStore.register(WeightRetrievalFunction)
+
+    register_custom_function_schema_supplementary(WeightRetrievalFunction)
 
     return func_full_qual_name
 
@@ -205,10 +227,10 @@ def _simple_pass_through_infer_shape(
     ) -> Tuple[List[Optional[List[Union[int, str]]]], List[torch.onnx.TensorProtoDataType]]:
         return tensor_input_shapes, tensor_input_dtypes
 
-    PythonOpShapeInferStore.register_func(
+    register_shape_inference_function(
         "deepspeed.runtime.zero.parameter_offload.PreBackwardFunction", _simple_pass_through_infer_shape
     )
-    PythonOpShapeInferStore.register_func(
+    register_shape_inference_function(
         "deepspeed.runtime.zero.parameter_offload.PostBackwardFunction", _simple_pass_through_infer_shape
     )
 
@@ -224,9 +246,36 @@ def _linear_infer_shape(
         output_shape[-1] = shape2[-2]
         return [output_shape], [tensor_input_dtypes[0]]
 
-    PythonOpShapeInferStore.register_func(
-        "deepspeed.runtime.zero.linear.LinearFunctionForZeroStage3", _linear_infer_shape
-    )
+    register_shape_inference_function("deepspeed.runtime.zero.linear.LinearFunctionForZeroStage3", _linear_infer_shape)
+
+
+def _register_alias_input_functions():
+    """This function is used to register symbolic shape inference functions for PythonOp used in
+    DeepSpeed ZeRO stage3."""
+
+    def _alias_input(node_proto_str: str):
+        node: NodeProto = NodeProto()
+        node.ParseFromString(node_proto_str)
+        non_tensor_fw_input_count = 2
+
+        fw_output_count = len(node.output) - 1  # exclude the first output appended in ONNX
+        fw_alias_map = [-1] * fw_output_count
+        bw_alias_map = [-1] * (non_tensor_fw_input_count + len(node.input))
+
+        for i in range(fw_output_count):
+            fw_alias_map[i] = i + non_tensor_fw_input_count
+
+        tensor_input_index = 0
+        for i in range(len(bw_alias_map)):
+            if i < non_tensor_fw_input_count:
+                continue
+            bw_alias_map[i] = tensor_input_index
+            tensor_input_index += 1
+
+        return fw_alias_map, bw_alias_map
+
+    register_input_alias_function("deepspeed.runtime.zero.parameter_offload.PreBackwardFunction", _alias_input)
+    register_input_alias_function("deepspeed.runtime.zero.parameter_offload.PostBackwardFunction", _alias_input)
 
 
 def _create_weight_retrieval_pythonop(
@@ -234,16 +283,16 @@ def _create_weight_retrieval_pythonop(
     func_full_qual_name: str,
     input_name: str,
     output_names: List[str],
-    STAGE3_PULL_WEIGHT_TRIGGER_OUTPUT_DTYPE,
-    STAGE3_PULL_WEIGHT_TRIGGER_OUTPUT_SHAPE: List[int],
+    pull_weight_trigger_output_dtype: int,
+    pull_weight_trigger_output_shape: List[int],
 ) -> Tuple[ValueInfoProto, NodeProto]:
     """This function is used to create a weight retrieving PythonOp."""
     offload_param_count = 0 if zero_stage3_named_params is None else len(zero_stage3_named_params)
     new_input = helper.make_tensor_value_info(
-        input_name, STAGE3_PULL_WEIGHT_TRIGGER_OUTPUT_DTYPE, STAGE3_PULL_WEIGHT_TRIGGER_OUTPUT_SHAPE
+        input_name, pull_weight_trigger_output_dtype, pull_weight_trigger_output_shape
     )
-    output_rank_for_pull_weight_trigger = len(STAGE3_PULL_WEIGHT_TRIGGER_OUTPUT_SHAPE)
-    output_dtype_for_pull_weight_trigger = STAGE3_PULL_WEIGHT_TRIGGER_OUTPUT_DTYPE
+    output_rank_for_pull_weight_trigger = len(pull_weight_trigger_output_shape)
+    output_dtype_for_pull_weight_trigger = pull_weight_trigger_output_dtype
     output_tensor_ranks = [
         output_rank_for_pull_weight_trigger,
     ] * offload_param_count
@@ -253,10 +302,9 @@ def _create_weight_retrieval_pythonop(
 
     node_attributes = {
         "comment": "",
-        "inplace": 0,
         "input_convention": "d",
-        "input_tensor_ranks": [len(STAGE3_PULL_WEIGHT_TRIGGER_OUTPUT_SHAPE)],
-        "input_tensor_types": [STAGE3_PULL_WEIGHT_TRIGGER_OUTPUT_DTYPE],
+        "input_tensor_ranks": [len(pull_weight_trigger_output_shape)],
+        "input_tensor_types": [pull_weight_trigger_output_dtype],
         "output_tensor_ranks": output_tensor_ranks,
         "output_tensor_types": output_tensor_types,
         "training_mode": 1,
@@ -276,7 +324,9 @@ def _create_weight_retrieval_pythonop(
     return new_input, weight_pull_node
 
 
-def _update_python_op_input_related_attributes(node: NodeProto, input_name: str, new_rank: int, new_dtype: int):
+def _update_python_op_input_related_attributes(
+    node: NodeProto, input_name: str, new_rank: int, new_dtype: torch.onnx.TensorProtoDataType
+):
     """This function is used to update PythonOp's input related attributes, e.g.
         input_tensor_ranks and input_tensor_types.
 
@@ -284,7 +334,7 @@ def _update_python_op_input_related_attributes(node: NodeProto, input_name: str,
         node (NodeProto): The PythonOp node.
         input_name (str): The input name to be updated.
         new_rank (int): The new rank of the input, to be used in input_tensor_ranks.
-        new_dtype (int): The new data type of the input, to be used in input_tensor_types.
+        new_dtype (torch.onnx.TensorProtoDataType): The new data type of the input, to be used in input_tensor_types.
     """
     input_tensor_ranks = None
     input_tensor_dtypes = None
@@ -304,9 +354,60 @@ def _update_python_op_input_related_attributes(node: NodeProto, input_name: str,
     for index, node_input_name in enumerate(node.input):
         if node_input_name == input_name:
             input_tensor_ranks[index] = new_rank
-            input_tensor_dtypes[index] = new_dtype
+            input_tensor_dtypes[index] = int(new_dtype)
 
     node.attribute.remove(rank_attr)
     node.attribute.remove(dtype_attr)
     node.attribute.append(helper.make_attribute("input_tensor_ranks", input_tensor_ranks))
     node.attribute.append(helper.make_attribute("input_tensor_types", input_tensor_dtypes))
+
+
+@contextmanager
+def stage3_export_context(enable: bool, graph_execution_manager):
+    """Context manager for stage3 specific model export.
+    Some export functions are overridden when entering the context; the original functions are restored when
+    exiting the context.
+
+    Also collect the zero stage3 parameter maps for graph execution manager.
+    """
+    if not enable:
+        yield
+
+    else:
+        original_func = torch.onnx.symbolic_helper._get_tensor_rank
+        from onnxruntime.training.utils.hooks._zero_offload_subscriber import _get_all_zero_stage3_params
+
+        # Delay collecting stage3 parameters here instead of in the graph execution manager,
+        # to make sure DeepSpeed initialization is done, so that the parameters ds_status are correct.
+        graph_execution_manager._zero_stage3_param_map = _get_all_zero_stage3_params(
+            graph_execution_manager._flattened_module
+        )
+
+        try:
+            from torch.onnx._internal import _beartype
+
+            @_beartype.beartype
+            def _get_tensor_rank(x) -> Optional[int]:
+                ### Adapted from https://github.com/pytorch/pytorch/blob/185515368bcd7d94ac06ab1634f22b747b03c6d9/torch/onnx/symbolic_helper.py#L561
+                # Retrieve the real rank for the stage3 weights, because stage3 weights are all (0).
+                from typing import cast as typing_cast
+
+                from torch import _C
+                from torch.onnx.symbolic_helper import _is_tensor
+
+                input_name = x.debugName()
+                if input_name in graph_execution_manager._zero_stage3_param_map:
+                    rank = len(graph_execution_manager._zero_stage3_param_map[input_name].ds_shape)
+                    return rank
+
+                if not _is_tensor(x) or x.type() is None:
+                    return None
+                x_type = x.type()
+                x_type = typing_cast(_C.TensorType, x_type)
+                return x_type.dim()
+
+            torch.onnx.symbolic_helper._get_tensor_rank = _get_tensor_rank
+
+            yield
+        finally:
+            torch.onnx.symbolic_helper._get_tensor_rank = original_func
diff --git a/orttraining/orttraining/python/training/ortmodule/graph_optimizer_registry.py b/orttraining/orttraining/python/training/ortmodule/graph_optimizer_registry.py
new file mode 100644
index 0000000000000..897ecac148bfb
--- /dev/null
+++ b/orttraining/orttraining/python/training/ortmodule/graph_optimizer_registry.py
@@ -0,0 +1,47 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+
+from typing import Callable
+
+from onnx.onnx_ml_pb2 import GraphProto
+
+
+class GraphOptimizerRegistry:
+    _OPTIMIZER_FUNCS = {}  # noqa: RUF012
+
+    @classmethod
+    def register(cls, target_modules: str, devices: str, priority: int, fn: Callable[[GraphProto], None]):
+        modules = []
+        if target_modules == "all":
+            modules.append("all")
+        else:
+            modules = target_modules.split("|")
+        for module in modules:
+            if module in cls._OPTIMIZER_FUNCS:
+                cls._OPTIMIZER_FUNCS[module].append((fn, devices, priority))
+            else:
+                cls._OPTIMIZER_FUNCS[module] = [(fn, devices, priority)]
+
+    @classmethod
+    def optimize_all(cls, module_name: str, device: str, graph: GraphProto):
+        optimizers_to_apply = []
+        if "all" in cls._OPTIMIZER_FUNCS:
+            optimizers_to_apply.extend(cls._OPTIMIZER_FUNCS["all"])
+        if module_name in cls._OPTIMIZER_FUNCS:
+            optimizers_to_apply.extend(cls._OPTIMIZER_FUNCS[module_name])
+        optimizers_to_apply = [x for x in optimizers_to_apply if x[1] == "all" or device in x[1]]
+        optimizers_to_apply.sort(key=lambda x: x[2], reverse=True)
+        for fn, _, _ in optimizers_to_apply:
+            fn(graph)
+
+
+# target_modules can be multiple module names separated by "|", or "all" means apply to all modules.
+# devices can be multiple device types separated by "|" or "all" means apply to all devices.
+def register_graph_optimizer(target_modules: str = "all", devices: str = "all", priority: int = 0):
+    def graph_optimizer_wrapper(fn):
+        GraphOptimizerRegistry.register(target_modules, devices, priority, fn)
+        return fn
+
+    return graph_optimizer_wrapper
diff --git a/orttraining/orttraining/python/training/ortmodule/graph_optimizers/__init__.py b/orttraining/orttraining/python/training/ortmodule/graph_optimizers/__init__.py
new file mode 100644
index 0000000000000..3d3538a62da61
--- /dev/null
+++ b/orttraining/orttraining/python/training/ortmodule/graph_optimizers/__init__.py
@@ -0,0 +1,22 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+
+import os
+
+import torch
+from packaging.version import Version
+
+_all_optimizers = []
+
+if (
+    "ORTMODULE_USE_EFFICIENT_ATTENTION" in os.environ
+    and int(os.getenv("ORTMODULE_USE_EFFICIENT_ATTENTION")) == 1
+    and Version(torch.__version__) >= Version("2.1.1")
+):
+    from ._aten_attn import optimize_graph_for_aten_efficient_attention  # noqa: F401
+
+    _all_optimizers.append("optimize_graph_for_aten_efficient_attention")
+
+__all__ = _all_optimizers  # noqa: PLE0605
diff --git a/orttraining/orttraining/python/training/ortmodule/graph_optimizers/_aten_attn.py b/orttraining/orttraining/python/training/ortmodule/graph_optimizers/_aten_attn.py
new file mode 100644
index 0000000000000..b1e8809f03fc0
--- /dev/null
+++ b/orttraining/orttraining/python/training/ortmodule/graph_optimizers/_aten_attn.py
@@ -0,0 +1,395 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+
+"""
+PyTorch's _efficient_attention_forward/_efficient_attention_backward APIs is keep changing. Current implementation
+is tested well on version 2.2.0.dev20231010+cu121, and should be run well since official version 2.2.0. If may fail to
+run is you are using PyTorch with older versions.
+
+PyTorch also has API for flash attention (currently doesn't support random attention mask or Dropout), we can add
+support if we want to try in the future.
+"""
+
+from typing import List, Tuple
+
+from onnx import GraphProto, NodeProto, TensorProto, helper
+
+from ..graph_optimizer_registry import register_graph_optimizer
+from .utils import GraphMatcher, check_attribute_value, make_constant_node, update_graph
+
+
+def _make_efficient_attention_nodes(
+    idx: int,
+    q: str,
+    k: str,
+    v: str,
+    y: str,
+    dy: str,
+    dq: str,
+    dk: str,
+    dv: str,
+    bias: str,
+    expand_bias: bool,
+    scale: float,
+    dropout_ratio: float,
+    causal: bool,
+):
+    nodes_to_add = []
+    scale_node = make_constant_node("scale_" + str(idx), TensorProto.FLOAT, [], [scale])
+    dropout_ratio_node = make_constant_node("dropout_ratio_" + str(idx), TensorProto.FLOAT, [], [dropout_ratio])
+    causal_node = make_constant_node("causal_" + str(idx), TensorProto.INT64, [], [1 if causal else 0])
+    int_zero_node = make_constant_node("int_zero_" + str(idx), TensorProto.INT64, [], [0])
+    true_node = make_constant_node("true_" + str(idx), TensorProto.BOOL, [], [True])
+    false_node = make_constant_node("false_" + str(idx), TensorProto.BOOL, [], [False])
+    logsumexp = helper.make_tensor_value_info("logsumexp" + str(idx), TensorProto.FLOAT, [])
+    seed = helper.make_tensor_value_info("seed" + str(idx), TensorProto.INT64, [])
+    offset = helper.make_tensor_value_info("offset" + str(idx), TensorProto.INT64, [])
+    new_value_infos = [logsumexp, seed, offset]
+    if expand_bias:
+        shape_0 = helper.make_node("Shape", [q], ["shape_0_" + str(idx)], start=0, end=1)
+        shape_1 = helper.make_node("Shape", [q], ["shape_1_" + str(idx)], start=2, end=3)
+        shape_2 = helper.make_node("Shape", [q], ["shape_2_" + str(idx)], start=1, end=2)
+        shape_3 = helper.make_node("Shape", [k], ["shape_3_" + str(idx)], start=1, end=2)
+        concat = helper.make_node(
+            "Concat",
+            ["shape_0_" + str(idx), "shape_1_" + str(idx), "shape_2_" + str(idx), "shape_3_" + str(idx)],
+            ["concated_shape_" + str(idx)],
+            axis=0,
+        )
+        expand = helper.make_node("Expand", [bias, "concated_shape_" + str(idx)], ["expanded_bias_" + str(idx)])
+        nodes_to_add.extend([shape_0, shape_1, shape_2, shape_3, concat, expand])
+        bias = "expanded_bias_" + str(idx)
+    fwd_node = helper.make_node(
+        "ATen",
+        [
+            q,
+            k,
+            v,
+            bias,
+            "",
+            "",
+            "",
+            dropout_ratio_node.output[0],
+            causal_node.output[0],
+            true_node.output[0],
+            scale_node.output[0],
+            "",
+            "",
+        ],
+        [y, logsumexp.name, seed.name, offset.name],
+        "efficient_attention_forward_" + str(idx),
+        None,
+        "org.pytorch.aten",
+        operator="_efficient_attention_forward",
+    )
+    bwd_node = helper.make_node(
+        "ATen",
+        [
+            dy,
+            q,
+            k,
+            v,
+            bias,
+            y,
+            "",
+            "",
+            int_zero_node.output[0],
+            int_zero_node.output[0],
+            logsumexp.name,
+            dropout_ratio_node.output[0],
+            seed.name,
+            offset.name,
+            causal_node.output[0],
+            false_node.output[0],
+            scale_node.output[0],
+            "",
+        ],
+        [dq, dk, dv, ""],
+        "efficient_attention_backward_" + str(idx),
+        None,
+        "org.pytorch.aten",
+        operator="_efficient_attention_backward",
+    )
+    nodes_to_add.extend(
+        [scale_node, dropout_ratio_node, causal_node, int_zero_node, true_node, false_node, fwd_node, bwd_node]
+    )
+    return nodes_to_add, new_value_infos
+
+
+# Without causal mask, with Dropout. For example, BERT model in HuggingFace.
+_PATTERN_0: List[Tuple[str, bool, List[Tuple[int, int, int]]]] = [
+    ("MatMul", False, []),  # 0
+    ("Transpose", True, [(0, 0, 0)]),  # 1
+    ("Transpose", True, [(0, 0, 1)]),  # 2
+    ("Div", False, [(0, 0, 0)]),  # 3
+    ("Add", False, [(3, 0, 0)]),  # 4
+    ("Softmax", False, [(4, 0, 0)]),  # 5
+    ("Dropout", False, [(5, 0, 0)]),  # 6
+    ("MatMul", False, [(6, 0, 0)]),  # 7
+    ("Transpose", True, [(7, 0, 1)]),  # 8
+    ("Transpose", False, [(7, 0, 0)]),  # 9
+    ("FusedMatMul", False, [(8, 0, 1)]),  # 10
+    ("DropoutGrad", False, [(10, 0, 0), (6, 1, 1)]),  # 11
+    ("SoftmaxGrad_13", False, [(11, 0, 0), (5, 0, 1)]),  # 12
+    ("Identity", False, [(12, 0, 0)]),  # 13
+    ("Div", False, [(13, 0, 0)]),  # 14
+    ("Identity", False, [(14, 0, 0)]),  # 15
+    ("FusedMatMul", False, [(2, 0, 1), (15, 0, 0)]),  # 16
+    ("FusedMatMul", False, [(1, 0, 0), (15, 0, 1)]),  # 17
+    ("FusedMatMul", False, [(6, 0, 0)]),  # 18
+    ("Transpose", True, [(18, 0, 1)]),  # 19
+    ("Transpose", False, [(16, 0, 0)]),  # 20
+    ("Transpose", False, [(17, 0, 0)]),  # 21
+    ("Transpose", False, [(18, 0, 0)]),  # 22
+]
+
+
+def _optimize_for_pattern_0(matcher: GraphMatcher, idx: int, nodes: List[NodeProto]):
+    # Check forward only as the backward is expected to be consistent if it's built correctly.
+    scale_value = matcher.get_constant_value(nodes[3].input[1])
+    ratio_value = matcher.get_constant_value(nodes[6].input[1])
+    if not (
+        check_attribute_value(nodes[1], "perm", [0, 2, 1, 3])
+        and check_attribute_value(nodes[2], "perm", [0, 2, 3, 1])
+        and scale_value is not None
+        and ratio_value is not None
+        and check_attribute_value(nodes[8], "perm", [0, 2, 1, 3])
+        and check_attribute_value(nodes[9], "perm", [0, 2, 1, 3])
+    ):
+        return [], [], []
+
+    _, add_input_shape_0 = matcher.get_type_and_shape(nodes[4].input[0])
+    _, add_input_shape_1 = matcher.get_type_and_shape(nodes[4].input[1])
+    nodes_to_add, new_value_infos = _make_efficient_attention_nodes(
+        idx,
+        nodes[1].input[0],
+        nodes[2].input[0],
+        nodes[8].input[0],
+        nodes[9].output[0],
+        nodes[19].input[0],
+        nodes[20].output[0],
+        nodes[21].output[0],
+        nodes[22].output[0],
+        nodes[4].input[1],
+        add_input_shape_0 != add_input_shape_1,
+        1 / float(scale_value[0] if isinstance(scale_value, list) else scale_value),
+        ratio_value,
+        False,
+    )
+    return nodes, nodes_to_add, new_value_infos
+
+
+# Without causal mask, without Dropout. For example, BERT model and disabling attention dropout in HuggingFace.
+_PATTERN_1: List[Tuple[str, bool, List[Tuple[int, int, int]]]] = [
+    ("MatMul", False, []),  # 0
+    ("Transpose", True, [(0, 0, 0)]),  # 1
+    ("Transpose", True, [(0, 0, 1)]),  # 2
+    ("Div", False, [(0, 0, 0)]),  # 3
+    ("Add", False, [(3, 0, 0)]),  # 4
+    ("Softmax", False, [(4, 0, 0)]),  # 5
+    ("MatMul", False, [(5, 0, 0)]),  # 6
+    ("Transpose", True, [(6, 0, 1)]),  # 7
+    ("Transpose", False, [(6, 0, 0)]),  # 8
+    ("FusedMatMul", False, [(7, 0, 1)]),  # 9
+    ("SoftmaxGrad_13", False, [(9, 0, 0), (5, 0, 1)]),  # 10
+    ("Identity", False, [(10, 0, 0)]),  # 11
+    ("Div", False, [(11, 0, 0)]),  # 12
+    ("Identity", False, [(12, 0, 0)]),  # 13
+    ("FusedMatMul", False, [(2, 0, 1), (13, 0, 0)]),  # 14
+    ("FusedMatMul", False, [(1, 0, 0), (13, 0, 1)]),  # 15
+    ("FusedMatMul", False, [(5, 0, 0)]),  # 16
+    ("Transpose", True, [(16, 0, 1)]),  # 17
+    ("Transpose", False, [(14, 0, 0)]),  # 18
+    ("Transpose", False, [(15, 0, 0)]),  # 19
+    ("Transpose", False, [(16, 0, 0)]),  # 20
+]
+
+
+def _optimize_for_pattern_1(matcher: GraphMatcher, idx: int, nodes: List[NodeProto]):
+    # Check forward only as the backward is expected to be consistent if it's built correctly.
+    scale_value = matcher.get_constant_value(nodes[3].input[1])
+    if not (
+        check_attribute_value(nodes[1], "perm", [0, 2, 1, 3])
+        and check_attribute_value(nodes[2], "perm", [0, 2, 3, 1])
+        and scale_value is not None
+        and check_attribute_value(nodes[7], "perm", [0, 2, 1, 3])
+        and check_attribute_value(nodes[8], "perm", [0, 2, 1, 3])
+    ):
+        return [], [], []
+
+    _, add_input_shape_0 = matcher.get_type_and_shape(nodes[4].input[0])
+    _, add_input_shape_1 = matcher.get_type_and_shape(nodes[4].input[1])
+    nodes_to_add, new_value_infos = _make_efficient_attention_nodes(
+        idx,
+        nodes[1].input[0],
+        nodes[2].input[0],
+        nodes[7].input[0],
+        nodes[8].output[0],
+        nodes[17].input[0],
+        nodes[18].output[0],
+        nodes[19].output[0],
+        nodes[20].output[0],
+        nodes[4].input[1],
+        add_input_shape_0 != add_input_shape_1,
+        1 / float(scale_value[0] if isinstance(scale_value, list) else scale_value),
+        0.0,
+        False,
+    )
+    return nodes, nodes_to_add, new_value_infos
+
+
+# No causal mask, no attention mask, without Dropout.
+_PATTERN_2: List[Tuple[str, bool, List[Tuple[int, int, int]]]] = [
+    ("MatMul", False, []),  # 0
+    ("Mul", True, [(0, 0, 0)]),  # 1
+    ("Mul", True, [(0, 0, 1)]),  # 2
+    ("Transpose", True, [(1, 0, 0)]),  # 3
+    ("Transpose", True, [(2, 0, 0)]),  # 4
+    ("Softmax", False, [(0, 0, 0)]),  # 5
+    ("MatMul", False, [(5, 0, 0)]),  # 6
+    ("Transpose", True, [(6, 0, 1)]),  # 7
+    ("Transpose", False, [(6, 0, 0)]),  # 8
+    ("FusedMatMul", False, [(7, 0, 1)]),  # 9
+    ("SoftmaxGrad_13", False, [(9, 0, 0), (5, 0, 1)]),  # 10
+    ("FusedMatMul", False, [(2, 0, 1), (10, 0, 0)]),  # 11
+    ("FusedMatMul", False, [(1, 0, 0), (10, 0, 1)]),  # 12
+    ("Mul", False, [(11, 0, 0)]),  # 13
+    ("Mul", False, [(12, 0, 0)]),  # 14
+    ("Identity", False, [(13, 0, 0)]),  # 15
+    ("Identity", False, [(14, 0, 0)]),  # 16
+    ("Transpose", False, [(15, 0, 0)]),  # 17
+    ("Transpose", False, [(16, 0, 0)]),  # 18
+    ("FusedMatMul", False, [(5, 0, 0)]),  # 19
+    ("Transpose", True, [(19, 0, 1)]),  # 20
+    ("Transpose", False, [(19, 0, 0)]),  # 21
+]
+
+
+def _optimize_for_pattern_2(matcher: GraphMatcher, idx: int, nodes: List[NodeProto]):
+    # Check forward only as the backward is expected to be consistent if it's built correctly.
+    scale_value_1 = matcher.get_constant_value(nodes[1].input[1])
+    scale_value_1 = scale_value_1[0] if isinstance(scale_value_1, list) else scale_value_1
+    scale_value_2 = matcher.get_constant_value(nodes[2].input[1])
+    scale_value_2 = scale_value_2[0] if isinstance(scale_value_2, list) else scale_value_2
+    if not (
+        check_attribute_value(nodes[3], "perm", [0, 2, 1, 3])
+        and check_attribute_value(nodes[4], "perm", [0, 2, 3, 1])
+        and check_attribute_value(nodes[7], "perm", [0, 2, 1, 3])
+        and check_attribute_value(nodes[8], "perm", [0, 2, 1, 3])
+        and scale_value_1 == scale_value_2
+    ):
+        return [], [], []
+
+    nodes_to_add, new_value_infos = _make_efficient_attention_nodes(
+        idx,
+        nodes[3].input[0],
+        nodes[4].input[0],
+        nodes[7].input[0],
+        nodes[8].output[0],
+        nodes[20].input[0],
+        nodes[17].output[0],
+        nodes[18].output[0],
+        nodes[21].output[0],
+        "",
+        False,
+        scale_value_1,
+        0.0,
+        False,
+    )
+    return nodes, nodes_to_add, new_value_infos
+
+
+# Has causal mask, no attention mask, without Dropout.
+_PATTERN_3: List[Tuple[str, bool, List[Tuple[int, int, int]]]] = [
+    ("MatMul", False, []),  # 0
+    ("Mul", True, [(0, 0, 0)]),  # 1
+    ("Mul", True, [(0, 0, 1)]),  # 2
+    ("Transpose", True, [(1, 0, 0)]),  # 3
+    ("Transpose", True, [(2, 0, 0)]),  # 4
+    ("Add", False, [(0, 0, 0)]),  # 5
+    ("Slice", True, [(5, 0, 1)]),  # 6
+    ("Slice", True, [(6, 0, 0)]),  # 7
+    ("Unsqueeze", True, [(6, 0, 2)]),  # 8
+    ("Gather", True, [(8, 0, 0)]),  # 9
+    ("Shape", True, [(9, 0, 0)]),  # 10
+    ("Softmax", False, [(5, 0, 0)]),  # 11
+    ("MatMul", False, [(11, 0, 0)]),  # 12
+    ("Transpose", True, [(12, 0, 1)]),  # 13
+    ("Transpose", False, [(12, 0, 0)]),  # 14
+    ("FusedMatMul", False, [(13, 0, 1)]),  # 15
+    ("SoftmaxGrad_13", False, [(15, 0, 0), (11, 0, 1)]),  # 16
+    ("Identity", False, [(16, 0, 0)]),  # 17
+    ("FusedMatMul", False, [(2, 0, 1), (17, 0, 0)]),  # 18
+    ("FusedMatMul", False, [(1, 0, 0), (17, 0, 1)]),  # 19
+    ("Mul", False, [(18, 0, 0)]),  # 20
+    ("Mul", False, [(19, 0, 0)]),  # 21
+    ("Identity", False, [(20, 0, 0)]),  # 22
+    ("Identity", False, [(21, 0, 0)]),  # 23
+    ("Transpose", False, [(22, 0, 0)]),  # 24
+    ("Transpose", False, [(23, 0, 0)]),  # 25
+    ("FusedMatMul", False, [(11, 0, 0)]),  # 26
+    ("Transpose", True, [(26, 0, 1)]),  # 27
+    ("Transpose", False, [(26, 0, 0)]),  # 28
+]
+
+
+def _optimize_for_pattern_3(matcher: GraphMatcher, idx: int, nodes: List[NodeProto]):
+    # Check forward only as the backward is expected to be consistent if it's built correctly.
+    scale_value_1 = matcher.get_constant_value(nodes[1].input[1])
+    scale_value_1 = scale_value_1[0] if isinstance(scale_value_1, list) else scale_value_1
+    scale_value_2 = matcher.get_constant_value(nodes[2].input[1])
+    scale_value_2 = scale_value_2[0] if isinstance(scale_value_2, list) else scale_value_2
+    if not (
+        check_attribute_value(nodes[3], "perm", [0, 2, 1, 3])
+        and check_attribute_value(nodes[4], "perm", [0, 2, 3, 1])
+        and check_attribute_value(nodes[13], "perm", [0, 2, 1, 3])
+        and check_attribute_value(nodes[14], "perm", [0, 2, 1, 3])
+        and scale_value_1 == scale_value_2
+    ):
+        return [], [], []
+
+    nodes_to_add, new_value_infos = _make_efficient_attention_nodes(
+        idx,
+        nodes[3].input[0],
+        nodes[4].input[0],
+        nodes[13].input[0],
+        nodes[14].output[0],
+        nodes[27].input[0],
+        nodes[24].output[0],
+        nodes[25].output[0],
+        nodes[28].output[0],
+        "",
+        False,
+        scale_value_1,
+        0.0,
+        True,
+    )
+    return nodes, nodes_to_add, new_value_infos
+
+
+_PATTERNS = [
+    (_PATTERN_0, _optimize_for_pattern_0),
+    (_PATTERN_1, _optimize_for_pattern_1),
+    (_PATTERN_2, _optimize_for_pattern_2),
+    (_PATTERN_3, _optimize_for_pattern_3),
+]
+
+
+@register_graph_optimizer(devices="cuda")
+def optimize_graph_for_aten_efficient_attention(graph: GraphProto):
+    nodes_to_remove = []
+    nodes_to_add = []
+    new_value_infos = []
+    matcher = GraphMatcher(graph)
+    idx = 0
+    for pattern_tuple in _PATTERNS:
+        for nodes in matcher.match_pattern(pattern_tuple[0]):
+            remove_nodes, add_nodes, add_value_infos = pattern_tuple[1](matcher, idx, nodes)
+            if len(add_nodes) > 0:
+                nodes_to_remove.extend(remove_nodes)
+                nodes_to_add.extend(add_nodes)
+                new_value_infos.extend(add_value_infos)
+                idx += 1
+    update_graph(graph, nodes_to_remove, nodes_to_add, new_value_infos)
diff --git a/orttraining/orttraining/python/training/ortmodule/graph_optimizers/utils.py b/orttraining/orttraining/python/training/ortmodule/graph_optimizers/utils.py
new file mode 100644
index 0000000000000..e6e5ce56773e1
--- /dev/null
+++ b/orttraining/orttraining/python/training/ortmodule/graph_optimizers/utils.py
@@ -0,0 +1,178 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+
+import itertools
+from typing import Any, Dict, List, Sequence, Tuple
+
+import numpy as np
+from onnx import GraphProto, NodeProto, TensorProto, helper, numpy_helper
+
+
+def _get_attribute(node: NodeProto, attr_name: str, default_value: Any = None) -> Any:
+    """Get attribute value from node by attribute key."""
+    found = [attr for attr in node.attribute if attr.name == attr_name]
+    if found:
+        return helper.get_attribute_value(found[0])
+    return default_value
+
+
+def _to_numpy_array(node: Any) -> np.ndarray:
+    """Convert Constant node or TensorProto to Python value."""
+    tensor = node
+    if isinstance(node, NodeProto):
+        tensor = _get_attribute(node, "value")
+    assert isinstance(tensor, TensorProto)
+    return numpy_helper.to_array(tensor).tolist()
+
+
+class GraphMatcher:
+    """Sub-graph matcher with given pattern.
+
+    GraphMatcher takes an ONNX graph to initialize. It tries to match sub-graphs to a given pattern and yield
+    matched sub-graphs (a list of matched nodes for each sub-graph) one by one.
+
+    Pattern is described by a list. Each entry of the list is a Tuple:
+
+        Tuple[str, bool, List[Tuple[int, int, int]]], e.g., ("FusedMatMul", False, [(2, 0, 1), (15, 0, 0)])
+
+        * First string is the Op type, e.g., "FusedMatMul".
+        * Second bool indicates it's producer node or consumer node for source node.
+        * There is a list to describe the edge infos of this node to other nodes, each edge is a tuple with 3 integers,
+          first integer is the index of the target node in the list, second integer is the output index of the edge,
+          and thrid integer is the input index of the edge.
+
+    For each entry, GraphMatcher used the first edge to lookup target node, and try to use make sure the sug-graph also
+    matches rest edge infos.
+
+    Note that when lookup target node, it will only take the first matched node as target node. For example, if a source
+    node has multiple "MatMul" consumers nodes comsuming same output, only the first "MatMul" node will be returned.
+    You need to avoid using such confusing edge info as the first edge info for node lookup. Try to use other edge to
+    avoid such confusion if possible.
+    """
+
+    def __init__(self, graph: GraphProto):
+        self._graph: GraphProto = graph
+        self._op_type_to_nodes: Dict[str, List[NodeProto]] = {}
+        self._consumer_count: Dict[str, int] = {}
+        for node in graph.node:
+            if node.op_type not in self._op_type_to_nodes:
+                self._op_type_to_nodes[node.op_type] = []
+            self._op_type_to_nodes[node.op_type].append(node)
+            for input in node.input:
+                self._consumer_count[input] = self._consumer_count.get(input, 0) + 1
+
+    def _get_producer(self, arg: str, op_type: str, output_idx: int):
+        for node in self._op_type_to_nodes.get(op_type, []):
+            if (output_idx >= 0 and len(node.output) > output_idx and node.output[output_idx] == arg) or (
+                output_idx == -1 and arg in node.output
+            ):
+                return node
+        return None
+
+    def _get_consumer(self, arg: str, op_type: str, input_idx: int):
+        for node in self._op_type_to_nodes.get(op_type, []):
+            if (input_idx >= 0 and len(node.input) > input_idx and node.input[input_idx] == arg) or (
+                input_idx == -1 and arg in node.input
+            ):
+                return node
+        return None
+
+    def get_consumer_count(self, arg: str):
+        return self._consumer_count.get(arg, 0)
+
+    def get_constant_value(self, arg: str):
+        node_or_initializer = None
+        if "Constant" in self._op_type_to_nodes:
+            for node in self._op_type_to_nodes["Constant"]:
+                if arg in node.output:
+                    node_or_initializer = node
+                    break
+        if node_or_initializer is None:
+            for initializer in self._graph.initializer:
+                if arg == initializer.name:
+                    node_or_initializer = initializer
+                    break
+        if node_or_initializer is None:
+            return None
+        return _to_numpy_array(node_or_initializer)
+
+    def get_type_and_shape(self, arg: str):
+        value_infos = [
+            value_info
+            for value_info in itertools.chain(self._graph.input, self._graph.value_info)
+            if value_info.name == arg
+        ]
+        if len(value_infos) > 0 and value_infos[0].type.tensor_type.HasField("shape"):
+            shape = []
+            for dim in value_infos[0].type.tensor_type.shape.dim:
+                if dim.dim_param:
+                    shape.append(dim.dim_param)
+                else:
+                    shape.append(dim.dim_value)
+            return value_infos[0].type.tensor_type.elem_type, shape
+        initializers = [initializer for initializer in self._graph.initializer if initializer.name == arg]
+        if len(initializers) > 0:
+            return initializers[0].data_type, initializers[0].dims
+        return None, None
+
+    def _match_pattern(self, node: NodeProto, pattern: List[Tuple[str, bool, List[Tuple[int, int, int]]]]):
+        nodes = [node]
+        for i in range(1, len(pattern)):
+            next_op_type = pattern[i][0]
+            is_producer = pattern[i][1]
+            node_idx, output_idx, input_idx = pattern[i][2][0]
+            next_node = (
+                self._get_producer(nodes[node_idx].input[input_idx], next_op_type, output_idx)
+                if is_producer
+                else self._get_consumer(nodes[node_idx].output[output_idx], next_op_type, input_idx)
+            )
+            if next_node is None:
+                return []
+            for j in range(1, len(pattern[i][2])):
+                node_idx, output_idx, input_idx = pattern[i][2][j]
+                assert output_idx >= 0 and input_idx >= 0
+                if (not is_producer and nodes[node_idx].output[output_idx] != next_node.input[input_idx]) or (
+                    is_producer and next_node.output[output_idx] != nodes[node_idx].input[input_idx]
+                ):
+                    return []
+            nodes.append(next_node)
+        return nodes
+
+    def match_pattern(self, pattern: List[Tuple[str, bool, List[Tuple[int, int, int]]]]):
+        for node in self._op_type_to_nodes.get(pattern[0][0], []):
+            result = self._match_pattern(node, pattern)
+            if len(result) == len(pattern):
+                yield result
+
+
+def check_attribute_value(node: NodeProto, attr_name: str, expected_value: Any):
+    """Check if the attribute of given node has expected value."""
+    value = _get_attribute(node, attr_name)
+    return value == expected_value
+
+
+def make_constant_node(name: str, dtype: TensorProto.DataType, dims: Sequence[int], vals: Any):
+    """Create a constant node with given constant tensor (data type, shape, and data)."""
+    return helper.make_node(
+        "Constant",
+        inputs=[],
+        outputs=[name],
+        value=helper.make_tensor(name=name, data_type=dtype, dims=dims, vals=vals),
+    )
+
+
+def update_graph(
+    graph: GraphProto,
+    nodes_to_remove: List[NodeProto],
+    nodes_to_add: List[NodeProto],
+    new_value_infos: List[TensorProto] = [],  # noqa: B006
+):
+    """Update an ONNX graph by removing some nodes, and adding some new nodes and value infos."""
+    nodes = [node for node in graph.node if node not in nodes_to_remove]
+    nodes.extend(nodes_to_add)
+    graph.ClearField("node")
+    graph.node.extend(nodes)
+    if len(new_value_infos) > 0:
+        graph.value_info.extend(new_value_infos)
diff --git a/orttraining/orttraining/python/training/ortmodule/graph_transformer_registry.py b/orttraining/orttraining/python/training/ortmodule/graph_transformer_registry.py
deleted file mode 100644
index 70056179c140e..0000000000000
--- a/orttraining/orttraining/python/training/ortmodule/graph_transformer_registry.py
+++ /dev/null
@@ -1,47 +0,0 @@
-# -------------------------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# Licensed under the MIT License.
-# --------------------------------------------------------------------------
-
-from typing import Callable
-
-from onnx.onnx_ml_pb2 import GraphProto
-
-
-class GraphTransformerRegistry:
-    _TRANSFORMER_FUNCS = {}  # noqa: RUF012
-
-    @classmethod
-    def register(cls, target_modules: str, devices: str, priority: int, fn: Callable[[GraphProto], None]):
-        modules = []
-        if target_modules == "all":
-            modules.append("all")
-        else:
-            modules = target_modules.split("|")
-        for module in modules:
-            if module in cls._TRANSFORMER_FUNCS:
-                cls._TRANSFORMER_FUNCS[module].append((fn, devices, priority))
-            else:
-                cls._TRANSFORMER_FUNCS[module] = [(fn, devices, priority)]
-
-    @classmethod
-    def transform_all(cls, module_name: str, device: str, graph: GraphProto):
-        transformers_to_apply = []
-        if "all" in cls._TRANSFORMER_FUNCS:
-            transformers_to_apply.extend(cls._TRANSFORMER_FUNCS["all"])
-        if module_name in cls._TRANSFORMER_FUNCS:
-            transformers_to_apply.extend(cls._TRANSFORMER_FUNCS[module_name])
-        transformers_to_apply = [x for x in transformers_to_apply if x[1] == "all" or device in x[1]]
-        transformers_to_apply.sort(key=lambda x: x[2], reverse=True)
-        for fn, _, _ in transformers_to_apply:
-            fn(graph)
-
-
-# target_modules can be multiple module names separated by "|", or "all" means apply to all modules.
-# devices can be multiple device types separated by "|" or "all" means apply to all devices.
-def register_graph_transformer(target_modules: str = "all", devices: str = "all", priority: int = 0):
-    def graph_transformer_wrapper(fn):
-        GraphTransformerRegistry.register(target_modules, devices, priority, fn)
-        return fn
-
-    return graph_transformer_wrapper
diff --git a/orttraining/orttraining/python/training/ortmodule/options.py b/orttraining/orttraining/python/training/ortmodule/options.py
index 0eb6790d7a462..77022f86d3ff3 100644
--- a/orttraining/orttraining/python/training/ortmodule/options.py
+++ b/orttraining/orttraining/python/training/ortmodule/options.py
@@ -137,7 +137,7 @@ def logging(self):
     def torch_exporter_filter(self):
         """Accessor for the filter export logs configuration."""
         torch_version = get_runtime_pytorch_version()
-        if self.log_level >= LogLevel.INFO:
+        if self.log_level > LogLevel.DEVINFO:
             if torch_version < version.parse("2.0"):
                 return [
                     # WARNING: The shape inference of com.microsoft::SoftmaxCrossEntropyLossInternal type is missing, so it may result in wrong shape inference for the exported graph. Please consider adding it in symbolic function.
@@ -167,11 +167,6 @@ def torch_exporter_filter(self):
     @property
     def onnxruntime_log_filter(self):
         """Accessor for the filter onnxruntime logs configuration."""
-        if self.log_level >= LogLevel.INFO:
-            return [
-                "CleanUnusedInitializersAndNodeArgs] Removing initializer",
-                "Serializing optimized model with Graph Optimization level greater than ORT_ENABLE_EXTENDED",
-            ]
         return None
 
 
@@ -267,7 +262,7 @@ def __init__(self, logger: Logger):
 
         # Configuration for dev tools.
         self.print_input_density = False
-        self.print_memory_stat = False
+        self.print_memory_stat_by_step = False
 
         # Configuration for fallback.
         self.fallback_policy = ortmodule.ORTMODULE_FALLBACK_POLICY
@@ -326,7 +321,7 @@ def _override_from_env_vars(self):
         if "ORTMODULE_PRINT_INPUT_DENSITY" in os.environ:
             self.print_input_density = int(os.getenv("ORTMODULE_PRINT_INPUT_DENSITY")) == 1
         if "ORTMODULE_PRINT_MEMORY_STATS" in os.environ:
-            self.print_memory_stat = int(os.getenv("ORTMODULE_PRINT_MEMORY_STATS")) == 1
+            self.print_memory_stat_by_step = int(os.getenv("ORTMODULE_PRINT_MEMORY_STATS")) == 1
 
         # Configuration for fallback.
         if "ORTMODULE_FALLBACK_POLICY" in os.environ:
@@ -366,7 +361,7 @@ def _override_from_env_vars(self):
 
         # Cache exported model
         if "ORTMODULE_CACHE_DIR" in os.environ:
-            self._logger.info("ORTModule cache optimization is ON.")
+            self._logger.warning("ORTModule optimization for caching exported model is ON.")
             self.ortmodule_cache_dir = os.getenv("ORTMODULE_CACHE_DIR")
 
         # Experimental features.
diff --git a/orttraining/orttraining/python/training/ortmodule/torch_cpp_extensions/cpu/torch_interop_utils/torch_interop_utils.cc b/orttraining/orttraining/python/training/ortmodule/torch_cpp_extensions/cpu/torch_interop_utils/torch_interop_utils.cc
index e55aacb2334b2..d36720100e57a 100644
--- a/orttraining/orttraining/python/training/ortmodule/torch_cpp_extensions/cpu/torch_interop_utils/torch_interop_utils.cc
+++ b/orttraining/orttraining/python/training/ortmodule/torch_cpp_extensions/cpu/torch_interop_utils/torch_interop_utils.cc
@@ -150,6 +150,34 @@ bool get_materialize_grads(at::Tensor target) {
   return py_fn->materialize_grads;
 }
 
+std::vector<bool> are_tensors_marked_as_dirty(at::Tensor target, std::vector<at::Tensor> tensors_to_check) {
+  torch::autograd::AutogradMeta* autograd_meta = torch::autograd::impl::get_autograd_meta(target);
+  const auto& grad_fn = autograd_meta->grad_fn_;
+  auto py_node_fn = dynamic_cast<torch::autograd::PyNode*>(grad_fn.get());
+  TORCH_CHECK(py_node_fn != nullptr, "grad_fn is not PyNode type.");
+  THPFunction* py_fn = (THPFunction*)py_node_fn->obj;
+  std::vector<bool> are_tensors_marked_dirty(tensors_to_check.size(), false);
+  if (!py_fn->dirty_tensors)
+    return are_tensors_marked_dirty;
+
+  Py_ssize_t num_dirty = PyTuple_GET_SIZE(py_fn->dirty_tensors);
+  for (const auto j : c10::irange(tensors_to_check.size())) {
+    bool is_tensor_marked_dirty = false;
+    for (const auto i : c10::irange(num_dirty)) {
+      PyObject* obj = PyTuple_GET_ITEM(py_fn->dirty_tensors, i);
+      const auto& tensor = THPVariable_Unpack(obj);
+      if (tensor.is_same(tensors_to_check[j])) {
+        is_tensor_marked_dirty = true;
+        break;
+      }
+    }
+
+    are_tensors_marked_dirty[j] = is_tensor_marked_dirty;
+  }
+
+  return are_tensors_marked_dirty;
+}
+
 PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
   m.def("register_grad_fn_and_remove_from_autograd", &register_grad_fn_and_remove_from_autograd,
         "Increase grad_fn shared pointer reference.");
@@ -158,4 +186,5 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
   m.def("clear_grad_fns_for_next_edges", &clear_grad_fns_for_next_edges,
         "Remove reference on next edges' gradient functions.");
   m.def("get_materialize_grads", &get_materialize_grads, "Return whether materialize_grads is enabled or not.");
+  m.def("are_tensors_marked_as_dirty", &are_tensors_marked_as_dirty, "Return whether the tensors are marked dirty or not.");
 }
diff --git a/orttraining/orttraining/python/training/orttrainer.py b/orttraining/orttraining/python/training/orttrainer.py
deleted file mode 100644
index a6c6c8af2723b..0000000000000
--- a/orttraining/orttraining/python/training/orttrainer.py
+++ /dev/null
@@ -1,1537 +0,0 @@
-import copy
-import io
-import os
-import warnings
-from functools import partial
-from inspect import signature
-
-import numpy as np
-import onnx
-import torch
-
-import onnxruntime as ort
-from onnxruntime.tools.symbolic_shape_infer import SymbolicShapeInference
-
-from . import _checkpoint_storage, _utils, amp, checkpoint, optim, postprocess
-from .model_desc_validation import _ORTTrainerModelDesc
-from .orttrainer_options import ORTTrainerOptions
-
-
-class TrainStepInfo:
-    r"""Private class used to store runtime information from current train step.
-
-    After every train step, :py:meth:`ORTTrainer.train_step` updates the internal instance of
-    :py:class:`.TrainStepInfo` residing on :py:class:`.ORTTrainer` with relevant information
-    from the forward pass.
-
-    This class shouldn't be accessed directly by the user, unless they really know what they are doing.
-    Instead, :py:class:`.ORTTrainer` passes it to relevant class methods automatically,
-    such as :py:method:`._LRScheduler.get_lr` or :py:class:`.LossScaler.update`.
-
-    Args:
-        optimizer_config (optim._OptimizerConfig): reference to optimizer config
-        all_finite (bool, default is True): flag that indicates whether all gradients are still finite after last step
-        fetches (list of str, default is []): list of output names to fetch from train_step/eval_step. Set it to [] to reset normal behavior.
-        optimization_step (int): indicates the number of optimizations performed. Used for learning rate scheduling
-        step (int): indicates current training step. Used for gradient accumulation
-
-    Example:
-
-        .. code-block:: python
-
-            info = TrainStepInfo(optimizer_config=optim.SGDConfig(lr=0.01))
-            if info.all_finite:
-                print(f'Yay, all gradients are finite at {step} step!')
-
-    """
-
-    def __init__(self, optimizer_config, all_finite=True, fetches=[], optimization_step=0, step=0):  # noqa: B006
-        assert isinstance(optimizer_config, optim._OptimizerConfig), "optimizer_config must be a optim._OptimizerConfig"
-        assert isinstance(all_finite, bool), "all_finite must be a bool"
-        assert isinstance(fetches, list) and all(
-            [isinstance(item, str) for item in fetches]
-        ), "fetches must be a list of str"
-        assert isinstance(optimization_step, int) and optimization_step >= 0, "optimization_step must be a positive int"
-        assert isinstance(step, int) and step >= 0, "step must be a positive int"
-
-        self.optimizer_config = optimizer_config
-        self.all_finite = all_finite
-        self.fetches = fetches
-        self.optimization_step = optimization_step
-        self.step = step
-
-
-class ORTTrainer:
-    r"""Pytorch frontend for ONNX Runtime training
-
-    Entry point that exposes the C++ backend of ORT as a Pytorch frontend.
-
-    Args:
-        model (torch.nn.Module or onnx.ModelProto): either a PyTorch or ONNX model.
-            When a PyTorch model and :py:attr:`loss_fn` are specified, :py:attr:`model` and :py:obj:`loss_fn` are combined.
-            When a ONNX model is provided, the loss is identified by the flag :py:obj:`is_loss=True` in one of the :py:attr:`.model_desc.outputs` entries.
-        model_desc (dict): model input and output description.
-            This is used to identify inputs and outputs and their shapes, so that ORT can generate back propagation graph, plan memory allocation for
-            training, and perform optimizations.
-            :py:attr:`model_desc` must be consistent with the training :py:attr:`model` and have the following (:py:obj:`dict`) schema
-            :py:obj:`{ 'inputs': [tuple(name, shape)], 'outputs': [tuple(name, shape, is_loss)]}`.
-            :py:attr:`name` is a string representing the name of input or output of the model.
-            For :py:obj:`model_desc['inputs']` entries, :py:attr:`name` must match input names of the original PyTorch model's :py:meth:`torch.nn.Module.forward` method.
-            For ONNX models, both name and order of input names must match.
-            For :py:obj:`model_desc['outputs']` entries, the order must match the original PyTorch's output as returned by :py:meth:`torch.nn.Module.forward` method.
-            For ONNX models, both name and order of output names must match.
-            :py:attr:`shape` is a list of string or integers that describes the shape of the input/output.
-            Each dimension size can be either a string or an int. String means the dimension size is dynamic, while integers mean static dimensions.
-            An empty list implies a scalar.
-            Lastly, :py:attr:`is_loss` is a boolean (default is False) that flags if this output is considered a loss.
-            ORT backend needs to know which output is loss in order to generate back propagation graph.
-            Loss output must be specified when either :py:attr:`loss_fn` is specified or when loss is embedded in the model.
-            Note that only one loss output is supported per model.
-        optimizer_config (optim._OptimizerConfig): optimizer config.
-            One of :py:class:`.optim.AdamConfig`, :py:class:`.optim.LambConfig` or :py:class:`.optim.SGDConfig`.
-        loss_fn (callable, default is None): a PyTorch loss function.
-            It takes two inputs [prediction, label] and outputs a scalar loss tensor.
-            If provided, :py:attr:`loss_fn` is combined with the PyTorch :py:attr:`model` to form a combined PyTorch model.
-            Inputs to the combined PyTorch model are concatenation of the :py:attr:`model`'s input and :py:attr:`loss_fn`'s label input.
-            Outputs of the combined PyTorch model are concatenation of :py:attr:`loss_fn`'s loss output and :py:attr:`model`'s outputs.
-        options (ORTTrainerOptions, default is None): options for additional features.
-    Example:
-
-        .. code-block:: python
-
-            model = ...
-            loss_fn = ...
-            model_desc = {
-                "inputs": [
-                    ("input_ids", ["batch", "max_seq_len_in_batch"]),
-                    ("attention_mask", ["batch", "max_seq_len_in_batch"]),
-                    ("token_type_ids", ["batch", "max_seq_len_in_batch"]),
-                    ("masked_lm_labels", ["batch", "max_seq_len_in_batch"]),
-                    ("next_sentence_label", ["batch", 1])
-                ],
-                "outputs": [
-                    ("loss", [], True),
-                ],
-            }
-            optim_config = optim.LambConfig(param_groups = [ { 'params' : ['model_param0'], 'alpha' : 0.8, 'beta' : 0.7},
-                                                             { 'params' : ['model_param1' , 'model_param_2'], 'alpha' : 0.0}
-                                                           ],
-                                            alpha=0.9, beta=0.999)
-            ort_trainer = ORTTrainer(model, model_desc, optim_config, loss_fn)
-    """
-
-    def __init__(self, model, model_desc, optim_config, loss_fn=None, options=None):
-        warnings.warn(
-            "ORTTrainer is deprecated and will be removed in ort release 1.14. Please use ORTModule instead.",
-            FutureWarning,
-        )
-
-        assert model is not None, "'model' is required and must be either a 'torch.nn.Module' or ONNX model"
-        assert isinstance(model_desc, dict), "'model_desc' must be a 'dict'"
-        assert isinstance(
-            optim_config, optim._OptimizerConfig
-        ), "'optim_config' is required and must be any of 'AdamConfig', 'LambConfig' or 'SGDConfig'"
-        assert loss_fn is None or (
-            callable(loss_fn) and len(signature(loss_fn).parameters) == 2
-        ), "'loss_fn' must be either 'None' or a callable with two parameters"
-        assert options is None or isinstance(
-            options, ORTTrainerOptions
-        ), "'options' must be either 'None' or 'ORTTrainerOptions'"
-
-        #            Model + Loss validation
-        #           Supported combinarios are
-        #    ----------------------------------------
-        #   |   | Model            | Loss            |
-        #    ----------------------------------------
-        #   | 1 | torch.nn.Module  | None            |
-        #   | 2 | torch.nn.Module  | torch.nn.Module |
-        #   | 3 | ONNX             | None            |
-        #    ----------------------------------------
-        self._torch_model = None
-        self._onnx_model = None
-        if isinstance(model, torch.nn.Module):
-            assert loss_fn is None or isinstance(
-                model, torch.nn.Module
-            ), "'loss_fn' must be either 'None' or 'torch.nn.Module'"
-            self._torch_model = model
-            self.loss_fn = loss_fn
-            # TODO: Remove when experimental checkpoint functions are removed.
-            self._torch_state_dict_keys = list(model.state_dict().keys())
-        elif isinstance(model, onnx.ModelProto):
-            assert loss_fn is None, "'loss_fn' must not be specified when 'model' is an ONNX model"
-            self._onnx_model = model
-            self.loss_fn = None
-        else:
-            raise ValueError("'model' must be either 'torch.nn.Module' or 'onnx.ModelProto'")
-
-        self.model_desc = _ORTTrainerModelDesc(model_desc)
-        self.optim_config = optim_config
-
-        # ORTTrainerOptions
-        if not options:
-            options = ORTTrainerOptions()
-        self.options = options
-        if self.options.mixed_precision.enabled and not self.options.mixed_precision.loss_scaler:
-            # TODO: Move this to model_desc_validation.py
-            self.options.mixed_precision.loss_scaler = amp.loss_scaler.DynamicLossScaler()
-        # Post processing ONNX model given as input
-        if self._onnx_model:
-            if self.options._internal_use.enable_internal_postprocess:
-                self._onnx_model = postprocess.run_postprocess(self._onnx_model)
-            if self.options._internal_use.extra_postprocess:
-                self._onnx_model = self.options._internal_use.extra_postprocess(self._onnx_model)
-                assert isinstance(self._onnx_model, onnx.ModelProto), "'extra_postprocess' must return a ONNX model"
-
-            # When input model is already ONNX (and not exported from Pytorch within ORTTrainer),
-            # append 'dtype' from ONNX into model description's
-            for idx_i, i_desc in enumerate(self.model_desc.inputs):
-                dtype = None
-                for onnx_input in self._onnx_model.graph.input:
-                    if onnx_input.name == i_desc.name:
-                        dtype = _utils.dtype_onnx_to_torch(onnx_input.type.tensor_type.elem_type)
-                        self.model_desc.add_type_to_input_description(idx_i, dtype)
-                        break
-                assert dtype is not None, f"ONNX model with unknown input type ({i_desc.name})"
-            for idx_o, o_desc in enumerate(self.model_desc.outputs):
-                dtype = None
-                for onnx_output in self._onnx_model.graph.output:
-                    if onnx_output.name == o_desc.name:
-                        dtype = _utils.dtype_onnx_to_torch(onnx_output.type.tensor_type.elem_type)
-                        self.model_desc.add_type_to_output_description(idx_o, dtype)
-                        break
-                assert dtype is not None, f"ONNX model with unknown output type ({o_desc.name})"
-
-        try:
-            from torch.utils.cpp_extension import ROCM_HOME
-
-            self.is_rocm_pytorch = bool(torch.version.hip is not None and ROCM_HOME is not None)
-        except ImportError:
-            self.is_rocm_pytorch = False
-
-        # TODO: Remove when experimental checkpoint functions are removed.
-        self._state_dict = {}
-
-        self._train_step_info = TrainStepInfo(self.optim_config)
-        self._training_session = None
-        self._load_state_dict = None
-        self._init_session(
-            provider_options=self.options._validated_opts["provider_options"],
-            session_options=self.options.session_options,
-        )
-
-    def eval_step(self, *args, **kwargs):
-        r"""Evaluation step method
-
-        Args:
-            *args: Arbitrary arguments that are used as model input (data only)
-            **kwargs: Arbitrary keyword arguments that are used as model input (data only)
-
-        Returns:
-            ordered :py:obj:`list` with model outputs as described by :py:attr:`.ORTTrainer.model_desc`
-        """
-        # Get data. CombineTorchModelLossFn takes label as last input and outputs loss first
-        sample_input = self._prepare_model_input(self.model_desc.inputs, None, None, *args, **kwargs)
-
-        # Export model to ONNX
-        if self._onnx_model is None:
-            if self._torch_model is not None:
-                self._init_onnx_model(sample_input)
-            else:
-                raise RuntimeError("Model is uninitialized. Only ONNX and PyTorch models are supported")
-
-        # Prepare input/output description
-        inputs_desc = self.model_desc.inputs
-        outputs_desc = self.model_desc.outputs
-        if self._train_step_info.fetches:
-            outputs_desc = [o_desc for o_desc in outputs_desc if o_desc.name in self._train_step_info.fetches]
-            if len(outputs_desc) != len(self._train_step_info.fetches):
-                raise RuntimeError("The specified fetches list contains invalid output names")
-
-        # Normalize input
-        if not isinstance(sample_input, (list, tuple)):
-            sample_input = (sample_input,)
-
-        # RunOptions
-        run_options = ort.RunOptions()
-        run_options.only_execute_path_to_fetches = True
-        run_options.training_mode = False
-
-        # Run a eval step and return
-        session_run_results = self._training_session_run_helper(
-            False, sample_input, inputs_desc, outputs_desc, run_options
-        )
-
-        # Output must be returned in the same order as defined in the model description
-        results = [session_run_results[o_desc.name] for o_desc in outputs_desc]
-        return results[0] if len(results) == 1 else results
-
-    def save_as_onnx(self, path):
-        r"""Persists ONNX model into :py:attr:`path`
-
-        The model will be saved as a Google Protocol Buffers (aka protobuf) file as per ONNX standard.
-        The graph includes full information, including inference and training metadata.
-
-        Args:
-            path (str): Full path, including filename, to save the ONNX model in the filesystem
-
-        Raises:
-            RuntimeWarning: raised when neither `train_step` or `eval_step` was called at least once
-            ValueError: raised when `path` is not valid path
-        """
-        if not self._training_session:
-            warnings.warn(
-                "Training session is not initialized yet. "
-                "'train_step' or 'eval_step' methods must be executed at least once before calling 'save_as_onnx()'."
-            )
-            return
-        state_tensors = self._training_session.get_state()
-        self._update_onnx_model_initializers(state_tensors)
-
-        assert isinstance(path, str), "'path' must be a valid path string"
-        dir_name = os.path.dirname(path)
-        file_name = os.path.basename(path)
-        if (dir_name and not os.path.exists(dir_name)) or not file_name:
-            warnings.warn("'path' is not valid or does not exist")
-            return
-
-        with open(path, "wb") as f:
-            f.write(self._onnx_model.SerializeToString())
-
-    def _check_model_export(self, input):
-        from numpy.testing import assert_allclose
-        from onnx import TensorProto, helper, numpy_helper  # noqa: F401
-
-        onnx_model_copy = copy.deepcopy(self._onnx_model)
-
-        # Mute the dropout nodes
-        dropout_nodes = [n for n in onnx_model_copy.graph.node if n.op_type == "Dropout"]
-        for node in dropout_nodes:
-            ratio_node = [n for n in onnx_model_copy.graph.node if node.input[1] in n.output][0]
-            training_mode_node = [n for n in onnx_model_copy.graph.node if node.input[2] in n.output][0]
-
-            training_mode_node.attribute.pop()
-            ratio_node.attribute.pop()
-            new_training_mode_arr = np.array(False, dtype=bool)
-            new_ratio_arr = np.array(0.0, dtype=np.float32)
-            new_training_mode = numpy_helper.from_array(new_training_mode_arr)
-            new_ratio = numpy_helper.from_array(new_ratio_arr)
-            training_mode_node.attribute.add().t.CopyFrom(new_training_mode)
-            ratio_node.attribute.add().t.CopyFrom(new_ratio)
-            training_mode_node.attribute[0].type = 4
-            ratio_node.attribute[0].type = 4
-            training_mode_node.attribute[0].name = "value"
-            ratio_node.attribute[0].name = "value"
-
-        _inference_sess = ort.InferenceSession(
-            onnx_model_copy.SerializeToString(), providers=ort.get_available_providers()
-        )
-        inf_inputs = {}
-        for i, input_elem in enumerate(input):
-            inf_inputs[_inference_sess.get_inputs()[i].name] = input_elem.cpu().numpy()
-        _inference_outs = _inference_sess.run(None, inf_inputs)
-        for torch_item, ort_item in zip(self.torch_sample_outputs, _inference_outs):
-            assert_allclose(
-                torch_item,
-                ort_item,
-                rtol=1e-2,
-                atol=1e-6,
-                err_msg="Mismatch between outputs of PyTorch model and exported ONNX model. "
-                "Note that different backends may exhibit small computational differences."
-                "If this is within acceptable margin, or if there is random generator "
-                "in the model causing inevitable mismatch, you can proceed training by "
-                "setting the flag debug.check_model_export to False.",
-            )
-
-    def train_step(self, *args, **kwargs):
-        r"""Train step method
-
-        After forward pass, an ordered list with all outputs described at :py:attr:`ORTTrainer.model_desc` is returned.
-        Additional information relevant to the train step is maintend by :py:attr:`ORTTrainer._train_step_info`.
-        See :py:class:`.TrainStepInfo` for details.
-
-        Args:
-            *args: Arbitrary arguments that are used as model input (data only)
-            **kwargs: Arbitrary keyword arguments that are used as model input (data only)
-
-        Returns:
-            ordered :py:obj:`list` with model outputs as described by :py:attr:`ORTTrainer.model_desc`
-        """
-        # Export model to ONNX
-        if self._onnx_model is None:
-            sample_input = self._prepare_model_input(self.model_desc.inputs, None, None, *args, **kwargs)
-            self._init_onnx_model(sample_input)
-
-            # Debug Model Export if indicated
-            if self.options.debug.check_model_export:
-                self._check_model_export(sample_input)
-
-        # Prepare inputs+lr and output descriptions
-        inputs_desc = self._model_desc_inputs_with_lr
-        outputs_desc = self.model_desc.outputs
-
-        # Train step must be incremented *before* gradient accumulation code
-        # Gradients are accumulated when
-        # self._train_step_info.step % self.options.batch.gradient_accumulation_steps != 0,
-        # and they are updated otherwise
-        self._train_step_info.step += 1
-
-        # RunOptions
-        run_options = None
-        mixed_precision_without_fetches = False
-        if self._train_step_info.fetches:
-            outputs_desc = [o_desc for o_desc in outputs_desc if o_desc.name in self._train_step_info.fetches]
-            if len(outputs_desc) != len(self._train_step_info.fetches):
-                raise RuntimeError("The specified fetches list contains invalid output names")
-        elif self._train_step_info.step % self.options.batch.gradient_accumulation_steps != 0:
-            run_options = ort.RunOptions()
-            run_options.only_execute_path_to_fetches = True
-            outputs_desc = self._model_desc_outputs_with_gradient_accumulation
-        elif self.options.mixed_precision.enabled:
-            mixed_precision_without_fetches = True
-            outputs_desc = self._model_desc_outputs_with_all_finite
-
-        # Update Learning Rate if Necessary
-        lr = self.optim_config.lr
-        if self.options.lr_scheduler:
-            lr = self.options.lr_scheduler._step(self._train_step_info)[0]
-
-        # Loss Scale for mixed precision
-        loss_scale = None
-        if self.options.mixed_precision.enabled:
-            loss_scaler = self.options.mixed_precision.loss_scaler
-            assert loss_scaler, "Loss scaler is required when mixed precision is enabled"
-            loss_scale = loss_scaler.loss_scale
-            inputs_desc = self._model_desc_inputs_with_lr_and_loss_scale
-
-        # Get data. CombineTorchModelLossFn takes label as last input and outputs loss first
-        input = self._prepare_model_input(inputs_desc, lr, loss_scale, *args, **kwargs)
-
-        # Normalize input
-        if not isinstance(args, (list, tuple)):
-            args = (args,)
-
-        # Run a train step and return
-        session_run_results = self._training_session_run_helper(True, input, inputs_desc, outputs_desc, run_options)
-        if mixed_precision_without_fetches:
-            # After session run with all_fp32_gradients_finite, we need to clear the training I/O binding's output
-            # Otherwise next run with only_execute_path_to_fetches will lead to gradient all reduce
-            # because all_fp32_gradients_finite is still in the feed.
-            self._train_io_binding.clear_binding_outputs()
-
-            is_all_finite = session_run_results[self.model_desc.all_finite.name]
-            self._train_step_info.all_finite = is_all_finite
-            if loss_scaler:
-                loss_scaler.update(self._train_step_info)
-            if is_all_finite:
-                # Optimization step must be incremented *after* optimization is successful
-                self._train_step_info.optimization_step += 1
-        elif self._train_step_info.step % self.options.batch.gradient_accumulation_steps == 0:
-            # Optimization step must be incremented *after* optimization is successful
-            self._train_step_info.optimization_step += 1
-
-        # Output must be returned in the same order as defined in the model description
-        # or in the order specified by TrainStepInfo.fetches, if applicable
-        if self._train_step_info.fetches:
-            results = [session_run_results[o_desc] for o_desc in self._train_step_info.fetches]
-        else:
-            results = [session_run_results[o_desc.name] for o_desc in self.model_desc.outputs]
-        return results[0] if len(results) == 1 else results
-
-    def _convert_torch_model_loss_fn_to_onnx(self, inputs, device):
-        # Dynamic axes
-        dynamic_axes = {}
-        for input in self.model_desc.inputs:
-            symbolic_axis = {}
-            for i, axis in enumerate(input.shape):
-                if isinstance(axis, str):
-                    symbolic_axis[i] = axis
-            if len(symbolic_axis):
-                dynamic_axes[input.name] = symbolic_axis
-        for output in self.model_desc.outputs:
-            symbolic_axis = {}
-            for i, axis in enumerate(output.shape):
-                if isinstance(axis, str):
-                    symbolic_axis[i] = axis
-            if len(symbolic_axis):
-                dynamic_axes[output.name] = symbolic_axis
-
-        if isinstance(inputs, torch.Tensor):
-            inputs = [inputs]
-        if isinstance(inputs, dict):
-            sample_inputs = [inputs[k.name_].to(device=device) for k in self.model_desc.inputs]
-        elif isinstance(inputs, (list, tuple)):
-            sample_inputs = [
-                input.to(device=device) for i, input in enumerate(inputs) if i < len(self.model_desc.inputs)
-            ]
-        else:
-            raise RuntimeError(
-                "Unexpected input type. Only torch.Tensor, or dict/list/tuple of torch.Tensor is supported."
-            )
-
-        # PyTorch ONNX exporter does not match argument names
-        # This is an issue because the ONNX graph depends on all inputs to be specified
-
-        # Validate loss_fn
-        if self.loss_fn:
-            sig_loss = signature(self.loss_fn)
-            if len(sig_loss.parameters) != 2:
-                raise RuntimeError("loss function should take two arguments - predict and label.")
-
-        # Basic input names from model
-        input_names = [input.name for input in self.model_desc.inputs]
-        sig = signature(self._torch_model.forward)
-        ordered_input_list = list(sig.parameters.keys())
-
-        # Label from loss_fn goes after model input
-        if self.loss_fn:
-            ordered_input_list = [*ordered_input_list, list(sig_loss.parameters.keys())[1]]
-
-        class CombineTorchModelLossFnWrapInput(torch.nn.Module):
-            def __init__(self, model, loss_fn, input_names):
-                super().__init__()
-                self.model = model
-                self.loss_fn = loss_fn
-                self.input_names = input_names
-
-            def forward(self, *inputs):
-                sig = signature(self.model.forward)
-
-                input_dict = {}
-                for key in sig.parameters:
-                    if key in self.input_names:
-                        input_dict[key] = inputs[self.input_names.index(key)]
-
-                model_out = self.model(**input_dict)
-                if self.loss_fn is None:
-                    return model_out
-
-                label = inputs[-1]
-                preds = model_out
-                return self.loss_fn(preds, label), preds
-
-        model = CombineTorchModelLossFnWrapInput(self._torch_model, self.loss_fn, input_names)
-
-        # Do an inference to grab output types
-        model.eval()
-        with torch.no_grad():
-            # Deepcopy inputs, since input values may change after model run.
-            sample_inputs_copy = copy.deepcopy(sample_inputs)
-            try:
-                # Deepcopy model, in case model is stateful and changes after model run.
-                model_copy = copy.deepcopy(model)
-            except Exception:
-                model_copy = model
-                warnings.warn(
-                    "This model cannot be deep copied (or pickled), which is a required step for stateful models to be properly exported to ONNX."
-                    " Compute will continue, but unexpected results may occur!"
-                )
-            sample_outputs = model_copy(*sample_inputs_copy)
-            self.torch_sample_outputs = sample_outputs
-        model.train()
-
-        if isinstance(sample_outputs, torch.Tensor):
-            sample_outputs = [sample_outputs]
-
-        # Append 'dtype' for model description's inputs/outputs
-        for idx_i, sample_input in enumerate(sample_inputs):
-            if idx_i < len(self.model_desc.inputs):
-                self.model_desc.add_type_to_input_description(idx_i, sample_input.dtype)
-        for idx_o, sample_output in enumerate(sample_outputs):
-            if idx_o < len(self.model_desc.outputs):
-                self.model_desc.add_type_to_output_description(idx_o, sample_output.dtype)
-
-        # Export the model to ONNX
-        f = io.BytesIO()
-
-        # Deepcopy inputs, since input values may change after model run.
-        sample_inputs_copy = copy.deepcopy(sample_inputs)
-
-        # Handle contrib OPs support
-        from onnxruntime.tools import pytorch_export_contrib_ops
-
-        if self.options._internal_use.enable_onnx_contrib_ops:
-            pytorch_export_contrib_ops.register()
-        else:
-            # Unregister in case they were registered in previous calls.
-            pytorch_export_contrib_ops.unregister()
-
-        # Export torch.nn.Module to ONNX
-        torch.onnx.export(
-            model,
-            tuple(sample_inputs_copy),
-            f,
-            input_names=[input.name for input in self.model_desc.inputs],
-            output_names=[output.name for output in self.model_desc.outputs],
-            opset_version=self.options._internal_use.onnx_opset_version,
-            dynamic_axes=dynamic_axes,
-            do_constant_folding=False,
-            training=torch.onnx.TrainingMode.TRAINING,
-        )
-        onnx_model = onnx.load_model_from_string(f.getvalue())
-
-        # Remove 'model.' prefix introduced by CombineTorchModelLossFn class
-        if isinstance(model, CombineTorchModelLossFnWrapInput):
-            replace_name_dict = {}
-            for n in onnx_model.graph.initializer:
-                if n.name.startswith("model."):
-                    replace_name_dict[n.name] = n.name[len("model.") :]
-                    n.name = replace_name_dict[n.name]
-            for n in onnx_model.graph.node:
-                for i, name in enumerate(n.input):
-                    if name in replace_name_dict:
-                        n.input[i] = replace_name_dict[name]
-
-        return onnx_model
-
-    def _create_ort_training_session(self, optimizer_state_dict=None, session_options=None, provider_options=None):
-        if optimizer_state_dict is None:
-            optimizer_state_dict = {}
-        # Validating frozen_weights names
-        unused_frozen_weights = [
-            n
-            for n in self.options.utils.frozen_weights
-            if n not in [i.name for i in self._onnx_model.graph.initializer]
-        ]
-        if unused_frozen_weights:
-            raise RuntimeError(f"{unused_frozen_weights} params from 'frozen_weights' not found in the ONNX model.")
-
-        # Get loss name from model description
-        loss_name = [item.name for item in self.model_desc.outputs if item.is_loss]
-        assert len(loss_name) == 1, f"Only one loss output is supported ({len(loss_name)} were specified)"
-        loss_name = loss_name[0]
-
-        # Parse optimizer parameters
-        optimizer_attributes_map = {}
-        optimizer_int_attributes_map = {}
-        trainable_params = set()
-        for initializer in self._onnx_model.graph.initializer:
-            if initializer.name in self.options.utils.frozen_weights:
-                continue  # only trainable parameters are passed to the backend
-            trainable_params.add(initializer.name)
-            optimizer_attributes_map[initializer.name] = {}
-            optimizer_int_attributes_map[initializer.name] = {}
-            not_in_param_groups = True
-            for param_group in self.optim_config.params:
-                if initializer.name not in param_group["params"]:
-                    continue  # keep looking for a matching param_group
-                not_in_param_groups = False
-                for k, v in param_group.items():
-                    # 'params' is not a hyper parameter, skip it. 'lr' per weight is not supported
-                    if k == "params" or k == "lr":
-                        continue
-                    if isinstance(v, float):
-                        optimizer_attributes_map[initializer.name][k] = v
-                    elif isinstance(v, int):
-                        optimizer_int_attributes_map[initializer.name][k] = v
-                    else:
-                        raise ValueError("Optimizer attributes must be either float or int.")
-
-            # set default values for params not found in groups
-            if not_in_param_groups:
-                for k, v in self.optim_config.defaults.items():
-                    if k == "lr":
-                        continue
-                    if isinstance(v, float):
-                        optimizer_attributes_map[initializer.name][k] = v
-                    elif isinstance(v, int):
-                        optimizer_int_attributes_map[initializer.name][k] = v
-                    else:
-                        raise ValueError("Optimizer attributes must be either float or int.")
-
-        self.options.distributed.horizontal_parallel_size = max(self.options.distributed.horizontal_parallel_size, 1)
-        self.options.distributed.data_parallel_size = (
-            self.options.distributed.world_size // self.options.distributed.horizontal_parallel_size
-        )
-
-        # TrainingParameters
-        ort_parameters = ort.TrainingParameters()
-        ort_parameters.loss_output_name = loss_name
-        ort_parameters.use_mixed_precision = self.options.mixed_precision.enabled
-        ort_parameters.world_rank = self.options.distributed.world_rank
-        ort_parameters.world_size = self.options.distributed.world_size
-        ort_parameters.gradient_accumulation_steps = self.options.batch.gradient_accumulation_steps
-        ort_parameters.allreduce_post_accumulation = self.options.distributed.allreduce_post_accumulation
-        ort_parameters.enable_adasum = self.options.distributed.enable_adasum
-        ort_parameters.deepspeed_zero_stage = self.options.distributed.deepspeed_zero_optimization.stage
-        ort_parameters.enable_grad_norm_clip = self.options.utils.grad_norm_clip
-        ort_parameters.set_gradients_as_graph_outputs = False
-        ort_parameters.use_memory_efficient_gradient = self.options.utils.memory_efficient_gradient
-        ort_parameters.training_optimizer_name = self.optim_config.name
-        ort_parameters.lr_params_feed_name = self.model_desc.learning_rate.name
-        ort_parameters.weights_to_train = trainable_params
-        ort_parameters.optimizer_attributes_map = optimizer_attributes_map
-        ort_parameters.optimizer_int_attributes_map = optimizer_int_attributes_map
-        if bool(optimizer_state_dict):
-            ort_parameters.set_optimizer_initial_state(optimizer_state_dict)
-
-        ort_parameters.attn_dropout_recompute = self.options.graph_transformer.attn_dropout_recompute
-        ort_parameters.gelu_recompute = self.options.graph_transformer.gelu_recompute
-        ort_parameters.transformer_layer_recompute = self.options.graph_transformer.transformer_layer_recompute
-        ort_parameters.number_recompute_layers = self.options.graph_transformer.number_recompute_layers
-
-        ort_parameters.data_parallel_size = self.options.distributed.data_parallel_size
-        ort_parameters.horizontal_parallel_size = self.options.distributed.horizontal_parallel_size
-        ort_parameters.pipeline_parallel_size = self.options.distributed.pipeline_parallel.pipeline_parallel_size
-        ort_parameters.num_pipeline_micro_batches = (
-            self.options.distributed.pipeline_parallel.num_pipeline_micro_batches
-        )
-        ort_parameters.pipeline_cut_info_string = self.options.distributed.pipeline_parallel.pipeline_cut_info_string
-        # We have special handling for dictionary-typed option.
-        # sliced_schema._validated_opts is the original dictionary while sliced_schema is a _ORTTrainerOptionsInternal.
-        ort_parameters.sliced_schema = self.options.distributed.pipeline_parallel.sliced_schema._validated_opts
-        # We have special handling for dictionary-typed option.
-        # sliced_axes._validated_opts is the original dictionary while sliced_schema is a _ORTTrainerOptionsInternal.
-        ort_parameters.sliced_axes = self.options.distributed.pipeline_parallel.sliced_axes._validated_opts
-        ort_parameters.sliced_tensor_names = self.options.distributed.pipeline_parallel.sliced_tensor_names
-
-        ort_parameters.model_after_graph_transforms_path = (
-            self.options.debug.graph_save_paths.model_after_graph_transforms_path
-        )
-        ort_parameters.model_with_gradient_graph_path = (
-            self.options.debug.graph_save_paths.model_with_gradient_graph_path
-        )
-        ort_parameters.model_with_training_graph_path = (
-            self.options.debug.graph_save_paths.model_with_training_graph_path
-        )
-
-        # SessionOptions
-        session_options = ort.SessionOptions() if session_options is None else session_options
-        session_options.use_deterministic_compute = self.options.debug.deterministic_compute
-        if (
-            self.options.graph_transformer.attn_dropout_recompute
-            or self.options.graph_transformer.gelu_recompute
-            or self.options.graph_transformer.transformer_layer_recompute
-        ):
-            session_options.execution_order = ort.ExecutionOrder.PRIORITY_BASED
-        if len(self.options.debug.graph_save_paths.model_with_training_graph_after_optimization_path) > 0:
-            session_options.optimized_model_filepath = (
-                self.options.debug.graph_save_paths.model_with_training_graph_after_optimization_path
-            )
-
-        # old ort session may already exists and occupies GPU memory when creating new session, this may cause OOM error.
-        # for example, load_state_dict will be called before returing the function, and it calls _init_session again
-        del self._training_session
-
-        # Set provider-specific options if needed
-        def get_providers(provider_options):
-            providers = ort.get_available_providers()
-            if provider_options:
-                for provider_name in provider_options:
-                    if provider_name in providers:
-                        providers[providers.index(provider_name)] = (provider_name, provider_options[provider_name])
-                    else:
-                        providers.insert(0, (provider_name, provider_options[provider_name]))
-            # default: using cuda
-            elif "cuda" in self.options.device.id.lower():
-                gpu_ep_options = {"device_id": _utils.get_device_index(self.options.device.id)}
-                gpu_ep_name = "ROCMExecutionProvider" if self.is_rocm_pytorch else "CUDAExecutionProvider"
-                if self.options.device.mem_limit > 0:
-                    gpu_ep_options["gpu_mem_limit"] = self.options.device.mem_limit
-
-                if gpu_ep_name not in providers:
-                    raise RuntimeError(
-                        "ORTTrainer options specify a CUDA device but the {} provider is unavailable.".format(
-                            cuda_ep_name  # noqa: F821
-                        )
-                    )
-
-                providers[providers.index(gpu_ep_name)] = (gpu_ep_name, gpu_ep_options)
-
-            return providers
-
-        # TrainingSession
-        self._training_session = ort.TrainingSession(
-            self._onnx_model.SerializeToString(), ort_parameters, session_options, get_providers(provider_options)
-        )
-
-        # I/O bindings
-        self._train_io_binding = self._training_session.io_binding()
-        self._eval_io_binding = self._training_session.io_binding()
-
-    def _init_onnx_model(self, inputs):
-        if self._onnx_model is not None:
-            return
-
-        if self._torch_model is not None:
-            # PyTorch model is moved to cpu to save GPU memory
-            self._torch_model.cpu()
-
-            # PyTorch buffers (created using 'register_buffer') shouldn't be trained
-            torch_buffers = list(dict(self._torch_model.named_buffers()).keys())
-            self.options.utils.frozen_weights.extend(torch_buffers)
-
-            # Export to ONNX
-            self._onnx_model = self._convert_torch_model_loss_fn_to_onnx(inputs, "cpu")
-
-            # Post processing for ONNX models expported from PyTorch
-            if self.options._internal_use.enable_internal_postprocess:
-                self._onnx_model = postprocess.run_postprocess(self._onnx_model)
-            if self.options._internal_use.extra_postprocess:
-                self._onnx_model = self.options._internal_use.extra_postprocess(self._onnx_model)
-
-        optimizer_state_dict = {}
-        if self._load_state_dict:
-            optimizer_state_dict = self._load_state_dict()
-
-        self._init_session(
-            optimizer_state_dict,
-            session_options=self.options.session_options,
-            provider_options=self.options._validated_opts["provider_options"],
-        )
-
-    def _init_session(self, optimizer_state_dict={}, session_options=None, provider_options=None):  # noqa: B006
-        if self._onnx_model is None:
-            return
-
-        if self.options.utils.run_symbolic_shape_infer:
-            self._onnx_model = SymbolicShapeInference.infer_shapes(
-                self._onnx_model, auto_merge=True, guess_output_rank=True
-            )
-
-        # Create training session used by train_step
-        # pass all optimizer states to the backend
-        self._create_ort_training_session(
-            optimizer_state_dict, session_options=session_options, provider_options=provider_options
-        )
-
-        # Update model description to update dtype when mixed precision is enabled
-        # C++ backend modifies model's output dtype from float32 to float16 for mixed precision
-        # Note that for training we must use float32 and for evaluation we must use float16
-        for idx, o_desc in enumerate(self.model_desc.outputs):
-            if (
-                self.options.mixed_precision.enabled
-                and o_desc.dtype == torch.float32
-                and not self._training_session.is_output_fp32_node(o_desc.name)
-            ):
-                self.model_desc.add_type_to_output_description(idx, o_desc.dtype, torch.float16)
-
-        # Update model description
-        self._model_desc_inputs_with_lr = [*self.model_desc.inputs, self.model_desc.learning_rate]
-
-        # Update Mixed Precision, if applicable
-        if self.options.mixed_precision.enabled:
-            self.model_desc.loss_scale_input = self._training_session.loss_scale_input_name
-            self._model_desc_inputs_with_lr_and_loss_scale = [
-                *self._model_desc_inputs_with_lr,
-                self.model_desc.loss_scale_input,
-            ]
-            self.model_desc.all_finite = _utils.get_all_gradients_finite_name_from_session(self._training_session)
-            self._model_desc_outputs_with_all_finite = [*self.model_desc.outputs, self.model_desc.all_finite]
-        elif self.options.mixed_precision.loss_scaler:
-            raise ValueError("Loss Scaler cannot be specified when Mixed Precision is not enabled")
-
-        # Update Loss Scaler Input Name, if applicable
-        if self.options.mixed_precision.enabled and self.options.mixed_precision.loss_scaler:
-            self.options.mixed_precision.loss_scaler.input_name = self.model_desc.loss_scale_input.name
-        elif not self.options.mixed_precision.enabled and self.options.mixed_precision.loss_scaler:
-            raise ValueError("Loss Scaler cannot be specified when Mixed Precision is not enabled")
-
-        # Update Gradient Accumulation, if applicable
-        if self.options.batch.gradient_accumulation_steps > 1:
-            self.model_desc.gradient_accumulation = _utils.get_gradient_accumulation_name_from_session(
-                self._training_session
-            )
-            self._model_desc_outputs_with_gradient_accumulation = [
-                *self.model_desc.outputs,
-                self.model_desc.gradient_accumulation,
-            ]
-
-        # TODO: Remove when experimental checkpoint functions are removed
-        if self._state_dict:
-            checkpoint.experimental_load_state_dict(self, self._state_dict, self._load_state_dict_strict)
-            self._state_dict_debug = self._state_dict
-        self._state_dict = {}
-
-    def _prepare_model_input(self, inputs_desc, lr, loss_scale, *inputs, **kwargs):
-        # Normalize input to tuple of samples
-        if type(inputs) == tuple and len(inputs) == 1 and type(inputs[0]) == list:
-            input = tuple(inputs[0])
-        else:
-            input = inputs
-
-        # Append input from 'kwargs'
-        for input_desc in inputs_desc:
-            if input_desc.name in kwargs:
-                input = (*input, kwargs[input_desc.name])
-
-        # Append learning rate
-        extra_inputs = 0
-        if lr is not None:
-            lr = torch.tensor([lr])
-            input += (lr,)
-            extra_inputs += 1
-
-        # Append loss scale
-        if loss_scale is not None:
-            assert self.options.mixed_precision.enabled, "Loss scale cannot be used without mixed precision"
-            loss_scale = torch.tensor([loss_scale])
-            input += (loss_scale,)
-            extra_inputs += 1
-
-        # Only assert length of input when fetches is not used
-        assert self._train_step_info.fetches or len(self.model_desc.inputs) + extra_inputs == len(input)
-        return input
-
-    def _resolve_symbolic_dimensions(self, inputs, inputs_desc, outputs_desc):
-        outputs = copy.deepcopy(outputs_desc)
-        resolved_dims = {}
-        for input, i_desc in zip(inputs, inputs_desc):
-            for i_idx, i_axis in enumerate(i_desc.shape):
-                if isinstance(i_axis, str):
-                    if i_axis not in resolved_dims:
-                        resolved_dims[i_axis] = input.size()[i_idx]
-                    else:
-                        assert resolved_dims[i_axis] == input.size()[i_idx], f"Mismatch in dynamic shape {i_axis}"
-
-        for o_desc in outputs:
-            for idx_o, o_axis in enumerate(o_desc.shape):
-                if isinstance(o_axis, str):
-                    o_desc.shape[idx_o] = resolved_dims[o_axis]
-
-        unknown_dim = [o_desc.name for dim in o_desc.shape for o_desc in outputs if isinstance(dim, str)]
-        if unknown_dim:
-            raise RuntimeError(f"Cannot execute model with unknown output dimensions ({unknown_dim}")
-
-        return outputs
-
-    def _training_session_run_helper(self, is_train, inputs, inputs_desc, outputs_desc, run_options=None):
-        # Select IO binding
-        if is_train:
-            iobinding = self._train_io_binding
-        else:
-            iobinding = self._eval_io_binding
-
-        # Get the list of the actual session inputs because unused inputs can be removed.
-        input_nodes = self._training_session.get_inputs()
-        input_node_names = [input_node.name for input_node in input_nodes]
-
-        # Bind input tensors
-        for input, input_desc in zip(inputs, inputs_desc):
-            if input_desc.name in input_node_names:
-                device_index = _utils.get_device_index_from_input(input)
-                iobinding.bind_input(
-                    input_desc.name,
-                    input.device.type,
-                    device_index,
-                    _utils.dtype_torch_to_numpy(input.dtype),
-                    list(input.size()),
-                    input.data_ptr(),
-                )
-
-        # Bind output tensors
-        outputs_desc_resolved = self._resolve_symbolic_dimensions(inputs, inputs_desc, outputs_desc)
-        result = {}
-        for output_desc in outputs_desc_resolved:
-            target_device = self.options.device.id
-            if self.options.mixed_precision.enabled and output_desc.name == self.model_desc.all_finite.name:
-                # Keep all finite flag on CPU to match backend implementation
-                # This prevents CPU -> GPU -> CPU copies between frontend and backend
-                target_device = "cpu"
-            # the self.options.device may be a device that pytorch does not recognize.
-            # in that case, we temporary prefer to leave the input/output on CPU and let ORT session
-            # to move the data between device and host.
-            # so output will be on the same device as input.
-            try:
-                torch.device(target_device)
-            except Exception:
-                # in this case, input/output must on CPU
-                assert input.device.type == "cpu"
-                target_device = "cpu"
-
-            torch_tensor = torch.zeros(
-                output_desc.shape,
-                device=target_device,
-                dtype=output_desc.dtype_amp if output_desc.dtype_amp else output_desc.dtype,
-            )
-            iobinding.bind_output(
-                output_desc.name,
-                torch_tensor.device.type,
-                _utils.get_device_index(target_device),
-                _utils.dtype_torch_to_numpy(torch_tensor.dtype),
-                list(torch_tensor.size()),
-                torch_tensor.data_ptr(),
-            )
-            result[output_desc.name] = torch_tensor
-
-        # Run a train/eval step
-        self._training_session.run_with_iobinding(iobinding, run_options)
-        return result
-
-    def _update_onnx_model_initializers(self, state_tensors):
-        r"""Updates ONNX graph initializers with state_tensors's values
-
-        Usually called to save or load an ONNX model.
-
-        The tensors names of state_tensors are compared to all ONNX initializer tensors
-        and when the name matches, the ONNX graph is updated with the new value.
-        """
-        assert isinstance(state_tensors, dict), "state_tensors must be a dict"
-
-        new_weights = []
-        replace_indices = []
-        for i, w in enumerate(self._onnx_model.graph.initializer):
-            if w.name in state_tensors:
-                new_weights.append(onnx.numpy_helper.from_array(state_tensors[w.name], w.name))
-                replace_indices.append(i)
-        replace_indices.sort(reverse=True)
-        for w_i in replace_indices:
-            del self._onnx_model.graph.initializer[w_i]
-        self._onnx_model.graph.initializer.extend(new_weights)
-
-    def _extract_model_states(self, state_dict, pytorch_format):
-        """Extract model states from the training session and load into the state_dict"""
-
-        model_states = self._training_session.get_model_state(include_mixed_precision_weights=False)
-        state_dict[_utils.state_dict_model_key()] = {}
-
-        # extract trained model weights from the training session
-        for precision in model_states:
-            state_dict[_utils.state_dict_model_key()][precision] = {}
-            for model_state_key in model_states[precision]:
-                if pytorch_format:
-                    state_dict[_utils.state_dict_model_key()][precision][model_state_key] = torch.from_numpy(
-                        model_states[precision][model_state_key]
-                    )
-                else:
-                    state_dict[_utils.state_dict_model_key()][precision][model_state_key] = model_states[precision][
-                        model_state_key
-                    ]
-
-        # extract untrained (frozen) model weights
-        for node in self._onnx_model.graph.initializer:
-            if (
-                node.name not in state_dict[_utils.state_dict_model_key()][_utils.state_dict_full_precision_key()]
-                and node.name in self.options.utils.frozen_weights
-            ):
-                if pytorch_format:
-                    state_dict[_utils.state_dict_model_key()][_utils.state_dict_full_precision_key()][
-                        node.name
-                    ] = torch.from_numpy(onnx.numpy_helper.to_array(node))
-                else:
-                    state_dict[_utils.state_dict_model_key()][_utils.state_dict_full_precision_key()][
-                        node.name
-                    ] = onnx.numpy_helper.to_array(node)
-
-    def _extract_trainer_options(self, state_dict):
-        """Extract relevant trainer configuration and load it into the state_dict"""
-
-        mixed_precision = _utils.state_dict_trainer_options_mixed_precision_key()
-        zero_stage = _utils.state_dict_trainer_options_zero_stage_key()
-        world_rank = _utils.state_dict_trainer_options_world_rank_key()
-        world_size = _utils.state_dict_trainer_options_world_size_key()
-        optimizer_name = _utils.state_dict_trainer_options_optimizer_name_key()
-        D_size = _utils.state_dict_trainer_options_data_parallel_size_key()  # noqa: N806
-        H_size = _utils.state_dict_trainer_options_horizontal_parallel_size_key()  # noqa: N806
-
-        state_dict[_utils.state_dict_trainer_options_key()] = {}
-        state_dict[_utils.state_dict_trainer_options_key()][mixed_precision] = self.options.mixed_precision.enabled
-        state_dict[_utils.state_dict_trainer_options_key()][
-            zero_stage
-        ] = self.options.distributed.deepspeed_zero_optimization.stage
-        state_dict[_utils.state_dict_trainer_options_key()][world_rank] = self.options.distributed.world_rank
-        state_dict[_utils.state_dict_trainer_options_key()][world_size] = self.options.distributed.world_size
-        state_dict[_utils.state_dict_trainer_options_key()][optimizer_name] = self.optim_config.name
-        state_dict[_utils.state_dict_trainer_options_key()][D_size] = self.options.distributed.data_parallel_size
-        state_dict[_utils.state_dict_trainer_options_key()][H_size] = self.options.distributed.horizontal_parallel_size
-
-    def _extract_train_step_info(self, state_dict):
-        """Extract train step info settings and save it into the state_dict"""
-
-        optimization_step = _utils.state_dict_train_step_info_optimization_step_key()
-        step = _utils.state_dict_train_step_info_step_key()
-
-        state_dict[_utils.state_dict_train_step_info_key()] = {}
-        state_dict[_utils.state_dict_train_step_info_key()][optimization_step] = self._train_step_info.optimization_step
-        state_dict[_utils.state_dict_train_step_info_key()][step] = self._train_step_info.step
-
-    def state_dict(self, pytorch_format=False):
-        """Returns a dictionary with model, train step info and optionally, optimizer states
-
-        The returned dictionary contains the following information:
-        - Model and optimizer states
-        - Required ORTTrainerOptions settings
-        - Distributed training information, such as but not limited to ZeRO
-        - Train step info settings
-
-        Structure of the returned dictionary:
-        - When `pytorch_format = False`
-        schema:
-        {
-            "model":
-            {
-                type: dict,
-                schema:
-                {
-                    "full_precision":
-                    {
-                        type: dict,
-                        schema:
-                        {
-                            model_weight_name:
-                            {
-                                type: array
-                            }
-                        }
-                    }
-                }
-            },
-            "optimizer":
-            {
-                type: dict,
-                schema:
-                {
-                    model_weight_name:
-                    {
-                        type: dict,
-                        schema:
-                        {
-                            "Moment_1":
-                            {
-                                type: array
-                            },
-                            "Moment_2":
-                            {
-                                type: array
-                            },
-                            "Update_Count":
-                            {
-                                type: array,
-                                optional: True # present if optimizer is adam, absent otherwise
-                            }
-                        }
-                    },
-                    "shared_optimizer_state":
-                    {
-                        type: dict,
-                        optional: True, # present optimizer is shared, absent otherwise.
-                        schema:
-                        {
-                            "step":
-                            {
-                                type: array,
-                            }
-                        }
-                    }
-                }
-            },
-            "trainer_options":
-            {
-                type: dict,
-                schema:
-                {
-                    "mixed_precision":
-                    {
-                        type: bool
-                    },
-                    "zero_stage":
-                    {
-                        type: int
-                    },
-                    "world_rank":
-                    {
-                        type: int
-                    },
-                    "world_size":
-                    {
-                        type: int
-                    },
-                    "optimizer_name":
-                    {
-                        type: str
-                    },
-                    "data_parallel_size":
-                    {
-                        type: int
-                    },
-                    "horizontal_parallel_size":
-                    {
-                        type: int
-                    }
-                }
-            },
-            "partition_info":
-            {
-                type: dict,
-                optional: True, # present if states partitioned, else absent
-                schema:
-                {
-                    model_weight_name:
-                    {
-                        type: dict,
-                        schema:
-                        {
-                            "original_dim":
-                            {
-                                type: array
-                            },
-                            "megatron_row_partition":
-                            {
-                                type: int
-                            }
-                        }
-                    }
-                }
-            },
-            "train_step_info":
-            {
-                type: dict,
-                schema:
-                {
-                    "optimization_step":
-                    {
-                        type: int
-                    },
-                    "step":
-                    {
-                        type: int
-                    }
-                }
-            }
-        }
-        - When `pytorch_format = True`
-        schema:
-        {
-            model_weight_name:
-            {
-                type: tensor
-            }
-        }
-
-        Args:
-            pytorch_format: boolean flag to select either ONNX Runtime or PyTorch state schema
-
-        Returns:
-            A dictionary with `ORTTrainer` state
-        """
-        if not self._training_session:
-            warnings.warn(
-                "ONNX Runtime training session is not initialized yet. "
-                "Please run train_step or eval_step at least once before calling ORTTrainer.state_dict().",
-                UserWarning,
-            )
-            return self._load_state_dict.args[0] if self._load_state_dict else {}
-
-        state_dict = {}
-
-        # load training session model states into the state_dict
-        self._extract_model_states(state_dict, pytorch_format)
-        if pytorch_format:
-            if self.options.distributed.deepspeed_zero_optimization.stage > 0:
-                warnings.warn("Incomplete state_dict: ZeRO enabled", UserWarning)
-            if self.options.distributed.horizontal_parallel_size > 1:
-                warnings.warn("Incomplete state_dict: Megatron enabled", UserWarning)
-            # if pytorch_format is true, return a flat dictionary with only model states
-            # which is compatible with a PyTorch model
-            return state_dict[_utils.state_dict_model_key()][_utils.state_dict_full_precision_key()]
-
-        # load training session optimizer states into the state_dict
-        state_dict[_utils.state_dict_optimizer_key()] = self._training_session.get_optimizer_state()
-
-        # extract the relevant training configuration from the trainer and load them into the state_dict
-        self._extract_trainer_options(state_dict)
-
-        # Extract train step info settings and load it into the state_dict
-        self._extract_train_step_info(state_dict)
-
-        # add partition information in case of a distributed run
-        if (
-            self.options.distributed.deepspeed_zero_optimization.stage > 0
-            or self.options.distributed.horizontal_parallel_size > 1
-        ):
-            state_dict[_utils.state_dict_partition_info_key()] = self._training_session.get_partition_info_map()
-
-        return state_dict
-
-    def _load_model_states(self, state_dict, strict):
-        """Load the model states onto the onnx model graph"""
-
-        if _utils.state_dict_model_key() not in state_dict:
-            return
-
-        # collect all initializer names from the current onnx graph
-        assert self._onnx_model, "ONNX model graph is not exported"
-        initializer_names = {node.name for node in self._onnx_model.graph.initializer}
-
-        # loaded_initializers dict will be loaded with all the model states from the state dictionary
-        # that are found in the initializer_names dictionary
-        loaded_initializers = {}
-
-        # copy over model states from the input state dict onto the onnx model
-        for precision, precision_states in state_dict[_utils.state_dict_model_key()].items():
-            for state_key, state_value in precision_states.items():
-                if state_key in initializer_names:
-                    loaded_initializers[state_key] = state_value
-                elif strict:
-                    raise RuntimeError(f"Unexpected key: {state_key} in state_dict[model][{precision}]")
-
-        # update onnx model from loaded initializers
-        self._update_onnx_model_initializers(loaded_initializers)
-
-    def _load_optimizer_states(self, current_state_dict, state_dict):
-        """Load the optimizer states onto the training session state dictionary"""
-
-        def _check_optimizer_mismatch(state_dict):
-            """Assert that the loaded optimizer has the same config as the current training session config"""
-
-            # the state_dict optimizer_name can be a byte string (if coming from checkpoint file)
-            # or can be a regular string (coming from user)
-            optimizer_name = state_dict[_utils.state_dict_trainer_options_key()][
-                _utils.state_dict_trainer_options_optimizer_name_key()
-            ]
-
-            # optimizer_name can be either a regular string or a byte string.
-            # if it is a byte string, convert to regular string using decode()
-            # if it is a regular string, do nothing to it
-            try:  # noqa: SIM105
-                optimizer_name = optimizer_name.decode()
-            except AttributeError:
-                pass
-            assert self.optim_config.name == optimizer_name, "Optimizer mismatch: expected {}, got {}".format(
-                self.optim_config.name, optimizer_name
-            )
-
-        if _utils.state_dict_optimizer_key() not in state_dict:
-            return
-
-        # check optimizer config names are the same for current session and the sessino being loaded
-        _check_optimizer_mismatch(state_dict)
-
-        # create an entry for the optimizer in the training session state dictionary
-        if _utils.state_dict_optimizer_key() not in current_state_dict:
-            current_state_dict[_utils.state_dict_optimizer_key()] = {}
-
-        # copy over optimizer states from the input state dict onto the training session state dict
-        for model_state_key, optimizer_dict in state_dict[_utils.state_dict_optimizer_key()].items():
-            if model_state_key not in current_state_dict[_utils.state_dict_optimizer_key()]:
-                current_state_dict[_utils.state_dict_optimizer_key()][model_state_key] = {}
-            for optimizer_state_key, optimizer_state_value in optimizer_dict.items():
-                current_state_dict[_utils.state_dict_optimizer_key()][model_state_key][
-                    optimizer_state_key
-                ] = optimizer_state_value
-
-    def _load_state_dict_impl(self, state_dict, strict=True):
-        """Load the state dictionary onto the onnx model and on the training session graph"""
-
-        # clear the callable partial
-        self._load_state_dict = None
-
-        def _mismatch_keys(keys1, keys2, in_error_str, allow_unexpected=False):
-            """Find out the missing and the unexpected keys in two dictionaries
-
-            Throws a runtime error if missing or unexpected keys are found
-            - Keys in keys1 not in keys2 will be marked as missing
-            - Keys in keys2 not in keys1 will be marked as unexpected
-            """
-            keys1 = set(keys1)
-            keys2 = set(keys2)
-            missing_keys = list(keys1 - keys2)
-            unexpected_keys = list(keys2 - keys1)
-            if len(missing_keys) > 0:
-                raise RuntimeError(f"Missing keys: {missing_keys} in {in_error_str}")
-            if len(unexpected_keys) > 0 and not allow_unexpected:
-                raise RuntimeError(f"Unexpected keys: {unexpected_keys} in {in_error_str}")
-
-        def _check_model_key_mismatch(current_state_dict, state_dict, allow_unexpected=False):
-            """Check if there is any mismatch in the model sub state dictionary between the two state_dicts"""
-
-            # check unxexpected and missing precision keys in the model state_dict compared to the training
-            # session model state_dict
-            _mismatch_keys(
-                current_state_dict[_utils.state_dict_model_key()],
-                state_dict[_utils.state_dict_model_key()],
-                "state_dict[model]",
-                allow_unexpected,
-            )
-
-            # check for model state key mismatch
-            for precision_key in current_state_dict[_utils.state_dict_model_key()]:
-                _mismatch_keys(
-                    current_state_dict[_utils.state_dict_model_key()][precision_key],
-                    state_dict[_utils.state_dict_model_key()][precision_key],
-                    f"state_dict[model][{precision_key}]",
-                    allow_unexpected,
-                )
-
-        def _check_optimizer_key_mismatch(current_state_dict, state_dict, allow_unexpected=False):
-            """Check if there is any mismatch in the optimizer sub state dictionary between the two state_dicts"""
-
-            # check for model state key mismatch for the optimizer state_dict
-            _mismatch_keys(
-                current_state_dict[_utils.state_dict_optimizer_key()],
-                state_dict[_utils.state_dict_optimizer_key()],
-                "state_dict[optimizer]",
-                allow_unexpected,
-            )
-
-            # check for optimizer state keys mismatch
-            for model_state_key in current_state_dict[_utils.state_dict_optimizer_key()]:
-                _mismatch_keys(
-                    current_state_dict[_utils.state_dict_optimizer_key()][model_state_key],
-                    state_dict[_utils.state_dict_optimizer_key()][model_state_key],
-                    f"state_dict[optimizer][{model_state_key}]",
-                    allow_unexpected,
-                )
-
-        def _check_key_mismatch(current_state_dict, state_dict, allow_unexpected=False):
-            """Check if there is a mismatch in the keys (model and optimizer) in the two state_dicts"""
-
-            # check presence of 'model' in the input state_dict
-            if _utils.state_dict_model_key() in state_dict:
-                _check_model_key_mismatch(current_state_dict, state_dict, allow_unexpected)
-            else:
-                warnings.warn("Missing key: model in state_dict", UserWarning)
-            # check presence of 'optimizer' in the input state_dict
-            if _utils.state_dict_optimizer_key() in state_dict:
-                _check_optimizer_key_mismatch(current_state_dict, state_dict, allow_unexpected)
-            else:
-                warnings.warn("Missing key: optimizer in state_dict", UserWarning)
-
-        # extract state dict from the current training session. this is to persist the states between
-        # two training sessions.
-        # for example, if user provided only the model states, the optimizer states from the current
-        # training session must be persisted
-        current_state_dict = {}
-        if self._training_session:
-            current_state_dict = self.state_dict()
-            if strict:
-                # for Zero enabled, the current trainer might not have the complete state, and we must allow
-                # extra keys to be present in the state dict
-                allow_unexpected = self.options.distributed.deepspeed_zero_optimization.stage > 0
-                _check_key_mismatch(current_state_dict, state_dict, allow_unexpected)
-
-        # load the model states from the input state dictionary into the onnx graph
-        self._load_model_states(state_dict, strict)
-
-        # load the optimizer states from the input state dictionary into the training session states
-        # dictionary
-        self._load_optimizer_states(current_state_dict, state_dict)
-
-        return (
-            current_state_dict[_utils.state_dict_optimizer_key()]
-            if _utils.state_dict_optimizer_key() in current_state_dict
-            else {}
-        )
-
-    def _load_train_step_info(self, state_dict):
-        """Load the train step info settings from state dict"""
-
-        if _utils.state_dict_train_step_info_key() not in state_dict:
-            warnings.warn("Missing key: train_step_info in state_dict", UserWarning)
-            return
-
-        optimization_step = _utils.state_dict_train_step_info_optimization_step_key()
-        step = _utils.state_dict_train_step_info_step_key()
-
-        self._train_step_info.optimization_step = state_dict[_utils.state_dict_train_step_info_key()][optimization_step]
-        self._train_step_info.step = state_dict[_utils.state_dict_train_step_info_key()][step]
-
-    def load_state_dict(self, state_dict, strict=True):
-        """Loads state_dict containing model/optimizer states into ORTTrainer
-
-        The state_dict dictionary may contain the following information:
-        - Model and optimizer states
-        - Required ORTTrainerOptions settings
-        - Distributed training information, such as but not limited to ZeRO
-
-        Args:
-            state_dict: state dictionary containing both model and optimizer states. The structure of this dictionary
-                should be the same as the one that is returned by ORTTrainer.state_dict for the case when pytorch_format=False
-            strict: boolean flag to strictly enforce that the input state_dict keys match the keys from ORTTrainer.state_dict
-        """
-
-        # if onnx graph has not been initialized, loading of states will be put on hold.
-        # a copy of the state_dict and other arguments to the function will be stored until the onnx graph has
-        # been initialized. Once the graph is initialized, the desired states will be loaded onto the grpah
-        if not self._training_session:
-            self._load_state_dict = partial(self._load_state_dict_impl, state_dict, strict=strict)
-            return
-
-        # load the train step info settings
-        self._load_train_step_info(state_dict)
-
-        # load states onto the frontend onnx graph
-        optimizer_state_dict = self._load_state_dict_impl(state_dict, strict=strict)
-
-        # create a new training session after loading initializer states onto the onnx graph
-        # pass the populated states to the training session to populate the backend graph
-        self._init_session(
-            optimizer_state_dict,
-            session_options=self.options.session_options,
-            provider_options=self.options._validated_opts["provider_options"],
-        )
-
-    def save_checkpoint(self, path, user_dict={}, include_optimizer_states=True):  # noqa: B006
-        """Persists ORTTrainer state dictionary on disk along with user_dict.
-
-        Saves the state_dict along with the user_dict to a file specified by path.
-
-        Args:
-            path: string representation to a file path or a python file-like object.
-                if file already exists at path, an exception is raised.
-            user_dict: custom data to be saved along with the state_dict. This data will be returned
-                to the user when load_checkpoint is called.
-            include_optimizer_states: boolean flag indicating whether or not to persist the optimizer states.
-                on load_checkpoint, only model states will be loaded if include_optimizer_states==True
-        """
-
-        # extract state_dict to be saved in the checkpoint
-        state_dict = self.state_dict()
-
-        # if user_dict is provided, serialize to bytes and convert to hex string.
-        # this helps in loading the types as they are given by the user since hdf5
-        # converts to numpy types otherwise
-        if bool(user_dict):
-            state_dict[_utils.state_dict_user_dict_key()] = _checkpoint_storage.to_serialized_hex(user_dict)
-
-        # if include_optimizer_states is False, only save the model states in the checkpoint file
-        if not include_optimizer_states:
-            if _utils.state_dict_optimizer_key() in state_dict:
-                del state_dict[_utils.state_dict_optimizer_key()]
-
-        _checkpoint_storage.save(state_dict, path)
-
-    def _aggregation_required(self, loaded_trainer_options):
-        """Checks if aggregation is required for the loading the state_dict into the ORTTrainer"""
-
-        # To load states in the backend, aggregation is required for every ZeRO
-        # or Megatron checkpoint
-        return (
-            loaded_trainer_options[_utils.state_dict_trainer_options_zero_stage_key()] > 0
-            or loaded_trainer_options[_utils.state_dict_trainer_options_horizontal_parallel_size_key()] > 1
-        )
-
-    def load_checkpoint(self, *paths, strict=True):
-        """Loads the saved checkpoint state dictionary into the ORTTrainer
-
-        Reads the saved checkpoint files specified by paths from disk and loads the state dictionary
-        onto the ORTTrainer.
-        Aggregates the checkpoint files if aggregation is required.
-
-        Args:
-            paths: one or more files represented as strings where the checkpoint is saved
-            strict: boolean flag to strictly enforce that the saved checkpoint state_dict
-                keys match the keys from ORTTrainer.state_dict
-        Returns:
-            dictionary that the user had saved when calling save_checkpoint
-        """
-        state_dict = {}
-
-        # check if aggregation is required
-        loaded_trainer_options = _checkpoint_storage.load(paths[0], key=_utils.state_dict_trainer_options_key())
-        if self._aggregation_required(loaded_trainer_options):
-            # if aggregation is required, aggregation logic must be run on the saved checkpoints
-            state_dict = checkpoint.aggregate_checkpoints(paths, pytorch_format=False)
-        else:
-            # if aggregation is not required, there must only be a single file that needs to be loaded
-            assert len(paths) == 1, f"Expected number of files to load: 1, got {len(paths)}"
-            state_dict = _checkpoint_storage.load(paths[0])
-
-        # extract user dict from the saved checkpoint
-        user_dict = {}
-        if _utils.state_dict_user_dict_key() in state_dict:
-            user_dict = _checkpoint_storage.from_serialized_hex(state_dict[_utils.state_dict_user_dict_key()])
-            del state_dict[_utils.state_dict_user_dict_key()]
-
-        self.load_state_dict(state_dict, strict=strict)
-
-        return user_dict
diff --git a/orttraining/orttraining/python/training/orttrainer_options.py b/orttraining/orttraining/python/training/orttrainer_options.py
deleted file mode 100644
index c63ac6f82c87f..0000000000000
--- a/orttraining/orttraining/python/training/orttrainer_options.py
+++ /dev/null
@@ -1,692 +0,0 @@
-import cerberus
-
-import onnxruntime as ort
-from onnxruntime.capi._pybind_state import PropagateCastOpsStrategy
-
-from .amp import loss_scaler
-from .optim import lr_scheduler
-
-
-class ORTTrainerOptions:
-    r"""Settings used by ONNX Runtime training backend
-
-    The parameters are hierarchically organized to facilitate configuration through semantic groups
-    that encompasses features, such as distributed training, etc.
-
-    Input validation is performed on the input dict during instantiation to ensure
-    that supported parameters and values are passed in. Invalid input results
-    in :py:obj:`ValueError` exception with details on it.
-
-    Args:
-        options (dict): contains all training options
-        _validate (bool, default is True): for internal use only
-
-    Supported schema for kwargs:
-
-    .. code-block:: python
-
-    schema = {
-                'batch' : {
-                    'type' : 'dict',
-                    'required': False,
-                    'default' : {},
-                    'schema' : {
-                        'gradient_accumulation_steps' : {
-                            'type' : 'integer',
-                            'min' : 1,
-                            'default' : 1
-                        }
-                    },
-                },
-                'device' : {
-                    'type' : 'dict',
-                    'required': False,
-                    'default' : {},
-                    'schema' : {
-                        'id' : {
-                            'type' : 'string',
-                            'default' : 'cuda'
-                        },
-                        'mem_limit' : {
-                            'type' : 'integer',
-                            'min' : 0,
-                            'default' : 0
-                        }
-                    }
-                },
-                'distributed': {
-                    'type': 'dict',
-                    'default': {},
-                    'required': False,
-                    'schema': {
-                        'world_rank': {
-                            'type': 'integer',
-                            'min': 0,
-                            'default': 0
-                        },
-                        'world_size': {
-                            'type': 'integer',
-                            'min': 1,
-                            'default': 1
-                        },
-                        'local_rank': {
-                            'type': 'integer',
-                            'min': 0,
-                            'default': 0
-                        },
-                        'data_parallel_size': {
-                            'type': 'integer',
-                            'min': 1,
-                            'default': 1
-                        },
-                        'horizontal_parallel_size': {
-                            'type': 'integer',
-                            'min': 1,
-                            'default': 1
-                        },
-                        'pipeline_parallel' : {
-                            'type': 'dict',
-                            'default': {},
-                            'required': False,
-                            'schema': {
-                                'pipeline_parallel_size': {
-                                    'type': 'integer',
-                                    'min': 1,
-                                    'default': 1
-                                },
-                                'num_pipeline_micro_batches': {
-                                    'type': 'integer',
-                                    'min': 1,
-                                    'default': 1
-                                },
-                                'pipeline_cut_info_string': {
-                                    'type': 'string',
-                                    'default': ''
-                                },
-                                'sliced_schema': {
-                                    'type': 'dict',
-                                    'default': {},
-                                    'keysrules': {'type': 'string'},
-                                    'valuesrules': {
-                                        'type': 'list',
-                                        'schema': {'type': 'integer'}
-                                    }
-                                },
-                                'sliced_axes': {
-                                    'type': 'dict',
-                                    'default': {},
-                                    'keysrules': {'type': 'string'},
-                                    'valuesrules': {'type': 'integer'}
-                                },
-                                'sliced_tensor_names': {
-                                    'type': 'list',
-                                    'schema': {'type': 'string'},
-                                    'default': []
-                                }
-                            }
-                        },
-                        'allreduce_post_accumulation': {
-                            'type': 'boolean',
-                            'default': False
-                        },
-                        'deepspeed_zero_optimization': {
-                            'type': 'dict',
-                            'default': {},
-                            'required': False,
-                            'schema': {
-                                'stage': {
-                                    'type': 'integer',
-                                    'min': 0,
-                                    'max': 1,
-                                    'default': 0
-                                },
-                            }
-                        },
-                        'enable_adasum': {
-                            'type': 'boolean',
-                            'default': False
-                        }
-                    }
-                },
-                'lr_scheduler' : {
-                    'type' : 'optim.lr_scheduler',
-                    'nullable' : True,
-                    'default' : None
-                },
-                'mixed_precision' : {
-                    'type' : 'dict',
-                    'required': False,
-                    'default' : {},
-                    'schema' : {
-                        'enabled' : {
-                            'type' : 'boolean',
-                            'default' : False
-                        },
-                        'loss_scaler' : {
-                            'type' : 'amp.loss_scaler',
-                            'nullable' : True,
-                            'default' : None
-                        }
-                    }
-                },
-                'graph_transformer': {
-                    'type': 'dict',
-                    'required': False,
-                    'default': {},
-                    'schema': {
-                        'attn_dropout_recompute': {
-                            'type': 'boolean',
-                            'default': False
-                        },
-                        'gelu_recompute': {
-                            'type': 'boolean',
-                            'default': False
-                        },
-                        'transformer_layer_recompute': {
-                            'type': 'boolean',
-                            'default': False
-                        },
-                        'number_recompute_layers': {
-                            'type': 'integer',
-                            'min': 0,
-                            'default': 0
-                        },
-                        'propagate_cast_ops_config': {
-                            'type': 'dict',
-                            'required': False,
-                            'default': {},
-                            'schema': {
-                                'propagate_cast_ops_strategy': {
-                                    'type': 'onnxruntime.training.PropagateCastOpsStrategy',
-                                    'default': PropagateCastOpsStrategy.FLOOD_FILL
-                                },
-                                'propagate_cast_ops_level': {
-                                    'type': 'integer',
-                                    'default': 1
-                                },
-                                'propagate_cast_ops_allow': {
-                                    'type': 'list',
-                                    'schema': {'type': 'string'},
-                                    'default': []
-                                }
-                            }
-                        }
-                    }
-                },
-                'utils' : {
-                    'type' : 'dict',
-                    'required': False,
-                    'default' : {},
-                    'schema' : {
-                        'frozen_weights' : {
-                            'type' : 'list',
-                            'default' : []
-                        },
-                        'grad_norm_clip' : {
-                            'type' : 'boolean',
-                            'default' : True
-                        },
-                        'memory_efficient_gradient' : {
-                            'type' : 'boolean',
-                            'default' : False
-                        },
-                        'run_symbolic_shape_infer' : {
-                            'type' : 'boolean',
-                            'default' : False
-                        }
-                    }
-                },
-                'debug' : {
-                    'type' : 'dict',
-                    'required': False,
-                    'default' : {},
-                    'schema' : {
-                        'deterministic_compute' : {
-                            'type' : 'boolean',
-                            'default' : False
-                        },
-                        'check_model_export' : {
-                            'type' : 'boolean',
-                            'default' : False
-                        },
-                        'graph_save_paths' : {
-                            'type' : 'dict',
-                            'default': {},
-                            'required': False,
-                            'schema': {
-                                'model_after_graph_transforms_path': {
-                                    'type': 'string',
-                                    'default': ''
-                                },
-                                'model_with_gradient_graph_path':{
-                                    'type': 'string',
-                                    'default': ''
-                                },
-                                'model_with_training_graph_path': {
-                                    'type': 'string',
-                                    'default': ''
-                                },
-                                'model_with_training_graph_after_optimization_path': {
-                                    'type': 'string',
-                                    'default': ''
-                                },
-                            }
-                        },
-                    }
-                },
-                '_internal_use' : {
-                    'type' : 'dict',
-                    'required': False,
-                    'default' : {},
-                    'schema' : {
-                        'enable_internal_postprocess' : {
-                            'type' : 'boolean',
-                            'default' : True
-                        },
-                        'extra_postprocess' : {
-                            'type' : 'callable',
-                            'nullable' : True,
-                            'default' : None
-                        },
-                        'onnx_opset_version': {
-                            'type': 'integer',
-                            'min' : 12,
-                            'max' :14,
-                            'default': 14
-                        },
-                        'enable_onnx_contrib_ops' : {
-                            'type' : 'boolean',
-                            'default' : True
-                        }
-                    }
-                },
-                'provider_options':{
-                    'type': 'dict',
-                    'default': {},
-                    'required': False,
-                    'schema': {}
-                },
-                'session_options': {
-                    'type': 'SessionOptions',
-                    'nullable': True,
-                    'default': None
-                },
-             }
-
-    Keyword arguments:
-        batch (dict):
-            batch related settings
-        batch.gradient_accumulation_steps (int, default is 1):
-            number of steps to accumulate before do collective gradient reduction
-        device (dict):
-            compute device related settings
-        device.id (string, default is 'cuda'):
-            device to run training
-        device.mem_limit (int):
-            maximum memory size (in bytes) used by device.id
-        distributed (dict):
-            distributed training options.
-        distributed.world_rank (int, default is 0):
-            rank ID used for data/horizontal parallelism
-        distributed.world_size (int, default is 1):
-            number of ranks participating in parallelism
-        distributed.data_parallel_size (int, default is 1):
-            number of ranks participating in data parallelism
-        distributed.horizontal_parallel_size (int, default is 1):
-            number of ranks participating in horizontal parallelism
-        distributed.pipeline_parallel (dict):
-            Options which are only useful to pipeline parallel.
-        distributed.pipeline_parallel.pipeline_parallel_size (int, default is 1):
-            number of ranks participating in pipeline parallelism
-        distributed.pipeline_parallel.num_pipeline_micro_batches (int, default is 1):
-            number of micro-batches. We divide input batch into micro-batches and run the graph.
-        distributed.pipeline_parallel.pipeline_cut_info_string (string, default is ''):
-            string of cutting ids for pipeline partition.
-        distributed.allreduce_post_accumulation (bool, default is False):
-            True enables overlap of AllReduce with computation, while False,
-            postpone AllReduce until all gradients are ready
-        distributed.deepspeed_zero_optimization:
-            DeepSpeed ZeRO options.
-        distributed.deepspeed_zero_optimization.stage (int, default is 0):
-            select which stage of DeepSpeed ZeRO to use. Stage 0 means disabled.
-        distributed.enable_adasum (bool, default is False):
-            enable `Adasum <https://arxiv.org/abs/2006.02924>`_
-            algorithm for AllReduce
-        lr_scheduler (optim._LRScheduler, default is None):
-            specifies learning rate scheduler
-        mixed_precision (dict):
-            mixed precision training options
-        mixed_precision.enabled (bool, default is False):
-            enable mixed precision (fp16)
-        mixed_precision.loss_scaler (amp.LossScaler, default is None):
-            specifies a loss scaler to be used for fp16. If not specified,
-            :py:class:`.DynamicLossScaler` is used with default values.
-            Users can also instantiate :py:class:`.DynamicLossScaler` and
-            override its parameters. Lastly, a completely new implementation
-            can be specified by extending :py:class:`.LossScaler` class from scratch
-        graph_transformer (dict):
-            graph transformer related configurations
-        graph_transformer.attn_dropout_recompute(bool, default False)
-        graph_transformer.gelu_recompute(bool, default False)
-        graph_transformer.transformer_layer_recompute(bool, default False)
-        graph_transformer.number_recompute_layers(bool, default False)
-        graph_transformer.propagate_cast_ops_config (dict):
-            graph_transformer.propagate_cast_ops_config.strategy(PropagateCastOpsStrategy, default FLOOD_FILL)
-                Specify the choice of the cast propagation optimization strategy, either, NONE, INSERT_AND_REDUCE or FLOOD_FILL.
-                NONE strategy does not perform any cast propagation transformation on the graph, although other optimizations
-                locally change cast operations, for example, in order to fuse Transpose and MatMul nodes, the TransposeMatMulFunsion optimization could
-                interchange Transpose and Cast if the Cast node exists between Transpose and MatMul.
-                INSERT_AND_REDUCE strategy inserts and reduces cast operations around the nodes with allowed opcodes.
-                FLOOD_FILL strategy expands float16 regions in the graph using the allowed opcodes, and unlike
-                INSERT_AND_REDUCE does not touch opcodes outside expanded float16 region.
-            graph_transformer.propagate_cast_ops_config.level(integer, default 1)
-                Optimize by moving Cast operations if propagate_cast_ops_level is non-negative.
-                Use predetermined list of opcodes considered safe to move before/after cast operation
-                if propagate_cast_ops_level is positive and use propagate_cast_ops_allow otherwise.
-            graph_transformer.propagate_cast_ops_config.allow(list of str, [])
-                List of opcodes to be considered safe to move before/after cast operation if propagate_cast_ops_level is zero.
-        attn_dropout_recompute (bool, default is False):
-            enable recomputing attention dropout to save memory
-        gelu_recompute (bool, default is False):
-            enable recomputing Gelu activation output to save memory
-        transformer_layer_recompute (bool, default is False):
-            enable recomputing transformer layerwise to save memory
-        number_recompute_layers (int, default is 0)
-            number of layers to apply transformer_layer_recompute, by default system will
-            apply recompute to all the layers, except for the last one
-        utils (dict):
-            miscellaneous options
-        utils.frozen_weights (list of str, []):
-            list of model parameter names to skip training (weights don't change)
-        utils.grad_norm_clip (bool, default is True):
-            enables gradient norm clipping for 'AdamOptimizer' and 'LambOptimizer'
-        utils.memory_efficient_gradient (bool, default is False):
-            enables use of memory aware gradient builder.
-        utils.run_symbolic_shape_infer (bool, default is False):
-            runs symbolic shape inference on the model
-        debug (dict):
-            debug options
-        debug.deterministic_compute (bool, default is False)
-            forces compute to be deterministic accross runs
-        debug.check_model_export (bool, default is False)
-            compares PyTorch model outputs with ONNX model outputs in inference before the first
-            train step to ensure successful model export
-        debug.graph_save_paths (dict):
-            paths used for dumping ONNX graphs for debugging purposes
-        debug.graph_save_paths.model_after_graph_transforms_path (str, default is "")
-            path to export the ONNX graph after training-related graph transforms have been applied.
-            No output when it is empty.
-        debug.graph_save_paths.model_with_gradient_graph_path (str, default is "")
-            path to export the ONNX graph with the gradient graph added. No output when it is empty.
-        debug.graph_save_paths.model_with_training_graph_path (str, default is "")
-            path to export the training ONNX graph with forward, gradient and optimizer nodes.
-            No output when it is empty.
-        debug.graph_save_paths.model_with_training_graph_after_optimization_path (str, default is "")
-            outputs the optimized training graph to the path if nonempty.
-        _internal_use (dict):
-            internal options, possibly undocumented, that might be removed without notice
-        _internal_use.enable_internal_postprocess (bool, default is True):
-            enable internal internal post processing of the ONNX model
-        _internal_use.extra_postprocess (callable, default is None)
-            a functor to postprocess the ONNX model and return a new ONNX model.
-            It does not override :py:attr:`._internal_use.enable_internal_postprocess`, but complement it
-        _internal_use.onnx_opset_version (int, default is 14):
-            ONNX opset version used during model exporting.
-        _internal_use.enable_onnx_contrib_ops (bool, default is True)
-            enable PyTorch to export nodes as contrib ops in ONNX.
-            This flag may be removed anytime in the future.
-        session_options (onnxruntime.SessionOptions):
-            The SessionOptions instance that TrainingSession will use.
-        provider_options (dict):
-            The provider_options for customized execution providers. it is dict map from EP name to
-            a key-value pairs, like {'EP1' : {'key1' : 'val1'}, ....}
-
-    Example:
-        .. code-block:: python
-
-            opts = ORTTrainerOptions({
-                               'batch' : {
-                                   'gradient_accumulation_steps' : 128
-                               },
-                               'device' : {
-                                   'id' : 'cuda:0',
-                                   'mem_limit' : 2*1024*1024*1024,
-                               },
-                               'lr_scheduler' : optim.lr_scheduler.LinearWarmupLRScheduler(),
-                               'mixed_precision' : {
-                                   'enabled': True,
-                                   'loss_scaler': amp.LossScaler(loss_scale=float(1 << 16))
-                               }
-            })
-            fp16_enabled = opts.mixed_precision.enabled
-    """
-
-    def __init__(self, options={}):  # noqa: B006
-        # Keep a copy of original input for debug
-        self._original_opts = dict(options)
-
-        # Used for logging purposes
-        self._main_class_name = self.__class__.__name__
-
-        # Validates user input
-        self._validated_opts = dict(self._original_opts)
-        validator = ORTTrainerOptionsValidator(_ORTTRAINER_OPTIONS_SCHEMA)
-        self._validated_opts = validator.validated(self._validated_opts)
-        if self._validated_opts is None:
-            raise ValueError(f"Invalid options: {validator.errors}")
-
-        # Convert dict in object
-        for k, v in self._validated_opts.items():
-            setattr(self, k, self._wrap(v))
-
-    def __repr__(self):
-        return "{%s}" % str(
-            ", ".join(
-                f"'{k}': {v!r}"
-                for (k, v) in self.__dict__.items()
-                if k not in ["_original_opts", "_validated_opts", "_main_class_name"]
-            )
-        )
-
-    def _wrap(self, v):
-        if isinstance(v, (tuple, list, set, frozenset)):
-            return type(v)([self._wrap(i) for i in v])
-        else:
-            return _ORTTrainerOptionsInternal(self._main_class_name, v) if isinstance(v, dict) else v
-
-
-class _ORTTrainerOptionsInternal(ORTTrainerOptions):
-    r"""Internal class used by ONNX Runtime training backend for input validation
-
-    NOTE: Users MUST NOT use this class in any way!
-    """
-
-    def __init__(self, main_class_name, options):
-        # Used for logging purposes
-        self._main_class_name = main_class_name
-        # We don't call super().__init__(options) here but still called it "_validated_opts"
-        # instead of "_original_opts" because it has been validated in the top-level
-        # ORTTrainerOptions's constructor.
-        self._validated_opts = dict(options)
-        # Convert dict in object
-        for k, v in dict(options).items():
-            setattr(self, k, self._wrap(v))
-
-
-class ORTTrainerOptionsValidator(cerberus.Validator):
-    _LR_SCHEDULER = cerberus.TypeDefinition("lr_scheduler", (lr_scheduler._LRScheduler,), ())
-    _LOSS_SCALER = cerberus.TypeDefinition("loss_scaler", (loss_scaler.LossScaler,), ())
-
-    _SESSION_OPTIONS = cerberus.TypeDefinition("session_options", (ort.SessionOptions,), ())
-
-    _PROPAGATE_CAST_OPS_STRATEGY = cerberus.TypeDefinition(
-        "propagate_cast_ops_strategy", (PropagateCastOpsStrategy,), ()
-    )
-
-    types_mapping = cerberus.Validator.types_mapping.copy()
-    types_mapping["lr_scheduler"] = _LR_SCHEDULER
-    types_mapping["loss_scaler"] = _LOSS_SCALER
-    types_mapping["session_options"] = _SESSION_OPTIONS
-    types_mapping["propagate_cast_ops_strategy"] = _PROPAGATE_CAST_OPS_STRATEGY
-
-
-def _check_is_callable(field, value, error):
-    result = False
-    try:
-        # Python 3
-        result = value is None or callable(value)
-    except Exception:
-        # Python 3 but < 3.2
-        if hasattr(value, "__call__"):  # noqa: B004
-            result = True
-    if not result:
-        error(field, "Must be callable or None")
-
-
-_ORTTRAINER_OPTIONS_SCHEMA = {
-    "batch": {
-        "type": "dict",
-        "default_setter": lambda _: {},
-        "required": False,
-        "schema": {"gradient_accumulation_steps": {"type": "integer", "min": 1, "default": 1}},
-    },
-    "device": {
-        "type": "dict",
-        "default_setter": lambda _: {},
-        "required": False,
-        "schema": {
-            "id": {"type": "string", "default": "cuda"},
-            "mem_limit": {"type": "integer", "min": 0, "default": 0},
-        },
-    },
-    "distributed": {
-        "type": "dict",
-        "default_setter": lambda _: {},
-        "required": False,
-        "schema": {
-            "world_rank": {"type": "integer", "min": 0, "default": 0},
-            "world_size": {"type": "integer", "min": 1, "default": 1},
-            "local_rank": {"type": "integer", "min": 0, "default": 0},
-            "data_parallel_size": {"type": "integer", "min": 1, "default": 1},
-            "horizontal_parallel_size": {"type": "integer", "min": 1, "default": 1},
-            "pipeline_parallel": {
-                "type": "dict",
-                "default_setter": lambda _: {},
-                "required": False,
-                "schema": {
-                    "pipeline_parallel_size": {"type": "integer", "min": 1, "default": 1},
-                    "num_pipeline_micro_batches": {"type": "integer", "min": 1, "default": 1},
-                    "pipeline_cut_info_string": {"type": "string", "default": ""},
-                    "sliced_schema": {
-                        "type": "dict",
-                        "default_setter": lambda _: {},
-                        "keysrules": {"type": "string"},
-                        "valuesrules": {"type": "list", "schema": {"type": "integer"}},
-                    },
-                    "sliced_axes": {
-                        "type": "dict",
-                        "default_setter": lambda _: {},
-                        "keysrules": {"type": "string"},
-                        "valuesrules": {"type": "integer"},
-                    },
-                    "sliced_tensor_names": {"type": "list", "schema": {"type": "string"}, "default": []},
-                },
-            },
-            "allreduce_post_accumulation": {"type": "boolean", "default": False},
-            "deepspeed_zero_optimization": {
-                "type": "dict",
-                "default_setter": lambda _: {},
-                "required": False,
-                "schema": {
-                    "stage": {"type": "integer", "min": 0, "max": 1, "default": 0},
-                },
-            },
-            "enable_adasum": {"type": "boolean", "default": False},
-        },
-    },
-    "lr_scheduler": {"type": "lr_scheduler", "nullable": True, "default": None},
-    "mixed_precision": {
-        "type": "dict",
-        "default_setter": lambda _: {},
-        "required": False,
-        "schema": {
-            "enabled": {"type": "boolean", "default": False},
-            "loss_scaler": {"type": "loss_scaler", "nullable": True, "default": None},
-        },
-    },
-    "graph_transformer": {
-        "type": "dict",
-        "default_setter": lambda _: {},
-        "required": False,
-        "schema": {
-            "attn_dropout_recompute": {"type": "boolean", "default": False},
-            "gelu_recompute": {"type": "boolean", "default": False},
-            "transformer_layer_recompute": {"type": "boolean", "default": False},
-            "number_recompute_layers": {"type": "integer", "min": 0, "default": 0},
-            "propagate_cast_ops_config": {
-                "type": "dict",
-                "default_setter": lambda _: {},
-                "required": False,
-                "schema": {
-                    "strategy": {
-                        "type": "propagate_cast_ops_strategy",
-                        "nullable": True,
-                        "default": PropagateCastOpsStrategy.FLOOD_FILL,
-                    },
-                    "level": {"type": "integer", "min": -1, "default": 1},
-                    "allow": {"type": "list", "schema": {"type": "string"}, "default": []},
-                },
-            },
-        },
-    },
-    "utils": {
-        "type": "dict",
-        "default_setter": lambda _: {},
-        "required": False,
-        "schema": {
-            "frozen_weights": {"type": "list", "default": []},
-            "grad_norm_clip": {"type": "boolean", "default": True},
-            "memory_efficient_gradient": {"type": "boolean", "default": False},
-            "run_symbolic_shape_infer": {"type": "boolean", "default": False},
-        },
-    },
-    "debug": {
-        "type": "dict",
-        "default_setter": lambda _: {},
-        "required": False,
-        "schema": {
-            "deterministic_compute": {"type": "boolean", "default": False},
-            "check_model_export": {"type": "boolean", "default": False},
-            "graph_save_paths": {
-                "type": "dict",
-                "default_setter": lambda _: {},
-                "required": False,
-                "schema": {
-                    "model_after_graph_transforms_path": {"type": "string", "default": ""},
-                    "model_with_gradient_graph_path": {"type": "string", "default": ""},
-                    "model_with_training_graph_path": {"type": "string", "default": ""},
-                    "model_with_training_graph_after_optimization_path": {"type": "string", "default": ""},
-                },
-            },
-        },
-    },
-    "_internal_use": {
-        "type": "dict",
-        "default_setter": lambda _: {},
-        "required": False,
-        "schema": {
-            "enable_internal_postprocess": {"type": "boolean", "default": True},
-            "extra_postprocess": {"check_with": _check_is_callable, "nullable": True, "default": None},
-            "onnx_opset_version": {"type": "integer", "min": 12, "max": 14, "default": 14},
-            "enable_onnx_contrib_ops": {"type": "boolean", "default": True},
-        },
-    },
-    "provider_options": {
-        "type": "dict",
-        "default_setter": lambda _: {},
-        "required": False,
-        "allow_unknown": True,
-        "schema": {},
-    },
-    "session_options": {"type": "session_options", "nullable": True, "default": None},
-}
diff --git a/orttraining/orttraining/python/training/postprocess.py b/orttraining/orttraining/python/training/postprocess.py
deleted file mode 100644
index 6c2adb6af7978..0000000000000
--- a/orttraining/orttraining/python/training/postprocess.py
+++ /dev/null
@@ -1,478 +0,0 @@
-import os.path  # noqa: F401
-import struct
-import sys  # noqa: F401
-
-import numpy as np  # noqa: F401
-import onnx
-from onnx import *  # noqa: F403
-from onnx import helper, numpy_helper  # noqa: F401
-
-
-def run_postprocess(model):
-    # this post pass is not required for pytorch >= 1.5
-    # where add_node_name in torch.onnx.export is default to True
-    model = add_name(model)
-
-    # this post pass is not required for pytorch > 1.6
-    model = fuse_softmaxNLL_to_softmaxCE(model)
-
-    model = fix_expand_shape(model)
-    model = fix_expand_shape_pt_1_5(model)
-    return model
-
-
-def find_input_node(model, arg):
-    result = []
-    for node in model.graph.node:
-        for output in node.output:
-            if output == arg:
-                result.append(node)
-    return result[0] if len(result) == 1 else None
-
-
-def find_output_node(model, arg):
-    result = []
-    for node in model.graph.node:
-        for input in node.input:
-            if input == arg:
-                result.append(node)
-    return result[0] if len(result) == 1 else result
-
-
-def add_name(model):
-    i = 0
-    for node in model.graph.node:
-        node.name = "%s_%d" % (node.op_type, i)
-        i += 1
-    return model
-
-
-# Expand Shape PostProcess
-
-
-def fix_expand_shape(model):
-    expand_nodes = [n for n in model.graph.node if n.op_type == "Expand"]
-    model_inputs_names = [i.name for i in model.graph.input]
-
-    for expand_node in expand_nodes:
-        shape = find_input_node(model, expand_node.input[1])
-        if shape.op_type == "Shape":
-            # an expand subgraph
-            # Input    Input2
-            # |        |
-            # |        Shape
-            # |        |
-            # |__    __|
-            #    |  |
-            #   Expand
-            #     |
-            #   output
-            #
-            # Only if Input2 is one of the model inputs, assign Input2's shape to output of expand.
-            shape_input_name = shape.input[0]
-            if shape_input_name in model_inputs_names:
-                index = model_inputs_names.index(shape_input_name)
-                expand_out = model.graph.value_info.add()
-                expand_out.name = expand_node.output[0]
-                expand_out.type.CopyFrom(model.graph.input[index].type)
-    return model
-
-
-def fix_expand_shape_pt_1_5(model):
-    # expand subgraph
-    #                      Constant
-    #                        +
-    #                     ConstantOfShape
-    #                      | +  |
-    #                      | +  |
-    # (Reshape subgraph)   Mul  |
-    #       |___   _________|   |
-    #       +   | |             |
-    #       +  Equal            |
-    #       +++++|++++++++++++++|++
-    #            |____________  | +
-    #                         | | +
-    #   (subgraph)            Where
-    #       |                   |
-    #       |_____   ___________|
-    #             | |
-    #           Expand
-    #             |
-    #           output
-    #
-    # where the Reshape subgraph is
-    #
-    #  Input
-    #   | |
-    #   | |___________________
-    #   |                     |
-    #  Shape   Constant      Shape   Constant
-    #   |  ______|            |  ______|
-    #   | |                   | |
-    #  Gather                Gather
-    #   |                     |
-    # Unsqueeze             Unsqueeze
-    #   |                     |
-    #   |  ..Number of dims.. |
-    #   |    _________________|
-    #   |...|
-    #  Concat                       Constant
-    #     |                            |
-    #     |______    __________________|
-    #            |  |
-    #           Reshape
-    #             |
-    #           output
-    #
-    # This pass will copy Input's shape to the output of Expand.
-    expand_nodes = [n for n in model.graph.node if n.op_type == "Expand"]
-    model_inputs_names = [i.name for i in model.graph.input]
-
-    for expand_node in expand_nodes:
-        n_where = find_input_node(model, expand_node.input[1])
-        if n_where.op_type != "Where":
-            continue
-
-        n_equal = find_input_node(model, n_where.input[0])
-        n_cos = find_input_node(model, n_where.input[1])
-        n_reshape = find_input_node(model, n_where.input[2])
-
-        if n_equal.op_type != "Equal" or n_cos.op_type != "ConstantOfShape" or n_reshape.op_type != "Reshape":
-            continue
-
-        n_reshape_e = find_input_node(model, n_equal.input[0])
-        n_mul = find_input_node(model, n_equal.input[1])
-        if n_reshape_e != n_reshape or n_mul.op_type != "Mul":
-            continue
-
-        n_cos_m = find_input_node(model, n_mul.input[0])
-        n_constant = find_input_node(model, n_mul.input[1])
-        if n_cos_m != n_cos or n_constant.op_type != "Constant":
-            continue
-
-        n_concat = find_input_node(model, n_reshape.input[0])
-        n_constant_r = find_input_node(model, n_reshape.input[1])
-        if n_concat.op_type != "Concat" or n_constant_r.op_type != "Constant":
-            continue
-
-        n_input_candidates = []
-        for concat_in in n_concat.input:
-            n_unsqueeze = find_input_node(model, concat_in)
-            if n_unsqueeze.op_type != "Unsqueeze":
-                break
-            n_gather = find_input_node(model, n_unsqueeze.input[0])
-            if n_gather.op_type != "Gather":
-                break
-            n_shape = find_input_node(model, n_gather.input[0])
-            n_constant_g = find_input_node(model, n_gather.input[1])
-            if n_shape.op_type != "Shape" or n_constant_g.op_type != "Constant":
-                break
-            n_input = n_shape.input[0]
-            if n_input not in model_inputs_names:
-                break
-            n_input_candidates.append(n_input)
-
-        if not n_input_candidates or not all(elem == n_input_candidates[0] for elem in n_input_candidates):
-            continue
-
-        index = model_inputs_names.index(n_input_candidates[0])
-        expand_out = model.graph.value_info.add()
-        expand_out.name = expand_node.output[0]
-        expand_out.type.CopyFrom(model.graph.input[index].type)
-    return model
-
-
-# LayerNorm PostProcess
-
-
-def find_nodes(graph, op_type):
-    nodes = []
-    for node in graph.node:
-        if node.op_type == op_type:
-            nodes.append(node)
-    return nodes
-
-
-def is_type(node, op_type):
-    if node is None or isinstance(node, list):
-        return False
-    return node.op_type == op_type
-
-
-def add_const(model, name, output, t_value=None, f_value=None):
-    const_node = model.graph.node.add()
-    const_node.op_type = "Constant"
-    const_node.name = name
-    const_node.output.extend([output])
-    attr = const_node.attribute.add()
-    attr.name = "value"
-    if t_value is not None:
-        attr.type = 4
-        attr.t.CopyFrom(t_value)
-    else:
-        attr.type = 1
-        attr.f = f_value
-    return const_node
-
-
-def layer_norm_transform(model):
-    # DEPRECATED: This pass is no longer needed as the transform is handled at the backend.
-    # Converting below subgraph
-    #
-    # input
-    #   |
-    # ReduceMean
-    #   |
-    #  Sub                         Constant
-    #  _||_____                       |
-    # |        |                      |
-    # |        |                      |
-    # |   (optional) Cast      (optional) Cast
-    # |        |                      |
-    # |        |  ____________________|
-    # |        | |
-    # |        Pow
-    # |        |
-    # |       ReduceMean
-    # |        |
-    # |        Add
-    # |        |
-    # |__    __Sqrt
-    #    |  |
-    #     Div  (weight)
-    #     |       |
-    #     |  _____|
-    #     | |
-    #     Mul   (bias)
-    #     |       |
-    #     |  _____|
-    #     | |
-    #     Add
-    #     |
-    #     output
-    #
-    # to the below subgraph
-    #
-    # input    (weight)    (bias)
-    #   |         |          |
-    #   |  _______|          |
-    #   | |  ________________|
-    #   | | |
-    # LayerNormalization
-    #   |
-    # output
-    graph = model.graph
-
-    nodes_ReduceMean = find_nodes(graph, "ReduceMean")  # noqa: N806
-
-    id = 0
-    layer_norm_nodes = []
-    remove_nodes = []
-    for reduce_mean in nodes_ReduceMean:
-        # check that reduce_mean output is Sub
-        sub = find_output_node(model, reduce_mean.output[0])
-        if not is_type(sub, "Sub"):
-            continue
-
-        # check that sub output[0] is Div and output[1] is Pow
-        pow, div = find_output_node(model, sub.output[0])
-        if is_type(pow, "Cast"):
-            # During an update in PyTorch, Cast nodes are inserted between Sub and Pow.
-            remove_nodes += [pow]
-            pow = find_output_node(model, pow.output[0])
-            if not is_type(pow, "Pow"):
-                continue
-            cast_pow = find_input_node(model, pow.input[1])
-            if not is_type(cast_pow, "Cast"):
-                continue
-            remove_nodes += [cast_pow]
-        if not is_type(div, "Div") or not is_type(pow, "Pow"):
-            continue
-
-        # check that pow ouput is ReduceMean
-        reduce_mean2 = find_output_node(model, pow.output[0])
-        if not is_type(reduce_mean2, "ReduceMean"):
-            continue
-
-        # check that reduce_mean2 output is Add
-        add = find_output_node(model, reduce_mean2.output[0])
-        if not is_type(add, "Add"):
-            continue
-
-        # check that add output is Sqrt
-        sqrt = find_output_node(model, add.output[0])
-        if not is_type(sqrt, "Sqrt"):
-            continue
-
-        # check that sqrt output is div
-        if div != find_output_node(model, sqrt.output[0]):
-            continue
-
-        # check if div output is Mul
-        optional_mul = find_output_node(model, div.output[0])
-        if not is_type(optional_mul, "Mul"):
-            optional_mul = None
-            continue  # default bias and weight not supported
-
-        # check if mul output is Add
-        if optional_mul is not None:
-            optional_add = find_output_node(model, optional_mul.output[0])
-        else:
-            optional_add = find_output_node(model, div.output[0])
-        if not is_type(optional_add, "Add"):
-            optional_add = None
-            continue  # default bias and weight not supported
-
-        # add nodes to remove_nodes
-        remove_nodes.extend([reduce_mean, sub, div, pow, reduce_mean2, add, sqrt])
-
-        # create LayerNorm node
-        layer_norm_input = []
-        layer_norm_output = []
-
-        layer_norm_input.append(reduce_mean.input[0])
-
-        if optional_mul is not None:
-            remove_nodes.append(optional_mul)
-            weight = optional_mul.input[1]
-            layer_norm_input.append(weight)
-
-        if optional_add is not None:
-            remove_nodes.append(optional_add)
-            bias = optional_add.input[1]
-            layer_norm_input.append(bias)
-
-        if optional_add is not None:
-            layer_norm_output.append(optional_add.output[0])
-        elif optional_mul is not None:
-            layer_norm_output.append(optional_mul.output[0])
-        else:
-            layer_norm_output.append(div.output[0])
-
-        layer_norm_output.append("saved_mean_" + str(id))
-        layer_norm_output.append("saved_inv_std_var_" + str(id))
-
-        epsilon_node = find_input_node(model, add.input[1])
-        epsilon = epsilon_node.attribute[0].t.raw_data
-        epsilon = struct.unpack("f", epsilon)[0]
-
-        layer_norm = helper.make_node(
-            "LayerNormalization",
-            layer_norm_input,
-            layer_norm_output,
-            "LayerNormalization_" + str(id),
-            None,
-            axis=reduce_mean.attribute[0].ints[0],
-            epsilon=epsilon,
-        )
-        layer_norm_nodes.append(layer_norm)
-        id += 1
-
-    # remove orphan constant nodes
-    for constant in graph.node:
-        if constant.op_type == "Constant" and constant not in remove_nodes:
-            is_orphan = True
-            for out_name in constant.output:
-                out = find_output_node(model, out_name)
-                if out not in remove_nodes:
-                    is_orphan = False
-            if is_orphan:
-                remove_nodes.append(constant)
-
-    all_nodes = []
-    for node in graph.node:
-        if node not in remove_nodes:
-            all_nodes.append(node)
-
-    for node in layer_norm_nodes:
-        all_nodes.append(node)  # noqa: PERF402
-
-    graph.ClearField("node")
-    graph.node.extend(all_nodes)
-    return model
-
-
-# Fuse SoftmaxCrossEntropy
-
-
-def fuse_softmaxNLL_to_softmaxCE(onnx_model):  # noqa: N802
-    # Converting below subgraph
-    #
-    #    (subgraph)
-    #        |
-    #    LogSoftmax     (target)    (optional weight)
-    #        |             |             |
-    #   nll_loss/NegativeLogLikelihoodLoss
-    #                   |
-    #                output
-    #
-    # to the following
-    #
-    #    (subgraph)     (target)    (optional weight)
-    #        |             |        _____|
-    #        |             |       |
-    #       SparseSoftmaxCrossEntropy
-    #                   |
-    #                output
-    nll_count = 0
-    while True:
-        nll_count = nll_count + 1
-        nll_loss_node = None
-        nll_loss_node_index = 0
-        for nll_loss_node_index, node in enumerate(onnx_model.graph.node):  # noqa: B007
-            if node.op_type == "nll_loss" or node.op_type == "NegativeLogLikelihoodLoss":
-                nll_loss_node = node
-                break
-
-        if nll_loss_node is None:
-            break
-
-        softmax_node = None
-        softmax_node_index = 0
-        label_input_name = None
-        weight_input_name = None
-        for softmax_node_index, node in enumerate(onnx_model.graph.node):  # noqa: B007
-            if node.op_type == "LogSoftmax":
-                # has to be connected to nll_loss
-                if len(nll_loss_node.input) > 2:
-                    weight_input_name = nll_loss_node.input[2]
-                if node.output[0] == nll_loss_node.input[0]:
-                    softmax_node = node
-                    label_input_name = nll_loss_node.input[1]
-                    break
-                elif node.output[0] == nll_loss_node.input[1]:
-                    softmax_node = node
-                    label_input_name = nll_loss_node.input[0]
-                    break
-            else:
-                if softmax_node is not None:
-                    break
-
-        if softmax_node is None:
-            break
-
-        # delete nll_loss and LogSoftmax nodes in order
-        if nll_loss_node_index < softmax_node_index:
-            del onnx_model.graph.node[softmax_node_index]
-            del onnx_model.graph.node[nll_loss_node_index]
-        else:
-            del onnx_model.graph.node[nll_loss_node_index]
-            del onnx_model.graph.node[softmax_node_index]
-
-        probability_output_name = softmax_node.output[0]
-        node = onnx_model.graph.node.add()
-        inputs = (
-            [softmax_node.input[0], label_input_name, weight_input_name]
-            if weight_input_name
-            else [softmax_node.input[0], label_input_name]
-        )
-        node.CopyFrom(
-            onnx.helper.make_node(
-                "SparseSoftmaxCrossEntropy",
-                inputs,
-                [nll_loss_node.output[0], probability_output_name],
-                "nll_loss_node_" + str(nll_count),
-            )
-        )
-
-    return onnx_model
diff --git a/orttraining/orttraining/python/training/utils/__init__.py b/orttraining/orttraining/python/training/utils/__init__.py
index fa7c9f2750cdd..244557c3c1072 100644
--- a/orttraining/orttraining/python/training/utils/__init__.py
+++ b/orttraining/orttraining/python/training/utils/__init__.py
@@ -2,6 +2,7 @@
 # Licensed under the MIT License.
 # __init__.py
 
+from onnxruntime.training.utils.ptable import PTable
 from onnxruntime.training.utils.torch_io_helper import (
     ORTModelInputOutputSchemaType,
     ORTModelInputOutputType,
@@ -9,7 +10,11 @@
     extract_data_and_schema,
     unflatten_data_using_schema,
 )
-from onnxruntime.training.utils.torch_type_map import onnx_dtype_to_pytorch, pytorch_dtype_to_onnx
+from onnxruntime.training.utils.torch_type_map import (
+    onnx_dtype_to_pytorch_dtype,
+    pytorch_scalar_type_to_pytorch_dtype,
+    pytorch_type_to_onnx_dtype,
+)
 
 __all__ = [
     "PrimitiveType",
@@ -17,6 +22,8 @@
     "ORTModelInputOutputSchemaType",
     "extract_data_and_schema",
     "unflatten_data_using_schema",
-    "pytorch_dtype_to_onnx",
-    "onnx_dtype_to_pytorch",
+    "pytorch_type_to_onnx_dtype",
+    "onnx_dtype_to_pytorch_dtype",
+    "pytorch_scalar_type_to_pytorch_dtype",
+    "PTable",
 ]
diff --git a/orttraining/orttraining/python/training/utils/hooks/_statistics_subscriber.py b/orttraining/orttraining/python/training/utils/hooks/_statistics_subscriber.py
index db1c69cf95ba4..c5be17236ac06 100644
--- a/orttraining/orttraining/python/training/utils/hooks/_statistics_subscriber.py
+++ b/orttraining/orttraining/python/training/utils/hooks/_statistics_subscriber.py
@@ -93,6 +93,13 @@ def infer_shape(
     ) -> Tuple[List[Optional[List[Union[int, str]]]], List[torch.onnx.TensorProtoDataType]]:
         return tensor_input_shapes, tensor_input_dtypes
 
+    @staticmethod
+    def alias_input(node_proto_str: str):
+        fw_alias_map = [3]
+        bw_alias_map = [-1] * 6
+        bw_alias_map[3] = 0
+        return fw_alias_map, bw_alias_map
+
 
 class StatisticsSubscriber(SubscriberBase):
     """
diff --git a/orttraining/orttraining/python/training/utils/hooks/_subscriber_manager.py b/orttraining/orttraining/python/training/utils/hooks/_subscriber_manager.py
index b2bc64be42fc1..c9c06dabab4de 100644
--- a/orttraining/orttraining/python/training/utils/hooks/_subscriber_manager.py
+++ b/orttraining/orttraining/python/training/utils/hooks/_subscriber_manager.py
@@ -68,6 +68,26 @@ def infer_shape(
     ) -> Tuple[List[Optional[List[Union[int, str]]]], List[torch.onnx.TensorProtoDataType]]:
         return tensor_input_shapes, tensor_input_dtypes
 
+    @staticmethod
+    def alias_input(node_proto_str: str):
+        node = onnx.NodeProto()
+        node.ParseFromString(node_proto_str)
+        non_tensor_fw_input_count = 1
+        fw_output_count = len(node.output) - 1  # exclude the first output appended in ONNX
+        fw_alias_map = [-1] * fw_output_count
+        bw_alias_map = [-1] * (non_tensor_fw_input_count + len(node.input))
+
+        for i in range(fw_output_count):
+            fw_alias_map[i] = i + non_tensor_fw_input_count
+
+        tensor_input_index = 0
+        for i in range(len(bw_alias_map)):
+            if i < non_tensor_fw_input_count:
+                continue
+            bw_alias_map[i] = tensor_input_index
+            tensor_input_index += 1
+        return fw_alias_map, bw_alias_map
+
 
 class SubscriberManager:
     """This class is used to manage all the subscribers and register subscribers' custom actions as PyTorch hooks
diff --git a/orttraining/orttraining/python/training/utils/hooks/_zero_offload_subscriber.py b/orttraining/orttraining/python/training/utils/hooks/_zero_offload_subscriber.py
index ad1297962db71..61f3b20224a72 100644
--- a/orttraining/orttraining/python/training/utils/hooks/_zero_offload_subscriber.py
+++ b/orttraining/orttraining/python/training/utils/hooks/_zero_offload_subscriber.py
@@ -7,8 +7,9 @@
 import inspect
 import warnings
 from collections import OrderedDict
+from datetime import timedelta
 from types import CodeType, FunctionType
-from typing import Callable, Dict, List, Optional, Tuple, Union
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 
 import onnx
 import torch
@@ -16,7 +17,7 @@
 from onnxruntime.training.utils import (
     ORTModelInputOutputType,
     extract_data_and_schema,
-    pytorch_dtype_to_onnx,
+    pytorch_type_to_onnx_dtype,
     unflatten_data_using_schema,
 )
 
@@ -56,6 +57,52 @@ def _setup_zero_stage3_ort_compatible_hooks(self):
     return _setup_zero_stage3_ort_compatible_hooks
 
 
+# Creating this dummy class because several functions would not be available during export step
+class DummyWork(torch.distributed.distributed_c10d.Work):
+    def is_completed(self) -> bool:
+        return True
+
+    def is_success(self) -> bool:
+        return True
+
+    def exception(self) -> Any:
+        return None
+
+    def wait(self, timeout: timedelta = timedelta) -> bool:
+        return True
+
+    def source_rank(self) -> int:
+        return 0
+
+    def _source_rank(self) -> int:
+        return 0
+
+    def result(self) -> List[torch.Tensor]:
+        return []
+
+    def synchronize(self):
+        pass
+
+
+def _get_ort_compatible_allgather_fn():
+    from deepspeed.utils import get_caller_func
+
+    original_allgather_fn = deepspeed.comm.allgather_fn
+    output_get_caller_func = get_caller_func()
+
+    # For Monkey patching the original function
+    # Original code https://github.com/microsoft/DeepSpeed/blob/604d701e35548e5407b017c088bdc3760832c9e0/deepspeed/comm/comm.py#L315
+    def _ort_compatible_allgather_fn_zero_stage3(
+        output_tensor, input_tensor, group=None, async_op=False, debug=output_get_caller_func
+    ):
+        if torch.onnx.is_in_onnx_export():
+            return DummyWork()
+
+        return original_allgather_fn(output_tensor, input_tensor, group=group, async_op=async_op, debug=debug)
+
+    return _ort_compatible_allgather_fn_zero_stage3
+
+
 # Adapted from https://github.com/microsoft/DeepSpeed/blob/e8318634b4313eaad89842cf4322e1762d34ced3/deepspeed/runtime/zero/linear.py#L104
 # In the original logic, if bias is None, after export to ONNX, None becomes a constant, so backward op complains
 # output count more than needed.
@@ -87,6 +134,7 @@ def collect_code(self, function: Callable):
 try:
     # Have to import below explicitly, otherwise it complains about _apply_to_tensors_only not found.
     # The hooks reference functions or classes in that file.
+    import deepspeed
     from deepspeed.runtime.zero.parameter_offload import *  # noqa: F403
     from deepspeed.runtime.zero.parameter_offload import DeepSpeedZeRoOffload, _apply_to_tensors_only  # noqa: F401
     from deepspeed.utils import instrument_w_nvtx  # noqa: F401
@@ -109,6 +157,10 @@ def configure_ort_compatible_zero_stage3(debug=False, stats_output_dir="./", sta
             debug, stats_output_dir, stats_overwrite
         )
 
+        # This function will overwrite the original allgather_fn in deepspeed comm to make it ort compatible.
+        # Only need to define it once
+        deepspeed.comm.allgather_fn = _get_ort_compatible_allgather_fn()
+
         from deepspeed.runtime.zero.linear import zero3_linear_wrap
 
         if torch.nn.functional.linear is zero3_linear_wrap:
@@ -239,7 +291,7 @@ def backward(ctx, *grads):
                     raise RuntimeError(f"param {p} has no grad, this should not happen.")
                 # Param gradient accumulation is triggered here, along with the attached hooks, done by PyTorch.
                 assert p.shape == g.shape, f"param_index: {param_index} - param shape {p.shape} != grad shape {g.shape}"
-                p.backward(g)
+                # p.backward(g)
 
         # At this point, the **real** param grads are already updated, the following grads are only used for
         # completing the full backward propagation, will not affect parameter updates.
@@ -272,12 +324,41 @@ def infer_shape(
         start_offset = len(tensor_input_shapes) - len(partitioned_params)
         for index, param in enumerate(partitioned_params):
             tensor_output_shapes[start_offset + index] = list(param.ds_shape)
-            tensor_output_dtypes[start_offset + index] = pytorch_dtype_to_onnx(param.dtype)
+            tensor_output_dtypes[start_offset + index] = int(pytorch_type_to_onnx_dtype(param.dtype))
         assert len(tensor_output_shapes) == len(tensor_input_shapes)
         assert len(tensor_output_dtypes) == len(tensor_input_dtypes)
 
         return tensor_output_shapes, tensor_output_dtypes
 
+    @staticmethod
+    def alias_input(node_proto_str: str):
+        node = onnx.NodeProto()
+        node.ParseFromString(node_proto_str)
+        input_pointer_scalars_attr_name = "input_pointer_scalars"
+        found = [attr for attr in node.attribute if attr.name == input_pointer_scalars_attr_name]
+        assert len(found) == 1
+        input_pointer_scalars = found[0].ints
+        # Restore the nn.Module from the pointer.
+        module = ctypes.cast(input_pointer_scalars[0], ctypes.py_object).value
+        partitioned_params = _get_params_for_current_module(module)
+
+        non_tensor_fw_input_count = 6
+        fw_output_count = len(node.output) - 1  # exclude the first output appended in ONNX
+        fw_alias_map = [-1] * fw_output_count
+        bw_alias_map = [-1] * (non_tensor_fw_input_count + len(node.input))
+
+        for i in range(fw_output_count - len(partitioned_params)):
+            fw_alias_map[i] = i + non_tensor_fw_input_count
+
+        tensor_input_index = 0
+        for i in range(len(bw_alias_map) - len(partitioned_params)):
+            if i < non_tensor_fw_input_count:
+                continue
+            bw_alias_map[i] = tensor_input_index
+            tensor_input_index += 1
+
+        return fw_alias_map, bw_alias_map
+
 
 class ORTZeROOffloadPostForwardFunction(torch.autograd.Function):
     @staticmethod
@@ -332,6 +413,27 @@ def infer_shape(
     ) -> Tuple[List[Optional[List[Union[int, str]]]], List[torch.onnx.TensorProtoDataType]]:
         return tensor_input_shapes, tensor_input_dtypes
 
+    @staticmethod
+    def alias_input(node_proto_str: str):
+        node = onnx.NodeProto()
+        node.ParseFromString(node_proto_str)
+        non_tensor_fw_input_count = 4
+        fw_output_count = len(node.output) - 1  # exclude the first output appended in ONNX
+        fw_alias_map = [-1] * fw_output_count
+        bw_alias_map = [-1] * (non_tensor_fw_input_count + len(node.input))
+
+        for i in range(fw_output_count):
+            fw_alias_map[i] = i + non_tensor_fw_input_count
+
+        tensor_input_index = 0
+        for i in range(len(bw_alias_map)):
+            if i < non_tensor_fw_input_count:
+                continue
+            bw_alias_map[i] = tensor_input_index
+            tensor_input_index += 1
+
+        return fw_alias_map, bw_alias_map
+
 
 class _ZeROOffloadFunctions:
     def __init__(self, one_time_init: _ZeROOffloadOneTimeInitializer, offloader) -> None:
diff --git a/orttraining/orttraining/python/training/utils/ptable.py b/orttraining/orttraining/python/training/utils/ptable.py
new file mode 100644
index 0000000000000..3b3b80d29ed92
--- /dev/null
+++ b/orttraining/orttraining/python/training/utils/ptable.py
@@ -0,0 +1,64 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+
+from typing import List
+
+
+class Row:
+    """A row in a PTable"""
+
+    def __init__(self, columns: List[str]) -> None:
+        self._columns: List[str] = columns  # List of strings
+        self._annotation_table = None  # Optional PTable used for displaying detailed information about the feature row.
+
+    def append_annotation_table(self, ptable) -> None:
+        self._annotation_table = ptable
+
+
+class PTable:
+    """A table that can be printed to the console."""
+
+    def __init__(self) -> None:
+        self._rows: List[Row] = []
+        self._column_count = None
+
+    def add_row(self, columns: List[str]) -> Row:
+        """Add a row to the table. The number of columns must match the number of columns in the table."""
+        if self._column_count is None:
+            self._column_count = len(columns)
+        assert self._column_count == len(columns)
+        row = Row(columns)
+        self._rows.append(row)
+        return row
+
+    def get_string(self, first_column_width=None, second_column_width=None) -> str:
+        """Serialize the table to a string."""
+        # Collect the max width of each column
+        column_widths = []
+        for row in self._rows:
+            if column_widths:
+                assert len(column_widths) == len(row._columns)
+            else:
+                column_widths = [0] * len(row._columns)
+            for i, column in enumerate(row._columns):
+                column_widths[i] = max(column_widths[i], len(str(column)))
+
+        if first_column_width:
+            column_widths[0] = max(first_column_width, column_widths[0])
+
+        if second_column_width:
+            column_widths[2] = max(second_column_width, column_widths[2])
+
+        serialized_table = ""
+        for row in self._rows:
+            for i, column in enumerate(row._columns):
+                serialized_table += f"{str(column).ljust(column_widths[i] + 2)}"
+            serialized_table += "\n"
+            if row._annotation_table:
+                serialized_table += row._annotation_table.get_string(
+                    first_column_width=column_widths[0], second_column_width=column_widths[2]
+                )
+
+        return serialized_table
diff --git a/orttraining/orttraining/python/training/utils/torch_type_map.py b/orttraining/orttraining/python/training/utils/torch_type_map.py
index bdacab8ad04fe..2b429f3fd4f3a 100644
--- a/orttraining/orttraining/python/training/utils/torch_type_map.py
+++ b/orttraining/orttraining/python/training/utils/torch_type_map.py
@@ -36,8 +36,10 @@
 _ONNX_TO_DTYPE = {onnx_dtype: torch_dtype for torch_dtype, onnx_dtype in _DTYPE_TO_ONNX.items()}
 
 
-def pytorch_dtype_to_onnx(dtype_or_scalar_type: Union[torch.dtype, str]) -> torch.onnx.TensorProtoDataType:
-    """Converts a pytorch dtype or scalar type string to an onnx dtype."""
+def pytorch_type_to_onnx_dtype(dtype_or_scalar_type: Union[torch.dtype, str]) -> torch.onnx.TensorProtoDataType:
+    """Converts a pytorch dtype or scalar type string to an onnx dtype.
+    PyTorch type can be either a dtype or a scalar type string.
+    """
     dtype = dtype_or_scalar_type
     if isinstance(dtype, str):
         if dtype not in _CAST_PYTORCH_TO_ONNX:
@@ -49,7 +51,15 @@ def pytorch_dtype_to_onnx(dtype_or_scalar_type: Union[torch.dtype, str]) -> torc
     return _DTYPE_TO_ONNX[dtype]
 
 
-def onnx_dtype_to_pytorch(dtype: torch.onnx.TensorProtoDataType) -> torch.dtype:
+def pytorch_scalar_type_to_pytorch_dtype(dtype: str) -> torch.dtype:
+    """Converts a pytorch scalar type string to a pytorch dtype."""
+    assert isinstance(dtype, str)
+    if dtype not in _CAST_PYTORCH_TO_ONNX:
+        raise RuntimeError(f"Unsupported dtype {dtype}")
+    return _CAST_PYTORCH_TO_ONNX[dtype][1]
+
+
+def onnx_dtype_to_pytorch_dtype(dtype: torch.onnx.TensorProtoDataType) -> torch.dtype:
     """Converts an onnx dtype to a pytorch dtype."""
     if dtype not in _ONNX_TO_DTYPE:
         raise RuntimeError(f"Unsupported dtype {dtype}")
diff --git a/orttraining/orttraining/test/external_transformer/test/external_transformers_test.py b/orttraining/orttraining/test/external_transformer/test/external_transformers_test.py
deleted file mode 100644
index f57f55d14eb1b..0000000000000
--- a/orttraining/orttraining/test/external_transformer/test/external_transformers_test.py
+++ /dev/null
@@ -1,144 +0,0 @@
-import sys
-import threading
-import time
-
-
-class OutputGrabber:
-    """
-    Class used to grab standard output or another stream.
-    """
-
-    escape_char = "\b"
-
-    def __init__(self, stream=None, threaded=False):
-        self.origstream = stream
-        self.threaded = threaded
-        if self.origstream is None:
-            self.origstream = sys.stdout
-        self.origstreamfd = self.origstream.fileno()
-        self.capturedtext = ""
-        # Create a pipe so the stream can be captured:
-        self.pipe_out, self.pipe_in = os.pipe()
-
-    def __enter__(self):
-        self.start()
-        return self
-
-    def __exit__(self, type, value, traceback):
-        self.stop()
-
-    def start(self):
-        """
-        Start capturing the stream data.
-        """
-        self.capturedtext = ""
-        # Save a copy of the stream:
-        self.streamfd = os.dup(self.origstreamfd)
-        # Replace the original stream with our write pipe:
-        os.dup2(self.pipe_in, self.origstreamfd)
-        if self.threaded:
-            # Start thread that will read the stream:
-            self.workerThread = threading.Thread(target=self.readOutput)
-            self.workerThread.start()
-            # Make sure that the thread is running and os.read() has executed:
-            time.sleep(0.01)
-
-    def stop(self):
-        """
-        Stop capturing the stream data and save the text in `capturedtext`.
-        """
-        # Print the escape character to make the readOutput method stop:
-        self.origstream.write(self.escape_char)
-        # Flush the stream to make sure all our data goes in before
-        # the escape character:
-        self.origstream.flush()
-        if self.threaded:
-            # wait until the thread finishes so we are sure that
-            # we have until the last character:
-            self.workerThread.join()
-        else:
-            self.readOutput()
-        # Close the pipe:
-        os.close(self.pipe_in)
-        os.close(self.pipe_out)
-        # Restore the original stream:
-        os.dup2(self.streamfd, self.origstreamfd)
-        # Close the duplicate stream:
-        os.close(self.streamfd)
-
-    def readOutput(self):
-        """
-        Read the stream data (one byte at a time)
-        and save the text in `capturedtext`.
-        """
-        while True:
-            char = os.read(self.pipe_out, 1).decode(self.origstream.encoding)
-            if not char or self.escape_char in char:
-                break
-            self.capturedtext += char
-
-
-import os  # noqa: E402
-import unittest  # noqa: E402
-
-import numpy as np  # noqa: E402, F401
-import torch  # noqa: E402
-import torch.nn as nn  # noqa: E402
-import torch.nn.functional as F  # noqa: E402
-
-from onnxruntime.capi import _pybind_state as torch_ort_eager  # noqa: E402, F401
-from onnxruntime.training import optim, orttrainer, orttrainer_options  # noqa: E402, F401
-
-
-def my_loss(x, target):
-    return F.nll_loss(F.log_softmax(x, dim=1), target)
-
-
-class NeuralNet(nn.Module):
-    def __init__(self, input_size, hidden_size, num_classes):
-        super().__init__()
-        self.fc1 = nn.Linear(input_size, hidden_size)
-        self.relu = nn.ReLU()
-        self.fc2 = nn.Linear(hidden_size, num_classes)
-
-    def forward(self, x, target):
-        out = self.fc1(x)
-        out = self.relu(out)
-        out = self.fc2(out)
-        return my_loss(out, target)
-
-
-class OrtEPTests(unittest.TestCase):
-    def test_external_graph_transformer_triggering(self):
-        input_size = 784
-        hidden_size = 500
-        num_classes = 10
-        batch_size = 128
-        model = NeuralNet(input_size, hidden_size, num_classes)
-
-        model_desc = {
-            "inputs": [
-                ("x", [batch_size, input_size]),
-                (
-                    "target",
-                    [
-                        batch_size,
-                    ],
-                ),
-            ],
-            "outputs": [("loss", [], True)],
-        }
-        optim_config = optim.SGDConfig()
-        opts = orttrainer.ORTTrainerOptions({"device": {"id": "cpu"}})
-        model = orttrainer.ORTTrainer(model, model_desc, optim_config, options=opts)
-        # because orttrainer is lazy initialized, feed in a random data to trigger the graph transformer
-        data = torch.rand(batch_size, input_size)
-        target = torch.randint(0, 10, (batch_size,))
-
-        with OutputGrabber() as out:
-            model.train_step(data, target)
-        assert "******************Trigger Customized Graph Transformer:  MyGraphTransformer!" in out.capturedtext
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/orttraining/orttraining/test/external_transformer/test_exeternal_transformers/test_external_transformers.cc b/orttraining/orttraining/test/external_transformer/test_exeternal_transformers/test_external_transformers.cc
deleted file mode 100644
index 00e933dd14914..0000000000000
--- a/orttraining/orttraining/test/external_transformer/test_exeternal_transformers/test_external_transformers.cc
+++ /dev/null
@@ -1,35 +0,0 @@
-#include "core/optimizer/rewrite_rule.h"
-#include "orttraining/core/optimizer/graph_transformer_registry.h"
-#include "onnx/defs/schema.h"
-#include <memory>
-#include <iostream>
-
-namespace onnxruntime {
-namespace training {
-
-class MyRewriteRule : public RewriteRule {
- public:
-  MyRewriteRule() noexcept
-      : RewriteRule("MyRewriteRule") {
-  }
-  std::vector<std::string> TargetOpTypes() const noexcept override {
-    return {};
-  }
-
- private:
-  bool SatisfyCondition(const Graph& /*graph*/, const Node& /*node*/, const logging::Logger& /*logger*/) const override {
-    return true;
-  }
-
-  Status Apply(Graph& /*graph*/, Node& /*node*/, RewriteRuleEffect& /*rule_effect*/, const logging::Logger& /*logger*/) const override {
-    std::cout << "******************Trigger Customized Graph Transformer:  MyGraphTransformer!" << std::endl;
-    return Status::OK();
-  }
-};
-
-void RegisterTrainingExternalTransformers() {
-  ONNX_REGISTER_EXTERNAL_REWRITE_RULE(MyRewriteRule, Level1, true);
-}
-
-}  // namespace training
-}  // namespace onnxruntime
diff --git a/orttraining/orttraining/test/framework/checkpointing_test.cc b/orttraining/orttraining/test/framework/checkpointing_test.cc
index b91cc2f1d5f5f..a7ee776b9bc39 100644
--- a/orttraining/orttraining/test/framework/checkpointing_test.cc
+++ b/orttraining/orttraining/test/framework/checkpointing_test.cc
@@ -52,21 +52,18 @@ void CompareOrtValuesToTensorProtoValues(
   ASSERT_EQ(name_to_ort_value.size(), name_to_tensor_proto.size());
 
   NameMLValMap name_to_ort_value_from_tensor_proto{};
-  std::vector<std::vector<char>> tensor_buffers{};
+  AllocatorPtr tmp_allocator = std::make_shared<CPUAllocator>();
 
   for (const auto& name_and_tensor_proto : name_to_tensor_proto) {
     const auto& name = name_and_tensor_proto.first;
     const auto& tensor_proto = name_and_tensor_proto.second;
     TensorShape shape{tensor_proto.dims().data(), static_cast<size_t>(tensor_proto.dims().size())};
     ASSERT_EQ(tensor_proto.data_type(), ONNX_NAMESPACE::TensorProto_DataType_FLOAT);
-    std::vector<char> tensor_buffer(shape.Size() * sizeof(float));
-    MemBuffer m(tensor_buffer.data(), tensor_buffer.size(), cpu_alloc_info);
     OrtValue ort_value;
-    ASSERT_STATUS_OK(utils::TensorProtoToMLValue(
-        Env::Default(), model_path.c_str(), tensor_proto, m, ort_value));
+    ASSERT_STATUS_OK(utils::TensorProtoToOrtValue(Env::Default(), model_path.c_str(), tensor_proto,
+                                                  tmp_allocator, ort_value));
 
     name_to_ort_value_from_tensor_proto.emplace(name, ort_value);
-    tensor_buffers.emplace_back(std::move(tensor_buffer));
   }
 
   for (const auto& name_and_ort_value : name_to_ort_value) {
diff --git a/orttraining/orttraining/test/gradient/gradient_ops_test.cc b/orttraining/orttraining/test/gradient/gradient_ops_test.cc
index 597801f4030c1..6fb42dd59b6a0 100644
--- a/orttraining/orttraining/test/gradient/gradient_ops_test.cc
+++ b/orttraining/orttraining/test/gradient/gradient_ops_test.cc
@@ -3011,7 +3011,6 @@ TEST(GradientCheckerTest, PadAndUnflattenGrad) {
   std::vector<std::vector<float>> x_datas = {{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}, {3, 5, 0, 1}, {5, 2}};
 
   TensorInfo padded_out_info({5, 2, 3}, true);
-  TensorInfo out_shape_info({2}, false, nullptr, DataTypeImpl::GetTensorType<int64_t>());
 
   std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
 #ifdef USE_CUDA
@@ -3021,7 +3020,7 @@ TEST(GradientCheckerTest, PadAndUnflattenGrad) {
 #endif
 
   ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, indices_info, shape_info},
-                                                         {padded_out_info, out_shape_info}, &max_error,
+                                                         {padded_out_info}, &max_error,
                                                          x_datas, {}, true, false, &execution_providers));
   EXPECT_IS_TINY(max_error);
 }
@@ -3298,6 +3297,41 @@ TEST(GradientCheckerTest, ConvTransposeGrad) {
   execution_providers.push_back(DefaultCudaExecutionProvider());
   ConvTransposeGradientCheckerTest(&execution_providers);
 }
+
+// TODO: Enable test for ROCM
+TEST(GradientCheckerTest, ResizeGrad) {
+  std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
+  execution_providers.push_back(DefaultCudaExecutionProvider());
+  const std::vector<ONNX_NAMESPACE::AttributeProto> attributes = {
+      MakeAttribute("coordinate_transformation_mode", "half_pixel"),
+      MakeAttribute("cubic_coeff_a", -0.75f),
+      MakeAttribute("exclude_outside", static_cast<int64_t>(0)),
+      MakeAttribute("extrapolation_value", 0.0f),
+      MakeAttribute("mode", "linear"),
+      MakeAttribute("nearest_mode", "floor")};
+
+  float max_error;
+  GradientChecker<float, float, float> gradient_checker;
+  OpDef op_def{"Resize", kOnnxDomain, 18};
+
+  TensorInfo x_info({1, 2, 4, 4}, true);
+  TensorInfo roi_info({4}, false, nullptr, DataTypeImpl::GetTensorType<float>());
+  TensorInfo scales_info({4}, false, nullptr, DataTypeImpl::GetTensorType<float>());
+
+  TensorInfo y_info({1, 2, 8, 8}, true);
+
+  std::vector<std::vector<float>> x_datas = {{0.2f, 0.4f, 0.6f, 0.8f, 0.2f, 0.4f, 0.6f, 0.8f,
+                                              0.2f, 0.4f, 0.6f, 0.8f, 0.2f, 0.4f, 0.6f, 0.8f,
+                                              0.2f, 0.4f, 0.6f, 0.8f, 0.2f, 0.4f, 0.6f, 0.8f,
+                                              0.2f, 0.4f, 0.6f, 0.8f, 0.2f, 0.4f, 0.6f, 0.8f},
+                                             {1.0f, 1.0f, 1.0f, 1.0f},
+                                             {1.0f, 1.0f, 2.0f, 2.0f}};
+
+  ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, roi_info, scales_info},
+                                                         {y_info}, &max_error, x_datas, attributes, true, false, &execution_providers));
+  EXPECT_IS_TINY(max_error);
+}
+
 #endif  // USE_CUDA
 
 }  // namespace test
diff --git a/orttraining/orttraining/test/gradient/optimizer_ops_test.cc b/orttraining/orttraining/test/gradient/optimizer_ops_test.cc
index c100730aacc44..bfb59f1525e47 100644
--- a/orttraining/orttraining/test/gradient/optimizer_ops_test.cc
+++ b/orttraining/orttraining/test/gradient/optimizer_ops_test.cc
@@ -1542,7 +1542,6 @@ TEST(OptimizerTest, LambOptimizerTestLarge) {
     std::vector<float> m(size);
     std::vector<float> v(size);
 
-    std::random_device random_device;
     std::mt19937 random_engine(0);
     std::uniform_real_distribution<float> dist(0.1f, 1.0f);
     for (int i = 0; i < size; ++i) {
@@ -1581,7 +1580,6 @@ TEST(OptimizerTest, LambOptimizerTestLarge) {
 
 TEST(OptimizerTest, LambOptimizerMultiTensorRatio) {
   constexpr int group_count = 127;
-  std::random_device random_device;
   std::mt19937 random_engine(0);
   std::uniform_real_distribution<float> dist(0.1f, 1.0f);
   std::uniform_int_distribution<int64_t> dist_int(1, 1228);
diff --git a/orttraining/orttraining/test/optimizer/graph_transform_test.cc b/orttraining/orttraining/test/optimizer/graph_transform_test.cc
index 94ca87b2ac519..b774fec11cc8d 100644
--- a/orttraining/orttraining/test/optimizer/graph_transform_test.cc
+++ b/orttraining/orttraining/test/optimizer/graph_transform_test.cc
@@ -18,6 +18,7 @@
 #include "orttraining/core/optimizer/concat_replacement.h"
 #include "orttraining/core/optimizer/batchnorm_replacement.h"
 #include "orttraining/core/optimizer/localized_recompute.h"
+#include "orttraining/core/optimizer/transpose_replacement.h"
 #include "test/optimizer/graph_transform_test_builder.h"
 #include "test/optimizer/graph_transform_test_fixture.h"
 #include "test/util/include/default_providers.h"
@@ -34,6 +35,7 @@
 #ifdef ENABLE_TRITON
 #include "orttraining/core/optimizer/triton_fusion.h"
 #endif
+#include "orttraining/core/optimizer/conv1d_replacement.h"
 
 #include <random>
 
@@ -551,6 +553,46 @@ TEST_F(GraphTransformationTests, ConcatReplacement) {
   ASSERT_EQ(op_to_count["com.microsoft.ConcatTraining"], 1);
 }
 
+TEST_F(GraphTransformationTests, TransposeReplacement) {
+  {
+    auto model_uri = MODEL_FOLDER "transpose_to_reshape_valid.onnx";
+    std::shared_ptr<Model> p_model;
+    ASSERT_TRUE(Model::Load(model_uri, p_model, nullptr, *logger_).IsOK());
+    Graph& graph = p_model->MainGraph();
+
+    auto rule_transformer_L1 = std::make_unique<RuleBasedGraphTransformer>("TransposeReplacement");
+    ASSERT_STATUS_OK(rule_transformer_L1->Register(std::make_unique<TransposeReplacement>()));
+    onnxruntime::GraphTransformerManager graph_transformation_mgr{1};
+    ASSERT_STATUS_OK(graph_transformation_mgr.Register(std::move(rule_transformer_L1), TransformerLevel::Level1));
+
+    ASSERT_STATUS_OK(graph_transformation_mgr.ApplyTransformers(graph, TransformerLevel::Level1, *logger_));
+
+    std::map<std::string, int> op_to_count = CountOpsInGraph(graph);
+
+    ASSERT_EQ(op_to_count["Transpose"], 0);
+    ASSERT_EQ(op_to_count["Reshape"], 1);
+  }
+
+  {
+    auto model_uri = MODEL_FOLDER "transpose_to_reshape_invalid.onnx";
+    std::shared_ptr<Model> p_model;
+    ASSERT_TRUE(Model::Load(model_uri, p_model, nullptr, *logger_).IsOK());
+    Graph& graph = p_model->MainGraph();
+
+    auto rule_transformer_L1 = std::make_unique<RuleBasedGraphTransformer>("TransposeReplacement");
+    ASSERT_STATUS_OK(rule_transformer_L1->Register(std::make_unique<TransposeReplacement>()));
+    onnxruntime::GraphTransformerManager graph_transformation_mgr{1};
+    ASSERT_STATUS_OK(graph_transformation_mgr.Register(std::move(rule_transformer_L1), TransformerLevel::Level1));
+
+    ASSERT_STATUS_OK(graph_transformation_mgr.ApplyTransformers(graph, TransformerLevel::Level1, *logger_));
+
+    std::map<std::string, int> op_to_count = CountOpsInGraph(graph);
+
+    ASSERT_EQ(op_to_count["Transpose"], 1);
+    ASSERT_EQ(op_to_count["Reshape"], 0);
+  }
+}
+
 TEST_F(GraphTransformationTests, MegatronMLPPartitionRank0) {
   auto model_uri = MODEL_FOLDER "model_parallel/mlp_megatron_basic_test.onnx";
   std::shared_ptr<Model> p_model;
@@ -1158,6 +1200,103 @@ TEST_P(QDQFusionTestsParameterized, CheckModelComposition) {
   ASSERT_EQ(op_to_count_post_fusion["com.microsoft.FakeQuant"], 1);
 }
 
+TEST_F(GraphTransformationTests, Conv1dReplacement) {
+  auto pre_graph_checker = [&](Graph& graph) {
+    auto op_count_map = CountOpsInGraph(graph);
+    TEST_RETURN_IF_NOT(op_count_map["Conv"] == 1);
+    return Status::OK();
+  };
+
+  for (auto opset : {11, 12, 13, 14, 15, 16, 17, 18}) {
+    for (auto group : {1, 2}) {
+      auto build_test_case = [&](ModelTestBuilder& builder) {
+        auto [batch_size, in_channel, in_length] = std::make_tuple(8, 16, 128);
+        auto out_channel = 64;
+        auto* data_arg = builder.MakeInput<float>({{batch_size, in_channel, in_length}});
+
+        auto* weight_arg = builder.MakeInitializer<float>({out_channel, in_channel / group, 1}, {-1.0f, 1.0f});
+        auto* conv_output = builder.MakeOutput();
+
+        auto& conv_node = builder.AddNode("Conv", {data_arg, weight_arg}, {conv_output});
+        conv_node.AddAttribute("dilations", std::vector<int64_t>{1});
+        conv_node.AddAttribute("kernel_shape", std::vector<int64_t>{1});
+        conv_node.AddAttribute("strides", std::vector<int64_t>{1});
+        conv_node.AddAttribute("group", static_cast<int64_t>(group));
+      };
+
+      auto post_graph_checker = [&](Graph& graph) {
+        auto op_count_map = CountOpsInGraph(graph);
+        TEST_RETURN_IF_NOT(op_count_map["Conv"] == 0);
+        // after graph transformation, the graph should have 1 squeeze, 2 split, group matmul, 1 concat
+        TEST_RETURN_IF_NOT(op_count_map["Squeeze"] == 1);
+        TEST_RETURN_IF_NOT(op_count_map["Split"] == 2);
+        TEST_RETURN_IF_NOT(op_count_map["MatMul"] == group);
+        TEST_RETURN_IF_NOT(op_count_map["Concat"] == 1);
+        return Status::OK();
+      };
+
+      std::unique_ptr<GraphTransformer> transformer = std::make_unique<Conv1dReplacement>();
+      ASSERT_STATUS_OK(TestGraphTransformer(build_test_case, opset, *logger_, std::move(transformer),
+                                            TransformerLevel::Level1, 1,
+                                            pre_graph_checker, post_graph_checker));
+    }
+  }
+}
+
+TEST_F(GraphTransformationTests, Conv1dReplacement_NoTakeEffect) {
+  auto pre_graph_checker = [&](Graph& graph) {
+    auto op_count_map = CountOpsInGraph(graph);
+    TEST_RETURN_IF_NOT(op_count_map["Conv"] == 1);
+    return Status::OK();
+  };
+
+  // "group" is 3 so conv not replaced
+  for (auto opset : {11, 12, 13, 14, 15, 16, 17, 18}) {
+    auto build_test_case = [&](ModelTestBuilder& builder) {
+      auto [batch_size, in_channel, in_length] = std::make_tuple(8, 16, 128);
+      auto out_channel = 64;
+      auto* data_arg = builder.MakeInput<float>({{batch_size, in_channel, in_length}});
+
+      auto* weight_arg = builder.MakeInitializer<float>({out_channel, in_channel / 3, 1}, {-1.0f, 1.0f});
+      auto* conv_output = builder.MakeOutput();
+
+      auto& conv_node = builder.AddNode("Conv", {data_arg, weight_arg}, {conv_output});
+      conv_node.AddAttribute("dilations", std::vector<int64_t>{1});
+      conv_node.AddAttribute("kernel_shape", std::vector<int64_t>{1});
+      conv_node.AddAttribute("strides", std::vector<int64_t>{1});
+      conv_node.AddAttribute("group", static_cast<int64_t>(3));
+    };
+
+    std::unique_ptr<GraphTransformer> transformer = std::make_unique<Conv1dReplacement>();
+    ASSERT_STATUS_OK(TestGraphTransformer(build_test_case, opset, *logger_, std::move(transformer),
+                                          TransformerLevel::Level1, 1,
+                                          pre_graph_checker, pre_graph_checker));
+  }
+
+  // "kernel_shape" is not 1 so conv not replaced
+  for (auto opset : {11, 12, 13, 14, 15, 16, 17, 18}) {
+    auto build_test_case = [&](ModelTestBuilder& builder) {
+      auto [batch_size, in_channel, in_length] = std::make_tuple(8, 16, 128);
+      auto out_channel = 64;
+      auto* data_arg = builder.MakeInput<float>({{batch_size, in_channel, in_length}});
+
+      auto* weight_arg = builder.MakeInitializer<float>({out_channel, in_channel, 1}, {-1.0f, 1.0f});
+      auto* conv_output = builder.MakeOutput();
+
+      auto& conv_node = builder.AddNode("Conv", {data_arg, weight_arg}, {conv_output});
+      conv_node.AddAttribute("dilations", std::vector<int64_t>{1});
+      conv_node.AddAttribute("kernel_shape", std::vector<int64_t>{2});
+      conv_node.AddAttribute("strides", std::vector<int64_t>{1});
+      conv_node.AddAttribute("group", static_cast<int64_t>(1));
+    };
+
+    std::unique_ptr<GraphTransformer> transformer = std::make_unique<Conv1dReplacement>();
+    ASSERT_STATUS_OK(TestGraphTransformer(build_test_case, opset, *logger_, std::move(transformer),
+                                          TransformerLevel::Level1, 1,
+                                          pre_graph_checker, pre_graph_checker));
+  }
+}
+
 INSTANTIATE_TEST_SUITE_P(
     QDQFusionTests,
     QDQFusionTestsParameterized,
diff --git a/orttraining/orttraining/test/python/_test_commons.py b/orttraining/orttraining/test/python/_test_commons.py
index 1413d59096832..fb7e62551de63 100644
--- a/orttraining/orttraining/test/python/_test_commons.py
+++ b/orttraining/orttraining/test/python/_test_commons.py
@@ -1,26 +1,7 @@
-import copy
-import math
 import os
 import subprocess
 import sys
 
-import numpy as np
-import onnx
-import torch
-from numpy.testing import assert_allclose
-
-import onnxruntime
-from onnxruntime.training import _utils, optim
-
-
-def _single_run(execution_file, scenario, checkopint_dir=None):
-    cmd = [sys.executable, execution_file]
-    if scenario:
-        cmd += ["--scenario", scenario]
-    if checkopint_dir:
-        cmd += ["--checkpoint_dir", checkopint_dir]
-    assert subprocess.call(cmd) == 0
-
 
 def is_windows():
     return sys.platform.startswith("win")
@@ -46,197 +27,3 @@ def run_subprocess(args, cwd=None, capture=False, dll_path=None, shell=False, en
     if log:
         log.debug("Subprocess completed. Return code=" + str(completed_process.returncode))
     return completed_process
-
-
-def legacy_constant_lr_scheduler(global_step, initial_lr, total_steps, warmup):
-    num_warmup_steps = warmup * total_steps
-    if global_step < num_warmup_steps:
-        new_lr = initial_lr * float(global_step) / float(max(1, num_warmup_steps))
-    else:
-        new_lr = initial_lr
-    return new_lr
-
-
-def legacy_cosine_lr_scheduler(global_step, initial_lr, total_steps, warmup, cycles):
-    num_warmup_steps = warmup * total_steps
-    if global_step < num_warmup_steps:
-        new_lr = initial_lr * float(global_step) / float(max(1, num_warmup_steps))
-    else:
-        progress = float(global_step - num_warmup_steps) / float(max(1, total_steps - num_warmup_steps))
-        new_lr = initial_lr * max(0.0, 0.5 * (1.0 + math.cos(math.pi * float(cycles) * 2.0 * progress)))
-    return new_lr
-
-
-def legacy_linear_lr_scheduler(global_step, initial_lr, total_steps, warmup):
-    num_warmup_steps = warmup * total_steps
-    if global_step < num_warmup_steps:
-        new_lr = initial_lr * float(global_step) / float(max(1, num_warmup_steps))
-    else:
-        new_lr = initial_lr * max(0.0, float(total_steps - global_step) / float(max(1, total_steps - num_warmup_steps)))
-    return new_lr
-
-
-def legacy_poly_lr_scheduler(global_step, initial_lr, total_steps, warmup, power, lr_end):
-    num_warmup_steps = warmup * total_steps
-    if global_step < num_warmup_steps:
-        new_lr = initial_lr * float(global_step) / float(max(1, num_warmup_steps))
-    elif global_step > total_steps:
-        new_lr = lr_end
-    else:
-        lr_range = initial_lr - lr_end
-        decay_steps = total_steps - num_warmup_steps
-        pct_remaining = 1 - (global_step - num_warmup_steps) / decay_steps
-        decay = lr_range * pct_remaining**power + lr_end
-        new_lr = decay
-    return new_lr
-
-
-def generate_dummy_optim_state(model, optimizer):
-    np.random.seed(0)
-    if not (isinstance(optimizer, (optim.AdamConfig, optim.LambConfig))):
-        return dict()
-
-    moment_keys = ["Moment_1", "Moment_2"]
-    uc_key = "Update_Count"
-    step_key = "Step"
-    shared_state_key = "shared_optimizer_state"
-
-    optim_state = dict()
-    weight_shape_map = dict()
-    if isinstance(model, torch.nn.Module):
-        weight_shape_map = {name: param.size() for name, param in model.named_parameters()}
-    elif isinstance(model, onnx.ModelProto):
-        weight_shape_map = {n.name: n.dims for n in model.graph.initializer}
-    else:
-        raise ValueError("'model' must be either 'torch.nn.Module' or 'onnx.ModelProto'")
-
-    for weight_name, weight_shape in weight_shape_map.items():
-        per_weight_state = dict()
-        for moment in moment_keys:
-            per_weight_state[moment] = np.random.uniform(-2, 2, weight_shape).astype(np.float32)
-        if isinstance(optimizer, optim.AdamConfig):
-            per_weight_state[uc_key] = np.full([1], 5, dtype=np.int64)
-        optim_state[weight_name] = copy.deepcopy(per_weight_state)
-    if isinstance(optimizer, optim.LambConfig):
-        step_val = np.full([1], 5, dtype=np.int64)
-        optim_state[shared_state_key] = {step_key: step_val}
-    return {"optimizer": optim_state, "trainer_options": {"optimizer_name": optimizer.name}}
-
-
-def _load_pytorch_transformer_model(device, dynamic_axes=False, legacy_api=False, data_dir=None):
-    # Loads external Pytorch TransformerModel into utils
-    root = "samples"
-    if not os.path.exists(root):
-        root = os.path.normpath(
-            os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..", "..", "..", "samples")
-        )
-    if not os.path.exists(root):
-        raise FileNotFoundError("Unable to find folder 'samples', tried %r." % root)
-    pytorch_transformer_path = os.path.join(root, "python", "training", "orttrainer", "pytorch_transformer")
-    pt_model_path = os.path.join(pytorch_transformer_path, "pt_model.py")
-    pt_model = _utils.import_module_from_file(pt_model_path)
-    ort_utils_path = os.path.join(pytorch_transformer_path, "ort_utils.py")
-    ort_utils = _utils.import_module_from_file(ort_utils_path)
-    utils_path = os.path.join(pytorch_transformer_path, "utils.py")
-    utils = _utils.import_module_from_file(utils_path)
-
-    # Modeling
-    model = pt_model.TransformerModel(28785, 200, 2, 200, 2, 0.2).to(device)
-    my_loss = ort_utils.my_loss
-    if legacy_api:
-        if dynamic_axes:
-            model_desc = ort_utils.legacy_transformer_model_description_dynamic_axes()
-        else:
-            model_desc = ort_utils.legacy_transformer_model_description()
-    else:
-        if dynamic_axes:
-            model_desc = ort_utils.transformer_model_description_dynamic_axes()
-        else:
-            model_desc = ort_utils.transformer_model_description()
-
-    # Preparing data
-    train_data, val_data, test_data = utils.prepare_data(device, 20, 20, data_dir)
-    return model, model_desc, my_loss, utils.get_batch, train_data, val_data, test_data
-
-
-def generate_random_input_from_bart_model_desc(desc, seed=1, device="cuda:0"):
-    """Generates a sample input for the BART model using the model desc"""
-
-    torch.manual_seed(seed)
-    onnxruntime.set_seed(seed)
-    dtype = torch.int64
-    vocab_size = 30528
-    sample_input = []
-    for _index, input in enumerate(desc["inputs"]):
-        size = []
-        for s in input[1]:
-            if isinstance(s, (int)):
-                size.append(s)
-            else:
-                size.append(1)
-        sample_input.append(torch.randint(0, vocab_size, tuple(size), dtype=dtype).to(device))
-    return sample_input
-
-
-def _load_bart_model():
-    bart_onnx_model_path = os.path.join("testdata", "bart_tiny.onnx")
-    model = onnx.load(bart_onnx_model_path)
-    batch = 2
-    seq_len = 1024
-    model_desc = {
-        "inputs": [
-            (
-                "src_tokens",
-                [batch, seq_len],
-            ),
-            (
-                "prev_output_tokens",
-                [batch, seq_len],
-            ),
-            (
-                "target",
-                [batch * seq_len],
-            ),
-        ],
-        "outputs": [("loss", [], True)],
-    }
-
-    return model, model_desc
-
-
-def assert_all_states_close_ort(state_dict_pre_checkpoint, state_dict_post_checkpoint, reshape_states=False):
-    """Assert that the two ORTTrainer (hierarchical) state dictionaries are very close for all states"""
-
-    assert ("model" in state_dict_pre_checkpoint) == ("model" in state_dict_post_checkpoint)
-    assert ("optimizer" in state_dict_pre_checkpoint) == ("optimizer" in state_dict_post_checkpoint)
-
-    if "model" in state_dict_pre_checkpoint:
-        for model_state_key in state_dict_pre_checkpoint["model"]["full_precision"]:
-            if reshape_states:
-                assert_allclose(
-                    state_dict_pre_checkpoint["model"]["full_precision"][model_state_key],
-                    state_dict_post_checkpoint["model"]["full_precision"][model_state_key].reshape(
-                        state_dict_pre_checkpoint["model"]["full_precision"][model_state_key].shape
-                    ),
-                )
-            else:
-                assert_allclose(
-                    state_dict_pre_checkpoint["model"]["full_precision"][model_state_key],
-                    state_dict_post_checkpoint["model"]["full_precision"][model_state_key],
-                )
-
-    if "optimizer" in state_dict_pre_checkpoint:
-        for model_state_key in state_dict_pre_checkpoint["optimizer"]:
-            for optimizer_state_key in state_dict_pre_checkpoint["optimizer"][model_state_key]:
-                if reshape_states:
-                    assert_allclose(
-                        state_dict_pre_checkpoint["optimizer"][model_state_key][optimizer_state_key],
-                        state_dict_post_checkpoint["optimizer"][model_state_key][optimizer_state_key].reshape(
-                            state_dict_pre_checkpoint["optimizer"][model_state_key][optimizer_state_key].shape
-                        ),
-                    )
-                else:
-                    assert_allclose(
-                        state_dict_pre_checkpoint["optimizer"][model_state_key][optimizer_state_key],
-                        state_dict_post_checkpoint["optimizer"][model_state_key][optimizer_state_key],
-                    )
diff --git a/orttraining/orttraining/test/python/_test_helpers.py b/orttraining/orttraining/test/python/_test_helpers.py
index a9a4c7b1cc2ef..8f2a18b5ec00b 100644
--- a/orttraining/orttraining/test/python/_test_helpers.py
+++ b/orttraining/orttraining/test/python/_test_helpers.py
@@ -1,30 +1,11 @@
 import copy
 import os
 
-import numpy as np
 import torch
 from numpy.testing import assert_allclose
 
-from onnxruntime.capi.ort_trainer import ORTTrainer as Legacy_ORTTrainer
-from onnxruntime.training import orttrainer
-
-try:
-    from onnxruntime.training.ortmodule import ORTModule
-    from onnxruntime.training.ortmodule._fallback import ORTModuleInitException
-    from onnxruntime.training.ortmodule._graph_execution_manager_factory import (  # noqa: F401
-        GraphExecutionManagerFactory,
-    )
-except ImportError:
-    # Some pipelines do not contain ORTModule
-    pass
-except Exception as e:
-    from onnxruntime.training.ortmodule._fallback import ORTModuleInitException
-
-    if isinstance(e, ORTModuleInitException):
-        # ORTModule is present but not ready to run
-        # That is OK because this file is also used by ORTTrainer tests
-        pass
-    raise
+from onnxruntime.training.ortmodule import ORTModule
+from onnxruntime.training.ortmodule._graph_execution_manager_factory import GraphExecutionManagerFactory  # noqa: F401
 
 
 def is_all_or_nothing_fallback_enabled(model, policy=None):
@@ -66,103 +47,6 @@ def assert_model_outputs(output_a, output_b, verbose=False, rtol=1e-7, atol=0):
     assert_allclose(output_a, output_b, rtol=rtol, atol=atol, err_msg="Model output value mismatch")
 
 
-def assert_onnx_weights(model_a, model_b, verbose=False, rtol=1e-7, atol=0):
-    r"""Asserts whether weight difference between models a and b differences are within specified tolerance
-
-    Compares the weights of two different ONNX models (model_a and model_b)
-    and raises AssertError when they diverge by more than atol or rtol
-
-    Args:
-        model_a, model_b (ORTTrainer): Two instances of ORTTrainer with the same model structure
-        verbose (bool, default is False): if True, prints absolute difference for each weight
-        rtol (float, default is 1e-7): Max relative difference
-        atol (float, default is 1e-4): Max absolute difference
-    """
-    assert isinstance(model_a, orttrainer.ORTTrainer) and isinstance(model_b, orttrainer.ORTTrainer)
-    state_dict_a, state_dict_b = model_a._training_session.get_state(), model_b._training_session.get_state()
-    assert len(state_dict_a.items()) == len(state_dict_b.items())
-    _assert_state_dict_weights(state_dict_a, state_dict_b, verbose, rtol, atol)
-
-
-def assert_legacy_onnx_weights(model_a, model_b, verbose=False, rtol=1e-7, atol=0):
-    r"""Asserts whether weight difference between models a and b differences are within specified tolerance
-
-    Compares the weights of a legacy model model_a and experimental model_b model
-    and raises AssertError when they diverge by more than atol or rtol.
-
-    Args:
-        model_a (ORTTrainer): Instance of legacy ORTTrainer
-        model_b (ORTTrainer): Instance of experimental ORTTrainer
-        verbose (bool, default is False): if True, prints absolute difference for each weight.
-        rtol (float, default is 1e-7): Max relative difference
-        atol (float, default is 1e-4): Max absolute difference
-    """
-    assert isinstance(model_a, orttrainer.ORTTrainer) and isinstance(model_b, Legacy_ORTTrainer)
-    state_dict_a, state_dict_b = model_a._training_session.get_state(), model_b.session.get_state()
-    assert len(state_dict_a.items()) == len(state_dict_b.items())
-    _assert_state_dict_weights(state_dict_a, state_dict_b, verbose, rtol, atol)
-
-
-def _assert_state_dict_weights(state_dict_a, state_dict_b, verbose, rtol, atol):
-    r"""Asserts whether dicts a and b value differences are within specified tolerance
-
-    Compares the weights of two model's state_dict dicts and raises AssertError
-    when they diverge by more than atol or rtol
-
-    Args:
-        model_a (ORTTrainer): Instance of legacy ORTTrainer
-        model_b (ORTTrainer): Instance of experimental ORTTrainer
-        verbose (bool, default is False): if True, prints absolute difference for each weight.
-        rtol (float, default is 1e-7): Max relative difference
-        atol (float, default is 1e-4): Max absolute difference
-    """
-
-    for (a_name, a_val), (_b_name, b_val) in zip(state_dict_a.items(), state_dict_b.items()):
-        np_a_vals = np.array(a_val).flatten()
-        np_b_vals = np.array(b_val).flatten()
-        assert np_a_vals.shape == np_b_vals.shape
-        if verbose:
-            print(f"Weight name: {a_name}: absolute difference: {np.abs(np_a_vals-np_b_vals).max()}")
-        assert_allclose(a_val, b_val, rtol=rtol, atol=atol, err_msg=f"Weight mismatch for {a_name}")
-
-
-def assert_optim_state(expected_state, actual_state, rtol=1e-7, atol=0):
-    r"""Asserts whether optimizer state differences are within specified tolerance
-
-    Compares the expected and actual optimizer states of dicts and raises AssertError
-    when they diverge by more than atol or rtol.
-    The optimizer dict is of the form:
-        model_weight_name:
-            {
-                "Moment_1": moment1_tensor,
-                "Moment_2": moment2_tensor,
-                "Update_Count": update_tensor # if optimizer is adam, absent otherwise
-            },
-        ...
-        "shared_optimizer_state": # if optimizer is shared, absent otherwise.
-                                    So far, only lamb optimizer uses this.
-        {
-            "step": step_tensor # int array of size 1
-        }
-
-    Args:
-        expected_state (dict(dict())): Expected optimizer state
-        actual_state (dict(dict())): Actual optimizer state
-        rtol (float, default is 1e-7): Max relative difference
-        atol (float, default is 0): Max absolute difference
-    """
-    assert expected_state.keys() == actual_state.keys()
-    for param_name, a_state in actual_state.items():
-        for k, v in a_state.items():
-            assert_allclose(
-                v,
-                expected_state[param_name][k],
-                rtol=rtol,
-                atol=atol,
-                err_msg=f"Optimizer state mismatch for param {param_name}, key {k}",
-            )
-
-
 def is_dynamic_axes(model):
     # Check inputs
     for inp in model._torch_module._execution_manager(model._is_training())._onnx_models.optimized_model.graph.input:
diff --git a/orttraining/orttraining/test/python/onnxruntime_test_postprocess.py b/orttraining/orttraining/test/python/onnxruntime_test_postprocess.py
deleted file mode 100644
index d5298cf8e860e..0000000000000
--- a/orttraining/orttraining/test/python/onnxruntime_test_postprocess.py
+++ /dev/null
@@ -1,325 +0,0 @@
-import os
-import unittest
-
-import torch
-import torch.nn as nn
-from orttraining_test_bert_postprocess import postprocess_model
-from orttraining_test_data_loader import create_ort_test_dataloader
-from orttraining_test_transformers import BertForPreTraining, BertModelTest
-from orttraining_test_utils import map_optimizer_attributes
-
-import onnxruntime
-from onnxruntime.capi.ort_trainer import (  # noqa: F401
-    IODescription,
-    LossScaler,
-    ModelDescription,
-    ORTTrainer,
-    generate_sample,
-)
-
-torch.manual_seed(1)
-onnxruntime.set_seed(1)
-
-
-class Test_PostPasses(unittest.TestCase):  # noqa: N801
-    def get_onnx_model(
-        self, model, model_desc, inputs, device, _enable_internal_postprocess=True, _extra_postprocess=None
-    ):
-        lr_desc = IODescription(
-            "Learning_Rate",
-            [
-                1,
-            ],
-            torch.float32,
-        )
-        model = ORTTrainer(
-            model,
-            None,
-            model_desc,
-            "LambOptimizer",
-            map_optimizer_attributes,
-            lr_desc,
-            device,
-            world_rank=0,
-            world_size=1,
-            _opset_version=14,
-            _enable_internal_postprocess=_enable_internal_postprocess,
-            _extra_postprocess=_extra_postprocess,
-        )
-
-        model.train_step(*inputs)
-        return model.onnx_model_
-
-    def count_all_nodes(self, model):
-        return len(model.graph.node)
-
-    def count_nodes(self, model, node_type):
-        count = 0
-        for node in model.graph.node:
-            if node.op_type == node_type:
-                count += 1
-        return count
-
-    def find_nodes(self, model, node_type):
-        nodes = []
-        for node in model.graph.node:
-            if node.op_type == node_type:
-                nodes.append(node)
-        return nodes
-
-    def get_name(self, name):
-        if os.path.exists(name):
-            return name
-        rel = os.path.join("testdata", name)
-        if os.path.exists(rel):
-            return rel
-        this = os.path.dirname(__file__)
-        data = os.path.join(this, "..", "..", "..", "..", "onnxruntime", "test", "testdata")
-        res = os.path.join(data, name)
-        if os.path.exists(res):
-            return res
-        raise FileNotFoundError(f"Unable to find '{name}' or '{rel}' or '{res}'")
-
-    def test_layer_norm(self):
-        class LayerNormNet(nn.Module):
-            def __init__(self, target):
-                super().__init__()
-                self.ln_1 = nn.LayerNorm(10)
-                self.loss = nn.CrossEntropyLoss()
-                self.target = target
-
-            def forward(self, x):
-                output1 = self.ln_1(x)
-                loss = self.loss(output1, self.target)
-                return loss, output1
-
-        device = torch.device("cpu")
-        target = torch.ones(20, 10, 10, dtype=torch.int64).to(device)
-        model = LayerNormNet(target)
-        input = torch.randn(20, 5, 10, 10, dtype=torch.float32).to(device)
-
-        input_desc = IODescription("input", [], "float32")
-        output0_desc = IODescription("output0", [], "float32")
-        output1_desc = IODescription("output1", [20, 5, 10, 10], "float32")
-        model_desc = ModelDescription([input_desc], [output0_desc, output1_desc])
-
-        learning_rate = torch.tensor([1.0000000e00]).to(device)
-        input_args = [input, learning_rate]
-
-        onnx_model = self.get_onnx_model(model, model_desc, input_args, device)
-
-        count_layer_norm = self.count_nodes(onnx_model, "LayerNormalization")
-        count_nodes = self.count_all_nodes(onnx_model)
-
-        assert count_layer_norm == 0
-        assert count_nodes == 3
-
-    def test_expand(self):
-        class ExpandNet(nn.Module):
-            def __init__(self, target):
-                super().__init__()
-                self.loss = nn.CrossEntropyLoss()
-                self.target = target
-                self.linear = torch.nn.Linear(2, 2)
-
-            def forward(self, x, x1):
-                output = x.expand_as(x1)
-                output = self.linear(output)
-                output = output + output
-                loss = self.loss(output, self.target)
-                return loss, output
-
-        device = torch.device("cpu")
-        target = torch.ones(5, 5, 2, dtype=torch.int64).to(device)
-        model = ExpandNet(target).to(device)
-
-        x = torch.randn(5, 3, 1, 2, dtype=torch.float32).to(device)
-        x1 = torch.randn(5, 3, 5, 2, dtype=torch.float32).to(device)
-
-        input0_desc = IODescription("x", [5, 3, 1, 2], "float32")
-        input1_desc = IODescription("x1", [5, 3, 5, 2], "float32")
-        output0_desc = IODescription("output0", [], "float32")
-        output1_desc = IODescription("output1", [5, 3, 5, 2], "float32")
-        model_desc = ModelDescription([input0_desc, input1_desc], [output0_desc, output1_desc])
-
-        learning_rate = torch.tensor([1.0000000e00]).to(device)
-        input_args = [x, x1, learning_rate]
-
-        onnx_model = self.get_onnx_model(model, model_desc, input_args, device)
-
-        # check that expand output has shape
-        expand_nodes = self.find_nodes(onnx_model, "Expand")
-        assert len(expand_nodes) == 1
-
-        model_info = onnx_model.graph.value_info
-        assert model_info[0].name == expand_nodes[0].output[0]
-        assert model_info[0].type == onnx_model.graph.input[1].type
-
-    def test_bert(self):
-        device = torch.device("cpu")
-
-        model_tester = BertModelTest.BertModelTester(self)
-        (
-            config,
-            input_ids,
-            token_type_ids,
-            input_mask,
-            sequence_labels,
-            token_labels,
-            choice_labels,
-        ) = model_tester.prepare_config_and_inputs()
-
-        model = BertForPreTraining(config=config)
-        model.eval()
-
-        loss, prediction_scores, seq_relationship_score = model(
-            input_ids,
-            attention_mask=input_mask,
-            token_type_ids=token_type_ids,
-            masked_lm_labels=token_labels,
-            next_sentence_label=sequence_labels,
-        )
-
-        model_desc = ModelDescription(
-            [
-                model_tester.input_ids_desc,
-                model_tester.attention_mask_desc,
-                model_tester.token_type_ids_desc,
-                model_tester.masked_lm_labels_desc,
-                model_tester.next_sentence_label_desc,
-            ],
-            [model_tester.loss_desc, model_tester.prediction_scores_desc, model_tester.seq_relationship_scores_desc],
-        )
-
-        from collections import namedtuple
-
-        MyArgs = namedtuple(
-            "MyArgs", "local_rank world_size max_steps learning_rate warmup_proportion batch_size seq_len"
-        )
-        args = MyArgs(
-            local_rank=0,
-            world_size=1,
-            max_steps=100,
-            learning_rate=0.00001,
-            warmup_proportion=0.01,
-            batch_size=13,
-            seq_len=7,
-        )
-
-        dataset_len = 100
-        dataloader = create_ort_test_dataloader(model_desc.inputs_, args.batch_size, args.seq_len, dataset_len, device)
-        learning_rate = torch.tensor(1.0e0, dtype=torch.float32).to(device)
-        for b in dataloader:
-            batch = b
-            break
-        learning_rate = torch.tensor([1.00e00]).to(device)
-        inputs = [*batch, learning_rate]
-
-        onnx_model = self.get_onnx_model(model, model_desc, inputs, device, _extra_postprocess=postprocess_model)
-
-        self._bert_helper(onnx_model)
-
-    def _bert_helper(self, onnx_model):
-        # count layer_norm
-        count_layer_norm = self.count_nodes(onnx_model, "LayerNormalization")
-        assert count_layer_norm == 0
-
-        # get expand node and check output shape
-        expand_nodes = self.find_nodes(onnx_model, "Expand")
-        assert len(expand_nodes) == 1
-
-        model_info = onnx_model.graph.value_info
-        assert model_info[0].name == expand_nodes[0].output[0]
-        assert model_info[0].type == onnx_model.graph.input[0].type
-
-    def test_extra_postpass(self):
-        def postpass_replace_first_add_with_sub(model):
-            # this post pass replaces the first Add node with Sub in the model.
-            # Previous graph
-            #   (subgraph 1)        (subgraph 2)
-            #        |                   |
-            #        |                   |
-            #        |________   ________|
-            #                 | |
-            #                 Add
-            #                  |
-            #             (subgraph 3)
-            #
-            # Post graph
-            #   (subgraph 1)        (subgraph 2)
-            #        |                   |
-            #        |                   |
-            #        |________   ________|
-            #                 | |
-            #                 Sub
-            #                  |
-            #             (subgraph 3)
-            add_nodes = [n for n in model.graph.node if n.op_type == "Add"]
-            add_nodes[0].op_type = "Sub"
-
-        class MultiAdd(nn.Module):
-            def __init__(self, target):
-                super().__init__()
-                self.loss = nn.CrossEntropyLoss()
-                self.target = target
-                self.linear = torch.nn.Linear(2, 2, bias=False)
-
-            def forward(self, x, x1):
-                output = x + x1
-                output = output + x
-                output = output + x1
-                output = self.linear(output)
-                loss = self.loss(output, self.target)
-                return loss, output
-
-        device = torch.device("cpu")
-        target = torch.ones(5, 2, dtype=torch.int64).to(device)
-        model = MultiAdd(target).to(device)
-
-        x = torch.randn(5, 5, 2, dtype=torch.float32).to(device)
-        x1 = torch.randn(5, 5, 2, dtype=torch.float32).to(device)
-
-        input0_desc = IODescription("x", [5, 5, 2], "float32")
-        input1_desc = IODescription("x1", [5, 5, 2], "float32")
-        output0_desc = IODescription("output0", [], "float32")
-        output1_desc = IODescription("output1", [5, 5, 2], "float32")
-        model_desc = ModelDescription([input0_desc, input1_desc], [output0_desc, output1_desc])
-
-        learning_rate = torch.tensor([1.0000000e00]).to(device)
-        input_args = [x, x1, learning_rate]
-
-        onnx_model = self.get_onnx_model(
-            model, model_desc, input_args, device, _extra_postprocess=postpass_replace_first_add_with_sub
-        )
-
-        # check that extra postpass is called, and called only once.
-        add_nodes = self.find_nodes(onnx_model, "Add")
-        sub_nodes = self.find_nodes(onnx_model, "Sub")
-        assert len(add_nodes) == 2
-        assert len(sub_nodes) == 1
-
-        unprocessed_onnx_model = self.get_onnx_model(
-            model, model_desc, input_args, device, _extra_postprocess=None, _enable_internal_postprocess=False
-        )
-        # check that the model is unchanged.
-        add_nodes = self.find_nodes(unprocessed_onnx_model, "Add")
-        sub_nodes = self.find_nodes(unprocessed_onnx_model, "Sub")
-        assert len(add_nodes) == 3
-        assert len(sub_nodes) == 0
-
-        processed_onnx_model = self.get_onnx_model(
-            unprocessed_onnx_model,
-            model_desc,
-            input_args,
-            device,
-            _extra_postprocess=postpass_replace_first_add_with_sub,
-        )
-        # check that extra postpass is called, and called only once.
-        add_nodes = self.find_nodes(processed_onnx_model, "Add")
-        sub_nodes = self.find_nodes(processed_onnx_model, "Sub")
-        assert len(add_nodes) == 2
-        assert len(sub_nodes) == 1
-
-
-if __name__ == "__main__":
-    unittest.main(module=__name__, buffer=True)
diff --git a/orttraining/orttraining/test/python/orttraining_ortmodule_tests.py b/orttraining/orttraining/test/python/orttraining_ortmodule_tests.py
index 0e7e9d23ee627..5341cd053ac18 100644
--- a/orttraining/orttraining/test/python/orttraining_ortmodule_tests.py
+++ b/orttraining/orttraining/test/python/orttraining_ortmodule_tests.py
@@ -43,7 +43,7 @@ def run_ortmodule_ops_tests(cwd, log, transformers_cache):
 
     env = get_env_with_transformers_cache(transformers_cache)
 
-    command = [sys.executable, "-m", "pytest", "-sv", "orttraining_test_onnx_ops_ortmodule.py"]
+    command = [sys.executable, "-m", "pytest", "-sv", "orttraining_test_ortmodule_onnx_ops.py"]
 
     run_subprocess(command, cwd=cwd, log=log, env=env).check_returncode()
 
@@ -146,7 +146,7 @@ def run_data_sampler_tests(cwd, log):
 def run_hooks_tests(cwd, log):
     log.debug("Running: Data hooks tests")
 
-    command = [sys.executable, "-m", "pytest", "-sv", "orttraining_test_hooks.py"]
+    command = [sys.executable, "-m", "pytest", "-sv", "orttraining_test_ortmodule_hooks.py"]
 
     run_subprocess(command, cwd=cwd, log=log).check_returncode()
 
diff --git a/orttraining/orttraining/test/python/orttraining_run_bert_pretrain.py b/orttraining/orttraining/test/python/orttraining_run_bert_pretrain.py
deleted file mode 100644
index eea733684f140..0000000000000
--- a/orttraining/orttraining/test/python/orttraining_run_bert_pretrain.py
+++ /dev/null
@@ -1,801 +0,0 @@
-# ==================
-import dataclasses
-import datetime
-import glob
-import json
-import logging
-import os
-import random
-import shutil
-import unittest
-from concurrent.futures import ProcessPoolExecutor
-from dataclasses import dataclass, field
-from typing import Any, Dict, Optional
-
-import h5py
-import numpy as np
-import torch
-import torch.distributed as dist
-from torch.utils.data import DataLoader, Dataset, RandomSampler
-from torch.utils.tensorboard import SummaryWriter
-from tqdm import tqdm
-from transformers import BertConfig, BertForPreTraining, HfArgumentParser
-
-import onnxruntime as ort
-
-# need to override torch.onnx.symbolic_opset12.nll_loss to handle ignore_index == -100 cases.
-# the fix for ignore_index == -100 cases is already in pytorch master.
-# however to use current torch master is causing computation changes in many tests.
-# eventually we will use pytorch with fixed nll_loss once computation
-# issues are understood and solved.
-import onnxruntime.capi.pt_patch
-from onnxruntime.training import amp, optim, orttrainer
-from onnxruntime.training.checkpoint import aggregate_checkpoints
-from onnxruntime.training.optim import LinearWarmupLRScheduler, PolyWarmupLRScheduler  # noqa: F401
-
-# we cannot make full convergence run in nightly pipeling because of its timeout limit,
-# max_steps is still needed to calculate learning rate. force_to_stop_max_steps is used to
-# terminate the training before the pipeline run hit its timeout.
-force_to_stop_max_steps = 2500
-
-logging.basicConfig(
-    format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s", datefmt="%m/%d/%Y %H:%M:%S", level=logging.INFO
-)
-logger = logging.getLogger(__name__)
-
-
-def get_rank():
-    if not dist.is_available():
-        return 0
-    if not dist.is_initialized():
-        return 0
-    return dist.get_rank()
-
-
-def is_main_process(args):
-    if hasattr(args, "world_rank"):
-        return args.world_rank in [-1, 0]
-    else:
-        return get_rank() == 0
-
-
-def bert_model_description(config):
-    vocab_size = config.vocab_size
-    new_model_desc = {
-        "inputs": [
-            (
-                "input_ids",
-                ["batch", "max_seq_len_in_batch"],
-            ),
-            (
-                "attention_mask",
-                ["batch", "max_seq_len_in_batch"],
-            ),
-            (
-                "token_type_ids",
-                ["batch", "max_seq_len_in_batch"],
-            ),
-            (
-                "masked_lm_labels",
-                ["batch", "max_seq_len_in_batch"],
-            ),
-            (
-                "next_sentence_label",
-                [
-                    "batch",
-                ],
-            ),
-        ],
-        "outputs": [
-            ("loss", [], True),
-            (
-                "prediction_scores",
-                ["batch", "max_seq_len_in_batch", vocab_size],
-            ),
-            (
-                "seq_relationship_scores",
-                ["batch", 2],
-            ),
-        ],
-    }
-    return new_model_desc
-
-
-def create_pretraining_dataset(input_file, max_pred_length, args):
-    train_data = pretraining_dataset(input_file=input_file, max_pred_length=max_pred_length)
-    train_sampler = RandomSampler(train_data)
-    train_dataloader = DataLoader(
-        train_data, sampler=train_sampler, batch_size=args.train_batch_size * args.n_gpu, num_workers=0, pin_memory=True
-    )
-    return train_dataloader, input_file
-
-
-class pretraining_dataset(Dataset):  # noqa: N801
-    def __init__(self, input_file, max_pred_length):
-        logger.info("pretraining_dataset: %s, max_pred_length: %d", input_file, max_pred_length)
-        self.input_file = input_file
-        self.max_pred_length = max_pred_length
-        f = h5py.File(input_file, "r")
-        keys = [
-            "input_ids",
-            "input_mask",
-            "segment_ids",
-            "masked_lm_positions",
-            "masked_lm_ids",
-            "next_sentence_labels",
-        ]
-        self.inputs = [np.asarray(f[key][:]) for key in keys]
-        f.close()
-
-    def __len__(self):
-        "Denotes the total number of samples"
-        return len(self.inputs[0])
-
-    def __getitem__(self, index):
-        [input_ids, input_mask, segment_ids, masked_lm_positions, masked_lm_ids, next_sentence_labels] = [
-            torch.from_numpy(input[index].astype(np.int64))
-            if indice < 5
-            else torch.from_numpy(np.asarray(input[index].astype(np.int64)))
-            for indice, input in enumerate(self.inputs)
-        ]
-
-        # HF model use default ignore_index value (-100) for CrossEntropyLoss
-        masked_lm_labels = torch.ones(input_ids.shape, dtype=torch.long) * -100
-        index = self.max_pred_length
-        # store number of  masked tokens in index
-        padded_mask_indices = (masked_lm_positions == 0).nonzero()
-        if len(padded_mask_indices) != 0:
-            index = padded_mask_indices[0].item()
-        masked_lm_labels[masked_lm_positions[:index]] = masked_lm_ids[:index]
-        return [input_ids, segment_ids, input_mask, masked_lm_labels, next_sentence_labels]
-
-
-import argparse  # noqa: E402
-
-
-def parse_arguments():
-    parser = argparse.ArgumentParser()
-
-    # batch size test config parameters
-    parser.add_argument(
-        "--enable_mixed_precision",
-        default=False,
-        action="store_true",
-        help="Whether to use 16-bit float precision instead of 32-bit",
-    )
-
-    parser.add_argument(
-        "--sequence_length",
-        default=512,
-        type=int,
-        help="The maximum total input sequence length after WordPiece tokenization. \n"
-        "Sequences longer than this will be truncated, and sequences shorter \n"
-        "than this will be padded.",
-    )
-    parser.add_argument(
-        "--max_predictions_per_seq", default=80, type=int, help="The maximum total of masked tokens in input sequence"
-    )
-    parser.add_argument("--max_batch_size", default=32, type=int, help="Total batch size for training.")
-
-    parser.add_argument("--gelu_recompute", default=False, action="store_true")
-
-    parser.add_argument("--attn_dropout_recompute", default=False, action="store_true")
-
-    parser.add_argument("--transformer_layer_recompute", default=False, action="store_true")
-
-    args = parser.parse_args()
-    return args
-
-
-@dataclass
-class PretrainArguments:
-    """
-    Arguments pertaining to which model/config/tokenizer we are going to fine-tune from.
-    """
-
-    input_dir: str = field(
-        default=None, metadata={"help": "The input data dir. Should contain .hdf5 files  for the task"}
-    )
-
-    bert_model: str = field(
-        default=None,
-        metadata={
-            "help": "Bert pre-trained model selected in the list: bert-base-uncased, \
-            bert-large-uncased, bert-base-cased, bert-base-multilingual, bert-base-chinese."
-        },
-    )
-
-    output_dir: str = field(
-        default=None, metadata={"help": "The output directory where the model checkpoints will be written."}
-    )
-
-    cache_dir: str = field(
-        default="/tmp/bert_pretrain/",
-        metadata={"help": "The output directory where the model checkpoints will be written."},
-    )
-    max_seq_length: Optional[int] = field(
-        default=512,
-        metadata={
-            "help": "The maximum total input sequence length after tokenization. Sequences longer \
-            than this will be truncated, sequences shorter will be padded."
-        },
-    )
-
-    max_predictions_per_seq: Optional[int] = field(
-        default=80, metadata={"help": "The maximum total of masked tokens in input sequence."}
-    )
-
-    train_batch_size: Optional[int] = field(default=32, metadata={"help": "Batch size for training."})
-
-    learning_rate: Optional[float] = field(default=5e-5, metadata={"help": "The initial learning rate for Lamb."})
-
-    num_train_epochs: Optional[float] = field(
-        default=3.0, metadata={"help": "Total number of training epochs to perform."}
-    )
-
-    max_steps: Optional[float] = field(default=1000, metadata={"help": "Total number of training steps to perform."})
-
-    warmup_proportion: Optional[float] = field(
-        default=0.01,
-        metadata={
-            "help": "Proportion of training to perform linear learning rate warmup for. \
-            E.g., 0.1 = 10%% of training."
-        },
-    )
-
-    local_rank: Optional[int] = field(default=-1, metadata={"help": "local_rank for distributed training on gpus."})
-
-    world_rank: Optional[int] = field(default=-1)
-
-    world_size: Optional[int] = field(default=1)
-
-    seed: Optional[int] = field(default=42, metadata={"help": "random seed for initialization."})
-
-    gradient_accumulation_steps: Optional[int] = field(
-        default=1, metadata={"help": "Number of updates steps to accumualte before performing a backward/update pass."}
-    )
-
-    fp16: bool = field(default=False, metadata={"help": "Whether to use 16-bit float precision instead of 32-bit."})
-
-    gelu_recompute: bool = field(
-        default=False, metadata={"help": "Whether to enable recomputing Gelu activation output to save memory."}
-    )
-    attn_dropout_recompute: bool = field(
-        default=False, metadata={"help": "Whether to enable recomputing attention dropout to save memory."}
-    )
-    transformer_layer_recompute: bool = field(
-        default=False, metadata={"help": "Whether to enable recomputing transformer layerwise to save memory."}
-    )
-
-    loss_scale: Optional[float] = field(
-        default=0.0, metadata={"help": "Loss scaling, positive power of 2 values can improve fp16 convergence."}
-    )
-
-    deepspeed_zero_stage: Optional[int] = field(default=0, metadata={"help": "Deepspeed Zero Stage. 0 => disabled"})
-
-    log_freq: Optional[float] = field(default=1.0, metadata={"help": "frequency of logging loss."})
-
-    checkpoint_activations: bool = field(default=False, metadata={"help": "Whether to use gradient checkpointing."})
-
-    resume_from_checkpoint: bool = field(
-        default=False, metadata={"help": "Whether to resume training from checkpoint."}
-    )
-
-    resume_step: Optional[int] = field(default=-1, metadata={"help": "Step to resume training from."})
-
-    num_steps_per_checkpoint: Optional[int] = field(
-        default=100, metadata={"help": "Number of update steps until a model checkpoint is saved to disk."}
-    )
-
-    save_checkpoint: Optional[bool] = field(
-        default=False, metadata={"help": "Enable for saving a model checkpoint to disk."}
-    )
-
-    init_state_dict: Optional[dict] = field(default=None, metadata={"help": "State to load before training."})
-
-    phase2: bool = field(default=False, metadata={"help": "Whether to train with seq len 512."})
-
-    allreduce_post_accumulation: bool = field(
-        default=False, metadata={"help": "Whether to do allreduces during gradient accumulation steps."}
-    )
-
-    allreduce_post_accumulation_fp16: bool = field(
-        default=False, metadata={"help": "Whether to do fp16 allreduce post accumulation."}
-    )
-
-    accumulate_into_fp16: bool = field(default=False, metadata={"help": "Whether to use fp16 gradient accumulators."})
-
-    phase1_end_step: Optional[int] = field(
-        default=7038, metadata={"help": "Whether to use fp16 gradient accumulators."}
-    )
-
-    tensorboard_dir: Optional[str] = field(
-        default=None,
-    )
-
-    schedule: Optional[str] = field(
-        default="warmup_poly",
-    )
-
-    # this argument is test specific. to run a full bert model will take too long to run. instead, we reduce
-    # number of hidden layers so that it can show convergence to an extend to help detect any regression.
-    force_num_hidden_layers: Optional[int] = field(
-        default=None, metadata={"help": "Whether to use fp16 gradient accumulators."}
-    )
-
-    def to_json_string(self):
-        """
-        Serializes this instance to a JSON string.
-        """
-        return json.dumps(dataclasses.asdict(self), indent=2)
-
-    def to_sanitized_dict(self) -> Dict[str, Any]:
-        """
-        Sanitized serialization to use with TensorBoard`s hparams
-        """
-        d = dataclasses.asdict(self)
-        valid_types = [bool, int, float, str, torch.Tensor]
-        return {k: v if type(v) in valid_types else str(v) for k, v in d.items()}
-
-
-def setup_training(args):
-    assert torch.cuda.is_available()
-
-    if args.local_rank == -1:
-        args.local_rank = 0
-        args.world_rank = 0
-
-    print("args.local_rank: ", args.local_rank)
-    torch.cuda.set_device(args.local_rank)
-    device = torch.device("cuda", args.local_rank)
-    args.n_gpu = 1
-
-    if args.gradient_accumulation_steps < 1:
-        raise ValueError(
-            f"Invalid gradient_accumulation_steps parameter: {args.gradient_accumulation_steps}, should be >= 1"
-        )
-    if args.train_batch_size % args.gradient_accumulation_steps != 0:
-        raise ValueError(
-            "Invalid gradient_accumulation_steps parameter: {}, batch size {} should be divisible".format(
-                args.gradient_accumulation_steps, args.train_batch_size
-            )
-        )
-
-    # args.train_batch_size is per global step (optimization step) batch size
-    # now make it a per gpu batch size
-    args.train_batch_size = args.train_batch_size // args.gradient_accumulation_steps
-    args.train_batch_size = args.train_batch_size // args.world_size
-
-    logger.info("setup_training: args.train_batch_size = %d", args.train_batch_size)
-    return device, args
-
-
-def setup_torch_distributed(world_rank, world_size):
-    os.environ["RANK"] = str(world_rank)
-    os.environ["WORLD_SIZE"] = str(world_size)
-    os.environ["MASTER_ADDR"] = "localhost"
-    os.environ["MASTER_PORT"] = "12345"
-    torch.distributed.init_process_group(backend="nccl", world_size=world_size, rank=world_rank)
-    return
-
-
-def prepare_model(args, device):
-    config = BertConfig.from_pretrained(args.bert_model, cache_dir=args.cache_dir)
-
-    # config.num_hidden_layers = 12
-    if args.force_num_hidden_layers:
-        logger.info("Modifying model config with num_hidden_layers to %d", args.force_num_hidden_layers)
-        config.num_hidden_layers = args.force_num_hidden_layers
-
-    model = BertForPreTraining(config)
-    if args.init_state_dict is not None:
-        model.load_state_dict(args.init_state_dict)
-    model_desc = bert_model_description(config)
-
-    lr_scheduler = LinearWarmupLRScheduler(total_steps=int(args.max_steps), warmup=args.warmup_proportion)
-
-    loss_scaler = amp.DynamicLossScaler() if args.fp16 else None
-
-    options = orttrainer.ORTTrainerOptions(
-        {
-            "batch": {"gradient_accumulation_steps": args.gradient_accumulation_steps},
-            "device": {"id": str(device)},
-            "mixed_precision": {"enabled": args.fp16, "loss_scaler": loss_scaler},
-            "graph_transformer": {
-                "attn_dropout_recompute": args.attn_dropout_recompute,
-                "gelu_recompute": args.gelu_recompute,
-                "transformer_layer_recompute": args.transformer_layer_recompute,
-            },
-            "debug": {
-                "deterministic_compute": True,
-            },
-            "utils": {"grad_norm_clip": True},
-            "distributed": {
-                "world_rank": max(0, args.local_rank),
-                "world_size": args.world_size,
-                "local_rank": max(0, args.local_rank),
-                "allreduce_post_accumulation": args.allreduce_post_accumulation,
-                "deepspeed_zero_optimization": {"stage": args.deepspeed_zero_stage},
-                "enable_adasum": False,
-            },
-            "lr_scheduler": lr_scheduler,
-        }
-    )
-
-    param_optimizer = list(model.named_parameters())
-    no_decay_keys = ["bias", "gamma", "beta", "LayerNorm"]
-    params = [
-        {
-            "params": [n for n, p in param_optimizer if any(no_decay_key in n for no_decay_key in no_decay_keys)],
-            "alpha": 0.9,
-            "beta": 0.999,
-            "lambda": 0.0,
-            "epsilon": 1e-6,
-        },
-        {
-            "params": [n for n, p in param_optimizer if not any(no_decay_key in n for no_decay_key in no_decay_keys)],
-            "alpha": 0.9,
-            "beta": 0.999,
-            "lambda": 0.0,
-            "epsilon": 1e-6,
-        },
-    ]
-
-    optim_config = optim.AdamConfig(params=params, lr=2e-5, do_bias_correction=True)
-    model = orttrainer.ORTTrainer(model, model_desc, optim_config, options=options)
-
-    return model
-
-
-def get_data_file(f_id, world_rank, world_size, files):
-    num_files = len(files)
-    if world_size > num_files:
-        remainder = world_size % num_files
-        return files[(f_id * world_size + world_rank + remainder * f_id) % num_files]
-    elif world_size > 1:
-        return files[(f_id * world_size + world_rank) % num_files]
-    else:
-        return files[f_id % num_files]
-
-
-def main():
-    parser = HfArgumentParser(PretrainArguments)
-    args = parser.parse_args_into_dataclasses()[0]
-    do_pretrain(args)
-
-
-def do_pretrain(args):
-    if is_main_process(args) and args.tensorboard_dir:
-        tb_writer = SummaryWriter(log_dir=args.tensorboard_dir)
-        tb_writer.add_text("args", args.to_json_string())
-        tb_writer.add_hparams(args.to_sanitized_dict(), metric_dict={})
-    else:
-        tb_writer = None
-
-    random.seed(args.seed)
-    np.random.seed(args.seed)
-    torch.manual_seed(args.seed)
-    ort.set_seed(args.seed)
-
-    device, args = setup_training(args)
-
-    model = prepare_model(args, device)
-
-    logger.info("Running training: Batch size = %d, initial LR = %f", args.train_batch_size, args.learning_rate)
-
-    average_loss = 0.0
-    epoch = 0
-    training_steps = 0
-
-    pool = ProcessPoolExecutor(1)
-    while True:
-        files = [
-            os.path.join(args.input_dir, f)
-            for f in os.listdir(args.input_dir)
-            if os.path.isfile(os.path.join(args.input_dir, f)) and "training" in f
-        ]
-        files.sort()
-        random.shuffle(files)
-
-        f_id = 0
-        train_dataloader, data_file = create_pretraining_dataset(
-            get_data_file(f_id, args.world_rank, args.world_size, files), args.max_predictions_per_seq, args
-        )
-
-        for f_id in range(1, len(files)):
-            logger.info("data file %s" % (data_file))
-
-            dataset_future = pool.submit(
-                create_pretraining_dataset,
-                get_data_file(f_id, args.world_rank, args.world_size, files),
-                args.max_predictions_per_seq,
-                args,
-            )
-
-            train_iter = tqdm(train_dataloader, desc="Iteration") if is_main_process(args) else train_dataloader
-            for _step, batch in enumerate(train_iter):
-                training_steps += 1
-                batch = [t.to(device) for t in batch]  # noqa: PLW2901
-                input_ids, segment_ids, input_mask, masked_lm_labels, next_sentence_labels = batch
-
-                loss, _, _ = model.train_step(
-                    input_ids, input_mask, segment_ids, masked_lm_labels, next_sentence_labels
-                )
-                average_loss += loss.item()
-
-                global_step = model._train_step_info.optimization_step
-                if training_steps % (args.log_freq * args.gradient_accumulation_steps) == 0:
-                    if is_main_process(args):
-                        divisor = args.log_freq * args.gradient_accumulation_steps
-                        if tb_writer:
-                            lr = model.options.lr_scheduler.get_last_lr()[0]
-                            tb_writer.add_scalar("train/summary/scalar/Learning_Rate", lr, global_step)
-                            if args.fp16:
-                                tb_writer.add_scalar("train/summary/scalar/loss_scale_25", loss, global_step)
-                                # TODO: ORTTrainer to expose all_finite
-                                # tb_writer.add_scalar('train/summary/scalar/all_fp16_gradients_finite_859', all_finite, global_step)
-                            tb_writer.add_scalar("train/summary/total_loss", average_loss / divisor, global_step)
-
-                        print(f"Step:{global_step} Average Loss = {average_loss / divisor}")
-
-                    if global_step >= args.max_steps or global_step >= force_to_stop_max_steps:
-                        if tb_writer:
-                            tb_writer.close()
-
-                    if global_step >= args.max_steps:
-                        if args.save_checkpoint:
-                            model.save_checkpoint(os.path.join(args.output_dir, f"checkpoint-{args.world_rank}.ortcp"))
-                        final_loss = average_loss / (args.log_freq * args.gradient_accumulation_steps)
-                        return final_loss
-
-                    average_loss = 0
-
-            del train_dataloader
-
-            train_dataloader, data_file = dataset_future.result(timeout=None)
-
-        epoch += 1
-
-
-def generate_tensorboard_logdir(root_dir):
-    current_date_time = datetime.datetime.today()
-
-    dt_string = current_date_time.strftime("BERT_pretrain_%y_%m_%d_%I_%M_%S")
-    return os.path.join(root_dir, dt_string)
-
-
-class ORTBertPretrainTest(unittest.TestCase):
-    def setUp(self):
-        self.output_dir = "/bert_data/hf_data/test_out/bert_pretrain_results"
-        self.bert_model = "bert-base-uncased"
-        self.local_rank = -1
-        self.world_rank = -1
-        self.world_size = 1
-        self.max_steps = 300000
-        self.learning_rate = 5e-4
-        self.max_seq_length = 512
-        self.max_predictions_per_seq = 20
-        self.input_dir = "/bert_data/hdf5_lower_case_1_seq_len_128_max_pred_20_masked_lm_prob_0.15_random_seed_12345_dupe_factor_5/books_wiki_en_corpus/train"
-        self.train_batch_size = 4096
-        self.gradient_accumulation_steps = 64
-        self.fp16 = True
-        self.allreduce_post_accumulation = True
-        self.tensorboard_dir = "/bert_data/hf_data/test_out"
-
-    def test_pretrain_throughput(self, process_args=None):
-        if process_args.sequence_length == 128:
-            input_dir = "/bert_data/hdf5_lower_case_1_seq_len_128_max_pred_20_masked_lm_prob_0.15_random_seed_12345_dupe_factor_5/books_wiki_en_corpus/train"
-        else:
-            input_dir = "/bert_data/hdf5_lower_case_1_seq_len_512_max_pred_80_masked_lm_prob_0.15_random_seed_12345_dupe_factor_5/books_wiki_en_corpus/train"
-
-        print("process_args.enable_mixed_precision: ", process_args.enable_mixed_precision)
-        print("process_args.sequence_length: ", process_args.sequence_length)
-        print("process_args.max_batch_size: ", process_args.max_batch_size)
-        print("process_args.max_predictions_per_seq: ", process_args.max_predictions_per_seq)
-        print("process_args.gelu_recompute: ", process_args.gelu_recompute)
-        print("process_args.attn_dropout_recompute: ", process_args.attn_dropout_recompute)
-        print("process_args.transformer_layer_recompute: ", process_args.transformer_layer_recompute)
-
-        args = PretrainArguments(
-            input_dir=input_dir,
-            output_dir="/bert_data/hf_data/test_out/bert_pretrain_results",
-            bert_model="bert-large-uncased",
-            local_rank=self.local_rank,
-            world_rank=self.world_rank,
-            world_size=self.world_size,
-            max_steps=10,
-            learning_rate=5e-4,
-            max_seq_length=process_args.sequence_length,
-            max_predictions_per_seq=process_args.max_predictions_per_seq,
-            train_batch_size=process_args.max_batch_size,
-            gradient_accumulation_steps=1,
-            fp16=process_args.enable_mixed_precision,
-            gelu_recompute=process_args.gelu_recompute,
-            attn_dropout_recompute=process_args.attn_dropout_recompute,
-            transformer_layer_recompute=process_args.transformer_layer_recompute,
-            allreduce_post_accumulation=True,
-            # TODO: remove
-            force_num_hidden_layers=2,
-        )
-        do_pretrain(args)
-
-    def test_pretrain_convergence(self):
-        args = PretrainArguments(
-            output_dir=self.output_dir,
-            bert_model=self.bert_model,
-            local_rank=self.local_rank,
-            world_rank=self.world_rank,
-            world_size=self.world_size,
-            max_steps=self.max_steps,
-            learning_rate=self.learning_rate,
-            max_seq_length=self.max_seq_length,
-            max_predictions_per_seq=self.max_predictions_per_seq,
-            train_batch_size=self.train_batch_size,
-            gradient_accumulation_steps=self.gradient_accumulation_steps,
-            input_dir=self.input_dir,
-            fp16=self.fp16,
-            allreduce_post_accumulation=self.allreduce_post_accumulation,
-            force_num_hidden_layers=self.force_num_hidden_layers,
-            tensorboard_dir=generate_tensorboard_logdir("/bert_data/hf_data/test_out/"),
-        )
-        final_loss = do_pretrain(args)
-        return final_loss
-
-    def test_pretrain_zero(self):
-        assert self.world_size > 0, "ZeRO test requires a distributed run."
-        setup_torch_distributed(self.world_rank, self.world_size)
-        per_gpu_batch_size = 32
-        optimization_batch_size = per_gpu_batch_size * self.world_size  # set to disable grad accumulation
-
-        self.train_batch_size = optimization_batch_size
-        self.gradient_accumulation_steps = 1
-        self.deepspeed_zero_stage = 1
-        self.force_num_hidden_layers = 2
-        self.max_seq_length = 32
-        self.output_dir = "./bert_pretrain_ckpt"
-        if self.world_rank == 0:
-            if os.path.isdir(self.output_dir):
-                shutil.rmtree(self.output_dir)
-            os.makedirs(self.output_dir, exist_ok=True)
-
-        torch.distributed.barrier()
-
-        assert os.path.exists(self.output_dir)
-
-        # run a few optimization steps
-        self.max_steps = 200
-        args = PretrainArguments(
-            output_dir=self.output_dir,
-            bert_model=self.bert_model,
-            local_rank=self.local_rank,
-            world_rank=self.world_rank,
-            world_size=self.world_size,
-            max_steps=self.max_steps,
-            learning_rate=self.learning_rate,
-            max_seq_length=self.max_seq_length,
-            max_predictions_per_seq=self.max_predictions_per_seq,
-            train_batch_size=self.train_batch_size,
-            gradient_accumulation_steps=self.gradient_accumulation_steps,
-            input_dir=self.input_dir,
-            fp16=self.fp16,
-            allreduce_post_accumulation=self.allreduce_post_accumulation,
-            force_num_hidden_layers=self.force_num_hidden_layers,
-            deepspeed_zero_stage=self.deepspeed_zero_stage,
-            save_checkpoint=True,
-        )
-        do_pretrain(args)
-
-        # ensure all workers reach this point before loading the checkpointed state
-        torch.distributed.barrier()
-
-        # on rank 0, load the trained state
-        if args.world_rank == 0:
-            checkpoint_files = glob.glob(os.path.join(self.output_dir, "checkpoint*.ortcp"))
-            args.init_state_dict = aggregate_checkpoints(checkpoint_files, pytorch_format=True)
-
-        torch.distributed.barrier()
-
-        # run a single step to get the loss, on rank 0 should be lesser than starting loss
-        args.save_checkpoint = False
-        args.max_steps = 1
-        args.deepspeed_zero_stage = 0
-        final_loss = do_pretrain(args)
-        return final_loss
-
-
-if __name__ == "__main__":
-    import sys
-
-    logger.warning("sys.argv: %s", sys.argv)
-    # usage:
-    # data parallel training
-    #   mpirun -n 4 python orttraining_run_bert_pretrain.py
-    #
-    # single gpu:
-    # python orttraining_run_bert_pretrain.py ORTBertPretrainTest.test_pretrain_throughput
-    #   [batch size test arguments]
-    # python orttraining_run_bert_pretrain.py ORTBertPretrainTest.test_pretrain_convergence
-    #
-    # pytorch.distributed.launch will not work because ort backend requires MPI to broadcast ncclUniqueId
-    # calling unpublished get_mpi_context_xxx to get rank/size numbers.
-    try:
-        # In case ORT is not built with MPI/NCCL, there are no get_mpi_context_xxx internal apis.
-        from onnxruntime.capi._pybind_state import get_mpi_context_local_size  # noqa: F401
-        from onnxruntime.capi._pybind_state import get_mpi_context_world_rank  # noqa: F401
-        from onnxruntime.capi._pybind_state import get_mpi_context_local_rank, get_mpi_context_world_size
-
-        has_get_mpi_context_internal_api = True
-    except ImportError:
-        has_get_mpi_context_internal_api = False
-        pass
-    if has_get_mpi_context_internal_api and get_mpi_context_world_size() > 1:
-        world_size = get_mpi_context_world_size()
-        print("get_mpi_context_world_size(): ", world_size)
-        local_rank = get_mpi_context_local_rank()
-
-        if local_rank == 0:
-            print("================================================================> os.getpid() = ", os.getpid())
-
-        test = ORTBertPretrainTest()
-        test.setUp()
-        test.local_rank = local_rank
-        test.world_rank = local_rank
-        test.world_size = world_size
-
-        if len(sys.argv) >= 2 and sys.argv[1] == "ORTBertPretrainTest.test_pretrain_zero":
-            logger.info("running ORTBertPretrainTest.test_pretrain_zero()...")
-            final_loss = test.test_pretrain_zero()
-            logger.info("ORTBertPretrainTest.test_pretrain_zero() rank = %i final loss = %f", local_rank, final_loss)
-            if local_rank == 0:
-                test.assertLess(final_loss, 10.2)
-            else:
-                test.assertGreater(final_loss, 11.0)
-            logger.info("ORTBertPretrainTest.test_pretrain_zero() passed")
-        elif len(sys.argv) >= 2 and sys.argv[1] == "ORTBertPretrainTest.test_pretrain_convergence":
-            logger.info("running ORTBertPretrainTest.test_pretrain_convergence()...")
-            test.max_steps = 200
-            test.force_num_hidden_layers = 8
-            final_loss = test.test_pretrain_convergence()
-            logger.info("ORTBertPretrainTest.test_pretrain_convergence() final loss = %f", final_loss)
-            test.assertLess(final_loss, 8.5)
-            logger.info("ORTBertPretrainTest.test_pretrain_convergence() passed")
-        else:
-            # https://microsoft.sharepoint.com/teams/ONNX2/_layouts/15/Doc.aspx?sourcedoc={170774be-e1c6-4f8b-a3ae-984f211fe410}&action=edit&wd=target%28ONNX%20Training.one%7C8176133b-c7cb-4ef2-aa9d-3fdad5344c40%2FGitHub%20Master%20Merge%20Schedule%7Cb67f0db1-e3a0-4add-80a6-621d67fd8107%2F%29
-            # to make equivalent args for cpp convergence test
-            test.max_seq_length = 128
-            test.max_predictions_per_seq = 20
-            test.gradient_accumulation_steps = 16
-
-            # cpp_batch_size (=64) * grad_acc * world_size
-            test.train_batch_size = 64 * test.gradient_accumulation_steps * test.world_size
-            test.max_steps = 300000
-
-            test.force_num_hidden_layers = None
-
-            # already using Adam (e.g. AdamConfig)
-            test.learning_rate = 5e-4
-            test.warmup_proportion = 0.1
-
-            final_loss = test.test_pretrain_convergence()
-            logger.info("ORTBertPretrainTest.test_pretrain_convergence() final loss = %f", final_loss)
-    else:
-        # unittest does not accept user defined arguments
-        # we need to run this script with user defined arguments
-        if len(sys.argv) >= 2 and sys.argv[1] == "ORTBertPretrainTest.test_pretrain_throughput":
-            run_test_pretrain_throughput, run_test_pretrain_convergence = True, False
-            sys.argv.remove("ORTBertPretrainTest.test_pretrain_throughput")
-        elif len(sys.argv) >= 2 and sys.argv[1] == "ORTBertPretrainTest.test_pretrain_convergence":
-            run_test_pretrain_throughput, run_test_pretrain_convergence = False, True
-            sys.argv.remove("ORTBertPretrainTest.test_pretrain_convergence")
-        else:
-            run_test_pretrain_throughput, run_test_pretrain_convergence = True, True
-        process_args = parse_arguments()
-        test = ORTBertPretrainTest()
-        test.setUp()
-
-        if run_test_pretrain_throughput:
-            logger.info("running single GPU ORTBertPretrainTest.test_pretrain_throughput()...")
-            test.test_pretrain_throughput(process_args)
-            logger.info("single GPU ORTBertPretrainTest.test_pretrain_throughput() passed")
-
-        # unittest.main()
diff --git a/orttraining/orttraining/test/python/orttraining_run_frontend_batch_size_test.py b/orttraining/orttraining/test/python/orttraining_run_frontend_batch_size_test.py
deleted file mode 100644
index e96b90138c3d5..0000000000000
--- a/orttraining/orttraining/test/python/orttraining_run_frontend_batch_size_test.py
+++ /dev/null
@@ -1,67 +0,0 @@
-import collections
-import subprocess
-import sys
-
-Config = collections.namedtuple(
-    "Config",
-    [
-        "enable_mixed_precision",
-        "sequence_length",
-        "max_batch_size",
-        "max_predictions_per_seq",
-        "gelu_recompute",
-        "attn_dropout_recompute",
-        "transformer_layer_recompute",
-    ],
-)
-
-configs = [
-    Config(True, 128, 46, 20, False, False, False),
-    Config(True, 512, 8, 80, False, False, False),
-    Config(False, 128, 26, 20, False, False, False),
-    Config(False, 512, 4, 80, False, False, False),
-    Config(True, 128, 50, 20, True, False, False),
-    Config(True, 128, 50, 20, False, True, False),
-    Config(True, 128, 76, 20, False, False, True),
-    Config(True, 512, 8, 80, True, False, False),
-    Config(True, 512, 9, 80, False, True, False),
-    Config(True, 512, 15, 80, False, False, True),
-]
-
-
-def run_with_config(config):
-    print(
-        "##### testing name - {}-{} #####".format(
-            "fp16" if config.enable_mixed_precision else "fp32", config.sequence_length
-        )
-    )
-    print("gelu_recompute: ", config.gelu_recompute)
-    print("attn_dropout_recompute: ", config.attn_dropout_recompute)
-    print("transformer_layer_recompute: ", config.transformer_layer_recompute)
-
-    cmds = [
-        sys.executable,
-        "orttraining_run_bert_pretrain.py",
-        "ORTBertPretrainTest.test_pretrain_throughput",
-        "--sequence_length",
-        str(config.sequence_length),
-        "--max_batch_size",
-        str(config.max_batch_size),
-        "--max_predictions_per_seq",
-        str(config.max_predictions_per_seq),
-    ]
-    if config.enable_mixed_precision:
-        cmds.append("--enable_mixed_precision")
-    if config.gelu_recompute:
-        cmds.append("--gelu_recompute")
-    if config.attn_dropout_recompute:
-        cmds.append("--attn_dropout_recompute")
-    if config.transformer_layer_recompute:
-        cmds.append("--transformer_layer_recompute")
-
-    # access to azure storage shared disk is much slower so we need a longer timeout.
-    subprocess.run(cmds, timeout=1200).check_returncode()
-
-
-for config in configs:
-    run_with_config(config)
diff --git a/orttraining/orttraining/test/python/orttraining_run_glue.py b/orttraining/orttraining/test/python/orttraining_run_glue.py
deleted file mode 100644
index 794e2f8cc7240..0000000000000
--- a/orttraining/orttraining/test/python/orttraining_run_glue.py
+++ /dev/null
@@ -1,323 +0,0 @@
-# adapted from run_glue.py of huggingface transformers
-
-import dataclasses  # noqa: F401
-import logging
-import os
-import unittest
-from dataclasses import dataclass, field
-from typing import Dict, Optional
-
-import numpy as np
-from numpy.testing import assert_allclose
-from transformers import (
-    AutoConfig,
-    AutoModelForSequenceClassification,
-    AutoTokenizer,
-    EvalPrediction,
-    GlueDataset,
-    GlueDataTrainingArguments,
-    TrainingArguments,
-    glue_compute_metrics,
-    glue_output_modes,
-    glue_tasks_num_labels,
-    set_seed,
-)
-
-import onnxruntime
-from onnxruntime.capi.ort_trainer import IODescription, LossScaler, ModelDescription, ORTTrainer  # noqa: F401
-
-try:
-    from onnxruntime.capi._pybind_state import get_mpi_context_local_size  # noqa: F401
-    from onnxruntime.capi._pybind_state import get_mpi_context_world_rank  # noqa: F401
-    from onnxruntime.capi._pybind_state import get_mpi_context_local_rank, get_mpi_context_world_size
-
-    has_get_mpi_context_internal_api = True
-except ImportError:
-    has_get_mpi_context_internal_api = False
-    pass
-
-
-import torch  # noqa: F401
-from orttraining_transformer_trainer import ORTTransformerTrainer
-
-logger = logging.getLogger(__name__)
-
-
-def verify_old_and_new_api_are_equal(results_per_api):
-    new_api_results = results_per_api[True]
-    old_api_results = results_per_api[False]
-    for key in new_api_results:
-        assert_allclose(new_api_results[key], old_api_results[key])
-
-
-@dataclass
-class ModelArguments:
-    """
-    Arguments pertaining to which model/config/tokenizer we are going to fine-tune from.
-    """
-
-    model_name_or_path: str = field(metadata={"help": "model identifier from huggingface.co/models"})
-    config_name: Optional[str] = field(
-        default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"}
-    )
-    tokenizer_name: Optional[str] = field(
-        default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"}
-    )
-    cache_dir: Optional[str] = field(
-        default=None, metadata={"help": "Where do you want to store the pretrained models downloaded from s3"}
-    )
-
-
-class ORTGlueTest(unittest.TestCase):
-    def setUp(self):
-        # configurations not to be changed accoss tests
-        self.max_seq_length = 128
-        self.train_batch_size = 8
-        self.learning_rate = 2e-5
-        self.num_train_epochs = 3.0
-        self.local_rank = -1
-        self.world_size = 1
-        self.overwrite_output_dir = True
-        self.gradient_accumulation_steps = 1
-        self.data_dir = "/bert_data/hf_data/glue_data/"
-        self.output_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "glue_test_output/")
-        self.cache_dir = "/tmp/glue/"
-        self.logging_steps = 10
-
-    def test_roberta_with_mrpc(self):
-        expected_acc = 0.85
-        expected_f1 = 0.88
-        expected_loss = 0.35
-        results = self.run_glue(model_name="roberta-base", task_name="MRPC", fp16=False)
-
-        assert results["acc"] >= expected_acc
-        assert results["f1"] >= expected_f1
-        assert results["loss"] <= expected_loss
-
-    def test_roberta_fp16_with_mrpc(self):
-        expected_acc = 0.87
-        expected_f1 = 0.90
-        expected_loss = 0.33
-
-        results = self.run_glue(model_name="roberta-base", task_name="MRPC", fp16=True)
-
-        assert results["acc"] >= expected_acc
-        assert results["f1"] >= expected_f1
-        assert results["loss"] <= expected_loss
-
-    def test_bert_with_mrpc(self):
-        if self.local_rank == -1:
-            expected_acc = 0.83
-            expected_f1 = 0.88
-            expected_loss = 0.44
-        elif self.local_rank == 0:
-            expected_acc = 0.81
-            expected_f1 = 0.86
-            expected_loss = 0.44
-
-        results = self.run_glue(model_name="bert-base-cased", task_name="MRPC", fp16=False)
-
-        if self.local_rank in [-1, 0]:
-            assert results["acc"] >= expected_acc
-            assert results["f1"] >= expected_f1
-            assert results["loss"] <= expected_loss
-
-    def test_bert_fp16_with_mrpc(self):
-        expected_acc = 0.84
-        expected_f1 = 0.88
-        expected_loss = 0.46
-
-        results = self.run_glue(model_name="bert-base-cased", task_name="MRPC", fp16=True)
-
-        assert results["acc"] >= expected_acc
-        assert results["f1"] >= expected_f1
-        assert results["loss"] <= expected_loss
-
-    def model_to_desc(self, model_name, model):
-        if model_name.startswith("bert") or model_name.startswith("xlnet"):
-            model_desc = {
-                "inputs": [
-                    (
-                        "input_ids",
-                        ["batch", "max_seq_len_in_batch"],
-                    ),
-                    (
-                        "attention_mask",
-                        ["batch", "max_seq_len_in_batch"],
-                    ),
-                    (
-                        "token_type_ids",
-                        ["batch", "max_seq_len_in_batch"],
-                    ),
-                    (
-                        "labels",
-                        [
-                            "batch",
-                        ],
-                    ),
-                ],
-                "outputs": [("loss", [], True), ("logits", ["batch", 2])],
-            }
-        elif model_name.startswith("roberta"):
-            model_desc = {
-                "inputs": [
-                    (
-                        "input_ids",
-                        ["batch", "max_seq_len_in_batch"],
-                    ),
-                    (
-                        "attention_mask",
-                        ["batch", "max_seq_len_in_batch"],
-                    ),
-                    (
-                        "labels",
-                        [
-                            "batch",
-                        ],
-                    ),
-                ],
-                "outputs": [("loss", [], True), ("logits", ["batch", 2])],
-            }
-        else:
-            raise RuntimeError(f"unsupported base model name {model_name}.")
-
-        return model_desc
-
-    def run_glue(self, model_name, task_name, fp16):
-        model_args = ModelArguments(model_name_or_path=model_name, cache_dir=self.cache_dir)
-        data_args = GlueDataTrainingArguments(
-            task_name=task_name, data_dir=os.path.join(self.data_dir, task_name), max_seq_length=self.max_seq_length
-        )
-
-        training_args = TrainingArguments(
-            output_dir=os.path.join(self.output_dir, task_name),
-            do_train=True,
-            do_eval=True,
-            per_gpu_train_batch_size=self.train_batch_size,
-            learning_rate=self.learning_rate,
-            num_train_epochs=self.num_train_epochs,
-            local_rank=self.local_rank,
-            overwrite_output_dir=self.overwrite_output_dir,
-            gradient_accumulation_steps=self.gradient_accumulation_steps,
-            fp16=fp16,
-            logging_steps=self.logging_steps,
-        )
-
-        # Setup logging
-        logging.basicConfig(
-            format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
-            datefmt="%m/%d/%Y %H:%M:%S",
-            level=logging.INFO if training_args.local_rank in [-1, 0] else logging.WARN,
-        )
-        logger.warning(
-            "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
-            training_args.local_rank,
-            training_args.device,
-            training_args.n_gpu,
-            bool(training_args.local_rank != -1),
-            training_args.fp16,
-        )
-        logger.info("Training/evaluation parameters %s", training_args)
-
-        set_seed(training_args.seed)
-        onnxruntime.set_seed(training_args.seed)
-
-        try:
-            num_labels = glue_tasks_num_labels[data_args.task_name]
-            output_mode = glue_output_modes[data_args.task_name]
-        except KeyError:
-            raise ValueError("Task not found: %s" % (data_args.task_name))  # noqa: B904
-
-        config = AutoConfig.from_pretrained(
-            model_args.config_name if model_args.config_name else model_args.model_name_or_path,
-            num_labels=num_labels,
-            finetuning_task=data_args.task_name,
-            cache_dir=model_args.cache_dir,
-        )
-        tokenizer = AutoTokenizer.from_pretrained(
-            model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
-            cache_dir=model_args.cache_dir,
-        )
-
-        model = AutoModelForSequenceClassification.from_pretrained(
-            model_args.model_name_or_path,
-            from_tf=bool(".ckpt" in model_args.model_name_or_path),
-            config=config,
-            cache_dir=model_args.cache_dir,
-        )
-
-        train_dataset = GlueDataset(data_args, tokenizer=tokenizer) if training_args.do_train else None
-
-        eval_dataset = GlueDataset(data_args, tokenizer=tokenizer, mode="dev") if training_args.do_eval else None
-
-        def compute_metrics(p: EvalPrediction) -> Dict:
-            if output_mode == "classification":
-                preds = np.argmax(p.predictions, axis=1)
-            elif output_mode == "regression":
-                preds = np.squeeze(p.predictions)
-            return glue_compute_metrics(data_args.task_name, preds, p.label_ids)
-
-        model_desc = self.model_to_desc(model_name, model)
-        # Initialize the ORTTrainer within ORTTransformerTrainer
-        trainer = ORTTransformerTrainer(
-            model=model,
-            model_desc=model_desc,
-            args=training_args,
-            train_dataset=train_dataset,
-            eval_dataset=eval_dataset,
-            compute_metrics=compute_metrics,
-            world_size=self.world_size,
-        )
-
-        # Training
-        if training_args.do_train:
-            trainer.train()
-            trainer.save_model()
-
-        # Evaluation
-        results = {}
-        if training_args.do_eval and training_args.local_rank in [-1, 0]:
-            logger.info("*** Evaluate ***")
-
-            result = trainer.evaluate()
-
-            logger.info(f"***** Eval results {data_args.task_name} *****")
-            for key, value in result.items():
-                logger.info("  %s = %s", key, value)
-
-            results.update(result)
-
-        return results
-
-
-if __name__ == "__main__":
-    if has_get_mpi_context_internal_api:
-        local_rank = get_mpi_context_local_rank()
-        world_size = get_mpi_context_world_size()
-    else:
-        local_rank = -1
-        world_size = 1
-
-    if world_size > 1:
-        # mpi launch
-        logger.warning("mpirun launch, local_rank / world_size: %s : % s", local_rank, world_size)
-
-        # TrainingArguments._setup_devices will call torch.distributed.init_process_group(backend="nccl")
-        # pytorch expects following environment settings (which would be set if launched with torch.distributed.launch).
-
-        os.environ["RANK"] = str(local_rank)
-        os.environ["WORLD_SIZE"] = str(world_size)
-        os.environ["MASTER_ADDR"] = "127.0.0.1"
-        os.environ["MASTER_PORT"] = "29500"
-
-        from onnxruntime.capi._pybind_state import set_cuda_device_id
-
-        set_cuda_device_id(local_rank)
-
-        test = ORTGlueTest()
-        test.setUp()
-        test.local_rank = local_rank
-        test.world_size = world_size
-        test.test_bert_with_mrpc()
-    else:
-        unittest.main()
diff --git a/orttraining/orttraining/test/python/orttraining_run_multiple_choice.py b/orttraining/orttraining/test/python/orttraining_run_multiple_choice.py
deleted file mode 100644
index 92db204593bcd..0000000000000
--- a/orttraining/orttraining/test/python/orttraining_run_multiple_choice.py
+++ /dev/null
@@ -1,281 +0,0 @@
-# adapted from run_multiple_choice.py of huggingface transformers
-# https://github.com/huggingface/transformers/blob/master/examples/multiple-choice/run_multiple_choice.py
-
-import dataclasses  # noqa: F401
-import logging
-import os
-import unittest
-from dataclasses import dataclass, field
-from typing import Dict, Optional
-
-import numpy as np
-import torch  # noqa: F401
-from numpy.testing import assert_allclose  # noqa: F401
-from orttraining_run_glue import verify_old_and_new_api_are_equal  # noqa: F401
-from orttraining_transformer_trainer import ORTTransformerTrainer
-from transformers import HfArgumentParser  # noqa: F401
-from transformers import Trainer  # noqa: F401
-from transformers import (
-    AutoConfig,
-    AutoModelForMultipleChoice,
-    AutoTokenizer,
-    EvalPrediction,
-    TrainingArguments,
-    set_seed,
-)
-from utils_multiple_choice import MultipleChoiceDataset, Split, SwagProcessor
-
-import onnxruntime
-from onnxruntime.capi.ort_trainer import IODescription, LossScaler, ModelDescription, ORTTrainer  # noqa: F401
-
-logger = logging.getLogger(__name__)
-
-
-def simple_accuracy(preds, labels):
-    return (preds == labels).mean()
-
-
-@dataclass
-class ModelArguments:
-    """
-    Arguments pertaining to which model/config/tokenizer we are going to fine-tune from.
-    """
-
-    model_name_or_path: str = field(metadata={"help": "model identifier from huggingface.co/models"})
-    config_name: Optional[str] = field(
-        default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"}
-    )
-    tokenizer_name: Optional[str] = field(
-        default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"}
-    )
-    cache_dir: Optional[str] = field(
-        default=None, metadata={"help": "Where do you want to store the pretrained models downloaded from s3"}
-    )
-
-
-@dataclass
-class DataTrainingArguments:
-    """
-    Arguments pertaining to what data we are going to input our model for training and eval.
-    """
-
-    task_name: str = field(metadata={"help": "The name of the task to train on."})
-    data_dir: str = field(metadata={"help": "Should contain the data files for the task."})
-    max_seq_length: int = field(
-        default=128,
-        metadata={
-            "help": "The maximum total input sequence length after tokenization. Sequences longer "
-            "than this will be truncated, sequences shorter will be padded."
-        },
-    )
-    overwrite_cache: bool = field(default=False, metadata={"help": "Overwrite the cached training and evaluation sets"})
-
-
-class ORTMultipleChoiceTest(unittest.TestCase):
-    def setUp(self):
-        # configurations not to be changed accoss tests
-        self.max_seq_length = 80
-        self.train_batch_size = 16
-        self.eval_batch_size = 2
-        self.learning_rate = 2e-5
-        self.num_train_epochs = 1.0
-        self.local_rank = -1
-        self.overwrite_output_dir = True
-        self.gradient_accumulation_steps = 8
-        self.data_dir = "/bert_data/hf_data/swag/swagaf/data"
-        self.output_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "multiple_choice_test_output/")
-        self.cache_dir = "/tmp/multiple_choice/"
-        self.logging_steps = 10
-        self.rtol = 2e-01
-
-    def test_bert_with_swag(self):
-        expected_acc = 0.75
-        expected_loss = 0.64
-
-        results = self.run_multiple_choice(model_name="bert-base-cased", task_name="swag", fp16=False)
-        assert results["acc"] >= expected_acc
-        assert results["loss"] <= expected_loss
-
-    def test_bert_fp16_with_swag(self):
-        # larger batch can be handled with mixed precision
-        self.train_batch_size = 32
-
-        expected_acc = 0.73
-        expected_loss = 0.68
-
-        results = self.run_multiple_choice(model_name="bert-base-cased", task_name="swag", fp16=True)
-        assert results["acc"] >= expected_acc
-        assert results["loss"] <= expected_loss
-
-    def run_multiple_choice(self, model_name, task_name, fp16):
-        model_args = ModelArguments(model_name_or_path=model_name, cache_dir=self.cache_dir)
-        data_args = DataTrainingArguments(
-            task_name=task_name, data_dir=self.data_dir, max_seq_length=self.max_seq_length
-        )
-
-        training_args = TrainingArguments(
-            output_dir=os.path.join(self.output_dir, task_name),
-            do_train=True,
-            do_eval=True,
-            per_gpu_train_batch_size=self.train_batch_size,
-            per_gpu_eval_batch_size=self.eval_batch_size,
-            learning_rate=self.learning_rate,
-            num_train_epochs=self.num_train_epochs,
-            local_rank=self.local_rank,
-            overwrite_output_dir=self.overwrite_output_dir,
-            gradient_accumulation_steps=self.gradient_accumulation_steps,
-            fp16=fp16,
-            logging_steps=self.logging_steps,
-        )
-
-        # Setup logging
-        logging.basicConfig(
-            format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
-            datefmt="%m/%d/%Y %H:%M:%S",
-            level=logging.INFO if training_args.local_rank in [-1, 0] else logging.WARN,
-        )
-        logger.warning(
-            "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
-            training_args.local_rank,
-            training_args.device,
-            training_args.n_gpu,
-            bool(training_args.local_rank != -1),
-            training_args.fp16,
-        )
-        logger.info("Training/evaluation parameters %s", training_args)
-
-        set_seed(training_args.seed)
-        onnxruntime.set_seed(training_args.seed)
-
-        try:
-            processor = SwagProcessor()
-            label_list = processor.get_labels()
-            num_labels = len(label_list)
-        except KeyError:
-            raise ValueError("Task not found: %s" % (data_args.task_name))  # noqa: B904
-
-        config = AutoConfig.from_pretrained(
-            model_args.config_name if model_args.config_name else model_args.model_name_or_path,
-            num_labels=num_labels,
-            finetuning_task=data_args.task_name,
-            cache_dir=model_args.cache_dir,
-        )
-
-        tokenizer = AutoTokenizer.from_pretrained(
-            model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
-            cache_dir=model_args.cache_dir,
-        )
-
-        model = AutoModelForMultipleChoice.from_pretrained(
-            model_args.model_name_or_path,
-            from_tf=bool(".ckpt" in model_args.model_name_or_path),
-            config=config,
-            cache_dir=model_args.cache_dir,
-        )
-
-        # Get datasets
-        train_dataset = (
-            MultipleChoiceDataset(
-                data_dir=data_args.data_dir,
-                tokenizer=tokenizer,
-                task=data_args.task_name,
-                processor=processor,
-                max_seq_length=data_args.max_seq_length,
-                overwrite_cache=data_args.overwrite_cache,
-                mode=Split.train,
-            )
-            if training_args.do_train
-            else None
-        )
-        eval_dataset = (
-            MultipleChoiceDataset(
-                data_dir=data_args.data_dir,
-                tokenizer=tokenizer,
-                task=data_args.task_name,
-                processor=processor,
-                max_seq_length=data_args.max_seq_length,
-                overwrite_cache=data_args.overwrite_cache,
-                mode=Split.dev,
-            )
-            if training_args.do_eval
-            else None
-        )
-
-        def compute_metrics(p: EvalPrediction) -> Dict:
-            preds = np.argmax(p.predictions, axis=1)
-            return {"acc": simple_accuracy(preds, p.label_ids)}
-
-        if model_name.startswith("bert"):
-            model_desc = {
-                "inputs": [
-                    (
-                        "input_ids",
-                        ["batch", num_labels, "max_seq_len_in_batch"],
-                    ),
-                    (
-                        "attention_mask",
-                        ["batch", num_labels, "max_seq_len_in_batch"],
-                    ),
-                    (
-                        "token_type_ids",
-                        ["batch", num_labels, "max_seq_len_in_batch"],
-                    ),
-                    (
-                        "labels",
-                        ["batch", num_labels],
-                    ),
-                ],
-                "outputs": [("loss", [], True), ("reshaped_logits", ["batch", num_labels])],
-            }
-        else:
-            model_desc = {
-                "inputs": [
-                    (
-                        "input_ids",
-                        ["batch", num_labels, "max_seq_len_in_batch"],
-                    ),
-                    (
-                        "attention_mask",
-                        ["batch", num_labels, "max_seq_len_in_batch"],
-                    ),
-                    (
-                        "labels",
-                        ["batch", num_labels],
-                    ),
-                ],
-                "outputs": [("loss", [], True), ("reshaped_logits", ["batch", num_labels])],
-            }
-
-        # Initialize the ORTTrainer within ORTTransformerTrainer
-        trainer = ORTTransformerTrainer(
-            model=model,
-            model_desc=model_desc,
-            args=training_args,
-            train_dataset=train_dataset,
-            eval_dataset=eval_dataset,
-            compute_metrics=compute_metrics,
-        )
-
-        # Training
-        if training_args.do_train:
-            trainer.train()
-            trainer.save_model()
-
-        # Evaluation
-        results = {}
-        if training_args.do_eval and training_args.local_rank in [-1, 0]:
-            logger.info("*** Evaluate ***")
-
-            result = trainer.evaluate()
-
-            logger.info(f"***** Eval results {data_args.task_name} *****")
-            for key, value in result.items():
-                logger.info("  %s = %s", key, value)
-
-            results.update(result)
-
-        return results
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/orttraining/orttraining/test/python/orttraining_test_bert_postprocess.py b/orttraining/orttraining/test/python/orttraining_test_bert_postprocess.py
deleted file mode 100644
index 71e6bb8e4d2f2..0000000000000
--- a/orttraining/orttraining/test/python/orttraining_test_bert_postprocess.py
+++ /dev/null
@@ -1,6 +0,0 @@
-from orttraining_test_layer_norm_transform import layer_norm_transform  # noqa: F401
-from orttraining_test_model_transform import add_expand_shape, add_name, fix_transpose  # noqa: F401
-
-
-def postprocess_model(model):
-    add_name(model)
diff --git a/orttraining/orttraining/test/python/orttraining_test_checkpoint_storage.py b/orttraining/orttraining/test/python/orttraining_test_checkpoint_storage.py
deleted file mode 100644
index 71d13fdcfd290..0000000000000
--- a/orttraining/orttraining/test/python/orttraining_test_checkpoint_storage.py
+++ /dev/null
@@ -1,257 +0,0 @@
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# Licensed under the MIT License.
-# orttraining_test_checkpoint_storage.py
-
-import os
-import pickle
-import shutil
-
-import numpy as np
-import pytest
-import torch
-
-from onnxruntime.training import _checkpoint_storage
-
-# Helper functions
-
-
-def _equals(a, b):
-    """Checks recursively if two dictionaries are equal"""
-    if isinstance(a, dict):
-        return all(not (key not in b or not _equals(a[key], b[key])) for key in a)
-    else:
-        if isinstance(a, bytes):
-            a = a.decode()
-        if isinstance(b, bytes):
-            b = b.decode()
-        are_equal = a == b
-        return are_equal if isinstance(are_equal, bool) else are_equal.all()
-
-    return False
-
-
-def _numpy_types(obj_value):
-    """Return a bool indicating whether or not the input obj_value is a numpy type object
-
-    Recursively checks if the obj_value (could be a dictionary) is a numpy type object.
-    Exceptions are str and bytes.
-
-    Returns true if object is numpy type, str, or bytes
-    False if any other type
-    """
-    if not isinstance(obj_value, dict):
-        return isinstance(obj_value, (str, bytes)) or type(obj_value).__module__ == np.__name__
-
-    return all(_numpy_types(value) for _, value in obj_value.items())
-
-
-def _get_dict(separated_key):
-    """Create dummy dictionary with different datatypes
-
-    Returns the tuple of the entire dummy dictionary created, key argument as a dictionary for _checkpoint_storage.load
-    function and the value for that key in the original dictionary
-
-    For example the complete dictionary is represented by:
-    {
-        'int1':1,
-        'int2': 2,
-        'int_list': [1,2,3,5,6],
-        'dict1': {
-            'np_array': np.arange(100),
-            'dict2': {'int3': 3, 'int4': 4},
-            'str1': "onnxruntime"
-        },
-        'bool1': bool(True),
-        'int5': 5,
-        'float1': 2.345,
-        'np_array_float': np.array([1.234, 2.345, 3.456]),
-        'np_array_float_3_dim': np.array([[[1,2],[3,4]], [[5,6],[7,8]]])
-    }
-
-    if the input key is ['dict1', 'str1'], then the key argument returned is 'dict1/str1'
-    and the value corresponding to that is "onnxruntime"
-
-    so, for the above example, the returned tuple is:
-    (original_dict, {'key': 'dict1/str1', "onnxruntime")
-    """
-    test_dict = {
-        "int1": 1,
-        "int2": 2,
-        "int_list": [1, 2, 3, 5, 6],
-        "dict1": {"np_array": np.arange(100), "dict2": {"int3": 3, "int4": 4}, "str1": "onnxruntime"},
-        "bool1": bool(True),
-        "int5": 5,
-        "float1": 2.345,
-        "np_array_float": np.array([1.234, 2.345, 3.456]),
-        "np_array_float_3_dim": np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]]),
-    }
-    key = ""
-    expected_val = test_dict
-    for single_key in separated_key:
-        key += single_key + "/"
-        expected_val = expected_val[single_key]
-    return test_dict, {"key": key} if len(separated_key) > 0 else dict(), expected_val
-
-
-class _CustomClass:
-    """Custom object that encpsulates dummy values for loss, epoch and train_step"""
-
-    def __init__(self):
-        self._loss = 1.23
-        self._epoch = 12000
-        self._train_step = 25
-
-    def __eq__(self, other):
-        if isinstance(other, _CustomClass):
-            return self._loss == other._loss and self._epoch == other._epoch and self._train_step == other._train_step
-
-
-# Test fixtures
-
-
-@pytest.yield_fixture(scope="function")
-def checkpoint_storage_test_setup():
-    checkpoint_dir = os.path.abspath("checkpoint_dir/")
-    if not os.path.exists(checkpoint_dir):
-        os.makedirs(checkpoint_dir, exist_ok=True)
-    pytest.checkpoint_path = os.path.join(checkpoint_dir, "checkpoint.ortcp")
-    yield "checkpoint_storage_test_setup"
-    shutil.rmtree(checkpoint_dir)
-
-
-@pytest.yield_fixture(scope="function")
-def checkpoint_storage_test_parameterized_setup(request, checkpoint_storage_test_setup):
-    yield request.param
-
-
-# Tests
-
-
-@pytest.mark.parametrize(
-    "checkpoint_storage_test_parameterized_setup",
-    [
-        _get_dict([]),
-        _get_dict(["int1"]),
-        _get_dict(["dict1"]),
-        _get_dict(["dict1", "dict2"]),
-        _get_dict(["dict1", "dict2", "int4"]),
-        _get_dict(["dict1", "str1"]),
-        _get_dict(["bool1"]),
-        _get_dict(["float1"]),
-        _get_dict(["np_array_float"]),
-    ],
-    indirect=True,
-)
-def test_checkpoint_storage_saved_dict_matches_loaded(checkpoint_storage_test_parameterized_setup):
-    to_save = checkpoint_storage_test_parameterized_setup[0]
-    key_arg = checkpoint_storage_test_parameterized_setup[1]
-    expected = checkpoint_storage_test_parameterized_setup[2]
-    _checkpoint_storage.save(to_save, pytest.checkpoint_path)
-    loaded = _checkpoint_storage.load(pytest.checkpoint_path, **key_arg)
-    assert _equals(loaded, expected)
-    assert _numpy_types(loaded)
-
-
-@pytest.mark.parametrize(
-    "checkpoint_storage_test_parameterized_setup",
-    [{"int_set": {1, 2, 3, 4, 5}}, {"str_set": {"one", "two"}}, [1, 2, 3], 2.352],
-    indirect=True,
-)
-def test_checkpoint_storage_saving_non_supported_types_fails(checkpoint_storage_test_parameterized_setup):
-    to_save = checkpoint_storage_test_parameterized_setup
-    with pytest.raises(Exception):  # noqa: B017
-        _checkpoint_storage.save(to_save, pytest.checkpoint_path)
-
-
-@pytest.mark.parametrize(
-    "checkpoint_storage_test_parameterized_setup",
-    [
-        ({"int64_tensor": torch.tensor(np.arange(100))}, "int64_tensor", torch.int64, np.int64),
-        ({"int32_tensor": torch.tensor(np.arange(100), dtype=torch.int32)}, "int32_tensor", torch.int32, np.int32),
-        ({"int16_tensor": torch.tensor(np.arange(100), dtype=torch.int16)}, "int16_tensor", torch.int16, np.int16),
-        ({"int8_tensor": torch.tensor(np.arange(100), dtype=torch.int8)}, "int8_tensor", torch.int8, np.int8),
-        ({"float64_tensor": torch.tensor(np.array([1.0, 2.0]))}, "float64_tensor", torch.float64, np.float64),
-        (
-            {"float32_tensor": torch.tensor(np.array([1.0, 2.0]), dtype=torch.float32)},
-            "float32_tensor",
-            torch.float32,
-            np.float32,
-        ),
-        (
-            {"float16_tensor": torch.tensor(np.array([1.0, 2.0]), dtype=torch.float16)},
-            "float16_tensor",
-            torch.float16,
-            np.float16,
-        ),
-    ],
-    indirect=True,
-)
-def test_checkpoint_storage_saving_tensor_datatype(checkpoint_storage_test_parameterized_setup):
-    tensor_dict = checkpoint_storage_test_parameterized_setup[0]
-    tensor_name = checkpoint_storage_test_parameterized_setup[1]
-    tensor_dtype = checkpoint_storage_test_parameterized_setup[2]
-    np_dtype = checkpoint_storage_test_parameterized_setup[3]
-
-    _checkpoint_storage.save(tensor_dict, pytest.checkpoint_path)
-
-    loaded = _checkpoint_storage.load(pytest.checkpoint_path)
-    assert isinstance(loaded[tensor_name], np.ndarray)
-    assert tensor_dict[tensor_name].dtype == tensor_dtype
-    assert loaded[tensor_name].dtype == np_dtype
-    assert (tensor_dict[tensor_name].numpy() == loaded[tensor_name]).all()
-
-
-@pytest.mark.parametrize(
-    "checkpoint_storage_test_parameterized_setup",
-    [
-        ({"two_dim": torch.ones([2, 4], dtype=torch.float64)}, "two_dim"),
-        ({"three_dim": torch.ones([2, 4, 6], dtype=torch.float64)}, "three_dim"),
-        ({"four_dim": torch.ones([2, 4, 6, 8], dtype=torch.float64)}, "four_dim"),
-    ],
-    indirect=True,
-)
-def test_checkpoint_storage_saving_multiple_dimension_tensors(checkpoint_storage_test_parameterized_setup):
-    tensor_dict = checkpoint_storage_test_parameterized_setup[0]
-    tensor_name = checkpoint_storage_test_parameterized_setup[1]
-
-    _checkpoint_storage.save(tensor_dict, pytest.checkpoint_path)
-
-    loaded = _checkpoint_storage.load(pytest.checkpoint_path)
-    assert isinstance(loaded[tensor_name], np.ndarray)
-    assert (tensor_dict[tensor_name].numpy() == loaded[tensor_name]).all()
-
-
-@pytest.mark.parametrize(
-    "checkpoint_storage_test_parameterized_setup", [{}, {"a": {}}, {"a": {"b": {}}}], indirect=True
-)
-def test_checkpoint_storage_saving_and_loading_empty_dictionaries_succeeds(checkpoint_storage_test_parameterized_setup):
-    saved = checkpoint_storage_test_parameterized_setup
-    _checkpoint_storage.save(saved, pytest.checkpoint_path)
-
-    loaded = _checkpoint_storage.load(pytest.checkpoint_path)
-    assert _equals(saved, loaded)
-
-
-def test_checkpoint_storage_load_file_that_does_not_exist_fails(checkpoint_storage_test_setup):
-    with pytest.raises(Exception):  # noqa: B017
-        _checkpoint_storage.load(pytest.checkpoint_path)
-
-
-def test_checkpoint_storage_for_custom_user_dict_succeeds(checkpoint_storage_test_setup):
-    custom_class = _CustomClass()
-    user_dict = {"tensor1": torch.tensor(np.arange(100), dtype=torch.float32), "custom_class": custom_class}
-
-    pickled_bytes = pickle.dumps(user_dict).hex()
-    to_save = {"a": torch.tensor(np.array([1.0, 2.0]), dtype=torch.float32), "user_dict": pickled_bytes}
-    _checkpoint_storage.save(to_save, pytest.checkpoint_path)
-
-    loaded_dict = _checkpoint_storage.load(pytest.checkpoint_path)
-    assert (loaded_dict["a"] == to_save["a"].numpy()).all()
-    try:  # noqa: SIM105
-        loaded_dict["user_dict"] = loaded_dict["user_dict"].decode()
-    except AttributeError:
-        pass
-    loaded_obj = pickle.loads(bytes.fromhex(loaded_dict["user_dict"]))
-
-    assert torch.all(loaded_obj["tensor1"].eq(user_dict["tensor1"]))
-    assert loaded_obj["custom_class"] == custom_class
diff --git a/orttraining/orttraining/test/python/orttraining_test_data_loader.py b/orttraining/orttraining/test/python/orttraining_test_data_loader.py
index aa15b44ae0d66..0009d2d3d7e1b 100644
--- a/orttraining/orttraining/test/python/orttraining_test_data_loader.py
+++ b/orttraining/orttraining/test/python/orttraining_test_data_loader.py
@@ -4,8 +4,6 @@
 import torch
 from torch.utils.data import DataLoader, Dataset
 
-from onnxruntime.capi.ort_trainer import generate_sample
-
 global_rng = random.Random()
 
 
@@ -41,6 +39,16 @@ def floats_tensor(shape, scale=1.0, rng=None, name=None):
     return torch.tensor(data=values, dtype=torch.float).view(shape).contiguous()
 
 
+def generate_sample(desc, device=None):
+    """Generate a sample based on the description"""
+    # symbolic dimensions are described with strings. set symbolic dimensions to be 1
+    size = [s if isinstance(s, (int)) else 1 for s in desc.shape_]
+    if desc.num_classes_:
+        return torch.randint(0, desc.num_classes_, size, dtype=desc.dtype_).to(device)
+    else:
+        return torch.randn(size, dtype=desc.dtype_).to(device)
+
+
 class OrtTestDataset(Dataset):
     def __init__(self, input_desc, seq_len, dataset_len, device):
         import copy
diff --git a/orttraining/orttraining/test/python/orttraining_test_debuggability.py b/orttraining/orttraining/test/python/orttraining_test_debuggability.py
deleted file mode 100644
index 499f0ba7a1ff5..0000000000000
--- a/orttraining/orttraining/test/python/orttraining_test_debuggability.py
+++ /dev/null
@@ -1,40 +0,0 @@
-import pytest
-import torch
-from _test_commons import _load_pytorch_transformer_model
-
-from onnxruntime import set_seed
-from onnxruntime.training import optim, orttrainer
-
-###############################################################################
-# Testing starts here #########################################################
-###############################################################################
-
-
-@pytest.mark.parametrize(
-    "seed, device",
-    [
-        (24, "cuda"),
-    ],
-)
-def testORTTransformerModelExport(seed, device):
-    # Common setup
-    optim_config = optim.LambConfig()
-    opts = orttrainer.ORTTrainerOptions(
-        {
-            "debug": {
-                "check_model_export": True,
-            },
-            "device": {
-                "id": device,
-            },
-        }
-    )
-
-    # Setup for the first ORTTRainer run
-    torch.manual_seed(seed)
-    set_seed(seed)
-    model, model_desc, my_loss, batcher_fn, train_data, val_data, _ = _load_pytorch_transformer_model(device)
-    first_trainer = orttrainer.ORTTrainer(model, model_desc, optim_config, loss_fn=my_loss, options=opts)
-    data, targets = batcher_fn(train_data, 0)
-    _ = first_trainer.train_step(data, targets)
-    assert first_trainer._onnx_model is not None
diff --git a/orttraining/orttraining/test/python/orttraining_test_dort.py b/orttraining/orttraining/test/python/orttraining_test_dort.py
index 88d9c00984d3e..2a7012787be6e 100644
--- a/orttraining/orttraining/test/python/orttraining_test_dort.py
+++ b/orttraining/orttraining/test/python/orttraining_test_dort.py
@@ -19,6 +19,7 @@ class TestTorchDynamoOrt(unittest.TestCase):
     def setUp(self):
         # Make computation deterministic.
         torch.manual_seed(42)
+        print(f"TestTorchDynamoOrt uses PyTorch version {torch.__version__}")
 
     def test_elementwise_model(self):
         torch._dynamo.reset()
diff --git a/orttraining/orttraining/test/python/orttraining_test_ort_apis.py b/orttraining/orttraining/test/python/orttraining_test_ort_apis.py
index 506aafbe9f618..a3e666dd404f2 100644
--- a/orttraining/orttraining/test/python/orttraining_test_ort_apis.py
+++ b/orttraining/orttraining/test/python/orttraining_test_ort_apis.py
@@ -27,7 +27,7 @@ def run_training_apis_python_api_tests(cwd, log):
 
     log.debug("Running: ort training api tests")
 
-    command = [sys.executable, "-m", "pytest", "-sv", "orttraining_test_python_bindings.py"]
+    command = [sys.executable, "-m", "pytest", "-sv", "orttraining_test_ort_apis_py_bindings.py"]
 
     run_subprocess(command, cwd=cwd, log=log).check_returncode()
 
@@ -37,7 +37,7 @@ def run_onnxblock_tests(cwd, log):
 
     log.debug("Running: onnxblock tests")
 
-    command = [sys.executable, "-m", "pytest", "-sv", "orttraining_test_onnxblock.py"]
+    command = [sys.executable, "-m", "pytest", "-sv", "orttraining_test_ort_apis_onnxblock.py"]
 
     run_subprocess(command, cwd=cwd, log=log).check_returncode()
 
diff --git a/orttraining/orttraining/test/python/orttraining_test_onnxblock.py b/orttraining/orttraining/test/python/orttraining_test_ort_apis_onnxblock.py
similarity index 97%
rename from orttraining/orttraining/test/python/orttraining_test_onnxblock.py
rename to orttraining/orttraining/test/python/orttraining_test_ort_apis_onnxblock.py
index f7a7220dd66ea..6e5d54cbb9427 100644
--- a/orttraining/orttraining/test/python/orttraining_test_onnxblock.py
+++ b/orttraining/orttraining/test/python/orttraining_test_ort_apis_onnxblock.py
@@ -17,6 +17,14 @@
 # PyTorch Module definitions
 
 
+def get_opsets_model(filename):
+    if isinstance(filename, onnx.ModelProto):
+        onx = filename
+    else:
+        onx = onnx.load(filename)
+    return {d.domain: d.version for d in onx.opset_import}
+
+
 class SimpleNet(torch.nn.Module):
     def __init__(self, input_size, hidden_size, num_classes):
         super().__init__()
@@ -999,3 +1007,13 @@ def test_save_ort_format():
         assert os.path.exists(os.path.join(temp_dir, "eval_model.ort"))
         assert os.path.exists(os.path.join(temp_dir, "optimizer_model.onnx"))
         assert os.path.exists(os.path.join(temp_dir, "optimizer_model.ort"))
+        base_opsets = get_opsets_model(base_model)
+        training_opsets = get_opsets_model(os.path.join(temp_dir, "training_model.onnx"))
+        eval_opsets = get_opsets_model(os.path.join(temp_dir, "eval_model.onnx"))
+        optimizer_opsets = get_opsets_model(os.path.join(temp_dir, "optimizer_model.onnx"))
+        if base_opsets[""] != training_opsets[""]:
+            raise AssertionError(f"Opsets mismatch {base_opsets['']} != {training_opsets['']}.")
+        if base_opsets[""] != eval_opsets[""]:
+            raise AssertionError(f"Opsets mismatch {base_opsets['']} != {eval_opsets['']}.")
+        if base_opsets[""] != optimizer_opsets[""]:
+            raise AssertionError(f"Opsets mismatch {base_opsets['']} != {optimizer_opsets['']}.")
diff --git a/orttraining/orttraining/test/python/orttraining_test_python_bindings.py b/orttraining/orttraining/test/python/orttraining_test_ort_apis_py_bindings.py
similarity index 87%
rename from orttraining/orttraining/test/python/orttraining_test_python_bindings.py
rename to orttraining/orttraining/test/python/orttraining_test_ort_apis_py_bindings.py
index 56338ddbaffef..34d8c24ccfab4 100644
--- a/orttraining/orttraining/test/python/orttraining_test_python_bindings.py
+++ b/orttraining/orttraining/test/python/orttraining_test_ort_apis_py_bindings.py
@@ -11,7 +11,7 @@
 import onnx
 import pytest
 import torch
-from orttraining_test_onnxblock import _get_models
+from orttraining_test_ort_apis_onnxblock import _get_models
 
 import onnxruntime.training.onnxblock as onnxblock
 from onnxruntime import OrtValue, SessionOptions
@@ -360,14 +360,18 @@ def test_add_get_property(property_value):
         if isinstance(property_value, float):
             property_value = float(np.float32(property_value))
 
-        state["property"] = property_value
-        assert "property" in state
-        assert state["property"] == property_value
+        assert len(state.properties) == 0
+
+        state.properties["property"] = property_value
+        assert "property" in state.properties
+        assert state.properties["property"] == property_value
+        assert len(state.properties) == 1
 
         CheckpointState.save_checkpoint(state, checkpoint_file_path)
         new_state = CheckpointState.load_checkpoint(checkpoint_file_path)
-        assert "property" in new_state
-        assert new_state["property"] == property_value
+        assert "property" in new_state.properties
+        assert new_state.properties["property"] == property_value
+        assert len(new_state.properties) == 1
 
 
 def test_get_input_output_names():
@@ -563,3 +567,60 @@ def test_eval_step_with_ort_values():
         fetches = model(inputs, labels)
         assert isinstance(fetches, OrtValue)
         assert fetches
+
+
+@pytest.mark.parametrize("device", ["cpu", "cuda"])
+def test_get_and_set_parameter_values(device):
+    with tempfile.TemporaryDirectory() as temp_dir:
+        (
+            checkpoint_file_path,
+            training_model_file_path,
+            eval_model_file_path,
+            _,
+            pt_model,
+        ) = _create_training_artifacts(
+            temp_dir, requires_grad=["fc2.weight", "fc2.bias"], frozen_params=["fc1.weight", "fc1.bias"]
+        )
+
+        state = CheckpointState.load_checkpoint(checkpoint_file_path)
+
+        model = Module(training_model_file_path, state, eval_model_file_path, device=device)
+
+        state_dict = pt_model.state_dict()
+        assert len(state_dict) == len(state.parameters)
+        for parameter_name, _ in state.parameters:
+            assert parameter_name in state_dict
+
+        for name, pt_param in pt_model.named_parameters():
+            ort_param = state.parameters[name]
+            assert ort_param.name == name
+            assert np.allclose(pt_param.detach().cpu().numpy(), ort_param.data)
+            if name in ["fc1.weight", "fc1.bias"]:
+                assert ort_param.requires_grad is False
+                assert ort_param.grad is None
+            else:
+                assert ort_param.requires_grad is True
+                assert np.allclose(ort_param.grad, np.zeros_like(ort_param.data, dtype=np.float32))
+
+        original_param = state.parameters["fc1.weight"].data
+        state.parameters["fc1.weight"].data = np.ones_like(state.parameters["fc1.weight"].data, dtype=np.float32)
+        updated_param = state.parameters["fc1.weight"].data
+        assert np.allclose(updated_param, np.ones_like(updated_param, dtype=np.float32))
+
+        model.train()
+        inputs = torch.randn(64, 784).numpy()
+        labels = torch.randint(high=10, size=(64,), dtype=torch.int64).numpy()
+        loss = model(inputs, labels)
+        assert loss is not None
+        for name, _ in pt_model.named_parameters():
+            ort_param = state.parameters[name]
+            assert ort_param.name == name
+            if name in ["fc1.weight", "fc1.bias"]:
+                assert ort_param.requires_grad is False
+                assert ort_param.grad is None
+            else:
+                assert ort_param.requires_grad is True
+                assert ort_param.grad.any()
+
+        state.parameters["fc1.weight"] = original_param
+        assert np.allclose(state.parameters["fc1.weight"].data, original_param)
diff --git a/orttraining/orttraining/test/python/orttraining_test_ortmodule_api.py b/orttraining/orttraining/test/python/orttraining_test_ortmodule_api.py
index bf26fd1822dc4..ad0e5d8beba3d 100644
--- a/orttraining/orttraining/test/python/orttraining_test_ortmodule_api.py
+++ b/orttraining/orttraining/test/python/orttraining_test_ortmodule_api.py
@@ -1773,13 +1773,17 @@ def run_step(model, input):
     _test_helpers.assert_values_are_close(ort_input.grad, pt_input.grad)
 
 
-def test_aten_upsample_bilinear():
+@pytest.mark.parametrize("interpolate_size_scale", ({"size": (8, 12)}, {"scale_factor": 4.7}))
+@pytest.mark.parametrize("align_corners", (True, False))
+def test_resize_grad_correctness_bilinear_2d(interpolate_size_scale, align_corners):
     class _NeuralNetUpsampleBilinear(torch.nn.Module):
         def __init__(self):
             super().__init__()
 
         def forward(self, input):
-            return torch.nn.functional.interpolate(input, size=(8, 12), mode="bilinear")
+            return torch.nn.functional.interpolate(
+                input, align_corners=align_corners, mode="bilinear", **interpolate_size_scale
+            )
 
     device = "cuda"
     pt_model = _NeuralNetUpsampleBilinear().to(device)
@@ -3900,9 +3904,9 @@ def forward(self, input1, bool_argument, int_argument, float_argument):
                 out = self.relu(out)
             return out
 
-    assert type(bool_argument) is bool
-    assert type(int_argument) is int
-    assert type(float_argument) is float
+    assert type(bool_argument) is bool  # noqa: E721
+    assert type(int_argument) is int  # noqa: E721
+    assert type(float_argument) is float  # noqa: E721
 
     device = "cuda"
     N, D_in, H, D_out = 32, 784, 500, 10  # noqa: N806
@@ -3938,8 +3942,8 @@ def forward(self, input1, bool_argument):
                 out = self.relu(out)
             return out
 
-    assert type(bool_arguments[0]) is bool
-    assert type(bool_arguments[1]) is bool
+    assert type(bool_arguments[0]) is bool  # noqa: E721
+    assert type(bool_arguments[1]) is bool  # noqa: E721
 
     device = "cuda"
     N, D_in, H, D_out = 32, 784, 500, 10  # noqa: N806
@@ -5757,6 +5761,7 @@ def run_step(model, input, positions):
         ("MatMul", 1),
         ("Dropout", 0),
         ("LayerNormalization", 0),
+        ("LayerNormalization", 1),
         ("Cast", 0),
         ("BiasGelu", 0),
         ("Gelu", 0),
@@ -5769,12 +5774,18 @@ def test_ops_for_padding_elimination(test_cases):
     test_op = test_cases[0]
     case = test_cases[1]
 
+    vocab_size, hidden_size = 50265, 768
+    batch_size, max_seq_length = 8, 128
+
     class ToyModel(torch.nn.Module):
         def __init__(self, vocab_size, hidden_size, pad_token_id):
             super().__init__()
             self.word_embeddings = nn.Embedding(vocab_size, hidden_size, padding_idx=pad_token_id)
             if test_op == "LayerNormalization":
-                self.LayerNorm = nn.LayerNorm(hidden_size, eps=1e-05)
+                if case == 0:
+                    self.LayerNorm = nn.LayerNorm(hidden_size, eps=1e-05)
+                else:
+                    self.LayerNorm = nn.LayerNorm([max_seq_length, hidden_size], eps=1e-05)
             self.hidden_size = hidden_size
 
         # test test_elementwise op for padding elimination
@@ -5782,14 +5793,14 @@ def __init__(self, vocab_size, hidden_size, pad_token_id):
         #            the test_op should be included in padding elimination subgraph and the PadAndUnflatten should be
         #            added to output of test_op.
         # in case 2, the shapes of inputs of test_op are [batch_size, seqlen, hidden_size] and [batch_size, 1, hidden_size],
-        #            the test_op should be included in padding elimination subgraph and a 'Expand + Reshape + ShrunkenGather'
+        #            the test_op should be included in padding elimination subgraph and a 'Expand + FlattenAndUnpad'
         #            pattern should be insert to the arg of [batch_size, 1, hidden_size].
         # in case 3, the shapes of inputs of test_op are [batch_size, seqlen, hidden_size] and [1, hidden_size],
-        #            the test_op should be included in padding elimination subgraph and a 'Expand + Reshape + ShrunkenGather'
+        #            the test_op should be included in padding elimination subgraph and a 'Expand + FlattenAndUnpad'
         #            pattern should be insert to the arg of [batch_size, 1, hidden_size].
         # in case 4, the shapes of inputs of test_op are [batch_size, seqlen, hidden_size] and [batch_size, seqlen, hidden_size],
         #            the test_op should be included in padding elimination subgraph and the PadAndUnflatten should be added to
-        #            output of test_op. Besides, the other input of Add should be added 'Reshape + ShrunkenGather' to
+        #            output of test_op. Besides, the other input of Add should be added 'FlattenAndUnpad' to
         #            flatten and elimination padding.
         def test_elementwise(self, input_ids):
             input_shape = input_ids.size()
@@ -5885,8 +5896,6 @@ def generate_inputs(batch_size, max_seq_length, vocab_size):
             batched_inputs.append(torch.cat((input_id, padding)))
         return torch.stack(batched_inputs)
 
-    vocab_size, hidden_size = 50265, 768
-    batch_size, max_seq_length = 8, 128
     device = "cuda"
     model = ORTModule(ToyModel(vocab_size, hidden_size, 1).to(device))
     x = generate_inputs(batch_size, max_seq_length, vocab_size)
@@ -5901,10 +5910,10 @@ def generate_inputs(batch_size, max_seq_length, vocab_size):
     assert len([node.op_type for node in training_model.graph.node if node.op_type == "Squeeze"]) == 1
     assert len([node.op_type for node in training_model.graph.node if node.op_type == "PadAndUnflatten"]) == 1
     if case >= 2:
-        assert len([node.op_type for node in training_model.graph.node if node.op_type == "ShrunkenGather"]) == 2
+        assert len([node.op_type for node in training_model.graph.node if node.op_type == "FlattenAndUnpad"]) == 3
     else:
-        assert len([node.op_type for node in training_model.graph.node if node.op_type == "ShrunkenGather"]) == 1
-    gathergrad_node = [node for node in training_model.graph.node if node.op_type == "PadAndUnflatten"][0]
+        assert len([node.op_type for node in training_model.graph.node if node.op_type == "FlattenAndUnpad"]) == 2
+    recover_pad_node = next(node for node in training_model.graph.node if node.op_type == "PadAndUnflatten")
 
     def find_input_node_type(model, arg):
         result = []
@@ -5913,14 +5922,14 @@ def find_input_node_type(model, arg):
                 result.append(node)
         return result[0].op_type if len(result) == 1 else None
 
-    gathergrad_input_optypes = [find_input_node_type(training_model, arg) for arg in gathergrad_node.input]
+    recover_pad_input_optypes = [find_input_node_type(training_model, arg) for arg in recover_pad_node.input]
     if test_op == "Add" or test_op == "Mul" or test_op == "Sub":
-        assert test_op in gathergrad_input_optypes
+        assert test_op in recover_pad_input_optypes
     else:
         if case == 0:
-            assert test_op in gathergrad_input_optypes
+            assert test_op in recover_pad_input_optypes
         else:
-            assert "ATen" in gathergrad_input_optypes
+            assert "ATen" in recover_pad_input_optypes
 
     del os.environ["ORTMODULE_ENABLE_EMBEDDING_SPARSE_OPTIMIZER"]
 
@@ -6067,7 +6076,7 @@ def generate_inputs(batch_size, max_seq_length, vocab_size):
             _test_helpers.assert_values_are_close(ort_prediction, pt_prediction, atol=1e-3, rtol=1e-4)
 
     training_model = ort_model._torch_module._execution_manager(True)._onnx_models.optimized_model
-    assert "ShrunkenGather" in [node.op_type for node in training_model.graph.node]
+    assert "FlattenAndUnpad" in [node.op_type for node in training_model.graph.node]
     assert "PadAndUnflatten" in [node.op_type for node in training_model.graph.node]
     del os.environ["ORTMODULE_ENABLE_EMBEDDING_SPARSE_OPTIMIZER"]
 
diff --git a/orttraining/orttraining/test/python/orttraining_test_ortmodule_autograd.py b/orttraining/orttraining/test/python/orttraining_test_ortmodule_autograd.py
index ae9bc4328cb26..958c7d94c4241 100644
--- a/orttraining/orttraining/test/python/orttraining_test_ortmodule_autograd.py
+++ b/orttraining/orttraining/test/python/orttraining_test_ortmodule_autograd.py
@@ -1549,7 +1549,7 @@ def _run_step(model, input):
                     count += 1
 
         if index == 0:
-            assert count == 1
+            assert count == 2
         else:
             assert count == 0
 
@@ -1717,3 +1717,97 @@ def forward(self, model_input):
     ).train()
     _ = ortmodule(torch.randn(output_size, dtype=torch.float))
     _check_pythonop_shape(ortmodule)
+
+
+def test_python_op_return_persistent_param_as_value():
+    """Some PythonOp return values that are still used by PyTorch computation. This test makes sure that ORTModule
+    will not release/erase the storage of those return values during tear down OrtValue of the corresponding PythonOp
+    return values.
+    """
+
+    class SimplePassThrough(torch.autograd.Function):
+        @staticmethod
+        def forward(ctx, x):
+            return x.detach()
+
+        @staticmethod
+        def backward(ctx, grad_output):
+            return grad_output
+
+    class GeluWithExternalOutput(torch.autograd.Function):
+        @staticmethod
+        def forward(ctx, x, bias_param):
+            ctx.save_for_backward(x)
+            return x * 0.5 * (1.0 + torch.tanh(0.79788456 * x * (1 + 0.044715 * x * x))), bias_param.detach()
+
+        @staticmethod
+        def backward(ctx, *grad_outputs):
+            (x,) = ctx.saved_tensors
+            tanh_out = torch.tanh(0.79788456 * x * (1 + 0.044715 * x * x))
+            ff = 0.5 * x * ((1 - tanh_out * tanh_out) * (0.79788456 + 0.1070322243 * x * x)) + 0.5 * (1 + tanh_out)
+            g = ff * grad_outputs[0]
+            return g, grad_outputs[1]
+
+    class TestLayer(torch.nn.Module):
+        def __init__(self, output_size):
+            super().__init__()
+            self.relu = GeluWithExternalOutput.apply
+            self._output_size = output_size
+            self.bias = Parameter(torch.empty(output_size, device=torch.cuda.current_device(), dtype=torch.float))
+            self.w = Parameter(
+                torch.empty(output_size, output_size, device=torch.cuda.current_device(), dtype=torch.float)
+            )
+            with torch.no_grad():
+                self.bias.uniform_()
+                self.w.uniform_()
+
+        def forward(self, model_input):
+            activation0 = torch.add(model_input, 0.4)
+            activation1 = activation0.view(self._output_size, -1)
+
+            # Returned detached_bias_param Tensor shares the same storage with self.bias
+            # We are testing to make sure ORT will not erase the storage of self.bias during tear down OrtValue as
+            # the returned value of the SimplePassThrough PythonOp.
+            detached_bias_param = SimplePassThrough.apply(self.bias)
+            relu_out, detached_bias_param = self.relu(activation1, detached_bias_param)
+            activation2 = torch.add(relu_out, self.bias)
+            activation3 = torch.add(activation2, detached_bias_param)
+            activation3 = torch.matmul(self.w, activation3)
+            activation4 = torch.div(activation3, 1000)
+            return activation4
+
+    class TestModule(torch.nn.Module):
+        def __init__(self, output_size) -> None:
+            super().__init__()
+            self.layers = torch.nn.ModuleList([TestLayer(output_size) for i in range(6)])
+
+        def forward(self, x):
+            # ModuleList can act as an iterable, or be indexed using ints
+            for layer in self.layers:
+                x = x.view(-1)
+                x = torch.nn.functional.relu(layer(x))
+            return x
+
+    device = "cuda"
+    output_size = 1024
+    pt_model = TestModule(output_size).to(device)
+    ort_model = ORTModule(copy.deepcopy(pt_model))
+
+    def _run_step(model, input):
+        loss = model(input).sum()
+        loss.backward()
+        return loss
+
+    for _ in range(5):
+        input = torch.randn(output_size, device=device, dtype=torch.float)
+        _run_step(pt_model, input)
+        _run_step(ort_model, input)
+
+        pt_params = {n: p for n, p in pt_model.named_parameters()}
+        for name, param in ort_model.named_parameters():
+            assert_values_are_close(param, pt_params[name], rtol=1e-04, atol=1e-3)
+            if param.grad is not None:
+                assert pt_params[name].grad is not None, f"pt param.grad is None for {name}"
+                assert_values_are_close(param.grad, pt_params[name].grad, rtol=1e-04, atol=1e-3)
+            else:
+                assert pt_params[name].grad is None
diff --git a/orttraining/orttraining/test/python/orttraining_test_hooks.py b/orttraining/orttraining/test/python/orttraining_test_ortmodule_hooks.py
similarity index 100%
rename from orttraining/orttraining/test/python/orttraining_test_hooks.py
rename to orttraining/orttraining/test/python/orttraining_test_ortmodule_hooks.py
diff --git a/orttraining/orttraining/test/python/orttraining_test_onnx_ops_ortmodule.py b/orttraining/orttraining/test/python/orttraining_test_ortmodule_onnx_ops.py
similarity index 100%
rename from orttraining/orttraining/test/python/orttraining_test_onnx_ops_ortmodule.py
rename to orttraining/orttraining/test/python/orttraining_test_ortmodule_onnx_ops.py
diff --git a/orttraining/orttraining/test/python/orttraining_test_ortmodule_triton.py b/orttraining/orttraining/test/python/orttraining_test_ortmodule_triton.py
index 318de843efb8f..0c381d70ca4c1 100644
--- a/orttraining/orttraining/test/python/orttraining_test_ortmodule_triton.py
+++ b/orttraining/orttraining/test/python/orttraining_test_ortmodule_triton.py
@@ -5,6 +5,7 @@
 import json
 import os
 import random
+import uuid
 
 import _test_helpers
 import onnx
@@ -135,8 +136,31 @@ def _torch_layer_norm(input, weight, bias, **kwargs):
     return torch.nn.functional.layer_norm(input, normalized_shape, weight, bias)
 
 
+def _torch_gelu(input):
+    return torch.nn.functional.gelu(input)
+
+
+def _torch_quick_gelu(input, **kwargs):
+    alpha = kwargs.get("alpha", 1.702)
+    return input * torch.sigmoid(input * alpha)
+
+
+def _torch_gelu_grad(dy, x):
+    alpha = 0.70710678118654752440
+    beta = 1.12837916709551257390 * 0.70710678118654752440 * 0.5
+    cdf = 0.5 * (1 + torch.erf(x * alpha))
+    pdf = beta * torch.exp(x * x * -0.5)
+    return dy * (cdf + x * pdf)
+
+
+def _torch_quick_gelu_grad(dy, x, **kwargs):
+    alpha = kwargs.get("alpha", 1.702)
+    sigmoid = torch.sigmoid(x * alpha)
+    return dy * sigmoid * (1.0 + x * alpha * (1.0 - sigmoid))
+
+
 class TorchFuncExecutor:
-    _INFER_FUNC_MAP = {  # noqa: RUF012
+    _TORCH_FUNC_MAP = {  # noqa: RUF012
         "Add": _torch_add,
         "Sub": _torch_sub,
         "Mul": _torch_mul,
@@ -154,13 +178,17 @@ class TorchFuncExecutor:
         "ReduceMin": _torch_reduce_min,
         "Softmax": _torch_softmax,
         "LayerNormalization": _torch_layer_norm,
+        "Gelu": _torch_gelu,
+        "QuickGelu": _torch_quick_gelu,
+        "GeluGrad": _torch_gelu_grad,
+        "QuickGeluGrad": _torch_quick_gelu_grad,
     }
 
     @classmethod
     def run(cls, op_type, *torch_tensors, **kwargs):
-        if op_type not in cls._INFER_FUNC_MAP:
+        if op_type not in cls._TORCH_FUNC_MAP:
             raise NotImplementedError(f"Unsupported op type: {op_type}")
-        return cls._INFER_FUNC_MAP[op_type](*torch_tensors, **kwargs)
+        return cls._TORCH_FUNC_MAP[op_type](*torch_tensors, **kwargs)
 
 
 def _run_op_test(op_type, onnx_dtype, create_model_func, gen_inputs_func, **kwargs):
@@ -169,9 +197,12 @@ def _run_op_test(op_type, onnx_dtype, create_model_func, gen_inputs_func, **kwar
     pt_inputs = gen_inputs_func(_onnx_dtype_to_torch_dtype(onnx_dtype))
     ort_inputs = copy.deepcopy(pt_inputs)
     ort_inputs = [tensor.to(torch.uint8) if tensor.dtype == torch.bool else tensor for tensor in ort_inputs]
+    if "::" in op_type:
+        _, op_type = op_type.split("::")
     pt_outputs = TorchFuncExecutor.run(op_type, *pt_inputs, **kwargs)
     model_str = create_model_func(op_type, onnx_dtype, **kwargs).SerializeToString()
-    ort_outputs = call_triton_by_onnx(hash(model_str), model_str, *[to_dlpack(tensor) for tensor in ort_inputs])
+    unique_id = uuid.uuid1().int >> 64
+    ort_outputs = call_triton_by_onnx(unique_id, model_str, *[to_dlpack(tensor) for tensor in ort_inputs])
     if isinstance(pt_outputs, tuple):
         assert isinstance(ort_outputs, tuple)
         assert len(pt_outputs) == len(ort_outputs)
@@ -200,9 +231,9 @@ def _run_module_test(module_cls, dtype, gen_inputs_func, triton_op_count, **kwar
         ort_output = _run_step(ort_model, *ort_inputs)
         _test_helpers.assert_values_are_close(pt_output, ort_output, rtol=rtol, atol=atol)
         _test_helpers.assert_gradients_match_and_reset_gradient(pt_model, ort_model, rtol=rtol, atol=atol)
-        for i in range(len(pt_inputs)):
-            if pt_inputs[i].requires_grad:
-                _test_helpers.assert_values_are_close(pt_inputs[i].grad, ort_inputs[i].grad, rtol=rtol, atol=atol)
+        for idx, pt_input in enumerate(pt_inputs):
+            if pt_input.requires_grad:
+                _test_helpers.assert_values_are_close(pt_input.grad, ort_inputs[idx].grad, rtol=rtol, atol=atol)
 
     assert os.path.exists(os.path.join(os.getcwd(), "triton_model_torch_exported_training.onnx"))
     assert os.path.exists(os.path.join(os.getcwd(), "triton_model_optimized_training.onnx"))
@@ -221,12 +252,12 @@ def _run_module_test(module_cls, dtype, gen_inputs_func, triton_op_count, **kwar
 
 
 def _run_tunable_op_test(module_cls, dtype, gen_inputs_func, tunable_op, impl_count, **kwargs):
+    os.environ["ORTMODULE_ENABLE_TUNING"] = "1"
+    os.environ["ORTMODULE_TUNING_RESULTS_PATH"] = "./"
     pt_model = module_cls().to(DEVICE).to(dtype)
     ort_model = ORTModule(copy.deepcopy(pt_model))
     rtol = kwargs.get("rtol", 1e-03 if dtype == torch.float16 else 1e-04)
     atol = kwargs.get("atol", 1e-03 if dtype == torch.float16 else 1e-05)
-    os.environ["ORTMODULE_ENABLE_TUNING"] = "1"
-    os.environ["ORTMODULE_TUNING_RESULTS_PATH"] = "./"
     for _ in range(5):
         pt_inputs = gen_inputs_func(dtype)
         ort_inputs = copy.deepcopy(pt_inputs)
@@ -236,7 +267,7 @@ def _run_tunable_op_test(module_cls, dtype, gen_inputs_func, tunable_op, impl_co
         _test_helpers.assert_gradients_match_and_reset_gradient(pt_model, ort_model, rtol=rtol, atol=atol)
     tunable_results_file = os.path.join(os.getcwd(), "tuning_results_training.json")
     assert os.path.exists(tunable_results_file)
-    with open(tunable_results_file) as f:
+    with open(tunable_results_file, encoding="UTF-8") as f:
         tunable_results = json.load(f)
     assert tunable_op in str(tunable_results)
     del os.environ["ORTMODULE_ENABLE_TUNING"]
@@ -246,7 +277,7 @@ def _run_tunable_op_test(module_cls, dtype, gen_inputs_func, tunable_op, impl_co
             if tunable_op in k:
                 for param, impl in v.items():
                     v[param] = (impl + 1 + i) % impl_count
-        with open(tunable_results_file, "w") as f:
+        with open(tunable_results_file, "w", encoding="UTF-8") as f:
             json.dump(new_tunable_results, f)
         ort_model = ORTModule(copy.deepcopy(pt_model))
         for _ in range(5):
@@ -260,13 +291,27 @@ def _run_tunable_op_test(module_cls, dtype, gen_inputs_func, tunable_op, impl_co
     del os.environ["ORTMODULE_TUNING_RESULTS_PATH"]
 
 
-@pytest.mark.parametrize("op_type", ["Add", "Sub", "Mul", "Div"])
+@pytest.mark.parametrize(
+    "op",
+    [
+        ("Add", {}),
+        ("Sub", {}),
+        ("Mul", {}),
+        ("Div", {}),
+        ("com.microsoft::GeluGrad", {}),
+        ("com.microsoft::QuickGeluGrad", {}),
+        ("com.microsoft::QuickGeluGrad", {"alpha": 1.0}),
+    ],
+)
 @pytest.mark.parametrize("onnx_dtype", [TensorProto.FLOAT, TensorProto.FLOAT16])
 @pytest.mark.parametrize("input_shapes", [([1024, 2], [1024, 2]), ([2, 3, 3, 3], [3, 1, 3]), ([2049], [1])])
-def test_binary_elementwise_op(op_type, onnx_dtype, input_shapes):
-    def _create_model(op_type, onnx_dtype):
+def test_binary_elementwise_op(op, onnx_dtype, input_shapes):
+    def _create_model(op_type, onnx_dtype, **kwargs):
+        domain = ""
+        if "::" in op_type:
+            domain, op_type = op_type.split("::")
         graph = helper.make_graph(
-            [helper.make_node(op_type, ["X", "Y"], ["Z"], name="test")],
+            [helper.make_node(op_type, ["X", "Y"], ["Z"], name="test", domain=domain, **kwargs)],
             "test",
             [
                 helper.make_tensor_value_info("X", onnx_dtype, None),
@@ -282,7 +327,7 @@ def _gen_inputs(dtype):
             torch.randn(*input_shapes[1], dtype=dtype, device=DEVICE),
         ]
 
-    _run_op_test(op_type, onnx_dtype, _create_model, _gen_inputs)
+    _run_op_test(op[0], onnx_dtype, _create_model, _gen_inputs, **op[1])
 
 
 @pytest.mark.parametrize("onnx_dtype", [TensorProto.FLOAT, TensorProto.FLOAT16])
@@ -303,13 +348,25 @@ def _gen_inputs(dtype):
     _run_op_test("Sum", onnx_dtype, _create_model, _gen_inputs)
 
 
-@pytest.mark.parametrize("op_type", ["Sqrt", "Exp"])
+@pytest.mark.parametrize(
+    "op",
+    [
+        ("Sqrt", {}),
+        ("Exp", {}),
+        ("com.microsoft::Gelu", {}),
+        ("com.microsoft::QuickGelu", {}),
+        ("com.microsoft::QuickGelu", {"alpha": 1.0}),
+    ],
+)
 @pytest.mark.parametrize("onnx_dtype", [TensorProto.FLOAT, TensorProto.FLOAT16])
 @pytest.mark.parametrize("input_shape", [[1024, 4], [2, 3, 3, 3], [2049, 1]])
-def test_unary_elementwise_op(op_type, onnx_dtype, input_shape):
-    def _create_model(op_type, onnx_dtype):
+def test_unary_elementwise_op(op, onnx_dtype, input_shape):
+    def _create_model(op_type, onnx_dtype, **kwargs):
+        domain = ""
+        if "::" in op_type:
+            domain, op_type = op_type.split("::")
         graph = helper.make_graph(
-            [helper.make_node(op_type, ["X"], ["Y"], name="test")],
+            [helper.make_node(op_type, ["X"], ["Y"], name="test", domain=domain, **kwargs)],
             "test",
             [helper.make_tensor_value_info("X", onnx_dtype, None)],
             [helper.make_tensor_value_info("Y", onnx_dtype, None)],
@@ -319,7 +376,7 @@ def _create_model(op_type, onnx_dtype):
     def _gen_inputs(dtype):
         return [torch.rand(*input_shape, dtype=dtype, device=DEVICE)]
 
-    _run_op_test(op_type, onnx_dtype, _create_model, _gen_inputs)
+    _run_op_test(op[0], onnx_dtype, _create_model, _gen_inputs, **op[1])
 
 
 @pytest.mark.parametrize("onnx_dtype", [TensorProto.FLOAT, TensorProto.FLOAT16])
@@ -486,6 +543,8 @@ def _gen_inputs(dtype):
         ([123, 4, 5, 6], [2], False),
         ([16, 8, 16, 8], [1, 3], True),
         ([16, 8, 16, 8], [0, 2], False),
+        ([16, 8, 16, 8], [0, 1, 2, 3], True),
+        ([16, 1, 16, 8], [0, 1, 2, 3], False),
     ],
 )
 def test_reduce_op(op_type, onnx_dtype, input_shape_and_reduce_info):
@@ -726,6 +785,43 @@ def _gen_inputs(dtype):
     _run_module_test(NeuralNetLayerNorm, dtype, _gen_inputs, 2)
 
 
+@pytest.mark.parametrize("dtype", [torch.float32, torch.float16])
+def test_dynamic_shapes_elementwise_module(dtype):
+    class NeuralNetSymbolicShapesElementwise(torch.nn.Module):
+        def forward(self, x, y, u, v):
+            return x * y - (u + v)
+
+    def _gen_inputs(dtype):
+        dim1 = 64 * random.randint(2, 4)
+        dim2 = 64 * random.randint(2, 4)
+        return [
+            torch.rand(16, dim1, dim2, dtype=dtype, device=DEVICE, requires_grad=True),
+            torch.rand(16, 1, dim2, dtype=dtype, device=DEVICE, requires_grad=True),
+            torch.rand(dim1, 1, dtype=dtype, device=DEVICE, requires_grad=True),
+            torch.rand(16, dim1, dim2, dtype=dtype, device=DEVICE, requires_grad=True),
+        ]
+
+    _run_module_test(NeuralNetSymbolicShapesElementwise, dtype, _gen_inputs, 1)
+
+
+@pytest.mark.parametrize("dtype", [torch.float32, torch.float16])
+def test_dynamic_shapes_reduction_module(dtype):
+    class NeuralNetSymbolicShapesReduction(torch.nn.Module):
+        def forward(self, x, y, z):
+            return torch.softmax(x * y + z, dim=-1)
+
+    def _gen_inputs(dtype):
+        dim1 = 64 * random.randint(2, 4)
+        dim2 = 64 * random.randint(2, 4)
+        return [
+            torch.rand(16, dim1, dim2, dtype=dtype, device=DEVICE, requires_grad=True),
+            torch.rand(16, 1, dim2, dtype=dtype, device=DEVICE, requires_grad=True),
+            torch.rand(dim1, 1, dtype=dtype, device=DEVICE, requires_grad=True),
+        ]
+
+    _run_module_test(NeuralNetSymbolicShapesReduction, dtype, _gen_inputs, 2)
+
+
 @pytest.mark.parametrize("dtype", [torch.float32, torch.float16])
 @pytest.mark.parametrize("has_sum", [True, False])
 def test_slice_scel_module(dtype, has_sum):
@@ -777,3 +873,34 @@ def _gen_inputs(dtype):
         return [torch.rand(m_n_k[0], m_n_k[2], dtype=dtype, device=DEVICE, requires_grad=True)]
 
     _run_tunable_op_test(NeuralNetGemm, dtype, _gen_inputs, "GemmTunableOp", 2)
+
+
+def test_user_config():
+    n, d, h, w = 8, 768, 12, 64
+    dtype = torch.float32
+
+    class NeuralNetElementwise(torch.nn.Module):
+        def forward(self, input1, input2, input3, input4):
+            return input1 + input2 - input3 * input4
+
+    def _gen_inputs(dtype):
+        return [
+            torch.rand(n, d, h, w, dtype=dtype, device=DEVICE, requires_grad=True),
+            torch.rand(w, dtype=dtype, device=DEVICE, requires_grad=True),
+            torch.rand(d, 1, 1, dtype=dtype, device=DEVICE, requires_grad=True),
+            torch.rand(n, 1, h, w, dtype=dtype, device=DEVICE, requires_grad=True),
+        ]
+
+    user_config = (
+        '{"ops": {"Add": {"versions": [13, 14]}, "Mul": {"versions": [13, 14]}}, '
+        '"initializer": "scalar", "min_nodes": 2}'
+    )
+    with open("user_config.json", "w", encoding="UTF-8") as f:
+        f.write(user_config)
+    os.environ["ORTMODULE_TRITON_CONFIG_FILE"] = "./user_config.json"
+
+    # Mul is not supported, the graph is splited to 2 subgraphs with single Op, which will not be fused to TritonOp.
+    _run_module_test(NeuralNetElementwise, dtype, _gen_inputs, 0)
+
+    del os.environ["ORTMODULE_TRITON_CONFIG_FILE"]
+    os.remove(os.path.join(os.getcwd(), "user_config.json"))
diff --git a/orttraining/orttraining/test/python/orttraining_test_orttrainer_bert_toy_onnx.py b/orttraining/orttraining/test/python/orttraining_test_orttrainer_bert_toy_onnx.py
deleted file mode 100644
index 45b87b32f7d64..0000000000000
--- a/orttraining/orttraining/test/python/orttraining_test_orttrainer_bert_toy_onnx.py
+++ /dev/null
@@ -1,1283 +0,0 @@
-import copy  # noqa: F401
-import inspect  # noqa: F401
-import math  # noqa: F401
-import os
-from functools import partial
-
-import _test_commons
-import _test_helpers
-import onnx
-import pytest
-import torch
-from numpy.testing import assert_allclose
-
-import onnxruntime
-from onnxruntime.capi.ort_trainer import IODescription as Legacy_IODescription
-from onnxruntime.capi.ort_trainer import LossScaler as Legacy_LossScaler
-from onnxruntime.capi.ort_trainer import ModelDescription as Legacy_ModelDescription
-from onnxruntime.capi.ort_trainer import ORTTrainer as Legacy_ORTTrainer
-from onnxruntime.training import amp, optim, orttrainer
-
-###############################################################################
-# Helper functions ############################################################
-###############################################################################
-
-
-def generate_random_input_from_model_desc(desc, seed=1, device="cuda:0"):
-    """Generates a sample input for the BERT model using the model desc"""
-
-    torch.manual_seed(seed)
-    onnxruntime.set_seed(seed)
-    dtype = torch.int64
-    vocab_size = 30528
-    num_classes = [vocab_size, 2, 2, vocab_size, 2]
-    dims = {"batch_size": 16, "seq_len": 1}
-    sample_input = []
-    for index, input in enumerate(desc["inputs"]):
-        size = []
-        for s in input[1]:
-            if isinstance(s, (int)):
-                size.append(s)
-            else:
-                size.append(dims[s] if s in dims else 1)
-        sample_input.append(torch.randint(0, num_classes[index], tuple(size), dtype=dtype).to(device))
-    return sample_input
-
-
-# EXPERIMENTAL HELPER FUNCTIONS
-
-
-def bert_model_description(dynamic_shape=True):
-    """Creates the model description dictionary with static dimensions"""
-
-    if dynamic_shape:
-        model_desc = {
-            "inputs": [
-                ("input_ids", ["batch_size", "seq_len"]),
-                (
-                    "segment_ids",
-                    ["batch_size", "seq_len"],
-                ),
-                (
-                    "input_mask",
-                    ["batch_size", "seq_len"],
-                ),
-                (
-                    "masked_lm_labels",
-                    ["batch_size", "seq_len"],
-                ),
-                (
-                    "next_sentence_labels",
-                    [
-                        "batch_size",
-                    ],
-                ),
-            ],
-            "outputs": [("loss", [], True)],
-        }
-    else:
-        batch_size = 16
-        seq_len = 1
-        model_desc = {
-            "inputs": [
-                ("input_ids", [batch_size, seq_len]),
-                (
-                    "segment_ids",
-                    [batch_size, seq_len],
-                ),
-                (
-                    "input_mask",
-                    [batch_size, seq_len],
-                ),
-                (
-                    "masked_lm_labels",
-                    [batch_size, seq_len],
-                ),
-                (
-                    "next_sentence_labels",
-                    [
-                        batch_size,
-                    ],
-                ),
-            ],
-            "outputs": [("loss", [], True)],
-        }
-    return model_desc
-
-
-def optimizer_parameters(model):
-    """A method to assign different hyper parameters for different model parameter groups"""
-
-    no_decay_keys = ["bias", "gamma", "beta", "LayerNorm"]
-    no_decay_param_group = []
-    for initializer in model.graph.initializer:
-        if any(key in initializer.name for key in no_decay_keys):
-            no_decay_param_group.append(initializer.name)
-    params = [
-        {
-            "params": no_decay_param_group,
-            "alpha": 0.9,
-            "beta": 0.999,
-            "lambda_coef": 0.0,
-            "epsilon": 1e-6,
-            "do_bias_correction": False,
-        }
-    ]
-
-    return params
-
-
-def load_bert_onnx_model():
-    bert_onnx_model_path = os.path.join("testdata", "bert_toy_postprocessed.onnx")
-    model = onnx.load(bert_onnx_model_path)
-    return model
-
-
-class CustomLossScaler(amp.LossScaler):
-    def __init__(self, loss_scale=float(1 << 16)):
-        super().__init__(loss_scale)
-        self._initial_loss_scale = loss_scale
-        self.loss_scale = loss_scale
-
-    def reset(self):
-        self.loss_scale = self._initial_loss_scale
-
-    def update(self, train_step_info):
-        self.loss_scale *= 0.9
-        return self.loss_scale
-
-
-# LEGACY HELPER FUNCTIONS
-
-
-class LegacyCustomLossScaler:
-    def __init__(self, loss_scale=float(1 << 16)):
-        self._initial_loss_scale = loss_scale
-        self.loss_scale_ = loss_scale
-
-    def reset(self):
-        self.loss_scale_ = self._initial_loss_scale
-
-    def update_loss_scale(self, is_all_finite):
-        self.loss_scale_ *= 0.9
-
-
-def legacy_model_params(lr, device=torch.device("cuda", 0)):  # noqa: B008
-    legacy_model_desc = legacy_bert_model_description()
-    learning_rate_description = legacy_ort_trainer_learning_rate_description()
-    learning_rate = torch.tensor([lr]).to(device)
-    return (legacy_model_desc, learning_rate_description, learning_rate)
-
-
-def legacy_ort_trainer_learning_rate_description():
-    return Legacy_IODescription(
-        "Learning_Rate",
-        [
-            1,
-        ],
-        torch.float32,
-    )
-
-
-def legacy_bert_model_description():
-    input_ids_desc = Legacy_IODescription("input_ids", ["batch", "max_seq_len_in_batch"])
-    segment_ids_desc = Legacy_IODescription("segment_ids", ["batch", "max_seq_len_in_batch"])
-    input_mask_desc = Legacy_IODescription("input_mask", ["batch", "max_seq_len_in_batch"])
-    masked_lm_labels_desc = Legacy_IODescription("masked_lm_labels", ["batch", "max_seq_len_in_batch"])
-    next_sentence_labels_desc = Legacy_IODescription(
-        "next_sentence_labels",
-        [
-            "batch",
-        ],
-    )
-    loss_desc = Legacy_IODescription("loss", [])
-
-    return Legacy_ModelDescription(
-        [input_ids_desc, segment_ids_desc, input_mask_desc, masked_lm_labels_desc, next_sentence_labels_desc],
-        [loss_desc],
-    )
-
-
-def legacy_optim_params_a(name):
-    return {"alpha": 0.9, "beta": 0.999, "lambda": 0.01, "epsilon": 1e-6, "do_bias_correction": False}
-
-
-def legacy_optim_params_b(name):
-    params = ["bert.embeddings.LayerNorm.bias", "bert.embeddings.LayerNorm.weight"]
-    if name in params:
-        return {"alpha": 0.9, "beta": 0.999, "lambda": 0.0, "epsilon": 1e-6, "do_bias_correction": False}
-    return {"alpha": 0.9, "beta": 0.999, "lambda": 0.01, "epsilon": 1e-6, "do_bias_correction": False}
-
-
-def legacy_optim_params_c(name):
-    params_group = optimizer_parameters(load_bert_onnx_model())
-    if name in params_group[0]["params"]:
-        return {"alpha": 0.9, "beta": 0.999, "lambda": 0.0, "epsilon": 1e-6, "do_bias_correction": False}
-    return {"alpha": 0.9, "beta": 0.999, "lambda": 0.01, "epsilon": 1e-6, "do_bias_correction": False}
-
-
-###############################################################################
-# Testing starts here #########################################################
-###############################################################################
-
-
-@pytest.mark.parametrize("dynamic_shape", [(True), (False)])
-def testToyBERTModelBasicTraining(dynamic_shape):
-    model_desc = bert_model_description(dynamic_shape)
-    model = load_bert_onnx_model()
-
-    optim_config = optim.LambConfig()
-    opts = orttrainer.ORTTrainerOptions({})
-    trainer = orttrainer.ORTTrainer(model, model_desc, optim_config, options=opts)
-
-    for _i in range(10):
-        sample_input = generate_random_input_from_model_desc(model_desc)
-        output = trainer.train_step(*sample_input)
-        assert output.shape == torch.Size([])
-
-
-@pytest.mark.parametrize(
-    "expected_losses",
-    [([11.041123, 10.986166, 11.101636, 11.013366, 11.03775, 11.041175, 10.957118, 11.069563, 11.040824, 11.16437])],
-)
-def testToyBERTDeterministicCheck(expected_losses):
-    # Common setup
-    train_steps = 10
-    device = "cuda"
-    seed = 1
-    rtol = 1e-3
-    torch.manual_seed(seed)
-    onnxruntime.set_seed(seed)
-
-    # Modeling
-    model_desc = bert_model_description()
-    model = load_bert_onnx_model()
-    optimizer_parameters(model)
-    optim_config = optim.LambConfig()
-    opts = orttrainer.ORTTrainerOptions(
-        {
-            "debug": {"deterministic_compute": True},
-            "device": {
-                "id": device,
-            },
-        }
-    )
-    trainer = orttrainer.ORTTrainer(model, model_desc, optim_config, options=opts)
-
-    # Train
-    experimental_losses = []
-    for i in range(train_steps):
-        sample_input = generate_random_input_from_model_desc(model_desc, i)
-        experimental_losses.append(trainer.train_step(*sample_input).cpu().item())
-
-    # Check output
-    _test_helpers.assert_model_outputs(experimental_losses, expected_losses, rtol=rtol)
-
-
-@pytest.mark.parametrize(
-    "initial_lr, lr_scheduler, expected_learning_rates, expected_losses",
-    [
-        (
-            1.0,
-            optim.lr_scheduler.ConstantWarmupLRScheduler,
-            [0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
-            [
-                10.988012313842773,
-                10.99213981628418,
-                120.79301452636719,
-                36.11647033691406,
-                95.83200073242188,
-                221.2766571044922,
-                208.40316772460938,
-                279.5332946777344,
-                402.46380615234375,
-                325.79254150390625,
-            ],
-        ),
-        (
-            0.5,
-            optim.lr_scheduler.ConstantWarmupLRScheduler,
-            [0.0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5],
-            [
-                10.988012313842773,
-                10.99213981628418,
-                52.69743347167969,
-                19.741533279418945,
-                83.88340759277344,
-                126.39848327636719,
-                91.53898620605469,
-                63.62016296386719,
-                102.21206665039062,
-                180.1424560546875,
-            ],
-        ),
-        (
-            1.0,
-            optim.lr_scheduler.CosineWarmupLRScheduler,
-            [
-                0.0,
-                0.9931806517013612,
-                0.9397368756032445,
-                0.8386407858128706,
-                0.7008477123264848,
-                0.5412896727361662,
-                0.37725725642960045,
-                0.22652592093878665,
-                0.10542974530180327,
-                0.02709137914968268,
-            ],
-            [
-                10.988012313842773,
-                10.99213981628418,
-                120.6441650390625,
-                32.152557373046875,
-                89.63705444335938,
-                138.8782196044922,
-                117.57748413085938,
-                148.01927185058594,
-                229.60403442382812,
-                110.2930908203125,
-            ],
-        ),
-        (
-            1.0,
-            optim.lr_scheduler.LinearWarmupLRScheduler,
-            [
-                0.0,
-                0.9473684210526315,
-                0.8421052631578947,
-                0.7368421052631579,
-                0.631578947368421,
-                0.5263157894736842,
-                0.42105263157894735,
-                0.3157894736842105,
-                0.21052631578947367,
-                0.10526315789473684,
-            ],
-            [
-                10.988012313842773,
-                10.99213981628418,
-                112.89633178710938,
-                31.114538192749023,
-                80.94029235839844,
-                131.34490966796875,
-                111.4329605102539,
-                133.74252319335938,
-                219.37344360351562,
-                109.67041015625,
-            ],
-        ),
-        (
-            1.0,
-            optim.lr_scheduler.PolyWarmupLRScheduler,
-            [
-                0.0,
-                0.9473684263157895,
-                0.8421052789473684,
-                0.7368421315789474,
-                0.6315789842105263,
-                0.5263158368421054,
-                0.42105268947368424,
-                0.31578954210526317,
-                0.21052639473684212,
-                0.10526324736842106,
-            ],
-            [
-                10.988012313842773,
-                10.99213981628418,
-                112.89633178710938,
-                31.114538192749023,
-                80.9402847290039,
-                131.3447265625,
-                111.43253326416016,
-                133.7415008544922,
-                219.37147521972656,
-                109.66986083984375,
-            ],
-        ),
-    ],
-)
-def testToyBERTModelLRScheduler(initial_lr, lr_scheduler, expected_learning_rates, expected_losses):
-    return  # TODO: re-enable after nondeterminism on backend is fixed
-    # Common setup
-    device = "cuda"
-    total_steps = 10
-    seed = 1
-    warmup = 0.05
-    cycles = 0.5
-    power = 1.0
-    lr_end = 1e-7
-    rtol = 1e-3
-    torch.manual_seed(seed)
-    onnxruntime.set_seed(seed)
-
-    # Setup LR Schedulers
-    if (
-        lr_scheduler == optim.lr_scheduler.ConstantWarmupLRScheduler
-        or lr_scheduler == optim.lr_scheduler.LinearWarmupLRScheduler
-    ):
-        lr_scheduler = lr_scheduler(total_steps=total_steps, warmup=warmup)
-    elif lr_scheduler == optim.lr_scheduler.CosineWarmupLRScheduler:
-        lr_scheduler = lr_scheduler(total_steps=total_steps, warmup=warmup, cycles=cycles)
-    elif lr_scheduler == optim.lr_scheduler.PolyWarmupLRScheduler:
-        lr_scheduler = lr_scheduler(total_steps=total_steps, warmup=warmup, power=power, lr_end=lr_end)
-    else:
-        raise RuntimeError("Invalid lr_scheduler")
-
-    # Modeling
-    model_desc = bert_model_description()
-    model = load_bert_onnx_model()
-    optim_config = optim.AdamConfig(lr=initial_lr)
-    opts = orttrainer.ORTTrainerOptions(
-        {
-            "debug": {"deterministic_compute": True},
-            "device": {
-                "id": device,
-            },
-            "lr_scheduler": lr_scheduler,
-        }
-    )
-    trainer = orttrainer.ORTTrainer(model, model_desc, optim_config, options=opts)
-
-    # Train
-    losses = []
-    learning_rates = []
-    for i in range(total_steps):
-        sample_input = generate_random_input_from_model_desc(model_desc, i)
-        losses.append(trainer.train_step(*sample_input).cpu().item())
-        learning_rates.append(trainer.options.lr_scheduler.get_last_lr()[0])
-
-    # Check output
-    _test_helpers.assert_model_outputs(learning_rates, expected_learning_rates, rtol=rtol)
-    _test_helpers.assert_model_outputs(losses, expected_losses, rtol=rtol)
-
-
-@pytest.mark.parametrize(
-    "loss_scaler, expected_losses",
-    [
-        (
-            None,
-            [
-                11.041126,
-                10.986309,
-                11.101673,
-                11.013394,
-                11.037781,
-                11.041253,
-                10.957072,
-                11.069506,
-                11.040807,
-                11.164349,
-            ],
-        ),
-        (
-            amp.DynamicLossScaler(),
-            [
-                11.041126,
-                10.986309,
-                11.101673,
-                11.013394,
-                11.037781,
-                11.041253,
-                10.957072,
-                11.069506,
-                11.040807,
-                11.164349,
-            ],
-        ),
-        (
-            CustomLossScaler(),
-            [
-                11.041126,
-                10.986309,
-                11.101645,
-                11.013412,
-                11.037757,
-                11.041273,
-                10.957077,
-                11.069525,
-                11.040765,
-                11.164298,
-            ],
-        ),
-    ],
-)
-def testToyBERTModelMixedPrecisionLossScaler(loss_scaler, expected_losses):
-    # Common setup
-    total_steps = 10
-    device = "cuda"
-    seed = 1
-    rtol = 1e-3
-    torch.manual_seed(seed)
-    onnxruntime.set_seed(seed)
-
-    # Modeling
-    model_desc = bert_model_description()
-    model = load_bert_onnx_model()
-    optim_config = optim.LambConfig()
-    opts = orttrainer.ORTTrainerOptions(
-        {
-            "debug": {"deterministic_compute": True},
-            "device": {
-                "id": device,
-            },
-            "mixed_precision": {"enabled": True, "loss_scaler": loss_scaler},
-        }
-    )
-    trainer = orttrainer.ORTTrainer(model, model_desc, optim_config, options=opts)
-
-    # Train
-    losses = []
-    for i in range(total_steps):
-        sample_input = generate_random_input_from_model_desc(model_desc, i)
-        losses.append(trainer.train_step(*sample_input).cpu().item())
-
-    # Check output
-    _test_helpers.assert_model_outputs(losses, expected_losses, rtol=rtol)
-
-
-@pytest.mark.parametrize(
-    "gradient_accumulation_steps, expected_losses",
-    [
-        (
-            1,
-            [
-                11.041123,
-                10.986166,
-                11.101636,
-                11.013366,
-                11.03775,
-                11.041175,
-                10.957118,
-                11.069563,
-                11.040824,
-                11.16437,
-            ],
-        ),
-        (
-            4,
-            [
-                11.041123,
-                10.982856,
-                11.105512,
-                11.006721,
-                11.03358,
-                11.05058,
-                10.955864,
-                11.059035,
-                11.037753,
-                11.162649,
-            ],
-        ),
-        (
-            7,
-            [
-                11.041123,
-                10.982856,
-                11.105512,
-                11.006721,
-                11.036314,
-                11.055109,
-                10.960751,
-                11.05809,
-                11.038856,
-                11.159635,
-            ],
-        ),
-    ],
-)
-def testToyBERTModelGradientAccumulation(gradient_accumulation_steps, expected_losses):
-    # Common setup
-    total_steps = 10
-    device = "cuda"
-    seed = 1
-    rtol = 1e-3
-    torch.manual_seed(seed)
-    onnxruntime.set_seed(seed)
-
-    # Modeling
-    model_desc = bert_model_description()
-    model = load_bert_onnx_model()
-    optim_config = optim.LambConfig()
-    opts = orttrainer.ORTTrainerOptions(
-        {
-            "debug": {"deterministic_compute": True},
-            "device": {
-                "id": device,
-            },
-            "batch": {"gradient_accumulation_steps": gradient_accumulation_steps},
-        }
-    )
-    trainer = orttrainer.ORTTrainer(model, model_desc, optim_config, options=opts)
-
-    # Train
-    losses = []
-    for i in range(total_steps):
-        sample_input = generate_random_input_from_model_desc(model_desc, i)
-        losses.append(trainer.train_step(*sample_input).cpu().item())
-
-    # Check output
-    _test_helpers.assert_model_outputs(losses, expected_losses, rtol=rtol)
-
-
-def testToyBertCheckpointBasic():
-    # Common setup
-    seed = 1
-    torch.manual_seed(seed)
-    onnxruntime.set_seed(seed)
-    optim_config = optim.LambConfig()
-    opts = orttrainer.ORTTrainerOptions({"debug": {"deterministic_compute": True}})
-
-    # Create ORTTrainer and save initial state in a dict
-    model = load_bert_onnx_model()
-    model_desc = bert_model_description()
-    trainer = orttrainer.ORTTrainer(model, model_desc, optim_config, options=opts)
-    sd = trainer.state_dict()
-
-    ## All initializers must be present in the state_dict
-    ##  when the specified model for ORTTRainer is an ONNX model
-    for param in trainer._onnx_model.graph.initializer:
-        assert param.name in sd["model"]["full_precision"]
-
-    ## Modify one of the state values and load into ORTTrainer
-    sd["model"]["full_precision"]["bert.encoder.layer.0.attention.output.LayerNorm.weight"] += 10
-    trainer.load_state_dict(sd)
-
-    ## Save a checkpoint
-    ckpt_dir = "testdata"
-    trainer.save_checkpoint(os.path.join(ckpt_dir, "bert_toy_save_test.ortcp"))
-    del trainer
-    del model
-
-    # Create a new ORTTrainer and load the checkpoint from previous ORTTrainer
-    model2 = load_bert_onnx_model()
-    model_desc2 = bert_model_description()
-    trainer2 = orttrainer.ORTTrainer(model2, model_desc2, optim_config, options=opts)
-    trainer2.load_checkpoint(os.path.join(ckpt_dir, "bert_toy_save_test.ortcp"))
-    loaded_sd = trainer2.state_dict()
-
-    # Assert whether original state and the one loaded from checkpoint matches
-    _test_commons.assert_all_states_close_ort(sd, loaded_sd)
-
-
-def testToyBertCheckpointFrozenWeights():
-    # Common setup
-    seed = 1
-    total_steps = 10
-    torch.manual_seed(seed)
-    onnxruntime.set_seed(seed)
-    opts = orttrainer.ORTTrainerOptions(
-        {
-            "debug": {"deterministic_compute": True},
-            "utils": {"frozen_weights": ["bert.encoder.layer.0.attention.self.value.weight"]},
-        }
-    )
-
-    # Create ORTTrainer and save initial state in a dict
-    model = load_bert_onnx_model()
-    model_desc = bert_model_description()
-    optim_config = optim.LambConfig()
-    trainer = orttrainer.ORTTrainer(model, model_desc, optim_config, options=opts)
-
-    # Train for a few steps
-    for _i in range(total_steps):
-        sample_input = generate_random_input_from_model_desc(model_desc, seed)
-        _ = trainer.train_step(*sample_input)
-    sample_input = generate_random_input_from_model_desc(model_desc, seed + total_steps + 1)
-    # Evaluate once to get a base loss
-    loss = trainer.eval_step(*sample_input)
-    # Save checkpoint
-    state_dict = trainer.state_dict()
-
-    # Load previous state into another instance of ORTTrainer
-    model2 = load_bert_onnx_model()
-    model_desc2 = bert_model_description()
-    optim_config2 = optim.LambConfig()
-    trainer2 = orttrainer.ORTTrainer(model2, model_desc2, optim_config2, options=opts)
-    trainer2.load_state_dict(state_dict)
-    # Evaluate once to get a base loss
-    ckpt_loss = trainer2.eval_step(*sample_input)
-
-    # Must match as both trainers have the same dict state
-    assert_allclose(loss.cpu(), ckpt_loss.cpu())
-    loaded_state_dict = trainer2.state_dict()
-    _test_commons.assert_all_states_close_ort(state_dict, loaded_state_dict)
-
-
-@pytest.mark.parametrize(
-    "optimizer, mixedprecision_enabled",
-    [
-        (optim.LambConfig(), False),
-        (optim.AdamConfig(), False),
-        (optim.LambConfig(), True),
-        (optim.AdamConfig(), True),
-    ],
-)
-def testToyBertLoadOptimState(optimizer, mixedprecision_enabled):
-    # Common setup
-    device = "cuda"
-    seed = 1
-    torch.manual_seed(seed)
-    onnxruntime.set_seed(seed)
-    optim_config = optimizer
-    opts = orttrainer.ORTTrainerOptions(
-        {
-            "debug": {"deterministic_compute": True},
-            "device": {"id": device},
-            "mixed_precision": {
-                "enabled": mixedprecision_enabled,
-            },
-            "distributed": {"allreduce_post_accumulation": True},
-        }
-    )
-
-    # Create ORTTrainer and save initial state in a dict
-    model = load_bert_onnx_model()
-    model_desc = bert_model_description()
-    dummy_init_state = _test_commons.generate_dummy_optim_state(model, optimizer)
-    trainer = orttrainer.ORTTrainer(model, model_desc, optim_config, options=opts)
-    trainer.load_state_dict(dummy_init_state)
-
-    # Expected values
-    input_ids = torch.tensor(
-        [
-            [26598],
-            [21379],
-            [19922],
-            [5219],
-            [5644],
-            [20559],
-            [23777],
-            [25672],
-            [22969],
-            [16824],
-            [16822],
-            [635],
-            [27399],
-            [20647],
-            [18519],
-            [15546],
-        ],
-        device=device,
-    )
-    segment_ids = torch.tensor(
-        [[0], [1], [0], [1], [0], [0], [1], [0], [0], [1], [1], [0], [0], [1], [1], [1]], device=device
-    )
-    input_mask = torch.tensor(
-        [[0], [0], [0], [0], [1], [1], [1], [0], [1], [1], [0], [0], [0], [1], [0], [0]], device=device
-    )
-    masked_lm_labels = torch.tensor(
-        [
-            [25496],
-            [16184],
-            [11005],
-            [16228],
-            [14884],
-            [21660],
-            [8678],
-            [23083],
-            [4027],
-            [8397],
-            [11921],
-            [1333],
-            [26482],
-            [1666],
-            [17925],
-            [27978],
-        ],
-        device=device,
-    )
-    next_sentence_labels = torch.tensor([0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0], device=device)
-
-    # Actual values
-    _ = trainer.eval_step(input_ids, segment_ids, input_mask, masked_lm_labels, next_sentence_labels)
-
-    actual_state_dict = trainer.state_dict()
-    del actual_state_dict["model"]
-    _test_commons.assert_all_states_close_ort(actual_state_dict, dummy_init_state)
-
-
-@pytest.mark.parametrize(
-    "model_params",
-    [
-        (["bert.embeddings.LayerNorm.bias"]),
-        (
-            [
-                "bert.embeddings.LayerNorm.bias",
-                "bert.embeddings.LayerNorm.weight",
-                "bert.encoder.layer.0.attention.output.LayerNorm.bias",
-            ]
-        ),
-    ],
-)
-def testORTTrainerFrozenWeights(model_params):
-    device = "cuda"
-    total_steps = 10
-    seed = 1
-
-    # EXPERIMENTAL API
-    model_desc = bert_model_description()
-    model = load_bert_onnx_model()
-
-    optim_config = optim.LambConfig()
-    # Setup ORTTrainer WITHOUT frozen weights
-    opts_dict = {
-        "debug": {"deterministic_compute": True},
-        "device": {
-            "id": device,
-        },
-    }
-    opts = orttrainer.ORTTrainerOptions(opts_dict)
-
-    torch.manual_seed(seed)
-    onnxruntime.set_seed(seed)
-    trainer = orttrainer.ORTTrainer(model, model_desc, optim_config, options=opts)
-
-    for i in range(total_steps):
-        sample_input = generate_random_input_from_model_desc(model_desc, i)
-        trainer.train_step(*sample_input)
-
-    # All model_params must be in the session state
-    assert trainer._onnx_model is not None
-    session_state = trainer._training_session.get_state()
-    assert all([param in session_state for param in model_params])
-
-    # Setup ORTTrainer WITH frozen weights
-    opts_dict.update({"utils": {"frozen_weights": model_params}})
-    opts = orttrainer.ORTTrainerOptions(opts_dict)
-    trainer = orttrainer.ORTTrainer(model, model_desc, optim_config, options=opts)
-
-    for i in range(total_steps):
-        sample_input = generate_random_input_from_model_desc(model_desc, i)
-        trainer.train_step(*sample_input)
-
-    # All model_params CANNOT be in the session state
-    assert trainer._onnx_model is not None
-    session_state = trainer._training_session.get_state()
-    assert not any([param in session_state for param in model_params])
-
-
-def testToyBERTSaveAsONNX():
-    device = "cuda"
-    onnx_file_name = "_____temp_toy_bert_onnx_model.onnx"
-    if os.path.exists(onnx_file_name):
-        os.remove(onnx_file_name)
-    assert not os.path.exists(onnx_file_name)
-
-    # Load trainer
-    model_desc = bert_model_description()
-    model = load_bert_onnx_model()
-
-    optim_config = optim.LambConfig()
-    opts = orttrainer.ORTTrainerOptions(
-        {
-            "debug": {"deterministic_compute": True},
-            "device": {
-                "id": device,
-            },
-        }
-    )
-
-    trainer = orttrainer.ORTTrainer(model, model_desc, optim_config, options=opts)
-
-    trainer.save_as_onnx(onnx_file_name)
-    assert os.path.exists(onnx_file_name)
-
-    with open(onnx_file_name, "rb") as f:
-        bin_str = f.read()
-        reload_onnx_model = onnx.load_model_from_string(bin_str)
-    os.remove(onnx_file_name)
-
-    # Create a new trainer from persisted ONNX model and compare with original ONNX model
-    trainer_from_onnx = orttrainer.ORTTrainer(reload_onnx_model, model_desc, optim_config, options=opts)
-    assert trainer_from_onnx._onnx_model is not None
-    assert id(trainer_from_onnx._onnx_model) != id(trainer._onnx_model)
-    for initializer, loaded_initializer in zip(
-        trainer._onnx_model.graph.initializer, trainer_from_onnx._onnx_model.graph.initializer
-    ):
-        assert initializer.name == loaded_initializer.name
-    assert onnx.helper.printable_graph(trainer_from_onnx._onnx_model.graph) == onnx.helper.printable_graph(
-        trainer._onnx_model.graph
-    )
-    _test_helpers.assert_onnx_weights(trainer, trainer_from_onnx)
-
-
-###############################################################################
-# Temporary tests comparing Legacy vs Experimental ORTTrainer APIs ############
-###############################################################################
-@pytest.mark.parametrize(
-    "optimizer_config",
-    [
-        (optim.AdamConfig),
-        #    (optim.LambConfig), # TODO: re-enable after nondeterminism on backend is fixed
-        (optim.SGDConfig),
-    ],
-)
-def testToyBERTModelLegacyExperimentalBasicTraining(optimizer_config):
-    # Common setup
-    train_steps = 512
-
-    device = "cuda"
-    seed = 1
-    torch.manual_seed(seed)
-    onnxruntime.set_seed(seed)
-
-    # EXPERIMENTAL API
-    model_desc = bert_model_description()
-    model = load_bert_onnx_model()
-    opts = orttrainer.ORTTrainerOptions(
-        {
-            "debug": {"deterministic_compute": True},
-            "device": {
-                "id": device,
-            },
-        }
-    )
-    optim_config = optimizer_config(lr=0.01)
-    trainer = orttrainer.ORTTrainer(model, model_desc, optim_config, options=opts)
-    experimental_losses = []
-    for i in range(train_steps):
-        sample_input = generate_random_input_from_model_desc(model_desc, i)
-        experimental_losses.append(trainer.train_step(*sample_input).cpu().item())
-
-    # LEGACY IMPLEMENTATION
-    torch.manual_seed(seed)
-    onnxruntime.set_seed(seed)
-
-    if optimizer_config == optim.AdamConfig:
-        legacy_optimizer = "AdamOptimizer"
-    elif optimizer_config == optim.LambConfig:
-        legacy_optimizer = "LambOptimizer"
-    elif optimizer_config == optim.SGDConfig:
-        legacy_optimizer = "SGDOptimizer"
-    else:
-        raise RuntimeError("Invalid optimizer_config")
-
-    device = torch.device(device)
-    model = load_bert_onnx_model()
-    legacy_model_desc, learning_rate_description, learning_rate = legacy_model_params(lr=optim_config.lr)
-    legacy_trainer = Legacy_ORTTrainer(
-        model,
-        None,
-        legacy_model_desc,
-        legacy_optimizer,
-        None,
-        learning_rate_description,
-        device,
-        _use_deterministic_compute=True,
-    )
-    legacy_losses = []
-    for i in range(train_steps):
-        sample_input = generate_random_input_from_model_desc(model_desc, i)
-        leg_loss = legacy_trainer.train_step(*sample_input, learning_rate)
-        legacy_losses.append(leg_loss.cpu().item())
-
-    # Check results
-    _test_helpers.assert_model_outputs(experimental_losses, legacy_losses, True)
-
-
-@pytest.mark.parametrize(
-    "initial_lr, lr_scheduler, legacy_lr_scheduler",
-    [
-        (1.0, optim.lr_scheduler.ConstantWarmupLRScheduler, _test_commons.legacy_constant_lr_scheduler),
-        (0.5, optim.lr_scheduler.ConstantWarmupLRScheduler, _test_commons.legacy_constant_lr_scheduler),
-        (1.0, optim.lr_scheduler.CosineWarmupLRScheduler, _test_commons.legacy_cosine_lr_scheduler),
-        (1.0, optim.lr_scheduler.LinearWarmupLRScheduler, _test_commons.legacy_linear_lr_scheduler),
-        (1.0, optim.lr_scheduler.PolyWarmupLRScheduler, _test_commons.legacy_poly_lr_scheduler),
-    ],
-)
-def testToyBERTModelLegacyExperimentalLRScheduler(initial_lr, lr_scheduler, legacy_lr_scheduler):
-    ############################################################################
-    # These tests require hard-coded values for 'total_steps' and 'initial_lr' #
-    ############################################################################
-
-    # Common setup
-    total_steps = 128
-    device = "cuda"
-    seed = 1
-    warmup = 0.05
-    cycles = 0.5
-    power = 1.0
-    lr_end = 1e-7
-
-    # Setup both Experimental and Legacy LR Schedulers before the experimental loop
-    if (
-        legacy_lr_scheduler == _test_commons.legacy_constant_lr_scheduler
-        or legacy_lr_scheduler == _test_commons.legacy_linear_lr_scheduler
-    ):
-        legacy_lr_scheduler = partial(
-            legacy_lr_scheduler, initial_lr=initial_lr, total_steps=total_steps, warmup=warmup
-        )
-    elif legacy_lr_scheduler == _test_commons.legacy_cosine_lr_scheduler:
-        legacy_lr_scheduler = partial(
-            legacy_lr_scheduler, initial_lr=initial_lr, total_steps=total_steps, warmup=warmup, cycles=cycles
-        )
-    elif legacy_lr_scheduler == _test_commons.legacy_poly_lr_scheduler:
-        legacy_lr_scheduler = partial(
-            legacy_lr_scheduler,
-            initial_lr=initial_lr,
-            total_steps=total_steps,
-            warmup=warmup,
-            power=power,
-            lr_end=lr_end,
-        )
-    else:
-        raise RuntimeError("Invalid legacy_lr_scheduler")
-    if (
-        lr_scheduler == optim.lr_scheduler.ConstantWarmupLRScheduler
-        or lr_scheduler == optim.lr_scheduler.LinearWarmupLRScheduler
-    ):
-        lr_scheduler = lr_scheduler(total_steps=total_steps, warmup=warmup)
-    elif lr_scheduler == optim.lr_scheduler.CosineWarmupLRScheduler:
-        lr_scheduler = lr_scheduler(total_steps=total_steps, warmup=warmup, cycles=cycles)
-    elif lr_scheduler == optim.lr_scheduler.PolyWarmupLRScheduler:
-        lr_scheduler = lr_scheduler(total_steps=total_steps, warmup=warmup, power=power, lr_end=lr_end)
-    else:
-        raise RuntimeError("Invalid lr_scheduler")
-
-    # EXPERIMENTAL API
-    model_desc = bert_model_description()
-    model = load_bert_onnx_model()
-    torch.manual_seed(seed)
-    onnxruntime.set_seed(seed)
-    optim_config = optim.AdamConfig(lr=initial_lr)
-    opts = orttrainer.ORTTrainerOptions(
-        {
-            "debug": {"deterministic_compute": True},
-            "device": {
-                "id": device,
-            },
-            "lr_scheduler": lr_scheduler,
-        }
-    )
-    trainer = orttrainer.ORTTrainer(model, model_desc, optim_config, options=opts)
-    experimental_losses = []
-    for i in range(total_steps):
-        sample_input = generate_random_input_from_model_desc(model_desc, i)
-        experimental_losses.append(trainer.train_step(*sample_input).cpu().item())
-        assert_allclose(trainer.options.lr_scheduler.get_last_lr()[0], legacy_lr_scheduler(i))
-
-    # LEGACY IMPLEMENTATION
-    torch.manual_seed(seed)
-    onnxruntime.set_seed(seed)
-    device = torch.device(device)
-    model = load_bert_onnx_model()
-    legacy_model_desc, learning_rate_description, learning_rate = legacy_model_params(initial_lr)
-    legacy_trainer = Legacy_ORTTrainer(
-        model,
-        None,
-        legacy_model_desc,
-        "AdamOptimizer",
-        None,
-        learning_rate_description,
-        device,
-        _use_deterministic_compute=True,
-        get_lr_this_step=legacy_lr_scheduler,
-    )
-    legacy_losses = []
-    for i in range(total_steps):
-        sample_input = generate_random_input_from_model_desc(model_desc, i)
-        leg_loss = legacy_trainer.train_step(*sample_input)
-        legacy_losses.append(leg_loss.cpu().item())
-
-    # Check results
-    _test_helpers.assert_model_outputs(experimental_losses, legacy_losses)
-
-
-@pytest.mark.parametrize(
-    "loss_scaler, legacy_loss_scaler",
-    [
-        (None, Legacy_LossScaler("ort_test_input_loss_scaler", True)),
-        (amp.DynamicLossScaler(), Legacy_LossScaler("ort_test_input_loss_scaler", True)),
-        (CustomLossScaler(), LegacyCustomLossScaler()),
-    ],
-)
-def testToyBERTModelMixedPrecisionLossScalerLegacyExperimental(loss_scaler, legacy_loss_scaler):
-    # Common setup
-    total_steps = 128
-    device = "cuda"
-    seed = 1
-
-    # EXPERIMENTAL IMPLEMENTATION
-    torch.manual_seed(seed)
-    onnxruntime.set_seed(seed)
-    model_desc = bert_model_description()
-    model = load_bert_onnx_model()
-    optim_config = optim.AdamConfig(lr=0.001)
-    opts = orttrainer.ORTTrainerOptions(
-        {
-            "debug": {"deterministic_compute": True},
-            "device": {
-                "id": device,
-            },
-            "mixed_precision": {"enabled": True, "loss_scaler": loss_scaler},
-        }
-    )
-    trainer = orttrainer.ORTTrainer(model, model_desc, optim_config, options=opts)
-    experimental_losses = []
-    for i in range(total_steps):
-        sample_input = generate_random_input_from_model_desc(model_desc, i)
-        experimental_losses.append(trainer.train_step(*sample_input).cpu().item())
-
-    # LEGACY IMPLEMENTATION
-    torch.manual_seed(seed)
-    onnxruntime.set_seed(seed)
-    device = torch.device(device)
-    model = load_bert_onnx_model()
-    legacy_model_desc, learning_rate_description, learning_rate = legacy_model_params(optim_config.lr)
-    legacy_trainer = Legacy_ORTTrainer(
-        model,
-        None,
-        legacy_model_desc,
-        "AdamOptimizer",
-        None,
-        learning_rate_description,
-        device,
-        _use_deterministic_compute=True,
-        use_mixed_precision=True,
-        loss_scaler=legacy_loss_scaler,
-    )
-    legacy_losses = []
-    for i in range(total_steps):
-        sample_input = generate_random_input_from_model_desc(model_desc, i)
-        leg_loss = legacy_trainer.train_step(*sample_input, learning_rate)
-        legacy_losses.append(leg_loss.cpu().item())
-
-    # Check results
-    _test_helpers.assert_model_outputs(experimental_losses, legacy_losses)
-
-
-@pytest.mark.parametrize("gradient_accumulation_steps", [(1), (4), (7)])
-def testToyBERTModelGradientAccumulationLegacyExperimental(gradient_accumulation_steps):
-    # Common setup
-    total_steps = 128
-    device = "cuda"
-    seed = 1
-
-    # EXPERIMENTAL IMPLEMENTATION
-    torch.manual_seed(seed)
-    onnxruntime.set_seed(seed)
-    model_desc = bert_model_description()
-    model = load_bert_onnx_model()
-    optim_config = optim.AdamConfig()
-    opts = orttrainer.ORTTrainerOptions(
-        {
-            "debug": {"deterministic_compute": True},
-            "device": {
-                "id": device,
-            },
-            "batch": {"gradient_accumulation_steps": gradient_accumulation_steps},
-        }
-    )
-    trainer = orttrainer.ORTTrainer(model, model_desc, optim_config, options=opts)
-    experimental_losses = []
-    for i in range(total_steps):
-        sample_input = generate_random_input_from_model_desc(model_desc, i)
-        loss = trainer.train_step(*sample_input)
-        experimental_losses.append(loss.cpu().item())
-
-    # LEGACY IMPLEMENTATION
-    torch.manual_seed(seed)
-    onnxruntime.set_seed(seed)
-    device = torch.device(device)
-    model = load_bert_onnx_model()
-    legacy_model_desc, learning_rate_description, learning_rate = legacy_model_params(optim_config.lr)
-    legacy_trainer = Legacy_ORTTrainer(
-        model,
-        None,
-        legacy_model_desc,
-        "AdamOptimizer",
-        None,
-        learning_rate_description,
-        device,
-        _use_deterministic_compute=True,
-        gradient_accumulation_steps=gradient_accumulation_steps,
-    )
-    legacy_losses = []
-    for i in range(total_steps):
-        sample_input = generate_random_input_from_model_desc(model_desc, i)
-        leg_loss = legacy_trainer.train_step(*sample_input, learning_rate)
-        legacy_losses.append(leg_loss.cpu().item())
-
-    # Check results
-    _test_helpers.assert_model_outputs(experimental_losses, legacy_losses)
-
-
-@pytest.mark.parametrize(
-    "params, legacy_optim_map",
-    [
-        # Change the hyper parameters for all parameters
-        ([], legacy_optim_params_a),
-        # Change the hyperparameters for a subset of hardcoded parameters
-        (
-            [
-                {
-                    "params": ["bert.embeddings.LayerNorm.bias", "bert.embeddings.LayerNorm.weight"],
-                    "alpha": 0.9,
-                    "beta": 0.999,
-                    "lambda_coef": 0.0,
-                    "epsilon": 1e-6,
-                    "do_bias_correction": False,
-                }
-            ],
-            legacy_optim_params_b,
-        ),
-        # Change the hyperparameters for a generated set of paramers
-        (optimizer_parameters(load_bert_onnx_model()), legacy_optim_params_c),
-    ],
-)
-def testToyBERTModelLegacyExperimentalCustomOptimParameters(params, legacy_optim_map):
-    # Common setup
-    total_steps = 128
-    device = "cuda"
-    seed = 1
-
-    # EXPERIMENTAL API
-    torch.manual_seed(seed)
-    onnxruntime.set_seed(seed)
-    model_desc = bert_model_description()
-    model = load_bert_onnx_model()
-
-    optim_config = optim.AdamConfig(
-        params, alpha=0.9, beta=0.999, lambda_coef=0.01, epsilon=1e-6, do_bias_correction=False
-    )
-    opts = orttrainer.ORTTrainerOptions(
-        {
-            "debug": {"deterministic_compute": True},
-            "device": {
-                "id": device,
-            },
-        }
-    )
-    trainer = orttrainer.ORTTrainer(model, model_desc, optim_config, options=opts)
-
-    experimental_losses = []
-    for i in range(total_steps):
-        sample_input = generate_random_input_from_model_desc(model_desc, i)
-        experimental_losses.append(trainer.train_step(*sample_input).cpu().item())
-
-    # LEGACY IMPLEMENTATION
-    torch.manual_seed(seed)
-    onnxruntime.set_seed(seed)
-    device = torch.device(device)
-    model = load_bert_onnx_model()
-    legacy_model_desc, learning_rate_description, learning_rate = legacy_model_params(trainer.optim_config.lr)
-
-    legacy_trainer = Legacy_ORTTrainer(
-        model,
-        None,
-        legacy_model_desc,
-        "AdamOptimizer",
-        legacy_optim_map,
-        learning_rate_description,
-        device,
-        _use_deterministic_compute=True,
-    )
-    legacy_losses = []
-    for i in range(total_steps):
-        sample_input = generate_random_input_from_model_desc(model_desc, i)
-        legacy_sample_input = [*sample_input, learning_rate]
-        legacy_losses.append(legacy_trainer.train_step(legacy_sample_input).cpu().item())
-
-    # Check results
-    _test_helpers.assert_model_outputs(experimental_losses, legacy_losses)
diff --git a/orttraining/orttraining/test/python/orttraining_test_orttrainer_checkpoint_functions.py b/orttraining/orttraining/test/python/orttraining_test_orttrainer_checkpoint_functions.py
deleted file mode 100644
index d366f2cb26557..0000000000000
--- a/orttraining/orttraining/test/python/orttraining_test_orttrainer_checkpoint_functions.py
+++ /dev/null
@@ -1,722 +0,0 @@
-from unittest.mock import Mock, patch
-
-import numpy as np
-import onnx
-import pytest
-import torch
-from _test_commons import _load_pytorch_transformer_model
-
-from onnxruntime.training import _checkpoint_storage, amp, checkpoint, optim, orttrainer  # noqa: F401
-
-# Helper functions
-
-
-def _create_trainer(zero_enabled=False):
-    """Cerates a simple ORTTrainer for ORTTrainer functional tests"""
-
-    device = "cuda"
-    optim_config = optim.LambConfig(lr=0.1)
-    opts = {"device": {"id": device}, "debug": {"deterministic_compute": True}}
-    if zero_enabled:
-        opts["distributed"] = {
-            "world_rank": 0,
-            "world_size": 1,
-            "horizontal_parallel_size": 1,
-            "data_parallel_size": 1,
-            "allreduce_post_accumulation": True,
-            "deepspeed_zero_optimization": {"stage": 1},
-        }
-    model, model_desc, loss_fn, batcher_fn, train_data, _, _ = _load_pytorch_transformer_model(device)
-    trainer = orttrainer.ORTTrainer(
-        model, model_desc, optim_config, loss_fn=loss_fn, options=orttrainer.ORTTrainerOptions(opts)
-    )
-
-    return trainer
-
-
-class _training_session_mock:  # noqa: N801
-    """Mock object for the ORTTrainer _training_session member"""
-
-    def __init__(self, model_states, optimizer_states, partition_info):
-        self.model_states = model_states
-        self.optimizer_states = optimizer_states
-        self.partition_info = partition_info
-
-    def get_model_state(self, include_mixed_precision_weights=False):
-        return self.model_states
-
-    def get_optimizer_state(self):
-        return self.optimizer_states
-
-    def get_partition_info_map(self):
-        return self.partition_info
-
-
-def _get_load_state_dict_strict_error_arguments():
-    """Return a list of tuples that can be used as parameters for test_load_state_dict_errors_when_model_key_missing
-
-    Construct a list of tuples (training_session_state_dict, input_state_dict, error_arguments)
-    The load_state_dict function will compare the two state dicts (training_session_state_dict, input_state_dict) and
-    throw a runtime error with the missing/unexpected keys. The error arguments capture these missing/unexpected keys.
-    """
-
-    training_session_state_dict = {
-        "model": {"full_precision": {"a": np.arange(5), "b": np.arange(7)}},
-        "optimizer": {
-            "a": {"Moment_1": np.arange(5), "Moment_2": np.arange(7)},
-            "shared_optimizer_state": {"step": np.arange(5)},
-        },
-    }
-
-    # input state dictionaries
-    precision_key_missing = {"model": {}, "optimizer": {}}
-    precision_key_unexpected = {"model": {"full_precision": {}, "mixed_precision": {}}, "optimizer": {}}
-    model_state_key_missing = {"model": {"full_precision": {}}, "optimizer": {}}
-    model_state_key_unexpected = {"model": {"full_precision": {"a": 2, "b": 3, "c": 4}}, "optimizer": {}}
-    optimizer_model_state_key_missing = {"model": {"full_precision": {"a": 2, "b": 3}}, "optimizer": {}}
-    optimizer_model_state_key_unexpected = {
-        "model": {"full_precision": {"a": 2, "b": 3}},
-        "optimizer": {"a": {}, "shared_optimizer_state": {}, "b": {}},
-    }
-    optimizer_state_key_missing = {
-        "model": {"full_precision": {"a": 2, "b": 3}},
-        "optimizer": {"a": {}, "shared_optimizer_state": {"step": np.arange(5)}},
-    }
-    optimizer_state_key_unexpected = {
-        "model": {"full_precision": {"a": 2, "b": 3}},
-        "optimizer": {
-            "a": {"Moment_1": np.arange(5), "Moment_2": np.arange(7)},
-            "shared_optimizer_state": {"step": np.arange(5), "another_step": np.arange(1)},
-        },
-    }
-
-    input_arguments = [
-        (training_session_state_dict, precision_key_missing, ["full_precision"]),
-        (training_session_state_dict, precision_key_unexpected, ["mixed_precision"]),
-        (training_session_state_dict, model_state_key_missing, ["a", "b"]),
-        (training_session_state_dict, model_state_key_unexpected, ["c"]),
-        (training_session_state_dict, optimizer_model_state_key_missing, ["a", "shared_optimizer_state"]),
-        (training_session_state_dict, optimizer_model_state_key_unexpected, ["b"]),
-        (training_session_state_dict, optimizer_state_key_missing, ["Moment_1", "Moment_2"]),
-        (training_session_state_dict, optimizer_state_key_unexpected, ["another_step"]),
-    ]
-
-    return input_arguments
-
-
-# Tests
-
-
-def test_empty_state_dict_when_training_session_uninitialized():
-    trainer = _create_trainer()
-    with pytest.warns(UserWarning) as user_warning:
-        state_dict = trainer.state_dict()
-
-    assert len(state_dict.keys()) == 0
-    assert (
-        user_warning[0].message.args[0] == "ONNX Runtime training session is not initialized yet. "
-        "Please run train_step or eval_step at least once before calling ORTTrainer.state_dict()."
-    )
-
-
-@patch("onnx.ModelProto")
-def test_training_session_provides_empty_model_states(onnx_model_mock):
-    trainer = _create_trainer()
-    training_session_mock = _training_session_mock({}, {}, {})
-    trainer._training_session = training_session_mock
-    trainer._onnx_model = onnx_model_mock()
-
-    state_dict = trainer.state_dict()
-    assert len(state_dict["model"].keys()) == 0
-
-
-@patch("onnx.ModelProto")
-def test_training_session_provides_model_states(onnx_model_mock):
-    trainer = _create_trainer()
-    model_states = {"full_precision": {"a": np.arange(5), "b": np.arange(7)}}
-    training_session_mock = _training_session_mock(model_states, {}, {})
-    trainer._training_session = training_session_mock
-    trainer._onnx_model = onnx_model_mock()
-
-    state_dict = trainer.state_dict()
-    assert (state_dict["model"]["full_precision"]["a"] == np.arange(5)).all()
-    assert (state_dict["model"]["full_precision"]["b"] == np.arange(7)).all()
-
-
-@patch("onnx.ModelProto")
-def test_training_session_provides_model_states_pytorch_format(onnx_model_mock):
-    trainer = _create_trainer()
-    model_states = {"full_precision": {"a": np.arange(5), "b": np.arange(7)}}
-    training_session_mock = _training_session_mock(model_states, {}, {})
-    trainer._training_session = training_session_mock
-    trainer._onnx_model = onnx_model_mock()
-
-    state_dict = trainer.state_dict(pytorch_format=True)
-    assert torch.all(torch.eq(state_dict["a"], torch.tensor(np.arange(5))))
-    assert torch.all(torch.eq(state_dict["b"], torch.tensor(np.arange(7))))
-
-
-@patch("onnx.ModelProto")
-def test_onnx_graph_provides_frozen_model_states(onnx_model_mock):
-    trainer = _create_trainer()
-    model_states = {"full_precision": {"a": np.arange(5), "b": np.arange(7)}}
-    training_session_mock = _training_session_mock(model_states, {}, {})
-    trainer._training_session = training_session_mock
-    trainer._onnx_model = onnx_model_mock()
-    trainer.options.utils.frozen_weights = ["a_frozen_weight", "a_float16_weight"]
-    trainer._onnx_model.graph.initializer = [
-        onnx.numpy_helper.from_array(np.array([1, 2, 3], dtype=np.float32), "a_frozen_weight"),
-        onnx.numpy_helper.from_array(np.array([4, 5, 6], dtype=np.float32), "a_non_fronzen_weight"),
-        onnx.numpy_helper.from_array(np.array([7, 8, 9], dtype=np.float16), "a_float16_weight"),
-    ]
-
-    state_dict = trainer.state_dict()
-    assert (state_dict["model"]["full_precision"]["a"] == np.arange(5)).all()
-    assert (state_dict["model"]["full_precision"]["b"] == np.arange(7)).all()
-    assert (state_dict["model"]["full_precision"]["a_frozen_weight"] == np.array([1, 2, 3], dtype=np.float32)).all()
-    assert "a_non_fronzen_weight" not in state_dict["model"]["full_precision"]
-    assert (state_dict["model"]["full_precision"]["a_float16_weight"] == np.array([7, 8, 9], dtype=np.float32)).all()
-
-
-@patch("onnx.ModelProto")
-def test_training_session_provides_empty_optimizer_states(onnx_model_mock):
-    trainer = _create_trainer()
-    training_session_mock = _training_session_mock({}, {}, {})
-    trainer._training_session = training_session_mock
-    trainer._onnx_model = onnx_model_mock()
-
-    state_dict = trainer.state_dict()
-    assert len(state_dict["optimizer"].keys()) == 0
-
-
-@patch("onnx.ModelProto")
-def test_training_session_provides_optimizer_states(onnx_model_mock):
-    trainer = _create_trainer()
-    optimizer_states = {
-        "model_weight": {"Moment_1": np.arange(5), "Moment_2": np.arange(7)},
-        "shared_optimizer_state": {"step": np.arange(1)},
-    }
-    training_session_mock = _training_session_mock({}, optimizer_states, {})
-    trainer._training_session = training_session_mock
-    trainer._onnx_model = onnx_model_mock()
-
-    state_dict = trainer.state_dict()
-    assert (state_dict["optimizer"]["model_weight"]["Moment_1"] == np.arange(5)).all()
-    assert (state_dict["optimizer"]["model_weight"]["Moment_2"] == np.arange(7)).all()
-    assert (state_dict["optimizer"]["shared_optimizer_state"]["step"] == np.arange(1)).all()
-
-
-@patch("onnx.ModelProto")
-def test_training_session_provides_optimizer_states_pytorch_format(onnx_model_mock):
-    trainer = _create_trainer()
-    model_states = {"full_precision": {"a": np.arange(5), "b": np.arange(7)}}
-    optimizer_states = {
-        "model_weight": {"Moment_1": np.arange(5), "Moment_2": np.arange(7)},
-        "shared_optimizer_state": {"step": np.arange(1)},
-    }
-    training_session_mock = _training_session_mock(model_states, optimizer_states, {})
-    trainer._training_session = training_session_mock
-    trainer._onnx_model = onnx_model_mock()
-
-    state_dict = trainer.state_dict(pytorch_format=True)
-    assert "optimizer" not in state_dict
-
-
-@patch("onnx.ModelProto")
-def test_training_session_provides_empty_partition_info_map(onnx_model_mock):
-    trainer = _create_trainer(zero_enabled=True)
-    training_session_mock = _training_session_mock({}, {}, {})
-    trainer._training_session = training_session_mock
-    trainer._onnx_model = onnx_model_mock()
-
-    state_dict = trainer.state_dict()
-    assert len(state_dict["partition_info"].keys()) == 0
-
-
-@patch("onnx.ModelProto")
-def test_training_session_provides_partition_info_map(onnx_model_mock):
-    trainer = _create_trainer(zero_enabled=True)
-    partition_info = {"a": {"original_dim": [1, 2, 3]}}
-    training_session_mock = _training_session_mock({}, {}, partition_info)
-    trainer._training_session = training_session_mock
-    trainer._onnx_model = onnx_model_mock()
-
-    state_dict = trainer.state_dict()
-    assert state_dict["partition_info"]["a"]["original_dim"] == [1, 2, 3]
-
-
-@patch("onnx.ModelProto")
-def test_training_session_provides_all_states(onnx_model_mock):
-    trainer = _create_trainer(zero_enabled=True)
-    model_states = {"full_precision": {"a": np.arange(5), "b": np.arange(7)}}
-    optimizer_states = {
-        "model_weight": {"Moment_1": np.arange(5), "Moment_2": np.arange(7)},
-        "shared_optimizer_state": {"step": np.arange(1)},
-    }
-    partition_info = {"a": {"original_dim": [1, 2, 3]}}
-    training_session_mock = _training_session_mock(model_states, optimizer_states, partition_info)
-    trainer._training_session = training_session_mock
-    trainer._onnx_model = onnx_model_mock()
-
-    state_dict = trainer.state_dict()
-    assert (state_dict["model"]["full_precision"]["a"] == np.arange(5)).all()
-    assert (state_dict["model"]["full_precision"]["b"] == np.arange(7)).all()
-    assert (state_dict["optimizer"]["model_weight"]["Moment_1"] == np.arange(5)).all()
-    assert (state_dict["optimizer"]["model_weight"]["Moment_2"] == np.arange(7)).all()
-    assert (state_dict["optimizer"]["shared_optimizer_state"]["step"] == np.arange(1)).all()
-    assert state_dict["partition_info"]["a"]["original_dim"] == [1, 2, 3]
-
-
-def test_load_state_dict_holds_when_training_session_not_initialized():
-    trainer = _create_trainer()
-    state_dict = {
-        "model": {"full_precision": {"a": np.arange(5), "b": np.arange(7)}},
-        "optimizer": {
-            "a": {"Moment_1": np.arange(5), "Moment_2": np.arange(7)},
-            "shared_optimizer_state": {"step": np.arange(5)},
-        },
-    }
-    assert not trainer._load_state_dict
-    state_dict = trainer.load_state_dict(state_dict)
-    assert trainer._load_state_dict
-
-
-@pytest.mark.parametrize(
-    "state_dict, input_state_dict, error_key",
-    [
-        (
-            {"model": {}, "optimizer": {}},
-            {"model": {}, "optimizer": {}, "trainer_options": {"optimizer_name": "LambOptimizer"}},
-            "train_step_info",
-        ),
-        (
-            {"optimizer": {}, "train_step_info": {"optimization_step": 0, "step": 0}},
-            {
-                "optimizer": {},
-                "trainer_options": {"optimizer_name": "LambOptimizer"},
-                "train_step_info": {"optimization_step": 0, "step": 0},
-            },
-            "model",
-        ),
-        (
-            {"model": {}, "train_step_info": {"optimization_step": 0, "step": 0}},
-            {
-                "model": {},
-                "trainer_options": {"optimizer_name": "LambOptimizer"},
-                "train_step_info": {"optimization_step": 0, "step": 0},
-            },
-            "optimizer",
-        ),
-    ],
-)
-def test_load_state_dict_warns_when_model_optimizer_key_missing(state_dict, input_state_dict, error_key):
-    trainer = _create_trainer()
-    trainer._training_session = _training_session_mock({}, {}, {})
-    trainer.state_dict = Mock(return_value=state_dict)
-    trainer._update_onnx_model_initializers = Mock()
-    trainer._init_session = Mock()
-    with patch("onnx.ModelProto") as onnx_model_mock:
-        trainer._onnx_model = onnx_model_mock()
-        trainer._onnx_model.graph.initializer = []
-        with pytest.warns(UserWarning) as user_warning:
-            trainer.load_state_dict(input_state_dict)
-
-    assert user_warning[0].message.args[0] == f"Missing key: {error_key} in state_dict"
-
-
-@pytest.mark.parametrize("state_dict, input_state_dict, error_keys", _get_load_state_dict_strict_error_arguments())
-def test_load_state_dict_errors_when_state_dict_mismatch(state_dict, input_state_dict, error_keys):
-    trainer = _create_trainer()
-    trainer._training_session = _training_session_mock({}, {}, {})
-    trainer.state_dict = Mock(return_value=state_dict)
-    with pytest.raises(RuntimeError) as runtime_error:
-        trainer.load_state_dict(input_state_dict)
-
-    assert any(key in str(runtime_error.value) for key in error_keys)
-
-
-@patch("onnx.ModelProto")
-def test_load_state_dict_loads_the_states_and_inits_training_session(onnx_model_mock):
-    trainer = _create_trainer()
-    training_session_state_dict = {
-        "model": {"full_precision": {"a": np.arange(5), "b": np.arange(7)}},
-        "optimizer": {
-            "a": {"Moment_1": np.arange(5), "Moment_2": np.arange(7)},
-            "shared_optimizer_state": {"step": np.arange(1)},
-        },
-    }
-
-    input_state_dict = {
-        "model": {"full_precision": {"a": np.array([1, 2]), "b": np.array([3, 4])}},
-        "optimizer": {
-            "a": {"Moment_1": np.array([5, 6]), "Moment_2": np.array([7, 8])},
-            "shared_optimizer_state": {"step": np.array([9])},
-        },
-        "trainer_options": {"optimizer_name": "LambOptimizer"},
-    }
-    trainer._training_session = _training_session_mock({}, {}, {})
-    trainer.state_dict = Mock(return_value=training_session_state_dict)
-    trainer._onnx_model = onnx_model_mock()
-    trainer._onnx_model.graph.initializer = [
-        onnx.numpy_helper.from_array(np.arange(20, dtype=np.float32), "a"),
-        onnx.numpy_helper.from_array(np.arange(25, dtype=np.float32), "b"),
-    ]
-    trainer._update_onnx_model_initializers = Mock()
-    trainer._init_session = Mock()
-
-    trainer.load_state_dict(input_state_dict)
-
-    loaded_initializers, _ = trainer._update_onnx_model_initializers.call_args
-    state_dict_to_load, _ = trainer._init_session.call_args
-
-    assert "a" in loaded_initializers[0]
-    assert (loaded_initializers[0]["a"] == np.array([1, 2])).all()
-    assert "b" in loaded_initializers[0]
-    assert (loaded_initializers[0]["b"] == np.array([3, 4])).all()
-
-    assert (state_dict_to_load[0]["a"]["Moment_1"] == np.array([5, 6])).all()
-    assert (state_dict_to_load[0]["a"]["Moment_2"] == np.array([7, 8])).all()
-    assert (state_dict_to_load[0]["shared_optimizer_state"]["step"] == np.array([9])).all()
-
-
-@patch("onnxruntime.training._checkpoint_storage.save")
-def test_save_checkpoint_calls_checkpoint_storage_save(save_mock):
-    trainer = _create_trainer()
-    state_dict = {"model": {}, "optimizer": {}}
-    trainer.state_dict = Mock(return_value=state_dict)
-
-    trainer.save_checkpoint("abc")
-
-    save_args, _ = save_mock.call_args
-    assert "model" in save_args[0]
-    assert not bool(save_args[0]["model"])
-    assert "optimizer" in save_args[0]
-    assert not bool(save_args[0]["optimizer"])
-    assert save_args[1] == "abc"
-
-
-@patch("onnxruntime.training._checkpoint_storage.save")
-def test_save_checkpoint_exclude_optimizer_states(save_mock):
-    trainer = _create_trainer()
-    state_dict = {"model": {}, "optimizer": {}}
-    trainer.state_dict = Mock(return_value=state_dict)
-
-    trainer.save_checkpoint("abc", include_optimizer_states=False)
-
-    save_args, _ = save_mock.call_args
-    assert "model" in save_args[0]
-    assert not bool(save_args[0]["model"])
-    assert "optimizer" not in save_args[0]
-    assert save_args[1] == "abc"
-
-
-@patch("onnxruntime.training._checkpoint_storage.save")
-def test_save_checkpoint_user_dict(save_mock):
-    trainer = _create_trainer()
-    state_dict = {"model": {}, "optimizer": {}}
-    trainer.state_dict = Mock(return_value=state_dict)
-
-    trainer.save_checkpoint("abc", user_dict={"abc": np.arange(4)})
-
-    save_args, _ = save_mock.call_args
-    assert "user_dict" in save_args[0]
-    assert save_args[0]["user_dict"] == _checkpoint_storage.to_serialized_hex({"abc": np.arange(4)})
-
-
-@patch("onnxruntime.training._checkpoint_storage.load")
-@patch("onnxruntime.training.checkpoint.aggregate_checkpoints")
-def test_load_checkpoint(aggregate_checkpoints_mock, load_mock):
-    trainer = _create_trainer()
-    trainer_options = {
-        "mixed_precision": np.bool_(False),
-        "world_rank": np.int64(0),
-        "world_size": np.int64(1),
-        "horizontal_parallel_size": np.int64(1),
-        "data_parallel_size": np.int64(1),
-        "zero_stage": np.int64(0),
-    }
-    state_dict = {
-        "model": {},
-        "optimizer": {},
-        "trainer_options": {
-            "mixed_precision": np.bool_(False),
-            "world_rank": np.int64(0),
-            "world_size": np.int64(1),
-            "horizontal_parallel_size": np.int64(1),
-            "data_parallel_size": np.int64(1),
-            "zero_stage": np.int64(0),
-        },
-    }
-    trainer.load_state_dict = Mock()
-
-    load_mock.side_effect = [trainer_options, state_dict]
-    trainer.load_checkpoint("abc")
-
-    args_list = load_mock.call_args_list
-    load_args, load_kwargs = args_list[0]
-    assert load_args[0] == "abc"
-    assert load_kwargs["key"] == "trainer_options"
-    load_args, load_kwargs = args_list[1]
-    assert load_args[0] == "abc"
-    assert "key" not in load_kwargs
-    assert not aggregate_checkpoints_mock.called
-
-
-@patch("onnxruntime.training._checkpoint_storage.load")
-@patch("onnxruntime.training.checkpoint.aggregate_checkpoints")
-@pytest.mark.parametrize(
-    "trainer_options",
-    [
-        {
-            "mixed_precision": np.bool_(False),
-            "world_rank": np.int64(0),
-            "world_size": np.int64(4),
-            "horizontal_parallel_size": np.int64(1),
-            "data_parallel_size": np.int64(4),
-            "zero_stage": np.int64(1),
-        },
-        {
-            "mixed_precision": np.bool_(True),
-            "world_rank": np.int64(0),
-            "world_size": np.int64(1),
-            "horizontal_parallel_size": np.int64(1),
-            "data_parallel_size": np.int64(1),
-            "zero_stage": np.int64(1),
-        },
-        {
-            "mixed_precision": np.bool_(True),
-            "world_rank": np.int64(0),
-            "world_size": np.int64(1),
-            "horizontal_parallel_size": np.int64(1),
-            "data_parallel_size": np.int64(1),
-            "zero_stage": np.int64(1),
-        },
-    ],
-)
-def test_load_checkpoint_aggregation_required_zero_enabled(aggregate_checkpoints_mock, load_mock, trainer_options):
-    trainer = _create_trainer()
-    trainer.load_state_dict = Mock()
-
-    load_mock.side_effect = [trainer_options]
-    trainer.load_checkpoint("abc")
-
-    args_list = load_mock.call_args_list
-    load_args, load_kwargs = args_list[0]
-    assert load_args[0] == "abc"
-    assert load_kwargs["key"] == "trainer_options"
-    assert aggregate_checkpoints_mock.called
-    call_args, _ = aggregate_checkpoints_mock.call_args
-    assert call_args[0] == tuple(["abc"])
-
-
-@patch("onnxruntime.training._checkpoint_storage.load")
-@patch("onnxruntime.training.checkpoint.aggregate_checkpoints")
-def test_load_checkpoint_user_dict(aggregate_checkpoints_mock, load_mock):
-    trainer = _create_trainer()
-    trainer_options = {
-        "mixed_precision": np.bool_(False),
-        "world_rank": np.int64(0),
-        "world_size": np.int64(1),
-        "horizontal_parallel_size": np.int64(1),
-        "data_parallel_size": np.int64(1),
-        "zero_stage": np.int64(0),
-    }
-    state_dict = {
-        "model": {},
-        "optimizer": {},
-        "trainer_options": {
-            "mixed_precision": np.bool_(False),
-            "world_rank": np.int64(0),
-            "world_size": np.int64(1),
-            "horizontal_parallel_size": np.int64(1),
-            "data_parallel_size": np.int64(1),
-            "zero_stage": np.int64(0),
-        },
-        "user_dict": _checkpoint_storage.to_serialized_hex({"array": torch.tensor(np.arange(5))}),
-    }
-    trainer.load_state_dict = Mock()
-
-    load_mock.side_effect = [trainer_options, state_dict]
-    user_dict = trainer.load_checkpoint("abc")
-
-    assert torch.all(torch.eq(user_dict["array"], torch.tensor(np.arange(5))))
-
-
-@patch("onnxruntime.training._checkpoint_storage.load")
-def test_checkpoint_aggregation(load_mock):
-    trainer_options1 = {
-        "mixed_precision": np.bool_(False),
-        "world_rank": np.int64(0),
-        "world_size": np.int64(2),
-        "horizontal_parallel_size": np.int64(1),
-        "data_parallel_size": np.int64(2),
-        "zero_stage": np.int64(1),
-        "optimizer_name": b"Adam",
-    }
-    trainer_options2 = {
-        "mixed_precision": np.bool_(False),
-        "world_rank": np.int64(1),
-        "world_size": np.int64(2),
-        "horizontal_parallel_size": np.int64(1),
-        "data_parallel_size": np.int64(2),
-        "zero_stage": np.int64(1),
-        "optimizer_name": b"Adam",
-    }
-
-    state_dict1 = {
-        "model": {"full_precision": {"optimizer_sharded": np.array([1, 2, 3]), "non_sharded": np.array([11, 22, 33])}},
-        "optimizer": {
-            "optimizer_sharded": {
-                "Moment_1": np.array([9, 8, 7]),
-                "Moment_2": np.array([99, 88, 77]),
-                "Step": np.array([5]),
-            },
-            "non_sharded": {
-                "Moment_1": np.array([666, 555, 444]),
-                "Moment_2": np.array([6666, 5555, 4444]),
-                "Step": np.array([55]),
-            },
-        },
-        "trainer_options": {
-            "mixed_precision": np.bool_(False),
-            "world_rank": np.int64(0),
-            "world_size": np.int64(1),
-            "horizontal_parallel_size": np.int64(1),
-            "data_parallel_size": np.int64(1),
-            "zero_stage": np.int64(0),
-            "optimizer_name": b"Adam",
-        },
-        "partition_info": {"optimizer_sharded": {"original_dim": np.array([2, 3])}},
-    }
-
-    state_dict2 = {
-        "model": {"full_precision": {"optimizer_sharded": np.array([1, 2, 3]), "non_sharded": np.array([11, 22, 33])}},
-        "optimizer": {
-            "optimizer_sharded": {
-                "Moment_1": np.array([6, 5, 4]),
-                "Moment_2": np.array([66, 55, 44]),
-                "Step": np.array([5]),
-            },
-            "non_sharded": {
-                "Moment_1": np.array([666, 555, 444]),
-                "Moment_2": np.array([6666, 5555, 4444]),
-                "Step": np.array([55]),
-            },
-        },
-        "trainer_options": {
-            "mixed_precision": np.bool_(False),
-            "world_rank": np.int64(1),
-            "world_size": np.int64(1),
-            "horizontal_parallel_size": np.int64(1),
-            "data_parallel_size": np.int64(1),
-            "zero_stage": np.int64(0),
-            "optimizer_name": b"Adam",
-        },
-        "partition_info": {"optimizer_sharded": {"original_dim": np.array([2, 3])}},
-    }
-
-    load_mock.side_effect = [trainer_options1, trainer_options2, trainer_options1, state_dict1, state_dict2]
-    state_dict = checkpoint.aggregate_checkpoints(["abc", "def"], pytorch_format=False)
-
-    assert (state_dict["model"]["full_precision"]["optimizer_sharded"] == np.array([1, 2, 3])).all()
-    assert (state_dict["model"]["full_precision"]["non_sharded"] == np.array([11, 22, 33])).all()
-    assert (state_dict["optimizer"]["optimizer_sharded"]["Moment_1"] == np.array([[9, 8, 7], [6, 5, 4]])).all()
-    assert (state_dict["optimizer"]["optimizer_sharded"]["Moment_2"] == np.array([[99, 88, 77], [66, 55, 44]])).all()
-    assert (state_dict["optimizer"]["optimizer_sharded"]["Step"] == np.array([5])).all()
-    assert (state_dict["optimizer"]["non_sharded"]["Moment_1"] == np.array([666, 555, 444])).all()
-    assert (state_dict["optimizer"]["non_sharded"]["Moment_2"] == np.array([6666, 5555, 4444])).all()
-    assert (state_dict["optimizer"]["non_sharded"]["Step"] == np.array([55])).all()
-
-    assert state_dict["trainer_options"]["mixed_precision"] is False
-    assert state_dict["trainer_options"]["world_rank"] == 0
-    assert state_dict["trainer_options"]["world_size"] == 1
-    assert state_dict["trainer_options"]["horizontal_parallel_size"] == 1
-    assert state_dict["trainer_options"]["data_parallel_size"] == 1
-    assert state_dict["trainer_options"]["zero_stage"] == 0
-    assert state_dict["trainer_options"]["optimizer_name"] == b"Adam"
-
-
-@patch("onnxruntime.training._checkpoint_storage.load")
-def test_checkpoint_aggregation_mixed_precision(load_mock):
-    trainer_options1 = {
-        "mixed_precision": np.bool_(True),
-        "world_rank": np.int64(0),
-        "world_size": np.int64(2),
-        "horizontal_parallel_size": np.int64(1),
-        "data_parallel_size": np.int64(2),
-        "zero_stage": np.int64(1),
-        "optimizer_name": b"Adam",
-    }
-    trainer_options2 = {
-        "mixed_precision": np.bool_(True),
-        "world_rank": np.int64(1),
-        "world_size": np.int64(2),
-        "horizontal_parallel_size": np.int64(1),
-        "data_parallel_size": np.int64(2),
-        "zero_stage": np.int64(1),
-        "optimizer_name": b"Adam",
-    }
-
-    state_dict1 = {
-        "model": {"full_precision": {"sharded": np.array([1, 2, 3]), "non_sharded": np.array([11, 22, 33])}},
-        "optimizer": {
-            "sharded": {"Moment_1": np.array([9, 8, 7]), "Moment_2": np.array([99, 88, 77]), "Step": np.array([5])},
-            "non_sharded": {
-                "Moment_1": np.array([666, 555, 444]),
-                "Moment_2": np.array([6666, 5555, 4444]),
-                "Step": np.array([55]),
-            },
-        },
-        "trainer_options": {
-            "mixed_precision": np.bool_(True),
-            "world_rank": np.int64(0),
-            "world_size": np.int64(1),
-            "horizontal_parallel_size": np.int64(1),
-            "data_parallel_size": np.int64(1),
-            "zero_stage": np.int64(0),
-            "optimizer_name": b"Adam",
-        },
-        "partition_info": {"sharded": {"original_dim": np.array([2, 3])}},
-    }
-
-    state_dict2 = {
-        "model": {"full_precision": {"sharded": np.array([4, 5, 6]), "non_sharded": np.array([11, 22, 33])}},
-        "optimizer": {
-            "sharded": {"Moment_1": np.array([6, 5, 4]), "Moment_2": np.array([66, 55, 44]), "Step": np.array([5])},
-            "non_sharded": {
-                "Moment_1": np.array([666, 555, 444]),
-                "Moment_2": np.array([6666, 5555, 4444]),
-                "Step": np.array([55]),
-            },
-        },
-        "trainer_options": {
-            "mixed_precision": np.bool_(True),
-            "world_rank": np.int64(1),
-            "world_size": np.int64(1),
-            "horizontal_parallel_size": np.int64(1),
-            "data_parallel_size": np.int64(1),
-            "zero_stage": np.int64(0),
-            "optimizer_name": b"Adam",
-        },
-        "partition_info": {"sharded": {"original_dim": np.array([2, 3])}},
-    }
-
-    load_mock.side_effect = [trainer_options1, trainer_options2, trainer_options1, state_dict1, state_dict2]
-    state_dict = checkpoint.aggregate_checkpoints(["abc", "def"], pytorch_format=False)
-
-    assert (state_dict["model"]["full_precision"]["sharded"] == np.array([[1, 2, 3], [4, 5, 6]])).all()
-    assert (state_dict["model"]["full_precision"]["non_sharded"] == np.array([11, 22, 33])).all()
-    assert (state_dict["optimizer"]["sharded"]["Moment_1"] == np.array([[9, 8, 7], [6, 5, 4]])).all()
-    assert (state_dict["optimizer"]["sharded"]["Moment_2"] == np.array([[99, 88, 77], [66, 55, 44]])).all()
-    assert (state_dict["optimizer"]["sharded"]["Step"] == np.array([5])).all()
-    assert (state_dict["optimizer"]["non_sharded"]["Moment_1"] == np.array([666, 555, 444])).all()
-    assert (state_dict["optimizer"]["non_sharded"]["Moment_2"] == np.array([6666, 5555, 4444])).all()
-    assert (state_dict["optimizer"]["non_sharded"]["Step"] == np.array([55])).all()
-
-    assert state_dict["trainer_options"]["mixed_precision"] is True
-    assert state_dict["trainer_options"]["world_rank"] == 0
-    assert state_dict["trainer_options"]["world_size"] == 1
-    assert state_dict["trainer_options"]["horizontal_parallel_size"] == 1
-    assert state_dict["trainer_options"]["data_parallel_size"] == 1
-    assert state_dict["trainer_options"]["zero_stage"] == 0
-    assert state_dict["trainer_options"]["optimizer_name"] == b"Adam"
diff --git a/orttraining/orttraining/test/python/orttraining_test_orttrainer_frontend.py b/orttraining/orttraining/test/python/orttraining_test_orttrainer_frontend.py
deleted file mode 100644
index fa13625f0ddac..0000000000000
--- a/orttraining/orttraining/test/python/orttraining_test_orttrainer_frontend.py
+++ /dev/null
@@ -1,2460 +0,0 @@
-import inspect
-import os
-import tempfile
-from functools import partial
-
-import _test_commons
-import _test_helpers
-import onnx
-import pytest
-import torch
-import torch.nn.functional as F
-from numpy.testing import assert_allclose
-from packaging.version import Version as StrictVersion
-
-from onnxruntime import SessionOptions, set_seed
-from onnxruntime.capi.ort_trainer import LossScaler as Legacy_LossScaler
-from onnxruntime.capi.ort_trainer import ORTTrainer as Legacy_ORTTrainer
-from onnxruntime.training import PropagateCastOpsStrategy, TrainStepInfo, _utils, amp
-from onnxruntime.training import model_desc_validation as md_val
-from onnxruntime.training import optim, orttrainer, orttrainer_options
-
-###############################################################################
-# Testing starts here #########################################################
-###############################################################################
-
-pytorch_110 = StrictVersion(".".join(torch.__version__.split(".")[:2])) >= StrictVersion("1.10.0")
-
-
-def get_model_opset(model_onnx):
-    for op in model_onnx.opset_import:
-        if op.domain == "":
-            return op.version
-    return None
-
-
-@pytest.mark.parametrize(
-    "test_input",
-    [({}), ({"batch": {}, "device": {}, "distributed": {}, "mixed_precision": {}, "utils": {}, "_internal_use": {}})],
-)
-def testORTTrainerOptionsDefaultValues(test_input):
-    """Test different ways of using default values for incomplete input"""
-
-    expected_values = {
-        "batch": {"gradient_accumulation_steps": 1},
-        "device": {"id": "cuda", "mem_limit": 0},
-        "distributed": {
-            "world_rank": 0,
-            "world_size": 1,
-            "local_rank": 0,
-            "data_parallel_size": 1,
-            "horizontal_parallel_size": 1,
-            "pipeline_parallel": {
-                "pipeline_parallel_size": 1,
-                "num_pipeline_micro_batches": 1,
-                "pipeline_cut_info_string": "",
-                "sliced_schema": {},
-                "sliced_axes": {},
-                "sliced_tensor_names": [],
-            },
-            "allreduce_post_accumulation": False,
-            "deepspeed_zero_optimization": {
-                "stage": 0,
-            },
-            "enable_adasum": False,
-        },
-        "lr_scheduler": None,
-        "mixed_precision": {"enabled": False, "loss_scaler": None},
-        "graph_transformer": {
-            "attn_dropout_recompute": False,
-            "gelu_recompute": False,
-            "transformer_layer_recompute": False,
-            "number_recompute_layers": 0,
-            "propagate_cast_ops_config": {"strategy": PropagateCastOpsStrategy.FLOOD_FILL, "level": 1, "allow": []},
-        },
-        "utils": {
-            "frozen_weights": [],
-            "grad_norm_clip": True,
-            "memory_efficient_gradient": False,
-            "run_symbolic_shape_infer": False,
-        },
-        "debug": {
-            "deterministic_compute": False,
-            "check_model_export": False,
-            "graph_save_paths": {
-                "model_after_graph_transforms_path": "",
-                "model_with_gradient_graph_path": "",
-                "model_with_training_graph_path": "",
-                "model_with_training_graph_after_optimization_path": "",
-            },
-        },
-        "_internal_use": {
-            "enable_internal_postprocess": True,
-            "extra_postprocess": None,
-            "onnx_opset_version": 14,
-            "enable_onnx_contrib_ops": True,
-        },
-        "provider_options": {},
-        "session_options": None,
-    }
-
-    actual_values = orttrainer_options.ORTTrainerOptions(test_input)
-    assert actual_values._validated_opts == expected_values
-
-
-@pytest.mark.parametrize(
-    "input,error_msg",
-    [
-        (
-            {"mixed_precision": {"enabled": 1}},
-            "Invalid options: {'mixed_precision': [{'enabled': ['must be of boolean type']}]}",
-        )
-    ],
-)
-def testORTTrainerOptionsInvalidMixedPrecisionEnabledSchema(input, error_msg):
-    """Test an invalid input based on schema validation error message"""
-
-    with pytest.raises(ValueError) as e:
-        orttrainer_options.ORTTrainerOptions(input)
-    assert str(e.value) == error_msg
-
-
-@pytest.mark.parametrize(
-    "input_dict,input_dtype,output_dtype",
-    [
-        (
-            {"inputs": [("in0", [])], "outputs": [("out0", []), ("out1", [])]},
-            (torch.int,),
-            (
-                torch.float,
-                torch.int32,
-            ),
-        ),
-        ({"inputs": [("in0", ["batch", 2, 3])], "outputs": [("out0", [], True)]}, (torch.int8,), (torch.int16,)),
-        (
-            {
-                "inputs": [
-                    ("in0", []),
-                    ("in1", [1]),
-                    ("in2", [1, 2]),
-                    ("in3", [1000, "dyn_ax1"]),
-                    ("in4", ["dyn_ax1", "dyn_ax2", "dyn_ax3"]),
-                ],
-                "outputs": [("out0", [], True), ("out1", [1], False), ("out2", [1, "dyn_ax1", 3])],
-            },
-            (
-                torch.float,
-                torch.uint8,
-                torch.bool,
-                torch.double,
-                torch.half,
-            ),
-            (torch.float, torch.float, torch.int64),
-        ),
-    ],
-)
-def testORTTrainerModelDescValidSchemas(input_dict, input_dtype, output_dtype):
-    r"""Test different ways of using default values for incomplete input"""
-
-    model_description = md_val._ORTTrainerModelDesc(input_dict)
-
-    # Validating hard-coded learning rate description
-    assert model_description.learning_rate.name == md_val.LEARNING_RATE_IO_DESCRIPTION_NAME
-    assert model_description.learning_rate.shape == [1]
-    assert model_description.learning_rate.dtype == torch.float32
-
-    # Validating model description from user
-    for idx, i_desc in enumerate(model_description.inputs):
-        assert isinstance(i_desc, model_description._InputDescription)
-        assert len(i_desc) == 2
-        assert input_dict["inputs"][idx][0] == i_desc.name
-        assert input_dict["inputs"][idx][1] == i_desc.shape
-    for idx, o_desc in enumerate(model_description.outputs):
-        assert isinstance(o_desc, model_description._OutputDescription)
-        assert len(o_desc) == 3
-        assert input_dict["outputs"][idx][0] == o_desc.name
-        assert input_dict["outputs"][idx][1] == o_desc.shape
-        is_loss = input_dict["outputs"][idx][2] if len(input_dict["outputs"][idx]) == 3 else False
-        assert is_loss == o_desc.is_loss
-
-    # Set all_finite name and check its description
-    model_description.all_finite = md_val.ALL_FINITE_IO_DESCRIPTION_NAME
-    assert model_description.all_finite.name == md_val.ALL_FINITE_IO_DESCRIPTION_NAME
-    assert model_description.all_finite.shape == [1]
-    assert model_description.all_finite.dtype == torch.bool
-
-    # Set loss_scale_input and check its description
-    model_description.loss_scale_input = md_val.LOSS_SCALE_INPUT_IO_DESCRIPTION_NAME
-    assert model_description.loss_scale_input.name == md_val.LOSS_SCALE_INPUT_IO_DESCRIPTION_NAME
-    assert model_description.loss_scale_input.shape == []
-    assert model_description.loss_scale_input.dtype == torch.float32
-
-    # Append type to inputs/outputs tuples
-    for idx, i_desc in enumerate(model_description.inputs):  # noqa: B007
-        model_description.add_type_to_input_description(idx, input_dtype[idx])
-    for idx, o_desc in enumerate(model_description.outputs):  # noqa: B007
-        model_description.add_type_to_output_description(idx, output_dtype[idx])
-
-    # Verify inputs/outputs tuples are replaced by the typed counterparts
-    for idx, i_desc in enumerate(model_description.inputs):
-        assert isinstance(i_desc, model_description._InputDescriptionTyped)
-        assert input_dtype[idx] == i_desc.dtype
-    for idx, o_desc in enumerate(model_description.outputs):
-        assert isinstance(o_desc, model_description._OutputDescriptionTyped)
-        assert output_dtype[idx] == o_desc.dtype
-
-
-@pytest.mark.parametrize(
-    "input_dict,error_msg",
-    [
-        (
-            {"inputs": [(True, [])], "outputs": [(True, [])]},
-            "Invalid model_desc: {'inputs': [{0: ['the first element of the tuple (aka name) must be a string']}], "
-            "'outputs': [{0: ['the first element of the tuple (aka name) must be a string']}]}",
-        ),
-        (
-            {"inputs": [("in1", None)], "outputs": [("out1", None)]},
-            "Invalid model_desc: {'inputs': [{0: ['the second element of the tuple (aka shape) must be a list']}], "
-            "'outputs': [{0: ['the second element of the tuple (aka shape) must be a list']}]}",
-        ),
-        (
-            {"inputs": [("in1", [])], "outputs": [("out1", [], None)]},
-            "Invalid model_desc: {'outputs': [{0: ['the third element of the tuple (aka is_loss) must be a boolean']}]}",
-        ),
-        (
-            {"inputs": [("in1", [True])], "outputs": [("out1", [True])]},
-            "Invalid model_desc: {'inputs': [{0: ['each shape must be either a string or integer']}], "
-            "'outputs': [{0: ['each shape must be either a string or integer']}]}",
-        ),
-        (
-            {"inputs": [("in1", [])], "outputs": [("out1", [], True), ("out2", [], True)]},
-            "Invalid model_desc: {'outputs': [{1: ['only one is_loss can bet set to True']}]}",
-        ),
-        (
-            {"inputz": [("in1", [])], "outputs": [("out1", [], True)]},
-            "Invalid model_desc: {'inputs': ['required field'], 'inputz': ['unknown field']}",
-        ),
-        (
-            {"inputs": [("in1", [])], "outputz": [("out1", [], True)]},
-            "Invalid model_desc: {'outputs': ['required field'], 'outputz': ['unknown field']}",
-        ),
-    ],
-)
-def testORTTrainerModelDescInvalidSchemas(input_dict, error_msg):
-    r"""Test different ways of using default values for incomplete input"""
-    with pytest.raises(ValueError) as e:
-        md_val._ORTTrainerModelDesc(input_dict)
-    assert str(e.value) == error_msg
-
-
-def testDynamicLossScaler():
-    rtol = 1e-7
-    default_scaler = amp.loss_scaler.DynamicLossScaler()
-
-    # Initial state
-    train_step_info = orttrainer.TrainStepInfo(optim.LambConfig())
-    assert_allclose(default_scaler.loss_scale, float(1 << 16), rtol=rtol, err_msg="loss scale mismatch")
-    assert default_scaler.up_scale_window == 2000
-    assert_allclose(default_scaler.min_loss_scale, 1.0, rtol=rtol, err_msg="min loss scale mismatch")
-    assert_allclose(default_scaler.max_loss_scale, float(1 << 24), rtol=rtol, err_msg="max loss scale mismatch")
-
-    # Performing 9*2000 updates to cover all branches of LossScaler.update(train_step_info.all_finite=True)
-    loss_scale = float(1 << 16)
-    for cycles in range(1, 10):
-        # 1999 updates without overflow produces 1999 stable steps
-        for i in range(1, 2000):
-            new_loss_scale = default_scaler.update(train_step_info)
-            assert default_scaler._stable_steps_count == i
-            assert_allclose(new_loss_scale, loss_scale, rtol=rtol, err_msg=f"loss scale mismatch at update {i}")
-
-        # 2000th update without overflow doubles the loss and zero stable steps until max_loss_scale is reached
-        new_loss_scale = default_scaler.update(train_step_info)
-        if cycles <= 8:
-            loss_scale *= 2
-        assert default_scaler._stable_steps_count == 0
-        assert_allclose(new_loss_scale, loss_scale, rtol=rtol, err_msg="loss scale mismatch")
-
-    # After 8 cycles, loss scale should be float(1 << 16)*(2**8)
-    assert_allclose(new_loss_scale, float(1 << 16) * (2**8), rtol=rtol, err_msg="loss scale mismatch")
-
-    # After 9 cycles, loss scale reaches max_loss_scale and it is not doubled from that point on
-    loss_scale = float(1 << 16) * (2**8)
-    for count in range(1, 2050):
-        new_loss_scale = default_scaler.update(train_step_info)
-        assert default_scaler._stable_steps_count == (count % 2000)
-        assert_allclose(new_loss_scale, loss_scale, rtol=rtol, err_msg="loss scale mismatch")
-
-    # Setting train_step_info.all_finite = False to test down scaling
-    train_step_info.all_finite = False
-
-    # Performing 24 updates to half the loss scale each time
-    loss_scale = float(1 << 16) * (2**8)
-    for count in range(1, 25):  # noqa: B007
-        new_loss_scale = default_scaler.update(train_step_info)
-        loss_scale /= 2
-        assert default_scaler._stable_steps_count == 0
-        assert_allclose(new_loss_scale, loss_scale, rtol=rtol, err_msg="loss scale mismatch")
-
-    # After 24 updates with gradient overflow, loss scale is 1.0
-    assert_allclose(new_loss_scale, 1.0, rtol=rtol, err_msg="loss scale mismatch")
-
-    # After 25 updates, min_loss_scale is reached and loss scale is not halfed from that point on
-    for count in range(1, 5):  # noqa: B007
-        new_loss_scale = default_scaler.update(train_step_info)
-        assert default_scaler._stable_steps_count == 0
-        assert_allclose(new_loss_scale, loss_scale, rtol=rtol, err_msg="loss scale mismatch")
-
-
-def testDynamicLossScalerCustomValues():
-    rtol = 1e-7
-    scaler = amp.loss_scaler.DynamicLossScaler(
-        automatic_update=False, loss_scale=3, up_scale_window=7, min_loss_scale=5, max_loss_scale=10
-    )
-    assert scaler.automatic_update is False
-    assert_allclose(scaler.loss_scale, 3, rtol=rtol, err_msg="loss scale mismatch")
-    assert_allclose(scaler.min_loss_scale, 5, rtol=rtol, err_msg="min loss scale mismatch")
-    assert_allclose(scaler.max_loss_scale, 10, rtol=rtol, err_msg="max loss scale mismatch")
-    assert scaler.up_scale_window == 7
-
-
-def testTrainStepInfo():
-    """Test valid initializations of TrainStepInfo"""
-
-    optimizer_config = optim.LambConfig()
-    fetches = ["out1", "out2"]
-    step_info = orttrainer.TrainStepInfo(
-        optimizer_config=optimizer_config, all_finite=False, fetches=fetches, optimization_step=123, step=456
-    )
-    assert step_info.optimizer_config == optimizer_config
-    assert step_info.all_finite is False
-    assert step_info.fetches == fetches
-    assert step_info.optimization_step == 123
-    assert step_info.step == 456
-
-    step_info = orttrainer.TrainStepInfo(optimizer_config)
-    assert step_info.optimizer_config == optimizer_config
-    assert step_info.all_finite is True
-    assert step_info.fetches == []
-    assert step_info.optimization_step == 0
-    assert step_info.step == 0
-
-
-@pytest.mark.parametrize(
-    "invalid_input",
-    [
-        (-1),
-        ("Hello"),
-    ],
-)
-def testTrainStepInfoInvalidInput(invalid_input):
-    """Test invalid initialization of TrainStepInfo"""
-    optimizer_config = optim.LambConfig()
-    with pytest.raises(AssertionError):
-        orttrainer.TrainStepInfo(optimizer_config=invalid_input)
-
-    with pytest.raises(AssertionError):
-        orttrainer.TrainStepInfo(optimizer_config, all_finite=invalid_input)
-
-    with pytest.raises(AssertionError):
-        orttrainer.TrainStepInfo(optimizer_config, fetches=invalid_input)
-
-    with pytest.raises(AssertionError):
-        orttrainer.TrainStepInfo(optimizer_config, optimization_step=invalid_input)
-
-    with pytest.raises(AssertionError):
-        orttrainer.TrainStepInfo(optimizer_config, step=invalid_input)
-
-
-@pytest.mark.parametrize(
-    "optim_name,lr,alpha,default_alpha",
-    [
-        ("AdamOptimizer", 0.1, 0.2, None),
-        ("LambOptimizer", 0.2, 0.3, None),
-        ("SGDOptimizer", 0.3, 0.4, None),
-        ("SGDOptimizer", 0.3, 0.4, 0.5),
-    ],
-)
-def testOptimizerConfig(optim_name, lr, alpha, default_alpha):
-    """Test initialization of _OptimizerConfig"""
-    defaults = {"lr": lr, "alpha": alpha}
-    params = [{"params": ["fc1.weight", "fc2.weight"]}]
-    if default_alpha is not None:
-        params[0].update({"alpha": default_alpha})
-    else:
-        params[0].update({"alpha": alpha})
-    cfg = optim.config._OptimizerConfig(name=optim_name, params=params, defaults=defaults)
-
-    assert cfg.name == optim_name
-    rtol = 1e-07
-    assert_allclose(defaults["lr"], cfg.lr, rtol=rtol, err_msg="lr mismatch")
-
-    # 1:1 mapping between defaults and params's hyper parameters
-    for param in params:
-        for k in param:
-            if k != "params":
-                assert k in cfg.defaults, "hyper parameter {k} not present in one of the parameter params"
-    for k in cfg.defaults:
-        for param in cfg.params:
-            assert k in param, "hyper parameter {k} not present in one of the parameter params"
-
-
-@pytest.mark.parametrize(
-    "optim_name,defaults,params",
-    [
-        ("AdamOptimizer", {"lr": -1}, []),  # invalid lr
-        ("FooOptimizer", {"lr": 0.001}, []),  # invalid name
-        ("SGDOptimizer", [], []),  # invalid type(defaults)
-        (optim.AdamConfig, {"lr": 0.003}, []),  # invalid type(name)
-        ("AdamOptimizer", {"lr": None}, []),  # missing 'lr' hyper parameter
-        ("SGDOptimizer", {"lr": 0.004}, {}),  # invalid type(params)
-        # invalid type(params[i])
-        ("AdamOptimizer", {"lr": 0.005, "alpha": 2}, [[]]),
-        # missing 'params' at 'params'
-        ("AdamOptimizer", {"lr": 0.005, "alpha": 2}, [{"alpha": 1}]),
-        # missing 'alpha' at 'defaults'
-        ("AdamOptimizer", {"lr": 0.005}, [{"params": "param1", "alpha": 1}]),
-    ],
-)
-def testOptimizerConfigInvalidInputs(optim_name, defaults, params):
-    """Test invalid initialization of _OptimizerConfig"""
-
-    with pytest.raises(AssertionError):
-        optim.config._OptimizerConfig(name=optim_name, params=params, defaults=defaults)
-
-
-def testOptimizerConfigSGD():
-    """Test initialization of SGD"""
-    cfg = optim.SGDConfig()
-    assert cfg.name == "SGDOptimizer"
-
-    rtol = 1e-07
-    assert_allclose(0.001, cfg.lr, rtol=rtol, err_msg="lr mismatch")
-
-    cfg = optim.SGDConfig(lr=0.002)
-    assert_allclose(0.002, cfg.lr, rtol=rtol, err_msg="lr mismatch")
-
-    # SGD does not support params
-    with pytest.raises(AssertionError) as e:
-        params = [{"params": ["layer1.weight"], "lr": 0.1}]
-        optim.SGDConfig(params=params, lr=0.002)
-        assert_allclose(0.002, cfg.lr, rtol=rtol, err_msg="lr mismatch")
-    assert str(e.value) == "'params' must be an empty list for SGD optimizer"
-
-
-def testOptimizerConfigAdam():
-    """Test initialization of Adam"""
-    cfg = optim.AdamConfig()
-    assert cfg.name == "AdamOptimizer"
-
-    rtol = 1e-7
-    assert_allclose(0.001, cfg.lr, rtol=rtol, err_msg="lr mismatch")
-    assert_allclose(0.9, cfg.alpha, rtol=rtol, err_msg="alpha mismatch")
-    assert_allclose(0.999, cfg.beta, rtol=rtol, err_msg="beta mismatch")
-    assert_allclose(0.0, cfg.lambda_coef, rtol=rtol, err_msg="lambda_coef mismatch")
-    assert_allclose(1e-8, cfg.epsilon, rtol=rtol, err_msg="epsilon mismatch")
-    assert_allclose(1.0, cfg.max_norm_clip, rtol=rtol, err_msg="max_norm_clip mismatch")
-    assert cfg.do_bias_correction is True, "lambda_coef mismatch"
-    assert cfg.weight_decay_mode == optim.AdamConfig.DecayMode.BEFORE_WEIGHT_UPDATE, "weight_decay_mode mismatch"
-
-
-def testOptimizerConfigLamb():
-    """Test initialization of Lamb"""
-    cfg = optim.LambConfig()
-    assert cfg.name == "LambOptimizer"
-    rtol = 1e-7
-    assert_allclose(0.001, cfg.lr, rtol=rtol, err_msg="lr mismatch")
-    assert_allclose(0.9, cfg.alpha, rtol=rtol, err_msg="alpha mismatch")
-    assert_allclose(0.999, cfg.beta, rtol=rtol, err_msg="beta mismatch")
-    assert_allclose(0.0, cfg.lambda_coef, rtol=rtol, err_msg="lambda_coef mismatch")
-    assert cfg.ratio_min == float("-inf"), "ratio_min mismatch"
-    assert cfg.ratio_max == float("inf"), "ratio_max mismatch"
-    assert_allclose(1e-6, cfg.epsilon, rtol=rtol, err_msg="epsilon mismatch")
-    assert_allclose(1.0, cfg.max_norm_clip, rtol=rtol, err_msg="max_norm_clip mismatch")
-    assert cfg.do_bias_correction is False, "do_bias_correction mismatch"
-
-
-@pytest.mark.parametrize("optim_name", [("Adam"), ("Lamb")])
-def testOptimizerConfigParams(optim_name):
-    rtol = 1e-7
-    params = [{"params": ["layer1.weight"], "alpha": 0.1}]
-    if optim_name == "Adam":
-        cfg = optim.AdamConfig(params=params, alpha=0.2)
-    elif optim_name == "Lamb":
-        cfg = optim.LambConfig(params=params, alpha=0.2)
-    else:
-        raise ValueError("invalid input")
-    assert len(cfg.params) == 1, "params should have length 1"
-    assert_allclose(cfg.params[0]["alpha"], 0.1, rtol=rtol, err_msg="invalid lr on params[0]")
-
-
-@pytest.mark.parametrize("optim_name", [("Adam"), ("Lamb")])
-def testOptimizerConfigInvalidParams(optim_name):
-    # lr is not supported within params
-    with pytest.raises(AssertionError) as e:
-        params = [{"params": ["layer1.weight"], "lr": 0.1}]
-        if optim_name == "Adam":
-            optim.AdamConfig(params=params, lr=0.2)
-        elif optim_name == "Lamb":
-            optim.LambConfig(params=params, lr=0.2)
-        else:
-            raise ValueError("invalid input")
-    assert str(e.value) == "'lr' is not supported inside params"
-
-
-def testLinearLRSchedulerCreation():
-    total_steps = 10
-    warmup = 0.05
-
-    lr_scheduler = optim.lr_scheduler.LinearWarmupLRScheduler(total_steps, warmup)
-
-    # Initial state
-    assert lr_scheduler.total_steps == total_steps
-    assert lr_scheduler.warmup == warmup
-
-
-@pytest.mark.parametrize(
-    "lr_scheduler,expected_values",
-    [
-        (optim.lr_scheduler.ConstantWarmupLRScheduler, [0.0, 0.2, 0.4, 0.6, 0.8, 1.0, 1.0, 1.0, 1.0, 1.0]),
-        (
-            optim.lr_scheduler.CosineWarmupLRScheduler,
-            [
-                0.0,
-                0.9763960957919413,
-                0.9059835861602854,
-                0.7956724530494887,
-                0.6563036824392345,
-                0.5015739416158049,
-                0.34668951940611276,
-                0.2068719061737831,
-                0.09586187986225325,
-                0.0245691111902418,
-            ],
-        ),
-        (optim.lr_scheduler.LinearWarmupLRScheduler, [0.0, 0.2, 0.4, 0.6, 0.8, 1.0, 0.8, 0.6, 0.4, 0.2]),
-        (
-            optim.lr_scheduler.PolyWarmupLRScheduler,
-            [
-                0.0,
-                0.9509018036072144,
-                0.9008016032064128,
-                0.8507014028056112,
-                0.8006012024048097,
-                0.750501002004008,
-                0.7004008016032064,
-                0.6503006012024048,
-                0.6002004008016032,
-                0.5501002004008015,
-            ],
-        ),
-    ],
-)
-def testLRSchedulerUpdateImpl(lr_scheduler, expected_values):
-    # Test tolerance
-    rtol = 1e-03
-
-    # Initial state
-    initial_lr = 1
-    total_steps = 10
-    warmup = 0.5
-    optimizer_config = optim.SGDConfig(lr=initial_lr)
-    lr_scheduler = lr_scheduler(total_steps, warmup)
-
-    # First half is warmup
-    for optimization_step in range(total_steps):
-        # Emulate ORTTRainer.train_step() call that updates its train_step_info
-        train_step_info = TrainStepInfo(optimizer_config=optimizer_config, optimization_step=optimization_step)
-
-        lr_scheduler._step(train_step_info)
-        lr_list = lr_scheduler.get_last_lr()
-        assert len(lr_list) == 1
-        assert_allclose(lr_list[0], expected_values[optimization_step], rtol=rtol, err_msg="lr mismatch")
-
-
-def testInstantiateORTTrainerOptions():
-    session_options = SessionOptions()
-    session_options.enable_mem_pattern = False
-    provider_options = {"EP1": {"key": "val"}}
-    opts = {"session_options": session_options, "provider_options": provider_options}
-    opts = orttrainer.ORTTrainerOptions(opts)
-    assert opts.session_options.enable_mem_pattern is False
-    assert opts._validated_opts["provider_options"]["EP1"]["key"] == "val"
-
-
-@pytest.mark.parametrize(
-    "step_fn, lr_scheduler, expected_lr_values, device",
-    [
-        ("train_step", None, None, "cuda"),
-        ("eval_step", None, None, "cpu"),
-        (
-            "train_step",
-            optim.lr_scheduler.ConstantWarmupLRScheduler,
-            [0.0, 0.2, 0.4, 0.6, 0.8, 1.0, 1.0, 1.0, 1.0, 1.0],
-            "cpu",
-        ),
-        (
-            "train_step",
-            optim.lr_scheduler.CosineWarmupLRScheduler,
-            [
-                0.0,
-                0.2,
-                0.4,
-                0.6,
-                0.8,
-                1.0,
-                0.9045084971874737,
-                0.6545084971874737,
-                0.34549150281252633,
-                0.09549150281252633,
-            ],
-            "cuda",
-        ),
-        (
-            "train_step",
-            optim.lr_scheduler.LinearWarmupLRScheduler,
-            [0.0, 0.2, 0.4, 0.6, 0.8, 1.0, 0.8, 0.6, 0.4, 0.2],
-            "cpu",
-        ),
-        (
-            "train_step",
-            optim.lr_scheduler.PolyWarmupLRScheduler,
-            [0.0, 0.2, 0.4, 0.6, 0.8, 1.0, 0.80000002, 0.60000004, 0.40000006000000005, 0.20000007999999997],
-            "cuda",
-        ),
-    ],
-)
-def testInstantiateORTTrainer(step_fn, lr_scheduler, expected_lr_values, device):
-    total_steps = 1
-    initial_lr = 1.0
-    rtol = 1e-3
-
-    # PyTorch Transformer model as example
-    opts = {"device": {"id": device}}
-    if lr_scheduler:
-        total_steps = 10
-        opts.update({"lr_scheduler": lr_scheduler(total_steps=total_steps, warmup=0.5)})
-    opts = orttrainer.ORTTrainerOptions(opts)
-    optim_config = optim.LambConfig(lr=initial_lr)
-    model, model_desc, my_loss, batcher_fn, train_data, val_data, _ = _test_commons._load_pytorch_transformer_model(
-        device
-    )
-    trainer = orttrainer.ORTTrainer(model, model_desc, optim_config, loss_fn=my_loss, options=opts)
-
-    # Run a train or evaluation step
-    if step_fn == "eval_step":
-        data, targets = batcher_fn(val_data, 0)
-    elif step_fn == "train_step":
-        data, targets = batcher_fn(train_data, 0)
-    else:
-        raise ValueError("Invalid step_fn")
-
-    # Export model to ONNX
-    if step_fn == "eval_step":
-        step_fn = trainer.eval_step
-        output = trainer.eval_step(data, targets)
-    elif step_fn == "train_step":
-        step_fn = trainer.train_step
-        for i in range(total_steps):
-            output = trainer.train_step(data, targets)
-            if lr_scheduler:
-                lr_list = trainer.options.lr_scheduler.get_last_lr()
-                assert_allclose(lr_list[0], expected_lr_values[i], rtol=rtol, err_msg="lr mismatch")
-    else:
-        raise ValueError("Invalid step_fn")
-    assert trainer._onnx_model is not None
-
-    # Check output shape after train/eval step
-    for out, desc in zip(output, trainer.model_desc.outputs):
-        if trainer.loss_fn and desc.is_loss:
-            continue
-        assert list(out.size()) == desc.shape
-
-    # Check name, shape and dtype of the first len(forward.parameters) ORT graph inputs
-    sig = inspect.signature(model.forward)
-    for i in range(len(sig.parameters.keys())):
-        input_name = trainer.model_desc.inputs[i][0]
-        input_dim = trainer.model_desc.inputs[i][1]
-        input_type = trainer.model_desc.inputs[i][2]
-
-        assert trainer._onnx_model.graph.input[i].name == input_name
-        for dim_idx, dim in enumerate(trainer._onnx_model.graph.input[i].type.tensor_type.shape.dim):
-            assert input_dim[dim_idx] == dim.dim_value
-            assert input_type == _utils.dtype_onnx_to_torch(
-                trainer._onnx_model.graph.input[i].type.tensor_type.elem_type
-            )
-
-    opset = get_model_opset(trainer._onnx_model)
-
-    # Check name, shape and dtype of the ORT graph outputs
-    for i in range(len(trainer.model_desc.outputs)):
-        output_name = trainer.model_desc.outputs[i][0]
-        output_dim = trainer.model_desc.outputs[i][1]
-        output_type = trainer.model_desc.outputs[i][3]
-
-        assert trainer._onnx_model.graph.output[i].name == output_name
-        for dim_idx, dim in enumerate(trainer._onnx_model.graph.output[i].type.tensor_type.shape.dim):
-            if opset is None or opset <= 12:
-                assert output_dim[dim_idx] == dim.dim_value
-            assert output_type == _utils.dtype_onnx_to_torch(
-                trainer._onnx_model.graph.output[i].type.tensor_type.elem_type
-            )
-
-    # Save current model as ONNX as a file
-    file_name = os.path.join("_____temp_onnx_model.onnx")
-    trainer.save_as_onnx(file_name)
-    assert os.path.exists(file_name)
-    with open(file_name, "rb") as f:
-        bin_str = f.read()
-        reload_onnx_model = onnx.load_model_from_string(bin_str)
-    os.remove(file_name)
-
-    # Create a new trainer from persisted ONNX model and compare with original ONNX model
-    trainer_from_onnx = orttrainer.ORTTrainer(reload_onnx_model, model_desc, optim_config)
-    step_fn(data, targets)
-    assert trainer_from_onnx._onnx_model is not None
-    assert id(trainer_from_onnx._onnx_model) != id(trainer._onnx_model)
-    assert trainer_from_onnx._onnx_model == trainer._onnx_model
-    assert trainer_from_onnx._onnx_model.graph == trainer._onnx_model.graph
-    assert onnx.helper.printable_graph(trainer_from_onnx._onnx_model.graph) == onnx.helper.printable_graph(
-        trainer._onnx_model.graph
-    )
-
-
-@pytest.mark.parametrize("seed, device", [(0, "cpu"), (24, "cuda")])
-def testORTDeterministicCompute(seed, device):
-    # Common setup
-    optim_config = optim.LambConfig()
-    opts = orttrainer.ORTTrainerOptions(
-        {"debug": {"deterministic_compute": True}, "device": {"id": device, "mem_limit": 10 * 1024 * 1024}}
-    )
-
-    # Setup for the first ORTTRainer run
-    torch.manual_seed(seed)
-    set_seed(seed)
-    model, model_desc, my_loss, batcher_fn, train_data, _, _ = _test_commons._load_pytorch_transformer_model(device)
-    first_trainer = orttrainer.ORTTrainer(model, model_desc, optim_config, loss_fn=my_loss, options=opts)
-    data, targets = batcher_fn(train_data, 0)
-    _ = first_trainer.train_step(data, targets)
-    assert first_trainer._onnx_model is not None
-
-    # Setup for the second ORTTRainer run
-    torch.manual_seed(seed)
-    set_seed(seed)
-    model, _, _, _, _, _, _ = _test_commons._load_pytorch_transformer_model(device)
-    second_trainer = orttrainer.ORTTrainer(model, model_desc, optim_config, loss_fn=my_loss, options=opts)
-    _ = second_trainer.train_step(data, targets)
-    assert second_trainer._onnx_model is not None
-
-    # Compare two different instances with identical setup
-    assert id(first_trainer._onnx_model) != id(second_trainer._onnx_model)
-    _test_helpers.assert_onnx_weights(first_trainer, second_trainer)
-
-
-@pytest.mark.parametrize(
-    "seed,device,expected_loss,fetches",
-    [
-        (321, "cuda", [10.5774, 10.4403, 10.4175, 10.2886, 10.2760], False),
-        (321, "cuda", [10.5774, 10.4403, 10.4175, 10.2886, 10.2760], True),
-    ],
-)
-def testORTTrainerMixedPrecisionLossScaler(seed, device, expected_loss, fetches):
-    return  # TODO: re-enable after nondeterminism on backend is fixed. update numbers
-
-    rtol = 1e-3
-    total_steps = len(expected_loss)
-    torch.manual_seed(seed)
-    set_seed(seed)
-
-    # Setup ORTTrainer
-    loss_scaler = amp.DynamicLossScaler()
-    options = orttrainer.ORTTrainerOptions(
-        {
-            "device": {"id": device},
-            "mixed_precision": {"enabled": True, "loss_scaler": loss_scaler},
-            "debug": {"deterministic_compute": True},
-        }
-    )
-    model, model_desc, my_loss, batcher_fn, train_data, val_data, _ = _test_commons._load_pytorch_transformer_model(
-        device
-    )
-    optim_config = optim.LambConfig(lr=0.001)
-    trainer = orttrainer.ORTTrainer(model, model_desc, optim_config, loss_fn=my_loss, options=options)
-
-    # Training loop
-    actual_loss = []
-    for i in range(total_steps):
-        data, targets = batcher_fn(train_data, i)
-        if fetches:
-            trainer._train_step_info.fetches = ["loss"]
-            loss = trainer.train_step(data, targets)
-        else:
-            loss, _ = trainer.train_step(data, targets)
-        actual_loss.append(loss.cpu())
-
-    # Eval once just to test fetches in action
-    val_data, val_targets = batcher_fn(val_data, 0)
-    if fetches:
-        trainer._train_step_info.fetches = ["loss"]
-        loss = trainer.eval_step(val_data, val_targets)
-        trainer._train_step_info.fetches = []
-    loss, _ = trainer.eval_step(val_data, val_targets)
-
-    # Compare loss to ground truth computed from current ORTTrainer API
-    _test_helpers.assert_model_outputs(expected_loss, actual_loss, True, rtol=rtol)
-    assert trainer._onnx_model is not None
-
-
-def _recompute_data():
-    device_capability_major = torch.cuda.get_device_capability()[0]
-    if device_capability_major == 7:  # V100 for Dev machine
-        expected_loss = {
-            12: [10.5598, 10.4591, 10.3477, 10.2726, 10.1945],
-            14: [10.54088, 10.498755, 10.386827, 10.338747, 10.262459],
-        }
-        return [
-            (False, False, False, 0, expected_loss),  # no recompute
-            (True, False, False, 0, expected_loss),  # attn_dropout recompute
-            (False, True, False, 0, expected_loss),  # gelu recompute
-            (False, False, True, 0, expected_loss),  # transformer_layer recompute
-            (False, False, True, 1, expected_loss),  # transformer_layer recompute with 1 layer
-        ]
-    elif device_capability_major == 5:  # M60 for CI machines
-        expected_loss = {
-            12: [10.5445, 10.4389, 10.3480, 10.2627, 10.2113],
-            14: [10.5445, 10.4389, 10.3480, 10.2627, 10.2113],
-        }
-        return [
-            (False, False, False, 0, expected_loss),  # no recompute
-            (True, False, False, 0, expected_loss),  # attn_dropout recompute
-            (False, True, False, 0, expected_loss),  # gelu recompute
-            (False, False, True, 0, expected_loss),  # transformer_layer recompute
-            (False, False, True, 1, expected_loss),  # transformer_layer recompute with 1 layer
-        ]
-
-
-@pytest.mark.parametrize("attn_dropout, gelu, transformer_layer, number_layers, expected_loss", _recompute_data())
-def testORTTrainerRecompute(attn_dropout, gelu, transformer_layer, number_layers, expected_loss):
-    seed = 321
-    device = "cuda"
-    rtol = 1e-3
-    total_steps = len(expected_loss[12])
-    torch.manual_seed(seed)
-    set_seed(seed)
-
-    # Setup ORTTrainer
-    options = orttrainer.ORTTrainerOptions(
-        {
-            "device": {"id": device},
-            "graph_transformer": {
-                "attn_dropout_recompute": attn_dropout,
-                "gelu_recompute": gelu,
-                "transformer_layer_recompute": transformer_layer,
-                "number_recompute_layers": number_layers,
-            },
-            "debug": {"deterministic_compute": True},
-        }
-    )
-    model, model_desc, my_loss, batcher_fn, train_data, val_data, _ = _test_commons._load_pytorch_transformer_model(
-        device
-    )
-    optim_config = optim.LambConfig(lr=0.001)
-    trainer = orttrainer.ORTTrainer(model, model_desc, optim_config, loss_fn=my_loss, options=options)
-
-    # Training loop
-    actual_loss = []
-    for i in range(total_steps):
-        data, targets = batcher_fn(train_data, i)
-        loss, _ = trainer.train_step(data, targets)
-        actual_loss.append(loss.cpu())
-
-    # Compare loss to ground truth computed from current ORTTrainer API
-    assert trainer._onnx_model is not None
-    opset = get_model_opset(trainer._onnx_model)
-    _test_helpers.assert_model_outputs(expected_loss[opset], actual_loss, True, rtol=rtol)
-
-
-@pytest.mark.parametrize(
-    "seed,device,gradient_accumulation_steps,total_steps,expected_loss",
-    [
-        (
-            0,
-            "cuda",
-            1,
-            12,
-            [
-                10.5368022919,
-                10.4146203995,
-                10.3635568619,
-                10.2650547028,
-                10.2284049988,
-                10.1304626465,
-                10.0853414536,
-                9.9987659454,
-                9.9472427368,
-                9.8832416534,
-                9.8223171234,
-                9.8222122192,
-            ],
-        ),
-        (
-            42,
-            "cuda",
-            3,
-            12,
-            [
-                10.6455879211,
-                10.6247081757,
-                10.6361322403,
-                10.5187482834,
-                10.5345087051,
-                10.5487670898,
-                10.4833698273,
-                10.4600019455,
-                10.4535751343,
-                10.3774127960,
-                10.4144191742,
-                10.3757553101,
-            ],
-        ),
-        (
-            123,
-            "cuda",
-            7,
-            12,
-            [
-                10.5353469849,
-                10.5261383057,
-                10.5240392685,
-                10.5013713837,
-                10.5678377151,
-                10.5452117920,
-                10.5184345245,
-                10.4271221161,
-                10.4458627701,
-                10.4864749908,
-                10.4416503906,
-                10.4467563629,
-            ],
-        ),
-        (
-            321,
-            "cuda",
-            12,
-            12,
-            [
-                10.5773944855,
-                10.5428829193,
-                10.5974750519,
-                10.5416746140,
-                10.6009902954,
-                10.5684127808,
-                10.5759754181,
-                10.5636739731,
-                10.5613927841,
-                10.5825119019,
-                10.6031589508,
-                10.6199369431,
-            ],
-        ),
-    ],
-)
-def testORTTrainerGradientAccumulation(seed, device, gradient_accumulation_steps, total_steps, expected_loss):
-    return  # TODO: re-enable after nondeterminism on backend is fixed. update numbers
-    rtol = 1e-3
-    torch.manual_seed(seed)
-    set_seed(seed)
-
-    # Setup ORTTrainer
-    options = orttrainer.ORTTrainerOptions(
-        {
-            "device": {"id": device},
-            "batch": {"gradient_accumulation_steps": gradient_accumulation_steps},
-            "debug": {"deterministic_compute": True},
-        }
-    )
-    model, model_desc, my_loss, batcher_fn, train_data, _, _ = _test_commons._load_pytorch_transformer_model(device)
-    optim_config = optim.LambConfig(lr=0.001)
-    trainer = orttrainer.ORTTrainer(model, model_desc, optim_config, loss_fn=my_loss, options=options)
-
-    # Training loop
-    actual_loss = []
-    for i in range(total_steps):
-        data, targets = batcher_fn(train_data, i)
-        loss, _ = trainer.train_step(data, targets)
-        actual_loss.append(loss.cpu())
-
-    # Compare legacy vs experimental APIs
-    _test_helpers.assert_model_outputs(expected_loss, actual_loss, rtol=rtol)
-
-
-@pytest.mark.parametrize(
-    "dynamic_axes",
-    [
-        (True),
-        (False),
-    ],
-)
-def testORTTrainerDynamicShape(dynamic_axes):
-    # Common setup
-    device = "cuda"
-
-    # Setup ORTTrainer
-    options = orttrainer.ORTTrainerOptions({})
-    model, model_desc, my_loss, batcher_fn, train_data, _, _ = _test_commons._load_pytorch_transformer_model(
-        device, dynamic_axes=dynamic_axes
-    )
-    optim_config = optim.LambConfig(lr=0.001)
-    trainer = orttrainer.ORTTrainer(model, model_desc, optim_config, loss_fn=my_loss, options=options)
-
-    # Training loop
-    total_steps = 10
-    for i in range(total_steps):
-        data, targets = batcher_fn(train_data, i)
-        if dynamic_axes:
-            # Forcing batches with different sizes to exercise dynamic shapes
-            data = data[: -(i + 1)]
-            targets = targets[: -(i + 1) * data.size(1)]
-        _, _ = trainer.train_step(data, targets)
-
-    assert trainer._onnx_model is not None
-
-
-@pytest.mark.parametrize(
-    "enable_onnx_contrib_ops",
-    [
-        (True),
-        (False),
-    ],
-)
-def testORTTrainerInternalUseContribOps(enable_onnx_contrib_ops):
-    # Common setup
-    device = "cuda"
-
-    # Setup ORTTrainer
-    options = orttrainer.ORTTrainerOptions({"_internal_use": {"enable_onnx_contrib_ops": enable_onnx_contrib_ops}})
-    model, model_desc, my_loss, batcher_fn, train_data, _, _ = _test_commons._load_pytorch_transformer_model(device)
-    optim_config = optim.LambConfig(lr=0.001)
-    trainer = orttrainer.ORTTrainer(model, model_desc, optim_config, loss_fn=my_loss, options=options)
-
-    # Training loop
-    data, targets = batcher_fn(train_data, 0)
-    if not enable_onnx_contrib_ops and not pytorch_110:
-        with pytest.raises(Exception):  # noqa: B017
-            _, _ = trainer.train_step(data, targets)
-    else:
-        _, _ = trainer.train_step(data, targets)
-
-
-@pytest.mark.parametrize(
-    "model_params",
-    [
-        (
-            [
-                "decoder.weight",
-                "transformer_encoder.layers.0.linear1.bias",
-                "transformer_encoder.layers.0.linear2.weight",
-                "transformer_encoder.layers.1.self_attn.out_proj.weight",
-                "transformer_encoder.layers.1.self_attn.out_proj.bias",
-            ]
-        ),
-    ],
-)
-def testORTTrainerFrozenWeights(model_params):
-    # Common setup
-    device = "cuda"
-    total_steps = 10
-
-    # Setup ORTTrainer WITHOUT frozen weights
-    options = orttrainer.ORTTrainerOptions({})
-    model, model_desc, my_loss, batcher_fn, train_data, _, _ = _test_commons._load_pytorch_transformer_model(device)
-    optim_config = optim.LambConfig(lr=0.001)
-    trainer = orttrainer.ORTTrainer(model, model_desc, optim_config, loss_fn=my_loss, options=options)
-    for i in range(total_steps):
-        data, targets = batcher_fn(train_data, i)
-        _, _ = trainer.train_step(data, targets)
-
-    # All model_params must be in the session state
-    assert trainer._onnx_model is not None
-    session_state = trainer._training_session.get_state()
-    assert all([param in session_state for param in model_params])
-
-    # Setup ORTTrainer WITH frozen weights
-    options = orttrainer.ORTTrainerOptions({"utils": {"frozen_weights": model_params}})
-    model, _, _, _, _, _, _ = _test_commons._load_pytorch_transformer_model(device)
-    trainer = orttrainer.ORTTrainer(model, model_desc, optim_config, loss_fn=my_loss, options=options)
-    for i in range(total_steps):
-        data, targets = batcher_fn(train_data, i)
-        _, _ = trainer.train_step(data, targets)
-
-    # All model_params CANNOT be in the session state
-    assert trainer._onnx_model is not None
-    session_state = trainer._training_session.get_state()
-    assert not all([param in session_state for param in model_params])
-
-
-@pytest.mark.parametrize(
-    "loss_scaler, optimizer_config, gradient_accumulation_steps",
-    [
-        (None, optim.AdamConfig(), 1),
-        (None, optim.LambConfig(), 1),
-        (None, optim.SGDConfig(), 1),
-        (amp.DynamicLossScaler(), optim.AdamConfig(), 1),
-        (amp.DynamicLossScaler(), optim.LambConfig(), 5),
-        # (amp.DynamicLossScaler(), optim.SGDConfig(), 1), # SGD doesnt support fp16
-    ],
-)
-def testORTTrainerStateDictWrapModelLossFn(loss_scaler, optimizer_config, gradient_accumulation_steps):
-    # Common setup
-    seed = 1
-
-    class LinearModel(torch.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.linear = torch.nn.Linear(2, 4)
-
-        def forward(self, y=None, x=None):
-            if y is not None:
-                return self.linear(x) + y
-            else:
-                return self.linear(x) + torch.ones(2, 4)
-
-    model_desc = {
-        "inputs": [
-            ("x", [2, 2]),
-            (
-                "label",
-                [
-                    2,
-                ],
-            ),
-        ],
-        "outputs": [("loss", [], True), ("output", [2, 4])],
-    }
-
-    # Dummy data
-    data1 = torch.randn(2, 2)
-    label1 = torch.tensor([0, 1], dtype=torch.int64)
-    data2 = torch.randn(2, 2)
-    label2 = torch.tensor([0, 1], dtype=torch.int64)
-
-    # Setup training based on test parameters
-    opts = {
-        "debug": {"deterministic_compute": True},
-        "batch": {"gradient_accumulation_steps": gradient_accumulation_steps},
-    }
-    if loss_scaler:
-        opts["mixed_precision"] = {"enabled": True, "loss_scaler": loss_scaler}
-    opts = orttrainer.ORTTrainerOptions(opts)
-
-    # Training session 1
-    torch.manual_seed(seed)
-    set_seed(seed)
-    pt_model = LinearModel()
-
-    def loss_fn(x, label):
-        return F.nll_loss(F.log_softmax(x, dim=1), label)
-
-    trainer = orttrainer.ORTTrainer(pt_model, model_desc, optimizer_config, loss_fn=loss_fn, options=opts)
-
-    # Check state_dict keys before train. Must be empty
-    state_dict = trainer.state_dict()
-    assert state_dict == {}
-
-    # Train once and check initial state
-    trainer.train_step(x=data1, label=label1)
-    state_dict = trainer.state_dict()
-    assert all([weight in state_dict["model"]["full_precision"] for weight in ["linear.bias", "linear.weight"]])
-
-    # Initialize training session 2 from state of Training 1
-    torch.manual_seed(seed)
-    set_seed(seed)
-    trainer2 = orttrainer.ORTTrainer(pt_model, model_desc, optimizer_config, loss_fn=loss_fn, options=opts)
-    trainer2.load_state_dict(state_dict)
-
-    # Verify state was loaded properly
-    _test_commons.assert_all_states_close_ort(state_dict, trainer2._load_state_dict.args[0])
-
-    # Perform a second step in both training session 1 and 2 and verify they match
-    trainer.train_step(x=data2, label=label2)
-    state_dict = trainer.state_dict()
-    trainer2.train_step(x=data2, label=label2)
-    state_dict2 = trainer2.state_dict()
-    _test_commons.assert_all_states_close_ort(state_dict, state_dict2)
-
-
-def testORTTrainerNonPickableModel():
-    # Common setup
-    import threading
-
-    seed = 1
-
-    class UnpickableModel(torch.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.linear = torch.nn.Linear(2, 4)
-            self._lock = threading.Lock()
-
-        def forward(self, y=None, x=None):
-            with self._lock:
-                if y is not None:
-                    return self.linear(x) + y
-                else:
-                    return self.linear(x) + torch.ones(2, 4)
-
-    model_desc = {
-        "inputs": [
-            ("x", [2, 2]),
-            (
-                "label",
-                [
-                    2,
-                ],
-            ),
-        ],
-        "outputs": [("loss", [], True), ("output", [2, 4])],
-    }
-
-    # Dummy data
-    data = torch.randn(2, 2)
-    label = torch.tensor([0, 1], dtype=torch.int64)
-
-    # Setup training based on test parameters
-    opts = orttrainer.ORTTrainerOptions({"debug": {"deterministic_compute": True}})
-
-    # Training session
-    torch.manual_seed(seed)
-    set_seed(seed)
-    pt_model = UnpickableModel()
-
-    def loss_fn(x, label):
-        return F.nll_loss(F.log_softmax(x, dim=1), label)
-
-    optim_config = optim.AdamConfig()
-    trainer = orttrainer.ORTTrainer(pt_model, model_desc, optim_config, loss_fn=loss_fn, options=opts)
-
-    # Train must succeed despite warning
-    _, _ = trainer.train_step(data, label)
-
-
-###############################################################################
-# Temporary tests comparing Legacy vs Experimental ORTTrainer APIs ############
-###############################################################################
-
-
-@pytest.mark.parametrize("seed,device", [(1234, "cuda")])
-def testORTTrainerLegacyAndExperimentalWeightsCheck(seed, device):
-    # Common data
-    rtol = 1e-7
-    total_steps = 5
-
-    # Setup for the experimental ORTTRainer run
-    torch.manual_seed(seed)
-    set_seed(seed)
-    optim_config = optim.LambConfig()
-    opts = orttrainer.ORTTrainerOptions(
-        {
-            "device": {"id": device},
-            "debug": {"deterministic_compute": True},
-        }
-    )
-    model, model_desc, my_loss, batcher_fn, train_data, _, _ = _test_commons._load_pytorch_transformer_model(device)
-    trainer = orttrainer.ORTTrainer(model, model_desc, optim_config, loss_fn=my_loss, options=opts)
-    # Training loop
-    for i in range(total_steps):
-        data, targets = batcher_fn(train_data, i)
-        _ = trainer.train_step(data, targets)
-
-    # Setup for the legacy ORTTrainer run
-    torch.manual_seed(seed)
-    set_seed(seed)
-    model, (model_desc, lr_desc), _, _, _, _, _ = _test_commons._load_pytorch_transformer_model(device, legacy_api=True)
-    legacy_trainer = Legacy_ORTTrainer(
-        model, my_loss, model_desc, "LambOptimizer", None, lr_desc, device, _use_deterministic_compute=True
-    )
-    # Training loop
-    for i in range(total_steps):
-        data, targets = batcher_fn(train_data, i)
-        _, _ = legacy_trainer.train_step(data, targets, torch.tensor([optim_config.lr]))
-
-    # Compare legacy vs experimental APIs
-    _test_helpers.assert_legacy_onnx_weights(trainer, legacy_trainer, rtol=rtol)
-
-
-@pytest.mark.parametrize(
-    "seed,device",
-    [
-        (321, "cuda"),
-    ],
-)
-def testORTTrainerLegacyAndExperimentalPrecisionLossScaler(seed, device):
-    # Common data
-    total_steps = 128
-
-    # Setup experimental API
-    torch.manual_seed(seed)
-    set_seed(seed)
-    loss_scaler = amp.DynamicLossScaler()
-    options = orttrainer.ORTTrainerOptions(
-        {
-            "device": {"id": device},
-            "mixed_precision": {"enabled": True, "loss_scaler": loss_scaler},
-            "debug": {
-                "deterministic_compute": True,
-            },
-        }
-    )
-    model, model_desc, my_loss, batcher_fn, train_data, _, _ = _test_commons._load_pytorch_transformer_model(device)
-    optim_config = optim.LambConfig(lr=0.001)
-    trainer = orttrainer.ORTTrainer(model, model_desc, optim_config, loss_fn=my_loss, options=options)
-    # Training loop
-    experimental_loss = []
-    experimental_preds_dtype = []
-    for i in range(total_steps):
-        data, targets = batcher_fn(train_data, i)
-        exp_loss, exp_preds = trainer.train_step(data, targets)
-        experimental_loss.append(exp_loss.cpu())
-        experimental_preds_dtype.append(exp_preds.dtype)
-
-    # Setup legacy API
-    torch.manual_seed(seed)
-    set_seed(seed)
-    model, (model_desc, lr_desc), _, _, _, _, _ = _test_commons._load_pytorch_transformer_model(device, legacy_api=True)
-    loss_scaler = Legacy_LossScaler("ort_test_input_loss_scalar", True)
-    legacy_trainer = Legacy_ORTTrainer(
-        model,
-        my_loss,
-        model_desc,
-        "LambOptimizer",
-        None,
-        lr_desc,
-        device=device,
-        _use_deterministic_compute=True,
-        use_mixed_precision=True,
-        loss_scaler=loss_scaler,
-    )
-    # Training loop
-    legacy_loss = []
-    legacy_preds_dtype = []
-    for i in range(total_steps):
-        data, targets = batcher_fn(train_data, i)
-        leg_loss, leg_preds = legacy_trainer.train_step(data, targets, torch.tensor([optim_config.lr]))
-        legacy_loss.append(leg_loss.cpu())
-        legacy_preds_dtype.append(leg_preds.dtype)
-
-    # Compare legacy vs experimental APIs
-    assert experimental_preds_dtype == legacy_preds_dtype
-    _test_helpers.assert_legacy_onnx_weights(trainer, legacy_trainer)
-    _test_helpers.assert_model_outputs(legacy_loss, experimental_loss)
-
-
-@pytest.mark.parametrize(
-    "seed,device,gradient_accumulation_steps,total_steps",
-    [
-        (0, "cuda", 1, 12),
-        (42, "cuda", 3, 12),
-        (123, "cuda", 7, 12),
-        (321, "cuda", 12, 12),
-    ],
-)
-def testORTTrainerLegacyAndExperimentalGradientAccumulation(seed, device, gradient_accumulation_steps, total_steps):
-    # Common data
-    torch.set_printoptions(precision=10)
-
-    # Setup experimental API
-    torch.manual_seed(seed)
-    set_seed(seed)
-    options = orttrainer.ORTTrainerOptions(
-        {
-            "device": {"id": device},
-            "batch": {"gradient_accumulation_steps": gradient_accumulation_steps},
-            "debug": {"deterministic_compute": True},
-        }
-    )
-    model, model_desc, my_loss, batcher_fn, train_data, _, _ = _test_commons._load_pytorch_transformer_model(device)
-    optim_config = optim.LambConfig(lr=0.001)
-    trainer = orttrainer.ORTTrainer(model, model_desc, optim_config, loss_fn=my_loss, options=options)
-    # Training loop
-    experimental_loss = []
-    for i in range(total_steps):
-        data, targets = batcher_fn(train_data, i)
-        exp_loss, _ = trainer.train_step(data, targets)
-        experimental_loss.append(exp_loss.cpu())
-
-    # Setup legacy API
-    torch.manual_seed(seed)
-    set_seed(seed)
-    model, (model_desc, lr_desc), _, _, _, _, _ = _test_commons._load_pytorch_transformer_model(device, legacy_api=True)
-    legacy_trainer = Legacy_ORTTrainer(
-        model,
-        my_loss,
-        model_desc,
-        "LambOptimizer",
-        None,
-        lr_desc,
-        device=device,
-        _use_deterministic_compute=True,
-        gradient_accumulation_steps=gradient_accumulation_steps,
-    )
-    # Training loop
-    legacy_loss = []
-    for i in range(total_steps):
-        data, targets = batcher_fn(train_data, i)
-        leg_loss, _ = legacy_trainer.train_step(data, targets, torch.tensor([optim_config.lr]))
-        legacy_loss.append(leg_loss.cpu())
-
-    # Compare legacy vs experimental APIs
-    _test_helpers.assert_model_outputs(legacy_loss, experimental_loss)
-
-
-@pytest.mark.parametrize(
-    "seed,device,optimizer_config,lr_scheduler, get_lr_this_step",
-    [
-        (
-            0,
-            "cuda",
-            optim.AdamConfig,
-            optim.lr_scheduler.ConstantWarmupLRScheduler,
-            _test_commons.legacy_constant_lr_scheduler,
-        ),
-        (
-            0,
-            "cuda",
-            optim.LambConfig,
-            optim.lr_scheduler.ConstantWarmupLRScheduler,
-            _test_commons.legacy_constant_lr_scheduler,
-        ),
-        (
-            0,
-            "cuda",
-            optim.SGDConfig,
-            optim.lr_scheduler.ConstantWarmupLRScheduler,
-            _test_commons.legacy_constant_lr_scheduler,
-        ),
-        (
-            42,
-            "cuda",
-            optim.AdamConfig,
-            optim.lr_scheduler.LinearWarmupLRScheduler,
-            _test_commons.legacy_linear_lr_scheduler,
-        ),
-        (
-            42,
-            "cuda",
-            optim.LambConfig,
-            optim.lr_scheduler.LinearWarmupLRScheduler,
-            _test_commons.legacy_linear_lr_scheduler,
-        ),
-        (
-            42,
-            "cuda",
-            optim.SGDConfig,
-            optim.lr_scheduler.LinearWarmupLRScheduler,
-            _test_commons.legacy_linear_lr_scheduler,
-        ),
-        (
-            123,
-            "cuda",
-            optim.AdamConfig,
-            optim.lr_scheduler.CosineWarmupLRScheduler,
-            _test_commons.legacy_cosine_lr_scheduler,
-        ),
-        (
-            123,
-            "cuda",
-            optim.LambConfig,
-            optim.lr_scheduler.CosineWarmupLRScheduler,
-            _test_commons.legacy_cosine_lr_scheduler,
-        ),
-        (
-            123,
-            "cuda",
-            optim.SGDConfig,
-            optim.lr_scheduler.CosineWarmupLRScheduler,
-            _test_commons.legacy_cosine_lr_scheduler,
-        ),
-        (
-            321,
-            "cuda",
-            optim.AdamConfig,
-            optim.lr_scheduler.PolyWarmupLRScheduler,
-            _test_commons.legacy_poly_lr_scheduler,
-        ),
-        (
-            321,
-            "cuda",
-            optim.LambConfig,
-            optim.lr_scheduler.PolyWarmupLRScheduler,
-            _test_commons.legacy_poly_lr_scheduler,
-        ),
-        (
-            321,
-            "cuda",
-            optim.SGDConfig,
-            optim.lr_scheduler.PolyWarmupLRScheduler,
-            _test_commons.legacy_poly_lr_scheduler,
-        ),
-    ],
-)
-def testORTTrainerLegacyAndExperimentalLRScheduler(seed, device, optimizer_config, lr_scheduler, get_lr_this_step):
-    # Common data
-    total_steps = 10
-    lr = 0.001
-    warmup = 0.5
-    cycles = 0.5
-    power = 1.0
-    lr_end = 1e-7
-    torch.set_printoptions(precision=10)
-
-    # Setup experimental API
-    torch.manual_seed(seed)
-    set_seed(seed)
-    if (
-        lr_scheduler == optim.lr_scheduler.ConstantWarmupLRScheduler
-        or lr_scheduler == optim.lr_scheduler.LinearWarmupLRScheduler
-    ):
-        lr_scheduler = lr_scheduler(total_steps=total_steps, warmup=warmup)
-    elif lr_scheduler == optim.lr_scheduler.CosineWarmupLRScheduler:
-        lr_scheduler = lr_scheduler(total_steps=total_steps, warmup=warmup, cycles=cycles)
-    elif lr_scheduler == optim.lr_scheduler.PolyWarmupLRScheduler:
-        lr_scheduler = lr_scheduler(total_steps=total_steps, warmup=warmup, power=power, lr_end=lr_end)
-    else:
-        raise RuntimeError("Invalid lr_scheduler")
-
-    options = orttrainer.ORTTrainerOptions(
-        {"device": {"id": device}, "debug": {"deterministic_compute": True}, "lr_scheduler": lr_scheduler}
-    )
-    model, model_desc, my_loss, batcher_fn, train_data, _, _ = _test_commons._load_pytorch_transformer_model(device)
-    optim_config = optimizer_config(lr=lr)
-    trainer = orttrainer.ORTTrainer(model, model_desc, optim_config, loss_fn=my_loss, options=options)
-    # Training loop
-    experimental_loss = []
-    for i in range(total_steps):
-        data, targets = batcher_fn(train_data, i)
-        exp_loss, exp_preds = trainer.train_step(data, targets)
-        experimental_loss.append(exp_loss.cpu())
-
-    # Setup legacy API
-    torch.manual_seed(seed)
-    set_seed(seed)
-
-    if optimizer_config == optim.AdamConfig:
-        legacy_optimizer_config = "AdamOptimizer"
-    elif optimizer_config == optim.LambConfig:
-        legacy_optimizer_config = "LambOptimizer"
-    elif optimizer_config == optim.SGDConfig:
-        legacy_optimizer_config = "SGDOptimizer"
-    else:
-        raise RuntimeError("Invalid optimizer_config")
-
-    if (
-        get_lr_this_step == _test_commons.legacy_constant_lr_scheduler
-        or get_lr_this_step == _test_commons.legacy_linear_lr_scheduler
-    ):
-        get_lr_this_step = partial(get_lr_this_step, initial_lr=lr, total_steps=total_steps, warmup=warmup)
-    elif get_lr_this_step == _test_commons.legacy_cosine_lr_scheduler:
-        get_lr_this_step = partial(
-            get_lr_this_step, initial_lr=lr, total_steps=total_steps, warmup=warmup, cycles=cycles
-        )
-    elif get_lr_this_step == _test_commons.legacy_poly_lr_scheduler:
-        get_lr_this_step = partial(
-            get_lr_this_step, initial_lr=lr, total_steps=total_steps, warmup=warmup, power=power, lr_end=lr_end
-        )
-    else:
-        raise RuntimeError("Invalid get_lr_this_step")
-
-    model, (model_desc, lr_desc), _, _, _, _, _ = _test_commons._load_pytorch_transformer_model(device, legacy_api=True)
-    legacy_trainer = Legacy_ORTTrainer(
-        model,
-        my_loss,
-        model_desc,
-        legacy_optimizer_config,
-        None,
-        lr_desc,
-        device=device,
-        _use_deterministic_compute=True,
-        get_lr_this_step=get_lr_this_step,
-    )
-    # Training loop
-    legacy_loss = []
-    for i in range(total_steps):
-        data, targets = batcher_fn(train_data, i)
-        leg_loss, leg_preds = legacy_trainer.train_step(data, targets)
-        legacy_loss.append(leg_loss.cpu())
-
-    # Compare legacy vs experimental APIs
-    _test_helpers.assert_model_outputs(legacy_loss, experimental_loss)
-
-
-def testLossScalerLegacyAndExperimentalFullCycle():
-    orttrainer.TrainStepInfo(
-        optimizer_config=optim.LambConfig(lr=0.001), all_finite=True, fetches=[], optimization_step=0, step=0
-    )
-    new_ls = amp.DynamicLossScaler()
-    old_ls = Legacy_LossScaler("ort_test_input_loss_scaler", True)
-
-    # Initial state
-    train_step_info = orttrainer.TrainStepInfo(optim.LambConfig())
-    assert_allclose(new_ls.loss_scale, old_ls.loss_scale_)
-    assert new_ls.up_scale_window == old_ls.up_scale_window_
-    assert_allclose(new_ls.min_loss_scale, old_ls.min_loss_scale_)
-    assert_allclose(new_ls.max_loss_scale, old_ls.max_loss_scale_)
-
-    # Performing 9*2000 updates to cover all branches of LossScaler.update(train_step_info.all_finite=True)
-    for _cycles in range(1, 10):
-        # 1999 updates without overflow produces 1999 stable steps
-        for _i in range(1, 2000):
-            new_loss_scale = new_ls.update(train_step_info)
-            old_ls.update_loss_scale(train_step_info.all_finite)
-            old_loss_scale = old_ls.loss_scale_
-            assert new_ls._stable_steps_count == old_ls.stable_steps_
-            assert_allclose(new_loss_scale, old_loss_scale)
-
-        # 2000th update without overflow doubles the loss and zero stable steps until max_loss_scale is reached
-        new_loss_scale = new_ls.update(train_step_info)
-        old_ls.update_loss_scale(train_step_info.all_finite)
-        old_loss_scale = old_ls.loss_scale_
-        assert new_ls._stable_steps_count == old_ls.stable_steps_
-        assert_allclose(new_loss_scale, old_loss_scale)
-
-    # After 8 cycles, loss scale should be float(1 << 16)*(2**8)
-    assert_allclose(new_loss_scale, old_loss_scale)
-
-    # After 9 cycles, loss scale reaches max_loss_scale and it is not doubled from that point on
-    for _count in range(1, 2050):
-        new_loss_scale = new_ls.update(train_step_info)
-        old_ls.update_loss_scale(train_step_info.all_finite)
-        old_loss_scale = old_ls.loss_scale_
-        assert new_ls._stable_steps_count == old_ls.stable_steps_
-        assert_allclose(new_loss_scale, old_loss_scale)
-
-    # Setting train_step_info.all_finite = False to test down scaling
-    train_step_info.all_finite = False
-
-    # Performing 24 updates to half the loss scale each time
-    for _count in range(1, 25):
-        new_loss_scale = new_ls.update(train_step_info)
-        old_ls.update_loss_scale(train_step_info.all_finite)
-        old_loss_scale = old_ls.loss_scale_
-        assert new_ls._stable_steps_count == old_ls.stable_steps_
-        assert_allclose(new_loss_scale, old_loss_scale)
-
-    # After 24 updates with gradient overflow, loss scale is 1.0
-    assert_allclose(new_loss_scale, old_loss_scale)
-
-    # After 25 updates, min_loss_scale is reached and loss scale is not halfed from that point on
-    for _count in range(1, 5):
-        new_loss_scale = new_ls.update(train_step_info)
-        old_ls.update_loss_scale(train_step_info.all_finite)
-        old_loss_scale = old_ls.loss_scale_
-        assert new_ls._stable_steps_count == old_ls.stable_steps_
-        assert_allclose(new_loss_scale, old_loss_scale)
-
-
-def testLossScalerLegacyAndExperimentalRandomAllFinite():
-    new_ls = amp.DynamicLossScaler()
-    old_ls = Legacy_LossScaler("ort_test_input_loss_scaler", True)
-
-    # Initial state
-    train_step_info = orttrainer.TrainStepInfo(optim.LambConfig())
-    assert_allclose(new_ls.loss_scale, old_ls.loss_scale_)
-    assert new_ls.up_scale_window == old_ls.up_scale_window_
-    assert_allclose(new_ls.min_loss_scale, old_ls.min_loss_scale_)
-    assert_allclose(new_ls.max_loss_scale, old_ls.max_loss_scale_)
-
-    import random
-
-    out = []
-    for _ in range(1, 64):
-        train_step_info.all_finite = bool(random.getrandbits(1))
-        new_loss_scale = new_ls.update(train_step_info)
-        old_ls.update_loss_scale(train_step_info.all_finite)
-        old_loss_scale = old_ls.loss_scale_
-        assert new_ls._stable_steps_count == old_ls.stable_steps_
-        assert_allclose(new_loss_scale, old_loss_scale)
-        out.append(new_loss_scale)
-        assert new_loss_scale > 1e-7
-
-
-def testORTTrainerRunSymbolicShapeInfer():
-    # Common data
-    seed = 0
-    total_steps = 12
-    device = "cuda"
-    torch.set_printoptions(precision=10)
-
-    # Setup without symbolic shape inference
-    torch.manual_seed(seed)
-    set_seed(seed)
-    options = orttrainer.ORTTrainerOptions({"device": {"id": device}, "debug": {"deterministic_compute": True}})
-    model, model_desc, my_loss, batcher_fn, train_data, _, _ = _test_commons._load_pytorch_transformer_model(device)
-    optim_config = optim.LambConfig(lr=0.001)
-    trainer = orttrainer.ORTTrainer(model, model_desc, optim_config, loss_fn=my_loss, options=options)
-    # Training loop
-    expected_loss = []
-    for i in range(total_steps):
-        data, targets = batcher_fn(train_data, i)
-        loss, _ = trainer.train_step(data, targets)
-        expected_loss.append(loss.cpu())
-
-    # Setup with symbolic shape inference
-    torch.manual_seed(seed)
-    set_seed(seed)
-    model, model_desc, my_loss, batcher_fn, train_data, _, _ = _test_commons._load_pytorch_transformer_model(device)
-    optim_config = optim.LambConfig(lr=0.001)
-    options.utils.run_symbolic_shape_infer = True
-    trainer = orttrainer.ORTTrainer(model, model_desc, optim_config, loss_fn=my_loss, options=options)
-    # Training loop
-    new_loss = []
-    for i in range(total_steps):
-        data, targets = batcher_fn(train_data, i)
-        loss, _ = trainer.train_step(data, targets)
-        new_loss.append(loss.cpu())
-
-    # Setup with symbolic shape inference in legacy API
-    torch.manual_seed(seed)
-    set_seed(seed)
-    model, (model_desc, lr_desc), _, _, _, _, _ = _test_commons._load_pytorch_transformer_model(device, legacy_api=True)
-    legacy_trainer = Legacy_ORTTrainer(
-        model,
-        my_loss,
-        model_desc,
-        "LambOptimizer",
-        None,
-        lr_desc,
-        device=device,
-        run_symbolic_shape_infer=True,
-        _use_deterministic_compute=True,
-    )
-    # Training loop
-    legacy_loss = []
-    for i in range(total_steps):
-        data, targets = batcher_fn(train_data, i)
-        loss, _ = legacy_trainer.train_step(data, targets, torch.tensor([optim_config.lr]))
-        legacy_loss.append(loss.cpu())
-
-    # Compare losses
-    _test_helpers.assert_model_outputs(new_loss, expected_loss)
-    _test_helpers.assert_model_outputs(legacy_loss, expected_loss)
-
-
-@pytest.mark.parametrize(
-    "test_input",
-    [
-        (
-            {
-                "distributed": {"enable_adasum": True},
-            }
-        )
-    ],
-)
-def testORTTrainerOptionsEnabledAdasumFlag(test_input):
-    """Test the enabled_adasum flag values when set enabled"""
-
-    actual_values = orttrainer_options.ORTTrainerOptions(test_input)
-    assert actual_values.distributed.enable_adasum is True
-
-
-@pytest.mark.parametrize(
-    "test_input",
-    [
-        (
-            {
-                "distributed": {"enable_adasum": False},
-            }
-        )
-    ],
-)
-def testORTTrainerOptionsDisabledAdasumFlag(test_input):
-    """Test the enabled_adasum flag values when set disabled"""
-
-    actual_values = orttrainer_options.ORTTrainerOptions(test_input)
-    assert actual_values.distributed.enable_adasum is False
-
-
-def testORTTrainerUnusedInput():
-    class UnusedInputModel(torch.nn.Module):
-        def __init__(self):
-            super().__init__()
-
-        def forward(self, x, y):
-            return torch.mean(x)
-
-    model = UnusedInputModel()
-    model_desc = {"inputs": [("x", [1]), ("y", [1])], "outputs": [("loss", [], True)]}
-    optim_config = optim.LambConfig(lr=0.001)
-    trainer = orttrainer.ORTTrainer(model, model_desc, optim_config)
-
-    # Run just one step to make sure there are no iobinding errors for the unused input.
-    try:
-        trainer.train_step(torch.FloatTensor([1.0]), torch.FloatTensor([1.0]))
-    except RuntimeError:
-        pytest.fail("RuntimeError doing train_step with an unused input.")
-
-
-@pytest.mark.parametrize(
-    "debug_files",
-    [
-        {
-            "model_after_graph_transforms_path": "transformed.onnx",
-            "model_with_gradient_graph_path": "transformed_grad.onnx",
-            "model_with_training_graph_path": "training.onnx",
-            "model_with_training_graph_after_optimization_path": "training_optimized.onnx",
-        },
-        {"model_after_graph_transforms_path": "transformed.onnx", "model_with_training_graph_path": ""},
-    ],
-)
-def testTrainingGraphExport(debug_files):
-    device = "cuda"
-    model, model_desc, my_loss, batcher_fn, train_data, _, _ = _test_commons._load_pytorch_transformer_model(device)
-
-    with tempfile.TemporaryDirectory() as tempdir:
-        debug_paths = {}
-        for k, v in debug_files.items():
-            debug_paths[k] = os.path.join(tempdir, v)
-        opts = orttrainer.ORTTrainerOptions({"device": {"id": device}, "debug": {"graph_save_paths": debug_paths}})
-        optim_config = optim.AdamConfig()
-        trainer = orttrainer.ORTTrainer(model, model_desc, optim_config, loss_fn=my_loss, options=opts)
-        data, targets = batcher_fn(train_data, 0)
-        trainer.train_step(data, targets)
-        for k, v in debug_files.items():
-            path = debug_paths[k]
-            if len(v) > 0:
-                assert os.path.isfile(path)
-                saved_graph = onnx.load(path).graph
-                if k == "model_with_training_graph_path":
-                    assert any("AdamOptimizer" in n.op_type for n in saved_graph.node)
-                elif k == "model_with_gradient_graph_path":
-                    assert any("Grad" in n.name for n in saved_graph.node)
-                elif k == "model_after_graph_transforms_path":
-                    assert any("LayerNormalization" in n.op_type for n in saved_graph.node)
-                elif k == "model_with_training_graph_after_optimization_path":
-                    assert any("FusedMatMul" in n.op_type for n in saved_graph.node)
-                # remove saved file
-                os.remove(path)
-            else:
-                assert not os.path.isfile(path)
-
-
-def _adam_max_norm_clip_data():
-    device_capability_major = torch.cuda.get_device_capability()[0]
-    if device_capability_major == 7:  # V100 for Dev machine
-        return [
-            (
-                0,
-                "cuda",
-                1.0,
-                1,
-                12,
-                {
-                    12: [
-                        10.592951,
-                        10.067989,
-                        9.619152,
-                        9.245731,
-                        8.881137,
-                        8.578644,
-                        8.280573,
-                        8.063023,
-                        7.797933,
-                        7.486215,
-                        7.233806,
-                        7.011791,
-                    ],
-                    14: [
-                        10.584141,
-                        10.068119,
-                        9.581743,
-                        9.191472,
-                        8.880169,
-                        8.5352,
-                        8.311425,
-                        8.061202,
-                        7.773032,
-                        7.523009,
-                        7.258711,
-                        7.02805,
-                    ],
-                },
-            ),
-            (
-                0,
-                "cuda",
-                0.1,
-                1,
-                12,
-                {
-                    12: [
-                        10.592951,
-                        10.068722,
-                        9.620503,
-                        9.247791,
-                        8.883972,
-                        8.582286,
-                        8.285027,
-                        8.068308,
-                        7.803638,
-                        7.492318,
-                        7.240352,
-                        7.018665,
-                    ],
-                    14: [
-                        10.584141,
-                        10.068845,
-                        9.583107,
-                        9.193537,
-                        8.882966,
-                        8.538839,
-                        8.315872,
-                        8.066408,
-                        7.778978,
-                        7.529708,
-                        7.265849,
-                        7.035439,
-                    ],
-                },
-            ),
-            (
-                42,
-                "cuda",
-                1.0,
-                1,
-                12,
-                {
-                    12: [
-                        10.647908,
-                        10.144501,
-                        9.672352,
-                        9.306980,
-                        8.956026,
-                        8.602655,
-                        8.351079,
-                        8.088144,
-                        7.867220,
-                        7.564082,
-                        7.289846,
-                        7.073726,
-                    ],
-                    14: [
-                        10.697515,
-                        10.229034,
-                        9.765422,
-                        9.428294,
-                        9.080612,
-                        8.715208,
-                        8.459574,
-                        8.169073,
-                        7.940211,
-                        7.654147,
-                        7.390446,
-                        7.166227,
-                    ],
-                },
-            ),
-            (
-                42,
-                "cuda",
-                0.1,
-                1,
-                12,
-                {
-                    12: [
-                        10.647908,
-                        10.145191,
-                        9.673690,
-                        9.309031,
-                        8.959020,
-                        8.606632,
-                        8.355836,
-                        8.093478,
-                        7.873327,
-                        7.570731,
-                        7.296772,
-                        7.0809422,
-                    ],
-                    14: [
-                        10.697515,
-                        10.22967,
-                        9.766556,
-                        9.430037,
-                        9.083106,
-                        8.718601,
-                        8.463726,
-                        8.17396,
-                        7.945755,
-                        7.660188,
-                        7.396963,
-                        7.172944,
-                    ],
-                },
-            ),
-        ]
-    elif device_capability_major == 5:  # M60 for CI machines (Python Packaging Pipeline)
-        return [
-            (
-                0,
-                "cuda",
-                1.0,
-                1,
-                12,
-                {
-                    12: [
-                        10.618382,
-                        10.08292,
-                        9.603334,
-                        9.258133,
-                        8.917768,
-                        8.591574,
-                        8.318401,
-                        8.042292,
-                        7.783608,
-                        7.50226,
-                        7.236041,
-                        7.035602,
-                    ],
-                    14: [
-                        10.618382,
-                        10.08292,
-                        9.603334,
-                        9.258133,
-                        8.917768,
-                        8.591574,
-                        8.318401,
-                        8.042292,
-                        7.783608,
-                        7.50226,
-                        7.236041,
-                        7.035602,
-                    ],
-                },
-            ),
-            (
-                0,
-                "cuda",
-                0.1,
-                1,
-                12,
-                {
-                    12: [
-                        10.618382,
-                        10.083632,
-                        9.604639,
-                        9.260109,
-                        8.920504,
-                        8.595082,
-                        8.322799,
-                        8.047493,
-                        7.78929,
-                        7.508382,
-                        7.242587,
-                        7.042367,
-                    ],
-                    14: [
-                        10.618382,
-                        10.083632,
-                        9.604639,
-                        9.260109,
-                        8.920504,
-                        8.595082,
-                        8.322799,
-                        8.047493,
-                        7.78929,
-                        7.508382,
-                        7.242587,
-                        7.042367,
-                    ],
-                },
-            ),
-            (
-                42,
-                "cuda",
-                1.0,
-                1,
-                12,
-                {
-                    12: [
-                        10.68639,
-                        10.102986,
-                        9.647681,
-                        9.293091,
-                        8.958928,
-                        8.625297,
-                        8.351107,
-                        8.079577,
-                        7.840723,
-                        7.543044,
-                        7.284141,
-                        7.072688,
-                    ],
-                    14: [
-                        10.68639,
-                        10.102986,
-                        9.647681,
-                        9.293091,
-                        8.958928,
-                        8.625297,
-                        8.351107,
-                        8.079577,
-                        7.840723,
-                        7.543044,
-                        7.284141,
-                        7.072688,
-                    ],
-                },
-            ),
-            (
-                42,
-                "cuda",
-                0.1,
-                1,
-                12,
-                {
-                    12: [
-                        10.68639,
-                        10.103672,
-                        9.649025,
-                        9.295167,
-                        8.961777,
-                        8.629059,
-                        8.355571,
-                        8.084871,
-                        7.846589,
-                        7.549438,
-                        7.290722,
-                        7.079446,
-                    ],
-                    14: [
-                        10.697515,
-                        10.22967,
-                        9.766556,
-                        9.430037,
-                        9.083106,
-                        8.718601,
-                        8.463726,
-                        8.17396,
-                        7.945755,
-                        7.660188,
-                        7.396963,
-                        7.172944,
-                    ],
-                },
-            ),
-        ]
-
-
-@pytest.mark.parametrize(
-    "seed,device,max_norm_clip,gradient_accumulation_steps,total_steps,expected_loss", _adam_max_norm_clip_data()
-)
-def testORTTrainerAdamMaxNormClip(seed, device, max_norm_clip, gradient_accumulation_steps, total_steps, expected_loss):
-    rtol = 1e-5
-    torch.manual_seed(seed)
-    set_seed(seed)
-
-    # Setup ORTTrainer
-    options = orttrainer.ORTTrainerOptions(
-        {
-            "device": {"id": device},
-            "batch": {"gradient_accumulation_steps": gradient_accumulation_steps},
-            "debug": {"deterministic_compute": True},
-        }
-    )
-    model, model_desc, my_loss, batcher_fn, train_data, _, _ = _test_commons._load_pytorch_transformer_model(device)
-    optim_config = optim.AdamConfig(lr=0.001, max_norm_clip=max_norm_clip)
-    trainer = orttrainer.ORTTrainer(model, model_desc, optim_config, loss_fn=my_loss, options=options)
-
-    # Training loop
-    actual_loss = []
-    for i in range(total_steps):
-        data, targets = batcher_fn(train_data, i)
-        loss, _ = trainer.train_step(data, targets)
-        actual_loss.append(loss.cpu().item())
-
-    # Compare legacy vs experimental APIs
-    assert trainer._onnx_model is not None
-    opset = get_model_opset(trainer._onnx_model)
-    _test_helpers.assert_model_outputs(expected_loss[opset], actual_loss, rtol=rtol)
-
-
-def _lamb_max_norm_clip_data():
-    device_capability_major = torch.cuda.get_device_capability()[0]
-    if device_capability_major == 7:  # V100 for Dev machine
-        return [
-            (
-                0,
-                "cuda",
-                1.0,
-                1,
-                12,
-                {
-                    12: [
-                        10.592951,
-                        10.487728,
-                        10.422251,
-                        10.350913,
-                        10.244248,
-                        10.213003,
-                        10.129222,
-                        10.095112,
-                        10.035983,
-                        9.974586,
-                        9.909771,
-                        9.874278,
-                    ],
-                    14: [
-                        10.584141,
-                        10.497192,
-                        10.389251,
-                        10.286045,
-                        10.231354,
-                        10.17018,
-                        10.066779,
-                        10.048138,
-                        9.958029,
-                        9.8908,
-                        9.82965,
-                        9.755484,
-                    ],
-                },
-            ),
-            (
-                0,
-                "cuda",
-                0.1,
-                1,
-                12,
-                {
-                    12: [
-                        10.592951,
-                        10.452503,
-                        10.349832,
-                        10.245314,
-                        10.106587,
-                        10.046009,
-                        9.934781,
-                        9.875164,
-                        9.792067,
-                        9.704592,
-                        9.617104,
-                        9.563070,
-                    ],
-                    14: [
-                        10.584141,
-                        10.461154,
-                        10.315399,
-                        10.178979,
-                        10.092329,
-                        9.999928,
-                        9.869949,
-                        9.824564,
-                        9.707565,
-                        9.61643,
-                        9.532847,
-                        9.439593,
-                    ],
-                },
-            ),
-            (
-                42,
-                "cuda",
-                1.0,
-                1,
-                12,
-                {
-                    12: [
-                        10.647908,
-                        10.566276,
-                        10.476154,
-                        10.406275,
-                        10.311079,
-                        10.240053,
-                        10.196469,
-                        10.113955,
-                        10.117376,
-                        10.013077,
-                        9.930301,
-                        9.893368,
-                    ],
-                    14: [
-                        10.697515,
-                        10.631279,
-                        10.528757,
-                        10.496689,
-                        10.411219,
-                        10.322109,
-                        10.297314,
-                        10.215549,
-                        10.149698,
-                        10.087336,
-                        10.010884,
-                        9.934544,
-                    ],
-                },
-            ),
-            (
-                42,
-                "cuda",
-                0.1,
-                1,
-                12,
-                {
-                    12: [
-                        10.647908,
-                        10.531957,
-                        10.405246,
-                        10.302971,
-                        10.176583,
-                        10.075583,
-                        10.005772,
-                        9.897825,
-                        9.875748,
-                        9.748932,
-                        9.642885,
-                        9.586762,
-                    ],
-                    14: [
-                        10.697515,
-                        10.596729,
-                        10.457815,
-                        10.393475,
-                        10.277581,
-                        10.158909,
-                        10.108126,
-                        10.000326,
-                        9.912526,
-                        9.826057,
-                        9.727899,
-                        9.633768,
-                    ],
-                },
-            ),
-        ]
-    elif device_capability_major == 5:  # M60 for CI machines (Python Packaging Pipeline)
-        return [
-            (
-                0,
-                "cuda",
-                1.0,
-                1,
-                12,
-                {
-                    12: [
-                        10.618382,
-                        10.50222,
-                        10.403347,
-                        10.35298,
-                        10.288447,
-                        10.237399,
-                        10.184225,
-                        10.089048,
-                        10.008952,
-                        9.972644,
-                        9.897674,
-                        9.84524,
-                    ],
-                    14: [0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4],
-                },
-            ),
-            (
-                0,
-                "cuda",
-                0.1,
-                1,
-                12,
-                {
-                    12: [
-                        10.618382,
-                        10.466732,
-                        10.330871,
-                        10.24715,
-                        10.150972,
-                        10.069127,
-                        9.98974,
-                        9.870169,
-                        9.763693,
-                        9.704323,
-                        9.605957,
-                        9.533117,
-                    ],
-                    14: [1, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4],
-                },
-            ),
-            (
-                42,
-                "cuda",
-                1.0,
-                1,
-                12,
-                {
-                    12: [
-                        10.68639,
-                        10.511692,
-                        10.447308,
-                        10.405255,
-                        10.334866,
-                        10.261473,
-                        10.169422,
-                        10.107138,
-                        10.069889,
-                        9.97798,
-                        9.928105,
-                        9.896435,
-                    ],
-                    14: [2, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4],
-                },
-            ),
-            (
-                42,
-                "cuda",
-                0.1,
-                1,
-                12,
-                {
-                    12: [
-                        10.68639,
-                        10.477489,
-                        10.376671,
-                        10.301725,
-                        10.200718,
-                        10.098477,
-                        9.97995,
-                        9.890104,
-                        9.828899,
-                        9.713555,
-                        9.639567,
-                        9.589856,
-                    ],
-                    14: [3, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4],
-                },
-            ),
-        ]
-
-
-@pytest.mark.parametrize(
-    "seed,device,max_norm_clip, gradient_accumulation_steps,total_steps,expected_loss", _lamb_max_norm_clip_data()
-)
-def testORTTrainerLambMaxNormClip(seed, device, max_norm_clip, gradient_accumulation_steps, total_steps, expected_loss):
-    rtol = 1e-3
-    torch.manual_seed(seed)
-    set_seed(seed)
-
-    # Setup ORTTrainer
-    options = orttrainer.ORTTrainerOptions(
-        {
-            "device": {"id": device},
-            "batch": {"gradient_accumulation_steps": gradient_accumulation_steps},
-            "debug": {"deterministic_compute": True},
-        }
-    )
-    model, model_desc, my_loss, batcher_fn, train_data, _, _ = _test_commons._load_pytorch_transformer_model(device)
-    optim_config = optim.LambConfig(lr=0.001, max_norm_clip=max_norm_clip)
-    trainer = orttrainer.ORTTrainer(model, model_desc, optim_config, loss_fn=my_loss, options=options)
-
-    # Training loop
-    actual_loss = []
-    for i in range(total_steps):
-        data, targets = batcher_fn(train_data, i)
-        loss, _ = trainer.train_step(data, targets)
-        actual_loss.append(loss.cpu().item())
-
-    # Compare legacy vs experimental APIs
-    opset = get_model_opset(trainer._onnx_model)
-    _test_helpers.assert_model_outputs(expected_loss[opset], actual_loss, rtol=rtol)
diff --git a/orttraining/orttraining/test/python/orttraining_test_transformers.py b/orttraining/orttraining/test/python/orttraining_test_transformers.py
deleted file mode 100644
index dbaf4a293c466..0000000000000
--- a/orttraining/orttraining/test/python/orttraining_test_transformers.py
+++ /dev/null
@@ -1,480 +0,0 @@
-import random
-import unittest
-
-import numpy as np
-import torch
-from numpy.testing import assert_allclose
-from orttraining_test_data_loader import BatchArgsOption, ids_tensor
-from orttraining_test_utils import get_lr, run_test
-from transformers import BertConfig, BertForPreTraining
-
-import onnxruntime
-from onnxruntime.capi.ort_trainer import IODescription, LossScaler, ModelDescription, ORTTrainer  # noqa: F401
-
-
-class BertModelTest(unittest.TestCase):
-    class BertModelTester:
-        def __init__(
-            self,
-            parent,
-            batch_size=13,
-            seq_length=7,
-            is_training=True,
-            use_input_mask=True,
-            use_token_type_ids=True,
-            use_labels=True,
-            vocab_size=99,
-            hidden_size=32,
-            num_hidden_layers=5,
-            num_attention_heads=4,
-            intermediate_size=37,
-            hidden_act="gelu",
-            hidden_dropout_prob=0.1,
-            attention_probs_dropout_prob=0.1,
-            max_position_embeddings=512,
-            type_vocab_size=16,
-            type_sequence_label_size=2,
-            initializer_range=0.02,
-            num_labels=3,
-            num_choices=4,
-            scope=None,
-            device="cpu",
-        ):
-            self.parent = parent
-            self.batch_size = batch_size
-            self.seq_length = seq_length
-            self.is_training = is_training
-            self.use_input_mask = use_input_mask
-            self.use_token_type_ids = use_token_type_ids
-            self.use_labels = use_labels
-            self.vocab_size = vocab_size
-            self.hidden_size = hidden_size
-            self.num_hidden_layers = num_hidden_layers
-            self.num_attention_heads = num_attention_heads
-            self.intermediate_size = intermediate_size
-            self.hidden_act = hidden_act
-            self.hidden_dropout_prob = hidden_dropout_prob
-            self.attention_probs_dropout_prob = attention_probs_dropout_prob
-            self.max_position_embeddings = max_position_embeddings
-            self.type_vocab_size = type_vocab_size
-            self.type_sequence_label_size = type_sequence_label_size
-            self.initializer_range = initializer_range
-            self.num_labels = num_labels
-            self.num_choices = num_choices
-            self.scope = scope
-            self.device = device
-
-            # 1. superset of bert input/output descs
-            # see BertPreTrainedModel doc
-            self.input_ids_desc = IODescription(
-                "input_ids", ["batch", "max_seq_len_in_batch"], torch.int64, num_classes=self.vocab_size
-            )
-            self.attention_mask_desc = IODescription(
-                "attention_mask", ["batch", "max_seq_len_in_batch"], torch.int64, num_classes=2
-            )
-            self.token_type_ids_desc = IODescription(
-                "token_type_ids", ["batch", "max_seq_len_in_batch"], torch.int64, num_classes=2
-            )
-            self.position_ids_desc = IODescription(
-                "position_ids", ["batch", "max_seq_len_in_batch"], torch.int64, num_classes=self.max_position_embeddings
-            )
-            self.head_mask_desc = IODescription(
-                "head_mask", [self.num_hidden_layers, self.num_attention_heads], torch.int64, num_classes=2
-            )
-            self.inputs_embeds_desc = IODescription(
-                "inputs_embeds", ["batch", "max_seq_len_in_batch", self.hidden_size], torch.float32
-            )
-
-            self.encoder_hidden_states_desc = IODescription(
-                "encoder_hidden_states", ["batch", "max_seq_len_in_batch", self.hidden_size], torch.float32
-            )
-            self.encoder_attention_mask_desc = IODescription(
-                "encoder_attention_mask", ["batch", "max_seq_len_in_batch"], torch.float32
-            )
-
-            # see BertForPreTraining doc
-            self.masked_lm_labels_desc = IODescription(
-                "masked_lm_labels", ["batch", "max_seq_len_in_batch"], torch.int64, num_classes=self.vocab_size
-            )
-            self.next_sentence_label_desc = IODescription(
-                "next_sentence_label",
-                [
-                    "batch",
-                ],
-                torch.int64,
-                num_classes=2,
-            )
-
-            # outputs
-            self.loss_desc = IODescription(
-                "loss",
-                [
-                    1,
-                ],
-                torch.float32,
-            )
-            self.prediction_scores_desc = IODescription(
-                "prediction_scores", ["batch", "max_seq_len_in_batch", self.vocab_size], torch.float32
-            )
-
-            self.seq_relationship_scores_desc = IODescription(
-                "seq_relationship_scores", ["batch", 2], torch.float32
-            )  # IODescription('seq_relationship_scores', ['batch', 'max_seq_len_in_batch', 2], torch.float32)
-            self.hidden_states_desc = IODescription(
-                "hidden_states",
-                [self.num_hidden_layers, "batch", "max_seq_len_in_batch", self.hidden_size],
-                torch.float32,
-            )
-            self.attentions_desc = IODescription(
-                "attentions",
-                [
-                    self.num_hidden_layers,
-                    "batch",
-                    self.num_attention_heads,
-                    "max_seq_len_in_batch",
-                    "max_seq_len_in_batch",
-                ],
-                torch.float32,
-            )
-            self.last_hidden_state_desc = IODescription(
-                "last_hidden_state", ["batch", "max_seq_len_in_batch", self.hidden_size], torch.float32
-            )
-            self.pooler_output_desc = IODescription("pooler_output", ["batch", self.hidden_size], torch.float32)
-
-        def BertForPreTraining_descs(self):
-            return ModelDescription(
-                [
-                    self.input_ids_desc,
-                    self.attention_mask_desc,
-                    self.token_type_ids_desc,
-                    self.masked_lm_labels_desc,
-                    self.next_sentence_label_desc,
-                ],
-                # returns loss_desc if both masked_lm_labels_desc, next_sentence_label are provided
-                # hidden_states_desc, attentions_desc shall be included according to config.output_attentions, config.output_hidden_states
-                [
-                    self.loss_desc,
-                    self.prediction_scores_desc,
-                    self.seq_relationship_scores_desc,
-                    # hidden_states_desc, attentions_desc
-                ],
-            )
-
-        def prepare_config_and_inputs(self):
-            input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size).to(self.device)
-
-            input_mask = None
-            if self.use_input_mask:
-                input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2).to(self.device)
-
-            token_type_ids = None
-            if self.use_token_type_ids:
-                token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size).to(self.device)
-
-            sequence_labels = None
-            token_labels = None
-            choice_labels = None
-            if self.use_labels:
-                sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size).to(self.device)
-                token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels).to(self.device)
-                choice_labels = ids_tensor([self.batch_size], self.num_choices).to(self.device)
-
-            config = BertConfig(
-                vocab_size=self.vocab_size,
-                vocab_size_or_config_json_file=self.vocab_size,
-                hidden_size=self.hidden_size,
-                num_hidden_layers=self.num_hidden_layers,
-                num_attention_heads=self.num_attention_heads,
-                intermediate_size=self.intermediate_size,
-                hidden_act=self.hidden_act,
-                hidden_dropout_prob=self.hidden_dropout_prob,
-                attention_probs_dropout_prob=self.attention_probs_dropout_prob,
-                max_position_embeddings=self.max_position_embeddings,
-                type_vocab_size=self.type_vocab_size,
-                is_decoder=False,
-                initializer_range=self.initializer_range,
-            )
-
-            return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
-
-        def create_and_check_bert_for_pretraining(
-            self,
-            config,
-            input_ids,
-            token_type_ids,
-            input_mask,
-            sequence_labels,
-            token_labels,
-            choice_labels,
-            option_fp16,
-            option_allreduce_post_accumulation,
-            option_gradient_accumulation_steps,
-            option_split_batch,
-            option_use_internal_get_lr_this_step=[True],  # noqa: B006
-            option_use_internal_loss_scaler=[True],  # noqa: B006
-        ):
-            seed = 42
-            random.seed(seed)
-            np.random.seed(seed)
-            torch.manual_seed(seed)
-            torch.cuda.manual_seed_all(seed)
-            onnxruntime.set_seed(seed)
-
-            model = BertForPreTraining(config=config)
-            model.eval()
-            loss, prediction_scores, seq_relationship_score = model(
-                input_ids,
-                attention_mask=input_mask,
-                token_type_ids=token_type_ids,
-                masked_lm_labels=token_labels,
-                next_sentence_label=sequence_labels,
-            )
-            model_desc = ModelDescription(
-                [
-                    self.input_ids_desc,
-                    self.attention_mask_desc,
-                    self.token_type_ids_desc,
-                    self.masked_lm_labels_desc,
-                    self.next_sentence_label_desc,
-                ],
-                [self.loss_desc, self.prediction_scores_desc, self.seq_relationship_scores_desc],
-            )
-
-            from collections import namedtuple
-
-            MyArgs = namedtuple(
-                "MyArgs", "local_rank world_size max_steps learning_rate warmup_proportion batch_size seq_len"
-            )
-
-            dataset_len = 100
-            epochs = 8
-            max_steps = epochs * dataset_len
-            args = MyArgs(
-                local_rank=0,
-                world_size=1,
-                max_steps=max_steps,
-                learning_rate=0.00001,
-                warmup_proportion=0.01,
-                batch_size=13,
-                seq_len=7,
-            )
-
-            def get_lr_this_step(global_step):
-                return get_lr(args, global_step)
-
-            loss_scaler = LossScaler("loss_scale_input_name", True, up_scale_window=2000)
-
-            for fp16 in option_fp16:
-                for allreduce_post_accumulation in option_allreduce_post_accumulation:
-                    for gradient_accumulation_steps in option_gradient_accumulation_steps:
-                        for use_internal_get_lr_this_step in option_use_internal_get_lr_this_step:
-                            for use_internal_loss_scaler in option_use_internal_loss_scaler:
-                                for split_batch in option_split_batch:
-                                    print("gradient_accumulation_steps:", gradient_accumulation_steps)
-                                    print("split_batch:", split_batch)
-
-                                    seed = 42
-                                    random.seed(seed)
-                                    np.random.seed(seed)
-                                    torch.manual_seed(seed)
-                                    torch.cuda.manual_seed_all(seed)
-                                    onnxruntime.set_seed(seed)
-
-                                    (
-                                        old_api_loss_ort,
-                                        old_api_prediction_scores_ort,
-                                        old_api_seq_relationship_score_ort,
-                                    ) = run_test(
-                                        model,
-                                        model_desc,
-                                        self.device,
-                                        args,
-                                        gradient_accumulation_steps,
-                                        fp16,
-                                        allreduce_post_accumulation,
-                                        get_lr_this_step,
-                                        use_internal_get_lr_this_step,
-                                        loss_scaler,
-                                        use_internal_loss_scaler,
-                                        split_batch,
-                                        dataset_len,
-                                        epochs,
-                                        use_new_api=False,
-                                    )
-
-                                    random.seed(seed)
-                                    np.random.seed(seed)
-                                    torch.manual_seed(seed)
-                                    torch.cuda.manual_seed_all(seed)
-                                    onnxruntime.set_seed(seed)
-                                    if use_internal_get_lr_this_step and use_internal_loss_scaler:
-                                        (
-                                            new_api_loss_ort,
-                                            new_api_prediction_scores_ort,
-                                            new_api_seq_relationship_score_ort,
-                                        ) = run_test(
-                                            model,
-                                            model_desc,
-                                            self.device,
-                                            args,
-                                            gradient_accumulation_steps,
-                                            fp16,
-                                            allreduce_post_accumulation,
-                                            get_lr_this_step,
-                                            use_internal_get_lr_this_step,
-                                            loss_scaler,
-                                            use_internal_loss_scaler,
-                                            split_batch,
-                                            dataset_len,
-                                            epochs,
-                                            use_new_api=True,
-                                        )
-
-                                        assert_allclose(old_api_loss_ort, new_api_loss_ort)
-                                        assert_allclose(old_api_prediction_scores_ort, new_api_prediction_scores_ort)
-                                        assert_allclose(
-                                            old_api_seq_relationship_score_ort, new_api_seq_relationship_score_ort
-                                        )
-
-    def setUp(self):
-        self.model_tester = BertModelTest.BertModelTester(self)
-
-    def test_for_pretraining_mixed_precision(self):
-        # It would be better to test both with/without mixed precision and allreduce_post_accumulation.
-        # However, stress test of all the 4 cases is not stable at least on the test machine.
-        # There we only test mixed precision and allreduce_post_accumulation because it is the most useful use cases.
-        option_fp16 = [True]
-        option_allreduce_post_accumulation = [True]
-        option_gradient_accumulation_steps = [1]
-        option_split_batch = [BatchArgsOption.ListAndDict]
-        config_and_inputs = self.model_tester.prepare_config_and_inputs()
-        self.model_tester.create_and_check_bert_for_pretraining(
-            *config_and_inputs,
-            option_fp16,
-            option_allreduce_post_accumulation,
-            option_gradient_accumulation_steps,
-            option_split_batch,
-        )
-
-    def test_for_pretraining_mixed_precision_with_gradient_accumulation(self):
-        # It would be better to test both with/without mixed precision and allreduce_post_accumulation.
-        # However, stress test of all the 4 cases is not stable at least on the test machine.
-        # There we only test mixed precision and allreduce_post_accumulation because it is the most useful use cases.
-        option_fp16 = [True]
-        option_allreduce_post_accumulation = [True]
-        option_gradient_accumulation_steps = [8]
-        option_split_batch = [BatchArgsOption.ListAndDict]
-        config_and_inputs = self.model_tester.prepare_config_and_inputs()
-        self.model_tester.create_and_check_bert_for_pretraining(
-            *config_and_inputs,
-            option_fp16,
-            option_allreduce_post_accumulation,
-            option_gradient_accumulation_steps,
-            option_split_batch,
-        )
-
-    def test_for_pretraining_full_precision_all(self):
-        # This test is not stable because it create and run ORTSession multiple times.
-        # It occasionally gets seg fault at ~MemoryPattern()
-        # when releasing patterns_. In order not to block PR merging CI test,
-        # this test is broke into following individual tests.
-        option_fp16 = [False]
-        option_allreduce_post_accumulation = [True]
-        option_gradient_accumulation_steps = [1, 8]
-        option_split_batch = [BatchArgsOption.List, BatchArgsOption.Dict, BatchArgsOption.ListAndDict]
-        config_and_inputs = self.model_tester.prepare_config_and_inputs()
-        self.model_tester.create_and_check_bert_for_pretraining(
-            *config_and_inputs,
-            option_fp16,
-            option_allreduce_post_accumulation,
-            option_gradient_accumulation_steps,
-            option_split_batch,
-        )
-
-    def test_for_pretraining_full_precision_list_input(self):
-        option_fp16 = [False]
-        option_allreduce_post_accumulation = [True]
-        option_gradient_accumulation_steps = [1]
-        option_split_batch = [BatchArgsOption.List]
-        config_and_inputs = self.model_tester.prepare_config_and_inputs()
-        self.model_tester.create_and_check_bert_for_pretraining(
-            *config_and_inputs,
-            option_fp16,
-            option_allreduce_post_accumulation,
-            option_gradient_accumulation_steps,
-            option_split_batch,
-        )
-
-    def test_for_pretraining_full_precision_dict_input(self):
-        option_fp16 = [False]
-        option_allreduce_post_accumulation = [True]
-        option_gradient_accumulation_steps = [1]
-        option_split_batch = [BatchArgsOption.Dict]
-        config_and_inputs = self.model_tester.prepare_config_and_inputs()
-        self.model_tester.create_and_check_bert_for_pretraining(
-            *config_and_inputs,
-            option_fp16,
-            option_allreduce_post_accumulation,
-            option_gradient_accumulation_steps,
-            option_split_batch,
-        )
-
-    def test_for_pretraining_full_precision_list_and_dict_input(self):
-        option_fp16 = [False]
-        option_allreduce_post_accumulation = [True]
-        option_gradient_accumulation_steps = [1]
-        option_split_batch = [BatchArgsOption.ListAndDict]
-        config_and_inputs = self.model_tester.prepare_config_and_inputs()
-        self.model_tester.create_and_check_bert_for_pretraining(
-            *config_and_inputs,
-            option_fp16,
-            option_allreduce_post_accumulation,
-            option_gradient_accumulation_steps,
-            option_split_batch,
-        )
-
-    def test_for_pretraining_full_precision_grad_accumulation_list_input(self):
-        option_fp16 = [False]
-        option_allreduce_post_accumulation = [True]
-        option_gradient_accumulation_steps = [8]
-        option_split_batch = [BatchArgsOption.List]
-        config_and_inputs = self.model_tester.prepare_config_and_inputs()
-        self.model_tester.create_and_check_bert_for_pretraining(
-            *config_and_inputs,
-            option_fp16,
-            option_allreduce_post_accumulation,
-            option_gradient_accumulation_steps,
-            option_split_batch,
-        )
-
-    def test_for_pretraining_full_precision_grad_accumulation_dict_input(self):
-        option_fp16 = [False]
-        option_allreduce_post_accumulation = [True]
-        option_gradient_accumulation_steps = [8]
-        option_split_batch = [BatchArgsOption.Dict]
-        config_and_inputs = self.model_tester.prepare_config_and_inputs()
-        self.model_tester.create_and_check_bert_for_pretraining(
-            *config_and_inputs,
-            option_fp16,
-            option_allreduce_post_accumulation,
-            option_gradient_accumulation_steps,
-            option_split_batch,
-        )
-
-    def test_for_pretraining_full_precision_grad_accumulation_list_and_dict_input(self):
-        option_fp16 = [False]
-        option_allreduce_post_accumulation = [True]
-        option_gradient_accumulation_steps = [8]
-        option_split_batch = [BatchArgsOption.ListAndDict]
-        config_and_inputs = self.model_tester.prepare_config_and_inputs()
-        self.model_tester.create_and_check_bert_for_pretraining(
-            *config_and_inputs,
-            option_fp16,
-            option_allreduce_post_accumulation,
-            option_gradient_accumulation_steps,
-            option_split_batch,
-        )
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/orttraining/orttraining/test/python/orttraining_test_utils.py b/orttraining/orttraining/test/python/orttraining_test_utils.py
deleted file mode 100644
index 527cfb8a0ba7d..0000000000000
--- a/orttraining/orttraining/test/python/orttraining_test_utils.py
+++ /dev/null
@@ -1,246 +0,0 @@
-import math
-
-import torch
-from orttraining_test_data_loader import BatchArgsOption, create_ort_test_dataloader, split_batch
-
-from onnxruntime.capi.ort_trainer import IODescription, ORTTrainer
-from onnxruntime.training import amp, optim, orttrainer
-from onnxruntime.training.optim import _LRScheduler
-
-
-def warmup_cosine(x, warmup=0.002):
-    if x < warmup:
-        return x / warmup
-    return 0.5 * (1.0 + torch.cos(math.pi * x))
-
-
-def warmup_constant(x, warmup=0.002):
-    if x < warmup:
-        return x / warmup
-    return 1.0
-
-
-def warmup_linear(x, warmup=0.002):
-    if x < warmup:
-        return x / warmup
-    return max((x - 1.0) / (warmup - 1.0), 0.0)
-
-
-def warmup_poly(x, warmup=0.002, degree=0.5):
-    if x < warmup:
-        return x / warmup
-    return (1.0 - x) ** degree
-
-
-SCHEDULES = {
-    "warmup_cosine": warmup_cosine,
-    "warmup_constant": warmup_constant,
-    "warmup_linear": warmup_linear,
-    "warmup_poly": warmup_poly,
-}
-
-
-def get_lr(args, training_steps, schedule="warmup_poly"):
-    if args.max_steps == -1:
-        return args.learning_rate
-
-    schedule_fct = SCHEDULES[schedule]
-    return args.learning_rate * schedule_fct(training_steps / args.max_steps, args.warmup_proportion)
-
-
-def map_optimizer_attributes(name):
-    no_decay_keys = ["bias", "gamma", "beta", "LayerNorm"]
-    no_decay = any(no_decay_key in name for no_decay_key in no_decay_keys)
-    if no_decay:
-        return {"alpha": 0.9, "beta": 0.999, "lambda": 0.0, "epsilon": 1e-6}
-    else:
-        return {"alpha": 0.9, "beta": 0.999, "lambda": 0.0, "epsilon": 1e-6}
-
-
-class WrapLRScheduler(_LRScheduler):
-    def __init__(self, get_lr_this_step):
-        super().__init__()
-        self.get_lr_this_step = get_lr_this_step
-
-    def get_lr(self, train_step_info):
-        return [self.get_lr_this_step(train_step_info.optimization_step)]
-
-
-def run_test(
-    model,
-    model_desc,
-    device,
-    args,
-    gradient_accumulation_steps,
-    fp16,
-    allreduce_post_accumulation,
-    get_lr_this_step,
-    use_internal_get_lr_this_step,
-    loss_scaler,
-    use_internal_loss_scaler,
-    batch_args_option,
-    dataset_len,
-    epochs,
-    use_new_api,
-):
-    dataloader = create_ort_test_dataloader(model_desc.inputs_, args.batch_size, args.seq_len, dataset_len, device)
-
-    if use_new_api:
-        assert use_internal_loss_scaler, "new api should always use internal loss scaler"
-
-        new_api_lr_scheduler = WrapLRScheduler(get_lr_this_step)
-
-        new_api_loss_scaler = amp.DynamicLossScaler() if fp16 else None
-        options = orttrainer.ORTTrainerOptions(
-            {
-                "batch": {"gradient_accumulation_steps": gradient_accumulation_steps},
-                "device": {"id": device},
-                "mixed_precision": {"enabled": fp16, "loss_scaler": new_api_loss_scaler},
-                "debug": {
-                    "deterministic_compute": True,
-                },
-                "utils": {"grad_norm_clip": True},
-                "distributed": {"allreduce_post_accumulation": True},
-                "lr_scheduler": new_api_lr_scheduler,
-            }
-        )
-
-        param_optimizer = list(model.named_parameters())
-        params = [
-            {
-                "params": [n for n, p in param_optimizer if "bias" in n or "LayerNorm.weight" in n],
-                "alpha": 0.9,
-                "beta": 0.999,
-                "lambda": 0.0,
-                "epsilon": 1e-6,
-            },
-            {
-                "params": [n for n, p in param_optimizer if not ("bias" in n or "LayerNorm.weight" in n)],
-                "alpha": 0.9,
-                "beta": 0.999,
-                "lambda": 0.0,
-                "epsilon": 1e-6,
-            },
-        ]
-
-        vocab_size = 99
-        new_model_desc = {
-            "inputs": [
-                (
-                    "input_ids",
-                    ["batch", "max_seq_len_in_batch"],
-                ),
-                (
-                    "attention_mask",
-                    ["batch", "max_seq_len_in_batch"],
-                ),
-                (
-                    "token_type_ids",
-                    ["batch", "max_seq_len_in_batch"],
-                ),
-                (
-                    "masked_lm_labels",
-                    ["batch", "max_seq_len_in_batch"],
-                ),
-                (
-                    "next_sentence_label",
-                    [
-                        "batch",
-                    ],
-                ),
-            ],
-            "outputs": [
-                (
-                    "loss",
-                    [
-                        1,
-                    ],
-                    True,
-                ),
-                ("prediction_scores", ["batch", "max_seq_len_in_batch", vocab_size]),
-                ("seq_relationship_scores", ["batch", 2]),
-            ],
-        }
-
-        optim_config = optim.LambConfig(params=params, lr=2e-5)
-        model = orttrainer.ORTTrainer(model, new_model_desc, optim_config, options=options)
-        print("running with new frontend API")
-    else:
-        model = ORTTrainer(
-            model,
-            None,
-            model_desc,
-            "LambOptimizer",
-            map_optimizer_attributes=map_optimizer_attributes,
-            learning_rate_description=IODescription(
-                "Learning_Rate",
-                [
-                    1,
-                ],
-                torch.float32,
-            ),
-            device=device,
-            _enable_internal_postprocess=True,
-            gradient_accumulation_steps=gradient_accumulation_steps,
-            # BertLAMB default initial settings: b1=0.9, b2=0.999, e=1e-6
-            world_rank=args.local_rank,
-            world_size=args.world_size,
-            use_mixed_precision=fp16,
-            allreduce_post_accumulation=allreduce_post_accumulation,
-            get_lr_this_step=get_lr_this_step if use_internal_get_lr_this_step else None,
-            loss_scaler=loss_scaler if use_internal_loss_scaler else None,
-            _opset_version=14,
-            _use_deterministic_compute=True,
-        )
-        print("running with old frontend API")
-
-    # training loop
-    eval_batch = None
-    if not use_new_api:
-        model.train()
-    for _epoch in range(epochs):
-        for step, batch in enumerate(dataloader):
-            if eval_batch is None:
-                eval_batch = batch
-
-            if not use_internal_get_lr_this_step:
-                lr = get_lr_this_step(step)
-                learning_rate = torch.tensor([lr])
-
-            if not use_internal_loss_scaler and fp16:
-                loss_scale = torch.tensor([loss_scaler.loss_scale_])
-
-            if batch_args_option == BatchArgsOption.List:
-                if not use_internal_get_lr_this_step:
-                    batch = [*batch, learning_rate]  # noqa: PLW2901
-                if not use_internal_loss_scaler and fp16:
-                    batch = [*batch, loss_scale]  # noqa: PLW2901
-                outputs = model.train_step(*batch)
-            elif batch_args_option == BatchArgsOption.Dict:
-                args, kwargs = split_batch(batch, model_desc.inputs_, 0)
-                if not use_internal_get_lr_this_step:
-                    kwargs["Learning_Rate"] = learning_rate
-                if not use_internal_loss_scaler and fp16:
-                    kwargs[model.loss_scale_input_name] = loss_scale
-                outputs = model.train_step(*args, **kwargs)
-            else:
-                args_count = int(len(model_desc.inputs_) / 2)  # approx helf args, half kwargs
-                args, kwargs = split_batch(batch, model_desc.inputs_, args_count)
-                if not use_internal_get_lr_this_step:
-                    kwargs["Learning_Rate"] = learning_rate
-                if not use_internal_loss_scaler and fp16:
-                    kwargs[model.loss_scale_input_name] = loss_scale
-                outputs = model.train_step(*args, **kwargs)
-
-    # eval
-    if batch_args_option == BatchArgsOption.List:
-        outputs = model.eval_step(*batch)
-    elif batch_args_option == BatchArgsOption.Dict:
-        args, kwargs = split_batch(batch, model_desc.inputs_, 0)
-        outputs = model.eval_step(*args, **kwargs)
-    else:
-        args_count = int(len(model_desc.inputs_) / 2)  # approx helf args, half kwargs
-        args, kwargs = split_batch(batch, model_desc.inputs_, args_count)
-        outputs = model.eval_step(*args, **kwargs)
-
-    return (output.cpu().numpy() for output in outputs)
diff --git a/orttraining/orttraining/test/python/orttraining_transformer_trainer.py b/orttraining/orttraining/test/python/orttraining_transformer_trainer.py
deleted file mode 100644
index bce726871bacf..0000000000000
--- a/orttraining/orttraining/test/python/orttraining_transformer_trainer.py
+++ /dev/null
@@ -1,357 +0,0 @@
-# adapted from Trainer.py of huggingface transformers
-
-import json
-import logging
-import os
-import random
-from typing import Callable, Dict, List, NamedTuple, Optional
-
-import numpy as np
-import torch
-from torch.utils.data.dataloader import DataLoader
-from torch.utils.data.dataset import Dataset
-from torch.utils.data.distributed import DistributedSampler
-from torch.utils.data.sampler import SequentialSampler
-from tqdm import tqdm, trange
-from transformers.data.data_collator import DefaultDataCollator
-from transformers.modeling_utils import PreTrainedModel
-from transformers.training_args import TrainingArguments
-
-import onnxruntime
-from onnxruntime.training import amp, optim, orttrainer
-
-try:
-    from torch.utils.tensorboard import SummaryWriter
-
-    _has_tensorboard = True
-except ImportError:
-    try:
-        from tensorboardX import SummaryWriter  # noqa: F401
-
-        _has_tensorboard = True
-    except ImportError:
-        _has_tensorboard = False
-
-
-def is_tensorboard_available():
-    return _has_tensorboard
-
-
-logger = logging.getLogger(__name__)
-
-
-def set_seed(seed: int):
-    random.seed(seed)
-    np.random.seed(seed)
-    torch.manual_seed(seed)
-    torch.cuda.manual_seed_all(seed)
-    onnxruntime.set_seed(seed)
-
-
-class EvalPrediction(NamedTuple):
-    predictions: np.ndarray
-    label_ids: np.ndarray
-
-
-class PredictionOutput(NamedTuple):
-    predictions: np.ndarray
-    label_ids: Optional[np.ndarray]
-    metrics: Optional[Dict[str, float]]
-
-
-class TrainOutput(NamedTuple):
-    global_step: int
-    training_loss: float
-
-
-def get_linear_schedule_with_warmup(num_warmup_steps, num_training_steps, base_lr):
-    def lr_lambda_linear(current_step):
-        if current_step < num_warmup_steps:
-            return float(current_step) / float(max(1, num_warmup_steps))
-        return max(0.0, float(num_training_steps - current_step) / float(max(1, num_training_steps - num_warmup_steps)))
-
-    def lambda_lr_get_lr(current_global_step):
-        # LambdaLR increment self.last_epoch at evert sept()
-        return base_lr * lr_lambda_linear(current_global_step)
-
-    return lambda_lr_get_lr
-
-
-class ORTTransformerTrainer:
-    """ """
-
-    model: PreTrainedModel
-    args: TrainingArguments
-    train_dataset: Dataset
-    eval_dataset: Dataset
-    compute_metrics: Callable[[EvalPrediction], Dict]
-
-    def __init__(
-        self,
-        model: PreTrainedModel,
-        model_desc: dict,
-        args: TrainingArguments,
-        train_dataset: Dataset,
-        eval_dataset: Dataset,
-        compute_metrics: Callable[[EvalPrediction], Dict],
-        world_size: Optional[int] = 1,
-    ):
-        """ """
-
-        self.model = model
-        self.model_desc = model_desc
-        self.args = args
-        self.world_size = world_size
-        self.data_collator = DefaultDataCollator()
-        self.train_dataset = train_dataset
-        self.eval_dataset = eval_dataset
-        self.compute_metrics = compute_metrics
-        set_seed(self.args.seed)
-        # Create output directory if needed
-        if self.args.local_rank in [-1, 0]:
-            os.makedirs(self.args.output_dir, exist_ok=True)
-
-    def get_train_dataloader(self) -> DataLoader:
-        if self.train_dataset is None:
-            raise ValueError("Trainer: training requires a train_dataset.")
-        train_sampler = (
-            SequentialSampler(self.train_dataset)
-            if self.args.local_rank == -1
-            else DistributedSampler(self.train_dataset)
-        )
-        return DataLoader(
-            self.train_dataset,
-            batch_size=self.args.train_batch_size,
-            sampler=train_sampler,
-            collate_fn=self.data_collator.collate_batch,
-        )
-
-    def get_eval_dataloader(self) -> DataLoader:
-        return DataLoader(
-            self.eval_dataset,
-            batch_size=self.args.eval_batch_size,
-            shuffle=False,
-            collate_fn=self.data_collator.collate_batch,
-        )
-
-    def get_test_dataloader(self, test_dataset: Dataset) -> DataLoader:
-        # We use the same batch_size as for eval.
-        return DataLoader(
-            test_dataset,
-            batch_size=self.args.eval_batch_size,
-            shuffle=False,
-            collate_fn=self.data_collator.collate_batch,
-        )
-
-    def train(self):
-        """
-        Main training entry point.
-        """
-        train_dataloader = self.get_train_dataloader()
-
-        if self.args.max_steps > 0:
-            t_total = self.args.max_steps
-            num_train_epochs = (
-                self.args.max_steps // (len(train_dataloader) // self.args.gradient_accumulation_steps) + 1
-            )
-        else:
-            t_total = int(len(train_dataloader) // self.args.gradient_accumulation_steps * self.args.num_train_epochs)
-            num_train_epochs = self.args.num_train_epochs
-
-        lr_scheduler = orttrainer.optim.LinearWarmupLRScheduler(t_total, self.args.warmup_steps / float(t_total))
-
-        loss_scaler = amp.DynamicLossScaler() if self.args.fp16 else None
-        device = self.args.device.type
-
-        device = f"{device}:{self.args.device.index}" if self.args.device.index else f"{device}:0"
-        options = orttrainer.ORTTrainerOptions(
-            {
-                "batch": {"gradient_accumulation_steps": self.args.gradient_accumulation_steps},
-                "device": {"id": device},
-                "mixed_precision": {"enabled": self.args.fp16, "loss_scaler": loss_scaler},
-                "debug": {
-                    "deterministic_compute": True,
-                },
-                "utils": {"grad_norm_clip": False},
-                "distributed": {
-                    # we are running single node multi gpu test. thus world_rank = local_rank
-                    # and world_size = self.args.n_gpu
-                    "world_rank": max(0, self.args.local_rank),
-                    "world_size": int(self.world_size),
-                    "local_rank": max(0, self.args.local_rank),
-                    "allreduce_post_accumulation": True,
-                },
-                "lr_scheduler": lr_scheduler,
-            }
-        )
-
-        param_optimizer = list(self.model.named_parameters())
-        params = [
-            {
-                "params": [n for n, p in param_optimizer if "bias" in n or "LayerNorm.weight" in n],
-                "weight_decay_mode": 1,
-            },
-            {
-                "params": [n for n, p in param_optimizer if not ("bias" in n or "LayerNorm.weight" in n)],
-                "weight_decay_mode": 1,
-            },
-        ]
-
-        optim_config = optim.AdamConfig(params=params, lr=2e-5, do_bias_correction=True)
-        self.model = orttrainer.ORTTrainer(self.model, self.model_desc, optim_config, options=options)
-
-        # Train!
-        logger.info("***** Running training *****")
-        logger.info("  Num examples = %d", len(train_dataloader.dataset))
-        logger.info("  Num Epochs = %d", num_train_epochs)
-        logger.info("  Instantaneous batch size per GPU = %d", self.args.per_gpu_train_batch_size)
-        logger.info(
-            "  Total train batch size (w. parallel, distributed & accumulation) = %d",
-            self.args.train_batch_size
-            * self.args.gradient_accumulation_steps
-            * (torch.distributed.get_world_size() if self.args.local_rank != -1 else 1),
-        )
-        logger.info("  Gradient Accumulation steps = %d", self.args.gradient_accumulation_steps)
-        logger.info("  Total optimization steps = %d", t_total)
-
-        global_step = 0
-        epochs_trained = 0
-        steps_trained_in_current_epoch = 0
-
-        tr_loss = 0.0
-        logging_loss = 0.0
-        train_iterator = trange(
-            epochs_trained,
-            int(num_train_epochs),
-            desc="Epoch",
-            disable=self.args.local_rank not in [-1, 0],
-        )
-
-        for _epoch in train_iterator:
-            epoch_iterator = tqdm(train_dataloader, desc="Iteration", disable=self.args.local_rank not in [-1, 0])
-            for step, inputs in enumerate(epoch_iterator):
-                # Skip past any already trained steps if resuming training
-                if steps_trained_in_current_epoch > 0:
-                    steps_trained_in_current_epoch -= 1
-                    continue
-
-                tr_loss += self._training_step(self.model, inputs)
-
-                if (step + 1) % self.args.gradient_accumulation_steps == 0 or (
-                    len(epoch_iterator) <= self.args.gradient_accumulation_steps and (step + 1) == len(epoch_iterator)
-                ):
-                    global_step += 1
-
-                    if self.args.local_rank in [-1, 0]:
-                        if (self.args.logging_steps > 0 and global_step % self.args.logging_steps == 0) or (
-                            global_step == 1 and self.args.logging_first_step
-                        ):
-                            logs = {}
-                            if self.args.evaluate_during_training:
-                                results = self.evaluate()
-                                for key, value in results.items():
-                                    eval_key = f"eval_{key}"
-                                    logs[eval_key] = value
-
-                            loss_scalar = (tr_loss - logging_loss) / self.args.logging_steps
-
-                            logs["loss"] = loss_scalar
-                            logging_loss = tr_loss
-
-                            epoch_iterator.write(json.dumps({**logs, **{"step": global_step}}))
-
-                if self.args.max_steps > 0 and global_step > self.args.max_steps:
-                    epoch_iterator.close()
-                    break
-            if self.args.max_steps > 0 and global_step > self.args.max_steps:
-                train_iterator.close()
-                break
-
-        logger.info("\n\nTraining completed. \n\n")
-        return TrainOutput(global_step, tr_loss / global_step)
-
-    def _training_step(self, model, inputs: Dict[str, torch.Tensor]) -> float:
-        for k, v in inputs.items():
-            inputs[k] = v.to(self.args.device)
-
-        outputs = model.train_step(**inputs)
-        loss = outputs[0]  # model outputs are always tuple in transformers (see doc)
-
-        return loss.item()
-
-    def save_model(self, output_dir: Optional[str] = None):
-        output_dir = output_dir if output_dir is not None else self.args.output_dir
-        os.makedirs(output_dir, exist_ok=True)
-        self.model.save_as_onnx(os.path.join(output_dir, "transformer.onnx"))
-
-    def evaluate(self) -> Dict[str, float]:
-        """
-        Run evaluation and return metrics.
-
-        Returns:
-            A dict containing:
-                - the eval loss
-                - the potential metrics computed from the predictions
-        """
-        eval_dataloader = self.get_eval_dataloader()
-
-        output = self._prediction_loop(eval_dataloader, description="Evaluation")
-        return output.metrics
-
-    def predict(self, test_dataset: Dataset) -> PredictionOutput:
-        """
-        Run prediction and return predictions and potential metrics.
-
-        Depending on the dataset and your use case, your test dataset may contain labels.
-        In that case, this method will also return metrics, like in evaluate().
-        """
-        test_dataloader = self.get_test_dataloader(test_dataset)
-        return self._prediction_loop(test_dataloader, description="Prediction")
-
-    def _prediction_loop(self, dataloader: DataLoader, description: str) -> PredictionOutput:
-        """
-        Prediction/evaluation loop, shared by `evaluate()` and `predict()`.
-
-        Works both with or without labels.
-        """
-
-        logger.info("***** Running %s *****", description)
-        logger.info("  Num examples = %d", len(dataloader.dataset))
-        logger.info("  Batch size = %d", dataloader.batch_size)
-        eval_losses: List[float] = []
-        preds: np.ndarray = None
-        label_ids: np.ndarray = None
-
-        for inputs in tqdm(dataloader, desc=description):
-            has_labels = any(inputs.get(k) is not None for k in ["labels", "masked_lm_labels"])
-
-            for k, v in inputs.items():
-                inputs[k] = v.to(self.args.device)
-
-            with torch.no_grad():
-                outputs = self.model.eval_step(**inputs)
-
-                if has_labels:
-                    step_eval_loss, logits = outputs[:2]
-                    eval_losses += [step_eval_loss.mean().item()]
-                else:
-                    logits = outputs[0]
-
-            if preds is None:
-                preds = logits.detach().cpu().numpy()
-            else:
-                preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
-            if inputs.get("labels") is not None:
-                if label_ids is None:
-                    label_ids = inputs["labels"].detach().cpu().numpy()
-                else:
-                    label_ids = np.append(label_ids, inputs["labels"].detach().cpu().numpy(), axis=0)
-
-        if self.compute_metrics is not None and preds is not None and label_ids is not None:
-            metrics = self.compute_metrics(EvalPrediction(predictions=preds, label_ids=label_ids))
-        else:
-            metrics = {}
-        if len(eval_losses) > 0:
-            metrics["loss"] = np.mean(eval_losses)
-
-        return PredictionOutput(predictions=preds, label_ids=label_ids, metrics=metrics)
diff --git a/orttraining/orttraining/test/python/utils_multiple_choice.py b/orttraining/orttraining/test/python/utils_multiple_choice.py
deleted file mode 100644
index f425cf3d61545..0000000000000
--- a/orttraining/orttraining/test/python/utils_multiple_choice.py
+++ /dev/null
@@ -1,271 +0,0 @@
-# adapted from run_multiple_choice.py of huggingface transformers
-# https://github.com/huggingface/transformers/blob/master/examples/multiple-choice/utils_multiple_choice.py
-
-import csv
-import glob  # noqa: F401
-import json  # noqa: F401
-import logging
-import os
-from dataclasses import dataclass
-from enum import Enum
-from typing import List, Optional
-
-import torch
-import tqdm
-from filelock import FileLock
-from torch.utils.data.dataset import Dataset
-from transformers import PreTrainedTokenizer, is_tf_available, is_torch_available  # noqa: F401
-
-logger = logging.getLogger(__name__)
-
-
-@dataclass(frozen=True)
-class InputExample:
-    """
-    A single training/test example for multiple choice
-
-    Args:
-        example_id: Unique id for the example.
-        question: string. The untokenized text of the second sequence (question).
-        contexts: list of str. The untokenized text of the first sequence (context of corresponding question).
-        endings: list of str. multiple choice's options. Its length must be equal to contexts' length.
-        label: (Optional) string. The label of the example. This should be
-        specified for train and dev examples, but not for test examples.
-    """
-
-    example_id: str
-    question: str
-    contexts: List[str]
-    endings: List[str]
-    label: Optional[str]
-
-
-@dataclass(frozen=True)
-class InputFeatures:
-    """
-    A single set of features of data.
-    Property names are the same names as the corresponding inputs to a model.
-    """
-
-    example_id: str
-    input_ids: List[List[int]]
-    attention_mask: Optional[List[List[int]]]
-    token_type_ids: Optional[List[List[int]]]
-    label: Optional[int]
-
-
-class Split(Enum):
-    train = "train"
-    dev = "dev"
-    test = "test"
-
-
-class DataProcessor:
-    """Base class for data converters for multiple choice data sets."""
-
-    def get_train_examples(self, data_dir):
-        """Gets a collection of `InputExample`s for the train set."""
-        raise NotImplementedError()
-
-    def get_dev_examples(self, data_dir):
-        """Gets a collection of `InputExample`s for the dev set."""
-        raise NotImplementedError()
-
-    def get_test_examples(self, data_dir):
-        """Gets a collection of `InputExample`s for the test set."""
-        raise NotImplementedError()
-
-    def get_labels(self):
-        """Gets the list of labels for this data set."""
-        raise NotImplementedError()
-
-
-class MultipleChoiceDataset(Dataset):
-    """
-    This will be superseded by a framework-agnostic approach
-    soon.
-    """
-
-    features: List[InputFeatures]
-
-    def __init__(
-        self,
-        data_dir: str,
-        tokenizer: PreTrainedTokenizer,
-        task: str,
-        processor: DataProcessor,
-        max_seq_length: Optional[int] = None,
-        overwrite_cache=False,
-        mode: Split = Split.train,
-    ):
-        processor = processor
-
-        cached_features_file = os.path.join(
-            data_dir,
-            "cached_{}_{}_{}_{}".format(
-                mode.value,
-                tokenizer.__class__.__name__,
-                str(max_seq_length),
-                task,
-            ),
-        )
-
-        # Make sure only the first process in distributed training processes the dataset,
-        # and the others will use the cache.
-        lock_path = cached_features_file + ".lock"
-        with FileLock(lock_path):
-            if os.path.exists(cached_features_file) and not overwrite_cache:
-                logger.info(f"Loading features from cached file {cached_features_file}")
-                self.features = torch.load(cached_features_file)
-            else:
-                logger.info(f"Creating features from dataset file at {data_dir}")
-                label_list = processor.get_labels()
-                if mode == Split.dev:
-                    examples = processor.get_dev_examples(data_dir)
-                elif mode == Split.test:
-                    examples = processor.get_test_examples(data_dir)
-                else:
-                    examples = processor.get_train_examples(data_dir)
-                logger.info("Training examples: %s", len(examples))
-                # TODO clean up all this to leverage built-in features of tokenizers
-                self.features = convert_examples_to_features(
-                    examples,
-                    label_list,
-                    max_seq_length,
-                    tokenizer,
-                    pad_on_left=bool(tokenizer.padding_side == "left"),
-                    pad_token=tokenizer.pad_token_id,
-                    pad_token_segment_id=tokenizer.pad_token_type_id,
-                )
-                logger.info("Saving features into cached file %s", cached_features_file)
-                torch.save(self.features, cached_features_file)
-
-    def __len__(self):
-        return len(self.features)
-
-    def __getitem__(self, i) -> InputFeatures:
-        return self.features[i]
-
-
-class SwagProcessor(DataProcessor):
-    """Processor for the SWAG data set."""
-
-    def get_train_examples(self, data_dir):
-        """See base class."""
-        logger.info(f"LOOKING AT {data_dir} train")
-        return self._create_examples(self._read_csv(os.path.join(data_dir, "train.csv")), "train")
-
-    def get_dev_examples(self, data_dir):
-        """See base class."""
-        logger.info(f"LOOKING AT {data_dir} dev")
-        return self._create_examples(self._read_csv(os.path.join(data_dir, "val.csv")), "dev")
-
-    def get_test_examples(self, data_dir):
-        """See base class."""
-        logger.info(f"LOOKING AT {data_dir} dev")
-        raise ValueError(
-            "For swag testing, the input file does not contain a label column. It can not be tested in current code"
-            "setting!"
-        )
-        return self._create_examples(self._read_csv(os.path.join(data_dir, "test.csv")), "test")
-
-    def get_labels(self):
-        """See base class."""
-        return ["0", "1", "2", "3"]
-
-    def _read_csv(self, input_file):
-        with open(input_file, encoding="utf-8") as f:
-            return list(csv.reader(f))
-
-    def _create_examples(self, lines: List[List[str]], type: str):
-        """Creates examples for the training and dev sets."""
-        if type == "train" and lines[0][-1] != "label":
-            raise ValueError("For training, the input file must contain a label column.")
-
-        examples = [
-            InputExample(
-                example_id=line[2],
-                question=line[5],  # in the swag dataset, the
-                # common beginning of each
-                # choice is stored in "sent2".
-                contexts=[line[4], line[4], line[4], line[4]],
-                endings=[line[7], line[8], line[9], line[10]],
-                label=line[11],
-            )
-            for line in lines[1:]  # we skip the line with the column names
-        ]
-
-        return examples
-
-
-def convert_examples_to_features(
-    examples: List[InputExample],
-    label_list: List[str],
-    max_length: int,
-    tokenizer: PreTrainedTokenizer,
-    pad_token_segment_id=0,
-    pad_on_left=False,
-    pad_token=0,
-    mask_padding_with_zero=True,
-) -> List[InputFeatures]:
-    """
-    Loads a data file into a list of `InputFeatures`
-    """
-
-    label_map = {label: i for i, label in enumerate(label_list)}
-
-    features = []
-    for ex_index, example in tqdm.tqdm(enumerate(examples), desc="convert examples to features"):
-        if ex_index % 10000 == 0:
-            logger.info("Writing example %d of %d" % (ex_index, len(examples)))
-        choices_inputs = []
-        for _ending_idx, (context, ending) in enumerate(zip(example.contexts, example.endings)):
-            text_a = context
-            if example.question.find("_") != -1:
-                # this is for cloze question
-                text_b = example.question.replace("_", ending)
-            else:
-                text_b = example.question + " " + ending
-
-            inputs = tokenizer.encode_plus(
-                text_a,
-                text_b,
-                add_special_tokens=True,
-                max_length=max_length,
-                pad_to_max_length=True,
-                return_overflowing_tokens=True,
-            )
-            if "num_truncated_tokens" in inputs and inputs["num_truncated_tokens"] > 0:
-                logger.info(
-                    "Attention! you are cropping tokens (swag task is ok). "
-                    "If you are training ARC and RACE and you are poping question + options,"
-                    "you need to try to use a bigger max seq length!"
-                )
-
-            choices_inputs.append(inputs)
-
-        label = label_map[example.label]
-
-        input_ids = [x["input_ids"] for x in choices_inputs]
-        attention_mask = (
-            [x["attention_mask"] for x in choices_inputs] if "attention_mask" in choices_inputs[0] else None
-        )
-        token_type_ids = (
-            [x["token_type_ids"] for x in choices_inputs] if "token_type_ids" in choices_inputs[0] else None
-        )
-
-        features.append(
-            InputFeatures(
-                example_id=example.example_id,
-                input_ids=input_ids,
-                attention_mask=attention_mask,
-                token_type_ids=token_type_ids,
-                label=label,
-            )
-        )
-
-    for f in features[:2]:
-        logger.info("*** Example ***")
-        logger.info("feature: %s" % f)
-
-    return features
diff --git a/orttraining/orttraining/test/training_api/core/training_capi_tests.cc b/orttraining/orttraining/test/training_api/core/training_capi_tests.cc
index d734be8e3474b..e46952d87c2bf 100644
--- a/orttraining/orttraining/test/training_api/core/training_capi_tests.cc
+++ b/orttraining/orttraining/test/training_api/core/training_capi_tests.cc
@@ -318,4 +318,106 @@ TEST(TrainingCApiTest, LoadModelsFromBufferThrows) {
                 testing::HasSubstr("Training Session Creation failed. Train model data cannot be NULL."));
   }
 }
+
+TEST(TrainingCApiTest, GetParameter) {
+  auto model_uri = MODEL_FOLDER "training_model.onnx";
+
+  Ort::Env env;
+  Ort::CheckpointState checkpoint_state = Ort::CheckpointState::LoadCheckpoint(MODEL_FOLDER "checkpoint.ckpt");
+  Ort::TrainingSession training_session = Ort::TrainingSession(env, Ort::SessionOptions(), checkpoint_state, model_uri);
+
+  Ort::Value parameter = checkpoint_state.GetParameter("fc1.weight");
+  auto tensor_info = parameter.GetTensorTypeAndShapeInfo();
+  auto shape = tensor_info.GetShape();
+  ASSERT_EQ(shape.size(), 2U);
+  ASSERT_EQ(shape.front(), static_cast<int64_t>(500));
+  ASSERT_EQ(shape.back(), static_cast<int64_t>(784));
+}
+
+TEST(TrainingCApiTest, UpdateParameter) {
+  auto model_uri = MODEL_FOLDER "training_model.onnx";
+
+  Ort::Env env;
+  Ort::CheckpointState checkpoint_state = Ort::CheckpointState::LoadCheckpoint(MODEL_FOLDER "checkpoint.ckpt");
+  Ort::TrainingSession training_session = Ort::TrainingSession(env, Ort::SessionOptions(), checkpoint_state, model_uri);
+
+  Ort::Value parameter = checkpoint_state.GetParameter("fc1.weight");
+  auto tensor_info = parameter.GetTensorTypeAndShapeInfo();
+  auto shape = tensor_info.GetShape();
+  ASSERT_EQ(shape.size(), 2U);
+  ASSERT_EQ(shape.front(), static_cast<int64_t>(500));
+  ASSERT_EQ(shape.back(), static_cast<int64_t>(784));
+
+  OrtValue* updated_param_value = std::make_unique<OrtValue>().release();
+  GenerateRandomInput(std::array<int64_t, 2>{500, 784}, *updated_param_value);
+  Ort::Value updated_parameter{updated_param_value};
+  checkpoint_state.UpdateParameter("fc1.weight", updated_parameter);
+
+  Ort::Value current_parameter = checkpoint_state.GetParameter("fc1.weight");
+  gsl::span actual = gsl::span(current_parameter.GetTensorMutableData<float>(),
+                               current_parameter.GetTensorTypeAndShapeInfo().GetElementCount());
+  gsl::span expected = gsl::span(updated_parameter.GetTensorMutableData<float>(),
+                                 updated_parameter.GetTensorTypeAndShapeInfo().GetElementCount());
+  gsl::span not_expected = gsl::span(parameter.GetTensorMutableData<float>(),
+                                     parameter.GetTensorTypeAndShapeInfo().GetElementCount());
+  ASSERT_EQ(actual, expected);
+  ASSERT_NE(actual, not_expected);
+
+  checkpoint_state.UpdateParameter("fc1.weight", parameter);
+  current_parameter = checkpoint_state.GetParameter("fc1.weight");
+  actual = gsl::span(current_parameter.GetTensorMutableData<float>(),
+                     current_parameter.GetTensorTypeAndShapeInfo().GetElementCount());
+  expected = gsl::span(parameter.GetTensorMutableData<float>(),
+                       parameter.GetTensorTypeAndShapeInfo().GetElementCount());
+  not_expected = gsl::span(updated_parameter.GetTensorMutableData<float>(),
+                           updated_parameter.GetTensorTypeAndShapeInfo().GetElementCount());
+  ASSERT_EQ(actual, expected);
+  ASSERT_NE(actual, not_expected);
+}
+
+#ifdef USE_CUDA
+TEST(TrainingCApiTest, UpdateParameterDifferentDevices) {
+  auto model_uri = MODEL_FOLDER "training_model.onnx";
+
+  Ort::Env env;
+  Ort::SessionOptions session_options;
+  Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(session_options, 0));
+  Ort::CheckpointState checkpoint_state = Ort::CheckpointState::LoadCheckpoint(MODEL_FOLDER "checkpoint.ckpt");
+  Ort::TrainingSession training_session = Ort::TrainingSession(env, session_options, checkpoint_state, model_uri);
+
+  Ort::Value parameter = checkpoint_state.GetParameter("fc1.weight");
+  auto tensor_info = parameter.GetTensorTypeAndShapeInfo();
+  auto shape = tensor_info.GetShape();
+  ASSERT_EQ(shape.size(), 2U);
+  ASSERT_EQ(shape.front(), static_cast<int64_t>(500));
+  ASSERT_EQ(shape.back(), static_cast<int64_t>(784));
+
+  OrtValue* updated_param_value = std::make_unique<OrtValue>().release();
+  GenerateRandomInput(std::array<int64_t, 2>{500, 784}, *updated_param_value);
+  Ort::Value updated_parameter{updated_param_value};
+  checkpoint_state.UpdateParameter("fc1.weight", updated_parameter);
+
+  Ort::Value current_parameter = checkpoint_state.GetParameter("fc1.weight");
+  gsl::span actual = gsl::span(current_parameter.GetTensorMutableData<float>(),
+                               current_parameter.GetTensorTypeAndShapeInfo().GetElementCount());
+  gsl::span expected = gsl::span(updated_parameter.GetTensorMutableData<float>(),
+                                 updated_parameter.GetTensorTypeAndShapeInfo().GetElementCount());
+  gsl::span not_expected = gsl::span(parameter.GetTensorMutableData<float>(),
+                                     parameter.GetTensorTypeAndShapeInfo().GetElementCount());
+  ASSERT_EQ(actual, expected);
+  ASSERT_NE(actual, not_expected);
+
+  checkpoint_state.UpdateParameter("fc1.weight", parameter);
+  current_parameter = checkpoint_state.GetParameter("fc1.weight");
+  actual = gsl::span(current_parameter.GetTensorMutableData<float>(),
+                     current_parameter.GetTensorTypeAndShapeInfo().GetElementCount());
+  expected = gsl::span(parameter.GetTensorMutableData<float>(),
+                       parameter.GetTensorTypeAndShapeInfo().GetElementCount());
+  not_expected = gsl::span(updated_parameter.GetTensorMutableData<float>(),
+                           updated_parameter.GetTensorTypeAndShapeInfo().GetElementCount());
+  ASSERT_EQ(actual, expected);
+  ASSERT_NE(actual, not_expected);
+}
+#endif
+
 }  // namespace onnxruntime::training::test
diff --git a/orttraining/orttraining/test/training_ops/cpu/reduction/reduction_ops_test.cc b/orttraining/orttraining/test/training_ops/cpu/reduction/reduction_ops_test.cc
index be8b0aaa0bce1..60c3ecbcce8ce 100644
--- a/orttraining/orttraining/test/training_ops/cpu/reduction/reduction_ops_test.cc
+++ b/orttraining/orttraining/test/training_ops/cpu/reduction/reduction_ops_test.cc
@@ -275,7 +275,6 @@ void TestMultiTensorReduce(
   test.SetDeterminism(use_determinism);
 
   // Set up random number generator.
-  std::random_device random_device;
   std::mt19937 random_engine(0);
   std::uniform_real_distribution<float> dist(min, max);
   std::uniform_int_distribution<int64_t> dist_int(min_tensor_size, max_tensor_size);
diff --git a/orttraining/orttraining/test/training_ops/cuda/flatten_and_unpad_test.cc b/orttraining/orttraining/test/training_ops/cuda/flatten_and_unpad_test.cc
new file mode 100644
index 0000000000000..dd5fa18ab3edd
--- /dev/null
+++ b/orttraining/orttraining/test/training_ops/cuda/flatten_and_unpad_test.cc
@@ -0,0 +1,157 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "test/common/tensor_op_test_utils.h"
+#include "test/providers/provider_test_utils.h"
+
+namespace onnxruntime {
+namespace test {
+
+#if defined(USE_CUDA) || defined(USE_ROCM)
+
+TEST(FlattenAndUnpadTest, Int32Type2D) {
+  std::vector<int32_t> input = {1, 1, 3, 2, 0, 3, 0, 4,
+                                0, 5, 0, 6, 0, 0, 0};
+  std::vector<int64_t> indices = {1, 3, 5, 7, 9, 11};
+
+  std::vector<int32_t> output = {1, 2, 3, 4, 5, 6};
+  std::vector<int64_t> unflatten_dims = {5, 3};
+
+  OpTester test("FlattenAndUnpad", 1, onnxruntime::kMSDomain);
+  test.AddInput<int32_t>("input", {5, 3}, input);
+  test.AddInput<int64_t>("indices", {6}, indices);
+  test.AddOutput<int32_t>("output", {6}, output);
+  test.AddOutput<int64_t>("unflatten_dims", {2}, unflatten_dims);
+  test.Run();
+}
+
+TEST(FlattenAndUnpadTest, Int32Type3D) {
+  std::vector<int32_t> input = {0, 0, 0, 1, 2, 3, 0, 0, 0,
+                                4, 5, 6, 7, 8, 9, 0, 0, 0};
+  std::vector<int64_t> indices = {1, 3, 4};
+
+  std::vector<int32_t> output = {1, 2, 3, 4, 5, 6, 7, 8, 9};
+  std::vector<int64_t> unflatten_dims = {2, 3};
+
+  OpTester test("FlattenAndUnpad", 1, onnxruntime::kMSDomain);
+  test.AddInput<int32_t>("input", {2, 3, 3}, input);
+  test.AddInput<int64_t>("indices", {3}, indices);
+  test.AddOutput<int32_t>("output", {3, 3}, output);
+  test.AddOutput<int64_t>("unflatten_dims", {2}, unflatten_dims);
+  test.Run();
+}
+
+TEST(FlattenAndUnpadTest, Int64Type2D) {
+  std::vector<int64_t> input = {1, 1, 3, 2, 0, 3, 0, 4,
+                                0, 5, 0, 6, 0, 0, 0};
+  std::vector<int64_t> indices = {1, 3, 5, 7, 9, 11};
+
+  std::vector<int64_t> output = {1, 2, 3, 4, 5, 6};
+  std::vector<int64_t> unflatten_dims = {5, 3};
+
+  OpTester test("FlattenAndUnpad", 1, onnxruntime::kMSDomain);
+  test.AddInput<int64_t>("input", {5, 3}, input);
+  test.AddInput<int64_t>("indices", {6}, indices);
+  test.AddOutput<int64_t>("output", {6}, output);
+  test.AddOutput<int64_t>("unflatten_dims", {2}, unflatten_dims);
+  test.Run();
+}
+
+TEST(FlattenAndUnpadTest, Int64Type3D) {
+  std::vector<int64_t> input = {0, 0, 0, 1, 2, 3, 0, 0, 0,
+                                4, 5, 6, 7, 8, 9, 0, 0, 0};
+  std::vector<int64_t> indices = {1, 3, 4};
+
+  std::vector<int64_t> output = {1, 2, 3, 4, 5, 6, 7, 8, 9};
+  std::vector<int64_t> unflatten_dims = {2, 3};
+
+  OpTester test("FlattenAndUnpad", 1, onnxruntime::kMSDomain);
+  test.AddInput<int64_t>("input", {2, 3, 3}, input);
+  test.AddInput<int64_t>("indices", {3}, indices);
+  test.AddOutput<int64_t>("output", {3, 3}, output);
+  test.AddOutput<int64_t>("unflatten_dims", {2}, unflatten_dims);
+  test.Run();
+}
+
+TEST(FlattenAndUnpadTest, FloatType2D) {
+  std::vector<float> input = {1.0f, 1.0f, 3.0f, 2.0f, 0.0f, 3.0f, 0.0f, 4.0f,
+                              0.0f, 5.0f, 0.0f, 6.0f, 0.0f, 0.0f, 0.0f};
+  std::vector<int64_t> indices = {1, 3, 5, 7, 9, 11};
+
+  std::vector<float> output = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.f};
+  std::vector<int64_t> unflatten_dims = {5, 3};
+
+  OpTester test("FlattenAndUnpad", 1, onnxruntime::kMSDomain);
+  test.AddInput<float>("input", {5, 3}, input);
+  test.AddInput<int64_t>("indices", {6}, indices);
+  test.AddOutput<float>("output", {6}, output);
+  test.AddOutput<int64_t>("unflatten_dims", {2}, unflatten_dims);
+  test.Run();
+}
+
+TEST(FlattenAndUnpadTest, FloatType3D) {
+  std::vector<float> input = {0.0f, 0.0f, 0.0f, 1.0f, 2.0f, 3.0f, 0.0f, 0.0f, 0.0f,
+                              4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 0.0f, 0.0f, 0.0f};
+  std::vector<int64_t> indices = {1, 3, 4};
+
+  std::vector<float> output = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.f, 7.f, 8.f, 9.f};
+  std::vector<int64_t> unflatten_dims = {2, 3};
+
+  OpTester test("FlattenAndUnpad", 1, onnxruntime::kMSDomain);
+  test.AddInput<float>("input", {2, 3, 3}, input);
+  test.AddInput<int64_t>("indices", {3}, indices);
+  test.AddOutput<float>("output", {3, 3}, output);
+  test.AddOutput<int64_t>("unflatten_dims", {2}, unflatten_dims);
+  test.Run();
+}
+
+TEST(FlattenAndUnpadTest, MLFloat16Type2D) {
+  std::vector<float> input = {0.0f, 1.0f, 0.0f, 2.0f, 0.0f, 3.0f, 0.0f, 4.0f,
+                              0.0f, 5.0f, 0.0f, 6.0f, 0.0f, 0.0f, 0.0f};
+  std::vector<int64_t> indices = {1, 3, 5, 7, 9, 11};
+
+  std::vector<float> output = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.f};
+  std::vector<int64_t> unflatten_dims = {5, 3};
+
+  std::vector<MLFloat16> input_half;
+  input_half.resize(input.size());
+  ConvertFloatToMLFloat16(input.data(), input_half.data(), static_cast<int>(input.size()));
+  std::vector<MLFloat16> output_half;
+  output_half.resize(output.size());
+  ConvertFloatToMLFloat16(output.data(), output_half.data(), static_cast<int>(output.size()));
+
+  OpTester test("FlattenAndUnpad", 1, onnxruntime::kMSDomain);
+  test.AddInput<MLFloat16>("input", {5, 3}, input_half);
+  test.AddInput<int64_t>("indices", {6}, indices);
+  test.AddOutput<MLFloat16>("output", {6}, output_half);
+  test.AddOutput<int64_t>("unflatten_dims", {2}, unflatten_dims);
+  test.Run();
+}
+
+TEST(FlattenAndUnpadTest, MLFloat16Type3D) {
+  std::vector<float> input = {0.0f, 0.0f, 0.0f, 1.0f, 2.0f, 3.0f, 0.0f, 0.0f, 0.0f,
+                              4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 0.0f, 0.0f, 0.0f};
+  std::vector<int64_t> indices = {1, 3, 4};
+
+  std::vector<float> output = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.f, 7.f, 8.f, 9.f};
+  std::vector<int64_t> unflatten_dims = {2, 3};
+
+  std::vector<MLFloat16> input_half;
+  input_half.resize(input.size());
+  ConvertFloatToMLFloat16(input.data(), input_half.data(), static_cast<int>(input.size()));
+  std::vector<MLFloat16> output_half;
+  output_half.resize(output.size());
+  ConvertFloatToMLFloat16(output.data(), output_half.data(), static_cast<int>(output.size()));
+
+  OpTester test("FlattenAndUnpad", 1, onnxruntime::kMSDomain);
+  test.AddInput<MLFloat16>("input", {2, 3, 3}, input_half);
+  test.AddInput<int64_t>("indices", {3}, indices);
+  test.AddOutput<MLFloat16>("output", {3, 3}, output_half);
+  test.AddOutput<int64_t>("unflatten_dims", {2}, unflatten_dims);
+  test.Run();
+}
+
+#endif
+
+}  // namespace test
+}  // namespace onnxruntime
diff --git a/orttraining/orttraining/test/training_ops/cuda/pad_and_unflatten_test.cc b/orttraining/orttraining/test/training_ops/cuda/pad_and_unflatten_test.cc
index a800f17e59ae0..9a86955e09379 100644
--- a/orttraining/orttraining/test/training_ops/cuda/pad_and_unflatten_test.cc
+++ b/orttraining/orttraining/test/training_ops/cuda/pad_and_unflatten_test.cc
@@ -17,14 +17,11 @@ TEST(PadAndUnflattenTest, FloatType1D) {
   std::vector<float> output = {0.0f, 1.0f, 0.0f, 2.0f, 0.0f, 3.0f, 0.0f, 4.0f,
                                0.0f, 5.0f, 0.0f, 6.0f, 0.0f, 0.0f, 0.0f};
 
-  std::vector<int64_t> full_flatten_dims = {15};
-
   OpTester test("PadAndUnflatten", 1, onnxruntime::kMSDomain);
   test.AddInput<float>("input", {6}, input);
   test.AddInput<int64_t>("indices", {6}, indices);
   test.AddInput<int64_t>("unflatten_dims", {2}, unflatten_dims);
   test.AddOutput<float>("output", {5, 3}, output);
-  test.AddOutput<int64_t>("full_flatten_dims", {1}, full_flatten_dims);
   test.Run();
 }
 
@@ -36,14 +33,11 @@ TEST(PadAndUnflattenTest, FloatType2D) {
   std::vector<float> output = {0.0f, 0.0f, 0.0f, 1.0f, 2.0f, 3.0f, 0.0f, 0.0f, 0.0f,
                                4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 0.0f, 0.0f, 0.0f};
 
-  std::vector<int64_t> full_flatten_dims = {6, 3};
-
   OpTester test("PadAndUnflatten", 1, onnxruntime::kMSDomain);
   test.AddInput<float>("input", {3, 3}, input);
   test.AddInput<int64_t>("indices", {3}, indices);
   test.AddInput<int64_t>("unflatten_dims", {2}, unflatten_dims);
   test.AddOutput<float>("output", {2, 3, 3}, output);
-  test.AddOutput<int64_t>("full_flatten_dims", {2}, full_flatten_dims);
   test.Run();
 }
 
@@ -55,8 +49,6 @@ TEST(PadAndUnflattenTest, MLFloat16Type1D) {
   std::vector<float> output = {0.0f, 1.0f, 0.0f, 2.0f, 0.0f, 3.0f, 0.0f, 4.0f,
                                0.0f, 5.0f, 0.0f, 6.0f, 0.0f, 0.0f, 0.0f};
 
-  std::vector<int64_t> full_flatten_dims = {15};
-
   std::vector<MLFloat16> input_half;
   input_half.resize(input.size());
   ConvertFloatToMLFloat16(input.data(), input_half.data(), int(input.size()));
@@ -69,7 +61,6 @@ TEST(PadAndUnflattenTest, MLFloat16Type1D) {
   test.AddInput<int64_t>("indices", {6}, indices);
   test.AddInput<int64_t>("unflatten_dims", {2}, unflatten_dims);
   test.AddOutput<MLFloat16>("output", {5, 3}, output_half);
-  test.AddOutput<int64_t>("full_flatten_dims", {1}, full_flatten_dims);
   test.Run();
 }
 
@@ -81,8 +72,6 @@ TEST(PadAndUnflattenTest, MLFloat16Type2D) {
   std::vector<float> output = {0.0f, 0.0f, 0.0f, 1.0f, 2.0f, 3.0f, 0.0f, 0.0f, 0.0f,
                                4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 0.0f, 0.0f, 0.0f};
 
-  std::vector<int64_t> full_flatten_dims = {6, 3};
-
   std::vector<MLFloat16> input_half;
   input_half.resize(input.size());
   ConvertFloatToMLFloat16(input.data(), input_half.data(), int(input.size()));
@@ -95,7 +84,6 @@ TEST(PadAndUnflattenTest, MLFloat16Type2D) {
   test.AddInput<int64_t>("indices", {3}, indices);
   test.AddInput<int64_t>("unflatten_dims", {2}, unflatten_dims);
   test.AddOutput<MLFloat16>("output", {2, 3, 3}, output_half);
-  test.AddOutput<int64_t>("full_flatten_dims", {2}, full_flatten_dims);
   test.Run();
 }
 
diff --git a/orttraining/orttraining/test/training_ops/cuda/resize_grad_test.cc b/orttraining/orttraining/test/training_ops/cuda/resize_grad_test.cc
new file mode 100644
index 0000000000000..8fc13af8816be
--- /dev/null
+++ b/orttraining/orttraining/test/training_ops/cuda/resize_grad_test.cc
@@ -0,0 +1,227 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "test/providers/compare_provider_test_utils.h"
+#include "test/providers/provider_test_utils.h"
+#include "test/util/include/default_providers.h"
+
+namespace onnxruntime::test {
+
+#if defined(USE_CUDA) || defined(USE_ROCM)
+
+namespace {
+
+void AddResizeGradAttributes(OpTester& test, const std::string& coordinate_transformation_mode) {
+  test.AddAttribute<std::string>("mode", "linear");
+  test.AddAttribute<std::string>("coordinate_transformation_mode", coordinate_transformation_mode);
+}
+
+}  // namespace
+
+TEST(ResizeGradTest, ResizeGradWithSizes) {
+  std::vector<std::unique_ptr<IExecutionProvider>> providers;
+#ifdef USE_CUDA
+  providers.emplace_back(DefaultCudaExecutionProvider());
+#elif USE_ROCM
+  providers.emplace_back(DefaultRocmExecutionProvider());
+#endif
+
+  OpTester test("ResizeGrad", 1, onnxruntime::kMSDomain);
+
+  AddResizeGradAttributes(test, "half_pixel");
+
+  std::vector<float> dY(128, 1.0f);
+  std::vector<int64_t> dY_shape = {1, 2, 8, 8};
+
+  std::vector<float> X(32, 1.0f);
+  std::vector<int64_t> X_shape = {1, 2, 4, 4};
+
+  std::vector<float> dX(32, 4.0f);
+  std::vector<int64_t> dX_shape = X_shape;
+
+  test.AddInput<float>("dY", dY_shape, dY);
+  test.AddInput<float>("X", X_shape, X);
+
+  test.AddOutput<float>("dX", dX_shape, dX);
+
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &providers);
+}
+
+TEST(ResizeGradTest, ResizeGradWithSizesHalf) {
+  std::vector<std::unique_ptr<IExecutionProvider>> providers;
+#ifdef USE_CUDA
+  providers.emplace_back(DefaultCudaExecutionProvider());
+#elif USE_ROCM
+  providers.emplace_back(DefaultRocmExecutionProvider());
+#endif
+
+  OpTester test("ResizeGrad", 1, onnxruntime::kMSDomain);
+
+  AddResizeGradAttributes(test, "half_pixel");
+
+  std::vector<float> dY(128, 1.0f);
+  std::vector<MLFloat16> dY_half(dY.size());
+  ConvertFloatToMLFloat16(dY.data(), dY_half.data(), static_cast<int>(dY.size()));
+  std::vector<int64_t> dY_shape = {1, 2, 8, 8};
+
+  std::vector<float> X(32, 1.0f);
+  std::vector<MLFloat16> X_half(X.size());
+  ConvertFloatToMLFloat16(X.data(), X_half.data(), static_cast<int>(X.size()));
+  std::vector<int64_t> X_shape = {1, 2, 4, 4};
+
+  std::vector<float> dX(32, 4.0f);
+  std::vector<MLFloat16> dX_half(dX.size());
+  ConvertFloatToMLFloat16(dX.data(), dX_half.data(), static_cast<int>(dX.size()));
+  std::vector<int64_t> dX_shape = X_shape;
+
+  test.AddInput<MLFloat16>("dY", dY_shape, dY_half);
+  test.AddInput<MLFloat16>("X", X_shape, X_half);
+
+  test.AddOutput<MLFloat16>("dX", dX_shape, dX_half);
+
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &providers);
+}
+
+TEST(ResizeGradTest, ResizeGradWithSizesAndAlignCorners) {
+  std::vector<std::unique_ptr<IExecutionProvider>> providers;
+#ifdef USE_CUDA
+  providers.emplace_back(DefaultCudaExecutionProvider());
+#elif USE_ROCM
+  providers.emplace_back(DefaultRocmExecutionProvider());
+#endif
+
+  OpTester test("ResizeGrad", 1, onnxruntime::kMSDomain);
+
+  AddResizeGradAttributes(test, "align_corners");
+
+  std::vector<float> dY(128, 1.0f);
+  std::vector<int64_t> dY_shape = {1, 2, 8, 8};
+
+  std::vector<float> X(32, 1.0f);
+  std::vector<int64_t> X_shape = {1, 2, 4, 4};
+
+  std::vector<float> dX({2.9388f, 3.9184f, 3.9184f, 2.9388f, 3.9184f, 5.2245f, 5.2245f, 3.9184f,
+                         3.9184f, 5.2245f, 5.2245f, 3.9184f, 2.9388f, 3.9184f, 3.9184f, 2.9388f,
+                         2.9388f, 3.9184f, 3.9184f, 2.9388f, 3.9184f, 5.2245f, 5.2245f, 3.9184f,
+                         3.9184f, 5.2245f, 5.2245f, 3.9184f, 2.9388f, 3.9184f, 3.9184f, 2.9388f});
+  std::vector<int64_t> dX_shape = X_shape;
+
+  test.AddInput<float>("dY", dY_shape, dY);
+  test.AddInput<float>("X", X_shape, X);
+
+  test.AddOutput<float>("dX", dX_shape, dX);
+
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &providers);
+}
+
+TEST(ResizeGradTest, ResizeGradWithScales) {
+  std::vector<std::unique_ptr<IExecutionProvider>> providers;
+#ifdef USE_CUDA
+  providers.emplace_back(DefaultCudaExecutionProvider());
+#elif USE_ROCM
+  providers.emplace_back(DefaultRocmExecutionProvider());
+#endif
+
+  OpTester test("ResizeGrad", 1, onnxruntime::kMSDomain);
+
+  AddResizeGradAttributes(test, "half_pixel");
+
+  std::vector<float> dY(72, 1.0f);
+  std::vector<int64_t> dY_shape = {1, 2, 6, 6};
+
+  std::vector<float> X(32, 1.0f);
+  std::vector<int64_t> X_shape = {1, 2, 4, 4};
+
+  std::vector<float> dX({2.7128f, 2.9550f, 2.7612f, 1.4533f, 2.9550f, 3.2189f, 3.0078f, 1.5830f,
+                         2.7612f, 3.0078f, 2.8106f, 1.4792f, 1.4533f, 1.5830f, 1.4792f, 0.7785f,
+                         2.7128f, 2.9550f, 2.7612f, 1.4533f, 2.9550f, 3.2189f, 3.0078f, 1.5830f,
+                         2.7612f, 3.0078f, 2.8106f, 1.4792f, 1.4533f, 1.5830f, 1.4792f, 0.7785f});
+  std::vector<int64_t> dX_shape = X_shape;
+
+  test.AddInput<float>("dY", dY_shape, dY);
+  test.AddInput<float>("X", X_shape, X);
+  test.AddInput<float>("", {0}, {});
+  test.AddInput<float>("scales", {4}, {1.0f, 1.0f, 1.7f, 1.7f});
+
+  test.AddOutput<float>("dX", dX_shape, dX);
+
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &providers);
+}
+
+TEST(ResizeGradTest, ResizeGradWithScalesHalf) {
+  std::vector<std::unique_ptr<IExecutionProvider>> providers;
+#ifdef USE_CUDA
+  providers.emplace_back(DefaultCudaExecutionProvider());
+#elif USE_ROCM
+  providers.emplace_back(DefaultRocmExecutionProvider());
+#endif
+
+  OpTester test("ResizeGrad", 1, onnxruntime::kMSDomain);
+
+  AddResizeGradAttributes(test, "half_pixel");
+
+  std::vector<float> dY(72, 1.0f);
+  std::vector<MLFloat16> dY_half(dY.size());
+  ConvertFloatToMLFloat16(dY.data(), dY_half.data(), static_cast<int>(dY.size()));
+  std::vector<int64_t> dY_shape = {1, 2, 6, 6};
+
+  std::vector<float> X(32, 1.0f);
+  std::vector<MLFloat16> X_half(X.size());
+  ConvertFloatToMLFloat16(X.data(), X_half.data(), static_cast<int>(X.size()));
+  std::vector<int64_t> X_shape = {1, 2, 4, 4};
+
+  std::vector<float> dX({2.7128f, 2.9550f, 2.7612f, 1.4533f, 2.9550f, 3.2189f, 3.0078f, 1.5830f,
+                         2.7612f, 3.0078f, 2.8106f, 1.4792f, 1.4533f, 1.5830f, 1.4792f, 0.7785f,
+                         2.7128f, 2.9550f, 2.7612f, 1.4533f, 2.9550f, 3.2189f, 3.0078f, 1.5830f,
+                         2.7612f, 3.0078f, 2.8106f, 1.4792f, 1.4533f, 1.5830f, 1.4792f, 0.7785f});
+  std::vector<MLFloat16> dX_half(dX.size());
+  ConvertFloatToMLFloat16(dX.data(), dX_half.data(), static_cast<int>(dX.size()));
+  std::vector<int64_t> dX_shape = X_shape;
+
+  test.AddInput<MLFloat16>("dY", dY_shape, dY_half);
+  test.AddInput<MLFloat16>("X", X_shape, X_half);
+  test.AddInput<float>("", {0}, {});
+  test.AddInput<float>("scales", {4}, {1.0f, 1.0f, 1.7f, 1.7f});
+
+  test.AddOutput<MLFloat16>("dX", dX_shape, dX_half);
+
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &providers);
+}
+
+TEST(ResizeGradTest, ResizeGradWithScalesAndAlignCorners) {
+  std::vector<std::unique_ptr<IExecutionProvider>> providers;
+#ifdef USE_CUDA
+  providers.emplace_back(DefaultCudaExecutionProvider());
+#elif USE_ROCM
+  providers.emplace_back(DefaultRocmExecutionProvider());
+#endif
+
+  OpTester test("ResizeGrad", 1, onnxruntime::kMSDomain);
+
+  AddResizeGradAttributes(test, "align_corners");
+
+  std::vector<float> dY(72, 1.0f);
+  std::vector<int64_t> dY_shape = {1, 2, 6, 6};
+
+  std::vector<float> X(32, 1.0f);
+  std::vector<int64_t> X_shape = {1, 2, 4, 4};
+
+  std::vector<float> dX({1.9600f, 2.2400f, 2.2400f, 1.9600f, 2.2400f, 2.5600f, 2.5600f, 2.2400f,
+                         2.2400f, 2.5600f, 2.5600f, 2.2400f, 1.9600f, 2.2400f, 2.2400f, 1.9600f,
+                         1.9600f, 2.2400f, 2.2400f, 1.9600f, 2.2400f, 2.5600f, 2.5600f, 2.2400f,
+                         2.2400f, 2.5600f, 2.5600f, 2.2400f, 1.9600f, 2.2400f, 2.2400f, 1.9600f});
+  std::vector<int64_t> dX_shape = X_shape;
+
+  test.AddInput<float>("dY", dY_shape, dY);
+  test.AddInput<float>("X", X_shape, X);
+  test.AddInput<float>("", {0}, {});
+  test.AddInput<float>("scales", {4}, {1.0f, 1.0f, 1.7f, 1.7f});
+
+  test.AddOutput<float>("dX", dX_shape, dX);
+
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &providers);
+}
+
+#endif  // defined(USE_CUDA) || defined(USE_ROCM)
+
+}  // namespace onnxruntime::test
diff --git a/orttraining/orttraining/training_api/checkpoint_property.h b/orttraining/orttraining/training_api/checkpoint_property.h
index d7b1e295df53e..3c38c99b3152f 100644
--- a/orttraining/orttraining/training_api/checkpoint_property.h
+++ b/orttraining/orttraining/training_api/checkpoint_property.h
@@ -22,10 +22,12 @@ struct PropertyBag {
   PropertyBag() = default;
 
   void AddProperty(const std::string& name, const PropertyDataType& val) {
-    ORT_ENFORCE(named_properties_.find(name) == named_properties_.end(),
-                "Duplicated property named ", name);
-
-    named_properties_.insert({name, val});
+    auto it = named_properties_.find(name);
+    if (it == named_properties_.end()) {
+      named_properties_.insert({name, val});
+    } else {
+      it->second = val;
+    }
   }
 
   template <typename T>
diff --git a/orttraining/orttraining/training_api/include/onnxruntime_training_c_api.h b/orttraining/orttraining/training_api/include/onnxruntime_training_c_api.h
index 0af737074964d..0e8544a7639ba 100644
--- a/orttraining/orttraining/training_api/include/onnxruntime_training_c_api.h
+++ b/orttraining/orttraining/training_api/include/onnxruntime_training_c_api.h
@@ -608,14 +608,14 @@ struct OrtTrainingApi {
   /// \name Accessing The Training Session State
   /// @{
 
-  /** \brief Adds the given property to the checkpoint state.
+  /** \brief Adds or updates the given property to/in the checkpoint state.
    *
    * Runtime properties such as epoch, training step, best score, and others can be added to the checkpoint
-   * state by the user if they desire by calling this function with the appropriate property name and
-   * value. The given property name must be unique to be able to successfully add the property.
+   * state by the user by calling this function with the corresponding property name and value.
+   * The given property name must be unique to be able to successfully add the property.
    *
    * \param[in] checkpoint_state The checkpoint state which should hold the property.
-   * \param[in] property_name Unique name of the property being added.
+   * \param[in] property_name Name of the property being added or updated.
    * \param[in] property_type Type of the property associated with the given name.
    * \param[in] property_value Property value associated with the given name.
    *
@@ -632,7 +632,7 @@ struct OrtTrainingApi {
    * exist in the checkpoint state to be able to retrieve it successfully.
    *
    * \param[in] checkpoint_state The checkpoint state that is currently holding the property.
-   * \param[in] property_name Unique name of the property being retrieved.
+   * \param[in] property_name Name of the property being retrieved.
    * \param[in] allocator Allocator used to allocate the memory for the property_value.
    * \param[out] property_type Type of the property associated with the given name.
    * \param[out] property_value Property value associated with the given name.
@@ -669,6 +669,57 @@ struct OrtTrainingApi {
   ORT_API2_STATUS(LoadCheckpointFromBuffer, _In_ const void* checkpoint_buffer,
                   _In_ const size_t num_bytes, _Outptr_ OrtCheckpointState** checkpoint_state);
 
+  /** \brief Retrieves the type and shape information of the parameter associated with the given parameter name.
+   *
+   * This function retrieves the type and shape of the parameter associated with the given parameter name.
+   * The parameter must exist in the checkpoint state to be able to retrieve its type and shape information successfully.
+   *
+   * \param[in] checkpoint_state The checkpoint state.
+   * \param[in] parameter_name Name of the parameter being retrieved.
+   * \param[out] parameter_type_and_shape The type and shape of the parameter being retrieved.
+   *
+   * \snippet{doc} snippets.dox OrtStatus Return Value
+   *
+   */
+  ORT_API2_STATUS(GetParameterTypeAndShape, _In_ const OrtCheckpointState* checkpoint_state,
+                  _In_ const char* parameter_name, _Outptr_ OrtTensorTypeAndShapeInfo** parameter_type_and_shape);
+
+  /** \brief Updates the data associated with the model parameter in the checkpoint state for the given parameter name.
+   *
+   * This function updates a model parameter in the checkpoint state with the given parameter data.
+   * The training session must be already created with the checkpoint state that contains the parameter
+   * being updated. The given parameter is copied over to the registered device for the training session.
+   * The parameter must exist in the checkpoint state to be able to update it successfully.
+   *
+   * \param[in] checkpoint_state The checkpoint state.
+   * \param[in] parameter_name Name of the parameter being updated.
+   * \param[in] parameter The parameter data that should replace the existing parameter data.
+   *
+   * \snippet{doc} snippets.dox OrtStatus Return Value
+   *
+   */
+  ORT_API2_STATUS(UpdateParameter, _Inout_ OrtCheckpointState* checkpoint_state,
+                  _In_ const char* parameter_name, _In_ OrtValue* parameter);
+
+  /** \brief Gets the data associated with the model parameter from the checkpoint state for the given parameter name.
+   *
+   * This function retrieves the model parameter data from the checkpoint state for the given parameter name.
+   * The parameter is copied over and returned as an OrtValue. The training session must be already created
+   * with the checkpoint state that contains the parameter being retrieved.
+   * The parameter must exist in the checkpoint state to be able to retrieve it successfully.
+   *
+   * \param[in] checkpoint_state The checkpoint state.
+   * \param[in] parameter_name Name of the parameter being retrieved.
+   * \param[in] allocator Allocator used to allocate the memory for the parameter.
+   * \param[out] parameter The parameter data that is retrieved from the checkpoint state.
+   *
+   * \snippet{doc} snippets.dox OrtStatus Return Value
+   *
+   */
+  ORT_API2_STATUS(GetParameter, _In_ const OrtCheckpointState* checkpoint_state,
+                  _In_ const char* parameter_name, _Inout_ OrtAllocator* allocator,
+                  _Outptr_ OrtValue** parameter);
+
   /// @}
 };
 
diff --git a/orttraining/orttraining/training_api/include/onnxruntime_training_cxx_api.h b/orttraining/orttraining/training_api/include/onnxruntime_training_cxx_api.h
index 0edef20ba6da8..218bef524200c 100644
--- a/orttraining/orttraining/training_api/include/onnxruntime_training_cxx_api.h
+++ b/orttraining/orttraining/training_api/include/onnxruntime_training_cxx_api.h
@@ -112,13 +112,13 @@ class CheckpointState : public detail::Base<OrtCheckpointState> {
                              const std::basic_string<ORTCHAR_T>& path_to_checkpoint,
                              const bool include_optimizer_state = false);
 
-  /** \brief Adds the given property to the checkpoint state.
+  /** \brief Adds or updates the given property to/in the checkpoint state.
    *
    * Runtime properties such as epoch, training step, best score, and others can be added to the checkpoint
-   * state by the user if they desire by calling this function with the appropriate property name and
-   * value. The given property name must be unique to be able to successfully add the property.
+   * state by the user by calling this function with the corresponding property name and value.
+   * The given property name must be unique to be able to successfully add the property.
    *
-   * \param[in] property_name Unique name of the property being added.
+   * \param[in] property_name Name of the property being added or updated.
    * \param[in] property_value Property value associated with the given name.
    *
    */
@@ -129,12 +129,38 @@ class CheckpointState : public detail::Base<OrtCheckpointState> {
    * Gets the property value from an existing entry in the checkpoint state. The property must
    * exist in the checkpoint state to be able to retrieve it successfully.
    *
-   * \param[in] property_name Unique name of the property being retrieved.
+   * \param[in] property_name Name of the property being retrieved.
    * \return Property value associated with the given property name.
    *
    */
   Property GetProperty(const std::string& property_name);
 
+  /** \brief Updates the data associated with the model parameter in the checkpoint state for the given parameter name.
+   *
+   * This function updates a model parameter in the checkpoint state with the given parameter data.
+   * The training session must be already created with the checkpoint state that contains the parameter
+   * being updated. The given parameter is copied over to the registered device for the training session.
+   * The parameter must exist in the checkpoint state to be able to update it successfully.
+   *
+   * \param[in] parameter_name Name of the parameter being updated.
+   * \param[in] parameter The parameter data that should replace the existing parameter data.
+   *
+   */
+  void UpdateParameter(const std::string& parameter_name, const Value& parameter);
+
+  /** \brief Gets the data associated with the model parameter from the checkpoint state for the given parameter name.
+   *
+   * This function retrieves the model parameter data from the checkpoint state for the given parameter name.
+   * The parameter is copied over to the provided OrtValue. The training session must be already created
+   * with the checkpoint state that contains the parameter being retrieved.
+   * The parameter must exist in the checkpoint state to be able to retrieve it successfully.
+   *
+   * \param[in] parameter_name Name of the parameter being retrieved.
+   * \return The parameter data that is retrieved from the checkpoint state.
+   *
+   */
+  Value GetParameter(const std::string& parameter_name);
+
   /// @}
 };
 
diff --git a/orttraining/orttraining/training_api/include/onnxruntime_training_cxx_inline.h b/orttraining/orttraining/training_api/include/onnxruntime_training_cxx_inline.h
index c0048458ddf4d..7d1326a10f8f8 100644
--- a/orttraining/orttraining/training_api/include/onnxruntime_training_cxx_inline.h
+++ b/orttraining/orttraining/training_api/include/onnxruntime_training_cxx_inline.h
@@ -279,4 +279,16 @@ inline Property CheckpointState::GetProperty(const std::string& property_name) {
   return property;
 }
 
+inline void CheckpointState::UpdateParameter(const std::string& parameter_name, const Value& parameter) {
+  ThrowOnError(GetTrainingApi().UpdateParameter(p_, parameter_name.c_str(), parameter));
+}
+
+inline Value CheckpointState::GetParameter(const std::string& parameter_name) {
+  AllocatorWithDefaultOptions allocator;
+  OrtValue* parameter;
+  ThrowOnError(GetTrainingApi().GetParameter(p_, parameter_name.c_str(), allocator, &parameter));
+
+  return Value{parameter};
+}
+
 }  // namespace Ort
diff --git a/orttraining/orttraining/training_api/module.cc b/orttraining/orttraining/training_api/module.cc
index d1775e358163c..cf49a01517d6b 100644
--- a/orttraining/orttraining/training_api/module.cc
+++ b/orttraining/orttraining/training_api/module.cc
@@ -119,6 +119,61 @@ Status TransformModelInputsForInference(Graph& inference_graph,
 #endif
 }  // namespace
 
+Status Parameter::CopyTo(const DataTransferManager* data_transfer_manager, OrtValue& data) const {
+  ORT_ENFORCE(data.IsAllocated(), "Given parameter data is not allocated. Cannot copy the checkpoint parameter to it.");
+  ORT_ENFORCE(data.IsTensor(), "Parameter data should be of tensor type.");
+  ORT_ENFORCE(data.Get<Tensor>().Shape() == data_.Get<Tensor>().Shape(),
+              "Parameter data shape mismatch. Expected: ", data_.Get<Tensor>().Shape().ToString(),
+              ", Got: ", data.Get<Tensor>().Shape().ToString());
+#ifdef ENABLE_STRIDED_TENSORS
+  auto data_strides = data.Get<Tensor>().Strides();
+  auto param_strides = data_.Get<Tensor>().Strides();
+  ORT_ENFORCE(data_strides.size() == param_strides.size(),
+              "Parameter data stride mismatch. Expected strides of size: ", param_strides.size(),
+              ", Got: ", data_strides.size());
+  ORT_ENFORCE(std::equal(data_strides.begin(), data_strides.end(), param_strides.begin()),
+              "Parameter data stride value mismatch.");
+#endif
+  ORT_ENFORCE(data.Get<Tensor>().DataType() == data_.Get<Tensor>().DataType(),
+              "Parameter data type mismatch. Expected: ", data_.Get<Tensor>().DataType(),
+              ", Got: ", data.Get<Tensor>().DataType());
+  ORT_ENFORCE(data_transfer_manager != nullptr,
+              "Data transfer manager must be provided to copy data to the parameter. "
+              "Please create the TrainingSession before trying to update the parameter.");
+
+  ORT_THROW_IF_ERROR(data_transfer_manager->CopyTensor(data_.Get<Tensor>(), *data.GetMutable<Tensor>()));
+
+  return Status::OK();
+}
+
+Status Parameter::CopyFrom(const DataTransferManager* data_transfer_manager, const OrtValue& data) {
+  ORT_ENFORCE(data_.IsAllocated(),
+              "The checkpoint parameter is not allocated. Cannot copy the given parameter data to it.");
+  ORT_ENFORCE(data.IsTensor(), "Parameter data should be of tensor type.");
+  ORT_ENFORCE(data.Get<Tensor>().Shape() == data_.Get<Tensor>().Shape(),
+              "Parameter data shape mismatch. Expected: ", data_.Get<Tensor>().Shape().ToString(),
+              ", Got: ", data.Get<Tensor>().Shape().ToString());
+#ifdef ENABLE_STRIDED_TENSORS
+  auto data_strides = data.Get<Tensor>().Strides();
+  auto param_strides = data_.Get<Tensor>().Strides();
+  ORT_ENFORCE(data_strides.size() == param_strides.size(),
+              "Parameter data stride mismatch. Expected strides of size: ", param_strides.size(),
+              ", Got: ", data_strides.size());
+  ORT_ENFORCE(std::equal(data_strides.begin(), data_strides.end(), param_strides.begin()),
+              "Parameter data stride value mismatch.");
+#endif
+  ORT_ENFORCE(data.Get<Tensor>().DataType() == data_.Get<Tensor>().DataType(),
+              "Parameter data type mismatch. Expected: ", data_.Get<Tensor>().DataType(),
+              ", Got: ", data.Get<Tensor>().DataType());
+  ORT_ENFORCE(data_transfer_manager != nullptr,
+              "Data transfer manager must be provided to copy data to the parameter. "
+              "Please create the TrainingSession before trying to update the parameter.");
+
+  ORT_THROW_IF_ERROR(data_transfer_manager->CopyTensor(data.Get<Tensor>(), *data_.GetMutable<Tensor>()));
+
+  return Status::OK();
+}
+
 Status Parameter::SetGrad(const std::string& gradient_name, const OrtValue& param_grad) {
   // assert param is allocated
   ORT_ENFORCE(data_.IsAllocated(), "Parameter data should be allocated before allocating gradient.");
@@ -334,6 +389,10 @@ Module::Module(const ModelIdentifiers& model_identifiers,
   }
 }
 
+Module::~Module() {
+  state_->module_checkpoint_state.train_session_data_transfer_mgr = nullptr;
+}
+
 size_t Module::GetTrainingModelOutputCount() const noexcept {
   return train_output_names_.size();
 }
diff --git a/orttraining/orttraining/training_api/module.h b/orttraining/orttraining/training_api/module.h
index adb633343263e..f323e6be72d49 100644
--- a/orttraining/orttraining/training_api/module.h
+++ b/orttraining/orttraining/training_api/module.h
@@ -21,6 +21,8 @@ struct Parameter {
 
   // Return the mutable data.
   OrtValue& Data() { return data_; }
+  Status CopyTo(const DataTransferManager* data_transfer_manager, OrtValue& data) const;
+  Status CopyFrom(const DataTransferManager* data_transfer_manager, const OrtValue& data);
   const std::string& Name() const { return name_; }
 
   // Returns whether this parameter is trainable or not.
@@ -34,7 +36,6 @@ struct Parameter {
   // Reset and release the gradient buffer of this Parameter greedily.
   Status ResetGrad();
 
- protected:
   Status SetGrad(const std::string& gradient_name, const OrtValue& param_grad);
 
  private:
@@ -83,6 +84,8 @@ struct Module {
          const std::vector<std::shared_ptr<IExecutionProvider>>& providers,
          gsl::span<OrtCustomOpDomain* const> op_domains = gsl::span<OrtCustomOpDomain* const>());
 
+  ~Module();
+
   // Return the trainable/nontrainable parameters
   std::vector<std::shared_ptr<Parameter>> Parameters() const;
 
diff --git a/orttraining/orttraining/training_api/onnxruntime_training_c_api.cc b/orttraining/orttraining/training_api/onnxruntime_training_c_api.cc
index 6693bba348648..38a9aad9640ea 100644
--- a/orttraining/orttraining/training_api/onnxruntime_training_c_api.cc
+++ b/orttraining/orttraining/training_api/onnxruntime_training_c_api.cc
@@ -333,6 +333,10 @@ ORT_API_STATUS_IMPL(OrtTrainingApis::LoadCheckpointFromBuffer, _In_ const void*
                     _In_ const size_t num_bytes, _Outptr_ OrtCheckpointState** checkpoint_state) {
   API_IMPL_BEGIN
 
+  if (checkpoint_buffer == nullptr) {
+    return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "Expected a valid checkpoint buffer. Actual: nullptr.");
+  }
+
   *checkpoint_state = nullptr;
   auto chkpt_state = std::make_unique<onnxruntime::training::api::CheckpointState>();
   const auto* checkpoint_bytes = reinterpret_cast<const uint8_t*>(checkpoint_buffer);
@@ -559,6 +563,76 @@ ORT_API_STATUS_IMPL(OrtTrainingApis::GetProperty, _In_ const OrtCheckpointState*
   API_IMPL_END
 }
 
+ORT_API_STATUS_IMPL(OrtTrainingApis::GetParameterTypeAndShape, _In_ const OrtCheckpointState* checkpoint_state,
+                    _In_ const char* parameter_name, _Outptr_ OrtTensorTypeAndShapeInfo** parameter_type_and_shape) {
+  API_IMPL_BEGIN
+
+  auto chkpt_state = reinterpret_cast<const onnxruntime::training::api::CheckpointState*>(checkpoint_state);
+  auto it = chkpt_state->module_checkpoint_state.named_parameters.find(parameter_name);
+  if (it == chkpt_state->module_checkpoint_state.named_parameters.end()) {
+    std::string err_msg = "Parameter name " + std::string(parameter_name) + " not found in checkpoint state.";
+    return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, err_msg.c_str());
+  }
+
+  return OrtApis::GetTensorTypeAndShape(&it->second->Data(), parameter_type_and_shape);
+  API_IMPL_END
+}
+
+ORT_API_STATUS_IMPL(OrtTrainingApis::UpdateParameter, _Inout_ OrtCheckpointState* checkpoint_state,
+                    _In_ const char* parameter_name, _In_ OrtValue* parameter) {
+  API_IMPL_BEGIN
+  if (parameter == nullptr) {
+    return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "Expected a valid parameter. Actual: nullptr.");
+  }
+
+  auto chkpt_state = reinterpret_cast<const onnxruntime::training::api::CheckpointState*>(checkpoint_state);
+  auto it = chkpt_state->module_checkpoint_state.named_parameters.find(parameter_name);
+  if (it == chkpt_state->module_checkpoint_state.named_parameters.end()) {
+    std::string err_msg = "Parameter name " + std::string(parameter_name) + " not found in checkpoint state.";
+    return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, err_msg.c_str());
+  }
+  ORT_API_RETURN_IF_STATUS_NOT_OK(it->second->CopyFrom(
+      chkpt_state->module_checkpoint_state.train_session_data_transfer_mgr, *parameter));
+
+  return nullptr;
+  API_IMPL_END
+}
+
+ORT_API_STATUS_IMPL(OrtTrainingApis::GetParameter, _In_ const OrtCheckpointState* checkpoint_state,
+                    _In_ const char* parameter_name, _Inout_ OrtAllocator* allocator,
+                    _Outptr_ OrtValue** parameter) {
+  API_IMPL_BEGIN
+
+  if (parameter == nullptr) {
+    return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "Expected a valid parameter. Actual: nullptr.");
+  }
+
+  auto chkpt_state = reinterpret_cast<const onnxruntime::training::api::CheckpointState*>(checkpoint_state);
+  auto it = chkpt_state->module_checkpoint_state.named_parameters.find(parameter_name);
+  if (it == chkpt_state->module_checkpoint_state.named_parameters.end()) {
+    std::string err_msg = "Parameter name " + std::string(parameter_name) + " not found in checkpoint state.";
+    return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, err_msg.c_str());
+  }
+
+  if (!it->second->Data().IsTensor()) {
+    return OrtApis::CreateStatus(ORT_FAIL, "Expected a tensor type for the parameter. Found a non-tensor type.");
+  }
+  const auto& parameter_tensor = it->second->Data().Get<onnxruntime::Tensor>();
+  ORT_API_RETURN_IF_ERROR(OrtApis::CreateTensorAsOrtValue(
+      allocator, parameter_tensor.Shape().GetDims().data(), parameter_tensor.Shape().NumDimensions(),
+      ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, parameter));
+
+  auto status = it->second->CopyTo(
+      chkpt_state->module_checkpoint_state.train_session_data_transfer_mgr, **parameter);
+  if (!status.IsOK()) {
+    OrtApis::ReleaseValue(*parameter);
+    return onnxruntime::ToOrtStatus(status);
+  }
+
+  return nullptr;
+  API_IMPL_END
+}
+
 static constexpr OrtTrainingApi ort_training_api = {
     // NOTE: The C# bindings depend on the API order within this struct. Since Training APIs are not officially
     // released, it is OK to change the order here, however a corresponding matching change should also be done in the
@@ -592,7 +666,10 @@ static constexpr OrtTrainingApi ort_training_api = {
     &OrtTrainingApis::TrainingSessionGetEvalModelInputName,
     &OrtTrainingApis::AddProperty,
     &OrtTrainingApis::GetProperty,
-    &OrtTrainingApis::LoadCheckpointFromBuffer};
+    &OrtTrainingApis::LoadCheckpointFromBuffer,
+    &OrtTrainingApis::GetParameterTypeAndShape,
+    &OrtTrainingApis::UpdateParameter,
+    &OrtTrainingApis::GetParameter};
 
 ORT_API(const OrtTrainingApi*, OrtTrainingApis::GetTrainingApi, uint32_t) {
   // No constraints on the API version yet.
diff --git a/orttraining/orttraining/training_api/ort_training_apis.h b/orttraining/orttraining/training_api/ort_training_apis.h
index c87108957c975..2a8c1e30361c6 100644
--- a/orttraining/orttraining/training_api/ort_training_apis.h
+++ b/orttraining/orttraining/training_api/ort_training_apis.h
@@ -94,4 +94,14 @@ ORT_API_STATUS_IMPL(GetProperty, _In_ const OrtCheckpointState* checkpoint_state
 ORT_API_STATUS_IMPL(LoadCheckpointFromBuffer, _In_ const void* checkpoint_buffer,
                     _In_ const size_t num_bytes, _Outptr_ OrtCheckpointState** checkpoint_state);
 
+ORT_API_STATUS_IMPL(GetParameterTypeAndShape, _In_ const OrtCheckpointState* checkpoint_state,
+                    _In_ const char* parameter_name, _Outptr_ OrtTensorTypeAndShapeInfo** parameter_type_and_shape);
+
+ORT_API_STATUS_IMPL(UpdateParameter, _Inout_ OrtCheckpointState* checkpoint_state,
+                    _In_ const char* parameter_name, _In_ OrtValue* parameter);
+
+ORT_API_STATUS_IMPL(GetParameter, _In_ const OrtCheckpointState* checkpoint_state,
+                    _In_ const char* parameter_name, _Inout_ OrtAllocator* allocator,
+                    _Outptr_ OrtValue** parameter);
+
 }  // namespace OrtTrainingApis
diff --git a/orttraining/orttraining/training_ops/cpu/torch/torch_custom_function_kernel_base.cc b/orttraining/orttraining/training_ops/cpu/torch/torch_custom_function_kernel_base.cc
index e1d4be24861f5..41f4a41a7c38a 100644
--- a/orttraining/orttraining/training_ops/cpu/torch/torch_custom_function_kernel_base.cc
+++ b/orttraining/orttraining/training_ops/cpu/torch/torch_custom_function_kernel_base.cc
@@ -49,7 +49,6 @@ std::vector<std::optional<OrtValue>> CreateOrtValueArgs(OpKernelContext* context
 
 void PythonOpBase::Init(const OpKernelInfo& info) {
   ORT_THROW_IF_ERROR(info.GetAttr("func_name", &name_));
-  ORT_THROW_IF_ERROR(info.GetAttr("inplace", &inplace_));
 
   is_training_mode_ = static_cast<bool>(info.GetAttrOrDefault("training_mode", static_cast<int64_t>(0)));
   ORT_THROW_IF_ERROR(info.GetAttr("input_convention", &input_convention_));
@@ -117,6 +116,9 @@ void PythonOpBase::Init(const OpKernelInfo& info) {
   // Output tensors.
   ORT_THROW_IF_ERROR(info.GetAttrs("output_tensor_types", output_tensor_types_));
 
+  all_output_to_tensor_input_reuse_map_ =
+      info.GetAttrsOrDefault("tensor_reuse_map", std::vector<int64_t>((info.node().OutputDefs().size()), -1));
+
   CreateConstArgs();
   CreateArgPositions();
 
@@ -141,17 +143,18 @@ void PythonOpBase::RunForward(OpKernelContext* context,
   std::vector<std::optional<OrtValue>> args = CreateOrtValueArgs(context, 0, context->InputCount());
   // Invoke Python calls.
   TorchProxy::GetInstance().Forward(
+      name_,
       OrtTorchFunctionPool::GetInstance().GetForwardCore(name_),
       input_requires_grads_,
       args,
       arg_positions_,
       const_arg_set_.GetDataPtrs(),
       const_arg_set_.GetPositions(),
-      diff_ctx,
-      returned_ortvalues,
       is_training_mode_,
-      inplace_ != 0,
-      kernel_invoke_id_);
+      all_output_to_tensor_input_reuse_map_,
+      kernel_invoke_id_,
+      diff_ctx,
+      returned_ortvalues);
 
   const size_t returned_output_count = 1 + returned_ortvalues.size();
   const size_t kernel_output_count = static_cast<size_t>(context->OutputCount());
@@ -291,14 +294,33 @@ void PythonOpBase::SetContextOutput(OpKernelContext* context, void* diff_ctx) co
 
 void PythonOpBase::SetOtherOutputs(OpKernelContext* context, std::vector<OrtValue>& returned_ortvalues) const {
   auto* ctx_internal = reinterpret_cast<onnxruntime::OpKernelContextInternal*>(context);
+  ORT_ENFORCE(returned_ortvalues.size() == all_output_to_tensor_input_reuse_map_.size() - 1,
+              "PythonOp output count mismatch inplace map count.",
+              returned_ortvalues.size(), " != ", all_output_to_tensor_input_reuse_map_.size() - 1);
   for (size_t i = 0; i < returned_ortvalues.size(); ++i) {
+    size_t output_index = i + 1;
+    if (all_output_to_tensor_input_reuse_map_[output_index] != -1) {
+      const void* tensor_address = returned_ortvalues[i].Get<Tensor>().DataRaw();
+      const void* input_tensor_address = context->Input<Tensor>(all_output_to_tensor_input_reuse_map_[output_index])->DataRaw();
+      ORT_ENFORCE(tensor_address == input_tensor_address,
+                  "PythonOp inplace tensor address mismatch, output index: ", output_index, ", input index: ",
+                  all_output_to_tensor_input_reuse_map_[output_index]);
+    }
+
+    // Notes: if the buffer is created, managed by PyTorch, converted to OrtValue through dlpack here,
+    // but also be used outside ORT later, we don't need to be concerned about
+    // "when the buffer of returned_ortvalues[i] is erased by ORT during releasing that OrtValue causing
+    //  the PyTorch code still using that buffer will be failed".
+    // In this case, the created OrtValue's destructor will not release the buffer,
+    // instead it will release a tensor pointing to that buffer, where PyTorch will decide whether to release
+    // the buffer or not, if the tensor storage is not used by any other tensors
+    // (https://github.com/PyTorch/PyTorch/blob/ac603bc2f8ffac8fc061cfb99e77537464da4b18/aten/src/ATen/DLConvertor.cpp#L257C25-L257C29).
     ORT_THROW_IF_ERROR(ctx_internal->SetOutputMLValue(static_cast<int>(i + 1), returned_ortvalues[i]));
   }
 }
 
 void PythonOpGradBase::Init(const OpKernelInfo& info) {
   ORT_THROW_IF_ERROR(info.GetAttr("func_name", &name_));
-  ORT_THROW_IF_ERROR(info.GetAttr("inplace", &inplace_));
   ORT_THROW_IF_ERROR(info.GetAttrs("input_tensor_types", input_tensor_types_));
   ORT_THROW_IF_ERROR(info.GetAttr("output_convention", &output_convention_));
   ORT_THROW_IF_ERROR(info.GetAttrs("output_tensor_types", output_tensor_types_));
@@ -306,6 +328,24 @@ void PythonOpGradBase::Init(const OpKernelInfo& info) {
   ORT_ENFORCE(output_tensor_types_.size() == output_tensor_requires_grads_.size(),
               "backward tensor output count mismatch");
 
+  std::vector<int64_t> tensor_output_to_tensor_input_alias_map =
+      info.GetAttrsOrDefault("tensor_reuse_map",
+                             std::vector<int64_t>((info.node().OutputDefs().size()), -1));
+  all_output_to_tensor_input_reuse_map_.clear();
+  all_output_to_tensor_input_reuse_map_.reserve(output_convention_.size());
+  size_t tensor_output_index = 0;
+  for (size_t i = 0; i < output_convention_.size(); ++i) {
+    if (output_convention_[i] == 'd') {
+      all_output_to_tensor_input_reuse_map_.push_back(
+          tensor_output_to_tensor_input_alias_map[tensor_output_index] == -1
+              ? -1
+              : tensor_output_to_tensor_input_alias_map[tensor_output_index]);
+      ++tensor_output_index;
+    } else {
+      all_output_to_tensor_input_reuse_map_.push_back(-1);
+    }
+  }
+
   SetPositions();
 
   kernel_invoke_id_ = GetInvokeIdString(this);
@@ -314,7 +354,7 @@ void PythonOpGradBase::Init(const OpKernelInfo& info) {
 void PythonOpGradBase::RunBackward(OpKernelContext* context,
                                    std::vector<OrtValue>& returned_ortvalues) const {
   std::vector<std::optional<OrtValue>> args = CreateOrtValueArgs(context, 1, context->InputCount() - 1);
-  // This is called "const" because that's how Pytorch calls all non-tensor inputs.
+  // This is called "const" because that's how PyTorch calls all non-tensor inputs.
   const Tensor* context_id_tensor = context->Input<Tensor>(0);
   ORT_ENFORCE(context_id_tensor, "Context ID (first input) should not be null.");
   const int64_t* context_index_ptr = context_id_tensor->template Data<int64_t>();
@@ -323,15 +363,15 @@ void PythonOpGradBase::RunBackward(OpKernelContext* context,
 
   std::string err;
   TorchProxy::GetInstance().Backward(
-      OrtTorchFunctionPool::GetInstance()
-          .GetBackwardCore(name_),
+      name_,
+      OrtTorchFunctionPool::GetInstance().GetBackwardCore(name_),
       args,
       arg_positions_,
       const_args,
       const_arg_positions_,
-      returned_ortvalues,
-      inplace_ != 0,
-      kernel_invoke_id_);
+      all_output_to_tensor_input_reuse_map_,
+      kernel_invoke_id_,
+      returned_ortvalues);
 
   OrtTorchFunctionPool::GetInstance().UnregisterContext(*context_index_ptr);
 }
@@ -343,6 +383,29 @@ void PythonOpGradBase::SetOutputs(OpKernelContext* context, std::vector<OrtValue
   for (size_t i = 0; i < returned_ortvalues.size(); ++i) {
     if (output_convention_[i] == 'd') {
       if (output_tensor_requires_grads_[tensor_output_index]) {
+        if (all_output_to_tensor_input_reuse_map_[i] != -1) {
+          const Tensor* input_tensor = context->Input<Tensor>(all_output_to_tensor_input_reuse_map_[i]);
+          if (input_tensor) {
+            ORT_ENFORCE(input_tensor, "PythonOpGrad input tensor should not be null. input index: ", all_output_to_tensor_input_reuse_map_[i]);
+
+            // Be noted: PythonOpGrad's input won't be non-tensor.
+            ORT_ENFORCE(all_output_to_tensor_input_reuse_map_[i] < context->InputCount(), "PythonOpGrad inplace tensor index out of bound.");
+            const void* tensor_address = returned_ortvalues[i].Get<Tensor>().DataRaw();
+
+            const void* input_tensor_address = input_tensor->DataRaw();
+            ORT_ENFORCE(tensor_address == input_tensor_address,
+                        "PythonOpGrad inplace tensor address mismatch, output index: ", i, ", input index: ", all_output_to_tensor_input_reuse_map_[i]);
+          }
+        }
+
+        // Notes: if the buffer is created, managed by PyTorch, converted to OrtValue through dlpack here,
+        // but also be used outside ORT later, we don't need to be concerned about
+        // "when the buffer of returned_ortvalues[i] is erased by ORT during releasing that OrtValue causing
+        //  the PyTorch code still using that buffer will be failed".
+        // In this case, the created OrtValue's destructor will not release the buffer,
+        // instead it will release a tensor pointing to that buffer, where PyTorch will decide whether to release
+        // the buffer or not, if the tensor storage is not used by any other tensors
+        // (https://github.com/PyTorch/PyTorch/blob/ac603bc2f8ffac8fc061cfb99e77537464da4b18/aten/src/ATen/DLConvertor.cpp#L257C25-L257C29).
         ORT_THROW_IF_ERROR(ctx_internal->SetOutputMLValue(tensor_output_index, returned_ortvalues.at(i)));
       }
       ++tensor_output_index;
@@ -356,11 +419,11 @@ void PythonOpGradBase::SetPositions() {
   ORT_ENFORCE(const_arg_positions_.size() == 0);
   ORT_ENFORCE(arg_positions_.size() == 0);
 
-  // Pytorch's autograd context is the first (indexed by 0) input of the called Python function.
+  // PyTorch's autograd context is the first (indexed by 0) input of the called Python function.
   // Note that here we will call autograd.Function.backward(ctx, tensor0, tensor1, ...).
   const_arg_positions_ = {0};
 
-  // The rest inputs are just Pytorch tensors.
+  // The rest inputs are just PyTorch tensors.
   arg_positions_.resize(input_tensor_types_.size());
   for (size_t i = 0; i < arg_positions_.size(); ++i) {
     // i-th tensor is the (i+1)-th input of autograd.Function.backward.
diff --git a/orttraining/orttraining/training_ops/cpu/torch/torch_custom_function_kernel_base.h b/orttraining/orttraining/training_ops/cpu/torch/torch_custom_function_kernel_base.h
index 1657bf49ea2e6..d4a53a223abf1 100644
--- a/orttraining/orttraining/training_ops/cpu/torch/torch_custom_function_kernel_base.h
+++ b/orttraining/orttraining/training_ops/cpu/torch/torch_custom_function_kernel_base.h
@@ -106,7 +106,7 @@ class PythonOpBase {
 
   // Name of containing class. For example, MyReLU.
   std::string name_;
-  int64_t inplace_;
+  std::vector<int64_t> all_output_to_tensor_input_reuse_map_;
   std::string input_convention_;
   bool is_training_mode_;
   // input_requires_grads_[i] indicates if the i-th inputs of apply() should have gradient.
@@ -179,7 +179,7 @@ class PythonOpGradBase {
  protected:
   // Name of containing class. For example, MyReLU.
   std::string name_;
-  int64_t inplace_;
+
   // Input types of MyReLU.backward(...).
   std::vector<int64_t> input_tensor_types_;
 
@@ -190,6 +190,9 @@ class PythonOpGradBase {
   std::vector<int64_t> arg_positions_;
   std::vector<int64_t> const_arg_positions_;
 
+  // Memory reuse map for all outputs.
+  std::vector<int64_t> all_output_to_tensor_input_reuse_map_;
+
  private:
   void SetPositions();
 
diff --git a/orttraining/orttraining/training_ops/cpu/triton/triton_op.cc b/orttraining/orttraining/training_ops/cpu/triton/triton_op.cc
index 28f4ff665f797..c230a0c9a3b1d 100644
--- a/orttraining/orttraining/training_ops/cpu/triton/triton_op.cc
+++ b/orttraining/orttraining/training_ops/cpu/triton/triton_op.cc
@@ -17,8 +17,8 @@ InlinedHashSet<size_t> TritonOp::GetBoolOutputs(size_t output_size) const {
   InlinedHashSet<size_t> bool_outputs;
   for (size_t i = 0; i < output_size; ++i) {
     ORT_ENFORCE(i < Node().OutputDefs().size(), "Output index out of range.");
-    if (Node().OutputDefs()[i]->TypeAsProto()->tensor_type().elem_type() ==
-        ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL) {
+    if (Node().OutputDefs()[i]->Exists() && Node().OutputDefs()[i]->TypeAsProto()->tensor_type().elem_type() ==
+                                                ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL) {
       bool_outputs.insert(i);
     }
   }
@@ -37,13 +37,15 @@ Status TritonOp::Compute(OpKernelContext* context) const {
   InlinedHashSet<size_t> bool_outputs = GetBoolOutputs(output_size);
   auto& executor = training::framework::triton::TritonOpExecutor::Instance();
   if (func_name_ != "") {
-    executor.ExecuteByFuncName(func_name_, inputs, outputs, bool_outputs);
+    executor.ExecuteByFuncName(func_name_, inputs, outputs, bool_outputs, kwargs_);
   } else {
     executor.ExecuteByOnnx(onnx_key_, onnx_string_, inputs, outputs, bool_outputs);
   }
   ORT_ENFORCE(output_size == outputs.size());
   for (size_t i = 0; i < output_size; ++i) {
-    ORT_THROW_IF_ERROR(p_ctx_internal->SetOutputMLValue(static_cast<int>(i), outputs[i]));
+    if (Node().OutputDefs()[i]->Exists()) {
+      ORT_THROW_IF_ERROR(p_ctx_internal->SetOutputMLValue(static_cast<int>(i), outputs[i]));
+    }
   }
   return Status::OK();
 }
diff --git a/orttraining/orttraining/training_ops/cpu/triton/triton_op.h b/orttraining/orttraining/training_ops/cpu/triton/triton_op.h
index 25e7b1f15ff6b..f226db76f7ed7 100644
--- a/orttraining/orttraining/training_ops/cpu/triton/triton_op.h
+++ b/orttraining/orttraining/training_ops/cpu/triton/triton_op.h
@@ -5,6 +5,8 @@
 
 #pragma once
 
+#include "core/common/inlined_containers.h"
+
 #ifndef SHARED_PROVIDER
 #include "core/framework/op_kernel.h"
 #endif
@@ -18,6 +20,19 @@ class TritonOp final : public OpKernel {
     ORT_THROW_IF_ERROR(info.GetAttr("func_name", &func_name_));
     ORT_THROW_IF_ERROR(info.GetAttr("onnx_key", &onnx_key_));
     ORT_THROW_IF_ERROR(info.GetAttr("onnx_string", &onnx_string_));
+    for (const auto& attr : info.node().GetAttributes()) {
+      if (attr.first.rfind("_", 0) == 0 || attr.first == "func_name" || attr.first == "onnx_key" ||
+          attr.first == "onnx_string") {
+        continue;
+      }
+      // Support int64 and float only for now, skip other types.
+      if (attr.second.type() == ONNX_NAMESPACE::AttributeProto::AttributeType::AttributeProto_AttributeType_INT) {
+        kwargs_.insert({attr.first, {std::to_string(attr.second.i()), ONNX_NAMESPACE::TensorProto_DataType_INT64}});
+      } else if (attr.second.type() ==
+                 ONNX_NAMESPACE::AttributeProto::AttributeType::AttributeProto_AttributeType_FLOAT) {
+        kwargs_.insert({attr.first, {std::to_string(attr.second.f()), ONNX_NAMESPACE::TensorProto_DataType_FLOAT}});
+      }
+    }
   }
 
   Status Compute(OpKernelContext* context) const override;
@@ -28,6 +43,7 @@ class TritonOp final : public OpKernel {
   std::string func_name_;
   int64_t onnx_key_;
   std::string onnx_string_;
+  InlinedHashMap<std::string, std::pair<std::string, int>> kwargs_;
 };
 
 bool IsTritonOpExecutorInitialized();
diff --git a/orttraining/orttraining/training_ops/cuda/activation/activations_grad.cc b/orttraining/orttraining/training_ops/cuda/activation/activations_grad.cc
index 7fde69d758ca9..98e3b878c9e0e 100644
--- a/orttraining/orttraining/training_ops/cuda/activation/activations_grad.cc
+++ b/orttraining/orttraining/training_ops/cuda/activation/activations_grad.cc
@@ -43,11 +43,15 @@ namespace cuda {
   ACTIVATION_GRAD_OP_TYPED(name, ver, domain, float)     \
   ACTIVATION_GRAD_OP_TYPED(name, ver, domain, double)
 
+#define ACTIVATION_GRAD_OP_HFDX(name, ver, domain) \
+  ACTIVATION_GRAD_OP_HFD(name, ver, domain)        \
+  ACTIVATION_GRAD_OP_TYPED(name, ver, domain, BFloat16)
+
 ACTIVATION_GRAD_OP_HFD(GeluGrad, 1, kMSDomain);
 ACTIVATION_GRAD_OP_HFD(FastGeluGrad, 1, kMSDomain);
 ACTIVATION_GRAD_OP_HFD(ReluGrad, 1, kMSDomain);
 ACTIVATION_GRAD_OP_HFD(SigmoidGrad, 1, kMSDomain);
-ACTIVATION_GRAD_OP_HFD(QuickGeluGrad, 1, kMSDomain);
+ACTIVATION_GRAD_OP_HFDX(QuickGeluGrad, 1, kMSDomain);
 ACTIVATION_GRAD_OP_HFD(TanhGrad, 1, kMSDomain);
 ACTIVATION_GRAD_OP_HFD(LeakyReluGrad, 1, kMSDomain);
 
diff --git a/orttraining/orttraining/training_ops/cuda/activation/activations_grad_impl.cu b/orttraining/orttraining/training_ops/cuda/activation/activations_grad_impl.cu
index 164aba866722e..dd6a44b9e3b56 100644
--- a/orttraining/orttraining/training_ops/cuda/activation/activations_grad_impl.cu
+++ b/orttraining/orttraining/training_ops/cuda/activation/activations_grad_impl.cu
@@ -83,14 +83,15 @@ struct OP_LeakyReluGrad : public CtxLeakyReluGrad {
 #define SPECIALIZED_BINARY_ELEMENTWISE_IMPL(name, T) \
   template void Impl_##name<T>(cudaStream_t stream, const T* lhs_data, const T* rhs_data, T* output_data, const Ctx##name* func_ctx, size_t count);
 
-#define SPECIALIZED_BINARY_ELEMENTWISE_IMPL_HFD(x) \
+#define SPECIALIZED_BINARY_ELEMENTWISE_IMPL_HFDX(x) \
   SPECIALIZED_BINARY_ELEMENTWISE_IMPL(x, half)     \
   SPECIALIZED_BINARY_ELEMENTWISE_IMPL(x, float)    \
-  SPECIALIZED_BINARY_ELEMENTWISE_IMPL(x, double)
+  SPECIALIZED_BINARY_ELEMENTWISE_IMPL(x, double)  \
+  SPECIALIZED_BINARY_ELEMENTWISE_IMPL(x, BFloat16)
 
 #define ACTIVATION_GRAD_OP_NAME(name) \
   BINARY_ELEMENTWISE_IMPL(name);      \
-  SPECIALIZED_BINARY_ELEMENTWISE_IMPL_HFD(name)
+  SPECIALIZED_BINARY_ELEMENTWISE_IMPL_HFDX(name)
 
 ACTIVATION_GRAD_OPS()
 #undef ACTIVATION_GRAD_OP_NAME
diff --git a/orttraining/orttraining/training_ops/cuda/cuda_training_kernels.cc b/orttraining/orttraining/training_ops/cuda/cuda_training_kernels.cc
index 8e61dbee506f2..dcf733153bdad 100644
--- a/orttraining/orttraining/training_ops/cuda/cuda_training_kernels.cc
+++ b/orttraining/orttraining/training_ops/cuda/cuda_training_kernels.cc
@@ -121,6 +121,7 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, QuickGeluGrad);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, double, QuickGeluGrad);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16, QuickGeluGrad);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, BFloat16, QuickGeluGrad);
 
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, TanhGrad);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, double, TanhGrad);
@@ -206,7 +207,11 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, FakeQuantGrad);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, BatchScale);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, PadAndUnflatten);
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, FlattenAndUnpad);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, ScaledSum);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16, ResizeGrad);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, ResizeGrad);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, double, ResizeGrad);
 
 // the kernels within the following ifdef are not included in a build with
 // --enable_training_ops but without --enable_training
@@ -375,6 +380,7 @@ Status RegisterCudaTrainingKernels(KernelRegistry& kernel_registry) {
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, QuickGeluGrad)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, double, QuickGeluGrad)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16, QuickGeluGrad)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, BFloat16, QuickGeluGrad)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, TanhGrad)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, double, TanhGrad)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16, TanhGrad)>,
@@ -453,13 +459,15 @@ Status RegisterCudaTrainingKernels(KernelRegistry& kernel_registry) {
 
     BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, InplaceClipGradNorm)>,
 
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(
-        kCudaExecutionProvider, kMSDomain, 1, float, FakeQuant)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(
-        kCudaExecutionProvider, kMSDomain, 1, float, FakeQuantGrad)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, FakeQuant)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, FakeQuantGrad)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, BatchScale)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, PadAndUnflatten)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, FlattenAndUnpad)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, ScaledSum)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16, ResizeGrad)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, ResizeGrad)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, double, ResizeGrad)>,
 // the kernels within the following ifdef are not included in a build with
 // --enable_training_ops but without --enable_training
 #ifdef ENABLE_TRAINING
diff --git a/orttraining/orttraining/training_ops/cuda/tensor/flatten_and_unpad.cc b/orttraining/orttraining/training_ops/cuda/tensor/flatten_and_unpad.cc
new file mode 100644
index 0000000000000..8bfb10f686771
--- /dev/null
+++ b/orttraining/orttraining/training_ops/cuda/tensor/flatten_and_unpad.cc
@@ -0,0 +1,91 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "orttraining/training_ops/cuda/tensor/flatten_and_unpad.h"
+#include "orttraining/training_ops/cuda/tensor/flatten_and_unpad_impl.h"
+#include "core/providers/cuda/shared_inc/cuda_utils.h"
+
+namespace onnxruntime {
+namespace cuda {
+
+ONNX_OPERATOR_KERNEL_EX(
+    FlattenAndUnpad,
+    kMSDomain,
+    1,
+    kCudaExecutionProvider,
+    (*KernelDefBuilder::Create())
+        .TypeConstraint("T", BuildKernelDefConstraints<int32_t, int64_t, MLFloat16, float, double, BFloat16>())
+        .TypeConstraint("T_INT", DataTypeImpl::GetTensorType<int64_t>())
+        .OutputMemoryType(OrtMemTypeCPUOutput, 1),
+    FlattenAndUnpad);
+
+// Put implementation in the anonymous namespace to avoid name collision in the global namespace.
+namespace {
+
+template <typename T>
+struct FlattenAndUnpadFunctor {
+  void operator()(cudaStream_t stream,
+                  const int64_t output_element_count,
+                  const fast_divmod output_element_stride_fdm,
+                  const int64_t index_value_upper_bound,
+                  const Tensor& input_tensor,
+                  const Tensor& indices_tensor,
+                  Tensor& output_tensor) const {
+    typedef typename ToCudaType<T>::MappedType CudaT;
+    const CudaT* input_data = reinterpret_cast<const CudaT*>(input_tensor.Data<T>());
+
+    FlattenAndUnpadImpl<CudaT>(stream, output_element_count, output_element_stride_fdm, index_value_upper_bound,
+                               input_data, indices_tensor.Data<int64_t>(),
+                               reinterpret_cast<CudaT*>(output_tensor.MutableData<T>()));
+  }
+};
+
+}  // namespace
+
+Status FlattenAndUnpad::ComputeInternal(OpKernelContext* context) const {
+  const Tensor* input_tensor = context->Input<Tensor>(0);
+  const Tensor* indices_tensor = context->Input<Tensor>(1);
+  ORT_ENFORCE(input_tensor->Shape().NumDimensions() >= 2,
+              "input_tensor tensor must have at least 2 dimensions.", input_tensor->Shape().NumDimensions());
+  ORT_ENFORCE(indices_tensor->Shape().NumDimensions() == 1,
+              "indices_tensor tensor must be 1-D.", indices_tensor->Shape().NumDimensions());
+
+  const auto& input_shape = input_tensor->Shape();
+  std::vector<int64_t> output_shape_vec;
+  output_shape_vec.reserve(input_shape.NumDimensions() - 1);
+  output_shape_vec.push_back(indices_tensor->Shape()[0]);
+  int64_t element_stride = 1;
+  for (size_t i = 2; i < input_shape.NumDimensions(); ++i) {
+    output_shape_vec.push_back(input_shape[i]);
+    element_stride *= input_shape[i];
+  }
+
+  fast_divmod output_element_stride_fdm(static_cast<int>(element_stride));
+  auto output_shape = TensorShape(output_shape_vec);
+  Tensor* output_tensor = context->Output(0, output_shape);
+
+  std::vector<int64_t> unflatten_dims_vec;
+  unflatten_dims_vec.reserve(2);
+  unflatten_dims_vec.push_back(input_shape[0]);
+  unflatten_dims_vec.push_back(input_shape[1]);
+  const int64_t index_value_upper_bound = input_shape[0] * input_shape[1];
+
+  utils::MLTypeCallDispatcher<int32_t, int64_t, float, MLFloat16, double, BFloat16>
+      t_disp(input_tensor->GetElementType());
+  t_disp.Invoke<FlattenAndUnpadFunctor>(Stream(context),
+                                        output_shape.Size(),
+                                        output_element_stride_fdm,
+                                        index_value_upper_bound,
+                                        *input_tensor,
+                                        *indices_tensor,
+                                        *output_tensor);
+
+  size_t rank = unflatten_dims_vec.size();
+  Tensor* unflatten_dims_tensor = context->Output(1, {static_cast<int>(rank)});
+  TensorShape(unflatten_dims_vec).CopyDims(unflatten_dims_tensor->MutableData<int64_t>(), rank);
+
+  return Status::OK();
+}
+
+}  // namespace cuda
+}  // namespace onnxruntime
diff --git a/orttraining/orttraining/training_ops/cuda/tensor/flatten_and_unpad.h b/orttraining/orttraining/training_ops/cuda/tensor/flatten_and_unpad.h
new file mode 100644
index 0000000000000..f9c6819a393b8
--- /dev/null
+++ b/orttraining/orttraining/training_ops/cuda/tensor/flatten_and_unpad.h
@@ -0,0 +1,21 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+
+#include "core/providers/cuda/cuda_kernel.h"
+#include "core/providers/common.h"
+
+namespace onnxruntime {
+namespace cuda {
+
+class FlattenAndUnpad final : public CudaKernel {
+ public:
+  FlattenAndUnpad(const OpKernelInfo& info) : CudaKernel(info) {
+  }
+
+  Status ComputeInternal(OpKernelContext* context) const override;
+};
+
+}  // namespace cuda
+}  // namespace onnxruntime
diff --git a/orttraining/orttraining/training_ops/cuda/tensor/flatten_and_unpad_impl.cu b/orttraining/orttraining/training_ops/cuda/tensor/flatten_and_unpad_impl.cu
new file mode 100644
index 0000000000000..2091a7082ee79
--- /dev/null
+++ b/orttraining/orttraining/training_ops/cuda/tensor/flatten_and_unpad_impl.cu
@@ -0,0 +1,83 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "orttraining/training_ops/cuda/tensor/flatten_and_unpad_impl.h"
+#include "core/providers/cuda/cu_inc/common.cuh"
+
+namespace onnxruntime {
+namespace cuda {
+
+constexpr int kBlockSize = 256;
+constexpr int kNumUnroll = 4;
+
+template <typename T>
+__global__ void ExtractIputWithIndexKernel(const CUDA_LONG N,
+                                           const fast_divmod output_element_stride_fdm,
+                                           const int64_t index_value_upper_bound,
+                                           const T* input_data,
+                                           const int64_t* indices_data,
+                                           T* output_data) {
+  CUDA_LONG idx = blockDim.x * blockIdx.x + threadIdx.x;
+  CUDA_LONG id = idx * kNumUnroll;
+
+  T input[kNumUnroll];
+  if (id < N) {
+#pragma unroll
+    for (int i = 0; i < kNumUnroll; ++i) {
+      CUDA_LONG li = id + i;
+      if (li < N) {
+        int row_index, col_index;
+        output_element_stride_fdm.divmod(li, row_index, col_index);
+        assert(indices_data[row_index] < index_value_upper_bound);
+        input[i] = input_data[indices_data[row_index] * output_element_stride_fdm.d_ + col_index];
+      }
+    }
+  }
+
+#pragma unroll
+  for (int i = 0; i < kNumUnroll; ++i) {
+    CUDA_LONG li = id + i;
+    if (li < N) {
+      output_data[li] = input[i];
+    }
+  }
+}
+
+template <typename T>
+void FlattenAndUnpadImpl(cudaStream_t stream,
+                         const int64_t total_element_count,
+                         const fast_divmod output_element_stride_fdm,
+                         const int64_t index_value_upper_bound,
+                         const T* input_data,
+                         const int64_t* indices_data,
+                         T* output_data) {
+  const int blocksPerGrid = static_cast<int>(CeilDiv(total_element_count, kBlockSize * kNumUnroll));
+  ExtractIputWithIndexKernel<T><<<blocksPerGrid, kBlockSize, 0, stream>>>(
+      static_cast<CUDA_LONG>(total_element_count),
+      output_element_stride_fdm,
+      index_value_upper_bound,
+      input_data,
+      indices_data,
+      output_data);
+}
+
+#define FLATTEN_AND_UNPAD_IMPL(T)                                       \
+  template void FlattenAndUnpadImpl<T>(cudaStream_t stream,                         \
+                                       const int64_t total_element_count,           \
+                                       const fast_divmod output_element_stride_fdm, \
+                                       const int64_t index_value_upper_bound,       \
+                                       const T* input_data,                         \
+                                       const int64_t* indices_data,                 \
+                                       T* output_data);
+
+FLATTEN_AND_UNPAD_IMPL(float)
+FLATTEN_AND_UNPAD_IMPL(double)
+FLATTEN_AND_UNPAD_IMPL(half)
+FLATTEN_AND_UNPAD_IMPL(BFloat16)
+FLATTEN_AND_UNPAD_IMPL(int32_t)
+FLATTEN_AND_UNPAD_IMPL(int64_t)
+
+#undef FLATTEN_AND_UNPAD_FROM_MASK_IMPL
+
+}  // namespace cuda
+}  // namespace onnxruntime
diff --git a/orttraining/orttraining/training_ops/cuda/tensor/flatten_and_unpad_impl.h b/orttraining/orttraining/training_ops/cuda/tensor/flatten_and_unpad_impl.h
new file mode 100644
index 0000000000000..75f8c243d3425
--- /dev/null
+++ b/orttraining/orttraining/training_ops/cuda/tensor/flatten_and_unpad_impl.h
@@ -0,0 +1,25 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+
+#ifdef USE_ROCM
+#include "core/providers/rocm/shared_inc/rocm_utils.h"
+#else
+#include "core/providers/cuda/shared_inc/cuda_utils.h"
+#endif
+
+namespace onnxruntime {
+namespace cuda {
+
+template <typename T>
+void FlattenAndUnpadImpl(cudaStream_t stream,
+                         const int64_t total_element_count,
+                         const fast_divmod output_element_stride_fdm,
+                         const int64_t index_value_upper_bound,
+                         const T* input_data,
+                         const int64_t* indices_data,
+                         T* output_data);
+
+}  // namespace cuda
+}  // namespace onnxruntime
diff --git a/orttraining/orttraining/training_ops/cuda/tensor/pad_and_unflatten.cc b/orttraining/orttraining/training_ops/cuda/tensor/pad_and_unflatten.cc
index caf89ef840e0c..7bd759e8976c1 100644
--- a/orttraining/orttraining/training_ops/cuda/tensor/pad_and_unflatten.cc
+++ b/orttraining/orttraining/training_ops/cuda/tensor/pad_and_unflatten.cc
@@ -17,8 +17,7 @@ ONNX_OPERATOR_KERNEL_EX(
         .TypeConstraint("T", BuildKernelDefConstraints<MLFloat16, float, double, BFloat16>())
         .TypeConstraint("T_INT", DataTypeImpl::GetTensorType<int64_t>())
         .TypeConstraint("T_INDEX", DataTypeImpl::GetTensorType<int64_t>())
-        .InputMemoryType(OrtMemTypeCPUInput, 2)
-        .OutputMemoryType(OrtMemTypeCPUOutput, 1),
+        .InputMemoryType(OrtMemTypeCPUInput, 2),
     PadAndUnflatten);
 
 // Put implementation in the anonymous namespace to avoid name collision in the global namespace.
@@ -63,14 +62,11 @@ Status PadAndUnflatten::ComputeInternal(OpKernelContext* context) const {
   output_shape_vec.push_back(dims_ptr[0]);
   output_shape_vec.push_back(dims_ptr[1]);
 
-  std::vector<int64_t> full_size_flatten_shape_vec;
   const int64_t flatten_dim_factor = dims_ptr[0] * dims_ptr[1];
-  full_size_flatten_shape_vec.push_back(flatten_dim_factor);
 
   int64_t element_stride = 1;
   for (size_t i = 1; i < input_shape.NumDimensions(); ++i) {
     output_shape_vec.push_back(input_shape[i]);
-    full_size_flatten_shape_vec.push_back(input_shape[i]);
     element_stride *= input_shape[i];
   }
 
@@ -87,11 +83,6 @@ Status PadAndUnflatten::ComputeInternal(OpKernelContext* context) const {
                                         *indices_tensor,
                                         *output_tensor);
 
-  // Set input shape output tensor.
-  size_t rank = full_size_flatten_shape_vec.size();
-  Tensor* input_shape_tensor = context->Output(1, {static_cast<int>(rank)});
-  TensorShape(full_size_flatten_shape_vec).CopyDims(input_shape_tensor->MutableData<int64_t>(), rank);
-
   return Status::OK();
 }
 
diff --git a/orttraining/orttraining/training_ops/cuda/tensor/pad_and_unflatten_impl.cu b/orttraining/orttraining/training_ops/cuda/tensor/pad_and_unflatten_impl.cu
index 22a4f518dfa47..e96770f974bf0 100644
--- a/orttraining/orttraining/training_ops/cuda/tensor/pad_and_unflatten_impl.cu
+++ b/orttraining/orttraining/training_ops/cuda/tensor/pad_and_unflatten_impl.cu
@@ -61,7 +61,7 @@ void PadAndUnflattenImpl(cudaStream_t stream,
       output_data);
 }
 
-#define SPECIALIZED_RESTORE_FROM_MASK_IMPL(T)                                       \
+#define PAD_AND_UNFLATTEN_IMPL(T)                                       \
   template void PadAndUnflattenImpl<T>(cudaStream_t stream,                         \
                                        const int64_t total_element_count,           \
                                        const fast_divmod output_element_stride_fdm, \
@@ -70,12 +70,12 @@ void PadAndUnflattenImpl(cudaStream_t stream,
                                        const int64_t* indices_data,                 \
                                        T* output_data);
 
-SPECIALIZED_RESTORE_FROM_MASK_IMPL(float)
-SPECIALIZED_RESTORE_FROM_MASK_IMPL(double)
-SPECIALIZED_RESTORE_FROM_MASK_IMPL(half)
-SPECIALIZED_RESTORE_FROM_MASK_IMPL(BFloat16)
+PAD_AND_UNFLATTEN_IMPL(float)
+PAD_AND_UNFLATTEN_IMPL(double)
+PAD_AND_UNFLATTEN_IMPL(half)
+PAD_AND_UNFLATTEN_IMPL(BFloat16)
 
-#undef SPECIALIZED_RESTORE_FROM_MASK_IMPL
+#undef PAD_AND_UNFLATTEN_FROM_MASK_IMPL
 
 }  // namespace cuda
 }  // namespace onnxruntime
diff --git a/orttraining/orttraining/training_ops/cuda/tensor/resize_grad.cc b/orttraining/orttraining/training_ops/cuda/tensor/resize_grad.cc
new file mode 100644
index 0000000000000..a5e8f7cd35d88
--- /dev/null
+++ b/orttraining/orttraining/training_ops/cuda/tensor/resize_grad.cc
@@ -0,0 +1,81 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include <memory>
+#include <utility>
+
+#include "orttraining/training_ops/cuda/tensor/resize_grad.h"
+#include "orttraining/training_ops/cuda/tensor/resize_grad_impl.h"
+
+namespace onnxruntime::cuda {
+
+#define REGISTER_RESIZEGRAD_KERNEL_TYPED(T)                                \
+  ONNX_OPERATOR_TYPED_KERNEL_EX(                                           \
+      ResizeGrad,                                                          \
+      kMSDomain,                                                           \
+      1,                                                                   \
+      T,                                                                   \
+      kCudaExecutionProvider,                                              \
+      (*KernelDefBuilder::Create())                                        \
+          .InputMemoryType(OrtMemTypeCPUInput, 2) /* Keep roi on CPU */    \
+          .InputMemoryType(OrtMemTypeCPUInput, 3) /* Keep scales on CPU */ \
+          .TypeConstraint("T", DataTypeImpl::GetTensorType<T>()),          \
+      ResizeGrad<T>);
+
+REGISTER_RESIZEGRAD_KERNEL_TYPED(MLFloat16)
+REGISTER_RESIZEGRAD_KERNEL_TYPED(float)
+REGISTER_RESIZEGRAD_KERNEL_TYPED(double)
+
+template <typename T>
+Status ResizeGrad<T>::ComputeInternal(OpKernelContext* context) const {
+  typedef typename ToCudaType<T>::MappedType CudaT;
+
+  const Tensor* dY = context->Input<Tensor>(0);
+  const Tensor* X = context->Input<Tensor>(1);
+  const Tensor* scales = context->Input<Tensor>(3);
+
+  ORT_ENFORCE(X->Shape().NumDimensions() == 4, "Expected input tensor to have 4 dimensions. Actual: ",
+              X->Shape().NumDimensions());
+
+  const auto get_scales_from_input = [](const Tensor* scales) {
+    if (nullptr == scales) {
+      return std::make_pair(std::optional<float>{}, std::optional<float>{});
+    }
+
+    ORT_ENFORCE(scales->Shape().Size() == 4, "There must be a scale for each dimension.");
+
+    const auto* scales_data = scales->Data<float>();
+    return std::make_pair(std::optional<float>{scales_data[2]}, std::optional<float>{scales_data[3]});
+  };
+
+  std::pair<std::optional<float>, std::optional<float>> scale_factors = get_scales_from_input(scales);
+
+  Tensor* dX = context->Output(0, X->Shape());
+
+  const int64_t batch_size = X->Shape()[0];
+  const int64_t num_channels = X->Shape()[1];
+  const int64_t output_height = dY->Shape()[2];
+  const int64_t output_width = dY->Shape()[3];
+  const int64_t input_height = X->Shape()[2];
+  const int64_t input_width = X->Shape()[3];
+
+  if (dX->Shape() == dY->Shape()) {
+    CUDA_RETURN_IF_ERROR(cudaMemcpyAsync(dX->MutableDataRaw(), dY->DataRaw(), dY->SizeInBytes(), cudaMemcpyDeviceToDevice));
+    return Status::OK();
+  }
+
+  CUDA_RETURN_IF_ERROR(cudaMemsetAsync(dX->MutableDataRaw(), 0, dX->SizeInBytes(), Stream(context)));
+
+  const bool align_corners = coordinate_transform_mode_ == ResizeCoordinateTransformationMode::ALIGN_CORNERS;
+  const CudaT* dy_data = reinterpret_cast<const CudaT*>(dY->Data<T>());
+  CudaT* dx_data = reinterpret_cast<CudaT*>(dX->MutableData<T>());
+
+  ResizeGradImpl(Stream(context), input_height, input_width, output_height,
+                 output_width, batch_size, num_channels, align_corners,
+                 scale_factors.first, scale_factors.second,
+                 dy_data, dx_data);
+
+  return Status::OK();
+}
+
+}  // namespace onnxruntime::cuda
diff --git a/orttraining/orttraining/training_ops/cuda/tensor/resize_grad.h b/orttraining/orttraining/training_ops/cuda/tensor/resize_grad.h
new file mode 100644
index 0000000000000..53f8d5f0d71f5
--- /dev/null
+++ b/orttraining/orttraining/training_ops/cuda/tensor/resize_grad.h
@@ -0,0 +1,41 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+
+#include <string>
+#include "core/common/common.h"
+#include "core/providers/cuda/cuda_kernel.h"
+#include "core/providers/cpu/tensor/upsamplebase.h"
+
+namespace onnxruntime::cuda {
+
+template <typename T>
+class ResizeGrad final : public UpsampleBase, public CudaKernel {
+ public:
+  ResizeGrad(const OpKernelInfo& info) : UpsampleBase(info), CudaKernel(info) {
+    ORT_ENFORCE(!antialias_, "Antialiasing is not supported in ResizeGrad yet.");
+
+    ORT_ENFORCE(axes_.empty(), "ReizeGrad does not support the `axes` attribute yet.");
+
+    std::string coordinate_transform_mode =
+        info.GetAttrOrDefault<std::string>("coordinate_transformation_mode", "half_pixel");
+    coordinate_transform_mode_ = StringToCoordinateTransformationMode(coordinate_transform_mode);
+    ORT_ENFORCE(coordinate_transform_mode_ == ResizeCoordinateTransformationMode::HALF_PIXEL ||
+                    coordinate_transform_mode_ == ResizeCoordinateTransformationMode::ALIGN_CORNERS,
+                "ReizeGrad only supports the `HALF_PIXEL` and `ALIGN_CORNERS` coordinate_transform_mode ",
+                coordinate_transform_mode, " is not supported yet.");
+
+    ORT_ENFORCE(keep_aspect_ratio_policy_ == AspectRatioPolicy::STRETCH,
+                "ReizeGrad only supports the `STRETCH` policy.");
+
+    std::string mode;
+    ORT_ENFORCE(info.GetAttr<std::string>("mode", &mode).IsOK());
+    ORT_ENFORCE((UpsampleMode::LINEAR == mode_),
+                "ReizeGrad only supports the `LINEAR` mode. ", mode, " mode is not supported yet.");
+  }
+
+  Status ComputeInternal(OpKernelContext* context) const override;
+};
+
+}  // namespace onnxruntime::cuda
diff --git a/orttraining/orttraining/training_ops/cuda/tensor/resize_grad_impl.cu b/orttraining/orttraining/training_ops/cuda/tensor/resize_grad_impl.cu
new file mode 100644
index 0000000000000..0507cda62390b
--- /dev/null
+++ b/orttraining/orttraining/training_ops/cuda/tensor/resize_grad_impl.cu
@@ -0,0 +1,151 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+// Contents of this file are derived from the pytorch cuda implementation of
+// the upsample_bilinear2d_backward implementation at:
+// https://github.com/pytorch/pytorch/blob/ce50132748f652ed6079c3db8008a6817594dbae/aten/src/ATen/native/cuda/UpSampleBilinear2d.cu
+
+#include "orttraining/training_ops/cuda/tensor/resize_grad_impl.h"
+#include "core/providers/cuda/cu_inc/common.cuh"
+#include "core/providers/cuda/atomic/common.cuh"
+
+namespace onnxruntime::cuda {
+
+namespace {
+
+constexpr int NumThreadsPerBlock = GridDim::maxThreadsPerBlock;
+
+}  // namespace
+
+__device__ __forceinline__ size_t
+idx(const size_t nc,
+    const size_t height,
+    const size_t width,
+    const size_t h,
+    const size_t w) {
+  return (nc * height + h) * width + w;
+}
+
+template <typename T>
+__device__ __forceinline__ static T AreaPixelComputeSourceIndex(
+    T scale,
+    int dst_index,
+    bool align_corners,
+    bool cubic) {
+  if (align_corners) {
+    return scale * dst_index;
+  } else {
+    T src_idx = scale * (dst_index + static_cast<T>(0.5)) -
+                static_cast<T>(0.5);
+    return (!cubic && src_idx < static_cast<T>(0))
+               ? static_cast<T>(0)
+               : src_idx;
+  }
+}
+
+template <typename T, typename AccT>
+__global__ void UpsampleGrad(const int64_t nc, const int64_t input_height,
+                             const int64_t input_width, const int64_t output_height,
+                             const int64_t output_width, const AccT rheight,
+                             const AccT rwidth, const bool align_corners,
+                             const T* dY_data, T* dX_data) {
+  const size_t dy_numel = nc * output_width * output_height;
+  const size_t dx_numel = nc * input_width * input_height;
+  for (size_t index = blockDim.x * blockIdx.x + threadIdx.x;
+       index < dy_numel;
+       index += blockDim.x * gridDim.x) {
+    size_t index_temp = index;
+    const int w2 = index_temp % output_width;  // 0:width2-1
+    index_temp /= output_width;
+    const int h2 = index_temp % output_height;  // 0:height2-1
+    const size_t nc = index_temp / output_height;
+
+    const AccT h1r = AreaPixelComputeSourceIndex<AccT>(
+        rheight, h2, align_corners, /*cubic=*/false);
+    const int h1 = h1r;
+    const int h1p = (h1 < input_height - 1) ? 1 : 0;
+    const AccT h1lambda = h1r - h1;
+    const AccT h0lambda = static_cast<AccT>(1) - h1lambda;
+
+    const AccT w1r = AreaPixelComputeSourceIndex<AccT>(
+        rwidth, w2, align_corners, /*cubic=*/false);
+    const int w1 = w1r;
+    const int w1p = (w1 < input_width - 1) ? 1 : 0;
+    const AccT w1lambda = w1r - w1;
+    const AccT w0lambda = static_cast<AccT>(1) - w1lambda;
+
+    const T d2val = dY_data[index];
+    AtomicAdd(
+        dX_data,
+        idx(nc, input_height, input_width, h1, w1),
+        dx_numel,
+        static_cast<T>(h0lambda * w0lambda) * d2val);
+    AtomicAdd(
+        dX_data,
+        idx(nc, input_height, input_width, h1, w1 + w1p),
+        dx_numel,
+        static_cast<T>(h0lambda * w1lambda) * d2val);
+    AtomicAdd(
+        dX_data,
+        idx(nc, input_height, input_width, h1 + h1p, w1),
+        dx_numel,
+        static_cast<T>(h1lambda * w0lambda) * d2val);
+    AtomicAdd(
+        dX_data,
+        idx(nc, input_height, input_width, h1 + h1p, w1 + w1p),
+        dx_numel,
+        static_cast<T>(h1lambda * w1lambda) * d2val);
+  }
+}
+
+template <typename T>
+T AreaPixelComputeScale(int64_t input_size, int64_t output_size, bool align_corners,
+                        const std::optional<float>& scale) {
+  if (align_corners) {
+    if (output_size <= 1) {
+      return T{0};
+    }
+    return static_cast<T>(input_size - 1) / static_cast<T>(output_size - 1);
+  } else {
+    if (scale.has_value()) {
+      return static_cast<T>(T{1.0} / *scale);
+    } else {
+      return static_cast<T>(input_size) / static_cast<T>(output_size);
+    }
+  }
+}
+
+template <typename T>
+void ResizeGradImpl(cudaStream_t stream, int64_t input_height,
+                    int64_t input_width, int64_t output_height,
+                    int64_t output_width, int64_t batch_size,
+                    int64_t channels, bool align_corners,
+                    const std::optional<float>& scale_height,
+                    const std::optional<float>& scale_width,
+                    const T* dY_data, T* dX_data) {
+  float rheight = AreaPixelComputeScale<float>(input_height, output_height, align_corners, scale_height);
+  float rwidth = AreaPixelComputeScale<float>(input_width, output_width, align_corners, scale_width);
+
+  const size_t output_numel = batch_size * channels * output_height * output_width;
+  int blocks_per_grid = (int)(ceil(static_cast<float>(output_numel) / NumThreadsPerBlock));
+  UpsampleGrad<T><<<blocks_per_grid, NumThreadsPerBlock, 0, stream>>>(
+      batch_size * channels, input_height, input_width, output_height, output_width,
+      rheight, rwidth, align_corners, dY_data, dX_data);
+}
+
+#define SPECIALIZED_RESIZEGRAD_IMPL(T)                                        \
+  template void ResizeGradImpl<T>(cudaStream_t stream, int64_t input_height,  \
+                                  int64_t input_width, int64_t output_height, \
+                                  int64_t output_width, int64_t batch_size,   \
+                                  int64_t channels, bool align_corners,       \
+                                  const std::optional<float>& scale_height,   \
+                                  const std::optional<float>& scale_width,    \
+                                  const T* dY_data, T* dX_data);
+
+SPECIALIZED_RESIZEGRAD_IMPL(half)
+SPECIALIZED_RESIZEGRAD_IMPL(float)
+SPECIALIZED_RESIZEGRAD_IMPL(double)
+
+#undef SPECIALIZED_RESIZEGRAD_IMPL
+
+}  // namespace onnxruntime::cuda
diff --git a/orttraining/orttraining/training_ops/cuda/tensor/resize_grad_impl.h b/orttraining/orttraining/training_ops/cuda/tensor/resize_grad_impl.h
new file mode 100644
index 0000000000000..3e917f9071e30
--- /dev/null
+++ b/orttraining/orttraining/training_ops/cuda/tensor/resize_grad_impl.h
@@ -0,0 +1,20 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+
+#include <stdint.h>
+#include <optional>
+
+namespace onnxruntime::cuda {
+
+template <typename T>
+void ResizeGradImpl(cudaStream_t stream, int64_t input_height,
+                    int64_t input_width, int64_t output_height,
+                    int64_t output_width, int64_t batch_size,
+                    int64_t channels, bool align_corners,
+                    const std::optional<float>& scale_height,
+                    const std::optional<float>& scale_width,
+                    const T* dY_data, T* dX_data);
+
+}  // namespace onnxruntime::cuda
diff --git a/orttraining/orttraining/training_ops/rocm/rocm_training_kernels.cc b/orttraining/orttraining/training_ops/rocm/rocm_training_kernels.cc
index 2321aa23dd6eb..e107a2542fc0b 100644
--- a/orttraining/orttraining/training_ops/rocm/rocm_training_kernels.cc
+++ b/orttraining/orttraining/training_ops/rocm/rocm_training_kernels.cc
@@ -187,6 +187,10 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float_BFloat16, ReduceAllL2);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, BFloat16_BFloat16, ReduceAllL2);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, PadAndUnflatten);
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, FlattenAndUnpad);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16, ResizeGrad);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, ResizeGrad);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, double, ResizeGrad);
 
 #if defined(ORT_USE_NCCL) || defined(USE_MPI)
 // P2P communication operators.
@@ -387,6 +391,10 @@ Status RegisterRocmTrainingKernels(KernelRegistry& kernel_registry) {
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float_BFloat16, ReduceAllL2)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, BFloat16_BFloat16, ReduceAllL2)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, PadAndUnflatten)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, FlattenAndUnpad)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, MLFloat16, ResizeGrad)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, float, ResizeGrad)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kRocmExecutionProvider, kMSDomain, 1, double, ResizeGrad)>,
 
 // P2P communication operators.
 #if defined(ORT_USE_NCCL) || defined(USE_MPI)
diff --git a/orttraining/pytorch_frontend_examples/mnist_training.py b/orttraining/pytorch_frontend_examples/mnist_training.py
deleted file mode 100644
index dc9b3f654400c..0000000000000
--- a/orttraining/pytorch_frontend_examples/mnist_training.py
+++ /dev/null
@@ -1,200 +0,0 @@
-## This code is from https://github.com/pytorch/examples/blob/master/mnist/main.py
-## with modification to do training using onnxruntime as backend on cuda device.
-## A private PyTorch build from https://aiinfra.visualstudio.com/Lotus/_git/pytorch (ORTTraining branch) is needed to run the demo.
-
-## Model testing is not complete.
-
-import argparse
-import os
-
-import numpy as np  # noqa: F401
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-import torch.optim as optim  # noqa: F401
-from mpi4py import MPI
-from torchvision import datasets, transforms
-
-from onnxruntime.capi.ort_trainer import IODescription, ModelDescription, ORTTrainer
-
-try:  # noqa: SIM105
-    from onnxruntime.capi._pybind_state import set_cuda_device_id
-except ImportError:
-    pass
-
-
-class NeuralNet(nn.Module):
-    def __init__(self, input_size, hidden_size, num_classes):
-        super().__init__()
-        self.fc1 = nn.Linear(input_size, hidden_size)
-        self.relu = nn.ReLU()
-        self.fc2 = nn.Linear(hidden_size, num_classes)
-
-    def forward(self, x):
-        out = self.fc1(x)
-        out = self.relu(out)
-        out = self.fc2(out)
-        return out
-
-
-def my_loss(x, target):
-    return F.nll_loss(F.log_softmax(x, dim=1), target)
-
-
-def train_with_trainer(args, trainer, device, train_loader, epoch):
-    for batch_idx, (data, target) in enumerate(train_loader):
-        data, target = data.to(device), target.to(device)  # noqa: PLW2901
-        data = data.reshape(data.shape[0], -1)  # noqa: PLW2901
-
-        learning_rate = torch.tensor([args.lr])
-        loss = trainer.train_step(data, target, learning_rate)
-
-        # Since the output corresponds to [loss_desc, probability_desc], the first value is taken as loss.
-        if batch_idx % args.log_interval == 0:
-            print(
-                "Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
-                    epoch,
-                    batch_idx * len(data),
-                    len(train_loader.dataset),
-                    100.0 * batch_idx / len(train_loader),
-                    loss[0],
-                )
-            )
-
-
-# TODO: comple this once ORT training can do evaluation.
-def test_with_trainer(args, trainer, device, test_loader):
-    test_loss = 0
-    correct = 0
-    with torch.no_grad():
-        for data, target in test_loader:
-            data, target = data.to(device), target.to(device)  # noqa: PLW2901
-            data = data.reshape(data.shape[0], -1)  # noqa: PLW2901
-            output = F.log_softmax(trainer.eval_step(data, fetches=["probability"]), dim=1)
-            test_loss += F.nll_loss(output, target, reduction="sum").item()  # sum up batch loss
-            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
-            correct += pred.eq(target.view_as(pred)).sum().item()
-
-    test_loss /= len(test_loader.dataset)
-
-    print(
-        "\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n".format(
-            test_loss, correct, len(test_loader.dataset), 100.0 * correct / len(test_loader.dataset)
-        )
-    )
-
-
-def mnist_model_description():
-    input_desc = IODescription("input1", ["batch", 784], torch.float32)
-    label_desc = IODescription(
-        "label",
-        [
-            "batch",
-        ],
-        torch.int64,
-        num_classes=10,
-    )
-    loss_desc = IODescription("loss", [], torch.float32)
-    probability_desc = IODescription("probability", ["batch", 10], torch.float32)
-    return ModelDescription([input_desc, label_desc], [loss_desc, probability_desc])
-
-
-def main():
-    # Training settings
-    parser = argparse.ArgumentParser(description="PyTorch MNIST Example")
-    parser.add_argument(
-        "--batch-size", type=int, default=64, metavar="N", help="input batch size for training (default: 64)"
-    )
-    parser.add_argument(
-        "--test-batch-size", type=int, default=1000, metavar="N", help="input batch size for testing (default: 1000)"
-    )
-    parser.add_argument("--epochs", type=int, default=10, metavar="N", help="number of epochs to train (default: 10)")
-    parser.add_argument("--lr", type=float, default=0.01, metavar="LR", help="learning rate (default: 0.01)")
-    parser.add_argument("--no-cuda", action="store_true", default=False, help="disables CUDA training")
-    parser.add_argument("--seed", type=int, default=1, metavar="S", help="random seed (default: 1)")
-    parser.add_argument(
-        "--log-interval",
-        type=int,
-        default=10,
-        metavar="N",
-        help="how many batches to wait before logging training status",
-    )
-
-    args = parser.parse_args()
-    use_cuda = not args.no_cuda and torch.cuda.is_available()
-
-    torch.manual_seed(args.seed)
-
-    kwargs = {"num_workers": 0, "pin_memory": True}
-    train_loader = torch.utils.data.DataLoader(
-        datasets.MNIST(
-            "../data",
-            train=True,
-            download=True,
-            transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]),
-        ),
-        batch_size=args.batch_size,
-        shuffle=True,
-        **kwargs,
-    )
-    test_loader = torch.utils.data.DataLoader(
-        datasets.MNIST(
-            "../data",
-            train=False,
-            transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]),
-        ),
-        batch_size=args.test_batch_size,
-        shuffle=True,
-        **kwargs,
-    )
-
-    comm = MPI.COMM_WORLD
-    args.local_rank = (
-        int(os.environ["OMPI_COMM_WORLD_LOCAL_RANK"]) if ("OMPI_COMM_WORLD_LOCAL_RANK" in os.environ) else 0
-    )
-    args.world_rank = int(os.environ["OMPI_COMM_WORLD_RANK"]) if ("OMPI_COMM_WORLD_RANK" in os.environ) else 0
-    args.world_size = comm.Get_size()
-    if use_cuda:
-        torch.cuda.set_device(args.local_rank)
-        device = torch.device("cuda", args.local_rank)
-        args.n_gpu = 1
-        set_cuda_device_id(args.local_rank)
-    else:
-        device = torch.device("cpu")
-
-    input_size = 784
-    hidden_size = 500
-    num_classes = 10
-    model = NeuralNet(input_size, hidden_size, num_classes)
-
-    model_desc = mnist_model_description()
-    # use log_interval as gradient accumulate steps
-    trainer = ORTTrainer(
-        model,
-        my_loss,
-        model_desc,
-        "SGDOptimizer",
-        None,
-        IODescription(
-            "Learning_Rate",
-            [
-                1,
-            ],
-            torch.float32,
-        ),
-        device,
-        1,
-        args.world_rank,
-        args.world_size,
-        use_mixed_precision=False,
-        allreduce_post_accumulation=True,
-    )
-    print("\nBuild ort model done.")
-
-    for epoch in range(1, args.epochs + 1):
-        train_with_trainer(args, trainer, device, train_loader, epoch)
-        test_with_trainer(args, trainer, device, test_loader)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/orttraining/tools/ci_test/results/ci-mi200.huggingface.bert-large-rocm5.6.json b/orttraining/tools/ci_test/results/ci-mi200.huggingface.bert-large-rocm5.7.json
similarity index 100%
rename from orttraining/tools/ci_test/results/ci-mi200.huggingface.bert-large-rocm5.6.json
rename to orttraining/tools/ci_test/results/ci-mi200.huggingface.bert-large-rocm5.7.json
diff --git a/orttraining/tools/ci_test/run_batch_size_test.py b/orttraining/tools/ci_test/run_batch_size_test.py
index ba2be03618197..348d490678e9a 100755
--- a/orttraining/tools/ci_test/run_batch_size_test.py
+++ b/orttraining/tools/ci_test/run_batch_size_test.py
@@ -108,7 +108,7 @@ def main():
         if config.enable_mixed_precision:
             cmds.append("--use_mixed_precision"),
 
-        subprocess.run(cmds, timeout=120).check_returncode()
+        subprocess.run(cmds, timeout=120).check_returncode()  # noqa: PLW1510
 
     return 0
 
diff --git a/orttraining/tools/ci_test/run_bert_perf_test.py b/orttraining/tools/ci_test/run_bert_perf_test.py
index fbc1403583ba0..bb15d6f5965b6 100644
--- a/orttraining/tools/ci_test/run_bert_perf_test.py
+++ b/orttraining/tools/ci_test/run_bert_perf_test.py
@@ -97,7 +97,7 @@ def main():
             cmds.append("--use_mixed_precision"),
             cmds.append("--allreduce_in_fp16"),
 
-        subprocess.run(cmds).check_returncode()
+        subprocess.run(cmds).check_returncode()  # noqa: PLW1510
         if c.expected_perf > 0.0:
             json_filename = "onnxruntime_perf_metrics_{}.onnx_bert_{}_{}_Lamb.json".format(
                 model, precision_prefix, c.max_seq_length
diff --git a/orttraining/tools/ci_test/run_convergence_test.py b/orttraining/tools/ci_test/run_convergence_test.py
index 58250e7f8ae8c..2ec32bca77640 100755
--- a/orttraining/tools/ci_test/run_convergence_test.py
+++ b/orttraining/tools/ci_test/run_convergence_test.py
@@ -35,7 +35,7 @@ def main():
         convergence_test_output_path = os.path.join(output_dir, "convergence_test_out.csv")
 
         # run BERT training
-        subprocess.run(
+        subprocess.run(  # noqa: PLW1510
             [
                 os.path.join(args.binary_dir, "onnxruntime_training_bert"),
                 "--model_name",
diff --git a/orttraining/tools/ci_test/run_gpt2_perf_test.py b/orttraining/tools/ci_test/run_gpt2_perf_test.py
index e64fc3c7812e3..18e59d275b6b5 100644
--- a/orttraining/tools/ci_test/run_gpt2_perf_test.py
+++ b/orttraining/tools/ci_test/run_gpt2_perf_test.py
@@ -62,7 +62,7 @@ def main():
         if c.use_mixed_precision:
             cmds.append("--use_mixed_precision"),
 
-        subprocess.run(cmds).check_returncode()
+        subprocess.run(cmds).check_returncode()  # noqa: PLW1510
 
     return 0
 
diff --git a/pyproject.toml b/pyproject.toml
index 89011a7944ab6..97515cb9fa62b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -92,3 +92,4 @@ unfixable = [
 "tools/nuget/generate_nuspec_for_native_nuget.py" = ["ISC003"] # Too many errors to fix
 "onnxruntime/test/python/quantization/test_op_gemm.py" = ["N806"]  # use of A for a matrix
 "onnxruntime/test/python/quantization/op_test_utils.py" = ["N806", "PERF203", "RUF012"]  # use of A for a matrix
+"orttraining/orttraining/python/training/ort_triton/kernel/_flash_attn.py" = ["N806", "PLW2901", "ISC001", "E731"]  # Long triton code from other repo.
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 73e04e6b37c0b..1b5ca65cf8037 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -17,4 +17,4 @@ scikit-learn
 scipy
 sympy
 wheel
-setuptools>=41.4.0
+setuptools>=61.0.0
diff --git a/requirements-lintrunner.txt b/requirements-lintrunner.txt
index 2068040443a20..25454ce40c263 100644
--- a/requirements-lintrunner.txt
+++ b/requirements-lintrunner.txt
@@ -1,11 +1,9 @@
 # This file is auto updated by dependabot
-lintrunner-adapters>=0.8.0
-# RUFF, RUFF-FIX
-ruff==0.0.278
+lintrunner-adapters>=0.11.0
+# RUFF
+ruff==0.1.4
 # BLACK-ISORT
-black==23.7.0
+black==23.10.1
 isort==5.12.0
-# PYLINT
-pylint==2.17.2
 # CLANGFORMAT
-clang-format==16.0.6
+clang-format==17.0.4
diff --git a/requirements-training.txt b/requirements-training.txt
index 4b1be6cef9b7c..dbfd7305d1bec 100644
--- a/requirements-training.txt
+++ b/requirements-training.txt
@@ -6,4 +6,4 @@ onnx
 packaging
 protobuf
 sympy
-setuptools>=41.4.0
+setuptools>=61.0.0
diff --git a/rust/justfile b/rust/justfile
new file mode 100644
index 0000000000000..fd7b12d60a25e
--- /dev/null
+++ b/rust/justfile
@@ -0,0 +1,13 @@
+
+
+vendor:
+    mkdir -p ./onnxruntime-sys/vendor/onnxruntime-src
+    cp -rf ../onnxruntime ./onnxruntime-sys/vendor/onnxruntime-src
+    cp -rf ../cmake ./onnxruntime-sys/vendor/onnxruntime-src
+    rm -rf ./onnxruntime-sys/vendor/onnxruntime-src/cmake/external/onnx
+    cp -rf ../include ./onnxruntime-sys/vendor/onnxruntime-src
+    mkdir -p ./onnxruntime-sys/vendor/onnxruntime-src/tools
+    cp -rf ../tools/ci_build ./onnxruntime-sys/vendor/onnxruntime-src/tools
+    cp -rf ../samples ./onnxruntime-sys/vendor/onnxruntime-src
+    cp -f ../requirements.txt.in ./onnxruntime-sys/vendor/onnxruntime-src
+    cp -f ../VERSION_NUMBER ./onnxruntime-sys/vendor/onnxruntime-src
diff --git a/rust/onnxruntime-sys/.gitignore b/rust/onnxruntime-sys/.gitignore
new file mode 100644
index 0000000000000..22d0d82f8095e
--- /dev/null
+++ b/rust/onnxruntime-sys/.gitignore
@@ -0,0 +1 @@
+vendor
diff --git a/rust/onnxruntime-sys/Cargo.toml b/rust/onnxruntime-sys/Cargo.toml
index 4806e6ca2953c..236c2c1fae860 100644
--- a/rust/onnxruntime-sys/Cargo.toml
+++ b/rust/onnxruntime-sys/Cargo.toml
@@ -3,18 +3,16 @@ authors = ["Nicolas Bigaouette <nbigaouette@elementai.com>"]
 edition = "2018"
 name = "onnxruntime-sys"
 version = "0.0.14"
-
 links = "onnxruntime"
-
 description = "Unsafe wrapper around Microsoft's ONNX Runtime"
 documentation = "https://docs.rs/onnxruntime-sys"
 homepage = "https://github.com/microsoft/onnxruntime"
 license = "MIT OR Apache-2.0"
 readme = "../README.md"
 repository = "https://github.com/microsoft/onnxruntime"
-
 categories = ["science"]
 keywords = ["neuralnetworks", "onnx", "bindings"]
+include = ["src", "example", "vendor", "build.rs"]
 
 [dependencies]
 libloading = "0.7"
@@ -22,6 +20,7 @@ libloading = "0.7"
 [build-dependencies]
 bindgen = "0.63"
 cmake = "0.1"
+anyhow = "1.0"
 
 # Used on unix
 flate2 = "1.0"
diff --git a/rust/onnxruntime-sys/build.rs b/rust/onnxruntime-sys/build.rs
index f59ee99fa29a7..b2a4f7b16141f 100644
--- a/rust/onnxruntime-sys/build.rs
+++ b/rust/onnxruntime-sys/build.rs
@@ -8,12 +8,16 @@ use std::{
     str::FromStr,
 };
 
+// use cmake::build;
+
+use anyhow::{anyhow, Context, Result};
+
 /// ONNX Runtime version
 ///
 /// WARNING: If version is changed, bindings for all platforms will have to be re-generated.
 ///          To do so, run this:
 ///              cargo build --package onnxruntime-sys --features generate-bindings
-const ORT_VERSION: &str = include_str!("../../VERSION_NUMBER");
+const ORT_VERSION: &str = include_str!("./vendor/onnxruntime-src/VERSION_NUMBER");
 
 /// Base Url from which to download pre-built releases/
 const ORT_RELEASE_BASE_URL: &str = "https://github.com/microsoft/onnxruntime/releases/download";
@@ -34,8 +38,8 @@ const ORT_RUST_ENV_GPU: &str = "ORT_RUST_USE_CUDA";
 /// Subdirectory (of the 'target' directory) into which to extract the prebuilt library.
 const ORT_PREBUILT_EXTRACT_DIR: &str = "onnxruntime";
 
-fn main() {
-    let libort_install_dir = prepare_libort_dir();
+fn main() -> Result<()> {
+    let libort_install_dir = prepare_libort_dir().context("preparing libort directory")?;
 
     let include_dir = libort_install_dir.join("include");
     let lib_dir = libort_install_dir.join("lib");
@@ -55,6 +59,7 @@ fn main() {
     );
 
     generate_bindings(&include_dir);
+    Ok(())
 }
 
 fn generate_bindings(include_dir: &Path) {
@@ -70,11 +75,7 @@ fn generate_bindings(include_dir: &Path) {
         ),
     ];
 
-    let path = include_dir
-        .join("onnxruntime")
-        .join("core")
-        .join("session")
-        .join("onnxruntime_c_api.h");
+    let path = include_dir.join("onnxruntime").join("onnxruntime_c_api.h");
 
     // The bindgen::Builder is the main entry point
     // to bindgen, and lets you build up options for
@@ -106,7 +107,7 @@ fn generate_bindings(include_dir: &Path) {
 
     let generated_file = PathBuf::from(env::var("OUT_DIR").unwrap()).join("bindings.rs");
     bindings
-        .write_to_file(&generated_file)
+        .write_to_file(generated_file)
         .expect("Couldn't write bindings!");
 }
 
@@ -144,7 +145,7 @@ fn extract_archive(filename: &Path, output: &Path) {
 }
 
 fn extract_tgz(filename: &Path, output: &Path) {
-    let file = fs::File::open(&filename).unwrap();
+    let file = fs::File::open(filename).unwrap();
     let buf = io::BufReader::new(file);
     let tar = flate2::read::GzDecoder::new(buf);
     let mut archive = tar::Archive::new(tar);
@@ -152,7 +153,7 @@ fn extract_tgz(filename: &Path, output: &Path) {
 }
 
 fn extract_zip(filename: &Path, outpath: &Path) {
-    let file = fs::File::open(&filename).unwrap();
+    let file = fs::File::open(filename).unwrap();
     let buf = io::BufReader::new(file);
     let mut archive = zip::ZipArchive::new(buf).unwrap();
     for i in 0..archive.len() {
@@ -168,7 +169,7 @@ fn extract_zip(filename: &Path, outpath: &Path) {
             );
             if let Some(p) = outpath.parent() {
                 if !p.exists() {
-                    fs::create_dir_all(&p).unwrap();
+                    fs::create_dir_all(p).unwrap();
                 }
             }
             let mut outfile = fs::File::create(&outpath).unwrap();
@@ -190,15 +191,15 @@ enum Architecture {
 }
 
 impl FromStr for Architecture {
-    type Err = String;
+    type Err = anyhow::Error;
 
-    fn from_str(s: &str) -> Result<Self, Self::Err> {
+    fn from_str(s: &str) -> Result<Self> {
         match s.to_lowercase().as_str() {
             "x86" => Ok(Architecture::X86),
             "x86_64" => Ok(Architecture::X86_64),
             "arm" => Ok(Architecture::Arm),
             "aarch64" => Ok(Architecture::Arm64),
-            _ => Err(format!("Unsupported architecture: {}", s)),
+            _ => Err(anyhow!("Unsupported architecture: {s}")),
         }
     }
 }
@@ -233,14 +234,14 @@ impl Os {
 }
 
 impl FromStr for Os {
-    type Err = String;
+    type Err = anyhow::Error;
 
-    fn from_str(s: &str) -> Result<Self, Self::Err> {
+    fn from_str(s: &str) -> Result<Self> {
         match s.to_lowercase().as_str() {
             "windows" => Ok(Os::Windows),
             "macos" => Ok(Os::MacOs),
             "linux" => Ok(Os::Linux),
-            _ => Err(format!("Unsupported os: {}", s)),
+            _ => Err(anyhow!("Unsupported os: {s}")),
         }
     }
 }
@@ -262,9 +263,9 @@ enum Accelerator {
 }
 
 impl FromStr for Accelerator {
-    type Err = String;
+    type Err = anyhow::Error;
 
-    fn from_str(s: &str) -> Result<Self, Self::Err> {
+    fn from_str(s: &str) -> Result<Self> {
         match s.to_lowercase().as_str() {
             "1" | "yes" | "true" | "on" => Ok(Accelerator::Cuda),
             _ => Ok(Accelerator::Cpu),
@@ -393,36 +394,37 @@ fn prepare_libort_dir_prebuilt() -> PathBuf {
     extract_dir.join(prebuilt_archive.file_stem().unwrap())
 }
 
-fn prepare_libort_dir() -> PathBuf {
+fn prepare_libort_dir() -> Result<PathBuf> {
     let strategy = env::var(ORT_RUST_ENV_STRATEGY);
     println!(
         "strategy: {:?}",
         strategy.as_ref().map_or_else(|_| "unknown", String::as_str)
     );
     match strategy.as_ref().map(String::as_str) {
-        Ok("download") => prepare_libort_dir_prebuilt(),
-        Ok("system") => PathBuf::from(match env::var(ORT_RUST_ENV_SYSTEM_LIB_LOCATION) {
-            Ok(p) => p,
-            Err(e) => {
-                panic!(
-                    "Could not get value of environment variable {:?}: {:?}",
-                    ORT_RUST_ENV_SYSTEM_LIB_LOCATION, e
-                );
-            }
-        }),
+        Ok("download") => Ok(prepare_libort_dir_prebuilt()),
+        Ok("system") => {
+            let location = env::var(ORT_RUST_ENV_SYSTEM_LIB_LOCATION).context(format!(
+                "Could not get value of environment variable {:?}",
+                ORT_RUST_ENV_SYSTEM_LIB_LOCATION
+            ))?;
+            Ok(PathBuf::from(location))
+        }
         Ok("compile") | Err(_) => prepare_libort_dir_compiled(),
-        _ => panic!("Unknown value for {:?}", ORT_RUST_ENV_STRATEGY),
+        _ => Err(anyhow!("Unknown value for {:?}", ORT_RUST_ENV_STRATEGY)),
     }
 }
 
-fn prepare_libort_dir_compiled() -> PathBuf {
-    let mut config = cmake::Config::new("../../cmake");
+fn prepare_libort_dir_compiled() -> Result<PathBuf> {
+    let manifest_dir_string = env::var("CARGO_MANIFEST_DIR").unwrap();
+    let mut config = cmake::Config::new(format!(
+        "{manifest_dir_string}/vendor/onnxruntime-src/cmake"
+    ));
 
     config.define("onnxruntime_BUILD_SHARED_LIB", "ON");
 
-    if env::var(ORT_RUST_ENV_GPU).unwrap_or_default().parse() == Ok(Accelerator::Cuda) {
+    if let Ok(Accelerator::Cuda) = env::var(ORT_RUST_ENV_GPU).unwrap_or_default().parse() {
         config.define("onnxruntime_USE_CUDA", "ON");
-    }
+    };
 
-    config.build()
+    Ok(config.build())
 }
diff --git a/rust/onnxruntime-sys/examples/c_api_sample.rs b/rust/onnxruntime-sys/examples/c_api_sample.rs
index 499f1548de396..e8c9ca8f09a5a 100644
--- a/rust/onnxruntime-sys/examples/c_api_sample.rs
+++ b/rust/onnxruntime-sys/examples/c_api_sample.rs
@@ -307,7 +307,7 @@ fn main() {
 
     let output_node_names_cstring: Vec<std::ffi::CString> = output_node_names
         .iter()
-        .map(|n| std::ffi::CString::new(n.clone()).unwrap())
+        .map(|n| std::ffi::CString::new(*n).unwrap())
         .collect();
     let output_node_names_ptr: Vec<*const i8> = output_node_names_cstring
         .iter()
diff --git a/rust/onnxruntime/src/tensor/ort_output_tensor.rs b/rust/onnxruntime/src/tensor/ort_output_tensor.rs
index 5176a58c423ea..006fbdba6cdb8 100644
--- a/rust/onnxruntime/src/tensor/ort_output_tensor.rs
+++ b/rust/onnxruntime/src/tensor/ort_output_tensor.rs
@@ -290,9 +290,6 @@ impl<'a> TryFrom<OrtOutputTensor> for OrtOutput<'a> {
                 .unwrap()(shape_info);
 
             match element_type {
-                sys::ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED => {
-                    unimplemented!()
-                }
                 sys::ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT => {
                     WithOutputTensor::try_from(value).map(OrtOutput::Float)
                 }
@@ -317,12 +314,6 @@ impl<'a> TryFrom<OrtOutputTensor> for OrtOutput<'a> {
                 sys::ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING => {
                     WithOutputTensor::try_from(value).map(OrtOutput::String)
                 }
-                sys::ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL => {
-                    unimplemented!()
-                }
-                sys::ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16 => {
-                    unimplemented!()
-                }
                 sys::ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE => {
                     WithOutputTensor::try_from(value).map(OrtOutput::Double)
                 }
@@ -332,14 +323,18 @@ impl<'a> TryFrom<OrtOutputTensor> for OrtOutput<'a> {
                 sys::ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT64 => {
                     WithOutputTensor::try_from(value).map(OrtOutput::UInt64)
                 }
-                sys::ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX64 => {
-                    unimplemented!()
-                }
-                sys::ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX128 => {
-                    unimplemented!()
-                }
-                sys::ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16 => {
-                    unimplemented!()
+                // Unimplemented output tensor data types
+                sys::ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX64
+                | sys::ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED
+                | sys::ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL
+                | sys::ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16
+                | sys::ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX128
+                | sys::ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16
+                | sys::ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT8E4M3FN
+                | sys::ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT8E4M3FNUZ
+                | sys::ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT8E5M2FNUZ
+                | sys::ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT8E5M2 => {
+                    unimplemented!("{:?}", element_type)
                 }
             }
         }
diff --git a/samples/python/training/orttrainer/mnist/mnist_original.onnx b/samples/python/training/orttrainer/mnist/mnist_original.onnx
deleted file mode 100644
index 15931affb5ccf..0000000000000
Binary files a/samples/python/training/orttrainer/mnist/mnist_original.onnx and /dev/null differ
diff --git a/samples/python/training/orttrainer/mnist/ort_mnist.py b/samples/python/training/orttrainer/mnist/ort_mnist.py
deleted file mode 100644
index 8f8ccf373ccf6..0000000000000
--- a/samples/python/training/orttrainer/mnist/ort_mnist.py
+++ /dev/null
@@ -1,174 +0,0 @@
-# This code is from https://github.com/pytorch/examples/blob/master/mnist/main.py
-# with modification to do training using onnxruntime as backend on cuda device.
-
-import argparse
-import os
-
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-from torchvision import datasets, transforms
-
-import onnxruntime
-from onnxruntime.training import ORTTrainer, ORTTrainerOptions, optim
-
-
-# Pytorch model
-class NeuralNet(nn.Module):
-    def __init__(self, input_size, hidden_size, num_classes):
-        super().__init__()
-        self.fc1 = nn.Linear(input_size, hidden_size)
-        self.relu = nn.ReLU()
-        self.fc2 = nn.Linear(hidden_size, num_classes)
-
-    def forward(self, input1):
-        out = self.fc1(input1)
-        out = self.relu(out)
-        out = self.fc2(out)
-        return out
-
-
-# ONNX Runtime training
-def mnist_model_description():
-    return {
-        "inputs": [("input1", ["batch", 784]), ("label", ["batch"])],
-        "outputs": [("loss", [], True), ("probability", ["batch", 10])],
-    }
-
-
-def my_loss(x, target):
-    return F.nll_loss(F.log_softmax(x, dim=1), target)
-
-
-# Helpers
-def train(log_interval, trainer, device, train_loader, epoch, train_steps):
-    for batch_idx, (data, target) in enumerate(train_loader):
-        if batch_idx == train_steps:
-            break
-
-        # Fetch data
-        data, target = data.to(device), target.to(device)  # noqa: PLW2901
-        data = data.reshape(data.shape[0], -1)  # noqa: PLW2901
-
-        # Train step
-        loss, prob = trainer.train_step(data, target)
-
-        # Stats
-        if batch_idx % log_interval == 0:
-            print(
-                "Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
-                    epoch, batch_idx * len(data), len(train_loader.dataset), 100.0 * batch_idx / len(train_loader), loss
-                )
-            )
-
-
-def test(trainer, device, test_loader):
-    test_loss = 0
-    correct = 0
-    with torch.no_grad():
-        for data, target in test_loader:
-            data, target = data.to(device), target.to(device)  # noqa: PLW2901
-            data = data.reshape(data.shape[0], -1)  # noqa: PLW2901
-
-            # Using fetches around without eval_step to not pass 'target' as input
-            trainer._train_step_info.fetches = ["probability"]
-            output = F.log_softmax(trainer.eval_step(data), dim=1)
-            trainer._train_step_info.fetches = []
-
-            # Stats
-            test_loss += F.nll_loss(output, target, reduction="sum").item()
-            pred = output.argmax(dim=1, keepdim=True)
-            correct += pred.eq(target.view_as(pred)).sum().item()
-
-    test_loss /= len(test_loader.dataset)
-
-    print(
-        "\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n".format(
-            test_loss, correct, len(test_loader.dataset), 100.0 * correct / len(test_loader.dataset)
-        )
-    )
-
-
-def main():
-    # Training settings
-    parser = argparse.ArgumentParser(description="ONNX Runtime MNIST Example")
-    parser.add_argument(
-        "--train-steps",
-        type=int,
-        default=-1,
-        metavar="N",
-        help="number of steps to train. Set -1 to run through whole dataset (default: -1)",
-    )
-    parser.add_argument(
-        "--batch-size", type=int, default=20, metavar="N", help="input batch size for training (default: 20)"
-    )
-    parser.add_argument(
-        "--test-batch-size", type=int, default=1000, metavar="N", help="input batch size for testing (default: 1000)"
-    )
-    parser.add_argument("--epochs", type=int, default=1, metavar="N", help="number of epochs to train (default: 1)")
-    parser.add_argument("--lr", type=float, default=0.01, metavar="LR", help="learning rate (default: 0.01)")
-    parser.add_argument("--no-cuda", action="store_true", default=False, help="disables CUDA training")
-    parser.add_argument("--seed", type=int, default=1, metavar="S", help="random seed (default: 1)")
-    parser.add_argument(
-        "--log-interval",
-        type=int,
-        default=10,
-        metavar="N",
-        help="how many batches to wait before logging training status",
-    )
-    parser.add_argument("--save-path", type=str, default="", help="Path for Saving the current Model state")
-
-    # Basic setup
-    args = parser.parse_args()
-    if not args.no_cuda and torch.cuda.is_available():
-        device = "cuda"
-    else:
-        device = "cpu"
-    torch.manual_seed(args.seed)
-    onnxruntime.set_seed(args.seed)
-
-    # Data loader
-    train_loader = torch.utils.data.DataLoader(
-        datasets.MNIST(
-            "./data",
-            train=True,
-            download=True,
-            transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]),
-        ),
-        batch_size=args.batch_size,
-        shuffle=True,
-    )
-
-    if args.test_batch_size > 0:
-        test_loader = torch.utils.data.DataLoader(
-            datasets.MNIST(
-                "./data",
-                train=False,
-                transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]),
-            ),
-            batch_size=args.test_batch_size,
-            shuffle=True,
-        )
-
-    # Modeling
-    model = NeuralNet(784, 500, 10)
-    model_desc = mnist_model_description()
-    optim_config = optim.SGDConfig(lr=args.lr)
-    opts = {"device": {"id": device}}
-    opts = ORTTrainerOptions(opts)
-
-    trainer = ORTTrainer(model, model_desc, optim_config, loss_fn=my_loss, options=opts)
-
-    # Train loop
-    for epoch in range(1, args.epochs + 1):
-        train(args.log_interval, trainer, device, train_loader, epoch, args.train_steps)
-        if args.test_batch_size > 0:
-            test(trainer, device, test_loader)
-
-    # Save model
-    if args.save_path:
-        torch.save(model.state_dict(), os.path.join(args.save_path, "mnist_cnn.pt"))
-
-
-if __name__ == "__main__":
-    main()
diff --git a/samples/python/training/orttrainer/mnist/pytorch_mnist.py b/samples/python/training/orttrainer/mnist/pytorch_mnist.py
deleted file mode 100644
index 2e451d85f62e8..0000000000000
--- a/samples/python/training/orttrainer/mnist/pytorch_mnist.py
+++ /dev/null
@@ -1,157 +0,0 @@
-import argparse
-import os
-
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-import torch.optim as optim
-from torchvision import datasets, transforms
-
-
-# Pytorch model
-class NeuralNet(nn.Module):
-    def __init__(self, input_size, hidden_size, num_classes):
-        super().__init__()
-        self.fc1 = nn.Linear(input_size, hidden_size)
-        self.relu = nn.ReLU()
-        self.fc2 = nn.Linear(hidden_size, num_classes)
-
-    def forward(self, input1):
-        out = self.fc1(input1)
-        out = self.relu(out)
-        out = self.fc2(out)
-        return out
-
-
-def my_loss(x, target, is_train=True):
-    if is_train:
-        return F.nll_loss(F.log_softmax(x, dim=1), target)
-    else:
-        return F.nll_loss(F.log_softmax(x, dim=1), target, reduction="sum")
-
-
-# Helpers
-def train(args, model, device, train_loader, optimizer, epoch):
-    model.train()
-    for batch_idx, (data, target) in enumerate(train_loader):
-        if batch_idx == args.train_steps:
-            break
-        data, target = data.to(device), target.to(device)  # noqa: PLW2901
-        data = data.reshape(data.shape[0], -1)  # noqa: PLW2901
-        optimizer.zero_grad()
-        output = model(data)
-        loss = my_loss(output, target)
-        loss.backward()
-        optimizer.step()
-        if batch_idx % args.log_interval == 0:
-            print(
-                "Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
-                    epoch,
-                    batch_idx * len(data),
-                    len(train_loader.dataset),
-                    100.0 * batch_idx / len(train_loader),
-                    loss.item(),
-                )
-            )
-
-
-def test(model, device, test_loader):
-    model.eval()
-    test_loss = 0
-    correct = 0
-    with torch.no_grad():
-        for data, target in test_loader:
-            data, target = data.to(device), target.to(device)  # noqa: PLW2901
-            data = data.reshape(data.shape[0], -1)  # noqa: PLW2901
-            output = model(data)
-            # Stats
-            test_loss += my_loss(output, target, False).item()
-            pred = output.argmax(dim=1, keepdim=True)
-            correct += pred.eq(target.view_as(pred)).sum().item()
-
-    test_loss /= len(test_loader.dataset)
-
-    print(
-        "\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n".format(
-            test_loss, correct, len(test_loader.dataset), 100.0 * correct / len(test_loader.dataset)
-        )
-    )
-
-
-def main():
-    # Training settings
-    parser = argparse.ArgumentParser(description="PyTorch MNIST Example")
-    parser.add_argument(
-        "--train-steps",
-        type=int,
-        default=-1,
-        metavar="N",
-        help="number of steps to train. Set -1 to run through whole dataset (default: -1)",
-    )
-    parser.add_argument(
-        "--batch-size", type=int, default=20, metavar="N", help="input batch size for training (default: 20)"
-    )
-    parser.add_argument(
-        "--test-batch-size", type=int, default=1000, metavar="N", help="input batch size for testing (default: 1000)"
-    )
-    parser.add_argument("--epochs", type=int, default=1, metavar="N", help="number of epochs to train (default: 1)")
-    parser.add_argument("--lr", type=float, default=0.01, metavar="LR", help="learning rate (default: 0.01)")
-    parser.add_argument("--no-cuda", action="store_true", default=False, help="disables CUDA training")
-    parser.add_argument("--seed", type=int, default=1, metavar="S", help="random seed (default: 1)")
-    parser.add_argument(
-        "--log-interval",
-        type=int,
-        default=10,
-        metavar="N",
-        help="how many batches to wait before logging training status",
-    )
-    parser.add_argument("--save-path", type=str, default="", help="Path for Saving the current Model")
-
-    # Basic setup
-    args = parser.parse_args()
-    if not args.no_cuda and torch.cuda.is_available():
-        device = "cuda"
-    else:
-        device = "cpu"
-    torch.manual_seed(args.seed)
-
-    # Data loader
-    train_loader = torch.utils.data.DataLoader(
-        datasets.MNIST(
-            "./data",
-            train=True,
-            download=True,
-            transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]),
-        ),
-        batch_size=args.batch_size,
-        shuffle=True,
-    )
-
-    if args.test_batch_size > 0:
-        test_loader = torch.utils.data.DataLoader(
-            datasets.MNIST(
-                "./data",
-                train=False,
-                transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]),
-            ),
-            batch_size=args.test_batch_size,
-            shuffle=True,
-        )
-
-    # Modeling
-    model = NeuralNet(784, 500, 10).to(device)
-    optimizer = optim.SGD(model.parameters(), lr=args.lr)
-
-    # Train loop
-    for epoch in range(1, args.epochs + 1):
-        train(args, model, device, train_loader, optimizer, epoch)
-        if args.test_batch_size > 0:
-            test(model, device, test_loader)
-
-    # Save model
-    if args.save_path:
-        torch.save(model.state_dict(), os.path.join(args.save_path, "mnist_cnn.pt"))
-
-
-if __name__ == "__main__":
-    main()
diff --git a/samples/python/training/orttrainer/pytorch_transformer/README.md b/samples/python/training/orttrainer/pytorch_transformer/README.md
deleted file mode 100644
index cda8cba6ca0ad..0000000000000
--- a/samples/python/training/orttrainer/pytorch_transformer/README.md
+++ /dev/null
@@ -1,33 +0,0 @@
-# TransformerModel example
-
-This example was adapted from Pytorch's [Sequence-to-Sequence Modeling with nn.Transformer and TorchText](https://pytorch.org/tutorials/beginner/transformer_tutorial.html) tutorial
-
-## Requirements
-
-* PyTorch 1.6+
-* TorchText 0.6+
-* ONNX Runtime 1.5+
-
-## Running PyTorch version
-
-```bash
-python pt_train.py
-```
-
-## Running ONNX Runtime version
-
-```bash
-python ort_train.py
-```
-
-## Optional arguments
-
-| Argument          | Description                                             | Default   |
-| :---------------- | :-----------------------------------------------------: | --------: |
-| --batch-size      | input batch size for training                           | 20        |
-| --test-batch-size | input batch size for testing                            | 20        |
-| --epochs          | number of epochs to train                               | 2         |
-| --lr              | learning rate                                           | 0.001     |
-| --no-cuda         | disables CUDA training                                  | False     |
-| --seed            | random seed                                             | 1         |
-| --log-interval    | how many batches to wait before logging training status | 200       |
diff --git a/samples/python/training/orttrainer/pytorch_transformer/ort_train.py b/samples/python/training/orttrainer/pytorch_transformer/ort_train.py
deleted file mode 100644
index 551e878cc9035..0000000000000
--- a/samples/python/training/orttrainer/pytorch_transformer/ort_train.py
+++ /dev/null
@@ -1,89 +0,0 @@
-import argparse
-
-import torch
-from ort_utils import my_loss, transformer_model_description_dynamic_axes
-from pt_model import TransformerModel
-from utils import get_batch, prepare_data
-
-import onnxruntime
-
-
-def train(trainer, data_source, device, epoch, args, bptt=35):
-    total_loss = 0.0
-    for batch, i in enumerate(range(0, data_source.size(0) - 1, bptt)):
-        data, targets = get_batch(data_source, i)
-
-        loss, pred = trainer.train_step(data, targets)
-        total_loss += loss.item()
-        if batch % args.log_interval == 0 and batch > 0:
-            cur_loss = total_loss / args.log_interval
-            print(
-                "epoch {:3d} | {:5d}/{:5d} batches | loss {:5.2f}".format(
-                    epoch, batch, len(data_source) // bptt, cur_loss
-                )
-            )
-            total_loss = 0
-
-
-def evaluate(trainer, data_source, bptt=35):
-    total_loss = 0.0
-    with torch.no_grad():
-        for i in range(0, data_source.size(0) - 1, bptt):
-            data, targets = get_batch(data_source, i)
-            loss, pred = trainer.eval_step(data, targets)
-            total_loss += len(data) * loss.item()
-    return total_loss / (len(data_source) - 1)
-
-
-if __name__ == "__main__":
-    # Training settings
-    parser = argparse.ArgumentParser(description="PyTorch TransformerModel example")
-    parser.add_argument(
-        "--batch-size", type=int, default=20, metavar="N", help="input batch size for training (default: 20)"
-    )
-    parser.add_argument(
-        "--test-batch-size", type=int, default=20, metavar="N", help="input batch size for testing (default: 20)"
-    )
-    parser.add_argument("--epochs", type=int, default=2, metavar="N", help="number of epochs to train (default: 2)")
-    parser.add_argument("--lr", type=float, default=0.001, metavar="LR", help="learning rate (default: 0.001)")
-    parser.add_argument("--no-cuda", action="store_true", default=False, help="disables CUDA training")
-    parser.add_argument("--seed", type=int, default=1, metavar="S", help="random seed (default: 1)")
-    parser.add_argument(
-        "--log-interval",
-        type=int,
-        default=200,
-        metavar="N",
-        help="how many batches to wait before logging training status (default: 200)",
-    )
-
-    # Basic setup
-    args = parser.parse_args()
-    if not args.no_cuda and torch.cuda.is_available():
-        device = "cuda"
-    else:
-        device = "cpu"
-    torch.manual_seed(args.seed)
-    onnxruntime.set_seed(args.seed)
-
-    # Model
-    optim_config = onnxruntime.training.optim.SGDConfig(lr=args.lr)
-    model_desc = transformer_model_description_dynamic_axes()
-    model = TransformerModel(28785, 200, 2, 200, 2, 0.2).to(device)
-
-    # Preparing data
-    train_data, val_data, test_data = prepare_data(device, args.batch_size, args.test_batch_size)
-    trainer = onnxruntime.training.ORTTrainer(model, model_desc, optim_config, loss_fn=my_loss)
-
-    # Train
-    for epoch in range(1, args.epochs + 1):
-        train(trainer, train_data, device, epoch, args)
-        val_loss = evaluate(trainer, val_data)
-        print("-" * 89)
-        print(f"| end of epoch {epoch:3d} | valid loss {val_loss:5.2f} | ")
-        print("-" * 89)
-
-    # Evaluate
-    test_loss = evaluate(trainer, test_data)
-    print("=" * 89)
-    print(f"| End of training | test loss {test_loss:5.2f}")
-    print("=" * 89)
diff --git a/samples/python/training/orttrainer/pytorch_transformer/ort_utils.py b/samples/python/training/orttrainer/pytorch_transformer/ort_utils.py
deleted file mode 100644
index 73992f5596f5f..0000000000000
--- a/samples/python/training/orttrainer/pytorch_transformer/ort_utils.py
+++ /dev/null
@@ -1,47 +0,0 @@
-import torch
-
-from onnxruntime.capi.ort_trainer import IODescription as Legacy_IODescription
-from onnxruntime.capi.ort_trainer import ModelDescription as Legacy_ModelDescription
-
-
-def my_loss(x, target):
-    x = x.view(-1, 28785)
-    return torch.nn.CrossEntropyLoss()(x, target)
-
-
-def transformer_model_description(bptt=35, batch_size=20, ntokens=28785):
-    model_desc = {
-        "inputs": [("input1", [bptt, batch_size]), ("label", [bptt * batch_size])],
-        "outputs": [("loss", [], True), ("predictions", [bptt, batch_size, ntokens])],
-    }
-    return model_desc
-
-
-def transformer_model_description_dynamic_axes(ntokens=28785):
-    model_desc = {
-        "inputs": [("input1", ["bptt", "batch_size"]), ("label", ["bptt_x_batch_size"])],
-        "outputs": [("loss", [], True), ("predictions", ["bptt", "batch_size", ntokens])],
-    }
-    return model_desc
-
-
-def legacy_transformer_model_description(bptt=35, batch_size=20, ntokens=28785):
-    input_desc = Legacy_IODescription("input1", [bptt, batch_size])
-    label_desc = Legacy_IODescription("label", [bptt * batch_size])
-    loss_desc = Legacy_IODescription("loss", [])
-    predictions_desc = Legacy_IODescription("predictions", [bptt, batch_size, ntokens])
-    return (
-        Legacy_ModelDescription([input_desc, label_desc], [loss_desc, predictions_desc]),
-        Legacy_IODescription("__learning_rate", [1]),
-    )
-
-
-def legacy_transformer_model_description_dynamic_axes(ntokens=28785):
-    input_desc = Legacy_IODescription("input1", ["bptt", "batch_size"])
-    label_desc = Legacy_IODescription("label", ["bptt_x_batch_size"])
-    loss_desc = Legacy_IODescription("loss", [])
-    predictions_desc = Legacy_IODescription("predictions", ["bptt", "batch_size", ntokens])
-    return (
-        Legacy_ModelDescription([input_desc, label_desc], [loss_desc, predictions_desc]),
-        Legacy_IODescription("__learning_rate", [1]),
-    )
diff --git a/samples/python/training/orttrainer/pytorch_transformer/pt_model.py b/samples/python/training/orttrainer/pytorch_transformer/pt_model.py
deleted file mode 100644
index 07752f52d7a84..0000000000000
--- a/samples/python/training/orttrainer/pytorch_transformer/pt_model.py
+++ /dev/null
@@ -1,62 +0,0 @@
-import math
-
-import torch
-import torch.nn as nn
-
-
-class TransformerModel(nn.Module):
-    def __init__(self, ntoken, ninp, nhead, nhid, nlayers, dropout=0.5):
-        super().__init__()
-        from torch.nn import TransformerEncoder, TransformerEncoderLayer
-
-        self.model_type = "Transformer"
-        self.input1_mask = None
-        self.pos_encoder = PositionalEncoding(ninp, dropout)
-        encoder_layers = TransformerEncoderLayer(ninp, nhead, nhid, dropout)
-        self.transformer_encoder = TransformerEncoder(encoder_layers, nlayers)
-        self.encoder = nn.Embedding(ntoken, ninp)
-        self.ninp = ninp
-        self.decoder = nn.Linear(ninp, ntoken)
-
-        self.init_weights()
-
-    def _generate_square_subsequent_mask(self, sz):
-        mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
-        mask = mask.float().masked_fill(mask == 0, float("-inf")).masked_fill(mask == 1, float(0.0))
-        return mask
-
-    def init_weights(self):
-        initrange = 0.1
-        self.encoder.weight.data.uniform_(-initrange, initrange)
-        self.decoder.bias.data.zero_()
-        self.decoder.weight.data.uniform_(-initrange, initrange)
-
-    def forward(self, input1):
-        if self.input1_mask is None or self.input1_mask.size(0) != input1.size(0):
-            device = input1.device
-            mask = self._generate_square_subsequent_mask(input1.size(0)).to(device)
-            self.input1_mask = mask
-
-        input1 = self.encoder(input1) * math.sqrt(self.ninp)
-        input1 = self.pos_encoder(input1)
-        output = self.transformer_encoder(input1, self.input1_mask)
-        output = self.decoder(output)
-        return output
-
-
-class PositionalEncoding(nn.Module):
-    def __init__(self, d_model, dropout=0.1, max_len=5000):
-        super().__init__()
-        self.dropout = nn.Dropout(p=dropout)
-
-        pe = torch.zeros(max_len, d_model)
-        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
-        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
-        pe[:, 0::2] = torch.sin(position * div_term)
-        pe[:, 1::2] = torch.cos(position * div_term)
-        pe = pe.unsqueeze(0).transpose(0, 1)
-        self.register_buffer("pe", pe)
-
-    def forward(self, x):
-        x = x + self.pe[: x.size(0), :]
-        return self.dropout(x)
diff --git a/samples/python/training/orttrainer/pytorch_transformer/pt_train.py b/samples/python/training/orttrainer/pytorch_transformer/pt_train.py
deleted file mode 100644
index a197fb50357e9..0000000000000
--- a/samples/python/training/orttrainer/pytorch_transformer/pt_train.py
+++ /dev/null
@@ -1,94 +0,0 @@
-import argparse
-
-import torch
-import torch.nn as nn
-from pt_model import TransformerModel
-from utils import get_batch, prepare_data
-
-
-def train(model, data_source, device, epoch, args, bptt=35):
-    total_loss = 0.0
-    model.train()
-    for batch, i in enumerate(range(0, data_source.size(0) - 1, bptt)):
-        data, targets = get_batch(data_source, i)
-
-        optimizer.zero_grad()
-        output = model(data)
-        loss = criterion(output.view(-1, 28785), targets)
-        loss.backward()
-        optimizer.step()
-
-        total_loss += loss.item()
-        if batch % args.log_interval == 0 and batch > 0:
-            cur_loss = total_loss / args.log_interval
-            print(
-                "epoch {:3d} | {:5d}/{:5d} batches | loss {:5.2f}".format(
-                    epoch, batch, len(data_source) // bptt, cur_loss
-                )
-            )
-            total_loss = 0
-
-
-def evaluate(model, data_source, criterion, bptt=35):
-    total_loss = 0.0
-    model.eval()
-    with torch.no_grad():
-        for i in range(0, data_source.size(0) - 1, bptt):
-            data, targets = get_batch(data_source, i)
-            output = model(data)
-            output_flat = output.view(-1, 28785)
-            total_loss += len(data) * criterion(output_flat, targets).item()
-    return total_loss / (len(data_source) - 1)
-
-
-if __name__ == "__main__":
-    # Training settings
-    parser = argparse.ArgumentParser(description="PyTorch TransformerModel example")
-    parser.add_argument(
-        "--batch-size", type=int, default=20, metavar="N", help="input batch size for training (default: 20)"
-    )
-    parser.add_argument(
-        "--test-batch-size", type=int, default=20, metavar="N", help="input batch size for testing (default: 20)"
-    )
-    parser.add_argument("--epochs", type=int, default=2, metavar="N", help="number of epochs to train (default: 2)")
-    parser.add_argument("--lr", type=float, default=0.001, metavar="LR", help="learning rate (default: 0.001)")
-    parser.add_argument("--no-cuda", action="store_true", default=False, help="disables CUDA training")
-    parser.add_argument("--seed", type=int, default=1, metavar="S", help="random seed (default: 1)")
-    parser.add_argument(
-        "--log-interval",
-        type=int,
-        default=200,
-        metavar="N",
-        help="how many batches to wait before logging training status (default: 200)",
-    )
-
-    # Basic setup
-    args = parser.parse_args()
-    if not args.no_cuda and torch.cuda.is_available():
-        device = "cuda"
-    else:
-        device = "cpu"
-    torch.manual_seed(args.seed)
-
-    # Model
-    criterion = nn.CrossEntropyLoss()
-    lr = 0.001
-    model = TransformerModel(28785, 200, 2, 200, 2, 0.2).to(device)
-    optimizer = torch.optim.SGD(model.parameters(), lr=lr)
-
-    # Preparing data
-    train_data, val_data, test_data = prepare_data(device, args.batch_size, args.test_batch_size)
-
-    # Train
-    for epoch in range(1, args.epochs + 1):
-        train(model, train_data, device, epoch, args)
-        val_loss = evaluate(model, val_data, criterion)
-        print("-" * 89)
-        print(f"| end of epoch {epoch:3d} | valid loss {val_loss:5.2f} | ")
-        print("-" * 89)
-
-    # Evaluate
-    test_loss = evaluate(model, test_data, criterion)
-    print("=" * 89)
-    print(f"| End of training | test loss {test_loss:5.2f}")
-    print("=" * 89)
diff --git a/samples/python/training/orttrainer/pytorch_transformer/utils.py b/samples/python/training/orttrainer/pytorch_transformer/utils.py
deleted file mode 100644
index 3be8b6cf3f420..0000000000000
--- a/samples/python/training/orttrainer/pytorch_transformer/utils.py
+++ /dev/null
@@ -1,59 +0,0 @@
-import os
-
-import torch
-from torchtext.data.utils import get_tokenizer
-from torchtext.utils import download_from_url, extract_archive
-from torchtext.vocab import build_vocab_from_iterator
-
-
-def batchify(data, bsz, device):
-    # Divide the dataset into bsz parts.
-    nbatch = data.size(0) // bsz
-    # Trim off any extra elements that wouldn't cleanly fit (remainders).
-    data = data.narrow(0, 0, nbatch * bsz)
-    # Evenly divide the data across the bsz batches.
-    data = data.view(bsz, -1).t().contiguous()
-    return data.to(device)
-
-
-def get_batch(source, i, bptt=35):
-    seq_len = min(bptt, len(source) - 1 - i)
-    data = source[i : i + seq_len]
-    target = source[i + 1 : i + 1 + seq_len].view(-1)
-    return data, target
-
-
-def prepare_data(device="cpu", train_batch_size=20, eval_batch_size=20, data_dir=None):
-    url = "https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip"
-
-    download_path = ".data_wikitext_2_v1"
-    extract_path = None
-    if data_dir:
-        download_path = os.path.join(data_dir, "download")
-        os.makedirs(download_path, exist_ok=True)
-        download_path = os.path.join(download_path, "wikitext-2-v1.zip")
-
-        extract_path = os.path.join(data_dir, "extracted")
-        os.makedirs(extract_path, exist_ok=True)
-
-    test_filepath, valid_filepath, train_filepath = extract_archive(
-        download_from_url(url, root=download_path), to_path=extract_path
-    )
-    tokenizer = get_tokenizer("basic_english")
-    vocab = build_vocab_from_iterator(map(tokenizer, iter(open(train_filepath, encoding="utf8"))))  # noqa: SIM115
-
-    def data_process(raw_text_iter):
-        data = [torch.tensor([vocab[token] for token in tokenizer(item)], dtype=torch.long) for item in raw_text_iter]
-        return torch.cat(tuple(filter(lambda t: t.numel() > 0, data)))
-
-    train_data = data_process(iter(open(train_filepath, encoding="utf8")))  # noqa: SIM115
-    val_data = data_process(iter(open(valid_filepath, encoding="utf8")))  # noqa: SIM115
-    test_data = data_process(iter(open(test_filepath, encoding="utf8")))  # noqa: SIM115
-
-    device = torch.device(device)
-
-    train_data = batchify(train_data, train_batch_size, device)
-    val_data = batchify(val_data, eval_batch_size, device)
-    test_data = batchify(test_data, eval_batch_size, device)
-
-    return train_data, val_data, test_data
diff --git a/setup.py b/setup.py
index 7e6ab93194b0d..798c8c4b2895b 100644
--- a/setup.py
+++ b/setup.py
@@ -192,26 +192,37 @@ def run(self):
 
                 cuda_dependencies = [
                     "libcublas.so.11",
+                    "libcublas.so.12",
                     "libcublasLt.so.11",
-                    "libcudnn.so.8",
+                    "libcublasLt.so.12",
                     "libcudart.so.11.0",
-                    "libcurand.so.10",
+                    "libcudart.so.12",
+                    "libcudnn.so.8",
                     "libcufft.so.10",
+                    "libcufft.so.11",
+                    "libcurand.so.10",
                 ]
                 rocm_dependencies = [
-                    "librccl.so.1",
-                    "libnuma.so.1",
                     "libamd_comgr.so.2",
+                    "libamdhip64.so.5",
                     "libdrm.so.2",
-                    "librocblas.so.0",
                     "libdrm_amdgpu.so.1",
-                    "libamdhip64.so.5",
-                    "libroctracer64.so.4",
-                    "libMIOpen.so.1",
-                    "libtinfo.so.6",
                     "libelf.so.1",
-                    "librocm_smi64.so.5",
+                    "libhipfft.so.0",
+                    "libhiprtc.so.5",
                     "libhsa-runtime64.so.1",
+                    "libMIOpen.so.1",
+                    "libnuma.so.1",
+                    "librccl.so.1",
+                    "librocblas.so.3",
+                    "librocfft.so.0",
+                    "librocm_smi64.so.5",
+                    "libroctracer64.so.4",
+                    "libtinfo.so.6",
+                    "libmigraphx_c.so.3",
+                    "libmigraphx.so.2",
+                    "libmigraphx_onnx.so.2",
+                    "libmigraphx_tf.so.2",
                 ]
 
                 tensorrt_dependencies = ["libnvinfer.so.8", "libnvinfer_plugin.so.8", "libnvonnxparser.so.8"]
@@ -387,7 +398,6 @@ def finalize_options(self):
     "onnxruntime",
     "onnxruntime.backend",
     "onnxruntime.capi",
-    "onnxruntime.capi.training",
     "onnxruntime.datasets",
     "onnxruntime.tools",
     "onnxruntime.tools.mobile_helpers",
@@ -462,6 +472,7 @@ def finalize_options(self):
                 "onnxruntime.training.ortmodule.torch_cpp_extensions.cpu.torch_interop_utils",
                 "onnxruntime.training.ortmodule.torch_cpp_extensions.cuda.torch_gpu_allocator",
                 "onnxruntime.training.ortmodule.torch_cpp_extensions.cuda.fused_ops",
+                "onnxruntime.training.ortmodule.graph_optimizers",
                 "onnxruntime.training.ort_triton",
                 "onnxruntime.training.ort_triton.kernel",
                 "onnxruntime.training.utils",
@@ -520,6 +531,10 @@ def finalize_options(self):
                 # Training CPU package for ADO feeds is called onnxruntime-training-cpu
                 package_name = "onnxruntime-training-cpu"
 
+            if rocm_version:
+                # Training ROCM package for ADO feeds is called onnxruntime-training-rocm
+                package_name = "onnxruntime-training-rocm"
+
 if package_name == "onnxruntime-tvm":
     packages += ["onnxruntime.providers.tvm"]
 
diff --git a/swift/OnnxRuntimeBindingsTests/Resources/single_add.basic.ort b/swift/OnnxRuntimeBindingsTests/Resources/single_add.basic.ort
deleted file mode 100644
index f622784b35366..0000000000000
Binary files a/swift/OnnxRuntimeBindingsTests/Resources/single_add.basic.ort and /dev/null differ
diff --git a/swift/OnnxRuntimeBindingsTests/SwiftOnnxRuntimeBindingsTests.swift b/swift/OnnxRuntimeBindingsTests/SwiftOnnxRuntimeBindingsTests.swift
deleted file mode 100644
index 48e276487e281..0000000000000
--- a/swift/OnnxRuntimeBindingsTests/SwiftOnnxRuntimeBindingsTests.swift
+++ /dev/null
@@ -1,62 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-import XCTest
-import Foundation
-@testable import OnnxRuntimeBindings
-
-final class SwiftOnnxRuntimeBindingsTests: XCTestCase {
-    let modelPath: String = Bundle.module.url(forResource: "single_add.basic", withExtension: "ort")!.path
-
-    func testGetVersionString() throws {
-        do {
-            let version = ORTVersion()
-            XCTAssertNotNil(version)
-        } catch let error {
-            XCTFail(error.localizedDescription)
-        }
-    }
-
-    func testCreateSession() throws {
-        do {
-            let env = try ORTEnv(loggingLevel: ORTLoggingLevel.verbose)
-            let options = try ORTSessionOptions()
-            try options.setLogSeverityLevel(ORTLoggingLevel.verbose)
-            try options.setIntraOpNumThreads(1)
-            // Create the ORTSession
-            _ = try ORTSession(env: env, modelPath: modelPath, sessionOptions: options)
-        } catch let error {
-            XCTFail(error.localizedDescription)
-        }
-    }
-
-    func testAppendCoreMLEP() throws {
-        do {
-            let env = try ORTEnv(loggingLevel: ORTLoggingLevel.verbose)
-            let sessionOptions: ORTSessionOptions = try ORTSessionOptions()
-            let coreMLOptions: ORTCoreMLExecutionProviderOptions = ORTCoreMLExecutionProviderOptions()
-            coreMLOptions.enableOnSubgraphs = true
-            try sessionOptions.appendCoreMLExecutionProvider(with: coreMLOptions)
-
-            XCTAssertTrue(ORTIsCoreMLExecutionProviderAvailable())
-            _ = try ORTSession(env: env, modelPath: modelPath, sessionOptions: sessionOptions)
-        } catch let error {
-            XCTFail(error.localizedDescription)
-        }
-    }
-
-    func testAppendXnnpackEP() throws {
-        do {
-            let env = try ORTEnv(loggingLevel: ORTLoggingLevel.verbose)
-            let sessionOptions: ORTSessionOptions = try ORTSessionOptions()
-            let XnnpackOptions: ORTXnnpackExecutionProviderOptions = ORTXnnpackExecutionProviderOptions()
-            XnnpackOptions.intra_op_num_threads = 2
-            try sessionOptions.appendXnnpackExecutionProvider(with: XnnpackOptions)
-
-            XCTAssertTrue(ORTIsCoreMLExecutionProviderAvailable())
-            _ = try ORTSession(env: env, modelPath: modelPath, sessionOptions: sessionOptions)
-        } catch let error {
-            XCTFail(error.localizedDescription)
-        }
-    }
-}
diff --git a/tools/android_custom_build/Dockerfile b/tools/android_custom_build/Dockerfile
index bc50e4fb0a943..754a6633b0c62 100644
--- a/tools/android_custom_build/Dockerfile
+++ b/tools/android_custom_build/Dockerfile
@@ -55,7 +55,7 @@ WORKDIR /workspace
 
 # install Android SDK and tools
 ENV ANDROID_HOME=~/android-sdk
-ENV NDK_VERSION=25.0.8775105
+ENV NDK_VERSION=26.1.10909125
 ENV ANDROID_NDK_HOME=${ANDROID_HOME}/ndk/${NDK_VERSION}
 
 RUN aria2c -q -d /tmp -o cmdline-tools.zip \
diff --git a/tools/android_custom_build/build_custom_android_package.py b/tools/android_custom_build/build_custom_android_package.py
index aa57cf341942c..35adb41690e98 100755
--- a/tools/android_custom_build/build_custom_android_package.py
+++ b/tools/android_custom_build/build_custom_android_package.py
@@ -22,7 +22,7 @@ def is_windows():
 def run(cmd_arg_list, **kwargs):
     print(f"Running command:\n  {shlex.join(cmd_arg_list)}")
     kwargs.update({"check": True})
-    return subprocess.run(cmd_arg_list, **kwargs)
+    return subprocess.run(cmd_arg_list, **kwargs)  # noqa: PLW1510
 
 
 def parse_args():
diff --git a/tools/ci_build/amd_hipify.py b/tools/ci_build/amd_hipify.py
index e0293128045a5..8ea0481c9b101 100644
--- a/tools/ci_build/amd_hipify.py
+++ b/tools/ci_build/amd_hipify.py
@@ -35,6 +35,9 @@ def hipify(hipify_perl_path, src_file_path, dst_file_path):
     s = s.replace("HIPBLAS_OP_T", "rocblas_operation_transpose")
     s = s.replace("HIPBLAS_OP_N", "rocblas_operation_none")
 
+    # in rocm 6.0, hipify-perl, the -roc option also maps __half -> rocblas_half which we don't want
+    s = s.replace("rocblas_half", "__half")
+
     s = s.replace("RegisterCudaContribKernels", "RegisterRocmContribKernels")
     s = s.replace("cudaEvent", "hipEvent")
     s = s.replace("CreateCudaAllocator", "CreateRocmAllocator")
@@ -150,6 +153,8 @@ def hipify(hipify_perl_path, src_file_path, dst_file_path):
 
     # CUFFT -> HIPFFT
     s = s.replace("CUFFT", "HIPFFT")
+    s = s.replace("cufftXtMakePlanMany", "hipfftXtMakePlanMany")
+    s = s.replace("cufftXtExec", "hipfftXtExec")
 
     # Undo where above hipify steps went too far.
     s = s.replace("id, ROCM", "id, CUDA")  # cuda_execution_provider.cc
@@ -169,6 +174,24 @@ def hipify(hipify_perl_path, src_file_path, dst_file_path):
     s = s.replace("#include <hiprand_kernel.h>", "#include <hiprand/hiprand_kernel.h>")
     s = s.replace("#include <rocblas.h>", "#include <rocblas/rocblas.h>")
     s = s.replace("#include <hipblas.h>", "#include <hipblas/hipblas.h>")
+    s = s.replace("#include <hipfft.h>", "#include <hipfft/hipfft.h>")
+    s = s.replace('#include "hipfft.h"', "#include <hipfft/hipfft.h>")
+    s = s.replace('#include "hipfftXt.h"', "#include <hipfft/hipfftXt.h>")
+
+    # Fix onnxruntime/contrib_ops/rocm/transformers. They include cpu headers which use "cuda" in their names.
+    s = s.replace("rocm_device_prop_", "cuda_device_prop_")
+    s = s.replace("rocm_device_arch_", "cuda_device_arch_")
+
+    # We want hipfft, which needs hipDataType etc, but only do this for files that have "fft" in their names
+    # And we do this last, undoing or fixing hipify mistakes.
+    if "fft" in src_file_path:
+        s = s.replace("rocblas_datatype", "hipDataType")
+        s = s.replace("hipDataType_f32_c", "HIP_C_32F")
+        s = s.replace("hipDataType_f32_r", "HIP_R_32F")
+        s = s.replace("hipDataType_f64_c", "HIP_C_64F")
+        s = s.replace("hipDataType_f64_r", "HIP_R_64F")
+        s = s.replace("hipDataType_f16_c", "HIP_C_16F")
+        s = s.replace("hipDataType_f16_r", "HIP_R_16F")
 
     with open(dst_file_path, "w") as f:
         f.write(s)
diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py
index 1d875f9681ef2..9d3ffb90af07e 100644
--- a/tools/ci_build/build.py
+++ b/tools/ci_build/build.py
@@ -14,6 +14,15 @@
 import sys
 from pathlib import Path
 
+
+def version_to_tuple(version: str) -> tuple:
+    v = []
+    for s in version.split("."):
+        with contextlib.suppress(ValueError):
+            v.append(int(s))
+    return tuple(v)
+
+
 SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
 REPO_DIR = os.path.normpath(os.path.join(SCRIPT_DIR, "..", ".."))
 
@@ -66,15 +75,13 @@ def _str_to_bool(s):
 
 
 def _openvino_verify_device_type(device_read):
-    choices = ["CPU_FP32", "CPU_FP16", "GPU_FP32", "GPU_FP16", "VPUX_FP16", "VPUX_U8"]
+    choices = ["CPU_FP32", "CPU_FP16", "GPU_FP32", "GPU_FP16"]
 
     choices1 = [
         "CPU_FP32_NO_PARTITION",
         "CPU_FP16_NO_PARTITION",
         "GPU_FP32_NO_PARTITION",
         "GPU_FP16_NO_PARTITION",
-        "VPUX_FP16_NO_PARTITION",
-        "VPUX_U8_NO_PARTITION",
     ]
     status_hetero = True
     res = False
@@ -89,7 +96,7 @@ def _openvino_verify_device_type(device_read):
         if len(comma_separated_devices) < 2:
             print("At least two devices required in Hetero/Multi/Auto Mode")
             status_hetero = False
-        dev_options = ["CPU", "GPU", "VPUX"]
+        dev_options = ["CPU", "GPU"]
         for dev in comma_separated_devices:
             if dev not in dev_options:
                 status_hetero = False
@@ -100,7 +107,7 @@ def invalid_hetero_build():
         print("specify the keyword HETERO or MULTI or AUTO followed by the devices ")
         print("in the order of priority you want to build\n")
         print("The different hardware devices that can be added in HETERO or MULTI or AUTO")
-        print("are ['CPU','GPU', 'VPUX'] \n")
+        print("are ['CPU','GPU'] \n")
         print("An example of how to specify the hetero build type. Ex: HETERO:GPU,CPU \n")
         print("An example of how to specify the MULTI build type. Ex: MULTI:GPU,CPU \n")
         print("An example of how to specify the AUTO build type. Ex: AUTO:GPU,CPU \n")
@@ -247,6 +254,7 @@ def convert_arg_line_to_args(self, arg_line):
         "--cudnn_home is not specified.",
     )
     parser.add_argument("--enable_cuda_line_info", action="store_true", help="Enable CUDA line info.")
+    parser.add_argument("--enable_cuda_nhwc_ops", action="store_true", help="Enable CUDA NHWC ops in build.")
 
     # Python bindings
     parser.add_argument("--enable_pybind", action="store_true", help="Enable Python Bindings.")
@@ -370,8 +378,9 @@ def convert_arg_line_to_args(self, arg_line):
     parser.add_argument("--gdk_platform", default="Scarlett", help="Sets the GDK target platform.")
 
     parser.add_argument("--ios", action="store_true", help="build for ios")
+
     parser.add_argument(
-        "--ios_sysroot", default="", help="Specify the location name of the macOS platform SDK to be used"
+        "--apple_sysroot", default="", help="Specify the location name of the macOS platform SDK to be used"
     )
     parser.add_argument(
         "--ios_toolchain_file",
@@ -797,6 +806,7 @@ def run_subprocess(
 
     my_env.update(env)
 
+    log.info(" ".join(args))
     return run(*args, cwd=cwd, capture_stdout=capture_stdout, shell=shell, env=my_env)
 
 
@@ -898,7 +908,7 @@ def number_of_nvcc_threads(args):
                 # Standard_NC4as_T4_v3 has 4 CPUs and 28 GB memory. When parallel=4 and nvcc_threads=2,
                 # total nvcc threads is 4 * 2, which is barely able to build in 28 GB memory so we will use nvcc_threads=1.
                 memory_per_thread = 4 * 1024 * 1024 * 1024
-                fmha_cu_files = 4 if is_windows() else 8
+                fmha_cu_files = 4 if is_windows() else 16
                 fmha_parallel_jobs = min(fmha_cu_files, number_of_parallel_jobs(args))
                 nvcc_threads = max(1, int(available_memory / (memory_per_thread * fmha_parallel_jobs)))
                 print(
@@ -1026,6 +1036,7 @@ def generate_build_tree(
         "-Donnxruntime_USE_MPI=" + ("ON" if args.use_mpi else "OFF"),
         "-Donnxruntime_ENABLE_MEMORY_PROFILE=" + ("ON" if args.enable_memory_profile else "OFF"),
         "-Donnxruntime_ENABLE_CUDA_LINE_NUMBER_INFO=" + ("ON" if args.enable_cuda_line_info else "OFF"),
+        "-Donnxruntime_USE_CUDA_NHWC_OPS=" + ("ON" if args.enable_cuda_nhwc_ops else "OFF"),
         "-Donnxruntime_BUILD_WEBASSEMBLY_STATIC_LIB=" + ("ON" if args.build_wasm_static_lib else "OFF"),
         "-Donnxruntime_ENABLE_WEBASSEMBLY_EXCEPTION_CATCHING="
         + ("OFF" if args.disable_wasm_exception_catching else "ON"),
@@ -1084,6 +1095,12 @@ def generate_build_tree(
     if args.use_cuda:
         nvcc_threads = number_of_nvcc_threads(args)
         cmake_args.append("-Donnxruntime_NVCC_THREADS=" + str(nvcc_threads))
+        if not disable_float8_types and args.cuda_version:
+            if version_to_tuple(args.cuda_version) < (11, 8):
+                raise BuildError(
+                    f"Float 8 types require CUDA>=11.8. They must be disabled on CUDA=={args.cuda_version}. "
+                    f"Add '--disable_types float8' to your command line. See option disable_types."
+                )
     if args.use_rocm:
         cmake_args.append("-Donnxruntime_ROCM_HOME=" + rocm_home)
         cmake_args.append("-Donnxruntime_ROCM_VERSION=" + args.rocm_version)
@@ -1157,8 +1174,6 @@ def generate_build_tree(
             "-Donnxruntime_USE_OPENVINO_GPU_FP16=" + ("ON" if args.use_openvino == "GPU_FP16" else "OFF"),
             "-Donnxruntime_USE_OPENVINO_CPU_FP32=" + ("ON" if args.use_openvino == "CPU_FP32" else "OFF"),
             "-Donnxruntime_USE_OPENVINO_CPU_FP16=" + ("ON" if args.use_openvino == "CPU_FP16" else "OFF"),
-            "-Donnxruntime_USE_OPENVINO_VPUX_FP16=" + ("ON" if args.use_openvino == "VPUX_FP16" else "OFF"),
-            "-Donnxruntime_USE_OPENVINO_VPUX_U8=" + ("ON" if args.use_openvino == "VPUX_U8" else "OFF"),
             "-Donnxruntime_USE_OPENVINO_GPU_FP32_NP="
             + ("ON" if args.use_openvino == "GPU_FP32_NO_PARTITION" else "OFF"),
             "-Donnxruntime_USE_OPENVINO_GPU_FP16_NP="
@@ -1167,18 +1182,15 @@ def generate_build_tree(
             + ("ON" if args.use_openvino == "CPU_FP32_NO_PARTITION" else "OFF"),
             "-Donnxruntime_USE_OPENVINO_CPU_FP16_NP="
             + ("ON" if args.use_openvino == "CPU_FP16_NO_PARTITION" else "OFF"),
-            "-Donnxruntime_USE_OPENVINO_VPUX_FP16_NP="
-            + ("ON" if args.use_openvino == "VPUX_FP16_NP_PARTITION" else "OFF"),
-            "-Donnxruntime_USE_OPENVINO_VPUX_U8_NP=" + ("ON" if args.use_openvino == "VPUX_U8_NP_PARTITION" else "OFF"),
             "-Donnxruntime_USE_OPENVINO_HETERO=" + ("ON" if args.use_openvino.startswith("HETERO") else "OFF"),
             "-Donnxruntime_USE_OPENVINO_DEVICE=" + (args.use_openvino),
             "-Donnxruntime_USE_OPENVINO_MULTI=" + ("ON" if args.use_openvino.startswith("MULTI") else "OFF"),
             "-Donnxruntime_USE_OPENVINO_AUTO=" + ("ON" if args.use_openvino.startswith("AUTO") else "OFF"),
         ]
 
-    # TensorRT and OpenVINO providers currently only support
+    # VitisAI and OpenVINO providers currently only support
     # full_protobuf option.
-    if args.use_full_protobuf or args.use_tensorrt or args.use_openvino or args.use_vitisai or args.gen_doc:
+    if args.use_full_protobuf or args.use_openvino or args.use_vitisai or args.gen_doc:
         cmake_args += ["-Donnxruntime_USE_FULL_PROTOBUF=ON", "-DProtobuf_USE_STATIC_LIBS=ON"]
 
     if args.use_tvm and args.llvm_path is not None:
@@ -1263,33 +1275,38 @@ def generate_build_tree(
     if args.use_snpe:
         cmake_args += ["-Donnxruntime_USE_SNPE=ON"]
 
-    if args.ios:
+    if args.build_apple_framework or args.ios:
         if not args.cmake_generator == "Xcode":
-            raise BuildError("iOS build requires use of the Xcode CMake generator ('--cmake_generator Xcode').")
+            raise BuildError(
+                "iOS/MacOS framework build requires use of the Xcode CMake generator ('--cmake_generator Xcode')."
+            )
 
         needed_args = [
-            args.ios_sysroot,
+            args.apple_sysroot,
             args.apple_deploy_target,
         ]
         arg_names = [
-            "--ios_sysroot          " + "<the location or name of the macOS platform SDK>",  # noqa: ISC003
-            "--apple_deploy_target  " + "<the minimum version of the target platform>",  # noqa: ISC003
+            "--apple_sysroot          " + "<the location or name of the macOS platform SDK>",
+            "--apple_deploy_target  " + "<the minimum version of the target platform>",
         ]
         if not all(needed_args):
             raise BuildError(
-                "iOS build on MacOS canceled due to missing arguments: "
+                "iOS/MacOS framework build on MacOS canceled due to missing arguments: "
                 + ", ".join(val for val, cond in zip(arg_names, needed_args) if not cond)
             )
         cmake_args += [
-            "-DCMAKE_SYSTEM_NAME=iOS",
             "-Donnxruntime_BUILD_SHARED_LIB=ON",
-            "-DCMAKE_OSX_SYSROOT=" + args.ios_sysroot,
+            "-DCMAKE_OSX_SYSROOT=" + args.apple_sysroot,
             "-DCMAKE_OSX_DEPLOYMENT_TARGET=" + args.apple_deploy_target,
             # we do not need protoc binary for ios cross build
             "-Dprotobuf_BUILD_PROTOC_BINARIES=OFF",
-            "-DCMAKE_TOOLCHAIN_FILE="
-            + (args.ios_toolchain_file if args.ios_toolchain_file else "../cmake/onnxruntime_ios.toolchain.cmake"),
         ]
+        if args.ios:
+            cmake_args += [
+                "-DCMAKE_SYSTEM_NAME=iOS",
+                "-DCMAKE_TOOLCHAIN_FILE="
+                + (args.ios_toolchain_file if args.ios_toolchain_file else "../cmake/onnxruntime_ios.toolchain.cmake"),
+            ]
 
     if args.build_wasm:
         emsdk_dir = os.path.join(cmake_dir, "external", "emsdk")
@@ -1642,9 +1659,7 @@ def run_adb_shell(cmd):
         # GCOV_PREFIX specifies the root directory
         # for creating the runtime code coverage files.
         if args.code_coverage:
-            adb_shell(
-                "cd {0} && GCOV_PREFIX={0} GCOV_PREFIX_STRIP={1} {2}".format(device_dir, cwd.count(os.sep) + 1, cmd)
-            )
+            adb_shell(f"cd {device_dir} && GCOV_PREFIX={device_dir} GCOV_PREFIX_STRIP={cwd.count(os.sep) + 1} {cmd}")
         else:
             adb_shell(f"cd {device_dir} && {cmd}")
 
@@ -1694,9 +1709,9 @@ def run_adb_shell(cmd):
                 )
 
             if args.use_nnapi:
-                run_adb_shell("{0}/onnx_test_runner -e nnapi {0}/test".format(device_dir))
+                run_adb_shell(f"{device_dir}/onnx_test_runner -e nnapi {device_dir}/test")
             else:
-                run_adb_shell("{0}/onnx_test_runner {0}/test".format(device_dir))
+                run_adb_shell(f"{device_dir}/onnx_test_runner {device_dir}/test")
 
             # run shared_lib_test if necessary
             if args.build_shared_lib:
@@ -1707,9 +1722,9 @@ def run_adb_shell(cmd):
                 adb_push("onnxruntime_customopregistration_test", device_dir, cwd=cwd)
                 adb_shell(f"chmod +x {device_dir}/onnxruntime_shared_lib_test")
                 adb_shell(f"chmod +x {device_dir}/onnxruntime_customopregistration_test")
-                run_adb_shell("LD_LIBRARY_PATH=$LD_LIBRARY_PATH:{0} {0}/onnxruntime_shared_lib_test".format(device_dir))
+                run_adb_shell(f"LD_LIBRARY_PATH=$LD_LIBRARY_PATH:{device_dir} {device_dir}/onnxruntime_shared_lib_test")
                 run_adb_shell(
-                    "LD_LIBRARY_PATH=$LD_LIBRARY_PATH:{0} {0}/onnxruntime_customopregistration_test".format(device_dir)
+                    f"LD_LIBRARY_PATH=$LD_LIBRARY_PATH:{device_dir} {device_dir}/onnxruntime_customopregistration_test"
                 )
 
 
@@ -1753,10 +1768,10 @@ def run_ios_tests(args, source_dir, config, cwd):
         )
 
     if args.build_apple_framework:
-        package_test_py = os.path.join(source_dir, "tools", "ci_build", "github", "apple", "test_ios_packages.py")
+        package_test_py = os.path.join(source_dir, "tools", "ci_build", "github", "apple", "test_apple_packages.py")
         framework_info_file = os.path.join(cwd, "framework_info.json")
-        dynamic_framework_dir = os.path.join(cwd, config + "-" + args.ios_sysroot)
-        static_framework_dir = os.path.join(cwd, config + "-" + args.ios_sysroot, "static_framework")
+        dynamic_framework_dir = os.path.join(cwd, config + "-" + args.apple_sysroot)
+        static_framework_dir = os.path.join(cwd, config + "-" + args.apple_sysroot, "static_framework")
         # test dynamic framework
         run_subprocess(
             [
@@ -1766,6 +1781,8 @@ def run_ios_tests(args, source_dir, config, cwd):
                 dynamic_framework_dir,
                 "--framework_info_file",
                 framework_info_file,
+                "--variant",
+                "Mobile",
             ],
             cwd=cwd,
         )
@@ -1778,6 +1795,8 @@ def run_ios_tests(args, source_dir, config, cwd):
                 static_framework_dir,
                 "--framework_info_file",
                 framework_info_file,
+                "--variant",
+                "Mobile",
             ],
             cwd=cwd,
         )
@@ -1844,6 +1863,11 @@ def run_onnxruntime_tests(args, source_dir, ctest_path, build_dir, configs):
             # For CUDA or DML enabled builds test IOBinding feature
             if args.use_cuda or args.use_dml:
                 log.info("Testing IOBinding feature")
+                if args.use_dml:
+                    run_subprocess(
+                        [sys.executable, "-m", "pip", "uninstall", "--yes", "onnx"], cwd=cwd, dll_path=dll_path
+                    )
+                    run_subprocess([sys.executable, "-m", "pip", "install", "-q", "onnx"], cwd=cwd, dll_path=dll_path)
                 run_subprocess([sys.executable, "onnxruntime_test_python_iobinding.py"], cwd=cwd, dll_path=dll_path)
 
             if args.use_cuda:
@@ -2030,13 +2054,6 @@ def build_python_wheel(
         run_subprocess(args, cwd=cwd)
 
 
-def derive_linux_build_property():
-    if is_windows():
-        return '/p:IsLinuxBuild="false"'
-    else:
-        return '/p:IsLinuxBuild="true"'
-
-
 def build_nuget_package(
     cmake_path,
     source_dir,
@@ -2049,94 +2066,103 @@ def build_nuget_package(
     use_dnnl,
     use_tvm,
     use_winml,
-    use_snpe,
     use_qnn,
     enable_training_apis,
     msbuild_extra_options,
 ):
     if not (is_windows() or is_linux()):
         raise BuildError(
-            "Currently csharp builds and nuget package creation is only supportted on Windows and Linux platforms."
+            "Currently csharp builds and nuget package creation is only supported on Windows and Linux platforms."
         )
 
     csharp_build_dir = os.path.join(source_dir, "csharp")
-    is_linux_build = derive_linux_build_property()
 
     # in most cases we don't want/need to include the Xamarin mobile targets, as doing so means the Xamarin
     # mobile workloads must be installed on the machine.
     # they are only included in the Microsoft.ML.OnnxRuntime nuget package
     sln = "OnnxRuntime.DesktopOnly.CSharp.sln"
+    have_exclude_mobile_targets_option = "IncludeMobileTargets=false" in msbuild_extra_options
 
     # derive package name and execution provider based on the build args
     target_name = "/t:CreatePackage"
-    execution_provider = '/p:ExecutionProvider="None"'
-    package_name = '/p:OrtPackageId="Microsoft.ML.OnnxRuntime"'
-    enable_training_tests = '/p:TrainingEnabledNativeBuild="false"'
+    execution_provider = "/p:ExecutionProvider=None"
+    package_name = "/p:OrtPackageId=Microsoft.ML.OnnxRuntime"
+    enable_training_tests = "/p:TrainingEnabledNativeBuild=false"
+
     if enable_training_apis:
-        enable_training_tests = '/p:TrainingEnabledNativeBuild="true"'
+        enable_training_tests = "/p:TrainingEnabledNativeBuild=true"
         if use_cuda:
-            package_name = '/p:OrtPackageId="Microsoft.ML.OnnxRuntime.Training.Gpu"'
+            package_name = "/p:OrtPackageId=Microsoft.ML.OnnxRuntime.Training.Gpu"
         else:
-            package_name = '/p:OrtPackageId="Microsoft.ML.OnnxRuntime.Training"'
+            package_name = "/p:OrtPackageId=Microsoft.ML.OnnxRuntime.Training"
     elif use_winml:
-        package_name = '/p:OrtPackageId="Microsoft.AI.MachineLearning"'
+        package_name = "/p:OrtPackageId=Microsoft.AI.MachineLearning"
         target_name = "/t:CreateWindowsAIPackage"
     elif use_openvino:
-        execution_provider = '/p:ExecutionProvider="openvino"'
-        package_name = '/p:OrtPackageId="Microsoft.ML.OnnxRuntime.OpenVino"'
+        execution_provider = "/p:ExecutionProvider=openvino"
+        package_name = "/p:OrtPackageId=Microsoft.ML.OnnxRuntime.OpenVino"
     elif use_tensorrt:
-        execution_provider = '/p:ExecutionProvider="tensorrt"'
-        package_name = '/p:OrtPackageId="Microsoft.ML.OnnxRuntime.TensorRT"'
+        execution_provider = "/p:ExecutionProvider=tensorrt"
+        package_name = "/p:OrtPackageId=Microsoft.ML.OnnxRuntime.TensorRT"
     elif use_dnnl:
-        execution_provider = '/p:ExecutionProvider="dnnl"'
-        package_name = '/p:OrtPackageId="Microsoft.ML.OnnxRuntime.DNNL"'
+        execution_provider = "/p:ExecutionProvider=dnnl"
+        package_name = "/p:OrtPackageId=Microsoft.ML.OnnxRuntime.DNNL"
     elif use_cuda:
-        package_name = '/p:OrtPackageId="Microsoft.ML.OnnxRuntime.Gpu"'
+        package_name = "/p:OrtPackageId=Microsoft.ML.OnnxRuntime.Gpu"
     elif use_rocm:
-        package_name = '/p:OrtPackageId="Microsoft.ML.OnnxRuntime.ROCm"'
+        package_name = "/p:OrtPackageId=Microsoft.ML.OnnxRuntime.ROCm"
     elif use_tvm:
-        execution_provider = '/p:ExecutionProvider="tvm"'
-        package_name = '/p:OrtPackageId="Microsoft.ML.OnnxRuntime.Tvm"'
-    elif use_snpe:
-        execution_provider = '/p:ExecutionProvider="snpe"'
-        package_name = '/p:OrtPackageId="Microsoft.ML.OnnxRuntime.Snpe"'
+        execution_provider = "/p:ExecutionProvider=tvm"
+        package_name = "/p:OrtPackageId=Microsoft.ML.OnnxRuntime.Tvm"
     elif use_qnn:
-        execution_provider = '/p:ExecutionProvider="qnn"'
-        package_name = '/p:OrtPackageId="Microsoft.ML.OnnxRuntime.QNN"'
+        execution_provider = "/p:ExecutionProvider=qnn"
+        package_name = "/p:OrtPackageId=Microsoft.ML.OnnxRuntime.QNN"
     elif any(map(lambda x: "OrtPackageId=" in x, msbuild_extra_options)):
         pass
     else:
-        # use the solution file that includes Xamarin mobile targets
-        sln = "OnnxRuntime.CSharp.sln"
+        # we currently only allow building with mobile targets on Windows.
+        # it should be possible to allow building with android targets on Linux but that requires updating the
+        # csproj to separate the inclusion of ios and android targets.
+        if is_windows() and have_exclude_mobile_targets_option is False:
+            # use the sln that include the mobile targets
+            sln = "OnnxRuntime.CSharp.sln"
+
+    # explicitly exclude mobile targets in this case
+    if sln != "OnnxRuntime.CSharp.sln" and have_exclude_mobile_targets_option is False:
+        msbuild_extra_options.append("IncludeMobileTargets=false")
+
+    # expand extra_options to add prefix
+    extra_options = ["/p:" + option for option in msbuild_extra_options]
+
+    # we have to use msbuild directly if including Xamarin targets as dotnet only supports MAUI (.net6)
+    use_dotnet = sln != "OnnxRuntime.CSharp.sln"
+
+    if use_dotnet:
+        cmd_args = ["dotnet", "restore", sln, "--configfile", "NuGet.CSharp.config", *extra_options]
+    else:
+        cmd_args = ["msbuild", sln, "/t:restore", "/p:RestoreConfigFile=NuGet.CSharp.config", *extra_options]
 
     # set build directory based on build_dir arg
     native_dir = os.path.normpath(os.path.join(source_dir, build_dir))
-    ort_build_dir = '/p:OnnxRuntimeBuildDirectory="' + native_dir + '"'
+    ort_build_dir = "/p:OnnxRuntimeBuildDirectory=" + native_dir
 
-    # dotnet restore
-    cmd_args = ["dotnet", "restore", sln, "--configfile", "NuGet.CSharp.config"]
     run_subprocess(cmd_args, cwd=csharp_build_dir)
 
     # build csharp bindings and create nuget package for each config
     for config in configs:
-        if is_linux():
-            native_build_dir = os.path.join(native_dir, config)
-            cmd_args = [cmake_path, "-DCMAKE_INSTALL_PREFIX=./nuget-staging/usr/local", "-Pcmake_install.cmake"]
-            run_subprocess(cmd_args, cwd=native_build_dir)
-
-        configuration = '/p:Configuration="' + config + '"'
-
+        configuration = "/p:Configuration=" + config
         if not use_winml:
-            cmd_args = [
-                "dotnet",
+            cmd_args = ["dotnet"] if use_dotnet else []
+            cmd_args += [
                 "msbuild",
                 sln,
                 configuration,
                 package_name,
-                is_linux_build,
                 ort_build_dir,
                 enable_training_tests,
+                *extra_options,
             ]
+
             run_subprocess(cmd_args, cwd=csharp_build_dir)
         else:
             winml_interop_dir = os.path.join(source_dir, "csharp", "src", "Microsoft.AI.MachineLearning.Interop")
@@ -2147,7 +2173,7 @@ def build_nuget_package(
                 "msbuild",
                 winml_interop_project,
                 configuration,
-                '/p:Platform="Any CPU"',
+                "/p:Platform=Any CPU",
                 ort_build_dir,
                 "-restore",
             ]
@@ -2161,26 +2187,28 @@ def build_nuget_package(
                 # this path is setup by cmake/nuget_helpers.cmake for MSVC on Windows
                 nuget_exe = os.path.normpath(os.path.join(native_dir, config, "nuget_exe", "src", "nuget.exe"))
         else:
-            # user needs to make sure nuget is installed and can be found
-            nuget_exe = "nuget"
+            # `dotnet pack` is used on Linux
+            nuget_exe = "NugetExe_not_set"
 
         nuget_exe_arg = '/p:NugetExe="' + nuget_exe + '"'
 
-        cmd_args = [
-            "dotnet",
+        cmd_args = ["dotnet"] if use_dotnet else []
+        cmd_args += [
             "msbuild",
             "OnnxRuntime.CSharp.proj",
             target_name,
             package_name,
             configuration,
             execution_provider,
-            is_linux_build,
             ort_build_dir,
             nuget_exe_arg,
+            *extra_options,
         ]
-        cmd_args.extend(msbuild_extra_options)
+
         run_subprocess(cmd_args, cwd=csharp_build_dir)
 
+        log.info(f"nuget package was created in the {config} build output directory.")
+
 
 def run_csharp_tests(source_dir, build_dir, use_cuda, use_openvino, use_tensorrt, use_dnnl, enable_training_apis):
     # Currently only running tests on windows.
@@ -2270,7 +2298,9 @@ def generate_documentation(source_dir, build_dir, configs, validate):
             have_diff = False
 
             def diff_file(path, regenerate_qualifiers=""):
-                diff = subprocess.check_output(["git", "diff", path], cwd=source_dir).decode("utf-8")
+                diff = subprocess.check_output(["git", "diff", "--ignore-blank-lines", path], cwd=source_dir).decode(
+                    "utf-8"
+                )
                 if diff:
                     nonlocal have_diff
                     have_diff = True
@@ -2641,6 +2671,7 @@ def main():
                 enable_training_apis=args.enable_training_apis,
                 enable_rocm_profiling=args.enable_rocm_profiling,
             )
+
         if args.build_nuget:
             build_nuget_package(
                 cmake_path,
@@ -2654,7 +2685,6 @@ def main():
                 args.use_dnnl,
                 args.use_tvm,
                 args.use_winml,
-                args.use_snpe,
                 args.use_qnn,
                 args.enable_training_apis,
                 normalize_arg_list(args.msbuild_extra_options),
diff --git a/tools/ci_build/gen_def.py b/tools/ci_build/gen_def.py
index 9821f3a901c1a..b53fb33659120 100755
--- a/tools/ci_build/gen_def.py
+++ b/tools/ci_build/gen_def.py
@@ -67,7 +67,19 @@ def parse_arguments():
 
         # external symbols are removed, xnnpack ep will be created via the standard ORT API.
         # https://github.com/microsoft/onnxruntime/pull/11798
-        if c not in ("vitisai", "winml", "cuda", "rocm", "migraphx", "qnn", "snpe", "xnnpack", "cann", "dnnl"):
+        if c not in (
+            "vitisai",
+            "winml",
+            "cuda",
+            "rocm",
+            "migraphx",
+            "qnn",
+            "snpe",
+            "xnnpack",
+            "cann",
+            "dnnl",
+            "tensorrt",
+        ):
             file.write(f"#include <core/providers/{c}/{c}_provider_factory.h>\n")
     file.write("void* GetFunctionEntryByName(const char* name){\n")
     for symbol in symbols:
diff --git a/tools/ci_build/github/apple/assemble_ios_packaging_artifacts.sh b/tools/ci_build/github/apple/assemble_apple_packaging_artifacts.sh
similarity index 100%
rename from tools/ci_build/github/apple/assemble_ios_packaging_artifacts.sh
rename to tools/ci_build/github/apple/assemble_apple_packaging_artifacts.sh
diff --git a/tools/ci_build/github/apple/build_and_assemble_ios_pods.py b/tools/ci_build/github/apple/build_and_assemble_apple_pods.py
similarity index 82%
rename from tools/ci_build/github/apple/build_and_assemble_ios_pods.py
rename to tools/ci_build/github/apple/build_and_assemble_apple_pods.py
index d3443e6cb0f4d..006dc4c33ffce 100755
--- a/tools/ci_build/github/apple/build_and_assemble_ios_pods.py
+++ b/tools/ci_build/github/apple/build_and_assemble_apple_pods.py
@@ -32,13 +32,13 @@ def parse_args():
     parser.add_argument(
         "--build-dir",
         type=pathlib.Path,
-        default=REPO_DIR / "build" / "ios_framework",
+        default=REPO_DIR / "build" / "apple_framework",
         help="The build directory. This will contain the iOS framework build output.",
     )
     parser.add_argument(
         "--staging-dir",
         type=pathlib.Path,
-        default=REPO_DIR / "build" / "ios_pod_staging",
+        default=REPO_DIR / "build" / "apple_pod_staging",
         help="The staging directory. This will contain the iOS pod package files. "
         "The pod package files do not have dependencies on files in the build directory.",
     )
@@ -60,20 +60,20 @@ def parse_args():
 
     build_framework_group = parser.add_argument_group(
         title="iOS framework build arguments",
-        description="See the corresponding arguments in build_ios_framework.py for details.",
+        description="See the corresponding arguments in build_apple_framework.py for details.",
     )
 
     build_framework_group.add_argument("--include-ops-by-config")
     build_framework_group.add_argument(
-        "--build-settings-file", required=True, help="The positional argument of build_ios_framework.py."
+        "--build-settings-file", required=True, help="The positional argument of build_apple_framework.py."
     )
     build_framework_group.add_argument(
         "-b",
-        "--build-ios-framework-arg",
+        "--build-apple-framework-arg",
         action="append",
-        dest="build_ios_framework_extra_args",
+        dest="build_apple_framework_extra_args",
         default=[],
-        help="Pass an argument through to build_ios_framework.py. This may be specified multiple times.",
+        help="Pass an argument through to build_apple_framework.py. This may be specified multiple times.",
     )
 
     args = parser.parse_args()
@@ -101,27 +101,27 @@ def main():
 
     # build framework
     package_variant = PackageVariant[args.variant]
-    framework_info_file = build_dir / "framework_info.json"
+    framework_info_file = build_dir / "xcframework_info.json"
 
-    log.info("Building iOS framework.")
+    log.info("Building Apple framework.")
 
-    build_ios_framework_args = [
+    build_apple_framework_args = [
         sys.executable,
-        str(SCRIPT_DIR / "build_ios_framework.py"),
-        *args.build_ios_framework_extra_args,
+        str(SCRIPT_DIR / "build_apple_framework.py"),
+        *args.build_apple_framework_extra_args,
     ]
 
     if args.include_ops_by_config is not None:
-        build_ios_framework_args += ["--include_ops_by_config", args.include_ops_by_config]
+        build_apple_framework_args += ["--include_ops_by_config", args.include_ops_by_config]
 
-    build_ios_framework_args += ["--build_dir", str(build_dir), args.build_settings_file]
+    build_apple_framework_args += ["--build_dir", str(build_dir), args.build_settings_file]
 
-    run(build_ios_framework_args)
+    run(build_apple_framework_args)
 
     if args.test:
-        test_ios_packages_args = [
+        test_apple_packages_args = [
             sys.executable,
-            str(SCRIPT_DIR / "test_ios_packages.py"),
+            str(SCRIPT_DIR / "test_apple_packages.py"),
             "--fail_if_cocoapods_missing",
             "--framework_info_file",
             str(framework_info_file),
@@ -131,7 +131,7 @@ def main():
             package_variant.name,
         ]
 
-        run(test_ios_packages_args)
+        run(test_apple_packages_args)
 
     # assemble pods and then move them to their target locations (staging_dir/<pod_name>)
     staging_dir.mkdir(parents=True, exist_ok=True)
diff --git a/tools/ci_build/github/apple/build_ios_framework.py b/tools/ci_build/github/apple/build_apple_framework.py
similarity index 81%
rename from tools/ci_build/github/apple/build_ios_framework.py
rename to tools/ci_build/github/apple/build_apple_framework.py
index 7983581f07fd6..5137a0644b2e7 100644
--- a/tools/ci_build/github/apple/build_ios_framework.py
+++ b/tools/ci_build/github/apple/build_apple_framework.py
@@ -30,19 +30,17 @@ def _parse_build_settings(args):
 
     build_settings["build_osx_archs"] = build_settings_data.get("build_osx_archs", DEFAULT_BUILD_OSX_ARCHS)
 
-    build_params = []
     if "build_params" in build_settings_data:
-        build_params += build_settings_data["build_params"]
+        build_settings["build_params"] = build_settings_data["build_params"]
     else:
         raise ValueError("build_params is required in the build config file")
 
-    build_settings["build_params"] = build_params
     return build_settings
 
 
 # Build fat framework for all archs of a single sysroot
 # For example, arm64 and x86_64 for iphonesimulator
-def _build_for_ios_sysroot(
+def _build_for_apple_sysroot(
     build_config, intermediates_dir, base_build_command, sysroot, archs, build_dynamic_framework
 ):
     # paths of the onnxruntime libraries for different archs
@@ -54,7 +52,7 @@ def _build_for_ios_sysroot(
         build_dir_current_arch = os.path.join(intermediates_dir, sysroot + "_" + current_arch)
         build_command = [
             *base_build_command,
-            "--ios_sysroot=" + sysroot,
+            "--apple_sysroot=" + sysroot,
             "--osx_arch=" + current_arch,
             "--build_dir=" + build_dir_current_arch,
         ]
@@ -103,6 +101,20 @@ def _build_for_ios_sysroot(
     return framework_dir
 
 
+def _merge_framework_info_files(files, output_file):
+    merged_data = {}
+
+    for file in files:
+        with open(file) as f:
+            data = json.load(f)
+            for platform, values in data.items():
+                assert platform not in merged_data, f"Duplicate platform value: {platform}"
+                merged_data[platform] = values
+
+    with open(output_file, "w") as f:
+        json.dump(merged_data, f, indent=2)
+
+
 def _build_package(args):
     build_settings = _parse_build_settings(args)
     build_dir = os.path.abspath(args.build_dir)
@@ -110,20 +122,26 @@ def _build_package(args):
     # Temp dirs to hold building results
     intermediates_dir = os.path.join(build_dir, "intermediates")
     build_config = args.config
-    base_build_command = [sys.executable, BUILD_PY] + build_settings["build_params"] + ["--config=" + build_config]
-
-    if args.include_ops_by_config is not None:
-        base_build_command += ["--include_ops_by_config=" + str(args.include_ops_by_config.resolve())]
-
-    if args.path_to_protoc_exe is not None:
-        base_build_command += ["--path_to_protoc_exe=" + str(args.path_to_protoc_exe.resolve())]
 
     # build framework for individual sysroot
     framework_dirs = []
-    framework_info_path = ""
+    framework_info_files_to_merge = []
     public_headers_path = ""
     for sysroot in build_settings["build_osx_archs"]:
-        framework_dir = _build_for_ios_sysroot(
+        base_build_command = (
+            [sys.executable, BUILD_PY]
+            + build_settings["build_params"]["base"]
+            + build_settings["build_params"][sysroot]
+            + ["--config=" + build_config]
+        )
+
+        if args.include_ops_by_config is not None:
+            base_build_command += ["--include_ops_by_config=" + str(args.include_ops_by_config.resolve())]
+
+        if args.path_to_protoc_exe is not None:
+            base_build_command += ["--path_to_protoc_exe=" + str(args.path_to_protoc_exe.resolve())]
+
+        framework_dir = _build_for_apple_sysroot(
             build_config,
             intermediates_dir,
             base_build_command,
@@ -132,17 +150,20 @@ def _build_package(args):
             args.build_dynamic_framework,
         )
         framework_dirs.append(framework_dir)
-        # podspec and headers for each sysroot are the same, pick one of them
-        if not framework_info_path:
-            framework_info_path = os.path.join(os.path.dirname(framework_dir), "framework_info.json")
+
+        curr_framework_info_path = os.path.join(os.path.dirname(framework_dir), "framework_info.json")
+        framework_info_files_to_merge.append(curr_framework_info_path)
+
+        # headers for each sysroot are the same, pick one of them
+        if not public_headers_path:
             public_headers_path = os.path.join(os.path.dirname(framework_dir), "onnxruntime.framework", "Headers")
 
-    # create the folder for xcframework and copy the LICENSE and podspec file
+    # create the folder for xcframework and copy the LICENSE and framework_info.json file
     xcframework_dir = os.path.join(build_dir, "framework_out")
     pathlib.Path(xcframework_dir).mkdir(parents=True, exist_ok=True)
     shutil.copy(os.path.join(REPO_DIR, "LICENSE"), xcframework_dir)
     shutil.copytree(public_headers_path, os.path.join(xcframework_dir, "Headers"), dirs_exist_ok=True)
-    shutil.copy(framework_info_path, build_dir)
+    _merge_framework_info_files(framework_info_files_to_merge, os.path.join(build_dir, "xcframework_info.json"))
 
     # remove existing xcframework if any
     xcframework_path = os.path.join(xcframework_dir, "onnxruntime.xcframework")
@@ -171,7 +192,7 @@ def parse_args():
     parser.add_argument(
         "--build_dir",
         type=pathlib.Path,
-        default=os.path.join(REPO_DIR, "build/iOS_framework"),
+        default=os.path.join(REPO_DIR, "build/apple_framework"),
         help="Provide the root directory for build output",
     )
 
diff --git a/tools/ci_build/github/apple/c/assemble_c_pod_package.py b/tools/ci_build/github/apple/c/assemble_c_pod_package.py
index 14e7729610617..1d7647dd469db 100644
--- a/tools/ci_build/github/apple/c/assemble_c_pod_package.py
+++ b/tools/ci_build/github/apple/c/assemble_c_pod_package.py
@@ -28,8 +28,6 @@ def get_pod_config_file(package_variant: PackageVariant):
         return _script_dir / "onnxruntime-c.config.json"
     elif package_variant == PackageVariant.Mobile:
         return _script_dir / "onnxruntime-mobile-c.config.json"
-    elif package_variant == PackageVariant.Test:
-        return _script_dir / "onnxruntime-test-c.config.json"
     elif package_variant == PackageVariant.Training:
         return _script_dir / "onnxruntime-training-c.config.json"
     else:
@@ -49,7 +47,7 @@ def assemble_c_pod_package(
 
     :param staging_dir Path to the staging directory for the C/C++ pod files.
     :param pod_version C/C++ pod version.
-    :param framework_info_file Path to the framework_info.json file containing additional values for the podspec.
+    :param framework_info_file Path to the framework_info.json or xcframework_info.json file containing additional values for the podspec.
     :param public_headers_dir Path to the public headers directory to include in the pod.
     :param framework_dir Path to the onnxruntime framework directory to include in the pod.
     :param package_variant The pod package variant.
@@ -77,14 +75,16 @@ def assemble_c_pod_package(
     # generate the podspec file from the template
     variable_substitutions = {
         "DESCRIPTION": pod_config["description"],
-        "IOS_DEPLOYMENT_TARGET": framework_info["IOS_DEPLOYMENT_TARGET"],
+        # By default, we build both "iphoneos" and "iphonesimulator" architectures, and the deployment target should be the same between these two.
+        "IOS_DEPLOYMENT_TARGET": framework_info["iphonesimulator"]["APPLE_DEPLOYMENT_TARGET"],
+        "MACOSX_DEPLOYMENT_TARGET": framework_info.get("macosx", {}).get("APPLE_DEPLOYMENT_TARGET", ""),
         "LICENSE_FILE": "LICENSE",
         "NAME": pod_name,
         "ORT_C_FRAMEWORK": framework_dir.name,
         "ORT_C_HEADERS_DIR": public_headers_dir.name,
         "SUMMARY": pod_config["summary"],
         "VERSION": pod_version,
-        "WEAK_FRAMEWORK": framework_info["WEAK_FRAMEWORK"],
+        "WEAK_FRAMEWORK": framework_info["iphonesimulator"]["WEAK_FRAMEWORK"],
     }
 
     podspec_template = _script_dir / "c.podspec.template"
@@ -114,7 +114,7 @@ def parse_args():
         "--framework-info-file",
         type=pathlib.Path,
         required=True,
-        help="Path to the framework_info.json file containing additional values for the podspec. "
+        help="Path to the framework_info.json or xcframework_info.json file containing additional values for the podspec. "
         "This file should be generated by CMake in the build directory.",
     )
     parser.add_argument(
diff --git a/tools/ci_build/github/apple/c/c.podspec.template b/tools/ci_build/github/apple/c/c.podspec.template
index e0cbfe23608fc..a04f20b359229 100644
--- a/tools/ci_build/github/apple/c/c.podspec.template
+++ b/tools/ci_build/github/apple/c/c.podspec.template
@@ -6,7 +6,13 @@ Pod::Spec.new do |spec|
     spec.homepage               = "https://github.com/microsoft/onnxruntime"
     spec.source                 = { :http => "file:///http_source_placeholder" }
     spec.summary                = "@SUMMARY@"
-    spec.platform               = :ios, "@IOS_DEPLOYMENT_TARGET@"
+    spec.ios.deployment_target  = "@IOS_DEPLOYMENT_TARGET@"
+
+    macosx_deployment_target =  "@MACOSX_DEPLOYMENT_TARGET@"
+    if macosx_deployment_target != ""
+        spec.osx.deployment_target = macosx_deployment_target
+    end
+
     spec.vendored_frameworks    = "@ORT_C_FRAMEWORK@"
     spec.static_framework       = true
     spec.weak_framework         = [ @WEAK_FRAMEWORK@ ]
diff --git a/tools/ci_build/github/apple/c/onnxruntime-test-c.config.json b/tools/ci_build/github/apple/c/onnxruntime-test-c.config.json
deleted file mode 100644
index d55dbc63e057c..0000000000000
--- a/tools/ci_build/github/apple/c/onnxruntime-test-c.config.json
+++ /dev/null
@@ -1,5 +0,0 @@
-{
-    "name": "onnxruntime-test-c",
-    "summary": "TEST POD",
-    "description": "Pod for testing. Not for actual release."
-}
diff --git a/tools/ci_build/github/apple/coreml_supported_ops.md b/tools/ci_build/github/apple/coreml_supported_ops.md
index 959177bcb4d7b..e2e43587ab674 100644
--- a/tools/ci_build/github/apple/coreml_supported_ops.md
+++ b/tools/ci_build/github/apple/coreml_supported_ops.md
@@ -34,6 +34,8 @@ Keep in sync with doco generated from /docs/execution-providers/CoreML-Execution
 |ai.onnx:Shape|Attribute `start` with non-default value is not supported.<br/>Attribute `end` is not supported.|
 |ai.onnx:Sigmoid||
 |ai.onnx:Slice|Inputs `starts`, `ends`, `axes`, and `steps` should be constant. Empty slice is not supported.|
+|ai.onnx:Softmax||
+|ai.onnx:Split|If provided, `splits` should be constant. num of outputs supported is at least 2.|
 |ai.onnx:Squeeze||
 |ai.onnx:Sqrt||
 |ai.onnx:Sub||
diff --git a/tools/ci_build/github/apple/default_full_apple_framework_build_settings.json b/tools/ci_build/github/apple/default_full_apple_framework_build_settings.json
new file mode 100644
index 0000000000000..86b4efdc63750
--- /dev/null
+++ b/tools/ci_build/github/apple/default_full_apple_framework_build_settings.json
@@ -0,0 +1,37 @@
+{
+    "build_osx_archs": {
+        "iphoneos": [
+            "arm64"
+        ],
+        "iphonesimulator": [
+            "arm64",
+            "x86_64"
+        ],
+        "macosx": [
+            "arm64",
+            "x86_64"
+        ]
+    },
+    "build_params": {
+        "base": [
+            "--parallel",
+            "--use_xcode",
+            "--build_apple_framework",
+            "--use_coreml",
+            "--use_xnnpack",
+            "--skip_tests",
+            "--cmake_extra_defines=onnxruntime_BUILD_UNIT_TESTS=OFF"
+        ],
+        "macosx": [
+            "--apple_deploy_target=11.0"
+        ],
+        "iphoneos": [
+            "--ios",
+            "--apple_deploy_target=12.0"
+        ],
+        "iphonesimulator": [
+            "--ios",
+            "--apple_deploy_target=12.0"
+        ]
+    }
+}
diff --git a/tools/ci_build/github/apple/default_full_ios_framework_build_settings.json b/tools/ci_build/github/apple/default_full_ios_framework_build_settings.json
deleted file mode 100644
index 621af55fad7fa..0000000000000
--- a/tools/ci_build/github/apple/default_full_ios_framework_build_settings.json
+++ /dev/null
@@ -1,22 +0,0 @@
-{
-    "build_osx_archs": {
-        "iphoneos": [
-            "arm64"
-        ],
-        "iphonesimulator": [
-            "arm64",
-            "x86_64"
-        ]
-    },
-    "build_params": [
-        "--ios",
-        "--parallel",
-        "--use_xcode",
-        "--build_apple_framework",
-        "--use_coreml",
-        "--use_xnnpack",
-        "--skip_tests",
-        "--cmake_extra_defines=onnxruntime_BUILD_UNIT_TESTS=OFF",
-        "--apple_deploy_target=12.0"
-    ]
-}
diff --git a/tools/ci_build/github/apple/default_mobile_ios_framework_build_settings.json b/tools/ci_build/github/apple/default_mobile_ios_framework_build_settings.json
index 2738a7ca7b009..2bdf8de24f53c 100644
--- a/tools/ci_build/github/apple/default_mobile_ios_framework_build_settings.json
+++ b/tools/ci_build/github/apple/default_mobile_ios_framework_build_settings.json
@@ -8,19 +8,27 @@
             "x86_64"
         ]
     },
-    "build_params": [
-        "--ios",
-        "--parallel",
-        "--use_xcode",
-        "--build_apple_framework",
-        "--minimal_build=extended",
-        "--disable_rtti",
-        "--disable_ml_ops",
-        "--disable_exceptions",
-        "--enable_reduced_operator_type_support",
-        "--use_coreml",
-        "--skip_tests",
-        "--cmake_extra_defines=onnxruntime_BUILD_UNIT_TESTS=OFF",
-        "--apple_deploy_target=12.0"
-    ]
+    "build_params": {
+        "base": [
+            "--parallel",
+            "--use_xcode",
+            "--build_apple_framework",
+            "--minimal_build=extended",
+            "--disable_rtti",
+            "--disable_ml_ops",
+            "--disable_exceptions",
+            "--enable_reduced_operator_type_support",
+            "--use_coreml",
+            "--skip_tests",
+            "--cmake_extra_defines=onnxruntime_BUILD_UNIT_TESTS=OFF"
+        ],
+        "iphoneos": [
+            "--ios",
+            "--apple_deploy_target=12.0"
+        ],
+        "iphonesimulator": [
+            "--ios",
+            "--apple_deploy_target=12.0"
+        ]
+    }
 }
diff --git a/tools/ci_build/github/apple/default_training_ios_framework_build_settings.json b/tools/ci_build/github/apple/default_training_ios_framework_build_settings.json
index ec7fcafce04f2..f88934cd44a66 100644
--- a/tools/ci_build/github/apple/default_training_ios_framework_build_settings.json
+++ b/tools/ci_build/github/apple/default_training_ios_framework_build_settings.json
@@ -6,18 +6,33 @@
         "iphonesimulator": [
             "arm64",
             "x86_64"
+        ],
+        "macosx": [
+            "arm64",
+            "x86_64"
         ]
     },
-    "build_params": [
-        "--ios",
-        "--parallel",
-        "--use_xcode",
-        "--enable_training_apis",
-        "--build_apple_framework",
-        "--use_coreml",
-        "--use_xnnpack",
-        "--skip_tests",
-        "--cmake_extra_defines=onnxruntime_BUILD_UNIT_TESTS=OFF",
-        "--apple_deploy_target=12.0"
-    ]
+    "build_params": {
+        "base": [
+            "--parallel",
+            "--use_xcode",
+            "--enable_training_apis",
+            "--build_apple_framework",
+            "--use_coreml",
+            "--use_xnnpack",
+            "--skip_tests",
+            "--cmake_extra_defines=onnxruntime_BUILD_UNIT_TESTS=OFF"
+        ],
+        "iphoneos": [
+            "--ios",
+            "--apple_deploy_target=12.0"
+        ],
+        "iphonesimulator": [
+            "--ios",
+            "--apple_deploy_target=12.0"
+        ],
+        "macosx": [
+            "--apple_deploy_target=11.0"
+        ]
+    }
 }
diff --git a/tools/ci_build/github/apple/framework_info.json.template b/tools/ci_build/github/apple/framework_info.json.template
index 788e52302b3f1..b4c4fb8d16ebf 100644
--- a/tools/ci_build/github/apple/framework_info.json.template
+++ b/tools/ci_build/github/apple/framework_info.json.template
@@ -1,4 +1,6 @@
 {
-    "IOS_DEPLOYMENT_TARGET": "@CMAKE_OSX_DEPLOYMENT_TARGET@",
-    "WEAK_FRAMEWORK": "@APPLE_WEAK_FRAMEWORK@"
-}
\ No newline at end of file
+    "@CMAKE_OSX_SYSROOT@": {
+        "APPLE_DEPLOYMENT_TARGET": "@CMAKE_OSX_DEPLOYMENT_TARGET@",
+        "WEAK_FRAMEWORK": "@APPLE_WEAK_FRAMEWORK@"
+    }
+}
diff --git a/tools/ci_build/github/apple/objectivec/assemble_objc_pod_package.py b/tools/ci_build/github/apple/objectivec/assemble_objc_pod_package.py
index 135a55165beda..ec1feaae82175 100755
--- a/tools/ci_build/github/apple/objectivec/assemble_objc_pod_package.py
+++ b/tools/ci_build/github/apple/objectivec/assemble_objc_pod_package.py
@@ -119,7 +119,7 @@ def assemble_objc_pod_package(
 
     :param staging_dir Path to the staging directory for the Objective-C pod files.
     :param pod_version Objective-C pod version.
-    :param framework_info_file Path to the framework_info.json file containing additional values for the podspec.
+    :param framework_info_file Path to the framework_info.json or xcframework_info.json file containing additional values for the podspec.
     :param package_variant The pod package variant.
     :return Tuple of (package name, path to the podspec file).
     """
@@ -153,7 +153,7 @@ def path_patterns_as_variable_value(patterns: list[str]):
         "C_POD_NAME": c_pod_config["name"],
         "DESCRIPTION": pod_config["description"],
         "INCLUDE_DIR_LIST": path_patterns_as_variable_value(include_dirs),
-        "IOS_DEPLOYMENT_TARGET": framework_info["IOS_DEPLOYMENT_TARGET"],
+        "IOS_DEPLOYMENT_TARGET": framework_info["iphonesimulator"]["APPLE_DEPLOYMENT_TARGET"],
         "LICENSE_FILE": license_file,
         "NAME": pod_name,
         "PUBLIC_HEADER_FILE_LIST": path_patterns_as_variable_value(pod_files["public_header_files"]),
@@ -191,7 +191,7 @@ def parse_args():
         "--framework-info-file",
         type=pathlib.Path,
         required=True,
-        help="Path to the framework_info.json file containing additional values for the podspec. "
+        help="Path to the framework_info.json or xcframework_info.json file containing additional values for the podspec. "
         "This file should be generated by CMake in the build directory.",
     )
     parser.add_argument(
diff --git a/tools/ci_build/github/apple/package_assembly_utils.py b/tools/ci_build/github/apple/package_assembly_utils.py
index e5940774c54f9..bdf359df1dbb8 100644
--- a/tools/ci_build/github/apple/package_assembly_utils.py
+++ b/tools/ci_build/github/apple/package_assembly_utils.py
@@ -17,7 +17,6 @@ class PackageVariant(enum.Enum):
     Full = 0  # full ORT build with all opsets, ops, and types
     Mobile = 1  # minimal ORT build with reduced ops
     Training = 2  # full ORT build with all opsets, ops, and types, plus training APIs
-    Test = -1  # for testing purposes only
 
     @classmethod
     def release_variant_names(cls):
diff --git a/tools/ci_build/github/apple/test_ios_packages.py b/tools/ci_build/github/apple/test_apple_packages.py
similarity index 87%
rename from tools/ci_build/github/apple/test_ios_packages.py
rename to tools/ci_build/github/apple/test_apple_packages.py
index ff42e9615483a..6dc4868dac8a3 100644
--- a/tools/ci_build/github/apple/test_ios_packages.py
+++ b/tools/ci_build/github/apple/test_apple_packages.py
@@ -19,7 +19,7 @@
 REPO_DIR = SCRIPT_PATH.parents[4]
 
 
-def _test_ios_packages(args):
+def _test_apple_packages(args):
     # check if CocoaPods is installed
     if shutil.which("pod") is None:
         if args.fail_if_cocoapods_missing:
@@ -58,10 +58,10 @@ def _test_ios_packages(args):
             os.makedirs(stage_dir)
 
         # assemble the test project here
-        target_proj_path = stage_dir / "ios_package_test"
+        target_proj_path = stage_dir / "apple_package_test"
 
         # copy the test project source files to target_proj_path
-        test_proj_path = pathlib.Path(REPO_DIR, "onnxruntime/test/platform/ios/ios_package_test")
+        test_proj_path = pathlib.Path(REPO_DIR, "onnxruntime/test/platform/apple/apple_package_test")
         shutil.copytree(test_proj_path, target_proj_path)
 
         # assemble local pod files here
@@ -133,7 +133,7 @@ def _test_ios_packages(args):
                     "xcodebuild",
                     "test",
                     "-workspace",
-                    "./ios_package_test.xcworkspace",
+                    "./apple_package_test.xcworkspace",
                     "-scheme",
                     "ios_package_test",
                     "-destination",
@@ -144,6 +144,24 @@ def _test_ios_packages(args):
                 cwd=target_proj_path,
             )
 
+            if PackageVariant[args.variant] != PackageVariant.Mobile:
+                subprocess.run(
+                    [
+                        "xcrun",
+                        "xcodebuild",
+                        "test",
+                        "-workspace",
+                        "./apple_package_test.xcworkspace",
+                        "-scheme",
+                        "macos_package_test",
+                        "-destination",
+                        "platform=macos",
+                    ],
+                    shell=False,
+                    check=True,
+                    cwd=target_proj_path,
+                )
+
 
 def parse_args():
     parser = argparse.ArgumentParser(
@@ -161,7 +179,7 @@ def parse_args():
         "--framework_info_file",
         type=pathlib.Path,
         required=True,
-        help="Path to the framework_info.json file containing additional values for the podspec. "
+        help="Path to the framework_info.json or xcframework_info.json file containing additional values for the podspec. "
         "This file should be generated by CMake in the build directory.",
     )
 
@@ -172,7 +190,7 @@ def parse_args():
     parser.add_argument(
         "--variant",
         choices=PackageVariant.all_variant_names(),
-        default=PackageVariant.Test.name,
+        required=True,
         help="Pod package variant.",
     )
 
@@ -193,7 +211,7 @@ def parse_args():
 
 def main():
     args = parse_args()
-    _test_ios_packages(args)
+    _test_apple_packages(args)
 
 
 if __name__ == "__main__":
diff --git a/tools/ci_build/github/apple/use_ios_pods_with_custom_build.md b/tools/ci_build/github/apple/use_ios_pods_with_custom_build.md
index c01f0796db0fb..c8da2eff57c33 100644
--- a/tools/ci_build/github/apple/use_ios_pods_with_custom_build.md
+++ b/tools/ci_build/github/apple/use_ios_pods_with_custom_build.md
@@ -2,9 +2,9 @@
 
 If you require a custom build of ONNX Runtime, you can create CocoaPods pods with your custom build locally and use them from a Podfile.
 
-**Prerequisite** - The custom build must be able to be done with [build_ios_framework.py](./build_ios_framework.py).
+**Prerequisite** - The custom build must be able to be done with [build_apple_framework.py](./build_apple_framework.py).
 
-To do a custom build and create the pods, run [build_and_assemble_ios_pods.py](./build_and_assemble_ios_pods.py).
+To do a custom build and create the pods, run [build_and_assemble_apple_pods.py](./build_and_assemble_apple_pods.py).
 Use the `--help` argument to see more information.
 
 ## Example usage
@@ -15,7 +15,7 @@ Our custom build will use a custom reduced operator kernel config file: `/path/t
 
 Run the script:
 ```bash
-python3 tools/ci_build/github/apple/build_and_assemble_ios_pods.py \
+python3 tools/ci_build/github/apple/build_and_assemble_apple_pods.py \
   --staging-dir /path/to/staging/dir \
   --include-ops-by-config /path/to/custom.config \
   --build-settings-file tools/ci_build/github/apple/default_mobile_ios_framework_build_settings.json
diff --git a/tools/ci_build/github/azure-pipelines/android-arm64-v8a-QNN-crosscompile-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/android-arm64-v8a-QNN-crosscompile-ci-pipeline.yml
index bbab9f3d85abb..4ebc6ea510ed8 100644
--- a/tools/ci_build/github/azure-pipelines/android-arm64-v8a-QNN-crosscompile-ci-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/android-arm64-v8a-QNN-crosscompile-ci-pipeline.yml
@@ -74,8 +74,8 @@ jobs:
         --build_dir build_qnn \
         --android_sdk_path $ANDROID_HOME \
         --android_ndk_path $ANDROID_NDK_HOME \
-        --android_abi=arm64-v8a \
-        --android_api=30 \
+        --android_abi=x86_64 \
+        --android_api=31 \
         --parallel \
         --use_qnn \
         --qnn_home $(QNN_SDK_ROOT) \
@@ -85,48 +85,36 @@ jobs:
 
   - script: |
       mkdir -p build_qnn/Release/testdata/QNN/node_tests
-      cp -r cmake/external/onnx//onnx/backend/test/data/node/test_basic_conv_with_padding build_qnn/Release/testdata/QNN/node_tests
+      cp -r cmake/external/onnx/onnx/backend/test/data/node/test_basic_conv_with_padding build_qnn/Release/testdata/QNN/node_tests
     displayName: Initialize test directories
 
   - task: JavaToolInstaller@0
-    displayName: Use jdk 8
+    displayName: Use jdk 11
     inputs:
-      versionSpec: '8'
+      versionSpec: '11'
       jdkArchitectureOption: 'x64'
       jdkSourceOption: 'PreInstalled'
 
-  - script: |
-      python3 tools/python/run_android_emulator.py \
-      --android-sdk-root ${ANDROID_SDK_ROOT} \
-      --create-avd --system-image "system-images;android-30;google_apis;arm64-v8a" \
-      --start --emulator-extra-args="-partition-size 4096" \
-      --emulator-pid-file $(Build.BinariesDirectory)/emulator.pid
-    displayName: Start Android emulator
-    enabled: false
-    continueOnError: true
+  # This is commented out for now. The emulator runs correctly, onnx_test_runner is executable, and the test passes
+  # with the CPU EP but returns 139 when attempting to use the QNN EP. Maybe some QNN EP parameters need to be provided?
+  #
+  # - template: templates/use-android-emulator.yml
+  #   parameters:
+  #     create: true
+  #     start: true
 
-  - script: |
-      $ANDROID_SDK_ROOT/emulator/emulator -list-avds
-    displayName: List emulators
+  # - task: CmdLine@2
+  #   inputs:
+  #     script: |
+  #       set -e -x
+  #       ${ANDROID_SDK_ROOT}/platform-tools/adb push onnx_test_runner /data/local/tmp/
+  #       ${ANDROID_SDK_ROOT}/platform-tools/adb push testdata/QNN/node_tests/test_basic_conv_with_padding /data/local/tmp/
+  #       ${ANDROID_SDK_ROOT}/platform-tools/adb shell "chmod +x /data/local/tmp/onnx_test_runner"
+  #       ${ANDROID_SDK_ROOT}/platform-tools/adb shell "/data/local/tmp/onnx_test_runner -v /data/local/tmp/test_basic_conv_with_padding"
+  #       ${ANDROID_SDK_ROOT}/platform-tools/adb shell "/data/local/tmp/onnx_test_runner -v -e qnn /data/local/tmp/test_basic_conv_with_padding"
+  #     workingDirectory: build_qnn/Release
+  #   displayName: Run test_basic_conv_with_padding on emulator
 
-  - task: CmdLine@2
-    inputs:
-      script: |
-        ${ANDROID_SDK_ROOT}/platform-tools/adb shell "mkdir /data/local/tmp/qnn"
-        ${ANDROID_SDK_ROOT}/platform-tools/adb push onnx_test_runner /data/local/tmp/qnn
-        ${ANDROID_SDK_ROOT}/platform-tools/adb push testdata/QNN/node_tests/test_basic_conv_with_padding /data/local/tmp/qnn
-        ${ANDROID_SDK_ROOT}/platform-tools/adb shell "/data/local/tmp/qnn/onnx_test_runner -e qnn /data/local/tmp/qnn/test_basic_conv_with_padding"
-      workingDirectory: build_qnn/Release
-    displayName: Run tests
-    enabled: false
-    continueOnError: true
-
-  - script: |
-        python3 tools/python/run_android_emulator.py \
-          --android-sdk-root ${ANDROID_SDK_ROOT} \
-          --stop \
-          --emulator-pid-file $(Build.BinariesDirectory)/emulator.pid
-    displayName: Stop Android emulator
-    condition: always()
-    enabled: false
-    continueOnError: true
+  # - template: templates/use-android-emulator.yml
+  #   parameters:
+  #     stop: true
diff --git a/tools/ci_build/github/azure-pipelines/android-x86_64-crosscompile-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/android-x86_64-crosscompile-ci-pipeline.yml
index f6f6f52440534..9136b21aec626 100644
--- a/tools/ci_build/github/azure-pipelines/android-x86_64-crosscompile-ci-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/android-x86_64-crosscompile-ci-pipeline.yml
@@ -257,13 +257,10 @@ stages:
 
       - template: "templates/use-android-ndk.yml"
 
-      - script: |
-          python3 tools/python/run_android_emulator.py \
-          --android-sdk-root ${ANDROID_SDK_ROOT} \
-          --create-avd --system-image "system-images;android-31;default;x86_64" \
-          --start --emulator-extra-args="-partition-size 4096" \
-          --emulator-pid-file $(Build.BinariesDirectory)/emulator.pid
-        displayName: Start Android emulator
+      - template: templates/use-android-emulator.yml
+        parameters:
+          create: true
+          start: true
 
       - script: |
           python3 tools/ci_build/build.py \
@@ -277,13 +274,9 @@ stages:
           --test
         displayName: CPU EP, Test on Android Emulator
 
-      - script: |
-          python3 tools/python/run_android_emulator.py \
-            --android-sdk-root ${ANDROID_SDK_ROOT} \
-            --stop \
-            --emulator-pid-file $(Build.BinariesDirectory)/emulator.pid
-        displayName: Stop Android emulator
-        condition: always()
+      - template: templates/use-android-emulator.yml
+        parameters:
+          stop: true
 
       - template: templates/clean-agent-build-directory-step.yml
 
@@ -329,13 +322,10 @@ stages:
 
       - template: "templates/use-android-ndk.yml"
 
-      - script: |
-          python3 tools/python/run_android_emulator.py \
-          --android-sdk-root ${ANDROID_SDK_ROOT} \
-          --create-avd --system-image "system-images;android-31;default;x86_64" \
-          --start --emulator-extra-args="-partition-size 4096" \
-          --emulator-pid-file $(Build.BinariesDirectory)/emulator.pid
-        displayName: Start Android emulator
+      - template: templates/use-android-emulator.yml
+        parameters:
+          create: true
+          start: true
 
       - script: |
           python3 tools/ci_build/build.py \
@@ -358,13 +348,10 @@ stages:
         # Build Minimal ORT with NNAPI and reduced Ops, run unit tests on Android Emulator
         displayName: Build Minimal ORT with NNAPI and run tests
 
-      - script: |
-          python3 tools/python/run_android_emulator.py \
-            --android-sdk-root ${ANDROID_SDK_ROOT} \
-            --stop \
-            --emulator-pid-file $(Build.BinariesDirectory)/emulator.pid
-        displayName: Stop Android emulator
-        condition: always()
+      - template: templates/use-android-emulator.yml
+        parameters:
+          stop: true
+
       - template: templates/clean-agent-build-directory-step.yml
 
 - stage: MASTER_BUILD_STAGE
@@ -415,13 +402,10 @@ stages:
             $(Build.SourcesDirectory)/protobuf_install
         displayName: Build Host Protoc
 
-      - script: |
-          python3 tools/python/run_android_emulator.py \
-          --android-sdk-root ${ANDROID_SDK_ROOT} \
-          --create-avd --system-image "system-images;android-31;default;x86_64" \
-          --start --emulator-extra-args="-partition-size 4096" \
-          --emulator-pid-file $(Build.BinariesDirectory)/emulator.pid
-        displayName: Start Android emulator
+      - template: templates/use-android-emulator.yml
+        parameters:
+          create: true
+          start: true
 
       - script: |
           python3 tools/ci_build/build.py \
@@ -460,13 +444,10 @@ stages:
         # Build Minimal ORT with NNAPI and reduced Ops, run unit tests on Android Emulator
         displayName: Build Minimal ORT with NNAPI and run tests
 
-      - script: |
-          python3 tools/python/run_android_emulator.py \
-            --android-sdk-root ${ANDROID_SDK_ROOT} \
-            --stop \
-            --emulator-pid-file $(Build.BinariesDirectory)/emulator.pid
-        displayName: Stop Android emulator
-        condition: always()
+      - template: templates/use-android-emulator.yml
+        parameters:
+          stop: true
+
       - template: templates/clean-agent-build-directory-step.yml
 
   - job: Update_Dashboard
diff --git a/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml b/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml
index fdd8c09333737..db1dcc3af792e 100644
--- a/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml
+++ b/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml
@@ -60,6 +60,14 @@ parameters:
   type: string
   default: '--use_azure'
 
+- name: CudaVersion
+  displayName: CUDA version
+  type: string
+  default: '11.8'
+  values:
+    - 11.8
+    - 12.2
+
 resources:
   repositories:
   - repository: onnxruntime-inference-examples # The name used to reference this repository in the checkout step
@@ -80,6 +88,8 @@ stages:
 - stage: Setup
   jobs:
   - job: Set_Variables
+    pool:
+      vmImage: ubuntu-latest
     steps:
     - checkout: none
     - bash: |
@@ -100,6 +110,8 @@ stages:
   dependsOn: Setup
   jobs:
   - job: D1
+    pool:
+      vmImage: ubuntu-latest
     variables:
       MyVar: $[stageDependencies.Setup.Set_Variables.outputs['Set_Release_Version_Suffix.ReleaseVersionSuffix']]
     steps:
@@ -142,7 +154,13 @@ stages:
     timeoutInMinutes: 120
     pool: 'Onnxruntime-Linux-GPU'
     variables:
-      CUDA_VERSION: '11.8'
+      - name: CUDA_VERSION_MAJOR
+        ${{ if eq(parameters.CudaVersion, '11.8') }}:
+          value: '11'
+        ${{ if eq(parameters.CudaVersion, '12.2') }}:
+          value: '12'
+      - name: CUDA_VERSION
+        value: ${{ parameters.CudaVersion }}
     steps:
     - template: templates/set-version-number-variables-step.yml
     - template: templates/get-docker-image-steps.yml
@@ -150,7 +168,7 @@ stages:
         Dockerfile: tools/ci_build/github/linux/docker/inference/x64/default/gpu/Dockerfile
         Context: tools/ci_build/github/linux/docker/inference/x64/default/gpu
         DockerBuildArgs: "--build-arg BUILD_UID=$( id -u )"
-        Repository: onnxruntimecuda11centosbuild
+        Repository: onnxruntimecuda$(CUDA_VERSION_MAJOR)build
 
     - script: $(Build.SourcesDirectory)/tools/ci_build/github/linux/build_cuda_c_api_package.sh
       workingDirectory: $(Build.SourcesDirectory)
@@ -193,11 +211,11 @@ stages:
     DoCompliance: ${{ parameters.DoCompliance }}
     DoEsrp: ${{ parameters.DoEsrp }}
     stage_name_suffix: gpu
-    EnvSetupScript: setup_env_cuda_11.bat
+    EnvSetupScript: setup_env_cuda.bat
     buildArch: x64
     msbuildPlatform: x64
     packageName: x64-cuda
-    buildparameter: --use_cuda --cuda_version=11.8 --cuda_home=$(Agent.TempDirectory)\v11.8 --enable_onnx_tests --enable_wcos --build_java --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=60;61;70;75;80" ${{parameters.AdditionalBuildFlag}}
+    buildparameter: --use_cuda --cuda_home=$(Agent.TempDirectory)\v11.8 --enable_onnx_tests --enable_wcos --build_java --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=60;61;70;75;80" ${{parameters.AdditionalBuildFlag}}
     runTests: ${{ parameters.RunOnnxRuntimeTests }}
     buildJava: true
     java_artifact_id: onnxruntime_gpu
@@ -213,7 +231,7 @@ stages:
     buildArch: x64
     msbuildPlatform: x64
     packageName: x64-tensorrt
-    buildparameter: --use_tensorrt --tensorrt_home="C:\local\TensorRT-8.6.1.6.Windows10.x86_64.cuda-11.8" --cuda_version=11.8 --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8"  --enable_onnx_tests --enable_wcos --build_java --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=60;61;70;75;80"
+    buildparameter: --use_tensorrt --tensorrt_home="C:\local\TensorRT-8.6.1.6.Windows10.x86_64.cuda-11.8"  --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8"  --enable_onnx_tests --enable_wcos --build_java --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=60;61;70;75;80"
     runTests: ${{ parameters.RunOnnxRuntimeTests }}
     buildJava: true
     java_artifact_id: onnxruntime_gpu
@@ -376,7 +394,7 @@ stages:
     - task: BatchScript@1
       displayName: 'setup env'
       inputs:
-        filename: '$(Build.SourcesDirectory)\tools\ci_build\github\windows\setup_env_cuda_11.bat'
+        filename: '$(Build.SourcesDirectory)\tools\ci_build\github\windows\setup_env_cuda.bat'
         modifyEnvironment: true
         workingFolder: '$(Build.BinariesDirectory)'
 
@@ -488,13 +506,13 @@ stages:
         Steps:
         - script: |
             tools/ci_build/get_docker_image.py \
-              --dockerfile tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda11_8_tensorrt8_6 \
+              --dockerfile tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda \
               --context tools/ci_build/github/linux/docker \
-              --docker-build-args "--network=host --build-arg POLICY=manylinux_2_28 --build-arg PLATFORM=x86_64 --build-arg PREPEND_PATH=/usr/local/cuda/bin --build-arg LD_LIBRARY_PATH_ARG=/usr/local/lib64 --build-arg DEVTOOLSET_ROOTPATH=/usr --build-arg BUILD_UID=$( id -u ) --build-arg BUILD_UID=$( id -u )" \
+              --docker-build-args "--network=host --build-arg BASEIMAGE=nvidia/cuda:11.8.0-cudnn8-devel-ubi8 --build-arg TRT_VERSION=8.6.1.6-1.cuda11.8 --build-arg BUILD_UID=$( id -u )" \
               --container-registry onnxruntimebuildcache \
               --multiple_repos \
               --repository onnxruntimecuda118xtrt86build
-          displayName: "Get onnxruntimecuda118xtrt86build image for tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda11_8_tensorrt8_6"
+          displayName: "Get onnxruntimecuda118xtrt86build image for tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda"
           workingDirectory: $(Build.SourcesDirectory)/onnxruntime
         ContainerRegistry: onnxruntimebuildcache
 
@@ -655,7 +673,7 @@ stages:
       clean: all
     # we need to use the 2022 pool to create the nuget package with both pre-net6+Xamarin and net6 targets.
     # VS2019 has no support for net6 and we need to use msbuild (from the VS install) to do the packing
-    pool: 'Azure-Pipelines-EO-Windows2022-aiinfra'
+    pool: 'Onnxruntime-Win-CPU-2022'
     variables:
       breakCodesignValidationInjection: ${{ parameters.DoEsrp }}
       ReleaseVersionSuffix: $[stageDependencies.Setup.Set_Variables.outputs['Set_Release_Version_Suffix.ReleaseVersionSuffix']]
@@ -716,44 +734,29 @@ stages:
         versionSpec: 6.2.1
 
     - task: PowerShell@2
-      displayName: Install .NET 6 workloads
+      displayName: Install mobile workloads
       inputs:
         targetType: 'inline'
         script: |
-          dotnet workload install android ios macos
-        workingDirectory: '$(Build.SourcesDirectory)\csharp'
-
-    - task: PowerShell@2
-      displayName: Build .NET 6 targets using dotnet
-      inputs:
-        targetType: 'inline'
-        # we don't specify 'Any CPU' as the platform here because if we do it gets added to the output path
-        #   e.g. csharp\src\Microsoft.ML.OnnxRuntime\bin\Any CPU\RelWithDebInfo\net6.0-ios\
-        # which is inconsistent with the msbuild output path for the pre-.net6 targets
-        #   e.g. csharp\src\Microsoft.ML.OnnxRuntime\bin\RelWithDebInfo\monoandroid11.0
-        # and makes it harder to do the packing
-        #
-        # 'Any CPU' is the default (first 'mixed' platform specified in the csproj) so this should be fine.
-        script: |
-          dotnet build .\src\Microsoft.ML.OnnxRuntime\Microsoft.ML.OnnxRuntime.csproj -p:SelectedTargets=Net6 -p:Configuration=RelWithDebInfo -p:OnnxRuntimeBuildDirectory="$(Build.BinariesDirectory)" -p:OrtPackageId="Microsoft.ML.OnnxRuntime.Gpu" -p:IsReleaseBuild=${{ parameters.IsReleaseBuild }} -p:ReleaseVersionSuffix=$(ReleaseVersionSuffix)
+          dotnet workload install android ios
         workingDirectory: '$(Build.SourcesDirectory)\csharp'
 
     - task: MSBuild@1
-      displayName: 'Restore NuGet Packages and create project.assets.json for pre-.net6 targets'
+      displayName: 'Restore NuGet Packages and create project.assets.json'
       inputs:
         solution: '$(Build.SourcesDirectory)\csharp\OnnxRuntime.CSharp.sln'
         platform: 'Any CPU'
         configuration: RelWithDebInfo
-        msbuildArguments: '-t:restore -p:SelectedTargets=PreNet6 -p:OrtPackageId="Microsoft.ML.OnnxRuntime.Gpu"'
+        msbuildArguments: '-t:restore -p:OrtPackageId="Microsoft.ML.OnnxRuntime.Gpu"'
         workingDirectory: '$(Build.SourcesDirectory)\csharp'
 
     - task: MSBuild@1
-      displayName: 'Build C# for pre-.net6 targets'
+      displayName: 'Build C# bindings'
       inputs:
         solution: '$(Build.SourcesDirectory)\csharp\OnnxRuntime.CSharp.sln'
         configuration: RelWithDebInfo
         platform: 'Any CPU'
-        msbuildArguments: '-p:SelectedTargets=PreNet6 -p:OnnxRuntimeBuildDirectory="$(Build.BinariesDirectory)" -p:OrtPackageId="Microsoft.ML.OnnxRuntime.Gpu" -p:IsReleaseBuild=${{ parameters.IsReleaseBuild }} -p:ReleaseVersionSuffix=$(ReleaseVersionSuffix)'
+        msbuildArguments: '-p:OnnxRuntimeBuildDirectory="$(Build.BinariesDirectory)" -p:OrtPackageId="Microsoft.ML.OnnxRuntime.Gpu" -p:IsReleaseBuild=${{ parameters.IsReleaseBuild }} -p:ReleaseVersionSuffix=$(ReleaseVersionSuffix)'
         workingDirectory: '$(Build.SourcesDirectory)\csharp'
 
     - template: templates/win-esrp-dll.yml
@@ -762,15 +765,6 @@ stages:
         DisplayName: 'ESRP - Sign C# dlls'
         DoEsrp: ${{ parameters.DoEsrp }}
 
-    - task: MSBuild@1
-      displayName: Update projects.assets.json with combined list of all target frameworks
-      inputs:
-        solution: '$(Build.SourcesDirectory)\csharp\src\Microsoft.ML.OnnxRuntime\Microsoft.ML.OnnxRuntime.csproj'
-        platform: 'Any CPU'
-        configuration: RelWithDebInfo
-        msbuildArguments: '-t:restore -p:SelectedTargets=All -p:OrtPackageId=Microsoft.ML.OnnxRuntime.Gpu'
-        workingDirectory: '$(Build.SourcesDirectory)\csharp'
-
     - task: MSBuild@1
       displayName: 'Build Nuget Packages'
       inputs:
@@ -864,7 +858,7 @@ stages:
       clean: all
     # we need to use the 2022 pool to create the nuget package with both pre-net6+Xamarin and net6 targets.
     # VS2019 has no support for net6 and we need to use msbuild (from the VS install) to do the packing
-    pool: 'Azure-Pipelines-EO-Windows2022-aiinfra'
+    pool: 'Onnxruntime-Win-CPU-2022'
     variables:
       breakCodesignValidationInjection: ${{ parameters.DoEsrp }}
       ReleaseVersionSuffix: $[stageDependencies.Setup.Set_Variables.outputs['Set_Release_Version_Suffix.ReleaseVersionSuffix']]
@@ -1114,7 +1108,6 @@ stages:
     DoNugetPack : 'true'
     DoCompliance: 'false'
     DoEsrp: ${{ parameters.DoEsrp }}
-    OrtPackageId: 'Microsoft.ML.OnnxRuntime.DirectML'
     NuPackScript: |
      msbuild $(Build.SourcesDirectory)\csharp\OnnxRuntime.CSharp.proj /p:Configuration=RelWithDebInfo /t:CreatePackage /p:OrtPackageId=Microsoft.ML.OnnxRuntime.DirectML /p:IsReleaseBuild=${{ parameters.IsReleaseBuild }}
      copy $(Build.SourcesDirectory)\csharp\src\Microsoft.ML.OnnxRuntime\bin\RelWithDebInfo\*.nupkg $(Build.ArtifactStagingDirectory)
diff --git a/tools/ci_build/github/azure-pipelines/cuda-packaging-pipeline.yml b/tools/ci_build/github/azure-pipelines/cuda-packaging-pipeline.yml
new file mode 100644
index 0000000000000..8a9592282cd46
--- /dev/null
+++ b/tools/ci_build/github/azure-pipelines/cuda-packaging-pipeline.yml
@@ -0,0 +1,175 @@
+parameters:
+  - name: RunOnnxRuntimeTests
+    displayName: Run Tests?
+    type: boolean
+    default: true
+
+  - name: UseIncreasedTimeoutForTests
+    displayName: Increase timeout for tests? Set it to false if you are doing an Onnx Runtime release.
+    type: boolean
+    default: false
+
+  - name: DoCompliance
+    displayName: Run Compliance Tasks?
+    type: boolean
+    default: true
+
+  - name: DoEsrp
+    displayName: Run code sign tasks? Must be true if you are doing an ONNX Runtime release
+    type: boolean
+    default: true
+
+  - name: IsReleaseBuild
+    displayName: Is a release build? Set it to true if you are doing an ONNX Runtime release.
+    type: boolean
+    default: false
+
+  - name: PreReleaseVersionSuffixString
+    displayName: Suffix added to pre-release package version. Only used if IsReleaseBuild is true. Denotes the type of pre-release package.
+    type: string
+    values:
+      - alpha
+      - beta
+      - rc
+      - none
+    default: none
+
+  - name: PreReleaseVersionSuffixNumber
+    displayName: Number added to pre-release package version. Only used if IsReleaseBuild is true. Denotes the sequence of a pre-release package.
+    type: number
+    default: 0
+
+  # these 2 parameters are used for debugging.
+  - name: SpecificArtifact
+    displayName: Use Specific Artifact (Debugging only)
+    type: boolean
+    default: false
+
+  - name: BuildId
+    displayName: Pipeline BuildId, you could find it in the URL
+    type: string
+    default: '0'
+
+  - name: CudaVersion
+    displayName: CUDA version
+    type: string
+    default: '12.2'
+    values:
+      - 11.8
+      - 12.2
+
+variables:
+  - name: ReleaseVersionSuffix
+    value: ''
+  - name: docker_base_image
+    ${{ if eq(parameters.CudaVersion, '11.8') }}:
+      value: nvidia/cuda:11.8.0-cudnn8-devel-ubi8
+    ${{ if eq(parameters.CudaVersion, '12.2') }}:
+      value: nvidia/cuda:12.2.2-cudnn8-devel-ubi8
+  - name: linux_trt_version
+    ${{ if eq(parameters.CudaVersion, '11.8') }}:
+      value: 8.6.1.6-1.cuda11.8
+    ${{ if eq(parameters.CudaVersion, '12.2') }}:
+      value: 8.6.1.6-1.cuda12.0
+  - name: win_trt_home
+    ${{ if eq(parameters.CudaVersion, '11.8') }}:
+      value: $(Agent.TempDirectory)\TensorRT-8.6.1.6.Windows10.x86_64.cuda-11.8
+    ${{ if eq(parameters.CudaVersion, '12.2') }}:
+      value: $(Agent.TempDirectory)\TensorRT-8.6.1.6.Windows10.x86_64.cuda-12.0
+  - name: win_cuda_home
+    ${{ if eq(parameters.CudaVersion, '11.8') }}:
+      value: $(Agent.TempDirectory)\v11.8
+    ${{ if eq(parameters.CudaVersion, '12.2') }}:
+      value: $(Agent.TempDirectory)\v12.2
+resources:
+  repositories:
+    - repository: onnxruntime-inference-examples # The name used to reference this repository in the checkout step
+      type: github
+      endpoint: ort-examples
+      name: microsoft/onnxruntime-inference-examples
+    - repository: manylinux
+      type: Github
+      endpoint: Microsoft
+      name: pypa/manylinux
+      ref: 5eda9aded5462201e6310105728d33016e637ea7
+
+stages:
+# Set ReleaseVersionSuffix
+  - stage: Set_ReleaseVersionSuffix
+    jobs:
+      - job: Set_Variables
+        pool:
+          vmImage: ubuntu-latest
+        steps:
+          - checkout: none
+          - bash: |
+              # Do not output ##vso[] commands with `set -x` or they may be parsed again and include a trailing quote.
+              set +x
+              if [[ "${{ parameters.IsReleaseBuild }}" = True && "${{ parameters.PreReleaseVersionSuffixString }}" != "none"  ]]; then
+                if [[ "${{ parameters.PreReleaseVersionSuffixNumber }}" -eq 0 ]]; then
+                  echo "##vso[task.setvariable variable=ReleaseVersionSuffix;isOutput=true]-${{ parameters.PreReleaseVersionSuffixString }}"
+                else
+                  echo "##vso[task.setvariable variable=ReleaseVersionSuffix;isOutput=true]-${{ parameters.PreReleaseVersionSuffixString }}.${{ parameters.PreReleaseVersionSuffixNumber }}"
+                fi
+              else
+                echo "##vso[task.setvariable variable=ReleaseVersionSuffix;isOutput=true]"
+              fi
+            name: Set_Release_Version_Suffix
+          - bash: echo $(ReleaseVersionSuffix)
+            name: Debug_Release_Version_Suffix
+  # this is needed for certain artifacts to be published
+  - stage: Linux_C_API_Packaging_CPU_x64
+    dependsOn: [ ]
+    jobs:
+    - template: templates/c-api-linux-cpu.yml
+      parameters:
+        BaseImage: 'registry.access.redhat.com/ubi8/ubi'
+        OnnxruntimeArch: 'x64'
+        OnnxruntimeCFlags: '-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all'
+        OnnxruntimeCXXFlags: '-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all'
+        OnnxruntimeNodejsBindingArch: 'x64'
+        PoolName: 'onnxruntime-Ubuntu2004-AMD-CPU'
+        PackageJava: false
+        PackageNodeJS: false
+  # Nuget Packaging
+
+  - template: stages/nuget-linux-cuda-packaging-stage.yml
+    parameters:
+      CudaVersion: ${{ parameters.CudaVersion }}
+      docker_base_image: ${{ variables.docker_base_image }}
+      linux_trt_version: ${{ variables.linux_trt_version }}
+  - template: stages/nuget-win-cuda-packaging-stage.yml
+    parameters:
+      RunOnnxRuntimeTests: ${{ parameters.RunOnnxRuntimeTests }}
+      UseIncreasedTimeoutForTests: ${{ parameters.UseIncreasedTimeoutForTests }}
+      CudaVersion: ${{ parameters.CudaVersion }}
+      win_trt_home: ${{ variables.win_trt_home }}
+      win_cuda_home: ${{ variables.win_cuda_home }}
+  - template: stages/nuget-combine-cuda-stage.yml
+    parameters:
+      DoCompliance: ${{ parameters.DoCompliance }}
+      DoEsrp: ${{ parameters.DoEsrp }}
+      IsReleaseBuild: ${{ parameters.IsReleaseBuild }}
+  # Testing
+  ## Windows GPU Testing
+  - template: nuget/templates/test_win.yml
+    parameters:
+      AgentPool: 'onnxruntime-Win2022-GPU-T4'
+      NugetPackageName: 'Microsoft.ML.OnnxRuntime.Gpu'
+      ArtifactSuffix: 'GPU'
+      StageSuffix: 'GPU'
+      Skipx86Tests: 'true'
+      CudaVersion: ${{ parameters.CudaVersion }}
+  ## Linux GPU Testing
+  - template: nuget/templates/test_linux.yml
+    parameters:
+      AgentPool: Onnxruntime-Linux-GPU
+      ArtifactSuffix: 'GPU'
+      StageSuffix: 'GPU'
+      NugetPackageName: 'Microsoft.ML.OnnxRuntime.Gpu'
+      SpecificArtifact: ${{ parameters.specificArtifact }}
+      CudaVersion: ${{ parameters.CudaVersion }}
+      BuildId: ${{ parameters.BuildId }}
+
+## Win/Linux GPU Combined Publishing
+#- template: templates/publish-nuget.yml
diff --git a/tools/ci_build/github/azure-pipelines/linux-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-ci-pipeline.yml
index 33fc9d94bac09..f46febee178e1 100644
--- a/tools/ci_build/github/azure-pipelines/linux-ci-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/linux-ci-pipeline.yml
@@ -56,14 +56,23 @@ stages:
         clean: true
         submodules: none
 
-      - task: NodeTool@0
+      - task: DownloadPackage@1
+        displayName: 'Download ARM64 GCC'
         inputs:
-          versionSpec: '16.x'
+          packageType: upack
+          feed: '/7424c8e4-5c62-490e-95c4-79446f31017c'
+          definition: 'gcc_aarch64_linux_gnu_host_x86_64'
+          version: 13.2.1
+          downloadPath: $(Build.BinariesDirectory)/gcc
 
-      - task: UsePythonVersion@0
+      - task: DownloadPackage@1
+        displayName: 'Download ARM32 GCC'
         inputs:
-          versionSpec: '3.8'
-          addToPath: true
+          packageType: upack
+          feed: '/7424c8e4-5c62-490e-95c4-79446f31017c'
+          definition: 'gcc_aarch32_linux_gnu_host_x86_64'
+          version: 13.2.1
+          downloadPath: $(Build.BinariesDirectory)/gcc
 
       - template: templates/get-docker-image-steps.yml
         parameters:
@@ -72,6 +81,45 @@ stages:
           DockerBuildArgs: "--build-arg BUILD_UID=$( id -u ) --build-arg BASEIMAGE=registry.access.redhat.com/ubi8/ubi"
           Repository: onnxruntimecpubuild
 
+      - task: PythonScript@0
+        displayName: 'Update deps.txt'
+        inputs:
+          scriptPath: $(Build.SourcesDirectory)/tools/ci_build/replace_urls_in_deps.py
+          arguments: --new_dir $(Build.BinariesDirectory)/deps
+          workingDirectory: $(Build.BinariesDirectory)
+          pythonInterpreter: /usr/bin/python3
+
+      - script: |
+          set -e -x
+          # ARM64 build
+          mkdir -p $(Build.BinariesDirectory)/gccbin
+          tar -Jxf $(Build.BinariesDirectory)/gcc/arm-gnu-toolchain-13.2.rel1-x86_64-aarch64-none-linux-gnu.tar.xz --strip=1 -C $(Build.BinariesDirectory)/gccbin
+          export PATH=$(Build.BinariesDirectory)/gccbin/bin:$PATH
+          mkdir $(Build.BinariesDirectory)/aarch64build
+          cd $(Build.BinariesDirectory)/aarch64build
+          cmake $(Build.SourcesDirectory)/cmake -Donnxruntime_ENABLE_CPUINFO=OFF -DPython_EXECUTABLE=/usr/bin/python3 -DPYTHON_EXECUTABLE=/usr/bin/python3 -DCMAKE_BUILD_TYPE=Debug -DCMAKE_TOOLCHAIN_FILE=$(Build.SourcesDirectory)/cmake/linux_arm64_crosscompile_toolchain.cmake -G Ninja
+          ninja
+          rm -rf $(Build.BinariesDirectory)/aarch64build $(Build.BinariesDirectory)/gccbin
+          # ARM32 build
+          mkdir -p $(Build.BinariesDirectory)/gccbin
+          tar -Jxf $(Build.BinariesDirectory)/gcc/arm-gnu-toolchain-13.2.rel1-x86_64-arm-none-linux-gnueabihf.tar.xz --strip=1 -C $(Build.BinariesDirectory)/gccbin
+          ls $(Build.BinariesDirectory)/gccbin/bin
+          mkdir $(Build.BinariesDirectory)/arm32build
+          cd $(Build.BinariesDirectory)/arm32build
+          # TODO: fix the warnings and remove the --compile-no-warning-as-error arg
+          cmake --compile-no-warning-as-error $(Build.SourcesDirectory)/cmake -Donnxruntime_ENABLE_CPUINFO=OFF -DPython_EXECUTABLE=/usr/bin/python3 -DPYTHON_EXECUTABLE=/usr/bin/python3 -DCMAKE_BUILD_TYPE=Debug -DCMAKE_TOOLCHAIN_FILE=$(Build.SourcesDirectory)/cmake/linux_arm32_crosscompile_toolchain.cmake -G Ninja
+          ninja
+          rm -rf $(Build.BinariesDirectory)/arm32build $(Build.BinariesDirectory)/gccbin
+        displayName: Cross-compile for Linux ARM32 and ARM64
+
+      - task: PythonScript@0
+        displayName: 'Update deps.txt'
+        inputs:
+          scriptPath: $(Build.SourcesDirectory)/tools/ci_build/replace_urls_in_deps.py
+          arguments: --new_dir /build/deps
+          workingDirectory: $(Build.BinariesDirectory)
+          pythonInterpreter: /usr/bin/python3
+
       - template: templates/linux-build-step-with-cache.yml
         parameters:
           WithCache: true
diff --git a/tools/ci_build/github/azure-pipelines/linux-cpu-aten-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-cpu-aten-pipeline.yml
index 2c5a69e216d14..146186e9eeaf5 100644
--- a/tools/ci_build/github/azure-pipelines/linux-cpu-aten-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/linux-cpu-aten-pipeline.yml
@@ -53,10 +53,6 @@ jobs:
     clean: true
     submodules: recursive
 
-  - task: NodeTool@0
-    inputs:
-      versionSpec: '16.x'
-
   - template: templates/get-docker-image-steps.yml
     parameters:
       Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2014_aten_cpu
diff --git a/tools/ci_build/github/azure-pipelines/linux-gpu-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-gpu-ci-pipeline.yml
index 981cbec4ef50f..0993a81a02249 100644
--- a/tools/ci_build/github/azure-pipelines/linux-gpu-ci-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/linux-gpu-ci-pipeline.yml
@@ -26,7 +26,14 @@ pr:
     - 'js/web'
     - 'onnxruntime/core/providers/js'
 #### end trigger ####
-
+parameters:
+  - name: CudaVersion
+    displayName: CUDA version
+    type: string
+    default: '11.8'
+    values:
+      - 11.8
+      - 12.2
 resources:
   repositories:
   - repository: manylinux
@@ -37,6 +44,17 @@ resources:
 
 variables:
   - template: templates/common-variables.yml
+  - name: docker_base_image
+    ${{ if eq(parameters.CudaVersion, '11.8') }}:
+      value: nvidia/cuda:11.8.0-cudnn8-devel-ubi8
+    ${{ if eq(parameters.CudaVersion, '12.2') }}:
+      value: nvidia/cuda:12.2.2-cudnn8-devel-ubi8
+
+  - name: linux_trt_version
+    ${{ if eq(parameters.CudaVersion, '11.8') }}:
+      value: 8.6.1.6-1.cuda11.8
+    ${{ if eq(parameters.CudaVersion, '12.2') }}:
+      value: 8.6.1.6-1.cuda12.0
 
 jobs:
 - job: Linux_Build
@@ -55,12 +73,16 @@ jobs:
   - checkout: self
     clean: true
     submodules: none
-
   - template: templates/get-docker-image-steps.yml
     parameters:
-      Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda11
+      Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda
       Context: tools/ci_build/github/linux/docker
-      DockerBuildArgs: "--network=host --build-arg POLICY=manylinux_2_28 --build-arg PLATFORM=x86_64 --build-arg PREPEND_PATH=/usr/local/cuda/bin --build-arg LD_LIBRARY_PATH_ARG=/usr/local/lib64 --build-arg DEVTOOLSET_ROOTPATH=/usr --build-arg BUILD_UID=$( id -u )"
+      DockerBuildArgs: "
+      --network=host 
+      --build-arg BASEIMAGE=$(docker_base_image)
+      --build-arg TRT_VERSION=$(linux_trt_version) 
+      --build-arg BUILD_UID=$( id -u )
+      "
       Repository: onnxruntimecuda11build
 
   - task: Cache@2
@@ -106,7 +128,7 @@ jobs:
               --parallel \
               --build_wheel \
               --enable_onnx_tests --use_cuda --cuda_version=${{variables.common_cuda_version}} --cuda_home=/usr/local/cuda-${{variables.common_cuda_version}} --cudnn_home=/usr/local/cuda-${{variables.common_cuda_version}} \
-              --enable_cuda_profiling \
+              --enable_cuda_profiling --enable_cuda_nhwc_ops \
               --enable_pybind --build_java \
               --use_cache \
               --cmake_extra_defines  CMAKE_CUDA_ARCHITECTURES=75; \
@@ -154,9 +176,14 @@ jobs:
 
   - template: templates/get-docker-image-steps.yml
     parameters:
-      Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda11
+      Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda
       Context: tools/ci_build/github/linux/docker
-      DockerBuildArgs: "--network=host --build-arg POLICY=manylinux_2_28 --build-arg PLATFORM=x86_64 --build-arg PREPEND_PATH=/usr/local/cuda/bin --build-arg LD_LIBRARY_PATH_ARG=/usr/local/lib64 --build-arg DEVTOOLSET_ROOTPATH=/usr --build-arg BUILD_UID=$( id -u )"
+      DockerBuildArgs: "
+      --network=host 
+      --build-arg BASEIMAGE=$(docker_base_image)
+      --build-arg TRT_VERSION=$(linux_trt_version)
+      --build-arg BUILD_UID=$( id -u )
+      "
       Repository: onnxruntimecuda11build
 
   - task: CmdLine@2
diff --git a/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml
index 9450395f3cf79..4ca11a4d1565b 100644
--- a/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml
@@ -26,7 +26,14 @@ pr:
     - 'js/web'
     - 'onnxruntime/core/providers/js'
 #### end trigger ####
-
+parameters:
+  - name: CudaVersion
+    displayName: CUDA version
+    type: string
+    default: '11.8'
+    values:
+      - 11.8
+      - 12.2
 resources:
   repositories:
   - repository: manylinux
@@ -34,7 +41,17 @@ resources:
     endpoint: Microsoft
     name: pypa/manylinux
     ref: 5eda9aded5462201e6310105728d33016e637ea7
-
+variables:
+  - name: docker_base_image
+    ${{ if eq(parameters.CudaVersion, '11.8') }}:
+      value: nvidia/cuda:11.8.0-cudnn8-devel-ubi8
+    ${{ if eq(parameters.CudaVersion, '12.2') }}:
+      value: nvidia/cuda:12.2.2-cudnn8-devel-ubi8
+  - name: linux_trt_version
+    ${{ if eq(parameters.CudaVersion, '11.8') }}:
+      value: 8.6.1.6-1.cuda11.8
+    ${{ if eq(parameters.CudaVersion, '12.2') }}:
+      value: 8.6.1.6-1.cuda12.0
 jobs:
 - job: Linux_Build
   timeoutInMinutes: 180
@@ -57,9 +74,14 @@ jobs:
 
   - template: templates/get-docker-image-steps.yml
     parameters:
-      Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda11_8_tensorrt8_6
+      Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda
       Context: tools/ci_build/github/linux/docker
-      DockerBuildArgs: "--network=host --build-arg POLICY=manylinux_2_28 --build-arg PLATFORM=x86_64 --build-arg PREPEND_PATH=/usr/local/cuda/bin --build-arg LD_LIBRARY_PATH_ARG=/usr/local/lib64 --build-arg DEVTOOLSET_ROOTPATH=/usr --build-arg BUILD_UID=$( id -u )"
+      DockerBuildArgs: "
+      --network=host
+      --build-arg BASEIMAGE=${{ variables.docker_base_image }}
+      --build-arg TRT_VERSION=${{ variables.linux_trt_version }}
+      --build-arg BUILD_UID=$( id -u )
+      "
       Repository: onnxruntimetensorrt86gpubuild
 
   - template: templates/linux-build-step-with-cache.yml
@@ -94,7 +116,8 @@ jobs:
                       --build_shared_lib \
                       --parallel \
                       --build_wheel \
-                      --enable_onnx_tests --use_cuda --cuda_version=11.8 --cuda_home=/usr/local/cuda-11.8 --cudnn_home=/usr/local/cuda-11.8 \
+                      --enable_onnx_tests \
+                      --use_cuda --cuda_home=/usr/local/cuda-${{ parameters.CudaVersion }} --cudnn_home=/usr/local/cuda-${{ parameters.CudaVersion }} \
                       --enable_pybind --build_java \
                       --use_tensorrt --tensorrt_home /usr \
                       --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=75 \
diff --git a/tools/ci_build/github/azure-pipelines/linux-migraphx-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-migraphx-ci-pipeline.yml
index 352ee19a49108..5dac8fc9cda63 100644
--- a/tools/ci_build/github/azure-pipelines/linux-migraphx-ci-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/linux-migraphx-ci-pipeline.yml
@@ -36,7 +36,7 @@ variables:
   - name: render
     value: 109
   - name: RocmVersion
-    value: 5.6
+    value: 5.7
 
 jobs:
 - job: Linux_Build
@@ -99,6 +99,7 @@ jobs:
             ccache -s; \
             python tools/ci_build/build.py \
               --config Release \
+              --enable_training \
               --cmake_extra_defines \
                 CMAKE_HIP_COMPILER=/opt/rocm/llvm/bin/clang++ \
                 onnxruntime_BUILD_KERNEL_EXPLORER=OFF \
@@ -181,7 +182,7 @@ jobs:
           /bin/bash -c "
             set -ex; \
             cd /build/Release && xargs -a /build/Release/perms.txt chmod a+x; \
-            bash /onnxruntime_src/tools/ci_build/github/pai/migraphx_test_launcher.sh"
+            bash /onnxruntime_src/tools/ci_build/github/pai/pai_test_launcher.sh"
       workingDirectory: $(Build.SourcesDirectory)
     displayName: 'Run onnxruntime unit tests'
 
diff --git a/tools/ci_build/github/azure-pipelines/linux-multi-gpu-tensorrt-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-multi-gpu-tensorrt-ci-pipeline.yml
index 0a7dc0e456a95..e4441853240e5 100644
--- a/tools/ci_build/github/azure-pipelines/linux-multi-gpu-tensorrt-ci-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/linux-multi-gpu-tensorrt-ci-pipeline.yml
@@ -36,5 +36,3 @@ jobs:
     JobName: 'Linux_CI_Multi_GPU_TensorRT_Dev'
     # The latest TensorRT container only supports ubuntu20.04 and python 3.8
     RunDockerBuildArgs: '-o ubuntu20.04 -d tensorrt -x "--enable_multi_device_test"'
-    DoNugetPack:  'false'
-    ArtifactName: 'drop-linux'
diff --git a/tools/ci_build/github/azure-pipelines/linux-openvino-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-openvino-ci-pipeline.yml
index 93ee17b4cc7e6..c92fc93abba37 100644
--- a/tools/ci_build/github/azure-pipelines/linux-openvino-ci-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/linux-openvino-ci-pipeline.yml
@@ -33,6 +33,4 @@ jobs:
     AgentPool : 'Linux-CPU-2019'
     JobName: 'Linux_CI_Dev'
     RunDockerBuildArgs: '-o ubuntu20.04 -d openvino -v 2023.0.0 -x "--use_openvino CPU_FP32 --build_wheel"'
-    DoNugetPack:  'false'
-    ArtifactName: 'drop-linux'
     TimeoutInMinutes: 120
diff --git a/tools/ci_build/github/azure-pipelines/linux-qnn-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-qnn-ci-pipeline.yml
index f678b18ba9787..491c896de8788 100644
--- a/tools/ci_build/github/azure-pipelines/linux-qnn-ci-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/linux-qnn-ci-pipeline.yml
@@ -110,7 +110,7 @@ jobs:
         inputs:
           script: |
             ./build/Release/onnx_test_runner -e qnn \
-              -v -j 1 -c 1 -i "backend_path|$(QNN_SDK_ROOT)/lib/x86_64-linux-clang/libQnnHtp.so qnn_context_cache_enable|1 qnn_context_cache_path|./build/Release/mobilenet_qdq.bin" \
+              -v -j 1 -c 1 -i "backend_path|$(QNN_SDK_ROOT)/lib/x86_64-linux-clang/libQnnHtp.so qnn_context_cache_enable|1 qnn_context_cache_path|./build/Release/mobilenet_qdq.onnx_qnn_ctx.onnx" \
               /data/qdq_models/mobilenetv2-1.0_add_transpose_quant
 
       - task: CmdLine@2
@@ -118,5 +118,5 @@ jobs:
         inputs:
           script: |
             ./build/Release/onnx_test_runner -e qnn \
-              -v -j 1 -c 1 -i "backend_path|$(QNN_SDK_ROOT)/lib/x86_64-linux-clang/libQnnHtp.so qnn_context_cache_enable|1 qnn_context_cache_path|./build/Release/mobilenet_qdq.bin" \
+              -v -j 1 -c 1 -i "backend_path|$(QNN_SDK_ROOT)/lib/x86_64-linux-clang/libQnnHtp.so qnn_context_cache_enable|1 qnn_context_cache_path|./build/Release/mobilenet_qdq.onnx_qnn_ctx.onnx" \
               /data/qdq_models/mobilenetv2-1.0_add_transpose_quant
diff --git a/tools/ci_build/github/azure-pipelines/mac-coreml-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/mac-coreml-ci-pipeline.yml
index 60f2786bdd856..f5472a49c5148 100644
--- a/tools/ci_build/github/azure-pipelines/mac-coreml-ci-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/mac-coreml-ci-pipeline.yml
@@ -34,7 +34,7 @@ jobs:
   pool:
     vmImage: 'macOS-13'
   variables:
-    MACOSX_DEPLOYMENT_TARGET: '10.14'
+    MACOSX_DEPLOYMENT_TARGET: '11.0'
     TODAY: $[format('{0:dd}{0:MM}{0:yyyy}', pipeline.startTime)]
     CCACHE_DIR: '$(Pipeline.Workspace)/ccache'
   timeoutInMinutes: 120
diff --git a/tools/ci_build/github/azure-pipelines/mac-ios-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/mac-ios-ci-pipeline.yml
index b1d7ede2843c8..18d53654e7c4d 100644
--- a/tools/ci_build/github/azure-pipelines/mac-ios-ci-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/mac-ios-ci-pipeline.yml
@@ -54,7 +54,7 @@ jobs:
                 --use_coreml \
                 --use_xnnpack \
                 --ios \
-                --ios_sysroot iphonesimulator  \
+                --apple_sysroot iphonesimulator  \
                 --osx_arch x86_64 \
                 --apple_deploy_target 12.0 \
                 --use_xcode \
diff --git a/tools/ci_build/github/azure-pipelines/mac-ios-packaging-pipeline.yml b/tools/ci_build/github/azure-pipelines/mac-ios-packaging-pipeline.yml
index 20263974af24a..5fd15b64e03b6 100644
--- a/tools/ci_build/github/azure-pipelines/mac-ios-packaging-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/mac-ios-packaging-pipeline.yml
@@ -100,50 +100,3 @@ stages:
 - template: templates/stages/mac-ios-packaging-build-stage.yml
   parameters:
     packageVariant: Training
-
-- stage: IosPackaging_TestPackageSwift_Full
-  dependsOn:
-  - IosPackaging_SetCommonVariables
-  - IosPackaging_Build_Full
-
-  jobs:
-  - job: j
-    displayName: "Test Package.swift with full package"
-
-    pool:
-      vmImage: "macOS-13"
-
-    variables:
-      xcodeVersion: "14.3"
-      ortPodVersion: $[stageDependencies.IosPackaging_SetCommonVariables.j.outputs['SetCommonVariables.ORT_POD_VERSION']]
-      skipComponentGovernanceDetection: true
-
-    timeoutInMinutes: 10
-
-    steps:
-    - template: templates/use-xcode-version.yml
-      parameters:
-        xcodeVersion: ${{ variables.xcodeVersion }}
-
-    - download: current
-      artifact: ios_packaging_artifacts_full
-      displayName: "Download full build artifacts"
-
-    - script: |
-        set -e -x
-        shasum -a 256 "$(Pipeline.Workspace)/ios_packaging_artifacts_full/pod-archive-onnxruntime-c-$(ortPodVersion).zip"
-      displayName: "Print ORT iOS Pod checksum"
-
-    # copy the pod archive to a path relative to Package.swift and set the env var required by Package.swift to use that.
-    # xcodebuild will implicitly use Package.swift and build/run the .testTarget (tests in swift/onnxTests).
-    # once that's done cleanup the copy of the pod zip file
-    - script: |
-        set -e -x
-
-        SIMULATOR_DEVICE_ID=$(set -o pipefail; python3 tools/ci_build/github/apple/get_simulator_device_info.py | jq --raw-output '.device_udid')
-
-        cp "$(Pipeline.Workspace)/ios_packaging_artifacts_full/pod-archive-onnxruntime-c-$(ortPodVersion).zip" swift/
-        export ORT_IOS_POD_LOCAL_PATH="swift/pod-archive-onnxruntime-c-$(ortPodVersion).zip"
-        xcodebuild test -scheme onnxruntime -destination "platform=iOS Simulator,id=${SIMULATOR_DEVICE_ID}"
-        rm swift/pod-archive-onnxruntime-c-$(ortPodVersion).zip
-      displayName: "Test Package.swift usage"
diff --git a/tools/ci_build/github/azure-pipelines/mac-objc-static-analysis-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/mac-objc-static-analysis-ci-pipeline.yml
index 6893fb95cfec5..482279fa07225 100644
--- a/tools/ci_build/github/azure-pipelines/mac-objc-static-analysis-ci-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/mac-objc-static-analysis-ci-pipeline.yml
@@ -29,6 +29,11 @@ jobs:
         --build --parallel --target onnx_proto
     displayName: Generate compile_commands.json and ONNX protobuf files
 
+  - script: |
+      patch < "$(Build.SourcesDirectory)/cmake/patches/abseil/absl_gh_issue_1435_workaround.patch"
+    workingDirectory: "$(Build.BinariesDirectory)/Debug/_deps/abseil_cpp-src"
+    displayName: Apply absl_gh_issue_1435_workaround.patch
+
   - script: |
       set -e
 
diff --git a/tools/ci_build/github/azure-pipelines/nodejs/templates/test_linux.yml b/tools/ci_build/github/azure-pipelines/nodejs/templates/test_linux.yml
index 4563a79adb834..864d1002a90fc 100644
--- a/tools/ci_build/github/azure-pipelines/nodejs/templates/test_linux.yml
+++ b/tools/ci_build/github/azure-pipelines/nodejs/templates/test_linux.yml
@@ -1,5 +1,5 @@
 parameters:
-  AgentPool: 'onnxruntime-Ubuntu2004-AMD-CPU'
+  AgentPool: 'Azure-Pipelines-EO-Ubuntu-2004-aiinfra'
   StageSuffix: ''
 stages:
 - stage: Nodejs_Test_${{ parameters.StageSuffix }}
@@ -18,4 +18,4 @@ stages:
       value: '$(Build.BinariesDirectory)'
     steps:
     - template: test.yml
-  
\ No newline at end of file
+  
diff --git a/tools/ci_build/github/azure-pipelines/npm-packaging-pipeline.yml b/tools/ci_build/github/azure-pipelines/npm-packaging-pipeline.yml
index 2e7ac9508a41e..fd26128b8b29a 100644
--- a/tools/ci_build/github/azure-pipelines/npm-packaging-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/npm-packaging-pipeline.yml
@@ -9,11 +9,6 @@ parameters:
   - 'custom'
   default: 'nightly (@dev)'
 
-- name: NodePipelineId
-  displayName: 'Node npm package build Id'
-  type: string
-  default: 'latest'
-
 variables:
   # pipeline should define the following varaibles
   #   ExtraBuildArgs
@@ -29,6 +24,11 @@ variables:
     NpmPackagingMode: '$(VersionSuffix)'
 
 resources:
+  pipelines:
+  - pipeline: build
+    source: 'Zip-Nuget-Java-Nodejs Packaging Pipeline'
+    trigger: true
+    branch: main
   repositories:
   - repository: manylinux
     type: Github
@@ -41,9 +41,14 @@ stages:
   parameters:
     NpmPackagingMode: ${{ variables.NpmPackagingMode }}
     IsReleasePipeline: true
-    PoolName: 'Azure-Pipelines-EO-Windows2022-aiinfra'
+    PoolName: 'onnxruntime-Ubuntu2004-AMD-CPU'
     PackageName: 'onnxruntime-web'
     ExtraBuildArgs: ''
+    UseWebPoolName: true
+    RunWebGpuTestsForDebugBuild: false
+    RunWebGpuTestsForReleaseBuild: true
+    WebGpuPoolName: 'onnxruntime-Win2022-webgpu-A10'
+    WebCpuPoolName: 'Onnxruntime-Win-CPU-2022'
 
 - template: templates/react-native-ci.yml
   parameters:
@@ -60,40 +65,18 @@ stages:
   - Build_web_Debug
   jobs:
   - job: Download_Node_Package_And_Publish_Validation_Script
-    pool: 'Azure-Pipelines-EO-Windows2022-aiinfra'
+    pool: 'Onnxruntime-Win-CPU-2022'
     variables:
       runCodesignValidationInjection: false
     timeoutInMinutes: 10
     steps:
-
-    - ${{ if eq(parameters.NodePipelineId, 'latest') }}:
-      - task: DownloadPipelineArtifact@2
-        inputs:
-          buildType: 'specific'
-          project: '530acbc4-21bc-487d-8cd8-348ff451d2ff'
-          definition: '940'
-          specificBuildWithTriggering: true
-          buildVersionToDownload: 'latestFromBranch'
-          branchName: 'refs/heads/main'
-          artifactName: 'NPM_packages'
-          targetPath: '$(Pipeline.Workspace)'
-        displayName: 'Download onnxruntime-node Pipeline Artifact'
-
-    - ${{ if ne(parameters.NodePipelineId, 'latest') }}:
-      - task: DownloadPipelineArtifact@2
-        inputs:
-          buildType: 'specific'
-          project: '530acbc4-21bc-487d-8cd8-348ff451d2ff'
-          definition: '940'
-          buildVersionToDownload: 'specific'
-          pipelineId: '${{ parameters.NodePipelineId }}'
-          artifactName: 'NPM_packages'
-          targetPath: '$(Pipeline.Workspace)'
-        displayName: 'Download onnxruntime-node Pipeline Artifact'
+    - download: build
+      artifact: 'NPM_packages'
+      displayName: 'Download onnxruntime-node Pipeline Artifact'
 
     - task: CopyFiles@2
       inputs:
-        sourceFolder: $(Pipeline.Workspace)
+        sourceFolder: '$(Pipeline.Workspace)\build\NPM_packages'
         contents: onnxruntime-*.tgz
         targetFolder: $(Build.ArtifactStagingDirectory)\node-artifacts
       displayName: 'Copy onnxruntime-node Artifacts'
diff --git a/tools/ci_build/github/azure-pipelines/nuget/templates/dml-vs-2022.yml b/tools/ci_build/github/azure-pipelines/nuget/templates/dml-vs-2022.yml
index b1e36e63e86ab..4e7093f04a59f 100644
--- a/tools/ci_build/github/azure-pipelines/nuget/templates/dml-vs-2022.yml
+++ b/tools/ci_build/github/azure-pipelines/nuget/templates/dml-vs-2022.yml
@@ -15,7 +15,7 @@ parameters:
   EnvSetupScript: 'setup_env.bat'
   AgentPool: 'onnxruntime-Win-CPU-2022'
   AgentDemands: []
-  OrtPackageId: Microsoft.ML.OnnxRuntime
+  OrtPackageId: Microsoft.ML.OnnxRuntime.DirectML
   BuildConfigurations: ['RelWithDebInfo'] # Options: Debug, RelWithDebInfo
   RunTests : 'true'
   EnableLto: true
@@ -137,7 +137,7 @@ stages:
       - task: MSBuild@1
         displayName: 'Restore NuGet Packages'
         inputs:
-          solution: '$(Build.SourcesDirectory)\csharp\OnnxRuntime.CSharp.sln'
+          solution: '$(Build.SourcesDirectory)\csharp\OnnxRuntime.DesktopOnly.CSharp.sln'
           platform: 'Any CPU'
           configuration: '$(BuildConfig)'
           msbuildArguments: '-t:restore -p:OrtPackageId=${{ parameters.OrtPackageId }}'
@@ -146,7 +146,7 @@ stages:
       - task: MSBuild@1
         displayName: 'Build C#'
         inputs:
-          solution: '$(Build.SourcesDirectory)\csharp\OnnxRuntime.CSharp.sln'
+          solution: '$(Build.SourcesDirectory)\csharp\OnnxRuntime.DesktopOnly.CSharp.sln'
           configuration: '$(BuildConfig)'
           platform: 'Any CPU'
           msbuildArguments: '-p:OnnxRuntimeBuildDirectory="$(Build.BinariesDirectory)" -p:OrtPackageId=${{ parameters.OrtPackageId }} -p:IsReleaseBuild=${{ parameters.IsReleaseBuild }}'
@@ -231,6 +231,15 @@ stages:
               searchPattern: '**/*.pdb'
               symbolServerType: teamServices
 
+      - ${{ if eq(parameters['DoCompliance'], 'true') }}:
+        - template: ../../templates/compliance.yml
+          parameters :
+            msbuildPlatform: ${{ parameters.sln_platform }}
+
+      - template: ../../templates/component-governance-component-detection-steps.yml
+        parameters :
+          condition : 'succeeded'
+
       # Node.js Publish
       - ${{ if eq(parameters['DoNodejsPack'], 'true') }}:
         - task: BatchScript@1
@@ -285,15 +294,6 @@ stages:
             targetPath: '$(Build.SourcesDirectory)\js\node\bin\napi-v3\win32\${{ parameters.sln_platform }}'
             artifactName: 'drop-onnxruntime-nodejs-win-${{ parameters.sln_platform }}-dml'
 
-      - ${{ if eq(parameters['DoCompliance'], 'true') }}:
-        - template: ../../templates/compliance.yml
-          parameters :
-            msbuildPlatform: ${{ parameters.sln_platform }}
-
-      - template: ../../templates/component-governance-component-detection-steps.yml
-        parameters :
-          condition : 'succeeded'
-
       - task: mspremier.PostBuildCleanup.PostBuildCleanup-task.PostBuildCleanup@3
         displayName: 'Clean Agent Directories'
         condition: always()
diff --git a/tools/ci_build/github/azure-pipelines/nuget/templates/test_linux.yml b/tools/ci_build/github/azure-pipelines/nuget/templates/test_linux.yml
index 64fa29f06553e..1e609b052b8d3 100644
--- a/tools/ci_build/github/azure-pipelines/nuget/templates/test_linux.yml
+++ b/tools/ci_build/github/azure-pipelines/nuget/templates/test_linux.yml
@@ -7,7 +7,7 @@ parameters:
   SpecificArtifact: false
   CustomOpArtifactName: 'onnxruntime-linux-x64'
   BuildId: '0'
-
+  CudaVersion: '11.8'
 stages:
 - stage: NuGet_Test_Linux_${{ parameters.StageSuffix }}
   dependsOn:
@@ -54,9 +54,18 @@ stages:
     - ${{if contains(parameters.StageSuffix , 'GPU') }}:
       - template: ../../templates/get-docker-image-steps.yml
         parameters:
-          Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.package_ubuntu_cuda11_8_tensorrt8_6
+          Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.package_ubuntu_2004_gpu
           Context: tools/ci_build/github/linux/docker/
-          DockerBuildArgs: "--build-arg BUILD_UID=$( id -u )"
+          ${{ if eq(parameters.CudaVersion, '12.2') }}:
+            DockerBuildArgs: "
+            --build-arg BASEIMAGE=nvidia/cuda:12.2.2-cudnn8-devel-ubuntu20.04
+            --build-arg TRT_VERSION=8.6.1.6-1+cuda12.0
+            --build-arg BUILD_UID=$( id -u )
+            "
+          ${{ else }}:
+            DockerBuildArgs: "
+            --build-arg BUILD_UID=$( id -u )
+            "
           Repository: onnxruntimepackagestest
       - bash: |
           docker run --rm \
diff --git a/tools/ci_build/github/azure-pipelines/nuget/templates/test_win.yml b/tools/ci_build/github/azure-pipelines/nuget/templates/test_win.yml
index 0b9ded10ddd3e..a15c3061913f8 100644
--- a/tools/ci_build/github/azure-pipelines/nuget/templates/test_win.yml
+++ b/tools/ci_build/github/azure-pipelines/nuget/templates/test_win.yml
@@ -3,11 +3,12 @@ parameters:
   NugetPackageName : ''
   ArtifactSuffix: ''
   StageSuffix: 'CPU'
-  # For inference packages, the test data artifact name is drop-nuget and no suffix is required.
+  # For inference packages, the test data artifact name is drop-extra and no suffix is required.
   # For training packages, to differentiate the artifact name we add '-training' suffix. This needs to be passed from
   # the parent pipeline.
   TestDataArtifactSuffix: ''
   Skipx86Tests: 'false'
+  CudaVersion: ''
 
 stages:
 - stage: NuGet_Test_Win_${{ parameters.StageSuffix }}
@@ -27,6 +28,10 @@ stages:
       value: 'ON'
     - name: runCodesignValidationInjection
       value: false
+    - name: CUDA_MODULE_LOADINGL
+      value: 'LAZY'
+    - name: GRADLE_OPTS
+      value: '-Dorg.gradle.daemon=false'
 
     steps:
     - task: UsePythonVersion@0
@@ -39,13 +44,12 @@ stages:
       displayName: Use Nuget 5.7.0
       inputs:
         versionSpec: 5.7.0
-
-    - task: BatchScript@1
-      displayName: 'setup env'
-      inputs:
-        filename: '$(Build.SourcesDirectory)\tools\ci_build\github\windows\setup_env_gpu.bat'
-        modifyEnvironment: true
-        workingFolder: '$(Build.BinariesDirectory)'
+    - ${{ if ne( parameters.CudaVersion, '') }}:
+      - template: ../../templates/jobs/download_win_gpu_library.yml
+        parameters:
+          DownloadCUDA: true
+          DownloadTRT: true
+          CudaVersion: ${{ parameters.CudaVersion }}
 
     - task: BatchScript@1
       displayName: 'Setup Visual Studio env vars'
@@ -60,12 +64,6 @@ stages:
         artifactName: drop-signed-nuget-${{ parameters.ArtifactSuffix }}
         targetPath: '$(Build.BinariesDirectory)\nuget-artifact'
 
-    - task: DownloadPipelineArtifact@0
-      displayName: 'Download Pipeline Artifact - testdata'
-      inputs:
-        artifactName: 'drop-nuget${{ parameters.TestDataArtifactSuffix }}'
-        targetPath: '$(Build.BinariesDirectory)\testdata'
-
     - template: get-nuget-package-version-as-variable.yml
       parameters:
         packageFolder: '$(Build.BinariesDirectory)\nuget-artifact'
diff --git a/tools/ci_build/github/azure-pipelines/orttraining-linux-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/orttraining-linux-ci-pipeline.yml
index 007630edb25be..018672e0b2dea 100644
--- a/tools/ci_build/github/azure-pipelines/orttraining-linux-ci-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/orttraining-linux-ci-pipeline.yml
@@ -54,10 +54,6 @@ jobs:
     clean: true
     submodules: none
 
-  - task: NodeTool@0
-    inputs:
-      versionSpec: '16.x'
-
   - task: UsePythonVersion@0
     inputs:
       versionSpec: '3.8'
@@ -88,6 +84,7 @@ jobs:
         mkdir -p $(Pipeline.Workspace)/ccache
         docker run --rm \
           --volume /data/onnx:/data/onnx:ro \
+          --volume /data/models:/build/models:ro \
           --volume $(Build.SourcesDirectory):/onnxruntime_src \
           --volume $(Build.BinariesDirectory):/build \
           --volume $HOME/.onnx:/home/onnxruntimedev/.onnx \
@@ -109,51 +106,11 @@ jobs:
               --build_wheel \
               --enable_onnx_tests \
               --enable_training \
-              --use_cache \
-              --update --build; \
+              --use_cache; \
                 ccache -sv; \
                 ccache -z"
       workingDirectory: $(Build.SourcesDirectory)
 
-  - task: CmdLine@2
-    displayName: 'Install python deps'
-    inputs:
-      script: |
-         set -e -x
-         python3 -m pip uninstall -y ort-nightly-gpu ort-nightly onnxruntime onnxruntime-gpu onnxruntime-training onnxruntime-directml ort-nightly-directml onnx -qq
-         cp $(Build.SourcesDirectory)/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt $(Build.BinariesDirectory)/requirements.txt
-         # Test ORT with the latest ONNX release.
-         sed -i "s/git+http:\/\/github\.com\/onnx\/onnx.*/onnx/" $(Build.BinariesDirectory)/requirements.txt
-         python3 -m pip install -r $(Build.BinariesDirectory)/requirements.txt
-         mkdir $(Build.BinariesDirectory)/requirements_torch_cpu/
-         cp $(Build.SourcesDirectory)/tools/ci_build/github/linux/docker/scripts/training/ortmodule/stage1/requirements_torch_cpu/requirements.txt $(Build.BinariesDirectory)/requirements_torch_cpu/requirements.txt
-         python3 -m pip install -r $(Build.BinariesDirectory)/requirements_torch_cpu/requirements.txt
-
-  - task: CmdLine@2
-    displayName: 'Install Release python package'
-    inputs:
-      script: |
-         rm -rf $(Build.BinariesDirectory)/Release/onnxruntime $(Build.BinariesDirectory)/Release/pybind11
-         python3 -m pip install $(Build.BinariesDirectory)/Release/dist/*.whl
-
-  - task: PythonScript@0
-    displayName: 'Run Release unit tests'
-    inputs:
-       scriptPath: $(Build.SourcesDirectory)/tools/ci_build/build.py
-       workingDirectory: $(Build.BinariesDirectory)/Release
-       arguments: >-
-          --build_dir $(Build.BinariesDirectory)
-          --cmake_generator Ninja
-          --config Release
-          --test
-          --skip_submodule_sync
-          --build_shared_lib
-          --parallel
-          --build_wheel
-          --enable_onnx_tests
-          --enable_training
-          --ctest_path ""
-
   - task: PublishTestResults@2
     displayName: 'Publish unit test results'
     inputs:
diff --git a/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-ci-pipeline.yml
index adf5695bd76eb..2d2719fef8f3d 100644
--- a/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-ci-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-ci-pipeline.yml
@@ -32,7 +32,6 @@ jobs:
   parameters:
     AgentPool : 'Onnxruntime-Linux-GPU-NC6sv3'
     JobName: 'Onnxruntime_Linux_GPU_Training'
-    SubmoduleCheckoutMode: 'recursive'
     RunDockerBuildArgs: >
       -o ubuntu20.04 -d gpu
       -t onnxruntime_orttraining_ortmodule_tests_image
@@ -40,24 +39,16 @@ jobs:
       -e
       -x "
       --enable_training
-      --config $(buildConfig)
+      --config Release
       --use_cuda --cuda_version=11.8 --cuda_home=/usr/local/cuda-11.8 --cudnn_home=/usr/local/cuda-11.8
       --build_wheel
       --enable_nvtx_profile
       --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=70
       "
-    DoNugetPack: 'false'
     RunInjectedPipeline: 'true'
     InjectedPipeline: 'orttraining-linux-gpu-test-ci-pipeline.yml'
     DockerImageTag: 'onnxruntime_orttraining_ortmodule_tests_image'
-    BuildConfig: $(buildConfig)
-    ArtifactName: 'drop-linux'
     TimeoutInMinutes: 140
     # Enable unreleased onnx opsets in CI builds
     # This facilitates testing the implementation for the new opsets
     AllowReleasedOpsetOnly: '0'
-    Strategy:
-      maxParallel: 2
-      matrix:
-        Release:
-          buildConfig: Release
diff --git a/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-ortmodule-distributed-test-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-ortmodule-distributed-test-ci-pipeline.yml
index f05d03bb54f9c..654bc0921556a 100644
--- a/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-ortmodule-distributed-test-ci-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-ortmodule-distributed-test-ci-pipeline.yml
@@ -124,7 +124,7 @@ stages:
           --volume $(Build.BinariesDirectory):/build \
           --volume /mnist:/mnist \
           onnxruntime_ortmodule_distributed_tests_image \
-            bash -c "rm -rf /build/RelWithDebInfo/onnxruntime/ && python3 -m pip install mpi4py && python3 -m pip install /build/RelWithDebInfo/dist/onnxruntime*.whl && mpirun -n 4 -x NCCL_DEBUG=INFO python /onnxruntime_src/onnxruntime/test/python/onnxruntime_test_collective.py" \
+            bash -c "rm -rf /build/RelWithDebInfo/onnxruntime/ && python3 -m pip install mpi4py onnxscript && python3 -m pip install /build/RelWithDebInfo/dist/onnxruntime*.whl && mpirun -n 4 -x NCCL_DEBUG=INFO python /onnxruntime_src/onnxruntime/test/python/onnxruntime_test_collective.py && mpirun -n 2 -x NCCL_DEBUG=INFO python /onnxruntime_src/onnxruntime/test/python/onnxruntime_test_distributed.py" \
       displayName: 'Run onnxruntime_test_collective.py'
       condition: succeededOrFailed()
       timeoutInMinutes: 30
diff --git a/tools/ci_build/github/azure-pipelines/orttraining-pai-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/orttraining-pai-ci-pipeline.yml
index 8dd1f0c5c6461..8d02a5e5809a2 100644
--- a/tools/ci_build/github/azure-pipelines/orttraining-pai-ci-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/orttraining-pai-ci-pipeline.yml
@@ -25,7 +25,7 @@ variables:
   - name: render
     value: 109
   - name: RocmVersion
-    value: 5.6
+    value: 5.7
   - name: BuildConfig
     value: Release
 
@@ -98,7 +98,7 @@ jobs:
           /bin/bash -c "
             set -ex; \
             ccache -s; \
-            /opt/python/cp38-cp38/bin/python3 tools/ci_build/build.py \
+            /opt/python/cp39-cp39/bin/python3 tools/ci_build/build.py \
               --config $(BuildConfig) \
               --enable_training \
               --mpi_home /opt/ompi \
@@ -108,6 +108,7 @@ jobs:
                 FETCHCONTENT_TRY_FIND_PACKAGE_MODE=NEVER \
               --use_cache \
               --use_rocm \
+              --use_migraphx \
               --rocm_version=$(RocmVersion) \
               --rocm_home ${ROCM_HOME} \
               --nccl_home ${ROCM_HOME}\
diff --git a/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cpu.yml b/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cpu.yml
index 983143df3f046..9755e1f0771ba 100644
--- a/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cpu.yml
+++ b/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cpu.yml
@@ -9,7 +9,7 @@ resources:
     ref: 5eda9aded5462201e6310105728d33016e637ea7
 
 stages:
-- stage: Python_Packaging_Linux_Trainin_CPU
+- stage: Python_Packaging_Linux_Training_CPU
 
   jobs:
     - job: Linux_Training_CPU_Wheels
diff --git a/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cuda.yml b/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cuda.yml
index b8dfb7f3c90a2..f244851f8cc37 100644
--- a/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cuda.yml
+++ b/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cuda.yml
@@ -20,16 +20,3 @@ stages:
     agent_pool: Onnxruntime-Linux-GPU
     upload_wheel: 'yes'
     debug_build: false
-
-# Added for triton compiler team. Can be potentially removed.
-- template: templates/py-packaging-training-cuda-stage.yml
-  parameters:
-    build_py_parameters: --enable_training --update --build
-    torch_version: '2.0.0'
-    opset_version: '15'
-    cuda_version: '11.8'
-    cmake_cuda_architectures: 70;75;80;86
-    docker_file: Dockerfile.manylinux2_28_training_cuda11_8
-    agent_pool: Onnxruntime-Linux-GPU
-    upload_wheel: 'no'
-    debug_build: true
diff --git a/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cuda12.yml b/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cuda12.yml
new file mode 100644
index 0000000000000..422fb33eec5de
--- /dev/null
+++ b/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cuda12.yml
@@ -0,0 +1,22 @@
+trigger: none
+
+resources:
+  repositories:
+  - repository: manylinux
+    type: Github
+    endpoint: Microsoft
+    name: pypa/manylinux
+    ref: 5eda9aded5462201e6310105728d33016e637ea7
+
+stages:
+- template: templates/py-packaging-training-cuda-stage.yml
+  parameters:
+    build_py_parameters: --enable_training --update --build
+    torch_version: '2.1.0'
+    opset_version: '15'
+    cuda_version: '12.2'
+    cmake_cuda_architectures: 70;75;80;86;90
+    docker_file: Dockerfile.manylinux2_28_training_cuda12_2
+    agent_pool: Onnxruntime-Linux-GPU
+    upload_wheel: 'yes'
+    debug_build: false
diff --git a/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-rocm.yml b/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-rocm.yml
index eb837b35af428..f2ba99369c144 100644
--- a/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-rocm.yml
+++ b/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-rocm.yml
@@ -9,56 +9,51 @@ resources:
     ref: 5eda9aded5462201e6310105728d33016e637ea7
 
 stages:
-- stage: Python_Packaging
+- stage: "Python_Packaging_ROCm57_Release"
   jobs:
   - template: templates/rocm.yml
     parameters:
       PythonVersion: '3.8'
-      RocmVersion: '5.4.2'
+      RocmVersion: '5.7'
   - template: templates/rocm.yml
     parameters:
       PythonVersion: '3.9'
-      RocmVersion: '5.4.2'
+      RocmVersion: '5.7'
   - template: templates/rocm.yml
     parameters:
       PythonVersion: '3.10'
-      RocmVersion: '5.4.2'
-  - template: templates/rocm.yml
-    parameters:
-      PythonVersion: '3.8'
-      RocmVersion: '5.5'
-  - template: templates/rocm.yml
-    parameters:
-      PythonVersion: '3.9'
-      RocmVersion: '5.5'
-  - template: templates/rocm.yml
-    parameters:
-      PythonVersion: '3.10'
-      RocmVersion: '5.5'
+      RocmVersion: '5.7'
+
+- stage: "Python_Packaging_ROCm57_Debug"
+  jobs:
   - template: templates/rocm.yml
     parameters:
       PythonVersion: '3.8'
-      RocmVersion: '5.6'
+      RocmVersion: '5.7'
+      BuildConfig: 'Debug'
   - template: templates/rocm.yml
     parameters:
       PythonVersion: '3.9'
-      RocmVersion: '5.6'
+      RocmVersion: '5.7'
+      BuildConfig: 'Debug'
   - template: templates/rocm.yml
     parameters:
       PythonVersion: '3.10'
-      RocmVersion: '5.6'
+      RocmVersion: '5.7'
+      BuildConfig: 'Debug'
+
+- stage: "Python_Packaging_ROCm56_Release"
+  condition: ne(variables['ORT_DISABLE_PYTHON_PACKAGE_LOCAL_VERSION'], 'true')
+  jobs:
   - template: templates/rocm.yml
     parameters:
       PythonVersion: '3.8'
       RocmVersion: '5.6'
-      BuildConfig: 'RelWithDebInfo'
   - template: templates/rocm.yml
     parameters:
       PythonVersion: '3.9'
       RocmVersion: '5.6'
-      BuildConfig: 'RelWithDebInfo'
   - template: templates/rocm.yml
     parameters:
       PythonVersion: '3.10'
       RocmVersion: '5.6'
-      BuildConfig: 'RelWithDebInfo'
diff --git a/tools/ci_build/github/azure-pipelines/post-merge-jobs.yml b/tools/ci_build/github/azure-pipelines/post-merge-jobs.yml
index 61f9b37d4ce78..706c87fc079ca 100644
--- a/tools/ci_build/github/azure-pipelines/post-merge-jobs.yml
+++ b/tools/ci_build/github/azure-pipelines/post-merge-jobs.yml
@@ -4,9 +4,11 @@ stages:
     parameters:
       NpmPackagingMode: 'dev'
       IsReleasePipeline: true
-      PoolName: 'aiinfra-Win-CPU-2022-web-beta'
+      PoolName: 'onnxruntime-Ubuntu2004-AMD-CPU'
       BuildStaticLib: true
       ExtraBuildArgs: ''
+      UseWebPoolName: true
+      WebCpuPoolName: 'Onnxruntime-Win-CPU-2022'
 
 # This stage is to test if the combined build works on
 # o Windows ARM64
@@ -67,7 +69,7 @@ stages:
     - template: templates/jobs/win-ci-vs-2022-job.yml
       parameters:
         BuildConfig: 'RelWithDebInfo'
-        EnvSetupScript: setup_env_cuda_11.bat
+        EnvSetupScript: setup_env_cuda.bat
         buildArch: x64
         additionalBuildFlags: --enable_pybind --build_java --build_nodejs --use_cuda --cuda_home="$(Agent.TempDirectory)\v11.8" --enable_cuda_profiling --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86
         msbuildPlatform: x64
@@ -259,3 +261,40 @@ stages:
         script: docker image rm $(dockerImageTag)
         workingDirectory: '$(Build.BinariesDirectory)'
       condition: succeededOrFailed()
+
+# We currently use static frameworks in the iOS packages.
+# This test validates that dynamic frameworks also work.
+- stage: IosDynamicFramework
+  dependsOn: []
+  jobs:
+  - job: IosDynamicFramework
+
+    pool:
+      vmImage: "macOS-13"
+
+    steps:
+    - task: UsePythonVersion@0
+      inputs:
+        versionSpec: "3.9"
+        addToPath: true
+        architecture: "x64"
+
+    - template: templates/use-xcode-version.yml
+
+    - script: |
+        pip install -r tools/ci_build/github/apple/ios_packaging.requirements.txt
+      displayName: "Install Python requirements"
+
+    - script: |
+        python tools/ci_build/github/apple/build_apple_framework.py \
+          --build_dir "$(Build.BinariesDirectory)/ios_framework" \
+          --build_dynamic_framework \
+          tools/ci_build/github/apple/default_mobile_ios_framework_build_settings.json
+      displayName: "Build iOS dynamic framework"
+
+    - script: |
+        python tools/ci_build/github/apple/test_apple_packages.py \
+          --framework_info_file "$(Build.BinariesDirectory)/ios_framework/xcframework_info.json" \
+          --c_framework_dir "$(Build.BinariesDirectory)/ios_framework/framework_out" \
+          --variant Mobile
+      displayName: "Test pod with iOS dynamic framework"
diff --git a/tools/ci_build/github/azure-pipelines/py-cuda-packaging-pipeline.yml b/tools/ci_build/github/azure-pipelines/py-cuda-packaging-pipeline.yml
new file mode 100644
index 0000000000000..91179d141498b
--- /dev/null
+++ b/tools/ci_build/github/azure-pipelines/py-cuda-packaging-pipeline.yml
@@ -0,0 +1,39 @@
+trigger: none
+
+parameters:
+  - name: enable_linux_gpu
+    type: boolean
+    default: true
+  - name: enable_windows_gpu
+    type: boolean
+    default: true
+  - name: cmake_build_type
+    type: string
+    default: 'Release'
+    values:
+      - Debug
+      - Release
+      - RelWithDebInfo
+      - MinSizeRel
+  - name: cuda_version
+    type: string
+    default: '12.2'
+    values:
+      - 11.8
+      - 12.2
+
+resources:
+  repositories:
+    - repository: manylinux
+      type: Github
+      endpoint: Microsoft
+      name: pypa/manylinux
+      ref: 5eda9aded5462201e6310105728d33016e637ea7
+
+stages:
+  - template: stages/py-nuget-combine-cuda-stage.yml
+    parameters:
+      enable_linux_gpu: ${{ parameters.enable_linux_gpu }}
+      enable_windows_gpu: ${{ parameters.enable_windows_gpu }}
+      cmake_build_type: ${{ parameters.cmake_build_type }}
+      cuda_version: ${{ parameters.cuda_version }}
\ No newline at end of file
diff --git a/tools/ci_build/github/azure-pipelines/py-package-test-pipeline.yml b/tools/ci_build/github/azure-pipelines/py-package-test-pipeline.yml
index 2161a9205f22d..55d3150f21aa3 100644
--- a/tools/ci_build/github/azure-pipelines/py-package-test-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/py-package-test-pipeline.yml
@@ -30,7 +30,7 @@ stages:
   - template: templates/py-packaging-linux-test-cpu.yml
     parameters:
       arch: 'aarch64'
-      machine_pool: 'aiinfra-linux-ARM64-CPU-2019'
+      machine_pool: 'onnxruntime-linux-ARM64-CPU-2019'
       base_image: 'arm64v8/almalinux:8'
       devtoolset_rootpath: /opt/rh/gcc-toolset-12/root
       ld_library_path_arg: /opt/rh/gcc-toolset-12/root/usr/lib64:/opt/rh/gcc-toolset-12/root/usr/lib:/opt/rh/gcc-toolset-12/root/usr/lib64/dyninst:/opt/rh/gcc-toolset-12/root/usr/lib/dyninst:/usr/local/lib64
@@ -84,7 +84,7 @@ stages:
       skipComponentGovernanceDetection: true
     workspace:
       clean: all
-    pool: Azure-Pipelines-EO-Windows2022-aiinfra
+    pool: Onnxruntime-Win-CPU-2022
     steps:
     - task: PowerShell@2
       displayName: 'Add Build Tag'
diff --git a/tools/ci_build/github/azure-pipelines/stages/nuget-combine-cuda-stage.yml b/tools/ci_build/github/azure-pipelines/stages/nuget-combine-cuda-stage.yml
new file mode 100644
index 0000000000000..d009e15559180
--- /dev/null
+++ b/tools/ci_build/github/azure-pipelines/stages/nuget-combine-cuda-stage.yml
@@ -0,0 +1,226 @@
+parameters:
+- name: DoCompliance
+  type: boolean
+  default: true
+
+- name: DoEsrp
+  type: boolean
+  default: true
+
+- name: IsReleaseBuild
+  type: boolean
+  default: false
+
+stages:
+######## Nuget ########
+# Win/Linux CUDA Combined packaging
+- stage: NuGet_Packaging_GPU
+  dependsOn:
+    - Set_ReleaseVersionSuffix
+    - Windows_Packaging_gpu
+    - Windows_Packaging_tensorrt
+    - Linux_C_API_Packaging_CPU_x64
+    - Linux_C_API_Packaging_GPU_x64
+    - Linux_C_API_Packaging_GPU_TensorRT_x64
+  condition: succeeded()
+  jobs:
+    - job:
+      workspace:
+        clean: all
+      pool: 'Onnxruntime-Win-CPU-2022'
+      variables:
+        breakCodesignValidationInjection: ${{ parameters.DoEsrp }}
+        ReleaseVersionSuffix: $[stageDependencies.Setup.Set_Variables.outputs['Set_Release_Version_Suffix.ReleaseVersionSuffix']]
+
+      steps:
+        - checkout: self
+          submodules: true
+  # Download the all artifacts
+        - task: DownloadPipelineArtifact@2
+          displayName: 'Download Pipeline Artifact from Linux_C_API_Packaging_GPU_x64 Stage'
+          inputs:
+            artifactName: 'onnxruntime-win-x64-cuda'
+            targetPath: '$(Build.BinariesDirectory)/nuget-artifact'
+        - task: DownloadPipelineArtifact@2
+          displayName: 'Download Pipeline Artifact from Linux_C_API_Packaging_GPU_TensorRT_x64 Stage'
+          inputs:
+            artifactName: 'onnxruntime-win-x64-tensorrt'
+            targetPath: '$(Build.BinariesDirectory)/nuget-artifact'
+
+        - task: DownloadPipelineArtifact@2
+          displayName: 'Download Pipeline Artifact from Windows_Packaging_gpu Stage'
+          inputs:
+            artifactName: 'onnxruntime-linux-x64-cuda'
+            targetPath: '$(Build.BinariesDirectory)/nuget-artifact'
+
+        - task: DownloadPipelineArtifact@2
+          displayName: 'Download Pipeline Artifact from Windows_Packaging_tensorrt Stage'
+          inputs:
+            artifactName: 'onnxruntime-linux-x64-tensorrt'
+            targetPath: '$(Build.BinariesDirectory)/nuget-artifact'
+
+        - task: DownloadPipelineArtifact@2
+          displayName: 'Download Pipeline Artifact - protoc from Windows_Packaging_(cpu|gpu) Stage'
+          inputs:
+            artifactName: 'drop-extra'
+            targetPath: '$(Build.BinariesDirectory)/extra-artifact'
+
+        # Reconstruct the build dir
+        - task: PowerShell@2
+          displayName: 'PS: Extract nuget files gpu'
+          inputs:
+            targetType: filePath
+            filePath: $(Build.SourcesDirectory)\tools\ci_build\github\windows\extract_nuget_files_gpu.ps1
+
+        - script: |
+            dir
+          workingDirectory: '$(Build.BinariesDirectory)/nuget-artifact'
+          displayName: 'List artifacts'
+
+        - script: |
+            mklink /D /J models C:\local\models
+          workingDirectory: '$(Build.BinariesDirectory)'
+          displayName: 'Create models link'
+
+        - task: NuGetToolInstaller@0
+          displayName: Use Nuget 6.2.1
+          inputs:
+            versionSpec: 6.2.1
+
+        - task: PowerShell@2
+          displayName: Install .NET 6 workloads
+          inputs:
+            targetType: 'inline'
+            script: |
+              dotnet workload install android ios macos
+            workingDirectory: '$(Build.SourcesDirectory)\csharp'
+
+        - task: PowerShell@2
+          displayName: Build .NET 6 targets using dotnet
+          inputs:
+            targetType: 'inline'
+            # we don't specify 'Any CPU' as the platform here because if we do it gets added to the output path
+            #   e.g. csharp\src\Microsoft.ML.OnnxRuntime\bin\Any CPU\RelWithDebInfo\net6.0-ios\
+            # which is inconsistent with the msbuild output path for the pre-.net6 targets
+            #   e.g. csharp\src\Microsoft.ML.OnnxRuntime\bin\RelWithDebInfo\monoandroid11.0
+            # and makes it harder to do the packing
+            #
+            # 'Any CPU' is the default (first 'mixed' platform specified in the csproj) so this should be fine.
+            script: |
+              dotnet build .\src\Microsoft.ML.OnnxRuntime\Microsoft.ML.OnnxRuntime.csproj -p:SelectedTargets=Net6 -p:Configuration=RelWithDebInfo -p:OnnxRuntimeBuildDirectory="$(Build.BinariesDirectory)" -p:OrtPackageId="Microsoft.ML.OnnxRuntime.Gpu" -p:IsReleaseBuild=${{ parameters.IsReleaseBuild }} -p:ReleaseVersionSuffix=$(ReleaseVersionSuffix)
+            workingDirectory: '$(Build.SourcesDirectory)\csharp'
+
+        - task: MSBuild@1
+          displayName: 'Restore NuGet Packages and create project.assets.json for pre-.net6 targets'
+          inputs:
+            solution: '$(Build.SourcesDirectory)\csharp\OnnxRuntime.CSharp.sln'
+            platform: 'Any CPU'
+            configuration: RelWithDebInfo
+            msbuildArguments: '-t:restore -p:SelectedTargets=PreNet6 -p:OrtPackageId="Microsoft.ML.OnnxRuntime.Gpu"'
+            workingDirectory: '$(Build.SourcesDirectory)\csharp'
+
+        - task: MSBuild@1
+          displayName: 'Build C# for pre-.net6 targets'
+          inputs:
+            solution: '$(Build.SourcesDirectory)\csharp\OnnxRuntime.CSharp.sln'
+            configuration: RelWithDebInfo
+            platform: 'Any CPU'
+            msbuildArguments: '-p:SelectedTargets=PreNet6 -p:OnnxRuntimeBuildDirectory="$(Build.BinariesDirectory)" -p:OrtPackageId="Microsoft.ML.OnnxRuntime.Gpu" -p:IsReleaseBuild=${{ parameters.IsReleaseBuild }} -p:ReleaseVersionSuffix=$(ReleaseVersionSuffix)'
+            workingDirectory: '$(Build.SourcesDirectory)\csharp'
+
+        - template: ../templates/win-esrp-dll.yml
+          parameters:
+            FolderPath: '$(Build.SourcesDirectory)\csharp\src\Microsoft.ML.OnnxRuntime\bin\RelWithDebInfo'
+            DisplayName: 'ESRP - Sign C# dlls'
+            DoEsrp: ${{ parameters.DoEsrp }}
+
+        - task: MSBuild@1
+          displayName: Update projects.assets.json with combined list of all target frameworks
+          inputs:
+            solution: '$(Build.SourcesDirectory)\csharp\src\Microsoft.ML.OnnxRuntime\Microsoft.ML.OnnxRuntime.csproj'
+            platform: 'Any CPU'
+            configuration: RelWithDebInfo
+            msbuildArguments: '-t:restore -p:SelectedTargets=All -p:OrtPackageId=Microsoft.ML.OnnxRuntime.Gpu'
+            workingDirectory: '$(Build.SourcesDirectory)\csharp'
+
+        - task: MSBuild@1
+          displayName: 'Build Nuget Packages'
+          inputs:
+            solution: '$(Build.SourcesDirectory)\csharp\OnnxRuntime.CSharp.proj'
+            configuration: RelWithDebInfo
+            platform: 'Any CPU'
+            msbuildArguments: '-t:CreatePackage -p:OnnxRuntimeBuildDirectory="$(Build.BinariesDirectory)" -p:OrtPackageId=Microsoft.ML.OnnxRuntime.Gpu -p:IsReleaseBuild=${{ parameters.IsReleaseBuild }} -p:ReleaseVersionSuffix=$(ReleaseVersionSuffix)'
+            workingDirectory: '$(Build.SourcesDirectory)\csharp'
+
+        - task: BatchScript@1
+          displayName: 'Add TensorRT header file to the native nuGet package'
+          inputs:
+            filename: $(Build.SourcesDirectory)\tools\ci_build\github\windows\bundle_nuget_with_native_headers.bat
+            workingFolder: $(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo
+
+        - task: CopyFiles@2
+          displayName: 'Copy nuget packages to: $(Build.ArtifactStagingDirectory)'
+          inputs:
+            SourceFolder: '$(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo'
+            Contents: '*.snupkg'
+            TargetFolder: '$(Build.ArtifactStagingDirectory)'
+
+        - task: CopyFiles@2
+          displayName: 'Copy nuget packages to: $(Build.ArtifactStagingDirectory)'
+          inputs:
+            SourceFolder: '$(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo'
+            Contents: '*.nupkg'
+            TargetFolder: '$(Build.ArtifactStagingDirectory)'
+
+        - task: CopyFiles@2
+          displayName: 'Copy nuget packages to: $(Build.ArtifactStagingDirectory)'
+          inputs:
+            SourceFolder: '$(Build.SourcesDirectory)\csharp\src\Microsoft.ML.OnnxRuntime\bin\RelWithDebInfo'
+            Contents: '*.nupkg'
+            TargetFolder: '$(Build.ArtifactStagingDirectory)'
+
+        - template: ../templates/esrp_nuget.yml
+          parameters:
+            DisplayName: 'ESRP - sign NuGet package'
+            FolderPath: '$(Build.ArtifactStagingDirectory)'
+            DoEsrp: ${{ parameters.DoEsrp }}
+
+        - template: ../templates/validate-package.yml
+          parameters:
+            PackageType: 'nuget'
+            PackagePath: '$(Build.ArtifactStagingDirectory)'
+            PackageName: 'Microsoft.ML.OnnxRuntime.*nupkg'
+            PlatformsSupported: 'win-x64,linux-x64'
+            VerifyNugetSigning: false
+
+        - task: PublishPipelineArtifact@0
+          displayName: 'Publish Pipeline NuGet Artifact'
+          inputs:
+            artifactName: 'drop-signed-nuget-GPU'
+            targetPath: '$(Build.ArtifactStagingDirectory)'
+
+
+        - task: MSBuild@1
+          displayName: 'Clean C#'
+          inputs:
+            solution: '$(Build.SourcesDirectory)\csharp\OnnxRuntime.CSharp.sln'
+            platform: 'Any CPU'
+            configuration: RelWithDebInfo
+            msbuildArguments: '-t:Clean -p:OnnxRuntimeBuildDirectory="$(Build.BinariesDirectory)" -p:OrtPackageId=Microsoft.ML.OnnxRuntime.Gpu'
+            workingDirectory: '$(Build.SourcesDirectory)\csharp'
+
+
+        - task: RoslynAnalyzers@2
+          displayName: 'Run Roslyn Analyzers'
+          inputs:
+            userProvideBuildInfo: msBuildInfo
+            msBuildCommandline: '"C:\Program Files\Microsoft Visual Studio\2022\Enterprise\MSBuild\Current\Bin\msbuild.exe" $(Build.SourcesDirectory)\csharp\OnnxRuntime.CSharp.sln -p:configuration="RelWithDebInfo" -p:Platform="Any CPU" -p:OnnxRuntimeBuildDirectory="$(Build.BinariesDirectory)" -p:OrtPackageId=Microsoft.ML.OnnxRuntime.Gpu'
+          condition: and(succeeded(), eq('${{ parameters.DoCompliance }}', true))
+
+        - template: ../templates/component-governance-component-detection-steps.yml
+          parameters:
+            condition: 'succeeded'
+
+        - task: mspremier.PostBuildCleanup.PostBuildCleanup-task.PostBuildCleanup@3
+          displayName: 'Clean Agent Directories'
+          condition: always()
diff --git a/tools/ci_build/github/azure-pipelines/stages/nuget-linux-cuda-packaging-stage.yml b/tools/ci_build/github/azure-pipelines/stages/nuget-linux-cuda-packaging-stage.yml
new file mode 100644
index 0000000000000..140a377ca72a3
--- /dev/null
+++ b/tools/ci_build/github/azure-pipelines/stages/nuget-linux-cuda-packaging-stage.yml
@@ -0,0 +1,161 @@
+parameters:
+- name: CudaVersion
+  type: string
+  default: '11.8'
+- name: docker_base_image
+  type: string
+- name: linux_trt_version
+  type: string
+
+stages:
+  # Linux CUDA without TensorRT Packaging
+- stage: Linux_C_API_Packaging_GPU_x64
+  dependsOn: []
+  jobs:
+  - job:
+    workspace:
+      clean: all
+    timeoutInMinutes: 120
+    pool: 'Onnxruntime-Linux-GPU'
+    variables:
+      - name: CUDA_VERSION_MAJOR
+        ${{ if eq(parameters.CudaVersion, '11.8') }}:
+          value: '11'
+        ${{ if eq(parameters.CudaVersion, '12.2') }}:
+          value: '12'
+      - name: CUDA_VERSION
+        value: ${{ parameters.CudaVersion }}
+    steps:
+    - template: ../templates/set-version-number-variables-step.yml
+    - template: ../templates/get-docker-image-steps.yml
+      parameters:
+        Dockerfile: tools/ci_build/github/linux/docker/inference/x64/default/gpu/Dockerfile
+        Context: tools/ci_build/github/linux/docker/inference/x64/default/gpu
+        DockerBuildArgs: "
+        --build-arg BUILD_UID=$( id -u )
+        --build-arg BASEIMAGE=${{ parameters.docker_base_image }}
+        "
+        Repository: onnxruntimecuda${{ variables.CUDA_VERSION_MAJOR }}build
+
+    - script: $(Build.SourcesDirectory)/tools/ci_build/github/linux/build_cuda_c_api_package.sh
+      workingDirectory: $(Build.SourcesDirectory)
+      displayName: 'Build and Test'
+
+    - template: ../templates/c-api-artifacts-package-and-publish-steps-posix.yml
+      parameters:
+        buildConfig: 'Release'
+        artifactName: 'onnxruntime-linux-x64-cuda-$(OnnxRuntimeVersion)'
+        artifactNameNoVersionString: 'onnxruntime-linux-x64-cuda'
+        libraryName: 'libonnxruntime.so.$(OnnxRuntimeVersion)'
+
+    - template: ../templates/component-governance-component-detection-steps.yml
+      parameters:
+        condition: 'succeeded'
+    - template: ../templates/clean-agent-build-directory-step.yml
+# Linux CUDA with TensorRT Packaging
+- template: ../templates/linux-gpu-tensorrt-packaging-pipeline.yml
+  parameters:
+    artifactName: 'onnxruntime-linux-x64-tensorrt-$(OnnxRuntimeVersion)'
+    artifactNameNoVersionString: 'onnxruntime-linux-x64-tensorrt'
+    buildJava: false
+    buildJavaOption: '--build_java'
+    buildNodejs: false
+    buildNodejsOption: '--build_nodejs'
+    CudaVersion: ${{ parameters.CudaVersion }}
+# Linux CUDA Combined Testing and Publishing
+- stage: Linux_Packaging_combined_GPU
+  dependsOn:
+    - Linux_C_API_Packaging_GPU_x64
+    - Linux_C_API_Packaging_GPU_TensorRT_x64
+  condition: succeeded()
+  jobs:
+    - job:
+      workspace:
+        clean: all
+      pool: 'Onnxruntime-Linux-GPU'
+
+      steps:
+        - checkout: self                           # due to checkout multiple repos, the root directory is $(Build.SourcesDirectory)/onnxruntime
+          submodules: false
+        - checkout: onnxruntime-inference-examples # due to checkout multiple repos, the root directory is $(Build.SourcesDirectory)/onnxruntime-inference-examples
+          submodules: false
+        - checkout: manylinux                      # due to checkout multiple repos, the root directory is $(Build.SourcesDirectory)/manylinux
+          submodules: false
+
+        - script: |
+            set -e -x
+            cd $(Build.SourcesDirectory)
+            mv manylinux onnxruntime
+            ls
+
+        - template: ../templates/with-container-registry-steps.yml
+          parameters:
+            Steps:
+              - script: |
+                  tools/ci_build/get_docker_image.py \
+                    --dockerfile tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda \
+                    --context tools/ci_build/github/linux/docker \
+                    --docker-build-args "--network=host --build-arg BASEIMAGE=${{ parameters.docker_base_image }} --build-arg TRT_VERSION=${{ parameters.linux_trt_version }} --build-arg BUILD_UID=$( id -u )" \
+                    --container-registry onnxruntimebuildcache \
+                    --multiple_repos \
+                    --repository onnxruntimecuda${{ variables.CUDA_VERSION_MAJOR }}xtrt86build
+                displayName: "Get onnxruntimecuda${{ variables.CUDA_VERSION_MAJOR }}xtrt86build image for tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda"
+                workingDirectory: $(Build.SourcesDirectory)/onnxruntime
+            ContainerRegistry: onnxruntimebuildcache
+
+        - template: ../templates/set-version-number-variables-step.yml
+          parameters:
+            versionFileDirectory: '$(Build.SourcesDirectory)/onnxruntime'
+            workingDirectory: '$(Build.SourcesDirectory)/onnxruntime'
+        - task: DownloadPipelineArtifact@2
+          displayName: 'Download Pipeline Artifact - Combined GPU'
+          inputs:
+            artifactName: 'onnxruntime-linux-x64-cuda'
+            targetPath: '$(Build.BinariesDirectory)/tgz-artifacts'
+
+        - task: DownloadPipelineArtifact@2
+          displayName: 'Download Pipeline Artifact - Combined GPU'
+          inputs:
+            artifactName: 'onnxruntime-linux-x64-tensorrt'
+            targetPath: '$(Build.BinariesDirectory)/tgz-artifacts'
+
+        - task: ShellScript@2
+          displayName: 'Shell Script'
+          inputs:
+            scriptPath: 'onnxruntime/tools/ci_build/github/linux/extract_and_bundle_gpu_package.sh'
+            args: '-a $(Build.BinariesDirectory)/tgz-artifacts'
+            workingDirectory: '$(Build.BinariesDirectory)/tgz-artifacts'
+
+        - task: ArchiveFiles@2
+          inputs:
+            rootFolderOrFile: '$(Build.BinariesDirectory)/tgz-artifacts/onnxruntime-linux-x64-gpu'
+            includeRootFolder: false
+            archiveType: 'tar' # Options: zip, 7z, tar, wim
+            tarCompression: 'gz'
+            archiveFile: '$(Build.ArtifactStagingDirectory)/onnxruntime-linux-x64-gpu-$(OnnxRuntimeVersion).tgz'
+            replaceExistingArchive: true
+
+        - template: ../templates/validate-package.yml
+          parameters:
+            PackageType: 'tarball'
+            PackagePath: '$(Build.ArtifactStagingDirectory)'
+            PackageName: 'onnxruntime-linux-x64-gpu-$(OnnxRuntimeVersion).tgz'
+            ScriptPath: '$(Build.SourcesDirectory)/onnxruntime/tools/nuget/validate_package.py'
+            PlatformsSupported: 'linux-x64'
+            VerifyNugetSigning: false
+            workingDirectory: '$(Build.ArtifactStagingDirectory)'
+
+
+        - task: CmdLine@2
+          displayName: 'Test C API application for GPU package'
+          inputs:
+            script: |
+              docker run --gpus all -e CFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all" -e CXXFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all" -e NVIDIA_VISIBLE_DEVICES=all --rm --volume $(Build.SourcesDirectory):/src_dir \
+              --volume $(Build.ArtifactStagingDirectory):/artifact_src -e NIGHTLY_BUILD onnxruntimecuda${{ variables.CUDA_VERSION_MAJOR }}xtrt86build \
+              /src_dir/onnxruntime-inference-examples/c_cxx/squeezenet/run_capi_application.sh -o /src_dir/onnxruntime -p /artifact_src/onnxruntime-linux-x64-gpu-$(OnnxRuntimeVersion).tgz -w /src_dir/onnxruntime-inference-examples/c_cxx/squeezenet
+            workingDirectory: '$(Build.ArtifactStagingDirectory)'
+
+        - task: PublishPipelineArtifact@1
+          inputs:
+            targetPath: '$(Build.ArtifactStagingDirectory)/onnxruntime-linux-x64-gpu-$(OnnxRuntimeVersion).tgz'
+            artifactName: 'onnxruntime-linux-x64-gpu'
diff --git a/tools/ci_build/github/azure-pipelines/stages/nuget-win-cuda-packaging-stage.yml b/tools/ci_build/github/azure-pipelines/stages/nuget-win-cuda-packaging-stage.yml
new file mode 100644
index 0000000000000..3fb653c6b4405
--- /dev/null
+++ b/tools/ci_build/github/azure-pipelines/stages/nuget-win-cuda-packaging-stage.yml
@@ -0,0 +1,147 @@
+parameters:
+- name: RunOnnxRuntimeTests
+  type: boolean
+  default: true
+
+- name: UseIncreasedTimeoutForTests
+  type: boolean
+  default: false
+
+- name: DoCompliance
+  type: boolean
+  default: true
+
+- name: DoEsrp
+  type: boolean
+  default: true
+
+- name: CudaVersion
+  type: string
+  default: '11.8'
+- name: win_cuda_home
+  type: string
+- name: win_trt_home
+  type: string
+
+stages:
+# Windows CUDA without TensorRT Packaging
+- template: ../templates/win-ci.yml
+  parameters:
+    ort_build_pool_name: 'onnxruntime-Win2022-GPU-T4'
+    DoCompliance: ${{ parameters.DoCompliance }}
+    DoEsrp: ${{ parameters.DoEsrp }}
+    stage_name_suffix: gpu
+    buildArch: x64
+    msbuildPlatform: x64
+    packageName: x64-cuda
+    CudaVersion: ${{ parameters.CudaVersion }}
+    buildparameter: --use_cuda --cuda_home=${{ parameters.win_cuda_home }} --enable_onnx_tests --enable_wcos --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=60;61;70;75;80"
+    runTests: ${{ parameters.RunOnnxRuntimeTests }}
+    buildJava: false
+    java_artifact_id: onnxruntime_gpu
+    PublishProtoc: true
+# Windows CUDA with TensorRT Packaging
+- template: ../templates/win-ci.yml
+  parameters:
+    ort_build_pool_name: 'onnxruntime-Win2022-GPU-T4'
+    DoCompliance: ${{ parameters.DoCompliance }}
+    DoEsrp: ${{ parameters.DoEsrp }}
+    stage_name_suffix: tensorrt
+    buildArch: x64
+    msbuildPlatform: x64
+    CudaVersion: ${{ parameters.CudaVersion }}
+    packageName: x64-tensorrt
+    buildparameter: --use_tensorrt --tensorrt_home=${{ parameters.win_trt_home }} --cuda_home=${{ parameters.win_cuda_home }}  --enable_onnx_tests --enable_wcos --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=60;61;70;75;80"
+    runTests: ${{ parameters.RunOnnxRuntimeTests }}
+    buildJava: false
+    java_artifact_id: onnxruntime_gpu
+    UseIncreasedTimeoutForTests: ${{ parameters.UseIncreasedTimeoutForTests }}
+
+# Windows CUDA Combined Testing and Publishing
+- stage: Windows_Packaging_combined_GPU
+  dependsOn:
+    - Windows_Packaging_gpu
+    - Windows_Packaging_tensorrt
+  condition: succeeded()
+
+  jobs:
+    - job:
+      workspace:
+        clean: all
+      pool: 'onnxruntime-Win2022-GPU-T4'
+      variables:
+        CUDA_MODULE_LOADINGL: 'LAZY'
+        GRADLE_OPTS: '-Dorg.gradle.daemon=false'
+      steps:
+        - checkout: self                           # due to checkout multiple repos, the root directory is $(Build.SourcesDirectory)/onnxruntime
+        - checkout: onnxruntime-inference-examples # due to checkout multiple repos, the root directory is $(Build.SourcesDirectory)/onnxruntime-inference-examples
+          submodules: false
+        - script: dir $(Build.SourcesDirectory)
+        - template: ../templates/jobs/download_win_gpu_library.yml
+          parameters:
+            DownloadCUDA: true
+            DownloadTRT: true
+            CudaVersion: ${{ parameters.CudaVersion }}
+
+        - template: ../templates/set-version-number-variables-step.yml
+          parameters:
+            versionFileDirectory: '$(Build.SourcesDirectory)\onnxruntime'
+            workingDirectory: '$(Build.SourcesDirectory)\onnxruntime'
+        - task: DownloadPipelineArtifact@2
+          displayName: 'Download Pipeline Artifact - onnxruntime-win-x64-cuda'
+          inputs:
+            artifactName: 'onnxruntime-win-x64-cuda'
+            targetPath: '$(Build.BinariesDirectory)/zip-artifacts'
+
+        - task: DownloadPipelineArtifact@2
+          displayName: 'Download Pipeline Artifact - onnxruntime-win-x64-tensorrt'
+          inputs:
+            artifactName: 'onnxruntime-win-x64-tensorrt'
+            targetPath: '$(Build.BinariesDirectory)/zip-artifacts'
+
+        - task: PowerShell@2
+          displayName: 'PowerShell Script'
+          inputs:
+            targetType: filePath
+            filePath: $(Build.SourcesDirectory)\onnxruntime\tools\ci_build\github\windows\extract_zip_files_gpu.ps1
+
+        - script: |
+            dir
+          workingDirectory: '$(Build.BinariesDirectory)/zip-artifacts'
+          displayName: 'List artifacts'
+
+        - task: BatchScript@1
+          displayName: 'Bundle CUDA/TRT EP binaries'
+          inputs:
+            filename: $(Build.SourcesDirectory)\onnxruntime\tools\ci_build\github\windows\bundle_dlls_gpu.bat
+            workingFolder: $(Build.BinariesDirectory)\zip-artifacts
+
+        - task: CopyFiles@2
+          displayName: 'Copy zip file to: $(Build.ArtifactStagingDirectory)'
+          inputs:
+            SourceFolder: '$(Build.BinariesDirectory)\zip-artifacts'
+            Contents: 'onnxruntime-win-x64-gpu-*.zip'
+            TargetFolder: '$(Build.ArtifactStagingDirectory)'
+
+        - template: ../templates/validate-package.yml
+          parameters:
+            PackageType: 'zip'
+            PackagePath: '$(Build.ArtifactStagingDirectory)'
+            PackageName: 'onnxruntime-win-x64-gpu-$(OnnxRuntimeVersion).zip'
+            ScriptPath: '$(Build.SourcesDirectory)\onnxruntime\tools\nuget\validate_package.py'
+            PlatformsSupported: 'win-x64'
+            VerifyNugetSigning: false
+            workingDirectory: '$(Build.ArtifactStagingDirectory)'
+
+        - task: BatchScript@1
+          displayName: 'Test C API application for GPU package'
+          inputs:
+            filename: $(Build.SourcesDirectory)\onnxruntime-inference-examples\c_cxx\squeezenet\run_capi_application.bat
+            arguments: $(Build.SourcesDirectory)\onnxruntime $(Build.ArtifactStagingDirectory)\onnxruntime-win-x64-gpu-$(OnnxRuntimeVersion).zip $(Build.SourcesDirectory)\onnxruntime-inference-examples\c_cxx\squeezenet
+            workingFolder: '$(Build.ArtifactStagingDirectory)'
+
+        - task: PublishPipelineArtifact@0
+          displayName: 'Publish Pipeline Combined GPU Package Artifact'
+          inputs:
+            artifactName: 'onnxruntime-win-x64-gpu'
+            targetPath: '$(Build.ArtifactStagingDirectory)'
\ No newline at end of file
diff --git a/tools/ci_build/github/azure-pipelines/stages/py-cuda-packaging-stage.yml b/tools/ci_build/github/azure-pipelines/stages/py-cuda-packaging-stage.yml
new file mode 100644
index 0000000000000..f3d68957d649c
--- /dev/null
+++ b/tools/ci_build/github/azure-pipelines/stages/py-cuda-packaging-stage.yml
@@ -0,0 +1,105 @@
+parameters:
+- name: build_py_parameters
+  displayName: >
+    Extra parameters to pass to build.py. Don't put newlines in here.
+  type: string
+  default: ''
+
+- name: enable_linux_gpu
+  displayName: 'Whether Linux GPU package is built.'
+  type: boolean
+  default: true
+
+- name: enable_windows_gpu
+  displayName: 'Whether Windows GPU package is built.'
+  type: boolean
+  default: true
+
+# TODO: Now the Windows jobs use a different cmake build type. Consider to merge it.
+- name: cmake_build_type
+  type: string
+  displayName: 'Linux packages cmake build type. Linux Only.'
+  default: 'Release'
+  values:
+   - Debug
+   - Release
+   - RelWithDebInfo
+   - MinSizeRel
+
+- name: cuda_version
+  type: string
+  displayName: 'CUDA version. Windows Only.'
+  default: '12.2'
+  values:
+   - 11.8
+   - 12.2
+
+stages:
+- stage: Python_Packaging
+  dependsOn: []
+  variables:
+  - name: docker_base_image
+    ${{ if eq(parameters.cuda_version, '11.8') }}:
+      value: nvidia/cuda:11.8.0-cudnn8-devel-ubi8
+    ${{ if eq(parameters.cuda_version, '12.2') }}:
+      value: nvidia/cuda:12.2.2-cudnn8-devel-ubi8
+  - name: linux_trt_version
+    ${{ if eq(parameters.cuda_version, '11.8') }}:
+      value: 8.6.1.6-1.cuda11.8
+    ${{ if eq(parameters.cuda_version, '12.2') }}:
+      value: 8.6.1.6-1.cuda12.0
+  - name: win_trt_home
+    ${{ if eq(parameters.cuda_version, '11.8') }}:
+      value: $(Agent.TempDirectory)\TensorRT-8.6.1.6.Windows10.x86_64.cuda-11.8
+    ${{ if eq(parameters.cuda_version, '12.2') }}:
+      value: $(Agent.TempDirectory)\TensorRT-8.6.1.6.Windows10.x86_64.cuda-12.0
+  - name: win_cuda_home
+    ${{ if eq(parameters.cuda_version, '11.8') }}:
+      value: $(Agent.TempDirectory)\v11.8
+    ${{ if eq(parameters.cuda_version, '12.2') }}:
+      value: $(Agent.TempDirectory)\v12.2
+  jobs:
+  - ${{ if eq(parameters.enable_windows_gpu, true) }}:
+      - template: ../templates/py-win-gpu.yml
+        parameters:
+          MACHINE_POOL: 'onnxruntime-Win2022-GPU-T4'
+          PYTHON_VERSION: '3.8'
+          EP_BUILD_FLAGS: --use_tensorrt --tensorrt_home=${{ variables.win_trt_home }} --cuda_home=${{ variables.win_cuda_home }}  --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=52;60;61;70;75;80"
+          EP_NAME: gpu
+          CudaVersion: ${{ parameters.cuda_version }}
+
+      - template: ../templates/py-win-gpu.yml
+        parameters:
+          MACHINE_POOL: 'onnxruntime-Win2022-GPU-T4'
+          PYTHON_VERSION: '3.9'
+          EP_BUILD_FLAGS: --use_tensorrt --tensorrt_home=${{ variables.win_trt_home }} --cuda_home=${{ variables.win_cuda_home }}  --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=52;60;61;70;75;80"
+          EP_NAME: gpu
+          CudaVersion: ${{ parameters.cuda_version }}
+
+      - template: ../templates/py-win-gpu.yml
+        parameters:
+          MACHINE_POOL: 'onnxruntime-Win2022-GPU-T4'
+          PYTHON_VERSION: '3.10'
+          EP_BUILD_FLAGS: --use_tensorrt --tensorrt_home=${{ variables.win_trt_home }} --cuda_home=${{ variables.win_cuda_home }}  --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=52;60;61;70;75;80"
+          EP_NAME: gpu
+          CudaVersion: ${{ parameters.cuda_version }}
+
+      - template: ../templates/py-win-gpu.yml
+        parameters:
+          MACHINE_POOL: 'onnxruntime-Win2022-GPU-T4'
+          PYTHON_VERSION: '3.11'
+          EP_BUILD_FLAGS: --use_tensorrt --tensorrt_home=${{ variables.win_trt_home }} --cuda_home=${{ variables.win_cuda_home }}  --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=52;60;61;70;75;80"
+          EP_NAME: gpu
+          CudaVersion: ${{ parameters.cuda_version }}
+
+
+  - ${{ if eq(parameters.enable_linux_gpu, true) }}:
+      - template: ../templates/py-linux-gpu.yml
+        parameters:
+          arch: 'x86_64'
+          machine_pool: 'onnxruntime-Ubuntu2004-AMD-CPU'
+          extra_build_arg: ${{ parameters.build_py_parameters }}
+          cmake_build_type: ${{ parameters.cmake_build_type }}
+          docker_base_image: ${{ variables.docker_base_image }}
+          trt_version: ${{ variables.linux_trt_version }}
+          cuda_version: ${{ parameters.cuda_version }}
diff --git a/tools/ci_build/github/azure-pipelines/templates/android-java-api-aar-test.yml b/tools/ci_build/github/azure-pipelines/templates/android-java-api-aar-test.yml
index 1263b21d4a03e..41322c6ce3c2b 100644
--- a/tools/ci_build/github/azure-pipelines/templates/android-java-api-aar-test.yml
+++ b/tools/ci_build/github/azure-pipelines/templates/android-java-api-aar-test.yml
@@ -50,13 +50,10 @@ jobs:
 
   - template: install-appcenter.yml
 
-  - script: |
-      python3 $(Build.SourcesDirectory)/tools/python/run_android_emulator.py \
-        --android-sdk-root ${ANDROID_SDK_ROOT} \
-        --create-avd --system-image "system-images;android-31;default;x86_64" \
-        --start --emulator-extra-args="-partition-size 4096" \
-        --emulator-pid-file $(Build.BinariesDirectory)/emulator.pid
-    displayName: Start Android emulator
+  - template: use-android-emulator.yml
+    parameters:
+      create: true
+      start: true
 
   - script: |
       set -e -x
@@ -70,6 +67,10 @@ jobs:
     displayName: Run E2E test using Emulator
     workingDirectory: $(Build.BinariesDirectory)
 
+  - template: use-android-emulator.yml
+    parameters:
+      stop: true
+
   - script: |
       set -e -x
       cd android_test/android
@@ -84,14 +85,6 @@ jobs:
     displayName: Run E2E tests using App Center
     workingDirectory: $(Build.BinariesDirectory)
 
-  - script: |
-      python3 $(Build.SourcesDirectory)/tools/python/run_android_emulator.py \
-        --android-sdk-root ${ANDROID_SDK_ROOT} \
-        --stop \
-        --emulator-pid-file $(Build.BinariesDirectory)/emulator.pid
-    displayName: Stop Android emulator
-    condition: always()
-
   - template: component-governance-component-detection-steps.yml
     parameters :
       condition : 'succeeded'
diff --git a/tools/ci_build/github/azure-pipelines/templates/build-linux-wasm-step.yml b/tools/ci_build/github/azure-pipelines/templates/build-linux-wasm-step.yml
index 82a86e2ec8018..e664cf69dec76 100644
--- a/tools/ci_build/github/azure-pipelines/templates/build-linux-wasm-step.yml
+++ b/tools/ci_build/github/azure-pipelines/templates/build-linux-wasm-step.yml
@@ -38,6 +38,7 @@ steps:
 
   - ${{if eq(parameters.WithCache, true)}}:
     - script: |
+        set -e -x
         pushd '$(Build.SourcesDirectory)/cmake/external/emsdk'
         source ./emsdk_env.sh
         export PATH=$(Build.SourcesDirectory)/cmake/external/emsdk/:$PATH
@@ -66,9 +67,9 @@ steps:
         EM_DIR: '$(Build.SourcesDirectory)/cmake/external/emsdk/upstream/emscripten'
 
   - ${{if eq(parameters.WithCache, false)}}:
-    - task: PythonScript@0
-      displayName: '${{parameters.DisplayName}}'
-      inputs:
-        scriptPath: '$(Build.SourcesDirectory)/tools/ci_build/build.py'
-        arguments: ${{parameters.Arguments}}
-        workingDirectory: '$(Build.BinariesDirectory)'
+    - script: |
+        set -e -x
+        source $(Build.SourcesDirectory)/cmake/external/emsdk/emsdk_env.sh
+        cd '$(Build.BinariesDirectory)'
+        python3 '$(Build.SourcesDirectory)/tools/ci_build/build.py' ${{parameters.Arguments}}
+      displayName: ${{parameters.DisplayName}}
diff --git a/tools/ci_build/github/azure-pipelines/templates/c-api-artifacts-package-and-publish-steps-windows.yml b/tools/ci_build/github/azure-pipelines/templates/c-api-artifacts-package-and-publish-steps-windows.yml
index 07aac08dac0b1..5ee425405ac70 100644
--- a/tools/ci_build/github/azure-pipelines/templates/c-api-artifacts-package-and-publish-steps-windows.yml
+++ b/tools/ci_build/github/azure-pipelines/templates/c-api-artifacts-package-and-publish-steps-windows.yml
@@ -39,6 +39,15 @@ steps:
           mkdir $(Build.BinariesDirectory)\${{parameters.artifactName}}\lib
           mkdir $(Build.BinariesDirectory)\${{parameters.artifactName}}\include
 
+          if exist $(Build.BinariesDirectory)\${{parameters.buildConfig}}\${{parameters.buildConfig}}\onnxruntime_providers_cuda.dll (
+            echo "cuda context headers copied"
+            mkdir $(Build.BinariesDirectory)\${{parameters.artifactName}}\include\core\providers\cuda
+            copy $(Build.SourcesDirectory)\include\onnxruntime\core\providers\resource.h $(Build.BinariesDirectory)\${{parameters.artifactName}}\include\core\providers
+            copy $(Build.SourcesDirectory)\include\onnxruntime\core\providers\custom_op_context.h $(Build.BinariesDirectory)\${{parameters.artifactName}}\include\core\providers
+            copy $(Build.SourcesDirectory)\include\onnxruntime\core\providers\cuda\cuda_context.h $(Build.BinariesDirectory)\${{parameters.artifactName}}\include\core\providers\cuda
+            copy $(Build.SourcesDirectory)\include\onnxruntime\core\providers\cuda\cuda_resource.h $(Build.BinariesDirectory)\${{parameters.artifactName}}\include\core\providers\cuda
+          )
+
           echo "Directories created"
           copy $(Build.BinariesDirectory)\${{parameters.buildConfig}}\${{parameters.buildConfig}}\onnxruntime.dll $(Build.BinariesDirectory)\${{parameters.artifactName}}\lib
           copy $(Build.BinariesDirectory)\${{parameters.buildConfig}}\${{parameters.buildConfig}}\onnxruntime_providers_shared.dll $(Build.BinariesDirectory)\${{parameters.artifactName}}\lib
@@ -58,7 +67,6 @@ steps:
           copy $(Build.SourcesDirectory)\include\onnxruntime\core\session\onnxruntime_*.h  $(Build.BinariesDirectory)\${{parameters.artifactName}}\include
           copy $(Build.SourcesDirectory)\include\onnxruntime\core\framework\provider_options.h  $(Build.BinariesDirectory)\${{parameters.artifactName}}\include
           copy $(Build.SourcesDirectory)\include\onnxruntime\core\providers\cpu\cpu_provider_factory.h  $(Build.BinariesDirectory)\${{parameters.artifactName}}\include
-          copy $(Build.SourcesDirectory)\include\onnxruntime\core\providers\tensorrt\tensorrt_provider_factory.h  $(Build.BinariesDirectory)\${{parameters.artifactName}}\include
           copy $(Build.SourcesDirectory)\orttraining\orttraining\training_api\include\onnxruntime_training*.h  $(Build.BinariesDirectory)\${{parameters.artifactName}}\include
 
           REM copy the README, license and TPN
diff --git a/tools/ci_build/github/azure-pipelines/templates/c-api-cpu.yml b/tools/ci_build/github/azure-pipelines/templates/c-api-cpu.yml
index 21cd3a44e8924..87fd4de7d3127 100644
--- a/tools/ci_build/github/azure-pipelines/templates/c-api-cpu.yml
+++ b/tools/ci_build/github/azure-pipelines/templates/c-api-cpu.yml
@@ -117,32 +117,32 @@ stages:
 
     - script: |
         set -e -x
-        python3 tools/ci_build/github/apple/build_ios_framework.py \
-          --build_dir "$(Build.BinariesDirectory)/ios_framework" \
+        python3 tools/ci_build/github/apple/build_apple_framework.py \
+          --build_dir "$(Build.BinariesDirectory)/apple_framework" \
           --path_to_protoc_exe $(Build.BinariesDirectory)/protobuf_install/bin/protoc \
-          tools/ci_build/github/apple/default_full_ios_framework_build_settings.json
+          tools/ci_build/github/apple/default_full_apple_framework_build_settings.json
         mkdir $(Build.BinariesDirectory)/artifacts
-        mkdir -p $(Build.BinariesDirectory)/artifacts_staging/onnxruntime-ios-xcframework-$(OnnxRuntimeVersion)
-        cp -R $(Build.BinariesDirectory)/ios_framework/framework_out/onnxruntime.xcframework \
-          $(Build.BinariesDirectory)/artifacts_staging/onnxruntime-ios-xcframework-$(OnnxRuntimeVersion)
+        mkdir -p $(Build.BinariesDirectory)/artifacts_staging/onnxruntime-apple-xcframework-$(OnnxRuntimeVersion)
+        cp -R $(Build.BinariesDirectory)/apple_framework/framework_out/onnxruntime.xcframework \
+          $(Build.BinariesDirectory)/artifacts_staging/onnxruntime-apple-xcframework-$(OnnxRuntimeVersion)
         pushd $(Build.BinariesDirectory)/artifacts_staging
         zip -vr $(Build.BinariesDirectory)/artifacts/onnxruntime_xcframework.zip \
-          onnxruntime-ios-xcframework-$(OnnxRuntimeVersion)
+          onnxruntime-apple-xcframework-$(OnnxRuntimeVersion)
         popd
-      displayName: "Build iOS xcframework"
+      displayName: "Build Apple xcframework"
 
     - script: |
-        python3 tools/ci_build/github/apple/test_ios_packages.py \
+        python3 tools/ci_build/github/apple/test_apple_packages.py \
           --fail_if_cocoapods_missing \
-          --framework_info_file "$(Build.BinariesDirectory)/ios_framework/framework_info.json" \
-          --c_framework_dir "$(Build.BinariesDirectory)/ios_framework/framework_out" \
+          --framework_info_file "$(Build.BinariesDirectory)/apple_framework/xcframework_info.json" \
+          --c_framework_dir "$(Build.BinariesDirectory)/apple_framework/framework_out" \
           --variant Full
-      displayName: "Test iOS framework"
+      displayName: "Test Apple framework"
 
     - task: PublishBuildArtifacts@1
       inputs:
         pathtoPublish: '$(Build.BinariesDirectory)/artifacts'
-        artifactName: 'onnxruntime-ios-full-xcframework'
+        artifactName: 'onnxruntime-apple-full-xcframework'
 
     - template: component-governance-component-detection-steps.yml
       parameters:
@@ -304,9 +304,7 @@ stages:
   - job:
     workspace:
       clean: all
-    # we need to use the 2022 pool to create the nuget package with both pre-net6+Xamarin and net6 targets.
-    # VS2019 has no support for net6 and we need to use msbuild (from the VS install) to do the packing
-    pool: 'Azure-Pipelines-EO-Windows2022-aiinfra'
+    pool: 'onnxruntime-Win-CPU-2022'
     variables:
       OrtPackageId: ${{ parameters.OrtNugetPackageId }}
       breakCodesignValidationInjection: ${{ parameters.DoEsrp }}
@@ -315,66 +313,86 @@ stages:
     steps:
     - checkout: self
       submodules: true
-    - task: DownloadPipelineArtifact@0
-      displayName: 'Download win-x64 Pipeline Artifact'
-      inputs:
-        artifactName: 'onnxruntime-win-x64'
-        targetPath: '$(Build.BinariesDirectory)/nuget-artifact'
 
-    - task: DownloadPipelineArtifact@0
-      displayName: 'Download win-x86 Pipeline Artifact'
-      inputs:
-        artifactName: 'onnxruntime-win-x86'
-        targetPath: '$(Build.BinariesDirectory)/nuget-artifact'
+    - template: flex-downloadPipelineArtifact.yml
+      parameters:
+        StepName: 'Download Pipeline Artifact - Win x64'
+        ArtifactName: 'onnxruntime-win-x64'
+        TargetPath: '$(Build.BinariesDirectory)/nuget-artifact'
+        SpecificArtifact: ${{ parameters.specificArtifact }}
+        BuildId: ${{ parameters.BuildId }}
 
-    - task: DownloadPipelineArtifact@0
-      displayName: 'Download win-arm64 Pipeline Artifact'
-      inputs:
-        artifactName: 'onnxruntime-win-arm64'
-        targetPath: '$(Build.BinariesDirectory)/nuget-artifact'
+    - template: flex-downloadPipelineArtifact.yml
+      parameters:
+        StepName: 'Download win-x86 Pipeline Artifact'
+        ArtifactName: 'onnxruntime-win-x86'
+        TargetPath: '$(Build.BinariesDirectory)/nuget-artifact'
+        SpecificArtifact: ${{ parameters.specificArtifact }}
+        BuildId: ${{ parameters.BuildId }}
 
-    - task: DownloadPipelineArtifact@0
-      displayName: 'Download win-arm Pipeline Artifact'
-      inputs:
-        artifactName: 'onnxruntime-win-arm'
-        targetPath: '$(Build.BinariesDirectory)/nuget-artifact'
+    - template: flex-downloadPipelineArtifact.yml
+      parameters:
+        StepName: 'Download win-arm64 Pipeline Artifact'
+        ArtifactName: 'onnxruntime-win-arm64'
+        TargetPath: '$(Build.BinariesDirectory)/nuget-artifact'
+        SpecificArtifact: ${{ parameters.specificArtifact }}
+        BuildId: ${{ parameters.BuildId }}
 
-    - task: DownloadPipelineArtifact@0
-      displayName: 'Download osx-x64 Pipeline Artifact'
-      inputs:
-        artifactName: 'onnxruntime-osx'
-        targetPath: '$(Build.BinariesDirectory)/nuget-artifact'
+    - template: flex-downloadPipelineArtifact.yml
+      parameters:
+        StepName: 'Download win-arm Pipeline Artifact'
+        ArtifactName: 'onnxruntime-win-arm'
+        TargetPath: '$(Build.BinariesDirectory)/nuget-artifact'
+        SpecificArtifact: ${{ parameters.specificArtifact }}
+        BuildId: ${{ parameters.BuildId }}
 
-    - task: DownloadPipelineArtifact@0
-      displayName: 'Download linux-x64 Pipeline Artifact'
-      inputs:
-        artifactName: 'onnxruntime-linux-x64'
-        targetPath: '$(Build.BinariesDirectory)/nuget-artifact'
+    - template: flex-downloadPipelineArtifact.yml
+      parameters:
+        StepName: 'Download osx-x64 Pipeline Artifact'
+        ArtifactName: 'onnxruntime-osx'
+        TargetPath: '$(Build.BinariesDirectory)/nuget-artifact'
+        SpecificArtifact: ${{ parameters.specificArtifact }}
+        BuildId: ${{ parameters.BuildId }}
 
-    - task: DownloadPipelineArtifact@0
-      displayName: 'Download Pipeline Artifact - NuGet'
-      inputs:
-        artifactName: 'onnxruntime-linux-aarch64'
-        targetPath: '$(Build.BinariesDirectory)/nuget-artifact'
+    - template: flex-downloadPipelineArtifact.yml
+      parameters:
+        StepName: 'Download linux-x64 Pipeline Artifact'
+        ArtifactName: 'onnxruntime-linux-x64'
+        TargetPath: '$(Build.BinariesDirectory)/nuget-artifact'
+        SpecificArtifact: ${{ parameters.specificArtifact }}
+        BuildId: ${{ parameters.BuildId }}
 
-    - task: DownloadPipelineArtifact@2
-      displayName: 'Download iOS Pipeline Artifact'
-      inputs:
-        artifactName: 'onnxruntime-ios-full-xcframework'
-        targetPath: '$(Build.BinariesDirectory)/nuget-artifact'
+    - template: flex-downloadPipelineArtifact.yml
+      parameters:
+        StepName: 'Download linux-aarch64 Pipeline Artifact'
+        ArtifactName: 'onnxruntime-linux-aarch64'
+        TargetPath: '$(Build.BinariesDirectory)/nuget-artifact'
+        SpecificArtifact: ${{ parameters.specificArtifact }}
+        BuildId: ${{ parameters.BuildId }}
 
-    - task: DownloadPipelineArtifact@2
-      displayName: 'Download android-full-aar Pipeline Artifact'
-      inputs:
-        artifactName: 'onnxruntime-android-full-aar'
-        patterns: '**/*.aar'
-        targetPath: '$(Build.BinariesDirectory)/nuget-artifact'
+    - template: flex-downloadPipelineArtifact.yml
+      parameters:
+        StepName: 'Download iOS Pipeline Artifact'
+        ArtifactName: 'onnxruntime-ios-full-xcframework'
+        TargetPath: '$(Build.BinariesDirectory)/nuget-artifact'
+        SpecificArtifact: ${{ parameters.specificArtifact }}
+        BuildId: ${{ parameters.BuildId }}
 
-    - task: DownloadPipelineArtifact@0
-      displayName: 'Download drop-extra Pipeline Artifact'
-      inputs:
-        artifactName: 'drop-extra'
-        targetPath: '$(Build.BinariesDirectory)/extra-artifact'
+    - template: flex-downloadPipelineArtifact.yml
+      parameters:
+        StepName: 'Download Android-full-aar Pipeline Artifact'
+        ArtifactName: 'onnxruntime-android-full-aar'
+        TargetPath: '$(Build.BinariesDirectory)/nuget-artifact'
+        SpecificArtifact: ${{ parameters.specificArtifact }}
+        BuildId: ${{ parameters.BuildId }}
+
+    - template: flex-downloadPipelineArtifact.yml
+      parameters:
+        StepName: 'Download drop-extra Pipeline Artifact'
+        ArtifactName: 'drop-extra'
+        TargetPath: '$(Build.BinariesDirectory)/extra-artifact'
+        SpecificArtifact: ${{ parameters.specificArtifact }}
+        BuildId: ${{ parameters.BuildId }}
 
     - script: |
        dir
@@ -398,44 +416,29 @@ stages:
         versionSpec: 6.2.1
 
     - task: PowerShell@2
-      displayName: Install .NET 6 workloads
+      displayName: Install mobile workloads
       inputs:
         targetType: 'inline'
         script: |
-          dotnet workload install android ios macos
-        workingDirectory: '$(Build.SourcesDirectory)\csharp'
-
-    - task: PowerShell@2
-      displayName: Build Microsoft.ML.OnnxRuntime .NET 6 targets using dotnet
-      inputs:
-        targetType: 'inline'
-        # we don't specify 'Any CPU' as the platform here because if we do it gets added to the output path
-        #   e.g. csharp\src\Microsoft.ML.OnnxRuntime\bin\Any CPU\RelWithDebInfo\net6.0-ios\
-        # which is inconsistent with the msbuild output path for the pre-.net6 targets
-        #   e.g. csharp\src\Microsoft.ML.OnnxRuntime\bin\RelWithDebInfo\monoandroid11.0
-        # and makes it harder to do the packing
-        #
-        # 'Any CPU' is the default (first 'mixed' platform specified in the csproj) so this should be fine.
-        script: |
-          dotnet build .\src\Microsoft.ML.OnnxRuntime\Microsoft.ML.OnnxRuntime.csproj -p:SelectedTargets=Net6  -p:Configuration=RelWithDebInfo -p:OnnxRuntimeBuildDirectory="$(Build.BinariesDirectory)" -p:OrtPackageId=$(OrtPackageId) -p:IsReleaseBuild=${{ parameters.IsReleaseBuild }} -p:ReleaseVersionSuffix=$(ReleaseVersionSuffix)
+          dotnet workload install android ios
         workingDirectory: '$(Build.SourcesDirectory)\csharp'
 
     - task: MSBuild@1
-      displayName: 'Restore NuGet Packages and create project.assets.json for pre-.net6 targets'
+      displayName: 'Restore NuGet Packages and create project.assets.json'
       inputs:
         solution: '$(Build.SourcesDirectory)\csharp\OnnxRuntime.CSharp.sln'
         platform: 'Any CPU'
         configuration: RelWithDebInfo
-        msbuildArguments: '-t:restore -p:SelectedTargets=PreNet6 -p:OrtPackageId=$(OrtPackageId)'
+        msbuildArguments: '-t:restore -p:OrtPackageId=$(OrtPackageId)'
         workingDirectory: '$(Build.SourcesDirectory)\csharp'
 
     - task: MSBuild@1
-      displayName: 'Build C# for pre-.net6 targets'
+      displayName: 'Build C# bindings'
       inputs:
         solution: '$(Build.SourcesDirectory)\csharp\OnnxRuntime.CSharp.sln'
         platform: 'Any CPU'
         configuration: RelWithDebInfo
-        msbuildArguments: '-p:SelectedTargets=PreNet6 -p:OnnxRuntimeBuildDirectory="$(Build.BinariesDirectory)" -p:OrtPackageId=$(OrtPackageId) -p:IsReleaseBuild=${{ parameters.IsReleaseBuild }} -p:ReleaseVersionSuffix=$(ReleaseVersionSuffix)'
+        msbuildArguments: '-p:OnnxRuntimeBuildDirectory="$(Build.BinariesDirectory)" -p:OrtPackageId=$(OrtPackageId) -p:IsReleaseBuild=${{ parameters.IsReleaseBuild }} -p:ReleaseVersionSuffix=$(ReleaseVersionSuffix)'
         workingDirectory: '$(Build.SourcesDirectory)\csharp'
 
     - ${{ if eq(parameters.DoEsrp, true) }}:
@@ -445,15 +448,6 @@ stages:
           DisplayName: 'ESRP - Sign C# dlls'
           DoEsrp: ${{ parameters.DoEsrp }}
 
-    - task: MSBuild@1
-      displayName: Update projects.assets.json with combined list of all target frameworks
-      inputs:
-        solution: '$(Build.SourcesDirectory)\csharp\src\Microsoft.ML.OnnxRuntime\Microsoft.ML.OnnxRuntime.csproj'
-        platform: 'Any CPU'
-        configuration: RelWithDebInfo
-        msbuildArguments: '-t:restore -p:SelectedTargets=All -p:OrtPackageId=$(OrtPackageId)'
-        workingDirectory: '$(Build.SourcesDirectory)\csharp'
-
     - task: MSBuild@1
       displayName: 'Build Nuget Packages'
       inputs:
@@ -804,7 +798,7 @@ stages:
 
 - template: ../nodejs/templates/test_linux.yml
   parameters:
-    AgentPool : 'onnxruntime-Ubuntu2004-AMD-CPU'
+    AgentPool : 'Azure-Pipelines-EO-Ubuntu-2004-aiinfra'
     StageSuffix : 'Linux_CPU_x64'
 
 - template: ../nodejs/templates/test_macos.yml
diff --git a/tools/ci_build/github/azure-pipelines/templates/download-deps.yml b/tools/ci_build/github/azure-pipelines/templates/download-deps.yml
index f17bc8de5739b..7484e0285fd2c 100644
--- a/tools/ci_build/github/azure-pipelines/templates/download-deps.yml
+++ b/tools/ci_build/github/azure-pipelines/templates/download-deps.yml
@@ -11,7 +11,7 @@ steps:
       packageType: upack
       feed: '/7424c8e4-5c62-490e-95c4-79446f31017c'
       definition: '517c4f6f-5437-4392-a70d-4f15ec5be2f0'
-      version: 1.0.81
+      version: 1.0.120
       downloadPath: $(Build.BinariesDirectory)/deps
 
 # The private ADO project
@@ -22,7 +22,7 @@ steps:
       packageType: upack
       feed: '/4c7631f5-24c0-4307-8822-1aa8f180c325'
       definition: 'fd9dd5ad-b73e-4678-890e-edcf680dbc1a'
-      version: 1.0.81
+      version: 1.0.120
       downloadPath: $(Build.BinariesDirectory)/deps
 
 # You can add more ADO accounts at here.
diff --git a/tools/ci_build/github/azure-pipelines/templates/jobs/download_win_gpu_library.yml b/tools/ci_build/github/azure-pipelines/templates/jobs/download_win_gpu_library.yml
new file mode 100644
index 0000000000000..b7ae9ffa3c219
--- /dev/null
+++ b/tools/ci_build/github/azure-pipelines/templates/jobs/download_win_gpu_library.yml
@@ -0,0 +1,50 @@
+parameters:
+  - name: DownloadCUDA
+    type: boolean
+    default: false
+  - name: DownloadTRT
+    type: boolean
+    default: false
+  - name: CudaVersion
+    type: string
+    default: '11.8'
+    values:
+      - 11.8
+      - 12.2
+
+steps:
+  - ${{ if eq(parameters.DownloadCUDA, true) }}:
+    - powershell: |
+        azcopy.exe cp --recursive https://lotusscus.blob.core.windows.net/models/cuda_sdk/v${{ parameters.CudaVersion }} $(Agent.TempDirectory)
+      displayName: 'Download CUDA SDK v${{ parameters.CudaVersion }}'
+    - powershell: |
+        Write-Host "##vso[task.prependpath]$(Agent.TempDirectory)\v${{ parameters.CudaVersion }}\bin;$(Agent.TempDirectory)\v${{ parameters.CudaVersion }}\extras\CUPTI\lib64"
+      displayName: 'Append CUDA SDK Directory to PATH'
+    - task: CmdLine@2
+      inputs:
+        script: |
+          echo %PATH%
+      displayName: 'Print PATH'
+
+  - ${{ if eq(parameters.DownloadTRT, true) }}:
+    - ${{ if eq(parameters.CudaVersion, '11.8') }}:
+      - powershell: |
+          azcopy.exe cp --recursive https://lotusscus.blob.core.windows.net/models/local/TensorRT-8.6.1.6.Windows10.x86_64.cuda-11.8 $(Agent.TempDirectory)
+        displayName: 'Download TensorRT-8.6.1.6.Windows10.x86_64.cuda-11.8'
+      - powershell: |
+          Write-Host "##vso[task.prependpath]$(Agent.TempDirectory)\TensorRT-8.6.1.6.Windows10.x86_64.cuda-11.8\lib"
+        displayName: 'Append TensorRT Directory to PATH'
+
+    - ${{ if eq(parameters.CudaVersion, '12.2') }}:
+      - powershell: |
+          azcopy.exe cp --recursive https://lotusscus.blob.core.windows.net/models/local/TensorRT-8.6.1.6.Windows10.x86_64.cuda-12.0 $(Agent.TempDirectory)
+        displayName: 'Download TensorRT-8.6.1.6.Windows10.x86_64.cuda-12.0'
+      - powershell: |
+          Write-Host "##vso[task.prependpath]$(Agent.TempDirectory)\TensorRT-8.6.1.6.Windows10.x86_64.cuda-12.0\lib"
+        displayName: 'Append TensorRT Directory to PATH'
+
+    - task: CmdLine@2
+      inputs:
+        script: |
+          echo %PATH%
+      displayName: 'Print PATH'
\ No newline at end of file
diff --git a/tools/ci_build/github/azure-pipelines/templates/jobs/set-winenv.yml b/tools/ci_build/github/azure-pipelines/templates/jobs/set-winenv.yml
index ca5a52fa61ed3..0c8fb91a24a31 100644
--- a/tools/ci_build/github/azure-pipelines/templates/jobs/set-winenv.yml
+++ b/tools/ci_build/github/azure-pipelines/templates/jobs/set-winenv.yml
@@ -1,19 +1,27 @@
 parameters:
-- name: EnvSetupScript
-  type: string
-
-- name: DownloadCUDA
-  type: boolean
-  default: false
+  - name: EnvSetupScript
+    type: string
+  - name: DownloadCUDA
+    type: boolean
+    default: false
+  - name: PrimaryCUDAVersion
+    type: string
+    default: '11.8'
+  - name: SecondaryCUDAVersion
+    type: string
+    default: '12.2'
 
 steps:
-- ${{ if eq(parameters.DownloadCUDA, 'true') }}:
-  - powershell: |
-      azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/cuda_sdk/v11.8" $(Agent.TempDirectory)
-
-- task: BatchScript@1
-  displayName: 'setup env'
-  inputs:
-    filename: '$(Build.SourcesDirectory)\tools\ci_build\github\windows\${{ parameters.EnvSetupScript }}'
-    modifyEnvironment: true
-    workingFolder: '$(Build.BinariesDirectory)'
+  - ${{ if eq(parameters.DownloadCUDA, 'true') }}:
+      - powershell: |
+          azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/cuda_sdk/v${{ parameters.PrimaryCUDAVersion }}" $(Agent.TempDirectory)
+        displayName: 'Download Primary CUDA SDK v${{ parameters.PrimaryCUDAVersion }}'
+      - powershell: |
+          azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/cuda_sdk/v${{ parameters.SecondaryCUDAVersion }}" $(Agent.TempDirectory)
+        displayName: 'Download Secondary CUDA SDK v${{ parameters.SecondaryCUDAVersion }}'
+  - task: BatchScript@1
+    displayName: 'setup env'
+    inputs:
+      filename: '$(Build.SourcesDirectory)\tools\ci_build\github\windows\${{ parameters.EnvSetupScript }}'
+      modifyEnvironment: true
+      workingFolder: '$(Build.BinariesDirectory)'
diff --git a/tools/ci_build/github/azure-pipelines/templates/jobs/win-ci-prebuild-steps.yml b/tools/ci_build/github/azure-pipelines/templates/jobs/win-ci-prebuild-steps.yml
index 8868e671a5fa5..09c52f4d5ba0d 100644
--- a/tools/ci_build/github/azure-pipelines/templates/jobs/win-ci-prebuild-steps.yml
+++ b/tools/ci_build/github/azure-pipelines/templates/jobs/win-ci-prebuild-steps.yml
@@ -31,7 +31,7 @@ steps:
     architecture: ${{parameters.BuildArch}}
 
 - script: |
-    python -m pip install -q setuptools wheel numpy flatbuffers
+    python -m pip install --upgrade "setuptools>=68.2.2" wheel numpy flatbuffers
   workingDirectory: '$(Build.BinariesDirectory)'
   displayName: 'Install python modules'
 
diff --git a/tools/ci_build/github/azure-pipelines/templates/jobs/win-ci-vs-2022-job.yml b/tools/ci_build/github/azure-pipelines/templates/jobs/win-ci-vs-2022-job.yml
index 46f2ae7b97acc..e40c4d0e95dc5 100644
--- a/tools/ci_build/github/azure-pipelines/templates/jobs/win-ci-vs-2022-job.yml
+++ b/tools/ci_build/github/azure-pipelines/templates/jobs/win-ci-vs-2022-job.yml
@@ -4,6 +4,7 @@ parameters:
 
 - name: EnvSetupScript
   type: string
+  default: setup_env.bat
 
 - name: job_name_suffix
   type: string
@@ -97,7 +98,7 @@ jobs:
 
   - task: NodeTool@0
     inputs:
-      versionSpec: '16.x'
+      versionSpec: '18.x'
       force32bit: ${{ parameters.isX86 }}
 
   # Our build machine doesn't have java x86
@@ -169,16 +170,21 @@ jobs:
   - task: MSBuild@1
     displayName: 'Restore NuGet Packages'
     inputs:
-      solution: '$(Build.SourcesDirectory)\csharp\OnnxRuntime.CSharp.sln'
+      solution: '$(Build.SourcesDirectory)\csharp\OnnxRuntime.DesktopOnly.CSharp.sln'
       platform: 'Any CPU'
       configuration: '${{ parameters.BuildConfig }}'
       msbuildArguments: '-t:restore -p:OrtPackageId=$(OrtPackageId)'
       workingDirectory: '$(Build.SourcesDirectory)\csharp'
 
+  - script: |
+      python3 tools\ValidateNativeDelegateAttributes.py
+    displayName: 'Validate C# native delegates'
+    workingDirectory: '$(Build.SourcesDirectory)\csharp'
+
   - task: MSBuild@1
     displayName: 'Build C#'
     inputs:
-      solution: '$(Build.SourcesDirectory)\csharp\OnnxRuntime.CSharp.sln'
+      solution: '$(Build.SourcesDirectory)\csharp\OnnxRuntime.DesktopOnly.CSharp.sln'
       configuration: '${{ parameters.BuildConfig }}'
       platform: 'Any CPU'
       msbuildArguments: '-p:OnnxRuntimeBuildDirectory="$(Build.BinariesDirectory)" -p:OrtPackageId=$(OrtPackageId)'
@@ -197,7 +203,7 @@ jobs:
           command: test
           projects: '$(Build.SourcesDirectory)\csharp\test\Microsoft.ML.OnnxRuntime.Tests.NetCoreApp\Microsoft.ML.OnnxRuntime.Tests.NetCoreApp.csproj'
           configuration: '${{ parameters.BuildConfig }}'
-          arguments: '--configuration ${{ parameters.BuildConfig }} -p:Platform="Any CPU" -p:OnnxRuntimeBuildDirectory="$(Build.BinariesDirectory)" -p:OrtPackageId=$(OrtPackageId) --blame'
+          arguments: '--configuration ${{ parameters.BuildConfig }} -p:Platform="Any CPU" -p:OnnxRuntimeBuildDirectory="$(Build.BinariesDirectory)" -p:OrtPackageId=$(OrtPackageId) -p:IncludeMobileTargets=false --blame'
           workingDirectory: '$(Build.SourcesDirectory)\csharp'
 
   - ${{ if eq(parameters.EnablePython, true) }}:
diff --git a/tools/ci_build/github/azure-pipelines/templates/linux-ci.yml b/tools/ci_build/github/azure-pipelines/templates/linux-ci.yml
index 05b2dee77e689..7b9788d90b17d 100644
--- a/tools/ci_build/github/azure-pipelines/templates/linux-ci.yml
+++ b/tools/ci_build/github/azure-pipelines/templates/linux-ci.yml
@@ -1,23 +1,14 @@
 parameters:
   AgentPool : 'onnxruntime-Ubuntu2004-AMD-CPU'
   StageName : 'Linux_CI_Dev'
-  SubmoduleCheckoutMode: ''
   RunDockerBuildArgs: '-o ubuntu20.04 -d cpu -x "--build_wheel"'
-  DoNodejsPack: 'false'
-  DoNugetPack: 'false'
   NuPackScript: ''
   RunInjectedPipeline: 'false'
   InjectedPipeline: ''
   DockerImageTag: ''
-  BuildConfig: ''
-  ArtifactName: 'drop-linux'
   TimeoutInMinutes: 120
   # Controls whether unreleased onnx opsets are allowed. Default is set to 1
   AllowReleasedOpsetOnly: '1'
-  # to inject strategy, you need to pass in the whole yaml structure -
-  # https://docs.microsoft.com/en-us/azure/devops/pipelines/yaml-schema?view=azure-devops&tabs=schema#strategies
-  # see example in orttraining-linux-gpu-ci-pipeline.yml
-  Strategy: ''
 
 jobs:
 - job: ${{ parameters.StageName }}
@@ -28,16 +19,8 @@ jobs:
     ALLOW_RELEASED_ONNX_OPSET_ONLY: ${{ parameters.AllowReleasedOpsetOnly }}
     skipComponentGovernanceDetection: true
   pool: ${{ parameters.AgentPool }}
-  ${{ if ne(parameters.Strategy, '') }}:
-    strategy:
-      ${{ parameters.Strategy }}
   steps:
     - checkout: self
-      ${{ if ne(parameters.SubmoduleCheckoutMode, '') }}:
-        submodules: ${{ parameters.SubmoduleCheckoutMode }}
-    - task: NodeTool@0
-      inputs:
-        versionSpec: '16.x'
     - template: run-docker-build-steps.yml
       parameters:
         RunDockerBuildArgs: '${{ parameters.RunDockerBuildArgs }}'
@@ -48,31 +31,10 @@ jobs:
         searchFolder: '$(Build.BinariesDirectory)'
         testRunTitle: 'Unit Test Run'
       condition: succeededOrFailed()
-    - ${{ if eq(parameters['DoNugetPack'], 'true') }}:
-      - script: |
-         ${{ parameters.NuPackScript }}
-        displayName: 'Create Artifacts'
-      - task: PublishPipelineArtifact@0
-        displayName: 'Publish Pipeline Artifact'
-        inputs:
-          artifactName: ${{ parameters.ArtifactName }}
-          targetPath: '$(Build.ArtifactStagingDirectory)'
-    - ${{ if eq(parameters['DoNodejsPack'], 'true') }}:
-      - script: |
-         npm pack
-         cp $(Build.SourcesDirectory)/js/node/onnxruntime-*.tgz $(Build.ArtifactStagingDirectory)
-         cp -R $(Build.SourcesDirectory)/js/node/prebuilds $(Build.ArtifactStagingDirectory)/prebuilds
-        workingDirectory: '$(Build.SourcesDirectory)/js/node'
-        displayName: 'Create NPM Package'
-      - task: PublishPipelineArtifact@0
-        displayName: 'Publish Pipeline Artifact: ${{ parameters.ArtifactName }}'
-        inputs:
-          artifactName: ${{ parameters.ArtifactName }}
-          targetPath: '$(Build.ArtifactStagingDirectory)'
     - ${{ if eq(parameters['RunInjectedPipeline'], 'true') }}:
       - template: |
          ${{ parameters.InjectedPipeline }}
         parameters:
           DockerImageTag: ${{ parameters.DockerImageTag }}
-          BuildConfig: ${{ parameters.BuildConfig }}
+          BuildConfig: Release
     - template: clean-agent-build-directory-step.yml
diff --git a/tools/ci_build/github/azure-pipelines/templates/linux-cpu-packaging-pipeline.yml b/tools/ci_build/github/azure-pipelines/templates/linux-cpu-packaging-pipeline.yml
index 51d3a9ebc2187..1cc5c48c5513c 100644
--- a/tools/ci_build/github/azure-pipelines/templates/linux-cpu-packaging-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/templates/linux-cpu-packaging-pipeline.yml
@@ -47,7 +47,7 @@ stages:
       OnnxruntimeCFlags: '-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -O3 -Wl,--strip-all'
       OnnxruntimeCXXFlags: '-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -O3 -Wl,--strip-all'
       OnnxruntimeNodejsBindingArch: 'arm64'
-      PoolName: 'aiinfra-linux-ARM64-CPU-2019'
+      PoolName: 'onnxruntime-linux-ARM64-CPU-2019'
       ArtifactNamePrefix: ${{ parameters.ArtifactNamePrefix }}
       PackageJava: ${{ parameters.PackageJava }}
       PackageNodeJS: ${{ parameters.PackageNodeJS }}
diff --git a/tools/ci_build/github/azure-pipelines/templates/linux-gpu-tensorrt-packaging-pipeline.yml b/tools/ci_build/github/azure-pipelines/templates/linux-gpu-tensorrt-packaging-pipeline.yml
index 445f739e81c45..7693e8f2cd21c 100644
--- a/tools/ci_build/github/azure-pipelines/templates/linux-gpu-tensorrt-packaging-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/templates/linux-gpu-tensorrt-packaging-pipeline.yml
@@ -23,12 +23,33 @@ parameters:
   type: string
   default: ''
 
+- name: CudaVersion
+  displayName: CUDA version
+  type: string
+  default: '11.8'
+  values:
+    - 11.8
+    - 12.2
+
+
+
 # We only have CUDA/TRT on x64. We do not have a build for CUDA/TRT for ARM64.
 # Therefore this file does not have an `OnnxruntimeNodejsBindingArch` parameter
   
 stages:
 - stage: Linux_C_API_Packaging_GPU_TensorRT_x64
   dependsOn: []
+  variables:
+    - name: linux_trt_version
+      ${{ if eq(parameters.CudaVersion, '11.8') }}:
+        value: 8.6.1.6-1.cuda11.8
+      ${{ if eq(parameters.CudaVersion, '12.2') }}:
+        value: 8.6.1.6-1.cuda12.0
+    - name: docker_base_image
+      ${{ if eq(parameters.CudaVersion, '11.8') }}:
+        value: nvidia/cuda:11.8.0-cudnn8-devel-ubi8
+      ${{ if eq(parameters.CudaVersion, '12.2') }}:
+        value: nvidia/cuda:12.2.2-cudnn8-devel-ubi8
   jobs:
   - job:
     dependsOn: []
@@ -37,17 +58,28 @@ stages:
     timeoutInMinutes:  180
     pool: 'Onnxruntime-Linux-GPU'
     variables:
-      CUDA_VERSION: '11.8'
+      - name: CUDA_VERSION_MAJOR
+        ${{ if eq(parameters.CudaVersion, '11.8') }}:
+          value: '11'
+        ${{ if eq(parameters.CudaVersion, '12.2') }}:
+          value: '12'
+      - name: CUDA_VERSION
+        value: ${{ parameters.CudaVersion }}
     steps:
       - checkout: self
         clean: true
         submodules: recursive
       - template: get-docker-image-steps.yml
         parameters:
-          Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda11_8_tensorrt8_6
+          Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda
           Context: tools/ci_build/github/linux/docker
-          DockerBuildArgs: "--build-arg POLICY=manylinux_2_28 --build-arg PLATFORM=x86_64 --build-arg PREPEND_PATH=/usr/local/cuda/bin --build-arg LD_LIBRARY_PATH_ARG=/usr/local/lib64 --build-arg DEVTOOLSET_ROOTPATH=/usr --build-arg BUILD_UID=$( id -u )"
-          Repository: onnxruntimecuda118xtrt86build
+          DockerBuildArgs: "
+          --network=host
+          --build-arg BASEIMAGE=${{ variables.docker_base_image }}
+          --build-arg TRT_VERSION=${{ variables.linux_trt_version }}
+          --build-arg BUILD_UID=$( id -u )
+          "
+          Repository: onnxruntimecuda${{ variables.CUDA_VERSION_MAJOR }}xtrt86build
       - template: set-version-number-variables-step.yml
 
       - script: $(Build.SourcesDirectory)/tools/ci_build/github/linux/build_tensorrt_c_api_package.sh
diff --git a/tools/ci_build/github/azure-pipelines/templates/linux-wasm-ci.yml b/tools/ci_build/github/azure-pipelines/templates/linux-wasm-ci.yml
index 0e584b550f562..852d688b2dbb1 100644
--- a/tools/ci_build/github/azure-pipelines/templates/linux-wasm-ci.yml
+++ b/tools/ci_build/github/azure-pipelines/templates/linux-wasm-ci.yml
@@ -46,7 +46,7 @@ jobs:
   variables:
     EnvSetupScript: setup_env.bat
     buildArch: x64
-    CommonBuildArgs: '--parallel --config ${{ parameters.BuildConfig }} --skip_submodule_sync --build_wasm --use_xnnpack ${{ parameters.ExtraBuildArgs }}'
+    CommonBuildArgs: '--parallel --config ${{ parameters.BuildConfig }} --skip_submodule_sync --build_wasm ${{ parameters.ExtraBuildArgs }}'
     runCodesignValidationInjection: false
     TODAY: $[format('{0:dd}{0:MM}{0:yyyy}', pipeline.startTime)]
     ORT_CACHE_DIR: $(Agent.TempDirectory)/ort_ccache
@@ -81,9 +81,6 @@ jobs:
       versionSpec: '3.8'
       addToPath: true
       architecture: $(buildArch)
-  - task: NodeTool@0
-    inputs:
-      versionSpec: '16.x'
   - template: download-deps.yml
 
   - task: PythonScript@0
@@ -93,14 +90,20 @@ jobs:
       arguments: --new_dir $(Build.BinariesDirectory)/deps
       workingDirectory: $(Build.BinariesDirectory)
 
-  - script: |
-      set -ex
-      cd '$(Build.SourcesDirectory)/cmake/external/emsdk'
-      ./emsdk install 3.1.44 ccache-git-emscripten-64bit
-      ./emsdk activate 3.1.44 ccache-git-emscripten-64bit
-      ln -s $(Build.SourcesDirectory)/cmake/external/emsdk/ccache/git-emscripten_64bit/bin/ccache /usr/local/bin/ccache
-    displayName: 'emsdk install and activate ccache for emscripten'
-    condition: eq('${{ parameters.WithCache }}', 'true')
+  - ${{if eq(parameters.WithCache, true)}}:
+      - script: |
+          set -ex
+          cd '$(Build.SourcesDirectory)/cmake/external/emsdk'
+          ./emsdk install 3.1.44 ccache-git-emscripten-64bit
+          ./emsdk activate 3.1.44 ccache-git-emscripten-64bit
+        displayName: 'emsdk install and activate ccache for emscripten'
+  - ${{if eq(parameters.WithCache, false)}}:
+      - script: |
+          set -ex
+          cd '$(Build.SourcesDirectory)/cmake/external/emsdk'
+          ./emsdk install 3.1.44
+          ./emsdk activate 3.1.44
+        displayName: 'emsdk install and activate ccache for emscripten'
 
   - template: build-linux-wasm-step.yml
     parameters:
@@ -135,7 +138,7 @@ jobs:
         AdditionalKey: wasm_simd_threads | ${{ parameters.BuildConfig }}
       CacheDir: $(ORT_CACHE_DIR)/wasm_simd_threads
       Arguments: '$(CommonBuildArgs) --build_dir $(Build.BinariesDirectory)/wasm_simd_threads --enable_wasm_simd --enable_wasm_threads --wasm_run_tests_in_browser'
-      DisplayName: 'Build and test (node) (simd + threads)'
+      DisplayName: 'Build and test (browser) (simd + threads)'
       WithCache: ${{ parameters.WithCache }}
 
   - template: build-linux-wasm-step.yml
diff --git a/tools/ci_build/github/azure-pipelines/templates/linux-web-init-and-check.yml b/tools/ci_build/github/azure-pipelines/templates/linux-web-init-and-check.yml
index abd8c94dabd91..e788e4b3dddaa 100644
--- a/tools/ci_build/github/azure-pipelines/templates/linux-web-init-and-check.yml
+++ b/tools/ci_build/github/azure-pipelines/templates/linux-web-init-and-check.yml
@@ -3,10 +3,6 @@ steps:
     npm ci
   workingDirectory: '$(Build.SourcesDirectory)/js'
   displayName: 'npm ci /js/'
-- script: |
-    npm run lint
-  workingDirectory: '$(Build.SourcesDirectory)/js'
-  displayName: 'run ESLint without TS type populated'
 - script: |
     npm ci
   workingDirectory: '$(Build.SourcesDirectory)/js/common'
@@ -19,6 +15,10 @@ steps:
     npm ci
   workingDirectory: '$(Build.SourcesDirectory)/js/web'
   displayName: 'npm ci /js/web/'
+- script: |
+    npm run prebuild
+  workingDirectory: '$(Build.SourcesDirectory)/js/web'
+  displayName: 'run TypeScript type check in /js/web/'
 - script: |
     npm run lint
   workingDirectory: '$(Build.SourcesDirectory)/js'
diff --git a/tools/ci_build/github/azure-pipelines/templates/mac-cpu-packing-jobs.yml b/tools/ci_build/github/azure-pipelines/templates/mac-cpu-packing-jobs.yml
index adfcd98e37230..fd2113502478a 100644
--- a/tools/ci_build/github/azure-pipelines/templates/mac-cpu-packing-jobs.yml
+++ b/tools/ci_build/github/azure-pipelines/templates/mac-cpu-packing-jobs.yml
@@ -31,7 +31,7 @@ jobs:
   workspace:
     clean: all
   variables:
-    MACOSX_DEPLOYMENT_TARGET: '10.14'
+    MACOSX_DEPLOYMENT_TARGET: '11.0'
     ALLOW_RELEASED_ONNX_OPSET_ONLY: ${{ parameters.AllowReleasedOpsetOnly }}
     TODAY: $[format('{0:dd}{0:MM}{0:yyyy}', pipeline.startTime)]
     PROTO_CACHE_DIR: $(Pipeline.Workspace)/ccache_proto
@@ -50,7 +50,7 @@ jobs:
       versionSpec: 3.11
   - task: NodeTool@0
     inputs:
-      versionSpec: '16.x'
+      versionSpec: '18.x'
 
   - template: set-version-number-variables-step.yml
 
diff --git a/tools/ci_build/github/azure-pipelines/templates/ondevice-training-cpu-packaging-pipeline.yml b/tools/ci_build/github/azure-pipelines/templates/ondevice-training-cpu-packaging-pipeline.yml
index 792e828c9a880..29cea63df1662 100644
--- a/tools/ci_build/github/azure-pipelines/templates/ondevice-training-cpu-packaging-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/templates/ondevice-training-cpu-packaging-pipeline.yml
@@ -141,7 +141,7 @@ stages:
       clean: all
     # we need to use the 2022 pool to create the nuget package with both pre-net6+Xamarin and net6 targets.
     # VS2019 has no support for net6 and we need to use msbuild (from the VS install) to do the packing
-    pool: 'Azure-Pipelines-EO-Windows2022-aiinfra'
+    pool: 'Onnxruntime-Win-CPU-2022'
     variables:
       OrtPackageId: ${{ parameters.OrtNugetPackageId }}
       breakCodesignValidationInjection: ${{ parameters.DoEsrp }}
@@ -222,44 +222,29 @@ stages:
         versionSpec: 6.2.1
 
     - task: PowerShell@2
-      displayName: Install .NET 6 workloads
+      displayName: Install mobile workloads
       inputs:
         targetType: 'inline'
         script: |
           dotnet workload install android
         workingDirectory: '$(Build.SourcesDirectory)\csharp'
 
-    - task: PowerShell@2
-      displayName: Build Microsoft.ML.OnnxRuntime .NET 6 targets using dotnet
-      inputs:
-        targetType: 'inline'
-        # we don't specify 'Any CPU' as the platform here because if we do it gets added to the output path
-        #   e.g. csharp\src\Microsoft.ML.OnnxRuntime\bin\Any CPU\RelWithDebInfo\net6.0-ios\
-        # which is inconsistent with the msbuild output path for the pre-.net6 targets
-        #   e.g. csharp\src\Microsoft.ML.OnnxRuntime\bin\RelWithDebInfo\monoandroid11.0
-        # and makes it harder to do the packing
-        #
-        # 'Any CPU' is the default (first 'mixed' platform specified in the csproj) so this should be fine.
-        script: |
-          dotnet build .\src\Microsoft.ML.OnnxRuntime\Microsoft.ML.OnnxRuntime.csproj -p:SelectedTargets=Net6  -p:Configuration=RelWithDebInfo -p:OnnxRuntimeBuildDirectory="$(Build.BinariesDirectory)" -p:OrtPackageId=$(OrtPackageId) -p:IsReleaseBuild=${{ parameters.IsReleaseBuild }} -p:ReleaseVersionSuffix=$(ReleaseVersionSuffix)
-        workingDirectory: '$(Build.SourcesDirectory)\csharp'
-
     - task: MSBuild@1
-      displayName: 'Restore NuGet Packages and create project.assets.json for pre-.net6 targets'
+      displayName: 'Restore NuGet Packages and create project.assets.json'
       inputs:
         solution: '$(Build.SourcesDirectory)\csharp\OnnxRuntime.CSharp.sln'
         platform: 'Any CPU'
         configuration: RelWithDebInfo
-        msbuildArguments: '-t:restore -p:SelectedTargets=PreNet6 -p:OrtPackageId=$(OrtPackageId)'
+        msbuildArguments: '-t:restore -p:OrtPackageId=$(OrtPackageId)'
         workingDirectory: '$(Build.SourcesDirectory)\csharp'
 
     - task: MSBuild@1
-      displayName: 'Build C# for pre-.net6 targets'
+      displayName: 'Build C# bindings'
       inputs:
         solution: '$(Build.SourcesDirectory)\csharp\OnnxRuntime.CSharp.sln'
         platform: 'Any CPU'
         configuration: RelWithDebInfo
-        msbuildArguments: '-p:SelectedTargets=PreNet6 -p:OnnxRuntimeBuildDirectory="$(Build.BinariesDirectory)" -p:OrtPackageId=$(OrtPackageId) -p:IsReleaseBuild=${{ parameters.IsReleaseBuild }}'
+        msbuildArguments: '-p:OnnxRuntimeBuildDirectory="$(Build.BinariesDirectory)" -p:OrtPackageId=$(OrtPackageId) -p:IsReleaseBuild=${{ parameters.IsReleaseBuild }}'
         workingDirectory: '$(Build.SourcesDirectory)\csharp'
 
     - ${{ if eq(parameters.DoEsrp, true) }}:
@@ -269,15 +254,6 @@ stages:
           DisplayName: 'ESRP - Sign C# dlls'
           DoEsrp: ${{ parameters.DoEsrp }}
 
-    - task: MSBuild@1
-      displayName: Update projects.assets.json with combined list of all target frameworks
-      inputs:
-        solution: '$(Build.SourcesDirectory)\csharp\src\Microsoft.ML.OnnxRuntime\Microsoft.ML.OnnxRuntime.csproj'
-        platform: 'Any CPU'
-        configuration: RelWithDebInfo
-        msbuildArguments: '-t:restore -p:SelectedTargets=All -p:OrtPackageId=$(OrtPackageId)'
-        workingDirectory: '$(Build.SourcesDirectory)\csharp'
-
     - task: MSBuild@1
       displayName: 'Build Nuget Packages'
       inputs:
diff --git a/tools/ci_build/github/azure-pipelines/templates/py-linux-gpu.yml b/tools/ci_build/github/azure-pipelines/templates/py-linux-gpu.yml
index 3d5a71284fa6f..8cc48aac7a3b9 100644
--- a/tools/ci_build/github/azure-pipelines/templates/py-linux-gpu.yml
+++ b/tools/ci_build/github/azure-pipelines/templates/py-linux-gpu.yml
@@ -17,7 +17,24 @@ parameters:
    - Release
    - RelWithDebInfo
    - MinSizeRel
-
+- name: docker_base_image
+  type: string
+  default: 'nvidia/cuda:11.8.0-cudnn8-devel-ubi8'
+  values:
+   - nvidia/cuda:11.8.0-cudnn8-devel-ubi8
+   - nvidia/cuda:12.2.2-cudnn8-devel-ubi8
+- name: trt_version
+  type: string
+  default: '8.6.1.6-1.cuda11.8'
+  values:
+    - 8.6.1.6-1.cuda11.8
+    - 8.6.1.6-1.cuda12.0
+- name: cuda_version
+  type: string
+  default: '11.8'
+  values:
+   - 11.8
+   - 12.2
 jobs:
 - job: Linux_py_GPU_Wheels_${{ parameters.arch }}
   timeoutInMinutes: 240
@@ -26,7 +43,13 @@ jobs:
   pool: ${{ parameters.machine_pool }}
   variables:
     # The build machine pool doesn't have dotnet, so it can't run CG.
-    skipComponentGovernanceDetection: true
+    - name: skipComponentGovernanceDetection
+      value: true
+    - name: extra_build_args
+      ${{ if ne(parameters.extra_build_arg, '') }}:
+        value: -x ${{ parameters.extra_build_arg }}
+      ${{ if eq(parameters.extra_build_arg, '') }}:
+        value: ''
   steps:
     - checkout: self
       clean: true
@@ -36,10 +59,16 @@ jobs:
 
     - template: get-docker-image-steps.yml
       parameters:
-        Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda11_8_tensorrt8_6
+        Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda
         Context: tools/ci_build/github/linux/docker
-        DockerBuildArgs: "--network=host --build-arg POLICY=manylinux_2_28 --build-arg PLATFORM=x86_64 --build-arg PREPEND_PATH=/usr/local/cuda/bin --build-arg LD_LIBRARY_PATH_ARG=/usr/local/lib64 --build-arg DEVTOOLSET_ROOTPATH=/usr --build-arg BUILD_UID=$( id -u ) --build-arg PLATFORM=${{ parameters.arch }}"
-        Repository: onnxruntimecuda118xtrt86build${{ parameters.arch }}
+        DockerBuildArgs: "
+        --network=host 
+        --build-arg BASEIMAGE=${{ parameters.docker_base_image }}
+        --build-arg TRT_VERSION=${{ parameters.trt_version }}
+        --build-arg BUILD_UID=$( id -u )
+        --build-arg PLATFORM=${{ parameters.arch }}
+        "
+        Repository: onnxruntimecuda${{ replace(parameters.cuda_version, '.', '') }}xtrt86build${{ parameters.arch }}
 
 
     - task: Bash@3
@@ -47,8 +76,7 @@ jobs:
       inputs:
         targetType: filePath
         filePath: tools/ci_build/github/linux/run_python_dockerbuild.sh
-        # please check ONNXRUNTIME_CUDA_VERSION in tools/ci_build/github/linux/build_linux_arm64_python_package.sh
-        arguments: -i onnxruntimecuda118xtrt86build${{ parameters.arch }} -d "GPU" -c ${{ parameters.cmake_build_type }} -x "${{ parameters.extra_build_arg }}"
+        arguments: -i onnxruntimecuda${{ replace(parameters.cuda_version, '.', '') }}xtrt86build${{ parameters.arch }} -d "GPU" -c ${{ parameters.cmake_build_type }} $(extra_build_args)
 
     - task: PublishBuildArtifacts@1
       displayName: 'Publish Artifact: ONNXRuntime python wheel'
diff --git a/tools/ci_build/github/azure-pipelines/templates/py-linux.yml b/tools/ci_build/github/azure-pipelines/templates/py-linux.yml
index 0774c3350b9b1..db3782c69cf62 100644
--- a/tools/ci_build/github/azure-pipelines/templates/py-linux.yml
+++ b/tools/ci_build/github/azure-pipelines/templates/py-linux.yml
@@ -46,9 +46,17 @@ jobs:
   pool: ${{ parameters.machine_pool }}
   variables:
     # The build machine pool doesn't have dotnet, so it can't run CG.
-    skipComponentGovernanceDetection: true
-    ORT_CACHE_DIR: $(Agent.TempDirectory)/ort_ccache
-    TODAY: $[format('{0:dd}{0:MM}{0:yyyy}', pipeline.startTime)]
+    - name: skipComponentGovernanceDetection
+      value: true
+    - name: ORT_CACHE_DIR
+      value: $(Agent.TempDirectory)/ort_ccache
+    - name: TODAY
+      value: $[format('{0:dd}{0:MM}{0:yyyy}', pipeline.startTime)]
+    - name: extra_build_args
+      ${{ if ne(parameters.extra_build_arg, '') }}:
+        value: -x ${{ parameters.extra_build_arg }}
+      ${{ if eq(parameters.extra_build_arg, '') }}:
+        value: ''
   steps:
     - task: mspremier.PostBuildCleanup.PostBuildCleanup-task.PostBuildCleanup@3
       displayName: 'Clean Agent Directories'
@@ -82,7 +90,7 @@ jobs:
             inputs:
               targetType: filePath
               filePath: tools/ci_build/github/linux/run_python_dockerbuild.sh
-              arguments: -i onnxruntimecpubuildpython${{ parameters.arch }} -d "${{ parameters.device }}" -c ${{ parameters.cmake_build_type }} -x "${{ parameters.extra_build_arg }}"
+              arguments: -i onnxruntimecpubuildpython${{ parameters.arch }} -d "${{ parameters.device }}" -c ${{ parameters.cmake_build_type }} $(extra_build_args)
             ${{ if eq(parameters.with_cache, 'true') }}:
               env:
                 ADDITIONAL_DOCKER_PARAMETER: "--volume $(ORT_CACHE_DIR):/cache -e CCACHE_DIR=/cache -e ORT_BUILD_WITH_CACHE=1"
diff --git a/tools/ci_build/github/azure-pipelines/templates/py-packaging-linux-test-cuda.yml b/tools/ci_build/github/azure-pipelines/templates/py-packaging-linux-test-cuda.yml
index 43ed0172825bc..5dad3ad1f59a6 100644
--- a/tools/ci_build/github/azure-pipelines/templates/py-packaging-linux-test-cuda.yml
+++ b/tools/ci_build/github/azure-pipelines/templates/py-packaging-linux-test-cuda.yml
@@ -81,9 +81,15 @@ jobs:
 
   - template: get-docker-image-steps.yml
     parameters:
-      Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda11_8_tensorrt8_6
+      Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda
       Context: tools/ci_build/github/linux/docker
-      DockerBuildArgs: "--network=host --build-arg POLICY=manylinux_2_28 --build-arg PLATFORM=x86_64 --build-arg PREPEND_PATH=/usr/local/cuda/bin --build-arg LD_LIBRARY_PATH_ARG=/usr/local/lib64 --build-arg DEVTOOLSET_ROOTPATH=/usr --build-arg BUILD_UID=$( id -u ) --build-arg PLATFORM=${{ parameters.arch }}"
+      DockerBuildArgs: "
+      --network=host 
+      --build-arg BASEIMAGE=nvidia/cuda:11.8.0-cudnn8-devel-ubi8 
+      --build-arg TRT_VERSION=8.6.1.6-1.cuda11.8 
+      --build-arg BUILD_UID=$( id -u )
+      --build-arg PLATFORM=${{ parameters.arch }}
+      "
       Repository: onnxruntimecuda118xtrt86build${{ parameters.arch }}
 
   - task: Bash@3
diff --git a/tools/ci_build/github/azure-pipelines/templates/py-packaging-selectable-stage.yml b/tools/ci_build/github/azure-pipelines/templates/py-packaging-selectable-stage.yml
index 6b5fba7785fe0..00ba5ea4a475a 100644
--- a/tools/ci_build/github/azure-pipelines/templates/py-packaging-selectable-stage.yml
+++ b/tools/ci_build/github/azure-pipelines/templates/py-packaging-selectable-stage.yml
@@ -168,7 +168,7 @@ stages:
         inputs:
           filePath: '$(Build.SourcesDirectory)/tools/ci_build/github/windows/install_third_party_deps.ps1'
           workingDirectory: '$(Build.BinariesDirectory)'
-          arguments: -cpu_arch x64 -install_prefix $(Build.BinariesDirectory)\${{ parameters.BuildConfig }}\installed -build_config $(BuildConfig)
+          arguments: -cpu_arch x64 -install_prefix $(Build.BinariesDirectory)\$(BuildConfig)\installed -build_config $(BuildConfig)
 
       - task: PythonScript@0
         displayName: 'Generate cmake config'
diff --git a/tools/ci_build/github/azure-pipelines/templates/py-packaging-stage.yml b/tools/ci_build/github/azure-pipelines/templates/py-packaging-stage.yml
index 1305f5ae21725..f2b91bbaacb89 100644
--- a/tools/ci_build/github/azure-pipelines/templates/py-packaging-stage.yml
+++ b/tools/ci_build/github/azure-pipelines/templates/py-packaging-stage.yml
@@ -335,7 +335,7 @@ stages:
       pool:
         vmImage: 'macOS-13'
       variables:
-        MACOSX_DEPLOYMENT_TARGET: '10.15'
+        MACOSX_DEPLOYMENT_TARGET: '11.0'
       strategy:
         matrix:
           Python38:
@@ -371,7 +371,7 @@ stages:
           export CMAKE_ARGS="-DONNX_GEN_PB_TYPE_STUBS=OFF -DONNX_WERROR=OFF"
           export _PYTHON_HOST_PLATFORM=macosx-${{variables.MACOSX_DEPLOYMENT_TARGET}}-x86_64
           python3 -m pip install -r '$(Build.SourcesDirectory)/tools/ci_build/github/linux/docker/scripts/requirements.txt'
-          python3 $(Build.SourcesDirectory)/tools/ci_build/build.py --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --parallel --config Release --skip_onnx_tests --build_wheel ${{ parameters.build_py_parameters }}
+          python3 $(Build.SourcesDirectory)/tools/ci_build/build.py --build_dir $(Build.BinariesDirectory) --use_coreml --skip_submodule_sync --parallel --config Release --skip_onnx_tests --build_wheel ${{ parameters.build_py_parameters }}
         displayName: 'Command Line Script'
 
       - task: CopyFiles@2
@@ -445,7 +445,7 @@ stages:
           export CMAKE_ARGS="-DONNX_GEN_PB_TYPE_STUBS=OFF -DONNX_WERROR=OFF"
           export _PYTHON_HOST_PLATFORM=macosx-${{variables.MACOSX_DEPLOYMENT_TARGET}}-arm64
           python3 -m pip install -r '$(Build.SourcesDirectory)/tools/ci_build/github/linux/docker/scripts/requirements.txt'
-          python3 $(Build.SourcesDirectory)/tools/ci_build/build.py --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --parallel --config Release --skip_tests --cmake_extra_defines CMAKE_OSX_ARCHITECTURES=arm64 --build_wheel ${{ parameters.build_py_parameters }}
+          python3 $(Build.SourcesDirectory)/tools/ci_build/build.py --build_dir $(Build.BinariesDirectory) --use_coreml --skip_submodule_sync --parallel --config Release --skip_tests --cmake_extra_defines CMAKE_OSX_ARCHITECTURES=arm64 --build_wheel ${{ parameters.build_py_parameters }}
         displayName: 'Command Line Script'
 
       - script: |
@@ -484,7 +484,7 @@ stages:
       - template: py-linux.yml
         parameters:
           arch: 'aarch64'
-          machine_pool: 'aiinfra-linux-ARM64-CPU-2019'
+          machine_pool: 'onnxruntime-linux-ARM64-CPU-2019'
           base_image: 'arm64v8/almalinux:8'
           devtoolset_rootpath: /opt/rh/gcc-toolset-12/root
           ld_library_path_arg: /opt/rh/gcc-toolset-12/root/usr/lib64:/opt/rh/gcc-toolset-12/root/usr/lib:/opt/rh/gcc-toolset-12/root/usr/lib64/dyninst:/opt/rh/gcc-toolset-12/root/usr/lib/dyninst:/usr/local/lib64
diff --git a/tools/ci_build/github/azure-pipelines/templates/py-win-gpu.yml b/tools/ci_build/github/azure-pipelines/templates/py-win-gpu.yml
index 919749cac15b6..501251eaff20f 100644
--- a/tools/ci_build/github/azure-pipelines/templates/py-win-gpu.yml
+++ b/tools/ci_build/github/azure-pipelines/templates/py-win-gpu.yml
@@ -14,21 +14,32 @@ parameters:
 
 - name: ENV_SETUP_SCRIPT
   type: string
+  default: ''
 
 - name: BUILD_PY_PARAMETERS
   displayName: >
     Extra parameters to pass to build.py. Don't put newlines in here.
   type: string
   default: ''
-
+- name: CudaVersion
+  type: string
+  default: '11.8'
+  values:
+    - 11.8
+    - 12.2
 jobs:
 - job: Win_py_${{ parameters.EP_NAME }}_Wheels_${{ replace(parameters.PYTHON_VERSION,'.','_') }}
   timeoutInMinutes: 240
   workspace:
     clean: all
-  pool: ${{ parameters.MACHINE_POOL }}
+  pool:
+    name: ${{ parameters.MACHINE_POOL }}
+#    demands:
+#      - ImageVersionOverride -equals 1.0.367516
   variables:
+    GRADLE_OPTS: '-Dorg.gradle.daemon=false'
     VSGenerator: 'Visual Studio 17 2022'
+    CUDA_MODULE_LOADING: 'LAZY'
   steps:
       - checkout: self
         clean: true
@@ -61,10 +72,21 @@ jobs:
 
       - template: download-deps.yml
 
-      - template: jobs/set-winenv.yml
-        parameters:
-          EnvSetupScript: ${{ parameters.ENV_SETUP_SCRIPT }}
-          DownloadCUDA: true
+      - ${{ if ne(parameters.ENV_SETUP_SCRIPT, '') }}:
+        - template: jobs/set-winenv.yml
+          parameters:
+            EnvSetupScript: ${{ parameters.ENV_SETUP_SCRIPT }}
+            ${{ if or(contains(parameters.EP_BUILD_FLAGS, 'use_cuda'), contains(parameters.EP_BUILD_FLAGS, 'use_tensorrt')) }}:
+              DownloadCUDA: true
+
+      - ${{ if eq(parameters.ENV_SETUP_SCRIPT, '') }}:
+        - template: jobs/download_win_gpu_library.yml
+          parameters:
+            CudaVersion: ${{ parameters.CudaVersion }}
+            ${{ if or(contains(parameters.EP_BUILD_FLAGS, 'use_cuda'), contains(parameters.EP_BUILD_FLAGS, 'use_tensorrt')) }}:
+              DownloadCUDA: true
+            ${{ if contains(parameters.EP_BUILD_FLAGS, 'use_tensorrt') }}:
+              DownloadTRT: true
 
       - task: PythonScript@0
         displayName: 'Update deps.txt'
diff --git a/tools/ci_build/github/azure-pipelines/templates/react-native-ci.yml b/tools/ci_build/github/azure-pipelines/templates/react-native-ci.yml
index 8c54e71448992..47cd72f412c67 100644
--- a/tools/ci_build/github/azure-pipelines/templates/react-native-ci.yml
+++ b/tools/ci_build/github/azure-pipelines/templates/react-native-ci.yml
@@ -69,7 +69,7 @@ stages:
       inputs:
         versionSpec: "3.9"
         addToPath: true
-        rchitecture: "x64"
+        architecture: "x64"
 
     - task: JavaToolInstaller@0
       displayName: Use jdk 11
@@ -80,7 +80,7 @@ stages:
 
     - task: NodeTool@0
       inputs:
-        versionSpec: '16.x'
+        versionSpec: '18.x'
 
     - script:
         brew install coreutils ninja npm yarn
@@ -126,7 +126,7 @@ stages:
         BuildStep:
           - script: |
               set -e -x
-              python $(Build.SourcesDirectory)/tools/ci_build/github/apple/build_and_assemble_ios_pods.py \
+              python $(Build.SourcesDirectory)/tools/ci_build/github/apple/build_and_assemble_apple_pods.py \
                 --build-dir "$(Build.BinariesDirectory)/ios_framework_full" \
                 --staging-dir "$(Build.BinariesDirectory)/staging" \
                 --variant Full \
@@ -134,7 +134,7 @@ stages:
                 -b="--path_to_protoc_exe" -b "$(Build.BinariesDirectory)/installed/bin/protoc"
 
             # Mobile build:
-            #  python $(Build.SourcesDirectory)/tools/ci_build/github/apple/build_and_assemble_ios_pods.py \
+            #  python $(Build.SourcesDirectory)/tools/ci_build/github/apple/build_and_assemble_apple_pods.py \
             #    --build_dir $(Build.BinariesDirectory)/ios_framework_mobile \
             #    --staging-dir "$(Build.BinariesDirectory)/staging" \
             #    --include_ops_by_config $(Build.SourcesDirectory)/tools/ci_build/github/android/mobile_package.required_operators.config \
@@ -185,69 +185,6 @@ stages:
       workingDirectory: '$(Build.SourcesDirectory)/js/react_native'
       displayName: yarn js/react_native
 
-    - script: |
-        python3 tools/python/run_android_emulator.py \
-          --android-sdk-root $(ANDROID_SDK_ROOT) \
-          --create-avd --system-image "system-images;android-30;default;x86_64" \
-          --start --emulator-extra-args="-partition-size 4096 -verbose" \
-          --emulator-pid-file $(Build.BinariesDirectory)/emulator.pid
-      displayName: Start Android Emulator
-
-    - script: |
-        xcrun simctl create iPhoneRNTest com.apple.CoreSimulator.SimDeviceType.iPhone-13
-      workingDirectory: '$(Build.SourcesDirectory)/js/react_native/e2e/ios'
-      displayName: Start iOS Simulator
-
-    - template: android-dump-logs-from-steps.yml
-      parameters:
-        steps:
-        - task: Gradle@3
-          inputs:
-            gradleWrapperFile: '$(Build.SourcesDirectory)/js/react_native/android/gradlew'
-            workingDirectory: '$(Build.SourcesDirectory)/js/react_native/android'
-            options: '--stacktrace'
-            tasks: 'connectedDebugAndroidTest'
-            publishJUnitResults: true
-            testResultsFiles: '**/TEST-*.xml'
-            testRunTitle: 'React Native Android Instrumented Test results'
-            javaHomeOption: 'path'
-            jdkDirectory: '$(JAVA_HOME_11_X64)'
-            sonarQubeRunAnalysis: false
-            spotBugsAnalysis: false
-          displayName: Run React Native Android Instrumented Tests
-
-    - script: |
-        # Mobile build:
-        # ORT_MOBILE_C_LOCAL_POD_PATH=$(Build.BinariesDirectory)/staging/onnxruntime-mobile-c \
-        ORT_C_LOCAL_POD_PATH=$(Build.BinariesDirectory)/staging/onnxruntime-c \
-        pod install
-      workingDirectory: '$(Build.SourcesDirectory)/js/react_native/ios'
-      displayName: Pod install for onnxruntime react native ios bridge library
-
-    - task: Xcode@5
-      inputs:
-        actions: 'test'
-        configuration: 'Debug'
-        sdk: 'iphonesimulator'
-        xcWorkspacePath: '$(Build.SourcesDirectory)/js/react_native/ios/OnnxruntimeModule.xcworkspace'
-        scheme: 'OnnxruntimeModuleTest'
-        packageApp: false
-        destinationPlatformOption: 'iOS'
-        destinationSimulators: 'iPhone 13,OS=latest'
-        workingDirectory: '$(Build.SourcesDirectory)/js/react_native/ios'
-        xcprettyArgs: '--output build/reports/test-results.xml'
-        publishJUnitResults: true
-        testRunTitle: 'React Native iOS Instrumented Test Results'
-      displayName: Run React Native iOS Instrumented Tests
-
-    - task: PublishTestResults@2
-      inputs:
-        testResultsFiles: '$(Build.SourcesDirectory)/js/react_native/ios/build/reports/test-results.xml'
-        failTaskOnFailedTests: true
-        testRunTitle: 'React Native iOS Instrumented Test results'
-      condition: succeededOrFailed()
-      displayName: Publish React Native iOS Instrumented Test Results
-
     - task: PowerShell@2
       inputs:
         filePath: '$(Build.SourcesDirectory)/tools/ci_build/github/js/pack-npm-packages.ps1'
@@ -267,6 +204,14 @@ stages:
       workingDirectory: '$(Build.SourcesDirectory)/js/react_native/e2e'
       displayName: Bootstrap Android and iOS e2e tests
 
+    - script: |
+        # Mobile build:
+        # ORT_MOBILE_C_LOCAL_POD_PATH=$(Build.BinariesDirectory)/staging/onnxruntime-mobile-c \
+        ORT_C_LOCAL_POD_PATH=$(Build.BinariesDirectory)/staging/onnxruntime-c \
+        pod install
+      workingDirectory: '$(Build.SourcesDirectory)/js/react_native/ios'
+      displayName: Pod install for onnxruntime react native ios bridge library
+
     - script: |
         # Mobile build:
         # ORT_MOBILE_C_LOCAL_POD_PATH=$(Build.BinariesDirectory)/staging/onnxruntime-mobile-c \
@@ -301,12 +246,47 @@ stages:
       workingDirectory: '$(Build.SourcesDirectory)/js/react_native/e2e'
       displayName: Build React Native Detox Android e2e Tests
 
+    - script: |
+        detox build --configuration ios.sim.release
+      workingDirectory: '$(Build.SourcesDirectory)/js/react_native/e2e'
+      displayName: Build React Native Detox iOS e2e Tests
+
+    #
+    # Unit tests and E2E tests with Android emulator
+    #
+    - template: use-android-emulator.yml
+      parameters:
+        create: true
+        start: true
+
+    - template: android-dump-logs-from-steps.yml
+      parameters:
+        steps:
+        - task: Gradle@3
+          inputs:
+            gradleWrapperFile: '$(Build.SourcesDirectory)/js/react_native/android/gradlew'
+            workingDirectory: '$(Build.SourcesDirectory)/js/react_native/android'
+            options: '--stacktrace'
+            tasks: 'connectedDebugAndroidTest'
+            publishJUnitResults: true
+            testResultsFiles: '**/TEST-*.xml'
+            testRunTitle: 'React Native Android Instrumented Test results'
+            javaHomeOption: 'path'
+            jdkDirectory: '$(JAVA_HOME_11_X64)'
+            sonarQubeRunAnalysis: false
+            spotBugsAnalysis: false
+          displayName: Run React Native Android Instrumented Tests
+
     - script: |
         JEST_JUNIT_OUTPUT_FILE=$(Build.SourcesDirectory)/js/react_native/e2e/android-test-results.xml \
         detox test --record-logs all --configuration android.emu.release
       workingDirectory: '$(Build.SourcesDirectory)/js/react_native/e2e'
       displayName: Run React Native Detox Android e2e Tests
 
+    - template: use-android-emulator.yml
+      parameters:
+        stop: true
+
     - task: PublishTestResults@2
       inputs:
         testResultsFiles: '$(Build.SourcesDirectory)/js/react_native/e2e/android-test-results.xml'
@@ -315,10 +295,37 @@ stages:
       condition: succeededOrFailed()
       displayName: Publish React Native Detox Android e2e Test Results
 
+    #
+    # Unit tests and E2E tests with iOS simulator
+    #
     - script: |
-        detox build --configuration ios.sim.release
-      workingDirectory: '$(Build.SourcesDirectory)/js/react_native/e2e'
-      displayName: Build React Native Detox iOS e2e Tests
+        xcrun simctl create iPhoneRNTest com.apple.CoreSimulator.SimDeviceType.iPhone-13
+      workingDirectory: '$(Build.SourcesDirectory)/js/react_native/e2e/ios'
+      displayName: Start iOS Simulator
+
+    - task: Xcode@5
+      inputs:
+        actions: 'test'
+        configuration: 'Debug'
+        sdk: 'iphonesimulator'
+        xcWorkspacePath: '$(Build.SourcesDirectory)/js/react_native/ios/OnnxruntimeModule.xcworkspace'
+        scheme: 'OnnxruntimeModuleTest'
+        packageApp: false
+        destinationPlatformOption: 'iOS'
+        destinationSimulators: 'iPhone 13,OS=latest'
+        workingDirectory: '$(Build.SourcesDirectory)/js/react_native/ios'
+        xcprettyArgs: '--output build/reports/test-results.xml'
+        publishJUnitResults: true
+        testRunTitle: 'React Native iOS Instrumented Test Results'
+      displayName: Run React Native iOS Instrumented Tests
+
+    - task: PublishTestResults@2
+      inputs:
+        testResultsFiles: '$(Build.SourcesDirectory)/js/react_native/ios/build/reports/test-results.xml'
+        failTaskOnFailedTests: true
+        testRunTitle: 'React Native iOS Instrumented Test results'
+      condition: succeededOrFailed()
+      displayName: Publish React Native iOS Instrumented Test Results
 
     - script: |
         JEST_JUNIT_OUTPUT_FILE=$(Build.SourcesDirectory)/js/react_native/e2e/ios-test-results.xml \
@@ -326,6 +333,12 @@ stages:
       workingDirectory: '$(Build.SourcesDirectory)/js/react_native/e2e'
       displayName: Run React Native Detox iOS e2e Tests
 
+    - script: |
+        xcrun simctl delete iPhoneRNTest
+      workingDirectory: '$(Build.SourcesDirectory)/js/react_native/e2e/ios'
+      displayName: Stop iOS Simulator
+      condition: always()
+
     - task: PublishTestResults@2
       inputs:
         testResultsFiles: '$(Build.SourcesDirectory)/js/react_native/e2e/ios-test-results.xml'
@@ -341,20 +354,6 @@ stages:
       condition: succeededOrFailed()
       displayName: Publish React Native Detox E2E test logs
 
-    - script: |
-        python3 tools/python/run_android_emulator.py \
-          --android-sdk-root $(ANDROID_SDK_ROOT) \
-          --stop \
-          --emulator-pid-file $(Build.BinariesDirectory)/emulator.pid
-      displayName: Stop Android Emulator
-      condition: always()
-
-    - script: |
-        xcrun simctl delete iPhoneRNTest
-      workingDirectory: '$(Build.SourcesDirectory)/js/react_native/e2e/ios'
-      displayName: Stop iOS Simulator
-      condition: always()
-
     - script: |
         git restore .
       workingDirectory: '$(Build.SourcesDirectory)/js'
diff --git a/tools/ci_build/github/azure-pipelines/templates/rocm.yml b/tools/ci_build/github/azure-pipelines/templates/rocm.yml
index cc2e8745e8946..2e9e6c6b35a2e 100644
--- a/tools/ci_build/github/azure-pipelines/templates/rocm.yml
+++ b/tools/ci_build/github/azure-pipelines/templates/rocm.yml
@@ -27,7 +27,7 @@ jobs:
       ${{ if eq(parameters.BuildConfig, 'Release') }}:
         value: 'onnxruntime_rocm'
       ${{ else }}:
-        value: 'onnxruntime_rocm_enable_profiling'
+        value: 'onnxruntime_rocm_Debug'
 
   steps:
   - task: CmdLine@2
@@ -51,7 +51,6 @@ jobs:
         --build-arg INSTALL_DEPS_EXTRA_ARGS=-tmur
         --build-arg BUILD_UID=$(id -u)
         --network=host --build-arg POLICY=manylinux_2_28 --build-arg PLATFORM=x86_64
-        --build-arg ROCM_VERSION=$(RocmVersion)
         --build-arg DEVTOOLSET_ROOTPATH=/opt/rh/gcc-toolset-12/root
         --build-arg PREPEND_PATH=/opt/rh/gcc-toolset-12/root/usr/bin:
         --build-arg LD_LIBRARY_PATH_ARG=/opt/rh/gcc-toolset-12/root/usr/lib64:/opt/rh/gcc-toolset-12/root/usr/lib:/opt/rh/gcc-toolset-12/root/usr/lib64/dyninst:/opt/rh/gcc-toolset-12/root/usr/lib/dyninst:/usr/local/lib64:/usr/local/lib
@@ -74,11 +73,13 @@ jobs:
           --entrypoint $(PythonManylinuxDir)/bin/python3 \
           -e NIGHTLY_BUILD \
           -e BUILD_BUILDNUMBER \
+          -e ORT_DISABLE_PYTHON_PACKAGE_LOCAL_VERSION \
           --user onnxruntimedev \
           onnxruntimetrainingrocmbuild-rocm${{ parameters.RocmVersion }} \
             /onnxruntime_src/tools/ci_build/build.py \
               --config ${{ parameters.BuildConfig }} \
               --use_rocm \
+              --use_migraphx \
               --rocm_version=${{ parameters.RocmVersion }} \
               --rocm_home=/opt/rocm \
               --nccl_home=/opt/rocm \
@@ -132,7 +133,7 @@ jobs:
       python3 tools/ci_build/upload_python_package_to_azure_storage.py \
           --python_wheel_path ${files[0]} \
           --final_storage
-    condition: and(succeeded(), eq(variables['DRY_RUN'], '0'))
+    condition: and(ne(variables['ORT_DISABLE_PYTHON_PACKAGE_LOCAL_VERSION'], 'true'), and(succeeded(), eq(variables['DRY_RUN'], '0')))
     displayName: 'Upload Rocm wheel to release repository'
 
   - template: component-governance-component-detection-steps.yml
diff --git a/tools/ci_build/github/azure-pipelines/templates/stages/mac-ios-packaging-build-stage.yml b/tools/ci_build/github/azure-pipelines/templates/stages/mac-ios-packaging-build-stage.yml
index 81f17a26b16a6..1a7915172e211 100644
--- a/tools/ci_build/github/azure-pipelines/templates/stages/mac-ios-packaging-build-stage.yml
+++ b/tools/ci_build/github/azure-pipelines/templates/stages/mac-ios-packaging-build-stage.yml
@@ -29,7 +29,7 @@ stages:
         objcPodName: onnxruntime-mobile-objc
 
       ${{ if eq(parameters.packageVariant, 'Full') }}:
-        buildSettingsFile: "tools/ci_build/github/apple/default_full_ios_framework_build_settings.json"
+        buildSettingsFile: "tools/ci_build/github/apple/default_full_apple_framework_build_settings.json"
         cPodName: onnxruntime-c
         objcPodName: onnxruntime-objc
 
@@ -38,7 +38,7 @@ stages:
         cPodName: onnxruntime-training-c
         objcPodName: onnxruntime-training-objc
 
-    timeoutInMinutes: 120
+    timeoutInMinutes: 180
 
     steps:
     - script: |
@@ -84,8 +84,8 @@ stages:
 
     # create and test mobile pods
     - script: |
-        python tools/ci_build/github/apple/build_and_assemble_ios_pods.py \
-          --build-dir "$(Build.BinariesDirectory)/ios_framework" \
+        python tools/ci_build/github/apple/build_and_assemble_apple_pods.py \
+          --build-dir "$(Build.BinariesDirectory)/apple_framework" \
           --staging-dir "$(Build.BinariesDirectory)/staging" \
           --pod-version "$(ortPodVersion)" \
           --test \
@@ -93,13 +93,13 @@ stages:
           --build-settings-file "${{ variables.buildSettingsFile }}" \
           ${{ variables.optionalIncludeOpsByConfigOption }} \
           -b="--path_to_protoc_exe=$(Build.BinariesDirectory)/protobuf_install/bin/protoc"
-      displayName: "Build iOS framework and assemble pod package files"
+      displayName: "Build macOS/iOS framework and assemble pod package files"
 
     - script: |
-        python tools/ci_build/github/apple/test_ios_packages.py \
+        python tools/ci_build/github/apple/test_apple_packages.py \
           --fail_if_cocoapods_missing \
-          --framework_info_file "$(Build.BinariesDirectory)/ios_framework/framework_info.json" \
-          --c_framework_dir "$(Build.BinariesDirectory)/ios_framework/framework_out" \
+          --framework_info_file "$(Build.BinariesDirectory)/apple_framework/xcframework_info.json" \
+          --c_framework_dir "$(Build.BinariesDirectory)/apple_framework/framework_out" \
           --variant ${{ parameters.packageVariant }} \
           --test_project_stage_dir "$(Build.BinariesDirectory)/app_center_test" \
           --prepare_test_project_only
@@ -109,7 +109,7 @@ stages:
       inputs:
         actions: 'build-for-testing'
         configuration: 'Debug'
-        xcWorkspacePath: '$(Build.BinariesDirectory)/app_center_test/ios_package_test/ios_package_test.xcworkspace'
+        xcWorkspacePath: '$(Build.BinariesDirectory)/app_center_test/apple_package_test/apple_package_test.xcworkspace'
         sdk: 'iphoneos'
         scheme: 'ios_package_test'
         xcodeVersion: 'specifyPath'
@@ -118,8 +118,8 @@ stages:
         signingIdentity: '$(APPLE_CERTIFICATE_SIGNING_IDENTITY)'
         provisioningProfileName: 'temporary *'  # temporary name, change it back to the original below later
         #provisioningProfileName: 'iOS Team Provisioning Profile'
-        args: '-derivedDataPath $(Build.BinariesDirectory)/app_center_test/ios_package_test/DerivedData'
-        workingDirectory: '$(Build.BinariesDirectory)/app_center_test/ios_package_test/'
+        args: '-derivedDataPath $(Build.BinariesDirectory)/app_center_test/apple_package_test/DerivedData'
+        workingDirectory: '$(Build.BinariesDirectory)/app_center_test/apple_package_test/'
         useXcpretty: false  # xcpretty can hide useful error output so we will disable it
       displayName: 'Build App Center iPhone arm64 tests'
 
@@ -130,7 +130,7 @@ stages:
           --devices $(app_center_test_devices) \
           --test-series "master" \
           --locale "en_US" \
-          --build-dir $(Build.BinariesDirectory)/app_center_test/ios_package_test/DerivedData/Build/Products/Debug-iphoneos \
+          --build-dir $(Build.BinariesDirectory)/app_center_test/apple_package_test/DerivedData/Build/Products/Debug-iphoneos \
           --token $(app_center_api_token)
       displayName: "Run E2E tests on App Center"
 
@@ -139,7 +139,7 @@ stages:
 
         for POD_NAME in "${{ variables.cPodName}}" "${{ variables.objcPodName }}";
         do
-          ./tools/ci_build/github/apple/assemble_ios_packaging_artifacts.sh \
+          ./tools/ci_build/github/apple/assemble_apple_packaging_artifacts.sh \
             "$(Build.BinariesDirectory)/staging" \
             "$(Build.ArtifactStagingDirectory)" \
             "${POD_NAME}" \
diff --git a/tools/ci_build/github/azure-pipelines/templates/use-android-emulator.yml b/tools/ci_build/github/azure-pipelines/templates/use-android-emulator.yml
new file mode 100644
index 0000000000000..b31882c8da18f
--- /dev/null
+++ b/tools/ci_build/github/azure-pipelines/templates/use-android-emulator.yml
@@ -0,0 +1,64 @@
+# Android Emulator helpers
+
+parameters:
+- name: create
+  type: boolean
+  default: false
+
+- name: start
+  type: boolean
+  default: false
+
+- name: stop
+  type: boolean
+  default: false
+
+steps:
+- ${{ if eq(parameters.create, true) }}:
+  - script: |
+      set -e -x
+      python3 tools/python/run_android_emulator.py \
+        --android-sdk-root $(ANDROID_SDK_ROOT) \
+        --create-avd --system-image "system-images;android-31;default;x86_64"
+    displayName: Create Android Emulator
+
+- ${{ if eq(parameters.start, true) }}:
+  - script: |
+      if test -f $(Build.BinariesDirectory)/emulator.pid; then
+        echo "Emulator PID file was not expected to exist but does and has pid:" \
+          `cat $(Build.BinariesDirectory)/emulator.pid`
+        exit 1
+      fi
+    displayName: Check emulator.pid does not exist
+
+  # Add -verbose to --emulator-extra-args to enable additional logging.
+  - script: |
+      set -e -x
+      python3 tools/python/run_android_emulator.py \
+        --android-sdk-root $(ANDROID_SDK_ROOT) \
+        --start --emulator-extra-args="-partition-size 2047" \
+        --emulator-pid-file $(Build.BinariesDirectory)/emulator.pid
+      echo "Emulator PID:"`cat $(Build.BinariesDirectory)/emulator.pid`
+    displayName: Start Android Emulator
+
+- ${{ if eq(parameters.stop, true) }}:
+  - script: |
+      set -e -x
+      python3 -m pip install psutil
+    displayName: Install psutil for emulator shutdown by run_android_emulator.py
+    condition: always()
+
+  - script: |
+      set -e -x
+      if test -f $(Build.BinariesDirectory)/emulator.pid; then
+        echo "Emulator PID:"`cat $(Build.BinariesDirectory)/emulator.pid`
+        python3 tools/python/run_android_emulator.py \
+          --android-sdk-root $(ANDROID_SDK_ROOT) \
+          --stop \
+          --emulator-pid-file $(Build.BinariesDirectory)/emulator.pid
+        rm $(Build.BinariesDirectory)/emulator.pid
+      else
+        echo "Emulator PID file was expected to exist but does not."
+      fi
+    displayName: Stop Android Emulator
+    condition: always()
diff --git a/tools/ci_build/github/azure-pipelines/templates/use-android-ndk.yml b/tools/ci_build/github/azure-pipelines/templates/use-android-ndk.yml
index 0e034dff9d0b2..b8dba89b0b899 100644
--- a/tools/ci_build/github/azure-pipelines/templates/use-android-ndk.yml
+++ b/tools/ci_build/github/azure-pipelines/templates/use-android-ndk.yml
@@ -3,7 +3,7 @@
   parameters:
   - name: AndroidNdkVersion
     type: string
-    default: "25.0.8775105"  # LTS version
+    default: "26.1.10909125"  # LTS version
 
   steps:
   - bash: |
diff --git a/tools/ci_build/github/azure-pipelines/templates/web-browserstack-ci.yml b/tools/ci_build/github/azure-pipelines/templates/web-browserstack-ci.yml
index 4494fd36b336e..96e6ff89cd4f1 100644
--- a/tools/ci_build/github/azure-pipelines/templates/web-browserstack-ci.yml
+++ b/tools/ci_build/github/azure-pipelines/templates/web-browserstack-ci.yml
@@ -29,7 +29,7 @@ jobs:
 
   - task: NodeTool@0
     inputs:
-      versionSpec: '16.x'
+      versionSpec: '18.x'
   - task: DownloadPipelineArtifact@2
     inputs:
       patterns: 'Release_*/**/*'
diff --git a/tools/ci_build/github/azure-pipelines/templates/web-ci.yml b/tools/ci_build/github/azure-pipelines/templates/web-ci.yml
index 0b7bd3f645442..9982b36509b68 100644
--- a/tools/ci_build/github/azure-pipelines/templates/web-ci.yml
+++ b/tools/ci_build/github/azure-pipelines/templates/web-ci.yml
@@ -26,7 +26,7 @@ parameters:
 
 - name: WASMTemplate
   type: string
-  default: win-wasm-ci.yml
+  default: linux-wasm-ci.yml
 # parameter couldn't be compared by string, so add one boolean parameter.
 - name: UseWebPoolName
   type: boolean
@@ -39,10 +39,10 @@ parameters:
   default: false
 - name: WebGpuPoolName
   type: string
-  default: ''
+  default: 'onnxruntime-Win2022-webgpu-A10'
 - name: WebCpuPoolName
   type: string
-  default: ''
+  default: 'onnxruntime-Win-CPU-2022-web'
 
 - name: ExtraBuildArgs
   displayName: 'Extra build command line arguments'
@@ -65,7 +65,6 @@ stages:
       clean: all
     steps:
     - checkout: self
-      fetchDepth: 1
       submodules: false
     - script: |
        git submodule sync -- cmake/external/onnx
@@ -74,7 +73,7 @@ stages:
       displayName: 'Checkout submodule onnx'
     - task: NodeTool@0
       inputs:
-        versionSpec: '16.x'
+        versionSpec: '18.x'
     - template: linux-web-init-and-check.yml
     - task: Bash@3
       displayName: 'Extract commit SHA and save to __commit.txt'
diff --git a/tools/ci_build/github/azure-pipelines/templates/win-ci.yml b/tools/ci_build/github/azure-pipelines/templates/win-ci.yml
index 80d285f3fd3fb..a31b2fedbf217 100644
--- a/tools/ci_build/github/azure-pipelines/templates/win-ci.yml
+++ b/tools/ci_build/github/azure-pipelines/templates/win-ci.yml
@@ -11,6 +11,7 @@ parameters:
 
 - name: EnvSetupScript
   type: string
+  default: ''
 
 - name: buildArch
   type: string
@@ -63,11 +64,24 @@ parameters:
   type: boolean
   default: false
 
+- name: PublishProtoc
+  type: boolean
+  default: false
+
+- name: CudaVersion
+  type: string
+  default: '11.8'
+  values:
+      - 11.8
+      - 12.2
+
 stages:
 - stage: Windows_Packaging_${{ parameters.stage_name_suffix }}
   dependsOn: []
   variables:
+    GRADLE_OPTS: '-Dorg.gradle.daemon=false'
     VSGenerator: 'Visual Studio 17 2022'
+    CUDA_MODULE_LOADING: 'LAZY'
   jobs:
   - job:
     workspace:
@@ -101,13 +115,27 @@ stages:
       - task: NodeTool@0
         condition: and(succeeded(), eq('${{ parameters.buildNodejs}}', true))
         inputs:
-          versionSpec: '16.x'
+          versionSpec: '18.x'
+      - ${{ if ne(parameters.EnvSetupScript, '') }}:
+        - template: jobs/set-winenv.yml
+          parameters:
+            EnvSetupScript: ${{ parameters.EnvSetupScript }}
+            ${{ if contains(parameters.buildparameter, 'use_cuda') }}:
+              DownloadCUDA: true
 
-      - template: jobs/set-winenv.yml
-        parameters:
-          EnvSetupScript: ${{ parameters.EnvSetupScript }}
-          ${{ if contains(parameters.buildparameter, 'use_cuda') }}:
-            DownloadCUDA: true
+      - ${{ if eq(parameters.EnvSetupScript, '') }}:
+        - template: jobs/download_win_gpu_library.yml
+          parameters:
+            CudaVersion: ${{ parameters.CudaVersion }}
+            ${{ if contains(parameters.buildparameter, 'use_cuda') }}:
+              DownloadCUDA: true
+            ${{ if contains(parameters.buildparameter, 'use_tensorrt') }}:
+              DownloadCUDA: true
+              DownloadTRT: true
+      - powershell: |
+          Write-Host "##vso[task.prependpath]C:\Program Files (x86)\dotnet"
+        displayName: 'Append dotnet x86  Directory to PATH'
+        condition: and(succeeded(), eq('${{ parameters.buildArch}}', 'x86'))
 
       - template: download-deps.yml
 
@@ -180,7 +208,8 @@ stages:
 
       #Upload protoc.exe, which will be used in nuget build for generating C# files
       - task: PublishPipelineArtifact@1
-        condition: and(succeeded(), eq('${{ parameters.packageName}}', 'x64'))
+        displayName: Publish protoc as drop-extra
+        condition: and(succeeded(), or(eq('${{ parameters.packageName}}', 'x64'), eq('${{ parameters.PublishProtoc}}', true)))
         inputs:
           targetPath: '$(Build.BinariesDirectory)\RelWithDebInfo\installed\bin\protoc.exe'
           artifactName: 'drop-extra${{ parameters.artifact_name_suffix }}'
@@ -194,13 +223,6 @@ stages:
           Contents: 'custom_op_library.dll'
           TargetFolder: '$(Build.ArtifactStagingDirectory)/testdata'
 
-      #To be used in test_win.yml
-      - task: PublishPipelineArtifact@1
-        condition: and(succeeded(), eq('${{ parameters.packageName}}', 'x64'))
-        inputs:
-          targetPath: '$(Build.BinariesDirectory)\RelWithDebInfo\installed\bin\protoc.exe'
-          artifactName: 'drop-nuget${{ parameters.artifact_name_suffix }}'
-
       - task: CmdLine@2
         condition: and(succeeded(), eq('${{ parameters.buildJava}}', true))
         displayName: 'Add symbols and notices to Java'
diff --git a/tools/ci_build/github/azure-pipelines/templates/win-wasm-ci.yml b/tools/ci_build/github/azure-pipelines/templates/win-wasm-ci.yml
index 9d36e2dbe4944..79647cc5699c8 100644
--- a/tools/ci_build/github/azure-pipelines/templates/win-wasm-ci.yml
+++ b/tools/ci_build/github/azure-pipelines/templates/win-wasm-ci.yml
@@ -20,7 +20,7 @@ parameters:
   default: false
 
 - name: TimeoutInMinutes
-  default: 180
+  default: 240
 
 - name: BuildJsep
   type: boolean
@@ -31,23 +31,30 @@ parameters:
   type: boolean
   default: false
 
+- name: BuildTraining
+  type: boolean
+  default: true
+
 - name: WithCache
   type: boolean
   default: false
 
 jobs:
 - job: build_WASM
-  pool: ${{ parameters.PoolName }}
-
+  pool:
+    name: ${{ parameters.PoolName }}
   variables:
     EnvSetupScript: setup_env.bat
     buildArch: x64
-    CommonBuildArgs: '--parallel --config ${{ parameters.BuildConfig }} --skip_submodule_sync --cmake_generator "MinGW Makefiles" --build_wasm --use_xnnpack ${{ parameters.ExtraBuildArgs }}'
+    CommonBuildArgs: '--parallel --config ${{ parameters.BuildConfig }} --skip_submodule_sync --cmake_generator "MinGW Makefiles" --build_wasm ${{ parameters.ExtraBuildArgs }}'
     runCodesignValidationInjection: false
   timeoutInMinutes: ${{ parameters.TimeoutInMinutes }}
   workspace:
     clean: all
   steps:
+  - task: mspremier.PostBuildCleanup.PostBuildCleanup-task.PostBuildCleanup@3
+    displayName: 'Clean Agent Directories'
+    condition: always()
   - checkout: self
   - task: DownloadPipelineArtifact@2
     inputs:
@@ -74,7 +81,7 @@ jobs:
       architecture: $(buildArch)
   - task: NodeTool@0
     inputs:
-      versionSpec: '16.x'
+      versionSpec: '18.x'
   - template: download-deps.yml
 
   - task: PythonScript@0
@@ -108,6 +115,13 @@ jobs:
       scriptPath: '$(Build.SourcesDirectory)\tools\ci_build\build.py'
       arguments: '$(CommonBuildArgs) --build_dir $(Build.BinariesDirectory)\wasm_simd --enable_wasm_simd'
       workingDirectory: '$(Build.BinariesDirectory)'
+  - ${{ if eq(parameters.BuildTraining, true) }}:
+    - task: PythonScript@0
+      displayName: 'Build (training + simd)'
+      inputs:
+        scriptPath: '$(Build.SourcesDirectory)\tools\ci_build\build.py'
+        arguments: '$(CommonBuildArgs) --build_dir $(Build.BinariesDirectory)\training_wasm_simd --enable_training_apis --enable_wasm_simd --target onnxruntime_webassembly --skip_tests'
+        workingDirectory: '$(Build.BinariesDirectory)'
   - ${{ if eq(parameters.BuildJsep, true) }}:
     - task: PythonScript@0
       displayName: 'Build (simd + JSEP)'
@@ -137,6 +151,10 @@ jobs:
           copy $(Build.BinariesDirectory)\wasm_simd_threads_jsep\${{ parameters.BuildConfig }}\ort-wasm-simd-threaded.js $(Build.ArtifactStagingDirectory)\ort-wasm-simd-threaded.jsep.js
           copy $(Build.BinariesDirectory)\wasm_simd_threads_jsep\${{ parameters.BuildConfig }}\ort-wasm-simd-threaded.worker.js $(Build.ArtifactStagingDirectory)\ort-wasm-simd-threaded.jsep.worker.js
         )
+        if exist $(Build.BinariesDirectory)\training_wasm_simd (
+          copy $(Build.BinariesDirectory)\training_wasm_simd\${{ parameters.BuildConfig }}\ort-training-wasm-simd.wasm $(Build.ArtifactStagingDirectory)\ort-training-wasm-simd.wasm
+          copy $(Build.BinariesDirectory)\training_wasm_simd\${{ parameters.BuildConfig }}\ort-training-wasm-simd.js $(Build.ArtifactStagingDirectory)\ort-training-wasm-simd.js
+        )
       displayName: 'Create Artifacts'
   - ${{ if eq(parameters.SkipPublish, false) }}:
     - task: PublishPipelineArtifact@0
@@ -154,6 +172,3 @@ jobs:
   - template: component-governance-component-detection-steps.yml
     parameters :
       condition : 'succeeded'
-  - task: mspremier.PostBuildCleanup.PostBuildCleanup-task.PostBuildCleanup@3
-    displayName: 'Clean Agent Directories'
-    condition: always()
diff --git a/tools/ci_build/github/azure-pipelines/templates/win-web-ci.yml b/tools/ci_build/github/azure-pipelines/templates/win-web-ci.yml
index bad7448715936..b7ec3305003d7 100644
--- a/tools/ci_build/github/azure-pipelines/templates/win-web-ci.yml
+++ b/tools/ci_build/github/azure-pipelines/templates/win-web-ci.yml
@@ -29,6 +29,7 @@ jobs:
   pool: ${{ parameters.PoolName }}
 
   variables:
+    webgpuCommandlineExtraFlags: '--chromium-flags=--ignore-gpu-blocklist --chromium-flags=--gpu-vendor-id=0x10de'
     runCodesignValidationInjection: false
   timeoutInMinutes: 60
   workspace:
@@ -72,7 +73,7 @@ jobs:
     displayName: 'Testing: force EOL to lf on windows for /js/**'
   - task: NodeTool@0
     inputs:
-      versionSpec: '16.x'
+      versionSpec: '18.x'
   - task: DownloadPipelineArtifact@2
     inputs:
       patterns: '${{ parameters.BuildConfig }}_*/**/*'
@@ -94,14 +95,22 @@ jobs:
       targetFolder: $(Build.SourcesDirectory)\js\web\lib\wasm\binding
       flattenFolders: true
     displayName: 'Binplace js files'
+  - script: |
+      npm i -g puppeteer
+    workingDirectory: '$(Build.SourcesDirectory)'
+    displayName: 'Use puppeteer to prepare Chrome for tests'
+  - script: |
+      FOR /F "tokens=* USEBACKQ" %%F IN (`where /r %HOMEDRIVE%%HOMEPATH%\.cache\puppeteer chrome.exe`) DO (
+        SET var=%%F
+        ECHO found chrome.exe: %%F
+      )
+      ECHO ##vso[task.setvariable variable=CHROME_BIN;]%var%
+    workingDirectory: '$(Build.SourcesDirectory)'
+    displayName: 'Set CHROME_BIN'
   - script: |
      npm ci
     workingDirectory: '$(Build.SourcesDirectory)\js'
     displayName: 'npm ci /js/'
-  - script: |
-     npm run lint
-    workingDirectory: '$(Build.SourcesDirectory)\js'
-    displayName: 'run ESLint without TS type populated'
   - script: |
      npm ci
     workingDirectory: '$(Build.SourcesDirectory)\js\common'
@@ -114,6 +123,10 @@ jobs:
      npm ci
     workingDirectory: '$(Build.SourcesDirectory)\js\web'
     displayName: 'npm ci /js/web/'
+  - script: |
+     npm run prebuild
+    workingDirectory: '$(Build.SourcesDirectory)\js\web'
+    displayName: 'run TypeScript type check in /js/web/'
   - script: |
      npm run lint
     workingDirectory: '$(Build.SourcesDirectory)\js'
@@ -156,26 +169,36 @@ jobs:
       errorActionPreference: stop
     displayName: 'Pack NPM packages'
   - script: |
-     npm test -- -e=edge -b=webgl,wasm,xnnpack
+     npm test -- -e=chrome -b=webgl,wasm,xnnpack
     workingDirectory: '$(Build.SourcesDirectory)\js\web'
     displayName: 'Run ort-web tests (wasm,webgl,xnnpack backend)'
-    condition: ne('${{ parameters.RunWebGpuTests }}', 'true')
+    condition: eq('${{ parameters.RunWebGpuTests }}', 'false')
   - script: |
-     npm test -- -e=edge -b=webgl,wasm,xnnpack,webgpu --chromium-flags=--ignore-gpu-blocklist --chromium-flags=--gpu-vendor-id=0x10de
+     npm test -- -e=chrome -b=webgl,wasm,xnnpack,webgpu $(webgpuCommandlineExtraFlags)
     workingDirectory: '$(Build.SourcesDirectory)\js\web'
     displayName: 'Run ort-web tests (ALL backends)'
-    condition: ne('${{ parameters.RunWebGpuTests }}', 'false')
+    condition: eq('${{ parameters.RunWebGpuTests }}', 'true')
+  - script: |
+     npm test -- suite1 -e=chrome -b=webgpu --io-binding=gpu-tensor $(webgpuCommandlineExtraFlags)
+    workingDirectory: '$(Build.SourcesDirectory)\js\web'
+    displayName: 'Run ort-web tests (Suite1, webgpu, IO-binding=gpu-tensor)'
+    condition: eq('${{ parameters.RunWebGpuTests }}', 'true')
+  - script: |
+     npm test -- suite1 -e=chrome -b=webgpu --io-binding=gpu-location $(webgpuCommandlineExtraFlags)
+    workingDirectory: '$(Build.SourcesDirectory)\js\web'
+    displayName: 'Run ort-web tests (Suite1, webgpu, IO-binding=gpu-location)'
+    condition: eq('${{ parameters.RunWebGpuTests }}', 'true')
   - script: |
-     npm test -- --webgl-texture-pack-mode -b=webgl -e=edge
+     npm test -- --webgl-texture-pack-mode -b=webgl -e=chrome
     workingDirectory: '$(Build.SourcesDirectory)\js\web'
     displayName: 'Run ort-web tests - WebGL: packed mode'
   - script: |
-     npm test -- --wasm-enable-proxy -b=wasm -e=edge
+     npm test -- --wasm-enable-proxy -b=wasm -e=chrome
     workingDirectory: '$(Build.SourcesDirectory)\js\web'
     displayName: 'Run ort-web tests - WebAssembly: proxy'
     condition: and(succeeded(), eq('${{ parameters.BuildConfig }}', 'Release'))
   - script: |
-      npm run test:e2e -- --browser=Edge_default
+      npm run test:e2e -- --browser=Chrome_default
     workingDirectory: '$(Build.SourcesDirectory)\js\web'
     displayName: 'E2E package consuming test'
     condition: and(succeeded(), eq('${{ parameters.BuildConfig }}', 'Release'))
diff --git a/tools/ci_build/github/azure-pipelines/templates/win-web-multi-browsers.yml b/tools/ci_build/github/azure-pipelines/templates/win-web-multi-browsers.yml
index 723567389579d..f7876f15029c1 100644
--- a/tools/ci_build/github/azure-pipelines/templates/win-web-multi-browsers.yml
+++ b/tools/ci_build/github/azure-pipelines/templates/win-web-multi-browsers.yml
@@ -33,7 +33,7 @@ jobs:
     displayName: 'Checkout submodule onnx'
   - task: NodeTool@0
     inputs:
-      versionSpec: '16.x'
+      versionSpec: '18.x'
   - task: DownloadPipelineArtifact@2
     inputs:
       patterns: 'Release_*/**/*'
diff --git a/tools/ci_build/github/azure-pipelines/win-ci-fuzz-testing.yml b/tools/ci_build/github/azure-pipelines/win-ci-fuzz-testing.yml
index f3a5728d6519b..98f1bf7ea1a16 100644
--- a/tools/ci_build/github/azure-pipelines/win-ci-fuzz-testing.yml
+++ b/tools/ci_build/github/azure-pipelines/win-ci-fuzz-testing.yml
@@ -32,7 +32,7 @@ jobs:
 
   - task: NodeTool@0
     inputs:
-      versionSpec: '16.x'
+      versionSpec: '18.x'
 
   - task: NuGetToolInstaller@0
     displayName: Use Nuget 5.7.0
diff --git a/tools/ci_build/github/azure-pipelines/win-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/win-ci-pipeline.yml
index 2a5622faf2905..d7ffc1828c943 100644
--- a/tools/ci_build/github/azure-pipelines/win-ci-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/win-ci-pipeline.yml
@@ -40,7 +40,6 @@ stages:
     - template: templates/jobs/win-ci-vs-2022-job.yml
       parameters:
         BuildConfig: 'Debug'
-        EnvSetupScript: setup_env.bat
         buildArch: x64
         additionalBuildFlags: --build_java --build_nodejs --build_wheel --disable_memleak_checker
         msbuildPlatform: x64
@@ -59,7 +58,6 @@ stages:
     - template: templates/jobs/win-ci-vs-2022-job.yml
       parameters:
         BuildConfig: 'RelWithDebInfo'
-        EnvSetupScript: setup_env.bat
         buildArch: x64
         # Compare to our Nuget packaging pipeline, this job has "--build_wheel" but doesn't have "--enable_lto --disable_rtti --use_telemetry  --enable_wcos"
         # Python bindings use typeid so I can't disable RTTI here. If it causes a problem, we will need to split this job to two jobs.
@@ -80,7 +78,6 @@ stages:
     - template: templates/jobs/win-ci-vs-2022-job.yml
       parameters:
         BuildConfig: 'RelWithDebInfo'
-        EnvSetupScript: setup_env.bat
         buildArch: x64
         additionalBuildFlags: --build_wheel --use_dnnl --build_java
         msbuildPlatform: x64
@@ -101,7 +98,6 @@ stages:
     - template: templates/jobs/win-ci-vs-2022-job.yml
       parameters:
         BuildConfig: 'RelWithDebInfo'
-        EnvSetupScript: setup_env.bat
         buildArch: x64
         additionalBuildFlags: --build_wheel --use_xnnpack
         msbuildPlatform: x64
@@ -120,7 +116,6 @@ stages:
     - template: templates/jobs/win-ci-vs-2022-job.yml
       parameters:
         BuildConfig: 'RelWithDebInfo'
-        EnvSetupScript: setup_env.bat
         buildArch: x64
         additionalBuildFlags: --use_winml --enable_wcos --disable_rtti --cmake_extra_defines CMAKE_SYSTEM_VERSION=10.0.22000.0
         msbuildPlatform: x64
@@ -132,7 +127,7 @@ stages:
         isTraining: false
         ORT_EP_NAME: CPU
         GenerateDocumentation: false
-        WITH_CACHE: true
+        WITH_CACHE: false
         MachinePool: 'onnxruntime-Win-CPU-2022'
 
 - stage: x86_release
@@ -160,7 +155,6 @@ stages:
     - template: templates/jobs/win-ci-vs-2022-job.yml
       parameters:
         BuildConfig: 'Debug'
-        EnvSetupScript: setup_env.bat
         buildArch: x64
         additionalBuildFlags: --enable_training --build_wheel --disable_memleak_checker
         msbuildPlatform: x64
@@ -179,7 +173,6 @@ stages:
     - template: templates/jobs/win-ci-vs-2022-job.yml
       parameters:
         BuildConfig: 'RelWithDebInfo'
-        EnvSetupScript: setup_env.bat
         buildArch: x64
         additionalBuildFlags: --enable_training --build_wheel
         msbuildPlatform: x64
@@ -198,7 +191,6 @@ stages:
     - template: templates/jobs/win-ci-vs-2022-job.yml
       parameters:
         BuildConfig: 'RelWithDebInfo'
-        EnvSetupScript: setup_env.bat
         buildArch: x64
         additionalBuildFlags: --enable_training_apis
         msbuildPlatform: x64
@@ -215,10 +207,17 @@ stages:
 - stage: x64_release_azure
   dependsOn: []
   jobs:
+    - job:
+      steps:
+      - powershell: |
+          Write-Host "##vso[task.prependpath]$(Build.BinariesDirectory)\RelWithDebInfo\_deps\vcpkg-src\installed\x86-windows\bin"
+          $env:PATH
+          Write-Host "##vso[task.prependpath]$(Build.BinariesDirectory)\RelWithDebInfo\_deps\vcpkg-src\installed\x64-windows\bin"
+          $env:PATH
+      displayName: 'Append x64-windows and x86-windows to PATH'
     - template: templates/jobs/win-ci-vs-2022-job.yml
       parameters:
         BuildConfig: 'RelWithDebInfo'
-        EnvSetupScript: setup_env_azure.bat
         buildArch: x64
         additionalBuildFlags: --use_azure --use_lock_free_queue
         msbuildPlatform: x64
@@ -231,3 +230,5 @@ stages:
         GenerateDocumentation: false
         WITH_CACHE: true
         MachinePool: 'onnxruntime-Win-CPU-2022'
+
+
diff --git a/tools/ci_build/github/azure-pipelines/win-gpu-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/win-gpu-ci-pipeline.yml
index 8796917afa37d..fdb9238071c9e 100644
--- a/tools/ci_build/github/azure-pipelines/win-gpu-ci-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/win-gpu-ci-pipeline.yml
@@ -40,7 +40,7 @@ stages:
     - template: templates/jobs/win-ci-vs-2022-job.yml
       parameters:
         BuildConfig: 'RelWithDebInfo'
-        EnvSetupScript: setup_env_cuda_11.bat
+        EnvSetupScript: setup_env_cuda.bat
         buildArch: x64
         additionalBuildFlags: --enable_pybind --build_java --build_nodejs --use_cuda --cuda_home="$(Agent.TempDirectory)\v11.8" --enable_cuda_profiling --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86
         msbuildPlatform: x64
@@ -57,7 +57,7 @@ stages:
     - template: templates/jobs/win-ci-vs-2022-job.yml
       parameters:
         BuildConfig: 'RelWithDebInfo'
-        EnvSetupScript: setup_env_cuda_11.bat
+        EnvSetupScript: setup_env_cuda.bat
         buildArch: x64
         additionalBuildFlags: --enable_pybind --enable_training --use_cuda --cuda_home="$(Agent.TempDirectory)\v11.8" --skip_onnx_tests --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=75
         msbuildPlatform: x64
@@ -84,7 +84,7 @@ stages:
         job_name_suffix: x64_RelWithDebInfo
         RunOnnxRuntimeTests: ${{ parameters.RunOnnxRuntimeTests }}
         ORT_EP_NAME: DML
-        WITH_CACHE: true
+        WITH_CACHE: false
         MachinePool: onnxruntime-Win2022-GPU-dml-A10
 
 - stage: kernelDocumentation
@@ -93,7 +93,7 @@ stages:
     - template: templates/jobs/win-ci-vs-2022-job.yml
       parameters:
         BuildConfig: 'RelWithDebInfo'
-        EnvSetupScript: setup_env_cuda_11.bat
+        EnvSetupScript: setup_env_cuda.bat
         buildArch: x64
         # note: need to specify `--gen_doc` when creating the build config so it has to be in additionalBuildFlags
         additionalBuildFlags: --gen_doc validate --skip_tests --enable_pybind --use_dml --use_cuda --cuda_home="$(Agent.TempDirectory)\v11.8" --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86 --cmake_extra_defines onnxruntime_BUILD_UNIT_TESTS=OFF
diff --git a/tools/ci_build/github/azure-pipelines/win-gpu-reduce-op-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/win-gpu-reduce-op-ci-pipeline.yml
index b5db8a5201405..d0f9772da7adc 100644
--- a/tools/ci_build/github/azure-pipelines/win-gpu-reduce-op-ci-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/win-gpu-reduce-op-ci-pipeline.yml
@@ -10,7 +10,7 @@ jobs:
         BuildConfig: 'MinSizeRel'
   variables:
     MsbuildArguments: '-detailedsummary -maxcpucount -consoleloggerparameters:PerformanceSummary'
-    EnvSetupScript: setup_env_cuda_11.bat
+    EnvSetupScript: setup_env_cuda.bat
     buildArch: x64
     TODAY: $[format('{0:dd}{0:MM}{0:yyyy}', pipeline.startTime)]
   timeoutInMinutes: 120
diff --git a/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml
index 15a786516396c..658c358aa4523 100644
--- a/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml
@@ -56,7 +56,7 @@ jobs:
       WithCache: True
       Today: $(TODAY)
       AdditionalKey: "gpu-tensorrt | $(BuildConfig)"
-      BuildPyArguments: '--config $(BuildConfig) --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --update --cmake_generator "Visual Studio 17 2022" --build_wheel --enable_onnx_tests --use_tensorrt --tensorrt_home="C:\local\TensorRT-8.6.1.6.Windows10.x86_64.cuda-11.8" --cuda_version=11.8 --cuda_home="$(Agent.TempDirectory)\v11.8" --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=75'
+      BuildPyArguments: '--config $(BuildConfig) --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --update --cmake_generator "Visual Studio 17 2022" --build_wheel --enable_onnx_tests --use_tensorrt --tensorrt_home="C:\local\TensorRT-8.6.1.6.Windows10.x86_64.cuda-11.8" --cuda_home="$(Agent.TempDirectory)\v11.8" --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=75'
       MsbuildArguments: $(MsbuildArguments)
       BuildArch: $(buildArch)
       Platform: 'x64'
@@ -76,7 +76,7 @@ jobs:
      del wheel_filename_file
      python.exe -m pip install -q --upgrade %WHEEL_FILENAME%
      set PATH=$(Build.BinariesDirectory)\$(BuildConfig)\$(BuildConfig);%PATH%
-     python $(Build.SourcesDirectory)\tools\ci_build\build.py --config $(BuildConfig) --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --test --cmake_generator "Visual Studio 17 2022" --build_wheel --enable_onnx_tests --use_tensorrt --tensorrt_home="C:\local\TensorRT-8.6.1.6.Windows10.x86_64.cuda-11.8" --cuda_version=11.8 --cuda_home="$(Agent.TempDirectory)\v11.8" --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=75
+     python $(Build.SourcesDirectory)\tools\ci_build\build.py --config $(BuildConfig) --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --test --cmake_generator "Visual Studio 17 2022" --build_wheel --enable_onnx_tests --use_tensorrt --tensorrt_home="C:\local\TensorRT-8.6.1.6.Windows10.x86_64.cuda-11.8"  --cuda_home="$(Agent.TempDirectory)\v11.8" --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=75
 
     workingDirectory: '$(Build.BinariesDirectory)\$(BuildConfig)\$(BuildConfig)'
     displayName: 'Run tests'
diff --git a/tools/ci_build/github/js/react_native_e2e_full_ios_framework_build_settings.json b/tools/ci_build/github/js/react_native_e2e_full_ios_framework_build_settings.json
index d15326de41099..78de7edb5ec29 100644
--- a/tools/ci_build/github/js/react_native_e2e_full_ios_framework_build_settings.json
+++ b/tools/ci_build/github/js/react_native_e2e_full_ios_framework_build_settings.json
@@ -4,13 +4,17 @@
             "x86_64"
         ]
     },
-    "build_params": [
-        "--ios",
-        "--parallel",
-        "--use_xcode",
-        "--build_apple_framework",
-        "--use_coreml",
-        "--skip_tests",
-        "--apple_deploy_target=12.0"
-    ]
+    "build_params": {
+        "base": [
+            "--parallel",
+            "--use_xcode",
+            "--build_apple_framework",
+            "--use_coreml",
+            "--skip_tests"
+        ],
+        "iphonesimulator": [
+            "--ios",
+            "--apple_deploy_target=12.0"
+        ]
+    }
 }
diff --git a/tools/ci_build/github/js/react_native_e2e_mobile_ios_framework_build_settings.json b/tools/ci_build/github/js/react_native_e2e_mobile_ios_framework_build_settings.json
index e733885399f72..3d80231393cc6 100644
--- a/tools/ci_build/github/js/react_native_e2e_mobile_ios_framework_build_settings.json
+++ b/tools/ci_build/github/js/react_native_e2e_mobile_ios_framework_build_settings.json
@@ -4,18 +4,22 @@
             "x86_64"
         ]
     },
-    "build_params": [
-        "--ios",
-        "--parallel",
-        "--use_xcode",
-        "--build_apple_framework",
-        "--minimal_build=extended",
-        "--disable_rtti",
-        "--disable_ml_ops",
-        "--disable_exceptions",
-        "--enable_reduced_operator_type_support",
-        "--use_coreml",
-        "--skip_tests",
-        "--apple_deploy_target=12.0"
-    ]
+    "build_params": {
+        "base": [
+            "--parallel",
+            "--use_xcode",
+            "--build_apple_framework",
+            "--minimal_build=extended",
+            "--disable_rtti",
+            "--disable_ml_ops",
+            "--disable_exceptions",
+            "--enable_reduced_operator_type_support",
+            "--use_coreml",
+            "--skip_tests"
+        ],
+        "iphonesimulator": [
+            "--ios",
+            "--apple_deploy_target=12.0"
+        ]
+    }
 }
diff --git a/tools/ci_build/github/linux/build_cuda_c_api_package.sh b/tools/ci_build/github/linux/build_cuda_c_api_package.sh
index 5cd1c8c243050..2ec8bc82ae048 100755
--- a/tools/ci_build/github/linux/build_cuda_c_api_package.sh
+++ b/tools/ci_build/github/linux/build_cuda_c_api_package.sh
@@ -4,7 +4,7 @@ export CFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protect
 export CXXFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all"
 docker run --gpus all -e CFLAGS -e CXXFLAGS  -e NVIDIA_VISIBLE_DEVICES=all --rm --volume \
 $BUILD_SOURCESDIRECTORY:/onnxruntime_src --volume $BUILD_BINARIESDIRECTORY:/build \
---volume /data/models:/build/models:ro --volume /data/onnx:/data/onnx:ro -e NIGHTLY_BUILD onnxruntimecuda11centosbuild \
+--volume /data/models:/build/models:ro --volume /data/onnx:/data/onnx:ro -e NIGHTLY_BUILD onnxruntimecuda${CUDA_VERSION_MAJOR}build \
 /usr/bin/python3.9 /onnxruntime_src/tools/ci_build/build.py --build_java --build_nodejs --build_dir /build --config Release \
 --skip_submodule_sync  --parallel --build_shared_lib --use_cuda --cuda_version=$CUDA_VERSION \
 --cuda_home=/usr/local/cuda-$CUDA_VERSION --cudnn_home=/usr/local/cuda-$CUDA_VERSION \
diff --git a/tools/ci_build/github/linux/build_linux_arm64_python_package.sh b/tools/ci_build/github/linux/build_linux_python_package.sh
similarity index 78%
rename from tools/ci_build/github/linux/build_linux_arm64_python_package.sh
rename to tools/ci_build/github/linux/build_linux_python_package.sh
index 516f320cd64c4..3c1c65c9a6862 100755
--- a/tools/ci_build/github/linux/build_linux_arm64_python_package.sh
+++ b/tools/ci_build/github/linux/build_linux_python_package.sh
@@ -15,9 +15,11 @@ do case "${parameter_Option}"
 in
 #GPU or CPU.
 d) BUILD_DEVICE=${OPTARG};;
-p) PYTHON_EXES=(${OPTARG});;
-x) EXTRA_ARG=(${OPTARG});;
+p) PYTHON_EXES=${OPTARG};;
+x) EXTRA_ARG=${OPTARG};;
 c) BUILD_CONFIG=${OPTARG};;
+*) echo "Usage: $0 -d <GPU|CPU> [-p <python_exe_path>] [-x <extra_build_arg>] [-c <build_config>]"
+   exit 1;;
 esac
 done
 
@@ -48,7 +50,7 @@ if [ "$ARCH" == "x86_64" ] && [ "$GCC_VERSION" -ge 9 ]; then
 fi
 
 echo "EXTRA_ARG:"
-echo $EXTRA_ARG
+echo "$EXTRA_ARG"
 
 if [ "$EXTRA_ARG" != "" ]; then
     BUILD_ARGS+=("$EXTRA_ARG")
@@ -60,19 +62,19 @@ if [ "$ARCH" == "x86_64" ]; then
 fi
 
 if [ "$BUILD_DEVICE" == "GPU" ]; then
+    SHORT_CUDA_VERSION=$(echo $CUDA_VERSION | sed   's/\([[:digit:]]\+\.[[:digit:]]\+\)\.[[:digit:]]\+/\1/')
     #Enable CUDA and TRT EPs.
-    ONNXRUNTIME_CUDA_VERSION="11.8"
-    BUILD_ARGS+=("--nvcc_threads=1" "--use_cuda" "--use_tensorrt" "--cuda_version=$ONNXRUNTIME_CUDA_VERSION" "--tensorrt_home=/usr" "--cuda_home=/usr/local/cuda-$ONNXRUNTIME_CUDA_VERSION" "--cudnn_home=/usr/local/cuda-$ONNXRUNTIME_CUDA_VERSION" "--cmake_extra_defines" "CMAKE_CUDA_ARCHITECTURES=52;60;61;70;75;80")
+    BUILD_ARGS+=("--nvcc_threads=1" "--use_cuda" "--use_tensorrt" "--cuda_version=$SHORT_CUDA_VERSION" "--tensorrt_home=/usr" "--cuda_home=/usr/local/cuda-$SHORT_CUDA_VERSION" "--cudnn_home=/usr/local/cuda-$SHORT_CUDA_VERSION" "--cmake_extra_defines" "CMAKE_CUDA_ARCHITECTURES=52;60;61;70;75;80")
 fi
 
 export CFLAGS
 export CXXFLAGS
 for PYTHON_EXE in "${PYTHON_EXES[@]}"
 do
-  rm -rf /build/$BUILD_CONFIG
+  rm -rf /build/"$BUILD_CONFIG"
   ${PYTHON_EXE} /onnxruntime_src/tools/ci_build/build.py "${BUILD_ARGS[@]}"
 
-  cp /build/$BUILD_CONFIG/dist/*.whl /build/dist
+  cp /build/"$BUILD_CONFIG"/dist/*.whl /build/dist
 done
 
 which ccache && ccache -sv && ccache -z
diff --git a/tools/ci_build/github/linux/build_tensorrt_c_api_package.sh b/tools/ci_build/github/linux/build_tensorrt_c_api_package.sh
index 18a32e3599391..5bf6a69170074 100755
--- a/tools/ci_build/github/linux/build_tensorrt_c_api_package.sh
+++ b/tools/ci_build/github/linux/build_tensorrt_c_api_package.sh
@@ -4,6 +4,6 @@ export CFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protect
 export CXXFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all"
 mkdir -p $HOME/.onnx
 docker run --gpus all -e CFLAGS -e CXXFLAGS -e NVIDIA_VISIBLE_DEVICES=all --rm --volume /data/onnx:/data/onnx:ro --volume $BUILD_SOURCESDIRECTORY:/onnxruntime_src --volume $BUILD_BINARIESDIRECTORY:/build \
---volume /data/models:/build/models:ro --volume $HOME/.onnx:/home/onnxruntimedev/.onnx -e NIGHTLY_BUILD onnxruntimecuda118xtrt86build \
+--volume /data/models:/build/models:ro --volume $HOME/.onnx:/home/onnxruntimedev/.onnx -e NIGHTLY_BUILD onnxruntimecuda${CUDA_VERSION_MAJOR}xtrt86build \
 /opt/python/cp38-cp38/bin/python3 /onnxruntime_src/tools/ci_build/build.py --build_dir /build --config Release \
 --skip_submodule_sync --parallel --build_shared_lib --build_java --build_nodejs --use_tensorrt --cuda_version=$CUDA_VERSION --cuda_home=/usr/local/cuda-$CUDA_VERSION --cudnn_home=/usr --tensorrt_home=/usr --cmake_extra_defines 'CMAKE_CUDA_ARCHITECTURES=60;61;70;75;80'
diff --git a/tools/ci_build/github/linux/copy_strip_binary.sh b/tools/ci_build/github/linux/copy_strip_binary.sh
index 63690b69fc91a..42973a8fcb5b8 100755
--- a/tools/ci_build/github/linux/copy_strip_binary.sh
+++ b/tools/ci_build/github/linux/copy_strip_binary.sh
@@ -27,7 +27,6 @@ if [[ -f "$BINARY_DIR/$BUILD_CONFIG/libonnxruntime_providers_cuda.so" ]]; then
 fi
 if [[ -f "$BINARY_DIR/$BUILD_CONFIG/libonnxruntime_providers_tensorrt.so" ]]; then
     cp $BINARY_DIR/$BUILD_CONFIG/libonnxruntime_providers_tensorrt.so $BINARY_DIR/$ARTIFACT_NAME/lib
-    cp $SOURCE_DIR/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h  $BINARY_DIR/$ARTIFACT_NAME/include
 fi
 if [[ -f "$BINARY_DIR/$BUILD_CONFIG/libonnxruntime_providers_rocm.so" ]]; then
     cp $BINARY_DIR/$BUILD_CONFIG/libonnxruntime_providers_shared.so $BINARY_DIR/$ARTIFACT_NAME/lib
@@ -57,6 +56,15 @@ cp $SOURCE_DIR/orttraining/orttraining/training_api/include/onnxruntime_training
 cp $SOURCE_DIR/orttraining/orttraining/training_api/include/onnxruntime_training_cxx_api.h  $BINARY_DIR/$ARTIFACT_NAME/include
 cp $SOURCE_DIR/orttraining/orttraining/training_api/include/onnxruntime_training_cxx_inline.h  $BINARY_DIR/$ARTIFACT_NAME/include
 
+if [[ -f "$BINARY_DIR/$BUILD_CONFIG/libonnxruntime_providers_cuda.so" ]]; then
+# copy headers for context context used in custom ops
+mkdir -p $BINARY_DIR/$ARTIFACT_NAME/include/core/providers/cuda
+cp $SOURCE_DIR/include/onnxruntime/core/providers/custom_op_context.h $BINARY_DIR/$ARTIFACT_NAME/include/core/providers/custom_op_context.h
+cp $SOURCE_DIR/include/onnxruntime/core/providers/resource.h $BINARY_DIR/$ARTIFACT_NAME/include/core/providers/resource.h
+cp $SOURCE_DIR/include/onnxruntime/core/providers/cuda/cuda_context.h $BINARY_DIR/$ARTIFACT_NAME/include/core/providers/cuda/cuda_context.h
+cp $SOURCE_DIR/include/onnxruntime/core/providers/cuda/cuda_resource.h $BINARY_DIR/$ARTIFACT_NAME/include/core/providers/cuda/cuda_resource.h
+fi
+
 # copy the README, licence and TPN
 cp $SOURCE_DIR/README.md $BINARY_DIR/$ARTIFACT_NAME/README.md
 cp $SOURCE_DIR/docs/Privacy.md $BINARY_DIR/$ARTIFACT_NAME/Privacy.md
diff --git a/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda11_6_tensorrt8_5 b/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda
similarity index 82%
rename from tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda11_6_tensorrt8_5
rename to tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda
index 0337ffc5e00a0..8f265b208cd47 100644
--- a/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda11_6_tensorrt8_5
+++ b/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda
@@ -1,22 +1,23 @@
-ARG BASEIMAGE=nvidia/cuda:11.6.1-cudnn8-devel-centos7
-ARG POLICY=manylinux2014
+# The default ARGs are for cuda 11.8 with cudnn8,TensorRT is optional
+# Please overwirete BASEIMAGE, TRT_VERSION and other arguments with
+# --docker-build-args ' --build-arg BASEIMAGE=other_base_image --build-arg TRT_VERSION=other_trt_version  etc...'
+# for other cuda version and TRT version
+ARG POLICY=manylinux_2_28
 ARG PLATFORM=x86_64
-ARG DEVTOOLSET_ROOTPATH=
-ARG LD_LIBRARY_PATH_ARG=
-ARG PREPEND_PATH=
+ARG BASEIMAGE=nvidia/cuda:11.8.0-cudnn8-devel-ubi8
+ARG DEVTOOLSET_ROOTPATH=/usr
+ARG LD_LIBRARY_PATH_ARG=/usr/local/lib64
+ARG PREPEND_PATH=/usr/local/cuda/binet
+ARG TRT_VERSION=8.6.1.6-1.cuda11.8
 
-#We need CUDA, TensorRT and manylinux. But the CUDA Toolkit End User License Agreement says NVIDIA CUDA Driver Libraries(libcuda.so, libnvidia-ptxjitcompiler.so) are only distributable in applications that meet this criteria:
-#1. The application was developed starting from a NVIDIA CUDA container obtained from Docker Hub or the NVIDIA GPU Cloud, and
-#2. The resulting application is packaged as a Docker container and distributed to users on Docker Hub or the NVIDIA GPU Cloud only.
-#So we use CUDA as the base image then add manylinux and TensorRT on top of it.
-
-#Build manylinux2014 docker image begin
+#Build manylinux docker image begin
 FROM $BASEIMAGE AS runtime_base
 ARG POLICY
 ARG PLATFORM
 ARG DEVTOOLSET_ROOTPATH
 ARG LD_LIBRARY_PATH_ARG
 ARG PREPEND_PATH
+ARG TRT_VERSION
 LABEL maintainer="The ManyLinux project"
 
 ENV AUDITWHEEL_POLICY=${POLICY} AUDITWHEEL_ARCH=${PLATFORM} AUDITWHEEL_PLAT=${POLICY}_${PLATFORM}
@@ -100,7 +101,6 @@ RUN export OPENSSL_ROOT=openssl-1.1.1q && \
 
 COPY build_scripts/build-cpython.sh /build_scripts/
 
-
 FROM build_cpython AS build_cpython38
 COPY build_scripts/ambv-pubkey.txt /build_scripts/cpython-pubkeys.txt
 RUN manylinux-entrypoint /build_scripts/build-cpython.sh 3.8.13
@@ -124,6 +124,7 @@ COPY build_scripts/install-pypy.sh \
      build_scripts/pypy.sha256 \
      build_scripts/finalize-python.sh \
      /build_scripts/
+
 RUN manylinux-entrypoint /build_scripts/install-pypy.sh 3.8 7.3.9
 RUN manylinux-entrypoint /build_scripts/install-pypy.sh 3.9 7.3.9
 COPY --from=build_cpython38 /opt/_internal /opt/_internal/
@@ -132,7 +133,6 @@ COPY --from=build_cpython310 /opt/_internal /opt/_internal/
 COPY --from=build_cpython311 /opt/_internal /opt/_internal/
 RUN manylinux-entrypoint /build_scripts/finalize-python.sh
 
-
 FROM runtime_base
 COPY --from=build_git /manylinux-rootfs /
 COPY --from=build_cpython /manylinux-rootfs /
@@ -152,14 +152,28 @@ ENV SSL_CERT_FILE=/opt/_internal/certs.pem
 
 CMD ["/bin/bash"]
 
-#Build manylinux2014 docker image end
+#Build manylinux docker image end
+
+#Install TensorRT only if TRT_VERSION is not empty
+RUN if [ -n "$TRT_VERSION" ]; then  \
+    echo "TRT_VERSION is $TRT_VERSION" && \
+    dnf -y install  \
+    libnvinfer8-${TRT_VERSION}  \
+    libnvparsers8-${TRT_VERSION}  \
+    libnvonnxparsers8-${TRT_VERSION}  \
+    libnvinfer-plugin8-${TRT_VERSION}  \
+    libnvinfer-vc-plugin8-${TRT_VERSION}  \
+    libnvinfer-devel-${TRT_VERSION}  \
+    libnvparsers-devel-${TRT_VERSION}  \
+    libnvonnxparsers-devel-${TRT_VERSION}  \
+    libnvinfer-plugin-devel-${TRT_VERSION}  \
+    libnvinfer-vc-plugin-devel-${TRT_VERSION}  \
+    libnvinfer-headers-devel-${TRT_VERSION}  \
+    libnvinfer-headers-plugin-devel-${TRT_VERSION};  \
+else \
+    echo "TRT_VERSION is none skipping Tensor RT Installation" ; \
+fi
 
-#Install TensorRT 8.5.1.7
-#RUN yum install -y wget
-RUN v="8.5.1-1.cuda11.8" &&\
-    yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo &&\
-    yum -y install libnvinfer8-${v} libnvparsers8-${v} libnvonnxparsers8-${v} libnvinfer-plugin8-${v} \
-        libnvinfer-devel-${v} libnvparsers-devel-${v} libnvonnxparsers-devel-${v} libnvinfer-plugin-devel-${v}
 ENV JAVA_HOME=/usr/lib/jvm/msopenjdk-11
 #Add our own dependencies
 ADD scripts /tmp/scripts
@@ -171,3 +185,4 @@ RUN adduser --uid $BUILD_UID $BUILD_USER
 WORKDIR /home/$BUILD_USER
 USER $BUILD_USER
 ENV PATH /usr/local/dotnet:$PATH
+ENV CUDA_MODULE_LOADING "LAZY"
\ No newline at end of file
diff --git a/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda11_6_tensorrt8_4 b/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda11_6_tensorrt8_4
deleted file mode 100644
index 003bb2324c049..0000000000000
--- a/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda11_6_tensorrt8_4
+++ /dev/null
@@ -1,173 +0,0 @@
-ARG BASEIMAGE=nvidia/cuda:11.6.1-cudnn8-devel-centos7
-ARG POLICY=manylinux2014
-ARG PLATFORM=x86_64
-ARG DEVTOOLSET_ROOTPATH=
-ARG LD_LIBRARY_PATH_ARG=
-ARG PREPEND_PATH=
-
-#We need CUDA, TensorRT and manylinux. But the CUDA Toolkit End User License Agreement says NVIDIA CUDA Driver Libraries(libcuda.so, libnvidia-ptxjitcompiler.so) are only distributable in applications that meet this criteria:
-#1. The application was developed starting from a NVIDIA CUDA container obtained from Docker Hub or the NVIDIA GPU Cloud, and
-#2. The resulting application is packaged as a Docker container and distributed to users on Docker Hub or the NVIDIA GPU Cloud only.
-#So we use CUDA as the base image then add manylinux and TensorRT on top of it.
-
-#Build manylinux2014 docker image begin
-FROM $BASEIMAGE AS runtime_base
-ARG POLICY
-ARG PLATFORM
-ARG DEVTOOLSET_ROOTPATH
-ARG LD_LIBRARY_PATH_ARG
-ARG PREPEND_PATH
-LABEL maintainer="The ManyLinux project"
-
-ENV AUDITWHEEL_POLICY=${POLICY} AUDITWHEEL_ARCH=${PLATFORM} AUDITWHEEL_PLAT=${POLICY}_${PLATFORM}
-ENV LC_ALL=en_US.UTF-8 LANG=en_US.UTF-8 LANGUAGE=en_US.UTF-8
-ENV DEVTOOLSET_ROOTPATH=${DEVTOOLSET_ROOTPATH}
-ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH_ARG}
-ENV PATH=${PREPEND_PATH}${PATH}
-ENV PKG_CONFIG_PATH=/usr/local/lib/pkgconfig
-
-# first copy the fixup mirrors script, keep the script around
-COPY build_scripts/fixup-mirrors.sh /usr/local/sbin/fixup-mirrors
-
-# setup entrypoint, this will wrap commands with `linux32` with i686 images
-COPY build_scripts/install-entrypoint.sh \
-     build_scripts/build_utils.sh \
-     /build_scripts/
-
-RUN /build_scripts/install-entrypoint.sh && rm -rf /build_scripts
-COPY manylinux-entrypoint /usr/local/bin/manylinux-entrypoint
-ENTRYPOINT ["manylinux-entrypoint"]
-
-COPY build_scripts/install-runtime-packages.sh \
-     build_scripts/build_utils.sh \
-     /build_scripts/
-RUN manylinux-entrypoint /build_scripts/install-runtime-packages.sh && rm -rf /build_scripts/
-
-COPY build_scripts/build_utils.sh /build_scripts/
-
-COPY build_scripts/install-autoconf.sh /build_scripts/
-RUN export AUTOCONF_ROOT=autoconf-2.71 && \
-    export AUTOCONF_HASH=431075ad0bf529ef13cb41e9042c542381103e80015686222b8a9d4abef42a1c && \
-    export AUTOCONF_DOWNLOAD_URL=http://ftp.gnu.org/gnu/autoconf && \
-    manylinux-entrypoint /build_scripts/install-autoconf.sh
-
-COPY build_scripts/install-automake.sh /build_scripts/
-RUN export AUTOMAKE_ROOT=automake-1.16.5 && \
-    export AUTOMAKE_HASH=07bd24ad08a64bc17250ce09ec56e921d6343903943e99ccf63bbf0705e34605 && \
-    export AUTOMAKE_DOWNLOAD_URL=http://ftp.gnu.org/gnu/automake && \
-    manylinux-entrypoint /build_scripts/install-automake.sh
-
-COPY build_scripts/install-libtool.sh /build_scripts/
-RUN export LIBTOOL_ROOT=libtool-2.4.7 && \
-    export LIBTOOL_HASH=04e96c2404ea70c590c546eba4202a4e12722c640016c12b9b2f1ce3d481e9a8 && \
-    export LIBTOOL_DOWNLOAD_URL=http://ftp.gnu.org/gnu/libtool && \
-    manylinux-entrypoint /build_scripts/install-libtool.sh
-
-COPY build_scripts/install-libxcrypt.sh /build_scripts/
-RUN export LIBXCRYPT_VERSION=4.4.28 && \
-    export LIBXCRYPT_HASH=db7e37901969cb1d1e8020cb73a991ef81e48e31ea5b76a101862c806426b457 && \
-    export LIBXCRYPT_DOWNLOAD_URL=https://github.com/besser82/libxcrypt/archive && \
-    export PERL_ROOT=perl-5.34.0 && \
-    export PERL_HASH=551efc818b968b05216024fb0b727ef2ad4c100f8cb6b43fab615fa78ae5be9a && \
-    export PERL_DOWNLOAD_URL=https://www.cpan.org/src/5.0 && \
-    manylinux-entrypoint /build_scripts/install-libxcrypt.sh
-
-FROM runtime_base AS build_base
-COPY build_scripts/install-build-packages.sh /build_scripts/
-RUN manylinux-entrypoint /build_scripts/install-build-packages.sh
-
-
-FROM build_base AS build_git
-COPY build_scripts/build-git.sh /build_scripts/
-RUN export GIT_ROOT=git-2.36.2 && \
-    export GIT_HASH=6dc2cdea5fb23d823ba4871cc23222c1db31dfbb6d6c6ff74c4128700df57c68 && \
-    export GIT_DOWNLOAD_URL=https://www.kernel.org/pub/software/scm/git && \
-    manylinux-entrypoint /build_scripts/build-git.sh
-
-
-FROM build_base AS build_cpython
-COPY build_scripts/build-sqlite3.sh /build_scripts/
-RUN export SQLITE_AUTOCONF_ROOT=sqlite-autoconf-3390200 && \
-    export SQLITE_AUTOCONF_HASH=852be8a6183a17ba47cee0bbff7400b7aa5affd283bf3beefc34fcd088a239de && \
-    export SQLITE_AUTOCONF_DOWNLOAD_URL=https://www.sqlite.org/2022 && \
-    manylinux-entrypoint /build_scripts/build-sqlite3.sh
-
-COPY build_scripts/build-openssl.sh /build_scripts/
-RUN export OPENSSL_ROOT=openssl-1.1.1q && \
-    export OPENSSL_HASH=d7939ce614029cdff0b6c20f0e2e5703158a489a72b2507b8bd51bf8c8fd10ca && \
-    export OPENSSL_DOWNLOAD_URL=https://www.openssl.org/source && \
-    manylinux-entrypoint /build_scripts/build-openssl.sh
-
-COPY build_scripts/build-cpython.sh /build_scripts/
-
-
-FROM build_cpython AS build_cpython38
-COPY build_scripts/ambv-pubkey.txt /build_scripts/cpython-pubkeys.txt
-RUN manylinux-entrypoint /build_scripts/build-cpython.sh 3.8.13
-
-
-FROM build_cpython AS build_cpython39
-COPY build_scripts/ambv-pubkey.txt /build_scripts/cpython-pubkeys.txt
-RUN manylinux-entrypoint /build_scripts/build-cpython.sh 3.9.13
-
-
-FROM build_cpython AS build_cpython310
-COPY build_scripts/cpython-pubkey-310-311.txt /build_scripts/cpython-pubkeys.txt
-RUN manylinux-entrypoint /build_scripts/build-cpython.sh 3.10.5
-
-FROM build_cpython AS build_cpython311
-COPY build_scripts/cpython-pubkey-310-311.txt /build_scripts/cpython-pubkeys.txt
-RUN manylinux-entrypoint /build_scripts/build-cpython.sh 3.11.2
-
-FROM build_cpython AS all_python
-COPY build_scripts/install-pypy.sh \
-     build_scripts/pypy.sha256 \
-     build_scripts/finalize-python.sh \
-     /build_scripts/
-RUN manylinux-entrypoint /build_scripts/install-pypy.sh 3.8 7.3.9
-RUN manylinux-entrypoint /build_scripts/install-pypy.sh 3.9 7.3.9
-COPY --from=build_cpython38 /opt/_internal /opt/_internal/
-COPY --from=build_cpython39 /opt/_internal /opt/_internal/
-COPY --from=build_cpython310 /opt/_internal /opt/_internal/
-COPY --from=build_cpython311 /opt/_internal /opt/_internal/
-RUN manylinux-entrypoint /build_scripts/finalize-python.sh
-
-
-FROM runtime_base
-COPY --from=build_git /manylinux-rootfs /
-COPY --from=build_cpython /manylinux-rootfs /
-COPY --from=all_python /opt/_internal /opt/_internal/
-COPY build_scripts/finalize.sh \
-     build_scripts/python-tag-abi-tag.py \
-     build_scripts/requirements3.8.txt \
-     build_scripts/requirements3.9.txt \
-     build_scripts/requirements3.10.txt \
-     build_scripts/requirements3.11.txt \
-     build_scripts/requirements-base-tools.txt \
-     /build_scripts/
-COPY build_scripts/requirements-tools/* /build_scripts/requirements-tools/
-RUN manylinux-entrypoint /build_scripts/finalize.sh && rm -rf /build_scripts
-
-ENV SSL_CERT_FILE=/opt/_internal/certs.pem
-
-CMD ["/bin/bash"]
-
-#Build manylinux2014 docker image end
-
-#Install TensorRT 8.4.1.5
-#RUN yum install -y wget
-RUN v="8.4.1-1.cuda11.6" &&\
-    yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo &&\
-    yum -y install libnvinfer8-${v} libnvparsers8-${v} libnvonnxparsers8-${v} libnvinfer-plugin8-${v} \
-        libnvinfer-devel-${v} libnvparsers-devel-${v} libnvonnxparsers-devel-${v} libnvinfer-plugin-devel-${v}
-ENV JAVA_HOME=/usr/lib/jvm/msopenjdk-11
-#Add our own dependencies
-ADD scripts /tmp/scripts
-RUN cd /tmp/scripts && /tmp/scripts/manylinux/install_centos.sh && /tmp/scripts/manylinux/install_deps.sh && rm -rf /tmp/scripts
-
-ARG BUILD_UID=1001
-ARG BUILD_USER=onnxruntimedev
-RUN adduser --uid $BUILD_UID $BUILD_USER
-WORKDIR /home/$BUILD_USER
-USER $BUILD_USER
-ENV PATH /usr/local/dotnet:$PATH
diff --git a/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda11_8_tensorrt8_6 b/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda11_8_tensorrt8_6
deleted file mode 100644
index 2c953a10cbf64..0000000000000
--- a/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda11_8_tensorrt8_6
+++ /dev/null
@@ -1,182 +0,0 @@
-ARG BASEIMAGE=nvidia/cuda:11.8.0-cudnn8-devel-ubi8
-ARG POLICY=manylinux_2_28
-ARG PLATFORM=x86_64
-ARG DEVTOOLSET_ROOTPATH=
-ARG LD_LIBRARY_PATH_ARG=
-ARG PREPEND_PATH=
-
-#We need CUDA, TensorRT and manylinux. But the CUDA Toolkit End User License Agreement says NVIDIA CUDA Driver Libraries(libcuda.so, libnvidia-ptxjitcompiler.so) are only distributable in applications that meet this criteria:
-#1. The application was developed starting from a NVIDIA CUDA container obtained from Docker Hub or the NVIDIA GPU Cloud, and
-#2. The resulting application is packaged as a Docker container and distributed to users on Docker Hub or the NVIDIA GPU Cloud only.
-#So we use CUDA as the base image then add manylinux and TensorRT on top of it.
-
-#Build manylinux2014 docker image begin
-FROM $BASEIMAGE AS runtime_base
-ARG POLICY
-ARG PLATFORM
-ARG DEVTOOLSET_ROOTPATH
-ARG LD_LIBRARY_PATH_ARG
-ARG PREPEND_PATH
-LABEL maintainer="The ManyLinux project"
-
-ENV AUDITWHEEL_POLICY=${POLICY} AUDITWHEEL_ARCH=${PLATFORM} AUDITWHEEL_PLAT=${POLICY}_${PLATFORM}
-ENV LC_ALL=en_US.UTF-8 LANG=en_US.UTF-8 LANGUAGE=en_US.UTF-8
-ENV DEVTOOLSET_ROOTPATH=${DEVTOOLSET_ROOTPATH}
-ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH_ARG}
-ENV PATH=${PREPEND_PATH}${PATH}
-ENV PKG_CONFIG_PATH=/usr/local/lib/pkgconfig
-
-# first copy the fixup mirrors script, keep the script around
-COPY build_scripts/fixup-mirrors.sh /usr/local/sbin/fixup-mirrors
-
-# setup entrypoint, this will wrap commands with `linux32` with i686 images
-COPY build_scripts/install-entrypoint.sh \
-     build_scripts/build_utils.sh \
-     /build_scripts/
-
-RUN /build_scripts/install-entrypoint.sh && rm -rf /build_scripts
-COPY manylinux-entrypoint /usr/local/bin/manylinux-entrypoint
-ENTRYPOINT ["manylinux-entrypoint"]
-
-COPY build_scripts/install-runtime-packages.sh \
-     build_scripts/build_utils.sh \
-     /build_scripts/
-RUN manylinux-entrypoint /build_scripts/install-runtime-packages.sh && rm -rf /build_scripts/
-
-COPY build_scripts/build_utils.sh /build_scripts/
-
-COPY build_scripts/install-autoconf.sh /build_scripts/
-RUN export AUTOCONF_ROOT=autoconf-2.71 && \
-    export AUTOCONF_HASH=431075ad0bf529ef13cb41e9042c542381103e80015686222b8a9d4abef42a1c && \
-    export AUTOCONF_DOWNLOAD_URL=http://ftp.gnu.org/gnu/autoconf && \
-    manylinux-entrypoint /build_scripts/install-autoconf.sh
-
-COPY build_scripts/install-automake.sh /build_scripts/
-RUN export AUTOMAKE_ROOT=automake-1.16.5 && \
-    export AUTOMAKE_HASH=07bd24ad08a64bc17250ce09ec56e921d6343903943e99ccf63bbf0705e34605 && \
-    export AUTOMAKE_DOWNLOAD_URL=http://ftp.gnu.org/gnu/automake && \
-    manylinux-entrypoint /build_scripts/install-automake.sh
-
-COPY build_scripts/install-libtool.sh /build_scripts/
-RUN export LIBTOOL_ROOT=libtool-2.4.7 && \
-    export LIBTOOL_HASH=04e96c2404ea70c590c546eba4202a4e12722c640016c12b9b2f1ce3d481e9a8 && \
-    export LIBTOOL_DOWNLOAD_URL=http://ftp.gnu.org/gnu/libtool && \
-    manylinux-entrypoint /build_scripts/install-libtool.sh
-
-COPY build_scripts/install-libxcrypt.sh /build_scripts/
-RUN export LIBXCRYPT_VERSION=4.4.28 && \
-    export LIBXCRYPT_HASH=db7e37901969cb1d1e8020cb73a991ef81e48e31ea5b76a101862c806426b457 && \
-    export LIBXCRYPT_DOWNLOAD_URL=https://github.com/besser82/libxcrypt/archive && \
-    export PERL_ROOT=perl-5.34.0 && \
-    export PERL_HASH=551efc818b968b05216024fb0b727ef2ad4c100f8cb6b43fab615fa78ae5be9a && \
-    export PERL_DOWNLOAD_URL=https://www.cpan.org/src/5.0 && \
-    manylinux-entrypoint /build_scripts/install-libxcrypt.sh
-
-FROM runtime_base AS build_base
-COPY build_scripts/install-build-packages.sh /build_scripts/
-RUN manylinux-entrypoint /build_scripts/install-build-packages.sh
-
-
-FROM build_base AS build_git
-COPY build_scripts/build-git.sh /build_scripts/
-RUN export GIT_ROOT=git-2.36.2 && \
-    export GIT_HASH=6dc2cdea5fb23d823ba4871cc23222c1db31dfbb6d6c6ff74c4128700df57c68 && \
-    export GIT_DOWNLOAD_URL=https://www.kernel.org/pub/software/scm/git && \
-    manylinux-entrypoint /build_scripts/build-git.sh
-
-
-FROM build_base AS build_cpython
-COPY build_scripts/build-sqlite3.sh /build_scripts/
-RUN export SQLITE_AUTOCONF_ROOT=sqlite-autoconf-3390200 && \
-    export SQLITE_AUTOCONF_HASH=852be8a6183a17ba47cee0bbff7400b7aa5affd283bf3beefc34fcd088a239de && \
-    export SQLITE_AUTOCONF_DOWNLOAD_URL=https://www.sqlite.org/2022 && \
-    manylinux-entrypoint /build_scripts/build-sqlite3.sh
-
-COPY build_scripts/build-openssl.sh /build_scripts/
-RUN export OPENSSL_ROOT=openssl-1.1.1q && \
-    export OPENSSL_HASH=d7939ce614029cdff0b6c20f0e2e5703158a489a72b2507b8bd51bf8c8fd10ca && \
-    export OPENSSL_DOWNLOAD_URL=https://www.openssl.org/source && \
-    manylinux-entrypoint /build_scripts/build-openssl.sh
-
-COPY build_scripts/build-cpython.sh /build_scripts/
-
-
-FROM build_cpython AS build_cpython37
-COPY build_scripts/cpython-pubkeys.txt /build_scripts/cpython-pubkeys.txt
-RUN manylinux-entrypoint /build_scripts/build-cpython.sh 3.7.13
-
-
-FROM build_cpython AS build_cpython38
-COPY build_scripts/ambv-pubkey.txt /build_scripts/cpython-pubkeys.txt
-RUN manylinux-entrypoint /build_scripts/build-cpython.sh 3.8.13
-
-
-FROM build_cpython AS build_cpython39
-COPY build_scripts/ambv-pubkey.txt /build_scripts/cpython-pubkeys.txt
-RUN manylinux-entrypoint /build_scripts/build-cpython.sh 3.9.13
-
-
-FROM build_cpython AS build_cpython310
-COPY build_scripts/cpython-pubkey-310-311.txt /build_scripts/cpython-pubkeys.txt
-RUN manylinux-entrypoint /build_scripts/build-cpython.sh 3.10.5
-
-FROM build_cpython AS build_cpython311
-COPY build_scripts/cpython-pubkey-310-311.txt /build_scripts/cpython-pubkeys.txt
-RUN manylinux-entrypoint /build_scripts/build-cpython.sh 3.11.0b5
-
-FROM build_cpython AS all_python
-COPY build_scripts/install-pypy.sh \
-     build_scripts/pypy.sha256 \
-     build_scripts/finalize-python.sh \
-     /build_scripts/
-RUN manylinux-entrypoint /build_scripts/install-pypy.sh 3.7 7.3.9
-RUN manylinux-entrypoint /build_scripts/install-pypy.sh 3.8 7.3.9
-RUN manylinux-entrypoint /build_scripts/install-pypy.sh 3.9 7.3.9
-COPY --from=build_cpython37 /opt/_internal /opt/_internal/
-COPY --from=build_cpython38 /opt/_internal /opt/_internal/
-COPY --from=build_cpython39 /opt/_internal /opt/_internal/
-COPY --from=build_cpython310 /opt/_internal /opt/_internal/
-COPY --from=build_cpython311 /opt/_internal /opt/_internal/
-RUN manylinux-entrypoint /build_scripts/finalize-python.sh
-
-
-FROM runtime_base
-COPY --from=build_git /manylinux-rootfs /
-COPY --from=build_cpython /manylinux-rootfs /
-COPY --from=all_python /opt/_internal /opt/_internal/
-COPY build_scripts/finalize.sh \
-     build_scripts/python-tag-abi-tag.py \
-     build_scripts/requirements3.7.txt \
-     build_scripts/requirements3.8.txt \
-     build_scripts/requirements3.9.txt \
-     build_scripts/requirements3.10.txt \
-     build_scripts/requirements3.11.txt \
-     build_scripts/requirements-base-tools.txt \
-     /build_scripts/
-COPY build_scripts/requirements-tools/* /build_scripts/requirements-tools/
-RUN manylinux-entrypoint /build_scripts/finalize.sh && rm -rf /build_scripts
-
-ENV SSL_CERT_FILE=/opt/_internal/certs.pem
-
-CMD ["/bin/bash"]
-
-#Build manylinux2014 docker image end
-
-#Install TensorRT 8.6.1.6
-#RUN yum install -y wget
-RUN v="8.6.1.6-1.cuda11.8" &&\
-    yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo &&\
-    yum -y install libnvinfer8-${v} libnvparsers8-${v} libnvonnxparsers8-${v} libnvinfer-plugin8-${v} libnvinfer-vc-plugin8-${v}\
-        libnvinfer-devel-${v} libnvparsers-devel-${v} libnvonnxparsers-devel-${v} libnvinfer-plugin-devel-${v} libnvinfer-vc-plugin-devel-${v} libnvinfer-headers-devel-${v}  libnvinfer-headers-plugin-devel-${v}
-ENV JAVA_HOME=/usr/lib/jvm/msopenjdk-11
-#Add our own dependencies
-ADD scripts /tmp/scripts
-RUN cd /tmp/scripts && /tmp/scripts/manylinux/install_centos.sh && /tmp/scripts/manylinux/install_deps.sh && rm -rf /tmp/scripts
-
-ARG BUILD_UID=1001
-ARG BUILD_USER=onnxruntimedev
-RUN adduser --uid $BUILD_UID $BUILD_USER
-WORKDIR /home/$BUILD_USER
-USER $BUILD_USER
-ENV PATH /usr/local/dotnet:$PATH
-ENV CUDA_MODULE_LOADING "LAZY"
diff --git a/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_rocm b/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_rocm
index 19599c9f613d4..9e12fe8c75451 100644
--- a/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_rocm
+++ b/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_rocm
@@ -31,7 +31,7 @@ RUN yum install -y hipify-clang
 RUN yum -y install wget
 
 # rocm lib
-RUN yum install -y miopen-hip-devel rocblas-devel rocrand-devel rccl-devel hipsparse-devel hipfft-devel hipcub-devel hipblas-devel rocthrust-devel
+RUN yum install -y miopen-hip-devel rocblas-devel rocrand-devel rccl-devel hipsparse-devel hipfft-devel hipcub-devel hipblas-devel rocthrust-devel migraphx-devel
 
 ENV AUDITWHEEL_POLICY=${POLICY} AUDITWHEEL_ARCH=${PLATFORM} AUDITWHEEL_PLAT=${POLICY}_${PLATFORM}
 ENV LC_ALL=en_US.UTF-8 LANG=en_US.UTF-8 LANGUAGE=en_US.UTF-8
diff --git a/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda11 b/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_training_cuda12_2
similarity index 91%
rename from tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda11
rename to tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_training_cuda12_2
index 933b0211b0e6c..a36f60b87768d 100644
--- a/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda11
+++ b/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_training_cuda12_2
@@ -1,5 +1,5 @@
-ARG BASEIMAGE=nvidia/cuda:11.8.0-cudnn8-devel-ubi8
-ARG POLICY=manylinux_2_28
+ARG BASEIMAGE=nvidia/cuda:12.2.2-cudnn8-devel-ubi8
+ARG POLICY=manylinux2014
 ARG PLATFORM=x86_64
 ARG DEVTOOLSET_ROOTPATH=
 ARG LD_LIBRARY_PATH_ARG=
@@ -153,10 +153,23 @@ ENV SSL_CERT_FILE=/opt/_internal/certs.pem
 CMD ["/bin/bash"]
 
 #Build manylinux2014 docker image end
-ENV JAVA_HOME=/usr/lib/jvm/msopenjdk-11
+ARG PYTHON_VERSION=3.9
+ARG TORCH_VERSION=2.1.0
+ARG OPSET_VERSION=15
+ARG INSTALL_DEPS_EXTRA_ARGS
+
 #Add our own dependencies
 ADD scripts /tmp/scripts
-RUN cd /tmp/scripts && /tmp/scripts/manylinux/install_centos.sh && /tmp/scripts/manylinux/install_deps.sh && rm -rf /tmp/scripts
+RUN cd /tmp/scripts && \
+    /tmp/scripts/manylinux/install_centos.sh && \
+    /tmp/scripts/install_os_deps.sh -d gpu $INSTALL_DEPS_EXTRA_ARGS && \
+    /tmp/scripts/install_rust.sh
+
+ENV PATH="/root/.cargo/bin/:$PATH"
+
+RUN /tmp/scripts/install_ninja.sh && \
+    /tmp/scripts/install_python_deps.sh -d gpu -v 12.2 -p $PYTHON_VERSION -h $TORCH_VERSION $INSTALL_DEPS_EXTRA_ARGS && \
+    rm -rf /tmp/scripts
 
 ARG BUILD_UID=1001
 ARG BUILD_USER=onnxruntimedev
@@ -164,3 +177,4 @@ RUN adduser --uid $BUILD_UID $BUILD_USER
 WORKDIR /home/$BUILD_USER
 USER $BUILD_USER
 ENV PATH /usr/local/dotnet:$PATH
+ENV ORTMODULE_ONNX_OPSET_VERSION=$OPSET_VERSION
diff --git a/tools/ci_build/github/linux/docker/Dockerfile.package_ubi8_cuda11_8_tensorrt8_6 b/tools/ci_build/github/linux/docker/Dockerfile.package_ubi8_cuda11_8_tensorrt8_6
index cdf504c8e3b03..8ef8e05b8ac77 100644
--- a/tools/ci_build/github/linux/docker/Dockerfile.package_ubi8_cuda11_8_tensorrt8_6
+++ b/tools/ci_build/github/linux/docker/Dockerfile.package_ubi8_cuda11_8_tensorrt8_6
@@ -5,8 +5,10 @@
 # Dockerfile to Test ONNX Runtime on UBI8 with CUDA 11.8 and TensorRT 8.6
 
 # Build base image with required system packages
-FROM nvidia/cuda:11.8.0-cudnn8-devel-ubi8 AS base
-
+ARG BASEIMAGE=nvidia/cuda:11.8.0-cudnn8-devel-ubi8
+ARG TRT_VERSION=8.6.1.6-1.cuda11.8
+FROM $BASEIMAGE AS base
+ARG TRT_VERSION
 ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/src/tensorrt/bin:${PATH}
 
 RUN dnf install -y bash wget &&\
@@ -22,12 +24,11 @@ RUN dnf install -y \
     ln -s /usr/bin/pip3 pip3.8;
 
 RUN pip3 install --upgrade pip
-RUN pip3 install setuptools>=41.0.0
+RUN pip3 install setuptools>=68.2.2
 
 # Install TensorRT
 RUN dnf install -y libnvinfer8 libnvonnxparsers8 libnvparsers8 libnvinfer-plugin8 libnvinfer-lean8 libnvinfer-vc-plugin8 libnvinfer-dispatch8
-RUN v="8.6.1.6-1+cuda11.8" &&\
-    dnf downgrade -y libnvinfer8-${v} libnvinfer8-${v} libnvonnxparsers8-${v} libnvparsers8-${v} libnvinfer-plugin8-${v} libnvinfer-lean8-${v} libnvinfer-vc-plugin8-${v} libnvinfer-dispatch8-${v} &&\
+RUN dnf downgrade -y libnvinfer8-${TRT_VERSION} libnvinfer8-${TRT_VERSION} libnvonnxparsers8-${TRT_VERSION} libnvparsers8-${TRT_VERSION} libnvinfer-plugin8-${TRT_VERSION} libnvinfer-lean8-${TRT_VERSION} libnvinfer-vc-plugin8-${TRT_VERSION} libnvinfer-dispatch8-${TRT_VERSION} &&\
     dnf install -y dnf-plugin-versionlock &&\
     dnf versionlock libnvinfer8 libnvonnxparsers8 libnvparsers8 libnvinfer-plugin8 libnvinfer-lean8 libnvinfer-vc-plugin8 libnvinfer-dispatch8
 RUN dnf clean dbcache
diff --git a/tools/ci_build/github/linux/docker/Dockerfile.package_ubuntu_cuda11_8_tensorrt8_6 b/tools/ci_build/github/linux/docker/Dockerfile.package_ubuntu_2004_gpu
similarity index 50%
rename from tools/ci_build/github/linux/docker/Dockerfile.package_ubuntu_cuda11_8_tensorrt8_6
rename to tools/ci_build/github/linux/docker/Dockerfile.package_ubuntu_2004_gpu
index 83a974469234f..9b9dc9ecae822 100644
--- a/tools/ci_build/github/linux/docker/Dockerfile.package_ubuntu_cuda11_8_tensorrt8_6
+++ b/tools/ci_build/github/linux/docker/Dockerfile.package_ubuntu_2004_gpu
@@ -5,11 +5,16 @@
 # Dockerfile to run ONNXRuntime with TensorRT integration
 
 # Build base image with required system packages
-FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04 AS base
-
+ARG BASEIMAGE=nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04
+ARG TRT_VERSION=8.6.1.6-1+cuda11.8
+ARG LD_LIBRARY_PATH_ARG=/usr/local/lib64:/usr/local/cuda/lib64
+FROM $BASEIMAGE AS base
+ARG TRT_VERSION
 ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/src/tensorrt/bin:${PATH}
 ENV DEBIAN_FRONTEND=noninteractive
 
+ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH_ARG}:${LD_LIBRARY_PATH}
+
 RUN apt-get update &&\
     apt-get install -y git bash wget
 
@@ -24,12 +29,11 @@ RUN apt-get install -y --no-install-recommends \
 RUN pip install --upgrade pip
 
 # Install TensorRT
-RUN v="8.6.1.6-1+cuda11.8" &&\
-    apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub &&\
+RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub &&\
     apt-get update &&\
-    apt-get install -y libnvinfer8=${v} libnvonnxparsers8=${v} libnvparsers8=${v} libnvinfer-plugin8=${v} libnvinfer-lean8=${v} libnvinfer-vc-plugin8=${v} libnvinfer-dispatch8=${v}\
-        libnvinfer-headers-dev=${v} libnvinfer-headers-plugin-dev=${v} libnvinfer-dev=${v} libnvonnxparsers-dev=${v} libnvparsers-dev=${v} libnvinfer-plugin-dev=${v} libnvinfer-lean-dev=${v} libnvinfer-vc-plugin-dev=${v}  libnvinfer-dispatch-dev=${v}\
-        python3-libnvinfer=${v} libnvinfer-samples=${v} tensorrt-dev=${v} tensorrt-libs=${v}
+    apt-get install -y libnvinfer8=${TRT_VERSION} libnvonnxparsers8=${TRT_VERSION} libnvparsers8=${TRT_VERSION} libnvinfer-plugin8=${TRT_VERSION} libnvinfer-lean8=${TRT_VERSION} libnvinfer-vc-plugin8=${TRT_VERSION} libnvinfer-dispatch8=${TRT_VERSION}\
+        libnvinfer-headers-dev=${TRT_VERSION} libnvinfer-headers-plugin-dev=${TRT_VERSION} libnvinfer-dev=${TRT_VERSION} libnvonnxparsers-dev=${TRT_VERSION} libnvparsers-dev=${TRT_VERSION} libnvinfer-plugin-dev=${TRT_VERSION} libnvinfer-lean-dev=${TRT_VERSION} libnvinfer-vc-plugin-dev=${TRT_VERSION}  libnvinfer-dispatch-dev=${TRT_VERSION}\
+        python3-libnvinfer=${TRT_VERSION} libnvinfer-samples=${TRT_VERSION} tensorrt-dev=${TRT_VERSION} tensorrt-libs=${TRT_VERSION}
 
 ADD scripts /tmp/scripts
 RUN cd /tmp/scripts && /tmp/scripts/install_dotnet.sh && rm -rf /tmp/scripts
diff --git a/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_cuda11_6_tensorrt8_4 b/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_cuda11_6_tensorrt8_4
index 10f404c7c6a85..8b32425afce1c 100644
--- a/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_cuda11_6_tensorrt8_4
+++ b/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_cuda11_6_tensorrt8_4
@@ -31,7 +31,7 @@ RUN apt-get install -y --no-install-recommends \
     ln -s /usr/bin/pip3 pip;
 
 RUN pip install --upgrade pip 
-RUN pip install setuptools>=41.0.0
+RUN pip install setuptools>=68.2.2
 
 # Install TensorRT
 RUN v="8.4.1-1+cuda11.6" &&\
diff --git a/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_cuda11_8_tensorrt8_5 b/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_cuda11_8_tensorrt8_5
index cacc09f0c7455..cfc7023ef8e61 100644
--- a/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_cuda11_8_tensorrt8_5
+++ b/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_cuda11_8_tensorrt8_5
@@ -28,7 +28,7 @@ RUN apt-get install -y --no-install-recommends \
     ln -s /usr/bin/pip3 pip;
 
 RUN pip install --upgrade pip 
-RUN pip install setuptools>=41.0.0
+RUN pip install setuptools>=68.2.2
 
 # Install TensorRT
 RUN v="8.5.1-1+cuda11.8" &&\
diff --git a/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_cuda11_8_tensorrt8_6 b/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_cuda11_8_tensorrt8_6
index 0a4885e774047..edc41197be5c9 100644
--- a/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_cuda11_8_tensorrt8_6
+++ b/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_cuda11_8_tensorrt8_6
@@ -28,7 +28,7 @@ RUN apt-get install -y --no-install-recommends \
     ln -s /usr/bin/pip3 pip;
 
 RUN pip install --upgrade pip 
-RUN pip install setuptools>=41.0.0
+RUN pip install setuptools>=68.2.2
 
 # Install TensorRT
 RUN v="8.6.1.6-1+cuda11.8" &&\
diff --git a/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_tensorrt_bin b/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_tensorrt_bin
index c9308ade37396..21b09b2d8978e 100644
--- a/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_tensorrt_bin
+++ b/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_tensorrt_bin
@@ -42,7 +42,7 @@ RUN apt-get install -y --no-install-recommends \
     ln -s /usr/bin/pip3 pip;
 
 RUN pip install --upgrade pip 
-RUN pip install setuptools>=41.0.0
+RUN pip install setuptools>=68.2.2
 
 # Install TensorRT from tar.gz
 RUN tar -xzvf /TensorRT-${TAR_TRT_VERSION}.tar.gz
diff --git a/tools/ci_build/github/linux/docker/inference/x64/default/gpu/Dockerfile b/tools/ci_build/github/linux/docker/inference/x64/default/gpu/Dockerfile
index 318791072f46d..b1ff40e8effef 100644
--- a/tools/ci_build/github/linux/docker/inference/x64/default/gpu/Dockerfile
+++ b/tools/ci_build/github/linux/docker/inference/x64/default/gpu/Dockerfile
@@ -2,8 +2,8 @@
 # Licensed under the MIT License.
 
 # This file is used by Zip-Nuget Packaging NoContribOps Pipeline,Zip-Nuget-Java Packaging Pipeline
-FROM nvidia/cuda:11.8.0-cudnn8-devel-ubi8
-
+ARG BASEIMAGE=nvidia/cuda:11.8.0-cudnn8-devel-ubi8
+FROM $BASEIMAGE
 ENV PATH /usr/lib/jvm/msopenjdk-11/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
 ENV LANG=en_US.UTF-8
 ENV LC_ALL=en_US.UTF-8
diff --git a/tools/ci_build/github/linux/docker/inference/x64/python/cpu/scripts/requirements.txt b/tools/ci_build/github/linux/docker/inference/x64/python/cpu/scripts/requirements.txt
index 8a9c4dac1dd58..aa0ad05b42dbf 100644
--- a/tools/ci_build/github/linux/docker/inference/x64/python/cpu/scripts/requirements.txt
+++ b/tools/ci_build/github/linux/docker/inference/x64/python/cpu/scripts/requirements.txt
@@ -2,9 +2,9 @@ numpy==1.21.6 ; python_version < '3.11'
 numpy==1.24.2 ; python_version >= '3.11'
 mypy
 pytest
-setuptools>=41.4.0
+setuptools>=68.2.2
 wheel
-git+http://github.com/onnx/onnx.git@e2525550194ce3d8a2c4a3af451c9d9b3ae6650e#egg=onnx
-protobuf==3.20.2
+onnx==1.15.0
+protobuf==4.21.12
 sympy==1.12
 flatbuffers
diff --git a/tools/ci_build/github/linux/docker/migraphx-ci-pipeline-env.Dockerfile b/tools/ci_build/github/linux/docker/migraphx-ci-pipeline-env.Dockerfile
index 7d2c818d08920..7fa606b6c294c 100644
--- a/tools/ci_build/github/linux/docker/migraphx-ci-pipeline-env.Dockerfile
+++ b/tools/ci_build/github/linux/docker/migraphx-ci-pipeline-env.Dockerfile
@@ -1,7 +1,7 @@
 # Refer to https://github.com/RadeonOpenCompute/ROCm-docker/blob/master/dev/Dockerfile-ubuntu-22.04-complete
 FROM ubuntu:22.04
 
-ARG ROCM_VERSION=5.6
+ARG ROCM_VERSION=5.7
 ARG AMDGPU_VERSION=${ROCM_VERSION}
 ARG APT_PREF='Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600'
 
@@ -66,17 +66,20 @@ RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86
     rm ~/miniconda.sh && conda clean -ya
 
 # Conda base patch
-RUN pip install cryptography==41.0.0
+RUN pip install cryptography==41.0.4
 
 # Create migraphx-ci environment
 ENV CONDA_ENVIRONMENT_PATH /opt/miniconda/envs/migraphx-ci
 ENV CONDA_DEFAULT_ENV migraphx-ci
-RUN conda create -y -n ${CONDA_DEFAULT_ENV} python=3.8
+RUN conda create -y -n ${CONDA_DEFAULT_ENV} python=3.9
 ENV PATH ${CONDA_ENVIRONMENT_PATH}/bin:${PATH}
 
 # Enable migraphx-ci environment
 SHELL ["conda", "run", "-n", "migraphx-ci", "/bin/bash", "-c"]
 
+# ln -sf is needed to make sure that version `GLIBCXX_3.4.30' is found
+RUN ln -sf /usr/lib/x86_64-linux-gnu/libstdc++.so.6 ${CONDA_ENVIRONMENT_PATH}/bin/../lib/libstdc++.so.6
+
 # Install migraphx
 RUN apt update && apt install -y migraphx
 
diff --git a/tools/ci_build/github/linux/docker/scripts/manylinux/install_deps_lort.sh b/tools/ci_build/github/linux/docker/scripts/manylinux/install_deps_lort.sh
index 3bca6413100a2..da8a45e00cc90 100755
--- a/tools/ci_build/github/linux/docker/scripts/manylinux/install_deps_lort.sh
+++ b/tools/ci_build/github/linux/docker/scripts/manylinux/install_deps_lort.sh
@@ -19,7 +19,9 @@ fi
 export ONNX_ML=1
 export CMAKE_ARGS="-DONNX_GEN_PB_TYPE_STUBS=OFF -DONNX_WERROR=OFF"
 
+# This may install PyTorch, which will be overrided by the PyTorch local build below.
 /opt/python/cp39-cp39/bin/python3.9 -m pip install transformers
+
 # beartype is installed here so that onnxscript installation step won't
 # install a version PyTorch doesn't like. Once beartype fixes this problem.
 # We can remove this line.
diff --git a/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt b/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt
index 6b8003c01c24d..d6912bfb05efe 100644
--- a/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt
+++ b/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt
@@ -2,10 +2,10 @@ numpy==1.21.6 ; python_version < '3.11'
 numpy==1.24.2 ; python_version >= '3.11'
 mypy
 pytest
-setuptools>=41.4.0
+setuptools>=68.2.2
 wheel
-git+http://github.com/onnx/onnx.git@e2525550194ce3d8a2c4a3af451c9d9b3ae6650e#egg=onnx
-protobuf==3.20.2
+onnx==1.15.0
+protobuf==4.21.12
 sympy==1.12
 flatbuffers
 neural-compressor>=2.2.1
diff --git a/tools/ci_build/github/linux/docker/scripts/requirements.txt b/tools/ci_build/github/linux/docker/scripts/requirements.txt
index 9dbe856753faa..a6452721a2b7d 100644
--- a/tools/ci_build/github/linux/docker/scripts/requirements.txt
+++ b/tools/ci_build/github/linux/docker/scripts/requirements.txt
@@ -3,11 +3,11 @@ numpy==1.21.6 ; python_version < '3.11'
 numpy==1.24.2 ; python_version >= '3.11'
 mypy
 pytest
-setuptools>=41.4.0
+setuptools>=68.2.2
 wheel>=0.35.1
-git+http://github.com/onnx/onnx.git@e2525550194ce3d8a2c4a3af451c9d9b3ae6650e#egg=onnx
+onnx==1.15.0
 argparse
 sympy==1.12
 flatbuffers
-protobuf==3.20.2
+protobuf==4.21.12
 packaging
diff --git a/tools/ci_build/github/linux/docker/scripts/training/ortmodule/stage1/requirements_torch2.0.0_cu11.8/requirements.txt b/tools/ci_build/github/linux/docker/scripts/training/ortmodule/stage1/requirements_torch2.0.0_cu11.8/requirements.txt
index fa28a810370f7..b3b2651c8d26d 100644
--- a/tools/ci_build/github/linux/docker/scripts/training/ortmodule/stage1/requirements_torch2.0.0_cu11.8/requirements.txt
+++ b/tools/ci_build/github/linux/docker/scripts/training/ortmodule/stage1/requirements_torch2.0.0_cu11.8/requirements.txt
@@ -5,4 +5,4 @@ torchvision==0.15.1+cu118
 torchtext==0.15.1
 # TODO(bmeswani): packaging 22.0 removes support for LegacyVersion leading to errors because transformers 4.4.2 uses LegacyVersion
 packaging==21.3
-setuptools>=41.4.0
+setuptools>=68.2.2
diff --git a/tools/ci_build/github/linux/docker/scripts/training/ortmodule/stage1/requirements_torch2.1.0_cu12.2/requirements.txt b/tools/ci_build/github/linux/docker/scripts/training/ortmodule/stage1/requirements_torch2.1.0_cu12.2/requirements.txt
new file mode 100644
index 0000000000000..152a17db90366
--- /dev/null
+++ b/tools/ci_build/github/linux/docker/scripts/training/ortmodule/stage1/requirements_torch2.1.0_cu12.2/requirements.txt
@@ -0,0 +1,7 @@
+--pre
+-f https://download.pytorch.org/whl/torch_stable.html
+torch==2.1.0+cu121
+torchvision==0.16.0+cu121
+torchtext==0.16.0
+packaging==23.1
+setuptools>=68.2.2
diff --git a/tools/ci_build/github/linux/docker/scripts/training/ortmodule/stage1/requirements_torch_cpu/requirements.txt b/tools/ci_build/github/linux/docker/scripts/training/ortmodule/stage1/requirements_torch_cpu/requirements.txt
index 94b16d7ff4894..95d02b8400339 100644
--- a/tools/ci_build/github/linux/docker/scripts/training/ortmodule/stage1/requirements_torch_cpu/requirements.txt
+++ b/tools/ci_build/github/linux/docker/scripts/training/ortmodule/stage1/requirements_torch_cpu/requirements.txt
@@ -1,3 +1,3 @@
 -f https://download.pytorch.org/whl/torch_stable.html
 torch==2.0.0+cpu
-setuptools>=41.4.0
+setuptools>=68.2.2
diff --git a/tools/ci_build/github/linux/docker/scripts/training/ortmodule/stage1/torch_eager_cpu/requirements.txt b/tools/ci_build/github/linux/docker/scripts/training/ortmodule/stage1/torch_eager_cpu/requirements.txt
index b071770c629e2..08e251eddbf96 100644
--- a/tools/ci_build/github/linux/docker/scripts/training/ortmodule/stage1/torch_eager_cpu/requirements.txt
+++ b/tools/ci_build/github/linux/docker/scripts/training/ortmodule/stage1/torch_eager_cpu/requirements.txt
@@ -1,7 +1,7 @@
 --pre
 -f https://download.pytorch.org/whl/torch_stable.html
 torch==1.13.1+cpu
-setuptools>=41.4.0
+setuptools>=68.2.2
 cerberus
 h5py
 scikit-learn
diff --git a/tools/ci_build/github/linux/docker/scripts/training/ortmodule/stage2/requirements.txt b/tools/ci_build/github/linux/docker/scripts/training/ortmodule/stage2/requirements.txt
index 202d43befcca4..4cda4c17d0091 100644
--- a/tools/ci_build/github/linux/docker/scripts/training/ortmodule/stage2/requirements.txt
+++ b/tools/ci_build/github/linux/docker/scripts/training/ortmodule/stage2/requirements.txt
@@ -2,9 +2,9 @@ pandas
 scikit-learn
 numpy==1.21.6 ; python_version < '3.11'
 numpy==1.24.2 ; python_version >= '3.11'
-transformers==v4.4.2
+transformers==v4.16.1
 rsa==4.9
-tensorboard>=2.2.0,<2.5.0
+tensorboard==2.13.0
 h5py
 wget
 pytorch-lightning
diff --git a/tools/ci_build/github/linux/extract_and_bundle_gpu_package.sh b/tools/ci_build/github/linux/extract_and_bundle_gpu_package.sh
index 9492b7bcf59a6..04ac0e35a6d78 100755
--- a/tools/ci_build/github/linux/extract_and_bundle_gpu_package.sh
+++ b/tools/ci_build/github/linux/extract_and_bundle_gpu_package.sh
@@ -28,4 +28,3 @@ rm $ARTIFACT_DIR/onnxruntime-linux-x64-cuda-*.tgz
 cp onnxruntime-linux-x64-tensorrt/*/lib/libonnxruntime.so* onnxruntime-linux-x64-gpu/*/lib
 cp onnxruntime-linux-x64-tensorrt/*/lib/libonnxruntime_providers_tensorrt.so onnxruntime-linux-x64-gpu/*/lib
 cp onnxruntime-linux-x64-tensorrt/*/lib/libonnxruntime_providers_shared.so onnxruntime-linux-x64-gpu/*/lib
-cp onnxruntime-linux-x64-tensorrt/*/include/*tensorrt* onnxruntime-linux-x64-gpu/*/include
diff --git a/tools/ci_build/github/linux/ort_minimal/readelf_utils.py b/tools/ci_build/github/linux/ort_minimal/readelf_utils.py
index 43bc107df401b..dec070e3f5c75 100644
--- a/tools/ci_build/github/linux/ort_minimal/readelf_utils.py
+++ b/tools/ci_build/github/linux/ort_minimal/readelf_utils.py
@@ -23,7 +23,7 @@ def get_section_sizes(binary_path, readelf_path, dump_to_file=None):
     """
 
     cmd = [readelf_path, "--sections", "--wide", binary_path]
-    result = subprocess.run(cmd, stdout=subprocess.PIPE)
+    result = subprocess.run(cmd, stdout=subprocess.PIPE)  # noqa: PLW1510
     result.check_returncode()
     output = result.stdout.decode("utf-8")
 
diff --git a/tools/ci_build/github/linux/run_python_dockerbuild.sh b/tools/ci_build/github/linux/run_python_dockerbuild.sh
index 18ac6482827f9..ff2ce6f7ff231 100755
--- a/tools/ci_build/github/linux/run_python_dockerbuild.sh
+++ b/tools/ci_build/github/linux/run_python_dockerbuild.sh
@@ -9,24 +9,32 @@ i) DOCKER_IMAGE=${OPTARG};;
 d) DEVICE=${OPTARG};;
 x) BUILD_EXTR_PAR=${OPTARG};;
 c) BUILD_CONFIG=${OPTARG};;
+*) echo "Usage: $0 -i <docker_image> -d <GPU|CPU> [-x <extra_build_arg>] [-c <build_config>]"
+   exit 1;;
 esac
 done
 
-mkdir -p $HOME/.onnx
+mkdir -p "${HOME}/.onnx"
+DOCKER_SCRIPT_OPTIONS="-d ${DEVICE} -c ${BUILD_CONFIG}"
+
+if [ "${BUILD_EXTR_PAR}" != "" ] ; then
+    DOCKER_SCRIPT_OPTIONS+=" -x ${BUILD_EXTR_PAR}"
+fi
+
 docker run --rm \
     --volume /data/onnx:/data/onnx:ro \
-    --volume $BUILD_SOURCESDIRECTORY:/onnxruntime_src \
-    --volume $BUILD_BINARIESDIRECTORY:/build \
+    --volume "${BUILD_SOURCESDIRECTORY}:/onnxruntime_src" \
+    --volume "${BUILD_BINARIESDIRECTORY}:/build" \
     --volume /data/models:/build/models:ro \
-    --volume $HOME/.onnx:/home/onnxruntimedev/.onnx \
+    --volume "${HOME}/.onnx:/home/onnxruntimedev/.onnx" \
     -w /onnxruntime_src \
     -e NIGHTLY_BUILD \
     -e BUILD_BUILDNUMBER \
     $ADDITIONAL_DOCKER_PARAMETER \
-    $DOCKER_IMAGE tools/ci_build/github/linux/build_linux_arm64_python_package.sh -d $DEVICE -c $BUILD_CONFIG -x $BUILD_EXTR_PAR
+    $DOCKER_IMAGE tools/ci_build/github/linux/build_linux_python_package.sh $DOCKER_SCRIPT_OPTIONS
 
-sudo rm -rf $BUILD_BINARIESDIRECTORY/$BUILD_CONFIG/onnxruntime $BUILD_BINARIESDIRECTORY/$BUILD_CONFIG/pybind11 \
-    $BUILD_BINARIESDIRECTORY/$BUILD_CONFIG/models $BUILD_BINARIESDIRECTORY/$BUILD_CONFIG/_deps \
-    $BUILD_BINARIESDIRECTORY/$BUILD_CONFIG/CMakeFiles
-cd $BUILD_BINARIESDIRECTORY/$BUILD_CONFIG
-find -executable -type f > $BUILD_BINARIESDIRECTORY/$BUILD_CONFIG/perms.txt
+sudo rm -rf "${BUILD_BINARIESDIRECTORY}/${BUILD_CONFIG}/onnxruntime" "${BUILD_BINARIESDIRECTORY}/${BUILD_CONFIG}/pybind11" \
+    "${BUILD_BINARIESDIRECTORY}/${BUILD_CONFIG}/models" "${BUILD_BINARIESDIRECTORY}/${BUILD_CONFIG}/_deps" \
+    "${BUILD_BINARIESDIRECTORY}/${BUILD_CONFIG}/CMakeFiles"
+cd "${BUILD_BINARIESDIRECTORY}/${BUILD_CONFIG}"
+find -executable -type f > "${BUILD_BINARIESDIRECTORY}/${BUILD_CONFIG}/perms.txt"
diff --git a/tools/ci_build/github/pai/migraphx-excluded-tests.txt b/tools/ci_build/github/pai/migraphx-excluded-tests.txt
deleted file mode 100644
index 5e38f982d7de3..0000000000000
--- a/tools/ci_build/github/pai/migraphx-excluded-tests.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-GatherOpTest.Gather_invalid_index_cpu
-Scatter.InvalidIndex
diff --git a/tools/ci_build/github/pai/migraphx_test_launcher.sh b/tools/ci_build/github/pai/migraphx_test_launcher.sh
deleted file mode 100755
index c2d593b6417c2..0000000000000
--- a/tools/ci_build/github/pai/migraphx_test_launcher.sh
+++ /dev/null
@@ -1,15 +0,0 @@
-#!/bin/bash
-
-build_dir=${1:-"."}
-script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
-
-echo "Warning: The following tests are EXCLUDED on MIGraphX agent:"
-gtest_filter="-"
-while read line; do
-  gtest_filter="$gtest_filter:$line"
-  echo "$line"
-done <$script_dir/migraphx-excluded-tests.txt
-echo ""
-
-echo "Running ./onnxruntime_test_all .."
-$build_dir/onnxruntime_test_all --gtest_filter=$gtest_filter
diff --git a/tools/ci_build/github/pai/pai-excluded-tests.txt b/tools/ci_build/github/pai/pai-excluded-tests.txt
index b446dac2034de..845d36b71d215 100644
--- a/tools/ci_build/github/pai/pai-excluded-tests.txt
+++ b/tools/ci_build/github/pai/pai-excluded-tests.txt
@@ -1,6 +1,3 @@
-CudaKernelTest.NegativeLogLikelihoodLoss_TinySizeTensor
-CudaKernelTest.NegativeLogLikelihoodLoss_SmallSizeTensor
-CudaKernelTest.NegativeLogLikelihoodLoss_MediumSizeTensor
 CudaKernelTest.SoftmaxGrad_LargeTensor_LastAxis_Float16
 CudaKernelTest.SoftmaxGrad_LargeTensor_LastAxis_Float16_NoPowerOfTwo
 CudaKernelTest.SoftmaxGrad_LargeTensor_AllAxis_Float16
@@ -10,26 +7,6 @@ CudaKernelTest.LogSoftmaxGrad_LargeTensor_LastAxis_Float16_NoPowerOfTwo
 CudaKernelTest.LogSoftmaxGrad_LargeTensor_AllAxis_Float16
 CudaKernelTest.LogSoftmaxGrad_LargeTensor_AllAxis_Float16_NoPowerOfTwo
 ReductionOpTest.ReductionVariationTest
-ReductionOpTest.ReduceLogSumExp_default_axes_keepdims_double
-ReductionOpTest.ReduceLogSumExp_default_axes_do_not_keep_dims_double
-ReductionOpTest.ReduceLogSumExp_do_not_keepdims_double
-ReductionOpTest.ReduceLogSumExp_do_not_keepdims_2_double
-ReductionOpTest.ReduceLogSumExp_keepdims_double
-ReductionOpTest.ReduceLogSumExp_double
-ReductionOpTest.ReduceMax_double
-ReductionOpTest.ReduceMean_default_axes_keepdims_double
-ReductionOpTest.ReduceMean_default_axes_do_not_keep_dims_double
-ReductionOpTest.ReduceMean_do_not_keepdims_double
-ReductionOpTest.ReduceMean_do_not_keepdims_2_double
-ReductionOpTest.ReduceMean_keepdims_double
-ReductionOpTest.ReduceMean_double
-ReductionOpTest.ReduceMean0DTensor_double
-ReductionOpTest.ReduceMin_double
-ReductionOpTest.ReduceSum_double
-ReductionOpTest.ReduceSumSquare_double
-ReductionOpTest.ReduceInfMax_double
-ReductionOpTest.ReduceInfMin_double
-ReductionOpTest.ReduceInfLogSumExp_double
 GatherOpTest.Gather_invalid_index_cpu
 Scatter.InvalidIndex
 GradientCheckerTest.AddGrad
diff --git a/tools/ci_build/github/pai/rocm-ci-pipeline-env.Dockerfile b/tools/ci_build/github/pai/rocm-ci-pipeline-env.Dockerfile
index 89a7fe09c527f..2ec826fc8fd8c 100644
--- a/tools/ci_build/github/pai/rocm-ci-pipeline-env.Dockerfile
+++ b/tools/ci_build/github/pai/rocm-ci-pipeline-env.Dockerfile
@@ -1,7 +1,7 @@
 # Refer to https://github.com/RadeonOpenCompute/ROCm-docker/blob/master/dev/Dockerfile-ubuntu-22.04-complete
 FROM ubuntu:22.04
 
-ARG ROCM_VERSION=5.6
+ARG ROCM_VERSION=5.7
 ARG AMDGPU_VERSION=${ROCM_VERSION}
 ARG APT_PREF='Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600'
 
@@ -64,11 +64,11 @@ RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86
 # Create rocm-ci environment
 ENV CONDA_ENVIRONMENT_PATH /opt/miniconda/envs/rocm-ci
 ENV CONDA_DEFAULT_ENV rocm-ci
-RUN conda create -y -n ${CONDA_DEFAULT_ENV} python=3.8
+RUN conda create -y -n ${CONDA_DEFAULT_ENV} python=3.9
 ENV PATH ${CONDA_ENVIRONMENT_PATH}/bin:${PATH}
 
 # Conda base patch
-RUN pip install cryptography==41.0.0
+RUN pip install cryptography==41.0.4
 
 # Enable rocm-ci environment
 SHELL ["conda", "run", "-n", "rocm-ci", "/bin/bash", "-c"]
@@ -77,10 +77,9 @@ SHELL ["conda", "run", "-n", "rocm-ci", "/bin/bash", "-c"]
 RUN ln -sf /usr/lib/x86_64-linux-gnu/libstdc++.so.6 ${CONDA_ENVIRONMENT_PATH}/bin/../lib/libstdc++.so.6
 
 # Install Pytorch
-RUN pip install install torch==2.0.1 torchvision==0.15.2 -f https://repo.radeon.com/rocm/manylinux/rocm-rel-${ROCM_VERSION}/ && \
+RUN pip install torch==2.0.1 torchvision==0.15.2 -f https://repo.radeon.com/rocm/manylinux/rocm-rel-${ROCM_VERSION}/ && \
     pip install torch-ort --no-dependencies
 
-
 ##### Install Cupy to decrease CPU utilization
 # Install non dev openmpi
 RUN wget https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-4.1.5.tar.bz2 && \
@@ -130,6 +129,9 @@ RUN pip install \
     pytest-xdist \
     pytest-rerunfailures
 
+# Install migraphx
+RUN apt update && apt install -y migraphx
+
 ENV ORTMODULE_ONNX_OPSET_VERSION=15
 
 ARG BUILD_UID=1001
diff --git a/tools/ci_build/github/windows/bundle_nuget_with_native_headers.bat b/tools/ci_build/github/windows/bundle_nuget_with_native_headers.bat
index 01d2633ae2104..bc4a799b2bf40 100644
--- a/tools/ci_build/github/windows/bundle_nuget_with_native_headers.bat
+++ b/tools/ci_build/github/windows/bundle_nuget_with_native_headers.bat
@@ -7,7 +7,6 @@ FOR /R %%i IN (*.nupkg) do (
    set filename=%%~ni
    IF NOT "!filename:~25,7!"=="Managed" (
        mkdir build\native\include
-       copy %BUILD_SOURCESDIRECTORY%\include\onnxruntime\core\providers\tensorrt\tensorrt_provider_factory.h build\native\include\tensorrt_provider_factory.h 
        7z a  %%~ni.nupkg build 
    )
 ) 
diff --git a/tools/ci_build/github/windows/helpers.ps1 b/tools/ci_build/github/windows/helpers.ps1
index 6e81f901a8288..20df10b244408 100644
--- a/tools/ci_build/github/windows/helpers.ps1
+++ b/tools/ci_build/github/windows/helpers.ps1
@@ -444,7 +444,7 @@ function Install-Abseil {
     .Description
     The Install-UTF8-Range function installs Google's utf8_range library from source.
     utf8_range depends on Abseil.
-	
+
     .PARAMETER cmake_path
     The full path of cmake.exe
 
@@ -604,14 +604,17 @@ function Install-ONNX {
     pushd .
 
     Write-Host "Uninstalling onnx and ignore errors if there is any..."
-    python.exe -m pip uninstall -y onnx -qq
+    [string[]]$pip_args ="-m", "pip", "uninstall", "-y", "onnx", "-qq"
+    &"python.exe" $pip_args
+    if ($lastExitCode -ne 0) {
+      exit $lastExitCode
+    }
     
     Write-Host "Installing python packages..."
-    $p = Start-Process -NoNewWindow -Wait -PassThru -FilePath "python.exe" -ArgumentList "-m", "pip", "install", "--disable-pip-version-check", "setuptools", "wheel", "numpy", "protobuf==$protobuf_version"
-    $exitCode = $p.ExitCode
-    if ($exitCode -ne 0) {
-        Write-Host -Object "Install dependent python wheels failed. Exitcode: $exitCode"
-        exit $exitCode
+    [string[]]$pip_args = "-m", "pip", "install", "-qq", "--disable-pip-version-check", "setuptools>=68.2.2", "wheel", "numpy", "protobuf==$protobuf_version"
+    &"python.exe" $pip_args
+    if ($lastExitCode -ne 0) {
+      exit $lastExitCode
     }
 
     $url=Get-DownloadURL -name onnx -src_root $src_root
diff --git a/tools/ci_build/github/windows/setup_env_azure.bat b/tools/ci_build/github/windows/setup_env_azure.bat
deleted file mode 100644
index 44ba34b0bf23a..0000000000000
--- a/tools/ci_build/github/windows/setup_env_azure.bat
+++ /dev/null
@@ -1,4 +0,0 @@
-REM Copyright (c) Microsoft Corporation. All rights reserved.
-REM Licensed under the MIT License.
-set PATH=%cd%\RelWithDebInfo\_deps\vcpkg-src\installed\x64-windows\bin;%cd%\RelWithDebInfo\_deps\vcpkg-src\installed\x86-windows\bin;%PATH%
-set GRADLE_OPTS=-Dorg.gradle.daemon=false
diff --git a/tools/ci_build/github/windows/setup_env_cuda.bat b/tools/ci_build/github/windows/setup_env_cuda.bat
new file mode 100644
index 0000000000000..2233f7611ab6a
--- /dev/null
+++ b/tools/ci_build/github/windows/setup_env_cuda.bat
@@ -0,0 +1,17 @@
+REM Copyright (c) Microsoft Corporation. All rights reserved.
+REM Licensed under the MIT License.
+
+if exist PATH=%AGENT_TEMPDIRECTORY%\v11.8\ (
+set PATH=%AGENT_TEMPDIRECTORY%\v11.8\bin;%AGENT_TEMPDIRECTORY%\v11.8\extras\CUPTI\lib64;%PATH%
+) else (
+    set PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8\bin;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8\extras\CUPTI\lib64;%PATH%
+)
+
+@REM The default version is still cuda v11.8, because set cuda v12.2 after it
+if exist PATH=%AGENT_TEMPDIRECTORY%\v12.2\ (
+    set PATH=%PATH%;%AGENT_TEMPDIRECTORY%\v12.2\bin;%AGENT_TEMPDIRECTORY%\v12.2\extras\CUPTI\lib64
+) else (
+    set PATH=%PATH%;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.2\bin;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.2\extras\CUPTI\lib64
+)
+
+set GRADLE_OPTS=-Dorg.gradle.daemon=false
diff --git a/tools/ci_build/github/windows/setup_env_cuda_11.bat b/tools/ci_build/github/windows/setup_env_cuda_11.bat
deleted file mode 100644
index 1308e43a4f6db..0000000000000
--- a/tools/ci_build/github/windows/setup_env_cuda_11.bat
+++ /dev/null
@@ -1,9 +0,0 @@
-REM Copyright (c) Microsoft Corporation. All rights reserved.
-REM Licensed under the MIT License.
-
-if exist PATH=%AGENT_TEMPDIRECTORY%\v11.8\ {
-    set PATH=%AGENT_TEMPDIRECTORY%\v11.8\bin;%AGENT_TEMPDIRECTORY%\v11.8\extras\CUPTI\lib64;%PATH%
-} else {
-    set PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8\bin;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8\extras\CUPTI\lib64;%PATH%
-}
-set GRADLE_OPTS=-Dorg.gradle.daemon=false
diff --git a/tools/ci_build/github/windows/setup_env_gpu.bat b/tools/ci_build/github/windows/setup_env_gpu.bat
index 4328c6eba1fe1..49b536e6ab81e 100644
--- a/tools/ci_build/github/windows/setup_env_gpu.bat
+++ b/tools/ci_build/github/windows/setup_env_gpu.bat
@@ -1,11 +1,21 @@
 REM Copyright (c) Microsoft Corporation. All rights reserved.
 REM Licensed under the MIT License.
 
-if exist PATH=%AGENT_TEMPDIRECTORY%\v11.8\ {
+if exist PATH=%AGENT_TEMPDIRECTORY%\v11.8\ (
     set PATH=%AGENT_TEMPDIRECTORY%\v11.8\bin;%AGENT_TEMPDIRECTORY%\v11.8\extras\CUPTI\lib64;%PATH%
-} else {
+) else (
     set PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8\bin;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8\extras\CUPTI\lib64;%PATH%
-}
-set PATH=C:\local\TensorRT-8.6.1.6.Windows10.x86_64.cuda-11.8\lib;C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\MSBuild\Current\Bin;%PATH%
+)
+set PATH=C:\local\TensorRT-8.6.1.6.Windows10.x86_64.cuda-11.8\lib;%PATH%
+
+@REM The default version is still cuda v11.8, because set cuda v12.2 after it
+set PATH=%PATH%;C:\local\TensorRT-8.6.1.6.Windows10.x86_64.cuda-12.0\lib
+if exist PATH=%AGENT_TEMPDIRECTORY%\v12.2\ (
+    set PATH=%PATH%;%AGENT_TEMPDIRECTORY%\v12.2\bin;%AGENT_TEMPDIRECTORY%\v12.2\extras\CUPTI\lib64
+) else (
+    set PATH=%PATH%;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\\bin;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\\extras\CUPTI\lib64
+)
+
+
 set GRADLE_OPTS=-Dorg.gradle.daemon=false
 set CUDA_MODULE_LOADING=LAZY
diff --git a/tools/ci_build/replace_urls_in_deps.py b/tools/ci_build/replace_urls_in_deps.py
index 28e3c91107c6c..ac4f515d5482b 100644
--- a/tools/ci_build/replace_urls_in_deps.py
+++ b/tools/ci_build/replace_urls_in_deps.py
@@ -8,6 +8,7 @@
 import argparse
 import csv
 import os
+import shutil
 from dataclasses import dataclass
 from pathlib import Path
 
@@ -46,7 +47,16 @@ def main():
     deps = []
 
     csv_file_path = Path(REPO_DIR) / "cmake" / "deps.txt"
+    backup_csv_file_path = Path(REPO_DIR) / "cmake" / "deps.txt.bak"
+    # prefer to use the backup file
+    if backup_csv_file_path.exists():
+        csv_file_path = backup_csv_file_path
+    else:
+        # Make a copy before modifying it
+        print("Making a copy to %s" % str(backup_csv_file_path))
+        shutil.copy(csv_file_path, backup_csv_file_path)
 
+    print("Reading from %s" % str(csv_file_path))
     # Read the whole file into memory first
     with csv_file_path.open("r", encoding="utf-8") as f:
         depfile_reader = csv.reader(f, delimiter=";")
@@ -58,6 +68,8 @@ def main():
                 continue
             deps.append(Dep(row[0], row[1], row[2]))
 
+    csv_file_path = Path(REPO_DIR) / "cmake" / "deps.txt"
+    print("Writing to %s" % str(csv_file_path))
     # Write updated content back
     with csv_file_path.open("w", newline="", encoding="utf-8") as f:
         depfile_writer = csv.writer(f, delimiter=";")
diff --git a/tools/ci_build/upload_python_package_to_azure_storage.py b/tools/ci_build/upload_python_package_to_azure_storage.py
index 365cb67381ce7..b7969f02e518e 100755
--- a/tools/ci_build/upload_python_package_to_azure_storage.py
+++ b/tools/ci_build/upload_python_package_to_azure_storage.py
@@ -55,7 +55,7 @@ def upload_whl(python_wheel_path, final_storage=False):
     with open(download_path_to_html) as f:
         lines = f.read().splitlines()
 
-    new_line = '<a href="{blobname}">{blobname}</a><br>'.format(blobname=blob_name_plus_replaced)
+    new_line = f'<a href="{blob_name_plus_replaced}">{blob_name_plus_replaced}</a><br>'
     if new_line not in lines:
         lines.append(new_line)
         lines.sort()
diff --git a/tools/nuget/generate_nuspec_for_native_nuget.py b/tools/nuget/generate_nuspec_for_native_nuget.py
index 2aefe794db2f5..df74e7e5599a8 100644
--- a/tools/nuget/generate_nuspec_for_native_nuget.py
+++ b/tools/nuget/generate_nuspec_for_native_nuget.py
@@ -437,14 +437,7 @@ def generate_files(line_list, args):
         )
 
     if args.execution_provider == "tensorrt":
-        files_list.append(
-            "<file src="
-            + '"'
-            + os.path.join(
-                args.sources_path, "include\\onnxruntime\\core\\providers\\tensorrt\\tensorrt_provider_factory.h"
-            )
-            + '" target="build\\native\\include" />'
-        )
+        files_list.append("<file src=" + '"' + '" target="build\\native\\include" />')
 
     if args.execution_provider == "dnnl":
         files_list.append(
@@ -559,11 +552,12 @@ def generate_files(line_list, args):
                 files_list.append(
                     "<file src=" + '"' + os.path.join(args.native_build_path, "onnxruntime.pdb") + runtimes + " />"
                 )
+
     else:
         files_list.append(
             "<file src="
             + '"'
-            + os.path.join(args.native_build_path, "nuget-staging/usr/local/lib", "libonnxruntime.so")
+            + os.path.join(args.native_build_path, "libonnxruntime.so")
             + '" target="runtimes\\linux-'
             + args.target_architecture
             + '\\native" />'
@@ -713,25 +707,9 @@ def generate_files(line_list, args):
         )
 
         if is_windows():
-            if "2022" in openvino_path:
-                dll_list_path = os.path.join(openvino_path, "runtime\\bin\\intel64\\Release\\")
-                tbb_list_path = os.path.join(openvino_path, "runtime\\3rdparty\\tbb\\bin\\")
-            else:
-                dll_list_path = os.path.join(
-                    openvino_path, "deployment_tools\\inference_engine\\bin\\intel64\\Release\\"
-                )
-                tbb_list_path = os.path.join(openvino_path, "deployment_tools\\inference_engine\\external\\tbb\\bin\\")
-                ngraph_list_path = os.path.join(openvino_path, "deployment_tools\\ngraph\\lib\\")
-                for ngraph_element in os.listdir(ngraph_list_path):
-                    if ngraph_element.endswith("dll"):
-                        files_list.append(
-                            "<file src="
-                            + '"'
-                            + os.path.join(ngraph_list_path, ngraph_element)
-                            + runtimes_target
-                            + args.target_architecture
-                            + '\\native" />'
-                        )
+            dll_list_path = os.path.join(openvino_path, "runtime\\bin\\intel64\\Release\\")
+            tbb_list_path = os.path.join(openvino_path, "runtime\\3rdparty\\tbb\\bin\\")
+
             for dll_element in os.listdir(dll_list_path):
                 if dll_element.endswith("dll"):
                     files_list.append(
@@ -742,26 +720,7 @@ def generate_files(line_list, args):
                         + args.target_architecture
                         + '\\native" />'
                     )
-            # plugins.xml
-            files_list.append(
-                "<file src="
-                + '"'
-                + os.path.join(dll_list_path, "plugins.xml")
-                + runtimes_target
-                + args.target_architecture
-                + '\\native" />'
-            )
-            # usb-ma2x8x.mvcmd
-            # OpenVINO 2022.3 doesn't have usb-ma2x8x.mvcmd
-            if "2022.3" not in openvino_path:
-                files_list.append(
-                    "<file src="
-                    + '"'
-                    + os.path.join(dll_list_path, "usb-ma2x8x.mvcmd")
-                    + runtimes_target
-                    + args.target_architecture
-                    + '\\native" />'
-                )
+
             for tbb_element in os.listdir(tbb_list_path):
                 if tbb_element.endswith("dll"):
                     files_list.append(
@@ -834,8 +793,10 @@ def generate_files(line_list, args):
                 "<file src=" + '"' + os.path.join(args.native_build_path, nuget_dependencies["tvm"]) + runtimes + " />"
             )
 
-        # Some tools to be packaged in nightly build only, should not be released
+        # Some tools to be packaged in nightly debug build only, should not be released
         # These are copied to the runtimes folder for convenience of loading with the dlls
+        # NOTE: nuget gives a spurious error on linux if these aren't in a separate directory to the library so
+        #       we add them to a tools folder for that reason.
         if (
             args.is_release_build.lower() != "true"
             and args.target_architecture == "x64"
@@ -845,7 +806,10 @@ def generate_files(line_list, args):
                 "<file src="
                 + '"'
                 + os.path.join(args.native_build_path, nuget_dependencies["onnxruntime_perf_test"])
-                + runtimes
+                + runtimes[:-1]
+                + "\\tools\\"
+                + nuget_dependencies["onnxruntime_perf_test"]
+                + '"'
                 + " />"
             )
 
@@ -858,7 +822,10 @@ def generate_files(line_list, args):
                 "<file src="
                 + '"'
                 + os.path.join(args.native_build_path, nuget_dependencies["onnx_test_runner"])
-                + runtimes
+                + runtimes[:-1]
+                + "\\tools\\"
+                + nuget_dependencies["onnx_test_runner"]
+                + '"'
                 + " />"
             )
 
@@ -912,7 +879,6 @@ def generate_files(line_list, args):
         os.system(copy_command + " " + source_props + " " + target_props)
         files_list.append("<file src=" + '"' + target_props + '" target="build\\native" />')
         if not is_snpe_package and not is_qnn_package:
-            files_list.append("<file src=" + '"' + target_props + '" target="build\\netstandard1.1" />')
             files_list.append("<file src=" + '"' + target_props + '" target="build\\netstandard2.0" />')
 
         # Process targets file
@@ -931,7 +897,6 @@ def generate_files(line_list, args):
         os.system(copy_command + " " + source_targets + " " + target_targets)
         files_list.append("<file src=" + '"' + target_targets + '" target="build\\native" />')
         if not is_snpe_package and not is_qnn_package:
-            files_list.append("<file src=" + '"' + target_targets + '" target="build\\netstandard1.1" />')
             files_list.append("<file src=" + '"' + target_targets + '" target="build\\netstandard2.0" />')
 
         # Process xamarin targets files
diff --git a/tools/nuget/validate_package.py b/tools/nuget/validate_package.py
index e1b9cf0c3ce11..daa96cc72e989 100644
--- a/tools/nuget/validate_package.py
+++ b/tools/nuget/validate_package.py
@@ -23,20 +23,22 @@
 ]
 gpu_related_header_files = [
     "cpu_provider_factory.h",
-    "tensorrt_provider_factory.h",
     "onnxruntime_c_api.h",
     "onnxruntime_cxx_api.h",
+    "onnxruntime_float16.h",
     "onnxruntime_cxx_inline.h",
 ]
 dmlep_related_header_files = [
     "cpu_provider_factory.h",
     "onnxruntime_c_api.h",
     "onnxruntime_cxx_api.h",
+    "onnxruntime_float16.h",
     "onnxruntime_cxx_inline.h",
     "dml_provider_factory.h",
 ]
 training_related_header_files = [
     "onnxruntime_c_api.h",
+    "onnxruntime_float16.h",
     "onnxruntime_cxx_api.h",
     "onnxruntime_cxx_inline.h",
     "onnxruntime_training_c_api.h",
diff --git a/tools/perf_view/ort_perf_view.html b/tools/perf_view/ort_perf_view.html
index e00e38702d342..509fe5593f6a1 100644
--- a/tools/perf_view/ort_perf_view.html
+++ b/tools/perf_view/ort_perf_view.html
@@ -5,7 +5,7 @@
     <script type="text/javascript" src="https://cdn.anychart.com/releases/v8/js/anychart-core.min.js"></script>
     <script src="https://cdn.anychart.com/releases/8.11.0/js/anychart-ui.min.js"></script>
     <script type="text/javascript" src="https://cdn.anychart.com/releases/v8/js/anychart-treemap.min.js"></script>
-    <link rel="stylesheet" type="text/css" href="https://cdn.anychart.com/releases/8.11.0/css/anychart-ui.min.css"/>
+    <link rel="stylesheet" type="text/css" href="https://cdn.anychart.com/releases/8.11.1/css/anychart-ui.min.css"/>
     <style>
       html, body {
         width: 100%;
@@ -86,7 +86,7 @@
             } else if (category == "Kernel") {
                 var op_name = node.args.op_name == "" ? "Session" : node.args.op_name;
                 if (summarized_gpu[op_name] == null) {
-                  
+
                   summarized_gpu[op_name] = {all:0,children:[]};
                 }
                 summarized_gpu[op_name].all += node.dur;
@@ -145,4 +145,4 @@
       });
     </script>
   </body>
-</html>
\ No newline at end of file
+</html>
diff --git a/tools/python/dump_subgraphs.py b/tools/python/dump_subgraphs.py
index a1b9782374ca7..529d798d50149 100644
--- a/tools/python/dump_subgraphs.py
+++ b/tools/python/dump_subgraphs.py
@@ -19,11 +19,11 @@ def dump_subgraph(model, output_dir, level=0):
 
     for node in graph.node:
         if node.op_type == "Scan" or node.op_type == "Loop":
-            body_attribute = list(filter(lambda attr: attr.name == "body", node.attribute))[0]
+            body_attribute = next(iter(filter(lambda attr: attr.name == "body", node.attribute)))
             export_and_recurse(node, body_attribute, output_dir, level)
         if node.op_type == "If":
-            then_attribute = list(filter(lambda attr: attr.name == "then_branch", node.attribute))[0]
-            else_attribute = list(filter(lambda attr: attr.name == "else_branch", node.attribute))[0]
+            then_attribute = next(iter(filter(lambda attr: attr.name == "then_branch", node.attribute)))
+            else_attribute = next(iter(filter(lambda attr: attr.name == "else_branch", node.attribute)))
             export_and_recurse(node, then_attribute, output_dir, level)
             export_and_recurse(node, else_attribute, output_dir, level)
 
diff --git a/tools/python/find_optimizer_opset_version_updates_required.py b/tools/python/find_optimizer_opset_version_updates_required.py
index 0076d27fe950e..8a5e57b51e38d 100644
--- a/tools/python/find_optimizer_opset_version_updates_required.py
+++ b/tools/python/find_optimizer_opset_version_updates_required.py
@@ -54,7 +54,7 @@ def get_call_args_from_file(filename: str, function_or_declaration: str) -> typi
                     # TODO: handle automatically by merging lines
                     log.error(
                         "Call/Declaration is split over multiple lines. Please check manually."
-                        "File:{} Line:{}".format(filename, line_num)
+                        f"File:{filename} Line:{line_num}"
                     )
                     continue
 
diff --git a/tools/python/fix_long_lines.py b/tools/python/fix_long_lines.py
new file mode 100644
index 0000000000000..383fdc9623551
--- /dev/null
+++ b/tools/python/fix_long_lines.py
@@ -0,0 +1,134 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+import argparse
+import logging
+import os
+import pathlib
+import shutil
+import tempfile
+
+from util import logger, run
+
+_log = logger.get_logger("fix_long_lines", logging.INFO)
+
+
+# look for long lines in the file, and if found run clang-format on those lines
+def _process_files(filenames, clang_exe, tmpdir):
+    for path in filenames:
+        _log.debug(f"Checking {path}")
+        bad_lines = []
+
+        with open(path, encoding="UTF8") as f:
+            line_num = 0
+            for line in f:
+                line_num += 1  # clang-format line numbers start at 1
+                if len(line) > 120:
+                    bad_lines.append(line_num)
+
+        if bad_lines:
+            _log.info(f"Updating {path}")
+            filename = os.path.basename(path)
+            target = os.path.join(tmpdir, filename)
+            shutil.copy(path, target)
+
+            # run clang-format to update just the long lines in the file
+            cmd = [
+                clang_exe,
+                "-i",
+            ]
+            for line in bad_lines:
+                cmd.append(f"--lines={line}:{line}")
+
+            cmd.append(target)
+
+            run(*cmd, cwd=tmpdir, check=True, shell=True)
+
+            # copy updated file back to original location
+            shutil.copy(target, path)
+
+
+# file extensions we process
+_EXTENSIONS = [".cc", ".h"]
+
+
+def _get_branch_diffs(ort_root, branch):
+    command = ["git", "diff", branch, "--name-only"]
+    result = run(*command, capture_stdout=True, check=True)
+
+    # stdout is bytes. one filename per line. decode, split, and filter to the extensions we are looking at
+    for f in result.stdout.decode("utf-8").splitlines():
+        if os.path.splitext(f.lower())[-1] in _EXTENSIONS:
+            yield os.path.join(ort_root, f)
+
+
+def _get_file_list(path):
+    for root, _, files in os.walk(path):
+        for file in files:
+            if os.path.splitext(file.lower())[-1] in _EXTENSIONS:
+                yield os.path.join(root, file)
+
+
+def main():
+    argparser = argparse.ArgumentParser(
+        "Script to fix long lines in the source using clang-format. "
+        "Only lines that exceed the 120 character maximum are altered in order to minimize the impact. "
+        "Checks .cc and .h files",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+
+    argparser.add_argument(
+        "--branch",
+        type=str,
+        default="origin/main",
+        help="Limit changes to files that differ from this branch. Use origin/main when preparing a PR.",
+    )
+
+    argparser.add_argument(
+        "--all_files",
+        action="store_true",
+        help="Process all files under /include/onnxruntime and /onnxruntime/core. Ignores --branch value.",
+    )
+
+    argparser.add_argument(
+        "--clang-format",
+        type=pathlib.Path,
+        required=False,
+        default="clang-format",
+        help="Path to clang-format executable",
+    )
+
+    argparser.add_argument("--debug", action="store_true", help="Set log level to DEBUG.")
+
+    args = argparser.parse_args()
+
+    if args.debug:
+        _log.setLevel(logging.DEBUG)
+
+    script_dir = os.path.dirname(os.path.realpath(__file__))
+    ort_root = os.path.abspath(os.path.join(script_dir, "..", ".."))
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        # create config in tmpdir
+        with open(os.path.join(tmpdir, ".clang-format"), "w") as f:
+            f.write(
+                """
+            BasedOnStyle: Google
+            ColumnLimit: 120
+            DerivePointerAlignment: false
+            """
+            )
+
+        clang_format = str(args.clang_format)
+
+        if args.all_files:
+            include_path = os.path.join(ort_root, "include", "onnxruntime")
+            src_path = os.path.join(ort_root, "onnxruntime", "core")
+            _process_files(_get_file_list(include_path), clang_format, tmpdir)
+            _process_files(_get_file_list(src_path), clang_format, tmpdir)
+        else:
+            _process_files(_get_branch_diffs(ort_root, args.branch), clang_format, tmpdir)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/python/gen_opkernel_doc.py b/tools/python/gen_opkernel_doc.py
index 2d0d16cf9a0de..1075ed8192fdd 100644
--- a/tools/python/gen_opkernel_doc.py
+++ b/tools/python/gen_opkernel_doc.py
@@ -150,7 +150,7 @@ def main(output_path: pathlib.Path, provider_filter: [str]):
                             tnameindex += 1
                             tclist = []
                             for tc in sorted(tcset):
-                                tclist.append(tc)  # noqa: PERF402
+                                tclist.append(tc)
                             fout.write("**" + tname + "** = " + format_type_constraints(tclist))
                             if tnameindex < len(typemap):
                                 fout.write("<br/> ")
diff --git a/tools/python/ort_test_dir_utils.py b/tools/python/ort_test_dir_utils.py
index 2fc4921a7bb67..cd1f5022af526 100644
--- a/tools/python/ort_test_dir_utils.py
+++ b/tools/python/ort_test_dir_utils.py
@@ -212,8 +212,8 @@ def run_test_dir(model_or_dir):
         models = onnx_models + ort_models
         if len(models) > 1:
             raise ValueError(
-                "'Multiple .onnx and/or .ort files found in {}. '"
-                "'Please provide specific .onnx or .ort file as input.".format(model_dir)
+                f"'Multiple .onnx and/or .ort files found in {model_dir}. '"
+                "'Please provide specific .onnx or .ort file as input."
             )
         elif len(models) == 0:
             raise ValueError(f"'No .onnx or .ort files found in {model_dir}.")
diff --git a/tools/python/run_CIs_for_external_pr.py b/tools/python/run_CIs_for_external_pr.py
index dcc6a92d84ef2..7a77839c4a4e7 100644
--- a/tools/python/run_CIs_for_external_pr.py
+++ b/tools/python/run_CIs_for_external_pr.py
@@ -93,6 +93,10 @@ def main():
         # checks
         "onnxruntime-python-checks-ci-pipeline",
         "onnxruntime-binary-size-checks-ci-pipeline",
+        # not currently required, but running ensures we're hitting all mobile platforms
+        "Android CI Pipeline",
+        "iOS CI Pipeline",
+        "ONNX Runtime React Native CI Pipeline",
     ]
 
     # remove pipelines that have already run successfully
diff --git a/tools/python/run_android_emulator.py b/tools/python/run_android_emulator.py
index 69fa88bd082dc..2826921726556 100755
--- a/tools/python/run_android_emulator.py
+++ b/tools/python/run_android_emulator.py
@@ -16,8 +16,8 @@
 def parse_args():
     parser = argparse.ArgumentParser(
         description="Manages the running of an Android emulator. "
-        "Supported modes are to start and stop (default), only start, or only "
-        "stop the emulator."
+        "Supported modes are to create an AVD, and start or stop the emulator. "
+        "The default is to start the emulator and wait for a keypress to stop it (start and stop)."
     )
 
     parser.add_argument("--create-avd", action="store_true", help="Whether to create the Android virtual device.")
@@ -43,8 +43,8 @@ def parse_args():
 
     args = parser.parse_args()
 
-    if not args.start and not args.stop:
-        # unspecified means start and stop
+    if not args.start and not args.stop and not args.create_avd:
+        # unspecified means start and stop if not creating the AVD
         args.start = args.stop = True
 
     if args.start != args.stop and args.emulator_pid_file is None:
diff --git a/tools/python/sparsify_initializers.py b/tools/python/sparsify_initializers.py
index 8f5034c4ef5cc..f9cc8db38ecff 100644
--- a/tools/python/sparsify_initializers.py
+++ b/tools/python/sparsify_initializers.py
@@ -78,7 +78,7 @@ def convert_tensor_to_sparse(
                 indices.append(index)
                 nnz_count += 1
 
-    sparsity = float(1.0) - float(nnz_count) / data_len
+    sparsity = 1.0 - float(nnz_count) / data_len
 
     ind_data_type = TensorProto.INT8
     ind_dtype = np.int8
@@ -126,7 +126,7 @@ def convert_tensor_to_sparse(
     # int32 indices are often selected, thus we really want to guard against loosing
     # rather than winning.
     if tensor_data_bytes <= total_sparse_bytes:
-        sparsity = float(1.0) - float(tensor_data_bytes) / total_sparse_bytes
+        sparsity = 1.0 - float(tensor_data_bytes) / total_sparse_bytes
         logger.debug(f"initializer={tensor.name}, adjusted_sparsity={sparsity}")
         return (object(), sparsity)
 
diff --git a/tools/python/util/android/android.py b/tools/python/util/android/android.py
index 0baa21179d32d..47e251d11a38d 100644
--- a/tools/python/util/android/android.py
+++ b/tools/python/util/android/android.py
@@ -3,7 +3,7 @@
 
 import collections
 import contextlib
-import logging
+import datetime
 import os
 import shutil
 import signal
@@ -11,10 +11,11 @@
 import time
 import typing
 
-from ..platform_helpers import is_windows
+from ..logger import get_logger
+from ..platform_helpers import is_linux, is_windows
 from ..run import run
 
-_log = logging.getLogger("util.android")
+_log = get_logger("util.android")
 
 
 SdkToolPaths = collections.namedtuple("SdkToolPaths", ["emulator", "adb", "sdkmanager", "avdmanager"])
@@ -30,7 +31,7 @@ def filename(name, windows_extension):
     def resolve_path(dirnames, basename):
         dirnames.insert(0, "")
         for dirname in dirnames:
-            path = shutil.which(os.path.join(dirname, basename))
+            path = shutil.which(os.path.join(os.path.expanduser(dirname), basename))
             if path is not None:
                 path = os.path.realpath(path)
                 _log.debug(f"Found {basename} at {path}")
@@ -79,6 +80,10 @@ def _start_process(*args) -> subprocess.Popen:
 
 
 def _stop_process(proc: subprocess.Popen):
+    if proc.returncode is not None:
+        # process has exited
+        return
+
     _log.debug(f"Stopping process - args: {proc.args}")
     proc.send_signal(_stop_signal)
 
@@ -90,9 +95,23 @@ def _stop_process(proc: subprocess.Popen):
 
 
 def _stop_process_with_pid(pid: int):
-    # not attempting anything fancier than just sending _stop_signal for now
-    _log.debug(f"Stopping process - pid: {pid}")
-    os.kill(pid, _stop_signal)
+    # minimize scope of external module usage
+    import psutil
+
+    if psutil.pid_exists(pid):
+        process = psutil.Process(pid)
+        _log.debug(f"Stopping process - pid={pid}")
+        process.terminate()
+        try:
+            process.wait(60)
+        except psutil.TimeoutExpired:
+            print("Process did not terminate within 60 seconds. Killing.")
+            process.kill()
+            time.sleep(10)
+            if psutil.pid_exists(pid):
+                print(f"Process still exists. State:{process.status()}")
+    else:
+        _log.debug(f"No process exists with pid={pid}")
 
 
 def start_emulator(
@@ -107,29 +126,49 @@ def start_emulator(
             "4096",
             "-timezone",
             "America/Los_Angeles",
-            "-no-snapshot",
+            "-no-snapstorage",
             "-no-audio",
             "-no-boot-anim",
-            "-no-window",
+            "-gpu",
+            "guest",
+            "-delay-adb",
         ]
+
+        # For Linux CIs we must use "-no-window" otherwise you'll get
+        #   Fatal: This application failed to start because no Qt platform plugin could be initialized
+        #
+        # For macOS CIs use a window so that we can potentially capture the desktop and the emulator screen
+        # and publish screenshot.jpg and emulator.png as artifacts to debug issues.
+        #   screencapture screenshot.jpg
+        #   $(ANDROID_SDK_HOME)/platform-tools/adb exec-out screencap -p > emulator.png
+        #
+        # On Windows it doesn't matter (AFAIK) so allow a window which is nicer for local debugging.
+        if is_linux():
+            emulator_args.append("-no-window")
+
         if extra_args is not None:
             emulator_args += extra_args
 
         emulator_process = emulator_stack.enter_context(_start_process(*emulator_args))
         emulator_stack.callback(_stop_process, emulator_process)
 
+        # we're specifying -delay-adb so use a trivial command to check when adb is available.
         waiter_process = waiter_stack.enter_context(
             _start_process(
                 sdk_tool_paths.adb,
                 "wait-for-device",
                 "shell",
-                "while [[ -z $(getprop sys.boot_completed) ]]; do sleep 1; done; input keyevent 82",
+                "ls /data/local/tmp",
             )
         )
+
         waiter_stack.callback(_stop_process, waiter_process)
 
-        # poll subprocesses
-        sleep_interval_seconds = 1
+        # poll subprocesses.
+        # allow 20 minutes for startup as some CIs are slow. TODO: Make timeout configurable if needed.
+        sleep_interval_seconds = 10
+        end_time = datetime.datetime.now() + datetime.timedelta(minutes=20)
+
         while True:
             waiter_ret, emulator_ret = waiter_process.poll(), emulator_process.poll()
 
@@ -139,13 +178,43 @@ def start_emulator(
 
             if waiter_ret is not None:
                 if waiter_ret == 0:
+                    _log.debug("adb wait-for-device process has completed.")
                     break
                 raise RuntimeError(f"Waiter process exited with return code: {waiter_ret}")
 
+            if datetime.datetime.now() > end_time:
+                raise RuntimeError("Emulator startup timeout")
+
             time.sleep(sleep_interval_seconds)
 
-        # emulator is ready now
+        # emulator is started
         emulator_stack.pop_all()
+
+        # loop to check for sys.boot_completed being set.
+        # in theory `-delay-adb` should be enough but this extra check seems to be required to be sure.
+        while True:
+            # looping on device with `while` seems to be flaky so loop here and call getprop once
+            args = [
+                sdk_tool_paths.adb,
+                "shell",
+                # "while [[ -z $(getprop sys.boot_completed) | tr -d '\r' ]]; do sleep 5; done; input keyevent 82",
+                "getprop sys.boot_completed",
+            ]
+
+            _log.debug(f"Starting process - args: {args}")
+
+            getprop_output = subprocess.check_output(args, timeout=10)
+            getprop_value = bytes.decode(getprop_output).strip()
+
+            if getprop_value == "1":
+                break
+
+            elif datetime.datetime.now() > end_time:
+                raise RuntimeError("Emulator startup timeout. sys.boot_completed was not set.")
+
+            _log.debug(f"sys.boot_completed='{getprop_value}'. Sleeping for {sleep_interval_seconds} before retrying.")
+            time.sleep(sleep_interval_seconds)
+
         return emulator_process
 
 
diff --git a/tools/python/util/get_azcopy.py b/tools/python/util/get_azcopy.py
index 76c75ad8c60eb..bfcf228a956eb 100644
--- a/tools/python/util/get_azcopy.py
+++ b/tools/python/util/get_azcopy.py
@@ -27,7 +27,7 @@
 
 
 def _check_version(azcopy_path):
-    proc = subprocess.run([azcopy_path, "--version"], stdout=subprocess.PIPE, text=True)
+    proc = subprocess.run([azcopy_path, "--version"], stdout=subprocess.PIPE, text=True)  # noqa: PLW1510
     match = re.search(r"\d+(?:\.\d+)+", proc.stdout)
 
     if not match:
diff --git a/tools/python/util/logger.py b/tools/python/util/logger.py
index 15e04528ac7ac..d6f3026959daa 100644
--- a/tools/python/util/logger.py
+++ b/tools/python/util/logger.py
@@ -4,8 +4,8 @@
 import logging
 
 
-def get_logger(name):
+def get_logger(name, level=logging.DEBUG):
     logging.basicConfig(format="%(asctime)s %(name)s [%(levelname)s] - %(message)s")
     logger = logging.getLogger(name)
-    logger.setLevel(logging.DEBUG)
+    logger.setLevel(level)
     return logger
diff --git a/tools/python/util/mobile_helpers/check_model_can_use_ort_mobile_pkg.py b/tools/python/util/mobile_helpers/check_model_can_use_ort_mobile_pkg.py
index 113b5398f3981..9eccb7c36455f 100644
--- a/tools/python/util/mobile_helpers/check_model_can_use_ort_mobile_pkg.py
+++ b/tools/python/util/mobile_helpers/check_model_can_use_ort_mobile_pkg.py
@@ -10,9 +10,8 @@
 import sys
 
 import onnx
-from onnx import shape_inference
 
-from ..onnx_model_utils import get_opsets_imported
+from ..onnx_model_utils import ModelProtoWithShapeInfo, get_opsets_imported
 from ..reduced_build_config_parser import parse_config
 
 cpp_to_tensorproto_type = {
@@ -265,15 +264,13 @@ def run_check(model_path: pathlib.Path, mobile_pkg_build_config: pathlib.Path, l
     )
 
     model_file = model_path.resolve(strict=True)
-    model = onnx.load(str(model_file))
 
     # we need to run shape inferencing to populate that type info for node outputs.
     # we will get warnings if the model uses ORT contrib ops (ONNX does not have shape inferencing for those),
     # and shape inferencing will be lost downstream of those.
     # TODO: add support for checking ORT format model as it will have full type/shape info for all nodes
-    model_with_type_info = shape_inference.infer_shapes(model)
-
-    return run_check_with_model(model_with_type_info, mobile_pkg_build_config, logger)
+    model_wrapper = ModelProtoWithShapeInfo(model_file)
+    return run_check_with_model(model_wrapper.model_with_shape_info, mobile_pkg_build_config, logger)
 
 
 def main():
diff --git a/tools/python/util/mobile_helpers/usability_checker.py b/tools/python/util/mobile_helpers/usability_checker.py
index f8b0bfe707ead..dcb3451a5e0fa 100644
--- a/tools/python/util/mobile_helpers/usability_checker.py
+++ b/tools/python/util/mobile_helpers/usability_checker.py
@@ -13,6 +13,7 @@
 import onnx
 
 from ..onnx_model_utils import (
+    ModelProtoWithShapeInfo,
     get_producer_consumer_maps,
     is_fixed_size_tensor,
     iterate_graph_per_graph_func,
@@ -464,9 +465,9 @@ def check_shapes(graph: onnx.GraphProto, logger: Optional[logging.Logger] = None
     return dynamic_inputs, num_dynamic_values
 
 
-def checker(model_path, logger: logging.Logger):
-    model = onnx.load(model_path)
-    model_with_shape_info = onnx.shape_inference.infer_shapes(model)
+def checker(model_path: pathlib.Path, logger: logging.Logger):
+    model_with_shape_info_wrapper = ModelProtoWithShapeInfo(model_path)
+    model_with_shape_info = model_with_shape_info_wrapper.model_with_shape_info
 
     # create lookup map for efficiency
     value_to_shape = {}
@@ -541,10 +542,10 @@ def analyze_model(model_path: pathlib.Path, skip_optimize: bool = False, logger:
     with tempfile.TemporaryDirectory() as tmp:
         if not skip_optimize:
             tmp_path = pathlib.Path(tmp) / model_path.name
-            optimize_model(model_path, tmp_path)
+            optimize_model(model_path, tmp_path, use_external_initializers=True)
             model_path = tmp_path
 
-        try_eps = checker(str(model_path.resolve(strict=True)), logger)
+        try_eps = checker(model_path.resolve(strict=True), logger)
 
     return try_eps
 
diff --git a/tools/python/util/onnx_model_utils.py b/tools/python/util/onnx_model_utils.py
index e662d1623f8bd..5c970430a3a82 100644
--- a/tools/python/util/onnx_model_utils.py
+++ b/tools/python/util/onnx_model_utils.py
@@ -95,6 +95,7 @@ def optimize_model(
     output_path: pathlib.Path,
     level: ort.GraphOptimizationLevel = ort.GraphOptimizationLevel.ORT_ENABLE_BASIC,
     log_level: int = 3,
+    use_external_initializers: bool = False,
 ):
     """
     Optimize an ONNX model using ONNX Runtime to the specified level
@@ -103,12 +104,25 @@ def optimize_model(
     :param level: onnxruntime.GraphOptimizationLevel to use. Default is ORT_ENABLE_BASIC.
     :param log_level: Log level. Defaults to Error (3) so we don't get output about unused initializers being removed.
                       Warning (2) or Info (1) may be desirable in some scenarios.
+    :param use_external_initializers: Set flag to write initializers to an external file. Required if model > 2GB.
+                                      Requires onnxruntime 1.17+
     """
     so = ort.SessionOptions()
     so.optimized_model_filepath = str(output_path.resolve())
     so.graph_optimization_level = level
     so.log_severity_level = log_level
 
+    # save using external initializers so models > 2 GB are handled
+    if use_external_initializers:
+        major, minor, rest = ort.__version__.split(".", 3)
+        if (int(major), int(minor)) >= (1, 17):
+            so.add_session_config_entry("session.optimized_model_external_initializers_file_name", "external_data.pb")
+        else:
+            raise ValueError(
+                "ONNX Runtime 1.17 or higher required to save initializers as external data when optimizing model. "
+                f"Current ONNX Runtime version is {ort.__version__}"
+            )
+
     # create session to optimize. this will write the updated model to output_path
     _ = ort.InferenceSession(str(model_path.resolve(strict=True)), so, providers=["CPUExecutionProvider"])
 
@@ -366,3 +380,34 @@ def get_optimization_level(level):
         return ort.GraphOptimizationLevel.ORT_ENABLE_ALL
 
     raise ValueError("Invalid optimization level of " + level)
+
+
+class ModelProtoWithShapeInfo:
+    """
+    Class to load an ONNX model and run shape inferencing on it to populate the ValueInfo.
+    The model_with_shape_info property will contain the updated model.
+    If the model is > 2GB and uses external data a temporary file is required to run shape inferencing successfully.
+    This helper class handles automatic removal of the temporary file.
+    """
+
+    def __init__(self, model_path: pathlib.Path):
+        """
+        :param model_path: Path to ONNX model to load and run shape inferencing on.
+        """
+
+        self.model_path = model_path
+
+        model = onnx.load(str(model_path))
+        self.model_with_shape_info = onnx.shape_inference.infer_shapes(model, strict_mode=True)
+
+        # ONNX has a silent failure from the call to infer_shapes when the model is > 2GB.
+        # We detect that by checking the nodes in the returned model.
+        self._tmp_model_path = None
+        if len(model.graph.node) > 0 and len(self.model_with_shape_info.graph.node) == 0:
+            self._tmp_model_path = pathlib.Path(model_path).with_suffix(".temp_with_shapeinf.onnx")
+            onnx.shape_inference.infer_shapes_path(str(model_path), str(self._tmp_model_path), strict_mode=True)
+            self.model_with_shape_info = onnx.load(str(self._tmp_model_path))
+
+    def __del__(self):
+        if self._tmp_model_path:
+            self._tmp_model_path.unlink(missing_ok=True)
diff --git a/tools/python/util/ort_format_model/operator_type_usage_processors.py b/tools/python/util/ort_format_model/operator_type_usage_processors.py
index 5905000a14972..22d7dff3e13b2 100644
--- a/tools/python/util/ort_format_model/operator_type_usage_processors.py
+++ b/tools/python/util/ort_format_model/operator_type_usage_processors.py
@@ -193,7 +193,7 @@ def process_node(self, node: fbs.Node, value_name_to_typeinfo: dict):
     def is_typed_registration_needed(
         self, type_in_registration: str, globally_allowed_types: typing.Optional[typing.Set[str]]
     ):
-        if 0 not in self._input_types.keys():
+        if 0 not in self._input_types:
             # currently all standard typed registrations are for input 0.
             # custom registrations can be handled by operator specific processors (e.g. OneHotProcessor below).
             raise RuntimeError(f"Expected typed registration to use type from input 0. Node:{self.name}")
diff --git a/winml/adapter/winml_adapter_environment.cpp b/winml/adapter/winml_adapter_environment.cpp
index 43babdf43967e..e2da473c7d5b5 100644
--- a/winml/adapter/winml_adapter_environment.cpp
+++ b/winml/adapter/winml_adapter_environment.cpp
@@ -9,6 +9,7 @@
 #include "winml_adapter_apis.h"
 #include "core/framework/error_code_helper.h"
 #include "core/session/ort_env.h"
+#include "core/session/user_logging_sink.h"
 
 #ifdef USE_DML
 #include "abi_custom_registry_impl.h"
@@ -18,12 +19,12 @@
 #endif USE_DML
 namespace winmla = Windows::AI::MachineLearning::Adapter;
 
-class WinmlAdapterLoggingWrapper : public LoggingWrapper {
+class WinmlAdapterLoggingWrapper : public onnxruntime::UserLoggingSink {
  public:
   WinmlAdapterLoggingWrapper(
     OrtLoggingFunction logging_function, OrtProfilingFunction profiling_function, void* logger_param
   )
-    : LoggingWrapper(logging_function, logger_param),
+    : onnxruntime::UserLoggingSink(logging_function, logger_param),
       profiling_function_(profiling_function) {}
 
   void SendProfileEvent(onnxruntime::profiling::EventRecord& event_record) const override {
diff --git a/winml/lib/Api.Image/CpuDetensorizer.h b/winml/lib/Api.Image/CpuDetensorizer.h
index dbafeed72cda8..e175fbbb4b6a3 100644
--- a/winml/lib/Api.Image/CpuDetensorizer.h
+++ b/winml/lib/Api.Image/CpuDetensorizer.h
@@ -128,8 +128,12 @@ class CpuDetensorizer {
     return nominalRangeConverter.Denormalize(*pCPUTensor);
   }
 
+  // clang-format off
   template <>
-  static float ReadTensor<DirectX::PackedVector::HALF>(
+#if _MSVC_LANG < 202002L
+  static
+#endif
+  float ReadTensor<DirectX::PackedVector::HALF>(
     const DirectX::PackedVector::HALF* pCPUTensor, const NominalRangeConverter& nominalRangeConverter
   ) {
     return nominalRangeConverter.Denormalize(DirectX::PackedVector::XMConvertHalfToFloat(*pCPUTensor));
@@ -167,9 +171,13 @@ class CpuDetensorizer {
     }
   }
 
+  // clang-format off
 #if defined(_M_AMD64) || defined(_M_IX86)
   template <>
-  static void InterleaveRowFloatToByte(
+#if _MSVC_LANG < 202002L
+  static
+#endif
+  void InterleaveRowFloatToByte(
     const float* xChannel,
     const float* yChannel,
     const float* zChannel,
diff --git a/winml/lib/Api.Image/CpuTensorizer.h b/winml/lib/Api.Image/CpuTensorizer.h
index d4e26cde7a420..ca5773b28fce2 100644
--- a/winml/lib/Api.Image/CpuTensorizer.h
+++ b/winml/lib/Api.Image/CpuTensorizer.h
@@ -112,13 +112,23 @@ class CpuTensorizer {
   template <typename T>
   static T ConvertByteToFloat(const BYTE& input, const NominalRangeConverter& nominalRangeConverter);
 
+  // clang-format off
   template <>
-  static float ConvertByteToFloat(const BYTE& input, const NominalRangeConverter& nominalRangeConverter) {
+#if _MSVC_LANG < 202002L
+  static
+#endif
+  float ConvertByteToFloat(const BYTE& input, const NominalRangeConverter& nominalRangeConverter) {
     return nominalRangeConverter.Normalize(static_cast<float>(input));
   }
+
+  // clang-format off
   template <>
-  static DirectX::PackedVector::HALF ConvertByteToFloat(
-    const BYTE& input, const NominalRangeConverter& nominalRangeConverter
+#if _MSVC_LANG < 202002L
+  static
+#endif
+  DirectX::PackedVector::HALF ConvertByteToFloat(
+    const BYTE& input,
+    const NominalRangeConverter& nominalRangeConverter
   ) {
     return nominalRangeConverter.Normalize(DirectX::PackedVector::XMConvertFloatToHalf(input));
   }
@@ -159,9 +169,13 @@ class CpuTensorizer {
     }
   }
 
+  // clang-format off
 #if defined(_M_AMD64) || defined(_M_IX86)
   template <>
-  static void DeinterleaveRowByteToFloat(
+#if _MSVC_LANG < 202002L
+  static
+#endif
+  void DeinterleaveRowByteToFloat(
     _In_ BYTE* pBuffer,
     _Inout_ float* xChannel,
     _Inout_ float* yChannel,
diff --git a/winml/lib/Api.Image/TensorToVideoFrameConverter.cpp b/winml/lib/Api.Image/TensorToVideoFrameConverter.cpp
index 2654885d6bee8..76a5623c5b4a5 100644
--- a/winml/lib/Api.Image/TensorToVideoFrameConverter.cpp
+++ b/winml/lib/Api.Image/TensorToVideoFrameConverter.cpp
@@ -170,8 +170,8 @@ void TensorToVideoFrameConverter::DX12TensorToVideoFrame(
           D3D12_RESOURCE_DESC cachedTextureDesc = output_resource_->GetDesc();
 
           if (cachedTextureDesc.Width != videoFrameTextureDesc.Width ||
-                        cachedTextureDesc.Height != videoFrameTextureDesc.Height ||
-                        cachedTextureDesc.Format != videoFrameTextureDesc.Format) {
+              cachedTextureDesc.Height != videoFrameTextureDesc.Height ||
+              cachedTextureDesc.Format != videoFrameTextureDesc.Format) {
             // The dimensions or format don't match, so we need to re-create our texture
             output_resource_ = CreateShareableD3D12Texture(videoFrameTextureDesc, pDeviceCache->GetD3D12Device());
             D3D11_cached_texture_ = ShareD3D12Texture(output_resource_.Get(), pDeviceCache->GetD3D11Device());
@@ -197,11 +197,10 @@ void TensorToVideoFrameConverter::DX12TensorToVideoFrame(
         UINT handleSize = static_cast<UINT>(sizeof(sharedHandle));
 
         if ((FAILED(spVideoFrameTexture->GetPrivateData(
-                         _d3d11TextureGUID, &comPtrSize, spSharedD3D11Texture.GetAddressOf()
-                     )) ||
-                     !spSharedD3D11Texture.Get()) ||
-                    (FAILED(spVideoFrameTexture->GetPrivateData(_handleGUID, &handleSize, &sharedHandle)) ||
-                     sharedHandle != shared_handle_)) {
+                 _d3d11TextureGUID, &comPtrSize, spSharedD3D11Texture.GetAddressOf())) ||
+             !spSharedD3D11Texture.Get()) ||
+            (FAILED(spVideoFrameTexture->GetPrivateData(_handleGUID, &handleSize, &sharedHandle)) ||
+             sharedHandle != shared_handle_)) {
           // Create a new shared texture that we cache on the video frame texture
           output_resource_ = CreateShareableD3D12Texture(videoFrameTextureDesc, pDeviceCache->GetD3D12Device());
           spSharedD3D11Texture = ShareD3D12Texture(output_resource_.Get(), spTextureDevice.Get());
@@ -499,9 +498,9 @@ void TensorToVideoFrameConverter::ConvertGPUTensorToDX12Texture(
   outputResourceDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
 
   if (!UAV_resource_ || outputDesc.Format != UAV_resource_->GetDesc().Format ||
-        outputDesc.Width != UAV_resource_->GetDesc().Width || outputDesc.Height != UAV_resource_->GetDesc().Height) {
+      outputDesc.Width != UAV_resource_->GetDesc().Width || outputDesc.Height != UAV_resource_->GetDesc().Height) {
     WINML_THROW_IF_FAILED(device_cache.GetD3D12Device()->CreateCommittedResource(
-      &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT),
+      unmove_ptr(CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT)),
       D3D12_HEAP_FLAG_NONE,
       &outputResourceDesc,
       D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
@@ -599,31 +598,31 @@ void TensorToVideoFrameConverter::ConvertGPUTensorToDX12Texture(
 
     command_list_->ResourceBarrier(
       1,
-      &CD3DX12_RESOURCE_BARRIER::Transition(
+      unmove_ptr(CD3DX12_RESOURCE_BARRIER::Transition(
         pInputResource, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE
-      )
+      ))
     );
     command_list_->Dispatch(dispatchWidth, dispatchHeight, 1);
     command_list_->ResourceBarrier(
       1,
-      &CD3DX12_RESOURCE_BARRIER::Transition(
+      unmove_ptr(CD3DX12_RESOURCE_BARRIER::Transition(
         pInputResource, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS
-      )
+      ))
     );
 
     // Copy the UAV data to the output resource after detensorization
     command_list_->ResourceBarrier(
       1,
-      &CD3DX12_RESOURCE_BARRIER::Transition(
+      unmove_ptr(CD3DX12_RESOURCE_BARRIER::Transition(
         UAV_resource_.Get(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE
-      )
+      ))
     );
     command_list_->CopyResource(pOutputResource, UAV_resource_.Get());
     command_list_->ResourceBarrier(
       1,
-      &CD3DX12_RESOURCE_BARRIER::Transition(
+      unmove_ptr(CD3DX12_RESOURCE_BARRIER::Transition(
         UAV_resource_.Get(), D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS
-      )
+      ))
     );
 
     WINML_THROW_IF_FAILED(command_list_->Close());
@@ -657,9 +656,9 @@ void TensorToVideoFrameConverter::ConvertGPUTensorToSoftwareBitmap(
   // TODO: Make an allocator for readback heaps
   if (!readback_heap_ || readback_heap_->GetDesc().Width < singleVideoFramebufferSize) {
     WINML_THROW_IF_FAILED(device_cache.GetD3D12Device()->CreateCommittedResource(
-      &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK),
+      unmove_ptr(CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK)),
       D3D12_HEAP_FLAG_NONE,
-      &CD3DX12_RESOURCE_DESC::Buffer(singleVideoFramebufferSize),
+      unmove_ptr(CD3DX12_RESOURCE_DESC::Buffer(singleVideoFramebufferSize)),
       D3D12_RESOURCE_STATE_COPY_DEST,
       nullptr,
       IID_PPV_ARGS(&readback_heap_)
@@ -689,12 +688,14 @@ void TensorToVideoFrameConverter::ConvertGPUTensorToSoftwareBitmap(
   device_cache.SyncD3D12ToCPU();
 
   void* pCPUTensorBuffer = nullptr;
-  WINML_THROW_IF_FAILED(readback_heap_->Map(0, &CD3DX12_RANGE(0, singleVideoFramebufferSize), &pCPUTensorBuffer));
+  WINML_THROW_IF_FAILED(
+    readback_heap_->Map(0, unmove_ptr(CD3DX12_RANGE(0, singleVideoFramebufferSize)), &pCPUTensorBuffer)
+  );
 
   // We avoid the Video Frame pipeline by manually downloading the GPU data to the CPU and detensorize while we are filling the readback heap
   ConvertCPUTensorToSoftwareBitmap(pCPUTensorBuffer, tensorDesc, softwareBitmap);
 
-  readback_heap_->Unmap(0, &CD3DX12_RANGE(0, 0));
+  readback_heap_->Unmap(0, unmove_ptr(CD3DX12_RANGE(0, 0)));
 }
 
 void TensorToVideoFrameConverter::ConvertBatchedDX12TensorToBuffers(
@@ -708,9 +709,9 @@ void TensorToVideoFrameConverter::ConvertBatchedDX12TensorToBuffers(
   // TODO: Make an allocator for readback heaps
   if (!readback_heap_ || readback_heap_->GetDesc().Width < buffer_size_in_bytes) {
     WINML_THROW_IF_FAILED(device_cache.GetD3D12Device()->CreateCommittedResource(
-      &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK),
+      unmove_ptr(CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK)),
       D3D12_HEAP_FLAG_NONE,
-      &CD3DX12_RESOURCE_DESC::Buffer(buffer_size_in_bytes),
+      unmove_ptr(CD3DX12_RESOURCE_DESC::Buffer(buffer_size_in_bytes)),
       D3D12_RESOURCE_STATE_COPY_DEST,
       nullptr,
       IID_PPV_ARGS(&readback_heap_)
@@ -733,9 +734,9 @@ void TensorToVideoFrameConverter::ConvertBatchedDX12TensorToBuffers(
   device_cache.SyncD3D12ToCPU();
 
   byte* readback_buffer = nullptr;
-  WINML_THROW_IF_FAILED(
-    readback_heap_->Map(0, &CD3DX12_RANGE(0, buffer_size_in_bytes), reinterpret_cast<void**>(&readback_buffer))
-  );
+  WINML_THROW_IF_FAILED(readback_heap_->Map(
+    0, unmove_ptr(CD3DX12_RANGE(0, buffer_size_in_bytes)), reinterpret_cast<void**>(&readback_buffer)
+  ));
   auto readback_buffer_span = gsl::span<byte>(readback_buffer, buffer_size_in_bytes);
   _winml::StoreSpanIntoDisjointBuffers(
     buffers.size(),
@@ -748,7 +749,7 @@ void TensorToVideoFrameConverter::ConvertBatchedDX12TensorToBuffers(
     readback_buffer_span
   );
 
-  readback_heap_->Unmap(0, &CD3DX12_RANGE(0, 0));
+  readback_heap_->Unmap(0, unmove_ptr(CD3DX12_RANGE(0, 0)));
 }
 
 D3D12_SHADER_RESOURCE_VIEW_DESC TensorToVideoFrameConverter::CreateSRVDescriptor(
diff --git a/winml/lib/Api.Image/VideoFrameToTensorConverter.cpp b/winml/lib/Api.Image/VideoFrameToTensorConverter.cpp
index b856c6bdbfeca..0a763c77c94f4 100644
--- a/winml/lib/Api.Image/VideoFrameToTensorConverter.cpp
+++ b/winml/lib/Api.Image/VideoFrameToTensorConverter.cpp
@@ -600,9 +600,9 @@ void VideoFrameToTensorConverter::ConvertSoftwareBitmapToGPUTensor(
   // TODO: Make an allocator for upload heaps
   if (!upload_heap_ || upload_heap_->GetDesc().Width < bufferSize) {
     WINML_THROW_IF_FAILED(device_cache.GetD3D12Device()->CreateCommittedResource(
-      &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD),
+      unmove_ptr(CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD)),
       D3D12_HEAP_FLAG_NONE,
-      &CD3DX12_RESOURCE_DESC::Buffer(bufferSize),
+      unmove_ptr(CD3DX12_RESOURCE_DESC::Buffer(bufferSize)),
       D3D12_RESOURCE_STATE_GENERIC_READ,
       nullptr,
       IID_PPV_ARGS(&upload_heap_)
@@ -610,14 +610,14 @@ void VideoFrameToTensorConverter::ConvertSoftwareBitmapToGPUTensor(
   }
 
   void* pCPUTensorBuffer = nullptr;
-  WINML_THROW_IF_FAILED(upload_heap_->Map(0, &CD3DX12_RANGE(0, 0), &pCPUTensorBuffer));
+  WINML_THROW_IF_FAILED(upload_heap_->Map(0, unmove_ptr(CD3DX12_RANGE(0, 0)), unmove_ptr(pCPUTensorBuffer)));
 
   // We avoid the Video Frame pipeline by manually sending the CPU data to the GPU, and we tensorize while we are filling the
   // upload heap. The image may already have been cropped/scaled by the video frame pipeline, so we send the scaled bounds
   // instead of the initial input bounds
   ConvertSoftwareBitmapToCPUTensor(convertedSoftwareBitmap, tensorDesc, scaledBounds, pCPUTensorBuffer);
 
-  upload_heap_->Unmap(0, &CD3DX12_RANGE(0, bufferSize));
+  upload_heap_->Unmap(0, unmove_ptr(CD3DX12_RANGE(0, bufferSize)));
 
   ResetCommandList(device_cache);
 
@@ -642,9 +642,9 @@ void VideoFrameToTensorConverter::ConvertBuffersToBatchedGPUTensor(
   // Copy the cpu memory into the gpu resource
   if (!upload_heap_ || upload_heap_->GetDesc().Width < buffer_size_in_bytes) {
     WINML_THROW_IF_FAILED(device_cache.GetD3D12Device()->CreateCommittedResource(
-      &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD),
+      unmove_ptr(CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD)),
       D3D12_HEAP_FLAG_NONE,
-      &CD3DX12_RESOURCE_DESC::Buffer(buffer_size_in_bytes),
+      unmove_ptr(CD3DX12_RESOURCE_DESC::Buffer(buffer_size_in_bytes)),
       D3D12_RESOURCE_STATE_GENERIC_READ,
       nullptr,
       IID_PPV_ARGS(&upload_heap_)
@@ -652,7 +652,7 @@ void VideoFrameToTensorConverter::ConvertBuffersToBatchedGPUTensor(
   }
 
   byte* gpu_buffer = nullptr;
-  WINML_THROW_IF_FAILED(upload_heap_->Map(0, &CD3DX12_RANGE(0, 0), reinterpret_cast<void**>(&gpu_buffer)));
+  WINML_THROW_IF_FAILED(upload_heap_->Map(0, unmove_ptr(CD3DX12_RANGE(0, 0)), reinterpret_cast<void**>(&gpu_buffer)));
   auto gpu_buffer_span = gsl::span<byte>(gpu_buffer, buffer_size_in_bytes);
 
   _winml::LoadSpanFromDisjointBuffers(
@@ -666,7 +666,7 @@ void VideoFrameToTensorConverter::ConvertBuffersToBatchedGPUTensor(
     gpu_buffer_span
   );
 
-  upload_heap_->Unmap(0, &CD3DX12_RANGE(0, buffer_size_in_bytes));
+  upload_heap_->Unmap(0, unmove_ptr(CD3DX12_RANGE(0, buffer_size_in_bytes)));
 
   ResetCommandList(device_cache);
 
diff --git a/winml/lib/Api.Image/inc/D3DDeviceCache.h b/winml/lib/Api.Image/inc/D3DDeviceCache.h
index 13dd773ccd19d..ce3b8efccd571 100644
--- a/winml/lib/Api.Image/inc/D3DDeviceCache.h
+++ b/winml/lib/Api.Image/inc/D3DDeviceCache.h
@@ -35,6 +35,11 @@ enum class PipelineStateCacheOperation : unsigned char {
   kCount = 2
 };
 
+template <typename E>
+constexpr auto underlying(E e) noexcept {
+  return static_cast<typename std::underlying_type<E>::type>(e);
+}
+
 class D3DDeviceCache {
  public:
   ~D3DDeviceCache();
@@ -104,9 +109,10 @@ class D3DDeviceCache {
   winrt::com_ptr<ID3D12RootSignature> tensorize_root_signature_;
   winrt::com_ptr<ID3D12RootSignature> detensorize_root_signature_;
 
+  // clang-format off
   winrt::com_ptr<ID3D12PipelineState>
-    cached_pipeline_state[PipelineStateCacheType::kCount][PipelineStateCacheFormat::kCount]
-                         [PipelineStateCacheFormat::kCount][PipelineStateCacheOperation::kCount];
+    cached_pipeline_state[underlying(PipelineStateCacheType::kCount)][underlying(PipelineStateCacheFormat::kCount)]
+                         [underlying(PipelineStateCacheFormat::kCount)][underlying(PipelineStateCacheOperation::kCount)];
 
   winrt::com_ptr<ID3D12Resource> detensorize_vertex_buffer_;
 
diff --git a/winml/lib/Api.Image/inc/ImageConversionHelpers.h b/winml/lib/Api.Image/inc/ImageConversionHelpers.h
index 8e3dca2ae11e8..5a9c8f21255b5 100644
--- a/winml/lib/Api.Image/inc/ImageConversionHelpers.h
+++ b/winml/lib/Api.Image/inc/ImageConversionHelpers.h
@@ -52,5 +52,6 @@ bool VideoFramesHaveSameDevice(const wm::IVideoFrame& video_frame_1, const wm::I
 wgdx::Direct3D11::IDirect3DDevice GetDeviceFromDirect3DSurface(const wgdx::Direct3D11::IDirect3DSurface& d3dSurface);
 
 constexpr std::array<DXGI_FORMAT, 3> supportedWinMLFormats = {
-  DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_B8G8R8A8_UNORM, DXGI_FORMAT_B8G8R8X8_UNORM};
+  DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_B8G8R8A8_UNORM, DXGI_FORMAT_B8G8R8X8_UNORM
+};
 }  // namespace _winml::Imaging
diff --git a/winml/lib/Api.Image/inc/TensorToVideoFrameConverter.h b/winml/lib/Api.Image/inc/TensorToVideoFrameConverter.h
index 12f676459293b..138e755289df9 100644
--- a/winml/lib/Api.Image/inc/TensorToVideoFrameConverter.h
+++ b/winml/lib/Api.Image/inc/TensorToVideoFrameConverter.h
@@ -40,10 +40,7 @@ class TensorToVideoFrameConverter : public ImageConverter {
 
  private:
   GUID _d3d11TextureGUID = {
-    0x14bf1054,
-    0x6ce7,
-    0x4c00,
-    {0xa1, 0x32, 0xb0, 0xf2, 0x11, 0x5D, 0xE0, 0x7f}
+    0x14bf1054, 0x6ce7, 0x4c00, {0xa1, 0x32, 0xb0, 0xf2, 0x11, 0x5D, 0xE0, 0x7f}
   };  // {14BF1054-6CE7-4C00-A132-B0F2115DE07F}
   GUID _handleGUID = {
     0x700148fc, 0xc0cb, 0x4a7e, {0xa7, 0xc0, 0xe7, 0x43, 0xc1, 0x9, 0x9d, 0x62}
@@ -80,7 +77,7 @@ class TensorToVideoFrameConverter : public ImageConverter {
     _Inout_ wm::VideoFrame& unsupported_video_frame
   );
 
-  static D3D12_SHADER_RESOURCE_VIEW_DESC TensorToVideoFrameConverter::CreateSRVDescriptor(
+  static D3D12_SHADER_RESOURCE_VIEW_DESC CreateSRVDescriptor(
     const UINT32 batch_index, const D3D12_RESOURCE_DESC& resource_description, const ImageTensorDescription& description
   );
 
diff --git a/winml/lib/Api.Image/inc/VideoFrameToTensorConverter.h b/winml/lib/Api.Image/inc/VideoFrameToTensorConverter.h
index e34030bbd6833..ebfccabf7e814 100644
--- a/winml/lib/Api.Image/inc/VideoFrameToTensorConverter.h
+++ b/winml/lib/Api.Image/inc/VideoFrameToTensorConverter.h
@@ -50,10 +50,7 @@ class VideoFrameToTensorConverter : public ImageConverter {
 
  private:
   GUID d3d11_texture_GUID_ = {
-    0x485e4bb3,
-    0x3fe8,
-    0x497b,
-    {0x85, 0x9e, 0xc7, 0x5, 0x18, 0xdb, 0x11, 0x2a}
+    0x485e4bb3, 0x3fe8, 0x497b, {0x85, 0x9e, 0xc7, 0x5, 0x18, 0xdb, 0x11, 0x2a}
   };  // {485E4BB3-3FE8-497B-859E-C70518DB112A}
   GUID handle_GUID_ = {
     0xce43264e, 0x41f7, 0x4882, {0x9e, 0x20, 0xfa, 0xa5, 0x1e, 0x37, 0x64, 0xfc}
@@ -86,7 +83,7 @@ class VideoFrameToTensorConverter : public ImageConverter {
     const UINT32 batch_index, const D3D12_RESOURCE_DESC& resource_description, const ImageTensorDescription& description
   );
 
-  static void VideoFrameToTensorConverter::ConvertSoftwareBitmapToCPUTensor(
+  static void ConvertSoftwareBitmapToCPUTensor(
     _In_ const wgi::SoftwareBitmap& software_bitmap,
     _In_ const ImageTensorDescription& tensor_description,
     _In_ const wgi::BitmapBounds& input_bounds,
diff --git a/winml/lib/Api.Ort/OnnxruntimeEngine.cpp b/winml/lib/Api.Ort/OnnxruntimeEngine.cpp
index 78b1a109d2da0..5bb0ce424f66c 100644
--- a/winml/lib/Api.Ort/OnnxruntimeEngine.cpp
+++ b/winml/lib/Api.Ort/OnnxruntimeEngine.cpp
@@ -862,12 +862,12 @@ struct FillMapTensors {
   static HRESULT Run(
     const OrtApi* ort_api, IInspectable* map_insp, OrtValue* keys_ort_value, OrtValue* values_ort_value
   ) {
-    AbiTypeInfo<TAbiKey>::OrtType* keys_mutable_data;
+    typename AbiTypeInfo<TAbiKey>::OrtType* keys_mutable_data;
     RETURN_HR_IF_NOT_OK_MSG(
       ort_api->GetTensorMutableData(keys_ort_value, reinterpret_cast<void**>(&keys_mutable_data)), ort_api
     );
 
-    AbiTypeInfo<TAbiValue>::OrtType* values_mutable_data;
+    typename AbiTypeInfo<TAbiValue>::OrtType* values_mutable_data;
     RETURN_HR_IF_NOT_OK_MSG(
       ort_api->GetTensorMutableData(values_ort_value, reinterpret_cast<void**>(&values_mutable_data)), ort_api
     );
@@ -888,7 +888,7 @@ struct FillMapTensors<HSTRING, TAbiValue> {
   static HRESULT Run(
     const OrtApi* ort_api, IInspectable* map_insp, OrtValue* keys_ort_value, OrtValue* values_ort_value
   ) {
-    AbiTypeInfo<TAbiValue>::OrtType* values_mutable_data;
+    typename AbiTypeInfo<TAbiValue>::OrtType* values_mutable_data;
     RETURN_HR_IF_NOT_OK_MSG(
       ort_api->GetTensorMutableData(values_ort_value, reinterpret_cast<void**>(&values_mutable_data)), ort_api
     );
@@ -916,7 +916,7 @@ struct FillMapTensors<TAbiKey, HSTRING> {
   static HRESULT Run(
     const OrtApi* ort_api, IInspectable* map_insp, OrtValue* keys_ort_value, OrtValue* values_ort_value
   ) {
-    AbiTypeInfo<TAbiKey>::OrtType* keys_mutable_data;
+    typename AbiTypeInfo<TAbiKey>::OrtType* keys_mutable_data;
     RETURN_HR_IF_NOT_OK_MSG(
       ort_api->GetTensorMutableData(keys_ort_value, reinterpret_cast<void**>(&keys_mutable_data)), ort_api
     );
diff --git a/winml/lib/Api.Ort/OnnxruntimeModel.cpp b/winml/lib/Api.Ort/OnnxruntimeModel.cpp
index fb8413a897e75..24eb44b73dd3c 100644
--- a/winml/lib/Api.Ort/OnnxruntimeModel.cpp
+++ b/winml/lib/Api.Ort/OnnxruntimeModel.cpp
@@ -81,7 +81,8 @@ HRESULT ModelInfo::RuntimeClassInitialize(_In_ OnnxruntimeEngineFactory* engine_
     winml_adapter_api->ModelGetInputCount,
     winml_adapter_api->ModelGetInputName,
     winml_adapter_api->ModelGetInputDescription,
-    winml_adapter_api->ModelGetInputTypeInfo};
+    winml_adapter_api->ModelGetInputTypeInfo
+  };
 
   // Create inputs
   std::vector<OnnxruntimeValueInfoWrapper> inputs;
@@ -93,7 +94,8 @@ HRESULT ModelInfo::RuntimeClassInitialize(_In_ OnnxruntimeEngineFactory* engine_
     winml_adapter_api->ModelGetOutputCount,
     winml_adapter_api->ModelGetOutputName,
     winml_adapter_api->ModelGetOutputDescription,
-    winml_adapter_api->ModelGetOutputTypeInfo};
+    winml_adapter_api->ModelGetOutputTypeInfo
+  };
 
   std::vector<OnnxruntimeValueInfoWrapper> outputs;
   RETURN_IF_FAILED(CreateFeatureDescriptors(engine_factory, &output_helpers, ort_model, outputs));
diff --git a/winml/lib/Api/HardwareCoreEnumerator.cpp b/winml/lib/Api/HardwareCoreEnumerator.cpp
new file mode 100644
index 0000000000000..a89ac561f8860
--- /dev/null
+++ b/winml/lib/Api/HardwareCoreEnumerator.cpp
@@ -0,0 +1,90 @@
+﻿// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "lib/Api/pch/pch.h"
+
+#include "HardwareCoreEnumerator.h"
+
+namespace WINMLP {
+
+struct LogicalProcessorInformation {
+  std::unique_ptr<char[]> Buffer;
+  size_t Length;
+};
+
+struct CoreCounter {
+  uint32_t PhysicalCores = 0;
+  uint32_t SocDieCores = 0;
+};
+
+static LogicalProcessorInformation GetLogicalProcessorInfos(LOGICAL_PROCESSOR_RELATIONSHIP relationship) {
+  DWORD length = 0;
+  DWORD rc = GetLogicalProcessorInformationEx(relationship, nullptr, &length);
+
+  assert(rc == FALSE);
+
+  auto processorInformationBytes = std::make_unique<char[]>(length);
+
+  rc = GetLogicalProcessorInformationEx(
+    relationship, reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(processorInformationBytes.get()), &length
+  );
+
+  assert(rc == TRUE);
+
+  return {std::move(processorInformationBytes), length};
+}
+
+uint32_t CountSetBits(DWORD input) {
+  uint32_t c;
+  for (c = 0; input; c++) {
+    input &= input - 1;
+  }
+  return c;
+}
+
+static CoreCounter GetNumberOPhysicalAndEngineeringCores() {
+  auto logicalProcessorInformation = GetLogicalProcessorInfos(RelationAll);
+
+  CoreCounter cores;
+  DWORD dwLevel2GroupMask = 0;
+  DWORD dwLevel3GroupMask = 0;
+  size_t read = 0;
+  PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX currentProcessorInfo = NULL;
+
+  while ((read + FIELD_OFFSET(SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, Processor)) < logicalProcessorInformation.Length
+  ) {
+    currentProcessorInfo =
+      reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(logicalProcessorInformation.Buffer.get() + read);
+    if ((read + currentProcessorInfo->Size) > logicalProcessorInformation.Length) {
+      break;
+    }
+
+    switch (currentProcessorInfo->Relationship) {
+      case RelationProcessorCore:
+        cores.PhysicalCores++;
+        break;
+      case RelationCache:
+        if (currentProcessorInfo->Cache.Level == 2) {
+          dwLevel2GroupMask |= currentProcessorInfo->Cache.GroupMask.Mask;
+        } else if (currentProcessorInfo->Cache.Level == 3) {
+          dwLevel3GroupMask |= currentProcessorInfo->Cache.GroupMask.Mask;
+        }
+        break;
+    }
+
+    read += currentProcessorInfo->Size;
+  }
+
+  cores.SocDieCores = CountSetBits(dwLevel2GroupMask & ~dwLevel3GroupMask);
+  return cores;
+}
+
+uint32_t HardwareCoreEnumerator::DefaultIntraOpNumThreads() {
+  // # of physical cores = # of P cores + # of E Cores + # of Soc Cores.
+  // # of logical cores = # of P cores x 2 (if hyper threading is enabled) + # of E cores + # of Soc Cores.
+  auto cores = GetNumberOPhysicalAndEngineeringCores();
+  // We want to use the number of physical cores, but exclude soc cores
+  return cores.PhysicalCores - cores.SocDieCores;
+}
+
+}  // namespace WINMLP
diff --git a/winml/lib/Api/HardwareCoreEnumerator.h b/winml/lib/Api/HardwareCoreEnumerator.h
new file mode 100644
index 0000000000000..6861ba7d46bcf
--- /dev/null
+++ b/winml/lib/Api/HardwareCoreEnumerator.h
@@ -0,0 +1,11 @@
+﻿// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+
+namespace WINMLP {
+struct HardwareCoreEnumerator {
+  HardwareCoreEnumerator() = delete;
+  static uint32_t DefaultIntraOpNumThreads();
+};
+}  // namespace WINMLP
diff --git a/winml/lib/Api/ImageFeatureValue.h b/winml/lib/Api/ImageFeatureValue.h
index 83a21c8679cf3..9df4d7010ff66 100644
--- a/winml/lib/Api/ImageFeatureValue.h
+++ b/winml/lib/Api/ImageFeatureValue.h
@@ -23,7 +23,7 @@ struct ImageFeatureValue : ImageFeatureValueT<ImageFeatureValue, _winml::ILotusV
   wfc::IIterable<Windows::Media::VideoFrame> VideoFrames();
   winml::LearningModelFeatureKind Kind();
 
-  static winml::ImageFeatureValue ImageFeatureValue::Create(
+  static winml::ImageFeatureValue Create(
     uint32_t batchSize, Windows::Graphics::Imaging::BitmapPixelFormat format, uint32_t width, uint32_t height
   );
   static winml::ImageFeatureValue CreateFromVideoFrame(Windows::Media::VideoFrame const& image);
diff --git a/winml/lib/Api/LearningModelDevice.cpp b/winml/lib/Api/LearningModelDevice.cpp
index c9c6f5bc70ee2..9f48ee03886e1 100644
--- a/winml/lib/Api/LearningModelDevice.cpp
+++ b/winml/lib/Api/LearningModelDevice.cpp
@@ -7,6 +7,7 @@
 #include <D3d11_4.h>
 #include <d3d11on12.h>
 #include "D3DDeviceCache.h"
+#include "HardwareCoreEnumerator.h"
 
 #include "ConverterResourceStore.h"
 
@@ -131,7 +132,7 @@ LearningModelDevice::CacheThreadPool(_winml::IThreading* thread_pool) {
 
 uint32_t LearningModelDevice::NumberOfIntraOpThreads() {
   if (IsCpuDevice()) {
-    return std::thread::hardware_concurrency();
+    return HardwareCoreEnumerator::DefaultIntraOpNumThreads();
   } else {
     // GPU sessions should not rely on intra op threads.
     // Creating a large thread pool is unnecessary and wasteful, and can cause
diff --git a/winml/lib/Api/LearningModelSessionOptions.cpp b/winml/lib/Api/LearningModelSessionOptions.cpp
index 2ff9c6d1d56d0..374200fb3b9f8 100644
--- a/winml/lib/Api/LearningModelSessionOptions.cpp
+++ b/winml/lib/Api/LearningModelSessionOptions.cpp
@@ -3,11 +3,20 @@
 
 #include "lib/Api/pch/pch.h"
 #include "LearningModelSessionOptions.h"
+#include "HardwareCoreEnumerator.h"
 
 namespace WINMLP {
+
+LearningModelSessionOptions::LearningModelSessionOptions() {
+  intra_op_num_threads_override_ = HardwareCoreEnumerator::DefaultIntraOpNumThreads();
+}
+
 LearningModelSessionOptions::LearningModelSessionOptions(const LearningModelSessionOptions& options)
   : batch_size_override_(options.batch_size_override_),
-    close_model_on_session_creation_(options.close_model_on_session_creation_) {
+    close_model_on_session_creation_(options.close_model_on_session_creation_),
+    named_dim_overrides_(options.named_dim_overrides_),
+    intra_op_num_threads_override_(options.intra_op_num_threads_override_),
+    custom_ops_lib_paths_(options.custom_ops_lib_paths_) {
 }
 
 uint32_t LearningModelSessionOptions::BatchSizeOverride() {
diff --git a/winml/lib/Api/LearningModelSessionOptions.h b/winml/lib/Api/LearningModelSessionOptions.h
index 5fc7e54997403..21d0242735f94 100644
--- a/winml/lib/Api/LearningModelSessionOptions.h
+++ b/winml/lib/Api/LearningModelSessionOptions.h
@@ -11,7 +11,7 @@ struct LearningModelSessionOptions : LearningModelSessionOptionsT<
                                        LearningModelSessionOptions,
                                        ILearningModelSessionOptionsNative,
                                        ILearningModelSessionOptionsNative1> {
-  LearningModelSessionOptions() = default;
+  LearningModelSessionOptions();
 
   LearningModelSessionOptions(const LearningModelSessionOptions& options);
 
@@ -72,7 +72,7 @@ struct LearningModelSessionOptions : LearningModelSessionOptionsT<
   // The intra operator num threads property is used to control the number of threads used in the threadpool for intra operator calculations.
   // The default value here is the maximum number of logical cores to ensure that the default behavior of WinML always runs the fastest.
   // WARNING: Setting a number higher than the maximum number of logical cores may result in an inefficient threadpool
-  uint32_t intra_op_num_threads_override_ = std::thread::hardware_concurrency();
+  uint32_t intra_op_num_threads_override_;
 
   bool allow_thread_spinning_ = true;
 
diff --git a/winml/lib/Api/impl/MapBase.h b/winml/lib/Api/impl/MapBase.h
index 4686d8a05ad9b..6424f85ef5631 100644
--- a/winml/lib/Api/impl/MapBase.h
+++ b/winml/lib/Api/impl/MapBase.h
@@ -7,6 +7,8 @@
 
 #include "MapFeatureDescriptor.h"
 #include "TensorFeatureDescriptor.h"
+#include "LearningModelSession.h"
+#include "IMapFeatureValue.h"
 
 namespace _winml {
 
diff --git a/winml/lib/Api/impl/SequenceBase.h b/winml/lib/Api/impl/SequenceBase.h
index 82ac6a439dae0..7c4df9783cf04 100644
--- a/winml/lib/Api/impl/SequenceBase.h
+++ b/winml/lib/Api/impl/SequenceBase.h
@@ -6,6 +6,10 @@
 #include "MapFeatureDescriptor.h"
 #include "SequenceFeatureDescriptor.h"
 #include "TensorFeatureDescriptor.h"
+#include "LearningModelSession.h"
+#include "ISequenceFeatureValue.h"
+
+#include "FeatureValues.h"
 
 namespace _winml {
 
@@ -169,55 +173,55 @@ struct SequenceBase : public winrt::implements<
   }
   template <>
   auto CreatePlaceholderTensor<winml::TensorBoolean>() {
-    return winmlp::TensorBoolean::Create();
+    return winml::TensorBoolean::Create();
   }
   template <>
   auto CreatePlaceholderTensor<winml::TensorFloat>() {
-    return winmlp::TensorFloat::Create();
+    return winml::TensorFloat::Create();
   }
   template <>
   auto CreatePlaceholderTensor<winml::TensorDouble>() {
-    return winmlp::TensorDouble::Create();
+    return winml::TensorDouble::Create();
   }
   template <>
   auto CreatePlaceholderTensor<winml::TensorInt8Bit>() {
-    return winmlp::TensorInt8Bit::Create();
+    return winml::TensorInt8Bit::Create();
   }
   template <>
   auto CreatePlaceholderTensor<winml::TensorUInt8Bit>() {
-    return winmlp::TensorUInt8Bit::Create();
+    return winml::TensorUInt8Bit::Create();
   }
   template <>
   auto CreatePlaceholderTensor<winml::TensorUInt16Bit>() {
-    return winmlp::TensorUInt16Bit::Create();
+    return winml::TensorUInt16Bit::Create();
   }
   template <>
   auto CreatePlaceholderTensor<winml::TensorInt16Bit>() {
-    return winmlp::TensorInt16Bit::Create();
+    return winml::TensorInt16Bit::Create();
   }
   template <>
   auto CreatePlaceholderTensor<winml::TensorUInt32Bit>() {
-    return winmlp::TensorUInt32Bit::Create();
+    return winml::TensorUInt32Bit::Create();
   }
   template <>
   auto CreatePlaceholderTensor<winml::TensorInt32Bit>() {
-    return winmlp::TensorInt32Bit::Create();
+    return winml::TensorInt32Bit::Create();
   }
   template <>
   auto CreatePlaceholderTensor<winml::TensorUInt64Bit>() {
-    return winmlp::TensorUInt64Bit::Create();
+    return winml::TensorUInt64Bit::Create();
   }
   template <>
   auto CreatePlaceholderTensor<winml::TensorInt64Bit>() {
-    return winmlp::TensorInt64Bit::Create();
+    return winml::TensorInt64Bit::Create();
   }
   template <>
   auto CreatePlaceholderTensor<winml::TensorFloat16Bit>() {
-    return winmlp::TensorFloat16Bit::Create();
+    return winml::TensorFloat16Bit::Create();
   }
   template <>
   auto CreatePlaceholderTensor<winml::TensorString>() {
-    return winmlp::TensorString::Create();
+    return winml::TensorString::Create();
   }
 
   void AppendValue(_winml::BindingContext& context, wfc::IVector<T> data, winrt::com_ptr<_winml::IValue> value) {
diff --git a/winml/lib/Api/impl/TensorBase.h b/winml/lib/Api/impl/TensorBase.h
index c9299a00ddaa2..6c68abd3ce6c9 100644
--- a/winml/lib/Api/impl/TensorBase.h
+++ b/winml/lib/Api/impl/TensorBase.h
@@ -217,7 +217,8 @@ struct TensorBase : TBase {
     }
 
     D3D12_HEAP_PROPERTIES heapProperties = {
-      D3D12_HEAP_TYPE_DEFAULT, D3D12_CPU_PAGE_PROPERTY_UNKNOWN, D3D12_MEMORY_POOL_UNKNOWN, 0, 0};
+      D3D12_HEAP_TYPE_DEFAULT, D3D12_CPU_PAGE_PROPERTY_UNKNOWN, D3D12_MEMORY_POOL_UNKNOWN, 0, 0
+    };
     D3D12_RESOURCE_DESC resourceDesc = {
       D3D12_RESOURCE_DIMENSION_BUFFER,
       0,
@@ -875,7 +876,7 @@ struct TensorBase : TBase {
 
     WINML_THROW_HR_IF_TRUE_MSG(
       E_ILLEGAL_METHOD_CALL,
-      std::is_same<T, std::string>::value,
+      (std::is_same<T, std::string>::value),
       "TensorString objects cannot be created from IBuffers!"
     );
   }
diff --git a/winml/lib/Api/impl/TensorKindFrom.h b/winml/lib/Api/impl/TensorKindFrom.h
index f2b59ac9d70ec..0d3f185a64d19 100644
--- a/winml/lib/Api/impl/TensorKindFrom.h
+++ b/winml/lib/Api/impl/TensorKindFrom.h
@@ -3,6 +3,8 @@
 
 #pragma once
 
+#include "TensorFeatureDescriptor.h"
+
 namespace _winml {
 
 // We need to define our own type for Half since DirectX::PackedVector::Half resolves to uint16_t per its typedef declaration.
diff --git a/winml/lib/Api/pch/pch.h b/winml/lib/Api/pch/pch.h
index 8c09085a993b5..57591afd45547 100644
--- a/winml/lib/Api/pch/pch.h
+++ b/winml/lib/Api/pch/pch.h
@@ -12,5 +12,6 @@
 
 #include "cppwinrt_onnx.h"
 #include "dx.h"
+#include "FeatureValues.h"
 
 #pragma warning(pop)
diff --git a/winml/lib/Common/inc/common.h b/winml/lib/Common/inc/common.h
index 58549e6e52195..ff1a42db28a1a 100644
--- a/winml/lib/Common/inc/common.h
+++ b/winml/lib/Common/inc/common.h
@@ -51,3 +51,8 @@ TRACELOGGING_DECLARE_PROVIDER(winml_trace_logging_provider);
 #include "NamespaceAliases.h"
 #include "StringHelpers.h"
 #include "WinML_Lock.h"
+
+template <typename T>
+auto unmove_ptr(T&& t) {
+  return &static_cast<T&>(t);
+}
diff --git a/winml/test/adapter/AdapterDmlEpTest.cpp b/winml/test/adapter/AdapterDmlEpTest.cpp
index 81437f9db2de3..b4220650abb9c 100644
--- a/winml/test/adapter/AdapterDmlEpTest.cpp
+++ b/winml/test/adapter/AdapterDmlEpTest.cpp
@@ -116,7 +116,8 @@ std::array<float, tensor_size> tensor_values = {};
 winrt::com_ptr<ID3D12Resource> CreateD3D12Resource(ID3D12Device& device) {
   constexpr uint64_t buffer_size = tensor_size * sizeof(float);
   constexpr D3D12_HEAP_PROPERTIES heap_properties = {
-    D3D12_HEAP_TYPE_DEFAULT, D3D12_CPU_PAGE_PROPERTY_UNKNOWN, D3D12_MEMORY_POOL_UNKNOWN, 0, 0};
+    D3D12_HEAP_TYPE_DEFAULT, D3D12_CPU_PAGE_PROPERTY_UNKNOWN, D3D12_MEMORY_POOL_UNKNOWN, 0, 0
+  };
   constexpr D3D12_RESOURCE_DESC resource_desc = {
     D3D12_RESOURCE_DIMENSION_BUFFER,
     0,
@@ -365,6 +366,7 @@ const AdapterDmlEpTestApi& getapi() {
     DmlCopyTensor,
     CreateCustomRegistry,
     ValueGetDeviceId,
-    SessionGetInputRequiredDeviceId};
+    SessionGetInputRequiredDeviceId
+  };
   return api;
 }
diff --git a/winml/test/adapter/AdapterSessionTest.cpp b/winml/test/adapter/AdapterSessionTest.cpp
index 1b1a36004264c..8c9124b2ff4ae 100644
--- a/winml/test/adapter/AdapterSessionTest.cpp
+++ b/winml/test/adapter/AdapterSessionTest.cpp
@@ -368,7 +368,8 @@ const AdapterSessionTestAPI& getapi() {
     Profiling,
     CopyInputAcrossDevices,
     CopyInputAcrossDevices_DML,
-    GetNumberOfIntraOpThreads};
+    GetNumberOfIntraOpThreads
+  };
 
   if (SkipGpuTests()) {
     api.AppendExecutionProvider_DML = SkipTest;
diff --git a/winml/test/api/LearningModelAPITest.cpp b/winml/test/api/LearningModelAPITest.cpp
index ab45e2414854d..01ca2b8930506 100644
--- a/winml/test/api/LearningModelAPITest.cpp
+++ b/winml/test/api/LearningModelAPITest.cpp
@@ -247,9 +247,11 @@ static void CheckLearningModelPixelRange() {
                                           // Normalized_0_1 and image output
                                           L"Add_ImageNet1920WithImageMetadataBgr8_SRGB_0_1.onnx",
                                           // Normalized_1_1 and image output
-                                          L"Add_ImageNet1920WithImageMetadataBgr8_SRGB_1_1.onnx"};
+                                          L"Add_ImageNet1920WithImageMetadataBgr8_SRGB_1_1.onnx"
+  };
   std::vector<LearningModelPixelRange> pixelRanges = {
-    LearningModelPixelRange::ZeroTo255, LearningModelPixelRange::ZeroToOne, LearningModelPixelRange::MinusOneToOne};
+    LearningModelPixelRange::ZeroTo255, LearningModelPixelRange::ZeroToOne, LearningModelPixelRange::MinusOneToOne
+  };
   for (uint32_t model_i = 0; model_i < modelPaths.size(); model_i++) {
     LearningModel learningModel = nullptr;
     WINML_EXPECT_NO_THROW(APITest::LoadModel(modelPaths[model_i], learningModel));
@@ -329,7 +331,8 @@ const LearningModelApiTestsApi& getapi() {
     CloseModelCheckEval,
     CloseModelNoNewSessions,
     CheckMetadataCaseInsensitive,
-    CreateCorruptModel};
+    CreateCorruptModel
+  };
 
   if (RuntimeParameterExists(L"noVideoFrameTests")) {
     api.CloseModelCheckEval = SkipTest;
diff --git a/winml/test/api/LearningModelBindingAPITest.cpp b/winml/test/api/LearningModelBindingAPITest.cpp
index b77421e191020..8279f4f89f0ed 100644
--- a/winml/test/api/LearningModelBindingAPITest.cpp
+++ b/winml/test/api/LearningModelBindingAPITest.cpp
@@ -669,7 +669,8 @@ const LearningModelBindingAPITestsApi& getapi() {
     VerifyOutputAfterEvaluateAsyncCalledTwice,
     VerifyOutputAfterImageBindCalledTwice,
     SequenceLengthTensorFloat,
-    SequenceConstructTensorString};
+    SequenceConstructTensorString
+  };
 
   if (SkipGpuTests()) {
     api.GpuSqueezeNet = SkipTest;
diff --git a/winml/test/api/LearningModelSessionAPITest.cpp b/winml/test/api/LearningModelSessionAPITest.cpp
index 21cdaa62bc898..d6e70e35e3a6d 100644
--- a/winml/test/api/LearningModelSessionAPITest.cpp
+++ b/winml/test/api/LearningModelSessionAPITest.cpp
@@ -793,7 +793,8 @@ static void STFT(
   auto n_dfts = static_cast<size_t>(1 + floor((signal_size - dft_size) / hop_size));
   auto input_shape = std::vector<int64_t>{1, INT64(signal_size)};
   auto output_shape = std::vector<int64_t>{
-    INT64(batch_size), INT64(n_dfts), is_onesided ? ((INT64(dft_size) >> 1) + 1) : INT64(dft_size), 2};
+    INT64(batch_size), INT64(n_dfts), is_onesided ? ((INT64(dft_size) >> 1) + 1) : INT64(dft_size), 2
+  };
   auto dft_length = TensorInt64Bit::CreateFromArray({}, {INT64(dft_size)});
 
   auto model =
@@ -1372,7 +1373,8 @@ static void ModelBuilding_GridSample_Internal(LearningModelDeviceKind kind) {
     5.0000f,
     5.0000f,
     10.0000f,
-    10.0000f};
+    10.0000f
+  };
   input_dims = {1, 1, 3, 2};
   grid_dims = {1, 2, 4, 2};
 
@@ -2193,12 +2195,6 @@ static void SetIntraOpNumThreads() {
   auto binding = LearningModelBinding(session);
   binding.Bind(L"input", tensor_input);
   WINML_EXPECT_NO_THROW(session.Evaluate(binding, L""));
-
-  // Check to verify that the default number of threads in LearningModelSession is equal to the number of logical cores.
-  session = LearningModelSession(model, device);
-  nativeSession = session.as<ILearningModelSessionNative>();
-  WINML_EXPECT_NO_THROW(nativeSession->GetIntraOpNumThreads(&numIntraOpThreads));
-  WINML_EXPECT_EQUAL(std::thread::hardware_concurrency(), numIntraOpThreads);
 }
 
 static void SetIntraOpThreadSpinning() {
@@ -2312,7 +2308,8 @@ const LearningModelSessionAPITestsApi& getapi() {
     ModelBuilding_STFT,
     ModelBuilding_MelSpectrogramOnThreeToneSignal,
     ModelBuilding_MelWeightMatrix,
-    SetName};
+    SetName
+  };
 
   if (SkipGpuTests()) {
     api.CreateSessionDeviceDirectX = SkipTest;
diff --git a/winml/test/api/RawApiHelpers.cpp b/winml/test/api/RawApiHelpers.cpp
index b6f39f8e88224..e84af6d239799 100644
--- a/winml/test/api/RawApiHelpers.cpp
+++ b/winml/test/api/RawApiHelpers.cpp
@@ -38,7 +38,8 @@ void RunOnDevice(ml::learning_model& model, ml::learning_model_device& device, I
     auto channel_buffers_pointers = std::vector<float*>{
       &input_data.at(0),
       &input_data.at(0) + channel_buffers_sizes[0],
-      &input_data.at(0) + channel_buffers_sizes[0] + +channel_buffers_sizes[1]};
+      &input_data.at(0) + channel_buffers_sizes[0] + +channel_buffers_sizes[1]
+    };
 
     WINML_EXPECT_HRESULT_SUCCEEDED(binding->bind_as_references<float>(
       input_name,
diff --git a/winml/test/api/RawApiTestsGpu.cpp b/winml/test/api/RawApiTestsGpu.cpp
index 9c1c06a01603f..f12ba0f36cebf 100644
--- a/winml/test/api/RawApiTestsGpu.cpp
+++ b/winml/test/api/RawApiTestsGpu.cpp
@@ -165,7 +165,8 @@ const RawApiTestsGpuApi& getapi() {
     CreateDirectXMinPowerDevice,
     Evaluate,
     EvaluateNoInputCopy,
-    EvaluateManyBuffers};
+    EvaluateManyBuffers
+  };
 
   if (SkipGpuTests()) {
     api.CreateDirectXDevice = SkipTest;
diff --git a/winml/test/concurrency/ConcurrencyTests.cpp b/winml/test/concurrency/ConcurrencyTests.cpp
index 46528ef70d377..404afbf67ea1c 100644
--- a/winml/test/concurrency/ConcurrencyTests.cpp
+++ b/winml/test/concurrency/ConcurrencyTests.cpp
@@ -141,7 +141,8 @@ void EvalAsyncDifferentBindings() {
   std::vector<EvaluationUnit> evaluation_units(num_units, EvaluationUnit());
 
   std::vector<ImageFeatureValue> ifvs = {
-    FileHelpers::LoadImageFeatureValue(L"kitten_224.png"), FileHelpers::LoadImageFeatureValue(L"fish.png")};
+    FileHelpers::LoadImageFeatureValue(L"kitten_224.png"), FileHelpers::LoadImageFeatureValue(L"fish.png")
+  };
 
   // same session, different binding
   auto model = LearningModel::LoadFromFilePath(FileHelpers::GetModulePath() + L"model.onnx");
@@ -191,7 +192,8 @@ void MultiThreadMultiSessionOnDevice(const LearningModelDevice& device) {
   auto path = FileHelpers::GetModulePath() + L"model.onnx";
   auto model = LearningModel::LoadFromFilePath(path);
   std::vector<ImageFeatureValue> ivfs = {
-    FileHelpers::LoadImageFeatureValue(L"kitten_224.png"), FileHelpers::LoadImageFeatureValue(L"fish.png")};
+    FileHelpers::LoadImageFeatureValue(L"kitten_224.png"), FileHelpers::LoadImageFeatureValue(L"fish.png")
+  };
   std::vector<int> max_indices = {
     281,  // tabby, tabby cat
     0     // tench, Tinca tinca
@@ -257,7 +259,8 @@ void MultiThreadSingleSessionOnDevice(const LearningModelDevice& device) {
   LearningModelSession model_session = nullptr;
   WINML_EXPECT_NO_THROW(model_session = LearningModelSession(model, device));
   std::vector<ImageFeatureValue> ivfs = {
-    FileHelpers::LoadImageFeatureValue(L"kitten_224.png"), FileHelpers::LoadImageFeatureValue(L"fish.png")};
+    FileHelpers::LoadImageFeatureValue(L"kitten_224.png"), FileHelpers::LoadImageFeatureValue(L"fish.png")
+  };
   std::vector<int> max_indices = {
     281,  // tabby, tabby cat
     0     // tench, Tinca tinca
@@ -322,7 +325,8 @@ const ConcurrencyTestsApi& getapi() {
     MultiThreadSingleSessionGpu,
     EvalAsyncDifferentModels,
     EvalAsyncDifferentSessions,
-    EvalAsyncDifferentBindings};
+    EvalAsyncDifferentBindings
+  };
 
   if (SkipGpuTests()) {
     api.MultiThreadMultiSessionGpu = SkipTest;
diff --git a/winml/test/image/imageTestHelper.cpp b/winml/test/image/imageTestHelper.cpp
index b7c1eb42965f8..91eed2a807782 100644
--- a/winml/test/image/imageTestHelper.cpp
+++ b/winml/test/image/imageTestHelper.cpp
@@ -148,7 +148,8 @@ TensorFloat LoadInputImageFromGPU(SoftwareBitmap softwareBitmap, const std::wstr
   // 3 is number of channels we use. R G B without alpha.
   UINT64 bufferbytesize = 3 * sizeof(float) * softwareBitmap.PixelWidth() * softwareBitmap.PixelHeight();
   D3D12_HEAP_PROPERTIES heapProperties = {
-    D3D12_HEAP_TYPE_DEFAULT, D3D12_CPU_PAGE_PROPERTY_UNKNOWN, D3D12_MEMORY_POOL_UNKNOWN, 0, 0};
+    D3D12_HEAP_TYPE_DEFAULT, D3D12_CPU_PAGE_PROPERTY_UNKNOWN, D3D12_MEMORY_POOL_UNKNOWN, 0, 0
+  };
   D3D12_RESOURCE_DESC resourceDesc = {
     D3D12_RESOURCE_DIMENSION_BUFFER,
     0,
diff --git a/winml/test/image/imagetests.cpp b/winml/test/image/imagetests.cpp
index 6157520ca96a3..2251954c59e4c 100644
--- a/winml/test/image/imagetests.cpp
+++ b/winml/test/image/imagetests.cpp
@@ -939,7 +939,8 @@ TEST_F(ImageTests, ImageBindingAsGPUTensor) {
   UINT64 buffer_byte_size =
     static_cast<uint64_t>(software_bitmap.PixelWidth()) * software_bitmap.PixelHeight() * 3 * sizeof(float);
   D3D12_HEAP_PROPERTIES heap_properties = {
-    D3D12_HEAP_TYPE_DEFAULT, D3D12_CPU_PAGE_PROPERTY_UNKNOWN, D3D12_MEMORY_POOL_UNKNOWN, 0, 0};
+    D3D12_HEAP_TYPE_DEFAULT, D3D12_CPU_PAGE_PROPERTY_UNKNOWN, D3D12_MEMORY_POOL_UNKNOWN, 0, 0
+  };
   D3D12_RESOURCE_DESC resource_desc = {
     D3D12_RESOURCE_DIMENSION_BUFFER,
     0,
diff --git a/winml/test/model/model_tests.cpp b/winml/test/model/model_tests.cpp
index 0b4c10eac9142..f40f08ad2696d 100644
--- a/winml/test/model/model_tests.cpp
+++ b/winml/test/model/model_tests.cpp
@@ -232,17 +232,23 @@ static std::vector<ITestCase*> GetAllTestCases() {
     ORT_TSTR("tf_resnet_v2_152"),
     ORT_TSTR("vgg19"),
     ORT_TSTR("yolov3"),
-    ORT_TSTR("zfnet512")};
+    ORT_TSTR("zfnet512")
+  };
   allDisabledTests.insert(std::begin(x86DisabledTests), std::end(x86DisabledTests));
 #endif
   // Bad onnx test output caused by previously wrong SAME_UPPER/SAME_LOWER for ConvTranspose
   allDisabledTests.insert(ORT_TSTR("cntk_simple_seg"));
 
+  auto broken_tests = GetBrokenTests("dml");
+  auto broken_tests_keyword_set = GetBrokenTestsKeyWordSet("dml");
+
   WINML_EXPECT_NO_THROW(LoadTests(
     dataDirs,
     whitelistedTestCases,
     TestTolerances(1e-3, 1e-3, {}, {}),
     allDisabledTests,
+    std::move(broken_tests),
+    std::move(broken_tests_keyword_set),
     [&tests](std::unique_ptr<ITestCase> l) {
       tests.push_back(l.get());
       ownedTests.push_back(std::move(l));
@@ -380,13 +386,6 @@ std::string GetFullNameOfTest(ITestCase* testCase, winml::LearningModelDeviceKin
   name += tokenizedModelPath[tokenizedModelPath.size() - 2] += "_";  // model name
   name += tokenizedModelPath[tokenizedModelPath.size() - 3];         // opset version
 
-  // To introduce models from model zoo, the model path is structured like this "<source>/<opset>/<model_name>/?.onnx"
-  std::string source = tokenizedModelPath[tokenizedModelPath.size() - 4];
-  // `models` means the root of models, to be ompatible with the old structure, that is, the source name is empty.
-  if (source != "models") {
-    name += "_" + source;
-  }
-
   std::replace_if(
     name.begin(), name.end(), [](char c) { return !absl::ascii_isalnum(c); }, '_'
   );
@@ -405,6 +404,13 @@ std::string GetFullNameOfTest(ITestCase* testCase, winml::LearningModelDeviceKin
     ModifyNameIfDisabledTest(/*inout*/ name, deviceKind);
   }
 
+  // To introduce models from model zoo, the model path is structured like this "<source>/<opset>/<model_name>/?.onnx"
+  std::string source = tokenizedModelPath[tokenizedModelPath.size() - 4];
+  // `models` means the root of models, to be ompatible with the old structure, that is, the source name is empty.
+  if (source != "models") {
+    name += "_" + source;
+  }
+
   return name;
 }
 
diff --git a/winml/test/model/skip_model_tests.h b/winml/test/model/skip_model_tests.h
index 174f57143ee81..cf55d8bcbae7e 100644
--- a/winml/test/model/skip_model_tests.h
+++ b/winml/test/model/skip_model_tests.h
@@ -161,10 +161,8 @@ std::unordered_map<std::string, std::pair<std::string, std::string>> disabledGpu
     test name -> absolute difference sampleTolerance
 */
 std::unordered_map<std::string, double> sampleTolerancePerTests({
-  {"fp16_inception_v1_opset7_GPU",0.005                                  },
-  {"fp16_inception_v1_opset8_GPU", 0.005},
-  {            "candy_opset9_GPU",
-   0.00150000                           }, // Intel(R) UHD Graphics 630 (29.20.100.9020) AP machine has inaccurate GPU results for FNS Candy opset 9 https://microsoft.visualstudio.com/OS/_workitems/edit/30696168/
-  { "fp16_tiny_yolov2_opset8_GPU",
-   0.109000                             }, // Intel(R) UHD Graphics 630 (29.20.100.9020) AP machine has inaccurate GPU results for FNS Candy opset 9 https://microsoft.visualstudio.com/OS/_workitems/edit/30696168/
+  {"fp16_inception_v1_opset7_GPU",      0.005},
+  {"fp16_inception_v1_opset8_GPU",      0.005},
+  {            "candy_opset9_GPU", 0.00150000}, // Intel(R) UHD Graphics 630 (29.20.100.9020) AP machine has inaccurate GPU results for FNS Candy opset 9 https://microsoft.visualstudio.com/OS/_workitems/edit/30696168/
+  { "fp16_tiny_yolov2_opset8_GPU",   0.109000}, // Intel(R) UHD Graphics 630 (29.20.100.9020) AP machine has inaccurate GPU results for FNS Candy opset 9 https://microsoft.visualstudio.com/OS/_workitems/edit/30696168/
 });
diff --git a/winml/test/scenario/cppwinrt/CustomNullOp.h b/winml/test/scenario/cppwinrt/CustomNullOp.h
index 33709c5f72d3c..b50909548a6bf 100644
--- a/winml/test/scenario/cppwinrt/CustomNullOp.h
+++ b/winml/test/scenario/cppwinrt/CustomNullOp.h
@@ -69,7 +69,8 @@ struct NullOperatorFactory : winrt::implements<NullOperatorFactory, IMLOperatorK
     std::vector<MLOperatorEdgeDescription> allowedEdges{
       CreateEdgeDescriptor(MLOperatorEdgeType::Tensor, MLOperatorTensorDataType::Double),
       CreateEdgeDescriptor(MLOperatorEdgeType::Tensor, MLOperatorTensorDataType::Float),
-      CreateEdgeDescriptor(MLOperatorEdgeType::Tensor, MLOperatorTensorDataType::Float16)};
+      CreateEdgeDescriptor(MLOperatorEdgeType::Tensor, MLOperatorTensorDataType::Float16)
+    };
     typeConstraint.allowedTypes = allowedEdges.data();
     typeConstraint.allowedTypeCount = static_cast<uint32_t>(allowedEdges.size());
 
diff --git a/winml/test/scenario/cppwinrt/CustomOps.cpp b/winml/test/scenario/cppwinrt/CustomOps.cpp
index 075bf5ed877a3..58d0fe6e64efc 100644
--- a/winml/test/scenario/cppwinrt/CustomOps.cpp
+++ b/winml/test/scenario/cppwinrt/CustomOps.cpp
@@ -305,7 +305,8 @@ static void CustomKernelWithBuiltInSchema() {
 
   // Register the kernel
   MLOperatorEdgeDescription floatTensorType = {
-    MLOperatorEdgeType::Tensor, static_cast<uint64_t>(MLOperatorTensorDataType::Float)};
+    MLOperatorEdgeType::Tensor, static_cast<uint64_t>(MLOperatorTensorDataType::Float)
+  };
 
   MLOperatorEdgeTypeConstrant constraint = {"T", &floatTensorType, 1};
 
@@ -318,7 +319,8 @@ static void CustomKernelWithBuiltInSchema() {
     1,
     nullptr,
     0,
-    MLOperatorKernelOptions::AllowDynamicInputShapes};
+    MLOperatorKernelOptions::AllowDynamicInputShapes
+  };
 
   Microsoft::WRL::ComPtr<MLOperatorKernelFactory> factory =
     wil::MakeOrThrow<MLOperatorKernelFactory>(CreateABIFooKernel<false>);
@@ -614,7 +616,8 @@ static void CustomKernelWithCustomSchema() {
     MLOperatorEdgeTypeConstrant kernelConstraint = {"T1", &floatTensorEdgeDesc, 1};
 
     MLOperatorKernelDescription kernelDesc = {
-      "", "Foo", 7, MLOperatorExecutionType::Cpu, &kernelConstraint, testCases[caseIndex].useTypeLabel ? 1u : 0u};
+      "", "Foo", 7, MLOperatorExecutionType::Cpu, &kernelConstraint, testCases[caseIndex].useTypeLabel ? 1u : 0u
+    };
 
     if (!testCases[caseIndex].attributeDefaultsInSchema) {
       kernelDesc.defaultAttributes = defaultAttributes;
@@ -693,10 +696,8 @@ static void CustomKernelWithCustomSchema() {
 
 const CustomOpsTestsApi& getapi() {
   static CustomOpsTestsApi api = {
-    CustomOpsScenarioTestsClassSetup,
-    CustomOperatorFusion,
-    CustomKernelWithBuiltInSchema,
-    CustomKernelWithCustomSchema};
+    CustomOpsScenarioTestsClassSetup, CustomOperatorFusion, CustomKernelWithBuiltInSchema, CustomKernelWithCustomSchema
+  };
 
   if (SkipGpuTests()) {
     api.CustomOperatorFusion = SkipTest;
diff --git a/winml/test/scenario/cppwinrt/NoisyReluCpu.h b/winml/test/scenario/cppwinrt/NoisyReluCpu.h
index 5f89b20beebb9..5cccbae67407c 100644
--- a/winml/test/scenario/cppwinrt/NoisyReluCpu.h
+++ b/winml/test/scenario/cppwinrt/NoisyReluCpu.h
@@ -157,7 +157,8 @@ struct NoisyReluOperatorFactory : winrt::implements<NoisyReluOperatorFactory, IM
     std::vector<MLOperatorEdgeDescription> allowedEdges{
       CreateEdgeDescriptor(MLOperatorEdgeType::Tensor, MLOperatorTensorDataType::Double),
       CreateEdgeDescriptor(MLOperatorEdgeType::Tensor, MLOperatorTensorDataType::Float),
-      CreateEdgeDescriptor(MLOperatorEdgeType::Tensor, MLOperatorTensorDataType::Float16)};
+      CreateEdgeDescriptor(MLOperatorEdgeType::Tensor, MLOperatorTensorDataType::Float16)
+    };
     typeConstraint.allowedTypes = allowedEdges.data();
     typeConstraint.allowedTypeCount = static_cast<uint32_t>(allowedEdges.size());
 
@@ -194,7 +195,8 @@ struct NoisyReluOperatorFactory : winrt::implements<NoisyReluOperatorFactory, IM
     noisyReluVarianceAttributeValue.floats = defaultVariance;
 
     std::vector<MLOperatorAttributeNameValue> attributeDefaultValues{
-      noisyReluMeanAttributeValue, noisyReluVarianceAttributeValue};
+      noisyReluMeanAttributeValue, noisyReluVarianceAttributeValue
+    };
     noisyReluSchema.defaultAttributes = attributeDefaultValues.data();
     noisyReluSchema.defaultAttributeCount = static_cast<uint32_t>(attributeDefaultValues.size());
 
@@ -216,7 +218,8 @@ struct NoisyReluOperatorFactory : winrt::implements<NoisyReluOperatorFactory, IM
     std::vector<MLOperatorEdgeDescription> allowedEdges{
       CreateEdgeDescriptor(MLOperatorEdgeType::Tensor, MLOperatorTensorDataType::Double),
       CreateEdgeDescriptor(MLOperatorEdgeType::Tensor, MLOperatorTensorDataType::Float),
-      CreateEdgeDescriptor(MLOperatorEdgeType::Tensor, MLOperatorTensorDataType::Float16)};
+      CreateEdgeDescriptor(MLOperatorEdgeType::Tensor, MLOperatorTensorDataType::Float16)
+    };
     typeConstraint.allowedTypes = allowedEdges.data();
     typeConstraint.allowedTypeCount = static_cast<uint32_t>(allowedEdges.size());
 
@@ -239,7 +242,8 @@ struct NoisyReluOperatorFactory : winrt::implements<NoisyReluOperatorFactory, IM
     noisyReluVarianceAttributeValue.floats = defaultVariance;
 
     std::vector<MLOperatorAttributeNameValue> attributeDefaultValues{
-      noisyReluMeanAttributeValue, noisyReluVarianceAttributeValue};
+      noisyReluMeanAttributeValue, noisyReluVarianceAttributeValue
+    };
     kernelDescription.defaultAttributes = attributeDefaultValues.data();
     kernelDescription.defaultAttributeCount = static_cast<uint32_t>(attributeDefaultValues.size());
     kernelDescription.options = MLOperatorKernelOptions::None;
diff --git a/winml/test/scenario/cppwinrt/ReluCpu.h b/winml/test/scenario/cppwinrt/ReluCpu.h
index c72285a4de7fb..7bb275f7b399b 100644
--- a/winml/test/scenario/cppwinrt/ReluCpu.h
+++ b/winml/test/scenario/cppwinrt/ReluCpu.h
@@ -114,7 +114,8 @@ struct ReluOperatorFactory : winrt::implements<ReluOperatorFactory, IMLOperatorK
     std::vector<MLOperatorEdgeDescription> allowedEdges{
       CreateEdgeDescriptor(MLOperatorEdgeType::Tensor, MLOperatorTensorDataType::Double),
       CreateEdgeDescriptor(MLOperatorEdgeType::Tensor, MLOperatorTensorDataType::Float),
-      CreateEdgeDescriptor(MLOperatorEdgeType::Tensor, MLOperatorTensorDataType::Float16)};
+      CreateEdgeDescriptor(MLOperatorEdgeType::Tensor, MLOperatorTensorDataType::Float16)
+    };
     typeConstraint.allowedTypes = allowedEdges.data();
     typeConstraint.allowedTypeCount = static_cast<uint32_t>(allowedEdges.size());
 
diff --git a/winml/test/scenario/cppwinrt/scenariotestscppwinrt.cpp b/winml/test/scenario/cppwinrt/scenariotestscppwinrt.cpp
index 9b389d014c953..9a03172340bf7 100644
--- a/winml/test/scenario/cppwinrt/scenariotestscppwinrt.cpp
+++ b/winml/test/scenario/cppwinrt/scenariotestscppwinrt.cpp
@@ -510,7 +510,8 @@ static void Scenario9LoadBindEvalInputTensorGPU() {
 
   UINT64 bufferbytesize = 720 * 720 * 3 * sizeof(float);
   D3D12_HEAP_PROPERTIES heapProperties = {
-    D3D12_HEAP_TYPE_DEFAULT, D3D12_CPU_PAGE_PROPERTY_UNKNOWN, D3D12_MEMORY_POOL_UNKNOWN, 0, 0};
+    D3D12_HEAP_TYPE_DEFAULT, D3D12_CPU_PAGE_PROPERTY_UNKNOWN, D3D12_MEMORY_POOL_UNKNOWN, 0, 0
+  };
   D3D12_RESOURCE_DESC resourceDesc = {
     D3D12_RESOURCE_DIMENSION_BUFFER,
     0,
@@ -983,7 +984,8 @@ static void Scenario22ImageBindingAsGPUTensor() {
   // 3 is number of channels we use. R G B without alpha.
   UINT64 bufferbytesize = 3 * sizeof(float) * softwareBitmap.PixelWidth() * softwareBitmap.PixelHeight();
   D3D12_HEAP_PROPERTIES heapProperties = {
-    D3D12_HEAP_TYPE_DEFAULT, D3D12_CPU_PAGE_PROPERTY_UNKNOWN, D3D12_MEMORY_POOL_UNKNOWN, 0, 0};
+    D3D12_HEAP_TYPE_DEFAULT, D3D12_CPU_PAGE_PROPERTY_UNKNOWN, D3D12_MEMORY_POOL_UNKNOWN, 0, 0
+  };
   D3D12_RESOURCE_DESC resourceDesc = {
     D3D12_RESOURCE_DIMENSION_BUFFER,
     0,
@@ -1085,7 +1087,8 @@ static void Scenario23NominalPixelRange() {
   std::vector<std::wstring> modelPaths = {// Normalized_0_1 and image output
                                           modulePath + L"Add_ImageNet1920WithImageMetadataBgr8_SRGB_0_1.onnx",
                                           // Normalized_1_1 and image output
-                                          modulePath + L"Add_ImageNet1920WithImageMetadataBgr8_SRGB_1_1.onnx"};
+                                          modulePath + L"Add_ImageNet1920WithImageMetadataBgr8_SRGB_1_1.onnx"
+  };
 
   for (uint32_t model_i = 0; model_i < modelPaths.size(); model_i++) {
     // load model and create session